xref: /dragonfly/sys/dev/netif/ix/if_ix.c (revision a4da4a90)
1 /*
2  * Copyright (c) 2001-2017, Intel Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  *  1. Redistributions of source code must retain the above copyright notice,
9  *     this list of conditions and the following disclaimer.
10  *
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *     notice, this list of conditions and the following disclaimer in the
13  *     documentation and/or other materials provided with the distribution.
14  *
15  *  3. Neither the name of the Intel Corporation nor the names of its
16  *     contributors may be used to endorse or promote products derived from
17  *     this software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include "opt_ifpoll.h"
33 #include "opt_ix.h"
34 
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/endian.h>
38 #include <sys/interrupt.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/proc.h>
43 #include <sys/rman.h>
44 #include <sys/serialize.h>
45 #include <sys/serialize2.h>
46 #include <sys/socket.h>
47 #include <sys/sockio.h>
48 #include <sys/sysctl.h>
49 #include <sys/systm.h>
50 #include <sys/taskqueue.h>
51 
52 #include <net/bpf.h>
53 #include <net/ethernet.h>
54 #include <net/if.h>
55 #include <net/if_arp.h>
56 #include <net/if_dl.h>
57 #include <net/if_media.h>
58 #include <net/ifq_var.h>
59 #include <net/if_ringmap.h>
60 #include <net/toeplitz.h>
61 #include <net/toeplitz2.h>
62 #include <net/vlan/if_vlan_var.h>
63 #include <net/vlan/if_vlan_ether.h>
64 #include <net/if_poll.h>
65 
66 #include <netinet/in_systm.h>
67 #include <netinet/in.h>
68 #include <netinet/ip.h>
69 
70 #include <bus/pci/pcivar.h>
71 #include <bus/pci/pcireg.h>
72 
73 #include <dev/netif/ix/ixgbe_common.h>
74 #include <dev/netif/ix/ixgbe_api.h>
75 #include <dev/netif/ix/if_ix.h>
76 
77 #define IX_IFM_DEFAULT		(IFM_ETHER | IFM_AUTO)
78 
79 #ifdef IX_RSS_DEBUG
80 #define IX_RSS_DPRINTF(sc, lvl, fmt, ...) \
81 do { \
82 	if (sc->rss_debug >= lvl) \
83 		if_printf(&sc->arpcom.ac_if, fmt, __VA_ARGS__); \
84 } while (0)
85 #else	/* !IX_RSS_DEBUG */
86 #define IX_RSS_DPRINTF(sc, lvl, fmt, ...)	((void)0)
87 #endif	/* IX_RSS_DEBUG */
88 
89 #define IX_NAME			"Intel(R) PRO/10GbE "
90 #define IX_DEVICE(id) \
91 	{ IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_##id, IX_NAME #id }
92 #define IX_DEVICE_NULL		{ 0, 0, NULL }
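/*
 * For example, IX_DEVICE(82598AT) expands to
 * { IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, "Intel(R) PRO/10GbE 82598AT" },
 * i.e. the device-id macro suffix doubles as the probe description string.
 */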
93 
94 static struct ix_device {
95 	uint16_t	vid;
96 	uint16_t	did;
97 	const char	*desc;
98 } ix_devices[] = {
99 	IX_DEVICE(82598AF_DUAL_PORT),
100 	IX_DEVICE(82598AF_SINGLE_PORT),
101 	IX_DEVICE(82598EB_CX4),
102 	IX_DEVICE(82598AT),
103 	IX_DEVICE(82598AT2),
104 	IX_DEVICE(82598),
105 	IX_DEVICE(82598_DA_DUAL_PORT),
106 	IX_DEVICE(82598_CX4_DUAL_PORT),
107 	IX_DEVICE(82598EB_XF_LR),
108 	IX_DEVICE(82598_SR_DUAL_PORT_EM),
109 	IX_DEVICE(82598EB_SFP_LOM),
110 	IX_DEVICE(82599_KX4),
111 	IX_DEVICE(82599_KX4_MEZZ),
112 	IX_DEVICE(82599_SFP),
113 	IX_DEVICE(82599_XAUI_LOM),
114 	IX_DEVICE(82599_CX4),
115 	IX_DEVICE(82599_T3_LOM),
116 	IX_DEVICE(82599_COMBO_BACKPLANE),
117 	IX_DEVICE(82599_BACKPLANE_FCOE),
118 	IX_DEVICE(82599_SFP_SF2),
119 	IX_DEVICE(82599_SFP_FCOE),
120 	IX_DEVICE(82599EN_SFP),
121 	IX_DEVICE(82599_SFP_SF_QP),
122 	IX_DEVICE(82599_QSFP_SF_QP),
123 	IX_DEVICE(X540T),
124 	IX_DEVICE(X540T1),
125 	IX_DEVICE(X550T),
126 	IX_DEVICE(X550T1),
127 	IX_DEVICE(X550EM_X_KR),
128 	IX_DEVICE(X550EM_X_KX4),
129 	IX_DEVICE(X550EM_X_10G_T),
130 	IX_DEVICE(X550EM_X_1G_T),
131 	IX_DEVICE(X550EM_X_SFP),
132 	IX_DEVICE(X550EM_A_KR),
133 	IX_DEVICE(X550EM_A_KR_L),
134 	IX_DEVICE(X550EM_A_SFP),
135 	IX_DEVICE(X550EM_A_SFP_N),
136 	IX_DEVICE(X550EM_A_SGMII),
137 	IX_DEVICE(X550EM_A_SGMII_L),
138 	IX_DEVICE(X550EM_A_10G_T),
139 	IX_DEVICE(X550EM_A_1G_T),
140 	IX_DEVICE(X550EM_A_1G_T_L),
141 #if 0
142 	IX_DEVICE(X540_BYPASS),
143 	IX_DEVICE(82599_BYPASS),
144 #endif
145 
146 	/* required last entry */
147 	IX_DEVICE_NULL
148 };
149 
150 static int	ix_probe(device_t);
151 static int	ix_attach(device_t);
152 static int	ix_detach(device_t);
153 static int	ix_shutdown(device_t);
154 
155 static void	ix_serialize(struct ifnet *, enum ifnet_serialize);
156 static void	ix_deserialize(struct ifnet *, enum ifnet_serialize);
157 static int	ix_tryserialize(struct ifnet *, enum ifnet_serialize);
158 #ifdef INVARIANTS
159 static void	ix_serialize_assert(struct ifnet *, enum ifnet_serialize,
160 		    boolean_t);
161 #endif
162 static void	ix_start(struct ifnet *, struct ifaltq_subque *);
163 static void	ix_watchdog(struct ifaltq_subque *);
164 static int	ix_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
165 static void	ix_init(void *);
166 static void	ix_stop(struct ix_softc *);
167 static void	ix_media_status(struct ifnet *, struct ifmediareq *);
168 static int	ix_media_change(struct ifnet *);
169 static void	ix_timer(void *);
170 static void	ix_fw_timer(void *);
171 #ifdef IFPOLL_ENABLE
172 static void	ix_npoll(struct ifnet *, struct ifpoll_info *);
173 static void	ix_npoll_rx(struct ifnet *, void *, int);
174 static void	ix_npoll_rx_direct(struct ifnet *, void *, int);
175 static void	ix_npoll_tx(struct ifnet *, void *, int);
176 static void	ix_npoll_status(struct ifnet *);
177 #endif
178 
179 static void	ix_add_sysctl(struct ix_softc *);
180 static void	ix_add_intr_rate_sysctl(struct ix_softc *, int,
181 		    const char *, int (*)(SYSCTL_HANDLER_ARGS), const char *);
182 static int	ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
183 static int	ix_sysctl_tx_nmbuf(SYSCTL_HANDLER_ARGS);
184 static int	ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
185 static int	ix_sysctl_txd(SYSCTL_HANDLER_ARGS);
186 static int	ix_sysctl_rxd(SYSCTL_HANDLER_ARGS);
187 static int	ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS);
188 static int	ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int);
189 static int	ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS);
190 static int	ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS);
191 static int	ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS);
192 static int	ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS);
193 #if 0
194 static void     ix_add_hw_stats(struct ix_softc *);
195 #endif
196 
197 static void	ix_watchdog_reset(struct ix_softc *);
198 static void	ix_watchdog_task(void *, int);
199 static void	ix_sync_netisr(struct ix_softc *, int);
200 static void	ix_slot_info(struct ix_softc *);
201 static int	ix_alloc_rings(struct ix_softc *);
202 static void	ix_free_rings(struct ix_softc *);
203 static void	ix_setup_ifp(struct ix_softc *);
204 static void	ix_setup_serialize(struct ix_softc *);
205 static void	ix_setup_caps(struct ix_softc *);
206 static void	ix_set_ring_inuse(struct ix_softc *, boolean_t);
207 static int	ix_get_timer_cpuid(const struct ix_softc *, boolean_t);
208 static void	ix_update_stats(struct ix_softc *);
209 static void	ix_detect_fanfail(struct ix_softc *, uint32_t, boolean_t);
210 
211 static void	ix_set_promisc(struct ix_softc *);
212 static void	ix_set_multi(struct ix_softc *);
213 static void	ix_set_vlan(struct ix_softc *);
214 static uint8_t	*ix_mc_array_itr(struct ixgbe_hw *, uint8_t **, uint32_t *);
215 static enum ixgbe_fc_mode ix_ifmedia2fc(int);
216 static const char *ix_ifmedia2str(int);
217 static const char *ix_fc2str(enum ixgbe_fc_mode);
218 
219 static void	ix_get_txring_cnt(const struct ix_softc *, int *, int *);
220 static int	ix_get_txring_inuse(const struct ix_softc *, boolean_t);
221 static void	ix_init_tx_ring(struct ix_tx_ring *);
222 static void	ix_free_tx_ring(struct ix_tx_ring *);
223 static int	ix_create_tx_ring(struct ix_tx_ring *);
224 static void	ix_destroy_tx_ring(struct ix_tx_ring *, int);
225 static void	ix_init_tx_unit(struct ix_softc *);
226 static int	ix_encap(struct ix_tx_ring *, struct mbuf **,
227 		    uint16_t *, int *);
228 static int	ix_tx_ctx_setup(struct ix_tx_ring *,
229 		    const struct mbuf *, uint32_t *, uint32_t *);
230 static int	ix_tso_ctx_setup(struct ix_tx_ring *,
231 		    const struct mbuf *, uint32_t *, uint32_t *);
232 static void	ix_txeof(struct ix_tx_ring *, int);
233 static void	ix_txgc(struct ix_tx_ring *);
234 static void	ix_txgc_timer(void *);
235 
236 static void	ix_get_rxring_cnt(const struct ix_softc *, int *, int *);
237 static int	ix_get_rxring_inuse(const struct ix_softc *, boolean_t);
238 static int	ix_init_rx_ring(struct ix_rx_ring *);
239 static void	ix_free_rx_ring(struct ix_rx_ring *);
240 static int	ix_create_rx_ring(struct ix_rx_ring *);
241 static void	ix_destroy_rx_ring(struct ix_rx_ring *, int);
242 static void	ix_init_rx_unit(struct ix_softc *, boolean_t);
243 #if 0
244 static void	ix_setup_hw_rsc(struct ix_rx_ring *);
245 #endif
246 static int	ix_newbuf(struct ix_rx_ring *, int, boolean_t);
247 static void	ix_rxeof(struct ix_rx_ring *, int);
248 static void	ix_rx_discard(struct ix_rx_ring *, int, boolean_t);
249 static void	ix_enable_rx_drop(struct ix_softc *);
250 static void	ix_disable_rx_drop(struct ix_softc *);
251 
252 static void	ix_config_gpie(struct ix_softc *);
253 static void	ix_alloc_msix(struct ix_softc *);
254 static void	ix_free_msix(struct ix_softc *, boolean_t);
255 static void	ix_setup_msix_eims(const struct ix_softc *, int,
256 		    uint32_t *, uint32_t *);
257 static int	ix_alloc_intr(struct ix_softc *);
258 static void	ix_free_intr(struct ix_softc *);
259 static int	ix_setup_intr(struct ix_softc *);
260 static void	ix_teardown_intr(struct ix_softc *, int);
261 static void	ix_enable_intr(struct ix_softc *);
262 static void	ix_disable_intr(struct ix_softc *);
263 static void	ix_set_ivar(struct ix_softc *, uint8_t, uint8_t, int8_t);
264 static void	ix_set_eitr(struct ix_softc *, int, int);
265 static void	ix_intr_status(struct ix_softc *, uint32_t);
266 static void	ix_intr_82598(void *);
267 static void	ix_intr(void *);
268 static void	ix_msix_rxtx(void *);
269 static void	ix_msix_rx(void *);
270 static void	ix_msix_tx(void *);
271 static void	ix_msix_status(void *);
272 
273 static void	ix_config_link(struct ix_softc *);
274 static boolean_t ix_sfp_probe(struct ix_softc *);
275 static boolean_t ix_is_sfp(struct ixgbe_hw *);
276 static void	ix_update_link_status(struct ix_softc *);
277 static void	ix_handle_link(struct ix_softc *);
278 static void	ix_handle_mod(struct ix_softc *);
279 static void	ix_handle_msf(struct ix_softc *);
280 static void	ix_handle_phy(struct ix_softc *);
281 static int	ix_powerdown(struct ix_softc *);
282 static void	ix_config_flowctrl(struct ix_softc *);
283 static void	ix_config_dmac(struct ix_softc *);
284 static void	ix_init_media(struct ix_softc *);
285 
286 static void	ix_serialize_skipmain(struct ix_softc *);
287 static void	ix_deserialize_skipmain(struct ix_softc *);
288 
289 static device_method_t ix_methods[] = {
290 	/* Device interface */
291 	DEVMETHOD(device_probe,		ix_probe),
292 	DEVMETHOD(device_attach,	ix_attach),
293 	DEVMETHOD(device_detach,	ix_detach),
294 	DEVMETHOD(device_shutdown,	ix_shutdown),
295 	DEVMETHOD_END
296 };
297 
298 static driver_t ix_driver = {
299 	"ix",
300 	ix_methods,
301 	sizeof(struct ix_softc)
302 };
303 
304 static devclass_t ix_devclass;
305 
306 DECLARE_DUMMY_MODULE(if_ix);
307 DRIVER_MODULE(if_ix, pci, ix_driver, ix_devclass, NULL, NULL);
308 
309 static int	ix_msi_enable = 1;
310 static int	ix_msix_enable = 1;
311 static int	ix_rxr = 0;
312 static int	ix_txr = 0;
313 static int	ix_txd = IX_PERF_TXD;
314 static int	ix_rxd = IX_PERF_RXD;
315 static int	ix_unsupported_sfp = 0;
316 static int	ix_direct_input = 1;
317 
318 static char	ix_flowctrl[IFM_ETH_FC_STRLEN] = IFM_ETH_FC_NONE;
319 
320 TUNABLE_INT("hw.ix.msi.enable", &ix_msi_enable);
321 TUNABLE_INT("hw.ix.msix.enable", &ix_msix_enable);
322 TUNABLE_INT("hw.ix.rxr", &ix_rxr);
323 TUNABLE_INT("hw.ix.txr", &ix_txr);
324 TUNABLE_INT("hw.ix.txd", &ix_txd);
325 TUNABLE_INT("hw.ix.rxd", &ix_rxd);
326 TUNABLE_INT("hw.ix.unsupported_sfp", &ix_unsupported_sfp);
327 TUNABLE_STR("hw.ix.flow_ctrl", ix_flowctrl, sizeof(ix_flowctrl));
328 TUNABLE_INT("hw.ix.direct_input", &ix_direct_input);
329 
330 /*
331  * Smart speed setting, default to on.  This only works
332  * as a compile option right now, since it is applied during attach;
333  * set this to 'ixgbe_smart_speed_off' to disable.
334  */
335 static const enum ixgbe_smart_speed ix_smart_speed =
336     ixgbe_smart_speed_on;
337 
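/*
 * Age the ring's activity counter by 'dec'.  Once it reaches zero while
 * mbufs are still held and fewer than tx_intr_nsegs descriptors remain
 * outstanding (too few to be covered by a pending Report Status TX
 * interrupt), reclaim the completed descriptors via ix_txgc().
 */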
338 static __inline void
339 ix_try_txgc(struct ix_tx_ring *txr, int8_t dec)
340 {
341 
342 	if (txr->tx_running > 0) {
343 		txr->tx_running -= dec;
344 		if (txr->tx_running <= 0 && txr->tx_nmbuf &&
345 		    txr->tx_avail < txr->tx_ndesc &&
346 		    txr->tx_avail + txr->tx_intr_nsegs > txr->tx_ndesc)
347 			ix_txgc(txr);
348 	}
349 }
350 
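/*
 * Once-per-tick callout driving the garbage collection above.  It runs
 * only while the interface is UP and RUNNING and not in polling mode,
 * and it merely try-locks the TX serializer so the callout never blocks;
 * on contention it retries on the next tick.
 */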
351 static void
352 ix_txgc_timer(void *xtxr)
353 {
354 	struct ix_tx_ring *txr = xtxr;
355 	struct ifnet *ifp = &txr->tx_sc->arpcom.ac_if;
356 
357 	if ((ifp->if_flags & (IFF_RUNNING | IFF_UP | IFF_NPOLLING)) !=
358 	    (IFF_RUNNING | IFF_UP))
359 		return;
360 
361 	if (!lwkt_serialize_try(&txr->tx_serialize))
362 		goto done;
363 
364 	if ((ifp->if_flags & (IFF_RUNNING | IFF_UP | IFF_NPOLLING)) !=
365 	    (IFF_RUNNING | IFF_UP)) {
366 		lwkt_serialize_exit(&txr->tx_serialize);
367 		return;
368 	}
369 	ix_try_txgc(txr, IX_TX_RUNNING_DEC);
370 
371 	lwkt_serialize_exit(&txr->tx_serialize);
372 done:
373 	callout_reset(&txr->tx_gc_timer, 1, ix_txgc_timer, txr);
374 }
375 
376 static __inline void
377 ix_tx_intr(struct ix_tx_ring *txr, int hdr)
378 {
379 
380 	ix_txeof(txr, hdr);
381 	if (!ifsq_is_empty(txr->tx_ifsq))
382 		ifsq_devstart(txr->tx_ifsq);
383 }
384 
385 static __inline void
386 ix_free_txbuf(struct ix_tx_ring *txr, struct ix_tx_buf *txbuf)
387 {
388 
389 	KKASSERT(txbuf->m_head != NULL);
390 	KKASSERT(txr->tx_nmbuf > 0);
391 	txr->tx_nmbuf--;
392 
393 	bus_dmamap_unload(txr->tx_tag, txbuf->map);
394 	m_freem(txbuf->m_head);
395 	txbuf->m_head = NULL;
396 }
397 
398 static int
399 ix_probe(device_t dev)
400 {
401 	const struct ix_device *d;
402 	uint16_t vid, did;
403 
404 	vid = pci_get_vendor(dev);
405 	did = pci_get_device(dev);
406 
407 	for (d = ix_devices; d->desc != NULL; ++d) {
408 		if (vid == d->vid && did == d->did) {
409 			device_set_desc(dev, d->desc);
410 			return 0;
411 		}
412 	}
413 	return ENXIO;
414 }
415 
416 static void
417 ix_get_rxring_cnt(const struct ix_softc *sc, int *ring_cnt, int *ring_cntmax)
418 {
419 
420 	switch (sc->hw.mac.type) {
421 	case ixgbe_mac_X550:
422 	case ixgbe_mac_X550EM_x:
423 	case ixgbe_mac_X550EM_a:
424 		*ring_cntmax = IX_MAX_RXRING_X550;
425 		break;
426 
427 	default:
428 		*ring_cntmax = IX_MAX_RXRING;
429 		break;
430 	}
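	/*
	 * A "rxr" value of 0 (the hw.ix.rxr tunable default) leaves the
	 * ring count selection to if_ringmap_alloc(), bounded by the
	 * per-MAC maximum chosen above.
	 */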
431 	*ring_cnt = device_getenv_int(sc->dev, "rxr", ix_rxr);
432 }
433 
434 static void
435 ix_get_txring_cnt(const struct ix_softc *sc, int *ring_cnt, int *ring_cntmax)
436 {
437 
438 	switch (sc->hw.mac.type) {
439 	case ixgbe_mac_82598EB:
440 		*ring_cntmax = IX_MAX_TXRING_82598;
441 		break;
442 
443 	case ixgbe_mac_82599EB:
444 		*ring_cntmax = IX_MAX_TXRING_82599;
445 		break;
446 
447 	case ixgbe_mac_X540:
448 		*ring_cntmax = IX_MAX_TXRING_X540;
449 		break;
450 
451 	case ixgbe_mac_X550:
452 	case ixgbe_mac_X550EM_x:
453 	case ixgbe_mac_X550EM_a:
454 		*ring_cntmax = IX_MAX_TXRING_X550;
455 		break;
456 
457 	default:
458 		*ring_cntmax = IX_MAX_TXRING;
459 		break;
460 	}
461 	*ring_cnt = device_getenv_int(sc->dev, "txr", ix_txr);
462 }
463 
464 static int
465 ix_attach(device_t dev)
466 {
467 	struct ix_softc *sc = device_get_softc(dev);
468 	struct ixgbe_hw *hw;
469 	int error, ring_cnt, ring_cntmax;
470 	uint32_t ctrl_ext;
471 	char flowctrl[IFM_ETH_FC_STRLEN];
472 
473 	sc->dev = dev;
474 	hw = &sc->hw;
475 	hw->back = sc;
476 
477 	if_initname(&sc->arpcom.ac_if, device_get_name(dev),
478 	    device_get_unit(dev));
479 	ifmedia_init(&sc->media, IFM_IMASK | IFM_ETH_FCMASK,
480 	    ix_media_change, ix_media_status);
481 
482 	/* Save frame size */
483 	sc->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;
484 
485 	sc->direct_input = ix_direct_input;
486 	TASK_INIT(&sc->wdog_task, 0, ix_watchdog_task, sc);
487 
488 	callout_init_mp(&sc->fw_timer);
489 	callout_init_mp(&sc->timer);
490 	lwkt_serialize_init(&sc->main_serialize);
491 
492 	/*
493 	 * Save off the information about this board
494 	 */
495 	hw->vendor_id = pci_get_vendor(dev);
496 	hw->device_id = pci_get_device(dev);
497 	hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
498 	hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2);
499 	hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);
500 
501 	/* Enable bus mastering */
502 	pci_enable_busmaster(dev);
503 
504 	/*
505 	 * Allocate IO memory
506 	 */
507 	sc->mem_rid = PCIR_BAR(0);
508 	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
509 	    &sc->mem_rid, RF_ACTIVE);
510 	if (sc->mem_res == NULL) {
511 		device_printf(dev, "Unable to allocate bus resource: memory\n");
512 		error = ENXIO;
513 		goto failed;
514 	}
515 
516 	sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->mem_res);
517 	sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->mem_res);
518 
519 	sc->hw.hw_addr = (uint8_t *)&sc->osdep.mem_bus_space_handle;
520 
521 	/* Let hardware know driver is loaded */
522 	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
523 	ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
524 	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
525 
526 	/*
527 	 * Initialize the shared code
528 	 */
529 	if (ixgbe_init_shared_code(hw)) {
530 		device_printf(dev, "Unable to initialize the shared code\n");
531 		error = ENXIO;
532 		goto failed;
533 	}
534 
535 	if (hw->mbx.ops.init_params)
536 		hw->mbx.ops.init_params(hw);
537 
538 	hw->allow_unsupported_sfp = ix_unsupported_sfp;
539 
540 	/* Pick up the 82599 settings */
541 	if (hw->mac.type != ixgbe_mac_82598EB)
542 		hw->phy.smart_speed = ix_smart_speed;
543 
544 	/* Setup hardware capabilities */
545 	ix_setup_caps(sc);
546 
547 	/* Allocate multicast array memory. */
548 	sc->mta = kmalloc(sizeof(*sc->mta) * IX_MAX_MCASTADDR,
549 	    M_DEVBUF, M_WAITOK);
550 
551 	/* Save initial wake up filter configuration; WOL is disabled. */
552 	sc->wufc = IXGBE_READ_REG(hw, IXGBE_WUFC);
553 
554 	/* Verify adapter fan is still functional (if applicable) */
555 	if (sc->caps & IX_CAP_DETECT_FANFAIL)
556 		ix_detect_fanfail(sc, IXGBE_READ_REG(hw, IXGBE_ESDP), FALSE);
557 
558 	/* Ensure SW/FW semaphore is free */
559 	ixgbe_init_swfw_semaphore(hw);
560 
561 #ifdef notyet
562 	/* Enable EEE power saving */
563 	if (sc->caps & IX_CAP_EEE)
564 		hw->mac.ops.setup_eee(hw, true);
565 #endif
566 
567 	/*
568 	 * Configure total supported RX/TX ring count
569 	 */
570 	ix_get_rxring_cnt(sc, &ring_cnt, &ring_cntmax);
571 	sc->rx_rmap = if_ringmap_alloc(dev, ring_cnt, ring_cntmax);
572 	ix_get_txring_cnt(sc, &ring_cnt, &ring_cntmax);
573 	sc->tx_rmap = if_ringmap_alloc(dev, ring_cnt, ring_cntmax);
574 	if_ringmap_match(dev, sc->rx_rmap, sc->tx_rmap);
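	/*
	 * if_ringmap_match() aligns the RX and TX ringmaps so that a TX
	 * ring can later piggyback on its paired RX ring's MSI-X vector
	 * (see the IVAR setup in ix_init()).
	 */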
575 
576 	sc->rx_ring_cnt = if_ringmap_count(sc->rx_rmap);
577 	sc->rx_ring_inuse = sc->rx_ring_cnt;
578 	sc->tx_ring_cnt = if_ringmap_count(sc->tx_rmap);
579 	sc->tx_ring_inuse = sc->tx_ring_cnt;
580 
581 	/* Allocate TX/RX rings */
582 	error = ix_alloc_rings(sc);
583 	if (error)
584 		goto failed;
585 
586 	/* Allocate interrupt */
587 	error = ix_alloc_intr(sc);
588 	if (error)
589 		goto failed;
590 
591 	/* Setup serializes */
592 	ix_setup_serialize(sc);
593 
594 	hw->phy.reset_if_overtemp = TRUE;
595 	error = ixgbe_reset_hw(hw);
596 	hw->phy.reset_if_overtemp = FALSE;
597 	if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
598 		/*
599 		 * No optics in this port; ask timer routine
600 		 * to probe for later insertion.
601 		 */
602 		sc->sfp_probe = TRUE;
603 		error = 0;
604 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
605 		device_printf(dev, "Unsupported SFP+ module detected!\n");
606 		error = EIO;
607 		goto failed;
608 	} else if (error) {
609 		device_printf(dev, "Hardware initialization failed\n");
610 		error = EIO;
611 		goto failed;
612 	}
613 
614 	/* Make sure we have a good EEPROM before we read from it */
615 	if (ixgbe_validate_eeprom_checksum(&sc->hw, NULL) < 0) {
616 		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
617 		error = EIO;
618 		goto failed;
619 	}
620 
621 	error = ixgbe_start_hw(hw);
622 	if (error == IXGBE_ERR_EEPROM_VERSION) {
623 		device_printf(dev, "Pre-production device detected\n");
624 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
625 		device_printf(dev, "Unsupported SFP+ Module\n");
626 		error = EIO;
627 		goto failed;
628 	} else if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
629 		device_printf(dev, "No SFP+ Module found\n");
630 	}
631 
632 	/* Enable the optics for 82599 SFP+ fiber */
633 	ixgbe_enable_tx_laser(hw);
634 
635 	/* Enable power to the phy. */
636 	ixgbe_set_phy_power(hw, TRUE);
637 
638 	sc->ifm_media = IX_IFM_DEFAULT;
639 	/* Get default flow control settings */
640 	device_getenv_string(dev, "flow_ctrl", flowctrl, sizeof(flowctrl),
641 	    ix_flowctrl);
642 	sc->ifm_media |= ifmedia_str2ethfc(flowctrl);
643 	sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;
644 
645 	/* Setup OS specific network interface */
646 	ix_setup_ifp(sc);
647 
648 	/* Add sysctl tree */
649 	ix_add_sysctl(sc);
650 
651 	error = ix_setup_intr(sc);
652 	if (error) {
653 		ether_ifdetach(&sc->arpcom.ac_if);
654 		goto failed;
655 	}
656 
657 	/* Initialize statistics */
658 	ix_update_stats(sc);
659 
660 	/* Check PCIE slot type/speed/width */
661 	ix_slot_info(sc);
662 
663 	if (sc->caps & IX_CAP_FW_RECOVERY) {
664 		device_printf(dev, "start fw timer\n");
665 		callout_reset_bycpu(&sc->fw_timer, hz,
666 		    ix_fw_timer, sc, ix_get_timer_cpuid(sc, FALSE));
667 	}
668 
669 	return 0;
670 failed:
671 	ix_detach(dev);
672 	return error;
673 }
674 
675 static int
676 ix_detach(device_t dev)
677 {
678 	struct ix_softc *sc = device_get_softc(dev);
679 
680 	if (device_is_attached(dev)) {
681 		struct ifnet *ifp = &sc->arpcom.ac_if;
682 
683 		ix_sync_netisr(sc, IFF_UP);
684 		taskqueue_drain(taskqueue_thread[0], &sc->wdog_task);
685 
686 		ifnet_serialize_all(ifp);
687 
688 		ix_powerdown(sc);
689 		ix_teardown_intr(sc, sc->intr_cnt);
690 
691 		ifnet_deserialize_all(ifp);
692 
693 		callout_terminate(&sc->timer);
694 		ether_ifdetach(ifp);
695 	}
696 	callout_terminate(&sc->fw_timer);
697 
698 	if (sc->mem_res != NULL) {
699 		uint32_t ctrl_ext;
700 
701 		/* Let hardware know driver is unloading */
702 		ctrl_ext = IXGBE_READ_REG(&sc->hw, IXGBE_CTRL_EXT);
703 		ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
704 		IXGBE_WRITE_REG(&sc->hw, IXGBE_CTRL_EXT, ctrl_ext);
705 	}
706 
707 	ifmedia_removeall(&sc->media);
708 	bus_generic_detach(dev);
709 
710 	ix_free_intr(sc);
711 
712 	if (sc->msix_mem_res != NULL) {
713 		bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_mem_rid,
714 		    sc->msix_mem_res);
715 	}
716 	if (sc->mem_res != NULL) {
717 		bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid,
718 		    sc->mem_res);
719 	}
720 
721 	ix_free_rings(sc);
722 
723 	if (sc->mta != NULL)
724 		kfree(sc->mta, M_DEVBUF);
725 	if (sc->serializes != NULL)
726 		kfree(sc->serializes, M_DEVBUF);
727 
728 	if (sc->rx_rmap != NULL)
729 		if_ringmap_free(sc->rx_rmap);
730 	if (sc->rx_rmap_intr != NULL)
731 		if_ringmap_free(sc->rx_rmap_intr);
732 	if (sc->tx_rmap != NULL)
733 		if_ringmap_free(sc->tx_rmap);
734 	if (sc->tx_rmap_intr != NULL)
735 		if_ringmap_free(sc->tx_rmap_intr);
736 
737 	return 0;
738 }
739 
740 static int
741 ix_shutdown(device_t dev)
742 {
743 	struct ix_softc *sc = device_get_softc(dev);
744 	struct ifnet *ifp = &sc->arpcom.ac_if;
745 
746 	ix_sync_netisr(sc, IFF_UP);
747 	taskqueue_drain(taskqueue_thread[0], &sc->wdog_task);
748 
749 	ifnet_serialize_all(ifp);
750 	ix_powerdown(sc);
751 	ifnet_deserialize_all(ifp);
752 
753 	return 0;
754 }
755 
756 static void
757 ix_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
758 {
759 	struct ix_softc *sc = ifp->if_softc;
760 	struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
761 	int idx = -1;
762 	uint16_t nsegs;
763 
764 	KKASSERT(txr->tx_ifsq == ifsq);
765 	ASSERT_SERIALIZED(&txr->tx_serialize);
766 
767 	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
768 		return;
769 
770 	if (!sc->link_active || (txr->tx_flags & IX_TXFLAG_ENABLED) == 0) {
771 		ifsq_purge(ifsq);
772 		return;
773 	}
774 
775 	while (!ifsq_is_empty(ifsq)) {
776 		struct mbuf *m_head;
777 
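		/*
		 * Not enough descriptors left for a worst-case scattered
		 * frame plus the reserved slots: mark the subqueue oactive
		 * and arm a 5 second watchdog; the queue is restarted once
		 * ix_txeof() reclaims enough descriptors.
		 */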
778 		if (txr->tx_avail <= IX_MAX_SCATTER + IX_TX_RESERVED) {
779 			ifsq_set_oactive(ifsq);
780 			ifsq_watchdog_set_count(&txr->tx_watchdog, 5);
781 			break;
782 		}
783 
784 		m_head = ifsq_dequeue(ifsq);
785 		if (m_head == NULL)
786 			break;
787 
788 		if (ix_encap(txr, &m_head, &nsegs, &idx)) {
789 			IFNET_STAT_INC(ifp, oerrors, 1);
790 			continue;
791 		}
792 
793 		/*
794 		 * TX interrupts are aggressively aggregated, so updating
795 		 * opackets at TX interrupt time would make the opackets
796 		 * statistics vastly inaccurate; do the opackets increment
797 		 * here, at enqueue time.
798 		 */
799 		IFNET_STAT_INC(ifp, opackets, 1);
800 
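		/*
		 * Batch TDT (tail) doorbell writes: only poke the hardware
		 * once at least tx_wreg_nsegs descriptors have been queued;
		 * any remainder is flushed right after the loop.
		 */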
801 		if (nsegs >= txr->tx_wreg_nsegs) {
802 			IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
803 			nsegs = 0;
804 			idx = -1;
805 		}
806 
807 		ETHER_BPF_MTAP(ifp, m_head);
808 	}
809 	if (idx >= 0)
810 		IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
811 	txr->tx_running = IX_TX_RUNNING;
812 }
813 
814 static int
815 ix_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
816 {
817 	struct ix_softc *sc = ifp->if_softc;
818 	struct ifreq *ifr = (struct ifreq *) data;
819 	int error = 0, mask, reinit;
820 
821 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
822 
823 	switch (command) {
824 	case SIOCSIFMTU:
825 		if (ifr->ifr_mtu > IX_MAX_MTU) {
826 			error = EINVAL;
827 		} else {
828 			ifp->if_mtu = ifr->ifr_mtu;
829 			sc->max_frame_size = ifp->if_mtu + IX_MTU_HDR;
830 			ix_init(sc);
831 		}
832 		break;
833 
834 	case SIOCSIFFLAGS:
835 		if (ifp->if_flags & IFF_UP) {
836 			if (ifp->if_flags & IFF_RUNNING) {
837 				if ((ifp->if_flags ^ sc->if_flags) &
838 				    (IFF_PROMISC | IFF_ALLMULTI))
839 					ix_set_promisc(sc);
840 			} else {
841 				ix_init(sc);
842 			}
843 		} else if (ifp->if_flags & IFF_RUNNING) {
844 			ix_stop(sc);
845 		}
846 		sc->if_flags = ifp->if_flags;
847 		break;
848 
849 	case SIOCADDMULTI:
850 	case SIOCDELMULTI:
851 		if (ifp->if_flags & IFF_RUNNING) {
852 			ix_disable_intr(sc);
853 			ix_set_multi(sc);
854 #ifdef IFPOLL_ENABLE
855 			if ((ifp->if_flags & IFF_NPOLLING) == 0)
856 #endif
857 				ix_enable_intr(sc);
858 		}
859 		break;
860 
861 	case SIOCSIFMEDIA:
862 	case SIOCGIFMEDIA:
863 		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
864 		break;
865 
866 	case SIOCSIFCAP:
867 		reinit = 0;
868 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
869 		if (mask & IFCAP_RXCSUM) {
870 			ifp->if_capenable ^= IFCAP_RXCSUM;
871 			reinit = 1;
872 		}
873 		if (mask & IFCAP_VLAN_HWTAGGING) {
874 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
875 			reinit = 1;
876 		}
877 		if (mask & IFCAP_TXCSUM) {
878 			ifp->if_capenable ^= IFCAP_TXCSUM;
879 			if (ifp->if_capenable & IFCAP_TXCSUM)
880 				ifp->if_hwassist |= CSUM_OFFLOAD;
881 			else
882 				ifp->if_hwassist &= ~CSUM_OFFLOAD;
883 		}
884 		if (mask & IFCAP_TSO) {
885 			ifp->if_capenable ^= IFCAP_TSO;
886 			if (ifp->if_capenable & IFCAP_TSO)
887 				ifp->if_hwassist |= CSUM_TSO;
888 			else
889 				ifp->if_hwassist &= ~CSUM_TSO;
890 		}
891 		if (mask & IFCAP_RSS)
892 			ifp->if_capenable ^= IFCAP_RSS;
893 		if (reinit && (ifp->if_flags & IFF_RUNNING))
894 			ix_init(sc);
895 		break;
896 
897 #if 0
898 	case SIOCGI2C:
899 	{
900 		struct ixgbe_i2c_req	i2c;
901 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
902 		if (error)
903 			break;
904 		if ((i2c.dev_addr != 0xA0) && (i2c.dev_addr != 0xA2)) {
905 			error = EINVAL;
906 			break;
907 		}
908 		hw->phy.ops.read_i2c_byte(hw, i2c.offset,
909 		    i2c.dev_addr, i2c.data);
910 		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
911 		break;
912 	}
913 #endif
914 
915 	default:
916 		error = ether_ioctl(ifp, command, data);
917 		break;
918 	}
919 	return error;
920 }
921 
922 #define IXGBE_MHADD_MFS_SHIFT 16
923 
924 static void
925 ix_init(void *xsc)
926 {
927 	struct ix_softc *sc = xsc;
928 	struct ifnet *ifp = &sc->arpcom.ac_if;
929 	struct ixgbe_hw *hw = &sc->hw;
930 	uint32_t rxctrl;
931 	int i, error;
932 	boolean_t polling;
933 
934 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
935 
936 	ix_stop(sc);
937 
938 	if (sc->flags & IX_FLAG_FW_RECOVERY)
939 		return;
940 
941 	polling = FALSE;
942 #ifdef IFPOLL_ENABLE
943 	if (ifp->if_flags & IFF_NPOLLING)
944 		polling = TRUE;
945 #endif
946 
947 	/* Configure # of used RX/TX rings */
948 	ix_set_ring_inuse(sc, polling);
949 	ifq_set_subq_divisor(&ifp->if_snd, sc->tx_ring_inuse);
950 
951 	/* Get the latest MAC address; the user may have configured a LAA */
952 	bcopy(IF_LLADDR(ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS);
953 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
954 	hw->addr_ctrl.rar_used_count = 1;
955 
956 	/* Prepare transmit descriptors and buffers */
957 	for (i = 0; i < sc->tx_ring_inuse; ++i)
958 		ix_init_tx_ring(&sc->tx_rings[i]);
959 
960 	ixgbe_init_hw(hw);
961 	ix_init_tx_unit(sc);
962 
963 	/* Setup Multicast table */
964 	ix_set_multi(sc);
965 
966 	/* Prepare receive descriptors and buffers */
967 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
968 		error = ix_init_rx_ring(&sc->rx_rings[i]);
969 		if (error) {
970 			if_printf(ifp, "Could not initialize RX ring%d\n", i);
971 			ix_stop(sc);
972 			return;
973 		}
974 	}
975 
976 	/* Configure RX settings */
977 	ix_init_rx_unit(sc, polling);
978 
979 	/* Enable SDP & MSI-X interrupts based on adapter */
980 	ix_config_gpie(sc);
981 
982 	/* Set MTU size */
983 	if (ifp->if_mtu > ETHERMTU) {
984 		uint32_t mhadd;
985 
986 		/* aka IXGBE_MAXFRS on 82599 and newer */
987 		mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
988 		mhadd &= ~IXGBE_MHADD_MFS_MASK;
989 		mhadd |= sc->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
990 		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
991 	}
992 
993 	/*
994 	 * Enable TX rings
995 	 */
996 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
997 		uint32_t txdctl;
998 
999 		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
1000 		txdctl |= IXGBE_TXDCTL_ENABLE;
1001 
1002 		/*
1003 		 * Set WTHRESH to 0, since TX head write-back is used
1004 		 */
1005 		txdctl &= ~(0x7f << 16);
1006 
1007 		/*
1008 		 * When the internal queue falls below PTHRESH (32),
1009 		 * start prefetching as long as there are at least
1010 		 * HTHRESH (1) buffers ready. The values are taken
1011 		 * from the Intel linux driver 3.8.21.
1012 		 * Prefetching enables tx line rate even with 1 queue.
1013 		 */
1014 		txdctl |= (32 << 0) | (1 << 8);
1015 		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
1016 	}
1017 
1018 	/*
1019 	 * Enable RX rings
1020 	 */
1021 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
1022 		uint32_t rxdctl;
1023 		int k;
1024 
1025 		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
1026 		if (hw->mac.type == ixgbe_mac_82598EB) {
1027 			/*
1028 			 * PTHRESH = 21
1029 			 * HTHRESH = 4
1030 			 * WTHRESH = 8
1031 			 */
1032 			rxdctl &= ~0x3FFFFF;
1033 			rxdctl |= 0x080420;
1034 		}
1035 		rxdctl |= IXGBE_RXDCTL_ENABLE;
1036 		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
1037 		for (k = 0; k < 10; ++k) {
1038 			if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
1039 			    IXGBE_RXDCTL_ENABLE)
1040 				break;
1041 			else
1042 				msec_delay(1);
1043 		}
1044 		wmb();
1045 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i),
1046 		    sc->rx_rings[0].rx_ndesc - 1);
1047 	}
1048 
1049 	/* Enable Receive engine */
1050 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
1051 	if (hw->mac.type == ixgbe_mac_82598EB)
1052 		rxctrl |= IXGBE_RXCTRL_DMBYPS;
1053 	rxctrl |= IXGBE_RXCTRL_RXEN;
1054 	ixgbe_enable_rx_dma(hw, rxctrl);
1055 
1056 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
1057 		const struct ix_tx_ring *txr = &sc->tx_rings[i];
1058 
1059 		if (txr->tx_intr_vec >= 0) {
1060 			ix_set_ivar(sc, i, txr->tx_intr_vec, 1);
1061 		} else if (!polling) {
1062 			/*
1063 			 * Unconfigured TX interrupt vector could only
1064 			 * happen for MSI-X.
1065 			 */
1066 			KASSERT(sc->intr_type == PCI_INTR_TYPE_MSIX,
1067 			    ("TX intr vector is not set"));
1068 			if (bootverbose)
1069 				if_printf(ifp, "IVAR skips TX ring %d\n", i);
1070 		}
1071 	}
1072 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
1073 		const struct ix_rx_ring *rxr = &sc->rx_rings[i];
1074 
1075 		if (polling && rxr->rx_intr_vec < 0)
1076 			continue;
1077 
1078 		KKASSERT(rxr->rx_intr_vec >= 0);
1079 		ix_set_ivar(sc, i, rxr->rx_intr_vec, 0);
1080 		if (rxr->rx_txr != NULL) {
1081 			/*
1082 			 * Piggyback the TX ring interrupt onto the RX
1083 			 * ring interrupt vector.
1084 			 */
1085 			KASSERT(rxr->rx_txr->tx_intr_vec < 0,
1086 			    ("piggybacked TX ring configured intr vector"));
1087 			ix_set_ivar(sc, rxr->rx_txr->tx_idx,
1088 			    rxr->rx_intr_vec, 1);
1089 			if (bootverbose) {
1090 				if_printf(ifp, "IVAR RX ring %d piggybacks "
1091 				    "TX ring %u\n", i, rxr->rx_txr->tx_idx);
1092 			}
1093 		}
1094 	}
1095 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
1096 		/* Set up status MSI-X vector; it is using fixed entry 1 */
1097 		ix_set_ivar(sc, 1, sc->sts_msix_vec, -1);
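		/* A "type" of -1 selects the misc-cause IVAR rather than an RX/TX entry. */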
1098 
1099 		/* Set up auto-mask for TX and RX rings */
1100 		if (hw->mac.type == ixgbe_mac_82598EB) {
1101 			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
1102 		} else {
1103 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
1104 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
1105 		}
1106 	} else {
1107 		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
1108 	}
1109 	for (i = 0; i < sc->intr_cnt; ++i)
1110 		ix_set_eitr(sc, i, sc->intr_data[i].intr_rate);
1111 
1112 	/*
1113 	 * Check on any SFP devices that need to be kick-started
1114 	 */
1115 	if (hw->phy.type == ixgbe_phy_none) {
1116 		error = hw->phy.ops.identify(hw);
1117 		if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
1118 			if_printf(ifp,
1119 			    "Unsupported SFP+ module type was detected.\n");
1120 			/* XXX stop */
1121 			return;
1122 		}
1123 	}
1124 
1125 	/* Config/Enable Link */
1126 	ix_config_link(sc);
1127 
1128 	/* Hardware Packet Buffer & Flow Control setup */
1129 	ix_config_flowctrl(sc);
1130 
1131 	/* Initialize the FC settings */
1132 	ixgbe_start_hw(hw);
1133 
1134 	/* Set up VLAN support and filter */
1135 	ix_set_vlan(sc);
1136 
1137 	/* Setup DMA Coalescing */
1138 	ix_config_dmac(sc);
1139 
1140 	/*
1141 	 * Only enable interrupts if we are not polling; make sure
1142 	 * they are off otherwise.
1143 	 */
1144 	if (polling)
1145 		ix_disable_intr(sc);
1146 	else
1147 		ix_enable_intr(sc);
1148 
1149 	ifp->if_flags |= IFF_RUNNING;
1150 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
1151 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1152 
1153 		ifsq_clr_oactive(txr->tx_ifsq);
1154 		ifsq_watchdog_start(&txr->tx_watchdog);
1155 
1156 		if (!polling) {
1157 			callout_reset_bycpu(&txr->tx_gc_timer, 1,
1158 			    ix_txgc_timer, txr, txr->tx_intr_cpuid);
1159 		}
1160 	}
1161 
1162 	sc->timer_cpuid = ix_get_timer_cpuid(sc, polling);
1163 	callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
1164 }
1165 
1166 static void
1167 ix_intr(void *xsc)
1168 {
1169 	struct ix_softc *sc = xsc;
1170 	struct ixgbe_hw	*hw = &sc->hw;
1171 	uint32_t eicr;
1172 
1173 	ASSERT_SERIALIZED(&sc->main_serialize);
1174 
1175 	eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
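	/*
	 * Single shared vector (legacy INTx or MSI): reading EICR returns
	 * and clears the pending causes.  Zero means the interrupt was not
	 * ours; just re-enable and return.
	 */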
1176 	if (eicr == 0) {
1177 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
1178 		return;
1179 	}
1180 
1181 	if (eicr & IX_RX0_INTR_MASK) {
1182 		struct ix_rx_ring *rxr = &sc->rx_rings[0];
1183 
1184 		lwkt_serialize_enter(&rxr->rx_serialize);
1185 		ix_rxeof(rxr, -1);
1186 		lwkt_serialize_exit(&rxr->rx_serialize);
1187 	}
1188 	if (eicr & IX_RX1_INTR_MASK) {
1189 		struct ix_rx_ring *rxr;
1190 
1191 		KKASSERT(sc->rx_ring_inuse == IX_MIN_RXRING_RSS);
1192 		rxr = &sc->rx_rings[1];
1193 
1194 		lwkt_serialize_enter(&rxr->rx_serialize);
1195 		ix_rxeof(rxr, -1);
1196 		lwkt_serialize_exit(&rxr->rx_serialize);
1197 	}
1198 
1199 	if (eicr & IX_TX_INTR_MASK) {
1200 		struct ix_tx_ring *txr = &sc->tx_rings[0];
1201 
1202 		lwkt_serialize_enter(&txr->tx_serialize);
1203 		ix_tx_intr(txr, *(txr->tx_hdr));
1204 		lwkt_serialize_exit(&txr->tx_serialize);
1205 	}
1206 
1207 	if (__predict_false(eicr & IX_EICR_STATUS))
1208 		ix_intr_status(sc, eicr);
1209 
1210 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
1211 }
1212 
1213 static void
1214 ix_intr_82598(void *xsc)
1215 {
1216 	struct ix_softc *sc = xsc;
1217 
1218 	ASSERT_SERIALIZED(&sc->main_serialize);
1219 
1220 	/* Software workaround for 82598 errata #26 */
1221 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, IXGBE_IRQ_CLEAR_MASK);
1222 
1223 	ix_intr(sc);
1224 }
1225 
1226 static void
1227 ix_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1228 {
1229 	struct ix_softc *sc = ifp->if_softc;
1230 	struct ifmedia *ifm = &sc->media;
1231 	int layer;
1232 	boolean_t link_active;
1233 
1234 	if (sc->flags & IX_FLAG_FW_RECOVERY) {
1235 		link_active = FALSE;
1236 	} else {
1237 		ix_update_link_status(sc);
1238 		link_active = sc->link_active;
1239 	}
1240 
1241 	ifmr->ifm_status = IFM_AVALID;
1242 	ifmr->ifm_active = IFM_ETHER;
1243 
1244 	if (!link_active) {
1245 		if (IFM_SUBTYPE(ifm->ifm_media) != IFM_AUTO)
1246 			ifmr->ifm_active |= ifm->ifm_media;
1247 		else
1248 			ifmr->ifm_active |= IFM_NONE;
1249 		return;
1250 	}
1251 	ifmr->ifm_status |= IFM_ACTIVE;
1252 
1253 	layer = sc->phy_layer;
1254 
1255 	if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) ||
1256 	    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) ||
1257 	    (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX) ||
1258 	    (layer & IXGBE_PHYSICAL_LAYER_10BASE_T)) {
1259 		switch (sc->link_speed) {
1260 		case IXGBE_LINK_SPEED_10GB_FULL:
1261 			ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
1262 			break;
1263 		case IXGBE_LINK_SPEED_1GB_FULL:
1264 			ifmr->ifm_active |= IFM_1000_T | IFM_FDX;
1265 			break;
1266 		case IXGBE_LINK_SPEED_100_FULL:
1267 			ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1268 			break;
1269 		case IXGBE_LINK_SPEED_10_FULL:
1270 			ifmr->ifm_active |= IFM_10_T | IFM_FDX;
1271 			break;
1272 		}
1273 	} else if ((layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) ||
1274 	    (layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA)) {
1275 		switch (sc->link_speed) {
1276 		case IXGBE_LINK_SPEED_10GB_FULL:
1277 			ifmr->ifm_active |= IFM_10G_TWINAX | IFM_FDX;
1278 			break;
1279 		}
1280 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) {
1281 		switch (sc->link_speed) {
1282 		case IXGBE_LINK_SPEED_10GB_FULL:
1283 			ifmr->ifm_active |= IFM_10G_LR | IFM_FDX;
1284 			break;
1285 		case IXGBE_LINK_SPEED_1GB_FULL:
1286 			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
1287 			break;
1288 		}
1289 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LRM) {
1290 		switch (sc->link_speed) {
1291 		case IXGBE_LINK_SPEED_10GB_FULL:
1292 			ifmr->ifm_active |= IFM_10G_LRM | IFM_FDX;
1293 			break;
1294 		case IXGBE_LINK_SPEED_1GB_FULL:
1295 			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
1296 			break;
1297 		}
1298 	} else if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) ||
1299 	    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX)) {
1300 		switch (sc->link_speed) {
1301 		case IXGBE_LINK_SPEED_10GB_FULL:
1302 			ifmr->ifm_active |= IFM_10G_SR | IFM_FDX;
1303 			break;
1304 		case IXGBE_LINK_SPEED_1GB_FULL:
1305 			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1306 			break;
1307 		}
1308 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) {
1309 		switch (sc->link_speed) {
1310 		case IXGBE_LINK_SPEED_10GB_FULL:
1311 			ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX;
1312 			break;
1313 		}
1314 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) {
1315 		/*
1316 		 * XXX: These need to use the proper media types once
1317 		 * they're added.
1318 		 */
1319 		switch (sc->link_speed) {
1320 		case IXGBE_LINK_SPEED_10GB_FULL:
1321 			ifmr->ifm_active |= IFM_10G_SR | IFM_FDX;
1322 			break;
1323 		case IXGBE_LINK_SPEED_2_5GB_FULL:
1324 			ifmr->ifm_active |= IFM_2500_SX | IFM_FDX;
1325 			break;
1326 		case IXGBE_LINK_SPEED_1GB_FULL:
1327 			ifmr->ifm_active |= IFM_1000_CX | IFM_FDX;
1328 			break;
1329 		}
1330 	} else if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) ||
1331 	    (layer & IXGBE_PHYSICAL_LAYER_2500BASE_KX) ||
1332 	    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX)) {
1333 		/*
1334 		 * XXX: These need to use the proper media types once
1335 		 * they're added.
1336 		 */
1337 		switch (sc->link_speed) {
1338 		case IXGBE_LINK_SPEED_10GB_FULL:
1339 			ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX;
1340 			break;
1341 		case IXGBE_LINK_SPEED_2_5GB_FULL:
1342 			ifmr->ifm_active |= IFM_2500_SX | IFM_FDX;
1343 			break;
1344 		case IXGBE_LINK_SPEED_1GB_FULL:
1345 			ifmr->ifm_active |= IFM_1000_CX | IFM_FDX;
1346 			break;
1347 		}
1348 	}
1349 
1350 	/* If nothing is recognized... */
1351 	if (IFM_SUBTYPE(ifmr->ifm_active) == 0)
1352 		ifmr->ifm_active |= IFM_NONE;
1353 
1354 	if (sc->ifm_media & IFM_ETH_FORCEPAUSE)
1355 		ifmr->ifm_active |= (sc->ifm_media & IFM_ETH_FCMASK);
1356 
1357 	switch (sc->hw.fc.current_mode) {
1358 	case ixgbe_fc_full:
1359 		ifmr->ifm_active |= IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE;
1360 		break;
1361 	case ixgbe_fc_rx_pause:
1362 		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
1363 		break;
1364 	case ixgbe_fc_tx_pause:
1365 		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
1366 		break;
1367 	default:
1368 		break;
1369 	}
1370 }
1371 
1372 static int
1373 ix_media_change(struct ifnet *ifp)
1374 {
1375 	struct ix_softc *sc = ifp->if_softc;
1376 	struct ifmedia *ifm = &sc->media;
1377 	struct ixgbe_hw *hw = &sc->hw;
1378 
1379 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1380 		return (EINVAL);
1381 
1382 	if (hw->phy.media_type == ixgbe_media_type_backplane ||
1383 	    hw->mac.ops.setup_link == NULL) {
1384 		if ((ifm->ifm_media ^ sc->ifm_media) & IFM_ETH_FCMASK) {
1385 			/* Only flow control setting changes are allowed */
1386 			return (EOPNOTSUPP);
1387 		}
1388 	}
1389 
1390 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1391 	case IFM_AUTO:
1392 		sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;
1393 		break;
1394 
1395 	case IFM_10G_T:
1396 	case IFM_10G_LRM:
1397 	case IFM_10G_SR:	/* XXX also KR */
1398 	case IFM_10G_LR:
1399 	case IFM_10G_CX4:	/* XXX also KX4 */
1400 	case IFM_10G_TWINAX:
1401 		sc->advspeed = IXGBE_LINK_SPEED_10GB_FULL;
1402 		break;
1403 
1404 	case IFM_1000_T:
1405 	case IFM_1000_LX:
1406 	case IFM_1000_SX:
1407 	case IFM_1000_CX:	/* XXX is KX */
1408 		sc->advspeed = IXGBE_LINK_SPEED_1GB_FULL;
1409 		break;
1410 
1411 	case IFM_100_TX:
1412 		sc->advspeed = IXGBE_LINK_SPEED_100_FULL;
1413 		break;
1414 
1415 	default:
1416 		if (bootverbose) {
1417 			if_printf(ifp, "Invalid media type %d!\n",
1418 			    ifm->ifm_media);
1419 		}
1420 		return EINVAL;
1421 	}
1422 	sc->ifm_media = ifm->ifm_media;
1423 
1424 #if 0
1425 	if (hw->mac.ops.setup_link != NULL) {
1426 		hw->mac.autotry_restart = TRUE;
1427 		hw->mac.ops.setup_link(hw, sc->advspeed, TRUE);
1428 	}
1429 #else
1430 	if (ifp->if_flags & IFF_RUNNING)
1431 		ix_init(sc);
1432 #endif
1433 	return 0;
1434 }
1435 
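/*
 * For TSO the Ethernet/IP/TCP headers must be contiguous in the first
 * mbuf, since the TSO context setup code parses them in place; pull
 * them up here if they are split across mbufs.
 */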
1436 static __inline int
1437 ix_tso_pullup(struct mbuf **mp)
1438 {
1439 	int hoff, iphlen, thoff;
1440 	struct mbuf *m;
1441 
1442 	m = *mp;
1443 	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));
1444 
1445 	iphlen = m->m_pkthdr.csum_iphlen;
1446 	thoff = m->m_pkthdr.csum_thlen;
1447 	hoff = m->m_pkthdr.csum_lhlen;
1448 
1449 	KASSERT(iphlen > 0, ("invalid ip hlen"));
1450 	KASSERT(thoff > 0, ("invalid tcp hlen"));
1451 	KASSERT(hoff > 0, ("invalid ether hlen"));
1452 
1453 	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
1454 		m = m_pullup(m, hoff + iphlen + thoff);
1455 		if (m == NULL) {
1456 			*mp = NULL;
1457 			return ENOBUFS;
1458 		}
1459 		*mp = m;
1460 	}
1461 	return 0;
1462 }
1463 
1464 static int
1465 ix_encap(struct ix_tx_ring *txr, struct mbuf **m_headp,
1466     uint16_t *segs_used, int *idx)
1467 {
1468 	uint32_t olinfo_status = 0, cmd_type_len, cmd_rs = 0;
1469 	int i, j, error, nsegs, first, maxsegs;
1470 	struct mbuf *m_head = *m_headp;
1471 	bus_dma_segment_t segs[IX_MAX_SCATTER];
1472 	bus_dmamap_t map;
1473 	struct ix_tx_buf *txbuf;
1474 	union ixgbe_adv_tx_desc *txd = NULL;
1475 
1476 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1477 		error = ix_tso_pullup(m_headp);
1478 		if (__predict_false(error))
1479 			return error;
1480 		m_head = *m_headp;
1481 	}
1482 
1483 	/* Basic descriptor defines */
1484 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1485 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1486 
1487 	if (m_head->m_flags & M_VLANTAG)
1488 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1489 
1490 	/*
1491 	 * It is important to capture the first descriptor used,
1492 	 * because it will contain the index of the one we tell
1493 	 * the hardware to report back.
1494 	 */
1495 	first = txr->tx_next_avail;
1496 	txbuf = &txr->tx_buf[first];
1497 	map = txbuf->map;
1498 
1499 	/*
1500 	 * Map the packet for DMA.
1501 	 */
1502 	maxsegs = txr->tx_avail - IX_TX_RESERVED;
1503 	if (maxsegs > IX_MAX_SCATTER)
1504 		maxsegs = IX_MAX_SCATTER;
1505 
1506 	error = bus_dmamap_load_mbuf_defrag(txr->tx_tag, map, m_headp,
1507 	    segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
1508 	if (__predict_false(error)) {
1509 		m_freem(*m_headp);
1510 		*m_headp = NULL;
1511 		return error;
1512 	}
1513 	bus_dmamap_sync(txr->tx_tag, map, BUS_DMASYNC_PREWRITE);
1514 
1515 	m_head = *m_headp;
1516 
1517 	/*
1518 	 * Set up the appropriate offload context if requested,
1519 	 * this may consume one TX descriptor.
1520 	 */
1521 	if (ix_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status)) {
1522 		(*segs_used)++;
1523 		txr->tx_nsegs++;
1524 	}
1525 
1526 	*segs_used += nsegs;
1527 	txr->tx_nsegs += nsegs;
1528 	if (txr->tx_nsegs >= txr->tx_intr_nsegs) {
1529 		/*
1530 		 * Report Status (RS) is turned on every intr_nsegs
1531 		 * descriptors (roughly).
1532 		 */
1533 		txr->tx_nsegs = 0;
1534 		cmd_rs = IXGBE_TXD_CMD_RS;
1535 	}
1536 
1537 	i = txr->tx_next_avail;
1538 	for (j = 0; j < nsegs; j++) {
1539 		bus_size_t seglen;
1540 		bus_addr_t segaddr;
1541 
1542 		txbuf = &txr->tx_buf[i];
1543 		txd = &txr->tx_base[i];
1544 		seglen = segs[j].ds_len;
1545 		segaddr = htole64(segs[j].ds_addr);
1546 
1547 		txd->read.buffer_addr = segaddr;
1548 		txd->read.cmd_type_len = htole32(IXGBE_TXD_CMD_IFCS |
1549 		    cmd_type_len | seglen);
1550 		txd->read.olinfo_status = htole32(olinfo_status);
1551 
1552 		if (++i == txr->tx_ndesc)
1553 			i = 0;
1554 	}
1555 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | cmd_rs);
1556 
1557 	txr->tx_avail -= nsegs;
1558 	txr->tx_next_avail = i;
1559 	txr->tx_nmbuf++;
1560 
1561 	txbuf->m_head = m_head;
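	/*
	 * The packet was DMA-loaded on the first descriptor's map, but the
	 * mbuf is recorded on the last descriptor's tx_buf; swap the two
	 * dmamaps so that ix_free_txbuf() later unloads the map that
	 * actually holds the mapping.
	 */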
1562 	txr->tx_buf[first].map = txbuf->map;
1563 	txbuf->map = map;
1564 
1565 	/*
1566 	 * Defer TDT updating until enough descriptors have been set up.
1567 	 */
1568 	*idx = i;
1569 
1570 	return 0;
1571 }
1572 
1573 static void
1574 ix_set_promisc(struct ix_softc *sc)
1575 {
1576 	struct ifnet *ifp = &sc->arpcom.ac_if;
1577 	uint32_t reg_rctl;
1578 	int mcnt = 0;
1579 
1580 	reg_rctl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
1581 	reg_rctl &= ~IXGBE_FCTRL_UPE;
1582 	if (ifp->if_flags & IFF_ALLMULTI) {
1583 		mcnt = IX_MAX_MCASTADDR;
1584 	} else {
1585 		struct ifmultiaddr *ifma;
1586 
1587 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1588 			if (ifma->ifma_addr->sa_family != AF_LINK)
1589 				continue;
1590 			if (mcnt == IX_MAX_MCASTADDR)
1591 				break;
1592 			mcnt++;
1593 		}
1594 	}
1595 	if (mcnt < IX_MAX_MCASTADDR)
1596 		reg_rctl &= ~IXGBE_FCTRL_MPE;
1597 	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1598 
1599 	if (ifp->if_flags & IFF_PROMISC) {
1600 		reg_rctl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
1601 		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1602 	} else if (ifp->if_flags & IFF_ALLMULTI) {
1603 		reg_rctl |= IXGBE_FCTRL_MPE;
1604 		reg_rctl &= ~IXGBE_FCTRL_UPE;
1605 		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1606 	}
1607 }
1608 
1609 static void
1610 ix_set_multi(struct ix_softc *sc)
1611 {
1612 	struct ifnet *ifp = &sc->arpcom.ac_if;
1613 	struct ifmultiaddr *ifma;
1614 	uint32_t fctrl;
1615 	struct ix_mc_addr *mta;
1616 	int mcnt = 0;
1617 
1618 	mta = sc->mta;
1619 	bzero(mta, sizeof(*mta) * IX_MAX_MCASTADDR);
1620 
1621 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1622 		if (ifma->ifma_addr->sa_family != AF_LINK)
1623 			continue;
1624 		if (mcnt == IX_MAX_MCASTADDR)
1625 			break;
1626 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1627 		    mta[mcnt].addr, IXGBE_ETH_LENGTH_OF_ADDRESS);
1628 		mcnt++;
1629 	}
1630 
1631 	fctrl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
1632 	fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1633 	if (ifp->if_flags & IFF_PROMISC) {
1634 		fctrl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
1635 	} else if (mcnt >= IX_MAX_MCASTADDR || (ifp->if_flags & IFF_ALLMULTI)) {
1636 		fctrl |= IXGBE_FCTRL_MPE;
1637 		fctrl &= ~IXGBE_FCTRL_UPE;
1638 	} else {
1639 		fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1640 	}
1641 	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, fctrl);
1642 
1643 	if (mcnt < IX_MAX_MCASTADDR) {
1644 		ixgbe_update_mc_addr_list(&sc->hw,
1645 		    (uint8_t *)mta, mcnt, ix_mc_array_itr, TRUE);
1646 	}
1647 }
1648 
1649 /*
1650  * This is an iterator function required by the shared multicast
1651  * code.  It simply feeds the shared code routine the addresses
1652  * collected into the array by ix_set_multi(), one at a time.
1653  */
1654 static uint8_t *
1655 ix_mc_array_itr(struct ixgbe_hw *hw, uint8_t **update_ptr, uint32_t *vmdq)
1656 {
1657 	struct ix_mc_addr *mta = (struct ix_mc_addr *)*update_ptr;
1658 
1659 	*vmdq = mta->vmdq;
1660 	*update_ptr = (uint8_t *)(mta + 1);
1661 
1662 	return (mta->addr);
1663 }
1664 
1665 static void
1666 ix_timer(void *arg)
1667 {
1668 	struct ix_softc *sc = arg;
1669 
1670 	lwkt_serialize_enter(&sc->main_serialize);
1671 
1672 	if ((sc->arpcom.ac_if.if_flags & IFF_RUNNING) == 0) {
1673 		lwkt_serialize_exit(&sc->main_serialize);
1674 		return;
1675 	}
1676 
1677 	/* Check for pluggable optics */
1678 	if (sc->sfp_probe) {
1679 		if (!ix_sfp_probe(sc))
1680 			goto done; /* Nothing to do */
1681 	}
1682 
1683 	ix_update_link_status(sc);
1684 	ix_update_stats(sc);
1685 
1686 done:
1687 	callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
1688 	lwkt_serialize_exit(&sc->main_serialize);
1689 }
1690 
1691 static void
1692 ix_update_link_status(struct ix_softc *sc)
1693 {
1694 	struct ifnet *ifp = &sc->arpcom.ac_if;
1695 
1696 	if (sc->link_up) {
1697 		if (sc->link_active == FALSE) {
1698 			if (bootverbose) {
1699 				if_printf(ifp, "Link is up %d Gbps %s\n",
1700 				    sc->link_speed == 128 ? 10 : 1,
1701 				    "Full Duplex");
1702 			}
1703 
1704 			/*
1705 			 * Update any Flow Control changes
1706 			 */
1707 			ixgbe_fc_enable(&sc->hw);
1708 			/* MUST after ixgbe_fc_enable() */
1709 			if (sc->rx_ring_inuse > 1) {
1710 				switch (sc->hw.fc.current_mode) {
1711 				case ixgbe_fc_rx_pause:
1712 				case ixgbe_fc_tx_pause:
1713 				case ixgbe_fc_full:
1714 					ix_disable_rx_drop(sc);
1715 					break;
1716 
1717 				case ixgbe_fc_none:
1718 					ix_enable_rx_drop(sc);
1719 					break;
1720 
1721 				default:
1722 					break;
1723 				}
1724 			}
1725 
1726 			/* Update DMA coalescing config */
1727 			ix_config_dmac(sc);
1728 
1729 			sc->link_active = TRUE;
1730 
1731 			ifp->if_link_state = LINK_STATE_UP;
1732 			if_link_state_change(ifp);
1733 		}
1734 	} else { /* Link down */
1735 		if (sc->link_active == TRUE) {
1736 			if (bootverbose)
1737 				if_printf(ifp, "Link is Down\n");
1738 			ifp->if_link_state = LINK_STATE_DOWN;
1739 			if_link_state_change(ifp);
1740 
1741 			sc->link_active = FALSE;
1742 		}
1743 	}
1744 }
1745 
1746 static void
1747 ix_stop(struct ix_softc *sc)
1748 {
1749 	struct ixgbe_hw *hw = &sc->hw;
1750 	struct ifnet *ifp = &sc->arpcom.ac_if;
1751 	int i;
1752 
1753 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1754 
1755 	ix_disable_intr(sc);
1756 	callout_stop(&sc->timer);
1757 
1758 	ifp->if_flags &= ~IFF_RUNNING;
1759 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1760 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1761 
1762 		ifsq_clr_oactive(txr->tx_ifsq);
1763 		ifsq_watchdog_stop(&txr->tx_watchdog);
1764 		txr->tx_flags &= ~IX_TXFLAG_ENABLED;
1765 
1766 		txr->tx_running = 0;
1767 		callout_stop(&txr->tx_gc_timer);
1768 	}
1769 
1770 	ixgbe_reset_hw(hw);
1771 	hw->adapter_stopped = FALSE;
1772 	ixgbe_stop_adapter(hw);
1773 	if (hw->mac.type == ixgbe_mac_82599EB)
1774 		ixgbe_stop_mac_link_on_d3_82599(hw);
1775 	/* Turn off the laser - noop with no optics */
1776 	ixgbe_disable_tx_laser(hw);
1777 
1778 	/* Update the stack */
1779 	sc->link_up = FALSE;
1780 	ix_update_link_status(sc);
1781 
1782 	/* Reprogram the RAR[0] in case user changed it. */
1783 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV);
1784 
1785 	for (i = 0; i < sc->tx_ring_cnt; ++i)
1786 		ix_free_tx_ring(&sc->tx_rings[i]);
1787 
1788 	for (i = 0; i < sc->rx_ring_cnt; ++i)
1789 		ix_free_rx_ring(&sc->rx_rings[i]);
1790 }
1791 
1792 static void
1793 ix_setup_ifp(struct ix_softc *sc)
1794 {
1795 	struct ixgbe_hw *hw = &sc->hw;
1796 	struct ifnet *ifp = &sc->arpcom.ac_if;
1797 	int i;
1798 
1799 	ifp->if_baudrate = IF_Gbps(10UL);
1800 
1801 	ifp->if_softc = sc;
1802 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1803 	ifp->if_init = ix_init;
1804 	ifp->if_ioctl = ix_ioctl;
1805 	ifp->if_start = ix_start;
1806 	ifp->if_serialize = ix_serialize;
1807 	ifp->if_deserialize = ix_deserialize;
1808 	ifp->if_tryserialize = ix_tryserialize;
1809 #ifdef INVARIANTS
1810 	ifp->if_serialize_assert = ix_serialize_assert;
1811 #endif
1812 #ifdef IFPOLL_ENABLE
1813 	ifp->if_npoll = ix_npoll;
1814 #endif
1815 
1816 	/* Increase TSO burst length */
1817 	ifp->if_tsolen = (8 * ETHERMTU);
1818 
1819 	ifp->if_nmbclusters = sc->rx_ring_cnt * sc->rx_rings[0].rx_ndesc;
1820 	ifp->if_nmbjclusters = ifp->if_nmbclusters;
1821 
1822 	ifq_set_maxlen(&ifp->if_snd, sc->tx_rings[0].tx_ndesc - 2);
1823 	ifq_set_ready(&ifp->if_snd);
1824 	ifq_set_subq_cnt(&ifp->if_snd, sc->tx_ring_cnt);
1825 
1826 	ifp->if_mapsubq = ifq_mapsubq_modulo;
1827 	ifq_set_subq_divisor(&ifp->if_snd, 1);
1828 
1829 	ether_ifattach(ifp, hw->mac.addr, NULL);
1830 
1831 	ifp->if_capabilities =
1832 	    IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
1833 	if (IX_ENABLE_HWRSS(sc))
1834 		ifp->if_capabilities |= IFCAP_RSS;
1835 	ifp->if_capenable = ifp->if_capabilities;
1836 	ifp->if_hwassist = CSUM_OFFLOAD | CSUM_TSO;
1837 
1838 	/*
1839 	 * Tell the upper layer(s) we support long frames.
1840 	 */
1841 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
1842 
1843 	/* Setup TX rings and subqueues */
1844 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1845 		struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
1846 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1847 
1848 		ifsq_set_cpuid(ifsq, txr->tx_intr_cpuid);
1849 		ifsq_set_priv(ifsq, txr);
1850 		ifsq_set_hw_serialize(ifsq, &txr->tx_serialize);
1851 		txr->tx_ifsq = ifsq;
1852 
1853 		ifsq_watchdog_init(&txr->tx_watchdog, ifsq, ix_watchdog, 0);
1854 	}
1855 
1856 	/* Specify the media types supported by this adapter */
1857 	sc->phy_layer = ixgbe_get_supported_physical_layer(hw);
1858 	ix_init_media(sc);
1859 }
1860 
1861 static boolean_t
1862 ix_is_sfp(struct ixgbe_hw *hw)
1863 {
1864 	switch (hw->mac.type) {
1865 	case ixgbe_mac_82598EB:
1866 		if (hw->phy.type == ixgbe_phy_nl)
1867 			return TRUE;
1868 		return FALSE;
1869 
1870 	case ixgbe_mac_82599EB:
1871 		switch (hw->mac.ops.get_media_type(hw)) {
1872 		case ixgbe_media_type_fiber:
1873 		case ixgbe_media_type_fiber_qsfp:
1874 			return TRUE;
1875 		default:
1876 			return FALSE;
1877 		}
1878 
1879 	case ixgbe_mac_X550EM_x:
1880 	case ixgbe_mac_X550EM_a:
1881 		if (hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber)
1882 			return TRUE;
1883 		return FALSE;
1884 
1885 	default:
1886 		return FALSE;
1887 	}
1888 }
1889 
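/*
 * Establish the link: for SFP/SFP+ modules run the module handling
 * path, otherwise resolve the speeds to advertise and program the
 * MAC via setup_link().
 */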
1890 static void
1891 ix_config_link(struct ix_softc *sc)
1892 {
1893 	struct ixgbe_hw *hw = &sc->hw;
1894 	boolean_t sfp;
1895 
1896 	sfp = ix_is_sfp(hw);
1897 	if (sfp) {
1898 		if (hw->phy.multispeed_fiber)
1899 			ixgbe_enable_tx_laser(hw);
1900 		ix_handle_mod(sc);
1901 	} else {
1902 		uint32_t autoneg, err = 0;
1903 
1904 		if (hw->mac.ops.check_link != NULL) {
1905 			err = ixgbe_check_link(hw, &sc->link_speed,
1906 			    &sc->link_up, FALSE);
1907 			if (err)
1908 				return;
1909 		}
1910 
1911 		if (sc->advspeed != IXGBE_LINK_SPEED_UNKNOWN)
1912 			autoneg = sc->advspeed;
1913 		else
1914 			autoneg = hw->phy.autoneg_advertised;
1915 		if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
1916 			bool negotiate;
1917 
1918 			err = hw->mac.ops.get_link_capabilities(hw,
1919 			    &autoneg, &negotiate);
1920 			if (err)
1921 				return;
1922 		}
1923 
1924 		if (hw->mac.ops.setup_link != NULL) {
1925 			err = hw->mac.ops.setup_link(hw,
1926 			    autoneg, sc->link_up);
1927 			if (err)
1928 				return;
1929 		}
1930 	}
1931 }
1932 
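/*
 * Create the parent DMA tag and allocate the software state, descriptor
 * memory and DMA resources for all configured TX and RX rings.
 */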
1933 static int
1934 ix_alloc_rings(struct ix_softc *sc)
1935 {
1936 	int error, i;
1937 
1938 	/*
1939 	 * Create top level busdma tag
1940 	 */
1941 	error = bus_dma_tag_create(NULL, 1, 0,
1942 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1943 	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
1944 	    &sc->parent_tag);
1945 	if (error) {
1946 		device_printf(sc->dev, "could not create top level DMA tag\n");
1947 		return error;
1948 	}
1949 
1950 	/*
1951 	 * Allocate TX descriptor rings and buffers
1952 	 */
1953 	sc->tx_rings = kmalloc(sizeof(struct ix_tx_ring) * sc->tx_ring_cnt,
1954 			       M_DEVBUF,
1955 			       M_WAITOK | M_ZERO | M_CACHEALIGN);
1956 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1957 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1958 
1959 		txr->tx_sc = sc;
1960 		txr->tx_idx = i;
1961 		txr->tx_intr_vec = -1;
1962 		txr->tx_intr_cpuid = -1;
1963 		lwkt_serialize_init(&txr->tx_serialize);
1964 		callout_init_mp(&txr->tx_gc_timer);
1965 
1966 		error = ix_create_tx_ring(txr);
1967 		if (error)
1968 			return error;
1969 	}
1970 
1971 	/*
1972 	 * Allocate RX descriptor rings and buffers
1973 	 */
1974 	sc->rx_rings = kmalloc(sizeof(struct ix_rx_ring) * sc->rx_ring_cnt,
1975 			       M_DEVBUF,
1976 			       M_WAITOK | M_ZERO | M_CACHEALIGN);
1977 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
1978 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
1979 
1980 		rxr->rx_sc = sc;
1981 		rxr->rx_idx = i;
1982 		rxr->rx_intr_vec = -1;
1983 		lwkt_serialize_init(&rxr->rx_serialize);
1984 
1985 		error = ix_create_rx_ring(rxr);
1986 		if (error)
1987 			return error;
1988 	}
1989 
1990 	return 0;
1991 }
1992 
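/*
 * Allocate everything one TX ring needs: a validated descriptor count,
 * the head write-back buffer, the descriptor ring itself, the software
 * buffer array and a DMA tag plus per-buffer DMA maps.
 */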
1993 static int
1994 ix_create_tx_ring(struct ix_tx_ring *txr)
1995 {
1996 	int error, i, tsize, ntxd;
1997 
1998 	/*
1999 	 * Validate number of transmit descriptors.  It must not exceed
2000 	 * hardware maximum, and must be multiple of IX_DBA_ALIGN.
2001 	 */
2002 	ntxd = device_getenv_int(txr->tx_sc->dev, "txd", ix_txd);
2003 	if (((ntxd * sizeof(union ixgbe_adv_tx_desc)) % IX_DBA_ALIGN) != 0 ||
2004 	    ntxd < IX_MIN_TXD || ntxd > IX_MAX_TXD) {
2005 		device_printf(txr->tx_sc->dev,
2006 		    "Using %d TX descriptors instead of %d!\n",
2007 		    IX_DEF_TXD, ntxd);
2008 		txr->tx_ndesc = IX_DEF_TXD;
2009 	} else {
2010 		txr->tx_ndesc = ntxd;
2011 	}
2012 
2013 	/*
2014 	 * Allocate TX head write-back buffer
2015 	 */
2016 	txr->tx_hdr = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
2017 	    __VM_CACHELINE_SIZE, __VM_CACHELINE_SIZE, BUS_DMA_WAITOK,
2018 	    &txr->tx_hdr_dtag, &txr->tx_hdr_map, &txr->tx_hdr_paddr);
2019 	if (txr->tx_hdr == NULL) {
2020 		device_printf(txr->tx_sc->dev,
2021 		    "Unable to allocate TX head write-back buffer\n");
2022 		return ENOMEM;
2023 	}
2024 
2025 	/*
2026 	 * Allocate TX descriptor ring
2027 	 */
2028 	tsize = roundup2(txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc),
2029 	    IX_DBA_ALIGN);
2030 	txr->tx_base = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
2031 	    IX_DBA_ALIGN, tsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
2032 	    &txr->tx_base_dtag, &txr->tx_base_map, &txr->tx_base_paddr);
2033 	if (txr->tx_base == NULL) {
2034 		device_printf(txr->tx_sc->dev,
2035 		    "Unable to allocate TX Descriptor memory\n");
2036 		return ENOMEM;
2037 	}
2038 
2039 	tsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_tx_buf) * txr->tx_ndesc);
2040 	txr->tx_buf = kmalloc(tsize, M_DEVBUF,
2041 			      M_WAITOK | M_ZERO | M_CACHEALIGN);
2042 
2043 	/*
2044 	 * Create DMA tag for TX buffers
2045 	 */
2046 	error = bus_dma_tag_create(txr->tx_sc->parent_tag,
2047 	    1, 0,		/* alignment, bounds */
2048 	    BUS_SPACE_MAXADDR,	/* lowaddr */
2049 	    BUS_SPACE_MAXADDR,	/* highaddr */
2050 	    NULL, NULL,		/* filter, filterarg */
2051 	    IX_TSO_SIZE,	/* maxsize */
2052 	    IX_MAX_SCATTER,	/* nsegments */
2053 	    PAGE_SIZE,		/* maxsegsize */
2054 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
2055 	    BUS_DMA_ONEBPAGE,	/* flags */
2056 	    &txr->tx_tag);
2057 	if (error) {
2058 		device_printf(txr->tx_sc->dev,
2059 		    "Unable to create TX DMA tag\n");
2060 		kfree(txr->tx_buf, M_DEVBUF);
2061 		txr->tx_buf = NULL;
2062 		return error;
2063 	}
2064 
2065 	/*
2066 	 * Create DMA maps for TX buffers
2067 	 */
2068 	for (i = 0; i < txr->tx_ndesc; ++i) {
2069 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
2070 
2071 		error = bus_dmamap_create(txr->tx_tag,
2072 		    BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &txbuf->map);
2073 		if (error) {
2074 			device_printf(txr->tx_sc->dev,
2075 			    "Unable to create TX DMA map\n");
2076 			ix_destroy_tx_ring(txr, i);
2077 			return error;
2078 		}
2079 	}
2080 
2081 	/*
2082 	 * Initialize various watermarks
2083 	 */
2084 	txr->tx_wreg_nsegs = IX_DEF_TXWREG_NSEGS;
2085 	txr->tx_intr_nsegs = txr->tx_ndesc / 16;
2086 
2087 	return 0;
2088 }
2089 
2090 static void
2091 ix_destroy_tx_ring(struct ix_tx_ring *txr, int ndesc)
2092 {
2093 	int i;
2094 
2095 	if (txr->tx_hdr != NULL) {
2096 		bus_dmamap_unload(txr->tx_hdr_dtag, txr->tx_hdr_map);
2097 		bus_dmamem_free(txr->tx_hdr_dtag,
2098 		    __DEVOLATILE(void *, txr->tx_hdr), txr->tx_hdr_map);
2099 		bus_dma_tag_destroy(txr->tx_hdr_dtag);
2100 		txr->tx_hdr = NULL;
2101 	}
2102 
2103 	if (txr->tx_base != NULL) {
2104 		bus_dmamap_unload(txr->tx_base_dtag, txr->tx_base_map);
2105 		bus_dmamem_free(txr->tx_base_dtag, txr->tx_base,
2106 		    txr->tx_base_map);
2107 		bus_dma_tag_destroy(txr->tx_base_dtag);
2108 		txr->tx_base = NULL;
2109 	}
2110 
2111 	if (txr->tx_buf == NULL)
2112 		return;
2113 
2114 	for (i = 0; i < ndesc; ++i) {
2115 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
2116 
2117 		KKASSERT(txbuf->m_head == NULL);
2118 		bus_dmamap_destroy(txr->tx_tag, txbuf->map);
2119 	}
2120 	bus_dma_tag_destroy(txr->tx_tag);
2121 
2122 	kfree(txr->tx_buf, M_DEVBUF);
2123 	txr->tx_buf = NULL;
2124 }
2125 
2126 static void
2127 ix_init_tx_ring(struct ix_tx_ring *txr)
2128 {
2129 	/* Clear the old ring contents */
2130 	bzero(txr->tx_base, sizeof(union ixgbe_adv_tx_desc) * txr->tx_ndesc);
2131 
2132 	/* Clear TX head write-back buffer */
2133 	*(txr->tx_hdr) = 0;
2134 
2135 	/* Reset indices */
2136 	txr->tx_next_avail = 0;
2137 	txr->tx_next_clean = 0;
2138 	txr->tx_nsegs = 0;
2139 	txr->tx_nmbuf = 0;
2140 	txr->tx_running = 0;
2141 
2142 	/* Set number of descriptors available */
2143 	txr->tx_avail = txr->tx_ndesc;
2144 
2145 	/* Enable this TX ring */
2146 	txr->tx_flags |= IX_TXFLAG_ENABLED;
2147 }
2148 
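/*
 * Program the hardware TX queues: descriptor ring base/length, head and
 * tail pointers, relaxed-ordering and head write-back settings, and, on
 * 82599 and later parts, the DMA TX enable and MTQC configuration.
 */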
2149 static void
2150 ix_init_tx_unit(struct ix_softc *sc)
2151 {
2152 	struct ixgbe_hw	*hw = &sc->hw;
2153 	int i;
2154 
2155 	/*
2156 	 * Setup the Base and Length of the Tx Descriptor Ring
2157 	 */
2158 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
2159 		struct ix_tx_ring *txr = &sc->tx_rings[i];
2160 		uint64_t tdba = txr->tx_base_paddr;
2161 		uint64_t hdr_paddr = txr->tx_hdr_paddr;
2162 		uint32_t txctrl;
2163 
2164 		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i), (uint32_t)tdba);
2165 		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (uint32_t)(tdba >> 32));
2166 		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
2167 		    txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc));
2168 
2169 		/* Setup the HW Tx Head and Tail descriptor pointers */
2170 		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
2171 		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
2172 
2173 		/* Disable TX head write-back relaxed ordering */
2174 		switch (hw->mac.type) {
2175 		case ixgbe_mac_82598EB:
2176 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
2177 			break;
2178 		default:
2179 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
2180 			break;
2181 		}
2182 		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
2183 		switch (hw->mac.type) {
2184 		case ixgbe_mac_82598EB:
2185 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
2186 			break;
2187 		default:
2188 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
2189 			break;
2190 		}
2191 
2192 		/* Enable TX head write-back */
2193 		IXGBE_WRITE_REG(hw, IXGBE_TDWBAH(i),
2194 		    (uint32_t)(hdr_paddr >> 32));
2195 		IXGBE_WRITE_REG(hw, IXGBE_TDWBAL(i),
2196 		    ((uint32_t)hdr_paddr) | IXGBE_TDWBAL_HEAD_WB_ENABLE);
2197 	}
2198 
2199 	if (hw->mac.type != ixgbe_mac_82598EB) {
2200 		uint32_t dmatxctl, rttdcs;
2201 
2202 		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
2203 		dmatxctl |= IXGBE_DMATXCTL_TE;
2204 		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
2205 
2206 		/* Disable arbiter to set MTQC */
2207 		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
2208 		rttdcs |= IXGBE_RTTDCS_ARBDIS;
2209 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
2210 
2211 		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
2212 
2213 		/* Re-enable arbiter */
2214 		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
2215 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
2216 	}
2217 }
2218 
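/*
 * Prepare an advanced context descriptor for checksum offload and/or
 * VLAN tagging; TSO frames are handed off to ix_tso_ctx_setup().
 * Returns the number of TX descriptors consumed (0 or 1).
 */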
2219 static int
2220 ix_tx_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
2221     uint32_t *cmd_type_len, uint32_t *olinfo_status)
2222 {
2223 	struct ixgbe_adv_tx_context_desc *TXD;
2224 	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2225 	int ehdrlen, ip_hlen = 0, ctxd;
2226 	boolean_t offload = TRUE;
2227 
2228 	/* First check if TSO is to be used */
2229 	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
2230 		return ix_tso_ctx_setup(txr, mp,
2231 		    cmd_type_len, olinfo_status);
2232 	}
2233 
2234 	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
2235 		offload = FALSE;
2236 
2237 	/* Indicate the whole packet as payload when not doing TSO */
2238 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
2239 
2240 	/*
2241 	 * In advanced descriptors the vlan tag must be placed into the
2242 	 * context descriptor.  Hence we need to make one even if not
2243 	 * doing checksum offloads.
2244 	 */
2245 	if (mp->m_flags & M_VLANTAG) {
2246 		vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
2247 		    IXGBE_ADVTXD_VLAN_SHIFT;
2248 	} else if (!offload) {
2249 		/* No TX descriptor is consumed */
2250 		return 0;
2251 	}
2252 
2253 	/* Set the ether header length */
2254 	ehdrlen = mp->m_pkthdr.csum_lhlen;
2255 	KASSERT(ehdrlen > 0, ("invalid ether hlen"));
2256 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
2257 
2258 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
2259 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
2260 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
2261 		ip_hlen = mp->m_pkthdr.csum_iphlen;
2262 		KASSERT(ip_hlen > 0, ("invalid ip hlen"));
2263 	}
2264 	vlan_macip_lens |= ip_hlen;
2265 
2266 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
2267 	if (mp->m_pkthdr.csum_flags & CSUM_TCP)
2268 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
2269 	else if (mp->m_pkthdr.csum_flags & CSUM_UDP)
2270 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
2271 
2272 	if (mp->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))
2273 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
2274 
2275 	/* Now ready a context descriptor */
2276 	ctxd = txr->tx_next_avail;
2277 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
2278 
2279 	/* Now copy bits into descriptor */
2280 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
2281 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
2282 	TXD->seqnum_seed = htole32(0);
2283 	TXD->mss_l4len_idx = htole32(0);
2284 
2285 	/* We've consumed the first desc, adjust counters */
2286 	if (++ctxd == txr->tx_ndesc)
2287 		ctxd = 0;
2288 	txr->tx_next_avail = ctxd;
2289 	--txr->tx_avail;
2290 
2291 	/* One TX descriptor is consumed */
2292 	return 1;
2293 }
2294 
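/*
 * Prepare the advanced context descriptor for a TSO frame: MAC/IP/TCP
 * header lengths, MSS and TSO payload length.  Always consumes one TX
 * descriptor.
 */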
2295 static int
2296 ix_tso_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
2297     uint32_t *cmd_type_len, uint32_t *olinfo_status)
2298 {
2299 	struct ixgbe_adv_tx_context_desc *TXD;
2300 	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2301 	uint32_t mss_l4len_idx = 0, paylen;
2302 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
2303 
2304 	ehdrlen = mp->m_pkthdr.csum_lhlen;
2305 	KASSERT(ehdrlen > 0, ("invalid ether hlen"));
2306 
2307 	ip_hlen = mp->m_pkthdr.csum_iphlen;
2308 	KASSERT(ip_hlen > 0, ("invalid ip hlen"));
2309 
2310 	tcp_hlen = mp->m_pkthdr.csum_thlen;
2311 	KASSERT(tcp_hlen > 0, ("invalid tcp hlen"));
2312 
2313 	ctxd = txr->tx_next_avail;
2314 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
2315 
2316 	if (mp->m_flags & M_VLANTAG) {
2317 		vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
2318 		    IXGBE_ADVTXD_VLAN_SHIFT;
2319 	}
2320 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
2321 	vlan_macip_lens |= ip_hlen;
2322 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
2323 
2324 	/* ADV DTYPE TUCMD */
2325 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
2326 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
2327 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
2328 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
2329 
2330 	/* MSS L4LEN IDX */
2331 	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
2332 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
2333 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
2334 
2335 	TXD->seqnum_seed = htole32(0);
2336 
2337 	if (++ctxd == txr->tx_ndesc)
2338 		ctxd = 0;
2339 
2340 	txr->tx_avail--;
2341 	txr->tx_next_avail = ctxd;
2342 
2343 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
2344 
2345 	/* This is used in the transmit desc in encap */
2346 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
2347 
2348 	*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
2349 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
2350 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
2351 
2352 	/* One TX descriptor is consumed */
2353 	return 1;
2354 }
2355 
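/*
 * Reclaim TX descriptors the hardware has completed, up to the supplied
 * head index 'hdr', freeing attached mbufs and reopening the subqueue
 * once enough descriptors are free again.
 */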
2356 static void
2357 ix_txeof(struct ix_tx_ring *txr, int hdr)
2358 {
2359 	int first, avail;
2360 
2361 	if (txr->tx_avail == txr->tx_ndesc)
2362 		return;
2363 
2364 	first = txr->tx_next_clean;
2365 	if (first == hdr)
2366 		return;
2367 
2368 	avail = txr->tx_avail;
2369 	while (first != hdr) {
2370 		struct ix_tx_buf *txbuf = &txr->tx_buf[first];
2371 
2372 		KKASSERT(avail < txr->tx_ndesc);
2373 		++avail;
2374 
2375 		if (txbuf->m_head != NULL)
2376 			ix_free_txbuf(txr, txbuf);
2377 		if (++first == txr->tx_ndesc)
2378 			first = 0;
2379 	}
2380 	txr->tx_next_clean = first;
2381 	txr->tx_avail = avail;
2382 
2383 	if (txr->tx_avail > IX_MAX_SCATTER + IX_TX_RESERVED) {
2384 		ifsq_clr_oactive(txr->tx_ifsq);
2385 		ifsq_watchdog_set_count(&txr->tx_watchdog, 0);
2386 	}
2387 	txr->tx_running = IX_TX_RUNNING;
2388 }
2389 
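/*
 * TX ring garbage collection: read TDH directly and reclaim completed
 * descriptors so mbufs are not held on a ring that is otherwise idle.
 */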
2390 static void
2391 ix_txgc(struct ix_tx_ring *txr)
2392 {
2393 	int first, hdr;
2394 #ifdef INVARIANTS
2395 	int avail;
2396 #endif
2397 
2398 	if (txr->tx_avail == txr->tx_ndesc)
2399 		return;
2400 
2401 	hdr = IXGBE_READ_REG(&txr->tx_sc->hw, IXGBE_TDH(txr->tx_idx));
2402 	first = txr->tx_next_clean;
2403 	if (first == hdr)
2404 		goto done;
2405 	txr->tx_gc++;
2406 
2407 #ifdef INVARIANTS
2408 	avail = txr->tx_avail;
2409 #endif
2410 	while (first != hdr) {
2411 		struct ix_tx_buf *txbuf = &txr->tx_buf[first];
2412 
2413 #ifdef INVARIANTS
2414 		KKASSERT(avail < txr->tx_ndesc);
2415 		++avail;
2416 #endif
2417 		if (txbuf->m_head != NULL)
2418 			ix_free_txbuf(txr, txbuf);
2419 		if (++first == txr->tx_ndesc)
2420 			first = 0;
2421 	}
2422 done:
2423 	if (txr->tx_nmbuf)
2424 		txr->tx_running = IX_TX_RUNNING;
2425 }
2426 
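/*
 * Allocate everything one RX ring needs: a validated descriptor count,
 * the descriptor ring, the software buffer array, a DMA tag and the
 * per-buffer DMA maps plus a spare map.
 */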
2427 static int
2428 ix_create_rx_ring(struct ix_rx_ring *rxr)
2429 {
2430 	int i, rsize, error, nrxd;
2431 
2432 	/*
2433 	 * Validate number of receive descriptors.  It must not exceed
2434 	 * hardware maximum, and must be multiple of IX_DBA_ALIGN.
2435 	 */
2436 	nrxd = device_getenv_int(rxr->rx_sc->dev, "rxd", ix_rxd);
2437 	if (((nrxd * sizeof(union ixgbe_adv_rx_desc)) % IX_DBA_ALIGN) != 0 ||
2438 	    nrxd < IX_MIN_RXD || nrxd > IX_MAX_RXD) {
2439 		device_printf(rxr->rx_sc->dev,
2440 		    "Using %d RX descriptors instead of %d!\n",
2441 		    IX_DEF_RXD, nrxd);
2442 		rxr->rx_ndesc = IX_DEF_RXD;
2443 	} else {
2444 		rxr->rx_ndesc = nrxd;
2445 	}
2446 
2447 	/*
2448 	 * Allocate RX descriptor ring
2449 	 */
2450 	rsize = roundup2(rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc),
2451 	    IX_DBA_ALIGN);
2452 	rxr->rx_base = bus_dmamem_coherent_any(rxr->rx_sc->parent_tag,
2453 	    IX_DBA_ALIGN, rsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
2454 	    &rxr->rx_base_dtag, &rxr->rx_base_map, &rxr->rx_base_paddr);
2455 	if (rxr->rx_base == NULL) {
2456 		device_printf(rxr->rx_sc->dev,
2457 		    "Unable to allocate RX Descriptor memory\n");
2458 		return ENOMEM;
2459 	}
2460 
2461 	rsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_rx_buf) * rxr->rx_ndesc);
2462 	rxr->rx_buf = kmalloc(rsize, M_DEVBUF,
2463 			      M_WAITOK | M_ZERO | M_CACHEALIGN);
2464 
2465 	/*
2466 	 * Create DMA tag for RX buffers
2467 	 */
2468 	error = bus_dma_tag_create(rxr->rx_sc->parent_tag,
2469 	    1, 0,		/* alignment, bounds */
2470 	    BUS_SPACE_MAXADDR,	/* lowaddr */
2471 	    BUS_SPACE_MAXADDR,	/* highaddr */
2472 	    NULL, NULL,		/* filter, filterarg */
2473 	    PAGE_SIZE,		/* maxsize */
2474 	    1,			/* nsegments */
2475 	    PAGE_SIZE,		/* maxsegsize */
2476 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, /* flags */
2477 	    &rxr->rx_tag);
2478 	if (error) {
2479 		device_printf(rxr->rx_sc->dev,
2480 		    "Unable to create RX DMA tag\n");
2481 		kfree(rxr->rx_buf, M_DEVBUF);
2482 		rxr->rx_buf = NULL;
2483 		return error;
2484 	}
2485 
2486 	/*
2487 	 * Create spare DMA map for RX buffers
2488 	 */
2489 	error = bus_dmamap_create(rxr->rx_tag, BUS_DMA_WAITOK,
2490 	    &rxr->rx_sparemap);
2491 	if (error) {
2492 		device_printf(rxr->rx_sc->dev,
2493 		    "Unable to create spare RX DMA map\n");
2494 		bus_dma_tag_destroy(rxr->rx_tag);
2495 		kfree(rxr->rx_buf, M_DEVBUF);
2496 		rxr->rx_buf = NULL;
2497 		return error;
2498 	}
2499 
2500 	/*
2501 	 * Create DMA maps for RX buffers
2502 	 */
2503 	for (i = 0; i < rxr->rx_ndesc; ++i) {
2504 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2505 
2506 		error = bus_dmamap_create(rxr->rx_tag,
2507 		    BUS_DMA_WAITOK, &rxbuf->map);
2508 		if (error) {
2509 			device_printf(rxr->rx_sc->dev,
2510 			    "Unable to create RX DMA map\n");
2511 			ix_destroy_rx_ring(rxr, i);
2512 			return error;
2513 		}
2514 	}
2515 
2516 	/*
2517 	 * Initialize various watermark
2518 	 * Initialize various watermarks
2519 	rxr->rx_wreg_nsegs = IX_DEF_RXWREG_NSEGS;
2520 
2521 	return 0;
2522 }
2523 
2524 static void
2525 ix_destroy_rx_ring(struct ix_rx_ring *rxr, int ndesc)
2526 {
2527 	int i;
2528 
2529 	if (rxr->rx_base != NULL) {
2530 		bus_dmamap_unload(rxr->rx_base_dtag, rxr->rx_base_map);
2531 		bus_dmamem_free(rxr->rx_base_dtag, rxr->rx_base,
2532 		    rxr->rx_base_map);
2533 		bus_dma_tag_destroy(rxr->rx_base_dtag);
2534 		rxr->rx_base = NULL;
2535 	}
2536 
2537 	if (rxr->rx_buf == NULL)
2538 		return;
2539 
2540 	for (i = 0; i < ndesc; ++i) {
2541 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2542 
2543 		KKASSERT(rxbuf->m_head == NULL);
2544 		bus_dmamap_destroy(rxr->rx_tag, rxbuf->map);
2545 	}
2546 	bus_dmamap_destroy(rxr->rx_tag, rxr->rx_sparemap);
2547 	bus_dma_tag_destroy(rxr->rx_tag);
2548 
2549 	kfree(rxr->rx_buf, M_DEVBUF);
2550 	rxr->rx_buf = NULL;
2551 }
2552 
2553 /*
2554 ** Used to detect a descriptor that has
2555 ** been merged by Hardware RSC.
2556 */
2557 static __inline uint32_t
2558 ix_rsc_count(union ixgbe_adv_rx_desc *rx)
2559 {
2560 	return (le32toh(rx->wb.lower.lo_dword.data) &
2561 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
2562 }
2563 
2564 #if 0
2565 /*********************************************************************
2566  *
2567  *  Initialize Hardware RSC (LRO) feature on 82599
2568  *  for an RX ring, this is toggled by the LRO capability
2569  *  even though it is transparent to the stack.
2570  *
2571  *  NOTE: since this HW feature only works with IPV4 and
2572  *        our testing has shown soft LRO to be as effective
2573  *        I have decided to disable this by default.
2574  *
2575  **********************************************************************/
2576 static void
2577 ix_setup_hw_rsc(struct ix_rx_ring *rxr)
2578 {
2579 	struct	ix_softc 	*sc = rxr->rx_sc;
2580 	struct	ixgbe_hw	*hw = &sc->hw;
2581 	uint32_t			rscctrl, rdrxctl;
2582 
2583 #if 0
2584 	/* If turning LRO/RSC off we need to disable it */
2585 	if ((sc->arpcom.ac_if.if_capenable & IFCAP_LRO) == 0) {
2586 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2587 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
2588 		return;
2589 	}
2590 #endif
2591 
2592 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
2593 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
2594 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
2595 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
2596 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
2597 
2598 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2599 	rscctrl |= IXGBE_RSCCTL_RSCEN;
2600 	/*
2601 	** Limit the total number of descriptors that
2602 	** can be combined, so it does not exceed 64K
2603 	*/
2604 	if (rxr->mbuf_sz == MCLBYTES)
2605 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
2606 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
2607 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
2608 	else if (rxr->mbuf_sz == MJUM9BYTES)
2609 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
2610 	else  /* Using 16K cluster */
2611 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
2612 
2613 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
2614 
2615 	/* Enable TCP header recognition */
2616 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
2617 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
2618 	    IXGBE_PSRTYPE_TCPHDR));
2619 
2620 	/* Disable RSC for ACK packets */
2621 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
2622 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
2623 
2624 	rxr->hw_rsc = TRUE;
2625 }
2626 #endif
2627 
2628 static int
2629 ix_init_rx_ring(struct ix_rx_ring *rxr)
2630 {
2631 	int i;
2632 
2633 	/* Clear the ring contents */
2634 	bzero(rxr->rx_base, rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2635 
2636 	/* XXX we need JUMPAGESIZE for RSC too */
2637 	if (rxr->rx_sc->max_frame_size <= MCLBYTES)
2638 		rxr->rx_mbuf_sz = MCLBYTES;
2639 	else
2640 		rxr->rx_mbuf_sz = MJUMPAGESIZE;
2641 
2642 	/* Now replenish the mbufs */
2643 	for (i = 0; i < rxr->rx_ndesc; ++i) {
2644 		int error;
2645 
2646 		error = ix_newbuf(rxr, i, TRUE);
2647 		if (error)
2648 			return error;
2649 	}
2650 
2651 	/* Setup our descriptor indices */
2652 	rxr->rx_next_check = 0;
2653 	rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2654 
2655 #if 0
2656 	/*
2657 	** Now set up the LRO interface:
2658 	*/
2659 	if (ixgbe_rsc_enable)
2660 		ix_setup_hw_rsc(rxr);
2661 #endif
2662 
2663 	return 0;
2664 }
2665 
2666 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
2667 
2668 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
2669 
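/*
 * Program the hardware RX path: broadcast/jumbo filtering, per-ring
 * descriptor base/length and SRRCTL (including the RX drop policy when
 * multiple rings are in use), and the RSS key and redirection table.
 */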
2670 static void
2671 ix_init_rx_unit(struct ix_softc *sc, boolean_t polling)
2672 {
2673 	struct ixgbe_hw	*hw = &sc->hw;
2674 	struct ifnet *ifp = &sc->arpcom.ac_if;
2675 	uint32_t bufsz, fctrl, rxcsum, hlreg;
2676 	int i;
2677 
2678 	/*
2679 	 * Make sure receives are disabled while setting up the descriptor ring
2680 	 */
2681 	ixgbe_disable_rx(hw);
2682 
2683 	/* Enable broadcasts */
2684 	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
2685 	fctrl |= IXGBE_FCTRL_BAM;
2686 	if (hw->mac.type == ixgbe_mac_82598EB) {
2687 		fctrl |= IXGBE_FCTRL_DPF;
2688 		fctrl |= IXGBE_FCTRL_PMCF;
2689 	}
2690 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
2691 
2692 	/* Set for Jumbo Frames? */
2693 	hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
2694 	if (ifp->if_mtu > ETHERMTU)
2695 		hlreg |= IXGBE_HLREG0_JUMBOEN;
2696 	else
2697 		hlreg &= ~IXGBE_HLREG0_JUMBOEN;
2698 	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
2699 
2700 	KKASSERT(sc->rx_rings[0].rx_mbuf_sz >= MCLBYTES);
2701 	bufsz = (sc->rx_rings[0].rx_mbuf_sz + BSIZEPKT_ROUNDUP) >>
2702 	    IXGBE_SRRCTL_BSIZEPKT_SHIFT;
2703 
2704 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
2705 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
2706 		uint64_t rdba = rxr->rx_base_paddr;
2707 		uint32_t srrctl;
2708 
2709 		/* Setup the Base and Length of the Rx Descriptor Ring */
2710 		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i), (uint32_t)rdba);
2711 		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (uint32_t)(rdba >> 32));
2712 		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
2713 		    rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2714 
2715 		/*
2716 		 * Set up the SRRCTL register
2717 		 */
2718 		srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
2719 
2720 		srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
2721 		srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
2722 		srrctl |= bufsz;
2723 		srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
2724 		if (sc->rx_ring_inuse > 1) {
2725 			/* See the comment near ix_enable_rx_drop() */
2726 			if (sc->ifm_media &
2727 			    (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) {
2728 				srrctl &= ~IXGBE_SRRCTL_DROP_EN;
2729 				if (i == 0 && bootverbose) {
2730 					if_printf(ifp, "flow control %s, "
2731 					    "disable RX drop\n",
2732 					    ix_ifmedia2str(sc->ifm_media));
2733 				}
2734 			} else {
2735 				srrctl |= IXGBE_SRRCTL_DROP_EN;
2736 				if (i == 0 && bootverbose) {
2737 					if_printf(ifp, "flow control %s, "
2738 					    "enable RX drop\n",
2739 					    ix_ifmedia2str(sc->ifm_media));
2740 				}
2741 			}
2742 		}
2743 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
2744 
2745 		/* Setup the HW Rx Head and Tail Descriptor Pointers */
2746 		IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
2747 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
2748 	}
2749 
2750 	if (sc->hw.mac.type != ixgbe_mac_82598EB)
2751 		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), 0);
2752 
2753 	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
2754 
2755 	/*
2756 	 * Setup RSS
2757 	 */
2758 	if (sc->rx_ring_inuse > 1) {
2759 		uint8_t key[IX_NRSSRK * IX_RSSRK_SIZE];
2760 		const struct if_ringmap *rm;
2761 		int j, r, nreta, table_nent;
2762 
2763 		/*
2764 		 * NOTE:
2765 		 * When we reach here, RSS has already been disabled
2766 		 * in ix_stop(), so we can safely configure the RSS key
2767 		 * and redirect table.
2768 		 */
2769 
2770 		/*
2771 		 * Configure RSS key
2772 		 */
2773 		toeplitz_get_key(key, sizeof(key));
2774 		for (i = 0; i < IX_NRSSRK; ++i) {
2775 			uint32_t rssrk;
2776 
2777 			rssrk = IX_RSSRK_VAL(key, i);
2778 			IX_RSS_DPRINTF(sc, 1, "rssrk%d 0x%08x\n",
2779 			    i, rssrk);
2780 
2781 			IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rssrk);
2782 		}
2783 
2784 		/*
2785 		 * Configure RSS redirect table.
2786 		 */
2787 
2788 		/* Table size will differ based on MAC */
2789 		switch (hw->mac.type) {
2790 		case ixgbe_mac_X550:
2791 		case ixgbe_mac_X550EM_x:
2792 		case ixgbe_mac_X550EM_a:
2793 			nreta = IX_NRETA_X550;
2794 			break;
2795 		default:
2796 			nreta = IX_NRETA;
2797 			break;
2798 		}
2799 
2800 		table_nent = nreta * IX_RETA_SIZE;
2801 		KASSERT(table_nent <= IX_RDRTABLE_SIZE,
2802 		    ("invalid RETA count %d", nreta));
2803 		if (polling)
2804 			rm = sc->rx_rmap;
2805 		else
2806 			rm = sc->rx_rmap_intr;
2807 		if_ringmap_rdrtable(rm, sc->rdr_table, table_nent);
2808 
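		/*
		 * Each 32-bit RETA/ERETA register packs IX_RETA_SIZE ring
		 * indices, one per byte; fill them from the redirect table
		 * computed above.
		 */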
2809 		r = 0;
2810 		for (j = 0; j < nreta; ++j) {
2811 			uint32_t reta = 0;
2812 
2813 			for (i = 0; i < IX_RETA_SIZE; ++i) {
2814 				uint32_t q;
2815 
2816 				q = sc->rdr_table[r];
2817 				KASSERT(q < sc->rx_ring_inuse,
2818 				    ("invalid RX ring index %d", q));
2819 				reta |= q << (8 * i);
2820 				++r;
2821 			}
2822 			IX_RSS_DPRINTF(sc, 1, "reta 0x%08x\n", reta);
2823 			if (j < IX_NRETA) {
2824 				IXGBE_WRITE_REG(hw, IXGBE_RETA(j), reta);
2825 			} else {
2826 				IXGBE_WRITE_REG(hw, IXGBE_ERETA(j - IX_NRETA),
2827 				    reta);
2828 			}
2829 		}
2830 
2831 		/*
2832 		 * Enable multiple receive queues.
2833 		 * Enable IPv4 RSS standard hash functions.
2834 		 */
2835 		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
2836 		    IXGBE_MRQC_RSSEN |
2837 		    IXGBE_MRQC_RSS_FIELD_IPV4 |
2838 		    IXGBE_MRQC_RSS_FIELD_IPV4_TCP);
2839 
2840 		/*
2841 		 * NOTE:
2842 		 * PCSD must be enabled to enable multiple
2843 		 * receive queues.
2844 		 */
2845 		rxcsum |= IXGBE_RXCSUM_PCSD;
2846 	}
2847 
2848 	if (ifp->if_capenable & IFCAP_RXCSUM)
2849 		rxcsum |= IXGBE_RXCSUM_PCSD;
2850 
2851 	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
2852 }
2853 
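/*
 * Give refilled descriptors back to the hardware: RDT must point at the
 * last refilled descriptor, i.e. one before the next descriptor the
 * driver will check.
 */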
2854 static __inline void
2855 ix_rx_refresh(struct ix_rx_ring *rxr, int i)
2856 {
2857 	if (--i < 0)
2858 		i = rxr->rx_ndesc - 1;
2859 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, IXGBE_RDT(rxr->rx_idx), i);
2860 }
2861 
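/*
 * Translate the RX descriptor checksum status bits into mbuf checksum
 * flags for the IPv4 header and the TCP/UDP payload.
 */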
2862 static __inline void
2863 ix_rxcsum(uint32_t staterr, struct mbuf *mp, uint32_t ptype)
2864 {
2865 	if ((ptype &
2866 	     (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_IPV4_EX)) == 0) {
2867 		/* Not IPv4 */
2868 		return;
2869 	}
2870 
2871 	if ((staterr & (IXGBE_RXD_STAT_IPCS | IXGBE_RXDADV_ERR_IPE)) ==
2872 	    IXGBE_RXD_STAT_IPCS)
2873 		mp->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID;
2874 
2875 	if ((ptype &
2876 	     (IXGBE_RXDADV_PKTTYPE_TCP | IXGBE_RXDADV_PKTTYPE_UDP)) == 0) {
2877 		/*
2878 		 * - Neither TCP nor UDP
2879 		 * - IPv4 fragment
2880 		 */
2881 		return;
2882 	}
2883 
2884 	if ((staterr & (IXGBE_RXD_STAT_L4CS | IXGBE_RXDADV_ERR_TCPE)) ==
2885 	    IXGBE_RXD_STAT_L4CS) {
2886 		mp->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
2887 		    CSUM_FRAG_NOT_CHECKED;
2888 		mp->m_pkthdr.csum_data = htons(0xffff);
2889 	}
2890 }
2891 
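/*
 * Fill in the pktinfo and set the mbuf's Toeplitz hash from the RSS
 * hash type reported by the hardware; returns NULL if the hash type is
 * not one we handle.
 */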
2892 static __inline struct pktinfo *
2893 ix_rssinfo(struct mbuf *m, struct pktinfo *pi,
2894     uint32_t hash, uint32_t hashtype, uint32_t ptype)
2895 {
2896 	switch (hashtype) {
2897 	case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
2898 		pi->pi_netisr = NETISR_IP;
2899 		pi->pi_flags = 0;
2900 		pi->pi_l3proto = IPPROTO_TCP;
2901 		break;
2902 
2903 	case IXGBE_RXDADV_RSSTYPE_IPV4:
2904 		if ((ptype & IXGBE_RXDADV_PKTTYPE_UDP) == 0) {
2905 			/* Not UDP or is fragment */
2906 			return NULL;
2907 		}
2908 		pi->pi_netisr = NETISR_IP;
2909 		pi->pi_flags = 0;
2910 		pi->pi_l3proto = IPPROTO_UDP;
2911 		break;
2912 
2913 	default:
2914 		return NULL;
2915 	}
2916 
2917 	m_sethash(m, toeplitz_hash(hash));
2918 	return pi;
2919 }
2920 
2921 static __inline void
2922 ix_setup_rxdesc(union ixgbe_adv_rx_desc *rxd, const struct ix_rx_buf *rxbuf)
2923 {
2924 	rxd->read.pkt_addr = htole64(rxbuf->paddr);
2925 	rxd->wb.upper.status_error = 0;
2926 }
2927 
2928 static void
2929 ix_rx_discard(struct ix_rx_ring *rxr, int i, boolean_t eop)
2930 {
2931 	struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2932 
2933 	/*
2934 	 * XXX discard may not be correct
2935 	 */
2936 	if (eop) {
2937 		IFNET_STAT_INC(&rxr->rx_sc->arpcom.ac_if, ierrors, 1);
2938 		rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2939 	} else {
2940 		rxr->rx_flags |= IX_RXRING_FLAG_DISC;
2941 	}
2942 	if (rxbuf->fmp != NULL) {
2943 		m_freem(rxbuf->fmp);
2944 		rxbuf->fmp = NULL;
2945 		rxbuf->lmp = NULL;
2946 	}
2947 	ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
2948 }
2949 
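/*
 * RX completion processing: walk up to 'count' completed descriptors,
 * reassembling multi-descriptor frames, replenishing RX buffers and
 * handing finished frames to the stack.
 */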
2950 static void
2951 ix_rxeof(struct ix_rx_ring *rxr, int count)
2952 {
2953 	struct ifnet *ifp = &rxr->rx_sc->arpcom.ac_if;
2954 	int i, nsegs = 0, cpuid = mycpuid;
2955 
2956 	i = rxr->rx_next_check;
2957 	while (count != 0) {
2958 		struct ix_rx_buf *rxbuf, *nbuf = NULL;
2959 		union ixgbe_adv_rx_desc	*cur;
2960 		struct mbuf *sendmp = NULL, *mp;
2961 		struct pktinfo *pi = NULL, pi0;
2962 		uint32_t rsc = 0, ptype, staterr, hash, hashtype;
2963 		uint16_t len;
2964 		boolean_t eop;
2965 
2966 		cur = &rxr->rx_base[i];
2967 		staterr = le32toh(cur->wb.upper.status_error);
2968 
2969 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
2970 			break;
2971 		++nsegs;
2972 
2973 		rxbuf = &rxr->rx_buf[i];
2974 		mp = rxbuf->m_head;
2975 
2976 		len = le16toh(cur->wb.upper.length);
2977 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
2978 		    IXGBE_RXDADV_PKTTYPE_MASK;
2979 		hash = le32toh(cur->wb.lower.hi_dword.rss);
2980 		hashtype = le32toh(cur->wb.lower.lo_dword.data) &
2981 		    IXGBE_RXDADV_RSSTYPE_MASK;
2982 
2983 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
2984 		if (eop)
2985 			--count;
2986 
2987 		/*
2988 		 * Make sure bad packets are discarded
2989 		 */
2990 		if ((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) ||
2991 		    (rxr->rx_flags & IX_RXRING_FLAG_DISC)) {
2992 			ix_rx_discard(rxr, i, eop);
2993 			goto next_desc;
2994 		}
2995 
2996 		bus_dmamap_sync(rxr->rx_tag, rxbuf->map, BUS_DMASYNC_POSTREAD);
2997 		if (ix_newbuf(rxr, i, FALSE) != 0) {
2998 			ix_rx_discard(rxr, i, eop);
2999 			goto next_desc;
3000 		}
3001 
3002 		/*
3003 		 * On the 82599, which supports hardware LRO (RSC),
3004 		 * the fragments of a frame need not occupy sequential
3005 		 * descriptors; instead the next descriptor index is
3006 		 * encoded in the descriptor itself.  This also means
3007 		 * that we may be assembling more than one frame at a
3008 		 * time, which required eliminating the global chain
3009 		 * pointers in favor of the per-buffer chains used
3010 		 * here.
3011 		 */
3012 		if (!eop) {
3013 			int nextp;
3014 
3015 			/*
3016 			 * Figure out the next descriptor
3017 			 * of this frame.
3018 			 */
3019 			if (rxr->rx_flags & IX_RXRING_FLAG_LRO)
3020 				rsc = ix_rsc_count(cur);
3021 			if (rsc) { /* Get hardware index */
3022 				nextp = ((staterr &
3023 				    IXGBE_RXDADV_NEXTP_MASK) >>
3024 				    IXGBE_RXDADV_NEXTP_SHIFT);
3025 			} else { /* Just sequential */
3026 				nextp = i + 1;
3027 				if (nextp == rxr->rx_ndesc)
3028 					nextp = 0;
3029 			}
3030 			nbuf = &rxr->rx_buf[nextp];
3031 			prefetch(nbuf);
3032 		}
3033 		mp->m_len = len;
3034 
3035 		/*
3036 		 * Rather than using the fmp/lmp global pointers
3037 		 * we now keep the head of a packet chain in the
3038 		 * buffer struct and pass this along from one
3039 		 * descriptor to the next, until we get EOP.
3040 		 */
3041 		if (rxbuf->fmp == NULL) {
3042 			mp->m_pkthdr.len = len;
3043 			rxbuf->fmp = mp;
3044 			rxbuf->lmp = mp;
3045 		} else {
3046 			rxbuf->fmp->m_pkthdr.len += len;
3047 			rxbuf->lmp->m_next = mp;
3048 			rxbuf->lmp = mp;
3049 		}
3050 
3051 		if (nbuf != NULL) {
3052 			/*
3053 			 * Not the last fragment of this frame,
3054 			 * pass this fragment list on
3055 			 */
3056 			nbuf->fmp = rxbuf->fmp;
3057 			nbuf->lmp = rxbuf->lmp;
3058 		} else {
3059 			/*
3060 			 * Send this frame
3061 			 */
3062 			sendmp = rxbuf->fmp;
3063 
3064 			sendmp->m_pkthdr.rcvif = ifp;
3065 			IFNET_STAT_INC(ifp, ipackets, 1);
3066 #ifdef IX_RSS_DEBUG
3067 			rxr->rx_pkts++;
3068 #endif
3069 
3070 			/* Process vlan info */
3071 			if (staterr & IXGBE_RXD_STAT_VP) {
3072 				sendmp->m_pkthdr.ether_vlantag =
3073 				    le16toh(cur->wb.upper.vlan);
3074 				sendmp->m_flags |= M_VLANTAG;
3075 			}
3076 			if (ifp->if_capenable & IFCAP_RXCSUM)
3077 				ix_rxcsum(staterr, sendmp, ptype);
3078 			if (ifp->if_capenable & IFCAP_RSS) {
3079 				pi = ix_rssinfo(sendmp, &pi0,
3080 				    hash, hashtype, ptype);
3081 			}
3082 		}
3083 		rxbuf->fmp = NULL;
3084 		rxbuf->lmp = NULL;
3085 next_desc:
3086 		/* Advance our pointers to the next descriptor. */
3087 		if (++i == rxr->rx_ndesc)
3088 			i = 0;
3089 
3090 		if (sendmp != NULL)
3091 			ifp->if_input(ifp, sendmp, pi, cpuid);
3092 
3093 		if (nsegs >= rxr->rx_wreg_nsegs) {
3094 			ix_rx_refresh(rxr, i);
3095 			nsegs = 0;
3096 		}
3097 	}
3098 	rxr->rx_next_check = i;
3099 
3100 	if (nsegs > 0)
3101 		ix_rx_refresh(rxr, i);
3102 }
3103 
3104 static void
3105 ix_set_vlan(struct ix_softc *sc)
3106 {
3107 	struct ixgbe_hw *hw = &sc->hw;
3108 	uint32_t ctrl;
3109 
3110 	if ((sc->arpcom.ac_if.if_capenable & IFCAP_VLAN_HWTAGGING) == 0)
3111 		return;
3112 
3113 	if (hw->mac.type == ixgbe_mac_82598EB) {
3114 		ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3115 		ctrl |= IXGBE_VLNCTRL_VME;
3116 		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
3117 	} else {
3118 		int i;
3119 
3120 		/*
3121 		 * On 82599 and later chips the VLAN enable is
3122 		 * per queue in RXDCTL
3123 		 */
3124 		for (i = 0; i < sc->rx_ring_inuse; ++i) {
3125 			ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
3126 			ctrl |= IXGBE_RXDCTL_VME;
3127 			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
3128 		}
3129 	}
3130 }
3131 
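/*
 * Enable interrupts: build the EIMS mask appropriate for this MAC
 * (ECC, SFP and thermal sensor GPIs, etc.), configure auto-clear for
 * the MSI-X ring vectors and unmask the per-ring EIMS bits.
 */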
3132 static void
3133 ix_enable_intr(struct ix_softc *sc)
3134 {
3135 	struct ixgbe_hw	*hw = &sc->hw;
3136 	uint32_t fwsm;
3137 	int i;
3138 
3139 	for (i = 0; i < sc->intr_cnt; ++i)
3140 		lwkt_serialize_handler_enable(sc->intr_data[i].intr_serialize);
3141 
3142 	sc->intr_mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
3143 
3144 	switch (hw->mac.type) {
3145 	case ixgbe_mac_82599EB:
3146 		sc->intr_mask |= IXGBE_EIMS_ECC;
3147 		/* Temperature sensor on some adapters */
3148 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP0;
3149 		/* SFP+ (RX_LOS_N & MOD_ABS_N) */
3150 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
3151 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP2;
3152 		break;
3153 
3154 	case ixgbe_mac_X540:
3155 		sc->intr_mask |= IXGBE_EIMS_ECC;
3156 		/* Detect if Thermal Sensor is enabled */
3157 		fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
3158 		if (fwsm & IXGBE_FWSM_TS_ENABLED)
3159 			sc->intr_mask |= IXGBE_EIMS_TS;
3160 		break;
3161 
3162 	case ixgbe_mac_X550:
3163 		sc->intr_mask |= IXGBE_EIMS_ECC;
3164 		/* MAC thermal sensor is automatically enabled */
3165 		sc->intr_mask |= IXGBE_EIMS_TS;
3166 		break;
3167 
3168 	case ixgbe_mac_X550EM_a:
3169 	case ixgbe_mac_X550EM_x:
3170 		sc->intr_mask |= IXGBE_EIMS_ECC;
3171 		/* Some devices use SDP0 for important information */
3172 		if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
3173 		    hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP ||
3174 		    hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP_N ||
3175 		    hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T)
3176 			sc->intr_mask |= IXGBE_EIMS_GPI_SDP0_BY_MAC(hw);
3177 		if (hw->phy.type == ixgbe_phy_x550em_ext_t)
3178 			sc->intr_mask |= IXGBE_EICR_GPI_SDP0_X540;
3179 		break;
3180 
3181 	default:
3182 		break;
3183 	}
3184 
3185 	/* Enable Fan Failure detection */
3186 	if (sc->caps & IX_CAP_DETECT_FANFAIL)
3187 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
3188 
3189 	/* With MSI-X we use auto clear for RX and TX rings */
3190 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
3191 		/*
3192 		 * There are no EIAC1/EIAC2 for newer chips; the related
3193 		 * bits for TX and RX rings > 16 are always auto clear.
3194 		 *
3195 		 * XXX which bits?  There are _no_ documented EICR1 and
3196 		 * EICR2 at all; only EICR.
3197 		 */
3198 		IXGBE_WRITE_REG(hw, IXGBE_EIAC, IXGBE_EIMS_RTX_QUEUE);
3199 	} else {
3200 		sc->intr_mask |= IX_TX_INTR_MASK | IX_RX0_INTR_MASK;
3201 
3202 		KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
3203 		if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS)
3204 			sc->intr_mask |= IX_RX1_INTR_MASK;
3205 	}
3206 
3207 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
3208 
3209 	/*
3210 	 * Enable RX and TX rings for MSI-X
3211 	 */
3212 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
3213 		for (i = 0; i < sc->tx_ring_inuse; ++i) {
3214 			const struct ix_tx_ring *txr = &sc->tx_rings[i];
3215 
3216 			if (txr->tx_intr_vec >= 0) {
3217 				IXGBE_WRITE_REG(hw, txr->tx_eims,
3218 				    txr->tx_eims_val);
3219 			}
3220 		}
3221 		for (i = 0; i < sc->rx_ring_inuse; ++i) {
3222 			const struct ix_rx_ring *rxr = &sc->rx_rings[i];
3223 
3224 			KKASSERT(rxr->rx_intr_vec >= 0);
3225 			IXGBE_WRITE_REG(hw, rxr->rx_eims, rxr->rx_eims_val);
3226 		}
3227 	}
3228 
3229 	IXGBE_WRITE_FLUSH(hw);
3230 }
3231 
3232 static void
3233 ix_disable_intr(struct ix_softc *sc)
3234 {
3235 	int i;
3236 
3237 	if (sc->intr_type == PCI_INTR_TYPE_MSIX)
3238 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIAC, 0);
3239 
3240 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
3241 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, ~0);
3242 	} else {
3243 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, 0xFFFF0000);
3244 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(0), ~0);
3245 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(1), ~0);
3246 	}
3247 	IXGBE_WRITE_FLUSH(&sc->hw);
3248 
3249 	for (i = 0; i < sc->intr_cnt; ++i)
3250 		lwkt_serialize_handler_disable(sc->intr_data[i].intr_serialize);
3251 }
3252 
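/*
 * Determine and report the PCI Express slot speed and width, warning
 * when the slot provides less bandwidth than the adapter can use.
 */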
3253 static void
3254 ix_slot_info(struct ix_softc *sc)
3255 {
3256 	device_t dev = sc->dev;
3257 	struct ixgbe_hw *hw = &sc->hw;
3258 	uint32_t offset;
3259 	uint16_t link;
3260 	boolean_t bus_info_valid = TRUE;
3261 
3262 	/* Some devices are behind an internal bridge */
3263 	switch (hw->device_id) {
3264 	case IXGBE_DEV_ID_82599_SFP_SF_QP:
3265 	case IXGBE_DEV_ID_82599_QSFP_SF_QP:
3266 		goto get_parent_info;
3267 	default:
3268 		break;
3269 	}
3270 
3271 	ixgbe_get_bus_info(hw);
3272 
3273 	/*
3274 	 * Some devices don't use PCI-E; there is no point in
3275 	 * displaying "Unknown" for their bus speed and width.
3276 	 */
3277 	switch (hw->mac.type) {
3278 	case ixgbe_mac_X550EM_x:
3279 	case ixgbe_mac_X550EM_a:
3280 		return;
3281 	default:
3282 		goto display;
3283 	}
3284 
3285 get_parent_info:
3286 	/*
3287 	 * For the Quad port adapter we need to parse back up
3288 	 * the PCI tree to find the speed of the expansion slot
3289 	 * into which this adapter is plugged.  A bit more work.
3290 	 */
3291 	dev = device_get_parent(device_get_parent(dev));
3292 #ifdef IXGBE_DEBUG
3293 	device_printf(dev, "parent pcib = %x,%x,%x\n", pci_get_bus(dev),
3294 	    pci_get_slot(dev), pci_get_function(dev));
3295 #endif
3296 	dev = device_get_parent(device_get_parent(dev));
3297 #ifdef IXGBE_DEBUG
3298 	device_printf(dev, "slot pcib = %x,%x,%x\n", pci_get_bus(dev),
3299 	    pci_get_slot(dev), pci_get_function(dev));
3300 #endif
3301 	/* Now get the PCI Express Capabilities offset */
3302 	offset = pci_get_pciecap_ptr(dev);
3303 	if (offset == 0) {
3304 		/*
3305 		 * Hmm...can't get PCI-Express capabilities.
3306 		 * Falling back to default method.
3307 		 */
3308 		bus_info_valid = FALSE;
3309 		ixgbe_get_bus_info(hw);
3310 		goto display;
3311 	}
3312 	/* ...and read the Link Status Register */
3313 	link = pci_read_config(dev, offset + PCIER_LINKSTAT, 2);
3314 	ixgbe_set_pci_config_data_generic(hw, link);
3315 
3316 display:
3317 	device_printf(dev, "PCI Express Bus: Speed %s %s\n",
3318 	    hw->bus.speed == ixgbe_bus_speed_8000 ? "8.0GT/s" :
3319 	    hw->bus.speed == ixgbe_bus_speed_5000 ? "5.0GT/s" :
3320 	    hw->bus.speed == ixgbe_bus_speed_2500 ? "2.5GT/s" : "Unknown",
3321 	    hw->bus.width == ixgbe_bus_width_pcie_x8 ? "Width x8" :
3322 	    hw->bus.width == ixgbe_bus_width_pcie_x4 ? "Width x4" :
3323 	    hw->bus.width == ixgbe_bus_width_pcie_x1 ? "Width x1" : "Unknown");
3324 
3325 	if (bus_info_valid) {
3326 		if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP &&
3327 		    hw->bus.width <= ixgbe_bus_width_pcie_x4 &&
3328 		    hw->bus.speed == ixgbe_bus_speed_2500) {
3329 			device_printf(dev, "PCI-Express bandwidth available "
3330 			    "for this card is not sufficient for optimal "
3331 			    "performance.\n");
3332 			device_printf(dev, "For optimal performance a "
3333 			    "x8 PCIE, or x4 PCIE Gen2 slot is required.\n");
3334 		}
3335 		if (hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP &&
3336 		    hw->bus.width <= ixgbe_bus_width_pcie_x8 &&
3337 		    hw->bus.speed < ixgbe_bus_speed_8000) {
3338 			device_printf(dev, "PCI-Express bandwidth available "
3339 			    "for this card is not sufficient for optimal "
3340 			    "performance.\n");
3341 			device_printf(dev, "For optimal performance a "
3342 			    "x8 PCIE Gen3 slot is required.\n");
3343 		}
3344 	} else {
3345 		device_printf(dev, "Unable to determine slot speed/width.  "
3346 		    "The speed/width reported are that of the internal "
3347 		    "switch.\n");
3348 	}
3349 }
3350 
3351 /*
3352  * TODO comment is incorrect
3353  *
3354  * Setup the correct IVAR register for a particular MSIX interrupt
3355  * - entry is the register array entry
3356  * - vector is the MSIX vector for this queue
3357  * - type is RX/TX/MISC
3358  */
3359 static void
3360 ix_set_ivar(struct ix_softc *sc, uint8_t entry, uint8_t vector,
3361     int8_t type)
3362 {
3363 	struct ixgbe_hw *hw = &sc->hw;
3364 	uint32_t ivar, index;
3365 
3366 	vector |= IXGBE_IVAR_ALLOC_VAL;
3367 
3368 	switch (hw->mac.type) {
3369 	case ixgbe_mac_82598EB:
3370 		if (type == -1)
3371 			entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
3372 		else
3373 			entry += (type * 64);
3374 		index = (entry >> 2) & 0x1F;
3375 		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
3376 		ivar &= ~(0xFF << (8 * (entry & 0x3)));
3377 		ivar |= (vector << (8 * (entry & 0x3)));
3378 		IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
3379 		break;
3380 
3381 	case ixgbe_mac_82599EB:
3382 	case ixgbe_mac_X540:
3383 	case ixgbe_mac_X550:
3384 	case ixgbe_mac_X550EM_a:
3385 	case ixgbe_mac_X550EM_x:
3386 		if (type == -1) { /* MISC IVAR */
3387 			index = (entry & 1) * 8;
3388 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
3389 			ivar &= ~(0xFF << index);
3390 			ivar |= (vector << index);
3391 			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
3392 		} else {	/* RX/TX IVARS */
3393 			index = (16 * (entry & 1)) + (8 * type);
3394 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
3395 			ivar &= ~(0xFF << index);
3396 			ivar |= (vector << index);
3397 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
3398 		}
3399 		/* FALL THROUGH */
3400 	default:
3401 		break;
3402 	}
3403 }
3404 
3405 static boolean_t
3406 ix_sfp_probe(struct ix_softc *sc)
3407 {
3408 	struct ixgbe_hw	*hw = &sc->hw;
3409 
3410 	if (hw->phy.type == ixgbe_phy_nl &&
3411 	    hw->phy.sfp_type == ixgbe_sfp_type_not_present) {
3412 		int32_t ret;
3413 
3414 		ret = hw->phy.ops.identify_sfp(hw);
3415 		if (ret)
3416 			return FALSE;
3417 
3418 		ret = hw->phy.ops.reset(hw);
3419 		sc->sfp_probe = FALSE;
3420 		if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3421 			if_printf(&sc->arpcom.ac_if,
3422 			     "Unsupported SFP+ module detected!  "
3423 			     "Reload driver with supported module.\n");
3424 			return FALSE;
3425 		}
3426 		if_printf(&sc->arpcom.ac_if, "SFP+ module detected!\n");
3427 
3428 		/* We now have supported optics */
3429 		return TRUE;
3430 	}
3431 	return FALSE;
3432 }
3433 
3434 static void
3435 ix_handle_link(struct ix_softc *sc)
3436 {
3437 	ixgbe_check_link(&sc->hw, &sc->link_speed, &sc->link_up, 0);
3438 	ix_update_link_status(sc);
3439 }
3440 
3441 /*
3442  * Handling SFP module
3443  */
3444 static void
3445 ix_handle_mod(struct ix_softc *sc)
3446 {
3447 	struct ixgbe_hw *hw = &sc->hw;
3448 	uint32_t err;
3449 
3450 	if (sc->hw.need_crosstalk_fix) {
3451 		uint32_t cage_full = 0;
3452 
3453 		switch (hw->mac.type) {
3454 		case ixgbe_mac_82599EB:
3455 			cage_full = IXGBE_READ_REG(hw, IXGBE_ESDP) &
3456 			    IXGBE_ESDP_SDP2;
3457 			break;
3458 
3459 		case ixgbe_mac_X550EM_x:
3460 		case ixgbe_mac_X550EM_a:
3461 			cage_full = IXGBE_READ_REG(hw, IXGBE_ESDP) &
3462 			    IXGBE_ESDP_SDP0;
3463 			break;
3464 
3465 		default:
3466 			break;
3467 		}
3468 
3469 		if (!cage_full)
3470 			return;
3471 	}
3472 
3473 	err = hw->phy.ops.identify_sfp(hw);
3474 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3475 		if_printf(&sc->arpcom.ac_if,
3476 		    "Unsupported SFP+ module type was detected.\n");
3477 		return;
3478 	}
3479 
3480 	if (hw->mac.type == ixgbe_mac_82598EB)
3481 		err = hw->phy.ops.reset(hw);
3482 	else
3483 		err = hw->mac.ops.setup_sfp(hw);
3484 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3485 		if_printf(&sc->arpcom.ac_if,
3486 		    "Setup failure - unsupported SFP+ module type.\n");
3487 		return;
3488 	}
3489 	ix_handle_msf(sc);
3490 }
3491 
3492 /*
3493  * Handling MSF (multispeed fiber)
3494  */
3495 static void
3496 ix_handle_msf(struct ix_softc *sc)
3497 {
3498 	struct ixgbe_hw *hw = &sc->hw;
3499 	uint32_t autoneg;
3500 
3501 	sc->phy_layer = ixgbe_get_supported_physical_layer(hw);
3502 	ix_init_media(sc);
3503 
3504 	if (sc->advspeed != IXGBE_LINK_SPEED_UNKNOWN)
3505 		autoneg = sc->advspeed;
3506 	else
3507 		autoneg = hw->phy.autoneg_advertised;
3508 	if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
3509 		bool negotiate;
3510 
3511 		hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
3512 	}
3513 	if (hw->mac.ops.setup_link != NULL)
3514 		hw->mac.ops.setup_link(hw, autoneg, TRUE);
3515 }
3516 
3517 static void
3518 ix_handle_phy(struct ix_softc *sc)
3519 {
3520 	struct ixgbe_hw *hw = &sc->hw;
3521 	int error;
3522 
3523 	error = hw->phy.ops.handle_lasi(hw);
3524 	if (error == IXGBE_ERR_OVERTEMP) {
3525 		if_printf(&sc->arpcom.ac_if,
3526 		    "CRITICAL: EXTERNAL PHY OVER TEMP!!  "
3527 		    "PHY will downshift to lower power state!\n");
3528 	} else if (error) {
3529 		if_printf(&sc->arpcom.ac_if,
3530 		    "Error handling LASI interrupt: %d\n", error);
3531 	}
3532 }
3533 
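/*
 * Harvest the hardware statistics registers, accumulating them into the
 * softc, and update the ifnet error counters.
 */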
3534 static void
3535 ix_update_stats(struct ix_softc *sc)
3536 {
3537 	struct ifnet *ifp = &sc->arpcom.ac_if;
3538 	struct ixgbe_hw *hw = &sc->hw;
3539 	struct ixgbe_hw_stats *stats = &sc->stats;
3540 	uint32_t missed_rx = 0, bprc, lxon, lxoff, total;
3541 	uint64_t total_missed_rx = 0;
3542 	int i;
3543 
3544 	stats->crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
3545 	stats->illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
3546 	stats->errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
3547 	stats->mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
3548 	stats->mpc[0] += IXGBE_READ_REG(hw, IXGBE_MPC(0));
3549 
3550 	for (i = 0; i < 16; i++) {
3551 		stats->qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
3552 		stats->qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
3553 		stats->qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
3554 	}
3555 	stats->mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
3556 	stats->mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
3557 	stats->rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
3558 
3559 	/* Hardware workaround, gprc counts missed packets */
3560 	stats->gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
3561 	stats->gprc -= missed_rx;
3562 
3563 	if (hw->mac.type != ixgbe_mac_82598EB) {
3564 		stats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
3565 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
3566 		stats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
3567 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
3568 		stats->tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
3569 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
3570 		stats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
3571 		stats->lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
3572 	} else {
3573 		stats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
3574 		stats->lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
3575 		/* 82598 only has a counter in the high register */
3576 		stats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
3577 		stats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
3578 		stats->tor += IXGBE_READ_REG(hw, IXGBE_TORH);
3579 	}
3580 
3581 	/*
3582 	 * Workaround: mprc hardware is incorrectly counting
3583 	 * broadcasts, so for now we subtract those.
3584 	 */
3585 	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
3586 	stats->bprc += bprc;
3587 	stats->mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
3588 	if (hw->mac.type == ixgbe_mac_82598EB)
3589 		stats->mprc -= bprc;
3590 
3591 	stats->prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
3592 	stats->prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
3593 	stats->prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
3594 	stats->prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
3595 	stats->prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
3596 	stats->prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
3597 
3598 	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
3599 	stats->lxontxc += lxon;
3600 	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
3601 	stats->lxofftxc += lxoff;
3602 	total = lxon + lxoff;
3603 
3604 	stats->gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
3605 	stats->mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
3606 	stats->ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
3607 	stats->gptc -= total;
3608 	stats->mptc -= total;
3609 	stats->ptc64 -= total;
3610 	stats->gotc -= total * ETHER_MIN_LEN;
3611 
3612 	stats->ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
3613 	stats->rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
3614 	stats->roc += IXGBE_READ_REG(hw, IXGBE_ROC);
3615 	stats->rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
3616 	stats->mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
3617 	stats->mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
3618 	stats->mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
3619 	stats->tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
3620 	stats->tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
3621 	stats->ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
3622 	stats->ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
3623 	stats->ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
3624 	stats->ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
3625 	stats->ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
3626 	stats->bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
3627 	stats->xec += IXGBE_READ_REG(hw, IXGBE_XEC);
3628 	stats->fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
3629 	stats->fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
3630 	/* Only read FCOE on 82599 */
3631 	if (hw->mac.type != ixgbe_mac_82598EB) {
3632 		stats->fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
3633 		stats->fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
3634 		stats->fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
3635 		stats->fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
3636 		stats->fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
3637 	}
3638 
3639 	/* Rx Errors */
3640 	IFNET_STAT_SET(ifp, iqdrops, total_missed_rx);
3641 	IFNET_STAT_SET(ifp, ierrors, sc->stats.crcerrs + sc->stats.rlec);
3642 }
3643 
3644 #if 0
3645 /*
3646  * Add sysctl variables, one per statistic, to the system.
3647  */
3648 static void
3649 ix_add_hw_stats(struct ix_softc *sc)
3650 {
3651 
3652 	device_t dev = sc->dev;
3653 
3654 	struct ix_tx_ring *txr = sc->tx_rings;
3655 	struct ix_rx_ring *rxr = sc->rx_rings;
3656 
3657 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
3658 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
3659 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
3660 	struct ixgbe_hw_stats *stats = &sc->stats;
3661 
3662 	struct sysctl_oid *stat_node, *queue_node;
3663 	struct sysctl_oid_list *stat_list, *queue_list;
3664 
3665 #define QUEUE_NAME_LEN 32
3666 	char namebuf[QUEUE_NAME_LEN];
3667 
3668 	/* MAC stats get their own sub node */
3669 
3670 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
3671 				    CTLFLAG_RD, NULL, "MAC Statistics");
3672 	stat_list = SYSCTL_CHILDREN(stat_node);
3673 
3674 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
3675 			CTLFLAG_RD, &stats->crcerrs,
3676 			"CRC Errors");
3677 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
3678 			CTLFLAG_RD, &stats->illerrc,
3679 			"Illegal Byte Errors");
3680 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
3681 			CTLFLAG_RD, &stats->errbc,
3682 			"Byte Errors");
3683 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
3684 			CTLFLAG_RD, &stats->mspdc,
3685 			"MAC Short Packets Discarded");
3686 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
3687 			CTLFLAG_RD, &stats->mlfc,
3688 			"MAC Local Faults");
3689 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
3690 			CTLFLAG_RD, &stats->mrfc,
3691 			"MAC Remote Faults");
3692 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
3693 			CTLFLAG_RD, &stats->rlec,
3694 			"Receive Length Errors");
3695 
3696 	/* Flow Control stats */
3697 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
3698 			CTLFLAG_RD, &stats->lxontxc,
3699 			"Link XON Transmitted");
3700 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
3701 			CTLFLAG_RD, &stats->lxonrxc,
3702 			"Link XON Received");
3703 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
3704 			CTLFLAG_RD, &stats->lxofftxc,
3705 			"Link XOFF Transmitted");
3706 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
3707 			CTLFLAG_RD, &stats->lxoffrxc,
3708 			"Link XOFF Received");
3709 
3710 	/* Packet Reception Stats */
3711 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
3712 			CTLFLAG_RD, &stats->tor,
3713 			"Total Octets Received");
3714 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
3715 			CTLFLAG_RD, &stats->gorc,
3716 			"Good Octets Received");
3717 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
3718 			CTLFLAG_RD, &stats->tpr,
3719 			"Total Packets Received");
3720 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
3721 			CTLFLAG_RD, &stats->gprc,
3722 			"Good Packets Received");
3723 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
3724 			CTLFLAG_RD, &stats->mprc,
3725 			"Multicast Packets Received");
3726 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
3727 			CTLFLAG_RD, &stats->bprc,
3728 			"Broadcast Packets Received");
3729 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
3730 			CTLFLAG_RD, &stats->prc64,
3731 			"64 byte frames received");
3732 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
3733 			CTLFLAG_RD, &stats->prc127,
3734 			"65-127 byte frames received");
3735 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
3736 			CTLFLAG_RD, &stats->prc255,
3737 			"128-255 byte frames received");
3738 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
3739 			CTLFLAG_RD, &stats->prc511,
3740 			"256-511 byte frames received");
3741 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
3742 			CTLFLAG_RD, &stats->prc1023,
3743 			"512-1023 byte frames received");
3744 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
3745 			CTLFLAG_RD, &stats->prc1522,
3746 			"1024-1522 byte frames received");
3747 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
3748 			CTLFLAG_RD, &stats->ruc,
3749 			"Receive Undersized");
3750 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
3751 			CTLFLAG_RD, &stats->rfc,
3752 			"Fragmented Packets Received");
3753 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
3754 			CTLFLAG_RD, &stats->roc,
3755 			"Oversized Packets Received");
3756 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
3757 			CTLFLAG_RD, &stats->rjc,
3758 			"Received Jabber");
3759 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
3760 			CTLFLAG_RD, &stats->mngprc,
3761 			"Management Packets Received");
3762 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
3763 			CTLFLAG_RD, &stats->mngpdc,
3764 			"Management Packets Dropped");
3765 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
3766 			CTLFLAG_RD, &stats->xec,
3767 			"Checksum Errors");
3768 
3769 	/* Packet Transmission Stats */
3770 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
3771 			CTLFLAG_RD, &stats->gotc,
3772 			"Good Octets Transmitted");
3773 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
3774 			CTLFLAG_RD, &stats->tpt,
3775 			"Total Packets Transmitted");
3776 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
3777 			CTLFLAG_RD, &stats->gptc,
3778 			"Good Packets Transmitted");
3779 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
3780 			CTLFLAG_RD, &stats->bptc,
3781 			"Broadcast Packets Transmitted");
3782 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
3783 			CTLFLAG_RD, &stats->mptc,
3784 			"Multicast Packets Transmitted");
3785 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
3786 			CTLFLAG_RD, &stats->mngptc,
3787 			"Management Packets Transmitted");
3788 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
3789 			CTLFLAG_RD, &stats->ptc64,
3790 			"64 byte frames transmitted");
3791 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
3792 			CTLFLAG_RD, &stats->ptc127,
3793 			"65-127 byte frames transmitted");
3794 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
3795 			CTLFLAG_RD, &stats->ptc255,
3796 			"128-255 byte frames transmitted");
3797 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
3798 			CTLFLAG_RD, &stats->ptc511,
3799 			"256-511 byte frames transmitted");
3800 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
3801 			CTLFLAG_RD, &stats->ptc1023,
3802 			"512-1023 byte frames transmitted");
3803 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
3804 			CTLFLAG_RD, &stats->ptc1522,
3805 			"1024-1522 byte frames transmitted");
3806 }
3807 #endif
3808 
3809 /*
3810  * Enable the hardware to drop packets when the buffer is full.
3811  * This is useful when multiple RX rings are used, so that no
3812  * single RX ring being full stalls the entire RX engine.  We
3813  * only enable this when multiple RX rings are used and when
3814  * flow control is disabled.
3815  */
3816 static void
3817 ix_enable_rx_drop(struct ix_softc *sc)
3818 {
3819 	struct ixgbe_hw *hw = &sc->hw;
3820 	int i;
3821 
3822 	if (bootverbose) {
3823 		if_printf(&sc->arpcom.ac_if,
3824 		    "flow control %s, enable RX drop\n",
3825 		    ix_fc2str(sc->hw.fc.current_mode));
3826 	}
3827 
3828 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
3829 		uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3830 
3831 		srrctl |= IXGBE_SRRCTL_DROP_EN;
3832 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3833 	}
3834 }
3835 
3836 static void
3837 ix_disable_rx_drop(struct ix_softc *sc)
3838 {
3839 	struct ixgbe_hw *hw = &sc->hw;
3840 	int i;
3841 
3842 	if (bootverbose) {
3843 		if_printf(&sc->arpcom.ac_if,
3844 		    "flow control %s, disable RX drop\n",
3845 		    ix_fc2str(sc->hw.fc.current_mode));
3846 	}
3847 
3848 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
3849 		uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3850 
3851 		srrctl &= ~IXGBE_SRRCTL_DROP_EN;
3852 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3853 	}
3854 }
3855 
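/*
 * Build the serializer array: the main serializer first, followed by
 * one serializer per RX ring and then one per TX ring.  The main
 * serializer is placed at index 0 so that ix_serialize_skipmain() can
 * skip it.
 */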
3856 static void
3857 ix_setup_serialize(struct ix_softc *sc)
3858 {
3859 	int i = 0, j;
3860 
3861 	/* Main + RX + TX */
3862 	sc->nserialize = 1 + sc->rx_ring_cnt + sc->tx_ring_cnt;
3863 	sc->serializes =
3864 	    kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *),
3865 	        M_DEVBUF, M_WAITOK | M_ZERO);
3866 
3867 	/*
3868 	 * Set up serializers.
3869 	 *
3870 	 * NOTE: Order is critical
3871 	 */
3872 
3873 	KKASSERT(i < sc->nserialize);
3874 	sc->serializes[i++] = &sc->main_serialize;
3875 
3876 	for (j = 0; j < sc->rx_ring_cnt; ++j) {
3877 		KKASSERT(i < sc->nserialize);
3878 		sc->serializes[i++] = &sc->rx_rings[j].rx_serialize;
3879 	}
3880 
3881 	for (j = 0; j < sc->tx_ring_cnt; ++j) {
3882 		KKASSERT(i < sc->nserialize);
3883 		sc->serializes[i++] = &sc->tx_rings[j].tx_serialize;
3884 	}
3885 
3886 	KKASSERT(i == sc->nserialize);
3887 }
3888 
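/*
 * Allocate interrupt resources.  MSI-X is tried first; if that fails,
 * fall back to a single MSI or legacy interrupt, which is shared by
 * the first TX ring and up to IX_MIN_RXRING_RSS RX rings.
 */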
3889 static int
3890 ix_alloc_intr(struct ix_softc *sc)
3891 {
3892 	struct ix_intr_data *intr;
3893 	struct ix_tx_ring *txr;
3894 	u_int intr_flags;
3895 	int i;
3896 
3897 	ix_alloc_msix(sc);
3898 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
3899 		ix_set_ring_inuse(sc, FALSE);
3900 		goto done;
3901 	}
3902 
3903 	/*
3904 	 * Reset some settings changed by ix_alloc_msix().
3905 	 */
3906 	if (sc->rx_rmap_intr != NULL) {
3907 		if_ringmap_free(sc->rx_rmap_intr);
3908 		sc->rx_rmap_intr = NULL;
3909 	}
3910 	if (sc->tx_rmap_intr != NULL) {
3911 		if_ringmap_free(sc->tx_rmap_intr);
3912 		sc->tx_rmap_intr = NULL;
3913 	}
3914 	if (sc->intr_data != NULL) {
3915 		kfree(sc->intr_data, M_DEVBUF);
3916 		sc->intr_data = NULL;
3917 	}
3918 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
3919 		txr = &sc->tx_rings[i];
3920 		txr->tx_intr_vec = -1;
3921 		txr->tx_intr_cpuid = -1;
3922 	}
3923 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
3924 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
3925 
3926 		rxr->rx_intr_vec = -1;
3927 		rxr->rx_txr = NULL;
3928 	}
3929 
3930 	sc->intr_cnt = 1;
3931 	sc->intr_data = kmalloc(sizeof(struct ix_intr_data), M_DEVBUF,
3932 	    M_WAITOK | M_ZERO);
3933 	intr = &sc->intr_data[0];
3934 
3935 	/*
3936 	 * Allocate MSI/legacy interrupt resource
3937 	 */
3938 	if (sc->caps & IX_CAP_LEGACY_INTR) {
3939 		sc->intr_type = pci_alloc_1intr(sc->dev, ix_msi_enable,
3940 		    &intr->intr_rid, &intr_flags);
3941 	} else {
3942 		int cpu;
3943 
3944 		/*
3945 		 * Only MSI is supported.
3946 		 */
3947 		cpu = device_getenv_int(sc->dev, "msi.cpu", -1);
3948 		if (cpu >= ncpus)
3949 			cpu = ncpus - 1;
3950 
3951 		if (pci_alloc_msi(sc->dev, &intr->intr_rid, 1, cpu) == 0) {
3952 			sc->intr_type = PCI_INTR_TYPE_MSI;
3953 			intr_flags = RF_ACTIVE;
3954 		} else {
3955 			sc->intr_type = PCI_INTR_TYPE_LEGACY;
3956 			device_printf(sc->dev, "Unable to allocate MSI\n");
3957 			return ENXIO;
3958 		}
3959 	}
3960 
3961 	intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
3962 	    &intr->intr_rid, intr_flags);
3963 	if (intr->intr_res == NULL) {
3964 		device_printf(sc->dev, "Unable to allocate bus resource: "
3965 		    "interrupt\n");
3966 		return ENXIO;
3967 	}
3968 
3969 	intr->intr_serialize = &sc->main_serialize;
3970 	intr->intr_cpuid = rman_get_cpuid(intr->intr_res);
3971 	if (sc->hw.mac.type == ixgbe_mac_82598EB)
3972 		intr->intr_func = ix_intr_82598;
3973 	else
3974 		intr->intr_func = ix_intr;
3975 	intr->intr_funcarg = sc;
3976 	intr->intr_rate = IX_INTR_RATE;
3977 	intr->intr_use = IX_INTR_USE_RXTX;
3978 
3979 	sc->tx_rings[0].tx_intr_vec = IX_TX_INTR_VEC;
3980 	sc->tx_rings[0].tx_intr_cpuid = intr->intr_cpuid;
3981 
3982 	sc->rx_rings[0].rx_intr_vec = IX_RX0_INTR_VEC;
3983 
3984 	ix_set_ring_inuse(sc, FALSE);
3985 
3986 	KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
3987 	if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS) {
3988 		sc->rx_rings[1].rx_intr_vec = IX_RX1_INTR_VEC;
3989 
3990 		/*
3991 		 * Allocate RX ring map for RSS setup.
3992 		 */
3993 		sc->rx_rmap_intr = if_ringmap_alloc(sc->dev,
3994 		    IX_MIN_RXRING_RSS, IX_MIN_RXRING_RSS);
3995 		KASSERT(if_ringmap_count(sc->rx_rmap_intr) ==
3996 		    sc->rx_ring_inuse, ("RX ring inuse mismatch"));
3997 	}
3998 done:
3999 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
4000 		txr = &sc->tx_rings[i];
4001 		if (txr->tx_intr_cpuid < 0)
4002 			txr->tx_intr_cpuid = 0;
4003 	}
4004 	return 0;
4005 }
4006 
4007 static void
4008 ix_free_intr(struct ix_softc *sc)
4009 {
4010 	if (sc->intr_data == NULL)
4011 		return;
4012 
4013 	if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
4014 		struct ix_intr_data *intr = &sc->intr_data[0];
4015 
4016 		KKASSERT(sc->intr_cnt == 1);
4017 		if (intr->intr_res != NULL) {
4018 			bus_release_resource(sc->dev, SYS_RES_IRQ,
4019 			    intr->intr_rid, intr->intr_res);
4020 		}
4021 		if (sc->intr_type == PCI_INTR_TYPE_MSI)
4022 			pci_release_msi(sc->dev);
4023 
4024 		kfree(sc->intr_data, M_DEVBUF);
4025 	} else {
4026 		ix_free_msix(sc, TRUE);
4027 	}
4028 }
4029 
4030 static void
4031 ix_set_ring_inuse(struct ix_softc *sc, boolean_t polling)
4032 {
4033 	sc->rx_ring_inuse = ix_get_rxring_inuse(sc, polling);
4034 	sc->tx_ring_inuse = ix_get_txring_inuse(sc, polling);
4035 	if (bootverbose) {
4036 		if_printf(&sc->arpcom.ac_if,
4037 		    "RX rings %d/%d, TX rings %d/%d\n",
4038 		    sc->rx_ring_inuse, sc->rx_ring_cnt,
4039 		    sc->tx_ring_inuse, sc->tx_ring_cnt);
4040 	}
4041 }
4042 
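/*
 * Without hardware RSS only one RX ring is used.  With polling all RX
 * rings are used; otherwise the count depends on the interrupt type.
 */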
4043 static int
4044 ix_get_rxring_inuse(const struct ix_softc *sc, boolean_t polling)
4045 {
4046 	if (!IX_ENABLE_HWRSS(sc))
4047 		return 1;
4048 
4049 	if (polling)
4050 		return sc->rx_ring_cnt;
4051 	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
4052 		return IX_MIN_RXRING_RSS;
4053 	else
4054 		return sc->rx_ring_msix;
4055 }
4056 
4057 static int
4058 ix_get_txring_inuse(const struct ix_softc *sc, boolean_t polling)
4059 {
4060 	if (!IX_ENABLE_HWTSS(sc))
4061 		return 1;
4062 
4063 	if (polling)
4064 		return sc->tx_ring_cnt;
4065 	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
4066 		return 1;
4067 	else
4068 		return sc->tx_ring_msix;
4069 }
4070 
4071 static int
4072 ix_setup_intr(struct ix_softc *sc)
4073 {
4074 	int i;
4075 
4076 	for (i = 0; i < sc->intr_cnt; ++i) {
4077 		struct ix_intr_data *intr = &sc->intr_data[i];
4078 		int error;
4079 
4080 		error = bus_setup_intr_descr(sc->dev, intr->intr_res,
4081 		    INTR_MPSAFE, intr->intr_func, intr->intr_funcarg,
4082 		    &intr->intr_hand, intr->intr_serialize, intr->intr_desc);
4083 		if (error) {
4084 			device_printf(sc->dev, "can't setup %dth intr\n", i);
4085 			ix_teardown_intr(sc, i);
4086 			return error;
4087 		}
4088 	}
4089 	return 0;
4090 }
4091 
4092 static void
4093 ix_teardown_intr(struct ix_softc *sc, int intr_cnt)
4094 {
4095 	int i;
4096 
4097 	if (sc->intr_data == NULL)
4098 		return;
4099 
4100 	for (i = 0; i < intr_cnt; ++i) {
4101 		struct ix_intr_data *intr = &sc->intr_data[i];
4102 
4103 		bus_teardown_intr(sc->dev, intr->intr_res, intr->intr_hand);
4104 	}
4105 }
4106 
4107 static void
4108 ix_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
4109 {
4110 	struct ix_softc *sc = ifp->if_softc;
4111 
4112 	ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz);
4113 }
4114 
4115 static void
4116 ix_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
4117 {
4118 	struct ix_softc *sc = ifp->if_softc;
4119 
4120 	ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz);
4121 }
4122 
4123 static int
4124 ix_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
4125 {
4126 	struct ix_softc *sc = ifp->if_softc;
4127 
4128 	return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz);
4129 }
4130 
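/* Hold all serializers except the main serializer (index 0). */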
4131 static void
4132 ix_serialize_skipmain(struct ix_softc *sc)
4133 {
4134 	lwkt_serialize_array_enter(sc->serializes, sc->nserialize, 1);
4135 }
4136 
4137 static void
4138 ix_deserialize_skipmain(struct ix_softc *sc)
4139 {
4140 	lwkt_serialize_array_exit(sc->serializes, sc->nserialize, 1);
4141 }
4142 
4143 #ifdef INVARIANTS
4144 
4145 static void
4146 ix_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
4147     boolean_t serialized)
4148 {
4149 	struct ix_softc *sc = ifp->if_softc;
4150 
4151 	ifnet_serialize_array_assert(sc->serializes, sc->nserialize, slz,
4152 	    serialized);
4153 }
4154 
4155 #endif	/* INVARIANTS */
4156 
4157 static void
4158 ix_free_rings(struct ix_softc *sc)
4159 {
4160 	int i;
4161 
4162 	if (sc->tx_rings != NULL) {
4163 		for (i = 0; i < sc->tx_ring_cnt; ++i) {
4164 			struct ix_tx_ring *txr = &sc->tx_rings[i];
4165 
4166 			ix_destroy_tx_ring(txr, txr->tx_ndesc);
4167 		}
4168 		kfree(sc->tx_rings, M_DEVBUF);
4169 	}
4170 
4171 	if (sc->rx_rings != NULL) {
4172 		for (i = 0; i < sc->rx_ring_cnt; ++i) {
4173 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
4174 
4175 			ix_destroy_rx_ring(rxr, rxr->rx_ndesc);
4176 		}
4177 		kfree(sc->rx_rings, M_DEVBUF);
4178 	}
4179 
4180 	if (sc->parent_tag != NULL)
4181 		bus_dma_tag_destroy(sc->parent_tag);
4182 }
4183 
4184 static void
4185 ix_watchdog_reset(struct ix_softc *sc)
4186 {
4187 	int i;
4188 
4189 	ASSERT_IFNET_SERIALIZED_ALL(&sc->arpcom.ac_if);
4190 	ix_init(sc);
4191 	for (i = 0; i < sc->tx_ring_inuse; ++i)
4192 		ifsq_devstart_sched(sc->tx_rings[i].tx_ifsq);
4193 }
4194 
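/*
 * If the interface is running, clear IFF_RUNNING (plus any extra
 * flags) and wait for the netisrs to sync, so that no polling handler
 * is still running on the rings.
 */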
4195 static void
4196 ix_sync_netisr(struct ix_softc *sc, int flags)
4197 {
4198 	struct ifnet *ifp = &sc->arpcom.ac_if;
4199 
4200 	ifnet_serialize_all(ifp);
4201 	if (ifp->if_flags & IFF_RUNNING) {
4202 		ifp->if_flags &= ~(IFF_RUNNING | flags);
4203 	} else {
4204 		ifnet_deserialize_all(ifp);
4205 		return;
4206 	}
4207 	ifnet_deserialize_all(ifp);
4208 
4209 	/* Make sure that polling has stopped. */
4210 	netmsg_service_sync();
4211 }
4212 
4213 static void
4214 ix_watchdog_task(void *xsc, int pending __unused)
4215 {
4216 	struct ix_softc *sc = xsc;
4217 	struct ifnet *ifp = &sc->arpcom.ac_if;
4218 
4219 	ix_sync_netisr(sc, 0);
4220 
4221 	ifnet_serialize_all(ifp);
4222 	if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) == IFF_UP)
4223 		ix_watchdog_reset(sc);
4224 	ifnet_deserialize_all(ifp);
4225 }
4226 
4227 static void
4228 ix_watchdog(struct ifaltq_subque *ifsq)
4229 {
4230 	struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
4231 	struct ifnet *ifp = ifsq_get_ifp(ifsq);
4232 	struct ix_softc *sc = ifp->if_softc;
4233 
4234 	KKASSERT(txr->tx_ifsq == ifsq);
4235 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
4236 
4237 	/*
4238 	 * If the interface has been paused, don't do the watchdog check.
4239 	 */
4240 	if (IXGBE_READ_REG(&sc->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF) {
4241 		ifsq_watchdog_set_count(&txr->tx_watchdog, 5);
4242 		return;
4243 	}
4244 
4245 	if_printf(ifp, "Watchdog timeout -- resetting\n");
4246 	if_printf(ifp, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->tx_idx,
4247 	    IXGBE_READ_REG(&sc->hw, IXGBE_TDH(txr->tx_idx)),
4248 	    IXGBE_READ_REG(&sc->hw, IXGBE_TDT(txr->tx_idx)));
4249 	if_printf(ifp, "TX(%d) desc avail = %d, next TX to Clean = %d\n",
4250 	    txr->tx_idx, txr->tx_avail, txr->tx_next_clean);
4251 
4252 	if ((ifp->if_flags & (IFF_IDIRECT | IFF_NPOLLING | IFF_RUNNING)) ==
4253 	    (IFF_IDIRECT | IFF_NPOLLING | IFF_RUNNING))
4254 		taskqueue_enqueue(taskqueue_thread[0], &sc->wdog_task);
4255 	else
4256 		ix_watchdog_reset(sc);
4257 }
4258 
4259 static void
4260 ix_free_tx_ring(struct ix_tx_ring *txr)
4261 {
4262 	int i;
4263 
4264 	for (i = 0; i < txr->tx_ndesc; ++i) {
4265 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
4266 
4267 		if (txbuf->m_head != NULL)
4268 			ix_free_txbuf(txr, txbuf);
4269 	}
4270 }
4271 
4272 static void
4273 ix_free_rx_ring(struct ix_rx_ring *rxr)
4274 {
4275 	int i;
4276 
4277 	for (i = 0; i < rxr->rx_ndesc; ++i) {
4278 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
4279 
4280 		if (rxbuf->fmp != NULL) {
4281 			m_freem(rxbuf->fmp);
4282 			rxbuf->fmp = NULL;
4283 			rxbuf->lmp = NULL;
4284 		} else {
4285 			KKASSERT(rxbuf->lmp == NULL);
4286 		}
4287 		if (rxbuf->m_head != NULL) {
4288 			bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
4289 			m_freem(rxbuf->m_head);
4290 			rxbuf->m_head = NULL;
4291 		}
4292 	}
4293 }
4294 
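/*
 * Attach a new mbuf to the RX descriptor at index i.  The new mbuf is
 * loaded into the spare DMA map first; only on success is the spare
 * map swapped with the descriptor's map, so a load failure never
 * leaves the ring entry unmapped.
 */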
4295 static int
4296 ix_newbuf(struct ix_rx_ring *rxr, int i, boolean_t wait)
4297 {
4298 	struct mbuf *m;
4299 	bus_dma_segment_t seg;
4300 	bus_dmamap_t map;
4301 	struct ix_rx_buf *rxbuf;
4302 	int flags, error, nseg;
4303 
4304 	flags = M_NOWAIT;
4305 	if (__predict_false(wait))
4306 		flags = M_WAITOK;
4307 
4308 	m = m_getjcl(flags, MT_DATA, M_PKTHDR, rxr->rx_mbuf_sz);
4309 	if (m == NULL) {
4310 		if (wait) {
4311 			if_printf(&rxr->rx_sc->arpcom.ac_if,
4312 			    "Unable to allocate RX mbuf\n");
4313 		}
4314 		return ENOBUFS;
4315 	}
4316 	m->m_len = m->m_pkthdr.len = rxr->rx_mbuf_sz;
4317 
4318 	error = bus_dmamap_load_mbuf_segment(rxr->rx_tag,
4319 	    rxr->rx_sparemap, m, &seg, 1, &nseg, BUS_DMA_NOWAIT);
4320 	if (error) {
4321 		m_freem(m);
4322 		if (wait) {
4323 			if_printf(&rxr->rx_sc->arpcom.ac_if,
4324 			    "Unable to load RX mbuf\n");
4325 		}
4326 		return error;
4327 	}
4328 
4329 	rxbuf = &rxr->rx_buf[i];
4330 	if (rxbuf->m_head != NULL)
4331 		bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
4332 
4333 	map = rxbuf->map;
4334 	rxbuf->map = rxr->rx_sparemap;
4335 	rxr->rx_sparemap = map;
4336 
4337 	rxbuf->m_head = m;
4338 	rxbuf->paddr = seg.ds_addr;
4339 
4340 	ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4341 	return 0;
4342 }
4343 
4344 static void
4345 ix_add_sysctl(struct ix_softc *sc)
4346 {
4347 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
4348 	struct sysctl_oid *tree = device_get_sysctl_tree(sc->dev);
4349 	char node[32];
4350 	int i;
4351 
4352 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4353 	    OID_AUTO, "rxr", CTLFLAG_RD, &sc->rx_ring_cnt, 0, "# of RX rings");
4354 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4355 	    OID_AUTO, "rxr_inuse", CTLFLAG_RD, &sc->rx_ring_inuse, 0,
4356 	    "# of RX rings used");
4357 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4358 	    OID_AUTO, "txr", CTLFLAG_RD, &sc->tx_ring_cnt, 0, "# of TX rings");
4359 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4360 	    OID_AUTO, "txr_inuse", CTLFLAG_RD, &sc->tx_ring_inuse, 0,
4361 	    "# of TX rings used");
4362 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4363 	    OID_AUTO, "rxd", CTLTYPE_INT | CTLFLAG_RD,
4364 	    sc, 0, ix_sysctl_rxd, "I",
4365 	    "# of RX descs");
4366 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4367 	    OID_AUTO, "txd", CTLTYPE_INT | CTLFLAG_RD,
4368 	    sc, 0, ix_sysctl_txd, "I",
4369 	    "# of TX descs");
4370 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4371 	    OID_AUTO, "tx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4372 	    sc, 0, ix_sysctl_tx_wreg_nsegs, "I",
4373 	    "# of segments sent before write to hardware register");
4374 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4375 	    OID_AUTO, "rx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4376 	    sc, 0, ix_sysctl_rx_wreg_nsegs, "I",
4377 	    "# of received segments sent before write to hardware register");
4378 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4379 	    OID_AUTO, "tx_intr_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4380 	    sc, 0, ix_sysctl_tx_intr_nsegs, "I",
4381 	    "# of segments per TX interrupt");
4382 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4383 	    OID_AUTO, "direct_input", CTLFLAG_RW, &sc->direct_input, 0,
4384 	    "Enable direct input");
4385 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
4386 		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4387 		    OID_AUTO, "tx_msix_cpumap", CTLTYPE_OPAQUE | CTLFLAG_RD,
4388 		    sc->tx_rmap_intr, 0, if_ringmap_cpumap_sysctl, "I",
4389 		    "TX MSI-X CPU map");
4390 		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4391 		    OID_AUTO, "rx_msix_cpumap", CTLTYPE_OPAQUE | CTLFLAG_RD,
4392 		    sc->rx_rmap_intr, 0, if_ringmap_cpumap_sysctl, "I",
4393 		    "RX MSI-X CPU map");
4394 	}
4395 #ifdef IFPOLL_ENABLE
4396 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4397 	    OID_AUTO, "tx_poll_cpumap", CTLTYPE_OPAQUE | CTLFLAG_RD,
4398 	    sc->tx_rmap, 0, if_ringmap_cpumap_sysctl, "I",
4399 	    "TX polling CPU map");
4400 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4401 	    OID_AUTO, "rx_poll_cpumap", CTLTYPE_OPAQUE | CTLFLAG_RD,
4402 	    sc->rx_rmap, 0, if_ringmap_cpumap_sysctl, "I",
4403 	    "RX polling CPU map");
4404 #endif
4405 
4406 #define IX_ADD_INTR_RATE_SYSCTL(sc, use, name) \
4407 do { \
4408 	ix_add_intr_rate_sysctl(sc, IX_INTR_USE_##use, #name, \
4409 	    ix_sysctl_##name, #use " interrupt rate"); \
4410 } while (0)
4411 
4412 	IX_ADD_INTR_RATE_SYSCTL(sc, RXTX, rxtx_intr_rate);
4413 	IX_ADD_INTR_RATE_SYSCTL(sc, RX, rx_intr_rate);
4414 	IX_ADD_INTR_RATE_SYSCTL(sc, TX, tx_intr_rate);
4415 	IX_ADD_INTR_RATE_SYSCTL(sc, STATUS, sts_intr_rate);
4416 
4417 #undef IX_ADD_INTR_RATE_SYSCTL
4418 
4419 #ifdef IX_RSS_DEBUG
4420 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4421 	    OID_AUTO, "rss_debug", CTLFLAG_RW, &sc->rss_debug, 0,
4422 	    "RSS debug level");
4423 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
4424 		ksnprintf(node, sizeof(node), "rx%d_pkt", i);
4425 		SYSCTL_ADD_ULONG(ctx,
4426 		    SYSCTL_CHILDREN(tree), OID_AUTO, node,
4427 		    CTLFLAG_RW, &sc->rx_rings[i].rx_pkts, "RXed packets");
4428 	}
4429 #endif
4430 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
4431 		struct ix_tx_ring *txr = &sc->tx_rings[i];
4432 
4433 		ksnprintf(node, sizeof(node), "tx%d_nmbuf", i);
4434 		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, node,
4435 		    CTLTYPE_INT | CTLFLAG_RD, txr, 0, ix_sysctl_tx_nmbuf, "I",
4436 		    "# of pending TX mbufs");
4437 
4438 		ksnprintf(node, sizeof(node), "tx%d_gc", i);
4439 		SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, node,
4440 		    CTLFLAG_RW, &txr->tx_gc, "# of TX desc GC");
4441 	}
4442 
4443 #if 0
4444 	ix_add_hw_stats(sc);
4445 #endif
4446 
4447 }
4448 
4449 static int
4450 ix_sysctl_tx_nmbuf(SYSCTL_HANDLER_ARGS)
4451 {
4452 	struct ix_tx_ring *txr = (void *)arg1;
4453 	int nmbuf;
4454 
4455 	nmbuf = txr->tx_nmbuf;
4456 	return (sysctl_handle_int(oidp, &nmbuf, 0, req));
4457 }
4458 
4459 static int
4460 ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4461 {
4462 	struct ix_softc *sc = (void *)arg1;
4463 	struct ifnet *ifp = &sc->arpcom.ac_if;
4464 	int error, nsegs, i;
4465 
4466 	nsegs = sc->tx_rings[0].tx_wreg_nsegs;
4467 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4468 	if (error || req->newptr == NULL)
4469 		return error;
4470 	if (nsegs < 0)
4471 		return EINVAL;
4472 
4473 	ifnet_serialize_all(ifp);
4474 	for (i = 0; i < sc->tx_ring_cnt; ++i)
4475 		sc->tx_rings[i].tx_wreg_nsegs = nsegs;
4476 	ifnet_deserialize_all(ifp);
4477 
4478 	return 0;
4479 }
4480 
4481 static int
4482 ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4483 {
4484 	struct ix_softc *sc = (void *)arg1;
4485 	struct ifnet *ifp = &sc->arpcom.ac_if;
4486 	int error, nsegs, i;
4487 
4488 	nsegs = sc->rx_rings[0].rx_wreg_nsegs;
4489 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4490 	if (error || req->newptr == NULL)
4491 		return error;
4492 	if (nsegs < 0)
4493 		return EINVAL;
4494 
4495 	ifnet_serialize_all(ifp);
4496 	for (i = 0; i < sc->rx_ring_cnt; ++i)
4497 		sc->rx_rings[i].rx_wreg_nsegs = nsegs;
4498 	ifnet_deserialize_all(ifp);
4499 
4500 	return 0;
4501 }
4502 
4503 static int
4504 ix_sysctl_txd(SYSCTL_HANDLER_ARGS)
4505 {
4506 	struct ix_softc *sc = (void *)arg1;
4507 	int txd;
4508 
4509 	txd = sc->tx_rings[0].tx_ndesc;
4510 	return sysctl_handle_int(oidp, &txd, 0, req);
4511 }
4512 
4513 static int
4514 ix_sysctl_rxd(SYSCTL_HANDLER_ARGS)
4515 {
4516 	struct ix_softc *sc = (void *)arg1;
4517 	int rxd;
4518 
4519 	rxd = sc->rx_rings[0].rx_ndesc;
4520 	return sysctl_handle_int(oidp, &rxd, 0, req);
4521 }
4522 
4523 static int
4524 ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS)
4525 {
4526 	struct ix_softc *sc = (void *)arg1;
4527 	struct ifnet *ifp = &sc->arpcom.ac_if;
4528 	struct ix_tx_ring *txr = &sc->tx_rings[0];
4529 	int error, nsegs;
4530 
4531 	nsegs = txr->tx_intr_nsegs;
4532 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4533 	if (error || req->newptr == NULL)
4534 		return error;
4535 	if (nsegs < 0)
4536 		return EINVAL;
4537 
4538 	ifnet_serialize_all(ifp);
4539 
4540 	if (nsegs >= txr->tx_ndesc - IX_MAX_SCATTER - IX_TX_RESERVED) {
4541 		error = EINVAL;
4542 	} else {
4543 		int i;
4544 
4545 		error = 0;
4546 		for (i = 0; i < sc->tx_ring_cnt; ++i)
4547 			sc->tx_rings[i].tx_intr_nsegs = nsegs;
4548 	}
4549 
4550 	ifnet_deserialize_all(ifp);
4551 
4552 	return error;
4553 }
4554 
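/*
 * Program the EITR register for the given vector: convert the
 * requested interrupt rate (interrupts/second) into an interval value
 * and clamp it to the range supported by the MAC type.
 */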
4555 static void
4556 ix_set_eitr(struct ix_softc *sc, int idx, int rate)
4557 {
4558 	uint32_t eitr, eitr_intvl;
4559 
4560 	eitr = IXGBE_READ_REG(&sc->hw, IXGBE_EITR(idx));
4561 	eitr_intvl = 1000000000 / 256 / rate;
4562 
4563 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4564 		eitr &= ~IX_EITR_INTVL_MASK_82598;
4565 		if (eitr_intvl == 0)
4566 			eitr_intvl = 1;
4567 		else if (eitr_intvl > IX_EITR_INTVL_MASK_82598)
4568 			eitr_intvl = IX_EITR_INTVL_MASK_82598;
4569 	} else {
4570 		eitr &= ~IX_EITR_INTVL_MASK;
4571 
4572 		eitr_intvl &= ~IX_EITR_INTVL_RSVD_MASK;
4573 		if (eitr_intvl == 0)
4574 			eitr_intvl = IX_EITR_INTVL_MIN;
4575 		else if (eitr_intvl > IX_EITR_INTVL_MAX)
4576 			eitr_intvl = IX_EITR_INTVL_MAX;
4577 	}
4578 	eitr |= eitr_intvl;
4579 
4580 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EITR(idx), eitr);
4581 }
4582 
4583 static int
4584 ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS)
4585 {
4586 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RXTX);
4587 }
4588 
4589 static int
4590 ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS)
4591 {
4592 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RX);
4593 }
4594 
4595 static int
4596 ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS)
4597 {
4598 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_TX);
4599 }
4600 
4601 static int
4602 ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS)
4603 {
4604 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_STATUS);
4605 }
4606 
4607 static int
4608 ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int use)
4609 {
4610 	struct ix_softc *sc = (void *)arg1;
4611 	struct ifnet *ifp = &sc->arpcom.ac_if;
4612 	int error, rate, i;
4613 
4614 	rate = 0;
4615 	for (i = 0; i < sc->intr_cnt; ++i) {
4616 		if (sc->intr_data[i].intr_use == use) {
4617 			rate = sc->intr_data[i].intr_rate;
4618 			break;
4619 		}
4620 	}
4621 
4622 	error = sysctl_handle_int(oidp, &rate, 0, req);
4623 	if (error || req->newptr == NULL)
4624 		return error;
4625 	if (rate <= 0)
4626 		return EINVAL;
4627 
4628 	ifnet_serialize_all(ifp);
4629 
4630 	for (i = 0; i < sc->intr_cnt; ++i) {
4631 		if (sc->intr_data[i].intr_use == use) {
4632 			sc->intr_data[i].intr_rate = rate;
4633 			if (ifp->if_flags & IFF_RUNNING)
4634 				ix_set_eitr(sc, i, rate);
4635 		}
4636 	}
4637 
4638 	ifnet_deserialize_all(ifp);
4639 
4640 	return error;
4641 }
4642 
4643 static void
4644 ix_add_intr_rate_sysctl(struct ix_softc *sc, int use,
4645     const char *name, int (*handler)(SYSCTL_HANDLER_ARGS), const char *desc)
4646 {
4647 	int i;
4648 
4649 	for (i = 0; i < sc->intr_cnt; ++i) {
4650 		if (sc->intr_data[i].intr_use == use) {
4651 			SYSCTL_ADD_PROC(device_get_sysctl_ctx(sc->dev),
4652 			    SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)),
4653 			    OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW,
4654 			    sc, 0, handler, "I", desc);
4655 			break;
4656 		}
4657 	}
4658 }
4659 
4660 static int
4661 ix_get_timer_cpuid(const struct ix_softc *sc, boolean_t polling)
4662 {
4663 	if (polling || sc->intr_type == PCI_INTR_TYPE_MSIX)
4664 		return 0; /* XXX fixed */
4665 	else
4666 		return rman_get_cpuid(sc->intr_data[0].intr_res);
4667 }
4668 
4669 static void
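/*
 * Try to switch to MSI-X.  One vector is allocated per RX ring; a TX
 * ring whose CPU matches an RX ring piggybacks on that RX vector,
 * otherwise it gets its own vector.  The last vector is reserved for
 * the status (link, SFP, temperature, etc.) interrupt.
 */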
4670 ix_alloc_msix(struct ix_softc *sc)
4671 {
4672 	int msix_enable, msix_cnt, msix_ring, alloc_cnt;
4673 	struct ix_intr_data *intr;
4674 	int i, x, error;
4675 	int ring_cnt, ring_cntmax;
4676 	boolean_t setup = FALSE;
4677 
4678 	msix_enable = ix_msix_enable;
4679 	/*
4680 	 * Don't enable MSI-X on 82598 by default, see:
4681 	 * 82598 specification update errata #38
4682 	 */
4683 	if (sc->hw.mac.type == ixgbe_mac_82598EB)
4684 		msix_enable = 0;
4685 	msix_enable = device_getenv_int(sc->dev, "msix.enable", msix_enable);
4686 	if (!msix_enable)
4687 		return;
4688 
4689 	msix_cnt = pci_msix_count(sc->dev);
4690 #ifdef IX_MSIX_DEBUG
4691 	msix_cnt = device_getenv_int(sc->dev, "msix.count", msix_cnt);
4692 #endif
4693 	if (msix_cnt <= 1) {
4694 		/* Having only one MSI-X vector does not make sense. */
4695 		return;
4696 	}
4697 
4698 	/*
4699 	 * Make sure that we don't exceed the limits of the interrupt
4700 	 * related registers (EIMS, etc).
4701 	 */
4702 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4703 		if (msix_cnt > IX_MAX_MSIX_82598)
4704 			msix_cnt = IX_MAX_MSIX_82598;
4705 	} else {
4706 		if (msix_cnt > IX_MAX_MSIX)
4707 			msix_cnt = IX_MAX_MSIX;
4708 	}
4709 	if (bootverbose)
4710 		device_printf(sc->dev, "MSI-X count %d\n", msix_cnt);
4711 	msix_ring = msix_cnt - 1; /* -1 for status */
4712 
4713 	/*
4714 	 * Configure # of RX/TX rings usable by MSI-X.
4715 	 */
4716 	ix_get_rxring_cnt(sc, &ring_cnt, &ring_cntmax);
4717 	if (ring_cntmax > msix_ring)
4718 		ring_cntmax = msix_ring;
4719 	sc->rx_rmap_intr = if_ringmap_alloc(sc->dev, ring_cnt, ring_cntmax);
4720 
4721 	ix_get_txring_cnt(sc, &ring_cnt, &ring_cntmax);
4722 	if (ring_cntmax > msix_ring)
4723 		ring_cntmax = msix_ring;
4724 	sc->tx_rmap_intr = if_ringmap_alloc(sc->dev, ring_cnt, ring_cntmax);
4725 
4726 	if_ringmap_match(sc->dev, sc->rx_rmap_intr, sc->tx_rmap_intr);
4727 	sc->rx_ring_msix = if_ringmap_count(sc->rx_rmap_intr);
4728 	KASSERT(sc->rx_ring_msix <= sc->rx_ring_cnt,
4729 	    ("total RX ring count %d, MSI-X RX ring count %d",
4730 	     sc->rx_ring_cnt, sc->rx_ring_msix));
4731 	sc->tx_ring_msix = if_ringmap_count(sc->tx_rmap_intr);
4732 	KASSERT(sc->tx_ring_msix <= sc->tx_ring_cnt,
4733 	    ("total TX ring count %d, MSI-X TX ring count %d",
4734 	     sc->tx_ring_cnt, sc->tx_ring_msix));
4735 
4736 	/*
4737 	 * Aggregate TX/RX MSI-X
4738 	 */
4739 	ring_cntmax = sc->rx_ring_msix;
4740 	if (ring_cntmax < sc->tx_ring_msix)
4741 		ring_cntmax = sc->tx_ring_msix;
4742 	KASSERT(ring_cntmax <= msix_ring,
4743 	    ("invalid ring count max %d, MSI-X count for rings %d",
4744 	     ring_cntmax, msix_ring));
4745 
4746 	alloc_cnt = ring_cntmax + 1; /* +1 for status */
4747 	if (bootverbose) {
4748 		device_printf(sc->dev, "MSI-X alloc %d, "
4749 		    "RX ring %d, TX ring %d\n", alloc_cnt,
4750 		    sc->rx_ring_msix, sc->tx_ring_msix);
4751 	}
4752 
4753 	sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82598);
4754 	sc->msix_mem_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4755 	    &sc->msix_mem_rid, RF_ACTIVE);
4756 	if (sc->msix_mem_res == NULL) {
4757 		sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82599);
4758 		sc->msix_mem_res = bus_alloc_resource_any(sc->dev,
4759 		    SYS_RES_MEMORY, &sc->msix_mem_rid, RF_ACTIVE);
4760 		if (sc->msix_mem_res == NULL) {
4761 			device_printf(sc->dev, "Unable to map MSI-X table\n");
4762 			return;
4763 		}
4764 	}
4765 
4766 	sc->intr_cnt = alloc_cnt;
4767 	sc->intr_data = kmalloc(sizeof(struct ix_intr_data) * sc->intr_cnt,
4768 	    M_DEVBUF, M_WAITOK | M_ZERO);
4769 	for (x = 0; x < sc->intr_cnt; ++x) {
4770 		intr = &sc->intr_data[x];
4771 		intr->intr_rid = -1;
4772 		intr->intr_rate = IX_INTR_RATE;
4773 	}
4774 
4775 	x = 0;
4776 	for (i = 0; i < sc->rx_ring_msix; ++i) {
4777 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
4778 		struct ix_tx_ring *txr = NULL;
4779 		int cpuid, j;
4780 
4781 		KKASSERT(x < sc->intr_cnt);
4782 		rxr->rx_intr_vec = x;
4783 		ix_setup_msix_eims(sc, x,
4784 		    &rxr->rx_eims, &rxr->rx_eims_val);
4785 
4786 		cpuid = if_ringmap_cpumap(sc->rx_rmap_intr, i);
4787 
4788 		/*
4789 		 * Try to find a TX ring on the same CPU to piggyback on.
4790 		 */
4791 		for (j = 0; j < sc->tx_ring_msix; ++j) {
4792 			if (cpuid ==
4793 			    if_ringmap_cpumap(sc->tx_rmap_intr, j)) {
4794 				txr = &sc->tx_rings[j];
4795 				KKASSERT(txr->tx_intr_cpuid < 0);
4796 				break;
4797 			}
4798 		}
4799 		rxr->rx_txr = txr;
4800 
4801 		intr = &sc->intr_data[x++];
4802 		intr->intr_serialize = &rxr->rx_serialize;
4803 		if (txr != NULL) {
4804 			ksnprintf(intr->intr_desc0,
4805 			    sizeof(intr->intr_desc0), "%s rx%dtx%d",
4806 			    device_get_nameunit(sc->dev), i, txr->tx_idx);
4807 			intr->intr_use = IX_INTR_USE_RXTX;
4808 			intr->intr_func = ix_msix_rxtx;
4809 		} else {
4810 			ksnprintf(intr->intr_desc0,
4811 			    sizeof(intr->intr_desc0), "%s rx%d",
4812 			    device_get_nameunit(sc->dev), i);
4813 			intr->intr_rate = IX_MSIX_RX_RATE;
4814 			intr->intr_use = IX_INTR_USE_RX;
4815 			intr->intr_func = ix_msix_rx;
4816 		}
4817 		intr->intr_funcarg = rxr;
4818 		intr->intr_cpuid = cpuid;
4819 		KKASSERT(intr->intr_cpuid < netisr_ncpus);
4820 		intr->intr_desc = intr->intr_desc0;
4821 
4822 		if (txr != NULL) {
4823 			txr->tx_intr_cpuid = intr->intr_cpuid;
4824 			/* NOTE: Leave TX ring's intr_vec negative. */
4825 		}
4826 	}
4827 
4828 	for (i = 0; i < sc->tx_ring_msix; ++i) {
4829 		struct ix_tx_ring *txr = &sc->tx_rings[i];
4830 
4831 		if (txr->tx_intr_cpuid >= 0) {
4832 			/* Piggybacked by RX ring. */
4833 			continue;
4834 		}
4835 
4836 		KKASSERT(x < sc->intr_cnt);
4837 		txr->tx_intr_vec = x;
4838 		ix_setup_msix_eims(sc, x, &txr->tx_eims, &txr->tx_eims_val);
4839 
4840 		intr = &sc->intr_data[x++];
4841 		intr->intr_serialize = &txr->tx_serialize;
4842 		intr->intr_rate = IX_MSIX_TX_RATE;
4843 		intr->intr_use = IX_INTR_USE_TX;
4844 		intr->intr_func = ix_msix_tx;
4845 		intr->intr_funcarg = txr;
4846 		intr->intr_cpuid = if_ringmap_cpumap(sc->tx_rmap_intr, i);
4847 		KKASSERT(intr->intr_cpuid < netisr_ncpus);
4848 		ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s tx%d",
4849 		    device_get_nameunit(sc->dev), i);
4850 		intr->intr_desc = intr->intr_desc0;
4851 
4852 		txr->tx_intr_cpuid = intr->intr_cpuid;
4853 	}
4854 
4855 	/*
4856 	 * Status MSI-X
4857 	 */
4858 	KKASSERT(x < sc->intr_cnt);
4859 	sc->sts_msix_vec = x;
4860 
4861 	intr = &sc->intr_data[x++];
4862 
4863 	intr->intr_serialize = &sc->main_serialize;
4864 	intr->intr_func = ix_msix_status;
4865 	intr->intr_funcarg = sc;
4866 	intr->intr_cpuid = 0;
4867 	intr->intr_use = IX_INTR_USE_STATUS;
4868 
4869 	ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s sts",
4870 	    device_get_nameunit(sc->dev));
4871 	intr->intr_desc = intr->intr_desc0;
4872 
4873 	KKASSERT(x == sc->intr_cnt);
4874 
4875 	error = pci_setup_msix(sc->dev);
4876 	if (error) {
4877 		device_printf(sc->dev, "Setup MSI-X failed\n");
4878 		goto back;
4879 	}
4880 	setup = TRUE;
4881 
4882 	for (i = 0; i < sc->intr_cnt; ++i) {
4883 		intr = &sc->intr_data[i];
4884 
4885 		error = pci_alloc_msix_vector(sc->dev, i, &intr->intr_rid,
4886 		    intr->intr_cpuid);
4887 		if (error) {
4888 			device_printf(sc->dev,
4889 			    "Unable to allocate MSI-X %d on cpu%d\n", i,
4890 			    intr->intr_cpuid);
4891 			goto back;
4892 		}
4893 
4894 		intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
4895 		    &intr->intr_rid, RF_ACTIVE);
4896 		if (intr->intr_res == NULL) {
4897 			device_printf(sc->dev,
4898 			    "Unable to allocate MSI-X %d resource\n", i);
4899 			error = ENOMEM;
4900 			goto back;
4901 		}
4902 	}
4903 
4904 	pci_enable_msix(sc->dev);
4905 	sc->intr_type = PCI_INTR_TYPE_MSIX;
4906 back:
4907 	if (error)
4908 		ix_free_msix(sc, setup);
4909 }
4910 
4911 static void
4912 ix_free_msix(struct ix_softc *sc, boolean_t setup)
4913 {
4914 	int i;
4915 
4916 	KKASSERT(sc->intr_cnt > 1);
4917 
4918 	for (i = 0; i < sc->intr_cnt; ++i) {
4919 		struct ix_intr_data *intr = &sc->intr_data[i];
4920 
4921 		if (intr->intr_res != NULL) {
4922 			bus_release_resource(sc->dev, SYS_RES_IRQ,
4923 			    intr->intr_rid, intr->intr_res);
4924 		}
4925 		if (intr->intr_rid >= 0)
4926 			pci_release_msix_vector(sc->dev, intr->intr_rid);
4927 	}
4928 	if (setup)
4929 		pci_teardown_msix(sc->dev);
4930 
4931 	sc->intr_cnt = 0;
4932 	kfree(sc->intr_data, M_DEVBUF);
4933 	sc->intr_data = NULL;
4934 }
4935 
4936 static void
4937 ix_msix_rx(void *xrxr)
4938 {
4939 	struct ix_rx_ring *rxr = xrxr;
4940 
4941 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4942 
4943 	ix_rxeof(rxr, -1);
4944 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
4945 }
4946 
4947 static void
4948 ix_msix_tx(void *xtxr)
4949 {
4950 	struct ix_tx_ring *txr = xtxr;
4951 
4952 	ASSERT_SERIALIZED(&txr->tx_serialize);
4953 
4954 	ix_tx_intr(txr, *(txr->tx_hdr));
4955 	IXGBE_WRITE_REG(&txr->tx_sc->hw, txr->tx_eims, txr->tx_eims_val);
4956 }
4957 
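/*
 * Aggregated RX/TX MSI-X handler: always service the RX ring, and
 * service the piggybacked TX ring only if new TX completions are
 * pending.
 */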
4958 static void
4959 ix_msix_rxtx(void *xrxr)
4960 {
4961 	struct ix_rx_ring *rxr = xrxr;
4962 	struct ix_tx_ring *txr;
4963 	int hdr;
4964 
4965 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4966 
4967 	ix_rxeof(rxr, -1);
4968 
4969 	/*
4970 	 * NOTE:
4971 	 * Since tx_next_clean is only changed by ix_txeof(),
4972 	 * which is called only from the interrupt handler, this
4973 	 * check without holding the TX serializer is MPSAFE.
4974 	 */
4975 	txr = rxr->rx_txr;
4976 	hdr = *(txr->tx_hdr);
4977 	if (hdr != txr->tx_next_clean) {
4978 		lwkt_serialize_enter(&txr->tx_serialize);
4979 		ix_tx_intr(txr, hdr);
4980 		lwkt_serialize_exit(&txr->tx_serialize);
4981 	}
4982 
4983 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
4984 }
4985 
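/*
 * Handle the non-RX/TX ("other") interrupt causes: link state changes,
 * ECC errors, over-temperature events, SFP module events, fan failure
 * and external PHY interrupts.
 */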
4986 static void
4987 ix_intr_status(struct ix_softc *sc, uint32_t eicr)
4988 {
4989 	struct ixgbe_hw *hw = &sc->hw;
4990 
4991 	/* Link status change */
4992 	if (eicr & IXGBE_EICR_LSC)
4993 		ix_handle_link(sc);
4994 
4995 	if (hw->mac.type != ixgbe_mac_82598EB) {
4996 		if (eicr & IXGBE_EICR_ECC)
4997 			if_printf(&sc->arpcom.ac_if, "ECC ERROR!!  REBOOT!!\n");
4998 
4999 		/* Check for over temp condition */
5000 		if (sc->caps & IX_CAP_TEMP_SENSOR) {
5001 			int32_t retval;
5002 
5003 			switch (sc->hw.mac.type) {
5004 			case ixgbe_mac_X550EM_a:
5005 				if ((eicr & IXGBE_EICR_GPI_SDP0_X550EM_a) == 0)
5006 					break;
5007 				retval = hw->phy.ops.check_overtemp(hw);
5008 				if (retval != IXGBE_ERR_OVERTEMP)
5009 					break;
5010 
5011 				/* Disable more temp sensor interrupts. */
5012 				IXGBE_WRITE_REG(hw, IXGBE_EIMC,
5013 				    IXGBE_EICR_GPI_SDP0_X550EM_a);
5014 				if_printf(&sc->arpcom.ac_if, "CRITICAL: "
5015 				    "OVER TEMP!!  PHY IS SHUT DOWN!!  "
5016 				    "SHUTDOWN!!\n");
5017 				break;
5018 
5019 			default:
5020 				if ((eicr & IXGBE_EICR_TS) == 0)
5021 					break;
5022 				retval = hw->phy.ops.check_overtemp(hw);
5023 				if (retval != IXGBE_ERR_OVERTEMP)
5024 					break;
5025 
5026 				/* Disable more temp sensor interrupts. */
5027 				IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_TS);
5028 				if_printf(&sc->arpcom.ac_if, "CRITICAL: "
5029 				    "OVER TEMP!!  PHY IS SHUT DOWN!!  "
5030 				    "SHUTDOWN!!\n");
5031 				break;
5032 			}
5033 		}
5034 	}
5035 
5036 	if (ix_is_sfp(hw)) {
5037 		uint32_t eicr_mask;
5038 
5039 		/* Pluggable optics-related interrupt */
5040 		if (hw->mac.type >= ixgbe_mac_X540)
5041 			eicr_mask = IXGBE_EICR_GPI_SDP0_X540;
5042 		else
5043 			eicr_mask = IXGBE_EICR_GPI_SDP2_BY_MAC(hw);
5044 
5045 		if (eicr & eicr_mask)
5046 			ix_handle_mod(sc);
5047 
5048 		if (hw->mac.type == ixgbe_mac_82599EB &&
5049 		    (eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw)))
5050 			ix_handle_msf(sc);
5051 	}
5052 
5053 	/* Check for fan failure */
5054 	if (sc->caps & IX_CAP_DETECT_FANFAIL)
5055 		ix_detect_fanfail(sc, eicr, TRUE);
5056 
5057 	/* External PHY interrupt */
5058 	if (hw->phy.type == ixgbe_phy_x550em_ext_t &&
5059 	    (eicr & IXGBE_EICR_GPI_SDP0_X540))
5060 		ix_handle_phy(sc);
5061 }
5062 
5063 static void
5064 ix_msix_status(void *xsc)
5065 {
5066 	struct ix_softc *sc = xsc;
5067 	uint32_t eicr;
5068 
5069 	ASSERT_SERIALIZED(&sc->main_serialize);
5070 
5071 	eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
5072 	ix_intr_status(sc, eicr);
5073 
5074 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMS, sc->intr_mask);
5075 }
5076 
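/*
 * Map an MSI-X vector to the EIMS register and bit used to re-enable
 * it: vectors below 32 use EIMS (82598) or EIMS_EX(0), vectors 32 and
 * above use EIMS_EX(1).
 */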
5077 static void
5078 ix_setup_msix_eims(const struct ix_softc *sc, int x,
5079     uint32_t *eims, uint32_t *eims_val)
5080 {
5081 	if (x < 32) {
5082 		if (sc->hw.mac.type == ixgbe_mac_82598EB) {
5083 			KASSERT(x < IX_MAX_MSIX_82598,
5084 			    ("%s: invalid vector %d for 82598",
5085 			     device_get_nameunit(sc->dev), x));
5086 			*eims = IXGBE_EIMS;
5087 		} else {
5088 			*eims = IXGBE_EIMS_EX(0);
5089 		}
5090 		*eims_val = 1 << x;
5091 	} else {
5092 		KASSERT(x < IX_MAX_MSIX, ("%s: invalid vector %d",
5093 		    device_get_nameunit(sc->dev), x));
5094 		KASSERT(sc->hw.mac.type != ixgbe_mac_82598EB,
5095 		    ("%s: invalid vector %d for 82598",
5096 		     device_get_nameunit(sc->dev), x));
5097 		*eims = IXGBE_EIMS_EX(1);
5098 		*eims_val = 1 << (x - 32);
5099 	}
5100 }
5101 
5102 #ifdef IFPOLL_ENABLE
5103 
5104 static void
5105 ix_npoll_status(struct ifnet *ifp)
5106 {
5107 	struct ix_softc *sc = ifp->if_softc;
5108 	uint32_t eicr;
5109 
5110 	ASSERT_SERIALIZED(&sc->main_serialize);
5111 
5112 	eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
5113 	ix_intr_status(sc, eicr);
5114 }
5115 
5116 static void
5117 ix_npoll_tx(struct ifnet *ifp, void *arg, int cycle __unused)
5118 {
5119 	struct ix_tx_ring *txr = arg;
5120 
5121 	ASSERT_SERIALIZED(&txr->tx_serialize);
5122 
5123 	ix_tx_intr(txr, *(txr->tx_hdr));
5124 	ix_try_txgc(txr, 1);
5125 }
5126 
5127 static void
5128 ix_npoll_rx(struct ifnet *ifp __unused, void *arg, int cycle)
5129 {
5130 	struct ix_rx_ring *rxr = arg;
5131 
5132 	ASSERT_SERIALIZED(&rxr->rx_serialize);
5133 	ix_rxeof(rxr, cycle);
5134 }
5135 
5136 static void
5137 ix_npoll_rx_direct(struct ifnet *ifp __unused, void *arg, int cycle)
5138 {
5139 	struct ix_rx_ring *rxr = arg;
5140 
5141 	ASSERT_NOT_SERIALIZED(&rxr->rx_serialize);
5142 	ix_rxeof(rxr, cycle);
5143 }
5144 
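/*
 * Register or deregister polling handlers.  With a non-NULL info the
 * TX/RX rings are bound to the CPUs from the polling ring maps; when
 * direct input is enabled, RX is polled without holding the RX
 * serializer and IFF_IDIRECT is set.  A NULL info reverts the TX
 * queues to their interrupt CPUs.
 */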
5145 static void
5146 ix_npoll(struct ifnet *ifp, struct ifpoll_info *info)
5147 {
5148 	struct ix_softc *sc = ifp->if_softc;
5149 	int i, txr_cnt, rxr_cnt, idirect;
5150 
5151 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
5152 
5153 	idirect = sc->direct_input;
5154 	cpu_ccfence();
5155 
5156 	if (info) {
5157 		int cpu;
5158 
5159 		info->ifpi_status.status_func = ix_npoll_status;
5160 		info->ifpi_status.serializer = &sc->main_serialize;
5161 
5162 		txr_cnt = ix_get_txring_inuse(sc, TRUE);
5163 		for (i = 0; i < txr_cnt; ++i) {
5164 			struct ix_tx_ring *txr = &sc->tx_rings[i];
5165 
5166 			cpu = if_ringmap_cpumap(sc->tx_rmap, i);
5167 			KKASSERT(cpu < netisr_ncpus);
5168 			info->ifpi_tx[cpu].poll_func = ix_npoll_tx;
5169 			info->ifpi_tx[cpu].arg = txr;
5170 			info->ifpi_tx[cpu].serializer = &txr->tx_serialize;
5171 			ifsq_set_cpuid(txr->tx_ifsq, cpu);
5172 		}
5173 
5174 		rxr_cnt = ix_get_rxring_inuse(sc, TRUE);
5175 		for (i = 0; i < rxr_cnt; ++i) {
5176 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
5177 
5178 			cpu = if_ringmap_cpumap(sc->rx_rmap, i);
5179 			KKASSERT(cpu < netisr_ncpus);
5180 			info->ifpi_rx[cpu].arg = rxr;
5181 			if (idirect) {
5182 				info->ifpi_rx[cpu].poll_func =
5183 				    ix_npoll_rx_direct;
5184 				info->ifpi_rx[cpu].serializer = NULL;
5185 			} else {
5186 				info->ifpi_rx[cpu].poll_func = ix_npoll_rx;
5187 				info->ifpi_rx[cpu].serializer =
5188 				    &rxr->rx_serialize;
5189 			}
5190 		}
5191 		if (idirect)
5192 			ifp->if_flags |= IFF_IDIRECT;
5193 	} else {
5194 		ifp->if_flags &= ~IFF_IDIRECT;
5195 		for (i = 0; i < sc->tx_ring_cnt; ++i) {
5196 			struct ix_tx_ring *txr = &sc->tx_rings[i];
5197 
5198 			ifsq_set_cpuid(txr->tx_ifsq, txr->tx_intr_cpuid);
5199 		}
5200 	}
5201 	if (ifp->if_flags & IFF_RUNNING)
5202 		ix_init(sc);
5203 }
5204 
5205 #endif /* IFPOLL_ENABLE */
5206 
5207 static enum ixgbe_fc_mode
5208 ix_ifmedia2fc(int ifm)
5209 {
5210 	int fc_opt = ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
5211 
5212 	switch (fc_opt) {
5213 	case (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE):
5214 		return ixgbe_fc_full;
5215 
5216 	case IFM_ETH_RXPAUSE:
5217 		return ixgbe_fc_rx_pause;
5218 
5219 	case IFM_ETH_TXPAUSE:
5220 		return ixgbe_fc_tx_pause;
5221 
5222 	default:
5223 		return ixgbe_fc_none;
5224 	}
5225 }
5226 
5227 static const char *
5228 ix_ifmedia2str(int ifm)
5229 {
5230 	int fc_opt = ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
5231 
5232 	switch (fc_opt) {
5233 	case (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE):
5234 		return IFM_ETH_FC_FULL;
5235 
5236 	case IFM_ETH_RXPAUSE:
5237 		return IFM_ETH_FC_RXPAUSE;
5238 
5239 	case IFM_ETH_TXPAUSE:
5240 		return IFM_ETH_FC_TXPAUSE;
5241 
5242 	default:
5243 		return IFM_ETH_FC_NONE;
5244 	}
5245 }
5246 
5247 static const char *
5248 ix_fc2str(enum ixgbe_fc_mode fc)
5249 {
5250 	switch (fc) {
5251 	case ixgbe_fc_full:
5252 		return IFM_ETH_FC_FULL;
5253 
5254 	case ixgbe_fc_rx_pause:
5255 		return IFM_ETH_FC_RXPAUSE;
5256 
5257 	case ixgbe_fc_tx_pause:
5258 		return IFM_ETH_FC_TXPAUSE;
5259 
5260 	default:
5261 		return IFM_ETH_FC_NONE;
5262 	}
5263 }
5264 
5265 static int
5266 ix_powerdown(struct ix_softc *sc)
5267 {
5268 	struct ixgbe_hw *hw = &sc->hw;
5269 	int error = 0;
5270 
5271 	/* Limit power management flow to X550EM baseT */
5272 	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T &&
5273 	    hw->phy.ops.enter_lplu) {
5274 		/* Turn off support for APM wakeup. (Using ACPI instead) */
5275 		IXGBE_WRITE_REG(hw, IXGBE_GRC,
5276 		    IXGBE_READ_REG(hw, IXGBE_GRC) & ~(uint32_t)2);
5277 
5278 		/*
5279 		 * Clear Wake Up Status register to prevent any previous wakeup
5280 		 * events from waking us up immediately after we suspend.
5281 		 */
5282 		IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff);
5283 
5284 		/*
5285 		 * Program the Wakeup Filter Control register with user filter
5286 		 * settings
5287 		 */
5288 		IXGBE_WRITE_REG(hw, IXGBE_WUFC, sc->wufc);
5289 
5290 		/* Enable wakeups and power management in Wakeup Control */
5291 		IXGBE_WRITE_REG(hw, IXGBE_WUC,
5292 		    IXGBE_WUC_WKEN | IXGBE_WUC_PME_EN);
5293 
5294 		/* X550EM baseT adapters need a special LPLU flow */
5295 		hw->phy.reset_disable = true;
5296 		ix_stop(sc);
5297 		error = hw->phy.ops.enter_lplu(hw);
5298 		if (error) {
5299 			if_printf(&sc->arpcom.ac_if,
5300 			    "Error entering LPLU: %d\n", error);
5301 		}
5302 		hw->phy.reset_disable = false;
5303 	} else {
5304 		/* Just stop for other adapters */
5305 		ix_stop(sc);
5306 	}
5307 	return error;
5308 }
5309 
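/*
 * Compute the flow control high/low watermarks from the maximum frame
 * size and the RX packet buffer size, then apply the flow control mode
 * requested by the current media setting.
 */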
5310 static void
5311 ix_config_flowctrl(struct ix_softc *sc)
5312 {
5313 	struct ixgbe_hw *hw = &sc->hw;
5314 	uint32_t rxpb, frame, size, tmp;
5315 
5316 	frame = sc->max_frame_size;
5317 
5318 	/* Calculate High Water */
5319 	switch (hw->mac.type) {
5320 	case ixgbe_mac_X540:
5321 	case ixgbe_mac_X550:
5322 	case ixgbe_mac_X550EM_a:
5323 	case ixgbe_mac_X550EM_x:
5324 		tmp = IXGBE_DV_X540(frame, frame);
5325 		break;
5326 	default:
5327 		tmp = IXGBE_DV(frame, frame);
5328 		break;
5329 	}
5330 	size = IXGBE_BT2KB(tmp);
5331 	rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
5332 	hw->fc.high_water[0] = rxpb - size;
5333 
5334 	/* Now calculate Low Water */
5335 	switch (hw->mac.type) {
5336 	case ixgbe_mac_X540:
5337 	case ixgbe_mac_X550:
5338 	case ixgbe_mac_X550EM_a:
5339 	case ixgbe_mac_X550EM_x:
5340 		tmp = IXGBE_LOW_DV_X540(frame);
5341 		break;
5342 	default:
5343 		tmp = IXGBE_LOW_DV(frame);
5344 		break;
5345 	}
5346 	hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
5347 
5348 	hw->fc.requested_mode = ix_ifmedia2fc(sc->ifm_media);
5349 	if (sc->ifm_media & IFM_ETH_FORCEPAUSE)
5350 		hw->fc.disable_fc_autoneg = TRUE;
5351 	else
5352 		hw->fc.disable_fc_autoneg = FALSE;
5353 	hw->fc.pause_time = IX_FC_PAUSE;
5354 	hw->fc.send_xon = TRUE;
5355 }
5356 
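/*
 * Configure DMA coalescing (X550 and later only); the hardware is
 * reprogrammed only when the watchdog timer or link speed has changed.
 */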
5357 static void
5358 ix_config_dmac(struct ix_softc *sc)
5359 {
5360 	struct ixgbe_hw *hw = &sc->hw;
5361 	struct ixgbe_dmac_config *dcfg = &hw->mac.dmac_config;
5362 
5363 	if (hw->mac.type < ixgbe_mac_X550 || !hw->mac.ops.dmac_config)
5364 		return;
5365 
5366 	if ((dcfg->watchdog_timer ^ sc->dmac) ||
5367 	    (dcfg->link_speed ^ sc->link_speed)) {
5368 		dcfg->watchdog_timer = sc->dmac;
5369 		dcfg->fcoe_en = false;
5370 		dcfg->link_speed = sc->link_speed;
5371 		dcfg->num_tcs = 1;
5372 
5373 		if (bootverbose) {
5374 			if_printf(&sc->arpcom.ac_if, "dmac settings: "
5375 			    "watchdog %d, link speed %d\n",
5376 			    dcfg->watchdog_timer, dcfg->link_speed);
5377 		}
5378 
5379 		hw->mac.ops.dmac_config(hw);
5380 	}
5381 }
5382 
5383 static void
5384 ix_init_media(struct ix_softc *sc)
5385 {
5386 	struct ixgbe_hw *hw = &sc->hw;
5387 	uint32_t layer;
5388 
5389 	ifmedia_removeall(&sc->media);
5390 
5391 	layer = sc->phy_layer;
5392 
5393 	/*
5394 	 * Media types with matching DragonFlyBSD media defines
5395 	 */
5396 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
5397 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_T | IFM_FDX,
5398 		    0, NULL);
5399 	}
5400 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
5401 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
5402 		    0, NULL);
5403 	}
5404 	if (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX) {
5405 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
5406 		    0, NULL);
5407 		/* No half-duplex support */
5408 	}
5409 	if (layer & IXGBE_PHYSICAL_LAYER_10BASE_T) {
5410 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10_T | IFM_FDX,
5411 		    0, NULL);
5412 		/* No half-duplex support */
5413 	}
5414 
5415 	if ((layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) ||
5416 	    (layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA)) {
5417 		ifmedia_add_nodup(&sc->media,
5418 		    IFM_ETHER | IFM_10G_TWINAX | IFM_FDX, 0, NULL);
5419 	}
5420 
5421 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) {
5422 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_LR | IFM_FDX,
5423 		    0, NULL);
5424 		if (hw->phy.multispeed_fiber) {
5425 			ifmedia_add_nodup(&sc->media,
5426 			    IFM_ETHER | IFM_1000_LX | IFM_FDX, 0, NULL);
5427 		}
5428 	}
5429 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LRM) {
5430 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_LRM | IFM_FDX,
5431 		    0, NULL);
5432 		if (hw->phy.multispeed_fiber) {
5433 			ifmedia_add_nodup(&sc->media,
5434 			    IFM_ETHER | IFM_1000_LX | IFM_FDX, 0, NULL);
5435 		}
5436 	}
5437 
5438 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
5439 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_SR | IFM_FDX,
5440 		    0, NULL);
5441 		if (hw->phy.multispeed_fiber) {
5442 			ifmedia_add_nodup(&sc->media,
5443 			    IFM_ETHER | IFM_1000_SX | IFM_FDX, 0, NULL);
5444 		}
5445 	} else if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
5446 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
5447 		    0, NULL);
5448 	}
5449 
5450 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) {
5451 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_CX4 | IFM_FDX,
5452 		    0, NULL);
5453 	}
5454 
5455 	/*
5456 	 * XXX Other (no matching DragonFlyBSD media type):
5457 	 * To work around this, we'll assign these completely
5458 	 * inappropriate media types.
5459 	 */
5460 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) {
5461 		if_printf(&sc->arpcom.ac_if, "Media supported: 10GbaseKR\n");
5462 		if_printf(&sc->arpcom.ac_if, "10GbaseKR mapped to 10GbaseSR\n");
5463 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_SR | IFM_FDX,
5464 		    0, NULL);
5465 	}
5466 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) {
5467 		if_printf(&sc->arpcom.ac_if, "Media supported: 10GbaseKX4\n");
5468 		if_printf(&sc->arpcom.ac_if,
5469 		    "10GbaseKX4 mapped to 10GbaseCX4\n");
5470 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_CX4 | IFM_FDX,
5471 		    0, NULL);
5472 	}
5473 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) {
5474 		if_printf(&sc->arpcom.ac_if, "Media supported: 1000baseKX\n");
5475 		if_printf(&sc->arpcom.ac_if,
5476 		    "1000baseKX mapped to 1000baseCX\n");
5477 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_CX | IFM_FDX,
5478 		    0, NULL);
5479 	}
5480 	if (layer & IXGBE_PHYSICAL_LAYER_2500BASE_KX) {
5481 		if_printf(&sc->arpcom.ac_if, "Media supported: 2500baseKX\n");
5482 		if_printf(&sc->arpcom.ac_if,
5483 		    "2500baseKX mapped to 2500baseSX\n");
5484 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_2500_SX | IFM_FDX,
5485 		    0, NULL);
5486 	}
5487 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_BX) {
5488 		if_printf(&sc->arpcom.ac_if,
5489 		    "Media supported: 1000baseBX, ignored\n");
5490 	}
5491 
5492 	/* XXX we probably don't need this */
5493 	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
5494 		ifmedia_add_nodup(&sc->media,
5495 		    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
5496 	}
5497 
5498 	ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
5499 
5500 	if (ifmedia_tryset(&sc->media, sc->ifm_media)) {
5501 		int flowctrl = (sc->ifm_media & IFM_ETH_FCMASK);
5502 
5503 		sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;
5504 		sc->ifm_media = IX_IFM_DEFAULT | flowctrl;
5505 		ifmedia_set(&sc->media, sc->ifm_media);
5506 	}
5507 }
5508 
5509 static void
5510 ix_setup_caps(struct ix_softc *sc)
5511 {
5512 
5513 	sc->caps |= IX_CAP_LEGACY_INTR;
5514 
5515 	switch (sc->hw.mac.type) {
5516 	case ixgbe_mac_82598EB:
5517 		if (sc->hw.device_id == IXGBE_DEV_ID_82598AT)
5518 			sc->caps |= IX_CAP_DETECT_FANFAIL;
5519 		break;
5520 
5521 	case ixgbe_mac_X550:
5522 		sc->caps |= IX_CAP_TEMP_SENSOR | IX_CAP_FW_RECOVERY;
5523 		break;
5524 
5525 	case ixgbe_mac_X550EM_x:
5526 		if (sc->hw.device_id == IXGBE_DEV_ID_X550EM_X_KR)
5527 			sc->caps |= IX_CAP_EEE;
5528 		sc->caps |= IX_CAP_FW_RECOVERY;
5529 		break;
5530 
5531 	case ixgbe_mac_X550EM_a:
5532 		sc->caps &= ~IX_CAP_LEGACY_INTR;
5533 		if (sc->hw.device_id == IXGBE_DEV_ID_X550EM_A_1G_T ||
5534 		    sc->hw.device_id == IXGBE_DEV_ID_X550EM_A_1G_T_L)
5535 			sc->caps |= IX_CAP_TEMP_SENSOR | IX_CAP_EEE;
5536 		sc->caps |= IX_CAP_FW_RECOVERY;
5537 		break;
5538 
5539 	case ixgbe_mac_82599EB:
5540 		if (sc->hw.device_id == IXGBE_DEV_ID_82599_QSFP_SF_QP)
5541 			sc->caps &= ~IX_CAP_LEGACY_INTR;
5542 		break;
5543 
5544 	default:
5545 		break;
5546 	}
5547 }
5548 
5549 static void
5550 ix_detect_fanfail(struct ix_softc *sc, uint32_t reg, boolean_t intr)
5551 {
5552 	uint32_t mask;
5553 
5554 	mask = intr ? IXGBE_EICR_GPI_SDP1_BY_MAC(&sc->hw) : IXGBE_ESDP_SDP1;
5555 	if (reg & mask) {
5556 		if_printf(&sc->arpcom.ac_if,
5557 		    "CRITICAL: FAN FAILURE!!  REPLACE IMMEDIATELY!!\n");
5558 	}
5559 }
5560 
5561 static void
5562 ix_config_gpie(struct ix_softc *sc)
5563 {
5564 	struct ixgbe_hw *hw = &sc->hw;
5565 	uint32_t gpie;
5566 
5567 	gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
5568 
5569 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
5570 		/* Enable Enhanced MSI-X mode */
5571 		gpie |= IXGBE_GPIE_MSIX_MODE |
5572 		    IXGBE_GPIE_EIAME |
5573 		    IXGBE_GPIE_PBA_SUPPORT |
5574 		    IXGBE_GPIE_OCD;
5575 	}
5576 
5577 	/* Fan Failure Interrupt */
5578 	if (sc->caps & IX_CAP_DETECT_FANFAIL)
5579 		gpie |= IXGBE_SDP1_GPIEN;
5580 
5581 	/* Thermal Sensor Interrupt */
5582 	if (sc->caps & IX_CAP_TEMP_SENSOR)
5583 		gpie |= IXGBE_SDP0_GPIEN_X540;
5584 
5585 	/* Link detection */
5586 	switch (hw->mac.type) {
5587 	case ixgbe_mac_82599EB:
5588 		gpie |= IXGBE_SDP1_GPIEN | IXGBE_SDP2_GPIEN;
5589 		break;
5590 
5591 	case ixgbe_mac_X550EM_x:
5592 	case ixgbe_mac_X550EM_a:
5593 		gpie |= IXGBE_SDP0_GPIEN_X540;
5594 		break;
5595 
5596 	default:
5597 		break;
5598 	}
5599 
5600 	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
5601 }
5602 
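/*
 * Periodic (1Hz) firmware recovery check: stop the interface when the
 * firmware enters recovery mode and reinitialize it once the firmware
 * has recovered.
 */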
5603 static void
5604 ix_fw_timer(void *xsc)
5605 {
5606 	struct ix_softc *sc = xsc;
5607 	struct ifnet *ifp = &sc->arpcom.ac_if;
5608 
5609 	lwkt_serialize_enter(&sc->main_serialize);
5610 
5611 	if (ixgbe_fw_recovery_mode(&sc->hw)) {
5612 		if ((sc->flags & IX_FLAG_FW_RECOVERY) == 0) {
5613 			sc->flags |= IX_FLAG_FW_RECOVERY;
5614 			if (ifp->if_flags & IFF_RUNNING) {
5615 				if_printf(ifp,
5616 				    "fw recovery mode entered, stop\n");
5617 				ix_serialize_skipmain(sc);
5618 				ix_stop(sc);
5619 				ix_deserialize_skipmain(sc);
5620 			} else {
5621 				if_printf(ifp, "fw recovery mode entered\n");
5622 			}
5623 		}
5624 	} else {
5625 		if (sc->flags & IX_FLAG_FW_RECOVERY) {
5626 			sc->flags &= ~IX_FLAG_FW_RECOVERY;
5627 			if (ifp->if_flags & IFF_UP) {
5628 				if_printf(ifp,
5629 				    "fw recovery mode exited, reinit\n");
5630 				ix_serialize_skipmain(sc);
5631 				ix_init(sc);
5632 				ix_deserialize_skipmain(sc);
5633 			} else {
5634 				if_printf(ifp, "fw recovery mode exited\n");
5635 			}
5636 		}
5637 	}
5638 
5639 	callout_reset_bycpu(&sc->fw_timer, hz, ix_fw_timer, sc,
5640 	    ix_get_timer_cpuid(sc,
5641 		(ifp->if_flags & IFF_NPOLLING) ? TRUE : FALSE));
5642 
5643 	lwkt_serialize_exit(&sc->main_serialize);
5644 }
5645