xref: /dragonfly/sys/dev/netif/ix/if_ix.c (revision e6e77800)
1 /*
2  * Copyright (c) 2001-2014, Intel Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  *  1. Redistributions of source code must retain the above copyright notice,
9  *     this list of conditions and the following disclaimer.
10  *
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *     notice, this list of conditions and the following disclaimer in the
13  *     documentation and/or other materials provided with the distribution.
14  *
15  *  3. Neither the name of the Intel Corporation nor the names of its
16  *     contributors may be used to endorse or promote products derived from
17  *     this software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include "opt_ifpoll.h"
33 #include "opt_ix.h"
34 
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/endian.h>
38 #include <sys/interrupt.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/proc.h>
43 #include <sys/rman.h>
44 #include <sys/serialize.h>
45 #include <sys/serialize2.h>
46 #include <sys/socket.h>
47 #include <sys/sockio.h>
48 #include <sys/sysctl.h>
49 #include <sys/systm.h>
50 #include <sys/taskqueue.h>
51 
52 #include <net/bpf.h>
53 #include <net/ethernet.h>
54 #include <net/if.h>
55 #include <net/if_arp.h>
56 #include <net/if_dl.h>
57 #include <net/if_media.h>
58 #include <net/ifq_var.h>
59 #include <net/if_ringmap.h>
60 #include <net/toeplitz.h>
61 #include <net/toeplitz2.h>
62 #include <net/vlan/if_vlan_var.h>
63 #include <net/vlan/if_vlan_ether.h>
64 #include <net/if_poll.h>
65 
66 #include <netinet/in_systm.h>
67 #include <netinet/in.h>
68 #include <netinet/ip.h>
69 
70 #include <bus/pci/pcivar.h>
71 #include <bus/pci/pcireg.h>
72 
73 #include <dev/netif/ix/ixgbe_api.h>
74 #include <dev/netif/ix/if_ix.h>
75 
76 #define IX_IFM_DEFAULT		(IFM_ETHER | IFM_AUTO)
77 
78 #ifdef IX_RSS_DEBUG
79 #define IX_RSS_DPRINTF(sc, lvl, fmt, ...) \
80 do { \
81 	if (sc->rss_debug >= lvl) \
82 		if_printf(&sc->arpcom.ac_if, fmt, __VA_ARGS__); \
83 } while (0)
84 #else	/* !IX_RSS_DEBUG */
85 #define IX_RSS_DPRINTF(sc, lvl, fmt, ...)	((void)0)
86 #endif	/* IX_RSS_DEBUG */
87 
88 #define IX_NAME			"Intel(R) PRO/10GbE "
89 #define IX_DEVICE(id) \
90 	{ IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_##id, IX_NAME #id }
91 #define IX_DEVICE_NULL		{ 0, 0, NULL }
92 
93 static struct ix_device {
94 	uint16_t	vid;
95 	uint16_t	did;
96 	const char	*desc;
97 } ix_devices[] = {
98 	IX_DEVICE(82598AF_DUAL_PORT),
99 	IX_DEVICE(82598AF_SINGLE_PORT),
100 	IX_DEVICE(82598EB_CX4),
101 	IX_DEVICE(82598AT),
102 	IX_DEVICE(82598AT2),
103 	IX_DEVICE(82598),
104 	IX_DEVICE(82598_DA_DUAL_PORT),
105 	IX_DEVICE(82598_CX4_DUAL_PORT),
106 	IX_DEVICE(82598EB_XF_LR),
107 	IX_DEVICE(82598_SR_DUAL_PORT_EM),
108 	IX_DEVICE(82598EB_SFP_LOM),
109 	IX_DEVICE(82599_KX4),
110 	IX_DEVICE(82599_KX4_MEZZ),
111 	IX_DEVICE(82599_SFP),
112 	IX_DEVICE(82599_XAUI_LOM),
113 	IX_DEVICE(82599_CX4),
114 	IX_DEVICE(82599_T3_LOM),
115 	IX_DEVICE(82599_COMBO_BACKPLANE),
116 	IX_DEVICE(82599_BACKPLANE_FCOE),
117 	IX_DEVICE(82599_SFP_SF2),
118 	IX_DEVICE(82599_SFP_FCOE),
119 	IX_DEVICE(82599EN_SFP),
120 	IX_DEVICE(82599_SFP_SF_QP),
121 	IX_DEVICE(82599_QSFP_SF_QP),
122 	IX_DEVICE(X540T),
123 	IX_DEVICE(X540T1),
124 	IX_DEVICE(X550T),
125 	IX_DEVICE(X550EM_X_KR),
126 	IX_DEVICE(X550EM_X_KX4),
127 	IX_DEVICE(X550EM_X_10G_T),
128 
129 	/* required last entry */
130 	IX_DEVICE_NULL
131 };
132 
133 static int	ix_probe(device_t);
134 static int	ix_attach(device_t);
135 static int	ix_detach(device_t);
136 static int	ix_shutdown(device_t);
137 
138 static void	ix_serialize(struct ifnet *, enum ifnet_serialize);
139 static void	ix_deserialize(struct ifnet *, enum ifnet_serialize);
140 static int	ix_tryserialize(struct ifnet *, enum ifnet_serialize);
141 #ifdef INVARIANTS
142 static void	ix_serialize_assert(struct ifnet *, enum ifnet_serialize,
143 		    boolean_t);
144 #endif
145 static void	ix_start(struct ifnet *, struct ifaltq_subque *);
146 static void	ix_watchdog(struct ifaltq_subque *);
147 static int	ix_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
148 static void	ix_init(void *);
149 static void	ix_stop(struct ix_softc *);
150 static void	ix_media_status(struct ifnet *, struct ifmediareq *);
151 static int	ix_media_change(struct ifnet *);
152 static void	ix_timer(void *);
153 #ifdef IFPOLL_ENABLE
154 static void	ix_npoll(struct ifnet *, struct ifpoll_info *);
155 static void	ix_npoll_rx(struct ifnet *, void *, int);
156 static void	ix_npoll_rx_direct(struct ifnet *, void *, int);
157 static void	ix_npoll_tx(struct ifnet *, void *, int);
158 static void	ix_npoll_status(struct ifnet *);
159 #endif
160 
161 static void	ix_add_sysctl(struct ix_softc *);
162 static void	ix_add_intr_rate_sysctl(struct ix_softc *, int,
163 		    const char *, int (*)(SYSCTL_HANDLER_ARGS), const char *);
164 static int	ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
165 static int	ix_sysctl_tx_nmbuf(SYSCTL_HANDLER_ARGS);
166 static int	ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
167 static int	ix_sysctl_txd(SYSCTL_HANDLER_ARGS);
168 static int	ix_sysctl_rxd(SYSCTL_HANDLER_ARGS);
169 static int	ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS);
170 static int	ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int);
171 static int	ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS);
172 static int	ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS);
173 static int	ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS);
174 static int	ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS);
175 #if 0
176 static void     ix_add_hw_stats(struct ix_softc *);
177 #endif
178 
179 static void	ix_watchdog_reset(struct ix_softc *);
180 static void	ix_watchdog_task(void *, int);
181 static void	ix_sync_netisr(struct ix_softc *, int);
182 static void	ix_slot_info(struct ix_softc *);
183 static int	ix_alloc_rings(struct ix_softc *);
184 static void	ix_free_rings(struct ix_softc *);
185 static void	ix_setup_ifp(struct ix_softc *);
186 static void	ix_setup_serialize(struct ix_softc *);
187 static void	ix_set_ring_inuse(struct ix_softc *, boolean_t);
188 static void	ix_set_timer_cpuid(struct ix_softc *, boolean_t);
189 static void	ix_update_stats(struct ix_softc *);
190 
191 static void	ix_set_promisc(struct ix_softc *);
192 static void	ix_set_multi(struct ix_softc *);
193 static void	ix_set_vlan(struct ix_softc *);
194 static uint8_t	*ix_mc_array_itr(struct ixgbe_hw *, uint8_t **, uint32_t *);
195 static enum ixgbe_fc_mode ix_ifmedia2fc(int);
196 static const char *ix_ifmedia2str(int);
197 static const char *ix_fc2str(enum ixgbe_fc_mode);
198 
199 static void	ix_get_txring_cnt(const struct ix_softc *, int *, int *);
200 static int	ix_get_txring_inuse(const struct ix_softc *, boolean_t);
201 static void	ix_init_tx_ring(struct ix_tx_ring *);
202 static void	ix_free_tx_ring(struct ix_tx_ring *);
203 static int	ix_create_tx_ring(struct ix_tx_ring *);
204 static void	ix_destroy_tx_ring(struct ix_tx_ring *, int);
205 static void	ix_init_tx_unit(struct ix_softc *);
206 static int	ix_encap(struct ix_tx_ring *, struct mbuf **,
207 		    uint16_t *, int *);
208 static int	ix_tx_ctx_setup(struct ix_tx_ring *,
209 		    const struct mbuf *, uint32_t *, uint32_t *);
210 static int	ix_tso_ctx_setup(struct ix_tx_ring *,
211 		    const struct mbuf *, uint32_t *, uint32_t *);
212 static void	ix_txeof(struct ix_tx_ring *, int);
213 static void	ix_txgc(struct ix_tx_ring *);
214 static void	ix_txgc_timer(void *);
215 
216 static void	ix_get_rxring_cnt(const struct ix_softc *, int *, int *);
217 static int	ix_get_rxring_inuse(const struct ix_softc *, boolean_t);
218 static int	ix_init_rx_ring(struct ix_rx_ring *);
219 static void	ix_free_rx_ring(struct ix_rx_ring *);
220 static int	ix_create_rx_ring(struct ix_rx_ring *);
221 static void	ix_destroy_rx_ring(struct ix_rx_ring *, int);
222 static void	ix_init_rx_unit(struct ix_softc *, boolean_t);
223 #if 0
224 static void	ix_setup_hw_rsc(struct ix_rx_ring *);
225 #endif
226 static int	ix_newbuf(struct ix_rx_ring *, int, boolean_t);
227 static void	ix_rxeof(struct ix_rx_ring *, int);
228 static void	ix_rx_discard(struct ix_rx_ring *, int, boolean_t);
229 static void	ix_enable_rx_drop(struct ix_softc *);
230 static void	ix_disable_rx_drop(struct ix_softc *);
231 
232 static void	ix_alloc_msix(struct ix_softc *);
233 static void	ix_free_msix(struct ix_softc *, boolean_t);
234 static void	ix_setup_msix_eims(const struct ix_softc *, int,
235 		    uint32_t *, uint32_t *);
236 static int	ix_alloc_intr(struct ix_softc *);
237 static void	ix_free_intr(struct ix_softc *);
238 static int	ix_setup_intr(struct ix_softc *);
239 static void	ix_teardown_intr(struct ix_softc *, int);
240 static void	ix_enable_intr(struct ix_softc *);
241 static void	ix_disable_intr(struct ix_softc *);
242 static void	ix_set_ivar(struct ix_softc *, uint8_t, uint8_t, int8_t);
243 static void	ix_set_eitr(struct ix_softc *, int, int);
244 static void	ix_intr_status(struct ix_softc *, uint32_t);
245 static void	ix_intr(void *);
246 static void	ix_msix_rxtx(void *);
247 static void	ix_msix_rx(void *);
248 static void	ix_msix_tx(void *);
249 static void	ix_msix_status(void *);
250 
251 static void	ix_config_link(struct ix_softc *);
252 static boolean_t ix_sfp_probe(struct ix_softc *);
253 static boolean_t ix_is_sfp(const struct ixgbe_hw *);
254 static void	ix_update_link_status(struct ix_softc *);
255 static void	ix_handle_link(struct ix_softc *);
256 static void	ix_handle_mod(struct ix_softc *);
257 static void	ix_handle_msf(struct ix_softc *);
258 static void	ix_handle_phy(struct ix_softc *);
259 static int	ix_powerdown(struct ix_softc *);
260 static void	ix_config_flowctrl(struct ix_softc *);
261 static void	ix_config_dmac(struct ix_softc *);
262 static void	ix_init_media(struct ix_softc *);
263 
264 /* XXX Missing shared code prototype */
265 extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *);
266 
267 static device_method_t ix_methods[] = {
268 	/* Device interface */
269 	DEVMETHOD(device_probe,		ix_probe),
270 	DEVMETHOD(device_attach,	ix_attach),
271 	DEVMETHOD(device_detach,	ix_detach),
272 	DEVMETHOD(device_shutdown,	ix_shutdown),
273 	DEVMETHOD_END
274 };
275 
276 static driver_t ix_driver = {
277 	"ix",
278 	ix_methods,
279 	sizeof(struct ix_softc)
280 };
281 
282 static devclass_t ix_devclass;
283 
284 DECLARE_DUMMY_MODULE(if_ix);
285 DRIVER_MODULE(if_ix, pci, ix_driver, ix_devclass, NULL, NULL);
286 
287 static int	ix_msi_enable = 1;
288 static int	ix_msix_enable = 1;
289 static int	ix_rxr = 0;
290 static int	ix_txr = 0;
291 static int	ix_txd = IX_PERF_TXD;
292 static int	ix_rxd = IX_PERF_RXD;
293 static int	ix_unsupported_sfp = 0;
294 static int	ix_direct_input = 1;
295 
296 static char	ix_flowctrl[IFM_ETH_FC_STRLEN] = IFM_ETH_FC_NONE;
297 
298 TUNABLE_INT("hw.ix.msi.enable", &ix_msi_enable);
299 TUNABLE_INT("hw.ix.msix.enable", &ix_msix_enable);
300 TUNABLE_INT("hw.ix.rxr", &ix_rxr);
301 TUNABLE_INT("hw.ix.txr", &ix_txr);
302 TUNABLE_INT("hw.ix.txd", &ix_txd);
303 TUNABLE_INT("hw.ix.rxd", &ix_rxd);
304 TUNABLE_INT("hw.ix.unsupported_sfp", &ix_unsupported_sfp);
305 TUNABLE_STR("hw.ix.flow_ctrl", ix_flowctrl, sizeof(ix_flowctrl));
306 TUNABLE_INT("hw.ix.direct_input", &ix_direct_input);
307 
308 /*
309  * Smart speed setting, defaults to on.  This only works
310  * as a compile option right now as it is set during attach;
311  * set this to 'ixgbe_smart_speed_off' to disable.
312  */
313 static const enum ixgbe_smart_speed ix_smart_speed =
314     ixgbe_smart_speed_on;
315 
316 static __inline void
317 ix_try_txgc(struct ix_tx_ring *txr, int8_t dec)
318 {
319 
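	/*
	 * tx_running is a countdown armed by TX activity.  Once it drops
	 * to zero while mbufs are still attached to the ring, but fewer
	 * than tx_intr_nsegs descriptors are outstanding (so a Report
	 * Status interrupt may not have been requested), reclaim the
	 * completed descriptors here.
	 */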
320 	if (txr->tx_running > 0) {
321 		txr->tx_running -= dec;
322 		if (txr->tx_running <= 0 && txr->tx_nmbuf &&
323 		    txr->tx_avail < txr->tx_ndesc &&
324 		    txr->tx_avail + txr->tx_intr_nsegs > txr->tx_ndesc)
325 			ix_txgc(txr);
326 	}
327 }
328 
329 static void
330 ix_txgc_timer(void *xtxr)
331 {
332 	struct ix_tx_ring *txr = xtxr;
333 	struct ifnet *ifp = &txr->tx_sc->arpcom.ac_if;
334 
335 	if ((ifp->if_flags & (IFF_RUNNING | IFF_UP | IFF_NPOLLING)) !=
336 	    (IFF_RUNNING | IFF_UP))
337 		return;
338 
339 	if (!lwkt_serialize_try(&txr->tx_serialize))
340 		goto done;
341 
342 	if ((ifp->if_flags & (IFF_RUNNING | IFF_UP | IFF_NPOLLING)) !=
343 	    (IFF_RUNNING | IFF_UP)) {
344 		lwkt_serialize_exit(&txr->tx_serialize);
345 		return;
346 	}
347 	ix_try_txgc(txr, IX_TX_RUNNING_DEC);
348 
349 	lwkt_serialize_exit(&txr->tx_serialize);
350 done:
351 	callout_reset(&txr->tx_gc_timer, 1, ix_txgc_timer, txr);
352 }
353 
354 static __inline void
355 ix_tx_intr(struct ix_tx_ring *txr, int hdr)
356 {
357 
358 	ix_txeof(txr, hdr);
359 	if (!ifsq_is_empty(txr->tx_ifsq))
360 		ifsq_devstart(txr->tx_ifsq);
361 }
362 
363 static __inline void
364 ix_free_txbuf(struct ix_tx_ring *txr, struct ix_tx_buf *txbuf)
365 {
366 
367 	KKASSERT(txbuf->m_head != NULL);
368 	KKASSERT(txr->tx_nmbuf > 0);
369 	txr->tx_nmbuf--;
370 
371 	bus_dmamap_unload(txr->tx_tag, txbuf->map);
372 	m_freem(txbuf->m_head);
373 	txbuf->m_head = NULL;
374 }
375 
376 static int
377 ix_probe(device_t dev)
378 {
379 	const struct ix_device *d;
380 	uint16_t vid, did;
381 
382 	vid = pci_get_vendor(dev);
383 	did = pci_get_device(dev);
384 
385 	for (d = ix_devices; d->desc != NULL; ++d) {
386 		if (vid == d->vid && did == d->did) {
387 			device_set_desc(dev, d->desc);
388 			return 0;
389 		}
390 	}
391 	return ENXIO;
392 }
393 
394 static void
395 ix_get_rxring_cnt(const struct ix_softc *sc, int *ring_cnt, int *ring_cntmax)
396 {
397 
398 	switch (sc->hw.mac.type) {
399 	case ixgbe_mac_X550:
400 	case ixgbe_mac_X550EM_x:
401 	case ixgbe_mac_X550EM_a:
402 		*ring_cntmax = IX_MAX_RXRING_X550;
403 		break;
404 
405 	default:
406 		*ring_cntmax = IX_MAX_RXRING;
407 		break;
408 	}
409 	*ring_cnt = device_getenv_int(sc->dev, "rxr", ix_rxr);
410 }
411 
412 static void
413 ix_get_txring_cnt(const struct ix_softc *sc, int *ring_cnt, int *ring_cntmax)
414 {
415 
416 	switch (sc->hw.mac.type) {
417 	case ixgbe_mac_82598EB:
418 		*ring_cntmax = IX_MAX_TXRING_82598;
419 		break;
420 
421 	case ixgbe_mac_82599EB:
422 		*ring_cntmax = IX_MAX_TXRING_82599;
423 		break;
424 
425 	case ixgbe_mac_X540:
426 		*ring_cntmax = IX_MAX_TXRING_X540;
427 		break;
428 
429 	case ixgbe_mac_X550:
430 	case ixgbe_mac_X550EM_x:
431 	case ixgbe_mac_X550EM_a:
432 		*ring_cntmax = IX_MAX_TXRING_X550;
433 		break;
434 
435 	default:
436 		*ring_cntmax = IX_MAX_TXRING;
437 		break;
438 	}
439 	*ring_cnt = device_getenv_int(sc->dev, "txr", ix_txr);
440 }
441 
442 static int
443 ix_attach(device_t dev)
444 {
445 	struct ix_softc *sc = device_get_softc(dev);
446 	struct ixgbe_hw *hw;
447 	int error, ring_cnt, ring_cntmax;
448 	uint16_t csum;
449 	uint32_t ctrl_ext;
450 	char flowctrl[IFM_ETH_FC_STRLEN];
451 
452 	sc->dev = sc->osdep.dev = dev;
453 	hw = &sc->hw;
454 
455 	if_initname(&sc->arpcom.ac_if, device_get_name(dev),
456 	    device_get_unit(dev));
457 	ifmedia_init(&sc->media, IFM_IMASK | IFM_ETH_FCMASK,
458 	    ix_media_change, ix_media_status);
459 
460 	/* Save frame size */
461 	sc->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;
462 
463 	sc->direct_input = ix_direct_input;
464 	TASK_INIT(&sc->wdog_task, 0, ix_watchdog_task, sc);
465 
466 	callout_init_mp(&sc->timer);
467 	lwkt_serialize_init(&sc->main_serialize);
468 
469 	/*
470 	 * Save off the information about this board
471 	 */
472 	hw->vendor_id = pci_get_vendor(dev);
473 	hw->device_id = pci_get_device(dev);
474 	hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
475 	hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2);
476 	hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);
477 
478 	ixgbe_set_mac_type(hw);
479 
480 	/* Apply the smart speed setting on 82599 and later MACs */
481 	if (hw->mac.type != ixgbe_mac_82598EB)
482 		hw->phy.smart_speed = ix_smart_speed;
483 
484 	/* Enable bus mastering */
485 	pci_enable_busmaster(dev);
486 
487 	/*
488 	 * Allocate IO memory
489 	 */
490 	sc->mem_rid = PCIR_BAR(0);
491 	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
492 	    &sc->mem_rid, RF_ACTIVE);
493 	if (sc->mem_res == NULL) {
494 		device_printf(dev, "Unable to allocate bus resource: memory\n");
495 		error = ENXIO;
496 		goto failed;
497 	}
498 
499 	sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->mem_res);
500 	sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->mem_res);
501 
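	/*
	 * The shared code accesses registers through hw->hw_addr; point
	 * it at the saved bus space handle in the osdep so the register
	 * access macros can reach the mapped BAR.
	 */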
502 	sc->hw.hw_addr = (uint8_t *)&sc->osdep.mem_bus_space_handle;
503 	sc->hw.back = &sc->osdep;
504 
505 	/*
506 	 * Configure total supported RX/TX ring count
507 	 */
508 	ix_get_rxring_cnt(sc, &ring_cnt, &ring_cntmax);
509 	sc->rx_rmap = if_ringmap_alloc(dev, ring_cnt, ring_cntmax);
510 	ix_get_txring_cnt(sc, &ring_cnt, &ring_cntmax);
511 	sc->tx_rmap = if_ringmap_alloc(dev, ring_cnt, ring_cntmax);
512 	if_ringmap_match(dev, sc->rx_rmap, sc->tx_rmap);
513 
514 	sc->rx_ring_cnt = if_ringmap_count(sc->rx_rmap);
515 	sc->rx_ring_inuse = sc->rx_ring_cnt;
516 	sc->tx_ring_cnt = if_ringmap_count(sc->tx_rmap);
517 	sc->tx_ring_inuse = sc->tx_ring_cnt;
518 
519 	/* Allocate TX/RX rings */
520 	error = ix_alloc_rings(sc);
521 	if (error)
522 		goto failed;
523 
524 	/* Allocate interrupt */
525 	error = ix_alloc_intr(sc);
526 	if (error)
527 		goto failed;
528 
529 	/* Set up the serializer array */
530 	ix_setup_serialize(sc);
531 
532 	/* Allocate multicast array memory. */
533 	sc->mta = kmalloc(IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR,
534 	    M_DEVBUF, M_WAITOK);
535 
536 	/* Initialize the shared code */
537 	hw->allow_unsupported_sfp = ix_unsupported_sfp;
538 	error = ixgbe_init_shared_code(hw);
539 	if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
540 		/*
541 		 * No optics in this port; ask timer routine
542 		 * to probe for later insertion.
543 		 */
544 		sc->sfp_probe = TRUE;
545 		error = 0;
546 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
547 		device_printf(dev, "Unsupported SFP+ module detected!\n");
548 		error = EIO;
549 		goto failed;
550 	} else if (error) {
551 		device_printf(dev, "Unable to initialize the shared code\n");
552 		error = EIO;
553 		goto failed;
554 	}
555 
556 	/* Make sure we have a good EEPROM before we read from it */
557 	if (ixgbe_validate_eeprom_checksum(&sc->hw, &csum) < 0) {
558 		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
559 		error = EIO;
560 		goto failed;
561 	}
562 
563 	error = ixgbe_init_hw(hw);
564 	if (error == IXGBE_ERR_EEPROM_VERSION) {
565 		device_printf(dev, "Pre-production device detected\n");
566 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
567 		device_printf(dev, "Unsupported SFP+ Module\n");
568 		error = EIO;
569 		goto failed;
570 	} else if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
571 		device_printf(dev, "No SFP+ Module found\n");
572 	}
573 
574 	sc->ifm_media = IX_IFM_DEFAULT;
575 	/* Get default flow control settings */
576 	device_getenv_string(dev, "flow_ctrl", flowctrl, sizeof(flowctrl),
577 	    ix_flowctrl);
578 	sc->ifm_media |= ifmedia_str2ethfc(flowctrl);
579 	sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;
580 
581 	/* Setup OS specific network interface */
582 	ix_setup_ifp(sc);
583 
584 	/* Add sysctl tree */
585 	ix_add_sysctl(sc);
586 
587 	error = ix_setup_intr(sc);
588 	if (error) {
589 		ether_ifdetach(&sc->arpcom.ac_if);
590 		goto failed;
591 	}
592 
593 	/* Initialize statistics */
594 	ix_update_stats(sc);
595 
596 	/* Check PCIE slot type/speed/width */
597 	ix_slot_info(sc);
598 
599 	/* Save initial wake up filter configuration */
600 	sc->wufc = IXGBE_READ_REG(hw, IXGBE_WUFC);
601 
602 	/* Let hardware know driver is loaded */
603 	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
604 	ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
605 	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
606 
607 	return 0;
608 failed:
609 	ix_detach(dev);
610 	return error;
611 }
612 
613 static int
614 ix_detach(device_t dev)
615 {
616 	struct ix_softc *sc = device_get_softc(dev);
617 
618 	if (device_is_attached(dev)) {
619 		struct ifnet *ifp = &sc->arpcom.ac_if;
620 		uint32_t ctrl_ext;
621 
622 		ix_sync_netisr(sc, IFF_UP);
623 		taskqueue_drain(taskqueue_thread[0], &sc->wdog_task);
624 
625 		ifnet_serialize_all(ifp);
626 
627 		ix_powerdown(sc);
628 		ix_teardown_intr(sc, sc->intr_cnt);
629 
630 		ifnet_deserialize_all(ifp);
631 
632 		callout_terminate(&sc->timer);
633 		ether_ifdetach(ifp);
634 
635 		/* Let hardware know driver is unloading */
636 		ctrl_ext = IXGBE_READ_REG(&sc->hw, IXGBE_CTRL_EXT);
637 		ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
638 		IXGBE_WRITE_REG(&sc->hw, IXGBE_CTRL_EXT, ctrl_ext);
639 	}
640 
641 	ifmedia_removeall(&sc->media);
642 	bus_generic_detach(dev);
643 
644 	ix_free_intr(sc);
645 
646 	if (sc->msix_mem_res != NULL) {
647 		bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_mem_rid,
648 		    sc->msix_mem_res);
649 	}
650 	if (sc->mem_res != NULL) {
651 		bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid,
652 		    sc->mem_res);
653 	}
654 
655 	ix_free_rings(sc);
656 
657 	if (sc->mta != NULL)
658 		kfree(sc->mta, M_DEVBUF);
659 	if (sc->serializes != NULL)
660 		kfree(sc->serializes, M_DEVBUF);
661 
662 	if (sc->rx_rmap != NULL)
663 		if_ringmap_free(sc->rx_rmap);
664 	if (sc->rx_rmap_intr != NULL)
665 		if_ringmap_free(sc->rx_rmap_intr);
666 	if (sc->tx_rmap != NULL)
667 		if_ringmap_free(sc->tx_rmap);
668 	if (sc->tx_rmap_intr != NULL)
669 		if_ringmap_free(sc->tx_rmap_intr);
670 
671 	return 0;
672 }
673 
674 static int
675 ix_shutdown(device_t dev)
676 {
677 	struct ix_softc *sc = device_get_softc(dev);
678 	struct ifnet *ifp = &sc->arpcom.ac_if;
679 
680 	ix_sync_netisr(sc, IFF_UP);
681 	taskqueue_drain(taskqueue_thread[0], &sc->wdog_task);
682 
683 	ifnet_serialize_all(ifp);
684 	ix_powerdown(sc);
685 	ifnet_deserialize_all(ifp);
686 
687 	return 0;
688 }
689 
690 static void
691 ix_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
692 {
693 	struct ix_softc *sc = ifp->if_softc;
694 	struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
695 	int idx = -1;
696 	uint16_t nsegs;
697 
698 	KKASSERT(txr->tx_ifsq == ifsq);
699 	ASSERT_SERIALIZED(&txr->tx_serialize);
700 
701 	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
702 		return;
703 
704 	if (!sc->link_active || (txr->tx_flags & IX_TXFLAG_ENABLED) == 0) {
705 		ifsq_purge(ifsq);
706 		return;
707 	}
708 
709 	while (!ifsq_is_empty(ifsq)) {
710 		struct mbuf *m_head;
711 
712 		if (txr->tx_avail <= IX_MAX_SCATTER + IX_TX_RESERVED) {
713 			ifsq_set_oactive(ifsq);
714 			txr->tx_watchdog.wd_timer = 5;
715 			break;
716 		}
717 
718 		m_head = ifsq_dequeue(ifsq);
719 		if (m_head == NULL)
720 			break;
721 
722 		if (ix_encap(txr, &m_head, &nsegs, &idx)) {
723 			IFNET_STAT_INC(ifp, oerrors, 1);
724 			continue;
725 		}
726 
727 		/*
728 		 * TX interrupts are aggressively aggregated, so increasing
729 		 * opackets at TX interrupt time would make the opackets
730 		 * statistic vastly inaccurate; do the opackets increment
731 		 * now instead.
732 		 */
733 		IFNET_STAT_INC(ifp, opackets, 1);
734 
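		/*
		 * Batch TDT (tail) updates: the doorbell register is only
		 * written once at least tx_wreg_nsegs descriptors have been
		 * queued; any remainder is flushed after the loop.
		 */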
735 		if (nsegs >= txr->tx_wreg_nsegs) {
736 			IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
737 			nsegs = 0;
738 			idx = -1;
739 		}
740 
741 		ETHER_BPF_MTAP(ifp, m_head);
742 	}
743 	if (idx >= 0)
744 		IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
745 	txr->tx_running = IX_TX_RUNNING;
746 }
747 
748 static int
749 ix_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
750 {
751 	struct ix_softc *sc = ifp->if_softc;
752 	struct ifreq *ifr = (struct ifreq *) data;
753 	int error = 0, mask, reinit;
754 
755 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
756 
757 	switch (command) {
758 	case SIOCSIFMTU:
759 		if (ifr->ifr_mtu > IX_MAX_MTU) {
760 			error = EINVAL;
761 		} else {
762 			ifp->if_mtu = ifr->ifr_mtu;
763 			sc->max_frame_size = ifp->if_mtu + IX_MTU_HDR;
764 			ix_init(sc);
765 		}
766 		break;
767 
768 	case SIOCSIFFLAGS:
769 		if (ifp->if_flags & IFF_UP) {
770 			if (ifp->if_flags & IFF_RUNNING) {
771 				if ((ifp->if_flags ^ sc->if_flags) &
772 				    (IFF_PROMISC | IFF_ALLMULTI))
773 					ix_set_promisc(sc);
774 			} else {
775 				ix_init(sc);
776 			}
777 		} else if (ifp->if_flags & IFF_RUNNING) {
778 			ix_stop(sc);
779 		}
780 		sc->if_flags = ifp->if_flags;
781 		break;
782 
783 	case SIOCADDMULTI:
784 	case SIOCDELMULTI:
785 		if (ifp->if_flags & IFF_RUNNING) {
786 			ix_disable_intr(sc);
787 			ix_set_multi(sc);
788 #ifdef IFPOLL_ENABLE
789 			if ((ifp->if_flags & IFF_NPOLLING) == 0)
790 #endif
791 				ix_enable_intr(sc);
792 		}
793 		break;
794 
795 	case SIOCSIFMEDIA:
796 	case SIOCGIFMEDIA:
797 		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
798 		break;
799 
800 	case SIOCSIFCAP:
801 		reinit = 0;
802 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
803 		if (mask & IFCAP_RXCSUM) {
804 			ifp->if_capenable ^= IFCAP_RXCSUM;
805 			reinit = 1;
806 		}
807 		if (mask & IFCAP_VLAN_HWTAGGING) {
808 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
809 			reinit = 1;
810 		}
811 		if (mask & IFCAP_TXCSUM) {
812 			ifp->if_capenable ^= IFCAP_TXCSUM;
813 			if (ifp->if_capenable & IFCAP_TXCSUM)
814 				ifp->if_hwassist |= CSUM_OFFLOAD;
815 			else
816 				ifp->if_hwassist &= ~CSUM_OFFLOAD;
817 		}
818 		if (mask & IFCAP_TSO) {
819 			ifp->if_capenable ^= IFCAP_TSO;
820 			if (ifp->if_capenable & IFCAP_TSO)
821 				ifp->if_hwassist |= CSUM_TSO;
822 			else
823 				ifp->if_hwassist &= ~CSUM_TSO;
824 		}
825 		if (mask & IFCAP_RSS)
826 			ifp->if_capenable ^= IFCAP_RSS;
827 		if (reinit && (ifp->if_flags & IFF_RUNNING))
828 			ix_init(sc);
829 		break;
830 
831 #if 0
832 	case SIOCGI2C:
833 	{
834 		struct ixgbe_i2c_req	i2c;
835 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
836 		if (error)
837 			break;
838 		if ((i2c.dev_addr != 0xA0) && (i2c.dev_addr != 0xA2)) {
839 			error = EINVAL;
840 			break;
841 		}
842 		hw->phy.ops.read_i2c_byte(hw, i2c.offset,
843 		    i2c.dev_addr, i2c.data);
844 		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
845 		break;
846 	}
847 #endif
848 
849 	default:
850 		error = ether_ioctl(ifp, command, data);
851 		break;
852 	}
853 	return error;
854 }
855 
856 #define IXGBE_MHADD_MFS_SHIFT 16
857 
858 static void
859 ix_init(void *xsc)
860 {
861 	struct ix_softc *sc = xsc;
862 	struct ifnet *ifp = &sc->arpcom.ac_if;
863 	struct ixgbe_hw *hw = &sc->hw;
864 	uint32_t gpie, rxctrl;
865 	int i, error;
866 	boolean_t polling;
867 
868 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
869 
870 	ix_stop(sc);
871 
872 	polling = FALSE;
873 #ifdef IFPOLL_ENABLE
874 	if (ifp->if_flags & IFF_NPOLLING)
875 		polling = TRUE;
876 #endif
877 
878 	/* Configure # of used RX/TX rings */
879 	ix_set_ring_inuse(sc, polling);
880 	ifq_set_subq_divisor(&ifp->if_snd, sc->tx_ring_inuse);
881 
882 	/* Get the latest MAC address; the user may have set an LAA */
883 	bcopy(IF_LLADDR(ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS);
884 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
885 	hw->addr_ctrl.rar_used_count = 1;
886 
887 	/* Prepare transmit descriptors and buffers */
888 	for (i = 0; i < sc->tx_ring_inuse; ++i)
889 		ix_init_tx_ring(&sc->tx_rings[i]);
890 
891 	ixgbe_init_hw(hw);
892 	ix_init_tx_unit(sc);
893 
894 	/* Setup Multicast table */
895 	ix_set_multi(sc);
896 
897 	/* Prepare receive descriptors and buffers */
898 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
899 		error = ix_init_rx_ring(&sc->rx_rings[i]);
900 		if (error) {
901 			if_printf(ifp, "Could not initialize RX ring%d\n", i);
902 			ix_stop(sc);
903 			return;
904 		}
905 	}
906 
907 	/* Configure RX settings */
908 	ix_init_rx_unit(sc, polling);
909 
910 	gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
911 
912 	/* Enable Fan Failure Interrupt */
913 	gpie |= IXGBE_SDP1_GPIEN_BY_MAC(hw);
914 
915 	/* Add for Module detection */
916 	if (hw->mac.type == ixgbe_mac_82599EB)
917 		gpie |= IXGBE_SDP2_GPIEN;
918 
919 	/*
920 	 * Thermal Failure Detection (X540)
921 	 * Link Detection (X552)
922 	 */
923 	if (hw->mac.type == ixgbe_mac_X540 ||
924 	    hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
925 	    hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T)
926 		gpie |= IXGBE_SDP0_GPIEN_X540;
927 
928 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
929 		/* Enable Enhanced MSIX mode */
930 		gpie |= IXGBE_GPIE_MSIX_MODE;
931 		gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
932 		    IXGBE_GPIE_OCD;
933 	}
934 	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
935 
936 	/* Set MTU size */
937 	if (ifp->if_mtu > ETHERMTU) {
938 		uint32_t mhadd;
939 
940 		/* aka IXGBE_MAXFRS on 82599 and newer */
941 		mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
942 		mhadd &= ~IXGBE_MHADD_MFS_MASK;
943 		mhadd |= sc->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
944 		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
945 	}
946 
947 	/*
948 	 * Enable TX rings
949 	 */
950 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
951 		uint32_t txdctl;
952 
953 		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
954 		txdctl |= IXGBE_TXDCTL_ENABLE;
955 
956 		/*
957 		 * Set WTHRESH to 0, since TX head write-back is used
958 		 */
959 		txdctl &= ~(0x7f << 16);
960 
961 		/*
962 		 * When the internal queue falls below PTHRESH (32),
963 		 * start prefetching as long as there are at least
964 		 * HTHRESH (1) buffers ready. The values are taken
965 		 * from the Intel linux driver 3.8.21.
966 		 * Prefetching enables tx line rate even with 1 queue.
967 		 */
968 		txdctl |= (32 << 0) | (1 << 8);
969 		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
970 	}
971 
972 	/*
973 	 * Enable RX rings
974 	 */
975 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
976 		uint32_t rxdctl;
977 		int k;
978 
979 		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
980 		if (hw->mac.type == ixgbe_mac_82598EB) {
981 			/*
982 			 * PTHRESH = 32
983 			 * HTHRESH = 4
984 			 * WTHRESH = 8
985 			 */
986 			rxdctl &= ~0x3FFFFF;
987 			rxdctl |= 0x080420;
988 		}
989 		rxdctl |= IXGBE_RXDCTL_ENABLE;
990 		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
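		/* Wait up to 10ms for the hardware to latch the enable bit */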
991 		for (k = 0; k < 10; ++k) {
992 			if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
993 			    IXGBE_RXDCTL_ENABLE)
994 				break;
995 			else
996 				msec_delay(1);
997 		}
998 		wmb();
999 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i),
1000 		    sc->rx_rings[0].rx_ndesc - 1);
1001 	}
1002 
1003 	/* Enable Receive engine */
1004 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
1005 	if (hw->mac.type == ixgbe_mac_82598EB)
1006 		rxctrl |= IXGBE_RXCTRL_DMBYPS;
1007 	rxctrl |= IXGBE_RXCTRL_RXEN;
1008 	ixgbe_enable_rx_dma(hw, rxctrl);
1009 
1010 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
1011 		const struct ix_tx_ring *txr = &sc->tx_rings[i];
1012 
1013 		if (txr->tx_intr_vec >= 0) {
1014 			ix_set_ivar(sc, i, txr->tx_intr_vec, 1);
1015 		} else if (!polling) {
1016 			/*
1017 			 * Unconfigured TX interrupt vector could only
1018 			 * happen for MSI-X.
1019 			 */
1020 			KASSERT(sc->intr_type == PCI_INTR_TYPE_MSIX,
1021 			    ("TX intr vector is not set"));
1022 			if (bootverbose)
1023 				if_printf(ifp, "IVAR skips TX ring %d\n", i);
1024 		}
1025 	}
1026 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
1027 		const struct ix_rx_ring *rxr = &sc->rx_rings[i];
1028 
1029 		if (polling && rxr->rx_intr_vec < 0)
1030 			continue;
1031 
1032 		KKASSERT(rxr->rx_intr_vec >= 0);
1033 		ix_set_ivar(sc, i, rxr->rx_intr_vec, 0);
1034 		if (rxr->rx_txr != NULL) {
1035 			/*
1036 			 * Piggyback the TX ring interrupt onto the RX
1037 			 * ring interrupt vector.
1038 			 */
1039 			KASSERT(rxr->rx_txr->tx_intr_vec < 0,
1040 			    ("piggybacked TX ring configured intr vector"));
1041 			ix_set_ivar(sc, rxr->rx_txr->tx_idx,
1042 			    rxr->rx_intr_vec, 1);
1043 			if (bootverbose) {
1044 				if_printf(ifp, "IVAR RX ring %d piggybacks "
1045 				    "TX ring %u\n", i, rxr->rx_txr->tx_idx);
1046 			}
1047 		}
1048 	}
1049 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
1050 		/* Set up status MSI-X vector; it is using fixed entry 1 */
1051 		ix_set_ivar(sc, 1, sc->sts_msix_vec, -1);
1052 
1053 		/* Set up auto-mask for TX and RX rings */
1054 		if (hw->mac.type == ixgbe_mac_82598EB) {
1055 			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
1056 		} else {
1057 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
1058 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
1059 		}
1060 	} else {
1061 		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
1062 	}
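	/* Program the initial interrupt throttling rate of each vector */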
1063 	for (i = 0; i < sc->intr_cnt; ++i)
1064 		ix_set_eitr(sc, i, sc->intr_data[i].intr_rate);
1065 
1066 	/*
1067 	 * Check on any SFP devices that need to be kick-started
1068 	 */
1069 	if (hw->phy.type == ixgbe_phy_none) {
1070 		error = hw->phy.ops.identify(hw);
1071 		if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
1072 			if_printf(ifp,
1073 			    "Unsupported SFP+ module type was detected.\n");
1074 			/* XXX stop */
1075 			return;
1076 		}
1077 	}
1078 
1079 	/* Config/Enable Link */
1080 	ix_config_link(sc);
1081 
1082 	/* Hardware Packet Buffer & Flow Control setup */
1083 	ix_config_flowctrl(sc);
1084 
1085 	/* Initialize the FC settings */
1086 	ixgbe_start_hw(hw);
1087 
1088 	/* Set up VLAN support and filter */
1089 	ix_set_vlan(sc);
1090 
1091 	/* Setup DMA Coalescing */
1092 	ix_config_dmac(sc);
1093 
1094 	/*
1095 	 * Only enable interrupts if we are not polling; make sure
1096 	 * they are off otherwise.
1097 	 */
1098 	if (polling)
1099 		ix_disable_intr(sc);
1100 	else
1101 		ix_enable_intr(sc);
1102 
1103 	ifp->if_flags |= IFF_RUNNING;
1104 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
1105 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1106 
1107 		ifsq_clr_oactive(txr->tx_ifsq);
1108 		ifsq_watchdog_start(&txr->tx_watchdog);
1109 
1110 		if (!polling) {
1111 			callout_reset_bycpu(&txr->tx_gc_timer, 1,
1112 			    ix_txgc_timer, txr, txr->tx_intr_cpuid);
1113 		}
1114 	}
1115 
1116 	ix_set_timer_cpuid(sc, polling);
1117 	callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
1118 }
1119 
1120 static void
1121 ix_intr(void *xsc)
1122 {
1123 	struct ix_softc *sc = xsc;
1124 	struct ixgbe_hw	*hw = &sc->hw;
1125 	uint32_t eicr;
1126 
1127 	ASSERT_SERIALIZED(&sc->main_serialize);
1128 
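	/*
	 * Reading EICR acknowledges (clears) the asserted causes; the
	 * causes are auto-masked through EIAM and re-enabled by the
	 * EIMS write at the end of this handler.
	 */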
1129 	eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1130 	if (eicr == 0) {
1131 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
1132 		return;
1133 	}
1134 
1135 	if (eicr & IX_RX0_INTR_MASK) {
1136 		struct ix_rx_ring *rxr = &sc->rx_rings[0];
1137 
1138 		lwkt_serialize_enter(&rxr->rx_serialize);
1139 		ix_rxeof(rxr, -1);
1140 		lwkt_serialize_exit(&rxr->rx_serialize);
1141 	}
1142 	if (eicr & IX_RX1_INTR_MASK) {
1143 		struct ix_rx_ring *rxr;
1144 
1145 		KKASSERT(sc->rx_ring_inuse == IX_MIN_RXRING_RSS);
1146 		rxr = &sc->rx_rings[1];
1147 
1148 		lwkt_serialize_enter(&rxr->rx_serialize);
1149 		ix_rxeof(rxr, -1);
1150 		lwkt_serialize_exit(&rxr->rx_serialize);
1151 	}
1152 
1153 	if (eicr & IX_TX_INTR_MASK) {
1154 		struct ix_tx_ring *txr = &sc->tx_rings[0];
1155 
1156 		lwkt_serialize_enter(&txr->tx_serialize);
1157 		ix_tx_intr(txr, *(txr->tx_hdr));
1158 		lwkt_serialize_exit(&txr->tx_serialize);
1159 	}
1160 
1161 	if (__predict_false(eicr & IX_EICR_STATUS))
1162 		ix_intr_status(sc, eicr);
1163 
1164 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
1165 }
1166 
1167 static void
1168 ix_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1169 {
1170 	struct ix_softc *sc = ifp->if_softc;
1171 	struct ifmedia *ifm = &sc->media;
1172 	int layer;
1173 
1174 	ix_update_link_status(sc);
1175 
1176 	ifmr->ifm_status = IFM_AVALID;
1177 	ifmr->ifm_active = IFM_ETHER;
1178 
1179 	if (!sc->link_active) {
1180 		if (IFM_SUBTYPE(ifm->ifm_media) != IFM_AUTO)
1181 			ifmr->ifm_active |= ifm->ifm_media;
1182 		else
1183 			ifmr->ifm_active |= IFM_NONE;
1184 		return;
1185 	}
1186 	ifmr->ifm_status |= IFM_ACTIVE;
1187 
1188 	layer = ixgbe_get_supported_physical_layer(&sc->hw);
1189 
1190 	if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) ||
1191 	    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) ||
1192 	    (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX)) {
1193 		switch (sc->link_speed) {
1194 		case IXGBE_LINK_SPEED_10GB_FULL:
1195 			ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
1196 			break;
1197 		case IXGBE_LINK_SPEED_1GB_FULL:
1198 			ifmr->ifm_active |= IFM_1000_T | IFM_FDX;
1199 			break;
1200 		case IXGBE_LINK_SPEED_100_FULL:
1201 			ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1202 			break;
1203 		}
1204 	} else if ((layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) ||
1205 	    (layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA)) {
1206 		switch (sc->link_speed) {
1207 		case IXGBE_LINK_SPEED_10GB_FULL:
1208 			ifmr->ifm_active |= IFM_10G_TWINAX | IFM_FDX;
1209 			break;
1210 		}
1211 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) {
1212 		switch (sc->link_speed) {
1213 		case IXGBE_LINK_SPEED_10GB_FULL:
1214 			ifmr->ifm_active |= IFM_10G_LR | IFM_FDX;
1215 			break;
1216 		case IXGBE_LINK_SPEED_1GB_FULL:
1217 			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
1218 			break;
1219 		}
1220 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LRM) {
1221 		switch (sc->link_speed) {
1222 		case IXGBE_LINK_SPEED_10GB_FULL:
1223 			ifmr->ifm_active |= IFM_10G_LRM | IFM_FDX;
1224 			break;
1225 		case IXGBE_LINK_SPEED_1GB_FULL:
1226 			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
1227 			break;
1228 		}
1229 	} else if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) ||
1230 	    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX)) {
1231 		switch (sc->link_speed) {
1232 		case IXGBE_LINK_SPEED_10GB_FULL:
1233 			ifmr->ifm_active |= IFM_10G_SR | IFM_FDX;
1234 			break;
1235 		case IXGBE_LINK_SPEED_1GB_FULL:
1236 			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1237 			break;
1238 		}
1239 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) {
1240 		switch (sc->link_speed) {
1241 		case IXGBE_LINK_SPEED_10GB_FULL:
1242 			ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX;
1243 			break;
1244 		}
1245 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) {
1246 		/*
1247 		 * XXX: These need to use the proper media types once
1248 		 * they're added.
1249 		 */
1250 		switch (sc->link_speed) {
1251 		case IXGBE_LINK_SPEED_10GB_FULL:
1252 			ifmr->ifm_active |= IFM_10G_SR | IFM_FDX;
1253 			break;
1254 		case IXGBE_LINK_SPEED_2_5GB_FULL:
1255 			ifmr->ifm_active |= IFM_2500_SX | IFM_FDX;
1256 			break;
1257 		case IXGBE_LINK_SPEED_1GB_FULL:
1258 			ifmr->ifm_active |= IFM_1000_CX | IFM_FDX;
1259 			break;
1260 		}
1261 	} else if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) ||
1262 	    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX)) {
1263 		/*
1264 		 * XXX: These need to use the proper media types once
1265 		 * they're added.
1266 		 */
1267 		switch (sc->link_speed) {
1268 		case IXGBE_LINK_SPEED_10GB_FULL:
1269 			ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX;
1270 			break;
1271 		case IXGBE_LINK_SPEED_2_5GB_FULL:
1272 			ifmr->ifm_active |= IFM_2500_SX | IFM_FDX;
1273 			break;
1274 		case IXGBE_LINK_SPEED_1GB_FULL:
1275 			ifmr->ifm_active |= IFM_1000_CX | IFM_FDX;
1276 			break;
1277 		}
1278 	}
1279 
1280 	/* If nothing is recognized... */
1281 	if (IFM_SUBTYPE(ifmr->ifm_active) == 0)
1282 		ifmr->ifm_active |= IFM_NONE;
1283 
1284 	if (sc->ifm_media & IFM_ETH_FORCEPAUSE)
1285 		ifmr->ifm_active |= (sc->ifm_media & IFM_ETH_FCMASK);
1286 
1287 	switch (sc->hw.fc.current_mode) {
1288 	case ixgbe_fc_full:
1289 		ifmr->ifm_active |= IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE;
1290 		break;
1291 	case ixgbe_fc_rx_pause:
1292 		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
1293 		break;
1294 	case ixgbe_fc_tx_pause:
1295 		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
1296 		break;
1297 	default:
1298 		break;
1299 	}
1300 }
1301 
1302 static int
1303 ix_media_change(struct ifnet *ifp)
1304 {
1305 	struct ix_softc *sc = ifp->if_softc;
1306 	struct ifmedia *ifm = &sc->media;
1307 	struct ixgbe_hw *hw = &sc->hw;
1308 
1309 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1310 		return (EINVAL);
1311 
1312 	if (hw->phy.media_type == ixgbe_media_type_backplane ||
1313 	    hw->mac.ops.setup_link == NULL) {
1314 		if ((ifm->ifm_media ^ sc->ifm_media) & IFM_ETH_FCMASK) {
1315 			/* Only flow control setting changes are allowed */
1316 			return (EOPNOTSUPP);
1317 		}
1318 	}
1319 
1320 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1321 	case IFM_AUTO:
1322 		sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;
1323 		break;
1324 
1325 	case IFM_10G_T:
1326 	case IFM_10G_LRM:
1327 	case IFM_10G_SR:	/* XXX also KR */
1328 	case IFM_10G_LR:
1329 	case IFM_10G_CX4:	/* XXX also KX4 */
1330 	case IFM_10G_TWINAX:
1331 		sc->advspeed = IXGBE_LINK_SPEED_10GB_FULL;
1332 		break;
1333 
1334 	case IFM_1000_T:
1335 	case IFM_1000_LX:
1336 	case IFM_1000_SX:
1337 	case IFM_1000_CX:	/* XXX is KX */
1338 		sc->advspeed = IXGBE_LINK_SPEED_1GB_FULL;
1339 		break;
1340 
1341 	case IFM_100_TX:
1342 		sc->advspeed = IXGBE_LINK_SPEED_100_FULL;
1343 		break;
1344 
1345 	default:
1346 		if (bootverbose) {
1347 			if_printf(ifp, "Invalid media type %d!\n",
1348 			    ifm->ifm_media);
1349 		}
1350 		return EINVAL;
1351 	}
1352 	sc->ifm_media = ifm->ifm_media;
1353 
1354 #if 0
1355 	if (hw->mac.ops.setup_link != NULL) {
1356 		hw->mac.autotry_restart = TRUE;
1357 		hw->mac.ops.setup_link(hw, sc->advspeed, TRUE);
1358 	}
1359 #else
1360 	if (ifp->if_flags & IFF_RUNNING)
1361 		ix_init(sc);
1362 #endif
1363 	return 0;
1364 }
1365 
1366 static __inline int
1367 ix_tso_pullup(struct mbuf **mp)
1368 {
1369 	int hoff, iphlen, thoff;
1370 	struct mbuf *m;
1371 
1372 	m = *mp;
1373 	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));
1374 
1375 	iphlen = m->m_pkthdr.csum_iphlen;
1376 	thoff = m->m_pkthdr.csum_thlen;
1377 	hoff = m->m_pkthdr.csum_lhlen;
1378 
1379 	KASSERT(iphlen > 0, ("invalid ip hlen"));
1380 	KASSERT(thoff > 0, ("invalid tcp hlen"));
1381 	KASSERT(hoff > 0, ("invalid ether hlen"));
1382 
1383 	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
1384 		m = m_pullup(m, hoff + iphlen + thoff);
1385 		if (m == NULL) {
1386 			*mp = NULL;
1387 			return ENOBUFS;
1388 		}
1389 		*mp = m;
1390 	}
1391 	return 0;
1392 }
1393 
1394 static int
1395 ix_encap(struct ix_tx_ring *txr, struct mbuf **m_headp,
1396     uint16_t *segs_used, int *idx)
1397 {
1398 	uint32_t olinfo_status = 0, cmd_type_len, cmd_rs = 0;
1399 	int i, j, error, nsegs, first, maxsegs;
1400 	struct mbuf *m_head = *m_headp;
1401 	bus_dma_segment_t segs[IX_MAX_SCATTER];
1402 	bus_dmamap_t map;
1403 	struct ix_tx_buf *txbuf;
1404 	union ixgbe_adv_tx_desc *txd = NULL;
1405 
1406 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1407 		error = ix_tso_pullup(m_headp);
1408 		if (__predict_false(error))
1409 			return error;
1410 		m_head = *m_headp;
1411 	}
1412 
1413 	/* Basic descriptor defines */
1414 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1415 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1416 
1417 	if (m_head->m_flags & M_VLANTAG)
1418 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1419 
1420 	/*
1421 	 * Important to capture the first descriptor
1422 	 * used because it will contain the index of
1423 	 * the one we tell the hardware to report back
1424 	 */
1425 	first = txr->tx_next_avail;
1426 	txbuf = &txr->tx_buf[first];
1427 	map = txbuf->map;
1428 
1429 	/*
1430 	 * Map the packet for DMA.
1431 	 */
1432 	maxsegs = txr->tx_avail - IX_TX_RESERVED;
1433 	if (maxsegs > IX_MAX_SCATTER)
1434 		maxsegs = IX_MAX_SCATTER;
1435 
1436 	error = bus_dmamap_load_mbuf_defrag(txr->tx_tag, map, m_headp,
1437 	    segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
1438 	if (__predict_false(error)) {
1439 		m_freem(*m_headp);
1440 		*m_headp = NULL;
1441 		return error;
1442 	}
1443 	bus_dmamap_sync(txr->tx_tag, map, BUS_DMASYNC_PREWRITE);
1444 
1445 	m_head = *m_headp;
1446 
1447 	/*
1448 	 * Set up the appropriate offload context if requested;
1449 	 * this may consume one TX descriptor.
1450 	 */
1451 	if (ix_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status)) {
1452 		(*segs_used)++;
1453 		txr->tx_nsegs++;
1454 	}
1455 
1456 	*segs_used += nsegs;
1457 	txr->tx_nsegs += nsegs;
1458 	if (txr->tx_nsegs >= txr->tx_intr_nsegs) {
1459 		/*
1460 		 * Report Status (RS) is turned on every intr_nsegs
1461 		 * descriptors (roughly).
1462 		 */
1463 		txr->tx_nsegs = 0;
1464 		cmd_rs = IXGBE_TXD_CMD_RS;
1465 	}
1466 
1467 	i = txr->tx_next_avail;
1468 	for (j = 0; j < nsegs; j++) {
1469 		bus_size_t seglen;
1470 		bus_addr_t segaddr;
1471 
1472 		txbuf = &txr->tx_buf[i];
1473 		txd = &txr->tx_base[i];
1474 		seglen = segs[j].ds_len;
1475 		segaddr = htole64(segs[j].ds_addr);
1476 
1477 		txd->read.buffer_addr = segaddr;
1478 		txd->read.cmd_type_len = htole32(IXGBE_TXD_CMD_IFCS |
1479 		    cmd_type_len | seglen);
1480 		txd->read.olinfo_status = htole32(olinfo_status);
1481 
1482 		if (++i == txr->tx_ndesc)
1483 			i = 0;
1484 	}
1485 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | cmd_rs);
1486 
1487 	txr->tx_avail -= nsegs;
1488 	txr->tx_next_avail = i;
1489 	txr->tx_nmbuf++;
1490 
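	/*
	 * The mbuf is attached to the last descriptor used; swap the DMA
	 * maps so that the loaded map (originally the first buffer's)
	 * follows the mbuf and is unloaded when the packet completes.
	 */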
1491 	txbuf->m_head = m_head;
1492 	txr->tx_buf[first].map = txbuf->map;
1493 	txbuf->map = map;
1494 
1495 	/*
1496 	 * Defer TDT updating until enough descriptors are set up.
1497 	 */
1498 	*idx = i;
1499 
1500 	return 0;
1501 }
1502 
1503 static void
1504 ix_set_promisc(struct ix_softc *sc)
1505 {
1506 	struct ifnet *ifp = &sc->arpcom.ac_if;
1507 	uint32_t reg_rctl;
1508 	int mcnt = 0;
1509 
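	/*
	 * Unicast promiscuous (UPE) is dropped up front; multicast
	 * promiscuous (MPE) is kept only if the multicast filter table
	 * would overflow or IFF_ALLMULTI is set.  UPE/MPE are then
	 * re-enabled below according to IFF_PROMISC and IFF_ALLMULTI.
	 */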
1510 	reg_rctl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
1511 	reg_rctl &= ~IXGBE_FCTRL_UPE;
1512 	if (ifp->if_flags & IFF_ALLMULTI) {
1513 		mcnt = IX_MAX_MCASTADDR;
1514 	} else {
1515 		struct ifmultiaddr *ifma;
1516 
1517 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1518 			if (ifma->ifma_addr->sa_family != AF_LINK)
1519 				continue;
1520 			if (mcnt == IX_MAX_MCASTADDR)
1521 				break;
1522 			mcnt++;
1523 		}
1524 	}
1525 	if (mcnt < IX_MAX_MCASTADDR)
1526 		reg_rctl &= ~IXGBE_FCTRL_MPE;
1527 	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1528 
1529 	if (ifp->if_flags & IFF_PROMISC) {
1530 		reg_rctl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
1531 		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1532 	} else if (ifp->if_flags & IFF_ALLMULTI) {
1533 		reg_rctl |= IXGBE_FCTRL_MPE;
1534 		reg_rctl &= ~IXGBE_FCTRL_UPE;
1535 		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1536 	}
1537 }
1538 
1539 static void
1540 ix_set_multi(struct ix_softc *sc)
1541 {
1542 	struct ifnet *ifp = &sc->arpcom.ac_if;
1543 	struct ifmultiaddr *ifma;
1544 	uint32_t fctrl;
1545 	uint8_t	*mta;
1546 	int mcnt = 0;
1547 
1548 	mta = sc->mta;
1549 	bzero(mta, IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR);
1550 
1551 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1552 		if (ifma->ifma_addr->sa_family != AF_LINK)
1553 			continue;
1554 		if (mcnt == IX_MAX_MCASTADDR)
1555 			break;
1556 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1557 		    &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
1558 		    IXGBE_ETH_LENGTH_OF_ADDRESS);
1559 		mcnt++;
1560 	}
1561 
1562 	fctrl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
1563 	fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1564 	if (ifp->if_flags & IFF_PROMISC) {
1565 		fctrl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
1566 	} else if (mcnt >= IX_MAX_MCASTADDR || (ifp->if_flags & IFF_ALLMULTI)) {
1567 		fctrl |= IXGBE_FCTRL_MPE;
1568 		fctrl &= ~IXGBE_FCTRL_UPE;
1569 	} else {
1570 		fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1571 	}
1572 	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, fctrl);
1573 
1574 	if (mcnt < IX_MAX_MCASTADDR) {
1575 		ixgbe_update_mc_addr_list(&sc->hw,
1576 		    mta, mcnt, ix_mc_array_itr, TRUE);
1577 	}
1578 }
1579 
1580 /*
1581  * This is an iterator function needed by the multicast shared
1582  * code.  It simply feeds the shared code routine the addresses
1583  * accumulated by ix_set_multi(), one at a time.
1584  */
1585 static uint8_t *
1586 ix_mc_array_itr(struct ixgbe_hw *hw, uint8_t **update_ptr, uint32_t *vmdq)
1587 {
1588 	uint8_t *addr = *update_ptr;
1589 	uint8_t *newptr;
1590 	*vmdq = 0;
1591 
1592 	newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
1593 	*update_ptr = newptr;
1594 	return addr;
1595 }
1596 
1597 static void
1598 ix_timer(void *arg)
1599 {
1600 	struct ix_softc *sc = arg;
1601 
1602 	lwkt_serialize_enter(&sc->main_serialize);
1603 
1604 	if ((sc->arpcom.ac_if.if_flags & IFF_RUNNING) == 0) {
1605 		lwkt_serialize_exit(&sc->main_serialize);
1606 		return;
1607 	}
1608 
1609 	/* Check for pluggable optics */
1610 	if (sc->sfp_probe) {
1611 		if (!ix_sfp_probe(sc))
1612 			goto done; /* Nothing to do */
1613 	}
1614 
1615 	ix_update_link_status(sc);
1616 	ix_update_stats(sc);
1617 
1618 done:
1619 	callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
1620 	lwkt_serialize_exit(&sc->main_serialize);
1621 }
1622 
1623 static void
1624 ix_update_link_status(struct ix_softc *sc)
1625 {
1626 	struct ifnet *ifp = &sc->arpcom.ac_if;
1627 
1628 	if (sc->link_up) {
1629 		if (sc->link_active == FALSE) {
1630 			if (bootverbose) {
1631 				if_printf(ifp, "Link is up %d Gbps %s\n",
1632 				    sc->link_speed == 128 ? 10 : 1,
1633 				    "Full Duplex");
1634 			}
1635 
1636 			/*
1637 			 * Update any Flow Control changes
1638 			 */
1639 			ixgbe_fc_enable(&sc->hw);
1640 			/* MUST after ixgbe_fc_enable() */
1641 			if (sc->rx_ring_inuse > 1) {
1642 				switch (sc->hw.fc.current_mode) {
1643 				case ixgbe_fc_rx_pause:
1644 				case ixgbe_fc_tx_pause:
1645 				case ixgbe_fc_full:
1646 					ix_disable_rx_drop(sc);
1647 					break;
1648 
1649 				case ixgbe_fc_none:
1650 					ix_enable_rx_drop(sc);
1651 					break;
1652 
1653 				default:
1654 					break;
1655 				}
1656 			}
1657 
1658 			/* Update DMA coalescing config */
1659 			ix_config_dmac(sc);
1660 
1661 			sc->link_active = TRUE;
1662 
1663 			ifp->if_link_state = LINK_STATE_UP;
1664 			if_link_state_change(ifp);
1665 		}
1666 	} else { /* Link down */
1667 		if (sc->link_active == TRUE) {
1668 			if (bootverbose)
1669 				if_printf(ifp, "Link is Down\n");
1670 			ifp->if_link_state = LINK_STATE_DOWN;
1671 			if_link_state_change(ifp);
1672 
1673 			sc->link_active = FALSE;
1674 		}
1675 	}
1676 }
1677 
1678 static void
1679 ix_stop(struct ix_softc *sc)
1680 {
1681 	struct ixgbe_hw *hw = &sc->hw;
1682 	struct ifnet *ifp = &sc->arpcom.ac_if;
1683 	int i;
1684 
1685 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1686 
1687 	ix_disable_intr(sc);
1688 	callout_stop(&sc->timer);
1689 
1690 	ifp->if_flags &= ~IFF_RUNNING;
1691 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1692 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1693 
1694 		ifsq_clr_oactive(txr->tx_ifsq);
1695 		ifsq_watchdog_stop(&txr->tx_watchdog);
1696 		txr->tx_flags &= ~IX_TXFLAG_ENABLED;
1697 
1698 		txr->tx_running = 0;
1699 		callout_stop(&txr->tx_gc_timer);
1700 	}
1701 
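	/*
	 * Reset the hardware, then clear adapter_stopped so that
	 * ixgbe_stop_adapter() runs its full stop sequence.
	 */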
1702 	ixgbe_reset_hw(hw);
1703 	hw->adapter_stopped = FALSE;
1704 	ixgbe_stop_adapter(hw);
1705 	if (hw->mac.type == ixgbe_mac_82599EB)
1706 		ixgbe_stop_mac_link_on_d3_82599(hw);
1707 	/* Turn off the laser - noop with no optics */
1708 	ixgbe_disable_tx_laser(hw);
1709 
1710 	/* Update the stack */
1711 	sc->link_up = FALSE;
1712 	ix_update_link_status(sc);
1713 
1714 	/* Reprogram the RAR[0] in case user changed it. */
1715 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV);
1716 
1717 	for (i = 0; i < sc->tx_ring_cnt; ++i)
1718 		ix_free_tx_ring(&sc->tx_rings[i]);
1719 
1720 	for (i = 0; i < sc->rx_ring_cnt; ++i)
1721 		ix_free_rx_ring(&sc->rx_rings[i]);
1722 }
1723 
1724 static void
1725 ix_setup_ifp(struct ix_softc *sc)
1726 {
1727 	struct ixgbe_hw *hw = &sc->hw;
1728 	struct ifnet *ifp = &sc->arpcom.ac_if;
1729 	int i;
1730 
1731 	ifp->if_baudrate = IF_Gbps(10UL);
1732 
1733 	ifp->if_softc = sc;
1734 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1735 	ifp->if_init = ix_init;
1736 	ifp->if_ioctl = ix_ioctl;
1737 	ifp->if_start = ix_start;
1738 	ifp->if_serialize = ix_serialize;
1739 	ifp->if_deserialize = ix_deserialize;
1740 	ifp->if_tryserialize = ix_tryserialize;
1741 #ifdef INVARIANTS
1742 	ifp->if_serialize_assert = ix_serialize_assert;
1743 #endif
1744 #ifdef IFPOLL_ENABLE
1745 	ifp->if_npoll = ix_npoll;
1746 #endif
1747 
1748 	/* Increase TSO burst length */
1749 	ifp->if_tsolen = (8 * ETHERMTU);
1750 
1751 	ifp->if_nmbclusters = sc->rx_ring_cnt * sc->rx_rings[0].rx_ndesc;
1752 	ifp->if_nmbjclusters = ifp->if_nmbclusters;
1753 
1754 	ifq_set_maxlen(&ifp->if_snd, sc->tx_rings[0].tx_ndesc - 2);
1755 	ifq_set_ready(&ifp->if_snd);
1756 	ifq_set_subq_cnt(&ifp->if_snd, sc->tx_ring_cnt);
1757 
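	/*
	 * Map packets to TX subqueues modulo the subqueue divisor; the
	 * divisor is raised to the number of TX rings actually in use
	 * when the interface is brought up (see ix_init()).
	 */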
1758 	ifp->if_mapsubq = ifq_mapsubq_modulo;
1759 	ifq_set_subq_divisor(&ifp->if_snd, 1);
1760 
1761 	ether_ifattach(ifp, hw->mac.addr, NULL);
1762 
1763 	ifp->if_capabilities =
1764 	    IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
1765 	if (IX_ENABLE_HWRSS(sc))
1766 		ifp->if_capabilities |= IFCAP_RSS;
1767 	ifp->if_capenable = ifp->if_capabilities;
1768 	ifp->if_hwassist = CSUM_OFFLOAD | CSUM_TSO;
1769 
1770 	/*
1771 	 * Tell the upper layer(s) we support long frames.
1772 	 */
1773 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
1774 
1775 	/* Setup TX rings and subqueues */
1776 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1777 		struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
1778 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1779 
1780 		ifsq_set_cpuid(ifsq, txr->tx_intr_cpuid);
1781 		ifsq_set_priv(ifsq, txr);
1782 		ifsq_set_hw_serialize(ifsq, &txr->tx_serialize);
1783 		txr->tx_ifsq = ifsq;
1784 
1785 		ifsq_watchdog_init(&txr->tx_watchdog, ifsq, ix_watchdog);
1786 	}
1787 
1788 	/* Specify the media types supported by this adapter */
1789 	ix_init_media(sc);
1790 }
1791 
1792 static boolean_t
1793 ix_is_sfp(const struct ixgbe_hw *hw)
1794 {
1795 	switch (hw->phy.type) {
1796 	case ixgbe_phy_sfp_avago:
1797 	case ixgbe_phy_sfp_ftl:
1798 	case ixgbe_phy_sfp_intel:
1799 	case ixgbe_phy_sfp_unknown:
1800 	case ixgbe_phy_sfp_passive_tyco:
1801 	case ixgbe_phy_sfp_passive_unknown:
1802 	case ixgbe_phy_qsfp_passive_unknown:
1803 	case ixgbe_phy_qsfp_active_unknown:
1804 	case ixgbe_phy_qsfp_intel:
1805 	case ixgbe_phy_qsfp_unknown:
1806 		return TRUE;
1807 	default:
1808 		return FALSE;
1809 	}
1810 }
1811 
1812 static void
1813 ix_config_link(struct ix_softc *sc)
1814 {
1815 	struct ixgbe_hw *hw = &sc->hw;
1816 	boolean_t sfp;
1817 
1818 	sfp = ix_is_sfp(hw);
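	/*
	 * SFP/QSFP ports are brought up through the module (mod) and
	 * multispeed fiber (msf) handlers; other media negotiate the
	 * advertised speeds through setup_link directly.
	 */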
1819 	if (sfp) {
1820 		if (hw->phy.multispeed_fiber) {
1821 			hw->mac.ops.setup_sfp(hw);
1822 			ixgbe_enable_tx_laser(hw);
1823 			ix_handle_msf(sc);
1824 		} else {
1825 			ix_handle_mod(sc);
1826 		}
1827 	} else {
1828 		uint32_t autoneg, err = 0;
1829 
1830 		if (hw->mac.ops.check_link != NULL) {
1831 			err = ixgbe_check_link(hw, &sc->link_speed,
1832 			    &sc->link_up, FALSE);
1833 			if (err)
1834 				return;
1835 		}
1836 
1837 		if (sc->advspeed != IXGBE_LINK_SPEED_UNKNOWN)
1838 			autoneg = sc->advspeed;
1839 		else
1840 			autoneg = hw->phy.autoneg_advertised;
1841 		if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
1842 			bool negotiate;
1843 
1844 			err = hw->mac.ops.get_link_capabilities(hw,
1845 			    &autoneg, &negotiate);
1846 			if (err)
1847 				return;
1848 		}
1849 
1850 		if (hw->mac.ops.setup_link != NULL) {
1851 			err = hw->mac.ops.setup_link(hw,
1852 			    autoneg, sc->link_up);
1853 			if (err)
1854 				return;
1855 		}
1856 	}
1857 }
1858 
1859 static int
1860 ix_alloc_rings(struct ix_softc *sc)
1861 {
1862 	int error, i;
1863 
1864 	/*
1865 	 * Create top level busdma tag
1866 	 */
1867 	error = bus_dma_tag_create(NULL, 1, 0,
1868 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1869 	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
1870 	    &sc->parent_tag);
1871 	if (error) {
1872 		device_printf(sc->dev, "could not create top level DMA tag\n");
1873 		return error;
1874 	}
1875 
1876 	/*
1877 	 * Allocate TX descriptor rings and buffers
1878 	 */
1879 	sc->tx_rings = kmalloc_cachealign(
1880 	    sizeof(struct ix_tx_ring) * sc->tx_ring_cnt,
1881 	    M_DEVBUF, M_WAITOK | M_ZERO);
1882 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1883 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1884 
1885 		txr->tx_sc = sc;
1886 		txr->tx_idx = i;
1887 		txr->tx_intr_vec = -1;
1888 		txr->tx_intr_cpuid = -1;
1889 		lwkt_serialize_init(&txr->tx_serialize);
1890 		callout_init_mp(&txr->tx_gc_timer);
1891 
1892 		error = ix_create_tx_ring(txr);
1893 		if (error)
1894 			return error;
1895 	}
1896 
1897 	/*
1898 	 * Allocate RX descriptor rings and buffers
1899 	 */
1900 	sc->rx_rings = kmalloc_cachealign(
1901 	    sizeof(struct ix_rx_ring) * sc->rx_ring_cnt,
1902 	    M_DEVBUF, M_WAITOK | M_ZERO);
1903 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
1904 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
1905 
1906 		rxr->rx_sc = sc;
1907 		rxr->rx_idx = i;
1908 		rxr->rx_intr_vec = -1;
1909 		lwkt_serialize_init(&rxr->rx_serialize);
1910 
1911 		error = ix_create_rx_ring(rxr);
1912 		if (error)
1913 			return error;
1914 	}
1915 
1916 	return 0;
1917 }
1918 
1919 static int
1920 ix_create_tx_ring(struct ix_tx_ring *txr)
1921 {
1922 	int error, i, tsize, ntxd;
1923 
1924 	/*
1925 	 * Validate number of transmit descriptors.  It must not exceed
1926 	 * hardware maximum, and must be multiple of IX_DBA_ALIGN.
1927 	 */
1928 	ntxd = device_getenv_int(txr->tx_sc->dev, "txd", ix_txd);
1929 	if (((ntxd * sizeof(union ixgbe_adv_tx_desc)) % IX_DBA_ALIGN) != 0 ||
1930 	    ntxd < IX_MIN_TXD || ntxd > IX_MAX_TXD) {
1931 		device_printf(txr->tx_sc->dev,
1932 		    "Using %d TX descriptors instead of %d!\n",
1933 		    IX_DEF_TXD, ntxd);
1934 		txr->tx_ndesc = IX_DEF_TXD;
1935 	} else {
1936 		txr->tx_ndesc = ntxd;
1937 	}
1938 
1939 	/*
1940 	 * Allocate TX head write-back buffer
1941 	 */
1942 	txr->tx_hdr = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
1943 	    __VM_CACHELINE_SIZE, __VM_CACHELINE_SIZE, BUS_DMA_WAITOK,
1944 	    &txr->tx_hdr_dtag, &txr->tx_hdr_map, &txr->tx_hdr_paddr);
1945 	if (txr->tx_hdr == NULL) {
1946 		device_printf(txr->tx_sc->dev,
1947 		    "Unable to allocate TX head write-back buffer\n");
1948 		return ENOMEM;
1949 	}
1950 
1951 	/*
1952 	 * Allocate TX descriptor ring
1953 	 */
1954 	tsize = roundup2(txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc),
1955 	    IX_DBA_ALIGN);
1956 	txr->tx_base = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
1957 	    IX_DBA_ALIGN, tsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
1958 	    &txr->tx_base_dtag, &txr->tx_base_map, &txr->tx_base_paddr);
1959 	if (txr->tx_base == NULL) {
1960 		device_printf(txr->tx_sc->dev,
1961 		    "Unable to allocate TX Descriptor memory\n");
1962 		return ENOMEM;
1963 	}
1964 
1965 	tsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_tx_buf) * txr->tx_ndesc);
1966 	txr->tx_buf = kmalloc_cachealign(tsize, M_DEVBUF, M_WAITOK | M_ZERO);
1967 
1968 	/*
1969 	 * Create DMA tag for TX buffers
1970 	 */
1971 	error = bus_dma_tag_create(txr->tx_sc->parent_tag,
1972 	    1, 0,		/* alignment, bounds */
1973 	    BUS_SPACE_MAXADDR,	/* lowaddr */
1974 	    BUS_SPACE_MAXADDR,	/* highaddr */
1975 	    NULL, NULL,		/* filter, filterarg */
1976 	    IX_TSO_SIZE,	/* maxsize */
1977 	    IX_MAX_SCATTER,	/* nsegments */
1978 	    PAGE_SIZE,		/* maxsegsize */
1979 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
1980 	    BUS_DMA_ONEBPAGE,	/* flags */
1981 	    &txr->tx_tag);
1982 	if (error) {
1983 		device_printf(txr->tx_sc->dev,
1984 		    "Unable to allocate TX DMA tag\n");
1985 		kfree(txr->tx_buf, M_DEVBUF);
1986 		txr->tx_buf = NULL;
1987 		return error;
1988 	}
1989 
1990 	/*
1991 	 * Create DMA maps for TX buffers
1992 	 */
1993 	for (i = 0; i < txr->tx_ndesc; ++i) {
1994 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
1995 
1996 		error = bus_dmamap_create(txr->tx_tag,
1997 		    BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &txbuf->map);
1998 		if (error) {
1999 			device_printf(txr->tx_sc->dev,
2000 			    "Unable to create TX DMA map\n");
2001 			ix_destroy_tx_ring(txr, i);
2002 			return error;
2003 		}
2004 	}
2005 
2006 	/*
2007 	 * Initialize various watermarks

2008 	 */
2009 	txr->tx_wreg_nsegs = IX_DEF_TXWREG_NSEGS;
2010 	txr->tx_intr_nsegs = txr->tx_ndesc / 16;
2011 
2012 	return 0;
2013 }
2014 
2015 static void
2016 ix_destroy_tx_ring(struct ix_tx_ring *txr, int ndesc)
2017 {
2018 	int i;
2019 
2020 	if (txr->tx_hdr != NULL) {
2021 		bus_dmamap_unload(txr->tx_hdr_dtag, txr->tx_hdr_map);
2022 		bus_dmamem_free(txr->tx_hdr_dtag,
2023 		    __DEVOLATILE(void *, txr->tx_hdr), txr->tx_hdr_map);
2024 		bus_dma_tag_destroy(txr->tx_hdr_dtag);
2025 		txr->tx_hdr = NULL;
2026 	}
2027 
2028 	if (txr->tx_base != NULL) {
2029 		bus_dmamap_unload(txr->tx_base_dtag, txr->tx_base_map);
2030 		bus_dmamem_free(txr->tx_base_dtag, txr->tx_base,
2031 		    txr->tx_base_map);
2032 		bus_dma_tag_destroy(txr->tx_base_dtag);
2033 		txr->tx_base = NULL;
2034 	}
2035 
2036 	if (txr->tx_buf == NULL)
2037 		return;
2038 
2039 	for (i = 0; i < ndesc; ++i) {
2040 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
2041 
2042 		KKASSERT(txbuf->m_head == NULL);
2043 		bus_dmamap_destroy(txr->tx_tag, txbuf->map);
2044 	}
2045 	bus_dma_tag_destroy(txr->tx_tag);
2046 
2047 	kfree(txr->tx_buf, M_DEVBUF);
2048 	txr->tx_buf = NULL;
2049 }
2050 
2051 static void
2052 ix_init_tx_ring(struct ix_tx_ring *txr)
2053 {
2054 	/* Clear the old ring contents */
2055 	bzero(txr->tx_base, sizeof(union ixgbe_adv_tx_desc) * txr->tx_ndesc);
2056 
2057 	/* Clear TX head write-back buffer */
2058 	*(txr->tx_hdr) = 0;
2059 
2060 	/* Reset indices */
2061 	txr->tx_next_avail = 0;
2062 	txr->tx_next_clean = 0;
2063 	txr->tx_nsegs = 0;
2064 	txr->tx_nmbuf = 0;
2065 	txr->tx_running = 0;
2066 
2067 	/* Set number of descriptors available */
2068 	txr->tx_avail = txr->tx_ndesc;
2069 
2070 	/* Enable this TX ring */
2071 	txr->tx_flags |= IX_TXFLAG_ENABLED;
2072 }
2073 
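/*
 * Program the hardware TX queues: per-ring descriptor base/length and
 * head/tail pointers, head write-back (with relaxed ordering disabled
 * for it), and, on 82599 and later MACs, the global TX DMA enable and
 * the MTQC queue layout.
 */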
2074 static void
2075 ix_init_tx_unit(struct ix_softc *sc)
2076 {
2077 	struct ixgbe_hw	*hw = &sc->hw;
2078 	int i;
2079 
2080 	/*
2081 	 * Setup the Base and Length of the Tx Descriptor Ring
2082 	 */
2083 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
2084 		struct ix_tx_ring *txr = &sc->tx_rings[i];
2085 		uint64_t tdba = txr->tx_base_paddr;
2086 		uint64_t hdr_paddr = txr->tx_hdr_paddr;
2087 		uint32_t txctrl;
2088 
2089 		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i), (uint32_t)tdba);
2090 		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (uint32_t)(tdba >> 32));
2091 		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
2092 		    txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc));
2093 
2094 		/* Setup the HW Tx Head and Tail descriptor pointers */
2095 		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
2096 		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
2097 
2098 		/* Disable TX head write-back relax ordering */
2099 		switch (hw->mac.type) {
2100 		case ixgbe_mac_82598EB:
2101 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
2102 			break;
2103 		case ixgbe_mac_82599EB:
2104 		case ixgbe_mac_X540:
2105 		default:
2106 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
2107 			break;
2108 		}
2109 		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
2110 		switch (hw->mac.type) {
2111 		case ixgbe_mac_82598EB:
2112 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
2113 			break;
2114 		case ixgbe_mac_82599EB:
2115 		case ixgbe_mac_X540:
2116 		default:
2117 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
2118 			break;
2119 		}
2120 
2121 		/* Enable TX head write-back */
2122 		IXGBE_WRITE_REG(hw, IXGBE_TDWBAH(i),
2123 		    (uint32_t)(hdr_paddr >> 32));
2124 		IXGBE_WRITE_REG(hw, IXGBE_TDWBAL(i),
2125 		    ((uint32_t)hdr_paddr) | IXGBE_TDWBAL_HEAD_WB_ENABLE);
2126 	}
2127 
2128 	if (hw->mac.type != ixgbe_mac_82598EB) {
2129 		uint32_t dmatxctl, rttdcs;
2130 
2131 		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
2132 		dmatxctl |= IXGBE_DMATXCTL_TE;
2133 		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
2134 
2135 		/* Disable arbiter to set MTQC */
2136 		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
2137 		rttdcs |= IXGBE_RTTDCS_ARBDIS;
2138 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
2139 
2140 		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
2141 
2142 		/* Re-enable arbiter */
2143 		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
2144 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
2145 	}
2146 }
2147 
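/*
 * Set up a context descriptor for checksum offload and/or VLAN
 * tagging; TSO frames are diverted to ix_tso_ctx_setup().  Returns
 * the number of TX descriptors consumed: 0 when no context descriptor
 * is needed, 1 otherwise.
 */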
2148 static int
2149 ix_tx_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
2150     uint32_t *cmd_type_len, uint32_t *olinfo_status)
2151 {
2152 	struct ixgbe_adv_tx_context_desc *TXD;
2153 	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2154 	int ehdrlen, ip_hlen = 0, ctxd;
2155 	boolean_t offload = TRUE;
2156 
2157 	/* First check if TSO is to be used */
2158 	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
2159 		return ix_tso_ctx_setup(txr, mp,
2160 		    cmd_type_len, olinfo_status);
2161 	}
2162 
2163 	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
2164 		offload = FALSE;
2165 
2166 	/* Indicate the whole packet as payload when not doing TSO */
2167 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
2168 
2169 	/*
2170 	 * In advanced descriptors the vlan tag must be placed into the
2171 	 * context descriptor.  Hence we need to make one even if not
2172 	 * doing checksum offloads.
2173 	 */
2174 	if (mp->m_flags & M_VLANTAG) {
2175 		vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
2176 		    IXGBE_ADVTXD_VLAN_SHIFT;
2177 	} else if (!offload) {
2178 		/* No TX descriptor is consumed */
2179 		return 0;
2180 	}
2181 
2182 	/* Set the ether header length */
2183 	ehdrlen = mp->m_pkthdr.csum_lhlen;
2184 	KASSERT(ehdrlen > 0, ("invalid ether hlen"));
2185 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
2186 
2187 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
2188 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
2189 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
2190 		ip_hlen = mp->m_pkthdr.csum_iphlen;
2191 		KASSERT(ip_hlen > 0, ("invalid ip hlen"));
2192 	}
2193 	vlan_macip_lens |= ip_hlen;
2194 
2195 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
2196 	if (mp->m_pkthdr.csum_flags & CSUM_TCP)
2197 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
2198 	else if (mp->m_pkthdr.csum_flags & CSUM_UDP)
2199 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
2200 
2201 	if (mp->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))
2202 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
2203 
2204 	/* Now ready a context descriptor */
2205 	ctxd = txr->tx_next_avail;
2206 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
2207 
2208 	/* Now copy bits into descriptor */
2209 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
2210 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
2211 	TXD->seqnum_seed = htole32(0);
2212 	TXD->mss_l4len_idx = htole32(0);
2213 
2214 	/* We've consumed the first desc, adjust counters */
2215 	if (++ctxd == txr->tx_ndesc)
2216 		ctxd = 0;
2217 	txr->tx_next_avail = ctxd;
2218 	--txr->tx_avail;
2219 
2220 	/* One TX descriptor is consumed */
2221 	return 1;
2222 }
2223 
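/*
 * Set up a TSO context descriptor: hand the L2/L3/L4 header lengths
 * and the MSS to the hardware so it can segment the frame, and
 * prepare the TSE/checksum/payload-length bits for the caller's data
 * descriptor.  Always consumes one TX descriptor.
 */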
2224 static int
2225 ix_tso_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
2226     uint32_t *cmd_type_len, uint32_t *olinfo_status)
2227 {
2228 	struct ixgbe_adv_tx_context_desc *TXD;
2229 	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2230 	uint32_t mss_l4len_idx = 0, paylen;
2231 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
2232 
2233 	ehdrlen = mp->m_pkthdr.csum_lhlen;
2234 	KASSERT(ehdrlen > 0, ("invalid ether hlen"));
2235 
2236 	ip_hlen = mp->m_pkthdr.csum_iphlen;
2237 	KASSERT(ip_hlen > 0, ("invalid ip hlen"));
2238 
2239 	tcp_hlen = mp->m_pkthdr.csum_thlen;
2240 	KASSERT(tcp_hlen > 0, ("invalid tcp hlen"));
2241 
2242 	ctxd = txr->tx_next_avail;
2243 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
2244 
2245 	if (mp->m_flags & M_VLANTAG) {
2246 		vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
2247 		    IXGBE_ADVTXD_VLAN_SHIFT;
2248 	}
2249 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
2250 	vlan_macip_lens |= ip_hlen;
2251 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
2252 
2253 	/* ADV DTYPE TUCMD */
2254 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
2255 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
2256 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
2257 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
2258 
2259 	/* MSS L4LEN IDX */
2260 	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
2261 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
2262 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
2263 
2264 	TXD->seqnum_seed = htole32(0);
2265 
2266 	if (++ctxd == txr->tx_ndesc)
2267 		ctxd = 0;
2268 
2269 	txr->tx_avail--;
2270 	txr->tx_next_avail = ctxd;
2271 
2272 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
2273 
2274 	/* This is used in the transmit desc in encap */
2275 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
2276 
2277 	*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
2278 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
2279 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
2280 
2281 	/* One TX descriptor is consumed */
2282 	return 1;
2283 }
2284 
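/*
 * Reclaim completed TX descriptors.
 *
 * 'hdr' is the hardware TX head index, normally taken from the head
 * write-back buffer programmed in ix_init_tx_unit(); descriptors from
 * tx_next_clean up to (but not including) 'hdr' are done and their
 * mbufs can be freed.
 */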
2285 static void
2286 ix_txeof(struct ix_tx_ring *txr, int hdr)
2287 {
2288 	int first, avail;
2289 
2290 	if (txr->tx_avail == txr->tx_ndesc)
2291 		return;
2292 
2293 	first = txr->tx_next_clean;
2294 	if (first == hdr)
2295 		return;
2296 
2297 	avail = txr->tx_avail;
2298 	while (first != hdr) {
2299 		struct ix_tx_buf *txbuf = &txr->tx_buf[first];
2300 
2301 		KKASSERT(avail < txr->tx_ndesc);
2302 		++avail;
2303 
2304 		if (txbuf->m_head != NULL)
2305 			ix_free_txbuf(txr, txbuf);
2306 		if (++first == txr->tx_ndesc)
2307 			first = 0;
2308 	}
2309 	txr->tx_next_clean = first;
2310 	txr->tx_avail = avail;
2311 
2312 	if (txr->tx_avail > IX_MAX_SCATTER + IX_TX_RESERVED) {
2313 		ifsq_clr_oactive(txr->tx_ifsq);
2314 		txr->tx_watchdog.wd_timer = 0;
2315 	}
2316 	txr->tx_running = IX_TX_RUNNING;
2317 }
2318 
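/*
 * Garbage-collect completed TX descriptors.
 *
 * Unlike ix_txeof() this reads the TDH register directly rather than
 * relying on a head write-back value, so it can reclaim mbufs when no
 * TX interrupt has been processed (the tx_gc_timer callout is a
 * likely caller).
 */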
2319 static void
2320 ix_txgc(struct ix_tx_ring *txr)
2321 {
2322 	int first, hdr;
2323 #ifdef INVARIANTS
2324 	int avail;
2325 #endif
2326 
2327 	if (txr->tx_avail == txr->tx_ndesc)
2328 		return;
2329 
2330 	hdr = IXGBE_READ_REG(&txr->tx_sc->hw, IXGBE_TDH(txr->tx_idx));
2331 	first = txr->tx_next_clean;
2332 	if (first == hdr)
2333 		goto done;
2334 	txr->tx_gc++;
2335 
2336 #ifdef INVARIANTS
2337 	avail = txr->tx_avail;
2338 #endif
2339 	while (first != hdr) {
2340 		struct ix_tx_buf *txbuf = &txr->tx_buf[first];
2341 
2342 #ifdef INVARIANTS
2343 		KKASSERT(avail < txr->tx_ndesc);
2344 		++avail;
2345 #endif
2346 		if (txbuf->m_head != NULL)
2347 			ix_free_txbuf(txr, txbuf);
2348 		if (++first == txr->tx_ndesc)
2349 			first = 0;
2350 	}
2351 done:
2352 	if (txr->tx_nmbuf)
2353 		txr->tx_running = IX_TX_RUNNING;
2354 }
2355 
2356 static int
2357 ix_create_rx_ring(struct ix_rx_ring *rxr)
2358 {
2359 	int i, rsize, error, nrxd;
2360 
2361 	/*
2362 	 * Validate number of receive descriptors.  It must not exceed
2363 	 * hardware maximum, and must be multiple of IX_DBA_ALIGN.
2364 	 */
2365 	nrxd = device_getenv_int(rxr->rx_sc->dev, "rxd", ix_rxd);
2366 	if (((nrxd * sizeof(union ixgbe_adv_rx_desc)) % IX_DBA_ALIGN) != 0 ||
2367 	    nrxd < IX_MIN_RXD || nrxd > IX_MAX_RXD) {
2368 		device_printf(rxr->rx_sc->dev,
2369 		    "Using %d RX descriptors instead of %d!\n",
2370 		    IX_DEF_RXD, nrxd);
2371 		rxr->rx_ndesc = IX_DEF_RXD;
2372 	} else {
2373 		rxr->rx_ndesc = nrxd;
2374 	}
2375 
2376 	/*
2377 	 * Allocate RX descriptor ring
2378 	 */
2379 	rsize = roundup2(rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc),
2380 	    IX_DBA_ALIGN);
2381 	rxr->rx_base = bus_dmamem_coherent_any(rxr->rx_sc->parent_tag,
2382 	    IX_DBA_ALIGN, rsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
2383 	    &rxr->rx_base_dtag, &rxr->rx_base_map, &rxr->rx_base_paddr);
2384 	if (rxr->rx_base == NULL) {
2385 		device_printf(rxr->rx_sc->dev,
2386 		    "Unable to allocate RX Descriptor memory\n");
2387 		return ENOMEM;
2388 	}
2389 
2390 	rsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_rx_buf) * rxr->rx_ndesc);
2391 	rxr->rx_buf = kmalloc_cachealign(rsize, M_DEVBUF, M_WAITOK | M_ZERO);
2392 
2393 	/*
2394 	 * Create DMA tag for RX buffers
2395 	 */
2396 	error = bus_dma_tag_create(rxr->rx_sc->parent_tag,
2397 	    1, 0,		/* alignment, bounds */
2398 	    BUS_SPACE_MAXADDR,	/* lowaddr */
2399 	    BUS_SPACE_MAXADDR,	/* highaddr */
2400 	    NULL, NULL,		/* filter, filterarg */
2401 	    PAGE_SIZE,		/* maxsize */
2402 	    1,			/* nsegments */
2403 	    PAGE_SIZE,		/* maxsegsize */
2404 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, /* flags */
2405 	    &rxr->rx_tag);
2406 	if (error) {
2407 		device_printf(rxr->rx_sc->dev,
2408 		    "Unable to create RX DMA tag\n");
2409 		kfree(rxr->rx_buf, M_DEVBUF);
2410 		rxr->rx_buf = NULL;
2411 		return error;
2412 	}
2413 
2414 	/*
2415 	 * Create spare DMA map for RX buffers
2416 	 */
2417 	error = bus_dmamap_create(rxr->rx_tag, BUS_DMA_WAITOK,
2418 	    &rxr->rx_sparemap);
2419 	if (error) {
2420 		device_printf(rxr->rx_sc->dev,
2421 		    "Unable to create spare RX DMA map\n");
2422 		bus_dma_tag_destroy(rxr->rx_tag);
2423 		kfree(rxr->rx_buf, M_DEVBUF);
2424 		rxr->rx_buf = NULL;
2425 		return error;
2426 	}
2427 
2428 	/*
2429 	 * Create DMA maps for RX buffers
2430 	 */
2431 	for (i = 0; i < rxr->rx_ndesc; ++i) {
2432 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2433 
2434 		error = bus_dmamap_create(rxr->rx_tag,
2435 		    BUS_DMA_WAITOK, &rxbuf->map);
2436 		if (error) {
2437 			device_printf(rxr->rx_sc->dev,
2438 			    "Unable to create RX DMA map\n");
2439 			ix_destroy_rx_ring(rxr, i);
2440 			return error;
2441 		}
2442 	}
2443 
2444 	/*
2445 	 * Initialize various watermarks
2446 	 */
2447 	rxr->rx_wreg_nsegs = IX_DEF_RXWREG_NSEGS;
2448 
2449 	return 0;
2450 }
2451 
2452 static void
2453 ix_destroy_rx_ring(struct ix_rx_ring *rxr, int ndesc)
2454 {
2455 	int i;
2456 
2457 	if (rxr->rx_base != NULL) {
2458 		bus_dmamap_unload(rxr->rx_base_dtag, rxr->rx_base_map);
2459 		bus_dmamem_free(rxr->rx_base_dtag, rxr->rx_base,
2460 		    rxr->rx_base_map);
2461 		bus_dma_tag_destroy(rxr->rx_base_dtag);
2462 		rxr->rx_base = NULL;
2463 	}
2464 
2465 	if (rxr->rx_buf == NULL)
2466 		return;
2467 
2468 	for (i = 0; i < ndesc; ++i) {
2469 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2470 
2471 		KKASSERT(rxbuf->m_head == NULL);
2472 		bus_dmamap_destroy(rxr->rx_tag, rxbuf->map);
2473 	}
2474 	bus_dmamap_destroy(rxr->rx_tag, rxr->rx_sparemap);
2475 	bus_dma_tag_destroy(rxr->rx_tag);
2476 
2477 	kfree(rxr->rx_buf, M_DEVBUF);
2478 	rxr->rx_buf = NULL;
2479 }
2480 
2481 /*
2482 ** Used to detect a descriptor that has
2483 ** been merged by Hardware RSC.
2484 */
2485 static __inline uint32_t
2486 ix_rsc_count(union ixgbe_adv_rx_desc *rx)
2487 {
2488 	return (le32toh(rx->wb.lower.lo_dword.data) &
2489 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
2490 }
2491 
2492 #if 0
2493 /*********************************************************************
2494  *
2495  *  Initialize Hardware RSC (LRO) feature on 82599
2496  *  for an RX ring; this is toggled by the LRO capability
2497  *  even though it is transparent to the stack.
2498  *
2499  *  NOTE: since this HW feature only works with IPv4, and
2500  *        our testing has shown soft LRO to be as effective,
2501  *        I have decided to disable this by default.
2502  *
2503  **********************************************************************/
2504 static void
2505 ix_setup_hw_rsc(struct ix_rx_ring *rxr)
2506 {
2507 	struct	ix_softc 	*sc = rxr->rx_sc;
2508 	struct	ixgbe_hw	*hw = &sc->hw;
2509 	uint32_t			rscctrl, rdrxctl;
2510 
2511 #if 0
2512 	/* If turning LRO/RSC off we need to disable it */
2513 	if ((sc->arpcom.ac_if.if_capenable & IFCAP_LRO) == 0) {
2514 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2515 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
2516 		return;
2517 	}
2518 #endif
2519 
2520 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
2521 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
2522 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
2523 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
2524 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
2525 
2526 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2527 	rscctrl |= IXGBE_RSCCTL_RSCEN;
2528 	/*
2529 	** Limit the total number of descriptors that
2530 	** can be combined, so it does not exceed 64K
2531 	*/
2532 	if (rxr->mbuf_sz == MCLBYTES)
2533 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
2534 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
2535 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
2536 	else if (rxr->mbuf_sz == MJUM9BYTES)
2537 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
2538 	else  /* Using 16K cluster */
2539 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
2540 
2541 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
2542 
2543 	/* Enable TCP header recognition */
2544 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
2545 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
2546 	    IXGBE_PSRTYPE_TCPHDR));
2547 
2548 	/* Disable RSC for ACK packets */
2549 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
2550 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
2551 
2552 	rxr->hw_rsc = TRUE;
2553 }
2554 #endif
2555 
2556 static int
2557 ix_init_rx_ring(struct ix_rx_ring *rxr)
2558 {
2559 	int i;
2560 
2561 	/* Clear the ring contents */
2562 	bzero(rxr->rx_base, rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2563 
2564 	/* XXX we need JUMPAGESIZE for RSC too */
2565 	if (rxr->rx_sc->max_frame_size <= MCLBYTES)
2566 		rxr->rx_mbuf_sz = MCLBYTES;
2567 	else
2568 		rxr->rx_mbuf_sz = MJUMPAGESIZE;
2569 
2570 	/* Now replenish the mbufs */
2571 	for (i = 0; i < rxr->rx_ndesc; ++i) {
2572 		int error;
2573 
2574 		error = ix_newbuf(rxr, i, TRUE);
2575 		if (error)
2576 			return error;
2577 	}
2578 
2579 	/* Setup our descriptor indices */
2580 	rxr->rx_next_check = 0;
2581 	rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2582 
2583 #if 0
2584 	/*
2585 	** Now set up the LRO interface:
2586 	*/
2587 	if (ixgbe_rsc_enable)
2588 		ix_setup_hw_rsc(rxr);
2589 #endif
2590 
2591 	return 0;
2592 }
2593 
2594 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
2595 
2596 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
2597 
2598 static void
2599 ix_init_rx_unit(struct ix_softc *sc, boolean_t polling)
2600 {
2601 	struct ixgbe_hw	*hw = &sc->hw;
2602 	struct ifnet *ifp = &sc->arpcom.ac_if;
2603 	uint32_t bufsz, fctrl, rxcsum, hlreg;
2604 	int i;
2605 
2606 	/*
2607 	 * Make sure receives are disabled while setting up the descriptor ring
2608 	 */
2609 	ixgbe_disable_rx(hw);
2610 
2611 	/* Enable broadcasts */
2612 	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
2613 	fctrl |= IXGBE_FCTRL_BAM;
2614 	if (hw->mac.type == ixgbe_mac_82598EB) {
2615 		fctrl |= IXGBE_FCTRL_DPF;
2616 		fctrl |= IXGBE_FCTRL_PMCF;
2617 	}
2618 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
2619 
2620 	/* Set for Jumbo Frames? */
2621 	hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
2622 	if (ifp->if_mtu > ETHERMTU)
2623 		hlreg |= IXGBE_HLREG0_JUMBOEN;
2624 	else
2625 		hlreg &= ~IXGBE_HLREG0_JUMBOEN;
2626 	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
2627 
2628 	KKASSERT(sc->rx_rings[0].rx_mbuf_sz >= MCLBYTES);
2629 	bufsz = (sc->rx_rings[0].rx_mbuf_sz + BSIZEPKT_ROUNDUP) >>
2630 	    IXGBE_SRRCTL_BSIZEPKT_SHIFT;
2631 
2632 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
2633 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
2634 		uint64_t rdba = rxr->rx_base_paddr;
2635 		uint32_t srrctl;
2636 
2637 		/* Setup the Base and Length of the Rx Descriptor Ring */
2638 		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i), (uint32_t)rdba);
2639 		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (uint32_t)(rdba >> 32));
2640 		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
2641 		    rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2642 
2643 		/*
2644 		 * Set up the SRRCTL register
2645 		 */
2646 		srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
2647 
2648 		srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
2649 		srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
2650 		srrctl |= bufsz;
2651 		srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
2652 		if (sc->rx_ring_inuse > 1) {
2653 			/* See the comment near ix_enable_rx_drop() */
2654 			if (sc->ifm_media &
2655 			    (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) {
2656 				srrctl &= ~IXGBE_SRRCTL_DROP_EN;
2657 				if (i == 0 && bootverbose) {
2658 					if_printf(ifp, "flow control %s, "
2659 					    "disable RX drop\n",
2660 					    ix_ifmedia2str(sc->ifm_media));
2661 				}
2662 			} else {
2663 				srrctl |= IXGBE_SRRCTL_DROP_EN;
2664 				if (i == 0 && bootverbose) {
2665 					if_printf(ifp, "flow control %s, "
2666 					    "enable RX drop\n",
2667 					    ix_ifmedia2str(sc->ifm_media));
2668 				}
2669 			}
2670 		}
2671 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
2672 
2673 		/* Setup the HW Rx Head and Tail Descriptor Pointers */
2674 		IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
2675 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
2676 	}
2677 
2678 	if (sc->hw.mac.type != ixgbe_mac_82598EB)
2679 		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), 0);
2680 
2681 	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
2682 
2683 	/*
2684 	 * Setup RSS
2685 	 */
2686 	if (sc->rx_ring_inuse > 1) {
2687 		uint8_t key[IX_NRSSRK * IX_RSSRK_SIZE];
2688 		const struct if_ringmap *rm;
2689 		int j, r, nreta, table_nent;
2690 
2691 		/*
2692 		 * NOTE:
2693 		 * When we reach here, RSS has already been disabled
2694 		 * in ix_stop(), so we can safely configure the RSS key
2695 		 * and redirect table.
2696 		 */
2697 
2698 		/*
2699 		 * Configure RSS key
2700 		 */
2701 		toeplitz_get_key(key, sizeof(key));
2702 		for (i = 0; i < IX_NRSSRK; ++i) {
2703 			uint32_t rssrk;
2704 
2705 			rssrk = IX_RSSRK_VAL(key, i);
2706 			IX_RSS_DPRINTF(sc, 1, "rssrk%d 0x%08x\n",
2707 			    i, rssrk);
2708 
2709 			IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rssrk);
2710 		}
2711 
2712 		/*
2713 		 * Configure RSS redirect table.
2714 		 */
2715 
2716 		/* Table size will differ based on MAC */
2717 		switch (hw->mac.type) {
2718 		case ixgbe_mac_X550:
2719 		case ixgbe_mac_X550EM_x:
2720 		case ixgbe_mac_X550EM_a:
2721 			nreta = IX_NRETA_X550;
2722 			break;
2723 		default:
2724 			nreta = IX_NRETA;
2725 			break;
2726 		}
2727 
2728 		table_nent = nreta * IX_RETA_SIZE;
2729 		KASSERT(table_nent <= IX_RDRTABLE_SIZE,
2730 		    ("invalid RETA count %d", nreta));
2731 		if (polling)
2732 			rm = sc->rx_rmap;
2733 		else
2734 			rm = sc->rx_rmap_intr;
2735 		if_ringmap_rdrtable(rm, sc->rdr_table, table_nent);
2736 
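		/*
		 * Each 32-bit RETA/ERETA register packs IX_RETA_SIZE
		 * ring indices, one per byte, taken in order from the
		 * redirect table built above.
		 */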
2737 		r = 0;
2738 		for (j = 0; j < nreta; ++j) {
2739 			uint32_t reta = 0;
2740 
2741 			for (i = 0; i < IX_RETA_SIZE; ++i) {
2742 				uint32_t q;
2743 
2744 				q = sc->rdr_table[r];
2745 				KASSERT(q < sc->rx_ring_inuse,
2746 				    ("invalid RX ring index %d", q));
2747 				reta |= q << (8 * i);
2748 				++r;
2749 			}
2750 			IX_RSS_DPRINTF(sc, 1, "reta 0x%08x\n", reta);
2751 			if (j < IX_NRETA) {
2752 				IXGBE_WRITE_REG(hw, IXGBE_RETA(j), reta);
2753 			} else {
2754 				IXGBE_WRITE_REG(hw, IXGBE_ERETA(j - IX_NRETA),
2755 				    reta);
2756 			}
2757 		}
2758 
2759 		/*
2760 		 * Enable multiple receive queues.
2761 		 * Enable IPv4 RSS standard hash functions.
2762 		 */
2763 		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
2764 		    IXGBE_MRQC_RSSEN |
2765 		    IXGBE_MRQC_RSS_FIELD_IPV4 |
2766 		    IXGBE_MRQC_RSS_FIELD_IPV4_TCP);
2767 
2768 		/*
2769 		 * NOTE:
2770 		 * PCSD must be enabled to enable multiple
2771 		 * receive queues.
2772 		 */
2773 		rxcsum |= IXGBE_RXCSUM_PCSD;
2774 	}
2775 
2776 	if (ifp->if_capenable & IFCAP_RXCSUM)
2777 		rxcsum |= IXGBE_RXCSUM_PCSD;
2778 
2779 	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
2780 }
2781 
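/*
 * Publish refilled RX descriptors to the hardware by writing the RX
 * tail register (RDT) with the index of the last refilled descriptor,
 * i.e. the one just before 'i'.
 */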
2782 static __inline void
2783 ix_rx_refresh(struct ix_rx_ring *rxr, int i)
2784 {
2785 	if (--i < 0)
2786 		i = rxr->rx_ndesc - 1;
2787 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, IXGBE_RDT(rxr->rx_idx), i);
2788 }
2789 
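/*
 * Translate the hardware RX checksum status bits into mbuf csum_flags;
 * only IPv4 frames, and TCP/UDP payloads within them, are handled.
 */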
2790 static __inline void
2791 ix_rxcsum(uint32_t staterr, struct mbuf *mp, uint32_t ptype)
2792 {
2793 	if ((ptype &
2794 	     (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_IPV4_EX)) == 0) {
2795 		/* Not IPv4 */
2796 		return;
2797 	}
2798 
2799 	if ((staterr & (IXGBE_RXD_STAT_IPCS | IXGBE_RXDADV_ERR_IPE)) ==
2800 	    IXGBE_RXD_STAT_IPCS)
2801 		mp->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID;
2802 
2803 	if ((ptype &
2804 	     (IXGBE_RXDADV_PKTTYPE_TCP | IXGBE_RXDADV_PKTTYPE_UDP)) == 0) {
2805 		/*
2806 		 * - Neither TCP nor UDP
2807 		 * - IPv4 fragment
2808 		 */
2809 		return;
2810 	}
2811 
2812 	if ((staterr & (IXGBE_RXD_STAT_L4CS | IXGBE_RXDADV_ERR_TCPE)) ==
2813 	    IXGBE_RXD_STAT_L4CS) {
2814 		mp->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
2815 		    CSUM_FRAG_NOT_CHECKED;
2816 		mp->m_pkthdr.csum_data = htons(0xffff);
2817 	}
2818 }
2819 
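/*
 * Convert the hardware RSS hash type into packet steering info and
 * attach the Toeplitz hash to the mbuf; hash types the stack does not
 * steer on yield NULL and the mbuf is left unhashed.
 */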
2820 static __inline struct pktinfo *
2821 ix_rssinfo(struct mbuf *m, struct pktinfo *pi,
2822     uint32_t hash, uint32_t hashtype, uint32_t ptype)
2823 {
2824 	switch (hashtype) {
2825 	case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
2826 		pi->pi_netisr = NETISR_IP;
2827 		pi->pi_flags = 0;
2828 		pi->pi_l3proto = IPPROTO_TCP;
2829 		break;
2830 
2831 	case IXGBE_RXDADV_RSSTYPE_IPV4:
2832 		if ((ptype & IXGBE_RXDADV_PKTTYPE_UDP) == 0) {
2833 			/* Not UDP or is fragment */
2834 			return NULL;
2835 		}
2836 		pi->pi_netisr = NETISR_IP;
2837 		pi->pi_flags = 0;
2838 		pi->pi_l3proto = IPPROTO_UDP;
2839 		break;
2840 
2841 	default:
2842 		return NULL;
2843 	}
2844 
2845 	m_sethash(m, toeplitz_hash(hash));
2846 	return pi;
2847 }
2848 
2849 static __inline void
2850 ix_setup_rxdesc(union ixgbe_adv_rx_desc *rxd, const struct ix_rx_buf *rxbuf)
2851 {
2852 	rxd->read.pkt_addr = htole64(rxbuf->paddr);
2853 	rxd->wb.upper.status_error = 0;
2854 }
2855 
2856 static void
2857 ix_rx_discard(struct ix_rx_ring *rxr, int i, boolean_t eop)
2858 {
2859 	struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2860 
2861 	/*
2862 	 * XXX discard may not be correct
2863 	 */
2864 	if (eop) {
2865 		IFNET_STAT_INC(&rxr->rx_sc->arpcom.ac_if, ierrors, 1);
2866 		rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2867 	} else {
2868 		rxr->rx_flags |= IX_RXRING_FLAG_DISC;
2869 	}
2870 	if (rxbuf->fmp != NULL) {
2871 		m_freem(rxbuf->fmp);
2872 		rxbuf->fmp = NULL;
2873 		rxbuf->lmp = NULL;
2874 	}
2875 	ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
2876 }
2877 
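/*
 * RX completion processing: walk the descriptor ring from
 * rx_next_check, chain multi-descriptor frames (including hardware
 * RSC chains) through the per-buffer fmp/lmp pointers, and hand
 * completed frames to the stack.  'count' bounds the number of
 * frames (EOP descriptors) processed in one call.
 */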
2878 static void
2879 ix_rxeof(struct ix_rx_ring *rxr, int count)
2880 {
2881 	struct ifnet *ifp = &rxr->rx_sc->arpcom.ac_if;
2882 	int i, nsegs = 0, cpuid = mycpuid;
2883 
2884 	i = rxr->rx_next_check;
2885 	while (count != 0) {
2886 		struct ix_rx_buf *rxbuf, *nbuf = NULL;
2887 		union ixgbe_adv_rx_desc	*cur;
2888 		struct mbuf *sendmp = NULL, *mp;
2889 		struct pktinfo *pi = NULL, pi0;
2890 		uint32_t rsc = 0, ptype, staterr, hash, hashtype;
2891 		uint16_t len;
2892 		boolean_t eop;
2893 
2894 		cur = &rxr->rx_base[i];
2895 		staterr = le32toh(cur->wb.upper.status_error);
2896 
2897 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
2898 			break;
2899 		++nsegs;
2900 
2901 		rxbuf = &rxr->rx_buf[i];
2902 		mp = rxbuf->m_head;
2903 
2904 		len = le16toh(cur->wb.upper.length);
2905 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
2906 		    IXGBE_RXDADV_PKTTYPE_MASK;
2907 		hash = le32toh(cur->wb.lower.hi_dword.rss);
2908 		hashtype = le32toh(cur->wb.lower.lo_dword.data) &
2909 		    IXGBE_RXDADV_RSSTYPE_MASK;
2910 
2911 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
2912 		if (eop)
2913 			--count;
2914 
2915 		/*
2916 		 * Make sure bad packets are discarded
2917 		 */
2918 		if ((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) ||
2919 		    (rxr->rx_flags & IX_RXRING_FLAG_DISC)) {
2920 			ix_rx_discard(rxr, i, eop);
2921 			goto next_desc;
2922 		}
2923 
2924 		bus_dmamap_sync(rxr->rx_tag, rxbuf->map, BUS_DMASYNC_POSTREAD);
2925 		if (ix_newbuf(rxr, i, FALSE) != 0) {
2926 			ix_rx_discard(rxr, i, eop);
2927 			goto next_desc;
2928 		}
2929 
2930 		/*
2931 		 * On the 82599, which supports hardware LRO, a frame's
2932 		 * fragments need not occupy sequential descriptors;
2933 		 * rather, the next descriptor is indicated in bits
2934 		 * of the descriptor.  This also means that we might
2935 		 * process more than one packet at a time, something
2936 		 * that was never true before; it required eliminating
2937 		 * the global chain pointers in favor of what we are
2938 		 * doing here.
2939 		 */
2940 		if (!eop) {
2941 			int nextp;
2942 
2943 			/*
2944 			 * Figure out the next descriptor
2945 			 * of this frame.
2946 			 */
2947 			if (rxr->rx_flags & IX_RXRING_FLAG_LRO)
2948 				rsc = ix_rsc_count(cur);
2949 			if (rsc) { /* Get hardware index */
2950 				nextp = ((staterr &
2951 				    IXGBE_RXDADV_NEXTP_MASK) >>
2952 				    IXGBE_RXDADV_NEXTP_SHIFT);
2953 			} else { /* Just sequential */
2954 				nextp = i + 1;
2955 				if (nextp == rxr->rx_ndesc)
2956 					nextp = 0;
2957 			}
2958 			nbuf = &rxr->rx_buf[nextp];
2959 			prefetch(nbuf);
2960 		}
2961 		mp->m_len = len;
2962 
2963 		/*
2964 		 * Rather than using the fmp/lmp global pointers
2965 		 * we now keep the head of a packet chain in the
2966 		 * buffer struct and pass this along from one
2967 		 * descriptor to the next, until we get EOP.
2968 		 */
2969 		if (rxbuf->fmp == NULL) {
2970 			mp->m_pkthdr.len = len;
2971 			rxbuf->fmp = mp;
2972 			rxbuf->lmp = mp;
2973 		} else {
2974 			rxbuf->fmp->m_pkthdr.len += len;
2975 			rxbuf->lmp->m_next = mp;
2976 			rxbuf->lmp = mp;
2977 		}
2978 
2979 		if (nbuf != NULL) {
2980 			/*
2981 			 * Not the last fragment of this frame,
2982 			 * pass this fragment list on
2983 			 */
2984 			nbuf->fmp = rxbuf->fmp;
2985 			nbuf->lmp = rxbuf->lmp;
2986 		} else {
2987 			/*
2988 			 * Send this frame
2989 			 */
2990 			sendmp = rxbuf->fmp;
2991 
2992 			sendmp->m_pkthdr.rcvif = ifp;
2993 			IFNET_STAT_INC(ifp, ipackets, 1);
2994 #ifdef IX_RSS_DEBUG
2995 			rxr->rx_pkts++;
2996 #endif
2997 
2998 			/* Process vlan info */
2999 			if (staterr & IXGBE_RXD_STAT_VP) {
3000 				sendmp->m_pkthdr.ether_vlantag =
3001 				    le16toh(cur->wb.upper.vlan);
3002 				sendmp->m_flags |= M_VLANTAG;
3003 			}
3004 			if (ifp->if_capenable & IFCAP_RXCSUM)
3005 				ix_rxcsum(staterr, sendmp, ptype);
3006 			if (ifp->if_capenable & IFCAP_RSS) {
3007 				pi = ix_rssinfo(sendmp, &pi0,
3008 				    hash, hashtype, ptype);
3009 			}
3010 		}
3011 		rxbuf->fmp = NULL;
3012 		rxbuf->lmp = NULL;
3013 next_desc:
3014 		/* Advance our pointers to the next descriptor. */
3015 		if (++i == rxr->rx_ndesc)
3016 			i = 0;
3017 
3018 		if (sendmp != NULL)
3019 			ifp->if_input(ifp, sendmp, pi, cpuid);
3020 
3021 		if (nsegs >= rxr->rx_wreg_nsegs) {
3022 			ix_rx_refresh(rxr, i);
3023 			nsegs = 0;
3024 		}
3025 	}
3026 	rxr->rx_next_check = i;
3027 
3028 	if (nsegs > 0)
3029 		ix_rx_refresh(rxr, i);
3030 }
3031 
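/*
 * Enable hardware VLAN tag stripping: a global VLNCTRL bit on the
 * 82598, per-queue RXDCTL.VME on later MACs.
 */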
3032 static void
3033 ix_set_vlan(struct ix_softc *sc)
3034 {
3035 	struct ixgbe_hw *hw = &sc->hw;
3036 	uint32_t ctrl;
3037 
3038 	if (hw->mac.type == ixgbe_mac_82598EB) {
3039 		ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3040 		ctrl |= IXGBE_VLNCTRL_VME;
3041 		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
3042 	} else {
3043 		int i;
3044 
3045 		/*
3046 		 * On 82599 and later chips the VLAN enable is
3047 		 * per queue in RXDCTL
3048 		 */
3049 		for (i = 0; i < sc->rx_ring_inuse; ++i) {
3050 			ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
3051 			ctrl |= IXGBE_RXDCTL_VME;
3052 			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
3053 		}
3054 	}
3055 }
3056 
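/*
 * Unmask interrupts: re-enable the handler serializers, build the
 * "other causes" mask appropriate for the MAC type, program EIAC for
 * MSI-X auto-clear (or fold the ring bits into EIMS for line/MSI
 * interrupts), and finally unmask each active ring's vector.
 */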
3057 static void
3058 ix_enable_intr(struct ix_softc *sc)
3059 {
3060 	struct ixgbe_hw	*hw = &sc->hw;
3061 	uint32_t fwsm;
3062 	int i;
3063 
3064 	for (i = 0; i < sc->intr_cnt; ++i)
3065 		lwkt_serialize_handler_enable(sc->intr_data[i].intr_serialize);
3066 
3067 	sc->intr_mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
3068 
3069 	/* Enable Fan Failure detection */
3070 	if (hw->device_id == IXGBE_DEV_ID_82598AT)
3071 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
3072 
3073 	switch (hw->mac.type) {
3074 	case ixgbe_mac_82599EB:
3075 		sc->intr_mask |= IXGBE_EIMS_ECC;
3076 		/* Temperature sensor on some adapters */
3077 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP0;
3078 		/* SFP+ (RX_LOS_N & MOD_ABS_N) */
3079 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
3080 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP2;
3081 		break;
3082 
3083 	case ixgbe_mac_X540:
3084 		sc->intr_mask |= IXGBE_EIMS_ECC;
3085 		/* Detect if Thermal Sensor is enabled */
3086 		fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
3087 		if (fwsm & IXGBE_FWSM_TS_ENABLED)
3088 			sc->intr_mask |= IXGBE_EIMS_TS;
3089 		break;
3090 
3091 	case ixgbe_mac_X550:
3092 	case ixgbe_mac_X550EM_a:
3093 	case ixgbe_mac_X550EM_x:
3094 		sc->intr_mask |= IXGBE_EIMS_ECC;
3095 		/* MAC thermal sensor is automatically enabled */
3096 		sc->intr_mask |= IXGBE_EIMS_TS;
3097 		/* Some devices use SDP0 for important information */
3098 		if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
3099 		    hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T)
3100 			sc->intr_mask |= IXGBE_EIMS_GPI_SDP0_BY_MAC(hw);
3101 		/* FALL THROUGH */
3102 	default:
3103 		break;
3104 	}
3105 
3106 	/* With MSI-X we use auto clear for RX and TX rings */
3107 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
3108 		/*
3109 		 * There are no EIAC1/EIAC2 for newer chips; the related
3110 		 * bits for TX and RX rings > 16 are always auto clear.
3111 		 *
3112 		 * XXX which bits?  There are _no_ documented EICR1 and
3113 		 * EICR2 at all; only EICR.
3114 		 */
3115 		IXGBE_WRITE_REG(hw, IXGBE_EIAC, IXGBE_EIMS_RTX_QUEUE);
3116 	} else {
3117 		sc->intr_mask |= IX_TX_INTR_MASK | IX_RX0_INTR_MASK;
3118 
3119 		KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
3120 		if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS)
3121 			sc->intr_mask |= IX_RX1_INTR_MASK;
3122 	}
3123 
3124 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
3125 
3126 	/*
3127 	 * Enable RX and TX rings for MSI-X
3128 	 */
3129 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
3130 		for (i = 0; i < sc->tx_ring_inuse; ++i) {
3131 			const struct ix_tx_ring *txr = &sc->tx_rings[i];
3132 
3133 			if (txr->tx_intr_vec >= 0) {
3134 				IXGBE_WRITE_REG(hw, txr->tx_eims,
3135 				    txr->tx_eims_val);
3136 			}
3137 		}
3138 		for (i = 0; i < sc->rx_ring_inuse; ++i) {
3139 			const struct ix_rx_ring *rxr = &sc->rx_rings[i];
3140 
3141 			KKASSERT(rxr->rx_intr_vec >= 0);
3142 			IXGBE_WRITE_REG(hw, rxr->rx_eims, rxr->rx_eims_val);
3143 		}
3144 	}
3145 
3146 	IXGBE_WRITE_FLUSH(hw);
3147 }
3148 
3149 static void
3150 ix_disable_intr(struct ix_softc *sc)
3151 {
3152 	int i;
3153 
3154 	if (sc->intr_type == PCI_INTR_TYPE_MSIX)
3155 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIAC, 0);
3156 
3157 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
3158 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, ~0);
3159 	} else {
3160 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, 0xFFFF0000);
3161 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(0), ~0);
3162 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(1), ~0);
3163 	}
3164 	IXGBE_WRITE_FLUSH(&sc->hw);
3165 
3166 	for (i = 0; i < sc->intr_cnt; ++i)
3167 		lwkt_serialize_handler_disable(sc->intr_data[i].intr_serialize);
3168 }
3169 
3170 uint16_t
3171 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, uint32_t reg)
3172 {
3173 	return pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
3174 	    reg, 2);
3175 }
3176 
3177 void
3178 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, uint32_t reg, uint16_t value)
3179 {
3180 	pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
3181 	    reg, value, 2);
3182 }
3183 
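/*
 * Determine the PCIe width and speed of the slot this adapter sits in
 * (walking up the PCI tree for the 82599 quad-port SFP part) and warn
 * if the slot may limit 10GbE throughput.
 */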
3184 static void
3185 ix_slot_info(struct ix_softc *sc)
3186 {
3187 	struct ixgbe_hw *hw = &sc->hw;
3188 	device_t dev = sc->dev;
3189 	struct ixgbe_mac_info *mac = &hw->mac;
3190 	uint16_t link;
3191 	uint32_t offset;
3192 
3193 	/* For most devices simply call the shared code routine */
3194 	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
3195 		ixgbe_get_bus_info(hw);
3196 		/* These devices don't use PCI-E */
3197 		if (hw->mac.type == ixgbe_mac_X550EM_x ||
3198 		    hw->mac.type == ixgbe_mac_X550EM_a)
3199 			return;
3200 		goto display;
3201 	}
3202 
3203 	/*
3204 	 * For the Quad port adapter we need to parse back
3205 	 * up the PCI tree to find the speed of the expansion
3206 	 * slot into which this adapter is plugged. A bit more work.
3207 	 */
3208 	dev = device_get_parent(device_get_parent(dev));
3209 #ifdef IXGBE_DEBUG
3210 	device_printf(dev, "parent pcib = %x,%x,%x\n",
3211 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
3212 #endif
3213 	dev = device_get_parent(device_get_parent(dev));
3214 #ifdef IXGBE_DEBUG
3215 	device_printf(dev, "slot pcib = %x,%x,%x\n",
3216 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
3217 #endif
3218 	/* Now get the PCI Express Capabilities offset */
3219 	offset = pci_get_pciecap_ptr(dev);
3220 	/* ...and read the Link Status Register */
3221 	link = pci_read_config(dev, offset + PCIER_LINKSTAT, 2);
3222 	switch (link & IXGBE_PCI_LINK_WIDTH) {
3223 	case IXGBE_PCI_LINK_WIDTH_1:
3224 		hw->bus.width = ixgbe_bus_width_pcie_x1;
3225 		break;
3226 	case IXGBE_PCI_LINK_WIDTH_2:
3227 		hw->bus.width = ixgbe_bus_width_pcie_x2;
3228 		break;
3229 	case IXGBE_PCI_LINK_WIDTH_4:
3230 		hw->bus.width = ixgbe_bus_width_pcie_x4;
3231 		break;
3232 	case IXGBE_PCI_LINK_WIDTH_8:
3233 		hw->bus.width = ixgbe_bus_width_pcie_x8;
3234 		break;
3235 	default:
3236 		hw->bus.width = ixgbe_bus_width_unknown;
3237 		break;
3238 	}
3239 
3240 	switch (link & IXGBE_PCI_LINK_SPEED) {
3241 	case IXGBE_PCI_LINK_SPEED_2500:
3242 		hw->bus.speed = ixgbe_bus_speed_2500;
3243 		break;
3244 	case IXGBE_PCI_LINK_SPEED_5000:
3245 		hw->bus.speed = ixgbe_bus_speed_5000;
3246 		break;
3247 	case IXGBE_PCI_LINK_SPEED_8000:
3248 		hw->bus.speed = ixgbe_bus_speed_8000;
3249 		break;
3250 	default:
3251 		hw->bus.speed = ixgbe_bus_speed_unknown;
3252 		break;
3253 	}
3254 
3255 	mac->ops.set_lan_id(hw);
3256 
3257 display:
3258 	device_printf(dev, "PCI Express Bus: Speed %s %s\n",
3259 	    hw->bus.speed == ixgbe_bus_speed_8000 ? "8.0GT/s" :
3260 	    hw->bus.speed == ixgbe_bus_speed_5000 ? "5.0GT/s" :
3261 	    hw->bus.speed == ixgbe_bus_speed_2500 ? "2.5GT/s" : "Unknown",
3262 	    hw->bus.width == ixgbe_bus_width_pcie_x8 ? "Width x8" :
3263 	    hw->bus.width == ixgbe_bus_width_pcie_x4 ? "Width x4" :
3264 	    hw->bus.width == ixgbe_bus_width_pcie_x1 ? "Width x1" : "Unknown");
3265 
3266 	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP &&
3267 	    hw->bus.width <= ixgbe_bus_width_pcie_x4 &&
3268 	    hw->bus.speed == ixgbe_bus_speed_2500) {
3269 		device_printf(dev, "For optimal performance a x8 "
3270 		    "PCIE, or x4 PCIE Gen2 slot is required.\n");
3271 	} else if (hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP &&
3272 	    hw->bus.width <= ixgbe_bus_width_pcie_x8 &&
3273 	    hw->bus.speed < ixgbe_bus_speed_8000) {
3274 		device_printf(dev, "For optimal performance a x8 "
3275 		    "PCIE Gen3 slot is required.\n");
3276 	}
3277 }
3278 
3279 /*
3280  * TODO comment is incorrect
3281  *
3282  * Setup the correct IVAR register for a particular MSIX interrupt
3283  * - entry is the register array entry
3284  * - vector is the MSIX vector for this queue
3285  * - type is RX/TX/MISC
3286  */
3287 static void
3288 ix_set_ivar(struct ix_softc *sc, uint8_t entry, uint8_t vector,
3289     int8_t type)
3290 {
3291 	struct ixgbe_hw *hw = &sc->hw;
3292 	uint32_t ivar, index;
3293 
3294 	vector |= IXGBE_IVAR_ALLOC_VAL;
3295 
3296 	switch (hw->mac.type) {
3297 	case ixgbe_mac_82598EB:
3298 		if (type == -1)
3299 			entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
3300 		else
3301 			entry += (type * 64);
3302 		index = (entry >> 2) & 0x1F;
3303 		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
3304 		ivar &= ~(0xFF << (8 * (entry & 0x3)));
3305 		ivar |= (vector << (8 * (entry & 0x3)));
3306 		IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
3307 		break;
3308 
3309 	case ixgbe_mac_82599EB:
3310 	case ixgbe_mac_X540:
3311 	case ixgbe_mac_X550:
3312 	case ixgbe_mac_X550EM_a:
3313 	case ixgbe_mac_X550EM_x:
3314 		if (type == -1) { /* MISC IVAR */
3315 			index = (entry & 1) * 8;
3316 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
3317 			ivar &= ~(0xFF << index);
3318 			ivar |= (vector << index);
3319 			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
3320 		} else {	/* RX/TX IVARS */
3321 			index = (16 * (entry & 1)) + (8 * type);
3322 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
3323 			ivar &= ~(0xFF << index);
3324 			ivar |= (vector << index);
3325 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
3326 		}
3327 		/* FALL THROUGH */
3328 	default:
3329 		break;
3330 	}
3331 }
3332 
3333 static boolean_t
3334 ix_sfp_probe(struct ix_softc *sc)
3335 {
3336 	struct ixgbe_hw	*hw = &sc->hw;
3337 
3338 	if (hw->phy.type == ixgbe_phy_nl &&
3339 	    hw->phy.sfp_type == ixgbe_sfp_type_not_present) {
3340 		int32_t ret;
3341 
3342 		ret = hw->phy.ops.identify_sfp(hw);
3343 		if (ret)
3344 			return FALSE;
3345 
3346 		ret = hw->phy.ops.reset(hw);
3347 		if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3348 			if_printf(&sc->arpcom.ac_if,
3349 			     "Unsupported SFP+ module detected!  "
3350 			     "Reload driver with supported module.\n");
3351 			sc->sfp_probe = FALSE;
3352 			return FALSE;
3353 		}
3354 		if_printf(&sc->arpcom.ac_if, "SFP+ module detected!\n");
3355 
3356 		/* We now have supported optics */
3357 		sc->sfp_probe = FALSE;
3358 
3359 		return TRUE;
3360 	}
3361 	return FALSE;
3362 }
3363 
3364 static void
3365 ix_handle_link(struct ix_softc *sc)
3366 {
3367 	ixgbe_check_link(&sc->hw, &sc->link_speed, &sc->link_up, 0);
3368 	ix_update_link_status(sc);
3369 }
3370 
3371 /*
3372  * Handling SFP module
3373  */
3374 static void
3375 ix_handle_mod(struct ix_softc *sc)
3376 {
3377 	struct ixgbe_hw *hw = &sc->hw;
3378 	uint32_t err;
3379 
3380 	err = hw->phy.ops.identify_sfp(hw);
3381 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3382 		if_printf(&sc->arpcom.ac_if,
3383 		    "Unsupported SFP+ module type was detected.\n");
3384 		return;
3385 	}
3386 	err = hw->mac.ops.setup_sfp(hw);
3387 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3388 		if_printf(&sc->arpcom.ac_if,
3389 		    "Setup failure - unsupported SFP+ module type.\n");
3390 		return;
3391 	}
3392 	ix_handle_msf(sc);
3393 }
3394 
3395 /*
3396  * Handling MSF (multispeed fiber)
3397  */
3398 static void
3399 ix_handle_msf(struct ix_softc *sc)
3400 {
3401 	struct ixgbe_hw *hw = &sc->hw;
3402 	uint32_t autoneg;
3403 
3404 	hw->phy.ops.identify_sfp(hw);
3405 	ix_init_media(sc);
3406 
3407 	if (sc->advspeed != IXGBE_LINK_SPEED_UNKNOWN)
3408 		autoneg = sc->advspeed;
3409 	else
3410 		autoneg = hw->phy.autoneg_advertised;
3411 	if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
3412 		bool negotiate;
3413 
3414 		hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
3415 	}
3416 	if (hw->mac.ops.setup_link != NULL)
3417 		hw->mac.ops.setup_link(hw, autoneg, TRUE);
3418 }
3419 
3420 static void
3421 ix_handle_phy(struct ix_softc *sc)
3422 {
3423 	struct ixgbe_hw *hw = &sc->hw;
3424 	int error;
3425 
3426 	error = hw->phy.ops.handle_lasi(hw);
3427 	if (error == IXGBE_ERR_OVERTEMP) {
3428 		if_printf(&sc->arpcom.ac_if,
3429 		    "CRITICAL: EXTERNAL PHY OVER TEMP!!  "
3430 		    "PHY will downshift to lower power state!\n");
3431 	} else if (error) {
3432 		if_printf(&sc->arpcom.ac_if,
3433 		    "Error handling LASI interrupt: %d\n", error);
3434 	}
3435 }
3436 
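/*
 * Accumulate the hardware statistics registers into the softc
 * counters and refresh the ifnet input error/drop counters.
 */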
3437 static void
3438 ix_update_stats(struct ix_softc *sc)
3439 {
3440 	struct ifnet *ifp = &sc->arpcom.ac_if;
3441 	struct ixgbe_hw *hw = &sc->hw;
3442 	uint32_t missed_rx = 0, bprc, lxon, lxoff, total;
3443 	uint64_t total_missed_rx = 0;
3444 	int i;
3445 
3446 	sc->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
3447 	sc->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
3448 	sc->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
3449 	sc->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
3450 
3451 	for (i = 0; i < 16; i++) {
3452 		sc->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
3453 		sc->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
3454 		sc->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
3455 	}
3456 	sc->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
3457 	sc->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
3458 	sc->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
3459 
3460 	/* Hardware workaround, gprc counts missed packets */
3461 	sc->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
3462 	sc->stats.gprc -= missed_rx;
3463 
3464 	if (hw->mac.type != ixgbe_mac_82598EB) {
3465 		sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
3466 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
3467 		sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
3468 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
3469 		sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
3470 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
3471 		sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
3472 		sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
3473 	} else {
3474 		sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
3475 		sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
3476 		/* 82598 only has a counter in the high register */
3477 		sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
3478 		sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
3479 		sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
3480 	}
3481 
3482 	/*
3483 	 * Workaround: mprc hardware is incorrectly counting
3484 	 * broadcasts, so for now we subtract those.
3485 	 */
3486 	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
3487 	sc->stats.bprc += bprc;
3488 	sc->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
3489 	if (hw->mac.type == ixgbe_mac_82598EB)
3490 		sc->stats.mprc -= bprc;
3491 
3492 	sc->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
3493 	sc->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
3494 	sc->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
3495 	sc->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
3496 	sc->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
3497 	sc->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
3498 
3499 	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
3500 	sc->stats.lxontxc += lxon;
3501 	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
3502 	sc->stats.lxofftxc += lxoff;
3503 	total = lxon + lxoff;
3504 
3505 	sc->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
3506 	sc->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
3507 	sc->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
3508 	sc->stats.gptc -= total;
3509 	sc->stats.mptc -= total;
3510 	sc->stats.ptc64 -= total;
3511 	sc->stats.gotc -= total * ETHER_MIN_LEN;
3512 
3513 	sc->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
3514 	sc->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
3515 	sc->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
3516 	sc->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
3517 	sc->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
3518 	sc->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
3519 	sc->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
3520 	sc->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
3521 	sc->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
3522 	sc->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
3523 	sc->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
3524 	sc->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
3525 	sc->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
3526 	sc->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
3527 	sc->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
3528 	sc->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC);
3529 	sc->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
3530 	sc->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
3531 	/* Only read FCOE on 82599 */
3532 	if (hw->mac.type != ixgbe_mac_82598EB) {
3533 		sc->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
3534 		sc->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
3535 		sc->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
3536 		sc->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
3537 		sc->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
3538 	}
3539 
3540 	/* Rx Errors */
3541 	IFNET_STAT_SET(ifp, iqdrops, total_missed_rx);
3542 	IFNET_STAT_SET(ifp, ierrors, sc->stats.crcerrs + sc->stats.rlec);
3543 }
3544 
3545 #if 0
3546 /*
3547  * Add sysctl variables, one per statistic, to the system.
3548  */
3549 static void
3550 ix_add_hw_stats(struct ix_softc *sc)
3551 {
3552 
3553 	device_t dev = sc->dev;
3554 
3555 	struct ix_tx_ring *txr = sc->tx_rings;
3556 	struct ix_rx_ring *rxr = sc->rx_rings;
3557 
3558 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
3559 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
3560 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
3561 	struct ixgbe_hw_stats *stats = &sc->stats;
3562 
3563 	struct sysctl_oid *stat_node, *queue_node;
3564 	struct sysctl_oid_list *stat_list, *queue_list;
3565 
3566 #define QUEUE_NAME_LEN 32
3567 	char namebuf[QUEUE_NAME_LEN];
3568 
3569 	/* MAC stats get their own sub node */
3570 
3571 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
3572 				    CTLFLAG_RD, NULL, "MAC Statistics");
3573 	stat_list = SYSCTL_CHILDREN(stat_node);
3574 
3575 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
3576 			CTLFLAG_RD, &stats->crcerrs,
3577 			"CRC Errors");
3578 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
3579 			CTLFLAG_RD, &stats->illerrc,
3580 			"Illegal Byte Errors");
3581 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
3582 			CTLFLAG_RD, &stats->errbc,
3583 			"Byte Errors");
3584 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
3585 			CTLFLAG_RD, &stats->mspdc,
3586 			"MAC Short Packets Discarded");
3587 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
3588 			CTLFLAG_RD, &stats->mlfc,
3589 			"MAC Local Faults");
3590 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
3591 			CTLFLAG_RD, &stats->mrfc,
3592 			"MAC Remote Faults");
3593 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
3594 			CTLFLAG_RD, &stats->rlec,
3595 			"Receive Length Errors");
3596 
3597 	/* Flow Control stats */
3598 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
3599 			CTLFLAG_RD, &stats->lxontxc,
3600 			"Link XON Transmitted");
3601 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
3602 			CTLFLAG_RD, &stats->lxonrxc,
3603 			"Link XON Received");
3604 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
3605 			CTLFLAG_RD, &stats->lxofftxc,
3606 			"Link XOFF Transmitted");
3607 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
3608 			CTLFLAG_RD, &stats->lxoffrxc,
3609 			"Link XOFF Received");
3610 
3611 	/* Packet Reception Stats */
3612 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
3613 			CTLFLAG_RD, &stats->tor,
3614 			"Total Octets Received");
3615 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
3616 			CTLFLAG_RD, &stats->gorc,
3617 			"Good Octets Received");
3618 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
3619 			CTLFLAG_RD, &stats->tpr,
3620 			"Total Packets Received");
3621 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
3622 			CTLFLAG_RD, &stats->gprc,
3623 			"Good Packets Received");
3624 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
3625 			CTLFLAG_RD, &stats->mprc,
3626 			"Multicast Packets Received");
3627 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
3628 			CTLFLAG_RD, &stats->bprc,
3629 			"Broadcast Packets Received");
3630 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
3631 			CTLFLAG_RD, &stats->prc64,
3632 			"64 byte frames received ");
3633 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
3634 			CTLFLAG_RD, &stats->prc127,
3635 			"65-127 byte frames received");
3636 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
3637 			CTLFLAG_RD, &stats->prc255,
3638 			"128-255 byte frames received");
3639 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
3640 			CTLFLAG_RD, &stats->prc511,
3641 			"256-511 byte frames received");
3642 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
3643 			CTLFLAG_RD, &stats->prc1023,
3644 			"512-1023 byte frames received");
3645 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
3646 			CTLFLAG_RD, &stats->prc1522,
3647 			"1024-1522 byte frames received");
3648 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
3649 			CTLFLAG_RD, &stats->ruc,
3650 			"Receive Undersized");
3651 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
3652 			CTLFLAG_RD, &stats->rfc,
3653 			"Fragmented Packets Received ");
3654 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
3655 			CTLFLAG_RD, &stats->roc,
3656 			"Oversized Packets Received");
3657 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
3658 			CTLFLAG_RD, &stats->rjc,
3659 			"Received Jabber");
3660 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
3661 			CTLFLAG_RD, &stats->mngprc,
3662 			"Management Packets Received");
3663 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
3664 			CTLFLAG_RD, &stats->mngpdc,
3665 			"Management Packets Dropped");
3666 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
3667 			CTLFLAG_RD, &stats->xec,
3668 			"Checksum Errors");
3669 
3670 	/* Packet Transmission Stats */
3671 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
3672 			CTLFLAG_RD, &stats->gotc,
3673 			"Good Octets Transmitted");
3674 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
3675 			CTLFLAG_RD, &stats->tpt,
3676 			"Total Packets Transmitted");
3677 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
3678 			CTLFLAG_RD, &stats->gptc,
3679 			"Good Packets Transmitted");
3680 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
3681 			CTLFLAG_RD, &stats->bptc,
3682 			"Broadcast Packets Transmitted");
3683 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
3684 			CTLFLAG_RD, &stats->mptc,
3685 			"Multicast Packets Transmitted");
3686 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
3687 			CTLFLAG_RD, &stats->mngptc,
3688 			"Management Packets Transmitted");
3689 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
3690 			CTLFLAG_RD, &stats->ptc64,
3691 			"64 byte frames transmitted");
3692 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
3693 			CTLFLAG_RD, &stats->ptc127,
3694 			"65-127 byte frames transmitted");
3695 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
3696 			CTLFLAG_RD, &stats->ptc255,
3697 			"128-255 byte frames transmitted");
3698 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
3699 			CTLFLAG_RD, &stats->ptc511,
3700 			"256-511 byte frames transmitted");
3701 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
3702 			CTLFLAG_RD, &stats->ptc1023,
3703 			"512-1023 byte frames transmitted");
3704 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
3705 			CTLFLAG_RD, &stats->ptc1522,
3706 			"1024-1522 byte frames transmitted");
3707 }
3708 #endif
3709 
3710 /*
3711  * Enable the hardware to drop packets when the buffer is full.
3712  * This is useful when multiple RX rings are used, so that no
3713  * single RX ring being full stalls the entire RX engine.  We
3714  * only enable this when multiple RX rings are used and when
3715  * flow control is disabled.
3716  */
3717 static void
3718 ix_enable_rx_drop(struct ix_softc *sc)
3719 {
3720 	struct ixgbe_hw *hw = &sc->hw;
3721 	int i;
3722 
3723 	if (bootverbose) {
3724 		if_printf(&sc->arpcom.ac_if,
3725 		    "flow control %s, enable RX drop\n",
3726 		    ix_fc2str(sc->hw.fc.current_mode));
3727 	}
3728 
3729 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
3730 		uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3731 
3732 		srrctl |= IXGBE_SRRCTL_DROP_EN;
3733 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3734 	}
3735 }
3736 
3737 static void
3738 ix_disable_rx_drop(struct ix_softc *sc)
3739 {
3740 	struct ixgbe_hw *hw = &sc->hw;
3741 	int i;
3742 
3743 	if (bootverbose) {
3744 		if_printf(&sc->arpcom.ac_if,
3745 		    "flow control %s, disable RX drop\n",
3746 		    ix_fc2str(sc->hw.fc.current_mode));
3747 	}
3748 
3749 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
3750 		uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3751 
3752 		srrctl &= ~IXGBE_SRRCTL_DROP_EN;
3753 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3754 	}
3755 }
3756 
3757 static void
3758 ix_setup_serialize(struct ix_softc *sc)
3759 {
3760 	int i = 0, j;
3761 
3762 	/* Main + RX + TX */
3763 	sc->nserialize = 1 + sc->rx_ring_cnt + sc->tx_ring_cnt;
3764 	sc->serializes =
3765 	    kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *),
3766 	        M_DEVBUF, M_WAITOK | M_ZERO);
3767 
3768 	/*
3769 	 * Set up the serializer array.
3770 	 *
3771 	 * NOTE: Order is critical
3772 	 */
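	/*
	 * Array layout: the main serializer sits at index 0, followed by
	 * one serializer per RX ring and then one per TX ring; the
	 * assertions below verify the final count.
	 */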
3773 
3774 	KKASSERT(i < sc->nserialize);
3775 	sc->serializes[i++] = &sc->main_serialize;
3776 
3777 	for (j = 0; j < sc->rx_ring_cnt; ++j) {
3778 		KKASSERT(i < sc->nserialize);
3779 		sc->serializes[i++] = &sc->rx_rings[j].rx_serialize;
3780 	}
3781 
3782 	for (j = 0; j < sc->tx_ring_cnt; ++j) {
3783 		KKASSERT(i < sc->nserialize);
3784 		sc->serializes[i++] = &sc->tx_rings[j].tx_serialize;
3785 	}
3786 
3787 	KKASSERT(i == sc->nserialize);
3788 }
3789 
3790 static int
3791 ix_alloc_intr(struct ix_softc *sc)
3792 {
3793 	struct ix_intr_data *intr;
3794 	struct ix_tx_ring *txr;
3795 	u_int intr_flags;
3796 	int i;
3797 
3798 	ix_alloc_msix(sc);
3799 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
3800 		ix_set_ring_inuse(sc, FALSE);
3801 		goto done;
3802 	}
3803 
3804 	/*
3805 	 * Reset some settings changed by ix_alloc_msix().
3806 	 */
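	/*
	 * A failed or declined MSI-X attempt may have left the ring maps,
	 * intr_data and per-ring vectors partially initialized; restore
	 * them before falling back to MSI or legacy INTx below.
	 */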
3807 	if (sc->rx_rmap_intr != NULL) {
3808 		if_ringmap_free(sc->rx_rmap_intr);
3809 		sc->rx_rmap_intr = NULL;
3810 	}
3811 	if (sc->tx_rmap_intr != NULL) {
3812 		if_ringmap_free(sc->tx_rmap_intr);
3813 		sc->tx_rmap_intr = NULL;
3814 	}
3815 	if (sc->intr_data != NULL) {
3816 		kfree(sc->intr_data, M_DEVBUF);
3817 		sc->intr_data = NULL;
3818 	}
3819 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
3820 		txr = &sc->tx_rings[i];
3821 		txr->tx_intr_vec = -1;
3822 		txr->tx_intr_cpuid = -1;
3823 	}
3824 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
3825 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
3826 
3827 		rxr->rx_intr_vec = -1;
3828 		rxr->rx_txr = NULL;
3829 	}
3830 
3831 	sc->intr_cnt = 1;
3832 	sc->intr_data = kmalloc(sizeof(struct ix_intr_data), M_DEVBUF,
3833 	    M_WAITOK | M_ZERO);
3834 	intr = &sc->intr_data[0];
3835 
3836 	/*
3837 	 * Allocate MSI/legacy interrupt resource
3838 	 */
3839 	sc->intr_type = pci_alloc_1intr(sc->dev, ix_msi_enable,
3840 	    &intr->intr_rid, &intr_flags);
3841 
3842 	intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
3843 	    &intr->intr_rid, intr_flags);
3844 	if (intr->intr_res == NULL) {
3845 		device_printf(sc->dev, "Unable to allocate bus resource: "
3846 		    "interrupt\n");
3847 		return ENXIO;
3848 	}
3849 
3850 	intr->intr_serialize = &sc->main_serialize;
3851 	intr->intr_cpuid = rman_get_cpuid(intr->intr_res);
3852 	intr->intr_func = ix_intr;
3853 	intr->intr_funcarg = sc;
3854 	intr->intr_rate = IX_INTR_RATE;
3855 	intr->intr_use = IX_INTR_USE_RXTX;
3856 
3857 	sc->tx_rings[0].tx_intr_vec = IX_TX_INTR_VEC;
3858 	sc->tx_rings[0].tx_intr_cpuid = intr->intr_cpuid;
3859 
3860 	sc->rx_rings[0].rx_intr_vec = IX_RX0_INTR_VEC;
3861 
3862 	ix_set_ring_inuse(sc, FALSE);
3863 
3864 	KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
3865 	if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS) {
3866 		sc->rx_rings[1].rx_intr_vec = IX_RX1_INTR_VEC;
3867 
3868 		/*
3869 		 * Allocate RX ring map for RSS setup.
3870 		 */
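		/*
		 * With a single MSI/legacy vector, RSS is capped at
		 * IX_MIN_RXRING_RSS rings so that both RX rings can share
		 * the one interrupt.
		 */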
3871 		sc->rx_rmap_intr = if_ringmap_alloc(sc->dev,
3872 		    IX_MIN_RXRING_RSS, IX_MIN_RXRING_RSS);
3873 		KASSERT(if_ringmap_count(sc->rx_rmap_intr) ==
3874 		    sc->rx_ring_inuse, ("RX ring inuse mismatch"));
3875 	}
3876 done:
3877 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
3878 		txr = &sc->tx_rings[i];
3879 		if (txr->tx_intr_cpuid < 0)
3880 			txr->tx_intr_cpuid = 0;
3881 	}
3882 	return 0;
3883 }
3884 
3885 static void
3886 ix_free_intr(struct ix_softc *sc)
3887 {
3888 	if (sc->intr_data == NULL)
3889 		return;
3890 
3891 	if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
3892 		struct ix_intr_data *intr = &sc->intr_data[0];
3893 
3894 		KKASSERT(sc->intr_cnt == 1);
3895 		if (intr->intr_res != NULL) {
3896 			bus_release_resource(sc->dev, SYS_RES_IRQ,
3897 			    intr->intr_rid, intr->intr_res);
3898 		}
3899 		if (sc->intr_type == PCI_INTR_TYPE_MSI)
3900 			pci_release_msi(sc->dev);
3901 
3902 		kfree(sc->intr_data, M_DEVBUF);
3903 	} else {
3904 		ix_free_msix(sc, TRUE);
3905 	}
3906 }
3907 
3908 static void
3909 ix_set_ring_inuse(struct ix_softc *sc, boolean_t polling)
3910 {
3911 	sc->rx_ring_inuse = ix_get_rxring_inuse(sc, polling);
3912 	sc->tx_ring_inuse = ix_get_txring_inuse(sc, polling);
3913 	if (bootverbose) {
3914 		if_printf(&sc->arpcom.ac_if,
3915 		    "RX rings %d/%d, TX rings %d/%d\n",
3916 		    sc->rx_ring_inuse, sc->rx_ring_cnt,
3917 		    sc->tx_ring_inuse, sc->tx_ring_cnt);
3918 	}
3919 }
3920 
3921 static int
3922 ix_get_rxring_inuse(const struct ix_softc *sc, boolean_t polling)
3923 {
3924 	if (!IX_ENABLE_HWRSS(sc))
3925 		return 1;
3926 
3927 	if (polling)
3928 		return sc->rx_ring_cnt;
3929 	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
3930 		return IX_MIN_RXRING_RSS;
3931 	else
3932 		return sc->rx_ring_msix;
3933 }
3934 
3935 static int
3936 ix_get_txring_inuse(const struct ix_softc *sc, boolean_t polling)
3937 {
3938 	if (!IX_ENABLE_HWTSS(sc))
3939 		return 1;
3940 
3941 	if (polling)
3942 		return sc->tx_ring_cnt;
3943 	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
3944 		return 1;
3945 	else
3946 		return sc->tx_ring_msix;
3947 }
3948 
3949 static int
3950 ix_setup_intr(struct ix_softc *sc)
3951 {
3952 	int i;
3953 
3954 	for (i = 0; i < sc->intr_cnt; ++i) {
3955 		struct ix_intr_data *intr = &sc->intr_data[i];
3956 		int error;
3957 
3958 		error = bus_setup_intr_descr(sc->dev, intr->intr_res,
3959 		    INTR_MPSAFE, intr->intr_func, intr->intr_funcarg,
3960 		    &intr->intr_hand, intr->intr_serialize, intr->intr_desc);
3961 		if (error) {
3962 			device_printf(sc->dev, "can't setup %dth intr\n", i);
3963 			ix_teardown_intr(sc, i);
3964 			return error;
3965 		}
3966 	}
3967 	return 0;
3968 }
3969 
3970 static void
3971 ix_teardown_intr(struct ix_softc *sc, int intr_cnt)
3972 {
3973 	int i;
3974 
3975 	if (sc->intr_data == NULL)
3976 		return;
3977 
3978 	for (i = 0; i < intr_cnt; ++i) {
3979 		struct ix_intr_data *intr = &sc->intr_data[i];
3980 
3981 		bus_teardown_intr(sc->dev, intr->intr_res, intr->intr_hand);
3982 	}
3983 }
3984 
3985 static void
3986 ix_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
3987 {
3988 	struct ix_softc *sc = ifp->if_softc;
3989 
3990 	ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz);
3991 }
3992 
3993 static void
3994 ix_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
3995 {
3996 	struct ix_softc *sc = ifp->if_softc;
3997 
3998 	ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz);
3999 }
4000 
4001 static int
4002 ix_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
4003 {
4004 	struct ix_softc *sc = ifp->if_softc;
4005 
4006 	return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz);
4007 }
4008 
4009 #ifdef INVARIANTS
4010 
4011 static void
4012 ix_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
4013     boolean_t serialized)
4014 {
4015 	struct ix_softc *sc = ifp->if_softc;
4016 
4017 	ifnet_serialize_array_assert(sc->serializes, sc->nserialize, slz,
4018 	    serialized);
4019 }
4020 
4021 #endif	/* INVARIANTS */
4022 
4023 static void
4024 ix_free_rings(struct ix_softc *sc)
4025 {
4026 	int i;
4027 
4028 	if (sc->tx_rings != NULL) {
4029 		for (i = 0; i < sc->tx_ring_cnt; ++i) {
4030 			struct ix_tx_ring *txr = &sc->tx_rings[i];
4031 
4032 			ix_destroy_tx_ring(txr, txr->tx_ndesc);
4033 		}
4034 		kfree(sc->tx_rings, M_DEVBUF);
4035 	}
4036 
4037 	if (sc->rx_rings != NULL) {
4038 		for (i = 0; i < sc->rx_ring_cnt; ++i) {
4039 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
4040 
4041 			ix_destroy_rx_ring(rxr, rxr->rx_ndesc);
4042 		}
4043 		kfree(sc->rx_rings, M_DEVBUF);
4044 	}
4045 
4046 	if (sc->parent_tag != NULL)
4047 		bus_dma_tag_destroy(sc->parent_tag);
4048 }
4049 
4050 static void
4051 ix_watchdog_reset(struct ix_softc *sc)
4052 {
4053 	int i;
4054 
4055 	ASSERT_IFNET_SERIALIZED_ALL(&sc->arpcom.ac_if);
4056 	ix_init(sc);
4057 	for (i = 0; i < sc->tx_ring_inuse; ++i)
4058 		ifsq_devstart_sched(sc->tx_rings[i].tx_ifsq);
4059 }
4060 
4061 static void
4062 ix_sync_netisr(struct ix_softc *sc, int flags)
4063 {
4064 	struct ifnet *ifp = &sc->arpcom.ac_if;
4065 
4066 	ifnet_serialize_all(ifp);
4067 	if (ifp->if_flags & IFF_RUNNING) {
4068 		ifp->if_flags &= ~(IFF_RUNNING | flags);
4069 	} else {
4070 		ifnet_deserialize_all(ifp);
4071 		return;
4072 	}
4073 	ifnet_deserialize_all(ifp);
4074 
4075 	/* Make sure that polling stopped. */
4076 	netmsg_service_sync();
4077 }
4078 
4079 static void
4080 ix_watchdog_task(void *xsc, int pending __unused)
4081 {
4082 	struct ix_softc *sc = xsc;
4083 	struct ifnet *ifp = &sc->arpcom.ac_if;
4084 
4085 	ix_sync_netisr(sc, 0);
4086 
4087 	ifnet_serialize_all(ifp);
4088 	if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) == IFF_UP)
4089 		ix_watchdog_reset(sc);
4090 	ifnet_deserialize_all(ifp);
4091 }
4092 
4093 static void
4094 ix_watchdog(struct ifaltq_subque *ifsq)
4095 {
4096 	struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
4097 	struct ifnet *ifp = ifsq_get_ifp(ifsq);
4098 	struct ix_softc *sc = ifp->if_softc;
4099 
4100 	KKASSERT(txr->tx_ifsq == ifsq);
4101 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
4102 
4103 	/*
4104 	 * If the interface has been paused, don't do the watchdog check.
4105 	 */
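	/*
	 * TFCS.TXOFF is assumed to mean transmission is paused by received
	 * flow control frames, so just re-arm the watchdog timer instead
	 * of resetting.
	 */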
4106 	if (IXGBE_READ_REG(&sc->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF) {
4107 		txr->tx_watchdog.wd_timer = 5;
4108 		return;
4109 	}
4110 
4111 	if_printf(ifp, "Watchdog timeout -- resetting\n");
4112 	if_printf(ifp, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->tx_idx,
4113 	    IXGBE_READ_REG(&sc->hw, IXGBE_TDH(txr->tx_idx)),
4114 	    IXGBE_READ_REG(&sc->hw, IXGBE_TDT(txr->tx_idx)));
4115 	if_printf(ifp, "TX(%d) desc avail = %d, next TX to Clean = %d\n",
4116 	    txr->tx_idx, txr->tx_avail, txr->tx_next_clean);
4117 
4118 	if ((ifp->if_flags & (IFF_IDIRECT | IFF_NPOLLING | IFF_RUNNING)) ==
4119 	    (IFF_IDIRECT | IFF_NPOLLING | IFF_RUNNING))
4120 		taskqueue_enqueue(taskqueue_thread[0], &sc->wdog_task);
4121 	else
4122 		ix_watchdog_reset(sc);
4123 }
4124 
4125 static void
4126 ix_free_tx_ring(struct ix_tx_ring *txr)
4127 {
4128 	int i;
4129 
4130 	for (i = 0; i < txr->tx_ndesc; ++i) {
4131 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
4132 
4133 		if (txbuf->m_head != NULL)
4134 			ix_free_txbuf(txr, txbuf);
4135 	}
4136 }
4137 
4138 static void
4139 ix_free_rx_ring(struct ix_rx_ring *rxr)
4140 {
4141 	int i;
4142 
4143 	for (i = 0; i < rxr->rx_ndesc; ++i) {
4144 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
4145 
4146 		if (rxbuf->fmp != NULL) {
4147 			m_freem(rxbuf->fmp);
4148 			rxbuf->fmp = NULL;
4149 			rxbuf->lmp = NULL;
4150 		} else {
4151 			KKASSERT(rxbuf->lmp == NULL);
4152 		}
4153 		if (rxbuf->m_head != NULL) {
4154 			bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
4155 			m_freem(rxbuf->m_head);
4156 			rxbuf->m_head = NULL;
4157 		}
4158 	}
4159 }
4160 
4161 static int
4162 ix_newbuf(struct ix_rx_ring *rxr, int i, boolean_t wait)
4163 {
4164 	struct mbuf *m;
4165 	bus_dma_segment_t seg;
4166 	bus_dmamap_t map;
4167 	struct ix_rx_buf *rxbuf;
4168 	int flags, error, nseg;
4169 
4170 	flags = M_NOWAIT;
4171 	if (__predict_false(wait))
4172 		flags = M_WAITOK;
4173 
4174 	m = m_getjcl(flags, MT_DATA, M_PKTHDR, rxr->rx_mbuf_sz);
4175 	if (m == NULL) {
4176 		if (wait) {
4177 			if_printf(&rxr->rx_sc->arpcom.ac_if,
4178 			    "Unable to allocate RX mbuf\n");
4179 		}
4180 		return ENOBUFS;
4181 	}
4182 	m->m_len = m->m_pkthdr.len = rxr->rx_mbuf_sz;
4183 
4184 	error = bus_dmamap_load_mbuf_segment(rxr->rx_tag,
4185 	    rxr->rx_sparemap, m, &seg, 1, &nseg, BUS_DMA_NOWAIT);
4186 	if (error) {
4187 		m_freem(m);
4188 		if (wait) {
4189 			if_printf(&rxr->rx_sc->arpcom.ac_if,
4190 			    "Unable to load RX mbuf\n");
4191 		}
4192 		return error;
4193 	}
4194 
4195 	rxbuf = &rxr->rx_buf[i];
4196 	if (rxbuf->m_head != NULL)
4197 		bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
4198 
4199 	map = rxbuf->map;
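	/*
	 * The new mbuf was loaded into the spare map; swap it with this
	 * slot's map so the old (now unloaded) map becomes the spare.
	 */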
4200 	rxbuf->map = rxr->rx_sparemap;
4201 	rxr->rx_sparemap = map;
4202 
4203 	rxbuf->m_head = m;
4204 	rxbuf->paddr = seg.ds_addr;
4205 
4206 	ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4207 	return 0;
4208 }
4209 
4210 static void
4211 ix_add_sysctl(struct ix_softc *sc)
4212 {
4213 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
4214 	struct sysctl_oid *tree = device_get_sysctl_tree(sc->dev);
4215 	char node[32];
4216 	int i;
4217 
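
	/*
	 * All knobs below are attached to this device's sysctl tree,
	 * e.g. dev.ix.0.rxr (path assumed from the standard
	 * device_get_sysctl_tree() layout).
	 */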
4218 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4219 	    OID_AUTO, "rxr", CTLFLAG_RD, &sc->rx_ring_cnt, 0, "# of RX rings");
4220 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4221 	    OID_AUTO, "rxr_inuse", CTLFLAG_RD, &sc->rx_ring_inuse, 0,
4222 	    "# of RX rings used");
4223 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4224 	    OID_AUTO, "txr", CTLFLAG_RD, &sc->tx_ring_cnt, 0, "# of TX rings");
4225 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4226 	    OID_AUTO, "txr_inuse", CTLFLAG_RD, &sc->tx_ring_inuse, 0,
4227 	    "# of TX rings used");
4228 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4229 	    OID_AUTO, "rxd", CTLTYPE_INT | CTLFLAG_RD,
4230 	    sc, 0, ix_sysctl_rxd, "I",
4231 	    "# of RX descs");
4232 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4233 	    OID_AUTO, "txd", CTLTYPE_INT | CTLFLAG_RD,
4234 	    sc, 0, ix_sysctl_txd, "I",
4235 	    "# of TX descs");
4236 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4237 	    OID_AUTO, "tx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4238 	    sc, 0, ix_sysctl_tx_wreg_nsegs, "I",
4239 	    "# of segments sent before write to hardware register");
4240 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4241 	    OID_AUTO, "rx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4242 	    sc, 0, ix_sysctl_rx_wreg_nsegs, "I",
4243 	    "# of received segments sent before write to hardware register");
4244 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4245 	    OID_AUTO, "tx_intr_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4246 	    sc, 0, ix_sysctl_tx_intr_nsegs, "I",
4247 	    "# of segments per TX interrupt");
4248 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4249 	    OID_AUTO, "direct_input", CTLFLAG_RW, &sc->direct_input, 0,
4250 	    "Enable direct input");
4251 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
4252 		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4253 		    OID_AUTO, "tx_msix_cpumap", CTLTYPE_OPAQUE | CTLFLAG_RD,
4254 		    sc->tx_rmap_intr, 0, if_ringmap_cpumap_sysctl, "I",
4255 		    "TX MSI-X CPU map");
4256 		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4257 		    OID_AUTO, "rx_msix_cpumap", CTLTYPE_OPAQUE | CTLFLAG_RD,
4258 		    sc->rx_rmap_intr, 0, if_ringmap_cpumap_sysctl, "I",
4259 		    "RX MSI-X CPU map");
4260 	}
4261 #ifdef IFPOLL_ENABLE
4262 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4263 	    OID_AUTO, "tx_poll_cpumap", CTLTYPE_OPAQUE | CTLFLAG_RD,
4264 	    sc->tx_rmap, 0, if_ringmap_cpumap_sysctl, "I",
4265 	    "TX polling CPU map");
4266 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4267 	    OID_AUTO, "rx_poll_cpumap", CTLTYPE_OPAQUE | CTLFLAG_RD,
4268 	    sc->rx_rmap, 0, if_ringmap_cpumap_sysctl, "I",
4269 	    "RX polling CPU map");
4270 #endif
4271 
4272 #define IX_ADD_INTR_RATE_SYSCTL(sc, use, name) \
4273 do { \
4274 	ix_add_intr_rate_sysctl(sc, IX_INTR_USE_##use, #name, \
4275 	    ix_sysctl_##name, #use " interrupt rate"); \
4276 } while (0)
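	/*
	 * For example, IX_ADD_INTR_RATE_SYSCTL(sc, RXTX, rxtx_intr_rate)
	 * expands to:
	 *	ix_add_intr_rate_sysctl(sc, IX_INTR_USE_RXTX,
	 *	    "rxtx_intr_rate", ix_sysctl_rxtx_intr_rate,
	 *	    "RXTX interrupt rate");
	 */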
4277 
4278 	IX_ADD_INTR_RATE_SYSCTL(sc, RXTX, rxtx_intr_rate);
4279 	IX_ADD_INTR_RATE_SYSCTL(sc, RX, rx_intr_rate);
4280 	IX_ADD_INTR_RATE_SYSCTL(sc, TX, tx_intr_rate);
4281 	IX_ADD_INTR_RATE_SYSCTL(sc, STATUS, sts_intr_rate);
4282 
4283 #undef IX_ADD_INTR_RATE_SYSCTL
4284 
4285 #ifdef IX_RSS_DEBUG
4286 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4287 	    OID_AUTO, "rss_debug", CTLFLAG_RW, &sc->rss_debug, 0,
4288 	    "RSS debug level");
4289 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
4290 		ksnprintf(node, sizeof(node), "rx%d_pkt", i);
4291 		SYSCTL_ADD_ULONG(ctx,
4292 		    SYSCTL_CHILDREN(tree), OID_AUTO, node,
4293 		    CTLFLAG_RW, &sc->rx_rings[i].rx_pkts, "RXed packets");
4294 	}
4295 #endif
4296 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
4297 		struct ix_tx_ring *txr = &sc->tx_rings[i];
4298 
4299 		ksnprintf(node, sizeof(node), "tx%d_nmbuf", i);
4300 		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, node,
4301 		    CTLTYPE_INT | CTLFLAG_RD, txr, 0, ix_sysctl_tx_nmbuf, "I",
4302 		    "# of pending TX mbufs");
4303 
4304 		ksnprintf(node, sizeof(node), "tx%d_gc", i);
4305 		SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, node,
4306 		    CTLFLAG_RW, &txr->tx_gc, "# of TX desc GC");
4307 	}
4308 
4309 #if 0
4310 	ix_add_hw_stats(sc);
4311 #endif
4312 
4313 }
4314 
4315 static int
4316 ix_sysctl_tx_nmbuf(SYSCTL_HANDLER_ARGS)
4317 {
4318 	struct ix_tx_ring *txr = (void *)arg1;
4319 	int nmbuf;
4320 
4321 	nmbuf = txr->tx_nmbuf;
4322 	return (sysctl_handle_int(oidp, &nmbuf, 0, req));
4323 }
4324 
4325 static int
4326 ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4327 {
4328 	struct ix_softc *sc = (void *)arg1;
4329 	struct ifnet *ifp = &sc->arpcom.ac_if;
4330 	int error, nsegs, i;
4331 
4332 	nsegs = sc->tx_rings[0].tx_wreg_nsegs;
4333 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4334 	if (error || req->newptr == NULL)
4335 		return error;
4336 	if (nsegs < 0)
4337 		return EINVAL;
4338 
4339 	ifnet_serialize_all(ifp);
4340 	for (i = 0; i < sc->tx_ring_cnt; ++i)
4341 		sc->tx_rings[i].tx_wreg_nsegs = nsegs;
4342 	ifnet_deserialize_all(ifp);
4343 
4344 	return 0;
4345 }
4346 
4347 static int
4348 ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4349 {
4350 	struct ix_softc *sc = (void *)arg1;
4351 	struct ifnet *ifp = &sc->arpcom.ac_if;
4352 	int error, nsegs, i;
4353 
4354 	nsegs = sc->rx_rings[0].rx_wreg_nsegs;
4355 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4356 	if (error || req->newptr == NULL)
4357 		return error;
4358 	if (nsegs < 0)
4359 		return EINVAL;
4360 
4361 	ifnet_serialize_all(ifp);
4362 	for (i = 0; i < sc->rx_ring_cnt; ++i)
4363 		sc->rx_rings[i].rx_wreg_nsegs = nsegs;
4364 	ifnet_deserialize_all(ifp);
4365 
4366 	return 0;
4367 }
4368 
4369 static int
4370 ix_sysctl_txd(SYSCTL_HANDLER_ARGS)
4371 {
4372 	struct ix_softc *sc = (void *)arg1;
4373 	int txd;
4374 
4375 	txd = sc->tx_rings[0].tx_ndesc;
4376 	return sysctl_handle_int(oidp, &txd, 0, req);
4377 }
4378 
4379 static int
4380 ix_sysctl_rxd(SYSCTL_HANDLER_ARGS)
4381 {
4382 	struct ix_softc *sc = (void *)arg1;
4383 	int rxd;
4384 
4385 	rxd = sc->rx_rings[0].rx_ndesc;
4386 	return sysctl_handle_int(oidp, &rxd, 0, req);
4387 }
4388 
4389 static int
4390 ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS)
4391 {
4392 	struct ix_softc *sc = (void *)arg1;
4393 	struct ifnet *ifp = &sc->arpcom.ac_if;
4394 	struct ix_tx_ring *txr = &sc->tx_rings[0];
4395 	int error, nsegs;
4396 
4397 	nsegs = txr->tx_intr_nsegs;
4398 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4399 	if (error || req->newptr == NULL)
4400 		return error;
4401 	if (nsegs < 0)
4402 		return EINVAL;
4403 
4404 	ifnet_serialize_all(ifp);
4405 
4406 	if (nsegs >= txr->tx_ndesc - IX_MAX_SCATTER - IX_TX_RESERVED) {
4407 		error = EINVAL;
4408 	} else {
4409 		int i;
4410 
4411 		error = 0;
4412 		for (i = 0; i < sc->tx_ring_cnt; ++i)
4413 			sc->tx_rings[i].tx_intr_nsegs = nsegs;
4414 	}
4415 
4416 	ifnet_deserialize_all(ifp);
4417 
4418 	return error;
4419 }
4420 
4421 static void
4422 ix_set_eitr(struct ix_softc *sc, int idx, int rate)
4423 {
4424 	uint32_t eitr, eitr_intvl;
4425 
4426 	eitr = IXGBE_READ_REG(&sc->hw, IXGBE_EITR(idx));
4427 	eitr_intvl = 1000000000 / 256 / rate;
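	/*
	 * NOTE: The EITR interval is assumed to be programmed in 256ns
	 * units, so a rate in interrupts/second maps to 10^9 / 256 / rate
	 * (e.g. 8000 intr/s -> ~488 units).  On MACs other than the 82598
	 * the low interval bits are reserved and are masked off below.
	 */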
4428 
4429 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4430 		eitr &= ~IX_EITR_INTVL_MASK_82598;
4431 		if (eitr_intvl == 0)
4432 			eitr_intvl = 1;
4433 		else if (eitr_intvl > IX_EITR_INTVL_MASK_82598)
4434 			eitr_intvl = IX_EITR_INTVL_MASK_82598;
4435 	} else {
4436 		eitr &= ~IX_EITR_INTVL_MASK;
4437 
4438 		eitr_intvl &= ~IX_EITR_INTVL_RSVD_MASK;
4439 		if (eitr_intvl == 0)
4440 			eitr_intvl = IX_EITR_INTVL_MIN;
4441 		else if (eitr_intvl > IX_EITR_INTVL_MAX)
4442 			eitr_intvl = IX_EITR_INTVL_MAX;
4443 	}
4444 	eitr |= eitr_intvl;
4445 
4446 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EITR(idx), eitr);
4447 }
4448 
4449 static int
4450 ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS)
4451 {
4452 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RXTX);
4453 }
4454 
4455 static int
4456 ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS)
4457 {
4458 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RX);
4459 }
4460 
4461 static int
4462 ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS)
4463 {
4464 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_TX);
4465 }
4466 
4467 static int
4468 ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS)
4469 {
4470 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_STATUS);
4471 }
4472 
4473 static int
4474 ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int use)
4475 {
4476 	struct ix_softc *sc = (void *)arg1;
4477 	struct ifnet *ifp = &sc->arpcom.ac_if;
4478 	int error, rate, i;
4479 
4480 	rate = 0;
4481 	for (i = 0; i < sc->intr_cnt; ++i) {
4482 		if (sc->intr_data[i].intr_use == use) {
4483 			rate = sc->intr_data[i].intr_rate;
4484 			break;
4485 		}
4486 	}
4487 
4488 	error = sysctl_handle_int(oidp, &rate, 0, req);
4489 	if (error || req->newptr == NULL)
4490 		return error;
4491 	if (rate <= 0)
4492 		return EINVAL;
4493 
4494 	ifnet_serialize_all(ifp);
4495 
4496 	for (i = 0; i < sc->intr_cnt; ++i) {
4497 		if (sc->intr_data[i].intr_use == use) {
4498 			sc->intr_data[i].intr_rate = rate;
4499 			if (ifp->if_flags & IFF_RUNNING)
4500 				ix_set_eitr(sc, i, rate);
4501 		}
4502 	}
4503 
4504 	ifnet_deserialize_all(ifp);
4505 
4506 	return error;
4507 }
4508 
4509 static void
4510 ix_add_intr_rate_sysctl(struct ix_softc *sc, int use,
4511     const char *name, int (*handler)(SYSCTL_HANDLER_ARGS), const char *desc)
4512 {
4513 	int i;
4514 
4515 	for (i = 0; i < sc->intr_cnt; ++i) {
4516 		if (sc->intr_data[i].intr_use == use) {
4517 			SYSCTL_ADD_PROC(device_get_sysctl_ctx(sc->dev),
4518 			    SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)),
4519 			    OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW,
4520 			    sc, 0, handler, "I", desc);
4521 			break;
4522 		}
4523 	}
4524 }
4525 
4526 static void
4527 ix_set_timer_cpuid(struct ix_softc *sc, boolean_t polling)
4528 {
4529 	if (polling || sc->intr_type == PCI_INTR_TYPE_MSIX)
4530 		sc->timer_cpuid = 0; /* XXX fixed */
4531 	else
4532 		sc->timer_cpuid = rman_get_cpuid(sc->intr_data[0].intr_res);
4533 }
4534 
4535 static void
4536 ix_alloc_msix(struct ix_softc *sc)
4537 {
4538 	int msix_enable, msix_cnt, msix_ring, alloc_cnt;
4539 	struct ix_intr_data *intr;
4540 	int i, x, error;
4541 	int ring_cnt, ring_cntmax;
4542 	boolean_t setup = FALSE;
4543 
4544 	msix_enable = ix_msix_enable;
4545 	/*
4546 	 * Don't enable MSI-X on 82598 by default, see:
4547 	 * 82598 specification update errata #38
4548 	 */
4549 	if (sc->hw.mac.type == ixgbe_mac_82598EB)
4550 		msix_enable = 0;
4551 	msix_enable = device_getenv_int(sc->dev, "msix.enable", msix_enable);
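	/* The per-device kenv knob is assumed to be hw.ix0.msix.enable. */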
4552 	if (!msix_enable)
4553 		return;
4554 
4555 	msix_cnt = pci_msix_count(sc->dev);
4556 #ifdef IX_MSIX_DEBUG
4557 	msix_cnt = device_getenv_int(sc->dev, "msix.count", msix_cnt);
4558 #endif
4559 	if (msix_cnt <= 1) {
4560 		/* A single MSI-X vector does not make sense; fall back to MSI/INTx. */
4561 		return;
4562 	}
4563 
4564 	/*
4565 	 * Make sure that we don't exceed the limits imposed by the
4566 	 * interrupt related registers (EIMS, etc).
4567 	 */
4568 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4569 		if (msix_cnt > IX_MAX_MSIX_82598)
4570 			msix_cnt = IX_MAX_MSIX_82598;
4571 	} else {
4572 		if (msix_cnt > IX_MAX_MSIX)
4573 			msix_cnt = IX_MAX_MSIX;
4574 	}
4575 	if (bootverbose)
4576 		device_printf(sc->dev, "MSI-X count %d\n", msix_cnt);
4577 	msix_ring = msix_cnt - 1; /* -1 for status */
4578 
4579 	/*
4580 	 * Configure # of RX/TX rings usable by MSI-X.
4581 	 */
4582 	ix_get_rxring_cnt(sc, &ring_cnt, &ring_cntmax);
4583 	if (ring_cntmax > msix_ring)
4584 		ring_cntmax = msix_ring;
4585 	sc->rx_rmap_intr = if_ringmap_alloc(sc->dev, ring_cnt, ring_cntmax);
4586 
4587 	ix_get_txring_cnt(sc, &ring_cnt, &ring_cntmax);
4588 	if (ring_cntmax > msix_ring)
4589 		ring_cntmax = msix_ring;
4590 	sc->tx_rmap_intr = if_ringmap_alloc(sc->dev, ring_cnt, ring_cntmax);
4591 
4592 	if_ringmap_match(sc->dev, sc->rx_rmap_intr, sc->tx_rmap_intr);
4593 	sc->rx_ring_msix = if_ringmap_count(sc->rx_rmap_intr);
4594 	KASSERT(sc->rx_ring_msix <= sc->rx_ring_cnt,
4595 	    ("total RX ring count %d, MSI-X RX ring count %d",
4596 	     sc->rx_ring_cnt, sc->rx_ring_msix));
4597 	sc->tx_ring_msix = if_ringmap_count(sc->tx_rmap_intr);
4598 	KASSERT(sc->tx_ring_msix <= sc->tx_ring_cnt,
4599 	    ("total TX ring count %d, MSI-X TX ring count %d",
4600 	     sc->tx_ring_cnt, sc->tx_ring_msix));
4601 
4602 	/*
4603 	 * Aggregate TX/RX MSI-X
4604 	 */
4605 	ring_cntmax = sc->rx_ring_msix;
4606 	if (ring_cntmax < sc->tx_ring_msix)
4607 		ring_cntmax = sc->tx_ring_msix;
4608 	KASSERT(ring_cntmax <= msix_ring,
4609 	    ("invalid ring count max %d, MSI-X count for rings %d",
4610 	     ring_cntmax, msix_ring));
4611 
4612 	alloc_cnt = ring_cntmax + 1; /* +1 for status */
4613 	if (bootverbose) {
4614 		device_printf(sc->dev, "MSI-X alloc %d, "
4615 		    "RX ring %d, TX ring %d\n", alloc_cnt,
4616 		    sc->rx_ring_msix, sc->tx_ring_msix);
4617 	}
4618 
4619 	sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82598);
4620 	sc->msix_mem_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4621 	    &sc->msix_mem_rid, RF_ACTIVE);
4622 	if (sc->msix_mem_res == NULL) {
4623 		sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82599);
4624 		sc->msix_mem_res = bus_alloc_resource_any(sc->dev,
4625 		    SYS_RES_MEMORY, &sc->msix_mem_rid, RF_ACTIVE);
4626 		if (sc->msix_mem_res == NULL) {
4627 			device_printf(sc->dev, "Unable to map MSI-X table\n");
4628 			return;
4629 		}
4630 	}
4631 
4632 	sc->intr_cnt = alloc_cnt;
4633 	sc->intr_data = kmalloc(sizeof(struct ix_intr_data) * sc->intr_cnt,
4634 	    M_DEVBUF, M_WAITOK | M_ZERO);
4635 	for (x = 0; x < sc->intr_cnt; ++x) {
4636 		intr = &sc->intr_data[x];
4637 		intr->intr_rid = -1;
4638 		intr->intr_rate = IX_INTR_RATE;
4639 	}
4640 
4641 	x = 0;
4642 	for (i = 0; i < sc->rx_ring_msix; ++i) {
4643 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
4644 		struct ix_tx_ring *txr = NULL;
4645 		int cpuid, j;
4646 
4647 		KKASSERT(x < sc->intr_cnt);
4648 		rxr->rx_intr_vec = x;
4649 		ix_setup_msix_eims(sc, x,
4650 		    &rxr->rx_eims, &rxr->rx_eims_val);
4651 
4652 		cpuid = if_ringmap_cpumap(sc->rx_rmap_intr, i);
4653 
4654 		/*
4655 		 * Try finding TX ring to piggyback.
4656 		 */
4657 		for (j = 0; j < sc->tx_ring_msix; ++j) {
4658 			if (cpuid ==
4659 			    if_ringmap_cpumap(sc->tx_rmap_intr, j)) {
4660 				txr = &sc->tx_rings[j];
4661 				KKASSERT(txr->tx_intr_cpuid < 0);
4662 				break;
4663 			}
4664 		}
4665 		rxr->rx_txr = txr;
4666 
4667 		intr = &sc->intr_data[x++];
4668 		intr->intr_serialize = &rxr->rx_serialize;
4669 		if (txr != NULL) {
4670 			ksnprintf(intr->intr_desc0,
4671 			    sizeof(intr->intr_desc0), "%s rx%dtx%d",
4672 			    device_get_nameunit(sc->dev), i, txr->tx_idx);
4673 			intr->intr_use = IX_INTR_USE_RXTX;
4674 			intr->intr_func = ix_msix_rxtx;
4675 		} else {
4676 			ksnprintf(intr->intr_desc0,
4677 			    sizeof(intr->intr_desc0), "%s rx%d",
4678 			    device_get_nameunit(sc->dev), i);
4679 			intr->intr_rate = IX_MSIX_RX_RATE;
4680 			intr->intr_use = IX_INTR_USE_RX;
4681 			intr->intr_func = ix_msix_rx;
4682 		}
4683 		intr->intr_funcarg = rxr;
4684 		intr->intr_cpuid = cpuid;
4685 		KKASSERT(intr->intr_cpuid < netisr_ncpus);
4686 		intr->intr_desc = intr->intr_desc0;
4687 
4688 		if (txr != NULL) {
4689 			txr->tx_intr_cpuid = intr->intr_cpuid;
4690 			/* NOTE: Leave TX ring's intr_vec negative. */
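			/*
			 * The RX vector also services this TX ring via
			 * ix_msix_rxtx(); setting tx_intr_cpuid here makes
			 * the TX-only loop below skip it.
			 */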
4691 		}
4692 	}
4693 
4694 	for (i = 0; i < sc->tx_ring_msix; ++i) {
4695 		struct ix_tx_ring *txr = &sc->tx_rings[i];
4696 
4697 		if (txr->tx_intr_cpuid >= 0) {
4698 			/* Piggybacked by RX ring. */
4699 			continue;
4700 		}
4701 
4702 		KKASSERT(x < sc->intr_cnt);
4703 		txr->tx_intr_vec = x;
4704 		ix_setup_msix_eims(sc, x, &txr->tx_eims, &txr->tx_eims_val);
4705 
4706 		intr = &sc->intr_data[x++];
4707 		intr->intr_serialize = &txr->tx_serialize;
4708 		intr->intr_rate = IX_MSIX_TX_RATE;
4709 		intr->intr_use = IX_INTR_USE_TX;
4710 		intr->intr_func = ix_msix_tx;
4711 		intr->intr_funcarg = txr;
4712 		intr->intr_cpuid = if_ringmap_cpumap(sc->tx_rmap_intr, i);
4713 		KKASSERT(intr->intr_cpuid < netisr_ncpus);
4714 		ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s tx%d",
4715 		    device_get_nameunit(sc->dev), i);
4716 		intr->intr_desc = intr->intr_desc0;
4717 
4718 		txr->tx_intr_cpuid = intr->intr_cpuid;
4719 	}
4720 
4721 	/*
4722 	 * Status MSI-X
4723 	 */
4724 	KKASSERT(x < sc->intr_cnt);
4725 	sc->sts_msix_vec = x;
4726 
4727 	intr = &sc->intr_data[x++];
4728 
4729 	intr->intr_serialize = &sc->main_serialize;
4730 	intr->intr_func = ix_msix_status;
4731 	intr->intr_funcarg = sc;
4732 	intr->intr_cpuid = 0;
4733 	intr->intr_use = IX_INTR_USE_STATUS;
4734 
4735 	ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s sts",
4736 	    device_get_nameunit(sc->dev));
4737 	intr->intr_desc = intr->intr_desc0;
4738 
4739 	KKASSERT(x == sc->intr_cnt);
4740 
4741 	error = pci_setup_msix(sc->dev);
4742 	if (error) {
4743 		device_printf(sc->dev, "Setup MSI-X failed\n");
4744 		goto back;
4745 	}
4746 	setup = TRUE;
4747 
4748 	for (i = 0; i < sc->intr_cnt; ++i) {
4749 		intr = &sc->intr_data[i];
4750 
4751 		error = pci_alloc_msix_vector(sc->dev, i, &intr->intr_rid,
4752 		    intr->intr_cpuid);
4753 		if (error) {
4754 			device_printf(sc->dev,
4755 			    "Unable to allocate MSI-X %d on cpu%d\n", i,
4756 			    intr->intr_cpuid);
4757 			goto back;
4758 		}
4759 
4760 		intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
4761 		    &intr->intr_rid, RF_ACTIVE);
4762 		if (intr->intr_res == NULL) {
4763 			device_printf(sc->dev,
4764 			    "Unable to allocate MSI-X %d resource\n", i);
4765 			error = ENOMEM;
4766 			goto back;
4767 		}
4768 	}
4769 
4770 	pci_enable_msix(sc->dev);
4771 	sc->intr_type = PCI_INTR_TYPE_MSIX;
4772 back:
4773 	if (error)
4774 		ix_free_msix(sc, setup);
4775 }
4776 
4777 static void
4778 ix_free_msix(struct ix_softc *sc, boolean_t setup)
4779 {
4780 	int i;
4781 
4782 	KKASSERT(sc->intr_cnt > 1);
4783 
4784 	for (i = 0; i < sc->intr_cnt; ++i) {
4785 		struct ix_intr_data *intr = &sc->intr_data[i];
4786 
4787 		if (intr->intr_res != NULL) {
4788 			bus_release_resource(sc->dev, SYS_RES_IRQ,
4789 			    intr->intr_rid, intr->intr_res);
4790 		}
4791 		if (intr->intr_rid >= 0)
4792 			pci_release_msix_vector(sc->dev, intr->intr_rid);
4793 	}
4794 	if (setup)
4795 		pci_teardown_msix(sc->dev);
4796 
4797 	sc->intr_cnt = 0;
4798 	kfree(sc->intr_data, M_DEVBUF);
4799 	sc->intr_data = NULL;
4800 }
4801 
4802 static void
4803 ix_msix_rx(void *xrxr)
4804 {
4805 	struct ix_rx_ring *rxr = xrxr;
4806 
4807 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4808 
4809 	ix_rxeof(rxr, -1);
4810 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
4811 }
4812 
4813 static void
4814 ix_msix_tx(void *xtxr)
4815 {
4816 	struct ix_tx_ring *txr = xtxr;
4817 
4818 	ASSERT_SERIALIZED(&txr->tx_serialize);
4819 
4820 	ix_tx_intr(txr, *(txr->tx_hdr));
4821 	IXGBE_WRITE_REG(&txr->tx_sc->hw, txr->tx_eims, txr->tx_eims_val);
4822 }
4823 
4824 static void
4825 ix_msix_rxtx(void *xrxr)
4826 {
4827 	struct ix_rx_ring *rxr = xrxr;
4828 	struct ix_tx_ring *txr;
4829 	int hdr;
4830 
4831 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4832 
4833 	ix_rxeof(rxr, -1);
4834 
4835 	/*
4836 	 * NOTE:
4837 	 * Since tx_next_clean is only changed by ix_txeof(),
4838 	 * which is called only from the interrupt handler, this
4839 	 * check without holding the TX serializer is MPSAFE.
4840 	 */
4841 	txr = rxr->rx_txr;
4842 	hdr = *(txr->tx_hdr);
4843 	if (hdr != txr->tx_next_clean) {
4844 		lwkt_serialize_enter(&txr->tx_serialize);
4845 		ix_tx_intr(txr, hdr);
4846 		lwkt_serialize_exit(&txr->tx_serialize);
4847 	}
4848 
4849 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
4850 }
4851 
4852 static void
4853 ix_intr_status(struct ix_softc *sc, uint32_t eicr)
4854 {
4855 	struct ixgbe_hw *hw = &sc->hw;
4856 
4857 	/* Link status change */
4858 	if (eicr & IXGBE_EICR_LSC)
4859 		ix_handle_link(sc);
4860 
4861 	if (hw->mac.type != ixgbe_mac_82598EB) {
4862 		if (eicr & IXGBE_EICR_ECC)
4863 			if_printf(&sc->arpcom.ac_if, "ECC ERROR!!  Reboot!!\n");
4864 
4865 		/* Check for over temp condition */
4866 		if (eicr & IXGBE_EICR_TS) {
4867 			if_printf(&sc->arpcom.ac_if, "CRITICAL: OVER TEMP!!  "
4868 			    "PHY IS SHUT DOWN!!  Shutdown!!\n");
4869 		}
4870 	}
4871 
4872 	if (ix_is_sfp(hw)) {
4873 		uint32_t mod_mask;
4874 
4875 		/* Pluggable optics-related interrupt */
4876 		if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP)
4877 			mod_mask = IXGBE_EICR_GPI_SDP0_X540;
4878 		else
4879 			mod_mask = IXGBE_EICR_GPI_SDP2_BY_MAC(hw);
4880 		if (eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw))
4881 			ix_handle_msf(sc);
4882 		else if (eicr & mod_mask)
4883 			ix_handle_mod(sc);
4884 	}
4885 
4886 	/* Check for fan failure */
4887 	if (hw->device_id == IXGBE_DEV_ID_82598AT &&
4888 	    (eicr & IXGBE_EICR_GPI_SDP1))
4889 		if_printf(&sc->arpcom.ac_if, "FAN FAILURE!!  Replace!!\n");
4890 
4891 	/* External PHY interrupt */
4892 	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T &&
4893 	    (eicr & IXGBE_EICR_GPI_SDP0_X540))
4894 		ix_handle_phy(sc);
4895 }
4896 
4897 static void
4898 ix_msix_status(void *xsc)
4899 {
4900 	struct ix_softc *sc = xsc;
4901 	uint32_t eicr;
4902 
4903 	ASSERT_SERIALIZED(&sc->main_serialize);
4904 
4905 	eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
4906 	ix_intr_status(sc, eicr);
4907 
4908 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMS, sc->intr_mask);
4909 }
4910 
4911 static void
4912 ix_setup_msix_eims(const struct ix_softc *sc, int x,
4913     uint32_t *eims, uint32_t *eims_val)
4914 {
4915 	if (x < 32) {
4916 		if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4917 			KASSERT(x < IX_MAX_MSIX_82598,
4918 			    ("%s: invalid vector %d for 82598",
4919 			     device_get_nameunit(sc->dev), x));
4920 			*eims = IXGBE_EIMS;
4921 		} else {
4922 			*eims = IXGBE_EIMS_EX(0);
4923 		}
4924 		*eims_val = 1 << x;
4925 	} else {
4926 		KASSERT(x < IX_MAX_MSIX, ("%s: invalid vector %d",
4927 		    device_get_nameunit(sc->dev), x));
4928 		KASSERT(sc->hw.mac.type != ixgbe_mac_82598EB,
4929 		    ("%s: invalid vector %d for 82598",
4930 		     device_get_nameunit(sc->dev), x));
4931 		*eims = IXGBE_EIMS_EX(1);
4932 		*eims_val = 1 << (x - 32);
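		/* e.g. vector 40 selects bit 8 of EIMS_EX(1). */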
4933 	}
4934 }
4935 
4936 #ifdef IFPOLL_ENABLE
4937 
4938 static void
4939 ix_npoll_status(struct ifnet *ifp)
4940 {
4941 	struct ix_softc *sc = ifp->if_softc;
4942 	uint32_t eicr;
4943 
4944 	ASSERT_SERIALIZED(&sc->main_serialize);
4945 
4946 	eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
4947 	ix_intr_status(sc, eicr);
4948 }
4949 
4950 static void
4951 ix_npoll_tx(struct ifnet *ifp, void *arg, int cycle __unused)
4952 {
4953 	struct ix_tx_ring *txr = arg;
4954 
4955 	ASSERT_SERIALIZED(&txr->tx_serialize);
4956 
4957 	ix_tx_intr(txr, *(txr->tx_hdr));
4958 	ix_try_txgc(txr, 1);
4959 }
4960 
4961 static void
4962 ix_npoll_rx(struct ifnet *ifp __unused, void *arg, int cycle)
4963 {
4964 	struct ix_rx_ring *rxr = arg;
4965 
4966 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4967 	ix_rxeof(rxr, cycle);
4968 }
4969 
4970 static void
4971 ix_npoll_rx_direct(struct ifnet *ifp __unused, void *arg, int cycle)
4972 {
4973 	struct ix_rx_ring *rxr = arg;
4974 
4975 	ASSERT_NOT_SERIALIZED(&rxr->rx_serialize);
4976 	ix_rxeof(rxr, cycle);
4977 }
4978 
4979 static void
4980 ix_npoll(struct ifnet *ifp, struct ifpoll_info *info)
4981 {
4982 	struct ix_softc *sc = ifp->if_softc;
4983 	int i, txr_cnt, rxr_cnt, idirect;
4984 
4985 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
4986 
4987 	idirect = sc->direct_input;
4988 	cpu_ccfence();
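	/*
	 * Latch direct_input once here; when set, RX polling runs without
	 * the RX ring serializer (ix_npoll_rx_direct) and the interface is
	 * flagged IFF_IDIRECT.
	 */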
4989 
4990 	if (info) {
4991 		int cpu;
4992 
4993 		info->ifpi_status.status_func = ix_npoll_status;
4994 		info->ifpi_status.serializer = &sc->main_serialize;
4995 
4996 		txr_cnt = ix_get_txring_inuse(sc, TRUE);
4997 		for (i = 0; i < txr_cnt; ++i) {
4998 			struct ix_tx_ring *txr = &sc->tx_rings[i];
4999 
5000 			cpu = if_ringmap_cpumap(sc->tx_rmap, i);
5001 			KKASSERT(cpu < netisr_ncpus);
5002 			info->ifpi_tx[cpu].poll_func = ix_npoll_tx;
5003 			info->ifpi_tx[cpu].arg = txr;
5004 			info->ifpi_tx[cpu].serializer = &txr->tx_serialize;
5005 			ifsq_set_cpuid(txr->tx_ifsq, cpu);
5006 		}
5007 
5008 		rxr_cnt = ix_get_rxring_inuse(sc, TRUE);
5009 		for (i = 0; i < rxr_cnt; ++i) {
5010 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
5011 
5012 			cpu = if_ringmap_cpumap(sc->rx_rmap, i);
5013 			KKASSERT(cpu < netisr_ncpus);
5014 			info->ifpi_rx[cpu].arg = rxr;
5015 			if (idirect) {
5016 				info->ifpi_rx[cpu].poll_func =
5017 				    ix_npoll_rx_direct;
5018 				info->ifpi_rx[cpu].serializer = NULL;
5019 			} else {
5020 				info->ifpi_rx[cpu].poll_func = ix_npoll_rx;
5021 				info->ifpi_rx[cpu].serializer =
5022 				    &rxr->rx_serialize;
5023 			}
5024 		}
5025 		if (idirect)
5026 			ifp->if_flags |= IFF_IDIRECT;
5027 	} else {
5028 		ifp->if_flags &= ~IFF_IDIRECT;
5029 		for (i = 0; i < sc->tx_ring_cnt; ++i) {
5030 			struct ix_tx_ring *txr = &sc->tx_rings[i];
5031 
5032 			ifsq_set_cpuid(txr->tx_ifsq, txr->tx_intr_cpuid);
5033 		}
5034 	}
5035 	if (ifp->if_flags & IFF_RUNNING)
5036 		ix_init(sc);
5037 }
5038 
5039 #endif /* IFPOLL_ENABLE */
5040 
5041 static enum ixgbe_fc_mode
5042 ix_ifmedia2fc(int ifm)
5043 {
5044 	int fc_opt = ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
5045 
5046 	switch (fc_opt) {
5047 	case (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE):
5048 		return ixgbe_fc_full;
5049 
5050 	case IFM_ETH_RXPAUSE:
5051 		return ixgbe_fc_rx_pause;
5052 
5053 	case IFM_ETH_TXPAUSE:
5054 		return ixgbe_fc_tx_pause;
5055 
5056 	default:
5057 		return ixgbe_fc_none;
5058 	}
5059 }
5060 
5061 static const char *
5062 ix_ifmedia2str(int ifm)
5063 {
5064 	int fc_opt = ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
5065 
5066 	switch (fc_opt) {
5067 	case (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE):
5068 		return IFM_ETH_FC_FULL;
5069 
5070 	case IFM_ETH_RXPAUSE:
5071 		return IFM_ETH_FC_RXPAUSE;
5072 
5073 	case IFM_ETH_TXPAUSE:
5074 		return IFM_ETH_FC_TXPAUSE;
5075 
5076 	default:
5077 		return IFM_ETH_FC_NONE;
5078 	}
5079 }
5080 
5081 static const char *
5082 ix_fc2str(enum ixgbe_fc_mode fc)
5083 {
5084 	switch (fc) {
5085 	case ixgbe_fc_full:
5086 		return IFM_ETH_FC_FULL;
5087 
5088 	case ixgbe_fc_rx_pause:
5089 		return IFM_ETH_FC_RXPAUSE;
5090 
5091 	case ixgbe_fc_tx_pause:
5092 		return IFM_ETH_FC_TXPAUSE;
5093 
5094 	default:
5095 		return IFM_ETH_FC_NONE;
5096 	}
5097 }
5098 
5099 static int
5100 ix_powerdown(struct ix_softc *sc)
5101 {
5102 	struct ixgbe_hw *hw = &sc->hw;
5103 	int error = 0;
5104 
5105 	/* Limit power management flow to X550EM baseT */
5106 	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T &&
5107 	    hw->phy.ops.enter_lplu) {
5108 		/* Turn off support for APM wakeup. (Using ACPI instead) */
5109 		IXGBE_WRITE_REG(hw, IXGBE_GRC,
5110 		    IXGBE_READ_REG(hw, IXGBE_GRC) & ~(uint32_t)2);
5111 
5112 		/*
5113 		 * Clear Wake Up Status register to prevent any previous wakeup
5114 		 * events from waking us up immediately after we suspend.
5115 		 */
5116 		IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff);
5117 
5118 		/*
5119 		 * Program the Wakeup Filter Control register with user filter
5120 		 * settings
5121 		 */
5122 		IXGBE_WRITE_REG(hw, IXGBE_WUFC, sc->wufc);
5123 
5124 		/* Enable wakeups and power management in Wakeup Control */
5125 		IXGBE_WRITE_REG(hw, IXGBE_WUC,
5126 		    IXGBE_WUC_WKEN | IXGBE_WUC_PME_EN);
5127 
5128 		/* X550EM baseT adapters need a special LPLU flow */
5129 		hw->phy.reset_disable = true;
5130 		ix_stop(sc);
5131 		error = hw->phy.ops.enter_lplu(hw);
5132 		if (error) {
5133 			if_printf(&sc->arpcom.ac_if,
5134 			    "Error entering LPLU: %d\n", error);
5135 		}
5136 		hw->phy.reset_disable = false;
5137 	} else {
5138 		/* Just stop for other adapters */
5139 		ix_stop(sc);
5140 	}
5141 	return error;
5142 }
5143 
5144 static void
5145 ix_config_flowctrl(struct ix_softc *sc)
5146 {
5147 	struct ixgbe_hw *hw = &sc->hw;
5148 	uint32_t rxpb, frame, size, tmp;
5149 
5150 	frame = sc->max_frame_size;
5151 
5152 	/* Calculate High Water */
5153 	switch (hw->mac.type) {
5154 	case ixgbe_mac_X540:
5155 	case ixgbe_mac_X550:
5156 	case ixgbe_mac_X550EM_a:
5157 	case ixgbe_mac_X550EM_x:
5158 		tmp = IXGBE_DV_X540(frame, frame);
5159 		break;
5160 	default:
5161 		tmp = IXGBE_DV(frame, frame);
5162 		break;
5163 	}
5164 	size = IXGBE_BT2KB(tmp);
5165 	rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
5166 	hw->fc.high_water[0] = rxpb - size;
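	/*
	 * The high water mark is the RX packet buffer size (in KB) minus
	 * the worst-case delay value for this frame size, i.e. the XOFF
	 * threshold is assumed to still leave room for in-flight frames.
	 */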
5167 
5168 	/* Now calculate Low Water */
5169 	switch (hw->mac.type) {
5170 	case ixgbe_mac_X540:
5171 	case ixgbe_mac_X550:
5172 	case ixgbe_mac_X550EM_a:
5173 	case ixgbe_mac_X550EM_x:
5174 		tmp = IXGBE_LOW_DV_X540(frame);
5175 		break;
5176 	default:
5177 		tmp = IXGBE_LOW_DV(frame);
5178 		break;
5179 	}
5180 	hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
5181 
5182 	hw->fc.requested_mode = ix_ifmedia2fc(sc->ifm_media);
5183 	if (sc->ifm_media & IFM_ETH_FORCEPAUSE)
5184 		hw->fc.disable_fc_autoneg = TRUE;
5185 	else
5186 		hw->fc.disable_fc_autoneg = FALSE;
5187 	hw->fc.pause_time = IX_FC_PAUSE;
5188 	hw->fc.send_xon = TRUE;
5189 }
5190 
5191 static void
5192 ix_config_dmac(struct ix_softc *sc)
5193 {
5194 	struct ixgbe_hw *hw = &sc->hw;
5195 	struct ixgbe_dmac_config *dcfg = &hw->mac.dmac_config;
5196 
5197 	if (hw->mac.type < ixgbe_mac_X550 || !hw->mac.ops.dmac_config)
5198 		return;
5199 
5200 	if ((dcfg->watchdog_timer ^ sc->dmac) ||
5201 	    (dcfg->link_speed ^ sc->link_speed)) {
5202 		dcfg->watchdog_timer = sc->dmac;
5203 		dcfg->fcoe_en = false;
5204 		dcfg->link_speed = sc->link_speed;
5205 		dcfg->num_tcs = 1;
5206 
5207 		if (bootverbose) {
5208 			if_printf(&sc->arpcom.ac_if, "dmac settings: "
5209 			    "watchdog %d, link speed %d\n",
5210 			    dcfg->watchdog_timer, dcfg->link_speed);
5211 		}
5212 
5213 		hw->mac.ops.dmac_config(hw);
5214 	}
5215 }
5216 
5217 static void
5218 ix_init_media(struct ix_softc *sc)
5219 {
5220 	struct ixgbe_hw *hw = &sc->hw;
5221 	int layer, msf_ifm = IFM_NONE;
5222 
5223 	ifmedia_removeall(&sc->media);
5224 
5225 	layer = ixgbe_get_supported_physical_layer(hw);
5226 
5227 	/*
5228 	 * Media types with matching DragonFlyBSD media defines
5229 	 */
5230 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
5231 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_T | IFM_FDX,
5232 		    0, NULL);
5233 	}
5234 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
5235 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
5236 		    0, NULL);
5237 	}
5238 	if (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX) {
5239 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
5240 		    0, NULL);
5241 		/* No half-duplex support */
5242 	}
5243 
5244 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) {
5245 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_LR | IFM_FDX,
5246 		    0, NULL);
5247 		msf_ifm = IFM_1000_LX;
5248 	}
5249 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LRM) {
5250 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_LRM | IFM_FDX,
5251 		    0, NULL);
5252 		msf_ifm = IFM_1000_LX;
5253 	}
5254 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
5255 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_SR | IFM_FDX,
5256 		    0, NULL);
5257 		msf_ifm = IFM_1000_SX;
5258 	}
5259 
5260 	/* Add media for multispeed fiber */
5261 	if (ix_is_sfp(hw) && hw->phy.multispeed_fiber && msf_ifm != IFM_NONE) {
5262 		uint32_t linkcap;
5263 		bool autoneg;
5264 
5265 		hw->mac.ops.get_link_capabilities(hw, &linkcap, &autoneg);
5266 		if (linkcap & IXGBE_LINK_SPEED_1GB_FULL)
5267 			ifmedia_add_nodup(&sc->media,
5268 			    IFM_ETHER | msf_ifm | IFM_FDX, 0, NULL);
5269 	}
5270 
5271 	if ((layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) ||
5272 	    (layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA)) {
5273 		ifmedia_add_nodup(&sc->media,
5274 		    IFM_ETHER | IFM_10G_TWINAX | IFM_FDX, 0, NULL);
5275 	}
5276 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) {
5277 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_CX4 | IFM_FDX,
5278 		    0, NULL);
5279 	}
5280 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
5281 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
5282 		    0, NULL);
5283 	}
5284 
5285 	/*
5286 	 * XXX Other (no matching DragonFlyBSD media type):
5287 	 * To workaround this, we'll assign these completely
5288 	 * inappropriate media types.
5289 	 */
5290 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) {
5291 		if_printf(&sc->arpcom.ac_if, "Media supported: 10GbaseKR\n");
5292 		if_printf(&sc->arpcom.ac_if, "10GbaseKR mapped to 10GbaseSR\n");
5293 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_SR | IFM_FDX,
5294 		    0, NULL);
5295 	}
5296 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) {
5297 		if_printf(&sc->arpcom.ac_if, "Media supported: 10GbaseKX4\n");
5298 		if_printf(&sc->arpcom.ac_if,
5299 		    "10GbaseKX4 mapped to 10GbaseCX4\n");
5300 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_CX4 | IFM_FDX,
5301 		    0, NULL);
5302 	}
5303 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) {
5304 		if_printf(&sc->arpcom.ac_if, "Media supported: 1000baseKX\n");
5305 		if_printf(&sc->arpcom.ac_if,
5306 		    "1000baseKX mapped to 1000baseCX\n");
5307 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_CX | IFM_FDX,
5308 		    0, NULL);
5309 	}
5310 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_BX) {
5311 		/* Someday, someone will care about you... */
5312 		if_printf(&sc->arpcom.ac_if,
5313 		    "Media supported: 1000baseBX, ignored\n");
5314 	}
5315 
5316 	/* XXX we probably don't need this */
5317 	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
5318 		ifmedia_add_nodup(&sc->media,
5319 		    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
5320 	}
5321 
5322 	ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
5323 
5324 	if (ifmedia_tryset(&sc->media, sc->ifm_media)) {
5325 		int flowctrl = (sc->ifm_media & IFM_ETH_FCMASK);
5326 
5327 		sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;
5328 		sc->ifm_media = IX_IFM_DEFAULT | flowctrl;
5329 		ifmedia_set(&sc->media, sc->ifm_media);
5330 	}
5331 }
5332