xref: /dragonfly/sys/dev/netif/ix/if_ix.c (revision 6700dd34)
1 /*
2  * Copyright (c) 2001-2014, Intel Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  *  1. Redistributions of source code must retain the above copyright notice,
9  *     this list of conditions and the following disclaimer.
10  *
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *     notice, this list of conditions and the following disclaimer in the
13  *     documentation and/or other materials provided with the distribution.
14  *
15  *  3. Neither the name of the Intel Corporation nor the names of its
16  *     contributors may be used to endorse or promote products derived from
17  *     this software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include "opt_ifpoll.h"
33 #include "opt_ix.h"
34 
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/endian.h>
38 #include <sys/interrupt.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/proc.h>
43 #include <sys/rman.h>
44 #include <sys/serialize.h>
45 #include <sys/serialize2.h>
46 #include <sys/socket.h>
47 #include <sys/sockio.h>
48 #include <sys/sysctl.h>
49 #include <sys/systm.h>
50 #include <sys/taskqueue.h>
51 
52 #include <net/bpf.h>
53 #include <net/ethernet.h>
54 #include <net/if.h>
55 #include <net/if_arp.h>
56 #include <net/if_dl.h>
57 #include <net/if_media.h>
58 #include <net/ifq_var.h>
59 #include <net/if_ringmap.h>
60 #include <net/toeplitz.h>
61 #include <net/toeplitz2.h>
62 #include <net/vlan/if_vlan_var.h>
63 #include <net/vlan/if_vlan_ether.h>
64 #include <net/if_poll.h>
65 
66 #include <netinet/in_systm.h>
67 #include <netinet/in.h>
68 #include <netinet/ip.h>
69 
70 #include <bus/pci/pcivar.h>
71 #include <bus/pci/pcireg.h>
72 
73 #include <dev/netif/ix/ixgbe_api.h>
74 #include <dev/netif/ix/if_ix.h>
75 
76 #define IX_IFM_DEFAULT		(IFM_ETHER | IFM_AUTO)
77 
78 #ifdef IX_RSS_DEBUG
79 #define IX_RSS_DPRINTF(sc, lvl, fmt, ...) \
80 do { \
81 	if (sc->rss_debug >= lvl) \
82 		if_printf(&sc->arpcom.ac_if, fmt, __VA_ARGS__); \
83 } while (0)
84 #else	/* !IX_RSS_DEBUG */
85 #define IX_RSS_DPRINTF(sc, lvl, fmt, ...)	((void)0)
86 #endif	/* IX_RSS_DEBUG */
87 
88 #define IX_NAME			"Intel(R) PRO/10GbE "
89 #define IX_DEVICE(id) \
90 	{ IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_##id, IX_NAME #id }
91 #define IX_DEVICE_NULL		{ 0, 0, NULL }
92 
93 static struct ix_device {
94 	uint16_t	vid;
95 	uint16_t	did;
96 	const char	*desc;
97 } ix_devices[] = {
98 	IX_DEVICE(82598AF_DUAL_PORT),
99 	IX_DEVICE(82598AF_SINGLE_PORT),
100 	IX_DEVICE(82598EB_CX4),
101 	IX_DEVICE(82598AT),
102 	IX_DEVICE(82598AT2),
103 	IX_DEVICE(82598),
104 	IX_DEVICE(82598_DA_DUAL_PORT),
105 	IX_DEVICE(82598_CX4_DUAL_PORT),
106 	IX_DEVICE(82598EB_XF_LR),
107 	IX_DEVICE(82598_SR_DUAL_PORT_EM),
108 	IX_DEVICE(82598EB_SFP_LOM),
109 	IX_DEVICE(82599_KX4),
110 	IX_DEVICE(82599_KX4_MEZZ),
111 	IX_DEVICE(82599_SFP),
112 	IX_DEVICE(82599_XAUI_LOM),
113 	IX_DEVICE(82599_CX4),
114 	IX_DEVICE(82599_T3_LOM),
115 	IX_DEVICE(82599_COMBO_BACKPLANE),
116 	IX_DEVICE(82599_BACKPLANE_FCOE),
117 	IX_DEVICE(82599_SFP_SF2),
118 	IX_DEVICE(82599_SFP_FCOE),
119 	IX_DEVICE(82599EN_SFP),
120 	IX_DEVICE(82599_SFP_SF_QP),
121 	IX_DEVICE(82599_QSFP_SF_QP),
122 	IX_DEVICE(X540T),
123 	IX_DEVICE(X540T1),
124 	IX_DEVICE(X550T),
125 	IX_DEVICE(X550EM_X_KR),
126 	IX_DEVICE(X550EM_X_KX4),
127 	IX_DEVICE(X550EM_X_10G_T),
128 
129 	/* required last entry */
130 	IX_DEVICE_NULL
131 };
132 
133 static int	ix_probe(device_t);
134 static int	ix_attach(device_t);
135 static int	ix_detach(device_t);
136 static int	ix_shutdown(device_t);
137 
138 static void	ix_serialize(struct ifnet *, enum ifnet_serialize);
139 static void	ix_deserialize(struct ifnet *, enum ifnet_serialize);
140 static int	ix_tryserialize(struct ifnet *, enum ifnet_serialize);
141 #ifdef INVARIANTS
142 static void	ix_serialize_assert(struct ifnet *, enum ifnet_serialize,
143 		    boolean_t);
144 #endif
145 static void	ix_start(struct ifnet *, struct ifaltq_subque *);
146 static void	ix_watchdog(struct ifaltq_subque *);
147 static int	ix_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
148 static void	ix_init(void *);
149 static void	ix_stop(struct ix_softc *);
150 static void	ix_media_status(struct ifnet *, struct ifmediareq *);
151 static int	ix_media_change(struct ifnet *);
152 static void	ix_timer(void *);
153 #ifdef IFPOLL_ENABLE
154 static void	ix_npoll(struct ifnet *, struct ifpoll_info *);
155 static void	ix_npoll_rx(struct ifnet *, void *, int);
156 static void	ix_npoll_rx_direct(struct ifnet *, void *, int);
157 static void	ix_npoll_tx(struct ifnet *, void *, int);
158 static void	ix_npoll_status(struct ifnet *);
159 #endif
160 
161 static void	ix_add_sysctl(struct ix_softc *);
162 static void	ix_add_intr_rate_sysctl(struct ix_softc *, int,
163 		    const char *, int (*)(SYSCTL_HANDLER_ARGS), const char *);
164 static int	ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
165 static int	ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
166 static int	ix_sysctl_txd(SYSCTL_HANDLER_ARGS);
167 static int	ix_sysctl_rxd(SYSCTL_HANDLER_ARGS);
168 static int	ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS);
169 static int	ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int);
170 static int	ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS);
171 static int	ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS);
172 static int	ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS);
173 static int	ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS);
174 #if 0
175 static void     ix_add_hw_stats(struct ix_softc *);
176 #endif
177 
178 static void	ix_watchdog_reset(struct ix_softc *);
179 static void	ix_watchdog_task(void *, int);
180 static void	ix_sync_netisr(struct ix_softc *, int);
181 static void	ix_slot_info(struct ix_softc *);
182 static int	ix_alloc_rings(struct ix_softc *);
183 static void	ix_free_rings(struct ix_softc *);
184 static void	ix_setup_ifp(struct ix_softc *);
185 static void	ix_setup_serialize(struct ix_softc *);
186 static void	ix_set_ring_inuse(struct ix_softc *, boolean_t);
187 static void	ix_set_timer_cpuid(struct ix_softc *, boolean_t);
188 static void	ix_update_stats(struct ix_softc *);
189 
190 static void	ix_set_promisc(struct ix_softc *);
191 static void	ix_set_multi(struct ix_softc *);
192 static void	ix_set_vlan(struct ix_softc *);
193 static uint8_t	*ix_mc_array_itr(struct ixgbe_hw *, uint8_t **, uint32_t *);
194 static enum ixgbe_fc_mode ix_ifmedia2fc(int);
195 static const char *ix_ifmedia2str(int);
196 static const char *ix_fc2str(enum ixgbe_fc_mode);
197 
198 static void	ix_get_txring_cnt(const struct ix_softc *, int *, int *);
199 static int	ix_get_txring_inuse(const struct ix_softc *, boolean_t);
200 static void	ix_init_tx_ring(struct ix_tx_ring *);
201 static void	ix_free_tx_ring(struct ix_tx_ring *);
202 static int	ix_create_tx_ring(struct ix_tx_ring *);
203 static void	ix_destroy_tx_ring(struct ix_tx_ring *, int);
204 static void	ix_init_tx_unit(struct ix_softc *);
205 static int	ix_encap(struct ix_tx_ring *, struct mbuf **,
206 		    uint16_t *, int *);
207 static int	ix_tx_ctx_setup(struct ix_tx_ring *,
208 		    const struct mbuf *, uint32_t *, uint32_t *);
209 static int	ix_tso_ctx_setup(struct ix_tx_ring *,
210 		    const struct mbuf *, uint32_t *, uint32_t *);
211 static void	ix_txeof(struct ix_tx_ring *, int);
212 
213 static void	ix_get_rxring_cnt(const struct ix_softc *, int *, int *);
214 static int	ix_get_rxring_inuse(const struct ix_softc *, boolean_t);
215 static int	ix_init_rx_ring(struct ix_rx_ring *);
216 static void	ix_free_rx_ring(struct ix_rx_ring *);
217 static int	ix_create_rx_ring(struct ix_rx_ring *);
218 static void	ix_destroy_rx_ring(struct ix_rx_ring *, int);
219 static void	ix_init_rx_unit(struct ix_softc *, boolean_t);
220 #if 0
221 static void	ix_setup_hw_rsc(struct ix_rx_ring *);
222 #endif
223 static int	ix_newbuf(struct ix_rx_ring *, int, boolean_t);
224 static void	ix_rxeof(struct ix_rx_ring *, int);
225 static void	ix_rx_discard(struct ix_rx_ring *, int, boolean_t);
226 static void	ix_enable_rx_drop(struct ix_softc *);
227 static void	ix_disable_rx_drop(struct ix_softc *);
228 
229 static void	ix_alloc_msix(struct ix_softc *);
230 static void	ix_free_msix(struct ix_softc *, boolean_t);
231 static void	ix_setup_msix_eims(const struct ix_softc *, int,
232 		    uint32_t *, uint32_t *);
233 static int	ix_alloc_intr(struct ix_softc *);
234 static void	ix_free_intr(struct ix_softc *);
235 static int	ix_setup_intr(struct ix_softc *);
236 static void	ix_teardown_intr(struct ix_softc *, int);
237 static void	ix_enable_intr(struct ix_softc *);
238 static void	ix_disable_intr(struct ix_softc *);
239 static void	ix_set_ivar(struct ix_softc *, uint8_t, uint8_t, int8_t);
240 static void	ix_set_eitr(struct ix_softc *, int, int);
241 static void	ix_intr_status(struct ix_softc *, uint32_t);
242 static void	ix_intr(void *);
243 static void	ix_msix_rxtx(void *);
244 static void	ix_msix_rx(void *);
245 static void	ix_msix_tx(void *);
246 static void	ix_msix_status(void *);
247 
248 static void	ix_config_link(struct ix_softc *);
249 static boolean_t ix_sfp_probe(struct ix_softc *);
250 static boolean_t ix_is_sfp(const struct ixgbe_hw *);
251 static void	ix_update_link_status(struct ix_softc *);
252 static void	ix_handle_link(struct ix_softc *);
253 static void	ix_handle_mod(struct ix_softc *);
254 static void	ix_handle_msf(struct ix_softc *);
255 static void	ix_handle_phy(struct ix_softc *);
256 static int	ix_powerdown(struct ix_softc *);
257 static void	ix_config_flowctrl(struct ix_softc *);
258 static void	ix_config_dmac(struct ix_softc *);
259 static void	ix_init_media(struct ix_softc *);
260 
261 /* XXX Missing shared code prototype */
262 extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *);
263 
264 static device_method_t ix_methods[] = {
265 	/* Device interface */
266 	DEVMETHOD(device_probe,		ix_probe),
267 	DEVMETHOD(device_attach,	ix_attach),
268 	DEVMETHOD(device_detach,	ix_detach),
269 	DEVMETHOD(device_shutdown,	ix_shutdown),
270 	DEVMETHOD_END
271 };
272 
273 static driver_t ix_driver = {
274 	"ix",
275 	ix_methods,
276 	sizeof(struct ix_softc)
277 };
278 
279 static devclass_t ix_devclass;
280 
281 DECLARE_DUMMY_MODULE(if_ix);
282 DRIVER_MODULE(if_ix, pci, ix_driver, ix_devclass, NULL, NULL);
283 
284 static int	ix_msi_enable = 1;
285 static int	ix_msix_enable = 1;
286 static int	ix_rxr = 0;
287 static int	ix_txr = 0;
288 static int	ix_txd = IX_PERF_TXD;
289 static int	ix_rxd = IX_PERF_RXD;
290 static int	ix_unsupported_sfp = 0;
291 static int	ix_direct_input = 1;
292 
293 static char	ix_flowctrl[IFM_ETH_FC_STRLEN] = IFM_ETH_FC_NONE;
294 
295 TUNABLE_INT("hw.ix.msi.enable", &ix_msi_enable);
296 TUNABLE_INT("hw.ix.msix.enable", &ix_msix_enable);
297 TUNABLE_INT("hw.ix.rxr", &ix_rxr);
298 TUNABLE_INT("hw.ix.txr", &ix_txr);
299 TUNABLE_INT("hw.ix.txd", &ix_txd);
300 TUNABLE_INT("hw.ix.rxd", &ix_rxd);
301 TUNABLE_INT("hw.ix.unsupported_sfp", &ix_unsupported_sfp);
302 TUNABLE_STR("hw.ix.flow_ctrl", ix_flowctrl, sizeof(ix_flowctrl));
303 TUNABLE_INT("hw.ix.direct_input", &ix_direct_input);
304 
305 /*
306  * Smart speed setting, default to on.  This is only a compile-time
307  * option right now, since it is applied during attach; set it to
308  * 'ixgbe_smart_speed_off' to disable.
309  */
310 static const enum ixgbe_smart_speed ix_smart_speed =
311     ixgbe_smart_speed_on;
312 
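/*
 * Device probe: match the PCI vendor/device ID pair against the
 * ix_devices[] table and set the device description on a hit.
 */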
313 static int
314 ix_probe(device_t dev)
315 {
316 	const struct ix_device *d;
317 	uint16_t vid, did;
318 
319 	vid = pci_get_vendor(dev);
320 	did = pci_get_device(dev);
321 
322 	for (d = ix_devices; d->desc != NULL; ++d) {
323 		if (vid == d->vid && did == d->did) {
324 			device_set_desc(dev, d->desc);
325 			return 0;
326 		}
327 	}
328 	return ENXIO;
329 }
330 
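/*
 * Return the requested RX ring count (the "rxr" tunable / per-device
 * environment) and the hardware maximum for this MAC type.
 * ix_get_txring_cnt() below does the same for TX rings.
 */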
331 static void
332 ix_get_rxring_cnt(const struct ix_softc *sc, int *ring_cnt, int *ring_cntmax)
333 {
334 
335 	switch (sc->hw.mac.type) {
336 	case ixgbe_mac_X550:
337 	case ixgbe_mac_X550EM_x:
338 	case ixgbe_mac_X550EM_a:
339 		*ring_cntmax = IX_MAX_RXRING_X550;
340 		break;
341 
342 	default:
343 		*ring_cntmax = IX_MAX_RXRING;
344 		break;
345 	}
346 	*ring_cnt = device_getenv_int(sc->dev, "rxr", ix_rxr);
347 }
348 
349 static void
350 ix_get_txring_cnt(const struct ix_softc *sc, int *ring_cnt, int *ring_cntmax)
351 {
352 
353 	switch (sc->hw.mac.type) {
354 	case ixgbe_mac_82598EB:
355 		*ring_cntmax = IX_MAX_TXRING_82598;
356 		break;
357 
358 	case ixgbe_mac_82599EB:
359 		*ring_cntmax = IX_MAX_TXRING_82599;
360 		break;
361 
362 	case ixgbe_mac_X540:
363 		*ring_cntmax = IX_MAX_TXRING_X540;
364 		break;
365 
366 	case ixgbe_mac_X550:
367 	case ixgbe_mac_X550EM_x:
368 	case ixgbe_mac_X550EM_a:
369 		*ring_cntmax = IX_MAX_TXRING_X550;
370 		break;
371 
372 	default:
373 		*ring_cntmax = IX_MAX_TXRING;
374 		break;
375 	}
376 	*ring_cnt = device_getenv_int(sc->dev, "txr", ix_txr);
377 }
378 
379 static int
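/*
 * Device attach: map the register BAR, size the RX/TX ring maps, allocate
 * rings, interrupts and serializers, initialize the shared code, validate
 * the EEPROM and init the hardware, then register the ifnet, sysctl nodes
 * and interrupt handlers.  On any failure ix_detach() cleans up.
 */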
380 ix_attach(device_t dev)
381 {
382 	struct ix_softc *sc = device_get_softc(dev);
383 	struct ixgbe_hw *hw;
384 	int error, ring_cnt, ring_cntmax;
385 	uint16_t csum;
386 	uint32_t ctrl_ext;
387 	char flowctrl[IFM_ETH_FC_STRLEN];
388 
389 	sc->dev = sc->osdep.dev = dev;
390 	hw = &sc->hw;
391 
392 	if_initname(&sc->arpcom.ac_if, device_get_name(dev),
393 	    device_get_unit(dev));
394 	ifmedia_init(&sc->media, IFM_IMASK | IFM_ETH_FCMASK,
395 	    ix_media_change, ix_media_status);
396 
397 	/* Save frame size */
398 	sc->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;
399 
400 	sc->direct_input = ix_direct_input;
401 	TASK_INIT(&sc->wdog_task, 0, ix_watchdog_task, sc);
402 
403 	callout_init_mp(&sc->timer);
404 	lwkt_serialize_init(&sc->main_serialize);
405 
406 	/*
407 	 * Save off the information about this board
408 	 */
409 	hw->vendor_id = pci_get_vendor(dev);
410 	hw->device_id = pci_get_device(dev);
411 	hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
412 	hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2);
413 	hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);
414 
415 	ixgbe_set_mac_type(hw);
416 
417 	/* Smart speed applies to 82599 and later MACs */
418 	if (hw->mac.type != ixgbe_mac_82598EB)
419 		hw->phy.smart_speed = ix_smart_speed;
420 
421 	/* Enable bus mastering */
422 	pci_enable_busmaster(dev);
423 
424 	/*
425 	 * Allocate IO memory
426 	 */
427 	sc->mem_rid = PCIR_BAR(0);
428 	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
429 	    &sc->mem_rid, RF_ACTIVE);
430 	if (sc->mem_res == NULL) {
431 		device_printf(dev, "Unable to allocate bus resource: memory\n");
432 		error = ENXIO;
433 		goto failed;
434 	}
435 
436 	sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->mem_res);
437 	sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->mem_res);
438 
439 	sc->hw.hw_addr = (uint8_t *)&sc->osdep.mem_bus_space_handle;
440 	sc->hw.back = &sc->osdep;
441 
442 	/*
443 	 * Configure total supported RX/TX ring count
444 	 */
445 	ix_get_rxring_cnt(sc, &ring_cnt, &ring_cntmax);
446 	sc->rx_rmap = if_ringmap_alloc(dev, ring_cnt, ring_cntmax);
447 	ix_get_txring_cnt(sc, &ring_cnt, &ring_cntmax);
448 	sc->tx_rmap = if_ringmap_alloc(dev, ring_cnt, ring_cntmax);
449 	if_ringmap_match(dev, sc->rx_rmap, sc->tx_rmap);
450 
451 	sc->rx_ring_cnt = if_ringmap_count(sc->rx_rmap);
452 	sc->rx_ring_inuse = sc->rx_ring_cnt;
453 	sc->tx_ring_cnt = if_ringmap_count(sc->tx_rmap);
454 	sc->tx_ring_inuse = sc->tx_ring_cnt;
455 
456 	/* Allocate TX/RX rings */
457 	error = ix_alloc_rings(sc);
458 	if (error)
459 		goto failed;
460 
461 	/* Allocate interrupt */
462 	error = ix_alloc_intr(sc);
463 	if (error)
464 		goto failed;
465 
466 	/* Set up serializers */
467 	ix_setup_serialize(sc);
468 
469 	/* Allocate multicast array memory. */
470 	sc->mta = kmalloc(IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR,
471 	    M_DEVBUF, M_WAITOK);
472 
473 	/* Initialize the shared code */
474 	hw->allow_unsupported_sfp = ix_unsupported_sfp;
475 	error = ixgbe_init_shared_code(hw);
476 	if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
477 		/*
478 		 * No optics in this port; ask timer routine
479 		 * to probe for later insertion.
480 		 */
481 		sc->sfp_probe = TRUE;
482 		error = 0;
483 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
484 		device_printf(dev, "Unsupported SFP+ module detected!\n");
485 		error = EIO;
486 		goto failed;
487 	} else if (error) {
488 		device_printf(dev, "Unable to initialize the shared code\n");
489 		error = EIO;
490 		goto failed;
491 	}
492 
493 	/* Make sure we have a good EEPROM before we read from it */
494 	if (ixgbe_validate_eeprom_checksum(&sc->hw, &csum) < 0) {
495 		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
496 		error = EIO;
497 		goto failed;
498 	}
499 
500 	error = ixgbe_init_hw(hw);
501 	if (error == IXGBE_ERR_EEPROM_VERSION) {
502 		device_printf(dev, "Pre-production device detected\n");
503 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
504 		device_printf(dev, "Unsupported SFP+ Module\n");
505 		error = EIO;
506 		goto failed;
507 	} else if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
508 		device_printf(dev, "No SFP+ Module found\n");
509 	}
510 
511 	sc->ifm_media = IX_IFM_DEFAULT;
512 	/* Get default flow control settings */
513 	device_getenv_string(dev, "flow_ctrl", flowctrl, sizeof(flowctrl),
514 	    ix_flowctrl);
515 	sc->ifm_media |= ifmedia_str2ethfc(flowctrl);
516 	sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;
517 
518 	/* Setup OS specific network interface */
519 	ix_setup_ifp(sc);
520 
521 	/* Add sysctl tree */
522 	ix_add_sysctl(sc);
523 
524 	error = ix_setup_intr(sc);
525 	if (error) {
526 		ether_ifdetach(&sc->arpcom.ac_if);
527 		goto failed;
528 	}
529 
530 	/* Initialize statistics */
531 	ix_update_stats(sc);
532 
533 	/* Check PCIE slot type/speed/width */
534 	ix_slot_info(sc);
535 
536 	/* Save initial wake up filter configuration */
537 	sc->wufc = IXGBE_READ_REG(hw, IXGBE_WUFC);
538 
539 	/* Let hardware know driver is loaded */
540 	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
541 	ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
542 	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
543 
544 	return 0;
545 failed:
546 	ix_detach(dev);
547 	return error;
548 }
549 
550 static int
551 ix_detach(device_t dev)
552 {
553 	struct ix_softc *sc = device_get_softc(dev);
554 
555 	if (device_is_attached(dev)) {
556 		struct ifnet *ifp = &sc->arpcom.ac_if;
557 		uint32_t ctrl_ext;
558 
559 		ix_sync_netisr(sc, IFF_UP);
560 		taskqueue_drain(taskqueue_thread[0], &sc->wdog_task);
561 
562 		ifnet_serialize_all(ifp);
563 
564 		ix_powerdown(sc);
565 		ix_teardown_intr(sc, sc->intr_cnt);
566 
567 		ifnet_deserialize_all(ifp);
568 
569 		callout_terminate(&sc->timer);
570 		ether_ifdetach(ifp);
571 
572 		/* Let hardware know driver is unloading */
573 		ctrl_ext = IXGBE_READ_REG(&sc->hw, IXGBE_CTRL_EXT);
574 		ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
575 		IXGBE_WRITE_REG(&sc->hw, IXGBE_CTRL_EXT, ctrl_ext);
576 	}
577 
578 	ifmedia_removeall(&sc->media);
579 	bus_generic_detach(dev);
580 
581 	ix_free_intr(sc);
582 
583 	if (sc->msix_mem_res != NULL) {
584 		bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_mem_rid,
585 		    sc->msix_mem_res);
586 	}
587 	if (sc->mem_res != NULL) {
588 		bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid,
589 		    sc->mem_res);
590 	}
591 
592 	ix_free_rings(sc);
593 
594 	if (sc->mta != NULL)
595 		kfree(sc->mta, M_DEVBUF);
596 	if (sc->serializes != NULL)
597 		kfree(sc->serializes, M_DEVBUF);
598 
599 	if (sc->rx_rmap != NULL)
600 		if_ringmap_free(sc->rx_rmap);
601 	if (sc->rx_rmap_intr != NULL)
602 		if_ringmap_free(sc->rx_rmap_intr);
603 	if (sc->tx_rmap != NULL)
604 		if_ringmap_free(sc->tx_rmap);
605 	if (sc->tx_rmap_intr != NULL)
606 		if_ringmap_free(sc->tx_rmap_intr);
607 
608 	return 0;
609 }
610 
611 static int
612 ix_shutdown(device_t dev)
613 {
614 	struct ix_softc *sc = device_get_softc(dev);
615 	struct ifnet *ifp = &sc->arpcom.ac_if;
616 
617 	ix_sync_netisr(sc, IFF_UP);
618 	taskqueue_drain(taskqueue_thread[0], &sc->wdog_task);
619 
620 	ifnet_serialize_all(ifp);
621 	ix_powerdown(sc);
622 	ifnet_deserialize_all(ifp);
623 
624 	return 0;
625 }
626 
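/*
 * if_start callback for one TX subqueue: dequeue and encapsulate packets
 * until the free descriptor count drops to IX_MAX_SCATTER + IX_TX_RESERVED,
 * writing the tail register (TDT) once every tx_wreg_nsegs segments and
 * once more at the end of the burst.
 */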
627 static void
628 ix_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
629 {
630 	struct ix_softc *sc = ifp->if_softc;
631 	struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
632 	int idx = -1;
633 	uint16_t nsegs;
634 
635 	KKASSERT(txr->tx_ifsq == ifsq);
636 	ASSERT_SERIALIZED(&txr->tx_serialize);
637 
638 	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
639 		return;
640 
641 	if (!sc->link_active || (txr->tx_flags & IX_TXFLAG_ENABLED) == 0) {
642 		ifsq_purge(ifsq);
643 		return;
644 	}
645 
646 	while (!ifsq_is_empty(ifsq)) {
647 		struct mbuf *m_head;
648 
649 		if (txr->tx_avail <= IX_MAX_SCATTER + IX_TX_RESERVED) {
650 			ifsq_set_oactive(ifsq);
651 			txr->tx_watchdog.wd_timer = 5;
652 			break;
653 		}
654 
655 		m_head = ifsq_dequeue(ifsq);
656 		if (m_head == NULL)
657 			break;
658 
659 		if (ix_encap(txr, &m_head, &nsegs, &idx)) {
660 			IFNET_STAT_INC(ifp, oerrors, 1);
661 			continue;
662 		}
663 
664 		/*
665 		 * TX interrupts are aggressively aggregated, so increasing
666 		 * opackets at TX interrupt time will make the opackets
667 		 * statistics vastly inaccurate; we do the opackets increment
668 		 * now.
669 		 */
670 		IFNET_STAT_INC(ifp, opackets, 1);
671 
672 		if (nsegs >= txr->tx_wreg_nsegs) {
673 			IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
674 			nsegs = 0;
675 			idx = -1;
676 		}
677 
678 		ETHER_BPF_MTAP(ifp, m_head);
679 	}
680 	if (idx >= 0)
681 		IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
682 }
683 
684 static int
685 ix_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
686 {
687 	struct ix_softc *sc = ifp->if_softc;
688 	struct ifreq *ifr = (struct ifreq *) data;
689 	int error = 0, mask, reinit;
690 
691 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
692 
693 	switch (command) {
694 	case SIOCSIFMTU:
695 		if (ifr->ifr_mtu > IX_MAX_MTU) {
696 			error = EINVAL;
697 		} else {
698 			ifp->if_mtu = ifr->ifr_mtu;
699 			sc->max_frame_size = ifp->if_mtu + IX_MTU_HDR;
700 			ix_init(sc);
701 		}
702 		break;
703 
704 	case SIOCSIFFLAGS:
705 		if (ifp->if_flags & IFF_UP) {
706 			if (ifp->if_flags & IFF_RUNNING) {
707 				if ((ifp->if_flags ^ sc->if_flags) &
708 				    (IFF_PROMISC | IFF_ALLMULTI))
709 					ix_set_promisc(sc);
710 			} else {
711 				ix_init(sc);
712 			}
713 		} else if (ifp->if_flags & IFF_RUNNING) {
714 			ix_stop(sc);
715 		}
716 		sc->if_flags = ifp->if_flags;
717 		break;
718 
719 	case SIOCADDMULTI:
720 	case SIOCDELMULTI:
721 		if (ifp->if_flags & IFF_RUNNING) {
722 			ix_disable_intr(sc);
723 			ix_set_multi(sc);
724 #ifdef IFPOLL_ENABLE
725 			if ((ifp->if_flags & IFF_NPOLLING) == 0)
726 #endif
727 				ix_enable_intr(sc);
728 		}
729 		break;
730 
731 	case SIOCSIFMEDIA:
732 	case SIOCGIFMEDIA:
733 		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
734 		break;
735 
736 	case SIOCSIFCAP:
737 		reinit = 0;
738 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
739 		if (mask & IFCAP_RXCSUM) {
740 			ifp->if_capenable ^= IFCAP_RXCSUM;
741 			reinit = 1;
742 		}
743 		if (mask & IFCAP_VLAN_HWTAGGING) {
744 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
745 			reinit = 1;
746 		}
747 		if (mask & IFCAP_TXCSUM) {
748 			ifp->if_capenable ^= IFCAP_TXCSUM;
749 			if (ifp->if_capenable & IFCAP_TXCSUM)
750 				ifp->if_hwassist |= CSUM_OFFLOAD;
751 			else
752 				ifp->if_hwassist &= ~CSUM_OFFLOAD;
753 		}
754 		if (mask & IFCAP_TSO) {
755 			ifp->if_capenable ^= IFCAP_TSO;
756 			if (ifp->if_capenable & IFCAP_TSO)
757 				ifp->if_hwassist |= CSUM_TSO;
758 			else
759 				ifp->if_hwassist &= ~CSUM_TSO;
760 		}
761 		if (mask & IFCAP_RSS)
762 			ifp->if_capenable ^= IFCAP_RSS;
763 		if (reinit && (ifp->if_flags & IFF_RUNNING))
764 			ix_init(sc);
765 		break;
766 
767 #if 0
768 	case SIOCGI2C:
769 	{
770 		struct ixgbe_i2c_req	i2c;
771 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
772 		if (error)
773 			break;
774 		if ((i2c.dev_addr != 0xA0) && (i2c.dev_addr != 0xA2)) {
775 			error = EINVAL;
776 			break;
777 		}
778 		hw->phy.ops.read_i2c_byte(hw, i2c.offset,
779 		    i2c.dev_addr, i2c.data);
780 		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
781 		break;
782 	}
783 #endif
784 
785 	default:
786 		error = ether_ioctl(ifp, command, data);
787 		break;
788 	}
789 	return error;
790 }
791 
792 #define IXGBE_MHADD_MFS_SHIFT 16
793 
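/*
 * (Re)initialize hardware and software state: stop the chip, program the
 * station address (RAR 0), multicast table, TX/RX units and interrupt
 * vectors (IVARs) plus moderation (EITR), then configure the link and
 * mark the interface running.
 */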
794 static void
795 ix_init(void *xsc)
796 {
797 	struct ix_softc *sc = xsc;
798 	struct ifnet *ifp = &sc->arpcom.ac_if;
799 	struct ixgbe_hw *hw = &sc->hw;
800 	uint32_t gpie, rxctrl;
801 	int i, error;
802 	boolean_t polling;
803 
804 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
805 
806 	ix_stop(sc);
807 
808 	polling = FALSE;
809 #ifdef IFPOLL_ENABLE
810 	if (ifp->if_flags & IFF_NPOLLING)
811 		polling = TRUE;
812 #endif
813 
814 	/* Configure # of used RX/TX rings */
815 	ix_set_ring_inuse(sc, polling);
816 	ifq_set_subq_divisor(&ifp->if_snd, sc->tx_ring_inuse);
817 
818 	/* Get the latest mac address, User can use a LAA */
819 	bcopy(IF_LLADDR(ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS);
820 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
821 	hw->addr_ctrl.rar_used_count = 1;
822 
823 	/* Prepare transmit descriptors and buffers */
824 	for (i = 0; i < sc->tx_ring_inuse; ++i)
825 		ix_init_tx_ring(&sc->tx_rings[i]);
826 
827 	ixgbe_init_hw(hw);
828 	ix_init_tx_unit(sc);
829 
830 	/* Setup Multicast table */
831 	ix_set_multi(sc);
832 
833 	/* Prepare receive descriptors and buffers */
834 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
835 		error = ix_init_rx_ring(&sc->rx_rings[i]);
836 		if (error) {
837 			if_printf(ifp, "Could not initialize RX ring%d\n", i);
838 			ix_stop(sc);
839 			return;
840 		}
841 	}
842 
843 	/* Configure RX settings */
844 	ix_init_rx_unit(sc, polling);
845 
846 	gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
847 
848 	/* Enable Fan Failure Interrupt */
849 	gpie |= IXGBE_SDP1_GPIEN_BY_MAC(hw);
850 
851 	/* Add for Module detection */
852 	if (hw->mac.type == ixgbe_mac_82599EB)
853 		gpie |= IXGBE_SDP2_GPIEN;
854 
855 	/*
856 	 * Thermal Failure Detection (X540)
857 	 * Link Detection (X552)
858 	 */
859 	if (hw->mac.type == ixgbe_mac_X540 ||
860 	    hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
861 	    hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T)
862 		gpie |= IXGBE_SDP0_GPIEN_X540;
863 
864 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
865 		/* Enable Enhanced MSIX mode */
866 		gpie |= IXGBE_GPIE_MSIX_MODE;
867 		gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
868 		    IXGBE_GPIE_OCD;
869 	}
870 	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
871 
872 	/* Set MTU size */
873 	if (ifp->if_mtu > ETHERMTU) {
874 		uint32_t mhadd;
875 
876 		/* aka IXGBE_MAXFRS on 82599 and newer */
877 		mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
878 		mhadd &= ~IXGBE_MHADD_MFS_MASK;
879 		mhadd |= sc->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
880 		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
881 	}
882 
883 	/*
884 	 * Enable TX rings
885 	 */
886 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
887 		uint32_t txdctl;
888 
889 		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
890 		txdctl |= IXGBE_TXDCTL_ENABLE;
891 
892 		/*
893 		 * Set WTHRESH to 0, since TX head write-back is used
894 		 */
895 		txdctl &= ~(0x7f << 16);
896 
897 		/*
898 		 * When the internal queue falls below PTHRESH (32),
899 		 * start prefetching as long as there are at least
900 		 * HTHRESH (1) buffers ready. The values are taken
901 		 * from the Intel linux driver 3.8.21.
902 		 * Prefetching enables tx line rate even with 1 queue.
903 		 */
904 		txdctl |= (32 << 0) | (1 << 8);
905 		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
906 	}
907 
908 	/*
909 	 * Enable RX rings
910 	 */
911 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
912 		uint32_t rxdctl;
913 		int k;
914 
915 		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
916 		if (hw->mac.type == ixgbe_mac_82598EB) {
917 			/*
918 			 * PTHRESH = 21
919 			 * HTHRESH = 4
920 			 * WTHRESH = 8
921 			 */
922 			rxdctl &= ~0x3FFFFF;
923 			rxdctl |= 0x080420;
924 		}
925 		rxdctl |= IXGBE_RXDCTL_ENABLE;
926 		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
927 		for (k = 0; k < 10; ++k) {
928 			if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
929 			    IXGBE_RXDCTL_ENABLE)
930 				break;
931 			else
932 				msec_delay(1);
933 		}
934 		wmb();
935 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i),
936 		    sc->rx_rings[0].rx_ndesc - 1);
937 	}
938 
939 	/* Enable Receive engine */
940 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
941 	if (hw->mac.type == ixgbe_mac_82598EB)
942 		rxctrl |= IXGBE_RXCTRL_DMBYPS;
943 	rxctrl |= IXGBE_RXCTRL_RXEN;
944 	ixgbe_enable_rx_dma(hw, rxctrl);
945 
946 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
947 		const struct ix_tx_ring *txr = &sc->tx_rings[i];
948 
949 		if (txr->tx_intr_vec >= 0) {
950 			ix_set_ivar(sc, i, txr->tx_intr_vec, 1);
951 		} else if (!polling) {
952 			/*
953 			 * An unconfigured TX interrupt vector can only
954 			 * happen with MSI-X.
955 			 */
956 			KASSERT(sc->intr_type == PCI_INTR_TYPE_MSIX,
957 			    ("TX intr vector is not set"));
958 			if (bootverbose)
959 				if_printf(ifp, "IVAR skips TX ring %d\n", i);
960 		}
961 	}
962 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
963 		const struct ix_rx_ring *rxr = &sc->rx_rings[i];
964 
965 		if (polling && rxr->rx_intr_vec < 0)
966 			continue;
967 
968 		KKASSERT(rxr->rx_intr_vec >= 0);
969 		ix_set_ivar(sc, i, rxr->rx_intr_vec, 0);
970 		if (rxr->rx_txr != NULL) {
971 			/*
972 			 * Piggyback the TX ring interrupt onto the RX
973 			 * ring interrupt vector.
974 			 */
975 			KASSERT(rxr->rx_txr->tx_intr_vec < 0,
976 			    ("piggybacked TX ring configured intr vector"));
977 			ix_set_ivar(sc, rxr->rx_txr->tx_idx,
978 			    rxr->rx_intr_vec, 1);
979 			if (bootverbose) {
980 				if_printf(ifp, "IVAR RX ring %d piggybacks "
981 				    "TX ring %u\n", i, rxr->rx_txr->tx_idx);
982 			}
983 		}
984 	}
985 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
986 		/* Set up status MSI-X vector; it is using fixed entry 1 */
987 		ix_set_ivar(sc, 1, sc->sts_msix_vec, -1);
988 
989 		/* Set up auto-mask for TX and RX rings */
990 		if (hw->mac.type == ixgbe_mac_82598EB) {
991 			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
992 		} else {
993 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
994 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
995 		}
996 	} else {
997 		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
998 	}
999 	for (i = 0; i < sc->intr_cnt; ++i)
1000 		ix_set_eitr(sc, i, sc->intr_data[i].intr_rate);
1001 
1002 	/*
1003 	 * Check on any SFP devices that need to be kick-started
1004 	 */
1005 	if (hw->phy.type == ixgbe_phy_none) {
1006 		error = hw->phy.ops.identify(hw);
1007 		if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
1008 			if_printf(ifp,
1009 			    "Unsupported SFP+ module type was detected.\n");
1010 			/* XXX stop */
1011 			return;
1012 		}
1013 	}
1014 
1015 	/* Config/Enable Link */
1016 	ix_config_link(sc);
1017 
1018 	/* Hardware Packet Buffer & Flow Control setup */
1019 	ix_config_flowctrl(sc);
1020 
1021 	/* Initialize the FC settings */
1022 	ixgbe_start_hw(hw);
1023 
1024 	/* Set up VLAN support and filter */
1025 	ix_set_vlan(sc);
1026 
1027 	/* Setup DMA Coalescing */
1028 	ix_config_dmac(sc);
1029 
1030 	/*
1031 	 * Only enable interrupts if we are not polling; make sure
1032 	 * they are off otherwise.
1033 	 */
1034 	if (polling)
1035 		ix_disable_intr(sc);
1036 	else
1037 		ix_enable_intr(sc);
1038 
1039 	ifp->if_flags |= IFF_RUNNING;
1040 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
1041 		ifsq_clr_oactive(sc->tx_rings[i].tx_ifsq);
1042 		ifsq_watchdog_start(&sc->tx_rings[i].tx_watchdog);
1043 	}
1044 
1045 	ix_set_timer_cpuid(sc, polling);
1046 	callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
1047 }
1048 
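/*
 * Legacy/MSI interrupt handler: services RX ring 0 (and ring 1 when two
 * RSS rings are in use), TX ring 0 and any status causes indicated by
 * EICR, then re-enables the interrupt mask.
 */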
1049 static void
1050 ix_intr(void *xsc)
1051 {
1052 	struct ix_softc *sc = xsc;
1053 	struct ixgbe_hw	*hw = &sc->hw;
1054 	uint32_t eicr;
1055 
1056 	ASSERT_SERIALIZED(&sc->main_serialize);
1057 
1058 	eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1059 	if (eicr == 0) {
1060 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
1061 		return;
1062 	}
1063 
1064 	if (eicr & IX_RX0_INTR_MASK) {
1065 		struct ix_rx_ring *rxr = &sc->rx_rings[0];
1066 
1067 		lwkt_serialize_enter(&rxr->rx_serialize);
1068 		ix_rxeof(rxr, -1);
1069 		lwkt_serialize_exit(&rxr->rx_serialize);
1070 	}
1071 	if (eicr & IX_RX1_INTR_MASK) {
1072 		struct ix_rx_ring *rxr;
1073 
1074 		KKASSERT(sc->rx_ring_inuse == IX_MIN_RXRING_RSS);
1075 		rxr = &sc->rx_rings[1];
1076 
1077 		lwkt_serialize_enter(&rxr->rx_serialize);
1078 		ix_rxeof(rxr, -1);
1079 		lwkt_serialize_exit(&rxr->rx_serialize);
1080 	}
1081 
1082 	if (eicr & IX_TX_INTR_MASK) {
1083 		struct ix_tx_ring *txr = &sc->tx_rings[0];
1084 
1085 		lwkt_serialize_enter(&txr->tx_serialize);
1086 		ix_txeof(txr, *(txr->tx_hdr));
1087 		if (!ifsq_is_empty(txr->tx_ifsq))
1088 			ifsq_devstart(txr->tx_ifsq);
1089 		lwkt_serialize_exit(&txr->tx_serialize);
1090 	}
1091 
1092 	if (__predict_false(eicr & IX_EICR_STATUS))
1093 		ix_intr_status(sc, eicr);
1094 
1095 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
1096 }
1097 
1098 static void
1099 ix_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1100 {
1101 	struct ix_softc *sc = ifp->if_softc;
1102 	struct ifmedia *ifm = &sc->media;
1103 	int layer;
1104 
1105 	ix_update_link_status(sc);
1106 
1107 	ifmr->ifm_status = IFM_AVALID;
1108 	ifmr->ifm_active = IFM_ETHER;
1109 
1110 	if (!sc->link_active) {
1111 		if (IFM_SUBTYPE(ifm->ifm_media) != IFM_AUTO)
1112 			ifmr->ifm_active |= ifm->ifm_media;
1113 		else
1114 			ifmr->ifm_active |= IFM_NONE;
1115 		return;
1116 	}
1117 	ifmr->ifm_status |= IFM_ACTIVE;
1118 
1119 	layer = ixgbe_get_supported_physical_layer(&sc->hw);
1120 
1121 	if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) ||
1122 	    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) ||
1123 	    (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX)) {
1124 		switch (sc->link_speed) {
1125 		case IXGBE_LINK_SPEED_10GB_FULL:
1126 			ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
1127 			break;
1128 		case IXGBE_LINK_SPEED_1GB_FULL:
1129 			ifmr->ifm_active |= IFM_1000_T | IFM_FDX;
1130 			break;
1131 		case IXGBE_LINK_SPEED_100_FULL:
1132 			ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1133 			break;
1134 		}
1135 	} else if ((layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) ||
1136 	    (layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA)) {
1137 		switch (sc->link_speed) {
1138 		case IXGBE_LINK_SPEED_10GB_FULL:
1139 			ifmr->ifm_active |= IFM_10G_TWINAX | IFM_FDX;
1140 			break;
1141 		}
1142 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) {
1143 		switch (sc->link_speed) {
1144 		case IXGBE_LINK_SPEED_10GB_FULL:
1145 			ifmr->ifm_active |= IFM_10G_LR | IFM_FDX;
1146 			break;
1147 		case IXGBE_LINK_SPEED_1GB_FULL:
1148 			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
1149 			break;
1150 		}
1151 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LRM) {
1152 		switch (sc->link_speed) {
1153 		case IXGBE_LINK_SPEED_10GB_FULL:
1154 			ifmr->ifm_active |= IFM_10G_LRM | IFM_FDX;
1155 			break;
1156 		case IXGBE_LINK_SPEED_1GB_FULL:
1157 			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
1158 			break;
1159 		}
1160 	} else if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) ||
1161 	    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX)) {
1162 		switch (sc->link_speed) {
1163 		case IXGBE_LINK_SPEED_10GB_FULL:
1164 			ifmr->ifm_active |= IFM_10G_SR | IFM_FDX;
1165 			break;
1166 		case IXGBE_LINK_SPEED_1GB_FULL:
1167 			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1168 			break;
1169 		}
1170 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) {
1171 		switch (sc->link_speed) {
1172 		case IXGBE_LINK_SPEED_10GB_FULL:
1173 			ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX;
1174 			break;
1175 		}
1176 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) {
1177 		/*
1178 		 * XXX: These need to use the proper media types once
1179 		 * they're added.
1180 		 */
1181 		switch (sc->link_speed) {
1182 		case IXGBE_LINK_SPEED_10GB_FULL:
1183 			ifmr->ifm_active |= IFM_10G_SR | IFM_FDX;
1184 			break;
1185 		case IXGBE_LINK_SPEED_2_5GB_FULL:
1186 			ifmr->ifm_active |= IFM_2500_SX | IFM_FDX;
1187 			break;
1188 		case IXGBE_LINK_SPEED_1GB_FULL:
1189 			ifmr->ifm_active |= IFM_1000_CX | IFM_FDX;
1190 			break;
1191 		}
1192 	} else if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) ||
1193 	    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX)) {
1194 		/*
1195 		 * XXX: These need to use the proper media types once
1196 		 * they're added.
1197 		 */
1198 		switch (sc->link_speed) {
1199 		case IXGBE_LINK_SPEED_10GB_FULL:
1200 			ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX;
1201 			break;
1202 		case IXGBE_LINK_SPEED_2_5GB_FULL:
1203 			ifmr->ifm_active |= IFM_2500_SX | IFM_FDX;
1204 			break;
1205 		case IXGBE_LINK_SPEED_1GB_FULL:
1206 			ifmr->ifm_active |= IFM_1000_CX | IFM_FDX;
1207 			break;
1208 		}
1209 	}
1210 
1211 	/* If nothing is recognized... */
1212 	if (IFM_SUBTYPE(ifmr->ifm_active) == 0)
1213 		ifmr->ifm_active |= IFM_NONE;
1214 
1215 	if (sc->ifm_media & IFM_ETH_FORCEPAUSE)
1216 		ifmr->ifm_active |= (sc->ifm_media & IFM_ETH_FCMASK);
1217 
1218 	switch (sc->hw.fc.current_mode) {
1219 	case ixgbe_fc_full:
1220 		ifmr->ifm_active |= IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE;
1221 		break;
1222 	case ixgbe_fc_rx_pause:
1223 		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
1224 		break;
1225 	case ixgbe_fc_tx_pause:
1226 		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
1227 		break;
1228 	default:
1229 		break;
1230 	}
1231 }
1232 
1233 static int
1234 ix_media_change(struct ifnet *ifp)
1235 {
1236 	struct ix_softc *sc = ifp->if_softc;
1237 	struct ifmedia *ifm = &sc->media;
1238 	struct ixgbe_hw *hw = &sc->hw;
1239 
1240 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1241 		return (EINVAL);
1242 
1243 	if (hw->phy.media_type == ixgbe_media_type_backplane ||
1244 	    hw->mac.ops.setup_link == NULL) {
1245 		if ((ifm->ifm_media ^ sc->ifm_media) & IFM_ETH_FCMASK) {
1246 			/* Only flow control setting changes are allowed */
1247 			return (EOPNOTSUPP);
1248 		}
1249 	}
1250 
1251 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1252 	case IFM_AUTO:
1253 		sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;
1254 		break;
1255 
1256 	case IFM_10G_T:
1257 	case IFM_10G_LRM:
1258 	case IFM_10G_SR:	/* XXX also KR */
1259 	case IFM_10G_LR:
1260 	case IFM_10G_CX4:	/* XXX also KX4 */
1261 	case IFM_10G_TWINAX:
1262 		sc->advspeed = IXGBE_LINK_SPEED_10GB_FULL;
1263 		break;
1264 
1265 	case IFM_1000_T:
1266 	case IFM_1000_LX:
1267 	case IFM_1000_SX:
1268 	case IFM_1000_CX:	/* XXX is KX */
1269 		sc->advspeed = IXGBE_LINK_SPEED_1GB_FULL;
1270 		break;
1271 
1272 	case IFM_100_TX:
1273 		sc->advspeed = IXGBE_LINK_SPEED_100_FULL;
1274 		break;
1275 
1276 	default:
1277 		if (bootverbose) {
1278 			if_printf(ifp, "Invalid media type %d!\n",
1279 			    ifm->ifm_media);
1280 		}
1281 		return EINVAL;
1282 	}
1283 	sc->ifm_media = ifm->ifm_media;
1284 
1285 #if 0
1286 	if (hw->mac.ops.setup_link != NULL) {
1287 		hw->mac.autotry_restart = TRUE;
1288 		hw->mac.ops.setup_link(hw, sc->advspeed, TRUE);
1289 	}
1290 #else
1291 	if (ifp->if_flags & IFF_RUNNING)
1292 		ix_init(sc);
1293 #endif
1294 	return 0;
1295 }
1296 
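/*
 * Make sure the Ethernet/IP/TCP headers of a TSO frame are contiguous
 * in the first mbuf, pulling up the chain if necessary.
 */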
1297 static __inline int
1298 ix_tso_pullup(struct mbuf **mp)
1299 {
1300 	int hoff, iphlen, thoff;
1301 	struct mbuf *m;
1302 
1303 	m = *mp;
1304 	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));
1305 
1306 	iphlen = m->m_pkthdr.csum_iphlen;
1307 	thoff = m->m_pkthdr.csum_thlen;
1308 	hoff = m->m_pkthdr.csum_lhlen;
1309 
1310 	KASSERT(iphlen > 0, ("invalid ip hlen"));
1311 	KASSERT(thoff > 0, ("invalid tcp hlen"));
1312 	KASSERT(hoff > 0, ("invalid ether hlen"));
1313 
1314 	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
1315 		m = m_pullup(m, hoff + iphlen + thoff);
1316 		if (m == NULL) {
1317 			*mp = NULL;
1318 			return ENOBUFS;
1319 		}
1320 		*mp = m;
1321 	}
1322 	return 0;
1323 }
1324 
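/*
 * Map a frame for DMA and fill advanced TX descriptors for it; an offload
 * context may consume one extra descriptor.  The TDT update is deferred
 * to the caller via *idx.
 */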
1325 static int
1326 ix_encap(struct ix_tx_ring *txr, struct mbuf **m_headp,
1327     uint16_t *segs_used, int *idx)
1328 {
1329 	uint32_t olinfo_status = 0, cmd_type_len, cmd_rs = 0;
1330 	int i, j, error, nsegs, first, maxsegs;
1331 	struct mbuf *m_head = *m_headp;
1332 	bus_dma_segment_t segs[IX_MAX_SCATTER];
1333 	bus_dmamap_t map;
1334 	struct ix_tx_buf *txbuf;
1335 	union ixgbe_adv_tx_desc *txd = NULL;
1336 
1337 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1338 		error = ix_tso_pullup(m_headp);
1339 		if (__predict_false(error))
1340 			return error;
1341 		m_head = *m_headp;
1342 	}
1343 
1344 	/* Basic descriptor defines */
1345 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1346 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1347 
1348 	if (m_head->m_flags & M_VLANTAG)
1349 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1350 
1351 	/*
1352 	 * Important to capture the first descriptor
1353 	 * used because it will contain the index of
1354 	 * the one we tell the hardware to report back
1355 	 */
1356 	first = txr->tx_next_avail;
1357 	txbuf = &txr->tx_buf[first];
1358 	map = txbuf->map;
1359 
1360 	/*
1361 	 * Map the packet for DMA.
1362 	 */
1363 	maxsegs = txr->tx_avail - IX_TX_RESERVED;
1364 	if (maxsegs > IX_MAX_SCATTER)
1365 		maxsegs = IX_MAX_SCATTER;
1366 
1367 	error = bus_dmamap_load_mbuf_defrag(txr->tx_tag, map, m_headp,
1368 	    segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
1369 	if (__predict_false(error)) {
1370 		m_freem(*m_headp);
1371 		*m_headp = NULL;
1372 		return error;
1373 	}
1374 	bus_dmamap_sync(txr->tx_tag, map, BUS_DMASYNC_PREWRITE);
1375 
1376 	m_head = *m_headp;
1377 
1378 	/*
1379 	 * Set up the appropriate offload context if requested;
1380 	 * this may consume one TX descriptor.
1381 	 */
1382 	if (ix_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status)) {
1383 		(*segs_used)++;
1384 		txr->tx_nsegs++;
1385 	}
1386 
1387 	*segs_used += nsegs;
1388 	txr->tx_nsegs += nsegs;
1389 	if (txr->tx_nsegs >= txr->tx_intr_nsegs) {
1390 		/*
1391 		 * Report Status (RS) is turned on every intr_nsegs
1392 		 * descriptors (roughly).
1393 		 */
1394 		txr->tx_nsegs = 0;
1395 		cmd_rs = IXGBE_TXD_CMD_RS;
1396 	}
1397 
1398 	i = txr->tx_next_avail;
1399 	for (j = 0; j < nsegs; j++) {
1400 		bus_size_t seglen;
1401 		bus_addr_t segaddr;
1402 
1403 		txbuf = &txr->tx_buf[i];
1404 		txd = &txr->tx_base[i];
1405 		seglen = segs[j].ds_len;
1406 		segaddr = htole64(segs[j].ds_addr);
1407 
1408 		txd->read.buffer_addr = segaddr;
1409 		txd->read.cmd_type_len = htole32(IXGBE_TXD_CMD_IFCS |
1410 		    cmd_type_len |seglen);
1411 		txd->read.olinfo_status = htole32(olinfo_status);
1412 
1413 		if (++i == txr->tx_ndesc)
1414 			i = 0;
1415 	}
1416 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | cmd_rs);
1417 
1418 	txr->tx_avail -= nsegs;
1419 	txr->tx_next_avail = i;
1420 
1421 	txbuf->m_head = m_head;
1422 	txr->tx_buf[first].map = txbuf->map;
1423 	txbuf->map = map;
1424 
1425 	/*
1426 	 * Defer TDT updating until enough descriptors are set up
1427 	 */
1428 	*idx = i;
1429 
1430 	return 0;
1431 }
1432 
1433 static void
1434 ix_set_promisc(struct ix_softc *sc)
1435 {
1436 	struct ifnet *ifp = &sc->arpcom.ac_if;
1437 	uint32_t reg_rctl;
1438 	int mcnt = 0;
1439 
1440 	reg_rctl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
1441 	reg_rctl &= ~IXGBE_FCTRL_UPE;
1442 	if (ifp->if_flags & IFF_ALLMULTI) {
1443 		mcnt = IX_MAX_MCASTADDR;
1444 	} else {
1445 		struct ifmultiaddr *ifma;
1446 
1447 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1448 			if (ifma->ifma_addr->sa_family != AF_LINK)
1449 				continue;
1450 			if (mcnt == IX_MAX_MCASTADDR)
1451 				break;
1452 			mcnt++;
1453 		}
1454 	}
1455 	if (mcnt < IX_MAX_MCASTADDR)
1456 		reg_rctl &= ~IXGBE_FCTRL_MPE;
1457 	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1458 
1459 	if (ifp->if_flags & IFF_PROMISC) {
1460 		reg_rctl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
1461 		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1462 	} else if (ifp->if_flags & IFF_ALLMULTI) {
1463 		reg_rctl |= IXGBE_FCTRL_MPE;
1464 		reg_rctl &= ~IXGBE_FCTRL_UPE;
1465 		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1466 	}
1467 }
1468 
1469 static void
1470 ix_set_multi(struct ix_softc *sc)
1471 {
1472 	struct ifnet *ifp = &sc->arpcom.ac_if;
1473 	struct ifmultiaddr *ifma;
1474 	uint32_t fctrl;
1475 	uint8_t	*mta;
1476 	int mcnt = 0;
1477 
1478 	mta = sc->mta;
1479 	bzero(mta, IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR);
1480 
1481 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1482 		if (ifma->ifma_addr->sa_family != AF_LINK)
1483 			continue;
1484 		if (mcnt == IX_MAX_MCASTADDR)
1485 			break;
1486 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1487 		    &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
1488 		    IXGBE_ETH_LENGTH_OF_ADDRESS);
1489 		mcnt++;
1490 	}
1491 
1492 	fctrl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
1493 	fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1494 	if (ifp->if_flags & IFF_PROMISC) {
1495 		fctrl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
1496 	} else if (mcnt >= IX_MAX_MCASTADDR || (ifp->if_flags & IFF_ALLMULTI)) {
1497 		fctrl |= IXGBE_FCTRL_MPE;
1498 		fctrl &= ~IXGBE_FCTRL_UPE;
1499 	} else {
1500 		fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1501 	}
1502 	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, fctrl);
1503 
1504 	if (mcnt < IX_MAX_MCASTADDR) {
1505 		ixgbe_update_mc_addr_list(&sc->hw,
1506 		    mta, mcnt, ix_mc_array_itr, TRUE);
1507 	}
1508 }
1509 
1510 /*
1511  * This is an iterator function needed by the multicast shared code.
1512  * It simply feeds the shared-code routine the addresses collected by
1513  * ix_set_multi(), one at a time.
1514  */
1515 static uint8_t *
1516 ix_mc_array_itr(struct ixgbe_hw *hw, uint8_t **update_ptr, uint32_t *vmdq)
1517 {
1518 	uint8_t *addr = *update_ptr;
1519 	uint8_t *newptr;
1520 	*vmdq = 0;
1521 
1522 	newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
1523 	*update_ptr = newptr;
1524 	return addr;
1525 }
1526 
1527 static void
1528 ix_timer(void *arg)
1529 {
1530 	struct ix_softc *sc = arg;
1531 
1532 	lwkt_serialize_enter(&sc->main_serialize);
1533 
1534 	if ((sc->arpcom.ac_if.if_flags & IFF_RUNNING) == 0) {
1535 		lwkt_serialize_exit(&sc->main_serialize);
1536 		return;
1537 	}
1538 
1539 	/* Check for pluggable optics */
1540 	if (sc->sfp_probe) {
1541 		if (!ix_sfp_probe(sc))
1542 			goto done; /* Nothing to do */
1543 	}
1544 
1545 	ix_update_link_status(sc);
1546 	ix_update_stats(sc);
1547 
1548 done:
1549 	callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
1550 	lwkt_serialize_exit(&sc->main_serialize);
1551 }
1552 
1553 static void
1554 ix_update_link_status(struct ix_softc *sc)
1555 {
1556 	struct ifnet *ifp = &sc->arpcom.ac_if;
1557 
1558 	if (sc->link_up) {
1559 		if (sc->link_active == FALSE) {
1560 			if (bootverbose) {
1561 				if_printf(ifp, "Link is up %d Gbps %s\n",
1562 				    sc->link_speed == 128 ? 10 : 1,
1563 				    "Full Duplex");
1564 			}
1565 
1566 			/*
1567 			 * Update any Flow Control changes
1568 			 */
1569 			ixgbe_fc_enable(&sc->hw);
1570 			/* MUST after ixgbe_fc_enable() */
1571 			if (sc->rx_ring_inuse > 1) {
1572 				switch (sc->hw.fc.current_mode) {
1573 				case ixgbe_fc_rx_pause:
1574 				case ixgbe_fc_tx_pause:
1575 				case ixgbe_fc_full:
1576 					ix_disable_rx_drop(sc);
1577 					break;
1578 
1579 				case ixgbe_fc_none:
1580 					ix_enable_rx_drop(sc);
1581 					break;
1582 
1583 				default:
1584 					break;
1585 				}
1586 			}
1587 
1588 			/* Update DMA coalescing config */
1589 			ix_config_dmac(sc);
1590 
1591 			sc->link_active = TRUE;
1592 
1593 			ifp->if_link_state = LINK_STATE_UP;
1594 			if_link_state_change(ifp);
1595 		}
1596 	} else { /* Link down */
1597 		if (sc->link_active == TRUE) {
1598 			if (bootverbose)
1599 				if_printf(ifp, "Link is Down\n");
1600 			ifp->if_link_state = LINK_STATE_DOWN;
1601 			if_link_state_change(ifp);
1602 
1603 			sc->link_active = FALSE;
1604 		}
1605 	}
1606 }
1607 
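/*
 * Stop the adapter: disable interrupts, stop the timer and TX watchdogs,
 * reset and stop the MAC, turn off the SFP laser, mark the link down and
 * free all TX/RX buffers.
 */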
1608 static void
1609 ix_stop(struct ix_softc *sc)
1610 {
1611 	struct ixgbe_hw *hw = &sc->hw;
1612 	struct ifnet *ifp = &sc->arpcom.ac_if;
1613 	int i;
1614 
1615 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1616 
1617 	ix_disable_intr(sc);
1618 	callout_stop(&sc->timer);
1619 
1620 	ifp->if_flags &= ~IFF_RUNNING;
1621 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1622 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1623 
1624 		ifsq_clr_oactive(txr->tx_ifsq);
1625 		ifsq_watchdog_stop(&txr->tx_watchdog);
1626 		txr->tx_flags &= ~IX_TXFLAG_ENABLED;
1627 	}
1628 
1629 	ixgbe_reset_hw(hw);
1630 	hw->adapter_stopped = FALSE;
1631 	ixgbe_stop_adapter(hw);
1632 	if (hw->mac.type == ixgbe_mac_82599EB)
1633 		ixgbe_stop_mac_link_on_d3_82599(hw);
1634 	/* Turn off the laser - noop with no optics */
1635 	ixgbe_disable_tx_laser(hw);
1636 
1637 	/* Update the stack */
1638 	sc->link_up = FALSE;
1639 	ix_update_link_status(sc);
1640 
1641 	/* Reprogram the RAR[0] in case user changed it. */
1642 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV);
1643 
1644 	for (i = 0; i < sc->tx_ring_cnt; ++i)
1645 		ix_free_tx_ring(&sc->tx_rings[i]);
1646 
1647 	for (i = 0; i < sc->rx_ring_cnt; ++i)
1648 		ix_free_rx_ring(&sc->rx_rings[i]);
1649 }
1650 
1651 static void
1652 ix_setup_ifp(struct ix_softc *sc)
1653 {
1654 	struct ixgbe_hw *hw = &sc->hw;
1655 	struct ifnet *ifp = &sc->arpcom.ac_if;
1656 	int i;
1657 
1658 	ifp->if_baudrate = IF_Gbps(10UL);
1659 
1660 	ifp->if_softc = sc;
1661 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1662 	ifp->if_init = ix_init;
1663 	ifp->if_ioctl = ix_ioctl;
1664 	ifp->if_start = ix_start;
1665 	ifp->if_serialize = ix_serialize;
1666 	ifp->if_deserialize = ix_deserialize;
1667 	ifp->if_tryserialize = ix_tryserialize;
1668 #ifdef INVARIANTS
1669 	ifp->if_serialize_assert = ix_serialize_assert;
1670 #endif
1671 #ifdef IFPOLL_ENABLE
1672 	ifp->if_npoll = ix_npoll;
1673 #endif
1674 
1675 	/* Increase TSO burst length */
1676 	ifp->if_tsolen = (8 * ETHERMTU);
1677 
1678 	ifp->if_nmbclusters = sc->rx_ring_cnt * sc->rx_rings[0].rx_ndesc;
1679 	ifp->if_nmbjclusters = ifp->if_nmbclusters;
1680 
1681 	ifq_set_maxlen(&ifp->if_snd, sc->tx_rings[0].tx_ndesc - 2);
1682 	ifq_set_ready(&ifp->if_snd);
1683 	ifq_set_subq_cnt(&ifp->if_snd, sc->tx_ring_cnt);
1684 
1685 	ifp->if_mapsubq = ifq_mapsubq_modulo;
1686 	ifq_set_subq_divisor(&ifp->if_snd, 1);
1687 
1688 	ether_ifattach(ifp, hw->mac.addr, NULL);
1689 
1690 	ifp->if_capabilities =
1691 	    IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
1692 	if (IX_ENABLE_HWRSS(sc))
1693 		ifp->if_capabilities |= IFCAP_RSS;
1694 	ifp->if_capenable = ifp->if_capabilities;
1695 	ifp->if_hwassist = CSUM_OFFLOAD | CSUM_TSO;
1696 
1697 	/*
1698 	 * Tell the upper layer(s) we support long frames.
1699 	 */
1700 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
1701 
1702 	/* Setup TX rings and subqueues */
1703 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1704 		struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
1705 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1706 
1707 		ifsq_set_cpuid(ifsq, txr->tx_intr_cpuid);
1708 		ifsq_set_priv(ifsq, txr);
1709 		ifsq_set_hw_serialize(ifsq, &txr->tx_serialize);
1710 		txr->tx_ifsq = ifsq;
1711 
1712 		ifsq_watchdog_init(&txr->tx_watchdog, ifsq, ix_watchdog);
1713 	}
1714 
1715 	/* Specify the media types supported by this adapter */
1716 	ix_init_media(sc);
1717 }
1718 
1719 static boolean_t
1720 ix_is_sfp(const struct ixgbe_hw *hw)
1721 {
1722 	switch (hw->phy.type) {
1723 	case ixgbe_phy_sfp_avago:
1724 	case ixgbe_phy_sfp_ftl:
1725 	case ixgbe_phy_sfp_intel:
1726 	case ixgbe_phy_sfp_unknown:
1727 	case ixgbe_phy_sfp_passive_tyco:
1728 	case ixgbe_phy_sfp_passive_unknown:
1729 	case ixgbe_phy_qsfp_passive_unknown:
1730 	case ixgbe_phy_qsfp_active_unknown:
1731 	case ixgbe_phy_qsfp_intel:
1732 	case ixgbe_phy_qsfp_unknown:
1733 		return TRUE;
1734 	default:
1735 		return FALSE;
1736 	}
1737 }
1738 
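/*
 * Bring the link up: SFP(+) ports go through the module/multispeed-fiber
 * handlers, other media negotiate via setup_link() using the advertised
 * (or hardware default) speeds.
 */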
1739 static void
1740 ix_config_link(struct ix_softc *sc)
1741 {
1742 	struct ixgbe_hw *hw = &sc->hw;
1743 	boolean_t sfp;
1744 
1745 	sfp = ix_is_sfp(hw);
1746 	if (sfp) {
1747 		if (hw->phy.multispeed_fiber) {
1748 			hw->mac.ops.setup_sfp(hw);
1749 			ixgbe_enable_tx_laser(hw);
1750 			ix_handle_msf(sc);
1751 		} else {
1752 			ix_handle_mod(sc);
1753 		}
1754 	} else {
1755 		uint32_t autoneg, err = 0;
1756 
1757 		if (hw->mac.ops.check_link != NULL) {
1758 			err = ixgbe_check_link(hw, &sc->link_speed,
1759 			    &sc->link_up, FALSE);
1760 			if (err)
1761 				return;
1762 		}
1763 
1764 		if (sc->advspeed != IXGBE_LINK_SPEED_UNKNOWN)
1765 			autoneg = sc->advspeed;
1766 		else
1767 			autoneg = hw->phy.autoneg_advertised;
1768 		if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
1769 			bool negotiate;
1770 
1771 			err = hw->mac.ops.get_link_capabilities(hw,
1772 			    &autoneg, &negotiate);
1773 			if (err)
1774 				return;
1775 		}
1776 
1777 		if (hw->mac.ops.setup_link != NULL) {
1778 			err = hw->mac.ops.setup_link(hw,
1779 			    autoneg, sc->link_up);
1780 			if (err)
1781 				return;
1782 		}
1783 	}
1784 }
1785 
1786 static int
1787 ix_alloc_rings(struct ix_softc *sc)
1788 {
1789 	int error, i;
1790 
1791 	/*
1792 	 * Create top level busdma tag
1793 	 */
1794 	error = bus_dma_tag_create(NULL, 1, 0,
1795 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1796 	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
1797 	    &sc->parent_tag);
1798 	if (error) {
1799 		device_printf(sc->dev, "could not create top level DMA tag\n");
1800 		return error;
1801 	}
1802 
1803 	/*
1804 	 * Allocate TX descriptor rings and buffers
1805 	 */
1806 	sc->tx_rings = kmalloc_cachealign(
1807 	    sizeof(struct ix_tx_ring) * sc->tx_ring_cnt,
1808 	    M_DEVBUF, M_WAITOK | M_ZERO);
1809 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1810 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1811 
1812 		txr->tx_sc = sc;
1813 		txr->tx_idx = i;
1814 		txr->tx_intr_vec = -1;
1815 		txr->tx_intr_cpuid = -1;
1816 		lwkt_serialize_init(&txr->tx_serialize);
1817 
1818 		error = ix_create_tx_ring(txr);
1819 		if (error)
1820 			return error;
1821 	}
1822 
1823 	/*
1824 	 * Allocate RX descriptor rings and buffers
1825 	 */
1826 	sc->rx_rings = kmalloc_cachealign(
1827 	    sizeof(struct ix_rx_ring) * sc->rx_ring_cnt,
1828 	    M_DEVBUF, M_WAITOK | M_ZERO);
1829 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
1830 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
1831 
1832 		rxr->rx_sc = sc;
1833 		rxr->rx_idx = i;
1834 		rxr->rx_intr_vec = -1;
1835 		lwkt_serialize_init(&rxr->rx_serialize);
1836 
1837 		error = ix_create_rx_ring(rxr);
1838 		if (error)
1839 			return error;
1840 	}
1841 
1842 	return 0;
1843 }
1844 
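/*
 * Allocate per-ring TX resources: the head write-back buffer, the
 * descriptor ring itself, the software buffer array and a DMA map for
 * each descriptor slot.
 */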
1845 static int
1846 ix_create_tx_ring(struct ix_tx_ring *txr)
1847 {
1848 	int error, i, tsize, ntxd;
1849 
1850 	/*
1851 	 * Validate the number of transmit descriptors.  It must be within the
1852 	 * hardware limits, and the ring size must be a multiple of IX_DBA_ALIGN.
1853 	 */
1854 	ntxd = device_getenv_int(txr->tx_sc->dev, "txd", ix_txd);
1855 	if (((ntxd * sizeof(union ixgbe_adv_tx_desc)) % IX_DBA_ALIGN) != 0 ||
1856 	    ntxd < IX_MIN_TXD || ntxd > IX_MAX_TXD) {
1857 		device_printf(txr->tx_sc->dev,
1858 		    "Using %d TX descriptors instead of %d!\n",
1859 		    IX_DEF_TXD, ntxd);
1860 		txr->tx_ndesc = IX_DEF_TXD;
1861 	} else {
1862 		txr->tx_ndesc = ntxd;
1863 	}
1864 
1865 	/*
1866 	 * Allocate TX head write-back buffer
1867 	 */
1868 	txr->tx_hdr = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
1869 	    __VM_CACHELINE_SIZE, __VM_CACHELINE_SIZE, BUS_DMA_WAITOK,
1870 	    &txr->tx_hdr_dtag, &txr->tx_hdr_map, &txr->tx_hdr_paddr);
1871 	if (txr->tx_hdr == NULL) {
1872 		device_printf(txr->tx_sc->dev,
1873 		    "Unable to allocate TX head write-back buffer\n");
1874 		return ENOMEM;
1875 	}
1876 
1877 	/*
1878 	 * Allocate TX descriptor ring
1879 	 */
1880 	tsize = roundup2(txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc),
1881 	    IX_DBA_ALIGN);
1882 	txr->tx_base = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
1883 	    IX_DBA_ALIGN, tsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
1884 	    &txr->tx_base_dtag, &txr->tx_base_map, &txr->tx_base_paddr);
1885 	if (txr->tx_base == NULL) {
1886 		device_printf(txr->tx_sc->dev,
1887 		    "Unable to allocate TX Descriptor memory\n");
1888 		return ENOMEM;
1889 	}
1890 
1891 	tsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_tx_buf) * txr->tx_ndesc);
1892 	txr->tx_buf = kmalloc_cachealign(tsize, M_DEVBUF, M_WAITOK | M_ZERO);
1893 
1894 	/*
1895 	 * Create DMA tag for TX buffers
1896 	 */
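	/*
	 * NOTE: maxsize/nsegments below are chosen so that a single map can
	 * cover a full TSO frame (IX_TSO_SIZE) split into at most
	 * IX_MAX_SCATTER segments of at most PAGE_SIZE bytes each.
	 */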
1897 	error = bus_dma_tag_create(txr->tx_sc->parent_tag,
1898 	    1, 0,		/* alignment, bounds */
1899 	    BUS_SPACE_MAXADDR,	/* lowaddr */
1900 	    BUS_SPACE_MAXADDR,	/* highaddr */
1901 	    NULL, NULL,		/* filter, filterarg */
1902 	    IX_TSO_SIZE,	/* maxsize */
1903 	    IX_MAX_SCATTER,	/* nsegments */
1904 	    PAGE_SIZE,		/* maxsegsize */
1905 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
1906 	    BUS_DMA_ONEBPAGE,	/* flags */
1907 	    &txr->tx_tag);
1908 	if (error) {
1909 		device_printf(txr->tx_sc->dev,
1910 		    "Unable to allocate TX DMA tag\n");
1911 		kfree(txr->tx_buf, M_DEVBUF);
1912 		txr->tx_buf = NULL;
1913 		return error;
1914 	}
1915 
1916 	/*
1917 	 * Create DMA maps for TX buffers
1918 	 */
1919 	for (i = 0; i < txr->tx_ndesc; ++i) {
1920 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
1921 
1922 		error = bus_dmamap_create(txr->tx_tag,
1923 		    BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &txbuf->map);
1924 		if (error) {
1925 			device_printf(txr->tx_sc->dev,
1926 			    "Unable to create TX DMA map\n");
1927 			ix_destroy_tx_ring(txr, i);
1928 			return error;
1929 		}
1930 	}
1931 
1932 	/*
1933 	 * Initialize various watermarks
1934 	 */
1935 	txr->tx_wreg_nsegs = IX_DEF_TXWREG_NSEGS;
1936 	txr->tx_intr_nsegs = txr->tx_ndesc / 16;
1937 
1938 	return 0;
1939 }
1940 
1941 static void
1942 ix_destroy_tx_ring(struct ix_tx_ring *txr, int ndesc)
1943 {
1944 	int i;
1945 
1946 	if (txr->tx_hdr != NULL) {
1947 		bus_dmamap_unload(txr->tx_hdr_dtag, txr->tx_hdr_map);
1948 		bus_dmamem_free(txr->tx_hdr_dtag,
1949 		    __DEVOLATILE(void *, txr->tx_hdr), txr->tx_hdr_map);
1950 		bus_dma_tag_destroy(txr->tx_hdr_dtag);
1951 		txr->tx_hdr = NULL;
1952 	}
1953 
1954 	if (txr->tx_base != NULL) {
1955 		bus_dmamap_unload(txr->tx_base_dtag, txr->tx_base_map);
1956 		bus_dmamem_free(txr->tx_base_dtag, txr->tx_base,
1957 		    txr->tx_base_map);
1958 		bus_dma_tag_destroy(txr->tx_base_dtag);
1959 		txr->tx_base = NULL;
1960 	}
1961 
1962 	if (txr->tx_buf == NULL)
1963 		return;
1964 
1965 	for (i = 0; i < ndesc; ++i) {
1966 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
1967 
1968 		KKASSERT(txbuf->m_head == NULL);
1969 		bus_dmamap_destroy(txr->tx_tag, txbuf->map);
1970 	}
1971 	bus_dma_tag_destroy(txr->tx_tag);
1972 
1973 	kfree(txr->tx_buf, M_DEVBUF);
1974 	txr->tx_buf = NULL;
1975 }
1976 
1977 static void
1978 ix_init_tx_ring(struct ix_tx_ring *txr)
1979 {
1980 	/* Clear the old ring contents */
1981 	bzero(txr->tx_base, sizeof(union ixgbe_adv_tx_desc) * txr->tx_ndesc);
1982 
1983 	/* Clear TX head write-back buffer */
1984 	*(txr->tx_hdr) = 0;
1985 
1986 	/* Reset indices */
1987 	txr->tx_next_avail = 0;
1988 	txr->tx_next_clean = 0;
1989 	txr->tx_nsegs = 0;
1990 
1991 	/* Set number of descriptors available */
1992 	txr->tx_avail = txr->tx_ndesc;
1993 
1994 	/* Enable this TX ring */
1995 	txr->tx_flags |= IX_TXFLAG_ENABLED;
1996 }
1997 
1998 static void
1999 ix_init_tx_unit(struct ix_softc *sc)
2000 {
2001 	struct ixgbe_hw	*hw = &sc->hw;
2002 	int i;
2003 
2004 	/*
2005 	 * Setup the Base and Length of the Tx Descriptor Ring
2006 	 */
2007 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
2008 		struct ix_tx_ring *txr = &sc->tx_rings[i];
2009 		uint64_t tdba = txr->tx_base_paddr;
2010 		uint64_t hdr_paddr = txr->tx_hdr_paddr;
2011 		uint32_t txctrl;
2012 
2013 		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i), (uint32_t)tdba);
2014 		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (uint32_t)(tdba >> 32));
2015 		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
2016 		    txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc));
2017 
2018 		/* Setup the HW Tx Head and Tail descriptor pointers */
2019 		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
2020 		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
2021 
2022 		/* Disable TX head write-back relax ordering */
2023 		switch (hw->mac.type) {
2024 		case ixgbe_mac_82598EB:
2025 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
2026 			break;
2027 		case ixgbe_mac_82599EB:
2028 		case ixgbe_mac_X540:
2029 		default:
2030 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
2031 			break;
2032 		}
2033 		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
2034 		switch (hw->mac.type) {
2035 		case ixgbe_mac_82598EB:
2036 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
2037 			break;
2038 		case ixgbe_mac_82599EB:
2039 		case ixgbe_mac_X540:
2040 		default:
2041 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
2042 			break;
2043 		}
2044 
2045 		/* Enable TX head write-back */
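		/*
		 * With TDWBAL_HEAD_WB_ENABLE set, the hardware DMAs the index
		 * of the last completed TX descriptor into the tx_hdr buffer,
		 * so the TX cleanup path can read the completion point from
		 * memory instead of the TDH register.
		 */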
2046 		IXGBE_WRITE_REG(hw, IXGBE_TDWBAH(i),
2047 		    (uint32_t)(hdr_paddr >> 32));
2048 		IXGBE_WRITE_REG(hw, IXGBE_TDWBAL(i),
2049 		    ((uint32_t)hdr_paddr) | IXGBE_TDWBAL_HEAD_WB_ENABLE);
2050 	}
2051 
2052 	if (hw->mac.type != ixgbe_mac_82598EB) {
2053 		uint32_t dmatxctl, rttdcs;
2054 
2055 		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
2056 		dmatxctl |= IXGBE_DMATXCTL_TE;
2057 		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
2058 
2059 		/* Disable arbiter to set MTQC */
2060 		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
2061 		rttdcs |= IXGBE_RTTDCS_ARBDIS;
2062 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
2063 
2064 		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
2065 
2066 		/* Re-enable arbiter */
2067 		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
2068 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
2069 	}
2070 }
2071 
2072 static int
2073 ix_tx_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
2074     uint32_t *cmd_type_len, uint32_t *olinfo_status)
2075 {
2076 	struct ixgbe_adv_tx_context_desc *TXD;
2077 	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2078 	int ehdrlen, ip_hlen = 0, ctxd;
2079 	boolean_t offload = TRUE;
2080 
2081 	/* First check if TSO is to be used */
2082 	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
2083 		return ix_tso_ctx_setup(txr, mp,
2084 		    cmd_type_len, olinfo_status);
2085 	}
2086 
2087 	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
2088 		offload = FALSE;
2089 
2090 	/* Indicate the whole packet as payload when not doing TSO */
2091 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
2092 
2093 	/*
2094 	 * In advanced descriptors the vlan tag must be placed into the
2095 	 * context descriptor.  Hence we need to make one even if not
2096 	 * doing checksum offloads.
2097 	 */
2098 	if (mp->m_flags & M_VLANTAG) {
2099 		vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
2100 		    IXGBE_ADVTXD_VLAN_SHIFT;
2101 	} else if (!offload) {
2102 		/* No TX descriptor is consumed */
2103 		return 0;
2104 	}
2105 
2106 	/* Set the ether header length */
2107 	ehdrlen = mp->m_pkthdr.csum_lhlen;
2108 	KASSERT(ehdrlen > 0, ("invalid ether hlen"));
2109 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
2110 
2111 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
2112 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
2113 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
2114 		ip_hlen = mp->m_pkthdr.csum_iphlen;
2115 		KASSERT(ip_hlen > 0, ("invalid ip hlen"));
2116 	}
2117 	vlan_macip_lens |= ip_hlen;
2118 
2119 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
2120 	if (mp->m_pkthdr.csum_flags & CSUM_TCP)
2121 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
2122 	else if (mp->m_pkthdr.csum_flags & CSUM_UDP)
2123 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
2124 
2125 	if (mp->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))
2126 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
2127 
2128 	/* Now ready a context descriptor */
2129 	ctxd = txr->tx_next_avail;
2130 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
2131 
2132 	/* Now copy bits into descriptor */
2133 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
2134 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
2135 	TXD->seqnum_seed = htole32(0);
2136 	TXD->mss_l4len_idx = htole32(0);
2137 
2138 	/* We've consumed the first desc, adjust counters */
2139 	if (++ctxd == txr->tx_ndesc)
2140 		ctxd = 0;
2141 	txr->tx_next_avail = ctxd;
2142 	--txr->tx_avail;
2143 
2144 	/* One TX descriptor is consumed */
2145 	return 1;
2146 }
2147 
2148 static int
2149 ix_tso_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
2150     uint32_t *cmd_type_len, uint32_t *olinfo_status)
2151 {
2152 	struct ixgbe_adv_tx_context_desc *TXD;
2153 	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2154 	uint32_t mss_l4len_idx = 0, paylen;
2155 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
2156 
2157 	ehdrlen = mp->m_pkthdr.csum_lhlen;
2158 	KASSERT(ehdrlen > 0, ("invalid ether hlen"));
2159 
2160 	ip_hlen = mp->m_pkthdr.csum_iphlen;
2161 	KASSERT(ip_hlen > 0, ("invalid ip hlen"));
2162 
2163 	tcp_hlen = mp->m_pkthdr.csum_thlen;
2164 	KASSERT(tcp_hlen > 0, ("invalid tcp hlen"));
2165 
2166 	ctxd = txr->tx_next_avail;
2167 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
2168 
2169 	if (mp->m_flags & M_VLANTAG) {
2170 		vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
2171 		    IXGBE_ADVTXD_VLAN_SHIFT;
2172 	}
2173 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
2174 	vlan_macip_lens |= ip_hlen;
2175 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
2176 
2177 	/* ADV DTYPE TUCMD */
2178 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
2179 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
2180 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
2181 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
2182 
2183 	/* MSS L4LEN IDX */
2184 	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
2185 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
2186 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
2187 
2188 	TXD->seqnum_seed = htole32(0);
2189 
2190 	if (++ctxd == txr->tx_ndesc)
2191 		ctxd = 0;
2192 
2193 	txr->tx_avail--;
2194 	txr->tx_next_avail = ctxd;
2195 
2196 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
2197 
2198 	/* This is used in the transmit desc in encap */
2199 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
2200 
2201 	*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
2202 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
2203 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
2204 
2205 	/* One TX descriptor is consumed */
2206 	return 1;
2207 }
2208 
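/*
 * Reclaim completed TX buffers.  "hdr" is the hardware completion index,
 * normally the caller's snapshot of the TX head write-back buffer: all
 * descriptors from tx_next_clean up to, but not including, hdr have been
 * transmitted.
 */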
2209 static void
2210 ix_txeof(struct ix_tx_ring *txr, int hdr)
2211 {
2212 	int first, avail;
2213 
2214 	if (txr->tx_avail == txr->tx_ndesc)
2215 		return;
2216 
2217 	first = txr->tx_next_clean;
2218 	if (first == hdr)
2219 		return;
2220 
2221 	avail = txr->tx_avail;
2222 	while (first != hdr) {
2223 		struct ix_tx_buf *txbuf = &txr->tx_buf[first];
2224 
2225 		++avail;
2226 		if (txbuf->m_head) {
2227 			bus_dmamap_unload(txr->tx_tag, txbuf->map);
2228 			m_freem(txbuf->m_head);
2229 			txbuf->m_head = NULL;
2230 		}
2231 		if (++first == txr->tx_ndesc)
2232 			first = 0;
2233 	}
2234 	txr->tx_next_clean = first;
2235 	txr->tx_avail = avail;
2236 
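	/*
	 * Allow the stack to queue more frames and disarm the watchdog only
	 * once enough descriptors are free to hold a worst-case fragmented
	 * frame plus the reserved slots.
	 */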
2237 	if (txr->tx_avail > IX_MAX_SCATTER + IX_TX_RESERVED) {
2238 		ifsq_clr_oactive(txr->tx_ifsq);
2239 		txr->tx_watchdog.wd_timer = 0;
2240 	}
2241 }
2242 
2243 static int
2244 ix_create_rx_ring(struct ix_rx_ring *rxr)
2245 {
2246 	int i, rsize, error, nrxd;
2247 
2248 	/*
2249 	 * Validate the number of receive descriptors: it must lie within the
2250 	 * hardware limits and the ring size must be a multiple of IX_DBA_ALIGN.
2251 	 */
2252 	nrxd = device_getenv_int(rxr->rx_sc->dev, "rxd", ix_rxd);
2253 	if (((nrxd * sizeof(union ixgbe_adv_rx_desc)) % IX_DBA_ALIGN) != 0 ||
2254 	    nrxd < IX_MIN_RXD || nrxd > IX_MAX_RXD) {
2255 		device_printf(rxr->rx_sc->dev,
2256 		    "Using %d RX descriptors instead of %d!\n",
2257 		    IX_DEF_RXD, nrxd);
2258 		rxr->rx_ndesc = IX_DEF_RXD;
2259 	} else {
2260 		rxr->rx_ndesc = nrxd;
2261 	}
2262 
2263 	/*
2264 	 * Allocate RX descriptor ring
2265 	 */
2266 	rsize = roundup2(rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc),
2267 	    IX_DBA_ALIGN);
2268 	rxr->rx_base = bus_dmamem_coherent_any(rxr->rx_sc->parent_tag,
2269 	    IX_DBA_ALIGN, rsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
2270 	    &rxr->rx_base_dtag, &rxr->rx_base_map, &rxr->rx_base_paddr);
2271 	if (rxr->rx_base == NULL) {
2272 		device_printf(rxr->rx_sc->dev,
2273 		    "Unable to allocate TX Descriptor memory\n");
2274 		return ENOMEM;
2275 	}
2276 
2277 	rsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_rx_buf) * rxr->rx_ndesc);
2278 	rxr->rx_buf = kmalloc_cachealign(rsize, M_DEVBUF, M_WAITOK | M_ZERO);
2279 
2280 	/*
2281 	 * Create DMA tag for RX buffers
2282 	 */
2283 	error = bus_dma_tag_create(rxr->rx_sc->parent_tag,
2284 	    1, 0,		/* alignment, bounds */
2285 	    BUS_SPACE_MAXADDR,	/* lowaddr */
2286 	    BUS_SPACE_MAXADDR,	/* highaddr */
2287 	    NULL, NULL,		/* filter, filterarg */
2288 	    PAGE_SIZE,		/* maxsize */
2289 	    1,			/* nsegments */
2290 	    PAGE_SIZE,		/* maxsegsize */
2291 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, /* flags */
2292 	    &rxr->rx_tag);
2293 	if (error) {
2294 		device_printf(rxr->rx_sc->dev,
2295 		    "Unable to create RX DMA tag\n");
2296 		kfree(rxr->rx_buf, M_DEVBUF);
2297 		rxr->rx_buf = NULL;
2298 		return error;
2299 	}
2300 
2301 	/*
2302 	 * Create spare DMA map for RX buffers
2303 	 */
2304 	error = bus_dmamap_create(rxr->rx_tag, BUS_DMA_WAITOK,
2305 	    &rxr->rx_sparemap);
2306 	if (error) {
2307 		device_printf(rxr->rx_sc->dev,
2308 		    "Unable to create spare RX DMA map\n");
2309 		bus_dma_tag_destroy(rxr->rx_tag);
2310 		kfree(rxr->rx_buf, M_DEVBUF);
2311 		rxr->rx_buf = NULL;
2312 		return error;
2313 	}
2314 
2315 	/*
2316 	 * Create DMA maps for RX buffers
2317 	 */
2318 	for (i = 0; i < rxr->rx_ndesc; ++i) {
2319 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2320 
2321 		error = bus_dmamap_create(rxr->rx_tag,
2322 		    BUS_DMA_WAITOK, &rxbuf->map);
2323 		if (error) {
2324 			device_printf(rxr->rx_sc->dev,
2325 			    "Unable to create RX dma map\n");
2326 			ix_destroy_rx_ring(rxr, i);
2327 			return error;
2328 		}
2329 	}
2330 
2331 	/*
2332 	 * Initialize various watermarks
2333 	 */
2334 	rxr->rx_wreg_nsegs = IX_DEF_RXWREG_NSEGS;
2335 
2336 	return 0;
2337 }
2338 
2339 static void
2340 ix_destroy_rx_ring(struct ix_rx_ring *rxr, int ndesc)
2341 {
2342 	int i;
2343 
2344 	if (rxr->rx_base != NULL) {
2345 		bus_dmamap_unload(rxr->rx_base_dtag, rxr->rx_base_map);
2346 		bus_dmamem_free(rxr->rx_base_dtag, rxr->rx_base,
2347 		    rxr->rx_base_map);
2348 		bus_dma_tag_destroy(rxr->rx_base_dtag);
2349 		rxr->rx_base = NULL;
2350 	}
2351 
2352 	if (rxr->rx_buf == NULL)
2353 		return;
2354 
2355 	for (i = 0; i < ndesc; ++i) {
2356 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2357 
2358 		KKASSERT(rxbuf->m_head == NULL);
2359 		bus_dmamap_destroy(rxr->rx_tag, rxbuf->map);
2360 	}
2361 	bus_dmamap_destroy(rxr->rx_tag, rxr->rx_sparemap);
2362 	bus_dma_tag_destroy(rxr->rx_tag);
2363 
2364 	kfree(rxr->rx_buf, M_DEVBUF);
2365 	rxr->rx_buf = NULL;
2366 }
2367 
2368 /*
2369 ** Used to detect a descriptor that has
2370 ** been merged by Hardware RSC.
2371 */
2372 static __inline uint32_t
2373 ix_rsc_count(union ixgbe_adv_rx_desc *rx)
2374 {
2375 	return (le32toh(rx->wb.lower.lo_dword.data) &
2376 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
2377 }
2378 
2379 #if 0
2380 /*********************************************************************
2381  *
2382  *  Initialize the Hardware RSC (LRO) feature on the 82599
2383  *  for an RX ring; this is toggled by the LRO capability
2384  *  even though it is transparent to the stack.
2385  *
2386  *  NOTE: since this HW feature only works with IPv4 and
2387  *        our testing has shown soft LRO to be as effective,
2388  *        it is disabled by default.
2389  *
2390  **********************************************************************/
2391 static void
2392 ix_setup_hw_rsc(struct ix_rx_ring *rxr)
2393 {
2394 	struct	ix_softc 	*sc = rxr->rx_sc;
2395 	struct	ixgbe_hw	*hw = &sc->hw;
2396 	uint32_t			rscctrl, rdrxctl;
2397 
2398 #if 0
2399 	/* If turning LRO/RSC off we need to disable it */
2400 	if ((sc->arpcom.ac_if.if_capenable & IFCAP_LRO) == 0) {
2401 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2402 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
2403 		return;
2404 	}
2405 #endif
2406 
2407 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
2408 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
2409 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
2410 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
2411 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
2412 
2413 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2414 	rscctrl |= IXGBE_RSCCTL_RSCEN;
2415 	/*
2416 	** Limit the total number of descriptors that
2417 	** can be combined, so it does not exceed 64K
2418 	*/
2419 	if (rxr->mbuf_sz == MCLBYTES)
2420 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
2421 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
2422 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
2423 	else if (rxr->mbuf_sz == MJUM9BYTES)
2424 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
2425 	else  /* Using 16K cluster */
2426 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
2427 
2428 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
2429 
2430 	/* Enable TCP header recognition */
2431 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
2432 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
2433 	    IXGBE_PSRTYPE_TCPHDR));
2434 
2435 	/* Disable RSC for ACK packets */
2436 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
2437 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
2438 
2439 	rxr->hw_rsc = TRUE;
2440 }
2441 #endif
2442 
2443 static int
2444 ix_init_rx_ring(struct ix_rx_ring *rxr)
2445 {
2446 	int i;
2447 
2448 	/* Clear the ring contents */
2449 	bzero(rxr->rx_base, rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2450 
2451 	/* XXX we need JUMPAGESIZE for RSC too */
2452 	if (rxr->rx_sc->max_frame_size <= MCLBYTES)
2453 		rxr->rx_mbuf_sz = MCLBYTES;
2454 	else
2455 		rxr->rx_mbuf_sz = MJUMPAGESIZE;
2456 
2457 	/* Now replenish the mbufs */
2458 	for (i = 0; i < rxr->rx_ndesc; ++i) {
2459 		int error;
2460 
2461 		error = ix_newbuf(rxr, i, TRUE);
2462 		if (error)
2463 			return error;
2464 	}
2465 
2466 	/* Setup our descriptor indices */
2467 	rxr->rx_next_check = 0;
2468 	rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2469 
2470 #if 0
2471 	/*
2472 	** Now set up the LRO interface:
2473 	*/
2474 	if (ixgbe_rsc_enable)
2475 		ix_setup_hw_rsc(rxr);
2476 #endif
2477 
2478 	return 0;
2479 }
2480 
2481 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
2482 
2483 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
2484 
2485 static void
2486 ix_init_rx_unit(struct ix_softc *sc, boolean_t polling)
2487 {
2488 	struct ixgbe_hw	*hw = &sc->hw;
2489 	struct ifnet *ifp = &sc->arpcom.ac_if;
2490 	uint32_t bufsz, fctrl, rxcsum, hlreg;
2491 	int i;
2492 
2493 	/*
2494 	 * Make sure receives are disabled while setting up the descriptor ring
2495 	 */
2496 	ixgbe_disable_rx(hw);
2497 
2498 	/* Enable broadcasts */
2499 	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
2500 	fctrl |= IXGBE_FCTRL_BAM;
2501 	if (hw->mac.type == ixgbe_mac_82598EB) {
2502 		fctrl |= IXGBE_FCTRL_DPF;
2503 		fctrl |= IXGBE_FCTRL_PMCF;
2504 	}
2505 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
2506 
2507 	/* Set for Jumbo Frames? */
2508 	hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
2509 	if (ifp->if_mtu > ETHERMTU)
2510 		hlreg |= IXGBE_HLREG0_JUMBOEN;
2511 	else
2512 		hlreg &= ~IXGBE_HLREG0_JUMBOEN;
2513 	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
2514 
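	/*
	 * SRRCTL expresses the RX packet buffer size in units of
	 * (1 << IXGBE_SRRCTL_BSIZEPKT_SHIFT) bytes, so round the mbuf
	 * size up before converting.
	 */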
2515 	KKASSERT(sc->rx_rings[0].rx_mbuf_sz >= MCLBYTES);
2516 	bufsz = (sc->rx_rings[0].rx_mbuf_sz + BSIZEPKT_ROUNDUP) >>
2517 	    IXGBE_SRRCTL_BSIZEPKT_SHIFT;
2518 
2519 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
2520 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
2521 		uint64_t rdba = rxr->rx_base_paddr;
2522 		uint32_t srrctl;
2523 
2524 		/* Setup the Base and Length of the Rx Descriptor Ring */
2525 		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i), (uint32_t)rdba);
2526 		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (uint32_t)(rdba >> 32));
2527 		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
2528 		    rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2529 
2530 		/*
2531 		 * Set up the SRRCTL register
2532 		 */
2533 		srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
2534 
2535 		srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
2536 		srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
2537 		srrctl |= bufsz;
2538 		srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
2539 		if (sc->rx_ring_inuse > 1) {
2540 			/* See the comment near ix_enable_rx_drop() */
2541 			if (sc->ifm_media &
2542 			    (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) {
2543 				srrctl &= ~IXGBE_SRRCTL_DROP_EN;
2544 				if (i == 0 && bootverbose) {
2545 					if_printf(ifp, "flow control %s, "
2546 					    "disable RX drop\n",
2547 					    ix_ifmedia2str(sc->ifm_media));
2548 				}
2549 			} else {
2550 				srrctl |= IXGBE_SRRCTL_DROP_EN;
2551 				if (i == 0 && bootverbose) {
2552 					if_printf(ifp, "flow control %s, "
2553 					    "enable RX drop\n",
2554 					    ix_ifmedia2str(sc->ifm_media));
2555 				}
2556 			}
2557 		}
2558 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
2559 
2560 		/* Setup the HW Rx Head and Tail Descriptor Pointers */
2561 		IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
2562 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
2563 	}
2564 
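	/* No packet split is used; clear the packet-split receive type. */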
2565 	if (sc->hw.mac.type != ixgbe_mac_82598EB)
2566 		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), 0);
2567 
2568 	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
2569 
2570 	/*
2571 	 * Setup RSS
2572 	 */
2573 	if (sc->rx_ring_inuse > 1) {
2574 		uint8_t key[IX_NRSSRK * IX_RSSRK_SIZE];
2575 		const struct if_ringmap *rm;
2576 		int j, r, nreta, table_nent;
2577 
2578 		/*
2579 		 * NOTE:
2580 		 * When we reach here, RSS has already been disabled
2581 		 * in ix_stop(), so we could safely configure RSS key
2582 		 * and redirect table.
2583 		 */
2584 
2585 		/*
2586 		 * Configure RSS key
2587 		 */
2588 		toeplitz_get_key(key, sizeof(key));
2589 		for (i = 0; i < IX_NRSSRK; ++i) {
2590 			uint32_t rssrk;
2591 
2592 			rssrk = IX_RSSRK_VAL(key, i);
2593 			IX_RSS_DPRINTF(sc, 1, "rssrk%d 0x%08x\n",
2594 			    i, rssrk);
2595 
2596 			IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rssrk);
2597 		}
2598 
2599 		/*
2600 		 * Configure RSS redirect table.
2601 		 */
2602 
2603 		/* Table size will differ based on MAC */
2604 		switch (hw->mac.type) {
2605 		case ixgbe_mac_X550:
2606 		case ixgbe_mac_X550EM_x:
2607 		case ixgbe_mac_X550EM_a:
2608 			nreta = IX_NRETA_X550;
2609 			break;
2610 		default:
2611 			nreta = IX_NRETA;
2612 			break;
2613 		}
2614 
2615 		table_nent = nreta * IX_RETA_SIZE;
2616 		KASSERT(table_nent <= IX_RDRTABLE_SIZE,
2617 		    ("invalid RETA count %d", nreta));
2618 		if (polling)
2619 			rm = sc->rx_rmap;
2620 		else
2621 			rm = sc->rx_rmap_intr;
2622 		if_ringmap_rdrtable(rm, sc->rdr_table, table_nent);
2623 
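		/*
		 * Each 32-bit RETA register packs IX_RETA_SIZE one-byte
		 * entries; entry n selects the RX ring for flows whose RSS
		 * hash maps to redirection-table slot n.  Entries beyond
		 * IX_NRETA go into the extended ERETA registers.
		 */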
2624 		r = 0;
2625 		for (j = 0; j < nreta; ++j) {
2626 			uint32_t reta = 0;
2627 
2628 			for (i = 0; i < IX_RETA_SIZE; ++i) {
2629 				uint32_t q;
2630 
2631 				q = sc->rdr_table[r];
2632 				KASSERT(q < sc->rx_ring_inuse,
2633 				    ("invalid RX ring index %d", q));
2634 				reta |= q << (8 * i);
2635 				++r;
2636 			}
2637 			IX_RSS_DPRINTF(sc, 1, "reta 0x%08x\n", reta);
2638 			if (j < IX_NRETA) {
2639 				IXGBE_WRITE_REG(hw, IXGBE_RETA(j), reta);
2640 			} else {
2641 				IXGBE_WRITE_REG(hw, IXGBE_ERETA(j - IX_NRETA),
2642 				    reta);
2643 			}
2644 		}
2645 
2646 		/*
2647 		 * Enable multiple receive queues.
2648 		 * Enable IPv4 RSS standard hash functions.
2649 		 */
2650 		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
2651 		    IXGBE_MRQC_RSSEN |
2652 		    IXGBE_MRQC_RSS_FIELD_IPV4 |
2653 		    IXGBE_MRQC_RSS_FIELD_IPV4_TCP);
2654 
2655 		/*
2656 		 * NOTE:
2657 		 * PCSD must be enabled to enable multiple
2658 		 * receive queues.
2659 		 */
2660 		rxcsum |= IXGBE_RXCSUM_PCSD;
2661 	}
2662 
2663 	if (ifp->if_capenable & IFCAP_RXCSUM)
2664 		rxcsum |= IXGBE_RXCSUM_PCSD;
2665 
2666 	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
2667 }
2668 
2669 static __inline void
2670 ix_rx_refresh(struct ix_rx_ring *rxr, int i)
2671 {
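	/*
	 * RDT must point at the last descriptor returned to the hardware,
	 * i.e. one entry before the next descriptor the driver will
	 * examine, hence the decrement.
	 */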
2672 	if (--i < 0)
2673 		i = rxr->rx_ndesc - 1;
2674 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, IXGBE_RDT(rxr->rx_idx), i);
2675 }
2676 
2677 static __inline void
2678 ix_rxcsum(uint32_t staterr, struct mbuf *mp, uint32_t ptype)
2679 {
2680 	if ((ptype &
2681 	     (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_IPV4_EX)) == 0) {
2682 		/* Not IPv4 */
2683 		return;
2684 	}
2685 
2686 	if ((staterr & (IXGBE_RXD_STAT_IPCS | IXGBE_RXDADV_ERR_IPE)) ==
2687 	    IXGBE_RXD_STAT_IPCS)
2688 		mp->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID;
2689 
2690 	if ((ptype &
2691 	     (IXGBE_RXDADV_PKTTYPE_TCP | IXGBE_RXDADV_PKTTYPE_UDP)) == 0) {
2692 		/*
2693 		 * - Neither TCP nor UDP
2694 		 * - IPv4 fragment
2695 		 */
2696 		return;
2697 	}
2698 
2699 	if ((staterr & (IXGBE_RXD_STAT_L4CS | IXGBE_RXDADV_ERR_TCPE)) ==
2700 	    IXGBE_RXD_STAT_L4CS) {
2701 		mp->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
2702 		    CSUM_FRAG_NOT_CHECKED;
2703 		mp->m_pkthdr.csum_data = htons(0xffff);
2704 	}
2705 }
2706 
2707 static __inline struct pktinfo *
2708 ix_rssinfo(struct mbuf *m, struct pktinfo *pi,
2709     uint32_t hash, uint32_t hashtype, uint32_t ptype)
2710 {
2711 	switch (hashtype) {
2712 	case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
2713 		pi->pi_netisr = NETISR_IP;
2714 		pi->pi_flags = 0;
2715 		pi->pi_l3proto = IPPROTO_TCP;
2716 		break;
2717 
2718 	case IXGBE_RXDADV_RSSTYPE_IPV4:
2719 		if ((ptype & IXGBE_RXDADV_PKTTYPE_UDP) == 0) {
2720 			/* Not UDP, or an IP fragment */
2721 			return NULL;
2722 		}
2723 		pi->pi_netisr = NETISR_IP;
2724 		pi->pi_flags = 0;
2725 		pi->pi_l3proto = IPPROTO_UDP;
2726 		break;
2727 
2728 	default:
2729 		return NULL;
2730 	}
2731 
2732 	m_sethash(m, toeplitz_hash(hash));
2733 	return pi;
2734 }
2735 
2736 static __inline void
2737 ix_setup_rxdesc(union ixgbe_adv_rx_desc *rxd, const struct ix_rx_buf *rxbuf)
2738 {
2739 	rxd->read.pkt_addr = htole64(rxbuf->paddr);
2740 	rxd->wb.upper.status_error = 0;
2741 }
2742 
2743 static void
2744 ix_rx_discard(struct ix_rx_ring *rxr, int i, boolean_t eop)
2745 {
2746 	struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2747 
2748 	/*
2749 	 * XXX discard may not be correct
2750 	 */
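	/*
	 * For a bad frame that spans multiple descriptors, keep the DISC
	 * flag set so the remaining fragments are dropped as well; clear
	 * it once the EOP descriptor has been seen.
	 */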
2751 	if (eop) {
2752 		IFNET_STAT_INC(&rxr->rx_sc->arpcom.ac_if, ierrors, 1);
2753 		rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2754 	} else {
2755 		rxr->rx_flags |= IX_RXRING_FLAG_DISC;
2756 	}
2757 	if (rxbuf->fmp != NULL) {
2758 		m_freem(rxbuf->fmp);
2759 		rxbuf->fmp = NULL;
2760 		rxbuf->lmp = NULL;
2761 	}
2762 	ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
2763 }
2764 
2765 static void
2766 ix_rxeof(struct ix_rx_ring *rxr, int count)
2767 {
2768 	struct ifnet *ifp = &rxr->rx_sc->arpcom.ac_if;
2769 	int i, nsegs = 0, cpuid = mycpuid;
2770 
2771 	i = rxr->rx_next_check;
2772 	while (count != 0) {
2773 		struct ix_rx_buf *rxbuf, *nbuf = NULL;
2774 		union ixgbe_adv_rx_desc	*cur;
2775 		struct mbuf *sendmp = NULL, *mp;
2776 		struct pktinfo *pi = NULL, pi0;
2777 		uint32_t rsc = 0, ptype, staterr, hash, hashtype;
2778 		uint16_t len;
2779 		boolean_t eop;
2780 
2781 		cur = &rxr->rx_base[i];
2782 		staterr = le32toh(cur->wb.upper.status_error);
2783 
2784 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
2785 			break;
2786 		++nsegs;
2787 
2788 		rxbuf = &rxr->rx_buf[i];
2789 		mp = rxbuf->m_head;
2790 
2791 		len = le16toh(cur->wb.upper.length);
2792 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
2793 		    IXGBE_RXDADV_PKTTYPE_MASK;
2794 		hash = le32toh(cur->wb.lower.hi_dword.rss);
2795 		hashtype = le32toh(cur->wb.lower.lo_dword.data) &
2796 		    IXGBE_RXDADV_RSSTYPE_MASK;
2797 
2798 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
2799 		if (eop)
2800 			--count;
2801 
2802 		/*
2803 		 * Make sure bad packets are discarded
2804 		 */
2805 		if ((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) ||
2806 		    (rxr->rx_flags & IX_RXRING_FLAG_DISC)) {
2807 			ix_rx_discard(rxr, i, eop);
2808 			goto next_desc;
2809 		}
2810 
2811 		bus_dmamap_sync(rxr->rx_tag, rxbuf->map, BUS_DMASYNC_POSTREAD);
2812 		if (ix_newbuf(rxr, i, FALSE) != 0) {
2813 			ix_rx_discard(rxr, i, eop);
2814 			goto next_desc;
2815 		}
2816 
2817 		/*
2818 		 * On the 82599, which supports hardware LRO, packets
2819 		 * need not be fragmented across sequential descriptors;
2820 		 * instead the next descriptor is indicated in bits
2821 		 * of the descriptor.  This also means that we might
2822 		 * process more than one packet at a time, something
2823 		 * that has never been true before; it required
2824 		 * eliminating global chain pointers in favor of what
2825 		 * we are doing here.
2826 		 */
2827 		if (!eop) {
2828 			int nextp;
2829 
2830 			/*
2831 			 * Figure out the next descriptor
2832 			 * of this frame.
2833 			 */
2834 			if (rxr->rx_flags & IX_RXRING_FLAG_LRO)
2835 				rsc = ix_rsc_count(cur);
2836 			if (rsc) { /* Get hardware index */
2837 				nextp = ((staterr &
2838 				    IXGBE_RXDADV_NEXTP_MASK) >>
2839 				    IXGBE_RXDADV_NEXTP_SHIFT);
2840 			} else { /* Just sequential */
2841 				nextp = i + 1;
2842 				if (nextp == rxr->rx_ndesc)
2843 					nextp = 0;
2844 			}
2845 			nbuf = &rxr->rx_buf[nextp];
2846 			prefetch(nbuf);
2847 		}
2848 		mp->m_len = len;
2849 
2850 		/*
2851 		 * Rather than using the fmp/lmp global pointers
2852 		 * we now keep the head of a packet chain in the
2853 		 * buffer struct and pass this along from one
2854 		 * descriptor to the next, until we get EOP.
2855 		 */
2856 		if (rxbuf->fmp == NULL) {
2857 			mp->m_pkthdr.len = len;
2858 			rxbuf->fmp = mp;
2859 			rxbuf->lmp = mp;
2860 		} else {
2861 			rxbuf->fmp->m_pkthdr.len += len;
2862 			rxbuf->lmp->m_next = mp;
2863 			rxbuf->lmp = mp;
2864 		}
2865 
2866 		if (nbuf != NULL) {
2867 			/*
2868 			 * Not the last fragment of this frame,
2869 			 * pass this fragment list on
2870 			 */
2871 			nbuf->fmp = rxbuf->fmp;
2872 			nbuf->lmp = rxbuf->lmp;
2873 		} else {
2874 			/*
2875 			 * Send this frame
2876 			 */
2877 			sendmp = rxbuf->fmp;
2878 
2879 			sendmp->m_pkthdr.rcvif = ifp;
2880 			IFNET_STAT_INC(ifp, ipackets, 1);
2881 #ifdef IX_RSS_DEBUG
2882 			rxr->rx_pkts++;
2883 #endif
2884 
2885 			/* Process vlan info */
2886 			if (staterr & IXGBE_RXD_STAT_VP) {
2887 				sendmp->m_pkthdr.ether_vlantag =
2888 				    le16toh(cur->wb.upper.vlan);
2889 				sendmp->m_flags |= M_VLANTAG;
2890 			}
2891 			if (ifp->if_capenable & IFCAP_RXCSUM)
2892 				ix_rxcsum(staterr, sendmp, ptype);
2893 			if (ifp->if_capenable & IFCAP_RSS) {
2894 				pi = ix_rssinfo(sendmp, &pi0,
2895 				    hash, hashtype, ptype);
2896 			}
2897 		}
2898 		rxbuf->fmp = NULL;
2899 		rxbuf->lmp = NULL;
2900 next_desc:
2901 		/* Advance our pointers to the next descriptor. */
2902 		if (++i == rxr->rx_ndesc)
2903 			i = 0;
2904 
2905 		if (sendmp != NULL)
2906 			ifp->if_input(ifp, sendmp, pi, cpuid);
2907 
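		/*
		 * Hand refreshed descriptors back to the hardware in
		 * batches of rx_wreg_nsegs to limit RDT register writes.
		 */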
2908 		if (nsegs >= rxr->rx_wreg_nsegs) {
2909 			ix_rx_refresh(rxr, i);
2910 			nsegs = 0;
2911 		}
2912 	}
2913 	rxr->rx_next_check = i;
2914 
2915 	if (nsegs > 0)
2916 		ix_rx_refresh(rxr, i);
2917 }
2918 
2919 static void
2920 ix_set_vlan(struct ix_softc *sc)
2921 {
2922 	struct ixgbe_hw *hw = &sc->hw;
2923 	uint32_t ctrl;
2924 
2925 	if (hw->mac.type == ixgbe_mac_82598EB) {
2926 		ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
2927 		ctrl |= IXGBE_VLNCTRL_VME;
2928 		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
2929 	} else {
2930 		int i;
2931 
2932 		/*
2933 		 * On 82599 and later chips the VLAN enable is
2934 		 * per queue in RXDCTL
2935 		 */
2936 		for (i = 0; i < sc->rx_ring_inuse; ++i) {
2937 			ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
2938 			ctrl |= IXGBE_RXDCTL_VME;
2939 			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
2940 		}
2941 	}
2942 }
2943 
2944 static void
2945 ix_enable_intr(struct ix_softc *sc)
2946 {
2947 	struct ixgbe_hw	*hw = &sc->hw;
2948 	uint32_t fwsm;
2949 	int i;
2950 
2951 	for (i = 0; i < sc->intr_cnt; ++i)
2952 		lwkt_serialize_handler_enable(sc->intr_data[i].intr_serialize);
2953 
2954 	sc->intr_mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
2955 
2956 	/* Enable Fan Failure detection */
2957 	if (hw->device_id == IXGBE_DEV_ID_82598AT)
2958 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
2959 
2960 	switch (hw->mac.type) {
2961 	case ixgbe_mac_82599EB:
2962 		sc->intr_mask |= IXGBE_EIMS_ECC;
2963 		/* Temperature sensor on some adapters */
2964 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP0;
2965 		/* SFP+ (RX_LOS_N & MOD_ABS_N) */
2966 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
2967 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP2;
2968 		break;
2969 
2970 	case ixgbe_mac_X540:
2971 		sc->intr_mask |= IXGBE_EIMS_ECC;
2972 		/* Detect if Thermal Sensor is enabled */
2973 		fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
2974 		if (fwsm & IXGBE_FWSM_TS_ENABLED)
2975 			sc->intr_mask |= IXGBE_EIMS_TS;
2976 		break;
2977 
2978 	case ixgbe_mac_X550:
2979 	case ixgbe_mac_X550EM_a:
2980 	case ixgbe_mac_X550EM_x:
2981 		sc->intr_mask |= IXGBE_EIMS_ECC;
2982 		/* MAC thermal sensor is automatically enabled */
2983 		sc->intr_mask |= IXGBE_EIMS_TS;
2984 		/* Some devices use SDP0 for important information */
2985 		if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
2986 		    hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T)
2987 			sc->intr_mask |= IXGBE_EIMS_GPI_SDP0_BY_MAC(hw);
2988 		/* FALL THROUGH */
2989 	default:
2990 		break;
2991 	}
2992 
2993 	/* With MSI-X we use auto clear for RX and TX rings */
2994 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
2995 		/*
2996 		 * There are no EIAC1/EIAC2 for newer chips; the related
2997 		 * bits for TX and RX rings > 16 are always auto clear.
2998 		 *
2999 		 * XXX which bits?  There are _no_ documented EICR1 and
3000 		 * EICR2 at all; only EICR.
3001 		 */
3002 		IXGBE_WRITE_REG(hw, IXGBE_EIAC, IXGBE_EIMS_RTX_QUEUE);
3003 	} else {
3004 		sc->intr_mask |= IX_TX_INTR_MASK | IX_RX0_INTR_MASK;
3005 
3006 		KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
3007 		if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS)
3008 			sc->intr_mask |= IX_RX1_INTR_MASK;
3009 	}
3010 
3011 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
3012 
3013 	/*
3014 	 * Enable RX and TX rings for MSI-X
3015 	 */
3016 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
3017 		for (i = 0; i < sc->tx_ring_inuse; ++i) {
3018 			const struct ix_tx_ring *txr = &sc->tx_rings[i];
3019 
3020 			if (txr->tx_intr_vec >= 0) {
3021 				IXGBE_WRITE_REG(hw, txr->tx_eims,
3022 				    txr->tx_eims_val);
3023 			}
3024 		}
3025 		for (i = 0; i < sc->rx_ring_inuse; ++i) {
3026 			const struct ix_rx_ring *rxr = &sc->rx_rings[i];
3027 
3028 			KKASSERT(rxr->rx_intr_vec >= 0);
3029 			IXGBE_WRITE_REG(hw, rxr->rx_eims, rxr->rx_eims_val);
3030 		}
3031 	}
3032 
3033 	IXGBE_WRITE_FLUSH(hw);
3034 }
3035 
3036 static void
3037 ix_disable_intr(struct ix_softc *sc)
3038 {
3039 	int i;
3040 
3041 	if (sc->intr_type == PCI_INTR_TYPE_MSIX)
3042 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIAC, 0);
3043 
3044 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
3045 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, ~0);
3046 	} else {
3047 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, 0xFFFF0000);
3048 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(0), ~0);
3049 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(1), ~0);
3050 	}
3051 	IXGBE_WRITE_FLUSH(&sc->hw);
3052 
3053 	for (i = 0; i < sc->intr_cnt; ++i)
3054 		lwkt_serialize_handler_disable(sc->intr_data[i].intr_serialize);
3055 }
3056 
3057 uint16_t
3058 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, uint32_t reg)
3059 {
3060 	return pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
3061 	    reg, 2);
3062 }
3063 
3064 void
3065 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, uint32_t reg, uint16_t value)
3066 {
3067 	pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
3068 	    reg, value, 2);
3069 }
3070 
3071 static void
3072 ix_slot_info(struct ix_softc *sc)
3073 {
3074 	struct ixgbe_hw *hw = &sc->hw;
3075 	device_t dev = sc->dev;
3076 	struct ixgbe_mac_info *mac = &hw->mac;
3077 	uint16_t link;
3078 	uint32_t offset;
3079 
3080 	/* For most devices simply call the shared code routine */
3081 	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
3082 		ixgbe_get_bus_info(hw);
3083 		/* These devices don't use PCI-E */
3084 		if (hw->mac.type == ixgbe_mac_X550EM_x ||
3085 		    hw->mac.type == ixgbe_mac_X550EM_a)
3086 			return;
3087 		goto display;
3088 	}
3089 
3090 	/*
3091 	 * For the Quad port adapter we need to parse back
3092 	 * up the PCI tree to find the speed of the expansion
3093 	 * slot into which this adapter is plugged. A bit more work.
3094 	 */
3095 	dev = device_get_parent(device_get_parent(dev));
3096 #ifdef IXGBE_DEBUG
3097 	device_printf(dev, "parent pcib = %x,%x,%x\n",
3098 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
3099 #endif
3100 	dev = device_get_parent(device_get_parent(dev));
3101 #ifdef IXGBE_DEBUG
3102 	device_printf(dev, "slot pcib = %x,%x,%x\n",
3103 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
3104 #endif
3105 	/* Now get the PCI Express Capabilities offset */
3106 	offset = pci_get_pciecap_ptr(dev);
3107 	/* ...and read the Link Status Register */
3108 	link = pci_read_config(dev, offset + PCIER_LINKSTAT, 2);
3109 	switch (link & IXGBE_PCI_LINK_WIDTH) {
3110 	case IXGBE_PCI_LINK_WIDTH_1:
3111 		hw->bus.width = ixgbe_bus_width_pcie_x1;
3112 		break;
3113 	case IXGBE_PCI_LINK_WIDTH_2:
3114 		hw->bus.width = ixgbe_bus_width_pcie_x2;
3115 		break;
3116 	case IXGBE_PCI_LINK_WIDTH_4:
3117 		hw->bus.width = ixgbe_bus_width_pcie_x4;
3118 		break;
3119 	case IXGBE_PCI_LINK_WIDTH_8:
3120 		hw->bus.width = ixgbe_bus_width_pcie_x8;
3121 		break;
3122 	default:
3123 		hw->bus.width = ixgbe_bus_width_unknown;
3124 		break;
3125 	}
3126 
3127 	switch (link & IXGBE_PCI_LINK_SPEED) {
3128 	case IXGBE_PCI_LINK_SPEED_2500:
3129 		hw->bus.speed = ixgbe_bus_speed_2500;
3130 		break;
3131 	case IXGBE_PCI_LINK_SPEED_5000:
3132 		hw->bus.speed = ixgbe_bus_speed_5000;
3133 		break;
3134 	case IXGBE_PCI_LINK_SPEED_8000:
3135 		hw->bus.speed = ixgbe_bus_speed_8000;
3136 		break;
3137 	default:
3138 		hw->bus.speed = ixgbe_bus_speed_unknown;
3139 		break;
3140 	}
3141 
3142 	mac->ops.set_lan_id(hw);
3143 
3144 display:
3145 	device_printf(dev, "PCI Express Bus: Speed %s %s\n",
3146 	    hw->bus.speed == ixgbe_bus_speed_8000 ? "8.0GT/s" :
3147 	    hw->bus.speed == ixgbe_bus_speed_5000 ? "5.0GT/s" :
3148 	    hw->bus.speed == ixgbe_bus_speed_2500 ? "2.5GT/s" : "Unknown",
3149 	    hw->bus.width == ixgbe_bus_width_pcie_x8 ? "Width x8" :
3150 	    hw->bus.width == ixgbe_bus_width_pcie_x4 ? "Width x4" :
3151 	    hw->bus.width == ixgbe_bus_width_pcie_x1 ? "Width x1" : "Unknown");
3152 
3153 	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP &&
3154 	    hw->bus.width <= ixgbe_bus_width_pcie_x4 &&
3155 	    hw->bus.speed == ixgbe_bus_speed_2500) {
3156 		device_printf(dev, "For optimal performance a x8 "
3157 		    "PCIE, or x4 PCIE Gen2 slot is required.\n");
3158 	} else if (hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP &&
3159 	    hw->bus.width <= ixgbe_bus_width_pcie_x8 &&
3160 	    hw->bus.speed < ixgbe_bus_speed_8000) {
3161 		device_printf(dev, "For optimal performance a x8 "
3162 		    "PCIE Gen3 slot is required.\n");
3163 	}
3164 }
3165 
3166 /*
3167  * Set up the IVAR register for a particular MSI-X interrupt:
3168  *
3169  * - entry is the queue (or other-cause) index
3170  * - vector is the MSI-X vector assigned to it
3171  * - type is 0 for an RX queue, 1 for a TX queue and -1 for
3172  *   other (misc/link) causes
3173  */
3174 static void
3175 ix_set_ivar(struct ix_softc *sc, uint8_t entry, uint8_t vector,
3176     int8_t type)
3177 {
3178 	struct ixgbe_hw *hw = &sc->hw;
3179 	uint32_t ivar, index;
3180 
3181 	vector |= IXGBE_IVAR_ALLOC_VAL;
3182 
3183 	switch (hw->mac.type) {
3184 	case ixgbe_mac_82598EB:
3185 		if (type == -1)
3186 			entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
3187 		else
3188 			entry += (type * 64);
3189 		index = (entry >> 2) & 0x1F;
3190 		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
3191 		ivar &= ~(0xFF << (8 * (entry & 0x3)));
3192 		ivar |= (vector << (8 * (entry & 0x3)));
3193 		IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
3194 		break;
3195 
3196 	case ixgbe_mac_82599EB:
3197 	case ixgbe_mac_X540:
3198 	case ixgbe_mac_X550:
3199 	case ixgbe_mac_X550EM_a:
3200 	case ixgbe_mac_X550EM_x:
3201 		if (type == -1) { /* MISC IVAR */
3202 			index = (entry & 1) * 8;
3203 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
3204 			ivar &= ~(0xFF << index);
3205 			ivar |= (vector << index);
3206 			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
3207 		} else {	/* RX/TX IVARS */
3208 			index = (16 * (entry & 1)) + (8 * type);
3209 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
3210 			ivar &= ~(0xFF << index);
3211 			ivar |= (vector << index);
3212 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
3213 		}
3214 		/* FALL THROUGH */
3215 	default:
3216 		break;
3217 	}
3218 }
3219 
3220 static boolean_t
3221 ix_sfp_probe(struct ix_softc *sc)
3222 {
3223 	struct ixgbe_hw	*hw = &sc->hw;
3224 
3225 	if (hw->phy.type == ixgbe_phy_nl &&
3226 	    hw->phy.sfp_type == ixgbe_sfp_type_not_present) {
3227 		int32_t ret;
3228 
3229 		ret = hw->phy.ops.identify_sfp(hw);
3230 		if (ret)
3231 			return FALSE;
3232 
3233 		ret = hw->phy.ops.reset(hw);
3234 		if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3235 			if_printf(&sc->arpcom.ac_if,
3236 			     "Unsupported SFP+ module detected!  "
3237 			     "Reload driver with supported module.\n");
3238 			sc->sfp_probe = FALSE;
3239 			return FALSE;
3240 		}
3241 		if_printf(&sc->arpcom.ac_if, "SFP+ module detected!\n");
3242 
3243 		/* We now have supported optics */
3244 		sc->sfp_probe = FALSE;
3245 
3246 		return TRUE;
3247 	}
3248 	return FALSE;
3249 }
3250 
3251 static void
3252 ix_handle_link(struct ix_softc *sc)
3253 {
3254 	ixgbe_check_link(&sc->hw, &sc->link_speed, &sc->link_up, 0);
3255 	ix_update_link_status(sc);
3256 }
3257 
3258 /*
3259  * Handling SFP module
3260  */
3261 static void
3262 ix_handle_mod(struct ix_softc *sc)
3263 {
3264 	struct ixgbe_hw *hw = &sc->hw;
3265 	uint32_t err;
3266 
3267 	err = hw->phy.ops.identify_sfp(hw);
3268 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3269 		if_printf(&sc->arpcom.ac_if,
3270 		    "Unsupported SFP+ module type was detected.\n");
3271 		return;
3272 	}
3273 	err = hw->mac.ops.setup_sfp(hw);
3274 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3275 		if_printf(&sc->arpcom.ac_if,
3276 		    "Setup failure - unsupported SFP+ module type.\n");
3277 		return;
3278 	}
3279 	ix_handle_msf(sc);
3280 }
3281 
3282 /*
3283  * Handling MSF (multispeed fiber)
3284  */
3285 static void
3286 ix_handle_msf(struct ix_softc *sc)
3287 {
3288 	struct ixgbe_hw *hw = &sc->hw;
3289 	uint32_t autoneg;
3290 
3291 	hw->phy.ops.identify_sfp(hw);
3292 	ix_init_media(sc);
3293 
3294 	if (sc->advspeed != IXGBE_LINK_SPEED_UNKNOWN)
3295 		autoneg = sc->advspeed;
3296 	else
3297 		autoneg = hw->phy.autoneg_advertised;
3298 	if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
3299 		bool negotiate;
3300 
3301 		hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
3302 	}
3303 	if (hw->mac.ops.setup_link != NULL)
3304 		hw->mac.ops.setup_link(hw, autoneg, TRUE);
3305 }
3306 
3307 static void
3308 ix_handle_phy(struct ix_softc *sc)
3309 {
3310 	struct ixgbe_hw *hw = &sc->hw;
3311 	int error;
3312 
3313 	error = hw->phy.ops.handle_lasi(hw);
3314 	if (error == IXGBE_ERR_OVERTEMP) {
3315 		if_printf(&sc->arpcom.ac_if,
3316 		    "CRITICAL: EXTERNAL PHY OVER TEMP!!  "
3317 		    "PHY will downshift to lower power state!\n");
3318 	} else if (error) {
3319 		if_printf(&sc->arpcom.ac_if,
3320 		    "Error handling LASI interrupt: %d\n", error);
3321 	}
3322 }
3323 
3324 static void
3325 ix_update_stats(struct ix_softc *sc)
3326 {
3327 	struct ifnet *ifp = &sc->arpcom.ac_if;
3328 	struct ixgbe_hw *hw = &sc->hw;
3329 	uint32_t missed_rx = 0, bprc, lxon, lxoff, total;
3330 	uint64_t total_missed_rx = 0;
3331 	int i;
3332 
3333 	sc->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
3334 	sc->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
3335 	sc->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
3336 	sc->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
3337 
3338 	for (i = 0; i < 16; i++) {
3339 		sc->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
3340 		sc->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
3341 		sc->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
3342 	}
3343 	sc->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
3344 	sc->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
3345 	sc->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
3346 
3347 	/* Hardware workaround, gprc counts missed packets */
3348 	sc->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
3349 	sc->stats.gprc -= missed_rx;
3350 
3351 	if (hw->mac.type != ixgbe_mac_82598EB) {
3352 		sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
3353 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
3354 		sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
3355 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
3356 		sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
3357 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
3358 		sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
3359 		sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
3360 	} else {
3361 		sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
3362 		sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
3363 		/* 82598 only has a counter in the high register */
3364 		sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
3365 		sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
3366 		sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
3367 	}
3368 
3369 	/*
3370 	 * Workaround: mprc hardware is incorrectly counting
3371 	 * broadcasts, so for now we subtract those.
3372 	 */
3373 	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
3374 	sc->stats.bprc += bprc;
3375 	sc->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
3376 	if (hw->mac.type == ixgbe_mac_82598EB)
3377 		sc->stats.mprc -= bprc;
3378 
3379 	sc->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
3380 	sc->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
3381 	sc->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
3382 	sc->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
3383 	sc->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
3384 	sc->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
3385 
3386 	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
3387 	sc->stats.lxontxc += lxon;
3388 	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
3389 	sc->stats.lxofftxc += lxoff;
3390 	total = lxon + lxoff;
3391 
3392 	sc->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
3393 	sc->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
3394 	sc->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
3395 	sc->stats.gptc -= total;
3396 	sc->stats.mptc -= total;
3397 	sc->stats.ptc64 -= total;
3398 	sc->stats.gotc -= total * ETHER_MIN_LEN;
3399 
3400 	sc->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
3401 	sc->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
3402 	sc->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
3403 	sc->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
3404 	sc->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
3405 	sc->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
3406 	sc->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
3407 	sc->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
3408 	sc->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
3409 	sc->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
3410 	sc->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
3411 	sc->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
3412 	sc->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
3413 	sc->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
3414 	sc->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
3415 	sc->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC);
3416 	sc->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
3417 	sc->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
3418 	/* Only read FCOE counters on 82599 and later */
3419 	if (hw->mac.type != ixgbe_mac_82598EB) {
3420 		sc->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
3421 		sc->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
3422 		sc->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
3423 		sc->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
3424 		sc->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
3425 	}
3426 
3427 	/* Rx Errors */
3428 	IFNET_STAT_SET(ifp, iqdrops, total_missed_rx);
3429 	IFNET_STAT_SET(ifp, ierrors, sc->stats.crcerrs + sc->stats.rlec);
3430 }
3431 
3432 #if 0
3433 /*
3434  * Add sysctl variables, one per statistic, to the system.
3435  */
3436 static void
3437 ix_add_hw_stats(struct ix_softc *sc)
3438 {
3439 
3440 	device_t dev = sc->dev;
3441 
3442 	struct ix_tx_ring *txr = sc->tx_rings;
3443 	struct ix_rx_ring *rxr = sc->rx_rings;
3444 
3445 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
3446 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
3447 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
3448 	struct ixgbe_hw_stats *stats = &sc->stats;
3449 
3450 	struct sysctl_oid *stat_node, *queue_node;
3451 	struct sysctl_oid_list *stat_list, *queue_list;
3452 
3453 #define QUEUE_NAME_LEN 32
3454 	char namebuf[QUEUE_NAME_LEN];
3455 
3456 	/* MAC stats get their own sub node */
3457 
3458 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
3459 				    CTLFLAG_RD, NULL, "MAC Statistics");
3460 	stat_list = SYSCTL_CHILDREN(stat_node);
3461 
3462 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
3463 			CTLFLAG_RD, &stats->crcerrs,
3464 			"CRC Errors");
3465 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
3466 			CTLFLAG_RD, &stats->illerrc,
3467 			"Illegal Byte Errors");
3468 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
3469 			CTLFLAG_RD, &stats->errbc,
3470 			"Byte Errors");
3471 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
3472 			CTLFLAG_RD, &stats->mspdc,
3473 			"MAC Short Packets Discarded");
3474 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
3475 			CTLFLAG_RD, &stats->mlfc,
3476 			"MAC Local Faults");
3477 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
3478 			CTLFLAG_RD, &stats->mrfc,
3479 			"MAC Remote Faults");
3480 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
3481 			CTLFLAG_RD, &stats->rlec,
3482 			"Receive Length Errors");
3483 
3484 	/* Flow Control stats */
3485 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
3486 			CTLFLAG_RD, &stats->lxontxc,
3487 			"Link XON Transmitted");
3488 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
3489 			CTLFLAG_RD, &stats->lxonrxc,
3490 			"Link XON Received");
3491 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
3492 			CTLFLAG_RD, &stats->lxofftxc,
3493 			"Link XOFF Transmitted");
3494 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
3495 			CTLFLAG_RD, &stats->lxoffrxc,
3496 			"Link XOFF Received");
3497 
3498 	/* Packet Reception Stats */
3499 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
3500 			CTLFLAG_RD, &stats->tor,
3501 			"Total Octets Received");
3502 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
3503 			CTLFLAG_RD, &stats->gorc,
3504 			"Good Octets Received");
3505 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
3506 			CTLFLAG_RD, &stats->tpr,
3507 			"Total Packets Received");
3508 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
3509 			CTLFLAG_RD, &stats->gprc,
3510 			"Good Packets Received");
3511 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
3512 			CTLFLAG_RD, &stats->mprc,
3513 			"Multicast Packets Received");
3514 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
3515 			CTLFLAG_RD, &stats->bprc,
3516 			"Broadcast Packets Received");
3517 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
3518 			CTLFLAG_RD, &stats->prc64,
3519 			"64 byte frames received ");
3520 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
3521 			CTLFLAG_RD, &stats->prc127,
3522 			"65-127 byte frames received");
3523 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
3524 			CTLFLAG_RD, &stats->prc255,
3525 			"128-255 byte frames received");
3526 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
3527 			CTLFLAG_RD, &stats->prc511,
3528 			"256-511 byte frames received");
3529 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
3530 			CTLFLAG_RD, &stats->prc1023,
3531 			"512-1023 byte frames received");
3532 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
3533 			CTLFLAG_RD, &stats->prc1522,
3534 			"1023-1522 byte frames received");
3535 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
3536 			CTLFLAG_RD, &stats->ruc,
3537 			"Receive Undersized");
3538 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
3539 			CTLFLAG_RD, &stats->rfc,
3540 			"Fragmented Packets Received ");
3541 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
3542 			CTLFLAG_RD, &stats->roc,
3543 			"Oversized Packets Received");
3544 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
3545 			CTLFLAG_RD, &stats->rjc,
3546 			"Received Jabber");
3547 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
3548 			CTLFLAG_RD, &stats->mngprc,
3549 			"Management Packets Received");
3550 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
3551 			CTLFLAG_RD, &stats->mngptc,
3552 			"Management Packets Dropped");
3553 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
3554 			CTLFLAG_RD, &stats->xec,
3555 			"Checksum Errors");
3556 
3557 	/* Packet Transmission Stats */
3558 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
3559 			CTLFLAG_RD, &stats->gotc,
3560 			"Good Octets Transmitted");
3561 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
3562 			CTLFLAG_RD, &stats->tpt,
3563 			"Total Packets Transmitted");
3564 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
3565 			CTLFLAG_RD, &stats->gptc,
3566 			"Good Packets Transmitted");
3567 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
3568 			CTLFLAG_RD, &stats->bptc,
3569 			"Broadcast Packets Transmitted");
3570 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
3571 			CTLFLAG_RD, &stats->mptc,
3572 			"Multicast Packets Transmitted");
3573 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
3574 			CTLFLAG_RD, &stats->mngptc,
3575 			"Management Packets Transmitted");
3576 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
3577 			CTLFLAG_RD, &stats->ptc64,
3578 			"64 byte frames transmitted ");
3579 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
3580 			CTLFLAG_RD, &stats->ptc127,
3581 			"65-127 byte frames transmitted");
3582 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
3583 			CTLFLAG_RD, &stats->ptc255,
3584 			"128-255 byte frames transmitted");
3585 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
3586 			CTLFLAG_RD, &stats->ptc511,
3587 			"256-511 byte frames transmitted");
3588 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
3589 			CTLFLAG_RD, &stats->ptc1023,
3590 			"512-1023 byte frames transmitted");
3591 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
3592 			CTLFLAG_RD, &stats->ptc1522,
3593 			"1024-1522 byte frames transmitted");
3594 }
3595 #endif
3596 
3597 /*
3598  * Enable the hardware to drop packets when the buffer is full.
3599  * This is useful when multiple RX rings are used, so that no
3600  * single RX ring being full stalls the entire RX engine.  We
3601  * only enable this when multiple RX rings are used and when
3602  * flow control is disabled.
3603  */
3604 static void
3605 ix_enable_rx_drop(struct ix_softc *sc)
3606 {
3607 	struct ixgbe_hw *hw = &sc->hw;
3608 	int i;
3609 
3610 	if (bootverbose) {
3611 		if_printf(&sc->arpcom.ac_if,
3612 		    "flow control %s, enable RX drop\n",
3613 		    ix_fc2str(sc->hw.fc.current_mode));
3614 	}
3615 
3616 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
3617 		uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3618 
3619 		srrctl |= IXGBE_SRRCTL_DROP_EN;
3620 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3621 	}
3622 }
3623 
3624 static void
3625 ix_disable_rx_drop(struct ix_softc *sc)
3626 {
3627 	struct ixgbe_hw *hw = &sc->hw;
3628 	int i;
3629 
3630 	if (bootverbose) {
3631 		if_printf(&sc->arpcom.ac_if,
3632 		    "flow control %s, disable RX drop\n",
3633 		    ix_fc2str(sc->hw.fc.current_mode));
3634 	}
3635 
3636 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
3637 		uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3638 
3639 		srrctl &= ~IXGBE_SRRCTL_DROP_EN;
3640 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3641 	}
3642 }
3643 
3644 static void
3645 ix_setup_serialize(struct ix_softc *sc)
3646 {
3647 	int i = 0, j;
3648 
3649 	/* Main + RX + TX */
3650 	sc->nserialize = 1 + sc->rx_ring_cnt + sc->tx_ring_cnt;
3651 	sc->serializes =
3652 	    kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *),
3653 	        M_DEVBUF, M_WAITOK | M_ZERO);
3654 
3655 	/*
3656 	 * Setup serializes
3657 	 *
3658 	 * NOTE: Order is critical
3659 	 */
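	/*
	 * (Presumably the array layout -- main serializer first, then all
	 * RX ring serializers, then all TX ring serializers -- is what the
	 * ifnet serialize-array helpers expect.)
	 */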
3660 
3661 	KKASSERT(i < sc->nserialize);
3662 	sc->serializes[i++] = &sc->main_serialize;
3663 
3664 	for (j = 0; j < sc->rx_ring_cnt; ++j) {
3665 		KKASSERT(i < sc->nserialize);
3666 		sc->serializes[i++] = &sc->rx_rings[j].rx_serialize;
3667 	}
3668 
3669 	for (j = 0; j < sc->tx_ring_cnt; ++j) {
3670 		KKASSERT(i < sc->nserialize);
3671 		sc->serializes[i++] = &sc->tx_rings[j].tx_serialize;
3672 	}
3673 
3674 	KKASSERT(i == sc->nserialize);
3675 }
3676 
3677 static int
3678 ix_alloc_intr(struct ix_softc *sc)
3679 {
3680 	struct ix_intr_data *intr;
3681 	struct ix_tx_ring *txr;
3682 	u_int intr_flags;
3683 	int i;
3684 
3685 	ix_alloc_msix(sc);
3686 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
3687 		ix_set_ring_inuse(sc, FALSE);
3688 		goto done;
3689 	}
3690 
3691 	/*
3692 	 * Reset some settings changed by ix_alloc_msix().
3693 	 */
3694 	if (sc->rx_rmap_intr != NULL) {
3695 		if_ringmap_free(sc->rx_rmap_intr);
3696 		sc->rx_rmap_intr = NULL;
3697 	}
3698 	if (sc->tx_rmap_intr != NULL) {
3699 		if_ringmap_free(sc->tx_rmap_intr);
3700 		sc->tx_rmap_intr = NULL;
3701 	}
3702 	if (sc->intr_data != NULL) {
3703 		kfree(sc->intr_data, M_DEVBUF);
3704 		sc->intr_data = NULL;
3705 	}
3706 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
3707 		txr = &sc->tx_rings[i];
3708 		txr->tx_intr_vec = -1;
3709 		txr->tx_intr_cpuid = -1;
3710 	}
3711 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
3712 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
3713 
3714 		rxr->rx_intr_vec = -1;
3715 		rxr->rx_txr = NULL;
3716 	}
3717 
3718 	sc->intr_cnt = 1;
3719 	sc->intr_data = kmalloc(sizeof(struct ix_intr_data), M_DEVBUF,
3720 	    M_WAITOK | M_ZERO);
3721 	intr = &sc->intr_data[0];
3722 
3723 	/*
3724 	 * Allocate MSI/legacy interrupt resource
3725 	 */
3726 	sc->intr_type = pci_alloc_1intr(sc->dev, ix_msi_enable,
3727 	    &intr->intr_rid, &intr_flags);
3728 
3729 	intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
3730 	    &intr->intr_rid, intr_flags);
3731 	if (intr->intr_res == NULL) {
3732 		device_printf(sc->dev, "Unable to allocate bus resource: "
3733 		    "interrupt\n");
3734 		return ENXIO;
3735 	}
3736 
3737 	intr->intr_serialize = &sc->main_serialize;
3738 	intr->intr_cpuid = rman_get_cpuid(intr->intr_res);
3739 	intr->intr_func = ix_intr;
3740 	intr->intr_funcarg = sc;
3741 	intr->intr_rate = IX_INTR_RATE;
3742 	intr->intr_use = IX_INTR_USE_RXTX;
3743 
3744 	sc->tx_rings[0].tx_intr_vec = IX_TX_INTR_VEC;
3745 	sc->tx_rings[0].tx_intr_cpuid = intr->intr_cpuid;
3746 
3747 	sc->rx_rings[0].rx_intr_vec = IX_RX0_INTR_VEC;
3748 
3749 	ix_set_ring_inuse(sc, FALSE);
3750 
3751 	KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
3752 	if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS) {
3753 		sc->rx_rings[1].rx_intr_vec = IX_RX1_INTR_VEC;
3754 
3755 		/*
3756 		 * Allocate RX ring map for RSS setup.
3757 		 */
3758 		sc->rx_rmap_intr = if_ringmap_alloc(sc->dev,
3759 		    IX_MIN_RXRING_RSS, IX_MIN_RXRING_RSS);
3760 		KASSERT(if_ringmap_count(sc->rx_rmap_intr) ==
3761 		    sc->rx_ring_inuse, ("RX ring inuse mismatch"));
3762 	}
3763 done:
3764 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
3765 		txr = &sc->tx_rings[i];
3766 		if (txr->tx_intr_cpuid < 0)
3767 			txr->tx_intr_cpuid = 0;
3768 	}
3769 	return 0;
3770 }
3771 
3772 static void
3773 ix_free_intr(struct ix_softc *sc)
3774 {
3775 	if (sc->intr_data == NULL)
3776 		return;
3777 
3778 	if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
3779 		struct ix_intr_data *intr = &sc->intr_data[0];
3780 
3781 		KKASSERT(sc->intr_cnt == 1);
3782 		if (intr->intr_res != NULL) {
3783 			bus_release_resource(sc->dev, SYS_RES_IRQ,
3784 			    intr->intr_rid, intr->intr_res);
3785 		}
3786 		if (sc->intr_type == PCI_INTR_TYPE_MSI)
3787 			pci_release_msi(sc->dev);
3788 
3789 		kfree(sc->intr_data, M_DEVBUF);
3790 	} else {
3791 		ix_free_msix(sc, TRUE);
3792 	}
3793 }
3794 
3795 static void
3796 ix_set_ring_inuse(struct ix_softc *sc, boolean_t polling)
3797 {
3798 	sc->rx_ring_inuse = ix_get_rxring_inuse(sc, polling);
3799 	sc->tx_ring_inuse = ix_get_txring_inuse(sc, polling);
3800 	if (bootverbose) {
3801 		if_printf(&sc->arpcom.ac_if,
3802 		    "RX rings %d/%d, TX rings %d/%d\n",
3803 		    sc->rx_ring_inuse, sc->rx_ring_cnt,
3804 		    sc->tx_ring_inuse, sc->tx_ring_cnt);
3805 	}
3806 }
3807 
3808 static int
3809 ix_get_rxring_inuse(const struct ix_softc *sc, boolean_t polling)
3810 {
3811 	if (!IX_ENABLE_HWRSS(sc))
3812 		return 1;
3813 
3814 	if (polling)
3815 		return sc->rx_ring_cnt;
3816 	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
3817 		return IX_MIN_RXRING_RSS;
3818 	else
3819 		return sc->rx_ring_msix;
3820 }
3821 
3822 static int
3823 ix_get_txring_inuse(const struct ix_softc *sc, boolean_t polling)
3824 {
3825 	if (!IX_ENABLE_HWTSS(sc))
3826 		return 1;
3827 
3828 	if (polling)
3829 		return sc->tx_ring_cnt;
3830 	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
3831 		return 1;
3832 	else
3833 		return sc->tx_ring_msix;
3834 }
3835 
3836 static int
3837 ix_setup_intr(struct ix_softc *sc)
3838 {
3839 	int i;
3840 
3841 	for (i = 0; i < sc->intr_cnt; ++i) {
3842 		struct ix_intr_data *intr = &sc->intr_data[i];
3843 		int error;
3844 
3845 		error = bus_setup_intr_descr(sc->dev, intr->intr_res,
3846 		    INTR_MPSAFE, intr->intr_func, intr->intr_funcarg,
3847 		    &intr->intr_hand, intr->intr_serialize, intr->intr_desc);
3848 		if (error) {
3849 			device_printf(sc->dev, "can't setup %dth intr\n", i);
3850 			ix_teardown_intr(sc, i);
3851 			return error;
3852 		}
3853 	}
3854 	return 0;
3855 }
3856 
3857 static void
3858 ix_teardown_intr(struct ix_softc *sc, int intr_cnt)
3859 {
3860 	int i;
3861 
3862 	if (sc->intr_data == NULL)
3863 		return;
3864 
3865 	for (i = 0; i < intr_cnt; ++i) {
3866 		struct ix_intr_data *intr = &sc->intr_data[i];
3867 
3868 		bus_teardown_intr(sc->dev, intr->intr_res, intr->intr_hand);
3869 	}
3870 }
3871 
3872 static void
3873 ix_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
3874 {
3875 	struct ix_softc *sc = ifp->if_softc;
3876 
3877 	ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz);
3878 }
3879 
3880 static void
3881 ix_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
3882 {
3883 	struct ix_softc *sc = ifp->if_softc;
3884 
3885 	ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz);
3886 }
3887 
3888 static int
3889 ix_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
3890 {
3891 	struct ix_softc *sc = ifp->if_softc;
3892 
3893 	return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz);
3894 }
3895 
3896 #ifdef INVARIANTS
3897 
3898 static void
3899 ix_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
3900     boolean_t serialized)
3901 {
3902 	struct ix_softc *sc = ifp->if_softc;
3903 
3904 	ifnet_serialize_array_assert(sc->serializes, sc->nserialize, slz,
3905 	    serialized);
3906 }
3907 
3908 #endif	/* INVARIANTS */
3909 
3910 static void
3911 ix_free_rings(struct ix_softc *sc)
3912 {
3913 	int i;
3914 
3915 	if (sc->tx_rings != NULL) {
3916 		for (i = 0; i < sc->tx_ring_cnt; ++i) {
3917 			struct ix_tx_ring *txr = &sc->tx_rings[i];
3918 
3919 			ix_destroy_tx_ring(txr, txr->tx_ndesc);
3920 		}
3921 		kfree(sc->tx_rings, M_DEVBUF);
3922 	}
3923 
3924 	if (sc->rx_rings != NULL) {
3925 		for (i = 0; i < sc->rx_ring_cnt; ++i) {
3926 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
3927 
3928 			ix_destroy_rx_ring(rxr, rxr->rx_ndesc);
3929 		}
3930 		kfree(sc->rx_rings, M_DEVBUF);
3931 	}
3932 
3933 	if (sc->parent_tag != NULL)
3934 		bus_dma_tag_destroy(sc->parent_tag);
3935 }
3936 
3937 static void
3938 ix_watchdog_reset(struct ix_softc *sc)
3939 {
3940 	int i;
3941 
3942 	ASSERT_IFNET_SERIALIZED_ALL(&sc->arpcom.ac_if);
3943 	ix_init(sc);
3944 	for (i = 0; i < sc->tx_ring_inuse; ++i)
3945 		ifsq_devstart_sched(sc->tx_rings[i].tx_ifsq);
3946 }
3947 
3948 static void
3949 ix_sync_netisr(struct ix_softc *sc, int flags)
3950 {
3951 	struct ifnet *ifp = &sc->arpcom.ac_if;
3952 
3953 	ifnet_serialize_all(ifp);
3954 	if (ifp->if_flags & IFF_RUNNING) {
3955 		ifp->if_flags &= ~(IFF_RUNNING | flags);
3956 	} else {
3957 		ifnet_deserialize_all(ifp);
3958 		return;
3959 	}
3960 	ifnet_deserialize_all(ifp);
3961 
3962 	/* Make sure that polling has stopped. */
3963 	netmsg_service_sync();
3964 }
3965 
3966 static void
3967 ix_watchdog_task(void *xsc, int pending __unused)
3968 {
3969 	struct ix_softc *sc = xsc;
3970 	struct ifnet *ifp = &sc->arpcom.ac_if;
3971 
3972 	ix_sync_netisr(sc, 0);
3973 
3974 	ifnet_serialize_all(ifp);
3975 	if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) == IFF_UP)
3976 		ix_watchdog_reset(sc);
3977 	ifnet_deserialize_all(ifp);
3978 }
3979 
3980 static void
3981 ix_watchdog(struct ifaltq_subque *ifsq)
3982 {
3983 	struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
3984 	struct ifnet *ifp = ifsq_get_ifp(ifsq);
3985 	struct ix_softc *sc = ifp->if_softc;
3986 
3987 	KKASSERT(txr->tx_ifsq == ifsq);
3988 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3989 
3990 	/*
3991 	 * If the interface has been paused, don't do the watchdog check.
3992 	 */
3993 	if (IXGBE_READ_REG(&sc->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF) {
3994 		txr->tx_watchdog.wd_timer = 5;
3995 		return;
3996 	}
3997 
3998 	if_printf(ifp, "Watchdog timeout -- resetting\n");
3999 	if_printf(ifp, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->tx_idx,
4000 	    IXGBE_READ_REG(&sc->hw, IXGBE_TDH(txr->tx_idx)),
4001 	    IXGBE_READ_REG(&sc->hw, IXGBE_TDT(txr->tx_idx)));
4002 	if_printf(ifp, "TX(%d) desc avail = %d, next TX to Clean = %d\n",
4003 	    txr->tx_idx, txr->tx_avail, txr->tx_next_clean);
4004 
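	/*
	 * With direct input under polling, the reset must go through the
	 * watchdog task, which first synchronizes the netisrs before
	 * reinitializing; otherwise reset the hardware right here.
	 */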
4005 	if ((ifp->if_flags & (IFF_IDIRECT | IFF_NPOLLING | IFF_RUNNING)) ==
4006 	    (IFF_IDIRECT | IFF_NPOLLING | IFF_RUNNING))
4007 		taskqueue_enqueue(taskqueue_thread[0], &sc->wdog_task);
4008 	else
4009 		ix_watchdog_reset(sc);
4010 }
4011 
4012 static void
4013 ix_free_tx_ring(struct ix_tx_ring *txr)
4014 {
4015 	int i;
4016 
4017 	for (i = 0; i < txr->tx_ndesc; ++i) {
4018 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
4019 
4020 		if (txbuf->m_head != NULL) {
4021 			bus_dmamap_unload(txr->tx_tag, txbuf->map);
4022 			m_freem(txbuf->m_head);
4023 			txbuf->m_head = NULL;
4024 		}
4025 	}
4026 }
4027 
4028 static void
4029 ix_free_rx_ring(struct ix_rx_ring *rxr)
4030 {
4031 	int i;
4032 
4033 	for (i = 0; i < rxr->rx_ndesc; ++i) {
4034 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
4035 
4036 		if (rxbuf->fmp != NULL) {
4037 			m_freem(rxbuf->fmp);
4038 			rxbuf->fmp = NULL;
4039 			rxbuf->lmp = NULL;
4040 		} else {
4041 			KKASSERT(rxbuf->lmp == NULL);
4042 		}
4043 		if (rxbuf->m_head != NULL) {
4044 			bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
4045 			m_freem(rxbuf->m_head);
4046 			rxbuf->m_head = NULL;
4047 		}
4048 	}
4049 }
4050 
4051 static int
4052 ix_newbuf(struct ix_rx_ring *rxr, int i, boolean_t wait)
4053 {
4054 	struct mbuf *m;
4055 	bus_dma_segment_t seg;
4056 	bus_dmamap_t map;
4057 	struct ix_rx_buf *rxbuf;
4058 	int flags, error, nseg;
4059 
4060 	flags = M_NOWAIT;
4061 	if (__predict_false(wait))
4062 		flags = M_WAITOK;
4063 
4064 	m = m_getjcl(flags, MT_DATA, M_PKTHDR, rxr->rx_mbuf_sz);
4065 	if (m == NULL) {
4066 		if (wait) {
4067 			if_printf(&rxr->rx_sc->arpcom.ac_if,
4068 			    "Unable to allocate RX mbuf\n");
4069 		}
4070 		return ENOBUFS;
4071 	}
4072 	m->m_len = m->m_pkthdr.len = rxr->rx_mbuf_sz;
4073 
4074 	error = bus_dmamap_load_mbuf_segment(rxr->rx_tag,
4075 	    rxr->rx_sparemap, m, &seg, 1, &nseg, BUS_DMA_NOWAIT);
4076 	if (error) {
4077 		m_freem(m);
4078 		if (wait) {
4079 			if_printf(&rxr->rx_sc->arpcom.ac_if,
4080 			    "Unable to load RX mbuf\n");
4081 		}
4082 		return error;
4083 	}
4084 
4085 	rxbuf = &rxr->rx_buf[i];
4086 	if (rxbuf->m_head != NULL)
4087 		bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
4088 
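	/*
	 * Swap maps: the spare map now holds the newly loaded mbuf, so hand
	 * it to this RX buffer and keep the buffer's old (unloaded) map as
	 * the new spare for the next ix_newbuf() call.
	 */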
4089 	map = rxbuf->map;
4090 	rxbuf->map = rxr->rx_sparemap;
4091 	rxr->rx_sparemap = map;
4092 
4093 	rxbuf->m_head = m;
4094 	rxbuf->paddr = seg.ds_addr;
4095 
4096 	ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4097 	return 0;
4098 }
4099 
4100 static void
4101 ix_add_sysctl(struct ix_softc *sc)
4102 {
4103 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
4104 	struct sysctl_oid *tree = device_get_sysctl_tree(sc->dev);
4105 #ifdef IX_RSS_DEBUG
4106 	char node[32];
4107 	int i;
4108 #endif
4109 
4110 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4111 	    OID_AUTO, "rxr", CTLFLAG_RD, &sc->rx_ring_cnt, 0, "# of RX rings");
4112 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4113 	    OID_AUTO, "rxr_inuse", CTLFLAG_RD, &sc->rx_ring_inuse, 0,
4114 	    "# of RX rings used");
4115 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4116 	    OID_AUTO, "txr", CTLFLAG_RD, &sc->tx_ring_cnt, 0, "# of TX rings");
4117 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4118 	    OID_AUTO, "txr_inuse", CTLFLAG_RD, &sc->tx_ring_inuse, 0,
4119 	    "# of TX rings used");
4120 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4121 	    OID_AUTO, "rxd", CTLTYPE_INT | CTLFLAG_RD,
4122 	    sc, 0, ix_sysctl_rxd, "I",
4123 	    "# of RX descs");
4124 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4125 	    OID_AUTO, "txd", CTLTYPE_INT | CTLFLAG_RD,
4126 	    sc, 0, ix_sysctl_txd, "I",
4127 	    "# of TX descs");
4128 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4129 	    OID_AUTO, "tx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4130 	    sc, 0, ix_sysctl_tx_wreg_nsegs, "I",
4131 	    "# of segments sent before write to hardware register");
4132 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4133 	    OID_AUTO, "rx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4134 	    sc, 0, ix_sysctl_rx_wreg_nsegs, "I",
4135 	    "# of received segments sent before write to hardware register");
4136 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4137 	    OID_AUTO, "tx_intr_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4138 	    sc, 0, ix_sysctl_tx_intr_nsegs, "I",
4139 	    "# of segments per TX interrupt");
4140 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4141 	    OID_AUTO, "direct_input", CTLFLAG_RW, &sc->direct_input, 0,
4142 	    "Enable direct input");
4143 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
4144 		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4145 		    OID_AUTO, "tx_msix_cpumap", CTLTYPE_OPAQUE | CTLFLAG_RD,
4146 		    sc->tx_rmap_intr, 0, if_ringmap_cpumap_sysctl, "I",
4147 		    "TX MSI-X CPU map");
4148 		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4149 		    OID_AUTO, "rx_msix_cpumap", CTLTYPE_OPAQUE | CTLFLAG_RD,
4150 		    sc->rx_rmap_intr, 0, if_ringmap_cpumap_sysctl, "I",
4151 		    "RX MSI-X CPU map");
4152 	}
4153 #ifdef IFPOLL_ENABLE
4154 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4155 	    OID_AUTO, "tx_poll_cpumap", CTLTYPE_OPAQUE | CTLFLAG_RD,
4156 	    sc->tx_rmap, 0, if_ringmap_cpumap_sysctl, "I",
4157 	    "TX polling CPU map");
4158 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4159 	    OID_AUTO, "rx_poll_cpumap", CTLTYPE_OPAQUE | CTLFLAG_RD,
4160 	    sc->rx_rmap, 0, if_ringmap_cpumap_sysctl, "I",
4161 	    "RX polling CPU map");
4162 #endif
4163 
4164 #define IX_ADD_INTR_RATE_SYSCTL(sc, use, name) \
4165 do { \
4166 	ix_add_intr_rate_sysctl(sc, IX_INTR_USE_##use, #name, \
4167 	    ix_sysctl_##name, #use " interrupt rate"); \
4168 } while (0)
4169 
4170 	IX_ADD_INTR_RATE_SYSCTL(sc, RXTX, rxtx_intr_rate);
4171 	IX_ADD_INTR_RATE_SYSCTL(sc, RX, rx_intr_rate);
4172 	IX_ADD_INTR_RATE_SYSCTL(sc, TX, tx_intr_rate);
4173 	IX_ADD_INTR_RATE_SYSCTL(sc, STATUS, sts_intr_rate);
4174 
4175 #undef IX_ADD_INTR_RATE_SYSCTL
4176 
4177 #ifdef IX_RSS_DEBUG
4178 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4179 	    OID_AUTO, "rss_debug", CTLFLAG_RW, &sc->rss_debug, 0,
4180 	    "RSS debug level");
4181 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
4182 		ksnprintf(node, sizeof(node), "rx%d_pkt", i);
4183 		SYSCTL_ADD_ULONG(ctx,
4184 		    SYSCTL_CHILDREN(tree), OID_AUTO, node,
4185 		    CTLFLAG_RW, &sc->rx_rings[i].rx_pkts, "RXed packets");
4186 	}
4187 #endif
4188 
4189 #if 0
4190 	ix_add_hw_stats(sc);
4191 #endif
4192 
4193 }
4194 
4195 static int
4196 ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4197 {
4198 	struct ix_softc *sc = (void *)arg1;
4199 	struct ifnet *ifp = &sc->arpcom.ac_if;
4200 	int error, nsegs, i;
4201 
4202 	nsegs = sc->tx_rings[0].tx_wreg_nsegs;
4203 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4204 	if (error || req->newptr == NULL)
4205 		return error;
4206 	if (nsegs < 0)
4207 		return EINVAL;
4208 
4209 	ifnet_serialize_all(ifp);
4210 	for (i = 0; i < sc->tx_ring_cnt; ++i)
4211 		sc->tx_rings[i].tx_wreg_nsegs = nsegs;
4212 	ifnet_deserialize_all(ifp);
4213 
4214 	return 0;
4215 }
4216 
4217 static int
4218 ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4219 {
4220 	struct ix_softc *sc = (void *)arg1;
4221 	struct ifnet *ifp = &sc->arpcom.ac_if;
4222 	int error, nsegs, i;
4223 
4224 	nsegs = sc->rx_rings[0].rx_wreg_nsegs;
4225 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4226 	if (error || req->newptr == NULL)
4227 		return error;
4228 	if (nsegs < 0)
4229 		return EINVAL;
4230 
4231 	ifnet_serialize_all(ifp);
4232 	for (i = 0; i < sc->rx_ring_cnt; ++i)
4233 		sc->rx_rings[i].rx_wreg_nsegs = nsegs;
4234 	ifnet_deserialize_all(ifp);
4235 
4236 	return 0;
4237 }
4238 
4239 static int
4240 ix_sysctl_txd(SYSCTL_HANDLER_ARGS)
4241 {
4242 	struct ix_softc *sc = (void *)arg1;
4243 	int txd;
4244 
4245 	txd = sc->tx_rings[0].tx_ndesc;
4246 	return sysctl_handle_int(oidp, &txd, 0, req);
4247 }
4248 
4249 static int
4250 ix_sysctl_rxd(SYSCTL_HANDLER_ARGS)
4251 {
4252 	struct ix_softc *sc = (void *)arg1;
4253 	int rxd;
4254 
4255 	rxd = sc->rx_rings[0].rx_ndesc;
4256 	return sysctl_handle_int(oidp, &rxd, 0, req);
4257 }
4258 
4259 static int
4260 ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS)
4261 {
4262 	struct ix_softc *sc = (void *)arg1;
4263 	struct ifnet *ifp = &sc->arpcom.ac_if;
4264 	struct ix_tx_ring *txr = &sc->tx_rings[0];
4265 	int error, nsegs;
4266 
4267 	nsegs = txr->tx_intr_nsegs;
4268 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4269 	if (error || req->newptr == NULL)
4270 		return error;
4271 	if (nsegs < 0)
4272 		return EINVAL;
4273 
4274 	ifnet_serialize_all(ifp);
4275 
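	/*
	 * The interrupt threshold must leave enough descriptors free for at
	 * least one maximally scattered frame plus the reserved descriptors,
	 * hence the upper bound below.
	 */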
4276 	if (nsegs >= txr->tx_ndesc - IX_MAX_SCATTER - IX_TX_RESERVED) {
4277 		error = EINVAL;
4278 	} else {
4279 		int i;
4280 
4281 		error = 0;
4282 		for (i = 0; i < sc->tx_ring_cnt; ++i)
4283 			sc->tx_rings[i].tx_intr_nsegs = nsegs;
4284 	}
4285 
4286 	ifnet_deserialize_all(ifp);
4287 
4288 	return error;
4289 }
4290 
4291 static void
4292 ix_set_eitr(struct ix_softc *sc, int idx, int rate)
4293 {
4294 	uint32_t eitr, eitr_intvl;
4295 
4296 	eitr = IXGBE_READ_REG(&sc->hw, IXGBE_EITR(idx));
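	/*
	 * Convert the requested rate (interrupts/sec) into an EITR interval
	 * count; the divisor of 256 reflects the interval unit this driver
	 * assumes (roughly 256ns), and the per-MAC masking below aligns and
	 * clamps the value for the actual hardware.
	 */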
4297 	eitr_intvl = 1000000000 / 256 / rate;
4298 
4299 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4300 		eitr &= ~IX_EITR_INTVL_MASK_82598;
4301 		if (eitr_intvl == 0)
4302 			eitr_intvl = 1;
4303 		else if (eitr_intvl > IX_EITR_INTVL_MASK_82598)
4304 			eitr_intvl = IX_EITR_INTVL_MASK_82598;
4305 	} else {
4306 		eitr &= ~IX_EITR_INTVL_MASK;
4307 
4308 		eitr_intvl &= ~IX_EITR_INTVL_RSVD_MASK;
4309 		if (eitr_intvl == 0)
4310 			eitr_intvl = IX_EITR_INTVL_MIN;
4311 		else if (eitr_intvl > IX_EITR_INTVL_MAX)
4312 			eitr_intvl = IX_EITR_INTVL_MAX;
4313 	}
4314 	eitr |= eitr_intvl;
4315 
4316 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EITR(idx), eitr);
4317 }
4318 
4319 static int
4320 ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS)
4321 {
4322 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RXTX);
4323 }
4324 
4325 static int
4326 ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS)
4327 {
4328 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RX);
4329 }
4330 
4331 static int
4332 ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS)
4333 {
4334 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_TX);
4335 }
4336 
4337 static int
4338 ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS)
4339 {
4340 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_STATUS);
4341 }
4342 
4343 static int
4344 ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int use)
4345 {
4346 	struct ix_softc *sc = (void *)arg1;
4347 	struct ifnet *ifp = &sc->arpcom.ac_if;
4348 	int error, rate, i;
4349 
4350 	rate = 0;
4351 	for (i = 0; i < sc->intr_cnt; ++i) {
4352 		if (sc->intr_data[i].intr_use == use) {
4353 			rate = sc->intr_data[i].intr_rate;
4354 			break;
4355 		}
4356 	}
4357 
4358 	error = sysctl_handle_int(oidp, &rate, 0, req);
4359 	if (error || req->newptr == NULL)
4360 		return error;
4361 	if (rate <= 0)
4362 		return EINVAL;
4363 
4364 	ifnet_serialize_all(ifp);
4365 
4366 	for (i = 0; i < sc->intr_cnt; ++i) {
4367 		if (sc->intr_data[i].intr_use == use) {
4368 			sc->intr_data[i].intr_rate = rate;
4369 			if (ifp->if_flags & IFF_RUNNING)
4370 				ix_set_eitr(sc, i, rate);
4371 		}
4372 	}
4373 
4374 	ifnet_deserialize_all(ifp);
4375 
4376 	return error;
4377 }
4378 
4379 static void
4380 ix_add_intr_rate_sysctl(struct ix_softc *sc, int use,
4381     const char *name, int (*handler)(SYSCTL_HANDLER_ARGS), const char *desc)
4382 {
4383 	int i;
4384 
4385 	for (i = 0; i < sc->intr_cnt; ++i) {
4386 		if (sc->intr_data[i].intr_use == use) {
4387 			SYSCTL_ADD_PROC(device_get_sysctl_ctx(sc->dev),
4388 			    SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)),
4389 			    OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW,
4390 			    sc, 0, handler, "I", desc);
4391 			break;
4392 		}
4393 	}
4394 }
4395 
4396 static void
4397 ix_set_timer_cpuid(struct ix_softc *sc, boolean_t polling)
4398 {
4399 	if (polling || sc->intr_type == PCI_INTR_TYPE_MSIX)
4400 		sc->timer_cpuid = 0; /* XXX fixed */
4401 	else
4402 		sc->timer_cpuid = rman_get_cpuid(sc->intr_data[0].intr_res);
4403 }
4404 
4405 static void
4406 ix_alloc_msix(struct ix_softc *sc)
4407 {
4408 	int msix_enable, msix_cnt, msix_ring, alloc_cnt;
4409 	struct ix_intr_data *intr;
4410 	int i, x, error;
4411 	int ring_cnt, ring_cntmax;
4412 	boolean_t setup = FALSE;
4413 
4414 	msix_enable = ix_msix_enable;
4415 	/*
4416 	 * Don't enable MSI-X on 82598 by default, see:
4417 	 * 82598 specification update errata #38
4418 	 */
4419 	if (sc->hw.mac.type == ixgbe_mac_82598EB)
4420 		msix_enable = 0;
4421 	msix_enable = device_getenv_int(sc->dev, "msix.enable", msix_enable);
4422 	if (!msix_enable)
4423 		return;
4424 
4425 	msix_cnt = pci_msix_count(sc->dev);
4426 #ifdef IX_MSIX_DEBUG
4427 	msix_cnt = device_getenv_int(sc->dev, "msix.count", msix_cnt);
4428 #endif
4429 	if (msix_cnt <= 1) {
4430 		/* A single MSI-X vector does not make sense. */
4431 		return;
4432 	}
4433 
4434 	/*
4435 	 * Make sure that we don't exceed the limitations of the
4436 	 * interrupt related registers (EIMS, etc).
4437 	 */
4438 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4439 		if (msix_cnt > IX_MAX_MSIX_82598)
4440 			msix_cnt = IX_MAX_MSIX_82598;
4441 	} else {
4442 		if (msix_cnt > IX_MAX_MSIX)
4443 			msix_cnt = IX_MAX_MSIX;
4444 	}
4445 	if (bootverbose)
4446 		device_printf(sc->dev, "MSI-X count %d\n", msix_cnt);
4447 	msix_ring = msix_cnt - 1; /* -1 for status */
4448 
4449 	/*
4450 	 * Configure # of RX/TX rings usable by MSI-X.
4451 	 */
4452 	ix_get_rxring_cnt(sc, &ring_cnt, &ring_cntmax);
4453 	if (ring_cntmax > msix_ring)
4454 		ring_cntmax = msix_ring;
4455 	sc->rx_rmap_intr = if_ringmap_alloc(sc->dev, ring_cnt, ring_cntmax);
4456 
4457 	ix_get_txring_cnt(sc, &ring_cnt, &ring_cntmax);
4458 	if (ring_cntmax > msix_ring)
4459 		ring_cntmax = msix_ring;
4460 	sc->tx_rmap_intr = if_ringmap_alloc(sc->dev, ring_cnt, ring_cntmax);
4461 
4462 	if_ringmap_match(sc->dev, sc->rx_rmap_intr, sc->tx_rmap_intr);
4463 	sc->rx_ring_msix = if_ringmap_count(sc->rx_rmap_intr);
4464 	KASSERT(sc->rx_ring_msix <= sc->rx_ring_cnt,
4465 	    ("total RX ring count %d, MSI-X RX ring count %d",
4466 	     sc->rx_ring_cnt, sc->rx_ring_msix));
4467 	sc->tx_ring_msix = if_ringmap_count(sc->tx_rmap_intr);
4468 	KASSERT(sc->tx_ring_msix <= sc->tx_ring_cnt,
4469 	    ("total TX ring count %d, MSI-X TX ring count %d",
4470 	     sc->tx_ring_cnt, sc->tx_ring_msix));
4471 
4472 	/*
4473 	 * Aggregate TX/RX MSI-X
4474 	 */
4475 	ring_cntmax = sc->rx_ring_msix;
4476 	if (ring_cntmax < sc->tx_ring_msix)
4477 		ring_cntmax = sc->tx_ring_msix;
4478 	KASSERT(ring_cntmax <= msix_ring,
4479 	    ("invalid ring count max %d, MSI-X count for rings %d",
4480 	     ring_cntmax, msix_ring));
4481 
4482 	alloc_cnt = ring_cntmax + 1; /* +1 for status */
4483 	if (bootverbose) {
4484 		device_printf(sc->dev, "MSI-X alloc %d, "
4485 		    "RX ring %d, TX ring %d\n", alloc_cnt,
4486 		    sc->rx_ring_msix, sc->tx_ring_msix);
4487 	}
4488 
4489 	sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82598);
4490 	sc->msix_mem_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4491 	    &sc->msix_mem_rid, RF_ACTIVE);
4492 	if (sc->msix_mem_res == NULL) {
4493 		sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82599);
4494 		sc->msix_mem_res = bus_alloc_resource_any(sc->dev,
4495 		    SYS_RES_MEMORY, &sc->msix_mem_rid, RF_ACTIVE);
4496 		if (sc->msix_mem_res == NULL) {
4497 			device_printf(sc->dev, "Unable to map MSI-X table\n");
4498 			return;
4499 		}
4500 	}
4501 
4502 	sc->intr_cnt = alloc_cnt;
4503 	sc->intr_data = kmalloc(sizeof(struct ix_intr_data) * sc->intr_cnt,
4504 	    M_DEVBUF, M_WAITOK | M_ZERO);
4505 	for (x = 0; x < sc->intr_cnt; ++x) {
4506 		intr = &sc->intr_data[x];
4507 		intr->intr_rid = -1;
4508 		intr->intr_rate = IX_INTR_RATE;
4509 	}
4510 
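	/*
	 * Assign MSI-X vectors: one per RX ring first (piggybacking a TX
	 * ring mapped to the same CPU when possible), then one for each TX
	 * ring that was not piggybacked, and finally the status vector.
	 */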
4511 	x = 0;
4512 	for (i = 0; i < sc->rx_ring_msix; ++i) {
4513 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
4514 		struct ix_tx_ring *txr = NULL;
4515 		int cpuid, j;
4516 
4517 		KKASSERT(x < sc->intr_cnt);
4518 		rxr->rx_intr_vec = x;
4519 		ix_setup_msix_eims(sc, x,
4520 		    &rxr->rx_eims, &rxr->rx_eims_val);
4521 
4522 		cpuid = if_ringmap_cpumap(sc->rx_rmap_intr, i);
4523 
4524 		/*
4525 		 * Try finding TX ring to piggyback.
4526 		 */
4527 		for (j = 0; j < sc->tx_ring_msix; ++j) {
4528 			if (cpuid ==
4529 			    if_ringmap_cpumap(sc->tx_rmap_intr, j)) {
4530 				txr = &sc->tx_rings[j];
4531 				KKASSERT(txr->tx_intr_cpuid < 0);
4532 				break;
4533 			}
4534 		}
4535 		rxr->rx_txr = txr;
4536 
4537 		intr = &sc->intr_data[x++];
4538 		intr->intr_serialize = &rxr->rx_serialize;
4539 		if (txr != NULL) {
4540 			ksnprintf(intr->intr_desc0,
4541 			    sizeof(intr->intr_desc0), "%s rx%dtx%d",
4542 			    device_get_nameunit(sc->dev), i, txr->tx_idx);
4543 			intr->intr_use = IX_INTR_USE_RXTX;
4544 			intr->intr_func = ix_msix_rxtx;
4545 		} else {
4546 			ksnprintf(intr->intr_desc0,
4547 			    sizeof(intr->intr_desc0), "%s rx%d",
4548 			    device_get_nameunit(sc->dev), i);
4549 			intr->intr_rate = IX_MSIX_RX_RATE;
4550 			intr->intr_use = IX_INTR_USE_RX;
4551 			intr->intr_func = ix_msix_rx;
4552 		}
4553 		intr->intr_funcarg = rxr;
4554 		intr->intr_cpuid = cpuid;
4555 		KKASSERT(intr->intr_cpuid < netisr_ncpus);
4556 		intr->intr_desc = intr->intr_desc0;
4557 
4558 		if (txr != NULL) {
4559 			txr->tx_intr_cpuid = intr->intr_cpuid;
4560 			/* NOTE: Leave TX ring's intr_vec negative. */
4561 		}
4562 	}
4563 
4564 	for (i = 0; i < sc->tx_ring_msix; ++i) {
4565 		struct ix_tx_ring *txr = &sc->tx_rings[i];
4566 
4567 		if (txr->tx_intr_cpuid >= 0) {
4568 			/* Piggybacked by RX ring. */
4569 			continue;
4570 		}
4571 
4572 		KKASSERT(x < sc->intr_cnt);
4573 		txr->tx_intr_vec = x;
4574 		ix_setup_msix_eims(sc, x, &txr->tx_eims, &txr->tx_eims_val);
4575 
4576 		intr = &sc->intr_data[x++];
4577 		intr->intr_serialize = &txr->tx_serialize;
4578 		intr->intr_rate = IX_MSIX_TX_RATE;
4579 		intr->intr_use = IX_INTR_USE_TX;
4580 		intr->intr_func = ix_msix_tx;
4581 		intr->intr_funcarg = txr;
4582 		intr->intr_cpuid = if_ringmap_cpumap(sc->tx_rmap_intr, i);
4583 		KKASSERT(intr->intr_cpuid < netisr_ncpus);
4584 		ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s tx%d",
4585 		    device_get_nameunit(sc->dev), i);
4586 		intr->intr_desc = intr->intr_desc0;
4587 
4588 		txr->tx_intr_cpuid = intr->intr_cpuid;
4589 	}
4590 
4591 	/*
4592 	 * Status MSI-X
4593 	 */
4594 	KKASSERT(x < sc->intr_cnt);
4595 	sc->sts_msix_vec = x;
4596 
4597 	intr = &sc->intr_data[x++];
4598 
4599 	intr->intr_serialize = &sc->main_serialize;
4600 	intr->intr_func = ix_msix_status;
4601 	intr->intr_funcarg = sc;
4602 	intr->intr_cpuid = 0;
4603 	intr->intr_use = IX_INTR_USE_STATUS;
4604 
4605 	ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s sts",
4606 	    device_get_nameunit(sc->dev));
4607 	intr->intr_desc = intr->intr_desc0;
4608 
4609 	KKASSERT(x == sc->intr_cnt);
4610 
4611 	error = pci_setup_msix(sc->dev);
4612 	if (error) {
4613 		device_printf(sc->dev, "Setup MSI-X failed\n");
4614 		goto back;
4615 	}
4616 	setup = TRUE;
4617 
4618 	for (i = 0; i < sc->intr_cnt; ++i) {
4619 		intr = &sc->intr_data[i];
4620 
4621 		error = pci_alloc_msix_vector(sc->dev, i, &intr->intr_rid,
4622 		    intr->intr_cpuid);
4623 		if (error) {
4624 			device_printf(sc->dev,
4625 			    "Unable to allocate MSI-X %d on cpu%d\n", i,
4626 			    intr->intr_cpuid);
4627 			goto back;
4628 		}
4629 
4630 		intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
4631 		    &intr->intr_rid, RF_ACTIVE);
4632 		if (intr->intr_res == NULL) {
4633 			device_printf(sc->dev,
4634 			    "Unable to allocate MSI-X %d resource\n", i);
4635 			error = ENOMEM;
4636 			goto back;
4637 		}
4638 	}
4639 
4640 	pci_enable_msix(sc->dev);
4641 	sc->intr_type = PCI_INTR_TYPE_MSIX;
4642 back:
4643 	if (error)
4644 		ix_free_msix(sc, setup);
4645 }
4646 
4647 static void
4648 ix_free_msix(struct ix_softc *sc, boolean_t setup)
4649 {
4650 	int i;
4651 
4652 	KKASSERT(sc->intr_cnt > 1);
4653 
4654 	for (i = 0; i < sc->intr_cnt; ++i) {
4655 		struct ix_intr_data *intr = &sc->intr_data[i];
4656 
4657 		if (intr->intr_res != NULL) {
4658 			bus_release_resource(sc->dev, SYS_RES_IRQ,
4659 			    intr->intr_rid, intr->intr_res);
4660 		}
4661 		if (intr->intr_rid >= 0)
4662 			pci_release_msix_vector(sc->dev, intr->intr_rid);
4663 	}
4664 	if (setup)
4665 		pci_teardown_msix(sc->dev);
4666 
4667 	sc->intr_cnt = 0;
4668 	kfree(sc->intr_data, M_DEVBUF);
4669 	sc->intr_data = NULL;
4670 }
4671 
4672 static void
4673 ix_msix_rx(void *xrxr)
4674 {
4675 	struct ix_rx_ring *rxr = xrxr;
4676 
4677 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4678 
4679 	ix_rxeof(rxr, -1);
4680 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
4681 }
4682 
4683 static void
4684 ix_msix_tx(void *xtxr)
4685 {
4686 	struct ix_tx_ring *txr = xtxr;
4687 
4688 	ASSERT_SERIALIZED(&txr->tx_serialize);
4689 
4690 	ix_txeof(txr, *(txr->tx_hdr));
4691 	if (!ifsq_is_empty(txr->tx_ifsq))
4692 		ifsq_devstart(txr->tx_ifsq);
4693 	IXGBE_WRITE_REG(&txr->tx_sc->hw, txr->tx_eims, txr->tx_eims_val);
4694 }
4695 
4696 static void
4697 ix_msix_rxtx(void *xrxr)
4698 {
4699 	struct ix_rx_ring *rxr = xrxr;
4700 	struct ix_tx_ring *txr;
4701 	int hdr;
4702 
4703 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4704 
4705 	ix_rxeof(rxr, -1);
4706 
4707 	/*
4708 	 * NOTE:
4709 	 * Since tx_next_clean is only changed by ix_txeof(),
4710 	 * which is called only from the interrupt handler, checking
4711 	 * it without holding the TX serializer is MPSAFE.
4712 	 */
4713 	txr = rxr->rx_txr;
4714 	hdr = *(txr->tx_hdr);
4715 	if (hdr != txr->tx_next_clean) {
4716 		lwkt_serialize_enter(&txr->tx_serialize);
4717 		ix_txeof(txr, hdr);
4718 		if (!ifsq_is_empty(txr->tx_ifsq))
4719 			ifsq_devstart(txr->tx_ifsq);
4720 		lwkt_serialize_exit(&txr->tx_serialize);
4721 	}
4722 
4723 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
4724 }
4725 
4726 static void
4727 ix_intr_status(struct ix_softc *sc, uint32_t eicr)
4728 {
4729 	struct ixgbe_hw *hw = &sc->hw;
4730 
4731 	/* Link status change */
4732 	if (eicr & IXGBE_EICR_LSC)
4733 		ix_handle_link(sc);
4734 
4735 	if (hw->mac.type != ixgbe_mac_82598EB) {
4736 		if (eicr & IXGBE_EICR_ECC)
4737 			if_printf(&sc->arpcom.ac_if, "ECC ERROR!!  Reboot!!\n");
4738 
4739 		/* Check for over temp condition */
4740 		if (eicr & IXGBE_EICR_TS) {
4741 			if_printf(&sc->arpcom.ac_if, "CRITICAL: OVER TEMP!!  "
4742 			    "PHY IS SHUT DOWN!!  Shutdown!!\n");
4743 		}
4744 	}
4745 
4746 	if (ix_is_sfp(hw)) {
4747 		uint32_t mod_mask;
4748 
4749 		/* Pluggable optics-related interrupt */
4750 		if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP)
4751 			mod_mask = IXGBE_EICR_GPI_SDP0_X540;
4752 		else
4753 			mod_mask = IXGBE_EICR_GPI_SDP2_BY_MAC(hw);
4754 		if (eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw))
4755 			ix_handle_msf(sc);
4756 		else if (eicr & mod_mask)
4757 			ix_handle_mod(sc);
4758 	}
4759 
4760 	/* Check for fan failure */
4761 	if (hw->device_id == IXGBE_DEV_ID_82598AT &&
4762 	    (eicr & IXGBE_EICR_GPI_SDP1))
4763 		if_printf(&sc->arpcom.ac_if, "FAN FAILURE!!  Replace!!\n");
4764 
4765 	/* External PHY interrupt */
4766 	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T &&
4767 	    (eicr & IXGBE_EICR_GPI_SDP0_X540))
4768 		ix_handle_phy(sc);
4769 }
4770 
4771 static void
4772 ix_msix_status(void *xsc)
4773 {
4774 	struct ix_softc *sc = xsc;
4775 	uint32_t eicr;
4776 
4777 	ASSERT_SERIALIZED(&sc->main_serialize);
4778 
4779 	eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
4780 	ix_intr_status(sc, eicr);
4781 
4782 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMS, sc->intr_mask);
4783 }
4784 
4785 static void
4786 ix_setup_msix_eims(const struct ix_softc *sc, int x,
4787     uint32_t *eims, uint32_t *eims_val)
4788 {
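	/*
	 * Select the EIMS register and bit used to re-enable this vector's
	 * interrupt: vectors 0-31 live in EIMS (82598) or EIMS_EX(0),
	 * vectors 32 and above in EIMS_EX(1), which the 82598 lacks.
	 */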
4789 	if (x < 32) {
4790 		if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4791 			KASSERT(x < IX_MAX_MSIX_82598,
4792 			    ("%s: invalid vector %d for 82598",
4793 			     device_get_nameunit(sc->dev), x));
4794 			*eims = IXGBE_EIMS;
4795 		} else {
4796 			*eims = IXGBE_EIMS_EX(0);
4797 		}
4798 		*eims_val = 1 << x;
4799 	} else {
4800 		KASSERT(x < IX_MAX_MSIX, ("%s: invalid vector %d",
4801 		    device_get_nameunit(sc->dev), x));
4802 		KASSERT(sc->hw.mac.type != ixgbe_mac_82598EB,
4803 		    ("%s: invalid vector %d for 82598",
4804 		     device_get_nameunit(sc->dev), x));
4805 		*eims = IXGBE_EIMS_EX(1);
4806 		*eims_val = 1 << (x - 32);
4807 	}
4808 }
4809 
4810 #ifdef IFPOLL_ENABLE
4811 
4812 static void
4813 ix_npoll_status(struct ifnet *ifp)
4814 {
4815 	struct ix_softc *sc = ifp->if_softc;
4816 	uint32_t eicr;
4817 
4818 	ASSERT_SERIALIZED(&sc->main_serialize);
4819 
4820 	eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
4821 	ix_intr_status(sc, eicr);
4822 }
4823 
4824 static void
4825 ix_npoll_tx(struct ifnet *ifp, void *arg, int cycle __unused)
4826 {
4827 	struct ix_tx_ring *txr = arg;
4828 
4829 	ASSERT_SERIALIZED(&txr->tx_serialize);
4830 
4831 	ix_txeof(txr, *(txr->tx_hdr));
4832 	if (!ifsq_is_empty(txr->tx_ifsq))
4833 		ifsq_devstart(txr->tx_ifsq);
4834 }
4835 
4836 static void
4837 ix_npoll_rx(struct ifnet *ifp __unused, void *arg, int cycle)
4838 {
4839 	struct ix_rx_ring *rxr = arg;
4840 
4841 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4842 	ix_rxeof(rxr, cycle);
4843 }
4844 
4845 static void
4846 ix_npoll_rx_direct(struct ifnet *ifp __unused, void *arg, int cycle)
4847 {
4848 	struct ix_rx_ring *rxr = arg;
4849 
4850 	ASSERT_NOT_SERIALIZED(&rxr->rx_serialize);
4851 	ix_rxeof(rxr, cycle);
4852 }
4853 
4854 static void
4855 ix_npoll(struct ifnet *ifp, struct ifpoll_info *info)
4856 {
4857 	struct ix_softc *sc = ifp->if_softc;
4858 	int i, txr_cnt, rxr_cnt, idirect;
4859 
4860 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
4861 
4862 	idirect = sc->direct_input;
4863 	cpu_ccfence();
4864 
4865 	if (info) {
4866 		int cpu;
4867 
4868 		info->ifpi_status.status_func = ix_npoll_status;
4869 		info->ifpi_status.serializer = &sc->main_serialize;
4870 
4871 		txr_cnt = ix_get_txring_inuse(sc, TRUE);
4872 		for (i = 0; i < txr_cnt; ++i) {
4873 			struct ix_tx_ring *txr = &sc->tx_rings[i];
4874 
4875 			cpu = if_ringmap_cpumap(sc->tx_rmap, i);
4876 			KKASSERT(cpu < netisr_ncpus);
4877 			info->ifpi_tx[cpu].poll_func = ix_npoll_tx;
4878 			info->ifpi_tx[cpu].arg = txr;
4879 			info->ifpi_tx[cpu].serializer = &txr->tx_serialize;
4880 			ifsq_set_cpuid(txr->tx_ifsq, cpu);
4881 		}
4882 
4883 		rxr_cnt = ix_get_rxring_inuse(sc, TRUE);
4884 		for (i = 0; i < rxr_cnt; ++i) {
4885 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
4886 
4887 			cpu = if_ringmap_cpumap(sc->rx_rmap, i);
4888 			KKASSERT(cpu < netisr_ncpus);
4889 			info->ifpi_rx[cpu].arg = rxr;
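			/*
			 * With direct input the RX poll handler runs without
			 * the RX ring serializer (ix_npoll_rx_direct);
			 * otherwise the serializer is entered around
			 * ix_npoll_rx().
			 */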
4890 			if (idirect) {
4891 				info->ifpi_rx[cpu].poll_func =
4892 				    ix_npoll_rx_direct;
4893 				info->ifpi_rx[cpu].serializer = NULL;
4894 			} else {
4895 				info->ifpi_rx[cpu].poll_func = ix_npoll_rx;
4896 				info->ifpi_rx[cpu].serializer =
4897 				    &rxr->rx_serialize;
4898 			}
4899 		}
4900 		if (idirect)
4901 			ifp->if_flags |= IFF_IDIRECT;
4902 	} else {
4903 		ifp->if_flags &= ~IFF_IDIRECT;
4904 		for (i = 0; i < sc->tx_ring_cnt; ++i) {
4905 			struct ix_tx_ring *txr = &sc->tx_rings[i];
4906 
4907 			ifsq_set_cpuid(txr->tx_ifsq, txr->tx_intr_cpuid);
4908 		}
4909 	}
4910 	if (ifp->if_flags & IFF_RUNNING)
4911 		ix_init(sc);
4912 }
4913 
4914 #endif /* IFPOLL_ENABLE */
4915 
4916 static enum ixgbe_fc_mode
4917 ix_ifmedia2fc(int ifm)
4918 {
4919 	int fc_opt = ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
4920 
4921 	switch (fc_opt) {
4922 	case (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE):
4923 		return ixgbe_fc_full;
4924 
4925 	case IFM_ETH_RXPAUSE:
4926 		return ixgbe_fc_rx_pause;
4927 
4928 	case IFM_ETH_TXPAUSE:
4929 		return ixgbe_fc_tx_pause;
4930 
4931 	default:
4932 		return ixgbe_fc_none;
4933 	}
4934 }
4935 
4936 static const char *
4937 ix_ifmedia2str(int ifm)
4938 {
4939 	int fc_opt = ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
4940 
4941 	switch (fc_opt) {
4942 	case (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE):
4943 		return IFM_ETH_FC_FULL;
4944 
4945 	case IFM_ETH_RXPAUSE:
4946 		return IFM_ETH_FC_RXPAUSE;
4947 
4948 	case IFM_ETH_TXPAUSE:
4949 		return IFM_ETH_FC_TXPAUSE;
4950 
4951 	default:
4952 		return IFM_ETH_FC_NONE;
4953 	}
4954 }
4955 
4956 static const char *
4957 ix_fc2str(enum ixgbe_fc_mode fc)
4958 {
4959 	switch (fc) {
4960 	case ixgbe_fc_full:
4961 		return IFM_ETH_FC_FULL;
4962 
4963 	case ixgbe_fc_rx_pause:
4964 		return IFM_ETH_FC_RXPAUSE;
4965 
4966 	case ixgbe_fc_tx_pause:
4967 		return IFM_ETH_FC_TXPAUSE;
4968 
4969 	default:
4970 		return IFM_ETH_FC_NONE;
4971 	}
4972 }
4973 
4974 static int
4975 ix_powerdown(struct ix_softc *sc)
4976 {
4977 	struct ixgbe_hw *hw = &sc->hw;
4978 	int error = 0;
4979 
4980 	/* Limit power management flow to X550EM baseT */
4981 	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T &&
4982 	    hw->phy.ops.enter_lplu) {
4983 		/* Turn off support for APM wakeup. (Using ACPI instead) */
4984 		IXGBE_WRITE_REG(hw, IXGBE_GRC,
4985 		    IXGBE_READ_REG(hw, IXGBE_GRC) & ~(uint32_t)2);
4986 
4987 		/*
4988 		 * Clear Wake Up Status register to prevent any previous wakeup
4989 		 * events from waking us up immediately after we suspend.
4990 		 */
4991 		IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff);
4992 
4993 		/*
4994 		 * Program the Wakeup Filter Control register with user filter
4995 		 * settings
4996 		 */
4997 		IXGBE_WRITE_REG(hw, IXGBE_WUFC, sc->wufc);
4998 
4999 		/* Enable wakeups and power management in Wakeup Control */
5000 		IXGBE_WRITE_REG(hw, IXGBE_WUC,
5001 		    IXGBE_WUC_WKEN | IXGBE_WUC_PME_EN);
5002 
5003 		/* X550EM baseT adapters need a special LPLU flow */
5004 		hw->phy.reset_disable = true;
5005 		ix_stop(sc);
5006 		error = hw->phy.ops.enter_lplu(hw);
5007 		if (error) {
5008 			if_printf(&sc->arpcom.ac_if,
5009 			    "Error entering LPLU: %d\n", error);
5010 		}
5011 		hw->phy.reset_disable = false;
5012 	} else {
5013 		/* Just stop for other adapters */
5014 		ix_stop(sc);
5015 	}
5016 	return error;
5017 }
5018 
5019 static void
5020 ix_config_flowctrl(struct ix_softc *sc)
5021 {
5022 	struct ixgbe_hw *hw = &sc->hw;
5023 	uint32_t rxpb, frame, size, tmp;
5024 
5025 	frame = sc->max_frame_size;
5026 
5027 	/* Calculate High Water */
5028 	switch (hw->mac.type) {
5029 	case ixgbe_mac_X540:
5030 	case ixgbe_mac_X550:
5031 	case ixgbe_mac_X550EM_a:
5032 	case ixgbe_mac_X550EM_x:
5033 		tmp = IXGBE_DV_X540(frame, frame);
5034 		break;
5035 	default:
5036 		tmp = IXGBE_DV(frame, frame);
5037 		break;
5038 	}
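	/* High water: RX packet buffer size (KB) minus the delay value. */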
5039 	size = IXGBE_BT2KB(tmp);
5040 	rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
5041 	hw->fc.high_water[0] = rxpb - size;
5042 
5043 	/* Now calculate Low Water */
5044 	switch (hw->mac.type) {
5045 	case ixgbe_mac_X540:
5046 	case ixgbe_mac_X550:
5047 	case ixgbe_mac_X550EM_a:
5048 	case ixgbe_mac_X550EM_x:
5049 		tmp = IXGBE_LOW_DV_X540(frame);
5050 		break;
5051 	default:
5052 		tmp = IXGBE_LOW_DV(frame);
5053 		break;
5054 	}
5055 	hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
5056 
5057 	hw->fc.requested_mode = ix_ifmedia2fc(sc->ifm_media);
5058 	if (sc->ifm_media & IFM_ETH_FORCEPAUSE)
5059 		hw->fc.disable_fc_autoneg = TRUE;
5060 	else
5061 		hw->fc.disable_fc_autoneg = FALSE;
5062 	hw->fc.pause_time = IX_FC_PAUSE;
5063 	hw->fc.send_xon = TRUE;
5064 }
5065 
5066 static void
5067 ix_config_dmac(struct ix_softc *sc)
5068 {
5069 	struct ixgbe_hw *hw = &sc->hw;
5070 	struct ixgbe_dmac_config *dcfg = &hw->mac.dmac_config;
5071 
5072 	if (hw->mac.type < ixgbe_mac_X550 || !hw->mac.ops.dmac_config)
5073 		return;
5074 
5075 	if ((dcfg->watchdog_timer ^ sc->dmac) ||
5076 	    (dcfg->link_speed ^ sc->link_speed)) {
5077 		dcfg->watchdog_timer = sc->dmac;
5078 		dcfg->fcoe_en = false;
5079 		dcfg->link_speed = sc->link_speed;
5080 		dcfg->num_tcs = 1;
5081 
5082 		if (bootverbose) {
5083 			if_printf(&sc->arpcom.ac_if, "dmac settings: "
5084 			    "watchdog %d, link speed %d\n",
5085 			    dcfg->watchdog_timer, dcfg->link_speed);
5086 		}
5087 
5088 		hw->mac.ops.dmac_config(hw);
5089 	}
5090 }
5091 
5092 static void
5093 ix_init_media(struct ix_softc *sc)
5094 {
5095 	struct ixgbe_hw *hw = &sc->hw;
5096 	int layer, msf_ifm = IFM_NONE;
5097 
5098 	ifmedia_removeall(&sc->media);
5099 
5100 	layer = ixgbe_get_supported_physical_layer(hw);
5101 
5102 	/*
5103 	 * Media types with matching DragonFlyBSD media defines
5104 	 */
5105 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
5106 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_T | IFM_FDX,
5107 		    0, NULL);
5108 	}
5109 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
5110 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
5111 		    0, NULL);
5112 	}
5113 	if (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX) {
5114 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
5115 		    0, NULL);
5116 		/* No half-duplex support */
5117 	}
5118 
5119 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) {
5120 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_LR | IFM_FDX,
5121 		    0, NULL);
5122 		msf_ifm = IFM_1000_LX;
5123 	}
5124 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LRM) {
5125 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_LRM | IFM_FDX,
5126 		    0, NULL);
5127 		msf_ifm = IFM_1000_LX;
5128 	}
5129 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
5130 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_SR | IFM_FDX,
5131 		    0, NULL);
5132 		msf_ifm = IFM_1000_SX;
5133 	}
5134 
5135 	/* Add media for multispeed fiber */
5136 	if (ix_is_sfp(hw) && hw->phy.multispeed_fiber && msf_ifm != IFM_NONE) {
5137 		uint32_t linkcap;
5138 		bool autoneg;
5139 
5140 		hw->mac.ops.get_link_capabilities(hw, &linkcap, &autoneg);
5141 		if (linkcap & IXGBE_LINK_SPEED_1GB_FULL)
5142 			ifmedia_add_nodup(&sc->media,
5143 			    IFM_ETHER | msf_ifm | IFM_FDX, 0, NULL);
5144 	}
5145 
5146 	if ((layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) ||
5147 	    (layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA)) {
5148 		ifmedia_add_nodup(&sc->media,
5149 		    IFM_ETHER | IFM_10G_TWINAX | IFM_FDX, 0, NULL);
5150 	}
5151 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) {
5152 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_CX4 | IFM_FDX,
5153 		    0, NULL);
5154 	}
5155 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
5156 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
5157 		    0, NULL);
5158 	}
5159 
5160 	/*
5161 	 * XXX Other (no matching DragonFlyBSD media type):
5162 	 * To work around this, we'll assign these completely
5163 	 * inappropriate media types.
5164 	 */
5165 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) {
5166 		if_printf(&sc->arpcom.ac_if, "Media supported: 10GbaseKR\n");
5167 		if_printf(&sc->arpcom.ac_if, "10GbaseKR mapped to 10GbaseSR\n");
5168 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_SR | IFM_FDX,
5169 		    0, NULL);
5170 	}
5171 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) {
5172 		if_printf(&sc->arpcom.ac_if, "Media supported: 10GbaseKX4\n");
5173 		if_printf(&sc->arpcom.ac_if,
5174 		    "10GbaseKX4 mapped to 10GbaseCX4\n");
5175 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_CX4 | IFM_FDX,
5176 		    0, NULL);
5177 	}
5178 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) {
5179 		if_printf(&sc->arpcom.ac_if, "Media supported: 1000baseKX\n");
5180 		if_printf(&sc->arpcom.ac_if,
5181 		    "1000baseKX mapped to 1000baseCX\n");
5182 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_CX | IFM_FDX,
5183 		    0, NULL);
5184 	}
5185 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_BX) {
5186 		/* Someday, someone will care about you... */
5187 		if_printf(&sc->arpcom.ac_if,
5188 		    "Media supported: 1000baseBX, ignored\n");
5189 	}
5190 
5191 	/* XXX we probably don't need this */
5192 	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
5193 		ifmedia_add_nodup(&sc->media,
5194 		    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
5195 	}
5196 
5197 	ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
5198 
5199 	if (ifmedia_tryset(&sc->media, sc->ifm_media)) {
5200 		int flowctrl = (sc->ifm_media & IFM_ETH_FCMASK);
5201 
5202 		sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;
5203 		sc->ifm_media = IX_IFM_DEFAULT | flowctrl;
5204 		ifmedia_set(&sc->media, sc->ifm_media);
5205 	}
5206 }
5207