xref: /dragonfly/sys/dev/netif/ix/if_ix.c (revision 11ef46fd)
1 /*
2  * Copyright (c) 2001-2014, Intel Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  *  1. Redistributions of source code must retain the above copyright notice,
9  *     this list of conditions and the following disclaimer.
10  *
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *     notice, this list of conditions and the following disclaimer in the
13  *     documentation and/or other materials provided with the distribution.
14  *
15  *  3. Neither the name of the Intel Corporation nor the names of its
16  *     contributors may be used to endorse or promote products derived from
17  *     this software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include "opt_ifpoll.h"
33 #include "opt_ix.h"
34 
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/endian.h>
38 #include <sys/interrupt.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/proc.h>
43 #include <sys/rman.h>
44 #include <sys/serialize.h>
45 #include <sys/serialize2.h>
46 #include <sys/socket.h>
47 #include <sys/sockio.h>
48 #include <sys/sysctl.h>
49 #include <sys/systm.h>
50 
51 #include <net/bpf.h>
52 #include <net/ethernet.h>
53 #include <net/if.h>
54 #include <net/if_arp.h>
55 #include <net/if_dl.h>
56 #include <net/if_media.h>
57 #include <net/ifq_var.h>
58 #include <net/toeplitz.h>
59 #include <net/toeplitz2.h>
60 #include <net/vlan/if_vlan_var.h>
61 #include <net/vlan/if_vlan_ether.h>
62 #include <net/if_poll.h>
63 
64 #include <netinet/in_systm.h>
65 #include <netinet/in.h>
66 #include <netinet/ip.h>
67 
68 #include <bus/pci/pcivar.h>
69 #include <bus/pci/pcireg.h>
70 
71 #include <dev/netif/ix/ixgbe_api.h>
72 #include <dev/netif/ix/if_ix.h>
73 
74 #define IX_IFM_DEFAULT		(IFM_ETHER | IFM_AUTO)
75 
76 #ifdef IX_RSS_DEBUG
77 #define IX_RSS_DPRINTF(sc, lvl, fmt, ...) \
78 do { \
79 	if (sc->rss_debug >= lvl) \
80 		if_printf(&sc->arpcom.ac_if, fmt, __VA_ARGS__); \
81 } while (0)
82 #else	/* !IX_RSS_DEBUG */
83 #define IX_RSS_DPRINTF(sc, lvl, fmt, ...)	((void)0)
84 #endif	/* IX_RSS_DEBUG */
85 
86 #define IX_NAME			"Intel(R) PRO/10GbE "
87 #define IX_DEVICE(id) \
88 	{ IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_##id, IX_NAME #id }
89 #define IX_DEVICE_NULL		{ 0, 0, NULL }
90 
91 static struct ix_device {
92 	uint16_t	vid;
93 	uint16_t	did;
94 	const char	*desc;
95 } ix_devices[] = {
96 	IX_DEVICE(82598AF_DUAL_PORT),
97 	IX_DEVICE(82598AF_SINGLE_PORT),
98 	IX_DEVICE(82598EB_CX4),
99 	IX_DEVICE(82598AT),
100 	IX_DEVICE(82598AT2),
101 	IX_DEVICE(82598),
102 	IX_DEVICE(82598_DA_DUAL_PORT),
103 	IX_DEVICE(82598_CX4_DUAL_PORT),
104 	IX_DEVICE(82598EB_XF_LR),
105 	IX_DEVICE(82598_SR_DUAL_PORT_EM),
106 	IX_DEVICE(82598EB_SFP_LOM),
107 	IX_DEVICE(82599_KX4),
108 	IX_DEVICE(82599_KX4_MEZZ),
109 	IX_DEVICE(82599_SFP),
110 	IX_DEVICE(82599_XAUI_LOM),
111 	IX_DEVICE(82599_CX4),
112 	IX_DEVICE(82599_T3_LOM),
113 	IX_DEVICE(82599_COMBO_BACKPLANE),
114 	IX_DEVICE(82599_BACKPLANE_FCOE),
115 	IX_DEVICE(82599_SFP_SF2),
116 	IX_DEVICE(82599_SFP_FCOE),
117 	IX_DEVICE(82599EN_SFP),
118 	IX_DEVICE(82599_SFP_SF_QP),
119 	IX_DEVICE(82599_QSFP_SF_QP),
120 	IX_DEVICE(X540T),
121 	IX_DEVICE(X540T1),
122 	IX_DEVICE(X550T),
123 	IX_DEVICE(X550EM_X_KR),
124 	IX_DEVICE(X550EM_X_KX4),
125 	IX_DEVICE(X550EM_X_10G_T),
126 
127 	/* required last entry */
128 	IX_DEVICE_NULL
129 };
130 
131 static int	ix_probe(device_t);
132 static int	ix_attach(device_t);
133 static int	ix_detach(device_t);
134 static int	ix_shutdown(device_t);
135 
136 static void	ix_serialize(struct ifnet *, enum ifnet_serialize);
137 static void	ix_deserialize(struct ifnet *, enum ifnet_serialize);
138 static int	ix_tryserialize(struct ifnet *, enum ifnet_serialize);
139 #ifdef INVARIANTS
140 static void	ix_serialize_assert(struct ifnet *, enum ifnet_serialize,
141 		    boolean_t);
142 #endif
143 static void	ix_start(struct ifnet *, struct ifaltq_subque *);
144 static void	ix_watchdog(struct ifaltq_subque *);
145 static int	ix_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
146 static void	ix_init(void *);
147 static void	ix_stop(struct ix_softc *);
148 static void	ix_media_status(struct ifnet *, struct ifmediareq *);
149 static int	ix_media_change(struct ifnet *);
150 static void	ix_timer(void *);
151 #ifdef IFPOLL_ENABLE
152 static void	ix_npoll(struct ifnet *, struct ifpoll_info *);
153 static void	ix_npoll_rx(struct ifnet *, void *, int);
154 static void	ix_npoll_tx(struct ifnet *, void *, int);
155 static void	ix_npoll_status(struct ifnet *);
156 #endif
157 
158 static void	ix_add_sysctl(struct ix_softc *);
159 static void	ix_add_intr_rate_sysctl(struct ix_softc *, int,
160 		    const char *, int (*)(SYSCTL_HANDLER_ARGS), const char *);
161 static int	ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
162 static int	ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
163 static int	ix_sysctl_txd(SYSCTL_HANDLER_ARGS);
164 static int	ix_sysctl_rxd(SYSCTL_HANDLER_ARGS);
165 static int	ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS);
166 static int	ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int);
167 static int	ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS);
168 static int	ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS);
169 static int	ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS);
170 static int	ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS);
171 #if 0
172 static void     ix_add_hw_stats(struct ix_softc *);
173 #endif
174 
175 static void	ix_slot_info(struct ix_softc *);
176 static int	ix_alloc_rings(struct ix_softc *);
177 static void	ix_free_rings(struct ix_softc *);
178 static void	ix_setup_ifp(struct ix_softc *);
179 static void	ix_setup_serialize(struct ix_softc *);
180 static void	ix_set_ring_inuse(struct ix_softc *, boolean_t);
181 static void	ix_set_timer_cpuid(struct ix_softc *, boolean_t);
182 static void	ix_update_stats(struct ix_softc *);
183 
184 static void	ix_set_promisc(struct ix_softc *);
185 static void	ix_set_multi(struct ix_softc *);
186 static void	ix_set_vlan(struct ix_softc *);
187 static uint8_t	*ix_mc_array_itr(struct ixgbe_hw *, uint8_t **, uint32_t *);
188 static enum ixgbe_fc_mode ix_ifmedia2fc(int);
189 static const char *ix_ifmedia2str(int);
190 static const char *ix_fc2str(enum ixgbe_fc_mode);
191 
192 static void	ix_get_txring_cnt(const struct ix_softc *, int *, int *);
193 static int	ix_get_txring_inuse(const struct ix_softc *, boolean_t);
194 static void	ix_init_tx_ring(struct ix_tx_ring *);
195 static void	ix_free_tx_ring(struct ix_tx_ring *);
196 static int	ix_create_tx_ring(struct ix_tx_ring *);
197 static void	ix_destroy_tx_ring(struct ix_tx_ring *, int);
198 static void	ix_init_tx_unit(struct ix_softc *);
199 static int	ix_encap(struct ix_tx_ring *, struct mbuf **,
200 		    uint16_t *, int *);
201 static int	ix_tx_ctx_setup(struct ix_tx_ring *,
202 		    const struct mbuf *, uint32_t *, uint32_t *);
203 static int	ix_tso_ctx_setup(struct ix_tx_ring *,
204 		    const struct mbuf *, uint32_t *, uint32_t *);
205 static void	ix_txeof(struct ix_tx_ring *, int);
206 
207 static void	ix_get_rxring_cnt(const struct ix_softc *, int *, int *);
208 static int	ix_get_rxring_inuse(const struct ix_softc *, boolean_t);
209 static int	ix_init_rx_ring(struct ix_rx_ring *);
210 static void	ix_free_rx_ring(struct ix_rx_ring *);
211 static int	ix_create_rx_ring(struct ix_rx_ring *);
212 static void	ix_destroy_rx_ring(struct ix_rx_ring *, int);
213 static void	ix_init_rx_unit(struct ix_softc *, boolean_t);
214 #if 0
215 static void	ix_setup_hw_rsc(struct ix_rx_ring *);
216 #endif
217 static int	ix_newbuf(struct ix_rx_ring *, int, boolean_t);
218 static void	ix_rxeof(struct ix_rx_ring *, int);
219 static void	ix_rx_discard(struct ix_rx_ring *, int, boolean_t);
220 static void	ix_enable_rx_drop(struct ix_softc *);
221 static void	ix_disable_rx_drop(struct ix_softc *);
222 
223 static void	ix_alloc_msix(struct ix_softc *);
224 static void	ix_free_msix(struct ix_softc *, boolean_t);
225 static void	ix_setup_msix_eims(const struct ix_softc *, int,
226 		    uint32_t *, uint32_t *);
227 static int	ix_alloc_intr(struct ix_softc *);
228 static void	ix_free_intr(struct ix_softc *);
229 static int	ix_setup_intr(struct ix_softc *);
230 static void	ix_teardown_intr(struct ix_softc *, int);
231 static void	ix_enable_intr(struct ix_softc *);
232 static void	ix_disable_intr(struct ix_softc *);
233 static void	ix_set_ivar(struct ix_softc *, uint8_t, uint8_t, int8_t);
234 static void	ix_set_eitr(struct ix_softc *, int, int);
235 static void	ix_intr_status(struct ix_softc *, uint32_t);
236 static void	ix_intr(void *);
237 static void	ix_msix_rxtx(void *);
238 static void	ix_msix_rx(void *);
239 static void	ix_msix_tx(void *);
240 static void	ix_msix_status(void *);
241 
242 static void	ix_config_link(struct ix_softc *);
243 static boolean_t ix_sfp_probe(struct ix_softc *);
244 static boolean_t ix_is_sfp(const struct ixgbe_hw *);
245 static void	ix_update_link_status(struct ix_softc *);
246 static void	ix_handle_link(struct ix_softc *);
247 static void	ix_handle_mod(struct ix_softc *);
248 static void	ix_handle_msf(struct ix_softc *);
249 static void	ix_handle_phy(struct ix_softc *);
250 static int	ix_powerdown(struct ix_softc *);
251 static void	ix_config_flowctrl(struct ix_softc *);
252 static void	ix_config_dmac(struct ix_softc *);
253 static void	ix_init_media(struct ix_softc *);
254 
255 /* XXX Missing shared code prototype */
256 extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *);
257 
258 static device_method_t ix_methods[] = {
259 	/* Device interface */
260 	DEVMETHOD(device_probe,		ix_probe),
261 	DEVMETHOD(device_attach,	ix_attach),
262 	DEVMETHOD(device_detach,	ix_detach),
263 	DEVMETHOD(device_shutdown,	ix_shutdown),
264 	DEVMETHOD_END
265 };
266 
267 static driver_t ix_driver = {
268 	"ix",
269 	ix_methods,
270 	sizeof(struct ix_softc)
271 };
272 
273 static devclass_t ix_devclass;
274 
275 DECLARE_DUMMY_MODULE(if_ix);
276 DRIVER_MODULE(if_ix, pci, ix_driver, ix_devclass, NULL, NULL);
277 
278 static int	ix_msi_enable = 1;
279 static int	ix_msix_enable = 1;
280 static int	ix_rxr = 0;
281 static int	ix_txr = 0;
282 static int	ix_txd = IX_PERF_TXD;
283 static int	ix_rxd = IX_PERF_RXD;
284 static int	ix_unsupported_sfp = 0;
285 
286 static char	ix_flowctrl[IFM_ETH_FC_STRLEN] = IFM_ETH_FC_FULL;
287 
288 TUNABLE_INT("hw.ix.msi.enable", &ix_msi_enable);
289 TUNABLE_INT("hw.ix.msix.enable", &ix_msix_enable);
290 TUNABLE_INT("hw.ix.rxr", &ix_rxr);
291 TUNABLE_INT("hw.ix.txr", &ix_txr);
292 TUNABLE_INT("hw.ix.txd", &ix_txd);
293 TUNABLE_INT("hw.ix.rxd", &ix_rxd);
294 TUNABLE_INT("hw.ix.unsupported_sfp", &ix_unsupported_sfp);
295 TUNABLE_STR("hw.ix.flow_ctrl", ix_flowctrl, sizeof(ix_flowctrl));
296 
297 /*
298  * Smart speed setting, default to on.  This only works
299  * as a compile-time option right now, since it is applied during attach;
300  * set this to 'ixgbe_smart_speed_off' to disable.
301  */
302 static const enum ixgbe_smart_speed ix_smart_speed =
303     ixgbe_smart_speed_on;
304 
305 static int
306 ix_probe(device_t dev)
307 {
308 	const struct ix_device *d;
309 	uint16_t vid, did;
310 
311 	vid = pci_get_vendor(dev);
312 	did = pci_get_device(dev);
313 
314 	for (d = ix_devices; d->desc != NULL; ++d) {
315 		if (vid == d->vid && did == d->did) {
316 			device_set_desc(dev, d->desc);
317 			return 0;
318 		}
319 	}
320 	return ENXIO;
321 }
322 
323 static void
324 ix_get_rxring_cnt(const struct ix_softc *sc, int *ring_cnt, int *ring_cntmax)
325 {
326 
327 	switch (sc->hw.mac.type) {
328 	case ixgbe_mac_X550:
329 	case ixgbe_mac_X550EM_x:
330 	case ixgbe_mac_X550EM_a:
331 		*ring_cntmax = IX_MAX_RXRING_X550;
332 		break;
333 
334 	default:
335 		*ring_cntmax = IX_MAX_RXRING;
336 		break;
337 	}
338 	*ring_cnt = device_getenv_int(sc->dev, "rxr", ix_rxr);
339 }
340 
341 static void
342 ix_get_txring_cnt(const struct ix_softc *sc, int *ring_cnt, int *ring_cntmax)
343 {
344 
345 	switch (sc->hw.mac.type) {
346 	case ixgbe_mac_82598EB:
347 		*ring_cntmax = IX_MAX_TXRING_82598;
348 		break;
349 
350 	case ixgbe_mac_82599EB:
351 		*ring_cntmax = IX_MAX_TXRING_82599;
352 		break;
353 
354 	case ixgbe_mac_X540:
355 		*ring_cntmax = IX_MAX_TXRING_X540;
356 		break;
357 
358 	case ixgbe_mac_X550:
359 	case ixgbe_mac_X550EM_x:
360 	case ixgbe_mac_X550EM_a:
361 		*ring_cntmax = IX_MAX_TXRING_X550;
362 		break;
363 
364 	default:
365 		*ring_cntmax = IX_MAX_TXRING;
366 		break;
367 	}
368 	*ring_cnt = device_getenv_int(sc->dev, "txr", ix_txr);
369 }
370 
371 static int
372 ix_attach(device_t dev)
373 {
374 	struct ix_softc *sc = device_get_softc(dev);
375 	struct ixgbe_hw *hw;
376 	int error, ring_cnt, ring_cntmax;
377 	uint16_t csum;
378 	uint32_t ctrl_ext;
379 	char flowctrl[IFM_ETH_FC_STRLEN];
380 
381 	sc->dev = sc->osdep.dev = dev;
382 	hw = &sc->hw;
383 
384 	if_initname(&sc->arpcom.ac_if, device_get_name(dev),
385 	    device_get_unit(dev));
386 	ifmedia_init(&sc->media, IFM_IMASK | IFM_ETH_FCMASK,
387 	    ix_media_change, ix_media_status);
388 
389 	/* Save frame size */
390 	sc->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;
391 
392 	callout_init_mp(&sc->timer);
393 	lwkt_serialize_init(&sc->main_serialize);
394 
395 	/*
396 	 * Save off the information about this board
397 	 */
398 	hw->vendor_id = pci_get_vendor(dev);
399 	hw->device_id = pci_get_device(dev);
400 	hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
401 	hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2);
402 	hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);
403 
404 	ixgbe_set_mac_type(hw);
405 
406 	/* Pick up the 82599 */
407 	if (hw->mac.type != ixgbe_mac_82598EB)
408 		hw->phy.smart_speed = ix_smart_speed;
409 
410 	/* Enable bus mastering */
411 	pci_enable_busmaster(dev);
412 
413 	/*
414 	 * Allocate IO memory
415 	 */
416 	sc->mem_rid = PCIR_BAR(0);
417 	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
418 	    &sc->mem_rid, RF_ACTIVE);
419 	if (sc->mem_res == NULL) {
420 		device_printf(dev, "Unable to allocate bus resource: memory\n");
421 		error = ENXIO;
422 		goto failed;
423 	}
424 
425 	sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->mem_res);
426 	sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->mem_res);
427 
428 	sc->hw.hw_addr = (uint8_t *)&sc->osdep.mem_bus_space_handle;
429 	sc->hw.back = &sc->osdep;
430 
431 	/*
432 	 * Configure total supported RX/TX ring count
433 	 */
434 	ix_get_rxring_cnt(sc, &ring_cnt, &ring_cntmax);
435 	sc->rx_rmap = if_ringmap_alloc(dev, ring_cnt, ring_cntmax);
436 	ix_get_txring_cnt(sc, &ring_cnt, &ring_cntmax);
437 	sc->tx_rmap = if_ringmap_alloc(dev, ring_cnt, ring_cntmax);
438 	if_ringmap_match(dev, sc->rx_rmap, sc->tx_rmap);
439 
440 	sc->rx_ring_cnt = if_ringmap_count(sc->rx_rmap);
441 	sc->rx_ring_inuse = sc->rx_ring_cnt;
442 	sc->tx_ring_cnt = if_ringmap_count(sc->tx_rmap);
443 	sc->tx_ring_inuse = sc->tx_ring_cnt;
444 
445 	/* Allocate TX/RX rings */
446 	error = ix_alloc_rings(sc);
447 	if (error)
448 		goto failed;
449 
450 	/* Allocate interrupt */
451 	error = ix_alloc_intr(sc);
452 	if (error)
453 		goto failed;
454 
455 	/* Setup serializes */
456 	ix_setup_serialize(sc);
457 
458 	/* Allocate multicast array memory. */
459 	sc->mta = kmalloc(IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR,
460 	    M_DEVBUF, M_WAITOK);
461 
462 	/* Initialize the shared code */
463 	hw->allow_unsupported_sfp = ix_unsupported_sfp;
464 	error = ixgbe_init_shared_code(hw);
465 	if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
466 		/*
467 		 * No optics in this port; ask timer routine
468 		 * to probe for later insertion.
469 		 */
470 		sc->sfp_probe = TRUE;
471 		error = 0;
472 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
473 		device_printf(dev, "Unsupported SFP+ module detected!\n");
474 		error = EIO;
475 		goto failed;
476 	} else if (error) {
477 		device_printf(dev, "Unable to initialize the shared code\n");
478 		error = EIO;
479 		goto failed;
480 	}
481 
482 	/* Make sure we have a good EEPROM before we read from it */
483 	if (ixgbe_validate_eeprom_checksum(&sc->hw, &csum) < 0) {
484 		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
485 		error = EIO;
486 		goto failed;
487 	}
488 
489 	error = ixgbe_init_hw(hw);
490 	if (error == IXGBE_ERR_EEPROM_VERSION) {
491 		device_printf(dev, "Pre-production device detected\n");
492 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
493 		device_printf(dev, "Unsupported SFP+ Module\n");
494 		error = EIO;
495 		goto failed;
496 	} else if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
497 		device_printf(dev, "No SFP+ Module found\n");
498 	}
499 
500 	sc->ifm_media = IX_IFM_DEFAULT;
501 	/* Get default flow control settings */
502 	device_getenv_string(dev, "flow_ctrl", flowctrl, sizeof(flowctrl),
503 	    ix_flowctrl);
504 	sc->ifm_media |= ifmedia_str2ethfc(flowctrl);
505 	sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;
506 
507 	/* Setup OS specific network interface */
508 	ix_setup_ifp(sc);
509 
510 	/* Add sysctl tree */
511 	ix_add_sysctl(sc);
512 
513 	error = ix_setup_intr(sc);
514 	if (error) {
515 		ether_ifdetach(&sc->arpcom.ac_if);
516 		goto failed;
517 	}
518 
519 	/* Initialize statistics */
520 	ix_update_stats(sc);
521 
522 	/* Check PCIE slot type/speed/width */
523 	ix_slot_info(sc);
524 
525 	/* Save initial wake up filter configuration */
526 	sc->wufc = IXGBE_READ_REG(hw, IXGBE_WUFC);
527 
528 	/* Let hardware know driver is loaded */
529 	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
530 	ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
531 	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
532 
533 	return 0;
534 failed:
535 	ix_detach(dev);
536 	return error;
537 }
538 
539 static int
540 ix_detach(device_t dev)
541 {
542 	struct ix_softc *sc = device_get_softc(dev);
543 
544 	if (device_is_attached(dev)) {
545 		struct ifnet *ifp = &sc->arpcom.ac_if;
546 		uint32_t ctrl_ext;
547 
548 		ifnet_serialize_all(ifp);
549 
550 		ix_powerdown(sc);
551 		ix_teardown_intr(sc, sc->intr_cnt);
552 
553 		ifnet_deserialize_all(ifp);
554 
555 		callout_terminate(&sc->timer);
556 		ether_ifdetach(ifp);
557 
558 		/* Let hardware know driver is unloading */
559 		ctrl_ext = IXGBE_READ_REG(&sc->hw, IXGBE_CTRL_EXT);
560 		ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
561 		IXGBE_WRITE_REG(&sc->hw, IXGBE_CTRL_EXT, ctrl_ext);
562 	}
563 
564 	ifmedia_removeall(&sc->media);
565 	bus_generic_detach(dev);
566 
567 	ix_free_intr(sc);
568 
569 	if (sc->msix_mem_res != NULL) {
570 		bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_mem_rid,
571 		    sc->msix_mem_res);
572 	}
573 	if (sc->mem_res != NULL) {
574 		bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid,
575 		    sc->mem_res);
576 	}
577 
578 	ix_free_rings(sc);
579 
580 	if (sc->mta != NULL)
581 		kfree(sc->mta, M_DEVBUF);
582 	if (sc->serializes != NULL)
583 		kfree(sc->serializes, M_DEVBUF);
584 
585 	if (sc->rx_rmap != NULL)
586 		if_ringmap_free(sc->rx_rmap);
587 	if (sc->rx_rmap_intr != NULL)
588 		if_ringmap_free(sc->rx_rmap_intr);
589 	if (sc->tx_rmap != NULL)
590 		if_ringmap_free(sc->tx_rmap);
591 	if (sc->tx_rmap_intr != NULL)
592 		if_ringmap_free(sc->tx_rmap_intr);
593 
594 	return 0;
595 }
596 
597 static int
598 ix_shutdown(device_t dev)
599 {
600 	struct ix_softc *sc = device_get_softc(dev);
601 	struct ifnet *ifp = &sc->arpcom.ac_if;
602 
603 	ifnet_serialize_all(ifp);
604 	ix_powerdown(sc);
605 	ifnet_deserialize_all(ifp);
606 
607 	return 0;
608 }
609 
610 static void
611 ix_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
612 {
613 	struct ix_softc *sc = ifp->if_softc;
614 	struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
615 	int idx = -1;
616 	uint16_t nsegs;
617 
618 	KKASSERT(txr->tx_ifsq == ifsq);
619 	ASSERT_SERIALIZED(&txr->tx_serialize);
620 
621 	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
622 		return;
623 
624 	if (!sc->link_active || (txr->tx_flags & IX_TXFLAG_ENABLED) == 0) {
625 		ifsq_purge(ifsq);
626 		return;
627 	}
628 
629 	while (!ifsq_is_empty(ifsq)) {
630 		struct mbuf *m_head;
631 
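		/*
		 * Stop if there may not be enough free descriptors for a
		 * worst-case packet (maximum scatter/gather segments plus
		 * the reserved slots).
		 */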
632 		if (txr->tx_avail <= IX_MAX_SCATTER + IX_TX_RESERVED) {
633 			ifsq_set_oactive(ifsq);
634 			txr->tx_watchdog.wd_timer = 5;
635 			break;
636 		}
637 
638 		m_head = ifsq_dequeue(ifsq);
639 		if (m_head == NULL)
640 			break;
641 
642 		if (ix_encap(txr, &m_head, &nsegs, &idx)) {
643 			IFNET_STAT_INC(ifp, oerrors, 1);
644 			continue;
645 		}
646 
647 		/*
648 		 * TX interrupts are aggressively aggregated, so increasing
649 		 * opackets at TX interrupt time will make the opackets
650 		 * statistics vastly inaccurate; we do the opackets increment
651 		 * now.
652 		 */
653 		IFNET_STAT_INC(ifp, opackets, 1);
654 
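		/*
		 * Batch TDT (tail) register writes: only bump the hardware
		 * tail once at least tx_wreg_nsegs descriptors have been
		 * queued; any remainder is flushed after the loop.
		 */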
655 		if (nsegs >= txr->tx_wreg_nsegs) {
656 			IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
657 			nsegs = 0;
658 			idx = -1;
659 		}
660 
661 		ETHER_BPF_MTAP(ifp, m_head);
662 	}
663 	if (idx >= 0)
664 		IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
665 }
666 
667 static int
668 ix_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
669 {
670 	struct ix_softc *sc = ifp->if_softc;
671 	struct ifreq *ifr = (struct ifreq *) data;
672 	int error = 0, mask, reinit;
673 
674 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
675 
676 	switch (command) {
677 	case SIOCSIFMTU:
678 		if (ifr->ifr_mtu > IX_MAX_MTU) {
679 			error = EINVAL;
680 		} else {
681 			ifp->if_mtu = ifr->ifr_mtu;
682 			sc->max_frame_size = ifp->if_mtu + IX_MTU_HDR;
683 			ix_init(sc);
684 		}
685 		break;
686 
687 	case SIOCSIFFLAGS:
688 		if (ifp->if_flags & IFF_UP) {
689 			if (ifp->if_flags & IFF_RUNNING) {
690 				if ((ifp->if_flags ^ sc->if_flags) &
691 				    (IFF_PROMISC | IFF_ALLMULTI))
692 					ix_set_promisc(sc);
693 			} else {
694 				ix_init(sc);
695 			}
696 		} else if (ifp->if_flags & IFF_RUNNING) {
697 			ix_stop(sc);
698 		}
699 		sc->if_flags = ifp->if_flags;
700 		break;
701 
702 	case SIOCADDMULTI:
703 	case SIOCDELMULTI:
704 		if (ifp->if_flags & IFF_RUNNING) {
705 			ix_disable_intr(sc);
706 			ix_set_multi(sc);
707 #ifdef IFPOLL_ENABLE
708 			if ((ifp->if_flags & IFF_NPOLLING) == 0)
709 #endif
710 				ix_enable_intr(sc);
711 		}
712 		break;
713 
714 	case SIOCSIFMEDIA:
715 	case SIOCGIFMEDIA:
716 		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
717 		break;
718 
719 	case SIOCSIFCAP:
720 		reinit = 0;
721 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
722 		if (mask & IFCAP_RXCSUM) {
723 			ifp->if_capenable ^= IFCAP_RXCSUM;
724 			reinit = 1;
725 		}
726 		if (mask & IFCAP_VLAN_HWTAGGING) {
727 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
728 			reinit = 1;
729 		}
730 		if (mask & IFCAP_TXCSUM) {
731 			ifp->if_capenable ^= IFCAP_TXCSUM;
732 			if (ifp->if_capenable & IFCAP_TXCSUM)
733 				ifp->if_hwassist |= CSUM_OFFLOAD;
734 			else
735 				ifp->if_hwassist &= ~CSUM_OFFLOAD;
736 		}
737 		if (mask & IFCAP_TSO) {
738 			ifp->if_capenable ^= IFCAP_TSO;
739 			if (ifp->if_capenable & IFCAP_TSO)
740 				ifp->if_hwassist |= CSUM_TSO;
741 			else
742 				ifp->if_hwassist &= ~CSUM_TSO;
743 		}
744 		if (mask & IFCAP_RSS)
745 			ifp->if_capenable ^= IFCAP_RSS;
746 		if (reinit && (ifp->if_flags & IFF_RUNNING))
747 			ix_init(sc);
748 		break;
749 
750 #if 0
751 	case SIOCGI2C:
752 	{
753 		struct ixgbe_i2c_req	i2c;
754 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
755 		if (error)
756 			break;
757 		if ((i2c.dev_addr != 0xA0) && (i2c.dev_addr != 0xA2)) {
758 			error = EINVAL;
759 			break;
760 		}
761 		hw->phy.ops.read_i2c_byte(hw, i2c.offset,
762 		    i2c.dev_addr, i2c.data);
763 		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
764 		break;
765 	}
766 #endif
767 
768 	default:
769 		error = ether_ioctl(ifp, command, data);
770 		break;
771 	}
772 	return error;
773 }
774 
775 #define IXGBE_MHADD_MFS_SHIFT 16
776 
777 static void
778 ix_init(void *xsc)
779 {
780 	struct ix_softc *sc = xsc;
781 	struct ifnet *ifp = &sc->arpcom.ac_if;
782 	struct ixgbe_hw *hw = &sc->hw;
783 	uint32_t gpie, rxctrl;
784 	int i, error;
785 	boolean_t polling;
786 
787 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
788 
789 	ix_stop(sc);
790 
791 	polling = FALSE;
792 #ifdef IFPOLL_ENABLE
793 	if (ifp->if_flags & IFF_NPOLLING)
794 		polling = TRUE;
795 #endif
796 
797 	/* Configure # of used RX/TX rings */
798 	ix_set_ring_inuse(sc, polling);
799 	ifq_set_subq_divisor(&ifp->if_snd, sc->tx_ring_inuse);
800 
801 	/* Get the latest mac address, User can use a LAA */
802 	bcopy(IF_LLADDR(ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS);
803 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
804 	hw->addr_ctrl.rar_used_count = 1;
805 
806 	/* Prepare transmit descriptors and buffers */
807 	for (i = 0; i < sc->tx_ring_inuse; ++i)
808 		ix_init_tx_ring(&sc->tx_rings[i]);
809 
810 	ixgbe_init_hw(hw);
811 	ix_init_tx_unit(sc);
812 
813 	/* Setup Multicast table */
814 	ix_set_multi(sc);
815 
816 	/* Prepare receive descriptors and buffers */
817 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
818 		error = ix_init_rx_ring(&sc->rx_rings[i]);
819 		if (error) {
820 			if_printf(ifp, "Could not initialize RX ring%d\n", i);
821 			ix_stop(sc);
822 			return;
823 		}
824 	}
825 
826 	/* Configure RX settings */
827 	ix_init_rx_unit(sc, polling);
828 
829 	gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
830 
831 	/* Enable Fan Failure Interrupt */
832 	gpie |= IXGBE_SDP1_GPIEN_BY_MAC(hw);
833 
834 	/* Enable SFP+ module insertion/removal detection */
835 	if (hw->mac.type == ixgbe_mac_82599EB)
836 		gpie |= IXGBE_SDP2_GPIEN;
837 
838 	/*
839 	 * Thermal Failure Detection (X540)
840 	 * Link Detection (X552)
841 	 */
842 	if (hw->mac.type == ixgbe_mac_X540 ||
843 	    hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
844 	    hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T)
845 		gpie |= IXGBE_SDP0_GPIEN_X540;
846 
847 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
848 		/* Enable Enhanced MSIX mode */
849 		gpie |= IXGBE_GPIE_MSIX_MODE;
850 		gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
851 		    IXGBE_GPIE_OCD;
852 	}
853 	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
854 
855 	/* Set MTU size */
856 	if (ifp->if_mtu > ETHERMTU) {
857 		uint32_t mhadd;
858 
859 		/* aka IXGBE_MAXFRS on 82599 and newer */
860 		mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
861 		mhadd &= ~IXGBE_MHADD_MFS_MASK;
862 		mhadd |= sc->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
863 		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
864 	}
865 
866 	/*
867 	 * Enable TX rings
868 	 */
869 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
870 		uint32_t txdctl;
871 
872 		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
873 		txdctl |= IXGBE_TXDCTL_ENABLE;
874 
875 		/*
876 		 * Set WTHRESH to 0, since TX head write-back is used
877 		 */
878 		txdctl &= ~(0x7f << 16);
879 
880 		/*
881 		 * When the internal queue falls below PTHRESH (32),
882 		 * start prefetching as long as there are at least
883 		 * HTHRESH (1) buffers ready. The values are taken
884 		 * from the Intel linux driver 3.8.21.
885 		 * Prefetching enables tx line rate even with 1 queue.
886 		 */
887 		txdctl |= (32 << 0) | (1 << 8);
888 		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
889 	}
890 
891 	/*
892 	 * Enable RX rings
893 	 */
894 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
895 		uint32_t rxdctl;
896 		int k;
897 
898 		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
899 		if (hw->mac.type == ixgbe_mac_82598EB) {
900 			/*
901 			 * PTHRESH = 21
902 			 * HTHRESH = 4
903 			 * WTHRESH = 8
904 			 */
905 			rxdctl &= ~0x3FFFFF;
906 			rxdctl |= 0x080420;
907 		}
908 		rxdctl |= IXGBE_RXDCTL_ENABLE;
909 		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
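		/* Wait up to 10ms for the ring enable to take effect */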
910 		for (k = 0; k < 10; ++k) {
911 			if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
912 			    IXGBE_RXDCTL_ENABLE)
913 				break;
914 			else
915 				msec_delay(1);
916 		}
917 		wmb();
918 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i),
919 		    sc->rx_rings[0].rx_ndesc - 1);
920 	}
921 
922 	/* Enable Receive engine */
923 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
924 	if (hw->mac.type == ixgbe_mac_82598EB)
925 		rxctrl |= IXGBE_RXCTRL_DMBYPS;
926 	rxctrl |= IXGBE_RXCTRL_RXEN;
927 	ixgbe_enable_rx_dma(hw, rxctrl);
928 
929 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
930 		const struct ix_tx_ring *txr = &sc->tx_rings[i];
931 
932 		if (txr->tx_intr_vec >= 0) {
933 			ix_set_ivar(sc, i, txr->tx_intr_vec, 1);
934 		} else if (!polling) {
935 			/*
936 			 * An unconfigured TX interrupt vector can only
937 			 * happen with MSI-X.
938 			 */
939 			KASSERT(sc->intr_type == PCI_INTR_TYPE_MSIX,
940 			    ("TX intr vector is not set"));
941 			if (bootverbose)
942 				if_printf(ifp, "IVAR skips TX ring %d\n", i);
943 		}
944 	}
945 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
946 		const struct ix_rx_ring *rxr = &sc->rx_rings[i];
947 
948 		if (polling && rxr->rx_intr_vec < 0)
949 			continue;
950 
951 		KKASSERT(rxr->rx_intr_vec >= 0);
952 		ix_set_ivar(sc, i, rxr->rx_intr_vec, 0);
953 		if (rxr->rx_txr != NULL) {
954 			/*
955 			 * Piggyback the TX ring interrupt onto the RX
956 			 * ring interrupt vector.
957 			 */
958 			KASSERT(rxr->rx_txr->tx_intr_vec < 0,
959 			    ("piggybacked TX ring configured intr vector"));
960 			ix_set_ivar(sc, rxr->rx_txr->tx_idx,
961 			    rxr->rx_intr_vec, 1);
962 			if (bootverbose) {
963 				if_printf(ifp, "IVAR RX ring %d piggybacks "
964 				    "TX ring %u\n", i, rxr->rx_txr->tx_idx);
965 			}
966 		}
967 	}
968 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
969 		/* Set up status MSI-X vector; it is using fixed entry 1 */
970 		ix_set_ivar(sc, 1, sc->sts_msix_vec, -1);
971 
972 		/* Set up auto-mask for TX and RX rings */
973 		if (hw->mac.type == ixgbe_mac_82598EB) {
974 			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
975 		} else {
976 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
977 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
978 		}
979 	} else {
980 		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
981 	}
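	/* Program the interrupt throttle rate (EITR) for each vector */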
982 	for (i = 0; i < sc->intr_cnt; ++i)
983 		ix_set_eitr(sc, i, sc->intr_data[i].intr_rate);
984 
985 	/*
986 	 * Check on any SFP devices that need to be kick-started
987 	 */
988 	if (hw->phy.type == ixgbe_phy_none) {
989 		error = hw->phy.ops.identify(hw);
990 		if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
991 			if_printf(ifp,
992 			    "Unsupported SFP+ module type was detected.\n");
993 			/* XXX stop */
994 			return;
995 		}
996 	}
997 
998 	/* Config/Enable Link */
999 	ix_config_link(sc);
1000 
1001 	/* Hardware Packet Buffer & Flow Control setup */
1002 	ix_config_flowctrl(sc);
1003 
1004 	/* Initialize the FC settings */
1005 	ixgbe_start_hw(hw);
1006 
1007 	/* Set up VLAN support and filter */
1008 	ix_set_vlan(sc);
1009 
1010 	/* Setup DMA Coalescing */
1011 	ix_config_dmac(sc);
1012 
1013 	/*
1014 	 * Only enable interrupts if we are not polling; make sure
1015 	 * they are off otherwise.
1016 	 */
1017 	if (polling)
1018 		ix_disable_intr(sc);
1019 	else
1020 		ix_enable_intr(sc);
1021 
1022 	ifp->if_flags |= IFF_RUNNING;
1023 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
1024 		ifsq_clr_oactive(sc->tx_rings[i].tx_ifsq);
1025 		ifsq_watchdog_start(&sc->tx_rings[i].tx_watchdog);
1026 	}
1027 
1028 	ix_set_timer_cpuid(sc, polling);
1029 	callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
1030 }
1031 
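/*
 * Legacy/MSI interrupt handler: read EICR once, service RX ring 0 and
 * (if RSS is active) ring 1, reclaim TX ring 0 using the head write-back
 * value, dispatch status causes, then re-arm the interrupt mask.
 */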
1032 static void
1033 ix_intr(void *xsc)
1034 {
1035 	struct ix_softc *sc = xsc;
1036 	struct ixgbe_hw	*hw = &sc->hw;
1037 	uint32_t eicr;
1038 
1039 	ASSERT_SERIALIZED(&sc->main_serialize);
1040 
1041 	eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1042 	if (eicr == 0) {
1043 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
1044 		return;
1045 	}
1046 
1047 	if (eicr & IX_RX0_INTR_MASK) {
1048 		struct ix_rx_ring *rxr = &sc->rx_rings[0];
1049 
1050 		lwkt_serialize_enter(&rxr->rx_serialize);
1051 		ix_rxeof(rxr, -1);
1052 		lwkt_serialize_exit(&rxr->rx_serialize);
1053 	}
1054 	if (eicr & IX_RX1_INTR_MASK) {
1055 		struct ix_rx_ring *rxr;
1056 
1057 		KKASSERT(sc->rx_ring_inuse == IX_MIN_RXRING_RSS);
1058 		rxr = &sc->rx_rings[1];
1059 
1060 		lwkt_serialize_enter(&rxr->rx_serialize);
1061 		ix_rxeof(rxr, -1);
1062 		lwkt_serialize_exit(&rxr->rx_serialize);
1063 	}
1064 
1065 	if (eicr & IX_TX_INTR_MASK) {
1066 		struct ix_tx_ring *txr = &sc->tx_rings[0];
1067 
1068 		lwkt_serialize_enter(&txr->tx_serialize);
1069 		ix_txeof(txr, *(txr->tx_hdr));
1070 		if (!ifsq_is_empty(txr->tx_ifsq))
1071 			ifsq_devstart(txr->tx_ifsq);
1072 		lwkt_serialize_exit(&txr->tx_serialize);
1073 	}
1074 
1075 	if (__predict_false(eicr & IX_EICR_STATUS))
1076 		ix_intr_status(sc, eicr);
1077 
1078 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
1079 }
1080 
1081 static void
1082 ix_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1083 {
1084 	struct ix_softc *sc = ifp->if_softc;
1085 	struct ifmedia *ifm = &sc->media;
1086 	int layer;
1087 
1088 	ix_update_link_status(sc);
1089 
1090 	ifmr->ifm_status = IFM_AVALID;
1091 	ifmr->ifm_active = IFM_ETHER;
1092 
1093 	if (!sc->link_active) {
1094 		if (IFM_SUBTYPE(ifm->ifm_media) != IFM_AUTO)
1095 			ifmr->ifm_active |= ifm->ifm_media;
1096 		else
1097 			ifmr->ifm_active |= IFM_NONE;
1098 		return;
1099 	}
1100 	ifmr->ifm_status |= IFM_ACTIVE;
1101 
1102 	layer = ixgbe_get_supported_physical_layer(&sc->hw);
1103 
1104 	if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) ||
1105 	    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) ||
1106 	    (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX)) {
1107 		switch (sc->link_speed) {
1108 		case IXGBE_LINK_SPEED_10GB_FULL:
1109 			ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
1110 			break;
1111 		case IXGBE_LINK_SPEED_1GB_FULL:
1112 			ifmr->ifm_active |= IFM_1000_T | IFM_FDX;
1113 			break;
1114 		case IXGBE_LINK_SPEED_100_FULL:
1115 			ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1116 			break;
1117 		}
1118 	} else if ((layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) ||
1119 	    (layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA)) {
1120 		switch (sc->link_speed) {
1121 		case IXGBE_LINK_SPEED_10GB_FULL:
1122 			ifmr->ifm_active |= IFM_10G_TWINAX | IFM_FDX;
1123 			break;
1124 		}
1125 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) {
1126 		switch (sc->link_speed) {
1127 		case IXGBE_LINK_SPEED_10GB_FULL:
1128 			ifmr->ifm_active |= IFM_10G_LR | IFM_FDX;
1129 			break;
1130 		case IXGBE_LINK_SPEED_1GB_FULL:
1131 			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
1132 			break;
1133 		}
1134 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LRM) {
1135 		switch (sc->link_speed) {
1136 		case IXGBE_LINK_SPEED_10GB_FULL:
1137 			ifmr->ifm_active |= IFM_10G_LRM | IFM_FDX;
1138 			break;
1139 		case IXGBE_LINK_SPEED_1GB_FULL:
1140 			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
1141 			break;
1142 		}
1143 	} else if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) ||
1144 	    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX)) {
1145 		switch (sc->link_speed) {
1146 		case IXGBE_LINK_SPEED_10GB_FULL:
1147 			ifmr->ifm_active |= IFM_10G_SR | IFM_FDX;
1148 			break;
1149 		case IXGBE_LINK_SPEED_1GB_FULL:
1150 			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1151 			break;
1152 		}
1153 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) {
1154 		switch (sc->link_speed) {
1155 		case IXGBE_LINK_SPEED_10GB_FULL:
1156 			ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX;
1157 			break;
1158 		}
1159 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) {
1160 		/*
1161 		 * XXX: These need to use the proper media types once
1162 		 * they're added.
1163 		 */
1164 		switch (sc->link_speed) {
1165 		case IXGBE_LINK_SPEED_10GB_FULL:
1166 			ifmr->ifm_active |= IFM_10G_SR | IFM_FDX;
1167 			break;
1168 		case IXGBE_LINK_SPEED_2_5GB_FULL:
1169 			ifmr->ifm_active |= IFM_2500_SX | IFM_FDX;
1170 			break;
1171 		case IXGBE_LINK_SPEED_1GB_FULL:
1172 			ifmr->ifm_active |= IFM_1000_CX | IFM_FDX;
1173 			break;
1174 		}
1175 	} else if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) ||
1176 	    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX)) {
1177 		/*
1178 		 * XXX: These need to use the proper media types once
1179 		 * they're added.
1180 		 */
1181 		switch (sc->link_speed) {
1182 		case IXGBE_LINK_SPEED_10GB_FULL:
1183 			ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX;
1184 			break;
1185 		case IXGBE_LINK_SPEED_2_5GB_FULL:
1186 			ifmr->ifm_active |= IFM_2500_SX | IFM_FDX;
1187 			break;
1188 		case IXGBE_LINK_SPEED_1GB_FULL:
1189 			ifmr->ifm_active |= IFM_1000_CX | IFM_FDX;
1190 			break;
1191 		}
1192 	}
1193 
1194 	/* If nothing is recognized... */
1195 	if (IFM_SUBTYPE(ifmr->ifm_active) == 0)
1196 		ifmr->ifm_active |= IFM_NONE;
1197 
1198 	if (sc->ifm_media & IFM_ETH_FORCEPAUSE)
1199 		ifmr->ifm_active |= (sc->ifm_media & IFM_ETH_FCMASK);
1200 
1201 	switch (sc->hw.fc.current_mode) {
1202 	case ixgbe_fc_full:
1203 		ifmr->ifm_active |= IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE;
1204 		break;
1205 	case ixgbe_fc_rx_pause:
1206 		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
1207 		break;
1208 	case ixgbe_fc_tx_pause:
1209 		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
1210 		break;
1211 	default:
1212 		break;
1213 	}
1214 }
1215 
1216 static int
1217 ix_media_change(struct ifnet *ifp)
1218 {
1219 	struct ix_softc *sc = ifp->if_softc;
1220 	struct ifmedia *ifm = &sc->media;
1221 	struct ixgbe_hw *hw = &sc->hw;
1222 
1223 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1224 		return (EINVAL);
1225 
1226 	if (hw->phy.media_type == ixgbe_media_type_backplane ||
1227 	    hw->mac.ops.setup_link == NULL) {
1228 		if ((ifm->ifm_media ^ sc->ifm_media) & IFM_ETH_FCMASK) {
1229 			/* Only flow control setting changes are allowed */
1230 			return (EOPNOTSUPP);
1231 		}
1232 	}
1233 
1234 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1235 	case IFM_AUTO:
1236 		sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;
1237 		break;
1238 
1239 	case IFM_10G_T:
1240 	case IFM_10G_LRM:
1241 	case IFM_10G_SR:	/* XXX also KR */
1242 	case IFM_10G_LR:
1243 	case IFM_10G_CX4:	/* XXX also KX4 */
1244 	case IFM_10G_TWINAX:
1245 		sc->advspeed = IXGBE_LINK_SPEED_10GB_FULL;
1246 		break;
1247 
1248 	case IFM_1000_T:
1249 	case IFM_1000_LX:
1250 	case IFM_1000_SX:
1251 	case IFM_1000_CX:	/* XXX is KX */
1252 		sc->advspeed = IXGBE_LINK_SPEED_1GB_FULL;
1253 		break;
1254 
1255 	case IFM_100_TX:
1256 		sc->advspeed = IXGBE_LINK_SPEED_100_FULL;
1257 		break;
1258 
1259 	default:
1260 		if (bootverbose) {
1261 			if_printf(ifp, "Invalid media type %d!\n",
1262 			    ifm->ifm_media);
1263 		}
1264 		return EINVAL;
1265 	}
1266 	sc->ifm_media = ifm->ifm_media;
1267 
1268 #if 0
1269 	if (hw->mac.ops.setup_link != NULL) {
1270 		hw->mac.autotry_restart = TRUE;
1271 		hw->mac.ops.setup_link(hw, sc->advspeed, TRUE);
1272 	}
1273 #else
1274 	if (ifp->if_flags & IFF_RUNNING)
1275 		ix_init(sc);
1276 #endif
1277 	return 0;
1278 }
1279 
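/*
 * For TSO, make sure the Ethernet, IP and TCP headers are contiguous in
 * the first mbuf so that the offload context can be built from them.
 */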
1280 static __inline int
1281 ix_tso_pullup(struct mbuf **mp)
1282 {
1283 	int hoff, iphlen, thoff;
1284 	struct mbuf *m;
1285 
1286 	m = *mp;
1287 	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));
1288 
1289 	iphlen = m->m_pkthdr.csum_iphlen;
1290 	thoff = m->m_pkthdr.csum_thlen;
1291 	hoff = m->m_pkthdr.csum_lhlen;
1292 
1293 	KASSERT(iphlen > 0, ("invalid ip hlen"));
1294 	KASSERT(thoff > 0, ("invalid tcp hlen"));
1295 	KASSERT(hoff > 0, ("invalid ether hlen"));
1296 
1297 	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
1298 		m = m_pullup(m, hoff + iphlen + thoff);
1299 		if (m == NULL) {
1300 			*mp = NULL;
1301 			return ENOBUFS;
1302 		}
1303 		*mp = m;
1304 	}
1305 	return 0;
1306 }
1307 
1308 static int
1309 ix_encap(struct ix_tx_ring *txr, struct mbuf **m_headp,
1310     uint16_t *segs_used, int *idx)
1311 {
1312 	uint32_t olinfo_status = 0, cmd_type_len, cmd_rs = 0;
1313 	int i, j, error, nsegs, first, maxsegs;
1314 	struct mbuf *m_head = *m_headp;
1315 	bus_dma_segment_t segs[IX_MAX_SCATTER];
1316 	bus_dmamap_t map;
1317 	struct ix_tx_buf *txbuf;
1318 	union ixgbe_adv_tx_desc *txd = NULL;
1319 
1320 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1321 		error = ix_tso_pullup(m_headp);
1322 		if (__predict_false(error))
1323 			return error;
1324 		m_head = *m_headp;
1325 	}
1326 
1327 	/* Basic descriptor defines */
1328 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1329 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1330 
1331 	if (m_head->m_flags & M_VLANTAG)
1332 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1333 
1334 	/*
1335 	 * Important to capture the first descriptor
1336 	 * used because it will contain the index of
1337 	 * the one we tell the hardware to report back
1338 	 */
1339 	first = txr->tx_next_avail;
1340 	txbuf = &txr->tx_buf[first];
1341 	map = txbuf->map;
1342 
1343 	/*
1344 	 * Map the packet for DMA.
1345 	 */
1346 	maxsegs = txr->tx_avail - IX_TX_RESERVED;
1347 	if (maxsegs > IX_MAX_SCATTER)
1348 		maxsegs = IX_MAX_SCATTER;
1349 
1350 	error = bus_dmamap_load_mbuf_defrag(txr->tx_tag, map, m_headp,
1351 	    segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
1352 	if (__predict_false(error)) {
1353 		m_freem(*m_headp);
1354 		*m_headp = NULL;
1355 		return error;
1356 	}
1357 	bus_dmamap_sync(txr->tx_tag, map, BUS_DMASYNC_PREWRITE);
1358 
1359 	m_head = *m_headp;
1360 
1361 	/*
1362 	 * Set up the appropriate offload context if requested,
1363 	 * this may consume one TX descriptor.
1364 	 */
1365 	if (ix_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status)) {
1366 		(*segs_used)++;
1367 		txr->tx_nsegs++;
1368 	}
1369 
1370 	*segs_used += nsegs;
1371 	txr->tx_nsegs += nsegs;
1372 	if (txr->tx_nsegs >= txr->tx_intr_nsegs) {
1373 		/*
1374 		 * Report Status (RS) is turned on every intr_nsegs
1375 		 * descriptors (roughly).
1376 		 */
1377 		txr->tx_nsegs = 0;
1378 		cmd_rs = IXGBE_TXD_CMD_RS;
1379 	}
1380 
1381 	i = txr->tx_next_avail;
1382 	for (j = 0; j < nsegs; j++) {
1383 		bus_size_t seglen;
1384 		bus_addr_t segaddr;
1385 
1386 		txbuf = &txr->tx_buf[i];
1387 		txd = &txr->tx_base[i];
1388 		seglen = segs[j].ds_len;
1389 		segaddr = htole64(segs[j].ds_addr);
1390 
1391 		txd->read.buffer_addr = segaddr;
1392 		txd->read.cmd_type_len = htole32(IXGBE_TXD_CMD_IFCS |
1393 		    cmd_type_len | seglen);
1394 		txd->read.olinfo_status = htole32(olinfo_status);
1395 
1396 		if (++i == txr->tx_ndesc)
1397 			i = 0;
1398 	}
1399 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | cmd_rs);
1400 
1401 	txr->tx_avail -= nsegs;
1402 	txr->tx_next_avail = i;
1403 
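	/*
	 * The mbuf and its loaded DMA map stay with the last descriptor's
	 * buffer; give that buffer's unused map back to the first buffer
	 * so it has a spare map for the next packet that starts there.
	 */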
1404 	txbuf->m_head = m_head;
1405 	txr->tx_buf[first].map = txbuf->map;
1406 	txbuf->map = map;
1407 
1408 	/*
1409 	 * Defer TDT updating until enough descriptors are set up
1410 	 */
1411 	*idx = i;
1412 
1413 	return 0;
1414 }
1415 
1416 static void
1417 ix_set_promisc(struct ix_softc *sc)
1418 {
1419 	struct ifnet *ifp = &sc->arpcom.ac_if;
1420 	uint32_t reg_rctl;
1421 	int mcnt = 0;
1422 
1423 	reg_rctl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
1424 	reg_rctl &= ~IXGBE_FCTRL_UPE;
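	/*
	 * Count multicast addresses; multicast promiscuous (MPE) is only
	 * left enabled when the address count would overflow the filter.
	 */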
1425 	if (ifp->if_flags & IFF_ALLMULTI) {
1426 		mcnt = IX_MAX_MCASTADDR;
1427 	} else {
1428 		struct ifmultiaddr *ifma;
1429 
1430 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1431 			if (ifma->ifma_addr->sa_family != AF_LINK)
1432 				continue;
1433 			if (mcnt == IX_MAX_MCASTADDR)
1434 				break;
1435 			mcnt++;
1436 		}
1437 	}
1438 	if (mcnt < IX_MAX_MCASTADDR)
1439 		reg_rctl &= ~IXGBE_FCTRL_MPE;
1440 	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1441 
1442 	if (ifp->if_flags & IFF_PROMISC) {
1443 		reg_rctl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
1444 		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1445 	} else if (ifp->if_flags & IFF_ALLMULTI) {
1446 		reg_rctl |= IXGBE_FCTRL_MPE;
1447 		reg_rctl &= ~IXGBE_FCTRL_UPE;
1448 		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1449 	}
1450 }
1451 
1452 static void
1453 ix_set_multi(struct ix_softc *sc)
1454 {
1455 	struct ifnet *ifp = &sc->arpcom.ac_if;
1456 	struct ifmultiaddr *ifma;
1457 	uint32_t fctrl;
1458 	uint8_t	*mta;
1459 	int mcnt = 0;
1460 
1461 	mta = sc->mta;
1462 	bzero(mta, IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR);
1463 
1464 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1465 		if (ifma->ifma_addr->sa_family != AF_LINK)
1466 			continue;
1467 		if (mcnt == IX_MAX_MCASTADDR)
1468 			break;
1469 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1470 		    &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
1471 		    IXGBE_ETH_LENGTH_OF_ADDRESS);
1472 		mcnt++;
1473 	}
1474 
1475 	fctrl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
1476 	fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1477 	if (ifp->if_flags & IFF_PROMISC) {
1478 		fctrl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
1479 	} else if (mcnt >= IX_MAX_MCASTADDR || (ifp->if_flags & IFF_ALLMULTI)) {
1480 		fctrl |= IXGBE_FCTRL_MPE;
1481 		fctrl &= ~IXGBE_FCTRL_UPE;
1482 	} else {
1483 		fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1484 	}
1485 	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, fctrl);
1486 
1487 	if (mcnt < IX_MAX_MCASTADDR) {
1488 		ixgbe_update_mc_addr_list(&sc->hw,
1489 		    mta, mcnt, ix_mc_array_itr, TRUE);
1490 	}
1491 }
1492 
1493 /*
1494  * This is an iterator function now needed by the multicast
1495  * shared code. It simply feeds the shared code routine the
1496  * addresses in the array built by ix_set_multi() one by one.
1497  */
1498 static uint8_t *
1499 ix_mc_array_itr(struct ixgbe_hw *hw, uint8_t **update_ptr, uint32_t *vmdq)
1500 {
1501 	uint8_t *addr = *update_ptr;
1502 	uint8_t *newptr;
1503 	*vmdq = 0;
1504 
1505 	newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
1506 	*update_ptr = newptr;
1507 	return addr;
1508 }
1509 
1510 static void
1511 ix_timer(void *arg)
1512 {
1513 	struct ix_softc *sc = arg;
1514 
1515 	lwkt_serialize_enter(&sc->main_serialize);
1516 
1517 	if ((sc->arpcom.ac_if.if_flags & IFF_RUNNING) == 0) {
1518 		lwkt_serialize_exit(&sc->main_serialize);
1519 		return;
1520 	}
1521 
1522 	/* Check for pluggable optics */
1523 	if (sc->sfp_probe) {
1524 		if (!ix_sfp_probe(sc))
1525 			goto done; /* Nothing to do */
1526 	}
1527 
1528 	ix_update_link_status(sc);
1529 	ix_update_stats(sc);
1530 
1531 done:
1532 	callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
1533 	lwkt_serialize_exit(&sc->main_serialize);
1534 }
1535 
1536 static void
1537 ix_update_link_status(struct ix_softc *sc)
1538 {
1539 	struct ifnet *ifp = &sc->arpcom.ac_if;
1540 
1541 	if (sc->link_up) {
1542 		if (sc->link_active == FALSE) {
1543 			if (bootverbose) {
1544 				if_printf(ifp, "Link is up %d Gbps %s\n",
1545 				    sc->link_speed == 128 ? 10 : 1,
1546 				    "Full Duplex");
1547 			}
1548 
1549 			/*
1550 			 * Update any Flow Control changes
1551 			 */
1552 			ixgbe_fc_enable(&sc->hw);
1553 			/* MUST after ixgbe_fc_enable() */
1554 			if (sc->rx_ring_inuse > 1) {
1555 				switch (sc->hw.fc.current_mode) {
1556 				case ixgbe_fc_rx_pause:
1557 				case ixgbe_fc_tx_pause:
1558 				case ixgbe_fc_full:
1559 					ix_disable_rx_drop(sc);
1560 					break;
1561 
1562 				case ixgbe_fc_none:
1563 					ix_enable_rx_drop(sc);
1564 					break;
1565 
1566 				default:
1567 					break;
1568 				}
1569 			}
1570 
1571 			/* Update DMA coalescing config */
1572 			ix_config_dmac(sc);
1573 
1574 			sc->link_active = TRUE;
1575 
1576 			ifp->if_link_state = LINK_STATE_UP;
1577 			if_link_state_change(ifp);
1578 		}
1579 	} else { /* Link down */
1580 		if (sc->link_active == TRUE) {
1581 			if (bootverbose)
1582 				if_printf(ifp, "Link is Down\n");
1583 			ifp->if_link_state = LINK_STATE_DOWN;
1584 			if_link_state_change(ifp);
1585 
1586 			sc->link_active = FALSE;
1587 		}
1588 	}
1589 }
1590 
1591 static void
1592 ix_stop(struct ix_softc *sc)
1593 {
1594 	struct ixgbe_hw *hw = &sc->hw;
1595 	struct ifnet *ifp = &sc->arpcom.ac_if;
1596 	int i;
1597 
1598 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1599 
1600 	ix_disable_intr(sc);
1601 	callout_stop(&sc->timer);
1602 
1603 	ifp->if_flags &= ~IFF_RUNNING;
1604 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1605 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1606 
1607 		ifsq_clr_oactive(txr->tx_ifsq);
1608 		ifsq_watchdog_stop(&txr->tx_watchdog);
1609 		txr->tx_flags &= ~IX_TXFLAG_ENABLED;
1610 	}
1611 
1612 	ixgbe_reset_hw(hw);
1613 	hw->adapter_stopped = FALSE;
1614 	ixgbe_stop_adapter(hw);
1615 	if (hw->mac.type == ixgbe_mac_82599EB)
1616 		ixgbe_stop_mac_link_on_d3_82599(hw);
1617 	/* Turn off the laser - noop with no optics */
1618 	ixgbe_disable_tx_laser(hw);
1619 
1620 	/* Update the stack */
1621 	sc->link_up = FALSE;
1622 	ix_update_link_status(sc);
1623 
1624 	/* Reprogram the RAR[0] in case user changed it. */
1625 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV);
1626 
1627 	for (i = 0; i < sc->tx_ring_cnt; ++i)
1628 		ix_free_tx_ring(&sc->tx_rings[i]);
1629 
1630 	for (i = 0; i < sc->rx_ring_cnt; ++i)
1631 		ix_free_rx_ring(&sc->rx_rings[i]);
1632 }
1633 
1634 static void
1635 ix_setup_ifp(struct ix_softc *sc)
1636 {
1637 	struct ixgbe_hw *hw = &sc->hw;
1638 	struct ifnet *ifp = &sc->arpcom.ac_if;
1639 	int i;
1640 
1641 	ifp->if_baudrate = IF_Gbps(10UL);
1642 
1643 	ifp->if_softc = sc;
1644 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1645 	ifp->if_init = ix_init;
1646 	ifp->if_ioctl = ix_ioctl;
1647 	ifp->if_start = ix_start;
1648 	ifp->if_serialize = ix_serialize;
1649 	ifp->if_deserialize = ix_deserialize;
1650 	ifp->if_tryserialize = ix_tryserialize;
1651 #ifdef INVARIANTS
1652 	ifp->if_serialize_assert = ix_serialize_assert;
1653 #endif
1654 #ifdef IFPOLL_ENABLE
1655 	ifp->if_npoll = ix_npoll;
1656 #endif
1657 
1658 	/* Increase TSO burst length */
1659 	ifp->if_tsolen = (8 * ETHERMTU);
1660 
1661 	ifp->if_nmbclusters = sc->rx_ring_cnt * sc->rx_rings[0].rx_ndesc;
1662 	ifp->if_nmbjclusters = ifp->if_nmbclusters;
1663 
1664 	ifq_set_maxlen(&ifp->if_snd, sc->tx_rings[0].tx_ndesc - 2);
1665 	ifq_set_ready(&ifp->if_snd);
1666 	ifq_set_subq_cnt(&ifp->if_snd, sc->tx_ring_cnt);
1667 
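	/*
	 * Distribute packets across TX subqueues by modulo; the divisor is
	 * raised to the number of TX rings actually in use in ix_init().
	 */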
1668 	ifp->if_mapsubq = ifq_mapsubq_modulo;
1669 	ifq_set_subq_divisor(&ifp->if_snd, 1);
1670 
1671 	ether_ifattach(ifp, hw->mac.addr, NULL);
1672 
1673 	ifp->if_capabilities =
1674 	    IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
1675 	if (IX_ENABLE_HWRSS(sc))
1676 		ifp->if_capabilities |= IFCAP_RSS;
1677 	ifp->if_capenable = ifp->if_capabilities;
1678 	ifp->if_hwassist = CSUM_OFFLOAD | CSUM_TSO;
1679 
1680 	/*
1681 	 * Tell the upper layer(s) we support long frames.
1682 	 */
1683 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
1684 
1685 	/* Setup TX rings and subqueues */
1686 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1687 		struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
1688 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1689 
1690 		ifsq_set_cpuid(ifsq, txr->tx_intr_cpuid);
1691 		ifsq_set_priv(ifsq, txr);
1692 		ifsq_set_hw_serialize(ifsq, &txr->tx_serialize);
1693 		txr->tx_ifsq = ifsq;
1694 
1695 		ifsq_watchdog_init(&txr->tx_watchdog, ifsq, ix_watchdog);
1696 	}
1697 
1698 	/* Specify the media types supported by this adapter */
1699 	ix_init_media(sc);
1700 }
1701 
1702 static boolean_t
1703 ix_is_sfp(const struct ixgbe_hw *hw)
1704 {
1705 	switch (hw->phy.type) {
1706 	case ixgbe_phy_sfp_avago:
1707 	case ixgbe_phy_sfp_ftl:
1708 	case ixgbe_phy_sfp_intel:
1709 	case ixgbe_phy_sfp_unknown:
1710 	case ixgbe_phy_sfp_passive_tyco:
1711 	case ixgbe_phy_sfp_passive_unknown:
1712 	case ixgbe_phy_qsfp_passive_unknown:
1713 	case ixgbe_phy_qsfp_active_unknown:
1714 	case ixgbe_phy_qsfp_intel:
1715 	case ixgbe_phy_qsfp_unknown:
1716 		return TRUE;
1717 	default:
1718 		return FALSE;
1719 	}
1720 }
1721 
1722 static void
1723 ix_config_link(struct ix_softc *sc)
1724 {
1725 	struct ixgbe_hw *hw = &sc->hw;
1726 	boolean_t sfp;
1727 
1728 	sfp = ix_is_sfp(hw);
1729 	if (sfp) {
1730 		if (hw->phy.multispeed_fiber) {
1731 			hw->mac.ops.setup_sfp(hw);
1732 			ixgbe_enable_tx_laser(hw);
1733 			ix_handle_msf(sc);
1734 		} else {
1735 			ix_handle_mod(sc);
1736 		}
1737 	} else {
1738 		uint32_t autoneg, err = 0;
1739 
1740 		if (hw->mac.ops.check_link != NULL) {
1741 			err = ixgbe_check_link(hw, &sc->link_speed,
1742 			    &sc->link_up, FALSE);
1743 			if (err)
1744 				return;
1745 		}
1746 
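		/*
		 * Pick the speeds to advertise: the user-selected media
		 * first, then the PHY default, and finally whatever the
		 * hardware reports as its link capabilities.
		 */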
1747 		if (sc->advspeed != IXGBE_LINK_SPEED_UNKNOWN)
1748 			autoneg = sc->advspeed;
1749 		else
1750 			autoneg = hw->phy.autoneg_advertised;
1751 		if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
1752 			bool negotiate;
1753 
1754 			err = hw->mac.ops.get_link_capabilities(hw,
1755 			    &autoneg, &negotiate);
1756 			if (err)
1757 				return;
1758 		}
1759 
1760 		if (hw->mac.ops.setup_link != NULL) {
1761 			err = hw->mac.ops.setup_link(hw,
1762 			    autoneg, sc->link_up);
1763 			if (err)
1764 				return;
1765 		}
1766 	}
1767 }
1768 
1769 static int
1770 ix_alloc_rings(struct ix_softc *sc)
1771 {
1772 	int error, i;
1773 
1774 	/*
1775 	 * Create top level busdma tag
1776 	 */
1777 	error = bus_dma_tag_create(NULL, 1, 0,
1778 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1779 	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
1780 	    &sc->parent_tag);
1781 	if (error) {
1782 		device_printf(sc->dev, "could not create top level DMA tag\n");
1783 		return error;
1784 	}
1785 
1786 	/*
1787 	 * Allocate TX descriptor rings and buffers
1788 	 */
1789 	sc->tx_rings = kmalloc_cachealign(
1790 	    sizeof(struct ix_tx_ring) * sc->tx_ring_cnt,
1791 	    M_DEVBUF, M_WAITOK | M_ZERO);
1792 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1793 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1794 
1795 		txr->tx_sc = sc;
1796 		txr->tx_idx = i;
1797 		txr->tx_intr_vec = -1;
1798 		txr->tx_intr_cpuid = -1;
1799 		lwkt_serialize_init(&txr->tx_serialize);
1800 
1801 		error = ix_create_tx_ring(txr);
1802 		if (error)
1803 			return error;
1804 	}
1805 
1806 	/*
1807 	 * Allocate RX descriptor rings and buffers
1808 	 */
1809 	sc->rx_rings = kmalloc_cachealign(
1810 	    sizeof(struct ix_rx_ring) * sc->rx_ring_cnt,
1811 	    M_DEVBUF, M_WAITOK | M_ZERO);
1812 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
1813 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
1814 
1815 		rxr->rx_sc = sc;
1816 		rxr->rx_idx = i;
1817 		rxr->rx_intr_vec = -1;
1818 		lwkt_serialize_init(&rxr->rx_serialize);
1819 
1820 		error = ix_create_rx_ring(rxr);
1821 		if (error)
1822 			return error;
1823 	}
1824 
1825 	return 0;
1826 }
1827 
1828 static int
1829 ix_create_tx_ring(struct ix_tx_ring *txr)
1830 {
1831 	int error, i, tsize, ntxd;
1832 
1833 	/*
1834 	 * Validate the number of transmit descriptors.  It must not exceed the
1835 	 * hardware maximum, and the ring size must be a multiple of IX_DBA_ALIGN.
1836 	 */
1837 	ntxd = device_getenv_int(txr->tx_sc->dev, "txd", ix_txd);
1838 	if (((ntxd * sizeof(union ixgbe_adv_tx_desc)) % IX_DBA_ALIGN) != 0 ||
1839 	    ntxd < IX_MIN_TXD || ntxd > IX_MAX_TXD) {
1840 		device_printf(txr->tx_sc->dev,
1841 		    "Using %d TX descriptors instead of %d!\n",
1842 		    IX_DEF_TXD, ntxd);
1843 		txr->tx_ndesc = IX_DEF_TXD;
1844 	} else {
1845 		txr->tx_ndesc = ntxd;
1846 	}
1847 
1848 	/*
1849 	 * Allocate TX head write-back buffer
1850 	 */
1851 	txr->tx_hdr = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
1852 	    __VM_CACHELINE_SIZE, __VM_CACHELINE_SIZE, BUS_DMA_WAITOK,
1853 	    &txr->tx_hdr_dtag, &txr->tx_hdr_map, &txr->tx_hdr_paddr);
1854 	if (txr->tx_hdr == NULL) {
1855 		device_printf(txr->tx_sc->dev,
1856 		    "Unable to allocate TX head write-back buffer\n");
1857 		return ENOMEM;
1858 	}
1859 
1860 	/*
1861 	 * Allocate TX descriptor ring
1862 	 */
1863 	tsize = roundup2(txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc),
1864 	    IX_DBA_ALIGN);
1865 	txr->tx_base = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
1866 	    IX_DBA_ALIGN, tsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
1867 	    &txr->tx_base_dtag, &txr->tx_base_map, &txr->tx_base_paddr);
1868 	if (txr->tx_base == NULL) {
1869 		device_printf(txr->tx_sc->dev,
1870 		    "Unable to allocate TX Descriptor memory\n");
1871 		return ENOMEM;
1872 	}
1873 
1874 	tsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_tx_buf) * txr->tx_ndesc);
1875 	txr->tx_buf = kmalloc_cachealign(tsize, M_DEVBUF, M_WAITOK | M_ZERO);
1876 
1877 	/*
1878 	 * Create DMA tag for TX buffers
1879 	 */
1880 	error = bus_dma_tag_create(txr->tx_sc->parent_tag,
1881 	    1, 0,		/* alignment, bounds */
1882 	    BUS_SPACE_MAXADDR,	/* lowaddr */
1883 	    BUS_SPACE_MAXADDR,	/* highaddr */
1884 	    NULL, NULL,		/* filter, filterarg */
1885 	    IX_TSO_SIZE,	/* maxsize */
1886 	    IX_MAX_SCATTER,	/* nsegments */
1887 	    PAGE_SIZE,		/* maxsegsize */
1888 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
1889 	    BUS_DMA_ONEBPAGE,	/* flags */
1890 	    &txr->tx_tag);
1891 	if (error) {
1892 		device_printf(txr->tx_sc->dev,
1893 		    "Unable to allocate TX DMA tag\n");
1894 		kfree(txr->tx_buf, M_DEVBUF);
1895 		txr->tx_buf = NULL;
1896 		return error;
1897 	}
1898 
1899 	/*
1900 	 * Create DMA maps for TX buffers
1901 	 */
1902 	for (i = 0; i < txr->tx_ndesc; ++i) {
1903 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
1904 
1905 		error = bus_dmamap_create(txr->tx_tag,
1906 		    BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &txbuf->map);
1907 		if (error) {
1908 			device_printf(txr->tx_sc->dev,
1909 			    "Unable to create TX DMA map\n");
1910 			ix_destroy_tx_ring(txr, i);
1911 			return error;
1912 		}
1913 	}
1914 
1915 	/*
1916 	 * Initialize various watermarks
1917 	 */
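	/*
	 * tx_wreg_nsegs is the number of segments to batch before the TDT
	 * register is bumped (analogous to rx_wreg_nsegs in ix_rxeof()),
	 * while tx_intr_nsegs (1/16 of the ring here) controls how often a
	 * TX completion is requested.
	 */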
1918 	txr->tx_wreg_nsegs = IX_DEF_TXWREG_NSEGS;
1919 	txr->tx_intr_nsegs = txr->tx_ndesc / 16;
1920 
1921 	return 0;
1922 }
1923 
1924 static void
1925 ix_destroy_tx_ring(struct ix_tx_ring *txr, int ndesc)
1926 {
1927 	int i;
1928 
1929 	if (txr->tx_hdr != NULL) {
1930 		bus_dmamap_unload(txr->tx_hdr_dtag, txr->tx_hdr_map);
1931 		bus_dmamem_free(txr->tx_hdr_dtag,
1932 		    __DEVOLATILE(void *, txr->tx_hdr), txr->tx_hdr_map);
1933 		bus_dma_tag_destroy(txr->tx_hdr_dtag);
1934 		txr->tx_hdr = NULL;
1935 	}
1936 
1937 	if (txr->tx_base != NULL) {
1938 		bus_dmamap_unload(txr->tx_base_dtag, txr->tx_base_map);
1939 		bus_dmamem_free(txr->tx_base_dtag, txr->tx_base,
1940 		    txr->tx_base_map);
1941 		bus_dma_tag_destroy(txr->tx_base_dtag);
1942 		txr->tx_base = NULL;
1943 	}
1944 
1945 	if (txr->tx_buf == NULL)
1946 		return;
1947 
1948 	for (i = 0; i < ndesc; ++i) {
1949 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
1950 
1951 		KKASSERT(txbuf->m_head == NULL);
1952 		bus_dmamap_destroy(txr->tx_tag, txbuf->map);
1953 	}
1954 	bus_dma_tag_destroy(txr->tx_tag);
1955 
1956 	kfree(txr->tx_buf, M_DEVBUF);
1957 	txr->tx_buf = NULL;
1958 }
1959 
1960 static void
1961 ix_init_tx_ring(struct ix_tx_ring *txr)
1962 {
1963 	/* Clear the old ring contents */
1964 	bzero(txr->tx_base, sizeof(union ixgbe_adv_tx_desc) * txr->tx_ndesc);
1965 
1966 	/* Clear TX head write-back buffer */
1967 	*(txr->tx_hdr) = 0;
1968 
1969 	/* Reset indices */
1970 	txr->tx_next_avail = 0;
1971 	txr->tx_next_clean = 0;
1972 	txr->tx_nsegs = 0;
1973 
1974 	/* Set number of descriptors available */
1975 	txr->tx_avail = txr->tx_ndesc;
1976 
1977 	/* Enable this TX ring */
1978 	txr->tx_flags |= IX_TXFLAG_ENABLED;
1979 }
1980 
1981 static void
1982 ix_init_tx_unit(struct ix_softc *sc)
1983 {
1984 	struct ixgbe_hw	*hw = &sc->hw;
1985 	int i;
1986 
1987 	/*
1988 	 * Setup the Base and Length of the Tx Descriptor Ring
1989 	 */
1990 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
1991 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1992 		uint64_t tdba = txr->tx_base_paddr;
1993 		uint64_t hdr_paddr = txr->tx_hdr_paddr;
1994 		uint32_t txctrl;
1995 
1996 		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i), (uint32_t)tdba);
1997 		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (uint32_t)(tdba >> 32));
1998 		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
1999 		    txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc));
2000 
2001 		/* Setup the HW Tx Head and Tail descriptor pointers */
2002 		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
2003 		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
2004 
2005 		/* Disable TX head write-back relax ordering */
2006 		switch (hw->mac.type) {
2007 		case ixgbe_mac_82598EB:
2008 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
2009 			break;
2010 		case ixgbe_mac_82599EB:
2011 		case ixgbe_mac_X540:
2012 		default:
2013 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
2014 			break;
2015 		}
2016 		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
2017 		switch (hw->mac.type) {
2018 		case ixgbe_mac_82598EB:
2019 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
2020 			break;
2021 		case ixgbe_mac_82599EB:
2022 		case ixgbe_mac_X540:
2023 		default:
2024 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
2025 			break;
2026 		}
2027 
2028 		/* Enable TX head write-back */
2029 		IXGBE_WRITE_REG(hw, IXGBE_TDWBAH(i),
2030 		    (uint32_t)(hdr_paddr >> 32));
2031 		IXGBE_WRITE_REG(hw, IXGBE_TDWBAL(i),
2032 		    ((uint32_t)hdr_paddr) | IXGBE_TDWBAL_HEAD_WB_ENABLE);
2033 	}
2034 
2035 	if (hw->mac.type != ixgbe_mac_82598EB) {
2036 		uint32_t dmatxctl, rttdcs;
2037 
2038 		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
2039 		dmatxctl |= IXGBE_DMATXCTL_TE;
2040 		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
2041 
2042 		/* Disable arbiter to set MTQC */
2043 		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
2044 		rttdcs |= IXGBE_RTTDCS_ARBDIS;
2045 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
2046 
2047 		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
2048 
2049 		/* Re-enable arbiter */
2050 		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
2051 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
2052 	}
2053 }
2054 
2055 static int
2056 ix_tx_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
2057     uint32_t *cmd_type_len, uint32_t *olinfo_status)
2058 {
2059 	struct ixgbe_adv_tx_context_desc *TXD;
2060 	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2061 	int ehdrlen, ip_hlen = 0, ctxd;
2062 	boolean_t offload = TRUE;
2063 
2064 	/* First check if TSO is to be used */
2065 	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
2066 		return ix_tso_ctx_setup(txr, mp,
2067 		    cmd_type_len, olinfo_status);
2068 	}
2069 
2070 	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
2071 		offload = FALSE;
2072 
2073 	/* Indicate the whole packet as payload when not doing TSO */
2074 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
2075 
2076 	/*
2077 	 * In advanced descriptors the vlan tag must be placed into the
2078 	 * context descriptor.  Hence we need to make one even if not
2079 	 * doing checksum offloads.
2080 	 */
2081 	if (mp->m_flags & M_VLANTAG) {
2082 		vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
2083 		    IXGBE_ADVTXD_VLAN_SHIFT;
2084 	} else if (!offload) {
2085 		/* No TX descriptor is consumed */
2086 		return 0;
2087 	}
2088 
2089 	/* Set the ether header length */
2090 	ehdrlen = mp->m_pkthdr.csum_lhlen;
2091 	KASSERT(ehdrlen > 0, ("invalid ether hlen"));
2092 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
2093 
2094 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
2095 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
2096 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
2097 		ip_hlen = mp->m_pkthdr.csum_iphlen;
2098 		KASSERT(ip_hlen > 0, ("invalid ip hlen"));
2099 	}
2100 	vlan_macip_lens |= ip_hlen;
2101 
2102 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
2103 	if (mp->m_pkthdr.csum_flags & CSUM_TCP)
2104 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
2105 	else if (mp->m_pkthdr.csum_flags & CSUM_UDP)
2106 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
2107 
2108 	if (mp->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))
2109 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
2110 
2111 	/* Now ready a context descriptor */
2112 	ctxd = txr->tx_next_avail;
2113 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
2114 
2115 	/* Now copy bits into descriptor */
2116 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
2117 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
2118 	TXD->seqnum_seed = htole32(0);
2119 	TXD->mss_l4len_idx = htole32(0);
2120 
2121 	/* We've consumed the first desc, adjust counters */
2122 	if (++ctxd == txr->tx_ndesc)
2123 		ctxd = 0;
2124 	txr->tx_next_avail = ctxd;
2125 	--txr->tx_avail;
2126 
2127 	/* One TX descriptor is consumed */
2128 	return 1;
2129 }
2130 
2131 static int
2132 ix_tso_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
2133     uint32_t *cmd_type_len, uint32_t *olinfo_status)
2134 {
2135 	struct ixgbe_adv_tx_context_desc *TXD;
2136 	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2137 	uint32_t mss_l4len_idx = 0, paylen;
2138 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
2139 
2140 	ehdrlen = mp->m_pkthdr.csum_lhlen;
2141 	KASSERT(ehdrlen > 0, ("invalid ether hlen"));
2142 
2143 	ip_hlen = mp->m_pkthdr.csum_iphlen;
2144 	KASSERT(ip_hlen > 0, ("invalid ip hlen"));
2145 
2146 	tcp_hlen = mp->m_pkthdr.csum_thlen;
2147 	KASSERT(tcp_hlen > 0, ("invalid tcp hlen"));
2148 
2149 	ctxd = txr->tx_next_avail;
2150 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
2151 
2152 	if (mp->m_flags & M_VLANTAG) {
2153 		vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
2154 		    IXGBE_ADVTXD_VLAN_SHIFT;
2155 	}
2156 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
2157 	vlan_macip_lens |= ip_hlen;
2158 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
2159 
2160 	/* ADV DTYPE TUCMD */
2161 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
2162 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
2163 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
2164 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
2165 
2166 	/* MSS L4LEN IDX */
2167 	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
2168 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
2169 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
2170 
2171 	TXD->seqnum_seed = htole32(0);
2172 
2173 	if (++ctxd == txr->tx_ndesc)
2174 		ctxd = 0;
2175 
2176 	txr->tx_avail--;
2177 	txr->tx_next_avail = ctxd;
2178 
2179 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
2180 
2181 	/* This is used in the transmit desc in encap */
2182 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
2183 
2184 	*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
2185 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
2186 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
2187 
2188 	/* One TX descriptor is consumed */
2189 	return 1;
2190 }
2191 
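/*
 * Reclaim TX descriptors that the hardware has finished with.  The hdr
 * argument is the consumer (head) index reported through the TX head
 * write-back buffer; everything between tx_next_clean and hdr is done,
 * so its mbufs and DMA maps can be released.
 */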
2192 static void
2193 ix_txeof(struct ix_tx_ring *txr, int hdr)
2194 {
2195 	int first, avail;
2196 
2197 	if (txr->tx_avail == txr->tx_ndesc)
2198 		return;
2199 
2200 	first = txr->tx_next_clean;
2201 	if (first == hdr)
2202 		return;
2203 
2204 	avail = txr->tx_avail;
2205 	while (first != hdr) {
2206 		struct ix_tx_buf *txbuf = &txr->tx_buf[first];
2207 
2208 		++avail;
2209 		if (txbuf->m_head) {
2210 			bus_dmamap_unload(txr->tx_tag, txbuf->map);
2211 			m_freem(txbuf->m_head);
2212 			txbuf->m_head = NULL;
2213 		}
2214 		if (++first == txr->tx_ndesc)
2215 			first = 0;
2216 	}
2217 	txr->tx_next_clean = first;
2218 	txr->tx_avail = avail;
2219 
2220 	if (txr->tx_avail > IX_MAX_SCATTER + IX_TX_RESERVED) {
2221 		ifsq_clr_oactive(txr->tx_ifsq);
2222 		txr->tx_watchdog.wd_timer = 0;
2223 	}
2224 }
2225 
2226 static int
2227 ix_create_rx_ring(struct ix_rx_ring *rxr)
2228 {
2229 	int i, rsize, error, nrxd;
2230 
2231 	/*
2232 	 * Validate the number of receive descriptors.  It must be within the
2233 	 * hardware limits, and the ring size must be a multiple of IX_DBA_ALIGN.
2234 	 */
2235 	nrxd = device_getenv_int(rxr->rx_sc->dev, "rxd", ix_rxd);
2236 	if (((nrxd * sizeof(union ixgbe_adv_rx_desc)) % IX_DBA_ALIGN) != 0 ||
2237 	    nrxd < IX_MIN_RXD || nrxd > IX_MAX_RXD) {
2238 		device_printf(rxr->rx_sc->dev,
2239 		    "Using %d RX descriptors instead of %d!\n",
2240 		    IX_DEF_RXD, nrxd);
2241 		rxr->rx_ndesc = IX_DEF_RXD;
2242 	} else {
2243 		rxr->rx_ndesc = nrxd;
2244 	}
2245 
2246 	/*
2247 	 * Allocate RX descriptor ring
2248 	 */
2249 	rsize = roundup2(rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc),
2250 	    IX_DBA_ALIGN);
2251 	rxr->rx_base = bus_dmamem_coherent_any(rxr->rx_sc->parent_tag,
2252 	    IX_DBA_ALIGN, rsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
2253 	    &rxr->rx_base_dtag, &rxr->rx_base_map, &rxr->rx_base_paddr);
2254 	if (rxr->rx_base == NULL) {
2255 		device_printf(rxr->rx_sc->dev,
2256 		    "Unable to allocate RX Descriptor memory\n");
2257 		return ENOMEM;
2258 	}
2259 
2260 	rsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_rx_buf) * rxr->rx_ndesc);
2261 	rxr->rx_buf = kmalloc_cachealign(rsize, M_DEVBUF, M_WAITOK | M_ZERO);
2262 
2263 	/*
2264 	 * Create DMA tag for RX buffers
2265 	 */
2266 	error = bus_dma_tag_create(rxr->rx_sc->parent_tag,
2267 	    1, 0,		/* alignment, bounds */
2268 	    BUS_SPACE_MAXADDR,	/* lowaddr */
2269 	    BUS_SPACE_MAXADDR,	/* highaddr */
2270 	    NULL, NULL,		/* filter, filterarg */
2271 	    PAGE_SIZE,		/* maxsize */
2272 	    1,			/* nsegments */
2273 	    PAGE_SIZE,		/* maxsegsize */
2274 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, /* flags */
2275 	    &rxr->rx_tag);
2276 	if (error) {
2277 		device_printf(rxr->rx_sc->dev,
2278 		    "Unable to create RX DMA tag\n");
2279 		kfree(rxr->rx_buf, M_DEVBUF);
2280 		rxr->rx_buf = NULL;
2281 		return error;
2282 	}
2283 
2284 	/*
2285 	 * Create spare DMA map for RX buffers
2286 	 */
2287 	error = bus_dmamap_create(rxr->rx_tag, BUS_DMA_WAITOK,
2288 	    &rxr->rx_sparemap);
2289 	if (error) {
2290 		device_printf(rxr->rx_sc->dev,
2291 		    "Unable to create spare RX DMA map\n");
2292 		bus_dma_tag_destroy(rxr->rx_tag);
2293 		kfree(rxr->rx_buf, M_DEVBUF);
2294 		rxr->rx_buf = NULL;
2295 		return error;
2296 	}
2297 
2298 	/*
2299 	 * Create DMA maps for RX buffers
2300 	 */
2301 	for (i = 0; i < rxr->rx_ndesc; ++i) {
2302 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2303 
2304 		error = bus_dmamap_create(rxr->rx_tag,
2305 		    BUS_DMA_WAITOK, &rxbuf->map);
2306 		if (error) {
2307 			device_printf(rxr->rx_sc->dev,
2308 			    "Unable to create RX DMA map\n");
2309 			ix_destroy_rx_ring(rxr, i);
2310 			return error;
2311 		}
2312 	}
2313 
2314 	/*
2315 	 * Initialize various watermarks
2316 	 */
2317 	rxr->rx_wreg_nsegs = IX_DEF_RXWREG_NSEGS;
2318 
2319 	return 0;
2320 }
2321 
2322 static void
2323 ix_destroy_rx_ring(struct ix_rx_ring *rxr, int ndesc)
2324 {
2325 	int i;
2326 
2327 	if (rxr->rx_base != NULL) {
2328 		bus_dmamap_unload(rxr->rx_base_dtag, rxr->rx_base_map);
2329 		bus_dmamem_free(rxr->rx_base_dtag, rxr->rx_base,
2330 		    rxr->rx_base_map);
2331 		bus_dma_tag_destroy(rxr->rx_base_dtag);
2332 		rxr->rx_base = NULL;
2333 	}
2334 
2335 	if (rxr->rx_buf == NULL)
2336 		return;
2337 
2338 	for (i = 0; i < ndesc; ++i) {
2339 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2340 
2341 		KKASSERT(rxbuf->m_head == NULL);
2342 		bus_dmamap_destroy(rxr->rx_tag, rxbuf->map);
2343 	}
2344 	bus_dmamap_destroy(rxr->rx_tag, rxr->rx_sparemap);
2345 	bus_dma_tag_destroy(rxr->rx_tag);
2346 
2347 	kfree(rxr->rx_buf, M_DEVBUF);
2348 	rxr->rx_buf = NULL;
2349 }
2350 
2351 /*
2352 ** Used to detect a descriptor that has
2353 ** been merged by Hardware RSC.
2354 */
2355 static __inline uint32_t
2356 ix_rsc_count(union ixgbe_adv_rx_desc *rx)
2357 {
2358 	return (le32toh(rx->wb.lower.lo_dword.data) &
2359 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
2360 }
2361 
2362 #if 0
2363 /*********************************************************************
2364  *
2365  *  Initialize Hardware RSC (LRO) feature on 82599
2366  *  for an RX ring, this is toggled by the LRO capability
2367  *  even though it is transparent to the stack.
2368  *
2369  *  NOTE: since this HW feature only works with IPv4, and our
2370  *        testing has shown soft LRO to be just as effective,
2371  *        I have decided to disable this by default.
2372  *
2373  **********************************************************************/
2374 static void
2375 ix_setup_hw_rsc(struct ix_rx_ring *rxr)
2376 {
2377 	struct	ix_softc 	*sc = rxr->rx_sc;
2378 	struct	ixgbe_hw	*hw = &sc->hw;
2379 	uint32_t			rscctrl, rdrxctl;
2380 
2381 #if 0
2382 	/* If turning LRO/RSC off we need to disable it */
2383 	if ((sc->arpcom.ac_if.if_capenable & IFCAP_LRO) == 0) {
2384 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2385 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
2386 		return;
2387 	}
2388 #endif
2389 
2390 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
2391 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
2392 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
2393 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
2394 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
2395 
2396 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2397 	rscctrl |= IXGBE_RSCCTL_RSCEN;
2398 	/*
2399 	** Limit the total number of descriptors that
2400 	** can be combined, so it does not exceed 64K
2401 	*/
2402 	if (rxr->mbuf_sz == MCLBYTES)
2403 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
2404 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
2405 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
2406 	else if (rxr->mbuf_sz == MJUM9BYTES)
2407 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
2408 	else  /* Using 16K cluster */
2409 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
2410 
2411 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
2412 
2413 	/* Enable TCP header recognition */
2414 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
2415 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
2416 	    IXGBE_PSRTYPE_TCPHDR));
2417 
2418 	/* Disable RSC for ACK packets */
2419 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
2420 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
2421 
2422 	rxr->hw_rsc = TRUE;
2423 }
2424 #endif
2425 
2426 static int
2427 ix_init_rx_ring(struct ix_rx_ring *rxr)
2428 {
2429 	int i;
2430 
2431 	/* Clear the ring contents */
2432 	bzero(rxr->rx_base, rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2433 
2434 	/* XXX we need JUMPAGESIZE for RSC too */
2435 	if (rxr->rx_sc->max_frame_size <= MCLBYTES)
2436 		rxr->rx_mbuf_sz = MCLBYTES;
2437 	else
2438 		rxr->rx_mbuf_sz = MJUMPAGESIZE;
2439 
2440 	/* Now replenish the mbufs */
2441 	for (i = 0; i < rxr->rx_ndesc; ++i) {
2442 		int error;
2443 
2444 		error = ix_newbuf(rxr, i, TRUE);
2445 		if (error)
2446 			return error;
2447 	}
2448 
2449 	/* Setup our descriptor indices */
2450 	rxr->rx_next_check = 0;
2451 	rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2452 
2453 #if 0
2454 	/*
2455 	** Now set up the LRO interface:
2456 	*/
2457 	if (ixgbe_rsc_enable)
2458 		ix_setup_hw_rsc(rxr);
2459 #endif
2460 
2461 	return 0;
2462 }
2463 
2464 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
2465 
2466 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
2467 
2468 static void
2469 ix_init_rx_unit(struct ix_softc *sc, boolean_t polling)
2470 {
2471 	struct ixgbe_hw	*hw = &sc->hw;
2472 	struct ifnet *ifp = &sc->arpcom.ac_if;
2473 	uint32_t bufsz, fctrl, rxcsum, hlreg;
2474 	int i;
2475 
2476 	/*
2477 	 * Make sure receives are disabled while setting up the descriptor ring
2478 	 */
2479 	ixgbe_disable_rx(hw);
2480 
2481 	/* Enable broadcasts */
2482 	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
2483 	fctrl |= IXGBE_FCTRL_BAM;
2484 	if (hw->mac.type == ixgbe_mac_82598EB) {
2485 		fctrl |= IXGBE_FCTRL_DPF;
2486 		fctrl |= IXGBE_FCTRL_PMCF;
2487 	}
2488 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
2489 
2490 	/* Set for Jumbo Frames? */
2491 	hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
2492 	if (ifp->if_mtu > ETHERMTU)
2493 		hlreg |= IXGBE_HLREG0_JUMBOEN;
2494 	else
2495 		hlreg &= ~IXGBE_HLREG0_JUMBOEN;
2496 	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
2497 
2498 	KKASSERT(sc->rx_rings[0].rx_mbuf_sz >= MCLBYTES);
2499 	bufsz = (sc->rx_rings[0].rx_mbuf_sz + BSIZEPKT_ROUNDUP) >>
2500 	    IXGBE_SRRCTL_BSIZEPKT_SHIFT;
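	/*
	 * SRRCTL expresses the RX buffer size in 1KB units, e.g. a 2KB
	 * MCLBYTES cluster yields bufsz == 2 after the round-up and shift
	 * above.
	 */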
2501 
2502 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
2503 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
2504 		uint64_t rdba = rxr->rx_base_paddr;
2505 		uint32_t srrctl;
2506 
2507 		/* Setup the Base and Length of the Rx Descriptor Ring */
2508 		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i), (uint32_t)rdba);
2509 		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (uint32_t)(rdba >> 32));
2510 		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
2511 		    rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2512 
2513 		/*
2514 		 * Set up the SRRCTL register
2515 		 */
2516 		srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
2517 
2518 		srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
2519 		srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
2520 		srrctl |= bufsz;
2521 		srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
2522 		if (sc->rx_ring_inuse > 1) {
2523 			/* See the comment near ix_enable_rx_drop() */
2524 			if (sc->ifm_media &
2525 			    (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) {
2526 				srrctl &= ~IXGBE_SRRCTL_DROP_EN;
2527 				if (i == 0 && bootverbose) {
2528 					if_printf(ifp, "flow control %s, "
2529 					    "disable RX drop\n",
2530 					    ix_ifmedia2str(sc->ifm_media));
2531 				}
2532 			} else {
2533 				srrctl |= IXGBE_SRRCTL_DROP_EN;
2534 				if (i == 0 && bootverbose) {
2535 					if_printf(ifp, "flow control %s, "
2536 					    "enable RX drop\n",
2537 					    ix_ifmedia2str(sc->ifm_media));
2538 				}
2539 			}
2540 		}
2541 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
2542 
2543 		/* Setup the HW Rx Head and Tail Descriptor Pointers */
2544 		IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
2545 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
2546 	}
2547 
2548 	if (sc->hw.mac.type != ixgbe_mac_82598EB)
2549 		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), 0);
2550 
2551 	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
2552 
2553 	/*
2554 	 * Setup RSS
2555 	 */
2556 	if (sc->rx_ring_inuse > 1) {
2557 		uint8_t key[IX_NRSSRK * IX_RSSRK_SIZE];
2558 		const struct if_ringmap *rm;
2559 		int j, r, nreta, table_nent;
2560 
2561 		/*
2562 		 * NOTE:
2563 		 * When we reach here, RSS has already been disabled
2564 		 * in ix_stop(), so we can safely configure the RSS key
2565 		 * and redirect table.
2566 		 */
2567 
2568 		/*
2569 		 * Configure RSS key
2570 		 */
2571 		toeplitz_get_key(key, sizeof(key));
2572 		for (i = 0; i < IX_NRSSRK; ++i) {
2573 			uint32_t rssrk;
2574 
2575 			rssrk = IX_RSSRK_VAL(key, i);
2576 			IX_RSS_DPRINTF(sc, 1, "rssrk%d 0x%08x\n",
2577 			    i, rssrk);
2578 
2579 			IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rssrk);
2580 		}
2581 
2582 		/*
2583 		 * Configure RSS redirect table.
2584 		 */
2585 
2586 		/* Table size will differ based on MAC */
2587 		switch (hw->mac.type) {
2588 		case ixgbe_mac_X550:
2589 		case ixgbe_mac_X550EM_x:
2590 		case ixgbe_mac_X550EM_a:
2591 			nreta = IX_NRETA_X550;
2592 			break;
2593 		default:
2594 			nreta = IX_NRETA;
2595 			break;
2596 		}
2597 
2598 		table_nent = nreta * IX_RETA_SIZE;
2599 		KASSERT(table_nent <= IX_RDRTABLE_SIZE,
2600 		    ("invalid RETA count %d", nreta));
2601 		if (polling)
2602 			rm = sc->rx_rmap;
2603 		else
2604 			rm = sc->rx_rmap_intr;
2605 		if_ringmap_rdrtable(rm, sc->rdr_table, table_nent);
2606 
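		/*
		 * Each 32-bit RETA register packs IX_RETA_SIZE one-byte RX
		 * ring indices, least significant byte first; e.g. redirect
		 * entries {0, 1, 2, 3} are written as reta == 0x03020100.
		 */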
2607 		r = 0;
2608 		for (j = 0; j < nreta; ++j) {
2609 			uint32_t reta = 0;
2610 
2611 			for (i = 0; i < IX_RETA_SIZE; ++i) {
2612 				uint32_t q;
2613 
2614 				q = sc->rdr_table[r];
2615 				KASSERT(q < sc->rx_ring_inuse,
2616 				    ("invalid RX ring index %d", q));
2617 				reta |= q << (8 * i);
2618 				++r;
2619 			}
2620 			IX_RSS_DPRINTF(sc, 1, "reta 0x%08x\n", reta);
2621 			if (j < IX_NRETA) {
2622 				IXGBE_WRITE_REG(hw, IXGBE_RETA(j), reta);
2623 			} else {
2624 				IXGBE_WRITE_REG(hw, IXGBE_ERETA(j - IX_NRETA),
2625 				    reta);
2626 			}
2627 		}
2628 
2629 		/*
2630 		 * Enable multiple receive queues.
2631 		 * Enable IPv4 RSS standard hash functions.
2632 		 */
2633 		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
2634 		    IXGBE_MRQC_RSSEN |
2635 		    IXGBE_MRQC_RSS_FIELD_IPV4 |
2636 		    IXGBE_MRQC_RSS_FIELD_IPV4_TCP);
2637 
2638 		/*
2639 		 * NOTE:
2640 		 * PCSD must be enabled to enable multiple
2641 		 * receive queues.
2642 		 */
2643 		rxcsum |= IXGBE_RXCSUM_PCSD;
2644 	}
2645 
2646 	if (ifp->if_capenable & IFCAP_RXCSUM)
2647 		rxcsum |= IXGBE_RXCSUM_PCSD;
2648 
2649 	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
2650 }
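/*
 * Return processed RX descriptors to the hardware by advancing RDT.
 * RDT must point at the last refilled descriptor, not one past it,
 * hence the decrement with wrap-around.
 */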
2651 
2652 static __inline void
2653 ix_rx_refresh(struct ix_rx_ring *rxr, int i)
2654 {
2655 	if (--i < 0)
2656 		i = rxr->rx_ndesc - 1;
2657 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, IXGBE_RDT(rxr->rx_idx), i);
2658 }
2659 
2660 static __inline void
2661 ix_rxcsum(uint32_t staterr, struct mbuf *mp, uint32_t ptype)
2662 {
2663 	if ((ptype &
2664 	     (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_IPV4_EX)) == 0) {
2665 		/* Not IPv4 */
2666 		return;
2667 	}
2668 
2669 	if ((staterr & (IXGBE_RXD_STAT_IPCS | IXGBE_RXDADV_ERR_IPE)) ==
2670 	    IXGBE_RXD_STAT_IPCS)
2671 		mp->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID;
2672 
2673 	if ((ptype &
2674 	     (IXGBE_RXDADV_PKTTYPE_TCP | IXGBE_RXDADV_PKTTYPE_UDP)) == 0) {
2675 		/*
2676 		 * - Neither TCP nor UDP
2677 		 * - IPv4 fragment
2678 		 */
2679 		return;
2680 	}
2681 
2682 	if ((staterr & (IXGBE_RXD_STAT_L4CS | IXGBE_RXDADV_ERR_TCPE)) ==
2683 	    IXGBE_RXD_STAT_L4CS) {
2684 		mp->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
2685 		    CSUM_FRAG_NOT_CHECKED;
2686 		mp->m_pkthdr.csum_data = htons(0xffff);
2687 	}
2688 }
2689 
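/*
 * Convert the hardware RSS hash type into pktinfo hints for the stack
 * and record the Toeplitz hash in the mbuf.  Only TCP/IPv4 and
 * non-fragmented UDP/IPv4 hashes are usable here; anything else
 * returns NULL (no pktinfo).
 */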
2690 static __inline struct pktinfo *
2691 ix_rssinfo(struct mbuf *m, struct pktinfo *pi,
2692     uint32_t hash, uint32_t hashtype, uint32_t ptype)
2693 {
2694 	switch (hashtype) {
2695 	case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
2696 		pi->pi_netisr = NETISR_IP;
2697 		pi->pi_flags = 0;
2698 		pi->pi_l3proto = IPPROTO_TCP;
2699 		break;
2700 
2701 	case IXGBE_RXDADV_RSSTYPE_IPV4:
2702 		if ((ptype & IXGBE_RXDADV_PKTTYPE_UDP) == 0) {
2703 			/* Not UDP or is fragment */
2704 			return NULL;
2705 		}
2706 		pi->pi_netisr = NETISR_IP;
2707 		pi->pi_flags = 0;
2708 		pi->pi_l3proto = IPPROTO_UDP;
2709 		break;
2710 
2711 	default:
2712 		return NULL;
2713 	}
2714 
2715 	m_sethash(m, toeplitz_hash(hash));
2716 	return pi;
2717 }
2718 
2719 static __inline void
2720 ix_setup_rxdesc(union ixgbe_adv_rx_desc *rxd, const struct ix_rx_buf *rxbuf)
2721 {
2722 	rxd->read.pkt_addr = htole64(rxbuf->paddr);
2723 	rxd->wb.upper.status_error = 0;
2724 }
2725 
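/*
 * Drop the fragment chain collected for a bad frame and recycle the
 * current descriptor.  For a multi-descriptor frame IX_RXRING_FLAG_DISC
 * stays set until the EOP fragment is seen, so the frame's remaining
 * fragments are discarded as well.
 */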
2726 static void
2727 ix_rx_discard(struct ix_rx_ring *rxr, int i, boolean_t eop)
2728 {
2729 	struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2730 
2731 	/*
2732 	 * XXX discard may not be correct
2733 	 */
2734 	if (eop) {
2735 		IFNET_STAT_INC(&rxr->rx_sc->arpcom.ac_if, ierrors, 1);
2736 		rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2737 	} else {
2738 		rxr->rx_flags |= IX_RXRING_FLAG_DISC;
2739 	}
2740 	if (rxbuf->fmp != NULL) {
2741 		m_freem(rxbuf->fmp);
2742 		rxbuf->fmp = NULL;
2743 		rxbuf->lmp = NULL;
2744 	}
2745 	ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
2746 }
2747 
2748 static void
2749 ix_rxeof(struct ix_rx_ring *rxr, int count)
2750 {
2751 	struct ifnet *ifp = &rxr->rx_sc->arpcom.ac_if;
2752 	int i, nsegs = 0, cpuid = mycpuid;
2753 
2754 	i = rxr->rx_next_check;
2755 	while (count != 0) {
2756 		struct ix_rx_buf *rxbuf, *nbuf = NULL;
2757 		union ixgbe_adv_rx_desc	*cur;
2758 		struct mbuf *sendmp = NULL, *mp;
2759 		struct pktinfo *pi = NULL, pi0;
2760 		uint32_t rsc = 0, ptype, staterr, hash, hashtype;
2761 		uint16_t len;
2762 		boolean_t eop;
2763 
2764 		cur = &rxr->rx_base[i];
2765 		staterr = le32toh(cur->wb.upper.status_error);
2766 
2767 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
2768 			break;
2769 		++nsegs;
2770 
2771 		rxbuf = &rxr->rx_buf[i];
2772 		mp = rxbuf->m_head;
2773 
2774 		len = le16toh(cur->wb.upper.length);
2775 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
2776 		    IXGBE_RXDADV_PKTTYPE_MASK;
2777 		hash = le32toh(cur->wb.lower.hi_dword.rss);
2778 		hashtype = le32toh(cur->wb.lower.lo_dword.data) &
2779 		    IXGBE_RXDADV_RSSTYPE_MASK;
2780 
2781 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
2782 		if (eop)
2783 			--count;
2784 
2785 		/*
2786 		 * Make sure bad packets are discarded
2787 		 */
2788 		if ((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) ||
2789 		    (rxr->rx_flags & IX_RXRING_FLAG_DISC)) {
2790 			ix_rx_discard(rxr, i, eop);
2791 			goto next_desc;
2792 		}
2793 
2794 		bus_dmamap_sync(rxr->rx_tag, rxbuf->map, BUS_DMASYNC_POSTREAD);
2795 		if (ix_newbuf(rxr, i, FALSE) != 0) {
2796 			ix_rx_discard(rxr, i, eop);
2797 			goto next_desc;
2798 		}
2799 
2800 		/*
2801 		 * On the 82599, which supports hardware LRO (RSC), a packet
2802 		 * need not be fragmented across sequential descriptors;
2803 		 * instead, the next descriptor of the frame is indicated in
2804 		 * bits of the current descriptor.  This also means that we
2805 		 * might process more than one packet at a time, something
2806 		 * that was never true before; it required eliminating the
2807 		 * global chain pointers in favor of keeping the chain in
2808 		 * the buffer struct, as we do here.
2809 		 */
2810 		if (!eop) {
2811 			int nextp;
2812 
2813 			/*
2814 			 * Figure out the next descriptor
2815 			 * of this frame.
2816 			 */
2817 			if (rxr->rx_flags & IX_RXRING_FLAG_LRO)
2818 				rsc = ix_rsc_count(cur);
2819 			if (rsc) { /* Get hardware index */
2820 				nextp = ((staterr &
2821 				    IXGBE_RXDADV_NEXTP_MASK) >>
2822 				    IXGBE_RXDADV_NEXTP_SHIFT);
2823 			} else { /* Just sequential */
2824 				nextp = i + 1;
2825 				if (nextp == rxr->rx_ndesc)
2826 					nextp = 0;
2827 			}
2828 			nbuf = &rxr->rx_buf[nextp];
2829 			prefetch(nbuf);
2830 		}
2831 		mp->m_len = len;
2832 
2833 		/*
2834 		 * Rather than using the fmp/lmp global pointers
2835 		 * we now keep the head of a packet chain in the
2836 		 * buffer struct and pass this along from one
2837 		 * descriptor to the next, until we get EOP.
2838 		 */
2839 		if (rxbuf->fmp == NULL) {
2840 			mp->m_pkthdr.len = len;
2841 			rxbuf->fmp = mp;
2842 			rxbuf->lmp = mp;
2843 		} else {
2844 			rxbuf->fmp->m_pkthdr.len += len;
2845 			rxbuf->lmp->m_next = mp;
2846 			rxbuf->lmp = mp;
2847 		}
2848 
2849 		if (nbuf != NULL) {
2850 			/*
2851 			 * Not the last fragment of this frame,
2852 			 * pass this fragment list on
2853 			 */
2854 			nbuf->fmp = rxbuf->fmp;
2855 			nbuf->lmp = rxbuf->lmp;
2856 		} else {
2857 			/*
2858 			 * Send this frame
2859 			 */
2860 			sendmp = rxbuf->fmp;
2861 
2862 			sendmp->m_pkthdr.rcvif = ifp;
2863 			IFNET_STAT_INC(ifp, ipackets, 1);
2864 #ifdef IX_RSS_DEBUG
2865 			rxr->rx_pkts++;
2866 #endif
2867 
2868 			/* Process vlan info */
2869 			if (staterr & IXGBE_RXD_STAT_VP) {
2870 				sendmp->m_pkthdr.ether_vlantag =
2871 				    le16toh(cur->wb.upper.vlan);
2872 				sendmp->m_flags |= M_VLANTAG;
2873 			}
2874 			if (ifp->if_capenable & IFCAP_RXCSUM)
2875 				ix_rxcsum(staterr, sendmp, ptype);
2876 			if (ifp->if_capenable & IFCAP_RSS) {
2877 				pi = ix_rssinfo(sendmp, &pi0,
2878 				    hash, hashtype, ptype);
2879 			}
2880 		}
2881 		rxbuf->fmp = NULL;
2882 		rxbuf->lmp = NULL;
2883 next_desc:
2884 		/* Advance our pointers to the next descriptor. */
2885 		if (++i == rxr->rx_ndesc)
2886 			i = 0;
2887 
2888 		if (sendmp != NULL)
2889 			ifp->if_input(ifp, sendmp, pi, cpuid);
2890 
2891 		if (nsegs >= rxr->rx_wreg_nsegs) {
2892 			ix_rx_refresh(rxr, i);
2893 			nsegs = 0;
2894 		}
2895 	}
2896 	rxr->rx_next_check = i;
2897 
2898 	if (nsegs > 0)
2899 		ix_rx_refresh(rxr, i);
2900 }
2901 
2902 static void
2903 ix_set_vlan(struct ix_softc *sc)
2904 {
2905 	struct ixgbe_hw *hw = &sc->hw;
2906 	uint32_t ctrl;
2907 
2908 	if (hw->mac.type == ixgbe_mac_82598EB) {
2909 		ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
2910 		ctrl |= IXGBE_VLNCTRL_VME;
2911 		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
2912 	} else {
2913 		int i;
2914 
2915 		/*
2916 		 * On 82599 and later chips the VLAN enable is
2917 		 * per queue in RXDCTL
2918 		 */
2919 		for (i = 0; i < sc->rx_ring_inuse; ++i) {
2920 			ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
2921 			ctrl |= IXGBE_RXDCTL_VME;
2922 			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
2923 		}
2924 	}
2925 }
2926 
2927 static void
2928 ix_enable_intr(struct ix_softc *sc)
2929 {
2930 	struct ixgbe_hw	*hw = &sc->hw;
2931 	uint32_t fwsm;
2932 	int i;
2933 
2934 	for (i = 0; i < sc->intr_cnt; ++i)
2935 		lwkt_serialize_handler_enable(sc->intr_data[i].intr_serialize);
2936 
2937 	sc->intr_mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
2938 
2939 	/* Enable Fan Failure detection */
2940 	if (hw->device_id == IXGBE_DEV_ID_82598AT)
2941 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
2942 
2943 	switch (hw->mac.type) {
2944 	case ixgbe_mac_82599EB:
2945 		sc->intr_mask |= IXGBE_EIMS_ECC;
2946 		/* Temperature sensor on some adapters */
2947 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP0;
2948 		/* SFP+ (RX_LOS_N & MOD_ABS_N) */
2949 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
2950 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP2;
2951 		break;
2952 
2953 	case ixgbe_mac_X540:
2954 		sc->intr_mask |= IXGBE_EIMS_ECC;
2955 		/* Detect if Thermal Sensor is enabled */
2956 		fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
2957 		if (fwsm & IXGBE_FWSM_TS_ENABLED)
2958 			sc->intr_mask |= IXGBE_EIMS_TS;
2959 		break;
2960 
2961 	case ixgbe_mac_X550:
2962 	case ixgbe_mac_X550EM_a:
2963 	case ixgbe_mac_X550EM_x:
2964 		sc->intr_mask |= IXGBE_EIMS_ECC;
2965 		/* MAC thermal sensor is automatically enabled */
2966 		sc->intr_mask |= IXGBE_EIMS_TS;
2967 		/* Some devices use SDP0 for important information */
2968 		if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
2969 		    hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T)
2970 			sc->intr_mask |= IXGBE_EIMS_GPI_SDP0_BY_MAC(hw);
2971 		/* FALL THROUGH */
2972 	default:
2973 		break;
2974 	}
2975 
2976 	/* With MSI-X we use auto clear for RX and TX rings */
2977 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
2978 		/*
2979 		 * There are no EIAC1/EIAC2 for newer chips; the related
2980 		 * bits for TX and RX rings > 16 are always auto clear.
2981 		 *
2982 		 * XXX which bits?  There are _no_ documented EICR1 and
2983 		 * EICR2 at all; only EICR.
2984 		 */
2985 		IXGBE_WRITE_REG(hw, IXGBE_EIAC, IXGBE_EIMS_RTX_QUEUE);
2986 	} else {
2987 		sc->intr_mask |= IX_TX_INTR_MASK | IX_RX0_INTR_MASK;
2988 
2989 		KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
2990 		if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS)
2991 			sc->intr_mask |= IX_RX1_INTR_MASK;
2992 	}
2993 
2994 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
2995 
2996 	/*
2997 	 * Enable RX and TX rings for MSI-X
2998 	 */
2999 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
3000 		for (i = 0; i < sc->tx_ring_inuse; ++i) {
3001 			const struct ix_tx_ring *txr = &sc->tx_rings[i];
3002 
3003 			if (txr->tx_intr_vec >= 0) {
3004 				IXGBE_WRITE_REG(hw, txr->tx_eims,
3005 				    txr->tx_eims_val);
3006 			}
3007 		}
3008 		for (i = 0; i < sc->rx_ring_inuse; ++i) {
3009 			const struct ix_rx_ring *rxr = &sc->rx_rings[i];
3010 
3011 			KKASSERT(rxr->rx_intr_vec >= 0);
3012 			IXGBE_WRITE_REG(hw, rxr->rx_eims, rxr->rx_eims_val);
3013 		}
3014 	}
3015 
3016 	IXGBE_WRITE_FLUSH(hw);
3017 }
3018 
3019 static void
3020 ix_disable_intr(struct ix_softc *sc)
3021 {
3022 	int i;
3023 
3024 	if (sc->intr_type == PCI_INTR_TYPE_MSIX)
3025 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIAC, 0);
3026 
3027 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
3028 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, ~0);
3029 	} else {
3030 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, 0xFFFF0000);
3031 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(0), ~0);
3032 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(1), ~0);
3033 	}
3034 	IXGBE_WRITE_FLUSH(&sc->hw);
3035 
3036 	for (i = 0; i < sc->intr_cnt; ++i)
3037 		lwkt_serialize_handler_disable(sc->intr_data[i].intr_serialize);
3038 }
3039 
3040 uint16_t
3041 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, uint32_t reg)
3042 {
3043 	return pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
3044 	    reg, 2);
3045 }
3046 
3047 void
3048 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, uint32_t reg, uint16_t value)
3049 {
3050 	pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
3051 	    reg, value, 2);
3052 }
3053 
3054 static void
3055 ix_slot_info(struct ix_softc *sc)
3056 {
3057 	struct ixgbe_hw *hw = &sc->hw;
3058 	device_t dev = sc->dev;
3059 	struct ixgbe_mac_info *mac = &hw->mac;
3060 	uint16_t link;
3061 	uint32_t offset;
3062 
3063 	/* For most devices simply call the shared code routine */
3064 	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
3065 		ixgbe_get_bus_info(hw);
3066 		/* These devices don't use PCI-E */
3067 		if (hw->mac.type == ixgbe_mac_X550EM_x ||
3068 		    hw->mac.type == ixgbe_mac_X550EM_a)
3069 			return;
3070 		goto display;
3071 	}
3072 
3073 	/*
3074 	 * For the Quad port adapter we need to parse back
3075 	 * up the PCI tree to find the speed of the expansion
3076 	 * slot into which this adapter is plugged. A bit more work.
3077 	 */
3078 	dev = device_get_parent(device_get_parent(dev));
3079 #ifdef IXGBE_DEBUG
3080 	device_printf(dev, "parent pcib = %x,%x,%x\n",
3081 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
3082 #endif
3083 	dev = device_get_parent(device_get_parent(dev));
3084 #ifdef IXGBE_DEBUG
3085 	device_printf(dev, "slot pcib = %x,%x,%x\n",
3086 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
3087 #endif
3088 	/* Now get the PCI Express Capabilities offset */
3089 	offset = pci_get_pciecap_ptr(dev);
3090 	/* ...and read the Link Status Register */
3091 	link = pci_read_config(dev, offset + PCIER_LINKSTAT, 2);
3092 	switch (link & IXGBE_PCI_LINK_WIDTH) {
3093 	case IXGBE_PCI_LINK_WIDTH_1:
3094 		hw->bus.width = ixgbe_bus_width_pcie_x1;
3095 		break;
3096 	case IXGBE_PCI_LINK_WIDTH_2:
3097 		hw->bus.width = ixgbe_bus_width_pcie_x2;
3098 		break;
3099 	case IXGBE_PCI_LINK_WIDTH_4:
3100 		hw->bus.width = ixgbe_bus_width_pcie_x4;
3101 		break;
3102 	case IXGBE_PCI_LINK_WIDTH_8:
3103 		hw->bus.width = ixgbe_bus_width_pcie_x8;
3104 		break;
3105 	default:
3106 		hw->bus.width = ixgbe_bus_width_unknown;
3107 		break;
3108 	}
3109 
3110 	switch (link & IXGBE_PCI_LINK_SPEED) {
3111 	case IXGBE_PCI_LINK_SPEED_2500:
3112 		hw->bus.speed = ixgbe_bus_speed_2500;
3113 		break;
3114 	case IXGBE_PCI_LINK_SPEED_5000:
3115 		hw->bus.speed = ixgbe_bus_speed_5000;
3116 		break;
3117 	case IXGBE_PCI_LINK_SPEED_8000:
3118 		hw->bus.speed = ixgbe_bus_speed_8000;
3119 		break;
3120 	default:
3121 		hw->bus.speed = ixgbe_bus_speed_unknown;
3122 		break;
3123 	}
3124 
3125 	mac->ops.set_lan_id(hw);
3126 
3127 display:
3128 	device_printf(dev, "PCI Express Bus: Speed %s %s\n",
3129 	    hw->bus.speed == ixgbe_bus_speed_8000 ? "8.0GT/s" :
3130 	    hw->bus.speed == ixgbe_bus_speed_5000 ? "5.0GT/s" :
3131 	    hw->bus.speed == ixgbe_bus_speed_2500 ? "2.5GT/s" : "Unknown",
3132 	    hw->bus.width == ixgbe_bus_width_pcie_x8 ? "Width x8" :
3133 	    hw->bus.width == ixgbe_bus_width_pcie_x4 ? "Width x4" :
3134 	    hw->bus.width == ixgbe_bus_width_pcie_x1 ? "Width x1" : "Unknown");
3135 
3136 	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP &&
3137 	    hw->bus.width <= ixgbe_bus_width_pcie_x4 &&
3138 	    hw->bus.speed == ixgbe_bus_speed_2500) {
3139 		device_printf(dev, "For optimal performance an x8 "
3140 		    "PCIe or x4 PCIe Gen2 slot is required.\n");
3141 	} else if (hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP &&
3142 	    hw->bus.width <= ixgbe_bus_width_pcie_x8 &&
3143 	    hw->bus.speed < ixgbe_bus_speed_8000) {
3144 		device_printf(dev, "For optimal performance an x8 "
3145 		    "PCIe Gen3 slot is required.\n");
3146 	}
3147 }
3148 
3149 /*
3150  * TODO comment is incorrect
3151  *
3152  * Setup the correct IVAR register for a particular MSIX interrupt
3153  * - entry is the register array entry
3154  * - vector is the MSIX vector for this queue
3155  * - type is RX/TX/MISC
3156  */
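/*
 * On 82599 and later MACs each IVAR register covers two queue entries:
 * the RX vector of an entry occupies bits 0-7/16-23 and the TX vector
 * bits 8-15/24-31, which is what the (16 * (entry & 1)) + (8 * type)
 * computation below selects.  A type of -1 programs IVAR_MISC instead.
 */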
3157 static void
3158 ix_set_ivar(struct ix_softc *sc, uint8_t entry, uint8_t vector,
3159     int8_t type)
3160 {
3161 	struct ixgbe_hw *hw = &sc->hw;
3162 	uint32_t ivar, index;
3163 
3164 	vector |= IXGBE_IVAR_ALLOC_VAL;
3165 
3166 	switch (hw->mac.type) {
3167 	case ixgbe_mac_82598EB:
3168 		if (type == -1)
3169 			entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
3170 		else
3171 			entry += (type * 64);
3172 		index = (entry >> 2) & 0x1F;
3173 		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
3174 		ivar &= ~(0xFF << (8 * (entry & 0x3)));
3175 		ivar |= (vector << (8 * (entry & 0x3)));
3176 		IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
3177 		break;
3178 
3179 	case ixgbe_mac_82599EB:
3180 	case ixgbe_mac_X540:
3181 	case ixgbe_mac_X550:
3182 	case ixgbe_mac_X550EM_a:
3183 	case ixgbe_mac_X550EM_x:
3184 		if (type == -1) { /* MISC IVAR */
3185 			index = (entry & 1) * 8;
3186 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
3187 			ivar &= ~(0xFF << index);
3188 			ivar |= (vector << index);
3189 			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
3190 		} else {	/* RX/TX IVARS */
3191 			index = (16 * (entry & 1)) + (8 * type);
3192 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
3193 			ivar &= ~(0xFF << index);
3194 			ivar |= (vector << index);
3195 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
3196 		}
3197 		/* FALL THROUGH */
3198 	default:
3199 		break;
3200 	}
3201 }
3202 
3203 static boolean_t
3204 ix_sfp_probe(struct ix_softc *sc)
3205 {
3206 	struct ixgbe_hw	*hw = &sc->hw;
3207 
3208 	if (hw->phy.type == ixgbe_phy_nl &&
3209 	    hw->phy.sfp_type == ixgbe_sfp_type_not_present) {
3210 		int32_t ret;
3211 
3212 		ret = hw->phy.ops.identify_sfp(hw);
3213 		if (ret)
3214 			return FALSE;
3215 
3216 		ret = hw->phy.ops.reset(hw);
3217 		if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3218 			if_printf(&sc->arpcom.ac_if,
3219 			     "Unsupported SFP+ module detected!  "
3220 			     "Reload driver with supported module.\n");
3221 			sc->sfp_probe = FALSE;
3222 			return FALSE;
3223 		}
3224 		if_printf(&sc->arpcom.ac_if, "SFP+ module detected!\n");
3225 
3226 		/* We now have supported optics */
3227 		sc->sfp_probe = FALSE;
3228 
3229 		return TRUE;
3230 	}
3231 	return FALSE;
3232 }
3233 
3234 static void
3235 ix_handle_link(struct ix_softc *sc)
3236 {
3237 	ixgbe_check_link(&sc->hw, &sc->link_speed, &sc->link_up, 0);
3238 	ix_update_link_status(sc);
3239 }
3240 
3241 /*
3242  * Handling SFP module
3243  */
3244 static void
3245 ix_handle_mod(struct ix_softc *sc)
3246 {
3247 	struct ixgbe_hw *hw = &sc->hw;
3248 	uint32_t err;
3249 
3250 	err = hw->phy.ops.identify_sfp(hw);
3251 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3252 		if_printf(&sc->arpcom.ac_if,
3253 		    "Unsupported SFP+ module type was detected.\n");
3254 		return;
3255 	}
3256 	err = hw->mac.ops.setup_sfp(hw);
3257 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3258 		if_printf(&sc->arpcom.ac_if,
3259 		    "Setup failure - unsupported SFP+ module type.\n");
3260 		return;
3261 	}
3262 	ix_handle_msf(sc);
3263 }
3264 
3265 /*
3266  * Handling MSF (multispeed fiber)
3267  */
3268 static void
3269 ix_handle_msf(struct ix_softc *sc)
3270 {
3271 	struct ixgbe_hw *hw = &sc->hw;
3272 	uint32_t autoneg;
3273 
3274 	hw->phy.ops.identify_sfp(hw);
3275 	ix_init_media(sc);
3276 
3277 	if (sc->advspeed != IXGBE_LINK_SPEED_UNKNOWN)
3278 		autoneg = sc->advspeed;
3279 	else
3280 		autoneg = hw->phy.autoneg_advertised;
3281 	if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
3282 		bool negotiate;
3283 
3284 		hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
3285 	}
3286 	if (hw->mac.ops.setup_link != NULL)
3287 		hw->mac.ops.setup_link(hw, autoneg, TRUE);
3288 }
3289 
3290 static void
3291 ix_handle_phy(struct ix_softc *sc)
3292 {
3293 	struct ixgbe_hw *hw = &sc->hw;
3294 	int error;
3295 
3296 	error = hw->phy.ops.handle_lasi(hw);
3297 	if (error == IXGBE_ERR_OVERTEMP) {
3298 		if_printf(&sc->arpcom.ac_if,
3299 		    "CRITICAL: EXTERNAL PHY OVER TEMP!!  "
3300 		    "PHY will downshift to lower power state!\n");
3301 	} else if (error) {
3302 		if_printf(&sc->arpcom.ac_if,
3303 		    "Error handling LASI interrupt: %d\n", error);
3304 	}
3305 }
3306 
3307 static void
3308 ix_update_stats(struct ix_softc *sc)
3309 {
3310 	struct ifnet *ifp = &sc->arpcom.ac_if;
3311 	struct ixgbe_hw *hw = &sc->hw;
3312 	uint32_t missed_rx = 0, bprc, lxon, lxoff, total;
3313 	uint64_t total_missed_rx = 0;
3314 	int i;
3315 
3316 	sc->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
3317 	sc->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
3318 	sc->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
3319 	sc->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
3320 
3321 	for (i = 0; i < 16; i++) {
3322 		sc->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
3323 		sc->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
3324 		sc->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
3325 	}
3326 	sc->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
3327 	sc->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
3328 	sc->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
3329 
3330 	/* Hardware workaround, gprc counts missed packets */
3331 	sc->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
3332 	sc->stats.gprc -= missed_rx;
3333 
3334 	if (hw->mac.type != ixgbe_mac_82598EB) {
3335 		sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
3336 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
3337 		sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
3338 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
3339 		sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
3340 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
3341 		sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
3342 		sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
3343 	} else {
3344 		sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
3345 		sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
3346 		/* 82598 only has a counter in the high register */
3347 		sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
3348 		sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
3349 		sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
3350 	}
3351 
3352 	/*
3353 	 * Workaround: mprc hardware is incorrectly counting
3354 	 * broadcasts, so for now we subtract those.
3355 	 */
3356 	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
3357 	sc->stats.bprc += bprc;
3358 	sc->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
3359 	if (hw->mac.type == ixgbe_mac_82598EB)
3360 		sc->stats.mprc -= bprc;
3361 
3362 	sc->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
3363 	sc->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
3364 	sc->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
3365 	sc->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
3366 	sc->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
3367 	sc->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
3368 
3369 	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
3370 	sc->stats.lxontxc += lxon;
3371 	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
3372 	sc->stats.lxofftxc += lxoff;
3373 	total = lxon + lxoff;
3374 
3375 	sc->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
3376 	sc->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
3377 	sc->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
3378 	sc->stats.gptc -= total;
3379 	sc->stats.mptc -= total;
3380 	sc->stats.ptc64 -= total;
3381 	sc->stats.gotc -= total * ETHER_MIN_LEN;
3382 
3383 	sc->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
3384 	sc->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
3385 	sc->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
3386 	sc->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
3387 	sc->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
3388 	sc->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
3389 	sc->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
3390 	sc->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
3391 	sc->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
3392 	sc->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
3393 	sc->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
3394 	sc->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
3395 	sc->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
3396 	sc->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
3397 	sc->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
3398 	sc->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC);
3399 	sc->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
3400 	sc->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
3401 	/* Only read FCoE counters on 82599 and newer */
3402 	if (hw->mac.type != ixgbe_mac_82598EB) {
3403 		sc->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
3404 		sc->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
3405 		sc->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
3406 		sc->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
3407 		sc->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
3408 	}
3409 
3410 	/* Rx Errors */
3411 	IFNET_STAT_SET(ifp, iqdrops, total_missed_rx);
3412 	IFNET_STAT_SET(ifp, ierrors, sc->stats.crcerrs + sc->stats.rlec);
3413 }
3414 
3415 #if 0
3416 /*
3417  * Add sysctl variables, one per statistic, to the system.
3418  */
3419 static void
3420 ix_add_hw_stats(struct ix_softc *sc)
3421 {
3422 
3423 	device_t dev = sc->dev;
3424 
3425 	struct ix_tx_ring *txr = sc->tx_rings;
3426 	struct ix_rx_ring *rxr = sc->rx_rings;
3427 
3428 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
3429 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
3430 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
3431 	struct ixgbe_hw_stats *stats = &sc->stats;
3432 
3433 	struct sysctl_oid *stat_node, *queue_node;
3434 	struct sysctl_oid_list *stat_list, *queue_list;
3435 
3436 #define QUEUE_NAME_LEN 32
3437 	char namebuf[QUEUE_NAME_LEN];
3438 
3439 	/* MAC stats get their own sub node */
3440 
3441 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
3442 				    CTLFLAG_RD, NULL, "MAC Statistics");
3443 	stat_list = SYSCTL_CHILDREN(stat_node);
3444 
3445 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
3446 			CTLFLAG_RD, &stats->crcerrs,
3447 			"CRC Errors");
3448 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
3449 			CTLFLAG_RD, &stats->illerrc,
3450 			"Illegal Byte Errors");
3451 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
3452 			CTLFLAG_RD, &stats->errbc,
3453 			"Byte Errors");
3454 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
3455 			CTLFLAG_RD, &stats->mspdc,
3456 			"MAC Short Packets Discarded");
3457 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
3458 			CTLFLAG_RD, &stats->mlfc,
3459 			"MAC Local Faults");
3460 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
3461 			CTLFLAG_RD, &stats->mrfc,
3462 			"MAC Remote Faults");
3463 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
3464 			CTLFLAG_RD, &stats->rlec,
3465 			"Receive Length Errors");
3466 
3467 	/* Flow Control stats */
3468 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
3469 			CTLFLAG_RD, &stats->lxontxc,
3470 			"Link XON Transmitted");
3471 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
3472 			CTLFLAG_RD, &stats->lxonrxc,
3473 			"Link XON Received");
3474 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
3475 			CTLFLAG_RD, &stats->lxofftxc,
3476 			"Link XOFF Transmitted");
3477 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
3478 			CTLFLAG_RD, &stats->lxoffrxc,
3479 			"Link XOFF Received");
3480 
3481 	/* Packet Reception Stats */
3482 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
3483 			CTLFLAG_RD, &stats->tor,
3484 			"Total Octets Received");
3485 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
3486 			CTLFLAG_RD, &stats->gorc,
3487 			"Good Octets Received");
3488 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
3489 			CTLFLAG_RD, &stats->tpr,
3490 			"Total Packets Received");
3491 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
3492 			CTLFLAG_RD, &stats->gprc,
3493 			"Good Packets Received");
3494 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
3495 			CTLFLAG_RD, &stats->mprc,
3496 			"Multicast Packets Received");
3497 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
3498 			CTLFLAG_RD, &stats->bprc,
3499 			"Broadcast Packets Received");
3500 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
3501 			CTLFLAG_RD, &stats->prc64,
3502 			"64 byte frames received ");
3503 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
3504 			CTLFLAG_RD, &stats->prc127,
3505 			"65-127 byte frames received");
3506 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
3507 			CTLFLAG_RD, &stats->prc255,
3508 			"128-255 byte frames received");
3509 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
3510 			CTLFLAG_RD, &stats->prc511,
3511 			"256-511 byte frames received");
3512 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
3513 			CTLFLAG_RD, &stats->prc1023,
3514 			"512-1023 byte frames received");
3515 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
3516 			CTLFLAG_RD, &stats->prc1522,
3517 			"1024-1522 byte frames received");
3518 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
3519 			CTLFLAG_RD, &stats->ruc,
3520 			"Receive Undersized");
3521 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
3522 			CTLFLAG_RD, &stats->rfc,
3523 			"Fragmented Packets Received ");
3524 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
3525 			CTLFLAG_RD, &stats->roc,
3526 			"Oversized Packets Received");
3527 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
3528 			CTLFLAG_RD, &stats->rjc,
3529 			"Received Jabber");
3530 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
3531 			CTLFLAG_RD, &stats->mngprc,
3532 			"Management Packets Received");
3533 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
3534 			CTLFLAG_RD, &stats->mngpdc,
3535 			"Management Packets Dropped");
3536 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
3537 			CTLFLAG_RD, &stats->xec,
3538 			"Checksum Errors");
3539 
3540 	/* Packet Transmission Stats */
3541 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
3542 			CTLFLAG_RD, &stats->gotc,
3543 			"Good Octets Transmitted");
3544 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
3545 			CTLFLAG_RD, &stats->tpt,
3546 			"Total Packets Transmitted");
3547 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
3548 			CTLFLAG_RD, &stats->gptc,
3549 			"Good Packets Transmitted");
3550 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
3551 			CTLFLAG_RD, &stats->bptc,
3552 			"Broadcast Packets Transmitted");
3553 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
3554 			CTLFLAG_RD, &stats->mptc,
3555 			"Multicast Packets Transmitted");
3556 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
3557 			CTLFLAG_RD, &stats->mngptc,
3558 			"Management Packets Transmitted");
3559 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
3560 			CTLFLAG_RD, &stats->ptc64,
3561 			"64 byte frames transmitted ");
3562 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
3563 			CTLFLAG_RD, &stats->ptc127,
3564 			"65-127 byte frames transmitted");
3565 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
3566 			CTLFLAG_RD, &stats->ptc255,
3567 			"128-255 byte frames transmitted");
3568 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
3569 			CTLFLAG_RD, &stats->ptc511,
3570 			"256-511 byte frames transmitted");
3571 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
3572 			CTLFLAG_RD, &stats->ptc1023,
3573 			"512-1023 byte frames transmitted");
3574 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
3575 			CTLFLAG_RD, &stats->ptc1522,
3576 			"1024-1522 byte frames transmitted");
3577 }
3578 #endif
3579 
3580 /*
3581  * Enable the hardware to drop packets when the buffer is full.
3582  * This is useful when multiple RX rings are used, so that no
3583  * single RX ring being full stalls the entire RX engine.  We
3584  * only enable this when multiple RX rings are used and when
3585  * flow control is disabled.
3586  */
3587 static void
3588 ix_enable_rx_drop(struct ix_softc *sc)
3589 {
3590 	struct ixgbe_hw *hw = &sc->hw;
3591 	int i;
3592 
3593 	if (bootverbose) {
3594 		if_printf(&sc->arpcom.ac_if,
3595 		    "flow control %s, enable RX drop\n",
3596 		    ix_fc2str(sc->hw.fc.current_mode));
3597 	}
3598 
3599 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
3600 		uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3601 
3602 		srrctl |= IXGBE_SRRCTL_DROP_EN;
3603 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3604 	}
3605 }
3606 
3607 static void
3608 ix_disable_rx_drop(struct ix_softc *sc)
3609 {
3610 	struct ixgbe_hw *hw = &sc->hw;
3611 	int i;
3612 
3613 	if (bootverbose) {
3614 		if_printf(&sc->arpcom.ac_if,
3615 		    "flow control %s, disable RX drop\n",
3616 		    ix_fc2str(sc->hw.fc.current_mode));
3617 	}
3618 
3619 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
3620 		uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3621 
3622 		srrctl &= ~IXGBE_SRRCTL_DROP_EN;
3623 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3624 	}
3625 }
3626 
3627 static void
3628 ix_setup_serialize(struct ix_softc *sc)
3629 {
3630 	int i = 0, j;
3631 
3632 	/* Main + RX + TX */
3633 	sc->nserialize = 1 + sc->rx_ring_cnt + sc->tx_ring_cnt;
3634 	sc->serializes =
3635 	    kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *),
3636 	        M_DEVBUF, M_WAITOK | M_ZERO);
3637 
3638 	/*
3639 	 * Setup serializes
3640 	 *
3641 	 * NOTE: Order is critical
3642 	 */
3643 
3644 	KKASSERT(i < sc->nserialize);
3645 	sc->serializes[i++] = &sc->main_serialize;
3646 
3647 	for (j = 0; j < sc->rx_ring_cnt; ++j) {
3648 		KKASSERT(i < sc->nserialize);
3649 		sc->serializes[i++] = &sc->rx_rings[j].rx_serialize;
3650 	}
3651 
3652 	for (j = 0; j < sc->tx_ring_cnt; ++j) {
3653 		KKASSERT(i < sc->nserialize);
3654 		sc->serializes[i++] = &sc->tx_rings[j].tx_serialize;
3655 	}
3656 
3657 	KKASSERT(i == sc->nserialize);
3658 }
3659 
3660 static int
3661 ix_alloc_intr(struct ix_softc *sc)
3662 {
3663 	struct ix_intr_data *intr;
3664 	struct ix_tx_ring *txr;
3665 	u_int intr_flags;
3666 	int i;
3667 
3668 	ix_alloc_msix(sc);
3669 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
3670 		ix_set_ring_inuse(sc, FALSE);
3671 		goto done;
3672 	}
3673 
3674 	/*
3675 	 * Reset some settings changed by ix_alloc_msix().
3676 	 */
3677 	if (sc->rx_rmap_intr != NULL) {
3678 		if_ringmap_free(sc->rx_rmap_intr);
3679 		sc->rx_rmap_intr = NULL;
3680 	}
3681 	if (sc->tx_rmap_intr != NULL) {
3682 		if_ringmap_free(sc->tx_rmap_intr);
3683 		sc->tx_rmap_intr = NULL;
3684 	}
3685 	if (sc->intr_data != NULL) {
3686 		kfree(sc->intr_data, M_DEVBUF);
3687 		sc->intr_data = NULL;
3688 	}
3689 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
3690 		txr = &sc->tx_rings[i];
3691 		txr->tx_intr_vec = -1;
3692 		txr->tx_intr_cpuid = -1;
3693 	}
3694 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
3695 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
3696 
3697 		rxr->rx_intr_vec = -1;
3698 		rxr->rx_txr = NULL;
3699 	}
3700 
3701 	sc->intr_cnt = 1;
3702 	sc->intr_data = kmalloc(sizeof(struct ix_intr_data), M_DEVBUF,
3703 	    M_WAITOK | M_ZERO);
3704 	intr = &sc->intr_data[0];
3705 
3706 	/*
3707 	 * Allocate MSI/legacy interrupt resource
3708 	 */
3709 	sc->intr_type = pci_alloc_1intr(sc->dev, ix_msi_enable,
3710 	    &intr->intr_rid, &intr_flags);
3711 
3712 	intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
3713 	    &intr->intr_rid, intr_flags);
3714 	if (intr->intr_res == NULL) {
3715 		device_printf(sc->dev, "Unable to allocate bus resource: "
3716 		    "interrupt\n");
3717 		return ENXIO;
3718 	}
3719 
3720 	intr->intr_serialize = &sc->main_serialize;
3721 	intr->intr_cpuid = rman_get_cpuid(intr->intr_res);
3722 	intr->intr_func = ix_intr;
3723 	intr->intr_funcarg = sc;
3724 	intr->intr_rate = IX_INTR_RATE;
3725 	intr->intr_use = IX_INTR_USE_RXTX;
3726 
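	/*
	 * Fixed per-ring interrupt vector assignment used when all
	 * rings are serviced by the single MSI/legacy interrupt.
	 */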
3727 	sc->tx_rings[0].tx_intr_vec = IX_TX_INTR_VEC;
3728 	sc->tx_rings[0].tx_intr_cpuid = intr->intr_cpuid;
3729 
3730 	sc->rx_rings[0].rx_intr_vec = IX_RX0_INTR_VEC;
3731 
3732 	ix_set_ring_inuse(sc, FALSE);
3733 
3734 	KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
3735 	if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS) {
3736 		sc->rx_rings[1].rx_intr_vec = IX_RX1_INTR_VEC;
3737 
3738 		/*
3739 		 * Allocate RX ring map for RSS setup.
3740 		 */
3741 		sc->rx_rmap_intr = if_ringmap_alloc(sc->dev,
3742 		    IX_MIN_RXRING_RSS, IX_MIN_RXRING_RSS);
3743 		KASSERT(if_ringmap_count(sc->rx_rmap_intr) ==
3744 		    sc->rx_ring_inuse, ("RX ring inuse mismatch"));
3745 	}
3746 done:
3747 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
3748 		txr = &sc->tx_rings[i];
3749 		if (txr->tx_intr_cpuid < 0)
3750 			txr->tx_intr_cpuid = 0;
3751 	}
3752 	return 0;
3753 }
3754 
3755 static void
3756 ix_free_intr(struct ix_softc *sc)
3757 {
3758 	if (sc->intr_data == NULL)
3759 		return;
3760 
3761 	if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
3762 		struct ix_intr_data *intr = &sc->intr_data[0];
3763 
3764 		KKASSERT(sc->intr_cnt == 1);
3765 		if (intr->intr_res != NULL) {
3766 			bus_release_resource(sc->dev, SYS_RES_IRQ,
3767 			    intr->intr_rid, intr->intr_res);
3768 		}
3769 		if (sc->intr_type == PCI_INTR_TYPE_MSI)
3770 			pci_release_msi(sc->dev);
3771 
3772 		kfree(sc->intr_data, M_DEVBUF);
3773 	} else {
3774 		ix_free_msix(sc, TRUE);
3775 	}
3776 }
3777 
3778 static void
3779 ix_set_ring_inuse(struct ix_softc *sc, boolean_t polling)
3780 {
3781 	sc->rx_ring_inuse = ix_get_rxring_inuse(sc, polling);
3782 	sc->tx_ring_inuse = ix_get_txring_inuse(sc, polling);
3783 	if (bootverbose) {
3784 		if_printf(&sc->arpcom.ac_if,
3785 		    "RX rings %d/%d, TX rings %d/%d\n",
3786 		    sc->rx_ring_inuse, sc->rx_ring_cnt,
3787 		    sc->tx_ring_inuse, sc->tx_ring_cnt);
3788 	}
3789 }
3790 
3791 static int
3792 ix_get_rxring_inuse(const struct ix_softc *sc, boolean_t polling)
3793 {
3794 	if (!IX_ENABLE_HWRSS(sc))
3795 		return 1;
3796 
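	/*
	 * Polling can use every RX ring; MSI/legacy interrupts are
	 * limited to the minimal RSS ring count, while MSI-X uses
	 * the negotiated MSI-X RX ring count.
	 */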
3797 	if (polling)
3798 		return sc->rx_ring_cnt;
3799 	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
3800 		return IX_MIN_RXRING_RSS;
3801 	else
3802 		return sc->rx_ring_msix;
3803 }
3804 
3805 static int
3806 ix_get_txring_inuse(const struct ix_softc *sc, boolean_t polling)
3807 {
3808 	if (!IX_ENABLE_HWTSS(sc))
3809 		return 1;
3810 
3811 	if (polling)
3812 		return sc->tx_ring_cnt;
3813 	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
3814 		return 1;
3815 	else
3816 		return sc->tx_ring_msix;
3817 }
3818 
3819 static int
3820 ix_setup_intr(struct ix_softc *sc)
3821 {
3822 	int i;
3823 
3824 	for (i = 0; i < sc->intr_cnt; ++i) {
3825 		struct ix_intr_data *intr = &sc->intr_data[i];
3826 		int error;
3827 
3828 		error = bus_setup_intr_descr(sc->dev, intr->intr_res,
3829 		    INTR_MPSAFE, intr->intr_func, intr->intr_funcarg,
3830 		    &intr->intr_hand, intr->intr_serialize, intr->intr_desc);
3831 		if (error) {
3832 			device_printf(sc->dev, "can't set up %dth intr\n", i);
3833 			ix_teardown_intr(sc, i);
3834 			return error;
3835 		}
3836 	}
3837 	return 0;
3838 }
3839 
3840 static void
3841 ix_teardown_intr(struct ix_softc *sc, int intr_cnt)
3842 {
3843 	int i;
3844 
3845 	if (sc->intr_data == NULL)
3846 		return;
3847 
3848 	for (i = 0; i < intr_cnt; ++i) {
3849 		struct ix_intr_data *intr = &sc->intr_data[i];
3850 
3851 		bus_teardown_intr(sc->dev, intr->intr_res, intr->intr_hand);
3852 	}
3853 }
3854 
3855 static void
3856 ix_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
3857 {
3858 	struct ix_softc *sc = ifp->if_softc;
3859 
3860 	ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz);
3861 }
3862 
3863 static void
3864 ix_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
3865 {
3866 	struct ix_softc *sc = ifp->if_softc;
3867 
3868 	ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz);
3869 }
3870 
3871 static int
3872 ix_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
3873 {
3874 	struct ix_softc *sc = ifp->if_softc;
3875 
3876 	return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz);
3877 }
3878 
3879 #ifdef INVARIANTS
3880 
3881 static void
3882 ix_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
3883     boolean_t serialized)
3884 {
3885 	struct ix_softc *sc = ifp->if_softc;
3886 
3887 	ifnet_serialize_array_assert(sc->serializes, sc->nserialize, slz,
3888 	    serialized);
3889 }
3890 
3891 #endif	/* INVARIANTS */
3892 
3893 static void
3894 ix_free_rings(struct ix_softc *sc)
3895 {
3896 	int i;
3897 
3898 	if (sc->tx_rings != NULL) {
3899 		for (i = 0; i < sc->tx_ring_cnt; ++i) {
3900 			struct ix_tx_ring *txr = &sc->tx_rings[i];
3901 
3902 			ix_destroy_tx_ring(txr, txr->tx_ndesc);
3903 		}
3904 		kfree(sc->tx_rings, M_DEVBUF);
3905 	}
3906 
3907 	if (sc->rx_rings != NULL) {
3908 		for (i = 0; i < sc->rx_ring_cnt; ++i) {
3909 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
3910 
3911 			ix_destroy_rx_ring(rxr, rxr->rx_ndesc);
3912 		}
3913 		kfree(sc->rx_rings, M_DEVBUF);
3914 	}
3915 
3916 	if (sc->parent_tag != NULL)
3917 		bus_dma_tag_destroy(sc->parent_tag);
3918 }
3919 
3920 static void
3921 ix_watchdog(struct ifaltq_subque *ifsq)
3922 {
3923 	struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
3924 	struct ifnet *ifp = ifsq_get_ifp(ifsq);
3925 	struct ix_softc *sc = ifp->if_softc;
3926 	int i;
3927 
3928 	KKASSERT(txr->tx_ifsq == ifsq);
3929 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3930 
3931 	/*
3932 	 * If the interface has been paused, don't do the watchdog check.
3933 	 */
3934 	if (IXGBE_READ_REG(&sc->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF) {
3935 		txr->tx_watchdog.wd_timer = 5;
3936 		return;
3937 	}
3938 
3939 	if_printf(ifp, "Watchdog timeout -- resetting\n");
3940 	if_printf(ifp, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->tx_idx,
3941 	    IXGBE_READ_REG(&sc->hw, IXGBE_TDH(txr->tx_idx)),
3942 	    IXGBE_READ_REG(&sc->hw, IXGBE_TDT(txr->tx_idx)));
3943 	if_printf(ifp, "TX(%d) desc avail = %d, next TX to Clean = %d\n",
3944 	    txr->tx_idx, txr->tx_avail, txr->tx_next_clean);
3945 
3946 	ix_init(sc);
3947 	for (i = 0; i < sc->tx_ring_inuse; ++i)
3948 		ifsq_devstart_sched(sc->tx_rings[i].tx_ifsq);
3949 }
3950 
3951 static void
3952 ix_free_tx_ring(struct ix_tx_ring *txr)
3953 {
3954 	int i;
3955 
3956 	for (i = 0; i < txr->tx_ndesc; ++i) {
3957 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
3958 
3959 		if (txbuf->m_head != NULL) {
3960 			bus_dmamap_unload(txr->tx_tag, txbuf->map);
3961 			m_freem(txbuf->m_head);
3962 			txbuf->m_head = NULL;
3963 		}
3964 	}
3965 }
3966 
3967 static void
3968 ix_free_rx_ring(struct ix_rx_ring *rxr)
3969 {
3970 	int i;
3971 
3972 	for (i = 0; i < rxr->rx_ndesc; ++i) {
3973 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
3974 
3975 		if (rxbuf->fmp != NULL) {
3976 			m_freem(rxbuf->fmp);
3977 			rxbuf->fmp = NULL;
3978 			rxbuf->lmp = NULL;
3979 		} else {
3980 			KKASSERT(rxbuf->lmp == NULL);
3981 		}
3982 		if (rxbuf->m_head != NULL) {
3983 			bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
3984 			m_freem(rxbuf->m_head);
3985 			rxbuf->m_head = NULL;
3986 		}
3987 	}
3988 }
3989 
3990 static int
3991 ix_newbuf(struct ix_rx_ring *rxr, int i, boolean_t wait)
3992 {
3993 	struct mbuf *m;
3994 	bus_dma_segment_t seg;
3995 	bus_dmamap_t map;
3996 	struct ix_rx_buf *rxbuf;
3997 	int flags, error, nseg;
3998 
3999 	flags = M_NOWAIT;
4000 	if (__predict_false(wait))
4001 		flags = M_WAITOK;
4002 
4003 	m = m_getjcl(flags, MT_DATA, M_PKTHDR, rxr->rx_mbuf_sz);
4004 	if (m == NULL) {
4005 		if (wait) {
4006 			if_printf(&rxr->rx_sc->arpcom.ac_if,
4007 			    "Unable to allocate RX mbuf\n");
4008 		}
4009 		return ENOBUFS;
4010 	}
4011 	m->m_len = m->m_pkthdr.len = rxr->rx_mbuf_sz;
4012 
4013 	error = bus_dmamap_load_mbuf_segment(rxr->rx_tag,
4014 	    rxr->rx_sparemap, m, &seg, 1, &nseg, BUS_DMA_NOWAIT);
4015 	if (error) {
4016 		m_freem(m);
4017 		if (wait) {
4018 			if_printf(&rxr->rx_sc->arpcom.ac_if,
4019 			    "Unable to load RX mbuf\n");
4020 		}
4021 		return error;
4022 	}
4023 
4024 	rxbuf = &rxr->rx_buf[i];
4025 	if (rxbuf->m_head != NULL)
4026 		bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
4027 
4028 	map = rxbuf->map;
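	/*
	 * Swap the freshly loaded spare DMA map into this RX slot and
	 * recycle the slot's old map as the new spare.
	 */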
4029 	rxbuf->map = rxr->rx_sparemap;
4030 	rxr->rx_sparemap = map;
4031 
4032 	rxbuf->m_head = m;
4033 	rxbuf->paddr = seg.ds_addr;
4034 
4035 	ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4036 	return 0;
4037 }
4038 
4039 static void
4040 ix_add_sysctl(struct ix_softc *sc)
4041 {
4042 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
4043 	struct sysctl_oid *tree = device_get_sysctl_tree(sc->dev);
4044 #ifdef IX_RSS_DEBUG
4045 	char node[32];
4046 	int i;
4047 #endif
4048 
4049 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4050 	    OID_AUTO, "rxr", CTLFLAG_RD, &sc->rx_ring_cnt, 0, "# of RX rings");
4051 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4052 	    OID_AUTO, "rxr_inuse", CTLFLAG_RD, &sc->rx_ring_inuse, 0,
4053 	    "# of RX rings used");
4054 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4055 	    OID_AUTO, "txr", CTLFLAG_RD, &sc->tx_ring_cnt, 0, "# of TX rings");
4056 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4057 	    OID_AUTO, "txr_inuse", CTLFLAG_RD, &sc->tx_ring_inuse, 0,
4058 	    "# of TX rings used");
4059 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4060 	    OID_AUTO, "rxd", CTLTYPE_INT | CTLFLAG_RD,
4061 	    sc, 0, ix_sysctl_rxd, "I",
4062 	    "# of RX descs");
4063 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4064 	    OID_AUTO, "txd", CTLTYPE_INT | CTLFLAG_RD,
4065 	    sc, 0, ix_sysctl_txd, "I",
4066 	    "# of TX descs");
4067 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4068 	    OID_AUTO, "tx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4069 	    sc, 0, ix_sysctl_tx_wreg_nsegs, "I",
4070 	    "# of segments sent before write to hardware register");
4071 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4072 	    OID_AUTO, "rx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4073 	    sc, 0, ix_sysctl_rx_wreg_nsegs, "I",
4074 	    "# of received segments processed before write to hardware register");
4075 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4076 	    OID_AUTO, "tx_intr_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4077 	    sc, 0, ix_sysctl_tx_intr_nsegs, "I",
4078 	    "# of segments per TX interrupt");
4079 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
4080 		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4081 		    OID_AUTO, "tx_msix_cpumap", CTLTYPE_OPAQUE | CTLFLAG_RD,
4082 		    sc->tx_rmap_intr, 0, if_ringmap_cpumap_sysctl, "I",
4083 		    "TX MSI-X CPU map");
4084 		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4085 		    OID_AUTO, "rx_msix_cpumap", CTLTYPE_OPAQUE | CTLFLAG_RD,
4086 		    sc->rx_rmap_intr, 0, if_ringmap_cpumap_sysctl, "I",
4087 		    "RX MSI-X CPU map");
4088 	}
4089 #ifdef IFPOLL_ENABLE
4090 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4091 	    OID_AUTO, "tx_poll_cpumap", CTLTYPE_OPAQUE | CTLFLAG_RD,
4092 	    sc->tx_rmap, 0, if_ringmap_cpumap_sysctl, "I",
4093 	    "TX polling CPU map");
4094 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4095 	    OID_AUTO, "rx_poll_cpumap", CTLTYPE_OPAQUE | CTLFLAG_RD,
4096 	    sc->rx_rmap, 0, if_ringmap_cpumap_sysctl, "I",
4097 	    "RX polling CPU map");
4098 #endif
4099 
4100 #define IX_ADD_INTR_RATE_SYSCTL(sc, use, name) \
4101 do { \
4102 	ix_add_intr_rate_sysctl(sc, IX_INTR_USE_##use, #name, \
4103 	    ix_sysctl_##name, #use " interrupt rate"); \
4104 } while (0)
4105 
4106 	IX_ADD_INTR_RATE_SYSCTL(sc, RXTX, rxtx_intr_rate);
4107 	IX_ADD_INTR_RATE_SYSCTL(sc, RX, rx_intr_rate);
4108 	IX_ADD_INTR_RATE_SYSCTL(sc, TX, tx_intr_rate);
4109 	IX_ADD_INTR_RATE_SYSCTL(sc, STATUS, sts_intr_rate);
4110 
4111 #undef IX_ADD_INTR_RATE_SYSCTL
4112 
4113 #ifdef IX_RSS_DEBUG
4114 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4115 	    OID_AUTO, "rss_debug", CTLFLAG_RW, &sc->rss_debug, 0,
4116 	    "RSS debug level");
4117 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
4118 		ksnprintf(node, sizeof(node), "rx%d_pkt", i);
4119 		SYSCTL_ADD_ULONG(ctx,
4120 		    SYSCTL_CHILDREN(tree), OID_AUTO, node,
4121 		    CTLFLAG_RW, &sc->rx_rings[i].rx_pkts, "RXed packets");
4122 	}
4123 #endif
4124 
4125 #if 0
4126 	ix_add_hw_stats(sc);
4127 #endif
4128 
4129 }
4130 
4131 static int
4132 ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4133 {
4134 	struct ix_softc *sc = (void *)arg1;
4135 	struct ifnet *ifp = &sc->arpcom.ac_if;
4136 	int error, nsegs, i;
4137 
4138 	nsegs = sc->tx_rings[0].tx_wreg_nsegs;
4139 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4140 	if (error || req->newptr == NULL)
4141 		return error;
4142 	if (nsegs < 0)
4143 		return EINVAL;
4144 
4145 	ifnet_serialize_all(ifp);
4146 	for (i = 0; i < sc->tx_ring_cnt; ++i)
4147 		sc->tx_rings[i].tx_wreg_nsegs = nsegs;
4148 	ifnet_deserialize_all(ifp);
4149 
4150 	return 0;
4151 }
4152 
4153 static int
4154 ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4155 {
4156 	struct ix_softc *sc = (void *)arg1;
4157 	struct ifnet *ifp = &sc->arpcom.ac_if;
4158 	int error, nsegs, i;
4159 
4160 	nsegs = sc->rx_rings[0].rx_wreg_nsegs;
4161 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4162 	if (error || req->newptr == NULL)
4163 		return error;
4164 	if (nsegs < 0)
4165 		return EINVAL;
4166 
4167 	ifnet_serialize_all(ifp);
4168 	for (i = 0; i < sc->rx_ring_cnt; ++i)
4169 		sc->rx_rings[i].rx_wreg_nsegs = nsegs;
4170 	ifnet_deserialize_all(ifp);
4171 
4172 	return 0;
4173 }
4174 
4175 static int
4176 ix_sysctl_txd(SYSCTL_HANDLER_ARGS)
4177 {
4178 	struct ix_softc *sc = (void *)arg1;
4179 	int txd;
4180 
4181 	txd = sc->tx_rings[0].tx_ndesc;
4182 	return sysctl_handle_int(oidp, &txd, 0, req);
4183 }
4184 
4185 static int
4186 ix_sysctl_rxd(SYSCTL_HANDLER_ARGS)
4187 {
4188 	struct ix_softc *sc = (void *)arg1;
4189 	int rxd;
4190 
4191 	rxd = sc->rx_rings[0].rx_ndesc;
4192 	return sysctl_handle_int(oidp, &rxd, 0, req);
4193 }
4194 
4195 static int
4196 ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS)
4197 {
4198 	struct ix_softc *sc = (void *)arg1;
4199 	struct ifnet *ifp = &sc->arpcom.ac_if;
4200 	struct ix_tx_ring *txr = &sc->tx_rings[0];
4201 	int error, nsegs;
4202 
4203 	nsegs = txr->tx_intr_nsegs;
4204 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4205 	if (error || req->newptr == NULL)
4206 		return error;
4207 	if (nsegs < 0)
4208 		return EINVAL;
4209 
4210 	ifnet_serialize_all(ifp);
4211 
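	/*
	 * The threshold must leave room for at least one maximally
	 * scattered packet plus the reserved descriptors.
	 */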
4212 	if (nsegs >= txr->tx_ndesc - IX_MAX_SCATTER - IX_TX_RESERVED) {
4213 		error = EINVAL;
4214 	} else {
4215 		int i;
4216 
4217 		error = 0;
4218 		for (i = 0; i < sc->tx_ring_cnt; ++i)
4219 			sc->tx_rings[i].tx_intr_nsegs = nsegs;
4220 	}
4221 
4222 	ifnet_deserialize_all(ifp);
4223 
4224 	return error;
4225 }
4226 
4227 static void
4228 ix_set_eitr(struct ix_softc *sc, int idx, int rate)
4229 {
4230 	uint32_t eitr, eitr_intvl;
4231 
4232 	eitr = IXGBE_READ_REG(&sc->hw, IXGBE_EITR(idx));
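	/*
	 * Convert the requested rate (interrupts/second) into the EITR
	 * interval value; the result is clamped to the per-MAC limits
	 * below.
	 */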
4233 	eitr_intvl = 1000000000 / 256 / rate;
4234 
4235 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4236 		eitr &= ~IX_EITR_INTVL_MASK_82598;
4237 		if (eitr_intvl == 0)
4238 			eitr_intvl = 1;
4239 		else if (eitr_intvl > IX_EITR_INTVL_MASK_82598)
4240 			eitr_intvl = IX_EITR_INTVL_MASK_82598;
4241 	} else {
4242 		eitr &= ~IX_EITR_INTVL_MASK;
4243 
4244 		eitr_intvl &= ~IX_EITR_INTVL_RSVD_MASK;
4245 		if (eitr_intvl == 0)
4246 			eitr_intvl = IX_EITR_INTVL_MIN;
4247 		else if (eitr_intvl > IX_EITR_INTVL_MAX)
4248 			eitr_intvl = IX_EITR_INTVL_MAX;
4249 	}
4250 	eitr |= eitr_intvl;
4251 
4252 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EITR(idx), eitr);
4253 }
4254 
4255 static int
4256 ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS)
4257 {
4258 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RXTX);
4259 }
4260 
4261 static int
4262 ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS)
4263 {
4264 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RX);
4265 }
4266 
4267 static int
4268 ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS)
4269 {
4270 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_TX);
4271 }
4272 
4273 static int
4274 ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS)
4275 {
4276 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_STATUS);
4277 }
4278 
4279 static int
4280 ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int use)
4281 {
4282 	struct ix_softc *sc = (void *)arg1;
4283 	struct ifnet *ifp = &sc->arpcom.ac_if;
4284 	int error, rate, i;
4285 
4286 	rate = 0;
4287 	for (i = 0; i < sc->intr_cnt; ++i) {
4288 		if (sc->intr_data[i].intr_use == use) {
4289 			rate = sc->intr_data[i].intr_rate;
4290 			break;
4291 		}
4292 	}
4293 
4294 	error = sysctl_handle_int(oidp, &rate, 0, req);
4295 	if (error || req->newptr == NULL)
4296 		return error;
4297 	if (rate <= 0)
4298 		return EINVAL;
4299 
4300 	ifnet_serialize_all(ifp);
4301 
4302 	for (i = 0; i < sc->intr_cnt; ++i) {
4303 		if (sc->intr_data[i].intr_use == use) {
4304 			sc->intr_data[i].intr_rate = rate;
4305 			if (ifp->if_flags & IFF_RUNNING)
4306 				ix_set_eitr(sc, i, rate);
4307 		}
4308 	}
4309 
4310 	ifnet_deserialize_all(ifp);
4311 
4312 	return error;
4313 }
4314 
4315 static void
4316 ix_add_intr_rate_sysctl(struct ix_softc *sc, int use,
4317     const char *name, int (*handler)(SYSCTL_HANDLER_ARGS), const char *desc)
4318 {
4319 	int i;
4320 
4321 	for (i = 0; i < sc->intr_cnt; ++i) {
4322 		if (sc->intr_data[i].intr_use == use) {
4323 			SYSCTL_ADD_PROC(device_get_sysctl_ctx(sc->dev),
4324 			    SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)),
4325 			    OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW,
4326 			    sc, 0, handler, "I", desc);
4327 			break;
4328 		}
4329 	}
4330 }
4331 
4332 static void
4333 ix_set_timer_cpuid(struct ix_softc *sc, boolean_t polling)
4334 {
4335 	if (polling || sc->intr_type == PCI_INTR_TYPE_MSIX)
4336 		sc->timer_cpuid = 0; /* XXX fixed */
4337 	else
4338 		sc->timer_cpuid = rman_get_cpuid(sc->intr_data[0].intr_res);
4339 }
4340 
4341 static void
4342 ix_alloc_msix(struct ix_softc *sc)
4343 {
4344 	int msix_enable, msix_cnt, msix_ring, alloc_cnt;
4345 	struct ix_intr_data *intr;
4346 	int i, x, error;
4347 	int ring_cnt, ring_cntmax;
4348 	boolean_t setup = FALSE;
4349 
4350 	msix_enable = ix_msix_enable;
4351 	/*
4352 	 * Don't enable MSI-X on 82598 by default, see:
4353 	 * 82598 specification update errata #38
4354 	 */
4355 	if (sc->hw.mac.type == ixgbe_mac_82598EB)
4356 		msix_enable = 0;
4357 	msix_enable = device_getenv_int(sc->dev, "msix.enable", msix_enable);
4358 	if (!msix_enable)
4359 		return;
4360 
4361 	msix_cnt = pci_msix_count(sc->dev);
4362 #ifdef IX_MSIX_DEBUG
4363 	msix_cnt = device_getenv_int(sc->dev, "msix.count", msix_cnt);
4364 #endif
4365 	if (msix_cnt <= 1) {
4366 		/* A single MSI-X vector makes no sense; use MSI/legacy instead. */
4367 		return;
4368 	}
4369 
4370 	/*
4371 	 * Make sure that we don't exceed the limits of the interrupt
4372 	 * related registers (EIMS, etc).
4373 	 */
4374 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4375 		if (msix_cnt > IX_MAX_MSIX_82598)
4376 			msix_cnt = IX_MAX_MSIX_82598;
4377 	} else {
4378 		if (msix_cnt > IX_MAX_MSIX)
4379 			msix_cnt = IX_MAX_MSIX;
4380 	}
4381 	if (bootverbose)
4382 		device_printf(sc->dev, "MSI-X count %d\n", msix_cnt);
4383 	msix_ring = msix_cnt - 1; /* -1 for status */
4384 
4385 	/*
4386 	 * Configure # of RX/TX rings usable by MSI-X.
4387 	 */
4388 	ix_get_rxring_cnt(sc, &ring_cnt, &ring_cntmax);
4389 	if (ring_cntmax > msix_ring)
4390 		ring_cntmax = msix_ring;
4391 	sc->rx_rmap_intr = if_ringmap_alloc(sc->dev, ring_cnt, ring_cntmax);
4392 
4393 	ix_get_txring_cnt(sc, &ring_cnt, &ring_cntmax);
4394 	if (ring_cntmax > msix_ring)
4395 		ring_cntmax = msix_ring;
4396 	sc->tx_rmap_intr = if_ringmap_alloc(sc->dev, ring_cnt, ring_cntmax);
4397 
4398 	if_ringmap_match(sc->dev, sc->rx_rmap_intr, sc->tx_rmap_intr);
4399 	sc->rx_ring_msix = if_ringmap_count(sc->rx_rmap_intr);
4400 	KASSERT(sc->rx_ring_msix <= sc->rx_ring_cnt,
4401 	    ("total RX ring count %d, MSI-X RX ring count %d",
4402 	     sc->rx_ring_cnt, sc->rx_ring_msix));
4403 	sc->tx_ring_msix = if_ringmap_count(sc->tx_rmap_intr);
4404 	KASSERT(sc->tx_ring_msix <= sc->tx_ring_cnt,
4405 	    ("total TX ring count %d, MSI-X TX ring count %d",
4406 	     sc->tx_ring_cnt, sc->tx_ring_msix));
4407 
4408 	/*
4409 	 * Aggregate TX/RX MSI-X
4410 	 */
4411 	ring_cntmax = sc->rx_ring_msix;
4412 	if (ring_cntmax < sc->tx_ring_msix)
4413 		ring_cntmax = sc->tx_ring_msix;
4414 	KASSERT(ring_cntmax <= msix_ring,
4415 	    ("invalid ring count max %d, MSI-X count for rings %d",
4416 	     ring_cntmax, msix_ring));
4417 
4418 	alloc_cnt = ring_cntmax + 1; /* +1 for status */
4419 	if (bootverbose) {
4420 		device_printf(sc->dev, "MSI-X alloc %d, "
4421 		    "RX ring %d, TX ring %d\n", alloc_cnt,
4422 		    sc->rx_ring_msix, sc->tx_ring_msix);
4423 	}
4424 
4425 	sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82598);
4426 	sc->msix_mem_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4427 	    &sc->msix_mem_rid, RF_ACTIVE);
4428 	if (sc->msix_mem_res == NULL) {
4429 		sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82599);
4430 		sc->msix_mem_res = bus_alloc_resource_any(sc->dev,
4431 		    SYS_RES_MEMORY, &sc->msix_mem_rid, RF_ACTIVE);
4432 		if (sc->msix_mem_res == NULL) {
4433 			device_printf(sc->dev, "Unable to map MSI-X table\n");
4434 			return;
4435 		}
4436 	}
4437 
4438 	sc->intr_cnt = alloc_cnt;
4439 	sc->intr_data = kmalloc(sizeof(struct ix_intr_data) * sc->intr_cnt,
4440 	    M_DEVBUF, M_WAITOK | M_ZERO);
4441 	for (x = 0; x < sc->intr_cnt; ++x) {
4442 		intr = &sc->intr_data[x];
4443 		intr->intr_rid = -1;
4444 		intr->intr_rate = IX_INTR_RATE;
4445 	}
4446 
4447 	x = 0;
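	/*
	 * Assign one MSI-X vector per RX ring.  A TX ring that maps to
	 * the same CPU piggybacks on the RX ring's vector and is then
	 * serviced by ix_msix_rxtx().
	 */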
4448 	for (i = 0; i < sc->rx_ring_msix; ++i) {
4449 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
4450 		struct ix_tx_ring *txr = NULL;
4451 		int cpuid, j;
4452 
4453 		KKASSERT(x < sc->intr_cnt);
4454 		rxr->rx_intr_vec = x;
4455 		ix_setup_msix_eims(sc, x,
4456 		    &rxr->rx_eims, &rxr->rx_eims_val);
4457 
4458 		cpuid = if_ringmap_cpumap(sc->rx_rmap_intr, i);
4459 
4460 		/*
4461 		 * Try to find a TX ring to piggyback on this vector.
4462 		 */
4463 		for (j = 0; j < sc->tx_ring_msix; ++j) {
4464 			if (cpuid ==
4465 			    if_ringmap_cpumap(sc->tx_rmap_intr, j)) {
4466 				txr = &sc->tx_rings[j];
4467 				KKASSERT(txr->tx_intr_cpuid < 0);
4468 				break;
4469 			}
4470 		}
4471 		rxr->rx_txr = txr;
4472 
4473 		intr = &sc->intr_data[x++];
4474 		intr->intr_serialize = &rxr->rx_serialize;
4475 		if (txr != NULL) {
4476 			ksnprintf(intr->intr_desc0,
4477 			    sizeof(intr->intr_desc0), "%s rx%dtx%d",
4478 			    device_get_nameunit(sc->dev), i, txr->tx_idx);
4479 			intr->intr_use = IX_INTR_USE_RXTX;
4480 			intr->intr_func = ix_msix_rxtx;
4481 		} else {
4482 			ksnprintf(intr->intr_desc0,
4483 			    sizeof(intr->intr_desc0), "%s rx%d",
4484 			    device_get_nameunit(sc->dev), i);
4485 			intr->intr_rate = IX_MSIX_RX_RATE;
4486 			intr->intr_use = IX_INTR_USE_RX;
4487 			intr->intr_func = ix_msix_rx;
4488 		}
4489 		intr->intr_funcarg = rxr;
4490 		intr->intr_cpuid = cpuid;
4491 		KKASSERT(intr->intr_cpuid < netisr_ncpus);
4492 		intr->intr_desc = intr->intr_desc0;
4493 
4494 		if (txr != NULL) {
4495 			txr->tx_intr_cpuid = intr->intr_cpuid;
4496 			/* NOTE: Leave TX ring's intr_vec negative. */
4497 		}
4498 	}
4499 
4500 	for (i = 0; i < sc->tx_ring_msix; ++i) {
4501 		struct ix_tx_ring *txr = &sc->tx_rings[i];
4502 
4503 		if (txr->tx_intr_cpuid >= 0) {
4504 			/* Piggybacked by RX ring. */
4505 			continue;
4506 		}
4507 
4508 		KKASSERT(x < sc->intr_cnt);
4509 		txr->tx_intr_vec = x;
4510 		ix_setup_msix_eims(sc, x, &txr->tx_eims, &txr->tx_eims_val);
4511 
4512 		intr = &sc->intr_data[x++];
4513 		intr->intr_serialize = &txr->tx_serialize;
4514 		intr->intr_rate = IX_MSIX_TX_RATE;
4515 		intr->intr_use = IX_INTR_USE_TX;
4516 		intr->intr_func = ix_msix_tx;
4517 		intr->intr_funcarg = txr;
4518 		intr->intr_cpuid = if_ringmap_cpumap(sc->tx_rmap_intr, i);
4519 		KKASSERT(intr->intr_cpuid < netisr_ncpus);
4520 		ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s tx%d",
4521 		    device_get_nameunit(sc->dev), i);
4522 		intr->intr_desc = intr->intr_desc0;
4523 
4524 		txr->tx_intr_cpuid = intr->intr_cpuid;
4525 	}
4526 
4527 	/*
4528 	 * Status MSI-X
4529 	 */
4530 	KKASSERT(x < sc->intr_cnt);
4531 	sc->sts_msix_vec = x;
4532 
4533 	intr = &sc->intr_data[x++];
4534 
4535 	intr->intr_serialize = &sc->main_serialize;
4536 	intr->intr_func = ix_msix_status;
4537 	intr->intr_funcarg = sc;
4538 	intr->intr_cpuid = 0;
4539 	intr->intr_use = IX_INTR_USE_STATUS;
4540 
4541 	ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s sts",
4542 	    device_get_nameunit(sc->dev));
4543 	intr->intr_desc = intr->intr_desc0;
4544 
4545 	KKASSERT(x == sc->intr_cnt);
4546 
4547 	error = pci_setup_msix(sc->dev);
4548 	if (error) {
4549 		device_printf(sc->dev, "Setup MSI-X failed\n");
4550 		goto back;
4551 	}
4552 	setup = TRUE;
4553 
4554 	for (i = 0; i < sc->intr_cnt; ++i) {
4555 		intr = &sc->intr_data[i];
4556 
4557 		error = pci_alloc_msix_vector(sc->dev, i, &intr->intr_rid,
4558 		    intr->intr_cpuid);
4559 		if (error) {
4560 			device_printf(sc->dev,
4561 			    "Unable to allocate MSI-X %d on cpu%d\n", i,
4562 			    intr->intr_cpuid);
4563 			goto back;
4564 		}
4565 
4566 		intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
4567 		    &intr->intr_rid, RF_ACTIVE);
4568 		if (intr->intr_res == NULL) {
4569 			device_printf(sc->dev,
4570 			    "Unable to allocate MSI-X %d resource\n", i);
4571 			error = ENOMEM;
4572 			goto back;
4573 		}
4574 	}
4575 
4576 	pci_enable_msix(sc->dev);
4577 	sc->intr_type = PCI_INTR_TYPE_MSIX;
4578 back:
4579 	if (error)
4580 		ix_free_msix(sc, setup);
4581 }
4582 
4583 static void
4584 ix_free_msix(struct ix_softc *sc, boolean_t setup)
4585 {
4586 	int i;
4587 
4588 	KKASSERT(sc->intr_cnt > 1);
4589 
4590 	for (i = 0; i < sc->intr_cnt; ++i) {
4591 		struct ix_intr_data *intr = &sc->intr_data[i];
4592 
4593 		if (intr->intr_res != NULL) {
4594 			bus_release_resource(sc->dev, SYS_RES_IRQ,
4595 			    intr->intr_rid, intr->intr_res);
4596 		}
4597 		if (intr->intr_rid >= 0)
4598 			pci_release_msix_vector(sc->dev, intr->intr_rid);
4599 	}
4600 	if (setup)
4601 		pci_teardown_msix(sc->dev);
4602 
4603 	sc->intr_cnt = 0;
4604 	kfree(sc->intr_data, M_DEVBUF);
4605 	sc->intr_data = NULL;
4606 }
4607 
4608 static void
4609 ix_msix_rx(void *xrxr)
4610 {
4611 	struct ix_rx_ring *rxr = xrxr;
4612 
4613 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4614 
4615 	ix_rxeof(rxr, -1);
4616 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
4617 }
4618 
4619 static void
4620 ix_msix_tx(void *xtxr)
4621 {
4622 	struct ix_tx_ring *txr = xtxr;
4623 
4624 	ASSERT_SERIALIZED(&txr->tx_serialize);
4625 
4626 	ix_txeof(txr, *(txr->tx_hdr));
4627 	if (!ifsq_is_empty(txr->tx_ifsq))
4628 		ifsq_devstart(txr->tx_ifsq);
4629 	IXGBE_WRITE_REG(&txr->tx_sc->hw, txr->tx_eims, txr->tx_eims_val);
4630 }
4631 
4632 static void
4633 ix_msix_rxtx(void *xrxr)
4634 {
4635 	struct ix_rx_ring *rxr = xrxr;
4636 	struct ix_tx_ring *txr;
4637 	int hdr;
4638 
4639 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4640 
4641 	ix_rxeof(rxr, -1);
4642 
4643 	/*
4644 	 * NOTE:
4645 	 * Since tx_next_clean is only changed by ix_txeof(),
4646 	 * which is only called from the interrupt handler, this
4647 	 * check without holding the TX serializer is MPSAFE.
4648 	 */
4649 	txr = rxr->rx_txr;
4650 	hdr = *(txr->tx_hdr);
4651 	if (hdr != txr->tx_next_clean) {
4652 		lwkt_serialize_enter(&txr->tx_serialize);
4653 		ix_txeof(txr, hdr);
4654 		if (!ifsq_is_empty(txr->tx_ifsq))
4655 			ifsq_devstart(txr->tx_ifsq);
4656 		lwkt_serialize_exit(&txr->tx_serialize);
4657 	}
4658 
4659 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
4660 }
4661 
4662 static void
4663 ix_intr_status(struct ix_softc *sc, uint32_t eicr)
4664 {
4665 	struct ixgbe_hw *hw = &sc->hw;
4666 
4667 	/* Link status change */
4668 	if (eicr & IXGBE_EICR_LSC)
4669 		ix_handle_link(sc);
4670 
4671 	if (hw->mac.type != ixgbe_mac_82598EB) {
4672 		if (eicr & IXGBE_EICR_ECC)
4673 			if_printf(&sc->arpcom.ac_if, "ECC ERROR!!  Reboot!!\n");
4674 
4675 		/* Check for over temp condition */
4676 		if (eicr & IXGBE_EICR_TS) {
4677 			if_printf(&sc->arpcom.ac_if, "CRITICAL: OVER TEMP!!  "
4678 			    "PHY IS SHUT DOWN!!  Shutdown!!\n");
4679 		}
4680 	}
4681 
4682 	if (ix_is_sfp(hw)) {
4683 		uint32_t mod_mask;
4684 
4685 		/* Pluggable optics-related interrupt */
4686 		if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP)
4687 			mod_mask = IXGBE_EICR_GPI_SDP0_X540;
4688 		else
4689 			mod_mask = IXGBE_EICR_GPI_SDP2_BY_MAC(hw);
4690 		if (eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw))
4691 			ix_handle_msf(sc);
4692 		else if (eicr & mod_mask)
4693 			ix_handle_mod(sc);
4694 	}
4695 
4696 	/* Check for fan failure */
4697 	if (hw->device_id == IXGBE_DEV_ID_82598AT &&
4698 	    (eicr & IXGBE_EICR_GPI_SDP1))
4699 		if_printf(&sc->arpcom.ac_if, "FAN FAILURE!!  Replace!!\n");
4700 
4701 	/* External PHY interrupt */
4702 	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T &&
4703 	    (eicr & IXGBE_EICR_GPI_SDP0_X540))
4704 		ix_handle_phy(sc);
4705 }
4706 
4707 static void
4708 ix_msix_status(void *xsc)
4709 {
4710 	struct ix_softc *sc = xsc;
4711 	uint32_t eicr;
4712 
4713 	ASSERT_SERIALIZED(&sc->main_serialize);
4714 
4715 	eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
4716 	ix_intr_status(sc, eicr);
4717 
4718 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMS, sc->intr_mask);
4719 }
4720 
4721 static void
4722 ix_setup_msix_eims(const struct ix_softc *sc, int x,
4723     uint32_t *eims, uint32_t *eims_val)
4724 {
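	/*
	 * Vectors 0-31 are signalled through EIMS (82598) or EIMS_EX(0);
	 * vectors 32-63 use EIMS_EX(1).  The 82598 only supports the
	 * low range.
	 */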
4725 	if (x < 32) {
4726 		if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4727 			KASSERT(x < IX_MAX_MSIX_82598,
4728 			    ("%s: invalid vector %d for 82598",
4729 			     device_get_nameunit(sc->dev), x));
4730 			*eims = IXGBE_EIMS;
4731 		} else {
4732 			*eims = IXGBE_EIMS_EX(0);
4733 		}
4734 		*eims_val = 1 << x;
4735 	} else {
4736 		KASSERT(x < IX_MAX_MSIX, ("%s: invalid vector %d",
4737 		    device_get_nameunit(sc->dev), x));
4738 		KASSERT(sc->hw.mac.type != ixgbe_mac_82598EB,
4739 		    ("%s: invalid vector %d for 82598",
4740 		     device_get_nameunit(sc->dev), x));
4741 		*eims = IXGBE_EIMS_EX(1);
4742 		*eims_val = 1 << (x - 32);
4743 	}
4744 }
4745 
4746 #ifdef IFPOLL_ENABLE
4747 
4748 static void
4749 ix_npoll_status(struct ifnet *ifp)
4750 {
4751 	struct ix_softc *sc = ifp->if_softc;
4752 	uint32_t eicr;
4753 
4754 	ASSERT_SERIALIZED(&sc->main_serialize);
4755 
4756 	eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
4757 	ix_intr_status(sc, eicr);
4758 }
4759 
4760 static void
4761 ix_npoll_tx(struct ifnet *ifp, void *arg, int cycle __unused)
4762 {
4763 	struct ix_tx_ring *txr = arg;
4764 
4765 	ASSERT_SERIALIZED(&txr->tx_serialize);
4766 
4767 	ix_txeof(txr, *(txr->tx_hdr));
4768 	if (!ifsq_is_empty(txr->tx_ifsq))
4769 		ifsq_devstart(txr->tx_ifsq);
4770 }
4771 
4772 static void
4773 ix_npoll_rx(struct ifnet *ifp __unused, void *arg, int cycle)
4774 {
4775 	struct ix_rx_ring *rxr = arg;
4776 
4777 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4778 
4779 	ix_rxeof(rxr, cycle);
4780 }
4781 
4782 static void
4783 ix_npoll(struct ifnet *ifp, struct ifpoll_info *info)
4784 {
4785 	struct ix_softc *sc = ifp->if_softc;
4786 	int i, txr_cnt, rxr_cnt;
4787 
4788 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
4789 
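	/*
	 * info != NULL: polling is being enabled, register the per-CPU
	 * status/TX/RX handlers.  info == NULL: polling is being
	 * disabled, rebind the TX subqueues to their interrupt CPUs.
	 */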
4790 	if (info) {
4791 		int cpu;
4792 
4793 		info->ifpi_status.status_func = ix_npoll_status;
4794 		info->ifpi_status.serializer = &sc->main_serialize;
4795 
4796 		txr_cnt = ix_get_txring_inuse(sc, TRUE);
4797 		for (i = 0; i < txr_cnt; ++i) {
4798 			struct ix_tx_ring *txr = &sc->tx_rings[i];
4799 
4800 			cpu = if_ringmap_cpumap(sc->tx_rmap, i);
4801 			KKASSERT(cpu < netisr_ncpus);
4802 			info->ifpi_tx[cpu].poll_func = ix_npoll_tx;
4803 			info->ifpi_tx[cpu].arg = txr;
4804 			info->ifpi_tx[cpu].serializer = &txr->tx_serialize;
4805 			ifsq_set_cpuid(txr->tx_ifsq, cpu);
4806 		}
4807 
4808 		rxr_cnt = ix_get_rxring_inuse(sc, TRUE);
4809 		for (i = 0; i < rxr_cnt; ++i) {
4810 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
4811 
4812 			cpu = if_ringmap_cpumap(sc->rx_rmap, i);
4813 			KKASSERT(cpu < netisr_ncpus);
4814 			info->ifpi_rx[cpu].poll_func = ix_npoll_rx;
4815 			info->ifpi_rx[cpu].arg = rxr;
4816 			info->ifpi_rx[cpu].serializer = &rxr->rx_serialize;
4817 		}
4818 	} else {
4819 		for (i = 0; i < sc->tx_ring_cnt; ++i) {
4820 			struct ix_tx_ring *txr = &sc->tx_rings[i];
4821 
4822 			ifsq_set_cpuid(txr->tx_ifsq, txr->tx_intr_cpuid);
4823 		}
4824 	}
4825 	if (ifp->if_flags & IFF_RUNNING)
4826 		ix_init(sc);
4827 }
4828 
4829 #endif /* IFPOLL_ENABLE */
4830 
4831 static enum ixgbe_fc_mode
4832 ix_ifmedia2fc(int ifm)
4833 {
4834 	int fc_opt = ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
4835 
4836 	switch (fc_opt) {
4837 	case (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE):
4838 		return ixgbe_fc_full;
4839 
4840 	case IFM_ETH_RXPAUSE:
4841 		return ixgbe_fc_rx_pause;
4842 
4843 	case IFM_ETH_TXPAUSE:
4844 		return ixgbe_fc_tx_pause;
4845 
4846 	default:
4847 		return ixgbe_fc_none;
4848 	}
4849 }
4850 
4851 static const char *
4852 ix_ifmedia2str(int ifm)
4853 {
4854 	int fc_opt = ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
4855 
4856 	switch (fc_opt) {
4857 	case (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE):
4858 		return IFM_ETH_FC_FULL;
4859 
4860 	case IFM_ETH_RXPAUSE:
4861 		return IFM_ETH_FC_RXPAUSE;
4862 
4863 	case IFM_ETH_TXPAUSE:
4864 		return IFM_ETH_FC_TXPAUSE;
4865 
4866 	default:
4867 		return IFM_ETH_FC_NONE;
4868 	}
4869 }
4870 
4871 static const char *
4872 ix_fc2str(enum ixgbe_fc_mode fc)
4873 {
4874 	switch (fc) {
4875 	case ixgbe_fc_full:
4876 		return IFM_ETH_FC_FULL;
4877 
4878 	case ixgbe_fc_rx_pause:
4879 		return IFM_ETH_FC_RXPAUSE;
4880 
4881 	case ixgbe_fc_tx_pause:
4882 		return IFM_ETH_FC_TXPAUSE;
4883 
4884 	default:
4885 		return IFM_ETH_FC_NONE;
4886 	}
4887 }
4888 
4889 static int
4890 ix_powerdown(struct ix_softc *sc)
4891 {
4892 	struct ixgbe_hw *hw = &sc->hw;
4893 	int error = 0;
4894 
4895 	/* Limit power management flow to X550EM baseT */
4896 	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T &&
4897 	    hw->phy.ops.enter_lplu) {
4898 		/* Turn off support for APM wakeup. (Using ACPI instead) */
4899 		IXGBE_WRITE_REG(hw, IXGBE_GRC,
4900 		    IXGBE_READ_REG(hw, IXGBE_GRC) & ~(uint32_t)2);
4901 
4902 		/*
4903 		 * Clear Wake Up Status register to prevent any previous wakeup
4904 		 * events from waking us up immediately after we suspend.
4905 		 */
4906 		IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff);
4907 
4908 		/*
4909 		 * Program the Wakeup Filter Control register with user filter
4910 		 * settings
4911 		 */
4912 		IXGBE_WRITE_REG(hw, IXGBE_WUFC, sc->wufc);
4913 
4914 		/* Enable wakeups and power management in Wakeup Control */
4915 		IXGBE_WRITE_REG(hw, IXGBE_WUC,
4916 		    IXGBE_WUC_WKEN | IXGBE_WUC_PME_EN);
4917 
4918 		/* X550EM baseT adapters need a special LPLU flow */
4919 		hw->phy.reset_disable = true;
4920 		ix_stop(sc);
4921 		error = hw->phy.ops.enter_lplu(hw);
4922 		if (error) {
4923 			if_printf(&sc->arpcom.ac_if,
4924 			    "Error entering LPLU: %d\n", error);
4925 		}
4926 		hw->phy.reset_disable = false;
4927 	} else {
4928 		/* Just stop for other adapters */
4929 		ix_stop(sc);
4930 	}
4931 	return error;
4932 }
4933 
4934 static void
4935 ix_config_flowctrl(struct ix_softc *sc)
4936 {
4937 	struct ixgbe_hw *hw = &sc->hw;
4938 	uint32_t rxpb, frame, size, tmp;
4939 
4940 	frame = sc->max_frame_size;
4941 
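	/*
	 * The high watermark is the RX packet buffer size in KB
	 * (RXPBSIZE >> 10) minus the KB equivalent of the delay value
	 * computed for the maximum frame size.
	 */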
4942 	/* Calculate High Water */
4943 	switch (hw->mac.type) {
4944 	case ixgbe_mac_X540:
4945 	case ixgbe_mac_X550:
4946 	case ixgbe_mac_X550EM_a:
4947 	case ixgbe_mac_X550EM_x:
4948 		tmp = IXGBE_DV_X540(frame, frame);
4949 		break;
4950 	default:
4951 		tmp = IXGBE_DV(frame, frame);
4952 		break;
4953 	}
4954 	size = IXGBE_BT2KB(tmp);
4955 	rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
4956 	hw->fc.high_water[0] = rxpb - size;
4957 
4958 	/* Now calculate Low Water */
4959 	switch (hw->mac.type) {
4960 	case ixgbe_mac_X540:
4961 	case ixgbe_mac_X550:
4962 	case ixgbe_mac_X550EM_a:
4963 	case ixgbe_mac_X550EM_x:
4964 		tmp = IXGBE_LOW_DV_X540(frame);
4965 		break;
4966 	default:
4967 		tmp = IXGBE_LOW_DV(frame);
4968 		break;
4969 	}
4970 	hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
4971 
4972 	hw->fc.requested_mode = ix_ifmedia2fc(sc->ifm_media);
4973 	if (sc->ifm_media & IFM_ETH_FORCEPAUSE)
4974 		hw->fc.disable_fc_autoneg = TRUE;
4975 	else
4976 		hw->fc.disable_fc_autoneg = FALSE;
4977 	hw->fc.pause_time = IX_FC_PAUSE;
4978 	hw->fc.send_xon = TRUE;
4979 }
4980 
4981 static void
4982 ix_config_dmac(struct ix_softc *sc)
4983 {
4984 	struct ixgbe_hw *hw = &sc->hw;
4985 	struct ixgbe_dmac_config *dcfg = &hw->mac.dmac_config;
4986 
4987 	if (hw->mac.type < ixgbe_mac_X550 || !hw->mac.ops.dmac_config)
4988 		return;
4989 
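	/*
	 * Only reprogram DMA coalescing when the watchdog timer or the
	 * link speed has actually changed.
	 */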
4990 	if ((dcfg->watchdog_timer ^ sc->dmac) ||
4991 	    (dcfg->link_speed ^ sc->link_speed)) {
4992 		dcfg->watchdog_timer = sc->dmac;
4993 		dcfg->fcoe_en = false;
4994 		dcfg->link_speed = sc->link_speed;
4995 		dcfg->num_tcs = 1;
4996 
4997 		if (bootverbose) {
4998 			if_printf(&sc->arpcom.ac_if, "dmac settings: "
4999 			    "watchdog %d, link speed %d\n",
5000 			    dcfg->watchdog_timer, dcfg->link_speed);
5001 		}
5002 
5003 		hw->mac.ops.dmac_config(hw);
5004 	}
5005 }
5006 
5007 static void
5008 ix_init_media(struct ix_softc *sc)
5009 {
5010 	struct ixgbe_hw *hw = &sc->hw;
5011 	int layer, msf_ifm = IFM_NONE;
5012 
5013 	ifmedia_removeall(&sc->media);
5014 
5015 	layer = ixgbe_get_supported_physical_layer(hw);
5016 
5017 	/*
5018 	 * Media types with matching DragonFlyBSD media defines
5019 	 */
5020 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
5021 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_T | IFM_FDX,
5022 		    0, NULL);
5023 	}
5024 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
5025 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
5026 		    0, NULL);
5027 	}
5028 	if (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX) {
5029 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
5030 		    0, NULL);
5031 		/* No half-duplex support */
5032 	}
5033 
5034 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) {
5035 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_LR | IFM_FDX,
5036 		    0, NULL);
5037 		msf_ifm = IFM_1000_LX;
5038 	}
5039 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LRM) {
5040 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_LRM | IFM_FDX,
5041 		    0, NULL);
5042 		msf_ifm = IFM_1000_LX;
5043 	}
5044 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
5045 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_SR | IFM_FDX,
5046 		    0, NULL);
5047 		msf_ifm = IFM_1000_SX;
5048 	}
5049 
5050 	/* Add media for multispeed fiber */
5051 	if (ix_is_sfp(hw) && hw->phy.multispeed_fiber && msf_ifm != IFM_NONE) {
5052 		uint32_t linkcap;
5053 		bool autoneg;
5054 
5055 		hw->mac.ops.get_link_capabilities(hw, &linkcap, &autoneg);
5056 		if (linkcap & IXGBE_LINK_SPEED_1GB_FULL)
5057 			ifmedia_add_nodup(&sc->media,
5058 			    IFM_ETHER | msf_ifm | IFM_FDX, 0, NULL);
5059 	}
5060 
5061 	if ((layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) ||
5062 	    (layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA)) {
5063 		ifmedia_add_nodup(&sc->media,
5064 		    IFM_ETHER | IFM_10G_TWINAX | IFM_FDX, 0, NULL);
5065 	}
5066 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) {
5067 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_CX4 | IFM_FDX,
5068 		    0, NULL);
5069 	}
5070 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
5071 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
5072 		    0, NULL);
5073 	}
5074 
5075 	/*
5076 	 * XXX Other (no matching DragonFlyBSD media type):
5077 	 * To work around this, we'll assign these completely
5078 	 * inappropriate media types.
5079 	 */
5080 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) {
5081 		if_printf(&sc->arpcom.ac_if, "Media supported: 10GbaseKR\n");
5082 		if_printf(&sc->arpcom.ac_if, "10GbaseKR mapped to 10GbaseSR\n");
5083 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_SR | IFM_FDX,
5084 		    0, NULL);
5085 	}
5086 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) {
5087 		if_printf(&sc->arpcom.ac_if, "Media supported: 10GbaseKX4\n");
5088 		if_printf(&sc->arpcom.ac_if,
5089 		    "10GbaseKX4 mapped to 10GbaseCX4\n");
5090 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_CX4 | IFM_FDX,
5091 		    0, NULL);
5092 	}
5093 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) {
5094 		if_printf(&sc->arpcom.ac_if, "Media supported: 1000baseKX\n");
5095 		if_printf(&sc->arpcom.ac_if,
5096 		    "1000baseKX mapped to 1000baseCX\n");
5097 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_CX | IFM_FDX,
5098 		    0, NULL);
5099 	}
5100 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_BX) {
5101 		/* Someday, someone will care about you... */
5102 		if_printf(&sc->arpcom.ac_if,
5103 		    "Media supported: 1000baseBX, ignored\n");
5104 	}
5105 
5106 	/* XXX we probably don't need this */
5107 	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
5108 		ifmedia_add_nodup(&sc->media,
5109 		    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
5110 	}
5111 
5112 	ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
5113 
5114 	if (ifmedia_tryset(&sc->media, sc->ifm_media)) {
5115 		int flowctrl = (sc->ifm_media & IFM_ETH_FCMASK);
5116 
5117 		sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;
5118 		sc->ifm_media = IX_IFM_DEFAULT | flowctrl;
5119 		ifmedia_set(&sc->media, sc->ifm_media);
5120 	}
5121 }
5122