xref: /dragonfly/sys/dev/netif/ix/if_ix.c (revision edf2e657)
1 /*
2  * Copyright (c) 2001-2014, Intel Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  *  1. Redistributions of source code must retain the above copyright notice,
9  *     this list of conditions and the following disclaimer.
10  *
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *     notice, this list of conditions and the following disclaimer in the
13  *     documentation and/or other materials provided with the distribution.
14  *
15  *  3. Neither the name of the Intel Corporation nor the names of its
16  *     contributors may be used to endorse or promote products derived from
17  *     this software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include "opt_ifpoll.h"
33 #include "opt_ix.h"
34 
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/endian.h>
38 #include <sys/interrupt.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/proc.h>
43 #include <sys/rman.h>
44 #include <sys/serialize.h>
45 #include <sys/serialize2.h>
46 #include <sys/socket.h>
47 #include <sys/sockio.h>
48 #include <sys/sysctl.h>
49 #include <sys/systm.h>
50 
51 #include <net/bpf.h>
52 #include <net/ethernet.h>
53 #include <net/if.h>
54 #include <net/if_arp.h>
55 #include <net/if_dl.h>
56 #include <net/if_media.h>
57 #include <net/ifq_var.h>
58 #include <net/if_ringmap.h>
59 #include <net/toeplitz.h>
60 #include <net/toeplitz2.h>
61 #include <net/vlan/if_vlan_var.h>
62 #include <net/vlan/if_vlan_ether.h>
63 #include <net/if_poll.h>
64 
65 #include <netinet/in_systm.h>
66 #include <netinet/in.h>
67 #include <netinet/ip.h>
68 
69 #include <bus/pci/pcivar.h>
70 #include <bus/pci/pcireg.h>
71 
72 #include <dev/netif/ix/ixgbe_api.h>
73 #include <dev/netif/ix/if_ix.h>
74 
75 #define IX_IFM_DEFAULT		(IFM_ETHER | IFM_AUTO)
76 
77 #ifdef IX_RSS_DEBUG
78 #define IX_RSS_DPRINTF(sc, lvl, fmt, ...) \
79 do { \
80 	if (sc->rss_debug >= lvl) \
81 		if_printf(&sc->arpcom.ac_if, fmt, __VA_ARGS__); \
82 } while (0)
83 #else	/* !IX_RSS_DEBUG */
84 #define IX_RSS_DPRINTF(sc, lvl, fmt, ...)	((void)0)
85 #endif	/* IX_RSS_DEBUG */
86 
87 #define IX_NAME			"Intel(R) PRO/10GbE "
88 #define IX_DEVICE(id) \
89 	{ IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_##id, IX_NAME #id }
90 #define IX_DEVICE_NULL		{ 0, 0, NULL }
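/*
 * For example, IX_DEVICE(82599_SFP) expands to
 * { IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, "Intel(R) PRO/10GbE 82599_SFP" }.
 */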
91 
92 static struct ix_device {
93 	uint16_t	vid;
94 	uint16_t	did;
95 	const char	*desc;
96 } ix_devices[] = {
97 	IX_DEVICE(82598AF_DUAL_PORT),
98 	IX_DEVICE(82598AF_SINGLE_PORT),
99 	IX_DEVICE(82598EB_CX4),
100 	IX_DEVICE(82598AT),
101 	IX_DEVICE(82598AT2),
102 	IX_DEVICE(82598),
103 	IX_DEVICE(82598_DA_DUAL_PORT),
104 	IX_DEVICE(82598_CX4_DUAL_PORT),
105 	IX_DEVICE(82598EB_XF_LR),
106 	IX_DEVICE(82598_SR_DUAL_PORT_EM),
107 	IX_DEVICE(82598EB_SFP_LOM),
108 	IX_DEVICE(82599_KX4),
109 	IX_DEVICE(82599_KX4_MEZZ),
110 	IX_DEVICE(82599_SFP),
111 	IX_DEVICE(82599_XAUI_LOM),
112 	IX_DEVICE(82599_CX4),
113 	IX_DEVICE(82599_T3_LOM),
114 	IX_DEVICE(82599_COMBO_BACKPLANE),
115 	IX_DEVICE(82599_BACKPLANE_FCOE),
116 	IX_DEVICE(82599_SFP_SF2),
117 	IX_DEVICE(82599_SFP_FCOE),
118 	IX_DEVICE(82599EN_SFP),
119 	IX_DEVICE(82599_SFP_SF_QP),
120 	IX_DEVICE(82599_QSFP_SF_QP),
121 	IX_DEVICE(X540T),
122 	IX_DEVICE(X540T1),
123 	IX_DEVICE(X550T),
124 	IX_DEVICE(X550EM_X_KR),
125 	IX_DEVICE(X550EM_X_KX4),
126 	IX_DEVICE(X550EM_X_10G_T),
127 
128 	/* required last entry */
129 	IX_DEVICE_NULL
130 };
131 
132 static int	ix_probe(device_t);
133 static int	ix_attach(device_t);
134 static int	ix_detach(device_t);
135 static int	ix_shutdown(device_t);
136 
137 static void	ix_serialize(struct ifnet *, enum ifnet_serialize);
138 static void	ix_deserialize(struct ifnet *, enum ifnet_serialize);
139 static int	ix_tryserialize(struct ifnet *, enum ifnet_serialize);
140 #ifdef INVARIANTS
141 static void	ix_serialize_assert(struct ifnet *, enum ifnet_serialize,
142 		    boolean_t);
143 #endif
144 static void	ix_start(struct ifnet *, struct ifaltq_subque *);
145 static void	ix_watchdog(struct ifaltq_subque *);
146 static int	ix_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
147 static void	ix_init(void *);
148 static void	ix_stop(struct ix_softc *);
149 static void	ix_media_status(struct ifnet *, struct ifmediareq *);
150 static int	ix_media_change(struct ifnet *);
151 static void	ix_timer(void *);
152 #ifdef IFPOLL_ENABLE
153 static void	ix_npoll(struct ifnet *, struct ifpoll_info *);
154 static void	ix_npoll_rx(struct ifnet *, void *, int);
155 static void	ix_npoll_tx(struct ifnet *, void *, int);
156 static void	ix_npoll_status(struct ifnet *);
157 #endif
158 
159 static void	ix_add_sysctl(struct ix_softc *);
160 static void	ix_add_intr_rate_sysctl(struct ix_softc *, int,
161 		    const char *, int (*)(SYSCTL_HANDLER_ARGS), const char *);
162 static int	ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
163 static int	ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
164 static int	ix_sysctl_txd(SYSCTL_HANDLER_ARGS);
165 static int	ix_sysctl_rxd(SYSCTL_HANDLER_ARGS);
166 static int	ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS);
167 static int	ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int);
168 static int	ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS);
169 static int	ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS);
170 static int	ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS);
171 static int	ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS);
172 #if 0
173 static void     ix_add_hw_stats(struct ix_softc *);
174 #endif
175 
176 static void	ix_slot_info(struct ix_softc *);
177 static int	ix_alloc_rings(struct ix_softc *);
178 static void	ix_free_rings(struct ix_softc *);
179 static void	ix_setup_ifp(struct ix_softc *);
180 static void	ix_setup_serialize(struct ix_softc *);
181 static void	ix_set_ring_inuse(struct ix_softc *, boolean_t);
182 static void	ix_set_timer_cpuid(struct ix_softc *, boolean_t);
183 static void	ix_update_stats(struct ix_softc *);
184 
185 static void	ix_set_promisc(struct ix_softc *);
186 static void	ix_set_multi(struct ix_softc *);
187 static void	ix_set_vlan(struct ix_softc *);
188 static uint8_t	*ix_mc_array_itr(struct ixgbe_hw *, uint8_t **, uint32_t *);
189 static enum ixgbe_fc_mode ix_ifmedia2fc(int);
190 static const char *ix_ifmedia2str(int);
191 static const char *ix_fc2str(enum ixgbe_fc_mode);
192 
193 static void	ix_get_txring_cnt(const struct ix_softc *, int *, int *);
194 static int	ix_get_txring_inuse(const struct ix_softc *, boolean_t);
195 static void	ix_init_tx_ring(struct ix_tx_ring *);
196 static void	ix_free_tx_ring(struct ix_tx_ring *);
197 static int	ix_create_tx_ring(struct ix_tx_ring *);
198 static void	ix_destroy_tx_ring(struct ix_tx_ring *, int);
199 static void	ix_init_tx_unit(struct ix_softc *);
200 static int	ix_encap(struct ix_tx_ring *, struct mbuf **,
201 		    uint16_t *, int *);
202 static int	ix_tx_ctx_setup(struct ix_tx_ring *,
203 		    const struct mbuf *, uint32_t *, uint32_t *);
204 static int	ix_tso_ctx_setup(struct ix_tx_ring *,
205 		    const struct mbuf *, uint32_t *, uint32_t *);
206 static void	ix_txeof(struct ix_tx_ring *, int);
207 
208 static void	ix_get_rxring_cnt(const struct ix_softc *, int *, int *);
209 static int	ix_get_rxring_inuse(const struct ix_softc *, boolean_t);
210 static int	ix_init_rx_ring(struct ix_rx_ring *);
211 static void	ix_free_rx_ring(struct ix_rx_ring *);
212 static int	ix_create_rx_ring(struct ix_rx_ring *);
213 static void	ix_destroy_rx_ring(struct ix_rx_ring *, int);
214 static void	ix_init_rx_unit(struct ix_softc *, boolean_t);
215 #if 0
216 static void	ix_setup_hw_rsc(struct ix_rx_ring *);
217 #endif
218 static int	ix_newbuf(struct ix_rx_ring *, int, boolean_t);
219 static void	ix_rxeof(struct ix_rx_ring *, int);
220 static void	ix_rx_discard(struct ix_rx_ring *, int, boolean_t);
221 static void	ix_enable_rx_drop(struct ix_softc *);
222 static void	ix_disable_rx_drop(struct ix_softc *);
223 
224 static void	ix_alloc_msix(struct ix_softc *);
225 static void	ix_free_msix(struct ix_softc *, boolean_t);
226 static void	ix_setup_msix_eims(const struct ix_softc *, int,
227 		    uint32_t *, uint32_t *);
228 static int	ix_alloc_intr(struct ix_softc *);
229 static void	ix_free_intr(struct ix_softc *);
230 static int	ix_setup_intr(struct ix_softc *);
231 static void	ix_teardown_intr(struct ix_softc *, int);
232 static void	ix_enable_intr(struct ix_softc *);
233 static void	ix_disable_intr(struct ix_softc *);
234 static void	ix_set_ivar(struct ix_softc *, uint8_t, uint8_t, int8_t);
235 static void	ix_set_eitr(struct ix_softc *, int, int);
236 static void	ix_intr_status(struct ix_softc *, uint32_t);
237 static void	ix_intr(void *);
238 static void	ix_msix_rxtx(void *);
239 static void	ix_msix_rx(void *);
240 static void	ix_msix_tx(void *);
241 static void	ix_msix_status(void *);
242 
243 static void	ix_config_link(struct ix_softc *);
244 static boolean_t ix_sfp_probe(struct ix_softc *);
245 static boolean_t ix_is_sfp(const struct ixgbe_hw *);
246 static void	ix_update_link_status(struct ix_softc *);
247 static void	ix_handle_link(struct ix_softc *);
248 static void	ix_handle_mod(struct ix_softc *);
249 static void	ix_handle_msf(struct ix_softc *);
250 static void	ix_handle_phy(struct ix_softc *);
251 static int	ix_powerdown(struct ix_softc *);
252 static void	ix_config_flowctrl(struct ix_softc *);
253 static void	ix_config_dmac(struct ix_softc *);
254 static void	ix_init_media(struct ix_softc *);
255 
256 /* XXX Missing shared code prototype */
257 extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *);
258 
259 static device_method_t ix_methods[] = {
260 	/* Device interface */
261 	DEVMETHOD(device_probe,		ix_probe),
262 	DEVMETHOD(device_attach,	ix_attach),
263 	DEVMETHOD(device_detach,	ix_detach),
264 	DEVMETHOD(device_shutdown,	ix_shutdown),
265 	DEVMETHOD_END
266 };
267 
268 static driver_t ix_driver = {
269 	"ix",
270 	ix_methods,
271 	sizeof(struct ix_softc)
272 };
273 
274 static devclass_t ix_devclass;
275 
276 DECLARE_DUMMY_MODULE(if_ix);
277 DRIVER_MODULE(if_ix, pci, ix_driver, ix_devclass, NULL, NULL);
278 
279 static int	ix_msi_enable = 1;
280 static int	ix_msix_enable = 1;
281 static int	ix_rxr = 0;
282 static int	ix_txr = 0;
283 static int	ix_txd = IX_PERF_TXD;
284 static int	ix_rxd = IX_PERF_RXD;
285 static int	ix_unsupported_sfp = 0;
286 
287 static char	ix_flowctrl[IFM_ETH_FC_STRLEN] = IFM_ETH_FC_FULL;
288 
289 TUNABLE_INT("hw.ix.msi.enable", &ix_msi_enable);
290 TUNABLE_INT("hw.ix.msix.enable", &ix_msix_enable);
291 TUNABLE_INT("hw.ix.rxr", &ix_rxr);
292 TUNABLE_INT("hw.ix.txr", &ix_txr);
293 TUNABLE_INT("hw.ix.txd", &ix_txd);
294 TUNABLE_INT("hw.ix.rxd", &ix_rxd);
295 TUNABLE_INT("hw.ix.unsupported_sfp", &ix_unsupported_sfp);
296 TUNABLE_STR("hw.ix.flow_ctrl", ix_flowctrl, sizeof(ix_flowctrl));
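/*
 * These tunables are read from the kernel environment at boot; e.g.
 * (hypothetical values) in /boot/loader.conf:
 *
 *	hw.ix.msix.enable="0"
 *	hw.ix.flow_ctrl="none"
 */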
297 
298 /*
299  * Smart speed setting, default to on.  This only works
300  * as a compile option right now, since it is set during attach;
301  * set this to 'ixgbe_smart_speed_off' to disable.
302  */
303 static const enum ixgbe_smart_speed ix_smart_speed =
304     ixgbe_smart_speed_on;
305 
306 static int
307 ix_probe(device_t dev)
308 {
309 	const struct ix_device *d;
310 	uint16_t vid, did;
311 
312 	vid = pci_get_vendor(dev);
313 	did = pci_get_device(dev);
314 
315 	for (d = ix_devices; d->desc != NULL; ++d) {
316 		if (vid == d->vid && did == d->did) {
317 			device_set_desc(dev, d->desc);
318 			return 0;
319 		}
320 	}
321 	return ENXIO;
322 }
323 
324 static void
325 ix_get_rxring_cnt(const struct ix_softc *sc, int *ring_cnt, int *ring_cntmax)
326 {
327 
328 	switch (sc->hw.mac.type) {
329 	case ixgbe_mac_X550:
330 	case ixgbe_mac_X550EM_x:
331 	case ixgbe_mac_X550EM_a:
332 		*ring_cntmax = IX_MAX_RXRING_X550;
333 		break;
334 
335 	default:
336 		*ring_cntmax = IX_MAX_RXRING;
337 		break;
338 	}
339 	*ring_cnt = device_getenv_int(sc->dev, "rxr", ix_rxr);
340 }
341 
342 static void
343 ix_get_txring_cnt(const struct ix_softc *sc, int *ring_cnt, int *ring_cntmax)
344 {
345 
346 	switch (sc->hw.mac.type) {
347 	case ixgbe_mac_82598EB:
348 		*ring_cntmax = IX_MAX_TXRING_82598;
349 		break;
350 
351 	case ixgbe_mac_82599EB:
352 		*ring_cntmax = IX_MAX_TXRING_82599;
353 		break;
354 
355 	case ixgbe_mac_X540:
356 		*ring_cntmax = IX_MAX_TXRING_X540;
357 		break;
358 
359 	case ixgbe_mac_X550:
360 	case ixgbe_mac_X550EM_x:
361 	case ixgbe_mac_X550EM_a:
362 		*ring_cntmax = IX_MAX_TXRING_X550;
363 		break;
364 
365 	default:
366 		*ring_cntmax = IX_MAX_TXRING;
367 		break;
368 	}
369 	*ring_cnt = device_getenv_int(sc->dev, "txr", ix_txr);
370 }
371 
372 static int
373 ix_attach(device_t dev)
374 {
375 	struct ix_softc *sc = device_get_softc(dev);
376 	struct ixgbe_hw *hw;
377 	int error, ring_cnt, ring_cntmax;
378 	uint16_t csum;
379 	uint32_t ctrl_ext;
380 	char flowctrl[IFM_ETH_FC_STRLEN];
381 
382 	sc->dev = sc->osdep.dev = dev;
383 	hw = &sc->hw;
384 
385 	if_initname(&sc->arpcom.ac_if, device_get_name(dev),
386 	    device_get_unit(dev));
387 	ifmedia_init(&sc->media, IFM_IMASK | IFM_ETH_FCMASK,
388 	    ix_media_change, ix_media_status);
389 
390 	/* Save frame size */
391 	sc->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;
392 
393 	callout_init_mp(&sc->timer);
394 	lwkt_serialize_init(&sc->main_serialize);
395 
396 	/*
397 	 * Save off the information about this board
398 	 */
399 	hw->vendor_id = pci_get_vendor(dev);
400 	hw->device_id = pci_get_device(dev);
401 	hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
402 	hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2);
403 	hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);
404 
405 	ixgbe_set_mac_type(hw);
406 
407 	/* Apply the smart speed setting on 82599 and later (not 82598) */
408 	if (hw->mac.type != ixgbe_mac_82598EB)
409 		hw->phy.smart_speed = ix_smart_speed;
410 
411 	/* Enable bus mastering */
412 	pci_enable_busmaster(dev);
413 
414 	/*
415 	 * Allocate IO memory
416 	 */
417 	sc->mem_rid = PCIR_BAR(0);
418 	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
419 	    &sc->mem_rid, RF_ACTIVE);
420 	if (sc->mem_res == NULL) {
421 		device_printf(dev, "Unable to allocate bus resource: memory\n");
422 		error = ENXIO;
423 		goto failed;
424 	}
425 
426 	sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->mem_res);
427 	sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->mem_res);
428 
429 	sc->hw.hw_addr = (uint8_t *)&sc->osdep.mem_bus_space_handle;
430 	sc->hw.back = &sc->osdep;
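	/*
	 * The shared code's IXGBE_READ_REG()/IXGBE_WRITE_REG() accessors
	 * go through the bus-space tag/handle saved in the osdep above.
	 */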
431 
432 	/*
433 	 * Configure total supported RX/TX ring count
434 	 */
435 	ix_get_rxring_cnt(sc, &ring_cnt, &ring_cntmax);
436 	sc->rx_rmap = if_ringmap_alloc(dev, ring_cnt, ring_cntmax);
437 	ix_get_txring_cnt(sc, &ring_cnt, &ring_cntmax);
438 	sc->tx_rmap = if_ringmap_alloc(dev, ring_cnt, ring_cntmax);
439 	if_ringmap_match(dev, sc->rx_rmap, sc->tx_rmap);
440 
441 	sc->rx_ring_cnt = if_ringmap_count(sc->rx_rmap);
442 	sc->rx_ring_inuse = sc->rx_ring_cnt;
443 	sc->tx_ring_cnt = if_ringmap_count(sc->tx_rmap);
444 	sc->tx_ring_inuse = sc->tx_ring_cnt;
445 
446 	/* Allocate TX/RX rings */
447 	error = ix_alloc_rings(sc);
448 	if (error)
449 		goto failed;
450 
451 	/* Allocate interrupt */
452 	error = ix_alloc_intr(sc);
453 	if (error)
454 		goto failed;
455 
456 	/* Setup the serializer array */
457 	ix_setup_serialize(sc);
458 
459 	/* Allocate multicast array memory. */
460 	sc->mta = kmalloc(IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR,
461 	    M_DEVBUF, M_WAITOK);
462 
463 	/* Initialize the shared code */
464 	hw->allow_unsupported_sfp = ix_unsupported_sfp;
465 	error = ixgbe_init_shared_code(hw);
466 	if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
467 		/*
468 		 * No optics in this port; ask timer routine
469 		 * to probe for later insertion.
470 		 */
471 		sc->sfp_probe = TRUE;
472 		error = 0;
473 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
474 		device_printf(dev, "Unsupported SFP+ module detected!\n");
475 		error = EIO;
476 		goto failed;
477 	} else if (error) {
478 		device_printf(dev, "Unable to initialize the shared code\n");
479 		error = EIO;
480 		goto failed;
481 	}
482 
483 	/* Make sure we have a good EEPROM before we read from it */
484 	if (ixgbe_validate_eeprom_checksum(&sc->hw, &csum) < 0) {
485 		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
486 		error = EIO;
487 		goto failed;
488 	}
489 
490 	error = ixgbe_init_hw(hw);
491 	if (error == IXGBE_ERR_EEPROM_VERSION) {
492 		device_printf(dev, "Pre-production device detected\n");
493 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
494 		device_printf(dev, "Unsupported SFP+ Module\n");
495 		error = EIO;
496 		goto failed;
497 	} else if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
498 		device_printf(dev, "No SFP+ Module found\n");
499 	}
500 
501 	sc->ifm_media = IX_IFM_DEFAULT;
502 	/* Get default flow control settings */
503 	device_getenv_string(dev, "flow_ctrl", flowctrl, sizeof(flowctrl),
504 	    ix_flowctrl);
505 	sc->ifm_media |= ifmedia_str2ethfc(flowctrl);
506 	sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;
507 
508 	/* Setup OS specific network interface */
509 	ix_setup_ifp(sc);
510 
511 	/* Add sysctl tree */
512 	ix_add_sysctl(sc);
513 
514 	error = ix_setup_intr(sc);
515 	if (error) {
516 		ether_ifdetach(&sc->arpcom.ac_if);
517 		goto failed;
518 	}
519 
520 	/* Initialize statistics */
521 	ix_update_stats(sc);
522 
523 	/* Check PCIE slot type/speed/width */
524 	ix_slot_info(sc);
525 
526 	/* Save initial wake up filter configuration */
527 	sc->wufc = IXGBE_READ_REG(hw, IXGBE_WUFC);
528 
529 	/* Let hardware know driver is loaded */
530 	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
531 	ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
532 	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
533 
534 	return 0;
535 failed:
536 	ix_detach(dev);
537 	return error;
538 }
539 
540 static int
541 ix_detach(device_t dev)
542 {
543 	struct ix_softc *sc = device_get_softc(dev);
544 
545 	if (device_is_attached(dev)) {
546 		struct ifnet *ifp = &sc->arpcom.ac_if;
547 		uint32_t ctrl_ext;
548 
549 		ifnet_serialize_all(ifp);
550 
551 		ix_powerdown(sc);
552 		ix_teardown_intr(sc, sc->intr_cnt);
553 
554 		ifnet_deserialize_all(ifp);
555 
556 		callout_terminate(&sc->timer);
557 		ether_ifdetach(ifp);
558 
559 		/* Let hardware know driver is unloading */
560 		ctrl_ext = IXGBE_READ_REG(&sc->hw, IXGBE_CTRL_EXT);
561 		ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
562 		IXGBE_WRITE_REG(&sc->hw, IXGBE_CTRL_EXT, ctrl_ext);
563 	}
564 
565 	ifmedia_removeall(&sc->media);
566 	bus_generic_detach(dev);
567 
568 	ix_free_intr(sc);
569 
570 	if (sc->msix_mem_res != NULL) {
571 		bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_mem_rid,
572 		    sc->msix_mem_res);
573 	}
574 	if (sc->mem_res != NULL) {
575 		bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid,
576 		    sc->mem_res);
577 	}
578 
579 	ix_free_rings(sc);
580 
581 	if (sc->mta != NULL)
582 		kfree(sc->mta, M_DEVBUF);
583 	if (sc->serializes != NULL)
584 		kfree(sc->serializes, M_DEVBUF);
585 
586 	if (sc->rx_rmap != NULL)
587 		if_ringmap_free(sc->rx_rmap);
588 	if (sc->rx_rmap_intr != NULL)
589 		if_ringmap_free(sc->rx_rmap_intr);
590 	if (sc->tx_rmap != NULL)
591 		if_ringmap_free(sc->tx_rmap);
592 	if (sc->tx_rmap_intr != NULL)
593 		if_ringmap_free(sc->tx_rmap_intr);
594 
595 	return 0;
596 }
597 
598 static int
599 ix_shutdown(device_t dev)
600 {
601 	struct ix_softc *sc = device_get_softc(dev);
602 	struct ifnet *ifp = &sc->arpcom.ac_if;
603 
604 	ifnet_serialize_all(ifp);
605 	ix_powerdown(sc);
606 	ifnet_deserialize_all(ifp);
607 
608 	return 0;
609 }
610 
611 static void
612 ix_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
613 {
614 	struct ix_softc *sc = ifp->if_softc;
615 	struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
616 	int idx = -1;
617 	uint16_t nsegs;
618 
619 	KKASSERT(txr->tx_ifsq == ifsq);
620 	ASSERT_SERIALIZED(&txr->tx_serialize);
621 
622 	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
623 		return;
624 
625 	if (!sc->link_active || (txr->tx_flags & IX_TXFLAG_ENABLED) == 0) {
626 		ifsq_purge(ifsq);
627 		return;
628 	}
629 
630 	while (!ifsq_is_empty(ifsq)) {
631 		struct mbuf *m_head;
632 
633 		if (txr->tx_avail <= IX_MAX_SCATTER + IX_TX_RESERVED) {
634 			ifsq_set_oactive(ifsq);
635 			txr->tx_watchdog.wd_timer = 5;
636 			break;
637 		}
638 
639 		m_head = ifsq_dequeue(ifsq);
640 		if (m_head == NULL)
641 			break;
642 
643 		if (ix_encap(txr, &m_head, &nsegs, &idx)) {
644 			IFNET_STAT_INC(ifp, oerrors, 1);
645 			continue;
646 		}
647 
648 		/*
649 		 * TX interrupts are aggressively aggregated, so increasing
650 		 * opackets at TX interrupt time will make the opackets
651 		 * statistics vastly inaccurate; we do the opackets increment
652 		 * now.
653 		 */
654 		IFNET_STAT_INC(ifp, opackets, 1);
655 
656 		if (nsegs >= txr->tx_wreg_nsegs) {
657 			IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
658 			nsegs = 0;
659 			idx = -1;
660 		}
661 
662 		ETHER_BPF_MTAP(ifp, m_head);
663 	}
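	/* Flush any remaining, not-yet-announced descriptors to the hardware. */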
664 	if (idx >= 0)
665 		IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
666 }
667 
668 static int
669 ix_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
670 {
671 	struct ix_softc *sc = ifp->if_softc;
672 	struct ifreq *ifr = (struct ifreq *) data;
673 	int error = 0, mask, reinit;
674 
675 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
676 
677 	switch (command) {
678 	case SIOCSIFMTU:
679 		if (ifr->ifr_mtu > IX_MAX_MTU) {
680 			error = EINVAL;
681 		} else {
682 			ifp->if_mtu = ifr->ifr_mtu;
683 			sc->max_frame_size = ifp->if_mtu + IX_MTU_HDR;
684 			ix_init(sc);
685 		}
686 		break;
687 
688 	case SIOCSIFFLAGS:
689 		if (ifp->if_flags & IFF_UP) {
690 			if (ifp->if_flags & IFF_RUNNING) {
691 				if ((ifp->if_flags ^ sc->if_flags) &
692 				    (IFF_PROMISC | IFF_ALLMULTI))
693 					ix_set_promisc(sc);
694 			} else {
695 				ix_init(sc);
696 			}
697 		} else if (ifp->if_flags & IFF_RUNNING) {
698 			ix_stop(sc);
699 		}
700 		sc->if_flags = ifp->if_flags;
701 		break;
702 
703 	case SIOCADDMULTI:
704 	case SIOCDELMULTI:
705 		if (ifp->if_flags & IFF_RUNNING) {
706 			ix_disable_intr(sc);
707 			ix_set_multi(sc);
708 #ifdef IFPOLL_ENABLE
709 			if ((ifp->if_flags & IFF_NPOLLING) == 0)
710 #endif
711 				ix_enable_intr(sc);
712 		}
713 		break;
714 
715 	case SIOCSIFMEDIA:
716 	case SIOCGIFMEDIA:
717 		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
718 		break;
719 
720 	case SIOCSIFCAP:
721 		reinit = 0;
722 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
723 		if (mask & IFCAP_RXCSUM) {
724 			ifp->if_capenable ^= IFCAP_RXCSUM;
725 			reinit = 1;
726 		}
727 		if (mask & IFCAP_VLAN_HWTAGGING) {
728 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
729 			reinit = 1;
730 		}
731 		if (mask & IFCAP_TXCSUM) {
732 			ifp->if_capenable ^= IFCAP_TXCSUM;
733 			if (ifp->if_capenable & IFCAP_TXCSUM)
734 				ifp->if_hwassist |= CSUM_OFFLOAD;
735 			else
736 				ifp->if_hwassist &= ~CSUM_OFFLOAD;
737 		}
738 		if (mask & IFCAP_TSO) {
739 			ifp->if_capenable ^= IFCAP_TSO;
740 			if (ifp->if_capenable & IFCAP_TSO)
741 				ifp->if_hwassist |= CSUM_TSO;
742 			else
743 				ifp->if_hwassist &= ~CSUM_TSO;
744 		}
745 		if (mask & IFCAP_RSS)
746 			ifp->if_capenable ^= IFCAP_RSS;
747 		if (reinit && (ifp->if_flags & IFF_RUNNING))
748 			ix_init(sc);
749 		break;
750 
751 #if 0
752 	case SIOCGI2C:
753 	{
754 		struct ixgbe_i2c_req	i2c;
755 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
756 		if (error)
757 			break;
758 		if ((i2c.dev_addr != 0xA0) && (i2c.dev_addr != 0xA2)) {
759 			error = EINVAL;
760 			break;
761 		}
762 		hw->phy.ops.read_i2c_byte(hw, i2c.offset,
763 		    i2c.dev_addr, i2c.data);
764 		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
765 		break;
766 	}
767 #endif
768 
769 	default:
770 		error = ether_ioctl(ifp, command, data);
771 		break;
772 	}
773 	return error;
774 }
775 
776 #define IXGBE_MHADD_MFS_SHIFT 16
777 
778 static void
779 ix_init(void *xsc)
780 {
781 	struct ix_softc *sc = xsc;
782 	struct ifnet *ifp = &sc->arpcom.ac_if;
783 	struct ixgbe_hw *hw = &sc->hw;
784 	uint32_t gpie, rxctrl;
785 	int i, error;
786 	boolean_t polling;
787 
788 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
789 
790 	ix_stop(sc);
791 
792 	polling = FALSE;
793 #ifdef IFPOLL_ENABLE
794 	if (ifp->if_flags & IFF_NPOLLING)
795 		polling = TRUE;
796 #endif
797 
798 	/* Configure # of used RX/TX rings */
799 	ix_set_ring_inuse(sc, polling);
800 	ifq_set_subq_divisor(&ifp->if_snd, sc->tx_ring_inuse);
801 
802 	/* Get the latest MAC address; the user may have set a LAA */
803 	bcopy(IF_LLADDR(ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS);
804 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
805 	hw->addr_ctrl.rar_used_count = 1;
806 
807 	/* Prepare transmit descriptors and buffers */
808 	for (i = 0; i < sc->tx_ring_inuse; ++i)
809 		ix_init_tx_ring(&sc->tx_rings[i]);
810 
811 	ixgbe_init_hw(hw);
812 	ix_init_tx_unit(sc);
813 
814 	/* Setup Multicast table */
815 	ix_set_multi(sc);
816 
817 	/* Prepare receive descriptors and buffers */
818 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
819 		error = ix_init_rx_ring(&sc->rx_rings[i]);
820 		if (error) {
821 			if_printf(ifp, "Could not initialize RX ring %d\n", i);
822 			ix_stop(sc);
823 			return;
824 		}
825 	}
826 
827 	/* Configure RX settings */
828 	ix_init_rx_unit(sc, polling);
829 
830 	gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
831 
832 	/* Enable Fan Failure Interrupt */
833 	gpie |= IXGBE_SDP1_GPIEN_BY_MAC(hw);
834 
835 	/* Enable SFP+ module insertion/removal detection */
836 	if (hw->mac.type == ixgbe_mac_82599EB)
837 		gpie |= IXGBE_SDP2_GPIEN;
838 
839 	/*
840 	 * Thermal Failure Detection (X540)
841 	 * Link Detection (X552)
842 	 */
843 	if (hw->mac.type == ixgbe_mac_X540 ||
844 	    hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
845 	    hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T)
846 		gpie |= IXGBE_SDP0_GPIEN_X540;
847 
848 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
849 		/* Enable Enhanced MSIX mode */
850 		gpie |= IXGBE_GPIE_MSIX_MODE;
851 		gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
852 		    IXGBE_GPIE_OCD;
853 	}
854 	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
855 
856 	/* Set MTU size */
857 	if (ifp->if_mtu > ETHERMTU) {
858 		uint32_t mhadd;
859 
860 		/* aka IXGBE_MAXFRS on 82599 and newer */
861 		mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
862 		mhadd &= ~IXGBE_MHADD_MFS_MASK;
863 		mhadd |= sc->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
864 		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
865 	}
866 
867 	/*
868 	 * Enable TX rings
869 	 */
870 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
871 		uint32_t txdctl;
872 
873 		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
874 		txdctl |= IXGBE_TXDCTL_ENABLE;
875 
876 		/*
877 		 * Set WTHRESH to 0, since TX head write-back is used
878 		 */
879 		txdctl &= ~(0x7f << 16);
880 
881 		/*
882 		 * When the internal queue falls below PTHRESH (32),
883 		 * start prefetching as long as there are at least
884 		 * HTHRESH (1) buffers ready. The values are taken
885 		 * from the Intel linux driver 3.8.21.
886 		 * Prefetching enables tx line rate even with 1 queue.
887 		 */
888 		txdctl |= (32 << 0) | (1 << 8);
889 		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
890 	}
891 
892 	/*
893 	 * Enable RX rings
894 	 */
895 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
896 		uint32_t rxdctl;
897 		int k;
898 
899 		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
900 		if (hw->mac.type == ixgbe_mac_82598EB) {
901 			/*
902 			 * PTHRESH = 21
903 			 * HTHRESH = 4
904 			 * WTHRESH = 8
905 			 */
906 			rxdctl &= ~0x3FFFFF;
907 			rxdctl |= 0x080420;
908 		}
909 		rxdctl |= IXGBE_RXDCTL_ENABLE;
910 		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
911 		for (k = 0; k < 10; ++k) {
912 			if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
913 			    IXGBE_RXDCTL_ENABLE)
914 				break;
915 			else
916 				msec_delay(1);
917 		}
918 		wmb();
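		/*
		 * Point the tail at the last descriptor: the whole ring is
		 * made available for receive.
		 */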
919 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i),
920 		    sc->rx_rings[0].rx_ndesc - 1);
921 	}
922 
923 	/* Enable Receive engine */
924 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
925 	if (hw->mac.type == ixgbe_mac_82598EB)
926 		rxctrl |= IXGBE_RXCTRL_DMBYPS;
927 	rxctrl |= IXGBE_RXCTRL_RXEN;
928 	ixgbe_enable_rx_dma(hw, rxctrl);
929 
930 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
931 		const struct ix_tx_ring *txr = &sc->tx_rings[i];
932 
933 		if (txr->tx_intr_vec >= 0) {
934 			ix_set_ivar(sc, i, txr->tx_intr_vec, 1);
935 		} else if (!polling) {
936 			/*
937 			 * An unconfigured TX interrupt vector can only
938 			 * happen with MSI-X.
939 			 */
940 			KASSERT(sc->intr_type == PCI_INTR_TYPE_MSIX,
941 			    ("TX intr vector is not set"));
942 			if (bootverbose)
943 				if_printf(ifp, "IVAR skips TX ring %d\n", i);
944 		}
945 	}
946 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
947 		const struct ix_rx_ring *rxr = &sc->rx_rings[i];
948 
949 		if (polling && rxr->rx_intr_vec < 0)
950 			continue;
951 
952 		KKASSERT(rxr->rx_intr_vec >= 0);
953 		ix_set_ivar(sc, i, rxr->rx_intr_vec, 0);
954 		if (rxr->rx_txr != NULL) {
955 			/*
956 			 * Piggyback the TX ring interrupt onto the RX
957 			 * ring interrupt vector.
958 			 */
959 			KASSERT(rxr->rx_txr->tx_intr_vec < 0,
960 			    ("piggybacked TX ring configured intr vector"));
961 			ix_set_ivar(sc, rxr->rx_txr->tx_idx,
962 			    rxr->rx_intr_vec, 1);
963 			if (bootverbose) {
964 				if_printf(ifp, "IVAR RX ring %d piggybacks "
965 				    "TX ring %u\n", i, rxr->rx_txr->tx_idx);
966 			}
967 		}
968 	}
969 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
970 		/* Set up the status MSI-X vector; it uses fixed entry 1 */
971 		ix_set_ivar(sc, 1, sc->sts_msix_vec, -1);
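		/* A "type" of -1 selects the misc (other causes) IVAR entry. */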
972 
973 		/* Set up auto-mask for TX and RX rings */
974 		if (hw->mac.type == ixgbe_mac_82598EB) {
975 			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
976 		} else {
977 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
978 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
979 		}
980 	} else {
981 		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
982 	}
983 	for (i = 0; i < sc->intr_cnt; ++i)
984 		ix_set_eitr(sc, i, sc->intr_data[i].intr_rate);
985 
986 	/*
987 	 * Check on any SFP devices that need to be kick-started
988 	 */
989 	if (hw->phy.type == ixgbe_phy_none) {
990 		error = hw->phy.ops.identify(hw);
991 		if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
992 			if_printf(ifp,
993 			    "Unsupported SFP+ module type was detected.\n");
994 			/* XXX stop */
995 			return;
996 		}
997 	}
998 
999 	/* Config/Enable Link */
1000 	ix_config_link(sc);
1001 
1002 	/* Hardware Packet Buffer & Flow Control setup */
1003 	ix_config_flowctrl(sc);
1004 
1005 	/* Initialize the FC settings */
1006 	ixgbe_start_hw(hw);
1007 
1008 	/* Set up VLAN support and filter */
1009 	ix_set_vlan(sc);
1010 
1011 	/* Setup DMA Coalescing */
1012 	ix_config_dmac(sc);
1013 
1014 	/*
1015 	 * Only enable interrupts if we are not polling; make sure
1016 	 * they are off otherwise.
1017 	 */
1018 	if (polling)
1019 		ix_disable_intr(sc);
1020 	else
1021 		ix_enable_intr(sc);
1022 
1023 	ifp->if_flags |= IFF_RUNNING;
1024 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
1025 		ifsq_clr_oactive(sc->tx_rings[i].tx_ifsq);
1026 		ifsq_watchdog_start(&sc->tx_rings[i].tx_watchdog);
1027 	}
1028 
1029 	ix_set_timer_cpuid(sc, polling);
1030 	callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
1031 }
1032 
1033 static void
1034 ix_intr(void *xsc)
1035 {
1036 	struct ix_softc *sc = xsc;
1037 	struct ixgbe_hw	*hw = &sc->hw;
1038 	uint32_t eicr;
1039 
1040 	ASSERT_SERIALIZED(&sc->main_serialize);
1041 
1042 	eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1043 	if (eicr == 0) {
1044 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
1045 		return;
1046 	}
1047 
1048 	if (eicr & IX_RX0_INTR_MASK) {
1049 		struct ix_rx_ring *rxr = &sc->rx_rings[0];
1050 
1051 		lwkt_serialize_enter(&rxr->rx_serialize);
1052 		ix_rxeof(rxr, -1);
1053 		lwkt_serialize_exit(&rxr->rx_serialize);
1054 	}
1055 	if (eicr & IX_RX1_INTR_MASK) {
1056 		struct ix_rx_ring *rxr;
1057 
1058 		KKASSERT(sc->rx_ring_inuse == IX_MIN_RXRING_RSS);
1059 		rxr = &sc->rx_rings[1];
1060 
1061 		lwkt_serialize_enter(&rxr->rx_serialize);
1062 		ix_rxeof(rxr, -1);
1063 		lwkt_serialize_exit(&rxr->rx_serialize);
1064 	}
1065 
1066 	if (eicr & IX_TX_INTR_MASK) {
1067 		struct ix_tx_ring *txr = &sc->tx_rings[0];
1068 
1069 		lwkt_serialize_enter(&txr->tx_serialize);
1070 		ix_txeof(txr, *(txr->tx_hdr));
1071 		if (!ifsq_is_empty(txr->tx_ifsq))
1072 			ifsq_devstart(txr->tx_ifsq);
1073 		lwkt_serialize_exit(&txr->tx_serialize);
1074 	}
1075 
1076 	if (__predict_false(eicr & IX_EICR_STATUS))
1077 		ix_intr_status(sc, eicr);
1078 
1079 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
1080 }
1081 
1082 static void
1083 ix_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1084 {
1085 	struct ix_softc *sc = ifp->if_softc;
1086 	struct ifmedia *ifm = &sc->media;
1087 	int layer;
1088 
1089 	ix_update_link_status(sc);
1090 
1091 	ifmr->ifm_status = IFM_AVALID;
1092 	ifmr->ifm_active = IFM_ETHER;
1093 
1094 	if (!sc->link_active) {
1095 		if (IFM_SUBTYPE(ifm->ifm_media) != IFM_AUTO)
1096 			ifmr->ifm_active |= ifm->ifm_media;
1097 		else
1098 			ifmr->ifm_active |= IFM_NONE;
1099 		return;
1100 	}
1101 	ifmr->ifm_status |= IFM_ACTIVE;
1102 
1103 	layer = ixgbe_get_supported_physical_layer(&sc->hw);
1104 
1105 	if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) ||
1106 	    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) ||
1107 	    (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX)) {
1108 		switch (sc->link_speed) {
1109 		case IXGBE_LINK_SPEED_10GB_FULL:
1110 			ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
1111 			break;
1112 		case IXGBE_LINK_SPEED_1GB_FULL:
1113 			ifmr->ifm_active |= IFM_1000_T | IFM_FDX;
1114 			break;
1115 		case IXGBE_LINK_SPEED_100_FULL:
1116 			ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1117 			break;
1118 		}
1119 	} else if ((layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) ||
1120 	    (layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA)) {
1121 		switch (sc->link_speed) {
1122 		case IXGBE_LINK_SPEED_10GB_FULL:
1123 			ifmr->ifm_active |= IFM_10G_TWINAX | IFM_FDX;
1124 			break;
1125 		}
1126 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) {
1127 		switch (sc->link_speed) {
1128 		case IXGBE_LINK_SPEED_10GB_FULL:
1129 			ifmr->ifm_active |= IFM_10G_LR | IFM_FDX;
1130 			break;
1131 		case IXGBE_LINK_SPEED_1GB_FULL:
1132 			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
1133 			break;
1134 		}
1135 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LRM) {
1136 		switch (sc->link_speed) {
1137 		case IXGBE_LINK_SPEED_10GB_FULL:
1138 			ifmr->ifm_active |= IFM_10G_LRM | IFM_FDX;
1139 			break;
1140 		case IXGBE_LINK_SPEED_1GB_FULL:
1141 			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
1142 			break;
1143 		}
1144 	} else if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) ||
1145 	    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX)) {
1146 		switch (sc->link_speed) {
1147 		case IXGBE_LINK_SPEED_10GB_FULL:
1148 			ifmr->ifm_active |= IFM_10G_SR | IFM_FDX;
1149 			break;
1150 		case IXGBE_LINK_SPEED_1GB_FULL:
1151 			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1152 			break;
1153 		}
1154 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) {
1155 		switch (sc->link_speed) {
1156 		case IXGBE_LINK_SPEED_10GB_FULL:
1157 			ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX;
1158 			break;
1159 		}
1160 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) {
1161 		/*
1162 		 * XXX: These need to use the proper media types once
1163 		 * they're added.
1164 		 */
1165 		switch (sc->link_speed) {
1166 		case IXGBE_LINK_SPEED_10GB_FULL:
1167 			ifmr->ifm_active |= IFM_10G_SR | IFM_FDX;
1168 			break;
1169 		case IXGBE_LINK_SPEED_2_5GB_FULL:
1170 			ifmr->ifm_active |= IFM_2500_SX | IFM_FDX;
1171 			break;
1172 		case IXGBE_LINK_SPEED_1GB_FULL:
1173 			ifmr->ifm_active |= IFM_1000_CX | IFM_FDX;
1174 			break;
1175 		}
1176 	} else if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) ||
1177 	    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX)) {
1178 		/*
1179 		 * XXX: These need to use the proper media types once
1180 		 * they're added.
1181 		 */
1182 		switch (sc->link_speed) {
1183 		case IXGBE_LINK_SPEED_10GB_FULL:
1184 			ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX;
1185 			break;
1186 		case IXGBE_LINK_SPEED_2_5GB_FULL:
1187 			ifmr->ifm_active |= IFM_2500_SX | IFM_FDX;
1188 			break;
1189 		case IXGBE_LINK_SPEED_1GB_FULL:
1190 			ifmr->ifm_active |= IFM_1000_CX | IFM_FDX;
1191 			break;
1192 		}
1193 	}
1194 
1195 	/* If nothing is recognized... */
1196 	if (IFM_SUBTYPE(ifmr->ifm_active) == 0)
1197 		ifmr->ifm_active |= IFM_NONE;
1198 
1199 	if (sc->ifm_media & IFM_ETH_FORCEPAUSE)
1200 		ifmr->ifm_active |= (sc->ifm_media & IFM_ETH_FCMASK);
1201 
1202 	switch (sc->hw.fc.current_mode) {
1203 	case ixgbe_fc_full:
1204 		ifmr->ifm_active |= IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE;
1205 		break;
1206 	case ixgbe_fc_rx_pause:
1207 		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
1208 		break;
1209 	case ixgbe_fc_tx_pause:
1210 		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
1211 		break;
1212 	default:
1213 		break;
1214 	}
1215 }
1216 
1217 static int
1218 ix_media_change(struct ifnet *ifp)
1219 {
1220 	struct ix_softc *sc = ifp->if_softc;
1221 	struct ifmedia *ifm = &sc->media;
1222 	struct ixgbe_hw *hw = &sc->hw;
1223 
1224 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1225 		return (EINVAL);
1226 
1227 	if (hw->phy.media_type == ixgbe_media_type_backplane ||
1228 	    hw->mac.ops.setup_link == NULL) {
1229 		if ((ifm->ifm_media ^ sc->ifm_media) & IFM_ETH_FCMASK) {
1230 			/* Only flow control setting changes are allowed */
1231 			return (EOPNOTSUPP);
1232 		}
1233 	}
1234 
1235 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1236 	case IFM_AUTO:
1237 		sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;
1238 		break;
1239 
1240 	case IFM_10G_T:
1241 	case IFM_10G_LRM:
1242 	case IFM_10G_SR:	/* XXX also KR */
1243 	case IFM_10G_LR:
1244 	case IFM_10G_CX4:	/* XXX also KX4 */
1245 	case IFM_10G_TWINAX:
1246 		sc->advspeed = IXGBE_LINK_SPEED_10GB_FULL;
1247 		break;
1248 
1249 	case IFM_1000_T:
1250 	case IFM_1000_LX:
1251 	case IFM_1000_SX:
1252 	case IFM_1000_CX:	/* XXX is KX */
1253 		sc->advspeed = IXGBE_LINK_SPEED_1GB_FULL;
1254 		break;
1255 
1256 	case IFM_100_TX:
1257 		sc->advspeed = IXGBE_LINK_SPEED_100_FULL;
1258 		break;
1259 
1260 	default:
1261 		if (bootverbose) {
1262 			if_printf(ifp, "Invalid media type %d!\n",
1263 			    ifm->ifm_media);
1264 		}
1265 		return EINVAL;
1266 	}
1267 	sc->ifm_media = ifm->ifm_media;
1268 
1269 #if 0
1270 	if (hw->mac.ops.setup_link != NULL) {
1271 		hw->mac.autotry_restart = TRUE;
1272 		hw->mac.ops.setup_link(hw, sc->advspeed, TRUE);
1273 	}
1274 #else
1275 	if (ifp->if_flags & IFF_RUNNING)
1276 		ix_init(sc);
1277 #endif
1278 	return 0;
1279 }
1280 
1281 static __inline int
1282 ix_tso_pullup(struct mbuf **mp)
1283 {
1284 	int hoff, iphlen, thoff;
1285 	struct mbuf *m;
1286 
1287 	m = *mp;
1288 	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));
1289 
1290 	iphlen = m->m_pkthdr.csum_iphlen;
1291 	thoff = m->m_pkthdr.csum_thlen;
1292 	hoff = m->m_pkthdr.csum_lhlen;
1293 
1294 	KASSERT(iphlen > 0, ("invalid ip hlen"));
1295 	KASSERT(thoff > 0, ("invalid tcp hlen"));
1296 	KASSERT(hoff > 0, ("invalid ether hlen"));
1297 
1298 	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
1299 		m = m_pullup(m, hoff + iphlen + thoff);
1300 		if (m == NULL) {
1301 			*mp = NULL;
1302 			return ENOBUFS;
1303 		}
1304 		*mp = m;
1305 	}
1306 	return 0;
1307 }
1308 
1309 static int
1310 ix_encap(struct ix_tx_ring *txr, struct mbuf **m_headp,
1311     uint16_t *segs_used, int *idx)
1312 {
1313 	uint32_t olinfo_status = 0, cmd_type_len, cmd_rs = 0;
1314 	int i, j, error, nsegs, first, maxsegs;
1315 	struct mbuf *m_head = *m_headp;
1316 	bus_dma_segment_t segs[IX_MAX_SCATTER];
1317 	bus_dmamap_t map;
1318 	struct ix_tx_buf *txbuf;
1319 	union ixgbe_adv_tx_desc *txd = NULL;
1320 
1321 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1322 		error = ix_tso_pullup(m_headp);
1323 		if (__predict_false(error))
1324 			return error;
1325 		m_head = *m_headp;
1326 	}
1327 
1328 	/* Basic descriptor defines */
1329 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1330 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1331 
1332 	if (m_head->m_flags & M_VLANTAG)
1333 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1334 
1335 	/*
1336 	 * Important to capture the first descriptor
1337 	 * used because it will contain the index of
1338 	 * the one we tell the hardware to report back
1339 	 */
1340 	first = txr->tx_next_avail;
1341 	txbuf = &txr->tx_buf[first];
1342 	map = txbuf->map;
1343 
1344 	/*
1345 	 * Map the packet for DMA.
1346 	 */
1347 	maxsegs = txr->tx_avail - IX_TX_RESERVED;
1348 	if (maxsegs > IX_MAX_SCATTER)
1349 		maxsegs = IX_MAX_SCATTER;
1350 
1351 	error = bus_dmamap_load_mbuf_defrag(txr->tx_tag, map, m_headp,
1352 	    segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
1353 	if (__predict_false(error)) {
1354 		m_freem(*m_headp);
1355 		*m_headp = NULL;
1356 		return error;
1357 	}
1358 	bus_dmamap_sync(txr->tx_tag, map, BUS_DMASYNC_PREWRITE);
1359 
1360 	m_head = *m_headp;
1361 
1362 	/*
1363 	 * Set up the appropriate offload context if requested,
1364 	 * this may consume one TX descriptor.
1365 	 */
1366 	if (ix_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status)) {
1367 		(*segs_used)++;
1368 		txr->tx_nsegs++;
1369 	}
1370 
1371 	*segs_used += nsegs;
1372 	txr->tx_nsegs += nsegs;
1373 	if (txr->tx_nsegs >= txr->tx_intr_nsegs) {
1374 		/*
1375 		 * Report Status (RS) is turned on every intr_nsegs
1376 		 * descriptors (roughly).
1377 		 */
1378 		txr->tx_nsegs = 0;
1379 		cmd_rs = IXGBE_TXD_CMD_RS;
1380 	}
1381 
1382 	i = txr->tx_next_avail;
1383 	for (j = 0; j < nsegs; j++) {
1384 		bus_size_t seglen;
1385 		bus_addr_t segaddr;
1386 
1387 		txbuf = &txr->tx_buf[i];
1388 		txd = &txr->tx_base[i];
1389 		seglen = segs[j].ds_len;
1390 		segaddr = htole64(segs[j].ds_addr);
1391 
1392 		txd->read.buffer_addr = segaddr;
1393 		txd->read.cmd_type_len = htole32(IXGBE_TXD_CMD_IFCS |
1394 		    cmd_type_len |seglen);
1395 		txd->read.olinfo_status = htole32(olinfo_status);
1396 
1397 		if (++i == txr->tx_ndesc)
1398 			i = 0;
1399 	}
1400 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | cmd_rs);
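	/* Only the packet's last descriptor carries EOP (and RS, when due). */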
1401 
1402 	txr->tx_avail -= nsegs;
1403 	txr->tx_next_avail = i;
1404 
1405 	txbuf->m_head = m_head;
1406 	txr->tx_buf[first].map = txbuf->map;
1407 	txbuf->map = map;
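	/*
	 * The dmamap just loaded stays with the last buffer (which also holds
	 * the mbuf); the first buffer inherits that buffer's old, unused map,
	 * so every slot still owns exactly one map.
	 */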
1408 
1409 	/*
1410 	 * Defer TDT updating until enough descriptors are set up
1411 	 */
1412 	*idx = i;
1413 
1414 	return 0;
1415 }
1416 
1417 static void
1418 ix_set_promisc(struct ix_softc *sc)
1419 {
1420 	struct ifnet *ifp = &sc->arpcom.ac_if;
1421 	uint32_t reg_rctl;
1422 	int mcnt = 0;
1423 
1424 	reg_rctl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
1425 	reg_rctl &= ~IXGBE_FCTRL_UPE;
1426 	if (ifp->if_flags & IFF_ALLMULTI) {
1427 		mcnt = IX_MAX_MCASTADDR;
1428 	} else {
1429 		struct ifmultiaddr *ifma;
1430 
1431 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1432 			if (ifma->ifma_addr->sa_family != AF_LINK)
1433 				continue;
1434 			if (mcnt == IX_MAX_MCASTADDR)
1435 				break;
1436 			mcnt++;
1437 		}
1438 	}
1439 	if (mcnt < IX_MAX_MCASTADDR)
1440 		reg_rctl &= ~IXGBE_FCTRL_MPE;
1441 	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1442 
1443 	if (ifp->if_flags & IFF_PROMISC) {
1444 		reg_rctl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
1445 		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1446 	} else if (ifp->if_flags & IFF_ALLMULTI) {
1447 		reg_rctl |= IXGBE_FCTRL_MPE;
1448 		reg_rctl &= ~IXGBE_FCTRL_UPE;
1449 		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1450 	}
1451 }
1452 
1453 static void
1454 ix_set_multi(struct ix_softc *sc)
1455 {
1456 	struct ifnet *ifp = &sc->arpcom.ac_if;
1457 	struct ifmultiaddr *ifma;
1458 	uint32_t fctrl;
1459 	uint8_t	*mta;
1460 	int mcnt = 0;
1461 
1462 	mta = sc->mta;
1463 	bzero(mta, IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR);
1464 
1465 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1466 		if (ifma->ifma_addr->sa_family != AF_LINK)
1467 			continue;
1468 		if (mcnt == IX_MAX_MCASTADDR)
1469 			break;
1470 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1471 		    &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
1472 		    IXGBE_ETH_LENGTH_OF_ADDRESS);
1473 		mcnt++;
1474 	}
1475 
1476 	fctrl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
1477 	fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1478 	if (ifp->if_flags & IFF_PROMISC) {
1479 		fctrl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
1480 	} else if (mcnt >= IX_MAX_MCASTADDR || (ifp->if_flags & IFF_ALLMULTI)) {
1481 		fctrl |= IXGBE_FCTRL_MPE;
1482 		fctrl &= ~IXGBE_FCTRL_UPE;
1483 	} else {
1484 		fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1485 	}
1486 	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, fctrl);
1487 
1488 	if (mcnt < IX_MAX_MCASTADDR) {
1489 		ixgbe_update_mc_addr_list(&sc->hw,
1490 		    mta, mcnt, ix_mc_array_itr, TRUE);
1491 	}
1492 }
1493 
1494 /*
1495  * This is an iterator function needed by the multicast shared code.
1496  * It simply feeds the shared code routine the addresses from the
1497  * array built in ix_set_multi(), one at a time.
1498  */
1499 static uint8_t *
1500 ix_mc_array_itr(struct ixgbe_hw *hw, uint8_t **update_ptr, uint32_t *vmdq)
1501 {
1502 	uint8_t *addr = *update_ptr;
1503 	uint8_t *newptr;
1504 	*vmdq = 0;
1505 
1506 	newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
1507 	*update_ptr = newptr;
1508 	return addr;
1509 }
1510 
1511 static void
1512 ix_timer(void *arg)
1513 {
1514 	struct ix_softc *sc = arg;
1515 
1516 	lwkt_serialize_enter(&sc->main_serialize);
1517 
1518 	if ((sc->arpcom.ac_if.if_flags & IFF_RUNNING) == 0) {
1519 		lwkt_serialize_exit(&sc->main_serialize);
1520 		return;
1521 	}
1522 
1523 	/* Check for pluggable optics */
1524 	if (sc->sfp_probe) {
1525 		if (!ix_sfp_probe(sc))
1526 			goto done; /* Nothing to do */
1527 	}
1528 
1529 	ix_update_link_status(sc);
1530 	ix_update_stats(sc);
1531 
1532 done:
1533 	callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
1534 	lwkt_serialize_exit(&sc->main_serialize);
1535 }
1536 
1537 static void
1538 ix_update_link_status(struct ix_softc *sc)
1539 {
1540 	struct ifnet *ifp = &sc->arpcom.ac_if;
1541 
1542 	if (sc->link_up) {
1543 		if (sc->link_active == FALSE) {
1544 			if (bootverbose) {
1545 				if_printf(ifp, "Link is up %d Gbps %s\n",
1546 				    sc->link_speed == 128 ? 10 : 1,
1547 				    "Full Duplex");
1548 			}
1549 
1550 			/*
1551 			 * Update any Flow Control changes
1552 			 */
1553 			ixgbe_fc_enable(&sc->hw);
1554 			/* MUST be done after ixgbe_fc_enable() */
1555 			if (sc->rx_ring_inuse > 1) {
1556 				switch (sc->hw.fc.current_mode) {
1557 				case ixgbe_fc_rx_pause:
1558 				case ixgbe_fc_tx_pause:
1559 				case ixgbe_fc_full:
1560 					ix_disable_rx_drop(sc);
1561 					break;
1562 
1563 				case ixgbe_fc_none:
1564 					ix_enable_rx_drop(sc);
1565 					break;
1566 
1567 				default:
1568 					break;
1569 				}
1570 			}
1571 
1572 			/* Update DMA coalescing config */
1573 			ix_config_dmac(sc);
1574 
1575 			sc->link_active = TRUE;
1576 
1577 			ifp->if_link_state = LINK_STATE_UP;
1578 			if_link_state_change(ifp);
1579 		}
1580 	} else { /* Link down */
1581 		if (sc->link_active == TRUE) {
1582 			if (bootverbose)
1583 				if_printf(ifp, "Link is Down\n");
1584 			ifp->if_link_state = LINK_STATE_DOWN;
1585 			if_link_state_change(ifp);
1586 
1587 			sc->link_active = FALSE;
1588 		}
1589 	}
1590 }
1591 
1592 static void
1593 ix_stop(struct ix_softc *sc)
1594 {
1595 	struct ixgbe_hw *hw = &sc->hw;
1596 	struct ifnet *ifp = &sc->arpcom.ac_if;
1597 	int i;
1598 
1599 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1600 
1601 	ix_disable_intr(sc);
1602 	callout_stop(&sc->timer);
1603 
1604 	ifp->if_flags &= ~IFF_RUNNING;
1605 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1606 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1607 
1608 		ifsq_clr_oactive(txr->tx_ifsq);
1609 		ifsq_watchdog_stop(&txr->tx_watchdog);
1610 		txr->tx_flags &= ~IX_TXFLAG_ENABLED;
1611 	}
1612 
1613 	ixgbe_reset_hw(hw);
1614 	hw->adapter_stopped = FALSE;
1615 	ixgbe_stop_adapter(hw);
1616 	if (hw->mac.type == ixgbe_mac_82599EB)
1617 		ixgbe_stop_mac_link_on_d3_82599(hw);
1618 	/* Turn off the laser - noop with no optics */
1619 	ixgbe_disable_tx_laser(hw);
1620 
1621 	/* Update the stack */
1622 	sc->link_up = FALSE;
1623 	ix_update_link_status(sc);
1624 
1625 	/* Reprogram the RAR[0] in case user changed it. */
1626 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV);
1627 
1628 	for (i = 0; i < sc->tx_ring_cnt; ++i)
1629 		ix_free_tx_ring(&sc->tx_rings[i]);
1630 
1631 	for (i = 0; i < sc->rx_ring_cnt; ++i)
1632 		ix_free_rx_ring(&sc->rx_rings[i]);
1633 }
1634 
1635 static void
1636 ix_setup_ifp(struct ix_softc *sc)
1637 {
1638 	struct ixgbe_hw *hw = &sc->hw;
1639 	struct ifnet *ifp = &sc->arpcom.ac_if;
1640 	int i;
1641 
1642 	ifp->if_baudrate = IF_Gbps(10UL);
1643 
1644 	ifp->if_softc = sc;
1645 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1646 	ifp->if_init = ix_init;
1647 	ifp->if_ioctl = ix_ioctl;
1648 	ifp->if_start = ix_start;
1649 	ifp->if_serialize = ix_serialize;
1650 	ifp->if_deserialize = ix_deserialize;
1651 	ifp->if_tryserialize = ix_tryserialize;
1652 #ifdef INVARIANTS
1653 	ifp->if_serialize_assert = ix_serialize_assert;
1654 #endif
1655 #ifdef IFPOLL_ENABLE
1656 	ifp->if_npoll = ix_npoll;
1657 #endif
1658 
1659 	/* Increase TSO burst length */
1660 	ifp->if_tsolen = (8 * ETHERMTU);
1661 
1662 	ifp->if_nmbclusters = sc->rx_ring_cnt * sc->rx_rings[0].rx_ndesc;
1663 	ifp->if_nmbjclusters = ifp->if_nmbclusters;
1664 
1665 	ifq_set_maxlen(&ifp->if_snd, sc->tx_rings[0].tx_ndesc - 2);
1666 	ifq_set_ready(&ifp->if_snd);
1667 	ifq_set_subq_cnt(&ifp->if_snd, sc->tx_ring_cnt);
1668 
1669 	ifp->if_mapsubq = ifq_mapsubq_modulo;
1670 	ifq_set_subq_divisor(&ifp->if_snd, 1);
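	/*
	 * Subqueues are selected modulo the divisor; ix_init() raises the
	 * divisor to the number of TX rings actually in use.
	 */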
1671 
1672 	ether_ifattach(ifp, hw->mac.addr, NULL);
1673 
1674 	ifp->if_capabilities =
1675 	    IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
1676 	if (IX_ENABLE_HWRSS(sc))
1677 		ifp->if_capabilities |= IFCAP_RSS;
1678 	ifp->if_capenable = ifp->if_capabilities;
1679 	ifp->if_hwassist = CSUM_OFFLOAD | CSUM_TSO;
1680 
1681 	/*
1682 	 * Tell the upper layer(s) we support long frames.
1683 	 */
1684 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
1685 
1686 	/* Setup TX rings and subqueues */
1687 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1688 		struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
1689 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1690 
1691 		ifsq_set_cpuid(ifsq, txr->tx_intr_cpuid);
1692 		ifsq_set_priv(ifsq, txr);
1693 		ifsq_set_hw_serialize(ifsq, &txr->tx_serialize);
1694 		txr->tx_ifsq = ifsq;
1695 
1696 		ifsq_watchdog_init(&txr->tx_watchdog, ifsq, ix_watchdog);
1697 	}
1698 
1699 	/* Specify the media types supported by this adapter */
1700 	ix_init_media(sc);
1701 }
1702 
1703 static boolean_t
1704 ix_is_sfp(const struct ixgbe_hw *hw)
1705 {
1706 	switch (hw->phy.type) {
1707 	case ixgbe_phy_sfp_avago:
1708 	case ixgbe_phy_sfp_ftl:
1709 	case ixgbe_phy_sfp_intel:
1710 	case ixgbe_phy_sfp_unknown:
1711 	case ixgbe_phy_sfp_passive_tyco:
1712 	case ixgbe_phy_sfp_passive_unknown:
1713 	case ixgbe_phy_qsfp_passive_unknown:
1714 	case ixgbe_phy_qsfp_active_unknown:
1715 	case ixgbe_phy_qsfp_intel:
1716 	case ixgbe_phy_qsfp_unknown:
1717 		return TRUE;
1718 	default:
1719 		return FALSE;
1720 	}
1721 }
1722 
1723 static void
1724 ix_config_link(struct ix_softc *sc)
1725 {
1726 	struct ixgbe_hw *hw = &sc->hw;
1727 	boolean_t sfp;
1728 
1729 	sfp = ix_is_sfp(hw);
1730 	if (sfp) {
1731 		if (hw->phy.multispeed_fiber) {
1732 			hw->mac.ops.setup_sfp(hw);
1733 			ixgbe_enable_tx_laser(hw);
1734 			ix_handle_msf(sc);
1735 		} else {
1736 			ix_handle_mod(sc);
1737 		}
1738 	} else {
1739 		uint32_t autoneg, err = 0;
1740 
1741 		if (hw->mac.ops.check_link != NULL) {
1742 			err = ixgbe_check_link(hw, &sc->link_speed,
1743 			    &sc->link_up, FALSE);
1744 			if (err)
1745 				return;
1746 		}
1747 
1748 		if (sc->advspeed != IXGBE_LINK_SPEED_UNKNOWN)
1749 			autoneg = sc->advspeed;
1750 		else
1751 			autoneg = hw->phy.autoneg_advertised;
1752 		if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
1753 			bool negotiate;
1754 
1755 			err = hw->mac.ops.get_link_capabilities(hw,
1756 			    &autoneg, &negotiate);
1757 			if (err)
1758 				return;
1759 		}
1760 
1761 		if (hw->mac.ops.setup_link != NULL) {
1762 			err = hw->mac.ops.setup_link(hw,
1763 			    autoneg, sc->link_up);
1764 			if (err)
1765 				return;
1766 		}
1767 	}
1768 }
1769 
1770 static int
1771 ix_alloc_rings(struct ix_softc *sc)
1772 {
1773 	int error, i;
1774 
1775 	/*
1776 	 * Create top level busdma tag
1777 	 */
1778 	error = bus_dma_tag_create(NULL, 1, 0,
1779 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1780 	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
1781 	    &sc->parent_tag);
1782 	if (error) {
1783 		device_printf(sc->dev, "could not create top level DMA tag\n");
1784 		return error;
1785 	}
1786 
1787 	/*
1788 	 * Allocate TX descriptor rings and buffers
1789 	 */
1790 	sc->tx_rings = kmalloc_cachealign(
1791 	    sizeof(struct ix_tx_ring) * sc->tx_ring_cnt,
1792 	    M_DEVBUF, M_WAITOK | M_ZERO);
1793 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1794 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1795 
1796 		txr->tx_sc = sc;
1797 		txr->tx_idx = i;
1798 		txr->tx_intr_vec = -1;
1799 		txr->tx_intr_cpuid = -1;
1800 		lwkt_serialize_init(&txr->tx_serialize);
1801 
1802 		error = ix_create_tx_ring(txr);
1803 		if (error)
1804 			return error;
1805 	}
1806 
1807 	/*
1808 	 * Allocate RX descriptor rings and buffers
1809 	 */
1810 	sc->rx_rings = kmalloc_cachealign(
1811 	    sizeof(struct ix_rx_ring) * sc->rx_ring_cnt,
1812 	    M_DEVBUF, M_WAITOK | M_ZERO);
1813 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
1814 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
1815 
1816 		rxr->rx_sc = sc;
1817 		rxr->rx_idx = i;
1818 		rxr->rx_intr_vec = -1;
1819 		lwkt_serialize_init(&rxr->rx_serialize);
1820 
1821 		error = ix_create_rx_ring(rxr);
1822 		if (error)
1823 			return error;
1824 	}
1825 
1826 	return 0;
1827 }
1828 
1829 static int
1830 ix_create_tx_ring(struct ix_tx_ring *txr)
1831 {
1832 	int error, i, tsize, ntxd;
1833 
1834 	/*
1835 	 * Validate number of transmit descriptors.  It must not exceed
1836 	 * the hardware maximum, and must be a multiple of IX_DBA_ALIGN.
1837 	 */
1838 	ntxd = device_getenv_int(txr->tx_sc->dev, "txd", ix_txd);
1839 	if (((ntxd * sizeof(union ixgbe_adv_tx_desc)) % IX_DBA_ALIGN) != 0 ||
1840 	    ntxd < IX_MIN_TXD || ntxd > IX_MAX_TXD) {
1841 		device_printf(txr->tx_sc->dev,
1842 		    "Using %d TX descriptors instead of %d!\n",
1843 		    IX_DEF_TXD, ntxd);
1844 		txr->tx_ndesc = IX_DEF_TXD;
1845 	} else {
1846 		txr->tx_ndesc = ntxd;
1847 	}
1848 
1849 	/*
1850 	 * Allocate TX head write-back buffer
1851 	 */
1852 	txr->tx_hdr = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
1853 	    __VM_CACHELINE_SIZE, __VM_CACHELINE_SIZE, BUS_DMA_WAITOK,
1854 	    &txr->tx_hdr_dtag, &txr->tx_hdr_map, &txr->tx_hdr_paddr);
1855 	if (txr->tx_hdr == NULL) {
1856 		device_printf(txr->tx_sc->dev,
1857 		    "Unable to allocate TX head write-back buffer\n");
1858 		return ENOMEM;
1859 	}
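	/*
	 * The hardware DMA-writes the ring's completed-head index into this
	 * buffer; TX cleanup reads *tx_hdr (see ix_intr()) instead of polling
	 * descriptor status bits.
	 */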
1860 
1861 	/*
1862 	 * Allocate TX descriptor ring
1863 	 */
1864 	tsize = roundup2(txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc),
1865 	    IX_DBA_ALIGN);
1866 	txr->tx_base = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
1867 	    IX_DBA_ALIGN, tsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
1868 	    &txr->tx_base_dtag, &txr->tx_base_map, &txr->tx_base_paddr);
1869 	if (txr->tx_base == NULL) {
1870 		device_printf(txr->tx_sc->dev,
1871 		    "Unable to allocate TX Descriptor memory\n");
1872 		return ENOMEM;
1873 	}
1874 
1875 	tsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_tx_buf) * txr->tx_ndesc);
1876 	txr->tx_buf = kmalloc_cachealign(tsize, M_DEVBUF, M_WAITOK | M_ZERO);
1877 
1878 	/*
1879 	 * Create DMA tag for TX buffers
1880 	 */
1881 	error = bus_dma_tag_create(txr->tx_sc->parent_tag,
1882 	    1, 0,		/* alignment, bounds */
1883 	    BUS_SPACE_MAXADDR,	/* lowaddr */
1884 	    BUS_SPACE_MAXADDR,	/* highaddr */
1885 	    NULL, NULL,		/* filter, filterarg */
1886 	    IX_TSO_SIZE,	/* maxsize */
1887 	    IX_MAX_SCATTER,	/* nsegments */
1888 	    PAGE_SIZE,		/* maxsegsize */
1889 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
1890 	    BUS_DMA_ONEBPAGE,	/* flags */
1891 	    &txr->tx_tag);
1892 	if (error) {
1893 		device_printf(txr->tx_sc->dev,
1894 		    "Unable to allocate TX DMA tag\n");
1895 		kfree(txr->tx_buf, M_DEVBUF);
1896 		txr->tx_buf = NULL;
1897 		return error;
1898 	}
1899 
1900 	/*
1901 	 * Create DMA maps for TX buffers
1902 	 */
1903 	for (i = 0; i < txr->tx_ndesc; ++i) {
1904 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
1905 
1906 		error = bus_dmamap_create(txr->tx_tag,
1907 		    BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &txbuf->map);
1908 		if (error) {
1909 			device_printf(txr->tx_sc->dev,
1910 			    "Unable to create TX DMA map\n");
1911 			ix_destroy_tx_ring(txr, i);
1912 			return error;
1913 		}
1914 	}
1915 
1916 	/*
1917 	 * Initialize various watermarks
1918 	 */
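	/*
	 * tx_intr_nsegs is set to 1/16 of the ring so that TX completions
	 * are processed periodically rather than per packet; tx_wreg_nsegs
	 * (by analogy with rx_wreg_nsegs in the RX path) batches several
	 * segments per TX tail register write.
	 */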
1919 	txr->tx_wreg_nsegs = IX_DEF_TXWREG_NSEGS;
1920 	txr->tx_intr_nsegs = txr->tx_ndesc / 16;
1921 
1922 	return 0;
1923 }
1924 
1925 static void
1926 ix_destroy_tx_ring(struct ix_tx_ring *txr, int ndesc)
1927 {
1928 	int i;
1929 
1930 	if (txr->tx_hdr != NULL) {
1931 		bus_dmamap_unload(txr->tx_hdr_dtag, txr->tx_hdr_map);
1932 		bus_dmamem_free(txr->tx_hdr_dtag,
1933 		    __DEVOLATILE(void *, txr->tx_hdr), txr->tx_hdr_map);
1934 		bus_dma_tag_destroy(txr->tx_hdr_dtag);
1935 		txr->tx_hdr = NULL;
1936 	}
1937 
1938 	if (txr->tx_base != NULL) {
1939 		bus_dmamap_unload(txr->tx_base_dtag, txr->tx_base_map);
1940 		bus_dmamem_free(txr->tx_base_dtag, txr->tx_base,
1941 		    txr->tx_base_map);
1942 		bus_dma_tag_destroy(txr->tx_base_dtag);
1943 		txr->tx_base = NULL;
1944 	}
1945 
1946 	if (txr->tx_buf == NULL)
1947 		return;
1948 
1949 	for (i = 0; i < ndesc; ++i) {
1950 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
1951 
1952 		KKASSERT(txbuf->m_head == NULL);
1953 		bus_dmamap_destroy(txr->tx_tag, txbuf->map);
1954 	}
1955 	bus_dma_tag_destroy(txr->tx_tag);
1956 
1957 	kfree(txr->tx_buf, M_DEVBUF);
1958 	txr->tx_buf = NULL;
1959 }
1960 
1961 static void
1962 ix_init_tx_ring(struct ix_tx_ring *txr)
1963 {
1964 	/* Clear the old ring contents */
1965 	bzero(txr->tx_base, sizeof(union ixgbe_adv_tx_desc) * txr->tx_ndesc);
1966 
1967 	/* Clear TX head write-back buffer */
1968 	*(txr->tx_hdr) = 0;
1969 
1970 	/* Reset indices */
1971 	txr->tx_next_avail = 0;
1972 	txr->tx_next_clean = 0;
1973 	txr->tx_nsegs = 0;
1974 
1975 	/* Set number of descriptors available */
1976 	txr->tx_avail = txr->tx_ndesc;
1977 
1978 	/* Enable this TX ring */
1979 	txr->tx_flags |= IX_TXFLAG_ENABLED;
1980 }
1981 
1982 static void
1983 ix_init_tx_unit(struct ix_softc *sc)
1984 {
1985 	struct ixgbe_hw	*hw = &sc->hw;
1986 	int i;
1987 
1988 	/*
1989 	 * Setup the Base and Length of the Tx Descriptor Ring
1990 	 */
1991 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
1992 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1993 		uint64_t tdba = txr->tx_base_paddr;
1994 		uint64_t hdr_paddr = txr->tx_hdr_paddr;
1995 		uint32_t txctrl;
1996 
1997 		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i), (uint32_t)tdba);
1998 		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (uint32_t)(tdba >> 32));
1999 		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
2000 		    txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc));
2001 
2002 		/* Setup the HW Tx Head and Tail descriptor pointers */
2003 		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
2004 		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
2005 
2006 		/* Disable TX head write-back relax ordering */
2007 		switch (hw->mac.type) {
2008 		case ixgbe_mac_82598EB:
2009 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
2010 			break;
2011 		case ixgbe_mac_82599EB:
2012 		case ixgbe_mac_X540:
2013 		default:
2014 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
2015 			break;
2016 		}
2017 		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
2018 		switch (hw->mac.type) {
2019 		case ixgbe_mac_82598EB:
2020 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
2021 			break;
2022 		case ixgbe_mac_82599EB:
2023 		case ixgbe_mac_X540:
2024 		default:
2025 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
2026 			break;
2027 		}
2028 
2029 		/* Enable TX head write-back */
2030 		IXGBE_WRITE_REG(hw, IXGBE_TDWBAH(i),
2031 		    (uint32_t)(hdr_paddr >> 32));
2032 		IXGBE_WRITE_REG(hw, IXGBE_TDWBAL(i),
2033 		    ((uint32_t)hdr_paddr) | IXGBE_TDWBAL_HEAD_WB_ENABLE);
2034 	}
2035 
2036 	if (hw->mac.type != ixgbe_mac_82598EB) {
2037 		uint32_t dmatxctl, rttdcs;
2038 
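		/* Enable the global DMA TX engine. */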
2039 		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
2040 		dmatxctl |= IXGBE_DMATXCTL_TE;
2041 		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
2042 
2043 		/* Disable arbiter to set MTQC */
2044 		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
2045 		rttdcs |= IXGBE_RTTDCS_ARBDIS;
2046 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
2047 
2048 		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
2049 
2050 		/* Re-enable arbiter */
2051 		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
2052 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
2053 	}
2054 }
2055 
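/*
 * Build a TX context descriptor for checksum offload and/or VLAN
 * insertion for the given mbuf, updating olinfo_status accordingly.
 * Returns the number of TX descriptors consumed (0 if no context
 * descriptor is needed, 1 otherwise), or hands off to
 * ix_tso_ctx_setup() when TSO is requested.
 */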
2056 static int
2057 ix_tx_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
2058     uint32_t *cmd_type_len, uint32_t *olinfo_status)
2059 {
2060 	struct ixgbe_adv_tx_context_desc *TXD;
2061 	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2062 	int ehdrlen, ip_hlen = 0, ctxd;
2063 	boolean_t offload = TRUE;
2064 
2065 	/* First check if TSO is to be used */
2066 	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
2067 		return ix_tso_ctx_setup(txr, mp,
2068 		    cmd_type_len, olinfo_status);
2069 	}
2070 
2071 	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
2072 		offload = FALSE;
2073 
2074 	/* Indicate the whole packet as payload when not doing TSO */
2075 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
2076 
2077 	/*
2078 	 * In advanced descriptors the vlan tag must be placed into the
2079 	 * context descriptor.  Hence we need to make one even if not
2080 	 * doing checksum offloads.
2081 	 */
2082 	if (mp->m_flags & M_VLANTAG) {
2083 		vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
2084 		    IXGBE_ADVTXD_VLAN_SHIFT;
2085 	} else if (!offload) {
2086 		/* No TX descriptor is consumed */
2087 		return 0;
2088 	}
2089 
2090 	/* Set the ether header length */
2091 	ehdrlen = mp->m_pkthdr.csum_lhlen;
2092 	KASSERT(ehdrlen > 0, ("invalid ether hlen"));
2093 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
2094 
2095 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
2096 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
2097 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
2098 		ip_hlen = mp->m_pkthdr.csum_iphlen;
2099 		KASSERT(ip_hlen > 0, ("invalid ip hlen"));
2100 	}
2101 	vlan_macip_lens |= ip_hlen;
2102 
2103 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
2104 	if (mp->m_pkthdr.csum_flags & CSUM_TCP)
2105 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
2106 	else if (mp->m_pkthdr.csum_flags & CSUM_UDP)
2107 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
2108 
2109 	if (mp->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))
2110 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
2111 
2112 	/* Now ready a context descriptor */
2113 	ctxd = txr->tx_next_avail;
2114 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
2115 
2116 	/* Now copy bits into descriptor */
2117 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
2118 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
2119 	TXD->seqnum_seed = htole32(0);
2120 	TXD->mss_l4len_idx = htole32(0);
2121 
2122 	/* We've consumed the first desc, adjust counters */
2123 	if (++ctxd == txr->tx_ndesc)
2124 		ctxd = 0;
2125 	txr->tx_next_avail = ctxd;
2126 	--txr->tx_avail;
2127 
2128 	/* One TX descriptor is consumed */
2129 	return 1;
2130 }
2131 
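/*
 * Build a TSO context descriptor: record the MAC/IP/TCP header lengths
 * and the MSS, mark the packet for segmentation (IXGBE_ADVTXD_DCMD_TSE)
 * and report only the TCP payload length in olinfo_status.  Always
 * consumes one TX descriptor.
 */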
2132 static int
2133 ix_tso_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
2134     uint32_t *cmd_type_len, uint32_t *olinfo_status)
2135 {
2136 	struct ixgbe_adv_tx_context_desc *TXD;
2137 	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2138 	uint32_t mss_l4len_idx = 0, paylen;
2139 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
2140 
2141 	ehdrlen = mp->m_pkthdr.csum_lhlen;
2142 	KASSERT(ehdrlen > 0, ("invalid ether hlen"));
2143 
2144 	ip_hlen = mp->m_pkthdr.csum_iphlen;
2145 	KASSERT(ip_hlen > 0, ("invalid ip hlen"));
2146 
2147 	tcp_hlen = mp->m_pkthdr.csum_thlen;
2148 	KASSERT(tcp_hlen > 0, ("invalid tcp hlen"));
2149 
2150 	ctxd = txr->tx_next_avail;
2151 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
2152 
2153 	if (mp->m_flags & M_VLANTAG) {
2154 		vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
2155 		    IXGBE_ADVTXD_VLAN_SHIFT;
2156 	}
2157 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
2158 	vlan_macip_lens |= ip_hlen;
2159 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
2160 
2161 	/* ADV DTYPE TUCMD */
2162 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
2163 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
2164 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
2165 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
2166 
2167 	/* MSS L4LEN IDX */
2168 	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
2169 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
2170 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
2171 
2172 	TXD->seqnum_seed = htole32(0);
2173 
2174 	if (++ctxd == txr->tx_ndesc)
2175 		ctxd = 0;
2176 
2177 	txr->tx_avail--;
2178 	txr->tx_next_avail = ctxd;
2179 
2180 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
2181 
2182 	/* This is used in the transmit desc in encap */
2183 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
2184 
2185 	*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
2186 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
2187 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
2188 
2189 	/* One TX descriptor is consumed */
2190 	return 1;
2191 }
2192 
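/*
 * Reclaim completed TX descriptors.  'hdr' is the ring index reported
 * by the TX head write-back buffer (see ix_init_tx_unit()); everything
 * from tx_next_clean up to that index has been transmitted, so free the
 * attached mbufs and DMA maps and clear the OACTIVE state once enough
 * descriptors are available again.
 */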
2193 static void
2194 ix_txeof(struct ix_tx_ring *txr, int hdr)
2195 {
2196 	int first, avail;
2197 
2198 	if (txr->tx_avail == txr->tx_ndesc)
2199 		return;
2200 
2201 	first = txr->tx_next_clean;
2202 	if (first == hdr)
2203 		return;
2204 
2205 	avail = txr->tx_avail;
2206 	while (first != hdr) {
2207 		struct ix_tx_buf *txbuf = &txr->tx_buf[first];
2208 
2209 		++avail;
2210 		if (txbuf->m_head) {
2211 			bus_dmamap_unload(txr->tx_tag, txbuf->map);
2212 			m_freem(txbuf->m_head);
2213 			txbuf->m_head = NULL;
2214 		}
2215 		if (++first == txr->tx_ndesc)
2216 			first = 0;
2217 	}
2218 	txr->tx_next_clean = first;
2219 	txr->tx_avail = avail;
2220 
2221 	if (txr->tx_avail > IX_MAX_SCATTER + IX_TX_RESERVED) {
2222 		ifsq_clr_oactive(txr->tx_ifsq);
2223 		txr->tx_watchdog.wd_timer = 0;
2224 	}
2225 }
2226 
2227 static int
2228 ix_create_rx_ring(struct ix_rx_ring *rxr)
2229 {
2230 	int i, rsize, error, nrxd;
2231 
2232 	/*
2233 	 * Validate the number of receive descriptors.  It must not exceed
2234 	 * the hardware maximum and must be a multiple of IX_DBA_ALIGN.
2235 	 */
2236 	nrxd = device_getenv_int(rxr->rx_sc->dev, "rxd", ix_rxd);
2237 	if (((nrxd * sizeof(union ixgbe_adv_rx_desc)) % IX_DBA_ALIGN) != 0 ||
2238 	    nrxd < IX_MIN_RXD || nrxd > IX_MAX_RXD) {
2239 		device_printf(rxr->rx_sc->dev,
2240 		    "Using %d RX descriptors instead of %d!\n",
2241 		    IX_DEF_RXD, nrxd);
2242 		rxr->rx_ndesc = IX_DEF_RXD;
2243 	} else {
2244 		rxr->rx_ndesc = nrxd;
2245 	}
2246 
2247 	/*
2248 	 * Allocate RX descriptor ring
2249 	 */
2250 	rsize = roundup2(rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc),
2251 	    IX_DBA_ALIGN);
2252 	rxr->rx_base = bus_dmamem_coherent_any(rxr->rx_sc->parent_tag,
2253 	    IX_DBA_ALIGN, rsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
2254 	    &rxr->rx_base_dtag, &rxr->rx_base_map, &rxr->rx_base_paddr);
2255 	if (rxr->rx_base == NULL) {
2256 		device_printf(rxr->rx_sc->dev,
2257 		    "Unable to allocate RX Descriptor memory\n");
2258 		return ENOMEM;
2259 	}
2260 
2261 	rsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_rx_buf) * rxr->rx_ndesc);
2262 	rxr->rx_buf = kmalloc_cachealign(rsize, M_DEVBUF, M_WAITOK | M_ZERO);
2263 
2264 	/*
2265 	 * Create DMA tag for RX buffers
2266 	 */
2267 	error = bus_dma_tag_create(rxr->rx_sc->parent_tag,
2268 	    1, 0,		/* alignment, bounds */
2269 	    BUS_SPACE_MAXADDR,	/* lowaddr */
2270 	    BUS_SPACE_MAXADDR,	/* highaddr */
2271 	    NULL, NULL,		/* filter, filterarg */
2272 	    PAGE_SIZE,		/* maxsize */
2273 	    1,			/* nsegments */
2274 	    PAGE_SIZE,		/* maxsegsize */
2275 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, /* flags */
2276 	    &rxr->rx_tag);
2277 	if (error) {
2278 		device_printf(rxr->rx_sc->dev,
2279 		    "Unable to create RX DMA tag\n");
2280 		kfree(rxr->rx_buf, M_DEVBUF);
2281 		rxr->rx_buf = NULL;
2282 		return error;
2283 	}
2284 
2285 	/*
2286 	 * Create spare DMA map for RX buffers
2287 	 */
2288 	error = bus_dmamap_create(rxr->rx_tag, BUS_DMA_WAITOK,
2289 	    &rxr->rx_sparemap);
2290 	if (error) {
2291 		device_printf(rxr->rx_sc->dev,
2292 		    "Unable to create spare RX DMA map\n");
2293 		bus_dma_tag_destroy(rxr->rx_tag);
2294 		kfree(rxr->rx_buf, M_DEVBUF);
2295 		rxr->rx_buf = NULL;
2296 		return error;
2297 	}
2298 
2299 	/*
2300 	 * Create DMA maps for RX buffers
2301 	 */
2302 	for (i = 0; i < rxr->rx_ndesc; ++i) {
2303 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2304 
2305 		error = bus_dmamap_create(rxr->rx_tag,
2306 		    BUS_DMA_WAITOK, &rxbuf->map);
2307 		if (error) {
2308 			device_printf(rxr->rx_sc->dev,
2309 			    "Unable to create RX DMA map\n");
2310 			ix_destroy_rx_ring(rxr, i);
2311 			return error;
2312 		}
2313 	}
2314 
2315 	/*
2316 	 * Initialize various watermarks
2317 	 */
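	/*
	 * rx_wreg_nsegs: number of descriptors refilled before the RX
	 * tail register is written back to hardware (see ix_rxeof()).
	 */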
2318 	rxr->rx_wreg_nsegs = IX_DEF_RXWREG_NSEGS;
2319 
2320 	return 0;
2321 }
2322 
2323 static void
2324 ix_destroy_rx_ring(struct ix_rx_ring *rxr, int ndesc)
2325 {
2326 	int i;
2327 
2328 	if (rxr->rx_base != NULL) {
2329 		bus_dmamap_unload(rxr->rx_base_dtag, rxr->rx_base_map);
2330 		bus_dmamem_free(rxr->rx_base_dtag, rxr->rx_base,
2331 		    rxr->rx_base_map);
2332 		bus_dma_tag_destroy(rxr->rx_base_dtag);
2333 		rxr->rx_base = NULL;
2334 	}
2335 
2336 	if (rxr->rx_buf == NULL)
2337 		return;
2338 
2339 	for (i = 0; i < ndesc; ++i) {
2340 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2341 
2342 		KKASSERT(rxbuf->m_head == NULL);
2343 		bus_dmamap_destroy(rxr->rx_tag, rxbuf->map);
2344 	}
2345 	bus_dmamap_destroy(rxr->rx_tag, rxr->rx_sparemap);
2346 	bus_dma_tag_destroy(rxr->rx_tag);
2347 
2348 	kfree(rxr->rx_buf, M_DEVBUF);
2349 	rxr->rx_buf = NULL;
2350 }
2351 
2352 /*
2353 ** Used to detect a descriptor that has
2354 ** been merged by Hardware RSC.
2355 */
2356 static __inline uint32_t
2357 ix_rsc_count(union ixgbe_adv_rx_desc *rx)
2358 {
2359 	return (le32toh(rx->wb.lower.lo_dword.data) &
2360 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
2361 }
2362 
2363 #if 0
2364 /*********************************************************************
2365  *
2366  *  Initialize the hardware RSC (LRO) feature on the 82599
2367  *  for an RX ring.  This is toggled by the LRO capability
2368  *  even though it is transparent to the stack.
2369  *
2370  *  NOTE: since this HW feature only works with IPv4 and
2371  *        our testing has shown soft LRO to be as effective,
2372  *        I have decided to disable this by default.
2373  *
2374  **********************************************************************/
2375 static void
2376 ix_setup_hw_rsc(struct ix_rx_ring *rxr)
2377 {
2378 	struct	ix_softc 	*sc = rxr->rx_sc;
2379 	struct	ixgbe_hw	*hw = &sc->hw;
2380 	uint32_t			rscctrl, rdrxctl;
2381 
2382 #if 0
2383 	/* If turning LRO/RSC off we need to disable it */
2384 	if ((sc->arpcom.ac_if.if_capenable & IFCAP_LRO) == 0) {
2385 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2386 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
2387 		return;
2388 	}
2389 #endif
2390 
2391 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
2392 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
2393 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
2394 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
2395 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
2396 
2397 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2398 	rscctrl |= IXGBE_RSCCTL_RSCEN;
2399 	/*
2400 	** Limit the total number of descriptors that
2401 	** can be combined, so it does not exceed 64K
2402 	*/
2403 	if (rxr->mbuf_sz == MCLBYTES)
2404 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
2405 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
2406 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
2407 	else if (rxr->mbuf_sz == MJUM9BYTES)
2408 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
2409 	else  /* Using 16K cluster */
2410 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
2411 
2412 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
2413 
2414 	/* Enable TCP header recognition */
2415 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
2416 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
2417 	    IXGBE_PSRTYPE_TCPHDR));
2418 
2419 	/* Disable RSC for ACK packets */
2420 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
2421 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
2422 
2423 	rxr->hw_rsc = TRUE;
2424 }
2425 #endif
2426 
2427 static int
2428 ix_init_rx_ring(struct ix_rx_ring *rxr)
2429 {
2430 	int i;
2431 
2432 	/* Clear the ring contents */
2433 	bzero(rxr->rx_base, rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2434 
2435 	/* XXX we need JUMPAGESIZE for RSC too */
2436 	if (rxr->rx_sc->max_frame_size <= MCLBYTES)
2437 		rxr->rx_mbuf_sz = MCLBYTES;
2438 	else
2439 		rxr->rx_mbuf_sz = MJUMPAGESIZE;
2440 
2441 	/* Now replenish the mbufs */
2442 	for (i = 0; i < rxr->rx_ndesc; ++i) {
2443 		int error;
2444 
2445 		error = ix_newbuf(rxr, i, TRUE);
2446 		if (error)
2447 			return error;
2448 	}
2449 
2450 	/* Setup our descriptor indices */
2451 	rxr->rx_next_check = 0;
2452 	rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2453 
2454 #if 0
2455 	/*
2456 	** Now set up the LRO interface:
2457 	*/
2458 	if (ixgbe_rsc_enable)
2459 		ix_setup_hw_rsc(rxr);
2460 #endif
2461 
2462 	return 0;
2463 }
2464 
2465 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
2466 
2467 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
2468 
2469 static void
2470 ix_init_rx_unit(struct ix_softc *sc, boolean_t polling)
2471 {
2472 	struct ixgbe_hw	*hw = &sc->hw;
2473 	struct ifnet *ifp = &sc->arpcom.ac_if;
2474 	uint32_t bufsz, fctrl, rxcsum, hlreg;
2475 	int i;
2476 
2477 	/*
2478 	 * Make sure receives are disabled while setting up the descriptor ring
2479 	 */
2480 	ixgbe_disable_rx(hw);
2481 
2482 	/* Enable broadcasts */
2483 	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
2484 	fctrl |= IXGBE_FCTRL_BAM;
2485 	if (hw->mac.type == ixgbe_mac_82598EB) {
2486 		fctrl |= IXGBE_FCTRL_DPF;
2487 		fctrl |= IXGBE_FCTRL_PMCF;
2488 	}
2489 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
2490 
2491 	/* Set for Jumbo Frames? */
2492 	hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
2493 	if (ifp->if_mtu > ETHERMTU)
2494 		hlreg |= IXGBE_HLREG0_JUMBOEN;
2495 	else
2496 		hlreg &= ~IXGBE_HLREG0_JUMBOEN;
2497 	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
2498 
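	/*
	 * Convert the RX buffer size into SRRCTL BSIZEPKT units (1KB
	 * granularity), rounding up.
	 */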
2499 	KKASSERT(sc->rx_rings[0].rx_mbuf_sz >= MCLBYTES);
2500 	bufsz = (sc->rx_rings[0].rx_mbuf_sz + BSIZEPKT_ROUNDUP) >>
2501 	    IXGBE_SRRCTL_BSIZEPKT_SHIFT;
2502 
2503 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
2504 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
2505 		uint64_t rdba = rxr->rx_base_paddr;
2506 		uint32_t srrctl;
2507 
2508 		/* Setup the Base and Length of the Rx Descriptor Ring */
2509 		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i), (uint32_t)rdba);
2510 		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (uint32_t)(rdba >> 32));
2511 		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
2512 		    rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2513 
2514 		/*
2515 		 * Set up the SRRCTL register
2516 		 */
2517 		srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
2518 
2519 		srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
2520 		srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
2521 		srrctl |= bufsz;
2522 		srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
2523 		if (sc->rx_ring_inuse > 1) {
2524 			/* See the comment near ix_enable_rx_drop() */
2525 			if (sc->ifm_media &
2526 			    (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) {
2527 				srrctl &= ~IXGBE_SRRCTL_DROP_EN;
2528 				if (i == 0 && bootverbose) {
2529 					if_printf(ifp, "flow control %s, "
2530 					    "disable RX drop\n",
2531 					    ix_ifmedia2str(sc->ifm_media));
2532 				}
2533 			} else {
2534 				srrctl |= IXGBE_SRRCTL_DROP_EN;
2535 				if (i == 0 && bootverbose) {
2536 					if_printf(ifp, "flow control %s, "
2537 					    "enable RX drop\n",
2538 					    ix_ifmedia2str(sc->ifm_media));
2539 				}
2540 			}
2541 		}
2542 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
2543 
2544 		/* Setup the HW Rx Head and Tail Descriptor Pointers */
2545 		IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
2546 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
2547 	}
2548 
2549 	if (sc->hw.mac.type != ixgbe_mac_82598EB)
2550 		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), 0);
2551 
2552 	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
2553 
2554 	/*
2555 	 * Setup RSS
2556 	 */
2557 	if (sc->rx_ring_inuse > 1) {
2558 		uint8_t key[IX_NRSSRK * IX_RSSRK_SIZE];
2559 		const struct if_ringmap *rm;
2560 		int j, r, nreta, table_nent;
2561 
2562 		/*
2563 		 * NOTE:
2564 		 * When we reach here, RSS has already been disabled
2565 		 * in ix_stop(), so we could safely configure RSS key
2566 		 * and redirect table.
2567 		 */
2568 
2569 		/*
2570 		 * Configure RSS key
2571 		 */
2572 		toeplitz_get_key(key, sizeof(key));
2573 		for (i = 0; i < IX_NRSSRK; ++i) {
2574 			uint32_t rssrk;
2575 
2576 			rssrk = IX_RSSRK_VAL(key, i);
2577 			IX_RSS_DPRINTF(sc, 1, "rssrk%d 0x%08x\n",
2578 			    i, rssrk);
2579 
2580 			IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rssrk);
2581 		}
2582 
2583 		/*
2584 		 * Configure RSS redirect table.
2585 		 */
2586 
2587 		/* Table size will differ based on MAC */
2588 		switch (hw->mac.type) {
2589 		case ixgbe_mac_X550:
2590 		case ixgbe_mac_X550EM_x:
2591 		case ixgbe_mac_X550EM_a:
2592 			nreta = IX_NRETA_X550;
2593 			break;
2594 		default:
2595 			nreta = IX_NRETA;
2596 			break;
2597 		}
2598 
2599 		table_nent = nreta * IX_RETA_SIZE;
2600 		KASSERT(table_nent <= IX_RDRTABLE_SIZE,
2601 		    ("invalid RETA count %d", nreta));
2602 		if (polling)
2603 			rm = sc->rx_rmap;
2604 		else
2605 			rm = sc->rx_rmap_intr;
2606 		if_ringmap_rdrtable(rm, sc->rdr_table, table_nent);
2607 
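		/*
		 * Each 32-bit RETA/ERETA register holds four one-byte RX
		 * queue indices, so pack the redirect table entries four
		 * at a time.
		 */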
2608 		r = 0;
2609 		for (j = 0; j < nreta; ++j) {
2610 			uint32_t reta = 0;
2611 
2612 			for (i = 0; i < IX_RETA_SIZE; ++i) {
2613 				uint32_t q;
2614 
2615 				q = sc->rdr_table[r];
2616 				KASSERT(q < sc->rx_ring_inuse,
2617 				    ("invalid RX ring index %d", q));
2618 				reta |= q << (8 * i);
2619 				++r;
2620 			}
2621 			IX_RSS_DPRINTF(sc, 1, "reta 0x%08x\n", reta);
2622 			if (j < IX_NRETA) {
2623 				IXGBE_WRITE_REG(hw, IXGBE_RETA(j), reta);
2624 			} else {
2625 				IXGBE_WRITE_REG(hw, IXGBE_ERETA(j - IX_NRETA),
2626 				    reta);
2627 			}
2628 		}
2629 
2630 		/*
2631 		 * Enable multiple receive queues.
2632 		 * Enable IPv4 RSS standard hash functions.
2633 		 */
2634 		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
2635 		    IXGBE_MRQC_RSSEN |
2636 		    IXGBE_MRQC_RSS_FIELD_IPV4 |
2637 		    IXGBE_MRQC_RSS_FIELD_IPV4_TCP);
2638 
2639 		/*
2640 		 * NOTE:
2641 		 * PCSD must be enabled to enable multiple
2642 		 * receive queues.
2643 		 */
2644 		rxcsum |= IXGBE_RXCSUM_PCSD;
2645 	}
2646 
2647 	if (ifp->if_capenable & IFCAP_RXCSUM)
2648 		rxcsum |= IXGBE_RXCSUM_PCSD;
2649 
2650 	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
2651 }
2652 
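/*
 * Hand refilled RX descriptors back to the hardware by advancing the
 * RX tail register to the descriptor just before index 'i' (the next
 * descriptor to be checked), wrapping around the ring as needed.
 */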
2653 static __inline void
2654 ix_rx_refresh(struct ix_rx_ring *rxr, int i)
2655 {
2656 	if (--i < 0)
2657 		i = rxr->rx_ndesc - 1;
2658 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, IXGBE_RDT(rxr->rx_idx), i);
2659 }
2660 
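/*
 * Translate the hardware RX checksum status bits into mbuf checksum
 * flags for IPv4 TCP/UDP packets.
 */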
2661 static __inline void
2662 ix_rxcsum(uint32_t staterr, struct mbuf *mp, uint32_t ptype)
2663 {
2664 	if ((ptype &
2665 	     (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_IPV4_EX)) == 0) {
2666 		/* Not IPv4 */
2667 		return;
2668 	}
2669 
2670 	if ((staterr & (IXGBE_RXD_STAT_IPCS | IXGBE_RXDADV_ERR_IPE)) ==
2671 	    IXGBE_RXD_STAT_IPCS)
2672 		mp->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID;
2673 
2674 	if ((ptype &
2675 	     (IXGBE_RXDADV_PKTTYPE_TCP | IXGBE_RXDADV_PKTTYPE_UDP)) == 0) {
2676 		/*
2677 		 * - Neither TCP nor UDP
2678 		 * - IPv4 fragment
2679 		 */
2680 		return;
2681 	}
2682 
2683 	if ((staterr & (IXGBE_RXD_STAT_L4CS | IXGBE_RXDADV_ERR_TCPE)) ==
2684 	    IXGBE_RXD_STAT_L4CS) {
2685 		mp->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
2686 		    CSUM_FRAG_NOT_CHECKED;
2687 		mp->m_pkthdr.csum_data = htons(0xffff);
2688 	}
2689 }
2690 
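/*
 * Fill in the packet info used for protocol-aware input dispatch from
 * the RSS hash type and set the mbuf's Toeplitz hash; returns NULL for
 * hash types we do not redistribute on.
 */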
2691 static __inline struct pktinfo *
2692 ix_rssinfo(struct mbuf *m, struct pktinfo *pi,
2693     uint32_t hash, uint32_t hashtype, uint32_t ptype)
2694 {
2695 	switch (hashtype) {
2696 	case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
2697 		pi->pi_netisr = NETISR_IP;
2698 		pi->pi_flags = 0;
2699 		pi->pi_l3proto = IPPROTO_TCP;
2700 		break;
2701 
2702 	case IXGBE_RXDADV_RSSTYPE_IPV4:
2703 		if ((ptype & IXGBE_RXDADV_PKTTYPE_UDP) == 0) {
2704 			/* Not UDP or is fragment */
2705 			return NULL;
2706 		}
2707 		pi->pi_netisr = NETISR_IP;
2708 		pi->pi_flags = 0;
2709 		pi->pi_l3proto = IPPROTO_UDP;
2710 		break;
2711 
2712 	default:
2713 		return NULL;
2714 	}
2715 
2716 	m_sethash(m, toeplitz_hash(hash));
2717 	return pi;
2718 }
2719 
2720 static __inline void
2721 ix_setup_rxdesc(union ixgbe_adv_rx_desc *rxd, const struct ix_rx_buf *rxbuf)
2722 {
2723 	rxd->read.pkt_addr = htole64(rxbuf->paddr);
2724 	rxd->wb.upper.status_error = 0;
2725 }
2726 
2727 static void
2728 ix_rx_discard(struct ix_rx_ring *rxr, int i, boolean_t eop)
2729 {
2730 	struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2731 
2732 	/*
2733 	 * XXX discard may not be correct
2734 	 */
2735 	if (eop) {
2736 		IFNET_STAT_INC(&rxr->rx_sc->arpcom.ac_if, ierrors, 1);
2737 		rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2738 	} else {
2739 		rxr->rx_flags |= IX_RXRING_FLAG_DISC;
2740 	}
2741 	if (rxbuf->fmp != NULL) {
2742 		m_freem(rxbuf->fmp);
2743 		rxbuf->fmp = NULL;
2744 		rxbuf->lmp = NULL;
2745 	}
2746 	ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
2747 }
2748 
2749 static void
2750 ix_rxeof(struct ix_rx_ring *rxr, int count)
2751 {
2752 	struct ifnet *ifp = &rxr->rx_sc->arpcom.ac_if;
2753 	int i, nsegs = 0, cpuid = mycpuid;
2754 
2755 	i = rxr->rx_next_check;
2756 	while (count != 0) {
2757 		struct ix_rx_buf *rxbuf, *nbuf = NULL;
2758 		union ixgbe_adv_rx_desc	*cur;
2759 		struct mbuf *sendmp = NULL, *mp;
2760 		struct pktinfo *pi = NULL, pi0;
2761 		uint32_t rsc = 0, ptype, staterr, hash, hashtype;
2762 		uint16_t len;
2763 		boolean_t eop;
2764 
2765 		cur = &rxr->rx_base[i];
2766 		staterr = le32toh(cur->wb.upper.status_error);
2767 
2768 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
2769 			break;
2770 		++nsegs;
2771 
2772 		rxbuf = &rxr->rx_buf[i];
2773 		mp = rxbuf->m_head;
2774 
2775 		len = le16toh(cur->wb.upper.length);
2776 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
2777 		    IXGBE_RXDADV_PKTTYPE_MASK;
2778 		hash = le32toh(cur->wb.lower.hi_dword.rss);
2779 		hashtype = le32toh(cur->wb.lower.lo_dword.data) &
2780 		    IXGBE_RXDADV_RSSTYPE_MASK;
2781 
2782 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
2783 		if (eop)
2784 			--count;
2785 
2786 		/*
2787 		 * Make sure bad packets are discarded
2788 		 */
2789 		if ((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) ||
2790 		    (rxr->rx_flags & IX_RXRING_FLAG_DISC)) {
2791 			ix_rx_discard(rxr, i, eop);
2792 			goto next_desc;
2793 		}
2794 
2795 		bus_dmamap_sync(rxr->rx_tag, rxbuf->map, BUS_DMASYNC_POSTREAD);
2796 		if (ix_newbuf(rxr, i, FALSE) != 0) {
2797 			ix_rx_discard(rxr, i, eop);
2798 			goto next_desc;
2799 		}
2800 
2801 		/*
2802 		 * On the 82599, which supports hardware LRO (RSC), packets
2803 		 * need not be fragmented across sequential descriptors;
2804 		 * rather, the next descriptor is indicated in bits
2805 		 * of the descriptor.  This also means that we might
2806 		 * process more than one packet at a time, something
2807 		 * that has never been true before; it required
2808 		 * eliminating global chain pointers in favor of what
2809 		 * we are doing here.
2810 		 */
2811 		if (!eop) {
2812 			int nextp;
2813 
2814 			/*
2815 			 * Figure out the next descriptor
2816 			 * of this frame.
2817 			 */
2818 			if (rxr->rx_flags & IX_RXRING_FLAG_LRO)
2819 				rsc = ix_rsc_count(cur);
2820 			if (rsc) { /* Get hardware index */
2821 				nextp = ((staterr &
2822 				    IXGBE_RXDADV_NEXTP_MASK) >>
2823 				    IXGBE_RXDADV_NEXTP_SHIFT);
2824 			} else { /* Just sequential */
2825 				nextp = i + 1;
2826 				if (nextp == rxr->rx_ndesc)
2827 					nextp = 0;
2828 			}
2829 			nbuf = &rxr->rx_buf[nextp];
2830 			prefetch(nbuf);
2831 		}
2832 		mp->m_len = len;
2833 
2834 		/*
2835 		 * Rather than using the fmp/lmp global pointers
2836 		 * we now keep the head of a packet chain in the
2837 		 * buffer struct and pass this along from one
2838 		 * descriptor to the next, until we get EOP.
2839 		 */
2840 		if (rxbuf->fmp == NULL) {
2841 			mp->m_pkthdr.len = len;
2842 			rxbuf->fmp = mp;
2843 			rxbuf->lmp = mp;
2844 		} else {
2845 			rxbuf->fmp->m_pkthdr.len += len;
2846 			rxbuf->lmp->m_next = mp;
2847 			rxbuf->lmp = mp;
2848 		}
2849 
2850 		if (nbuf != NULL) {
2851 			/*
2852 			 * Not the last fragment of this frame,
2853 			 * pass this fragment list on
2854 			 */
2855 			nbuf->fmp = rxbuf->fmp;
2856 			nbuf->lmp = rxbuf->lmp;
2857 		} else {
2858 			/*
2859 			 * Send this frame
2860 			 */
2861 			sendmp = rxbuf->fmp;
2862 
2863 			sendmp->m_pkthdr.rcvif = ifp;
2864 			IFNET_STAT_INC(ifp, ipackets, 1);
2865 #ifdef IX_RSS_DEBUG
2866 			rxr->rx_pkts++;
2867 #endif
2868 
2869 			/* Process vlan info */
2870 			if (staterr & IXGBE_RXD_STAT_VP) {
2871 				sendmp->m_pkthdr.ether_vlantag =
2872 				    le16toh(cur->wb.upper.vlan);
2873 				sendmp->m_flags |= M_VLANTAG;
2874 			}
2875 			if (ifp->if_capenable & IFCAP_RXCSUM)
2876 				ix_rxcsum(staterr, sendmp, ptype);
2877 			if (ifp->if_capenable & IFCAP_RSS) {
2878 				pi = ix_rssinfo(sendmp, &pi0,
2879 				    hash, hashtype, ptype);
2880 			}
2881 		}
2882 		rxbuf->fmp = NULL;
2883 		rxbuf->lmp = NULL;
2884 next_desc:
2885 		/* Advance our pointers to the next descriptor. */
2886 		if (++i == rxr->rx_ndesc)
2887 			i = 0;
2888 
2889 		if (sendmp != NULL)
2890 			ifp->if_input(ifp, sendmp, pi, cpuid);
2891 
2892 		if (nsegs >= rxr->rx_wreg_nsegs) {
2893 			ix_rx_refresh(rxr, i);
2894 			nsegs = 0;
2895 		}
2896 	}
2897 	rxr->rx_next_check = i;
2898 
2899 	if (nsegs > 0)
2900 		ix_rx_refresh(rxr, i);
2901 }
2902 
2903 static void
2904 ix_set_vlan(struct ix_softc *sc)
2905 {
2906 	struct ixgbe_hw *hw = &sc->hw;
2907 	uint32_t ctrl;
2908 
2909 	if (hw->mac.type == ixgbe_mac_82598EB) {
2910 		ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
2911 		ctrl |= IXGBE_VLNCTRL_VME;
2912 		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
2913 	} else {
2914 		int i;
2915 
2916 		/*
2917 		 * On 82599 and later chips the VLAN enable is
2918 		 * per queue in RXDCTL
2919 		 */
2920 		for (i = 0; i < sc->rx_ring_inuse; ++i) {
2921 			ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
2922 			ctrl |= IXGBE_RXDCTL_VME;
2923 			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
2924 		}
2925 	}
2926 }
2927 
2928 static void
2929 ix_enable_intr(struct ix_softc *sc)
2930 {
2931 	struct ixgbe_hw	*hw = &sc->hw;
2932 	uint32_t fwsm;
2933 	int i;
2934 
2935 	for (i = 0; i < sc->intr_cnt; ++i)
2936 		lwkt_serialize_handler_enable(sc->intr_data[i].intr_serialize);
2937 
2938 	sc->intr_mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
2939 
2940 	/* Enable Fan Failure detection */
2941 	if (hw->device_id == IXGBE_DEV_ID_82598AT)
2942 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
2943 
2944 	switch (hw->mac.type) {
2945 	case ixgbe_mac_82599EB:
2946 		sc->intr_mask |= IXGBE_EIMS_ECC;
2947 		/* Temperature sensor on some adapters */
2948 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP0;
2949 		/* SFP+ (RX_LOS_N & MOD_ABS_N) */
2950 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
2951 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP2;
2952 		break;
2953 
2954 	case ixgbe_mac_X540:
2955 		sc->intr_mask |= IXGBE_EIMS_ECC;
2956 		/* Detect if Thermal Sensor is enabled */
2957 		fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
2958 		if (fwsm & IXGBE_FWSM_TS_ENABLED)
2959 			sc->intr_mask |= IXGBE_EIMS_TS;
2960 		break;
2961 
2962 	case ixgbe_mac_X550:
2963 	case ixgbe_mac_X550EM_a:
2964 	case ixgbe_mac_X550EM_x:
2965 		sc->intr_mask |= IXGBE_EIMS_ECC;
2966 		/* MAC thermal sensor is automatically enabled */
2967 		sc->intr_mask |= IXGBE_EIMS_TS;
2968 		/* Some devices use SDP0 for important information */
2969 		if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
2970 		    hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T)
2971 			sc->intr_mask |= IXGBE_EIMS_GPI_SDP0_BY_MAC(hw);
2972 		/* FALL THROUGH */
2973 	default:
2974 		break;
2975 	}
2976 
2977 	/* With MSI-X we use auto clear for RX and TX rings */
2978 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
2979 		/*
2980 		 * There are no EIAC1/EIAC2 for newer chips; the related
2981 		 * bits for TX and RX rings > 16 are always auto clear.
2982 		 *
2983 		 * XXX which bits?  There are _no_ documented EICR1 and
2984 		 * EICR2 at all; only EICR.
2985 		 */
2986 		IXGBE_WRITE_REG(hw, IXGBE_EIAC, IXGBE_EIMS_RTX_QUEUE);
2987 	} else {
2988 		sc->intr_mask |= IX_TX_INTR_MASK | IX_RX0_INTR_MASK;
2989 
2990 		KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
2991 		if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS)
2992 			sc->intr_mask |= IX_RX1_INTR_MASK;
2993 	}
2994 
2995 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
2996 
2997 	/*
2998 	 * Enable RX and TX rings for MSI-X
2999 	 */
3000 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
3001 		for (i = 0; i < sc->tx_ring_inuse; ++i) {
3002 			const struct ix_tx_ring *txr = &sc->tx_rings[i];
3003 
3004 			if (txr->tx_intr_vec >= 0) {
3005 				IXGBE_WRITE_REG(hw, txr->tx_eims,
3006 				    txr->tx_eims_val);
3007 			}
3008 		}
3009 		for (i = 0; i < sc->rx_ring_inuse; ++i) {
3010 			const struct ix_rx_ring *rxr = &sc->rx_rings[i];
3011 
3012 			KKASSERT(rxr->rx_intr_vec >= 0);
3013 			IXGBE_WRITE_REG(hw, rxr->rx_eims, rxr->rx_eims_val);
3014 		}
3015 	}
3016 
3017 	IXGBE_WRITE_FLUSH(hw);
3018 }
3019 
3020 static void
3021 ix_disable_intr(struct ix_softc *sc)
3022 {
3023 	int i;
3024 
3025 	if (sc->intr_type == PCI_INTR_TYPE_MSIX)
3026 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIAC, 0);
3027 
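	/*
	 * Mask all interrupt causes.  The 82598 keeps everything in a
	 * single EIMC register; later MACs also carry the RX/TX queue
	 * bits in EIMC_EX(0)/EIMC_EX(1), so clear those as well.
	 */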
3028 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
3029 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, ~0);
3030 	} else {
3031 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, 0xFFFF0000);
3032 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(0), ~0);
3033 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(1), ~0);
3034 	}
3035 	IXGBE_WRITE_FLUSH(&sc->hw);
3036 
3037 	for (i = 0; i < sc->intr_cnt; ++i)
3038 		lwkt_serialize_handler_disable(sc->intr_data[i].intr_serialize);
3039 }
3040 
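/*
 * PCI config space accessors used by the shared ixgbe code; hw->back
 * points at the ixgbe_osdep wrapper that carries our device_t.
 */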
3041 uint16_t
3042 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, uint32_t reg)
3043 {
3044 	return pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
3045 	    reg, 2);
3046 }
3047 
3048 void
3049 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, uint32_t reg, uint16_t value)
3050 {
3051 	pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
3052 	    reg, value, 2);
3053 }
3054 
3055 static void
3056 ix_slot_info(struct ix_softc *sc)
3057 {
3058 	struct ixgbe_hw *hw = &sc->hw;
3059 	device_t dev = sc->dev;
3060 	struct ixgbe_mac_info *mac = &hw->mac;
3061 	uint16_t link;
3062 	uint32_t offset;
3063 
3064 	/* For most devices simply call the shared code routine */
3065 	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
3066 		ixgbe_get_bus_info(hw);
3067 		/* These devices don't use PCI-E */
3068 		if (hw->mac.type == ixgbe_mac_X550EM_x ||
3069 		    hw->mac.type == ixgbe_mac_X550EM_a)
3070 			return;
3071 		goto display;
3072 	}
3073 
3074 	/*
3075 	 * For the Quad port adapter we need to parse back
3076 	 * up the PCI tree to find the speed of the expansion
3077 	 * slot into which this adapter is plugged. A bit more work.
3078 	 */
3079 	dev = device_get_parent(device_get_parent(dev));
3080 #ifdef IXGBE_DEBUG
3081 	device_printf(dev, "parent pcib = %x,%x,%x\n",
3082 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
3083 #endif
3084 	dev = device_get_parent(device_get_parent(dev));
3085 #ifdef IXGBE_DEBUG
3086 	device_printf(dev, "slot pcib = %x,%x,%x\n",
3087 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
3088 #endif
3089 	/* Now get the PCI Express Capabilities offset */
3090 	offset = pci_get_pciecap_ptr(dev);
3091 	/* ...and read the Link Status Register */
3092 	link = pci_read_config(dev, offset + PCIER_LINKSTAT, 2);
3093 	switch (link & IXGBE_PCI_LINK_WIDTH) {
3094 	case IXGBE_PCI_LINK_WIDTH_1:
3095 		hw->bus.width = ixgbe_bus_width_pcie_x1;
3096 		break;
3097 	case IXGBE_PCI_LINK_WIDTH_2:
3098 		hw->bus.width = ixgbe_bus_width_pcie_x2;
3099 		break;
3100 	case IXGBE_PCI_LINK_WIDTH_4:
3101 		hw->bus.width = ixgbe_bus_width_pcie_x4;
3102 		break;
3103 	case IXGBE_PCI_LINK_WIDTH_8:
3104 		hw->bus.width = ixgbe_bus_width_pcie_x8;
3105 		break;
3106 	default:
3107 		hw->bus.width = ixgbe_bus_width_unknown;
3108 		break;
3109 	}
3110 
3111 	switch (link & IXGBE_PCI_LINK_SPEED) {
3112 	case IXGBE_PCI_LINK_SPEED_2500:
3113 		hw->bus.speed = ixgbe_bus_speed_2500;
3114 		break;
3115 	case IXGBE_PCI_LINK_SPEED_5000:
3116 		hw->bus.speed = ixgbe_bus_speed_5000;
3117 		break;
3118 	case IXGBE_PCI_LINK_SPEED_8000:
3119 		hw->bus.speed = ixgbe_bus_speed_8000;
3120 		break;
3121 	default:
3122 		hw->bus.speed = ixgbe_bus_speed_unknown;
3123 		break;
3124 	}
3125 
3126 	mac->ops.set_lan_id(hw);
3127 
3128 display:
3129 	device_printf(dev, "PCI Express Bus: Speed %s %s\n",
3130 	    hw->bus.speed == ixgbe_bus_speed_8000 ? "8.0GT/s" :
3131 	    hw->bus.speed == ixgbe_bus_speed_5000 ? "5.0GT/s" :
3132 	    hw->bus.speed == ixgbe_bus_speed_2500 ? "2.5GT/s" : "Unknown",
3133 	    hw->bus.width == ixgbe_bus_width_pcie_x8 ? "Width x8" :
3134 	    hw->bus.width == ixgbe_bus_width_pcie_x4 ? "Width x4" :
3135 	    hw->bus.width == ixgbe_bus_width_pcie_x1 ? "Width x1" : "Unknown");
3136 
3137 	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP &&
3138 	    hw->bus.width <= ixgbe_bus_width_pcie_x4 &&
3139 	    hw->bus.speed == ixgbe_bus_speed_2500) {
3140 		device_printf(dev, "For optimal performance a x8 "
3141 		    "PCIE, or x4 PCIE Gen2 slot is required.\n");
3142 	} else if (hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP &&
3143 	    hw->bus.width <= ixgbe_bus_width_pcie_x8 &&
3144 	    hw->bus.speed < ixgbe_bus_speed_8000) {
3145 		device_printf(dev, "For optimal performance a x8 "
3146 		    "PCIE Gen3 slot is required.\n");
3147 	}
3148 }
3149 
3150 /*
3151  * Setup the correct IVAR register for a particular MSI-X interrupt.
3152  *
3153  * - entry is the queue/cause index
3154  * - vector is the MSI-X vector for this queue or cause
3155  * - type is 0 for an RX queue, 1 for a TX queue, and -1 for
3156  *   other (misc) causes
3157  */
3158 static void
3159 ix_set_ivar(struct ix_softc *sc, uint8_t entry, uint8_t vector,
3160     int8_t type)
3161 {
3162 	struct ixgbe_hw *hw = &sc->hw;
3163 	uint32_t ivar, index;
3164 
3165 	vector |= IXGBE_IVAR_ALLOC_VAL;
3166 
3167 	switch (hw->mac.type) {
3168 	case ixgbe_mac_82598EB:
3169 		if (type == -1)
3170 			entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
3171 		else
3172 			entry += (type * 64);
3173 		index = (entry >> 2) & 0x1F;
3174 		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
3175 		ivar &= ~(0xFF << (8 * (entry & 0x3)));
3176 		ivar |= (vector << (8 * (entry & 0x3)));
3177 		IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
3178 		break;
3179 
3180 	case ixgbe_mac_82599EB:
3181 	case ixgbe_mac_X540:
3182 	case ixgbe_mac_X550:
3183 	case ixgbe_mac_X550EM_a:
3184 	case ixgbe_mac_X550EM_x:
3185 		if (type == -1) { /* MISC IVAR */
3186 			index = (entry & 1) * 8;
3187 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
3188 			ivar &= ~(0xFF << index);
3189 			ivar |= (vector << index);
3190 			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
3191 		} else {	/* RX/TX IVARS */
3192 			index = (16 * (entry & 1)) + (8 * type);
3193 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
3194 			ivar &= ~(0xFF << index);
3195 			ivar |= (vector << index);
3196 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
3197 		}
3198 		/* FALL THROUGH */
3199 	default:
3200 		break;
3201 	}
3202 }
3203 
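/*
 * Probe for an SFP+ module on adapters whose module was not present at
 * attach time; returns TRUE once a supported module has been detected.
 */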
3204 static boolean_t
3205 ix_sfp_probe(struct ix_softc *sc)
3206 {
3207 	struct ixgbe_hw	*hw = &sc->hw;
3208 
3209 	if (hw->phy.type == ixgbe_phy_nl &&
3210 	    hw->phy.sfp_type == ixgbe_sfp_type_not_present) {
3211 		int32_t ret;
3212 
3213 		ret = hw->phy.ops.identify_sfp(hw);
3214 		if (ret)
3215 			return FALSE;
3216 
3217 		ret = hw->phy.ops.reset(hw);
3218 		if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3219 			if_printf(&sc->arpcom.ac_if,
3220 			     "Unsupported SFP+ module detected!  "
3221 			     "Reload driver with supported module.\n");
3222 			sc->sfp_probe = FALSE;
3223 			return FALSE;
3224 		}
3225 		if_printf(&sc->arpcom.ac_if, "SFP+ module detected!\n");
3226 
3227 		/* We now have supported optics */
3228 		sc->sfp_probe = FALSE;
3229 
3230 		return TRUE;
3231 	}
3232 	return FALSE;
3233 }
3234 
3235 static void
3236 ix_handle_link(struct ix_softc *sc)
3237 {
3238 	ixgbe_check_link(&sc->hw, &sc->link_speed, &sc->link_up, 0);
3239 	ix_update_link_status(sc);
3240 }
3241 
3242 /*
3243  * Handling SFP module
3244  */
3245 static void
3246 ix_handle_mod(struct ix_softc *sc)
3247 {
3248 	struct ixgbe_hw *hw = &sc->hw;
3249 	uint32_t err;
3250 
3251 	err = hw->phy.ops.identify_sfp(hw);
3252 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3253 		if_printf(&sc->arpcom.ac_if,
3254 		    "Unsupported SFP+ module type was detected.\n");
3255 		return;
3256 	}
3257 	err = hw->mac.ops.setup_sfp(hw);
3258 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3259 		if_printf(&sc->arpcom.ac_if,
3260 		    "Setup failure - unsupported SFP+ module type.\n");
3261 		return;
3262 	}
3263 	ix_handle_msf(sc);
3264 }
3265 
3266 /*
3267  * Handling MSF (multispeed fiber)
3268  */
3269 static void
3270 ix_handle_msf(struct ix_softc *sc)
3271 {
3272 	struct ixgbe_hw *hw = &sc->hw;
3273 	uint32_t autoneg;
3274 
3275 	hw->phy.ops.identify_sfp(hw);
3276 	ix_init_media(sc);
3277 
3278 	if (sc->advspeed != IXGBE_LINK_SPEED_UNKNOWN)
3279 		autoneg = sc->advspeed;
3280 	else
3281 		autoneg = hw->phy.autoneg_advertised;
3282 	if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
3283 		bool negotiate;
3284 
3285 		hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
3286 	}
3287 	if (hw->mac.ops.setup_link != NULL)
3288 		hw->mac.ops.setup_link(hw, autoneg, TRUE);
3289 }
3290 
3291 static void
3292 ix_handle_phy(struct ix_softc *sc)
3293 {
3294 	struct ixgbe_hw *hw = &sc->hw;
3295 	int error;
3296 
3297 	error = hw->phy.ops.handle_lasi(hw);
3298 	if (error == IXGBE_ERR_OVERTEMP) {
3299 		if_printf(&sc->arpcom.ac_if,
3300 		    "CRITICAL: EXTERNAL PHY OVER TEMP!!  "
3301 		    "PHY will downshift to lower power state!\n");
3302 	} else if (error) {
3303 		if_printf(&sc->arpcom.ac_if,
3304 		    "Error handling LASI interrupt: %d\n", error);
3305 	}
3306 }
3307 
3308 static void
3309 ix_update_stats(struct ix_softc *sc)
3310 {
3311 	struct ifnet *ifp = &sc->arpcom.ac_if;
3312 	struct ixgbe_hw *hw = &sc->hw;
3313 	uint32_t missed_rx = 0, bprc, lxon, lxoff, total;
3314 	uint64_t total_missed_rx = 0;
3315 	int i;
3316 
3317 	sc->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
3318 	sc->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
3319 	sc->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
3320 	sc->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
3321 
3322 	for (i = 0; i < 16; i++) {
3323 		sc->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
3324 		sc->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
3325 		sc->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
3326 	}
3327 	sc->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
3328 	sc->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
3329 	sc->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
3330 
3331 	/* Hardware workaround, gprc counts missed packets */
3332 	sc->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
3333 	sc->stats.gprc -= missed_rx;
3334 
3335 	if (hw->mac.type != ixgbe_mac_82598EB) {
3336 		sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
3337 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
3338 		sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
3339 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
3340 		sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
3341 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
3342 		sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
3343 		sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
3344 	} else {
3345 		sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
3346 		sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
3347 		/* 82598 only has a counter in the high register */
3348 		sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
3349 		sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
3350 		sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
3351 	}
3352 
3353 	/*
3354 	 * Workaround: mprc hardware is incorrectly counting
3355 	 * broadcasts, so for now we subtract those.
3356 	 */
3357 	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
3358 	sc->stats.bprc += bprc;
3359 	sc->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
3360 	if (hw->mac.type == ixgbe_mac_82598EB)
3361 		sc->stats.mprc -= bprc;
3362 
3363 	sc->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
3364 	sc->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
3365 	sc->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
3366 	sc->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
3367 	sc->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
3368 	sc->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
3369 
3370 	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
3371 	sc->stats.lxontxc += lxon;
3372 	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
3373 	sc->stats.lxofftxc += lxoff;
3374 	total = lxon + lxoff;
3375 
3376 	sc->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
3377 	sc->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
3378 	sc->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
3379 	sc->stats.gptc -= total;
3380 	sc->stats.mptc -= total;
3381 	sc->stats.ptc64 -= total;
3382 	sc->stats.gotc -= total * ETHER_MIN_LEN;
3383 
3384 	sc->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
3385 	sc->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
3386 	sc->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
3387 	sc->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
3388 	sc->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
3389 	sc->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
3390 	sc->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
3391 	sc->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
3392 	sc->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
3393 	sc->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
3394 	sc->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
3395 	sc->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
3396 	sc->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
3397 	sc->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
3398 	sc->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
3399 	sc->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC);
3400 	sc->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
3401 	sc->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
3402 	/* Only read FCOE on 82599 */
3403 	if (hw->mac.type != ixgbe_mac_82598EB) {
3404 		sc->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
3405 		sc->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
3406 		sc->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
3407 		sc->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
3408 		sc->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
3409 	}
3410 
3411 	/* Rx Errors */
3412 	IFNET_STAT_SET(ifp, iqdrops, total_missed_rx);
3413 	IFNET_STAT_SET(ifp, ierrors, sc->stats.crcerrs + sc->stats.rlec);
3414 }
3415 
3416 #if 0
3417 /*
3418  * Add sysctl variables, one per statistic, to the system.
3419  */
3420 static void
3421 ix_add_hw_stats(struct ix_softc *sc)
3422 {
3423 
3424 	device_t dev = sc->dev;
3425 
3426 	struct ix_tx_ring *txr = sc->tx_rings;
3427 	struct ix_rx_ring *rxr = sc->rx_rings;
3428 
3429 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
3430 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
3431 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
3432 	struct ixgbe_hw_stats *stats = &sc->stats;
3433 
3434 	struct sysctl_oid *stat_node, *queue_node;
3435 	struct sysctl_oid_list *stat_list, *queue_list;
3436 
3437 #define QUEUE_NAME_LEN 32
3438 	char namebuf[QUEUE_NAME_LEN];
3439 
3440 	/* MAC stats get their own sub node */
3441 
3442 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
3443 				    CTLFLAG_RD, NULL, "MAC Statistics");
3444 	stat_list = SYSCTL_CHILDREN(stat_node);
3445 
3446 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
3447 			CTLFLAG_RD, &stats->crcerrs,
3448 			"CRC Errors");
3449 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
3450 			CTLFLAG_RD, &stats->illerrc,
3451 			"Illegal Byte Errors");
3452 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
3453 			CTLFLAG_RD, &stats->errbc,
3454 			"Byte Errors");
3455 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
3456 			CTLFLAG_RD, &stats->mspdc,
3457 			"MAC Short Packets Discarded");
3458 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
3459 			CTLFLAG_RD, &stats->mlfc,
3460 			"MAC Local Faults");
3461 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
3462 			CTLFLAG_RD, &stats->mrfc,
3463 			"MAC Remote Faults");
3464 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
3465 			CTLFLAG_RD, &stats->rlec,
3466 			"Receive Length Errors");
3467 
3468 	/* Flow Control stats */
3469 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
3470 			CTLFLAG_RD, &stats->lxontxc,
3471 			"Link XON Transmitted");
3472 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
3473 			CTLFLAG_RD, &stats->lxonrxc,
3474 			"Link XON Received");
3475 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
3476 			CTLFLAG_RD, &stats->lxofftxc,
3477 			"Link XOFF Transmitted");
3478 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
3479 			CTLFLAG_RD, &stats->lxoffrxc,
3480 			"Link XOFF Received");
3481 
3482 	/* Packet Reception Stats */
3483 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
3484 			CTLFLAG_RD, &stats->tor,
3485 			"Total Octets Received");
3486 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
3487 			CTLFLAG_RD, &stats->gorc,
3488 			"Good Octets Received");
3489 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
3490 			CTLFLAG_RD, &stats->tpr,
3491 			"Total Packets Received");
3492 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
3493 			CTLFLAG_RD, &stats->gprc,
3494 			"Good Packets Received");
3495 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
3496 			CTLFLAG_RD, &stats->mprc,
3497 			"Multicast Packets Received");
3498 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
3499 			CTLFLAG_RD, &stats->bprc,
3500 			"Broadcast Packets Received");
3501 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
3502 			CTLFLAG_RD, &stats->prc64,
3503 			"64 byte frames received ");
3504 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
3505 			CTLFLAG_RD, &stats->prc127,
3506 			"65-127 byte frames received");
3507 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
3508 			CTLFLAG_RD, &stats->prc255,
3509 			"128-255 byte frames received");
3510 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
3511 			CTLFLAG_RD, &stats->prc511,
3512 			"256-511 byte frames received");
3513 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
3514 			CTLFLAG_RD, &stats->prc1023,
3515 			"512-1023 byte frames received");
3516 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
3517 			"1024-1522 byte frames received");
3518 			"1023-1522 byte frames received");
3519 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
3520 			CTLFLAG_RD, &stats->ruc,
3521 			"Receive Undersized");
3522 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
3523 			CTLFLAG_RD, &stats->rfc,
3524 			"Fragmented Packets Received ");
3525 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
3526 			CTLFLAG_RD, &stats->roc,
3527 			"Oversized Packets Received");
3528 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
3529 			CTLFLAG_RD, &stats->rjc,
3530 			"Received Jabber");
3531 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
3532 			CTLFLAG_RD, &stats->mngprc,
3533 			"Management Packets Received");
3534 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
3535 			CTLFLAG_RD, &stats->mngpdc,
3536 			"Management Packets Dropped");
3537 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
3538 			CTLFLAG_RD, &stats->xec,
3539 			"Checksum Errors");
3540 
3541 	/* Packet Transmission Stats */
3542 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
3543 			CTLFLAG_RD, &stats->gotc,
3544 			"Good Octets Transmitted");
3545 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
3546 			CTLFLAG_RD, &stats->tpt,
3547 			"Total Packets Transmitted");
3548 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
3549 			CTLFLAG_RD, &stats->gptc,
3550 			"Good Packets Transmitted");
3551 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
3552 			CTLFLAG_RD, &stats->bptc,
3553 			"Broadcast Packets Transmitted");
3554 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
3555 			CTLFLAG_RD, &stats->mptc,
3556 			"Multicast Packets Transmitted");
3557 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
3558 			CTLFLAG_RD, &stats->mngptc,
3559 			"Management Packets Transmitted");
3560 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
3561 			CTLFLAG_RD, &stats->ptc64,
3562 			"64 byte frames transmitted ");
3563 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
3564 			CTLFLAG_RD, &stats->ptc127,
3565 			"65-127 byte frames transmitted");
3566 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
3567 			CTLFLAG_RD, &stats->ptc255,
3568 			"128-255 byte frames transmitted");
3569 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
3570 			CTLFLAG_RD, &stats->ptc511,
3571 			"256-511 byte frames transmitted");
3572 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
3573 			CTLFLAG_RD, &stats->ptc1023,
3574 			"512-1023 byte frames transmitted");
3575 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
3576 			CTLFLAG_RD, &stats->ptc1522,
3577 			"1024-1522 byte frames transmitted");
3578 }
3579 #endif
3580 
3581 /*
3582  * Enable the hardware to drop packets when the buffer is full.
3583  * This is useful when multiple RX rings are used, so that no
3584  * single RX ring being full stalls the entire RX engine.  We
3585  * only enable this when multiple RX rings are used and when
3586  * flow control is disabled.
3587  */
3588 static void
3589 ix_enable_rx_drop(struct ix_softc *sc)
3590 {
3591 	struct ixgbe_hw *hw = &sc->hw;
3592 	int i;
3593 
3594 	if (bootverbose) {
3595 		if_printf(&sc->arpcom.ac_if,
3596 		    "flow control %s, enable RX drop\n",
3597 		    ix_fc2str(sc->hw.fc.current_mode));
3598 	}
3599 
3600 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
3601 		uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3602 
3603 		srrctl |= IXGBE_SRRCTL_DROP_EN;
3604 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3605 	}
3606 }
3607 
3608 static void
3609 ix_disable_rx_drop(struct ix_softc *sc)
3610 {
3611 	struct ixgbe_hw *hw = &sc->hw;
3612 	int i;
3613 
3614 	if (bootverbose) {
3615 		if_printf(&sc->arpcom.ac_if,
3616 		    "flow control %s, disable RX drop\n",
3617 		    ix_fc2str(sc->hw.fc.current_mode));
3618 	}
3619 
3620 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
3621 		uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3622 
3623 		srrctl &= ~IXGBE_SRRCTL_DROP_EN;
3624 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3625 	}
3626 }
3627 
3628 static void
3629 ix_setup_serialize(struct ix_softc *sc)
3630 {
3631 	int i = 0, j;
3632 
3633 	/* Main + RX + TX */
3634 	sc->nserialize = 1 + sc->rx_ring_cnt + sc->tx_ring_cnt;
3635 	sc->serializes =
3636 	    kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *),
3637 	        M_DEVBUF, M_WAITOK | M_ZERO);
3638 
3639 	/*
3640 	 * Setup serializes
3641 	 *
3642 	 * NOTE: Order is critical
3643 	 */
3644 
3645 	KKASSERT(i < sc->nserialize);
3646 	sc->serializes[i++] = &sc->main_serialize;
3647 
3648 	for (j = 0; j < sc->rx_ring_cnt; ++j) {
3649 		KKASSERT(i < sc->nserialize);
3650 		sc->serializes[i++] = &sc->rx_rings[j].rx_serialize;
3651 	}
3652 
3653 	for (j = 0; j < sc->tx_ring_cnt; ++j) {
3654 		KKASSERT(i < sc->nserialize);
3655 		sc->serializes[i++] = &sc->tx_rings[j].tx_serialize;
3656 	}
3657 
3658 	KKASSERT(i == sc->nserialize);
3659 }
3660 
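/*
 * Interrupt allocation: try MSI-X first; if MSI-X is unavailable or
 * disabled, fall back to a single MSI or legacy interrupt shared by
 * the first TX ring and up to IX_MIN_RXRING_RSS RX rings.
 */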
3661 static int
3662 ix_alloc_intr(struct ix_softc *sc)
3663 {
3664 	struct ix_intr_data *intr;
3665 	struct ix_tx_ring *txr;
3666 	u_int intr_flags;
3667 	int i;
3668 
3669 	ix_alloc_msix(sc);
3670 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
3671 		ix_set_ring_inuse(sc, FALSE);
3672 		goto done;
3673 	}
3674 
3675 	/*
3676 	 * Reset some settings changed by ix_alloc_msix().
3677 	 */
3678 	if (sc->rx_rmap_intr != NULL) {
3679 		if_ringmap_free(sc->rx_rmap_intr);
3680 		sc->rx_rmap_intr = NULL;
3681 	}
3682 	if (sc->tx_rmap_intr != NULL) {
3683 		if_ringmap_free(sc->tx_rmap_intr);
3684 		sc->tx_rmap_intr = NULL;
3685 	}
3686 	if (sc->intr_data != NULL) {
3687 		kfree(sc->intr_data, M_DEVBUF);
3688 		sc->intr_data = NULL;
3689 	}
3690 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
3691 		txr = &sc->tx_rings[i];
3692 		txr->tx_intr_vec = -1;
3693 		txr->tx_intr_cpuid = -1;
3694 	}
3695 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
3696 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
3697 
3698 		rxr->rx_intr_vec = -1;
3699 		rxr->rx_txr = NULL;
3700 	}
3701 
3702 	sc->intr_cnt = 1;
3703 	sc->intr_data = kmalloc(sizeof(struct ix_intr_data), M_DEVBUF,
3704 	    M_WAITOK | M_ZERO);
3705 	intr = &sc->intr_data[0];
3706 
3707 	/*
3708 	 * Allocate MSI/legacy interrupt resource
3709 	 */
3710 	sc->intr_type = pci_alloc_1intr(sc->dev, ix_msi_enable,
3711 	    &intr->intr_rid, &intr_flags);
3712 
3713 	intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
3714 	    &intr->intr_rid, intr_flags);
3715 	if (intr->intr_res == NULL) {
3716 		device_printf(sc->dev, "Unable to allocate bus resource: "
3717 		    "interrupt\n");
3718 		return ENXIO;
3719 	}
3720 
3721 	intr->intr_serialize = &sc->main_serialize;
3722 	intr->intr_cpuid = rman_get_cpuid(intr->intr_res);
3723 	intr->intr_func = ix_intr;
3724 	intr->intr_funcarg = sc;
3725 	intr->intr_rate = IX_INTR_RATE;
3726 	intr->intr_use = IX_INTR_USE_RXTX;
3727 
3728 	sc->tx_rings[0].tx_intr_vec = IX_TX_INTR_VEC;
3729 	sc->tx_rings[0].tx_intr_cpuid = intr->intr_cpuid;
3730 
3731 	sc->rx_rings[0].rx_intr_vec = IX_RX0_INTR_VEC;
3732 
3733 	ix_set_ring_inuse(sc, FALSE);
3734 
3735 	KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
3736 	if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS) {
3737 		sc->rx_rings[1].rx_intr_vec = IX_RX1_INTR_VEC;
3738 
3739 		/*
3740 		 * Allocate RX ring map for RSS setup.
3741 		 */
3742 		sc->rx_rmap_intr = if_ringmap_alloc(sc->dev,
3743 		    IX_MIN_RXRING_RSS, IX_MIN_RXRING_RSS);
3744 		KASSERT(if_ringmap_count(sc->rx_rmap_intr) ==
3745 		    sc->rx_ring_inuse, ("RX ring inuse mismatch"));
3746 	}
3747 done:
3748 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
3749 		txr = &sc->tx_rings[i];
3750 		if (txr->tx_intr_cpuid < 0)
3751 			txr->tx_intr_cpuid = 0;
3752 	}
3753 	return 0;
3754 }
3755 
3756 static void
3757 ix_free_intr(struct ix_softc *sc)
3758 {
3759 	if (sc->intr_data == NULL)
3760 		return;
3761 
3762 	if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
3763 		struct ix_intr_data *intr = &sc->intr_data[0];
3764 
3765 		KKASSERT(sc->intr_cnt == 1);
3766 		if (intr->intr_res != NULL) {
3767 			bus_release_resource(sc->dev, SYS_RES_IRQ,
3768 			    intr->intr_rid, intr->intr_res);
3769 		}
3770 		if (sc->intr_type == PCI_INTR_TYPE_MSI)
3771 			pci_release_msi(sc->dev);
3772 
3773 		kfree(sc->intr_data, M_DEVBUF);
3774 	} else {
3775 		ix_free_msix(sc, TRUE);
3776 	}
3777 }
3778 
3779 static void
3780 ix_set_ring_inuse(struct ix_softc *sc, boolean_t polling)
3781 {
3782 	sc->rx_ring_inuse = ix_get_rxring_inuse(sc, polling);
3783 	sc->tx_ring_inuse = ix_get_txring_inuse(sc, polling);
3784 	if (bootverbose) {
3785 		if_printf(&sc->arpcom.ac_if,
3786 		    "RX rings %d/%d, TX rings %d/%d\n",
3787 		    sc->rx_ring_inuse, sc->rx_ring_cnt,
3788 		    sc->tx_ring_inuse, sc->tx_ring_cnt);
3789 	}
3790 }
3791 
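/*
 * Number of RX rings actually used: all RX rings when polling, a
 * reduced RSS set (IX_MIN_RXRING_RSS) for MSI/legacy interrupts, and
 * the MSI-X negotiated ring count otherwise.  Without hardware RSS
 * only one RX ring is used.
 */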
3792 static int
3793 ix_get_rxring_inuse(const struct ix_softc *sc, boolean_t polling)
3794 {
3795 	if (!IX_ENABLE_HWRSS(sc))
3796 		return 1;
3797 
3798 	if (polling)
3799 		return sc->rx_ring_cnt;
3800 	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
3801 		return IX_MIN_RXRING_RSS;
3802 	else
3803 		return sc->rx_ring_msix;
3804 }
3805 
3806 static int
3807 ix_get_txring_inuse(const struct ix_softc *sc, boolean_t polling)
3808 {
3809 	if (!IX_ENABLE_HWTSS(sc))
3810 		return 1;
3811 
3812 	if (polling)
3813 		return sc->tx_ring_cnt;
3814 	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
3815 		return 1;
3816 	else
3817 		return sc->tx_ring_msix;
3818 }
3819 
3820 static int
3821 ix_setup_intr(struct ix_softc *sc)
3822 {
3823 	int i;
3824 
3825 	for (i = 0; i < sc->intr_cnt; ++i) {
3826 		struct ix_intr_data *intr = &sc->intr_data[i];
3827 		int error;
3828 
3829 		error = bus_setup_intr_descr(sc->dev, intr->intr_res,
3830 		    INTR_MPSAFE, intr->intr_func, intr->intr_funcarg,
3831 		    &intr->intr_hand, intr->intr_serialize, intr->intr_desc);
3832 		if (error) {
3833 			device_printf(sc->dev, "could not set up interrupt %d\n", i);
3834 			ix_teardown_intr(sc, i);
3835 			return error;
3836 		}
3837 	}
3838 	return 0;
3839 }
3840 
3841 static void
3842 ix_teardown_intr(struct ix_softc *sc, int intr_cnt)
3843 {
3844 	int i;
3845 
3846 	if (sc->intr_data == NULL)
3847 		return;
3848 
3849 	for (i = 0; i < intr_cnt; ++i) {
3850 		struct ix_intr_data *intr = &sc->intr_data[i];
3851 
3852 		bus_teardown_intr(sc->dev, intr->intr_res, intr->intr_hand);
3853 	}
3854 }
3855 
3856 static void
3857 ix_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
3858 {
3859 	struct ix_softc *sc = ifp->if_softc;
3860 
3861 	ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz);
3862 }
3863 
3864 static void
3865 ix_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
3866 {
3867 	struct ix_softc *sc = ifp->if_softc;
3868 
3869 	ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz);
3870 }
3871 
3872 static int
3873 ix_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
3874 {
3875 	struct ix_softc *sc = ifp->if_softc;
3876 
3877 	return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz);
3878 }
3879 
3880 #ifdef INVARIANTS
3881 
3882 static void
3883 ix_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
3884     boolean_t serialized)
3885 {
3886 	struct ix_softc *sc = ifp->if_softc;
3887 
3888 	ifnet_serialize_array_assert(sc->serializes, sc->nserialize, slz,
3889 	    serialized);
3890 }
3891 
3892 #endif	/* INVARIANTS */
3893 
3894 static void
3895 ix_free_rings(struct ix_softc *sc)
3896 {
3897 	int i;
3898 
3899 	if (sc->tx_rings != NULL) {
3900 		for (i = 0; i < sc->tx_ring_cnt; ++i) {
3901 			struct ix_tx_ring *txr = &sc->tx_rings[i];
3902 
3903 			ix_destroy_tx_ring(txr, txr->tx_ndesc);
3904 		}
3905 		kfree(sc->tx_rings, M_DEVBUF);
3906 	}
3907 
3908 	if (sc->rx_rings != NULL) {
3909 		for (i = 0; i < sc->rx_ring_cnt; ++i) {
3910 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
3911 
3912 			ix_destroy_rx_ring(rxr, rxr->rx_ndesc);
3913 		}
3914 		kfree(sc->rx_rings, M_DEVBUF);
3915 	}
3916 
3917 	if (sc->parent_tag != NULL)
3918 		bus_dma_tag_destroy(sc->parent_tag);
3919 }
3920 
3921 static void
3922 ix_watchdog(struct ifaltq_subque *ifsq)
3923 {
3924 	struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
3925 	struct ifnet *ifp = ifsq_get_ifp(ifsq);
3926 	struct ix_softc *sc = ifp->if_softc;
3927 	int i;
3928 
3929 	KKASSERT(txr->tx_ifsq == ifsq);
3930 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3931 
3932 	/*
3933 	 * If the interface has been paused, don't do the watchdog check.
3934 	 */
3935 	if (IXGBE_READ_REG(&sc->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF) {
3936 		txr->tx_watchdog.wd_timer = 5;
3937 		return;
3938 	}
3939 
3940 	if_printf(ifp, "Watchdog timeout -- resetting\n");
3941 	if_printf(ifp, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->tx_idx,
3942 	    IXGBE_READ_REG(&sc->hw, IXGBE_TDH(txr->tx_idx)),
3943 	    IXGBE_READ_REG(&sc->hw, IXGBE_TDT(txr->tx_idx)));
3944 	if_printf(ifp, "TX(%d) desc avail = %d, next TX to Clean = %d\n",
3945 	    txr->tx_idx, txr->tx_avail, txr->tx_next_clean);
3946 
3947 	ix_init(sc);
3948 	for (i = 0; i < sc->tx_ring_inuse; ++i)
3949 		ifsq_devstart_sched(sc->tx_rings[i].tx_ifsq);
3950 }
3951 
3952 static void
3953 ix_free_tx_ring(struct ix_tx_ring *txr)
3954 {
3955 	int i;
3956 
3957 	for (i = 0; i < txr->tx_ndesc; ++i) {
3958 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
3959 
3960 		if (txbuf->m_head != NULL) {
3961 			bus_dmamap_unload(txr->tx_tag, txbuf->map);
3962 			m_freem(txbuf->m_head);
3963 			txbuf->m_head = NULL;
3964 		}
3965 	}
3966 }
3967 
3968 static void
3969 ix_free_rx_ring(struct ix_rx_ring *rxr)
3970 {
3971 	int i;
3972 
3973 	for (i = 0; i < rxr->rx_ndesc; ++i) {
3974 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
3975 
3976 		if (rxbuf->fmp != NULL) {
3977 			m_freem(rxbuf->fmp);
3978 			rxbuf->fmp = NULL;
3979 			rxbuf->lmp = NULL;
3980 		} else {
3981 			KKASSERT(rxbuf->lmp == NULL);
3982 		}
3983 		if (rxbuf->m_head != NULL) {
3984 			bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
3985 			m_freem(rxbuf->m_head);
3986 			rxbuf->m_head = NULL;
3987 		}
3988 	}
3989 }
3990 
3991 static int
3992 ix_newbuf(struct ix_rx_ring *rxr, int i, boolean_t wait)
3993 {
3994 	struct mbuf *m;
3995 	bus_dma_segment_t seg;
3996 	bus_dmamap_t map;
3997 	struct ix_rx_buf *rxbuf;
3998 	int flags, error, nseg;
3999 
4000 	flags = M_NOWAIT;
4001 	if (__predict_false(wait))
4002 		flags = M_WAITOK;
4003 
4004 	m = m_getjcl(flags, MT_DATA, M_PKTHDR, rxr->rx_mbuf_sz);
4005 	if (m == NULL) {
4006 		if (wait) {
4007 			if_printf(&rxr->rx_sc->arpcom.ac_if,
4008 			    "Unable to allocate RX mbuf\n");
4009 		}
4010 		return ENOBUFS;
4011 	}
4012 	m->m_len = m->m_pkthdr.len = rxr->rx_mbuf_sz;
4013 
4014 	error = bus_dmamap_load_mbuf_segment(rxr->rx_tag,
4015 	    rxr->rx_sparemap, m, &seg, 1, &nseg, BUS_DMA_NOWAIT);
4016 	if (error) {
4017 		m_freem(m);
4018 		if (wait) {
4019 			if_printf(&rxr->rx_sc->arpcom.ac_if,
4020 			    "Unable to load RX mbuf\n");
4021 		}
4022 		return error;
4023 	}
4024 
4025 	rxbuf = &rxr->rx_buf[i];
4026 	if (rxbuf->m_head != NULL)
4027 		bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
4028 
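	/*
	 * Swap maps: the freshly loaded spare map becomes this buffer's
	 * map, and the buffer's old map becomes the new spare for the
	 * next allocation.
	 */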
4029 	map = rxbuf->map;
4030 	rxbuf->map = rxr->rx_sparemap;
4031 	rxr->rx_sparemap = map;
4032 
4033 	rxbuf->m_head = m;
4034 	rxbuf->paddr = seg.ds_addr;
4035 
4036 	ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4037 	return 0;
4038 }
4039 
4040 static void
4041 ix_add_sysctl(struct ix_softc *sc)
4042 {
4043 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
4044 	struct sysctl_oid *tree = device_get_sysctl_tree(sc->dev);
4045 #ifdef IX_RSS_DEBUG
4046 	char node[32];
4047 	int i;
4048 #endif
4049 
4050 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4051 	    OID_AUTO, "rxr", CTLFLAG_RD, &sc->rx_ring_cnt, 0, "# of RX rings");
4052 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4053 	    OID_AUTO, "rxr_inuse", CTLFLAG_RD, &sc->rx_ring_inuse, 0,
4054 	    "# of RX rings used");
4055 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4056 	    OID_AUTO, "txr", CTLFLAG_RD, &sc->tx_ring_cnt, 0, "# of TX rings");
4057 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4058 	    OID_AUTO, "txr_inuse", CTLFLAG_RD, &sc->tx_ring_inuse, 0,
4059 	    "# of TX rings used");
4060 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4061 	    OID_AUTO, "rxd", CTLTYPE_INT | CTLFLAG_RD,
4062 	    sc, 0, ix_sysctl_rxd, "I",
4063 	    "# of RX descs");
4064 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4065 	    OID_AUTO, "txd", CTLTYPE_INT | CTLFLAG_RD,
4066 	    sc, 0, ix_sysctl_txd, "I",
4067 	    "# of TX descs");
4068 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4069 	    OID_AUTO, "tx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4070 	    sc, 0, ix_sysctl_tx_wreg_nsegs, "I",
4071 	    "# of segments sent before write to hardware register");
4072 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4073 	    OID_AUTO, "rx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4074 	    sc, 0, ix_sysctl_rx_wreg_nsegs, "I",
4075 	    "# of received segments sent before write to hardware register");
4076 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4077 	    OID_AUTO, "tx_intr_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4078 	    sc, 0, ix_sysctl_tx_intr_nsegs, "I",
4079 	    "# of segments per TX interrupt");
4080 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
4081 		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4082 		    OID_AUTO, "tx_msix_cpumap", CTLTYPE_OPAQUE | CTLFLAG_RD,
4083 		    sc->tx_rmap_intr, 0, if_ringmap_cpumap_sysctl, "I",
4084 		    "TX MSI-X CPU map");
4085 		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4086 		    OID_AUTO, "rx_msix_cpumap", CTLTYPE_OPAQUE | CTLFLAG_RD,
4087 		    sc->rx_rmap_intr, 0, if_ringmap_cpumap_sysctl, "I",
4088 		    "RX MSI-X CPU map");
4089 	}
4090 #ifdef IFPOLL_ENABLE
4091 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4092 	    OID_AUTO, "tx_poll_cpumap", CTLTYPE_OPAQUE | CTLFLAG_RD,
4093 	    sc->tx_rmap, 0, if_ringmap_cpumap_sysctl, "I",
4094 	    "TX polling CPU map");
4095 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4096 	    OID_AUTO, "rx_poll_cpumap", CTLTYPE_OPAQUE | CTLFLAG_RD,
4097 	    sc->rx_rmap, 0, if_ringmap_cpumap_sysctl, "I",
4098 	    "RX polling CPU map");
4099 #endif
4100 
4101 #define IX_ADD_INTR_RATE_SYSCTL(sc, use, name) \
4102 do { \
4103 	ix_add_intr_rate_sysctl(sc, IX_INTR_USE_##use, #name, \
4104 	    ix_sysctl_##name, #use " interrupt rate"); \
4105 } while (0)
4106 
4107 	IX_ADD_INTR_RATE_SYSCTL(sc, RXTX, rxtx_intr_rate);
4108 	IX_ADD_INTR_RATE_SYSCTL(sc, RX, rx_intr_rate);
4109 	IX_ADD_INTR_RATE_SYSCTL(sc, TX, tx_intr_rate);
4110 	IX_ADD_INTR_RATE_SYSCTL(sc, STATUS, sts_intr_rate);
4111 
4112 #undef IX_ADD_INTR_RATE_SYSCTL
4113 
4114 #ifdef IX_RSS_DEBUG
4115 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4116 	    OID_AUTO, "rss_debug", CTLFLAG_RW, &sc->rss_debug, 0,
4117 	    "RSS debug level");
4118 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
4119 		ksnprintf(node, sizeof(node), "rx%d_pkt", i);
4120 		SYSCTL_ADD_ULONG(ctx,
4121 		    SYSCTL_CHILDREN(tree), OID_AUTO, node,
4122 		    CTLFLAG_RW, &sc->rx_rings[i].rx_pkts, "RXed packets");
4123 	}
4124 #endif
4125 
4126 #if 0
4127 	ix_add_hw_stats(sc);
4128 #endif
4129 
4130 }
4131 
4132 static int
4133 ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4134 {
4135 	struct ix_softc *sc = (void *)arg1;
4136 	struct ifnet *ifp = &sc->arpcom.ac_if;
4137 	int error, nsegs, i;
4138 
4139 	nsegs = sc->tx_rings[0].tx_wreg_nsegs;
4140 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4141 	if (error || req->newptr == NULL)
4142 		return error;
4143 	if (nsegs < 0)
4144 		return EINVAL;
4145 
4146 	ifnet_serialize_all(ifp);
4147 	for (i = 0; i < sc->tx_ring_cnt; ++i)
4148 		sc->tx_rings[i].tx_wreg_nsegs = nsegs;
4149 	ifnet_deserialize_all(ifp);
4150 
4151 	return 0;
4152 }
4153 
4154 static int
4155 ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4156 {
4157 	struct ix_softc *sc = (void *)arg1;
4158 	struct ifnet *ifp = &sc->arpcom.ac_if;
4159 	int error, nsegs, i;
4160 
4161 	nsegs = sc->rx_rings[0].rx_wreg_nsegs;
4162 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4163 	if (error || req->newptr == NULL)
4164 		return error;
4165 	if (nsegs < 0)
4166 		return EINVAL;
4167 
4168 	ifnet_serialize_all(ifp);
4169 	for (i = 0; i < sc->rx_ring_cnt; ++i)
4170 		sc->rx_rings[i].rx_wreg_nsegs = nsegs;
4171 	ifnet_deserialize_all(ifp);
4172 
4173 	return 0;
4174 }
4175 
4176 static int
4177 ix_sysctl_txd(SYSCTL_HANDLER_ARGS)
4178 {
4179 	struct ix_softc *sc = (void *)arg1;
4180 	int txd;
4181 
4182 	txd = sc->tx_rings[0].tx_ndesc;
4183 	return sysctl_handle_int(oidp, &txd, 0, req);
4184 }
4185 
4186 static int
4187 ix_sysctl_rxd(SYSCTL_HANDLER_ARGS)
4188 {
4189 	struct ix_softc *sc = (void *)arg1;
4190 	int rxd;
4191 
4192 	rxd = sc->rx_rings[0].rx_ndesc;
4193 	return sysctl_handle_int(oidp, &rxd, 0, req);
4194 }
4195 
4196 static int
4197 ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS)
4198 {
4199 	struct ix_softc *sc = (void *)arg1;
4200 	struct ifnet *ifp = &sc->arpcom.ac_if;
4201 	struct ix_tx_ring *txr = &sc->tx_rings[0];
4202 	int error, nsegs;
4203 
4204 	nsegs = txr->tx_intr_nsegs;
4205 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4206 	if (error || req->newptr == NULL)
4207 		return error;
4208 	if (nsegs < 0)
4209 		return EINVAL;
4210 
4211 	ifnet_serialize_all(ifp);
4212 
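	/*
	 * The TX interrupt threshold must leave enough free descriptors
	 * for at least one maximally scattered packet (IX_MAX_SCATTER)
	 * plus the ring's reserved descriptors (IX_TX_RESERVED).
	 */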
4213 	if (nsegs >= txr->tx_ndesc - IX_MAX_SCATTER - IX_TX_RESERVED) {
4214 		error = EINVAL;
4215 	} else {
4216 		int i;
4217 
4218 		error = 0;
4219 		for (i = 0; i < sc->tx_ring_cnt; ++i)
4220 			sc->tx_rings[i].tx_intr_nsegs = nsegs;
4221 	}
4222 
4223 	ifnet_deserialize_all(ifp);
4224 
4225 	return error;
4226 }
4227 
4228 static void
4229 ix_set_eitr(struct ix_softc *sc, int idx, int rate)
4230 {
4231 	uint32_t eitr, eitr_intvl;
4232 
4233 	eitr = IXGBE_READ_REG(&sc->hw, IXGBE_EITR(idx));
4234 	eitr_intvl = 1000000000 / 256 / rate;
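	/*
	 * 1000000000 / 256 / rate expresses the per-interrupt interval
	 * (1e9 / rate in nanoseconds) in 256ns units; the value is then
	 * clamped to the EITR interval field limits for this MAC type.
	 */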
4235 
4236 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4237 		eitr &= ~IX_EITR_INTVL_MASK_82598;
4238 		if (eitr_intvl == 0)
4239 			eitr_intvl = 1;
4240 		else if (eitr_intvl > IX_EITR_INTVL_MASK_82598)
4241 			eitr_intvl = IX_EITR_INTVL_MASK_82598;
4242 	} else {
4243 		eitr &= ~IX_EITR_INTVL_MASK;
4244 
4245 		eitr_intvl &= ~IX_EITR_INTVL_RSVD_MASK;
4246 		if (eitr_intvl == 0)
4247 			eitr_intvl = IX_EITR_INTVL_MIN;
4248 		else if (eitr_intvl > IX_EITR_INTVL_MAX)
4249 			eitr_intvl = IX_EITR_INTVL_MAX;
4250 	}
4251 	eitr |= eitr_intvl;
4252 
4253 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EITR(idx), eitr);
4254 }
4255 
4256 static int
4257 ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS)
4258 {
4259 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RXTX);
4260 }
4261 
4262 static int
4263 ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS)
4264 {
4265 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RX);
4266 }
4267 
4268 static int
4269 ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS)
4270 {
4271 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_TX);
4272 }
4273 
4274 static int
4275 ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS)
4276 {
4277 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_STATUS);
4278 }
4279 
4280 static int
4281 ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int use)
4282 {
4283 	struct ix_softc *sc = (void *)arg1;
4284 	struct ifnet *ifp = &sc->arpcom.ac_if;
4285 	int error, rate, i;
4286 
4287 	rate = 0;
4288 	for (i = 0; i < sc->intr_cnt; ++i) {
4289 		if (sc->intr_data[i].intr_use == use) {
4290 			rate = sc->intr_data[i].intr_rate;
4291 			break;
4292 		}
4293 	}
4294 
4295 	error = sysctl_handle_int(oidp, &rate, 0, req);
4296 	if (error || req->newptr == NULL)
4297 		return error;
4298 	if (rate <= 0)
4299 		return EINVAL;
4300 
4301 	ifnet_serialize_all(ifp);
4302 
4303 	for (i = 0; i < sc->intr_cnt; ++i) {
4304 		if (sc->intr_data[i].intr_use == use) {
4305 			sc->intr_data[i].intr_rate = rate;
4306 			if (ifp->if_flags & IFF_RUNNING)
4307 				ix_set_eitr(sc, i, rate);
4308 		}
4309 	}
4310 
4311 	ifnet_deserialize_all(ifp);
4312 
4313 	return error;
4314 }
4315 
4316 static void
4317 ix_add_intr_rate_sysctl(struct ix_softc *sc, int use,
4318     const char *name, int (*handler)(SYSCTL_HANDLER_ARGS), const char *desc)
4319 {
4320 	int i;
4321 
4322 	for (i = 0; i < sc->intr_cnt; ++i) {
4323 		if (sc->intr_data[i].intr_use == use) {
4324 			SYSCTL_ADD_PROC(device_get_sysctl_ctx(sc->dev),
4325 			    SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)),
4326 			    OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW,
4327 			    sc, 0, handler, "I", desc);
4328 			break;
4329 		}
4330 	}
4331 }
4332 
4333 static void
4334 ix_set_timer_cpuid(struct ix_softc *sc, boolean_t polling)
4335 {
4336 	if (polling || sc->intr_type == PCI_INTR_TYPE_MSIX)
4337 		sc->timer_cpuid = 0; /* XXX fixed */
4338 	else
4339 		sc->timer_cpuid = rman_get_cpuid(sc->intr_data[0].intr_res);
4340 }
4341 
4342 static void
4343 ix_alloc_msix(struct ix_softc *sc)
4344 {
4345 	int msix_enable, msix_cnt, msix_ring, alloc_cnt;
4346 	struct ix_intr_data *intr;
4347 	int i, x, error;
4348 	int ring_cnt, ring_cntmax;
4349 	boolean_t setup = FALSE;
4350 
4351 	msix_enable = ix_msix_enable;
4352 	/*
4353 	 * Don't enable MSI-X on 82598 by default, see:
4354 	 * 82598 specification update errata #38
4355 	 */
4356 	if (sc->hw.mac.type == ixgbe_mac_82598EB)
4357 		msix_enable = 0;
4358 	msix_enable = device_getenv_int(sc->dev, "msix.enable", msix_enable);
4359 	if (!msix_enable)
4360 		return;
4361 
4362 	msix_cnt = pci_msix_count(sc->dev);
4363 #ifdef IX_MSIX_DEBUG
4364 	msix_cnt = device_getenv_int(sc->dev, "msix.count", msix_cnt);
4365 #endif
4366 	if (msix_cnt <= 1) {
4367 		/* A single MSI-X vector does not make sense. */
4368 		return;
4369 	}
4370 
4371 	/*
4372 	 * Make sure that we don't exceed the limits imposed by the
4373 	 * interrupt related registers (EIMS, etc).
4374 	 */
4375 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4376 		if (msix_cnt > IX_MAX_MSIX_82598)
4377 			msix_cnt = IX_MAX_MSIX_82598;
4378 	} else {
4379 		if (msix_cnt > IX_MAX_MSIX)
4380 			msix_cnt = IX_MAX_MSIX;
4381 	}
4382 	if (bootverbose)
4383 		device_printf(sc->dev, "MSI-X count %d\n", msix_cnt);
4384 	msix_ring = msix_cnt - 1; /* -1 for status */
4385 
4386 	/*
4387 	 * Configure # of RX/TX rings usable by MSI-X.
4388 	 */
4389 	ix_get_rxring_cnt(sc, &ring_cnt, &ring_cntmax);
4390 	if (ring_cntmax > msix_ring)
4391 		ring_cntmax = msix_ring;
4392 	sc->rx_rmap_intr = if_ringmap_alloc(sc->dev, ring_cnt, ring_cntmax);
4393 
4394 	ix_get_txring_cnt(sc, &ring_cnt, &ring_cntmax);
4395 	if (ring_cntmax > msix_ring)
4396 		ring_cntmax = msix_ring;
4397 	sc->tx_rmap_intr = if_ringmap_alloc(sc->dev, ring_cnt, ring_cntmax);
4398 
4399 	if_ringmap_match(sc->dev, sc->rx_rmap_intr, sc->tx_rmap_intr);
4400 	sc->rx_ring_msix = if_ringmap_count(sc->rx_rmap_intr);
4401 	KASSERT(sc->rx_ring_msix <= sc->rx_ring_cnt,
4402 	    ("total RX ring count %d, MSI-X RX ring count %d",
4403 	     sc->rx_ring_cnt, sc->rx_ring_msix));
4404 	sc->tx_ring_msix = if_ringmap_count(sc->tx_rmap_intr);
4405 	KASSERT(sc->tx_ring_msix <= sc->tx_ring_cnt,
4406 	    ("total TX ring count %d, MSI-X TX ring count %d",
4407 	     sc->tx_ring_cnt, sc->tx_ring_msix));
4408 
4409 	/*
4410 	 * Aggregate TX/RX MSI-X
4411 	 */
4412 	ring_cntmax = sc->rx_ring_msix;
4413 	if (ring_cntmax < sc->tx_ring_msix)
4414 		ring_cntmax = sc->tx_ring_msix;
4415 	KASSERT(ring_cntmax <= msix_ring,
4416 	    ("invalid ring count max %d, MSI-X count for rings %d",
4417 	     ring_cntmax, msix_ring));
4418 
4419 	alloc_cnt = ring_cntmax + 1; /* +1 for status */
4420 	if (bootverbose) {
4421 		device_printf(sc->dev, "MSI-X alloc %d, "
4422 		    "RX ring %d, TX ring %d\n", alloc_cnt,
4423 		    sc->rx_ring_msix, sc->tx_ring_msix);
4424 	}
4425 
4426 	sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82598);
4427 	sc->msix_mem_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4428 	    &sc->msix_mem_rid, RF_ACTIVE);
4429 	if (sc->msix_mem_res == NULL) {
4430 		sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82599);
4431 		sc->msix_mem_res = bus_alloc_resource_any(sc->dev,
4432 		    SYS_RES_MEMORY, &sc->msix_mem_rid, RF_ACTIVE);
4433 		if (sc->msix_mem_res == NULL) {
4434 			device_printf(sc->dev, "Unable to map MSI-X table\n");
4435 			return;
4436 		}
4437 	}
4438 
4439 	sc->intr_cnt = alloc_cnt;
4440 	sc->intr_data = kmalloc(sizeof(struct ix_intr_data) * sc->intr_cnt,
4441 	    M_DEVBUF, M_WAITOK | M_ZERO);
4442 	for (x = 0; x < sc->intr_cnt; ++x) {
4443 		intr = &sc->intr_data[x];
4444 		intr->intr_rid = -1;
4445 		intr->intr_rate = IX_INTR_RATE;
4446 	}
4447 
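	/*
	 * Vector layout: one vector per RX ring (shared with a TX ring on
	 * the same CPU when possible), one vector per TX ring that could
	 * not be piggybacked, and a final status vector.
	 */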
4448 	x = 0;
4449 	for (i = 0; i < sc->rx_ring_msix; ++i) {
4450 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
4451 		struct ix_tx_ring *txr = NULL;
4452 		int cpuid, j;
4453 
4454 		KKASSERT(x < sc->intr_cnt);
4455 		rxr->rx_intr_vec = x;
4456 		ix_setup_msix_eims(sc, x,
4457 		    &rxr->rx_eims, &rxr->rx_eims_val);
4458 
4459 		cpuid = if_ringmap_cpumap(sc->rx_rmap_intr, i);
4460 
4461 		/*
4462 		 * Try to find a TX ring to piggyback on this RX ring's vector.
4463 		 */
4464 		for (j = 0; j < sc->tx_ring_msix; ++j) {
4465 			if (cpuid ==
4466 			    if_ringmap_cpumap(sc->tx_rmap_intr, j)) {
4467 				txr = &sc->tx_rings[j];
4468 				KKASSERT(txr->tx_intr_cpuid < 0);
4469 				break;
4470 			}
4471 		}
4472 		rxr->rx_txr = txr;
4473 
4474 		intr = &sc->intr_data[x++];
4475 		intr->intr_serialize = &rxr->rx_serialize;
4476 		if (txr != NULL) {
4477 			ksnprintf(intr->intr_desc0,
4478 			    sizeof(intr->intr_desc0), "%s rx%dtx%d",
4479 			    device_get_nameunit(sc->dev), i, txr->tx_idx);
4480 			intr->intr_use = IX_INTR_USE_RXTX;
4481 			intr->intr_func = ix_msix_rxtx;
4482 		} else {
4483 			ksnprintf(intr->intr_desc0,
4484 			    sizeof(intr->intr_desc0), "%s rx%d",
4485 			    device_get_nameunit(sc->dev), i);
4486 			intr->intr_rate = IX_MSIX_RX_RATE;
4487 			intr->intr_use = IX_INTR_USE_RX;
4488 			intr->intr_func = ix_msix_rx;
4489 		}
4490 		intr->intr_funcarg = rxr;
4491 		intr->intr_cpuid = cpuid;
4492 		KKASSERT(intr->intr_cpuid < netisr_ncpus);
4493 		intr->intr_desc = intr->intr_desc0;
4494 
4495 		if (txr != NULL) {
4496 			txr->tx_intr_cpuid = intr->intr_cpuid;
4497 			/* NOTE: Leave TX ring's intr_vec negative. */
4498 		}
4499 	}
4500 
4501 	for (i = 0; i < sc->tx_ring_msix; ++i) {
4502 		struct ix_tx_ring *txr = &sc->tx_rings[i];
4503 
4504 		if (txr->tx_intr_cpuid >= 0) {
4505 			/* Piggybacked by RX ring. */
4506 			continue;
4507 		}
4508 
4509 		KKASSERT(x < sc->intr_cnt);
4510 		txr->tx_intr_vec = x;
4511 		ix_setup_msix_eims(sc, x, &txr->tx_eims, &txr->tx_eims_val);
4512 
4513 		intr = &sc->intr_data[x++];
4514 		intr->intr_serialize = &txr->tx_serialize;
4515 		intr->intr_rate = IX_MSIX_TX_RATE;
4516 		intr->intr_use = IX_INTR_USE_TX;
4517 		intr->intr_func = ix_msix_tx;
4518 		intr->intr_funcarg = txr;
4519 		intr->intr_cpuid = if_ringmap_cpumap(sc->tx_rmap_intr, i);
4520 		KKASSERT(intr->intr_cpuid < netisr_ncpus);
4521 		ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s tx%d",
4522 		    device_get_nameunit(sc->dev), i);
4523 		intr->intr_desc = intr->intr_desc0;
4524 
4525 		txr->tx_intr_cpuid = intr->intr_cpuid;
4526 	}
4527 
4528 	/*
4529 	 * Status MSI-X
4530 	 */
4531 	KKASSERT(x < sc->intr_cnt);
4532 	sc->sts_msix_vec = x;
4533 
4534 	intr = &sc->intr_data[x++];
4535 
4536 	intr->intr_serialize = &sc->main_serialize;
4537 	intr->intr_func = ix_msix_status;
4538 	intr->intr_funcarg = sc;
4539 	intr->intr_cpuid = 0;
4540 	intr->intr_use = IX_INTR_USE_STATUS;
4541 
4542 	ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s sts",
4543 	    device_get_nameunit(sc->dev));
4544 	intr->intr_desc = intr->intr_desc0;
4545 
4546 	KKASSERT(x == sc->intr_cnt);
4547 
4548 	error = pci_setup_msix(sc->dev);
4549 	if (error) {
4550 		device_printf(sc->dev, "Setup MSI-X failed\n");
4551 		goto back;
4552 	}
4553 	setup = TRUE;
4554 
4555 	for (i = 0; i < sc->intr_cnt; ++i) {
4556 		intr = &sc->intr_data[i];
4557 
4558 		error = pci_alloc_msix_vector(sc->dev, i, &intr->intr_rid,
4559 		    intr->intr_cpuid);
4560 		if (error) {
4561 			device_printf(sc->dev,
4562 			    "Unable to allocate MSI-X %d on cpu%d\n", i,
4563 			    intr->intr_cpuid);
4564 			goto back;
4565 		}
4566 
4567 		intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
4568 		    &intr->intr_rid, RF_ACTIVE);
4569 		if (intr->intr_res == NULL) {
4570 			device_printf(sc->dev,
4571 			    "Unable to allocate MSI-X %d resource\n", i);
4572 			error = ENOMEM;
4573 			goto back;
4574 		}
4575 	}
4576 
4577 	pci_enable_msix(sc->dev);
4578 	sc->intr_type = PCI_INTR_TYPE_MSIX;
4579 back:
4580 	if (error)
4581 		ix_free_msix(sc, setup);
4582 }
4583 
4584 static void
4585 ix_free_msix(struct ix_softc *sc, boolean_t setup)
4586 {
4587 	int i;
4588 
4589 	KKASSERT(sc->intr_cnt > 1);
4590 
4591 	for (i = 0; i < sc->intr_cnt; ++i) {
4592 		struct ix_intr_data *intr = &sc->intr_data[i];
4593 
4594 		if (intr->intr_res != NULL) {
4595 			bus_release_resource(sc->dev, SYS_RES_IRQ,
4596 			    intr->intr_rid, intr->intr_res);
4597 		}
4598 		if (intr->intr_rid >= 0)
4599 			pci_release_msix_vector(sc->dev, intr->intr_rid);
4600 	}
4601 	if (setup)
4602 		pci_teardown_msix(sc->dev);
4603 
4604 	sc->intr_cnt = 0;
4605 	kfree(sc->intr_data, M_DEVBUF);
4606 	sc->intr_data = NULL;
4607 }
4608 
4609 static void
4610 ix_msix_rx(void *xrxr)
4611 {
4612 	struct ix_rx_ring *rxr = xrxr;
4613 
4614 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4615 
4616 	ix_rxeof(rxr, -1);
4617 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
4618 }
4619 
4620 static void
4621 ix_msix_tx(void *xtxr)
4622 {
4623 	struct ix_tx_ring *txr = xtxr;
4624 
4625 	ASSERT_SERIALIZED(&txr->tx_serialize);
4626 
4627 	ix_txeof(txr, *(txr->tx_hdr));
4628 	if (!ifsq_is_empty(txr->tx_ifsq))
4629 		ifsq_devstart(txr->tx_ifsq);
4630 	IXGBE_WRITE_REG(&txr->tx_sc->hw, txr->tx_eims, txr->tx_eims_val);
4631 }
4632 
4633 static void
4634 ix_msix_rxtx(void *xrxr)
4635 {
4636 	struct ix_rx_ring *rxr = xrxr;
4637 	struct ix_tx_ring *txr;
4638 	int hdr;
4639 
4640 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4641 
4642 	ix_rxeof(rxr, -1);
4643 
4644 	/*
4645 	 * NOTE:
4646 	 * Since tx_next_clean is only changed by ix_txeof(),
4647 	 * which is called only from the interrupt handler, the
4648 	 * check without holding the TX serializer is MPSAFE.
4649 	 */
4650 	txr = rxr->rx_txr;
4651 	hdr = *(txr->tx_hdr);
4652 	if (hdr != txr->tx_next_clean) {
4653 		lwkt_serialize_enter(&txr->tx_serialize);
4654 		ix_txeof(txr, hdr);
4655 		if (!ifsq_is_empty(txr->tx_ifsq))
4656 			ifsq_devstart(txr->tx_ifsq);
4657 		lwkt_serialize_exit(&txr->tx_serialize);
4658 	}
4659 
4660 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
4661 }
4662 
4663 static void
4664 ix_intr_status(struct ix_softc *sc, uint32_t eicr)
4665 {
4666 	struct ixgbe_hw *hw = &sc->hw;
4667 
4668 	/* Link status change */
4669 	if (eicr & IXGBE_EICR_LSC)
4670 		ix_handle_link(sc);
4671 
4672 	if (hw->mac.type != ixgbe_mac_82598EB) {
4673 		if (eicr & IXGBE_EICR_ECC)
4674 			if_printf(&sc->arpcom.ac_if, "ECC ERROR!!  Reboot!!\n");
4675 
4676 		/* Check for over temp condition */
4677 		if (eicr & IXGBE_EICR_TS) {
4678 			if_printf(&sc->arpcom.ac_if, "CRITICAL: OVER TEMP!!  "
4679 			    "PHY IS SHUT DOWN!!  Shutdown!!\n");
4680 		}
4681 	}
4682 
4683 	if (ix_is_sfp(hw)) {
4684 		uint32_t mod_mask;
4685 
4686 		/* Pluggable optics-related interrupt */
4687 		if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP)
4688 			mod_mask = IXGBE_EICR_GPI_SDP0_X540;
4689 		else
4690 			mod_mask = IXGBE_EICR_GPI_SDP2_BY_MAC(hw);
4691 		if (eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw))
4692 			ix_handle_msf(sc);
4693 		else if (eicr & mod_mask)
4694 			ix_handle_mod(sc);
4695 	}
4696 
4697 	/* Check for fan failure */
4698 	if (hw->device_id == IXGBE_DEV_ID_82598AT &&
4699 	    (eicr & IXGBE_EICR_GPI_SDP1))
4700 		if_printf(&sc->arpcom.ac_if, "FAN FAILURE!!  Replace!!\n");
4701 
4702 	/* External PHY interrupt */
4703 	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T &&
4704 	    (eicr & IXGBE_EICR_GPI_SDP0_X540))
4705 		ix_handle_phy(sc);
4706 }
4707 
4708 static void
4709 ix_msix_status(void *xsc)
4710 {
4711 	struct ix_softc *sc = xsc;
4712 	uint32_t eicr;
4713 
4714 	ASSERT_SERIALIZED(&sc->main_serialize);
4715 
4716 	eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
4717 	ix_intr_status(sc, eicr);
4718 
4719 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMS, sc->intr_mask);
4720 }
4721 
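/*
 * Map MSI-X vector x to the EIMS register and bit used to re-enable it:
 * vectors 0-31 use EIMS (82598) or EIMS_EX(0), vectors 32 and above use
 * EIMS_EX(1) with the bit index reduced by 32.
 */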
4722 static void
4723 ix_setup_msix_eims(const struct ix_softc *sc, int x,
4724     uint32_t *eims, uint32_t *eims_val)
4725 {
4726 	if (x < 32) {
4727 		if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4728 			KASSERT(x < IX_MAX_MSIX_82598,
4729 			    ("%s: invalid vector %d for 82598",
4730 			     device_get_nameunit(sc->dev), x));
4731 			*eims = IXGBE_EIMS;
4732 		} else {
4733 			*eims = IXGBE_EIMS_EX(0);
4734 		}
4735 		*eims_val = 1 << x;
4736 	} else {
4737 		KASSERT(x < IX_MAX_MSIX, ("%s: invalid vector %d",
4738 		    device_get_nameunit(sc->dev), x));
4739 		KASSERT(sc->hw.mac.type != ixgbe_mac_82598EB,
4740 		    ("%s: invalid vector %d for 82598",
4741 		     device_get_nameunit(sc->dev), x));
4742 		*eims = IXGBE_EIMS_EX(1);
4743 		*eims_val = 1 << (x - 32);
4744 	}
4745 }
4746 
4747 #ifdef IFPOLL_ENABLE
4748 
4749 static void
4750 ix_npoll_status(struct ifnet *ifp)
4751 {
4752 	struct ix_softc *sc = ifp->if_softc;
4753 	uint32_t eicr;
4754 
4755 	ASSERT_SERIALIZED(&sc->main_serialize);
4756 
4757 	eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
4758 	ix_intr_status(sc, eicr);
4759 }
4760 
4761 static void
4762 ix_npoll_tx(struct ifnet *ifp, void *arg, int cycle __unused)
4763 {
4764 	struct ix_tx_ring *txr = arg;
4765 
4766 	ASSERT_SERIALIZED(&txr->tx_serialize);
4767 
4768 	ix_txeof(txr, *(txr->tx_hdr));
4769 	if (!ifsq_is_empty(txr->tx_ifsq))
4770 		ifsq_devstart(txr->tx_ifsq);
4771 }
4772 
4773 static void
4774 ix_npoll_rx(struct ifnet *ifp __unused, void *arg, int cycle)
4775 {
4776 	struct ix_rx_ring *rxr = arg;
4777 
4778 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4779 
4780 	ix_rxeof(rxr, cycle);
4781 }
4782 
4783 static void
4784 ix_npoll(struct ifnet *ifp, struct ifpoll_info *info)
4785 {
4786 	struct ix_softc *sc = ifp->if_softc;
4787 	int i, txr_cnt, rxr_cnt;
4788 
4789 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
4790 
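	/*
	 * A non-NULL info registers the per-CPU status/TX/RX polling
	 * handlers (polling is being switched on); a NULL info moves the
	 * TX subqueues back to their interrupt CPUs (polling is being
	 * switched off).
	 */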
4791 	if (info) {
4792 		int cpu;
4793 
4794 		info->ifpi_status.status_func = ix_npoll_status;
4795 		info->ifpi_status.serializer = &sc->main_serialize;
4796 
4797 		txr_cnt = ix_get_txring_inuse(sc, TRUE);
4798 		for (i = 0; i < txr_cnt; ++i) {
4799 			struct ix_tx_ring *txr = &sc->tx_rings[i];
4800 
4801 			cpu = if_ringmap_cpumap(sc->tx_rmap, i);
4802 			KKASSERT(cpu < netisr_ncpus);
4803 			info->ifpi_tx[cpu].poll_func = ix_npoll_tx;
4804 			info->ifpi_tx[cpu].arg = txr;
4805 			info->ifpi_tx[cpu].serializer = &txr->tx_serialize;
4806 			ifsq_set_cpuid(txr->tx_ifsq, cpu);
4807 		}
4808 
4809 		rxr_cnt = ix_get_rxring_inuse(sc, TRUE);
4810 		for (i = 0; i < rxr_cnt; ++i) {
4811 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
4812 
4813 			cpu = if_ringmap_cpumap(sc->rx_rmap, i);
4814 			KKASSERT(cpu < netisr_ncpus);
4815 			info->ifpi_rx[cpu].poll_func = ix_npoll_rx;
4816 			info->ifpi_rx[cpu].arg = rxr;
4817 			info->ifpi_rx[cpu].serializer = &rxr->rx_serialize;
4818 		}
4819 	} else {
4820 		for (i = 0; i < sc->tx_ring_cnt; ++i) {
4821 			struct ix_tx_ring *txr = &sc->tx_rings[i];
4822 
4823 			ifsq_set_cpuid(txr->tx_ifsq, txr->tx_intr_cpuid);
4824 		}
4825 	}
4826 	if (ifp->if_flags & IFF_RUNNING)
4827 		ix_init(sc);
4828 }
4829 
4830 #endif /* IFPOLL_ENABLE */
4831 
4832 static enum ixgbe_fc_mode
4833 ix_ifmedia2fc(int ifm)
4834 {
4835 	int fc_opt = ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
4836 
4837 	switch (fc_opt) {
4838 	case (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE):
4839 		return ixgbe_fc_full;
4840 
4841 	case IFM_ETH_RXPAUSE:
4842 		return ixgbe_fc_rx_pause;
4843 
4844 	case IFM_ETH_TXPAUSE:
4845 		return ixgbe_fc_tx_pause;
4846 
4847 	default:
4848 		return ixgbe_fc_none;
4849 	}
4850 }
4851 
4852 static const char *
4853 ix_ifmedia2str(int ifm)
4854 {
4855 	int fc_opt = ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
4856 
4857 	switch (fc_opt) {
4858 	case (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE):
4859 		return IFM_ETH_FC_FULL;
4860 
4861 	case IFM_ETH_RXPAUSE:
4862 		return IFM_ETH_FC_RXPAUSE;
4863 
4864 	case IFM_ETH_TXPAUSE:
4865 		return IFM_ETH_FC_TXPAUSE;
4866 
4867 	default:
4868 		return IFM_ETH_FC_NONE;
4869 	}
4870 }
4871 
4872 static const char *
4873 ix_fc2str(enum ixgbe_fc_mode fc)
4874 {
4875 	switch (fc) {
4876 	case ixgbe_fc_full:
4877 		return IFM_ETH_FC_FULL;
4878 
4879 	case ixgbe_fc_rx_pause:
4880 		return IFM_ETH_FC_RXPAUSE;
4881 
4882 	case ixgbe_fc_tx_pause:
4883 		return IFM_ETH_FC_TXPAUSE;
4884 
4885 	default:
4886 		return IFM_ETH_FC_NONE;
4887 	}
4888 }
4889 
4890 static int
4891 ix_powerdown(struct ix_softc *sc)
4892 {
4893 	struct ixgbe_hw *hw = &sc->hw;
4894 	int error = 0;
4895 
4896 	/* Limit power management flow to X550EM baseT */
4897 	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T &&
4898 	    hw->phy.ops.enter_lplu) {
4899 		/* Turn off support for APM wakeup. (Using ACPI instead) */
4900 		IXGBE_WRITE_REG(hw, IXGBE_GRC,
4901 		    IXGBE_READ_REG(hw, IXGBE_GRC) & ~(uint32_t)2);
4902 
4903 		/*
4904 		 * Clear Wake Up Status register to prevent any previous wakeup
4905 		 * events from waking us up immediately after we suspend.
4906 		 */
4907 		IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff);
4908 
4909 		/*
4910 		 * Program the Wakeup Filter Control register with user filter
4911 		 * settings
4912 		 */
4913 		IXGBE_WRITE_REG(hw, IXGBE_WUFC, sc->wufc);
4914 
4915 		/* Enable wakeups and power management in Wakeup Control */
4916 		IXGBE_WRITE_REG(hw, IXGBE_WUC,
4917 		    IXGBE_WUC_WKEN | IXGBE_WUC_PME_EN);
4918 
4919 		/* X550EM baseT adapters need a special LPLU flow */
4920 		hw->phy.reset_disable = true;
4921 		ix_stop(sc);
4922 		error = hw->phy.ops.enter_lplu(hw);
4923 		if (error) {
4924 			if_printf(&sc->arpcom.ac_if,
4925 			    "Error entering LPLU: %d\n", error);
4926 		}
4927 		hw->phy.reset_disable = false;
4928 	} else {
4929 		/* Just stop for other adapters */
4930 		ix_stop(sc);
4931 	}
4932 	return error;
4933 }
4934 
4935 static void
4936 ix_config_flowctrl(struct ix_softc *sc)
4937 {
4938 	struct ixgbe_hw *hw = &sc->hw;
4939 	uint32_t rxpb, frame, size, tmp;
4940 
4941 	frame = sc->max_frame_size;
4942 
4943 	/* Calculate High Water */
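	/*
	 * The IXGBE_DV*() macros compute a delay allowance (in bits) for
	 * the given frame size, roughly the amount of data that can still
	 * arrive after a pause frame is sent; the high water mark is the
	 * RX packet buffer size minus that allowance, converted to KB by
	 * IXGBE_BT2KB().
	 */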
4944 	switch (hw->mac.type) {
4945 	case ixgbe_mac_X540:
4946 	case ixgbe_mac_X550:
4947 	case ixgbe_mac_X550EM_a:
4948 	case ixgbe_mac_X550EM_x:
4949 		tmp = IXGBE_DV_X540(frame, frame);
4950 		break;
4951 	default:
4952 		tmp = IXGBE_DV(frame, frame);
4953 		break;
4954 	}
4955 	size = IXGBE_BT2KB(tmp);
4956 	rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
4957 	hw->fc.high_water[0] = rxpb - size;
4958 
4959 	/* Now calculate Low Water */
4960 	switch (hw->mac.type) {
4961 	case ixgbe_mac_X540:
4962 	case ixgbe_mac_X550:
4963 	case ixgbe_mac_X550EM_a:
4964 	case ixgbe_mac_X550EM_x:
4965 		tmp = IXGBE_LOW_DV_X540(frame);
4966 		break;
4967 	default:
4968 		tmp = IXGBE_LOW_DV(frame);
4969 		break;
4970 	}
4971 	hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
4972 
4973 	hw->fc.requested_mode = ix_ifmedia2fc(sc->ifm_media);
4974 	if (sc->ifm_media & IFM_ETH_FORCEPAUSE)
4975 		hw->fc.disable_fc_autoneg = TRUE;
4976 	else
4977 		hw->fc.disable_fc_autoneg = FALSE;
4978 	hw->fc.pause_time = IX_FC_PAUSE;
4979 	hw->fc.send_xon = TRUE;
4980 }
4981 
4982 static void
4983 ix_config_dmac(struct ix_softc *sc)
4984 {
4985 	struct ixgbe_hw *hw = &sc->hw;
4986 	struct ixgbe_dmac_config *dcfg = &hw->mac.dmac_config;
4987 
4988 	if (hw->mac.type < ixgbe_mac_X550 || !hw->mac.ops.dmac_config)
4989 		return;
4990 
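	/* Reprogram DMA coalescing only if the timer or link speed changed. */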
4991 	if ((dcfg->watchdog_timer ^ sc->dmac) ||
4992 	    (dcfg->link_speed ^ sc->link_speed)) {
4993 		dcfg->watchdog_timer = sc->dmac;
4994 		dcfg->fcoe_en = false;
4995 		dcfg->link_speed = sc->link_speed;
4996 		dcfg->num_tcs = 1;
4997 
4998 		if (bootverbose) {
4999 			if_printf(&sc->arpcom.ac_if, "dmac settings: "
5000 			    "watchdog %d, link speed %d\n",
5001 			    dcfg->watchdog_timer, dcfg->link_speed);
5002 		}
5003 
5004 		hw->mac.ops.dmac_config(hw);
5005 	}
5006 }
5007 
5008 static void
5009 ix_init_media(struct ix_softc *sc)
5010 {
5011 	struct ixgbe_hw *hw = &sc->hw;
5012 	int layer, msf_ifm = IFM_NONE;
5013 
5014 	ifmedia_removeall(&sc->media);
5015 
5016 	layer = ixgbe_get_supported_physical_layer(hw);
5017 
5018 	/*
5019 	 * Media types with matching DragonFlyBSD media defines
5020 	 */
5021 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
5022 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_T | IFM_FDX,
5023 		    0, NULL);
5024 	}
5025 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
5026 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
5027 		    0, NULL);
5028 	}
5029 	if (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX) {
5030 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
5031 		    0, NULL);
5032 		/* No half-duplex support */
5033 	}
5034 
5035 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) {
5036 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_LR | IFM_FDX,
5037 		    0, NULL);
5038 		msf_ifm = IFM_1000_LX;
5039 	}
5040 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LRM) {
5041 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_LRM | IFM_FDX,
5042 		    0, NULL);
5043 		msf_ifm = IFM_1000_LX;
5044 	}
5045 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
5046 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_SR | IFM_FDX,
5047 		    0, NULL);
5048 		msf_ifm = IFM_1000_SX;
5049 	}
5050 
5051 	/* Add media for multispeed fiber */
5052 	if (ix_is_sfp(hw) && hw->phy.multispeed_fiber && msf_ifm != IFM_NONE) {
5053 		uint32_t linkcap;
5054 		bool autoneg;
5055 
5056 		hw->mac.ops.get_link_capabilities(hw, &linkcap, &autoneg);
5057 		if (linkcap & IXGBE_LINK_SPEED_1GB_FULL)
5058 			ifmedia_add_nodup(&sc->media,
5059 			    IFM_ETHER | msf_ifm | IFM_FDX, 0, NULL);
5060 	}
5061 
5062 	if ((layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) ||
5063 	    (layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA)) {
5064 		ifmedia_add_nodup(&sc->media,
5065 		    IFM_ETHER | IFM_10G_TWINAX | IFM_FDX, 0, NULL);
5066 	}
5067 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) {
5068 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_CX4 | IFM_FDX,
5069 		    0, NULL);
5070 	}
5071 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
5072 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
5073 		    0, NULL);
5074 	}
5075 
5076 	/*
5077 	 * XXX Other (no matching DragonFlyBSD media type):
5078 	 * To work around this, we'll assign these completely
5079 	 * inappropriate media types.
5080 	 */
5081 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) {
5082 		if_printf(&sc->arpcom.ac_if, "Media supported: 10GbaseKR\n");
5083 		if_printf(&sc->arpcom.ac_if, "10GbaseKR mapped to 10GbaseSR\n");
5084 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_SR | IFM_FDX,
5085 		    0, NULL);
5086 	}
5087 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) {
5088 		if_printf(&sc->arpcom.ac_if, "Media supported: 10GbaseKX4\n");
5089 		if_printf(&sc->arpcom.ac_if,
5090 		    "10GbaseKX4 mapped to 10GbaseCX4\n");
5091 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_CX4 | IFM_FDX,
5092 		    0, NULL);
5093 	}
5094 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) {
5095 		if_printf(&sc->arpcom.ac_if, "Media supported: 1000baseKX\n");
5096 		if_printf(&sc->arpcom.ac_if,
5097 		    "1000baseKX mapped to 1000baseCX\n");
5098 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_CX | IFM_FDX,
5099 		    0, NULL);
5100 	}
5101 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_BX) {
5102 		/* Someday, someone will care about you... */
5103 		if_printf(&sc->arpcom.ac_if,
5104 		    "Media supported: 1000baseBX, ignored\n");
5105 	}
5106 
5107 	/* XXX we probably don't need this */
5108 	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
5109 		ifmedia_add_nodup(&sc->media,
5110 		    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
5111 	}
5112 
5113 	ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
5114 
5115 	if (ifmedia_tryset(&sc->media, sc->ifm_media)) {
5116 		int flowctrl = (sc->ifm_media & IFM_ETH_FCMASK);
5117 
5118 		sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;
5119 		sc->ifm_media = IX_IFM_DEFAULT | flowctrl;
5120 		ifmedia_set(&sc->media, sc->ifm_media);
5121 	}
5122 }
5123