1 /*
2  * Copyright (c) 2001-2014, Intel Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  *  1. Redistributions of source code must retain the above copyright notice,
9  *     this list of conditions and the following disclaimer.
10  *
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *     notice, this list of conditions and the following disclaimer in the
13  *     documentation and/or other materials provided with the distribution.
14  *
15  *  3. Neither the name of the Intel Corporation nor the names of its
16  *     contributors may be used to endorse or promote products derived from
17  *     this software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include "opt_ifpoll.h"
33 #include "opt_ix.h"
34 
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/endian.h>
38 #include <sys/interrupt.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/proc.h>
43 #include <sys/rman.h>
44 #include <sys/serialize.h>
45 #include <sys/serialize2.h>
46 #include <sys/socket.h>
47 #include <sys/sockio.h>
48 #include <sys/sysctl.h>
49 #include <sys/systm.h>
50 
51 #include <net/bpf.h>
52 #include <net/ethernet.h>
53 #include <net/if.h>
54 #include <net/if_arp.h>
55 #include <net/if_dl.h>
56 #include <net/if_media.h>
57 #include <net/ifq_var.h>
58 #include <net/toeplitz.h>
59 #include <net/toeplitz2.h>
60 #include <net/vlan/if_vlan_var.h>
61 #include <net/vlan/if_vlan_ether.h>
62 #include <net/if_poll.h>
63 
64 #include <netinet/in_systm.h>
65 #include <netinet/in.h>
66 #include <netinet/ip.h>
67 
68 #include <bus/pci/pcivar.h>
69 #include <bus/pci/pcireg.h>
70 
71 #include <dev/netif/ix/ixgbe_api.h>
72 #include <dev/netif/ix/if_ix.h>
73 
74 #define IX_IFM_DEFAULT		(IFM_ETHER | IFM_AUTO)
75 
76 #ifdef IX_RSS_DEBUG
77 #define IX_RSS_DPRINTF(sc, lvl, fmt, ...) \
78 do { \
79 	if (sc->rss_debug >= lvl) \
80 		if_printf(&sc->arpcom.ac_if, fmt, __VA_ARGS__); \
81 } while (0)
82 #else	/* !IX_RSS_DEBUG */
83 #define IX_RSS_DPRINTF(sc, lvl, fmt, ...)	((void)0)
84 #endif	/* IX_RSS_DEBUG */
85 
86 #define IX_NAME			"Intel(R) PRO/10GbE "
87 #define IX_DEVICE(id) \
88 	{ IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_##id, IX_NAME #id }
89 #define IX_DEVICE_NULL		{ 0, 0, NULL }
90 
91 static struct ix_device {
92 	uint16_t	vid;
93 	uint16_t	did;
94 	const char	*desc;
95 } ix_devices[] = {
96 	IX_DEVICE(82598AF_DUAL_PORT),
97 	IX_DEVICE(82598AF_SINGLE_PORT),
98 	IX_DEVICE(82598EB_CX4),
99 	IX_DEVICE(82598AT),
100 	IX_DEVICE(82598AT2),
101 	IX_DEVICE(82598),
102 	IX_DEVICE(82598_DA_DUAL_PORT),
103 	IX_DEVICE(82598_CX4_DUAL_PORT),
104 	IX_DEVICE(82598EB_XF_LR),
105 	IX_DEVICE(82598_SR_DUAL_PORT_EM),
106 	IX_DEVICE(82598EB_SFP_LOM),
107 	IX_DEVICE(82599_KX4),
108 	IX_DEVICE(82599_KX4_MEZZ),
109 	IX_DEVICE(82599_SFP),
110 	IX_DEVICE(82599_XAUI_LOM),
111 	IX_DEVICE(82599_CX4),
112 	IX_DEVICE(82599_T3_LOM),
113 	IX_DEVICE(82599_COMBO_BACKPLANE),
114 	IX_DEVICE(82599_BACKPLANE_FCOE),
115 	IX_DEVICE(82599_SFP_SF2),
116 	IX_DEVICE(82599_SFP_FCOE),
117 	IX_DEVICE(82599EN_SFP),
118 	IX_DEVICE(82599_SFP_SF_QP),
119 	IX_DEVICE(82599_QSFP_SF_QP),
120 	IX_DEVICE(X540T),
121 	IX_DEVICE(X540T1),
122 	IX_DEVICE(X550T),
123 	IX_DEVICE(X550EM_X_KR),
124 	IX_DEVICE(X550EM_X_KX4),
125 	IX_DEVICE(X550EM_X_10G_T),
126 
127 	/* required last entry */
128 	IX_DEVICE_NULL
129 };
130 
131 static int	ix_probe(device_t);
132 static int	ix_attach(device_t);
133 static int	ix_detach(device_t);
134 static int	ix_shutdown(device_t);
135 
136 static void	ix_serialize(struct ifnet *, enum ifnet_serialize);
137 static void	ix_deserialize(struct ifnet *, enum ifnet_serialize);
138 static int	ix_tryserialize(struct ifnet *, enum ifnet_serialize);
139 #ifdef INVARIANTS
140 static void	ix_serialize_assert(struct ifnet *, enum ifnet_serialize,
141 		    boolean_t);
142 #endif
143 static void	ix_start(struct ifnet *, struct ifaltq_subque *);
144 static void	ix_watchdog(struct ifaltq_subque *);
145 static int	ix_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
146 static void	ix_init(void *);
147 static void	ix_stop(struct ix_softc *);
148 static void	ix_media_status(struct ifnet *, struct ifmediareq *);
149 static int	ix_media_change(struct ifnet *);
150 static void	ix_timer(void *);
151 #ifdef IFPOLL_ENABLE
152 static void	ix_npoll(struct ifnet *, struct ifpoll_info *);
153 static void	ix_npoll_rx(struct ifnet *, void *, int);
154 static void	ix_npoll_tx(struct ifnet *, void *, int);
155 static void	ix_npoll_status(struct ifnet *);
156 #endif
157 
158 static void	ix_add_sysctl(struct ix_softc *);
159 static void	ix_add_intr_rate_sysctl(struct ix_softc *, int,
160 		    const char *, int (*)(SYSCTL_HANDLER_ARGS), const char *);
161 static int	ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
162 static int	ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
163 static int	ix_sysctl_txd(SYSCTL_HANDLER_ARGS);
164 static int	ix_sysctl_rxd(SYSCTL_HANDLER_ARGS);
165 static int	ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS);
166 static int	ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int);
167 static int	ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS);
168 static int	ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS);
169 static int	ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS);
170 static int	ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS);
171 #if 0
172 static void     ix_add_hw_stats(struct ix_softc *);
173 #endif
174 #ifdef IFPOLL_ENABLE
175 static int	ix_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS);
176 static int	ix_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS);
177 #endif
178 
179 static void	ix_slot_info(struct ix_softc *);
180 static int	ix_alloc_rings(struct ix_softc *);
181 static void	ix_free_rings(struct ix_softc *);
182 static void	ix_setup_ifp(struct ix_softc *);
183 static void	ix_setup_serialize(struct ix_softc *);
184 static void	ix_set_ring_inuse(struct ix_softc *, boolean_t);
185 static void	ix_set_timer_cpuid(struct ix_softc *, boolean_t);
186 static void	ix_update_stats(struct ix_softc *);
187 
188 static void	ix_set_promisc(struct ix_softc *);
189 static void	ix_set_multi(struct ix_softc *);
190 static void	ix_set_vlan(struct ix_softc *);
191 static uint8_t	*ix_mc_array_itr(struct ixgbe_hw *, uint8_t **, uint32_t *);
192 static enum ixgbe_fc_mode ix_ifmedia2fc(int);
193 static const char *ix_ifmedia2str(int);
194 static const char *ix_fc2str(enum ixgbe_fc_mode);
195 
196 static int	ix_get_txring_inuse(const struct ix_softc *, boolean_t);
197 static void	ix_init_tx_ring(struct ix_tx_ring *);
198 static void	ix_free_tx_ring(struct ix_tx_ring *);
199 static int	ix_create_tx_ring(struct ix_tx_ring *);
200 static void	ix_destroy_tx_ring(struct ix_tx_ring *, int);
201 static void	ix_init_tx_unit(struct ix_softc *);
202 static int	ix_encap(struct ix_tx_ring *, struct mbuf **,
203 		    uint16_t *, int *);
204 static int	ix_tx_ctx_setup(struct ix_tx_ring *,
205 		    const struct mbuf *, uint32_t *, uint32_t *);
206 static int	ix_tso_ctx_setup(struct ix_tx_ring *,
207 		    const struct mbuf *, uint32_t *, uint32_t *);
208 static void	ix_txeof(struct ix_tx_ring *, int);
209 
210 static int	ix_get_rxring_inuse(const struct ix_softc *, boolean_t);
211 static int	ix_init_rx_ring(struct ix_rx_ring *);
212 static void	ix_free_rx_ring(struct ix_rx_ring *);
213 static int	ix_create_rx_ring(struct ix_rx_ring *);
214 static void	ix_destroy_rx_ring(struct ix_rx_ring *, int);
215 static void	ix_init_rx_unit(struct ix_softc *);
216 #if 0
217 static void	ix_setup_hw_rsc(struct ix_rx_ring *);
218 #endif
219 static int	ix_newbuf(struct ix_rx_ring *, int, boolean_t);
220 static void	ix_rxeof(struct ix_rx_ring *, int);
221 static void	ix_rx_discard(struct ix_rx_ring *, int, boolean_t);
222 static void	ix_enable_rx_drop(struct ix_softc *);
223 static void	ix_disable_rx_drop(struct ix_softc *);
224 
225 static void	ix_alloc_msix(struct ix_softc *);
226 static void	ix_free_msix(struct ix_softc *, boolean_t);
227 static void	ix_conf_rx_msix(struct ix_softc *, int, int *, int);
228 static void	ix_conf_tx_msix(struct ix_softc *, int, int *, int);
229 static void	ix_setup_msix_eims(const struct ix_softc *, int,
230 		    uint32_t *, uint32_t *);
231 static int	ix_alloc_intr(struct ix_softc *);
232 static void	ix_free_intr(struct ix_softc *);
233 static int	ix_setup_intr(struct ix_softc *);
234 static void	ix_teardown_intr(struct ix_softc *, int);
235 static void	ix_enable_intr(struct ix_softc *);
236 static void	ix_disable_intr(struct ix_softc *);
237 static void	ix_set_ivar(struct ix_softc *, uint8_t, uint8_t, int8_t);
238 static void	ix_set_eitr(struct ix_softc *, int, int);
239 static void	ix_intr_status(struct ix_softc *, uint32_t);
240 static void	ix_intr(void *);
241 static void	ix_msix_rxtx(void *);
242 static void	ix_msix_rx(void *);
243 static void	ix_msix_tx(void *);
244 static void	ix_msix_status(void *);
245 
246 static void	ix_config_link(struct ix_softc *);
247 static boolean_t ix_sfp_probe(struct ix_softc *);
248 static boolean_t ix_is_sfp(const struct ixgbe_hw *);
249 static void	ix_update_link_status(struct ix_softc *);
250 static void	ix_handle_link(struct ix_softc *);
251 static void	ix_handle_mod(struct ix_softc *);
252 static void	ix_handle_msf(struct ix_softc *);
253 static void	ix_handle_phy(struct ix_softc *);
254 static int	ix_powerdown(struct ix_softc *);
255 static void	ix_config_flowctrl(struct ix_softc *);
256 static void	ix_config_dmac(struct ix_softc *);
257 static void	ix_init_media(struct ix_softc *);
258 
259 /* XXX Missing shared code prototype */
260 extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *);
261 
262 static device_method_t ix_methods[] = {
263 	/* Device interface */
264 	DEVMETHOD(device_probe,		ix_probe),
265 	DEVMETHOD(device_attach,	ix_attach),
266 	DEVMETHOD(device_detach,	ix_detach),
267 	DEVMETHOD(device_shutdown,	ix_shutdown),
268 	DEVMETHOD_END
269 };
270 
271 static driver_t ix_driver = {
272 	"ix",
273 	ix_methods,
274 	sizeof(struct ix_softc)
275 };
276 
277 static devclass_t ix_devclass;
278 
279 DECLARE_DUMMY_MODULE(if_ix);
280 DRIVER_MODULE(if_ix, pci, ix_driver, ix_devclass, NULL, NULL);
281 
282 static int	ix_msi_enable = 1;
283 static int	ix_msix_enable = 1;
284 static int	ix_msix_agg_rxtx = 1;
285 static int	ix_rxr = 0;
286 static int	ix_txr = 0;
287 static int	ix_txd = IX_PERF_TXD;
288 static int	ix_rxd = IX_PERF_RXD;
289 static int	ix_unsupported_sfp = 0;
290 
291 static char	ix_flowctrl[IFM_ETH_FC_STRLEN] = IFM_ETH_FC_FULL;
292 
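/*
 * Loader tunables (hw.ix.*): interrupt type selection, RX/TX ring
 * and descriptor counts, the unsupported-SFP override and the
 * default flow control setting.
 */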
293 TUNABLE_INT("hw.ix.msi.enable", &ix_msi_enable);
294 TUNABLE_INT("hw.ix.msix.enable", &ix_msix_enable);
295 TUNABLE_INT("hw.ix.msix.agg_rxtx", &ix_msix_agg_rxtx);
296 TUNABLE_INT("hw.ix.rxr", &ix_rxr);
297 TUNABLE_INT("hw.ix.txr", &ix_txr);
298 TUNABLE_INT("hw.ix.txd", &ix_txd);
299 TUNABLE_INT("hw.ix.rxd", &ix_rxd);
300 TUNABLE_INT("hw.ix.unsupported_sfp", &ix_unsupported_sfp);
301 TUNABLE_STR("hw.ix.flow_ctrl", ix_flowctrl, sizeof(ix_flowctrl));
302 
303 /*
304  * Smart speed setting, default to on.  This can only be changed
305  * at compile time right now, since it is applied during attach;
306  * set this to 'ixgbe_smart_speed_off' to disable.
307  */
308 static const enum ixgbe_smart_speed ix_smart_speed =
309     ixgbe_smart_speed_on;
310 
311 static int
312 ix_probe(device_t dev)
313 {
314 	const struct ix_device *d;
315 	uint16_t vid, did;
316 
317 	vid = pci_get_vendor(dev);
318 	did = pci_get_device(dev);
319 
320 	for (d = ix_devices; d->desc != NULL; ++d) {
321 		if (vid == d->vid && did == d->did) {
322 			device_set_desc(dev, d->desc);
323 			return 0;
324 		}
325 	}
326 	return ENXIO;
327 }
328 
329 static int
330 ix_attach(device_t dev)
331 {
332 	struct ix_softc *sc = device_get_softc(dev);
333 	struct ixgbe_hw *hw;
334 	int error, ring_cnt_max;
335 	uint16_t csum;
336 	uint32_t ctrl_ext;
337 #ifdef IFPOLL_ENABLE
338 	int offset, offset_def;
339 #endif
340 	char flowctrl[IFM_ETH_FC_STRLEN];
341 
342 	sc->dev = sc->osdep.dev = dev;
343 	hw = &sc->hw;
344 
345 	if_initname(&sc->arpcom.ac_if, device_get_name(dev),
346 	    device_get_unit(dev));
347 	ifmedia_init(&sc->media, IFM_IMASK | IFM_ETH_FCMASK,
348 	    ix_media_change, ix_media_status);
349 
350 	/* Save frame size */
351 	sc->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;
352 
353 	callout_init_mp(&sc->timer);
354 	lwkt_serialize_init(&sc->main_serialize);
355 
356 	/*
357 	 * Save off the information about this board
358 	 */
359 	hw->vendor_id = pci_get_vendor(dev);
360 	hw->device_id = pci_get_device(dev);
361 	hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
362 	hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2);
363 	hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);
364 
365 	ixgbe_set_mac_type(hw);
366 
367 	/* Apply the smart speed setting to 82599 and newer MACs */
368 	if (hw->mac.type != ixgbe_mac_82598EB)
369 		hw->phy.smart_speed = ix_smart_speed;
370 
371 	/* Enable bus mastering */
372 	pci_enable_busmaster(dev);
373 
374 	/*
375 	 * Allocate IO memory
376 	 */
377 	sc->mem_rid = PCIR_BAR(0);
378 	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
379 	    &sc->mem_rid, RF_ACTIVE);
380 	if (sc->mem_res == NULL) {
381 		device_printf(dev, "Unable to allocate bus resource: memory\n");
382 		error = ENXIO;
383 		goto failed;
384 	}
385 
386 	sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->mem_res);
387 	sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->mem_res);
388 
389 	sc->hw.hw_addr = (uint8_t *)&sc->osdep.mem_bus_space_handle;
390 	sc->hw.back = &sc->osdep;
391 
392 	/*
393 	 * Configure total supported RX/TX ring count
394 	 */
395 	switch (hw->mac.type) {
396 	case ixgbe_mac_X550:
397 	case ixgbe_mac_X550EM_x:
398 	case ixgbe_mac_X550EM_a:
399 		ring_cnt_max = IX_MAX_RXRING_X550;
400 		break;
401 
402 	default:
403 		ring_cnt_max = IX_MAX_RXRING;
404 		break;
405 	}
406 	sc->rx_ring_cnt = device_getenv_int(dev, "rxr", ix_rxr);
407 	sc->rx_ring_cnt = if_ring_count2(sc->rx_ring_cnt, ring_cnt_max);
408 	sc->rx_ring_inuse = sc->rx_ring_cnt;
409 
410 	switch (hw->mac.type) {
411 	case ixgbe_mac_82598EB:
412 		ring_cnt_max = IX_MAX_TXRING_82598;
413 		break;
414 
415 	case ixgbe_mac_82599EB:
416 		ring_cnt_max = IX_MAX_TXRING_82599;
417 		break;
418 
419 	case ixgbe_mac_X540:
420 		ring_cnt_max = IX_MAX_TXRING_X540;
421 		break;
422 
423 	case ixgbe_mac_X550:
424 	case ixgbe_mac_X550EM_x:
425 	case ixgbe_mac_X550EM_a:
426 		ring_cnt_max = IX_MAX_TXRING_X550;
427 		break;
428 
429 	default:
430 		ring_cnt_max = IX_MAX_TXRING;
431 		break;
432 	}
433 	sc->tx_ring_cnt = device_getenv_int(dev, "txr", ix_txr);
434 	sc->tx_ring_cnt = if_ring_count2(sc->tx_ring_cnt, ring_cnt_max);
435 	sc->tx_ring_inuse = sc->tx_ring_cnt;
436 
437 	/* Allocate TX/RX rings */
438 	error = ix_alloc_rings(sc);
439 	if (error)
440 		goto failed;
441 
442 #ifdef IFPOLL_ENABLE
443 	/*
444 	 * NPOLLING RX CPU offset
445 	 */
446 	if (sc->rx_ring_cnt == ncpus2) {
447 		offset = 0;
448 	} else {
449 		offset_def = (sc->rx_ring_cnt * device_get_unit(dev)) % ncpus2;
450 		offset = device_getenv_int(dev, "npoll.rxoff", offset_def);
451 		if (offset >= ncpus2 ||
452 		    offset % sc->rx_ring_cnt != 0) {
453 			device_printf(dev, "invalid npoll.rxoff %d, use %d\n",
454 			    offset, offset_def);
455 			offset = offset_def;
456 		}
457 	}
458 	sc->rx_npoll_off = offset;
459 
460 	/*
461 	 * NPOLLING TX CPU offset
462 	 */
463 	if (sc->tx_ring_cnt == ncpus2) {
464 		offset = 0;
465 	} else {
466 		offset_def = (sc->tx_ring_cnt * device_get_unit(dev)) % ncpus2;
467 		offset = device_getenv_int(dev, "npoll.txoff", offset_def);
468 		if (offset >= ncpus2 ||
469 		    offset % sc->tx_ring_cnt != 0) {
470 			device_printf(dev, "invalid npoll.txoff %d, use %d\n",
471 			    offset, offset_def);
472 			offset = offset_def;
473 		}
474 	}
475 	sc->tx_npoll_off = offset;
476 #endif
477 
478 	/* Allocate interrupt */
479 	error = ix_alloc_intr(sc);
480 	if (error)
481 		goto failed;
482 
483 	/* Set up serializers */
484 	ix_setup_serialize(sc);
485 
486 	/* Allocate multicast array memory. */
487 	sc->mta = kmalloc(IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR,
488 	    M_DEVBUF, M_WAITOK);
489 
490 	/* Initialize the shared code */
491 	hw->allow_unsupported_sfp = ix_unsupported_sfp;
492 	error = ixgbe_init_shared_code(hw);
493 	if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
494 		/*
495 		 * No optics in this port; ask timer routine
496 		 * to probe for later insertion.
497 		 */
498 		sc->sfp_probe = TRUE;
499 		error = 0;
500 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
501 		device_printf(dev, "Unsupported SFP+ module detected!\n");
502 		error = EIO;
503 		goto failed;
504 	} else if (error) {
505 		device_printf(dev, "Unable to initialize the shared code\n");
506 		error = EIO;
507 		goto failed;
508 	}
509 
510 	/* Make sure we have a good EEPROM before we read from it */
511 	if (ixgbe_validate_eeprom_checksum(&sc->hw, &csum) < 0) {
512 		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
513 		error = EIO;
514 		goto failed;
515 	}
516 
517 	error = ixgbe_init_hw(hw);
518 	if (error == IXGBE_ERR_EEPROM_VERSION) {
519 		device_printf(dev, "Pre-production device detected\n");
520 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
521 		device_printf(dev, "Unsupported SFP+ Module\n");
522 		error = EIO;
523 		goto failed;
524 	} else if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
525 		device_printf(dev, "No SFP+ Module found\n");
526 	}
527 
528 	sc->ifm_media = IX_IFM_DEFAULT;
529 	/* Get default flow control settings */
530 	device_getenv_string(dev, "flow_ctrl", flowctrl, sizeof(flowctrl),
531 	    ix_flowctrl);
532 	sc->ifm_media |= ifmedia_str2ethfc(flowctrl);
533 	sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;
534 
535 	/* Setup OS specific network interface */
536 	ix_setup_ifp(sc);
537 
538 	/* Add sysctl tree */
539 	ix_add_sysctl(sc);
540 
541 	error = ix_setup_intr(sc);
542 	if (error) {
543 		ether_ifdetach(&sc->arpcom.ac_if);
544 		goto failed;
545 	}
546 
547 	/* Initialize statistics */
548 	ix_update_stats(sc);
549 
550 	/* Check PCIE slot type/speed/width */
551 	ix_slot_info(sc);
552 
553 	/* Save initial wake up filter configuration */
554 	sc->wufc = IXGBE_READ_REG(hw, IXGBE_WUFC);
555 
556 	/* Let hardware know driver is loaded */
557 	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
558 	ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
559 	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
560 
561 	return 0;
562 failed:
563 	ix_detach(dev);
564 	return error;
565 }
566 
567 static int
568 ix_detach(device_t dev)
569 {
570 	struct ix_softc *sc = device_get_softc(dev);
571 
572 	if (device_is_attached(dev)) {
573 		struct ifnet *ifp = &sc->arpcom.ac_if;
574 		uint32_t ctrl_ext;
575 
576 		ifnet_serialize_all(ifp);
577 
578 		ix_powerdown(sc);
579 		ix_teardown_intr(sc, sc->intr_cnt);
580 
581 		ifnet_deserialize_all(ifp);
582 
583 		callout_terminate(&sc->timer);
584 		ether_ifdetach(ifp);
585 
586 		/* Let hardware know driver is unloading */
587 		ctrl_ext = IXGBE_READ_REG(&sc->hw, IXGBE_CTRL_EXT);
588 		ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
589 		IXGBE_WRITE_REG(&sc->hw, IXGBE_CTRL_EXT, ctrl_ext);
590 	}
591 
592 	ifmedia_removeall(&sc->media);
593 	bus_generic_detach(dev);
594 
595 	ix_free_intr(sc);
596 
597 	if (sc->msix_mem_res != NULL) {
598 		bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_mem_rid,
599 		    sc->msix_mem_res);
600 	}
601 	if (sc->mem_res != NULL) {
602 		bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid,
603 		    sc->mem_res);
604 	}
605 
606 	ix_free_rings(sc);
607 
608 	if (sc->mta != NULL)
609 		kfree(sc->mta, M_DEVBUF);
610 	if (sc->serializes != NULL)
611 		kfree(sc->serializes, M_DEVBUF);
612 
613 	return 0;
614 }
615 
616 static int
617 ix_shutdown(device_t dev)
618 {
619 	struct ix_softc *sc = device_get_softc(dev);
620 	struct ifnet *ifp = &sc->arpcom.ac_if;
621 
622 	ifnet_serialize_all(ifp);
623 	ix_powerdown(sc);
624 	ifnet_deserialize_all(ifp);
625 
626 	return 0;
627 }
628 
629 static void
630 ix_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
631 {
632 	struct ix_softc *sc = ifp->if_softc;
633 	struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
634 	int idx = -1;
635 	uint16_t nsegs;
636 
637 	KKASSERT(txr->tx_ifsq == ifsq);
638 	ASSERT_SERIALIZED(&txr->tx_serialize);
639 
640 	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
641 		return;
642 
643 	if (!sc->link_active || (txr->tx_flags & IX_TXFLAG_ENABLED) == 0) {
644 		ifsq_purge(ifsq);
645 		return;
646 	}
647 
648 	while (!ifsq_is_empty(ifsq)) {
649 		struct mbuf *m_head;
650 
651 		if (txr->tx_avail <= IX_MAX_SCATTER + IX_TX_RESERVED) {
652 			ifsq_set_oactive(ifsq);
653 			txr->tx_watchdog.wd_timer = 5;
654 			break;
655 		}
656 
657 		m_head = ifsq_dequeue(ifsq);
658 		if (m_head == NULL)
659 			break;
660 
661 		if (ix_encap(txr, &m_head, &nsegs, &idx)) {
662 			IFNET_STAT_INC(ifp, oerrors, 1);
663 			continue;
664 		}
665 
666 		/*
667 		 * TX interrupts are aggressively aggregated, so increasing
668 		 * opackets at TX interrupt time would make the opackets
669 		 * statistics vastly inaccurate; increment opackets here
670 		 * instead.
671 		 */
672 		IFNET_STAT_INC(ifp, opackets, 1);
673 
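		/*
		 * Batch TDT (tail) register writes: only bump the tail
		 * once at least tx_wreg_nsegs descriptors have been queued.
		 */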
674 		if (nsegs >= txr->tx_wreg_nsegs) {
675 			IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
676 			nsegs = 0;
677 			idx = -1;
678 		}
679 
680 		ETHER_BPF_MTAP(ifp, m_head);
681 	}
682 	if (idx >= 0)
683 		IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
684 }
685 
686 static int
687 ix_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
688 {
689 	struct ix_softc *sc = ifp->if_softc;
690 	struct ifreq *ifr = (struct ifreq *) data;
691 	int error = 0, mask, reinit;
692 
693 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
694 
695 	switch (command) {
696 	case SIOCSIFMTU:
697 		if (ifr->ifr_mtu > IX_MAX_MTU) {
698 			error = EINVAL;
699 		} else {
700 			ifp->if_mtu = ifr->ifr_mtu;
701 			sc->max_frame_size = ifp->if_mtu + IX_MTU_HDR;
702 			ix_init(sc);
703 		}
704 		break;
705 
706 	case SIOCSIFFLAGS:
707 		if (ifp->if_flags & IFF_UP) {
708 			if (ifp->if_flags & IFF_RUNNING) {
709 				if ((ifp->if_flags ^ sc->if_flags) &
710 				    (IFF_PROMISC | IFF_ALLMULTI))
711 					ix_set_promisc(sc);
712 			} else {
713 				ix_init(sc);
714 			}
715 		} else if (ifp->if_flags & IFF_RUNNING) {
716 			ix_stop(sc);
717 		}
718 		sc->if_flags = ifp->if_flags;
719 		break;
720 
721 	case SIOCADDMULTI:
722 	case SIOCDELMULTI:
723 		if (ifp->if_flags & IFF_RUNNING) {
724 			ix_disable_intr(sc);
725 			ix_set_multi(sc);
726 #ifdef IFPOLL_ENABLE
727 			if ((ifp->if_flags & IFF_NPOLLING) == 0)
728 #endif
729 				ix_enable_intr(sc);
730 		}
731 		break;
732 
733 	case SIOCSIFMEDIA:
734 	case SIOCGIFMEDIA:
735 		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
736 		break;
737 
738 	case SIOCSIFCAP:
739 		reinit = 0;
740 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
741 		if (mask & IFCAP_RXCSUM) {
742 			ifp->if_capenable ^= IFCAP_RXCSUM;
743 			reinit = 1;
744 		}
745 		if (mask & IFCAP_VLAN_HWTAGGING) {
746 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
747 			reinit = 1;
748 		}
749 		if (mask & IFCAP_TXCSUM) {
750 			ifp->if_capenable ^= IFCAP_TXCSUM;
751 			if (ifp->if_capenable & IFCAP_TXCSUM)
752 				ifp->if_hwassist |= CSUM_OFFLOAD;
753 			else
754 				ifp->if_hwassist &= ~CSUM_OFFLOAD;
755 		}
756 		if (mask & IFCAP_TSO) {
757 			ifp->if_capenable ^= IFCAP_TSO;
758 			if (ifp->if_capenable & IFCAP_TSO)
759 				ifp->if_hwassist |= CSUM_TSO;
760 			else
761 				ifp->if_hwassist &= ~CSUM_TSO;
762 		}
763 		if (mask & IFCAP_RSS)
764 			ifp->if_capenable ^= IFCAP_RSS;
765 		if (reinit && (ifp->if_flags & IFF_RUNNING))
766 			ix_init(sc);
767 		break;
768 
769 #if 0
770 	case SIOCGI2C:
771 	{
772 		struct ixgbe_i2c_req	i2c;
773 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
774 		if (error)
775 			break;
776 		if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) {
777 			error = EINVAL;
778 			break;
779 		}
780 		hw->phy.ops.read_i2c_byte(hw, i2c.offset,
781 		    i2c.dev_addr, i2c.data);
782 		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
783 		break;
784 	}
785 #endif
786 
787 	default:
788 		error = ether_ioctl(ifp, command, data);
789 		break;
790 	}
791 	return error;
792 }
793 
794 #define IXGBE_MHADD_MFS_SHIFT 16
795 
796 static void
797 ix_init(void *xsc)
798 {
799 	struct ix_softc *sc = xsc;
800 	struct ifnet *ifp = &sc->arpcom.ac_if;
801 	struct ixgbe_hw *hw = &sc->hw;
802 	uint32_t gpie, rxctrl;
803 	int i, error;
804 	boolean_t polling;
805 
806 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
807 
808 	ix_stop(sc);
809 
810 	polling = FALSE;
811 #ifdef IFPOLL_ENABLE
812 	if (ifp->if_flags & IFF_NPOLLING)
813 		polling = TRUE;
814 #endif
815 
816 	/* Configure # of used RX/TX rings */
817 	ix_set_ring_inuse(sc, polling);
818 	ifq_set_subq_mask(&ifp->if_snd, sc->tx_ring_inuse - 1);
819 
820 	/* Get the latest mac address, User can use a LAA */
821 	bcopy(IF_LLADDR(ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS);
822 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
823 	hw->addr_ctrl.rar_used_count = 1;
824 
825 	/* Prepare transmit descriptors and buffers */
826 	for (i = 0; i < sc->tx_ring_inuse; ++i)
827 		ix_init_tx_ring(&sc->tx_rings[i]);
828 
829 	ixgbe_init_hw(hw);
830 	ix_init_tx_unit(sc);
831 
832 	/* Setup Multicast table */
833 	ix_set_multi(sc);
834 
835 	/* Prepare receive descriptors and buffers */
836 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
837 		error = ix_init_rx_ring(&sc->rx_rings[i]);
838 		if (error) {
839 			if_printf(ifp, "Could not initialize RX ring%d\n", i);
840 			ix_stop(sc);
841 			return;
842 		}
843 	}
844 
845 	/* Configure RX settings */
846 	ix_init_rx_unit(sc);
847 
848 	gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
849 
850 	/* Enable Fan Failure Interrupt */
851 	gpie |= IXGBE_SDP1_GPIEN_BY_MAC(hw);
852 
853 	/* Enable SFP+ module detection interrupt (82599) */
854 	if (hw->mac.type == ixgbe_mac_82599EB)
855 		gpie |= IXGBE_SDP2_GPIEN;
856 
857 	/*
858 	 * Thermal Failure Detection (X540)
859 	 * Link Detection (X552)
860 	 */
861 	if (hw->mac.type == ixgbe_mac_X540 ||
862 	    hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
863 	    hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T)
864 		gpie |= IXGBE_SDP0_GPIEN_X540;
865 
866 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
867 		/* Enable Enhanced MSIX mode */
868 		gpie |= IXGBE_GPIE_MSIX_MODE;
869 		gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
870 		    IXGBE_GPIE_OCD;
871 	}
872 	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
873 
874 	/* Set MTU size */
875 	if (ifp->if_mtu > ETHERMTU) {
876 		uint32_t mhadd;
877 
878 		/* aka IXGBE_MAXFRS on 82599 and newer */
879 		mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
880 		mhadd &= ~IXGBE_MHADD_MFS_MASK;
881 		mhadd |= sc->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
882 		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
883 	}
884 
885 	/*
886 	 * Enable TX rings
887 	 */
888 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
889 		uint32_t txdctl;
890 
891 		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
892 		txdctl |= IXGBE_TXDCTL_ENABLE;
893 
894 		/*
895 		 * Set WTHRESH to 0, since TX head write-back is used
896 		 */
897 		txdctl &= ~(0x7f << 16);
898 
899 		/*
900 		 * When the internal queue falls below PTHRESH (32),
901 		 * start prefetching as long as there are at least
902 		 * HTHRESH (1) buffers ready. The values are taken
903 		 * from the Intel Linux driver 3.8.21.
904 		 * Prefetching enables tx line rate even with 1 queue.
905 		 */
906 		txdctl |= (32 << 0) | (1 << 8);
907 		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
908 	}
909 
910 	/*
911 	 * Enable RX rings
912 	 */
913 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
914 		uint32_t rxdctl;
915 		int k;
916 
917 		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
918 		if (hw->mac.type == ixgbe_mac_82598EB) {
919 			/*
920 			 * PTHRESH = 21
921 			 * HTHRESH = 4
922 			 * WTHRESH = 8
923 			 */
924 			rxdctl &= ~0x3FFFFF;
925 			rxdctl |= 0x080420;
926 		}
927 		rxdctl |= IXGBE_RXDCTL_ENABLE;
928 		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
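		/* Wait up to 10ms for the hardware to enable this RX queue */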
929 		for (k = 0; k < 10; ++k) {
930 			if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
931 			    IXGBE_RXDCTL_ENABLE)
932 				break;
933 			else
934 				msec_delay(1);
935 		}
936 		wmb();
937 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i),
938 		    sc->rx_rings[0].rx_ndesc - 1);
939 	}
940 
941 	/* Enable Receive engine */
942 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
943 	if (hw->mac.type == ixgbe_mac_82598EB)
944 		rxctrl |= IXGBE_RXCTRL_DMBYPS;
945 	rxctrl |= IXGBE_RXCTRL_RXEN;
946 	ixgbe_enable_rx_dma(hw, rxctrl);
947 
948 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
949 		const struct ix_tx_ring *txr = &sc->tx_rings[i];
950 
951 		if (txr->tx_intr_vec >= 0) {
952 			ix_set_ivar(sc, i, txr->tx_intr_vec, 1);
953 		} else {
954 			/*
955 			 * Unconfigured TX interrupt vector could only
956 			 * happen for MSI-X.
957 			 */
958 			KASSERT(sc->intr_type == PCI_INTR_TYPE_MSIX,
959 			    ("TX intr vector is not set"));
960 			KASSERT(i < sc->rx_ring_inuse,
961 			    ("invalid TX ring %d, no piggyback RX ring", i));
962 			KASSERT(sc->rx_rings[i].rx_txr == txr,
963 			    ("RX ring %d piggybacked TX ring mismatch", i));
964 			if (bootverbose)
965 				if_printf(ifp, "IVAR skips TX ring %d\n", i);
966 		}
967 	}
968 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
969 		const struct ix_rx_ring *rxr = &sc->rx_rings[i];
970 
971 		KKASSERT(rxr->rx_intr_vec >= 0);
972 		ix_set_ivar(sc, i, rxr->rx_intr_vec, 0);
973 		if (rxr->rx_txr != NULL) {
974 			/*
975 			 * Piggyback the TX ring interrupt onto the RX
976 			 * ring interrupt vector.
977 			 */
978 			KASSERT(rxr->rx_txr->tx_intr_vec < 0,
979 			    ("piggybacked TX ring configured intr vector"));
980 			KASSERT(rxr->rx_txr->tx_idx == i,
981 			    ("RX ring %d piggybacked TX ring %u",
982 			     i, rxr->rx_txr->tx_idx));
983 			ix_set_ivar(sc, i, rxr->rx_intr_vec, 1);
984 			if (bootverbose) {
985 				if_printf(ifp, "IVAR RX ring %d piggybacks "
986 				    "TX ring %u\n", i, rxr->rx_txr->tx_idx);
987 			}
988 		}
989 	}
990 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
991 		/* Set up the status MSI-X vector; it uses fixed entry 1 */
992 		ix_set_ivar(sc, 1, sc->sts_msix_vec, -1);
993 
994 		/* Set up auto-mask for TX and RX rings */
995 		if (hw->mac.type == ixgbe_mac_82598EB) {
996 			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
997 		} else {
998 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
999 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
1000 		}
1001 	} else {
1002 		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
1003 	}
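	/* Program the per-vector interrupt throttle rate (EITR) */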
1004 	for (i = 0; i < sc->intr_cnt; ++i)
1005 		ix_set_eitr(sc, i, sc->intr_data[i].intr_rate);
1006 
1007 	/*
1008 	 * Check on any SFP devices that need to be kick-started
1009 	 */
1010 	if (hw->phy.type == ixgbe_phy_none) {
1011 		error = hw->phy.ops.identify(hw);
1012 		if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
1013 			if_printf(ifp,
1014 			    "Unsupported SFP+ module type was detected.\n");
1015 			/* XXX stop */
1016 			return;
1017 		}
1018 	}
1019 
1020 	/* Config/Enable Link */
1021 	ix_config_link(sc);
1022 
1023 	/* Hardware Packet Buffer & Flow Control setup */
1024 	ix_config_flowctrl(sc);
1025 
1026 	/* Initialize the FC settings */
1027 	ixgbe_start_hw(hw);
1028 
1029 	/* Set up VLAN support and filter */
1030 	ix_set_vlan(sc);
1031 
1032 	/* Setup DMA Coalescing */
1033 	ix_config_dmac(sc);
1034 
1035 	/*
1036 	 * Only enable interrupts if we are not polling, make sure
1037 	 * they are off otherwise.
1038 	 */
1039 	if (polling)
1040 		ix_disable_intr(sc);
1041 	else
1042 		ix_enable_intr(sc);
1043 
1044 	ifp->if_flags |= IFF_RUNNING;
1045 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
1046 		ifsq_clr_oactive(sc->tx_rings[i].tx_ifsq);
1047 		ifsq_watchdog_start(&sc->tx_rings[i].tx_watchdog);
1048 	}
1049 
1050 	ix_set_timer_cpuid(sc, polling);
1051 	callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
1052 }
1053 
1054 static void
1055 ix_intr(void *xsc)
1056 {
1057 	struct ix_softc *sc = xsc;
1058 	struct ixgbe_hw	*hw = &sc->hw;
1059 	uint32_t eicr;
1060 
1061 	ASSERT_SERIALIZED(&sc->main_serialize);
1062 
1063 	eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1064 	if (eicr == 0) {
1065 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
1066 		return;
1067 	}
1068 
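	/*
	 * Legacy/MSI interrupt: service RX ring 0, RX ring 1 (if in use)
	 * and TX ring 0 directly from this handler.
	 */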
1069 	if (eicr & IX_RX0_INTR_MASK) {
1070 		struct ix_rx_ring *rxr = &sc->rx_rings[0];
1071 
1072 		lwkt_serialize_enter(&rxr->rx_serialize);
1073 		ix_rxeof(rxr, -1);
1074 		lwkt_serialize_exit(&rxr->rx_serialize);
1075 	}
1076 	if (eicr & IX_RX1_INTR_MASK) {
1077 		struct ix_rx_ring *rxr;
1078 
1079 		KKASSERT(sc->rx_ring_inuse == IX_MIN_RXRING_RSS);
1080 		rxr = &sc->rx_rings[1];
1081 
1082 		lwkt_serialize_enter(&rxr->rx_serialize);
1083 		ix_rxeof(rxr, -1);
1084 		lwkt_serialize_exit(&rxr->rx_serialize);
1085 	}
1086 
1087 	if (eicr & IX_TX_INTR_MASK) {
1088 		struct ix_tx_ring *txr = &sc->tx_rings[0];
1089 
1090 		lwkt_serialize_enter(&txr->tx_serialize);
1091 		ix_txeof(txr, *(txr->tx_hdr));
1092 		if (!ifsq_is_empty(txr->tx_ifsq))
1093 			ifsq_devstart(txr->tx_ifsq);
1094 		lwkt_serialize_exit(&txr->tx_serialize);
1095 	}
1096 
1097 	if (__predict_false(eicr & IX_EICR_STATUS))
1098 		ix_intr_status(sc, eicr);
1099 
1100 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
1101 }
1102 
1103 static void
1104 ix_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1105 {
1106 	struct ix_softc *sc = ifp->if_softc;
1107 	struct ifmedia *ifm = &sc->media;
1108 	int layer;
1109 
1110 	ix_update_link_status(sc);
1111 
1112 	ifmr->ifm_status = IFM_AVALID;
1113 	ifmr->ifm_active = IFM_ETHER;
1114 
1115 	if (!sc->link_active) {
1116 		if (IFM_SUBTYPE(ifm->ifm_media) != IFM_AUTO)
1117 			ifmr->ifm_active |= ifm->ifm_media;
1118 		else
1119 			ifmr->ifm_active |= IFM_NONE;
1120 		return;
1121 	}
1122 	ifmr->ifm_status |= IFM_ACTIVE;
1123 
1124 	layer = ixgbe_get_supported_physical_layer(&sc->hw);
1125 
1126 	if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) ||
1127 	    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) ||
1128 	    (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX)) {
1129 		switch (sc->link_speed) {
1130 		case IXGBE_LINK_SPEED_10GB_FULL:
1131 			ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
1132 			break;
1133 		case IXGBE_LINK_SPEED_1GB_FULL:
1134 			ifmr->ifm_active |= IFM_1000_T | IFM_FDX;
1135 			break;
1136 		case IXGBE_LINK_SPEED_100_FULL:
1137 			ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1138 			break;
1139 		}
1140 	} else if ((layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) ||
1141 	    (layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA)) {
1142 		switch (sc->link_speed) {
1143 		case IXGBE_LINK_SPEED_10GB_FULL:
1144 			ifmr->ifm_active |= IFM_10G_TWINAX | IFM_FDX;
1145 			break;
1146 		}
1147 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) {
1148 		switch (sc->link_speed) {
1149 		case IXGBE_LINK_SPEED_10GB_FULL:
1150 			ifmr->ifm_active |= IFM_10G_LR | IFM_FDX;
1151 			break;
1152 		case IXGBE_LINK_SPEED_1GB_FULL:
1153 			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
1154 			break;
1155 		}
1156 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LRM) {
1157 		switch (sc->link_speed) {
1158 		case IXGBE_LINK_SPEED_10GB_FULL:
1159 			ifmr->ifm_active |= IFM_10G_LRM | IFM_FDX;
1160 			break;
1161 		case IXGBE_LINK_SPEED_1GB_FULL:
1162 			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
1163 			break;
1164 		}
1165 	} else if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) ||
1166 	    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX)) {
1167 		switch (sc->link_speed) {
1168 		case IXGBE_LINK_SPEED_10GB_FULL:
1169 			ifmr->ifm_active |= IFM_10G_SR | IFM_FDX;
1170 			break;
1171 		case IXGBE_LINK_SPEED_1GB_FULL:
1172 			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1173 			break;
1174 		}
1175 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) {
1176 		switch (sc->link_speed) {
1177 		case IXGBE_LINK_SPEED_10GB_FULL:
1178 			ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX;
1179 			break;
1180 		}
1181 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) {
1182 		/*
1183 		 * XXX: These need to use the proper media types once
1184 		 * they're added.
1185 		 */
1186 		switch (sc->link_speed) {
1187 		case IXGBE_LINK_SPEED_10GB_FULL:
1188 			ifmr->ifm_active |= IFM_10G_SR | IFM_FDX;
1189 			break;
1190 		case IXGBE_LINK_SPEED_2_5GB_FULL:
1191 			ifmr->ifm_active |= IFM_2500_SX | IFM_FDX;
1192 			break;
1193 		case IXGBE_LINK_SPEED_1GB_FULL:
1194 			ifmr->ifm_active |= IFM_1000_CX | IFM_FDX;
1195 			break;
1196 		}
1197 	} else if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) ||
1198 	    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX)) {
1199 		/*
1200 		 * XXX: These need to use the proper media types once
1201 		 * they're added.
1202 		 */
1203 		switch (sc->link_speed) {
1204 		case IXGBE_LINK_SPEED_10GB_FULL:
1205 			ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX;
1206 			break;
1207 		case IXGBE_LINK_SPEED_2_5GB_FULL:
1208 			ifmr->ifm_active |= IFM_2500_SX | IFM_FDX;
1209 			break;
1210 		case IXGBE_LINK_SPEED_1GB_FULL:
1211 			ifmr->ifm_active |= IFM_1000_CX | IFM_FDX;
1212 			break;
1213 		}
1214 	}
1215 
1216 	/* If nothing is recognized... */
1217 	if (IFM_SUBTYPE(ifmr->ifm_active) == 0)
1218 		ifmr->ifm_active |= IFM_NONE;
1219 
1220 	if (sc->ifm_media & IFM_ETH_FORCEPAUSE)
1221 		ifmr->ifm_active |= (sc->ifm_media & IFM_ETH_FCMASK);
1222 
1223 	switch (sc->hw.fc.current_mode) {
1224 	case ixgbe_fc_full:
1225 		ifmr->ifm_active |= IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE;
1226 		break;
1227 	case ixgbe_fc_rx_pause:
1228 		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
1229 		break;
1230 	case ixgbe_fc_tx_pause:
1231 		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
1232 		break;
1233 	default:
1234 		break;
1235 	}
1236 }
1237 
1238 static int
1239 ix_media_change(struct ifnet *ifp)
1240 {
1241 	struct ix_softc *sc = ifp->if_softc;
1242 	struct ifmedia *ifm = &sc->media;
1243 	struct ixgbe_hw *hw = &sc->hw;
1244 
1245 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1246 		return (EINVAL);
1247 
1248 	if (hw->phy.media_type == ixgbe_media_type_backplane ||
1249 	    hw->mac.ops.setup_link == NULL) {
1250 		if ((ifm->ifm_media ^ sc->ifm_media) & IFM_ETH_FCMASK) {
1251 			/* Only flow control setting changes are allowed */
1252 			return (EOPNOTSUPP);
1253 		}
1254 	}
1255 
1256 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1257 	case IFM_AUTO:
1258 		sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;
1259 		break;
1260 
1261 	case IFM_10G_T:
1262 	case IFM_10G_LRM:
1263 	case IFM_10G_SR:	/* XXX also KR */
1264 	case IFM_10G_LR:
1265 	case IFM_10G_CX4:	/* XXX also KX4 */
1266 	case IFM_10G_TWINAX:
1267 		sc->advspeed = IXGBE_LINK_SPEED_10GB_FULL;
1268 		break;
1269 
1270 	case IFM_1000_T:
1271 	case IFM_1000_LX:
1272 	case IFM_1000_SX:
1273 	case IFM_1000_CX:	/* XXX is KX */
1274 		sc->advspeed = IXGBE_LINK_SPEED_1GB_FULL;
1275 		break;
1276 
1277 	case IFM_100_TX:
1278 		sc->advspeed = IXGBE_LINK_SPEED_100_FULL;
1279 		break;
1280 
1281 	default:
1282 		if (bootverbose) {
1283 			if_printf(ifp, "Invalid media type %d!\n",
1284 			    ifm->ifm_media);
1285 		}
1286 		return EINVAL;
1287 	}
1288 	sc->ifm_media = ifm->ifm_media;
1289 
1290 #if 0
1291 	if (hw->mac.ops.setup_link != NULL) {
1292 		hw->mac.autotry_restart = TRUE;
1293 		hw->mac.ops.setup_link(hw, sc->advspeed, TRUE);
1294 	}
1295 #else
1296 	if (ifp->if_flags & IFF_RUNNING)
1297 		ix_init(sc);
1298 #endif
1299 	return 0;
1300 }
1301 
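/*
 * Make sure the Ethernet, IP and TCP headers of a TSO frame are
 * contiguous in the first mbuf, pulling them up if necessary.
 */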
1302 static __inline int
1303 ix_tso_pullup(struct mbuf **mp)
1304 {
1305 	int hoff, iphlen, thoff;
1306 	struct mbuf *m;
1307 
1308 	m = *mp;
1309 	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));
1310 
1311 	iphlen = m->m_pkthdr.csum_iphlen;
1312 	thoff = m->m_pkthdr.csum_thlen;
1313 	hoff = m->m_pkthdr.csum_lhlen;
1314 
1315 	KASSERT(iphlen > 0, ("invalid ip hlen"));
1316 	KASSERT(thoff > 0, ("invalid tcp hlen"));
1317 	KASSERT(hoff > 0, ("invalid ether hlen"));
1318 
1319 	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
1320 		m = m_pullup(m, hoff + iphlen + thoff);
1321 		if (m == NULL) {
1322 			*mp = NULL;
1323 			return ENOBUFS;
1324 		}
1325 		*mp = m;
1326 	}
1327 	return 0;
1328 }
1329 
1330 static int
1331 ix_encap(struct ix_tx_ring *txr, struct mbuf **m_headp,
1332     uint16_t *segs_used, int *idx)
1333 {
1334 	uint32_t olinfo_status = 0, cmd_type_len, cmd_rs = 0;
1335 	int i, j, error, nsegs, first, maxsegs;
1336 	struct mbuf *m_head = *m_headp;
1337 	bus_dma_segment_t segs[IX_MAX_SCATTER];
1338 	bus_dmamap_t map;
1339 	struct ix_tx_buf *txbuf;
1340 	union ixgbe_adv_tx_desc *txd = NULL;
1341 
1342 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1343 		error = ix_tso_pullup(m_headp);
1344 		if (__predict_false(error))
1345 			return error;
1346 		m_head = *m_headp;
1347 	}
1348 
1349 	/* Basic descriptor defines */
1350 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1351 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1352 
1353 	if (m_head->m_flags & M_VLANTAG)
1354 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1355 
1356 	/*
1357 	 * Important to capture the first descriptor
1358 	 * used because it will contain the index of
1359 	 * the one we tell the hardware to report back
1360 	 */
1361 	first = txr->tx_next_avail;
1362 	txbuf = &txr->tx_buf[first];
1363 	map = txbuf->map;
1364 
1365 	/*
1366 	 * Map the packet for DMA.
1367 	 */
1368 	maxsegs = txr->tx_avail - IX_TX_RESERVED;
1369 	if (maxsegs > IX_MAX_SCATTER)
1370 		maxsegs = IX_MAX_SCATTER;
1371 
1372 	error = bus_dmamap_load_mbuf_defrag(txr->tx_tag, map, m_headp,
1373 	    segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
1374 	if (__predict_false(error)) {
1375 		m_freem(*m_headp);
1376 		*m_headp = NULL;
1377 		return error;
1378 	}
1379 	bus_dmamap_sync(txr->tx_tag, map, BUS_DMASYNC_PREWRITE);
1380 
1381 	m_head = *m_headp;
1382 
1383 	/*
1384 	 * Set up the appropriate offload context if requested,
1385 	 * this may consume one TX descriptor.
1386 	 */
1387 	if (ix_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status)) {
1388 		(*segs_used)++;
1389 		txr->tx_nsegs++;
1390 	}
1391 
1392 	*segs_used += nsegs;
1393 	txr->tx_nsegs += nsegs;
1394 	if (txr->tx_nsegs >= txr->tx_intr_nsegs) {
1395 		/*
1396 		 * Report Status (RS) is turned on every intr_nsegs
1397 		 * descriptors (roughly).
1398 		 */
1399 		txr->tx_nsegs = 0;
1400 		cmd_rs = IXGBE_TXD_CMD_RS;
1401 	}
1402 
1403 	i = txr->tx_next_avail;
1404 	for (j = 0; j < nsegs; j++) {
1405 		bus_size_t seglen;
1406 		bus_addr_t segaddr;
1407 
1408 		txbuf = &txr->tx_buf[i];
1409 		txd = &txr->tx_base[i];
1410 		seglen = segs[j].ds_len;
1411 		segaddr = htole64(segs[j].ds_addr);
1412 
1413 		txd->read.buffer_addr = segaddr;
1414 		txd->read.cmd_type_len = htole32(IXGBE_TXD_CMD_IFCS |
1415 		    cmd_type_len | seglen);
1416 		txd->read.olinfo_status = htole32(olinfo_status);
1417 
1418 		if (++i == txr->tx_ndesc)
1419 			i = 0;
1420 	}
1421 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | cmd_rs);
1422 
1423 	txr->tx_avail -= nsegs;
1424 	txr->tx_next_avail = i;
1425 
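	/*
	 * The mbuf is recorded on the last descriptor used; swap the DMA
	 * maps so that the loaded map follows that mbuf and the first
	 * slot keeps the spare map.
	 */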
1426 	txbuf->m_head = m_head;
1427 	txr->tx_buf[first].map = txbuf->map;
1428 	txbuf->map = map;
1429 
1430 	/*
1431 	 * Defer TDT update until enough descriptors are set up
1432 	 */
1433 	*idx = i;
1434 
1435 	return 0;
1436 }
1437 
1438 static void
1439 ix_set_promisc(struct ix_softc *sc)
1440 {
1441 	struct ifnet *ifp = &sc->arpcom.ac_if;
1442 	uint32_t reg_rctl;
1443 	int mcnt = 0;
1444 
1445 	reg_rctl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
1446 	reg_rctl &= ~IXGBE_FCTRL_UPE;
1447 	if (ifp->if_flags & IFF_ALLMULTI) {
1448 		mcnt = IX_MAX_MCASTADDR;
1449 	} else {
1450 		struct ifmultiaddr *ifma;
1451 
1452 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1453 			if (ifma->ifma_addr->sa_family != AF_LINK)
1454 				continue;
1455 			if (mcnt == IX_MAX_MCASTADDR)
1456 				break;
1457 			mcnt++;
1458 		}
1459 	}
1460 	if (mcnt < IX_MAX_MCASTADDR)
1461 		reg_rctl &= ~IXGBE_FCTRL_MPE;
1462 	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1463 
1464 	if (ifp->if_flags & IFF_PROMISC) {
1465 		reg_rctl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
1466 		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1467 	} else if (ifp->if_flags & IFF_ALLMULTI) {
1468 		reg_rctl |= IXGBE_FCTRL_MPE;
1469 		reg_rctl &= ~IXGBE_FCTRL_UPE;
1470 		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1471 	}
1472 }
1473 
1474 static void
1475 ix_set_multi(struct ix_softc *sc)
1476 {
1477 	struct ifnet *ifp = &sc->arpcom.ac_if;
1478 	struct ifmultiaddr *ifma;
1479 	uint32_t fctrl;
1480 	uint8_t	*mta;
1481 	int mcnt = 0;
1482 
1483 	mta = sc->mta;
1484 	bzero(mta, IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR);
1485 
1486 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1487 		if (ifma->ifma_addr->sa_family != AF_LINK)
1488 			continue;
1489 		if (mcnt == IX_MAX_MCASTADDR)
1490 			break;
1491 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1492 		    &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
1493 		    IXGBE_ETH_LENGTH_OF_ADDRESS);
1494 		mcnt++;
1495 	}
1496 
1497 	fctrl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
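	/* Set unicast/multicast promiscuous bits according to interface flags */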
1498 	fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1499 	if (ifp->if_flags & IFF_PROMISC) {
1500 		fctrl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
1501 	} else if (mcnt >= IX_MAX_MCASTADDR || (ifp->if_flags & IFF_ALLMULTI)) {
1502 		fctrl |= IXGBE_FCTRL_MPE;
1503 		fctrl &= ~IXGBE_FCTRL_UPE;
1504 	} else {
1505 		fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1506 	}
1507 	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, fctrl);
1508 
1509 	if (mcnt < IX_MAX_MCASTADDR) {
1510 		ixgbe_update_mc_addr_list(&sc->hw,
1511 		    mta, mcnt, ix_mc_array_itr, TRUE);
1512 	}
1513 }
1514 
1515 /*
1516  * This is an iterator function needed by the multicast shared code.
1517  * It simply feeds the shared code routine the addresses accumulated
1518  * in the array by ix_set_multi(), one by one.
1519  */
1520 static uint8_t *
1521 ix_mc_array_itr(struct ixgbe_hw *hw, uint8_t **update_ptr, uint32_t *vmdq)
1522 {
1523 	uint8_t *addr = *update_ptr;
1524 	uint8_t *newptr;
1525 	*vmdq = 0;
1526 
1527 	newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
1528 	*update_ptr = newptr;
1529 	return addr;
1530 }
1531 
1532 static void
1533 ix_timer(void *arg)
1534 {
1535 	struct ix_softc *sc = arg;
1536 
1537 	lwkt_serialize_enter(&sc->main_serialize);
1538 
1539 	if ((sc->arpcom.ac_if.if_flags & IFF_RUNNING) == 0) {
1540 		lwkt_serialize_exit(&sc->main_serialize);
1541 		return;
1542 	}
1543 
1544 	/* Check for pluggable optics */
1545 	if (sc->sfp_probe) {
1546 		if (!ix_sfp_probe(sc))
1547 			goto done; /* Nothing to do */
1548 	}
1549 
1550 	ix_update_link_status(sc);
1551 	ix_update_stats(sc);
1552 
1553 done:
1554 	callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
1555 	lwkt_serialize_exit(&sc->main_serialize);
1556 }
1557 
1558 static void
1559 ix_update_link_status(struct ix_softc *sc)
1560 {
1561 	struct ifnet *ifp = &sc->arpcom.ac_if;
1562 
1563 	if (sc->link_up) {
1564 		if (sc->link_active == FALSE) {
1565 			if (bootverbose) {
1566 				if_printf(ifp, "Link is up %d Gbps %s\n",
1567 				    sc->link_speed == 128 ? 10 : 1,
1568 				    "Full Duplex");
1569 			}
1570 
1571 			/*
1572 			 * Update any Flow Control changes
1573 			 */
1574 			ixgbe_fc_enable(&sc->hw);
1575 			/* MUST after ixgbe_fc_enable() */
1576 			if (sc->rx_ring_inuse > 1) {
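				/*
				 * With flow control active, do not drop
				 * packets on full RX queues; without flow
				 * control, enable per-queue drop to avoid
				 * head-of-line blocking between RX rings.
				 */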
1577 				switch (sc->hw.fc.current_mode) {
1578 				case ixgbe_fc_rx_pause:
1579 				case ixgbe_fc_tx_pause:
1580 				case ixgbe_fc_full:
1581 					ix_disable_rx_drop(sc);
1582 					break;
1583 
1584 				case ixgbe_fc_none:
1585 					ix_enable_rx_drop(sc);
1586 					break;
1587 
1588 				default:
1589 					break;
1590 				}
1591 			}
1592 
1593 			/* Update DMA coalescing config */
1594 			ix_config_dmac(sc);
1595 
1596 			sc->link_active = TRUE;
1597 
1598 			ifp->if_link_state = LINK_STATE_UP;
1599 			if_link_state_change(ifp);
1600 		}
1601 	} else { /* Link down */
1602 		if (sc->link_active == TRUE) {
1603 			if (bootverbose)
1604 				if_printf(ifp, "Link is Down\n");
1605 			ifp->if_link_state = LINK_STATE_DOWN;
1606 			if_link_state_change(ifp);
1607 
1608 			sc->link_active = FALSE;
1609 		}
1610 	}
1611 }
1612 
1613 static void
1614 ix_stop(struct ix_softc *sc)
1615 {
1616 	struct ixgbe_hw *hw = &sc->hw;
1617 	struct ifnet *ifp = &sc->arpcom.ac_if;
1618 	int i;
1619 
1620 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1621 
1622 	ix_disable_intr(sc);
1623 	callout_stop(&sc->timer);
1624 
1625 	ifp->if_flags &= ~IFF_RUNNING;
1626 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1627 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1628 
1629 		ifsq_clr_oactive(txr->tx_ifsq);
1630 		ifsq_watchdog_stop(&txr->tx_watchdog);
1631 		txr->tx_flags &= ~IX_TXFLAG_ENABLED;
1632 	}
1633 
1634 	ixgbe_reset_hw(hw);
1635 	hw->adapter_stopped = FALSE;
1636 	ixgbe_stop_adapter(hw);
1637 	if (hw->mac.type == ixgbe_mac_82599EB)
1638 		ixgbe_stop_mac_link_on_d3_82599(hw);
1639 	/* Turn off the laser - noop with no optics */
1640 	ixgbe_disable_tx_laser(hw);
1641 
1642 	/* Update the stack */
1643 	sc->link_up = FALSE;
1644 	ix_update_link_status(sc);
1645 
1646 	/* Reprogram the RAR[0] in case user changed it. */
1647 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV);
1648 
1649 	for (i = 0; i < sc->tx_ring_cnt; ++i)
1650 		ix_free_tx_ring(&sc->tx_rings[i]);
1651 
1652 	for (i = 0; i < sc->rx_ring_cnt; ++i)
1653 		ix_free_rx_ring(&sc->rx_rings[i]);
1654 }
1655 
1656 static void
1657 ix_setup_ifp(struct ix_softc *sc)
1658 {
1659 	struct ixgbe_hw *hw = &sc->hw;
1660 	struct ifnet *ifp = &sc->arpcom.ac_if;
1661 	int i;
1662 
1663 	ifp->if_baudrate = IF_Gbps(10UL);
1664 
1665 	ifp->if_softc = sc;
1666 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1667 	ifp->if_init = ix_init;
1668 	ifp->if_ioctl = ix_ioctl;
1669 	ifp->if_start = ix_start;
1670 	ifp->if_serialize = ix_serialize;
1671 	ifp->if_deserialize = ix_deserialize;
1672 	ifp->if_tryserialize = ix_tryserialize;
1673 #ifdef INVARIANTS
1674 	ifp->if_serialize_assert = ix_serialize_assert;
1675 #endif
1676 #ifdef IFPOLL_ENABLE
1677 	ifp->if_npoll = ix_npoll;
1678 #endif
1679 
1680 	/* Increase TSO burst length */
1681 	ifp->if_tsolen = (8 * ETHERMTU);
1682 
1683 	ifp->if_nmbclusters = sc->rx_ring_cnt * sc->rx_rings[0].rx_ndesc;
1684 	ifp->if_nmbjclusters = ifp->if_nmbclusters;
1685 
1686 	ifq_set_maxlen(&ifp->if_snd, sc->tx_rings[0].tx_ndesc - 2);
1687 	ifq_set_ready(&ifp->if_snd);
1688 	ifq_set_subq_cnt(&ifp->if_snd, sc->tx_ring_cnt);
1689 
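	/*
	 * Map packets to TX subqueues by mask; start with subqueue 0 only,
	 * ix_init() widens the mask to the number of TX rings in use.
	 */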
1690 	ifp->if_mapsubq = ifq_mapsubq_mask;
1691 	ifq_set_subq_mask(&ifp->if_snd, 0);
1692 
1693 	ether_ifattach(ifp, hw->mac.addr, NULL);
1694 
1695 	ifp->if_capabilities =
1696 	    IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
1697 	if (IX_ENABLE_HWRSS(sc))
1698 		ifp->if_capabilities |= IFCAP_RSS;
1699 	ifp->if_capenable = ifp->if_capabilities;
1700 	ifp->if_hwassist = CSUM_OFFLOAD | CSUM_TSO;
1701 
1702 	/*
1703 	 * Tell the upper layer(s) we support long frames.
1704 	 */
1705 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
1706 
1707 	/* Setup TX rings and subqueues */
1708 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1709 		struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
1710 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1711 
1712 		ifsq_set_cpuid(ifsq, txr->tx_intr_cpuid);
1713 		ifsq_set_priv(ifsq, txr);
1714 		ifsq_set_hw_serialize(ifsq, &txr->tx_serialize);
1715 		txr->tx_ifsq = ifsq;
1716 
1717 		ifsq_watchdog_init(&txr->tx_watchdog, ifsq, ix_watchdog);
1718 	}
1719 
1720 	/* Specify the media types supported by this adapter */
1721 	ix_init_media(sc);
1722 }
1723 
1724 static boolean_t
1725 ix_is_sfp(const struct ixgbe_hw *hw)
1726 {
1727 	switch (hw->phy.type) {
1728 	case ixgbe_phy_sfp_avago:
1729 	case ixgbe_phy_sfp_ftl:
1730 	case ixgbe_phy_sfp_intel:
1731 	case ixgbe_phy_sfp_unknown:
1732 	case ixgbe_phy_sfp_passive_tyco:
1733 	case ixgbe_phy_sfp_passive_unknown:
1734 	case ixgbe_phy_qsfp_passive_unknown:
1735 	case ixgbe_phy_qsfp_active_unknown:
1736 	case ixgbe_phy_qsfp_intel:
1737 	case ixgbe_phy_qsfp_unknown:
1738 		return TRUE;
1739 	default:
1740 		return FALSE;
1741 	}
1742 }
1743 
1744 static void
1745 ix_config_link(struct ix_softc *sc)
1746 {
1747 	struct ixgbe_hw *hw = &sc->hw;
1748 	boolean_t sfp;
1749 
1750 	sfp = ix_is_sfp(hw);
1751 	if (sfp) {
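		/*
		 * For SFP(+) ports, let the module (mod) and multispeed
		 * fiber (msf) handlers bring the link up.
		 */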
1752 		if (hw->phy.multispeed_fiber) {
1753 			hw->mac.ops.setup_sfp(hw);
1754 			ixgbe_enable_tx_laser(hw);
1755 			ix_handle_msf(sc);
1756 		} else {
1757 			ix_handle_mod(sc);
1758 		}
1759 	} else {
1760 		uint32_t autoneg, err = 0;
1761 
1762 		if (hw->mac.ops.check_link != NULL) {
1763 			err = ixgbe_check_link(hw, &sc->link_speed,
1764 			    &sc->link_up, FALSE);
1765 			if (err)
1766 				return;
1767 		}
1768 
1769 		if (sc->advspeed != IXGBE_LINK_SPEED_UNKNOWN)
1770 			autoneg = sc->advspeed;
1771 		else
1772 			autoneg = hw->phy.autoneg_advertised;
1773 		if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
1774 			bool negotiate;
1775 
1776 			err = hw->mac.ops.get_link_capabilities(hw,
1777 			    &autoneg, &negotiate);
1778 			if (err)
1779 				return;
1780 		}
1781 
1782 		if (hw->mac.ops.setup_link != NULL) {
1783 			err = hw->mac.ops.setup_link(hw,
1784 			    autoneg, sc->link_up);
1785 			if (err)
1786 				return;
1787 		}
1788 	}
1789 }
1790 
1791 static int
1792 ix_alloc_rings(struct ix_softc *sc)
1793 {
1794 	int error, i;
1795 
1796 	/*
1797 	 * Create top level busdma tag
1798 	 */
1799 	error = bus_dma_tag_create(NULL, 1, 0,
1800 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1801 	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
1802 	    &sc->parent_tag);
1803 	if (error) {
1804 		device_printf(sc->dev, "could not create top level DMA tag\n");
1805 		return error;
1806 	}
1807 
1808 	/*
1809 	 * Allocate TX descriptor rings and buffers
1810 	 */
1811 	sc->tx_rings = kmalloc_cachealign(
1812 	    sizeof(struct ix_tx_ring) * sc->tx_ring_cnt,
1813 	    M_DEVBUF, M_WAITOK | M_ZERO);
1814 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1815 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1816 
1817 		txr->tx_sc = sc;
1818 		txr->tx_idx = i;
1819 		txr->tx_intr_vec = -1;
1820 		lwkt_serialize_init(&txr->tx_serialize);
1821 
1822 		error = ix_create_tx_ring(txr);
1823 		if (error)
1824 			return error;
1825 	}
1826 
1827 	/*
1828 	 * Allocate RX descriptor rings and buffers
1829 	 */
1830 	sc->rx_rings = kmalloc_cachealign(
1831 	    sizeof(struct ix_rx_ring) * sc->rx_ring_cnt,
1832 	    M_DEVBUF, M_WAITOK | M_ZERO);
1833 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
1834 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
1835 
1836 		rxr->rx_sc = sc;
1837 		rxr->rx_idx = i;
1838 		rxr->rx_intr_vec = -1;
1839 		lwkt_serialize_init(&rxr->rx_serialize);
1840 
1841 		error = ix_create_rx_ring(rxr);
1842 		if (error)
1843 			return error;
1844 	}
1845 
1846 	return 0;
1847 }
1848 
1849 static int
1850 ix_create_tx_ring(struct ix_tx_ring *txr)
1851 {
1852 	int error, i, tsize, ntxd;
1853 
1854 	/*
1855 	 * Validate the number of transmit descriptors.  It must not exceed
1856 	 * the hardware maximum and the ring size must be a multiple of IX_DBA_ALIGN.
1857 	 */
1858 	ntxd = device_getenv_int(txr->tx_sc->dev, "txd", ix_txd);
1859 	if (((ntxd * sizeof(union ixgbe_adv_tx_desc)) % IX_DBA_ALIGN) != 0 ||
1860 	    ntxd < IX_MIN_TXD || ntxd > IX_MAX_TXD) {
1861 		device_printf(txr->tx_sc->dev,
1862 		    "Using %d TX descriptors instead of %d!\n",
1863 		    IX_DEF_TXD, ntxd);
1864 		txr->tx_ndesc = IX_DEF_TXD;
1865 	} else {
1866 		txr->tx_ndesc = ntxd;
1867 	}
1868 
1869 	/*
1870 	 * Allocate TX head write-back buffer
1871 	 */
1872 	txr->tx_hdr = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
1873 	    __VM_CACHELINE_SIZE, __VM_CACHELINE_SIZE, BUS_DMA_WAITOK,
1874 	    &txr->tx_hdr_dtag, &txr->tx_hdr_map, &txr->tx_hdr_paddr);
1875 	if (txr->tx_hdr == NULL) {
1876 		device_printf(txr->tx_sc->dev,
1877 		    "Unable to allocate TX head write-back buffer\n");
1878 		return ENOMEM;
1879 	}
1880 
1881 	/*
1882 	 * Allocate TX descriptor ring
1883 	 */
1884 	tsize = roundup2(txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc),
1885 	    IX_DBA_ALIGN);
1886 	txr->tx_base = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
1887 	    IX_DBA_ALIGN, tsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
1888 	    &txr->tx_base_dtag, &txr->tx_base_map, &txr->tx_base_paddr);
1889 	if (txr->tx_base == NULL) {
1890 		device_printf(txr->tx_sc->dev,
1891 		    "Unable to allocate TX Descriptor memory\n");
1892 		return ENOMEM;
1893 	}
1894 
1895 	tsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_tx_buf) * txr->tx_ndesc);
1896 	txr->tx_buf = kmalloc_cachealign(tsize, M_DEVBUF, M_WAITOK | M_ZERO);
1897 
1898 	/*
1899 	 * Create DMA tag for TX buffers
1900 	 */
1901 	error = bus_dma_tag_create(txr->tx_sc->parent_tag,
1902 	    1, 0,		/* alignment, bounds */
1903 	    BUS_SPACE_MAXADDR,	/* lowaddr */
1904 	    BUS_SPACE_MAXADDR,	/* highaddr */
1905 	    NULL, NULL,		/* filter, filterarg */
1906 	    IX_TSO_SIZE,	/* maxsize */
1907 	    IX_MAX_SCATTER,	/* nsegments */
1908 	    PAGE_SIZE,		/* maxsegsize */
1909 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
1910 	    BUS_DMA_ONEBPAGE,	/* flags */
1911 	    &txr->tx_tag);
1912 	if (error) {
1913 		device_printf(txr->tx_sc->dev,
1914 		    "Unable to allocate TX DMA tag\n");
1915 		kfree(txr->tx_buf, M_DEVBUF);
1916 		txr->tx_buf = NULL;
1917 		return error;
1918 	}
1919 
1920 	/*
1921 	 * Create DMA maps for TX buffers
1922 	 */
1923 	for (i = 0; i < txr->tx_ndesc; ++i) {
1924 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
1925 
1926 		error = bus_dmamap_create(txr->tx_tag,
1927 		    BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &txbuf->map);
1928 		if (error) {
1929 			device_printf(txr->tx_sc->dev,
1930 			    "Unable to create TX DMA map\n");
1931 			ix_destroy_tx_ring(txr, i);
1932 			return error;
1933 		}
1934 	}
1935 
1936 	/*
1937 	 * Initialize various watermarks
1938 	 */
1939 	txr->tx_wreg_nsegs = IX_DEF_TXWREG_NSEGS;
1940 	txr->tx_intr_nsegs = txr->tx_ndesc / 16;
1941 
1942 	return 0;
1943 }
1944 
1945 static void
1946 ix_destroy_tx_ring(struct ix_tx_ring *txr, int ndesc)
1947 {
1948 	int i;
1949 
1950 	if (txr->tx_hdr != NULL) {
1951 		bus_dmamap_unload(txr->tx_hdr_dtag, txr->tx_hdr_map);
1952 		bus_dmamem_free(txr->tx_hdr_dtag,
1953 		    __DEVOLATILE(void *, txr->tx_hdr), txr->tx_hdr_map);
1954 		bus_dma_tag_destroy(txr->tx_hdr_dtag);
1955 		txr->tx_hdr = NULL;
1956 	}
1957 
1958 	if (txr->tx_base != NULL) {
1959 		bus_dmamap_unload(txr->tx_base_dtag, txr->tx_base_map);
1960 		bus_dmamem_free(txr->tx_base_dtag, txr->tx_base,
1961 		    txr->tx_base_map);
1962 		bus_dma_tag_destroy(txr->tx_base_dtag);
1963 		txr->tx_base = NULL;
1964 	}
1965 
1966 	if (txr->tx_buf == NULL)
1967 		return;
1968 
1969 	for (i = 0; i < ndesc; ++i) {
1970 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
1971 
1972 		KKASSERT(txbuf->m_head == NULL);
1973 		bus_dmamap_destroy(txr->tx_tag, txbuf->map);
1974 	}
1975 	bus_dma_tag_destroy(txr->tx_tag);
1976 
1977 	kfree(txr->tx_buf, M_DEVBUF);
1978 	txr->tx_buf = NULL;
1979 }
1980 
1981 static void
1982 ix_init_tx_ring(struct ix_tx_ring *txr)
1983 {
1984 	/* Clear the old ring contents */
1985 	bzero(txr->tx_base, sizeof(union ixgbe_adv_tx_desc) * txr->tx_ndesc);
1986 
1987 	/* Clear TX head write-back buffer */
1988 	*(txr->tx_hdr) = 0;
1989 
1990 	/* Reset indices */
1991 	txr->tx_next_avail = 0;
1992 	txr->tx_next_clean = 0;
1993 	txr->tx_nsegs = 0;
1994 
1995 	/* Set number of descriptors available */
1996 	txr->tx_avail = txr->tx_ndesc;
1997 
1998 	/* Enable this TX ring */
1999 	txr->tx_flags |= IX_TXFLAG_ENABLED;
2000 }
2001 
2002 static void
2003 ix_init_tx_unit(struct ix_softc *sc)
2004 {
2005 	struct ixgbe_hw	*hw = &sc->hw;
2006 	int i;
2007 
2008 	/*
2009 	 * Setup the Base and Length of the Tx Descriptor Ring
2010 	 */
2011 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
2012 		struct ix_tx_ring *txr = &sc->tx_rings[i];
2013 		uint64_t tdba = txr->tx_base_paddr;
2014 		uint64_t hdr_paddr = txr->tx_hdr_paddr;
2015 		uint32_t txctrl;
2016 
2017 		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i), (uint32_t)tdba);
2018 		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (uint32_t)(tdba >> 32));
2019 		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
2020 		    txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc));
2021 
2022 		/* Setup the HW Tx Head and Tail descriptor pointers */
2023 		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
2024 		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
2025 
2026 		/* Disable TX head write-back relax ordering */
2027 		switch (hw->mac.type) {
2028 		case ixgbe_mac_82598EB:
2029 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
2030 			break;
2031 		case ixgbe_mac_82599EB:
2032 		case ixgbe_mac_X540:
2033 		default:
2034 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
2035 			break;
2036 		}
2037 		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
2038 		switch (hw->mac.type) {
2039 		case ixgbe_mac_82598EB:
2040 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
2041 			break;
2042 		case ixgbe_mac_82599EB:
2043 		case ixgbe_mac_X540:
2044 		default:
2045 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
2046 			break;
2047 		}
2048 
2049 		/* Enable TX head write-back */
2050 		IXGBE_WRITE_REG(hw, IXGBE_TDWBAH(i),
2051 		    (uint32_t)(hdr_paddr >> 32));
2052 		IXGBE_WRITE_REG(hw, IXGBE_TDWBAL(i),
2053 		    ((uint32_t)hdr_paddr) | IXGBE_TDWBAL_HEAD_WB_ENABLE);
2054 	}
2055 
2056 	if (hw->mac.type != ixgbe_mac_82598EB) {
2057 		uint32_t dmatxctl, rttdcs;
2058 
2059 		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
2060 		dmatxctl |= IXGBE_DMATXCTL_TE;
2061 		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
2062 
2063 		/* Disable arbiter to set MTQC */
2064 		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
2065 		rttdcs |= IXGBE_RTTDCS_ARBDIS;
2066 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
2067 
2068 		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
2069 
2070 		/* Re-enable arbiter */
2071 		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
2072 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
2073 	}
2074 }
2075 
2076 static int
2077 ix_tx_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
2078     uint32_t *cmd_type_len, uint32_t *olinfo_status)
2079 {
2080 	struct ixgbe_adv_tx_context_desc *TXD;
2081 	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2082 	int ehdrlen, ip_hlen = 0, ctxd;
2083 	boolean_t offload = TRUE;
2084 
2085 	/* First check if TSO is to be used */
2086 	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
2087 		return ix_tso_ctx_setup(txr, mp,
2088 		    cmd_type_len, olinfo_status);
2089 	}
2090 
2091 	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
2092 		offload = FALSE;
2093 
2094 	/* Indicate the whole packet as payload when not doing TSO */
2095 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
2096 
2097 	/*
2098 	 * In advanced descriptors the vlan tag must be placed into the
2099 	 * context descriptor.  Hence we need to make one even if not
2100 	 * doing checksum offloads.
2101 	 */
2102 	if (mp->m_flags & M_VLANTAG) {
2103 		vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
2104 		    IXGBE_ADVTXD_VLAN_SHIFT;
2105 	} else if (!offload) {
2106 		/* No TX descriptor is consumed */
2107 		return 0;
2108 	}
2109 
2110 	/* Set the ether header length */
2111 	ehdrlen = mp->m_pkthdr.csum_lhlen;
2112 	KASSERT(ehdrlen > 0, ("invalid ether hlen"));
2113 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
2114 
2115 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
2116 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
2117 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
2118 		ip_hlen = mp->m_pkthdr.csum_iphlen;
2119 		KASSERT(ip_hlen > 0, ("invalid ip hlen"));
2120 	}
2121 	vlan_macip_lens |= ip_hlen;
2122 
2123 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
2124 	if (mp->m_pkthdr.csum_flags & CSUM_TCP)
2125 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
2126 	else if (mp->m_pkthdr.csum_flags & CSUM_UDP)
2127 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
2128 
2129 	if (mp->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))
2130 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
2131 
2132 	/* Now ready a context descriptor */
2133 	ctxd = txr->tx_next_avail;
2134 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
2135 
2136 	/* Now copy bits into descriptor */
2137 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
2138 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
2139 	TXD->seqnum_seed = htole32(0);
2140 	TXD->mss_l4len_idx = htole32(0);
2141 
2142 	/* We've consumed the first desc, adjust counters */
2143 	if (++ctxd == txr->tx_ndesc)
2144 		ctxd = 0;
2145 	txr->tx_next_avail = ctxd;
2146 	--txr->tx_avail;
2147 
2148 	/* One TX descriptor is consumed */
2149 	return 1;
2150 }
2151 
2152 static int
2153 ix_tso_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
2154     uint32_t *cmd_type_len, uint32_t *olinfo_status)
2155 {
2156 	struct ixgbe_adv_tx_context_desc *TXD;
2157 	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2158 	uint32_t mss_l4len_idx = 0, paylen;
2159 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
2160 
2161 	ehdrlen = mp->m_pkthdr.csum_lhlen;
2162 	KASSERT(ehdrlen > 0, ("invalid ether hlen"));
2163 
2164 	ip_hlen = mp->m_pkthdr.csum_iphlen;
2165 	KASSERT(ip_hlen > 0, ("invalid ip hlen"));
2166 
2167 	tcp_hlen = mp->m_pkthdr.csum_thlen;
2168 	KASSERT(tcp_hlen > 0, ("invalid tcp hlen"));
2169 
2170 	ctxd = txr->tx_next_avail;
2171 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
2172 
2173 	if (mp->m_flags & M_VLANTAG) {
2174 		vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
2175 		    IXGBE_ADVTXD_VLAN_SHIFT;
2176 	}
2177 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
2178 	vlan_macip_lens |= ip_hlen;
2179 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
2180 
2181 	/* ADV DTYPE TUCMD */
2182 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
2183 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
2184 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
2185 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
2186 
2187 	/* MSS L4LEN IDX */
2188 	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
2189 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
2190 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
2191 
2192 	TXD->seqnum_seed = htole32(0);
2193 
2194 	if (++ctxd == txr->tx_ndesc)
2195 		ctxd = 0;
2196 
2197 	txr->tx_avail--;
2198 	txr->tx_next_avail = ctxd;
2199 
2200 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
2201 
2202 	/* This is used in the transmit desc in encap */
2203 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
2204 
2205 	*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
2206 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
2207 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
2208 
2209 	/* One TX descriptor is consumed */
2210 	return 1;
2211 }
2212 
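/*
 * Reclaim completed TX descriptors.
 *
 * 'hdr' is the index of the next descriptor the hardware will process
 * (normally taken from the TX head write-back buffer); everything from
 * tx_next_clean up to, but not including, 'hdr' has been transmitted
 * and its mbufs can be unloaded and freed.
 */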
2213 static void
2214 ix_txeof(struct ix_tx_ring *txr, int hdr)
2215 {
2216 	int first, avail;
2217 
2218 	if (txr->tx_avail == txr->tx_ndesc)
2219 		return;
2220 
2221 	first = txr->tx_next_clean;
2222 	if (first == hdr)
2223 		return;
2224 
2225 	avail = txr->tx_avail;
2226 	while (first != hdr) {
2227 		struct ix_tx_buf *txbuf = &txr->tx_buf[first];
2228 
2229 		++avail;
2230 		if (txbuf->m_head) {
2231 			bus_dmamap_unload(txr->tx_tag, txbuf->map);
2232 			m_freem(txbuf->m_head);
2233 			txbuf->m_head = NULL;
2234 		}
2235 		if (++first == txr->tx_ndesc)
2236 			first = 0;
2237 	}
2238 	txr->tx_next_clean = first;
2239 	txr->tx_avail = avail;
2240 
2241 	if (txr->tx_avail > IX_MAX_SCATTER + IX_TX_RESERVED) {
2242 		ifsq_clr_oactive(txr->tx_ifsq);
2243 		txr->tx_watchdog.wd_timer = 0;
2244 	}
2245 }
2246 
2247 static int
2248 ix_create_rx_ring(struct ix_rx_ring *rxr)
2249 {
2250 	int i, rsize, error, nrxd;
2251 
2252 	/*
2253 	 * Validate the number of receive descriptors.  It must not exceed
2254 	 * the hardware maximum, and the ring size must be a multiple of IX_DBA_ALIGN.
2255 	 */
2256 	nrxd = device_getenv_int(rxr->rx_sc->dev, "rxd", ix_rxd);
2257 	if (((nrxd * sizeof(union ixgbe_adv_rx_desc)) % IX_DBA_ALIGN) != 0 ||
2258 	    nrxd < IX_MIN_RXD || nrxd > IX_MAX_RXD) {
2259 		device_printf(rxr->rx_sc->dev,
2260 		    "Using %d RX descriptors instead of %d!\n",
2261 		    IX_DEF_RXD, nrxd);
2262 		rxr->rx_ndesc = IX_DEF_RXD;
2263 	} else {
2264 		rxr->rx_ndesc = nrxd;
2265 	}
2266 
2267 	/*
2268 	 * Allocate RX descriptor ring
2269 	 */
2270 	rsize = roundup2(rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc),
2271 	    IX_DBA_ALIGN);
2272 	rxr->rx_base = bus_dmamem_coherent_any(rxr->rx_sc->parent_tag,
2273 	    IX_DBA_ALIGN, rsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
2274 	    &rxr->rx_base_dtag, &rxr->rx_base_map, &rxr->rx_base_paddr);
2275 	if (rxr->rx_base == NULL) {
2276 		device_printf(rxr->rx_sc->dev,
2277 		    "Unable to allocate RX Descriptor memory\n");
2278 		return ENOMEM;
2279 	}
2280 
2281 	rsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_rx_buf) * rxr->rx_ndesc);
2282 	rxr->rx_buf = kmalloc_cachealign(rsize, M_DEVBUF, M_WAITOK | M_ZERO);
2283 
2284 	/*
2285 	 * Create DMA tag for RX buffers
2286 	 */
2287 	error = bus_dma_tag_create(rxr->rx_sc->parent_tag,
2288 	    1, 0,		/* alignment, bounds */
2289 	    BUS_SPACE_MAXADDR,	/* lowaddr */
2290 	    BUS_SPACE_MAXADDR,	/* highaddr */
2291 	    NULL, NULL,		/* filter, filterarg */
2292 	    PAGE_SIZE,		/* maxsize */
2293 	    1,			/* nsegments */
2294 	    PAGE_SIZE,		/* maxsegsize */
2295 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, /* flags */
2296 	    &rxr->rx_tag);
2297 	if (error) {
2298 		device_printf(rxr->rx_sc->dev,
2299 		    "Unable to create RX DMA tag\n");
2300 		kfree(rxr->rx_buf, M_DEVBUF);
2301 		rxr->rx_buf = NULL;
2302 		return error;
2303 	}
2304 
2305 	/*
2306 	 * Create spare DMA map for RX buffers
2307 	 */
2308 	error = bus_dmamap_create(rxr->rx_tag, BUS_DMA_WAITOK,
2309 	    &rxr->rx_sparemap);
2310 	if (error) {
2311 		device_printf(rxr->rx_sc->dev,
2312 		    "Unable to create spare RX DMA map\n");
2313 		bus_dma_tag_destroy(rxr->rx_tag);
2314 		kfree(rxr->rx_buf, M_DEVBUF);
2315 		rxr->rx_buf = NULL;
2316 		return error;
2317 	}
2318 
2319 	/*
2320 	 * Create DMA maps for RX buffers
2321 	 */
2322 	for (i = 0; i < rxr->rx_ndesc; ++i) {
2323 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2324 
2325 		error = bus_dmamap_create(rxr->rx_tag,
2326 		    BUS_DMA_WAITOK, &rxbuf->map);
2327 		if (error) {
2328 			device_printf(rxr->rx_sc->dev,
2329 			    "Unable to create RX dma map\n");
2330 			ix_destroy_rx_ring(rxr, i);
2331 			return error;
2332 		}
2333 	}
2334 
2335 	/*
2336 	 * Initialize various watermarks
2337 	 */
2338 	rxr->rx_wreg_nsegs = IX_DEF_RXWREG_NSEGS;
2339 
2340 	return 0;
2341 }
2342 
2343 static void
2344 ix_destroy_rx_ring(struct ix_rx_ring *rxr, int ndesc)
2345 {
2346 	int i;
2347 
2348 	if (rxr->rx_base != NULL) {
2349 		bus_dmamap_unload(rxr->rx_base_dtag, rxr->rx_base_map);
2350 		bus_dmamem_free(rxr->rx_base_dtag, rxr->rx_base,
2351 		    rxr->rx_base_map);
2352 		bus_dma_tag_destroy(rxr->rx_base_dtag);
2353 		rxr->rx_base = NULL;
2354 	}
2355 
2356 	if (rxr->rx_buf == NULL)
2357 		return;
2358 
2359 	for (i = 0; i < ndesc; ++i) {
2360 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2361 
2362 		KKASSERT(rxbuf->m_head == NULL);
2363 		bus_dmamap_destroy(rxr->rx_tag, rxbuf->map);
2364 	}
2365 	bus_dmamap_destroy(rxr->rx_tag, rxr->rx_sparemap);
2366 	bus_dma_tag_destroy(rxr->rx_tag);
2367 
2368 	kfree(rxr->rx_buf, M_DEVBUF);
2369 	rxr->rx_buf = NULL;
2370 }
2371 
2372 /*
2373 ** Used to detect a descriptor that has
2374 ** been merged by Hardware RSC.
2375 */
2376 static __inline uint32_t
2377 ix_rsc_count(union ixgbe_adv_rx_desc *rx)
2378 {
2379 	return (le32toh(rx->wb.lower.lo_dword.data) &
2380 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
2381 }
2382 
2383 #if 0
2384 /*********************************************************************
2385  *
2386  *  Initialize the Hardware RSC (LRO) feature on 82599
2387  *  for an RX ring; this is toggled by the LRO capability
2388  *  even though it is transparent to the stack.
2389  *
2390  *  NOTE: since this HW feature only works with IPv4 and
2391  *        our testing has shown soft LRO to be as effective,
2392  *        it is disabled by default.
2393  *
2394  **********************************************************************/
2395 static void
2396 ix_setup_hw_rsc(struct ix_rx_ring *rxr)
2397 {
2398 	struct	ix_softc 	*sc = rxr->rx_sc;
2399 	struct	ixgbe_hw	*hw = &sc->hw;
2400 	uint32_t			rscctrl, rdrxctl;
2401 
2402 #if 0
2403 	/* If turning LRO/RSC off we need to disable it */
2404 	if ((sc->arpcom.ac_if.if_capenable & IFCAP_LRO) == 0) {
2405 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2406 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
2407 		return;
2408 	}
2409 #endif
2410 
2411 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
2412 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
2413 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
2414 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
2415 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
2416 
2417 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2418 	rscctrl |= IXGBE_RSCCTL_RSCEN;
2419 	/*
2420 	** Limit the total number of descriptors that
2421 	** can be combined, so it does not exceed 64K
2422 	*/
2423 	if (rxr->mbuf_sz == MCLBYTES)
2424 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
2425 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
2426 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
2427 	else if (rxr->mbuf_sz == MJUM9BYTES)
2428 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
2429 	else  /* Using 16K cluster */
2430 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
2431 
2432 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
2433 
2434 	/* Enable TCP header recognition */
2435 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
2436 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
2437 	    IXGBE_PSRTYPE_TCPHDR));
2438 
2439 	/* Disable RSC for ACK packets */
2440 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
2441 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
2442 
2443 	rxr->hw_rsc = TRUE;
2444 }
2445 #endif
2446 
2447 static int
2448 ix_init_rx_ring(struct ix_rx_ring *rxr)
2449 {
2450 	int i;
2451 
2452 	/* Clear the ring contents */
2453 	bzero(rxr->rx_base, rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2454 
2455 	/* XXX we need JUMPAGESIZE for RSC too */
2456 	if (rxr->rx_sc->max_frame_size <= MCLBYTES)
2457 		rxr->rx_mbuf_sz = MCLBYTES;
2458 	else
2459 		rxr->rx_mbuf_sz = MJUMPAGESIZE;
2460 
2461 	/* Now replenish the mbufs */
2462 	for (i = 0; i < rxr->rx_ndesc; ++i) {
2463 		int error;
2464 
2465 		error = ix_newbuf(rxr, i, TRUE);
2466 		if (error)
2467 			return error;
2468 	}
2469 
2470 	/* Setup our descriptor indices */
2471 	rxr->rx_next_check = 0;
2472 	rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2473 
2474 #if 0
2475 	/*
2476 	** Now set up the LRO interface:
2477 	*/
2478 	if (ixgbe_rsc_enable)
2479 		ix_setup_hw_rsc(rxr);
2480 #endif
2481 
2482 	return 0;
2483 }
2484 
2485 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
2486 
2487 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
2488 
2489 static void
2490 ix_init_rx_unit(struct ix_softc *sc)
2491 {
2492 	struct ixgbe_hw	*hw = &sc->hw;
2493 	struct ifnet *ifp = &sc->arpcom.ac_if;
2494 	uint32_t bufsz, fctrl, rxcsum, hlreg;
2495 	int i;
2496 
2497 	/*
2498 	 * Make sure receives are disabled while setting up the descriptor ring
2499 	 */
2500 	ixgbe_disable_rx(hw);
2501 
2502 	/* Enable broadcasts */
2503 	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
2504 	fctrl |= IXGBE_FCTRL_BAM;
2505 	if (hw->mac.type == ixgbe_mac_82598EB) {
2506 		fctrl |= IXGBE_FCTRL_DPF;
2507 		fctrl |= IXGBE_FCTRL_PMCF;
2508 	}
2509 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
2510 
2511 	/* Set for Jumbo Frames? */
2512 	hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
2513 	if (ifp->if_mtu > ETHERMTU)
2514 		hlreg |= IXGBE_HLREG0_JUMBOEN;
2515 	else
2516 		hlreg &= ~IXGBE_HLREG0_JUMBOEN;
2517 	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
2518 
2519 	KKASSERT(sc->rx_rings[0].rx_mbuf_sz >= MCLBYTES);
2520 	bufsz = (sc->rx_rings[0].rx_mbuf_sz + BSIZEPKT_ROUNDUP) >>
2521 	    IXGBE_SRRCTL_BSIZEPKT_SHIFT;
2522 
2523 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
2524 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
2525 		uint64_t rdba = rxr->rx_base_paddr;
2526 		uint32_t srrctl;
2527 
2528 		/* Setup the Base and Length of the Rx Descriptor Ring */
2529 		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i), (uint32_t)rdba);
2530 		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (uint32_t)(rdba >> 32));
2531 		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
2532 		    rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2533 
2534 		/*
2535 		 * Set up the SRRCTL register
2536 		 */
2537 		srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
2538 
2539 		srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
2540 		srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
2541 		srrctl |= bufsz;
2542 		srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
2543 		if (sc->rx_ring_inuse > 1) {
2544 			/* See the comment near ix_enable_rx_drop() */
2545 			if (sc->ifm_media &
2546 			    (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) {
2547 				srrctl &= ~IXGBE_SRRCTL_DROP_EN;
2548 				if (i == 0 && bootverbose) {
2549 					if_printf(ifp, "flow control %s, "
2550 					    "disable RX drop\n",
2551 					    ix_ifmedia2str(sc->ifm_media));
2552 				}
2553 			} else {
2554 				srrctl |= IXGBE_SRRCTL_DROP_EN;
2555 				if (i == 0 && bootverbose) {
2556 					if_printf(ifp, "flow control %s, "
2557 					    "enable RX drop\n",
2558 					    ix_ifmedia2str(sc->ifm_media));
2559 				}
2560 			}
2561 		}
2562 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
2563 
2564 		/* Setup the HW Rx Head and Tail Descriptor Pointers */
2565 		IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
2566 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
2567 	}
2568 
2569 	if (sc->hw.mac.type != ixgbe_mac_82598EB)
2570 		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), 0);
2571 
2572 	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
2573 
2574 	/*
2575 	 * Setup RSS
2576 	 */
2577 	if (IX_ENABLE_HWRSS(sc)) {
2578 		uint8_t key[IX_NRSSRK * IX_RSSRK_SIZE];
2579 		int j, r, nreta;
2580 
2581 		/*
2582 		 * NOTE:
2583 		 * When we reach here, RSS has already been disabled
2584 		 * in ix_stop(), so we could safely configure RSS key
2585 		 * in ix_stop(), so we can safely configure the RSS key
2586 		 * and redirect table.
2587 
2588 		/*
2589 		 * Configure RSS key
2590 		 */
2591 		toeplitz_get_key(key, sizeof(key));
2592 		for (i = 0; i < IX_NRSSRK; ++i) {
2593 			uint32_t rssrk;
2594 
2595 			rssrk = IX_RSSRK_VAL(key, i);
2596 			IX_RSS_DPRINTF(sc, 1, "rssrk%d 0x%08x\n",
2597 			    i, rssrk);
2598 
2599 			IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rssrk);
2600 		}
2601 
2602 		/* Table size will differ based on MAC */
2603 		switch (hw->mac.type) {
2604 		case ixgbe_mac_X550:
2605 		case ixgbe_mac_X550EM_x:
2606 		case ixgbe_mac_X550EM_a:
2607 			nreta = IX_NRETA_X550;
2608 			break;
2609 		default:
2610 			nreta = IX_NRETA;
2611 			break;
2612 		}
2613 
2614 		/*
2615 		 * Configure the RSS redirect table in the following fashion:
2616 		 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
2617 		 */
2618 		r = 0;
2619 		for (j = 0; j < nreta; ++j) {
2620 			uint32_t reta = 0;
2621 
2622 			for (i = 0; i < IX_RETA_SIZE; ++i) {
2623 				uint32_t q;
2624 
2625 				q = r % sc->rx_ring_inuse;
2626 				reta |= q << (8 * i);
2627 				++r;
2628 			}
2629 			IX_RSS_DPRINTF(sc, 1, "reta 0x%08x\n", reta);
2630 			if (j < IX_NRETA) {
2631 				IXGBE_WRITE_REG(hw, IXGBE_RETA(j), reta);
2632 			} else {
2633 				IXGBE_WRITE_REG(hw, IXGBE_ERETA(j - IX_NRETA),
2634 				    reta);
2635 			}
2636 		}
2637 
2638 		/*
2639 		 * Enable multiple receive queues.
2640 		 * Enable IPv4 RSS standard hash functions.
2641 		 */
2642 		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
2643 		    IXGBE_MRQC_RSSEN |
2644 		    IXGBE_MRQC_RSS_FIELD_IPV4 |
2645 		    IXGBE_MRQC_RSS_FIELD_IPV4_TCP);
2646 
2647 		/*
2648 		 * NOTE:
2649 		 * PCSD must be enabled to enable multiple
2650 		 * receive queues.
2651 		 */
2652 		rxcsum |= IXGBE_RXCSUM_PCSD;
2653 	}
2654 
2655 	if (ifp->if_capenable & IFCAP_RXCSUM)
2656 		rxcsum |= IXGBE_RXCSUM_PCSD;
2657 
2658 	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
2659 }
2660 
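/*
 * Publish refilled RX descriptors to the hardware by moving the RX
 * ring's tail pointer (RDT) to just before index 'i'.
 */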
2661 static __inline void
2662 ix_rx_refresh(struct ix_rx_ring *rxr, int i)
2663 {
2664 	if (--i < 0)
2665 		i = rxr->rx_ndesc - 1;
2666 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, IXGBE_RDT(rxr->rx_idx), i);
2667 }
2668 
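/*
 * Translate the hardware RX checksum status bits into mbuf csum_flags;
 * only IPv4 packets and their non-fragmented TCP/UDP payloads are
 * marked as checked.
 */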
2669 static __inline void
2670 ix_rxcsum(uint32_t staterr, struct mbuf *mp, uint32_t ptype)
2671 {
2672 	if ((ptype &
2673 	     (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_IPV4_EX)) == 0) {
2674 		/* Not IPv4 */
2675 		return;
2676 	}
2677 
2678 	if ((staterr & (IXGBE_RXD_STAT_IPCS | IXGBE_RXDADV_ERR_IPE)) ==
2679 	    IXGBE_RXD_STAT_IPCS)
2680 		mp->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID;
2681 
2682 	if ((ptype &
2683 	     (IXGBE_RXDADV_PKTTYPE_TCP | IXGBE_RXDADV_PKTTYPE_UDP)) == 0) {
2684 		/*
2685 		 * - Neither TCP nor UDP
2686 		 * - IPv4 fragment
2687 		 */
2688 		return;
2689 	}
2690 
2691 	if ((staterr & (IXGBE_RXD_STAT_L4CS | IXGBE_RXDADV_ERR_TCPE)) ==
2692 	    IXGBE_RXD_STAT_L4CS) {
2693 		mp->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
2694 		    CSUM_FRAG_NOT_CHECKED;
2695 		mp->m_pkthdr.csum_data = htons(0xffff);
2696 	}
2697 }
2698 
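/*
 * Set the mbuf's Toeplitz hash and fill in the packet info used for
 * RSS dispatch; only TCP/IPv4 and non-fragmented UDP/IPv4 hash types
 * are handled, anything else returns NULL.
 */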
2699 static __inline struct pktinfo *
2700 ix_rssinfo(struct mbuf *m, struct pktinfo *pi,
2701     uint32_t hash, uint32_t hashtype, uint32_t ptype)
2702 {
2703 	switch (hashtype) {
2704 	case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
2705 		pi->pi_netisr = NETISR_IP;
2706 		pi->pi_flags = 0;
2707 		pi->pi_l3proto = IPPROTO_TCP;
2708 		break;
2709 
2710 	case IXGBE_RXDADV_RSSTYPE_IPV4:
2711 		if ((ptype & IXGBE_RXDADV_PKTTYPE_UDP) == 0) {
2712 			/* Not UDP or is fragment */
2713 			return NULL;
2714 		}
2715 		pi->pi_netisr = NETISR_IP;
2716 		pi->pi_flags = 0;
2717 		pi->pi_l3proto = IPPROTO_UDP;
2718 		break;
2719 
2720 	default:
2721 		return NULL;
2722 	}
2723 
2724 	m_sethash(m, toeplitz_hash(hash));
2725 	return pi;
2726 }
2727 
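/* (Re)initialize an RX descriptor to point at its buffer. */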
2728 static __inline void
2729 ix_setup_rxdesc(union ixgbe_adv_rx_desc *rxd, const struct ix_rx_buf *rxbuf)
2730 {
2731 	rxd->read.pkt_addr = htole64(rxbuf->paddr);
2732 	rxd->wb.upper.status_error = 0;
2733 }
2734 
2735 static void
2736 ix_rx_discard(struct ix_rx_ring *rxr, int i, boolean_t eop)
2737 {
2738 	struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2739 
2740 	/*
2741 	 * XXX discard may not be correct
2742 	 */
2743 	if (eop) {
2744 		IFNET_STAT_INC(&rxr->rx_sc->arpcom.ac_if, ierrors, 1);
2745 		rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2746 	} else {
2747 		rxr->rx_flags |= IX_RXRING_FLAG_DISC;
2748 	}
2749 	if (rxbuf->fmp != NULL) {
2750 		m_freem(rxbuf->fmp);
2751 		rxbuf->fmp = NULL;
2752 		rxbuf->lmp = NULL;
2753 	}
2754 	ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
2755 }
2756 
2757 static void
2758 ix_rxeof(struct ix_rx_ring *rxr, int count)
2759 {
2760 	struct ifnet *ifp = &rxr->rx_sc->arpcom.ac_if;
2761 	int i, nsegs = 0, cpuid = mycpuid;
2762 
2763 	i = rxr->rx_next_check;
2764 	while (count != 0) {
2765 		struct ix_rx_buf *rxbuf, *nbuf = NULL;
2766 		union ixgbe_adv_rx_desc	*cur;
2767 		struct mbuf *sendmp = NULL, *mp;
2768 		struct pktinfo *pi = NULL, pi0;
2769 		uint32_t rsc = 0, ptype, staterr, hash, hashtype;
2770 		uint16_t len;
2771 		boolean_t eop;
2772 
2773 		cur = &rxr->rx_base[i];
2774 		staterr = le32toh(cur->wb.upper.status_error);
2775 
2776 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
2777 			break;
2778 		++nsegs;
2779 
2780 		rxbuf = &rxr->rx_buf[i];
2781 		mp = rxbuf->m_head;
2782 
2783 		len = le16toh(cur->wb.upper.length);
2784 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
2785 		    IXGBE_RXDADV_PKTTYPE_MASK;
2786 		hash = le32toh(cur->wb.lower.hi_dword.rss);
2787 		hashtype = le32toh(cur->wb.lower.lo_dword.data) &
2788 		    IXGBE_RXDADV_RSSTYPE_MASK;
2789 
2790 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
2791 		if (eop)
2792 			--count;
2793 
2794 		/*
2795 		 * Make sure bad packets are discarded
2796 		 */
2797 		if ((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) ||
2798 		    (rxr->rx_flags & IX_RXRING_FLAG_DISC)) {
2799 			ix_rx_discard(rxr, i, eop);
2800 			goto next_desc;
2801 		}
2802 
2803 		bus_dmamap_sync(rxr->rx_tag, rxbuf->map, BUS_DMASYNC_POSTREAD);
2804 		if (ix_newbuf(rxr, i, FALSE) != 0) {
2805 			ix_rx_discard(rxr, i, eop);
2806 			goto next_desc;
2807 		}
2808 
2809 		/*
2810 		 * On the 82599, which supports hardware LRO (RSC),
2811 		 * the fragments of a frame need not occupy sequential
2812 		 * descriptors; instead the next descriptor index is
2813 		 * carried in bits of the current descriptor.  This
2814 		 * also means we may process more than one frame at a
2815 		 * time, which required eliminating global chain
2816 		 * pointers in favor of the per-buffer chain pointers
2817 		 * we are using here.
2818 		 */
2819 		if (!eop) {
2820 			int nextp;
2821 
2822 			/*
2823 			 * Figure out the next descriptor
2824 			 * of this frame.
2825 			 */
2826 			if (rxr->rx_flags & IX_RXRING_FLAG_LRO)
2827 				rsc = ix_rsc_count(cur);
2828 			if (rsc) { /* Get hardware index */
2829 				nextp = ((staterr &
2830 				    IXGBE_RXDADV_NEXTP_MASK) >>
2831 				    IXGBE_RXDADV_NEXTP_SHIFT);
2832 			} else { /* Just sequential */
2833 				nextp = i + 1;
2834 				if (nextp == rxr->rx_ndesc)
2835 					nextp = 0;
2836 			}
2837 			nbuf = &rxr->rx_buf[nextp];
2838 			prefetch(nbuf);
2839 		}
2840 		mp->m_len = len;
2841 
2842 		/*
2843 		 * Rather than using the fmp/lmp global pointers
2844 		 * we now keep the head of a packet chain in the
2845 		 * buffer struct and pass this along from one
2846 		 * descriptor to the next, until we get EOP.
2847 		 */
2848 		if (rxbuf->fmp == NULL) {
2849 			mp->m_pkthdr.len = len;
2850 			rxbuf->fmp = mp;
2851 			rxbuf->lmp = mp;
2852 		} else {
2853 			rxbuf->fmp->m_pkthdr.len += len;
2854 			rxbuf->lmp->m_next = mp;
2855 			rxbuf->lmp = mp;
2856 		}
2857 
2858 		if (nbuf != NULL) {
2859 			/*
2860 			 * Not the last fragment of this frame,
2861 			 * pass this fragment list on
2862 			 */
2863 			nbuf->fmp = rxbuf->fmp;
2864 			nbuf->lmp = rxbuf->lmp;
2865 		} else {
2866 			/*
2867 			 * Send this frame
2868 			 */
2869 			sendmp = rxbuf->fmp;
2870 
2871 			sendmp->m_pkthdr.rcvif = ifp;
2872 			IFNET_STAT_INC(ifp, ipackets, 1);
2873 #ifdef IX_RSS_DEBUG
2874 			rxr->rx_pkts++;
2875 #endif
2876 
2877 			/* Process vlan info */
2878 			if (staterr & IXGBE_RXD_STAT_VP) {
2879 				sendmp->m_pkthdr.ether_vlantag =
2880 				    le16toh(cur->wb.upper.vlan);
2881 				sendmp->m_flags |= M_VLANTAG;
2882 			}
2883 			if (ifp->if_capenable & IFCAP_RXCSUM)
2884 				ix_rxcsum(staterr, sendmp, ptype);
2885 			if (ifp->if_capenable & IFCAP_RSS) {
2886 				pi = ix_rssinfo(sendmp, &pi0,
2887 				    hash, hashtype, ptype);
2888 			}
2889 		}
2890 		rxbuf->fmp = NULL;
2891 		rxbuf->lmp = NULL;
2892 next_desc:
2893 		/* Advance our pointers to the next descriptor. */
2894 		if (++i == rxr->rx_ndesc)
2895 			i = 0;
2896 
2897 		if (sendmp != NULL)
2898 			ifp->if_input(ifp, sendmp, pi, cpuid);
2899 
2900 		if (nsegs >= rxr->rx_wreg_nsegs) {
2901 			ix_rx_refresh(rxr, i);
2902 			nsegs = 0;
2903 		}
2904 	}
2905 	rxr->rx_next_check = i;
2906 
2907 	if (nsegs > 0)
2908 		ix_rx_refresh(rxr, i);
2909 }
2910 
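/*
 * Enable hardware VLAN tag stripping; on 82598 this is a global
 * VLNCTRL setting, on later chips it is per RX queue (RXDCTL).
 */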
2911 static void
2912 ix_set_vlan(struct ix_softc *sc)
2913 {
2914 	struct ixgbe_hw *hw = &sc->hw;
2915 	uint32_t ctrl;
2916 
2917 	if (hw->mac.type == ixgbe_mac_82598EB) {
2918 		ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
2919 		ctrl |= IXGBE_VLNCTRL_VME;
2920 		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
2921 	} else {
2922 		int i;
2923 
2924 		/*
2925 		 * On 82599 and later chips the VLAN enable is
2926 		 * per queue in RXDCTL
2927 		 */
2928 		for (i = 0; i < sc->rx_ring_inuse; ++i) {
2929 			ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
2930 			ctrl |= IXGBE_RXDCTL_VME;
2931 			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
2932 		}
2933 	}
2934 }
2935 
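/*
 * Build the interrupt mask for this chip and unmask interrupts.
 * With MSI-X the RX/TX ring interrupts are set to auto-clear and
 * each ring's EIMS bit is enabled individually.
 */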
2936 static void
2937 ix_enable_intr(struct ix_softc *sc)
2938 {
2939 	struct ixgbe_hw	*hw = &sc->hw;
2940 	uint32_t fwsm;
2941 	int i;
2942 
2943 	for (i = 0; i < sc->intr_cnt; ++i)
2944 		lwkt_serialize_handler_enable(sc->intr_data[i].intr_serialize);
2945 
2946 	sc->intr_mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
2947 
2948 	/* Enable Fan Failure detection */
2949 	if (hw->device_id == IXGBE_DEV_ID_82598AT)
2950 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
2951 
2952 	switch (hw->mac.type) {
2953 	case ixgbe_mac_82599EB:
2954 		sc->intr_mask |= IXGBE_EIMS_ECC;
2955 		/* Temperature sensor on some adapters */
2956 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP0;
2957 		/* SFP+ (RX_LOS_N & MOD_ABS_N) */
2958 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
2959 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP2;
2960 		break;
2961 
2962 	case ixgbe_mac_X540:
2963 		sc->intr_mask |= IXGBE_EIMS_ECC;
2964 		/* Detect if Thermal Sensor is enabled */
2965 		fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
2966 		if (fwsm & IXGBE_FWSM_TS_ENABLED)
2967 			sc->intr_mask |= IXGBE_EIMS_TS;
2968 		break;
2969 
2970 	case ixgbe_mac_X550:
2971 	case ixgbe_mac_X550EM_a:
2972 	case ixgbe_mac_X550EM_x:
2973 		sc->intr_mask |= IXGBE_EIMS_ECC;
2974 		/* MAC thermal sensor is automatically enabled */
2975 		sc->intr_mask |= IXGBE_EIMS_TS;
2976 		/* Some devices use SDP0 for important information */
2977 		if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
2978 		    hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T)
2979 			sc->intr_mask |= IXGBE_EIMS_GPI_SDP0_BY_MAC(hw);
2980 		/* FALL THROUGH */
2981 	default:
2982 		break;
2983 	}
2984 
2985 	/* With MSI-X we use auto clear for RX and TX rings */
2986 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
2987 		/*
2988 		 * There are no EIAC1/EIAC2 for newer chips; the related
2989 		 * bits for TX and RX rings > 16 are always auto clear.
2990 		 *
2991 		 * XXX which bits?  There are _no_ documented EICR1 and
2992 		 * EICR2 at all; only EICR.
2993 		 */
2994 		IXGBE_WRITE_REG(hw, IXGBE_EIAC, IXGBE_EIMS_RTX_QUEUE);
2995 	} else {
2996 		sc->intr_mask |= IX_TX_INTR_MASK | IX_RX0_INTR_MASK;
2997 
2998 		KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
2999 		if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS)
3000 			sc->intr_mask |= IX_RX1_INTR_MASK;
3001 	}
3002 
3003 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
3004 
3005 	/*
3006 	 * Enable RX and TX rings for MSI-X
3007 	 */
3008 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
3009 		for (i = 0; i < sc->tx_ring_inuse; ++i) {
3010 			const struct ix_tx_ring *txr = &sc->tx_rings[i];
3011 
3012 			if (txr->tx_intr_vec >= 0) {
3013 				IXGBE_WRITE_REG(hw, txr->tx_eims,
3014 				    txr->tx_eims_val);
3015 			}
3016 		}
3017 		for (i = 0; i < sc->rx_ring_inuse; ++i) {
3018 			const struct ix_rx_ring *rxr = &sc->rx_rings[i];
3019 
3020 			KKASSERT(rxr->rx_intr_vec >= 0);
3021 			IXGBE_WRITE_REG(hw, rxr->rx_eims, rxr->rx_eims_val);
3022 		}
3023 	}
3024 
3025 	IXGBE_WRITE_FLUSH(hw);
3026 }
3027 
3028 static void
3029 ix_disable_intr(struct ix_softc *sc)
3030 {
3031 	int i;
3032 
3033 	if (sc->intr_type == PCI_INTR_TYPE_MSIX)
3034 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIAC, 0);
3035 
3036 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
3037 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, ~0);
3038 	} else {
3039 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, 0xFFFF0000);
3040 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(0), ~0);
3041 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(1), ~0);
3042 	}
3043 	IXGBE_WRITE_FLUSH(&sc->hw);
3044 
3045 	for (i = 0; i < sc->intr_cnt; ++i)
3046 		lwkt_serialize_handler_disable(sc->intr_data[i].intr_serialize);
3047 }
3048 
3049 uint16_t
3050 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, uint32_t reg)
3051 {
3052 	return pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
3053 	    reg, 2);
3054 }
3055 
3056 void
3057 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, uint32_t reg, uint16_t value)
3058 {
3059 	pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
3060 	    reg, value, 2);
3061 }
3062 
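/*
 * Determine the PCI Express width/speed of the slot the adapter is
 * plugged into and warn if it is too narrow or too slow for optimal
 * performance.
 */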
3063 static void
3064 ix_slot_info(struct ix_softc *sc)
3065 {
3066 	struct ixgbe_hw *hw = &sc->hw;
3067 	device_t dev = sc->dev;
3068 	struct ixgbe_mac_info *mac = &hw->mac;
3069 	uint16_t link;
3070 	uint32_t offset;
3071 
3072 	/* For most devices simply call the shared code routine */
3073 	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
3074 		ixgbe_get_bus_info(hw);
3075 		/* These devices don't use PCI-E */
3076 		if (hw->mac.type == ixgbe_mac_X550EM_x ||
3077 		    hw->mac.type == ixgbe_mac_X550EM_a)
3078 			return;
3079 		goto display;
3080 	}
3081 
3082 	/*
3083 	 * For the Quad port adapter we need to parse back
3084 	 * up the PCI tree to find the speed of the expansion
3085 	 * slot into which this adapter is plugged. A bit more work.
3086 	 */
3087 	dev = device_get_parent(device_get_parent(dev));
3088 #ifdef IXGBE_DEBUG
3089 	device_printf(dev, "parent pcib = %x,%x,%x\n",
3090 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
3091 #endif
3092 	dev = device_get_parent(device_get_parent(dev));
3093 #ifdef IXGBE_DEBUG
3094 	device_printf(dev, "slot pcib = %x,%x,%x\n",
3095 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
3096 #endif
3097 	/* Now get the PCI Express Capabilities offset */
3098 	offset = pci_get_pciecap_ptr(dev);
3099 	/* ...and read the Link Status Register */
3100 	link = pci_read_config(dev, offset + PCIER_LINKSTAT, 2);
3101 	switch (link & IXGBE_PCI_LINK_WIDTH) {
3102 	case IXGBE_PCI_LINK_WIDTH_1:
3103 		hw->bus.width = ixgbe_bus_width_pcie_x1;
3104 		break;
3105 	case IXGBE_PCI_LINK_WIDTH_2:
3106 		hw->bus.width = ixgbe_bus_width_pcie_x2;
3107 		break;
3108 	case IXGBE_PCI_LINK_WIDTH_4:
3109 		hw->bus.width = ixgbe_bus_width_pcie_x4;
3110 		break;
3111 	case IXGBE_PCI_LINK_WIDTH_8:
3112 		hw->bus.width = ixgbe_bus_width_pcie_x8;
3113 		break;
3114 	default:
3115 		hw->bus.width = ixgbe_bus_width_unknown;
3116 		break;
3117 	}
3118 
3119 	switch (link & IXGBE_PCI_LINK_SPEED) {
3120 	case IXGBE_PCI_LINK_SPEED_2500:
3121 		hw->bus.speed = ixgbe_bus_speed_2500;
3122 		break;
3123 	case IXGBE_PCI_LINK_SPEED_5000:
3124 		hw->bus.speed = ixgbe_bus_speed_5000;
3125 		break;
3126 	case IXGBE_PCI_LINK_SPEED_8000:
3127 		hw->bus.speed = ixgbe_bus_speed_8000;
3128 		break;
3129 	default:
3130 		hw->bus.speed = ixgbe_bus_speed_unknown;
3131 		break;
3132 	}
3133 
3134 	mac->ops.set_lan_id(hw);
3135 
3136 display:
3137 	device_printf(dev, "PCI Express Bus: Speed %s %s\n",
3138 	    hw->bus.speed == ixgbe_bus_speed_8000 ? "8.0GT/s" :
3139 	    hw->bus.speed == ixgbe_bus_speed_5000 ? "5.0GT/s" :
3140 	    hw->bus.speed == ixgbe_bus_speed_2500 ? "2.5GT/s" : "Unknown",
3141 	    hw->bus.width == ixgbe_bus_width_pcie_x8 ? "Width x8" :
3142 	    hw->bus.width == ixgbe_bus_width_pcie_x4 ? "Width x4" :
3143 	    hw->bus.width == ixgbe_bus_width_pcie_x1 ? "Width x1" : "Unknown");
3144 
3145 	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP &&
3146 	    hw->bus.width <= ixgbe_bus_width_pcie_x4 &&
3147 	    hw->bus.speed == ixgbe_bus_speed_2500) {
3148 		device_printf(dev, "For optimal performance an x8 "
3149 		    "PCIE or x4 PCIE Gen2 slot is required.\n");
3150 	} else if (hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP &&
3151 	    hw->bus.width <= ixgbe_bus_width_pcie_x8 &&
3152 	    hw->bus.speed < ixgbe_bus_speed_8000) {
3153 		device_printf(dev, "For optimal performance an x8 "
3154 		    "PCIE Gen3 slot is required.\n");
3155 	}
3156 }
3157 
3158 /*
3159  * TODO comment is incorrect
3160  *
3161  * Setup the correct IVAR register for a particular MSIX interrupt
3162  * - entry is the register array entry
3163  * - vector is the MSIX vector for this queue
3164  * - type is RX/TX/MISC
3165  */
3166 static void
3167 ix_set_ivar(struct ix_softc *sc, uint8_t entry, uint8_t vector,
3168     int8_t type)
3169 {
3170 	struct ixgbe_hw *hw = &sc->hw;
3171 	uint32_t ivar, index;
3172 
3173 	vector |= IXGBE_IVAR_ALLOC_VAL;
3174 
3175 	switch (hw->mac.type) {
3176 	case ixgbe_mac_82598EB:
3177 		if (type == -1)
3178 			entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
3179 		else
3180 			entry += (type * 64);
3181 		index = (entry >> 2) & 0x1F;
3182 		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
3183 		ivar &= ~(0xFF << (8 * (entry & 0x3)));
3184 		ivar |= (vector << (8 * (entry & 0x3)));
3185 		IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
3186 		break;
3187 
3188 	case ixgbe_mac_82599EB:
3189 	case ixgbe_mac_X540:
3190 	case ixgbe_mac_X550:
3191 	case ixgbe_mac_X550EM_a:
3192 	case ixgbe_mac_X550EM_x:
3193 		if (type == -1) { /* MISC IVAR */
3194 			index = (entry & 1) * 8;
3195 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
3196 			ivar &= ~(0xFF << index);
3197 			ivar |= (vector << index);
3198 			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
3199 		} else {	/* RX/TX IVARS */
3200 			index = (16 * (entry & 1)) + (8 * type);
3201 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
3202 			ivar &= ~(0xFF << index);
3203 			ivar |= (vector << index);
3204 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
3205 		}
3206 		/* FALL THROUGH */
3207 	default:
3208 		break;
3209 	}
3210 }
3211 
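/*
 * Probe for an SFP+ module that was not present at attach time;
 * returns TRUE once a supported module has been identified.
 */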
3212 static boolean_t
3213 ix_sfp_probe(struct ix_softc *sc)
3214 {
3215 	struct ixgbe_hw	*hw = &sc->hw;
3216 
3217 	if (hw->phy.type == ixgbe_phy_nl &&
3218 	    hw->phy.sfp_type == ixgbe_sfp_type_not_present) {
3219 		int32_t ret;
3220 
3221 		ret = hw->phy.ops.identify_sfp(hw);
3222 		if (ret)
3223 			return FALSE;
3224 
3225 		ret = hw->phy.ops.reset(hw);
3226 		if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3227 			if_printf(&sc->arpcom.ac_if,
3228 			     "Unsupported SFP+ module detected!  "
3229 			     "Reload driver with supported module.\n");
3230 			sc->sfp_probe = FALSE;
3231 			return FALSE;
3232 		}
3233 		if_printf(&sc->arpcom.ac_if, "SFP+ module detected!\n");
3234 
3235 		/* We now have supported optics */
3236 		sc->sfp_probe = FALSE;
3237 
3238 		return TRUE;
3239 	}
3240 	return FALSE;
3241 }
3242 
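/*
 * Handling link state change
 */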
3243 static void
3244 ix_handle_link(struct ix_softc *sc)
3245 {
3246 	ixgbe_check_link(&sc->hw, &sc->link_speed, &sc->link_up, 0);
3247 	ix_update_link_status(sc);
3248 }
3249 
3250 /*
3251  * Handling SFP module
3252  */
3253 static void
3254 ix_handle_mod(struct ix_softc *sc)
3255 {
3256 	struct ixgbe_hw *hw = &sc->hw;
3257 	uint32_t err;
3258 
3259 	err = hw->phy.ops.identify_sfp(hw);
3260 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3261 		if_printf(&sc->arpcom.ac_if,
3262 		    "Unsupported SFP+ module type was detected.\n");
3263 		return;
3264 	}
3265 	err = hw->mac.ops.setup_sfp(hw);
3266 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3267 		if_printf(&sc->arpcom.ac_if,
3268 		    "Setup failure - unsupported SFP+ module type.\n");
3269 		return;
3270 	}
3271 	ix_handle_msf(sc);
3272 }
3273 
3274 /*
3275  * Handling MSF (multispeed fiber)
3276  */
3277 static void
3278 ix_handle_msf(struct ix_softc *sc)
3279 {
3280 	struct ixgbe_hw *hw = &sc->hw;
3281 	uint32_t autoneg;
3282 
3283 	hw->phy.ops.identify_sfp(hw);
3284 	ix_init_media(sc);
3285 
3286 	if (sc->advspeed != IXGBE_LINK_SPEED_UNKNOWN)
3287 		autoneg = sc->advspeed;
3288 	else
3289 		autoneg = hw->phy.autoneg_advertised;
3290 	if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
3291 		bool negotiate;
3292 
3293 		hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
3294 	}
3295 	if (hw->mac.ops.setup_link != NULL)
3296 		hw->mac.ops.setup_link(hw, autoneg, TRUE);
3297 }
3298 
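/*
 * Handling external PHY interrupt (LASI)
 */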
3299 static void
3300 ix_handle_phy(struct ix_softc *sc)
3301 {
3302 	struct ixgbe_hw *hw = &sc->hw;
3303 	int error;
3304 
3305 	error = hw->phy.ops.handle_lasi(hw);
3306 	if (error == IXGBE_ERR_OVERTEMP) {
3307 		if_printf(&sc->arpcom.ac_if,
3308 		    "CRITICAL: EXTERNAL PHY OVER TEMP!!  "
3309 		    "PHY will downshift to lower power state!\n");
3310 	} else if (error) {
3311 		if_printf(&sc->arpcom.ac_if,
3312 		    "Error handling LASI interrupt: %d\n", error);
3313 	}
3314 }
3315 
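/*
 * Accumulate the hardware statistics counters into the softc and
 * update the interface level RX error counters.
 */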
3316 static void
3317 ix_update_stats(struct ix_softc *sc)
3318 {
3319 	struct ifnet *ifp = &sc->arpcom.ac_if;
3320 	struct ixgbe_hw *hw = &sc->hw;
3321 	uint32_t missed_rx = 0, bprc, lxon, lxoff, total;
3322 	uint64_t total_missed_rx = 0;
3323 	int i;
3324 
3325 	sc->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
3326 	sc->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
3327 	sc->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
3328 	sc->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
3329 
3330 	for (i = 0; i < 16; i++) {
3331 		sc->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
3332 		sc->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
3333 		sc->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
3334 	}
3335 	sc->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
3336 	sc->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
3337 	sc->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
3338 
3339 	/* Hardware workaround, gprc counts missed packets */
3340 	sc->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
3341 	sc->stats.gprc -= missed_rx;
3342 
3343 	if (hw->mac.type != ixgbe_mac_82598EB) {
3344 		sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
3345 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
3346 		sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
3347 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
3348 		sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
3349 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
3350 		sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
3351 		sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
3352 	} else {
3353 		sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
3354 		sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
3355 		/* 82598 only has a counter in the high register */
3356 		sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
3357 		sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
3358 		sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
3359 	}
3360 
3361 	/*
3362 	 * Workaround: mprc hardware is incorrectly counting
3363 	 * broadcasts, so for now we subtract those.
3364 	 */
3365 	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
3366 	sc->stats.bprc += bprc;
3367 	sc->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
3368 	if (hw->mac.type == ixgbe_mac_82598EB)
3369 		sc->stats.mprc -= bprc;
3370 
3371 	sc->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
3372 	sc->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
3373 	sc->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
3374 	sc->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
3375 	sc->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
3376 	sc->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
3377 
3378 	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
3379 	sc->stats.lxontxc += lxon;
3380 	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
3381 	sc->stats.lxofftxc += lxoff;
3382 	total = lxon + lxoff;
3383 
3384 	sc->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
3385 	sc->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
3386 	sc->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
3387 	sc->stats.gptc -= total;
3388 	sc->stats.mptc -= total;
3389 	sc->stats.ptc64 -= total;
3390 	sc->stats.gotc -= total * ETHER_MIN_LEN;
3391 
3392 	sc->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
3393 	sc->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
3394 	sc->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
3395 	sc->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
3396 	sc->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
3397 	sc->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
3398 	sc->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
3399 	sc->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
3400 	sc->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
3401 	sc->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
3402 	sc->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
3403 	sc->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
3404 	sc->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
3405 	sc->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
3406 	sc->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
3407 	sc->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC);
3408 	sc->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
3409 	sc->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
3410 	/* Only read FCOE on 82599 */
3411 	if (hw->mac.type != ixgbe_mac_82598EB) {
3412 		sc->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
3413 		sc->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
3414 		sc->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
3415 		sc->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
3416 		sc->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
3417 	}
3418 
3419 	/* Rx Errors */
3420 	IFNET_STAT_SET(ifp, iqdrops, total_missed_rx);
3421 	IFNET_STAT_SET(ifp, ierrors, sc->stats.crcerrs + sc->stats.rlec);
3422 }
3423 
3424 #if 0
3425 /*
3426  * Add sysctl variables, one per statistic, to the system.
3427  */
3428 static void
3429 ix_add_hw_stats(struct ix_softc *sc)
3430 {
3431 
3432 	device_t dev = sc->dev;
3433 
3434 	struct ix_tx_ring *txr = sc->tx_rings;
3435 	struct ix_rx_ring *rxr = sc->rx_rings;
3436 
3437 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
3438 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
3439 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
3440 	struct ixgbe_hw_stats *stats = &sc->stats;
3441 
3442 	struct sysctl_oid *stat_node, *queue_node;
3443 	struct sysctl_oid_list *stat_list, *queue_list;
3444 
3445 #define QUEUE_NAME_LEN 32
3446 	char namebuf[QUEUE_NAME_LEN];
3447 
3448 	/* MAC stats get their own sub node */
3449 
3450 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
3451 				    CTLFLAG_RD, NULL, "MAC Statistics");
3452 	stat_list = SYSCTL_CHILDREN(stat_node);
3453 
3454 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
3455 			CTLFLAG_RD, &stats->crcerrs,
3456 			"CRC Errors");
3457 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
3458 			CTLFLAG_RD, &stats->illerrc,
3459 			"Illegal Byte Errors");
3460 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
3461 			CTLFLAG_RD, &stats->errbc,
3462 			"Byte Errors");
3463 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
3464 			CTLFLAG_RD, &stats->mspdc,
3465 			"MAC Short Packets Discarded");
3466 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
3467 			CTLFLAG_RD, &stats->mlfc,
3468 			"MAC Local Faults");
3469 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
3470 			CTLFLAG_RD, &stats->mrfc,
3471 			"MAC Remote Faults");
3472 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
3473 			CTLFLAG_RD, &stats->rlec,
3474 			"Receive Length Errors");
3475 
3476 	/* Flow Control stats */
3477 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
3478 			CTLFLAG_RD, &stats->lxontxc,
3479 			"Link XON Transmitted");
3480 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
3481 			CTLFLAG_RD, &stats->lxonrxc,
3482 			"Link XON Received");
3483 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
3484 			CTLFLAG_RD, &stats->lxofftxc,
3485 			"Link XOFF Transmitted");
3486 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
3487 			CTLFLAG_RD, &stats->lxoffrxc,
3488 			"Link XOFF Received");
3489 
3490 	/* Packet Reception Stats */
3491 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
3492 			CTLFLAG_RD, &stats->tor,
3493 			"Total Octets Received");
3494 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
3495 			CTLFLAG_RD, &stats->gorc,
3496 			"Good Octets Received");
3497 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
3498 			CTLFLAG_RD, &stats->tpr,
3499 			"Total Packets Received");
3500 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
3501 			CTLFLAG_RD, &stats->gprc,
3502 			"Good Packets Received");
3503 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
3504 			CTLFLAG_RD, &stats->mprc,
3505 			"Multicast Packets Received");
3506 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
3507 			CTLFLAG_RD, &stats->bprc,
3508 			"Broadcast Packets Received");
3509 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
3510 			CTLFLAG_RD, &stats->prc64,
3511 			"64 byte frames received ");
3512 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
3513 			CTLFLAG_RD, &stats->prc127,
3514 			"65-127 byte frames received");
3515 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
3516 			CTLFLAG_RD, &stats->prc255,
3517 			"128-255 byte frames received");
3518 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
3519 			CTLFLAG_RD, &stats->prc511,
3520 			"256-511 byte frames received");
3521 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
3522 			CTLFLAG_RD, &stats->prc1023,
3523 			"512-1023 byte frames received");
3524 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
3525 			CTLFLAG_RD, &stats->prc1522,
3526 			"1024-1522 byte frames received");
3527 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
3528 			CTLFLAG_RD, &stats->ruc,
3529 			"Receive Undersized");
3530 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
3531 			CTLFLAG_RD, &stats->rfc,
3532 			"Fragmented Packets Received ");
3533 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
3534 			CTLFLAG_RD, &stats->roc,
3535 			"Oversized Packets Received");
3536 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
3537 			CTLFLAG_RD, &stats->rjc,
3538 			"Received Jabber");
3539 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
3540 			CTLFLAG_RD, &stats->mngprc,
3541 			"Management Packets Received");
3542 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
3543 			CTLFLAG_RD, &stats->mngptc,
3544 			"Management Packets Dropped");
3545 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
3546 			CTLFLAG_RD, &stats->xec,
3547 			"Checksum Errors");
3548 
3549 	/* Packet Transmission Stats */
3550 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
3551 			CTLFLAG_RD, &stats->gotc,
3552 			"Good Octets Transmitted");
3553 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
3554 			CTLFLAG_RD, &stats->tpt,
3555 			"Total Packets Transmitted");
3556 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
3557 			CTLFLAG_RD, &stats->gptc,
3558 			"Good Packets Transmitted");
3559 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
3560 			CTLFLAG_RD, &stats->bptc,
3561 			"Broadcast Packets Transmitted");
3562 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
3563 			CTLFLAG_RD, &stats->mptc,
3564 			"Multicast Packets Transmitted");
3565 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
3566 			CTLFLAG_RD, &stats->mngptc,
3567 			"Management Packets Transmitted");
3568 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
3569 			CTLFLAG_RD, &stats->ptc64,
3570 			"64 byte frames transmitted ");
3571 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
3572 			CTLFLAG_RD, &stats->ptc127,
3573 			"65-127 byte frames transmitted");
3574 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
3575 			CTLFLAG_RD, &stats->ptc255,
3576 			"128-255 byte frames transmitted");
3577 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
3578 			CTLFLAG_RD, &stats->ptc511,
3579 			"256-511 byte frames transmitted");
3580 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
3581 			CTLFLAG_RD, &stats->ptc1023,
3582 			"512-1023 byte frames transmitted");
3583 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
3584 			CTLFLAG_RD, &stats->ptc1522,
3585 			"1024-1522 byte frames transmitted");
3586 }
3587 #endif
3588 
3589 /*
3590  * Enable the hardware to drop packets when the buffer is full.
3591  * This is useful when multiple RX rings are used, so that no
3592  * single RX ring being full stalls the entire RX engine.  We
3593  * only enable this when multiple RX rings are used and when
3594  * flow control is disabled.
3595  */
3596 static void
3597 ix_enable_rx_drop(struct ix_softc *sc)
3598 {
3599 	struct ixgbe_hw *hw = &sc->hw;
3600 	int i;
3601 
3602 	if (bootverbose) {
3603 		if_printf(&sc->arpcom.ac_if,
3604 		    "flow control %s, enable RX drop\n",
3605 		    ix_fc2str(sc->hw.fc.current_mode));
3606 	}
3607 
3608 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
3609 		uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3610 
3611 		srrctl |= IXGBE_SRRCTL_DROP_EN;
3612 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3613 	}
3614 }
3615 
3616 static void
3617 ix_disable_rx_drop(struct ix_softc *sc)
3618 {
3619 	struct ixgbe_hw *hw = &sc->hw;
3620 	int i;
3621 
3622 	if (bootverbose) {
3623 		if_printf(&sc->arpcom.ac_if,
3624 		    "flow control %s, disable RX drop\n",
3625 		    ix_fc2str(sc->hw.fc.current_mode));
3626 	}
3627 
3628 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
3629 		uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3630 
3631 		srrctl &= ~IXGBE_SRRCTL_DROP_EN;
3632 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3633 	}
3634 }
3635 
3636 static void
3637 ix_setup_serialize(struct ix_softc *sc)
3638 {
3639 	int i = 0, j;
3640 
3641 	/* Main + RX + TX */
3642 	sc->nserialize = 1 + sc->rx_ring_cnt + sc->tx_ring_cnt;
3643 	sc->serializes =
3644 	    kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *),
3645 	        M_DEVBUF, M_WAITOK | M_ZERO);
3646 
3647 	/*
3648 	 * Set up the serializer array
3649 	 *
3650 	 * NOTE: Order is critical
3651 	 */
3652 
3653 	KKASSERT(i < sc->nserialize);
3654 	sc->serializes[i++] = &sc->main_serialize;
3655 
3656 	for (j = 0; j < sc->rx_ring_cnt; ++j) {
3657 		KKASSERT(i < sc->nserialize);
3658 		sc->serializes[i++] = &sc->rx_rings[j].rx_serialize;
3659 	}
3660 
3661 	for (j = 0; j < sc->tx_ring_cnt; ++j) {
3662 		KKASSERT(i < sc->nserialize);
3663 		sc->serializes[i++] = &sc->tx_rings[j].tx_serialize;
3664 	}
3665 
3666 	KKASSERT(i == sc->nserialize);
3667 }
3668 
3669 static int
3670 ix_alloc_intr(struct ix_softc *sc)
3671 {
3672 	struct ix_intr_data *intr;
3673 	u_int intr_flags;
3674 
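	/*
	 * Try MSI-X first; if it could not be set up, fall back to a
	 * single MSI or legacy interrupt below.
	 */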
3675 	ix_alloc_msix(sc);
3676 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
3677 		ix_set_ring_inuse(sc, FALSE);
3678 		return 0;
3679 	}
3680 
3681 	if (sc->intr_data != NULL)
3682 		kfree(sc->intr_data, M_DEVBUF);
3683 
3684 	sc->intr_cnt = 1;
3685 	sc->intr_data = kmalloc(sizeof(struct ix_intr_data), M_DEVBUF,
3686 	    M_WAITOK | M_ZERO);
3687 	intr = &sc->intr_data[0];
3688 
3689 	/*
3690 	 * Allocate MSI/legacy interrupt resource
3691 	 */
3692 	sc->intr_type = pci_alloc_1intr(sc->dev, ix_msi_enable,
3693 	    &intr->intr_rid, &intr_flags);
3694 
3695 	intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
3696 	    &intr->intr_rid, intr_flags);
3697 	if (intr->intr_res == NULL) {
3698 		device_printf(sc->dev, "Unable to allocate bus resource: "
3699 		    "interrupt\n");
3700 		return ENXIO;
3701 	}
3702 
3703 	intr->intr_serialize = &sc->main_serialize;
3704 	intr->intr_cpuid = rman_get_cpuid(intr->intr_res);
3705 	intr->intr_func = ix_intr;
3706 	intr->intr_funcarg = sc;
3707 	intr->intr_rate = IX_INTR_RATE;
3708 	intr->intr_use = IX_INTR_USE_RXTX;
3709 
3710 	sc->tx_rings[0].tx_intr_cpuid = intr->intr_cpuid;
3711 	sc->tx_rings[0].tx_intr_vec = IX_TX_INTR_VEC;
3712 
3713 	sc->rx_rings[0].rx_intr_vec = IX_RX0_INTR_VEC;
3714 
3715 	ix_set_ring_inuse(sc, FALSE);
3716 
3717 	KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
3718 	if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS)
3719 		sc->rx_rings[1].rx_intr_vec = IX_RX1_INTR_VEC;
3720 
3721 	return 0;
3722 }
3723 
3724 static void
3725 ix_free_intr(struct ix_softc *sc)
3726 {
3727 	if (sc->intr_data == NULL)
3728 		return;
3729 
3730 	if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
3731 		struct ix_intr_data *intr = &sc->intr_data[0];
3732 
3733 		KKASSERT(sc->intr_cnt == 1);
3734 		if (intr->intr_res != NULL) {
3735 			bus_release_resource(sc->dev, SYS_RES_IRQ,
3736 			    intr->intr_rid, intr->intr_res);
3737 		}
3738 		if (sc->intr_type == PCI_INTR_TYPE_MSI)
3739 			pci_release_msi(sc->dev);
3740 
3741 		kfree(sc->intr_data, M_DEVBUF);
3742 	} else {
3743 		ix_free_msix(sc, TRUE);
3744 	}
3745 }
3746 
3747 static void
3748 ix_set_ring_inuse(struct ix_softc *sc, boolean_t polling)
3749 {
3750 	sc->rx_ring_inuse = ix_get_rxring_inuse(sc, polling);
3751 	sc->tx_ring_inuse = ix_get_txring_inuse(sc, polling);
3752 	if (bootverbose) {
3753 		if_printf(&sc->arpcom.ac_if,
3754 		    "RX rings %d/%d, TX rings %d/%d\n",
3755 		    sc->rx_ring_inuse, sc->rx_ring_cnt,
3756 		    sc->tx_ring_inuse, sc->tx_ring_cnt);
3757 	}
3758 }
3759 
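/*
 * Number of RX rings actually used: one without hardware RSS, all of
 * them when polling, IX_MIN_RXRING_RSS with non-MSI-X interrupts, and
 * otherwise the MSI-X RX ring count.
 */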
3760 static int
3761 ix_get_rxring_inuse(const struct ix_softc *sc, boolean_t polling)
3762 {
3763 	if (!IX_ENABLE_HWRSS(sc))
3764 		return 1;
3765 
3766 	if (polling)
3767 		return sc->rx_ring_cnt;
3768 	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
3769 		return IX_MIN_RXRING_RSS;
3770 	else
3771 		return sc->rx_ring_msix;
3772 }
3773 
3774 static int
3775 ix_get_txring_inuse(const struct ix_softc *sc, boolean_t polling)
3776 {
3777 	if (!IX_ENABLE_HWTSS(sc))
3778 		return 1;
3779 
3780 	if (polling)
3781 		return sc->tx_ring_cnt;
3782 	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
3783 		return 1;
3784 	else
3785 		return sc->tx_ring_msix;
3786 }
3787 
3788 static int
3789 ix_setup_intr(struct ix_softc *sc)
3790 {
3791 	int i;
3792 
3793 	for (i = 0; i < sc->intr_cnt; ++i) {
3794 		struct ix_intr_data *intr = &sc->intr_data[i];
3795 		int error;
3796 
3797 		error = bus_setup_intr_descr(sc->dev, intr->intr_res,
3798 		    INTR_MPSAFE, intr->intr_func, intr->intr_funcarg,
3799 		    &intr->intr_hand, intr->intr_serialize, intr->intr_desc);
3800 		if (error) {
3801 			device_printf(sc->dev, "can't set up intr %d\n", i);
3802 			ix_teardown_intr(sc, i);
3803 			return error;
3804 		}
3805 	}
3806 	return 0;
3807 }
3808 
3809 static void
3810 ix_teardown_intr(struct ix_softc *sc, int intr_cnt)
3811 {
3812 	int i;
3813 
3814 	if (sc->intr_data == NULL)
3815 		return;
3816 
3817 	for (i = 0; i < intr_cnt; ++i) {
3818 		struct ix_intr_data *intr = &sc->intr_data[i];
3819 
3820 		bus_teardown_intr(sc->dev, intr->intr_res, intr->intr_hand);
3821 	}
3822 }
3823 
3824 static void
3825 ix_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
3826 {
3827 	struct ix_softc *sc = ifp->if_softc;
3828 
3829 	ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz);
3830 }
3831 
3832 static void
3833 ix_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
3834 {
3835 	struct ix_softc *sc = ifp->if_softc;
3836 
3837 	ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz);
3838 }
3839 
3840 static int
3841 ix_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
3842 {
3843 	struct ix_softc *sc = ifp->if_softc;
3844 
3845 	return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz);
3846 }
3847 
3848 #ifdef INVARIANTS
3849 
3850 static void
3851 ix_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
3852     boolean_t serialized)
3853 {
3854 	struct ix_softc *sc = ifp->if_softc;
3855 
3856 	ifnet_serialize_array_assert(sc->serializes, sc->nserialize, slz,
3857 	    serialized);
3858 }
3859 
3860 #endif	/* INVARIANTS */
3861 
3862 static void
3863 ix_free_rings(struct ix_softc *sc)
3864 {
3865 	int i;
3866 
3867 	if (sc->tx_rings != NULL) {
3868 		for (i = 0; i < sc->tx_ring_cnt; ++i) {
3869 			struct ix_tx_ring *txr = &sc->tx_rings[i];
3870 
3871 			ix_destroy_tx_ring(txr, txr->tx_ndesc);
3872 		}
3873 		kfree(sc->tx_rings, M_DEVBUF);
3874 	}
3875 
3876 	if (sc->rx_rings != NULL) {
3877 		for (i = 0; i < sc->rx_ring_cnt; ++i) {
3878 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
3879 
3880 			ix_destroy_rx_ring(rxr, rxr->rx_ndesc);
3881 		}
3882 		kfree(sc->rx_rings, M_DEVBUF);
3883 	}
3884 
3885 	if (sc->parent_tag != NULL)
3886 		bus_dma_tag_destroy(sc->parent_tag);
3887 }
3888 
3889 static void
3890 ix_watchdog(struct ifaltq_subque *ifsq)
3891 {
3892 	struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
3893 	struct ifnet *ifp = ifsq_get_ifp(ifsq);
3894 	struct ix_softc *sc = ifp->if_softc;
3895 	int i;
3896 
3897 	KKASSERT(txr->tx_ifsq == ifsq);
3898 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3899 
3900 	/*
3901 	 * If the interface has been paused, don't do the watchdog check.
3902 	 */
3903 	if (IXGBE_READ_REG(&sc->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF) {
3904 		txr->tx_watchdog.wd_timer = 5;
3905 		return;
3906 	}
3907 
3908 	if_printf(ifp, "Watchdog timeout -- resetting\n");
3909 	if_printf(ifp, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->tx_idx,
3910 	    IXGBE_READ_REG(&sc->hw, IXGBE_TDH(txr->tx_idx)),
3911 	    IXGBE_READ_REG(&sc->hw, IXGBE_TDT(txr->tx_idx)));
3912 	if_printf(ifp, "TX(%d) desc avail = %d, next TX to Clean = %d\n",
3913 	    txr->tx_idx, txr->tx_avail, txr->tx_next_clean);
3914 
3915 	ix_init(sc);
3916 	for (i = 0; i < sc->tx_ring_inuse; ++i)
3917 		ifsq_devstart_sched(sc->tx_rings[i].tx_ifsq);
3918 }
3919 
3920 static void
3921 ix_free_tx_ring(struct ix_tx_ring *txr)
3922 {
3923 	int i;
3924 
3925 	for (i = 0; i < txr->tx_ndesc; ++i) {
3926 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
3927 
3928 		if (txbuf->m_head != NULL) {
3929 			bus_dmamap_unload(txr->tx_tag, txbuf->map);
3930 			m_freem(txbuf->m_head);
3931 			txbuf->m_head = NULL;
3932 		}
3933 	}
3934 }
3935 
3936 static void
3937 ix_free_rx_ring(struct ix_rx_ring *rxr)
3938 {
3939 	int i;
3940 
3941 	for (i = 0; i < rxr->rx_ndesc; ++i) {
3942 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
3943 
3944 		if (rxbuf->fmp != NULL) {
3945 			m_freem(rxbuf->fmp);
3946 			rxbuf->fmp = NULL;
3947 			rxbuf->lmp = NULL;
3948 		} else {
3949 			KKASSERT(rxbuf->lmp == NULL);
3950 		}
3951 		if (rxbuf->m_head != NULL) {
3952 			bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
3953 			m_freem(rxbuf->m_head);
3954 			rxbuf->m_head = NULL;
3955 		}
3956 	}
3957 }
3958 
3959 static int
3960 ix_newbuf(struct ix_rx_ring *rxr, int i, boolean_t wait)
3961 {
3962 	struct mbuf *m;
3963 	bus_dma_segment_t seg;
3964 	bus_dmamap_t map;
3965 	struct ix_rx_buf *rxbuf;
3966 	int flags, error, nseg;
3967 
3968 	flags = M_NOWAIT;
3969 	if (__predict_false(wait))
3970 		flags = M_WAITOK;
3971 
3972 	m = m_getjcl(flags, MT_DATA, M_PKTHDR, rxr->rx_mbuf_sz);
3973 	if (m == NULL) {
3974 		if (wait) {
3975 			if_printf(&rxr->rx_sc->arpcom.ac_if,
3976 			    "Unable to allocate RX mbuf\n");
3977 		}
3978 		return ENOBUFS;
3979 	}
3980 	m->m_len = m->m_pkthdr.len = rxr->rx_mbuf_sz;
3981 
3982 	error = bus_dmamap_load_mbuf_segment(rxr->rx_tag,
3983 	    rxr->rx_sparemap, m, &seg, 1, &nseg, BUS_DMA_NOWAIT);
3984 	if (error) {
3985 		m_freem(m);
3986 		if (wait) {
3987 			if_printf(&rxr->rx_sc->arpcom.ac_if,
3988 			    "Unable to load RX mbuf\n");
3989 		}
3990 		return error;
3991 	}
3992 
3993 	rxbuf = &rxr->rx_buf[i];
3994 	if (rxbuf->m_head != NULL)
3995 		bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
3996 
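	/*
	 * The new mbuf was loaded with the spare map; swap it with the
	 * slot's map, so the old map becomes the spare for the next
	 * allocation.
	 */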
3997 	map = rxbuf->map;
3998 	rxbuf->map = rxr->rx_sparemap;
3999 	rxr->rx_sparemap = map;
4000 
4001 	rxbuf->m_head = m;
4002 	rxbuf->paddr = seg.ds_addr;
4003 
4004 	ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4005 	return 0;
4006 }
4007 
4008 static void
4009 ix_add_sysctl(struct ix_softc *sc)
4010 {
4011 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
4012 	struct sysctl_oid *tree = device_get_sysctl_tree(sc->dev);
4013 #ifdef IX_RSS_DEBUG
4014 	char node[32];
4015 	int i;
4016 #endif
4017 
4018 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4019 	    OID_AUTO, "rxr", CTLFLAG_RD, &sc->rx_ring_cnt, 0, "# of RX rings");
4020 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4021 	    OID_AUTO, "rxr_inuse", CTLFLAG_RD, &sc->rx_ring_inuse, 0,
4022 	    "# of RX rings used");
4023 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4024 	    OID_AUTO, "txr", CTLFLAG_RD, &sc->tx_ring_cnt, 0, "# of TX rings");
4025 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4026 	    OID_AUTO, "txr_inuse", CTLFLAG_RD, &sc->tx_ring_inuse, 0,
4027 	    "# of TX rings used");
4028 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4029 	    OID_AUTO, "rxd", CTLTYPE_INT | CTLFLAG_RD,
4030 	    sc, 0, ix_sysctl_rxd, "I",
4031 	    "# of RX descs");
4032 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4033 	    OID_AUTO, "txd", CTLTYPE_INT | CTLFLAG_RD,
4034 	    sc, 0, ix_sysctl_txd, "I",
4035 	    "# of TX descs");
4036 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4037 	    OID_AUTO, "tx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4038 	    sc, 0, ix_sysctl_tx_wreg_nsegs, "I",
4039 	    "# of segments sent before write to hardware register");
4040 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4041 	    OID_AUTO, "rx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4042 	    sc, 0, ix_sysctl_rx_wreg_nsegs, "I",
4043 	    "# of received segments sent before write to hardware register");
4044 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4045 	    OID_AUTO, "tx_intr_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4046 	    sc, 0, ix_sysctl_tx_intr_nsegs, "I",
4047 	    "# of segments per TX interrupt");
4048 
4049 #ifdef IFPOLL_ENABLE
4050 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4051 	    OID_AUTO, "npoll_rxoff", CTLTYPE_INT|CTLFLAG_RW,
4052 	    sc, 0, ix_sysctl_npoll_rxoff, "I", "NPOLLING RX cpu offset");
4053 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4054 	    OID_AUTO, "npoll_txoff", CTLTYPE_INT|CTLFLAG_RW,
4055 	    sc, 0, ix_sysctl_npoll_txoff, "I", "NPOLLING TX cpu offset");
4056 #endif
4057 
4058 #define IX_ADD_INTR_RATE_SYSCTL(sc, use, name) \
4059 do { \
4060 	ix_add_intr_rate_sysctl(sc, IX_INTR_USE_##use, #name, \
4061 	    ix_sysctl_##name, #use " interrupt rate"); \
4062 } while (0)
4063 
4064 	IX_ADD_INTR_RATE_SYSCTL(sc, RXTX, rxtx_intr_rate);
4065 	IX_ADD_INTR_RATE_SYSCTL(sc, RX, rx_intr_rate);
4066 	IX_ADD_INTR_RATE_SYSCTL(sc, TX, tx_intr_rate);
4067 	IX_ADD_INTR_RATE_SYSCTL(sc, STATUS, sts_intr_rate);
4068 
4069 #undef IX_ADD_INTR_RATE_SYSCTL
4070 
4071 #ifdef IX_RSS_DEBUG
4072 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4073 	    OID_AUTO, "rss_debug", CTLFLAG_RW, &sc->rss_debug, 0,
4074 	    "RSS debug level");
4075 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
4076 		ksnprintf(node, sizeof(node), "rx%d_pkt", i);
4077 		SYSCTL_ADD_ULONG(ctx,
4078 		    SYSCTL_CHILDREN(tree), OID_AUTO, node,
4079 		    CTLFLAG_RW, &sc->rx_rings[i].rx_pkts, "RXed packets");
4080 	}
4081 #endif
4082 
4083 #if 0
4084 	ix_add_hw_stats(sc);
4085 #endif
4086 
4087 }
4088 
4089 static int
4090 ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4091 {
4092 	struct ix_softc *sc = (void *)arg1;
4093 	struct ifnet *ifp = &sc->arpcom.ac_if;
4094 	int error, nsegs, i;
4095 
4096 	nsegs = sc->tx_rings[0].tx_wreg_nsegs;
4097 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4098 	if (error || req->newptr == NULL)
4099 		return error;
4100 	if (nsegs < 0)
4101 		return EINVAL;
4102 
4103 	ifnet_serialize_all(ifp);
4104 	for (i = 0; i < sc->tx_ring_cnt; ++i)
4105 		sc->tx_rings[i].tx_wreg_nsegs = nsegs;
4106 	ifnet_deserialize_all(ifp);
4107 
4108 	return 0;
4109 }
4110 
4111 static int
4112 ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4113 {
4114 	struct ix_softc *sc = (void *)arg1;
4115 	struct ifnet *ifp = &sc->arpcom.ac_if;
4116 	int error, nsegs, i;
4117 
4118 	nsegs = sc->rx_rings[0].rx_wreg_nsegs;
4119 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4120 	if (error || req->newptr == NULL)
4121 		return error;
4122 	if (nsegs < 0)
4123 		return EINVAL;
4124 
4125 	ifnet_serialize_all(ifp);
4126 	for (i = 0; i < sc->rx_ring_cnt; ++i)
4127 		sc->rx_rings[i].rx_wreg_nsegs = nsegs;
4128 	ifnet_deserialize_all(ifp);
4129 
4130 	return 0;
4131 }
4132 
4133 static int
4134 ix_sysctl_txd(SYSCTL_HANDLER_ARGS)
4135 {
4136 	struct ix_softc *sc = (void *)arg1;
4137 	int txd;
4138 
4139 	txd = sc->tx_rings[0].tx_ndesc;
4140 	return sysctl_handle_int(oidp, &txd, 0, req);
4141 }
4142 
4143 static int
4144 ix_sysctl_rxd(SYSCTL_HANDLER_ARGS)
4145 {
4146 	struct ix_softc *sc = (void *)arg1;
4147 	int rxd;
4148 
4149 	rxd = sc->rx_rings[0].rx_ndesc;
4150 	return sysctl_handle_int(oidp, &rxd, 0, req);
4151 }
4152 
4153 static int
4154 ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS)
4155 {
4156 	struct ix_softc *sc = (void *)arg1;
4157 	struct ifnet *ifp = &sc->arpcom.ac_if;
4158 	struct ix_tx_ring *txr = &sc->tx_rings[0];
4159 	int error, nsegs;
4160 
4161 	nsegs = txr->tx_intr_nsegs;
4162 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4163 	if (error || req->newptr == NULL)
4164 		return error;
4165 	if (nsegs < 0)
4166 		return EINVAL;
4167 
4168 	ifnet_serialize_all(ifp);
4169 
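	/*
	 * Sanity check the threshold: leave headroom for one maximally
	 * scattered frame (IX_MAX_SCATTER) plus the reserved
	 * descriptors, so the ring cannot fill up before the cleanup
	 * threshold is reached.
	 */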
4170 	if (nsegs >= txr->tx_ndesc - IX_MAX_SCATTER - IX_TX_RESERVED) {
4171 		error = EINVAL;
4172 	} else {
4173 		int i;
4174 
4175 		error = 0;
4176 		for (i = 0; i < sc->tx_ring_cnt; ++i)
4177 			sc->tx_rings[i].tx_intr_nsegs = nsegs;
4178 	}
4179 
4180 	ifnet_deserialize_all(ifp);
4181 
4182 	return error;
4183 }
4184 
4185 static void
4186 ix_set_eitr(struct ix_softc *sc, int idx, int rate)
4187 {
4188 	uint32_t eitr, eitr_intvl;
4189 
4190 	eitr = IXGBE_READ_REG(&sc->hw, IXGBE_EITR(idx));
4191 	eitr_intvl = 1000000000 / 256 / rate;
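	/*
	 * Convert the requested rate (interrupts/second) into the EITR
	 * interval; the divisor of 256 reflects the ~256ns granularity
	 * the raw register value appears to use, e.g. a rate of 8000
	 * yields an interval of roughly 488.
	 */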
4192 
4193 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4194 		eitr &= ~IX_EITR_INTVL_MASK_82598;
4195 		if (eitr_intvl == 0)
4196 			eitr_intvl = 1;
4197 		else if (eitr_intvl > IX_EITR_INTVL_MASK_82598)
4198 			eitr_intvl = IX_EITR_INTVL_MASK_82598;
4199 	} else {
4200 		eitr &= ~IX_EITR_INTVL_MASK;
4201 
4202 		eitr_intvl &= ~IX_EITR_INTVL_RSVD_MASK;
4203 		if (eitr_intvl == 0)
4204 			eitr_intvl = IX_EITR_INTVL_MIN;
4205 		else if (eitr_intvl > IX_EITR_INTVL_MAX)
4206 			eitr_intvl = IX_EITR_INTVL_MAX;
4207 	}
4208 	eitr |= eitr_intvl;
4209 
4210 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EITR(idx), eitr);
4211 }
4212 
4213 static int
4214 ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS)
4215 {
4216 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RXTX);
4217 }
4218 
4219 static int
4220 ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS)
4221 {
4222 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RX);
4223 }
4224 
4225 static int
4226 ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS)
4227 {
4228 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_TX);
4229 }
4230 
4231 static int
4232 ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS)
4233 {
4234 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_STATUS);
4235 }
4236 
4237 static int
4238 ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int use)
4239 {
4240 	struct ix_softc *sc = (void *)arg1;
4241 	struct ifnet *ifp = &sc->arpcom.ac_if;
4242 	int error, rate, i;
4243 
4244 	rate = 0;
4245 	for (i = 0; i < sc->intr_cnt; ++i) {
4246 		if (sc->intr_data[i].intr_use == use) {
4247 			rate = sc->intr_data[i].intr_rate;
4248 			break;
4249 		}
4250 	}
4251 
4252 	error = sysctl_handle_int(oidp, &rate, 0, req);
4253 	if (error || req->newptr == NULL)
4254 		return error;
4255 	if (rate <= 0)
4256 		return EINVAL;
4257 
4258 	ifnet_serialize_all(ifp);
4259 
4260 	for (i = 0; i < sc->intr_cnt; ++i) {
4261 		if (sc->intr_data[i].intr_use == use) {
4262 			sc->intr_data[i].intr_rate = rate;
4263 			if (ifp->if_flags & IFF_RUNNING)
4264 				ix_set_eitr(sc, i, rate);
4265 		}
4266 	}
4267 
4268 	ifnet_deserialize_all(ifp);
4269 
4270 	return error;
4271 }
4272 
4273 static void
4274 ix_add_intr_rate_sysctl(struct ix_softc *sc, int use,
4275     const char *name, int (*handler)(SYSCTL_HANDLER_ARGS), const char *desc)
4276 {
4277 	int i;
4278 
4279 	for (i = 0; i < sc->intr_cnt; ++i) {
4280 		if (sc->intr_data[i].intr_use == use) {
4281 			SYSCTL_ADD_PROC(device_get_sysctl_ctx(sc->dev),
4282 			    SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)),
4283 			    OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW,
4284 			    sc, 0, handler, "I", desc);
4285 			break;
4286 		}
4287 	}
4288 }
4289 
4290 static void
4291 ix_set_timer_cpuid(struct ix_softc *sc, boolean_t polling)
4292 {
4293 	if (polling || sc->intr_type == PCI_INTR_TYPE_MSIX)
4294 		sc->timer_cpuid = 0; /* XXX fixed */
4295 	else
4296 		sc->timer_cpuid = rman_get_cpuid(sc->intr_data[0].intr_res);
4297 }
4298 
4299 static void
4300 ix_alloc_msix(struct ix_softc *sc)
4301 {
4302 	int msix_enable, msix_cnt, msix_cnt2, alloc_cnt;
4303 	struct ix_intr_data *intr;
4304 	int i, x, error;
4305 	int offset, offset_def, agg_rxtx, ring_max;
4306 	boolean_t aggregate, setup = FALSE;
4307 
4308 	msix_enable = ix_msix_enable;
4309 	/*
4310 	 * Don't enable MSI-X on 82598 by default, see:
4311 	 * 82598 specification update errata #38
4312 	 */
4313 	if (sc->hw.mac.type == ixgbe_mac_82598EB)
4314 		msix_enable = 0;
4315 	msix_enable = device_getenv_int(sc->dev, "msix.enable", msix_enable);
4316 	if (!msix_enable)
4317 		return;
4318 
4319 	msix_cnt = pci_msix_count(sc->dev);
4320 #ifdef IX_MSIX_DEBUG
4321 	msix_cnt = device_getenv_int(sc->dev, "msix.count", msix_cnt);
4322 #endif
4323 	if (msix_cnt <= 1) {
4324 		/* Using MSI-X with only one vector does not make sense */
4325 		return;
4326 	}
4327 
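	/*
	 * Round the available vector count down to the largest power of
	 * two; the RX/TX ring counts are clamped to this value below.
	 */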
4328 	i = 0;
4329 	while ((1 << (i + 1)) <= msix_cnt)
4330 		++i;
4331 	msix_cnt2 = 1 << i;
4332 
4333 	if (bootverbose) {
4334 		device_printf(sc->dev, "MSI-X count %d/%d\n",
4335 		    msix_cnt2, msix_cnt);
4336 	}
4337 
4338 	KKASSERT(msix_cnt >= msix_cnt2);
4339 	if (msix_cnt == msix_cnt2) {
4340 		/* We need at least one MSI-X for link status */
4341 		msix_cnt2 >>= 1;
4342 		if (msix_cnt2 <= 1) {
4343 			/* One MSI-X for RX/TX does not make sense */
4344 			device_printf(sc->dev, "not enough MSI-X for TX/RX, "
4345 			    "MSI-X count %d/%d\n", msix_cnt2, msix_cnt);
4346 			return;
4347 		}
4348 		KKASSERT(msix_cnt > msix_cnt2);
4349 
4350 		if (bootverbose) {
4351 			device_printf(sc->dev, "MSI-X count eq fixup %d/%d\n",
4352 			    msix_cnt2, msix_cnt);
4353 		}
4354 	}
4355 
4356 	/*
4357 	 * Make sure that we don't exceed the limitations of the
4358 	 * interrupt related registers (EIMS, etc).
4359 	 *
4360 	 * NOTE: msix_cnt > msix_cnt2 when we reach here
4361 	 */
4362 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4363 		if (msix_cnt2 > IX_MAX_MSIX_82598)
4364 			msix_cnt2 = IX_MAX_MSIX_82598;
4365 	} else {
4366 		if (msix_cnt2 > IX_MAX_MSIX)
4367 			msix_cnt2 = IX_MAX_MSIX;
4368 	}
4369 	msix_cnt = msix_cnt2 + 1;	/* +1 for status */
4370 
4371 	if (bootverbose) {
4372 		device_printf(sc->dev, "MSI-X count max fixup %d/%d\n",
4373 		    msix_cnt2, msix_cnt);
4374 	}
4375 
4376 	sc->rx_ring_msix = sc->rx_ring_cnt;
4377 	if (sc->rx_ring_msix > msix_cnt2)
4378 		sc->rx_ring_msix = msix_cnt2;
4379 
4380 	sc->tx_ring_msix = sc->tx_ring_cnt;
4381 	if (sc->tx_ring_msix > msix_cnt2)
4382 		sc->tx_ring_msix = msix_cnt2;
4383 
4384 	ring_max = sc->rx_ring_msix;
4385 	if (ring_max < sc->tx_ring_msix)
4386 		ring_max = sc->tx_ring_msix;
4387 
4388 	/* Allow user to force independent RX/TX MSI-X handling */
4389 	agg_rxtx = device_getenv_int(sc->dev, "msix.agg_rxtx",
4390 	    ix_msix_agg_rxtx);
4391 
4392 	if (!agg_rxtx && msix_cnt >= sc->tx_ring_msix + sc->rx_ring_msix + 1) {
4393 		/*
4394 		 * Independent TX/RX MSI-X
4395 		 */
4396 		aggregate = FALSE;
4397 		if (bootverbose)
4398 			device_printf(sc->dev, "independent TX/RX MSI-X\n");
4399 		alloc_cnt = sc->tx_ring_msix + sc->rx_ring_msix;
4400 	} else {
4401 		/*
4402 		 * Aggregate TX/RX MSI-X
4403 		 */
4404 		aggregate = TRUE;
4405 		if (bootverbose)
4406 			device_printf(sc->dev, "aggregate TX/RX MSI-X\n");
4407 		alloc_cnt = msix_cnt2;
4408 		if (alloc_cnt > ring_max)
4409 			alloc_cnt = ring_max;
4410 		KKASSERT(alloc_cnt >= sc->rx_ring_msix &&
4411 		    alloc_cnt >= sc->tx_ring_msix);
4412 	}
4413 	++alloc_cnt;	/* For status */
4414 
4415 	if (bootverbose) {
4416 		device_printf(sc->dev, "MSI-X alloc %d, "
4417 		    "RX ring %d, TX ring %d\n", alloc_cnt,
4418 		    sc->rx_ring_msix, sc->tx_ring_msix);
4419 	}
4420 
4421 	sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82598);
4422 	sc->msix_mem_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4423 	    &sc->msix_mem_rid, RF_ACTIVE);
4424 	if (sc->msix_mem_res == NULL) {
4425 		sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82599);
4426 		sc->msix_mem_res = bus_alloc_resource_any(sc->dev,
4427 		    SYS_RES_MEMORY, &sc->msix_mem_rid, RF_ACTIVE);
4428 		if (sc->msix_mem_res == NULL) {
4429 			device_printf(sc->dev, "Unable to map MSI-X table\n");
4430 			return;
4431 		}
4432 	}
4433 
4434 	sc->intr_cnt = alloc_cnt;
4435 	sc->intr_data = kmalloc(sizeof(struct ix_intr_data) * sc->intr_cnt,
4436 	    M_DEVBUF, M_WAITOK | M_ZERO);
4437 	for (x = 0; x < sc->intr_cnt; ++x) {
4438 		intr = &sc->intr_data[x];
4439 		intr->intr_rid = -1;
4440 		intr->intr_rate = IX_INTR_RATE;
4441 	}
4442 
4443 	x = 0;
4444 	if (!aggregate) {
4445 		/*
4446 		 * RX rings
4447 		 */
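		/*
		 * Spread the RX vectors across CPUs starting at a
		 * per-device default offset; the msix.rxoff tunable may
		 * override it, but it must stay below ncpus2 and be a
		 * multiple of the RX ring count.
		 */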
4448 		if (sc->rx_ring_msix == ncpus2) {
4449 			offset = 0;
4450 		} else {
4451 			offset_def = (sc->rx_ring_msix *
4452 			    device_get_unit(sc->dev)) % ncpus2;
4453 
4454 			offset = device_getenv_int(sc->dev,
4455 			    "msix.rxoff", offset_def);
4456 			if (offset >= ncpus2 ||
4457 			    offset % sc->rx_ring_msix != 0) {
4458 				device_printf(sc->dev,
4459 				    "invalid msix.rxoff %d, use %d\n",
4460 				    offset, offset_def);
4461 				offset = offset_def;
4462 			}
4463 		}
4464 		ix_conf_rx_msix(sc, 0, &x, offset);
4465 
4466 		/*
4467 		 * TX rings
4468 		 */
4469 		if (sc->tx_ring_msix == ncpus2) {
4470 			offset = 0;
4471 		} else {
4472 			offset_def = (sc->tx_ring_msix *
4473 			    device_get_unit(sc->dev)) % ncpus2;
4474 
4475 			offset = device_getenv_int(sc->dev,
4476 			    "msix.txoff", offset_def);
4477 			if (offset >= ncpus2 ||
4478 			    offset % sc->tx_ring_msix != 0) {
4479 				device_printf(sc->dev,
4480 				    "invalid msix.txoff %d, use %d\n",
4481 				    offset, offset_def);
4482 				offset = offset_def;
4483 			}
4484 		}
4485 		ix_conf_tx_msix(sc, 0, &x, offset);
4486 	} else {
4487 		int ring_agg;
4488 
4489 		ring_agg = sc->rx_ring_msix;
4490 		if (ring_agg > sc->tx_ring_msix)
4491 			ring_agg = sc->tx_ring_msix;
4492 
4493 		if (ring_max == ncpus2) {
4494 			offset = 0;
4495 		} else {
4496 			offset_def = (ring_max * device_get_unit(sc->dev)) %
4497 			    ncpus2;
4498 
4499 			offset = device_getenv_int(sc->dev, "msix.off",
4500 			    offset_def);
4501 			if (offset >= ncpus2 || offset % ring_max != 0) {
4502 				device_printf(sc->dev,
4503 				    "invalid msix.off %d, use %d\n",
4504 				    offset, offset_def);
4505 				offset = offset_def;
4506 			}
4507 		}
4508 
4509 		for (i = 0; i < ring_agg; ++i) {
4510 			struct ix_tx_ring *txr = &sc->tx_rings[i];
4511 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
4512 
4513 			KKASSERT(x < sc->intr_cnt);
4514 			rxr->rx_intr_vec = x;
4515 			ix_setup_msix_eims(sc, x,
4516 			    &rxr->rx_eims, &rxr->rx_eims_val);
4517 			rxr->rx_txr = txr;
4518 			/* NOTE: Leave TX ring's intr_vec negative */
4519 
4520 			intr = &sc->intr_data[x++];
4521 
4522 			intr->intr_serialize = &rxr->rx_serialize;
4523 			intr->intr_func = ix_msix_rxtx;
4524 			intr->intr_funcarg = rxr;
4525 			intr->intr_use = IX_INTR_USE_RXTX;
4526 
4527 			intr->intr_cpuid = i + offset;
4528 			KKASSERT(intr->intr_cpuid < ncpus2);
4529 			txr->tx_intr_cpuid = intr->intr_cpuid;
4530 
4531 			ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0),
4532 			    "%s rxtx%d", device_get_nameunit(sc->dev), i);
4533 			intr->intr_desc = intr->intr_desc0;
4534 		}
4535 
4536 		if (ring_agg != ring_max) {
4537 			if (ring_max == sc->tx_ring_msix)
4538 				ix_conf_tx_msix(sc, i, &x, offset);
4539 			else
4540 				ix_conf_rx_msix(sc, i, &x, offset);
4541 		}
4542 	}
4543 
4544 	/*
4545 	 * Status MSI-X
4546 	 */
4547 	KKASSERT(x < sc->intr_cnt);
4548 	sc->sts_msix_vec = x;
4549 
4550 	intr = &sc->intr_data[x++];
4551 
4552 	intr->intr_serialize = &sc->main_serialize;
4553 	intr->intr_func = ix_msix_status;
4554 	intr->intr_funcarg = sc;
4555 	intr->intr_cpuid = 0;
4556 	intr->intr_use = IX_INTR_USE_STATUS;
4557 
4558 	ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s sts",
4559 	    device_get_nameunit(sc->dev));
4560 	intr->intr_desc = intr->intr_desc0;
4561 
4562 	KKASSERT(x == sc->intr_cnt);
4563 
4564 	error = pci_setup_msix(sc->dev);
4565 	if (error) {
4566 		device_printf(sc->dev, "Setup MSI-X failed\n");
4567 		goto back;
4568 	}
4569 	setup = TRUE;
4570 
4571 	for (i = 0; i < sc->intr_cnt; ++i) {
4572 		intr = &sc->intr_data[i];
4573 
4574 		error = pci_alloc_msix_vector(sc->dev, i, &intr->intr_rid,
4575 		    intr->intr_cpuid);
4576 		if (error) {
4577 			device_printf(sc->dev,
4578 			    "Unable to allocate MSI-X %d on cpu%d\n", i,
4579 			    intr->intr_cpuid);
4580 			goto back;
4581 		}
4582 
4583 		intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
4584 		    &intr->intr_rid, RF_ACTIVE);
4585 		if (intr->intr_res == NULL) {
4586 			device_printf(sc->dev,
4587 			    "Unable to allocate MSI-X %d resource\n", i);
4588 			error = ENOMEM;
4589 			goto back;
4590 		}
4591 	}
4592 
4593 	pci_enable_msix(sc->dev);
4594 	sc->intr_type = PCI_INTR_TYPE_MSIX;
4595 back:
4596 	if (error)
4597 		ix_free_msix(sc, setup);
4598 }
4599 
4600 static void
4601 ix_free_msix(struct ix_softc *sc, boolean_t setup)
4602 {
4603 	int i;
4604 
4605 	KKASSERT(sc->intr_cnt > 1);
4606 
4607 	for (i = 0; i < sc->intr_cnt; ++i) {
4608 		struct ix_intr_data *intr = &sc->intr_data[i];
4609 
4610 		if (intr->intr_res != NULL) {
4611 			bus_release_resource(sc->dev, SYS_RES_IRQ,
4612 			    intr->intr_rid, intr->intr_res);
4613 		}
4614 		if (intr->intr_rid >= 0)
4615 			pci_release_msix_vector(sc->dev, intr->intr_rid);
4616 	}
4617 	if (setup)
4618 		pci_teardown_msix(sc->dev);
4619 
4620 	sc->intr_cnt = 0;
4621 	kfree(sc->intr_data, M_DEVBUF);
4622 	sc->intr_data = NULL;
4623 }
4624 
4625 static void
4626 ix_conf_rx_msix(struct ix_softc *sc, int i, int *x0, int offset)
4627 {
4628 	int x = *x0;
4629 
4630 	for (; i < sc->rx_ring_msix; ++i) {
4631 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
4632 		struct ix_intr_data *intr;
4633 
4634 		KKASSERT(x < sc->intr_cnt);
4635 		rxr->rx_intr_vec = x;
4636 		ix_setup_msix_eims(sc, x, &rxr->rx_eims, &rxr->rx_eims_val);
4637 
4638 		intr = &sc->intr_data[x++];
4639 
4640 		intr->intr_serialize = &rxr->rx_serialize;
4641 		intr->intr_func = ix_msix_rx;
4642 		intr->intr_funcarg = rxr;
4643 		intr->intr_rate = IX_MSIX_RX_RATE;
4644 		intr->intr_use = IX_INTR_USE_RX;
4645 
4646 		intr->intr_cpuid = i + offset;
4647 		KKASSERT(intr->intr_cpuid < ncpus2);
4648 
4649 		ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s rx%d",
4650 		    device_get_nameunit(sc->dev), i);
4651 		intr->intr_desc = intr->intr_desc0;
4652 	}
4653 	*x0 = x;
4654 }
4655 
4656 static void
4657 ix_conf_tx_msix(struct ix_softc *sc, int i, int *x0, int offset)
4658 {
4659 	int x = *x0;
4660 
4661 	for (; i < sc->tx_ring_msix; ++i) {
4662 		struct ix_tx_ring *txr = &sc->tx_rings[i];
4663 		struct ix_intr_data *intr;
4664 
4665 		KKASSERT(x < sc->intr_cnt);
4666 		txr->tx_intr_vec = x;
4667 		ix_setup_msix_eims(sc, x, &txr->tx_eims, &txr->tx_eims_val);
4668 
4669 		intr = &sc->intr_data[x++];
4670 
4671 		intr->intr_serialize = &txr->tx_serialize;
4672 		intr->intr_func = ix_msix_tx;
4673 		intr->intr_funcarg = txr;
4674 		intr->intr_rate = IX_MSIX_TX_RATE;
4675 		intr->intr_use = IX_INTR_USE_TX;
4676 
4677 		intr->intr_cpuid = i + offset;
4678 		KKASSERT(intr->intr_cpuid < ncpus2);
4679 		txr->tx_intr_cpuid = intr->intr_cpuid;
4680 
4681 		ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s tx%d",
4682 		    device_get_nameunit(sc->dev), i);
4683 		intr->intr_desc = intr->intr_desc0;
4684 	}
4685 	*x0 = x;
4686 }
4687 
4688 static void
4689 ix_msix_rx(void *xrxr)
4690 {
4691 	struct ix_rx_ring *rxr = xrxr;
4692 
4693 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4694 
4695 	ix_rxeof(rxr, -1);
4696 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
4697 }
4698 
4699 static void
4700 ix_msix_tx(void *xtxr)
4701 {
4702 	struct ix_tx_ring *txr = xtxr;
4703 
4704 	ASSERT_SERIALIZED(&txr->tx_serialize);
4705 
4706 	ix_txeof(txr, *(txr->tx_hdr));
4707 	if (!ifsq_is_empty(txr->tx_ifsq))
4708 		ifsq_devstart(txr->tx_ifsq);
4709 	IXGBE_WRITE_REG(&txr->tx_sc->hw, txr->tx_eims, txr->tx_eims_val);
4710 }
4711 
4712 static void
4713 ix_msix_rxtx(void *xrxr)
4714 {
4715 	struct ix_rx_ring *rxr = xrxr;
4716 	struct ix_tx_ring *txr;
4717 	int hdr;
4718 
4719 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4720 
4721 	ix_rxeof(rxr, -1);
4722 
4723 	/*
4724 	 * NOTE:
4725 	 * Since tx_next_clean is only changed by ix_txeof(),
4726 	 * which is called only in interrupt handler, the
4727 	 * check w/o holding tx serializer is MPSAFE.
4728 	 */
4729 	txr = rxr->rx_txr;
4730 	hdr = *(txr->tx_hdr);
4731 	if (hdr != txr->tx_next_clean) {
4732 		lwkt_serialize_enter(&txr->tx_serialize);
4733 		ix_txeof(txr, hdr);
4734 		if (!ifsq_is_empty(txr->tx_ifsq))
4735 			ifsq_devstart(txr->tx_ifsq);
4736 		lwkt_serialize_exit(&txr->tx_serialize);
4737 	}
4738 
4739 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
4740 }
4741 
4742 static void
4743 ix_intr_status(struct ix_softc *sc, uint32_t eicr)
4744 {
4745 	struct ixgbe_hw *hw = &sc->hw;
4746 
4747 	/* Link status change */
4748 	if (eicr & IXGBE_EICR_LSC)
4749 		ix_handle_link(sc);
4750 
4751 	if (hw->mac.type != ixgbe_mac_82598EB) {
4752 		if (eicr & IXGBE_EICR_ECC)
4753 			if_printf(&sc->arpcom.ac_if, "ECC ERROR!!  Reboot!!\n");
4754 
4755 		/* Check for over temp condition */
4756 		if (eicr & IXGBE_EICR_TS) {
4757 			if_printf(&sc->arpcom.ac_if, "CRITICAL: OVER TEMP!!  "
4758 			    "PHY IS SHUT DOWN!!  Shutdown!!\n");
4759 		}
4760 	}
4761 
4762 	if (ix_is_sfp(hw)) {
4763 		uint32_t mod_mask;
4764 
4765 		/* Pluggable optics-related interrupt */
4766 		if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP)
4767 			mod_mask = IXGBE_EICR_GPI_SDP0_X540;
4768 		else
4769 			mod_mask = IXGBE_EICR_GPI_SDP2_BY_MAC(hw);
4770 		if (eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw))
4771 			ix_handle_msf(sc);
4772 		else if (eicr & mod_mask)
4773 			ix_handle_mod(sc);
4774 	}
4775 
4776 	/* Check for fan failure */
4777 	if (hw->device_id == IXGBE_DEV_ID_82598AT &&
4778 	    (eicr & IXGBE_EICR_GPI_SDP1))
4779 		if_printf(&sc->arpcom.ac_if, "FAN FAILURE!!  Replace!!\n");
4780 
4781 	/* External PHY interrupt */
4782 	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T &&
4783 	    (eicr & IXGBE_EICR_GPI_SDP0_X540))
4784 		ix_handle_phy(sc);
4785 }
4786 
4787 static void
4788 ix_msix_status(void *xsc)
4789 {
4790 	struct ix_softc *sc = xsc;
4791 	uint32_t eicr;
4792 
4793 	ASSERT_SERIALIZED(&sc->main_serialize);
4794 
4795 	eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
4796 	ix_intr_status(sc, eicr);
4797 
4798 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMS, sc->intr_mask);
4799 }
4800 
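/*
 * Map an MSI-X vector index to the EIMS register and bit used to
 * re-enable it: vectors 0-31 use EIMS (82598) or EIMS_EX(0), while
 * vectors 32 and up use EIMS_EX(1), which the 82598 does not have.
 */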
4801 static void
4802 ix_setup_msix_eims(const struct ix_softc *sc, int x,
4803     uint32_t *eims, uint32_t *eims_val)
4804 {
4805 	if (x < 32) {
4806 		if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4807 			KASSERT(x < IX_MAX_MSIX_82598,
4808 			    ("%s: invalid vector %d for 82598",
4809 			     device_get_nameunit(sc->dev), x));
4810 			*eims = IXGBE_EIMS;
4811 		} else {
4812 			*eims = IXGBE_EIMS_EX(0);
4813 		}
4814 		*eims_val = 1 << x;
4815 	} else {
4816 		KASSERT(x < IX_MAX_MSIX, ("%s: invalid vector %d",
4817 		    device_get_nameunit(sc->dev), x));
4818 		KASSERT(sc->hw.mac.type != ixgbe_mac_82598EB,
4819 		    ("%s: invalid vector %d for 82598",
4820 		     device_get_nameunit(sc->dev), x));
4821 		*eims = IXGBE_EIMS_EX(1);
4822 		*eims_val = 1 << (x - 32);
4823 	}
4824 }
4825 
4826 #ifdef IFPOLL_ENABLE
4827 
4828 static void
4829 ix_npoll_status(struct ifnet *ifp)
4830 {
4831 	struct ix_softc *sc = ifp->if_softc;
4832 	uint32_t eicr;
4833 
4834 	ASSERT_SERIALIZED(&sc->main_serialize);
4835 
4836 	eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
4837 	ix_intr_status(sc, eicr);
4838 }
4839 
4840 static void
4841 ix_npoll_tx(struct ifnet *ifp, void *arg, int cycle __unused)
4842 {
4843 	struct ix_tx_ring *txr = arg;
4844 
4845 	ASSERT_SERIALIZED(&txr->tx_serialize);
4846 
4847 	ix_txeof(txr, *(txr->tx_hdr));
4848 	if (!ifsq_is_empty(txr->tx_ifsq))
4849 		ifsq_devstart(txr->tx_ifsq);
4850 }
4851 
4852 static void
4853 ix_npoll_rx(struct ifnet *ifp __unused, void *arg, int cycle)
4854 {
4855 	struct ix_rx_ring *rxr = arg;
4856 
4857 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4858 
4859 	ix_rxeof(rxr, cycle);
4860 }
4861 
4862 static void
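/*
 * ifpoll registration hook.  With a non-NULL info, the status, TX and
 * RX polling handlers are registered, spread by the configured npoll
 * CPU offsets; with a NULL info, the TX queues are moved back to their
 * interrupt CPUs.  The interface is reinitialized only if the number
 * of rings in use changes.
 */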
4863 ix_npoll(struct ifnet *ifp, struct ifpoll_info *info)
4864 {
4865 	struct ix_softc *sc = ifp->if_softc;
4866 	int i, txr_cnt, rxr_cnt;
4867 
4868 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
4869 
4870 	if (info) {
4871 		int off;
4872 
4873 		info->ifpi_status.status_func = ix_npoll_status;
4874 		info->ifpi_status.serializer = &sc->main_serialize;
4875 
4876 		txr_cnt = ix_get_txring_inuse(sc, TRUE);
4877 		off = sc->tx_npoll_off;
4878 		for (i = 0; i < txr_cnt; ++i) {
4879 			struct ix_tx_ring *txr = &sc->tx_rings[i];
4880 			int idx = i + off;
4881 
4882 			KKASSERT(idx < ncpus2);
4883 			info->ifpi_tx[idx].poll_func = ix_npoll_tx;
4884 			info->ifpi_tx[idx].arg = txr;
4885 			info->ifpi_tx[idx].serializer = &txr->tx_serialize;
4886 			ifsq_set_cpuid(txr->tx_ifsq, idx);
4887 		}
4888 
4889 		rxr_cnt = ix_get_rxring_inuse(sc, TRUE);
4890 		off = sc->rx_npoll_off;
4891 		for (i = 0; i < rxr_cnt; ++i) {
4892 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
4893 			int idx = i + off;
4894 
4895 			KKASSERT(idx < ncpus2);
4896 			info->ifpi_rx[idx].poll_func = ix_npoll_rx;
4897 			info->ifpi_rx[idx].arg = rxr;
4898 			info->ifpi_rx[idx].serializer = &rxr->rx_serialize;
4899 		}
4900 
4901 		if (ifp->if_flags & IFF_RUNNING) {
4902 			if (rxr_cnt == sc->rx_ring_inuse &&
4903 			    txr_cnt == sc->tx_ring_inuse) {
4904 				ix_set_timer_cpuid(sc, TRUE);
4905 				ix_disable_intr(sc);
4906 			} else {
4907 				ix_init(sc);
4908 			}
4909 		}
4910 	} else {
4911 		for (i = 0; i < sc->tx_ring_cnt; ++i) {
4912 			struct ix_tx_ring *txr = &sc->tx_rings[i];
4913 
4914 			ifsq_set_cpuid(txr->tx_ifsq, txr->tx_intr_cpuid);
4915 		}
4916 
4917 		if (ifp->if_flags & IFF_RUNNING) {
4918 			txr_cnt = ix_get_txring_inuse(sc, FALSE);
4919 			rxr_cnt = ix_get_rxring_inuse(sc, FALSE);
4920 
4921 			if (rxr_cnt == sc->rx_ring_inuse &&
4922 			    txr_cnt == sc->tx_ring_inuse) {
4923 				ix_set_timer_cpuid(sc, FALSE);
4924 				ix_enable_intr(sc);
4925 			} else {
4926 				ix_init(sc);
4927 			}
4928 		}
4929 	}
4930 }
4931 
4932 static int
4933 ix_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS)
4934 {
4935 	struct ix_softc *sc = (void *)arg1;
4936 	struct ifnet *ifp = &sc->arpcom.ac_if;
4937 	int error, off;
4938 
4939 	off = sc->rx_npoll_off;
4940 	error = sysctl_handle_int(oidp, &off, 0, req);
4941 	if (error || req->newptr == NULL)
4942 		return error;
4943 	if (off < 0)
4944 		return EINVAL;
4945 
4946 	ifnet_serialize_all(ifp);
4947 	if (off >= ncpus2 || off % sc->rx_ring_cnt != 0) {
4948 		error = EINVAL;
4949 	} else {
4950 		error = 0;
4951 		sc->rx_npoll_off = off;
4952 	}
4953 	ifnet_deserialize_all(ifp);
4954 
4955 	return error;
4956 }
4957 
4958 static int
4959 ix_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS)
4960 {
4961 	struct ix_softc *sc = (void *)arg1;
4962 	struct ifnet *ifp = &sc->arpcom.ac_if;
4963 	int error, off;
4964 
4965 	off = sc->tx_npoll_off;
4966 	error = sysctl_handle_int(oidp, &off, 0, req);
4967 	if (error || req->newptr == NULL)
4968 		return error;
4969 	if (off < 0)
4970 		return EINVAL;
4971 
4972 	ifnet_serialize_all(ifp);
4973 	if (off >= ncpus2 || off % sc->tx_ring_cnt != 0) {
4974 		error = EINVAL;
4975 	} else {
4976 		error = 0;
4977 		sc->tx_npoll_off = off;
4978 	}
4979 	ifnet_deserialize_all(ifp);
4980 
4981 	return error;
4982 }
4983 
4984 #endif /* IFPOLL_ENABLE */
4985 
4986 static enum ixgbe_fc_mode
4987 ix_ifmedia2fc(int ifm)
4988 {
4989 	int fc_opt = ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
4990 
4991 	switch (fc_opt) {
4992 	case (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE):
4993 		return ixgbe_fc_full;
4994 
4995 	case IFM_ETH_RXPAUSE:
4996 		return ixgbe_fc_rx_pause;
4997 
4998 	case IFM_ETH_TXPAUSE:
4999 		return ixgbe_fc_tx_pause;
5000 
5001 	default:
5002 		return ixgbe_fc_none;
5003 	}
5004 }
5005 
5006 static const char *
5007 ix_ifmedia2str(int ifm)
5008 {
5009 	int fc_opt = ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
5010 
5011 	switch (fc_opt) {
5012 	case (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE):
5013 		return IFM_ETH_FC_FULL;
5014 
5015 	case IFM_ETH_RXPAUSE:
5016 		return IFM_ETH_FC_RXPAUSE;
5017 
5018 	case IFM_ETH_TXPAUSE:
5019 		return IFM_ETH_FC_TXPAUSE;
5020 
5021 	default:
5022 		return IFM_ETH_FC_NONE;
5023 	}
5024 }
5025 
5026 static const char *
5027 ix_fc2str(enum ixgbe_fc_mode fc)
5028 {
5029 	switch (fc) {
5030 	case ixgbe_fc_full:
5031 		return IFM_ETH_FC_FULL;
5032 
5033 	case ixgbe_fc_rx_pause:
5034 		return IFM_ETH_FC_RXPAUSE;
5035 
5036 	case ixgbe_fc_tx_pause:
5037 		return IFM_ETH_FC_TXPAUSE;
5038 
5039 	default:
5040 		return IFM_ETH_FC_NONE;
5041 	}
5042 }
5043 
5044 static int
5045 ix_powerdown(struct ix_softc *sc)
5046 {
5047 	struct ixgbe_hw *hw = &sc->hw;
5048 	int error = 0;
5049 
5050 	/* Limit the power management flow to X550EM baseT */
5051 	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T &&
5052 	    hw->phy.ops.enter_lplu) {
5053 		/* Turn off support for APM wakeup. (Using ACPI instead) */
5054 		IXGBE_WRITE_REG(hw, IXGBE_GRC,
5055 		    IXGBE_READ_REG(hw, IXGBE_GRC) & ~(uint32_t)2);
5056 
5057 		/*
5058 		 * Clear Wake Up Status register to prevent any previous wakeup
5059 		 * events from waking us up immediately after we suspend.
5060 		 */
5061 		IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff);
5062 
5063 		/*
5064 		 * Program the Wakeup Filter Control register with user filter
5065 		 * settings
5066 		 */
5067 		IXGBE_WRITE_REG(hw, IXGBE_WUFC, sc->wufc);
5068 
5069 		/* Enable wakeups and power management in Wakeup Control */
5070 		IXGBE_WRITE_REG(hw, IXGBE_WUC,
5071 		    IXGBE_WUC_WKEN | IXGBE_WUC_PME_EN);
5072 
5073 		/* X550EM baseT adapters need a special LPLU flow */
5074 		hw->phy.reset_disable = true;
5075 		ix_stop(sc);
5076 		error = hw->phy.ops.enter_lplu(hw);
5077 		if (error) {
5078 			if_printf(&sc->arpcom.ac_if,
5079 			    "Error entering LPLU: %d\n", error);
5080 		}
5081 		hw->phy.reset_disable = false;
5082 	} else {
5083 		/* Just stop for other adapters */
5084 		ix_stop(sc);
5085 	}
5086 	return error;
5087 }
5088 
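/*
 * Compute the flow control high/low water marks for the current
 * maximum frame size and apply the requested flow control mode.  The
 * IXGBE_DV*() macros yield the required delay in byte times, which
 * IXGBE_BT2KB() converts to KB; the high water mark is the RX packet
 * buffer size minus that headroom.
 */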
5089 static void
5090 ix_config_flowctrl(struct ix_softc *sc)
5091 {
5092 	struct ixgbe_hw *hw = &sc->hw;
5093 	uint32_t rxpb, frame, size, tmp;
5094 
5095 	frame = sc->max_frame_size;
5096 
5097 	/* Calculate High Water */
5098 	switch (hw->mac.type) {
5099 	case ixgbe_mac_X540:
5100 	case ixgbe_mac_X550:
5101 	case ixgbe_mac_X550EM_a:
5102 	case ixgbe_mac_X550EM_x:
5103 		tmp = IXGBE_DV_X540(frame, frame);
5104 		break;
5105 	default:
5106 		tmp = IXGBE_DV(frame, frame);
5107 		break;
5108 	}
5109 	size = IXGBE_BT2KB(tmp);
5110 	rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
5111 	hw->fc.high_water[0] = rxpb - size;
5112 
5113 	/* Now calculate Low Water */
5114 	switch (hw->mac.type) {
5115 	case ixgbe_mac_X540:
5116 	case ixgbe_mac_X550:
5117 	case ixgbe_mac_X550EM_a:
5118 	case ixgbe_mac_X550EM_x:
5119 		tmp = IXGBE_LOW_DV_X540(frame);
5120 		break;
5121 	default:
5122 		tmp = IXGBE_LOW_DV(frame);
5123 		break;
5124 	}
5125 	hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
5126 
5127 	hw->fc.requested_mode = ix_ifmedia2fc(sc->ifm_media);
5128 	if (sc->ifm_media & IFM_ETH_FORCEPAUSE)
5129 		hw->fc.disable_fc_autoneg = TRUE;
5130 	else
5131 		hw->fc.disable_fc_autoneg = FALSE;
5132 	hw->fc.pause_time = IX_FC_PAUSE;
5133 	hw->fc.send_xon = TRUE;
5134 }
5135 
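/*
 * Configure DMA coalescing; it is only available on X550 and later
 * MACs, and the hardware is only reprogrammed when the watchdog timer
 * or the link speed has actually changed.
 */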
5136 static void
5137 ix_config_dmac(struct ix_softc *sc)
5138 {
5139 	struct ixgbe_hw *hw = &sc->hw;
5140 	struct ixgbe_dmac_config *dcfg = &hw->mac.dmac_config;
5141 
5142 	if (hw->mac.type < ixgbe_mac_X550 || !hw->mac.ops.dmac_config)
5143 		return;
5144 
5145 	if ((dcfg->watchdog_timer ^ sc->dmac) ||
5146 	    (dcfg->link_speed ^ sc->link_speed)) {
5147 		dcfg->watchdog_timer = sc->dmac;
5148 		dcfg->fcoe_en = false;
5149 		dcfg->link_speed = sc->link_speed;
5150 		dcfg->num_tcs = 1;
5151 
5152 		if (bootverbose) {
5153 			if_printf(&sc->arpcom.ac_if, "dmac settings: "
5154 			    "watchdog %d, link speed %d\n",
5155 			    dcfg->watchdog_timer, dcfg->link_speed);
5156 		}
5157 
5158 		hw->mac.ops.dmac_config(hw);
5159 	}
5160 }
5161 
5162 static void
5163 ix_init_media(struct ix_softc *sc)
5164 {
5165 	struct ixgbe_hw *hw = &sc->hw;
5166 	int layer, msf_ifm = IFM_NONE;
5167 
5168 	ifmedia_removeall(&sc->media);
5169 
5170 	layer = ixgbe_get_supported_physical_layer(hw);
5171 
5172 	/*
5173 	 * Media types with matching DragonFlyBSD media defines
5174 	 */
5175 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
5176 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_T | IFM_FDX,
5177 		    0, NULL);
5178 	}
5179 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
5180 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
5181 		    0, NULL);
5182 	}
5183 	if (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX) {
5184 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
5185 		    0, NULL);
5186 		/* No half-duplex support */
5187 	}
5188 
5189 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) {
5190 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_LR | IFM_FDX,
5191 		    0, NULL);
5192 		msf_ifm = IFM_1000_LX;
5193 	}
5194 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LRM) {
5195 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_LRM | IFM_FDX,
5196 		    0, NULL);
5197 		msf_ifm = IFM_1000_LX;
5198 	}
5199 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
5200 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_SR | IFM_FDX,
5201 		    0, NULL);
5202 		msf_ifm = IFM_1000_SX;
5203 	}
5204 
5205 	/* Add media for multispeed fiber */
5206 	if (ix_is_sfp(hw) && hw->phy.multispeed_fiber && msf_ifm != IFM_NONE) {
5207 		uint32_t linkcap;
5208 		bool autoneg;
5209 
5210 		hw->mac.ops.get_link_capabilities(hw, &linkcap, &autoneg);
5211 		if (linkcap & IXGBE_LINK_SPEED_1GB_FULL)
5212 			ifmedia_add_nodup(&sc->media,
5213 			    IFM_ETHER | msf_ifm | IFM_FDX, 0, NULL);
5214 	}
5215 
5216 	if ((layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) ||
5217 	    (layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA)) {
5218 		ifmedia_add_nodup(&sc->media,
5219 		    IFM_ETHER | IFM_10G_TWINAX | IFM_FDX, 0, NULL);
5220 	}
5221 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) {
5222 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_CX4 | IFM_FDX,
5223 		    0, NULL);
5224 	}
5225 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
5226 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
5227 		    0, NULL);
5228 	}
5229 
5230 	/*
5231 	 * XXX Other (no matching DragonFlyBSD media type):
5232 	 * To work around this, we'll assign these completely
5233 	 * inappropriate media types.
5234 	 */
5235 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) {
5236 		if_printf(&sc->arpcom.ac_if, "Media supported: 10GbaseKR\n");
5237 		if_printf(&sc->arpcom.ac_if, "10GbaseKR mapped to 10GbaseSR\n");
5238 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_SR | IFM_FDX,
5239 		    0, NULL);
5240 	}
5241 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) {
5242 		if_printf(&sc->arpcom.ac_if, "Media supported: 10GbaseKX4\n");
5243 		if_printf(&sc->arpcom.ac_if,
5244 		    "10GbaseKX4 mapped to 10GbaseCX4\n");
5245 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_CX4 | IFM_FDX,
5246 		    0, NULL);
5247 	}
5248 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) {
5249 		if_printf(&sc->arpcom.ac_if, "Media supported: 1000baseKX\n");
5250 		if_printf(&sc->arpcom.ac_if,
5251 		    "1000baseKX mapped to 1000baseCX\n");
5252 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_CX | IFM_FDX,
5253 		    0, NULL);
5254 	}
5255 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_BX) {
5256 		/* Someday, someone will care about you... */
5257 		if_printf(&sc->arpcom.ac_if,
5258 		    "Media supported: 1000baseBX, ignored\n");
5259 	}
5260 
5261 	/* XXX we probably don't need this */
5262 	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
5263 		ifmedia_add_nodup(&sc->media,
5264 		    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
5265 	}
5266 
5267 	ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
5268 
5269 	if (ifmedia_tryset(&sc->media, sc->ifm_media)) {
5270 		int flowctrl = (sc->ifm_media & IFM_ETH_FCMASK);
5271 
5272 		sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;
5273 		sc->ifm_media = IX_IFM_DEFAULT | flowctrl;
5274 		ifmedia_set(&sc->media, sc->ifm_media);
5275 	}
5276 }
5277