xref: /dragonfly/sys/dev/netif/ix/if_ix.c (revision f26349bc)
1 /*
2  * Copyright (c) 2001-2013, Intel Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  *  1. Redistributions of source code must retain the above copyright notice,
9  *     this list of conditions and the following disclaimer.
10  *
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *     notice, this list of conditions and the following disclaimer in the
13  *     documentation and/or other materials provided with the distribution.
14  *
15  *  3. Neither the name of the Intel Corporation nor the names of its
16  *     contributors may be used to endorse or promote products derived from
17  *     this software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include "opt_ifpoll.h"
33 #include "opt_ix.h"
34 
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/endian.h>
38 #include <sys/interrupt.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/proc.h>
43 #include <sys/rman.h>
44 #include <sys/serialize.h>
45 #include <sys/serialize2.h>
46 #include <sys/socket.h>
47 #include <sys/sockio.h>
48 #include <sys/sysctl.h>
49 #include <sys/systm.h>
50 
51 #include <net/bpf.h>
52 #include <net/ethernet.h>
53 #include <net/if.h>
54 #include <net/if_arp.h>
55 #include <net/if_dl.h>
56 #include <net/if_media.h>
57 #include <net/ifq_var.h>
58 #include <net/toeplitz.h>
59 #include <net/toeplitz2.h>
60 #include <net/vlan/if_vlan_var.h>
61 #include <net/vlan/if_vlan_ether.h>
62 #include <net/if_poll.h>
63 
64 #include <netinet/in_systm.h>
65 #include <netinet/in.h>
66 #include <netinet/ip.h>
67 
68 #include <bus/pci/pcivar.h>
69 #include <bus/pci/pcireg.h>
70 
71 #include <dev/netif/ix/ixgbe_api.h>
72 #include <dev/netif/ix/if_ix.h>
73 
74 #ifdef IX_RSS_DEBUG
75 #define IX_RSS_DPRINTF(sc, lvl, fmt, ...) \
76 do { \
77 	if (sc->rss_debug >= lvl) \
78 		if_printf(&sc->arpcom.ac_if, fmt, __VA_ARGS__); \
79 } while (0)
80 #else	/* !IX_RSS_DEBUG */
81 #define IX_RSS_DPRINTF(sc, lvl, fmt, ...)	((void)0)
82 #endif	/* IX_RSS_DEBUG */
83 
84 #define IX_NAME			"Intel(R) PRO/10GbE "
85 #define IX_DEVICE(id) \
86 	{ IXGBE_VENDOR_ID, IXGBE_DEV_ID_##id, IX_NAME #id }
87 #define IX_DEVICE_NULL		{ 0, 0, NULL }
88 
89 static struct ix_device {
90 	uint16_t	vid;
91 	uint16_t	did;
92 	const char	*desc;
93 } ix_devices[] = {
94 	IX_DEVICE(82598AF_DUAL_PORT),
95 	IX_DEVICE(82598AF_SINGLE_PORT),
96 	IX_DEVICE(82598EB_CX4),
97 	IX_DEVICE(82598AT),
98 	IX_DEVICE(82598AT2),
99 	IX_DEVICE(82598),
100 	IX_DEVICE(82598_DA_DUAL_PORT),
101 	IX_DEVICE(82598_CX4_DUAL_PORT),
102 	IX_DEVICE(82598EB_XF_LR),
103 	IX_DEVICE(82598_SR_DUAL_PORT_EM),
104 	IX_DEVICE(82598EB_SFP_LOM),
105 	IX_DEVICE(82599_KX4),
106 	IX_DEVICE(82599_KX4_MEZZ),
107 	IX_DEVICE(82599_SFP),
108 	IX_DEVICE(82599_XAUI_LOM),
109 	IX_DEVICE(82599_CX4),
110 	IX_DEVICE(82599_T3_LOM),
111 	IX_DEVICE(82599_COMBO_BACKPLANE),
112 	IX_DEVICE(82599_BACKPLANE_FCOE),
113 	IX_DEVICE(82599_SFP_SF2),
114 	IX_DEVICE(82599_SFP_FCOE),
115 	IX_DEVICE(82599EN_SFP),
116 	IX_DEVICE(82599_SFP_SF_QP),
117 	IX_DEVICE(X540T),
118 
119 	/* required last entry */
120 	IX_DEVICE_NULL
121 };
122 
123 static int	ix_probe(device_t);
124 static int	ix_attach(device_t);
125 static int	ix_detach(device_t);
126 static int	ix_shutdown(device_t);
127 
128 static void	ix_serialize(struct ifnet *, enum ifnet_serialize);
129 static void	ix_deserialize(struct ifnet *, enum ifnet_serialize);
130 static int	ix_tryserialize(struct ifnet *, enum ifnet_serialize);
131 #ifdef INVARIANTS
132 static void	ix_serialize_assert(struct ifnet *, enum ifnet_serialize,
133 		    boolean_t);
134 #endif
135 static void	ix_start(struct ifnet *, struct ifaltq_subque *);
136 static void	ix_watchdog(struct ifaltq_subque *);
137 static int	ix_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
138 static void	ix_init(void *);
139 static void	ix_stop(struct ix_softc *);
140 static void	ix_media_status(struct ifnet *, struct ifmediareq *);
141 static int	ix_media_change(struct ifnet *);
142 static void	ix_timer(void *);
143 #ifdef IFPOLL_ENABLE
144 static void	ix_npoll(struct ifnet *, struct ifpoll_info *);
145 static void	ix_npoll_rx(struct ifnet *, void *, int);
146 static void	ix_npoll_tx(struct ifnet *, void *, int);
147 static void	ix_npoll_status(struct ifnet *);
148 #endif
149 
150 static void	ix_add_sysctl(struct ix_softc *);
151 static void	ix_add_intr_rate_sysctl(struct ix_softc *, int,
152 		    const char *, int (*)(SYSCTL_HANDLER_ARGS), const char *);
153 static int	ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
154 static int	ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
155 static int	ix_sysctl_txd(SYSCTL_HANDLER_ARGS);
156 static int	ix_sysctl_rxd(SYSCTL_HANDLER_ARGS);
157 static int	ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS);
158 static int	ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int);
159 static int	ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS);
160 static int	ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS);
161 static int	ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS);
162 static int	ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS);
163 static int	ix_sysctl_flowctrl(SYSCTL_HANDLER_ARGS);
164 #ifdef foo
165 static int	ix_sysctl_advspeed(SYSCTL_HANDLER_ARGS);
166 #endif
167 #if 0
168 static void     ix_add_hw_stats(struct ix_softc *);
169 #endif
170 #ifdef IFPOLL_ENABLE
171 static int	ix_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS);
172 static int	ix_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS);
173 #endif
174 
175 static void	ix_slot_info(struct ix_softc *);
176 static int	ix_alloc_rings(struct ix_softc *);
177 static void	ix_free_rings(struct ix_softc *);
178 static void	ix_setup_ifp(struct ix_softc *);
179 static void	ix_setup_serialize(struct ix_softc *);
180 static void	ix_set_ring_inuse(struct ix_softc *, boolean_t);
181 static void	ix_set_timer_cpuid(struct ix_softc *, boolean_t);
182 static void	ix_update_stats(struct ix_softc *);
183 
184 static void	ix_set_promisc(struct ix_softc *);
185 static void	ix_set_multi(struct ix_softc *);
186 static void	ix_set_vlan(struct ix_softc *);
187 static uint8_t	*ix_mc_array_itr(struct ixgbe_hw *, uint8_t **, uint32_t *);
188 
189 static int	ix_get_txring_inuse(const struct ix_softc *, boolean_t);
190 static void	ix_init_tx_ring(struct ix_tx_ring *);
191 static void	ix_free_tx_ring(struct ix_tx_ring *);
192 static int	ix_create_tx_ring(struct ix_tx_ring *);
193 static void	ix_destroy_tx_ring(struct ix_tx_ring *, int);
194 static void	ix_init_tx_unit(struct ix_softc *);
195 static int	ix_encap(struct ix_tx_ring *, struct mbuf **,
196 		    uint16_t *, int *);
197 static int	ix_tx_ctx_setup(struct ix_tx_ring *,
198 		    const struct mbuf *, uint32_t *, uint32_t *);
199 static int	ix_tso_ctx_setup(struct ix_tx_ring *,
200 		    const struct mbuf *, uint32_t *, uint32_t *);
201 static void	ix_txeof(struct ix_tx_ring *, int);
202 
203 static int	ix_get_rxring_inuse(const struct ix_softc *, boolean_t);
204 static int	ix_init_rx_ring(struct ix_rx_ring *);
205 static void	ix_free_rx_ring(struct ix_rx_ring *);
206 static int	ix_create_rx_ring(struct ix_rx_ring *);
207 static void	ix_destroy_rx_ring(struct ix_rx_ring *, int);
208 static void	ix_init_rx_unit(struct ix_softc *);
209 #if 0
210 static void	ix_setup_hw_rsc(struct ix_rx_ring *);
211 #endif
212 static int	ix_newbuf(struct ix_rx_ring *, int, boolean_t);
213 static void	ix_rxeof(struct ix_rx_ring *, int);
214 static void	ix_rx_discard(struct ix_rx_ring *, int, boolean_t);
215 static void	ix_enable_rx_drop(struct ix_softc *);
216 static void	ix_disable_rx_drop(struct ix_softc *);
217 
218 static void	ix_alloc_msix(struct ix_softc *);
219 static void	ix_free_msix(struct ix_softc *, boolean_t);
220 static void	ix_conf_rx_msix(struct ix_softc *, int, int *, int);
221 static void	ix_conf_tx_msix(struct ix_softc *, int, int *, int);
222 static void	ix_setup_msix_eims(const struct ix_softc *, int,
223 		    uint32_t *, uint32_t *);
224 static int	ix_alloc_intr(struct ix_softc *);
225 static void	ix_free_intr(struct ix_softc *);
226 static int	ix_setup_intr(struct ix_softc *);
227 static void	ix_teardown_intr(struct ix_softc *, int);
228 static void	ix_enable_intr(struct ix_softc *);
229 static void	ix_disable_intr(struct ix_softc *);
230 static void	ix_set_ivar(struct ix_softc *, uint8_t, uint8_t, int8_t);
231 static void	ix_set_eitr(struct ix_softc *, int, int);
232 static void	ix_intr_status(struct ix_softc *, uint32_t);
233 static void	ix_intr(void *);
234 static void	ix_msix_rxtx(void *);
235 static void	ix_msix_rx(void *);
236 static void	ix_msix_tx(void *);
237 static void	ix_msix_status(void *);
238 
239 static void	ix_config_link(struct ix_softc *);
240 static boolean_t ix_sfp_probe(struct ix_softc *);
241 static boolean_t ix_is_sfp(const struct ixgbe_hw *);
242 static void	ix_setup_optics(struct ix_softc *);
243 static void	ix_update_link_status(struct ix_softc *);
244 static void	ix_handle_link(struct ix_softc *);
245 static void	ix_handle_mod(struct ix_softc *);
246 static void	ix_handle_msf(struct ix_softc *);
247 
248 /* XXX Shared code structure requires this for the moment */
249 extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *);
250 
251 static device_method_t ix_methods[] = {
252 	/* Device interface */
253 	DEVMETHOD(device_probe,		ix_probe),
254 	DEVMETHOD(device_attach,	ix_attach),
255 	DEVMETHOD(device_detach,	ix_detach),
256 	DEVMETHOD(device_shutdown,	ix_shutdown),
257 	DEVMETHOD_END
258 };
259 
260 static driver_t ix_driver = {
261 	"ix",
262 	ix_methods,
263 	sizeof(struct ix_softc)
264 };
265 
266 static devclass_t ix_devclass;
267 
268 DECLARE_DUMMY_MODULE(if_ix);
269 DRIVER_MODULE(if_ix, pci, ix_driver, ix_devclass, NULL, NULL);
270 
271 static int	ix_msi_enable = 1;
272 static int	ix_msix_enable = 1;
273 static int	ix_msix_agg_rxtx = 1;
274 static int	ix_rxr = 0;
275 static int	ix_txr = 0;
276 static int	ix_txd = IX_PERF_TXD;
277 static int	ix_rxd = IX_PERF_RXD;
278 static int	ix_unsupported_sfp = 0;
279 
280 TUNABLE_INT("hw.ix.msi.enable", &ix_msi_enable);
281 TUNABLE_INT("hw.ix.msix.enable", &ix_msix_enable);
282 TUNABLE_INT("hw.ix.msix.agg_rxtx", &ix_msix_agg_rxtx);
283 TUNABLE_INT("hw.ix.rxr", &ix_rxr);
284 TUNABLE_INT("hw.ix.txr", &ix_txr);
285 TUNABLE_INT("hw.ix.txd", &ix_txd);
286 TUNABLE_INT("hw.ix.rxd", &ix_rxd);
287 TUNABLE_INT("hw.ix.unsupported_sfp", &ix_unsupported_sfp);
288 
289 /*
290  * Smart speed setting, default to on.  This only works as a
291  * compile-time option right now, since it is applied during attach;
292  * set it to 'ixgbe_smart_speed_off' to disable.
293  */
294 static const enum ixgbe_smart_speed ix_smart_speed =
295     ixgbe_smart_speed_on;
296 
297 static int
298 ix_probe(device_t dev)
299 {
300 	const struct ix_device *d;
301 	uint16_t vid, did;
302 
303 	vid = pci_get_vendor(dev);
304 	did = pci_get_device(dev);
305 
306 	for (d = ix_devices; d->desc != NULL; ++d) {
307 		if (vid == d->vid && did == d->did) {
308 			device_set_desc(dev, d->desc);
309 			return 0;
310 		}
311 	}
312 	return ENXIO;
313 }
314 
315 static int
316 ix_attach(device_t dev)
317 {
318 	struct ix_softc *sc = device_get_softc(dev);
319 	struct ixgbe_hw *hw;
320 	int error, ring_cnt_max;
321 	uint16_t csum;
322 	uint32_t ctrl_ext;
323 #ifdef IFPOLL_ENABLE
324 	int offset, offset_def;
325 #endif
326 
327 	sc->dev = sc->osdep.dev = dev;
328 	hw = &sc->hw;
329 
330 	if_initname(&sc->arpcom.ac_if, device_get_name(dev),
331 	    device_get_unit(dev));
332 	ifmedia_init(&sc->media, IFM_IMASK,
333 	    ix_media_change, ix_media_status);
334 
335 	/* Save frame size */
336 	sc->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;
337 
338 	callout_init_mp(&sc->timer);
339 	lwkt_serialize_init(&sc->main_serialize);
340 
341 	/*
342 	 * Save off the information about this board
343 	 */
344 	hw->vendor_id = pci_get_vendor(dev);
345 	hw->device_id = pci_get_device(dev);
346 	hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
347 	hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2);
348 	hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);
349 
350 	ixgbe_set_mac_type(hw);
351 
352 	/* Pick up the 82599 and VF settings */
353 	if (hw->mac.type != ixgbe_mac_82598EB)
354 		hw->phy.smart_speed = ix_smart_speed;
355 
356 	/* Enable bus mastering */
357 	pci_enable_busmaster(dev);
358 
359 	/*
360 	 * Allocate IO memory
361 	 */
362 	sc->mem_rid = PCIR_BAR(0);
363 	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
364 	    &sc->mem_rid, RF_ACTIVE);
365 	if (sc->mem_res == NULL) {
366 		device_printf(dev, "Unable to allocate bus resource: memory\n");
367 		error = ENXIO;
368 		goto failed;
369 	}
370 
371 	sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->mem_res);
372 	sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->mem_res);
373 
374 	sc->hw.hw_addr = (uint8_t *)&sc->osdep.mem_bus_space_handle;
375 	sc->hw.back = &sc->osdep;
376 
377 	/*
378 	 * Configure total supported RX/TX ring count
379 	 */
380 	sc->rx_ring_cnt = device_getenv_int(dev, "rxr", ix_rxr);
381 	sc->rx_ring_cnt = if_ring_count2(sc->rx_ring_cnt, IX_MAX_RXRING);
382 	sc->rx_ring_inuse = sc->rx_ring_cnt;
383 
384 	switch (hw->mac.type) {
385 	case ixgbe_mac_82598EB:
386 		ring_cnt_max = IX_MAX_TXRING_82598;
387 		break;
388 
389 	case ixgbe_mac_82599EB:
390 		ring_cnt_max = IX_MAX_TXRING_82599;
391 		break;
392 
393 	case ixgbe_mac_X540:
394 		ring_cnt_max = IX_MAX_TXRING_X540;
395 		break;
396 
397 	default:
398 		ring_cnt_max = 1;
399 		break;
400 	}
401 	sc->tx_ring_cnt = device_getenv_int(dev, "txr", ix_txr);
402 	sc->tx_ring_cnt = if_ring_count2(sc->tx_ring_cnt, ring_cnt_max);
403 	sc->tx_ring_inuse = sc->tx_ring_cnt;
404 
405 	/* Allocate TX/RX rings */
406 	error = ix_alloc_rings(sc);
407 	if (error)
408 		goto failed;
409 
410 #ifdef IFPOLL_ENABLE
411 	/*
412 	 * NPOLLING RX CPU offset
413 	 */
414 	if (sc->rx_ring_cnt == ncpus2) {
415 		offset = 0;
416 	} else {
417 		offset_def = (sc->rx_ring_cnt * device_get_unit(dev)) % ncpus2;
418 		offset = device_getenv_int(dev, "npoll.rxoff", offset_def);
419 		if (offset >= ncpus2 ||
420 		    offset % sc->rx_ring_cnt != 0) {
421 			device_printf(dev, "invalid npoll.rxoff %d, use %d\n",
422 			    offset, offset_def);
423 			offset = offset_def;
424 		}
425 	}
426 	sc->rx_npoll_off = offset;
427 
428 	/*
429 	 * NPOLLING TX CPU offset
430 	 */
431 	if (sc->tx_ring_cnt == ncpus2) {
432 		offset = 0;
433 	} else {
434 		offset_def = (sc->tx_ring_cnt * device_get_unit(dev)) % ncpus2;
435 		offset = device_getenv_int(dev, "npoll.txoff", offset_def);
436 		if (offset >= ncpus2 ||
437 		    offset % sc->tx_ring_cnt != 0) {
438 			device_printf(dev, "invalid npoll.txoff %d, use %d\n",
439 			    offset, offset_def);
440 			offset = offset_def;
441 		}
442 	}
443 	sc->tx_npoll_off = offset;
444 #endif
445 
446 	/* Allocate interrupt */
447 	error = ix_alloc_intr(sc);
448 	if (error)
449 		goto failed;
450 
451 	/* Set up serializers */
452 	ix_setup_serialize(sc);
453 
454 	/* Allocate multicast array memory. */
455 	sc->mta = kmalloc(IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR,
456 	    M_DEVBUF, M_WAITOK);
457 
458 	/* Initialize the shared code */
459 	hw->allow_unsupported_sfp = ix_unsupported_sfp;
460 	error = ixgbe_init_shared_code(hw);
461 	if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
462 		/*
463 		 * No optics in this port; ask timer routine
464 		 * to probe for later insertion.
465 		 */
466 		sc->sfp_probe = TRUE;
467 		error = 0;
468 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
469 		device_printf(dev, "Unsupported SFP+ module detected!\n");
470 		error = EIO;
471 		goto failed;
472 	} else if (error) {
473 		device_printf(dev, "Unable to initialize the shared code\n");
474 		error = EIO;
475 		goto failed;
476 	}
477 
478 	/* Make sure we have a good EEPROM before we read from it */
479 	if (ixgbe_validate_eeprom_checksum(&sc->hw, &csum) < 0) {
480 		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
481 		error = EIO;
482 		goto failed;
483 	}
484 
485 	error = ixgbe_init_hw(hw);
486 	if (error == IXGBE_ERR_EEPROM_VERSION) {
487 		device_printf(dev, "Pre-production device detected\n");
488 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
489 		device_printf(dev, "Unsupported SFP+ Module\n");
490 		error = EIO;
491 		goto failed;
492 	} else if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
493 		device_printf(dev, "No SFP+ Module found\n");
494 	}
495 
496 	/* Detect and set physical type */
497 	ix_setup_optics(sc);
498 
499 	/* Setup OS specific network interface */
500 	ix_setup_ifp(sc);
501 
502 	/* Add sysctl tree */
503 	ix_add_sysctl(sc);
504 
505 	error = ix_setup_intr(sc);
506 	if (error) {
507 		ether_ifdetach(&sc->arpcom.ac_if);
508 		goto failed;
509 	}
510 
511 	/* Initialize statistics */
512 	ix_update_stats(sc);
513 
514 	/*
515 	 * Check PCIE slot type/speed/width
516 	 */
517 	ix_slot_info(sc);
518 
519 	/* Set an initial default flow control value */
520 	sc->fc = ixgbe_fc_full;
521 
522 	/* Let hardware know driver is loaded */
523 	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
524 	ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
525 	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
526 
527 	return 0;
528 failed:
529 	ix_detach(dev);
530 	return error;
531 }
532 
533 static int
534 ix_detach(device_t dev)
535 {
536 	struct ix_softc *sc = device_get_softc(dev);
537 
538 	if (device_is_attached(dev)) {
539 		struct ifnet *ifp = &sc->arpcom.ac_if;
540 		uint32_t ctrl_ext;
541 
542 		ifnet_serialize_all(ifp);
543 
544 		ix_stop(sc);
545 		ix_teardown_intr(sc, sc->intr_cnt);
546 
547 		ifnet_deserialize_all(ifp);
548 
549 		callout_terminate(&sc->timer);
550 		ether_ifdetach(ifp);
551 
552 		/* Let hardware know driver is unloading */
553 		ctrl_ext = IXGBE_READ_REG(&sc->hw, IXGBE_CTRL_EXT);
554 		ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
555 		IXGBE_WRITE_REG(&sc->hw, IXGBE_CTRL_EXT, ctrl_ext);
556 	}
557 
558 	ifmedia_removeall(&sc->media);
559 	bus_generic_detach(dev);
560 
561 	ix_free_intr(sc);
562 
563 	if (sc->msix_mem_res != NULL) {
564 		bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_mem_rid,
565 		    sc->msix_mem_res);
566 	}
567 	if (sc->mem_res != NULL) {
568 		bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid,
569 		    sc->mem_res);
570 	}
571 
572 	ix_free_rings(sc);
573 
574 	if (sc->mta != NULL)
575 		kfree(sc->mta, M_DEVBUF);
576 	if (sc->serializes != NULL)
577 		kfree(sc->serializes, M_DEVBUF);
578 
579 	return 0;
580 }
581 
582 static int
583 ix_shutdown(device_t dev)
584 {
585 	struct ix_softc *sc = device_get_softc(dev);
586 	struct ifnet *ifp = &sc->arpcom.ac_if;
587 
588 	ifnet_serialize_all(ifp);
589 	ix_stop(sc);
590 	ifnet_deserialize_all(ifp);
591 
592 	return 0;
593 }
594 
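/*
 * Transmit start routine for one subqueue.  Called with the subqueue's
 * TX serializer held: dequeues packets, encapsulates them into TX
 * descriptors via ix_encap() and batches TDT (tail) register updates
 * every tx_wreg_nsegs segments.
 */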
595 static void
596 ix_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
597 {
598 	struct ix_softc *sc = ifp->if_softc;
599 	struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
600 	int idx = -1;
601 	uint16_t nsegs;
602 
603 	KKASSERT(txr->tx_ifsq == ifsq);
604 	ASSERT_SERIALIZED(&txr->tx_serialize);
605 
606 	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
607 		return;
608 
609 	if (!sc->link_active || (txr->tx_flags & IX_TXFLAG_ENABLED) == 0) {
610 		ifsq_purge(ifsq);
611 		return;
612 	}
613 
614 	while (!ifsq_is_empty(ifsq)) {
615 		struct mbuf *m_head;
616 
617 		if (txr->tx_avail <= IX_MAX_SCATTER + IX_TX_RESERVED) {
618 			ifsq_set_oactive(ifsq);
619 			txr->tx_watchdog.wd_timer = 5;
620 			break;
621 		}
622 
623 		m_head = ifsq_dequeue(ifsq);
624 		if (m_head == NULL)
625 			break;
626 
627 		if (ix_encap(txr, &m_head, &nsegs, &idx)) {
628 			IFNET_STAT_INC(ifp, oerrors, 1);
629 			continue;
630 		}
631 
632 		if (nsegs >= txr->tx_wreg_nsegs) {
633 			IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
634 			nsegs = 0;
635 			idx = -1;
636 		}
637 
638 		ETHER_BPF_MTAP(ifp, m_head);
639 	}
640 	if (idx >= 0)
641 		IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
642 }
643 
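/*
 * Interface ioctl handler: MTU, interface flags, multicast list,
 * media and capability changes.  Called with all serializers held.
 */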
644 static int
645 ix_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
646 {
647 	struct ix_softc *sc = ifp->if_softc;
648 	struct ifreq *ifr = (struct ifreq *) data;
649 	int error = 0, mask, reinit;
650 
651 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
652 
653 	switch (command) {
654 	case SIOCSIFMTU:
655 		if (ifr->ifr_mtu > IX_MAX_FRAME_SIZE - ETHER_HDR_LEN) {
656 			error = EINVAL;
657 		} else {
658 			ifp->if_mtu = ifr->ifr_mtu;
659 			sc->max_frame_size =
660 			    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
661 			ix_init(sc);
662 		}
663 		break;
664 
665 	case SIOCSIFFLAGS:
666 		if (ifp->if_flags & IFF_UP) {
667 			if (ifp->if_flags & IFF_RUNNING) {
668 				if ((ifp->if_flags ^ sc->if_flags) &
669 				    (IFF_PROMISC | IFF_ALLMULTI))
670 					ix_set_promisc(sc);
671 			} else {
672 				ix_init(sc);
673 			}
674 		} else if (ifp->if_flags & IFF_RUNNING) {
675 			ix_stop(sc);
676 		}
677 		sc->if_flags = ifp->if_flags;
678 		break;
679 
680 	case SIOCADDMULTI:
681 	case SIOCDELMULTI:
682 		if (ifp->if_flags & IFF_RUNNING) {
683 			ix_disable_intr(sc);
684 			ix_set_multi(sc);
685 #ifdef IFPOLL_ENABLE
686 			if ((ifp->if_flags & IFF_NPOLLING) == 0)
687 #endif
688 				ix_enable_intr(sc);
689 		}
690 		break;
691 
692 	case SIOCSIFMEDIA:
693 	case SIOCGIFMEDIA:
694 		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
695 		break;
696 
697 	case SIOCSIFCAP:
698 		reinit = 0;
699 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
700 		if (mask & IFCAP_RXCSUM) {
701 			ifp->if_capenable ^= IFCAP_RXCSUM;
702 			reinit = 1;
703 		}
704 		if (mask & IFCAP_VLAN_HWTAGGING) {
705 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
706 			reinit = 1;
707 		}
708 		if (mask & IFCAP_TXCSUM) {
709 			ifp->if_capenable ^= IFCAP_TXCSUM;
710 			if (ifp->if_capenable & IFCAP_TXCSUM)
711 				ifp->if_hwassist |= CSUM_OFFLOAD;
712 			else
713 				ifp->if_hwassist &= ~CSUM_OFFLOAD;
714 		}
715 		if (mask & IFCAP_TSO) {
716 			ifp->if_capenable ^= IFCAP_TSO;
717 			if (ifp->if_capenable & IFCAP_TSO)
718 				ifp->if_hwassist |= CSUM_TSO;
719 			else
720 				ifp->if_hwassist &= ~CSUM_TSO;
721 		}
722 		if (mask & IFCAP_RSS)
723 			ifp->if_capenable ^= IFCAP_RSS;
724 		if (reinit && (ifp->if_flags & IFF_RUNNING))
725 			ix_init(sc);
726 		break;
727 
728 #if 0
729 	case SIOCGI2C:
730 	{
731 		struct ixgbe_i2c_req	i2c;
732 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
733 		if (error)
734 			break;
735 		if ((i2c.dev_addr != 0xA0) && (i2c.dev_addr != 0xA2)) {
736 			error = EINVAL;
737 			break;
738 		}
739 		hw->phy.ops.read_i2c_byte(hw, i2c.offset,
740 		    i2c.dev_addr, i2c.data);
741 		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
742 		break;
743 	}
744 #endif
745 
746 	default:
747 		error = ether_ioctl(ifp, command, data);
748 		break;
749 	}
750 	return error;
751 }
752 
753 #define IXGBE_MHADD_MFS_SHIFT 16
754 
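/*
 * (Re)initialize the hardware and driver state: stop the interface,
 * program the MAC address and multicast/VLAN filters, bring up the
 * TX/RX units, set up interrupt vectors (IVARs) and flow control,
 * then mark the interface running and restart the timer.
 */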
755 static void
756 ix_init(void *xsc)
757 {
758 	struct ix_softc *sc = xsc;
759 	struct ifnet *ifp = &sc->arpcom.ac_if;
760 	struct ixgbe_hw *hw = &sc->hw;
761 	uint32_t rxpb, frame, size, tmp;
762 	uint32_t gpie, rxctrl;
763 	int i, error;
764 	boolean_t polling;
765 
766 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
767 
768 	ix_stop(sc);
769 
770 	polling = FALSE;
771 #ifdef IFPOLL_ENABLE
772 	if (ifp->if_flags & IFF_NPOLLING)
773 		polling = TRUE;
774 #endif
775 
776 	/* Configure # of used RX/TX rings */
777 	ix_set_ring_inuse(sc, polling);
778 	ifq_set_subq_mask(&ifp->if_snd, sc->tx_ring_inuse - 1);
779 
780 	/* Get the latest MAC address; the user may have set a LAA */
781 	bcopy(IF_LLADDR(ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS);
782 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
783 	hw->addr_ctrl.rar_used_count = 1;
784 
785 	/* Prepare transmit descriptors and buffers */
786 	for (i = 0; i < sc->tx_ring_inuse; ++i)
787 		ix_init_tx_ring(&sc->tx_rings[i]);
788 
789 	ixgbe_init_hw(hw);
790 	ix_init_tx_unit(sc);
791 
792 	/* Setup Multicast table */
793 	ix_set_multi(sc);
794 
795 	/* Prepare receive descriptors and buffers */
796 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
797 		error = ix_init_rx_ring(&sc->rx_rings[i]);
798 		if (error) {
799 			if_printf(ifp, "Could not initialize RX ring%d\n", i);
800 			ix_stop(sc);
801 			return;
802 		}
803 	}
804 
805 	/* Configure RX settings */
806 	ix_init_rx_unit(sc);
807 
808 	gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
809 
810 	/* Enable Fan Failure Interrupt */
811 	gpie |= IXGBE_SDP1_GPIEN;
812 
813 	/* Add for Module detection */
814 	if (hw->mac.type == ixgbe_mac_82599EB)
815 		gpie |= IXGBE_SDP2_GPIEN;
816 
817 	/* Thermal Failure Detection */
818 	if (hw->mac.type == ixgbe_mac_X540)
819 		gpie |= IXGBE_SDP0_GPIEN;
820 
821 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
822 		/* Enable Enhanced MSIX mode */
823 		gpie |= IXGBE_GPIE_MSIX_MODE;
824 		gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
825 		    IXGBE_GPIE_OCD;
826 	}
827 	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
828 
829 	/* Set MTU size */
830 	if (ifp->if_mtu > ETHERMTU) {
831 		uint32_t mhadd;
832 
833 		mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
834 		mhadd &= ~IXGBE_MHADD_MFS_MASK;
835 		mhadd |= sc->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
836 		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
837 	}
838 
839 	/*
840 	 * Enable TX rings
841 	 */
842 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
843 		uint32_t txdctl;
844 
845 		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
846 		txdctl |= IXGBE_TXDCTL_ENABLE;
847 
848 		/*
849 		 * Set WTHRESH to 0, since TX head write-back is used
850 		 */
851 		txdctl &= ~(0x7f << 16);
852 
853 		/*
854 		 * When the internal queue falls below PTHRESH (32),
855 		 * start prefetching as long as there are at least
856 		 * HTHRESH (1) buffers ready. The values are taken
857 		 * from the Intel linux driver 3.8.21.
858 		 * Prefetching enables tx line rate even with 1 queue.
859 		 */
860 		txdctl |= (32 << 0) | (1 << 8);
861 		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
862 	}
863 
864 	/*
865 	 * Enable RX rings
866 	 */
867 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
868 		uint32_t rxdctl;
869 		int k;
870 
871 		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
872 		if (hw->mac.type == ixgbe_mac_82598EB) {
873 			/*
874 			 * PTHRESH = 21
875 			 * HTHRESH = 4
876 			 * WTHRESH = 8
877 			 */
878 			rxdctl &= ~0x3FFFFF;
879 			rxdctl |= 0x080420;
880 		}
881 		rxdctl |= IXGBE_RXDCTL_ENABLE;
882 		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
883 		for (k = 0; k < 10; ++k) {
884 			if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
885 			    IXGBE_RXDCTL_ENABLE)
886 				break;
887 			else
888 				msec_delay(1);
889 		}
890 		wmb();
891 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i),
892 		    sc->rx_rings[0].rx_ndesc - 1);
893 	}
894 
895 	/* Set up VLAN support and filter */
896 	ix_set_vlan(sc);
897 
898 	/* Enable Receive engine */
899 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
900 	if (hw->mac.type == ixgbe_mac_82598EB)
901 		rxctrl |= IXGBE_RXCTRL_DMBYPS;
902 	rxctrl |= IXGBE_RXCTRL_RXEN;
903 	ixgbe_enable_rx_dma(hw, rxctrl);
904 
905 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
906 		const struct ix_tx_ring *txr = &sc->tx_rings[i];
907 
908 		if (txr->tx_intr_vec >= 0) {
909 			ix_set_ivar(sc, i, txr->tx_intr_vec, 1);
910 		} else {
911 			/*
912 			 * An unconfigured TX interrupt vector can only
913 			 * happen with MSI-X.
914 			 */
915 			KASSERT(sc->intr_type == PCI_INTR_TYPE_MSIX,
916 			    ("TX intr vector is not set"));
917 			KASSERT(i < sc->rx_ring_inuse,
918 			    ("invalid TX ring %d, no piggyback RX ring", i));
919 			KASSERT(sc->rx_rings[i].rx_txr == txr,
920 			    ("RX ring %d piggybacked TX ring mismatch", i));
921 			if (bootverbose)
922 				if_printf(ifp, "IVAR skips TX ring %d\n", i);
923 		}
924 	}
925 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
926 		const struct ix_rx_ring *rxr = &sc->rx_rings[i];
927 
928 		KKASSERT(rxr->rx_intr_vec >= 0);
929 		ix_set_ivar(sc, i, rxr->rx_intr_vec, 0);
930 		if (rxr->rx_txr != NULL) {
931 			/*
932 			 * Piggyback the TX ring interrupt onto the RX
933 			 * ring interrupt vector.
934 			 */
935 			KASSERT(rxr->rx_txr->tx_intr_vec < 0,
936 			    ("piggybacked TX ring configured intr vector"));
937 			KASSERT(rxr->rx_txr->tx_idx == i,
938 			    ("RX ring %d piggybacked TX ring %u",
939 			     i, rxr->rx_txr->tx_idx));
940 			ix_set_ivar(sc, i, rxr->rx_intr_vec, 1);
941 			if (bootverbose) {
942 				if_printf(ifp, "IVAR RX ring %d piggybacks "
943 				    "TX ring %u\n", i, rxr->rx_txr->tx_idx);
944 			}
945 		}
946 	}
947 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
948 		/* Set up status MSI-X vector; it uses fixed entry 1 */
949 		ix_set_ivar(sc, 1, sc->sts_msix_vec, -1);
950 
951 		/* Set up auto-mask for TX and RX rings */
952 		if (hw->mac.type == ixgbe_mac_82598EB) {
953 			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
954 		} else {
955 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
956 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
957 		}
958 	} else {
959 		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
960 	}
961 	for (i = 0; i < sc->intr_cnt; ++i)
962 		ix_set_eitr(sc, i, sc->intr_data[i].intr_rate);
963 
964 	/*
965 	 * Check on any SFP devices that need to be kick-started
966 	 */
967 	if (hw->phy.type == ixgbe_phy_none) {
968 		error = hw->phy.ops.identify(hw);
969 		if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
970 			if_printf(ifp,
971 			    "Unsupported SFP+ module type was detected.\n");
972 			/* XXX stop */
973 			return;
974 		}
975 	}
976 
977 	/* Config/Enable Link */
978 	ix_config_link(sc);
979 
980 	/*
981 	 * Hardware Packet Buffer & Flow Control setup
982 	 */
983 	frame = sc->max_frame_size;
984 
985 	/* Calculate High Water */
986 	if (hw->mac.type == ixgbe_mac_X540)
987 		tmp = IXGBE_DV_X540(frame, frame);
988 	else
989 		tmp = IXGBE_DV(frame, frame);
990 	size = IXGBE_BT2KB(tmp);
991 	rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
992 	hw->fc.high_water[0] = rxpb - size;
993 
994 	/* Now calculate Low Water */
995 	if (hw->mac.type == ixgbe_mac_X540)
996 		tmp = IXGBE_LOW_DV_X540(frame);
997 	else
998 		tmp = IXGBE_LOW_DV(frame);
999 	hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
1000 
1001 	hw->fc.requested_mode = sc->fc;
1002 	hw->fc.pause_time = IX_FC_PAUSE;
1003 	hw->fc.send_xon = TRUE;
1004 
1005 	/* Initialize the FC settings */
1006 	ixgbe_start_hw(hw);
1007 
1008 	/*
1009 	 * Only enable interrupts if we are not polling; make sure
1010 	 * they are off otherwise.
1011 	 */
1012 	if (polling)
1013 		ix_disable_intr(sc);
1014 	else
1015 		ix_enable_intr(sc);
1016 
1017 	ifp->if_flags |= IFF_RUNNING;
1018 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
1019 		ifsq_clr_oactive(sc->tx_rings[i].tx_ifsq);
1020 		ifsq_watchdog_start(&sc->tx_rings[i].tx_watchdog);
1021 	}
1022 
1023 	ix_set_timer_cpuid(sc, polling);
1024 	callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
1025 }
1026 
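/*
 * Legacy/MSI interrupt handler: read EICR, drain the RX ring(s) and
 * the TX ring, handle status interrupts and re-enable the interrupt
 * mask (EIMS).
 */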
1027 static void
1028 ix_intr(void *xsc)
1029 {
1030 	struct ix_softc *sc = xsc;
1031 	struct ixgbe_hw	*hw = &sc->hw;
1032 	uint32_t eicr;
1033 
1034 	ASSERT_SERIALIZED(&sc->main_serialize);
1035 
1036 	eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1037 	if (eicr == 0) {
1038 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
1039 		return;
1040 	}
1041 
1042 	if (eicr & IX_RX0_INTR_MASK) {
1043 		struct ix_rx_ring *rxr = &sc->rx_rings[0];
1044 
1045 		lwkt_serialize_enter(&rxr->rx_serialize);
1046 		ix_rxeof(rxr, -1);
1047 		lwkt_serialize_exit(&rxr->rx_serialize);
1048 	}
1049 	if (eicr & IX_RX1_INTR_MASK) {
1050 		struct ix_rx_ring *rxr;
1051 
1052 		KKASSERT(sc->rx_ring_inuse == IX_MIN_RXRING_RSS);
1053 		rxr = &sc->rx_rings[1];
1054 
1055 		lwkt_serialize_enter(&rxr->rx_serialize);
1056 		ix_rxeof(rxr, -1);
1057 		lwkt_serialize_exit(&rxr->rx_serialize);
1058 	}
1059 
1060 	if (eicr & IX_TX_INTR_MASK) {
1061 		struct ix_tx_ring *txr = &sc->tx_rings[0];
1062 
1063 		lwkt_serialize_enter(&txr->tx_serialize);
1064 		ix_txeof(txr, *(txr->tx_hdr));
1065 		if (!ifsq_is_empty(txr->tx_ifsq))
1066 			ifsq_devstart(txr->tx_ifsq);
1067 		lwkt_serialize_exit(&txr->tx_serialize);
1068 	}
1069 
1070 	if (__predict_false(eicr & IX_EICR_STATUS))
1071 		ix_intr_status(sc, eicr);
1072 
1073 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
1074 }
1075 
1076 static void
1077 ix_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1078 {
1079 	struct ix_softc *sc = ifp->if_softc;
1080 
1081 	ix_update_link_status(sc);
1082 
1083 	ifmr->ifm_status = IFM_AVALID;
1084 	ifmr->ifm_active = IFM_ETHER;
1085 
1086 	if (!sc->link_active)
1087 		return;
1088 
1089 	ifmr->ifm_status |= IFM_ACTIVE;
1090 
1091 	switch (sc->link_speed) {
1092 	case IXGBE_LINK_SPEED_100_FULL:
1093 		ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1094 		break;
1095 	case IXGBE_LINK_SPEED_1GB_FULL:
1096 		ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1097 		break;
1098 	case IXGBE_LINK_SPEED_10GB_FULL:
1099 		ifmr->ifm_active |= sc->optics | IFM_FDX;
1100 		break;
1101 	}
1102 }
1103 
1104 static int
1105 ix_media_change(struct ifnet *ifp)
1106 {
1107 	struct ix_softc *sc = ifp->if_softc;
1108 	struct ifmedia *ifm = &sc->media;
1109 
1110 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1111 		return EINVAL;
1112 
1113 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1114 	case IFM_AUTO:
1115 		sc->hw.phy.autoneg_advertised =
1116 		    IXGBE_LINK_SPEED_100_FULL |
1117 		    IXGBE_LINK_SPEED_1GB_FULL |
1118 		    IXGBE_LINK_SPEED_10GB_FULL;
1119 		break;
1120 	default:
1121 		if_printf(ifp, "Only auto media type\n");
1122 		return EINVAL;
1123 	}
1124 	return 0;
1125 }
1126 
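/*
 * Ensure that the Ethernet, IP and TCP headers of a TSO packet reside
 * in the first mbuf, pulling up the chain if necessary.
 */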
1127 static __inline int
1128 ix_tso_pullup(struct mbuf **mp)
1129 {
1130 	int hoff, iphlen, thoff;
1131 	struct mbuf *m;
1132 
1133 	m = *mp;
1134 	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));
1135 
1136 	iphlen = m->m_pkthdr.csum_iphlen;
1137 	thoff = m->m_pkthdr.csum_thlen;
1138 	hoff = m->m_pkthdr.csum_lhlen;
1139 
1140 	KASSERT(iphlen > 0, ("invalid ip hlen"));
1141 	KASSERT(thoff > 0, ("invalid tcp hlen"));
1142 	KASSERT(hoff > 0, ("invalid ether hlen"));
1143 
1144 	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
1145 		m = m_pullup(m, hoff + iphlen + thoff);
1146 		if (m == NULL) {
1147 			*mp = NULL;
1148 			return ENOBUFS;
1149 		}
1150 		*mp = m;
1151 	}
1152 	return 0;
1153 }
1154 
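/*
 * Map a packet for DMA and fill TX descriptors for it, setting up a
 * checksum/TSO context descriptor when needed.  The TDT update is
 * deferred to the caller through *idx.
 */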
1155 static int
1156 ix_encap(struct ix_tx_ring *txr, struct mbuf **m_headp,
1157     uint16_t *segs_used, int *idx)
1158 {
1159 	uint32_t olinfo_status = 0, cmd_type_len, cmd_rs = 0;
1160 	int i, j, error, nsegs, first, maxsegs;
1161 	struct mbuf *m_head = *m_headp;
1162 	bus_dma_segment_t segs[IX_MAX_SCATTER];
1163 	bus_dmamap_t map;
1164 	struct ix_tx_buf *txbuf;
1165 	union ixgbe_adv_tx_desc *txd = NULL;
1166 
1167 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1168 		error = ix_tso_pullup(m_headp);
1169 		if (__predict_false(error))
1170 			return error;
1171 		m_head = *m_headp;
1172 	}
1173 
1174 	/* Basic descriptor defines */
1175 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1176 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1177 
1178 	if (m_head->m_flags & M_VLANTAG)
1179 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1180 
1181 	/*
1182 	 * It is important to capture the first descriptor used,
1183 	 * because it will contain the index of the one we tell
1184 	 * the hardware to report back.
1185 	 */
1186 	first = txr->tx_next_avail;
1187 	txbuf = &txr->tx_buf[first];
1188 	map = txbuf->map;
1189 
1190 	/*
1191 	 * Map the packet for DMA.
1192 	 */
1193 	maxsegs = txr->tx_avail - IX_TX_RESERVED;
1194 	if (maxsegs > IX_MAX_SCATTER)
1195 		maxsegs = IX_MAX_SCATTER;
1196 
1197 	error = bus_dmamap_load_mbuf_defrag(txr->tx_tag, map, m_headp,
1198 	    segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
1199 	if (__predict_false(error)) {
1200 		m_freem(*m_headp);
1201 		*m_headp = NULL;
1202 		return error;
1203 	}
1204 	bus_dmamap_sync(txr->tx_tag, map, BUS_DMASYNC_PREWRITE);
1205 
1206 	m_head = *m_headp;
1207 
1208 	/*
1209 	 * Set up the appropriate offload context if requested;
1210 	 * this may consume one TX descriptor.
1211 	 */
1212 	if (ix_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status)) {
1213 		(*segs_used)++;
1214 		txr->tx_nsegs++;
1215 	}
1216 
1217 	*segs_used += nsegs;
1218 	txr->tx_nsegs += nsegs;
1219 	if (txr->tx_nsegs >= txr->tx_intr_nsegs) {
1220 		/*
1221 		 * Report Status (RS) is turned on every intr_nsegs
1222 		 * descriptors (roughly).
1223 		 */
1224 		txr->tx_nsegs = 0;
1225 		cmd_rs = IXGBE_TXD_CMD_RS;
1226 	}
1227 
1228 	i = txr->tx_next_avail;
1229 	for (j = 0; j < nsegs; j++) {
1230 		bus_size_t seglen;
1231 		bus_addr_t segaddr;
1232 
1233 		txbuf = &txr->tx_buf[i];
1234 		txd = &txr->tx_base[i];
1235 		seglen = segs[j].ds_len;
1236 		segaddr = htole64(segs[j].ds_addr);
1237 
1238 		txd->read.buffer_addr = segaddr;
1239 		txd->read.cmd_type_len = htole32(IXGBE_TXD_CMD_IFCS |
1240 		    cmd_type_len | seglen);
1241 		txd->read.olinfo_status = htole32(olinfo_status);
1242 
1243 		if (++i == txr->tx_ndesc)
1244 			i = 0;
1245 	}
1246 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | cmd_rs);
1247 
1248 	txr->tx_avail -= nsegs;
1249 	txr->tx_next_avail = i;
1250 
1251 	txbuf->m_head = m_head;
1252 	txr->tx_buf[first].map = txbuf->map;
1253 	txbuf->map = map;
1254 
1255 	/*
1256 	 * Defer TDT updating until enough descriptors are set up
1257 	 */
1258 	*idx = i;
1259 
1260 	return 0;
1261 }
1262 
1263 static void
1264 ix_set_promisc(struct ix_softc *sc)
1265 {
1266 	struct ifnet *ifp = &sc->arpcom.ac_if;
1267 	uint32_t reg_rctl;
1268 	int mcnt = 0;
1269 
1270 	reg_rctl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
1271 	reg_rctl &= ~IXGBE_FCTRL_UPE;
1272 	if (ifp->if_flags & IFF_ALLMULTI) {
1273 		mcnt = IX_MAX_MCASTADDR;
1274 	} else {
1275 		struct ifmultiaddr *ifma;
1276 
1277 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1278 			if (ifma->ifma_addr->sa_family != AF_LINK)
1279 				continue;
1280 			if (mcnt == IX_MAX_MCASTADDR)
1281 				break;
1282 			mcnt++;
1283 		}
1284 	}
1285 	if (mcnt < IX_MAX_MCASTADDR)
1286 		reg_rctl &= ~IXGBE_FCTRL_MPE;
1287 	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1288 
1289 	if (ifp->if_flags & IFF_PROMISC) {
1290 		reg_rctl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
1291 		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1292 	} else if (ifp->if_flags & IFF_ALLMULTI) {
1293 		reg_rctl |= IXGBE_FCTRL_MPE;
1294 		reg_rctl &= ~IXGBE_FCTRL_UPE;
1295 		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1296 	}
1297 }
1298 
1299 static void
1300 ix_set_multi(struct ix_softc *sc)
1301 {
1302 	struct ifnet *ifp = &sc->arpcom.ac_if;
1303 	struct ifmultiaddr *ifma;
1304 	uint32_t fctrl;
1305 	uint8_t	*mta;
1306 	int mcnt = 0;
1307 
1308 	mta = sc->mta;
1309 	bzero(mta, IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR);
1310 
1311 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1312 		if (ifma->ifma_addr->sa_family != AF_LINK)
1313 			continue;
1314 		if (mcnt == IX_MAX_MCASTADDR)
1315 			break;
1316 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1317 		    &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
1318 		    IXGBE_ETH_LENGTH_OF_ADDRESS);
1319 		mcnt++;
1320 	}
1321 
1322 	fctrl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
1323 	fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1324 	if (ifp->if_flags & IFF_PROMISC) {
1325 		fctrl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
1326 	} else if (mcnt >= IX_MAX_MCASTADDR || (ifp->if_flags & IFF_ALLMULTI)) {
1327 		fctrl |= IXGBE_FCTRL_MPE;
1328 		fctrl &= ~IXGBE_FCTRL_UPE;
1329 	} else {
1330 		fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1331 	}
1332 	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, fctrl);
1333 
1334 	if (mcnt < IX_MAX_MCASTADDR) {
1335 		ixgbe_update_mc_addr_list(&sc->hw,
1336 		    mta, mcnt, ix_mc_array_itr, TRUE);
1337 	}
1338 }
1339 
1340 /*
1341  * This is an iterator function needed by the shared multicast
1342  * code.  It feeds the shared code routine the addresses collected
1343  * in the array of ix_set_multi() one by one.
1344  */
1345 static uint8_t *
1346 ix_mc_array_itr(struct ixgbe_hw *hw, uint8_t **update_ptr, uint32_t *vmdq)
1347 {
1348 	uint8_t *addr = *update_ptr;
1349 	uint8_t *newptr;
1350 	*vmdq = 0;
1351 
1352 	newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
1353 	*update_ptr = newptr;
1354 	return addr;
1355 }
1356 
1357 static void
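/*
 * Periodic (1Hz) callout: probe for hot-plugged SFP modules and
 * refresh link state and statistics.
 */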
1358 ix_timer(void *arg)
1359 {
1360 	struct ix_softc *sc = arg;
1361 
1362 	lwkt_serialize_enter(&sc->main_serialize);
1363 
1364 	if ((sc->arpcom.ac_if.if_flags & IFF_RUNNING) == 0) {
1365 		lwkt_serialize_exit(&sc->main_serialize);
1366 		return;
1367 	}
1368 
1369 	/* Check for pluggable optics */
1370 	if (sc->sfp_probe) {
1371 		if (!ix_sfp_probe(sc))
1372 			goto done; /* Nothing to do */
1373 	}
1374 
1375 	ix_update_link_status(sc);
1376 	ix_update_stats(sc);
1377 
1378 done:
1379 	callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
1380 	lwkt_serialize_exit(&sc->main_serialize);
1381 }
1382 
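/*
 * Propagate the current link state to the ifnet layer; flow control
 * settings are re-applied when the link comes up.
 */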
1383 static void
1384 ix_update_link_status(struct ix_softc *sc)
1385 {
1386 	struct ifnet *ifp = &sc->arpcom.ac_if;
1387 
1388 	if (sc->link_up) {
1389 		if (sc->link_active == FALSE) {
1390 			if (bootverbose) {
1391 				if_printf(ifp, "Link is up %d Gbps %s\n",
1392 				    sc->link_speed == 128 ? 10 : 1,
1393 				    "Full Duplex");
1394 			}
1395 			sc->link_active = TRUE;
1396 
1397 			/* Update any Flow Control changes */
1398 			ixgbe_fc_enable(&sc->hw);
1399 
1400 			ifp->if_link_state = LINK_STATE_UP;
1401 			if_link_state_change(ifp);
1402 		}
1403 	} else { /* Link down */
1404 		if (sc->link_active == TRUE) {
1405 			if (bootverbose)
1406 				if_printf(ifp, "Link is Down\n");
1407 			ifp->if_link_state = LINK_STATE_DOWN;
1408 			if_link_state_change(ifp);
1409 
1410 			sc->link_active = FALSE;
1411 		}
1412 	}
1413 }
1414 
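/*
 * Stop the interface: disable interrupts, stop the timer, reset and
 * halt the MAC, turn off the laser and release TX/RX ring buffers.
 */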
1415 static void
1416 ix_stop(struct ix_softc *sc)
1417 {
1418 	struct ixgbe_hw *hw = &sc->hw;
1419 	struct ifnet *ifp = &sc->arpcom.ac_if;
1420 	int i;
1421 
1422 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1423 
1424 	ix_disable_intr(sc);
1425 	callout_stop(&sc->timer);
1426 
1427 	ifp->if_flags &= ~IFF_RUNNING;
1428 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1429 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1430 
1431 		ifsq_clr_oactive(txr->tx_ifsq);
1432 		ifsq_watchdog_stop(&txr->tx_watchdog);
1433 		txr->tx_flags &= ~IX_TXFLAG_ENABLED;
1434 	}
1435 
1436 	ixgbe_reset_hw(hw);
1437 	hw->adapter_stopped = FALSE;
1438 	ixgbe_stop_adapter(hw);
1439 	if (hw->mac.type == ixgbe_mac_82599EB)
1440 		ixgbe_stop_mac_link_on_d3_82599(hw);
1441 	/* Turn off the laser - noop with no optics */
1442 	ixgbe_disable_tx_laser(hw);
1443 
1444 	/* Update the stack */
1445 	sc->link_up = FALSE;
1446 	ix_update_link_status(sc);
1447 
1448 	/* Reprogram the RAR[0] in case user changed it. */
1449 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV);
1450 
1451 	for (i = 0; i < sc->tx_ring_cnt; ++i)
1452 		ix_free_tx_ring(&sc->tx_rings[i]);
1453 
1454 	for (i = 0; i < sc->rx_ring_cnt; ++i)
1455 		ix_free_rx_ring(&sc->rx_rings[i]);
1456 }
1457 
1458 static void
1459 ix_setup_optics(struct ix_softc *sc)
1460 {
1461 	struct ixgbe_hw *hw = &sc->hw;
1462 	int layer;
1463 
1464 	layer = ixgbe_get_supported_physical_layer(hw);
1465 
1466 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
1467 		sc->optics = IFM_10G_T;
1468 		return;
1469 	}
1470 
1471 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
1472 		sc->optics = IFM_1000_T;
1473 		return;
1474 	}
1475 
1476 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
1477 		sc->optics = IFM_1000_SX;
1478 		return;
1479 	}
1480 
1481 	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
1482 	    IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
1483 		sc->optics = IFM_10G_LR;
1484 		return;
1485 	}
1486 
1487 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
1488 		sc->optics = IFM_10G_SR;
1489 		return;
1490 	}
1491 
1492 	if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
1493 		sc->optics = IFM_10G_TWINAX;
1494 		return;
1495 	}
1496 
1497 	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
1498 	    IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
1499 		sc->optics = IFM_10G_CX4;
1500 		return;
1501 	}
1502 
1503 	/* If we get here just set the default */
1504 	sc->optics = IFM_ETHER | IFM_AUTO;
1505 }
1506 
1507 static void
1508 ix_setup_ifp(struct ix_softc *sc)
1509 {
1510 	struct ixgbe_hw *hw = &sc->hw;
1511 	struct ifnet *ifp = &sc->arpcom.ac_if;
1512 	int i;
1513 
1514 	ifp->if_baudrate = IF_Gbps(10UL);
1515 
1516 	ifp->if_softc = sc;
1517 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1518 	ifp->if_init = ix_init;
1519 	ifp->if_ioctl = ix_ioctl;
1520 	ifp->if_start = ix_start;
1521 	ifp->if_serialize = ix_serialize;
1522 	ifp->if_deserialize = ix_deserialize;
1523 	ifp->if_tryserialize = ix_tryserialize;
1524 #ifdef INVARIANTS
1525 	ifp->if_serialize_assert = ix_serialize_assert;
1526 #endif
1527 #ifdef IFPOLL_ENABLE
1528 	ifp->if_npoll = ix_npoll;
1529 #endif
1530 
1531 	/* Increase TSO burst length */
1532 	ifp->if_tsolen = (8 * ETHERMTU);
1533 
1534 	ifq_set_maxlen(&ifp->if_snd, sc->tx_rings[0].tx_ndesc - 2);
1535 	ifq_set_ready(&ifp->if_snd);
1536 	ifq_set_subq_cnt(&ifp->if_snd, sc->tx_ring_cnt);
1537 
1538 	ifp->if_mapsubq = ifq_mapsubq_mask;
1539 	ifq_set_subq_mask(&ifp->if_snd, 0);
1540 
1541 	ether_ifattach(ifp, hw->mac.addr, NULL);
1542 
1543 	ifp->if_capabilities =
1544 	    IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
1545 	if (IX_ENABLE_HWRSS(sc))
1546 		ifp->if_capabilities |= IFCAP_RSS;
1547 	ifp->if_capenable = ifp->if_capabilities;
1548 	ifp->if_hwassist = CSUM_OFFLOAD | CSUM_TSO;
1549 
1550 	/*
1551 	 * Tell the upper layer(s) we support long frames.
1552 	 */
1553 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
1554 
1555 	/* Setup TX rings and subqueues */
1556 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1557 		struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
1558 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1559 
1560 		ifsq_set_cpuid(ifsq, txr->tx_intr_cpuid);
1561 		ifsq_set_priv(ifsq, txr);
1562 		ifsq_set_hw_serialize(ifsq, &txr->tx_serialize);
1563 		txr->tx_ifsq = ifsq;
1564 
1565 		ifsq_watchdog_init(&txr->tx_watchdog, ifsq, ix_watchdog);
1566 	}
1567 
1568 	/*
1569 	 * Specify the media types supported by this adapter and register
1570 	 * callbacks to update media and link information
1571 	 */
1572 	ifmedia_add(&sc->media, IFM_ETHER | sc->optics, 0, NULL);
1573 	ifmedia_set(&sc->media, IFM_ETHER | sc->optics);
1574 	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
1575 		ifmedia_add(&sc->media,
1576 		    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
1577 		ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_T, 0, NULL);
1578 	}
1579 	ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
1580 	ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO);
1581 }
1582 
1583 static boolean_t
1584 ix_is_sfp(const struct ixgbe_hw *hw)
1585 {
1586 	switch (hw->phy.type) {
1587 	case ixgbe_phy_sfp_avago:
1588 	case ixgbe_phy_sfp_ftl:
1589 	case ixgbe_phy_sfp_intel:
1590 	case ixgbe_phy_sfp_unknown:
1591 	case ixgbe_phy_sfp_passive_tyco:
1592 	case ixgbe_phy_sfp_passive_unknown:
1593 		return TRUE;
1594 	default:
1595 		return FALSE;
1596 	}
1597 }
1598 
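/*
 * Configure and enable the link.  SFP+ ports go through module or
 * multispeed-fiber handling; other media negotiate through the shared
 * code setup_link path.
 */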
1599 static void
1600 ix_config_link(struct ix_softc *sc)
1601 {
1602 	struct ixgbe_hw *hw = &sc->hw;
1603 	boolean_t sfp;
1604 
1605 	sfp = ix_is_sfp(hw);
1606 	if (sfp) {
1607 		if (hw->phy.multispeed_fiber) {
1608 			hw->mac.ops.setup_sfp(hw);
1609 			ixgbe_enable_tx_laser(hw);
1610 			ix_handle_msf(sc);
1611 		} else {
1612 			ix_handle_mod(sc);
1613 		}
1614 	} else {
1615 		uint32_t autoneg, err = 0;
1616 
1617 		if (hw->mac.ops.check_link != NULL) {
1618 			err = ixgbe_check_link(hw, &sc->link_speed,
1619 			    &sc->link_up, FALSE);
1620 			if (err)
1621 				return;
1622 		}
1623 
1624 		autoneg = hw->phy.autoneg_advertised;
1625 		if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
1626 			bool negotiate;
1627 
1628 			err = hw->mac.ops.get_link_capabilities(hw,
1629 			    &autoneg, &negotiate);
1630 			if (err)
1631 				return;
1632 		}
1633 
1634 		if (hw->mac.ops.setup_link != NULL) {
1635 			err = hw->mac.ops.setup_link(hw,
1636 			    autoneg, sc->link_up);
1637 			if (err)
1638 				return;
1639 		}
1640 	}
1641 }
1642 
1643 static int
1644 ix_alloc_rings(struct ix_softc *sc)
1645 {
1646 	int error, i;
1647 
1648 	/*
1649 	 * Create top level busdma tag
1650 	 */
1651 	error = bus_dma_tag_create(NULL, 1, 0,
1652 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1653 	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
1654 	    &sc->parent_tag);
1655 	if (error) {
1656 		device_printf(sc->dev, "could not create top level DMA tag\n");
1657 		return error;
1658 	}
1659 
1660 	/*
1661 	 * Allocate TX descriptor rings and buffers
1662 	 */
1663 	sc->tx_rings = kmalloc_cachealign(
1664 	    sizeof(struct ix_tx_ring) * sc->tx_ring_cnt,
1665 	    M_DEVBUF, M_WAITOK | M_ZERO);
1666 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1667 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1668 
1669 		txr->tx_sc = sc;
1670 		txr->tx_idx = i;
1671 		txr->tx_intr_vec = -1;
1672 		lwkt_serialize_init(&txr->tx_serialize);
1673 
1674 		error = ix_create_tx_ring(txr);
1675 		if (error)
1676 			return error;
1677 	}
1678 
1679 	/*
1680 	 * Allocate RX descriptor rings and buffers
1681 	 */
1682 	sc->rx_rings = kmalloc_cachealign(
1683 	    sizeof(struct ix_rx_ring) * sc->rx_ring_cnt,
1684 	    M_DEVBUF, M_WAITOK | M_ZERO);
1685 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
1686 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
1687 
1688 		rxr->rx_sc = sc;
1689 		rxr->rx_idx = i;
1690 		rxr->rx_intr_vec = -1;
1691 		lwkt_serialize_init(&rxr->rx_serialize);
1692 
1693 		error = ix_create_rx_ring(rxr);
1694 		if (error)
1695 			return error;
1696 	}
1697 
1698 	return 0;
1699 }
1700 
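/*
 * Allocate the DMA resources of one TX ring: the head write-back
 * buffer, the descriptor ring itself and per-packet DMA maps.
 */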
1701 static int
1702 ix_create_tx_ring(struct ix_tx_ring *txr)
1703 {
1704 	int error, i, tsize, ntxd;
1705 
1706 	/*
1707 	 * Validate number of transmit descriptors.  It must not exceed
1708 	 * the hardware maximum, and must be a multiple of IX_DBA_ALIGN.
1709 	 */
1710 	ntxd = device_getenv_int(txr->tx_sc->dev, "txd", ix_txd);
1711 	if (((ntxd * sizeof(union ixgbe_adv_tx_desc)) % IX_DBA_ALIGN) != 0 ||
1712 	    ntxd < IX_MIN_TXD || ntxd > IX_MAX_TXD) {
1713 		device_printf(txr->tx_sc->dev,
1714 		    "Using %d TX descriptors instead of %d!\n",
1715 		    IX_DEF_TXD, ntxd);
1716 		txr->tx_ndesc = IX_DEF_TXD;
1717 	} else {
1718 		txr->tx_ndesc = ntxd;
1719 	}
1720 
1721 	/*
1722 	 * Allocate TX head write-back buffer
1723 	 */
1724 	txr->tx_hdr = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
1725 	    __VM_CACHELINE_SIZE, __VM_CACHELINE_SIZE, BUS_DMA_WAITOK,
1726 	    &txr->tx_hdr_dtag, &txr->tx_hdr_map, &txr->tx_hdr_paddr);
1727 	if (txr->tx_hdr == NULL) {
1728 		device_printf(txr->tx_sc->dev,
1729 		    "Unable to allocate TX head write-back buffer\n");
1730 		return ENOMEM;
1731 	}
1732 
1733 	/*
1734 	 * Allocate TX descriptor ring
1735 	 */
1736 	tsize = roundup2(txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc),
1737 	    IX_DBA_ALIGN);
1738 	txr->tx_base = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
1739 	    IX_DBA_ALIGN, tsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
1740 	    &txr->tx_base_dtag, &txr->tx_base_map, &txr->tx_base_paddr);
1741 	if (txr->tx_base == NULL) {
1742 		device_printf(txr->tx_sc->dev,
1743 		    "Unable to allocate TX Descriptor memory\n");
1744 		return ENOMEM;
1745 	}
1746 
1747 	tsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_tx_buf) * txr->tx_ndesc);
1748 	txr->tx_buf = kmalloc_cachealign(tsize, M_DEVBUF, M_WAITOK | M_ZERO);
1749 
1750 	/*
1751 	 * Create DMA tag for TX buffers
1752 	 */
1753 	error = bus_dma_tag_create(txr->tx_sc->parent_tag,
1754 	    1, 0,		/* alignment, bounds */
1755 	    BUS_SPACE_MAXADDR,	/* lowaddr */
1756 	    BUS_SPACE_MAXADDR,	/* highaddr */
1757 	    NULL, NULL,		/* filter, filterarg */
1758 	    IX_TSO_SIZE,	/* maxsize */
1759 	    IX_MAX_SCATTER,	/* nsegments */
1760 	    PAGE_SIZE,		/* maxsegsize */
1761 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
1762 	    BUS_DMA_ONEBPAGE,	/* flags */
1763 	    &txr->tx_tag);
1764 	if (error) {
1765 		device_printf(txr->tx_sc->dev,
1766 		    "Unable to allocate TX DMA tag\n");
1767 		kfree(txr->tx_buf, M_DEVBUF);
1768 		txr->tx_buf = NULL;
1769 		return error;
1770 	}
1771 
1772 	/*
1773 	 * Create DMA maps for TX buffers
1774 	 */
1775 	for (i = 0; i < txr->tx_ndesc; ++i) {
1776 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
1777 
1778 		error = bus_dmamap_create(txr->tx_tag,
1779 		    BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &txbuf->map);
1780 		if (error) {
1781 			device_printf(txr->tx_sc->dev,
1782 			    "Unable to create TX DMA map\n");
1783 			ix_destroy_tx_ring(txr, i);
1784 			return error;
1785 		}
1786 	}
1787 
1788 	/*
1789 	 * Initialize various watermarks
1790 	 */
1791 	txr->tx_wreg_nsegs = IX_DEF_TXWREG_NSEGS;
1792 	txr->tx_intr_nsegs = txr->tx_ndesc / 16;
1793 
1794 	return 0;
1795 }
1796 
1797 static void
1798 ix_destroy_tx_ring(struct ix_tx_ring *txr, int ndesc)
1799 {
1800 	int i;
1801 
1802 	if (txr->tx_hdr != NULL) {
1803 		bus_dmamap_unload(txr->tx_hdr_dtag, txr->tx_hdr_map);
1804 		bus_dmamem_free(txr->tx_hdr_dtag,
1805 		    __DEVOLATILE(void *, txr->tx_hdr), txr->tx_hdr_map);
1806 		bus_dma_tag_destroy(txr->tx_hdr_dtag);
1807 		txr->tx_hdr = NULL;
1808 	}
1809 
1810 	if (txr->tx_base != NULL) {
1811 		bus_dmamap_unload(txr->tx_base_dtag, txr->tx_base_map);
1812 		bus_dmamem_free(txr->tx_base_dtag, txr->tx_base,
1813 		    txr->tx_base_map);
1814 		bus_dma_tag_destroy(txr->tx_base_dtag);
1815 		txr->tx_base = NULL;
1816 	}
1817 
1818 	if (txr->tx_buf == NULL)
1819 		return;
1820 
1821 	for (i = 0; i < ndesc; ++i) {
1822 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
1823 
1824 		KKASSERT(txbuf->m_head == NULL);
1825 		bus_dmamap_destroy(txr->tx_tag, txbuf->map);
1826 	}
1827 	bus_dma_tag_destroy(txr->tx_tag);
1828 
1829 	kfree(txr->tx_buf, M_DEVBUF);
1830 	txr->tx_buf = NULL;
1831 }
1832 
1833 static void
1834 ix_init_tx_ring(struct ix_tx_ring *txr)
1835 {
1836 	/* Clear the old ring contents */
1837 	bzero(txr->tx_base, sizeof(union ixgbe_adv_tx_desc) * txr->tx_ndesc);
1838 
1839 	/* Clear TX head write-back buffer */
1840 	*(txr->tx_hdr) = 0;
1841 
1842 	/* Reset indices */
1843 	txr->tx_next_avail = 0;
1844 	txr->tx_next_clean = 0;
1845 	txr->tx_nsegs = 0;
1846 
1847 	/* Set number of descriptors available */
1848 	txr->tx_avail = txr->tx_ndesc;
1849 
1850 	/* Enable this TX ring */
1851 	txr->tx_flags |= IX_TXFLAG_ENABLED;
1852 }
1853 
1854 static void
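/*
 * Program the hardware TX units: descriptor ring base/length, head
 * write-back address and, on non-82598 MACs, DMA TX control and MTQC.
 */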
1855 ix_init_tx_unit(struct ix_softc *sc)
1856 {
1857 	struct ixgbe_hw	*hw = &sc->hw;
1858 	int i;
1859 
1860 	/*
1861 	 * Setup the Base and Length of the Tx Descriptor Ring
1862 	 */
1863 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
1864 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1865 		uint64_t tdba = txr->tx_base_paddr;
1866 		uint64_t hdr_paddr = txr->tx_hdr_paddr;
1867 		uint32_t txctrl;
1868 
1869 		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i), (uint32_t)tdba);
1870 		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (uint32_t)(tdba >> 32));
1871 		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
1872 		    txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc));
1873 
1874 		/* Setup the HW Tx Head and Tail descriptor pointers */
1875 		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
1876 		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
1877 
1878 		/* Disable TX head write-back relax ordering */
1879 		switch (hw->mac.type) {
1880 		case ixgbe_mac_82598EB:
1881 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
1882 			break;
1883 		case ixgbe_mac_82599EB:
1884 		case ixgbe_mac_X540:
1885 		default:
1886 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
1887 			break;
1888 		}
1889 		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
1890 		switch (hw->mac.type) {
1891 		case ixgbe_mac_82598EB:
1892 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
1893 			break;
1894 		case ixgbe_mac_82599EB:
1895 		case ixgbe_mac_X540:
1896 		default:
1897 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
1898 			break;
1899 		}
1900 
1901 		/* Enable TX head write-back */
1902 		IXGBE_WRITE_REG(hw, IXGBE_TDWBAH(i),
1903 		    (uint32_t)(hdr_paddr >> 32));
1904 		IXGBE_WRITE_REG(hw, IXGBE_TDWBAL(i),
1905 		    ((uint32_t)hdr_paddr) | IXGBE_TDWBAL_HEAD_WB_ENABLE);
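		/*
		 * Illustrative example (added, not in the original code):
		 * the head write-back address is split across two 32-bit
		 * registers.  Assuming IXGBE_TDWBAL_HEAD_WB_ENABLE is bit 0
		 * and hdr_paddr were 0x0000000112345040, the writes above
		 * would set TDWBAH = 0x00000001 and
		 * TDWBAL = 0x12345040 | 0x1 = 0x12345041.
		 */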
1906 	}
1907 
1908 	if (hw->mac.type != ixgbe_mac_82598EB) {
1909 		uint32_t dmatxctl, rttdcs;
1910 
1911 		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
1912 		dmatxctl |= IXGBE_DMATXCTL_TE;
1913 		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
1914 
1915 		/* Disable arbiter to set MTQC */
1916 		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
1917 		rttdcs |= IXGBE_RTTDCS_ARBDIS;
1918 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
1919 
1920 		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
1921 
1922 		/* Re-enable arbiter */
1923 		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
1924 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
1925 	}
1926 }
1927 
1928 static int
1929 ix_tx_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
1930     uint32_t *cmd_type_len, uint32_t *olinfo_status)
1931 {
1932 	struct ixgbe_adv_tx_context_desc *TXD;
1933 	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
1934 	int ehdrlen, ip_hlen = 0, ctxd;
1935 	boolean_t offload = TRUE;
1936 
1937 	/* First check if TSO is to be used */
1938 	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
1939 		return ix_tso_ctx_setup(txr, mp,
1940 		    cmd_type_len, olinfo_status);
1941 	}
1942 
1943 	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
1944 		offload = FALSE;
1945 
1946 	/* Indicate the whole packet as payload when not doing TSO */
1947 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
1948 
1949 	/*
1950 	 * In advanced descriptors the vlan tag must be placed into the
1951 	 * context descriptor.  Hence we need to make one even if not
1952 	 * doing checksum offloads.
1953 	 */
1954 	if (mp->m_flags & M_VLANTAG) {
1955 		vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
1956 		    IXGBE_ADVTXD_VLAN_SHIFT;
1957 	} else if (!offload) {
1958 		/* No TX descriptor is consumed */
1959 		return 0;
1960 	}
1961 
1962 	/* Set the ether header length */
1963 	ehdrlen = mp->m_pkthdr.csum_lhlen;
1964 	KASSERT(ehdrlen > 0, ("invalid ether hlen"));
1965 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
1966 
1967 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
1968 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
1969 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
1970 		ip_hlen = mp->m_pkthdr.csum_iphlen;
1971 		KASSERT(ip_hlen > 0, ("invalid ip hlen"));
1972 	}
1973 	vlan_macip_lens |= ip_hlen;
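	/*
	 * Worked example (illustrative, not from the original source):
	 * for an untagged IPv4/TCP packet with a 14-byte Ethernet header
	 * and a 20-byte IP header, and assuming IXGBE_ADVTXD_MACLEN_SHIFT
	 * is 9, vlan_macip_lens = (14 << 9) | 20 = 0x1c14.
	 */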
1974 
1975 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
1976 	if (mp->m_pkthdr.csum_flags & CSUM_TCP)
1977 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
1978 	else if (mp->m_pkthdr.csum_flags & CSUM_UDP)
1979 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
1980 
1981 	if (mp->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))
1982 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1983 
1984 	/* Now ready a context descriptor */
1985 	ctxd = txr->tx_next_avail;
1986 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
1987 
1988 	/* Now copy bits into descriptor */
1989 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
1990 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
1991 	TXD->seqnum_seed = htole32(0);
1992 	TXD->mss_l4len_idx = htole32(0);
1993 
1994 	/* We've consumed the first desc, adjust counters */
1995 	if (++ctxd == txr->tx_ndesc)
1996 		ctxd = 0;
1997 	txr->tx_next_avail = ctxd;
1998 	--txr->tx_avail;
1999 
2000 	/* One TX descriptor is consumed */
2001 	return 1;
2002 }
2003 
2004 static int
2005 ix_tso_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
2006     uint32_t *cmd_type_len, uint32_t *olinfo_status)
2007 {
2008 	struct ixgbe_adv_tx_context_desc *TXD;
2009 	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2010 	uint32_t mss_l4len_idx = 0, paylen;
2011 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
2012 
2013 	ehdrlen = mp->m_pkthdr.csum_lhlen;
2014 	KASSERT(ehdrlen > 0, ("invalid ether hlen"));
2015 
2016 	ip_hlen = mp->m_pkthdr.csum_iphlen;
2017 	KASSERT(ip_hlen > 0, ("invalid ip hlen"));
2018 
2019 	tcp_hlen = mp->m_pkthdr.csum_thlen;
2020 	KASSERT(tcp_hlen > 0, ("invalid tcp hlen"));
2021 
2022 	ctxd = txr->tx_next_avail;
2023 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
2024 
2025 	if (mp->m_flags & M_VLANTAG) {
2026 		vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
2027 		    IXGBE_ADVTXD_VLAN_SHIFT;
2028 	}
2029 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
2030 	vlan_macip_lens |= ip_hlen;
2031 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
2032 
2033 	/* ADV DTYPE TUCMD */
2034 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
2035 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
2036 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
2037 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
2038 
2039 	/* MSS L4LEN IDX */
2040 	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
2041 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
2042 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
2043 
2044 	TXD->seqnum_seed = htole32(0);
2045 
2046 	if (++ctxd == txr->tx_ndesc)
2047 		ctxd = 0;
2048 
2049 	txr->tx_avail--;
2050 	txr->tx_next_avail = ctxd;
2051 
2052 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
2053 
2054 	/* This is used in the transmit desc in encap */
2055 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
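	/*
	 * Worked example (illustrative, not from the original source):
	 * for a TSO frame of m_pkthdr.len = 65262 bytes with a 14-byte
	 * Ethernet header, 20-byte IPv4 header and 20-byte TCP header,
	 * paylen = 65262 - 14 - 20 - 20 = 65208, i.e. only the TCP
	 * payload is reported in the PAYLEN field of olinfo_status.
	 */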
2056 
2057 	*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
2058 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
2059 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
2060 
2061 	/* One TX descriptor is consumed */
2062 	return 1;
2063 }
2064 
2065 static void
2066 ix_txeof(struct ix_tx_ring *txr, int hdr)
2067 {
2068 	struct ifnet *ifp = &txr->tx_sc->arpcom.ac_if;
2069 	int first, avail;
2070 
2071 	if (txr->tx_avail == txr->tx_ndesc)
2072 		return;
2073 
2074 	first = txr->tx_next_clean;
2075 	if (first == hdr)
2076 		return;
2077 
2078 	avail = txr->tx_avail;
2079 	while (first != hdr) {
2080 		struct ix_tx_buf *txbuf = &txr->tx_buf[first];
2081 
2082 		++avail;
2083 		if (txbuf->m_head) {
2084 			bus_dmamap_unload(txr->tx_tag, txbuf->map);
2085 			m_freem(txbuf->m_head);
2086 			txbuf->m_head = NULL;
2087 			IFNET_STAT_INC(ifp, opackets, 1);
2088 		}
2089 		if (++first == txr->tx_ndesc)
2090 			first = 0;
2091 	}
2092 	txr->tx_next_clean = first;
2093 	txr->tx_avail = avail;
2094 
2095 	if (txr->tx_avail > IX_MAX_SCATTER + IX_TX_RESERVED) {
2096 		ifsq_clr_oactive(txr->tx_ifsq);
2097 		txr->tx_watchdog.wd_timer = 0;
2098 	}
2099 }
2100 
2101 static int
2102 ix_create_rx_ring(struct ix_rx_ring *rxr)
2103 {
2104 	int i, rsize, error, nrxd;
2105 
2106 	/*
2107 	 * Validate number of receive descriptors.  It must not exceed
2108 	 * hardware maximum, and must be a multiple of IX_DBA_ALIGN.
2109 	 */
2110 	nrxd = device_getenv_int(rxr->rx_sc->dev, "rxd", ix_rxd);
2111 	if (((nrxd * sizeof(union ixgbe_adv_rx_desc)) % IX_DBA_ALIGN) != 0 ||
2112 	    nrxd < IX_MIN_RXD || nrxd > IX_MAX_RXD) {
2113 		device_printf(rxr->rx_sc->dev,
2114 		    "Using %d RX descriptors instead of %d!\n",
2115 		    IX_DEF_RXD, nrxd);
2116 		rxr->rx_ndesc = IX_DEF_RXD;
2117 	} else {
2118 		rxr->rx_ndesc = nrxd;
2119 	}
2120 
2121 	/*
2122 	 * Allocate RX descriptor ring
2123 	 */
2124 	rsize = roundup2(rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc),
2125 	    IX_DBA_ALIGN);
2126 	rxr->rx_base = bus_dmamem_coherent_any(rxr->rx_sc->parent_tag,
2127 	    IX_DBA_ALIGN, rsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
2128 	    &rxr->rx_base_dtag, &rxr->rx_base_map, &rxr->rx_base_paddr);
2129 	if (rxr->rx_base == NULL) {
2130 		device_printf(rxr->rx_sc->dev,
2131 		    "Unable to allocate RX Descriptor memory\n");
2132 		return ENOMEM;
2133 	}
2134 
2135 	rsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_rx_buf) * rxr->rx_ndesc);
2136 	rxr->rx_buf = kmalloc_cachealign(rsize, M_DEVBUF, M_WAITOK | M_ZERO);
2137 
2138 	/*
2139 	 * Create DMA tag for RX buffers
2140 	 */
2141 	error = bus_dma_tag_create(rxr->rx_sc->parent_tag,
2142 	    1, 0,		/* alignment, bounds */
2143 	    BUS_SPACE_MAXADDR,	/* lowaddr */
2144 	    BUS_SPACE_MAXADDR,	/* highaddr */
2145 	    NULL, NULL,		/* filter, filterarg */
2146 	    PAGE_SIZE,		/* maxsize */
2147 	    1,			/* nsegments */
2148 	    PAGE_SIZE,		/* maxsegsize */
2149 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, /* flags */
2150 	    &rxr->rx_tag);
2151 	if (error) {
2152 		device_printf(rxr->rx_sc->dev,
2153 		    "Unable to create RX DMA tag\n");
2154 		kfree(rxr->rx_buf, M_DEVBUF);
2155 		rxr->rx_buf = NULL;
2156 		return error;
2157 	}
2158 
2159 	/*
2160 	 * Create spare DMA map for RX buffers
2161 	 */
2162 	error = bus_dmamap_create(rxr->rx_tag, BUS_DMA_WAITOK,
2163 	    &rxr->rx_sparemap);
2164 	if (error) {
2165 		device_printf(rxr->rx_sc->dev,
2166 		    "Unable to create spare RX DMA map\n");
2167 		bus_dma_tag_destroy(rxr->rx_tag);
2168 		kfree(rxr->rx_buf, M_DEVBUF);
2169 		rxr->rx_buf = NULL;
2170 		return error;
2171 	}
2172 
2173 	/*
2174 	 * Create DMA maps for RX buffers
2175 	 */
2176 	for (i = 0; i < rxr->rx_ndesc; ++i) {
2177 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2178 
2179 		error = bus_dmamap_create(rxr->rx_tag,
2180 		    BUS_DMA_WAITOK, &rxbuf->map);
2181 		if (error) {
2182 			device_printf(rxr->rx_sc->dev,
2183 			    "Unable to create RX DMA map\n");
2184 			ix_destroy_rx_ring(rxr, i);
2185 			return error;
2186 		}
2187 	}
2188 
2189 	/*
2190 	 * Initialize the RDT write watermark
2191 	 */
2192 	rxr->rx_wreg_nsegs = IX_DEF_RXWREG_NSEGS;
2193 
2194 	return 0;
2195 }
2196 
2197 static void
2198 ix_destroy_rx_ring(struct ix_rx_ring *rxr, int ndesc)
2199 {
2200 	int i;
2201 
2202 	if (rxr->rx_base != NULL) {
2203 		bus_dmamap_unload(rxr->rx_base_dtag, rxr->rx_base_map);
2204 		bus_dmamem_free(rxr->rx_base_dtag, rxr->rx_base,
2205 		    rxr->rx_base_map);
2206 		bus_dma_tag_destroy(rxr->rx_base_dtag);
2207 		rxr->rx_base = NULL;
2208 	}
2209 
2210 	if (rxr->rx_buf == NULL)
2211 		return;
2212 
2213 	for (i = 0; i < ndesc; ++i) {
2214 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2215 
2216 		KKASSERT(rxbuf->m_head == NULL);
2217 		bus_dmamap_destroy(rxr->rx_tag, rxbuf->map);
2218 	}
2219 	bus_dmamap_destroy(rxr->rx_tag, rxr->rx_sparemap);
2220 	bus_dma_tag_destroy(rxr->rx_tag);
2221 
2222 	kfree(rxr->rx_buf, M_DEVBUF);
2223 	rxr->rx_buf = NULL;
2224 }
2225 
2226 /*
2227 ** Used to detect a descriptor that has
2228 ** been merged by Hardware RSC.
2229 */
2230 static __inline uint32_t
2231 ix_rsc_count(union ixgbe_adv_rx_desc *rx)
2232 {
2233 	return (le32toh(rx->wb.lower.lo_dword.data) &
2234 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
2235 }
2236 
2237 #if 0
2238 /*********************************************************************
2239  *
2240  *  Initialize Hardware RSC (LRO) feature on 82599
2241  *  for an RX ring, this is toggled by the LRO capability
2242  *  even though it is transparent to the stack.
2243  *
2244  *  NOTE: since this HW feature only works with IPV4 and
2245  *        our testing has shown soft LRO to be as effective,
2246  *        I have decided to disable this by default.
2247  *
2248  **********************************************************************/
2249 static void
2250 ix_setup_hw_rsc(struct ix_rx_ring *rxr)
2251 {
2252 	struct ix_softc *sc = rxr->rx_sc;
2253 	struct ixgbe_hw *hw = &sc->hw;
2254 	uint32_t rscctrl, rdrxctl;
2255 
2256 #if 0
2257 	/* If turning LRO/RSC off we need to disable it */
2258 	if ((sc->arpcom.ac_if.if_capenable & IFCAP_LRO) == 0) {
2259 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2260 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
2261 		return;
2262 	}
2263 #endif
2264 
2265 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
2266 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
2267 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
2268 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
2269 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
2270 
2271 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2272 	rscctrl |= IXGBE_RSCCTL_RSCEN;
2273 	/*
2274 	** Limit the total number of descriptors that
2275 	** can be combined, so it does not exceed 64K
2276 	*/
2277 	if (rxr->mbuf_sz == MCLBYTES)
2278 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
2279 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
2280 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
2281 	else if (rxr->mbuf_sz == MJUM9BYTES)
2282 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
2283 	else  /* Using 16K cluster */
2284 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
2285 
2286 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
2287 
2288 	/* Enable TCP header recognition */
2289 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
2290 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
2291 	    IXGBE_PSRTYPE_TCPHDR));
2292 
2293 	/* Disable RSC for ACK packets */
2294 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
2295 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
2296 
2297 	rxr->hw_rsc = TRUE;
2298 }
2299 #endif
2300 
2301 static int
2302 ix_init_rx_ring(struct ix_rx_ring *rxr)
2303 {
2304 	int i;
2305 
2306 	/* Clear the ring contents */
2307 	bzero(rxr->rx_base, rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2308 
2309 	/* XXX we need JUMPAGESIZE for RSC too */
2310 	if (rxr->rx_sc->max_frame_size <= MCLBYTES)
2311 		rxr->rx_mbuf_sz = MCLBYTES;
2312 	else
2313 		rxr->rx_mbuf_sz = MJUMPAGESIZE;
2314 
2315 	/* Now replenish the mbufs */
2316 	for (i = 0; i < rxr->rx_ndesc; ++i) {
2317 		int error;
2318 
2319 		error = ix_newbuf(rxr, i, TRUE);
2320 		if (error)
2321 			return error;
2322 	}
2323 
2324 	/* Setup our descriptor indices */
2325 	rxr->rx_next_check = 0;
2326 	rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2327 
2328 #if 0
2329 	/*
2330 	** Now set up the LRO interface:
2331 	*/
2332 	if (ixgbe_rsc_enable)
2333 		ix_setup_hw_rsc(rxr);
2334 #endif
2335 
2336 	return 0;
2337 }
2338 
2339 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
2340 
2341 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
2342 
2343 static void
2344 ix_init_rx_unit(struct ix_softc *sc)
2345 {
2346 	struct ixgbe_hw	*hw = &sc->hw;
2347 	struct ifnet *ifp = &sc->arpcom.ac_if;
2348 	uint32_t bufsz, rxctrl, fctrl, rxcsum, hlreg;
2349 	int i;
2350 
2351 	/*
2352 	 * Make sure receives are disabled while setting up the descriptor ring
2353 	 */
2354 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
2355 	IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
2356 
2357 	/* Enable broadcasts */
2358 	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
2359 	fctrl |= IXGBE_FCTRL_BAM;
2360 	fctrl |= IXGBE_FCTRL_DPF;
2361 	fctrl |= IXGBE_FCTRL_PMCF;
2362 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
2363 
2364 	/* Set for Jumbo Frames? */
2365 	hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
2366 	if (ifp->if_mtu > ETHERMTU)
2367 		hlreg |= IXGBE_HLREG0_JUMBOEN;
2368 	else
2369 		hlreg &= ~IXGBE_HLREG0_JUMBOEN;
2370 	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
2371 
2372 	KKASSERT(sc->rx_rings[0].rx_mbuf_sz >= MCLBYTES);
2373 	bufsz = (sc->rx_rings[0].rx_mbuf_sz + BSIZEPKT_ROUNDUP) >>
2374 	    IXGBE_SRRCTL_BSIZEPKT_SHIFT;
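	/*
	 * Illustrative note (added, not in the original code): assuming
	 * IXGBE_SRRCTL_BSIZEPKT_SHIFT is 10, bufsz encodes the receive
	 * buffer size in 1KB units, e.g. a 2KB cluster (MCLBYTES) yields
	 * (2048 + 1023) >> 10 = 2, which is later OR'ed into the
	 * BSIZEPKT field of SRRCTL below.
	 */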
2375 
2376 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
2377 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
2378 		uint64_t rdba = rxr->rx_base_paddr;
2379 		uint32_t srrctl;
2380 
2381 		/* Setup the Base and Length of the Rx Descriptor Ring */
2382 		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i), (uint32_t)rdba);
2383 		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (uint32_t)(rdba >> 32));
2384 		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
2385 		    rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2386 
2387 		/*
2388 		 * Set up the SRRCTL register
2389 		 */
2390 		srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
2391 
2392 		srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
2393 		srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
2394 		srrctl |= bufsz;
2395 		srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
2396 		if (sc->rx_ring_inuse > 1) {
2397 			/* See the comment near ix_enable_rx_drop() */
2398 			switch (sc->fc) {
2399 			case ixgbe_fc_rx_pause:
2400 			case ixgbe_fc_tx_pause:
2401 			case ixgbe_fc_full:
2402 				srrctl &= ~IXGBE_SRRCTL_DROP_EN;
2403 				if (i == 0 && bootverbose) {
2404 					if_printf(ifp, "flow control %d, "
2405 					    "disable RX drop\n", sc->fc);
2406 				}
2407 				break;
2408 
2409 			case ixgbe_fc_none:
2410 				srrctl |= IXGBE_SRRCTL_DROP_EN;
2411 				if (i == 0 && bootverbose) {
2412 					if_printf(ifp, "flow control %d, "
2413 					    "enable RX drop\n", sc->fc);
2414 				}
2415 				break;
2416 
2417 			default:
2418 				break;
2419 			}
2420 		}
2421 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
2422 
2423 		/* Setup the HW Rx Head and Tail Descriptor Pointers */
2424 		IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
2425 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
2426 	}
2427 
2428 	if (sc->hw.mac.type != ixgbe_mac_82598EB)
2429 		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), 0);
2430 
2431 	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
2432 
2433 	/*
2434 	 * Setup RSS
2435 	 */
2436 	if (IX_ENABLE_HWRSS(sc)) {
2437 		uint8_t key[IX_NRSSRK * IX_RSSRK_SIZE];
2438 		int j, r;
2439 
2440 		/*
2441 		 * NOTE:
2442 		 * When we reach here, RSS has already been disabled
2443 		 * in ix_stop(), so we could safely configure RSS key
2444 		 * and redirect table.
2445 		 */
2446 
2447 		/*
2448 		 * Configure RSS key
2449 		 */
2450 		toeplitz_get_key(key, sizeof(key));
2451 		for (i = 0; i < IX_NRSSRK; ++i) {
2452 			uint32_t rssrk;
2453 
2454 			rssrk = IX_RSSRK_VAL(key, i);
2455 			IX_RSS_DPRINTF(sc, 1, "rssrk%d 0x%08x\n",
2456 			    i, rssrk);
2457 
2458 			IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rssrk);
2459 		}
2460 
2461 		/*
2462 		 * Configure the RSS redirect table in the following fashion:
2463 		 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
2464 		 */
2465 		r = 0;
2466 		for (j = 0; j < IX_NRETA; ++j) {
2467 			uint32_t reta = 0;
2468 
2469 			for (i = 0; i < IX_RETA_SIZE; ++i) {
2470 				uint32_t q;
2471 
2472 				q = r % sc->rx_ring_inuse;
2473 				reta |= q << (8 * i);
2474 				++r;
2475 			}
2476 			IX_RSS_DPRINTF(sc, 1, "reta 0x%08x\n", reta);
2477 			IXGBE_WRITE_REG(hw, IXGBE_RETA(j), reta);
2478 		}
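		/*
		 * Worked example (illustrative, not from the original
		 * source): with rx_ring_inuse = 2 the 8-bit entries
		 * alternate 0,1,0,1,... so every RETA register is packed
		 * as (0 << 0) | (1 << 8) | (0 << 16) | (1 << 24)
		 * = 0x01000100, mapping even hash values to ring 0 and
		 * odd hash values to ring 1.
		 */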
2479 
2480 		/*
2481 		 * Enable multiple receive queues.
2482 		 * Enable IPv4 RSS standard hash functions.
2483 		 */
2484 		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
2485 		    IXGBE_MRQC_RSSEN |
2486 		    IXGBE_MRQC_RSS_FIELD_IPV4 |
2487 		    IXGBE_MRQC_RSS_FIELD_IPV4_TCP);
2488 
2489 		/*
2490 		 * NOTE:
2491 		 * PCSD must be enabled to enable multiple
2492 		 * receive queues.
2493 		 */
2494 		rxcsum |= IXGBE_RXCSUM_PCSD;
2495 	}
2496 
2497 	if (ifp->if_capenable & IFCAP_RXCSUM)
2498 		rxcsum |= IXGBE_RXCSUM_PCSD;
2499 
2500 	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
2501 }
2502 
2503 static __inline void
2504 ix_rx_refresh(struct ix_rx_ring *rxr, int i)
2505 {
2506 	if (--i < 0)
2507 		i = rxr->rx_ndesc - 1;
2508 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, IXGBE_RDT(rxr->rx_idx), i);
2509 }
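/*
 * Illustrative note (added, not in the original code): ix_rx_refresh()
 * writes RDT to the descriptor just before the next one software will
 * inspect, e.g. with rx_ndesc = 1024 and i = 0 the tail becomes 1023.
 * Keeping one descriptor between tail and head is the usual way to
 * prevent the hardware from advancing onto descriptors the driver has
 * not yet replenished.
 */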
2510 
2511 static __inline void
2512 ix_rxcsum(uint32_t staterr, struct mbuf *mp, uint32_t ptype)
2513 {
2514 	if ((ptype &
2515 	     (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_IPV4_EX)) == 0) {
2516 		/* Not IPv4 */
2517 		return;
2518 	}
2519 
2520 	if ((staterr & (IXGBE_RXD_STAT_IPCS | IXGBE_RXDADV_ERR_IPE)) ==
2521 	    IXGBE_RXD_STAT_IPCS)
2522 		mp->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID;
2523 
2524 	if ((ptype &
2525 	     (IXGBE_RXDADV_PKTTYPE_TCP | IXGBE_RXDADV_PKTTYPE_UDP)) == 0) {
2526 		/*
2527 		 * - Neither TCP nor UDP
2528 		 * - IPv4 fragment
2529 		 */
2530 		return;
2531 	}
2532 
2533 	if ((staterr & (IXGBE_RXD_STAT_L4CS | IXGBE_RXDADV_ERR_TCPE)) ==
2534 	    IXGBE_RXD_STAT_L4CS) {
2535 		mp->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
2536 		    CSUM_FRAG_NOT_CHECKED;
2537 		mp->m_pkthdr.csum_data = htons(0xffff);
2538 	}
2539 }
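/*
 * Worked example (illustrative, not from the original source): for a
 * non-fragmented IPv4/TCP frame whose IP and L4 checksums both passed,
 * the descriptor has IPCS and L4CS set with no IPE/TCPE error bits, so
 * the mbuf ends up with CSUM_IP_CHECKED | CSUM_IP_VALID |
 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR | CSUM_FRAG_NOT_CHECKED and
 * csum_data = 0xffff, letting the stack skip both checksum
 * verifications.
 */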
2540 
2541 static __inline struct pktinfo *
2542 ix_rssinfo(struct mbuf *m, struct pktinfo *pi,
2543     uint32_t hash, uint32_t hashtype, uint32_t ptype)
2544 {
2545 	switch (hashtype) {
2546 	case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
2547 		pi->pi_netisr = NETISR_IP;
2548 		pi->pi_flags = 0;
2549 		pi->pi_l3proto = IPPROTO_TCP;
2550 		break;
2551 
2552 	case IXGBE_RXDADV_RSSTYPE_IPV4:
2553 		if ((ptype & IXGBE_RXDADV_PKTTYPE_UDP) == 0) {
2554 			/* Not UDP or is fragment */
2555 			return NULL;
2556 		}
2557 		pi->pi_netisr = NETISR_IP;
2558 		pi->pi_flags = 0;
2559 		pi->pi_l3proto = IPPROTO_UDP;
2560 		break;
2561 
2562 	default:
2563 		return NULL;
2564 	}
2565 
2566 	m->m_flags |= M_HASH;
2567 	m->m_pkthdr.hash = toeplitz_hash(hash);
2568 	return pi;
2569 }
2570 
2571 static __inline void
2572 ix_setup_rxdesc(union ixgbe_adv_rx_desc *rxd, const struct ix_rx_buf *rxbuf)
2573 {
2574 	rxd->read.pkt_addr = htole64(rxbuf->paddr);
2575 	rxd->wb.upper.status_error = 0;
2576 }
2577 
2578 static void
2579 ix_rx_discard(struct ix_rx_ring *rxr, int i, boolean_t eop)
2580 {
2581 	struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2582 
2583 	/*
2584 	 * XXX discard may not be correct
2585 	 */
2586 	if (eop) {
2587 		IFNET_STAT_INC(&rxr->rx_sc->arpcom.ac_if, ierrors, 1);
2588 		rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2589 	} else {
2590 		rxr->rx_flags |= IX_RXRING_FLAG_DISC;
2591 	}
2592 	if (rxbuf->fmp != NULL) {
2593 		m_freem(rxbuf->fmp);
2594 		rxbuf->fmp = NULL;
2595 		rxbuf->lmp = NULL;
2596 	}
2597 	ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
2598 }
2599 
2600 static void
2601 ix_rxeof(struct ix_rx_ring *rxr, int count)
2602 {
2603 	struct ifnet *ifp = &rxr->rx_sc->arpcom.ac_if;
2604 	int i, nsegs = 0, cpuid = mycpuid;
2605 
2606 	i = rxr->rx_next_check;
2607 	while (count != 0) {
2608 		struct ix_rx_buf *rxbuf, *nbuf = NULL;
2609 		union ixgbe_adv_rx_desc	*cur;
2610 		struct mbuf *sendmp = NULL, *mp;
2611 		struct pktinfo *pi = NULL, pi0;
2612 		uint32_t rsc = 0, ptype, staterr, hash, hashtype;
2613 		uint16_t len;
2614 		boolean_t eop;
2615 
2616 		cur = &rxr->rx_base[i];
2617 		staterr = le32toh(cur->wb.upper.status_error);
2618 
2619 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
2620 			break;
2621 		++nsegs;
2622 
2623 		rxbuf = &rxr->rx_buf[i];
2624 		mp = rxbuf->m_head;
2625 
2626 		len = le16toh(cur->wb.upper.length);
2627 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
2628 		    IXGBE_RXDADV_PKTTYPE_MASK;
2629 		hash = le32toh(cur->wb.lower.hi_dword.rss);
2630 		hashtype = le32toh(cur->wb.lower.lo_dword.data) &
2631 		    IXGBE_RXDADV_RSSTYPE_MASK;
2632 
2633 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
2634 		if (eop)
2635 			--count;
2636 
2637 		/*
2638 		 * Make sure bad packets are discarded
2639 		 */
2640 		if ((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) ||
2641 		    (rxr->rx_flags & IX_RXRING_FLAG_DISC)) {
2642 			ix_rx_discard(rxr, i, eop);
2643 			goto next_desc;
2644 		}
2645 
2646 		bus_dmamap_sync(rxr->rx_tag, rxbuf->map, BUS_DMASYNC_POSTREAD);
2647 		if (ix_newbuf(rxr, i, FALSE) != 0) {
2648 			ix_rx_discard(rxr, i, eop);
2649 			goto next_desc;
2650 		}
2651 
2652 		/*
2653 		 * On the 82599, which supports hardware LRO (RSC), the
2654 		 * fragments of a packet need not occupy sequential
2655 		 * descriptors; instead, the index of the next descriptor
2656 		 * is encoded in the descriptor itself.  This also means
2657 		 * that we may process more than one packet at a time,
2658 		 * something that was not possible before and which
2659 		 * required replacing the global chain pointers with the
2660 		 * per-buffer chaining (fmp/lmp) used here.
2661 		 */
2662 		if (!eop) {
2663 			int nextp;
2664 
2665 			/*
2666 			 * Figure out the next descriptor
2667 			 * of this frame.
2668 			 */
2669 			if (rxr->rx_flags & IX_RXRING_FLAG_LRO)
2670 				rsc = ix_rsc_count(cur);
2671 			if (rsc) { /* Get hardware index */
2672 				nextp = ((staterr &
2673 				    IXGBE_RXDADV_NEXTP_MASK) >>
2674 				    IXGBE_RXDADV_NEXTP_SHIFT);
2675 			} else { /* Just sequential */
2676 				nextp = i + 1;
2677 				if (nextp == rxr->rx_ndesc)
2678 					nextp = 0;
2679 			}
2680 			nbuf = &rxr->rx_buf[nextp];
2681 			prefetch(nbuf);
2682 		}
2683 		mp->m_len = len;
2684 
2685 		/*
2686 		 * Rather than using the fmp/lmp global pointers
2687 		 * we now keep the head of a packet chain in the
2688 		 * buffer struct and pass this along from one
2689 		 * descriptor to the next, until we get EOP.
2690 		 */
2691 		if (rxbuf->fmp == NULL) {
2692 			mp->m_pkthdr.len = len;
2693 			rxbuf->fmp = mp;
2694 			rxbuf->lmp = mp;
2695 		} else {
2696 			rxbuf->fmp->m_pkthdr.len += len;
2697 			rxbuf->lmp->m_next = mp;
2698 			rxbuf->lmp = mp;
2699 		}
2700 
2701 		if (nbuf != NULL) {
2702 			/*
2703 			 * Not the last fragment of this frame,
2704 			 * pass this fragment list on
2705 			 */
2706 			nbuf->fmp = rxbuf->fmp;
2707 			nbuf->lmp = rxbuf->lmp;
2708 		} else {
2709 			/*
2710 			 * Send this frame
2711 			 */
2712 			sendmp = rxbuf->fmp;
2713 
2714 			sendmp->m_pkthdr.rcvif = ifp;
2715 			IFNET_STAT_INC(ifp, ipackets, 1);
2716 #ifdef IX_RSS_DEBUG
2717 			rxr->rx_pkts++;
2718 #endif
2719 
2720 			/* Process vlan info */
2721 			if (staterr & IXGBE_RXD_STAT_VP) {
2722 				sendmp->m_pkthdr.ether_vlantag =
2723 				    le16toh(cur->wb.upper.vlan);
2724 				sendmp->m_flags |= M_VLANTAG;
2725 			}
2726 			if (ifp->if_capenable & IFCAP_RXCSUM)
2727 				ix_rxcsum(staterr, sendmp, ptype);
2728 			if (ifp->if_capenable & IFCAP_RSS) {
2729 				pi = ix_rssinfo(sendmp, &pi0,
2730 				    hash, hashtype, ptype);
2731 			}
2732 		}
2733 		rxbuf->fmp = NULL;
2734 		rxbuf->lmp = NULL;
2735 next_desc:
2736 		/* Advance our pointers to the next descriptor. */
2737 		if (++i == rxr->rx_ndesc)
2738 			i = 0;
2739 
2740 		if (sendmp != NULL)
2741 			ifp->if_input(ifp, sendmp, pi, cpuid);
2742 
2743 		if (nsegs >= rxr->rx_wreg_nsegs) {
2744 			ix_rx_refresh(rxr, i);
2745 			nsegs = 0;
2746 		}
2747 	}
2748 	rxr->rx_next_check = i;
2749 
2750 	if (nsegs > 0)
2751 		ix_rx_refresh(rxr, i);
2752 }
2753 
2754 static void
2755 ix_set_vlan(struct ix_softc *sc)
2756 {
2757 	struct ixgbe_hw *hw = &sc->hw;
2758 	uint32_t ctrl;
2759 
2760 	if (hw->mac.type == ixgbe_mac_82598EB) {
2761 		ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
2762 		ctrl |= IXGBE_VLNCTRL_VME;
2763 		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
2764 	} else {
2765 		int i;
2766 
2767 		/*
2768 		 * On 82599 and later chips the VLAN enable is
2769 		 * per queue in RXDCTL
2770 		 */
2771 		for (i = 0; i < sc->rx_ring_inuse; ++i) {
2772 			ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
2773 			ctrl |= IXGBE_RXDCTL_VME;
2774 			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
2775 		}
2776 	}
2777 }
2778 
2779 static void
2780 ix_enable_intr(struct ix_softc *sc)
2781 {
2782 	struct ixgbe_hw	*hw = &sc->hw;
2783 	uint32_t fwsm;
2784 	int i;
2785 
2786 	for (i = 0; i < sc->intr_cnt; ++i)
2787 		lwkt_serialize_handler_enable(sc->intr_data[i].intr_serialize);
2788 
2789 	sc->intr_mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
2790 
2791 	/* Enable Fan Failure detection */
2792 	if (hw->device_id == IXGBE_DEV_ID_82598AT)
2793 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
2794 
2795 	switch (sc->hw.mac.type) {
2796 	case ixgbe_mac_82599EB:
2797 		sc->intr_mask |= IXGBE_EIMS_ECC;
2798 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP0;
2799 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
2800 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP2;
2801 		break;
2802 
2803 	case ixgbe_mac_X540:
2804 		sc->intr_mask |= IXGBE_EIMS_ECC;
2805 		/* Detect if Thermal Sensor is enabled */
2806 		fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
2807 		if (fwsm & IXGBE_FWSM_TS_ENABLED)
2808 			sc->intr_mask |= IXGBE_EIMS_TS;
2809 		/* FALL THROUGH */
2810 	default:
2811 		break;
2812 	}
2813 
2814 	/* With MSI-X we use auto clear for RX and TX rings */
2815 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
2816 		/*
2817 		 * There are no EIAC1/EIAC2 for newer chips; the related
2818 		 * bits for TX and RX rings > 16 are always auto clear.
2819 		 *
2820 		 * XXX which bits?  There are _no_ documented EICR1 and
2821 		 * EICR2 at all; only EICR.
2822 		 */
2823 		IXGBE_WRITE_REG(hw, IXGBE_EIAC, IXGBE_EIMS_RTX_QUEUE);
2824 	} else {
2825 		sc->intr_mask |= IX_TX_INTR_MASK | IX_RX0_INTR_MASK;
2826 
2827 		KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
2828 		if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS)
2829 			sc->intr_mask |= IX_RX1_INTR_MASK;
2830 	}
2831 
2832 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
2833 
2834 	/*
2835 	 * Enable RX and TX rings for MSI-X
2836 	 */
2837 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
2838 		for (i = 0; i < sc->tx_ring_inuse; ++i) {
2839 			const struct ix_tx_ring *txr = &sc->tx_rings[i];
2840 
2841 			if (txr->tx_intr_vec >= 0) {
2842 				IXGBE_WRITE_REG(hw, txr->tx_eims,
2843 				    txr->tx_eims_val);
2844 			}
2845 		}
2846 		for (i = 0; i < sc->rx_ring_inuse; ++i) {
2847 			const struct ix_rx_ring *rxr = &sc->rx_rings[i];
2848 
2849 			KKASSERT(rxr->rx_intr_vec >= 0);
2850 			IXGBE_WRITE_REG(hw, rxr->rx_eims, rxr->rx_eims_val);
2851 		}
2852 	}
2853 
2854 	IXGBE_WRITE_FLUSH(hw);
2855 }
2856 
2857 static void
2858 ix_disable_intr(struct ix_softc *sc)
2859 {
2860 	int i;
2861 
2862 	if (sc->intr_type == PCI_INTR_TYPE_MSIX)
2863 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIAC, 0);
2864 
2865 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
2866 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, ~0);
2867 	} else {
2868 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, 0xFFFF0000);
2869 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(0), ~0);
2870 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(1), ~0);
2871 	}
2872 	IXGBE_WRITE_FLUSH(&sc->hw);
2873 
2874 	for (i = 0; i < sc->intr_cnt; ++i)
2875 		lwkt_serialize_handler_disable(sc->intr_data[i].intr_serialize);
2876 }
2877 
2878 uint16_t
2879 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, uint32_t reg)
2880 {
2881 	return pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
2882 	    reg, 2);
2883 }
2884 
2885 void
2886 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, uint32_t reg, uint16_t value)
2887 {
2888 	pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
2889 	    reg, value, 2);
2890 }
2891 
2892 static void
2893 ix_slot_info(struct ix_softc *sc)
2894 {
2895 	struct ixgbe_hw *hw = &sc->hw;
2896 	device_t dev = sc->dev;
2897 	struct ixgbe_mac_info *mac = &hw->mac;
2898 	uint16_t link;
2899 	uint32_t offset;
2900 
2901 	/* For most devices simply call the shared code routine */
2902 	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
2903 		ixgbe_get_bus_info(hw);
2904 		goto display;
2905 	}
2906 
2907 	/*
2908 	 * For the Quad port adapter we need to parse back
2909 	 * up the PCI tree to find the speed of the expansion
2910 	 * slot into which this adapter is plugged. A bit more work.
2911 	 */
2912 	dev = device_get_parent(device_get_parent(dev));
2913 #ifdef IXGBE_DEBUG
2914 	device_printf(dev, "parent pcib = %x,%x,%x\n",
2915 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
2916 #endif
2917 	dev = device_get_parent(device_get_parent(dev));
2918 #ifdef IXGBE_DEBUG
2919 	device_printf(dev, "slot pcib = %x,%x,%x\n",
2920 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
2921 #endif
2922 	/* Now get the PCI Express Capabilities offset */
2923 	offset = pci_get_pciecap_ptr(dev);
2924 	/* ...and read the Link Status Register */
2925 	link = pci_read_config(dev, offset + PCIER_LINKSTAT, 2);
2926 	switch (link & IXGBE_PCI_LINK_WIDTH) {
2927 	case IXGBE_PCI_LINK_WIDTH_1:
2928 		hw->bus.width = ixgbe_bus_width_pcie_x1;
2929 		break;
2930 	case IXGBE_PCI_LINK_WIDTH_2:
2931 		hw->bus.width = ixgbe_bus_width_pcie_x2;
2932 		break;
2933 	case IXGBE_PCI_LINK_WIDTH_4:
2934 		hw->bus.width = ixgbe_bus_width_pcie_x4;
2935 		break;
2936 	case IXGBE_PCI_LINK_WIDTH_8:
2937 		hw->bus.width = ixgbe_bus_width_pcie_x8;
2938 		break;
2939 	default:
2940 		hw->bus.width = ixgbe_bus_width_unknown;
2941 		break;
2942 	}
2943 
2944 	switch (link & IXGBE_PCI_LINK_SPEED) {
2945 	case IXGBE_PCI_LINK_SPEED_2500:
2946 		hw->bus.speed = ixgbe_bus_speed_2500;
2947 		break;
2948 	case IXGBE_PCI_LINK_SPEED_5000:
2949 		hw->bus.speed = ixgbe_bus_speed_5000;
2950 		break;
2951 	case IXGBE_PCI_LINK_SPEED_8000:
2952 		hw->bus.speed = ixgbe_bus_speed_8000;
2953 		break;
2954 	default:
2955 		hw->bus.speed = ixgbe_bus_speed_unknown;
2956 		break;
2957 	}
2958 
2959 	mac->ops.set_lan_id(hw);
2960 
2961 display:
2962 	device_printf(dev, "PCI Express Bus: Speed %s %s\n",
2963 	    hw->bus.speed == ixgbe_bus_speed_8000 ? "8.0GT/s" :
2964 	    hw->bus.speed == ixgbe_bus_speed_5000 ? "5.0GT/s" :
2965 	    hw->bus.speed == ixgbe_bus_speed_2500 ? "2.5GT/s" : "Unknown",
2966 	    hw->bus.width == ixgbe_bus_width_pcie_x8 ? "Width x8" :
2967 	    hw->bus.width == ixgbe_bus_width_pcie_x4 ? "Width x4" :
2968 	    hw->bus.width == ixgbe_bus_width_pcie_x1 ? "Width x1" : "Unknown");
2969 
2970 	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP &&
2971 	    hw->bus.width <= ixgbe_bus_width_pcie_x4 &&
2972 	    hw->bus.speed == ixgbe_bus_speed_2500) {
2973 		device_printf(dev, "For optimal performance a x8 "
2974 		    "PCIE, or x4 PCIE Gen2 slot is required.\n");
2975 	} else if (hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP &&
2976 	    hw->bus.width <= ixgbe_bus_width_pcie_x8 &&
2977 	    hw->bus.speed < ixgbe_bus_speed_8000) {
2978 		device_printf(dev, "For optimal performance a x8 "
2979 		    "PCIE Gen3 slot is required.\n");
2980 	}
2981 }
2982 
2983 /*
2984  * TODO comment is incorrect
2985  *
2986  * Setup the correct IVAR register for a particular MSIX interrupt
2987  * - entry is the register array entry
2988  * - vector is the MSIX vector for this queue
2989  * - type is RX/TX/MISC
2990  */
2991 static void
2992 ix_set_ivar(struct ix_softc *sc, uint8_t entry, uint8_t vector,
2993     int8_t type)
2994 {
2995 	struct ixgbe_hw *hw = &sc->hw;
2996 	uint32_t ivar, index;
2997 
2998 	vector |= IXGBE_IVAR_ALLOC_VAL;
2999 
3000 	switch (hw->mac.type) {
3001 	case ixgbe_mac_82598EB:
3002 		if (type == -1)
3003 			entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
3004 		else
3005 			entry += (type * 64);
3006 		index = (entry >> 2) & 0x1F;
3007 		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
3008 		ivar &= ~(0xFF << (8 * (entry & 0x3)));
3009 		ivar |= (vector << (8 * (entry & 0x3)));
3010 		IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
3011 		break;
3012 
3013 	case ixgbe_mac_82599EB:
3014 	case ixgbe_mac_X540:
3015 		if (type == -1) { /* MISC IVAR */
3016 			index = (entry & 1) * 8;
3017 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
3018 			ivar &= ~(0xFF << index);
3019 			ivar |= (vector << index);
3020 			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
3021 		} else {	/* RX/TX IVARS */
3022 			index = (16 * (entry & 1)) + (8 * type);
3023 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
3024 			ivar &= ~(0xFF << index);
3025 			ivar |= (vector << index);
3026 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
3027 		}
3028 
3029 	default:
3030 		break;
3031 	}
3032 }
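/*
 * Worked example (illustrative, not from the original code): on an
 * 82599, mapping RX queue 3 (entry = 3, type = 0) to MSI-X vector 5
 * gives index = (16 * (3 & 1)) + (8 * 0) = 16, so the byte
 * (5 | IXGBE_IVAR_ALLOC_VAL) is placed into bits 23:16 of
 * IVAR(3 >> 1), i.e. IVAR register 1.
 */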
3033 
3034 static boolean_t
3035 ix_sfp_probe(struct ix_softc *sc)
3036 {
3037 	struct ixgbe_hw	*hw = &sc->hw;
3038 
3039 	if (hw->phy.type == ixgbe_phy_nl &&
3040 	    hw->phy.sfp_type == ixgbe_sfp_type_not_present) {
3041 		int32_t ret;
3042 
3043 		ret = hw->phy.ops.identify_sfp(hw);
3044 		if (ret)
3045 			return FALSE;
3046 
3047 		ret = hw->phy.ops.reset(hw);
3048 		if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3049 			if_printf(&sc->arpcom.ac_if,
3050 			     "Unsupported SFP+ module detected!  "
3051 			     "Reload driver with supported module.\n");
3052 			sc->sfp_probe = FALSE;
3053 			return FALSE;
3054 		}
3055 		if_printf(&sc->arpcom.ac_if, "SFP+ module detected!\n");
3056 
3057 		/* We now have supported optics */
3058 		sc->sfp_probe = FALSE;
3059 		/* Set the optics type so system reports correctly */
3060 		ix_setup_optics(sc);
3061 
3062 		return TRUE;
3063 	}
3064 	return FALSE;
3065 }
3066 
3067 static void
3068 ix_handle_link(struct ix_softc *sc)
3069 {
3070 	ixgbe_check_link(&sc->hw, &sc->link_speed, &sc->link_up, 0);
3071 	ix_update_link_status(sc);
3072 }
3073 
3074 /*
3075  * Handling SFP module
3076  */
3077 static void
3078 ix_handle_mod(struct ix_softc *sc)
3079 {
3080 	struct ixgbe_hw *hw = &sc->hw;
3081 	uint32_t err;
3082 
3083 	err = hw->phy.ops.identify_sfp(hw);
3084 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3085 		if_printf(&sc->arpcom.ac_if,
3086 		    "Unsupported SFP+ module type was detected.\n");
3087 		return;
3088 	}
3089 	err = hw->mac.ops.setup_sfp(hw);
3090 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3091 		if_printf(&sc->arpcom.ac_if,
3092 		    "Setup failure - unsupported SFP+ module type.\n");
3093 		return;
3094 	}
3095 	ix_handle_msf(sc);
3096 }
3097 
3098 /*
3099  * Handling MSF (multispeed fiber)
3100  */
3101 static void
3102 ix_handle_msf(struct ix_softc *sc)
3103 {
3104 	struct ixgbe_hw *hw = &sc->hw;
3105 	uint32_t autoneg;
3106 
3107 	autoneg = hw->phy.autoneg_advertised;
3108 	if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
3109 		bool negotiate;
3110 
3111 		hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
3112 	}
3113 	if (hw->mac.ops.setup_link != NULL)
3114 		hw->mac.ops.setup_link(hw, autoneg, TRUE);
3115 }
3116 
3117 static void
3118 ix_update_stats(struct ix_softc *sc)
3119 {
3120 	struct ifnet *ifp = &sc->arpcom.ac_if;
3121 	struct ixgbe_hw *hw = &sc->hw;
3122 	uint32_t missed_rx = 0, bprc, lxon, lxoff, total;
3123 	uint64_t total_missed_rx = 0;
3124 	int i;
3125 
3126 	sc->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
3127 	sc->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
3128 	sc->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
3129 	sc->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
3130 
3131 	/*
3132 	 * Note: These are for the 8 possible traffic classes, which
3133 	 * are unused in the current implementation, so only class 0
3134 	 * should report real data.
3135 	 */
3136 	for (i = 0; i < 8; i++) {
3137 		uint32_t mp;
3138 
3139 		mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
3140 		/* missed_rx tallies misses for the gprc workaround */
3141 		missed_rx += mp;
3142 		/* global total per queue */
3143 		sc->stats.mpc[i] += mp;
3144 
3145 		/* Running comprehensive total for stats display */
3146 		total_missed_rx += sc->stats.mpc[i];
3147 
3148 		if (hw->mac.type == ixgbe_mac_82598EB) {
3149 			sc->stats.rnbc[i] += IXGBE_READ_REG(hw, IXGBE_RNBC(i));
3150 			sc->stats.qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC(i));
3151 			sc->stats.qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC(i));
3152 			sc->stats.pxonrxc[i] +=
3153 			    IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
3154 		} else {
3155 			sc->stats.pxonrxc[i] +=
3156 			    IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
3157 		}
3158 		sc->stats.pxontxc[i] += IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
3159 		sc->stats.pxofftxc[i] += IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
3160 		sc->stats.pxoffrxc[i] += IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
3161 		sc->stats.pxon2offc[i] +=
3162 		    IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
3163 	}
3164 	for (i = 0; i < 16; i++) {
3165 		sc->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
3166 		sc->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
3167 		sc->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
3168 	}
3169 	sc->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
3170 	sc->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
3171 	sc->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
3172 
3173 	/* Hardware workaround, gprc counts missed packets */
3174 	sc->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
3175 	sc->stats.gprc -= missed_rx;
3176 
3177 	if (hw->mac.type != ixgbe_mac_82598EB) {
3178 		sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
3179 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
3180 		sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
3181 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
3182 		sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
3183 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
3184 		sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
3185 		sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
3186 	} else {
3187 		sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
3188 		sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
3189 		/* 82598 only has a counter in the high register */
3190 		sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
3191 		sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
3192 		sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
3193 	}
3194 
3195 	/*
3196 	 * Workaround: the mprc hardware counter incorrectly includes
3197 	 * broadcasts, so for now we subtract those.
3198 	 */
3199 	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
3200 	sc->stats.bprc += bprc;
3201 	sc->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
3202 	if (hw->mac.type == ixgbe_mac_82598EB)
3203 		sc->stats.mprc -= bprc;
3204 
3205 	sc->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
3206 	sc->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
3207 	sc->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
3208 	sc->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
3209 	sc->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
3210 	sc->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
3211 
3212 	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
3213 	sc->stats.lxontxc += lxon;
3214 	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
3215 	sc->stats.lxofftxc += lxoff;
3216 	total = lxon + lxoff;
3217 
3218 	sc->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
3219 	sc->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
3220 	sc->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
3221 	sc->stats.gptc -= total;
3222 	sc->stats.mptc -= total;
3223 	sc->stats.ptc64 -= total;
3224 	sc->stats.gotc -= total * ETHER_MIN_LEN;
3225 
3226 	sc->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
3227 	sc->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
3228 	sc->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
3229 	sc->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
3230 	sc->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
3231 	sc->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
3232 	sc->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
3233 	sc->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
3234 	sc->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
3235 	sc->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
3236 	sc->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
3237 	sc->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
3238 	sc->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
3239 	sc->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
3240 	sc->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
3241 	sc->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC);
3242 	sc->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
3243 	sc->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
3244 	/* Only read FCOE on 82599 */
3245 	if (hw->mac.type != ixgbe_mac_82598EB) {
3246 		sc->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
3247 		sc->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
3248 		sc->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
3249 		sc->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
3250 		sc->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
3251 	}
3252 
3253 	/* Rx Errors */
3254 	IFNET_STAT_SET(ifp, iqdrops, total_missed_rx);
3255 	IFNET_STAT_SET(ifp, ierrors, sc->stats.crcerrs + sc->stats.rlec);
3256 }
3257 
3258 #if 0
3259 /*
3260  * Add sysctl variables, one per statistic, to the system.
3261  */
3262 static void
3263 ix_add_hw_stats(struct ix_softc *sc)
3264 {
3265 
3266 	device_t dev = sc->dev;
3267 
3268 	struct ix_tx_ring *txr = sc->tx_rings;
3269 	struct ix_rx_ring *rxr = sc->rx_rings;
3270 
3271 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
3272 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
3273 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
3274 	struct ixgbe_hw_stats *stats = &sc->stats;
3275 
3276 	struct sysctl_oid *stat_node, *queue_node;
3277 	struct sysctl_oid_list *stat_list, *queue_list;
3278 
3279 #define QUEUE_NAME_LEN 32
3280 	char namebuf[QUEUE_NAME_LEN];
3281 
3282 	/* MAC stats get their own sub node */
3283 
3284 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
3285 				    CTLFLAG_RD, NULL, "MAC Statistics");
3286 	stat_list = SYSCTL_CHILDREN(stat_node);
3287 
3288 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
3289 			CTLFLAG_RD, &stats->crcerrs,
3290 			"CRC Errors");
3291 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
3292 			CTLFLAG_RD, &stats->illerrc,
3293 			"Illegal Byte Errors");
3294 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
3295 			CTLFLAG_RD, &stats->errbc,
3296 			"Byte Errors");
3297 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
3298 			CTLFLAG_RD, &stats->mspdc,
3299 			"MAC Short Packets Discarded");
3300 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
3301 			CTLFLAG_RD, &stats->mlfc,
3302 			"MAC Local Faults");
3303 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
3304 			CTLFLAG_RD, &stats->mrfc,
3305 			"MAC Remote Faults");
3306 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
3307 			CTLFLAG_RD, &stats->rlec,
3308 			"Receive Length Errors");
3309 
3310 	/* Flow Control stats */
3311 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
3312 			CTLFLAG_RD, &stats->lxontxc,
3313 			"Link XON Transmitted");
3314 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
3315 			CTLFLAG_RD, &stats->lxonrxc,
3316 			"Link XON Received");
3317 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
3318 			CTLFLAG_RD, &stats->lxofftxc,
3319 			"Link XOFF Transmitted");
3320 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
3321 			CTLFLAG_RD, &stats->lxoffrxc,
3322 			"Link XOFF Received");
3323 
3324 	/* Packet Reception Stats */
3325 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
3326 			CTLFLAG_RD, &stats->tor,
3327 			"Total Octets Received");
3328 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
3329 			CTLFLAG_RD, &stats->gorc,
3330 			"Good Octets Received");
3331 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
3332 			CTLFLAG_RD, &stats->tpr,
3333 			"Total Packets Received");
3334 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
3335 			CTLFLAG_RD, &stats->gprc,
3336 			"Good Packets Received");
3337 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
3338 			CTLFLAG_RD, &stats->mprc,
3339 			"Multicast Packets Received");
3340 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
3341 			CTLFLAG_RD, &stats->bprc,
3342 			"Broadcast Packets Received");
3343 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
3344 			CTLFLAG_RD, &stats->prc64,
3345 			"64 byte frames received ");
3346 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
3347 			CTLFLAG_RD, &stats->prc127,
3348 			"65-127 byte frames received");
3349 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
3350 			CTLFLAG_RD, &stats->prc255,
3351 			"128-255 byte frames received");
3352 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
3353 			CTLFLAG_RD, &stats->prc511,
3354 			"256-511 byte frames received");
3355 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
3356 			CTLFLAG_RD, &stats->prc1023,
3357 			"512-1023 byte frames received");
3358 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
3359 			CTLFLAG_RD, &stats->prc1522,
3360 			"1024-1522 byte frames received");
3361 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
3362 			CTLFLAG_RD, &stats->ruc,
3363 			"Receive Undersized");
3364 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
3365 			CTLFLAG_RD, &stats->rfc,
3366 			"Fragmented Packets Received ");
3367 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
3368 			CTLFLAG_RD, &stats->roc,
3369 			"Oversized Packets Received");
3370 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
3371 			CTLFLAG_RD, &stats->rjc,
3372 			"Received Jabber");
3373 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
3374 			CTLFLAG_RD, &stats->mngprc,
3375 			"Management Packets Received");
3376 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
3377 			CTLFLAG_RD, &stats->mngpdc,
3378 			"Management Packets Dropped");
3379 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
3380 			CTLFLAG_RD, &stats->xec,
3381 			"Checksum Errors");
3382 
3383 	/* Packet Transmission Stats */
3384 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
3385 			CTLFLAG_RD, &stats->gotc,
3386 			"Good Octets Transmitted");
3387 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
3388 			CTLFLAG_RD, &stats->tpt,
3389 			"Total Packets Transmitted");
3390 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
3391 			CTLFLAG_RD, &stats->gptc,
3392 			"Good Packets Transmitted");
3393 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
3394 			CTLFLAG_RD, &stats->bptc,
3395 			"Broadcast Packets Transmitted");
3396 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
3397 			CTLFLAG_RD, &stats->mptc,
3398 			"Multicast Packets Transmitted");
3399 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
3400 			CTLFLAG_RD, &stats->mngptc,
3401 			"Management Packets Transmitted");
3402 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
3403 			CTLFLAG_RD, &stats->ptc64,
3404 			"64 byte frames transmitted ");
3405 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
3406 			CTLFLAG_RD, &stats->ptc127,
3407 			"65-127 byte frames transmitted");
3408 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
3409 			CTLFLAG_RD, &stats->ptc255,
3410 			"128-255 byte frames transmitted");
3411 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
3412 			CTLFLAG_RD, &stats->ptc511,
3413 			"256-511 byte frames transmitted");
3414 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
3415 			CTLFLAG_RD, &stats->ptc1023,
3416 			"512-1023 byte frames transmitted");
3417 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
3418 			CTLFLAG_RD, &stats->ptc1522,
3419 			"1024-1522 byte frames transmitted");
3420 }
3421 #endif
3422 
3423 /*
3424  * Enable the hardware to drop packets when the buffer is full.
3425  * This is useful when multiple RX rings are used, so that no
3426  * single RX ring being full stalls the entire RX engine.  We
3427  * only enable this when multiple RX rings are used and when
3428  * flow control is disabled.
3429  */
3430 static void
3431 ix_enable_rx_drop(struct ix_softc *sc)
3432 {
3433 	struct ixgbe_hw *hw = &sc->hw;
3434 	int i;
3435 
3436 	if (bootverbose) {
3437 		if_printf(&sc->arpcom.ac_if,
3438 		    "flow control %d, enable RX drop\n", sc->fc);
3439 	}
3440 
3441 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
3442 		uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3443 
3444 		srrctl |= IXGBE_SRRCTL_DROP_EN;
3445 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3446 	}
3447 }
3448 
3449 static void
3450 ix_disable_rx_drop(struct ix_softc *sc)
3451 {
3452 	struct ixgbe_hw *hw = &sc->hw;
3453 	int i;
3454 
3455 	if (bootverbose) {
3456 		if_printf(&sc->arpcom.ac_if,
3457 		    "flow control %d, disable RX drop\n", sc->fc);
3458 	}
3459 
3460 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
3461 		uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3462 
3463 		srrctl &= ~IXGBE_SRRCTL_DROP_EN;
3464 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3465 	}
3466 }
3467 
3468 static int
3469 ix_sysctl_flowctrl(SYSCTL_HANDLER_ARGS)
3470 {
3471 	struct ix_softc *sc = (struct ix_softc *)arg1;
3472 	struct ifnet *ifp = &sc->arpcom.ac_if;
3473 	int error, fc;
3474 
3475 	fc = sc->fc;
3476 	error = sysctl_handle_int(oidp, &fc, 0, req);
3477 	if (error || req->newptr == NULL)
3478 		return error;
3479 
3480 	switch (fc) {
3481 	case ixgbe_fc_rx_pause:
3482 	case ixgbe_fc_tx_pause:
3483 	case ixgbe_fc_full:
3484 	case ixgbe_fc_none:
3485 		break;
3486 	default:
3487 		return EINVAL;
3488 	}
3489 
3490 	ifnet_serialize_all(ifp);
3491 
3492 	/* Don't bother if it's not changed */
3493 	if (sc->fc == fc)
3494 		goto done;
3495 	sc->fc = fc;
3496 
3497 	/* Don't do anything, if the interface is not up yet */
3498 	if ((ifp->if_flags & IFF_RUNNING) == 0)
3499 		goto done;
3500 
3501 	if (sc->rx_ring_inuse > 1) {
3502 		switch (sc->fc) {
3503 		case ixgbe_fc_rx_pause:
3504 		case ixgbe_fc_tx_pause:
3505 		case ixgbe_fc_full:
3506 			ix_disable_rx_drop(sc);
3507 			break;
3508 
3509 		case ixgbe_fc_none:
3510 			ix_enable_rx_drop(sc);
3511 			break;
3512 
3513 		default:
3514 			panic("leading fc check mismatch");
3515 		}
3516 	}
3517 
3518 	sc->hw.fc.requested_mode = sc->fc;
3519 	/* Don't autoneg if forcing a value */
3520 	sc->hw.fc.disable_fc_autoneg = TRUE;
3521 	ixgbe_fc_enable(&sc->hw);
3522 
3523 done:
3524 	ifnet_deserialize_all(ifp);
3525 	return error;
3526 }
3527 
3528 #ifdef foo
3529 /* XXX not working properly w/ 82599 connected w/ DAC */
3530 /* XXX only work after the interface is up */
3531 static int
3532 ix_sysctl_advspeed(SYSCTL_HANDLER_ARGS)
3533 {
3534 	struct ix_softc *sc = (struct ix_softc *)arg1;
3535 	struct ifnet *ifp = &sc->arpcom.ac_if;
3536 	struct ixgbe_hw *hw = &sc->hw;
3537 	ixgbe_link_speed speed;
3538 	int error, advspeed;
3539 
3540 	advspeed = sc->advspeed;
3541 	error = sysctl_handle_int(oidp, &advspeed, 0, req);
3542 	if (error || req->newptr == NULL)
3543 		return error;
3544 
3545 	if (!(hw->phy.media_type == ixgbe_media_type_copper ||
3546 	    hw->phy.multispeed_fiber))
3547 		return EOPNOTSUPP;
3548 	if (hw->mac.ops.setup_link == NULL)
3549 		return EOPNOTSUPP;
3550 
3551 	switch (advspeed) {
3552 	case 0:	/* auto */
3553 		speed = IXGBE_LINK_SPEED_UNKNOWN;
3554 		break;
3555 
3556 	case 1:	/* 1Gb */
3557 		speed = IXGBE_LINK_SPEED_1GB_FULL;
3558 		break;
3559 
3560 	case 2:	/* 100Mb */
3561 		speed = IXGBE_LINK_SPEED_100_FULL;
3562 		break;
3563 
3564 	case 3:	/* 1Gb/10Gb */
3565 		speed = IXGBE_LINK_SPEED_1GB_FULL |
3566 		    IXGBE_LINK_SPEED_10GB_FULL;
3567 		break;
3568 
3569 	default:
3570 		return EINVAL;
3571 	}
3572 
3573 	ifnet_serialize_all(ifp);
3574 
3575 	if (sc->advspeed == advspeed) /* no change */
3576 		goto done;
3577 
3578 	if ((speed & IXGBE_LINK_SPEED_100_FULL) &&
3579 	    hw->mac.type != ixgbe_mac_X540) {
3580 		error = EOPNOTSUPP;
3581 		goto done;
3582 	}
3583 
3584 	sc->advspeed = advspeed;
3585 
3586 	if ((ifp->if_flags & IFF_RUNNING) == 0)
3587 		goto done;
3588 
3589 	if (speed == IXGBE_LINK_SPEED_UNKNOWN) {
3590 		ix_config_link(sc);
3591 	} else {
3592 		hw->mac.autotry_restart = TRUE;
3593 		hw->mac.ops.setup_link(hw, speed, sc->link_up);
3594 	}
3595 
3596 done:
3597 	ifnet_deserialize_all(ifp);
3598 	return error;
3599 }
3600 #endif
3601 
3602 static void
3603 ix_setup_serialize(struct ix_softc *sc)
3604 {
3605 	int i = 0, j;
3606 
3607 	/* Main + RX + TX */
3608 	sc->nserialize = 1 + sc->rx_ring_cnt + sc->tx_ring_cnt;
3609 	sc->serializes =
3610 	    kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *),
3611 	        M_DEVBUF, M_WAITOK | M_ZERO);
3612 
3613 	/*
3614 	 * Setup serializes
3615 	 *
3616 	 * NOTE: Order is critical
3617 	 */
3618 
3619 	KKASSERT(i < sc->nserialize);
3620 	sc->serializes[i++] = &sc->main_serialize;
3621 
3622 	for (j = 0; j < sc->rx_ring_cnt; ++j) {
3623 		KKASSERT(i < sc->nserialize);
3624 		sc->serializes[i++] = &sc->rx_rings[j].rx_serialize;
3625 	}
3626 
3627 	for (j = 0; j < sc->tx_ring_cnt; ++j) {
3628 		KKASSERT(i < sc->nserialize);
3629 		sc->serializes[i++] = &sc->tx_rings[j].tx_serialize;
3630 	}
3631 
3632 	KKASSERT(i == sc->nserialize);
3633 }
3634 
3635 static int
3636 ix_alloc_intr(struct ix_softc *sc)
3637 {
3638 	struct ix_intr_data *intr;
3639 	u_int intr_flags;
3640 
3641 	ix_alloc_msix(sc);
3642 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
3643 		ix_set_ring_inuse(sc, FALSE);
3644 		return 0;
3645 	}
3646 
3647 	if (sc->intr_data != NULL)
3648 		kfree(sc->intr_data, M_DEVBUF);
3649 
3650 	sc->intr_cnt = 1;
3651 	sc->intr_data = kmalloc(sizeof(struct ix_intr_data), M_DEVBUF,
3652 	    M_WAITOK | M_ZERO);
3653 	intr = &sc->intr_data[0];
3654 
3655 	/*
3656 	 * Allocate MSI/legacy interrupt resource
3657 	 */
3658 	sc->intr_type = pci_alloc_1intr(sc->dev, ix_msi_enable,
3659 	    &intr->intr_rid, &intr_flags);
3660 
3661 	intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
3662 	    &intr->intr_rid, intr_flags);
3663 	if (intr->intr_res == NULL) {
3664 		device_printf(sc->dev, "Unable to allocate bus resource: "
3665 		    "interrupt\n");
3666 		return ENXIO;
3667 	}
3668 
3669 	intr->intr_serialize = &sc->main_serialize;
3670 	intr->intr_cpuid = rman_get_cpuid(intr->intr_res);
3671 	intr->intr_func = ix_intr;
3672 	intr->intr_funcarg = sc;
3673 	intr->intr_rate = IX_INTR_RATE;
3674 	intr->intr_use = IX_INTR_USE_RXTX;
3675 
3676 	sc->tx_rings[0].tx_intr_cpuid = intr->intr_cpuid;
3677 	sc->tx_rings[0].tx_intr_vec = IX_TX_INTR_VEC;
3678 
3679 	sc->rx_rings[0].rx_intr_vec = IX_RX0_INTR_VEC;
3680 
3681 	ix_set_ring_inuse(sc, FALSE);
3682 
3683 	KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
3684 	if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS)
3685 		sc->rx_rings[1].rx_intr_vec = IX_RX1_INTR_VEC;
3686 
3687 	return 0;
3688 }
3689 
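/*
 * Release the MSI/legacy interrupt resource, or hand all MSI-X
 * resources back through ix_free_msix().
 */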
3690 static void
3691 ix_free_intr(struct ix_softc *sc)
3692 {
3693 	if (sc->intr_data == NULL)
3694 		return;
3695 
3696 	if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
3697 		struct ix_intr_data *intr = &sc->intr_data[0];
3698 
3699 		KKASSERT(sc->intr_cnt == 1);
3700 		if (intr->intr_res != NULL) {
3701 			bus_release_resource(sc->dev, SYS_RES_IRQ,
3702 			    intr->intr_rid, intr->intr_res);
3703 		}
3704 		if (sc->intr_type == PCI_INTR_TYPE_MSI)
3705 			pci_release_msi(sc->dev);
3706 
3707 		kfree(sc->intr_data, M_DEVBUF);
3708 	} else {
3709 		ix_free_msix(sc, TRUE);
3710 	}
3711 }
3712 
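/*
 * Record how many RX and TX rings will actually be used for the
 * current operating mode (polling, MSI-X or single interrupt).
 */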
3713 static void
3714 ix_set_ring_inuse(struct ix_softc *sc, boolean_t polling)
3715 {
3716 	sc->rx_ring_inuse = ix_get_rxring_inuse(sc, polling);
3717 	sc->tx_ring_inuse = ix_get_txring_inuse(sc, polling);
3718 	if (bootverbose) {
3719 		if_printf(&sc->arpcom.ac_if,
3720 		    "RX rings %d/%d, TX rings %d/%d\n",
3721 		    sc->rx_ring_inuse, sc->rx_ring_cnt,
3722 		    sc->tx_ring_inuse, sc->tx_ring_cnt);
3723 	}
3724 }
3725 
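/*
 * Number of RX rings to use: all rings when polling, a reduced set
 * for non-MSI-X interrupts, and the MSI-X ring count otherwise.
 * Without hardware RSS only one RX ring is used.
 */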
3726 static int
3727 ix_get_rxring_inuse(const struct ix_softc *sc, boolean_t polling)
3728 {
3729 	if (!IX_ENABLE_HWRSS(sc))
3730 		return 1;
3731 
3732 	if (polling)
3733 		return sc->rx_ring_cnt;
3734 	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
3735 		return IX_MIN_RXRING_RSS;
3736 	else
3737 		return sc->rx_ring_msix;
3738 }
3739 
3740 static int
3741 ix_get_txring_inuse(const struct ix_softc *sc, boolean_t polling)
3742 {
3743 	if (!IX_ENABLE_HWTSS(sc))
3744 		return 1;
3745 
3746 	if (polling)
3747 		return sc->tx_ring_cnt;
3748 	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
3749 		return 1;
3750 	else
3751 		return sc->tx_ring_msix;
3752 }
3753 
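/*
 * Install the interrupt handlers; on failure tear down the handlers
 * that were already set up.
 */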
3754 static int
3755 ix_setup_intr(struct ix_softc *sc)
3756 {
3757 	int i;
3758 
3759 	for (i = 0; i < sc->intr_cnt; ++i) {
3760 		struct ix_intr_data *intr = &sc->intr_data[i];
3761 		int error;
3762 
3763 		error = bus_setup_intr_descr(sc->dev, intr->intr_res,
3764 		    INTR_MPSAFE, intr->intr_func, intr->intr_funcarg,
3765 		    &intr->intr_hand, intr->intr_serialize, intr->intr_desc);
3766 		if (error) {
3767 			device_printf(sc->dev, "can't set up intr %d\n", i);
3768 			ix_teardown_intr(sc, i);
3769 			return error;
3770 		}
3771 	}
3772 	return 0;
3773 }
3774 
3775 static void
3776 ix_teardown_intr(struct ix_softc *sc, int intr_cnt)
3777 {
3778 	int i;
3779 
3780 	if (sc->intr_data == NULL)
3781 		return;
3782 
3783 	for (i = 0; i < intr_cnt; ++i) {
3784 		struct ix_intr_data *intr = &sc->intr_data[i];
3785 
3786 		bus_teardown_intr(sc->dev, intr->intr_res, intr->intr_hand);
3787 	}
3788 }
3789 
3790 static void
3791 ix_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
3792 {
3793 	struct ix_softc *sc = ifp->if_softc;
3794 
3795 	ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz);
3796 }
3797 
3798 static void
3799 ix_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
3800 {
3801 	struct ix_softc *sc = ifp->if_softc;
3802 
3803 	ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz);
3804 }
3805 
3806 static int
3807 ix_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
3808 {
3809 	struct ix_softc *sc = ifp->if_softc;
3810 
3811 	return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz);
3812 }
3813 
3814 #ifdef INVARIANTS
3815 
3816 static void
3817 ix_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
3818     boolean_t serialized)
3819 {
3820 	struct ix_softc *sc = ifp->if_softc;
3821 
3822 	ifnet_serialize_array_assert(sc->serializes, sc->nserialize, slz,
3823 	    serialized);
3824 }
3825 
3826 #endif	/* INVARIANTS */
3827 
3828 static void
3829 ix_free_rings(struct ix_softc *sc)
3830 {
3831 	int i;
3832 
3833 	if (sc->tx_rings != NULL) {
3834 		for (i = 0; i < sc->tx_ring_cnt; ++i) {
3835 			struct ix_tx_ring *txr = &sc->tx_rings[i];
3836 
3837 			ix_destroy_tx_ring(txr, txr->tx_ndesc);
3838 		}
3839 		kfree(sc->tx_rings, M_DEVBUF);
3840 	}
3841 
3842 	if (sc->rx_rings != NULL) {
3843 		for (i = 0; i < sc->rx_ring_cnt; ++i) {
3844 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
3845 
3846 			ix_destroy_rx_ring(rxr, rxr->rx_ndesc);
3847 		}
3848 		kfree(sc->rx_rings, M_DEVBUF);
3849 	}
3850 
3851 	if (sc->parent_tag != NULL)
3852 		bus_dma_tag_destroy(sc->parent_tag);
3853 }
3854 
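/*
 * TX watchdog.  The check is skipped while the hardware has paused
 * transmission (flow control); otherwise the stuck queue state is
 * logged, the interface is reinitialized and the TX queues are kicked.
 */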
3855 static void
3856 ix_watchdog(struct ifaltq_subque *ifsq)
3857 {
3858 	struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
3859 	struct ifnet *ifp = ifsq_get_ifp(ifsq);
3860 	struct ix_softc *sc = ifp->if_softc;
3861 	int i;
3862 
3863 	KKASSERT(txr->tx_ifsq == ifsq);
3864 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3865 
3866 	/*
3867 	 * If the interface has been paused then don't do the watchdog check
3868 	 */
3869 	if (IXGBE_READ_REG(&sc->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF) {
3870 		txr->tx_watchdog.wd_timer = 5;
3871 		return;
3872 	}
3873 
3874 	if_printf(ifp, "Watchdog timeout -- resetting\n");
3875 	if_printf(ifp, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->tx_idx,
3876 	    IXGBE_READ_REG(&sc->hw, IXGBE_TDH(txr->tx_idx)),
3877 	    IXGBE_READ_REG(&sc->hw, IXGBE_TDT(txr->tx_idx)));
3878 	if_printf(ifp, "TX(%d) desc avail = %d, next TX to Clean = %d\n",
3879 	    txr->tx_idx, txr->tx_avail, txr->tx_next_clean);
3880 
3881 	ix_init(sc);
3882 	for (i = 0; i < sc->tx_ring_inuse; ++i)
3883 		ifsq_devstart_sched(sc->tx_rings[i].tx_ifsq);
3884 }
3885 
3886 static void
3887 ix_free_tx_ring(struct ix_tx_ring *txr)
3888 {
3889 	int i;
3890 
3891 	for (i = 0; i < txr->tx_ndesc; ++i) {
3892 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
3893 
3894 		if (txbuf->m_head != NULL) {
3895 			bus_dmamap_unload(txr->tx_tag, txbuf->map);
3896 			m_freem(txbuf->m_head);
3897 			txbuf->m_head = NULL;
3898 		}
3899 	}
3900 }
3901 
3902 static void
3903 ix_free_rx_ring(struct ix_rx_ring *rxr)
3904 {
3905 	int i;
3906 
3907 	for (i = 0; i < rxr->rx_ndesc; ++i) {
3908 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
3909 
3910 		if (rxbuf->fmp != NULL) {
3911 			m_freem(rxbuf->fmp);
3912 			rxbuf->fmp = NULL;
3913 			rxbuf->lmp = NULL;
3914 		} else {
3915 			KKASSERT(rxbuf->lmp == NULL);
3916 		}
3917 		if (rxbuf->m_head != NULL) {
3918 			bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
3919 			m_freem(rxbuf->m_head);
3920 			rxbuf->m_head = NULL;
3921 		}
3922 	}
3923 }
3924 
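/*
 * Allocate and DMA-map a fresh receive mbuf for ring slot 'i', swap
 * the spare DMA map into the slot and rewrite the RX descriptor.
 */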
3925 static int
3926 ix_newbuf(struct ix_rx_ring *rxr, int i, boolean_t wait)
3927 {
3928 	struct mbuf *m;
3929 	bus_dma_segment_t seg;
3930 	bus_dmamap_t map;
3931 	struct ix_rx_buf *rxbuf;
3932 	int flags, error, nseg;
3933 
3934 	flags = MB_DONTWAIT;
3935 	if (__predict_false(wait))
3936 		flags = MB_WAIT;
3937 
3938 	m = m_getjcl(flags, MT_DATA, M_PKTHDR, rxr->rx_mbuf_sz);
3939 	if (m == NULL) {
3940 		if (wait) {
3941 			if_printf(&rxr->rx_sc->arpcom.ac_if,
3942 			    "Unable to allocate RX mbuf\n");
3943 		}
3944 		return ENOBUFS;
3945 	}
3946 	m->m_len = m->m_pkthdr.len = rxr->rx_mbuf_sz;
3947 
3948 	error = bus_dmamap_load_mbuf_segment(rxr->rx_tag,
3949 	    rxr->rx_sparemap, m, &seg, 1, &nseg, BUS_DMA_NOWAIT);
3950 	if (error) {
3951 		m_freem(m);
3952 		if (wait) {
3953 			if_printf(&rxr->rx_sc->arpcom.ac_if,
3954 			    "Unable to load RX mbuf\n");
3955 		}
3956 		return error;
3957 	}
3958 
3959 	rxbuf = &rxr->rx_buf[i];
3960 	if (rxbuf->m_head != NULL)
3961 		bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
3962 
3963 	map = rxbuf->map;
3964 	rxbuf->map = rxr->rx_sparemap;
3965 	rxr->rx_sparemap = map;
3966 
3967 	rxbuf->m_head = m;
3968 	rxbuf->paddr = seg.ds_addr;
3969 
3970 	ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
3971 	return 0;
3972 }
3973 
3974 static void
3975 ix_add_sysctl(struct ix_softc *sc)
3976 {
3977 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
3978 	struct sysctl_oid *tree = device_get_sysctl_tree(sc->dev);
3979 #ifdef IX_RSS_DEBUG
3980 	char node[32];
3981 	int i;
3982 #endif
3983 
3984 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
3985 	    OID_AUTO, "rxr", CTLFLAG_RD, &sc->rx_ring_cnt, 0, "# of RX rings");
3986 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
3987 	    OID_AUTO, "rxr_inuse", CTLFLAG_RD, &sc->rx_ring_inuse, 0,
3988 	    "# of RX rings used");
3989 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
3990 	    OID_AUTO, "txr", CTLFLAG_RD, &sc->tx_ring_cnt, 0, "# of TX rings");
3991 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
3992 	    OID_AUTO, "txr_inuse", CTLFLAG_RD, &sc->tx_ring_inuse, 0,
3993 	    "# of TX rings used");
3994 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
3995 	    OID_AUTO, "rxd", CTLTYPE_INT | CTLFLAG_RD,
3996 	    sc, 0, ix_sysctl_rxd, "I",
3997 	    "# of RX descs");
3998 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
3999 	    OID_AUTO, "txd", CTLTYPE_INT | CTLFLAG_RD,
4000 	    sc, 0, ix_sysctl_txd, "I",
4001 	    "# of TX descs");
4002 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4003 	    OID_AUTO, "tx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4004 	    sc, 0, ix_sysctl_tx_wreg_nsegs, "I",
4005 	    "# of segments sent before write to hardware register");
4006 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4007 	    OID_AUTO, "rx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4008 	    sc, 0, ix_sysctl_rx_wreg_nsegs, "I",
4009 	    "# of received segments sent before write to hardware register");
4010 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4011 	    OID_AUTO, "tx_intr_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4012 	    sc, 0, ix_sysctl_tx_intr_nsegs, "I",
4013 	    "# of segments per TX interrupt");
4014 
4015 #ifdef IFPOLL_ENABLE
4016 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4017 	    OID_AUTO, "npoll_rxoff", CTLTYPE_INT | CTLFLAG_RW,
4018 	    sc, 0, ix_sysctl_npoll_rxoff, "I", "NPOLLING RX cpu offset");
4019 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4020 	    OID_AUTO, "npoll_txoff", CTLTYPE_INT | CTLFLAG_RW,
4021 	    sc, 0, ix_sysctl_npoll_txoff, "I", "NPOLLING TX cpu offset");
4022 #endif
4023 
4024 #define IX_ADD_INTR_RATE_SYSCTL(sc, use, name) \
4025 do { \
4026 	ix_add_intr_rate_sysctl(sc, IX_INTR_USE_##use, #name, \
4027 	    ix_sysctl_##name, #use " interrupt rate"); \
4028 } while (0)
4029 
4030 	IX_ADD_INTR_RATE_SYSCTL(sc, RXTX, rxtx_intr_rate);
4031 	IX_ADD_INTR_RATE_SYSCTL(sc, RX, rx_intr_rate);
4032 	IX_ADD_INTR_RATE_SYSCTL(sc, TX, tx_intr_rate);
4033 	IX_ADD_INTR_RATE_SYSCTL(sc, STATUS, sts_intr_rate);
4034 
4035 #undef IX_ADD_INTR_RATE_SYSCTL
4036 
4037 #ifdef IX_RSS_DEBUG
4038 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4039 	    OID_AUTO, "rss_debug", CTLFLAG_RW, &sc->rss_debug, 0,
4040 	    "RSS debug level");
4041 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
4042 		ksnprintf(node, sizeof(node), "rx%d_pkt", i);
4043 		SYSCTL_ADD_ULONG(ctx,
4044 		    SYSCTL_CHILDREN(tree), OID_AUTO, node,
4045 		    CTLFLAG_RW, &sc->rx_rings[i].rx_pkts, "RXed packets");
4046 	}
4047 #endif
4048 
4049 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4050 	    OID_AUTO, "flowctrl", CTLTYPE_INT | CTLFLAG_RW,
4051 	    sc, 0, ix_sysctl_flowctrl, "I",
4052 	    "flow control, 0 - off, 1 - rx pause, 2 - tx pause, 3 - full");
4053 
4054 #ifdef foo
4055 	/*
4056 	 * Allow a kind of speed control by forcing the autoneg
4057 	 * advertised speed list to only a certain value; this
4058 	 * supports 1G on 82599 devices and 100Mb on X540.
4059 	 */
4060 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4061 	    OID_AUTO, "advspeed", CTLTYPE_INT | CTLFLAG_RW,
4062 	    sc, 0, ix_sysctl_advspeed, "I",
4063 	    "advertised link speed, "
4064 	    "0 - auto, 1 - 1Gb, 2 - 100Mb, 3 - 1Gb/10Gb");
4065 #endif
4066 
4067 #if 0
4068 	ix_add_hw_stats(sc);
4069 #endif
4070 
4071 }
4072 
4073 static int
4074 ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4075 {
4076 	struct ix_softc *sc = (void *)arg1;
4077 	struct ifnet *ifp = &sc->arpcom.ac_if;
4078 	int error, nsegs, i;
4079 
4080 	nsegs = sc->tx_rings[0].tx_wreg_nsegs;
4081 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4082 	if (error || req->newptr == NULL)
4083 		return error;
4084 	if (nsegs < 0)
4085 		return EINVAL;
4086 
4087 	ifnet_serialize_all(ifp);
4088 	for (i = 0; i < sc->tx_ring_cnt; ++i)
4089 		sc->tx_rings[i].tx_wreg_nsegs = nsegs;
4090 	ifnet_deserialize_all(ifp);
4091 
4092 	return 0;
4093 }
4094 
4095 static int
4096 ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4097 {
4098 	struct ix_softc *sc = (void *)arg1;
4099 	struct ifnet *ifp = &sc->arpcom.ac_if;
4100 	int error, nsegs, i;
4101 
4102 	nsegs = sc->rx_rings[0].rx_wreg_nsegs;
4103 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4104 	if (error || req->newptr == NULL)
4105 		return error;
4106 	if (nsegs < 0)
4107 		return EINVAL;
4108 
4109 	ifnet_serialize_all(ifp);
4110 	for (i = 0; i < sc->rx_ring_cnt; ++i)
4111 		sc->rx_rings[i].rx_wreg_nsegs = nsegs;
4112 	ifnet_deserialize_all(ifp);
4113 
4114 	return 0;
4115 }
4116 
4117 static int
4118 ix_sysctl_txd(SYSCTL_HANDLER_ARGS)
4119 {
4120 	struct ix_softc *sc = (void *)arg1;
4121 	int txd;
4122 
4123 	txd = sc->tx_rings[0].tx_ndesc;
4124 	return sysctl_handle_int(oidp, &txd, 0, req);
4125 }
4126 
4127 static int
4128 ix_sysctl_rxd(SYSCTL_HANDLER_ARGS)
4129 {
4130 	struct ix_softc *sc = (void *)arg1;
4131 	int rxd;
4132 
4133 	rxd = sc->rx_rings[0].rx_ndesc;
4134 	return sysctl_handle_int(oidp, &rxd, 0, req);
4135 }
4136 
4137 static int
4138 ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS)
4139 {
4140 	struct ix_softc *sc = (void *)arg1;
4141 	struct ifnet *ifp = &sc->arpcom.ac_if;
4142 	struct ix_tx_ring *txr = &sc->tx_rings[0];
4143 	int error, nsegs;
4144 
4145 	nsegs = txr->tx_intr_nsegs;
4146 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4147 	if (error || req->newptr == NULL)
4148 		return error;
4149 	if (nsegs < 0)
4150 		return EINVAL;
4151 
4152 	ifnet_serialize_all(ifp);
4153 
4154 	if (nsegs >= txr->tx_ndesc - IX_MAX_SCATTER - IX_TX_RESERVED) {
4155 		error = EINVAL;
4156 	} else {
4157 		int i;
4158 
4159 		error = 0;
4160 		for (i = 0; i < sc->tx_ring_cnt; ++i)
4161 			sc->tx_rings[i].tx_intr_nsegs = nsegs;
4162 	}
4163 
4164 	ifnet_deserialize_all(ifp);
4165 
4166 	return error;
4167 }
4168 
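/*
 * Convert an interrupt rate (interrupts/second) into an EITR interval
 * and program the EITR register for the given vector, clamping the
 * interval to the per-MAC limits.
 */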
4169 static void
4170 ix_set_eitr(struct ix_softc *sc, int idx, int rate)
4171 {
4172 	uint32_t eitr, eitr_intvl;
4173 
4174 	eitr = IXGBE_READ_REG(&sc->hw, IXGBE_EITR(idx));
4175 	eitr_intvl = 1000000000 / 256 / rate;
4176 
4177 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4178 		eitr &= ~IX_EITR_INTVL_MASK_82598;
4179 		if (eitr_intvl == 0)
4180 			eitr_intvl = 1;
4181 		else if (eitr_intvl > IX_EITR_INTVL_MASK_82598)
4182 			eitr_intvl = IX_EITR_INTVL_MASK_82598;
4183 	} else {
4184 		eitr &= ~IX_EITR_INTVL_MASK;
4185 
4186 		eitr_intvl &= ~IX_EITR_INTVL_RSVD_MASK;
4187 		if (eitr_intvl == 0)
4188 			eitr_intvl = IX_EITR_INTVL_MIN;
4189 		else if (eitr_intvl > IX_EITR_INTVL_MAX)
4190 			eitr_intvl = IX_EITR_INTVL_MAX;
4191 	}
4192 	eitr |= eitr_intvl;
4193 
4194 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EITR(idx), eitr);
4195 }
4196 
4197 static int
4198 ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS)
4199 {
4200 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RXTX);
4201 }
4202 
4203 static int
4204 ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS)
4205 {
4206 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RX);
4207 }
4208 
4209 static int
4210 ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS)
4211 {
4212 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_TX);
4213 }
4214 
4215 static int
4216 ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS)
4217 {
4218 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_STATUS);
4219 }
4220 
4221 static int
4222 ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int use)
4223 {
4224 	struct ix_softc *sc = (void *)arg1;
4225 	struct ifnet *ifp = &sc->arpcom.ac_if;
4226 	int error, rate, i;
4227 
4228 	rate = 0;
4229 	for (i = 0; i < sc->intr_cnt; ++i) {
4230 		if (sc->intr_data[i].intr_use == use) {
4231 			rate = sc->intr_data[i].intr_rate;
4232 			break;
4233 		}
4234 	}
4235 
4236 	error = sysctl_handle_int(oidp, &rate, 0, req);
4237 	if (error || req->newptr == NULL)
4238 		return error;
4239 	if (rate <= 0)
4240 		return EINVAL;
4241 
4242 	ifnet_serialize_all(ifp);
4243 
4244 	for (i = 0; i < sc->intr_cnt; ++i) {
4245 		if (sc->intr_data[i].intr_use == use) {
4246 			sc->intr_data[i].intr_rate = rate;
4247 			if (ifp->if_flags & IFF_RUNNING)
4248 				ix_set_eitr(sc, i, rate);
4249 		}
4250 	}
4251 
4252 	ifnet_deserialize_all(ifp);
4253 
4254 	return error;
4255 }
4256 
4257 static void
4258 ix_add_intr_rate_sysctl(struct ix_softc *sc, int use,
4259     const char *name, int (*handler)(SYSCTL_HANDLER_ARGS), const char *desc)
4260 {
4261 	int i;
4262 
4263 	for (i = 0; i < sc->intr_cnt; ++i) {
4264 		if (sc->intr_data[i].intr_use == use) {
4265 			SYSCTL_ADD_PROC(device_get_sysctl_ctx(sc->dev),
4266 			    SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)),
4267 			    OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW,
4268 			    sc, 0, handler, "I", desc);
4269 			break;
4270 		}
4271 	}
4272 }
4273 
4274 static void
4275 ix_set_timer_cpuid(struct ix_softc *sc, boolean_t polling)
4276 {
4277 	if (polling || sc->intr_type == PCI_INTR_TYPE_MSIX)
4278 		sc->timer_cpuid = 0; /* XXX fixed */
4279 	else
4280 		sc->timer_cpuid = rman_get_cpuid(sc->intr_data[0].intr_res);
4281 }
4282 
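/*
 * Attempt to switch to MSI-X: size the vector count, choose between
 * aggregated and independent RX/TX vectors, map the MSI-X table BAR,
 * fill in the per-vector interrupt data and allocate the vectors.
 * On any failure the MSI-X state is torn down and the driver stays
 * with MSI/legacy interrupts.
 */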
4283 static void
4284 ix_alloc_msix(struct ix_softc *sc)
4285 {
4286 	int msix_enable, msix_cnt, msix_cnt2, alloc_cnt;
4287 	struct ix_intr_data *intr;
4288 	int i, x, error;
4289 	int offset, offset_def, agg_rxtx, ring_max;
4290 	boolean_t aggregate, setup = FALSE;
4291 
4292 	msix_enable = ix_msix_enable;
4293 	/*
4294 	 * Don't enable MSI-X on 82598 by default, see:
4295 	 * 82598 specification update errata #38
4296 	 */
4297 	if (sc->hw.mac.type == ixgbe_mac_82598EB)
4298 		msix_enable = 0;
4299 	msix_enable = device_getenv_int(sc->dev, "msix.enable", msix_enable);
4300 	if (!msix_enable)
4301 		return;
4302 
4303 	msix_cnt = pci_msix_count(sc->dev);
4304 #ifdef IX_MSIX_DEBUG
4305 	msix_cnt = device_getenv_int(sc->dev, "msix.count", msix_cnt);
4306 #endif
4307 	if (msix_cnt <= 1) {
4308 		/* One MSI-X model does not make sense */
4309 		return;
4310 	}
4311 
4312 	i = 0;
4313 	while ((1 << (i + 1)) <= msix_cnt)
4314 		++i;
4315 	msix_cnt2 = 1 << i;
4316 
4317 	if (bootverbose) {
4318 		device_printf(sc->dev, "MSI-X count %d/%d\n",
4319 		    msix_cnt2, msix_cnt);
4320 	}
4321 
4322 	KKASSERT(msix_cnt >= msix_cnt2);
4323 	if (msix_cnt == msix_cnt2) {
4324 		/* We need at least one MSI-X for link status */
4325 		msix_cnt2 >>= 1;
4326 		if (msix_cnt2 <= 1) {
4327 			/* One MSI-X for RX/TX does not make sense */
4328 			device_printf(sc->dev, "not enough MSI-X for TX/RX, "
4329 			    "MSI-X count %d/%d\n", msix_cnt2, msix_cnt);
4330 			return;
4331 		}
4332 		KKASSERT(msix_cnt > msix_cnt2);
4333 
4334 		if (bootverbose) {
4335 			device_printf(sc->dev, "MSI-X count eq fixup %d/%d\n",
4336 			    msix_cnt2, msix_cnt);
4337 		}
4338 	}
4339 
4340 	/*
4341 	 * Make sure that we don't exceed the limits imposed by the
4342 	 * interrupt related registers (EIMS, etc).
4343 	 *
4344 	 * NOTE: msix_cnt > msix_cnt2 when we reach here.
4345 	 */
4346 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4347 		if (msix_cnt2 > IX_MAX_MSIX_82598)
4348 			msix_cnt2 = IX_MAX_MSIX_82598;
4349 	} else {
4350 		if (msix_cnt2 > IX_MAX_MSIX)
4351 			msix_cnt2 = IX_MAX_MSIX;
4352 	}
4353 	msix_cnt = msix_cnt2 + 1;	/* +1 for status */
4354 
4355 	if (bootverbose) {
4356 		device_printf(sc->dev, "MSI-X count max fixup %d/%d\n",
4357 		    msix_cnt2, msix_cnt);
4358 	}
4359 
4360 	sc->rx_ring_msix = sc->rx_ring_cnt;
4361 	if (sc->rx_ring_msix > msix_cnt2)
4362 		sc->rx_ring_msix = msix_cnt2;
4363 
4364 	sc->tx_ring_msix = sc->tx_ring_cnt;
4365 	if (sc->tx_ring_msix > msix_cnt2)
4366 		sc->tx_ring_msix = msix_cnt2;
4367 
4368 	ring_max = sc->rx_ring_msix;
4369 	if (ring_max < sc->tx_ring_msix)
4370 		ring_max = sc->tx_ring_msix;
4371 
4372 	/* Allow user to force independent RX/TX MSI-X handling */
4373 	agg_rxtx = device_getenv_int(sc->dev, "msix.agg_rxtx",
4374 	    ix_msix_agg_rxtx);
4375 
4376 	if (!agg_rxtx && msix_cnt >= sc->tx_ring_msix + sc->rx_ring_msix + 1) {
4377 		/*
4378 		 * Independent TX/RX MSI-X
4379 		 */
4380 		aggregate = FALSE;
4381 		if (bootverbose)
4382 			device_printf(sc->dev, "independent TX/RX MSI-X\n");
4383 		alloc_cnt = sc->tx_ring_msix + sc->rx_ring_msix;
4384 	} else {
4385 		/*
4386 		 * Aggregate TX/RX MSI-X
4387 		 */
4388 		aggregate = TRUE;
4389 		if (bootverbose)
4390 			device_printf(sc->dev, "aggregate TX/RX MSI-X\n");
4391 		alloc_cnt = msix_cnt2;
4392 		if (alloc_cnt > ring_max)
4393 			alloc_cnt = ring_max;
4394 		KKASSERT(alloc_cnt >= sc->rx_ring_msix &&
4395 		    alloc_cnt >= sc->tx_ring_msix);
4396 	}
4397 	++alloc_cnt;	/* For status */
4398 
4399 	if (bootverbose) {
4400 		device_printf(sc->dev, "MSI-X alloc %d, "
4401 		    "RX ring %d, TX ring %d\n", alloc_cnt,
4402 		    sc->rx_ring_msix, sc->tx_ring_msix);
4403 	}
4404 
4405 	sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82598);
4406 	sc->msix_mem_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4407 	    &sc->msix_mem_rid, RF_ACTIVE);
4408 	if (sc->msix_mem_res == NULL) {
4409 		sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82599);
4410 		sc->msix_mem_res = bus_alloc_resource_any(sc->dev,
4411 		    SYS_RES_MEMORY, &sc->msix_mem_rid, RF_ACTIVE);
4412 		if (sc->msix_mem_res == NULL) {
4413 			device_printf(sc->dev, "Unable to map MSI-X table\n");
4414 			return;
4415 		}
4416 	}
4417 
4418 	sc->intr_cnt = alloc_cnt;
4419 	sc->intr_data = kmalloc(sizeof(struct ix_intr_data) * sc->intr_cnt,
4420 	    M_DEVBUF, M_WAITOK | M_ZERO);
4421 	for (x = 0; x < sc->intr_cnt; ++x) {
4422 		intr = &sc->intr_data[x];
4423 		intr->intr_rid = -1;
4424 		intr->intr_rate = IX_INTR_RATE;
4425 	}
4426 
4427 	x = 0;
4428 	if (!aggregate) {
4429 		/*
4430 		 * RX rings
4431 		 */
4432 		if (sc->rx_ring_msix == ncpus2) {
4433 			offset = 0;
4434 		} else {
4435 			offset_def = (sc->rx_ring_msix *
4436 			    device_get_unit(sc->dev)) % ncpus2;
4437 
4438 			offset = device_getenv_int(sc->dev,
4439 			    "msix.rxoff", offset_def);
4440 			if (offset >= ncpus2 ||
4441 			    offset % sc->rx_ring_msix != 0) {
4442 				device_printf(sc->dev,
4443 				    "invalid msix.rxoff %d, use %d\n",
4444 				    offset, offset_def);
4445 				offset = offset_def;
4446 			}
4447 		}
4448 		ix_conf_rx_msix(sc, 0, &x, offset);
4449 
4450 		/*
4451 		 * TX rings
4452 		 */
4453 		if (sc->tx_ring_msix == ncpus2) {
4454 			offset = 0;
4455 		} else {
4456 			offset_def = (sc->tx_ring_msix *
4457 			    device_get_unit(sc->dev)) % ncpus2;
4458 
4459 			offset = device_getenv_int(sc->dev,
4460 			    "msix.txoff", offset_def);
4461 			if (offset >= ncpus2 ||
4462 			    offset % sc->tx_ring_msix != 0) {
4463 				device_printf(sc->dev,
4464 				    "invalid msix.txoff %d, use %d\n",
4465 				    offset, offset_def);
4466 				offset = offset_def;
4467 			}
4468 		}
4469 		ix_conf_tx_msix(sc, 0, &x, offset);
4470 	} else {
4471 		int ring_agg;
4472 
4473 		ring_agg = sc->rx_ring_msix;
4474 		if (ring_agg > sc->tx_ring_msix)
4475 			ring_agg = sc->tx_ring_msix;
4476 
4477 		if (ring_max == ncpus2) {
4478 			offset = 0;
4479 		} else {
4480 			offset_def = (ring_max * device_get_unit(sc->dev)) %
4481 			    ncpus2;
4482 
4483 			offset = device_getenv_int(sc->dev, "msix.off",
4484 			    offset_def);
4485 			if (offset >= ncpus2 || offset % ring_max != 0) {
4486 				device_printf(sc->dev,
4487 				    "invalid msix.off %d, use %d\n",
4488 				    offset, offset_def);
4489 				offset = offset_def;
4490 			}
4491 		}
4492 
4493 		for (i = 0; i < ring_agg; ++i) {
4494 			struct ix_tx_ring *txr = &sc->tx_rings[i];
4495 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
4496 
4497 			KKASSERT(x < sc->intr_cnt);
4498 			rxr->rx_intr_vec = x;
4499 			ix_setup_msix_eims(sc, x,
4500 			    &rxr->rx_eims, &rxr->rx_eims_val);
4501 			rxr->rx_txr = txr;
4502 			/* NOTE: Leave TX ring's intr_vec negative */
4503 
4504 			intr = &sc->intr_data[x++];
4505 
4506 			intr->intr_serialize = &rxr->rx_serialize;
4507 			intr->intr_func = ix_msix_rxtx;
4508 			intr->intr_funcarg = rxr;
4509 			intr->intr_use = IX_INTR_USE_RXTX;
4510 
4511 			intr->intr_cpuid = i + offset;
4512 			KKASSERT(intr->intr_cpuid < ncpus2);
4513 			txr->tx_intr_cpuid = intr->intr_cpuid;
4514 
4515 			ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0),
4516 			    "%s rxtx%d", device_get_nameunit(sc->dev), i);
4517 			intr->intr_desc = intr->intr_desc0;
4518 		}
4519 
4520 		if (ring_agg != ring_max) {
4521 			if (ring_max == sc->tx_ring_msix)
4522 				ix_conf_tx_msix(sc, i, &x, offset);
4523 			else
4524 				ix_conf_rx_msix(sc, i, &x, offset);
4525 		}
4526 	}
4527 
4528 	/*
4529 	 * Status MSI-X
4530 	 */
4531 	KKASSERT(x < sc->intr_cnt);
4532 	sc->sts_msix_vec = x;
4533 
4534 	intr = &sc->intr_data[x++];
4535 
4536 	intr->intr_serialize = &sc->main_serialize;
4537 	intr->intr_func = ix_msix_status;
4538 	intr->intr_funcarg = sc;
4539 	intr->intr_cpuid = 0;
4540 	intr->intr_use = IX_INTR_USE_STATUS;
4541 
4542 	ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s sts",
4543 	    device_get_nameunit(sc->dev));
4544 	intr->intr_desc = intr->intr_desc0;
4545 
4546 	KKASSERT(x == sc->intr_cnt);
4547 
4548 	error = pci_setup_msix(sc->dev);
4549 	if (error) {
4550 		device_printf(sc->dev, "Setup MSI-X failed\n");
4551 		goto back;
4552 	}
4553 	setup = TRUE;
4554 
4555 	for (i = 0; i < sc->intr_cnt; ++i) {
4556 		intr = &sc->intr_data[i];
4557 
4558 		error = pci_alloc_msix_vector(sc->dev, i, &intr->intr_rid,
4559 		    intr->intr_cpuid);
4560 		if (error) {
4561 			device_printf(sc->dev,
4562 			    "Unable to allocate MSI-X %d on cpu%d\n", i,
4563 			    intr->intr_cpuid);
4564 			goto back;
4565 		}
4566 
4567 		intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
4568 		    &intr->intr_rid, RF_ACTIVE);
4569 		if (intr->intr_res == NULL) {
4570 			device_printf(sc->dev,
4571 			    "Unable to allocate MSI-X %d resource\n", i);
4572 			error = ENOMEM;
4573 			goto back;
4574 		}
4575 	}
4576 
4577 	pci_enable_msix(sc->dev);
4578 	sc->intr_type = PCI_INTR_TYPE_MSIX;
4579 back:
4580 	if (error)
4581 		ix_free_msix(sc, setup);
4582 }
4583 
4584 static void
4585 ix_free_msix(struct ix_softc *sc, boolean_t setup)
4586 {
4587 	int i;
4588 
4589 	KKASSERT(sc->intr_cnt > 1);
4590 
4591 	for (i = 0; i < sc->intr_cnt; ++i) {
4592 		struct ix_intr_data *intr = &sc->intr_data[i];
4593 
4594 		if (intr->intr_res != NULL) {
4595 			bus_release_resource(sc->dev, SYS_RES_IRQ,
4596 			    intr->intr_rid, intr->intr_res);
4597 		}
4598 		if (intr->intr_rid >= 0)
4599 			pci_release_msix_vector(sc->dev, intr->intr_rid);
4600 	}
4601 	if (setup)
4602 		pci_teardown_msix(sc->dev);
4603 
4604 	sc->intr_cnt = 0;
4605 	kfree(sc->intr_data, M_DEVBUF);
4606 	sc->intr_data = NULL;
4607 }
4608 
4609 static void
4610 ix_conf_rx_msix(struct ix_softc *sc, int i, int *x0, int offset)
4611 {
4612 	int x = *x0;
4613 
4614 	for (; i < sc->rx_ring_msix; ++i) {
4615 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
4616 		struct ix_intr_data *intr;
4617 
4618 		KKASSERT(x < sc->intr_cnt);
4619 		rxr->rx_intr_vec = x;
4620 		ix_setup_msix_eims(sc, x, &rxr->rx_eims, &rxr->rx_eims_val);
4621 
4622 		intr = &sc->intr_data[x++];
4623 
4624 		intr->intr_serialize = &rxr->rx_serialize;
4625 		intr->intr_func = ix_msix_rx;
4626 		intr->intr_funcarg = rxr;
4627 		intr->intr_rate = IX_MSIX_RX_RATE;
4628 		intr->intr_use = IX_INTR_USE_RX;
4629 
4630 		intr->intr_cpuid = i + offset;
4631 		KKASSERT(intr->intr_cpuid < ncpus2);
4632 
4633 		ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s rx%d",
4634 		    device_get_nameunit(sc->dev), i);
4635 		intr->intr_desc = intr->intr_desc0;
4636 	}
4637 	*x0 = x;
4638 }
4639 
4640 static void
4641 ix_conf_tx_msix(struct ix_softc *sc, int i, int *x0, int offset)
4642 {
4643 	int x = *x0;
4644 
4645 	for (; i < sc->tx_ring_msix; ++i) {
4646 		struct ix_tx_ring *txr = &sc->tx_rings[i];
4647 		struct ix_intr_data *intr;
4648 
4649 		KKASSERT(x < sc->intr_cnt);
4650 		txr->tx_intr_vec = x;
4651 		ix_setup_msix_eims(sc, x, &txr->tx_eims, &txr->tx_eims_val);
4652 
4653 		intr = &sc->intr_data[x++];
4654 
4655 		intr->intr_serialize = &txr->tx_serialize;
4656 		intr->intr_func = ix_msix_tx;
4657 		intr->intr_funcarg = txr;
4658 		intr->intr_rate = IX_MSIX_TX_RATE;
4659 		intr->intr_use = IX_INTR_USE_TX;
4660 
4661 		intr->intr_cpuid = i + offset;
4662 		KKASSERT(intr->intr_cpuid < ncpus2);
4663 		txr->tx_intr_cpuid = intr->intr_cpuid;
4664 
4665 		ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s tx%d",
4666 		    device_get_nameunit(sc->dev), i);
4667 		intr->intr_desc = intr->intr_desc0;
4668 	}
4669 	*x0 = x;
4670 }
4671 
4672 static void
4673 ix_msix_rx(void *xrxr)
4674 {
4675 	struct ix_rx_ring *rxr = xrxr;
4676 
4677 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4678 
4679 	ix_rxeof(rxr, -1);
4680 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
4681 }
4682 
4683 static void
4684 ix_msix_tx(void *xtxr)
4685 {
4686 	struct ix_tx_ring *txr = xtxr;
4687 
4688 	ASSERT_SERIALIZED(&txr->tx_serialize);
4689 
4690 	ix_txeof(txr, *(txr->tx_hdr));
4691 	if (!ifsq_is_empty(txr->tx_ifsq))
4692 		ifsq_devstart(txr->tx_ifsq);
4693 	IXGBE_WRITE_REG(&txr->tx_sc->hw, txr->tx_eims, txr->tx_eims_val);
4694 }
4695 
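/*
 * MSI-X handler for an aggregated RX/TX vector: drain the RX ring,
 * then clean the paired TX ring only if its head pointer has moved.
 */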
4696 static void
4697 ix_msix_rxtx(void *xrxr)
4698 {
4699 	struct ix_rx_ring *rxr = xrxr;
4700 	struct ix_tx_ring *txr;
4701 	int hdr;
4702 
4703 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4704 
4705 	ix_rxeof(rxr, -1);
4706 
4707 	/*
4708 	 * NOTE:
4709 	 * Since tx_next_clean is only changed by ix_txeof(),
4710 	 * which is called only from the interrupt handler, the
4711 	 * check without holding the TX serializer is MPSAFE.
4712 	 */
4713 	txr = rxr->rx_txr;
4714 	hdr = *(txr->tx_hdr);
4715 	if (hdr != txr->tx_next_clean) {
4716 		lwkt_serialize_enter(&txr->tx_serialize);
4717 		ix_txeof(txr, hdr);
4718 		if (!ifsq_is_empty(txr->tx_ifsq))
4719 			ifsq_devstart(txr->tx_ifsq);
4720 		lwkt_serialize_exit(&txr->tx_serialize);
4721 	}
4722 
4723 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
4724 }
4725 
4726 static void
4727 ix_intr_status(struct ix_softc *sc, uint32_t eicr)
4728 {
4729 	struct ixgbe_hw *hw = &sc->hw;
4730 
4731 	/* Link status change */
4732 	if (eicr & IXGBE_EICR_LSC)
4733 		ix_handle_link(sc);
4734 
4735 	if (hw->mac.type != ixgbe_mac_82598EB) {
4736 		if (eicr & IXGBE_EICR_ECC)
4737 			if_printf(&sc->arpcom.ac_if, "ECC ERROR!!  Reboot!!\n");
4738 		else if (eicr & IXGBE_EICR_GPI_SDP1)
4739 			ix_handle_msf(sc);
4740 		else if (eicr & IXGBE_EICR_GPI_SDP2)
4741 			ix_handle_mod(sc);
4742 	}
4743 
4744 	/* Check for fan failure */
4745 	if (hw->device_id == IXGBE_DEV_ID_82598AT &&
4746 	    (eicr & IXGBE_EICR_GPI_SDP1))
4747 		if_printf(&sc->arpcom.ac_if, "FAN FAILURE!!  Replace!!\n");
4748 
4749 	/* Check for over temp condition */
4750 	if (hw->mac.type == ixgbe_mac_X540 && (eicr & IXGBE_EICR_TS)) {
4751 		if_printf(&sc->arpcom.ac_if, "OVER TEMP!!  "
4752 		    "PHY IS SHUT DOWN!!  Reboot\n");
4753 	}
4754 }
4755 
4756 static void
4757 ix_msix_status(void *xsc)
4758 {
4759 	struct ix_softc *sc = xsc;
4760 	uint32_t eicr;
4761 
4762 	ASSERT_SERIALIZED(&sc->main_serialize);
4763 
4764 	eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
4765 	ix_intr_status(sc, eicr);
4766 
4767 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMS, sc->intr_mask);
4768 }
4769 
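/*
 * Pick the EIMS register and bit corresponding to MSI-X vector 'x':
 * EIMS on 82598, EIMS_EX(0)/EIMS_EX(1) on later MACs for vectors
 * below/above 32.
 */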
4770 static void
4771 ix_setup_msix_eims(const struct ix_softc *sc, int x,
4772     uint32_t *eims, uint32_t *eims_val)
4773 {
4774 	if (x < 32) {
4775 		if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4776 			KASSERT(x < IX_MAX_MSIX_82598,
4777 			    ("%s: invalid vector %d for 82598",
4778 			     device_get_nameunit(sc->dev), x));
4779 			*eims = IXGBE_EIMS;
4780 		} else {
4781 			*eims = IXGBE_EIMS_EX(0);
4782 		}
4783 		*eims_val = 1 << x;
4784 	} else {
4785 		KASSERT(x < IX_MAX_MSIX, ("%s: invalid vector %d",
4786 		    device_get_nameunit(sc->dev), x));
4787 		KASSERT(sc->hw.mac.type != ixgbe_mac_82598EB,
4788 		    ("%s: invalid vector %d for 82598",
4789 		     device_get_nameunit(sc->dev), x));
4790 		*eims = IXGBE_EIMS_EX(1);
4791 		*eims_val = 1 << (x - 32);
4792 	}
4793 }
4794 
4795 #ifdef IFPOLL_ENABLE
4796 
4797 static void
4798 ix_npoll_status(struct ifnet *ifp)
4799 {
4800 	struct ix_softc *sc = ifp->if_softc;
4801 	uint32_t eicr;
4802 
4803 	ASSERT_SERIALIZED(&sc->main_serialize);
4804 
4805 	eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
4806 	ix_intr_status(sc, eicr);
4807 }
4808 
4809 static void
4810 ix_npoll_tx(struct ifnet *ifp, void *arg, int cycle __unused)
4811 {
4812 	struct ix_tx_ring *txr = arg;
4813 
4814 	ASSERT_SERIALIZED(&txr->tx_serialize);
4815 
4816 	ix_txeof(txr, *(txr->tx_hdr));
4817 	if (!ifsq_is_empty(txr->tx_ifsq))
4818 		ifsq_devstart(txr->tx_ifsq);
4819 }
4820 
4821 static void
4822 ix_npoll_rx(struct ifnet *ifp __unused, void *arg, int cycle)
4823 {
4824 	struct ix_rx_ring *rxr = arg;
4825 
4826 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4827 
4828 	ix_rxeof(rxr, cycle);
4829 }
4830 
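/*
 * ifpoll registration hook.  With 'info' supplied, each RX/TX ring is
 * bound to a polling CPU; otherwise TX queues are rebound to their
 * interrupt CPUs.  The interface is reinitialized if the number of
 * rings in use changes.
 */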
4831 static void
4832 ix_npoll(struct ifnet *ifp, struct ifpoll_info *info)
4833 {
4834 	struct ix_softc *sc = ifp->if_softc;
4835 	int i, txr_cnt, rxr_cnt;
4836 
4837 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
4838 
4839 	if (info) {
4840 		int off;
4841 
4842 		info->ifpi_status.status_func = ix_npoll_status;
4843 		info->ifpi_status.serializer = &sc->main_serialize;
4844 
4845 		txr_cnt = ix_get_txring_inuse(sc, TRUE);
4846 		off = sc->tx_npoll_off;
4847 		for (i = 0; i < txr_cnt; ++i) {
4848 			struct ix_tx_ring *txr = &sc->tx_rings[i];
4849 			int idx = i + off;
4850 
4851 			KKASSERT(idx < ncpus2);
4852 			info->ifpi_tx[idx].poll_func = ix_npoll_tx;
4853 			info->ifpi_tx[idx].arg = txr;
4854 			info->ifpi_tx[idx].serializer = &txr->tx_serialize;
4855 			ifsq_set_cpuid(txr->tx_ifsq, idx);
4856 		}
4857 
4858 		rxr_cnt = ix_get_rxring_inuse(sc, TRUE);
4859 		off = sc->rx_npoll_off;
4860 		for (i = 0; i < rxr_cnt; ++i) {
4861 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
4862 			int idx = i + off;
4863 
4864 			KKASSERT(idx < ncpus2);
4865 			info->ifpi_rx[idx].poll_func = ix_npoll_rx;
4866 			info->ifpi_rx[idx].arg = rxr;
4867 			info->ifpi_rx[idx].serializer = &rxr->rx_serialize;
4868 		}
4869 
4870 		if (ifp->if_flags & IFF_RUNNING) {
4871 			if (rxr_cnt == sc->rx_ring_inuse &&
4872 			    txr_cnt == sc->tx_ring_inuse) {
4873 				ix_set_timer_cpuid(sc, TRUE);
4874 				ix_disable_intr(sc);
4875 			} else {
4876 				ix_init(sc);
4877 			}
4878 		}
4879 	} else {
4880 		for (i = 0; i < sc->tx_ring_cnt; ++i) {
4881 			struct ix_tx_ring *txr = &sc->tx_rings[i];
4882 
4883 			ifsq_set_cpuid(txr->tx_ifsq, txr->tx_intr_cpuid);
4884 		}
4885 
4886 		if (ifp->if_flags & IFF_RUNNING) {
4887 			txr_cnt = ix_get_txring_inuse(sc, FALSE);
4888 			rxr_cnt = ix_get_rxring_inuse(sc, FALSE);
4889 
4890 			if (rxr_cnt == sc->rx_ring_inuse &&
4891 			    txr_cnt == sc->tx_ring_inuse) {
4892 				ix_set_timer_cpuid(sc, FALSE);
4893 				ix_enable_intr(sc);
4894 			} else {
4895 				ix_init(sc);
4896 			}
4897 		}
4898 	}
4899 }
4900 
4901 static int
4902 ix_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS)
4903 {
4904 	struct ix_softc *sc = (void *)arg1;
4905 	struct ifnet *ifp = &sc->arpcom.ac_if;
4906 	int error, off;
4907 
4908 	off = sc->rx_npoll_off;
4909 	error = sysctl_handle_int(oidp, &off, 0, req);
4910 	if (error || req->newptr == NULL)
4911 		return error;
4912 	if (off < 0)
4913 		return EINVAL;
4914 
4915 	ifnet_serialize_all(ifp);
4916 	if (off >= ncpus2 || off % sc->rx_ring_cnt != 0) {
4917 		error = EINVAL;
4918 	} else {
4919 		error = 0;
4920 		sc->rx_npoll_off = off;
4921 	}
4922 	ifnet_deserialize_all(ifp);
4923 
4924 	return error;
4925 }
4926 
4927 static int
4928 ix_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS)
4929 {
4930 	struct ix_softc *sc = (void *)arg1;
4931 	struct ifnet *ifp = &sc->arpcom.ac_if;
4932 	int error, off;
4933 
4934 	off = sc->tx_npoll_off;
4935 	error = sysctl_handle_int(oidp, &off, 0, req);
4936 	if (error || req->newptr == NULL)
4937 		return error;
4938 	if (off < 0)
4939 		return EINVAL;
4940 
4941 	ifnet_serialize_all(ifp);
4942 	if (off >= ncpus2 || off % sc->tx_ring_cnt != 0) {
4943 		error = EINVAL;
4944 	} else {
4945 		error = 0;
4946 		sc->tx_npoll_off = off;
4947 	}
4948 	ifnet_deserialize_all(ifp);
4949 
4950 	return error;
4951 }
4952 
4953 #endif /* IFPOLL_ENABLE */
4954