xref: /dragonfly/sys/dev/netif/ix/if_ix.c (revision ef3ac1d1)
1 /*
2  * Copyright (c) 2001-2013, Intel Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  *  1. Redistributions of source code must retain the above copyright notice,
9  *     this list of conditions and the following disclaimer.
10  *
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *     notice, this list of conditions and the following disclaimer in the
13  *     documentation and/or other materials provided with the distribution.
14  *
15  *  3. Neither the name of the Intel Corporation nor the names of its
16  *     contributors may be used to endorse or promote products derived from
17  *     this software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include "opt_ifpoll.h"
33 #include "opt_ix.h"
34 
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/endian.h>
38 #include <sys/interrupt.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/proc.h>
43 #include <sys/rman.h>
44 #include <sys/serialize.h>
45 #include <sys/serialize2.h>
46 #include <sys/socket.h>
47 #include <sys/sockio.h>
48 #include <sys/sysctl.h>
49 #include <sys/systm.h>
50 
51 #include <net/bpf.h>
52 #include <net/ethernet.h>
53 #include <net/if.h>
54 #include <net/if_arp.h>
55 #include <net/if_dl.h>
56 #include <net/if_media.h>
57 #include <net/ifq_var.h>
58 #include <net/toeplitz.h>
59 #include <net/toeplitz2.h>
60 #include <net/vlan/if_vlan_var.h>
61 #include <net/vlan/if_vlan_ether.h>
62 #include <net/if_poll.h>
63 
64 #include <netinet/in_systm.h>
65 #include <netinet/in.h>
66 #include <netinet/ip.h>
67 
68 #include <bus/pci/pcivar.h>
69 #include <bus/pci/pcireg.h>
70 
71 #include <dev/netif/ix/ixgbe_api.h>
72 #include <dev/netif/ix/if_ix.h>
73 
74 #ifdef IX_RSS_DEBUG
75 #define IX_RSS_DPRINTF(sc, lvl, fmt, ...) \
76 do { \
77 	if (sc->rss_debug >= lvl) \
78 		if_printf(&sc->arpcom.ac_if, fmt, __VA_ARGS__); \
79 } while (0)
80 #else	/* !IX_RSS_DEBUG */
81 #define IX_RSS_DPRINTF(sc, lvl, fmt, ...)	((void)0)
82 #endif	/* IX_RSS_DEBUG */
83 
84 #define IX_NAME			"Intel(R) PRO/10GbE "
85 #define IX_DEVICE(id) \
86 	{ IXGBE_VENDOR_ID, IXGBE_DEV_ID_##id, IX_NAME #id }
87 #define IX_DEVICE_NULL		{ 0, 0, NULL }
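/*
 * For example, IX_DEVICE(82599_SFP) expands to
 * { IXGBE_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, "Intel(R) PRO/10GbE 82599_SFP" },
 * i.e. the probe description string is derived directly from the device
 * id token via stringization.
 */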
88 
89 static struct ix_device {
90 	uint16_t	vid;
91 	uint16_t	did;
92 	const char	*desc;
93 } ix_devices[] = {
94 	IX_DEVICE(82598AF_DUAL_PORT),
95 	IX_DEVICE(82598AF_SINGLE_PORT),
96 	IX_DEVICE(82598EB_CX4),
97 	IX_DEVICE(82598AT),
98 	IX_DEVICE(82598AT2),
99 	IX_DEVICE(82598),
100 	IX_DEVICE(82598_DA_DUAL_PORT),
101 	IX_DEVICE(82598_CX4_DUAL_PORT),
102 	IX_DEVICE(82598EB_XF_LR),
103 	IX_DEVICE(82598_SR_DUAL_PORT_EM),
104 	IX_DEVICE(82598EB_SFP_LOM),
105 	IX_DEVICE(82599_KX4),
106 	IX_DEVICE(82599_KX4_MEZZ),
107 	IX_DEVICE(82599_SFP),
108 	IX_DEVICE(82599_XAUI_LOM),
109 	IX_DEVICE(82599_CX4),
110 	IX_DEVICE(82599_T3_LOM),
111 	IX_DEVICE(82599_COMBO_BACKPLANE),
112 	IX_DEVICE(82599_BACKPLANE_FCOE),
113 	IX_DEVICE(82599_SFP_SF2),
114 	IX_DEVICE(82599_SFP_FCOE),
115 	IX_DEVICE(82599EN_SFP),
116 	IX_DEVICE(82599_SFP_SF_QP),
117 	IX_DEVICE(X540T),
118 
119 	/* required last entry */
120 	IX_DEVICE_NULL
121 };
122 
123 static int	ix_probe(device_t);
124 static int	ix_attach(device_t);
125 static int	ix_detach(device_t);
126 static int	ix_shutdown(device_t);
127 
128 static void	ix_serialize(struct ifnet *, enum ifnet_serialize);
129 static void	ix_deserialize(struct ifnet *, enum ifnet_serialize);
130 static int	ix_tryserialize(struct ifnet *, enum ifnet_serialize);
131 #ifdef INVARIANTS
132 static void	ix_serialize_assert(struct ifnet *, enum ifnet_serialize,
133 		    boolean_t);
134 #endif
135 static void	ix_start(struct ifnet *, struct ifaltq_subque *);
136 static void	ix_watchdog(struct ifaltq_subque *);
137 static int	ix_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
138 static void	ix_init(void *);
139 static void	ix_stop(struct ix_softc *);
140 static void	ix_media_status(struct ifnet *, struct ifmediareq *);
141 static int	ix_media_change(struct ifnet *);
142 static void	ix_timer(void *);
143 #ifdef IFPOLL_ENABLE
144 static void	ix_npoll(struct ifnet *, struct ifpoll_info *);
145 static void	ix_npoll_rx(struct ifnet *, void *, int);
146 static void	ix_npoll_tx(struct ifnet *, void *, int);
147 static void	ix_npoll_status(struct ifnet *);
148 #endif
149 
150 static void	ix_add_sysctl(struct ix_softc *);
151 static void	ix_add_intr_rate_sysctl(struct ix_softc *, int,
152 		    const char *, int (*)(SYSCTL_HANDLER_ARGS), const char *);
153 static int	ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
154 static int	ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
155 static int	ix_sysctl_txd(SYSCTL_HANDLER_ARGS);
156 static int	ix_sysctl_rxd(SYSCTL_HANDLER_ARGS);
157 static int	ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS);
158 static int	ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int);
159 static int	ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS);
160 static int	ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS);
161 static int	ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS);
162 static int	ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS);
163 static int	ix_sysctl_flowctrl(SYSCTL_HANDLER_ARGS);
164 #ifdef foo
165 static int	ix_sysctl_advspeed(SYSCTL_HANDLER_ARGS);
166 #endif
167 #if 0
168 static void     ix_add_hw_stats(struct ix_softc *);
169 #endif
170 #ifdef IFPOLL_ENABLE
171 static int	ix_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS);
172 static int	ix_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS);
173 #endif
174 
175 static void	ix_slot_info(struct ix_softc *);
176 static int	ix_alloc_rings(struct ix_softc *);
177 static void	ix_free_rings(struct ix_softc *);
178 static void	ix_setup_ifp(struct ix_softc *);
179 static void	ix_setup_serialize(struct ix_softc *);
180 static void	ix_set_ring_inuse(struct ix_softc *, boolean_t);
181 static void	ix_set_timer_cpuid(struct ix_softc *, boolean_t);
182 static void	ix_update_stats(struct ix_softc *);
183 
184 static void	ix_set_promisc(struct ix_softc *);
185 static void	ix_set_multi(struct ix_softc *);
186 static void	ix_set_vlan(struct ix_softc *);
187 static uint8_t	*ix_mc_array_itr(struct ixgbe_hw *, uint8_t **, uint32_t *);
188 
189 static int	ix_get_txring_inuse(const struct ix_softc *, boolean_t);
190 static void	ix_init_tx_ring(struct ix_tx_ring *);
191 static void	ix_free_tx_ring(struct ix_tx_ring *);
192 static int	ix_create_tx_ring(struct ix_tx_ring *);
193 static void	ix_destroy_tx_ring(struct ix_tx_ring *, int);
194 static void	ix_init_tx_unit(struct ix_softc *);
195 static int	ix_encap(struct ix_tx_ring *, struct mbuf **,
196 		    uint16_t *, int *);
197 static int	ix_tx_ctx_setup(struct ix_tx_ring *,
198 		    const struct mbuf *, uint32_t *, uint32_t *);
199 static int	ix_tso_ctx_setup(struct ix_tx_ring *,
200 		    const struct mbuf *, uint32_t *, uint32_t *);
201 static void	ix_txeof(struct ix_tx_ring *, int);
202 
203 static int	ix_get_rxring_inuse(const struct ix_softc *, boolean_t);
204 static int	ix_init_rx_ring(struct ix_rx_ring *);
205 static void	ix_free_rx_ring(struct ix_rx_ring *);
206 static int	ix_create_rx_ring(struct ix_rx_ring *);
207 static void	ix_destroy_rx_ring(struct ix_rx_ring *, int);
208 static void	ix_init_rx_unit(struct ix_softc *);
209 #if 0
210 static void	ix_setup_hw_rsc(struct ix_rx_ring *);
211 #endif
212 static int	ix_newbuf(struct ix_rx_ring *, int, boolean_t);
213 static void	ix_rxeof(struct ix_rx_ring *, int);
214 static void	ix_rx_discard(struct ix_rx_ring *, int, boolean_t);
215 static void	ix_enable_rx_drop(struct ix_softc *);
216 static void	ix_disable_rx_drop(struct ix_softc *);
217 
218 static void	ix_alloc_msix(struct ix_softc *);
219 static void	ix_free_msix(struct ix_softc *, boolean_t);
220 static void	ix_conf_rx_msix(struct ix_softc *, int, int *, int);
221 static void	ix_conf_tx_msix(struct ix_softc *, int, int *, int);
222 static void	ix_setup_msix_eims(const struct ix_softc *, int,
223 		    uint32_t *, uint32_t *);
224 static int	ix_alloc_intr(struct ix_softc *);
225 static void	ix_free_intr(struct ix_softc *);
226 static int	ix_setup_intr(struct ix_softc *);
227 static void	ix_teardown_intr(struct ix_softc *, int);
228 static void	ix_enable_intr(struct ix_softc *);
229 static void	ix_disable_intr(struct ix_softc *);
230 static void	ix_set_ivar(struct ix_softc *, uint8_t, uint8_t, int8_t);
231 static void	ix_set_eitr(struct ix_softc *, int, int);
232 static void	ix_intr_status(struct ix_softc *, uint32_t);
233 static void	ix_intr(void *);
234 static void	ix_msix_rxtx(void *);
235 static void	ix_msix_rx(void *);
236 static void	ix_msix_tx(void *);
237 static void	ix_msix_status(void *);
238 
239 static void	ix_config_link(struct ix_softc *);
240 static boolean_t ix_sfp_probe(struct ix_softc *);
241 static boolean_t ix_is_sfp(const struct ixgbe_hw *);
242 static void	ix_setup_optics(struct ix_softc *);
243 static void	ix_update_link_status(struct ix_softc *);
244 static void	ix_handle_link(struct ix_softc *);
245 static void	ix_handle_mod(struct ix_softc *);
246 static void	ix_handle_msf(struct ix_softc *);
247 
248 /* XXX Shared code structure requires this for the moment */
249 extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *);
250 
251 static device_method_t ix_methods[] = {
252 	/* Device interface */
253 	DEVMETHOD(device_probe,		ix_probe),
254 	DEVMETHOD(device_attach,	ix_attach),
255 	DEVMETHOD(device_detach,	ix_detach),
256 	DEVMETHOD(device_shutdown,	ix_shutdown),
257 	DEVMETHOD_END
258 };
259 
260 static driver_t ix_driver = {
261 	"ix",
262 	ix_methods,
263 	sizeof(struct ix_softc)
264 };
265 
266 static devclass_t ix_devclass;
267 
268 DECLARE_DUMMY_MODULE(if_ix);
269 DRIVER_MODULE(if_ix, pci, ix_driver, ix_devclass, NULL, NULL);
270 
271 static int	ix_msi_enable = 1;
272 static int	ix_msix_enable = 1;
273 static int	ix_msix_agg_rxtx = 1;
274 static int	ix_rxr = 0;
275 static int	ix_txr = 0;
276 static int	ix_txd = IX_PERF_TXD;
277 static int	ix_rxd = IX_PERF_RXD;
278 static int	ix_unsupported_sfp = 0;
279 
280 TUNABLE_INT("hw.ix.msi.enable", &ix_msi_enable);
281 TUNABLE_INT("hw.ix.msix.enable", &ix_msix_enable);
282 TUNABLE_INT("hw.ix.msix.agg_rxtx", &ix_msix_agg_rxtx);
283 TUNABLE_INT("hw.ix.rxr", &ix_rxr);
284 TUNABLE_INT("hw.ix.txr", &ix_txr);
285 TUNABLE_INT("hw.ix.txd", &ix_txd);
286 TUNABLE_INT("hw.ix.rxd", &ix_rxd);
287 TUNABLE_INT("hw.ix.unsupported_sfp", &ix_unsupported_sfp);
288 
289 /*
290  * Smart speed setting, default to on.  This only works
291  * as a compile-time option right now, since it is applied during
292  * attach; set this to 'ixgbe_smart_speed_off' to disable.
293  */
294 static const enum ixgbe_smart_speed ix_smart_speed =
295     ixgbe_smart_speed_on;
296 
297 static int
298 ix_probe(device_t dev)
299 {
300 	const struct ix_device *d;
301 	uint16_t vid, did;
302 
303 	vid = pci_get_vendor(dev);
304 	did = pci_get_device(dev);
305 
306 	for (d = ix_devices; d->desc != NULL; ++d) {
307 		if (vid == d->vid && did == d->did) {
308 			device_set_desc(dev, d->desc);
309 			return 0;
310 		}
311 	}
312 	return ENXIO;
313 }
314 
315 static int
316 ix_attach(device_t dev)
317 {
318 	struct ix_softc *sc = device_get_softc(dev);
319 	struct ixgbe_hw *hw;
320 	int error, ring_cnt_max;
321 	uint16_t csum;
322 	uint32_t ctrl_ext;
323 #ifdef IFPOLL_ENABLE
324 	int offset, offset_def;
325 #endif
326 
327 	sc->dev = sc->osdep.dev = dev;
328 	hw = &sc->hw;
329 
330 	if_initname(&sc->arpcom.ac_if, device_get_name(dev),
331 	    device_get_unit(dev));
332 	ifmedia_init(&sc->media, IFM_IMASK,
333 	    ix_media_change, ix_media_status);
334 
335 	/* Save frame size */
336 	sc->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;
337 
338 	callout_init_mp(&sc->timer);
339 	lwkt_serialize_init(&sc->main_serialize);
340 
341 	/*
342 	 * Save off the information about this board
343 	 */
344 	hw->vendor_id = pci_get_vendor(dev);
345 	hw->device_id = pci_get_device(dev);
346 	hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
347 	hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2);
348 	hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);
349 
350 	ixgbe_set_mac_type(hw);
351 
352 	/* Pick up the 82599 and VF settings */
353 	if (hw->mac.type != ixgbe_mac_82598EB)
354 		hw->phy.smart_speed = ix_smart_speed;
355 
356 	/* Enable bus mastering */
357 	pci_enable_busmaster(dev);
358 
359 	/*
360 	 * Allocate IO memory
361 	 */
362 	sc->mem_rid = PCIR_BAR(0);
363 	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
364 	    &sc->mem_rid, RF_ACTIVE);
365 	if (sc->mem_res == NULL) {
366 		device_printf(dev, "Unable to allocate bus resource: memory\n");
367 		error = ENXIO;
368 		goto failed;
369 	}
370 
371 	sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->mem_res);
372 	sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->mem_res);
373 
374 	sc->hw.hw_addr = (uint8_t *)&sc->osdep.mem_bus_space_handle;
375 	sc->hw.back = &sc->osdep;
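	/*
	 * The shared code accesses registers through hw->hw_addr and
	 * hw->back; pointing them at the osdep structure lets the
	 * register access macros (see ixgbe_osdep.h) resolve to
	 * bus_space reads/writes on the BAR mapped above.
	 */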
376 
377 	/*
378 	 * Configure total supported RX/TX ring count
379 	 */
380 	sc->rx_ring_cnt = device_getenv_int(dev, "rxr", ix_rxr);
381 	sc->rx_ring_cnt = if_ring_count2(sc->rx_ring_cnt, IX_MAX_RXRING);
382 	sc->rx_ring_inuse = sc->rx_ring_cnt;
383 
384 	switch (hw->mac.type) {
385 	case ixgbe_mac_82598EB:
386 		ring_cnt_max = IX_MAX_TXRING_82598;
387 		break;
388 
389 	case ixgbe_mac_82599EB:
390 		ring_cnt_max = IX_MAX_TXRING_82599;
391 		break;
392 
393 	case ixgbe_mac_X540:
394 		ring_cnt_max = IX_MAX_TXRING_X540;
395 		break;
396 
397 	default:
398 		ring_cnt_max = 1;
399 		break;
400 	}
401 	sc->tx_ring_cnt = device_getenv_int(dev, "txr", ix_txr);
402 	sc->tx_ring_cnt = if_ring_count2(sc->tx_ring_cnt, ring_cnt_max);
403 	sc->tx_ring_inuse = sc->tx_ring_cnt;
404 
405 	/* Allocate TX/RX rings */
406 	error = ix_alloc_rings(sc);
407 	if (error)
408 		goto failed;
409 
410 #ifdef IFPOLL_ENABLE
411 	/*
412 	 * NPOLLING RX CPU offset
413 	 */
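	/*
	 * The offset shifts which CPUs poll the RX rings; requiring it to
	 * be a multiple of the (power-of-2) ring count keeps the whole
	 * block of rings within the first ncpus2 CPUs.  The same rule is
	 * applied to the TX offset below.
	 */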
414 	if (sc->rx_ring_cnt == ncpus2) {
415 		offset = 0;
416 	} else {
417 		offset_def = (sc->rx_ring_cnt * device_get_unit(dev)) % ncpus2;
418 		offset = device_getenv_int(dev, "npoll.rxoff", offset_def);
419 		if (offset >= ncpus2 ||
420 		    offset % sc->rx_ring_cnt != 0) {
421 			device_printf(dev, "invalid npoll.rxoff %d, use %d\n",
422 			    offset, offset_def);
423 			offset = offset_def;
424 		}
425 	}
426 	sc->rx_npoll_off = offset;
427 
428 	/*
429 	 * NPOLLING TX CPU offset
430 	 */
431 	if (sc->tx_ring_cnt == ncpus2) {
432 		offset = 0;
433 	} else {
434 		offset_def = (sc->tx_ring_cnt * device_get_unit(dev)) % ncpus2;
435 		offset = device_getenv_int(dev, "npoll.txoff", offset_def);
436 		if (offset >= ncpus2 ||
437 		    offset % sc->tx_ring_cnt != 0) {
438 			device_printf(dev, "invalid npoll.txoff %d, use %d\n",
439 			    offset, offset_def);
440 			offset = offset_def;
441 		}
442 	}
443 	sc->tx_npoll_off = offset;
444 #endif
445 
446 	/* Allocate interrupt */
447 	error = ix_alloc_intr(sc);
448 	if (error)
449 		goto failed;
450 
451 	/* Setup serializers */
452 	ix_setup_serialize(sc);
453 
454 	/* Allocate multicast array memory. */
455 	sc->mta = kmalloc(IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR,
456 	    M_DEVBUF, M_WAITOK);
457 
458 	/* Initialize the shared code */
459 	hw->allow_unsupported_sfp = ix_unsupported_sfp;
460 	error = ixgbe_init_shared_code(hw);
461 	if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
462 		/*
463 		 * No optics in this port; ask timer routine
464 		 * to probe for later insertion.
465 		 */
466 		sc->sfp_probe = TRUE;
467 		error = 0;
468 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
469 		device_printf(dev, "Unsupported SFP+ module detected!\n");
470 		error = EIO;
471 		goto failed;
472 	} else if (error) {
473 		device_printf(dev, "Unable to initialize the shared code\n");
474 		error = EIO;
475 		goto failed;
476 	}
477 
478 	/* Make sure we have a good EEPROM before we read from it */
479 	if (ixgbe_validate_eeprom_checksum(&sc->hw, &csum) < 0) {
480 		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
481 		error = EIO;
482 		goto failed;
483 	}
484 
485 	error = ixgbe_init_hw(hw);
486 	if (error == IXGBE_ERR_EEPROM_VERSION) {
487 		device_printf(dev, "Pre-production device detected\n");
488 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
489 		device_printf(dev, "Unsupported SFP+ Module\n");
490 		error = EIO;
491 		goto failed;
492 	} else if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
493 		device_printf(dev, "No SFP+ Module found\n");
494 	}
495 
496 	/* Detect and set physical type */
497 	ix_setup_optics(sc);
498 
499 	/* Setup OS specific network interface */
500 	ix_setup_ifp(sc);
501 
502 	/* Add sysctl tree */
503 	ix_add_sysctl(sc);
504 
505 	error = ix_setup_intr(sc);
506 	if (error) {
507 		ether_ifdetach(&sc->arpcom.ac_if);
508 		goto failed;
509 	}
510 
511 	/* Initialize statistics */
512 	ix_update_stats(sc);
513 
514 	/*
515 	 * Check PCIE slot type/speed/width
516 	 */
517 	ix_slot_info(sc);
518 
519 	/* Set an initial default flow control value */
520 	sc->fc = ixgbe_fc_full;
521 
522 	/* Let hardware know driver is loaded */
523 	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
524 	ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
525 	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
526 
527 	return 0;
528 failed:
529 	ix_detach(dev);
530 	return error;
531 }
532 
533 static int
534 ix_detach(device_t dev)
535 {
536 	struct ix_softc *sc = device_get_softc(dev);
537 
538 	if (device_is_attached(dev)) {
539 		struct ifnet *ifp = &sc->arpcom.ac_if;
540 		uint32_t ctrl_ext;
541 
542 		ifnet_serialize_all(ifp);
543 
544 		ix_stop(sc);
545 		ix_teardown_intr(sc, sc->intr_cnt);
546 
547 		ifnet_deserialize_all(ifp);
548 
549 		callout_terminate(&sc->timer);
550 		ether_ifdetach(ifp);
551 
552 		/* Let hardware know driver is unloading */
553 		ctrl_ext = IXGBE_READ_REG(&sc->hw, IXGBE_CTRL_EXT);
554 		ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
555 		IXGBE_WRITE_REG(&sc->hw, IXGBE_CTRL_EXT, ctrl_ext);
556 	}
557 
558 	ifmedia_removeall(&sc->media);
559 	bus_generic_detach(dev);
560 
561 	if (sc->sysctl_tree != NULL)
562 		sysctl_ctx_free(&sc->sysctl_ctx);
563 
564 	ix_free_intr(sc);
565 
566 	if (sc->msix_mem_res != NULL) {
567 		bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_mem_rid,
568 		    sc->msix_mem_res);
569 	}
570 	if (sc->mem_res != NULL) {
571 		bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid,
572 		    sc->mem_res);
573 	}
574 
575 	ix_free_rings(sc);
576 
577 	if (sc->mta != NULL)
578 		kfree(sc->mta, M_DEVBUF);
579 	if (sc->serializes != NULL)
580 		kfree(sc->serializes, M_DEVBUF);
581 
582 	return 0;
583 }
584 
585 static int
586 ix_shutdown(device_t dev)
587 {
588 	struct ix_softc *sc = device_get_softc(dev);
589 	struct ifnet *ifp = &sc->arpcom.ac_if;
590 
591 	ifnet_serialize_all(ifp);
592 	ix_stop(sc);
593 	ifnet_deserialize_all(ifp);
594 
595 	return 0;
596 }
597 
598 static void
599 ix_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
600 {
601 	struct ix_softc *sc = ifp->if_softc;
602 	struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
603 	int idx = -1;
604 	uint16_t nsegs;
605 
606 	KKASSERT(txr->tx_ifsq == ifsq);
607 	ASSERT_SERIALIZED(&txr->tx_serialize);
608 
609 	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
610 		return;
611 
612 	if (!sc->link_active || (txr->tx_flags & IX_TXFLAG_ENABLED) == 0) {
613 		ifsq_purge(ifsq);
614 		return;
615 	}
616 
617 	while (!ifsq_is_empty(ifsq)) {
618 		struct mbuf *m_head;
619 
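		/*
		 * Leave enough free descriptors for a maximally scattered
		 * packet plus the reserved slots (e.g. the offload context
		 * descriptor); otherwise mark the subqueue oactive and wait
		 * for transmit completions to reclaim descriptors.
		 */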
620 		if (txr->tx_avail <= IX_MAX_SCATTER + IX_TX_RESERVED) {
621 			ifsq_set_oactive(ifsq);
622 			txr->tx_watchdog.wd_timer = 5;
623 			break;
624 		}
625 
626 		m_head = ifsq_dequeue(ifsq);
627 		if (m_head == NULL)
628 			break;
629 
630 		if (ix_encap(txr, &m_head, &nsegs, &idx)) {
631 			IFNET_STAT_INC(ifp, oerrors, 1);
632 			continue;
633 		}
634 
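		/*
		 * Batch tail (TDT) register writes: only kick the hardware
		 * once tx_wreg_nsegs descriptors have been queued; any
		 * remainder is flushed after the loop.
		 */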
635 		if (nsegs >= txr->tx_wreg_nsegs) {
636 			IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
637 			nsegs = 0;
638 			idx = -1;
639 		}
640 
641 		ETHER_BPF_MTAP(ifp, m_head);
642 	}
643 	if (idx >= 0)
644 		IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
645 }
646 
647 static int
648 ix_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
649 {
650 	struct ix_softc *sc = ifp->if_softc;
651 	struct ifreq *ifr = (struct ifreq *) data;
652 	int error = 0, mask, reinit;
653 
654 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
655 
656 	switch (command) {
657 	case SIOCSIFMTU:
658 		if (ifr->ifr_mtu > IX_MAX_FRAME_SIZE - ETHER_HDR_LEN) {
659 			error = EINVAL;
660 		} else {
661 			ifp->if_mtu = ifr->ifr_mtu;
662 			sc->max_frame_size =
663 			    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
664 			ix_init(sc);
665 		}
666 		break;
667 
668 	case SIOCSIFFLAGS:
669 		if (ifp->if_flags & IFF_UP) {
670 			if (ifp->if_flags & IFF_RUNNING) {
671 				if ((ifp->if_flags ^ sc->if_flags) &
672 				    (IFF_PROMISC | IFF_ALLMULTI))
673 					ix_set_promisc(sc);
674 			} else {
675 				ix_init(sc);
676 			}
677 		} else if (ifp->if_flags & IFF_RUNNING) {
678 			ix_stop(sc);
679 		}
680 		sc->if_flags = ifp->if_flags;
681 		break;
682 
683 	case SIOCADDMULTI:
684 	case SIOCDELMULTI:
685 		if (ifp->if_flags & IFF_RUNNING) {
686 			ix_disable_intr(sc);
687 			ix_set_multi(sc);
688 #ifdef IFPOLL_ENABLE
689 			if ((ifp->if_flags & IFF_NPOLLING) == 0)
690 #endif
691 				ix_enable_intr(sc);
692 		}
693 		break;
694 
695 	case SIOCSIFMEDIA:
696 	case SIOCGIFMEDIA:
697 		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
698 		break;
699 
700 	case SIOCSIFCAP:
701 		reinit = 0;
702 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
703 		if (mask & IFCAP_RXCSUM) {
704 			ifp->if_capenable ^= IFCAP_RXCSUM;
705 			reinit = 1;
706 		}
707 		if (mask & IFCAP_VLAN_HWTAGGING) {
708 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
709 			reinit = 1;
710 		}
711 		if (mask & IFCAP_TXCSUM) {
712 			ifp->if_capenable ^= IFCAP_TXCSUM;
713 			if (ifp->if_capenable & IFCAP_TXCSUM)
714 				ifp->if_hwassist |= CSUM_OFFLOAD;
715 			else
716 				ifp->if_hwassist &= ~CSUM_OFFLOAD;
717 		}
718 		if (mask & IFCAP_TSO) {
719 			ifp->if_capenable ^= IFCAP_TSO;
720 			if (ifp->if_capenable & IFCAP_TSO)
721 				ifp->if_hwassist |= CSUM_TSO;
722 			else
723 				ifp->if_hwassist &= ~CSUM_TSO;
724 		}
725 		if (mask & IFCAP_RSS)
726 			ifp->if_capenable ^= IFCAP_RSS;
727 		if (reinit && (ifp->if_flags & IFF_RUNNING))
728 			ix_init(sc);
729 		break;
730 
731 #if 0
732 	case SIOCGI2C:
733 	{
734 		struct ixgbe_i2c_req	i2c;
735 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
736 		if (error)
737 			break;
738 		if ((i2c.dev_addr != 0xA0) && (i2c.dev_addr != 0xA2)) {
739 			error = EINVAL;
740 			break;
741 		}
742 		hw->phy.ops.read_i2c_byte(hw, i2c.offset,
743 		    i2c.dev_addr, i2c.data);
744 		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
745 		break;
746 	}
747 #endif
748 
749 	default:
750 		error = ether_ioctl(ifp, command, data);
751 		break;
752 	}
753 	return error;
754 }
755 
756 #define IXGBE_MHADD_MFS_SHIFT 16
757 
758 static void
759 ix_init(void *xsc)
760 {
761 	struct ix_softc *sc = xsc;
762 	struct ifnet *ifp = &sc->arpcom.ac_if;
763 	struct ixgbe_hw *hw = &sc->hw;
764 	uint32_t rxpb, frame, size, tmp;
765 	uint32_t gpie, rxctrl;
766 	int i, error;
767 	boolean_t polling;
768 
769 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
770 
771 	ix_stop(sc);
772 
773 	polling = FALSE;
774 #ifdef IFPOLL_ENABLE
775 	if (ifp->if_flags & IFF_NPOLLING)
776 		polling = TRUE;
777 #endif
778 
779 	/* Configure # of used RX/TX rings */
780 	ix_set_ring_inuse(sc, polling);
781 	ifq_set_subq_mask(&ifp->if_snd, sc->tx_ring_inuse - 1);
782 
783 	/* Get the latest MAC address; the user may have set an LAA */
784 	bcopy(IF_LLADDR(ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS);
785 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
786 	hw->addr_ctrl.rar_used_count = 1;
787 
788 	/* Prepare transmit descriptors and buffers */
789 	for (i = 0; i < sc->tx_ring_inuse; ++i)
790 		ix_init_tx_ring(&sc->tx_rings[i]);
791 
792 	ixgbe_init_hw(hw);
793 	ix_init_tx_unit(sc);
794 
795 	/* Setup Multicast table */
796 	ix_set_multi(sc);
797 
798 	/* Prepare receive descriptors and buffers */
799 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
800 		error = ix_init_rx_ring(&sc->rx_rings[i]);
801 		if (error) {
802 			if_printf(ifp, "Could not initialize RX ring%d\n", i);
803 			ix_stop(sc);
804 			return;
805 		}
806 	}
807 
808 	/* Configure RX settings */
809 	ix_init_rx_unit(sc);
810 
811 	gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
812 
813 	/* Enable Fan Failure Interrupt */
814 	gpie |= IXGBE_SDP1_GPIEN;
815 
816 	/* Add for Module detection */
817 	if (hw->mac.type == ixgbe_mac_82599EB)
818 		gpie |= IXGBE_SDP2_GPIEN;
819 
820 	/* Thermal Failure Detection */
821 	if (hw->mac.type == ixgbe_mac_X540)
822 		gpie |= IXGBE_SDP0_GPIEN;
823 
824 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
825 		/* Enable Enhanced MSIX mode */
826 		gpie |= IXGBE_GPIE_MSIX_MODE;
827 		gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
828 		    IXGBE_GPIE_OCD;
829 	}
830 	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
831 
832 	/* Set MTU size */
833 	if (ifp->if_mtu > ETHERMTU) {
834 		uint32_t mhadd;
835 
836 		mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
837 		mhadd &= ~IXGBE_MHADD_MFS_MASK;
838 		mhadd |= sc->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
839 		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
840 	}
841 
842 	/*
843 	 * Enable TX rings
844 	 */
845 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
846 		uint32_t txdctl;
847 
848 		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
849 		txdctl |= IXGBE_TXDCTL_ENABLE;
850 
851 		/*
852 		 * Set WTHRESH to 0, since TX head write-back is used
853 		 */
854 		txdctl &= ~(0x7f << 16);
855 
856 		/*
857 		 * When the internal queue falls below PTHRESH (32),
858 		 * start prefetching as long as there are at least
859 		 * HTHRESH (1) buffers ready. The values are taken
860 		 * from the Intel linux driver 3.8.21.
861 		 * Prefetching enables tx line rate even with 1 queue.
862 		 */
863 		txdctl |= (32 << 0) | (1 << 8);
864 		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
865 	}
866 
867 	/*
868 	 * Enable RX rings
869 	 */
870 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
871 		uint32_t rxdctl;
872 		int k;
873 
874 		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
875 		if (hw->mac.type == ixgbe_mac_82598EB) {
876 			/*
877 			 * PTHRESH = 32 (0x20)
878 			 * HTHRESH = 4
879 			 * WTHRESH = 8
880 			 */
881 			rxdctl &= ~0x3FFFFF;
882 			rxdctl |= 0x080420;
883 		}
884 		rxdctl |= IXGBE_RXDCTL_ENABLE;
885 		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
886 		for (k = 0; k < 10; ++k) {
887 			if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
888 			    IXGBE_RXDCTL_ENABLE)
889 				break;
890 			else
891 				msec_delay(1);
892 		}
893 		wmb();
894 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i),
895 		    sc->rx_rings[0].rx_ndesc - 1);
896 	}
897 
898 	/* Set up VLAN support and filter */
899 	ix_set_vlan(sc);
900 
901 	/* Enable Receive engine */
902 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
903 	if (hw->mac.type == ixgbe_mac_82598EB)
904 		rxctrl |= IXGBE_RXCTRL_DMBYPS;
905 	rxctrl |= IXGBE_RXCTRL_RXEN;
906 	ixgbe_enable_rx_dma(hw, rxctrl);
907 
908 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
909 		const struct ix_tx_ring *txr = &sc->tx_rings[i];
910 
911 		if (txr->tx_intr_vec >= 0) {
912 			ix_set_ivar(sc, i, txr->tx_intr_vec, 1);
913 		} else {
914 			/*
915 			 * An unconfigured TX interrupt vector can only
916 			 * happen with MSI-X.
917 			 */
918 			KASSERT(sc->intr_type == PCI_INTR_TYPE_MSIX,
919 			    ("TX intr vector is not set"));
920 			KASSERT(i < sc->rx_ring_inuse,
921 			    ("invalid TX ring %d, no piggyback RX ring", i));
922 			KASSERT(sc->rx_rings[i].rx_txr == txr,
923 			    ("RX ring %d piggybacked TX ring mismatch", i));
924 			if (bootverbose)
925 				if_printf(ifp, "IVAR skips TX ring %d\n", i);
926 		}
927 	}
928 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
929 		const struct ix_rx_ring *rxr = &sc->rx_rings[i];
930 
931 		KKASSERT(rxr->rx_intr_vec >= 0);
932 		ix_set_ivar(sc, i, rxr->rx_intr_vec, 0);
933 		if (rxr->rx_txr != NULL) {
934 			/*
935 			 * Piggyback the TX ring interrupt onto the RX
936 			 * ring interrupt vector.
937 			 */
938 			KASSERT(rxr->rx_txr->tx_intr_vec < 0,
939 			    ("piggybacked TX ring configured intr vector"));
940 			KASSERT(rxr->rx_txr->tx_idx == i,
941 			    ("RX ring %d piggybacked TX ring %u",
942 			     i, rxr->rx_txr->tx_idx));
943 			ix_set_ivar(sc, i, rxr->rx_intr_vec, 1);
944 			if (bootverbose) {
945 				if_printf(ifp, "IVAR RX ring %d piggybacks "
946 				    "TX ring %u\n", i, rxr->rx_txr->tx_idx);
947 			}
948 		}
949 	}
950 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
951 		/* Set up status MSI-X vector; it uses fixed entry 1 */
952 		ix_set_ivar(sc, 1, sc->sts_msix_vec, -1);
953 
954 		/* Set up auto-mask for TX and RX rings */
955 		if (hw->mac.type == ixgbe_mac_82598EB) {
956 			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
957 		} else {
958 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
959 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
960 		}
961 	} else {
962 		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
963 	}
964 	for (i = 0; i < sc->intr_cnt; ++i)
965 		ix_set_eitr(sc, i, sc->intr_data[i].intr_rate);
966 
967 	/*
968 	 * Check on any SFP devices that need to be kick-started
969 	 */
970 	if (hw->phy.type == ixgbe_phy_none) {
971 		error = hw->phy.ops.identify(hw);
972 		if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
973 			if_printf(ifp,
974 			    "Unsupported SFP+ module type was detected.\n");
975 			/* XXX stop */
976 			return;
977 		}
978 	}
979 
980 	/* Config/Enable Link */
981 	ix_config_link(sc);
982 
983 	/*
984 	 * Hardware Packet Buffer & Flow Control setup
985 	 */
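	/*
	 * The high/low water marks are derived from the worst-case delay
	 * value for a max-sized frame (IXGBE_DV()/IXGBE_LOW_DV()) and the
	 * RX packet buffer size read from RXPBSIZE(0), converted to KB
	 * via IXGBE_BT2KB().
	 */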
986 	frame = sc->max_frame_size;
987 
988 	/* Calculate High Water */
989 	if (hw->mac.type == ixgbe_mac_X540)
990 		tmp = IXGBE_DV_X540(frame, frame);
991 	else
992 		tmp = IXGBE_DV(frame, frame);
993 	size = IXGBE_BT2KB(tmp);
994 	rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
995 	hw->fc.high_water[0] = rxpb - size;
996 
997 	/* Now calculate Low Water */
998 	if (hw->mac.type == ixgbe_mac_X540)
999 		tmp = IXGBE_LOW_DV_X540(frame);
1000 	else
1001 		tmp = IXGBE_LOW_DV(frame);
1002 	hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
1003 
1004 	hw->fc.requested_mode = sc->fc;
1005 	hw->fc.pause_time = IX_FC_PAUSE;
1006 	hw->fc.send_xon = TRUE;
1007 
1008 	/* Initialize the FC settings */
1009 	ixgbe_start_hw(hw);
1010 
1011 	/*
1012 	 * Only enable interrupts if we are not polling; make sure
1013 	 * they are off otherwise.
1014 	 */
1015 	if (polling)
1016 		ix_disable_intr(sc);
1017 	else
1018 		ix_enable_intr(sc);
1019 
1020 	ifp->if_flags |= IFF_RUNNING;
1021 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
1022 		ifsq_clr_oactive(sc->tx_rings[i].tx_ifsq);
1023 		ifsq_watchdog_start(&sc->tx_rings[i].tx_watchdog);
1024 	}
1025 
1026 	ix_set_timer_cpuid(sc, polling);
1027 	callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
1028 }
1029 
1030 static void
1031 ix_intr(void *xsc)
1032 {
1033 	struct ix_softc *sc = xsc;
1034 	struct ixgbe_hw	*hw = &sc->hw;
1035 	uint32_t eicr;
1036 
1037 	ASSERT_SERIALIZED(&sc->main_serialize);
1038 
1039 	eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1040 	if (eicr == 0) {
1041 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
1042 		return;
1043 	}
1044 
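	/*
	 * Reading EICR cleared the asserted cause bits; service the RX,
	 * TX and status causes below, then re-enable them through EIMS.
	 */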
1045 	if (eicr & IX_RX0_INTR_MASK) {
1046 		struct ix_rx_ring *rxr = &sc->rx_rings[0];
1047 
1048 		lwkt_serialize_enter(&rxr->rx_serialize);
1049 		ix_rxeof(rxr, -1);
1050 		lwkt_serialize_exit(&rxr->rx_serialize);
1051 	}
1052 	if (eicr & IX_RX1_INTR_MASK) {
1053 		struct ix_rx_ring *rxr;
1054 
1055 		KKASSERT(sc->rx_ring_inuse == IX_MIN_RXRING_RSS);
1056 		rxr = &sc->rx_rings[1];
1057 
1058 		lwkt_serialize_enter(&rxr->rx_serialize);
1059 		ix_rxeof(rxr, -1);
1060 		lwkt_serialize_exit(&rxr->rx_serialize);
1061 	}
1062 
1063 	if (eicr & IX_TX_INTR_MASK) {
1064 		struct ix_tx_ring *txr = &sc->tx_rings[0];
1065 
1066 		lwkt_serialize_enter(&txr->tx_serialize);
1067 		ix_txeof(txr, *(txr->tx_hdr));
1068 		if (!ifsq_is_empty(txr->tx_ifsq))
1069 			ifsq_devstart(txr->tx_ifsq);
1070 		lwkt_serialize_exit(&txr->tx_serialize);
1071 	}
1072 
1073 	if (__predict_false(eicr & IX_EICR_STATUS))
1074 		ix_intr_status(sc, eicr);
1075 
1076 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
1077 }
1078 
1079 static void
1080 ix_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1081 {
1082 	struct ix_softc *sc = ifp->if_softc;
1083 
1084 	ix_update_link_status(sc);
1085 
1086 	ifmr->ifm_status = IFM_AVALID;
1087 	ifmr->ifm_active = IFM_ETHER;
1088 
1089 	if (!sc->link_active)
1090 		return;
1091 
1092 	ifmr->ifm_status |= IFM_ACTIVE;
1093 
1094 	switch (sc->link_speed) {
1095 	case IXGBE_LINK_SPEED_100_FULL:
1096 		ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1097 		break;
1098 	case IXGBE_LINK_SPEED_1GB_FULL:
1099 		ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1100 		break;
1101 	case IXGBE_LINK_SPEED_10GB_FULL:
1102 		ifmr->ifm_active |= sc->optics | IFM_FDX;
1103 		break;
1104 	}
1105 }
1106 
1107 static int
1108 ix_media_change(struct ifnet *ifp)
1109 {
1110 	struct ix_softc *sc = ifp->if_softc;
1111 	struct ifmedia *ifm = &sc->media;
1112 
1113 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1114 		return EINVAL;
1115 
1116 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1117 	case IFM_AUTO:
1118 		sc->hw.phy.autoneg_advertised =
1119 		    IXGBE_LINK_SPEED_100_FULL |
1120 		    IXGBE_LINK_SPEED_1GB_FULL |
1121 		    IXGBE_LINK_SPEED_10GB_FULL;
1122 		break;
1123 	default:
1124 		if_printf(ifp, "Only auto media type\n");
1125 		return EINVAL;
1126 	}
1127 	return 0;
1128 }
1129 
1130 static __inline int
1131 ix_tso_pullup(struct mbuf **mp)
1132 {
1133 	int hoff, iphlen, thoff;
1134 	struct mbuf *m;
1135 
1136 	m = *mp;
1137 	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));
1138 
1139 	iphlen = m->m_pkthdr.csum_iphlen;
1140 	thoff = m->m_pkthdr.csum_thlen;
1141 	hoff = m->m_pkthdr.csum_lhlen;
1142 
1143 	KASSERT(iphlen > 0, ("invalid ip hlen"));
1144 	KASSERT(thoff > 0, ("invalid tcp hlen"));
1145 	KASSERT(hoff > 0, ("invalid ether hlen"));
1146 
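	/*
	 * The TSO context descriptor setup parses the Ethernet, IP and
	 * TCP headers, so they must be contiguous in the first mbuf;
	 * pull them up if the chain splits them.
	 */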
1147 	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
1148 		m = m_pullup(m, hoff + iphlen + thoff);
1149 		if (m == NULL) {
1150 			*mp = NULL;
1151 			return ENOBUFS;
1152 		}
1153 		*mp = m;
1154 	}
1155 	return 0;
1156 }
1157 
1158 static int
1159 ix_encap(struct ix_tx_ring *txr, struct mbuf **m_headp,
1160     uint16_t *segs_used, int *idx)
1161 {
1162 	uint32_t olinfo_status = 0, cmd_type_len, cmd_rs = 0;
1163 	int i, j, error, nsegs, first, maxsegs;
1164 	struct mbuf *m_head = *m_headp;
1165 	bus_dma_segment_t segs[IX_MAX_SCATTER];
1166 	bus_dmamap_t map;
1167 	struct ix_tx_buf *txbuf;
1168 	union ixgbe_adv_tx_desc *txd = NULL;
1169 
1170 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1171 		error = ix_tso_pullup(m_headp);
1172 		if (__predict_false(error))
1173 			return error;
1174 		m_head = *m_headp;
1175 	}
1176 
1177 	/* Basic descriptor defines */
1178 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1179 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1180 
1181 	if (m_head->m_flags & M_VLANTAG)
1182 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1183 
1184 	/*
1185 	 * Important to capture the first descriptor
1186 	 * used because it will contain the index of
1187 	 * the one we tell the hardware to report back
1188 	 */
1189 	first = txr->tx_next_avail;
1190 	txbuf = &txr->tx_buf[first];
1191 	map = txbuf->map;
1192 
1193 	/*
1194 	 * Map the packet for DMA.
1195 	 */
1196 	maxsegs = txr->tx_avail - IX_TX_RESERVED;
1197 	if (maxsegs > IX_MAX_SCATTER)
1198 		maxsegs = IX_MAX_SCATTER;
1199 
1200 	error = bus_dmamap_load_mbuf_defrag(txr->tx_tag, map, m_headp,
1201 	    segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
1202 	if (__predict_false(error)) {
1203 		m_freem(*m_headp);
1204 		*m_headp = NULL;
1205 		return error;
1206 	}
1207 	bus_dmamap_sync(txr->tx_tag, map, BUS_DMASYNC_PREWRITE);
1208 
1209 	m_head = *m_headp;
1210 
1211 	/*
1212 	 * Set up the appropriate offload context if requested;
1213 	 * this may consume one TX descriptor.
1214 	 */
1215 	if (ix_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status)) {
1216 		(*segs_used)++;
1217 		txr->tx_nsegs++;
1218 	}
1219 
1220 	*segs_used += nsegs;
1221 	txr->tx_nsegs += nsegs;
1222 	if (txr->tx_nsegs >= txr->tx_intr_nsegs) {
1223 		/*
1224 		 * Report Status (RS) is turned on every intr_nsegs
1225 		 * descriptors (roughly).
1226 		 */
1227 		txr->tx_nsegs = 0;
1228 		cmd_rs = IXGBE_TXD_CMD_RS;
1229 	}
1230 
1231 	i = txr->tx_next_avail;
1232 	for (j = 0; j < nsegs; j++) {
1233 		bus_size_t seglen;
1234 		bus_addr_t segaddr;
1235 
1236 		txbuf = &txr->tx_buf[i];
1237 		txd = &txr->tx_base[i];
1238 		seglen = segs[j].ds_len;
1239 		segaddr = htole64(segs[j].ds_addr);
1240 
1241 		txd->read.buffer_addr = segaddr;
1242 		txd->read.cmd_type_len = htole32(IXGBE_TXD_CMD_IFCS |
1243 		    cmd_type_len | seglen);
1244 		txd->read.olinfo_status = htole32(olinfo_status);
1245 
1246 		if (++i == txr->tx_ndesc)
1247 			i = 0;
1248 	}
1249 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | cmd_rs);
1250 
1251 	txr->tx_avail -= nsegs;
1252 	txr->tx_next_avail = i;
1253 
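	/*
	 * The mbuf was DMA-loaded with the first descriptor's map; swap
	 * maps so that the loaded map stays with the last descriptor's
	 * tx_buf, where m_head is recorded and where the completion path
	 * can unload it when the packet is done.
	 */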
1254 	txbuf->m_head = m_head;
1255 	txr->tx_buf[first].map = txbuf->map;
1256 	txbuf->map = map;
1257 
1258 	/*
1259 	 * Defer the TDT update until enough descriptors are set up
1260 	 */
1261 	*idx = i;
1262 
1263 	return 0;
1264 }
1265 
1266 static void
1267 ix_set_promisc(struct ix_softc *sc)
1268 {
1269 	struct ifnet *ifp = &sc->arpcom.ac_if;
1270 	uint32_t reg_rctl;
1271 	int mcnt = 0;
1272 
1273 	reg_rctl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
1274 	reg_rctl &= ~IXGBE_FCTRL_UPE;
1275 	if (ifp->if_flags & IFF_ALLMULTI) {
1276 		mcnt = IX_MAX_MCASTADDR;
1277 	} else {
1278 		struct ifmultiaddr *ifma;
1279 
1280 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1281 			if (ifma->ifma_addr->sa_family != AF_LINK)
1282 				continue;
1283 			if (mcnt == IX_MAX_MCASTADDR)
1284 				break;
1285 			mcnt++;
1286 		}
1287 	}
1288 	if (mcnt < IX_MAX_MCASTADDR)
1289 		reg_rctl &= ~IXGBE_FCTRL_MPE;
1290 	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1291 
1292 	if (ifp->if_flags & IFF_PROMISC) {
1293 		reg_rctl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
1294 		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1295 	} else if (ifp->if_flags & IFF_ALLMULTI) {
1296 		reg_rctl |= IXGBE_FCTRL_MPE;
1297 		reg_rctl &= ~IXGBE_FCTRL_UPE;
1298 		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1299 	}
1300 }
1301 
1302 static void
1303 ix_set_multi(struct ix_softc *sc)
1304 {
1305 	struct ifnet *ifp = &sc->arpcom.ac_if;
1306 	struct ifmultiaddr *ifma;
1307 	uint32_t fctrl;
1308 	uint8_t	*mta;
1309 	int mcnt = 0;
1310 
1311 	mta = sc->mta;
1312 	bzero(mta, IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR);
1313 
1314 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1315 		if (ifma->ifma_addr->sa_family != AF_LINK)
1316 			continue;
1317 		if (mcnt == IX_MAX_MCASTADDR)
1318 			break;
1319 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1320 		    &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
1321 		    IXGBE_ETH_LENGTH_OF_ADDRESS);
1322 		mcnt++;
1323 	}
1324 
1325 	fctrl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
1326 	fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1327 	if (ifp->if_flags & IFF_PROMISC) {
1328 		fctrl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
1329 	} else if (mcnt >= IX_MAX_MCASTADDR || (ifp->if_flags & IFF_ALLMULTI)) {
1330 		fctrl |= IXGBE_FCTRL_MPE;
1331 		fctrl &= ~IXGBE_FCTRL_UPE;
1332 	} else {
1333 		fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1334 	}
1335 	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, fctrl);
1336 
1337 	if (mcnt < IX_MAX_MCASTADDR) {
1338 		ixgbe_update_mc_addr_list(&sc->hw,
1339 		    mta, mcnt, ix_mc_array_itr, TRUE);
1340 	}
1341 }
1342 
1343 /*
1344  * This is an iterator function needed by the multicast
1345  * shared code.  It simply feeds the shared code routine the
1346  * addresses built by ix_set_multi(), one at a time.
1347  */
1348 static uint8_t *
1349 ix_mc_array_itr(struct ixgbe_hw *hw, uint8_t **update_ptr, uint32_t *vmdq)
1350 {
1351 	uint8_t *addr = *update_ptr;
1352 	uint8_t *newptr;
1353 	*vmdq = 0;
1354 
1355 	newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
1356 	*update_ptr = newptr;
1357 	return addr;
1358 }
1359 
1360 static void
1361 ix_timer(void *arg)
1362 {
1363 	struct ix_softc *sc = arg;
1364 
1365 	lwkt_serialize_enter(&sc->main_serialize);
1366 
1367 	if ((sc->arpcom.ac_if.if_flags & IFF_RUNNING) == 0) {
1368 		lwkt_serialize_exit(&sc->main_serialize);
1369 		return;
1370 	}
1371 
1372 	/* Check for pluggable optics */
1373 	if (sc->sfp_probe) {
1374 		if (!ix_sfp_probe(sc))
1375 			goto done; /* Nothing to do */
1376 	}
1377 
1378 	ix_update_link_status(sc);
1379 	ix_update_stats(sc);
1380 
1381 done:
1382 	callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
1383 	lwkt_serialize_exit(&sc->main_serialize);
1384 }
1385 
1386 static void
1387 ix_update_link_status(struct ix_softc *sc)
1388 {
1389 	struct ifnet *ifp = &sc->arpcom.ac_if;
1390 
1391 	if (sc->link_up) {
1392 		if (sc->link_active == FALSE) {
1393 			if (bootverbose) {
1394 				if_printf(ifp, "Link is up %d Gbps %s\n",
1395 				    sc->link_speed == 128 ? 10 : 1,
1396 				    "Full Duplex");
1397 			}
1398 			sc->link_active = TRUE;
1399 
1400 			/* Update any Flow Control changes */
1401 			ixgbe_fc_enable(&sc->hw);
1402 
1403 			ifp->if_link_state = LINK_STATE_UP;
1404 			if_link_state_change(ifp);
1405 		}
1406 	} else { /* Link down */
1407 		if (sc->link_active == TRUE) {
1408 			if (bootverbose)
1409 				if_printf(ifp, "Link is Down\n");
1410 			ifp->if_link_state = LINK_STATE_DOWN;
1411 			if_link_state_change(ifp);
1412 
1413 			sc->link_active = FALSE;
1414 		}
1415 	}
1416 }
1417 
1418 static void
1419 ix_stop(struct ix_softc *sc)
1420 {
1421 	struct ixgbe_hw *hw = &sc->hw;
1422 	struct ifnet *ifp = &sc->arpcom.ac_if;
1423 	int i;
1424 
1425 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1426 
1427 	ix_disable_intr(sc);
1428 	callout_stop(&sc->timer);
1429 
1430 	ifp->if_flags &= ~IFF_RUNNING;
1431 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1432 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1433 
1434 		ifsq_clr_oactive(txr->tx_ifsq);
1435 		ifsq_watchdog_stop(&txr->tx_watchdog);
1436 		txr->tx_flags &= ~IX_TXFLAG_ENABLED;
1437 	}
1438 
1439 	ixgbe_reset_hw(hw);
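	/*
	 * Clear adapter_stopped before calling ixgbe_stop_adapter(),
	 * presumably so the shared code performs a full stop (masking
	 * interrupts and disabling the TX/RX units) even though the
	 * reset path may already have marked the adapter stopped.
	 */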
1440 	hw->adapter_stopped = FALSE;
1441 	ixgbe_stop_adapter(hw);
1442 	if (hw->mac.type == ixgbe_mac_82599EB)
1443 		ixgbe_stop_mac_link_on_d3_82599(hw);
1444 	/* Turn off the laser - noop with no optics */
1445 	ixgbe_disable_tx_laser(hw);
1446 
1447 	/* Update the stack */
1448 	sc->link_up = FALSE;
1449 	ix_update_link_status(sc);
1450 
1451 	/* Reprogram the RAR[0] in case user changed it. */
1452 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV);
1453 
1454 	for (i = 0; i < sc->tx_ring_cnt; ++i)
1455 		ix_free_tx_ring(&sc->tx_rings[i]);
1456 
1457 	for (i = 0; i < sc->rx_ring_cnt; ++i)
1458 		ix_free_rx_ring(&sc->rx_rings[i]);
1459 }
1460 
1461 static void
1462 ix_setup_optics(struct ix_softc *sc)
1463 {
1464 	struct ixgbe_hw *hw = &sc->hw;
1465 	int layer;
1466 
1467 	layer = ixgbe_get_supported_physical_layer(hw);
1468 
1469 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
1470 		sc->optics = IFM_10G_T;
1471 		return;
1472 	}
1473 
1474 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
1475 		sc->optics = IFM_1000_T;
1476 		return;
1477 	}
1478 
1479 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
1480 		sc->optics = IFM_1000_SX;
1481 		return;
1482 	}
1483 
1484 	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
1485 	    IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
1486 		sc->optics = IFM_10G_LR;
1487 		return;
1488 	}
1489 
1490 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
1491 		sc->optics = IFM_10G_SR;
1492 		return;
1493 	}
1494 
1495 	if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
1496 		sc->optics = IFM_10G_TWINAX;
1497 		return;
1498 	}
1499 
1500 	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
1501 	    IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
1502 		sc->optics = IFM_10G_CX4;
1503 		return;
1504 	}
1505 
1506 	/* If we get here just set the default */
1507 	sc->optics = IFM_ETHER | IFM_AUTO;
1508 }
1509 
1510 static void
1511 ix_setup_ifp(struct ix_softc *sc)
1512 {
1513 	struct ixgbe_hw *hw = &sc->hw;
1514 	struct ifnet *ifp = &sc->arpcom.ac_if;
1515 	int i;
1516 
1517 	ifp->if_baudrate = IF_Gbps(10UL);
1518 
1519 	ifp->if_softc = sc;
1520 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1521 	ifp->if_init = ix_init;
1522 	ifp->if_ioctl = ix_ioctl;
1523 	ifp->if_start = ix_start;
1524 	ifp->if_serialize = ix_serialize;
1525 	ifp->if_deserialize = ix_deserialize;
1526 	ifp->if_tryserialize = ix_tryserialize;
1527 #ifdef INVARIANTS
1528 	ifp->if_serialize_assert = ix_serialize_assert;
1529 #endif
1530 #ifdef IFPOLL_ENABLE
1531 	ifp->if_npoll = ix_npoll;
1532 #endif
1533 
1534 	/* Increase TSO burst length */
1535 	ifp->if_tsolen = (8 * ETHERMTU);
1536 
1537 	ifq_set_maxlen(&ifp->if_snd, sc->tx_rings[0].tx_ndesc - 2);
1538 	ifq_set_ready(&ifp->if_snd);
1539 	ifq_set_subq_cnt(&ifp->if_snd, sc->tx_ring_cnt);
1540 
1541 	ifp->if_mapsubq = ifq_mapsubq_mask;
1542 	ifq_set_subq_mask(&ifp->if_snd, 0);
1543 
1544 	ether_ifattach(ifp, hw->mac.addr, NULL);
1545 
1546 	ifp->if_capabilities =
1547 	    IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
1548 	if (IX_ENABLE_HWRSS(sc))
1549 		ifp->if_capabilities |= IFCAP_RSS;
1550 	ifp->if_capenable = ifp->if_capabilities;
1551 	ifp->if_hwassist = CSUM_OFFLOAD | CSUM_TSO;
1552 
1553 	/*
1554 	 * Tell the upper layer(s) we support long frames.
1555 	 */
1556 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
1557 
1558 	/* Setup TX rings and subqueues */
1559 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1560 		struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
1561 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1562 
1563 		ifsq_set_cpuid(ifsq, txr->tx_intr_cpuid);
1564 		ifsq_set_priv(ifsq, txr);
1565 		ifsq_set_hw_serialize(ifsq, &txr->tx_serialize);
1566 		txr->tx_ifsq = ifsq;
1567 
1568 		ifsq_watchdog_init(&txr->tx_watchdog, ifsq, ix_watchdog);
1569 	}
1570 
1571 	/*
1572 	 * Specify the media types supported by this adapter and register
1573 	 * callbacks to update media and link information
1574 	 */
1575 	ifmedia_add(&sc->media, IFM_ETHER | sc->optics, 0, NULL);
1576 	ifmedia_set(&sc->media, IFM_ETHER | sc->optics);
1577 	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
1578 		ifmedia_add(&sc->media,
1579 		    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
1580 		ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_T, 0, NULL);
1581 	}
1582 	ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
1583 	ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO);
1584 }
1585 
1586 static boolean_t
1587 ix_is_sfp(const struct ixgbe_hw *hw)
1588 {
1589 	switch (hw->phy.type) {
1590 	case ixgbe_phy_sfp_avago:
1591 	case ixgbe_phy_sfp_ftl:
1592 	case ixgbe_phy_sfp_intel:
1593 	case ixgbe_phy_sfp_unknown:
1594 	case ixgbe_phy_sfp_passive_tyco:
1595 	case ixgbe_phy_sfp_passive_unknown:
1596 		return TRUE;
1597 	default:
1598 		return FALSE;
1599 	}
1600 }
1601 
1602 static void
1603 ix_config_link(struct ix_softc *sc)
1604 {
1605 	struct ixgbe_hw *hw = &sc->hw;
1606 	boolean_t sfp;
1607 
1608 	sfp = ix_is_sfp(hw);
1609 	if (sfp) {
1610 		if (hw->phy.multispeed_fiber) {
1611 			hw->mac.ops.setup_sfp(hw);
1612 			ixgbe_enable_tx_laser(hw);
1613 			ix_handle_msf(sc);
1614 		} else {
1615 			ix_handle_mod(sc);
1616 		}
1617 	} else {
1618 		uint32_t autoneg, err = 0;
1619 
1620 		if (hw->mac.ops.check_link != NULL) {
1621 			err = ixgbe_check_link(hw, &sc->link_speed,
1622 			    &sc->link_up, FALSE);
1623 			if (err)
1624 				return;
1625 		}
1626 
1627 		autoneg = hw->phy.autoneg_advertised;
1628 		if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
1629 			bool negotiate;
1630 
1631 			err = hw->mac.ops.get_link_capabilities(hw,
1632 			    &autoneg, &negotiate);
1633 			if (err)
1634 				return;
1635 		}
1636 
1637 		if (hw->mac.ops.setup_link != NULL) {
1638 			err = hw->mac.ops.setup_link(hw,
1639 			    autoneg, sc->link_up);
1640 			if (err)
1641 				return;
1642 		}
1643 	}
1644 }
1645 
1646 static int
1647 ix_alloc_rings(struct ix_softc *sc)
1648 {
1649 	int error, i;
1650 
1651 	/*
1652 	 * Create top level busdma tag
1653 	 */
1654 	error = bus_dma_tag_create(NULL, 1, 0,
1655 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1656 	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
1657 	    &sc->parent_tag);
1658 	if (error) {
1659 		device_printf(sc->dev, "could not create top level DMA tag\n");
1660 		return error;
1661 	}
1662 
1663 	/*
1664 	 * Allocate TX descriptor rings and buffers
1665 	 */
1666 	sc->tx_rings = kmalloc_cachealign(
1667 	    sizeof(struct ix_tx_ring) * sc->tx_ring_cnt,
1668 	    M_DEVBUF, M_WAITOK | M_ZERO);
1669 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1670 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1671 
1672 		txr->tx_sc = sc;
1673 		txr->tx_idx = i;
1674 		txr->tx_intr_vec = -1;
1675 		lwkt_serialize_init(&txr->tx_serialize);
1676 
1677 		error = ix_create_tx_ring(txr);
1678 		if (error)
1679 			return error;
1680 	}
1681 
1682 	/*
1683 	 * Allocate RX descriptor rings and buffers
1684 	 */
1685 	sc->rx_rings = kmalloc_cachealign(
1686 	    sizeof(struct ix_rx_ring) * sc->rx_ring_cnt,
1687 	    M_DEVBUF, M_WAITOK | M_ZERO);
1688 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
1689 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
1690 
1691 		rxr->rx_sc = sc;
1692 		rxr->rx_idx = i;
1693 		rxr->rx_intr_vec = -1;
1694 		lwkt_serialize_init(&rxr->rx_serialize);
1695 
1696 		error = ix_create_rx_ring(rxr);
1697 		if (error)
1698 			return error;
1699 	}
1700 
1701 	return 0;
1702 }
1703 
1704 static int
1705 ix_create_tx_ring(struct ix_tx_ring *txr)
1706 {
1707 	int error, i, tsize, ntxd;
1708 
1709 	/*
1710 	 * Validate the number of transmit descriptors.  It must be within
1711 	 * the hardware min/max, and the ring size must be a multiple of IX_DBA_ALIGN.
1712 	 */
1713 	ntxd = device_getenv_int(txr->tx_sc->dev, "txd", ix_txd);
1714 	if (((ntxd * sizeof(union ixgbe_adv_tx_desc)) % IX_DBA_ALIGN) != 0 ||
1715 	    ntxd < IX_MIN_TXD || ntxd > IX_MAX_TXD) {
1716 		device_printf(txr->tx_sc->dev,
1717 		    "Using %d TX descriptors instead of %d!\n",
1718 		    IX_DEF_TXD, ntxd);
1719 		txr->tx_ndesc = IX_DEF_TXD;
1720 	} else {
1721 		txr->tx_ndesc = ntxd;
1722 	}
1723 
1724 	/*
1725 	 * Allocate TX head write-back buffer
1726 	 */
1727 	txr->tx_hdr = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
1728 	    __VM_CACHELINE_SIZE, __VM_CACHELINE_SIZE, BUS_DMA_WAITOK,
1729 	    &txr->tx_hdr_dtag, &txr->tx_hdr_map, &txr->tx_hdr_paddr);
1730 	if (txr->tx_hdr == NULL) {
1731 		device_printf(txr->tx_sc->dev,
1732 		    "Unable to allocate TX head write-back buffer\n");
1733 		return ENOMEM;
1734 	}
1735 
1736 	/*
1737 	 * Allocate TX descriptor ring
1738 	 */
1739 	tsize = roundup2(txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc),
1740 	    IX_DBA_ALIGN);
1741 	txr->tx_base = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
1742 	    IX_DBA_ALIGN, tsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
1743 	    &txr->tx_base_dtag, &txr->tx_base_map, &txr->tx_base_paddr);
1744 	if (txr->tx_base == NULL) {
1745 		device_printf(txr->tx_sc->dev,
1746 		    "Unable to allocate TX Descriptor memory\n");
1747 		return ENOMEM;
1748 	}
1749 
1750 	tsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_tx_buf) * txr->tx_ndesc);
1751 	txr->tx_buf = kmalloc_cachealign(tsize, M_DEVBUF, M_WAITOK | M_ZERO);
1752 
1753 	/*
1754 	 * Create DMA tag for TX buffers
1755 	 */
1756 	error = bus_dma_tag_create(txr->tx_sc->parent_tag,
1757 	    1, 0,		/* alignment, bounds */
1758 	    BUS_SPACE_MAXADDR,	/* lowaddr */
1759 	    BUS_SPACE_MAXADDR,	/* highaddr */
1760 	    NULL, NULL,		/* filter, filterarg */
1761 	    IX_TSO_SIZE,	/* maxsize */
1762 	    IX_MAX_SCATTER,	/* nsegments */
1763 	    PAGE_SIZE,		/* maxsegsize */
1764 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
1765 	    BUS_DMA_ONEBPAGE,	/* flags */
1766 	    &txr->tx_tag);
1767 	if (error) {
1768 		device_printf(txr->tx_sc->dev,
1769 		    "Unable to allocate TX DMA tag\n");
1770 		kfree(txr->tx_buf, M_DEVBUF);
1771 		txr->tx_buf = NULL;
1772 		return error;
1773 	}
1774 
1775 	/*
1776 	 * Create DMA maps for TX buffers
1777 	 */
1778 	for (i = 0; i < txr->tx_ndesc; ++i) {
1779 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
1780 
1781 		error = bus_dmamap_create(txr->tx_tag,
1782 		    BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &txbuf->map);
1783 		if (error) {
1784 			device_printf(txr->tx_sc->dev,
1785 			    "Unable to create TX DMA map\n");
1786 			ix_destroy_tx_ring(txr, i);
1787 			return error;
1788 		}
1789 	}
1790 
1791 	/*
1792 	 * Initialize various watermarks
1793 	 */
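	/*
	 * tx_wreg_nsegs controls how often ix_start() writes the TDT tail
	 * register; tx_intr_nsegs controls how often ix_encap() sets the
	 * RS bit (roughly every 1/16 of the ring), which paces TX
	 * completion processing.
	 */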
1794 	txr->tx_wreg_nsegs = IX_DEF_TXWREG_NSEGS;
1795 	txr->tx_intr_nsegs = txr->tx_ndesc / 16;
1796 
1797 	return 0;
1798 }
1799 
1800 static void
1801 ix_destroy_tx_ring(struct ix_tx_ring *txr, int ndesc)
1802 {
1803 	int i;
1804 
1805 	if (txr->tx_hdr != NULL) {
1806 		bus_dmamap_unload(txr->tx_hdr_dtag, txr->tx_hdr_map);
1807 		bus_dmamem_free(txr->tx_hdr_dtag,
1808 		    __DEVOLATILE(void *, txr->tx_hdr), txr->tx_hdr_map);
1809 		bus_dma_tag_destroy(txr->tx_hdr_dtag);
1810 		txr->tx_hdr = NULL;
1811 	}
1812 
1813 	if (txr->tx_base != NULL) {
1814 		bus_dmamap_unload(txr->tx_base_dtag, txr->tx_base_map);
1815 		bus_dmamem_free(txr->tx_base_dtag, txr->tx_base,
1816 		    txr->tx_base_map);
1817 		bus_dma_tag_destroy(txr->tx_base_dtag);
1818 		txr->tx_base = NULL;
1819 	}
1820 
1821 	if (txr->tx_buf == NULL)
1822 		return;
1823 
1824 	for (i = 0; i < ndesc; ++i) {
1825 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
1826 
1827 		KKASSERT(txbuf->m_head == NULL);
1828 		bus_dmamap_destroy(txr->tx_tag, txbuf->map);
1829 	}
1830 	bus_dma_tag_destroy(txr->tx_tag);
1831 
1832 	kfree(txr->tx_buf, M_DEVBUF);
1833 	txr->tx_buf = NULL;
1834 }
1835 
1836 static void
1837 ix_init_tx_ring(struct ix_tx_ring *txr)
1838 {
1839 	/* Clear the old ring contents */
1840 	bzero(txr->tx_base, sizeof(union ixgbe_adv_tx_desc) * txr->tx_ndesc);
1841 
1842 	/* Clear TX head write-back buffer */
1843 	*(txr->tx_hdr) = 0;
1844 
1845 	/* Reset indices */
1846 	txr->tx_next_avail = 0;
1847 	txr->tx_next_clean = 0;
1848 	txr->tx_nsegs = 0;
1849 
1850 	/* Set number of descriptors available */
1851 	txr->tx_avail = txr->tx_ndesc;
1852 
1853 	/* Enable this TX ring */
1854 	txr->tx_flags |= IX_TXFLAG_ENABLED;
1855 }
1856 
1857 static void
1858 ix_init_tx_unit(struct ix_softc *sc)
1859 {
1860 	struct ixgbe_hw	*hw = &sc->hw;
1861 	int i;
1862 
1863 	/*
1864 	 * Setup the Base and Length of the Tx Descriptor Ring
1865 	 */
1866 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
1867 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1868 		uint64_t tdba = txr->tx_base_paddr;
1869 		uint64_t hdr_paddr = txr->tx_hdr_paddr;
1870 		uint32_t txctrl;
1871 
1872 		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i), (uint32_t)tdba);
1873 		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (uint32_t)(tdba >> 32));
1874 		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
1875 		    txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc));
1876 
1877 		/* Setup the HW Tx Head and Tail descriptor pointers */
1878 		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
1879 		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
1880 
1881 		/* Disable TX head write-back relaxed ordering */
1882 		switch (hw->mac.type) {
1883 		case ixgbe_mac_82598EB:
1884 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
1885 			break;
1886 		case ixgbe_mac_82599EB:
1887 		case ixgbe_mac_X540:
1888 		default:
1889 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
1890 			break;
1891 		}
1892 		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
1893 		switch (hw->mac.type) {
1894 		case ixgbe_mac_82598EB:
1895 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
1896 			break;
1897 		case ixgbe_mac_82599EB:
1898 		case ixgbe_mac_X540:
1899 		default:
1900 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
1901 			break;
1902 		}
1903 
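		/*
		 * With head write-back enabled the hardware DMAs its
		 * TX consumer index into *txr->tx_hdr instead of
		 * setting DD bits in individual descriptors, so
		 * ix_txeof() only needs to read that one word to see
		 * how far the hardware has progressed.
		 */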
1904 		/* Enable TX head write-back */
1905 		IXGBE_WRITE_REG(hw, IXGBE_TDWBAH(i),
1906 		    (uint32_t)(hdr_paddr >> 32));
1907 		IXGBE_WRITE_REG(hw, IXGBE_TDWBAL(i),
1908 		    ((uint32_t)hdr_paddr) | IXGBE_TDWBAL_HEAD_WB_ENABLE);
1909 	}
1910 
1911 	if (hw->mac.type != ixgbe_mac_82598EB) {
1912 		uint32_t dmatxctl, rttdcs;
1913 
1914 		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
1915 		dmatxctl |= IXGBE_DMATXCTL_TE;
1916 		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
1917 
1918 		/* Disable arbiter to set MTQC */
1919 		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
1920 		rttdcs |= IXGBE_RTTDCS_ARBDIS;
1921 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
1922 
1923 		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
1924 
1925 		/* Re-enable arbiter */
1926 		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
1927 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
1928 	}
1929 }
1930 
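/*
 * Set up a context descriptor for VLAN insertion and/or checksum
 * offload.  Returns the number of TX descriptors consumed: 0 when no
 * context descriptor is needed, 1 otherwise.
 */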
1931 static int
1932 ix_tx_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
1933     uint32_t *cmd_type_len, uint32_t *olinfo_status)
1934 {
1935 	struct ixgbe_adv_tx_context_desc *TXD;
1936 	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
1937 	int ehdrlen, ip_hlen = 0, ctxd;
1938 	boolean_t offload = TRUE;
1939 
1940 	/* First check if TSO is to be used */
1941 	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
1942 		return ix_tso_ctx_setup(txr, mp,
1943 		    cmd_type_len, olinfo_status);
1944 	}
1945 
1946 	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
1947 		offload = FALSE;
1948 
1949 	/* Indicate the whole packet as payload when not doing TSO */
1950 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
1951 
1952 	/*
1953 	 * In advanced descriptors the vlan tag must be placed into the
1954 	 * context descriptor.  Hence we need to make one even if not
1955 	 * doing checksum offloads.
1956 	 */
1957 	if (mp->m_flags & M_VLANTAG) {
1958 		vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
1959 		    IXGBE_ADVTXD_VLAN_SHIFT;
1960 	} else if (!offload) {
1961 		/* No TX descriptor is consumed */
1962 		return 0;
1963 	}
1964 
1965 	/* Set the ether header length */
1966 	ehdrlen = mp->m_pkthdr.csum_lhlen;
1967 	KASSERT(ehdrlen > 0, ("invalid ether hlen"));
1968 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
1969 
1970 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
1971 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
1972 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
1973 		ip_hlen = mp->m_pkthdr.csum_iphlen;
1974 		KASSERT(ip_hlen > 0, ("invalid ip hlen"));
1975 	}
1976 	vlan_macip_lens |= ip_hlen;
1977 
1978 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
1979 	if (mp->m_pkthdr.csum_flags & CSUM_TCP)
1980 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
1981 	else if (mp->m_pkthdr.csum_flags & CSUM_UDP)
1982 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
1983 
1984 	if (mp->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))
1985 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1986 
1987 	/* Now ready a context descriptor */
1988 	ctxd = txr->tx_next_avail;
1989 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
1990 
1991 	/* Now copy bits into descriptor */
1992 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
1993 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
1994 	TXD->seqnum_seed = htole32(0);
1995 	TXD->mss_l4len_idx = htole32(0);
1996 
1997 	/* We've consumed the first desc, adjust counters */
1998 	if (++ctxd == txr->tx_ndesc)
1999 		ctxd = 0;
2000 	txr->tx_next_avail = ctxd;
2001 	--txr->tx_avail;
2002 
2003 	/* One TX descriptor is consumed */
2004 	return 1;
2005 }
2006 
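/*
 * Set up a TSO context descriptor.  The payload length written into
 * olinfo_status excludes the Ethernet, IP and TCP headers, since the
 * hardware replicates those headers for every segment it generates.
 */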
2007 static int
2008 ix_tso_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
2009     uint32_t *cmd_type_len, uint32_t *olinfo_status)
2010 {
2011 	struct ixgbe_adv_tx_context_desc *TXD;
2012 	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2013 	uint32_t mss_l4len_idx = 0, paylen;
2014 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
2015 
2016 	ehdrlen = mp->m_pkthdr.csum_lhlen;
2017 	KASSERT(ehdrlen > 0, ("invalid ether hlen"));
2018 
2019 	ip_hlen = mp->m_pkthdr.csum_iphlen;
2020 	KASSERT(ip_hlen > 0, ("invalid ip hlen"));
2021 
2022 	tcp_hlen = mp->m_pkthdr.csum_thlen;
2023 	KASSERT(tcp_hlen > 0, ("invalid tcp hlen"));
2024 
2025 	ctxd = txr->tx_next_avail;
2026 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
2027 
2028 	if (mp->m_flags & M_VLANTAG) {
2029 		vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
2030 		    IXGBE_ADVTXD_VLAN_SHIFT;
2031 	}
2032 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
2033 	vlan_macip_lens |= ip_hlen;
2034 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
2035 
2036 	/* ADV DTYPE TUCMD */
2037 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
2038 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
2039 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
2040 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
2041 
2042 	/* MSS L4LEN IDX */
2043 	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
2044 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
2045 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
2046 
2047 	TXD->seqnum_seed = htole32(0);
2048 
2049 	if (++ctxd == txr->tx_ndesc)
2050 		ctxd = 0;
2051 
2052 	txr->tx_avail--;
2053 	txr->tx_next_avail = ctxd;
2054 
2055 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
2056 
2057 	/* This is used in the transmit desc in encap */
2058 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
2059 
2060 	*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
2061 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
2062 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
2063 
2064 	/* One TX descriptor is consumed */
2065 	return 1;
2066 }
2067 
2068 static void
2069 ix_txeof(struct ix_tx_ring *txr, int hdr)
2070 {
2071 	struct ifnet *ifp = &txr->tx_sc->arpcom.ac_if;
2072 	int first, avail;
2073 
2074 	if (txr->tx_avail == txr->tx_ndesc)
2075 		return;
2076 
2077 	first = txr->tx_next_clean;
2078 	if (first == hdr)
2079 		return;
2080 
2081 	avail = txr->tx_avail;
2082 	while (first != hdr) {
2083 		struct ix_tx_buf *txbuf = &txr->tx_buf[first];
2084 
2085 		++avail;
2086 		if (txbuf->m_head) {
2087 			bus_dmamap_unload(txr->tx_tag, txbuf->map);
2088 			m_freem(txbuf->m_head);
2089 			txbuf->m_head = NULL;
2090 			IFNET_STAT_INC(ifp, opackets, 1);
2091 		}
2092 		if (++first == txr->tx_ndesc)
2093 			first = 0;
2094 	}
2095 	txr->tx_next_clean = first;
2096 	txr->tx_avail = avail;
2097 
2098 	if (txr->tx_avail > IX_MAX_SCATTER + IX_TX_RESERVED) {
2099 		ifsq_clr_oactive(txr->tx_ifsq);
2100 		txr->tx_watchdog.wd_timer = 0;
2101 	}
2102 }
2103 
2104 static int
2105 ix_create_rx_ring(struct ix_rx_ring *rxr)
2106 {
2107 	int i, rsize, error, nrxd;
2108 
2109 	/*
2110 	 * Validate the number of receive descriptors.  It must not exceed
2111 	 * the hardware maximum, and must be a multiple of IX_DBA_ALIGN.
2112 	 */
2113 	nrxd = device_getenv_int(rxr->rx_sc->dev, "rxd", ix_rxd);
2114 	if (((nrxd * sizeof(union ixgbe_adv_rx_desc)) % IX_DBA_ALIGN) != 0 ||
2115 	    nrxd < IX_MIN_RXD || nrxd > IX_MAX_RXD) {
2116 		device_printf(rxr->rx_sc->dev,
2117 		    "Using %d RX descriptors instead of %d!\n",
2118 		    IX_DEF_RXD, nrxd);
2119 		rxr->rx_ndesc = IX_DEF_RXD;
2120 	} else {
2121 		rxr->rx_ndesc = nrxd;
2122 	}
2123 
2124 	/*
2125 	 * Allocate RX descriptor ring
2126 	 */
2127 	rsize = roundup2(rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc),
2128 	    IX_DBA_ALIGN);
2129 	rxr->rx_base = bus_dmamem_coherent_any(rxr->rx_sc->parent_tag,
2130 	    IX_DBA_ALIGN, rsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
2131 	    &rxr->rx_base_dtag, &rxr->rx_base_map, &rxr->rx_base_paddr);
2132 	if (rxr->rx_base == NULL) {
2133 		device_printf(rxr->rx_sc->dev,
2134 		    "Unable to allocate TX Descriptor memory\n");
2135 		return ENOMEM;
2136 	}
2137 
2138 	rsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_rx_buf) * rxr->rx_ndesc);
2139 	rxr->rx_buf = kmalloc_cachealign(rsize, M_DEVBUF, M_WAITOK | M_ZERO);
2140 
2141 	/*
2142 	 * Create DMA tag for RX buffers
2143 	 */
2144 	error = bus_dma_tag_create(rxr->rx_sc->parent_tag,
2145 	    1, 0,		/* alignment, bounds */
2146 	    BUS_SPACE_MAXADDR,	/* lowaddr */
2147 	    BUS_SPACE_MAXADDR,	/* highaddr */
2148 	    NULL, NULL,		/* filter, filterarg */
2149 	    PAGE_SIZE,		/* maxsize */
2150 	    1,			/* nsegments */
2151 	    PAGE_SIZE,		/* maxsegsize */
2152 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, /* flags */
2153 	    &rxr->rx_tag);
2154 	if (error) {
2155 		device_printf(rxr->rx_sc->dev,
2156 		    "Unable to create RX DMA tag\n");
2157 		kfree(rxr->rx_buf, M_DEVBUF);
2158 		rxr->rx_buf = NULL;
2159 		return error;
2160 	}
2161 
2162 	/*
2163 	 * Create spare DMA map for RX buffers
2164 	 */
2165 	error = bus_dmamap_create(rxr->rx_tag, BUS_DMA_WAITOK,
2166 	    &rxr->rx_sparemap);
2167 	if (error) {
2168 		device_printf(rxr->rx_sc->dev,
2169 		    "Unable to create spare RX DMA map\n");
2170 		bus_dma_tag_destroy(rxr->rx_tag);
2171 		kfree(rxr->rx_buf, M_DEVBUF);
2172 		rxr->rx_buf = NULL;
2173 		return error;
2174 	}
2175 
2176 	/*
2177 	 * Create DMA maps for RX buffers
2178 	 */
2179 	for (i = 0; i < rxr->rx_ndesc; ++i) {
2180 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2181 
2182 		error = bus_dmamap_create(rxr->rx_tag,
2183 		    BUS_DMA_WAITOK, &rxbuf->map);
2184 		if (error) {
2185 			device_printf(rxr->rx_sc->dev,
2186 			    "Unable to create RX dma map\n");
2187 			ix_destroy_rx_ring(rxr, i);
2188 			return error;
2189 		}
2190 	}
2191 
2192 	/*
2193 	 * Initialize various watermarks
2194 	 */
2195 	rxr->rx_wreg_nsegs = IX_DEF_RXWREG_NSEGS;
2196 
2197 	return 0;
2198 }
2199 
2200 static void
2201 ix_destroy_rx_ring(struct ix_rx_ring *rxr, int ndesc)
2202 {
2203 	int i;
2204 
2205 	if (rxr->rx_base != NULL) {
2206 		bus_dmamap_unload(rxr->rx_base_dtag, rxr->rx_base_map);
2207 		bus_dmamem_free(rxr->rx_base_dtag, rxr->rx_base,
2208 		    rxr->rx_base_map);
2209 		bus_dma_tag_destroy(rxr->rx_base_dtag);
2210 		rxr->rx_base = NULL;
2211 	}
2212 
2213 	if (rxr->rx_buf == NULL)
2214 		return;
2215 
2216 	for (i = 0; i < ndesc; ++i) {
2217 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2218 
2219 		KKASSERT(rxbuf->m_head == NULL);
2220 		bus_dmamap_destroy(rxr->rx_tag, rxbuf->map);
2221 	}
2222 	bus_dmamap_destroy(rxr->rx_tag, rxr->rx_sparemap);
2223 	bus_dma_tag_destroy(rxr->rx_tag);
2224 
2225 	kfree(rxr->rx_buf, M_DEVBUF);
2226 	rxr->rx_buf = NULL;
2227 }
2228 
2229 /*
2230 ** Used to detect a descriptor that has
2231 ** been merged by Hardware RSC.
2232 */
2233 static __inline uint32_t
2234 ix_rsc_count(union ixgbe_adv_rx_desc *rx)
2235 {
2236 	return (le32toh(rx->wb.lower.lo_dword.data) &
2237 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
2238 }
2239 
2240 #if 0
2241 /*********************************************************************
2242  *
2243  *  Initialize the Hardware RSC (LRO) feature on 82599
2244  *  for an RX ring.  It is toggled by the LRO capability
2245  *  even though it is transparent to the stack.
2246  *
2247  *  NOTE: since this HW feature only works with IPv4 and
2248  *        our testing has shown soft LRO to be just as
2249  *        effective, it is disabled by default.
2250  *
2251  **********************************************************************/
2252 static void
2253 ix_setup_hw_rsc(struct ix_rx_ring *rxr)
2254 {
2255 	struct	ix_softc 	*sc = rxr->rx_sc;
2256 	struct	ixgbe_hw	*hw = &sc->hw;
2257 	uint32_t			rscctrl, rdrxctl;
2258 
2259 #if 0
2260 	/* If turning LRO/RSC off we need to disable it */
2261 	if ((sc->arpcom.ac_if.if_capenable & IFCAP_LRO) == 0) {
2262 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2263 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
2264 		return;
2265 	}
2266 #endif
2267 
2268 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
2269 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
2270 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
2271 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
2272 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
2273 
2274 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2275 	rscctrl |= IXGBE_RSCCTL_RSCEN;
2276 	/*
2277 	** Limit the total number of descriptors that
2278 	** can be combined, so it does not exceed 64K
2279 	*/
2280 	if (rxr->mbuf_sz == MCLBYTES)
2281 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
2282 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
2283 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
2284 	else if (rxr->mbuf_sz == MJUM9BYTES)
2285 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
2286 	else  /* Using 16K cluster */
2287 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
2288 
2289 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
2290 
2291 	/* Enable TCP header recognition */
2292 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
2293 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
2294 	    IXGBE_PSRTYPE_TCPHDR));
2295 
2296 	/* Disable RSC for ACK packets */
2297 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
2298 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
2299 
2300 	rxr->hw_rsc = TRUE;
2301 }
2302 #endif
2303 
2304 static int
2305 ix_init_rx_ring(struct ix_rx_ring *rxr)
2306 {
2307 	int i;
2308 
2309 	/* Clear the ring contents */
2310 	bzero(rxr->rx_base, rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2311 
2312 	/* XXX we need JUMPAGESIZE for RSC too */
2313 	if (rxr->rx_sc->max_frame_size <= MCLBYTES)
2314 		rxr->rx_mbuf_sz = MCLBYTES;
2315 	else
2316 		rxr->rx_mbuf_sz = MJUMPAGESIZE;
2317 
2318 	/* Now replenish the mbufs */
2319 	for (i = 0; i < rxr->rx_ndesc; ++i) {
2320 		int error;
2321 
2322 		error = ix_newbuf(rxr, i, TRUE);
2323 		if (error)
2324 			return error;
2325 	}
2326 
2327 	/* Setup our descriptor indices */
2328 	rxr->rx_next_check = 0;
2329 	rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2330 
2331 #if 0
2332 	/*
2333 	** Now set up the LRO interface:
2334 	*/
2335 	if (ixgbe_rsc_enable)
2336 		ix_setup_hw_rsc(rxr);
2337 #endif
2338 
2339 	return 0;
2340 }
2341 
2342 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
2343 
2344 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
2345 
2346 static void
2347 ix_init_rx_unit(struct ix_softc *sc)
2348 {
2349 	struct ixgbe_hw	*hw = &sc->hw;
2350 	struct ifnet *ifp = &sc->arpcom.ac_if;
2351 	uint32_t bufsz, rxctrl, fctrl, rxcsum, hlreg;
2352 	int i;
2353 
2354 	/*
2355 	 * Make sure receives are disabled while setting up the descriptor ring
2356 	 */
2357 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
2358 	IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
2359 
2360 	/* Enable broadcasts */
2361 	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
2362 	fctrl |= IXGBE_FCTRL_BAM;
2363 	fctrl |= IXGBE_FCTRL_DPF;
2364 	fctrl |= IXGBE_FCTRL_PMCF;
2365 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
2366 
2367 	/* Set for Jumbo Frames? */
2368 	hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
2369 	if (ifp->if_mtu > ETHERMTU)
2370 		hlreg |= IXGBE_HLREG0_JUMBOEN;
2371 	else
2372 		hlreg &= ~IXGBE_HLREG0_JUMBOEN;
2373 	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
2374 
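	/*
	 * SRRCTL expresses the receive buffer size in units of
	 * (1 << IXGBE_SRRCTL_BSIZEPKT_SHIFT) bytes, so round the mbuf
	 * cluster size up to a whole unit before shifting.
	 */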
2375 	KKASSERT(sc->rx_rings[0].rx_mbuf_sz >= MCLBYTES);
2376 	bufsz = (sc->rx_rings[0].rx_mbuf_sz + BSIZEPKT_ROUNDUP) >>
2377 	    IXGBE_SRRCTL_BSIZEPKT_SHIFT;
2378 
2379 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
2380 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
2381 		uint64_t rdba = rxr->rx_base_paddr;
2382 		uint32_t srrctl;
2383 
2384 		/* Setup the Base and Length of the Rx Descriptor Ring */
2385 		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i), (uint32_t)rdba);
2386 		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (uint32_t)(rdba >> 32));
2387 		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
2388 		    rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2389 
2390 		/*
2391 		 * Set up the SRRCTL register
2392 		 */
2393 		srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
2394 
2395 		srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
2396 		srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
2397 		srrctl |= bufsz;
2398 		srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
2399 		if (sc->rx_ring_inuse > 1) {
2400 			/* See the comment near ix_enable_rx_drop() */
2401 			switch (sc->fc) {
2402 			case ixgbe_fc_rx_pause:
2403 			case ixgbe_fc_tx_pause:
2404 			case ixgbe_fc_full:
2405 				srrctl &= ~IXGBE_SRRCTL_DROP_EN;
2406 				if (i == 0 && bootverbose) {
2407 					if_printf(ifp, "flow control %d, "
2408 					    "disable RX drop\n", sc->fc);
2409 				}
2410 				break;
2411 
2412 			case ixgbe_fc_none:
2413 				srrctl |= IXGBE_SRRCTL_DROP_EN;
2414 				if (i == 0 && bootverbose) {
2415 					if_printf(ifp, "flow control %d, "
2416 					    "enable RX drop\n", sc->fc);
2417 				}
2418 				break;
2419 
2420 			default:
2421 				break;
2422 			}
2423 		}
2424 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
2425 
2426 		/* Setup the HW Rx Head and Tail Descriptor Pointers */
2427 		IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
2428 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
2429 	}
2430 
2431 	if (sc->hw.mac.type != ixgbe_mac_82598EB)
2432 		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), 0);
2433 
2434 	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
2435 
2436 	/*
2437 	 * Setup RSS
2438 	 */
2439 	if (IX_ENABLE_HWRSS(sc)) {
2440 		uint8_t key[IX_NRSSRK * IX_RSSRK_SIZE];
2441 		int j, r;
2442 
2443 		/*
2444 		 * NOTE:
2445 		 * When we reach here, RSS has already been disabled
2446 		 * in ix_stop(), so we can safely configure the RSS key
2447 		 * and redirect table.
2448 		 */
2449 
2450 		/*
2451 		 * Configure RSS key
2452 		 */
2453 		toeplitz_get_key(key, sizeof(key));
2454 		for (i = 0; i < IX_NRSSRK; ++i) {
2455 			uint32_t rssrk;
2456 
2457 			rssrk = IX_RSSRK_VAL(key, i);
2458 			IX_RSS_DPRINTF(sc, 1, "rssrk%d 0x%08x\n",
2459 			    i, rssrk);
2460 
2461 			IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rssrk);
2462 		}
2463 
2464 		/*
2465 		 * Configure RSS redirect table in following fashion:
2466 		 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
2467 		 */
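		/*
		 * E.g. with 2 RX rings in use the table is filled with
		 * 0,1,0,1,... so the low bit of the RSS hash picks the
		 * ring; with 4 rings the low two bits do, and so on.
		 */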
2468 		r = 0;
2469 		for (j = 0; j < IX_NRETA; ++j) {
2470 			uint32_t reta = 0;
2471 
2472 			for (i = 0; i < IX_RETA_SIZE; ++i) {
2473 				uint32_t q;
2474 
2475 				q = r % sc->rx_ring_inuse;
2476 				reta |= q << (8 * i);
2477 				++r;
2478 			}
2479 			IX_RSS_DPRINTF(sc, 1, "reta 0x%08x\n", reta);
2480 			IXGBE_WRITE_REG(hw, IXGBE_RETA(j), reta);
2481 		}
2482 
2483 		/*
2484 		 * Enable multiple receive queues.
2485 		 * Enable IPv4 RSS standard hash functions.
2486 		 */
2487 		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
2488 		    IXGBE_MRQC_RSSEN |
2489 		    IXGBE_MRQC_RSS_FIELD_IPV4 |
2490 		    IXGBE_MRQC_RSS_FIELD_IPV4_TCP);
2491 
2492 		/*
2493 		 * NOTE:
2494 		 * PCSD must be enabled to enable multiple
2495 		 * receive queues.
2496 		 */
2497 		rxcsum |= IXGBE_RXCSUM_PCSD;
2498 	}
2499 
2500 	if (ifp->if_capenable & IFCAP_RXCSUM)
2501 		rxcsum |= IXGBE_RXCSUM_PCSD;
2502 
2503 	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
2504 }
2505 
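/*
 * Write the RX tail register.  RDT must point at the last descriptor
 * the hardware may own, which is the one just before the next
 * descriptor the driver will refill, hence the decrement with wrap.
 */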
2506 static __inline void
2507 ix_rx_refresh(struct ix_rx_ring *rxr, int i)
2508 {
2509 	if (--i < 0)
2510 		i = rxr->rx_ndesc - 1;
2511 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, IXGBE_RDT(rxr->rx_idx), i);
2512 }
2513 
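/*
 * Translate RX descriptor checksum status into mbuf csum_flags.
 * IPCS/L4CS indicate that the hardware checked the IP/L4 checksum;
 * the corresponding error bits (IPE/TCPE) veto the result, so a flag
 * is only set for checked-and-valid packets.
 */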
2514 static __inline void
2515 ix_rxcsum(uint32_t staterr, struct mbuf *mp, uint32_t ptype)
2516 {
2517 	if ((ptype &
2518 	     (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_IPV4_EX)) == 0) {
2519 		/* Not IPv4 */
2520 		return;
2521 	}
2522 
2523 	if ((staterr & (IXGBE_RXD_STAT_IPCS | IXGBE_RXDADV_ERR_IPE)) ==
2524 	    IXGBE_RXD_STAT_IPCS)
2525 		mp->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID;
2526 
2527 	if ((ptype &
2528 	     (IXGBE_RXDADV_PKTTYPE_TCP | IXGBE_RXDADV_PKTTYPE_UDP)) == 0) {
2529 		/*
2530 		 * - Neither TCP nor UDP
2531 		 * - IPv4 fragment
2532 		 */
2533 		return;
2534 	}
2535 
2536 	if ((staterr & (IXGBE_RXD_STAT_L4CS | IXGBE_RXDADV_ERR_TCPE)) ==
2537 	    IXGBE_RXD_STAT_L4CS) {
2538 		mp->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
2539 		    CSUM_FRAG_NOT_CHECKED;
2540 		mp->m_pkthdr.csum_data = htons(0xffff);
2541 	}
2542 }
2543 
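/*
 * Build packet info for RSS dispatch and record the Toeplitz hash in
 * the mbuf.  Only IPv4 TCP and non-fragmented IPv4 UDP hash types are
 * handled; everything else returns NULL and takes the default input
 * path.
 */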
2544 static __inline struct pktinfo *
2545 ix_rssinfo(struct mbuf *m, struct pktinfo *pi,
2546     uint32_t hash, uint32_t hashtype, uint32_t ptype)
2547 {
2548 	switch (hashtype) {
2549 	case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
2550 		pi->pi_netisr = NETISR_IP;
2551 		pi->pi_flags = 0;
2552 		pi->pi_l3proto = IPPROTO_TCP;
2553 		break;
2554 
2555 	case IXGBE_RXDADV_RSSTYPE_IPV4:
2556 		if ((ptype & IXGBE_RXDADV_PKTTYPE_UDP) == 0) {
2557 			/* Not UDP, or an IPv4 fragment */
2558 			return NULL;
2559 		}
2560 		pi->pi_netisr = NETISR_IP;
2561 		pi->pi_flags = 0;
2562 		pi->pi_l3proto = IPPROTO_UDP;
2563 		break;
2564 
2565 	default:
2566 		return NULL;
2567 	}
2568 
2569 	m->m_flags |= M_HASH;
2570 	m->m_pkthdr.hash = toeplitz_hash(hash);
2571 	return pi;
2572 }
2573 
2574 static __inline void
2575 ix_setup_rxdesc(union ixgbe_adv_rx_desc *rxd, const struct ix_rx_buf *rxbuf)
2576 {
2577 	rxd->read.pkt_addr = htole64(rxbuf->paddr);
2578 	rxd->wb.upper.status_error = 0;
2579 }
2580 
2581 static void
2582 ix_rx_discard(struct ix_rx_ring *rxr, int i, boolean_t eop)
2583 {
2584 	struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2585 
2586 	/*
2587 	 * XXX discard may not be correct
2588 	 */
2589 	if (eop) {
2590 		IFNET_STAT_INC(&rxr->rx_sc->arpcom.ac_if, ierrors, 1);
2591 		rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2592 	} else {
2593 		rxr->rx_flags |= IX_RXRING_FLAG_DISC;
2594 	}
2595 	if (rxbuf->fmp != NULL) {
2596 		m_freem(rxbuf->fmp);
2597 		rxbuf->fmp = NULL;
2598 		rxbuf->lmp = NULL;
2599 	}
2600 	ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
2601 }
2602 
2603 static void
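/*
 * Receive completed frames.  'count' limits the number of complete
 * frames processed per call and is only decremented on EOP, so a
 * frame spanning several descriptors still counts once.  The RX tail
 * register is refreshed every rx_wreg_nsegs descriptors and once more
 * before returning.
 */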
2604 ix_rxeof(struct ix_rx_ring *rxr, int count)
2605 {
2606 	struct ifnet *ifp = &rxr->rx_sc->arpcom.ac_if;
2607 	int i, nsegs = 0, cpuid = mycpuid;
2608 
2609 	i = rxr->rx_next_check;
2610 	while (count != 0) {
2611 		struct ix_rx_buf *rxbuf, *nbuf = NULL;
2612 		union ixgbe_adv_rx_desc	*cur;
2613 		struct mbuf *sendmp = NULL, *mp;
2614 		struct pktinfo *pi = NULL, pi0;
2615 		uint32_t rsc = 0, ptype, staterr, hash, hashtype;
2616 		uint16_t len;
2617 		boolean_t eop;
2618 
2619 		cur = &rxr->rx_base[i];
2620 		staterr = le32toh(cur->wb.upper.status_error);
2621 
2622 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
2623 			break;
2624 		++nsegs;
2625 
2626 		rxbuf = &rxr->rx_buf[i];
2627 		mp = rxbuf->m_head;
2628 
2629 		len = le16toh(cur->wb.upper.length);
2630 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
2631 		    IXGBE_RXDADV_PKTTYPE_MASK;
2632 		hash = le32toh(cur->wb.lower.hi_dword.rss);
2633 		hashtype = le32toh(cur->wb.lower.lo_dword.data) &
2634 		    IXGBE_RXDADV_RSSTYPE_MASK;
2635 
2636 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
2637 		if (eop)
2638 			--count;
2639 
2640 		/*
2641 		 * Make sure bad packets are discarded
2642 		 */
2643 		if ((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) ||
2644 		    (rxr->rx_flags & IX_RXRING_FLAG_DISC)) {
2645 			ix_rx_discard(rxr, i, eop);
2646 			goto next_desc;
2647 		}
2648 
2649 		bus_dmamap_sync(rxr->rx_tag, rxbuf->map, BUS_DMASYNC_POSTREAD);
2650 		if (ix_newbuf(rxr, i, FALSE) != 0) {
2651 			ix_rx_discard(rxr, i, eop);
2652 			goto next_desc;
2653 		}
2654 
2655 		/*
2656 		 * On 82599, which supports hardware LRO (RSC), a frame
2657 		 * need not be split across sequential descriptors;
2658 		 * instead the next descriptor of the frame is encoded
2659 		 * in bits of the current descriptor.  This also means
2660 		 * we may be working on more than one frame at a time,
2661 		 * which is why the old global chain pointers were
2662 		 * replaced by the per-buffer chaining done here.
2664 		 */
2665 		if (!eop) {
2666 			int nextp;
2667 
2668 			/*
2669 			 * Figure out the next descriptor
2670 			 * of this frame.
2671 			 */
2672 			if (rxr->rx_flags & IX_RXRING_FLAG_LRO)
2673 				rsc = ix_rsc_count(cur);
2674 			if (rsc) { /* Get hardware index */
2675 				nextp = ((staterr &
2676 				    IXGBE_RXDADV_NEXTP_MASK) >>
2677 				    IXGBE_RXDADV_NEXTP_SHIFT);
2678 			} else { /* Just sequential */
2679 				nextp = i + 1;
2680 				if (nextp == rxr->rx_ndesc)
2681 					nextp = 0;
2682 			}
2683 			nbuf = &rxr->rx_buf[nextp];
2684 			prefetch(nbuf);
2685 		}
2686 		mp->m_len = len;
2687 
2688 		/*
2689 		 * Rather than using the fmp/lmp global pointers
2690 		 * we now keep the head of a packet chain in the
2691 		 * buffer struct and pass this along from one
2692 		 * descriptor to the next, until we get EOP.
2693 		 */
2694 		if (rxbuf->fmp == NULL) {
2695 			mp->m_pkthdr.len = len;
2696 			rxbuf->fmp = mp;
2697 			rxbuf->lmp = mp;
2698 		} else {
2699 			rxbuf->fmp->m_pkthdr.len += len;
2700 			rxbuf->lmp->m_next = mp;
2701 			rxbuf->lmp = mp;
2702 		}
2703 
2704 		if (nbuf != NULL) {
2705 			/*
2706 			 * Not the last fragment of this frame,
2707 			 * pass this fragment list on
2708 			 */
2709 			nbuf->fmp = rxbuf->fmp;
2710 			nbuf->lmp = rxbuf->lmp;
2711 		} else {
2712 			/*
2713 			 * Send this frame
2714 			 */
2715 			sendmp = rxbuf->fmp;
2716 
2717 			sendmp->m_pkthdr.rcvif = ifp;
2718 			IFNET_STAT_INC(ifp, ipackets, 1);
2719 #ifdef IX_RSS_DEBUG
2720 			rxr->rx_pkts++;
2721 #endif
2722 
2723 			/* Process vlan info */
2724 			if (staterr & IXGBE_RXD_STAT_VP) {
2725 				sendmp->m_pkthdr.ether_vlantag =
2726 				    le16toh(cur->wb.upper.vlan);
2727 				sendmp->m_flags |= M_VLANTAG;
2728 			}
2729 			if (ifp->if_capenable & IFCAP_RXCSUM)
2730 				ix_rxcsum(staterr, sendmp, ptype);
2731 			if (ifp->if_capenable & IFCAP_RSS) {
2732 				pi = ix_rssinfo(sendmp, &pi0,
2733 				    hash, hashtype, ptype);
2734 			}
2735 		}
2736 		rxbuf->fmp = NULL;
2737 		rxbuf->lmp = NULL;
2738 next_desc:
2739 		/* Advance our pointers to the next descriptor. */
2740 		if (++i == rxr->rx_ndesc)
2741 			i = 0;
2742 
2743 		if (sendmp != NULL)
2744 			ifp->if_input(ifp, sendmp, pi, cpuid);
2745 
2746 		if (nsegs >= rxr->rx_wreg_nsegs) {
2747 			ix_rx_refresh(rxr, i);
2748 			nsegs = 0;
2749 		}
2750 	}
2751 	rxr->rx_next_check = i;
2752 
2753 	if (nsegs > 0)
2754 		ix_rx_refresh(rxr, i);
2755 }
2756 
2757 static void
2758 ix_set_vlan(struct ix_softc *sc)
2759 {
2760 	struct ixgbe_hw *hw = &sc->hw;
2761 	uint32_t ctrl;
2762 
2763 	if (hw->mac.type == ixgbe_mac_82598EB) {
2764 		ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
2765 		ctrl |= IXGBE_VLNCTRL_VME;
2766 		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
2767 	} else {
2768 		int i;
2769 
2770 		/*
2771 		 * On 82599 and later chips the VLAN enable is
2772 		 * per queue in RXDCTL
2773 		 */
2774 		for (i = 0; i < sc->rx_ring_inuse; ++i) {
2775 			ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
2776 			ctrl |= IXGBE_RXDCTL_VME;
2777 			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
2778 		}
2779 	}
2780 }
2781 
2782 static void
2783 ix_enable_intr(struct ix_softc *sc)
2784 {
2785 	struct ixgbe_hw	*hw = &sc->hw;
2786 	uint32_t fwsm;
2787 	int i;
2788 
2789 	for (i = 0; i < sc->intr_cnt; ++i)
2790 		lwkt_serialize_handler_enable(sc->intr_data[i].intr_serialize);
2791 
2792 	sc->intr_mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
2793 
2794 	/* Enable Fan Failure detection */
2795 	if (hw->device_id == IXGBE_DEV_ID_82598AT)
2796 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
2797 
2798 	switch (sc->hw.mac.type) {
2799 	case ixgbe_mac_82599EB:
2800 		sc->intr_mask |= IXGBE_EIMS_ECC;
2801 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP0;
2802 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
2803 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP2;
2804 		break;
2805 
2806 	case ixgbe_mac_X540:
2807 		sc->intr_mask |= IXGBE_EIMS_ECC;
2808 		/* Detect if Thermal Sensor is enabled */
2809 		fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
2810 		if (fwsm & IXGBE_FWSM_TS_ENABLED)
2811 			sc->intr_mask |= IXGBE_EIMS_TS;
2812 		/* FALL THROUGH */
2813 	default:
2814 		break;
2815 	}
2816 
2817 	/* With MSI-X we use auto clear for RX and TX rings */
2818 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
2819 		/*
2820 		 * There are no EIAC1/EIAC2 for newer chips; the related
2821 		 * bits for TX and RX rings > 16 are always auto clear.
2822 		 *
2823 		 * XXX which bits?  There are _no_ documented EICR1 and
2824 		 * EICR2 at all; only EICR.
2825 		 */
2826 		IXGBE_WRITE_REG(hw, IXGBE_EIAC, IXGBE_EIMS_RTX_QUEUE);
2827 	} else {
2828 		sc->intr_mask |= IX_TX_INTR_MASK | IX_RX0_INTR_MASK;
2829 
2830 		KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
2831 		if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS)
2832 			sc->intr_mask |= IX_RX1_INTR_MASK;
2833 	}
2834 
2835 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
2836 
2837 	/*
2838 	 * Enable RX and TX rings for MSI-X
2839 	 */
2840 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
2841 		for (i = 0; i < sc->tx_ring_inuse; ++i) {
2842 			const struct ix_tx_ring *txr = &sc->tx_rings[i];
2843 
2844 			if (txr->tx_intr_vec >= 0) {
2845 				IXGBE_WRITE_REG(hw, txr->tx_eims,
2846 				    txr->tx_eims_val);
2847 			}
2848 		}
2849 		for (i = 0; i < sc->rx_ring_inuse; ++i) {
2850 			const struct ix_rx_ring *rxr = &sc->rx_rings[i];
2851 
2852 			KKASSERT(rxr->rx_intr_vec >= 0);
2853 			IXGBE_WRITE_REG(hw, rxr->rx_eims, rxr->rx_eims_val);
2854 		}
2855 	}
2856 
2857 	IXGBE_WRITE_FLUSH(hw);
2858 }
2859 
2860 static void
2861 ix_disable_intr(struct ix_softc *sc)
2862 {
2863 	int i;
2864 
2865 	if (sc->intr_type == PCI_INTR_TYPE_MSIX)
2866 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIAC, 0);
2867 
2868 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
2869 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, ~0);
2870 	} else {
2871 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, 0xFFFF0000);
2872 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(0), ~0);
2873 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(1), ~0);
2874 	}
2875 	IXGBE_WRITE_FLUSH(&sc->hw);
2876 
2877 	for (i = 0; i < sc->intr_cnt; ++i)
2878 		lwkt_serialize_handler_disable(sc->intr_data[i].intr_serialize);
2879 }
2880 
2881 uint16_t
2882 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, uint32_t reg)
2883 {
2884 	return pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
2885 	    reg, 2);
2886 }
2887 
2888 void
2889 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, uint32_t reg, uint16_t value)
2890 {
2891 	pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
2892 	    reg, value, 2);
2893 }
2894 
2895 static void
2896 ix_slot_info(struct ix_softc *sc)
2897 {
2898 	struct ixgbe_hw *hw = &sc->hw;
2899 	device_t dev = sc->dev;
2900 	struct ixgbe_mac_info *mac = &hw->mac;
2901 	uint16_t link;
2902 	uint32_t offset;
2903 
2904 	/* For most devices simply call the shared code routine */
2905 	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
2906 		ixgbe_get_bus_info(hw);
2907 		goto display;
2908 	}
2909 
2910 	/*
2911 	 * For the Quad port adapter we need to parse back
2912 	 * up the PCI tree to find the speed of the expansion
2913 	 * slot into which this adapter is plugged. A bit more work.
2914 	 */
2915 	dev = device_get_parent(device_get_parent(dev));
2916 #ifdef IXGBE_DEBUG
2917 	device_printf(dev, "parent pcib = %x,%x,%x\n",
2918 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
2919 #endif
2920 	dev = device_get_parent(device_get_parent(dev));
2921 #ifdef IXGBE_DEBUG
2922 	device_printf(dev, "slot pcib = %x,%x,%x\n",
2923 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
2924 #endif
2925 	/* Now get the PCI Express Capabilities offset */
2926 	offset = pci_get_pciecap_ptr(dev);
2927 	/* ...and read the Link Status Register */
2928 	link = pci_read_config(dev, offset + PCIER_LINKSTAT, 2);
2929 	switch (link & IXGBE_PCI_LINK_WIDTH) {
2930 	case IXGBE_PCI_LINK_WIDTH_1:
2931 		hw->bus.width = ixgbe_bus_width_pcie_x1;
2932 		break;
2933 	case IXGBE_PCI_LINK_WIDTH_2:
2934 		hw->bus.width = ixgbe_bus_width_pcie_x2;
2935 		break;
2936 	case IXGBE_PCI_LINK_WIDTH_4:
2937 		hw->bus.width = ixgbe_bus_width_pcie_x4;
2938 		break;
2939 	case IXGBE_PCI_LINK_WIDTH_8:
2940 		hw->bus.width = ixgbe_bus_width_pcie_x8;
2941 		break;
2942 	default:
2943 		hw->bus.width = ixgbe_bus_width_unknown;
2944 		break;
2945 	}
2946 
2947 	switch (link & IXGBE_PCI_LINK_SPEED) {
2948 	case IXGBE_PCI_LINK_SPEED_2500:
2949 		hw->bus.speed = ixgbe_bus_speed_2500;
2950 		break;
2951 	case IXGBE_PCI_LINK_SPEED_5000:
2952 		hw->bus.speed = ixgbe_bus_speed_5000;
2953 		break;
2954 	case IXGBE_PCI_LINK_SPEED_8000:
2955 		hw->bus.speed = ixgbe_bus_speed_8000;
2956 		break;
2957 	default:
2958 		hw->bus.speed = ixgbe_bus_speed_unknown;
2959 		break;
2960 	}
2961 
2962 	mac->ops.set_lan_id(hw);
2963 
2964 display:
2965 	device_printf(dev, "PCI Express Bus: Speed %s %s\n",
2966 	    hw->bus.speed == ixgbe_bus_speed_8000 ? "8.0GT/s" :
2967 	    hw->bus.speed == ixgbe_bus_speed_5000 ? "5.0GT/s" :
2968 	    hw->bus.speed == ixgbe_bus_speed_2500 ? "2.5GT/s" : "Unknown",
2969 	    hw->bus.width == ixgbe_bus_width_pcie_x8 ? "Width x8" :
2970 	    hw->bus.width == ixgbe_bus_width_pcie_x4 ? "Width x4" :
2971 	    hw->bus.width == ixgbe_bus_width_pcie_x1 ? "Width x1" : "Unknown");
2972 
2973 	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP &&
2974 	    hw->bus.width <= ixgbe_bus_width_pcie_x4 &&
2975 	    hw->bus.speed == ixgbe_bus_speed_2500) {
2976 		device_printf(dev, "For optimal performance a x8 "
2977 		    "PCIE, or x4 PCIE Gen2 slot is required.\n");
2978 	} else if (hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP &&
2979 	    hw->bus.width <= ixgbe_bus_width_pcie_x8 &&
2980 	    hw->bus.speed < ixgbe_bus_speed_8000) {
2981 		device_printf(dev, "For optimal performance a x8 "
2982 		    "PCIE Gen3 slot is required.\n");
2983 	}
2984 }
2985 
2986 /*
2987  * TODO comment is incorrect
2988  *
2989  * Setup the correct IVAR register for a particular MSIX interrupt
2990  * - entry is the register array entry
2991  * - vector is the MSIX vector for this queue
2992  * - type is RX/TX/MISC
2993  */
2994 static void
2995 ix_set_ivar(struct ix_softc *sc, uint8_t entry, uint8_t vector,
2996     int8_t type)
2997 {
2998 	struct ixgbe_hw *hw = &sc->hw;
2999 	uint32_t ivar, index;
3000 
3001 	vector |= IXGBE_IVAR_ALLOC_VAL;
3002 
3003 	switch (hw->mac.type) {
3004 	case ixgbe_mac_82598EB:
3005 		if (type == -1)
3006 			entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
3007 		else
3008 			entry += (type * 64);
3009 		index = (entry >> 2) & 0x1F;
3010 		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
3011 		ivar &= ~(0xFF << (8 * (entry & 0x3)));
3012 		ivar |= (vector << (8 * (entry & 0x3)));
3013 		IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
3014 		break;
3015 
3016 	case ixgbe_mac_82599EB:
3017 	case ixgbe_mac_X540:
3018 		if (type == -1) { /* MISC IVAR */
3019 			index = (entry & 1) * 8;
3020 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
3021 			ivar &= ~(0xFF << index);
3022 			ivar |= (vector << index);
3023 			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
3024 		} else {	/* RX/TX IVARS */
3025 			index = (16 * (entry & 1)) + (8 * type);
3026 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
3027 			ivar &= ~(0xFF << index);
3028 			ivar |= (vector << index);
3029 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
3030 		}
		break;
3031 
3032 	default:
3033 		break;
3034 	}
3035 }
3036 
3037 static boolean_t
3038 ix_sfp_probe(struct ix_softc *sc)
3039 {
3040 	struct ixgbe_hw	*hw = &sc->hw;
3041 
3042 	if (hw->phy.type == ixgbe_phy_nl &&
3043 	    hw->phy.sfp_type == ixgbe_sfp_type_not_present) {
3044 		int32_t ret;
3045 
3046 		ret = hw->phy.ops.identify_sfp(hw);
3047 		if (ret)
3048 			return FALSE;
3049 
3050 		ret = hw->phy.ops.reset(hw);
3051 		if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3052 			if_printf(&sc->arpcom.ac_if,
3053 			     "Unsupported SFP+ module detected!  "
3054 			     "Reload driver with supported module.\n");
3055 			sc->sfp_probe = FALSE;
3056 			return FALSE;
3057 		}
3058 		if_printf(&sc->arpcom.ac_if, "SFP+ module detected!\n");
3059 
3060 		/* We now have supported optics */
3061 		sc->sfp_probe = FALSE;
3062 		/* Set the optics type so system reports correctly */
3063 		ix_setup_optics(sc);
3064 
3065 		return TRUE;
3066 	}
3067 	return FALSE;
3068 }
3069 
3070 static void
3071 ix_handle_link(struct ix_softc *sc)
3072 {
3073 	ixgbe_check_link(&sc->hw, &sc->link_speed, &sc->link_up, 0);
3074 	ix_update_link_status(sc);
3075 }
3076 
3077 /*
3078  * Handling SFP module
3079  */
3080 static void
3081 ix_handle_mod(struct ix_softc *sc)
3082 {
3083 	struct ixgbe_hw *hw = &sc->hw;
3084 	uint32_t err;
3085 
3086 	err = hw->phy.ops.identify_sfp(hw);
3087 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3088 		if_printf(&sc->arpcom.ac_if,
3089 		    "Unsupported SFP+ module type was detected.\n");
3090 		return;
3091 	}
3092 	err = hw->mac.ops.setup_sfp(hw);
3093 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3094 		if_printf(&sc->arpcom.ac_if,
3095 		    "Setup failure - unsupported SFP+ module type.\n");
3096 		return;
3097 	}
3098 	ix_handle_msf(sc);
3099 }
3100 
3101 /*
3102  * Handling MSF (multispeed fiber)
3103  */
3104 static void
3105 ix_handle_msf(struct ix_softc *sc)
3106 {
3107 	struct ixgbe_hw *hw = &sc->hw;
3108 	uint32_t autoneg;
3109 
3110 	autoneg = hw->phy.autoneg_advertised;
3111 	if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
3112 		bool negotiate;
3113 
3114 		hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
3115 	}
3116 	if (hw->mac.ops.setup_link != NULL)
3117 		hw->mac.ops.setup_link(hw, autoneg, TRUE);
3118 }
3119 
3120 static void
3121 ix_update_stats(struct ix_softc *sc)
3122 {
3123 	struct ifnet *ifp = &sc->arpcom.ac_if;
3124 	struct ixgbe_hw *hw = &sc->hw;
3125 	uint32_t missed_rx = 0, bprc, lxon, lxoff, total;
3126 	uint64_t total_missed_rx = 0;
3127 	int i;
3128 
3129 	sc->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
3130 	sc->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
3131 	sc->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
3132 	sc->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
3133 
3134 	/*
3135 	 * Note: These are for the 8 possible traffic classes, which
3136 	 * are unused in the current implementation, so only class 0
3137 	 * should contain real data.
3138 	 */
3139 	for (i = 0; i < 8; i++) {
3140 		uint32_t mp;
3141 
3142 		mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
3143 		/* missed_rx tallies misses for the gprc workaround */
3144 		missed_rx += mp;
3145 		/* global total per queue */
3146 		sc->stats.mpc[i] += mp;
3147 
3148 		/* Running comprehensive total for stats display */
3149 		total_missed_rx += sc->stats.mpc[i];
3150 
3151 		if (hw->mac.type == ixgbe_mac_82598EB) {
3152 			sc->stats.rnbc[i] += IXGBE_READ_REG(hw, IXGBE_RNBC(i));
3153 			sc->stats.qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC(i));
3154 			sc->stats.qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC(i));
3155 			sc->stats.pxonrxc[i] +=
3156 			    IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
3157 		} else {
3158 			sc->stats.pxonrxc[i] +=
3159 			    IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
3160 		}
3161 		sc->stats.pxontxc[i] += IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
3162 		sc->stats.pxofftxc[i] += IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
3163 		sc->stats.pxoffrxc[i] += IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
3164 		sc->stats.pxon2offc[i] +=
3165 		    IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
3166 	}
3167 	for (i = 0; i < 16; i++) {
3168 		sc->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
3169 		sc->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
3170 		sc->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
3171 	}
3172 	sc->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
3173 	sc->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
3174 	sc->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
3175 
3176 	/* Hardware workaround, gprc counts missed packets */
3177 	sc->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
3178 	sc->stats.gprc -= missed_rx;
3179 
3180 	if (hw->mac.type != ixgbe_mac_82598EB) {
3181 		sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
3182 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
3183 		sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
3184 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
3185 		sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
3186 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
3187 		sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
3188 		sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
3189 	} else {
3190 		sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
3191 		sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
3192 		/* 82598 only has a counter in the high register */
3193 		sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
3194 		sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
3195 		sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
3196 	}
3197 
3198 	/*
3199 	 * Workaround: mprc hardware is incorrectly counting
3200 	 * broadcasts, so for now we subtract those.
3201 	 */
3202 	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
3203 	sc->stats.bprc += bprc;
3204 	sc->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
3205 	if (hw->mac.type == ixgbe_mac_82598EB)
3206 		sc->stats.mprc -= bprc;
3207 
3208 	sc->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
3209 	sc->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
3210 	sc->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
3211 	sc->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
3212 	sc->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
3213 	sc->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
3214 
3215 	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
3216 	sc->stats.lxontxc += lxon;
3217 	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
3218 	sc->stats.lxofftxc += lxoff;
3219 	total = lxon + lxoff;
3220 
3221 	sc->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
3222 	sc->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
3223 	sc->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
3224 	sc->stats.gptc -= total;
3225 	sc->stats.mptc -= total;
3226 	sc->stats.ptc64 -= total;
3227 	sc->stats.gotc -= total * ETHER_MIN_LEN;
3228 
3229 	sc->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
3230 	sc->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
3231 	sc->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
3232 	sc->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
3233 	sc->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
3234 	sc->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
3235 	sc->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
3236 	sc->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
3237 	sc->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
3238 	sc->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
3239 	sc->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
3240 	sc->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
3241 	sc->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
3242 	sc->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
3243 	sc->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
3244 	sc->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC);
3245 	sc->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
3246 	sc->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
3247 	/* Only read FCOE on 82599 */
3248 	if (hw->mac.type != ixgbe_mac_82598EB) {
3249 		sc->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
3250 		sc->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
3251 		sc->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
3252 		sc->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
3253 		sc->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
3254 	}
3255 
3256 	/* Rx Errors */
3257 	IFNET_STAT_SET(ifp, iqdrops, total_missed_rx);
3258 	IFNET_STAT_SET(ifp, ierrors, sc->stats.crcerrs + sc->stats.rlec);
3259 }
3260 
3261 #if 0
3262 /*
3263  * Add sysctl variables, one per statistic, to the system.
3264  */
3265 static void
3266 ix_add_hw_stats(struct ix_softc *sc)
3267 {
3268 
3269 	device_t dev = sc->dev;
3270 
3271 	struct ix_tx_ring *txr = sc->tx_rings;
3272 	struct ix_rx_ring *rxr = sc->rx_rings;
3273 
3274 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
3275 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
3276 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
3277 	struct ixgbe_hw_stats *stats = &sc->stats;
3278 
3279 	struct sysctl_oid *stat_node, *queue_node;
3280 	struct sysctl_oid_list *stat_list, *queue_list;
3281 
3282 #define QUEUE_NAME_LEN 32
3283 	char namebuf[QUEUE_NAME_LEN];
3284 
3285 	/* MAC stats get their own sub node */
3286 
3287 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
3288 				    CTLFLAG_RD, NULL, "MAC Statistics");
3289 	stat_list = SYSCTL_CHILDREN(stat_node);
3290 
3291 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
3292 			CTLFLAG_RD, &stats->crcerrs,
3293 			"CRC Errors");
3294 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
3295 			CTLFLAG_RD, &stats->illerrc,
3296 			"Illegal Byte Errors");
3297 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
3298 			CTLFLAG_RD, &stats->errbc,
3299 			"Byte Errors");
3300 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
3301 			CTLFLAG_RD, &stats->mspdc,
3302 			"MAC Short Packets Discarded");
3303 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
3304 			CTLFLAG_RD, &stats->mlfc,
3305 			"MAC Local Faults");
3306 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
3307 			CTLFLAG_RD, &stats->mrfc,
3308 			"MAC Remote Faults");
3309 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
3310 			CTLFLAG_RD, &stats->rlec,
3311 			"Receive Length Errors");
3312 
3313 	/* Flow Control stats */
3314 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
3315 			CTLFLAG_RD, &stats->lxontxc,
3316 			"Link XON Transmitted");
3317 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
3318 			CTLFLAG_RD, &stats->lxonrxc,
3319 			"Link XON Received");
3320 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
3321 			CTLFLAG_RD, &stats->lxofftxc,
3322 			"Link XOFF Transmitted");
3323 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
3324 			CTLFLAG_RD, &stats->lxoffrxc,
3325 			"Link XOFF Received");
3326 
3327 	/* Packet Reception Stats */
3328 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
3329 			CTLFLAG_RD, &stats->tor,
3330 			"Total Octets Received");
3331 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
3332 			CTLFLAG_RD, &stats->gorc,
3333 			"Good Octets Received");
3334 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
3335 			CTLFLAG_RD, &stats->tpr,
3336 			"Total Packets Received");
3337 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
3338 			CTLFLAG_RD, &stats->gprc,
3339 			"Good Packets Received");
3340 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
3341 			CTLFLAG_RD, &stats->mprc,
3342 			"Multicast Packets Received");
3343 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
3344 			CTLFLAG_RD, &stats->bprc,
3345 			"Broadcast Packets Received");
3346 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
3347 			CTLFLAG_RD, &stats->prc64,
3348 			"64 byte frames received ");
3349 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
3350 			CTLFLAG_RD, &stats->prc127,
3351 			"65-127 byte frames received");
3352 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
3353 			CTLFLAG_RD, &stats->prc255,
3354 			"128-255 byte frames received");
3355 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
3356 			CTLFLAG_RD, &stats->prc511,
3357 			"256-511 byte frames received");
3358 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
3359 			CTLFLAG_RD, &stats->prc1023,
3360 			"512-1023 byte frames received");
3361 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
3362 			CTLFLAG_RD, &stats->prc1522,
3363 			"1023-1522 byte frames received");
3364 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
3365 			CTLFLAG_RD, &stats->ruc,
3366 			"Receive Undersized");
3367 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
3368 			CTLFLAG_RD, &stats->rfc,
3369 			"Fragmented Packets Received ");
3370 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
3371 			CTLFLAG_RD, &stats->roc,
3372 			"Oversized Packets Received");
3373 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
3374 			CTLFLAG_RD, &stats->rjc,
3375 			"Received Jabber");
3376 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
3377 			CTLFLAG_RD, &stats->mngprc,
3378 			"Management Packets Received");
3379 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
3380 			CTLFLAG_RD, &stats->mngptc,
3381 			"Management Packets Dropped");
3382 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
3383 			CTLFLAG_RD, &stats->xec,
3384 			"Checksum Errors");
3385 
3386 	/* Packet Transmission Stats */
3387 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
3388 			CTLFLAG_RD, &stats->gotc,
3389 			"Good Octets Transmitted");
3390 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
3391 			CTLFLAG_RD, &stats->tpt,
3392 			"Total Packets Transmitted");
3393 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
3394 			CTLFLAG_RD, &stats->gptc,
3395 			"Good Packets Transmitted");
3396 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
3397 			CTLFLAG_RD, &stats->bptc,
3398 			"Broadcast Packets Transmitted");
3399 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
3400 			CTLFLAG_RD, &stats->mptc,
3401 			"Multicast Packets Transmitted");
3402 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
3403 			CTLFLAG_RD, &stats->mngptc,
3404 			"Management Packets Transmitted");
3405 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
3406 			CTLFLAG_RD, &stats->ptc64,
3407 			"64 byte frames transmitted ");
3408 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
3409 			CTLFLAG_RD, &stats->ptc127,
3410 			"65-127 byte frames transmitted");
3411 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
3412 			CTLFLAG_RD, &stats->ptc255,
3413 			"128-255 byte frames transmitted");
3414 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
3415 			CTLFLAG_RD, &stats->ptc511,
3416 			"256-511 byte frames transmitted");
3417 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
3418 			CTLFLAG_RD, &stats->ptc1023,
3419 			"512-1023 byte frames transmitted");
3420 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
3421 			CTLFLAG_RD, &stats->ptc1522,
3422 			"1024-1522 byte frames transmitted");
3423 }
3424 #endif
3425 
3426 /*
3427  * Enable the hardware to drop packets when the buffer is full.
3428  * This is useful when multiple RX rings are used, so that no
3429  * single RX ring being full stalls the entire RX engine.  We
3430  * only enable this when multiple RX rings are used and when
3431  * flow control is disabled.
3432  */
3433 static void
3434 ix_enable_rx_drop(struct ix_softc *sc)
3435 {
3436 	struct ixgbe_hw *hw = &sc->hw;
3437 	int i;
3438 
3439 	if (bootverbose) {
3440 		if_printf(&sc->arpcom.ac_if,
3441 		    "flow control %d, enable RX drop\n", sc->fc);
3442 	}
3443 
3444 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
3445 		uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3446 
3447 		srrctl |= IXGBE_SRRCTL_DROP_EN;
3448 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3449 	}
3450 }
3451 
3452 static void
3453 ix_disable_rx_drop(struct ix_softc *sc)
3454 {
3455 	struct ixgbe_hw *hw = &sc->hw;
3456 	int i;
3457 
3458 	if (bootverbose) {
3459 		if_printf(&sc->arpcom.ac_if,
3460 		    "flow control %d, disable RX drop\n", sc->fc);
3461 	}
3462 
3463 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
3464 		uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3465 
3466 		srrctl &= ~IXGBE_SRRCTL_DROP_EN;
3467 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3468 	}
3469 }
3470 
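/*
 * Sysctl handler for flow control.  The written value must be one of
 * the enum ixgbe_fc_mode settings (none, rx_pause, tx_pause, full);
 * anything else is rejected with EINVAL.  Switching to or from 'none'
 * also toggles per-ring RX drop when multiple RX rings are active.
 */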
3471 static int
3472 ix_sysctl_flowctrl(SYSCTL_HANDLER_ARGS)
3473 {
3474 	struct ix_softc *sc = (struct ix_softc *)arg1;
3475 	struct ifnet *ifp = &sc->arpcom.ac_if;
3476 	int error, fc;
3477 
3478 	fc = sc->fc;
3479 	error = sysctl_handle_int(oidp, &fc, 0, req);
3480 	if (error || req->newptr == NULL)
3481 		return error;
3482 
3483 	switch (fc) {
3484 	case ixgbe_fc_rx_pause:
3485 	case ixgbe_fc_tx_pause:
3486 	case ixgbe_fc_full:
3487 	case ixgbe_fc_none:
3488 		break;
3489 	default:
3490 		return EINVAL;
3491 	}
3492 
3493 	ifnet_serialize_all(ifp);
3494 
3495 	/* Don't bother if it's not changed */
3496 	if (sc->fc == fc)
3497 		goto done;
3498 	sc->fc = fc;
3499 
3500 	/* Don't do anything, if the interface is not up yet */
3501 	if ((ifp->if_flags & IFF_RUNNING) == 0)
3502 		goto done;
3503 
3504 	if (sc->rx_ring_inuse > 1) {
3505 		switch (sc->fc) {
3506 		case ixgbe_fc_rx_pause:
3507 		case ixgbe_fc_tx_pause:
3508 		case ixgbe_fc_full:
3509 			ix_disable_rx_drop(sc);
3510 			break;
3511 
3512 		case ixgbe_fc_none:
3513 			ix_enable_rx_drop(sc);
3514 			break;
3515 
3516 		default:
3517 			panic("leading fc check mismatch");
3518 		}
3519 	}
3520 
3521 	sc->hw.fc.requested_mode = sc->fc;
3522 	/* Don't autoneg if forcing a value */
3523 	sc->hw.fc.disable_fc_autoneg = TRUE;
3524 	ixgbe_fc_enable(&sc->hw);
3525 
3526 done:
3527 	ifnet_deserialize_all(ifp);
3528 	return error;
3529 }
3530 
3531 #ifdef foo
3532 /* XXX not working properly w/ 82599 connected w/ DAC */
3533 /* XXX only work after the interface is up */
3534 static int
3535 ix_sysctl_advspeed(SYSCTL_HANDLER_ARGS)
3536 {
3537 	struct ix_softc *sc = (struct ix_softc *)arg1;
3538 	struct ifnet *ifp = &sc->arpcom.ac_if;
3539 	struct ixgbe_hw *hw = &sc->hw;
3540 	ixgbe_link_speed speed;
3541 	int error, advspeed;
3542 
3543 	advspeed = sc->advspeed;
3544 	error = sysctl_handle_int(oidp, &advspeed, 0, req);
3545 	if (error || req->newptr == NULL)
3546 		return error;
3547 
3548 	if (!(hw->phy.media_type == ixgbe_media_type_copper ||
3549 	    hw->phy.multispeed_fiber))
3550 		return EOPNOTSUPP;
3551 	if (hw->mac.ops.setup_link == NULL)
3552 		return EOPNOTSUPP;
3553 
3554 	switch (advspeed) {
3555 	case 0:	/* auto */
3556 		speed = IXGBE_LINK_SPEED_UNKNOWN;
3557 		break;
3558 
3559 	case 1:	/* 1Gb */
3560 		speed = IXGBE_LINK_SPEED_1GB_FULL;
3561 		break;
3562 
3563 	case 2:	/* 100Mb */
3564 		speed = IXGBE_LINK_SPEED_100_FULL;
3565 		break;
3566 
3567 	case 3:	/* 1Gb/10Gb */
3568 		speed = IXGBE_LINK_SPEED_1GB_FULL |
3569 		    IXGBE_LINK_SPEED_10GB_FULL;
3570 		break;
3571 
3572 	default:
3573 		return EINVAL;
3574 	}
3575 
3576 	ifnet_serialize_all(ifp);
3577 
3578 	if (sc->advspeed == advspeed) /* no change */
3579 		goto done;
3580 
3581 	if ((speed & IXGBE_LINK_SPEED_100_FULL) &&
3582 	    hw->mac.type != ixgbe_mac_X540) {
3583 		error = EOPNOTSUPP;
3584 		goto done;
3585 	}
3586 
3587 	sc->advspeed = advspeed;
3588 
3589 	if ((ifp->if_flags & IFF_RUNNING) == 0)
3590 		goto done;
3591 
3592 	if (speed == IXGBE_LINK_SPEED_UNKNOWN) {
3593 		ix_config_link(sc);
3594 	} else {
3595 		hw->mac.autotry_restart = TRUE;
3596 		hw->mac.ops.setup_link(hw, speed, sc->link_up);
3597 	}
3598 
3599 done:
3600 	ifnet_deserialize_all(ifp);
3601 	return error;
3602 }
3603 #endif
3604 
3605 static void
3606 ix_setup_serialize(struct ix_softc *sc)
3607 {
3608 	int i = 0, j;
3609 
3610 	/* Main + RX + TX */
3611 	sc->nserialize = 1 + sc->rx_ring_cnt + sc->tx_ring_cnt;
3612 	sc->serializes =
3613 	    kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *),
3614 	        M_DEVBUF, M_WAITOK | M_ZERO);
3615 
3616 	/*
3617 	 * Setup serializes
3618 	 *
3619 	 * NOTE: Order is critical
3620 	 */
3621 
3622 	KKASSERT(i < sc->nserialize);
3623 	sc->serializes[i++] = &sc->main_serialize;
3624 
3625 	for (j = 0; j < sc->rx_ring_cnt; ++j) {
3626 		KKASSERT(i < sc->nserialize);
3627 		sc->serializes[i++] = &sc->rx_rings[j].rx_serialize;
3628 	}
3629 
3630 	for (j = 0; j < sc->tx_ring_cnt; ++j) {
3631 		KKASSERT(i < sc->nserialize);
3632 		sc->serializes[i++] = &sc->tx_rings[j].tx_serialize;
3633 	}
3634 
3635 	KKASSERT(i == sc->nserialize);
3636 }
3637 
3638 static int
3639 ix_alloc_intr(struct ix_softc *sc)
3640 {
3641 	struct ix_intr_data *intr;
3642 	u_int intr_flags;
3643 
3644 	ix_alloc_msix(sc);
3645 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
3646 		ix_set_ring_inuse(sc, FALSE);
3647 		return 0;
3648 	}
3649 
3650 	if (sc->intr_data != NULL)
3651 		kfree(sc->intr_data, M_DEVBUF);
3652 
3653 	sc->intr_cnt = 1;
3654 	sc->intr_data = kmalloc(sizeof(struct ix_intr_data), M_DEVBUF,
3655 	    M_WAITOK | M_ZERO);
3656 	intr = &sc->intr_data[0];
3657 
3658 	/*
3659 	 * Allocate MSI/legacy interrupt resource
3660 	 */
3661 	sc->intr_type = pci_alloc_1intr(sc->dev, ix_msi_enable,
3662 	    &intr->intr_rid, &intr_flags);
3663 
3664 	intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
3665 	    &intr->intr_rid, intr_flags);
3666 	if (intr->intr_res == NULL) {
3667 		device_printf(sc->dev, "Unable to allocate bus resource: "
3668 		    "interrupt\n");
3669 		return ENXIO;
3670 	}
3671 
3672 	intr->intr_serialize = &sc->main_serialize;
3673 	intr->intr_cpuid = rman_get_cpuid(intr->intr_res);
3674 	intr->intr_func = ix_intr;
3675 	intr->intr_funcarg = sc;
3676 	intr->intr_rate = IX_INTR_RATE;
3677 	intr->intr_use = IX_INTR_USE_RXTX;
3678 
3679 	sc->tx_rings[0].tx_intr_cpuid = intr->intr_cpuid;
3680 	sc->tx_rings[0].tx_intr_vec = IX_TX_INTR_VEC;
3681 
3682 	sc->rx_rings[0].rx_intr_vec = IX_RX0_INTR_VEC;
3683 
3684 	ix_set_ring_inuse(sc, FALSE);
3685 
3686 	KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
3687 	if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS)
3688 		sc->rx_rings[1].rx_intr_vec = IX_RX1_INTR_VEC;
3689 
3690 	return 0;
3691 }
3692 
3693 static void
3694 ix_free_intr(struct ix_softc *sc)
3695 {
3696 	if (sc->intr_data == NULL)
3697 		return;
3698 
3699 	if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
3700 		struct ix_intr_data *intr = &sc->intr_data[0];
3701 
3702 		KKASSERT(sc->intr_cnt == 1);
3703 		if (intr->intr_res != NULL) {
3704 			bus_release_resource(sc->dev, SYS_RES_IRQ,
3705 			    intr->intr_rid, intr->intr_res);
3706 		}
3707 		if (sc->intr_type == PCI_INTR_TYPE_MSI)
3708 			pci_release_msi(sc->dev);
3709 
3710 		kfree(sc->intr_data, M_DEVBUF);
3711 	} else {
3712 		ix_free_msix(sc, TRUE);
3713 	}
3714 }
3715 
3716 static void
3717 ix_set_ring_inuse(struct ix_softc *sc, boolean_t polling)
3718 {
3719 	sc->rx_ring_inuse = ix_get_rxring_inuse(sc, polling);
3720 	sc->tx_ring_inuse = ix_get_txring_inuse(sc, polling);
3721 	if (bootverbose) {
3722 		if_printf(&sc->arpcom.ac_if,
3723 		    "RX rings %d/%d, TX rings %d/%d\n",
3724 		    sc->rx_ring_inuse, sc->rx_ring_cnt,
3725 		    sc->tx_ring_inuse, sc->tx_ring_cnt);
3726 	}
3727 }
3728 
3729 static int
3730 ix_get_rxring_inuse(const struct ix_softc *sc, boolean_t polling)
3731 {
3732 	if (!IX_ENABLE_HWRSS(sc))
3733 		return 1;
3734 
3735 	if (polling)
3736 		return sc->rx_ring_cnt;
3737 	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
3738 		return IX_MIN_RXRING_RSS;
3739 	else
3740 		return sc->rx_ring_msix;
3741 }
3742 
3743 static int
3744 ix_get_txring_inuse(const struct ix_softc *sc, boolean_t polling)
3745 {
3746 	if (!IX_ENABLE_HWTSS(sc))
3747 		return 1;
3748 
3749 	if (polling)
3750 		return sc->tx_ring_cnt;
3751 	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
3752 		return 1;
3753 	else
3754 		return sc->tx_ring_msix;
3755 }
3756 
3757 static int
3758 ix_setup_intr(struct ix_softc *sc)
3759 {
3760 	int i;
3761 
3762 	for (i = 0; i < sc->intr_cnt; ++i) {
3763 		struct ix_intr_data *intr = &sc->intr_data[i];
3764 		int error;
3765 
3766 		error = bus_setup_intr_descr(sc->dev, intr->intr_res,
3767 		    INTR_MPSAFE, intr->intr_func, intr->intr_funcarg,
3768 		    &intr->intr_hand, intr->intr_serialize, intr->intr_desc);
3769 		if (error) {
3770 			device_printf(sc->dev, "can't setup intr %d\n", i);
3771 			ix_teardown_intr(sc, i);
3772 			return error;
3773 		}
3774 	}
3775 	return 0;
3776 }
3777 
3778 static void
3779 ix_teardown_intr(struct ix_softc *sc, int intr_cnt)
3780 {
3781 	int i;
3782 
3783 	if (sc->intr_data == NULL)
3784 		return;
3785 
3786 	for (i = 0; i < intr_cnt; ++i) {
3787 		struct ix_intr_data *intr = &sc->intr_data[i];
3788 
3789 		bus_teardown_intr(sc->dev, intr->intr_res, intr->intr_hand);
3790 	}
3791 }
3792 
3793 static void
3794 ix_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
3795 {
3796 	struct ix_softc *sc = ifp->if_softc;
3797 
3798 	ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz);
3799 }
3800 
3801 static void
3802 ix_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
3803 {
3804 	struct ix_softc *sc = ifp->if_softc;
3805 
3806 	ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz);
3807 }
3808 
3809 static int
3810 ix_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
3811 {
3812 	struct ix_softc *sc = ifp->if_softc;
3813 
3814 	return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz);
3815 }
3816 
3817 #ifdef INVARIANTS
3818 
3819 static void
3820 ix_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
3821     boolean_t serialized)
3822 {
3823 	struct ix_softc *sc = ifp->if_softc;
3824 
3825 	ifnet_serialize_array_assert(sc->serializes, sc->nserialize, slz,
3826 	    serialized);
3827 }
3828 
3829 #endif	/* INVARIANTS */
3830 
3831 static void
3832 ix_free_rings(struct ix_softc *sc)
3833 {
3834 	int i;
3835 
3836 	if (sc->tx_rings != NULL) {
3837 		for (i = 0; i < sc->tx_ring_cnt; ++i) {
3838 			struct ix_tx_ring *txr = &sc->tx_rings[i];
3839 
3840 			ix_destroy_tx_ring(txr, txr->tx_ndesc);
3841 		}
3842 		kfree(sc->tx_rings, M_DEVBUF);
3843 	}
3844 
3845 	if (sc->rx_rings != NULL) {
3846 		for (i = 0; i < sc->rx_ring_cnt; ++i) {
3847 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
3848 
3849 			ix_destroy_rx_ring(rxr, rxr->rx_ndesc);
3850 		}
3851 		kfree(sc->rx_rings, M_DEVBUF);
3852 	}
3853 
3854 	if (sc->parent_tag != NULL)
3855 		bus_dma_tag_destroy(sc->parent_tag);
3856 }
3857 
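/*
 * TX watchdog.  If the hardware reports transmission paused
 * (IXGBE_TFCS_TXOFF) the watchdog timer is simply rearmed; otherwise
 * the ring state is dumped and the interface is reinitialized.
 */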
3858 static void
3859 ix_watchdog(struct ifaltq_subque *ifsq)
3860 {
3861 	struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
3862 	struct ifnet *ifp = ifsq_get_ifp(ifsq);
3863 	struct ix_softc *sc = ifp->if_softc;
3864 	int i;
3865 
3866 	KKASSERT(txr->tx_ifsq == ifsq);
3867 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3868 
3869 	/*
3870 	 * If the interface has been paused then don't do the watchdog check
3871 	 */
3872 	if (IXGBE_READ_REG(&sc->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF) {
3873 		txr->tx_watchdog.wd_timer = 5;
3874 		return;
3875 	}
3876 
3877 	if_printf(ifp, "Watchdog timeout -- resetting\n");
3878 	if_printf(ifp, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->tx_idx,
3879 	    IXGBE_READ_REG(&sc->hw, IXGBE_TDH(txr->tx_idx)),
3880 	    IXGBE_READ_REG(&sc->hw, IXGBE_TDT(txr->tx_idx)));
3881 	if_printf(ifp, "TX(%d) desc avail = %d, next TX to Clean = %d\n",
3882 	    txr->tx_idx, txr->tx_avail, txr->tx_next_clean);
3883 
3884 	ix_init(sc);
3885 	for (i = 0; i < sc->tx_ring_inuse; ++i)
3886 		ifsq_devstart_sched(sc->tx_rings[i].tx_ifsq);
3887 }
3888 
3889 static void
3890 ix_free_tx_ring(struct ix_tx_ring *txr)
3891 {
3892 	int i;
3893 
3894 	for (i = 0; i < txr->tx_ndesc; ++i) {
3895 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
3896 
3897 		if (txbuf->m_head != NULL) {
3898 			bus_dmamap_unload(txr->tx_tag, txbuf->map);
3899 			m_freem(txbuf->m_head);
3900 			txbuf->m_head = NULL;
3901 		}
3902 	}
3903 }
3904 
3905 static void
3906 ix_free_rx_ring(struct ix_rx_ring *rxr)
3907 {
3908 	int i;
3909 
3910 	for (i = 0; i < rxr->rx_ndesc; ++i) {
3911 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
3912 
3913 		if (rxbuf->fmp != NULL) {
3914 			m_freem(rxbuf->fmp);
3915 			rxbuf->fmp = NULL;
3916 			rxbuf->lmp = NULL;
3917 		} else {
3918 			KKASSERT(rxbuf->lmp == NULL);
3919 		}
3920 		if (rxbuf->m_head != NULL) {
3921 			bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
3922 			m_freem(rxbuf->m_head);
3923 			rxbuf->m_head = NULL;
3924 		}
3925 	}
3926 }
3927 
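/*
 * Attach a fresh mbuf to RX descriptor slot i.  The spare DMA map is
 * loaded first so the old buffer stays intact if allocation or the
 * DMA load fails; on success the maps are swapped and the descriptor
 * is rewritten.
 */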
3928 static int
3929 ix_newbuf(struct ix_rx_ring *rxr, int i, boolean_t wait)
3930 {
3931 	struct mbuf *m;
3932 	bus_dma_segment_t seg;
3933 	bus_dmamap_t map;
3934 	struct ix_rx_buf *rxbuf;
3935 	int flags, error, nseg;
3936 
3937 	flags = MB_DONTWAIT;
3938 	if (__predict_false(wait))
3939 		flags = MB_WAIT;
3940 
3941 	m = m_getjcl(flags, MT_DATA, M_PKTHDR, rxr->rx_mbuf_sz);
3942 	if (m == NULL) {
3943 		if (wait) {
3944 			if_printf(&rxr->rx_sc->arpcom.ac_if,
3945 			    "Unable to allocate RX mbuf\n");
3946 		}
3947 		return ENOBUFS;
3948 	}
3949 	m->m_len = m->m_pkthdr.len = rxr->rx_mbuf_sz;
3950 
3951 	error = bus_dmamap_load_mbuf_segment(rxr->rx_tag,
3952 	    rxr->rx_sparemap, m, &seg, 1, &nseg, BUS_DMA_NOWAIT);
3953 	if (error) {
3954 		m_freem(m);
3955 		if (wait) {
3956 			if_printf(&rxr->rx_sc->arpcom.ac_if,
3957 			    "Unable to load RX mbuf\n");
3958 		}
3959 		return error;
3960 	}
3961 
3962 	rxbuf = &rxr->rx_buf[i];
3963 	if (rxbuf->m_head != NULL)
3964 		bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
3965 
3966 	map = rxbuf->map;
3967 	rxbuf->map = rxr->rx_sparemap;
3968 	rxr->rx_sparemap = map;
3969 
3970 	rxbuf->m_head = m;
3971 	rxbuf->paddr = seg.ds_addr;
3972 
3973 	ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
3974 	return 0;
3975 }
3976 
3977 static void
3978 ix_add_sysctl(struct ix_softc *sc)
3979 {
3980 #ifdef IX_RSS_DEBUG
3981 	char node[32];
3982 	int i;
3983 #endif
3984 
3985 	sysctl_ctx_init(&sc->sysctl_ctx);
3986 	sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx,
3987 	    SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO,
3988 	    device_get_nameunit(sc->dev), CTLFLAG_RD, 0, "");
3989 	if (sc->sysctl_tree == NULL) {
3990 		device_printf(sc->dev, "can't add sysctl node\n");
3991 		return;
3992 	}
3993 
3994 	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
3995 	    OID_AUTO, "rxr", CTLFLAG_RD, &sc->rx_ring_cnt, 0, "# of RX rings");
3996 	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
3997 	    OID_AUTO, "rxr_inuse", CTLFLAG_RD, &sc->rx_ring_inuse, 0,
3998 	    "# of RX rings used");
3999 	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
4000 	    OID_AUTO, "txr", CTLFLAG_RD, &sc->tx_ring_cnt, 0, "# of TX rings");
4001 	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
4002 	    OID_AUTO, "txr_inuse", CTLFLAG_RD, &sc->tx_ring_inuse, 0,
4003 	    "# of TX rings used");
4004 	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
4005 	    OID_AUTO, "rxd", CTLTYPE_INT | CTLFLAG_RD,
4006 	    sc, 0, ix_sysctl_rxd, "I",
4007 	    "# of RX descs");
4008 	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
4009 	    OID_AUTO, "txd", CTLTYPE_INT | CTLFLAG_RD,
4010 	    sc, 0, ix_sysctl_txd, "I",
4011 	    "# of TX descs");
4012 	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
4013 	    OID_AUTO, "tx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4014 	    sc, 0, ix_sysctl_tx_wreg_nsegs, "I",
4015 	    "# of segments sent before write to hardware register");
4016 	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
4017 	    OID_AUTO, "rx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4018 	    sc, 0, ix_sysctl_rx_wreg_nsegs, "I",
4019 	    "# of segments received before write to hardware register");
4020 	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
4021 	    OID_AUTO, "tx_intr_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4022 	    sc, 0, ix_sysctl_tx_intr_nsegs, "I",
4023 	    "# of segments per TX interrupt");
4024 
4025 #ifdef IFPOLL_ENABLE
4026 	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
4027 	    OID_AUTO, "npoll_rxoff", CTLTYPE_INT|CTLFLAG_RW,
4028 	    sc, 0, ix_sysctl_npoll_rxoff, "I", "NPOLLING RX cpu offset");
4029 	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
4030 	    OID_AUTO, "npoll_txoff", CTLTYPE_INT|CTLFLAG_RW,
4031 	    sc, 0, ix_sysctl_npoll_txoff, "I", "NPOLLING TX cpu offset");
4032 #endif
4033 
4034 #define IX_ADD_INTR_RATE_SYSCTL(sc, use, name) \
4035 do { \
4036 	ix_add_intr_rate_sysctl(sc, IX_INTR_USE_##use, #name, \
4037 	    ix_sysctl_##name, #use " interrupt rate"); \
4038 } while (0)
4039 
4040 	IX_ADD_INTR_RATE_SYSCTL(sc, RXTX, rxtx_intr_rate);
4041 	IX_ADD_INTR_RATE_SYSCTL(sc, RX, rx_intr_rate);
4042 	IX_ADD_INTR_RATE_SYSCTL(sc, TX, tx_intr_rate);
4043 	IX_ADD_INTR_RATE_SYSCTL(sc, STATUS, sts_intr_rate);
4044 
4045 #undef IX_ADD_INTR_RATE_SYSCTL
4046 
4047 #ifdef IX_RSS_DEBUG
4048 	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
4049 	    OID_AUTO, "rss_debug", CTLFLAG_RW, &sc->rss_debug, 0,
4050 	    "RSS debug level");
4051 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
4052 		ksnprintf(node, sizeof(node), "rx%d_pkt", i);
4053 		SYSCTL_ADD_ULONG(&sc->sysctl_ctx,
4054 		    SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, node,
4055 		    CTLFLAG_RW, &sc->rx_rings[i].rx_pkts, "RXed packets");
4056 	}
4057 #endif
4058 
4059 	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
4060 	    OID_AUTO, "flowctrl", CTLTYPE_INT | CTLFLAG_RW,
4061 	    sc, 0, ix_sysctl_flowctrl, "I",
4062 	    "flow control, 0 - off, 1 - rx pause, 2 - tx pause, 3 - full");
4063 
4064 #ifdef foo
4065 	/*
4066 	 * Allow a kind of speed control by forcing the autoneg
4067 	 * advertised speed list to only a certain value, this
4068 	 * supports 1G on 82599 devices, and 100Mb on X540.
4069 	 */
4070 	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
4071 	    OID_AUTO, "advspeed", CTLTYPE_INT | CTLFLAG_RW,
4072 	    sc, 0, ix_sysctl_advspeed, "I",
4073 	    "advertised link speed, "
4074 	    "0 - auto, 1 - 1Gb, 2 - 100Mb, 3 - 1Gb/10Gb");
4075 #endif
4076 
4077 #if 0
4078 	ix_add_hw_stats(sc);
4079 #endif
4080 
4081 }
4082 
4083 static int
4084 ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4085 {
4086 	struct ix_softc *sc = (void *)arg1;
4087 	struct ifnet *ifp = &sc->arpcom.ac_if;
4088 	int error, nsegs, i;
4089 
4090 	nsegs = sc->tx_rings[0].tx_wreg_nsegs;
4091 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4092 	if (error || req->newptr == NULL)
4093 		return error;
4094 	if (nsegs < 0)
4095 		return EINVAL;
4096 
4097 	ifnet_serialize_all(ifp);
4098 	for (i = 0; i < sc->tx_ring_cnt; ++i)
4099 		sc->tx_rings[i].tx_wreg_nsegs = nsegs;
4100 	ifnet_deserialize_all(ifp);
4101 
4102 	return 0;
4103 }
4104 
4105 static int
4106 ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4107 {
4108 	struct ix_softc *sc = (void *)arg1;
4109 	struct ifnet *ifp = &sc->arpcom.ac_if;
4110 	int error, nsegs, i;
4111 
4112 	nsegs = sc->rx_rings[0].rx_wreg_nsegs;
4113 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4114 	if (error || req->newptr == NULL)
4115 		return error;
4116 	if (nsegs < 0)
4117 		return EINVAL;
4118 
4119 	ifnet_serialize_all(ifp);
4120 	for (i = 0; i < sc->rx_ring_cnt; ++i)
4121 		sc->rx_rings[i].rx_wreg_nsegs = nsegs;
4122 	ifnet_deserialize_all(ifp);
4123 
4124 	return 0;
4125 }
4126 
4127 static int
4128 ix_sysctl_txd(SYSCTL_HANDLER_ARGS)
4129 {
4130 	struct ix_softc *sc = (void *)arg1;
4131 	int txd;
4132 
4133 	txd = sc->tx_rings[0].tx_ndesc;
4134 	return sysctl_handle_int(oidp, &txd, 0, req);
4135 }
4136 
4137 static int
4138 ix_sysctl_rxd(SYSCTL_HANDLER_ARGS)
4139 {
4140 	struct ix_softc *sc = (void *)arg1;
4141 	int rxd;
4142 
4143 	rxd = sc->rx_rings[0].rx_ndesc;
4144 	return sysctl_handle_int(oidp, &rxd, 0, req);
4145 }
4146 
4147 static int
4148 ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS)
4149 {
4150 	struct ix_softc *sc = (void *)arg1;
4151 	struct ifnet *ifp = &sc->arpcom.ac_if;
4152 	struct ix_tx_ring *txr = &sc->tx_rings[0];
4153 	int error, nsegs;
4154 
4155 	nsegs = txr->tx_intr_nsegs;
4156 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4157 	if (error || req->newptr == NULL)
4158 		return error;
4159 	if (nsegs < 0)
4160 		return EINVAL;
4161 
4162 	ifnet_serialize_all(ifp);
4163 
4164 	if (nsegs >= txr->tx_ndesc - IX_MAX_SCATTER - IX_TX_RESERVED) {
4165 		error = EINVAL;
4166 	} else {
4167 		int i;
4168 
4169 		error = 0;
4170 		for (i = 0; i < sc->tx_ring_cnt; ++i)
4171 			sc->tx_rings[i].tx_intr_nsegs = nsegs;
4172 	}
4173 
4174 	ifnet_deserialize_all(ifp);
4175 
4176 	return error;
4177 }
4178 
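/*
 * Convert the requested interrupt rate for vector idx into the
 * hardware EITR interval field, clamping it to the per-MAC limits
 * before writing the register back.
 */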
4179 static void
4180 ix_set_eitr(struct ix_softc *sc, int idx, int rate)
4181 {
4182 	uint32_t eitr, eitr_intvl;
4183 
4184 	eitr = IXGBE_READ_REG(&sc->hw, IXGBE_EITR(idx));
4185 	eitr_intvl = 1000000000 / 256 / rate;
4186 
4187 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4188 		eitr &= ~IX_EITR_INTVL_MASK_82598;
4189 		if (eitr_intvl == 0)
4190 			eitr_intvl = 1;
4191 		else if (eitr_intvl > IX_EITR_INTVL_MASK_82598)
4192 			eitr_intvl = IX_EITR_INTVL_MASK_82598;
4193 	} else {
4194 		eitr &= ~IX_EITR_INTVL_MASK;
4195 
4196 		eitr_intvl &= ~IX_EITR_INTVL_RSVD_MASK;
4197 		if (eitr_intvl == 0)
4198 			eitr_intvl = IX_EITR_INTVL_MIN;
4199 		else if (eitr_intvl > IX_EITR_INTVL_MAX)
4200 			eitr_intvl = IX_EITR_INTVL_MAX;
4201 	}
4202 	eitr |= eitr_intvl;
4203 
4204 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EITR(idx), eitr);
4205 }
4206 
4207 static int
4208 ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS)
4209 {
4210 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RXTX);
4211 }
4212 
4213 static int
4214 ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS)
4215 {
4216 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RX);
4217 }
4218 
4219 static int
4220 ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS)
4221 {
4222 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_TX);
4223 }
4224 
4225 static int
4226 ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS)
4227 {
4228 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_STATUS);
4229 }
4230 
4231 static int
4232 ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int use)
4233 {
4234 	struct ix_softc *sc = (void *)arg1;
4235 	struct ifnet *ifp = &sc->arpcom.ac_if;
4236 	int error, rate, i;
4237 
4238 	rate = 0;
4239 	for (i = 0; i < sc->intr_cnt; ++i) {
4240 		if (sc->intr_data[i].intr_use == use) {
4241 			rate = sc->intr_data[i].intr_rate;
4242 			break;
4243 		}
4244 	}
4245 
4246 	error = sysctl_handle_int(oidp, &rate, 0, req);
4247 	if (error || req->newptr == NULL)
4248 		return error;
4249 	if (rate <= 0)
4250 		return EINVAL;
4251 
4252 	ifnet_serialize_all(ifp);
4253 
4254 	for (i = 0; i < sc->intr_cnt; ++i) {
4255 		if (sc->intr_data[i].intr_use == use) {
4256 			sc->intr_data[i].intr_rate = rate;
4257 			if (ifp->if_flags & IFF_RUNNING)
4258 				ix_set_eitr(sc, i, rate);
4259 		}
4260 	}
4261 
4262 	ifnet_deserialize_all(ifp);
4263 
4264 	return error;
4265 }
4266 
4267 static void
4268 ix_add_intr_rate_sysctl(struct ix_softc *sc, int use,
4269     const char *name, int (*handler)(SYSCTL_HANDLER_ARGS), const char *desc)
4270 {
4271 	int i;
4272 
4273 	for (i = 0; i < sc->intr_cnt; ++i) {
4274 		if (sc->intr_data[i].intr_use == use) {
4275 			SYSCTL_ADD_PROC(&sc->sysctl_ctx,
4276 			    SYSCTL_CHILDREN(sc->sysctl_tree),
4277 			    OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW,
4278 			    sc, 0, handler, "I", desc);
4279 			break;
4280 		}
4281 	}
4282 }
4283 
4284 static void
4285 ix_set_timer_cpuid(struct ix_softc *sc, boolean_t polling)
4286 {
4287 	if (polling || sc->intr_type == PCI_INTR_TYPE_MSIX)
4288 		sc->timer_cpuid = 0; /* XXX fixed */
4289 	else
4290 		sc->timer_cpuid = rman_get_cpuid(sc->intr_data[0].intr_res);
4291 }
4292 
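/*
 * Attempt MSI-X allocation.  The usable vector count is rounded down
 * to a power of two, one vector is reserved for status/link
 * interrupts, and the rest are spread over the RX/TX rings either
 * independently or aggregated (one vector per RX/TX ring pair),
 * depending on how many vectors the device provides and the
 * msix.agg_rxtx tunable.
 */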
4293 static void
4294 ix_alloc_msix(struct ix_softc *sc)
4295 {
4296 	int msix_enable, msix_cnt, msix_cnt2, alloc_cnt;
4297 	struct ix_intr_data *intr;
4298 	int i, x, error;
4299 	int offset, offset_def, agg_rxtx, ring_max;
4300 	boolean_t aggregate, setup = FALSE;
4301 
4302 	msix_enable = ix_msix_enable;
4303 	/*
4304 	 * Don't enable MSI-X on 82598 by default, see:
4305 	 * 82598 specification update errata #38
4306 	 */
4307 	if (sc->hw.mac.type == ixgbe_mac_82598EB)
4308 		msix_enable = 0;
4309 	msix_enable = device_getenv_int(sc->dev, "msix.enable", msix_enable);
4310 	if (!msix_enable)
4311 		return;
4312 
4313 	msix_cnt = pci_msix_count(sc->dev);
4314 #ifdef IX_MSIX_DEBUG
4315 	msix_cnt = device_getenv_int(sc->dev, "msix.count", msix_cnt);
4316 #endif
4317 	if (msix_cnt <= 1) {
4318 		/* One MSI-X model does not make sense */
4319 		return;
4320 	}
4321 
4322 	i = 0;
4323 	while ((1 << (i + 1)) <= msix_cnt)
4324 		++i;
4325 	msix_cnt2 = 1 << i;
4326 
4327 	if (bootverbose) {
4328 		device_printf(sc->dev, "MSI-X count %d/%d\n",
4329 		    msix_cnt2, msix_cnt);
4330 	}
4331 
4332 	KKASSERT(msix_cnt >= msix_cnt2);
4333 	if (msix_cnt == msix_cnt2) {
4334 		/* We need at least one MSI-X for link status */
4335 		msix_cnt2 >>= 1;
4336 		if (msix_cnt2 <= 1) {
4337 			/* One MSI-X for RX/TX does not make sense */
4338 			device_printf(sc->dev, "not enough MSI-X for TX/RX, "
4339 			    "MSI-X count %d/%d\n", msix_cnt2, msix_cnt);
4340 			return;
4341 		}
4342 		KKASSERT(msix_cnt > msix_cnt2);
4343 
4344 		if (bootverbose) {
4345 			device_printf(sc->dev, "MSI-X count eq fixup %d/%d\n",
4346 			    msix_cnt2, msix_cnt);
4347 		}
4348 	}
4349 
4350 	/*
4351 	 * Make sure that we don't break interrupt related registers
4352 	 * (EIMS, etc) limitation.
4353 	 *
4354 	 * NOTE: msix_cnt > msix_cnt2, when we reach here
4355 	 */
4356 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4357 		if (msix_cnt2 > IX_MAX_MSIX_82598)
4358 			msix_cnt2 = IX_MAX_MSIX_82598;
4359 	} else {
4360 		if (msix_cnt2 > IX_MAX_MSIX)
4361 			msix_cnt2 = IX_MAX_MSIX;
4362 	}
4363 	msix_cnt = msix_cnt2 + 1;	/* +1 for status */
4364 
4365 	if (bootverbose) {
4366 		device_printf(sc->dev, "MSI-X count max fixup %d/%d\n",
4367 		    msix_cnt2, msix_cnt);
4368 	}
4369 
4370 	sc->rx_ring_msix = sc->rx_ring_cnt;
4371 	if (sc->rx_ring_msix > msix_cnt2)
4372 		sc->rx_ring_msix = msix_cnt2;
4373 
4374 	sc->tx_ring_msix = sc->tx_ring_cnt;
4375 	if (sc->tx_ring_msix > msix_cnt2)
4376 		sc->tx_ring_msix = msix_cnt2;
4377 
4378 	ring_max = sc->rx_ring_msix;
4379 	if (ring_max < sc->tx_ring_msix)
4380 		ring_max = sc->tx_ring_msix;
4381 
4382 	/* Allow user to force independent RX/TX MSI-X handling */
4383 	agg_rxtx = device_getenv_int(sc->dev, "msix.agg_rxtx",
4384 	    ix_msix_agg_rxtx);
4385 
4386 	if (!agg_rxtx && msix_cnt >= sc->tx_ring_msix + sc->rx_ring_msix + 1) {
4387 		/*
4388 		 * Independent TX/RX MSI-X
4389 		 */
4390 		aggregate = FALSE;
4391 		if (bootverbose)
4392 			device_printf(sc->dev, "independent TX/RX MSI-X\n");
4393 		alloc_cnt = sc->tx_ring_msix + sc->rx_ring_msix;
4394 	} else {
4395 		/*
4396 		 * Aggregate TX/RX MSI-X
4397 		 */
4398 		aggregate = TRUE;
4399 		if (bootverbose)
4400 			device_printf(sc->dev, "aggregate TX/RX MSI-X\n");
4401 		alloc_cnt = msix_cnt2;
4402 		if (alloc_cnt > ring_max)
4403 			alloc_cnt = ring_max;
4404 		KKASSERT(alloc_cnt >= sc->rx_ring_msix &&
4405 		    alloc_cnt >= sc->tx_ring_msix);
4406 	}
4407 	++alloc_cnt;	/* For status */
4408 
4409 	if (bootverbose) {
4410 		device_printf(sc->dev, "MSI-X alloc %d, "
4411 		    "RX ring %d, TX ring %d\n", alloc_cnt,
4412 		    sc->rx_ring_msix, sc->tx_ring_msix);
4413 	}
4414 
4415 	sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82598);
4416 	sc->msix_mem_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4417 	    &sc->msix_mem_rid, RF_ACTIVE);
4418 	if (sc->msix_mem_res == NULL) {
4419 		sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82599);
4420 		sc->msix_mem_res = bus_alloc_resource_any(sc->dev,
4421 		    SYS_RES_MEMORY, &sc->msix_mem_rid, RF_ACTIVE);
4422 		if (sc->msix_mem_res == NULL) {
4423 			device_printf(sc->dev, "Unable to map MSI-X table\n");
4424 			return;
4425 		}
4426 	}
4427 
4428 	sc->intr_cnt = alloc_cnt;
4429 	sc->intr_data = kmalloc(sizeof(struct ix_intr_data) * sc->intr_cnt,
4430 	    M_DEVBUF, M_WAITOK | M_ZERO);
4431 	for (x = 0; x < sc->intr_cnt; ++x) {
4432 		intr = &sc->intr_data[x];
4433 		intr->intr_rid = -1;
4434 		intr->intr_rate = IX_INTR_RATE;
4435 	}
4436 
4437 	x = 0;
4438 	if (!aggregate) {
4439 		/*
4440 		 * RX rings
4441 		 */
4442 		if (sc->rx_ring_msix == ncpus2) {
4443 			offset = 0;
4444 		} else {
4445 			offset_def = (sc->rx_ring_msix *
4446 			    device_get_unit(sc->dev)) % ncpus2;
4447 
4448 			offset = device_getenv_int(sc->dev,
4449 			    "msix.rxoff", offset_def);
4450 			if (offset >= ncpus2 ||
4451 			    offset % sc->rx_ring_msix != 0) {
4452 				device_printf(sc->dev,
4453 				    "invalid msix.rxoff %d, use %d\n",
4454 				    offset, offset_def);
4455 				offset = offset_def;
4456 			}
4457 		}
4458 		ix_conf_rx_msix(sc, 0, &x, offset);
4459 
4460 		/*
4461 		 * TX rings
4462 		 */
4463 		if (sc->tx_ring_msix == ncpus2) {
4464 			offset = 0;
4465 		} else {
4466 			offset_def = (sc->tx_ring_msix *
4467 			    device_get_unit(sc->dev)) % ncpus2;
4468 
4469 			offset = device_getenv_int(sc->dev,
4470 			    "msix.txoff", offset_def);
4471 			if (offset >= ncpus2 ||
4472 			    offset % sc->tx_ring_msix != 0) {
4473 				device_printf(sc->dev,
4474 				    "invalid msix.txoff %d, use %d\n",
4475 				    offset, offset_def);
4476 				offset = offset_def;
4477 			}
4478 		}
4479 		ix_conf_tx_msix(sc, 0, &x, offset);
4480 	} else {
4481 		int ring_agg;
4482 
4483 		ring_agg = sc->rx_ring_msix;
4484 		if (ring_agg > sc->tx_ring_msix)
4485 			ring_agg = sc->tx_ring_msix;
4486 
4487 		if (ring_max == ncpus2) {
4488 			offset = 0;
4489 		} else {
4490 			offset_def = (ring_max * device_get_unit(sc->dev)) %
4491 			    ncpus2;
4492 
4493 			offset = device_getenv_int(sc->dev, "msix.off",
4494 			    offset_def);
4495 			if (offset >= ncpus2 || offset % ring_max != 0) {
4496 				device_printf(sc->dev,
4497 				    "invalid msix.off %d, use %d\n",
4498 				    offset, offset_def);
4499 				offset = offset_def;
4500 			}
4501 		}
4502 
4503 		for (i = 0; i < ring_agg; ++i) {
4504 			struct ix_tx_ring *txr = &sc->tx_rings[i];
4505 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
4506 
4507 			KKASSERT(x < sc->intr_cnt);
4508 			rxr->rx_intr_vec = x;
4509 			ix_setup_msix_eims(sc, x,
4510 			    &rxr->rx_eims, &rxr->rx_eims_val);
4511 			rxr->rx_txr = txr;
4512 			/* NOTE: Leave TX ring's intr_vec negative */
4513 
4514 			intr = &sc->intr_data[x++];
4515 
4516 			intr->intr_serialize = &rxr->rx_serialize;
4517 			intr->intr_func = ix_msix_rxtx;
4518 			intr->intr_funcarg = rxr;
4519 			intr->intr_use = IX_INTR_USE_RXTX;
4520 
4521 			intr->intr_cpuid = i + offset;
4522 			KKASSERT(intr->intr_cpuid < ncpus2);
4523 			txr->tx_intr_cpuid = intr->intr_cpuid;
4524 
4525 			ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0),
4526 			    "%s rxtx%d", device_get_nameunit(sc->dev), i);
4527 			intr->intr_desc = intr->intr_desc0;
4528 		}
4529 
4530 		if (ring_agg != ring_max) {
4531 			if (ring_max == sc->tx_ring_msix)
4532 				ix_conf_tx_msix(sc, i, &x, offset);
4533 			else
4534 				ix_conf_rx_msix(sc, i, &x, offset);
4535 		}
4536 	}
4537 
4538 	/*
4539 	 * Status MSI-X
4540 	 */
4541 	KKASSERT(x < sc->intr_cnt);
4542 	sc->sts_msix_vec = x;
4543 
4544 	intr = &sc->intr_data[x++];
4545 
4546 	intr->intr_serialize = &sc->main_serialize;
4547 	intr->intr_func = ix_msix_status;
4548 	intr->intr_funcarg = sc;
4549 	intr->intr_cpuid = 0;
4550 	intr->intr_use = IX_INTR_USE_STATUS;
4551 
4552 	ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s sts",
4553 	    device_get_nameunit(sc->dev));
4554 	intr->intr_desc = intr->intr_desc0;
4555 
4556 	KKASSERT(x == sc->intr_cnt);
4557 
4558 	error = pci_setup_msix(sc->dev);
4559 	if (error) {
4560 		device_printf(sc->dev, "Setup MSI-X failed\n");
4561 		goto back;
4562 	}
4563 	setup = TRUE;
4564 
4565 	for (i = 0; i < sc->intr_cnt; ++i) {
4566 		intr = &sc->intr_data[i];
4567 
4568 		error = pci_alloc_msix_vector(sc->dev, i, &intr->intr_rid,
4569 		    intr->intr_cpuid);
4570 		if (error) {
4571 			device_printf(sc->dev,
4572 			    "Unable to allocate MSI-X %d on cpu%d\n", i,
4573 			    intr->intr_cpuid);
4574 			goto back;
4575 		}
4576 
4577 		intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
4578 		    &intr->intr_rid, RF_ACTIVE);
4579 		if (intr->intr_res == NULL) {
4580 			device_printf(sc->dev,
4581 			    "Unable to allocate MSI-X %d resource\n", i);
4582 			error = ENOMEM;
4583 			goto back;
4584 		}
4585 	}
4586 
4587 	pci_enable_msix(sc->dev);
4588 	sc->intr_type = PCI_INTR_TYPE_MSIX;
4589 back:
4590 	if (error)
4591 		ix_free_msix(sc, setup);
4592 }
4593 
4594 static void
4595 ix_free_msix(struct ix_softc *sc, boolean_t setup)
4596 {
4597 	int i;
4598 
4599 	KKASSERT(sc->intr_cnt > 1);
4600 
4601 	for (i = 0; i < sc->intr_cnt; ++i) {
4602 		struct ix_intr_data *intr = &sc->intr_data[i];
4603 
4604 		if (intr->intr_res != NULL) {
4605 			bus_release_resource(sc->dev, SYS_RES_IRQ,
4606 			    intr->intr_rid, intr->intr_res);
4607 		}
4608 		if (intr->intr_rid >= 0)
4609 			pci_release_msix_vector(sc->dev, intr->intr_rid);
4610 	}
4611 	if (setup)
4612 		pci_teardown_msix(sc->dev);
4613 
4614 	sc->intr_cnt = 0;
4615 	kfree(sc->intr_data, M_DEVBUF);
4616 	sc->intr_data = NULL;
4617 }
4618 
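/*
 * Assign an MSI-X vector, EIMS mapping, target CPU and description
 * to each RX ring from index i onward; ix_conf_tx_msix() below is
 * the TX counterpart.
 */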
4619 static void
4620 ix_conf_rx_msix(struct ix_softc *sc, int i, int *x0, int offset)
4621 {
4622 	int x = *x0;
4623 
4624 	for (; i < sc->rx_ring_msix; ++i) {
4625 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
4626 		struct ix_intr_data *intr;
4627 
4628 		KKASSERT(x < sc->intr_cnt);
4629 		rxr->rx_intr_vec = x;
4630 		ix_setup_msix_eims(sc, x, &rxr->rx_eims, &rxr->rx_eims_val);
4631 
4632 		intr = &sc->intr_data[x++];
4633 
4634 		intr->intr_serialize = &rxr->rx_serialize;
4635 		intr->intr_func = ix_msix_rx;
4636 		intr->intr_funcarg = rxr;
4637 		intr->intr_rate = IX_MSIX_RX_RATE;
4638 		intr->intr_use = IX_INTR_USE_RX;
4639 
4640 		intr->intr_cpuid = i + offset;
4641 		KKASSERT(intr->intr_cpuid < ncpus2);
4642 
4643 		ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s rx%d",
4644 		    device_get_nameunit(sc->dev), i);
4645 		intr->intr_desc = intr->intr_desc0;
4646 	}
4647 	*x0 = x;
4648 }
4649 
4650 static void
4651 ix_conf_tx_msix(struct ix_softc *sc, int i, int *x0, int offset)
4652 {
4653 	int x = *x0;
4654 
4655 	for (; i < sc->tx_ring_msix; ++i) {
4656 		struct ix_tx_ring *txr = &sc->tx_rings[i];
4657 		struct ix_intr_data *intr;
4658 
4659 		KKASSERT(x < sc->intr_cnt);
4660 		txr->tx_intr_vec = x;
4661 		ix_setup_msix_eims(sc, x, &txr->tx_eims, &txr->tx_eims_val);
4662 
4663 		intr = &sc->intr_data[x++];
4664 
4665 		intr->intr_serialize = &txr->tx_serialize;
4666 		intr->intr_func = ix_msix_tx;
4667 		intr->intr_funcarg = txr;
4668 		intr->intr_rate = IX_MSIX_TX_RATE;
4669 		intr->intr_use = IX_INTR_USE_TX;
4670 
4671 		intr->intr_cpuid = i + offset;
4672 		KKASSERT(intr->intr_cpuid < ncpus2);
4673 		txr->tx_intr_cpuid = intr->intr_cpuid;
4674 
4675 		ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s tx%d",
4676 		    device_get_nameunit(sc->dev), i);
4677 		intr->intr_desc = intr->intr_desc0;
4678 	}
4679 	*x0 = x;
4680 }
4681 
4682 static void
4683 ix_msix_rx(void *xrxr)
4684 {
4685 	struct ix_rx_ring *rxr = xrxr;
4686 
4687 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4688 
4689 	ix_rxeof(rxr, -1);
4690 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
4691 }
4692 
4693 static void
4694 ix_msix_tx(void *xtxr)
4695 {
4696 	struct ix_tx_ring *txr = xtxr;
4697 
4698 	ASSERT_SERIALIZED(&txr->tx_serialize);
4699 
4700 	ix_txeof(txr, *(txr->tx_hdr));
4701 	if (!ifsq_is_empty(txr->tx_ifsq))
4702 		ifsq_devstart(txr->tx_ifsq);
4703 	IXGBE_WRITE_REG(&txr->tx_sc->hw, txr->tx_eims, txr->tx_eims_val);
4704 }
4705 
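/*
 * Aggregated RX/TX MSI-X handler.  Entered with the RX serializer
 * held; the paired TX ring is cleaned (under its own serializer)
 * only when the write-back header shows new completions.
 */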
4706 static void
4707 ix_msix_rxtx(void *xrxr)
4708 {
4709 	struct ix_rx_ring *rxr = xrxr;
4710 	struct ix_tx_ring *txr;
4711 	int hdr;
4712 
4713 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4714 
4715 	ix_rxeof(rxr, -1);
4716 
4717 	/*
4718 	 * NOTE:
4719 	 * Since tx_next_clean is only changed by ix_txeof(),
4720 	 * which is called only in interrupt handler, the
4721 	 * check w/o holding tx serializer is MPSAFE.
4722 	 */
4723 	txr = rxr->rx_txr;
4724 	hdr = *(txr->tx_hdr);
4725 	if (hdr != txr->tx_next_clean) {
4726 		lwkt_serialize_enter(&txr->tx_serialize);
4727 		ix_txeof(txr, hdr);
4728 		if (!ifsq_is_empty(txr->tx_ifsq))
4729 			ifsq_devstart(txr->tx_ifsq);
4730 		lwkt_serialize_exit(&txr->tx_serialize);
4731 	}
4732 
4733 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
4734 }
4735 
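/*
 * Handle non-queue interrupt causes: link state changes, and on
 * newer MACs ECC errors and SFP module/multispeed-fiber events;
 * fan failure on 82598AT and over-temperature on X540 are also
 * reported.
 */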
4736 static void
4737 ix_intr_status(struct ix_softc *sc, uint32_t eicr)
4738 {
4739 	struct ixgbe_hw *hw = &sc->hw;
4740 
4741 	/* Link status change */
4742 	if (eicr & IXGBE_EICR_LSC)
4743 		ix_handle_link(sc);
4744 
4745 	if (hw->mac.type != ixgbe_mac_82598EB) {
4746 		if (eicr & IXGBE_EICR_ECC)
4747 			if_printf(&sc->arpcom.ac_if, "ECC ERROR!!  Reboot!!\n");
4748 		else if (eicr & IXGBE_EICR_GPI_SDP1)
4749 			ix_handle_msf(sc);
4750 		else if (eicr & IXGBE_EICR_GPI_SDP2)
4751 			ix_handle_mod(sc);
4752 	}
4753 
4754 	/* Check for fan failure */
4755 	if (hw->device_id == IXGBE_DEV_ID_82598AT &&
4756 	    (eicr & IXGBE_EICR_GPI_SDP1))
4757 		if_printf(&sc->arpcom.ac_if, "FAN FAILURE!!  Replace!!\n");
4758 
4759 	/* Check for over temp condition */
4760 	if (hw->mac.type == ixgbe_mac_X540 && (eicr & IXGBE_EICR_TS)) {
4761 		if_printf(&sc->arpcom.ac_if, "OVER TEMP!!  "
4762 		    "PHY IS SHUT DOWN!!  Reboot\n");
4763 	}
4764 }
4765 
4766 static void
4767 ix_msix_status(void *xsc)
4768 {
4769 	struct ix_softc *sc = xsc;
4770 	uint32_t eicr;
4771 
4772 	ASSERT_SERIALIZED(&sc->main_serialize);
4773 
4774 	eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
4775 	ix_intr_status(sc, eicr);
4776 
4777 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMS, sc->intr_mask);
4778 }
4779 
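/*
 * Map MSI-X vector x to the EIMS register and bit used to re-enable
 * it: EIMS on 82598, EIMS_EX(0) for vectors 0-31 and EIMS_EX(1) for
 * vectors 32 and up on later MACs.
 */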
4780 static void
4781 ix_setup_msix_eims(const struct ix_softc *sc, int x,
4782     uint32_t *eims, uint32_t *eims_val)
4783 {
4784 	if (x < 32) {
4785 		if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4786 			KASSERT(x < IX_MAX_MSIX_82598,
4787 			    ("%s: invalid vector %d for 82598",
4788 			     device_get_nameunit(sc->dev), x));
4789 			*eims = IXGBE_EIMS;
4790 		} else {
4791 			*eims = IXGBE_EIMS_EX(0);
4792 		}
4793 		*eims_val = 1 << x;
4794 	} else {
4795 		KASSERT(x < IX_MAX_MSIX, ("%s: invalid vector %d",
4796 		    device_get_nameunit(sc->dev), x));
4797 		KASSERT(sc->hw.mac.type != ixgbe_mac_82598EB,
4798 		    ("%s: invalid vector %d for 82598",
4799 		     device_get_nameunit(sc->dev), x));
4800 		*eims = IXGBE_EIMS_EX(1);
4801 		*eims_val = 1 << (x - 32);
4802 	}
4803 }
4804 
4805 #ifdef IFPOLL_ENABLE
4806 
4807 static void
4808 ix_npoll_status(struct ifnet *ifp)
4809 {
4810 	struct ix_softc *sc = ifp->if_softc;
4811 	uint32_t eicr;
4812 
4813 	ASSERT_SERIALIZED(&sc->main_serialize);
4814 
4815 	eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
4816 	ix_intr_status(sc, eicr);
4817 }
4818 
4819 static void
4820 ix_npoll_tx(struct ifnet *ifp, void *arg, int cycle __unused)
4821 {
4822 	struct ix_tx_ring *txr = arg;
4823 
4824 	ASSERT_SERIALIZED(&txr->tx_serialize);
4825 
4826 	ix_txeof(txr, *(txr->tx_hdr));
4827 	if (!ifsq_is_empty(txr->tx_ifsq))
4828 		ifsq_devstart(txr->tx_ifsq);
4829 }
4830 
4831 static void
4832 ix_npoll_rx(struct ifnet *ifp __unused, void *arg, int cycle)
4833 {
4834 	struct ix_rx_ring *rxr = arg;
4835 
4836 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4837 
4838 	ix_rxeof(rxr, cycle);
4839 }
4840 
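/*
 * Fill in (or clear) the ifpoll registration for the status handler
 * and all RX/TX rings.  If the number of rings used by polling
 * differs from what is currently active, the interface is
 * reinitialized; otherwise only the interrupt state and timer CPU
 * are adjusted.
 */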
4841 static void
4842 ix_npoll(struct ifnet *ifp, struct ifpoll_info *info)
4843 {
4844 	struct ix_softc *sc = ifp->if_softc;
4845 	int i, txr_cnt, rxr_cnt;
4846 
4847 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
4848 
4849 	if (info) {
4850 		int off;
4851 
4852 		info->ifpi_status.status_func = ix_npoll_status;
4853 		info->ifpi_status.serializer = &sc->main_serialize;
4854 
4855 		txr_cnt = ix_get_txring_inuse(sc, TRUE);
4856 		off = sc->tx_npoll_off;
4857 		for (i = 0; i < txr_cnt; ++i) {
4858 			struct ix_tx_ring *txr = &sc->tx_rings[i];
4859 			int idx = i + off;
4860 
4861 			KKASSERT(idx < ncpus2);
4862 			info->ifpi_tx[idx].poll_func = ix_npoll_tx;
4863 			info->ifpi_tx[idx].arg = txr;
4864 			info->ifpi_tx[idx].serializer = &txr->tx_serialize;
4865 			ifsq_set_cpuid(txr->tx_ifsq, idx);
4866 		}
4867 
4868 		rxr_cnt = ix_get_rxring_inuse(sc, TRUE);
4869 		off = sc->rx_npoll_off;
4870 		for (i = 0; i < rxr_cnt; ++i) {
4871 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
4872 			int idx = i + off;
4873 
4874 			KKASSERT(idx < ncpus2);
4875 			info->ifpi_rx[idx].poll_func = ix_npoll_rx;
4876 			info->ifpi_rx[idx].arg = rxr;
4877 			info->ifpi_rx[idx].serializer = &rxr->rx_serialize;
4878 		}
4879 
4880 		if (ifp->if_flags & IFF_RUNNING) {
4881 			if (rxr_cnt == sc->rx_ring_inuse &&
4882 			    txr_cnt == sc->tx_ring_inuse) {
4883 				ix_set_timer_cpuid(sc, TRUE);
4884 				ix_disable_intr(sc);
4885 			} else {
4886 				ix_init(sc);
4887 			}
4888 		}
4889 	} else {
4890 		for (i = 0; i < sc->tx_ring_cnt; ++i) {
4891 			struct ix_tx_ring *txr = &sc->tx_rings[i];
4892 
4893 			ifsq_set_cpuid(txr->tx_ifsq, txr->tx_intr_cpuid);
4894 		}
4895 
4896 		if (ifp->if_flags & IFF_RUNNING) {
4897 			txr_cnt = ix_get_txring_inuse(sc, FALSE);
4898 			rxr_cnt = ix_get_rxring_inuse(sc, FALSE);
4899 
4900 			if (rxr_cnt == sc->rx_ring_inuse &&
4901 			    txr_cnt == sc->tx_ring_inuse) {
4902 				ix_set_timer_cpuid(sc, FALSE);
4903 				ix_enable_intr(sc);
4904 			} else {
4905 				ix_init(sc);
4906 			}
4907 		}
4908 	}
4909 }
4910 
4911 static int
4912 ix_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS)
4913 {
4914 	struct ix_softc *sc = (void *)arg1;
4915 	struct ifnet *ifp = &sc->arpcom.ac_if;
4916 	int error, off;
4917 
4918 	off = sc->rx_npoll_off;
4919 	error = sysctl_handle_int(oidp, &off, 0, req);
4920 	if (error || req->newptr == NULL)
4921 		return error;
4922 	if (off < 0)
4923 		return EINVAL;
4924 
4925 	ifnet_serialize_all(ifp);
4926 	if (off >= ncpus2 || off % sc->rx_ring_cnt != 0) {
4927 		error = EINVAL;
4928 	} else {
4929 		error = 0;
4930 		sc->rx_npoll_off = off;
4931 	}
4932 	ifnet_deserialize_all(ifp);
4933 
4934 	return error;
4935 }
4936 
4937 static int
4938 ix_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS)
4939 {
4940 	struct ix_softc *sc = (void *)arg1;
4941 	struct ifnet *ifp = &sc->arpcom.ac_if;
4942 	int error, off;
4943 
4944 	off = sc->tx_npoll_off;
4945 	error = sysctl_handle_int(oidp, &off, 0, req);
4946 	if (error || req->newptr == NULL)
4947 		return error;
4948 	if (off < 0)
4949 		return EINVAL;
4950 
4951 	ifnet_serialize_all(ifp);
4952 	if (off >= ncpus2 || off % sc->tx_ring_cnt != 0) {
4953 		error = EINVAL;
4954 	} else {
4955 		error = 0;
4956 		sc->tx_npoll_off = off;
4957 	}
4958 	ifnet_deserialize_all(ifp);
4959 
4960 	return error;
4961 }
4962 
4963 #endif /* IFPOLL_ENABLE */
4964