xref: /dragonfly/sys/dev/netif/ix/if_ix.c (revision 8edfbc5e)
1 /*
2  * Copyright (c) 2001-2014, Intel Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  *  1. Redistributions of source code must retain the above copyright notice,
9  *     this list of conditions and the following disclaimer.
10  *
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *     notice, this list of conditions and the following disclaimer in the
13  *     documentation and/or other materials provided with the distribution.
14  *
15  *  3. Neither the name of the Intel Corporation nor the names of its
16  *     contributors may be used to endorse or promote products derived from
17  *     this software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include "opt_ifpoll.h"
33 #include "opt_ix.h"
34 
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/endian.h>
38 #include <sys/interrupt.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/proc.h>
43 #include <sys/rman.h>
44 #include <sys/serialize.h>
45 #include <sys/serialize2.h>
46 #include <sys/socket.h>
47 #include <sys/sockio.h>
48 #include <sys/sysctl.h>
49 #include <sys/systm.h>
50 
51 #include <net/bpf.h>
52 #include <net/ethernet.h>
53 #include <net/if.h>
54 #include <net/if_arp.h>
55 #include <net/if_dl.h>
56 #include <net/if_media.h>
57 #include <net/ifq_var.h>
58 #include <net/toeplitz.h>
59 #include <net/toeplitz2.h>
60 #include <net/vlan/if_vlan_var.h>
61 #include <net/vlan/if_vlan_ether.h>
62 #include <net/if_poll.h>
63 
64 #include <netinet/in_systm.h>
65 #include <netinet/in.h>
66 #include <netinet/ip.h>
67 
68 #include <bus/pci/pcivar.h>
69 #include <bus/pci/pcireg.h>
70 
71 #include <dev/netif/ix/ixgbe_api.h>
72 #include <dev/netif/ix/if_ix.h>
73 
74 #define IX_IFM_DEFAULT		(IFM_ETHER | IFM_AUTO)
75 
76 #ifdef IX_RSS_DEBUG
77 #define IX_RSS_DPRINTF(sc, lvl, fmt, ...) \
78 do { \
79 	if (sc->rss_debug >= lvl) \
80 		if_printf(&sc->arpcom.ac_if, fmt, __VA_ARGS__); \
81 } while (0)
82 #else	/* !IX_RSS_DEBUG */
83 #define IX_RSS_DPRINTF(sc, lvl, fmt, ...)	((void)0)
84 #endif	/* IX_RSS_DEBUG */
85 
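/*
 * IX_DEVICE(id) builds a probe table entry by token-pasting the device
 * ID constant (IXGBE_DEV_ID_<id>) and stringifying <id> into the
 * human-readable description string.
 */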
86 #define IX_NAME			"Intel(R) PRO/10GbE "
87 #define IX_DEVICE(id) \
88 	{ IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_##id, IX_NAME #id }
89 #define IX_DEVICE_NULL		{ 0, 0, NULL }
90 
91 static struct ix_device {
92 	uint16_t	vid;
93 	uint16_t	did;
94 	const char	*desc;
95 } ix_devices[] = {
96 	IX_DEVICE(82598AF_DUAL_PORT),
97 	IX_DEVICE(82598AF_SINGLE_PORT),
98 	IX_DEVICE(82598EB_CX4),
99 	IX_DEVICE(82598AT),
100 	IX_DEVICE(82598AT2),
101 	IX_DEVICE(82598),
102 	IX_DEVICE(82598_DA_DUAL_PORT),
103 	IX_DEVICE(82598_CX4_DUAL_PORT),
104 	IX_DEVICE(82598EB_XF_LR),
105 	IX_DEVICE(82598_SR_DUAL_PORT_EM),
106 	IX_DEVICE(82598EB_SFP_LOM),
107 	IX_DEVICE(82599_KX4),
108 	IX_DEVICE(82599_KX4_MEZZ),
109 	IX_DEVICE(82599_SFP),
110 	IX_DEVICE(82599_XAUI_LOM),
111 	IX_DEVICE(82599_CX4),
112 	IX_DEVICE(82599_T3_LOM),
113 	IX_DEVICE(82599_COMBO_BACKPLANE),
114 	IX_DEVICE(82599_BACKPLANE_FCOE),
115 	IX_DEVICE(82599_SFP_SF2),
116 	IX_DEVICE(82599_SFP_FCOE),
117 	IX_DEVICE(82599EN_SFP),
118 	IX_DEVICE(82599_SFP_SF_QP),
119 	IX_DEVICE(82599_QSFP_SF_QP),
120 	IX_DEVICE(X540T),
121 	IX_DEVICE(X540T1),
122 	IX_DEVICE(X550T),
123 	IX_DEVICE(X550EM_X_KR),
124 	IX_DEVICE(X550EM_X_KX4),
125 	IX_DEVICE(X550EM_X_10G_T),
126 
127 	/* required last entry */
128 	IX_DEVICE_NULL
129 };
130 
131 static int	ix_probe(device_t);
132 static int	ix_attach(device_t);
133 static int	ix_detach(device_t);
134 static int	ix_shutdown(device_t);
135 
136 static void	ix_serialize(struct ifnet *, enum ifnet_serialize);
137 static void	ix_deserialize(struct ifnet *, enum ifnet_serialize);
138 static int	ix_tryserialize(struct ifnet *, enum ifnet_serialize);
139 #ifdef INVARIANTS
140 static void	ix_serialize_assert(struct ifnet *, enum ifnet_serialize,
141 		    boolean_t);
142 #endif
143 static void	ix_start(struct ifnet *, struct ifaltq_subque *);
144 static void	ix_watchdog(struct ifaltq_subque *);
145 static int	ix_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
146 static void	ix_init(void *);
147 static void	ix_stop(struct ix_softc *);
148 static void	ix_media_status(struct ifnet *, struct ifmediareq *);
149 static int	ix_media_change(struct ifnet *);
150 static void	ix_timer(void *);
151 #ifdef IFPOLL_ENABLE
152 static void	ix_npoll(struct ifnet *, struct ifpoll_info *);
153 static void	ix_npoll_rx(struct ifnet *, void *, int);
154 static void	ix_npoll_tx(struct ifnet *, void *, int);
155 static void	ix_npoll_status(struct ifnet *);
156 #endif
157 
158 static void	ix_add_sysctl(struct ix_softc *);
159 static void	ix_add_intr_rate_sysctl(struct ix_softc *, int,
160 		    const char *, int (*)(SYSCTL_HANDLER_ARGS), const char *);
161 static int	ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
162 static int	ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
163 static int	ix_sysctl_txd(SYSCTL_HANDLER_ARGS);
164 static int	ix_sysctl_rxd(SYSCTL_HANDLER_ARGS);
165 static int	ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS);
166 static int	ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int);
167 static int	ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS);
168 static int	ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS);
169 static int	ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS);
170 static int	ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS);
171 #if 0
172 static void     ix_add_hw_stats(struct ix_softc *);
173 #endif
174 #ifdef IFPOLL_ENABLE
175 static int	ix_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS);
176 static int	ix_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS);
177 #endif
178 
179 static void	ix_slot_info(struct ix_softc *);
180 static int	ix_alloc_rings(struct ix_softc *);
181 static void	ix_free_rings(struct ix_softc *);
182 static void	ix_setup_ifp(struct ix_softc *);
183 static void	ix_setup_serialize(struct ix_softc *);
184 static void	ix_set_ring_inuse(struct ix_softc *, boolean_t);
185 static void	ix_set_timer_cpuid(struct ix_softc *, boolean_t);
186 static void	ix_update_stats(struct ix_softc *);
187 
188 static void	ix_set_promisc(struct ix_softc *);
189 static void	ix_set_multi(struct ix_softc *);
190 static void	ix_set_vlan(struct ix_softc *);
191 static uint8_t	*ix_mc_array_itr(struct ixgbe_hw *, uint8_t **, uint32_t *);
192 static enum ixgbe_fc_mode ix_ifmedia2fc(int);
193 static const char *ix_ifmedia2str(int);
194 static const char *ix_fc2str(enum ixgbe_fc_mode);
195 
196 static int	ix_get_txring_inuse(const struct ix_softc *, boolean_t);
197 static void	ix_init_tx_ring(struct ix_tx_ring *);
198 static void	ix_free_tx_ring(struct ix_tx_ring *);
199 static int	ix_create_tx_ring(struct ix_tx_ring *);
200 static void	ix_destroy_tx_ring(struct ix_tx_ring *, int);
201 static void	ix_init_tx_unit(struct ix_softc *);
202 static int	ix_encap(struct ix_tx_ring *, struct mbuf **,
203 		    uint16_t *, int *);
204 static int	ix_tx_ctx_setup(struct ix_tx_ring *,
205 		    const struct mbuf *, uint32_t *, uint32_t *);
206 static int	ix_tso_ctx_setup(struct ix_tx_ring *,
207 		    const struct mbuf *, uint32_t *, uint32_t *);
208 static void	ix_txeof(struct ix_tx_ring *, int);
209 
210 static int	ix_get_rxring_inuse(const struct ix_softc *, boolean_t);
211 static int	ix_init_rx_ring(struct ix_rx_ring *);
212 static void	ix_free_rx_ring(struct ix_rx_ring *);
213 static int	ix_create_rx_ring(struct ix_rx_ring *);
214 static void	ix_destroy_rx_ring(struct ix_rx_ring *, int);
215 static void	ix_init_rx_unit(struct ix_softc *);
216 #if 0
217 static void	ix_setup_hw_rsc(struct ix_rx_ring *);
218 #endif
219 static int	ix_newbuf(struct ix_rx_ring *, int, boolean_t);
220 static void	ix_rxeof(struct ix_rx_ring *, int);
221 static void	ix_rx_discard(struct ix_rx_ring *, int, boolean_t);
222 static void	ix_enable_rx_drop(struct ix_softc *);
223 static void	ix_disable_rx_drop(struct ix_softc *);
224 
225 static void	ix_alloc_msix(struct ix_softc *);
226 static void	ix_free_msix(struct ix_softc *, boolean_t);
227 static void	ix_conf_rx_msix(struct ix_softc *, int, int *, int);
228 static void	ix_conf_tx_msix(struct ix_softc *, int, int *, int);
229 static void	ix_setup_msix_eims(const struct ix_softc *, int,
230 		    uint32_t *, uint32_t *);
231 static int	ix_alloc_intr(struct ix_softc *);
232 static void	ix_free_intr(struct ix_softc *);
233 static int	ix_setup_intr(struct ix_softc *);
234 static void	ix_teardown_intr(struct ix_softc *, int);
235 static void	ix_enable_intr(struct ix_softc *);
236 static void	ix_disable_intr(struct ix_softc *);
237 static void	ix_set_ivar(struct ix_softc *, uint8_t, uint8_t, int8_t);
238 static void	ix_set_eitr(struct ix_softc *, int, int);
239 static void	ix_intr_status(struct ix_softc *, uint32_t);
240 static void	ix_intr(void *);
241 static void	ix_msix_rxtx(void *);
242 static void	ix_msix_rx(void *);
243 static void	ix_msix_tx(void *);
244 static void	ix_msix_status(void *);
245 
246 static void	ix_config_link(struct ix_softc *);
247 static boolean_t ix_sfp_probe(struct ix_softc *);
248 static boolean_t ix_is_sfp(const struct ixgbe_hw *);
249 static void	ix_update_link_status(struct ix_softc *);
250 static void	ix_handle_link(struct ix_softc *);
251 static void	ix_handle_mod(struct ix_softc *);
252 static void	ix_handle_msf(struct ix_softc *);
253 static void	ix_handle_phy(struct ix_softc *);
254 static int	ix_powerdown(struct ix_softc *);
255 static void	ix_config_flowctrl(struct ix_softc *);
256 static void	ix_config_dmac(struct ix_softc *);
257 static void	ix_init_media(struct ix_softc *);
258 
259 /* XXX Missing shared code prototype */
260 extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *);
261 
262 static device_method_t ix_methods[] = {
263 	/* Device interface */
264 	DEVMETHOD(device_probe,		ix_probe),
265 	DEVMETHOD(device_attach,	ix_attach),
266 	DEVMETHOD(device_detach,	ix_detach),
267 	DEVMETHOD(device_shutdown,	ix_shutdown),
268 	DEVMETHOD_END
269 };
270 
271 static driver_t ix_driver = {
272 	"ix",
273 	ix_methods,
274 	sizeof(struct ix_softc)
275 };
276 
277 static devclass_t ix_devclass;
278 
279 DECLARE_DUMMY_MODULE(if_ix);
280 DRIVER_MODULE(if_ix, pci, ix_driver, ix_devclass, NULL, NULL);
281 
282 static int	ix_msi_enable = 1;
283 static int	ix_msix_enable = 1;
284 static int	ix_msix_agg_rxtx = 1;
285 static int	ix_rxr = 0;
286 static int	ix_txr = 0;
287 static int	ix_txd = IX_PERF_TXD;
288 static int	ix_rxd = IX_PERF_RXD;
289 static int	ix_unsupported_sfp = 0;
290 
291 static char	ix_flowctrl[IFM_ETH_FC_STRLEN] = IFM_ETH_FC_FULL;
292 
293 TUNABLE_INT("hw.ix.msi.enable", &ix_msi_enable);
294 TUNABLE_INT("hw.ix.msix.enable", &ix_msix_enable);
295 TUNABLE_INT("hw.ix.msix.agg_rxtx", &ix_msix_agg_rxtx);
296 TUNABLE_INT("hw.ix.rxr", &ix_rxr);
297 TUNABLE_INT("hw.ix.txr", &ix_txr);
298 TUNABLE_INT("hw.ix.txd", &ix_txd);
299 TUNABLE_INT("hw.ix.rxd", &ix_rxd);
300 TUNABLE_INT("hw.ix.unsupported_sfp", &ix_unsupported_sfp);
301 TUNABLE_STR("hw.ix.flow_ctrl", ix_flowctrl, sizeof(ix_flowctrl));
302 
303 /*
304  * Smart speed setting, default to on.  This only works
305  * as a compile-time option right now, since it is applied during
306  * attach; set this to 'ixgbe_smart_speed_off' to disable.
307  */
308 static const enum ixgbe_smart_speed ix_smart_speed =
309     ixgbe_smart_speed_on;
310 
311 static int
312 ix_probe(device_t dev)
313 {
314 	const struct ix_device *d;
315 	uint16_t vid, did;
316 
317 	vid = pci_get_vendor(dev);
318 	did = pci_get_device(dev);
319 
320 	for (d = ix_devices; d->desc != NULL; ++d) {
321 		if (vid == d->vid && did == d->did) {
322 			device_set_desc(dev, d->desc);
323 			return 0;
324 		}
325 	}
326 	return ENXIO;
327 }
328 
329 static int
330 ix_attach(device_t dev)
331 {
332 	struct ix_softc *sc = device_get_softc(dev);
333 	struct ixgbe_hw *hw;
334 	int error, ring_cnt_max;
335 	uint16_t csum;
336 	uint32_t ctrl_ext;
337 #ifdef IFPOLL_ENABLE
338 	int offset, offset_def;
339 #endif
340 	char flowctrl[IFM_ETH_FC_STRLEN];
341 
342 	sc->dev = sc->osdep.dev = dev;
343 	hw = &sc->hw;
344 
345 	if_initname(&sc->arpcom.ac_if, device_get_name(dev),
346 	    device_get_unit(dev));
347 	ifmedia_init(&sc->media, IFM_IMASK | IFM_ETH_FCMASK,
348 	    ix_media_change, ix_media_status);
349 
350 	/* Save frame size */
351 	sc->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;
352 
353 	callout_init_mp(&sc->timer);
354 	lwkt_serialize_init(&sc->main_serialize);
355 
356 	/*
357 	 * Save off the information about this board
358 	 */
359 	hw->vendor_id = pci_get_vendor(dev);
360 	hw->device_id = pci_get_device(dev);
361 	hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
362 	hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2);
363 	hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);
364 
365 	ixgbe_set_mac_type(hw);
366 
367 	/* Smart speed applies to 82599 and later (not 82598) */
368 	if (hw->mac.type != ixgbe_mac_82598EB)
369 		hw->phy.smart_speed = ix_smart_speed;
370 
371 	/* Enable bus mastering */
372 	pci_enable_busmaster(dev);
373 
374 	/*
375 	 * Allocate IO memory
376 	 */
377 	sc->mem_rid = PCIR_BAR(0);
378 	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
379 	    &sc->mem_rid, RF_ACTIVE);
380 	if (sc->mem_res == NULL) {
381 		device_printf(dev, "Unable to allocate bus resource: memory\n");
382 		error = ENXIO;
383 		goto failed;
384 	}
385 
386 	sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->mem_res);
387 	sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->mem_res);
388 
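	/*
	 * hw_addr and hw->back give the shared code and the osdep register
	 * access macros a way back to the bus-space tag/handle saved above.
	 */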
389 	sc->hw.hw_addr = (uint8_t *)&sc->osdep.mem_bus_space_handle;
390 	sc->hw.back = &sc->osdep;
391 
392 	/*
393 	 * Configure total supported RX/TX ring count
394 	 */
395 	sc->rx_ring_cnt = device_getenv_int(dev, "rxr", ix_rxr);
396 	sc->rx_ring_cnt = if_ring_count2(sc->rx_ring_cnt, IX_MAX_RXRING);
397 	sc->rx_ring_inuse = sc->rx_ring_cnt;
398 
399 	switch (hw->mac.type) {
400 	case ixgbe_mac_82598EB:
401 		ring_cnt_max = IX_MAX_TXRING_82598;
402 		break;
403 
404 	case ixgbe_mac_82599EB:
405 		ring_cnt_max = IX_MAX_TXRING_82599;
406 		break;
407 
408 	case ixgbe_mac_X540:
409 		ring_cnt_max = IX_MAX_TXRING_X540;
410 		break;
411 
412 	default:
413 		ring_cnt_max = 1;
414 		break;
415 	}
416 	sc->tx_ring_cnt = device_getenv_int(dev, "txr", ix_txr);
417 	sc->tx_ring_cnt = if_ring_count2(sc->tx_ring_cnt, ring_cnt_max);
418 	sc->tx_ring_inuse = sc->tx_ring_cnt;
419 
420 	/* Allocate TX/RX rings */
421 	error = ix_alloc_rings(sc);
422 	if (error)
423 		goto failed;
424 
425 #ifdef IFPOLL_ENABLE
426 	/*
427 	 * NPOLLING RX CPU offset
428 	 */
429 	if (sc->rx_ring_cnt == ncpus2) {
430 		offset = 0;
431 	} else {
432 		offset_def = (sc->rx_ring_cnt * device_get_unit(dev)) % ncpus2;
433 		offset = device_getenv_int(dev, "npoll.rxoff", offset_def);
434 		if (offset >= ncpus2 ||
435 		    offset % sc->rx_ring_cnt != 0) {
436 			device_printf(dev, "invalid npoll.rxoff %d, use %d\n",
437 			    offset, offset_def);
438 			offset = offset_def;
439 		}
440 	}
441 	sc->rx_npoll_off = offset;
442 
443 	/*
444 	 * NPOLLING TX CPU offset
445 	 */
446 	if (sc->tx_ring_cnt == ncpus2) {
447 		offset = 0;
448 	} else {
449 		offset_def = (sc->tx_ring_cnt * device_get_unit(dev)) % ncpus2;
450 		offset = device_getenv_int(dev, "npoll.txoff", offset_def);
451 		if (offset >= ncpus2 ||
452 		    offset % sc->tx_ring_cnt != 0) {
453 			device_printf(dev, "invalid npoll.txoff %d, use %d\n",
454 			    offset, offset_def);
455 			offset = offset_def;
456 		}
457 	}
458 	sc->tx_npoll_off = offset;
459 #endif
460 
461 	/* Allocate interrupt */
462 	error = ix_alloc_intr(sc);
463 	if (error)
464 		goto failed;
465 
466 	/* Setup serializes */
467 	ix_setup_serialize(sc);
468 
469 	/* Allocate multicast array memory. */
470 	sc->mta = kmalloc(IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR,
471 	    M_DEVBUF, M_WAITOK);
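	/*
	 * mta holds up to IX_MAX_MCASTADDR Ethernet addresses; it is filled
	 * by ix_set_multi() and handed to the shared code through the
	 * ix_mc_array_itr() iterator.
	 */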
472 
473 	/* Initialize the shared code */
474 	hw->allow_unsupported_sfp = ix_unsupported_sfp;
475 	error = ixgbe_init_shared_code(hw);
476 	if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
477 		/*
478 		 * No optics in this port; ask timer routine
479 		 * to probe for later insertion.
480 		 */
481 		sc->sfp_probe = TRUE;
482 		error = 0;
483 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
484 		device_printf(dev, "Unsupported SFP+ module detected!\n");
485 		error = EIO;
486 		goto failed;
487 	} else if (error) {
488 		device_printf(dev, "Unable to initialize the shared code\n");
489 		error = EIO;
490 		goto failed;
491 	}
492 
493 	/* Make sure we have a good EEPROM before we read from it */
494 	if (ixgbe_validate_eeprom_checksum(&sc->hw, &csum) < 0) {
495 		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
496 		error = EIO;
497 		goto failed;
498 	}
499 
500 	error = ixgbe_init_hw(hw);
501 	if (error == IXGBE_ERR_EEPROM_VERSION) {
502 		device_printf(dev, "Pre-production device detected\n");
503 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
504 		device_printf(dev, "Unsupported SFP+ Module\n");
505 		error = EIO;
506 		goto failed;
507 	} else if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
508 		device_printf(dev, "No SFP+ Module found\n");
509 	}
510 
511 	sc->ifm_media = IX_IFM_DEFAULT;
512 	/* Get default flow control settings */
513 	device_getenv_string(dev, "flow_ctrl", flowctrl, sizeof(flowctrl),
514 	    ix_flowctrl);
515 	sc->ifm_media |= ifmedia_str2ethfc(flowctrl);
516 	sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;
517 
518 	/* Setup OS specific network interface */
519 	ix_setup_ifp(sc);
520 
521 	/* Add sysctl tree */
522 	ix_add_sysctl(sc);
523 
524 	error = ix_setup_intr(sc);
525 	if (error) {
526 		ether_ifdetach(&sc->arpcom.ac_if);
527 		goto failed;
528 	}
529 
530 	/* Initialize statistics */
531 	ix_update_stats(sc);
532 
533 	/* Check PCIE slot type/speed/width */
534 	ix_slot_info(sc);
535 
536 	/* Save initial wake up filter configuration */
537 	sc->wufc = IXGBE_READ_REG(hw, IXGBE_WUFC);
538 
539 	/* Let hardware know driver is loaded */
540 	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
541 	ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
542 	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
543 
544 	return 0;
545 failed:
546 	ix_detach(dev);
547 	return error;
548 }
549 
550 static int
551 ix_detach(device_t dev)
552 {
553 	struct ix_softc *sc = device_get_softc(dev);
554 
555 	if (device_is_attached(dev)) {
556 		struct ifnet *ifp = &sc->arpcom.ac_if;
557 		uint32_t ctrl_ext;
558 
559 		ifnet_serialize_all(ifp);
560 
561 		ix_powerdown(sc);
562 		ix_teardown_intr(sc, sc->intr_cnt);
563 
564 		ifnet_deserialize_all(ifp);
565 
566 		callout_terminate(&sc->timer);
567 		ether_ifdetach(ifp);
568 
569 		/* Let hardware know driver is unloading */
570 		ctrl_ext = IXGBE_READ_REG(&sc->hw, IXGBE_CTRL_EXT);
571 		ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
572 		IXGBE_WRITE_REG(&sc->hw, IXGBE_CTRL_EXT, ctrl_ext);
573 	}
574 
575 	ifmedia_removeall(&sc->media);
576 	bus_generic_detach(dev);
577 
578 	ix_free_intr(sc);
579 
580 	if (sc->msix_mem_res != NULL) {
581 		bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_mem_rid,
582 		    sc->msix_mem_res);
583 	}
584 	if (sc->mem_res != NULL) {
585 		bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid,
586 		    sc->mem_res);
587 	}
588 
589 	ix_free_rings(sc);
590 
591 	if (sc->mta != NULL)
592 		kfree(sc->mta, M_DEVBUF);
593 	if (sc->serializes != NULL)
594 		kfree(sc->serializes, M_DEVBUF);
595 
596 	return 0;
597 }
598 
599 static int
600 ix_shutdown(device_t dev)
601 {
602 	struct ix_softc *sc = device_get_softc(dev);
603 	struct ifnet *ifp = &sc->arpcom.ac_if;
604 
605 	ifnet_serialize_all(ifp);
606 	ix_powerdown(sc);
607 	ifnet_deserialize_all(ifp);
608 
609 	return 0;
610 }
611 
612 static void
613 ix_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
614 {
615 	struct ix_softc *sc = ifp->if_softc;
616 	struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
617 	int idx = -1;
618 	uint16_t nsegs;
619 
620 	KKASSERT(txr->tx_ifsq == ifsq);
621 	ASSERT_SERIALIZED(&txr->tx_serialize);
622 
623 	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
624 		return;
625 
626 	if (!sc->link_active || (txr->tx_flags & IX_TXFLAG_ENABLED) == 0) {
627 		ifsq_purge(ifsq);
628 		return;
629 	}
630 
631 	while (!ifsq_is_empty(ifsq)) {
632 		struct mbuf *m_head;
633 
634 		if (txr->tx_avail <= IX_MAX_SCATTER + IX_TX_RESERVED) {
635 			ifsq_set_oactive(ifsq);
636 			txr->tx_watchdog.wd_timer = 5;
637 			break;
638 		}
639 
640 		m_head = ifsq_dequeue(ifsq);
641 		if (m_head == NULL)
642 			break;
643 
644 		if (ix_encap(txr, &m_head, &nsegs, &idx)) {
645 			IFNET_STAT_INC(ifp, oerrors, 1);
646 			continue;
647 		}
648 
649 		/*
650 		 * TX interrupts are aggressively aggregated, so increasing
651 		 * opackets at TX interrupt time will make the opackets
652 		 * statistics vastly inaccurate; we do the opackets increment
653 		 * now.
654 		 */
655 		IFNET_STAT_INC(ifp, opackets, 1);
656 
657 		if (nsegs >= txr->tx_wreg_nsegs) {
658 			IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
659 			nsegs = 0;
660 			idx = -1;
661 		}
662 
663 		ETHER_BPF_MTAP(ifp, m_head);
664 	}
665 	if (idx >= 0)
666 		IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
667 }
668 
669 static int
670 ix_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
671 {
672 	struct ix_softc *sc = ifp->if_softc;
673 	struct ifreq *ifr = (struct ifreq *) data;
674 	int error = 0, mask, reinit;
675 
676 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
677 
678 	switch (command) {
679 	case SIOCSIFMTU:
680 		if (ifr->ifr_mtu > IX_MAX_MTU) {
681 			error = EINVAL;
682 		} else {
683 			ifp->if_mtu = ifr->ifr_mtu;
684 			sc->max_frame_size = ifp->if_mtu + IX_MTU_HDR;
685 			ix_init(sc);
686 		}
687 		break;
688 
689 	case SIOCSIFFLAGS:
690 		if (ifp->if_flags & IFF_UP) {
691 			if (ifp->if_flags & IFF_RUNNING) {
692 				if ((ifp->if_flags ^ sc->if_flags) &
693 				    (IFF_PROMISC | IFF_ALLMULTI))
694 					ix_set_promisc(sc);
695 			} else {
696 				ix_init(sc);
697 			}
698 		} else if (ifp->if_flags & IFF_RUNNING) {
699 			ix_stop(sc);
700 		}
701 		sc->if_flags = ifp->if_flags;
702 		break;
703 
704 	case SIOCADDMULTI:
705 	case SIOCDELMULTI:
706 		if (ifp->if_flags & IFF_RUNNING) {
707 			ix_disable_intr(sc);
708 			ix_set_multi(sc);
709 #ifdef IFPOLL_ENABLE
710 			if ((ifp->if_flags & IFF_NPOLLING) == 0)
711 #endif
712 				ix_enable_intr(sc);
713 		}
714 		break;
715 
716 	case SIOCSIFMEDIA:
717 	case SIOCGIFMEDIA:
718 		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
719 		break;
720 
721 	case SIOCSIFCAP:
722 		reinit = 0;
723 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
724 		if (mask & IFCAP_RXCSUM) {
725 			ifp->if_capenable ^= IFCAP_RXCSUM;
726 			reinit = 1;
727 		}
728 		if (mask & IFCAP_VLAN_HWTAGGING) {
729 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
730 			reinit = 1;
731 		}
732 		if (mask & IFCAP_TXCSUM) {
733 			ifp->if_capenable ^= IFCAP_TXCSUM;
734 			if (ifp->if_capenable & IFCAP_TXCSUM)
735 				ifp->if_hwassist |= CSUM_OFFLOAD;
736 			else
737 				ifp->if_hwassist &= ~CSUM_OFFLOAD;
738 		}
739 		if (mask & IFCAP_TSO) {
740 			ifp->if_capenable ^= IFCAP_TSO;
741 			if (ifp->if_capenable & IFCAP_TSO)
742 				ifp->if_hwassist |= CSUM_TSO;
743 			else
744 				ifp->if_hwassist &= ~CSUM_TSO;
745 		}
746 		if (mask & IFCAP_RSS)
747 			ifp->if_capenable ^= IFCAP_RSS;
748 		if (reinit && (ifp->if_flags & IFF_RUNNING))
749 			ix_init(sc);
750 		break;
751 
752 #if 0
753 	case SIOCGI2C:
754 	{
755 		struct ixgbe_i2c_req	i2c;
756 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
757 		if (error)
758 			break;
759 		if ((i2c.dev_addr != 0xA0) && (i2c.dev_addr != 0xA2)) {
760 			error = EINVAL;
761 			break;
762 		}
763 		hw->phy.ops.read_i2c_byte(hw, i2c.offset,
764 		    i2c.dev_addr, i2c.data);
765 		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
766 		break;
767 	}
768 #endif
769 
770 	default:
771 		error = ether_ioctl(ifp, command, data);
772 		break;
773 	}
774 	return error;
775 }
776 
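/*
 * The maximum frame size lives in the upper 16 bits of the MHADD
 * register; ix_init() shifts max_frame_size by this amount when a
 * jumbo MTU is configured.
 */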
777 #define IXGBE_MHADD_MFS_SHIFT 16
778 
779 static void
780 ix_init(void *xsc)
781 {
782 	struct ix_softc *sc = xsc;
783 	struct ifnet *ifp = &sc->arpcom.ac_if;
784 	struct ixgbe_hw *hw = &sc->hw;
785 	uint32_t gpie, rxctrl;
786 	int i, error;
787 	boolean_t polling;
788 
789 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
790 
791 	ix_stop(sc);
792 
793 	polling = FALSE;
794 #ifdef IFPOLL_ENABLE
795 	if (ifp->if_flags & IFF_NPOLLING)
796 		polling = TRUE;
797 #endif
798 
799 	/* Configure # of used RX/TX rings */
800 	ix_set_ring_inuse(sc, polling);
801 	ifq_set_subq_mask(&ifp->if_snd, sc->tx_ring_inuse - 1);
802 
803 	/* Get the latest MAC address; the user can use a LAA */
804 	bcopy(IF_LLADDR(ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS);
805 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
806 	hw->addr_ctrl.rar_used_count = 1;
807 
808 	/* Prepare transmit descriptors and buffers */
809 	for (i = 0; i < sc->tx_ring_inuse; ++i)
810 		ix_init_tx_ring(&sc->tx_rings[i]);
811 
812 	ixgbe_init_hw(hw);
813 	ix_init_tx_unit(sc);
814 
815 	/* Setup Multicast table */
816 	ix_set_multi(sc);
817 
818 	/* Prepare receive descriptors and buffers */
819 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
820 		error = ix_init_rx_ring(&sc->rx_rings[i]);
821 		if (error) {
822 			if_printf(ifp, "Could not initialize RX ring%d\n", i);
823 			ix_stop(sc);
824 			return;
825 		}
826 	}
827 
828 	/* Configure RX settings */
829 	ix_init_rx_unit(sc);
830 
831 	gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
832 
833 	/* Enable Fan Failure Interrupt */
834 	gpie |= IXGBE_SDP1_GPIEN_BY_MAC(hw);
835 
836 	/* Enable interrupt for SFP+ module detection */
837 	if (hw->mac.type == ixgbe_mac_82599EB)
838 		gpie |= IXGBE_SDP2_GPIEN;
839 
840 	/*
841 	 * Thermal Failure Detection (X540)
842 	 * Link Detection (X552)
843 	 */
844 	if (hw->mac.type == ixgbe_mac_X540 ||
845 	    hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
846 	    hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T)
847 		gpie |= IXGBE_SDP0_GPIEN_X540;
848 
849 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
850 		/* Enable Enhanced MSIX mode */
851 		gpie |= IXGBE_GPIE_MSIX_MODE;
852 		gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
853 		    IXGBE_GPIE_OCD;
854 	}
855 	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
856 
857 	/* Set MTU size */
858 	if (ifp->if_mtu > ETHERMTU) {
859 		uint32_t mhadd;
860 
861 		/* aka IXGBE_MAXFRS on 82599 and newer */
862 		mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
863 		mhadd &= ~IXGBE_MHADD_MFS_MASK;
864 		mhadd |= sc->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
865 		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
866 	}
867 
868 	/*
869 	 * Enable TX rings
870 	 */
871 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
872 		uint32_t txdctl;
873 
874 		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
875 		txdctl |= IXGBE_TXDCTL_ENABLE;
876 
877 		/*
878 		 * Set WTHRESH to 0, since TX head write-back is used
879 		 */
880 		txdctl &= ~(0x7f << 16);
881 
882 		/*
883 		 * When the internal queue falls below PTHRESH (32),
884 		 * start prefetching as long as there are at least
885 		 * HTHRESH (1) buffers ready. The values are taken
886 		 * from the Intel linux driver 3.8.21.
887 		 * Prefetching enables tx line rate even with 1 queue.
888 		 */
889 		txdctl |= (32 << 0) | (1 << 8);
890 		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
891 	}
892 
893 	/*
894 	 * Enable RX rings
895 	 */
896 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
897 		uint32_t rxdctl;
898 		int k;
899 
900 		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
901 		if (hw->mac.type == ixgbe_mac_82598EB) {
902 			/*
903 			 * PTHRESH = 21
904 			 * HTHRESH = 4
905 			 * WTHRESH = 8
906 			 */
907 			rxdctl &= ~0x3FFFFF;
908 			rxdctl |= 0x080420;
909 		}
910 		rxdctl |= IXGBE_RXDCTL_ENABLE;
911 		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
912 		for (k = 0; k < 10; ++k) {
913 			if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
914 			    IXGBE_RXDCTL_ENABLE)
915 				break;
916 			else
917 				msec_delay(1);
918 		}
919 		wmb();
920 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i),
921 		    sc->rx_rings[0].rx_ndesc - 1);
922 	}
923 
924 	/* Enable Receive engine */
925 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
926 	if (hw->mac.type == ixgbe_mac_82598EB)
927 		rxctrl |= IXGBE_RXCTRL_DMBYPS;
928 	rxctrl |= IXGBE_RXCTRL_RXEN;
929 	ixgbe_enable_rx_dma(hw, rxctrl);
930 
931 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
932 		const struct ix_tx_ring *txr = &sc->tx_rings[i];
933 
934 		if (txr->tx_intr_vec >= 0) {
935 			ix_set_ivar(sc, i, txr->tx_intr_vec, 1);
936 		} else {
937 			/*
938 			 * Unconfigured TX interrupt vector could only
939 			 * happen for MSI-X.
940 			 */
941 			KASSERT(sc->intr_type == PCI_INTR_TYPE_MSIX,
942 			    ("TX intr vector is not set"));
943 			KASSERT(i < sc->rx_ring_inuse,
944 			    ("invalid TX ring %d, no piggyback RX ring", i));
945 			KASSERT(sc->rx_rings[i].rx_txr == txr,
946 			    ("RX ring %d piggybacked TX ring mismatch", i));
947 			if (bootverbose)
948 				if_printf(ifp, "IVAR skips TX ring %d\n", i);
949 		}
950 	}
951 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
952 		const struct ix_rx_ring *rxr = &sc->rx_rings[i];
953 
954 		KKASSERT(rxr->rx_intr_vec >= 0);
955 		ix_set_ivar(sc, i, rxr->rx_intr_vec, 0);
956 		if (rxr->rx_txr != NULL) {
957 			/*
958 			 * Piggyback the TX ring interrupt onto the RX
959 			 * ring interrupt vector.
960 			 */
961 			KASSERT(rxr->rx_txr->tx_intr_vec < 0,
962 			    ("piggybacked TX ring configured intr vector"));
963 			KASSERT(rxr->rx_txr->tx_idx == i,
964 			    ("RX ring %d piggybacked TX ring %u",
965 			     i, rxr->rx_txr->tx_idx));
966 			ix_set_ivar(sc, i, rxr->rx_intr_vec, 1);
967 			if (bootverbose) {
968 				if_printf(ifp, "IVAR RX ring %d piggybacks "
969 				    "TX ring %u\n", i, rxr->rx_txr->tx_idx);
970 			}
971 		}
972 	}
973 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
974 		/* Set up status MSI-X vector; it is using fixed entry 1 */
975 		ix_set_ivar(sc, 1, sc->sts_msix_vec, -1);
976 
977 		/* Set up auto-mask for TX and RX rings */
978 		if (hw->mac.type == ixgbe_mac_82598EB) {
979 			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
980 		} else {
981 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
982 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
983 		}
984 	} else {
985 		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
986 	}
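	/*
	 * Program each vector's EITR (interrupt throttle) register with the
	 * interrupt rate configured through the tunables/sysctls.
	 */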
987 	for (i = 0; i < sc->intr_cnt; ++i)
988 		ix_set_eitr(sc, i, sc->intr_data[i].intr_rate);
989 
990 	/*
991 	 * Check on any SFP devices that need to be kick-started
992 	 */
993 	if (hw->phy.type == ixgbe_phy_none) {
994 		error = hw->phy.ops.identify(hw);
995 		if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
996 			if_printf(ifp,
997 			    "Unsupported SFP+ module type was detected.\n");
998 			/* XXX stop */
999 			return;
1000 		}
1001 	}
1002 
1003 	/* Config/Enable Link */
1004 	ix_config_link(sc);
1005 
1006 	/* Hardware Packet Buffer & Flow Control setup */
1007 	ix_config_flowctrl(sc);
1008 
1009 	/* Initialize the FC settings */
1010 	ixgbe_start_hw(hw);
1011 
1012 	/* Set up VLAN support and filter */
1013 	ix_set_vlan(sc);
1014 
1015 	/* Setup DMA Coalescing */
1016 	ix_config_dmac(sc);
1017 
1018 	/*
1019 	 * Only enable interrupts if we are not polling; make sure
1020 	 * they are off otherwise.
1021 	 */
1022 	if (polling)
1023 		ix_disable_intr(sc);
1024 	else
1025 		ix_enable_intr(sc);
1026 
1027 	ifp->if_flags |= IFF_RUNNING;
1028 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
1029 		ifsq_clr_oactive(sc->tx_rings[i].tx_ifsq);
1030 		ifsq_watchdog_start(&sc->tx_rings[i].tx_watchdog);
1031 	}
1032 
1033 	ix_set_timer_cpuid(sc, polling);
1034 	callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
1035 }
1036 
1037 static void
1038 ix_intr(void *xsc)
1039 {
1040 	struct ix_softc *sc = xsc;
1041 	struct ixgbe_hw	*hw = &sc->hw;
1042 	uint32_t eicr;
1043 
1044 	ASSERT_SERIALIZED(&sc->main_serialize);
1045 
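	/*
	 * In legacy/MSI mode EICR is read-to-clear; a zero value means no
	 * cause was pending and the interrupt was not ours.
	 */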
1046 	eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1047 	if (eicr == 0) {
1048 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
1049 		return;
1050 	}
1051 
1052 	if (eicr & IX_RX0_INTR_MASK) {
1053 		struct ix_rx_ring *rxr = &sc->rx_rings[0];
1054 
1055 		lwkt_serialize_enter(&rxr->rx_serialize);
1056 		ix_rxeof(rxr, -1);
1057 		lwkt_serialize_exit(&rxr->rx_serialize);
1058 	}
1059 	if (eicr & IX_RX1_INTR_MASK) {
1060 		struct ix_rx_ring *rxr;
1061 
1062 		KKASSERT(sc->rx_ring_inuse == IX_MIN_RXRING_RSS);
1063 		rxr = &sc->rx_rings[1];
1064 
1065 		lwkt_serialize_enter(&rxr->rx_serialize);
1066 		ix_rxeof(rxr, -1);
1067 		lwkt_serialize_exit(&rxr->rx_serialize);
1068 	}
1069 
1070 	if (eicr & IX_TX_INTR_MASK) {
1071 		struct ix_tx_ring *txr = &sc->tx_rings[0];
1072 
1073 		lwkt_serialize_enter(&txr->tx_serialize);
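		/*
		 * *tx_hdr is the TX head write-back area: the hardware DMAs
		 * the index of the last completed descriptor into it, so TX
		 * completions can be reaped without a register read.
		 */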
1074 		ix_txeof(txr, *(txr->tx_hdr));
1075 		if (!ifsq_is_empty(txr->tx_ifsq))
1076 			ifsq_devstart(txr->tx_ifsq);
1077 		lwkt_serialize_exit(&txr->tx_serialize);
1078 	}
1079 
1080 	if (__predict_false(eicr & IX_EICR_STATUS))
1081 		ix_intr_status(sc, eicr);
1082 
1083 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
1084 }
1085 
1086 static void
1087 ix_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1088 {
1089 	struct ix_softc *sc = ifp->if_softc;
1090 	struct ifmedia *ifm = &sc->media;
1091 	int layer;
1092 
1093 	ix_update_link_status(sc);
1094 
1095 	ifmr->ifm_status = IFM_AVALID;
1096 	ifmr->ifm_active = IFM_ETHER;
1097 
1098 	if (!sc->link_active) {
1099 		if (IFM_SUBTYPE(ifm->ifm_media) != IFM_AUTO)
1100 			ifmr->ifm_active |= ifm->ifm_media;
1101 		else
1102 			ifmr->ifm_active |= IFM_NONE;
1103 		return;
1104 	}
1105 	ifmr->ifm_status |= IFM_ACTIVE;
1106 
1107 	layer = ixgbe_get_supported_physical_layer(&sc->hw);
1108 
1109 	if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) ||
1110 	    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) ||
1111 	    (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX)) {
1112 		switch (sc->link_speed) {
1113 		case IXGBE_LINK_SPEED_10GB_FULL:
1114 			ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
1115 			break;
1116 		case IXGBE_LINK_SPEED_1GB_FULL:
1117 			ifmr->ifm_active |= IFM_1000_T | IFM_FDX;
1118 			break;
1119 		case IXGBE_LINK_SPEED_100_FULL:
1120 			ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1121 			break;
1122 		}
1123 	} else if ((layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) ||
1124 	    (layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA)) {
1125 		switch (sc->link_speed) {
1126 		case IXGBE_LINK_SPEED_10GB_FULL:
1127 			ifmr->ifm_active |= IFM_10G_TWINAX | IFM_FDX;
1128 			break;
1129 		}
1130 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) {
1131 		switch (sc->link_speed) {
1132 		case IXGBE_LINK_SPEED_10GB_FULL:
1133 			ifmr->ifm_active |= IFM_10G_LR | IFM_FDX;
1134 			break;
1135 		case IXGBE_LINK_SPEED_1GB_FULL:
1136 			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
1137 			break;
1138 		}
1139 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LRM) {
1140 		switch (sc->link_speed) {
1141 		case IXGBE_LINK_SPEED_10GB_FULL:
1142 			ifmr->ifm_active |= IFM_10G_LRM | IFM_FDX;
1143 			break;
1144 		case IXGBE_LINK_SPEED_1GB_FULL:
1145 			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
1146 			break;
1147 		}
1148 	} else if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) ||
1149 	    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX)) {
1150 		switch (sc->link_speed) {
1151 		case IXGBE_LINK_SPEED_10GB_FULL:
1152 			ifmr->ifm_active |= IFM_10G_SR | IFM_FDX;
1153 			break;
1154 		case IXGBE_LINK_SPEED_1GB_FULL:
1155 			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1156 			break;
1157 		}
1158 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) {
1159 		switch (sc->link_speed) {
1160 		case IXGBE_LINK_SPEED_10GB_FULL:
1161 			ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX;
1162 			break;
1163 		}
1164 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) {
1165 		/*
1166 		 * XXX: These need to use the proper media types once
1167 		 * they're added.
1168 		 */
1169 		switch (sc->link_speed) {
1170 		case IXGBE_LINK_SPEED_10GB_FULL:
1171 			ifmr->ifm_active |= IFM_10G_SR | IFM_FDX;
1172 			break;
1173 		case IXGBE_LINK_SPEED_2_5GB_FULL:
1174 			ifmr->ifm_active |= IFM_2500_SX | IFM_FDX;
1175 			break;
1176 		case IXGBE_LINK_SPEED_1GB_FULL:
1177 			ifmr->ifm_active |= IFM_1000_CX | IFM_FDX;
1178 			break;
1179 		}
1180 	} else if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) ||
1181 	    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX)) {
1182 		/*
1183 		 * XXX: These need to use the proper media types once
1184 		 * they're added.
1185 		 */
1186 		switch (sc->link_speed) {
1187 		case IXGBE_LINK_SPEED_10GB_FULL:
1188 			ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX;
1189 			break;
1190 		case IXGBE_LINK_SPEED_2_5GB_FULL:
1191 			ifmr->ifm_active |= IFM_2500_SX | IFM_FDX;
1192 			break;
1193 		case IXGBE_LINK_SPEED_1GB_FULL:
1194 			ifmr->ifm_active |= IFM_1000_CX | IFM_FDX;
1195 			break;
1196 		}
1197 	}
1198 
1199 	/* If nothing is recognized... */
1200 	if (IFM_SUBTYPE(ifmr->ifm_active) == 0)
1201 		ifmr->ifm_active |= IFM_NONE;
1202 
1203 	if (sc->ifm_media & IFM_ETH_FORCEPAUSE)
1204 		ifmr->ifm_active |= (sc->ifm_media & IFM_ETH_FCMASK);
1205 
1206 	switch (sc->hw.fc.current_mode) {
1207 	case ixgbe_fc_full:
1208 		ifmr->ifm_active |= IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE;
1209 		break;
1210 	case ixgbe_fc_rx_pause:
1211 		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
1212 		break;
1213 	case ixgbe_fc_tx_pause:
1214 		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
1215 		break;
1216 	default:
1217 		break;
1218 	}
1219 }
1220 
1221 static int
1222 ix_media_change(struct ifnet *ifp)
1223 {
1224 	struct ix_softc *sc = ifp->if_softc;
1225 	struct ifmedia *ifm = &sc->media;
1226 	struct ixgbe_hw *hw = &sc->hw;
1227 
1228 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1229 		return (EINVAL);
1230 
1231 	if (hw->phy.media_type == ixgbe_media_type_backplane ||
1232 	    hw->mac.ops.setup_link == NULL) {
1233 		if ((ifm->ifm_media ^ sc->ifm_media) & IFM_ETH_FCMASK) {
1234 			/* Only flow control setting changes are allowed */
1235 			return (EOPNOTSUPP);
1236 		}
1237 	}
1238 
1239 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1240 	case IFM_AUTO:
1241 		sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;
1242 		break;
1243 
1244 	case IFM_10G_T:
1245 	case IFM_10G_LRM:
1246 	case IFM_10G_SR:	/* XXX also KR */
1247 	case IFM_10G_LR:
1248 	case IFM_10G_CX4:	/* XXX also KX4 */
1249 	case IFM_10G_TWINAX:
1250 		sc->advspeed = IXGBE_LINK_SPEED_10GB_FULL;
1251 		break;
1252 
1253 	case IFM_1000_T:
1254 	case IFM_1000_LX:
1255 	case IFM_1000_SX:
1256 	case IFM_1000_CX:	/* XXX is KX */
1257 		sc->advspeed = IXGBE_LINK_SPEED_1GB_FULL;
1258 		break;
1259 
1260 	case IFM_100_TX:
1261 		sc->advspeed = IXGBE_LINK_SPEED_100_FULL;
1262 		break;
1263 
1264 	default:
1265 		if (bootverbose) {
1266 			if_printf(ifp, "Invalid media type %d!\n",
1267 			    ifm->ifm_media);
1268 		}
1269 		return EINVAL;
1270 	}
1271 	sc->ifm_media = ifm->ifm_media;
1272 
1273 #if 0
1274 	if (hw->mac.ops.setup_link != NULL) {
1275 		hw->mac.autotry_restart = TRUE;
1276 		hw->mac.ops.setup_link(hw, sc->advspeed, TRUE);
1277 	}
1278 #else
1279 	if (ifp->if_flags & IFF_RUNNING)
1280 		ix_init(sc);
1281 #endif
1282 	return 0;
1283 }
1284 
1285 static __inline int
1286 ix_tso_pullup(struct mbuf **mp)
1287 {
1288 	int hoff, iphlen, thoff;
1289 	struct mbuf *m;
1290 
1291 	m = *mp;
1292 	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));
1293 
1294 	iphlen = m->m_pkthdr.csum_iphlen;
1295 	thoff = m->m_pkthdr.csum_thlen;
1296 	hoff = m->m_pkthdr.csum_lhlen;
1297 
1298 	KASSERT(iphlen > 0, ("invalid ip hlen"));
1299 	KASSERT(thoff > 0, ("invalid tcp hlen"));
1300 	KASSERT(hoff > 0, ("invalid ether hlen"));
1301 
1302 	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
1303 		m = m_pullup(m, hoff + iphlen + thoff);
1304 		if (m == NULL) {
1305 			*mp = NULL;
1306 			return ENOBUFS;
1307 		}
1308 		*mp = m;
1309 	}
1310 	return 0;
1311 }
1312 
1313 static int
1314 ix_encap(struct ix_tx_ring *txr, struct mbuf **m_headp,
1315     uint16_t *segs_used, int *idx)
1316 {
1317 	uint32_t olinfo_status = 0, cmd_type_len, cmd_rs = 0;
1318 	int i, j, error, nsegs, first, maxsegs;
1319 	struct mbuf *m_head = *m_headp;
1320 	bus_dma_segment_t segs[IX_MAX_SCATTER];
1321 	bus_dmamap_t map;
1322 	struct ix_tx_buf *txbuf;
1323 	union ixgbe_adv_tx_desc *txd = NULL;
1324 
1325 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1326 		error = ix_tso_pullup(m_headp);
1327 		if (__predict_false(error))
1328 			return error;
1329 		m_head = *m_headp;
1330 	}
1331 
1332 	/* Basic descriptor defines */
1333 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1334 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1335 
1336 	if (m_head->m_flags & M_VLANTAG)
1337 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1338 
1339 	/*
1340 	 * Important to capture the first descriptor
1341 	 * used because it will contain the index of
1342 	 * the one we tell the hardware to report back
1343 	 */
1344 	first = txr->tx_next_avail;
1345 	txbuf = &txr->tx_buf[first];
1346 	map = txbuf->map;
1347 
1348 	/*
1349 	 * Map the packet for DMA.
1350 	 */
1351 	maxsegs = txr->tx_avail - IX_TX_RESERVED;
1352 	if (maxsegs > IX_MAX_SCATTER)
1353 		maxsegs = IX_MAX_SCATTER;
1354 
1355 	error = bus_dmamap_load_mbuf_defrag(txr->tx_tag, map, m_headp,
1356 	    segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
1357 	if (__predict_false(error)) {
1358 		m_freem(*m_headp);
1359 		*m_headp = NULL;
1360 		return error;
1361 	}
1362 	bus_dmamap_sync(txr->tx_tag, map, BUS_DMASYNC_PREWRITE);
1363 
1364 	m_head = *m_headp;
1365 
1366 	/*
1367 	 * Set up the appropriate offload context if requested,
1368 	 * this may consume one TX descriptor.
1369 	 */
1370 	if (ix_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status)) {
1371 		(*segs_used)++;
1372 		txr->tx_nsegs++;
1373 	}
1374 
1375 	*segs_used += nsegs;
1376 	txr->tx_nsegs += nsegs;
1377 	if (txr->tx_nsegs >= txr->tx_intr_nsegs) {
1378 		/*
1379 		 * Report Status (RS) is turned on every intr_nsegs
1380 		 * descriptors (roughly).
1381 		 */
1382 		txr->tx_nsegs = 0;
1383 		cmd_rs = IXGBE_TXD_CMD_RS;
1384 	}
1385 
1386 	i = txr->tx_next_avail;
1387 	for (j = 0; j < nsegs; j++) {
1388 		bus_size_t seglen;
1389 		bus_addr_t segaddr;
1390 
1391 		txbuf = &txr->tx_buf[i];
1392 		txd = &txr->tx_base[i];
1393 		seglen = segs[j].ds_len;
1394 		segaddr = htole64(segs[j].ds_addr);
1395 
1396 		txd->read.buffer_addr = segaddr;
1397 		txd->read.cmd_type_len = htole32(IXGBE_TXD_CMD_IFCS |
1398 		    cmd_type_len | seglen);
1399 		txd->read.olinfo_status = htole32(olinfo_status);
1400 
1401 		if (++i == txr->tx_ndesc)
1402 			i = 0;
1403 	}
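	/*
	 * The final descriptor of the packet carries End Of Packet and,
	 * when the report-status threshold was crossed above, a request
	 * for a status write-back (RS).
	 */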
1404 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | cmd_rs);
1405 
1406 	txr->tx_avail -= nsegs;
1407 	txr->tx_next_avail = i;
1408 
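	/*
	 * The DMA map that was just loaded belongs to the first slot;
	 * swap it onto the buffer that records the mbuf (the last slot),
	 * so the map is unloaded together with the mbuf at completion.
	 */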
1409 	txbuf->m_head = m_head;
1410 	txr->tx_buf[first].map = txbuf->map;
1411 	txbuf->map = map;
1412 
1413 	/*
1414 	 * Defer the TDT update until enough descriptors are set up
1415 	 */
1416 	*idx = i;
1417 
1418 	return 0;
1419 }
1420 
1421 static void
1422 ix_set_promisc(struct ix_softc *sc)
1423 {
1424 	struct ifnet *ifp = &sc->arpcom.ac_if;
1425 	uint32_t reg_rctl;
1426 	int mcnt = 0;
1427 
1428 	reg_rctl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
1429 	reg_rctl &= ~IXGBE_FCTRL_UPE;
1430 	if (ifp->if_flags & IFF_ALLMULTI) {
1431 		mcnt = IX_MAX_MCASTADDR;
1432 	} else {
1433 		struct ifmultiaddr *ifma;
1434 
1435 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1436 			if (ifma->ifma_addr->sa_family != AF_LINK)
1437 				continue;
1438 			if (mcnt == IX_MAX_MCASTADDR)
1439 				break;
1440 			mcnt++;
1441 		}
1442 	}
1443 	if (mcnt < IX_MAX_MCASTADDR)
1444 		reg_rctl &= ~IXGBE_FCTRL_MPE;
1445 	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1446 
1447 	if (ifp->if_flags & IFF_PROMISC) {
1448 		reg_rctl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
1449 		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1450 	} else if (ifp->if_flags & IFF_ALLMULTI) {
1451 		reg_rctl |= IXGBE_FCTRL_MPE;
1452 		reg_rctl &= ~IXGBE_FCTRL_UPE;
1453 		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1454 	}
1455 }
1456 
1457 static void
1458 ix_set_multi(struct ix_softc *sc)
1459 {
1460 	struct ifnet *ifp = &sc->arpcom.ac_if;
1461 	struct ifmultiaddr *ifma;
1462 	uint32_t fctrl;
1463 	uint8_t	*mta;
1464 	int mcnt = 0;
1465 
1466 	mta = sc->mta;
1467 	bzero(mta, IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR);
1468 
1469 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1470 		if (ifma->ifma_addr->sa_family != AF_LINK)
1471 			continue;
1472 		if (mcnt == IX_MAX_MCASTADDR)
1473 			break;
1474 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1475 		    &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
1476 		    IXGBE_ETH_LENGTH_OF_ADDRESS);
1477 		mcnt++;
1478 	}
1479 
1480 	fctrl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
1481 	fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1482 	if (ifp->if_flags & IFF_PROMISC) {
1483 		fctrl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
1484 	} else if (mcnt >= IX_MAX_MCASTADDR || (ifp->if_flags & IFF_ALLMULTI)) {
1485 		fctrl |= IXGBE_FCTRL_MPE;
1486 		fctrl &= ~IXGBE_FCTRL_UPE;
1487 	} else {
1488 		fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1489 	}
1490 	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, fctrl);
1491 
1492 	if (mcnt < IX_MAX_MCASTADDR) {
1493 		ixgbe_update_mc_addr_list(&sc->hw,
1494 		    mta, mcnt, ix_mc_array_itr, TRUE);
1495 	}
1496 }
1497 
1498 /*
1499  * This is an iterator function needed by the multicast shared
1500  * code.  It simply feeds the shared code routine the addresses
1501  * built in ix_set_multi(), one at a time.
1502  */
1503 static uint8_t *
1504 ix_mc_array_itr(struct ixgbe_hw *hw, uint8_t **update_ptr, uint32_t *vmdq)
1505 {
1506 	uint8_t *addr = *update_ptr;
1507 	uint8_t *newptr;
1508 	*vmdq = 0;
1509 
1510 	newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
1511 	*update_ptr = newptr;
1512 	return addr;
1513 }
1514 
1515 static void
1516 ix_timer(void *arg)
1517 {
1518 	struct ix_softc *sc = arg;
1519 
1520 	lwkt_serialize_enter(&sc->main_serialize);
1521 
1522 	if ((sc->arpcom.ac_if.if_flags & IFF_RUNNING) == 0) {
1523 		lwkt_serialize_exit(&sc->main_serialize);
1524 		return;
1525 	}
1526 
1527 	/* Check for pluggable optics */
1528 	if (sc->sfp_probe) {
1529 		if (!ix_sfp_probe(sc))
1530 			goto done; /* Nothing to do */
1531 	}
1532 
1533 	ix_update_link_status(sc);
1534 	ix_update_stats(sc);
1535 
1536 done:
1537 	callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
1538 	lwkt_serialize_exit(&sc->main_serialize);
1539 }
1540 
1541 static void
1542 ix_update_link_status(struct ix_softc *sc)
1543 {
1544 	struct ifnet *ifp = &sc->arpcom.ac_if;
1545 
1546 	if (sc->link_up) {
1547 		if (sc->link_active == FALSE) {
1548 			if (bootverbose) {
1549 				if_printf(ifp, "Link is up %d Gbps %s\n",
1550 				    sc->link_speed == 128 ? 10 : 1,
1551 				    "Full Duplex");
1552 			}
1553 
1554 			/*
1555 			 * Update any Flow Control changes
1556 			 */
1557 			ixgbe_fc_enable(&sc->hw);
1558 			/* MUST after ixgbe_fc_enable() */
1559 			if (sc->rx_ring_inuse > 1) {
1560 				switch (sc->hw.fc.current_mode) {
1561 				case ixgbe_fc_rx_pause:
1562 				case ixgbe_fc_tx_pause:
1563 				case ixgbe_fc_full:
1564 					ix_disable_rx_drop(sc);
1565 					break;
1566 
1567 				case ixgbe_fc_none:
1568 					ix_enable_rx_drop(sc);
1569 					break;
1570 
1571 				default:
1572 					break;
1573 				}
1574 			}
1575 
1576 			/* Update DMA coalescing config */
1577 			ix_config_dmac(sc);
1578 
1579 			sc->link_active = TRUE;
1580 
1581 			ifp->if_link_state = LINK_STATE_UP;
1582 			if_link_state_change(ifp);
1583 		}
1584 	} else { /* Link down */
1585 		if (sc->link_active == TRUE) {
1586 			if (bootverbose)
1587 				if_printf(ifp, "Link is Down\n");
1588 			ifp->if_link_state = LINK_STATE_DOWN;
1589 			if_link_state_change(ifp);
1590 
1591 			sc->link_active = FALSE;
1592 		}
1593 	}
1594 }
1595 
1596 static void
1597 ix_stop(struct ix_softc *sc)
1598 {
1599 	struct ixgbe_hw *hw = &sc->hw;
1600 	struct ifnet *ifp = &sc->arpcom.ac_if;
1601 	int i;
1602 
1603 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1604 
1605 	ix_disable_intr(sc);
1606 	callout_stop(&sc->timer);
1607 
1608 	ifp->if_flags &= ~IFF_RUNNING;
1609 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1610 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1611 
1612 		ifsq_clr_oactive(txr->tx_ifsq);
1613 		ifsq_watchdog_stop(&txr->tx_watchdog);
1614 		txr->tx_flags &= ~IX_TXFLAG_ENABLED;
1615 	}
1616 
1617 	ixgbe_reset_hw(hw);
1618 	hw->adapter_stopped = FALSE;
1619 	ixgbe_stop_adapter(hw);
1620 	if (hw->mac.type == ixgbe_mac_82599EB)
1621 		ixgbe_stop_mac_link_on_d3_82599(hw);
1622 	/* Turn off the laser - noop with no optics */
1623 	ixgbe_disable_tx_laser(hw);
1624 
1625 	/* Update the stack */
1626 	sc->link_up = FALSE;
1627 	ix_update_link_status(sc);
1628 
1629 	/* Reprogram the RAR[0] in case user changed it. */
1630 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV);
1631 
1632 	for (i = 0; i < sc->tx_ring_cnt; ++i)
1633 		ix_free_tx_ring(&sc->tx_rings[i]);
1634 
1635 	for (i = 0; i < sc->rx_ring_cnt; ++i)
1636 		ix_free_rx_ring(&sc->rx_rings[i]);
1637 }
1638 
1639 static void
1640 ix_setup_ifp(struct ix_softc *sc)
1641 {
1642 	struct ixgbe_hw *hw = &sc->hw;
1643 	struct ifnet *ifp = &sc->arpcom.ac_if;
1644 	int i;
1645 
1646 	ifp->if_baudrate = IF_Gbps(10UL);
1647 
1648 	ifp->if_softc = sc;
1649 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1650 	ifp->if_init = ix_init;
1651 	ifp->if_ioctl = ix_ioctl;
1652 	ifp->if_start = ix_start;
1653 	ifp->if_serialize = ix_serialize;
1654 	ifp->if_deserialize = ix_deserialize;
1655 	ifp->if_tryserialize = ix_tryserialize;
1656 #ifdef INVARIANTS
1657 	ifp->if_serialize_assert = ix_serialize_assert;
1658 #endif
1659 #ifdef IFPOLL_ENABLE
1660 	ifp->if_npoll = ix_npoll;
1661 #endif
1662 
1663 	/* Increase TSO burst length */
1664 	ifp->if_tsolen = (8 * ETHERMTU);
1665 
1666 	ifp->if_nmbclusters = sc->rx_ring_cnt * sc->rx_rings[0].rx_ndesc;
1667 	ifp->if_nmbjclusters = ifp->if_nmbclusters;
1668 
1669 	ifq_set_maxlen(&ifp->if_snd, sc->tx_rings[0].tx_ndesc - 2);
1670 	ifq_set_ready(&ifp->if_snd);
1671 	ifq_set_subq_cnt(&ifp->if_snd, sc->tx_ring_cnt);
1672 
1673 	ifp->if_mapsubq = ifq_mapsubq_mask;
1674 	ifq_set_subq_mask(&ifp->if_snd, 0);
1675 
1676 	ether_ifattach(ifp, hw->mac.addr, NULL);
1677 
1678 	ifp->if_capabilities =
1679 	    IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
1680 	if (IX_ENABLE_HWRSS(sc))
1681 		ifp->if_capabilities |= IFCAP_RSS;
1682 	ifp->if_capenable = ifp->if_capabilities;
1683 	ifp->if_hwassist = CSUM_OFFLOAD | CSUM_TSO;
1684 
1685 	/*
1686 	 * Tell the upper layer(s) we support long frames.
1687 	 */
1688 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
1689 
1690 	/* Setup TX rings and subqueues */
1691 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1692 		struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
1693 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1694 
1695 		ifsq_set_cpuid(ifsq, txr->tx_intr_cpuid);
1696 		ifsq_set_priv(ifsq, txr);
1697 		ifsq_set_hw_serialize(ifsq, &txr->tx_serialize);
1698 		txr->tx_ifsq = ifsq;
1699 
1700 		ifsq_watchdog_init(&txr->tx_watchdog, ifsq, ix_watchdog);
1701 	}
1702 
1703 	/* Specify the media types supported by this adapter */
1704 	ix_init_media(sc);
1705 }
1706 
1707 static boolean_t
1708 ix_is_sfp(const struct ixgbe_hw *hw)
1709 {
1710 	switch (hw->phy.type) {
1711 	case ixgbe_phy_sfp_avago:
1712 	case ixgbe_phy_sfp_ftl:
1713 	case ixgbe_phy_sfp_intel:
1714 	case ixgbe_phy_sfp_unknown:
1715 	case ixgbe_phy_sfp_passive_tyco:
1716 	case ixgbe_phy_sfp_passive_unknown:
1717 	case ixgbe_phy_qsfp_passive_unknown:
1718 	case ixgbe_phy_qsfp_active_unknown:
1719 	case ixgbe_phy_qsfp_intel:
1720 	case ixgbe_phy_qsfp_unknown:
1721 		return TRUE;
1722 	default:
1723 		return FALSE;
1724 	}
1725 }
1726 
1727 static void
1728 ix_config_link(struct ix_softc *sc)
1729 {
1730 	struct ixgbe_hw *hw = &sc->hw;
1731 	boolean_t sfp;
1732 
1733 	sfp = ix_is_sfp(hw);
1734 	if (sfp) {
1735 		if (hw->phy.multispeed_fiber) {
1736 			hw->mac.ops.setup_sfp(hw);
1737 			ixgbe_enable_tx_laser(hw);
1738 			ix_handle_msf(sc);
1739 		} else {
1740 			ix_handle_mod(sc);
1741 		}
1742 	} else {
1743 		uint32_t autoneg, err = 0;
1744 
1745 		if (hw->mac.ops.check_link != NULL) {
1746 			err = ixgbe_check_link(hw, &sc->link_speed,
1747 			    &sc->link_up, FALSE);
1748 			if (err)
1749 				return;
1750 		}
1751 
1752 		if (sc->advspeed != IXGBE_LINK_SPEED_UNKNOWN)
1753 			autoneg = sc->advspeed;
1754 		else
1755 			autoneg = hw->phy.autoneg_advertised;
1756 		if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
1757 			bool negotiate;
1758 
1759 			err = hw->mac.ops.get_link_capabilities(hw,
1760 			    &autoneg, &negotiate);
1761 			if (err)
1762 				return;
1763 		}
1764 
1765 		if (hw->mac.ops.setup_link != NULL) {
1766 			err = hw->mac.ops.setup_link(hw,
1767 			    autoneg, sc->link_up);
1768 			if (err)
1769 				return;
1770 		}
1771 	}
1772 }
1773 
1774 static int
1775 ix_alloc_rings(struct ix_softc *sc)
1776 {
1777 	int error, i;
1778 
1779 	/*
1780 	 * Create top level busdma tag
1781 	 */
1782 	error = bus_dma_tag_create(NULL, 1, 0,
1783 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1784 	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
1785 	    &sc->parent_tag);
1786 	if (error) {
1787 		device_printf(sc->dev, "could not create top level DMA tag\n");
1788 		return error;
1789 	}
1790 
1791 	/*
1792 	 * Allocate TX descriptor rings and buffers
1793 	 */
1794 	sc->tx_rings = kmalloc_cachealign(
1795 	    sizeof(struct ix_tx_ring) * sc->tx_ring_cnt,
1796 	    M_DEVBUF, M_WAITOK | M_ZERO);
1797 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1798 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1799 
1800 		txr->tx_sc = sc;
1801 		txr->tx_idx = i;
1802 		txr->tx_intr_vec = -1;
1803 		lwkt_serialize_init(&txr->tx_serialize);
1804 
1805 		error = ix_create_tx_ring(txr);
1806 		if (error)
1807 			return error;
1808 	}
1809 
1810 	/*
1811 	 * Allocate RX descriptor rings and buffers
1812 	 */
1813 	sc->rx_rings = kmalloc_cachealign(
1814 	    sizeof(struct ix_rx_ring) * sc->rx_ring_cnt,
1815 	    M_DEVBUF, M_WAITOK | M_ZERO);
1816 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
1817 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
1818 
1819 		rxr->rx_sc = sc;
1820 		rxr->rx_idx = i;
1821 		rxr->rx_intr_vec = -1;
1822 		lwkt_serialize_init(&rxr->rx_serialize);
1823 
1824 		error = ix_create_rx_ring(rxr);
1825 		if (error)
1826 			return error;
1827 	}
1828 
1829 	return 0;
1830 }
1831 
1832 static int
1833 ix_create_tx_ring(struct ix_tx_ring *txr)
1834 {
1835 	int error, i, tsize, ntxd;
1836 
1837 	/*
1838 	 * Validate number of transmit descriptors.  It must not exceed
1839 	 * hardware maximum, and must be multiple of IX_DBA_ALIGN.
1840 	 */
1841 	ntxd = device_getenv_int(txr->tx_sc->dev, "txd", ix_txd);
1842 	if (((ntxd * sizeof(union ixgbe_adv_tx_desc)) % IX_DBA_ALIGN) != 0 ||
1843 	    ntxd < IX_MIN_TXD || ntxd > IX_MAX_TXD) {
1844 		device_printf(txr->tx_sc->dev,
1845 		    "Using %d TX descriptors instead of %d!\n",
1846 		    IX_DEF_TXD, ntxd);
1847 		txr->tx_ndesc = IX_DEF_TXD;
1848 	} else {
1849 		txr->tx_ndesc = ntxd;
1850 	}
1851 
1852 	/*
1853 	 * Allocate TX head write-back buffer
1854 	 */
1855 	txr->tx_hdr = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
1856 	    __VM_CACHELINE_SIZE, __VM_CACHELINE_SIZE, BUS_DMA_WAITOK,
1857 	    &txr->tx_hdr_dtag, &txr->tx_hdr_map, &txr->tx_hdr_paddr);
1858 	if (txr->tx_hdr == NULL) {
1859 		device_printf(txr->tx_sc->dev,
1860 		    "Unable to allocate TX head write-back buffer\n");
1861 		return ENOMEM;
1862 	}
1863 
1864 	/*
1865 	 * Allocate TX descriptor ring
1866 	 */
1867 	tsize = roundup2(txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc),
1868 	    IX_DBA_ALIGN);
1869 	txr->tx_base = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
1870 	    IX_DBA_ALIGN, tsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
1871 	    &txr->tx_base_dtag, &txr->tx_base_map, &txr->tx_base_paddr);
1872 	if (txr->tx_base == NULL) {
1873 		device_printf(txr->tx_sc->dev,
1874 		    "Unable to allocate TX Descriptor memory\n");
1875 		return ENOMEM;
1876 	}
1877 
1878 	tsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_tx_buf) * txr->tx_ndesc);
1879 	txr->tx_buf = kmalloc_cachealign(tsize, M_DEVBUF, M_WAITOK | M_ZERO);
1880 
1881 	/*
1882 	 * Create DMA tag for TX buffers
1883 	 */
1884 	error = bus_dma_tag_create(txr->tx_sc->parent_tag,
1885 	    1, 0,		/* alignment, bounds */
1886 	    BUS_SPACE_MAXADDR,	/* lowaddr */
1887 	    BUS_SPACE_MAXADDR,	/* highaddr */
1888 	    NULL, NULL,		/* filter, filterarg */
1889 	    IX_TSO_SIZE,	/* maxsize */
1890 	    IX_MAX_SCATTER,	/* nsegments */
1891 	    PAGE_SIZE,		/* maxsegsize */
1892 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
1893 	    BUS_DMA_ONEBPAGE,	/* flags */
1894 	    &txr->tx_tag);
1895 	if (error) {
1896 		device_printf(txr->tx_sc->dev,
1897 		    "Unable to allocate TX DMA tag\n");
1898 		kfree(txr->tx_buf, M_DEVBUF);
1899 		txr->tx_buf = NULL;
1900 		return error;
1901 	}
1902 
1903 	/*
1904 	 * Create DMA maps for TX buffers
1905 	 */
1906 	for (i = 0; i < txr->tx_ndesc; ++i) {
1907 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
1908 
1909 		error = bus_dmamap_create(txr->tx_tag,
1910 		    BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &txbuf->map);
1911 		if (error) {
1912 			device_printf(txr->tx_sc->dev,
1913 			    "Unable to create TX DMA map\n");
1914 			ix_destroy_tx_ring(txr, i);
1915 			return error;
1916 		}
1917 	}
1918 
1919 	/*
1920 	 * Initialize various watermark
1921 	 * Initialize various watermarks
1922 	txr->tx_wreg_nsegs = IX_DEF_TXWREG_NSEGS;
1923 	txr->tx_intr_nsegs = txr->tx_ndesc / 16;
1924 
1925 	return 0;
1926 }
1927 
1928 static void
1929 ix_destroy_tx_ring(struct ix_tx_ring *txr, int ndesc)
1930 {
1931 	int i;
1932 
1933 	if (txr->tx_hdr != NULL) {
1934 		bus_dmamap_unload(txr->tx_hdr_dtag, txr->tx_hdr_map);
1935 		bus_dmamem_free(txr->tx_hdr_dtag,
1936 		    __DEVOLATILE(void *, txr->tx_hdr), txr->tx_hdr_map);
1937 		bus_dma_tag_destroy(txr->tx_hdr_dtag);
1938 		txr->tx_hdr = NULL;
1939 	}
1940 
1941 	if (txr->tx_base != NULL) {
1942 		bus_dmamap_unload(txr->tx_base_dtag, txr->tx_base_map);
1943 		bus_dmamem_free(txr->tx_base_dtag, txr->tx_base,
1944 		    txr->tx_base_map);
1945 		bus_dma_tag_destroy(txr->tx_base_dtag);
1946 		txr->tx_base = NULL;
1947 	}
1948 
1949 	if (txr->tx_buf == NULL)
1950 		return;
1951 
1952 	for (i = 0; i < ndesc; ++i) {
1953 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
1954 
1955 		KKASSERT(txbuf->m_head == NULL);
1956 		bus_dmamap_destroy(txr->tx_tag, txbuf->map);
1957 	}
1958 	bus_dma_tag_destroy(txr->tx_tag);
1959 
1960 	kfree(txr->tx_buf, M_DEVBUF);
1961 	txr->tx_buf = NULL;
1962 }
1963 
1964 static void
1965 ix_init_tx_ring(struct ix_tx_ring *txr)
1966 {
1967 	/* Clear the old ring contents */
1968 	bzero(txr->tx_base, sizeof(union ixgbe_adv_tx_desc) * txr->tx_ndesc);
1969 
1970 	/* Clear TX head write-back buffer */
1971 	*(txr->tx_hdr) = 0;
1972 
1973 	/* Reset indices */
1974 	txr->tx_next_avail = 0;
1975 	txr->tx_next_clean = 0;
1976 	txr->tx_nsegs = 0;
1977 
1978 	/* Set number of descriptors available */
1979 	txr->tx_avail = txr->tx_ndesc;
1980 
1981 	/* Enable this TX ring */
1982 	txr->tx_flags |= IX_TXFLAG_ENABLED;
1983 }
1984 
1985 static void
1986 ix_init_tx_unit(struct ix_softc *sc)
1987 {
1988 	struct ixgbe_hw	*hw = &sc->hw;
1989 	int i;
1990 
1991 	/*
1992 	 * Setup the Base and Length of the Tx Descriptor Ring
1993 	 */
1994 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
1995 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1996 		uint64_t tdba = txr->tx_base_paddr;
1997 		uint64_t hdr_paddr = txr->tx_hdr_paddr;
1998 		uint32_t txctrl;
1999 
2000 		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i), (uint32_t)tdba);
2001 		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (uint32_t)(tdba >> 32));
2002 		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
2003 		    txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc));
2004 
2005 		/* Setup the HW Tx Head and Tail descriptor pointers */
2006 		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
2007 		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
2008 
2009 		/* Disable TX head write-back relax ordering */
2010 		switch (hw->mac.type) {
2011 		case ixgbe_mac_82598EB:
2012 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
2013 			break;
2014 		case ixgbe_mac_82599EB:
2015 		case ixgbe_mac_X540:
2016 		default:
2017 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
2018 			break;
2019 		}
2020 		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
2021 		switch (hw->mac.type) {
2022 		case ixgbe_mac_82598EB:
2023 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
2024 			break;
2025 		case ixgbe_mac_82599EB:
2026 		case ixgbe_mac_X540:
2027 		default:
2028 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
2029 			break;
2030 		}
2031 
2032 		/* Enable TX head write-back */
2033 		IXGBE_WRITE_REG(hw, IXGBE_TDWBAH(i),
2034 		    (uint32_t)(hdr_paddr >> 32));
2035 		IXGBE_WRITE_REG(hw, IXGBE_TDWBAL(i),
2036 		    ((uint32_t)hdr_paddr) | IXGBE_TDWBAL_HEAD_WB_ENABLE);
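		/*
		 * Note (illustrative): the write-back enable flag occupies
		 * the low bits of TDWBAL.  OR'ing it into the address is
		 * safe because the buffer was allocated cacheline-aligned
		 * in ix_create_tx_ring(), so those address bits are zero.
		 */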
2037 	}
2038 
2039 	if (hw->mac.type != ixgbe_mac_82598EB) {
2040 		uint32_t dmatxctl, rttdcs;
2041 
2042 		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
2043 		dmatxctl |= IXGBE_DMATXCTL_TE;
2044 		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
2045 
2046 		/* Disable arbiter to set MTQC */
2047 		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
2048 		rttdcs |= IXGBE_RTTDCS_ARBDIS;
2049 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
2050 
2051 		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
2052 
2053 		/* Re-enable arbiter */
2054 		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
2055 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
2056 	}
2057 }
2058 
2059 static int
2060 ix_tx_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
2061     uint32_t *cmd_type_len, uint32_t *olinfo_status)
2062 {
2063 	struct ixgbe_adv_tx_context_desc *TXD;
2064 	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2065 	int ehdrlen, ip_hlen = 0, ctxd;
2066 	boolean_t offload = TRUE;
2067 
2068 	/* First check if TSO is to be used */
2069 	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
2070 		return ix_tso_ctx_setup(txr, mp,
2071 		    cmd_type_len, olinfo_status);
2072 	}
2073 
2074 	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
2075 		offload = FALSE;
2076 
2077 	/* Indicate the whole packet as payload when not doing TSO */
2078 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
2079 
2080 	/*
2081 	 * In advanced descriptors the vlan tag must be placed into the
2082 	 * context descriptor.  Hence we need to make one even if not
2083 	 * doing checksum offloads.
2084 	 */
2085 	if (mp->m_flags & M_VLANTAG) {
2086 		vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
2087 		    IXGBE_ADVTXD_VLAN_SHIFT;
2088 	} else if (!offload) {
2089 		/* No TX descriptor is consumed */
2090 		return 0;
2091 	}
2092 
2093 	/* Set the ether header length */
2094 	ehdrlen = mp->m_pkthdr.csum_lhlen;
2095 	KASSERT(ehdrlen > 0, ("invalid ether hlen"));
2096 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
2097 
2098 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
2099 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
2100 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
2101 		ip_hlen = mp->m_pkthdr.csum_iphlen;
2102 		KASSERT(ip_hlen > 0, ("invalid ip hlen"));
2103 	}
2104 	vlan_macip_lens |= ip_hlen;
2105 
2106 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
2107 	if (mp->m_pkthdr.csum_flags & CSUM_TCP)
2108 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
2109 	else if (mp->m_pkthdr.csum_flags & CSUM_UDP)
2110 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
2111 
2112 	if (mp->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))
2113 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
2114 
2115 	/* Now ready a context descriptor */
2116 	ctxd = txr->tx_next_avail;
2117 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
2118 
2119 	/* Now copy bits into descriptor */
2120 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
2121 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
2122 	TXD->seqnum_seed = htole32(0);
2123 	TXD->mss_l4len_idx = htole32(0);
2124 
2125 	/* We've consumed the first desc, adjust counters */
2126 	if (++ctxd == txr->tx_ndesc)
2127 		ctxd = 0;
2128 	txr->tx_next_avail = ctxd;
2129 	--txr->tx_avail;
2130 
2131 	/* One TX descriptor is consumed */
2132 	return 1;
2133 }
2134 
2135 static int
2136 ix_tso_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
2137     uint32_t *cmd_type_len, uint32_t *olinfo_status)
2138 {
2139 	struct ixgbe_adv_tx_context_desc *TXD;
2140 	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2141 	uint32_t mss_l4len_idx = 0, paylen;
2142 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
2143 
2144 	ehdrlen = mp->m_pkthdr.csum_lhlen;
2145 	KASSERT(ehdrlen > 0, ("invalid ether hlen"));
2146 
2147 	ip_hlen = mp->m_pkthdr.csum_iphlen;
2148 	KASSERT(ip_hlen > 0, ("invalid ip hlen"));
2149 
2150 	tcp_hlen = mp->m_pkthdr.csum_thlen;
2151 	KASSERT(tcp_hlen > 0, ("invalid tcp hlen"));
2152 
2153 	ctxd = txr->tx_next_avail;
2154 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
2155 
2156 	if (mp->m_flags & M_VLANTAG) {
2157 		vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
2158 		    IXGBE_ADVTXD_VLAN_SHIFT;
2159 	}
2160 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
2161 	vlan_macip_lens |= ip_hlen;
2162 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
2163 
2164 	/* ADV DTYPE TUCMD */
2165 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
2166 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
2167 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
2168 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
2169 
2170 	/* MSS L4LEN IDX */
2171 	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
2172 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
2173 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
2174 
2175 	TXD->seqnum_seed = htole32(0);
2176 
2177 	if (++ctxd == txr->tx_ndesc)
2178 		ctxd = 0;
2179 
2180 	txr->tx_avail--;
2181 	txr->tx_next_avail = ctxd;
2182 
2183 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
2184 
2185 	/* This is used in the transmit desc in encap */
2186 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
2187 
2188 	*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
2189 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
2190 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
2191 
2192 	/* One TX descriptor is consumed */
2193 	return 1;
2194 }
2195 
2196 static void
2197 ix_txeof(struct ix_tx_ring *txr, int hdr)
2198 {
2199 	int first, avail;
2200 
2201 	if (txr->tx_avail == txr->tx_ndesc)
2202 		return;
2203 
2204 	first = txr->tx_next_clean;
2205 	if (first == hdr)
2206 		return;
2207 
2208 	avail = txr->tx_avail;
2209 	while (first != hdr) {
2210 		struct ix_tx_buf *txbuf = &txr->tx_buf[first];
2211 
2212 		++avail;
2213 		if (txbuf->m_head) {
2214 			bus_dmamap_unload(txr->tx_tag, txbuf->map);
2215 			m_freem(txbuf->m_head);
2216 			txbuf->m_head = NULL;
2217 		}
2218 		if (++first == txr->tx_ndesc)
2219 			first = 0;
2220 	}
2221 	txr->tx_next_clean = first;
2222 	txr->tx_avail = avail;
2223 
2224 	if (txr->tx_avail > IX_MAX_SCATTER + IX_TX_RESERVED) {
2225 		ifsq_clr_oactive(txr->tx_ifsq);
2226 		txr->tx_watchdog.wd_timer = 0;
2227 	}
2228 }
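
/*
 * Illustrative usage sketch (not the verbatim call site): with TX head
 * write-back enabled in ix_init_tx_unit(), the hardware DMAs the index
 * of the last completed descriptor into *txr->tx_hdr, so an interrupt
 * or polling handler would typically reap descriptors along the lines
 * of:
 *
 *	ix_txeof(txr, *txr->tx_hdr);
 */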
2229 
2230 static int
2231 ix_create_rx_ring(struct ix_rx_ring *rxr)
2232 {
2233 	int i, rsize, error, nrxd;
2234 
2235 	/*
2236 	 * Validate number of receive descriptors.  It must not exceed
2237 	 * hardware maximum, and must be a multiple of IX_DBA_ALIGN.
2238 	 */
2239 	nrxd = device_getenv_int(rxr->rx_sc->dev, "rxd", ix_rxd);
2240 	if (((nrxd * sizeof(union ixgbe_adv_rx_desc)) % IX_DBA_ALIGN) != 0 ||
2241 	    nrxd < IX_MIN_RXD || nrxd > IX_MAX_RXD) {
2242 		device_printf(rxr->rx_sc->dev,
2243 		    "Using %d RX descriptors instead of %d!\n",
2244 		    IX_DEF_RXD, nrxd);
2245 		rxr->rx_ndesc = IX_DEF_RXD;
2246 	} else {
2247 		rxr->rx_ndesc = nrxd;
2248 	}
2249 
2250 	/*
2251 	 * Allocate RX descriptor ring
2252 	 */
2253 	rsize = roundup2(rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc),
2254 	    IX_DBA_ALIGN);
2255 	rxr->rx_base = bus_dmamem_coherent_any(rxr->rx_sc->parent_tag,
2256 	    IX_DBA_ALIGN, rsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
2257 	    &rxr->rx_base_dtag, &rxr->rx_base_map, &rxr->rx_base_paddr);
2258 	if (rxr->rx_base == NULL) {
2259 		device_printf(rxr->rx_sc->dev,
2260 		    "Unable to allocate RX Descriptor memory\n");
2261 		return ENOMEM;
2262 	}
2263 
2264 	rsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_rx_buf) * rxr->rx_ndesc);
2265 	rxr->rx_buf = kmalloc_cachealign(rsize, M_DEVBUF, M_WAITOK | M_ZERO);
2266 
2267 	/*
2268 	 * Create DMA tag for RX buffers
2269 	 */
2270 	error = bus_dma_tag_create(rxr->rx_sc->parent_tag,
2271 	    1, 0,		/* alignment, bounds */
2272 	    BUS_SPACE_MAXADDR,	/* lowaddr */
2273 	    BUS_SPACE_MAXADDR,	/* highaddr */
2274 	    NULL, NULL,		/* filter, filterarg */
2275 	    PAGE_SIZE,		/* maxsize */
2276 	    1,			/* nsegments */
2277 	    PAGE_SIZE,		/* maxsegsize */
2278 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, /* flags */
2279 	    &rxr->rx_tag);
2280 	if (error) {
2281 		device_printf(rxr->rx_sc->dev,
2282 		    "Unable to create RX DMA tag\n");
2283 		kfree(rxr->rx_buf, M_DEVBUF);
2284 		rxr->rx_buf = NULL;
2285 		return error;
2286 	}
2287 
2288 	/*
2289 	 * Create spare DMA map for RX buffers
2290 	 */
2291 	error = bus_dmamap_create(rxr->rx_tag, BUS_DMA_WAITOK,
2292 	    &rxr->rx_sparemap);
2293 	if (error) {
2294 		device_printf(rxr->rx_sc->dev,
2295 		    "Unable to create spare RX DMA map\n");
2296 		bus_dma_tag_destroy(rxr->rx_tag);
2297 		kfree(rxr->rx_buf, M_DEVBUF);
2298 		rxr->rx_buf = NULL;
2299 		return error;
2300 	}
2301 
2302 	/*
2303 	 * Create DMA maps for RX buffers
2304 	 */
2305 	for (i = 0; i < rxr->rx_ndesc; ++i) {
2306 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2307 
2308 		error = bus_dmamap_create(rxr->rx_tag,
2309 		    BUS_DMA_WAITOK, &rxbuf->map);
2310 		if (error) {
2311 			device_printf(rxr->rx_sc->dev,
2312 			    "Unable to create RX dma map\n");
2313 			ix_destroy_rx_ring(rxr, i);
2314 			return error;
2315 		}
2316 	}
2317 
2318 	/*
2319 	 * Initialize various watermarks
2320 	 */
2321 	rxr->rx_wreg_nsegs = IX_DEF_RXWREG_NSEGS;
2322 
2323 	return 0;
2324 }
2325 
2326 static void
2327 ix_destroy_rx_ring(struct ix_rx_ring *rxr, int ndesc)
2328 {
2329 	int i;
2330 
2331 	if (rxr->rx_base != NULL) {
2332 		bus_dmamap_unload(rxr->rx_base_dtag, rxr->rx_base_map);
2333 		bus_dmamem_free(rxr->rx_base_dtag, rxr->rx_base,
2334 		    rxr->rx_base_map);
2335 		bus_dma_tag_destroy(rxr->rx_base_dtag);
2336 		rxr->rx_base = NULL;
2337 	}
2338 
2339 	if (rxr->rx_buf == NULL)
2340 		return;
2341 
2342 	for (i = 0; i < ndesc; ++i) {
2343 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2344 
2345 		KKASSERT(rxbuf->m_head == NULL);
2346 		bus_dmamap_destroy(rxr->rx_tag, rxbuf->map);
2347 	}
2348 	bus_dmamap_destroy(rxr->rx_tag, rxr->rx_sparemap);
2349 	bus_dma_tag_destroy(rxr->rx_tag);
2350 
2351 	kfree(rxr->rx_buf, M_DEVBUF);
2352 	rxr->rx_buf = NULL;
2353 }
2354 
2355 /*
2356 ** Used to detect a descriptor that has
2357 ** been merged by Hardware RSC.
2358 */
2359 static __inline uint32_t
2360 ix_rsc_count(union ixgbe_adv_rx_desc *rx)
2361 {
2362 	return (le32toh(rx->wb.lower.lo_dword.data) &
2363 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
2364 }
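
/*
 * Note (illustrative): a non-zero RSC count tells ix_rxeof() that the
 * hardware chained this frame's descriptors through the NEXTP field
 * rather than leaving them strictly sequential; see the !eop handling
 * there.
 */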
2365 
2366 #if 0
2367 /*********************************************************************
2368  *
2369  *  Initialize the hardware RSC (LRO) feature on the 82599
2370  *  for an RX ring; it is toggled by the LRO capability
2371  *  even though it is transparent to the stack.
2372  *
2373  *  NOTE: since this HW feature only works with IPv4 and
2374  *        our testing has shown soft LRO to be just as
2375  *        effective, it is disabled by default.
2376  *
2377  **********************************************************************/
2378 static void
2379 ix_setup_hw_rsc(struct ix_rx_ring *rxr)
2380 {
2381 	struct	ix_softc 	*sc = rxr->rx_sc;
2382 	struct	ixgbe_hw	*hw = &sc->hw;
2383 	uint32_t			rscctrl, rdrxctl;
2384 
2385 #if 0
2386 	/* If turning LRO/RSC off we need to disable it */
2387 	if ((sc->arpcom.ac_if.if_capenable & IFCAP_LRO) == 0) {
2388 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2389 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
2390 		return;
2391 	}
2392 #endif
2393 
2394 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
2395 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
2396 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
2397 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
2398 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
2399 
2400 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2401 	rscctrl |= IXGBE_RSCCTL_RSCEN;
2402 	/*
2403 	** Limit the total number of descriptors that
2404 	** can be combined, so it does not exceed 64K
2405 	*/
2406 	if (rxr->mbuf_sz == MCLBYTES)
2407 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
2408 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
2409 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
2410 	else if (rxr->mbuf_sz == MJUM9BYTES)
2411 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
2412 	else  /* Using 16K cluster */
2413 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
2414 
2415 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
2416 
2417 	/* Enable TCP header recognition */
2418 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
2419 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
2420 	    IXGBE_PSRTYPE_TCPHDR));
2421 
2422 	/* Disable RSC for ACK packets */
2423 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
2424 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
2425 
2426 	rxr->hw_rsc = TRUE;
2427 }
2428 #endif
2429 
2430 static int
2431 ix_init_rx_ring(struct ix_rx_ring *rxr)
2432 {
2433 	int i;
2434 
2435 	/* Clear the ring contents */
2436 	bzero(rxr->rx_base, rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2437 
2438 	/* XXX we need JUMPAGESIZE for RSC too */
2439 	if (rxr->rx_sc->max_frame_size <= MCLBYTES)
2440 		rxr->rx_mbuf_sz = MCLBYTES;
2441 	else
2442 		rxr->rx_mbuf_sz = MJUMPAGESIZE;
2443 
2444 	/* Now replenish the mbufs */
2445 	for (i = 0; i < rxr->rx_ndesc; ++i) {
2446 		int error;
2447 
2448 		error = ix_newbuf(rxr, i, TRUE);
2449 		if (error)
2450 			return error;
2451 	}
2452 
2453 	/* Setup our descriptor indices */
2454 	rxr->rx_next_check = 0;
2455 	rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2456 
2457 #if 0
2458 	/*
2459 	** Now set up the LRO interface:
2460 	*/
2461 	if (ixgbe_rsc_enable)
2462 		ix_setup_hw_rsc(rxr);
2463 #endif
2464 
2465 	return 0;
2466 }
2467 
2468 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
2469 
2470 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
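
/*
 * Illustrative arithmetic for the SRRCTL buffer size programmed below:
 * bufsz = (rx_mbuf_sz + BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT
 * rounds the receive buffer size up to the BSIZEPKT granularity
 * (assuming the usual 1KB units, a 2KB MCLBYTES cluster yields 2 and a
 * 4KB MJUMPAGESIZE cluster yields 4).
 */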
2471 
2472 static void
2473 ix_init_rx_unit(struct ix_softc *sc)
2474 {
2475 	struct ixgbe_hw	*hw = &sc->hw;
2476 	struct ifnet *ifp = &sc->arpcom.ac_if;
2477 	uint32_t bufsz, fctrl, rxcsum, hlreg;
2478 	int i;
2479 
2480 	/*
2481 	 * Make sure receives are disabled while setting up the descriptor ring
2482 	 */
2483 	ixgbe_disable_rx(hw);
2484 
2485 	/* Enable broadcasts */
2486 	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
2487 	fctrl |= IXGBE_FCTRL_BAM;
2488 	if (hw->mac.type == ixgbe_mac_82598EB) {
2489 		fctrl |= IXGBE_FCTRL_DPF;
2490 		fctrl |= IXGBE_FCTRL_PMCF;
2491 	}
2492 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
2493 
2494 	/* Set for Jumbo Frames? */
2495 	hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
2496 	if (ifp->if_mtu > ETHERMTU)
2497 		hlreg |= IXGBE_HLREG0_JUMBOEN;
2498 	else
2499 		hlreg &= ~IXGBE_HLREG0_JUMBOEN;
2500 	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
2501 
2502 	KKASSERT(sc->rx_rings[0].rx_mbuf_sz >= MCLBYTES);
2503 	bufsz = (sc->rx_rings[0].rx_mbuf_sz + BSIZEPKT_ROUNDUP) >>
2504 	    IXGBE_SRRCTL_BSIZEPKT_SHIFT;
2505 
2506 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
2507 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
2508 		uint64_t rdba = rxr->rx_base_paddr;
2509 		uint32_t srrctl;
2510 
2511 		/* Setup the Base and Length of the Rx Descriptor Ring */
2512 		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i), (uint32_t)rdba);
2513 		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (uint32_t)(rdba >> 32));
2514 		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
2515 		    rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2516 
2517 		/*
2518 		 * Set up the SRRCTL register
2519 		 */
2520 		srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
2521 
2522 		srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
2523 		srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
2524 		srrctl |= bufsz;
2525 		srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
2526 		if (sc->rx_ring_inuse > 1) {
2527 			/* See the comment near ix_enable_rx_drop() */
2528 			if (sc->ifm_media &
2529 			    (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) {
2530 				srrctl &= ~IXGBE_SRRCTL_DROP_EN;
2531 				if (i == 0 && bootverbose) {
2532 					if_printf(ifp, "flow control %s, "
2533 					    "disable RX drop\n",
2534 					    ix_ifmedia2str(sc->ifm_media));
2535 				}
2536 			} else {
2537 				srrctl |= IXGBE_SRRCTL_DROP_EN;
2538 				if (i == 0 && bootverbose) {
2539 					if_printf(ifp, "flow control %s, "
2540 					    "enable RX drop\n",
2541 					    ix_ifmedia2str(sc->ifm_media));
2542 				}
2543 			}
2544 		}
2545 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
2546 
2547 		/* Setup the HW Rx Head and Tail Descriptor Pointers */
2548 		IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
2549 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
2550 	}
2551 
2552 	if (sc->hw.mac.type != ixgbe_mac_82598EB)
2553 		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), 0);
2554 
2555 	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
2556 
2557 	/*
2558 	 * Setup RSS
2559 	 */
2560 	if (IX_ENABLE_HWRSS(sc)) {
2561 		uint8_t key[IX_NRSSRK * IX_RSSRK_SIZE];
2562 		int j, r, nreta;
2563 
2564 		/*
2565 		 * NOTE:
2566 		 * When we reach here, RSS has already been disabled
2567 		 * in ix_stop(), so we can safely configure the RSS key
2568 		 * and redirect table.
2569 		 */
2570 
2571 		/*
2572 		 * Configure RSS key
2573 		 */
2574 		toeplitz_get_key(key, sizeof(key));
2575 		for (i = 0; i < IX_NRSSRK; ++i) {
2576 			uint32_t rssrk;
2577 
2578 			rssrk = IX_RSSRK_VAL(key, i);
2579 			IX_RSS_DPRINTF(sc, 1, "rssrk%d 0x%08x\n",
2580 			    i, rssrk);
2581 
2582 			IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rssrk);
2583 		}
2584 
2585 		/* Table size will differ based on MAC */
2586 		switch (hw->mac.type) {
2587 		case ixgbe_mac_X550:
2588 		case ixgbe_mac_X550EM_x:
2589 		case ixgbe_mac_X550EM_a:
2590 			nreta = IX_NRETA_X550;
2591 			break;
2592 		default:
2593 			nreta = IX_NRETA;
2594 			break;
2595 		}
2596 
2597 		/*
2598 		 * Configure the RSS redirect table in the following fashion:
2599 		 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
2600 		 */
2601 		r = 0;
2602 		for (j = 0; j < nreta; ++j) {
2603 			uint32_t reta = 0;
2604 
2605 			for (i = 0; i < IX_RETA_SIZE; ++i) {
2606 				uint32_t q;
2607 
2608 				q = r % sc->rx_ring_inuse;
2609 				reta |= q << (8 * i);
2610 				++r;
2611 			}
2612 			IX_RSS_DPRINTF(sc, 1, "reta 0x%08x\n", reta);
2613 			if (j < IX_NRETA) {
2614 				IXGBE_WRITE_REG(hw, IXGBE_RETA(j), reta);
2615 			} else {
2616 				IXGBE_WRITE_REG(hw, IXGBE_ERETA(j - IX_NRETA),
2617 				    reta);
2618 			}
2619 		}
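
		/*
		 * Example of the resulting table (illustrative): with
		 * rx_ring_inuse == 2 the bytes written above cycle
		 * 0,1,0,1,..., so packets whose masked RSS hash is even
		 * land on ring 0 and those with an odd hash land on
		 * ring 1.
		 */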
2620 
2621 		/*
2622 		 * Enable multiple receive queues.
2623 		 * Enable IPv4 RSS standard hash functions.
2624 		 */
2625 		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
2626 		    IXGBE_MRQC_RSSEN |
2627 		    IXGBE_MRQC_RSS_FIELD_IPV4 |
2628 		    IXGBE_MRQC_RSS_FIELD_IPV4_TCP);
2629 
2630 		/*
2631 		 * NOTE:
2632 		 * PCSD must be enabled to enable multiple
2633 		 * receive queues.
2634 		 */
2635 		rxcsum |= IXGBE_RXCSUM_PCSD;
2636 	}
2637 
2638 	if (ifp->if_capenable & IFCAP_RXCSUM)
2639 		rxcsum |= IXGBE_RXCSUM_PCSD;
2640 
2641 	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
2642 }
2643 
2644 static __inline void
2645 ix_rx_refresh(struct ix_rx_ring *rxr, int i)
2646 {
2647 	if (--i < 0)
2648 		i = rxr->rx_ndesc - 1;
2649 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, IXGBE_RDT(rxr->rx_idx), i);
2650 }
2651 
2652 static __inline void
2653 ix_rxcsum(uint32_t staterr, struct mbuf *mp, uint32_t ptype)
2654 {
2655 	if ((ptype &
2656 	     (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_IPV4_EX)) == 0) {
2657 		/* Not IPv4 */
2658 		return;
2659 	}
2660 
2661 	if ((staterr & (IXGBE_RXD_STAT_IPCS | IXGBE_RXDADV_ERR_IPE)) ==
2662 	    IXGBE_RXD_STAT_IPCS)
2663 		mp->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID;
2664 
2665 	if ((ptype &
2666 	     (IXGBE_RXDADV_PKTTYPE_TCP | IXGBE_RXDADV_PKTTYPE_UDP)) == 0) {
2667 		/*
2668 		 * - Neither TCP nor UDP
2669 		 * - IPv4 fragment
2670 		 */
2671 		return;
2672 	}
2673 
2674 	if ((staterr & (IXGBE_RXD_STAT_L4CS | IXGBE_RXDADV_ERR_TCPE)) ==
2675 	    IXGBE_RXD_STAT_L4CS) {
2676 		mp->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
2677 		    CSUM_FRAG_NOT_CHECKED;
2678 		mp->m_pkthdr.csum_data = htons(0xffff);
2679 	}
2680 }
2681 
2682 static __inline struct pktinfo *
2683 ix_rssinfo(struct mbuf *m, struct pktinfo *pi,
2684     uint32_t hash, uint32_t hashtype, uint32_t ptype)
2685 {
2686 	switch (hashtype) {
2687 	case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
2688 		pi->pi_netisr = NETISR_IP;
2689 		pi->pi_flags = 0;
2690 		pi->pi_l3proto = IPPROTO_TCP;
2691 		break;
2692 
2693 	case IXGBE_RXDADV_RSSTYPE_IPV4:
2694 		if ((ptype & IXGBE_RXDADV_PKTTYPE_UDP) == 0) {
2695 			/* Not UDP or is fragment */
2696 			return NULL;
2697 		}
2698 		pi->pi_netisr = NETISR_IP;
2699 		pi->pi_flags = 0;
2700 		pi->pi_l3proto = IPPROTO_UDP;
2701 		break;
2702 
2703 	default:
2704 		return NULL;
2705 	}
2706 
2707 	m->m_flags |= M_HASH;
2708 	m->m_pkthdr.hash = toeplitz_hash(hash);
2709 	return pi;
2710 }
2711 
2712 static __inline void
2713 ix_setup_rxdesc(union ixgbe_adv_rx_desc *rxd, const struct ix_rx_buf *rxbuf)
2714 {
2715 	rxd->read.pkt_addr = htole64(rxbuf->paddr);
2716 	rxd->wb.upper.status_error = 0;
2717 }
2718 
2719 static void
2720 ix_rx_discard(struct ix_rx_ring *rxr, int i, boolean_t eop)
2721 {
2722 	struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2723 
2724 	/*
2725 	 * XXX discard may not be correct
2726 	 */
2727 	if (eop) {
2728 		IFNET_STAT_INC(&rxr->rx_sc->arpcom.ac_if, ierrors, 1);
2729 		rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2730 	} else {
2731 		rxr->rx_flags |= IX_RXRING_FLAG_DISC;
2732 	}
2733 	if (rxbuf->fmp != NULL) {
2734 		m_freem(rxbuf->fmp);
2735 		rxbuf->fmp = NULL;
2736 		rxbuf->lmp = NULL;
2737 	}
2738 	ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
2739 }
2740 
2741 static void
2742 ix_rxeof(struct ix_rx_ring *rxr, int count)
2743 {
2744 	struct ifnet *ifp = &rxr->rx_sc->arpcom.ac_if;
2745 	int i, nsegs = 0, cpuid = mycpuid;
2746 
2747 	i = rxr->rx_next_check;
2748 	while (count != 0) {
2749 		struct ix_rx_buf *rxbuf, *nbuf = NULL;
2750 		union ixgbe_adv_rx_desc	*cur;
2751 		struct mbuf *sendmp = NULL, *mp;
2752 		struct pktinfo *pi = NULL, pi0;
2753 		uint32_t rsc = 0, ptype, staterr, hash, hashtype;
2754 		uint16_t len;
2755 		boolean_t eop;
2756 
2757 		cur = &rxr->rx_base[i];
2758 		staterr = le32toh(cur->wb.upper.status_error);
2759 
2760 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
2761 			break;
2762 		++nsegs;
2763 
2764 		rxbuf = &rxr->rx_buf[i];
2765 		mp = rxbuf->m_head;
2766 
2767 		len = le16toh(cur->wb.upper.length);
2768 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
2769 		    IXGBE_RXDADV_PKTTYPE_MASK;
2770 		hash = le32toh(cur->wb.lower.hi_dword.rss);
2771 		hashtype = le32toh(cur->wb.lower.lo_dword.data) &
2772 		    IXGBE_RXDADV_RSSTYPE_MASK;
2773 
2774 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
2775 		if (eop)
2776 			--count;
2777 
2778 		/*
2779 		 * Make sure bad packets are discarded
2780 		 */
2781 		if ((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) ||
2782 		    (rxr->rx_flags & IX_RXRING_FLAG_DISC)) {
2783 			ix_rx_discard(rxr, i, eop);
2784 			goto next_desc;
2785 		}
2786 
2787 		bus_dmamap_sync(rxr->rx_tag, rxbuf->map, BUS_DMASYNC_POSTREAD);
2788 		if (ix_newbuf(rxr, i, FALSE) != 0) {
2789 			ix_rx_discard(rxr, i, eop);
2790 			goto next_desc;
2791 		}
2792 
2793 		/*
2794 		 * On the 82599, which supports hardware LRO (RSC), the
2795 		 * fragments of a frame need not occupy sequential
2796 		 * descriptors; instead the next descriptor index is
2797 		 * encoded in the descriptor itself.  This also means
2798 		 * that we might be processing more than one frame at a
2799 		 * time, which is why the global chain pointers were
2800 		 * eliminated in favor of the per-buffer fmp/lmp
2801 		 * pointers used here.
2802 		 */
2803 		if (!eop) {
2804 			int nextp;
2805 
2806 			/*
2807 			 * Figure out the next descriptor
2808 			 * of this frame.
2809 			 */
2810 			if (rxr->rx_flags & IX_RXRING_FLAG_LRO)
2811 				rsc = ix_rsc_count(cur);
2812 			if (rsc) { /* Get hardware index */
2813 				nextp = ((staterr &
2814 				    IXGBE_RXDADV_NEXTP_MASK) >>
2815 				    IXGBE_RXDADV_NEXTP_SHIFT);
2816 			} else { /* Just sequential */
2817 				nextp = i + 1;
2818 				if (nextp == rxr->rx_ndesc)
2819 					nextp = 0;
2820 			}
2821 			nbuf = &rxr->rx_buf[nextp];
2822 			prefetch(nbuf);
2823 		}
2824 		mp->m_len = len;
2825 
2826 		/*
2827 		 * Rather than using the fmp/lmp global pointers
2828 		 * we now keep the head of a packet chain in the
2829 		 * buffer struct and pass this along from one
2830 		 * descriptor to the next, until we get EOP.
2831 		 */
2832 		if (rxbuf->fmp == NULL) {
2833 			mp->m_pkthdr.len = len;
2834 			rxbuf->fmp = mp;
2835 			rxbuf->lmp = mp;
2836 		} else {
2837 			rxbuf->fmp->m_pkthdr.len += len;
2838 			rxbuf->lmp->m_next = mp;
2839 			rxbuf->lmp = mp;
2840 		}
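
		/*
		 * Worked example (illustrative): for a frame split across
		 * descriptors i and i+1, descriptor i sets fmp/lmp to its
		 * mbuf, the nbuf block below hands them to descriptor
		 * i+1's buffer, and at i+1 (EOP) the whole chain is sent
		 * up as sendmp before fmp/lmp are cleared.
		 */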
2841 
2842 		if (nbuf != NULL) {
2843 			/*
2844 			 * Not the last fragment of this frame,
2845 			 * pass this fragment list on
2846 			 */
2847 			nbuf->fmp = rxbuf->fmp;
2848 			nbuf->lmp = rxbuf->lmp;
2849 		} else {
2850 			/*
2851 			 * Send this frame
2852 			 */
2853 			sendmp = rxbuf->fmp;
2854 
2855 			sendmp->m_pkthdr.rcvif = ifp;
2856 			IFNET_STAT_INC(ifp, ipackets, 1);
2857 #ifdef IX_RSS_DEBUG
2858 			rxr->rx_pkts++;
2859 #endif
2860 
2861 			/* Process vlan info */
2862 			if (staterr & IXGBE_RXD_STAT_VP) {
2863 				sendmp->m_pkthdr.ether_vlantag =
2864 				    le16toh(cur->wb.upper.vlan);
2865 				sendmp->m_flags |= M_VLANTAG;
2866 			}
2867 			if (ifp->if_capenable & IFCAP_RXCSUM)
2868 				ix_rxcsum(staterr, sendmp, ptype);
2869 			if (ifp->if_capenable & IFCAP_RSS) {
2870 				pi = ix_rssinfo(sendmp, &pi0,
2871 				    hash, hashtype, ptype);
2872 			}
2873 		}
2874 		rxbuf->fmp = NULL;
2875 		rxbuf->lmp = NULL;
2876 next_desc:
2877 		/* Advance our pointers to the next descriptor. */
2878 		if (++i == rxr->rx_ndesc)
2879 			i = 0;
2880 
2881 		if (sendmp != NULL)
2882 			ifp->if_input(ifp, sendmp, pi, cpuid);
2883 
2884 		if (nsegs >= rxr->rx_wreg_nsegs) {
2885 			ix_rx_refresh(rxr, i);
2886 			nsegs = 0;
2887 		}
2888 	}
2889 	rxr->rx_next_check = i;
2890 
2891 	if (nsegs > 0)
2892 		ix_rx_refresh(rxr, i);
2893 }
2894 
2895 static void
2896 ix_set_vlan(struct ix_softc *sc)
2897 {
2898 	struct ixgbe_hw *hw = &sc->hw;
2899 	uint32_t ctrl;
2900 
2901 	if (hw->mac.type == ixgbe_mac_82598EB) {
2902 		ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
2903 		ctrl |= IXGBE_VLNCTRL_VME;
2904 		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
2905 	} else {
2906 		int i;
2907 
2908 		/*
2909 		 * On 82599 and later chips the VLAN enable is
2910 		 * per queue in RXDCTL
2911 		 */
2912 		for (i = 0; i < sc->rx_ring_inuse; ++i) {
2913 			ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
2914 			ctrl |= IXGBE_RXDCTL_VME;
2915 			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
2916 		}
2917 	}
2918 }
2919 
2920 static void
2921 ix_enable_intr(struct ix_softc *sc)
2922 {
2923 	struct ixgbe_hw	*hw = &sc->hw;
2924 	uint32_t fwsm;
2925 	int i;
2926 
2927 	for (i = 0; i < sc->intr_cnt; ++i)
2928 		lwkt_serialize_handler_enable(sc->intr_data[i].intr_serialize);
2929 
2930 	sc->intr_mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
2931 
2932 	/* Enable Fan Failure detection */
2933 	if (hw->device_id == IXGBE_DEV_ID_82598AT)
2934 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
2935 
2936 	switch (hw->mac.type) {
2937 	case ixgbe_mac_82599EB:
2938 		sc->intr_mask |= IXGBE_EIMS_ECC;
2939 		/* Temperature sensor on some adapters */
2940 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP0;
2941 		/* SFP+ (RX_LOS_N & MOD_ABS_N) */
2942 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
2943 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP2;
2944 		break;
2945 
2946 	case ixgbe_mac_X540:
2947 		sc->intr_mask |= IXGBE_EIMS_ECC;
2948 		/* Detect if Thermal Sensor is enabled */
2949 		fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
2950 		if (fwsm & IXGBE_FWSM_TS_ENABLED)
2951 			sc->intr_mask |= IXGBE_EIMS_TS;
2952 		break;
2953 
2954 	case ixgbe_mac_X550:
2955 	case ixgbe_mac_X550EM_a:
2956 	case ixgbe_mac_X550EM_x:
2957 		sc->intr_mask |= IXGBE_EIMS_ECC;
2958 		/* MAC thermal sensor is automatically enabled */
2959 		sc->intr_mask |= IXGBE_EIMS_TS;
2960 		/* Some devices use SDP0 for important information */
2961 		if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
2962 		    hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T)
2963 			sc->intr_mask |= IXGBE_EIMS_GPI_SDP0_BY_MAC(hw);
2964 		/* FALL THROUGH */
2965 	default:
2966 		break;
2967 	}
2968 
2969 	/* With MSI-X we use auto clear for RX and TX rings */
2970 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
2971 		/*
2972 		 * There are no EIAC1/EIAC2 for newer chips; the related
2973 		 * bits for TX and RX rings > 16 are always auto clear.
2974 		 *
2975 		 * XXX which bits?  There are _no_ documented EICR1 and
2976 		 * EICR2 at all; only EICR.
2977 		 */
2978 		IXGBE_WRITE_REG(hw, IXGBE_EIAC, IXGBE_EIMS_RTX_QUEUE);
2979 	} else {
2980 		sc->intr_mask |= IX_TX_INTR_MASK | IX_RX0_INTR_MASK;
2981 
2982 		KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
2983 		if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS)
2984 			sc->intr_mask |= IX_RX1_INTR_MASK;
2985 	}
2986 
2987 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
2988 
2989 	/*
2990 	 * Enable RX and TX rings for MSI-X
2991 	 */
2992 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
2993 		for (i = 0; i < sc->tx_ring_inuse; ++i) {
2994 			const struct ix_tx_ring *txr = &sc->tx_rings[i];
2995 
2996 			if (txr->tx_intr_vec >= 0) {
2997 				IXGBE_WRITE_REG(hw, txr->tx_eims,
2998 				    txr->tx_eims_val);
2999 			}
3000 		}
3001 		for (i = 0; i < sc->rx_ring_inuse; ++i) {
3002 			const struct ix_rx_ring *rxr = &sc->rx_rings[i];
3003 
3004 			KKASSERT(rxr->rx_intr_vec >= 0);
3005 			IXGBE_WRITE_REG(hw, rxr->rx_eims, rxr->rx_eims_val);
3006 		}
3007 	}
3008 
3009 	IXGBE_WRITE_FLUSH(hw);
3010 }
3011 
3012 static void
3013 ix_disable_intr(struct ix_softc *sc)
3014 {
3015 	int i;
3016 
3017 	if (sc->intr_type == PCI_INTR_TYPE_MSIX)
3018 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIAC, 0);
3019 
3020 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
3021 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, ~0);
3022 	} else {
3023 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, 0xFFFF0000);
3024 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(0), ~0);
3025 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(1), ~0);
3026 	}
3027 	IXGBE_WRITE_FLUSH(&sc->hw);
3028 
3029 	for (i = 0; i < sc->intr_cnt; ++i)
3030 		lwkt_serialize_handler_disable(sc->intr_data[i].intr_serialize);
3031 }
3032 
3033 uint16_t
3034 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, uint32_t reg)
3035 {
3036 	return pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
3037 	    reg, 2);
3038 }
3039 
3040 void
3041 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, uint32_t reg, uint16_t value)
3042 {
3043 	pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
3044 	    reg, value, 2);
3045 }
3046 
3047 static void
3048 ix_slot_info(struct ix_softc *sc)
3049 {
3050 	struct ixgbe_hw *hw = &sc->hw;
3051 	device_t dev = sc->dev;
3052 	struct ixgbe_mac_info *mac = &hw->mac;
3053 	uint16_t link;
3054 	uint32_t offset;
3055 
3056 	/* For most devices simply call the shared code routine */
3057 	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
3058 		ixgbe_get_bus_info(hw);
3059 		/* These devices don't use PCI-E */
3060 		if (hw->mac.type == ixgbe_mac_X550EM_x ||
3061 		    hw->mac.type == ixgbe_mac_X550EM_a)
3062 			return;
3063 		goto display;
3064 	}
3065 
3066 	/*
3067 	 * For the Quad port adapter we need to parse back
3068 	 * up the PCI tree to find the speed of the expansion
3069 	 * slot into which this adapter is plugged. A bit more work.
3070 	 */
3071 	dev = device_get_parent(device_get_parent(dev));
3072 #ifdef IXGBE_DEBUG
3073 	device_printf(dev, "parent pcib = %x,%x,%x\n",
3074 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
3075 #endif
3076 	dev = device_get_parent(device_get_parent(dev));
3077 #ifdef IXGBE_DEBUG
3078 	device_printf(dev, "slot pcib = %x,%x,%x\n",
3079 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
3080 #endif
3081 	/* Now get the PCI Express Capabilities offset */
3082 	offset = pci_get_pciecap_ptr(dev);
3083 	/* ...and read the Link Status Register */
3084 	link = pci_read_config(dev, offset + PCIER_LINKSTAT, 2);
3085 	switch (link & IXGBE_PCI_LINK_WIDTH) {
3086 	case IXGBE_PCI_LINK_WIDTH_1:
3087 		hw->bus.width = ixgbe_bus_width_pcie_x1;
3088 		break;
3089 	case IXGBE_PCI_LINK_WIDTH_2:
3090 		hw->bus.width = ixgbe_bus_width_pcie_x2;
3091 		break;
3092 	case IXGBE_PCI_LINK_WIDTH_4:
3093 		hw->bus.width = ixgbe_bus_width_pcie_x4;
3094 		break;
3095 	case IXGBE_PCI_LINK_WIDTH_8:
3096 		hw->bus.width = ixgbe_bus_width_pcie_x8;
3097 		break;
3098 	default:
3099 		hw->bus.width = ixgbe_bus_width_unknown;
3100 		break;
3101 	}
3102 
3103 	switch (link & IXGBE_PCI_LINK_SPEED) {
3104 	case IXGBE_PCI_LINK_SPEED_2500:
3105 		hw->bus.speed = ixgbe_bus_speed_2500;
3106 		break;
3107 	case IXGBE_PCI_LINK_SPEED_5000:
3108 		hw->bus.speed = ixgbe_bus_speed_5000;
3109 		break;
3110 	case IXGBE_PCI_LINK_SPEED_8000:
3111 		hw->bus.speed = ixgbe_bus_speed_8000;
3112 		break;
3113 	default:
3114 		hw->bus.speed = ixgbe_bus_speed_unknown;
3115 		break;
3116 	}
3117 
3118 	mac->ops.set_lan_id(hw);
3119 
3120 display:
3121 	device_printf(dev, "PCI Express Bus: Speed %s %s\n",
3122 	    hw->bus.speed == ixgbe_bus_speed_8000 ? "8.0GT/s" :
3123 	    hw->bus.speed == ixgbe_bus_speed_5000 ? "5.0GT/s" :
3124 	    hw->bus.speed == ixgbe_bus_speed_2500 ? "2.5GT/s" : "Unknown",
3125 	    hw->bus.width == ixgbe_bus_width_pcie_x8 ? "Width x8" :
3126 	    hw->bus.width == ixgbe_bus_width_pcie_x4 ? "Width x4" :
3127 	    hw->bus.width == ixgbe_bus_width_pcie_x1 ? "Width x1" : "Unknown");
3128 
3129 	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP &&
3130 	    hw->bus.width <= ixgbe_bus_width_pcie_x4 &&
3131 	    hw->bus.speed == ixgbe_bus_speed_2500) {
3132 		device_printf(dev, "For optimal performance a x8 "
3133 		    "PCIE, or x4 PCIE Gen2 slot is required.\n");
3134 	} else if (hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP &&
3135 	    hw->bus.width <= ixgbe_bus_width_pcie_x8 &&
3136 	    hw->bus.speed < ixgbe_bus_speed_8000) {
3137 		device_printf(dev, "For optimal performance a x8 "
3138 		    "PCIE Gen3 slot is required.\n");
3139 	}
3140 }
3141 
3142 /*
3143  * TODO comment is incorrect
3144  *
3145  * Setup the correct IVAR register for a particular MSIX interrupt
3146  * - entry is the register array entry
3147  * - vector is the MSIX vector for this queue
3148  * - type is RX/TX/MISC
3149  */
3150 static void
3151 ix_set_ivar(struct ix_softc *sc, uint8_t entry, uint8_t vector,
3152     int8_t type)
3153 {
3154 	struct ixgbe_hw *hw = &sc->hw;
3155 	uint32_t ivar, index;
3156 
3157 	vector |= IXGBE_IVAR_ALLOC_VAL;
3158 
3159 	switch (hw->mac.type) {
3160 	case ixgbe_mac_82598EB:
3161 		if (type == -1)
3162 			entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
3163 		else
3164 			entry += (type * 64);
3165 		index = (entry >> 2) & 0x1F;
3166 		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
3167 		ivar &= ~(0xFF << (8 * (entry & 0x3)));
3168 		ivar |= (vector << (8 * (entry & 0x3)));
3169 		IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
3170 		break;
3171 
3172 	case ixgbe_mac_82599EB:
3173 	case ixgbe_mac_X540:
3174 	case ixgbe_mac_X550:
3175 	case ixgbe_mac_X550EM_a:
3176 	case ixgbe_mac_X550EM_x:
3177 		if (type == -1) { /* MISC IVAR */
3178 			index = (entry & 1) * 8;
3179 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
3180 			ivar &= ~(0xFF << index);
3181 			ivar |= (vector << index);
3182 			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
3183 		} else {	/* RX/TX IVARS */
3184 			index = (16 * (entry & 1)) + (8 * type);
3185 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
3186 			ivar &= ~(0xFF << index);
3187 			ivar |= (vector << index);
3188 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
3189 		}
3190 		/* FALL THROUGH */
3191 	default:
3192 		break;
3193 	}
3194 }
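
/*
 * Layout note (illustrative, derived from the code above): on the 82598
 * each 32-bit IVAR register holds four 8-bit entries, indexed by
 * entry >> 2 and entry & 3 (TX entries are offset by 64); on the 82599
 * and later MACs, IVAR(entry >> 1) holds two RX/TX byte pairs, with
 * entry & 1 selecting the low or high 16-bit half and type selecting
 * the RX or TX byte within that half.
 */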
3195 
3196 static boolean_t
3197 ix_sfp_probe(struct ix_softc *sc)
3198 {
3199 	struct ixgbe_hw	*hw = &sc->hw;
3200 
3201 	if (hw->phy.type == ixgbe_phy_nl &&
3202 	    hw->phy.sfp_type == ixgbe_sfp_type_not_present) {
3203 		int32_t ret;
3204 
3205 		ret = hw->phy.ops.identify_sfp(hw);
3206 		if (ret)
3207 			return FALSE;
3208 
3209 		ret = hw->phy.ops.reset(hw);
3210 		if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3211 			if_printf(&sc->arpcom.ac_if,
3212 			     "Unsupported SFP+ module detected!  "
3213 			     "Reload driver with supported module.\n");
3214 			sc->sfp_probe = FALSE;
3215 			return FALSE;
3216 		}
3217 		if_printf(&sc->arpcom.ac_if, "SFP+ module detected!\n");
3218 
3219 		/* We now have supported optics */
3220 		sc->sfp_probe = FALSE;
3221 
3222 		return TRUE;
3223 	}
3224 	return FALSE;
3225 }
3226 
3227 static void
3228 ix_handle_link(struct ix_softc *sc)
3229 {
3230 	ixgbe_check_link(&sc->hw, &sc->link_speed, &sc->link_up, 0);
3231 	ix_update_link_status(sc);
3232 }
3233 
3234 /*
3235  * Handling SFP module
3236  */
3237 static void
3238 ix_handle_mod(struct ix_softc *sc)
3239 {
3240 	struct ixgbe_hw *hw = &sc->hw;
3241 	uint32_t err;
3242 
3243 	err = hw->phy.ops.identify_sfp(hw);
3244 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3245 		if_printf(&sc->arpcom.ac_if,
3246 		    "Unsupported SFP+ module type was detected.\n");
3247 		return;
3248 	}
3249 	err = hw->mac.ops.setup_sfp(hw);
3250 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3251 		if_printf(&sc->arpcom.ac_if,
3252 		    "Setup failure - unsupported SFP+ module type.\n");
3253 		return;
3254 	}
3255 	ix_handle_msf(sc);
3256 }
3257 
3258 /*
3259  * Handling MSF (multispeed fiber)
3260  */
3261 static void
3262 ix_handle_msf(struct ix_softc *sc)
3263 {
3264 	struct ixgbe_hw *hw = &sc->hw;
3265 	uint32_t autoneg;
3266 
3267 	hw->phy.ops.identify_sfp(hw);
3268 	ix_init_media(sc);
3269 
3270 	if (sc->advspeed != IXGBE_LINK_SPEED_UNKNOWN)
3271 		autoneg = sc->advspeed;
3272 	else
3273 		autoneg = hw->phy.autoneg_advertised;
3274 	if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
3275 		bool negotiate;
3276 
3277 		hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
3278 	}
3279 	if (hw->mac.ops.setup_link != NULL)
3280 		hw->mac.ops.setup_link(hw, autoneg, TRUE);
3281 }
3282 
3283 static void
3284 ix_handle_phy(struct ix_softc *sc)
3285 {
3286 	struct ixgbe_hw *hw = &sc->hw;
3287 	int error;
3288 
3289 	error = hw->phy.ops.handle_lasi(hw);
3290 	if (error == IXGBE_ERR_OVERTEMP) {
3291 		if_printf(&sc->arpcom.ac_if,
3292 		    "CRITICAL: EXTERNAL PHY OVER TEMP!!  "
3293 		    "PHY will downshift to lower power state!\n");
3294 	} else if (error) {
3295 		if_printf(&sc->arpcom.ac_if,
3296 		    "Error handling LASI interrupt: %d\n", error);
3297 	}
3298 }
3299 
3300 static void
3301 ix_update_stats(struct ix_softc *sc)
3302 {
3303 	struct ifnet *ifp = &sc->arpcom.ac_if;
3304 	struct ixgbe_hw *hw = &sc->hw;
3305 	uint32_t missed_rx = 0, bprc, lxon, lxoff, total;
3306 	uint64_t total_missed_rx = 0;
3307 	int i;
3308 
3309 	sc->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
3310 	sc->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
3311 	sc->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
3312 	sc->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
3313 
3314 	for (i = 0; i < 16; i++) {
3315 		sc->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
3316 		sc->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
3317 		sc->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
3318 	}
3319 	sc->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
3320 	sc->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
3321 	sc->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
3322 
3323 	/* Hardware workaround, gprc counts missed packets */
3324 	sc->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
3325 	sc->stats.gprc -= missed_rx;
3326 
3327 	if (hw->mac.type != ixgbe_mac_82598EB) {
3328 		sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
3329 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
3330 		sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
3331 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
3332 		sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
3333 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
3334 		sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
3335 		sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
3336 	} else {
3337 		sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
3338 		sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
3339 		/* 82598 only has a counter in the high register */
3340 		sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
3341 		sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
3342 		sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
3343 	}
3344 
3345 	/*
3346 	 * Workaround: mprc hardware is incorrectly counting
3347 	 * broadcasts, so for now we subtract those.
3348 	 */
3349 	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
3350 	sc->stats.bprc += bprc;
3351 	sc->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
3352 	if (hw->mac.type == ixgbe_mac_82598EB)
3353 		sc->stats.mprc -= bprc;
3354 
3355 	sc->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
3356 	sc->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
3357 	sc->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
3358 	sc->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
3359 	sc->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
3360 	sc->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
3361 
3362 	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
3363 	sc->stats.lxontxc += lxon;
3364 	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
3365 	sc->stats.lxofftxc += lxoff;
3366 	total = lxon + lxoff;
3367 
3368 	sc->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
3369 	sc->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
3370 	sc->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
3371 	sc->stats.gptc -= total;
3372 	sc->stats.mptc -= total;
3373 	sc->stats.ptc64 -= total;
3374 	sc->stats.gotc -= total * ETHER_MIN_LEN;
3375 
3376 	sc->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
3377 	sc->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
3378 	sc->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
3379 	sc->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
3380 	sc->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
3381 	sc->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
3382 	sc->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
3383 	sc->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
3384 	sc->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
3385 	sc->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
3386 	sc->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
3387 	sc->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
3388 	sc->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
3389 	sc->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
3390 	sc->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
3391 	sc->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC);
3392 	sc->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
3393 	sc->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
3394 	/* Only read FCOE on 82599 */
3395 	if (hw->mac.type != ixgbe_mac_82598EB) {
3396 		sc->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
3397 		sc->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
3398 		sc->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
3399 		sc->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
3400 		sc->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
3401 	}
3402 
3403 	/* Rx Errors */
3404 	IFNET_STAT_SET(ifp, iqdrops, total_missed_rx);
3405 	IFNET_STAT_SET(ifp, ierrors, sc->stats.crcerrs + sc->stats.rlec);
3406 }
3407 
3408 #if 0
3409 /*
3410  * Add sysctl variables, one per statistic, to the system.
3411  */
3412 static void
3413 ix_add_hw_stats(struct ix_softc *sc)
3414 {
3415 
3416 	device_t dev = sc->dev;
3417 
3418 	struct ix_tx_ring *txr = sc->tx_rings;
3419 	struct ix_rx_ring *rxr = sc->rx_rings;
3420 
3421 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
3422 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
3423 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
3424 	struct ixgbe_hw_stats *stats = &sc->stats;
3425 
3426 	struct sysctl_oid *stat_node, *queue_node;
3427 	struct sysctl_oid_list *stat_list, *queue_list;
3428 
3429 #define QUEUE_NAME_LEN 32
3430 	char namebuf[QUEUE_NAME_LEN];
3431 
3432 	/* MAC stats get their own sub node */
3433 
3434 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
3435 				    CTLFLAG_RD, NULL, "MAC Statistics");
3436 	stat_list = SYSCTL_CHILDREN(stat_node);
3437 
3438 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
3439 			CTLFLAG_RD, &stats->crcerrs,
3440 			"CRC Errors");
3441 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
3442 			CTLFLAG_RD, &stats->illerrc,
3443 			"Illegal Byte Errors");
3444 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
3445 			CTLFLAG_RD, &stats->errbc,
3446 			"Byte Errors");
3447 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
3448 			CTLFLAG_RD, &stats->mspdc,
3449 			"MAC Short Packets Discarded");
3450 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
3451 			CTLFLAG_RD, &stats->mlfc,
3452 			"MAC Local Faults");
3453 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
3454 			CTLFLAG_RD, &stats->mrfc,
3455 			"MAC Remote Faults");
3456 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
3457 			CTLFLAG_RD, &stats->rlec,
3458 			"Receive Length Errors");
3459 
3460 	/* Flow Control stats */
3461 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
3462 			CTLFLAG_RD, &stats->lxontxc,
3463 			"Link XON Transmitted");
3464 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
3465 			CTLFLAG_RD, &stats->lxonrxc,
3466 			"Link XON Received");
3467 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
3468 			CTLFLAG_RD, &stats->lxofftxc,
3469 			"Link XOFF Transmitted");
3470 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
3471 			CTLFLAG_RD, &stats->lxoffrxc,
3472 			"Link XOFF Received");
3473 
3474 	/* Packet Reception Stats */
3475 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
3476 			CTLFLAG_RD, &stats->tor,
3477 			"Total Octets Received");
3478 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
3479 			CTLFLAG_RD, &stats->gorc,
3480 			"Good Octets Received");
3481 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
3482 			CTLFLAG_RD, &stats->tpr,
3483 			"Total Packets Received");
3484 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
3485 			CTLFLAG_RD, &stats->gprc,
3486 			"Good Packets Received");
3487 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
3488 			CTLFLAG_RD, &stats->mprc,
3489 			"Multicast Packets Received");
3490 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
3491 			CTLFLAG_RD, &stats->bprc,
3492 			"Broadcast Packets Received");
3493 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
3494 			CTLFLAG_RD, &stats->prc64,
3495 			"64 byte frames received ");
3496 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
3497 			CTLFLAG_RD, &stats->prc127,
3498 			"65-127 byte frames received");
3499 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
3500 			CTLFLAG_RD, &stats->prc255,
3501 			"128-255 byte frames received");
3502 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
3503 			CTLFLAG_RD, &stats->prc511,
3504 			"256-511 byte frames received");
3505 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
3506 			CTLFLAG_RD, &stats->prc1023,
3507 			"512-1023 byte frames received");
3508 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
3509 			CTLFLAG_RD, &stats->prc1522,
3510 			"1024-1522 byte frames received");
3511 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
3512 			CTLFLAG_RD, &stats->ruc,
3513 			"Receive Undersized");
3514 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
3515 			CTLFLAG_RD, &stats->rfc,
3516 			"Fragmented Packets Received ");
3517 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
3518 			CTLFLAG_RD, &stats->roc,
3519 			"Oversized Packets Received");
3520 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
3521 			CTLFLAG_RD, &stats->rjc,
3522 			"Received Jabber");
3523 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
3524 			CTLFLAG_RD, &stats->mngprc,
3525 			"Management Packets Received");
3526 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
3527 			CTLFLAG_RD, &stats->mngpdc,
3528 			"Management Packets Dropped");
3529 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
3530 			CTLFLAG_RD, &stats->xec,
3531 			"Checksum Errors");
3532 
3533 	/* Packet Transmission Stats */
3534 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
3535 			CTLFLAG_RD, &stats->gotc,
3536 			"Good Octets Transmitted");
3537 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
3538 			CTLFLAG_RD, &stats->tpt,
3539 			"Total Packets Transmitted");
3540 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
3541 			CTLFLAG_RD, &stats->gptc,
3542 			"Good Packets Transmitted");
3543 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
3544 			CTLFLAG_RD, &stats->bptc,
3545 			"Broadcast Packets Transmitted");
3546 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
3547 			CTLFLAG_RD, &stats->mptc,
3548 			"Multicast Packets Transmitted");
3549 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
3550 			CTLFLAG_RD, &stats->mngptc,
3551 			"Management Packets Transmitted");
3552 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
3553 			CTLFLAG_RD, &stats->ptc64,
3554 			"64 byte frames transmitted ");
3555 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
3556 			CTLFLAG_RD, &stats->ptc127,
3557 			"65-127 byte frames transmitted");
3558 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
3559 			CTLFLAG_RD, &stats->ptc255,
3560 			"128-255 byte frames transmitted");
3561 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
3562 			CTLFLAG_RD, &stats->ptc511,
3563 			"256-511 byte frames transmitted");
3564 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
3565 			CTLFLAG_RD, &stats->ptc1023,
3566 			"512-1023 byte frames transmitted");
3567 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
3568 			CTLFLAG_RD, &stats->ptc1522,
3569 			"1024-1522 byte frames transmitted");
3570 }
3571 #endif
3572 
3573 /*
3574  * Enable the hardware to drop packets when the buffer is full.
3575  * This is useful when multiple RX rings are used, so that no
3576  * single RX ring being full stalls the entire RX engine.  We
3577  * only enable this when multiple RX rings are used and when
3578  * flow control is disabled.
3579  */
3580 static void
3581 ix_enable_rx_drop(struct ix_softc *sc)
3582 {
3583 	struct ixgbe_hw *hw = &sc->hw;
3584 	int i;
3585 
3586 	if (bootverbose) {
3587 		if_printf(&sc->arpcom.ac_if,
3588 		    "flow control %s, enable RX drop\n",
3589 		    ix_fc2str(sc->hw.fc.current_mode));
3590 	}
3591 
3592 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
3593 		uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3594 
3595 		srrctl |= IXGBE_SRRCTL_DROP_EN;
3596 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3597 	}
3598 }
3599 
3600 static void
3601 ix_disable_rx_drop(struct ix_softc *sc)
3602 {
3603 	struct ixgbe_hw *hw = &sc->hw;
3604 	int i;
3605 
3606 	if (bootverbose) {
3607 		if_printf(&sc->arpcom.ac_if,
3608 		    "flow control %s, disable RX drop\n",
3609 		    ix_fc2str(sc->hw.fc.current_mode));
3610 	}
3611 
3612 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
3613 		uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3614 
3615 		srrctl &= ~IXGBE_SRRCTL_DROP_EN;
3616 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3617 	}
3618 }
3619 
3620 static void
3621 ix_setup_serialize(struct ix_softc *sc)
3622 {
3623 	int i = 0, j;
3624 
3625 	/* Main + RX + TX */
3626 	sc->nserialize = 1 + sc->rx_ring_cnt + sc->tx_ring_cnt;
3627 	sc->serializes =
3628 	    kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *),
3629 	        M_DEVBUF, M_WAITOK | M_ZERO);
3630 
3631 	/*
3632 	 * Setup serializes
3633 	 *
3634 	 * NOTE: Order is critical
3635 	 */
3636 
3637 	KKASSERT(i < sc->nserialize);
3638 	sc->serializes[i++] = &sc->main_serialize;
3639 
3640 	for (j = 0; j < sc->rx_ring_cnt; ++j) {
3641 		KKASSERT(i < sc->nserialize);
3642 		sc->serializes[i++] = &sc->rx_rings[j].rx_serialize;
3643 	}
3644 
3645 	for (j = 0; j < sc->tx_ring_cnt; ++j) {
3646 		KKASSERT(i < sc->nserialize);
3647 		sc->serializes[i++] = &sc->tx_rings[j].tx_serialize;
3648 	}
3649 
3650 	KKASSERT(i == sc->nserialize);
3651 }
3652 
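/*
 * Allocate interrupt resources: MSI-X is tried first; if it is not
 * usable we fall back to a single MSI or legacy interrupt that
 * services both RX and TX through the main serializer.
 */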
3653 static int
3654 ix_alloc_intr(struct ix_softc *sc)
3655 {
3656 	struct ix_intr_data *intr;
3657 	u_int intr_flags;
3658 
3659 	ix_alloc_msix(sc);
3660 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
3661 		ix_set_ring_inuse(sc, FALSE);
3662 		return 0;
3663 	}
3664 
3665 	if (sc->intr_data != NULL)
3666 		kfree(sc->intr_data, M_DEVBUF);
3667 
3668 	sc->intr_cnt = 1;
3669 	sc->intr_data = kmalloc(sizeof(struct ix_intr_data), M_DEVBUF,
3670 	    M_WAITOK | M_ZERO);
3671 	intr = &sc->intr_data[0];
3672 
3673 	/*
3674 	 * Allocate MSI/legacy interrupt resource
3675 	 */
3676 	sc->intr_type = pci_alloc_1intr(sc->dev, ix_msi_enable,
3677 	    &intr->intr_rid, &intr_flags);
3678 
3679 	intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
3680 	    &intr->intr_rid, intr_flags);
3681 	if (intr->intr_res == NULL) {
3682 		device_printf(sc->dev, "Unable to allocate bus resource: "
3683 		    "interrupt\n");
3684 		return ENXIO;
3685 	}
3686 
3687 	intr->intr_serialize = &sc->main_serialize;
3688 	intr->intr_cpuid = rman_get_cpuid(intr->intr_res);
3689 	intr->intr_func = ix_intr;
3690 	intr->intr_funcarg = sc;
3691 	intr->intr_rate = IX_INTR_RATE;
3692 	intr->intr_use = IX_INTR_USE_RXTX;
3693 
3694 	sc->tx_rings[0].tx_intr_cpuid = intr->intr_cpuid;
3695 	sc->tx_rings[0].tx_intr_vec = IX_TX_INTR_VEC;
3696 
3697 	sc->rx_rings[0].rx_intr_vec = IX_RX0_INTR_VEC;
3698 
3699 	ix_set_ring_inuse(sc, FALSE);
3700 
3701 	KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
3702 	if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS)
3703 		sc->rx_rings[1].rx_intr_vec = IX_RX1_INTR_VEC;
3704 
3705 	return 0;
3706 }
3707 
3708 static void
3709 ix_free_intr(struct ix_softc *sc)
3710 {
3711 	if (sc->intr_data == NULL)
3712 		return;
3713 
3714 	if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
3715 		struct ix_intr_data *intr = &sc->intr_data[0];
3716 
3717 		KKASSERT(sc->intr_cnt == 1);
3718 		if (intr->intr_res != NULL) {
3719 			bus_release_resource(sc->dev, SYS_RES_IRQ,
3720 			    intr->intr_rid, intr->intr_res);
3721 		}
3722 		if (sc->intr_type == PCI_INTR_TYPE_MSI)
3723 			pci_release_msi(sc->dev);
3724 
3725 		kfree(sc->intr_data, M_DEVBUF);
3726 	} else {
3727 		ix_free_msix(sc, TRUE);
3728 	}
3729 }
3730 
3731 static void
3732 ix_set_ring_inuse(struct ix_softc *sc, boolean_t polling)
3733 {
3734 	sc->rx_ring_inuse = ix_get_rxring_inuse(sc, polling);
3735 	sc->tx_ring_inuse = ix_get_txring_inuse(sc, polling);
3736 	if (bootverbose) {
3737 		if_printf(&sc->arpcom.ac_if,
3738 		    "RX rings %d/%d, TX rings %d/%d\n",
3739 		    sc->rx_ring_inuse, sc->rx_ring_cnt,
3740 		    sc->tx_ring_inuse, sc->tx_ring_cnt);
3741 	}
3742 }
3743 
3744 static int
3745 ix_get_rxring_inuse(const struct ix_softc *sc, boolean_t polling)
3746 {
3747 	if (!IX_ENABLE_HWRSS(sc))
3748 		return 1;
3749 
3750 	if (polling)
3751 		return sc->rx_ring_cnt;
3752 	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
3753 		return IX_MIN_RXRING_RSS;
3754 	else
3755 		return sc->rx_ring_msix;
3756 }
3757 
3758 static int
3759 ix_get_txring_inuse(const struct ix_softc *sc, boolean_t polling)
3760 {
3761 	if (!IX_ENABLE_HWTSS(sc))
3762 		return 1;
3763 
3764 	if (polling)
3765 		return sc->tx_ring_cnt;
3766 	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
3767 		return 1;
3768 	else
3769 		return sc->tx_ring_msix;
3770 }
3771 
3772 static int
3773 ix_setup_intr(struct ix_softc *sc)
3774 {
3775 	int i;
3776 
3777 	for (i = 0; i < sc->intr_cnt; ++i) {
3778 		struct ix_intr_data *intr = &sc->intr_data[i];
3779 		int error;
3780 
3781 		error = bus_setup_intr_descr(sc->dev, intr->intr_res,
3782 		    INTR_MPSAFE, intr->intr_func, intr->intr_funcarg,
3783 		    &intr->intr_hand, intr->intr_serialize, intr->intr_desc);
3784 		if (error) {
3785 			device_printf(sc->dev, "can't setup %dth intr\n", i);
3786 			ix_teardown_intr(sc, i);
3787 			return error;
3788 		}
3789 	}
3790 	return 0;
3791 }
3792 
3793 static void
3794 ix_teardown_intr(struct ix_softc *sc, int intr_cnt)
3795 {
3796 	int i;
3797 
3798 	if (sc->intr_data == NULL)
3799 		return;
3800 
3801 	for (i = 0; i < intr_cnt; ++i) {
3802 		struct ix_intr_data *intr = &sc->intr_data[i];
3803 
3804 		bus_teardown_intr(sc->dev, intr->intr_res, intr->intr_hand);
3805 	}
3806 }
3807 
3808 static void
3809 ix_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
3810 {
3811 	struct ix_softc *sc = ifp->if_softc;
3812 
3813 	ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz);
3814 }
3815 
3816 static void
3817 ix_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
3818 {
3819 	struct ix_softc *sc = ifp->if_softc;
3820 
3821 	ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz);
3822 }
3823 
3824 static int
3825 ix_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
3826 {
3827 	struct ix_softc *sc = ifp->if_softc;
3828 
3829 	return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz);
3830 }
3831 
3832 #ifdef INVARIANTS
3833 
3834 static void
3835 ix_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
3836     boolean_t serialized)
3837 {
3838 	struct ix_softc *sc = ifp->if_softc;
3839 
3840 	ifnet_serialize_array_assert(sc->serializes, sc->nserialize, slz,
3841 	    serialized);
3842 }
3843 
3844 #endif	/* INVARIANTS */
3845 
3846 static void
3847 ix_free_rings(struct ix_softc *sc)
3848 {
3849 	int i;
3850 
3851 	if (sc->tx_rings != NULL) {
3852 		for (i = 0; i < sc->tx_ring_cnt; ++i) {
3853 			struct ix_tx_ring *txr = &sc->tx_rings[i];
3854 
3855 			ix_destroy_tx_ring(txr, txr->tx_ndesc);
3856 		}
3857 		kfree(sc->tx_rings, M_DEVBUF);
3858 	}
3859 
3860 	if (sc->rx_rings != NULL) {
3861 		for (i = 0; i < sc->rx_ring_cnt; ++i) {
3862 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
3863 
3864 			ix_destroy_rx_ring(rxr, rxr->rx_ndesc);
3865 		}
3866 		kfree(sc->rx_rings, M_DEVBUF);
3867 	}
3868 
3869 	if (sc->parent_tag != NULL)
3870 		bus_dma_tag_destroy(sc->parent_tag);
3871 }
3872 
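/*
 * Per-subqueue TX watchdog.  If the MAC has TX paused by flow control
 * (TFCS.TXOFF) the timer is simply rearmed; otherwise the ring state
 * is dumped and the interface is reinitialized.
 */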
3873 static void
3874 ix_watchdog(struct ifaltq_subque *ifsq)
3875 {
3876 	struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
3877 	struct ifnet *ifp = ifsq_get_ifp(ifsq);
3878 	struct ix_softc *sc = ifp->if_softc;
3879 	int i;
3880 
3881 	KKASSERT(txr->tx_ifsq == ifsq);
3882 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3883 
3884 	/*
3885 	 * If the interface has been paused then don't do the watchdog check
3886 	 */
3887 	if (IXGBE_READ_REG(&sc->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF) {
3888 		txr->tx_watchdog.wd_timer = 5;
3889 		return;
3890 	}
3891 
3892 	if_printf(ifp, "Watchdog timeout -- resetting\n");
3893 	if_printf(ifp, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->tx_idx,
3894 	    IXGBE_READ_REG(&sc->hw, IXGBE_TDH(txr->tx_idx)),
3895 	    IXGBE_READ_REG(&sc->hw, IXGBE_TDT(txr->tx_idx)));
3896 	if_printf(ifp, "TX(%d) desc avail = %d, next TX to Clean = %d\n",
3897 	    txr->tx_idx, txr->tx_avail, txr->tx_next_clean);
3898 
3899 	ix_init(sc);
3900 	for (i = 0; i < sc->tx_ring_inuse; ++i)
3901 		ifsq_devstart_sched(sc->tx_rings[i].tx_ifsq);
3902 }
3903 
3904 static void
3905 ix_free_tx_ring(struct ix_tx_ring *txr)
3906 {
3907 	int i;
3908 
3909 	for (i = 0; i < txr->tx_ndesc; ++i) {
3910 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
3911 
3912 		if (txbuf->m_head != NULL) {
3913 			bus_dmamap_unload(txr->tx_tag, txbuf->map);
3914 			m_freem(txbuf->m_head);
3915 			txbuf->m_head = NULL;
3916 		}
3917 	}
3918 }
3919 
3920 static void
3921 ix_free_rx_ring(struct ix_rx_ring *rxr)
3922 {
3923 	int i;
3924 
3925 	for (i = 0; i < rxr->rx_ndesc; ++i) {
3926 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
3927 
3928 		if (rxbuf->fmp != NULL) {
3929 			m_freem(rxbuf->fmp);
3930 			rxbuf->fmp = NULL;
3931 			rxbuf->lmp = NULL;
3932 		} else {
3933 			KKASSERT(rxbuf->lmp == NULL);
3934 		}
3935 		if (rxbuf->m_head != NULL) {
3936 			bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
3937 			m_freem(rxbuf->m_head);
3938 			rxbuf->m_head = NULL;
3939 		}
3940 	}
3941 }
3942 
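/*
 * Attach a freshly allocated receive mbuf to RX descriptor slot 'i'.
 * The mbuf is loaded through the ring's spare DMA map; on success the
 * spare map and the slot's map are swapped so the old map becomes the
 * new spare, and the RX descriptor is rewritten with the new address.
 */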
3943 static int
3944 ix_newbuf(struct ix_rx_ring *rxr, int i, boolean_t wait)
3945 {
3946 	struct mbuf *m;
3947 	bus_dma_segment_t seg;
3948 	bus_dmamap_t map;
3949 	struct ix_rx_buf *rxbuf;
3950 	int flags, error, nseg;
3951 
3952 	flags = M_NOWAIT;
3953 	if (__predict_false(wait))
3954 		flags = M_WAITOK;
3955 
3956 	m = m_getjcl(flags, MT_DATA, M_PKTHDR, rxr->rx_mbuf_sz);
3957 	if (m == NULL) {
3958 		if (wait) {
3959 			if_printf(&rxr->rx_sc->arpcom.ac_if,
3960 			    "Unable to allocate RX mbuf\n");
3961 		}
3962 		return ENOBUFS;
3963 	}
3964 	m->m_len = m->m_pkthdr.len = rxr->rx_mbuf_sz;
3965 
3966 	error = bus_dmamap_load_mbuf_segment(rxr->rx_tag,
3967 	    rxr->rx_sparemap, m, &seg, 1, &nseg, BUS_DMA_NOWAIT);
3968 	if (error) {
3969 		m_freem(m);
3970 		if (wait) {
3971 			if_printf(&rxr->rx_sc->arpcom.ac_if,
3972 			    "Unable to load RX mbuf\n");
3973 		}
3974 		return error;
3975 	}
3976 
3977 	rxbuf = &rxr->rx_buf[i];
3978 	if (rxbuf->m_head != NULL)
3979 		bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
3980 
3981 	map = rxbuf->map;
3982 	rxbuf->map = rxr->rx_sparemap;
3983 	rxr->rx_sparemap = map;
3984 
3985 	rxbuf->m_head = m;
3986 	rxbuf->paddr = seg.ds_addr;
3987 
3988 	ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
3989 	return 0;
3990 }
3991 
3992 static void
3993 ix_add_sysctl(struct ix_softc *sc)
3994 {
3995 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
3996 	struct sysctl_oid *tree = device_get_sysctl_tree(sc->dev);
3997 #ifdef IX_RSS_DEBUG
3998 	char node[32];
3999 	int i;
4000 #endif
4001 
4002 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4003 	    OID_AUTO, "rxr", CTLFLAG_RD, &sc->rx_ring_cnt, 0, "# of RX rings");
4004 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4005 	    OID_AUTO, "rxr_inuse", CTLFLAG_RD, &sc->rx_ring_inuse, 0,
4006 	    "# of RX rings used");
4007 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4008 	    OID_AUTO, "txr", CTLFLAG_RD, &sc->tx_ring_cnt, 0, "# of TX rings");
4009 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4010 	    OID_AUTO, "txr_inuse", CTLFLAG_RD, &sc->tx_ring_inuse, 0,
4011 	    "# of TX rings used");
4012 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4013 	    OID_AUTO, "rxd", CTLTYPE_INT | CTLFLAG_RD,
4014 	    sc, 0, ix_sysctl_rxd, "I",
4015 	    "# of RX descs");
4016 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4017 	    OID_AUTO, "txd", CTLTYPE_INT | CTLFLAG_RD,
4018 	    sc, 0, ix_sysctl_txd, "I",
4019 	    "# of TX descs");
4020 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4021 	    OID_AUTO, "tx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4022 	    sc, 0, ix_sysctl_tx_wreg_nsegs, "I",
4023 	    "# of segments sent before write to hardware register");
4024 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4025 	    OID_AUTO, "rx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4026 	    sc, 0, ix_sysctl_rx_wreg_nsegs, "I",
4027 	    "# of received segments sent before write to hardware register");
4028 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4029 	    OID_AUTO, "tx_intr_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4030 	    sc, 0, ix_sysctl_tx_intr_nsegs, "I",
4031 	    "# of segments per TX interrupt");
4032 
4033 #ifdef IFPOLL_ENABLE
4034 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4035 	    OID_AUTO, "npoll_rxoff", CTLTYPE_INT|CTLFLAG_RW,
4036 	    sc, 0, ix_sysctl_npoll_rxoff, "I", "NPOLLING RX cpu offset");
4037 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4038 	    OID_AUTO, "npoll_txoff", CTLTYPE_INT|CTLFLAG_RW,
4039 	    sc, 0, ix_sysctl_npoll_txoff, "I", "NPOLLING TX cpu offset");
4040 #endif
4041 
4042 #define IX_ADD_INTR_RATE_SYSCTL(sc, use, name) \
4043 do { \
4044 	ix_add_intr_rate_sysctl(sc, IX_INTR_USE_##use, #name, \
4045 	    ix_sysctl_##name, #use " interrupt rate"); \
4046 } while (0)
4047 
4048 	IX_ADD_INTR_RATE_SYSCTL(sc, RXTX, rxtx_intr_rate);
4049 	IX_ADD_INTR_RATE_SYSCTL(sc, RX, rx_intr_rate);
4050 	IX_ADD_INTR_RATE_SYSCTL(sc, TX, tx_intr_rate);
4051 	IX_ADD_INTR_RATE_SYSCTL(sc, STATUS, sts_intr_rate);
4052 
4053 #undef IX_ADD_INTR_RATE_SYSCTL
4054 
4055 #ifdef IX_RSS_DEBUG
4056 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4057 	    OID_AUTO, "rss_debug", CTLFLAG_RW, &sc->rss_debug, 0,
4058 	    "RSS debug level");
4059 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
4060 		ksnprintf(node, sizeof(node), "rx%d_pkt", i);
4061 		SYSCTL_ADD_ULONG(ctx,
4062 		    SYSCTL_CHILDREN(tree), OID_AUTO, node,
4063 		    CTLFLAG_RW, &sc->rx_rings[i].rx_pkts, "RXed packets");
4064 	}
4065 #endif
4066 
4067 #if 0
4068 	ix_add_hw_stats(sc);
4069 #endif
4070 
4071 }
4072 
4073 static int
4074 ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4075 {
4076 	struct ix_softc *sc = (void *)arg1;
4077 	struct ifnet *ifp = &sc->arpcom.ac_if;
4078 	int error, nsegs, i;
4079 
4080 	nsegs = sc->tx_rings[0].tx_wreg_nsegs;
4081 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4082 	if (error || req->newptr == NULL)
4083 		return error;
4084 	if (nsegs < 0)
4085 		return EINVAL;
4086 
4087 	ifnet_serialize_all(ifp);
4088 	for (i = 0; i < sc->tx_ring_cnt; ++i)
4089 		sc->tx_rings[i].tx_wreg_nsegs = nsegs;
4090 	ifnet_deserialize_all(ifp);
4091 
4092 	return 0;
4093 }
4094 
4095 static int
4096 ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4097 {
4098 	struct ix_softc *sc = (void *)arg1;
4099 	struct ifnet *ifp = &sc->arpcom.ac_if;
4100 	int error, nsegs, i;
4101 
4102 	nsegs = sc->rx_rings[0].rx_wreg_nsegs;
4103 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4104 	if (error || req->newptr == NULL)
4105 		return error;
4106 	if (nsegs < 0)
4107 		return EINVAL;
4108 
4109 	ifnet_serialize_all(ifp);
4110 	for (i = 0; i < sc->rx_ring_cnt; ++i)
4111 			sc->rx_rings[i].rx_wreg_nsegs = nsegs;
4112 	ifnet_deserialize_all(ifp);
4113 
4114 	return 0;
4115 }
4116 
4117 static int
4118 ix_sysctl_txd(SYSCTL_HANDLER_ARGS)
4119 {
4120 	struct ix_softc *sc = (void *)arg1;
4121 	int txd;
4122 
4123 	txd = sc->tx_rings[0].tx_ndesc;
4124 	return sysctl_handle_int(oidp, &txd, 0, req);
4125 }
4126 
4127 static int
4128 ix_sysctl_rxd(SYSCTL_HANDLER_ARGS)
4129 {
4130 	struct ix_softc *sc = (void *)arg1;
4131 	int rxd;
4132 
4133 	rxd = sc->rx_rings[0].rx_ndesc;
4134 	return sysctl_handle_int(oidp, &rxd, 0, req);
4135 }
4136 
4137 static int
4138 ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS)
4139 {
4140 	struct ix_softc *sc = (void *)arg1;
4141 	struct ifnet *ifp = &sc->arpcom.ac_if;
4142 	struct ix_tx_ring *txr = &sc->tx_rings[0];
4143 	int error, nsegs;
4144 
4145 	nsegs = txr->tx_intr_nsegs;
4146 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4147 	if (error || req->newptr == NULL)
4148 		return error;
4149 	if (nsegs < 0)
4150 		return EINVAL;
4151 
4152 	ifnet_serialize_all(ifp);
4153 
4154 	if (nsegs >= txr->tx_ndesc - IX_MAX_SCATTER - IX_TX_RESERVED) {
4155 		error = EINVAL;
4156 	} else {
4157 		int i;
4158 
4159 		error = 0;
4160 		for (i = 0; i < sc->tx_ring_cnt; ++i)
4161 			sc->tx_rings[i].tx_intr_nsegs = nsegs;
4162 	}
4163 
4164 	ifnet_deserialize_all(ifp);
4165 
4166 	return error;
4167 }
4168 
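/*
 * Translate an interrupt rate (interrupts/second) into an EITR
 * interval and program it, preserving the other bits of the register.
 * The interval is computed as 1000000000 / 256 / rate and clamped to
 * the per-MAC limits; e.g. a requested rate of 8000 interrupts/s
 * yields 1000000000 / 256 / 8000 = 488 (integer division).  The
 * divisor of 256 presumably reflects the hardware interval
 * granularity.
 */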
4169 static void
4170 ix_set_eitr(struct ix_softc *sc, int idx, int rate)
4171 {
4172 	uint32_t eitr, eitr_intvl;
4173 
4174 	eitr = IXGBE_READ_REG(&sc->hw, IXGBE_EITR(idx));
4175 	eitr_intvl = 1000000000 / 256 / rate;
4176 
4177 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4178 		eitr &= ~IX_EITR_INTVL_MASK_82598;
4179 		if (eitr_intvl == 0)
4180 			eitr_intvl = 1;
4181 		else if (eitr_intvl > IX_EITR_INTVL_MASK_82598)
4182 			eitr_intvl = IX_EITR_INTVL_MASK_82598;
4183 	} else {
4184 		eitr &= ~IX_EITR_INTVL_MASK;
4185 
4186 		eitr_intvl &= ~IX_EITR_INTVL_RSVD_MASK;
4187 		if (eitr_intvl == 0)
4188 			eitr_intvl = IX_EITR_INTVL_MIN;
4189 		else if (eitr_intvl > IX_EITR_INTVL_MAX)
4190 			eitr_intvl = IX_EITR_INTVL_MAX;
4191 	}
4192 	eitr |= eitr_intvl;
4193 
4194 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EITR(idx), eitr);
4195 }
4196 
4197 static int
4198 ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS)
4199 {
4200 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RXTX);
4201 }
4202 
4203 static int
4204 ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS)
4205 {
4206 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RX);
4207 }
4208 
4209 static int
4210 ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS)
4211 {
4212 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_TX);
4213 }
4214 
4215 static int
4216 ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS)
4217 {
4218 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_STATUS);
4219 }
4220 
4221 static int
4222 ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int use)
4223 {
4224 	struct ix_softc *sc = (void *)arg1;
4225 	struct ifnet *ifp = &sc->arpcom.ac_if;
4226 	int error, rate, i;
4227 
4228 	rate = 0;
4229 	for (i = 0; i < sc->intr_cnt; ++i) {
4230 		if (sc->intr_data[i].intr_use == use) {
4231 			rate = sc->intr_data[i].intr_rate;
4232 			break;
4233 		}
4234 	}
4235 
4236 	error = sysctl_handle_int(oidp, &rate, 0, req);
4237 	if (error || req->newptr == NULL)
4238 		return error;
4239 	if (rate <= 0)
4240 		return EINVAL;
4241 
4242 	ifnet_serialize_all(ifp);
4243 
4244 	for (i = 0; i < sc->intr_cnt; ++i) {
4245 		if (sc->intr_data[i].intr_use == use) {
4246 			sc->intr_data[i].intr_rate = rate;
4247 			if (ifp->if_flags & IFF_RUNNING)
4248 				ix_set_eitr(sc, i, rate);
4249 		}
4250 	}
4251 
4252 	ifnet_deserialize_all(ifp);
4253 
4254 	return error;
4255 }
4256 
4257 static void
4258 ix_add_intr_rate_sysctl(struct ix_softc *sc, int use,
4259     const char *name, int (*handler)(SYSCTL_HANDLER_ARGS), const char *desc)
4260 {
4261 	int i;
4262 
4263 	for (i = 0; i < sc->intr_cnt; ++i) {
4264 		if (sc->intr_data[i].intr_use == use) {
4265 			SYSCTL_ADD_PROC(device_get_sysctl_ctx(sc->dev),
4266 			    SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)),
4267 			    OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW,
4268 			    sc, 0, handler, "I", desc);
4269 			break;
4270 		}
4271 	}
4272 }
4273 
4274 static void
4275 ix_set_timer_cpuid(struct ix_softc *sc, boolean_t polling)
4276 {
4277 	if (polling || sc->intr_type == PCI_INTR_TYPE_MSIX)
4278 		sc->timer_cpuid = 0; /* XXX fixed */
4279 	else
4280 		sc->timer_cpuid = rman_get_cpuid(sc->intr_data[0].intr_res);
4281 }
4282 
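/*
 * Attempt to switch to MSI-X.  The usable vector count is rounded
 * down to a power of two, one vector is reserved for link/status
 * events, and the rest are spread over the RX/TX rings, either as
 * independent RX and TX vectors or as aggregated RX/TX pairs when
 * there are not enough vectors (or when forced via the msix.agg_rxtx
 * tunable).  Each vector is bound to a CPU derived from the
 * msix.rxoff/msix.txoff/msix.off offsets.  On any failure everything
 * is torn down and the driver stays on MSI or legacy interrupts.
 */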
4283 static void
4284 ix_alloc_msix(struct ix_softc *sc)
4285 {
4286 	int msix_enable, msix_cnt, msix_cnt2, alloc_cnt;
4287 	struct ix_intr_data *intr;
4288 	int i, x, error;
4289 	int offset, offset_def, agg_rxtx, ring_max;
4290 	boolean_t aggregate, setup = FALSE;
4291 
4292 	msix_enable = ix_msix_enable;
4293 	/*
4294 	 * Don't enable MSI-X on 82598 by default, see:
4295 	 * 82598 specification update errata #38
4296 	 */
4297 	if (sc->hw.mac.type == ixgbe_mac_82598EB)
4298 		msix_enable = 0;
4299 	msix_enable = device_getenv_int(sc->dev, "msix.enable", msix_enable);
4300 	if (!msix_enable)
4301 		return;
4302 
4303 	msix_cnt = pci_msix_count(sc->dev);
4304 #ifdef IX_MSIX_DEBUG
4305 	msix_cnt = device_getenv_int(sc->dev, "msix.count", msix_cnt);
4306 #endif
4307 	if (msix_cnt <= 1) {
4308 		/* Using only one MSI-X vector does not make sense */
4309 		return;
4310 	}
4311 
4312 	i = 0;
4313 	while ((1 << (i + 1)) <= msix_cnt)
4314 		++i;
4315 	msix_cnt2 = 1 << i;
4316 
4317 	if (bootverbose) {
4318 		device_printf(sc->dev, "MSI-X count %d/%d\n",
4319 		    msix_cnt2, msix_cnt);
4320 	}
4321 
4322 	KKASSERT(msix_cnt >= msix_cnt2);
4323 	if (msix_cnt == msix_cnt2) {
4324 		/* We need at least one MSI-X for link status */
4325 		msix_cnt2 >>= 1;
4326 		if (msix_cnt2 <= 1) {
4327 			/* One MSI-X for RX/TX does not make sense */
4328 			device_printf(sc->dev, "not enough MSI-X for TX/RX, "
4329 			    "MSI-X count %d/%d\n", msix_cnt2, msix_cnt);
4330 			return;
4331 		}
4332 		KKASSERT(msix_cnt > msix_cnt2);
4333 
4334 		if (bootverbose) {
4335 			device_printf(sc->dev, "MSI-X count eq fixup %d/%d\n",
4336 			    msix_cnt2, msix_cnt);
4337 		}
4338 	}
4339 
4340 	/*
4341 	 * Make sure that we don't exceed the limits imposed by the
4342 	 * interrupt related registers (EIMS, etc).
4343 	 *
4344 	 * NOTE: msix_cnt > msix_cnt2 when we reach here
4345 	 */
4346 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4347 		if (msix_cnt2 > IX_MAX_MSIX_82598)
4348 			msix_cnt2 = IX_MAX_MSIX_82598;
4349 	} else {
4350 		if (msix_cnt2 > IX_MAX_MSIX)
4351 			msix_cnt2 = IX_MAX_MSIX;
4352 	}
4353 	msix_cnt = msix_cnt2 + 1;	/* +1 for status */
4354 
4355 	if (bootverbose) {
4356 		device_printf(sc->dev, "MSI-X count max fixup %d/%d\n",
4357 		    msix_cnt2, msix_cnt);
4358 	}
4359 
4360 	sc->rx_ring_msix = sc->rx_ring_cnt;
4361 	if (sc->rx_ring_msix > msix_cnt2)
4362 		sc->rx_ring_msix = msix_cnt2;
4363 
4364 	sc->tx_ring_msix = sc->tx_ring_cnt;
4365 	if (sc->tx_ring_msix > msix_cnt2)
4366 		sc->tx_ring_msix = msix_cnt2;
4367 
4368 	ring_max = sc->rx_ring_msix;
4369 	if (ring_max < sc->tx_ring_msix)
4370 		ring_max = sc->tx_ring_msix;
4371 
4372 	/* Allow user to force independent RX/TX MSI-X handling */
4373 	agg_rxtx = device_getenv_int(sc->dev, "msix.agg_rxtx",
4374 	    ix_msix_agg_rxtx);
4375 
4376 	if (!agg_rxtx && msix_cnt >= sc->tx_ring_msix + sc->rx_ring_msix + 1) {
4377 		/*
4378 		 * Independent TX/RX MSI-X
4379 		 */
4380 		aggregate = FALSE;
4381 		if (bootverbose)
4382 			device_printf(sc->dev, "independent TX/RX MSI-X\n");
4383 		alloc_cnt = sc->tx_ring_msix + sc->rx_ring_msix;
4384 	} else {
4385 		/*
4386 		 * Aggregate TX/RX MSI-X
4387 		 */
4388 		aggregate = TRUE;
4389 		if (bootverbose)
4390 			device_printf(sc->dev, "aggregate TX/RX MSI-X\n");
4391 		alloc_cnt = msix_cnt2;
4392 		if (alloc_cnt > ring_max)
4393 			alloc_cnt = ring_max;
4394 		KKASSERT(alloc_cnt >= sc->rx_ring_msix &&
4395 		    alloc_cnt >= sc->tx_ring_msix);
4396 	}
4397 	++alloc_cnt;	/* For status */
4398 
4399 	if (bootverbose) {
4400 		device_printf(sc->dev, "MSI-X alloc %d, "
4401 		    "RX ring %d, TX ring %d\n", alloc_cnt,
4402 		    sc->rx_ring_msix, sc->tx_ring_msix);
4403 	}
4404 
4405 	sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82598);
4406 	sc->msix_mem_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4407 	    &sc->msix_mem_rid, RF_ACTIVE);
4408 	if (sc->msix_mem_res == NULL) {
4409 		sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82599);
4410 		sc->msix_mem_res = bus_alloc_resource_any(sc->dev,
4411 		    SYS_RES_MEMORY, &sc->msix_mem_rid, RF_ACTIVE);
4412 		if (sc->msix_mem_res == NULL) {
4413 			device_printf(sc->dev, "Unable to map MSI-X table\n");
4414 			return;
4415 		}
4416 	}
4417 
4418 	sc->intr_cnt = alloc_cnt;
4419 	sc->intr_data = kmalloc(sizeof(struct ix_intr_data) * sc->intr_cnt,
4420 	    M_DEVBUF, M_WAITOK | M_ZERO);
4421 	for (x = 0; x < sc->intr_cnt; ++x) {
4422 		intr = &sc->intr_data[x];
4423 		intr->intr_rid = -1;
4424 		intr->intr_rate = IX_INTR_RATE;
4425 	}
4426 
4427 	x = 0;
4428 	if (!aggregate) {
4429 		/*
4430 		 * RX rings
4431 		 */
4432 		if (sc->rx_ring_msix == ncpus2) {
4433 			offset = 0;
4434 		} else {
4435 			offset_def = (sc->rx_ring_msix *
4436 			    device_get_unit(sc->dev)) % ncpus2;
4437 
4438 			offset = device_getenv_int(sc->dev,
4439 			    "msix.rxoff", offset_def);
4440 			if (offset >= ncpus2 ||
4441 			    offset % sc->rx_ring_msix != 0) {
4442 				device_printf(sc->dev,
4443 				    "invalid msix.rxoff %d, use %d\n",
4444 				    offset, offset_def);
4445 				offset = offset_def;
4446 			}
4447 		}
4448 		ix_conf_rx_msix(sc, 0, &x, offset);
4449 
4450 		/*
4451 		 * TX rings
4452 		 */
4453 		if (sc->tx_ring_msix == ncpus2) {
4454 			offset = 0;
4455 		} else {
4456 			offset_def = (sc->tx_ring_msix *
4457 			    device_get_unit(sc->dev)) % ncpus2;
4458 
4459 			offset = device_getenv_int(sc->dev,
4460 			    "msix.txoff", offset_def);
4461 			if (offset >= ncpus2 ||
4462 			    offset % sc->tx_ring_msix != 0) {
4463 				device_printf(sc->dev,
4464 				    "invalid msix.txoff %d, use %d\n",
4465 				    offset, offset_def);
4466 				offset = offset_def;
4467 			}
4468 		}
4469 		ix_conf_tx_msix(sc, 0, &x, offset);
4470 	} else {
4471 		int ring_agg;
4472 
4473 		ring_agg = sc->rx_ring_msix;
4474 		if (ring_agg > sc->tx_ring_msix)
4475 			ring_agg = sc->tx_ring_msix;
4476 
4477 		if (ring_max == ncpus2) {
4478 			offset = 0;
4479 		} else {
4480 			offset_def = (ring_max * device_get_unit(sc->dev)) %
4481 			    ncpus2;
4482 
4483 			offset = device_getenv_int(sc->dev, "msix.off",
4484 			    offset_def);
4485 			if (offset >= ncpus2 || offset % ring_max != 0) {
4486 				device_printf(sc->dev,
4487 				    "invalid msix.off %d, use %d\n",
4488 				    offset, offset_def);
4489 				offset = offset_def;
4490 			}
4491 		}
4492 
4493 		for (i = 0; i < ring_agg; ++i) {
4494 			struct ix_tx_ring *txr = &sc->tx_rings[i];
4495 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
4496 
4497 			KKASSERT(x < sc->intr_cnt);
4498 			rxr->rx_intr_vec = x;
4499 			ix_setup_msix_eims(sc, x,
4500 			    &rxr->rx_eims, &rxr->rx_eims_val);
4501 			rxr->rx_txr = txr;
4502 			/* NOTE: Leave TX ring's intr_vec negative */
4503 
4504 			intr = &sc->intr_data[x++];
4505 
4506 			intr->intr_serialize = &rxr->rx_serialize;
4507 			intr->intr_func = ix_msix_rxtx;
4508 			intr->intr_funcarg = rxr;
4509 			intr->intr_use = IX_INTR_USE_RXTX;
4510 
4511 			intr->intr_cpuid = i + offset;
4512 			KKASSERT(intr->intr_cpuid < ncpus2);
4513 			txr->tx_intr_cpuid = intr->intr_cpuid;
4514 
4515 			ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0),
4516 			    "%s rxtx%d", device_get_nameunit(sc->dev), i);
4517 			intr->intr_desc = intr->intr_desc0;
4518 		}
4519 
4520 		if (ring_agg != ring_max) {
4521 			if (ring_max == sc->tx_ring_msix)
4522 				ix_conf_tx_msix(sc, i, &x, offset);
4523 			else
4524 				ix_conf_rx_msix(sc, i, &x, offset);
4525 		}
4526 	}
4527 
4528 	/*
4529 	 * Status MSI-X
4530 	 */
4531 	KKASSERT(x < sc->intr_cnt);
4532 	sc->sts_msix_vec = x;
4533 
4534 	intr = &sc->intr_data[x++];
4535 
4536 	intr->intr_serialize = &sc->main_serialize;
4537 	intr->intr_func = ix_msix_status;
4538 	intr->intr_funcarg = sc;
4539 	intr->intr_cpuid = 0;
4540 	intr->intr_use = IX_INTR_USE_STATUS;
4541 
4542 	ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s sts",
4543 	    device_get_nameunit(sc->dev));
4544 	intr->intr_desc = intr->intr_desc0;
4545 
4546 	KKASSERT(x == sc->intr_cnt);
4547 
4548 	error = pci_setup_msix(sc->dev);
4549 	if (error) {
4550 		device_printf(sc->dev, "Setup MSI-X failed\n");
4551 		goto back;
4552 	}
4553 	setup = TRUE;
4554 
4555 	for (i = 0; i < sc->intr_cnt; ++i) {
4556 		intr = &sc->intr_data[i];
4557 
4558 		error = pci_alloc_msix_vector(sc->dev, i, &intr->intr_rid,
4559 		    intr->intr_cpuid);
4560 		if (error) {
4561 			device_printf(sc->dev,
4562 			    "Unable to allocate MSI-X %d on cpu%d\n", i,
4563 			    intr->intr_cpuid);
4564 			goto back;
4565 		}
4566 
4567 		intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
4568 		    &intr->intr_rid, RF_ACTIVE);
4569 		if (intr->intr_res == NULL) {
4570 			device_printf(sc->dev,
4571 			    "Unable to allocate MSI-X %d resource\n", i);
4572 			error = ENOMEM;
4573 			goto back;
4574 		}
4575 	}
4576 
4577 	pci_enable_msix(sc->dev);
4578 	sc->intr_type = PCI_INTR_TYPE_MSIX;
4579 back:
4580 	if (error)
4581 		ix_free_msix(sc, setup);
4582 }
4583 
4584 static void
4585 ix_free_msix(struct ix_softc *sc, boolean_t setup)
4586 {
4587 	int i;
4588 
4589 	KKASSERT(sc->intr_cnt > 1);
4590 
4591 	for (i = 0; i < sc->intr_cnt; ++i) {
4592 		struct ix_intr_data *intr = &sc->intr_data[i];
4593 
4594 		if (intr->intr_res != NULL) {
4595 			bus_release_resource(sc->dev, SYS_RES_IRQ,
4596 			    intr->intr_rid, intr->intr_res);
4597 		}
4598 		if (intr->intr_rid >= 0)
4599 			pci_release_msix_vector(sc->dev, intr->intr_rid);
4600 	}
4601 	if (setup)
4602 		pci_teardown_msix(sc->dev);
4603 
4604 	sc->intr_cnt = 0;
4605 	kfree(sc->intr_data, M_DEVBUF);
4606 	sc->intr_data = NULL;
4607 }
4608 
4609 static void
4610 ix_conf_rx_msix(struct ix_softc *sc, int i, int *x0, int offset)
4611 {
4612 	int x = *x0;
4613 
4614 	for (; i < sc->rx_ring_msix; ++i) {
4615 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
4616 		struct ix_intr_data *intr;
4617 
4618 		KKASSERT(x < sc->intr_cnt);
4619 		rxr->rx_intr_vec = x;
4620 		ix_setup_msix_eims(sc, x, &rxr->rx_eims, &rxr->rx_eims_val);
4621 
4622 		intr = &sc->intr_data[x++];
4623 
4624 		intr->intr_serialize = &rxr->rx_serialize;
4625 		intr->intr_func = ix_msix_rx;
4626 		intr->intr_funcarg = rxr;
4627 		intr->intr_rate = IX_MSIX_RX_RATE;
4628 		intr->intr_use = IX_INTR_USE_RX;
4629 
4630 		intr->intr_cpuid = i + offset;
4631 		KKASSERT(intr->intr_cpuid < ncpus2);
4632 
4633 		ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s rx%d",
4634 		    device_get_nameunit(sc->dev), i);
4635 		intr->intr_desc = intr->intr_desc0;
4636 	}
4637 	*x0 = x;
4638 }
4639 
4640 static void
4641 ix_conf_tx_msix(struct ix_softc *sc, int i, int *x0, int offset)
4642 {
4643 	int x = *x0;
4644 
4645 	for (; i < sc->tx_ring_msix; ++i) {
4646 		struct ix_tx_ring *txr = &sc->tx_rings[i];
4647 		struct ix_intr_data *intr;
4648 
4649 		KKASSERT(x < sc->intr_cnt);
4650 		txr->tx_intr_vec = x;
4651 		ix_setup_msix_eims(sc, x, &txr->tx_eims, &txr->tx_eims_val);
4652 
4653 		intr = &sc->intr_data[x++];
4654 
4655 		intr->intr_serialize = &txr->tx_serialize;
4656 		intr->intr_func = ix_msix_tx;
4657 		intr->intr_funcarg = txr;
4658 		intr->intr_rate = IX_MSIX_TX_RATE;
4659 		intr->intr_use = IX_INTR_USE_TX;
4660 
4661 		intr->intr_cpuid = i + offset;
4662 		KKASSERT(intr->intr_cpuid < ncpus2);
4663 		txr->tx_intr_cpuid = intr->intr_cpuid;
4664 
4665 		ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s tx%d",
4666 		    device_get_nameunit(sc->dev), i);
4667 		intr->intr_desc = intr->intr_desc0;
4668 	}
4669 	*x0 = x;
4670 }
4671 
4672 static void
4673 ix_msix_rx(void *xrxr)
4674 {
4675 	struct ix_rx_ring *rxr = xrxr;
4676 
4677 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4678 
4679 	ix_rxeof(rxr, -1);
4680 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
4681 }
4682 
4683 static void
4684 ix_msix_tx(void *xtxr)
4685 {
4686 	struct ix_tx_ring *txr = xtxr;
4687 
4688 	ASSERT_SERIALIZED(&txr->tx_serialize);
4689 
4690 	ix_txeof(txr, *(txr->tx_hdr));
4691 	if (!ifsq_is_empty(txr->tx_ifsq))
4692 		ifsq_devstart(txr->tx_ifsq);
4693 	IXGBE_WRITE_REG(&txr->tx_sc->hw, txr->tx_eims, txr->tx_eims_val);
4694 }
4695 
4696 static void
4697 ix_msix_rxtx(void *xrxr)
4698 {
4699 	struct ix_rx_ring *rxr = xrxr;
4700 	struct ix_tx_ring *txr;
4701 	int hdr;
4702 
4703 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4704 
4705 	ix_rxeof(rxr, -1);
4706 
4707 	/*
4708 	 * NOTE:
4709 	 * Since tx_next_clean is only changed by ix_txeof(),
4710 	 * which is called only from interrupt context, checking
4711 	 * it without holding the TX serializer is MPSAFE.
4712 	 */
4713 	txr = rxr->rx_txr;
4714 	hdr = *(txr->tx_hdr);
4715 	if (hdr != txr->tx_next_clean) {
4716 		lwkt_serialize_enter(&txr->tx_serialize);
4717 		ix_txeof(txr, hdr);
4718 		if (!ifsq_is_empty(txr->tx_ifsq))
4719 			ifsq_devstart(txr->tx_ifsq);
4720 		lwkt_serialize_exit(&txr->tx_serialize);
4721 	}
4722 
4723 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
4724 }
4725 
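/*
 * Decode the status-related EICR bits shared by the interrupt and
 * polling status handlers: link state change, ECC error, over-temp,
 * SFP+ module insertion and multispeed-fiber events, fan failure on
 * the 82598AT and the external PHY interrupt on X550EM_X 10G-T.
 */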
4726 static void
4727 ix_intr_status(struct ix_softc *sc, uint32_t eicr)
4728 {
4729 	struct ixgbe_hw *hw = &sc->hw;
4730 
4731 	/* Link status change */
4732 	if (eicr & IXGBE_EICR_LSC)
4733 		ix_handle_link(sc);
4734 
4735 	if (hw->mac.type != ixgbe_mac_82598EB) {
4736 		if (eicr & IXGBE_EICR_ECC)
4737 			if_printf(&sc->arpcom.ac_if, "ECC ERROR!!  Reboot!!\n");
4738 
4739 		/* Check for over temp condition */
4740 		if (eicr & IXGBE_EICR_TS) {
4741 			if_printf(&sc->arpcom.ac_if, "CRITICAL: OVER TEMP!!  "
4742 			    "PHY IS SHUT DOWN!!  Shutdown!!\n");
4743 		}
4744 	}
4745 
4746 	if (ix_is_sfp(hw)) {
4747 		uint32_t mod_mask;
4748 
4749 		/* Pluggable optics-related interrupt */
4750 		if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP)
4751 			mod_mask = IXGBE_EICR_GPI_SDP0_X540;
4752 		else
4753 			mod_mask = IXGBE_EICR_GPI_SDP2_BY_MAC(hw);
4754 		if (eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw))
4755 			ix_handle_msf(sc);
4756 		else if (eicr & mod_mask)
4757 			ix_handle_mod(sc);
4758 	}
4759 
4760 	/* Check for fan failure */
4761 	if (hw->device_id == IXGBE_DEV_ID_82598AT &&
4762 	    (eicr & IXGBE_EICR_GPI_SDP1))
4763 		if_printf(&sc->arpcom.ac_if, "FAN FAILURE!!  Replace!!\n");
4764 
4765 	/* External PHY interrupt */
4766 	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T &&
4767 	    (eicr & IXGBE_EICR_GPI_SDP0_X540))
4768 		ix_handle_phy(sc);
4769 }
4770 
4771 static void
4772 ix_msix_status(void *xsc)
4773 {
4774 	struct ix_softc *sc = xsc;
4775 	uint32_t eicr;
4776 
4777 	ASSERT_SERIALIZED(&sc->main_serialize);
4778 
4779 	eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
4780 	ix_intr_status(sc, eicr);
4781 
4782 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMS, sc->intr_mask);
4783 }
4784 
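/*
 * Map an MSI-X vector index to the EIMS register and bit used to
 * re-enable that vector from the handlers: vectors 0-31 live in EIMS
 * (82598) or EIMS_EX(0), vectors 32 and up in EIMS_EX(1) (never on
 * the 82598, which is limited to IX_MAX_MSIX_82598 vectors).
 */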
4785 static void
4786 ix_setup_msix_eims(const struct ix_softc *sc, int x,
4787     uint32_t *eims, uint32_t *eims_val)
4788 {
4789 	if (x < 32) {
4790 		if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4791 			KASSERT(x < IX_MAX_MSIX_82598,
4792 			    ("%s: invalid vector %d for 82598",
4793 			     device_get_nameunit(sc->dev), x));
4794 			*eims = IXGBE_EIMS;
4795 		} else {
4796 			*eims = IXGBE_EIMS_EX(0);
4797 		}
4798 		*eims_val = 1 << x;
4799 	} else {
4800 		KASSERT(x < IX_MAX_MSIX, ("%s: invalid vector %d",
4801 		    device_get_nameunit(sc->dev), x));
4802 		KASSERT(sc->hw.mac.type != ixgbe_mac_82598EB,
4803 		    ("%s: invalid vector %d for 82598",
4804 		     device_get_nameunit(sc->dev), x));
4805 		*eims = IXGBE_EIMS_EX(1);
4806 		*eims_val = 1 << (x - 32);
4807 	}
4808 }
4809 
4810 #ifdef IFPOLL_ENABLE
4811 
4812 static void
4813 ix_npoll_status(struct ifnet *ifp)
4814 {
4815 	struct ix_softc *sc = ifp->if_softc;
4816 	uint32_t eicr;
4817 
4818 	ASSERT_SERIALIZED(&sc->main_serialize);
4819 
4820 	eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
4821 	ix_intr_status(sc, eicr);
4822 }
4823 
4824 static void
4825 ix_npoll_tx(struct ifnet *ifp, void *arg, int cycle __unused)
4826 {
4827 	struct ix_tx_ring *txr = arg;
4828 
4829 	ASSERT_SERIALIZED(&txr->tx_serialize);
4830 
4831 	ix_txeof(txr, *(txr->tx_hdr));
4832 	if (!ifsq_is_empty(txr->tx_ifsq))
4833 		ifsq_devstart(txr->tx_ifsq);
4834 }
4835 
4836 static void
4837 ix_npoll_rx(struct ifnet *ifp __unused, void *arg, int cycle)
4838 {
4839 	struct ix_rx_ring *rxr = arg;
4840 
4841 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4842 
4843 	ix_rxeof(rxr, cycle);
4844 }
4845 
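/*
 * ifpoll registration hook.  With a non-NULL info the per-ring poll
 * handlers are registered at CPU slots (ring index + npoll offset)
 * and interrupts are disabled; with a NULL info the TX subqueues are
 * moved back to their interrupt CPUs and interrupts are re-enabled.
 * If the number of rings in use differs between the two modes, the
 * interface is reinitialized instead.
 */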
4846 static void
4847 ix_npoll(struct ifnet *ifp, struct ifpoll_info *info)
4848 {
4849 	struct ix_softc *sc = ifp->if_softc;
4850 	int i, txr_cnt, rxr_cnt;
4851 
4852 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
4853 
4854 	if (info) {
4855 		int off;
4856 
4857 		info->ifpi_status.status_func = ix_npoll_status;
4858 		info->ifpi_status.serializer = &sc->main_serialize;
4859 
4860 		txr_cnt = ix_get_txring_inuse(sc, TRUE);
4861 		off = sc->tx_npoll_off;
4862 		for (i = 0; i < txr_cnt; ++i) {
4863 			struct ix_tx_ring *txr = &sc->tx_rings[i];
4864 			int idx = i + off;
4865 
4866 			KKASSERT(idx < ncpus2);
4867 			info->ifpi_tx[idx].poll_func = ix_npoll_tx;
4868 			info->ifpi_tx[idx].arg = txr;
4869 			info->ifpi_tx[idx].serializer = &txr->tx_serialize;
4870 			ifsq_set_cpuid(txr->tx_ifsq, idx);
4871 		}
4872 
4873 		rxr_cnt = ix_get_rxring_inuse(sc, TRUE);
4874 		off = sc->rx_npoll_off;
4875 		for (i = 0; i < rxr_cnt; ++i) {
4876 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
4877 			int idx = i + off;
4878 
4879 			KKASSERT(idx < ncpus2);
4880 			info->ifpi_rx[idx].poll_func = ix_npoll_rx;
4881 			info->ifpi_rx[idx].arg = rxr;
4882 			info->ifpi_rx[idx].serializer = &rxr->rx_serialize;
4883 		}
4884 
4885 		if (ifp->if_flags & IFF_RUNNING) {
4886 			if (rxr_cnt == sc->rx_ring_inuse &&
4887 			    txr_cnt == sc->tx_ring_inuse) {
4888 				ix_set_timer_cpuid(sc, TRUE);
4889 				ix_disable_intr(sc);
4890 			} else {
4891 				ix_init(sc);
4892 			}
4893 		}
4894 	} else {
4895 		for (i = 0; i < sc->tx_ring_cnt; ++i) {
4896 			struct ix_tx_ring *txr = &sc->tx_rings[i];
4897 
4898 			ifsq_set_cpuid(txr->tx_ifsq, txr->tx_intr_cpuid);
4899 		}
4900 
4901 		if (ifp->if_flags & IFF_RUNNING) {
4902 			txr_cnt = ix_get_txring_inuse(sc, FALSE);
4903 			rxr_cnt = ix_get_rxring_inuse(sc, FALSE);
4904 
4905 			if (rxr_cnt == sc->rx_ring_inuse &&
4906 			    txr_cnt == sc->tx_ring_inuse) {
4907 				ix_set_timer_cpuid(sc, FALSE);
4908 				ix_enable_intr(sc);
4909 			} else {
4910 				ix_init(sc);
4911 			}
4912 		}
4913 	}
4914 }
4915 
4916 static int
4917 ix_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS)
4918 {
4919 	struct ix_softc *sc = (void *)arg1;
4920 	struct ifnet *ifp = &sc->arpcom.ac_if;
4921 	int error, off;
4922 
4923 	off = sc->rx_npoll_off;
4924 	error = sysctl_handle_int(oidp, &off, 0, req);
4925 	if (error || req->newptr == NULL)
4926 		return error;
4927 	if (off < 0)
4928 		return EINVAL;
4929 
4930 	ifnet_serialize_all(ifp);
4931 	if (off >= ncpus2 || off % sc->rx_ring_cnt != 0) {
4932 		error = EINVAL;
4933 	} else {
4934 		error = 0;
4935 		sc->rx_npoll_off = off;
4936 	}
4937 	ifnet_deserialize_all(ifp);
4938 
4939 	return error;
4940 }
4941 
4942 static int
4943 ix_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS)
4944 {
4945 	struct ix_softc *sc = (void *)arg1;
4946 	struct ifnet *ifp = &sc->arpcom.ac_if;
4947 	int error, off;
4948 
4949 	off = sc->tx_npoll_off;
4950 	error = sysctl_handle_int(oidp, &off, 0, req);
4951 	if (error || req->newptr == NULL)
4952 		return error;
4953 	if (off < 0)
4954 		return EINVAL;
4955 
4956 	ifnet_serialize_all(ifp);
4957 	if (off >= ncpus2 || off % sc->tx_ring_cnt != 0) {
4958 		error = EINVAL;
4959 	} else {
4960 		error = 0;
4961 		sc->tx_npoll_off = off;
4962 	}
4963 	ifnet_deserialize_all(ifp);
4964 
4965 	return error;
4966 }
4967 
4968 #endif /* IFPOLL_ENABLE */
4969 
4970 static enum ixgbe_fc_mode
4971 ix_ifmedia2fc(int ifm)
4972 {
4973 	int fc_opt = ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
4974 
4975 	switch (fc_opt) {
4976 	case (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE):
4977 		return ixgbe_fc_full;
4978 
4979 	case IFM_ETH_RXPAUSE:
4980 		return ixgbe_fc_rx_pause;
4981 
4982 	case IFM_ETH_TXPAUSE:
4983 		return ixgbe_fc_tx_pause;
4984 
4985 	default:
4986 		return ixgbe_fc_none;
4987 	}
4988 }
4989 
4990 static const char *
4991 ix_ifmedia2str(int ifm)
4992 {
4993 	int fc_opt = ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
4994 
4995 	switch (fc_opt) {
4996 	case (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE):
4997 		return IFM_ETH_FC_FULL;
4998 
4999 	case IFM_ETH_RXPAUSE:
5000 		return IFM_ETH_FC_RXPAUSE;
5001 
5002 	case IFM_ETH_TXPAUSE:
5003 		return IFM_ETH_FC_TXPAUSE;
5004 
5005 	default:
5006 		return IFM_ETH_FC_NONE;
5007 	}
5008 }
5009 
5010 static const char *
5011 ix_fc2str(enum ixgbe_fc_mode fc)
5012 {
5013 	switch (fc) {
5014 	case ixgbe_fc_full:
5015 		return IFM_ETH_FC_FULL;
5016 
5017 	case ixgbe_fc_rx_pause:
5018 		return IFM_ETH_FC_RXPAUSE;
5019 
5020 	case ixgbe_fc_tx_pause:
5021 		return IFM_ETH_FC_TXPAUSE;
5022 
5023 	default:
5024 		return IFM_ETH_FC_NONE;
5025 	}
5026 }
5027 
5028 static int
5029 ix_powerdown(struct ix_softc *sc)
5030 {
5031 	struct ixgbe_hw *hw = &sc->hw;
5032 	int error = 0;
5033 
5034 	/* Limit power management flow to X550EM baseT */
5035 	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T &&
5036 	    hw->phy.ops.enter_lplu) {
5037 		/* Turn off support for APM wakeup. (Using ACPI instead) */
5038 		IXGBE_WRITE_REG(hw, IXGBE_GRC,
5039 		    IXGBE_READ_REG(hw, IXGBE_GRC) & ~(uint32_t)2);
5040 
5041 		/*
5042 		 * Clear Wake Up Status register to prevent any previous wakeup
5043 		 * events from waking us up immediately after we suspend.
5044 		 */
5045 		IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff);
5046 
5047 		/*
5048 		 * Program the Wakeup Filter Control register with user filter
5049 		 * settings
5050 		 */
5051 		IXGBE_WRITE_REG(hw, IXGBE_WUFC, sc->wufc);
5052 
5053 		/* Enable wakeups and power management in Wakeup Control */
5054 		IXGBE_WRITE_REG(hw, IXGBE_WUC,
5055 		    IXGBE_WUC_WKEN | IXGBE_WUC_PME_EN);
5056 
5057 		/* X550EM baseT adapters need a special LPLU flow */
5058 		hw->phy.reset_disable = true;
5059 		ix_stop(sc);
5060 		error = hw->phy.ops.enter_lplu(hw);
5061 		if (error) {
5062 			if_printf(&sc->arpcom.ac_if,
5063 			    "Error entering LPLU: %d\n", error);
5064 		}
5065 		hw->phy.reset_disable = false;
5066 	} else {
5067 		/* Just stop for other adapters */
5068 		ix_stop(sc);
5069 	}
5070 	return error;
5071 }
5072 
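/*
 * Compute the flow control high/low water marks from the maximum
 * frame size and the size of RX packet buffer 0 (the IXGBE_DV and
 * IXGBE_LOW_DV macros yield delay values which IXGBE_BT2KB converts
 * to kilobytes), then translate the selected ifmedia pause flags into
 * the requested flow control mode.
 */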
5073 static void
5074 ix_config_flowctrl(struct ix_softc *sc)
5075 {
5076 	struct ixgbe_hw *hw = &sc->hw;
5077 	uint32_t rxpb, frame, size, tmp;
5078 
5079 	frame = sc->max_frame_size;
5080 
5081 	/* Calculate High Water */
5082 	switch (hw->mac.type) {
5083 	case ixgbe_mac_X540:
5084 	case ixgbe_mac_X550:
5085 	case ixgbe_mac_X550EM_a:
5086 	case ixgbe_mac_X550EM_x:
5087 		tmp = IXGBE_DV_X540(frame, frame);
5088 		break;
5089 	default:
5090 		tmp = IXGBE_DV(frame, frame);
5091 		break;
5092 	}
5093 	size = IXGBE_BT2KB(tmp);
5094 	rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
5095 	hw->fc.high_water[0] = rxpb - size;
5096 
5097 	/* Now calculate Low Water */
5098 	switch (hw->mac.type) {
5099 	case ixgbe_mac_X540:
5100 	case ixgbe_mac_X550:
5101 	case ixgbe_mac_X550EM_a:
5102 	case ixgbe_mac_X550EM_x:
5103 		tmp = IXGBE_LOW_DV_X540(frame);
5104 		break;
5105 	default:
5106 		tmp = IXGBE_LOW_DV(frame);
5107 		break;
5108 	}
5109 	hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
5110 
5111 	hw->fc.requested_mode = ix_ifmedia2fc(sc->ifm_media);
5112 	if (sc->ifm_media & IFM_ETH_FORCEPAUSE)
5113 		hw->fc.disable_fc_autoneg = TRUE;
5114 	else
5115 		hw->fc.disable_fc_autoneg = FALSE;
5116 	hw->fc.pause_time = IX_FC_PAUSE;
5117 	hw->fc.send_xon = TRUE;
5118 }
5119 
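/*
 * Configure DMA coalescing on X550 and later MACs.  The hardware is
 * only reprogrammed when the watchdog timer or link speed has changed
 * since the last call.
 */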
5120 static void
5121 ix_config_dmac(struct ix_softc *sc)
5122 {
5123 	struct ixgbe_hw *hw = &sc->hw;
5124 	struct ixgbe_dmac_config *dcfg = &hw->mac.dmac_config;
5125 
5126 	if (hw->mac.type < ixgbe_mac_X550 || !hw->mac.ops.dmac_config)
5127 		return;
5128 
5129 	if ((dcfg->watchdog_timer ^ sc->dmac) ||
5130 	    (dcfg->link_speed ^ sc->link_speed)) {
5131 		dcfg->watchdog_timer = sc->dmac;
5132 		dcfg->fcoe_en = false;
5133 		dcfg->link_speed = sc->link_speed;
5134 		dcfg->num_tcs = 1;
5135 
5136 		if (bootverbose) {
5137 			if_printf(&sc->arpcom.ac_if, "dmac settings: "
5138 			    "watchdog %d, link speed %d\n",
5139 			    dcfg->watchdog_timer, dcfg->link_speed);
5140 		}
5141 
5142 		hw->mac.ops.dmac_config(hw);
5143 	}
5144 }
5145 
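/*
 * Rebuild the ifmedia list from the physical layers reported by the
 * hardware; layers without a matching DragonFlyBSD media type are
 * mapped onto substitute media types (see the XXX note below).
 */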
5146 static void
5147 ix_init_media(struct ix_softc *sc)
5148 {
5149 	struct ixgbe_hw *hw = &sc->hw;
5150 	int layer, msf_ifm = IFM_NONE;
5151 
5152 	ifmedia_removeall(&sc->media);
5153 
5154 	layer = ixgbe_get_supported_physical_layer(hw);
5155 
5156 	/*
5157 	 * Media types with matching DragonFlyBSD media defines
5158 	 */
5159 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
5160 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_T | IFM_FDX,
5161 		    0, NULL);
5162 	}
5163 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
5164 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
5165 		    0, NULL);
5166 	}
5167 	if (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX) {
5168 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
5169 		    0, NULL);
5170 		/* No half-duplex support */
5171 	}
5172 
5173 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) {
5174 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_LR | IFM_FDX,
5175 		    0, NULL);
5176 		msf_ifm = IFM_1000_LX;
5177 	}
5178 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LRM) {
5179 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_LRM | IFM_FDX,
5180 		    0, NULL);
5181 		msf_ifm = IFM_1000_LX;
5182 	}
5183 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
5184 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_SR | IFM_FDX,
5185 		    0, NULL);
5186 		msf_ifm = IFM_1000_SX;
5187 	}
5188 
5189 	/* Add media for multispeed fiber */
5190 	if (ix_is_sfp(hw) && hw->phy.multispeed_fiber && msf_ifm != IFM_NONE) {
5191 		uint32_t linkcap;
5192 		bool autoneg;
5193 
5194 		hw->mac.ops.get_link_capabilities(hw, &linkcap, &autoneg);
5195 		if (linkcap & IXGBE_LINK_SPEED_1GB_FULL)
5196 			ifmedia_add_nodup(&sc->media,
5197 			    IFM_ETHER | msf_ifm | IFM_FDX, 0, NULL);
5198 	}
5199 
5200 	if ((layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) ||
5201 	    (layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA)) {
5202 		ifmedia_add_nodup(&sc->media,
5203 		    IFM_ETHER | IFM_10G_TWINAX | IFM_FDX, 0, NULL);
5204 	}
5205 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) {
5206 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_CX4 | IFM_FDX,
5207 		    0, NULL);
5208 	}
5209 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
5210 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
5211 		    0, NULL);
5212 	}
5213 
5214 	/*
5215 	 * XXX Other (no matching DragonFlyBSD media type):
5216 	 * To work around this, we'll assign these completely
5217 	 * inappropriate media types.
5218 	 */
5219 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) {
5220 		if_printf(&sc->arpcom.ac_if, "Media supported: 10GbaseKR\n");
5221 		if_printf(&sc->arpcom.ac_if, "10GbaseKR mapped to 10GbaseSR\n");
5222 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_SR | IFM_FDX,
5223 		    0, NULL);
5224 	}
5225 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) {
5226 		if_printf(&sc->arpcom.ac_if, "Media supported: 10GbaseKX4\n");
5227 		if_printf(&sc->arpcom.ac_if,
5228 		    "10GbaseKX4 mapped to 10GbaseCX4\n");
5229 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_CX4 | IFM_FDX,
5230 		    0, NULL);
5231 	}
5232 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) {
5233 		if_printf(&sc->arpcom.ac_if, "Media supported: 1000baseKX\n");
5234 		if_printf(&sc->arpcom.ac_if,
5235 		    "1000baseKX mapped to 1000baseCX\n");
5236 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_CX | IFM_FDX,
5237 		    0, NULL);
5238 	}
5239 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_BX) {
5240 		/* Someday, someone will care about you... */
5241 		if_printf(&sc->arpcom.ac_if,
5242 		    "Media supported: 1000baseBX, ignored\n");
5243 	}
5244 
5245 	/* XXX we probably don't need this */
5246 	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
5247 		ifmedia_add_nodup(&sc->media,
5248 		    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
5249 	}
5250 
5251 	ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
5252 
5253 	if (ifmedia_tryset(&sc->media, sc->ifm_media)) {
5254 		int flowctrl = (sc->ifm_media & IFM_ETH_FCMASK);
5255 
5256 		sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;
5257 		sc->ifm_media = IX_IFM_DEFAULT | flowctrl;
5258 		ifmedia_set(&sc->media, sc->ifm_media);
5259 	}
5260 }
5261