xref: /dragonfly/sys/dev/netif/re/if_re.c (revision 7485684f)
1 /*
2  * Copyright (c) 2004
3  *	Joerg Sonnenberger <joerg@bec.de>.  All rights reserved.
4  *
5  * Copyright (c) 1997, 1998-2003
6  *	Bill Paul <wpaul@windriver.com>.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by Bill Paul.
19  * 4. Neither the name of the author nor the names of any co-contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
27  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
33  * THE POSSIBILITY OF SUCH DAMAGE.
34  *
35  * $FreeBSD: src/sys/dev/re/if_re.c,v 1.25 2004/06/09 14:34:01 naddy Exp $
36  */
37 
38 /*
39  * RealTek 8169S/8110S/8168/8111/8101E/8125 PCI NIC driver
40  *
41  * Written by Bill Paul <wpaul@windriver.com>
42  * Senior Networking Software Engineer
43  * Wind River Systems
44  */
45 
46 /*
47  * This driver is designed to support RealTek's next generation of
48  * 10/100 and 10/100/1000 PCI/PCIe ethernet controllers. There are currently
49  * seven devices in this family: the RTL8169, the RTL8169S, the RTL8110S,
50  * the RTL8168, the RTL8111, the RTL8101E and the RTL8125.
51  *
52  * The 8169 is a 64-bit 10/100/1000 gigabit ethernet MAC:
53  *
54  *	o Descriptor based DMA mechanism.  Each descriptor represents
55  *	  a single packet fragment. Data buffers may be aligned on
56  *	  any byte boundary.
57  *
58  *	o 64-bit DMA.
59  *
60  *	o TCP/IP checksum offload for both RX and TX.
61  *
62  *	o High and normal priority transmit DMA rings.
63  *
64  *	o VLAN tag insertion and extraction.
65  *
66  *	o TCP large send (segmentation offload).
67  *
68  *	o 1000Mbps mode.
69  *
70  *	o Jumbo frames.
71  *
72  *	o GMII and TBI ports/registers for interfacing with copper
73  *	  or fiber PHYs.
74  *
75  *	o RX and TX DMA rings can have up to 1024 descriptors.
76  *
77  * The 8169 does not have a built-in PHY.  Most reference boards use a
78  * Marvell 88E1000 'Alaska' copper gigE PHY.  The external-PHY 8169/8110
79  * is _no longer_ supported by this driver.
80  *
81  * The 8169S/8110S 10/100/1000 devices have built-in copper gigE PHYs
82  * (the 'S' stands for 'single-chip').  These devices have the same
83  * programming API as the older 8169, but also have some vendor-specific
84  * registers for the on-board PHY.  The 8110S is a LAN-on-motherboard
85  * part designed to be pin-compatible with the RealTek 8100 10/100 chip.
86  * The 8125 supports 10/100/1000/2500Mbps operation.
87  *
88  * This driver takes advantage of the RX and TX checksum offload and
89  * VLAN tag insertion/extraction features.  It also implements
90  * interrupt moderation using the timer interrupt registers, which
91  * significantly reduces interrupt load.
92  */
93 
94 #define _IP_VHL
95 
96 #include "opt_ifpoll.h"
97 
98 #include <sys/param.h>
99 #include <sys/bus.h>
100 #include <sys/endian.h>
101 #include <sys/kernel.h>
102 #include <sys/in_cksum.h>
103 #include <sys/interrupt.h>
104 #include <sys/malloc.h>
105 #include <sys/mbuf.h>
106 #include <sys/rman.h>
107 #include <sys/serialize.h>
108 #include <sys/socket.h>
109 #include <sys/sockio.h>
110 #include <sys/sysctl.h>
111 
112 #include <net/bpf.h>
113 #include <net/ethernet.h>
114 #include <net/if.h>
115 #include <net/ifq_var.h>
116 #include <net/if_arp.h>
117 #include <net/if_dl.h>
118 #include <net/if_media.h>
119 #include <net/if_poll.h>
120 #include <net/if_types.h>
121 #include <net/vlan/if_vlan_var.h>
122 #include <net/vlan/if_vlan_ether.h>
123 
124 #include <netinet/ip.h>
125 
126 #include "pcidevs.h"
127 #include <bus/pci/pcireg.h>
128 #include <bus/pci/pcivar.h>
129 
130 #include <dev/netif/re/if_rereg.h>
131 #include <dev/netif/re/if_revar.h>
132 #include <dev/netif/re/re.h>
133 #include <dev/netif/re/re_dragonfly.h>
134 
135 /*
136  * Various supported device vendors/types and their names.
137  */
138 static const struct re_type {
139 	uint16_t	re_vid;
140 	uint16_t	re_did;
141 	const char	*re_name;
142 } re_devs[] = {
143 	{ PCI_VENDOR_DLINK, PCI_PRODUCT_DLINK_DGE528T,
144 	  "D-Link DGE-528(T) Gigabit Ethernet Adapter" },
145 
146 	{ PCI_VENDOR_REALTEK, PCI_PRODUCT_REALTEK_RT8101E,
147 	  "RealTek 810x PCIe 10/100baseTX" },
148 
149 	{ PCI_VENDOR_REALTEK, PCI_PRODUCT_REALTEK_RT8168,
150 	  "RealTek 8111/8168 PCIe Gigabit Ethernet" },
151 
152 	{ PCI_VENDOR_REALTEK, PCI_PRODUCT_REALTEK_RT8168_1,
153 	  "RealTek 8168 PCIe Gigabit Ethernet" },
154 
155 	{ PCI_VENDOR_REALTEK, PCI_PRODUCT_REALTEK_RT8125,
156 	  "RealTek 8125 PCIe Gigabit Ethernet" },
157 
158 #ifdef notyet
159 	/*
160 	 * This driver now only supports built-in PHYs.
161 	 */
162 	{ PCI_VENDOR_REALTEK, PCI_PRODUCT_REALTEK_RT8169,
163 	  "RealTek 8110/8169 Gigabit Ethernet" },
164 #endif
165 
166 	{ PCI_VENDOR_REALTEK, PCI_PRODUCT_REALTEK_RT8169SC,
167 	  "RealTek 8169SC/8110SC Single-chip Gigabit Ethernet" },
168 
169 	{ PCI_VENDOR_COREGA, PCI_PRODUCT_COREGA_CG_LAPCIGT,
170 	  "Corega CG-LAPCIGT Gigabit Ethernet" },
171 
172 	{ PCI_VENDOR_LINKSYS, PCI_PRODUCT_LINKSYS_EG1032,
173 	  "Linksys EG1032 Gigabit Ethernet" },
174 
175 	{ PCI_VENDOR_USR2, PCI_PRODUCT_USR2_997902,
176 	  "US Robotics 997902 Gigabit Ethernet" },
177 
178 	{ PCI_VENDOR_TTTECH, PCI_PRODUCT_TTTECH_MC322,
179 	  "TTTech MC322 Gigabit Ethernet" },
180 
181 	{ PCI_VENDOR_REALTEK, PCI_PRODUCT_REALTEK_RT2600,
182 	  "RealTek Killer E2600 Gigabit Ethernet Controller" },
183 
184 	{ 0, 0, NULL }
185 };
186 
187 static int	re_probe(device_t);
188 static int	re_attach(device_t);
189 static int	re_detach(device_t);
190 static int	re_suspend(device_t);
191 static int	re_resume(device_t);
192 static void	re_shutdown(device_t);
193 
194 static int	re_allocmem(device_t);
195 static void	re_freemem(device_t);
196 static void	re_freebufmem(struct re_softc *, int, int);
197 static int	re_encap(struct re_softc *, struct mbuf **, int *);
198 static int	re_newbuf_std(struct re_softc *, int, int);
199 #ifdef RE_JUMBO
200 static int	re_newbuf_jumbo(struct re_softc *, int, int);
201 #endif
202 static void	re_setup_rxdesc(struct re_softc *, int);
203 static int	re_rx_list_init(struct re_softc *);
204 static int	re_tx_list_init(struct re_softc *);
205 static int	re_rxeof(struct re_softc *);
206 static int	re_txeof(struct re_softc *);
207 static int	re_tx_collect(struct re_softc *);
208 static void	re_intr(void *);
209 static void	re_tick(void *);
210 static void	re_tick_serialized(void *);
211 static void	re_disable_aspm(device_t);
212 static void	re_link_up(struct re_softc *);
213 static void	re_link_down(struct re_softc *);
214 
215 static void	re_start_xmit(struct re_softc *);
216 static void	re_write_imr(struct re_softc *, uint32_t);
217 static void	re_write_isr(struct re_softc *, uint32_t);
218 static uint32_t	re_read_isr(struct re_softc *);
219 static void	re_start_xmit_8125(struct re_softc *);
220 static void	re_write_imr_8125(struct re_softc *, uint32_t);
221 static void	re_write_isr_8125(struct re_softc *, uint32_t);
222 static uint32_t	re_read_isr_8125(struct re_softc *);
223 
224 static void	re_start(struct ifnet *, struct ifaltq_subque *);
225 static int	re_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
226 static void	re_init(void *);
227 static void	re_stop(struct re_softc *, boolean_t);
228 static void	re_watchdog(struct ifnet *);
229 
230 static void	re_setup_hw_im(struct re_softc *);
231 static void	re_setup_sim_im(struct re_softc *);
232 static void	re_disable_hw_im(struct re_softc *);
233 static void	re_disable_sim_im(struct re_softc *);
234 static void	re_config_imtype(struct re_softc *, int);
235 static void	re_setup_intr(struct re_softc *, int, int);
236 
237 static int	re_sysctl_hwtime(SYSCTL_HANDLER_ARGS, int *);
238 static int	re_sysctl_rxtime(SYSCTL_HANDLER_ARGS);
239 static int	re_sysctl_txtime(SYSCTL_HANDLER_ARGS);
240 static int	re_sysctl_simtime(SYSCTL_HANDLER_ARGS);
241 static int	re_sysctl_imtype(SYSCTL_HANDLER_ARGS);
242 
243 static int	re_jpool_alloc(struct re_softc *);
244 static void	re_jpool_free(struct re_softc *);
245 #ifdef RE_JUMBO
246 static struct re_jbuf *re_jbuf_alloc(struct re_softc *);
247 static void	re_jbuf_free(void *);
248 static void	re_jbuf_ref(void *);
249 #endif
250 
251 #ifdef IFPOLL_ENABLE
252 static void	re_npoll(struct ifnet *, struct ifpoll_info *);
253 static void	re_npoll_compat(struct ifnet *, void *, int);
254 #endif
255 
256 static device_method_t re_methods[] = {
257 	/* Device interface */
258 	DEVMETHOD(device_probe,		re_probe),
259 	DEVMETHOD(device_attach,	re_attach),
260 	DEVMETHOD(device_detach,	re_detach),
261 	DEVMETHOD(device_suspend,	re_suspend),
262 	DEVMETHOD(device_resume,	re_resume),
263 	DEVMETHOD(device_shutdown,	re_shutdown),
264 	DEVMETHOD_END
265 };
266 
267 static driver_t re_driver = {
268 	"re",
269 	re_methods,
270 	sizeof(struct re_softc)
271 };
272 
273 static devclass_t re_devclass;
274 
275 DECLARE_DUMMY_MODULE(if_re);
276 DRIVER_MODULE(if_re, pci, re_driver, re_devclass, NULL, NULL);
277 DRIVER_MODULE(if_re, cardbus, re_driver, re_devclass, NULL, NULL);
278 
279 static int	re_rx_desc_count = RE_RX_DESC_CNT_DEF;
280 static int	re_tx_desc_count = RE_TX_DESC_CNT_DEF;
281 static int	re_msi_enable = 1;
282 
283 TUNABLE_INT("hw.re.rx_desc_count", &re_rx_desc_count);
284 TUNABLE_INT("hw.re.tx_desc_count", &re_tx_desc_count);
285 TUNABLE_INT("hw.re.msi.enable", &re_msi_enable);
286 
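/*
 * Free a partially assembled multi-fragment RX packet.  sc->re_head
 * and sc->re_tail track the mbuf chain of a frame that spans several
 * RX descriptors; see re_rxeof().
 */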
287 static __inline void
288 re_free_rxchain(struct re_softc *sc)
289 {
290 	if (sc->re_head != NULL) {
291 		m_freem(sc->re_head);
292 		sc->re_head = sc->re_tail = NULL;
293 	}
294 }
295 
296 static int
297 re_probe(device_t dev)
298 {
299 	const struct re_type *t;
300 	uint16_t vendor, product;
301 
302 	vendor = pci_get_vendor(dev);
303 	product = pci_get_device(dev);
304 
305 	/*
306 	 * Only attach to rev.3 of the Linksys EG1032 adapter.
307 	 * Rev.2 is supported by sk(4).
308 	 */
309 	if (vendor == PCI_VENDOR_LINKSYS &&
310 	    product == PCI_PRODUCT_LINKSYS_EG1032 &&
311 	    pci_get_subdevice(dev) != PCI_SUBDEVICE_LINKSYS_EG1032_REV3)
312 		return ENXIO;
313 
314 	for (t = re_devs; t->re_name != NULL; t++) {
315 		if (product == t->re_did && vendor == t->re_vid)
316 			break;
317 	}
318 	if (t->re_name == NULL)
319 		return ENXIO;
320 
321 	device_set_desc(dev, t->re_name);
322 	return 0;
323 }
324 
325 static int
326 re_allocmem(device_t dev)
327 {
328 	struct re_softc *sc = device_get_softc(dev);
329 	bus_dmamem_t dmem;
330 	int error, i;
331 
332 	/*
333 	 * Allocate list data
334 	 */
335 	sc->re_ldata.re_tx_mbuf =
336 	kmalloc(sc->re_tx_desc_cnt * sizeof(struct mbuf *),
337 		M_DEVBUF, M_ZERO | M_WAITOK);
338 
339 	sc->re_ldata.re_rx_mbuf =
340 	kmalloc(sc->re_rx_desc_cnt * sizeof(struct mbuf *),
341 		M_DEVBUF, M_ZERO | M_WAITOK);
342 
343 	sc->re_ldata.re_rx_paddr =
344 	kmalloc(sc->re_rx_desc_cnt * sizeof(bus_addr_t),
345 		M_DEVBUF, M_ZERO | M_WAITOK);
346 
347 	sc->re_ldata.re_tx_dmamap =
348 	kmalloc(sc->re_tx_desc_cnt * sizeof(bus_dmamap_t),
349 		M_DEVBUF, M_ZERO | M_WAITOK);
350 
351 	sc->re_ldata.re_rx_dmamap =
352 	kmalloc(sc->re_rx_desc_cnt * sizeof(bus_dmamap_t),
353 		M_DEVBUF, M_ZERO | M_WAITOK);
354 
355 	/*
356 	 * Allocate the parent bus DMA tag appropriate for PCI.
357 	 */
358 	error = bus_dma_tag_create(NULL,	/* parent */
359 			1, 0,			/* alignment, boundary */
360 			BUS_SPACE_MAXADDR,	/* lowaddr */
361 			BUS_SPACE_MAXADDR,	/* highaddr */
362 			BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
363 			0,			/* nsegments */
364 			BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
365 			0,			/* flags */
366 			&sc->re_parent_tag);
367 	if (error) {
368 		device_printf(dev, "could not allocate parent dma tag\n");
369 		return error;
370 	}
371 
372 	/* Allocate TX descriptor list. */
373 	error = bus_dmamem_coherent(sc->re_parent_tag,
374 			RE_RING_ALIGN, 0,
375 			BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
376 			RE_TX_LIST_SZ(sc), BUS_DMA_WAITOK | BUS_DMA_ZERO,
377 			&dmem);
378 	if (error) {
379 		device_printf(dev, "could not allocate TX ring\n");
380 		return error;
381 	}
382 	sc->re_ldata.re_tx_list_tag = dmem.dmem_tag;
383 	sc->re_ldata.re_tx_list_map = dmem.dmem_map;
384 	sc->re_ldata.re_tx_list = dmem.dmem_addr;
385 	sc->re_ldata.re_tx_list_addr = dmem.dmem_busaddr;
386 
387 	/* Allocate RX descriptor list. */
388 	error = bus_dmamem_coherent(sc->re_parent_tag,
389 			RE_RING_ALIGN, 0,
390 			BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
391 			RE_RX_LIST_SZ(sc), BUS_DMA_WAITOK | BUS_DMA_ZERO,
392 			&dmem);
393 	if (error) {
394 		device_printf(dev, "could not allocate RX ring\n");
395 		return error;
396 	}
397 	sc->re_ldata.re_rx_list_tag = dmem.dmem_tag;
398 	sc->re_ldata.re_rx_list_map = dmem.dmem_map;
399 	sc->re_ldata.re_rx_list = dmem.dmem_addr;
400 	sc->re_ldata.re_rx_list_addr = dmem.dmem_busaddr;
401 
402 	/* Allocate maps for TX mbufs. */
403 	error = bus_dma_tag_create(sc->re_parent_tag,
404 			1, 0,
405 			BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
406 			RE_FRAMELEN_MAX, RE_MAXSEGS, MCLBYTES,
407 			BUS_DMA_ALLOCNOW | BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE,
408 			&sc->re_ldata.re_tx_mtag);
409 	if (error) {
410 		device_printf(dev, "could not allocate TX buf dma tag\n");
411 		return(error);
412 	}
413 
414 	/* Create DMA maps for TX buffers */
415 	for (i = 0; i < sc->re_tx_desc_cnt; i++) {
416 		error = bus_dmamap_create(sc->re_ldata.re_tx_mtag,
417 				BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE,
418 				&sc->re_ldata.re_tx_dmamap[i]);
419 		if (error) {
420 			device_printf(dev, "can't create DMA map for TX buf\n");
421 			re_freebufmem(sc, i, 0);
422 			return(error);
423 		}
424 	}
425 
426 	/* Allocate maps for RX mbufs. */
427 	error = bus_dma_tag_create(sc->re_parent_tag,
428 			RE_RXBUF_ALIGN, 0,
429 			BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
430 			MCLBYTES, 1, MCLBYTES,
431 			BUS_DMA_ALLOCNOW | BUS_DMA_WAITOK | BUS_DMA_ALIGNED,
432 			&sc->re_ldata.re_rx_mtag);
433 	if (error) {
434 		device_printf(dev, "could not allocate RX buf dma tag\n");
435 		return(error);
436 	}
437 
438 	/* Create spare DMA map for RX */
439 	error = bus_dmamap_create(sc->re_ldata.re_rx_mtag, BUS_DMA_WAITOK,
440 			&sc->re_ldata.re_rx_spare);
441 	if (error) {
442 		device_printf(dev, "can't create spare DMA map for RX\n");
443 		bus_dma_tag_destroy(sc->re_ldata.re_rx_mtag);
444 		sc->re_ldata.re_rx_mtag = NULL;
445 		return error;
446 	}
447 
448 	/* Create DMA maps for RX buffers */
449 	for (i = 0; i < sc->re_rx_desc_cnt; i++) {
450 		error = bus_dmamap_create(sc->re_ldata.re_rx_mtag,
451 				BUS_DMA_WAITOK, &sc->re_ldata.re_rx_dmamap[i]);
452 		if (error) {
453 			device_printf(dev, "can't create DMA map for RX buf\n");
454 			re_freebufmem(sc, sc->re_tx_desc_cnt, i);
455 			return(error);
456 		}
457 	}
458 
459 	/* Create jumbo buffer pool for RX if required */
460 	if (sc->re_caps & RE_C_CONTIGRX) {
461 		error = re_jpool_alloc(sc);
462 		if (error) {
463 			re_jpool_free(sc);
464 #ifdef RE_JUMBO
465 			/* Disable jumbo frame support */
466 			sc->re_maxmtu = ETHERMTU;
467 #endif
468 		}
469 	}
470 	return(0);
471 }
472 
473 static void
474 re_freebufmem(struct re_softc *sc, int tx_cnt, int rx_cnt)
475 {
476 	int i;
477 
478 	/* Destroy all the RX and TX buffer maps */
479 	if (sc->re_ldata.re_tx_mtag) {
480 		for (i = 0; i < tx_cnt; i++) {
481 			bus_dmamap_destroy(sc->re_ldata.re_tx_mtag,
482 					   sc->re_ldata.re_tx_dmamap[i]);
483 		}
484 		bus_dma_tag_destroy(sc->re_ldata.re_tx_mtag);
485 		sc->re_ldata.re_tx_mtag = NULL;
486 	}
487 
488 	if (sc->re_ldata.re_rx_mtag) {
489 		for (i = 0; i < rx_cnt; i++) {
490 			bus_dmamap_destroy(sc->re_ldata.re_rx_mtag,
491 					   sc->re_ldata.re_rx_dmamap[i]);
492 		}
493 		bus_dmamap_destroy(sc->re_ldata.re_rx_mtag,
494 				   sc->re_ldata.re_rx_spare);
495 		bus_dma_tag_destroy(sc->re_ldata.re_rx_mtag);
496 		sc->re_ldata.re_rx_mtag = NULL;
497 	}
498 }
499 
500 static void
501 re_freemem(device_t dev)
502 {
503 	struct re_softc *sc = device_get_softc(dev);
504 
505 	/* Unload and free the RX DMA ring memory and map */
506 	if (sc->re_ldata.re_rx_list_tag) {
507 		bus_dmamap_unload(sc->re_ldata.re_rx_list_tag,
508 				  sc->re_ldata.re_rx_list_map);
509 		bus_dmamem_free(sc->re_ldata.re_rx_list_tag,
510 				sc->re_ldata.re_rx_list,
511 				sc->re_ldata.re_rx_list_map);
512 		bus_dma_tag_destroy(sc->re_ldata.re_rx_list_tag);
513 	}
514 
515 	/* Unload and free the TX DMA ring memory and map */
516 	if (sc->re_ldata.re_tx_list_tag) {
517 		bus_dmamap_unload(sc->re_ldata.re_tx_list_tag,
518 				  sc->re_ldata.re_tx_list_map);
519 		bus_dmamem_free(sc->re_ldata.re_tx_list_tag,
520 				sc->re_ldata.re_tx_list,
521 				sc->re_ldata.re_tx_list_map);
522 		bus_dma_tag_destroy(sc->re_ldata.re_tx_list_tag);
523 	}
524 
525 	/* Free RX/TX buf DMA stuffs */
526 	re_freebufmem(sc, sc->re_tx_desc_cnt, sc->re_rx_desc_cnt);
527 
528 	/* Unload and free the stats buffer and map */
529 	if (sc->re_ldata.re_stag) {
530 		bus_dmamap_unload(sc->re_ldata.re_stag, sc->re_ldata.re_smap);
531 		bus_dmamem_free(sc->re_ldata.re_stag,
532 				sc->re_ldata.re_stats,
533 				sc->re_ldata.re_smap);
534 		bus_dma_tag_destroy(sc->re_ldata.re_stag);
535 	}
536 
537 	if (sc->re_caps & RE_C_CONTIGRX)
538 		re_jpool_free(sc);
539 
540 	if (sc->re_parent_tag)
541 		bus_dma_tag_destroy(sc->re_parent_tag);
542 
543 	if (sc->re_ldata.re_tx_mbuf != NULL)
544 		kfree(sc->re_ldata.re_tx_mbuf, M_DEVBUF);
545 	if (sc->re_ldata.re_rx_mbuf != NULL)
546 		kfree(sc->re_ldata.re_rx_mbuf, M_DEVBUF);
547 	if (sc->re_ldata.re_rx_paddr != NULL)
548 		kfree(sc->re_ldata.re_rx_paddr, M_DEVBUF);
549 	if (sc->re_ldata.re_tx_dmamap != NULL)
550 		kfree(sc->re_ldata.re_tx_dmamap, M_DEVBUF);
551 	if (sc->re_ldata.re_rx_dmamap != NULL)
552 		kfree(sc->re_ldata.re_rx_dmamap, M_DEVBUF);
553 }
554 
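/*
 * Return TRUE for fast ethernet (10/100 only) parts, i.e. any RealTek
 * device ID not in the gigabit/2.5G list below.
 */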
555 static boolean_t
556 re_is_faste(struct re_softc *sc)
557 {
558 	if (pci_get_vendor(sc->dev) == PCI_VENDOR_REALTEK) {
559 		switch (sc->re_device_id) {
560 		case PCI_PRODUCT_REALTEK_RT8169:
561 		case PCI_PRODUCT_REALTEK_RT8169SC:
562 		case PCI_PRODUCT_REALTEK_RT8168:
563 		case PCI_PRODUCT_REALTEK_RT8168_1:
564 		case PCI_PRODUCT_REALTEK_RT8125:
565 			return FALSE;
566 		default:
567 			return TRUE;
568 		}
569 	} else {
570 		return FALSE;
571 	}
572 }
573 
574 static bool
575 re_is_2500e(const struct re_softc *sc)
576 {
577 	if (pci_get_vendor(sc->dev) == PCI_VENDOR_REALTEK) {
578 		switch (sc->re_device_id) {
579 		case PCI_PRODUCT_REALTEK_RT8125:
580 			return true;
581 
582 		default:
583 			return false;
584 		}
585 	}
586 	return false;
587 }
588 
589 /*
590  * Attach the interface. Allocate softc structures, do ifmedia
591  * setup and ethernet/BPF attach.
592  */
593 static int
594 re_attach(device_t dev)
595 {
596 	struct re_softc	*sc = device_get_softc(dev);
597 	struct ifnet *ifp;
598 	struct sysctl_ctx_list *ctx;
599 	struct sysctl_oid *tree;
600 	uint8_t eaddr[ETHER_ADDR_LEN];
601 	int error = 0, qlen, msi_enable;
602 	u_int irq_flags;
603 
604 	callout_init_mp(&sc->re_timer);
605 	sc->dev = dev;
606 	sc->re_device_id = pci_get_device(dev);
607 	sc->re_unit = device_get_unit(dev);
608 	ifmedia_init(&sc->media, IFM_IMASK, rtl_ifmedia_upd, rtl_ifmedia_sts);
609 
610 	if (pci_get_vendor(dev) == PCI_VENDOR_REALTEK &&
611 	    sc->re_device_id == PCI_PRODUCT_REALTEK_RT8125) {
612 		sc->re_start_xmit = re_start_xmit_8125;
613 		sc->re_write_imr = re_write_imr_8125;
614 		sc->re_write_isr = re_write_isr_8125;
615 		sc->re_read_isr = re_read_isr_8125;
616 	} else {
617 		sc->re_start_xmit = re_start_xmit;
618 		sc->re_write_imr = re_write_imr;
619 		sc->re_write_isr = re_write_isr;
620 		sc->re_read_isr = re_read_isr;
621 	}
622 
623 	sc->re_caps = RE_C_HWIM;
624 
625 	sc->re_rx_desc_cnt = re_rx_desc_count;
626 	if (sc->re_rx_desc_cnt > RE_RX_DESC_CNT_MAX)
627 		sc->re_rx_desc_cnt = RE_RX_DESC_CNT_MAX;
628 
629 	sc->re_tx_desc_cnt = re_tx_desc_count;
630 	if (sc->re_tx_desc_cnt > RE_TX_DESC_CNT_MAX)
631 		sc->re_tx_desc_cnt = RE_TX_DESC_CNT_MAX;
632 
633 	qlen = RE_IFQ_MAXLEN;
634 	if (sc->re_tx_desc_cnt > qlen)
635 		qlen = sc->re_tx_desc_cnt;
636 
637 	sc->re_rxbuf_size = MCLBYTES;
638 	sc->re_newbuf = re_newbuf_std;
639 
640 	/*
641 	 * Hardware interrupt moderation settings.
642 	 * XXX does not seem correct, undocumented.
643 	 */
644 	sc->re_tx_time = 5;		/* 125us */
645 	sc->re_rx_time = 2;		/* 50us */
646 
647 	/* Simulated interrupt moderation setting. */
648 	sc->re_sim_time = 150;		/* 150us */
649 
650 	/* Use simulated interrupt moderation by default. */
651 	sc->re_imtype = RE_IMTYPE_SIM;
652 	re_config_imtype(sc, sc->re_imtype);
653 
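	/*
	 * The moderation parameters above are exported through the
	 * per-device sysctl tree created below and can be tuned at
	 * runtime, e.g. (assuming unit 0):
	 *
	 *	sysctl dev.re.0.sim_time=150	# simulated period (us)
	 *	sysctl dev.re.0.imtype=1	# 0:off 1:sim 2:hw
	 */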
654 	ctx = device_get_sysctl_ctx(dev);
655 	tree = device_get_sysctl_tree(dev);
656 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
657 		       "rx_desc_count", CTLFLAG_RD, &sc->re_rx_desc_cnt,
658 		       0, "RX desc count");
659 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
660 		       "tx_desc_count", CTLFLAG_RD, &sc->re_tx_desc_cnt,
661 		       0, "TX desc count");
662 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "sim_time",
663 			CTLTYPE_INT | CTLFLAG_RW,
664 			sc, 0, re_sysctl_simtime, "I",
665 			"Simulated interrupt moderation time (usec).");
666 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "imtype",
667 			CTLTYPE_INT | CTLFLAG_RW,
668 			sc, 0, re_sysctl_imtype, "I",
669 			"Interrupt moderation type -- "
670 			"0:disable, 1:simulated, "
671 			"2:hardware(if supported)");
672 	if (sc->re_caps & RE_C_HWIM) {
673 		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
674 				OID_AUTO, "hw_rxtime",
675 				CTLTYPE_INT | CTLFLAG_RW,
676 				sc, 0, re_sysctl_rxtime, "I",
677 				"Hardware interrupt moderation time "
678 				"(unit: 25usec).");
679 		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
680 				OID_AUTO, "hw_txtime",
681 				CTLTYPE_INT | CTLFLAG_RW,
682 				sc, 0, re_sysctl_txtime, "I",
683 				"Hardware interrupt moderation time "
684 				"(unit: 25usec).");
685 	}
686 
687 #ifndef BURN_BRIDGES
688 	/*
689 	 * Handle power management nonsense.
690 	 */
691 
692 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
693 		uint32_t membase, irq;
694 
695 		/* Save important PCI config data. */
696 		membase = pci_read_config(dev, RE_PCI_LOMEM, 4);
697 		irq = pci_read_config(dev, PCIR_INTLINE, 4);
698 
699 		/* Reset the power state. */
700 		device_printf(dev, "chip is in D%d power mode "
701 		    "-- setting to D0\n", pci_get_powerstate(dev));
702 
703 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
704 
705 		/* Restore PCI config data. */
706 		pci_write_config(dev, RE_PCI_LOMEM, membase, 4);
707 		pci_write_config(dev, PCIR_INTLINE, irq, 4);
708 	}
709 #endif
710 	/*
711 	 * Map control/status registers.
712 	 */
713 	pci_enable_busmaster(dev);
714 
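	/*
	 * PCIe parts expose their registers through memory space at
	 * BAR 2; older parallel-PCI parts use I/O space at BAR 0.
	 */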
715 	if (pci_is_pcie(dev)) {
716 		sc->re_res_rid = PCIR_BAR(2);
717 		sc->re_res_type = SYS_RES_MEMORY;
718 	} else {
719 		sc->re_res_rid = PCIR_BAR(0);
720 		sc->re_res_type = SYS_RES_IOPORT;
721 	}
722 	sc->re_res = bus_alloc_resource_any(dev, sc->re_res_type,
723 	    &sc->re_res_rid, RF_ACTIVE);
724 	if (sc->re_res == NULL) {
725 		device_printf(dev, "couldn't map IO\n");
726 		error = ENXIO;
727 		goto fail;
728 	}
729 
730 	sc->re_btag = rman_get_bustag(sc->re_res);
731 	sc->re_bhandle = rman_get_bushandle(sc->re_res);
732 
733 	error = rtl_check_mac_version(sc);
734 	if (error) {
735 		device_printf(dev, "check mac version failed\n");
736 		goto fail;
737 	}
738 
739 	rtl_init_software_variable(sc);
740 	if (pci_is_pcie(dev))
741 		sc->re_if_flags |= RL_FLAG_PCIE;
742 	else
743 		sc->re_if_flags &= ~RL_FLAG_PCIE;
744 	device_printf(dev, "MAC version 0x%08x, MACFG %u%s%s%s\n",
745 	    (CSR_READ_4(sc, RE_TXCFG) & 0xFCF00000), sc->re_type,
746 	    sc->re_coalesce_tx_pkt ? ", software TX defrag" : "",
747 	    sc->re_pad_runt ? ", pad runt" : "",
748 	    sc->re_hw_enable_msi_msix ? ", support MSI" : "");
749 
750 	/*
751 	 * Allocate interrupt
752 	 */
753 	if (pci_is_pcie(dev) && sc->re_hw_enable_msi_msix)
754 		msi_enable = re_msi_enable;
755 	else
756 		msi_enable = 0;
757 	sc->re_irq_type = pci_alloc_1intr(dev, msi_enable,
758 	    &sc->re_irq_rid, &irq_flags);
759 
760 	sc->re_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->re_irq_rid,
761 					    irq_flags);
762 	if (sc->re_irq == NULL) {
763 		device_printf(dev, "couldn't map interrupt\n");
764 		error = ENXIO;
765 		goto fail;
766 	}
767 
768 	/* Disable ASPM */
769 	re_disable_aspm(dev);
770 
771 	rtl_exit_oob(sc);
772 	rtl_hw_init(sc);
773 
774 	/* Reset the adapter. */
775 	rtl_reset(sc);
776 
777 	rtl_get_hw_mac_address(sc, eaddr);
778 	if (sc->re_type == MACFG_3)	/* Change PCI Latency time*/
779 		pci_write_config(dev, PCIR_LATTIMER, 0x40, 1);
780 
781 	/* Allocate DMA stuffs */
782 	error = re_allocmem(dev);
783 	if (error)
784 		goto fail;
785 
786 	if (pci_is_pcie(dev)) {
787 		sc->re_bus_speed = 125;
788 	} else {
789 		uint8_t cfg2;
790 
791 		cfg2 = CSR_READ_1(sc, RE_CFG2);
792 		switch (cfg2 & RE_CFG2_PCICLK_MASK) {
793 		case RE_CFG2_PCICLK_33MHZ:
794 			sc->re_bus_speed = 33;
795 			break;
796 		case RE_CFG2_PCICLK_66MHZ:
797 			sc->re_bus_speed = 66;
798 			break;
799 		default:
800 			device_printf(dev, "unknown bus speed, assume 33MHz\n");
801 			sc->re_bus_speed = 33;
802 			break;
803 		}
804 	}
805 	device_printf(dev, "bus speed %dMHz\n", sc->re_bus_speed);
806 
807 	/* Enable hardware checksum if available. */
808 	sc->re_tx_cstag = 1;
809 	sc->re_rx_cstag = 1;
810 
811 	ifp = &sc->arpcom.ac_if;
812 	ifp->if_softc = sc;
813 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
814 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
815 	ifp->if_ioctl = re_ioctl;
816 	ifp->if_start = re_start;
817 #ifdef IFPOLL_ENABLE
818 	ifp->if_npoll = re_npoll;
819 #endif
820 	ifp->if_watchdog = re_watchdog;
821 	ifp->if_init = re_init;
822 	if (re_is_faste(sc))
823 		ifp->if_baudrate = IF_Mbps(100ULL);
824 	else if (re_is_2500e(sc))
825 		ifp->if_baudrate = IF_Mbps(2500ULL);
826 	else
827 		ifp->if_baudrate = IF_Mbps(1000ULL);
828 	ifp->if_nmbclusters = sc->re_rx_desc_cnt;
829 	ifq_set_maxlen(&ifp->if_snd, qlen);
830 	ifq_set_ready(&ifp->if_snd);
831 
832 	ifp->if_capabilities = IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
833 	    IFCAP_RXCSUM | IFCAP_TXCSUM;
834 	ifp->if_capenable = ifp->if_capabilities;
835 	/* NOTE: if_hwassist will be set up after the interface is up. */
836 
837 	/*
838 	 * Call MI attach routine.
839 	 */
840 	ether_ifattach(ifp, eaddr, NULL);
841 
842 	ifq_set_cpuid(&ifp->if_snd, rman_get_cpuid(sc->re_irq));
843 
844 	rtl_phy_power_up(sc);
845 	rtl_hw_phy_config(sc);
846 	rtl_clrwol(sc);
847 
848 	/* TODO: jumbo frame */
849 	CSR_WRITE_2(sc, RE_RxMaxSize, sc->re_rxbuf_size);
850 
851 #ifdef IFPOLL_ENABLE
852 	ifpoll_compat_setup(&sc->re_npoll, ctx, (struct sysctl_oid *)tree,
853 	    device_get_unit(dev), ifp->if_serializer);
854 #endif
855 
856 	/* Hook interrupt last to avoid having to lock softc */
857 	error = bus_setup_intr(dev, sc->re_irq, INTR_MPSAFE | INTR_HIFREQ,
858 	    re_intr, sc, &sc->re_intrhand, ifp->if_serializer);
859 	if (error) {
860 		device_printf(dev, "couldn't set up irq\n");
861 		ether_ifdetach(ifp);
862 		goto fail;
863 	}
864 
865 	ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T, 0, NULL);
866 	ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL);
867 	ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX, 0, NULL);
868 	ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL);
869 	if (!re_is_faste(sc)) {
870 		ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
871 		    0, NULL);
872 	}
873 	if (re_is_2500e(sc)) {
874 #ifndef IFM_2500_T
875 		ifmedia_add(&sc->media, IFM_ETHER | IFM_2500_SX | IFM_FDX,
876 		    0, NULL);
877 #else
878 		ifmedia_add(&sc->media, IFM_ETHER | IFM_2500_T | IFM_FDX,
879 		    0, NULL);
880 #endif
881 	}
882 	ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
883 	ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO);
884 	rtl_ifmedia_upd(ifp);
885 
886 fail:
887 	if (error)
888 		re_detach(dev);
889 
890 	return (error);
891 }
892 
893 /*
894  * Shutdown hardware and free up resources. This can be called any
895  * time after the mutex has been initialized. It is called in both
896  * the error case in attach and the normal detach case so it needs
897  * to be careful about only freeing resources that have actually been
898  * allocated.
899  */
900 static int
901 re_detach(device_t dev)
902 {
903 	struct re_softc *sc = device_get_softc(dev);
904 	struct ifnet *ifp = &sc->arpcom.ac_if;
905 
906 	/* These should only be active if attach succeeded */
907 	if (device_is_attached(dev)) {
908 		lwkt_serialize_enter(ifp->if_serializer);
909 		re_stop(sc, TRUE);
910 		bus_teardown_intr(dev, sc->re_irq, sc->re_intrhand);
911 		lwkt_serialize_exit(ifp->if_serializer);
912 
913 		ether_ifdetach(ifp);
914 	}
915 	ifmedia_removeall(&sc->media);
916 
917 	if (sc->re_irq)
918 		bus_release_resource(dev, SYS_RES_IRQ, sc->re_irq_rid,
919 				     sc->re_irq);
920 
921 	if (sc->re_irq_type == PCI_INTR_TYPE_MSI)
922 		pci_release_msi(dev);
923 
924 	if (sc->re_res) {
925 		bus_release_resource(dev, sc->re_res_type, sc->re_res_rid,
926 		    sc->re_res);
927 	}
928 	rtl_cmac_unmap(sc);
929 
930 	/* Free DMA stuffs */
931 	re_freemem(dev);
932 
933 	return(0);
934 }
935 
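/*
 * (Re)initialize RX descriptor 'idx': point it at the buffer's DMA
 * address, program the buffer size, hand ownership to the chip
 * (RE_RDESC_CMD_OWN) and, on the last slot, set RE_RDESC_CMD_EOR so
 * the hardware wraps back to descriptor 0.
 */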
936 static void
937 re_setup_rxdesc(struct re_softc *sc, int idx)
938 {
939 	bus_addr_t paddr;
940 	uint32_t cmdstat;
941 	struct re_desc *d;
942 
943 	paddr = sc->re_ldata.re_rx_paddr[idx];
944 	d = &sc->re_ldata.re_rx_list[idx];
945 
946 	d->re_bufaddr_lo = htole32(RE_ADDR_LO(paddr));
947 	d->re_bufaddr_hi = htole32(RE_ADDR_HI(paddr));
948 
949 	cmdstat = sc->re_rxbuf_size | RE_RDESC_CMD_OWN;
950 	if (idx == (sc->re_rx_desc_cnt - 1))
951 		cmdstat |= RE_RDESC_CMD_EOR;
952 	d->re_cmdstat = htole32(cmdstat);
953 }
954 
955 static int
956 re_newbuf_std(struct re_softc *sc, int idx, int init)
957 {
958 	bus_dma_segment_t seg;
959 	bus_dmamap_t map;
960 	struct mbuf *m;
961 	int error, nsegs;
962 
963 	m = m_getcl(init ? M_WAITOK : M_NOWAIT, MT_DATA, M_PKTHDR);
964 	if (m == NULL) {
965 		error = ENOBUFS;
966 
967 		if (init) {
968 			if_printf(&sc->arpcom.ac_if, "m_getcl failed\n");
969 			return error;
970 		} else {
971 			goto back;
972 		}
973 	}
974 	m->m_len = m->m_pkthdr.len = MCLBYTES;
975 
976 	/*
977 	 * NOTE:
978 	 * re(4) chips need address of the receive buffer to be 8-byte
979 	 * aligned, so don't call m_adj(m, ETHER_ALIGN) here.
980 	 */
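	/*
	 * The required alignment is provided by re_rx_mtag, which
	 * re_allocmem() created with RE_RXBUF_ALIGN and BUS_DMA_ALIGNED.
	 */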
981 
982 	error = bus_dmamap_load_mbuf_segment(sc->re_ldata.re_rx_mtag,
983 			sc->re_ldata.re_rx_spare, m,
984 			&seg, 1, &nsegs, BUS_DMA_NOWAIT);
985 	if (error) {
986 		m_freem(m);
987 		if (init) {
988 			if_printf(&sc->arpcom.ac_if, "can't load RX mbuf\n");
989 			return error;
990 		} else {
991 			goto back;
992 		}
993 	}
994 
995 	if (!init) {
996 		bus_dmamap_sync(sc->re_ldata.re_rx_mtag,
997 				sc->re_ldata.re_rx_dmamap[idx],
998 				BUS_DMASYNC_POSTREAD);
999 		bus_dmamap_unload(sc->re_ldata.re_rx_mtag,
1000 				  sc->re_ldata.re_rx_dmamap[idx]);
1001 	}
1002 	sc->re_ldata.re_rx_mbuf[idx] = m;
1003 	sc->re_ldata.re_rx_paddr[idx] = seg.ds_addr;
1004 
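	/*
	 * Swap the just-loaded spare map into this ring slot and keep
	 * the slot's old map as the new spare.  Loading into the spare
	 * map first means the old mbuf stayed mapped (and the ring slot
	 * intact) if the load above had failed.
	 */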
1005 	map = sc->re_ldata.re_rx_dmamap[idx];
1006 	sc->re_ldata.re_rx_dmamap[idx] = sc->re_ldata.re_rx_spare;
1007 	sc->re_ldata.re_rx_spare = map;
1008 back:
1009 	re_setup_rxdesc(sc, idx);
1010 	return error;
1011 }
1012 
1013 #ifdef RE_JUMBO
1014 static int
1015 re_newbuf_jumbo(struct re_softc *sc, int idx, int init)
1016 {
1017 	struct mbuf *m;
1018 	struct re_jbuf *jbuf;
1019 	int error = 0;
1020 
1021 	MGETHDR(m, init ? M_WAITOK : M_NOWAIT, MT_DATA);
1022 	if (m == NULL) {
1023 		error = ENOBUFS;
1024 		if (init) {
1025 			if_printf(&sc->arpcom.ac_if, "MGETHDR failed\n");
1026 			return error;
1027 		} else {
1028 			goto back;
1029 		}
1030 	}
1031 
1032 	jbuf = re_jbuf_alloc(sc);
1033 	if (jbuf == NULL) {
1034 		m_freem(m);
1035 
1036 		error = ENOBUFS;
1037 		if (init) {
1038 			if_printf(&sc->arpcom.ac_if, "jpool is empty\n");
1039 			return error;
1040 		} else {
1041 			goto back;
1042 		}
1043 	}
1044 
1045 	m->m_ext.ext_arg = jbuf;
1046 	m->m_ext.ext_buf = jbuf->re_buf;
1047 	m->m_ext.ext_free = re_jbuf_free;
1048 	m->m_ext.ext_ref = re_jbuf_ref;
1049 	m->m_ext.ext_size = sc->re_rxbuf_size;
1050 
1051 	m->m_data = m->m_ext.ext_buf;
1052 	m->m_flags |= M_EXT;
1053 	m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
1054 
1055 	/*
1056 	 * NOTE:
1057 	 * Some re(4) chips(e.g. RTL8101E) need address of the receive buffer
1058 	 * to be 8-byte aligned, so don't call m_adj(m, ETHER_ALIGN) here.
1059 	 */
1060 
1061 	sc->re_ldata.re_rx_mbuf[idx] = m;
1062 	sc->re_ldata.re_rx_paddr[idx] = jbuf->re_paddr;
1063 back:
1064 	re_setup_rxdesc(sc, idx);
1065 	return error;
1066 }
1067 #endif	/* RE_JUMBO */
1068 
1069 static int
1070 re_tx_list_init(struct re_softc *sc)
1071 {
1072 	bzero(sc->re_ldata.re_tx_list, RE_TX_LIST_SZ(sc));
1073 
1074 	sc->re_ldata.re_tx_prodidx = 0;
1075 	sc->re_ldata.re_tx_considx = 0;
1076 	sc->re_ldata.re_tx_free = sc->re_tx_desc_cnt;
1077 
1078 	return(0);
1079 }
1080 
1081 static int
1082 re_rx_list_init(struct re_softc *sc)
1083 {
1084 	int i, error;
1085 
1086 	bzero(sc->re_ldata.re_rx_list, RE_RX_LIST_SZ(sc));
1087 
1088 	for (i = 0; i < sc->re_rx_desc_cnt; i++) {
1089 		error = sc->re_newbuf(sc, i, 1);
1090 		if (error)
1091 			return(error);
1092 	}
1093 
1094 	sc->re_ldata.re_rx_prodidx = 0;
1095 	sc->re_head = sc->re_tail = NULL;
1096 
1097 	return(0);
1098 }
1099 
1100 #define RE_IP4_PACKET	0x1
1101 #define RE_TCP_PACKET	0x2
1102 #define RE_UDP_PACKET	0x4
1103 
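/*
 * Decode the RX descriptor protocol bits into a small bitmask.
 * DESCV2 chips report the IPv4 indication in the control word
 * (rxctrl); older chips report it in the command/status word (rxstat).
 */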
1104 static __inline uint8_t
1105 re_packet_type(struct re_softc *sc, uint32_t rxstat, uint32_t rxctrl)
1106 {
1107 	uint8_t packet_type = 0;
1108 
1109 	if (sc->re_if_flags & RL_FLAG_DESCV2) {
1110 		if (rxctrl & RE_RDESC_CTL_PROTOIP4)
1111 			packet_type |= RE_IP4_PACKET;
1112 	} else {
1113 		if (rxstat & RE_RDESC_STAT_PROTOID)
1114 			packet_type |= RE_IP4_PACKET;
1115 	}
1116 	if (RE_TCPPKT(rxstat))
1117 		packet_type |= RE_TCP_PACKET;
1118 	else if (RE_UDPPKT(rxstat))
1119 		packet_type |= RE_UDP_PACKET;
1120 	return packet_type;
1121 }
1122 
1123 /*
1124  * RX handler for C+ and 8169. For the gigE chips, we support
1125  * the reception of jumbo frames that have been fragmented
1126  * across multiple 2K mbuf cluster buffers.
1127  */
1128 static int
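/*
 * Reassembly sketch: a descriptor whose status lacks RE_RDESC_STAT_EOF
 * is a non-final fragment and its mbuf is appended to the
 * sc->re_head/sc->re_tail chain; the descriptor with EOF set carries
 * the status bits for the whole frame and completes the chain.
 */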
1129 re_rxeof(struct re_softc *sc)
1130 {
1131 	struct ifnet *ifp = &sc->arpcom.ac_if;
1132 	struct mbuf *m;
1133 	struct re_desc *cur_rx;
1134 	uint32_t rxstat, rxctrl;
1135 	int i, total_len, rx = 0;
1136 
1137 	for (i = sc->re_ldata.re_rx_prodidx;
1138 	     RE_OWN(&sc->re_ldata.re_rx_list[i]) == 0; RE_RXDESC_INC(sc, i)) {
1139 		cur_rx = &sc->re_ldata.re_rx_list[i];
1140 		m = sc->re_ldata.re_rx_mbuf[i];
1141 		total_len = RE_RXBYTES(cur_rx);
1142 		rxstat = le32toh(cur_rx->re_cmdstat);
1143 		rxctrl = le32toh(cur_rx->re_control);
1144 
1145 		rx = 1;
1146 
1147 #ifdef INVARIANTS
1148 		if (sc->re_flags & RE_F_USE_JPOOL)
1149 			KKASSERT(rxstat & RE_RDESC_STAT_EOF);
1150 #endif
1151 
1152 		if ((rxstat & RE_RDESC_STAT_EOF) == 0) {
1153 			if (sc->re_flags & RE_F_DROP_RXFRAG) {
1154 				re_setup_rxdesc(sc, i);
1155 				continue;
1156 			}
1157 
1158 			if (sc->re_newbuf(sc, i, 0)) {
1159 				/* Drop upcoming fragments */
1160 				sc->re_flags |= RE_F_DROP_RXFRAG;
1161 				continue;
1162 			}
1163 
1164 			m->m_len = MCLBYTES;
1165 			if (sc->re_head == NULL) {
1166 				sc->re_head = sc->re_tail = m;
1167 			} else {
1168 				sc->re_tail->m_next = m;
1169 				sc->re_tail = m;
1170 			}
1171 			continue;
1172 		} else if (sc->re_flags & RE_F_DROP_RXFRAG) {
1173 			/*
1174 			 * Last fragment of a multi-fragment packet.
1175 			 *
1176 			 * Since error already happened, this fragment
1177 			 * must be dropped as well as the fragment chain.
1178 			 */
1179 			re_setup_rxdesc(sc, i);
1180 			re_free_rxchain(sc);
1181 			sc->re_flags &= ~RE_F_DROP_RXFRAG;
1182 			continue;
1183 		}
1184 
1185 		rxstat >>= 1;
1186 		if (rxstat & RE_RDESC_STAT_RXERRSUM) {
1187 			IFNET_STAT_INC(ifp, ierrors, 1);
1188 			/*
1189 			 * If this is part of a multi-fragment packet,
1190 			 * discard all the pieces.
1191 			 */
1192 			re_free_rxchain(sc);
1193 			re_setup_rxdesc(sc, i);
1194 			continue;
1195 		}
1196 
1197 		/*
1198 		 * If allocating a replacement mbuf fails,
1199 		 * reload the current one.
1200 		 */
1201 
1202 		if (sc->re_newbuf(sc, i, 0)) {
1203 			IFNET_STAT_INC(ifp, ierrors, 1);
1204 			continue;
1205 		}
1206 
1207 		if (sc->re_head != NULL) {
1208 			m->m_len = total_len % MCLBYTES;
1209 			/*
1210 			 * Special case: if there are 4 bytes or less
1211 			 * in this buffer, the mbuf can be discarded:
1212 			 * the last 4 bytes are the CRC, which we don't
1213 			 * care about anyway.
1214 			 */
1215 			if (m->m_len <= ETHER_CRC_LEN) {
1216 				sc->re_tail->m_len -=
1217 				    (ETHER_CRC_LEN - m->m_len);
1218 				m_freem(m);
1219 			} else {
1220 				m->m_len -= ETHER_CRC_LEN;
1221 				sc->re_tail->m_next = m;
1222 			}
1223 			m = sc->re_head;
1224 			sc->re_head = sc->re_tail = NULL;
1225 			m->m_pkthdr.len = total_len - ETHER_CRC_LEN;
1226 		} else {
1227 			m->m_pkthdr.len = m->m_len =
1228 			    (total_len - ETHER_CRC_LEN);
1229 		}
1230 
1231 		IFNET_STAT_INC(ifp, ipackets, 1);
1232 		m->m_pkthdr.rcvif = ifp;
1233 
1234 		/* Do RX checksumming if enabled */
1235 
1236 		if (ifp->if_capenable & IFCAP_RXCSUM) {
1237 			uint8_t packet_type;
1238 
1239 			packet_type = re_packet_type(sc, rxstat, rxctrl);
1240 
1241 			/* Check IP header checksum */
1242 			if (packet_type & RE_IP4_PACKET) {
1243 				m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
1244 				if ((rxstat & RE_RDESC_STAT_IPSUMBAD) == 0)
1245 					m->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1246 			}
1247 
1248 			/* Check TCP/UDP checksum */
1249 			if (((packet_type & RE_TCP_PACKET) &&
1250 			     (rxstat & RE_RDESC_STAT_TCPSUMBAD) == 0) ||
1251 			    ((packet_type & RE_UDP_PACKET) &&
1252 			     (rxstat & RE_RDESC_STAT_UDPSUMBAD) == 0)) {
1253 				m->m_pkthdr.csum_flags |=
1254 				    CSUM_DATA_VALID|CSUM_PSEUDO_HDR|
1255 				    CSUM_FRAG_NOT_CHECKED;
1256 				m->m_pkthdr.csum_data = 0xffff;
1257 			}
1258 		}
1259 
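		/*
		 * The tag data in the descriptor appears to be kept in
		 * network byte order, hence be16toh() here and htobe16()
		 * in re_encap().
		 */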
1260 		if (rxctrl & RE_RDESC_CTL_HASTAG) {
1261 			m->m_flags |= M_VLANTAG;
1262 			m->m_pkthdr.ether_vlantag =
1263 				be16toh((rxctrl & RE_RDESC_CTL_TAGDATA));
1264 		}
1265 		ifp->if_input(ifp, m, NULL, -1);
1266 	}
1267 
1268 	sc->re_ldata.re_rx_prodidx = i;
1269 
1270 	return rx;
1271 }
1272 
1273 #undef RE_IP4_PACKET
1274 #undef RE_TCP_PACKET
1275 #undef RE_UDP_PACKET
1276 
1277 static int
1278 re_tx_collect(struct re_softc *sc)
1279 {
1280 	struct ifnet *ifp = &sc->arpcom.ac_if;
1281 	uint32_t txstat;
1282 	int idx, tx = 0;
1283 
1284 	for (idx = sc->re_ldata.re_tx_considx;
1285 	     sc->re_ldata.re_tx_free < sc->re_tx_desc_cnt;
1286 	     RE_TXDESC_INC(sc, idx)) {
1287 		txstat = le32toh(sc->re_ldata.re_tx_list[idx].re_cmdstat);
1288 		if (txstat & RE_TDESC_CMD_OWN)
1289 			break;
1290 
1291 		tx = 1;
1292 
1293 		sc->re_ldata.re_tx_list[idx].re_bufaddr_lo = 0;
1294 
1295 		/*
1296 		 * We only stash mbufs in the last descriptor
1297 		 * in a fragment chain, which also happens to
1298 		 * be the only place where the TX status bits
1299 		 * are valid.
1300 		 *
1301 		 * NOTE:
1302 		 * On 8125, RE_TDESC_CMD_EOF is no longer left
1303 		 * uncleared.
1304 		 */
1305 		if (sc->re_ldata.re_tx_mbuf[idx] != NULL) {
1306 			bus_dmamap_unload(sc->re_ldata.re_tx_mtag,
1307 			    sc->re_ldata.re_tx_dmamap[idx]);
1308 			m_freem(sc->re_ldata.re_tx_mbuf[idx]);
1309 			sc->re_ldata.re_tx_mbuf[idx] = NULL;
1310 			if (txstat & (RE_TDESC_STAT_EXCESSCOL|
1311 			    RE_TDESC_STAT_COLCNT))
1312 				IFNET_STAT_INC(ifp, collisions, 1);
1313 			if (txstat & RE_TDESC_STAT_TXERRSUM)
1314 				IFNET_STAT_INC(ifp, oerrors, 1);
1315 			else
1316 				IFNET_STAT_INC(ifp, opackets, 1);
1317 		}
1318 		sc->re_ldata.re_tx_free++;
1319 	}
1320 	sc->re_ldata.re_tx_considx = idx;
1321 
1322 	return tx;
1323 }
1324 
1325 static int
1326 re_txeof(struct re_softc *sc)
1327 {
1328 	struct ifnet *ifp = &sc->arpcom.ac_if;
1329 	int tx;
1330 
1331 	tx = re_tx_collect(sc);
1332 
1333 	/* There is enough free TX descs */
1334 	if (sc->re_ldata.re_tx_free > RE_TXDESC_SPARE)
1335 		ifq_clr_oactive(&ifp->if_snd);
1336 
1337 	/*
1338 	 * Some chips will ignore a second TX request issued while an
1339 	 * existing transmission is in progress. If the transmitter goes
1340 	 * idle but there are still packets waiting to be sent, we need
1341 	 * to restart the channel here to flush them out. This only seems
1342 	 * to be required with the PCIe devices.
1343 	 */
1344 	if (sc->re_ldata.re_tx_free < sc->re_tx_desc_cnt)
1345 		sc->re_start_xmit(sc);
1346 	else
1347 		ifp->if_timer = 0;
1348 
1349 	return tx;
1350 }
1351 
1352 static void
1353 re_tick(void *xsc)
1354 {
1355 	struct re_softc *sc = xsc;
1356 
1357 	lwkt_serialize_enter(sc->arpcom.ac_if.if_serializer);
1358 	re_tick_serialized(xsc);
1359 	lwkt_serialize_exit(sc->arpcom.ac_if.if_serializer);
1360 }
1361 
1362 static void
1363 re_tick_serialized(void *xsc)
1364 {
1365 	struct re_softc *sc = xsc;
1366 	struct ifnet *ifp = &sc->arpcom.ac_if;
1367 
1368 	ASSERT_SERIALIZED(ifp->if_serializer);
1369 
1370 	if ((ifp->if_flags & IFF_RUNNING) == 0)
1371 		return;
1372 
1373 	if (rtl_link_ok(sc)) {
1374 		if ((sc->re_flags & RE_F_LINKED) == 0)
1375 			re_link_up(sc);
1376 	} else if (sc->re_flags & RE_F_LINKED) {
1377 		re_link_down(sc);
1378 	}
1379 	callout_reset(&sc->re_timer, hz, re_tick, sc);
1380 }
1381 
1382 #ifdef IFPOLL_ENABLE
1383 
1384 static void
1385 re_npoll_compat(struct ifnet *ifp, void *arg __unused, int count)
1386 {
1387 	struct re_softc *sc = ifp->if_softc;
1388 
1389 	ASSERT_SERIALIZED(ifp->if_serializer);
1390 
1391 	if (sc->re_npoll.ifpc_stcount-- == 0) {
1392 		uint32_t status;
1393 
1394 		sc->re_npoll.ifpc_stcount = sc->re_npoll.ifpc_stfrac;
1395 
1396 		status = sc->re_read_isr(sc);
1397 		if (status)
1398 			sc->re_write_isr(sc, status);
1399 
1400 		/*
1401 		 * XXX check behaviour on receiver stalls.
1402 		 */
1403 
1404 		if (status & RE_ISR_SYSTEM_ERR) {
1405 			rtl_reset(sc);
1406 			re_init(sc);
1407 			/* Done! */
1408 			return;
1409 		}
1410 	}
1411 
1412 	sc->rxcycles = count;
1413 	re_rxeof(sc);
1414 	re_txeof(sc);
1415 
1416 	if (!ifq_is_empty(&ifp->if_snd))
1417 		if_devstart(ifp);
1418 }
1419 
1420 static void
1421 re_npoll(struct ifnet *ifp, struct ifpoll_info *info)
1422 {
1423 	struct re_softc *sc = ifp->if_softc;
1424 
1425 	ASSERT_SERIALIZED(ifp->if_serializer);
1426 
1427 	if (info != NULL) {
1428 		int cpuid = sc->re_npoll.ifpc_cpuid;
1429 
1430 		info->ifpi_rx[cpuid].poll_func = re_npoll_compat;
1431 		info->ifpi_rx[cpuid].arg = NULL;
1432 		info->ifpi_rx[cpuid].serializer = ifp->if_serializer;
1433 
1434 		if (ifp->if_flags & IFF_RUNNING)
1435 			re_setup_intr(sc, 0, RE_IMTYPE_NONE);
1436 		ifq_set_cpuid(&ifp->if_snd, cpuid);
1437 	} else {
1438 		if (ifp->if_flags & IFF_RUNNING)
1439 			re_setup_intr(sc, 1, sc->re_imtype);
1440 		ifq_set_cpuid(&ifp->if_snd, rman_get_cpuid(sc->re_irq));
1441 	}
1442 }
1443 #endif /* IFPOLL_ENABLE */
1444 
1445 static void
1446 re_intr(void *arg)
1447 {
1448 	struct re_softc	*sc = arg;
1449 	struct ifnet *ifp = &sc->arpcom.ac_if;
1450 	uint32_t status;
1451 	int proc;
1452 
1453 	ASSERT_SERIALIZED(ifp->if_serializer);
1454 
1455 	if ((sc->re_flags & RE_F_SUSPENDED) ||
1456 	    (ifp->if_flags & IFF_RUNNING) == 0)
1457 		return;
1458 
1459 	/* Disable interrupts. */
1460 	sc->re_write_imr(sc, 0);
1461 
1462 	status = sc->re_read_isr(sc);
1463 again:
1464 	proc = 0;
1465 	if (status)
1466 		sc->re_write_isr(sc, status);
1467 	if (status & sc->re_intrs) {
1468 		if (status & RE_ISR_SYSTEM_ERR) {
1469 			rtl_reset(sc);
1470 			re_init(sc);
1471 			/* Done! */
1472 			return;
1473 		}
1474 		proc |= re_rxeof(sc);
1475 		proc |= re_txeof(sc);
1476 	}
1477 
1478 	if (sc->re_imtype == RE_IMTYPE_SIM) {
1479 		if ((sc->re_flags & RE_F_TIMER_INTR)) {
1480 			if (!proc) {
1481 				/*
1482 				 * Nothing needs to be processed; fall back
1483 				 * to plain TX/RX interrupts.
1484 				 *
1485 				 * NOTE: This will re-enable interrupts.
1486 				 */
1487 				re_setup_intr(sc, 1, RE_IMTYPE_NONE);
1488 
1489 				/*
1490 				 * Recollect, mainly to avoid the possible
1491 				 * race introduced by changing interrupt
1492 				 * masks.
1493 				 */
1494 				re_rxeof(sc);
1495 				re_txeof(sc);
1496 			} else {
1497 				/* Re-enable interrupts. */
1498 				sc->re_write_imr(sc, sc->re_intrs);
1499 				CSR_WRITE_4(sc, RE_TIMERCNT, 1); /* reload */
1500 			}
1501 		} else if (proc) {
1502 			/*
1503 			 * Assume that using simulated interrupt moderation
1504 			 * (hardware timer based) could reduce the interrupt
1505 			 * rate.
1506 			 *
1507 			 * NOTE: This will re-enable interrupts.
1508 			 */
1509 			re_setup_intr(sc, 1, RE_IMTYPE_SIM);
1510 		} else {
1511 			/* Re-enable interrupts. */
1512 			sc->re_write_imr(sc, sc->re_intrs);
1513 		}
1514 	} else {
1515 		status = sc->re_read_isr(sc);
1516 		if (status & sc->re_intrs) {
1517 			if (!ifq_is_empty(&ifp->if_snd))
1518 				if_devstart(ifp);
1519 			/* NOTE: Interrupts are still disabled. */
1520 			goto again;
1521 		}
1522 		/* Re-enable interrupts. */
1523 		sc->re_write_imr(sc, sc->re_intrs);
1524 	}
1525 
1526 	if (!ifq_is_empty(&ifp->if_snd))
1527 		if_devstart(ifp);
1528 }
1529 
1530 static int
1531 re_encap(struct re_softc *sc, struct mbuf **m_head, int *idx0)
1532 {
1533 	struct mbuf *m = *m_head;
1534 	bus_dma_segment_t segs[RE_MAXSEGS];
1535 	bus_dmamap_t map;
1536 	int error, maxsegs, idx, i, nsegs;
1537 	struct re_desc *d, *tx_ring;
1538 	uint32_t cmd_csum, ctl_csum, vlantag;
1539 
1540 	KASSERT(sc->re_ldata.re_tx_free > RE_TXDESC_SPARE,
1541 		("not enough free TX desc"));
1542 
1543 	if (sc->re_coalesce_tx_pkt && m->m_pkthdr.len != m->m_len) {
1544 		struct mbuf *m_new;
1545 
1546 		m_new = m_defrag(m, M_NOWAIT);
1547 		if (m_new == NULL) {
1548 			error = ENOBUFS;
1549 			goto back;
1550 		} else {
1551 			*m_head = m = m_new;
1552 			if (m->m_pkthdr.len != m->m_len) {
1553 				/* Still not contiguous; give up. */
1554 				error = ENOBUFS;
1555 				goto back;
1556 			}
1557 		}
1558 	}
1559 
1560 	map = sc->re_ldata.re_tx_dmamap[*idx0];
1561 
1562 	/*
1563 	 * Set up checksum offload. Note: checksum offload bits must
1564 	 * appear in all descriptors of a multi-descriptor transmit
1565 	 * attempt. (This is according to testing done with an 8169
1566 	 * chip. I'm not sure if this is a requirement or a bug.)
1567 	 */
1568 	cmd_csum = ctl_csum = 0;
1569 	if (m->m_pkthdr.csum_flags & CSUM_IP) {
1570 		cmd_csum |= RE_TDESC_CMD_IPCSUM;
1571 		ctl_csum |= RE_TDESC_CTL_IPCSUM;
1572 	}
1573 	if (m->m_pkthdr.csum_flags & CSUM_TCP) {
1574 		cmd_csum |= RE_TDESC_CMD_TCPCSUM;
1575 		ctl_csum |= RE_TDESC_CTL_TCPCSUM;
1576 	}
1577 	if (m->m_pkthdr.csum_flags & CSUM_UDP) {
1578 		cmd_csum |= RE_TDESC_CMD_UDPCSUM;
1579 		ctl_csum |= RE_TDESC_CTL_UDPCSUM;
1580 	}
1581 
1582 	/* For version2 descriptor, csum flags are set on re_control */
1583 	if (sc->re_if_flags & RL_FLAG_DESCV2)
1584 		cmd_csum = 0;
1585 	else
1586 		ctl_csum = 0;
1587 
1588 	if (sc->re_pad_runt) {
1589 		/*
1590 		 * With some of the RealTek chips, using the checksum offload
1591 		 * support in conjunction with the autopadding feature results
1592 		 * in the transmission of corrupt frames. For example, if we
1593 		 * need to send a really small IP fragment that's less than 60
1594 		 * bytes in size, and IP header checksumming is enabled, the
1595 		 * resulting ethernet frame that appears on the wire will
1596 		 * have garbled payload. To work around this, if TX checksum
1597 		 * offload is enabled, we always manually pad short frames out
1598 		 * to the minimum ethernet frame size.
1599 		 *
1600 		 * Note: this appears unnecessary for TCP, and doing it for TCP
1601 		 * with PCIe adapters seems to result in bad checksums.
1602 		 */
1603 		if ((m->m_pkthdr.csum_flags &
1604 		     (CSUM_DELAY_IP | CSUM_DELAY_DATA)) &&
1605 		    (m->m_pkthdr.csum_flags & CSUM_TCP) == 0 &&
1606 		    m->m_pkthdr.len < RE_MIN_FRAMELEN) {
1607 			error = m_devpad(m, RE_MIN_FRAMELEN);
1608 			if (error)
1609 				goto back;
1610 		}
1611 	}
1612 
1613 	vlantag = 0;
1614 	if (m->m_flags & M_VLANTAG) {
1615 		vlantag = htobe16(m->m_pkthdr.ether_vlantag) |
1616 			  RE_TDESC_CTL_INSTAG;
1617 	}
1618 
1619 	maxsegs = sc->re_ldata.re_tx_free;
1620 	if (maxsegs > RE_MAXSEGS)
1621 		maxsegs = RE_MAXSEGS;
1622 
1623 	error = bus_dmamap_load_mbuf_defrag(sc->re_ldata.re_tx_mtag, map,
1624 			m_head, segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
1625 	if (error)
1626 		goto back;
1627 
1628 	m = *m_head;
1629 	bus_dmamap_sync(sc->re_ldata.re_tx_mtag, map, BUS_DMASYNC_PREWRITE);
1630 
1631 	/*
1632 	 * Map the segment array into descriptors.  We also keep track
1633 	 * of the end of the ring and set the end-of-ring bits as needed,
1634 	 * and we set the ownership bits in all except the very first
1635 	 * descriptor, whose ownership bits will be turned on later.
1636 	 */
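	/*
	 * Holding back RE_TDESC_CMD_OWN on the first descriptor until
	 * the whole chain is built (see the end of this function) keeps
	 * the chip from DMA-ing a half-constructed chain.
	 */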
1637 	tx_ring = sc->re_ldata.re_tx_list;
1638 	idx = *idx0;
1639 	i = 0;
1640 	for (;;) {
1641 		uint32_t cmdstat;
1642 
1643 		d = &tx_ring[idx];
1644 
1645 		KKASSERT(sc->re_ldata.re_tx_mbuf[idx] == NULL);
1646 
1647 		d->re_bufaddr_lo = htole32(RE_ADDR_LO(segs[i].ds_addr));
1648 		d->re_bufaddr_hi = htole32(RE_ADDR_HI(segs[i].ds_addr));
1649 
1650 		cmdstat = segs[i].ds_len;
1651 		if (i == 0) {
1652 			cmdstat |= RE_TDESC_CMD_SOF;
1653 		} else if (i != nsegs - 1) {
1654 			/*
1655 			 * Last descriptor's ownership will be transferred
1656 			 * later.
1657 			 */
1658 			cmdstat |= RE_TDESC_CMD_OWN;
1659 		}
1660 		if (idx == (sc->re_tx_desc_cnt - 1))
1661 			cmdstat |= RE_TDESC_CMD_EOR;
1662 
1663 		d->re_control = htole32(ctl_csum | vlantag);
1664 		d->re_cmdstat = htole32(cmdstat | cmd_csum);
1665 
1666 		i++;
1667 		if (i == nsegs)
1668 			break;
1669 		RE_TXDESC_INC(sc, idx);
1670 	}
1671 	d->re_cmdstat |= htole32(RE_TDESC_CMD_EOF);
1672 
1673 	/* Transfer ownership of packet to the chip. */
1674 	d->re_cmdstat |= htole32(RE_TDESC_CMD_OWN);
1675 	if (*idx0 != idx)
1676 		tx_ring[*idx0].re_cmdstat |= htole32(RE_TDESC_CMD_OWN);
1677 
1678 	/*
1679 	 * Ensure that the map for this transmission
1680 	 * is placed at the array index of the last descriptor
1681 	 * in this chain.
1682 	 */
1683 	sc->re_ldata.re_tx_dmamap[*idx0] = sc->re_ldata.re_tx_dmamap[idx];
1684 	sc->re_ldata.re_tx_dmamap[idx] = map;
1685 
1686 	sc->re_ldata.re_tx_mbuf[idx] = m;
1687 	sc->re_ldata.re_tx_free -= nsegs;
1688 
1689 	RE_TXDESC_INC(sc, idx);
1690 	*idx0 = idx;
1691 back:
1692 	if (error) {
1693 		m_freem(*m_head);
1694 		*m_head = NULL;
1695 	}
1696 	return error;
1697 }
1698 
1699 /*
1700  * Main transmit routine for C+ and gigE NICs.
1701  */
1702 
1703 static void
1704 re_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
1705 {
1706 	struct re_softc	*sc = ifp->if_softc;
1707 	struct mbuf *m_head;
1708 	int idx, need_trans, oactive, error;
1709 
1710 	ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq);
1711 	ASSERT_SERIALIZED(ifp->if_serializer);
1712 
1713 	if ((sc->re_flags & RE_F_LINKED) == 0) {
1714 		ifq_purge(&ifp->if_snd);
1715 		return;
1716 	}
1717 
1718 	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifq_is_oactive(&ifp->if_snd))
1719 		return;
1720 
1721 	idx = sc->re_ldata.re_tx_prodidx;
1722 
1723 	need_trans = 0;
1724 	oactive = 0;
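	/*
	 * 'oactive' records that re_tx_collect() was already tried once
	 * for the current descriptor shortage; if reclaiming completed
	 * descriptors frees nothing, mark the queue oactive and stop
	 * instead of spinning.
	 */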
1725 	for (;;) {
1726 		if (sc->re_ldata.re_tx_free <= RE_TXDESC_SPARE) {
1727 			if (!oactive) {
1728 				if (re_tx_collect(sc)) {
1729 					oactive = 1;
1730 					continue;
1731 				}
1732 			}
1733 			ifq_set_oactive(&ifp->if_snd);
1734 			break;
1735 		}
1736 
1737 		m_head = ifq_dequeue(&ifp->if_snd);
1738 		if (m_head == NULL)
1739 			break;
1740 
1741 		error = re_encap(sc, &m_head, &idx);
1742 		if (error) {
1743 			/* m_head is freed by re_encap() if we reach here */
1744 			IFNET_STAT_INC(ifp, oerrors, 1);
1745 
1746 			if (error == EFBIG && !oactive) {
1747 				if (re_tx_collect(sc)) {
1748 					oactive = 1;
1749 					continue;
1750 				}
1751 			}
1752 			ifq_set_oactive(&ifp->if_snd);
1753 			break;
1754 		}
1755 
1756 		oactive = 0;
1757 		need_trans = 1;
1758 
1759 		/*
1760 		 * If there's a BPF listener, bounce a copy of this frame
1761 		 * to him.
1762 		 */
1763 		ETHER_BPF_MTAP(ifp, m_head);
1764 	}
1765 
1766 	if (!need_trans)
1767 		return;
1768 
1769 	sc->re_ldata.re_tx_prodidx = idx;
1770 
1771 	/*
1772 	 * RealTek put the TX poll request register in a different
1773 	 * location on the 8169 gigE chip. I don't know why.
1774 	 */
1775 	sc->re_start_xmit(sc);
1776 
1777 	/*
1778 	 * Set a timeout in case the chip goes out to lunch.
1779 	 */
1780 	ifp->if_timer = 5;
1781 }
1782 
1783 static void
1784 re_link_up(struct re_softc *sc)
1785 {
1786 	struct ifnet *ifp = &sc->arpcom.ac_if;
1787 	int error;
1788 
1789 	ASSERT_SERIALIZED(ifp->if_serializer);
1790 
1791 	rtl_link_on_patch(sc);
1792 	re_stop(sc, FALSE);
1793 	rtl_set_eaddr(sc);
1794 
1795 	error = re_rx_list_init(sc);
1796 	if (error) {
1797 		re_stop(sc, TRUE);
1798 		return;
1799 	}
1800 	error = re_tx_list_init(sc);
1801 	if (error) {
1802 		re_stop(sc, TRUE);
1803 		return;
1804 	}
1805 
1806 	/*
1807 	 * Load the addresses of the RX and TX lists into the chip.
1808 	 */
1809 	CSR_WRITE_4(sc, RE_RXLIST_ADDR_HI,
1810 	    RE_ADDR_HI(sc->re_ldata.re_rx_list_addr));
1811 	CSR_WRITE_4(sc, RE_RXLIST_ADDR_LO,
1812 	    RE_ADDR_LO(sc->re_ldata.re_rx_list_addr));
1813 
1814 	CSR_WRITE_4(sc, RE_TXLIST_ADDR_HI,
1815 	    RE_ADDR_HI(sc->re_ldata.re_tx_list_addr));
1816 	CSR_WRITE_4(sc, RE_TXLIST_ADDR_LO,
1817 	    RE_ADDR_LO(sc->re_ldata.re_tx_list_addr));
1818 
1819 	rtl_hw_start(sc);
1820 
1821 #ifdef IFPOLL_ENABLE
1822 	/*
1823 	 * Disable interrupts if we are polling.
1824 	 */
1825 	if (ifp->if_flags & IFF_NPOLLING)
1826 		re_setup_intr(sc, 0, RE_IMTYPE_NONE);
1827 	else	/* otherwise ... */
1828 #endif /* IFPOLL_ENABLE */
1829 	/*
1830 	 * Enable interrupts.
1831 	 */
1832 	re_setup_intr(sc, 1, sc->re_imtype);
1833 	sc->re_write_isr(sc, sc->re_intrs);
1834 
1835 	sc->re_flags |= RE_F_LINKED;
1836 	ifp->if_link_state = LINK_STATE_UP;
1837 	if_link_state_change(ifp);
1838 
1839 	if (bootverbose)
1840 		if_printf(ifp, "link UP\n");
1841 
1842 	if (!ifq_is_empty(&ifp->if_snd))
1843 		if_devstart(ifp);
1844 }
1845 
1846 static void
1847 re_link_down(struct re_softc *sc)
1848 {
1849 	struct ifnet *ifp = &sc->arpcom.ac_if;
1850 
1851 	/* NOTE: re_stop() will reset RE_F_LINKED. */
1852 	ifp->if_link_state = LINK_STATE_DOWN;
1853 	if_link_state_change(ifp);
1854 
1855 	re_stop(sc, FALSE);
1856 	rtl_ifmedia_upd(ifp);
1857 
1858 	if (bootverbose)
1859 		if_printf(ifp, "link DOWN\n");
1860 }
1861 
1862 static void
1863 re_init(void *xsc)
1864 {
1865 	struct re_softc *sc = xsc;
1866 	struct ifnet *ifp = &sc->arpcom.ac_if;
1867 
1868 	ASSERT_SERIALIZED(ifp->if_serializer);
1869 
1870 	re_stop(sc, TRUE);
1871 	if (rtl_link_ok(sc)) {
1872 		if (bootverbose)
1873 			if_printf(ifp, "link is UP in if_init\n");
1874 		re_link_up(sc);
1875 	}
1876 
1877 	ifp->if_flags |= IFF_RUNNING;
1878 	ifq_clr_oactive(&ifp->if_snd);
1879 
1880 	callout_reset(&sc->re_timer, hz, re_tick, sc);
1881 }
1882 
1883 static int
1884 re_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
1885 {
1886 	struct re_softc *sc = ifp->if_softc;
1887 	struct ifreq *ifr = (struct ifreq *)data;
1888 	int error = 0, mask;
1889 
1890 	ASSERT_SERIALIZED(ifp->if_serializer);
1891 
1892 	switch(command) {
1893 	case SIOCSIFMTU:
1894 #ifdef RE_JUMBO
1895 		if (ifr->ifr_mtu > sc->re_maxmtu) {
1896 			error = EINVAL;
1897 		} else if (ifp->if_mtu != ifr->ifr_mtu) {
1898 			ifp->if_mtu = ifr->ifr_mtu;
1899 			if (ifp->if_flags & IFF_RUNNING)
1900 				ifp->if_init(sc);
1901 		}
1902 #else
1903 		error = EOPNOTSUPP;
1904 #endif
1905 		break;
1906 
1907 	case SIOCSIFFLAGS:
1908 		if (ifp->if_flags & IFF_UP) {
1909 			if (ifp->if_flags & IFF_RUNNING) {
1910 				if ((ifp->if_flags ^ sc->re_saved_ifflags) &
1911 				    (IFF_PROMISC | IFF_ALLMULTI))
1912 					rtl_set_rx_packet_filter(sc);
1913 			} else {
1914 				re_init(sc);
1915 			}
1916 		} else if (ifp->if_flags & IFF_RUNNING) {
1917 			re_stop(sc, TRUE);
1918 		}
1919 		sc->re_saved_ifflags = ifp->if_flags;
1920 		break;
1921 
1922 	case SIOCADDMULTI:
1923 	case SIOCDELMULTI:
1924 		rtl_set_rx_packet_filter(sc);
1925 		break;
1926 
1927 	case SIOCGIFMEDIA:
1928 	case SIOCGIFXMEDIA:
1929 	case SIOCSIFMEDIA:
1930 		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
1931 		break;
1932 
1933 	case SIOCSIFCAP:
1934 		mask = (ifr->ifr_reqcap ^ ifp->if_capenable) &
1935 		       ifp->if_capabilities;
1936 		ifp->if_capenable ^= mask;
1937 
1938 		/* NOTE: re_init() will set up if_hwassist. */
1939 		ifp->if_hwassist = 0;
1940 
1941 		/* Setup flags for the backend. */
1942 		if (ifp->if_capenable & IFCAP_RXCSUM)
1943 			sc->re_rx_cstag = 1;
1944 		else
1945 			sc->re_rx_cstag = 0;
1946 		if (ifp->if_capenable & IFCAP_TXCSUM)
1947 			sc->re_tx_cstag = 1;
1948 		else
1949 			sc->re_tx_cstag = 0;
1950 
1951 		if (mask && (ifp->if_flags & IFF_RUNNING))
1952 			re_init(sc);
1953 		break;
1954 
1955 	default:
1956 		error = ether_ioctl(ifp, command, data);
1957 		break;
1958 	}
1959 	return(error);
1960 }
1961 
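/*
 * TX watchdog.  Reclaim completed descriptors first; if TX
 * descriptors are still outstanding, assume the chip has wedged
 * and reset it.
 */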
1962 static void
1963 re_watchdog(struct ifnet *ifp)
1964 {
1965 	struct re_softc *sc = ifp->if_softc;
1966 
1967 	ASSERT_SERIALIZED(ifp->if_serializer);
1968 
1969 	IFNET_STAT_INC(ifp, oerrors, 1);
1970 
1971 	re_txeof(sc);
1972 	re_rxeof(sc);
1973 
1974 	if (sc->re_ldata.re_tx_free != sc->re_tx_desc_cnt) {
1975 		if_printf(ifp, "watchdog timeout, txd free %d\n",
1976 		    sc->re_ldata.re_tx_free);
1977 		rtl_reset(sc);
1978 		re_init(sc);
1979 	}
1980 }
1981 
1982 /*
1983  * Stop the adapter and free any mbufs allocated to the
1984  * RX and TX lists.
1985  */
1986 static void
1987 re_stop(struct re_softc *sc, boolean_t full_stop)
1988 {
1989 	struct ifnet *ifp = &sc->arpcom.ac_if;
1990 	int i;
1991 
1992 	ASSERT_SERIALIZED(ifp->if_serializer);
1993 
1994 	/* Stop the adapter. */
1995 	rtl_stop(sc);
1996 
1997 	ifp->if_timer = 0;
1998 	if (full_stop) {
1999 		callout_stop(&sc->re_timer);
2000 		ifp->if_flags &= ~IFF_RUNNING;
2001 	}
2002 	ifq_clr_oactive(&ifp->if_snd);
2003 	sc->re_flags &= ~(RE_F_TIMER_INTR | RE_F_DROP_RXFRAG | RE_F_LINKED);
2004 
2005 	re_free_rxchain(sc);
2006 
2007 	/* Free the TX list buffers. */
2008 	for (i = 0; i < sc->re_tx_desc_cnt; i++) {
2009 		if (sc->re_ldata.re_tx_mbuf[i] != NULL) {
2010 			bus_dmamap_unload(sc->re_ldata.re_tx_mtag,
2011 					  sc->re_ldata.re_tx_dmamap[i]);
2012 			m_freem(sc->re_ldata.re_tx_mbuf[i]);
2013 			sc->re_ldata.re_tx_mbuf[i] = NULL;
2014 		}
2015 	}
2016 
2017 	/* Free the RX list buffers. */
2018 	for (i = 0; i < sc->re_rx_desc_cnt; i++) {
2019 		if (sc->re_ldata.re_rx_mbuf[i] != NULL) {
2020 			if ((sc->re_flags & RE_F_USE_JPOOL) == 0) {
2021 				bus_dmamap_unload(sc->re_ldata.re_rx_mtag,
2022 						  sc->re_ldata.re_rx_dmamap[i]);
2023 			}
2024 			m_freem(sc->re_ldata.re_rx_mbuf[i]);
2025 			sc->re_ldata.re_rx_mbuf[i] = NULL;
2026 		}
2027 	}
2028 }
2029 
2030 /*
2031  * Device suspend routine.  Stop the interface and save some PCI
2032  * settings in case the BIOS doesn't restore them properly on
2033  * resume.
2034  */
2035 static int
2036 re_suspend(device_t dev)
2037 {
2038 #ifndef BURN_BRIDGES
2039 	int i;
2040 #endif
2041 	struct re_softc *sc = device_get_softc(dev);
2042 	struct ifnet *ifp = &sc->arpcom.ac_if;
2043 
2044 	lwkt_serialize_enter(ifp->if_serializer);
2045 
2046 	re_stop(sc, TRUE);
2047 
2048 #ifndef BURN_BRIDGES
2049 	for (i = 0; i < 5; i++)
2050 		sc->saved_maps[i] = pci_read_config(dev, PCIR_MAPS + i * 4, 4);
2051 	sc->saved_biosaddr = pci_read_config(dev, PCIR_BIOS, 4);
2052 	sc->saved_intline = pci_read_config(dev, PCIR_INTLINE, 1);
2053 	sc->saved_cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
2054 	sc->saved_lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
2055 #endif
2056 
2057 	sc->re_flags |= RE_F_SUSPENDED;
2058 
2059 	lwkt_serialize_exit(ifp->if_serializer);
2060 
2061 	return (0);
2062 }
2063 
2064 /*
2065  * Device resume routine.  Restore some PCI settings in case the BIOS
2066  * doesn't, re-enable busmastering, and restart the interface if
2067  * appropriate.
2068  */
2069 static int
2070 re_resume(device_t dev)
2071 {
2072 	struct re_softc *sc = device_get_softc(dev);
2073 	struct ifnet *ifp = &sc->arpcom.ac_if;
2074 #ifndef BURN_BRIDGES
2075 	int i;
2076 #endif
2077 
2078 	lwkt_serialize_enter(ifp->if_serializer);
2079 
2080 #ifndef BURN_BRIDGES
2081 	/* XXX is there a better way to do this? */
2082 	for (i = 0; i < 5; i++)
2083 		pci_write_config(dev, PCIR_MAPS + i * 4, sc->saved_maps[i], 4);
2084 	pci_write_config(dev, PCIR_BIOS, sc->saved_biosaddr, 4);
2085 	pci_write_config(dev, PCIR_INTLINE, sc->saved_intline, 1);
2086 	pci_write_config(dev, PCIR_CACHELNSZ, sc->saved_cachelnsz, 1);
2087 	pci_write_config(dev, PCIR_LATTIMER, sc->saved_lattimer, 1);
2088 
2089 	/* Re-enable bus mastering and I/O access. */
2090 	pci_enable_busmaster(dev);
2091 	pci_enable_io(dev, SYS_RES_IOPORT);
2092 #endif
2093 
2094 	/* reinitialize interface if necessary */
2095 	if (ifp->if_flags & IFF_UP)
2096 		re_init(sc);
2097 
2098 	sc->re_flags &= ~RE_F_SUSPENDED;
2099 
2100 	lwkt_serialize_exit(ifp->if_serializer);
2101 
2102 	return (0);
2103 }
2104 
2105 /*
2106  * Stop all chip I/O so that the kernel's probe routines don't
2107  * get confused by errant DMAs when rebooting.
2108  */
2109 static void
2110 re_shutdown(device_t dev)
2111 {
2112 	struct re_softc *sc = device_get_softc(dev);
2113 	struct ifnet *ifp = &sc->arpcom.ac_if;
2114 
2115 	lwkt_serialize_enter(ifp->if_serializer);
2116 	re_stop(sc, TRUE);
2117 	rtl_hw_d3_para(sc);
2118 	rtl_phy_power_down(sc);
2119 	lwkt_serialize_exit(ifp->if_serializer);
2120 }
2121 
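/*
 * Sysctl handlers for the RX and TX hardware interrupt moderation
 * timers; both are thin wrappers around re_sysctl_hwtime().
 */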
2122 static int
2123 re_sysctl_rxtime(SYSCTL_HANDLER_ARGS)
2124 {
2125 	struct re_softc *sc = arg1;
2126 
2127 	return re_sysctl_hwtime(oidp, arg1, arg2, req, &sc->re_rx_time);
2128 }
2129 
2130 static int
2131 re_sysctl_txtime(SYSCTL_HANDLER_ARGS)
2132 {
2133 	struct re_softc *sc = arg1;
2134 
2135 	return re_sysctl_hwtime(oidp, arg1, arg2, req, &sc->re_tx_time);
2136 }
2137 
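/*
 * Validate a new hardware moderation timer value and reprogram the
 * chip if hardware interrupt moderation is currently active.
 */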
2138 static int
2139 re_sysctl_hwtime(SYSCTL_HANDLER_ARGS, int *hwtime)
2140 {
2141 	struct re_softc *sc = arg1;
2142 	struct ifnet *ifp = &sc->arpcom.ac_if;
2143 	int error, v;
2144 
2145 	lwkt_serialize_enter(ifp->if_serializer);
2146 
2147 	v = *hwtime;
2148 	error = sysctl_handle_int(oidp, &v, 0, req);
2149 	if (error || req->newptr == NULL)
2150 		goto back;
2151 
2152 	if (v <= 0) {
2153 		error = EINVAL;
2154 		goto back;
2155 	}
2156 
2157 	if (v != *hwtime) {
2158 		*hwtime = v;
2159 
2160 		if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) ==
2161 		    IFF_RUNNING && sc->re_imtype == RE_IMTYPE_HW)
2162 			re_setup_hw_im(sc);
2163 	}
2164 back:
2165 	lwkt_serialize_exit(ifp->if_serializer);
2166 	return error;
2167 }
2168 
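/*
 * Sysctl handler for the simulated (timer based) interrupt
 * moderation period.  Changing it while active requires a full
 * interrupt reconfiguration; see the comment below for why the
 * more direct approach is disabled.
 */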
2169 static int
2170 re_sysctl_simtime(SYSCTL_HANDLER_ARGS)
2171 {
2172 	struct re_softc *sc = arg1;
2173 	struct ifnet *ifp = &sc->arpcom.ac_if;
2174 	int error, v;
2175 
2176 	lwkt_serialize_enter(ifp->if_serializer);
2177 
2178 	v = sc->re_sim_time;
2179 	error = sysctl_handle_int(oidp, &v, 0, req);
2180 	if (error || req->newptr == NULL)
2181 		goto back;
2182 
2183 	if (v <= 0) {
2184 		error = EINVAL;
2185 		goto back;
2186 	}
2187 
2188 	if (v != sc->re_sim_time) {
2189 		sc->re_sim_time = v;
2190 
2191 		if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) ==
2192 		    IFF_RUNNING && sc->re_imtype == RE_IMTYPE_SIM) {
2193 #ifdef foo
2194 			/*
2195 			 * The following code causes various strange
2196 			 * performance problems.  Hmm ...
2197 			 */
2198 			sc->re_write_imr(sc, 0);
2199 			CSR_WRITE_4(sc, RE_TIMERINT, 0);
2200 			CSR_READ_4(sc, RE_TIMERINT); /* flush */
2201 
2202 			sc->re_write_imr(sc, sc->re_intrs);
2203 			re_setup_sim_im(sc);
2204 #else
2205 			re_setup_intr(sc, 0, RE_IMTYPE_NONE);
2206 			DELAY(10);
2207 			re_setup_intr(sc, 1, RE_IMTYPE_SIM);
2208 #endif
2209 		}
2210 	}
2211 back:
2212 	lwkt_serialize_exit(ifp->if_serializer);
2213 	return error;
2214 }
2215 
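/*
 * Sysctl handler for selecting the interrupt moderation type:
 * RE_IMTYPE_NONE, RE_IMTYPE_SIM or RE_IMTYPE_HW (the latter only
 * on chips with the RE_C_HWIM capability).
 */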
2216 static int
2217 re_sysctl_imtype(SYSCTL_HANDLER_ARGS)
2218 {
2219 	struct re_softc *sc = arg1;
2220 	struct ifnet *ifp = &sc->arpcom.ac_if;
2221 	int error, v;
2222 
2223 	lwkt_serialize_enter(ifp->if_serializer);
2224 
2225 	v = sc->re_imtype;
2226 	error = sysctl_handle_int(oidp, &v, 0, req);
2227 	if (error || req->newptr == NULL)
2228 		goto back;
2229 
2230 	if (v != RE_IMTYPE_HW && v != RE_IMTYPE_SIM && v != RE_IMTYPE_NONE) {
2231 		error = EINVAL;
2232 		goto back;
2233 	}
2234 	if (v == RE_IMTYPE_HW && (sc->re_caps & RE_C_HWIM) == 0) {
2235 		/* Can't do hardware interrupt moderation */
2236 		error = EOPNOTSUPP;
2237 		goto back;
2238 	}
2239 
2240 	if (v != sc->re_imtype) {
2241 		sc->re_imtype = v;
2242 		if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) ==
2243 		    IFF_RUNNING)
2244 			re_setup_intr(sc, 1, sc->re_imtype);
2245 	}
2246 back:
2247 	lwkt_serialize_exit(ifp->if_serializer);
2248 	return error;
2249 }
2250 
2251 static void
2252 re_setup_hw_im(struct re_softc *sc)
2253 {
2254 	KKASSERT(sc->re_caps & RE_C_HWIM);
2255 
2256 	/*
2257 	 * Interrupt moderation
2258 	 *
2259 	 * 0xABCD
2260 	 * A - unknown (maybe TX related)
2261 	 * B - TX timer (unit: 25us)
2262 	 * C - unknown (maybe RX related)
2263 	 * D - RX timer (unit: 25us)
2264 	 *
2266 	 * re(4)'s interrupt moderation is actually controlled by
2267 	 * two variables, like most other NICs (bge, bce etc.)
2268 	 * o  timer
2269 	 * o  number of packets [P]
2270 	 *
2271 	 * The logic relationship between these two variables is
2272 	 * similar to other NICs too:
2273 	 * if (timer expire || packets > [P])
2274 	 *     Interrupt is delivered
2275 	 *
2276 	 * Currently we only know how to set the 'timer', not the
2277 	 * 'number of packets'; the latter appears to be ~30, as far as
2278 	 * I have tested (sink ~900Kpps yields a 30KHz interrupt rate).
2279 	 */
2280 	CSR_WRITE_2(sc, RE_IM,
2281 		    RE_IM_RXTIME(sc->re_rx_time) |
2282 		    RE_IM_TXTIME(sc->re_tx_time) |
2283 		    RE_IM_MAGIC);
2284 }
2285 
2286 static void
2287 re_disable_hw_im(struct re_softc *sc)
2288 {
2289 	if (sc->re_caps & RE_C_HWIM)
2290 		CSR_WRITE_2(sc, RE_IM, 0);
2291 }
2292 
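/*
 * Program the countdown timer used for simulated interrupt
 * moderation and start it; the timer interrupt then stands in
 * for the per-packet RX/TX interrupts.
 */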
2293 static void
2294 re_setup_sim_im(struct re_softc *sc)
2295 {
2296 	uint32_t ticks;
2297 
2298 	if (sc->re_if_flags & RL_FLAG_PCIE) {
2299 		ticks = sc->re_sim_time * sc->re_bus_speed;
2300 	} else {
2301 		/*
2302 		 * The datasheet says the tick counter decrements
2303 		 * at bus speed, but the clock seems to run a bit
2304 		 * faster, so compensate for that here.
2305 		 */
2306 		ticks = (sc->re_sim_time * sc->re_bus_speed * 8) / 5;
2307 	}
2308 	CSR_WRITE_4(sc, RE_TIMERINT, ticks);
2309 
2310 	CSR_WRITE_4(sc, RE_TIMERCNT, 1); /* reload */
2311 	sc->re_flags |= RE_F_TIMER_INTR;
2312 }
2313 
2314 static void
2315 re_disable_sim_im(struct re_softc *sc)
2316 {
2317 	CSR_WRITE_4(sc, RE_TIMERINT, 0);
2318 	sc->re_flags &= ~RE_F_TIMER_INTR;
2319 }
2320 
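/*
 * Select the interrupt mask and the RX/TX acknowledge bits for the
 * given moderation type.
 */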
2321 static void
2322 re_config_imtype(struct re_softc *sc, int imtype)
2323 {
2324 	switch (imtype) {
2325 	case RE_IMTYPE_HW:
2326 		KKASSERT(sc->re_caps & RE_C_HWIM);
2327 		/* FALL THROUGH */
2328 	case RE_IMTYPE_NONE:
2329 		sc->re_intrs = RE_INTRS;
2330 		sc->re_rx_ack = RE_ISR_RX_OK | RE_ISR_FIFO_OFLOW |
2331 				RE_ISR_RX_OVERRUN;
2332 		sc->re_tx_ack = RE_ISR_TX_OK;
2333 		break;
2334 
2335 	case RE_IMTYPE_SIM:
2336 		sc->re_intrs = RE_INTRS_TIMER;
2337 		sc->re_rx_ack = RE_ISR_PCS_TIMEOUT;
2338 		sc->re_tx_ack = RE_ISR_PCS_TIMEOUT;
2339 		break;
2340 
2341 	default:
2342 		panic("%s: unknown imtype %d",
2343 		      sc->arpcom.ac_if.if_xname, imtype);
2344 	}
2345 }
2346 
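/*
 * Switch to the given interrupt moderation type, program the
 * corresponding hardware, and enable or disable interrupts as
 * requested.
 */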
2347 static void
2348 re_setup_intr(struct re_softc *sc, int enable_intrs, int imtype)
2349 {
2350 	re_config_imtype(sc, imtype);
2351 
2352 	if (enable_intrs)
2353 		sc->re_write_imr(sc, sc->re_intrs);
2354 	else
2355 		sc->re_write_imr(sc, 0);
2356 
2357 	sc->re_npoll.ifpc_stcount = 0;
2358 
2359 	switch (imtype) {
2360 	case RE_IMTYPE_NONE:
2361 		re_disable_sim_im(sc);
2362 		re_disable_hw_im(sc);
2363 		break;
2364 
2365 	case RE_IMTYPE_HW:
2366 		KKASSERT(sc->re_caps & RE_C_HWIM);
2367 		re_disable_sim_im(sc);
2368 		re_setup_hw_im(sc);
2369 		break;
2370 
2371 	case RE_IMTYPE_SIM:
2372 		re_disable_hw_im(sc);
2373 		re_setup_sim_im(sc);
2374 		break;
2375 
2376 	default:
2377 		panic("%s: unknown imtype %d",
2378 		      sc->arpcom.ac_if.if_xname, imtype);
2379 	}
2380 }
2381 
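/*
 * Allocate one contiguous DMA-coherent block for the jumbo RX
 * buffers and carve it into reference-counted jbufs kept on a
 * free list.
 */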
2382 static int
2383 re_jpool_alloc(struct re_softc *sc)
2384 {
2385 	struct re_list_data *ldata = &sc->re_ldata;
2386 	struct re_jbuf *jbuf;
2387 	bus_addr_t paddr;
2388 	bus_size_t jpool_size;
2389 	bus_dmamem_t dmem;
2390 	caddr_t buf;
2391 	int i, error;
2392 
2393 	lwkt_serialize_init(&ldata->re_jbuf_serializer);
2394 
2395 	ldata->re_jbuf = kmalloc(sizeof(struct re_jbuf) * RE_JBUF_COUNT(sc),
2396 				 M_DEVBUF, M_WAITOK | M_ZERO);
2397 
2398 	jpool_size = RE_JBUF_COUNT(sc) * RE_JBUF_SIZE;
2399 
2400 	error = bus_dmamem_coherent(sc->re_parent_tag,
2401 			RE_RXBUF_ALIGN, 0,
2402 			BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
2403 			jpool_size, BUS_DMA_WAITOK, &dmem);
2404 	if (error) {
2405 		device_printf(sc->dev, "could not allocate jumbo memory\n");
2406 		return error;
2407 	}
2408 	ldata->re_jpool_tag = dmem.dmem_tag;
2409 	ldata->re_jpool_map = dmem.dmem_map;
2410 	ldata->re_jpool = dmem.dmem_addr;
2411 	paddr = dmem.dmem_busaddr;
2412 
2413 	/* ...and split it into 9KB (RE_JBUF_SIZE) chunks. */
2414 	SLIST_INIT(&ldata->re_jbuf_free);
2415 
2416 	buf = ldata->re_jpool;
2417 	for (i = 0; i < RE_JBUF_COUNT(sc); i++) {
2418 		jbuf = &ldata->re_jbuf[i];
2419 
2420 		jbuf->re_sc = sc;
2421 		jbuf->re_inuse = 0;
2422 		jbuf->re_slot = i;
2423 		jbuf->re_buf = buf;
2424 		jbuf->re_paddr = paddr;
2425 
2426 		SLIST_INSERT_HEAD(&ldata->re_jbuf_free, jbuf, re_link);
2427 
2428 		buf += RE_JBUF_SIZE;
2429 		paddr += RE_JBUF_SIZE;
2430 	}
2431 	return 0;
2432 }
2433 
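/*
 * Tear down the jumbo buffer pool and its bookkeeping.
 */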
2434 static void
2435 re_jpool_free(struct re_softc *sc)
2436 {
2437 	struct re_list_data *ldata = &sc->re_ldata;
2438 
2439 	if (ldata->re_jpool_tag != NULL) {
2440 		bus_dmamap_unload(ldata->re_jpool_tag, ldata->re_jpool_map);
2441 		bus_dmamem_free(ldata->re_jpool_tag, ldata->re_jpool,
2442 				ldata->re_jpool_map);
2443 		bus_dma_tag_destroy(ldata->re_jpool_tag);
2444 		ldata->re_jpool_tag = NULL;
2445 	}
2446 
2447 	if (ldata->re_jbuf != NULL) {
2448 		kfree(ldata->re_jbuf, M_DEVBUF);
2449 		ldata->re_jbuf = NULL;
2450 	}
2451 }
2452 
2453 #ifdef RE_JUMBO
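/*
 * Pop a jbuf off the free list and mark it in use.
 */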
2454 static struct re_jbuf *
2455 re_jbuf_alloc(struct re_softc *sc)
2456 {
2457 	struct re_list_data *ldata = &sc->re_ldata;
2458 	struct re_jbuf *jbuf;
2459 
2460 	lwkt_serialize_enter(&ldata->re_jbuf_serializer);
2461 
2462 	jbuf = SLIST_FIRST(&ldata->re_jbuf_free);
2463 	if (jbuf != NULL) {
2464 		SLIST_REMOVE_HEAD(&ldata->re_jbuf_free, re_link);
2465 		jbuf->re_inuse = 1;
2466 	}
2467 
2468 	lwkt_serialize_exit(&ldata->re_jbuf_serializer);
2469 
2470 	return jbuf;
2471 }
2472 
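/*
 * External mbuf free callback: drop one reference and put the jbuf
 * back on the free list once the last reference is gone.
 */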
2473 static void
2474 re_jbuf_free(void *arg)
2475 {
2476 	struct re_jbuf *jbuf = arg;
2477 	struct re_softc *sc = jbuf->re_sc;
2478 	struct re_list_data *ldata = &sc->re_ldata;
2479 
2480 	if (&ldata->re_jbuf[jbuf->re_slot] != jbuf) {
2481 		panic("%s: free wrong jumbo buffer",
2482 		      sc->arpcom.ac_if.if_xname);
2483 	} else if (jbuf->re_inuse == 0) {
2484 		panic("%s: jumbo buffer already freed",
2485 		      sc->arpcom.ac_if.if_xname);
2486 	}
2487 
2488 	lwkt_serialize_enter(&ldata->re_jbuf_serializer);
2489 	atomic_subtract_int(&jbuf->re_inuse, 1);
2490 	if (jbuf->re_inuse == 0)
2491 		SLIST_INSERT_HEAD(&ldata->re_jbuf_free, jbuf, re_link);
2492 	lwkt_serialize_exit(&ldata->re_jbuf_serializer);
2493 }
2494 
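/*
 * External mbuf reference callback: grab an additional reference
 * on a live jbuf.
 */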
2495 static void
2496 re_jbuf_ref(void *arg)
2497 {
2498 	struct re_jbuf *jbuf = arg;
2499 	struct re_softc *sc = jbuf->re_sc;
2500 	struct re_list_data *ldata = &sc->re_ldata;
2501 
2502 	if (&ldata->re_jbuf[jbuf->re_slot] != jbuf) {
2503 		panic("%s: ref wrong jumbo buffer",
2504 		      sc->arpcom.ac_if.if_xname);
2505 	} else if (jbuf->re_inuse == 0) {
2506 		panic("%s: jumbo buffer already freed",
2507 		      sc->arpcom.ac_if.if_xname);
2508 	}
2509 	atomic_add_int(&jbuf->re_inuse, 1);
2510 }
2511 #endif	/* RE_JUMBO */
2512 
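/*
 * Disable PCIe ASPM (L0s/L1); the power saving link states
 * apparently interfere with normal operation of these chips.
 */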
2513 static void
2514 re_disable_aspm(device_t dev)
2515 {
2516 	uint16_t link_cap, link_ctrl;
2517 	uint8_t pcie_ptr, reg;
2518 
2519 	pcie_ptr = pci_get_pciecap_ptr(dev);
2520 	if (pcie_ptr == 0)
2521 		return;
2522 
2523 	link_cap = pci_read_config(dev, pcie_ptr + PCIER_LINKCAP, 2);
2524 	if ((link_cap & PCIEM_LNKCAP_ASPM_MASK) == 0)
2525 		return;
2526 
2527 	if (bootverbose)
2528 		device_printf(dev, "disable ASPM\n");
2529 
2530 	reg = pcie_ptr + PCIER_LINKCTRL;
2531 	link_ctrl = pci_read_config(dev, reg, 2);
2532 	link_ctrl &= ~(PCIEM_LNKCTL_ASPM_L0S | PCIEM_LNKCTL_ASPM_L1);
2533 	pci_write_config(dev, reg, link_ctrl, 2);
2534 }
2535 
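/*
 * Chip access backends for pre-8125 chips, which use the legacy
 * TPPOLL/IMR/ISR register layout.
 */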
2536 static void
2537 re_start_xmit(struct re_softc *sc)
2538 {
2539 	CSR_WRITE_1(sc, RE_TPPOLL, RE_NPQ);
2540 }
2541 
2542 static void
2543 re_write_imr(struct re_softc *sc, uint32_t val)
2544 {
2545 	CSR_WRITE_2(sc, RE_IMR, val);
2546 }
2547 
2548 static void
2549 re_write_isr(struct re_softc *sc, uint32_t val)
2550 {
2551 	CSR_WRITE_2(sc, RE_ISR, val);
2552 }
2553 
2554 static uint32_t
2555 re_read_isr(struct re_softc *sc)
2556 {
2557 	return CSR_READ_2(sc, RE_ISR);
2558 }
2559 
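/*
 * 8125 variants: the RTL8125 moved the TX poll and interrupt
 * registers and widened IMR/ISR to 32 bits.
 */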
2560 static void
2561 re_start_xmit_8125(struct re_softc *sc)
2562 {
2563 	CSR_WRITE_2(sc, RE_TPPOLL_8125, RE_NPQ_8125);
2564 }
2565 
2566 static void
2567 re_write_imr_8125(struct re_softc *sc, uint32_t val)
2568 {
2569 	CSR_WRITE_4(sc, RE_IMR0_8125, val);
2570 }
2571 
2572 static void
2573 re_write_isr_8125(struct re_softc *sc, uint32_t val)
2574 {
2575 	CSR_WRITE_4(sc, RE_ISR0_8125, val);
2576 }
2577 
2578 static uint32_t
2579 re_read_isr_8125(struct re_softc *sc)
2580 {
2581 	return CSR_READ_4(sc, RE_ISR0_8125);
2582 }
2583