xref: /dragonfly/sys/dev/netif/nfe/if_nfe.c (revision 8a7bdfea)
1 /*	$OpenBSD: if_nfe.c,v 1.63 2006/06/17 18:00:43 brad Exp $	*/
2 /*	$DragonFly: src/sys/dev/netif/nfe/if_nfe.c,v 1.19 2008/03/10 12:59:51 sephe Exp $	*/
3 
4 /*
5  * Copyright (c) 2006 The DragonFly Project.  All rights reserved.
6  *
7  * This code is derived from software contributed to The DragonFly Project
8  * by Sepherosa Ziehau <sepherosa@gmail.com> and
9  * Matthew Dillon <dillon@apollo.backplane.com>
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  *
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in
19  *    the documentation and/or other materials provided with the
20  *    distribution.
21  * 3. Neither the name of The DragonFly Project nor the names of its
22  *    contributors may be used to endorse or promote products derived
23  *    from this software without specific, prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
28  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
29  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
30  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
31  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
32  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
33  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
34  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
35  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  */
38 
39 /*
40  * Copyright (c) 2006 Damien Bergamini <damien.bergamini@free.fr>
41  * Copyright (c) 2005, 2006 Jonathan Gray <jsg@openbsd.org>
42  *
43  * Permission to use, copy, modify, and distribute this software for any
44  * purpose with or without fee is hereby granted, provided that the above
45  * copyright notice and this permission notice appear in all copies.
46  *
47  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
48  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
49  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
50  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
51  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
52  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
53  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
54  */
55 
56 /* Driver for NVIDIA nForce MCP Fast Ethernet and Gigabit Ethernet */
57 
58 #include "opt_polling.h"
59 
60 #include <sys/param.h>
61 #include <sys/endian.h>
62 #include <sys/kernel.h>
63 #include <sys/bus.h>
64 #include <sys/proc.h>
65 #include <sys/rman.h>
66 #include <sys/serialize.h>
67 #include <sys/socket.h>
68 #include <sys/sockio.h>
69 #include <sys/sysctl.h>
70 
71 #include <net/ethernet.h>
72 #include <net/if.h>
73 #include <net/bpf.h>
74 #include <net/if_arp.h>
75 #include <net/if_dl.h>
76 #include <net/if_media.h>
77 #include <net/ifq_var.h>
78 #include <net/if_types.h>
79 #include <net/if_var.h>
80 #include <net/vlan/if_vlan_var.h>
81 #include <net/vlan/if_vlan_ether.h>
82 
83 #include <bus/pci/pcireg.h>
84 #include <bus/pci/pcivar.h>
85 #include <bus/pci/pcidevs.h>
86 
87 #include <dev/netif/mii_layer/mii.h>
88 #include <dev/netif/mii_layer/miivar.h>
89 
90 #include "miibus_if.h"
91 
92 #include <dev/netif/nfe/if_nfereg.h>
93 #include <dev/netif/nfe/if_nfevar.h>
94 
/*
 * Checksum offload switches.  nfe_attach() only advertises IFCAP_HWCSUM
 * when NFE_CSUM is defined and the chip has NFE_HW_CSUM set;
 * NFE_CSUM_FEATURES is the value it then stores in if_hwassist.
 */
#define NFE_CSUM
#define NFE_CSUM_FEATURES	(CSUM_IP | CSUM_TCP | CSUM_UDP)
97 
98 static int	nfe_probe(device_t);
99 static int	nfe_attach(device_t);
100 static int	nfe_detach(device_t);
101 static void	nfe_shutdown(device_t);
102 static int	nfe_resume(device_t);
103 static int	nfe_suspend(device_t);
104 
105 static int	nfe_miibus_readreg(device_t, int, int);
106 static void	nfe_miibus_writereg(device_t, int, int, int);
107 static void	nfe_miibus_statchg(device_t);
108 
109 #ifdef DEVICE_POLLING
110 static void	nfe_poll(struct ifnet *, enum poll_cmd, int);
111 #endif
112 static void	nfe_intr(void *);
113 static int	nfe_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
114 static void	nfe_rxeof(struct nfe_softc *);
115 static void	nfe_txeof(struct nfe_softc *);
116 static int	nfe_encap(struct nfe_softc *, struct nfe_tx_ring *,
117 			  struct mbuf *);
118 static void	nfe_start(struct ifnet *);
119 static void	nfe_watchdog(struct ifnet *);
120 static void	nfe_init(void *);
121 static void	nfe_stop(struct nfe_softc *);
122 static struct nfe_jbuf *nfe_jalloc(struct nfe_softc *);
123 static void	nfe_jfree(void *);
124 static void	nfe_jref(void *);
125 static int	nfe_jpool_alloc(struct nfe_softc *, struct nfe_rx_ring *);
126 static void	nfe_jpool_free(struct nfe_softc *, struct nfe_rx_ring *);
127 static int	nfe_alloc_rx_ring(struct nfe_softc *, struct nfe_rx_ring *);
128 static void	nfe_reset_rx_ring(struct nfe_softc *, struct nfe_rx_ring *);
129 static int	nfe_init_rx_ring(struct nfe_softc *, struct nfe_rx_ring *);
130 static void	nfe_free_rx_ring(struct nfe_softc *, struct nfe_rx_ring *);
131 static int	nfe_alloc_tx_ring(struct nfe_softc *, struct nfe_tx_ring *);
132 static void	nfe_reset_tx_ring(struct nfe_softc *, struct nfe_tx_ring *);
133 static int	nfe_init_tx_ring(struct nfe_softc *, struct nfe_tx_ring *);
134 static void	nfe_free_tx_ring(struct nfe_softc *, struct nfe_tx_ring *);
135 static int	nfe_ifmedia_upd(struct ifnet *);
136 static void	nfe_ifmedia_sts(struct ifnet *, struct ifmediareq *);
137 static void	nfe_setmulti(struct nfe_softc *);
138 static void	nfe_get_macaddr(struct nfe_softc *, uint8_t *);
139 static void	nfe_set_macaddr(struct nfe_softc *, const uint8_t *);
140 static void	nfe_tick(void *);
141 static void	nfe_ring_dma_addr(void *, bus_dma_segment_t *, int, int);
142 static void	nfe_buf_dma_addr(void *, bus_dma_segment_t *, int, bus_size_t,
143 				 int);
144 static void	nfe_set_paddr_rxdesc(struct nfe_softc *, struct nfe_rx_ring *,
145 				     int, bus_addr_t);
146 static void	nfe_set_ready_rxdesc(struct nfe_softc *, struct nfe_rx_ring *,
147 				     int);
148 static int	nfe_newbuf_std(struct nfe_softc *, struct nfe_rx_ring *, int,
149 			       int);
150 static int	nfe_newbuf_jumbo(struct nfe_softc *, struct nfe_rx_ring *, int,
151 				 int);
152 
153 static int	nfe_sysctl_imtime(SYSCTL_HANDLER_ARGS);
154 
155 #define NFE_DEBUG
156 #ifdef NFE_DEBUG
157 
158 static int	nfe_debug = 0;
159 static int	nfe_rx_ring_count = NFE_RX_RING_DEF_COUNT;
160 static int	nfe_imtime = -1;
161 
162 TUNABLE_INT("hw.nfe.rx_ring_count", &nfe_rx_ring_count);
163 TUNABLE_INT("hw.nfe.imtime", &nfe_imtime);
164 TUNABLE_INT("hw.nfe.debug", &nfe_debug);
165 
166 #define DPRINTF(sc, fmt, ...) do {		\
167 	if ((sc)->sc_debug) {			\
168 		if_printf(&(sc)->arpcom.ac_if,	\
169 			  fmt, __VA_ARGS__);	\
170 	}					\
171 } while (0)
172 
173 #define DPRINTFN(sc, lv, fmt, ...) do {		\
174 	if ((sc)->sc_debug >= (lv)) {		\
175 		if_printf(&(sc)->arpcom.ac_if,	\
176 			  fmt, __VA_ARGS__);	\
177 	}					\
178 } while (0)
179 
180 #else	/* !NFE_DEBUG */
181 
182 #define DPRINTF(sc, fmt, ...)
183 #define DPRINTFN(sc, lv, fmt, ...)
184 
185 #endif	/* NFE_DEBUG */
186 
187 struct nfe_dma_ctx {
188 	int			nsegs;
189 	bus_dma_segment_t	*segs;
190 };
191 
192 static const struct nfe_dev {
193 	uint16_t	vid;
194 	uint16_t	did;
195 	const char	*desc;
196 } nfe_devices[] = {
197 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_NFORCE_LAN,
198 	  "NVIDIA nForce Fast Ethernet" },
199 
200 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_NFORCE2_LAN,
201 	  "NVIDIA nForce2 Fast Ethernet" },
202 
203 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_NFORCE3_LAN1,
204 	  "NVIDIA nForce3 Gigabit Ethernet" },
205 
206 	/* XXX TGEN the next chip can also be found in the nForce2 Ultra 400Gb
207 	   chipset, and possibly also the 400R; it might be both nForce2- and
208 	   nForce3-based boards can use the same MCPs (= southbridges) */
209 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_NFORCE3_LAN2,
210 	  "NVIDIA nForce3 Gigabit Ethernet" },
211 
212 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_NFORCE3_LAN3,
213 	  "NVIDIA nForce3 Gigabit Ethernet" },
214 
215 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_NFORCE3_LAN4,
216 	  "NVIDIA nForce3 Gigabit Ethernet" },
217 
218 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_NFORCE3_LAN5,
219 	  "NVIDIA nForce3 Gigabit Ethernet" },
220 
221 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_CK804_LAN1,
222 	  "NVIDIA CK804 Gigabit Ethernet" },
223 
224 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_CK804_LAN2,
225 	  "NVIDIA CK804 Gigabit Ethernet" },
226 
227 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP04_LAN1,
228 	  "NVIDIA MCP04 Gigabit Ethernet" },
229 
230 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP04_LAN2,
231 	  "NVIDIA MCP04 Gigabit Ethernet" },
232 
233 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP51_LAN1,
234 	  "NVIDIA MCP51 Gigabit Ethernet" },
235 
236 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP51_LAN2,
237 	  "NVIDIA MCP51 Gigabit Ethernet" },
238 
239 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP55_LAN1,
240 	  "NVIDIA MCP55 Gigabit Ethernet" },
241 
242 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP55_LAN2,
243 	  "NVIDIA MCP55 Gigabit Ethernet" },
244 
245 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP61_LAN1,
246 	  "NVIDIA MCP61 Gigabit Ethernet" },
247 
248 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP61_LAN2,
249 	  "NVIDIA MCP61 Gigabit Ethernet" },
250 
251 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP61_LAN3,
252 	  "NVIDIA MCP61 Gigabit Ethernet" },
253 
254 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP61_LAN4,
255 	  "NVIDIA MCP61 Gigabit Ethernet" },
256 
257 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP65_LAN1,
258 	  "NVIDIA MCP65 Gigabit Ethernet" },
259 
260 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP65_LAN2,
261 	  "NVIDIA MCP65 Gigabit Ethernet" },
262 
263 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP65_LAN3,
264 	  "NVIDIA MCP65 Gigabit Ethernet" },
265 
266 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP65_LAN4,
267 	  "NVIDIA MCP65 Gigabit Ethernet" },
268 
269 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP67_LAN1,
270 	  "NVIDIA MCP67 Gigabit Ethernet" },
271 
272 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP67_LAN2,
273 	  "NVIDIA MCP67 Gigabit Ethernet" },
274 
275 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP67_LAN3,
276 	  "NVIDIA MCP67 Gigabit Ethernet" },
277 
278 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP67_LAN4,
279 	  "NVIDIA MCP67 Gigabit Ethernet" }
280 };
281 
282 static device_method_t nfe_methods[] = {
283 	/* Device interface */
284 	DEVMETHOD(device_probe,		nfe_probe),
285 	DEVMETHOD(device_attach,	nfe_attach),
286 	DEVMETHOD(device_detach,	nfe_detach),
287 	DEVMETHOD(device_suspend,	nfe_suspend),
288 	DEVMETHOD(device_resume,	nfe_resume),
289 	DEVMETHOD(device_shutdown,	nfe_shutdown),
290 
291 	/* Bus interface */
292 	DEVMETHOD(bus_print_child,	bus_generic_print_child),
293 	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
294 
295 	/* MII interface */
296 	DEVMETHOD(miibus_readreg,	nfe_miibus_readreg),
297 	DEVMETHOD(miibus_writereg,	nfe_miibus_writereg),
298 	DEVMETHOD(miibus_statchg,	nfe_miibus_statchg),
299 
300 	{ 0, 0 }
301 };
302 
303 static driver_t nfe_driver = {
304 	"nfe",
305 	nfe_methods,
306 	sizeof(struct nfe_softc)
307 };
308 
309 static devclass_t	nfe_devclass;
310 
311 DECLARE_DUMMY_MODULE(if_nfe);
312 MODULE_DEPEND(if_nfe, miibus, 1, 1, 1);
313 DRIVER_MODULE(if_nfe, pci, nfe_driver, nfe_devclass, 0, 0);
314 DRIVER_MODULE(miibus, nfe, miibus_driver, miibus_devclass, 0, 0);
315 
316 static int
317 nfe_probe(device_t dev)
318 {
319 	const struct nfe_dev *n;
320 	uint16_t vid, did;
321 
322 	vid = pci_get_vendor(dev);
323 	did = pci_get_device(dev);
324 	for (n = nfe_devices; n->desc != NULL; ++n) {
325 		if (vid == n->vid && did == n->did) {
326 			struct nfe_softc *sc = device_get_softc(dev);
327 
328 			switch (did) {
329 			case PCI_PRODUCT_NVIDIA_NFORCE3_LAN2:
330 			case PCI_PRODUCT_NVIDIA_NFORCE3_LAN3:
331 			case PCI_PRODUCT_NVIDIA_NFORCE3_LAN4:
332 			case PCI_PRODUCT_NVIDIA_NFORCE3_LAN5:
333 				sc->sc_flags = NFE_JUMBO_SUP |
334 					       NFE_HW_CSUM;
335 				break;
336 			case PCI_PRODUCT_NVIDIA_MCP51_LAN1:
337 			case PCI_PRODUCT_NVIDIA_MCP51_LAN2:
338 			case PCI_PRODUCT_NVIDIA_MCP61_LAN1:
339 			case PCI_PRODUCT_NVIDIA_MCP61_LAN2:
340 			case PCI_PRODUCT_NVIDIA_MCP61_LAN3:
341 			case PCI_PRODUCT_NVIDIA_MCP61_LAN4:
342 			case PCI_PRODUCT_NVIDIA_MCP67_LAN1:
343 			case PCI_PRODUCT_NVIDIA_MCP67_LAN2:
344 			case PCI_PRODUCT_NVIDIA_MCP67_LAN3:
345 			case PCI_PRODUCT_NVIDIA_MCP67_LAN4:
346 				sc->sc_flags = NFE_40BIT_ADDR;
347 				break;
348 			case PCI_PRODUCT_NVIDIA_CK804_LAN1:
349 			case PCI_PRODUCT_NVIDIA_CK804_LAN2:
350 			case PCI_PRODUCT_NVIDIA_MCP04_LAN1:
351 			case PCI_PRODUCT_NVIDIA_MCP04_LAN2:
352 			case PCI_PRODUCT_NVIDIA_MCP65_LAN1:
353 			case PCI_PRODUCT_NVIDIA_MCP65_LAN2:
354 			case PCI_PRODUCT_NVIDIA_MCP65_LAN3:
355 			case PCI_PRODUCT_NVIDIA_MCP65_LAN4:
356 				sc->sc_flags = NFE_JUMBO_SUP |
357 					       NFE_40BIT_ADDR |
358 					       NFE_HW_CSUM;
359 				break;
360 			case PCI_PRODUCT_NVIDIA_MCP55_LAN1:
361 			case PCI_PRODUCT_NVIDIA_MCP55_LAN2:
362 				sc->sc_flags = NFE_JUMBO_SUP |
363 					       NFE_40BIT_ADDR |
364 					       NFE_HW_CSUM |
365 					       NFE_HW_VLAN;
366 				break;
367 			}
368 
369 			device_set_desc(dev, n->desc);
370 			device_set_async_attach(dev, TRUE);
371 			return 0;
372 		}
373 	}
374 	return ENXIO;
375 }
376 
377 static int
378 nfe_attach(device_t dev)
379 {
380 	struct nfe_softc *sc = device_get_softc(dev);
381 	struct ifnet *ifp = &sc->arpcom.ac_if;
382 	uint8_t eaddr[ETHER_ADDR_LEN];
383 	int error;
384 
385 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
386 	lwkt_serialize_init(&sc->sc_jbuf_serializer);
387 
388 	/*
389 	 * Initialize sysctl variables
390 	 */
391 	sc->sc_imtime = nfe_imtime;
392 	sc->sc_irq_enable = NFE_IRQ_ENABLE(sc);
393 	sc->sc_rx_ring_count = nfe_rx_ring_count;
394 	sc->sc_debug = nfe_debug;
395 
396 	sc->sc_mem_rid = PCIR_BAR(0);
397 
398 #ifndef BURN_BRIDGES
399 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
400 		uint32_t mem, irq;
401 
402 		mem = pci_read_config(dev, sc->sc_mem_rid, 4);
403 		irq = pci_read_config(dev, PCIR_INTLINE, 4);
404 
405 		device_printf(dev, "chip is in D%d power mode "
406 		    "-- setting to D0\n", pci_get_powerstate(dev));
407 
408 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
409 
410 		pci_write_config(dev, sc->sc_mem_rid, mem, 4);
411 		pci_write_config(dev, PCIR_INTLINE, irq, 4);
412 	}
413 #endif	/* !BURN_BRIDGE */
414 
415 	/* Enable bus mastering */
416 	pci_enable_busmaster(dev);
417 
418 	/* Allocate IO memory */
419 	sc->sc_mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
420 						&sc->sc_mem_rid, RF_ACTIVE);
421 	if (sc->sc_mem_res == NULL) {
422 		device_printf(dev, "cound not allocate io memory\n");
423 		return ENXIO;
424 	}
425 	sc->sc_memh = rman_get_bushandle(sc->sc_mem_res);
426 	sc->sc_memt = rman_get_bustag(sc->sc_mem_res);
427 
428 	/* Allocate IRQ */
429 	sc->sc_irq_rid = 0;
430 	sc->sc_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
431 						&sc->sc_irq_rid,
432 						RF_SHAREABLE | RF_ACTIVE);
433 	if (sc->sc_irq_res == NULL) {
434 		device_printf(dev, "could not allocate irq\n");
435 		error = ENXIO;
436 		goto fail;
437 	}
438 
439 	nfe_get_macaddr(sc, eaddr);
440 
441 	/*
442 	 * Allocate Tx and Rx rings.
443 	 */
444 	error = nfe_alloc_tx_ring(sc, &sc->txq);
445 	if (error) {
446 		device_printf(dev, "could not allocate Tx ring\n");
447 		goto fail;
448 	}
449 
450 	error = nfe_alloc_rx_ring(sc, &sc->rxq);
451 	if (error) {
452 		device_printf(dev, "could not allocate Rx ring\n");
453 		goto fail;
454 	}
455 
456 	/*
457 	 * Create sysctl tree
458 	 */
459 	sysctl_ctx_init(&sc->sc_sysctl_ctx);
460 	sc->sc_sysctl_tree = SYSCTL_ADD_NODE(&sc->sc_sysctl_ctx,
461 					     SYSCTL_STATIC_CHILDREN(_hw),
462 					     OID_AUTO,
463 					     device_get_nameunit(dev),
464 					     CTLFLAG_RD, 0, "");
465 	if (sc->sc_sysctl_tree == NULL) {
466 		device_printf(dev, "can't add sysctl node\n");
467 		error = ENXIO;
468 		goto fail;
469 	}
470 	SYSCTL_ADD_PROC(&sc->sc_sysctl_ctx,
471 			SYSCTL_CHILDREN(sc->sc_sysctl_tree),
472 			OID_AUTO, "imtimer", CTLTYPE_INT | CTLFLAG_RW,
473 			sc, 0, nfe_sysctl_imtime, "I",
474 			"Interrupt moderation time (usec).  "
475 			"-1 to disable interrupt moderation.");
476 	SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(sc->sc_sysctl_tree), OID_AUTO,
477 		       "rx_ring_count", CTLFLAG_RD, &sc->sc_rx_ring_count,
478 		       0, "RX ring count");
479 	SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(sc->sc_sysctl_tree), OID_AUTO,
480 		       "debug", CTLFLAG_RW, &sc->sc_debug,
481 		       0, "control debugging printfs");
482 
483 	error = mii_phy_probe(dev, &sc->sc_miibus, nfe_ifmedia_upd,
484 			      nfe_ifmedia_sts);
485 	if (error) {
486 		device_printf(dev, "MII without any phy\n");
487 		goto fail;
488 	}
489 
490 	ifp->if_softc = sc;
491 	ifp->if_mtu = ETHERMTU;
492 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
493 	ifp->if_ioctl = nfe_ioctl;
494 	ifp->if_start = nfe_start;
495 #ifdef DEVICE_POLLING
496 	ifp->if_poll = nfe_poll;
497 #endif
498 	ifp->if_watchdog = nfe_watchdog;
499 	ifp->if_init = nfe_init;
500 	ifq_set_maxlen(&ifp->if_snd, NFE_IFQ_MAXLEN);
501 	ifq_set_ready(&ifp->if_snd);
502 
503 	ifp->if_capabilities = IFCAP_VLAN_MTU;
504 
505 	if (sc->sc_flags & NFE_HW_VLAN)
506 		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
507 
508 #ifdef NFE_CSUM
509 	if (sc->sc_flags & NFE_HW_CSUM) {
510 		ifp->if_capabilities |= IFCAP_HWCSUM;
511 		ifp->if_hwassist = NFE_CSUM_FEATURES;
512 	}
513 #else
514 	sc->sc_flags &= ~NFE_HW_CSUM;
515 #endif
516 	ifp->if_capenable = ifp->if_capabilities;
517 
518 	callout_init(&sc->sc_tick_ch);
519 
520 	ether_ifattach(ifp, eaddr, NULL);
521 
522 	error = bus_setup_intr(dev, sc->sc_irq_res, INTR_MPSAFE, nfe_intr, sc,
523 			       &sc->sc_ih, ifp->if_serializer);
524 	if (error) {
525 		device_printf(dev, "could not setup intr\n");
526 		ether_ifdetach(ifp);
527 		goto fail;
528 	}
529 
530 	return 0;
531 fail:
532 	nfe_detach(dev);
533 	return error;
534 }
535 
536 static int
537 nfe_detach(device_t dev)
538 {
539 	struct nfe_softc *sc = device_get_softc(dev);
540 
541 	if (device_is_attached(dev)) {
542 		struct ifnet *ifp = &sc->arpcom.ac_if;
543 
544 		lwkt_serialize_enter(ifp->if_serializer);
545 		nfe_stop(sc);
546 		bus_teardown_intr(dev, sc->sc_irq_res, sc->sc_ih);
547 		lwkt_serialize_exit(ifp->if_serializer);
548 
549 		ether_ifdetach(ifp);
550 	}
551 
552 	if (sc->sc_miibus != NULL)
553 		device_delete_child(dev, sc->sc_miibus);
554 	bus_generic_detach(dev);
555 
556 	if (sc->sc_sysctl_tree != NULL)
557 		sysctl_ctx_free(&sc->sc_sysctl_ctx);
558 
559 	if (sc->sc_irq_res != NULL) {
560 		bus_release_resource(dev, SYS_RES_IRQ, sc->sc_irq_rid,
561 				     sc->sc_irq_res);
562 	}
563 
564 	if (sc->sc_mem_res != NULL) {
565 		bus_release_resource(dev, SYS_RES_MEMORY, sc->sc_mem_rid,
566 				     sc->sc_mem_res);
567 	}
568 
569 	nfe_free_tx_ring(sc, &sc->txq);
570 	nfe_free_rx_ring(sc, &sc->rxq);
571 
572 	return 0;
573 }
574 
575 static void
576 nfe_shutdown(device_t dev)
577 {
578 	struct nfe_softc *sc = device_get_softc(dev);
579 	struct ifnet *ifp = &sc->arpcom.ac_if;
580 
581 	lwkt_serialize_enter(ifp->if_serializer);
582 	nfe_stop(sc);
583 	lwkt_serialize_exit(ifp->if_serializer);
584 }
585 
586 static int
587 nfe_suspend(device_t dev)
588 {
589 	struct nfe_softc *sc = device_get_softc(dev);
590 	struct ifnet *ifp = &sc->arpcom.ac_if;
591 
592 	lwkt_serialize_enter(ifp->if_serializer);
593 	nfe_stop(sc);
594 	lwkt_serialize_exit(ifp->if_serializer);
595 
596 	return 0;
597 }
598 
599 static int
600 nfe_resume(device_t dev)
601 {
602 	struct nfe_softc *sc = device_get_softc(dev);
603 	struct ifnet *ifp = &sc->arpcom.ac_if;
604 
605 	lwkt_serialize_enter(ifp->if_serializer);
606 	if (ifp->if_flags & IFF_UP)
607 		nfe_init(sc);
608 	lwkt_serialize_exit(ifp->if_serializer);
609 
610 	return 0;
611 }
612 
613 static void
614 nfe_miibus_statchg(device_t dev)
615 {
616 	struct nfe_softc *sc = device_get_softc(dev);
617 	struct mii_data *mii = device_get_softc(sc->sc_miibus);
618 	uint32_t phy, seed, misc = NFE_MISC1_MAGIC, link = NFE_MEDIA_SET;
619 
620 	phy = NFE_READ(sc, NFE_PHY_IFACE);
621 	phy &= ~(NFE_PHY_HDX | NFE_PHY_100TX | NFE_PHY_1000T);
622 
623 	seed = NFE_READ(sc, NFE_RNDSEED);
624 	seed &= ~NFE_SEED_MASK;
625 
626 	if ((mii->mii_media_active & IFM_GMASK) == IFM_HDX) {
627 		phy  |= NFE_PHY_HDX;	/* half-duplex */
628 		misc |= NFE_MISC1_HDX;
629 	}
630 
631 	switch (IFM_SUBTYPE(mii->mii_media_active)) {
632 	case IFM_1000_T:	/* full-duplex only */
633 		link |= NFE_MEDIA_1000T;
634 		seed |= NFE_SEED_1000T;
635 		phy  |= NFE_PHY_1000T;
636 		break;
637 	case IFM_100_TX:
638 		link |= NFE_MEDIA_100TX;
639 		seed |= NFE_SEED_100TX;
640 		phy  |= NFE_PHY_100TX;
641 		break;
642 	case IFM_10_T:
643 		link |= NFE_MEDIA_10T;
644 		seed |= NFE_SEED_10T;
645 		break;
646 	}
647 
648 	NFE_WRITE(sc, NFE_RNDSEED, seed);	/* XXX: gigabit NICs only? */
649 
650 	NFE_WRITE(sc, NFE_PHY_IFACE, phy);
651 	NFE_WRITE(sc, NFE_MISC1, misc);
652 	NFE_WRITE(sc, NFE_LINKSPEED, link);
653 }
654 
655 static int
656 nfe_miibus_readreg(device_t dev, int phy, int reg)
657 {
658 	struct nfe_softc *sc = device_get_softc(dev);
659 	uint32_t val;
660 	int ntries;
661 
662 	NFE_WRITE(sc, NFE_PHY_STATUS, 0xf);
663 
664 	if (NFE_READ(sc, NFE_PHY_CTL) & NFE_PHY_BUSY) {
665 		NFE_WRITE(sc, NFE_PHY_CTL, NFE_PHY_BUSY);
666 		DELAY(100);
667 	}
668 
669 	NFE_WRITE(sc, NFE_PHY_CTL, (phy << NFE_PHYADD_SHIFT) | reg);
670 
671 	for (ntries = 0; ntries < 1000; ntries++) {
672 		DELAY(100);
673 		if (!(NFE_READ(sc, NFE_PHY_CTL) & NFE_PHY_BUSY))
674 			break;
675 	}
676 	if (ntries == 1000) {
677 		DPRINTFN(sc, 2, "timeout waiting for PHY %s\n", "");
678 		return 0;
679 	}
680 
681 	if (NFE_READ(sc, NFE_PHY_STATUS) & NFE_PHY_ERROR) {
682 		DPRINTFN(sc, 2, "could not read PHY %s\n", "");
683 		return 0;
684 	}
685 
686 	val = NFE_READ(sc, NFE_PHY_DATA);
687 	if (val != 0xffffffff && val != 0)
688 		sc->mii_phyaddr = phy;
689 
690 	DPRINTFN(sc, 2, "mii read phy %d reg 0x%x ret 0x%x\n", phy, reg, val);
691 
692 	return val;
693 }
694 
695 static void
696 nfe_miibus_writereg(device_t dev, int phy, int reg, int val)
697 {
698 	struct nfe_softc *sc = device_get_softc(dev);
699 	uint32_t ctl;
700 	int ntries;
701 
702 	NFE_WRITE(sc, NFE_PHY_STATUS, 0xf);
703 
704 	if (NFE_READ(sc, NFE_PHY_CTL) & NFE_PHY_BUSY) {
705 		NFE_WRITE(sc, NFE_PHY_CTL, NFE_PHY_BUSY);
706 		DELAY(100);
707 	}
708 
709 	NFE_WRITE(sc, NFE_PHY_DATA, val);
710 	ctl = NFE_PHY_WRITE | (phy << NFE_PHYADD_SHIFT) | reg;
711 	NFE_WRITE(sc, NFE_PHY_CTL, ctl);
712 
713 	for (ntries = 0; ntries < 1000; ntries++) {
714 		DELAY(100);
715 		if (!(NFE_READ(sc, NFE_PHY_CTL) & NFE_PHY_BUSY))
716 			break;
717 	}
718 
719 #ifdef NFE_DEBUG
720 	if (ntries == 1000)
721 		DPRINTFN(sc, 2, "could not write to PHY %s\n", "");
722 #endif
723 }
724 
#ifdef DEVICE_POLLING

/*
 * Polling handler (installed as ifp->if_poll by nfe_attach()).
 *
 * POLL_REGISTER masks all chip interrupts, POLL_DEREGISTER restores the
 * interrupt mask saved in sc_irq_enable, and the two poll commands
 * service the Rx and Tx rings if the interface is running.  `count' is
 * not used.  Must be called with the ifnet serializer held.
 */
static void
nfe_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct nfe_softc *sc = ifp->if_softc;

	ASSERT_SERIALIZED(ifp->if_serializer);

	switch (cmd) {
	case POLL_REGISTER:
		/* Disable interrupts */
		NFE_WRITE(sc, NFE_IRQ_MASK, 0);
		return;

	case POLL_DEREGISTER:
		/* enable interrupts */
		NFE_WRITE(sc, NFE_IRQ_MASK, sc->sc_irq_enable);
		return;

	case POLL_AND_CHECK_STATUS:
	case POLL_ONLY:
		if ((ifp->if_flags & IFF_RUNNING) == 0)
			return;
		nfe_rxeof(sc);
		nfe_txeof(sc);
		return;
	}
}

#endif
755 
756 static void
757 nfe_intr(void *arg)
758 {
759 	struct nfe_softc *sc = arg;
760 	struct ifnet *ifp = &sc->arpcom.ac_if;
761 	uint32_t r;
762 
763 	r = NFE_READ(sc, NFE_IRQ_STATUS);
764 	if (r == 0)
765 		return;	/* not for us */
766 	NFE_WRITE(sc, NFE_IRQ_STATUS, r);
767 
768 	DPRINTFN(sc, 5, "%s: interrupt register %x\n", __func__, r);
769 
770 	if (r & NFE_IRQ_LINK) {
771 		NFE_READ(sc, NFE_PHY_STATUS);
772 		NFE_WRITE(sc, NFE_PHY_STATUS, 0xf);
773 		DPRINTF(sc, "link state changed %s\n", "");
774 	}
775 
776 	if (ifp->if_flags & IFF_RUNNING) {
777 		/* check Rx ring */
778 		nfe_rxeof(sc);
779 
780 		/* check Tx ring */
781 		nfe_txeof(sc);
782 	}
783 }
784 
785 static int
786 nfe_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
787 {
788 	struct nfe_softc *sc = ifp->if_softc;
789 	struct ifreq *ifr = (struct ifreq *)data;
790 	struct mii_data *mii;
791 	int error = 0, mask;
792 
793 	switch (cmd) {
794 	case SIOCSIFMTU:
795 		if (((sc->sc_flags & NFE_JUMBO_SUP) &&
796 		     ifr->ifr_mtu > NFE_JUMBO_MTU) ||
797 		    ((sc->sc_flags & NFE_JUMBO_SUP) == 0 &&
798 		     ifr->ifr_mtu > ETHERMTU)) {
799 			return EINVAL;
800 		} else if (ifp->if_mtu != ifr->ifr_mtu) {
801 			ifp->if_mtu = ifr->ifr_mtu;
802 			nfe_init(sc);
803 		}
804 		break;
805 	case SIOCSIFFLAGS:
806 		if (ifp->if_flags & IFF_UP) {
807 			/*
808 			 * If only the PROMISC or ALLMULTI flag changes, then
809 			 * don't do a full re-init of the chip, just update
810 			 * the Rx filter.
811 			 */
812 			if ((ifp->if_flags & IFF_RUNNING) &&
813 			    ((ifp->if_flags ^ sc->sc_if_flags) &
814 			     (IFF_ALLMULTI | IFF_PROMISC)) != 0) {
815 				nfe_setmulti(sc);
816 			} else {
817 				if (!(ifp->if_flags & IFF_RUNNING))
818 					nfe_init(sc);
819 			}
820 		} else {
821 			if (ifp->if_flags & IFF_RUNNING)
822 				nfe_stop(sc);
823 		}
824 		sc->sc_if_flags = ifp->if_flags;
825 		break;
826 	case SIOCADDMULTI:
827 	case SIOCDELMULTI:
828 		if (ifp->if_flags & IFF_RUNNING)
829 			nfe_setmulti(sc);
830 		break;
831 	case SIOCSIFMEDIA:
832 	case SIOCGIFMEDIA:
833 		mii = device_get_softc(sc->sc_miibus);
834 		error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, cmd);
835 		break;
836         case SIOCSIFCAP:
837 		mask = (ifr->ifr_reqcap ^ ifp->if_capenable) & IFCAP_HWCSUM;
838 		if (mask && (ifp->if_capabilities & IFCAP_HWCSUM)) {
839 			ifp->if_capenable ^= mask;
840 			if (IFCAP_TXCSUM & ifp->if_capenable)
841 				ifp->if_hwassist = NFE_CSUM_FEATURES;
842 			else
843 				ifp->if_hwassist = 0;
844 
845 			if (ifp->if_flags & IFF_RUNNING)
846 				nfe_init(sc);
847 		}
848 		break;
849 	default:
850 		error = ether_ioctl(ifp, cmd, data);
851 		break;
852 	}
853 	return error;
854 }
855 
856 static void
857 nfe_rxeof(struct nfe_softc *sc)
858 {
859 	struct ifnet *ifp = &sc->arpcom.ac_if;
860 	struct nfe_rx_ring *ring = &sc->rxq;
861 	int reap;
862 
863 	reap = 0;
864 	bus_dmamap_sync(ring->tag, ring->map, BUS_DMASYNC_POSTREAD);
865 
866 	for (;;) {
867 		struct nfe_rx_data *data = &ring->data[ring->cur];
868 		struct mbuf *m;
869 		uint16_t flags;
870 		int len, error;
871 
872 		if (sc->sc_flags & NFE_40BIT_ADDR) {
873 			struct nfe_desc64 *desc64 = &ring->desc64[ring->cur];
874 
875 			flags = le16toh(desc64->flags);
876 			len = le16toh(desc64->length) & 0x3fff;
877 		} else {
878 			struct nfe_desc32 *desc32 = &ring->desc32[ring->cur];
879 
880 			flags = le16toh(desc32->flags);
881 			len = le16toh(desc32->length) & 0x3fff;
882 		}
883 
884 		if (flags & NFE_RX_READY)
885 			break;
886 
887 		reap = 1;
888 
889 		if ((sc->sc_flags & (NFE_JUMBO_SUP | NFE_40BIT_ADDR)) == 0) {
890 			if (!(flags & NFE_RX_VALID_V1))
891 				goto skip;
892 
893 			if ((flags & NFE_RX_FIXME_V1) == NFE_RX_FIXME_V1) {
894 				flags &= ~NFE_RX_ERROR;
895 				len--;	/* fix buffer length */
896 			}
897 		} else {
898 			if (!(flags & NFE_RX_VALID_V2))
899 				goto skip;
900 
901 			if ((flags & NFE_RX_FIXME_V2) == NFE_RX_FIXME_V2) {
902 				flags &= ~NFE_RX_ERROR;
903 				len--;	/* fix buffer length */
904 			}
905 		}
906 
907 		if (flags & NFE_RX_ERROR) {
908 			ifp->if_ierrors++;
909 			goto skip;
910 		}
911 
912 		m = data->m;
913 
914 		if (sc->sc_flags & NFE_USE_JUMBO)
915 			error = nfe_newbuf_jumbo(sc, ring, ring->cur, 0);
916 		else
917 			error = nfe_newbuf_std(sc, ring, ring->cur, 0);
918 		if (error) {
919 			ifp->if_ierrors++;
920 			goto skip;
921 		}
922 
923 		/* finalize mbuf */
924 		m->m_pkthdr.len = m->m_len = len;
925 		m->m_pkthdr.rcvif = ifp;
926 
927 		if ((ifp->if_capenable & IFCAP_RXCSUM) &&
928 		    (flags & NFE_RX_CSUMOK)) {
929 			if (flags & NFE_RX_IP_CSUMOK_V2) {
930 				m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED |
931 							  CSUM_IP_VALID;
932 			}
933 
934 			if (flags &
935 			    (NFE_RX_UDP_CSUMOK_V2 | NFE_RX_TCP_CSUMOK_V2)) {
936 				m->m_pkthdr.csum_flags |= CSUM_DATA_VALID |
937 							  CSUM_PSEUDO_HDR |
938 							  CSUM_FRAG_NOT_CHECKED;
939 				m->m_pkthdr.csum_data = 0xffff;
940 			}
941 		}
942 
943 		ifp->if_ipackets++;
944 		ifp->if_input(ifp, m);
945 skip:
946 		nfe_set_ready_rxdesc(sc, ring, ring->cur);
947 		sc->rxq.cur = (sc->rxq.cur + 1) % sc->sc_rx_ring_count;
948 	}
949 
950 	if (reap)
951 		bus_dmamap_sync(ring->tag, ring->map, BUS_DMASYNC_PREWRITE);
952 }
953 
954 static void
955 nfe_txeof(struct nfe_softc *sc)
956 {
957 	struct ifnet *ifp = &sc->arpcom.ac_if;
958 	struct nfe_tx_ring *ring = &sc->txq;
959 	struct nfe_tx_data *data = NULL;
960 
961 	bus_dmamap_sync(ring->tag, ring->map, BUS_DMASYNC_POSTREAD);
962 	while (ring->next != ring->cur) {
963 		uint16_t flags;
964 
965 		if (sc->sc_flags & NFE_40BIT_ADDR)
966 			flags = le16toh(ring->desc64[ring->next].flags);
967 		else
968 			flags = le16toh(ring->desc32[ring->next].flags);
969 
970 		if (flags & NFE_TX_VALID)
971 			break;
972 
973 		data = &ring->data[ring->next];
974 
975 		if ((sc->sc_flags & (NFE_JUMBO_SUP | NFE_40BIT_ADDR)) == 0) {
976 			if (!(flags & NFE_TX_LASTFRAG_V1) && data->m == NULL)
977 				goto skip;
978 
979 			if ((flags & NFE_TX_ERROR_V1) != 0) {
980 				if_printf(ifp, "tx v1 error 0x%4b\n", flags,
981 					  NFE_V1_TXERR);
982 				ifp->if_oerrors++;
983 			} else {
984 				ifp->if_opackets++;
985 			}
986 		} else {
987 			if (!(flags & NFE_TX_LASTFRAG_V2) && data->m == NULL)
988 				goto skip;
989 
990 			if ((flags & NFE_TX_ERROR_V2) != 0) {
991 				if_printf(ifp, "tx v2 error 0x%4b\n", flags,
992 					  NFE_V2_TXERR);
993 				ifp->if_oerrors++;
994 			} else {
995 				ifp->if_opackets++;
996 			}
997 		}
998 
999 		if (data->m == NULL) {	/* should not get there */
1000 			if_printf(ifp,
1001 				  "last fragment bit w/o associated mbuf!\n");
1002 			goto skip;
1003 		}
1004 
1005 		/* last fragment of the mbuf chain transmitted */
1006 		bus_dmamap_sync(ring->data_tag, data->map,
1007 				BUS_DMASYNC_POSTWRITE);
1008 		bus_dmamap_unload(ring->data_tag, data->map);
1009 		m_freem(data->m);
1010 		data->m = NULL;
1011 
1012 		ifp->if_timer = 0;
1013 skip:
1014 		ring->queued--;
1015 		KKASSERT(ring->queued >= 0);
1016 		ring->next = (ring->next + 1) % NFE_TX_RING_COUNT;
1017 	}
1018 
1019 	if (data != NULL) {	/* at least one slot freed */
1020 		ifp->if_flags &= ~IFF_OACTIVE;
1021 		ifp->if_start(ifp);
1022 	}
1023 }
1024 
1025 static int
1026 nfe_encap(struct nfe_softc *sc, struct nfe_tx_ring *ring, struct mbuf *m0)
1027 {
1028 	struct nfe_dma_ctx ctx;
1029 	bus_dma_segment_t segs[NFE_MAX_SCATTER];
1030 	struct nfe_tx_data *data, *data_map;
1031 	bus_dmamap_t map;
1032 	struct nfe_desc64 *desc64 = NULL;
1033 	struct nfe_desc32 *desc32 = NULL;
1034 	uint16_t flags = 0;
1035 	uint32_t vtag = 0;
1036 	int error, i, j;
1037 
1038 	data = &ring->data[ring->cur];
1039 	map = data->map;
1040 	data_map = data;	/* Remember who owns the DMA map */
1041 
1042 	ctx.nsegs = NFE_MAX_SCATTER;
1043 	ctx.segs = segs;
1044 	error = bus_dmamap_load_mbuf(ring->data_tag, map, m0,
1045 				     nfe_buf_dma_addr, &ctx, BUS_DMA_NOWAIT);
1046 	if (error && error != EFBIG) {
1047 		if_printf(&sc->arpcom.ac_if, "could not map TX mbuf\n");
1048 		goto back;
1049 	}
1050 
1051 	if (error) {	/* error == EFBIG */
1052 		struct mbuf *m_new;
1053 
1054 		m_new = m_defrag(m0, MB_DONTWAIT);
1055 		if (m_new == NULL) {
1056 			if_printf(&sc->arpcom.ac_if,
1057 				  "could not defrag TX mbuf\n");
1058 			error = ENOBUFS;
1059 			goto back;
1060 		} else {
1061 			m0 = m_new;
1062 		}
1063 
1064 		ctx.nsegs = NFE_MAX_SCATTER;
1065 		ctx.segs = segs;
1066 		error = bus_dmamap_load_mbuf(ring->data_tag, map, m0,
1067 					     nfe_buf_dma_addr, &ctx,
1068 					     BUS_DMA_NOWAIT);
1069 		if (error) {
1070 			if_printf(&sc->arpcom.ac_if,
1071 				  "could not map defraged TX mbuf\n");
1072 			goto back;
1073 		}
1074 	}
1075 
1076 	error = 0;
1077 
1078 	if (ring->queued + ctx.nsegs >= NFE_TX_RING_COUNT - 1) {
1079 		bus_dmamap_unload(ring->data_tag, map);
1080 		error = ENOBUFS;
1081 		goto back;
1082 	}
1083 
1084 	/* setup h/w VLAN tagging */
1085 	if (m0->m_flags & M_VLANTAG)
1086 		vtag = m0->m_pkthdr.ether_vlantag;
1087 
1088 	if (sc->arpcom.ac_if.if_capenable & IFCAP_TXCSUM) {
1089 		if (m0->m_pkthdr.csum_flags & CSUM_IP)
1090 			flags |= NFE_TX_IP_CSUM;
1091 		if (m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))
1092 			flags |= NFE_TX_TCP_CSUM;
1093 	}
1094 
1095 	/*
1096 	 * XXX urm. somebody is unaware of how hardware works.  You
1097 	 * absolutely CANNOT set NFE_TX_VALID on the next descriptor in
1098 	 * the ring until the entire chain is actually *VALID*.  Otherwise
1099 	 * the hardware may encounter a partially initialized chain that
1100 	 * is marked as being ready to go when it in fact is not ready to
1101 	 * go.
1102 	 */
1103 
1104 	for (i = 0; i < ctx.nsegs; i++) {
1105 		j = (ring->cur + i) % NFE_TX_RING_COUNT;
1106 		data = &ring->data[j];
1107 
1108 		if (sc->sc_flags & NFE_40BIT_ADDR) {
1109 			desc64 = &ring->desc64[j];
1110 #if defined(__LP64__)
1111 			desc64->physaddr[0] =
1112 			    htole32(segs[i].ds_addr >> 32);
1113 #endif
1114 			desc64->physaddr[1] =
1115 			    htole32(segs[i].ds_addr & 0xffffffff);
1116 			desc64->length = htole16(segs[i].ds_len - 1);
1117 			desc64->vtag = htole32(vtag);
1118 			desc64->flags = htole16(flags);
1119 		} else {
1120 			desc32 = &ring->desc32[j];
1121 			desc32->physaddr = htole32(segs[i].ds_addr);
1122 			desc32->length = htole16(segs[i].ds_len - 1);
1123 			desc32->flags = htole16(flags);
1124 		}
1125 
1126 		/* csum flags and vtag belong to the first fragment only */
1127 		flags &= ~(NFE_TX_IP_CSUM | NFE_TX_TCP_CSUM);
1128 		vtag = 0;
1129 
1130 		ring->queued++;
1131 		KKASSERT(ring->queued <= NFE_TX_RING_COUNT);
1132 	}
1133 
1134 	/* the whole mbuf chain has been DMA mapped, fix last descriptor */
1135 	if (sc->sc_flags & NFE_40BIT_ADDR) {
1136 		desc64->flags |= htole16(NFE_TX_LASTFRAG_V2);
1137 	} else {
1138 		if (sc->sc_flags & NFE_JUMBO_SUP)
1139 			flags = NFE_TX_LASTFRAG_V2;
1140 		else
1141 			flags = NFE_TX_LASTFRAG_V1;
1142 		desc32->flags |= htole16(flags);
1143 	}
1144 
1145 	/*
1146 	 * Set NFE_TX_VALID backwards so the hardware doesn't see the
1147 	 * whole mess until the first descriptor in the map is flagged.
1148 	 */
1149 	for (i = ctx.nsegs - 1; i >= 0; --i) {
1150 		j = (ring->cur + i) % NFE_TX_RING_COUNT;
1151 		if (sc->sc_flags & NFE_40BIT_ADDR) {
1152 			desc64 = &ring->desc64[j];
1153 			desc64->flags |= htole16(NFE_TX_VALID);
1154 		} else {
1155 			desc32 = &ring->desc32[j];
1156 			desc32->flags |= htole16(NFE_TX_VALID);
1157 		}
1158 	}
1159 	ring->cur = (ring->cur + ctx.nsegs) % NFE_TX_RING_COUNT;
1160 
1161 	/* Exchange DMA map */
1162 	data_map->map = data->map;
1163 	data->map = map;
1164 	data->m = m0;
1165 
1166 	bus_dmamap_sync(ring->data_tag, map, BUS_DMASYNC_PREWRITE);
1167 back:
1168 	if (error)
1169 		m_freem(m0);
1170 	return error;
1171 }
1172 
1173 static void
1174 nfe_start(struct ifnet *ifp)
1175 {
1176 	struct nfe_softc *sc = ifp->if_softc;
1177 	struct nfe_tx_ring *ring = &sc->txq;
1178 	int count = 0;
1179 	struct mbuf *m0;
1180 
1181 	if (ifp->if_flags & IFF_OACTIVE)
1182 		return;
1183 
1184 	if (ifq_is_empty(&ifp->if_snd))
1185 		return;
1186 
1187 	for (;;) {
1188 		m0 = ifq_dequeue(&ifp->if_snd, NULL);
1189 		if (m0 == NULL)
1190 			break;
1191 
1192 		ETHER_BPF_MTAP(ifp, m0);
1193 
1194 		if (nfe_encap(sc, ring, m0) != 0) {
1195 			ifp->if_flags |= IFF_OACTIVE;
1196 			break;
1197 		}
1198 		++count;
1199 
1200 		/*
1201 		 * NOTE:
1202 		 * `m0' may be freed in nfe_encap(), so
1203 		 * it should not be touched any more.
1204 		 */
1205 	}
1206 	if (count == 0)	/* nothing sent */
1207 		return;
1208 
1209 	/* Sync TX descriptor ring */
1210 	bus_dmamap_sync(ring->tag, ring->map, BUS_DMASYNC_PREWRITE);
1211 
1212 	/* Kick Tx */
1213 	NFE_WRITE(sc, NFE_RXTX_CTL, NFE_RXTX_KICKTX | sc->rxtxctl);
1214 
1215 	/*
1216 	 * Set a timeout in case the chip goes out to lunch.
1217 	 */
1218 	ifp->if_timer = 5;
1219 }
1220 
1221 static void
1222 nfe_watchdog(struct ifnet *ifp)
1223 {
1224 	struct nfe_softc *sc = ifp->if_softc;
1225 
1226 	if (ifp->if_flags & IFF_RUNNING) {
1227 		if_printf(ifp, "watchdog timeout - lost interrupt recovered\n");
1228 		nfe_txeof(sc);
1229 		return;
1230 	}
1231 
1232 	if_printf(ifp, "watchdog timeout\n");
1233 
1234 	nfe_init(ifp->if_softc);
1235 
1236 	ifp->if_oerrors++;
1237 }
1238 
1239 static void
1240 nfe_init(void *xsc)
1241 {
1242 	struct nfe_softc *sc = xsc;
1243 	struct ifnet *ifp = &sc->arpcom.ac_if;
1244 	uint32_t tmp;
1245 	int error;
1246 
1247 	nfe_stop(sc);
1248 
1249 	/*
1250 	 * NOTE:
1251 	 * Switching between jumbo frames and normal frames should
1252 	 * be done _after_ nfe_stop() but _before_ nfe_init_rx_ring().
1253 	 */
1254 	if (ifp->if_mtu > ETHERMTU) {
1255 		sc->sc_flags |= NFE_USE_JUMBO;
1256 		sc->rxq.bufsz = NFE_JBYTES;
1257 		if (bootverbose)
1258 			if_printf(ifp, "use jumbo frames\n");
1259 	} else {
1260 		sc->sc_flags &= ~NFE_USE_JUMBO;
1261 		sc->rxq.bufsz = MCLBYTES;
1262 		if (bootverbose)
1263 			if_printf(ifp, "use non-jumbo frames\n");
1264 	}
1265 
1266 	error = nfe_init_tx_ring(sc, &sc->txq);
1267 	if (error) {
1268 		nfe_stop(sc);
1269 		return;
1270 	}
1271 
1272 	error = nfe_init_rx_ring(sc, &sc->rxq);
1273 	if (error) {
1274 		nfe_stop(sc);
1275 		return;
1276 	}
1277 
1278 	NFE_WRITE(sc, NFE_TX_UNK, 0);
1279 	NFE_WRITE(sc, NFE_STATUS, 0);
1280 
1281 	sc->rxtxctl = NFE_RXTX_BIT2;
1282 	if (sc->sc_flags & NFE_40BIT_ADDR)
1283 		sc->rxtxctl |= NFE_RXTX_V3MAGIC;
1284 	else if (sc->sc_flags & NFE_JUMBO_SUP)
1285 		sc->rxtxctl |= NFE_RXTX_V2MAGIC;
1286 
1287 	if (ifp->if_capenable & IFCAP_RXCSUM)
1288 		sc->rxtxctl |= NFE_RXTX_RXCSUM;
1289 
1290 	/*
1291 	 * Although the adapter is capable of stripping VLAN tags from received
1292 	 * frames (NFE_RXTX_VTAG_STRIP), we do not enable this functionality on
1293 	 * purpose.  This will be done in software by our network stack.
1294 	 */
1295 	if (sc->sc_flags & NFE_HW_VLAN)
1296 		sc->rxtxctl |= NFE_RXTX_VTAG_INSERT;
1297 
1298 	NFE_WRITE(sc, NFE_RXTX_CTL, NFE_RXTX_RESET | sc->rxtxctl);
1299 	DELAY(10);
1300 	NFE_WRITE(sc, NFE_RXTX_CTL, sc->rxtxctl);
1301 
1302 	if (sc->sc_flags & NFE_HW_VLAN)
1303 		NFE_WRITE(sc, NFE_VTAG_CTL, NFE_VTAG_ENABLE);
1304 
1305 	NFE_WRITE(sc, NFE_SETUP_R6, 0);
1306 
1307 	/* set MAC address */
1308 	nfe_set_macaddr(sc, sc->arpcom.ac_enaddr);
1309 
1310 	/* tell MAC where rings are in memory */
1311 #ifdef __LP64__
1312 	NFE_WRITE(sc, NFE_RX_RING_ADDR_HI, sc->rxq.physaddr >> 32);
1313 #endif
1314 	NFE_WRITE(sc, NFE_RX_RING_ADDR_LO, sc->rxq.physaddr & 0xffffffff);
1315 #ifdef __LP64__
1316 	NFE_WRITE(sc, NFE_TX_RING_ADDR_HI, sc->txq.physaddr >> 32);
1317 #endif
1318 	NFE_WRITE(sc, NFE_TX_RING_ADDR_LO, sc->txq.physaddr & 0xffffffff);
1319 
1320 	NFE_WRITE(sc, NFE_RING_SIZE,
1321 	    (sc->sc_rx_ring_count - 1) << 16 |
1322 	    (NFE_TX_RING_COUNT - 1));
1323 
1324 	NFE_WRITE(sc, NFE_RXBUFSZ, sc->rxq.bufsz);
1325 
1326 	/* force MAC to wakeup */
1327 	tmp = NFE_READ(sc, NFE_PWR_STATE);
1328 	NFE_WRITE(sc, NFE_PWR_STATE, tmp | NFE_PWR_WAKEUP);
1329 	DELAY(10);
1330 	tmp = NFE_READ(sc, NFE_PWR_STATE);
1331 	NFE_WRITE(sc, NFE_PWR_STATE, tmp | NFE_PWR_VALID);
1332 
1333 	/*
1334 	 * NFE_IMTIMER generates a periodic interrupt via NFE_IRQ_TIMER.
1335 	 * It is unclear how wide the timer is.  Base programming does
1336 	 * not seem to effect NFE_IRQ_TX_DONE or NFE_IRQ_RX_DONE so
1337 	 * we don't get any interrupt moderation.  TX moderation is
1338 	 * possible by using the timer interrupt instead of TX_DONE.
1339 	 *
1340 	 * It is unclear whether there are other bits that can be
1341 	 * set to make the NFE device actually do interrupt moderation
1342 	 * on the RX side.
1343 	 *
1344 	 * For now set a 128uS interval as a placemark, but don't use
1345 	 * the timer.
1346 	 */
1347 	if (sc->sc_imtime < 0)
1348 		NFE_WRITE(sc, NFE_IMTIMER, NFE_IMTIME_DEFAULT);
1349 	else
1350 		NFE_WRITE(sc, NFE_IMTIMER, NFE_IMTIME(sc->sc_imtime));
1351 
1352 	NFE_WRITE(sc, NFE_SETUP_R1, NFE_R1_MAGIC);
1353 	NFE_WRITE(sc, NFE_SETUP_R2, NFE_R2_MAGIC);
1354 	NFE_WRITE(sc, NFE_SETUP_R6, NFE_R6_MAGIC);
1355 
1356 	/* update MAC knowledge of PHY; generates a NFE_IRQ_LINK interrupt */
1357 	NFE_WRITE(sc, NFE_STATUS, sc->mii_phyaddr << 24 | NFE_STATUS_MAGIC);
1358 
1359 	NFE_WRITE(sc, NFE_SETUP_R4, NFE_R4_MAGIC);
1360 	NFE_WRITE(sc, NFE_WOL_CTL, NFE_WOL_MAGIC);
1361 
1362 	sc->rxtxctl &= ~NFE_RXTX_BIT2;
1363 	NFE_WRITE(sc, NFE_RXTX_CTL, sc->rxtxctl);
1364 	DELAY(10);
1365 	NFE_WRITE(sc, NFE_RXTX_CTL, NFE_RXTX_BIT1 | sc->rxtxctl);
1366 
1367 	/* set Rx filter */
1368 	nfe_setmulti(sc);
1369 
1370 	nfe_ifmedia_upd(ifp);
1371 
1372 	/* enable Rx */
1373 	NFE_WRITE(sc, NFE_RX_CTL, NFE_RX_START);
1374 
1375 	/* enable Tx */
1376 	NFE_WRITE(sc, NFE_TX_CTL, NFE_TX_START);
1377 
1378 	NFE_WRITE(sc, NFE_PHY_STATUS, 0xf);
1379 
1380 #ifdef DEVICE_POLLING
1381 	if ((ifp->if_flags & IFF_POLLING) == 0)
1382 #endif
1383 	/* enable interrupts */
1384 	NFE_WRITE(sc, NFE_IRQ_MASK, sc->sc_irq_enable);
1385 
1386 	callout_reset(&sc->sc_tick_ch, hz, nfe_tick, sc);
1387 
1388 	ifp->if_flags |= IFF_RUNNING;
1389 	ifp->if_flags &= ~IFF_OACTIVE;
1390 
1391 	/*
1392 	 * If we had stuff in the tx ring before its all cleaned out now
1393 	 * so we are not going to get an interrupt, jump-start any pending
1394 	 * output.
1395 	 */
1396 	ifp->if_start(ifp);
1397 }
1398 
1399 static void
1400 nfe_stop(struct nfe_softc *sc)
1401 {
1402 	struct ifnet *ifp = &sc->arpcom.ac_if;
1403 
1404 	callout_stop(&sc->sc_tick_ch);
1405 
1406 	ifp->if_timer = 0;
1407 	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1408 
1409 	/*
1410 	 * Are NFE_TX_CTL and NFE_RX_CTL polled by the chip microcontroller
1411 	 * or do they directly reset/terminate the DMA hardware?  Nobody
1412 	 * knows.
1413 	 *
1414 	 * Add two delays:
1415 	 *
1416 	 * (1) Delay before zeroing out NFE_TX_CTL.  This seems to help a
1417 	 * watchdog timeout that occurs after a stop/init sequence.  I am
1418 	 * theorizing that a TX KICK occuring just prior to a reinit (e.g.
1419 	 * due to dhclient) is queueing an interrupt to the microcontroller
1420 	 * which gets delayed until after we clear the control registers
1421 	 * down below, resulting in mass confusion.  TX KICK is clearly
1422 	 * hardware aided whereas the other bits in the control register
1423 	 * are more likely to be polled by the microcontroller.
1424 	 *
1425 	 * (2) Delay after zeroing out TX and RX CTL registers, under the
1426 	 * assumption that primary DMA is initiated and terminated by
1427 	 * the microcontroller and not hardware (and anyway, one can hardly
1428 	 * expect the DMA engine to just instantly stop!).  We don't want
1429 	 * to rip the rings out from under it before it has had a chance to
1430 	 * actually stop!
1431 	 */
1432 	DELAY(1000);
1433 
1434 	/* Abort Tx */
1435 	NFE_WRITE(sc, NFE_TX_CTL, 0);
1436 
1437 	/* Disable Rx */
1438 	NFE_WRITE(sc, NFE_RX_CTL, 0);
1439 
1440 	/* Disable interrupts */
1441 	NFE_WRITE(sc, NFE_IRQ_MASK, 0);
1442 
1443 	DELAY(1000);
1444 
1445 	/* Reset Tx and Rx rings */
1446 	nfe_reset_tx_ring(sc, &sc->txq);
1447 	nfe_reset_rx_ring(sc, &sc->rxq);
1448 }
1449 
1450 static int
1451 nfe_alloc_rx_ring(struct nfe_softc *sc, struct nfe_rx_ring *ring)
1452 {
1453 	int i, j, error, descsize;
1454 	void **desc;
1455 
1456 	if (sc->sc_flags & NFE_40BIT_ADDR) {
1457 		desc = (void **)&ring->desc64;
1458 		descsize = sizeof(struct nfe_desc64);
1459 	} else {
1460 		desc = (void **)&ring->desc32;
1461 		descsize = sizeof(struct nfe_desc32);
1462 	}
1463 
1464 	ring->jbuf = kmalloc(sizeof(struct nfe_jbuf) * NFE_JPOOL_COUNT,
1465 			     M_DEVBUF, M_WAITOK | M_ZERO);
1466 	ring->data = kmalloc(sizeof(struct nfe_rx_data) * sc->sc_rx_ring_count,
1467 			     M_DEVBUF, M_WAITOK | M_ZERO);
1468 
1469 	ring->bufsz = MCLBYTES;
1470 	ring->cur = ring->next = 0;
1471 
1472 	error = bus_dma_tag_create(NULL, PAGE_SIZE, 0,
1473 				   BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR,
1474 				   NULL, NULL,
1475 				   sc->sc_rx_ring_count * descsize, 1,
1476 				   sc->sc_rx_ring_count * descsize,
1477 				   0, &ring->tag);
1478 	if (error) {
1479 		if_printf(&sc->arpcom.ac_if,
1480 			  "could not create desc RX DMA tag\n");
1481 		return error;
1482 	}
1483 
1484 	error = bus_dmamem_alloc(ring->tag, desc, BUS_DMA_WAITOK | BUS_DMA_ZERO,
1485 				 &ring->map);
1486 	if (error) {
1487 		if_printf(&sc->arpcom.ac_if,
1488 			  "could not allocate RX desc DMA memory\n");
1489 		bus_dma_tag_destroy(ring->tag);
1490 		ring->tag = NULL;
1491 		return error;
1492 	}
1493 
1494 	error = bus_dmamap_load(ring->tag, ring->map, *desc,
1495 				sc->sc_rx_ring_count * descsize,
1496 				nfe_ring_dma_addr, &ring->physaddr,
1497 				BUS_DMA_WAITOK);
1498 	if (error) {
1499 		if_printf(&sc->arpcom.ac_if,
1500 			  "could not load RX desc DMA map\n");
1501 		bus_dmamem_free(ring->tag, *desc, ring->map);
1502 		bus_dma_tag_destroy(ring->tag);
1503 		ring->tag = NULL;
1504 		return error;
1505 	}
1506 
1507 	if (sc->sc_flags & NFE_JUMBO_SUP) {
1508 		error = nfe_jpool_alloc(sc, ring);
1509 		if (error) {
1510 			if_printf(&sc->arpcom.ac_if,
1511 				  "could not allocate jumbo frames\n");
1512 			return error;
1513 		}
1514 	}
1515 
1516 	error = bus_dma_tag_create(NULL, 1, 0,
1517 				   BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR,
1518 				   NULL, NULL,
1519 				   MCLBYTES, 1, MCLBYTES,
1520 				   0, &ring->data_tag);
1521 	if (error) {
1522 		if_printf(&sc->arpcom.ac_if,
1523 			  "could not create RX mbuf DMA tag\n");
1524 		return error;
1525 	}
1526 
1527 	/* Create a spare RX mbuf DMA map */
1528 	error = bus_dmamap_create(ring->data_tag, 0, &ring->data_tmpmap);
1529 	if (error) {
1530 		if_printf(&sc->arpcom.ac_if,
1531 			  "could not create spare RX mbuf DMA map\n");
1532 		bus_dma_tag_destroy(ring->data_tag);
1533 		ring->data_tag = NULL;
1534 		return error;
1535 	}
1536 
1537 	for (i = 0; i < sc->sc_rx_ring_count; i++) {
1538 		error = bus_dmamap_create(ring->data_tag, 0,
1539 					  &ring->data[i].map);
1540 		if (error) {
1541 			if_printf(&sc->arpcom.ac_if,
1542 				  "could not create %dth RX mbuf DMA mapn", i);
1543 			goto fail;
1544 		}
1545 	}
1546 	return 0;
1547 fail:
1548 	for (j = 0; j < i; ++j)
1549 		bus_dmamap_destroy(ring->data_tag, ring->data[i].map);
1550 	bus_dmamap_destroy(ring->data_tag, ring->data_tmpmap);
1551 	bus_dma_tag_destroy(ring->data_tag);
1552 	ring->data_tag = NULL;
1553 	return error;
1554 }
1555 
1556 static void
1557 nfe_reset_rx_ring(struct nfe_softc *sc, struct nfe_rx_ring *ring)
1558 {
1559 	int i;
1560 
1561 	for (i = 0; i < sc->sc_rx_ring_count; i++) {
1562 		struct nfe_rx_data *data = &ring->data[i];
1563 
1564 		if (data->m != NULL) {
1565 			if ((sc->sc_flags & NFE_USE_JUMBO) == 0)
1566 				bus_dmamap_unload(ring->data_tag, data->map);
1567 			m_freem(data->m);
1568 			data->m = NULL;
1569 		}
1570 	}
1571 	bus_dmamap_sync(ring->tag, ring->map, BUS_DMASYNC_PREWRITE);
1572 
1573 	ring->cur = ring->next = 0;
1574 }
1575 
1576 static int
1577 nfe_init_rx_ring(struct nfe_softc *sc, struct nfe_rx_ring *ring)
1578 {
1579 	int i;
1580 
1581 	for (i = 0; i < sc->sc_rx_ring_count; ++i) {
1582 		int error;
1583 
1584 		/* XXX should use a function pointer */
1585 		if (sc->sc_flags & NFE_USE_JUMBO)
1586 			error = nfe_newbuf_jumbo(sc, ring, i, 1);
1587 		else
1588 			error = nfe_newbuf_std(sc, ring, i, 1);
1589 		if (error) {
1590 			if_printf(&sc->arpcom.ac_if,
1591 				  "could not allocate RX buffer\n");
1592 			return error;
1593 		}
1594 
1595 		nfe_set_ready_rxdesc(sc, ring, i);
1596 	}
1597 	bus_dmamap_sync(ring->tag, ring->map, BUS_DMASYNC_PREWRITE);
1598 
1599 	return 0;
1600 }
1601 
1602 static void
1603 nfe_free_rx_ring(struct nfe_softc *sc, struct nfe_rx_ring *ring)
1604 {
1605 	if (ring->data_tag != NULL) {
1606 		struct nfe_rx_data *data;
1607 		int i;
1608 
1609 		for (i = 0; i < sc->sc_rx_ring_count; i++) {
1610 			data = &ring->data[i];
1611 
1612 			if (data->m != NULL) {
1613 				bus_dmamap_unload(ring->data_tag, data->map);
1614 				m_freem(data->m);
1615 			}
1616 			bus_dmamap_destroy(ring->data_tag, data->map);
1617 		}
1618 		bus_dmamap_destroy(ring->data_tag, ring->data_tmpmap);
1619 		bus_dma_tag_destroy(ring->data_tag);
1620 	}
1621 
1622 	nfe_jpool_free(sc, ring);
1623 
1624 	if (ring->jbuf != NULL)
1625 		kfree(ring->jbuf, M_DEVBUF);
1626 	if (ring->data != NULL)
1627 		kfree(ring->data, M_DEVBUF);
1628 
1629 	if (ring->tag != NULL) {
1630 		void *desc;
1631 
1632 		if (sc->sc_flags & NFE_40BIT_ADDR)
1633 			desc = ring->desc64;
1634 		else
1635 			desc = ring->desc32;
1636 
1637 		bus_dmamap_unload(ring->tag, ring->map);
1638 		bus_dmamem_free(ring->tag, desc, ring->map);
1639 		bus_dma_tag_destroy(ring->tag);
1640 	}
1641 }
1642 
1643 static struct nfe_jbuf *
1644 nfe_jalloc(struct nfe_softc *sc)
1645 {
1646 	struct ifnet *ifp = &sc->arpcom.ac_if;
1647 	struct nfe_jbuf *jbuf;
1648 
1649 	lwkt_serialize_enter(&sc->sc_jbuf_serializer);
1650 
1651 	jbuf = SLIST_FIRST(&sc->rxq.jfreelist);
1652 	if (jbuf != NULL) {
1653 		SLIST_REMOVE_HEAD(&sc->rxq.jfreelist, jnext);
1654 		jbuf->inuse = 1;
1655 	} else {
1656 		if_printf(ifp, "no free jumbo buffer\n");
1657 	}
1658 
1659 	lwkt_serialize_exit(&sc->sc_jbuf_serializer);
1660 
1661 	return jbuf;
1662 }
1663 
1664 static void
1665 nfe_jfree(void *arg)
1666 {
1667 	struct nfe_jbuf *jbuf = arg;
1668 	struct nfe_softc *sc = jbuf->sc;
1669 	struct nfe_rx_ring *ring = jbuf->ring;
1670 
1671 	if (&ring->jbuf[jbuf->slot] != jbuf)
1672 		panic("%s: free wrong jumbo buffer\n", __func__);
1673 	else if (jbuf->inuse == 0)
1674 		panic("%s: jumbo buffer already freed\n", __func__);
1675 
1676 	lwkt_serialize_enter(&sc->sc_jbuf_serializer);
1677 	atomic_subtract_int(&jbuf->inuse, 1);
1678 	if (jbuf->inuse == 0)
1679 		SLIST_INSERT_HEAD(&ring->jfreelist, jbuf, jnext);
1680 	lwkt_serialize_exit(&sc->sc_jbuf_serializer);
1681 }
1682 
1683 static void
1684 nfe_jref(void *arg)
1685 {
1686 	struct nfe_jbuf *jbuf = arg;
1687 	struct nfe_rx_ring *ring = jbuf->ring;
1688 
1689 	if (&ring->jbuf[jbuf->slot] != jbuf)
1690 		panic("%s: ref wrong jumbo buffer\n", __func__);
1691 	else if (jbuf->inuse == 0)
1692 		panic("%s: jumbo buffer already freed\n", __func__);
1693 
1694 	atomic_add_int(&jbuf->inuse, 1);
1695 }
1696 
1697 static int
1698 nfe_jpool_alloc(struct nfe_softc *sc, struct nfe_rx_ring *ring)
1699 {
1700 	struct nfe_jbuf *jbuf;
1701 	bus_addr_t physaddr;
1702 	caddr_t buf;
1703 	int i, error;
1704 
1705 	/*
1706 	 * Allocate a big chunk of DMA'able memory.
1707 	 */
1708 	error = bus_dma_tag_create(NULL, PAGE_SIZE, 0,
1709 				   BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR,
1710 				   NULL, NULL,
1711 				   NFE_JPOOL_SIZE, 1, NFE_JPOOL_SIZE,
1712 				   0, &ring->jtag);
1713 	if (error) {
1714 		if_printf(&sc->arpcom.ac_if,
1715 			  "could not create jumbo DMA tag\n");
1716 		return error;
1717 	}
1718 
1719 	error = bus_dmamem_alloc(ring->jtag, (void **)&ring->jpool,
1720 				 BUS_DMA_WAITOK, &ring->jmap);
1721 	if (error) {
1722 		if_printf(&sc->arpcom.ac_if,
1723 			  "could not allocate jumbo DMA memory\n");
1724 		bus_dma_tag_destroy(ring->jtag);
1725 		ring->jtag = NULL;
1726 		return error;
1727 	}
1728 
1729 	error = bus_dmamap_load(ring->jtag, ring->jmap, ring->jpool,
1730 				NFE_JPOOL_SIZE, nfe_ring_dma_addr, &physaddr,
1731 				BUS_DMA_WAITOK);
1732 	if (error) {
1733 		if_printf(&sc->arpcom.ac_if,
1734 			  "could not load jumbo DMA map\n");
1735 		bus_dmamem_free(ring->jtag, ring->jpool, ring->jmap);
1736 		bus_dma_tag_destroy(ring->jtag);
1737 		ring->jtag = NULL;
1738 		return error;
1739 	}
1740 
1741 	/* ..and split it into 9KB chunks */
1742 	SLIST_INIT(&ring->jfreelist);
1743 
1744 	buf = ring->jpool;
1745 	for (i = 0; i < NFE_JPOOL_COUNT; i++) {
1746 		jbuf = &ring->jbuf[i];
1747 
1748 		jbuf->sc = sc;
1749 		jbuf->ring = ring;
1750 		jbuf->inuse = 0;
1751 		jbuf->slot = i;
1752 		jbuf->buf = buf;
1753 		jbuf->physaddr = physaddr;
1754 
1755 		SLIST_INSERT_HEAD(&ring->jfreelist, jbuf, jnext);
1756 
1757 		buf += NFE_JBYTES;
1758 		physaddr += NFE_JBYTES;
1759 	}
1760 
1761 	return 0;
1762 }
1763 
1764 static void
1765 nfe_jpool_free(struct nfe_softc *sc, struct nfe_rx_ring *ring)
1766 {
1767 	if (ring->jtag != NULL) {
1768 		bus_dmamap_unload(ring->jtag, ring->jmap);
1769 		bus_dmamem_free(ring->jtag, ring->jpool, ring->jmap);
1770 		bus_dma_tag_destroy(ring->jtag);
1771 	}
1772 }
1773 
1774 static int
1775 nfe_alloc_tx_ring(struct nfe_softc *sc, struct nfe_tx_ring *ring)
1776 {
1777 	int i, j, error, descsize;
1778 	void **desc;
1779 
1780 	if (sc->sc_flags & NFE_40BIT_ADDR) {
1781 		desc = (void **)&ring->desc64;
1782 		descsize = sizeof(struct nfe_desc64);
1783 	} else {
1784 		desc = (void **)&ring->desc32;
1785 		descsize = sizeof(struct nfe_desc32);
1786 	}
1787 
1788 	ring->queued = 0;
1789 	ring->cur = ring->next = 0;
1790 
1791 	error = bus_dma_tag_create(NULL, PAGE_SIZE, 0,
1792 				   BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR,
1793 				   NULL, NULL,
1794 				   NFE_TX_RING_COUNT * descsize, 1,
1795 				   NFE_TX_RING_COUNT * descsize,
1796 				   0, &ring->tag);
1797 	if (error) {
1798 		if_printf(&sc->arpcom.ac_if,
1799 			  "could not create TX desc DMA map\n");
1800 		return error;
1801 	}
1802 
1803 	error = bus_dmamem_alloc(ring->tag, desc, BUS_DMA_WAITOK | BUS_DMA_ZERO,
1804 				 &ring->map);
1805 	if (error) {
1806 		if_printf(&sc->arpcom.ac_if,
1807 			  "could not allocate TX desc DMA memory\n");
1808 		bus_dma_tag_destroy(ring->tag);
1809 		ring->tag = NULL;
1810 		return error;
1811 	}
1812 
1813 	error = bus_dmamap_load(ring->tag, ring->map, *desc,
1814 				NFE_TX_RING_COUNT * descsize,
1815 				nfe_ring_dma_addr, &ring->physaddr,
1816 				BUS_DMA_WAITOK);
1817 	if (error) {
1818 		if_printf(&sc->arpcom.ac_if,
1819 			  "could not load TX desc DMA map\n");
1820 		bus_dmamem_free(ring->tag, *desc, ring->map);
1821 		bus_dma_tag_destroy(ring->tag);
1822 		ring->tag = NULL;
1823 		return error;
1824 	}
1825 
1826 	error = bus_dma_tag_create(NULL, PAGE_SIZE, 0,
1827 				   BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR,
1828 				   NULL, NULL,
1829 				   NFE_JBYTES * NFE_MAX_SCATTER,
1830 				   NFE_MAX_SCATTER, NFE_JBYTES,
1831 				   0, &ring->data_tag);
1832 	if (error) {
1833 		if_printf(&sc->arpcom.ac_if,
1834 			  "could not create TX buf DMA tag\n");
1835 		return error;
1836 	}
1837 
1838 	for (i = 0; i < NFE_TX_RING_COUNT; i++) {
1839 		error = bus_dmamap_create(ring->data_tag, 0,
1840 					  &ring->data[i].map);
1841 		if (error) {
1842 			if_printf(&sc->arpcom.ac_if,
1843 				  "could not create %dth TX buf DMA map\n", i);
1844 			goto fail;
1845 		}
1846 	}
1847 
1848 	return 0;
1849 fail:
1850 	for (j = 0; j < i; ++j)
1851 		bus_dmamap_destroy(ring->data_tag, ring->data[i].map);
1852 	bus_dma_tag_destroy(ring->data_tag);
1853 	ring->data_tag = NULL;
1854 	return error;
1855 }
1856 
1857 static void
1858 nfe_reset_tx_ring(struct nfe_softc *sc, struct nfe_tx_ring *ring)
1859 {
1860 	int i;
1861 
1862 	for (i = 0; i < NFE_TX_RING_COUNT; i++) {
1863 		struct nfe_tx_data *data = &ring->data[i];
1864 
1865 		if (sc->sc_flags & NFE_40BIT_ADDR)
1866 			ring->desc64[i].flags = 0;
1867 		else
1868 			ring->desc32[i].flags = 0;
1869 
1870 		if (data->m != NULL) {
1871 			bus_dmamap_sync(ring->data_tag, data->map,
1872 					BUS_DMASYNC_POSTWRITE);
1873 			bus_dmamap_unload(ring->data_tag, data->map);
1874 			m_freem(data->m);
1875 			data->m = NULL;
1876 		}
1877 	}
1878 	bus_dmamap_sync(ring->tag, ring->map, BUS_DMASYNC_PREWRITE);
1879 
1880 	ring->queued = 0;
1881 	ring->cur = ring->next = 0;
1882 }
1883 
1884 static int
1885 nfe_init_tx_ring(struct nfe_softc *sc __unused,
1886 		 struct nfe_tx_ring *ring __unused)
1887 {
1888 	return 0;
1889 }
1890 
1891 static void
1892 nfe_free_tx_ring(struct nfe_softc *sc, struct nfe_tx_ring *ring)
1893 {
1894 	if (ring->data_tag != NULL) {
1895 		struct nfe_tx_data *data;
1896 		int i;
1897 
1898 		for (i = 0; i < NFE_TX_RING_COUNT; ++i) {
1899 			data = &ring->data[i];
1900 
1901 			if (data->m != NULL) {
1902 				bus_dmamap_unload(ring->data_tag, data->map);
1903 				m_freem(data->m);
1904 			}
1905 			bus_dmamap_destroy(ring->data_tag, data->map);
1906 		}
1907 
1908 		bus_dma_tag_destroy(ring->data_tag);
1909 	}
1910 
1911 	if (ring->tag != NULL) {
1912 		void *desc;
1913 
1914 		if (sc->sc_flags & NFE_40BIT_ADDR)
1915 			desc = ring->desc64;
1916 		else
1917 			desc = ring->desc32;
1918 
1919 		bus_dmamap_unload(ring->tag, ring->map);
1920 		bus_dmamem_free(ring->tag, desc, ring->map);
1921 		bus_dma_tag_destroy(ring->tag);
1922 	}
1923 }
1924 
1925 static int
1926 nfe_ifmedia_upd(struct ifnet *ifp)
1927 {
1928 	struct nfe_softc *sc = ifp->if_softc;
1929 	struct mii_data *mii = device_get_softc(sc->sc_miibus);
1930 
1931 	if (mii->mii_instance != 0) {
1932 		struct mii_softc *miisc;
1933 
1934 		LIST_FOREACH(miisc, &mii->mii_phys, mii_list)
1935 			mii_phy_reset(miisc);
1936 	}
1937 	mii_mediachg(mii);
1938 
1939 	return 0;
1940 }
1941 
1942 static void
1943 nfe_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
1944 {
1945 	struct nfe_softc *sc = ifp->if_softc;
1946 	struct mii_data *mii = device_get_softc(sc->sc_miibus);
1947 
1948 	mii_pollstat(mii);
1949 	ifmr->ifm_status = mii->mii_media_status;
1950 	ifmr->ifm_active = mii->mii_media_active;
1951 }
1952 
1953 static void
1954 nfe_setmulti(struct nfe_softc *sc)
1955 {
1956 	struct ifnet *ifp = &sc->arpcom.ac_if;
1957 	struct ifmultiaddr *ifma;
1958 	uint8_t addr[ETHER_ADDR_LEN], mask[ETHER_ADDR_LEN];
1959 	uint32_t filter = NFE_RXFILTER_MAGIC;
1960 	int i;
1961 
1962 	if ((ifp->if_flags & (IFF_ALLMULTI | IFF_PROMISC)) != 0) {
1963 		bzero(addr, ETHER_ADDR_LEN);
1964 		bzero(mask, ETHER_ADDR_LEN);
1965 		goto done;
1966 	}
1967 
1968 	bcopy(etherbroadcastaddr, addr, ETHER_ADDR_LEN);
1969 	bcopy(etherbroadcastaddr, mask, ETHER_ADDR_LEN);
1970 
1971 	LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1972 		caddr_t maddr;
1973 
1974 		if (ifma->ifma_addr->sa_family != AF_LINK)
1975 			continue;
1976 
1977 		maddr = LLADDR((struct sockaddr_dl *)ifma->ifma_addr);
1978 		for (i = 0; i < ETHER_ADDR_LEN; i++) {
1979 			addr[i] &= maddr[i];
1980 			mask[i] &= ~maddr[i];
1981 		}
1982 	}
1983 
1984 	for (i = 0; i < ETHER_ADDR_LEN; i++)
1985 		mask[i] |= addr[i];
1986 
1987 done:
1988 	addr[0] |= 0x01;	/* make sure multicast bit is set */
1989 
1990 	NFE_WRITE(sc, NFE_MULTIADDR_HI,
1991 	    addr[3] << 24 | addr[2] << 16 | addr[1] << 8 | addr[0]);
1992 	NFE_WRITE(sc, NFE_MULTIADDR_LO,
1993 	    addr[5] <<  8 | addr[4]);
1994 	NFE_WRITE(sc, NFE_MULTIMASK_HI,
1995 	    mask[3] << 24 | mask[2] << 16 | mask[1] << 8 | mask[0]);
1996 	NFE_WRITE(sc, NFE_MULTIMASK_LO,
1997 	    mask[5] <<  8 | mask[4]);
1998 
1999 	filter |= (ifp->if_flags & IFF_PROMISC) ? NFE_PROMISC : NFE_U2M;
2000 	NFE_WRITE(sc, NFE_RXFILTER, filter);
2001 }
2002 
2003 static void
2004 nfe_get_macaddr(struct nfe_softc *sc, uint8_t *addr)
2005 {
2006 	uint32_t tmp;
2007 
2008 	tmp = NFE_READ(sc, NFE_MACADDR_LO);
2009 	addr[0] = (tmp >> 8) & 0xff;
2010 	addr[1] = (tmp & 0xff);
2011 
2012 	tmp = NFE_READ(sc, NFE_MACADDR_HI);
2013 	addr[2] = (tmp >> 24) & 0xff;
2014 	addr[3] = (tmp >> 16) & 0xff;
2015 	addr[4] = (tmp >>  8) & 0xff;
2016 	addr[5] = (tmp & 0xff);
2017 }
2018 
2019 static void
2020 nfe_set_macaddr(struct nfe_softc *sc, const uint8_t *addr)
2021 {
2022 	NFE_WRITE(sc, NFE_MACADDR_LO,
2023 	    addr[5] <<  8 | addr[4]);
2024 	NFE_WRITE(sc, NFE_MACADDR_HI,
2025 	    addr[3] << 24 | addr[2] << 16 | addr[1] << 8 | addr[0]);
2026 }
2027 
2028 static void
2029 nfe_tick(void *arg)
2030 {
2031 	struct nfe_softc *sc = arg;
2032 	struct ifnet *ifp = &sc->arpcom.ac_if;
2033 	struct mii_data *mii = device_get_softc(sc->sc_miibus);
2034 
2035 	lwkt_serialize_enter(ifp->if_serializer);
2036 
2037 	mii_tick(mii);
2038 	callout_reset(&sc->sc_tick_ch, hz, nfe_tick, sc);
2039 
2040 	lwkt_serialize_exit(ifp->if_serializer);
2041 }
2042 
2043 static void
2044 nfe_ring_dma_addr(void *arg, bus_dma_segment_t *seg, int nseg, int error)
2045 {
2046 	if (error)
2047 		return;
2048 
2049 	KASSERT(nseg == 1, ("too many segments, should be 1\n"));
2050 
2051 	*((uint32_t *)arg) = seg->ds_addr;
2052 }
2053 
2054 static void
2055 nfe_buf_dma_addr(void *arg, bus_dma_segment_t *segs, int nsegs,
2056 		 bus_size_t mapsz __unused, int error)
2057 {
2058 	struct nfe_dma_ctx *ctx = arg;
2059 	int i;
2060 
2061 	if (error)
2062 		return;
2063 
2064 	KASSERT(nsegs <= ctx->nsegs,
2065 		("too many segments(%d), should be <= %d\n",
2066 		 nsegs, ctx->nsegs));
2067 
2068 	ctx->nsegs = nsegs;
2069 	for (i = 0; i < nsegs; ++i)
2070 		ctx->segs[i] = segs[i];
2071 }
2072 
2073 static int
2074 nfe_newbuf_std(struct nfe_softc *sc, struct nfe_rx_ring *ring, int idx,
2075 	       int wait)
2076 {
2077 	struct nfe_rx_data *data = &ring->data[idx];
2078 	struct nfe_dma_ctx ctx;
2079 	bus_dma_segment_t seg;
2080 	bus_dmamap_t map;
2081 	struct mbuf *m;
2082 	int error;
2083 
2084 	m = m_getcl(wait ? MB_WAIT : MB_DONTWAIT, MT_DATA, M_PKTHDR);
2085 	if (m == NULL)
2086 		return ENOBUFS;
2087 	m->m_len = m->m_pkthdr.len = MCLBYTES;
2088 
2089 	ctx.nsegs = 1;
2090 	ctx.segs = &seg;
2091 	error = bus_dmamap_load_mbuf(ring->data_tag, ring->data_tmpmap,
2092 				     m, nfe_buf_dma_addr, &ctx,
2093 				     wait ? BUS_DMA_WAITOK : BUS_DMA_NOWAIT);
2094 	if (error) {
2095 		m_freem(m);
2096 		if_printf(&sc->arpcom.ac_if, "could map RX mbuf %d\n", error);
2097 		return error;
2098 	}
2099 
2100 	/* Unload originally mapped mbuf */
2101 	bus_dmamap_unload(ring->data_tag, data->map);
2102 
2103 	/* Swap this DMA map with tmp DMA map */
2104 	map = data->map;
2105 	data->map = ring->data_tmpmap;
2106 	ring->data_tmpmap = map;
2107 
2108 	/* Caller is assumed to have collected the old mbuf */
2109 	data->m = m;
2110 
2111 	nfe_set_paddr_rxdesc(sc, ring, idx, seg.ds_addr);
2112 
2113 	bus_dmamap_sync(ring->data_tag, data->map, BUS_DMASYNC_PREREAD);
2114 	return 0;
2115 }
2116 
2117 static int
2118 nfe_newbuf_jumbo(struct nfe_softc *sc, struct nfe_rx_ring *ring, int idx,
2119 		 int wait)
2120 {
2121 	struct nfe_rx_data *data = &ring->data[idx];
2122 	struct nfe_jbuf *jbuf;
2123 	struct mbuf *m;
2124 
2125 	MGETHDR(m, wait ? MB_WAIT : MB_DONTWAIT, MT_DATA);
2126 	if (m == NULL)
2127 		return ENOBUFS;
2128 
2129 	jbuf = nfe_jalloc(sc);
2130 	if (jbuf == NULL) {
2131 		m_freem(m);
2132 		if_printf(&sc->arpcom.ac_if, "jumbo allocation failed "
2133 		    "-- packet dropped!\n");
2134 		return ENOBUFS;
2135 	}
2136 
2137 	m->m_ext.ext_arg = jbuf;
2138 	m->m_ext.ext_buf = jbuf->buf;
2139 	m->m_ext.ext_free = nfe_jfree;
2140 	m->m_ext.ext_ref = nfe_jref;
2141 	m->m_ext.ext_size = NFE_JBYTES;
2142 
2143 	m->m_data = m->m_ext.ext_buf;
2144 	m->m_flags |= M_EXT;
2145 	m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
2146 
2147 	/* Caller is assumed to have collected the old mbuf */
2148 	data->m = m;
2149 
2150 	nfe_set_paddr_rxdesc(sc, ring, idx, jbuf->physaddr);
2151 
2152 	bus_dmamap_sync(ring->jtag, ring->jmap, BUS_DMASYNC_PREREAD);
2153 	return 0;
2154 }
2155 
2156 static void
2157 nfe_set_paddr_rxdesc(struct nfe_softc *sc, struct nfe_rx_ring *ring, int idx,
2158 		     bus_addr_t physaddr)
2159 {
2160 	if (sc->sc_flags & NFE_40BIT_ADDR) {
2161 		struct nfe_desc64 *desc64 = &ring->desc64[idx];
2162 
2163 #if defined(__LP64__)
2164 		desc64->physaddr[0] = htole32(physaddr >> 32);
2165 #endif
2166 		desc64->physaddr[1] = htole32(physaddr & 0xffffffff);
2167 	} else {
2168 		struct nfe_desc32 *desc32 = &ring->desc32[idx];
2169 
2170 		desc32->physaddr = htole32(physaddr);
2171 	}
2172 }
2173 
2174 static void
2175 nfe_set_ready_rxdesc(struct nfe_softc *sc, struct nfe_rx_ring *ring, int idx)
2176 {
2177 	if (sc->sc_flags & NFE_40BIT_ADDR) {
2178 		struct nfe_desc64 *desc64 = &ring->desc64[idx];
2179 
2180 		desc64->length = htole16(ring->bufsz);
2181 		desc64->flags = htole16(NFE_RX_READY);
2182 	} else {
2183 		struct nfe_desc32 *desc32 = &ring->desc32[idx];
2184 
2185 		desc32->length = htole16(ring->bufsz);
2186 		desc32->flags = htole16(NFE_RX_READY);
2187 	}
2188 }
2189 
2190 static int
2191 nfe_sysctl_imtime(SYSCTL_HANDLER_ARGS)
2192 {
2193 	struct nfe_softc *sc = arg1;
2194 	struct ifnet *ifp = &sc->arpcom.ac_if;
2195 	int error, v;
2196 
2197 	lwkt_serialize_enter(ifp->if_serializer);
2198 
2199 	v = sc->sc_imtime;
2200 	error = sysctl_handle_int(oidp, &v, 0, req);
2201 	if (error || req->newptr == NULL)
2202 		goto back;
2203 	if (v == 0) {
2204 		error = EINVAL;
2205 		goto back;
2206 	}
2207 
2208 	if (sc->sc_imtime != v) {
2209 		int old_imtime = sc->sc_imtime;
2210 
2211 		sc->sc_imtime = v;
2212 		sc->sc_irq_enable = NFE_IRQ_ENABLE(sc);
2213 
2214 		if ((ifp->if_flags & (IFF_POLLING | IFF_RUNNING))
2215 		    == IFF_RUNNING) {
2216 			if (old_imtime > 0 && sc->sc_imtime > 0) {
2217 				NFE_WRITE(sc, NFE_IMTIMER,
2218 					  NFE_IMTIME(sc->sc_imtime));
2219 			} else if ((old_imtime * sc->sc_imtime) < 0) {
2220 				ifp->if_init(sc);
2221 			}
2222 		}
2223 	}
2224 back:
2225 	lwkt_serialize_exit(ifp->if_serializer);
2226 	return error;
2227 }
2228