xref: /dragonfly/sys/dev/netif/nfe/if_nfe.c (revision 7bc7e232)
1 /*	$OpenBSD: if_nfe.c,v 1.63 2006/06/17 18:00:43 brad Exp $	*/
2 /*	$DragonFly: src/sys/dev/netif/nfe/if_nfe.c,v 1.17 2007/09/10 14:08:28 sephe Exp $	*/
3 
4 /*
5  * Copyright (c) 2006 The DragonFly Project.  All rights reserved.
6  *
7  * This code is derived from software contributed to The DragonFly Project
8  * by Sepherosa Ziehau <sepherosa@gmail.com> and
9  * Matthew Dillon <dillon@apollo.backplane.com>
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  *
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in
19  *    the documentation and/or other materials provided with the
20  *    distribution.
21  * 3. Neither the name of The DragonFly Project nor the names of its
22  *    contributors may be used to endorse or promote products derived
23  *    from this software without specific, prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
28  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
29  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
30  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
31  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
32  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
33  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
34  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
35  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  */
38 
39 /*
40  * Copyright (c) 2006 Damien Bergamini <damien.bergamini@free.fr>
41  * Copyright (c) 2005, 2006 Jonathan Gray <jsg@openbsd.org>
42  *
43  * Permission to use, copy, modify, and distribute this software for any
44  * purpose with or without fee is hereby granted, provided that the above
45  * copyright notice and this permission notice appear in all copies.
46  *
47  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
48  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
49  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
50  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
51  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
52  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
53  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
54  */
55 
56 /* Driver for NVIDIA nForce MCP Fast Ethernet and Gigabit Ethernet */
57 
58 #include "opt_polling.h"
59 
60 #include <sys/param.h>
61 #include <sys/endian.h>
62 #include <sys/kernel.h>
63 #include <sys/bus.h>
64 #include <sys/proc.h>
65 #include <sys/rman.h>
66 #include <sys/serialize.h>
67 #include <sys/socket.h>
68 #include <sys/sockio.h>
69 #include <sys/sysctl.h>
70 
71 #include <net/ethernet.h>
72 #include <net/if.h>
73 #include <net/bpf.h>
74 #include <net/if_arp.h>
75 #include <net/if_dl.h>
76 #include <net/if_media.h>
77 #include <net/ifq_var.h>
78 #include <net/if_types.h>
79 #include <net/if_var.h>
80 #include <net/vlan/if_vlan_var.h>
81 
82 #include <bus/pci/pcireg.h>
83 #include <bus/pci/pcivar.h>
84 #include <bus/pci/pcidevs.h>
85 
86 #include <dev/netif/mii_layer/mii.h>
87 #include <dev/netif/mii_layer/miivar.h>
88 
89 #include "miibus_if.h"
90 
91 #include <dev/netif/nfe/if_nfereg.h>
92 #include <dev/netif/nfe/if_nfevar.h>
93 
94 #define NFE_CSUM
95 #define NFE_CSUM_FEATURES	(CSUM_IP | CSUM_TCP | CSUM_UDP)
96 
97 static int	nfe_probe(device_t);
98 static int	nfe_attach(device_t);
99 static int	nfe_detach(device_t);
100 static void	nfe_shutdown(device_t);
101 static int	nfe_resume(device_t);
102 static int	nfe_suspend(device_t);
103 
104 static int	nfe_miibus_readreg(device_t, int, int);
105 static void	nfe_miibus_writereg(device_t, int, int, int);
106 static void	nfe_miibus_statchg(device_t);
107 
108 #ifdef DEVICE_POLLING
109 static void	nfe_poll(struct ifnet *, enum poll_cmd, int);
110 #endif
111 static void	nfe_intr(void *);
112 static int	nfe_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
113 static void	nfe_rxeof(struct nfe_softc *);
114 static void	nfe_txeof(struct nfe_softc *);
115 static int	nfe_encap(struct nfe_softc *, struct nfe_tx_ring *,
116 			  struct mbuf *);
117 static void	nfe_start(struct ifnet *);
118 static void	nfe_watchdog(struct ifnet *);
119 static void	nfe_init(void *);
120 static void	nfe_stop(struct nfe_softc *);
121 static struct nfe_jbuf *nfe_jalloc(struct nfe_softc *);
122 static void	nfe_jfree(void *);
123 static void	nfe_jref(void *);
124 static int	nfe_jpool_alloc(struct nfe_softc *, struct nfe_rx_ring *);
125 static void	nfe_jpool_free(struct nfe_softc *, struct nfe_rx_ring *);
126 static int	nfe_alloc_rx_ring(struct nfe_softc *, struct nfe_rx_ring *);
127 static void	nfe_reset_rx_ring(struct nfe_softc *, struct nfe_rx_ring *);
128 static int	nfe_init_rx_ring(struct nfe_softc *, struct nfe_rx_ring *);
129 static void	nfe_free_rx_ring(struct nfe_softc *, struct nfe_rx_ring *);
130 static int	nfe_alloc_tx_ring(struct nfe_softc *, struct nfe_tx_ring *);
131 static void	nfe_reset_tx_ring(struct nfe_softc *, struct nfe_tx_ring *);
132 static int	nfe_init_tx_ring(struct nfe_softc *, struct nfe_tx_ring *);
133 static void	nfe_free_tx_ring(struct nfe_softc *, struct nfe_tx_ring *);
134 static int	nfe_ifmedia_upd(struct ifnet *);
135 static void	nfe_ifmedia_sts(struct ifnet *, struct ifmediareq *);
136 static void	nfe_setmulti(struct nfe_softc *);
137 static void	nfe_get_macaddr(struct nfe_softc *, uint8_t *);
138 static void	nfe_set_macaddr(struct nfe_softc *, const uint8_t *);
139 static void	nfe_tick(void *);
140 static void	nfe_ring_dma_addr(void *, bus_dma_segment_t *, int, int);
141 static void	nfe_buf_dma_addr(void *, bus_dma_segment_t *, int, bus_size_t,
142 				 int);
143 static void	nfe_set_paddr_rxdesc(struct nfe_softc *, struct nfe_rx_ring *,
144 				     int, bus_addr_t);
145 static void	nfe_set_ready_rxdesc(struct nfe_softc *, struct nfe_rx_ring *,
146 				     int);
147 static int	nfe_newbuf_std(struct nfe_softc *, struct nfe_rx_ring *, int,
148 			       int);
149 static int	nfe_newbuf_jumbo(struct nfe_softc *, struct nfe_rx_ring *, int,
150 				 int);
151 
152 static int	nfe_sysctl_imtime(SYSCTL_HANDLER_ARGS);
153 
154 #define NFE_DEBUG
155 #ifdef NFE_DEBUG
156 
157 static int	nfe_debug = 0;
158 static int	nfe_rx_ring_count = NFE_RX_RING_DEF_COUNT;
159 static int	nfe_imtime = -1;
160 
161 TUNABLE_INT("hw.nfe.rx_ring_count", &nfe_rx_ring_count);
162 TUNABLE_INT("hw.nfe.imtime", &nfe_imtime);
163 TUNABLE_INT("hw.nfe.debug", &nfe_debug);
164 
165 #define DPRINTF(sc, fmt, ...) do {		\
166 	if ((sc)->sc_debug) {			\
167 		if_printf(&(sc)->arpcom.ac_if,	\
168 			  fmt, __VA_ARGS__);	\
169 	}					\
170 } while (0)
171 
172 #define DPRINTFN(sc, lv, fmt, ...) do {		\
173 	if ((sc)->sc_debug >= (lv)) {		\
174 		if_printf(&(sc)->arpcom.ac_if,	\
175 			  fmt, __VA_ARGS__);	\
176 	}					\
177 } while (0)
178 
179 #else	/* !NFE_DEBUG */
180 
181 #define DPRINTF(sc, fmt, ...)
182 #define DPRINTFN(sc, lv, fmt, ...)
183 
184 #endif	/* NFE_DEBUG */
185 
186 struct nfe_dma_ctx {
187 	int			nsegs;
188 	bus_dma_segment_t	*segs;
189 };
190 
191 static const struct nfe_dev {
192 	uint16_t	vid;
193 	uint16_t	did;
194 	const char	*desc;
195 } nfe_devices[] = {
196 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_NFORCE_LAN,
197 	  "NVIDIA nForce Fast Ethernet" },
198 
199 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_NFORCE2_LAN,
200 	  "NVIDIA nForce2 Fast Ethernet" },
201 
202 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_NFORCE3_LAN1,
203 	  "NVIDIA nForce3 Gigabit Ethernet" },
204 
205 	/* XXX TGEN the next chip can also be found in the nForce2 Ultra 400Gb
206 	   chipset, and possibly also the 400R; it might be both nForce2- and
207 	   nForce3-based boards can use the same MCPs (= southbridges) */
208 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_NFORCE3_LAN2,
209 	  "NVIDIA nForce3 Gigabit Ethernet" },
210 
211 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_NFORCE3_LAN3,
212 	  "NVIDIA nForce3 Gigabit Ethernet" },
213 
214 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_NFORCE3_LAN4,
215 	  "NVIDIA nForce3 Gigabit Ethernet" },
216 
217 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_NFORCE3_LAN5,
218 	  "NVIDIA nForce3 Gigabit Ethernet" },
219 
220 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_CK804_LAN1,
221 	  "NVIDIA CK804 Gigabit Ethernet" },
222 
223 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_CK804_LAN2,
224 	  "NVIDIA CK804 Gigabit Ethernet" },
225 
226 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP04_LAN1,
227 	  "NVIDIA MCP04 Gigabit Ethernet" },
228 
229 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP04_LAN2,
230 	  "NVIDIA MCP04 Gigabit Ethernet" },
231 
232 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP51_LAN1,
233 	  "NVIDIA MCP51 Gigabit Ethernet" },
234 
235 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP51_LAN2,
236 	  "NVIDIA MCP51 Gigabit Ethernet" },
237 
238 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP55_LAN1,
239 	  "NVIDIA MCP55 Gigabit Ethernet" },
240 
241 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP55_LAN2,
242 	  "NVIDIA MCP55 Gigabit Ethernet" },
243 
244 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP61_LAN1,
245 	  "NVIDIA MCP61 Gigabit Ethernet" },
246 
247 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP61_LAN2,
248 	  "NVIDIA MCP61 Gigabit Ethernet" },
249 
250 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP61_LAN3,
251 	  "NVIDIA MCP61 Gigabit Ethernet" },
252 
253 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP61_LAN4,
254 	  "NVIDIA MCP61 Gigabit Ethernet" },
255 
256 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP65_LAN1,
257 	  "NVIDIA MCP65 Gigabit Ethernet" },
258 
259 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP65_LAN2,
260 	  "NVIDIA MCP65 Gigabit Ethernet" },
261 
262 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP65_LAN3,
263 	  "NVIDIA MCP65 Gigabit Ethernet" },
264 
265 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP65_LAN4,
266 	  "NVIDIA MCP65 Gigabit Ethernet" },
267 
268 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP67_LAN1,
269 	  "NVIDIA MCP67 Gigabit Ethernet" },
270 
271 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP67_LAN2,
272 	  "NVIDIA MCP67 Gigabit Ethernet" },
273 
274 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP67_LAN3,
275 	  "NVIDIA MCP67 Gigabit Ethernet" },
276 
277 	{ PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_MCP67_LAN4,
278 	  "NVIDIA MCP67 Gigabit Ethernet" }
279 };
280 
281 static device_method_t nfe_methods[] = {
282 	/* Device interface */
283 	DEVMETHOD(device_probe,		nfe_probe),
284 	DEVMETHOD(device_attach,	nfe_attach),
285 	DEVMETHOD(device_detach,	nfe_detach),
286 	DEVMETHOD(device_suspend,	nfe_suspend),
287 	DEVMETHOD(device_resume,	nfe_resume),
288 	DEVMETHOD(device_shutdown,	nfe_shutdown),
289 
290 	/* Bus interface */
291 	DEVMETHOD(bus_print_child,	bus_generic_print_child),
292 	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
293 
294 	/* MII interface */
295 	DEVMETHOD(miibus_readreg,	nfe_miibus_readreg),
296 	DEVMETHOD(miibus_writereg,	nfe_miibus_writereg),
297 	DEVMETHOD(miibus_statchg,	nfe_miibus_statchg),
298 
299 	{ 0, 0 }
300 };
301 
302 static driver_t nfe_driver = {
303 	"nfe",
304 	nfe_methods,
305 	sizeof(struct nfe_softc)
306 };
307 
308 static devclass_t	nfe_devclass;
309 
310 DECLARE_DUMMY_MODULE(if_nfe);
311 MODULE_DEPEND(if_nfe, miibus, 1, 1, 1);
312 DRIVER_MODULE(if_nfe, pci, nfe_driver, nfe_devclass, 0, 0);
313 DRIVER_MODULE(miibus, nfe, miibus_driver, miibus_devclass, 0, 0);
314 
315 static int
316 nfe_probe(device_t dev)
317 {
318 	const struct nfe_dev *n;
319 	uint16_t vid, did;
320 
321 	vid = pci_get_vendor(dev);
322 	did = pci_get_device(dev);
323 	for (n = nfe_devices; n->desc != NULL; ++n) {
324 		if (vid == n->vid && did == n->did) {
325 			struct nfe_softc *sc = device_get_softc(dev);
326 
327 			switch (did) {
328 			case PCI_PRODUCT_NVIDIA_NFORCE3_LAN2:
329 			case PCI_PRODUCT_NVIDIA_NFORCE3_LAN3:
330 			case PCI_PRODUCT_NVIDIA_NFORCE3_LAN4:
331 			case PCI_PRODUCT_NVIDIA_NFORCE3_LAN5:
332 				sc->sc_flags = NFE_JUMBO_SUP |
333 					       NFE_HW_CSUM;
334 				break;
335 			case PCI_PRODUCT_NVIDIA_MCP51_LAN1:
336 			case PCI_PRODUCT_NVIDIA_MCP51_LAN2:
337 			case PCI_PRODUCT_NVIDIA_MCP61_LAN1:
338 			case PCI_PRODUCT_NVIDIA_MCP61_LAN2:
339 			case PCI_PRODUCT_NVIDIA_MCP61_LAN3:
340 			case PCI_PRODUCT_NVIDIA_MCP61_LAN4:
341 			case PCI_PRODUCT_NVIDIA_MCP67_LAN1:
342 			case PCI_PRODUCT_NVIDIA_MCP67_LAN2:
343 			case PCI_PRODUCT_NVIDIA_MCP67_LAN3:
344 			case PCI_PRODUCT_NVIDIA_MCP67_LAN4:
345 				sc->sc_flags = NFE_40BIT_ADDR;
346 				break;
347 			case PCI_PRODUCT_NVIDIA_CK804_LAN1:
348 			case PCI_PRODUCT_NVIDIA_CK804_LAN2:
349 			case PCI_PRODUCT_NVIDIA_MCP04_LAN1:
350 			case PCI_PRODUCT_NVIDIA_MCP04_LAN2:
351 			case PCI_PRODUCT_NVIDIA_MCP65_LAN1:
352 			case PCI_PRODUCT_NVIDIA_MCP65_LAN2:
353 			case PCI_PRODUCT_NVIDIA_MCP65_LAN3:
354 			case PCI_PRODUCT_NVIDIA_MCP65_LAN4:
355 				sc->sc_flags = NFE_JUMBO_SUP |
356 					       NFE_40BIT_ADDR |
357 					       NFE_HW_CSUM;
358 				break;
359 			case PCI_PRODUCT_NVIDIA_MCP55_LAN1:
360 			case PCI_PRODUCT_NVIDIA_MCP55_LAN2:
361 				sc->sc_flags = NFE_JUMBO_SUP |
362 					       NFE_40BIT_ADDR |
363 					       NFE_HW_CSUM |
364 					       NFE_HW_VLAN;
365 				break;
366 			}
367 
368 			device_set_desc(dev, n->desc);
369 			device_set_async_attach(dev, TRUE);
370 			return 0;
371 		}
372 	}
373 	return ENXIO;
374 }
375 
376 static int
377 nfe_attach(device_t dev)
378 {
379 	struct nfe_softc *sc = device_get_softc(dev);
380 	struct ifnet *ifp = &sc->arpcom.ac_if;
381 	uint8_t eaddr[ETHER_ADDR_LEN];
382 	int error;
383 
384 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
385 	lwkt_serialize_init(&sc->sc_jbuf_serializer);
386 
387 	/*
388 	 * Initialize sysctl variables
389 	 */
390 	sc->sc_imtime = nfe_imtime;
391 	sc->sc_irq_enable = NFE_IRQ_ENABLE(sc);
392 	sc->sc_rx_ring_count = nfe_rx_ring_count;
393 	sc->sc_debug = nfe_debug;
394 
395 	sc->sc_mem_rid = PCIR_BAR(0);
396 
397 #ifndef BURN_BRIDGES
398 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
399 		uint32_t mem, irq;
400 
401 		mem = pci_read_config(dev, sc->sc_mem_rid, 4);
402 		irq = pci_read_config(dev, PCIR_INTLINE, 4);
403 
404 		device_printf(dev, "chip is in D%d power mode "
405 		    "-- setting to D0\n", pci_get_powerstate(dev));
406 
407 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
408 
409 		pci_write_config(dev, sc->sc_mem_rid, mem, 4);
410 		pci_write_config(dev, PCIR_INTLINE, irq, 4);
411 	}
412 #endif	/* !BURN_BRIDGE */
413 
414 	/* Enable bus mastering */
415 	pci_enable_busmaster(dev);
416 
417 	/* Allocate IO memory */
418 	sc->sc_mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
419 						&sc->sc_mem_rid, RF_ACTIVE);
420 	if (sc->sc_mem_res == NULL) {
421 		device_printf(dev, "cound not allocate io memory\n");
422 		return ENXIO;
423 	}
424 	sc->sc_memh = rman_get_bushandle(sc->sc_mem_res);
425 	sc->sc_memt = rman_get_bustag(sc->sc_mem_res);
426 
427 	/* Allocate IRQ */
428 	sc->sc_irq_rid = 0;
429 	sc->sc_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
430 						&sc->sc_irq_rid,
431 						RF_SHAREABLE | RF_ACTIVE);
432 	if (sc->sc_irq_res == NULL) {
433 		device_printf(dev, "could not allocate irq\n");
434 		error = ENXIO;
435 		goto fail;
436 	}
437 
438 	nfe_get_macaddr(sc, eaddr);
439 
440 	/*
441 	 * Allocate Tx and Rx rings.
442 	 */
443 	error = nfe_alloc_tx_ring(sc, &sc->txq);
444 	if (error) {
445 		device_printf(dev, "could not allocate Tx ring\n");
446 		goto fail;
447 	}
448 
449 	error = nfe_alloc_rx_ring(sc, &sc->rxq);
450 	if (error) {
451 		device_printf(dev, "could not allocate Rx ring\n");
452 		goto fail;
453 	}
454 
455 	/*
456 	 * Create sysctl tree
457 	 */
458 	sysctl_ctx_init(&sc->sc_sysctl_ctx);
459 	sc->sc_sysctl_tree = SYSCTL_ADD_NODE(&sc->sc_sysctl_ctx,
460 					     SYSCTL_STATIC_CHILDREN(_hw),
461 					     OID_AUTO,
462 					     device_get_nameunit(dev),
463 					     CTLFLAG_RD, 0, "");
464 	if (sc->sc_sysctl_tree == NULL) {
465 		device_printf(dev, "can't add sysctl node\n");
466 		error = ENXIO;
467 		goto fail;
468 	}
469 	SYSCTL_ADD_PROC(&sc->sc_sysctl_ctx,
470 			SYSCTL_CHILDREN(sc->sc_sysctl_tree),
471 			OID_AUTO, "imtimer", CTLTYPE_INT | CTLFLAG_RW,
472 			sc, 0, nfe_sysctl_imtime, "I",
473 			"Interrupt moderation time (usec).  "
474 			"-1 to disable interrupt moderation.");
475 	SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(sc->sc_sysctl_tree), OID_AUTO,
476 		       "rx_ring_count", CTLFLAG_RD, &sc->sc_rx_ring_count,
477 		       0, "RX ring count");
478 	SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(sc->sc_sysctl_tree), OID_AUTO,
479 		       "debug", CTLFLAG_RW, &sc->sc_debug,
480 		       0, "control debugging printfs");
481 
482 	error = mii_phy_probe(dev, &sc->sc_miibus, nfe_ifmedia_upd,
483 			      nfe_ifmedia_sts);
484 	if (error) {
485 		device_printf(dev, "MII without any phy\n");
486 		goto fail;
487 	}
488 
489 	ifp->if_softc = sc;
490 	ifp->if_mtu = ETHERMTU;
491 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
492 	ifp->if_ioctl = nfe_ioctl;
493 	ifp->if_start = nfe_start;
494 #ifdef DEVICE_POLLING
495 	ifp->if_poll = nfe_poll;
496 #endif
497 	ifp->if_watchdog = nfe_watchdog;
498 	ifp->if_init = nfe_init;
499 	ifq_set_maxlen(&ifp->if_snd, NFE_IFQ_MAXLEN);
500 	ifq_set_ready(&ifp->if_snd);
501 
502 	ifp->if_capabilities = IFCAP_VLAN_MTU;
503 
504 	if (sc->sc_flags & NFE_HW_VLAN)
505 		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
506 
507 #ifdef NFE_CSUM
508 	if (sc->sc_flags & NFE_HW_CSUM) {
509 		ifp->if_capabilities |= IFCAP_HWCSUM;
510 		ifp->if_hwassist = NFE_CSUM_FEATURES;
511 	}
512 #else
513 	sc->sc_flags &= ~NFE_HW_CSUM;
514 #endif
515 	ifp->if_capenable = ifp->if_capabilities;
516 
517 	callout_init(&sc->sc_tick_ch);
518 
519 	ether_ifattach(ifp, eaddr, NULL);
520 
521 	error = bus_setup_intr(dev, sc->sc_irq_res, INTR_MPSAFE, nfe_intr, sc,
522 			       &sc->sc_ih, ifp->if_serializer);
523 	if (error) {
524 		device_printf(dev, "could not setup intr\n");
525 		ether_ifdetach(ifp);
526 		goto fail;
527 	}
528 
529 	return 0;
530 fail:
531 	nfe_detach(dev);
532 	return error;
533 }
534 
535 static int
536 nfe_detach(device_t dev)
537 {
538 	struct nfe_softc *sc = device_get_softc(dev);
539 
540 	if (device_is_attached(dev)) {
541 		struct ifnet *ifp = &sc->arpcom.ac_if;
542 
543 		lwkt_serialize_enter(ifp->if_serializer);
544 		nfe_stop(sc);
545 		bus_teardown_intr(dev, sc->sc_irq_res, sc->sc_ih);
546 		lwkt_serialize_exit(ifp->if_serializer);
547 
548 		ether_ifdetach(ifp);
549 	}
550 
551 	if (sc->sc_miibus != NULL)
552 		device_delete_child(dev, sc->sc_miibus);
553 	bus_generic_detach(dev);
554 
555 	if (sc->sc_sysctl_tree != NULL)
556 		sysctl_ctx_free(&sc->sc_sysctl_ctx);
557 
558 	if (sc->sc_irq_res != NULL) {
559 		bus_release_resource(dev, SYS_RES_IRQ, sc->sc_irq_rid,
560 				     sc->sc_irq_res);
561 	}
562 
563 	if (sc->sc_mem_res != NULL) {
564 		bus_release_resource(dev, SYS_RES_MEMORY, sc->sc_mem_rid,
565 				     sc->sc_mem_res);
566 	}
567 
568 	nfe_free_tx_ring(sc, &sc->txq);
569 	nfe_free_rx_ring(sc, &sc->rxq);
570 
571 	return 0;
572 }
573 
574 static void
575 nfe_shutdown(device_t dev)
576 {
577 	struct nfe_softc *sc = device_get_softc(dev);
578 	struct ifnet *ifp = &sc->arpcom.ac_if;
579 
580 	lwkt_serialize_enter(ifp->if_serializer);
581 	nfe_stop(sc);
582 	lwkt_serialize_exit(ifp->if_serializer);
583 }
584 
585 static int
586 nfe_suspend(device_t dev)
587 {
588 	struct nfe_softc *sc = device_get_softc(dev);
589 	struct ifnet *ifp = &sc->arpcom.ac_if;
590 
591 	lwkt_serialize_enter(ifp->if_serializer);
592 	nfe_stop(sc);
593 	lwkt_serialize_exit(ifp->if_serializer);
594 
595 	return 0;
596 }
597 
598 static int
599 nfe_resume(device_t dev)
600 {
601 	struct nfe_softc *sc = device_get_softc(dev);
602 	struct ifnet *ifp = &sc->arpcom.ac_if;
603 
604 	lwkt_serialize_enter(ifp->if_serializer);
605 	if (ifp->if_flags & IFF_UP)
606 		nfe_init(sc);
607 	lwkt_serialize_exit(ifp->if_serializer);
608 
609 	return 0;
610 }
611 
612 static void
613 nfe_miibus_statchg(device_t dev)
614 {
615 	struct nfe_softc *sc = device_get_softc(dev);
616 	struct mii_data *mii = device_get_softc(sc->sc_miibus);
617 	uint32_t phy, seed, misc = NFE_MISC1_MAGIC, link = NFE_MEDIA_SET;
618 
619 	phy = NFE_READ(sc, NFE_PHY_IFACE);
620 	phy &= ~(NFE_PHY_HDX | NFE_PHY_100TX | NFE_PHY_1000T);
621 
622 	seed = NFE_READ(sc, NFE_RNDSEED);
623 	seed &= ~NFE_SEED_MASK;
624 
625 	if ((mii->mii_media_active & IFM_GMASK) == IFM_HDX) {
626 		phy  |= NFE_PHY_HDX;	/* half-duplex */
627 		misc |= NFE_MISC1_HDX;
628 	}
629 
630 	switch (IFM_SUBTYPE(mii->mii_media_active)) {
631 	case IFM_1000_T:	/* full-duplex only */
632 		link |= NFE_MEDIA_1000T;
633 		seed |= NFE_SEED_1000T;
634 		phy  |= NFE_PHY_1000T;
635 		break;
636 	case IFM_100_TX:
637 		link |= NFE_MEDIA_100TX;
638 		seed |= NFE_SEED_100TX;
639 		phy  |= NFE_PHY_100TX;
640 		break;
641 	case IFM_10_T:
642 		link |= NFE_MEDIA_10T;
643 		seed |= NFE_SEED_10T;
644 		break;
645 	}
646 
647 	NFE_WRITE(sc, NFE_RNDSEED, seed);	/* XXX: gigabit NICs only? */
648 
649 	NFE_WRITE(sc, NFE_PHY_IFACE, phy);
650 	NFE_WRITE(sc, NFE_MISC1, misc);
651 	NFE_WRITE(sc, NFE_LINKSPEED, link);
652 }
653 
654 static int
655 nfe_miibus_readreg(device_t dev, int phy, int reg)
656 {
657 	struct nfe_softc *sc = device_get_softc(dev);
658 	uint32_t val;
659 	int ntries;
660 
661 	NFE_WRITE(sc, NFE_PHY_STATUS, 0xf);
662 
663 	if (NFE_READ(sc, NFE_PHY_CTL) & NFE_PHY_BUSY) {
664 		NFE_WRITE(sc, NFE_PHY_CTL, NFE_PHY_BUSY);
665 		DELAY(100);
666 	}
667 
668 	NFE_WRITE(sc, NFE_PHY_CTL, (phy << NFE_PHYADD_SHIFT) | reg);
669 
670 	for (ntries = 0; ntries < 1000; ntries++) {
671 		DELAY(100);
672 		if (!(NFE_READ(sc, NFE_PHY_CTL) & NFE_PHY_BUSY))
673 			break;
674 	}
675 	if (ntries == 1000) {
676 		DPRINTFN(sc, 2, "timeout waiting for PHY %s\n", "");
677 		return 0;
678 	}
679 
680 	if (NFE_READ(sc, NFE_PHY_STATUS) & NFE_PHY_ERROR) {
681 		DPRINTFN(sc, 2, "could not read PHY %s\n", "");
682 		return 0;
683 	}
684 
685 	val = NFE_READ(sc, NFE_PHY_DATA);
686 	if (val != 0xffffffff && val != 0)
687 		sc->mii_phyaddr = phy;
688 
689 	DPRINTFN(sc, 2, "mii read phy %d reg 0x%x ret 0x%x\n", phy, reg, val);
690 
691 	return val;
692 }
693 
694 static void
695 nfe_miibus_writereg(device_t dev, int phy, int reg, int val)
696 {
697 	struct nfe_softc *sc = device_get_softc(dev);
698 	uint32_t ctl;
699 	int ntries;
700 
701 	NFE_WRITE(sc, NFE_PHY_STATUS, 0xf);
702 
703 	if (NFE_READ(sc, NFE_PHY_CTL) & NFE_PHY_BUSY) {
704 		NFE_WRITE(sc, NFE_PHY_CTL, NFE_PHY_BUSY);
705 		DELAY(100);
706 	}
707 
708 	NFE_WRITE(sc, NFE_PHY_DATA, val);
709 	ctl = NFE_PHY_WRITE | (phy << NFE_PHYADD_SHIFT) | reg;
710 	NFE_WRITE(sc, NFE_PHY_CTL, ctl);
711 
712 	for (ntries = 0; ntries < 1000; ntries++) {
713 		DELAY(100);
714 		if (!(NFE_READ(sc, NFE_PHY_CTL) & NFE_PHY_BUSY))
715 			break;
716 	}
717 
718 #ifdef NFE_DEBUG
719 	if (ntries == 1000)
720 		DPRINTFN(sc, 2, "could not write to PHY %s\n", "");
721 #endif
722 }
723 
724 #ifdef DEVICE_POLLING
725 
726 static void
727 nfe_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
728 {
729 	struct nfe_softc *sc = ifp->if_softc;
730 
731 	ASSERT_SERIALIZED(ifp->if_serializer);
732 
733 	switch(cmd) {
734 	case POLL_REGISTER:
735 		/* Disable interrupts */
736 		NFE_WRITE(sc, NFE_IRQ_MASK, 0);
737 		break;
738 	case POLL_DEREGISTER:
739 		/* enable interrupts */
740 		NFE_WRITE(sc, NFE_IRQ_MASK, sc->sc_irq_enable);
741 		break;
742 	case POLL_AND_CHECK_STATUS:
743 		/* fall through */
744 	case POLL_ONLY:
745 		if (ifp->if_flags & IFF_RUNNING) {
746 			nfe_rxeof(sc);
747 			nfe_txeof(sc);
748 		}
749 		break;
750 	}
751 }
752 
753 #endif
754 
755 static void
756 nfe_intr(void *arg)
757 {
758 	struct nfe_softc *sc = arg;
759 	struct ifnet *ifp = &sc->arpcom.ac_if;
760 	uint32_t r;
761 
762 	r = NFE_READ(sc, NFE_IRQ_STATUS);
763 	if (r == 0)
764 		return;	/* not for us */
765 	NFE_WRITE(sc, NFE_IRQ_STATUS, r);
766 
767 	DPRINTFN(sc, 5, "%s: interrupt register %x\n", __func__, r);
768 
769 	if (r & NFE_IRQ_LINK) {
770 		NFE_READ(sc, NFE_PHY_STATUS);
771 		NFE_WRITE(sc, NFE_PHY_STATUS, 0xf);
772 		DPRINTF(sc, "link state changed %s\n", "");
773 	}
774 
775 	if (ifp->if_flags & IFF_RUNNING) {
776 		/* check Rx ring */
777 		nfe_rxeof(sc);
778 
779 		/* check Tx ring */
780 		nfe_txeof(sc);
781 	}
782 }
783 
784 static int
785 nfe_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
786 {
787 	struct nfe_softc *sc = ifp->if_softc;
788 	struct ifreq *ifr = (struct ifreq *)data;
789 	struct mii_data *mii;
790 	int error = 0, mask;
791 
792 	switch (cmd) {
793 	case SIOCSIFMTU:
794 		if (((sc->sc_flags & NFE_JUMBO_SUP) &&
795 		     ifr->ifr_mtu > NFE_JUMBO_MTU) ||
796 		    ((sc->sc_flags & NFE_JUMBO_SUP) == 0 &&
797 		     ifr->ifr_mtu > ETHERMTU)) {
798 			return EINVAL;
799 		} else if (ifp->if_mtu != ifr->ifr_mtu) {
800 			ifp->if_mtu = ifr->ifr_mtu;
801 			nfe_init(sc);
802 		}
803 		break;
804 	case SIOCSIFFLAGS:
805 		if (ifp->if_flags & IFF_UP) {
806 			/*
807 			 * If only the PROMISC or ALLMULTI flag changes, then
808 			 * don't do a full re-init of the chip, just update
809 			 * the Rx filter.
810 			 */
811 			if ((ifp->if_flags & IFF_RUNNING) &&
812 			    ((ifp->if_flags ^ sc->sc_if_flags) &
813 			     (IFF_ALLMULTI | IFF_PROMISC)) != 0) {
814 				nfe_setmulti(sc);
815 			} else {
816 				if (!(ifp->if_flags & IFF_RUNNING))
817 					nfe_init(sc);
818 			}
819 		} else {
820 			if (ifp->if_flags & IFF_RUNNING)
821 				nfe_stop(sc);
822 		}
823 		sc->sc_if_flags = ifp->if_flags;
824 		break;
825 	case SIOCADDMULTI:
826 	case SIOCDELMULTI:
827 		if (ifp->if_flags & IFF_RUNNING)
828 			nfe_setmulti(sc);
829 		break;
830 	case SIOCSIFMEDIA:
831 	case SIOCGIFMEDIA:
832 		mii = device_get_softc(sc->sc_miibus);
833 		error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, cmd);
834 		break;
835         case SIOCSIFCAP:
836 		mask = (ifr->ifr_reqcap ^ ifp->if_capenable) & IFCAP_HWCSUM;
837 		if (mask && (ifp->if_capabilities & IFCAP_HWCSUM)) {
838 			ifp->if_capenable ^= mask;
839 			if (IFCAP_TXCSUM & ifp->if_capenable)
840 				ifp->if_hwassist = NFE_CSUM_FEATURES;
841 			else
842 				ifp->if_hwassist = 0;
843 
844 			if (ifp->if_flags & IFF_RUNNING)
845 				nfe_init(sc);
846 		}
847 		break;
848 	default:
849 		error = ether_ioctl(ifp, cmd, data);
850 		break;
851 	}
852 	return error;
853 }
854 
855 static void
856 nfe_rxeof(struct nfe_softc *sc)
857 {
858 	struct ifnet *ifp = &sc->arpcom.ac_if;
859 	struct nfe_rx_ring *ring = &sc->rxq;
860 	int reap;
861 
862 	reap = 0;
863 	bus_dmamap_sync(ring->tag, ring->map, BUS_DMASYNC_POSTREAD);
864 
865 	for (;;) {
866 		struct nfe_rx_data *data = &ring->data[ring->cur];
867 		struct mbuf *m;
868 		uint16_t flags;
869 		int len, error;
870 
871 		if (sc->sc_flags & NFE_40BIT_ADDR) {
872 			struct nfe_desc64 *desc64 = &ring->desc64[ring->cur];
873 
874 			flags = le16toh(desc64->flags);
875 			len = le16toh(desc64->length) & 0x3fff;
876 		} else {
877 			struct nfe_desc32 *desc32 = &ring->desc32[ring->cur];
878 
879 			flags = le16toh(desc32->flags);
880 			len = le16toh(desc32->length) & 0x3fff;
881 		}
882 
883 		if (flags & NFE_RX_READY)
884 			break;
885 
886 		reap = 1;
887 
888 		if ((sc->sc_flags & (NFE_JUMBO_SUP | NFE_40BIT_ADDR)) == 0) {
889 			if (!(flags & NFE_RX_VALID_V1))
890 				goto skip;
891 
892 			if ((flags & NFE_RX_FIXME_V1) == NFE_RX_FIXME_V1) {
893 				flags &= ~NFE_RX_ERROR;
894 				len--;	/* fix buffer length */
895 			}
896 		} else {
897 			if (!(flags & NFE_RX_VALID_V2))
898 				goto skip;
899 
900 			if ((flags & NFE_RX_FIXME_V2) == NFE_RX_FIXME_V2) {
901 				flags &= ~NFE_RX_ERROR;
902 				len--;	/* fix buffer length */
903 			}
904 		}
905 
906 		if (flags & NFE_RX_ERROR) {
907 			ifp->if_ierrors++;
908 			goto skip;
909 		}
910 
911 		m = data->m;
912 
913 		if (sc->sc_flags & NFE_USE_JUMBO)
914 			error = nfe_newbuf_jumbo(sc, ring, ring->cur, 0);
915 		else
916 			error = nfe_newbuf_std(sc, ring, ring->cur, 0);
917 		if (error) {
918 			ifp->if_ierrors++;
919 			goto skip;
920 		}
921 
922 		/* finalize mbuf */
923 		m->m_pkthdr.len = m->m_len = len;
924 		m->m_pkthdr.rcvif = ifp;
925 
926 		if ((ifp->if_capenable & IFCAP_RXCSUM) &&
927 		    (flags & NFE_RX_CSUMOK)) {
928 			if (flags & NFE_RX_IP_CSUMOK_V2) {
929 				m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED |
930 							  CSUM_IP_VALID;
931 			}
932 
933 			if (flags &
934 			    (NFE_RX_UDP_CSUMOK_V2 | NFE_RX_TCP_CSUMOK_V2)) {
935 				m->m_pkthdr.csum_flags |= CSUM_DATA_VALID |
936 							  CSUM_PSEUDO_HDR |
937 							  CSUM_FRAG_NOT_CHECKED;
938 				m->m_pkthdr.csum_data = 0xffff;
939 			}
940 		}
941 
942 		ifp->if_ipackets++;
943 		ifp->if_input(ifp, m);
944 skip:
945 		nfe_set_ready_rxdesc(sc, ring, ring->cur);
946 		sc->rxq.cur = (sc->rxq.cur + 1) % sc->sc_rx_ring_count;
947 	}
948 
949 	if (reap)
950 		bus_dmamap_sync(ring->tag, ring->map, BUS_DMASYNC_PREWRITE);
951 }
952 
953 static void
954 nfe_txeof(struct nfe_softc *sc)
955 {
956 	struct ifnet *ifp = &sc->arpcom.ac_if;
957 	struct nfe_tx_ring *ring = &sc->txq;
958 	struct nfe_tx_data *data = NULL;
959 
960 	bus_dmamap_sync(ring->tag, ring->map, BUS_DMASYNC_POSTREAD);
961 	while (ring->next != ring->cur) {
962 		uint16_t flags;
963 
964 		if (sc->sc_flags & NFE_40BIT_ADDR)
965 			flags = le16toh(ring->desc64[ring->next].flags);
966 		else
967 			flags = le16toh(ring->desc32[ring->next].flags);
968 
969 		if (flags & NFE_TX_VALID)
970 			break;
971 
972 		data = &ring->data[ring->next];
973 
974 		if ((sc->sc_flags & (NFE_JUMBO_SUP | NFE_40BIT_ADDR)) == 0) {
975 			if (!(flags & NFE_TX_LASTFRAG_V1) && data->m == NULL)
976 				goto skip;
977 
978 			if ((flags & NFE_TX_ERROR_V1) != 0) {
979 				if_printf(ifp, "tx v1 error 0x%4b\n", flags,
980 					  NFE_V1_TXERR);
981 				ifp->if_oerrors++;
982 			} else {
983 				ifp->if_opackets++;
984 			}
985 		} else {
986 			if (!(flags & NFE_TX_LASTFRAG_V2) && data->m == NULL)
987 				goto skip;
988 
989 			if ((flags & NFE_TX_ERROR_V2) != 0) {
990 				if_printf(ifp, "tx v2 error 0x%4b\n", flags,
991 					  NFE_V2_TXERR);
992 				ifp->if_oerrors++;
993 			} else {
994 				ifp->if_opackets++;
995 			}
996 		}
997 
998 		if (data->m == NULL) {	/* should not get there */
999 			if_printf(ifp,
1000 				  "last fragment bit w/o associated mbuf!\n");
1001 			goto skip;
1002 		}
1003 
1004 		/* last fragment of the mbuf chain transmitted */
1005 		bus_dmamap_sync(ring->data_tag, data->map,
1006 				BUS_DMASYNC_POSTWRITE);
1007 		bus_dmamap_unload(ring->data_tag, data->map);
1008 		m_freem(data->m);
1009 		data->m = NULL;
1010 
1011 		ifp->if_timer = 0;
1012 skip:
1013 		ring->queued--;
1014 		KKASSERT(ring->queued >= 0);
1015 		ring->next = (ring->next + 1) % NFE_TX_RING_COUNT;
1016 	}
1017 
1018 	if (data != NULL) {	/* at least one slot freed */
1019 		ifp->if_flags &= ~IFF_OACTIVE;
1020 		ifp->if_start(ifp);
1021 	}
1022 }
1023 
1024 static int
1025 nfe_encap(struct nfe_softc *sc, struct nfe_tx_ring *ring, struct mbuf *m0)
1026 {
1027 	struct nfe_dma_ctx ctx;
1028 	bus_dma_segment_t segs[NFE_MAX_SCATTER];
1029 	struct nfe_tx_data *data, *data_map;
1030 	bus_dmamap_t map;
1031 	struct nfe_desc64 *desc64 = NULL;
1032 	struct nfe_desc32 *desc32 = NULL;
1033 	uint16_t flags = 0;
1034 	uint32_t vtag = 0;
1035 	int error, i, j;
1036 
1037 	data = &ring->data[ring->cur];
1038 	map = data->map;
1039 	data_map = data;	/* Remember who owns the DMA map */
1040 
1041 	ctx.nsegs = NFE_MAX_SCATTER;
1042 	ctx.segs = segs;
1043 	error = bus_dmamap_load_mbuf(ring->data_tag, map, m0,
1044 				     nfe_buf_dma_addr, &ctx, BUS_DMA_NOWAIT);
1045 	if (error && error != EFBIG) {
1046 		if_printf(&sc->arpcom.ac_if, "could not map TX mbuf\n");
1047 		goto back;
1048 	}
1049 
1050 	if (error) {	/* error == EFBIG */
1051 		struct mbuf *m_new;
1052 
1053 		m_new = m_defrag(m0, MB_DONTWAIT);
1054 		if (m_new == NULL) {
1055 			if_printf(&sc->arpcom.ac_if,
1056 				  "could not defrag TX mbuf\n");
1057 			error = ENOBUFS;
1058 			goto back;
1059 		} else {
1060 			m0 = m_new;
1061 		}
1062 
1063 		ctx.nsegs = NFE_MAX_SCATTER;
1064 		ctx.segs = segs;
1065 		error = bus_dmamap_load_mbuf(ring->data_tag, map, m0,
1066 					     nfe_buf_dma_addr, &ctx,
1067 					     BUS_DMA_NOWAIT);
1068 		if (error) {
1069 			if_printf(&sc->arpcom.ac_if,
1070 				  "could not map defraged TX mbuf\n");
1071 			goto back;
1072 		}
1073 	}
1074 
1075 	error = 0;
1076 
1077 	if (ring->queued + ctx.nsegs >= NFE_TX_RING_COUNT - 1) {
1078 		bus_dmamap_unload(ring->data_tag, map);
1079 		error = ENOBUFS;
1080 		goto back;
1081 	}
1082 
1083 	/* setup h/w VLAN tagging */
1084 	if ((m0->m_flags & (M_PROTO1 | M_PKTHDR)) == (M_PROTO1 | M_PKTHDR) &&
1085 	    m0->m_pkthdr.rcvif != NULL &&
1086 	    m0->m_pkthdr.rcvif->if_type == IFT_L2VLAN) {
1087 		struct ifvlan *ifv = m0->m_pkthdr.rcvif->if_softc;
1088 
1089 		if (ifv != NULL)
1090 			vtag = NFE_TX_VTAG | htons(ifv->ifv_tag);
1091 	}
1092 
1093 	if (sc->arpcom.ac_if.if_capenable & IFCAP_TXCSUM) {
1094 		if (m0->m_pkthdr.csum_flags & CSUM_IP)
1095 			flags |= NFE_TX_IP_CSUM;
1096 		if (m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))
1097 			flags |= NFE_TX_TCP_CSUM;
1098 	}
1099 
1100 	/*
1101 	 * XXX urm. somebody is unaware of how hardware works.  You
1102 	 * absolutely CANNOT set NFE_TX_VALID on the next descriptor in
1103 	 * the ring until the entire chain is actually *VALID*.  Otherwise
1104 	 * the hardware may encounter a partially initialized chain that
1105 	 * is marked as being ready to go when it in fact is not ready to
1106 	 * go.
1107 	 */
1108 
1109 	for (i = 0; i < ctx.nsegs; i++) {
1110 		j = (ring->cur + i) % NFE_TX_RING_COUNT;
1111 		data = &ring->data[j];
1112 
1113 		if (sc->sc_flags & NFE_40BIT_ADDR) {
1114 			desc64 = &ring->desc64[j];
1115 #if defined(__LP64__)
1116 			desc64->physaddr[0] =
1117 			    htole32(segs[i].ds_addr >> 32);
1118 #endif
1119 			desc64->physaddr[1] =
1120 			    htole32(segs[i].ds_addr & 0xffffffff);
1121 			desc64->length = htole16(segs[i].ds_len - 1);
1122 			desc64->vtag = htole32(vtag);
1123 			desc64->flags = htole16(flags);
1124 		} else {
1125 			desc32 = &ring->desc32[j];
1126 			desc32->physaddr = htole32(segs[i].ds_addr);
1127 			desc32->length = htole16(segs[i].ds_len - 1);
1128 			desc32->flags = htole16(flags);
1129 		}
1130 
1131 		/* csum flags and vtag belong to the first fragment only */
1132 		flags &= ~(NFE_TX_IP_CSUM | NFE_TX_TCP_CSUM);
1133 		vtag = 0;
1134 
1135 		ring->queued++;
1136 		KKASSERT(ring->queued <= NFE_TX_RING_COUNT);
1137 	}
1138 
1139 	/* the whole mbuf chain has been DMA mapped, fix last descriptor */
1140 	if (sc->sc_flags & NFE_40BIT_ADDR) {
1141 		desc64->flags |= htole16(NFE_TX_LASTFRAG_V2);
1142 	} else {
1143 		if (sc->sc_flags & NFE_JUMBO_SUP)
1144 			flags = NFE_TX_LASTFRAG_V2;
1145 		else
1146 			flags = NFE_TX_LASTFRAG_V1;
1147 		desc32->flags |= htole16(flags);
1148 	}
1149 
1150 	/*
1151 	 * Set NFE_TX_VALID backwards so the hardware doesn't see the
1152 	 * whole mess until the first descriptor in the map is flagged.
1153 	 */
1154 	for (i = ctx.nsegs - 1; i >= 0; --i) {
1155 		j = (ring->cur + i) % NFE_TX_RING_COUNT;
1156 		if (sc->sc_flags & NFE_40BIT_ADDR) {
1157 			desc64 = &ring->desc64[j];
1158 			desc64->flags |= htole16(NFE_TX_VALID);
1159 		} else {
1160 			desc32 = &ring->desc32[j];
1161 			desc32->flags |= htole16(NFE_TX_VALID);
1162 		}
1163 	}
1164 	ring->cur = (ring->cur + ctx.nsegs) % NFE_TX_RING_COUNT;
1165 
1166 	/* Exchange DMA map */
1167 	data_map->map = data->map;
1168 	data->map = map;
1169 	data->m = m0;
1170 
1171 	bus_dmamap_sync(ring->data_tag, map, BUS_DMASYNC_PREWRITE);
1172 back:
1173 	if (error)
1174 		m_freem(m0);
1175 	return error;
1176 }
1177 
1178 static void
1179 nfe_start(struct ifnet *ifp)
1180 {
1181 	struct nfe_softc *sc = ifp->if_softc;
1182 	struct nfe_tx_ring *ring = &sc->txq;
1183 	int count = 0;
1184 	struct mbuf *m0;
1185 
1186 	if (ifp->if_flags & IFF_OACTIVE)
1187 		return;
1188 
1189 	if (ifq_is_empty(&ifp->if_snd))
1190 		return;
1191 
1192 	for (;;) {
1193 		m0 = ifq_dequeue(&ifp->if_snd, NULL);
1194 		if (m0 == NULL)
1195 			break;
1196 
1197 		BPF_MTAP(ifp, m0);
1198 
1199 		if (nfe_encap(sc, ring, m0) != 0) {
1200 			ifp->if_flags |= IFF_OACTIVE;
1201 			break;
1202 		}
1203 		++count;
1204 
1205 		/*
1206 		 * NOTE:
1207 		 * `m0' may be freed in nfe_encap(), so
1208 		 * it should not be touched any more.
1209 		 */
1210 	}
1211 	if (count == 0)	/* nothing sent */
1212 		return;
1213 
1214 	/* Sync TX descriptor ring */
1215 	bus_dmamap_sync(ring->tag, ring->map, BUS_DMASYNC_PREWRITE);
1216 
1217 	/* Kick Tx */
1218 	NFE_WRITE(sc, NFE_RXTX_CTL, NFE_RXTX_KICKTX | sc->rxtxctl);
1219 
1220 	/*
1221 	 * Set a timeout in case the chip goes out to lunch.
1222 	 */
1223 	ifp->if_timer = 5;
1224 }
1225 
1226 static void
1227 nfe_watchdog(struct ifnet *ifp)
1228 {
1229 	struct nfe_softc *sc = ifp->if_softc;
1230 
1231 	if (ifp->if_flags & IFF_RUNNING) {
1232 		if_printf(ifp, "watchdog timeout - lost interrupt recovered\n");
1233 		nfe_txeof(sc);
1234 		return;
1235 	}
1236 
1237 	if_printf(ifp, "watchdog timeout\n");
1238 
1239 	nfe_init(ifp->if_softc);
1240 
1241 	ifp->if_oerrors++;
1242 }
1243 
1244 static void
1245 nfe_init(void *xsc)
1246 {
1247 	struct nfe_softc *sc = xsc;
1248 	struct ifnet *ifp = &sc->arpcom.ac_if;
1249 	uint32_t tmp;
1250 	int error;
1251 
1252 	nfe_stop(sc);
1253 
1254 	/*
1255 	 * NOTE:
1256 	 * Switching between jumbo frames and normal frames should
1257 	 * be done _after_ nfe_stop() but _before_ nfe_init_rx_ring().
1258 	 */
1259 	if (ifp->if_mtu > ETHERMTU) {
1260 		sc->sc_flags |= NFE_USE_JUMBO;
1261 		sc->rxq.bufsz = NFE_JBYTES;
1262 		if (bootverbose)
1263 			if_printf(ifp, "use jumbo frames\n");
1264 	} else {
1265 		sc->sc_flags &= ~NFE_USE_JUMBO;
1266 		sc->rxq.bufsz = MCLBYTES;
1267 		if (bootverbose)
1268 			if_printf(ifp, "use non-jumbo frames\n");
1269 	}
1270 
1271 	error = nfe_init_tx_ring(sc, &sc->txq);
1272 	if (error) {
1273 		nfe_stop(sc);
1274 		return;
1275 	}
1276 
1277 	error = nfe_init_rx_ring(sc, &sc->rxq);
1278 	if (error) {
1279 		nfe_stop(sc);
1280 		return;
1281 	}
1282 
1283 	NFE_WRITE(sc, NFE_TX_UNK, 0);
1284 	NFE_WRITE(sc, NFE_STATUS, 0);
1285 
1286 	sc->rxtxctl = NFE_RXTX_BIT2;
1287 	if (sc->sc_flags & NFE_40BIT_ADDR)
1288 		sc->rxtxctl |= NFE_RXTX_V3MAGIC;
1289 	else if (sc->sc_flags & NFE_JUMBO_SUP)
1290 		sc->rxtxctl |= NFE_RXTX_V2MAGIC;
1291 
1292 	if (ifp->if_capenable & IFCAP_RXCSUM)
1293 		sc->rxtxctl |= NFE_RXTX_RXCSUM;
1294 
1295 	/*
1296 	 * Although the adapter is capable of stripping VLAN tags from received
1297 	 * frames (NFE_RXTX_VTAG_STRIP), we do not enable this functionality on
1298 	 * purpose.  This will be done in software by our network stack.
1299 	 */
1300 	if (sc->sc_flags & NFE_HW_VLAN)
1301 		sc->rxtxctl |= NFE_RXTX_VTAG_INSERT;
1302 
1303 	NFE_WRITE(sc, NFE_RXTX_CTL, NFE_RXTX_RESET | sc->rxtxctl);
1304 	DELAY(10);
1305 	NFE_WRITE(sc, NFE_RXTX_CTL, sc->rxtxctl);
1306 
1307 	if (sc->sc_flags & NFE_HW_VLAN)
1308 		NFE_WRITE(sc, NFE_VTAG_CTL, NFE_VTAG_ENABLE);
1309 
1310 	NFE_WRITE(sc, NFE_SETUP_R6, 0);
1311 
1312 	/* set MAC address */
1313 	nfe_set_macaddr(sc, sc->arpcom.ac_enaddr);
1314 
1315 	/* tell MAC where rings are in memory */
1316 #ifdef __LP64__
1317 	NFE_WRITE(sc, NFE_RX_RING_ADDR_HI, sc->rxq.physaddr >> 32);
1318 #endif
1319 	NFE_WRITE(sc, NFE_RX_RING_ADDR_LO, sc->rxq.physaddr & 0xffffffff);
1320 #ifdef __LP64__
1321 	NFE_WRITE(sc, NFE_TX_RING_ADDR_HI, sc->txq.physaddr >> 32);
1322 #endif
1323 	NFE_WRITE(sc, NFE_TX_RING_ADDR_LO, sc->txq.physaddr & 0xffffffff);
1324 
1325 	NFE_WRITE(sc, NFE_RING_SIZE,
1326 	    (sc->sc_rx_ring_count - 1) << 16 |
1327 	    (NFE_TX_RING_COUNT - 1));
1328 
1329 	NFE_WRITE(sc, NFE_RXBUFSZ, sc->rxq.bufsz);
1330 
1331 	/* force MAC to wakeup */
1332 	tmp = NFE_READ(sc, NFE_PWR_STATE);
1333 	NFE_WRITE(sc, NFE_PWR_STATE, tmp | NFE_PWR_WAKEUP);
1334 	DELAY(10);
1335 	tmp = NFE_READ(sc, NFE_PWR_STATE);
1336 	NFE_WRITE(sc, NFE_PWR_STATE, tmp | NFE_PWR_VALID);
1337 
1338 	/*
1339 	 * NFE_IMTIMER generates a periodic interrupt via NFE_IRQ_TIMER.
1340 	 * It is unclear how wide the timer is.  Base programming does
1341 	 * not seem to effect NFE_IRQ_TX_DONE or NFE_IRQ_RX_DONE so
1342 	 * we don't get any interrupt moderation.  TX moderation is
1343 	 * possible by using the timer interrupt instead of TX_DONE.
1344 	 *
1345 	 * It is unclear whether there are other bits that can be
1346 	 * set to make the NFE device actually do interrupt moderation
1347 	 * on the RX side.
1348 	 *
1349 	 * For now set a 128uS interval as a placemark, but don't use
1350 	 * the timer.
1351 	 */
1352 	if (sc->sc_imtime < 0)
1353 		NFE_WRITE(sc, NFE_IMTIMER, NFE_IMTIME_DEFAULT);
1354 	else
1355 		NFE_WRITE(sc, NFE_IMTIMER, NFE_IMTIME(sc->sc_imtime));
1356 
1357 	NFE_WRITE(sc, NFE_SETUP_R1, NFE_R1_MAGIC);
1358 	NFE_WRITE(sc, NFE_SETUP_R2, NFE_R2_MAGIC);
1359 	NFE_WRITE(sc, NFE_SETUP_R6, NFE_R6_MAGIC);
1360 
1361 	/* update MAC knowledge of PHY; generates a NFE_IRQ_LINK interrupt */
1362 	NFE_WRITE(sc, NFE_STATUS, sc->mii_phyaddr << 24 | NFE_STATUS_MAGIC);
1363 
1364 	NFE_WRITE(sc, NFE_SETUP_R4, NFE_R4_MAGIC);
1365 	NFE_WRITE(sc, NFE_WOL_CTL, NFE_WOL_MAGIC);
1366 
1367 	sc->rxtxctl &= ~NFE_RXTX_BIT2;
1368 	NFE_WRITE(sc, NFE_RXTX_CTL, sc->rxtxctl);
1369 	DELAY(10);
1370 	NFE_WRITE(sc, NFE_RXTX_CTL, NFE_RXTX_BIT1 | sc->rxtxctl);
1371 
1372 	/* set Rx filter */
1373 	nfe_setmulti(sc);
1374 
1375 	nfe_ifmedia_upd(ifp);
1376 
1377 	/* enable Rx */
1378 	NFE_WRITE(sc, NFE_RX_CTL, NFE_RX_START);
1379 
1380 	/* enable Tx */
1381 	NFE_WRITE(sc, NFE_TX_CTL, NFE_TX_START);
1382 
1383 	NFE_WRITE(sc, NFE_PHY_STATUS, 0xf);
1384 
1385 #ifdef DEVICE_POLLING
1386 	if ((ifp->if_flags & IFF_POLLING) == 0)
1387 #endif
1388 	/* enable interrupts */
1389 	NFE_WRITE(sc, NFE_IRQ_MASK, sc->sc_irq_enable);
1390 
1391 	callout_reset(&sc->sc_tick_ch, hz, nfe_tick, sc);
1392 
1393 	ifp->if_flags |= IFF_RUNNING;
1394 	ifp->if_flags &= ~IFF_OACTIVE;
1395 
1396 	/*
1397 	 * If we had stuff in the tx ring before its all cleaned out now
1398 	 * so we are not going to get an interrupt, jump-start any pending
1399 	 * output.
1400 	 */
1401 	ifp->if_start(ifp);
1402 }
1403 
1404 static void
1405 nfe_stop(struct nfe_softc *sc)
1406 {
1407 	struct ifnet *ifp = &sc->arpcom.ac_if;
1408 
1409 	callout_stop(&sc->sc_tick_ch);
1410 
1411 	ifp->if_timer = 0;
1412 	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1413 
1414 	/*
1415 	 * Are NFE_TX_CTL and NFE_RX_CTL polled by the chip microcontroller
1416 	 * or do they directly reset/terminate the DMA hardware?  Nobody
1417 	 * knows.
1418 	 *
1419 	 * Add two delays:
1420 	 *
1421 	 * (1) Delay before zeroing out NFE_TX_CTL.  This seems to help a
1422 	 * watchdog timeout that occurs after a stop/init sequence.  I am
1423 	 * theorizing that a TX KICK occuring just prior to a reinit (e.g.
1424 	 * due to dhclient) is queueing an interrupt to the microcontroller
1425 	 * which gets delayed until after we clear the control registers
1426 	 * down below, resulting in mass confusion.  TX KICK is clearly
1427 	 * hardware aided whereas the other bits in the control register
1428 	 * are more likely to be polled by the microcontroller.
1429 	 *
1430 	 * (2) Delay after zeroing out TX and RX CTL registers, under the
1431 	 * assumption that primary DMA is initiated and terminated by
1432 	 * the microcontroller and not hardware (and anyway, one can hardly
1433 	 * expect the DMA engine to just instantly stop!).  We don't want
1434 	 * to rip the rings out from under it before it has had a chance to
1435 	 * actually stop!
1436 	 */
1437 	DELAY(1000);
1438 
1439 	/* Abort Tx */
1440 	NFE_WRITE(sc, NFE_TX_CTL, 0);
1441 
1442 	/* Disable Rx */
1443 	NFE_WRITE(sc, NFE_RX_CTL, 0);
1444 
1445 	/* Disable interrupts */
1446 	NFE_WRITE(sc, NFE_IRQ_MASK, 0);
1447 
1448 	DELAY(1000);
1449 
1450 	/* Reset Tx and Rx rings */
1451 	nfe_reset_tx_ring(sc, &sc->txq);
1452 	nfe_reset_rx_ring(sc, &sc->rxq);
1453 }
1454 
1455 static int
1456 nfe_alloc_rx_ring(struct nfe_softc *sc, struct nfe_rx_ring *ring)
1457 {
1458 	int i, j, error, descsize;
1459 	void **desc;
1460 
1461 	if (sc->sc_flags & NFE_40BIT_ADDR) {
1462 		desc = (void **)&ring->desc64;
1463 		descsize = sizeof(struct nfe_desc64);
1464 	} else {
1465 		desc = (void **)&ring->desc32;
1466 		descsize = sizeof(struct nfe_desc32);
1467 	}
1468 
1469 	ring->jbuf = kmalloc(sizeof(struct nfe_jbuf) * NFE_JPOOL_COUNT,
1470 			     M_DEVBUF, M_WAITOK | M_ZERO);
1471 	ring->data = kmalloc(sizeof(struct nfe_rx_data) * sc->sc_rx_ring_count,
1472 			     M_DEVBUF, M_WAITOK | M_ZERO);
1473 
1474 	ring->bufsz = MCLBYTES;
1475 	ring->cur = ring->next = 0;
1476 
1477 	error = bus_dma_tag_create(NULL, PAGE_SIZE, 0,
1478 				   BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR,
1479 				   NULL, NULL,
1480 				   sc->sc_rx_ring_count * descsize, 1,
1481 				   sc->sc_rx_ring_count * descsize,
1482 				   0, &ring->tag);
1483 	if (error) {
1484 		if_printf(&sc->arpcom.ac_if,
1485 			  "could not create desc RX DMA tag\n");
1486 		return error;
1487 	}
1488 
1489 	error = bus_dmamem_alloc(ring->tag, desc, BUS_DMA_WAITOK | BUS_DMA_ZERO,
1490 				 &ring->map);
1491 	if (error) {
1492 		if_printf(&sc->arpcom.ac_if,
1493 			  "could not allocate RX desc DMA memory\n");
1494 		bus_dma_tag_destroy(ring->tag);
1495 		ring->tag = NULL;
1496 		return error;
1497 	}
1498 
1499 	error = bus_dmamap_load(ring->tag, ring->map, *desc,
1500 				sc->sc_rx_ring_count * descsize,
1501 				nfe_ring_dma_addr, &ring->physaddr,
1502 				BUS_DMA_WAITOK);
1503 	if (error) {
1504 		if_printf(&sc->arpcom.ac_if,
1505 			  "could not load RX desc DMA map\n");
1506 		bus_dmamem_free(ring->tag, *desc, ring->map);
1507 		bus_dma_tag_destroy(ring->tag);
1508 		ring->tag = NULL;
1509 		return error;
1510 	}
1511 
1512 	if (sc->sc_flags & NFE_JUMBO_SUP) {
1513 		error = nfe_jpool_alloc(sc, ring);
1514 		if (error) {
1515 			if_printf(&sc->arpcom.ac_if,
1516 				  "could not allocate jumbo frames\n");
1517 			return error;
1518 		}
1519 	}
1520 
1521 	error = bus_dma_tag_create(NULL, 1, 0,
1522 				   BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR,
1523 				   NULL, NULL,
1524 				   MCLBYTES, 1, MCLBYTES,
1525 				   0, &ring->data_tag);
1526 	if (error) {
1527 		if_printf(&sc->arpcom.ac_if,
1528 			  "could not create RX mbuf DMA tag\n");
1529 		return error;
1530 	}
1531 
1532 	/* Create a spare RX mbuf DMA map */
1533 	error = bus_dmamap_create(ring->data_tag, 0, &ring->data_tmpmap);
1534 	if (error) {
1535 		if_printf(&sc->arpcom.ac_if,
1536 			  "could not create spare RX mbuf DMA map\n");
1537 		bus_dma_tag_destroy(ring->data_tag);
1538 		ring->data_tag = NULL;
1539 		return error;
1540 	}
1541 
1542 	for (i = 0; i < sc->sc_rx_ring_count; i++) {
1543 		error = bus_dmamap_create(ring->data_tag, 0,
1544 					  &ring->data[i].map);
1545 		if (error) {
1546 			if_printf(&sc->arpcom.ac_if,
1547 				  "could not create %dth RX mbuf DMA mapn", i);
1548 			goto fail;
1549 		}
1550 	}
1551 	return 0;
1552 fail:
1553 	for (j = 0; j < i; ++j)
1554 		bus_dmamap_destroy(ring->data_tag, ring->data[i].map);
1555 	bus_dmamap_destroy(ring->data_tag, ring->data_tmpmap);
1556 	bus_dma_tag_destroy(ring->data_tag);
1557 	ring->data_tag = NULL;
1558 	return error;
1559 }
1560 
1561 static void
1562 nfe_reset_rx_ring(struct nfe_softc *sc, struct nfe_rx_ring *ring)
1563 {
1564 	int i;
1565 
1566 	for (i = 0; i < sc->sc_rx_ring_count; i++) {
1567 		struct nfe_rx_data *data = &ring->data[i];
1568 
1569 		if (data->m != NULL) {
1570 			if ((sc->sc_flags & NFE_USE_JUMBO) == 0)
1571 				bus_dmamap_unload(ring->data_tag, data->map);
1572 			m_freem(data->m);
1573 			data->m = NULL;
1574 		}
1575 	}
1576 	bus_dmamap_sync(ring->tag, ring->map, BUS_DMASYNC_PREWRITE);
1577 
1578 	ring->cur = ring->next = 0;
1579 }
1580 
1581 static int
1582 nfe_init_rx_ring(struct nfe_softc *sc, struct nfe_rx_ring *ring)
1583 {
1584 	int i;
1585 
1586 	for (i = 0; i < sc->sc_rx_ring_count; ++i) {
1587 		int error;
1588 
1589 		/* XXX should use a function pointer */
1590 		if (sc->sc_flags & NFE_USE_JUMBO)
1591 			error = nfe_newbuf_jumbo(sc, ring, i, 1);
1592 		else
1593 			error = nfe_newbuf_std(sc, ring, i, 1);
1594 		if (error) {
1595 			if_printf(&sc->arpcom.ac_if,
1596 				  "could not allocate RX buffer\n");
1597 			return error;
1598 		}
1599 
1600 		nfe_set_ready_rxdesc(sc, ring, i);
1601 	}
1602 	bus_dmamap_sync(ring->tag, ring->map, BUS_DMASYNC_PREWRITE);
1603 
1604 	return 0;
1605 }
1606 
1607 static void
1608 nfe_free_rx_ring(struct nfe_softc *sc, struct nfe_rx_ring *ring)
1609 {
1610 	if (ring->data_tag != NULL) {
1611 		struct nfe_rx_data *data;
1612 		int i;
1613 
1614 		for (i = 0; i < sc->sc_rx_ring_count; i++) {
1615 			data = &ring->data[i];
1616 
1617 			if (data->m != NULL) {
1618 				bus_dmamap_unload(ring->data_tag, data->map);
1619 				m_freem(data->m);
1620 			}
1621 			bus_dmamap_destroy(ring->data_tag, data->map);
1622 		}
1623 		bus_dmamap_destroy(ring->data_tag, ring->data_tmpmap);
1624 		bus_dma_tag_destroy(ring->data_tag);
1625 	}
1626 
1627 	nfe_jpool_free(sc, ring);
1628 
1629 	if (ring->jbuf != NULL)
1630 		kfree(ring->jbuf, M_DEVBUF);
1631 	if (ring->data != NULL)
1632 		kfree(ring->data, M_DEVBUF);
1633 
1634 	if (ring->tag != NULL) {
1635 		void *desc;
1636 
1637 		if (sc->sc_flags & NFE_40BIT_ADDR)
1638 			desc = ring->desc64;
1639 		else
1640 			desc = ring->desc32;
1641 
1642 		bus_dmamap_unload(ring->tag, ring->map);
1643 		bus_dmamem_free(ring->tag, desc, ring->map);
1644 		bus_dma_tag_destroy(ring->tag);
1645 	}
1646 }
1647 
1648 static struct nfe_jbuf *
1649 nfe_jalloc(struct nfe_softc *sc)
1650 {
1651 	struct ifnet *ifp = &sc->arpcom.ac_if;
1652 	struct nfe_jbuf *jbuf;
1653 
1654 	lwkt_serialize_enter(&sc->sc_jbuf_serializer);
1655 
1656 	jbuf = SLIST_FIRST(&sc->rxq.jfreelist);
1657 	if (jbuf != NULL) {
1658 		SLIST_REMOVE_HEAD(&sc->rxq.jfreelist, jnext);
1659 		jbuf->inuse = 1;
1660 	} else {
1661 		if_printf(ifp, "no free jumbo buffer\n");
1662 	}
1663 
1664 	lwkt_serialize_exit(&sc->sc_jbuf_serializer);
1665 
1666 	return jbuf;
1667 }
1668 
1669 static void
1670 nfe_jfree(void *arg)
1671 {
1672 	struct nfe_jbuf *jbuf = arg;
1673 	struct nfe_softc *sc = jbuf->sc;
1674 	struct nfe_rx_ring *ring = jbuf->ring;
1675 
1676 	if (&ring->jbuf[jbuf->slot] != jbuf)
1677 		panic("%s: free wrong jumbo buffer\n", __func__);
1678 	else if (jbuf->inuse == 0)
1679 		panic("%s: jumbo buffer already freed\n", __func__);
1680 
1681 	lwkt_serialize_enter(&sc->sc_jbuf_serializer);
1682 	atomic_subtract_int(&jbuf->inuse, 1);
1683 	if (jbuf->inuse == 0)
1684 		SLIST_INSERT_HEAD(&ring->jfreelist, jbuf, jnext);
1685 	lwkt_serialize_exit(&sc->sc_jbuf_serializer);
1686 }
1687 
1688 static void
1689 nfe_jref(void *arg)
1690 {
1691 	struct nfe_jbuf *jbuf = arg;
1692 	struct nfe_rx_ring *ring = jbuf->ring;
1693 
1694 	if (&ring->jbuf[jbuf->slot] != jbuf)
1695 		panic("%s: ref wrong jumbo buffer\n", __func__);
1696 	else if (jbuf->inuse == 0)
1697 		panic("%s: jumbo buffer already freed\n", __func__);
1698 
1699 	atomic_add_int(&jbuf->inuse, 1);
1700 }
1701 
1702 static int
1703 nfe_jpool_alloc(struct nfe_softc *sc, struct nfe_rx_ring *ring)
1704 {
1705 	struct nfe_jbuf *jbuf;
1706 	bus_addr_t physaddr;
1707 	caddr_t buf;
1708 	int i, error;
1709 
1710 	/*
1711 	 * Allocate a big chunk of DMA'able memory.
1712 	 */
1713 	error = bus_dma_tag_create(NULL, PAGE_SIZE, 0,
1714 				   BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR,
1715 				   NULL, NULL,
1716 				   NFE_JPOOL_SIZE, 1, NFE_JPOOL_SIZE,
1717 				   0, &ring->jtag);
1718 	if (error) {
1719 		if_printf(&sc->arpcom.ac_if,
1720 			  "could not create jumbo DMA tag\n");
1721 		return error;
1722 	}
1723 
1724 	error = bus_dmamem_alloc(ring->jtag, (void **)&ring->jpool,
1725 				 BUS_DMA_WAITOK, &ring->jmap);
1726 	if (error) {
1727 		if_printf(&sc->arpcom.ac_if,
1728 			  "could not allocate jumbo DMA memory\n");
1729 		bus_dma_tag_destroy(ring->jtag);
1730 		ring->jtag = NULL;
1731 		return error;
1732 	}
1733 
1734 	error = bus_dmamap_load(ring->jtag, ring->jmap, ring->jpool,
1735 				NFE_JPOOL_SIZE, nfe_ring_dma_addr, &physaddr,
1736 				BUS_DMA_WAITOK);
1737 	if (error) {
1738 		if_printf(&sc->arpcom.ac_if,
1739 			  "could not load jumbo DMA map\n");
1740 		bus_dmamem_free(ring->jtag, ring->jpool, ring->jmap);
1741 		bus_dma_tag_destroy(ring->jtag);
1742 		ring->jtag = NULL;
1743 		return error;
1744 	}
1745 
1746 	/* ..and split it into 9KB chunks */
1747 	SLIST_INIT(&ring->jfreelist);
1748 
1749 	buf = ring->jpool;
1750 	for (i = 0; i < NFE_JPOOL_COUNT; i++) {
1751 		jbuf = &ring->jbuf[i];
1752 
1753 		jbuf->sc = sc;
1754 		jbuf->ring = ring;
1755 		jbuf->inuse = 0;
1756 		jbuf->slot = i;
1757 		jbuf->buf = buf;
1758 		jbuf->physaddr = physaddr;
1759 
1760 		SLIST_INSERT_HEAD(&ring->jfreelist, jbuf, jnext);
1761 
1762 		buf += NFE_JBYTES;
1763 		physaddr += NFE_JBYTES;
1764 	}
1765 
1766 	return 0;
1767 }
1768 
1769 static void
1770 nfe_jpool_free(struct nfe_softc *sc, struct nfe_rx_ring *ring)
1771 {
1772 	if (ring->jtag != NULL) {
1773 		bus_dmamap_unload(ring->jtag, ring->jmap);
1774 		bus_dmamem_free(ring->jtag, ring->jpool, ring->jmap);
1775 		bus_dma_tag_destroy(ring->jtag);
1776 	}
1777 }
1778 
1779 static int
1780 nfe_alloc_tx_ring(struct nfe_softc *sc, struct nfe_tx_ring *ring)
1781 {
1782 	int i, j, error, descsize;
1783 	void **desc;
1784 
1785 	if (sc->sc_flags & NFE_40BIT_ADDR) {
1786 		desc = (void **)&ring->desc64;
1787 		descsize = sizeof(struct nfe_desc64);
1788 	} else {
1789 		desc = (void **)&ring->desc32;
1790 		descsize = sizeof(struct nfe_desc32);
1791 	}
1792 
1793 	ring->queued = 0;
1794 	ring->cur = ring->next = 0;
1795 
1796 	error = bus_dma_tag_create(NULL, PAGE_SIZE, 0,
1797 				   BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR,
1798 				   NULL, NULL,
1799 				   NFE_TX_RING_COUNT * descsize, 1,
1800 				   NFE_TX_RING_COUNT * descsize,
1801 				   0, &ring->tag);
1802 	if (error) {
1803 		if_printf(&sc->arpcom.ac_if,
1804 			  "could not create TX desc DMA map\n");
1805 		return error;
1806 	}
1807 
1808 	error = bus_dmamem_alloc(ring->tag, desc, BUS_DMA_WAITOK | BUS_DMA_ZERO,
1809 				 &ring->map);
1810 	if (error) {
1811 		if_printf(&sc->arpcom.ac_if,
1812 			  "could not allocate TX desc DMA memory\n");
1813 		bus_dma_tag_destroy(ring->tag);
1814 		ring->tag = NULL;
1815 		return error;
1816 	}
1817 
1818 	error = bus_dmamap_load(ring->tag, ring->map, *desc,
1819 				NFE_TX_RING_COUNT * descsize,
1820 				nfe_ring_dma_addr, &ring->physaddr,
1821 				BUS_DMA_WAITOK);
1822 	if (error) {
1823 		if_printf(&sc->arpcom.ac_if,
1824 			  "could not load TX desc DMA map\n");
1825 		bus_dmamem_free(ring->tag, *desc, ring->map);
1826 		bus_dma_tag_destroy(ring->tag);
1827 		ring->tag = NULL;
1828 		return error;
1829 	}
1830 
1831 	error = bus_dma_tag_create(NULL, PAGE_SIZE, 0,
1832 				   BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR,
1833 				   NULL, NULL,
1834 				   NFE_JBYTES * NFE_MAX_SCATTER,
1835 				   NFE_MAX_SCATTER, NFE_JBYTES,
1836 				   0, &ring->data_tag);
1837 	if (error) {
1838 		if_printf(&sc->arpcom.ac_if,
1839 			  "could not create TX buf DMA tag\n");
1840 		return error;
1841 	}
1842 
1843 	for (i = 0; i < NFE_TX_RING_COUNT; i++) {
1844 		error = bus_dmamap_create(ring->data_tag, 0,
1845 					  &ring->data[i].map);
1846 		if (error) {
1847 			if_printf(&sc->arpcom.ac_if,
1848 				  "could not create %dth TX buf DMA map\n", i);
1849 			goto fail;
1850 		}
1851 	}
1852 
1853 	return 0;
1854 fail:
1855 	for (j = 0; j < i; ++j)
1856 		bus_dmamap_destroy(ring->data_tag, ring->data[i].map);
1857 	bus_dma_tag_destroy(ring->data_tag);
1858 	ring->data_tag = NULL;
1859 	return error;
1860 }
1861 
1862 static void
1863 nfe_reset_tx_ring(struct nfe_softc *sc, struct nfe_tx_ring *ring)
1864 {
1865 	int i;
1866 
1867 	for (i = 0; i < NFE_TX_RING_COUNT; i++) {
1868 		struct nfe_tx_data *data = &ring->data[i];
1869 
1870 		if (sc->sc_flags & NFE_40BIT_ADDR)
1871 			ring->desc64[i].flags = 0;
1872 		else
1873 			ring->desc32[i].flags = 0;
1874 
1875 		if (data->m != NULL) {
1876 			bus_dmamap_sync(ring->data_tag, data->map,
1877 					BUS_DMASYNC_POSTWRITE);
1878 			bus_dmamap_unload(ring->data_tag, data->map);
1879 			m_freem(data->m);
1880 			data->m = NULL;
1881 		}
1882 	}
1883 	bus_dmamap_sync(ring->tag, ring->map, BUS_DMASYNC_PREWRITE);
1884 
1885 	ring->queued = 0;
1886 	ring->cur = ring->next = 0;
1887 }
1888 
/*
 * TX ring initialization hook.  Nothing needs to be done here at the
 * moment, so both arguments are unused and 0 (success) is always
 * returned.
 */
static int
nfe_init_tx_ring(struct nfe_softc *sc __unused,
		 struct nfe_tx_ring *ring __unused)
{
	return 0;
}
1895 
1896 static void
1897 nfe_free_tx_ring(struct nfe_softc *sc, struct nfe_tx_ring *ring)
1898 {
1899 	if (ring->data_tag != NULL) {
1900 		struct nfe_tx_data *data;
1901 		int i;
1902 
1903 		for (i = 0; i < NFE_TX_RING_COUNT; ++i) {
1904 			data = &ring->data[i];
1905 
1906 			if (data->m != NULL) {
1907 				bus_dmamap_unload(ring->data_tag, data->map);
1908 				m_freem(data->m);
1909 			}
1910 			bus_dmamap_destroy(ring->data_tag, data->map);
1911 		}
1912 
1913 		bus_dma_tag_destroy(ring->data_tag);
1914 	}
1915 
1916 	if (ring->tag != NULL) {
1917 		void *desc;
1918 
1919 		if (sc->sc_flags & NFE_40BIT_ADDR)
1920 			desc = ring->desc64;
1921 		else
1922 			desc = ring->desc32;
1923 
1924 		bus_dmamap_unload(ring->tag, ring->map);
1925 		bus_dmamem_free(ring->tag, desc, ring->map);
1926 		bus_dma_tag_destroy(ring->tag);
1927 	}
1928 }
1929 
1930 static int
1931 nfe_ifmedia_upd(struct ifnet *ifp)
1932 {
1933 	struct nfe_softc *sc = ifp->if_softc;
1934 	struct mii_data *mii = device_get_softc(sc->sc_miibus);
1935 
1936 	if (mii->mii_instance != 0) {
1937 		struct mii_softc *miisc;
1938 
1939 		LIST_FOREACH(miisc, &mii->mii_phys, mii_list)
1940 			mii_phy_reset(miisc);
1941 	}
1942 	mii_mediachg(mii);
1943 
1944 	return 0;
1945 }
1946 
1947 static void
1948 nfe_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
1949 {
1950 	struct nfe_softc *sc = ifp->if_softc;
1951 	struct mii_data *mii = device_get_softc(sc->sc_miibus);
1952 
1953 	mii_pollstat(mii);
1954 	ifmr->ifm_status = mii->mii_media_status;
1955 	ifmr->ifm_active = mii->mii_media_active;
1956 }
1957 
1958 static void
1959 nfe_setmulti(struct nfe_softc *sc)
1960 {
1961 	struct ifnet *ifp = &sc->arpcom.ac_if;
1962 	struct ifmultiaddr *ifma;
1963 	uint8_t addr[ETHER_ADDR_LEN], mask[ETHER_ADDR_LEN];
1964 	uint32_t filter = NFE_RXFILTER_MAGIC;
1965 	int i;
1966 
1967 	if ((ifp->if_flags & (IFF_ALLMULTI | IFF_PROMISC)) != 0) {
1968 		bzero(addr, ETHER_ADDR_LEN);
1969 		bzero(mask, ETHER_ADDR_LEN);
1970 		goto done;
1971 	}
1972 
1973 	bcopy(etherbroadcastaddr, addr, ETHER_ADDR_LEN);
1974 	bcopy(etherbroadcastaddr, mask, ETHER_ADDR_LEN);
1975 
1976 	LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1977 		caddr_t maddr;
1978 
1979 		if (ifma->ifma_addr->sa_family != AF_LINK)
1980 			continue;
1981 
1982 		maddr = LLADDR((struct sockaddr_dl *)ifma->ifma_addr);
1983 		for (i = 0; i < ETHER_ADDR_LEN; i++) {
1984 			addr[i] &= maddr[i];
1985 			mask[i] &= ~maddr[i];
1986 		}
1987 	}
1988 
1989 	for (i = 0; i < ETHER_ADDR_LEN; i++)
1990 		mask[i] |= addr[i];
1991 
1992 done:
1993 	addr[0] |= 0x01;	/* make sure multicast bit is set */
1994 
1995 	NFE_WRITE(sc, NFE_MULTIADDR_HI,
1996 	    addr[3] << 24 | addr[2] << 16 | addr[1] << 8 | addr[0]);
1997 	NFE_WRITE(sc, NFE_MULTIADDR_LO,
1998 	    addr[5] <<  8 | addr[4]);
1999 	NFE_WRITE(sc, NFE_MULTIMASK_HI,
2000 	    mask[3] << 24 | mask[2] << 16 | mask[1] << 8 | mask[0]);
2001 	NFE_WRITE(sc, NFE_MULTIMASK_LO,
2002 	    mask[5] <<  8 | mask[4]);
2003 
2004 	filter |= (ifp->if_flags & IFF_PROMISC) ? NFE_PROMISC : NFE_U2M;
2005 	NFE_WRITE(sc, NFE_RXFILTER, filter);
2006 }
2007 
2008 static void
2009 nfe_get_macaddr(struct nfe_softc *sc, uint8_t *addr)
2010 {
2011 	uint32_t tmp;
2012 
2013 	tmp = NFE_READ(sc, NFE_MACADDR_LO);
2014 	addr[0] = (tmp >> 8) & 0xff;
2015 	addr[1] = (tmp & 0xff);
2016 
2017 	tmp = NFE_READ(sc, NFE_MACADDR_HI);
2018 	addr[2] = (tmp >> 24) & 0xff;
2019 	addr[3] = (tmp >> 16) & 0xff;
2020 	addr[4] = (tmp >>  8) & 0xff;
2021 	addr[5] = (tmp & 0xff);
2022 }
2023 
2024 static void
2025 nfe_set_macaddr(struct nfe_softc *sc, const uint8_t *addr)
2026 {
2027 	NFE_WRITE(sc, NFE_MACADDR_LO,
2028 	    addr[5] <<  8 | addr[4]);
2029 	NFE_WRITE(sc, NFE_MACADDR_HI,
2030 	    addr[3] << 24 | addr[2] << 16 | addr[1] << 8 | addr[0]);
2031 }
2032 
2033 static void
2034 nfe_tick(void *arg)
2035 {
2036 	struct nfe_softc *sc = arg;
2037 	struct ifnet *ifp = &sc->arpcom.ac_if;
2038 	struct mii_data *mii = device_get_softc(sc->sc_miibus);
2039 
2040 	lwkt_serialize_enter(ifp->if_serializer);
2041 
2042 	mii_tick(mii);
2043 	callout_reset(&sc->sc_tick_ch, hz, nfe_tick, sc);
2044 
2045 	lwkt_serialize_exit(ifp->if_serializer);
2046 }
2047 
2048 static void
2049 nfe_ring_dma_addr(void *arg, bus_dma_segment_t *seg, int nseg, int error)
2050 {
2051 	if (error)
2052 		return;
2053 
2054 	KASSERT(nseg == 1, ("too many segments, should be 1\n"));
2055 
2056 	*((uint32_t *)arg) = seg->ds_addr;
2057 }
2058 
2059 static void
2060 nfe_buf_dma_addr(void *arg, bus_dma_segment_t *segs, int nsegs,
2061 		 bus_size_t mapsz __unused, int error)
2062 {
2063 	struct nfe_dma_ctx *ctx = arg;
2064 	int i;
2065 
2066 	if (error)
2067 		return;
2068 
2069 	KASSERT(nsegs <= ctx->nsegs,
2070 		("too many segments(%d), should be <= %d\n",
2071 		 nsegs, ctx->nsegs));
2072 
2073 	ctx->nsegs = nsegs;
2074 	for (i = 0; i < nsegs; ++i)
2075 		ctx->segs[i] = segs[i];
2076 }
2077 
2078 static int
2079 nfe_newbuf_std(struct nfe_softc *sc, struct nfe_rx_ring *ring, int idx,
2080 	       int wait)
2081 {
2082 	struct nfe_rx_data *data = &ring->data[idx];
2083 	struct nfe_dma_ctx ctx;
2084 	bus_dma_segment_t seg;
2085 	bus_dmamap_t map;
2086 	struct mbuf *m;
2087 	int error;
2088 
2089 	m = m_getcl(wait ? MB_WAIT : MB_DONTWAIT, MT_DATA, M_PKTHDR);
2090 	if (m == NULL)
2091 		return ENOBUFS;
2092 	m->m_len = m->m_pkthdr.len = MCLBYTES;
2093 
2094 	ctx.nsegs = 1;
2095 	ctx.segs = &seg;
2096 	error = bus_dmamap_load_mbuf(ring->data_tag, ring->data_tmpmap,
2097 				     m, nfe_buf_dma_addr, &ctx,
2098 				     wait ? BUS_DMA_WAITOK : BUS_DMA_NOWAIT);
2099 	if (error) {
2100 		m_freem(m);
2101 		if_printf(&sc->arpcom.ac_if, "could map RX mbuf %d\n", error);
2102 		return error;
2103 	}
2104 
2105 	/* Unload originally mapped mbuf */
2106 	bus_dmamap_unload(ring->data_tag, data->map);
2107 
2108 	/* Swap this DMA map with tmp DMA map */
2109 	map = data->map;
2110 	data->map = ring->data_tmpmap;
2111 	ring->data_tmpmap = map;
2112 
2113 	/* Caller is assumed to have collected the old mbuf */
2114 	data->m = m;
2115 
2116 	nfe_set_paddr_rxdesc(sc, ring, idx, seg.ds_addr);
2117 
2118 	bus_dmamap_sync(ring->data_tag, data->map, BUS_DMASYNC_PREREAD);
2119 	return 0;
2120 }
2121 
2122 static int
2123 nfe_newbuf_jumbo(struct nfe_softc *sc, struct nfe_rx_ring *ring, int idx,
2124 		 int wait)
2125 {
2126 	struct nfe_rx_data *data = &ring->data[idx];
2127 	struct nfe_jbuf *jbuf;
2128 	struct mbuf *m;
2129 
2130 	MGETHDR(m, wait ? MB_WAIT : MB_DONTWAIT, MT_DATA);
2131 	if (m == NULL)
2132 		return ENOBUFS;
2133 
2134 	jbuf = nfe_jalloc(sc);
2135 	if (jbuf == NULL) {
2136 		m_freem(m);
2137 		if_printf(&sc->arpcom.ac_if, "jumbo allocation failed "
2138 		    "-- packet dropped!\n");
2139 		return ENOBUFS;
2140 	}
2141 
2142 	m->m_ext.ext_arg = jbuf;
2143 	m->m_ext.ext_buf = jbuf->buf;
2144 	m->m_ext.ext_free = nfe_jfree;
2145 	m->m_ext.ext_ref = nfe_jref;
2146 	m->m_ext.ext_size = NFE_JBYTES;
2147 
2148 	m->m_data = m->m_ext.ext_buf;
2149 	m->m_flags |= M_EXT;
2150 	m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
2151 
2152 	/* Caller is assumed to have collected the old mbuf */
2153 	data->m = m;
2154 
2155 	nfe_set_paddr_rxdesc(sc, ring, idx, jbuf->physaddr);
2156 
2157 	bus_dmamap_sync(ring->jtag, ring->jmap, BUS_DMASYNC_PREREAD);
2158 	return 0;
2159 }
2160 
2161 static void
2162 nfe_set_paddr_rxdesc(struct nfe_softc *sc, struct nfe_rx_ring *ring, int idx,
2163 		     bus_addr_t physaddr)
2164 {
2165 	if (sc->sc_flags & NFE_40BIT_ADDR) {
2166 		struct nfe_desc64 *desc64 = &ring->desc64[idx];
2167 
2168 #if defined(__LP64__)
2169 		desc64->physaddr[0] = htole32(physaddr >> 32);
2170 #endif
2171 		desc64->physaddr[1] = htole32(physaddr & 0xffffffff);
2172 	} else {
2173 		struct nfe_desc32 *desc32 = &ring->desc32[idx];
2174 
2175 		desc32->physaddr = htole32(physaddr);
2176 	}
2177 }
2178 
2179 static void
2180 nfe_set_ready_rxdesc(struct nfe_softc *sc, struct nfe_rx_ring *ring, int idx)
2181 {
2182 	if (sc->sc_flags & NFE_40BIT_ADDR) {
2183 		struct nfe_desc64 *desc64 = &ring->desc64[idx];
2184 
2185 		desc64->length = htole16(ring->bufsz);
2186 		desc64->flags = htole16(NFE_RX_READY);
2187 	} else {
2188 		struct nfe_desc32 *desc32 = &ring->desc32[idx];
2189 
2190 		desc32->length = htole16(ring->bufsz);
2191 		desc32->flags = htole16(NFE_RX_READY);
2192 	}
2193 }
2194 
2195 static int
2196 nfe_sysctl_imtime(SYSCTL_HANDLER_ARGS)
2197 {
2198 	struct nfe_softc *sc = arg1;
2199 	struct ifnet *ifp = &sc->arpcom.ac_if;
2200 	int error, v;
2201 
2202 	lwkt_serialize_enter(ifp->if_serializer);
2203 
2204 	v = sc->sc_imtime;
2205 	error = sysctl_handle_int(oidp, &v, 0, req);
2206 	if (error || req->newptr == NULL)
2207 		goto back;
2208 	if (v == 0) {
2209 		error = EINVAL;
2210 		goto back;
2211 	}
2212 
2213 	if (sc->sc_imtime != v) {
2214 		int old_imtime = sc->sc_imtime;
2215 
2216 		sc->sc_imtime = v;
2217 		sc->sc_irq_enable = NFE_IRQ_ENABLE(sc);
2218 
2219 		if ((ifp->if_flags & (IFF_POLLING | IFF_RUNNING))
2220 		    == IFF_RUNNING) {
2221 			if (old_imtime > 0 && sc->sc_imtime > 0) {
2222 				NFE_WRITE(sc, NFE_IMTIMER,
2223 					  NFE_IMTIME(sc->sc_imtime));
2224 			} else if ((old_imtime * sc->sc_imtime) < 0) {
2225 				ifp->if_init(sc);
2226 			}
2227 		}
2228 	}
2229 back:
2230 	lwkt_serialize_exit(ifp->if_serializer);
2231 	return error;
2232 }
2233