xref: /dragonfly/sys/dev/netif/bnx/if_bnx.c (revision 245bd6bc)
1 /*
2  * Copyright (c) 2001 Wind River Systems
3  * Copyright (c) 1997, 1998, 1999, 2001
4  *	Bill Paul <wpaul@windriver.com>.  All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. All advertising materials mentioning features or use of this software
15  *    must display the following acknowledgement:
16  *	This product includes software developed by Bill Paul.
17  * 4. Neither the name of the author nor the names of any co-contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31  * THE POSSIBILITY OF SUCH DAMAGE.
32  *
33  * $FreeBSD: src/sys/dev/bge/if_bge.c,v 1.3.2.39 2005/07/03 03:41:18 silby Exp $
34  */
35 
36 #include "opt_bnx.h"
37 #include "opt_ifpoll.h"
38 
39 #include <sys/param.h>
40 #include <sys/bus.h>
41 #include <sys/endian.h>
42 #include <sys/kernel.h>
43 #include <sys/interrupt.h>
44 #include <sys/mbuf.h>
45 #include <sys/malloc.h>
46 #include <sys/queue.h>
47 #include <sys/rman.h>
48 #include <sys/serialize.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/sysctl.h>
52 
53 #include <netinet/ip.h>
54 #include <netinet/tcp.h>
55 
56 #include <net/bpf.h>
57 #include <net/ethernet.h>
58 #include <net/if.h>
59 #include <net/if_arp.h>
60 #include <net/if_dl.h>
61 #include <net/if_media.h>
62 #include <net/if_poll.h>
63 #include <net/if_types.h>
64 #include <net/ifq_var.h>
65 #include <net/if_ringmap.h>
66 #include <net/toeplitz.h>
67 #include <net/toeplitz2.h>
68 #include <net/vlan/if_vlan_var.h>
69 #include <net/vlan/if_vlan_ether.h>
70 
71 #include <dev/netif/mii_layer/mii.h>
72 #include <dev/netif/mii_layer/miivar.h>
73 #include <dev/netif/mii_layer/brgphyreg.h>
74 
75 #include "pcidevs.h"
76 #include <bus/pci/pcireg.h>
77 #include <bus/pci/pcivar.h>
78 
79 #include <dev/netif/bge/if_bgereg.h>
80 #include <dev/netif/bnx/if_bnxvar.h>
81 
82 /* "device miibus" required.  See GENERIC if you get errors here. */
83 #include "miibus_if.h"
84 
85 #define BNX_CSUM_FEATURES	(CSUM_IP | CSUM_TCP | CSUM_UDP)
86 
87 #define	BNX_RESET_SHUTDOWN	0
88 #define	BNX_RESET_START		1
89 #define	BNX_RESET_SUSPEND	2
90 
91 #define BNX_INTR_CKINTVL	((10 * hz) / 1000)	/* 10ms */
92 
93 #ifdef BNX_RSS_DEBUG
94 #define BNX_RSS_DPRINTF(sc, lvl, fmt, ...) \
95 do { \
96 	if (sc->bnx_rss_debug >= lvl) \
97 		if_printf(&sc->arpcom.ac_if, fmt, __VA_ARGS__); \
98 } while (0)
99 #else	/* !BNX_RSS_DEBUG */
100 #define BNX_RSS_DPRINTF(sc, lvl, fmt, ...)	((void)0)
101 #endif	/* BNX_RSS_DEBUG */
102 
103 static const struct bnx_type {
104 	uint16_t		bnx_vid;
105 	uint16_t		bnx_did;
106 	char			*bnx_name;
107 } bnx_devs[] = {
108 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5717,
109 		"Broadcom BCM5717 Gigabit Ethernet" },
110 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5717C,
111 		"Broadcom BCM5717C Gigabit Ethernet" },
112 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5718,
113 		"Broadcom BCM5718 Gigabit Ethernet" },
114 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5719,
115 		"Broadcom BCM5719 Gigabit Ethernet" },
116 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5720_ALT,
117 		"Broadcom BCM5720 Gigabit Ethernet" },
118 
119 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5725,
120 		"Broadcom BCM5725 Gigabit Ethernet" },
121 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5727,
122 		"Broadcom BCM5727 Gigabit Ethernet" },
123 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5762,
124 		"Broadcom BCM5762 Gigabit Ethernet" },
125 
126 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57761,
127 		"Broadcom BCM57761 Gigabit Ethernet" },
128 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57762,
129 		"Broadcom BCM57762 Gigabit Ethernet" },
130 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57764,
131 		"Broadcom BCM57764 Gigabit Ethernet" },
132 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57765,
133 		"Broadcom BCM57765 Gigabit Ethernet" },
134 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57766,
135 		"Broadcom BCM57766 Gigabit Ethernet" },
136 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57767,
137 		"Broadcom BCM57767 Gigabit Ethernet" },
138 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57781,
139 		"Broadcom BCM57781 Gigabit Ethernet" },
140 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57782,
141 		"Broadcom BCM57782 Gigabit Ethernet" },
142 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57785,
143 		"Broadcom BCM57785 Gigabit Ethernet" },
144 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57786,
145 		"Broadcom BCM57786 Gigabit Ethernet" },
146 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57787,
147 		"Broadcom BCM57787 Gigabit Ethernet" },
148 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57791,
149 		"Broadcom BCM57791 Fast Ethernet" },
150 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57795,
151 		"Broadcom BCM57795 Fast Ethernet" },
152 
153 	{ 0, 0, NULL }
154 };
155 
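/* Host TX producer index mailboxes, one per hardware send ring. */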
156 static const int bnx_tx_mailbox[BNX_TX_RING_MAX] = {
157 	BGE_MBX_TX_HOST_PROD0_LO,
158 	BGE_MBX_TX_HOST_PROD0_HI,
159 	BGE_MBX_TX_HOST_PROD1_LO,
160 	BGE_MBX_TX_HOST_PROD1_HI
161 };
162 
163 #define BNX_IS_JUMBO_CAPABLE(sc)	((sc)->bnx_flags & BNX_FLAG_JUMBO)
164 #define BNX_IS_5717_PLUS(sc)		((sc)->bnx_flags & BNX_FLAG_5717_PLUS)
165 #define BNX_IS_57765_PLUS(sc)		((sc)->bnx_flags & BNX_FLAG_57765_PLUS)
166 #define BNX_IS_57765_FAMILY(sc)	 \
167 	((sc)->bnx_flags & BNX_FLAG_57765_FAMILY)
168 
169 typedef int	(*bnx_eaddr_fcn_t)(struct bnx_softc *, uint8_t[]);
170 
171 static int	bnx_probe(device_t);
172 static int	bnx_attach(device_t);
173 static int	bnx_detach(device_t);
174 static void	bnx_shutdown(device_t);
175 static int	bnx_suspend(device_t);
176 static int	bnx_resume(device_t);
177 static int	bnx_miibus_readreg(device_t, int, int);
178 static int	bnx_miibus_writereg(device_t, int, int, int);
179 static void	bnx_miibus_statchg(device_t);
180 
181 static int	bnx_handle_status(struct bnx_softc *);
182 #ifdef IFPOLL_ENABLE
183 static void	bnx_npoll(struct ifnet *, struct ifpoll_info *);
184 static void	bnx_npoll_rx(struct ifnet *, void *, int);
185 static void	bnx_npoll_tx(struct ifnet *, void *, int);
186 static void	bnx_npoll_tx_notag(struct ifnet *, void *, int);
187 static void	bnx_npoll_status(struct ifnet *);
188 static void	bnx_npoll_status_notag(struct ifnet *);
189 #endif
190 static void	bnx_intr_legacy(void *);
191 static void	bnx_msi(void *);
192 static void	bnx_intr(struct bnx_softc *);
193 static void	bnx_msix_status(void *);
194 static void	bnx_msix_tx_status(void *);
195 static void	bnx_msix_rx(void *);
196 static void	bnx_msix_rxtx(void *);
197 static void	bnx_enable_intr(struct bnx_softc *);
198 static void	bnx_disable_intr(struct bnx_softc *);
199 static void	bnx_txeof(struct bnx_tx_ring *, uint16_t);
200 static void	bnx_rxeof(struct bnx_rx_ret_ring *, uint16_t, int);
201 static int	bnx_alloc_intr(struct bnx_softc *);
202 static int	bnx_setup_intr(struct bnx_softc *);
203 static void	bnx_free_intr(struct bnx_softc *);
204 static void	bnx_teardown_intr(struct bnx_softc *, int);
205 static int	bnx_alloc_msix(struct bnx_softc *);
206 static void	bnx_free_msix(struct bnx_softc *, boolean_t);
207 static void	bnx_check_intr_rxtx(void *);
208 static void	bnx_check_intr_rx(void *);
209 static void	bnx_check_intr_tx(void *);
210 static void	bnx_rx_std_refill_ithread(void *);
211 static void	bnx_rx_std_refill(void *, void *);
212 static void	bnx_rx_std_refill_sched_ipi(void *);
213 static void	bnx_rx_std_refill_stop(void *);
214 static void	bnx_rx_std_refill_sched(struct bnx_rx_ret_ring *,
215 		    struct bnx_rx_std_ring *);
216 
217 static void	bnx_start(struct ifnet *, struct ifaltq_subque *);
218 static int	bnx_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
219 static void	bnx_init(void *);
220 static void	bnx_stop(struct bnx_softc *);
221 static void	bnx_watchdog(struct ifaltq_subque *);
222 static int	bnx_ifmedia_upd(struct ifnet *);
223 static void	bnx_ifmedia_sts(struct ifnet *, struct ifmediareq *);
224 static void	bnx_tick(void *);
225 static void	bnx_serialize(struct ifnet *, enum ifnet_serialize);
226 static void	bnx_deserialize(struct ifnet *, enum ifnet_serialize);
227 static int	bnx_tryserialize(struct ifnet *, enum ifnet_serialize);
228 #ifdef INVARIANTS
229 static void	bnx_serialize_assert(struct ifnet *, enum ifnet_serialize,
230 		    boolean_t);
231 #endif
232 static void	bnx_serialize_skipmain(struct bnx_softc *);
233 static void	bnx_deserialize_skipmain(struct bnx_softc *sc);
234 
235 static int	bnx_alloc_jumbo_mem(struct bnx_softc *);
236 static void	bnx_free_jumbo_mem(struct bnx_softc *);
237 static struct bnx_jslot
238 		*bnx_jalloc(struct bnx_softc *);
239 static void	bnx_jfree(void *);
240 static void	bnx_jref(void *);
241 static int	bnx_newbuf_std(struct bnx_rx_ret_ring *, int, int);
242 static int	bnx_newbuf_jumbo(struct bnx_softc *, int, int);
243 static void	bnx_setup_rxdesc_std(struct bnx_rx_std_ring *, int);
244 static void	bnx_setup_rxdesc_jumbo(struct bnx_softc *, int);
245 static int	bnx_init_rx_ring_std(struct bnx_rx_std_ring *);
246 static void	bnx_free_rx_ring_std(struct bnx_rx_std_ring *);
247 static int	bnx_init_rx_ring_jumbo(struct bnx_softc *);
248 static void	bnx_free_rx_ring_jumbo(struct bnx_softc *);
249 static void	bnx_free_tx_ring(struct bnx_tx_ring *);
250 static int	bnx_init_tx_ring(struct bnx_tx_ring *);
251 static int	bnx_create_tx_ring(struct bnx_tx_ring *);
252 static void	bnx_destroy_tx_ring(struct bnx_tx_ring *);
253 static int	bnx_create_rx_ret_ring(struct bnx_rx_ret_ring *);
254 static void	bnx_destroy_rx_ret_ring(struct bnx_rx_ret_ring *);
255 static int	bnx_dma_alloc(device_t);
256 static void	bnx_dma_free(struct bnx_softc *);
257 static int	bnx_dma_block_alloc(struct bnx_softc *, bus_size_t,
258 		    bus_dma_tag_t *, bus_dmamap_t *, void **, bus_addr_t *);
259 static void	bnx_dma_block_free(bus_dma_tag_t, bus_dmamap_t, void *);
260 static struct mbuf *
261 		bnx_defrag_shortdma(struct mbuf *);
262 static int	bnx_encap(struct bnx_tx_ring *, struct mbuf **,
263 		    uint32_t *, int *);
264 static int	bnx_setup_tso(struct bnx_tx_ring *, struct mbuf **,
265 		    uint16_t *, uint16_t *);
266 static void	bnx_setup_serialize(struct bnx_softc *);
267 static void	bnx_set_tick_cpuid(struct bnx_softc *, boolean_t);
268 static void	bnx_setup_ring_cnt(struct bnx_softc *);
269 
270 static struct pktinfo *bnx_rss_info(struct pktinfo *,
271 		    const struct bge_rx_bd *);
272 static void	bnx_init_rss(struct bnx_softc *);
273 static void	bnx_reset(struct bnx_softc *);
274 static int	bnx_chipinit(struct bnx_softc *);
275 static int	bnx_blockinit(struct bnx_softc *);
276 static void	bnx_stop_block(struct bnx_softc *, bus_size_t, uint32_t);
277 static void	bnx_enable_msi(struct bnx_softc *, boolean_t);
278 static void	bnx_setmulti(struct bnx_softc *);
279 static void	bnx_setpromisc(struct bnx_softc *);
280 static void	bnx_stats_update_regs(struct bnx_softc *);
281 static uint32_t	bnx_dma_swap_options(struct bnx_softc *);
282 
283 static uint32_t	bnx_readmem_ind(struct bnx_softc *, uint32_t);
284 static void	bnx_writemem_ind(struct bnx_softc *, uint32_t, uint32_t);
285 #ifdef notdef
286 static uint32_t	bnx_readreg_ind(struct bnx_softc *, uint32_t);
287 #endif
288 static void	bnx_writemem_direct(struct bnx_softc *, uint32_t, uint32_t);
289 static void	bnx_writembx(struct bnx_softc *, int, int);
290 static int	bnx_read_nvram(struct bnx_softc *, caddr_t, int, int);
291 static uint8_t	bnx_eeprom_getbyte(struct bnx_softc *, uint32_t, uint8_t *);
292 static int	bnx_read_eeprom(struct bnx_softc *, caddr_t, uint32_t, size_t);
293 
294 static void	bnx_tbi_link_upd(struct bnx_softc *, uint32_t);
295 static void	bnx_copper_link_upd(struct bnx_softc *, uint32_t);
296 static void	bnx_autopoll_link_upd(struct bnx_softc *, uint32_t);
297 static void	bnx_link_poll(struct bnx_softc *);
298 
299 static int	bnx_get_eaddr_mem(struct bnx_softc *, uint8_t[]);
300 static int	bnx_get_eaddr_nvram(struct bnx_softc *, uint8_t[]);
301 static int	bnx_get_eaddr_eeprom(struct bnx_softc *, uint8_t[]);
302 static int	bnx_get_eaddr(struct bnx_softc *, uint8_t[]);
303 
304 static void	bnx_coal_change(struct bnx_softc *);
305 static int	bnx_sysctl_force_defrag(SYSCTL_HANDLER_ARGS);
306 static int	bnx_sysctl_tx_wreg(SYSCTL_HANDLER_ARGS);
307 static int	bnx_sysctl_rx_coal_ticks(SYSCTL_HANDLER_ARGS);
308 static int	bnx_sysctl_tx_coal_ticks(SYSCTL_HANDLER_ARGS);
309 static int	bnx_sysctl_rx_coal_bds(SYSCTL_HANDLER_ARGS);
310 static int	bnx_sysctl_rx_coal_bds_poll(SYSCTL_HANDLER_ARGS);
311 static int	bnx_sysctl_tx_coal_bds(SYSCTL_HANDLER_ARGS);
312 static int	bnx_sysctl_tx_coal_bds_poll(SYSCTL_HANDLER_ARGS);
313 static int	bnx_sysctl_rx_coal_bds_int(SYSCTL_HANDLER_ARGS);
314 static int	bnx_sysctl_tx_coal_bds_int(SYSCTL_HANDLER_ARGS);
315 static int	bnx_sysctl_coal_chg(SYSCTL_HANDLER_ARGS, uint32_t *,
316 		    int, int, uint32_t);
317 static int	bnx_sysctl_std_refill(SYSCTL_HANDLER_ARGS);
318 
319 static void	bnx_sig_post_reset(struct bnx_softc *, int);
320 static void	bnx_sig_pre_reset(struct bnx_softc *, int);
321 static void	bnx_ape_lock_init(struct bnx_softc *);
322 static void	bnx_ape_read_fw_ver(struct bnx_softc *);
323 static int	bnx_ape_lock(struct bnx_softc *, int);
324 static void	bnx_ape_unlock(struct bnx_softc *, int);
325 static void	bnx_ape_send_event(struct bnx_softc *, uint32_t);
326 static void	bnx_ape_driver_state_change(struct bnx_softc *, int);
327 
328 static int	bnx_msi_enable = 1;
329 static int	bnx_msix_enable = 1;
330 
331 static int	bnx_rx_rings = 0; /* auto */
332 static int	bnx_tx_rings = 0; /* auto */
333 
334 TUNABLE_INT("hw.bnx.msi.enable", &bnx_msi_enable);
335 TUNABLE_INT("hw.bnx.msix.enable", &bnx_msix_enable);
336 TUNABLE_INT("hw.bnx.rx_rings", &bnx_rx_rings);
337 TUNABLE_INT("hw.bnx.tx_rings", &bnx_tx_rings);
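/*
 * These tunables can be set from loader.conf(5), for example (the values
 * shown are the defaults; a ring count of 0 selects an automatic value):
 *
 *   hw.bnx.msi.enable=1
 *   hw.bnx.msix.enable=1
 *   hw.bnx.rx_rings=0
 *   hw.bnx.tx_rings=0
 */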
338 
339 static device_method_t bnx_methods[] = {
340 	/* Device interface */
341 	DEVMETHOD(device_probe,		bnx_probe),
342 	DEVMETHOD(device_attach,	bnx_attach),
343 	DEVMETHOD(device_detach,	bnx_detach),
344 	DEVMETHOD(device_shutdown,	bnx_shutdown),
345 	DEVMETHOD(device_suspend,	bnx_suspend),
346 	DEVMETHOD(device_resume,	bnx_resume),
347 
348 	/* bus interface */
349 	DEVMETHOD(bus_print_child,	bus_generic_print_child),
350 	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
351 
352 	/* MII interface */
353 	DEVMETHOD(miibus_readreg,	bnx_miibus_readreg),
354 	DEVMETHOD(miibus_writereg,	bnx_miibus_writereg),
355 	DEVMETHOD(miibus_statchg,	bnx_miibus_statchg),
356 
357 	DEVMETHOD_END
358 };
359 
360 static DEFINE_CLASS_0(bnx, bnx_driver, bnx_methods, sizeof(struct bnx_softc));
361 static devclass_t bnx_devclass;
362 
363 DECLARE_DUMMY_MODULE(if_bnx);
364 MODULE_DEPEND(if_bnx, miibus, 1, 1, 1);
365 DRIVER_MODULE(if_bnx, pci, bnx_driver, bnx_devclass, NULL, NULL);
366 DRIVER_MODULE(miibus, bnx, miibus_driver, miibus_devclass, NULL, NULL);
367 
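/*
 * Indirect access to the chip's internal memory: program the PCI memory
 * window base address, access the window data register, then restore the
 * base address to 0.
 */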
368 static uint32_t
369 bnx_readmem_ind(struct bnx_softc *sc, uint32_t off)
370 {
371 	device_t dev = sc->bnx_dev;
372 	uint32_t val;
373 
374 	pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, off, 4);
375 	val = pci_read_config(dev, BGE_PCI_MEMWIN_DATA, 4);
376 	pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, 0, 4);
377 	return (val);
378 }
379 
380 static void
381 bnx_writemem_ind(struct bnx_softc *sc, uint32_t off, uint32_t val)
382 {
383 	device_t dev = sc->bnx_dev;
384 
385 	pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, off, 4);
386 	pci_write_config(dev, BGE_PCI_MEMWIN_DATA, val, 4);
387 	pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, 0, 4);
388 }
389 
390 static void
391 bnx_writemem_direct(struct bnx_softc *sc, uint32_t off, uint32_t val)
392 {
393 	CSR_WRITE_4(sc, off, val);
394 }
395 
396 static void
397 bnx_writembx(struct bnx_softc *sc, int off, int val)
398 {
399 	CSR_WRITE_4(sc, off, val);
400 }
401 
402 /*
403  * Read a sequence of bytes from NVRAM.
404  */
405 static int
406 bnx_read_nvram(struct bnx_softc *sc, caddr_t dest, int off, int cnt)
407 {
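	/* Direct NVRAM reads are not implemented; always report failure. */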
408 	return (1);
409 }
410 
411 /*
412  * Read a byte of data stored in the EEPROM at address 'addr.' The
413  * BCM570x supports both the traditional bitbang interface and an
414  * auto access interface for reading the EEPROM. We use the auto
415  * access method.
416  */
417 static uint8_t
418 bnx_eeprom_getbyte(struct bnx_softc *sc, uint32_t addr, uint8_t *dest)
419 {
420 	int i;
421 	uint32_t byte = 0;
422 
423 	/*
424 	 * Enable use of auto EEPROM access so we can avoid
425 	 * having to use the bitbang method.
426 	 */
427 	BNX_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_AUTO_EEPROM);
428 
429 	/* Reset the EEPROM, load the clock period. */
430 	CSR_WRITE_4(sc, BGE_EE_ADDR,
431 	    BGE_EEADDR_RESET|BGE_EEHALFCLK(BGE_HALFCLK_384SCL));
432 	DELAY(20);
433 
434 	/* Issue the read EEPROM command. */
435 	CSR_WRITE_4(sc, BGE_EE_ADDR, BGE_EE_READCMD | addr);
436 
437 	/* Wait for completion */
438 	for (i = 0; i < BNX_TIMEOUT * 10; i++) {
439 		DELAY(10);
440 		if (CSR_READ_4(sc, BGE_EE_ADDR) & BGE_EEADDR_DONE)
441 			break;
442 	}
443 
444 	if (i == BNX_TIMEOUT * 10) {
445 		if_printf(&sc->arpcom.ac_if, "eeprom read timed out\n");
446 		return(1);
447 	}
448 
449 	/* Get result. */
450 	byte = CSR_READ_4(sc, BGE_EE_DATA);
451 
452 	*dest = (byte >> ((addr % 4) * 8)) & 0xFF;
453 
454 	return(0);
455 }
456 
457 /*
458  * Read a sequence of bytes from the EEPROM.
459  */
460 static int
461 bnx_read_eeprom(struct bnx_softc *sc, caddr_t dest, uint32_t off, size_t len)
462 {
463 	size_t i;
464 	int err;
465 	uint8_t byte;
466 
467 	for (byte = 0, err = 0, i = 0; i < len; i++) {
468 		err = bnx_eeprom_getbyte(sc, off + i, &byte);
469 		if (err)
470 			break;
471 		*(dest + i) = byte;
472 	}
473 
474 	return(err ? 1 : 0);
475 }
476 
477 static int
478 bnx_miibus_readreg(device_t dev, int phy, int reg)
479 {
480 	struct bnx_softc *sc = device_get_softc(dev);
481 	uint32_t val;
482 	int i;
483 
484 	KASSERT(phy == sc->bnx_phyno,
485 	    ("invalid phyno %d, should be %d", phy, sc->bnx_phyno));
486 
487 	if (bnx_ape_lock(sc, sc->bnx_phy_ape_lock) != 0)
488 		return 0;
489 
490 	/* Clear the autopoll bit if set, otherwise may trigger PCI errors. */
491 	if (sc->bnx_mi_mode & BGE_MIMODE_AUTOPOLL) {
492 		CSR_WRITE_4(sc, BGE_MI_MODE,
493 		    sc->bnx_mi_mode & ~BGE_MIMODE_AUTOPOLL);
494 		DELAY(80);
495 	}
496 
497 	CSR_WRITE_4(sc, BGE_MI_COMM, BGE_MICMD_READ | BGE_MICOMM_BUSY |
498 	    BGE_MIPHY(phy) | BGE_MIREG(reg));
499 
500 	/* Poll for the PHY register access to complete. */
501 	for (i = 0; i < BNX_TIMEOUT; i++) {
502 		DELAY(10);
503 		val = CSR_READ_4(sc, BGE_MI_COMM);
504 		if ((val & BGE_MICOMM_BUSY) == 0) {
505 			DELAY(5);
506 			val = CSR_READ_4(sc, BGE_MI_COMM);
507 			break;
508 		}
509 	}
510 	if (i == BNX_TIMEOUT) {
511 		if_printf(&sc->arpcom.ac_if, "PHY read timed out "
512 		    "(phy %d, reg %d, val 0x%08x)\n", phy, reg, val);
513 		val = 0;
514 	}
515 
516 	/* Restore the autopoll bit if necessary. */
517 	if (sc->bnx_mi_mode & BGE_MIMODE_AUTOPOLL) {
518 		CSR_WRITE_4(sc, BGE_MI_MODE, sc->bnx_mi_mode);
519 		DELAY(80);
520 	}
521 
522 	bnx_ape_unlock(sc, sc->bnx_phy_ape_lock);
523 
524 	if (val & BGE_MICOMM_READFAIL)
525 		return 0;
526 
527 	return (val & 0xFFFF);
528 }
529 
530 static int
531 bnx_miibus_writereg(device_t dev, int phy, int reg, int val)
532 {
533 	struct bnx_softc *sc = device_get_softc(dev);
534 	int i;
535 
536 	KASSERT(phy == sc->bnx_phyno,
537 	    ("invalid phyno %d, should be %d", phy, sc->bnx_phyno));
538 
539 	if (bnx_ape_lock(sc, sc->bnx_phy_ape_lock) != 0)
540 		return 0;
541 
542 	/* Clear the autopoll bit if set, otherwise may trigger PCI errors. */
543 	if (sc->bnx_mi_mode & BGE_MIMODE_AUTOPOLL) {
544 		CSR_WRITE_4(sc, BGE_MI_MODE,
545 		    sc->bnx_mi_mode & ~BGE_MIMODE_AUTOPOLL);
546 		DELAY(80);
547 	}
548 
549 	CSR_WRITE_4(sc, BGE_MI_COMM, BGE_MICMD_WRITE | BGE_MICOMM_BUSY |
550 	    BGE_MIPHY(phy) | BGE_MIREG(reg) | val);
551 
552 	for (i = 0; i < BNX_TIMEOUT; i++) {
553 		DELAY(10);
554 		if (!(CSR_READ_4(sc, BGE_MI_COMM) & BGE_MICOMM_BUSY)) {
555 			DELAY(5);
556 			CSR_READ_4(sc, BGE_MI_COMM); /* dummy read */
557 			break;
558 		}
559 	}
560 	if (i == BNX_TIMEOUT) {
561 		if_printf(&sc->arpcom.ac_if, "PHY write timed out "
562 		    "(phy %d, reg %d, val %d)\n", phy, reg, val);
563 	}
564 
565 	/* Restore the autopoll bit if necessary. */
566 	if (sc->bnx_mi_mode & BGE_MIMODE_AUTOPOLL) {
567 		CSR_WRITE_4(sc, BGE_MI_MODE, sc->bnx_mi_mode);
568 		DELAY(80);
569 	}
570 
571 	bnx_ape_unlock(sc, sc->bnx_phy_ape_lock);
572 
573 	return 0;
574 }
575 
576 static void
577 bnx_miibus_statchg(device_t dev)
578 {
579 	struct bnx_softc *sc;
580 	struct mii_data *mii;
581 	uint32_t mac_mode;
582 
583 	sc = device_get_softc(dev);
584 	if ((sc->arpcom.ac_if.if_flags & IFF_RUNNING) == 0)
585 		return;
586 
587 	mii = device_get_softc(sc->bnx_miibus);
588 
589 	if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) ==
590 	    (IFM_ACTIVE | IFM_AVALID)) {
591 		switch (IFM_SUBTYPE(mii->mii_media_active)) {
592 		case IFM_10_T:
593 		case IFM_100_TX:
594 			sc->bnx_link = 1;
595 			break;
596 		case IFM_1000_T:
597 		case IFM_1000_SX:
598 		case IFM_2500_SX:
599 			sc->bnx_link = 1;
600 			break;
601 		default:
602 			sc->bnx_link = 0;
603 			break;
604 		}
605 	} else {
606 		sc->bnx_link = 0;
607 	}
608 	if (sc->bnx_link == 0)
609 		return;
610 
611 	/*
612 	 * APE firmware touches these registers to keep the MAC
613 	 * connected to the outside world.  Try to keep the
614 	 * accesses atomic.
615 	 */
616 
617 	mac_mode = CSR_READ_4(sc, BGE_MAC_MODE) &
618 	    ~(BGE_MACMODE_PORTMODE | BGE_MACMODE_HALF_DUPLEX);
619 
620 	if (IFM_SUBTYPE(mii->mii_media_active) == IFM_1000_T ||
621 	    IFM_SUBTYPE(mii->mii_media_active) == IFM_1000_SX)
622 		mac_mode |= BGE_PORTMODE_GMII;
623 	else
624 		mac_mode |= BGE_PORTMODE_MII;
625 
626 	if ((mii->mii_media_active & IFM_GMASK) != IFM_FDX)
627 		mac_mode |= BGE_MACMODE_HALF_DUPLEX;
628 
629 	CSR_WRITE_4(sc, BGE_MAC_MODE, mac_mode);
630 	DELAY(40);
631 }
632 
633 /*
634  * Memory management for jumbo frames.
635  */
636 static int
637 bnx_alloc_jumbo_mem(struct bnx_softc *sc)
638 {
639 	struct ifnet *ifp = &sc->arpcom.ac_if;
640 	struct bnx_jslot *entry;
641 	uint8_t *ptr;
642 	bus_addr_t paddr;
643 	int i, error;
644 
645 	/*
646 	 * Create tag for jumbo mbufs.
647 	 * This is really a bit of a kludge. We allocate a special
648 	 * jumbo buffer pool which (thanks to the way our DMA
649 	 * memory allocation works) will consist of contiguous
650 	 * pages. This means that even though a jumbo buffer might
651 	 * be larger than a page size, we don't really need to
652 	 * map it into more than one DMA segment. However, the
653 	 * default mbuf tag will result in multi-segment mappings,
654 	 * so we have to create a special jumbo mbuf tag that
655 	 * lets us get away with mapping the jumbo buffers as
656 	 * a single segment. I think eventually the driver should
657 	 * be changed so that it uses ordinary mbufs and cluster
658 	 * buffers, i.e. jumbo frames can span multiple DMA
659 	 * descriptors. But that's a project for another day.
660 	 */
661 
662 	/*
663 	 * Create DMA stuffs for jumbo RX ring.
664 	 */
665 	error = bnx_dma_block_alloc(sc, BGE_JUMBO_RX_RING_SZ,
666 				    &sc->bnx_cdata.bnx_rx_jumbo_ring_tag,
667 				    &sc->bnx_cdata.bnx_rx_jumbo_ring_map,
668 				    (void *)&sc->bnx_ldata.bnx_rx_jumbo_ring,
669 				    &sc->bnx_ldata.bnx_rx_jumbo_ring_paddr);
670 	if (error) {
671 		if_printf(ifp, "could not create jumbo RX ring\n");
672 		return error;
673 	}
674 
675 	/*
676 	 * Create DMA stuffs for jumbo buffer block.
677 	 */
678 	error = bnx_dma_block_alloc(sc, BNX_JMEM,
679 				    &sc->bnx_cdata.bnx_jumbo_tag,
680 				    &sc->bnx_cdata.bnx_jumbo_map,
681 				    (void **)&sc->bnx_ldata.bnx_jumbo_buf,
682 				    &paddr);
683 	if (error) {
684 		if_printf(ifp, "could not create jumbo buffer\n");
685 		return error;
686 	}
687 
688 	SLIST_INIT(&sc->bnx_jfree_listhead);
689 
690 	/*
691 	 * Now divide it up into 9K pieces and save the addresses
692 	 * in an array. Note that we play an evil trick here by using
693  * the first few bytes in the buffer to hold the address
694 	 * of the softc structure for this interface. This is because
695 	 * bnx_jfree() needs it, but it is called by the mbuf management
696 	 * code which will not pass it to us explicitly.
697 	 */
698 	for (i = 0, ptr = sc->bnx_ldata.bnx_jumbo_buf; i < BNX_JSLOTS; i++) {
699 		entry = &sc->bnx_cdata.bnx_jslots[i];
700 		entry->bnx_sc = sc;
701 		entry->bnx_buf = ptr;
702 		entry->bnx_paddr = paddr;
703 		entry->bnx_inuse = 0;
704 		entry->bnx_slot = i;
705 		SLIST_INSERT_HEAD(&sc->bnx_jfree_listhead, entry, jslot_link);
706 
707 		ptr += BNX_JLEN;
708 		paddr += BNX_JLEN;
709 	}
710 	return 0;
711 }
712 
713 static void
714 bnx_free_jumbo_mem(struct bnx_softc *sc)
715 {
716 	/* Destroy jumbo RX ring. */
717 	bnx_dma_block_free(sc->bnx_cdata.bnx_rx_jumbo_ring_tag,
718 			   sc->bnx_cdata.bnx_rx_jumbo_ring_map,
719 			   sc->bnx_ldata.bnx_rx_jumbo_ring);
720 
721 	/* Destroy jumbo buffer block. */
722 	bnx_dma_block_free(sc->bnx_cdata.bnx_jumbo_tag,
723 			   sc->bnx_cdata.bnx_jumbo_map,
724 			   sc->bnx_ldata.bnx_jumbo_buf);
725 }
726 
727 /*
728  * Allocate a jumbo buffer.
729  */
730 static struct bnx_jslot *
731 bnx_jalloc(struct bnx_softc *sc)
732 {
733 	struct bnx_jslot *entry;
734 
735 	lwkt_serialize_enter(&sc->bnx_jslot_serializer);
736 	entry = SLIST_FIRST(&sc->bnx_jfree_listhead);
737 	if (entry) {
738 		SLIST_REMOVE_HEAD(&sc->bnx_jfree_listhead, jslot_link);
739 		entry->bnx_inuse = 1;
740 	} else {
741 		if_printf(&sc->arpcom.ac_if, "no free jumbo buffers\n");
742 	}
743 	lwkt_serialize_exit(&sc->bnx_jslot_serializer);
744 	return(entry);
745 }
746 
747 /*
748  * Adjust usage count on a jumbo buffer.
749  */
750 static void
751 bnx_jref(void *arg)
752 {
753 	struct bnx_jslot *entry = (struct bnx_jslot *)arg;
754 	struct bnx_softc *sc = entry->bnx_sc;
755 
756 	if (sc == NULL)
757 		panic("bnx_jref: can't find softc pointer!");
758 
759 	if (&sc->bnx_cdata.bnx_jslots[entry->bnx_slot] != entry) {
760 		panic("bnx_jref: asked to reference buffer "
761 		    "that we don't manage!");
762 	} else if (entry->bnx_inuse == 0) {
763 		panic("bnx_jref: buffer already free!");
764 	} else {
765 		atomic_add_int(&entry->bnx_inuse, 1);
766 	}
767 }
768 
769 /*
770  * Release a jumbo buffer.
771  */
772 static void
773 bnx_jfree(void *arg)
774 {
775 	struct bnx_jslot *entry = (struct bnx_jslot *)arg;
776 	struct bnx_softc *sc = entry->bnx_sc;
777 
778 	if (sc == NULL)
779 		panic("bnx_jfree: can't find softc pointer!");
780 
781 	if (&sc->bnx_cdata.bnx_jslots[entry->bnx_slot] != entry) {
782 		panic("bnx_jfree: asked to free buffer that we don't manage!");
783 	} else if (entry->bnx_inuse == 0) {
784 		panic("bnx_jfree: buffer already free!");
785 	} else {
786 		/*
787 		 * Possible MP race to 0, use the serializer.  The atomic insn
788 		 * is still needed for races against bnx_jref().
789 		 */
790 		lwkt_serialize_enter(&sc->bnx_jslot_serializer);
791 		atomic_subtract_int(&entry->bnx_inuse, 1);
792 		if (entry->bnx_inuse == 0) {
793 			SLIST_INSERT_HEAD(&sc->bnx_jfree_listhead,
794 					  entry, jslot_link);
795 		}
796 		lwkt_serialize_exit(&sc->bnx_jslot_serializer);
797 	}
798 }
799 
800 
801 /*
802  * Initialize a standard receive ring descriptor.
803  */
804 static int
805 bnx_newbuf_std(struct bnx_rx_ret_ring *ret, int i, int init)
806 {
807 	struct mbuf *m_new = NULL;
808 	bus_dma_segment_t seg;
809 	bus_dmamap_t map;
810 	int error, nsegs;
811 	struct bnx_rx_buf *rb;
812 
813 	rb = &ret->bnx_std->bnx_rx_std_buf[i];
814 	KASSERT(!rb->bnx_rx_refilled, ("RX buf %dth has been refilled", i));
815 
816 	m_new = m_getcl(init ? M_WAITOK : M_NOWAIT, MT_DATA, M_PKTHDR);
817 	if (m_new == NULL) {
818 		error = ENOBUFS;
819 		goto back;
820 	}
821 	m_new->m_len = m_new->m_pkthdr.len = MCLBYTES;
822 	m_adj(m_new, ETHER_ALIGN);
823 
824 	error = bus_dmamap_load_mbuf_segment(ret->bnx_rx_mtag,
825 	    ret->bnx_rx_tmpmap, m_new, &seg, 1, &nsegs, BUS_DMA_NOWAIT);
826 	if (error) {
827 		m_freem(m_new);
828 		goto back;
829 	}
830 
831 	if (!init) {
832 		bus_dmamap_sync(ret->bnx_rx_mtag, rb->bnx_rx_dmamap,
833 		    BUS_DMASYNC_POSTREAD);
834 		bus_dmamap_unload(ret->bnx_rx_mtag, rb->bnx_rx_dmamap);
835 	}
836 
837 	map = ret->bnx_rx_tmpmap;
838 	ret->bnx_rx_tmpmap = rb->bnx_rx_dmamap;
839 
840 	rb->bnx_rx_dmamap = map;
841 	rb->bnx_rx_mbuf = m_new;
842 	rb->bnx_rx_paddr = seg.ds_addr;
843 	rb->bnx_rx_len = m_new->m_len;
844 back:
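	/*
	 * Order any buffer updates above before setting the refilled flag;
	 * bnx_setup_rxdesc_std() consumes these fields and clears the flag
	 * around its own cpu_mfence().
	 */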
845 	cpu_sfence();
846 	rb->bnx_rx_refilled = 1;
847 	return error;
848 }
849 
850 static void
851 bnx_setup_rxdesc_std(struct bnx_rx_std_ring *std, int i)
852 {
853 	struct bnx_rx_buf *rb;
854 	struct bge_rx_bd *r;
855 	bus_addr_t paddr;
856 	int len;
857 
858 	rb = &std->bnx_rx_std_buf[i];
859 	KASSERT(rb->bnx_rx_refilled, ("RX buf %dth is not refilled", i));
860 
861 	paddr = rb->bnx_rx_paddr;
862 	len = rb->bnx_rx_len;
863 
864 	cpu_mfence();
865 
866 	rb->bnx_rx_refilled = 0;
867 
868 	r = &std->bnx_rx_std_ring[i];
869 	r->bge_addr.bge_addr_lo = BGE_ADDR_LO(paddr);
870 	r->bge_addr.bge_addr_hi = BGE_ADDR_HI(paddr);
871 	r->bge_len = len;
872 	r->bge_idx = i;
873 	r->bge_flags = BGE_RXBDFLAG_END;
874 }
875 
876 /*
877  * Initialize a jumbo receive ring descriptor. This allocates
878  * a jumbo buffer from the pool managed internally by the driver.
879  */
880 static int
881 bnx_newbuf_jumbo(struct bnx_softc *sc, int i, int init)
882 {
883 	struct mbuf *m_new = NULL;
884 	struct bnx_jslot *buf;
885 	bus_addr_t paddr;
886 
887 	/* Allocate the mbuf. */
888 	MGETHDR(m_new, init ? M_WAITOK : M_NOWAIT, MT_DATA);
889 	if (m_new == NULL)
890 		return ENOBUFS;
891 
892 	/* Allocate the jumbo buffer */
893 	buf = bnx_jalloc(sc);
894 	if (buf == NULL) {
895 		m_freem(m_new);
896 		return ENOBUFS;
897 	}
898 
899 	/* Attach the buffer to the mbuf. */
900 	m_new->m_ext.ext_arg = buf;
901 	m_new->m_ext.ext_buf = buf->bnx_buf;
902 	m_new->m_ext.ext_free = bnx_jfree;
903 	m_new->m_ext.ext_ref = bnx_jref;
904 	m_new->m_ext.ext_size = BNX_JUMBO_FRAMELEN;
905 
906 	m_new->m_flags |= M_EXT;
907 
908 	m_new->m_data = m_new->m_ext.ext_buf;
909 	m_new->m_len = m_new->m_pkthdr.len = m_new->m_ext.ext_size;
910 
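	/*
	 * Reserve ETHER_ALIGN bytes so the IP header ends up 32-bit aligned
	 * and keep the DMA address in step with the m_adj() offset.
	 */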
911 	paddr = buf->bnx_paddr;
912 	m_adj(m_new, ETHER_ALIGN);
913 	paddr += ETHER_ALIGN;
914 
915 	/* Save necessary information */
916 	sc->bnx_cdata.bnx_rx_jumbo_chain[i].bnx_rx_mbuf = m_new;
917 	sc->bnx_cdata.bnx_rx_jumbo_chain[i].bnx_rx_paddr = paddr;
918 
919 	/* Set up the descriptor. */
920 	bnx_setup_rxdesc_jumbo(sc, i);
921 	return 0;
922 }
923 
924 static void
925 bnx_setup_rxdesc_jumbo(struct bnx_softc *sc, int i)
926 {
927 	struct bge_rx_bd *r;
928 	struct bnx_rx_buf *rc;
929 
930 	r = &sc->bnx_ldata.bnx_rx_jumbo_ring[i];
931 	rc = &sc->bnx_cdata.bnx_rx_jumbo_chain[i];
932 
933 	r->bge_addr.bge_addr_lo = BGE_ADDR_LO(rc->bnx_rx_paddr);
934 	r->bge_addr.bge_addr_hi = BGE_ADDR_HI(rc->bnx_rx_paddr);
935 	r->bge_len = rc->bnx_rx_mbuf->m_len;
936 	r->bge_idx = i;
937 	r->bge_flags = BGE_RXBDFLAG_END|BGE_RXBDFLAG_JUMBO_RING;
938 }
939 
940 static int
941 bnx_init_rx_ring_std(struct bnx_rx_std_ring *std)
942 {
943 	int i, error;
944 
945 	for (i = 0; i < BGE_STD_RX_RING_CNT; i++) {
946 		/* Use the first RX return ring's tmp RX mbuf DMA map */
947 		error = bnx_newbuf_std(&std->bnx_sc->bnx_rx_ret_ring[0], i, 1);
948 		if (error)
949 			return error;
950 		bnx_setup_rxdesc_std(std, i);
951 	}
952 
953 	std->bnx_rx_std_used = 0;
954 	std->bnx_rx_std_refill = 0;
955 	std->bnx_rx_std_running = 0;
956 	cpu_sfence();
957 	lwkt_serialize_handler_enable(&std->bnx_rx_std_serialize);
958 
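	/* Advertise the fully populated standard ring via the producer index. */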
959 	std->bnx_rx_std = BGE_STD_RX_RING_CNT - 1;
960 	bnx_writembx(std->bnx_sc, BGE_MBX_RX_STD_PROD_LO, std->bnx_rx_std);
961 
962 	return(0);
963 }
964 
965 static void
966 bnx_free_rx_ring_std(struct bnx_rx_std_ring *std)
967 {
968 	int i;
969 
970 	lwkt_serialize_handler_disable(&std->bnx_rx_std_serialize);
971 
972 	for (i = 0; i < BGE_STD_RX_RING_CNT; i++) {
973 		struct bnx_rx_buf *rb = &std->bnx_rx_std_buf[i];
974 
975 		rb->bnx_rx_refilled = 0;
976 		if (rb->bnx_rx_mbuf != NULL) {
977 			bus_dmamap_unload(std->bnx_rx_mtag, rb->bnx_rx_dmamap);
978 			m_freem(rb->bnx_rx_mbuf);
979 			rb->bnx_rx_mbuf = NULL;
980 		}
981 		bzero(&std->bnx_rx_std_ring[i], sizeof(struct bge_rx_bd));
982 	}
983 }
984 
985 static int
986 bnx_init_rx_ring_jumbo(struct bnx_softc *sc)
987 {
988 	struct bge_rcb *rcb;
989 	int i, error;
990 
991 	for (i = 0; i < BGE_JUMBO_RX_RING_CNT; i++) {
992 		error = bnx_newbuf_jumbo(sc, i, 1);
993 		if (error)
994 			return error;
995 	}
996 
997 	sc->bnx_jumbo = BGE_JUMBO_RX_RING_CNT - 1;
998 
999 	rcb = &sc->bnx_ldata.bnx_info.bnx_jumbo_rx_rcb;
1000 	rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(0, 0);
1001 	CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_MAXLEN_FLAGS, rcb->bge_maxlen_flags);
1002 
1003 	bnx_writembx(sc, BGE_MBX_RX_JUMBO_PROD_LO, sc->bnx_jumbo);
1004 
1005 	return(0);
1006 }
1007 
1008 static void
1009 bnx_free_rx_ring_jumbo(struct bnx_softc *sc)
1010 {
1011 	int i;
1012 
1013 	for (i = 0; i < BGE_JUMBO_RX_RING_CNT; i++) {
1014 		struct bnx_rx_buf *rc = &sc->bnx_cdata.bnx_rx_jumbo_chain[i];
1015 
1016 		if (rc->bnx_rx_mbuf != NULL) {
1017 			m_freem(rc->bnx_rx_mbuf);
1018 			rc->bnx_rx_mbuf = NULL;
1019 		}
1020 		bzero(&sc->bnx_ldata.bnx_rx_jumbo_ring[i],
1021 		    sizeof(struct bge_rx_bd));
1022 	}
1023 }
1024 
1025 static void
1026 bnx_free_tx_ring(struct bnx_tx_ring *txr)
1027 {
1028 	int i;
1029 
1030 	for (i = 0; i < BGE_TX_RING_CNT; i++) {
1031 		struct bnx_tx_buf *buf = &txr->bnx_tx_buf[i];
1032 
1033 		if (buf->bnx_tx_mbuf != NULL) {
1034 			bus_dmamap_unload(txr->bnx_tx_mtag,
1035 			    buf->bnx_tx_dmamap);
1036 			m_freem(buf->bnx_tx_mbuf);
1037 			buf->bnx_tx_mbuf = NULL;
1038 		}
1039 		bzero(&txr->bnx_tx_ring[i], sizeof(struct bge_tx_bd));
1040 	}
1041 	txr->bnx_tx_saved_considx = BNX_TXCONS_UNSET;
1042 }
1043 
1044 static int
1045 bnx_init_tx_ring(struct bnx_tx_ring *txr)
1046 {
1047 	txr->bnx_tx_cnt = 0;
1048 	txr->bnx_tx_saved_considx = 0;
1049 	txr->bnx_tx_prodidx = 0;
1050 
1051 	/* Initialize transmit producer index for host-memory send ring. */
1052 	bnx_writembx(txr->bnx_sc, txr->bnx_tx_mbx, txr->bnx_tx_prodidx);
1053 
1054 	return(0);
1055 }
1056 
1057 static void
1058 bnx_setmulti(struct bnx_softc *sc)
1059 {
1060 	struct ifnet *ifp;
1061 	struct ifmultiaddr *ifma;
1062 	uint32_t hashes[4] = { 0, 0, 0, 0 };
1063 	int h, i;
1064 
1065 	ifp = &sc->arpcom.ac_if;
1066 
1067 	if (ifp->if_flags & IFF_ALLMULTI || ifp->if_flags & IFF_PROMISC) {
1068 		for (i = 0; i < 4; i++)
1069 			CSR_WRITE_4(sc, BGE_MAR0 + (i * 4), 0xFFFFFFFF);
1070 		return;
1071 	}
1072 
1073 	/* First, zot all the existing filters. */
1074 	for (i = 0; i < 4; i++)
1075 		CSR_WRITE_4(sc, BGE_MAR0 + (i * 4), 0);
1076 
1077 	/* Now program new ones. */
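	/*
	 * Each address is hashed with CRC32; the low 7 bits select one of
	 * 128 filter bits: bits 6-5 pick one of the four MAR registers and
	 * bits 4-0 pick the bit within that register.
	 */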
1078 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1079 		if (ifma->ifma_addr->sa_family != AF_LINK)
1080 			continue;
1081 		h = ether_crc32_le(
1082 		    LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1083 		    ETHER_ADDR_LEN) & 0x7f;
1084 		hashes[(h & 0x60) >> 5] |= 1 << (h & 0x1F);
1085 	}
1086 
1087 	for (i = 0; i < 4; i++)
1088 		CSR_WRITE_4(sc, BGE_MAR0 + (i * 4), hashes[i]);
1089 }
1090 
1091 /*
1092  * Do endian, PCI and DMA initialization. Also check the on-board ROM
1093  * self-test results.
1094  */
1095 static int
1096 bnx_chipinit(struct bnx_softc *sc)
1097 {
1098 	uint32_t dma_rw_ctl, mode_ctl;
1099 	int i;
1100 
1101 	/* Set endian type before we access any non-PCI registers. */
1102 	pci_write_config(sc->bnx_dev, BGE_PCI_MISC_CTL,
1103 	    BGE_INIT | BGE_PCIMISCCTL_TAGGED_STATUS, 4);
1104 
1105 	/*
1106 	 * Clear the MAC statistics block in the NIC's
1107 	 * internal memory.
1108 	 */
1109 	for (i = BGE_STATS_BLOCK;
1110 	    i < BGE_STATS_BLOCK_END + 1; i += sizeof(uint32_t))
1111 		BNX_MEMWIN_WRITE(sc, i, 0);
1112 
1113 	for (i = BGE_STATUS_BLOCK;
1114 	    i < BGE_STATUS_BLOCK_END + 1; i += sizeof(uint32_t))
1115 		BNX_MEMWIN_WRITE(sc, i, 0);
1116 
1117 	if (BNX_IS_57765_FAMILY(sc)) {
1118 		uint32_t val;
1119 
1120 		if (sc->bnx_chipid == BGE_CHIPID_BCM57765_A0) {
1121 			mode_ctl = CSR_READ_4(sc, BGE_MODE_CTL);
1122 			val = mode_ctl & ~BGE_MODECTL_PCIE_PORTS;
1123 
1124 			/* Access the lower 1K of PL PCI-E block registers. */
1125 			CSR_WRITE_4(sc, BGE_MODE_CTL,
1126 			    val | BGE_MODECTL_PCIE_PL_SEL);
1127 
1128 			val = CSR_READ_4(sc, BGE_PCIE_PL_LO_PHYCTL5);
1129 			val |= BGE_PCIE_PL_LO_PHYCTL5_DIS_L2CLKREQ;
1130 			CSR_WRITE_4(sc, BGE_PCIE_PL_LO_PHYCTL5, val);
1131 
1132 			CSR_WRITE_4(sc, BGE_MODE_CTL, mode_ctl);
1133 		}
1134 		if (sc->bnx_chiprev != BGE_CHIPREV_57765_AX) {
1135 			/* Fix transmit hangs */
1136 			val = CSR_READ_4(sc, BGE_CPMU_PADRNG_CTL);
1137 			val |= BGE_CPMU_PADRNG_CTL_RDIV2;
1138 			CSR_WRITE_4(sc, BGE_CPMU_PADRNG_CTL, val);
1139 
1140 			mode_ctl = CSR_READ_4(sc, BGE_MODE_CTL);
1141 			val = mode_ctl & ~BGE_MODECTL_PCIE_PORTS;
1142 
1143 			/* Access the lower 1K of DL PCI-E block registers. */
1144 			CSR_WRITE_4(sc, BGE_MODE_CTL,
1145 			    val | BGE_MODECTL_PCIE_DL_SEL);
1146 
1147 			val = CSR_READ_4(sc, BGE_PCIE_DL_LO_FTSMAX);
1148 			val &= ~BGE_PCIE_DL_LO_FTSMAX_MASK;
1149 			val |= BGE_PCIE_DL_LO_FTSMAX_VAL;
1150 			CSR_WRITE_4(sc, BGE_PCIE_DL_LO_FTSMAX, val);
1151 
1152 			CSR_WRITE_4(sc, BGE_MODE_CTL, mode_ctl);
1153 		}
1154 
1155 		val = CSR_READ_4(sc, BGE_CPMU_LSPD_10MB_CLK);
1156 		val &= ~BGE_CPMU_LSPD_10MB_MACCLK_MASK;
1157 		val |= BGE_CPMU_LSPD_10MB_MACCLK_6_25;
1158 		CSR_WRITE_4(sc, BGE_CPMU_LSPD_10MB_CLK, val);
1159 	}
1160 
1161 	/*
1162 	 * Set up the PCI DMA control register.
1163 	 */
1164 	dma_rw_ctl = pci_read_config(sc->bnx_dev, BGE_PCI_DMA_RW_CTL, 4);
1165 	/*
1166 	 * Disable 32-byte cache alignment for DMA writes to host memory.
1167 	 *
1168 	 * NOTE:
1169 	 * 64-byte cache alignment for DMA writes to host memory is still
1170 	 * enabled.
1171 	 */
1172 	dma_rw_ctl |= BGE_PCIDMARWCTL_DIS_CACHE_ALIGNMENT;
1173 	if (sc->bnx_chipid == BGE_CHIPID_BCM57765_A0)
1174 		dma_rw_ctl &= ~BGE_PCIDMARWCTL_CRDRDR_RDMA_MRRS_MSK;
1175 	/*
1176 	 * Enable HW workaround for controllers that misinterpret
1177 	 * a status tag update and leave interrupts permanently
1178 	 * disabled.
1179 	 */
1180 	if (sc->bnx_asicrev != BGE_ASICREV_BCM5717 &&
1181 	    sc->bnx_asicrev != BGE_ASICREV_BCM5762 &&
1182 	    !BNX_IS_57765_FAMILY(sc))
1183 		dma_rw_ctl |= BGE_PCIDMARWCTL_TAGGED_STATUS_WA;
1184 	if (bootverbose) {
1185 		if_printf(&sc->arpcom.ac_if, "DMA read/write %#x\n",
1186 		    dma_rw_ctl);
1187 	}
1188 	pci_write_config(sc->bnx_dev, BGE_PCI_DMA_RW_CTL, dma_rw_ctl, 4);
1189 
1190 	/*
1191 	 * Set up general mode register.
1192 	 */
1193 	mode_ctl = bnx_dma_swap_options(sc);
1194 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5720 ||
1195 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762) {
1196 		/* Retain Host-2-BMC settings written by APE firmware. */
1197 		mode_ctl |= CSR_READ_4(sc, BGE_MODE_CTL) &
1198 		    (BGE_MODECTL_BYTESWAP_B2HRX_DATA |
1199 		    BGE_MODECTL_WORDSWAP_B2HRX_DATA |
1200 		    BGE_MODECTL_B2HRX_ENABLE | BGE_MODECTL_HTX2B_ENABLE);
1201 	}
1202 	mode_ctl |= BGE_MODECTL_MAC_ATTN_INTR |
1203 	    BGE_MODECTL_HOST_SEND_BDS | BGE_MODECTL_TX_NO_PHDR_CSUM;
1204 	CSR_WRITE_4(sc, BGE_MODE_CTL, mode_ctl);
1205 
1206 	/*
1207 	 * Disable memory write invalidate.  Apparently it is not supported
1208 	 * properly by these devices.  Also ensure that INTx isn't disabled,
1209 	 * as these chips need it even when using MSI.
1210 	 */
1211 	PCI_CLRBIT(sc->bnx_dev, BGE_PCI_CMD,
1212 	    (PCIM_CMD_MWRICEN | PCIM_CMD_INTxDIS), 4);
1213 
1214 	/* Set the timer prescaler (always 66MHz) */
1215 	CSR_WRITE_4(sc, BGE_MISC_CFG, 65 << 1/*BGE_32BITTIME_66MHZ*/);
1216 
1217 	return(0);
1218 }
1219 
1220 static int
1221 bnx_blockinit(struct bnx_softc *sc)
1222 {
1223 	struct bnx_intr_data *intr;
1224 	struct bge_rcb *rcb;
1225 	bus_size_t vrcb;
1226 	bge_hostaddr taddr;
1227 	uint32_t val;
1228 	int i, limit;
1229 
1230 	/*
1231 	 * Initialize the memory window pointer register so that
1232 	 * we can access the first 32K of internal NIC RAM. This will
1233 	 * allow us to set up the TX send ring RCBs and the RX return
1234 	 * ring RCBs, plus other things which live in NIC memory.
1235 	 */
1236 	CSR_WRITE_4(sc, BGE_PCI_MEMWIN_BASEADDR, 0);
1237 
1238 	/* Configure mbuf pool watermarks */
1239 	if (BNX_IS_57765_PLUS(sc)) {
1240 		CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_READDMA_LOWAT, 0x0);
1241 		if (sc->arpcom.ac_if.if_mtu > ETHERMTU) {
1242 			CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_MACRX_LOWAT, 0x7e);
1243 			CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_HIWAT, 0xea);
1244 		} else {
1245 			CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_MACRX_LOWAT, 0x2a);
1246 			CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_HIWAT, 0xa0);
1247 		}
1248 	} else {
1249 		CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_READDMA_LOWAT, 0x0);
1250 		CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_MACRX_LOWAT, 0x10);
1251 		CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_HIWAT, 0x60);
1252 	}
1253 
1254 	/* Configure DMA resource watermarks */
1255 	CSR_WRITE_4(sc, BGE_BMAN_DMA_DESCPOOL_LOWAT, 5);
1256 	CSR_WRITE_4(sc, BGE_BMAN_DMA_DESCPOOL_HIWAT, 10);
1257 
1258 	/* Enable buffer manager */
1259 	val = BGE_BMANMODE_ENABLE | BGE_BMANMODE_LOMBUF_ATTN;
1260 	/*
1261 	 * Change the arbitration algorithm of TXMBUF read request to
1262 	 * round-robin instead of priority based for BCM5719.  When
1263 	 * TXFIFO is almost empty, RDMA will hold its request until
1264 	 * TXFIFO is not almost empty.
1265 	 */
1266 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5719)
1267 		val |= BGE_BMANMODE_NO_TX_UNDERRUN;
1268 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5717 ||
1269 	    sc->bnx_chipid == BGE_CHIPID_BCM5719_A0 ||
1270 	    sc->bnx_chipid == BGE_CHIPID_BCM5720_A0)
1271 		val |= BGE_BMANMODE_LOMBUF_ATTN;
1272 	CSR_WRITE_4(sc, BGE_BMAN_MODE, val);
1273 
1274 	/* Poll for buffer manager start indication */
1275 	for (i = 0; i < BNX_TIMEOUT; i++) {
1276 		if (CSR_READ_4(sc, BGE_BMAN_MODE) & BGE_BMANMODE_ENABLE)
1277 			break;
1278 		DELAY(10);
1279 	}
1280 
1281 	if (i == BNX_TIMEOUT) {
1282 		if_printf(&sc->arpcom.ac_if,
1283 			  "buffer manager failed to start\n");
1284 		return(ENXIO);
1285 	}
1286 
1287 	/* Enable flow-through queues */
1288 	CSR_WRITE_4(sc, BGE_FTQ_RESET, 0xFFFFFFFF);
1289 	CSR_WRITE_4(sc, BGE_FTQ_RESET, 0);
1290 
1291 	/* Wait until queue initialization is complete */
1292 	for (i = 0; i < BNX_TIMEOUT; i++) {
1293 		if (CSR_READ_4(sc, BGE_FTQ_RESET) == 0)
1294 			break;
1295 		DELAY(10);
1296 	}
1297 
1298 	if (i == BNX_TIMEOUT) {
1299 		if_printf(&sc->arpcom.ac_if,
1300 			  "flow-through queue init failed\n");
1301 		return(ENXIO);
1302 	}
1303 
1304 	/*
1305 	 * Summary of rings supported by the controller:
1306 	 *
1307 	 * Standard Receive Producer Ring
1308 	 * - This ring is used to feed receive buffers for "standard"
1309 	 *   sized frames (typically 1536 bytes) to the controller.
1310 	 *
1311 	 * Jumbo Receive Producer Ring
1312 	 * - This ring is used to feed receive buffers for jumbo sized
1313 	 *   frames (i.e. anything bigger than the "standard" frames)
1314 	 *   to the controller.
1315 	 *
1316 	 * Mini Receive Producer Ring
1317 	 * - This ring is used to feed receive buffers for "mini"
1318 	 *   sized frames to the controller.
1319 	 * - This feature required external memory for the controller
1320 	 *   but was never used in a production system.  Should always
1321 	 *   be disabled.
1322 	 *
1323 	 * Receive Return Ring
1324 	 * - After the controller has placed an incoming frame into a
1325 	 *   receive buffer that buffer is moved into a receive return
1326 	 *   ring.  The driver is then responsible for passing the
1327 	 *   buffer up to the stack.  BCM5718/BCM57785 families support
1328 	 *   multiple receive return rings.
1329 	 *
1330 	 * Send Ring
1331 	 * - This ring is used for outgoing frames.  BCM5719/BCM5720
1332 	 *   support multiple send rings.
1333 	 */
1334 
1335 	/* Initialize the standard receive producer ring control block. */
1336 	rcb = &sc->bnx_ldata.bnx_info.bnx_std_rx_rcb;
1337 	rcb->bge_hostaddr.bge_addr_lo =
1338 	    BGE_ADDR_LO(sc->bnx_rx_std_ring.bnx_rx_std_ring_paddr);
1339 	rcb->bge_hostaddr.bge_addr_hi =
1340 	    BGE_ADDR_HI(sc->bnx_rx_std_ring.bnx_rx_std_ring_paddr);
1341 	if (BNX_IS_57765_PLUS(sc)) {
1342 		/*
1343 		 * Bits 31-16: Programmable ring size (2048, 1024, 512, .., 32)
1344 		 * Bits 15-2 : Maximum RX frame size
1345 		 * Bit 1     : 1 = Ring Disabled, 0 = Ring Enabled
1346 		 * Bit 0     : Reserved
1347 		 */
1348 		rcb->bge_maxlen_flags =
1349 		    BGE_RCB_MAXLEN_FLAGS(512, BNX_MAX_FRAMELEN << 2);
1350 	} else {
1351 		/*
1352 		 * Bits 31-16: Programmable ring size (512, 256, 128, 64, 32)
1353 		 * Bits 15-2 : Reserved (should be 0)
1354 		 * Bit 1     : 1 = Ring Disabled, 0 = Ring Enabled
1355 		 * Bit 0     : Reserved
1356 		 */
1357 		rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(512, 0);
1358 	}
1359 	if (BNX_IS_5717_PLUS(sc))
1360 		rcb->bge_nicaddr = BGE_STD_RX_RINGS_5717;
1361 	else
1362 		rcb->bge_nicaddr = BGE_STD_RX_RINGS;
1363 	/* Write the standard receive producer ring control block. */
1364 	CSR_WRITE_4(sc, BGE_RX_STD_RCB_HADDR_HI, rcb->bge_hostaddr.bge_addr_hi);
1365 	CSR_WRITE_4(sc, BGE_RX_STD_RCB_HADDR_LO, rcb->bge_hostaddr.bge_addr_lo);
1366 	CSR_WRITE_4(sc, BGE_RX_STD_RCB_MAXLEN_FLAGS, rcb->bge_maxlen_flags);
1367 	if (!BNX_IS_5717_PLUS(sc))
1368 		CSR_WRITE_4(sc, BGE_RX_STD_RCB_NICADDR, rcb->bge_nicaddr);
1369 	/* Reset the standard receive producer ring producer index. */
1370 	bnx_writembx(sc, BGE_MBX_RX_STD_PROD_LO, 0);
1371 
1372 	/*
1373 	 * Initialize the jumbo RX producer ring control
1374 	 * block.  We set the 'ring disabled' bit in the
1375 	 * flags field until we're actually ready to start
1376 	 * using this ring (i.e. once we set the MTU
1377 	 * high enough to require it).
1378 	 */
1379 	if (BNX_IS_JUMBO_CAPABLE(sc)) {
1380 		rcb = &sc->bnx_ldata.bnx_info.bnx_jumbo_rx_rcb;
1381 		/* Get the jumbo receive producer ring RCB parameters. */
1382 		rcb->bge_hostaddr.bge_addr_lo =
1383 		    BGE_ADDR_LO(sc->bnx_ldata.bnx_rx_jumbo_ring_paddr);
1384 		rcb->bge_hostaddr.bge_addr_hi =
1385 		    BGE_ADDR_HI(sc->bnx_ldata.bnx_rx_jumbo_ring_paddr);
1386 		rcb->bge_maxlen_flags =
1387 		    BGE_RCB_MAXLEN_FLAGS(BNX_MAX_FRAMELEN,
1388 		    BGE_RCB_FLAG_RING_DISABLED);
1389 		if (BNX_IS_5717_PLUS(sc))
1390 			rcb->bge_nicaddr = BGE_JUMBO_RX_RINGS_5717;
1391 		else
1392 			rcb->bge_nicaddr = BGE_JUMBO_RX_RINGS;
1393 		CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_HADDR_HI,
1394 		    rcb->bge_hostaddr.bge_addr_hi);
1395 		CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_HADDR_LO,
1396 		    rcb->bge_hostaddr.bge_addr_lo);
1397 		/* Program the jumbo receive producer ring RCB parameters. */
1398 		CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_MAXLEN_FLAGS,
1399 		    rcb->bge_maxlen_flags);
1400 		CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_NICADDR, rcb->bge_nicaddr);
1401 		/* Reset the jumbo receive producer ring producer index. */
1402 		bnx_writembx(sc, BGE_MBX_RX_JUMBO_PROD_LO, 0);
1403 	}
1404 
1405 	/*
1406 	 * The BD ring replenish thresholds control how often the
1407 	 * hardware fetches new BDs from the producer rings in host
1408 	 * memory.  Setting the value too low on a busy system can
1409 	 * starve the hardware and reduce the throughput.
1410 	 *
1411 	 * Set the BD ring replenish thresholds.  The recommended
1412 	 * values are 1/8th the number of descriptors allocated to
1413 	 * each ring.
1414 	 */
1415 	val = 8;
1416 	CSR_WRITE_4(sc, BGE_RBDI_STD_REPL_THRESH, val);
1417 	if (BNX_IS_JUMBO_CAPABLE(sc)) {
1418 		CSR_WRITE_4(sc, BGE_RBDI_JUMBO_REPL_THRESH,
1419 		    BGE_JUMBO_RX_RING_CNT/8);
1420 	}
1421 	if (BNX_IS_57765_PLUS(sc)) {
1422 		CSR_WRITE_4(sc, BGE_STD_REPLENISH_LWM, 32);
1423 		CSR_WRITE_4(sc, BGE_JMB_REPLENISH_LWM, 16);
1424 	}
1425 
1426 	/*
1427 	 * Disable all send rings by setting the 'ring disabled' bit
1428 	 * in the flags field of all the TX send ring control blocks,
1429 	 * located in NIC memory.
1430 	 */
1431 	if (BNX_IS_5717_PLUS(sc))
1432 		limit = 4;
1433 	else if (BNX_IS_57765_FAMILY(sc) ||
1434 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762)
1435 		limit = 2;
1436 	else
1437 		limit = 1;
1438 	vrcb = BGE_MEMWIN_START + BGE_SEND_RING_RCB;
1439 	for (i = 0; i < limit; i++) {
1440 		RCB_WRITE_4(sc, vrcb, bge_maxlen_flags,
1441 		    BGE_RCB_MAXLEN_FLAGS(0, BGE_RCB_FLAG_RING_DISABLED));
1442 		vrcb += sizeof(struct bge_rcb);
1443 	}
1444 
1445 	/*
1446 	 * Configure send ring RCBs
1447 	 */
1448 	vrcb = BGE_MEMWIN_START + BGE_SEND_RING_RCB;
1449 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
1450 		struct bnx_tx_ring *txr = &sc->bnx_tx_ring[i];
1451 
1452 		BGE_HOSTADDR(taddr, txr->bnx_tx_ring_paddr);
1453 		RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_hi,
1454 		    taddr.bge_addr_hi);
1455 		RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_lo,
1456 		    taddr.bge_addr_lo);
1457 		RCB_WRITE_4(sc, vrcb, bge_maxlen_flags,
1458 		    BGE_RCB_MAXLEN_FLAGS(BGE_TX_RING_CNT, 0));
1459 		vrcb += sizeof(struct bge_rcb);
1460 	}
1461 
1462 	/*
1463 	 * Disable all receive return rings by setting the
1464 	 * 'ring disabled' bit in the flags field of all the receive
1465 	 * return ring control blocks, located in NIC memory.
1466 	 */
1467 	if (BNX_IS_5717_PLUS(sc)) {
1468 		/* Should be 17, use 16 until we get an SRAM map. */
1469 		limit = 16;
1470 	} else if (BNX_IS_57765_FAMILY(sc) ||
1471 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762) {
1472 		limit = 4;
1473 	} else {
1474 		limit = 1;
1475 	}
1476 	/* Disable all receive return rings. */
1477 	vrcb = BGE_MEMWIN_START + BGE_RX_RETURN_RING_RCB;
1478 	for (i = 0; i < limit; i++) {
1479 		RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_hi, 0);
1480 		RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_lo, 0);
1481 		RCB_WRITE_4(sc, vrcb, bge_maxlen_flags,
1482 		    BGE_RCB_FLAG_RING_DISABLED);
1483 		bnx_writembx(sc, BGE_MBX_RX_CONS0_LO +
1484 		    (i * (sizeof(uint64_t))), 0);
1485 		vrcb += sizeof(struct bge_rcb);
1486 	}
1487 
1488 	/*
1489 	 * Set up receive return rings.
1490 	 */
1491 	vrcb = BGE_MEMWIN_START + BGE_RX_RETURN_RING_RCB;
1492 	for (i = 0; i < sc->bnx_rx_retcnt; ++i) {
1493 		struct bnx_rx_ret_ring *ret = &sc->bnx_rx_ret_ring[i];
1494 
1495 		BGE_HOSTADDR(taddr, ret->bnx_rx_ret_ring_paddr);
1496 		RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_hi,
1497 		    taddr.bge_addr_hi);
1498 		RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_lo,
1499 		    taddr.bge_addr_lo);
1500 		RCB_WRITE_4(sc, vrcb, bge_maxlen_flags,
1501 		    BGE_RCB_MAXLEN_FLAGS(BNX_RETURN_RING_CNT, 0));
1502 		vrcb += sizeof(struct bge_rcb);
1503 	}
1504 
1505 	/* Set random backoff seed for TX */
1506 	CSR_WRITE_4(sc, BGE_TX_RANDOM_BACKOFF,
1507 	    (sc->arpcom.ac_enaddr[0] + sc->arpcom.ac_enaddr[1] +
1508 	     sc->arpcom.ac_enaddr[2] + sc->arpcom.ac_enaddr[3] +
1509 	     sc->arpcom.ac_enaddr[4] + sc->arpcom.ac_enaddr[5]) &
1510 	    BGE_TX_BACKOFF_SEED_MASK);
1511 
1512 	/* Set inter-packet gap */
1513 	val = 0x2620;
1514 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5720 ||
1515 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762) {
1516 		val |= CSR_READ_4(sc, BGE_TX_LENGTHS) &
1517 		    (BGE_TXLEN_JMB_FRM_LEN_MSK | BGE_TXLEN_CNT_DN_VAL_MSK);
1518 	}
1519 	CSR_WRITE_4(sc, BGE_TX_LENGTHS, val);
1520 
1521 	/*
1522 	 * Specify which ring to use for packets that don't match
1523 	 * any RX rules.
1524 	 */
1525 	CSR_WRITE_4(sc, BGE_RX_RULES_CFG, 0x08);
1526 
1527 	/*
1528 	 * Configure number of RX lists. One interrupt distribution
1529 	 * list, sixteen active lists, one bad frames class.
1530 	 */
1531 	CSR_WRITE_4(sc, BGE_RXLP_CFG, 0x181);
1532 
1533 	/* Initialize RX list placement stats mask. */
1534 	CSR_WRITE_4(sc, BGE_RXLP_STATS_ENABLE_MASK, 0x007FFFFF);
1535 	CSR_WRITE_4(sc, BGE_RXLP_STATS_CTL, 0x1);
1536 
1537 	/* Disable host coalescing until we get it set up */
1538 	CSR_WRITE_4(sc, BGE_HCC_MODE, 0x00000000);
1539 
1540 	/* Poll to make sure it's shut down. */
1541 	for (i = 0; i < BNX_TIMEOUT; i++) {
1542 		if (!(CSR_READ_4(sc, BGE_HCC_MODE) & BGE_HCCMODE_ENABLE))
1543 			break;
1544 		DELAY(10);
1545 	}
1546 
1547 	if (i == BNX_TIMEOUT) {
1548 		if_printf(&sc->arpcom.ac_if,
1549 			  "host coalescing engine failed to idle\n");
1550 		return(ENXIO);
1551 	}
1552 
1553 	/* Set up host coalescing defaults */
1554 	sc->bnx_coal_chg = BNX_RX_COAL_TICKS_CHG |
1555 	    BNX_TX_COAL_TICKS_CHG |
1556 	    BNX_RX_COAL_BDS_CHG |
1557 	    BNX_TX_COAL_BDS_CHG |
1558 	    BNX_RX_COAL_BDS_INT_CHG |
1559 	    BNX_TX_COAL_BDS_INT_CHG;
1560 	bnx_coal_change(sc);
1561 
1562 	/*
1563 	 * Set up addresses of status blocks
1564 	 */
1565 	intr = &sc->bnx_intr_data[0];
1566 	bzero(intr->bnx_status_block, BGE_STATUS_BLK_SZ);
1567 	CSR_WRITE_4(sc, BGE_HCC_STATUSBLK_ADDR_HI,
1568 	    BGE_ADDR_HI(intr->bnx_status_block_paddr));
1569 	CSR_WRITE_4(sc, BGE_HCC_STATUSBLK_ADDR_LO,
1570 	    BGE_ADDR_LO(intr->bnx_status_block_paddr));
1571 	for (i = 1; i < sc->bnx_intr_cnt; ++i) {
1572 		intr = &sc->bnx_intr_data[i];
1573 		bzero(intr->bnx_status_block, BGE_STATUS_BLK_SZ);
1574 		CSR_WRITE_4(sc, BGE_VEC1_STATUSBLK_ADDR_HI + ((i - 1) * 8),
1575 		    BGE_ADDR_HI(intr->bnx_status_block_paddr));
1576 		CSR_WRITE_4(sc, BGE_VEC1_STATUSBLK_ADDR_LO + ((i - 1) * 8),
1577 		    BGE_ADDR_LO(intr->bnx_status_block_paddr));
1578 	}
1579 
1580 	/* Set up status block partial update size. */
1581 	val = BGE_STATBLKSZ_32BYTE;
1582 #if 0
1583 	/*
1584 	 * Does not seem to have visible effect in both
1585 	 * bulk data (1472B UDP datagram) and tiny data
1586 	 * (18B UDP datagram) TX tests.
1587 	 */
1588 	val |= BGE_HCCMODE_CLRTICK_TX;
1589 #endif
1590 	/* Turn on host coalescing state machine */
1591 	CSR_WRITE_4(sc, BGE_HCC_MODE, val | BGE_HCCMODE_ENABLE);
1592 
1593 	/* Turn on RX BD completion state machine and enable attentions */
1594 	CSR_WRITE_4(sc, BGE_RBDC_MODE,
1595 	    BGE_RBDCMODE_ENABLE|BGE_RBDCMODE_ATTN);
1596 
1597 	/* Turn on RX list placement state machine */
1598 	CSR_WRITE_4(sc, BGE_RXLP_MODE, BGE_RXLPMODE_ENABLE);
1599 
1600 	val = BGE_MACMODE_TXDMA_ENB | BGE_MACMODE_RXDMA_ENB |
1601 	    BGE_MACMODE_RX_STATS_CLEAR | BGE_MACMODE_TX_STATS_CLEAR |
1602 	    BGE_MACMODE_RX_STATS_ENB | BGE_MACMODE_TX_STATS_ENB |
1603 	    BGE_MACMODE_FRMHDR_DMA_ENB;
1604 
1605 	if (sc->bnx_flags & BNX_FLAG_TBI)
1606 		val |= BGE_PORTMODE_TBI;
1607 	else if (sc->bnx_flags & BNX_FLAG_MII_SERDES)
1608 		val |= BGE_PORTMODE_GMII;
1609 	else
1610 		val |= BGE_PORTMODE_MII;
1611 
1612 	/* Allow APE to send/receive frames. */
1613 	if (sc->bnx_mfw_flags & BNX_MFW_ON_APE)
1614 		val |= BGE_MACMODE_APE_RX_EN | BGE_MACMODE_APE_TX_EN;
1615 
1616 	/* Turn on DMA, clear stats */
1617 	CSR_WRITE_4(sc, BGE_MAC_MODE, val);
1618 	DELAY(40);
1619 
1620 	/* Set misc. local control, enable interrupts on attentions */
1621 	BNX_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_INTR_ONATTN);
1622 
1623 #ifdef notdef
1624 	/* Assert GPIO pins for PHY reset */
1625 	BNX_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_MISCIO_OUT0|
1626 	    BGE_MLC_MISCIO_OUT1|BGE_MLC_MISCIO_OUT2);
1627 	BNX_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_MISCIO_OUTEN0|
1628 	    BGE_MLC_MISCIO_OUTEN1|BGE_MLC_MISCIO_OUTEN2);
1629 #endif
1630 
1631 	if (sc->bnx_intr_type == PCI_INTR_TYPE_MSIX)
1632 		bnx_enable_msi(sc, TRUE);
1633 
1634 	/* Turn on write DMA state machine */
1635 	val = BGE_WDMAMODE_ENABLE|BGE_WDMAMODE_ALL_ATTNS;
1636 	/* Enable host coalescing bug fix. */
1637 	val |= BGE_WDMAMODE_STATUS_TAG_FIX;
1638 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5785) {
1639 		/* Request larger DMA burst size to get better performance. */
1640 		val |= BGE_WDMAMODE_BURST_ALL_DATA;
1641 	}
1642 	CSR_WRITE_4(sc, BGE_WDMA_MODE, val);
1643 	DELAY(40);
1644 
1645 	if (BNX_IS_57765_PLUS(sc)) {
1646 		uint32_t dmactl, dmactl_reg;
1647 
1648 		if (sc->bnx_asicrev == BGE_ASICREV_BCM5762)
1649 			dmactl_reg = BGE_RDMA_RSRVCTRL2;
1650 		else
1651 			dmactl_reg = BGE_RDMA_RSRVCTRL;
1652 
1653 		dmactl = CSR_READ_4(sc, dmactl_reg);
1654 		/*
1655 		 * Adjust tx margin to prevent TX data corruption and
1656 		 * fix internal FIFO overflow.
1657 		 */
1658 		if (sc->bnx_asicrev == BGE_ASICREV_BCM5719 ||
1659 		    sc->bnx_asicrev == BGE_ASICREV_BCM5720 ||
1660 		    sc->bnx_asicrev == BGE_ASICREV_BCM5762) {
1661 			dmactl &= ~(BGE_RDMA_RSRVCTRL_FIFO_LWM_MASK |
1662 			    BGE_RDMA_RSRVCTRL_FIFO_HWM_MASK |
1663 			    BGE_RDMA_RSRVCTRL_TXMRGN_MASK);
1664 			dmactl |= BGE_RDMA_RSRVCTRL_FIFO_LWM_1_5K |
1665 			    BGE_RDMA_RSRVCTRL_FIFO_HWM_1_5K |
1666 			    BGE_RDMA_RSRVCTRL_TXMRGN_320B;
1667 		}
1668 		/*
1669 		 * Enable fix for read DMA FIFO overruns.
1670 		 * The fix is to limit the number of RX BDs
1671 		 * the hardware would fetch at a time.
1672 		 */
1673 		CSR_WRITE_4(sc, dmactl_reg,
1674 		    dmactl | BGE_RDMA_RSRVCTRL_FIFO_OFLW_FIX);
1675 	}
1676 
1677 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5719) {
1678 		CSR_WRITE_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL,
1679 		    CSR_READ_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL) |
1680 		    BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_BD_4K |
1681 		    BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_LSO_4K);
1682 	} else if (sc->bnx_asicrev == BGE_ASICREV_BCM5720 ||
1683 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762) {
1684 		uint32_t ctrl_reg;
1685 
1686 		if (sc->bnx_asicrev == BGE_ASICREV_BCM5762)
1687 			ctrl_reg = BGE_RDMA_LSO_CRPTEN_CTRL2;
1688 		else
1689 			ctrl_reg = BGE_RDMA_LSO_CRPTEN_CTRL;
1690 
1691 		/*
1692 		 * Allow 4KB burst length reads for non-LSO frames.
1693 		 * Enable 512B burst length reads for buffer descriptors.
1694 		 */
1695 		CSR_WRITE_4(sc, ctrl_reg,
1696 		    CSR_READ_4(sc, ctrl_reg) |
1697 		    BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_BD_512 |
1698 		    BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_LSO_4K);
1699 	}
1700 
1701 	/* Turn on read DMA state machine */
1702 	val = BGE_RDMAMODE_ENABLE | BGE_RDMAMODE_ALL_ATTNS;
1703 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5717)
1704 		val |= BGE_RDMAMODE_MULT_DMA_RD_DIS;
1705 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5784 ||
1706 	    sc->bnx_asicrev == BGE_ASICREV_BCM5785 ||
1707 	    sc->bnx_asicrev == BGE_ASICREV_BCM57780) {
1708 		val |= BGE_RDMAMODE_BD_SBD_CRPT_ATTN |
1709 		    BGE_RDMAMODE_MBUF_RBD_CRPT_ATTN |
1710 		    BGE_RDMAMODE_MBUF_SBD_CRPT_ATTN;
1711 	}
1712 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5720 ||
1713 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762) {
1714 		val |= CSR_READ_4(sc, BGE_RDMA_MODE) &
1715 		    BGE_RDMAMODE_H2BNC_VLAN_DET;
1716 		/*
1717 		 * Allow multiple outstanding read requests from
1718 		 * non-LSO read DMA engine.
1719 		 */
1720 		val &= ~BGE_RDMAMODE_MULT_DMA_RD_DIS;
1721 	}
1722 	if (sc->bnx_asicrev == BGE_ASICREV_BCM57766)
1723 		val |= BGE_RDMAMODE_JMB_2K_MMRR;
1724 	if (sc->bnx_flags & BNX_FLAG_TSO)
1725 		val |= BGE_RDMAMODE_TSO4_ENABLE;
1726 	val |= BGE_RDMAMODE_FIFO_LONG_BURST;
1727 	CSR_WRITE_4(sc, BGE_RDMA_MODE, val);
1728 	DELAY(40);
1729 
1730 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5719 ||
1731 	    sc->bnx_asicrev == BGE_ASICREV_BCM5720) {
1732 		uint32_t thresh;
1733 
1734 		thresh = ETHERMTU_JUMBO;
1735 		if (sc->bnx_chipid == BGE_CHIPID_BCM5719_A0)
1736 			thresh = ETHERMTU;
1737 
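		/*
		 * Scan the RDMA length channels; if any of them has seen
		 * a frame longer than the threshold, enable the RDMA TX
		 * length workaround (cleared again in
		 * bnx_stats_update_regs()).
		 */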
1738 		for (i = 0; i < BGE_RDMA_NCHAN; ++i) {
1739 			if (CSR_READ_4(sc, BGE_RDMA_LENGTH + (i << 2)) > thresh)
1740 				break;
1741 		}
1742 		if (i < BGE_RDMA_NCHAN) {
1743 			if (bootverbose) {
1744 				if_printf(&sc->arpcom.ac_if,
1745 				    "enable RDMA WA\n");
1746 			}
1747 			if (sc->bnx_asicrev == BGE_ASICREV_BCM5719)
1748 				sc->bnx_rdma_wa = BGE_RDMA_TX_LENGTH_WA_5719;
1749 			else
1750 				sc->bnx_rdma_wa = BGE_RDMA_TX_LENGTH_WA_5720;
1751 			CSR_WRITE_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL,
1752 			    CSR_READ_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL) |
1753 			    sc->bnx_rdma_wa);
1754 		} else {
1755 			sc->bnx_rdma_wa = 0;
1756 		}
1757 	}
1758 
1759 	/* Turn on RX data completion state machine */
1760 	CSR_WRITE_4(sc, BGE_RDC_MODE, BGE_RDCMODE_ENABLE);
1761 
1762 	/* Turn on RX BD initiator state machine */
1763 	CSR_WRITE_4(sc, BGE_RBDI_MODE, BGE_RBDIMODE_ENABLE);
1764 
1765 	/* Turn on RX data and RX BD initiator state machine */
1766 	CSR_WRITE_4(sc, BGE_RDBDI_MODE, BGE_RDBDIMODE_ENABLE);
1767 
1768 	/* Turn on send BD completion state machine */
1769 	CSR_WRITE_4(sc, BGE_SBDC_MODE, BGE_SBDCMODE_ENABLE);
1770 
1771 	/* Turn on send data completion state machine */
1772 	val = BGE_SDCMODE_ENABLE;
1773 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5761)
1774 		val |= BGE_SDCMODE_CDELAY;
1775 	CSR_WRITE_4(sc, BGE_SDC_MODE, val);
1776 
1777 	/* Turn on send data initiator state machine */
1778 	if (sc->bnx_flags & BNX_FLAG_TSO) {
1779 		CSR_WRITE_4(sc, BGE_SDI_MODE, BGE_SDIMODE_ENABLE |
1780 		    BGE_SDIMODE_HW_LSO_PRE_DMA);
1781 	} else {
1782 		CSR_WRITE_4(sc, BGE_SDI_MODE, BGE_SDIMODE_ENABLE);
1783 	}
1784 
1785 	/* Turn on send BD initiator state machine */
1786 	val = BGE_SBDIMODE_ENABLE;
1787 	if (sc->bnx_tx_ringcnt > 1)
1788 		val |= BGE_SBDIMODE_MULTI_TXR;
1789 	CSR_WRITE_4(sc, BGE_SBDI_MODE, val);
1790 
1791 	/* Turn on send BD selector state machine */
1792 	CSR_WRITE_4(sc, BGE_SRS_MODE, BGE_SRSMODE_ENABLE);
1793 
1794 	CSR_WRITE_4(sc, BGE_SDI_STATS_ENABLE_MASK, 0x007FFFFF);
1795 	CSR_WRITE_4(sc, BGE_SDI_STATS_CTL,
1796 	    BGE_SDISTATSCTL_ENABLE|BGE_SDISTATSCTL_FASTER);
1797 
1798 	/* ack/clear link change events */
1799 	CSR_WRITE_4(sc, BGE_MAC_STS, BGE_MACSTAT_SYNC_CHANGED|
1800 	    BGE_MACSTAT_CFG_CHANGED|BGE_MACSTAT_MI_COMPLETE|
1801 	    BGE_MACSTAT_LINK_CHANGED);
1802 	CSR_WRITE_4(sc, BGE_MI_STS, 0);
1803 
1804 	/*
1805 	 * Enable attention when the link has changed state for
1806 	 * devices that use auto polling.
1807 	 */
1808 	if (sc->bnx_flags & BNX_FLAG_TBI) {
1809 		CSR_WRITE_4(sc, BGE_MI_STS, BGE_MISTS_LINK);
1810 	} else {
1811 		if (sc->bnx_mi_mode & BGE_MIMODE_AUTOPOLL) {
1812 			CSR_WRITE_4(sc, BGE_MI_MODE, sc->bnx_mi_mode);
1813 			DELAY(80);
1814 		}
1815 	}
1816 
1817 	/*
1818 	 * Clear any pending link state attention.
1819 	 * Otherwise some link state change events may be lost until attention
1820 	 * is cleared by the bnx_intr() -> bnx_softc.bnx_link_upd() sequence.
1821 	 * This is not necessary on newer BCM chips - perhaps enabling link
1822 	 * state change attentions implies clearing pending attention.
1823 	 */
1824 	CSR_WRITE_4(sc, BGE_MAC_STS, BGE_MACSTAT_SYNC_CHANGED|
1825 	    BGE_MACSTAT_CFG_CHANGED|BGE_MACSTAT_MI_COMPLETE|
1826 	    BGE_MACSTAT_LINK_CHANGED);
1827 
1828 	/* Enable link state change attentions. */
1829 	BNX_SETBIT(sc, BGE_MAC_EVT_ENB, BGE_EVTENB_LINK_CHANGED);
1830 
1831 	return(0);
1832 }
1833 
1834 /*
1835  * Probe for a Broadcom chip. Check the PCI vendor and device IDs
1836  * against our list and return its name if we find a match. The
1837  * Broadcom controller contains VPD support, so the device name
1838  * string could be read from the controller itself, but that is
1839  * slow; we simply announce the compiled-in name from the match
1840  * table.
1841  */
1842 static int
1843 bnx_probe(device_t dev)
1844 {
1845 	const struct bnx_type *t;
1846 	uint16_t product, vendor;
1847 
1848 	if (!pci_is_pcie(dev))
1849 		return ENXIO;
1850 
1851 	product = pci_get_device(dev);
1852 	vendor = pci_get_vendor(dev);
1853 
1854 	for (t = bnx_devs; t->bnx_name != NULL; t++) {
1855 		if (vendor == t->bnx_vid && product == t->bnx_did)
1856 			break;
1857 	}
1858 	if (t->bnx_name == NULL)
1859 		return ENXIO;
1860 
1861 	device_set_desc(dev, t->bnx_name);
1862 	return 0;
1863 }
1864 
1865 static int
1866 bnx_attach(device_t dev)
1867 {
1868 	struct ifnet *ifp;
1869 	struct bnx_softc *sc;
1870 	struct bnx_rx_std_ring *std;
1871 	struct sysctl_ctx_list *ctx;
1872 	struct sysctl_oid_list *tree;
1873 	uint32_t hwcfg = 0;
1874 	int error = 0, rid, capmask, i, std_cpuid, std_cpuid_def;
1875 	uint8_t ether_addr[ETHER_ADDR_LEN];
1876 	uint16_t product;
1877 	uintptr_t mii_priv = 0;
1878 #if defined(BNX_TSO_DEBUG) || defined(BNX_RSS_DEBUG) || defined(BNX_TSS_DEBUG)
1879 	char desc[32];
1880 #endif
1881 
1882 	sc = device_get_softc(dev);
1883 	sc->bnx_dev = dev;
1884 	callout_init_mp(&sc->bnx_tick_timer);
1885 	lwkt_serialize_init(&sc->bnx_jslot_serializer);
1886 	lwkt_serialize_init(&sc->bnx_main_serialize);
1887 
1888 	/* Always setup interrupt mailboxes */
1889 	for (i = 0; i < BNX_INTR_MAX; ++i) {
1890 		callout_init_mp(&sc->bnx_intr_data[i].bnx_intr_timer);
1891 		sc->bnx_intr_data[i].bnx_sc = sc;
1892 		sc->bnx_intr_data[i].bnx_intr_mbx = BGE_MBX_IRQ0_LO + (i * 8);
1893 		sc->bnx_intr_data[i].bnx_intr_rid = -1;
1894 		sc->bnx_intr_data[i].bnx_intr_cpuid = -1;
1895 	}
1896 
1897 	sc->bnx_func_addr = pci_get_function(dev);
1898 	product = pci_get_device(dev);
1899 
1900 #ifndef BURN_BRIDGES
1901 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
1902 		uint32_t irq, mem;
1903 
1904 		irq = pci_read_config(dev, PCIR_INTLINE, 4);
1905 		mem = pci_read_config(dev, BGE_PCI_BAR0, 4);
1906 
1907 		device_printf(dev, "chip is in D%d power mode "
1908 		    "-- setting to D0\n", pci_get_powerstate(dev));
1909 
1910 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
1911 
1912 		pci_write_config(dev, PCIR_INTLINE, irq, 4);
1913 		pci_write_config(dev, BGE_PCI_BAR0, mem, 4);
1914 	}
1915 #endif	/* !BURN_BRIDGES */
1916 
1917 	/*
1918 	 * Map control/status registers.
1919 	 */
1920 	pci_enable_busmaster(dev);
1921 
1922 	rid = BGE_PCI_BAR0;
1923 	sc->bnx_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
1924 	    RF_ACTIVE);
1925 
1926 	if (sc->bnx_res == NULL) {
1927 		device_printf(dev, "couldn't map memory\n");
1928 		return ENXIO;
1929 	}
1930 
1931 	sc->bnx_btag = rman_get_bustag(sc->bnx_res);
1932 	sc->bnx_bhandle = rman_get_bushandle(sc->bnx_res);
1933 
1934 	/* Save various chip information */
1935 	sc->bnx_chipid =
1936 	    pci_read_config(dev, BGE_PCI_MISC_CTL, 4) >>
1937 	    BGE_PCIMISCCTL_ASICREV_SHIFT;
1938 	if (BGE_ASICREV(sc->bnx_chipid) == BGE_ASICREV_USE_PRODID_REG) {
1939 		/* All chips having dedicated ASICREV register have CPMU */
1940 		sc->bnx_flags |= BNX_FLAG_CPMU;
1941 
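		/*
		 * The ASIC revision is not reported in BGE_PCI_MISC_CTL
		 * for these chips; read it from the product-specific
		 * ASICREV register selected by the PCI device ID below.
		 */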
1942 		switch (product) {
1943 		case PCI_PRODUCT_BROADCOM_BCM5717:
1944 		case PCI_PRODUCT_BROADCOM_BCM5717C:
1945 		case PCI_PRODUCT_BROADCOM_BCM5718:
1946 		case PCI_PRODUCT_BROADCOM_BCM5719:
1947 		case PCI_PRODUCT_BROADCOM_BCM5720_ALT:
1948 		case PCI_PRODUCT_BROADCOM_BCM5725:
1949 		case PCI_PRODUCT_BROADCOM_BCM5727:
1950 		case PCI_PRODUCT_BROADCOM_BCM5762:
1951 		case PCI_PRODUCT_BROADCOM_BCM57764:
1952 		case PCI_PRODUCT_BROADCOM_BCM57767:
1953 		case PCI_PRODUCT_BROADCOM_BCM57787:
1954 			sc->bnx_chipid = pci_read_config(dev,
1955 			    BGE_PCI_GEN2_PRODID_ASICREV, 4);
1956 			break;
1957 
1958 		case PCI_PRODUCT_BROADCOM_BCM57761:
1959 		case PCI_PRODUCT_BROADCOM_BCM57762:
1960 		case PCI_PRODUCT_BROADCOM_BCM57765:
1961 		case PCI_PRODUCT_BROADCOM_BCM57766:
1962 		case PCI_PRODUCT_BROADCOM_BCM57781:
1963 		case PCI_PRODUCT_BROADCOM_BCM57782:
1964 		case PCI_PRODUCT_BROADCOM_BCM57785:
1965 		case PCI_PRODUCT_BROADCOM_BCM57786:
1966 		case PCI_PRODUCT_BROADCOM_BCM57791:
1967 		case PCI_PRODUCT_BROADCOM_BCM57795:
1968 			sc->bnx_chipid = pci_read_config(dev,
1969 			    BGE_PCI_GEN15_PRODID_ASICREV, 4);
1970 			break;
1971 
1972 		default:
1973 			sc->bnx_chipid = pci_read_config(dev,
1974 			    BGE_PCI_PRODID_ASICREV, 4);
1975 			break;
1976 		}
1977 	}
1978 	if (sc->bnx_chipid == BGE_CHIPID_BCM5717_C0)
1979 		sc->bnx_chipid = BGE_CHIPID_BCM5720_A0;
1980 
1981 	sc->bnx_asicrev = BGE_ASICREV(sc->bnx_chipid);
1982 	sc->bnx_chiprev = BGE_CHIPREV(sc->bnx_chipid);
1983 
1984 	switch (sc->bnx_asicrev) {
1985 	case BGE_ASICREV_BCM5717:
1986 	case BGE_ASICREV_BCM5719:
1987 	case BGE_ASICREV_BCM5720:
1988 		sc->bnx_flags |= BNX_FLAG_5717_PLUS | BNX_FLAG_57765_PLUS;
1989 		break;
1990 
1991 	case BGE_ASICREV_BCM5762:
1992 		sc->bnx_flags |= BNX_FLAG_57765_PLUS;
1993 		break;
1994 
1995 	case BGE_ASICREV_BCM57765:
1996 	case BGE_ASICREV_BCM57766:
1997 		sc->bnx_flags |= BNX_FLAG_57765_FAMILY | BNX_FLAG_57765_PLUS;
1998 		break;
1999 	}
2000 
2001 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5717 ||
2002 	    sc->bnx_asicrev == BGE_ASICREV_BCM5719 ||
2003 	    sc->bnx_asicrev == BGE_ASICREV_BCM5720 ||
2004 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762)
2005 		sc->bnx_flags |= BNX_FLAG_APE;
2006 
2007 	sc->bnx_flags |= BNX_FLAG_TSO;
2008 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5719 &&
2009 	    sc->bnx_chipid == BGE_CHIPID_BCM5719_A0)
2010 		sc->bnx_flags &= ~BNX_FLAG_TSO;
2011 
2012 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5717 ||
2013 	    BNX_IS_57765_FAMILY(sc)) {
2014 		/*
2015 		 * All chips in the BCM57785 and BCM5718 families have a bug:
2016 		 * under certain situations the interrupt will not be enabled
2017 		 * even if the status tag is written to the interrupt mailbox.
2018 		 *
2019 		 * BCM5719 and BCM5720, however, have a hardware workaround
2020 		 * which fixes the above bug.
2021 		 * See the comment near BGE_PCIDMARWCTL_TAGGED_STATUS_WA in
2022 		 * bnx_chipinit().
2023 		 *
2024 		 * For the rest of the chips in these two families, we will
2025 		 * have to poll the status block at high rate (10ms currently)
2026 		 * to check whether the interrupt is hosed or not.
2027 		 * See bnx_check_intr_*() for details.
2028 		 */
2029 		sc->bnx_flags |= BNX_FLAG_STATUSTAG_BUG;
2030 	}
2031 
2032 	sc->bnx_pciecap = pci_get_pciecap_ptr(sc->bnx_dev);
2033 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5719 ||
2034 	    sc->bnx_asicrev == BGE_ASICREV_BCM5720)
2035 		pcie_set_max_readrq(dev, PCIEM_DEVCTL_MAX_READRQ_2048);
2036 	else
2037 		pcie_set_max_readrq(dev, PCIEM_DEVCTL_MAX_READRQ_4096);
2038 	device_printf(dev, "CHIP ID 0x%08x; "
2039 		      "ASIC REV 0x%02x; CHIP REV 0x%02x\n",
2040 		      sc->bnx_chipid, sc->bnx_asicrev, sc->bnx_chiprev);
2041 
2042 	/*
2043 	 * Set various PHY quirk flags.
2044 	 */
2045 
2046 	capmask = MII_CAPMASK_DEFAULT;
2047 	if (product == PCI_PRODUCT_BROADCOM_BCM57791 ||
2048 	    product == PCI_PRODUCT_BROADCOM_BCM57795) {
2049 		/* 10/100 only */
2050 		capmask &= ~BMSR_EXTSTAT;
2051 	}
2052 
2053 	mii_priv |= BRGPHY_FLAG_WIRESPEED;
2054 	if (sc->bnx_chipid == BGE_CHIPID_BCM5762_A0)
2055 		mii_priv |= BRGPHY_FLAG_5762_A0;
2056 
2057 	/*
2058 	 * Chips with APE need BAR2 access for APE registers/memory.
2059 	 */
2060 	if (sc->bnx_flags & BNX_FLAG_APE) {
2061 		uint32_t pcistate;
2062 
2063 		rid = PCIR_BAR(2);
2064 		sc->bnx_res2 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
2065 		    RF_ACTIVE);
2066 		if (sc->bnx_res2 == NULL) {
2067 			device_printf(dev, "couldn't map BAR2 memory\n");
2068 			error = ENXIO;
2069 			goto fail;
2070 		}
2071 
2072 		/* Enable APE register/memory access by host driver. */
2073 		pcistate = pci_read_config(dev, BGE_PCI_PCISTATE, 4);
2074 		pcistate |= BGE_PCISTATE_ALLOW_APE_CTLSPC_WR |
2075 		    BGE_PCISTATE_ALLOW_APE_SHMEM_WR |
2076 		    BGE_PCISTATE_ALLOW_APE_PSPACE_WR;
2077 		pci_write_config(dev, BGE_PCI_PCISTATE, pcistate, 4);
2078 
2079 		bnx_ape_lock_init(sc);
2080 		bnx_ape_read_fw_ver(sc);
2081 	}
2082 
2083 	/* Initialize if_name early, so if_printf can be used */
2084 	ifp = &sc->arpcom.ac_if;
2085 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2086 
2087 	/*
2088 	 * Try to reset the chip.
2089 	 */
2090 	bnx_sig_pre_reset(sc, BNX_RESET_SHUTDOWN);
2091 	bnx_reset(sc);
2092 	bnx_sig_post_reset(sc, BNX_RESET_SHUTDOWN);
2093 
2094 	if (bnx_chipinit(sc)) {
2095 		device_printf(dev, "chip initialization failed\n");
2096 		error = ENXIO;
2097 		goto fail;
2098 	}
2099 
2100 	/*
2101 	 * Get station address
2102 	 */
2103 	error = bnx_get_eaddr(sc, ether_addr);
2104 	if (error) {
2105 		device_printf(dev, "failed to read station address\n");
2106 		goto fail;
2107 	}
2108 
2109 	/* Setup RX/TX and interrupt count */
2110 	bnx_setup_ring_cnt(sc);
2111 
2112 	if ((sc->bnx_rx_retcnt == 1 && sc->bnx_tx_ringcnt == 1) ||
2113 	    (sc->bnx_rx_retcnt > 1 && sc->bnx_tx_ringcnt > 1)) {
2114 		/*
2115 		 * The RX ring and the corresponding TX ring processing
2116 		 * should be on the same CPU, since they share the same
2117 		 * status block.
2118 		 */
2119 		sc->bnx_flags |= BNX_FLAG_RXTX_BUNDLE;
2120 		if (bootverbose)
2121 			device_printf(dev, "RX/TX bundle\n");
2122 		if (sc->bnx_tx_ringcnt > 1) {
2123 			/*
2124 			 * Multiple TX rings do not share a status block
2125 			 * with the link status, so the link status update
2126 			 * has to save its own status_tag.
2127 			 */
2128 			sc->bnx_flags |= BNX_FLAG_STATUS_HASTAG;
2129 			if (bootverbose)
2130 				device_printf(dev, "status needs tag\n");
2131 		}
2132 	} else {
2133 		KKASSERT(sc->bnx_rx_retcnt > 1 && sc->bnx_tx_ringcnt == 1);
2134 		if (bootverbose)
2135 			device_printf(dev, "RX/TX not bundled\n");
2136 	}
2137 
2138 	error = bnx_dma_alloc(dev);
2139 	if (error)
2140 		goto fail;
2141 
2142 	/*
2143 	 * Allocate interrupt
2144 	 */
2145 	error = bnx_alloc_intr(sc);
2146 	if (error)
2147 		goto fail;
2148 
2149 	/* Setup serializers */
2150 	bnx_setup_serialize(sc);
2151 
2152 	/* Set default tuneable values. */
2153 	sc->bnx_rx_coal_ticks = BNX_RX_COAL_TICKS_DEF;
2154 	sc->bnx_tx_coal_ticks = BNX_TX_COAL_TICKS_DEF;
2155 	sc->bnx_rx_coal_bds = BNX_RX_COAL_BDS_DEF;
2156 	sc->bnx_rx_coal_bds_poll = sc->bnx_rx_ret_ring[0].bnx_rx_cntmax;
2157 	sc->bnx_tx_coal_bds = BNX_TX_COAL_BDS_DEF;
2158 	sc->bnx_tx_coal_bds_poll = BNX_TX_COAL_BDS_POLL_DEF;
2159 	sc->bnx_rx_coal_bds_int = BNX_RX_COAL_BDS_INT_DEF;
2160 	sc->bnx_tx_coal_bds_int = BNX_TX_COAL_BDS_INT_DEF;
2161 
2162 	/* Set up ifnet structure */
2163 	ifp->if_softc = sc;
2164 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2165 	ifp->if_ioctl = bnx_ioctl;
2166 	ifp->if_start = bnx_start;
2167 #ifdef IFPOLL_ENABLE
2168 	ifp->if_npoll = bnx_npoll;
2169 #endif
2170 	ifp->if_init = bnx_init;
2171 	ifp->if_serialize = bnx_serialize;
2172 	ifp->if_deserialize = bnx_deserialize;
2173 	ifp->if_tryserialize = bnx_tryserialize;
2174 #ifdef INVARIANTS
2175 	ifp->if_serialize_assert = bnx_serialize_assert;
2176 #endif
2177 	ifp->if_mtu = ETHERMTU;
2178 	ifp->if_capabilities = IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2179 
2180 	ifp->if_capabilities |= IFCAP_HWCSUM;
2181 	ifp->if_hwassist = BNX_CSUM_FEATURES;
2182 	if (sc->bnx_flags & BNX_FLAG_TSO) {
2183 		ifp->if_capabilities |= IFCAP_TSO;
2184 		ifp->if_hwassist |= CSUM_TSO;
2185 	}
2186 	if (BNX_RSS_ENABLED(sc))
2187 		ifp->if_capabilities |= IFCAP_RSS;
2188 	ifp->if_capenable = ifp->if_capabilities;
2189 
2190 	ifp->if_nmbclusters = BGE_STD_RX_RING_CNT;
2191 
2192 	ifq_set_maxlen(&ifp->if_snd, BGE_TX_RING_CNT - 1);
2193 	ifq_set_ready(&ifp->if_snd);
2194 	ifq_set_subq_cnt(&ifp->if_snd, sc->bnx_tx_ringcnt);
2195 
2196 	if (sc->bnx_tx_ringcnt > 1) {
2197 		ifp->if_mapsubq = ifq_mapsubq_modulo;
2198 		ifq_set_subq_divisor(&ifp->if_snd, sc->bnx_tx_ringcnt);
2199 	}
2200 
2201 	/*
2202 	 * Figure out what sort of media we have by checking the
2203 	 * hardware config word in the first 32k of NIC internal memory,
2204 	 * or fall back to examining the EEPROM if necessary.
2205 	 * Note: on some BCM5700 cards, this value appears to be unset.
2206 	 * If that's the case, we have to rely on identifying the NIC
2207 	 * by its PCI subsystem ID, as we do below for the SysKonnect
2208 	 * SK-9D41.
2209 	 */
2210 	if (bnx_readmem_ind(sc, BGE_SRAM_DATA_SIG) == BGE_SRAM_DATA_SIG_MAGIC) {
2211 		hwcfg = bnx_readmem_ind(sc, BGE_SRAM_DATA_CFG);
2212 	} else {
2213 		if (bnx_read_eeprom(sc, (caddr_t)&hwcfg, BGE_EE_HWCFG_OFFSET,
2214 				    sizeof(hwcfg))) {
2215 			device_printf(dev, "failed to read EEPROM\n");
2216 			error = ENXIO;
2217 			goto fail;
2218 		}
2219 		hwcfg = ntohl(hwcfg);
2220 	}
2221 
2222 	/* The SysKonnect SK-9D41 is a 1000baseSX card. */
2223 	if (pci_get_subvendor(dev) == PCI_PRODUCT_SCHNEIDERKOCH_SK_9D41 ||
2224 	    (hwcfg & BGE_HWCFG_MEDIA) == BGE_MEDIA_FIBER)
2225 		sc->bnx_flags |= BNX_FLAG_TBI;
2226 
2227 	/* Setup MI MODE */
2228 	if (sc->bnx_flags & BNX_FLAG_CPMU)
2229 		sc->bnx_mi_mode = BGE_MIMODE_500KHZ_CONST;
2230 	else
2231 		sc->bnx_mi_mode = BGE_MIMODE_BASE;
2232 
2233 	/* Set up link status update handlers */
2234 	if (sc->bnx_flags & BNX_FLAG_TBI) {
2235 		sc->bnx_link_upd = bnx_tbi_link_upd;
2236 		sc->bnx_link_chg = BGE_MACSTAT_LINK_CHANGED;
2237 	} else if (sc->bnx_mi_mode & BGE_MIMODE_AUTOPOLL) {
2238 		sc->bnx_link_upd = bnx_autopoll_link_upd;
2239 		sc->bnx_link_chg = BGE_MACSTAT_LINK_CHANGED;
2240 	} else {
2241 		sc->bnx_link_upd = bnx_copper_link_upd;
2242 		sc->bnx_link_chg = BGE_MACSTAT_LINK_CHANGED;
2243 	}
2244 
2245 	/* Set default PHY address */
2246 	sc->bnx_phyno = 1;
2247 
2248 	/*
2249 	 * PHY address mapping for various devices.
2250 	 *
2251 	 *          | F0 Cu | F0 Sr | F1 Cu | F1 Sr |
2252 	 * ---------+-------+-------+-------+-------+
2253 	 * BCM57XX  |   1   |   X   |   X   |   X   |
2254 	 * BCM5717  |   1   |   8   |   2   |   9   |
2255 	 * BCM5719  |   1   |   8   |   2   |   9   |
2256 	 * BCM5720  |   1   |   8   |   2   |   9   |
2257 	 *
2258 	 *          | F2 Cu | F2 Sr | F3 Cu | F3 Sr |
2259 	 * ---------+-------+-------+-------+-------+
2260 	 * BCM57XX  |   X   |   X   |   X   |   X   |
2261 	 * BCM5717  |   X   |   X   |   X   |   X   |
2262 	 * BCM5719  |   3   |   10  |   4   |   11  |
2263 	 * BCM5720  |   X   |   X   |   X   |   X   |
2264 	 *
2265 	 * Other addresses may respond but they are not
2266 	 * IEEE compliant PHYs and should be ignored.
2267 	 */
2268 	if (BNX_IS_5717_PLUS(sc)) {
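		/*
		 * SerDes PHYs live at address 8 + function number, copper
		 * PHYs at 1 + function number; see the table above.
		 */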
2269 		if (sc->bnx_chipid == BGE_CHIPID_BCM5717_A0) {
2270 			if (CSR_READ_4(sc, BGE_SGDIG_STS) &
2271 			    BGE_SGDIGSTS_IS_SERDES)
2272 				sc->bnx_phyno = sc->bnx_func_addr + 8;
2273 			else
2274 				sc->bnx_phyno = sc->bnx_func_addr + 1;
2275 		} else {
2276 			if (CSR_READ_4(sc, BGE_CPMU_PHY_STRAP) &
2277 			    BGE_CPMU_PHY_STRAP_IS_SERDES)
2278 				sc->bnx_phyno = sc->bnx_func_addr + 8;
2279 			else
2280 				sc->bnx_phyno = sc->bnx_func_addr + 1;
2281 		}
2282 	}
2283 
2284 	if (sc->bnx_flags & BNX_FLAG_TBI) {
2285 		ifmedia_init(&sc->bnx_ifmedia, IFM_IMASK,
2286 		    bnx_ifmedia_upd, bnx_ifmedia_sts);
2287 		ifmedia_add(&sc->bnx_ifmedia, IFM_ETHER|IFM_1000_SX, 0, NULL);
2288 		ifmedia_add(&sc->bnx_ifmedia,
2289 		    IFM_ETHER|IFM_1000_SX|IFM_FDX, 0, NULL);
2290 		ifmedia_add(&sc->bnx_ifmedia, IFM_ETHER|IFM_AUTO, 0, NULL);
2291 		ifmedia_set(&sc->bnx_ifmedia, IFM_ETHER|IFM_AUTO);
2292 		sc->bnx_ifmedia.ifm_media = sc->bnx_ifmedia.ifm_cur->ifm_media;
2293 	} else {
2294 		struct mii_probe_args mii_args;
2295 
2296 		mii_probe_args_init(&mii_args, bnx_ifmedia_upd, bnx_ifmedia_sts);
2297 		mii_args.mii_probemask = 1 << sc->bnx_phyno;
2298 		mii_args.mii_capmask = capmask;
2299 		mii_args.mii_privtag = MII_PRIVTAG_BRGPHY;
2300 		mii_args.mii_priv = mii_priv;
2301 
2302 		error = mii_probe(dev, &sc->bnx_miibus, &mii_args);
2303 		if (error) {
2304 			device_printf(dev, "MII without any PHY!\n");
2305 			goto fail;
2306 		}
2307 	}
2308 
2309 	ctx = device_get_sysctl_ctx(sc->bnx_dev);
2310 	tree = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->bnx_dev));
2311 
2312 	SYSCTL_ADD_INT(ctx, tree, OID_AUTO,
2313 	    "rx_rings", CTLFLAG_RD, &sc->bnx_rx_retcnt, 0, "# of RX rings");
2314 	SYSCTL_ADD_INT(ctx, tree, OID_AUTO,
2315 	    "tx_rings", CTLFLAG_RD, &sc->bnx_tx_ringcnt, 0, "# of TX rings");
2316 
2317 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO, "rx_coal_ticks",
2318 			CTLTYPE_INT | CTLFLAG_RW,
2319 			sc, 0, bnx_sysctl_rx_coal_ticks, "I",
2320 			"Receive coalescing ticks (usec).");
2321 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO, "tx_coal_ticks",
2322 			CTLTYPE_INT | CTLFLAG_RW,
2323 			sc, 0, bnx_sysctl_tx_coal_ticks, "I",
2324 			"Transmit coalescing ticks (usec).");
2325 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO, "rx_coal_bds",
2326 			CTLTYPE_INT | CTLFLAG_RW,
2327 			sc, 0, bnx_sysctl_rx_coal_bds, "I",
2328 			"Receive max coalesced BD count.");
2329 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO, "rx_coal_bds_poll",
2330 			CTLTYPE_INT | CTLFLAG_RW,
2331 			sc, 0, bnx_sysctl_rx_coal_bds_poll, "I",
2332 			"Receive max coalesced BD count in polling.");
2333 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO, "tx_coal_bds",
2334 			CTLTYPE_INT | CTLFLAG_RW,
2335 			sc, 0, bnx_sysctl_tx_coal_bds, "I",
2336 			"Transmit max coalesced BD count.");
2337 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO, "tx_coal_bds_poll",
2338 			CTLTYPE_INT | CTLFLAG_RW,
2339 			sc, 0, bnx_sysctl_tx_coal_bds_poll, "I",
2340 			"Transmit max coalesced BD count in polling.");
2341 	/*
2342 	 * A common design characteristic for many Broadcom
2343 	 * client controllers is that they only support a
2344 	 * single outstanding DMA read operation on the PCIe
2345 	 * bus. This means that it will take twice as long to
2346 	 * fetch a TX frame that is split into header and
2347 	 * payload buffers as it does to fetch a single,
2348 	 * contiguous TX frame (2 reads vs. 1 read). For these
2349 	 * controllers, coalescing buffers to reduce the number
2350 	 * of memory reads is an effective way to get maximum
2351 	 * performance (about 940Mbps).  Without collapsing TX
2352 	 * buffers the maximum TCP bulk transfer performance
2353 	 * is about 850Mbps. However, forcing mbuf coalescing
2354 	 * consumes a lot of CPU cycles, so leave it off by
2355 	 * default.
2356 	 */
2357 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO,
2358 	    "force_defrag", CTLTYPE_INT | CTLFLAG_RW,
2359 	    sc, 0, bnx_sysctl_force_defrag, "I",
2360 	    "Force defragment on TX path");
2361 
2362 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO,
2363 	    "tx_wreg", CTLTYPE_INT | CTLFLAG_RW,
2364 	    sc, 0, bnx_sysctl_tx_wreg, "I",
2365 	    "# of segments before writing to hardware register");
2366 
2367 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO,
2368 	    "std_refill", CTLTYPE_INT | CTLFLAG_RW,
2369 	    sc, 0, bnx_sysctl_std_refill, "I",
2370 	    "# of packets received before scheduling standard refilling");
2371 
2372 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO,
2373 	    "rx_coal_bds_int", CTLTYPE_INT | CTLFLAG_RW,
2374 	    sc, 0, bnx_sysctl_rx_coal_bds_int, "I",
2375 	    "Receive max coalesced BD count during interrupt.");
2376 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO,
2377 	    "tx_coal_bds_int", CTLTYPE_INT | CTLFLAG_RW,
2378 	    sc, 0, bnx_sysctl_tx_coal_bds_int, "I",
2379 	    "Transmit max coalesced BD count during interrupt.");
2380 
2381 	if (sc->bnx_intr_type == PCI_INTR_TYPE_MSIX) {
2382 		SYSCTL_ADD_PROC(ctx, tree, OID_AUTO, "tx_cpumap",
2383 		    CTLTYPE_OPAQUE | CTLFLAG_RD,
2384 		    sc->bnx_tx_rmap, 0, if_ringmap_cpumap_sysctl, "I",
2385 		    "TX ring CPU map");
2386 		SYSCTL_ADD_PROC(ctx, tree, OID_AUTO, "rx_cpumap",
2387 		    CTLTYPE_OPAQUE | CTLFLAG_RD,
2388 		    sc->bnx_rx_rmap, 0, if_ringmap_cpumap_sysctl, "I",
2389 		    "RX ring CPU map");
2390 	} else {
2391 #ifdef IFPOLL_ENABLE
2392 		SYSCTL_ADD_PROC(ctx, tree, OID_AUTO, "tx_poll_cpumap",
2393 		    CTLTYPE_OPAQUE | CTLFLAG_RD,
2394 		    sc->bnx_tx_rmap, 0, if_ringmap_cpumap_sysctl, "I",
2395 		    "TX poll CPU map");
2396 		SYSCTL_ADD_PROC(ctx, tree, OID_AUTO, "rx_poll_cpumap",
2397 		    CTLTYPE_OPAQUE | CTLFLAG_RD,
2398 		    sc->bnx_rx_rmap, 0, if_ringmap_cpumap_sysctl, "I",
2399 		    "RX poll CPU map");
2400 #endif
2401 	}
2402 
2403 #ifdef BNX_RSS_DEBUG
2404 	SYSCTL_ADD_INT(ctx, tree, OID_AUTO,
2405 	    "std_refill_mask", CTLFLAG_RD,
2406 	    &sc->bnx_rx_std_ring.bnx_rx_std_refill, 0, "");
2407 	SYSCTL_ADD_INT(ctx, tree, OID_AUTO,
2408 	    "std_used", CTLFLAG_RD,
2409 	    &sc->bnx_rx_std_ring.bnx_rx_std_used, 0, "");
2410 	SYSCTL_ADD_INT(ctx, tree, OID_AUTO,
2411 	    "rss_debug", CTLFLAG_RW, &sc->bnx_rss_debug, 0, "");
2412 	for (i = 0; i < sc->bnx_rx_retcnt; ++i) {
2413 		ksnprintf(desc, sizeof(desc), "rx_pkt%d", i);
2414 		SYSCTL_ADD_ULONG(ctx, tree, OID_AUTO,
2415 		    desc, CTLFLAG_RW, &sc->bnx_rx_ret_ring[i].bnx_rx_pkt, "");
2416 
2417 		ksnprintf(desc, sizeof(desc), "rx_force_sched%d", i);
2418 		SYSCTL_ADD_ULONG(ctx, tree, OID_AUTO,
2419 		    desc, CTLFLAG_RW,
2420 		    &sc->bnx_rx_ret_ring[i].bnx_rx_force_sched, "");
2421 	}
2422 #endif
2423 #ifdef BNX_TSS_DEBUG
2424 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
2425 		ksnprintf(desc, sizeof(desc), "tx_pkt%d", i);
2426 		SYSCTL_ADD_ULONG(ctx, tree, OID_AUTO,
2427 		    desc, CTLFLAG_RW, &sc->bnx_tx_ring[i].bnx_tx_pkt, "");
2428 	}
2429 #endif
2430 
2431 	SYSCTL_ADD_ULONG(ctx, tree, OID_AUTO,
2432 	    "norxbds", CTLFLAG_RW, &sc->bnx_norxbds, "");
2433 
2434 	SYSCTL_ADD_ULONG(ctx, tree, OID_AUTO,
2435 	    "errors", CTLFLAG_RW, &sc->bnx_errors, "");
2436 
2437 #ifdef BNX_TSO_DEBUG
2438 	for (i = 0; i < BNX_TSO_NSTATS; ++i) {
2439 		ksnprintf(desc, sizeof(desc), "tso%d", i + 1);
2440 		SYSCTL_ADD_ULONG(ctx, tree, OID_AUTO,
2441 		    desc, CTLFLAG_RW, &sc->bnx_tsosegs[i], "");
2442 	}
2443 #endif
2444 
2445 	/*
2446 	 * Call MI attach routine.
2447 	 */
2448 	ether_ifattach(ifp, ether_addr, NULL);
2449 
2450 	/* Setup TX rings and subqueues */
2451 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
2452 		struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
2453 		struct bnx_tx_ring *txr = &sc->bnx_tx_ring[i];
2454 
2455 		ifsq_set_cpuid(ifsq, txr->bnx_tx_cpuid);
2456 		ifsq_set_hw_serialize(ifsq, &txr->bnx_tx_serialize);
2457 		ifsq_set_priv(ifsq, txr);
2458 		txr->bnx_ifsq = ifsq;
2459 
2460 		ifsq_watchdog_init(&txr->bnx_tx_watchdog, ifsq,
2461 				   bnx_watchdog, 0);
2462 
2463 		if (bootverbose) {
2464 			device_printf(dev, "txr %d -> cpu%d\n", i,
2465 			    txr->bnx_tx_cpuid);
2466 		}
2467 	}
2468 
2469 	error = bnx_setup_intr(sc);
2470 	if (error) {
2471 		ether_ifdetach(ifp);
2472 		goto fail;
2473 	}
2474 	bnx_set_tick_cpuid(sc, FALSE);
2475 
2476 	/*
2477 	 * Create RX standard ring refilling thread
2478 	 */
2479 	std_cpuid_def = if_ringmap_cpumap(sc->bnx_rx_rmap, 0);
2480 	std_cpuid = device_getenv_int(dev, "std.cpuid", std_cpuid_def);
2481 	if (std_cpuid < 0 || std_cpuid >= ncpus) {
2482 		device_printf(dev, "invalid std.cpuid %d, use %d\n",
2483 		    std_cpuid, std_cpuid_def);
2484 		std_cpuid = std_cpuid_def;
2485 	}
2486 
2487 	std = &sc->bnx_rx_std_ring;
2488 	lwkt_create(bnx_rx_std_refill_ithread, std, &std->bnx_rx_std_ithread,
2489 	    NULL, TDF_NOSTART | TDF_INTTHREAD, std_cpuid,
2490 	    "%s std", device_get_nameunit(dev));
2491 	lwkt_setpri(std->bnx_rx_std_ithread, TDPRI_INT_MED);
2492 	std->bnx_rx_std_ithread->td_preemptable = lwkt_preempt;
2493 
2494 	return(0);
2495 fail:
2496 	bnx_detach(dev);
2497 	return(error);
2498 }
2499 
2500 static int
2501 bnx_detach(device_t dev)
2502 {
2503 	struct bnx_softc *sc = device_get_softc(dev);
2504 	struct bnx_rx_std_ring *std = &sc->bnx_rx_std_ring;
2505 
2506 	if (device_is_attached(dev)) {
2507 		struct ifnet *ifp = &sc->arpcom.ac_if;
2508 
2509 		ifnet_serialize_all(ifp);
2510 		bnx_stop(sc);
2511 		bnx_teardown_intr(sc, sc->bnx_intr_cnt);
2512 		ifnet_deserialize_all(ifp);
2513 
2514 		ether_ifdetach(ifp);
2515 	}
2516 
2517 	if (std->bnx_rx_std_ithread != NULL) {
2518 		tsleep_interlock(std, 0);
2519 
2520 		if (std->bnx_rx_std_ithread->td_gd == mycpu) {
2521 			bnx_rx_std_refill_stop(std);
2522 		} else {
2523 			lwkt_send_ipiq(std->bnx_rx_std_ithread->td_gd,
2524 			    bnx_rx_std_refill_stop, std);
2525 		}
2526 
2527 		tsleep(std, PINTERLOCKED, "bnx_detach", 0);
2528 		if (bootverbose)
2529 			device_printf(dev, "RX std ithread exited\n");
2530 
2531 		lwkt_synchronize_ipiqs("bnx_detach_ipiq");
2532 	}
2533 
2534 	if (sc->bnx_flags & BNX_FLAG_TBI)
2535 		ifmedia_removeall(&sc->bnx_ifmedia);
2536 	if (sc->bnx_miibus)
2537 		device_delete_child(dev, sc->bnx_miibus);
2538 	bus_generic_detach(dev);
2539 
2540 	bnx_free_intr(sc);
2541 
2542 	if (sc->bnx_msix_mem_res != NULL) {
2543 		bus_release_resource(dev, SYS_RES_MEMORY, sc->bnx_msix_mem_rid,
2544 		    sc->bnx_msix_mem_res);
2545 	}
2546 	if (sc->bnx_res != NULL) {
2547 		bus_release_resource(dev, SYS_RES_MEMORY,
2548 		    BGE_PCI_BAR0, sc->bnx_res);
2549 	}
2550 	if (sc->bnx_res2 != NULL) {
2551 		bus_release_resource(dev, SYS_RES_MEMORY,
2552 		    PCIR_BAR(2), sc->bnx_res2);
2553 	}
2554 
2555 	bnx_dma_free(sc);
2556 
2557 	if (sc->bnx_serialize != NULL)
2558 		kfree(sc->bnx_serialize, M_DEVBUF);
2559 
2560 	if (sc->bnx_rx_rmap != NULL)
2561 		if_ringmap_free(sc->bnx_rx_rmap);
2562 	if (sc->bnx_tx_rmap != NULL)
2563 		if_ringmap_free(sc->bnx_tx_rmap);
2564 
2565 	return 0;
2566 }
2567 
2568 static void
2569 bnx_reset(struct bnx_softc *sc)
2570 {
2571 	device_t dev = sc->bnx_dev;
2572 	uint32_t cachesize, command, reset, mac_mode, mac_mode_mask;
2573 	void (*write_op)(struct bnx_softc *, uint32_t, uint32_t);
2574 	int i, val = 0;
2575 	uint16_t devctl;
2576 
2577 	mac_mode_mask = BGE_MACMODE_HALF_DUPLEX | BGE_MACMODE_PORTMODE;
2578 	if (sc->bnx_mfw_flags & BNX_MFW_ON_APE)
2579 		mac_mode_mask |= BGE_MACMODE_APE_RX_EN | BGE_MACMODE_APE_TX_EN;
2580 	mac_mode = CSR_READ_4(sc, BGE_MAC_MODE) & mac_mode_mask;
2581 
2582 	write_op = bnx_writemem_direct;
2583 
2584 	CSR_WRITE_4(sc, BGE_NVRAM_SWARB, BGE_NVRAMSWARB_SET1);
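	/*
	 * Request the NVRAM software arbitration lock, so the reset does
	 * not disturb a firmware NVRAM access that may be in progress.
	 */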
2585 	for (i = 0; i < 8000; i++) {
2586 		if (CSR_READ_4(sc, BGE_NVRAM_SWARB) & BGE_NVRAMSWARB_GNT1)
2587 			break;
2588 		DELAY(20);
2589 	}
2590 	if (i == 8000)
2591 		if_printf(&sc->arpcom.ac_if, "NVRAM lock timed out!\n");
2592 
2593 	/* Take APE lock when performing reset. */
2594 	bnx_ape_lock(sc, BGE_APE_LOCK_GRC);
2595 
2596 	/* Save some important PCI state. */
2597 	cachesize = pci_read_config(dev, BGE_PCI_CACHESZ, 4);
2598 	command = pci_read_config(dev, BGE_PCI_CMD, 4);
2599 
2600 	pci_write_config(dev, BGE_PCI_MISC_CTL,
2601 	    BGE_PCIMISCCTL_INDIRECT_ACCESS|BGE_PCIMISCCTL_MASK_PCI_INTR|
2602 	    BGE_HIF_SWAP_OPTIONS|BGE_PCIMISCCTL_PCISTATE_RW|
2603 	    BGE_PCIMISCCTL_TAGGED_STATUS, 4);
2604 
2605 	/* Disable fastboot on controllers that support it. */
2606 	if (bootverbose)
2607 		if_printf(&sc->arpcom.ac_if, "Disabling fastboot\n");
2608 	CSR_WRITE_4(sc, BGE_FASTBOOT_PC, 0x0);
2609 
2610 	/*
2611 	 * Write the magic number to SRAM at offset 0xB50.
2612 	 * When firmware finishes its initialization it will
2613 	 * write ~BGE_SRAM_FW_MB_MAGIC to the same location.
2614 	 */
2615 	bnx_writemem_ind(sc, BGE_SRAM_FW_MB, BGE_SRAM_FW_MB_MAGIC);
2616 
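	/*
	 * Core clock reset.  The (65 << 1) field appears to be the 66MHz
	 * core clock timing value (0x41 << 1, BGE_32BITTIME_66MHZ in bge).
	 */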
2617 	reset = BGE_MISCCFG_RESET_CORE_CLOCKS|(65<<1);
2618 
2619 	/* XXX: Broadcom Linux driver. */
2620 	/* Force PCI-E 1.0a mode */
2621 	if (!BNX_IS_57765_PLUS(sc) &&
2622 	    CSR_READ_4(sc, BGE_PCIE_PHY_TSTCTL) ==
2623 	    (BGE_PCIE_PHY_TSTCTL_PSCRAM |
2624 	     BGE_PCIE_PHY_TSTCTL_PCIE10)) {
2625 		CSR_WRITE_4(sc, BGE_PCIE_PHY_TSTCTL,
2626 		    BGE_PCIE_PHY_TSTCTL_PSCRAM);
2627 	}
2628 	if (sc->bnx_chipid != BGE_CHIPID_BCM5750_A0) {
2629 		/* Prevent PCIE link training during global reset */
2630 		CSR_WRITE_4(sc, BGE_MISC_CFG, (1<<29));
2631 		reset |= (1<<29);
2632 	}
2633 
2634 	/*
2635 	 * Set GPHY Power Down Override to leave GPHY
2636 	 * powered up in D0 uninitialized.
2637 	 */
2638 	if ((sc->bnx_flags & BNX_FLAG_CPMU) == 0)
2639 		reset |= BGE_MISCCFG_GPHY_PD_OVERRIDE;
2640 
2641 	/* Issue global reset */
2642 	write_op(sc, BGE_MISC_CFG, reset);
2643 
2644 	DELAY(100 * 1000);
2645 
2646 	/* XXX: Broadcom Linux driver. */
2647 	if (sc->bnx_chipid == BGE_CHIPID_BCM5750_A0) {
2648 		uint32_t v;
2649 
2650 		DELAY(500000); /* wait for link training to complete */
2651 		v = pci_read_config(dev, 0xc4, 4);
2652 		pci_write_config(dev, 0xc4, v | (1<<15), 4);
2653 	}
2654 
2655 	devctl = pci_read_config(dev, sc->bnx_pciecap + PCIER_DEVCTRL, 2);
2656 
2657 	/* Disable no snoop and disable relaxed ordering. */
2658 	devctl &= ~(PCIEM_DEVCTL_RELAX_ORDER | PCIEM_DEVCTL_NOSNOOP);
2659 
2660 	/* Old PCI-E chips only support a 128-byte max payload size. */
2661 	if ((sc->bnx_flags & BNX_FLAG_CPMU) == 0) {
2662 		devctl &= ~PCIEM_DEVCTL_MAX_PAYLOAD_MASK;
2663 		devctl |= PCIEM_DEVCTL_MAX_PAYLOAD_128;
2664 	}
2665 
2666 	pci_write_config(dev, sc->bnx_pciecap + PCIER_DEVCTRL,
2667 	    devctl, 2);
2668 
2669 	/* Clear error status. */
2670 	pci_write_config(dev, sc->bnx_pciecap + PCIER_DEVSTS,
2671 	    PCIEM_DEVSTS_CORR_ERR |
2672 	    PCIEM_DEVSTS_NFATAL_ERR |
2673 	    PCIEM_DEVSTS_FATAL_ERR |
2674 	    PCIEM_DEVSTS_UNSUPP_REQ, 2);
2675 
2676 	/* Reset some of the PCI state that got zapped by reset */
2677 	pci_write_config(dev, BGE_PCI_MISC_CTL,
2678 	    BGE_PCIMISCCTL_INDIRECT_ACCESS|BGE_PCIMISCCTL_MASK_PCI_INTR|
2679 	    BGE_HIF_SWAP_OPTIONS|BGE_PCIMISCCTL_PCISTATE_RW|
2680 	    BGE_PCIMISCCTL_TAGGED_STATUS, 4);
2681 	val = BGE_PCISTATE_ROM_ENABLE | BGE_PCISTATE_ROM_RETRY_ENABLE;
2682 	if (sc->bnx_mfw_flags & BNX_MFW_ON_APE) {
2683 		val |= BGE_PCISTATE_ALLOW_APE_CTLSPC_WR |
2684 		    BGE_PCISTATE_ALLOW_APE_SHMEM_WR |
2685 		    BGE_PCISTATE_ALLOW_APE_PSPACE_WR;
2686 	}
2687 	pci_write_config(dev, BGE_PCI_PCISTATE, val, 4);
2688 	pci_write_config(dev, BGE_PCI_CACHESZ, cachesize, 4);
2689 	pci_write_config(dev, BGE_PCI_CMD, command, 4);
2690 
2691 	/* Enable memory arbiter */
2692 	CSR_WRITE_4(sc, BGE_MARB_MODE, BGE_MARBMODE_ENABLE);
2693 
2694 	/* Fix up byte swapping */
2695 	CSR_WRITE_4(sc, BGE_MODE_CTL, bnx_dma_swap_options(sc));
2696 
2697 	val = CSR_READ_4(sc, BGE_MAC_MODE);
2698 	val = (val & ~mac_mode_mask) | mac_mode;
2699 	CSR_WRITE_4(sc, BGE_MAC_MODE, val);
2700 	DELAY(40);
2701 
2702 	bnx_ape_unlock(sc, BGE_APE_LOCK_GRC);
2703 
2704 	/*
2705 	 * Poll until we see the 1's complement of the magic number.
2706 	 * This indicates that the firmware initialization is complete.
2707 	 */
2708 	for (i = 0; i < BNX_FIRMWARE_TIMEOUT; i++) {
2709 		val = bnx_readmem_ind(sc, BGE_SRAM_FW_MB);
2710 		if (val == ~BGE_SRAM_FW_MB_MAGIC)
2711 			break;
2712 		DELAY(10);
2713 	}
2714 	if (i == BNX_FIRMWARE_TIMEOUT) {
2715 		if_printf(&sc->arpcom.ac_if, "firmware handshake "
2716 			  "timed out, found 0x%08x\n", val);
2717 	}
2718 
2719 	/* BCM57765 A0 needs additional time before accessing. */
2720 	if (sc->bnx_chipid == BGE_CHIPID_BCM57765_A0)
2721 		DELAY(10 * 1000);
2722 
2723 	/*
2724 	 * The 5704 in TBI mode apparently needs some special
2725 	 * adjustment to ensure the SERDES drive level is set
2726 	 * to 1.2V.
2727 	 */
2728 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5704 &&
2729 	    (sc->bnx_flags & BNX_FLAG_TBI)) {
2730 		uint32_t serdescfg;
2731 
2732 		serdescfg = CSR_READ_4(sc, BGE_SERDES_CFG);
2733 		serdescfg = (serdescfg & ~0xFFF) | 0x880;
2734 		CSR_WRITE_4(sc, BGE_SERDES_CFG, serdescfg);
2735 	}
2736 
2737 	CSR_WRITE_4(sc, BGE_MI_MODE,
2738 	    sc->bnx_mi_mode & ~BGE_MIMODE_AUTOPOLL);
2739 	DELAY(80);
2740 
2741 	/* XXX: Broadcom Linux driver. */
2742 	if (!BNX_IS_57765_PLUS(sc)) {
2743 		uint32_t v;
2744 
2745 		/* Enable Data FIFO protection. */
2746 		v = CSR_READ_4(sc, BGE_PCIE_TLDLPL_PORT);
2747 		CSR_WRITE_4(sc, BGE_PCIE_TLDLPL_PORT, v | (1 << 25));
2748 	}
2749 
2750 	DELAY(10000);
2751 
2752 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5720) {
2753 		BNX_CLRBIT(sc, BGE_CPMU_CLCK_ORIDE,
2754 		    CPMU_CLCK_ORIDE_MAC_ORIDE_EN);
2755 	}
2756 }
2757 
2758 /*
2759  * Frame reception handling. This is called if there's a frame
2760  * on the receive return list.
2761  *
2762  * Note: we have to be able to handle two possibilities here:
2763  * 1) the frame is from the jumbo receive ring
2764  * 2) the frame is from the standard receive ring
2765  */
2766 
2767 static void
2768 bnx_rxeof(struct bnx_rx_ret_ring *ret, uint16_t rx_prod, int count)
2769 {
2770 	struct bnx_softc *sc = ret->bnx_sc;
2771 	struct bnx_rx_std_ring *std = ret->bnx_std;
2772 	struct ifnet *ifp = &sc->arpcom.ac_if;
2773 	int std_used = 0, cpuid = mycpuid;
2774 
2775 	while (ret->bnx_rx_saved_considx != rx_prod && count != 0) {
2776 		struct pktinfo pi0, *pi = NULL;
2777 		struct bge_rx_bd *cur_rx;
2778 		struct bnx_rx_buf *rb;
2779 		uint32_t rxidx;
2780 		struct mbuf *m = NULL;
2781 		uint16_t vlan_tag = 0;
2782 		int have_tag = 0;
2783 
2784 		--count;
2785 
2786 		cur_rx = &ret->bnx_rx_ret_ring[ret->bnx_rx_saved_considx];
2787 
2788 		rxidx = cur_rx->bge_idx;
2789 		KKASSERT(rxidx < BGE_STD_RX_RING_CNT);
2790 
2791 		BNX_INC(ret->bnx_rx_saved_considx, BNX_RETURN_RING_CNT);
2792 #ifdef BNX_RSS_DEBUG
2793 		ret->bnx_rx_pkt++;
2794 #endif
2795 
2796 		if (cur_rx->bge_flags & BGE_RXBDFLAG_VLAN_TAG) {
2797 			have_tag = 1;
2798 			vlan_tag = cur_rx->bge_vlan_tag;
2799 		}
2800 
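		/*
		 * Once enough RX buffers have been consumed, publish the
		 * accumulated count and schedule the standard ring refill
		 * thread.
		 */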
2801 		if (ret->bnx_rx_cnt >= ret->bnx_rx_cntmax) {
2802 			atomic_add_int(&std->bnx_rx_std_used, std_used);
2803 			std_used = 0;
2804 
2805 			bnx_rx_std_refill_sched(ret, std);
2806 		}
2807 		ret->bnx_rx_cnt++;
2808 		++std_used;
2809 
2810 		rb = &std->bnx_rx_std_buf[rxidx];
2811 		m = rb->bnx_rx_mbuf;
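		/*
		 * On RX errors reuse the existing mbuf: drop the frame and
		 * just mark the slot as refilled, without allocating a new
		 * buffer.
		 */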
2812 		if (cur_rx->bge_flags & BGE_RXBDFLAG_ERROR) {
2813 			IFNET_STAT_INC(ifp, ierrors, 1);
2814 			cpu_sfence();
2815 			rb->bnx_rx_refilled = 1;
2816 			continue;
2817 		}
2818 		if (bnx_newbuf_std(ret, rxidx, 0)) {
2819 			IFNET_STAT_INC(ifp, ierrors, 1);
2820 			continue;
2821 		}
2822 
2823 		IFNET_STAT_INC(ifp, ipackets, 1);
2824 		m->m_pkthdr.len = m->m_len = cur_rx->bge_len - ETHER_CRC_LEN;
2825 		m->m_pkthdr.rcvif = ifp;
2826 
2827 		if ((ifp->if_capenable & IFCAP_RXCSUM) &&
2828 		    (cur_rx->bge_flags & BGE_RXBDFLAG_IPV6) == 0) {
2829 			if (cur_rx->bge_flags & BGE_RXBDFLAG_IP_CSUM) {
2830 				m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
2831 				if ((cur_rx->bge_error_flag &
2832 				    BGE_RXERRFLAG_IP_CSUM_NOK) == 0)
2833 					m->m_pkthdr.csum_flags |= CSUM_IP_VALID;
2834 			}
2835 			if (cur_rx->bge_flags & BGE_RXBDFLAG_TCP_UDP_CSUM) {
2836 				m->m_pkthdr.csum_data =
2837 				    cur_rx->bge_tcp_udp_csum;
2838 				m->m_pkthdr.csum_flags |= CSUM_DATA_VALID |
2839 				    CSUM_PSEUDO_HDR;
2840 			}
2841 		}
2842 		if (ifp->if_capenable & IFCAP_RSS) {
2843 			pi = bnx_rss_info(&pi0, cur_rx);
2844 			if (pi != NULL &&
2845 			    (cur_rx->bge_flags & BGE_RXBDFLAG_RSS_HASH))
2846 				m_sethash(m, toeplitz_hash(cur_rx->bge_hash));
2847 		}
2848 
2849 		/*
2850 		 * If we received a packet with a vlan tag, pass it
2851 		 * to vlan_input() instead of ether_input().
2852 		 */
2853 		if (have_tag) {
2854 			m->m_flags |= M_VLANTAG;
2855 			m->m_pkthdr.ether_vlantag = vlan_tag;
2856 		}
2857 		ifp->if_input(ifp, m, pi, cpuid);
2858 	}
2859 	bnx_writembx(sc, ret->bnx_rx_mbx, ret->bnx_rx_saved_considx);
2860 
2861 	if (std_used > 0) {
2862 		int cur_std_used;
2863 
2864 		cur_std_used = atomic_fetchadd_int(&std->bnx_rx_std_used,
2865 		    std_used);
2866 		if (cur_std_used + std_used >= (BGE_STD_RX_RING_CNT / 2)) {
2867 #ifdef BNX_RSS_DEBUG
2868 			ret->bnx_rx_force_sched++;
2869 #endif
2870 			bnx_rx_std_refill_sched(ret, std);
2871 		}
2872 	}
2873 }
2874 
2875 static void
2876 bnx_txeof(struct bnx_tx_ring *txr, uint16_t tx_cons)
2877 {
2878 	struct ifnet *ifp = &txr->bnx_sc->arpcom.ac_if;
2879 
2880 	/*
2881 	 * Go through our tx ring and free mbufs for those
2882 	 * frames that have been sent.
2883 	 */
2884 	while (txr->bnx_tx_saved_considx != tx_cons) {
2885 		struct bnx_tx_buf *buf;
2886 		uint32_t idx = 0;
2887 
2888 		idx = txr->bnx_tx_saved_considx;
2889 		buf = &txr->bnx_tx_buf[idx];
2890 		if (buf->bnx_tx_mbuf != NULL) {
2891 			IFNET_STAT_INC(ifp, opackets, 1);
2892 #ifdef BNX_TSS_DEBUG
2893 			txr->bnx_tx_pkt++;
2894 #endif
2895 			bus_dmamap_unload(txr->bnx_tx_mtag,
2896 			    buf->bnx_tx_dmamap);
2897 			m_freem(buf->bnx_tx_mbuf);
2898 			buf->bnx_tx_mbuf = NULL;
2899 		}
2900 		txr->bnx_tx_cnt--;
2901 		BNX_INC(txr->bnx_tx_saved_considx, BGE_TX_RING_CNT);
2902 	}
2903 
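	/*
	 * Clear the oactive flag once enough TX descriptors are free
	 * to queue another frame.
	 */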
2904 	if ((BGE_TX_RING_CNT - txr->bnx_tx_cnt) >=
2905 	    (BNX_NSEG_RSVD + BNX_NSEG_SPARE))
2906 		ifsq_clr_oactive(txr->bnx_ifsq);
2907 
2908 	if (txr->bnx_tx_cnt == 0)
2909 		ifsq_watchdog_set_count(&txr->bnx_tx_watchdog, 0);
2910 
2911 	if (!ifsq_is_empty(txr->bnx_ifsq))
2912 		ifsq_devstart(txr->bnx_ifsq);
2913 }
2914 
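/*
 * Handle the non-RX/TX portions of the status block: error attentions
 * (which trigger a reinit) and link state changes.  Returns non-zero
 * if anything was handled.
 */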
2915 static int
2916 bnx_handle_status(struct bnx_softc *sc)
2917 {
2918 	uint32_t status;
2919 	int handle = 0;
2920 
2921 	status = *sc->bnx_hw_status;
2922 
2923 	if (status & BGE_STATFLAG_ERROR) {
2924 		uint32_t val;
2925 		int reset = 0;
2926 
2927 		sc->bnx_errors++;
2928 
2929 		val = CSR_READ_4(sc, BGE_FLOW_ATTN);
2930 		if (val & ~BGE_FLOWATTN_MB_LOWAT) {
2931 			if_printf(&sc->arpcom.ac_if,
2932 			    "flow attn 0x%08x\n", val);
2933 			reset = 1;
2934 		}
2935 
2936 		val = CSR_READ_4(sc, BGE_MSI_STATUS);
2937 		if (val & ~BGE_MSISTAT_MSI_PCI_REQ) {
2938 			if_printf(&sc->arpcom.ac_if,
2939 			    "msi status 0x%08x\n", val);
2940 			reset = 1;
2941 		}
2942 
2943 		val = CSR_READ_4(sc, BGE_RDMA_STATUS);
2944 		if (val) {
2945 			if_printf(&sc->arpcom.ac_if,
2946 			    "rdma status 0x%08x\n", val);
2947 			reset = 1;
2948 		}
2949 
2950 		val = CSR_READ_4(sc, BGE_WDMA_STATUS);
2951 		if (val) {
2952 			if_printf(&sc->arpcom.ac_if,
2953 			    "wdma status 0x%08x\n", val);
2954 			reset = 1;
2955 		}
2956 
2957 		if (reset) {
2958 			bnx_serialize_skipmain(sc);
2959 			bnx_init(sc);
2960 			bnx_deserialize_skipmain(sc);
2961 		}
2962 		handle = 1;
2963 	}
2964 
2965 	if ((status & BGE_STATFLAG_LINKSTATE_CHANGED) || sc->bnx_link_evt) {
2966 		if (bootverbose) {
2967 			if_printf(&sc->arpcom.ac_if, "link change, "
2968 			    "link_evt %d\n", sc->bnx_link_evt);
2969 		}
2970 		bnx_link_poll(sc);
2971 		handle = 1;
2972 	}
2973 
2974 	return handle;
2975 }
2976 
2977 #ifdef IFPOLL_ENABLE
2978 
2979 static void
2980 bnx_npoll_rx(struct ifnet *ifp __unused, void *xret, int cycle)
2981 {
2982 	struct bnx_rx_ret_ring *ret = xret;
2983 	uint16_t rx_prod;
2984 
2985 	ASSERT_SERIALIZED(&ret->bnx_rx_ret_serialize);
2986 
2987 	ret->bnx_saved_status_tag = *ret->bnx_hw_status_tag;
2988 	cpu_lfence();
2989 
2990 	rx_prod = *ret->bnx_rx_considx;
2991 	if (ret->bnx_rx_saved_considx != rx_prod)
2992 		bnx_rxeof(ret, rx_prod, cycle);
2993 }
2994 
2995 static void
2996 bnx_npoll_tx_notag(struct ifnet *ifp __unused, void *xtxr, int cycle __unused)
2997 {
2998 	struct bnx_tx_ring *txr = xtxr;
2999 	uint16_t tx_cons;
3000 
3001 	ASSERT_SERIALIZED(&txr->bnx_tx_serialize);
3002 
3003 	tx_cons = *txr->bnx_tx_considx;
3004 	if (txr->bnx_tx_saved_considx != tx_cons)
3005 		bnx_txeof(txr, tx_cons);
3006 }
3007 
3008 static void
3009 bnx_npoll_tx(struct ifnet *ifp, void *xtxr, int cycle)
3010 {
3011 	struct bnx_tx_ring *txr = xtxr;
3012 
3013 	ASSERT_SERIALIZED(&txr->bnx_tx_serialize);
3014 
3015 	txr->bnx_saved_status_tag = *txr->bnx_hw_status_tag;
3016 	cpu_lfence();
3017 	bnx_npoll_tx_notag(ifp, txr, cycle);
3018 }
3019 
3020 static void
3021 bnx_npoll_status_notag(struct ifnet *ifp)
3022 {
3023 	struct bnx_softc *sc = ifp->if_softc;
3024 
3025 	ASSERT_SERIALIZED(&sc->bnx_main_serialize);
3026 
3027 	if (bnx_handle_status(sc)) {
3028 		/*
3029 		 * Status changes are handled; force the chip to
3030 		 * update the status block to reflect whether there
3031 		 * are more status changes or not; otherwise stale status
3032 		 * changes would be seen forever.
3033 		 */
3034 		BNX_SETBIT(sc, BGE_HCC_MODE, BGE_HCCMODE_COAL_NOW);
3035 	}
3036 }
3037 
3038 static void
3039 bnx_npoll_status(struct ifnet *ifp)
3040 {
3041 	struct bnx_softc *sc = ifp->if_softc;
3042 
3043 	ASSERT_SERIALIZED(&sc->bnx_main_serialize);
3044 
3045 	sc->bnx_saved_status_tag = *sc->bnx_hw_status_tag;
3046 	cpu_lfence();
3047 	bnx_npoll_status_notag(ifp);
3048 }
3049 
3050 static void
3051 bnx_npoll(struct ifnet *ifp, struct ifpoll_info *info)
3052 {
3053 	struct bnx_softc *sc = ifp->if_softc;
3054 	int i;
3055 
3056 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3057 
3058 	if (info != NULL) {
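		/*
		 * Polling is being enabled; register the status, TX and RX
		 * poll handlers and bind them per the ring CPU maps.
		 */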
3059 		if (sc->bnx_flags & BNX_FLAG_STATUS_HASTAG)
3060 			info->ifpi_status.status_func = bnx_npoll_status;
3061 		else
3062 			info->ifpi_status.status_func = bnx_npoll_status_notag;
3063 		info->ifpi_status.serializer = &sc->bnx_main_serialize;
3064 
3065 		for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
3066 			struct bnx_tx_ring *txr = &sc->bnx_tx_ring[i];
3067 			int cpu = if_ringmap_cpumap(sc->bnx_tx_rmap, i);
3068 
3069 			KKASSERT(cpu < netisr_ncpus);
3070 			if (sc->bnx_flags & BNX_FLAG_RXTX_BUNDLE) {
3071 				info->ifpi_tx[cpu].poll_func =
3072 				    bnx_npoll_tx_notag;
3073 			} else {
3074 				info->ifpi_tx[cpu].poll_func = bnx_npoll_tx;
3075 			}
3076 			info->ifpi_tx[cpu].arg = txr;
3077 			info->ifpi_tx[cpu].serializer = &txr->bnx_tx_serialize;
3078 			ifsq_set_cpuid(txr->bnx_ifsq, cpu);
3079 		}
3080 
3081 		for (i = 0; i < sc->bnx_rx_retcnt; ++i) {
3082 			struct bnx_rx_ret_ring *ret = &sc->bnx_rx_ret_ring[i];
3083 			int cpu = if_ringmap_cpumap(sc->bnx_rx_rmap, i);
3084 
3085 			KKASSERT(cpu < netisr_ncpus);
3086 			info->ifpi_rx[cpu].poll_func = bnx_npoll_rx;
3087 			info->ifpi_rx[cpu].arg = ret;
3088 			info->ifpi_rx[cpu].serializer =
3089 			    &ret->bnx_rx_ret_serialize;
3090 		}
3091 
3092 		if (ifp->if_flags & IFF_RUNNING) {
3093 			bnx_disable_intr(sc);
3094 			bnx_set_tick_cpuid(sc, TRUE);
3095 
3096 			sc->bnx_coal_chg = BNX_TX_COAL_BDS_CHG |
3097 			    BNX_RX_COAL_BDS_CHG;
3098 			bnx_coal_change(sc);
3099 		}
3100 	} else {
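		/*
		 * Polling is being disabled; restore the interrupt-time
		 * subqueue CPU bindings and coalescing parameters, and
		 * re-enable the interrupt.
		 */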
3101 		for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
3102 			ifsq_set_cpuid(sc->bnx_tx_ring[i].bnx_ifsq,
3103 			    sc->bnx_tx_ring[i].bnx_tx_cpuid);
3104 		}
3105 		if (ifp->if_flags & IFF_RUNNING) {
3106 			sc->bnx_coal_chg = BNX_TX_COAL_BDS_CHG |
3107 			    BNX_RX_COAL_BDS_CHG;
3108 			bnx_coal_change(sc);
3109 
3110 			bnx_enable_intr(sc);
3111 			bnx_set_tick_cpuid(sc, FALSE);
3112 		}
3113 	}
3114 }
3115 
3116 #endif	/* IFPOLL_ENABLE */
3117 
3118 static void
3119 bnx_intr_legacy(void *xsc)
3120 {
3121 	struct bnx_softc *sc = xsc;
3122 	struct bnx_rx_ret_ring *ret = &sc->bnx_rx_ret_ring[0];
3123 
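	/*
	 * The legacy interrupt line may be shared.  If the status tag has
	 * not changed, check the PCI state register to see whether this
	 * device is actually asserting an interrupt; if not, bail out.
	 */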
3124 	if (ret->bnx_saved_status_tag == *ret->bnx_hw_status_tag) {
3125 		uint32_t val;
3126 
3127 		val = pci_read_config(sc->bnx_dev, BGE_PCI_PCISTATE, 4);
3128 		if (val & BGE_PCISTAT_INTR_NOTACT)
3129 			return;
3130 	}
3131 
3132 	/*
3133 	 * NOTE:
3134 	 * The interrupt has to be disabled if tagged status is
3135 	 * used; otherwise the interrupt is always asserted on
3136 	 * certain chips (at least on BCM5750 AX/BX).
3137 	 */
3138 	bnx_writembx(sc, BGE_MBX_IRQ0_LO, 1);
3139 
3140 	bnx_intr(sc);
3141 }
3142 
3143 static void
3144 bnx_msi(void *xsc)
3145 {
3146 	bnx_intr(xsc);
3147 }
3148 
3149 static void
3150 bnx_intr(struct bnx_softc *sc)
3151 {
3152 	struct ifnet *ifp = &sc->arpcom.ac_if;
3153 	struct bnx_rx_ret_ring *ret = &sc->bnx_rx_ret_ring[0];
3154 
3155 	ASSERT_SERIALIZED(&sc->bnx_main_serialize);
3156 
3157 	ret->bnx_saved_status_tag = *ret->bnx_hw_status_tag;
3158 	/*
3159 	 * Use a load fence to ensure that status_tag is saved
3160 	 * before rx_prod, tx_cons and status.
3161 	 */
3162 	cpu_lfence();
3163 
3164 	bnx_handle_status(sc);
3165 
3166 	if (ifp->if_flags & IFF_RUNNING) {
3167 		struct bnx_tx_ring *txr = &sc->bnx_tx_ring[0];
3168 		uint16_t rx_prod, tx_cons;
3169 
3170 		lwkt_serialize_enter(&ret->bnx_rx_ret_serialize);
3171 		rx_prod = *ret->bnx_rx_considx;
3172 		if (ret->bnx_rx_saved_considx != rx_prod)
3173 			bnx_rxeof(ret, rx_prod, -1);
3174 		lwkt_serialize_exit(&ret->bnx_rx_ret_serialize);
3175 
3176 		lwkt_serialize_enter(&txr->bnx_tx_serialize);
3177 		tx_cons = *txr->bnx_tx_considx;
3178 		if (txr->bnx_tx_saved_considx != tx_cons)
3179 			bnx_txeof(txr, tx_cons);
3180 		lwkt_serialize_exit(&txr->bnx_tx_serialize);
3181 	}
3182 
3183 	bnx_writembx(sc, BGE_MBX_IRQ0_LO, ret->bnx_saved_status_tag << 24);
3184 }
3185 
3186 static void
3187 bnx_msix_tx_status(void *xtxr)
3188 {
3189 	struct bnx_tx_ring *txr = xtxr;
3190 	struct bnx_softc *sc = txr->bnx_sc;
3191 	struct ifnet *ifp = &sc->arpcom.ac_if;
3192 
3193 	ASSERT_SERIALIZED(&sc->bnx_main_serialize);
3194 
3195 	txr->bnx_saved_status_tag = *txr->bnx_hw_status_tag;
3196 	/*
3197 	 * Use a load fence to ensure that status_tag is saved
3198 	 * before tx_cons and status.
3199 	 */
3200 	cpu_lfence();
3201 
3202 	bnx_handle_status(sc);
3203 
3204 	if (ifp->if_flags & IFF_RUNNING) {
3205 		uint16_t tx_cons;
3206 
3207 		lwkt_serialize_enter(&txr->bnx_tx_serialize);
3208 		tx_cons = *txr->bnx_tx_considx;
3209 		if (txr->bnx_tx_saved_considx != tx_cons)
3210 			bnx_txeof(txr, tx_cons);
3211 		lwkt_serialize_exit(&txr->bnx_tx_serialize);
3212 	}
3213 
3214 	bnx_writembx(sc, BGE_MBX_IRQ0_LO, txr->bnx_saved_status_tag << 24);
3215 }
3216 
3217 static void
3218 bnx_msix_rx(void *xret)
3219 {
3220 	struct bnx_rx_ret_ring *ret = xret;
3221 	uint16_t rx_prod;
3222 
3223 	ASSERT_SERIALIZED(&ret->bnx_rx_ret_serialize);
3224 
3225 	ret->bnx_saved_status_tag = *ret->bnx_hw_status_tag;
3226 	/*
3227 	 * Use a load fence to ensure that status_tag is saved
3228 	 * before rx_prod.
3229 	 */
3230 	cpu_lfence();
3231 
3232 	rx_prod = *ret->bnx_rx_considx;
3233 	if (ret->bnx_rx_saved_considx != rx_prod)
3234 		bnx_rxeof(ret, rx_prod, -1);
3235 
3236 	bnx_writembx(ret->bnx_sc, ret->bnx_msix_mbx,
3237 	    ret->bnx_saved_status_tag << 24);
3238 }
3239 
3240 static void
3241 bnx_msix_rxtx(void *xret)
3242 {
3243 	struct bnx_rx_ret_ring *ret = xret;
3244 	struct bnx_tx_ring *txr = ret->bnx_txr;
3245 	uint16_t rx_prod, tx_cons;
3246 
3247 	ASSERT_SERIALIZED(&ret->bnx_rx_ret_serialize);
3248 
3249 	ret->bnx_saved_status_tag = *ret->bnx_hw_status_tag;
3250 	/*
3251 	 * Use a load fence to ensure that status_tag is saved
3252 	 * before rx_prod and tx_cons.
3253 	 */
3254 	cpu_lfence();
3255 
3256 	rx_prod = *ret->bnx_rx_considx;
3257 	if (ret->bnx_rx_saved_considx != rx_prod)
3258 		bnx_rxeof(ret, rx_prod, -1);
3259 
3260 	lwkt_serialize_enter(&txr->bnx_tx_serialize);
3261 	tx_cons = *txr->bnx_tx_considx;
3262 	if (txr->bnx_tx_saved_considx != tx_cons)
3263 		bnx_txeof(txr, tx_cons);
3264 	lwkt_serialize_exit(&txr->bnx_tx_serialize);
3265 
3266 	bnx_writembx(ret->bnx_sc, ret->bnx_msix_mbx,
3267 	    ret->bnx_saved_status_tag << 24);
3268 }
3269 
3270 static void
3271 bnx_msix_status(void *xsc)
3272 {
3273 	struct bnx_softc *sc = xsc;
3274 
3275 	ASSERT_SERIALIZED(&sc->bnx_main_serialize);
3276 
3277 	sc->bnx_saved_status_tag = *sc->bnx_hw_status_tag;
3278 	/*
3279 	 * Use a load fence to ensure that status_tag is saved
3280 	 * before status.
3281 	 */
3282 	cpu_lfence();
3283 
3284 	bnx_handle_status(sc);
3285 
3286 	bnx_writembx(sc, BGE_MBX_IRQ0_LO, sc->bnx_saved_status_tag << 24);
3287 }
3288 
3289 static void
3290 bnx_tick(void *xsc)
3291 {
3292 	struct bnx_softc *sc = xsc;
3293 
3294 	lwkt_serialize_enter(&sc->bnx_main_serialize);
3295 
3296 	bnx_stats_update_regs(sc);
3297 
3298 	if (sc->bnx_flags & BNX_FLAG_TBI) {
3299 		/*
3300 		 * Since auto-polling can't be used in TBI mode, we have to
3301 		 * poll the link status manually.  Register a pending link
3302 		 * event here and trigger an interrupt.
3303 		 */
3304 		sc->bnx_link_evt++;
3305 		BNX_SETBIT(sc, BGE_HCC_MODE, BGE_HCCMODE_COAL_NOW);
3306 	} else if (!sc->bnx_link) {
3307 		mii_tick(device_get_softc(sc->bnx_miibus));
3308 	}
3309 
3310 	callout_reset_bycpu(&sc->bnx_tick_timer, hz, bnx_tick, sc,
3311 	    sc->bnx_tick_cpuid);
3312 
3313 	lwkt_serialize_exit(&sc->bnx_main_serialize);
3314 }
3315 
3316 static void
3317 bnx_stats_update_regs(struct bnx_softc *sc)
3318 {
3319 	struct ifnet *ifp = &sc->arpcom.ac_if;
3320 	struct bge_mac_stats_regs stats;
3321 	uint32_t *s, val;
3322 	int i;
3323 
3324 	s = (uint32_t *)&stats;
3325 	for (i = 0; i < sizeof(struct bge_mac_stats_regs); i += 4) {
3326 		*s = CSR_READ_4(sc, BGE_RX_STATS + i);
3327 		s++;
3328 	}
3329 
3330 	IFNET_STAT_SET(ifp, collisions,
3331 	   (stats.dot3StatsSingleCollisionFrames +
3332 	   stats.dot3StatsMultipleCollisionFrames +
3333 	   stats.dot3StatsExcessiveCollisions +
3334 	   stats.dot3StatsLateCollisions));
3335 
3336 	val = CSR_READ_4(sc, BGE_RXLP_LOCSTAT_OUT_OF_BDS);
3337 	sc->bnx_norxbds += val;
3338 
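	/*
	 * Disable the RDMA TX length workaround (enabled in the init path)
	 * once the output frame counters exceed the number of RDMA channels.
	 */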
3339 	if (sc->bnx_rdma_wa != 0) {
3340 		if (stats.ifHCOutUcastPkts + stats.ifHCOutMulticastPkts +
3341 		    stats.ifHCOutBroadcastPkts > BGE_RDMA_NCHAN) {
3342 			CSR_WRITE_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL,
3343 			    CSR_READ_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL) &
3344 			    ~sc->bnx_rdma_wa);
3345 			sc->bnx_rdma_wa = 0;
3346 			if (bootverbose)
3347 				if_printf(ifp, "disable RDMA WA\n");
3348 		}
3349 	}
3350 }
3351 
3352 /*
3353  * Encapsulate an mbuf chain in the tx ring  by coupling the mbuf data
3354  * pointers to descriptors.
3355  */
3356 static int
3357 bnx_encap(struct bnx_tx_ring *txr, struct mbuf **m_head0, uint32_t *txidx,
3358     int *segs_used)
3359 {
3360 	struct bge_tx_bd *d = NULL;
3361 	uint16_t csum_flags = 0, vlan_tag = 0, mss = 0;
3362 	bus_dma_segment_t segs[BNX_NSEG_NEW];
3363 	bus_dmamap_t map;
3364 	int error, maxsegs, nsegs, idx, i;
3365 	struct mbuf *m_head = *m_head0, *m_new;
3366 
3367 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
3368 #ifdef BNX_TSO_DEBUG
3369 		int tso_nsegs;
3370 #endif
3371 
3372 		error = bnx_setup_tso(txr, m_head0, &mss, &csum_flags);
3373 		if (error)
3374 			return error;
3375 		m_head = *m_head0;
3376 
3377 #ifdef BNX_TSO_DEBUG
3378 		tso_nsegs = (m_head->m_pkthdr.len /
3379 		    m_head->m_pkthdr.tso_segsz) - 1;
3380 		if (tso_nsegs > (BNX_TSO_NSTATS - 1))
3381 			tso_nsegs = BNX_TSO_NSTATS - 1;
3382 		else if (tso_nsegs < 0)
3383 			tso_nsegs = 0;
3384 		txr->bnx_sc->bnx_tsosegs[tso_nsegs]++;
3385 #endif
3386 	} else if (m_head->m_pkthdr.csum_flags & BNX_CSUM_FEATURES) {
3387 		if (m_head->m_pkthdr.csum_flags & CSUM_IP)
3388 			csum_flags |= BGE_TXBDFLAG_IP_CSUM;
3389 		if (m_head->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))
3390 			csum_flags |= BGE_TXBDFLAG_TCP_UDP_CSUM;
3391 		if (m_head->m_flags & M_LASTFRAG)
3392 			csum_flags |= BGE_TXBDFLAG_IP_FRAG_END;
3393 		else if (m_head->m_flags & M_FRAG)
3394 			csum_flags |= BGE_TXBDFLAG_IP_FRAG;
3395 	}
3396 	if (m_head->m_flags & M_VLANTAG) {
3397 		csum_flags |= BGE_TXBDFLAG_VLAN_TAG;
3398 		vlan_tag = m_head->m_pkthdr.ether_vlantag;
3399 	}
3400 
3401 	idx = *txidx;
3402 	map = txr->bnx_tx_buf[idx].bnx_tx_dmamap;
3403 
3404 	maxsegs = (BGE_TX_RING_CNT - txr->bnx_tx_cnt) - BNX_NSEG_RSVD;
3405 	KASSERT(maxsegs >= BNX_NSEG_SPARE,
3406 		("not enough segments %d", maxsegs));
3407 
3408 	if (maxsegs > BNX_NSEG_NEW)
3409 		maxsegs = BNX_NSEG_NEW;
3410 
3411 	/*
3412 	 * Pad outbound frame to BNX_MIN_FRAMELEN for an unusual reason.
3413 	 * The bge hardware will pad out Tx runts to BNX_MIN_FRAMELEN,
3414 	 * but when such padded frames employ the bge IP/TCP checksum
3415 	 * offload, the hardware checksum assist gives incorrect results
3416 	 * (possibly from incorporating its own padding into the UDP/TCP
3417 	 * checksum; who knows).  If we pad such runts with zeros, the
3418 	 * onboard checksum comes out correct.
3419 	 */
3420 	if ((csum_flags & BGE_TXBDFLAG_TCP_UDP_CSUM) &&
3421 	    m_head->m_pkthdr.len < BNX_MIN_FRAMELEN) {
3422 		error = m_devpad(m_head, BNX_MIN_FRAMELEN);
3423 		if (error)
3424 			goto back;
3425 	}
3426 
3427 	if ((txr->bnx_tx_flags & BNX_TX_FLAG_SHORTDMA) &&
3428 	    m_head->m_next != NULL) {
3429 		m_new = bnx_defrag_shortdma(m_head);
3430 		if (m_new == NULL) {
3431 			error = ENOBUFS;
3432 			goto back;
3433 		}
3434 		*m_head0 = m_head = m_new;
3435 	}
3436 	if ((m_head->m_pkthdr.csum_flags & CSUM_TSO) == 0 &&
3437 	    (txr->bnx_tx_flags & BNX_TX_FLAG_FORCE_DEFRAG) &&
3438 	    m_head->m_next != NULL) {
		/*
		 * Forcefully defragment the mbuf chain to work around a
		 * hardware limitation that supports only a single
		 * outstanding DMA read operation.  If defragmentation
		 * fails, keep going with the original mbuf chain.
		 */
3445 		m_new = m_defrag(m_head, M_NOWAIT);
3446 		if (m_new != NULL)
3447 			*m_head0 = m_head = m_new;
3448 	}
3449 
3450 	error = bus_dmamap_load_mbuf_defrag(txr->bnx_tx_mtag, map,
3451 	    m_head0, segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
3452 	if (error)
3453 		goto back;
3454 	*segs_used += nsegs;
3455 
3456 	m_head = *m_head0;
3457 	bus_dmamap_sync(txr->bnx_tx_mtag, map, BUS_DMASYNC_PREWRITE);
3458 
3459 	for (i = 0; ; i++) {
3460 		d = &txr->bnx_tx_ring[idx];
3461 
3462 		d->bge_addr.bge_addr_lo = BGE_ADDR_LO(segs[i].ds_addr);
3463 		d->bge_addr.bge_addr_hi = BGE_ADDR_HI(segs[i].ds_addr);
3464 		d->bge_len = segs[i].ds_len;
3465 		d->bge_flags = csum_flags;
3466 		d->bge_vlan_tag = vlan_tag;
3467 		d->bge_mss = mss;
3468 
3469 		if (i == nsegs - 1)
3470 			break;
3471 		BNX_INC(idx, BGE_TX_RING_CNT);
3472 	}
3473 	/* Mark the last segment as end of packet... */
3474 	d->bge_flags |= BGE_TXBDFLAG_END;
3475 
	/*
	 * Ensure that the map for this transmission is placed at
	 * the array index of the last descriptor in this chain.
	 */
3480 	txr->bnx_tx_buf[*txidx].bnx_tx_dmamap = txr->bnx_tx_buf[idx].bnx_tx_dmamap;
3481 	txr->bnx_tx_buf[idx].bnx_tx_dmamap = map;
3482 	txr->bnx_tx_buf[idx].bnx_tx_mbuf = m_head;
3483 	txr->bnx_tx_cnt += nsegs;
3484 
3485 	BNX_INC(idx, BGE_TX_RING_CNT);
3486 	*txidx = idx;
3487 back:
3488 	if (error) {
3489 		m_freem(*m_head0);
3490 		*m_head0 = NULL;
3491 	}
3492 	return error;
3493 }
3494 
3495 /*
3496  * Main transmit routine. To avoid having to do mbuf copies, we put pointers
3497  * to the mbuf data regions directly in the transmit descriptors.
3498  */
3499 static void
3500 bnx_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
3501 {
3502 	struct bnx_tx_ring *txr = ifsq_get_priv(ifsq);
3503 	struct mbuf *m_head = NULL;
3504 	uint32_t prodidx;
3505 	int nsegs = 0;
3506 
3507 	KKASSERT(txr->bnx_ifsq == ifsq);
3508 	ASSERT_SERIALIZED(&txr->bnx_tx_serialize);
3509 
3510 	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
3511 		return;
3512 
3513 	prodidx = txr->bnx_tx_prodidx;
3514 
3515 	while (txr->bnx_tx_buf[prodidx].bnx_tx_mbuf == NULL) {
		/*
		 * Sanity check: avoid coming within BNX_NSEG_RSVD
		 * descriptors of the end of the ring.  Also make
		 * sure there are BNX_NSEG_SPARE descriptors left for
		 * defragmenting jumbo frames or TSO segments.
		 */
3522 		if ((BGE_TX_RING_CNT - txr->bnx_tx_cnt) <
3523 		    (BNX_NSEG_RSVD + BNX_NSEG_SPARE)) {
3524 			ifsq_set_oactive(ifsq);
3525 			break;
3526 		}
3527 
3528 		m_head = ifsq_dequeue(ifsq);
3529 		if (m_head == NULL)
3530 			break;
3531 
3532 		/*
3533 		 * Pack the data into the transmit ring. If we
3534 		 * don't have room, set the OACTIVE flag and wait
3535 		 * for the NIC to drain the ring.
3536 		 */
3537 		if (bnx_encap(txr, &m_head, &prodidx, &nsegs)) {
3538 			ifsq_set_oactive(ifsq);
3539 			IFNET_STAT_INC(ifp, oerrors, 1);
3540 			break;
3541 		}
3542 
3543 		if (nsegs >= txr->bnx_tx_wreg) {
3544 			/* Transmit */
3545 			bnx_writembx(txr->bnx_sc, txr->bnx_tx_mbx, prodidx);
3546 			nsegs = 0;
3547 		}
3548 
3549 		ETHER_BPF_MTAP(ifp, m_head);
3550 
3551 		/*
3552 		 * Set a timeout in case the chip goes out to lunch.
3553 		 */
3554 		ifsq_watchdog_set_count(&txr->bnx_tx_watchdog, 5);
3555 	}
3556 
3557 	if (nsegs > 0) {
3558 		/* Transmit */
3559 		bnx_writembx(txr->bnx_sc, txr->bnx_tx_mbx, prodidx);
3560 	}
3561 	txr->bnx_tx_prodidx = prodidx;
3562 }
3563 
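/*
 * Bring the hardware up: reset and reinitialize the chip, program the
 * MAC address, MTU and RX filters, set up the RX/TX rings, enable the
 * transmitter and receiver, and (re)arm interrupts or polling.
 * Called with all serializers held.
 */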
3564 static void
3565 bnx_init(void *xsc)
3566 {
3567 	struct bnx_softc *sc = xsc;
3568 	struct ifnet *ifp = &sc->arpcom.ac_if;
3569 	uint16_t *m;
3570 	uint32_t mode;
3571 	int i;
3572 	boolean_t polling;
3573 
3574 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3575 
3576 	/* Cancel pending I/O and flush buffers. */
3577 	bnx_stop(sc);
3578 
3579 	bnx_sig_pre_reset(sc, BNX_RESET_START);
3580 	bnx_reset(sc);
3581 	bnx_sig_post_reset(sc, BNX_RESET_START);
3582 
3583 	bnx_chipinit(sc);
3584 
3585 	/*
3586 	 * Init the various state machines, ring
3587 	 * control blocks and firmware.
3588 	 */
3589 	if (bnx_blockinit(sc)) {
3590 		if_printf(ifp, "initialization failure\n");
3591 		bnx_stop(sc);
3592 		return;
3593 	}
3594 
3595 	/* Specify MTU. */
3596 	CSR_WRITE_4(sc, BGE_RX_MTU, ifp->if_mtu +
3597 	    ETHER_HDR_LEN + ETHER_CRC_LEN + EVL_ENCAPLEN);
3598 
3599 	/* Load our MAC address. */
3600 	m = (uint16_t *)&sc->arpcom.ac_enaddr[0];
3601 	CSR_WRITE_4(sc, BGE_MAC_ADDR1_LO, htons(m[0]));
3602 	CSR_WRITE_4(sc, BGE_MAC_ADDR1_HI, (htons(m[1]) << 16) | htons(m[2]));
3603 
3604 	/* Enable or disable promiscuous mode as needed. */
3605 	bnx_setpromisc(sc);
3606 
3607 	/* Program multicast filter. */
3608 	bnx_setmulti(sc);
3609 
3610 	/* Init RX ring. */
3611 	if (bnx_init_rx_ring_std(&sc->bnx_rx_std_ring)) {
3612 		if_printf(ifp, "RX ring initialization failed\n");
3613 		bnx_stop(sc);
3614 		return;
3615 	}
3616 
3617 	/* Init jumbo RX ring. */
3618 	if (ifp->if_mtu > (ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN)) {
3619 		if (bnx_init_rx_ring_jumbo(sc)) {
3620 			if_printf(ifp, "Jumbo RX ring initialization failed\n");
3621 			bnx_stop(sc);
3622 			return;
3623 		}
3624 	}
3625 
3626 	/* Init our RX return ring index */
3627 	for (i = 0; i < sc->bnx_rx_retcnt; ++i) {
3628 		struct bnx_rx_ret_ring *ret = &sc->bnx_rx_ret_ring[i];
3629 
3630 		ret->bnx_rx_saved_considx = 0;
3631 		ret->bnx_rx_cnt = 0;
3632 	}
3633 
3634 	/* Init TX ring. */
3635 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i)
3636 		bnx_init_tx_ring(&sc->bnx_tx_ring[i]);
3637 
3638 	/* Enable TX MAC state machine lockup fix. */
3639 	mode = CSR_READ_4(sc, BGE_TX_MODE);
3640 	mode |= BGE_TXMODE_MBUF_LOCKUP_FIX;
3641 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5720 ||
3642 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762) {
3643 		mode &= ~(BGE_TXMODE_JMB_FRM_LEN | BGE_TXMODE_CNT_DN_MODE);
3644 		mode |= CSR_READ_4(sc, BGE_TX_MODE) &
3645 		    (BGE_TXMODE_JMB_FRM_LEN | BGE_TXMODE_CNT_DN_MODE);
3646 	}
3647 	/* Turn on transmitter */
3648 	CSR_WRITE_4(sc, BGE_TX_MODE, mode | BGE_TXMODE_ENABLE);
3649 	DELAY(100);
3650 
3651 	/* Initialize RSS */
3652 	mode = BGE_RXMODE_ENABLE | BGE_RXMODE_IPV6_ENABLE;
3653 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5762)
3654 		mode |= BGE_RXMODE_IPV4_FRAG_FIX;
3655 	if (BNX_RSS_ENABLED(sc)) {
3656 		bnx_init_rss(sc);
3657 		mode |= BGE_RXMODE_RSS_ENABLE |
3658 		    BGE_RXMODE_RSS_HASH_MASK_BITS |
3659 		    BGE_RXMODE_RSS_IPV4_HASH |
3660 		    BGE_RXMODE_RSS_TCP_IPV4_HASH;
3661 	}
3662 	/* Turn on receiver */
3663 	BNX_SETBIT(sc, BGE_RX_MODE, mode);
3664 	DELAY(10);
3665 
3666 	/*
3667 	 * Set the number of good frames to receive after RX MBUF
3668 	 * Low Watermark has been reached.  After the RX MAC receives
3669 	 * this number of frames, it will drop subsequent incoming
3670 	 * frames until the MBUF High Watermark is reached.
3671 	 */
3672 	if (BNX_IS_57765_FAMILY(sc))
3673 		CSR_WRITE_4(sc, BGE_MAX_RX_FRAME_LOWAT, 1);
3674 	else
3675 		CSR_WRITE_4(sc, BGE_MAX_RX_FRAME_LOWAT, 2);
3676 
3677 	if (sc->bnx_intr_type == PCI_INTR_TYPE_MSI ||
3678 	    sc->bnx_intr_type == PCI_INTR_TYPE_MSIX) {
3679 		if (bootverbose) {
3680 			if_printf(ifp, "MSI_MODE: %#x\n",
3681 			    CSR_READ_4(sc, BGE_MSI_MODE));
3682 		}
3683 	}
3684 
3685 	/* Tell firmware we're alive. */
3686 	BNX_SETBIT(sc, BGE_MODE_CTL, BGE_MODECTL_STACKUP);
3687 
3688 	/* Enable host interrupts if polling(4) is not enabled. */
3689 	PCI_SETBIT(sc->bnx_dev, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_CLEAR_INTA, 4);
3690 
3691 	polling = FALSE;
3692 #ifdef IFPOLL_ENABLE
3693 	if (ifp->if_flags & IFF_NPOLLING)
3694 		polling = TRUE;
3695 #endif
3696 	if (polling)
3697 		bnx_disable_intr(sc);
3698 	else
3699 		bnx_enable_intr(sc);
3700 	bnx_set_tick_cpuid(sc, polling);
3701 
3702 	ifp->if_flags |= IFF_RUNNING;
3703 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
3704 		struct bnx_tx_ring *txr = &sc->bnx_tx_ring[i];
3705 
3706 		ifsq_clr_oactive(txr->bnx_ifsq);
3707 		ifsq_watchdog_start(&txr->bnx_tx_watchdog);
3708 	}
3709 
3710 	bnx_ifmedia_upd(ifp);
3711 
3712 	callout_reset_bycpu(&sc->bnx_tick_timer, hz, bnx_tick, sc,
3713 	    sc->bnx_tick_cpuid);
3714 }
3715 
3716 /*
3717  * Set media options.
3718  */
3719 static int
3720 bnx_ifmedia_upd(struct ifnet *ifp)
3721 {
3722 	struct bnx_softc *sc = ifp->if_softc;
3723 
3724 	/* If this is a 1000baseX NIC, enable the TBI port. */
3725 	if (sc->bnx_flags & BNX_FLAG_TBI) {
3726 		struct ifmedia *ifm = &sc->bnx_ifmedia;
3727 
3728 		if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
3729 			return(EINVAL);
3730 
3731 		switch(IFM_SUBTYPE(ifm->ifm_media)) {
3732 		case IFM_AUTO:
3733 			break;
3734 
3735 		case IFM_1000_SX:
3736 			if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) {
3737 				BNX_CLRBIT(sc, BGE_MAC_MODE,
3738 				    BGE_MACMODE_HALF_DUPLEX);
3739 			} else {
3740 				BNX_SETBIT(sc, BGE_MAC_MODE,
3741 				    BGE_MACMODE_HALF_DUPLEX);
3742 			}
3743 			DELAY(40);
3744 			break;
3745 		default:
3746 			return(EINVAL);
3747 		}
3748 	} else {
3749 		struct mii_data *mii = device_get_softc(sc->bnx_miibus);
3750 
3751 		sc->bnx_link_evt++;
3752 		sc->bnx_link = 0;
3753 		if (mii->mii_instance) {
3754 			struct mii_softc *miisc;
3755 
3756 			LIST_FOREACH(miisc, &mii->mii_phys, mii_list)
3757 				mii_phy_reset(miisc);
3758 		}
3759 		mii_mediachg(mii);
3760 
		/*
		 * Force an interrupt so that bnx_link_upd is called if
		 * needed and any pending link state attention is cleared.
		 * Without this we get no further interrupts for link state
		 * changes, so the link never comes UP and bnx_start cannot
		 * transmit; the only other way to recover is to receive a
		 * packet and take an RX interrupt.
		 *
		 * bnx_tick should cover fiber cards, so this may not be
		 * needed when BNX_FLAG_TBI is set, but since we poll for
		 * fiber anyway it does no harm.
		 */
3774 		BNX_SETBIT(sc, BGE_HCC_MODE, BGE_HCCMODE_COAL_NOW);
3775 	}
3776 	return(0);
3777 }
3778 
3779 /*
3780  * Report current media status.
3781  */
3782 static void
3783 bnx_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
3784 {
3785 	struct bnx_softc *sc = ifp->if_softc;
3786 
3787 	if ((ifp->if_flags & IFF_RUNNING) == 0)
3788 		return;
3789 
3790 	if (sc->bnx_flags & BNX_FLAG_TBI) {
3791 		ifmr->ifm_status = IFM_AVALID;
3792 		ifmr->ifm_active = IFM_ETHER;
3793 		if (CSR_READ_4(sc, BGE_MAC_STS) &
3794 		    BGE_MACSTAT_TBI_PCS_SYNCHED) {
3795 			ifmr->ifm_status |= IFM_ACTIVE;
3796 		} else {
3797 			ifmr->ifm_active |= IFM_NONE;
3798 			return;
3799 		}
3800 
3801 		ifmr->ifm_active |= IFM_1000_SX;
3802 		if (CSR_READ_4(sc, BGE_MAC_MODE) & BGE_MACMODE_HALF_DUPLEX)
3803 			ifmr->ifm_active |= IFM_HDX;
3804 		else
3805 			ifmr->ifm_active |= IFM_FDX;
3806 	} else {
3807 		struct mii_data *mii = device_get_softc(sc->bnx_miibus);
3808 
3809 		mii_pollstat(mii);
3810 		ifmr->ifm_active = mii->mii_media_active;
3811 		ifmr->ifm_status = mii->mii_media_status;
3812 	}
3813 }
3814 
3815 static int
3816 bnx_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
3817 {
3818 	struct bnx_softc *sc = ifp->if_softc;
3819 	struct ifreq *ifr = (struct ifreq *)data;
3820 	int mask, error = 0;
3821 
3822 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3823 
3824 	switch (command) {
3825 	case SIOCSIFMTU:
3826 		if ((!BNX_IS_JUMBO_CAPABLE(sc) && ifr->ifr_mtu > ETHERMTU) ||
3827 		    (BNX_IS_JUMBO_CAPABLE(sc) &&
3828 		     ifr->ifr_mtu > BNX_JUMBO_MTU)) {
3829 			error = EINVAL;
3830 		} else if (ifp->if_mtu != ifr->ifr_mtu) {
3831 			ifp->if_mtu = ifr->ifr_mtu;
3832 			if (ifp->if_flags & IFF_RUNNING)
3833 				bnx_init(sc);
3834 		}
3835 		break;
3836 	case SIOCSIFFLAGS:
3837 		if (ifp->if_flags & IFF_UP) {
3838 			if (ifp->if_flags & IFF_RUNNING) {
3839 				mask = ifp->if_flags ^ sc->bnx_if_flags;
3840 
3841 				/*
3842 				 * If only the state of the PROMISC flag
3843 				 * changed, then just use the 'set promisc
3844 				 * mode' command instead of reinitializing
3845 				 * the entire NIC. Doing a full re-init
3846 				 * means reloading the firmware and waiting
3847 				 * for it to start up, which may take a
3848 				 * second or two.  Similarly for ALLMULTI.
3849 				 */
3850 				if (mask & IFF_PROMISC)
3851 					bnx_setpromisc(sc);
3852 				if (mask & IFF_ALLMULTI)
3853 					bnx_setmulti(sc);
3854 			} else {
3855 				bnx_init(sc);
3856 			}
3857 		} else if (ifp->if_flags & IFF_RUNNING) {
3858 			bnx_stop(sc);
3859 		}
3860 		sc->bnx_if_flags = ifp->if_flags;
3861 		break;
3862 	case SIOCADDMULTI:
3863 	case SIOCDELMULTI:
3864 		if (ifp->if_flags & IFF_RUNNING)
3865 			bnx_setmulti(sc);
3866 		break;
3867 	case SIOCSIFMEDIA:
3868 	case SIOCGIFMEDIA:
3869 		if (sc->bnx_flags & BNX_FLAG_TBI) {
3870 			error = ifmedia_ioctl(ifp, ifr,
3871 			    &sc->bnx_ifmedia, command);
3872 		} else {
3873 			struct mii_data *mii;
3874 
3875 			mii = device_get_softc(sc->bnx_miibus);
3876 			error = ifmedia_ioctl(ifp, ifr,
3877 					      &mii->mii_media, command);
3878 		}
3879 		break;
3880         case SIOCSIFCAP:
3881 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3882 		if (mask & IFCAP_HWCSUM) {
3883 			ifp->if_capenable ^= (mask & IFCAP_HWCSUM);
3884 			if (ifp->if_capenable & IFCAP_TXCSUM)
3885 				ifp->if_hwassist |= BNX_CSUM_FEATURES;
3886 			else
3887 				ifp->if_hwassist &= ~BNX_CSUM_FEATURES;
3888 		}
3889 		if (mask & IFCAP_TSO) {
3890 			ifp->if_capenable ^= (mask & IFCAP_TSO);
3891 			if (ifp->if_capenable & IFCAP_TSO)
3892 				ifp->if_hwassist |= CSUM_TSO;
3893 			else
3894 				ifp->if_hwassist &= ~CSUM_TSO;
3895 		}
3896 		if (mask & IFCAP_RSS)
3897 			ifp->if_capenable ^= IFCAP_RSS;
3898 		break;
3899 	default:
3900 		error = ether_ioctl(ifp, command, data);
3901 		break;
3902 	}
3903 	return error;
3904 }
3905 
3906 static void
3907 bnx_watchdog(struct ifaltq_subque *ifsq)
3908 {
3909 	struct ifnet *ifp = ifsq_get_ifp(ifsq);
3910 	struct bnx_softc *sc = ifp->if_softc;
3911 	int i;
3912 
3913 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3914 
3915 	if_printf(ifp, "watchdog timeout -- resetting\n");
3916 
3917 	bnx_init(sc);
3918 
3919 	IFNET_STAT_INC(ifp, oerrors, 1);
3920 
3921 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i)
3922 		ifsq_devstart_sched(sc->bnx_tx_ring[i].bnx_ifsq);
3923 }
3924 
3925 /*
3926  * Stop the adapter and free any mbufs allocated to the
3927  * RX and TX lists.
3928  */
3929 static void
3930 bnx_stop(struct bnx_softc *sc)
3931 {
3932 	struct ifnet *ifp = &sc->arpcom.ac_if;
3933 	int i;
3934 
3935 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3936 
3937 	callout_stop(&sc->bnx_tick_timer);
3938 
3939 	/* Disable host interrupts. */
3940 	bnx_disable_intr(sc);
3941 
3942 	/*
3943 	 * Tell firmware we're shutting down.
3944 	 */
3945 	bnx_sig_pre_reset(sc, BNX_RESET_SHUTDOWN);
3946 
3947 	/*
3948 	 * Disable all of the receiver blocks
3949 	 */
3950 	bnx_stop_block(sc, BGE_RX_MODE, BGE_RXMODE_ENABLE);
3951 	bnx_stop_block(sc, BGE_RBDI_MODE, BGE_RBDIMODE_ENABLE);
3952 	bnx_stop_block(sc, BGE_RXLP_MODE, BGE_RXLPMODE_ENABLE);
3953 	bnx_stop_block(sc, BGE_RDBDI_MODE, BGE_RBDIMODE_ENABLE);
3954 	bnx_stop_block(sc, BGE_RDC_MODE, BGE_RDCMODE_ENABLE);
3955 	bnx_stop_block(sc, BGE_RBDC_MODE, BGE_RBDCMODE_ENABLE);
3956 
3957 	/*
3958 	 * Disable all of the transmit blocks
3959 	 */
3960 	bnx_stop_block(sc, BGE_SRS_MODE, BGE_SRSMODE_ENABLE);
3961 	bnx_stop_block(sc, BGE_SBDI_MODE, BGE_SBDIMODE_ENABLE);
3962 	bnx_stop_block(sc, BGE_SDI_MODE, BGE_SDIMODE_ENABLE);
3963 	bnx_stop_block(sc, BGE_RDMA_MODE, BGE_RDMAMODE_ENABLE);
3964 	bnx_stop_block(sc, BGE_SDC_MODE, BGE_SDCMODE_ENABLE);
3965 	bnx_stop_block(sc, BGE_SBDC_MODE, BGE_SBDCMODE_ENABLE);
3966 
3967 	/*
3968 	 * Shut down all of the memory managers and related
3969 	 * state machines.
3970 	 */
3971 	bnx_stop_block(sc, BGE_HCC_MODE, BGE_HCCMODE_ENABLE);
3972 	bnx_stop_block(sc, BGE_WDMA_MODE, BGE_WDMAMODE_ENABLE);
3973 	CSR_WRITE_4(sc, BGE_FTQ_RESET, 0xFFFFFFFF);
3974 	CSR_WRITE_4(sc, BGE_FTQ_RESET, 0);
3975 
3976 	bnx_reset(sc);
3977 	bnx_sig_post_reset(sc, BNX_RESET_SHUTDOWN);
3978 
3979 	/*
3980 	 * Tell firmware we're shutting down.
3981 	 */
3982 	BNX_CLRBIT(sc, BGE_MODE_CTL, BGE_MODECTL_STACKUP);
3983 
3984 	/* Free the RX lists. */
3985 	bnx_free_rx_ring_std(&sc->bnx_rx_std_ring);
3986 
3987 	/* Free jumbo RX list. */
3988 	if (BNX_IS_JUMBO_CAPABLE(sc))
3989 		bnx_free_rx_ring_jumbo(sc);
3990 
3991 	/* Free TX buffers. */
3992 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
3993 		struct bnx_tx_ring *txr = &sc->bnx_tx_ring[i];
3994 
3995 		txr->bnx_saved_status_tag = 0;
3996 		bnx_free_tx_ring(txr);
3997 	}
3998 
3999 	/* Clear saved status tag */
4000 	for (i = 0; i < sc->bnx_rx_retcnt; ++i)
4001 		sc->bnx_rx_ret_ring[i].bnx_saved_status_tag = 0;
4002 
4003 	sc->bnx_link = 0;
4004 	sc->bnx_coal_chg = 0;
4005 
4006 	ifp->if_flags &= ~IFF_RUNNING;
4007 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
4008 		struct bnx_tx_ring *txr = &sc->bnx_tx_ring[i];
4009 
4010 		ifsq_clr_oactive(txr->bnx_ifsq);
4011 		ifsq_watchdog_stop(&txr->bnx_tx_watchdog);
4012 	}
4013 }
4014 
4015 /*
4016  * Stop all chip I/O so that the kernel's probe routines don't
4017  * get confused by errant DMAs when rebooting.
4018  */
4019 static void
4020 bnx_shutdown(device_t dev)
4021 {
4022 	struct bnx_softc *sc = device_get_softc(dev);
4023 	struct ifnet *ifp = &sc->arpcom.ac_if;
4024 
4025 	ifnet_serialize_all(ifp);
4026 	bnx_stop(sc);
4027 	ifnet_deserialize_all(ifp);
4028 }
4029 
4030 static int
4031 bnx_suspend(device_t dev)
4032 {
4033 	struct bnx_softc *sc = device_get_softc(dev);
4034 	struct ifnet *ifp = &sc->arpcom.ac_if;
4035 
4036 	ifnet_serialize_all(ifp);
4037 	bnx_stop(sc);
4038 	ifnet_deserialize_all(ifp);
4039 
4040 	return 0;
4041 }
4042 
4043 static int
4044 bnx_resume(device_t dev)
4045 {
4046 	struct bnx_softc *sc = device_get_softc(dev);
4047 	struct ifnet *ifp = &sc->arpcom.ac_if;
4048 
4049 	ifnet_serialize_all(ifp);
4050 
4051 	if (ifp->if_flags & IFF_UP) {
4052 		int i;
4053 
4054 		bnx_init(sc);
4055 		for (i = 0; i < sc->bnx_tx_ringcnt; ++i)
4056 			ifsq_devstart_sched(sc->bnx_tx_ring[i].bnx_ifsq);
4057 	}
4058 
4059 	ifnet_deserialize_all(ifp);
4060 
4061 	return 0;
4062 }
4063 
4064 static void
4065 bnx_setpromisc(struct bnx_softc *sc)
4066 {
4067 	struct ifnet *ifp = &sc->arpcom.ac_if;
4068 
4069 	if (ifp->if_flags & IFF_PROMISC)
4070 		BNX_SETBIT(sc, BGE_RX_MODE, BGE_RXMODE_RX_PROMISC);
4071 	else
4072 		BNX_CLRBIT(sc, BGE_RX_MODE, BGE_RXMODE_RX_PROMISC);
4073 }
4074 
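/*
 * Release all DMA resources: RX return rings, RX mbuf maps and tag,
 * the standard RX ring, TX rings, jumbo buffers, status blocks and
 * finally the parent DMA tag.
 */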
4075 static void
4076 bnx_dma_free(struct bnx_softc *sc)
4077 {
4078 	struct bnx_rx_std_ring *std = &sc->bnx_rx_std_ring;
4079 	int i;
4080 
4081 	/* Destroy RX return rings */
4082 	if (sc->bnx_rx_ret_ring != NULL) {
4083 		for (i = 0; i < sc->bnx_rx_retcnt; ++i)
4084 			bnx_destroy_rx_ret_ring(&sc->bnx_rx_ret_ring[i]);
4085 		kfree(sc->bnx_rx_ret_ring, M_DEVBUF);
4086 	}
4087 
4088 	/* Destroy RX mbuf DMA stuffs. */
4089 	if (std->bnx_rx_mtag != NULL) {
4090 		for (i = 0; i < BGE_STD_RX_RING_CNT; i++) {
4091 			KKASSERT(std->bnx_rx_std_buf[i].bnx_rx_mbuf == NULL);
4092 			bus_dmamap_destroy(std->bnx_rx_mtag,
4093 			    std->bnx_rx_std_buf[i].bnx_rx_dmamap);
4094 		}
4095 		bus_dma_tag_destroy(std->bnx_rx_mtag);
4096 	}
4097 
4098 	/* Destroy standard RX ring */
4099 	bnx_dma_block_free(std->bnx_rx_std_ring_tag,
4100 	    std->bnx_rx_std_ring_map, std->bnx_rx_std_ring);
4101 
4102 	/* Destroy TX rings */
4103 	if (sc->bnx_tx_ring != NULL) {
4104 		for (i = 0; i < sc->bnx_tx_ringcnt; ++i)
4105 			bnx_destroy_tx_ring(&sc->bnx_tx_ring[i]);
4106 		kfree(sc->bnx_tx_ring, M_DEVBUF);
4107 	}
4108 
4109 	if (BNX_IS_JUMBO_CAPABLE(sc))
4110 		bnx_free_jumbo_mem(sc);
4111 
4112 	/* Destroy status blocks */
4113 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
4114 		struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
4115 
4116 		bnx_dma_block_free(intr->bnx_status_tag,
4117 		    intr->bnx_status_map, intr->bnx_status_block);
4118 	}
4119 
4120 	/* Destroy the parent tag */
4121 	if (sc->bnx_cdata.bnx_parent_tag != NULL)
4122 		bus_dma_tag_destroy(sc->bnx_cdata.bnx_parent_tag);
4123 }
4124 
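/*
 * Allocate all DMA resources used by the driver: the parent DMA tag,
 * per-vector status blocks, the RX mbuf tag and maps, the standard RX
 * ring, the RX return rings, the TX rings and, if applicable, the
 * jumbo buffer pool.
 */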
4125 static int
4126 bnx_dma_alloc(device_t dev)
4127 {
4128 	struct bnx_softc *sc = device_get_softc(dev);
4129 	struct bnx_rx_std_ring *std = &sc->bnx_rx_std_ring;
4130 	int i, error, mbx;
4131 
	/*
	 * Allocate the parent bus DMA tag appropriate for PCI.
	 *
	 * All of the NetExtreme/NetLink controllers have a 4GB boundary
	 * DMA bug: whenever a DMA address crosses a multiple of 4GB
	 * (4GB, 8GB, 12GB, etc.), i.e. makes the transition from
	 * 0xX_FFFF_FFFF to 0x(X+1)_0000_0000, an internal DMA state
	 * machine will lock up and cause the device to hang.
	 */
4142 	error = bus_dma_tag_create(NULL, 1, BGE_DMA_BOUNDARY_4G,
4143 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
4144 	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT,
4145 	    0, &sc->bnx_cdata.bnx_parent_tag);
4146 	if (error) {
4147 		device_printf(dev, "could not create parent DMA tag\n");
4148 		return error;
4149 	}
4150 
4151 	/*
4152 	 * Create DMA stuffs for status blocks.
4153 	 */
4154 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
4155 		struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
4156 
4157 		error = bnx_dma_block_alloc(sc,
4158 		    __VM_CACHELINE_ALIGN(BGE_STATUS_BLK_SZ),
4159 		    &intr->bnx_status_tag, &intr->bnx_status_map,
4160 		    (void *)&intr->bnx_status_block,
4161 		    &intr->bnx_status_block_paddr);
4162 		if (error) {
4163 			device_printf(dev,
4164 			    "could not create %dth status block\n", i);
4165 			return error;
4166 		}
4167 	}
4168 	sc->bnx_hw_status = &sc->bnx_intr_data[0].bnx_status_block->bge_status;
4169 	if (sc->bnx_flags & BNX_FLAG_STATUS_HASTAG) {
4170 		sc->bnx_hw_status_tag =
4171 		    &sc->bnx_intr_data[0].bnx_status_block->bge_status_tag;
4172 	}
4173 
4174 	/*
4175 	 * Create DMA tag and maps for RX mbufs.
4176 	 */
4177 	std->bnx_sc = sc;
4178 	lwkt_serialize_init(&std->bnx_rx_std_serialize);
4179 	error = bus_dma_tag_create(sc->bnx_cdata.bnx_parent_tag, 1, 0,
4180 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
4181 	    NULL, NULL, MCLBYTES, 1, MCLBYTES,
4182 	    BUS_DMA_ALLOCNOW | BUS_DMA_WAITOK, &std->bnx_rx_mtag);
4183 	if (error) {
4184 		device_printf(dev, "could not create RX mbuf DMA tag\n");
4185 		return error;
4186 	}
4187 
4188 	for (i = 0; i < BGE_STD_RX_RING_CNT; ++i) {
4189 		error = bus_dmamap_create(std->bnx_rx_mtag, BUS_DMA_WAITOK,
4190 		    &std->bnx_rx_std_buf[i].bnx_rx_dmamap);
4191 		if (error) {
4192 			int j;
4193 
4194 			for (j = 0; j < i; ++j) {
4195 				bus_dmamap_destroy(std->bnx_rx_mtag,
4196 				    std->bnx_rx_std_buf[j].bnx_rx_dmamap);
4197 			}
4198 			bus_dma_tag_destroy(std->bnx_rx_mtag);
4199 			std->bnx_rx_mtag = NULL;
4200 
4201 			device_printf(dev,
4202 			    "could not create %dth RX mbuf DMA map\n", i);
4203 			return error;
4204 		}
4205 	}
4206 
4207 	/*
4208 	 * Create DMA stuffs for standard RX ring.
4209 	 */
4210 	error = bnx_dma_block_alloc(sc, BGE_STD_RX_RING_SZ,
4211 	    &std->bnx_rx_std_ring_tag,
4212 	    &std->bnx_rx_std_ring_map,
4213 	    (void *)&std->bnx_rx_std_ring,
4214 	    &std->bnx_rx_std_ring_paddr);
4215 	if (error) {
4216 		device_printf(dev, "could not create std RX ring\n");
4217 		return error;
4218 	}
4219 
4220 	/*
4221 	 * Create RX return rings
4222 	 */
4223 	mbx = BGE_MBX_RX_CONS0_LO;
4224 	sc->bnx_rx_ret_ring =
4225 		kmalloc(sizeof(struct bnx_rx_ret_ring) * sc->bnx_rx_retcnt,
4226 			M_DEVBUF,
4227 			M_WAITOK | M_ZERO | M_CACHEALIGN);
4228 	for (i = 0; i < sc->bnx_rx_retcnt; ++i) {
4229 		struct bnx_rx_ret_ring *ret = &sc->bnx_rx_ret_ring[i];
4230 		struct bnx_intr_data *intr;
4231 
4232 		ret->bnx_sc = sc;
4233 		ret->bnx_std = std;
4234 		ret->bnx_rx_mbx = mbx;
4235 		ret->bnx_rx_cntmax = (BGE_STD_RX_RING_CNT / 4) /
4236 		    sc->bnx_rx_retcnt;
4237 		ret->bnx_rx_mask = 1 << i;
4238 
4239 		if (!BNX_RSS_ENABLED(sc)) {
4240 			intr = &sc->bnx_intr_data[0];
4241 		} else {
4242 			KKASSERT(i + 1 < sc->bnx_intr_cnt);
4243 			intr = &sc->bnx_intr_data[i + 1];
4244 		}
4245 
4246 		if (i == 0) {
4247 			ret->bnx_rx_considx =
4248 			    &intr->bnx_status_block->bge_idx[0].bge_rx_prod_idx;
4249 		} else if (i == 1) {
4250 			ret->bnx_rx_considx =
4251 			    &intr->bnx_status_block->bge_rx_jumbo_cons_idx;
4252 		} else if (i == 2) {
4253 			ret->bnx_rx_considx =
4254 			    &intr->bnx_status_block->bge_rsvd1;
4255 		} else if (i == 3) {
4256 			ret->bnx_rx_considx =
4257 			    &intr->bnx_status_block->bge_rx_mini_cons_idx;
4258 		} else {
4259 			panic("unknown RX return ring %d\n", i);
4260 		}
4261 		ret->bnx_hw_status_tag =
4262 		    &intr->bnx_status_block->bge_status_tag;
4263 
4264 		error = bnx_create_rx_ret_ring(ret);
4265 		if (error) {
4266 			device_printf(dev,
4267 			    "could not create %dth RX ret ring\n", i);
4268 			return error;
4269 		}
4270 		mbx += 8;
4271 	}
4272 
4273 	/*
4274 	 * Create TX rings
4275 	 */
4276 	sc->bnx_tx_ring =
4277 		kmalloc(sizeof(struct bnx_tx_ring) * sc->bnx_tx_ringcnt,
4278 			M_DEVBUF,
4279 			M_WAITOK | M_ZERO | M_CACHEALIGN);
4280 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
4281 		struct bnx_tx_ring *txr = &sc->bnx_tx_ring[i];
4282 		struct bnx_intr_data *intr;
4283 
4284 		txr->bnx_sc = sc;
4285 		txr->bnx_tx_mbx = bnx_tx_mailbox[i];
4286 
4287 		if (sc->bnx_tx_ringcnt == 1) {
4288 			intr = &sc->bnx_intr_data[0];
4289 		} else {
4290 			KKASSERT(i + 1 < sc->bnx_intr_cnt);
4291 			intr = &sc->bnx_intr_data[i + 1];
4292 		}
4293 
4294 		if ((sc->bnx_flags & BNX_FLAG_RXTX_BUNDLE) == 0) {
4295 			txr->bnx_hw_status_tag =
4296 			    &intr->bnx_status_block->bge_status_tag;
4297 		}
4298 		txr->bnx_tx_considx =
4299 		    &intr->bnx_status_block->bge_idx[0].bge_tx_cons_idx;
4300 
4301 		error = bnx_create_tx_ring(txr);
4302 		if (error) {
4303 			device_printf(dev,
4304 			    "could not create %dth TX ring\n", i);
4305 			return error;
4306 		}
4307 	}
4308 
4309 	/*
4310 	 * Create jumbo buffer pool.
4311 	 */
4312 	if (BNX_IS_JUMBO_CAPABLE(sc)) {
4313 		error = bnx_alloc_jumbo_mem(sc);
4314 		if (error) {
4315 			device_printf(dev,
4316 			    "could not create jumbo buffer pool\n");
4317 			return error;
4318 		}
4319 	}
4320 
4321 	return 0;
4322 }
4323 
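/*
 * Allocate a page-aligned, zeroed, bus-coherent DMA memory block and
 * return its tag, map, kernel address and bus address.
 */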
4324 static int
4325 bnx_dma_block_alloc(struct bnx_softc *sc, bus_size_t size, bus_dma_tag_t *tag,
4326 		    bus_dmamap_t *map, void **addr, bus_addr_t *paddr)
4327 {
4328 	bus_dmamem_t dmem;
4329 	int error;
4330 
4331 	error = bus_dmamem_coherent(sc->bnx_cdata.bnx_parent_tag, PAGE_SIZE, 0,
4332 				    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
4333 				    size, BUS_DMA_WAITOK | BUS_DMA_ZERO, &dmem);
4334 	if (error)
4335 		return error;
4336 
4337 	*tag = dmem.dmem_tag;
4338 	*map = dmem.dmem_map;
4339 	*addr = dmem.dmem_addr;
4340 	*paddr = dmem.dmem_busaddr;
4341 
4342 	return 0;
4343 }
4344 
4345 static void
4346 bnx_dma_block_free(bus_dma_tag_t tag, bus_dmamap_t map, void *addr)
4347 {
4348 	if (tag != NULL) {
4349 		bus_dmamap_unload(tag, map);
4350 		bus_dmamem_free(tag, addr, map);
4351 		bus_dma_tag_destroy(tag);
4352 	}
4353 }
4354 
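/*
 * Link state update for TBI (fiber) interfaces.
 */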
4355 static void
4356 bnx_tbi_link_upd(struct bnx_softc *sc, uint32_t status)
4357 {
4358 	struct ifnet *ifp = &sc->arpcom.ac_if;
4359 
4360 #define PCS_ENCODE_ERR	(BGE_MACSTAT_PORT_DECODE_ERROR|BGE_MACSTAT_MI_COMPLETE)
4361 
4362 	/*
4363 	 * Sometimes PCS encoding errors are detected in
4364 	 * TBI mode (on fiber NICs), and for some reason
4365 	 * the chip will signal them as link changes.
4366 	 * If we get a link change event, but the 'PCS
4367 	 * encoding error' bit in the MAC status register
4368 	 * is set, don't bother doing a link check.
4369 	 * This avoids spurious "gigabit link up" messages
4370 	 * that sometimes appear on fiber NICs during
4371 	 * periods of heavy traffic.
4372 	 */
4373 	if (status & BGE_MACSTAT_TBI_PCS_SYNCHED) {
4374 		if (!sc->bnx_link) {
4375 			sc->bnx_link++;
4376 			if (sc->bnx_asicrev == BGE_ASICREV_BCM5704) {
4377 				BNX_CLRBIT(sc, BGE_MAC_MODE,
4378 				    BGE_MACMODE_TBI_SEND_CFGS);
4379 				DELAY(40);
4380 			}
4381 			CSR_WRITE_4(sc, BGE_MAC_STS, 0xFFFFFFFF);
4382 
4383 			if (bootverbose)
4384 				if_printf(ifp, "link UP\n");
4385 
4386 			ifp->if_link_state = LINK_STATE_UP;
4387 			if_link_state_change(ifp);
4388 		}
4389 	} else if ((status & PCS_ENCODE_ERR) != PCS_ENCODE_ERR) {
4390 		if (sc->bnx_link) {
4391 			sc->bnx_link = 0;
4392 
4393 			if (bootverbose)
4394 				if_printf(ifp, "link DOWN\n");
4395 
4396 			ifp->if_link_state = LINK_STATE_DOWN;
4397 			if_link_state_change(ifp);
4398 		}
4399 	}
4400 
4401 #undef PCS_ENCODE_ERR
4402 
4403 	/* Clear the attention. */
4404 	CSR_WRITE_4(sc, BGE_MAC_STS, BGE_MACSTAT_SYNC_CHANGED |
4405 	    BGE_MACSTAT_CFG_CHANGED | BGE_MACSTAT_MI_COMPLETE |
4406 	    BGE_MACSTAT_LINK_CHANGED);
4407 }
4408 
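/*
 * Link state update for copper interfaces: poll the PHY via mii and
 * run the miibus status-change handler to update the MAC.
 */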
4409 static void
4410 bnx_copper_link_upd(struct bnx_softc *sc, uint32_t status __unused)
4411 {
4412 	struct ifnet *ifp = &sc->arpcom.ac_if;
4413 	struct mii_data *mii = device_get_softc(sc->bnx_miibus);
4414 
4415 	mii_pollstat(mii);
4416 	bnx_miibus_statchg(sc->bnx_dev);
4417 
4418 	if (bootverbose) {
4419 		if (sc->bnx_link)
4420 			if_printf(ifp, "link UP\n");
4421 		else
4422 			if_printf(ifp, "link DOWN\n");
4423 	}
4424 
4425 	/* Clear the attention. */
4426 	CSR_WRITE_4(sc, BGE_MAC_STS, BGE_MACSTAT_SYNC_CHANGED |
4427 	    BGE_MACSTAT_CFG_CHANGED | BGE_MACSTAT_MI_COMPLETE |
4428 	    BGE_MACSTAT_LINK_CHANGED);
4429 }
4430 
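/*
 * Link state update when the chip's MII auto-polling is used: derive
 * the link state from the PHY status reported by mii_pollstat().
 */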
4431 static void
4432 bnx_autopoll_link_upd(struct bnx_softc *sc, uint32_t status __unused)
4433 {
4434 	struct ifnet *ifp = &sc->arpcom.ac_if;
4435 	struct mii_data *mii = device_get_softc(sc->bnx_miibus);
4436 
4437 	mii_pollstat(mii);
4438 
4439 	if (!sc->bnx_link &&
4440 	    (mii->mii_media_status & IFM_ACTIVE) &&
4441 	    IFM_SUBTYPE(mii->mii_media_active) != IFM_NONE) {
4442 		sc->bnx_link++;
4443 		if (bootverbose)
4444 			if_printf(ifp, "link UP\n");
4445 	} else if (sc->bnx_link &&
4446 	    (!(mii->mii_media_status & IFM_ACTIVE) ||
4447 	    IFM_SUBTYPE(mii->mii_media_active) == IFM_NONE)) {
4448 		sc->bnx_link = 0;
4449 		if (bootverbose)
4450 			if_printf(ifp, "link DOWN\n");
4451 	}
4452 
4453 	/* Clear the attention. */
4454 	CSR_WRITE_4(sc, BGE_MAC_STS, BGE_MACSTAT_SYNC_CHANGED |
4455 	    BGE_MACSTAT_CFG_CHANGED | BGE_MACSTAT_MI_COMPLETE |
4456 	    BGE_MACSTAT_LINK_CHANGED);
4457 }
4458 
4459 static int
4460 bnx_sysctl_rx_coal_ticks(SYSCTL_HANDLER_ARGS)
4461 {
4462 	struct bnx_softc *sc = arg1;
4463 
4464 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
4465 	    &sc->bnx_rx_coal_ticks,
4466 	    BNX_RX_COAL_TICKS_MIN, BNX_RX_COAL_TICKS_MAX,
4467 	    BNX_RX_COAL_TICKS_CHG);
4468 }
4469 
4470 static int
4471 bnx_sysctl_tx_coal_ticks(SYSCTL_HANDLER_ARGS)
4472 {
4473 	struct bnx_softc *sc = arg1;
4474 
4475 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
4476 	    &sc->bnx_tx_coal_ticks,
4477 	    BNX_TX_COAL_TICKS_MIN, BNX_TX_COAL_TICKS_MAX,
4478 	    BNX_TX_COAL_TICKS_CHG);
4479 }
4480 
4481 static int
4482 bnx_sysctl_rx_coal_bds(SYSCTL_HANDLER_ARGS)
4483 {
4484 	struct bnx_softc *sc = arg1;
4485 
4486 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
4487 	    &sc->bnx_rx_coal_bds,
4488 	    BNX_RX_COAL_BDS_MIN, BNX_RX_COAL_BDS_MAX,
4489 	    BNX_RX_COAL_BDS_CHG);
4490 }
4491 
4492 static int
4493 bnx_sysctl_rx_coal_bds_poll(SYSCTL_HANDLER_ARGS)
4494 {
4495 	struct bnx_softc *sc = arg1;
4496 
4497 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
4498 	    &sc->bnx_rx_coal_bds_poll,
4499 	    BNX_RX_COAL_BDS_MIN, BNX_RX_COAL_BDS_MAX,
4500 	    BNX_RX_COAL_BDS_CHG);
4501 }
4502 
4503 static int
4504 bnx_sysctl_tx_coal_bds(SYSCTL_HANDLER_ARGS)
4505 {
4506 	struct bnx_softc *sc = arg1;
4507 
4508 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
4509 	    &sc->bnx_tx_coal_bds,
4510 	    BNX_TX_COAL_BDS_MIN, BNX_TX_COAL_BDS_MAX,
4511 	    BNX_TX_COAL_BDS_CHG);
4512 }
4513 
4514 static int
4515 bnx_sysctl_tx_coal_bds_poll(SYSCTL_HANDLER_ARGS)
4516 {
4517 	struct bnx_softc *sc = arg1;
4518 
4519 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
4520 	    &sc->bnx_tx_coal_bds_poll,
4521 	    BNX_TX_COAL_BDS_MIN, BNX_TX_COAL_BDS_MAX,
4522 	    BNX_TX_COAL_BDS_CHG);
4523 }
4524 
4525 static int
4526 bnx_sysctl_rx_coal_bds_int(SYSCTL_HANDLER_ARGS)
4527 {
4528 	struct bnx_softc *sc = arg1;
4529 
4530 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
4531 	    &sc->bnx_rx_coal_bds_int,
4532 	    BNX_RX_COAL_BDS_MIN, BNX_RX_COAL_BDS_MAX,
4533 	    BNX_RX_COAL_BDS_INT_CHG);
4534 }
4535 
4536 static int
4537 bnx_sysctl_tx_coal_bds_int(SYSCTL_HANDLER_ARGS)
4538 {
4539 	struct bnx_softc *sc = arg1;
4540 
4541 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
4542 	    &sc->bnx_tx_coal_bds_int,
4543 	    BNX_TX_COAL_BDS_MIN, BNX_TX_COAL_BDS_MAX,
4544 	    BNX_TX_COAL_BDS_INT_CHG);
4545 }
4546 
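/*
 * Common handler for the coalescing sysctls: validate the new value
 * against [coal_min, coal_max], store it, and commit the change to
 * the hardware via bnx_coal_change().
 */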
4547 static int
4548 bnx_sysctl_coal_chg(SYSCTL_HANDLER_ARGS, uint32_t *coal,
4549     int coal_min, int coal_max, uint32_t coal_chg_mask)
4550 {
4551 	struct bnx_softc *sc = arg1;
4552 	struct ifnet *ifp = &sc->arpcom.ac_if;
4553 	int error = 0, v;
4554 
4555 	ifnet_serialize_all(ifp);
4556 
4557 	v = *coal;
4558 	error = sysctl_handle_int(oidp, &v, 0, req);
4559 	if (!error && req->newptr != NULL) {
4560 		if (v < coal_min || v > coal_max) {
4561 			error = EINVAL;
4562 		} else {
4563 			*coal = v;
4564 			sc->bnx_coal_chg |= coal_chg_mask;
4565 
4566 			/* Commit changes */
4567 			bnx_coal_change(sc);
4568 		}
4569 	}
4570 
4571 	ifnet_deserialize_all(ifp);
4572 	return error;
4573 }
4574 
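/*
 * Program the host coalescing engine with whichever coalescing
 * parameters have been marked as changed, clearing the per-vector
 * registers of any unused interrupt vectors.
 */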
4575 static void
4576 bnx_coal_change(struct bnx_softc *sc)
4577 {
4578 	struct ifnet *ifp = &sc->arpcom.ac_if;
4579 	int i;
4580 
4581 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
4582 
4583 	if (sc->bnx_coal_chg & BNX_RX_COAL_TICKS_CHG) {
4584 		if (sc->bnx_rx_retcnt == 1) {
4585 			CSR_WRITE_4(sc, BGE_HCC_RX_COAL_TICKS,
4586 			    sc->bnx_rx_coal_ticks);
4587 			i = 0;
4588 		} else {
4589 			CSR_WRITE_4(sc, BGE_HCC_RX_COAL_TICKS, 0);
4590 			for (i = 0; i < sc->bnx_rx_retcnt; ++i) {
4591 				CSR_WRITE_4(sc, BGE_VEC1_RX_COAL_TICKS +
4592 				    (i * BGE_VEC_COALSET_SIZE),
4593 				    sc->bnx_rx_coal_ticks);
4594 			}
4595 		}
4596 		for (; i < BNX_INTR_MAX - 1; ++i) {
4597 			CSR_WRITE_4(sc, BGE_VEC1_RX_COAL_TICKS +
4598 			    (i * BGE_VEC_COALSET_SIZE), 0);
4599 		}
4600 		if (bootverbose) {
4601 			if_printf(ifp, "rx_coal_ticks -> %u\n",
4602 			    sc->bnx_rx_coal_ticks);
4603 		}
4604 	}
4605 
4606 	if (sc->bnx_coal_chg & BNX_TX_COAL_TICKS_CHG) {
4607 		if (sc->bnx_tx_ringcnt == 1) {
4608 			CSR_WRITE_4(sc, BGE_HCC_TX_COAL_TICKS,
4609 			    sc->bnx_tx_coal_ticks);
4610 			i = 0;
4611 		} else {
4612 			CSR_WRITE_4(sc, BGE_HCC_TX_COAL_TICKS, 0);
4613 			for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
4614 				CSR_WRITE_4(sc, BGE_VEC1_TX_COAL_TICKS +
4615 				    (i * BGE_VEC_COALSET_SIZE),
4616 				    sc->bnx_tx_coal_ticks);
4617 			}
4618 		}
4619 		for (; i < BNX_INTR_MAX - 1; ++i) {
4620 			CSR_WRITE_4(sc, BGE_VEC1_TX_COAL_TICKS +
4621 			    (i * BGE_VEC_COALSET_SIZE), 0);
4622 		}
4623 		if (bootverbose) {
4624 			if_printf(ifp, "tx_coal_ticks -> %u\n",
4625 			    sc->bnx_tx_coal_ticks);
4626 		}
4627 	}
4628 
4629 	if (sc->bnx_coal_chg & BNX_RX_COAL_BDS_CHG) {
4630 		uint32_t rx_coal_bds;
4631 
4632 		if (ifp->if_flags & IFF_NPOLLING)
4633 			rx_coal_bds = sc->bnx_rx_coal_bds_poll;
4634 		else
4635 			rx_coal_bds = sc->bnx_rx_coal_bds;
4636 
4637 		if (sc->bnx_rx_retcnt == 1) {
4638 			CSR_WRITE_4(sc, BGE_HCC_RX_MAX_COAL_BDS, rx_coal_bds);
4639 			i = 0;
4640 		} else {
4641 			CSR_WRITE_4(sc, BGE_HCC_RX_MAX_COAL_BDS, 0);
4642 			for (i = 0; i < sc->bnx_rx_retcnt; ++i) {
4643 				CSR_WRITE_4(sc, BGE_VEC1_RX_MAX_COAL_BDS +
4644 				    (i * BGE_VEC_COALSET_SIZE), rx_coal_bds);
4645 			}
4646 		}
4647 		for (; i < BNX_INTR_MAX - 1; ++i) {
4648 			CSR_WRITE_4(sc, BGE_VEC1_RX_MAX_COAL_BDS +
4649 			    (i * BGE_VEC_COALSET_SIZE), 0);
4650 		}
4651 		if (bootverbose) {
4652 			if_printf(ifp, "%srx_coal_bds -> %u\n",
4653 			    (ifp->if_flags & IFF_NPOLLING) ? "polling " : "",
4654 			    rx_coal_bds);
4655 		}
4656 	}
4657 
4658 	if (sc->bnx_coal_chg & BNX_TX_COAL_BDS_CHG) {
4659 		uint32_t tx_coal_bds;
4660 
4661 		if (ifp->if_flags & IFF_NPOLLING)
4662 			tx_coal_bds = sc->bnx_tx_coal_bds_poll;
4663 		else
4664 			tx_coal_bds = sc->bnx_tx_coal_bds;
4665 
4666 		if (sc->bnx_tx_ringcnt == 1) {
4667 			CSR_WRITE_4(sc, BGE_HCC_TX_MAX_COAL_BDS, tx_coal_bds);
4668 			i = 0;
4669 		} else {
4670 			CSR_WRITE_4(sc, BGE_HCC_TX_MAX_COAL_BDS, 0);
4671 			for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
4672 				CSR_WRITE_4(sc, BGE_VEC1_TX_MAX_COAL_BDS +
4673 				    (i * BGE_VEC_COALSET_SIZE), tx_coal_bds);
4674 			}
4675 		}
4676 		for (; i < BNX_INTR_MAX - 1; ++i) {
4677 			CSR_WRITE_4(sc, BGE_VEC1_TX_MAX_COAL_BDS +
4678 			    (i * BGE_VEC_COALSET_SIZE), 0);
4679 		}
4680 		if (bootverbose) {
4681 			if_printf(ifp, "%stx_coal_bds -> %u\n",
4682 			    (ifp->if_flags & IFF_NPOLLING) ? "polling " : "",
4683 			    tx_coal_bds);
4684 		}
4685 	}
4686 
4687 	if (sc->bnx_coal_chg & BNX_RX_COAL_BDS_INT_CHG) {
4688 		if (sc->bnx_rx_retcnt == 1) {
4689 			CSR_WRITE_4(sc, BGE_HCC_RX_MAX_COAL_BDS_INT,
4690 			    sc->bnx_rx_coal_bds_int);
4691 			i = 0;
4692 		} else {
4693 			CSR_WRITE_4(sc, BGE_HCC_RX_MAX_COAL_BDS_INT, 0);
4694 			for (i = 0; i < sc->bnx_rx_retcnt; ++i) {
4695 				CSR_WRITE_4(sc, BGE_VEC1_RX_MAX_COAL_BDS_INT +
4696 				    (i * BGE_VEC_COALSET_SIZE),
4697 				    sc->bnx_rx_coal_bds_int);
4698 			}
4699 		}
4700 		for (; i < BNX_INTR_MAX - 1; ++i) {
4701 			CSR_WRITE_4(sc, BGE_VEC1_RX_MAX_COAL_BDS_INT +
4702 			    (i * BGE_VEC_COALSET_SIZE), 0);
4703 		}
4704 		if (bootverbose) {
4705 			if_printf(ifp, "rx_coal_bds_int -> %u\n",
4706 			    sc->bnx_rx_coal_bds_int);
4707 		}
4708 	}
4709 
4710 	if (sc->bnx_coal_chg & BNX_TX_COAL_BDS_INT_CHG) {
4711 		if (sc->bnx_tx_ringcnt == 1) {
4712 			CSR_WRITE_4(sc, BGE_HCC_TX_MAX_COAL_BDS_INT,
4713 			    sc->bnx_tx_coal_bds_int);
4714 			i = 0;
4715 		} else {
4716 			CSR_WRITE_4(sc, BGE_HCC_TX_MAX_COAL_BDS_INT, 0);
4717 			for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
4718 				CSR_WRITE_4(sc, BGE_VEC1_TX_MAX_COAL_BDS_INT +
4719 				    (i * BGE_VEC_COALSET_SIZE),
4720 				    sc->bnx_tx_coal_bds_int);
4721 			}
4722 		}
4723 		for (; i < BNX_INTR_MAX - 1; ++i) {
4724 			CSR_WRITE_4(sc, BGE_VEC1_TX_MAX_COAL_BDS_INT +
4725 			    (i * BGE_VEC_COALSET_SIZE), 0);
4726 		}
4727 		if (bootverbose) {
4728 			if_printf(ifp, "tx_coal_bds_int -> %u\n",
4729 			    sc->bnx_tx_coal_bds_int);
4730 		}
4731 	}
4732 
4733 	sc->bnx_coal_chg = 0;
4734 }
4735 
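/*
 * Periodic check for lost interrupts (status tag bug workaround) on a
 * vector that services both an RX return ring and a TX ring: if the
 * hardware indices have moved but the driver has made no progress for
 * two consecutive checks, invoke the interrupt handler manually.
 */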
4736 static void
4737 bnx_check_intr_rxtx(void *xintr)
4738 {
4739 	struct bnx_intr_data *intr = xintr;
4740 	struct bnx_rx_ret_ring *ret;
4741 	struct bnx_tx_ring *txr;
4742 	struct ifnet *ifp;
4743 
4744 	lwkt_serialize_enter(intr->bnx_intr_serialize);
4745 
4746 	KKASSERT(mycpuid == intr->bnx_intr_cpuid);
4747 
4748 	ifp = &intr->bnx_sc->arpcom.ac_if;
4749 	if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) != IFF_RUNNING) {
4750 		lwkt_serialize_exit(intr->bnx_intr_serialize);
4751 		return;
4752 	}
4753 
4754 	txr = intr->bnx_txr;
4755 	ret = intr->bnx_ret;
4756 
4757 	if (*ret->bnx_rx_considx != ret->bnx_rx_saved_considx ||
4758 	    *txr->bnx_tx_considx != txr->bnx_tx_saved_considx) {
4759 		if (intr->bnx_rx_check_considx == ret->bnx_rx_saved_considx &&
4760 		    intr->bnx_tx_check_considx == txr->bnx_tx_saved_considx) {
4761 			if (!intr->bnx_intr_maylose) {
4762 				intr->bnx_intr_maylose = TRUE;
4763 				goto done;
4764 			}
4765 			if (bootverbose)
4766 				if_printf(ifp, "lost interrupt\n");
4767 			intr->bnx_intr_func(intr->bnx_intr_arg);
4768 		}
4769 	}
4770 	intr->bnx_intr_maylose = FALSE;
4771 	intr->bnx_rx_check_considx = ret->bnx_rx_saved_considx;
4772 	intr->bnx_tx_check_considx = txr->bnx_tx_saved_considx;
4773 
4774 done:
4775 	callout_reset(&intr->bnx_intr_timer, BNX_INTR_CKINTVL,
4776 	    intr->bnx_intr_check, intr);
4777 	lwkt_serialize_exit(intr->bnx_intr_serialize);
4778 }
4779 
4780 static void
4781 bnx_check_intr_tx(void *xintr)
4782 {
4783 	struct bnx_intr_data *intr = xintr;
4784 	struct bnx_tx_ring *txr;
4785 	struct ifnet *ifp;
4786 
4787 	lwkt_serialize_enter(intr->bnx_intr_serialize);
4788 
4789 	KKASSERT(mycpuid == intr->bnx_intr_cpuid);
4790 
4791 	ifp = &intr->bnx_sc->arpcom.ac_if;
4792 	if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) != IFF_RUNNING) {
4793 		lwkt_serialize_exit(intr->bnx_intr_serialize);
4794 		return;
4795 	}
4796 
4797 	txr = intr->bnx_txr;
4798 
4799 	if (*txr->bnx_tx_considx != txr->bnx_tx_saved_considx) {
4800 		if (intr->bnx_tx_check_considx == txr->bnx_tx_saved_considx) {
4801 			if (!intr->bnx_intr_maylose) {
4802 				intr->bnx_intr_maylose = TRUE;
4803 				goto done;
4804 			}
4805 			if (bootverbose)
4806 				if_printf(ifp, "lost interrupt\n");
4807 			intr->bnx_intr_func(intr->bnx_intr_arg);
4808 		}
4809 	}
4810 	intr->bnx_intr_maylose = FALSE;
4811 	intr->bnx_tx_check_considx = txr->bnx_tx_saved_considx;
4812 
4813 done:
4814 	callout_reset(&intr->bnx_intr_timer, BNX_INTR_CKINTVL,
4815 	    intr->bnx_intr_check, intr);
4816 	lwkt_serialize_exit(intr->bnx_intr_serialize);
4817 }
4818 
4819 static void
4820 bnx_check_intr_rx(void *xintr)
4821 {
4822 	struct bnx_intr_data *intr = xintr;
4823 	struct bnx_rx_ret_ring *ret;
4824 	struct ifnet *ifp;
4825 
4826 	lwkt_serialize_enter(intr->bnx_intr_serialize);
4827 
4828 	KKASSERT(mycpuid == intr->bnx_intr_cpuid);
4829 
4830 	ifp = &intr->bnx_sc->arpcom.ac_if;
4831 	if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) != IFF_RUNNING) {
4832 		lwkt_serialize_exit(intr->bnx_intr_serialize);
4833 		return;
4834 	}
4835 
4836 	ret = intr->bnx_ret;
4837 
4838 	if (*ret->bnx_rx_considx != ret->bnx_rx_saved_considx) {
4839 		if (intr->bnx_rx_check_considx == ret->bnx_rx_saved_considx) {
4840 			if (!intr->bnx_intr_maylose) {
4841 				intr->bnx_intr_maylose = TRUE;
4842 				goto done;
4843 			}
4844 			if (bootverbose)
4845 				if_printf(ifp, "lost interrupt\n");
4846 			intr->bnx_intr_func(intr->bnx_intr_arg);
4847 		}
4848 	}
4849 	intr->bnx_intr_maylose = FALSE;
4850 	intr->bnx_rx_check_considx = ret->bnx_rx_saved_considx;
4851 
4852 done:
4853 	callout_reset(&intr->bnx_intr_timer, BNX_INTR_CKINTVL,
4854 	    intr->bnx_intr_check, intr);
4855 	lwkt_serialize_exit(intr->bnx_intr_serialize);
4856 }
4857 
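/*
 * Re-enable interrupt handling: unblock the serialized handlers,
 * arm each interrupt mailbox with its saved status tag, unmask the
 * PCI interrupt, and start the lost-interrupt check timers when the
 * status tag bug workaround is needed.
 */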
4858 static void
4859 bnx_enable_intr(struct bnx_softc *sc)
4860 {
4861 	struct ifnet *ifp = &sc->arpcom.ac_if;
4862 	int i;
4863 
4864 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
4865 		lwkt_serialize_handler_enable(
4866 		    sc->bnx_intr_data[i].bnx_intr_serialize);
4867 	}
4868 
	/*
	 * Enable interrupts by writing each vector's saved status tag
	 * to its interrupt mailbox.
	 */
4872 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
4873 		struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
4874 
4875 		bnx_writembx(sc, intr->bnx_intr_mbx,
4876 		    (*intr->bnx_saved_status_tag) << 24);
4877 		/* XXX Linux driver */
4878 		bnx_writembx(sc, intr->bnx_intr_mbx,
4879 		    (*intr->bnx_saved_status_tag) << 24);
4880 	}
4881 
4882 	/*
4883 	 * Unmask the interrupt when we stop polling.
4884 	 */
4885 	PCI_CLRBIT(sc->bnx_dev, BGE_PCI_MISC_CTL,
4886 	    BGE_PCIMISCCTL_MASK_PCI_INTR, 4);
4887 
	/*
	 * Trigger another interrupt, since the mailbox writes above
	 * may have acknowledged a pending interrupt.
	 */
4893 	BNX_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_INTR_SET);
4894 
4895 	if (sc->bnx_flags & BNX_FLAG_STATUSTAG_BUG) {
4896 		if (bootverbose)
4897 			if_printf(ifp, "status tag bug workaround\n");
4898 
4899 		for (i = 0; i < sc->bnx_intr_cnt; ++i) {
4900 			struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
4901 
4902 			if (intr->bnx_intr_check == NULL)
4903 				continue;
4904 			intr->bnx_intr_maylose = FALSE;
4905 			intr->bnx_rx_check_considx = 0;
4906 			intr->bnx_tx_check_considx = 0;
4907 			callout_reset_bycpu(&intr->bnx_intr_timer,
4908 			    BNX_INTR_CKINTVL, intr->bnx_intr_check, intr,
4909 			    intr->bnx_intr_cpuid);
4910 		}
4911 	}
4912 }
4913 
4914 static void
4915 bnx_disable_intr(struct bnx_softc *sc)
4916 {
4917 	int i;
4918 
4919 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
4920 		struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
4921 
4922 		callout_stop(&intr->bnx_intr_timer);
4923 		intr->bnx_intr_maylose = FALSE;
4924 		intr->bnx_rx_check_considx = 0;
4925 		intr->bnx_tx_check_considx = 0;
4926 	}
4927 
4928 	/*
4929 	 * Mask the interrupt when we start polling.
4930 	 */
4931 	PCI_SETBIT(sc->bnx_dev, BGE_PCI_MISC_CTL,
4932 	    BGE_PCIMISCCTL_MASK_PCI_INTR, 4);
4933 
4934 	/*
4935 	 * Acknowledge possible asserted interrupt.
4936 	 */
4937 	for (i = 0; i < BNX_INTR_MAX; ++i)
4938 		bnx_writembx(sc, sc->bnx_intr_data[i].bnx_intr_mbx, 1);
4939 
4940 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
4941 		lwkt_serialize_handler_disable(
4942 		    sc->bnx_intr_data[i].bnx_intr_serialize);
4943 	}
4944 }
4945 
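/*
 * Try to read the Ethernet address from NIC memory; the address is
 * only trusted when the expected 0x484b signature is present.
 * Returns 0 on success, nonzero on failure.
 */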
4946 static int
4947 bnx_get_eaddr_mem(struct bnx_softc *sc, uint8_t ether_addr[])
4948 {
4949 	uint32_t mac_addr;
4950 	int ret = 1;
4951 
4952 	mac_addr = bnx_readmem_ind(sc, 0x0c14);
4953 	if ((mac_addr >> 16) == 0x484b) {
4954 		ether_addr[0] = (uint8_t)(mac_addr >> 8);
4955 		ether_addr[1] = (uint8_t)mac_addr;
4956 		mac_addr = bnx_readmem_ind(sc, 0x0c18);
4957 		ether_addr[2] = (uint8_t)(mac_addr >> 24);
4958 		ether_addr[3] = (uint8_t)(mac_addr >> 16);
4959 		ether_addr[4] = (uint8_t)(mac_addr >> 8);
4960 		ether_addr[5] = (uint8_t)mac_addr;
4961 		ret = 0;
4962 	}
4963 	return ret;
4964 }
4965 
4966 static int
4967 bnx_get_eaddr_nvram(struct bnx_softc *sc, uint8_t ether_addr[])
4968 {
4969 	int mac_offset = BGE_EE_MAC_OFFSET;
4970 
4971 	if (BNX_IS_5717_PLUS(sc)) {
4972 		int f;
4973 
4974 		f = pci_get_function(sc->bnx_dev);
4975 		if (f & 1)
4976 			mac_offset = BGE_EE_MAC_OFFSET_5717;
4977 		if (f > 1)
4978 			mac_offset += BGE_EE_MAC_OFFSET_5717_OFF;
4979 	}
4980 
4981 	return bnx_read_nvram(sc, ether_addr, mac_offset + 2, ETHER_ADDR_LEN);
4982 }
4983 
4984 static int
4985 bnx_get_eaddr_eeprom(struct bnx_softc *sc, uint8_t ether_addr[])
4986 {
4987 	if (sc->bnx_flags & BNX_FLAG_NO_EEPROM)
4988 		return 1;
4989 
4990 	return bnx_read_eeprom(sc, ether_addr, BGE_EE_MAC_OFFSET + 2,
4991 			       ETHER_ADDR_LEN);
4992 }
4993 
4994 static int
4995 bnx_get_eaddr(struct bnx_softc *sc, uint8_t eaddr[])
4996 {
4997 	static const bnx_eaddr_fcn_t bnx_eaddr_funcs[] = {
4998 		/* NOTE: Order is critical */
4999 		bnx_get_eaddr_mem,
5000 		bnx_get_eaddr_nvram,
5001 		bnx_get_eaddr_eeprom,
5002 		NULL
5003 	};
5004 	const bnx_eaddr_fcn_t *func;
5005 
5006 	for (func = bnx_eaddr_funcs; *func != NULL; ++func) {
5007 		if ((*func)(sc, eaddr) == 0)
5008 			break;
5009 	}
5010 	return (*func == NULL ? ENXIO : 0);
5011 }
5012 
5013 /*
5014  * NOTE: 'm' is not freed upon failure
5015  */
5016 static struct mbuf *
5017 bnx_defrag_shortdma(struct mbuf *m)
5018 {
5019 	struct mbuf *n;
5020 	int found;
5021 
	/*
	 * If the device receives two back-to-back send BDs with 8 or
	 * fewer total bytes, it may hang.  The two back-to-back send
	 * BDs must be in the same frame for this failure to occur.
	 * Scan the mbuf chain for two such back-to-back send BDs and,
	 * if found, defragment the chain into a new mbuf to work
	 * around the silicon bug.
	 */
5030 	for (n = m, found = 0; n != NULL; n = n->m_next) {
5031 		if (n->m_len < 8) {
5032 			found++;
5033 			if (found > 1)
5034 				break;
5035 			continue;
5036 		}
5037 		found = 0;
5038 	}
5039 
5040 	if (found > 1)
5041 		n = m_defrag(m, M_NOWAIT);
5042 	else
5043 		n = m;
5044 	return n;
5045 }
5046 
5047 static void
5048 bnx_stop_block(struct bnx_softc *sc, bus_size_t reg, uint32_t bit)
5049 {
5050 	int i;
5051 
5052 	BNX_CLRBIT(sc, reg, bit);
5053 	for (i = 0; i < BNX_TIMEOUT; i++) {
5054 		if ((CSR_READ_4(sc, reg) & bit) == 0)
5055 			return;
5056 		DELAY(100);
5057 	}
5058 }
5059 
5060 static void
5061 bnx_link_poll(struct bnx_softc *sc)
5062 {
5063 	uint32_t status;
5064 
5065 	status = CSR_READ_4(sc, BGE_MAC_STS);
5066 	if ((status & sc->bnx_link_chg) || sc->bnx_link_evt) {
5067 		sc->bnx_link_evt = 0;
5068 		sc->bnx_link_upd(sc, status);
5069 	}
5070 }
5071 
5072 static void
5073 bnx_enable_msi(struct bnx_softc *sc, boolean_t is_msix)
5074 {
5075 	uint32_t msi_mode;
5076 
5077 	msi_mode = CSR_READ_4(sc, BGE_MSI_MODE);
5078 	msi_mode |= BGE_MSIMODE_ENABLE;
	/*
	 * NOTE:
	 * 5718-PG105-R says that "one shot" mode does not work when
	 * MSI is used; in practice it works fine.
	 */
5084 	msi_mode &= ~BGE_MSIMODE_ONESHOT_DISABLE;
5085 	if (is_msix)
5086 		msi_mode |= BGE_MSIMODE_MSIX_MULTIMODE;
5087 	else
5088 		msi_mode &= ~BGE_MSIMODE_MSIX_MULTIMODE;
5089 	CSR_WRITE_4(sc, BGE_MSI_MODE, msi_mode);
5090 }
5091 
5092 static uint32_t
5093 bnx_dma_swap_options(struct bnx_softc *sc)
5094 {
5095 	uint32_t dma_options;
5096 
5097 	dma_options = BGE_MODECTL_WORDSWAP_NONFRAME |
5098 	    BGE_MODECTL_BYTESWAP_DATA | BGE_MODECTL_WORDSWAP_DATA;
5099 #if BYTE_ORDER == BIG_ENDIAN
5100 	dma_options |= BGE_MODECTL_BYTESWAP_NONFRAME;
5101 #endif
5102 	return dma_options;
5103 }
5104 
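/*
 * Prepare an mbuf for TSO: make sure the Ethernet/IP/TCP headers are
 * contiguous, patch ip_len and clear the TCP checksum, and encode the
 * combined header length into the mss/flags values that will be
 * placed into the send BDs.
 */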
5105 static int
5106 bnx_setup_tso(struct bnx_tx_ring *txr, struct mbuf **mp,
5107     uint16_t *mss0, uint16_t *flags0)
5108 {
5109 	struct mbuf *m;
5110 	struct ip *ip;
5111 	struct tcphdr *th;
5112 	int thoff, iphlen, hoff, hlen;
5113 	uint16_t flags, mss;
5114 
5115 	m = *mp;
5116 	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));
5117 
5118 	hoff = m->m_pkthdr.csum_lhlen;
5119 	iphlen = m->m_pkthdr.csum_iphlen;
5120 	thoff = m->m_pkthdr.csum_thlen;
5121 
5122 	KASSERT(hoff > 0, ("invalid ether header len"));
5123 	KASSERT(iphlen > 0, ("invalid ip header len"));
5124 	KASSERT(thoff > 0, ("invalid tcp header len"));
5125 
5126 	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
5127 		m = m_pullup(m, hoff + iphlen + thoff);
5128 		if (m == NULL) {
5129 			*mp = NULL;
5130 			return ENOBUFS;
5131 		}
5132 		*mp = m;
5133 	}
5134 	ip = mtodoff(m, struct ip *, hoff);
5135 	th = mtodoff(m, struct tcphdr *, hoff + iphlen);
5136 
5137 	mss = m->m_pkthdr.tso_segsz;
5138 	flags = BGE_TXBDFLAG_CPU_PRE_DMA | BGE_TXBDFLAG_CPU_POST_DMA;
5139 
5140 	ip->ip_len = htons(mss + iphlen + thoff);
5141 	th->th_sum = 0;
5142 
5143 	hlen = (iphlen + thoff) >> 2;
5144 	mss |= ((hlen & 0x3) << 14);
5145 	flags |= ((hlen & 0xf8) << 7) | ((hlen & 0x4) << 2);
5146 
5147 	*mss0 = mss;
5148 	*flags0 = flags;
5149 
5150 	return 0;
5151 }
5152 
5153 static int
5154 bnx_create_tx_ring(struct bnx_tx_ring *txr)
5155 {
5156 	bus_size_t txmaxsz, txmaxsegsz;
5157 	int i, error;
5158 
5159 	lwkt_serialize_init(&txr->bnx_tx_serialize);
5160 
5161 	/*
5162 	 * Create DMA tag and maps for TX mbufs.
5163 	 */
5164 	if (txr->bnx_sc->bnx_flags & BNX_FLAG_TSO)
5165 		txmaxsz = IP_MAXPACKET + sizeof(struct ether_vlan_header);
5166 	else
5167 		txmaxsz = BNX_JUMBO_FRAMELEN;
5168 	if (txr->bnx_sc->bnx_asicrev == BGE_ASICREV_BCM57766)
5169 		txmaxsegsz = MCLBYTES;
5170 	else
5171 		txmaxsegsz = PAGE_SIZE;
5172 	error = bus_dma_tag_create(txr->bnx_sc->bnx_cdata.bnx_parent_tag,
5173 	    1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
5174 	    txmaxsz, BNX_NSEG_NEW, txmaxsegsz,
5175 	    BUS_DMA_ALLOCNOW | BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE,
5176 	    &txr->bnx_tx_mtag);
5177 	if (error) {
5178 		device_printf(txr->bnx_sc->bnx_dev,
5179 		    "could not create TX mbuf DMA tag\n");
5180 		return error;
5181 	}
5182 
5183 	for (i = 0; i < BGE_TX_RING_CNT; i++) {
5184 		error = bus_dmamap_create(txr->bnx_tx_mtag,
5185 		    BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE,
5186 		    &txr->bnx_tx_buf[i].bnx_tx_dmamap);
5187 		if (error) {
5188 			int j;
5189 
5190 			for (j = 0; j < i; ++j) {
5191 				bus_dmamap_destroy(txr->bnx_tx_mtag,
5192 				    txr->bnx_tx_buf[j].bnx_tx_dmamap);
5193 			}
5194 			bus_dma_tag_destroy(txr->bnx_tx_mtag);
5195 			txr->bnx_tx_mtag = NULL;
5196 
5197 			device_printf(txr->bnx_sc->bnx_dev,
5198 			    "could not create TX mbuf DMA map\n");
5199 			return error;
5200 		}
5201 	}
5202 
5203 	/*
5204 	 * Create DMA stuffs for TX ring.
5205 	 */
5206 	error = bnx_dma_block_alloc(txr->bnx_sc, BGE_TX_RING_SZ,
5207 	    &txr->bnx_tx_ring_tag,
5208 	    &txr->bnx_tx_ring_map,
5209 	    (void *)&txr->bnx_tx_ring,
5210 	    &txr->bnx_tx_ring_paddr);
5211 	if (error) {
5212 		device_printf(txr->bnx_sc->bnx_dev,
5213 		    "could not create TX ring\n");
5214 		return error;
5215 	}
5216 
5217 	txr->bnx_tx_flags |= BNX_TX_FLAG_SHORTDMA;
5218 	txr->bnx_tx_wreg = BNX_TX_WREG_NSEGS;
5219 
5220 	return 0;
5221 }
5222 
5223 static void
5224 bnx_destroy_tx_ring(struct bnx_tx_ring *txr)
5225 {
5226 	/* Destroy TX mbuf DMA stuffs. */
5227 	if (txr->bnx_tx_mtag != NULL) {
5228 		int i;
5229 
5230 		for (i = 0; i < BGE_TX_RING_CNT; i++) {
5231 			KKASSERT(txr->bnx_tx_buf[i].bnx_tx_mbuf == NULL);
5232 			bus_dmamap_destroy(txr->bnx_tx_mtag,
5233 			    txr->bnx_tx_buf[i].bnx_tx_dmamap);
5234 		}
5235 		bus_dma_tag_destroy(txr->bnx_tx_mtag);
5236 	}
5237 
5238 	/* Destroy TX ring */
5239 	bnx_dma_block_free(txr->bnx_tx_ring_tag,
5240 	    txr->bnx_tx_ring_map, txr->bnx_tx_ring);
5241 }
5242 
5243 static int
5244 bnx_sysctl_force_defrag(SYSCTL_HANDLER_ARGS)
5245 {
5246 	struct bnx_softc *sc = (void *)arg1;
5247 	struct ifnet *ifp = &sc->arpcom.ac_if;
5248 	struct bnx_tx_ring *txr = &sc->bnx_tx_ring[0];
5249 	int error, defrag, i;
5250 
5251 	if (txr->bnx_tx_flags & BNX_TX_FLAG_FORCE_DEFRAG)
5252 		defrag = 1;
5253 	else
5254 		defrag = 0;
5255 
5256 	error = sysctl_handle_int(oidp, &defrag, 0, req);
5257 	if (error || req->newptr == NULL)
5258 		return error;
5259 
5260 	ifnet_serialize_all(ifp);
5261 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
5262 		txr = &sc->bnx_tx_ring[i];
5263 		if (defrag)
5264 			txr->bnx_tx_flags |= BNX_TX_FLAG_FORCE_DEFRAG;
5265 		else
5266 			txr->bnx_tx_flags &= ~BNX_TX_FLAG_FORCE_DEFRAG;
5267 	}
5268 	ifnet_deserialize_all(ifp);
5269 
5270 	return 0;
5271 }
5272 
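/*
 * Sysctl handler: report and set bnx_tx_wreg on all TX rings, the
 * batching threshold used before the TX producer index is written.
 */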
5273 static int
5274 bnx_sysctl_tx_wreg(SYSCTL_HANDLER_ARGS)
5275 {
5276 	struct bnx_softc *sc = (void *)arg1;
5277 	struct ifnet *ifp = &sc->arpcom.ac_if;
5278 	struct bnx_tx_ring *txr = &sc->bnx_tx_ring[0];
5279 	int error, tx_wreg, i;
5280 
5281 	tx_wreg = txr->bnx_tx_wreg;
5282 	error = sysctl_handle_int(oidp, &tx_wreg, 0, req);
5283 	if (error || req->newptr == NULL)
5284 		return error;
5285 
5286 	ifnet_serialize_all(ifp);
5287 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i)
5288 		sc->bnx_tx_ring[i].bnx_tx_wreg = tx_wreg;
5289 	ifnet_deserialize_all(ifp);
5290 
5291 	return 0;
5292 }
5293 
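/*
 * Create the per-ring serializer, the DMA memory block backing the RX
 * return ring, and a temporary DMA map used when reloading RX mbufs.
 * The RX mbuf DMA tag is shared with the standard RX ring.
 */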
5294 static int
5295 bnx_create_rx_ret_ring(struct bnx_rx_ret_ring *ret)
5296 {
5297 	int error;
5298 
5299 	lwkt_serialize_init(&ret->bnx_rx_ret_serialize);
5300 
5301 	/*
5302 	 * Create the DMA memory block backing the RX return ring.
5303 	 */
5304 	error = bnx_dma_block_alloc(ret->bnx_sc,
5305 	    BGE_RX_RTN_RING_SZ(BNX_RETURN_RING_CNT),
5306 	    &ret->bnx_rx_ret_ring_tag,
5307 	    &ret->bnx_rx_ret_ring_map,
5308 	    (void *)&ret->bnx_rx_ret_ring,
5309 	    &ret->bnx_rx_ret_ring_paddr);
5310 	if (error) {
5311 		device_printf(ret->bnx_sc->bnx_dev,
5312 		    "could not create RX ret ring\n");
5313 		return error;
5314 	}
5315 
5316 	/* Shadow standard ring's RX mbuf DMA tag */
5317 	ret->bnx_rx_mtag = ret->bnx_std->bnx_rx_mtag;
5318 
5319 	/*
5320 	 * Create tmp DMA map for RX mbufs.
5321 	 */
5322 	error = bus_dmamap_create(ret->bnx_rx_mtag, BUS_DMA_WAITOK,
5323 	    &ret->bnx_rx_tmpmap);
5324 	if (error) {
5325 		device_printf(ret->bnx_sc->bnx_dev,
5326 		    "could not create tmp RX mbuf DMA map\n");
5327 		ret->bnx_rx_mtag = NULL;
5328 		return error;
5329 	}
5330 	return 0;
5331 }
5332 
5333 static void
5334 bnx_destroy_rx_ret_ring(struct bnx_rx_ret_ring *ret)
5335 {
5336 	/* Destroy tmp RX mbuf DMA map */
5337 	if (ret->bnx_rx_mtag != NULL)
5338 		bus_dmamap_destroy(ret->bnx_rx_mtag, ret->bnx_rx_tmpmap);
5339 
5340 	/* Destroy RX return ring */
5341 	bnx_dma_block_free(ret->bnx_rx_ret_ring_tag,
5342 	    ret->bnx_rx_ret_ring_map, ret->bnx_rx_ret_ring);
5343 }
5344 
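/*
 * Allocate interrupt resources: use MSI-X when multiple interrupts are
 * configured, otherwise fall back to a single MSI or legacy interrupt.
 */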
5345 static int
5346 bnx_alloc_intr(struct bnx_softc *sc)
5347 {
5348 	struct bnx_intr_data *intr;
5349 	u_int intr_flags;
5350 	int error;
5351 
5352 	if (sc->bnx_intr_cnt > 1) {
5353 		error = bnx_alloc_msix(sc);
5354 		if (error)
5355 			return error;
5356 		KKASSERT(sc->bnx_intr_type == PCI_INTR_TYPE_MSIX);
5357 		return 0;
5358 	}
5359 
5360 	KKASSERT(sc->bnx_intr_cnt == 1);
5361 
5362 	intr = &sc->bnx_intr_data[0];
5363 	intr->bnx_ret = &sc->bnx_rx_ret_ring[0];
5364 	intr->bnx_txr = &sc->bnx_tx_ring[0];
5365 	intr->bnx_intr_serialize = &sc->bnx_main_serialize;
5366 	intr->bnx_intr_check = bnx_check_intr_rxtx;
5367 	intr->bnx_saved_status_tag = &intr->bnx_ret->bnx_saved_status_tag;
5368 
5369 	sc->bnx_intr_type = pci_alloc_1intr(sc->bnx_dev, bnx_msi_enable,
5370 	    &intr->bnx_intr_rid, &intr_flags);
5371 
5372 	intr->bnx_intr_res = bus_alloc_resource_any(sc->bnx_dev, SYS_RES_IRQ,
5373 	    &intr->bnx_intr_rid, intr_flags);
5374 	if (intr->bnx_intr_res == NULL) {
5375 		device_printf(sc->bnx_dev, "could not alloc interrupt\n");
5376 		return ENXIO;
5377 	}
5378 
5379 	if (sc->bnx_intr_type == PCI_INTR_TYPE_MSI) {
5380 		bnx_enable_msi(sc, FALSE);
5381 		intr->bnx_intr_func = bnx_msi;
5382 		if (bootverbose)
5383 			device_printf(sc->bnx_dev, "oneshot MSI\n");
5384 	} else {
5385 		intr->bnx_intr_func = bnx_intr_legacy;
5386 	}
5387 	intr->bnx_intr_arg = sc;
5388 	intr->bnx_intr_cpuid = rman_get_cpuid(intr->bnx_intr_res);
5389 
5390 	intr->bnx_txr->bnx_tx_cpuid = intr->bnx_intr_cpuid;
5391 
5392 	return 0;
5393 }
5394 
5395 static int
5396 bnx_setup_intr(struct bnx_softc *sc)
5397 {
5398 	int error, i;
5399 
5400 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
5401 		struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
5402 
5403 		error = bus_setup_intr_descr(sc->bnx_dev, intr->bnx_intr_res,
5404 		    INTR_MPSAFE, intr->bnx_intr_func, intr->bnx_intr_arg,
5405 		    &intr->bnx_intr_hand, intr->bnx_intr_serialize,
5406 		    intr->bnx_intr_desc);
5407 		if (error) {
5408 			device_printf(sc->bnx_dev,
5409 			    "could not set up intr %d\n", i);
5410 			bnx_teardown_intr(sc, i);
5411 			return error;
5412 		}
5413 	}
5414 	return 0;
5415 }
5416 
5417 static void
5418 bnx_teardown_intr(struct bnx_softc *sc, int cnt)
5419 {
5420 	int i;
5421 
5422 	for (i = 0; i < cnt; ++i) {
5423 		struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
5424 
5425 		bus_teardown_intr(sc->bnx_dev, intr->bnx_intr_res,
5426 		    intr->bnx_intr_hand);
5427 	}
5428 }
5429 
5430 static void
5431 bnx_free_intr(struct bnx_softc *sc)
5432 {
5433 	if (sc->bnx_intr_type != PCI_INTR_TYPE_MSIX) {
5434 		struct bnx_intr_data *intr;
5435 
5436 		KKASSERT(sc->bnx_intr_cnt <= 1);
5437 		intr = &sc->bnx_intr_data[0];
5438 
5439 		if (intr->bnx_intr_res != NULL) {
5440 			bus_release_resource(sc->bnx_dev, SYS_RES_IRQ,
5441 			    intr->bnx_intr_rid, intr->bnx_intr_res);
5442 		}
5443 		if (sc->bnx_intr_type == PCI_INTR_TYPE_MSI)
5444 			pci_release_msi(sc->bnx_dev);
5445 	} else {
5446 		bnx_free_msix(sc, TRUE);
5447 	}
5448 }
5449 
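/*
 * Build the serializer array used by the ifnet (de)serialize methods:
 * the main serializer first, then the standard RX ring, the RX return
 * rings and finally the TX rings.
 */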
5450 static void
5451 bnx_setup_serialize(struct bnx_softc *sc)
5452 {
5453 	int i, j;
5454 
5455 	/*
5456 	 * Allocate serializer array
5457 	 */
5458 
5459 	/* Main + RX STD + TX + RX RET */
5460 	sc->bnx_serialize_cnt = 1 + 1 + sc->bnx_tx_ringcnt + sc->bnx_rx_retcnt;
5461 
5462 	sc->bnx_serialize =
5463 	    kmalloc(sc->bnx_serialize_cnt * sizeof(struct lwkt_serialize *),
5464 	        M_DEVBUF, M_WAITOK | M_ZERO);
5465 
5466 	/*
5467 	 * Setup serializers
5468 	 *
5469 	 * NOTE: Order is critical
5470 	 */
5471 
5472 	i = 0;
5473 
5474 	KKASSERT(i < sc->bnx_serialize_cnt);
5475 	sc->bnx_serialize[i++] = &sc->bnx_main_serialize;
5476 
5477 	KKASSERT(i < sc->bnx_serialize_cnt);
5478 	sc->bnx_serialize[i++] = &sc->bnx_rx_std_ring.bnx_rx_std_serialize;
5479 
5480 	for (j = 0; j < sc->bnx_rx_retcnt; ++j) {
5481 		KKASSERT(i < sc->bnx_serialize_cnt);
5482 		sc->bnx_serialize[i++] =
5483 		    &sc->bnx_rx_ret_ring[j].bnx_rx_ret_serialize;
5484 	}
5485 
5486 	for (j = 0; j < sc->bnx_tx_ringcnt; ++j) {
5487 		KKASSERT(i < sc->bnx_serialize_cnt);
5488 		sc->bnx_serialize[i++] =
5489 		    &sc->bnx_tx_ring[j].bnx_tx_serialize;
5490 	}
5491 
5492 	KKASSERT(i == sc->bnx_serialize_cnt);
5493 }
5494 
5495 static void
5496 bnx_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
5497 {
5498 	struct bnx_softc *sc = ifp->if_softc;
5499 
5500 	ifnet_serialize_array_enter(sc->bnx_serialize,
5501 	    sc->bnx_serialize_cnt, slz);
5502 }
5503 
5504 static void
5505 bnx_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
5506 {
5507 	struct bnx_softc *sc = ifp->if_softc;
5508 
5509 	ifnet_serialize_array_exit(sc->bnx_serialize,
5510 	    sc->bnx_serialize_cnt, slz);
5511 }
5512 
5513 static int
5514 bnx_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
5515 {
5516 	struct bnx_softc *sc = ifp->if_softc;
5517 
5518 	return ifnet_serialize_array_try(sc->bnx_serialize,
5519 	    sc->bnx_serialize_cnt, slz);
5520 }
5521 
5522 #ifdef INVARIANTS
5523 
5524 static void
5525 bnx_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
5526     boolean_t serialized)
5527 {
5528 	struct bnx_softc *sc = ifp->if_softc;
5529 
5530 	ifnet_serialize_array_assert(sc->bnx_serialize, sc->bnx_serialize_cnt,
5531 	    slz, serialized);
5532 }
5533 
5534 #endif	/* INVARIANTS */
5535 
5536 static void
5537 bnx_set_tick_cpuid(struct bnx_softc *sc, boolean_t polling)
5538 {
5539 	if (polling)
5540 		sc->bnx_tick_cpuid = 0; /* XXX */
5541 	else
5542 		sc->bnx_tick_cpuid = sc->bnx_intr_data[0].bnx_intr_cpuid;
5543 }
5544 
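/*
 * Dedicated ithread that refills the standard RX ring.  It runs the
 * refill handler under the standard ring's serializer whenever refill
 * work is pending and deschedules itself otherwise, until asked to stop.
 */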
5545 static void
5546 bnx_rx_std_refill_ithread(void *xstd)
5547 {
5548 	struct bnx_rx_std_ring *std = xstd;
5549 	struct globaldata *gd = mycpu;
5550 
5551 	crit_enter_gd(gd);
5552 
5553 	while (!std->bnx_rx_std_stop) {
5554 		if (std->bnx_rx_std_refill) {
5555 			lwkt_serialize_handler_call(
5556 			    &std->bnx_rx_std_serialize,
5557 			    bnx_rx_std_refill, std, NULL);
5558 		}
5559 
5560 		crit_exit_gd(gd);
5561 		crit_enter_gd(gd);
5562 
5563 		atomic_poll_release_int(&std->bnx_rx_std_running);
5564 		cpu_mfence();
5565 
5566 		if (!std->bnx_rx_std_refill && !std->bnx_rx_std_stop) {
5567 			lwkt_deschedule_self(gd->gd_curthread);
5568 			lwkt_switch();
5569 		}
5570 	}
5571 
5572 	crit_exit_gd(gd);
5573 
5574 	wakeup(std);
5575 
5576 	lwkt_exit();
5577 }
5578 
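/*
 * Refill handler: walk the pending refill mask, link freshly refilled
 * buffers back into the standard RX ring and update the hardware
 * producer index, batching the mailbox writes.
 */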
5579 static void
5580 bnx_rx_std_refill(void *xstd, void *frame __unused)
5581 {
5582 	struct bnx_rx_std_ring *std = xstd;
5583 	int cnt, refill_mask;
5584 
5585 again:
5586 	cnt = 0;
5587 
5588 	cpu_lfence();
5589 	refill_mask = std->bnx_rx_std_refill;
5590 	atomic_clear_int(&std->bnx_rx_std_refill, refill_mask);
5591 
5592 	while (refill_mask) {
5593 		uint16_t check_idx = std->bnx_rx_std;
5594 		int ret_idx;
5595 
5596 		ret_idx = bsfl(refill_mask);
5597 		for (;;) {
5598 			struct bnx_rx_buf *rb;
5599 			int refilled;
5600 
5601 			BNX_INC(check_idx, BGE_STD_RX_RING_CNT);
5602 			rb = &std->bnx_rx_std_buf[check_idx];
5603 			refilled = rb->bnx_rx_refilled;
5604 			cpu_lfence();
5605 			if (refilled) {
5606 				bnx_setup_rxdesc_std(std, check_idx);
5607 				std->bnx_rx_std = check_idx;
5608 				++cnt;
5609 				if (cnt >= 8) {
5610 					atomic_subtract_int(
5611 					    &std->bnx_rx_std_used, cnt);
5612 					bnx_writembx(std->bnx_sc,
5613 					    BGE_MBX_RX_STD_PROD_LO,
5614 					    std->bnx_rx_std);
5615 					cnt = 0;
5616 				}
5617 			} else {
5618 				break;
5619 			}
5620 		}
5621 		refill_mask &= ~(1 << ret_idx);
5622 	}
5623 
5624 	if (cnt) {
5625 		atomic_subtract_int(&std->bnx_rx_std_used, cnt);
5626 		bnx_writembx(std->bnx_sc, BGE_MBX_RX_STD_PROD_LO,
5627 		    std->bnx_rx_std);
5628 	}
5629 
5630 	if (std->bnx_rx_std_refill)
5631 		goto again;
5632 
5633 	atomic_poll_release_int(&std->bnx_rx_std_running);
5634 	cpu_mfence();
5635 
5636 	if (std->bnx_rx_std_refill)
5637 		goto again;
5638 }
5639 
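/*
 * Sysctl handler: report and set the per-return-ring RX refill
 * threshold (bnx_rx_cntmax); reject values that would let the return
 * rings consume half or more of the standard RX ring.
 */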
5640 static int
5641 bnx_sysctl_std_refill(SYSCTL_HANDLER_ARGS)
5642 {
5643 	struct bnx_softc *sc = (void *)arg1;
5644 	struct ifnet *ifp = &sc->arpcom.ac_if;
5645 	struct bnx_rx_ret_ring *ret = &sc->bnx_rx_ret_ring[0];
5646 	int error, cntmax, i;
5647 
5648 	cntmax = ret->bnx_rx_cntmax;
5649 	error = sysctl_handle_int(oidp, &cntmax, 0, req);
5650 	if (error || req->newptr == NULL)
5651 		return error;
5652 
5653 	ifnet_serialize_all(ifp);
5654 
5655 	if ((cntmax * sc->bnx_rx_retcnt) >= BGE_STD_RX_RING_CNT / 2) {
5656 		error = EINVAL;
5657 		goto back;
5658 	}
5659 
5660 	for (i = 0; i < sc->bnx_rx_retcnt; ++i)
5661 		sc->bnx_rx_ret_ring[i].bnx_rx_cntmax = cntmax;
5662 	error = 0;
5663 
5664 back:
5665 	ifnet_deserialize_all(ifp);
5666 
5667 	return error;
5668 }
5669 
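/*
 * Program the RSS indirection table from the RX ring map and load the
 * Toeplitz hash key into the RSS key registers.
 */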
5670 static void
5671 bnx_init_rss(struct bnx_softc *sc)
5672 {
5673 	uint8_t key[BGE_RSS_KEYREG_CNT * BGE_RSS_KEYREG_SIZE];
5674 	int i, j, r;
5675 
5676 	KKASSERT(BNX_RSS_ENABLED(sc));
5677 
5678 	/*
5679 	 * Configure RSS redirect table.
5680 	 */
5681 	if_ringmap_rdrtable(sc->bnx_rx_rmap, sc->bnx_rdr_table,
5682 	    BNX_RDRTABLE_SIZE);
5683 	r = 0;
5684 	for (j = 0; j < BGE_RSS_INDIR_TBL_CNT; ++j) {
5685 		uint32_t tbl = 0;
5686 
5687 		for (i = 0; i < BGE_RSS_INDIR_TBLENT_CNT; ++i) {
5688 			uint32_t q;
5689 
5690 			q = sc->bnx_rdr_table[r];
5691 			tbl |= q << (BGE_RSS_INDIR_TBLENT_SHIFT *
5692 			    (BGE_RSS_INDIR_TBLENT_CNT - i - 1));
5693 			++r;
5694 		}
5695 
5696 		BNX_RSS_DPRINTF(sc, 1, "tbl%d %08x\n", j, tbl);
5697 		CSR_WRITE_4(sc, BGE_RSS_INDIR_TBL(j), tbl);
5698 	}
5699 
5700 	toeplitz_get_key(key, sizeof(key));
5701 	for (i = 0; i < BGE_RSS_KEYREG_CNT; ++i) {
5702 		uint32_t keyreg;
5703 
5704 		keyreg = BGE_RSS_KEYREG_VAL(key, i);
5705 
5706 		BNX_RSS_DPRINTF(sc, 1, "key%d %08x\n", i, keyreg);
5707 		CSR_WRITE_4(sc, BGE_RSS_KEYREG(i), keyreg);
5708 	}
5709 }
5710 
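/*
 * Decide how many RX return rings, TX rings and interrupts to use,
 * based on the number of netisr cpus, MSI-X availability and the
 * chip's multi-ring capabilities.
 */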
5711 static void
5712 bnx_setup_ring_cnt(struct bnx_softc *sc)
5713 {
5714 	int msix_enable, msix_cnt, msix_ring, ring_max, ring_cnt;
5715 
5716 	/* One RX ring. */
5717 	sc->bnx_rx_rmap = if_ringmap_alloc(sc->bnx_dev, 1, 1);
5718 
5719 	if (netisr_ncpus == 1)
5720 		goto skip_rx;
5721 
5722 	msix_enable = device_getenv_int(sc->bnx_dev, "msix.enable",
5723 	    bnx_msix_enable);
5724 	if (!msix_enable)
5725 		goto skip_rx;
5726 
5727 	/*
5728 	 * One MSI-X vector is dedicated to status or single TX queue,
5729 	 * so make sure that there are enough MSI-X vectors.
5730 	 */
5731 	msix_cnt = pci_msix_count(sc->bnx_dev);
5732 	if (msix_cnt <= 1)
5733 		goto skip_rx;
5734 	if (bootverbose)
5735 		device_printf(sc->bnx_dev, "MSI-X count %d\n", msix_cnt);
5736 	msix_ring = msix_cnt - 1;
5737 
5738 	/*
5739 	 * Setup RX ring count
5740 	 */
5741 	ring_max = BNX_RX_RING_MAX;
5742 	if (ring_max > msix_ring)
5743 		ring_max = msix_ring;
5744 	ring_cnt = device_getenv_int(sc->bnx_dev, "rx_rings", bnx_rx_rings);
5745 
5746 	if_ringmap_free(sc->bnx_rx_rmap);
5747 	sc->bnx_rx_rmap = if_ringmap_alloc(sc->bnx_dev, ring_cnt, ring_max);
5748 
5749 skip_rx:
5750 	sc->bnx_rx_retcnt = if_ringmap_count(sc->bnx_rx_rmap);
5751 
5752 	/*
5753 	 * Setup TX ring count
5754 	 *
5755 	 * Currently only BCM5719 and BCM5720 support multiple TX rings
5756 	 * and the TX ring count must be less than the RX ring count.
5757 	 */
5758 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5719 ||
5759 	    sc->bnx_asicrev == BGE_ASICREV_BCM5720) {
5760 		ring_max = BNX_TX_RING_MAX;
5761 		if (ring_max > sc->bnx_rx_retcnt)
5762 			ring_max = sc->bnx_rx_retcnt;
5763 		ring_cnt = device_getenv_int(sc->bnx_dev, "tx_rings",
5764 		    bnx_tx_rings);
5765 	} else {
5766 		ring_max = 1;
5767 		ring_cnt = 1;
5768 	}
5769 	sc->bnx_tx_rmap = if_ringmap_alloc(sc->bnx_dev, ring_cnt, ring_max);
5770 	if_ringmap_align(sc->bnx_dev, sc->bnx_rx_rmap, sc->bnx_tx_rmap);
5771 
5772 	sc->bnx_tx_ringcnt = if_ringmap_count(sc->bnx_tx_rmap);
5773 	KASSERT(sc->bnx_tx_ringcnt <= sc->bnx_rx_retcnt,
5774 	    ("invalid TX ring count %d and RX ring count %d",
5775 	     sc->bnx_tx_ringcnt, sc->bnx_rx_retcnt));
5776 
5777 	/*
5778 	 * Setup interrupt count.
5779 	 */
5780 	if (sc->bnx_rx_retcnt == 1) {
5781 		sc->bnx_intr_cnt = 1;
5782 	} else {
5783 		/*
5784 		 * We need one extra MSI-X vector for link status or
5785 		 * TX ring (if only one TX ring is enabled).
5786 		 */
5787 		sc->bnx_intr_cnt = sc->bnx_rx_retcnt + 1;
5788 	}
5789 	KKASSERT(sc->bnx_intr_cnt <= BNX_INTR_MAX);
5790 
5791 	if (bootverbose) {
5792 		device_printf(sc->bnx_dev, "intr count %d, "
5793 		    "RX ring %d, TX ring %d\n", sc->bnx_intr_cnt,
5794 		    sc->bnx_rx_retcnt, sc->bnx_tx_ringcnt);
5795 	}
5796 }
5797 
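/*
 * Set up per-vector interrupt data and allocate MSI-X resources.  When
 * RX/TX are bundled, vector 0 handles link status and the remaining
 * vectors handle the bundled RX/TX rings; otherwise vector 0 handles
 * TX ring0 plus link status and the remaining vectors handle RX rings.
 */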
5798 static int
5799 bnx_alloc_msix(struct bnx_softc *sc)
5800 {
5801 	struct bnx_intr_data *intr;
5802 	boolean_t setup = FALSE;
5803 	int error, i;
5804 
5805 	KKASSERT(sc->bnx_intr_cnt > 1);
5806 	KKASSERT(sc->bnx_intr_cnt == sc->bnx_rx_retcnt + 1);
5807 
5808 	if (sc->bnx_flags & BNX_FLAG_RXTX_BUNDLE) {
5809 		/*
5810 		 * Link status
5811 		 */
5812 		intr = &sc->bnx_intr_data[0];
5813 
5814 		intr->bnx_intr_serialize = &sc->bnx_main_serialize;
5815 		intr->bnx_saved_status_tag = &sc->bnx_saved_status_tag;
5816 
5817 		intr->bnx_intr_func = bnx_msix_status;
5818 		intr->bnx_intr_arg = sc;
5819 		intr->bnx_intr_cpuid = 0; /* XXX */
5820 
5821 		ksnprintf(intr->bnx_intr_desc0, sizeof(intr->bnx_intr_desc0),
5822 		    "%s sts", device_get_nameunit(sc->bnx_dev));
5823 		intr->bnx_intr_desc = intr->bnx_intr_desc0;
5824 
5825 		/*
5826 		 * RX/TX rings
5827 		 */
5828 		for (i = 1; i < sc->bnx_intr_cnt; ++i) {
5829 			int idx = i - 1;
5830 
5831 			intr = &sc->bnx_intr_data[i];
5832 
5833 			KKASSERT(idx < sc->bnx_rx_retcnt);
5834 			intr->bnx_ret = &sc->bnx_rx_ret_ring[idx];
5835 			if (idx < sc->bnx_tx_ringcnt) {
5836 				intr->bnx_txr = &sc->bnx_tx_ring[idx];
5837 				intr->bnx_ret->bnx_txr = intr->bnx_txr;
5838 			}
5839 
5840 			intr->bnx_intr_serialize =
5841 			    &intr->bnx_ret->bnx_rx_ret_serialize;
5842 			intr->bnx_saved_status_tag =
5843 			    &intr->bnx_ret->bnx_saved_status_tag;
5844 
5845 			intr->bnx_intr_arg = intr->bnx_ret;
5846 			intr->bnx_intr_cpuid =
5847 			    if_ringmap_cpumap(sc->bnx_rx_rmap, idx);
5848 			KKASSERT(intr->bnx_intr_cpuid < netisr_ncpus);
5849 
5850 			if (intr->bnx_txr == NULL) {
5851 				intr->bnx_intr_check = bnx_check_intr_rx;
5852 				intr->bnx_intr_func = bnx_msix_rx;
5853 				ksnprintf(intr->bnx_intr_desc0,
5854 				    sizeof(intr->bnx_intr_desc0), "%s rx%d",
5855 				    device_get_nameunit(sc->bnx_dev), idx);
5856 			} else {
5857 #ifdef INVARIANTS
5858 				int tx_cpuid;
5859 #endif
5860 
5861 				intr->bnx_intr_check = bnx_check_intr_rxtx;
5862 				intr->bnx_intr_func = bnx_msix_rxtx;
5863 				ksnprintf(intr->bnx_intr_desc0,
5864 				    sizeof(intr->bnx_intr_desc0), "%s rxtx%d",
5865 				    device_get_nameunit(sc->bnx_dev), idx);
5866 
5867 #ifdef INVARIANTS
5868 				tx_cpuid = if_ringmap_cpumap(sc->bnx_tx_rmap,
5869 				    idx);
5870 				KASSERT(intr->bnx_intr_cpuid == tx_cpuid,
5871 				    ("RX intr cpu%d, TX intr cpu%d, mismatch",
5872 				     intr->bnx_intr_cpuid, tx_cpuid));
5873 #endif
5874 				intr->bnx_txr->bnx_tx_cpuid =
5875 				    intr->bnx_intr_cpuid;
5876 			}
5877 			intr->bnx_intr_desc = intr->bnx_intr_desc0;
5878 
5879 			intr->bnx_ret->bnx_msix_mbx = intr->bnx_intr_mbx;
5880 		}
5881 	} else {
5882 		/*
5883 		 * TX ring0 and link status
5884 		 */
5885 		intr = &sc->bnx_intr_data[0];
5886 
5887 		intr->bnx_txr = &sc->bnx_tx_ring[0];
5888 		intr->bnx_intr_serialize = &sc->bnx_main_serialize;
5889 		intr->bnx_intr_check = bnx_check_intr_tx;
5890 		intr->bnx_saved_status_tag =
5891 		    &intr->bnx_txr->bnx_saved_status_tag;
5892 
5893 		intr->bnx_intr_func = bnx_msix_tx_status;
5894 		intr->bnx_intr_arg = intr->bnx_txr;
5895 		intr->bnx_intr_cpuid = if_ringmap_cpumap(sc->bnx_tx_rmap, 0);
5896 		KKASSERT(intr->bnx_intr_cpuid < netisr_ncpus);
5897 
5898 		ksnprintf(intr->bnx_intr_desc0, sizeof(intr->bnx_intr_desc0),
5899 		    "%s ststx", device_get_nameunit(sc->bnx_dev));
5900 		intr->bnx_intr_desc = intr->bnx_intr_desc0;
5901 
5902 		intr->bnx_txr->bnx_tx_cpuid = intr->bnx_intr_cpuid;
5903 
5904 		/*
5905 		 * RX rings
5906 		 */
5907 		for (i = 1; i < sc->bnx_intr_cnt; ++i) {
5908 			int idx = i - 1;
5909 
5910 			intr = &sc->bnx_intr_data[i];
5911 
5912 			KKASSERT(idx < sc->bnx_rx_retcnt);
5913 			intr->bnx_ret = &sc->bnx_rx_ret_ring[idx];
5914 			intr->bnx_intr_serialize =
5915 			    &intr->bnx_ret->bnx_rx_ret_serialize;
5916 			intr->bnx_intr_check = bnx_check_intr_rx;
5917 			intr->bnx_saved_status_tag =
5918 			    &intr->bnx_ret->bnx_saved_status_tag;
5919 
5920 			intr->bnx_intr_func = bnx_msix_rx;
5921 			intr->bnx_intr_arg = intr->bnx_ret;
5922 			intr->bnx_intr_cpuid =
5923 			    if_ringmap_cpumap(sc->bnx_rx_rmap, idx);
5924 			KKASSERT(intr->bnx_intr_cpuid < netisr_ncpus);
5925 
5926 			ksnprintf(intr->bnx_intr_desc0,
5927 			    sizeof(intr->bnx_intr_desc0), "%s rx%d",
5928 			    device_get_nameunit(sc->bnx_dev), idx);
5929 			intr->bnx_intr_desc = intr->bnx_intr_desc0;
5930 
5931 			intr->bnx_ret->bnx_msix_mbx = intr->bnx_intr_mbx;
5932 		}
5933 	}
5934 
5935 	if (BNX_IS_5717_PLUS(sc)) {
5936 		sc->bnx_msix_mem_rid = PCIR_BAR(4);
5937 	} else {
5938 		if (sc->bnx_res2 == NULL)
5939 			sc->bnx_msix_mem_rid = PCIR_BAR(2);
5940 	}
5941 	if (sc->bnx_msix_mem_rid != 0) {
5942 		sc->bnx_msix_mem_res = bus_alloc_resource_any(sc->bnx_dev,
5943 		    SYS_RES_MEMORY, &sc->bnx_msix_mem_rid, RF_ACTIVE);
5944 		if (sc->bnx_msix_mem_res == NULL) {
5945 			device_printf(sc->bnx_dev,
5946 			    "could not alloc MSI-X table\n");
5947 			return ENXIO;
5948 		}
5949 	}
5950 
5951 	bnx_enable_msi(sc, TRUE);
5952 
5953 	error = pci_setup_msix(sc->bnx_dev);
5954 	if (error) {
5955 		device_printf(sc->bnx_dev, "could not setup MSI-X\n");
5956 		goto back;
5957 	}
5958 	setup = TRUE;
5959 
5960 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
5961 		intr = &sc->bnx_intr_data[i];
5962 
5963 		error = pci_alloc_msix_vector(sc->bnx_dev, i,
5964 		    &intr->bnx_intr_rid, intr->bnx_intr_cpuid);
5965 		if (error) {
5966 			device_printf(sc->bnx_dev,
5967 			    "could not alloc MSI-X %d on cpu%d\n",
5968 			    i, intr->bnx_intr_cpuid);
5969 			goto back;
5970 		}
5971 
5972 		intr->bnx_intr_res = bus_alloc_resource_any(sc->bnx_dev,
5973 		    SYS_RES_IRQ, &intr->bnx_intr_rid, RF_ACTIVE);
5974 		if (intr->bnx_intr_res == NULL) {
5975 			device_printf(sc->bnx_dev,
5976 			    "could not alloc MSI-X %d resource\n", i);
5977 			error = ENXIO;
5978 			goto back;
5979 		}
5980 	}
5981 
5982 	pci_enable_msix(sc->bnx_dev);
5983 	sc->bnx_intr_type = PCI_INTR_TYPE_MSIX;
5984 back:
5985 	if (error)
5986 		bnx_free_msix(sc, setup);
5987 	return error;
5988 }
5989 
5990 static void
5991 bnx_free_msix(struct bnx_softc *sc, boolean_t setup)
5992 {
5993 	int i;
5994 
5995 	KKASSERT(sc->bnx_intr_cnt > 1);
5996 
5997 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
5998 		struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
5999 
6000 		if (intr->bnx_intr_res != NULL) {
6001 			bus_release_resource(sc->bnx_dev, SYS_RES_IRQ,
6002 			    intr->bnx_intr_rid, intr->bnx_intr_res);
6003 		}
6004 		if (intr->bnx_intr_rid >= 0) {
6005 			pci_release_msix_vector(sc->bnx_dev,
6006 			    intr->bnx_intr_rid);
6007 		}
6008 	}
6009 	if (setup)
6010 		pci_teardown_msix(sc->bnx_dev);
6011 }
6012 
6013 static void
6014 bnx_rx_std_refill_sched_ipi(void *xret)
6015 {
6016 	struct bnx_rx_ret_ring *ret = xret;
6017 	struct bnx_rx_std_ring *std = ret->bnx_std;
6018 	struct globaldata *gd = mycpu;
6019 
6020 	crit_enter_gd(gd);
6021 
6022 	atomic_set_int(&std->bnx_rx_std_refill, ret->bnx_rx_mask);
6023 	cpu_sfence();
6024 
6025 	KKASSERT(std->bnx_rx_std_ithread->td_gd == gd);
6026 	lwkt_schedule(std->bnx_rx_std_ithread);
6027 
6028 	crit_exit_gd(gd);
6029 }
6030 
6031 static void
6032 bnx_rx_std_refill_stop(void *xstd)
6033 {
6034 	struct bnx_rx_std_ring *std = xstd;
6035 	struct globaldata *gd = mycpu;
6036 
6037 	crit_enter_gd(gd);
6038 
6039 	std->bnx_rx_std_stop = 1;
6040 	cpu_sfence();
6041 
6042 	KKASSERT(std->bnx_rx_std_ithread->td_gd == gd);
6043 	lwkt_schedule(std->bnx_rx_std_ithread);
6044 
6045 	crit_exit_gd(gd);
6046 }
6047 
6048 static void
6049 bnx_serialize_skipmain(struct bnx_softc *sc)
6050 {
6051 	lwkt_serialize_array_enter(sc->bnx_serialize,
6052 	    sc->bnx_serialize_cnt, 1);
6053 }
6054 
6055 static void
6056 bnx_deserialize_skipmain(struct bnx_softc *sc)
6057 {
6058 	lwkt_serialize_array_exit(sc->bnx_serialize,
6059 	    sc->bnx_serialize_cnt, 1);
6060 }
6061 
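/*
 * Schedule the standard RX ring refill ithread on behalf of the given
 * return ring, using an IPI if the ithread lives on another cpu.
 */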
6062 static void
6063 bnx_rx_std_refill_sched(struct bnx_rx_ret_ring *ret,
6064     struct bnx_rx_std_ring *std)
6065 {
6066 	struct globaldata *gd = mycpu;
6067 
6068 	ret->bnx_rx_cnt = 0;
6069 	cpu_sfence();
6070 
6071 	crit_enter_gd(gd);
6072 
6073 	atomic_set_int(&std->bnx_rx_std_refill, ret->bnx_rx_mask);
6074 	cpu_sfence();
6075 	if (atomic_poll_acquire_int(&std->bnx_rx_std_running)) {
6076 		if (std->bnx_rx_std_ithread->td_gd == gd) {
6077 			lwkt_schedule(std->bnx_rx_std_ithread);
6078 		} else {
6079 			lwkt_send_ipiq(std->bnx_rx_std_ithread->td_gd,
6080 			    bnx_rx_std_refill_sched_ipi, ret);
6081 		}
6082 	}
6083 
6084 	crit_exit_gd(gd);
6085 }
6086 
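/*
 * Extract packet info for RSS hash verification; only non-fragmented
 * IPv4 TCP/UDP packets with valid hardware checksums are accepted.
 */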
6087 static struct pktinfo *
6088 bnx_rss_info(struct pktinfo *pi, const struct bge_rx_bd *cur_rx)
6089 {
6090 	/* Don't pick up IPv6 packet */
6091 	if (cur_rx->bge_flags & BGE_RXBDFLAG_IPV6)
6092 		return NULL;
6093 
6094 	/* Don't pick up IP packet w/o IP checksum */
6095 	if ((cur_rx->bge_flags & BGE_RXBDFLAG_IP_CSUM) == 0 ||
6096 	    (cur_rx->bge_error_flag & BGE_RXERRFLAG_IP_CSUM_NOK))
6097 		return NULL;
6098 
6099 	/* Don't pick up IP packet w/o TCP/UDP checksum */
6100 	if ((cur_rx->bge_flags & BGE_RXBDFLAG_TCP_UDP_CSUM) == 0)
6101 		return NULL;
6102 
6103 	/* May be IP fragment */
6104 	if (cur_rx->bge_tcp_udp_csum != 0xffff)
6105 		return NULL;
6106 
6107 	if (cur_rx->bge_flags & BGE_RXBDFLAG_TCP_UDP_IS_TCP)
6108 		pi->pi_l3proto = IPPROTO_TCP;
6109 	else
6110 		pi->pi_l3proto = IPPROTO_UDP;
6111 	pi->pi_netisr = NETISR_IP;
6112 	pi->pi_flags = 0;
6113 
6114 	return pi;
6115 }
6116 
6117 static void
6118 bnx_sig_pre_reset(struct bnx_softc *sc, int type)
6119 {
6120 	if (type == BNX_RESET_START || type == BNX_RESET_SUSPEND)
6121 		bnx_ape_driver_state_change(sc, type);
6122 }
6123 
6124 static void
6125 bnx_sig_post_reset(struct bnx_softc *sc, int type)
6126 {
6127 	if (type == BNX_RESET_SHUTDOWN)
6128 		bnx_ape_driver_state_change(sc, type);
6129 }
6130 
6131 /*
6132  * Clear all stale locks and select the lock for this driver instance.
6133  */
6134 static void
6135 bnx_ape_lock_init(struct bnx_softc *sc)
6136 {
6137 	uint32_t bit, regbase;
6138 	int i;
6139 
6140 	regbase = BGE_APE_PER_LOCK_GRANT;
6141 
6142 	/* Clear any stale locks. */
6143 	for (i = BGE_APE_LOCK_PHY0; i <= BGE_APE_LOCK_GPIO; i++) {
6144 		switch (i) {
6145 		case BGE_APE_LOCK_PHY0:
6146 		case BGE_APE_LOCK_PHY1:
6147 		case BGE_APE_LOCK_PHY2:
6148 		case BGE_APE_LOCK_PHY3:
6149 			bit = BGE_APE_LOCK_GRANT_DRIVER0;
6150 			break;
6151 
6152 		default:
6153 			if (sc->bnx_func_addr == 0)
6154 				bit = BGE_APE_LOCK_GRANT_DRIVER0;
6155 			else
6156 				bit = 1 << sc->bnx_func_addr;
6157 			break;
6158 		}
6159 		APE_WRITE_4(sc, regbase + 4 * i, bit);
6160 	}
6161 
6162 	/* Select the PHY lock based on the device's function number. */
6163 	switch (sc->bnx_func_addr) {
6164 	case 0:
6165 		sc->bnx_phy_ape_lock = BGE_APE_LOCK_PHY0;
6166 		break;
6167 
6168 	case 1:
6169 		sc->bnx_phy_ape_lock = BGE_APE_LOCK_PHY1;
6170 		break;
6171 
6172 	case 2:
6173 		sc->bnx_phy_ape_lock = BGE_APE_LOCK_PHY2;
6174 		break;
6175 
6176 	case 3:
6177 		sc->bnx_phy_ape_lock = BGE_APE_LOCK_PHY3;
6178 		break;
6179 
6180 	default:
6181 		device_printf(sc->bnx_dev,
6182 		    "PHY lock not supported on this function\n");
6183 		break;
6184 	}
6185 }
6186 
6187 /*
6188  * Check for APE firmware, set flags, and print version info.
6189  */
6190 static void
6191 bnx_ape_read_fw_ver(struct bnx_softc *sc)
6192 {
6193 	const char *fwtype;
6194 	uint32_t apedata, features;
6195 
6196 	/* Check for a valid APE signature in shared memory. */
6197 	apedata = APE_READ_4(sc, BGE_APE_SEG_SIG);
6198 	if (apedata != BGE_APE_SEG_SIG_MAGIC) {
6199 		device_printf(sc->bnx_dev, "no APE signature\n");
6200 		sc->bnx_mfw_flags &= ~BNX_MFW_ON_APE;
6201 		return;
6202 	}
6203 
6204 	/* Check if APE firmware is running. */
6205 	apedata = APE_READ_4(sc, BGE_APE_FW_STATUS);
6206 	if ((apedata & BGE_APE_FW_STATUS_READY) == 0) {
6207 		device_printf(sc->bnx_dev, "APE signature found "
6208 		    "but FW status not ready! 0x%08x\n", apedata);
6209 		return;
6210 	}
6211 
6212 	sc->bnx_mfw_flags |= BNX_MFW_ON_APE;
6213 
6214 	/* Fetch the APE firmware type and version. */
6215 	apedata = APE_READ_4(sc, BGE_APE_FW_VERSION);
6216 	features = APE_READ_4(sc, BGE_APE_FW_FEATURES);
6217 	if (features & BGE_APE_FW_FEATURE_NCSI) {
6218 		sc->bnx_mfw_flags |= BNX_MFW_TYPE_NCSI;
6219 		fwtype = "NCSI";
6220 	} else if (features & BGE_APE_FW_FEATURE_DASH) {
6221 		sc->bnx_mfw_flags |= BNX_MFW_TYPE_DASH;
6222 		fwtype = "DASH";
6223 	} else {
6224 		fwtype = "UNKN";
6225 	}
6226 
6227 	/* Print the APE firmware version. */
6228 	device_printf(sc->bnx_dev, "APE FW version: %s v%d.%d.%d.%d\n",
6229 	    fwtype,
6230 	    (apedata & BGE_APE_FW_VERSION_MAJMSK) >> BGE_APE_FW_VERSION_MAJSFT,
6231 	    (apedata & BGE_APE_FW_VERSION_MINMSK) >> BGE_APE_FW_VERSION_MINSFT,
6232 	    (apedata & BGE_APE_FW_VERSION_REVMSK) >> BGE_APE_FW_VERSION_REVSFT,
6233 	    (apedata & BGE_APE_FW_VERSION_BLDMSK));
6234 }
6235 
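/*
 * Acquire the requested APE lock; returns EBUSY if the firmware does
 * not grant it within one second.
 */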
6236 static int
6237 bnx_ape_lock(struct bnx_softc *sc, int locknum)
6238 {
6239 	uint32_t bit, gnt, req, status;
6240 	int i, off;
6241 
6242 	if ((sc->bnx_mfw_flags & BNX_MFW_ON_APE) == 0)
6243 		return 0;
6244 
6245 	/* Lock request/grant registers have different bases. */
6246 	req = BGE_APE_PER_LOCK_REQ;
6247 	gnt = BGE_APE_PER_LOCK_GRANT;
6248 
6249 	off = 4 * locknum;
6250 
6251 	switch (locknum) {
6252 	case BGE_APE_LOCK_GPIO:
6253 		/* Lock required when using GPIO. */
6254 		if (sc->bnx_func_addr == 0)
6255 			bit = BGE_APE_LOCK_REQ_DRIVER0;
6256 		else
6257 			bit = 1 << sc->bnx_func_addr;
6258 		break;
6259 
6260 	case BGE_APE_LOCK_GRC:
6261 		/* Lock required to reset the device. */
6262 		if (sc->bnx_func_addr == 0)
6263 			bit = BGE_APE_LOCK_REQ_DRIVER0;
6264 		else
6265 			bit = 1 << sc->bnx_func_addr;
6266 		break;
6267 
6268 	case BGE_APE_LOCK_MEM:
6269 		/* Lock required when accessing certain APE memory. */
6270 		if (sc->bnx_func_addr == 0)
6271 			bit = BGE_APE_LOCK_REQ_DRIVER0;
6272 		else
6273 			bit = 1 << sc->bnx_func_addr;
6274 		break;
6275 
6276 	case BGE_APE_LOCK_PHY0:
6277 	case BGE_APE_LOCK_PHY1:
6278 	case BGE_APE_LOCK_PHY2:
6279 	case BGE_APE_LOCK_PHY3:
6280 		/* Lock required when accessing PHYs. */
6281 		bit = BGE_APE_LOCK_REQ_DRIVER0;
6282 		break;
6283 
6284 	default:
6285 		return EINVAL;
6286 	}
6287 
6288 	/* Request a lock. */
6289 	APE_WRITE_4(sc, req + off, bit);
6290 
6291 	/* Wait up to 1 second to acquire lock. */
6292 	for (i = 0; i < 20000; i++) {
6293 		status = APE_READ_4(sc, gnt + off);
6294 		if (status == bit)
6295 			break;
6296 		DELAY(50);
6297 	}
6298 
6299 	/* Handle any errors. */
6300 	if (status != bit) {
6301 		if_printf(&sc->arpcom.ac_if, "APE lock %d request failed! "
6302 		    "request = 0x%04x[0x%04x], status = 0x%04x[0x%04x]\n",
6303 		    locknum, req + off, bit & 0xFFFF, gnt + off,
6304 		    status & 0xFFFF);
6305 		/* Revoke the lock request. */
6306 		APE_WRITE_4(sc, gnt + off, bit);
6307 		return EBUSY;
6308 	}
6309 
6310 	return 0;
6311 }
6312 
6313 static void
6314 bnx_ape_unlock(struct bnx_softc *sc, int locknum)
6315 {
6316 	uint32_t bit, gnt;
6317 	int off;
6318 
6319 	if ((sc->bnx_mfw_flags & BNX_MFW_ON_APE) == 0)
6320 		return;
6321 
6322 	gnt = BGE_APE_PER_LOCK_GRANT;
6323 
6324 	off = 4 * locknum;
6325 
6326 	switch (locknum) {
6327 	case BGE_APE_LOCK_GPIO:
6328 		if (sc->bnx_func_addr == 0)
6329 			bit = BGE_APE_LOCK_GRANT_DRIVER0;
6330 		else
6331 			bit = 1 << sc->bnx_func_addr;
6332 		break;
6333 
6334 	case BGE_APE_LOCK_GRC:
6335 		if (sc->bnx_func_addr == 0)
6336 			bit = BGE_APE_LOCK_GRANT_DRIVER0;
6337 		else
6338 			bit = 1 << sc->bnx_func_addr;
6339 		break;
6340 
6341 	case BGE_APE_LOCK_MEM:
6342 		if (sc->bnx_func_addr == 0)
6343 			bit = BGE_APE_LOCK_GRANT_DRIVER0;
6344 		else
6345 			bit = 1 << sc->bnx_func_addr;
6346 		break;
6347 
6348 	case BGE_APE_LOCK_PHY0:
6349 	case BGE_APE_LOCK_PHY1:
6350 	case BGE_APE_LOCK_PHY2:
6351 	case BGE_APE_LOCK_PHY3:
6352 		bit = BGE_APE_LOCK_GRANT_DRIVER0;
6353 		break;
6354 
6355 	default:
6356 		return;
6357 	}
6358 
6359 	APE_WRITE_4(sc, gnt + off, bit);
6360 }
6361 
6362 /*
6363  * Send an event to the APE firmware.
6364  */
6365 static void
6366 bnx_ape_send_event(struct bnx_softc *sc, uint32_t event)
6367 {
6368 	uint32_t apedata;
6369 	int i;
6370 
6371 	/* NCSI does not support APE events. */
6372 	if ((sc->bnx_mfw_flags & BNX_MFW_ON_APE) == 0)
6373 		return;
6374 
6375 	/* Wait up to 1ms for APE to service previous event. */
6376 	for (i = 10; i > 0; i--) {
6377 		if (bnx_ape_lock(sc, BGE_APE_LOCK_MEM) != 0)
6378 			break;
6379 		apedata = APE_READ_4(sc, BGE_APE_EVENT_STATUS);
6380 		if ((apedata & BGE_APE_EVENT_STATUS_EVENT_PENDING) == 0) {
6381 			APE_WRITE_4(sc, BGE_APE_EVENT_STATUS, event |
6382 			    BGE_APE_EVENT_STATUS_EVENT_PENDING);
6383 			bnx_ape_unlock(sc, BGE_APE_LOCK_MEM);
6384 			APE_WRITE_4(sc, BGE_APE_EVENT, BGE_APE_EVENT_1);
6385 			break;
6386 		}
6387 		bnx_ape_unlock(sc, BGE_APE_LOCK_MEM);
6388 		DELAY(100);
6389 	}
6390 	if (i == 0) {
6391 		if_printf(&sc->arpcom.ac_if,
6392 		    "APE event 0x%08x send timed out\n", event);
6393 	}
6394 }
6395 
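/*
 * Tell the APE firmware about driver state changes (start, shutdown,
 * suspend) and send the corresponding event.
 */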
6396 static void
6397 bnx_ape_driver_state_change(struct bnx_softc *sc, int kind)
6398 {
6399 	uint32_t apedata, event;
6400 
6401 	if ((sc->bnx_mfw_flags & BNX_MFW_ON_APE) == 0)
6402 		return;
6403 
6404 	switch (kind) {
6405 	case BNX_RESET_START:
6406 		/* If this is the first load, clear the load counter. */
6407 		apedata = APE_READ_4(sc, BGE_APE_HOST_SEG_SIG);
6408 		if (apedata != BGE_APE_HOST_SEG_SIG_MAGIC) {
6409 			APE_WRITE_4(sc, BGE_APE_HOST_INIT_COUNT, 0);
6410 		} else {
6411 			apedata = APE_READ_4(sc, BGE_APE_HOST_INIT_COUNT);
6412 			APE_WRITE_4(sc, BGE_APE_HOST_INIT_COUNT, ++apedata);
6413 		}
6414 		APE_WRITE_4(sc, BGE_APE_HOST_SEG_SIG,
6415 		    BGE_APE_HOST_SEG_SIG_MAGIC);
6416 		APE_WRITE_4(sc, BGE_APE_HOST_SEG_LEN,
6417 		    BGE_APE_HOST_SEG_LEN_MAGIC);
6418 
6419 		/* Add some version info if bnx(4) supports it. */
6420 		APE_WRITE_4(sc, BGE_APE_HOST_DRIVER_ID,
6421 		    BGE_APE_HOST_DRIVER_ID_MAGIC(1, 0));
6422 		APE_WRITE_4(sc, BGE_APE_HOST_BEHAVIOR,
6423 		    BGE_APE_HOST_BEHAV_NO_PHYLOCK);
6424 		APE_WRITE_4(sc, BGE_APE_HOST_HEARTBEAT_INT_MS,
6425 		    BGE_APE_HOST_HEARTBEAT_INT_DISABLE);
6426 		APE_WRITE_4(sc, BGE_APE_HOST_DRVR_STATE,
6427 		    BGE_APE_HOST_DRVR_STATE_START);
6428 		event = BGE_APE_EVENT_STATUS_STATE_START;
6429 		break;
6430 
6431 	case BNX_RESET_SHUTDOWN:
6432 		APE_WRITE_4(sc, BGE_APE_HOST_DRVR_STATE,
6433 		    BGE_APE_HOST_DRVR_STATE_UNLOAD);
6434 		event = BGE_APE_EVENT_STATUS_STATE_UNLOAD;
6435 		break;
6436 
6437 	case BNX_RESET_SUSPEND:
6438 		event = BGE_APE_EVENT_STATUS_STATE_SUSPEND;
6439 		break;
6440 
6441 	default:
6442 		return;
6443 	}
6444 
6445 	bnx_ape_send_event(sc, event | BGE_APE_EVENT_STATUS_DRIVER_EVNT |
6446 	    BGE_APE_EVENT_STATUS_STATE_CHNGE);
6447 }
6448