xref: /dragonfly/sys/dev/netif/bnx/if_bnx.c (revision 7d84b73d)
1 /*
2  * Copyright (c) 2001 Wind River Systems
3  * Copyright (c) 1997, 1998, 1999, 2001
4  *	Bill Paul <wpaul@windriver.com>.  All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. All advertising materials mentioning features or use of this software
15  *    must display the following acknowledgement:
16  *	This product includes software developed by Bill Paul.
17  * 4. Neither the name of the author nor the names of any co-contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31  * THE POSSIBILITY OF SUCH DAMAGE.
32  *
33  * $FreeBSD: src/sys/dev/bge/if_bge.c,v 1.3.2.39 2005/07/03 03:41:18 silby Exp $
34  */
35 
36 #include "opt_bnx.h"
37 #include "opt_ifpoll.h"
38 
39 #include <sys/param.h>
40 #include <sys/bus.h>
41 #include <sys/endian.h>
42 #include <sys/kernel.h>
43 #include <sys/interrupt.h>
44 #include <sys/mbuf.h>
45 #include <sys/malloc.h>
46 #include <sys/queue.h>
47 #include <sys/rman.h>
48 #include <sys/serialize.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/sysctl.h>
52 
53 #include <netinet/ip.h>
54 #include <netinet/tcp.h>
55 
56 #include <net/bpf.h>
57 #include <net/ethernet.h>
58 #include <net/if.h>
59 #include <net/if_arp.h>
60 #include <net/if_dl.h>
61 #include <net/if_media.h>
62 #include <net/if_poll.h>
63 #include <net/if_types.h>
64 #include <net/ifq_var.h>
65 #include <net/if_ringmap.h>
66 #include <net/toeplitz.h>
67 #include <net/toeplitz2.h>
68 #include <net/vlan/if_vlan_var.h>
69 #include <net/vlan/if_vlan_ether.h>
70 
71 #include <dev/netif/mii_layer/mii.h>
72 #include <dev/netif/mii_layer/miivar.h>
73 #include <dev/netif/mii_layer/brgphyreg.h>
74 
75 #include "pcidevs.h"
76 #include <bus/pci/pcireg.h>
77 #include <bus/pci/pcivar.h>
78 
79 #include <dev/netif/bge/if_bgereg.h>
80 #include <dev/netif/bnx/if_bnxvar.h>
81 
82 /* "device miibus" required.  See GENERIC if you get errors here. */
83 #include "miibus_if.h"
84 
85 #define BNX_CSUM_FEATURES	(CSUM_IP | CSUM_TCP | CSUM_UDP)
86 
87 #define	BNX_RESET_SHUTDOWN	0
88 #define	BNX_RESET_START		1
89 #define	BNX_RESET_SUSPEND	2
90 
91 #define BNX_INTR_CKINTVL	((10 * hz) / 1000)	/* 10ms */
92 
93 #ifdef BNX_RSS_DEBUG
94 #define BNX_RSS_DPRINTF(sc, lvl, fmt, ...) \
95 do { \
96 	if (sc->bnx_rss_debug >= lvl) \
97 		if_printf(&sc->arpcom.ac_if, fmt, __VA_ARGS__); \
98 } while (0)
99 #else	/* !BNX_RSS_DEBUG */
100 #define BNX_RSS_DPRINTF(sc, lvl, fmt, ...)	((void)0)
101 #endif	/* BNX_RSS_DEBUG */
102 
103 static const struct bnx_type {
104 	uint16_t		bnx_vid;
105 	uint16_t		bnx_did;
106 	char			*bnx_name;
107 } bnx_devs[] = {
108 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5717,
109 		"Broadcom BCM5717 Gigabit Ethernet" },
110 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5717C,
111 		"Broadcom BCM5717C Gigabit Ethernet" },
112 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5718,
113 		"Broadcom BCM5718 Gigabit Ethernet" },
114 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5719,
115 		"Broadcom BCM5719 Gigabit Ethernet" },
116 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5720_ALT,
117 		"Broadcom BCM5720 Gigabit Ethernet" },
118 
119 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5725,
120 		"Broadcom BCM5725 Gigabit Ethernet" },
121 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5727,
122 		"Broadcom BCM5727 Gigabit Ethernet" },
123 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5762,
124 		"Broadcom BCM5762 Gigabit Ethernet" },
125 
126 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57761,
127 		"Broadcom BCM57761 Gigabit Ethernet" },
128 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57762,
129 		"Broadcom BCM57762 Gigabit Ethernet" },
130 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57764,
131 		"Broadcom BCM57764 Gigabit Ethernet" },
132 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57765,
133 		"Broadcom BCM57765 Gigabit Ethernet" },
134 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57766,
135 		"Broadcom BCM57766 Gigabit Ethernet" },
136 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57767,
137 		"Broadcom BCM57767 Gigabit Ethernet" },
138 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57781,
139 		"Broadcom BCM57781 Gigabit Ethernet" },
140 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57782,
141 		"Broadcom BCM57782 Gigabit Ethernet" },
142 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57785,
143 		"Broadcom BCM57785 Gigabit Ethernet" },
144 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57786,
145 		"Broadcom BCM57786 Gigabit Ethernet" },
146 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57787,
147 		"Broadcom BCM57787 Gigabit Ethernet" },
148 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57791,
149 		"Broadcom BCM57791 Fast Ethernet" },
150 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57795,
151 		"Broadcom BCM57795 Fast Ethernet" },
152 
153 	{ 0, 0, NULL }
154 };
155 
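/* Host TX producer index mailboxes, one per supported send ring. */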
156 static const int bnx_tx_mailbox[BNX_TX_RING_MAX] = {
157 	BGE_MBX_TX_HOST_PROD0_LO,
158 	BGE_MBX_TX_HOST_PROD0_HI,
159 	BGE_MBX_TX_HOST_PROD1_LO,
160 	BGE_MBX_TX_HOST_PROD1_HI
161 };
162 
163 #define BNX_IS_JUMBO_CAPABLE(sc)	((sc)->bnx_flags & BNX_FLAG_JUMBO)
164 #define BNX_IS_5717_PLUS(sc)		((sc)->bnx_flags & BNX_FLAG_5717_PLUS)
165 #define BNX_IS_57765_PLUS(sc)		((sc)->bnx_flags & BNX_FLAG_57765_PLUS)
166 #define BNX_IS_57765_FAMILY(sc)	 \
167 	((sc)->bnx_flags & BNX_FLAG_57765_FAMILY)
168 
169 typedef int	(*bnx_eaddr_fcn_t)(struct bnx_softc *, uint8_t[]);
170 
171 static int	bnx_probe(device_t);
172 static int	bnx_attach(device_t);
173 static int	bnx_detach(device_t);
174 static void	bnx_shutdown(device_t);
175 static int	bnx_suspend(device_t);
176 static int	bnx_resume(device_t);
177 static int	bnx_miibus_readreg(device_t, int, int);
178 static int	bnx_miibus_writereg(device_t, int, int, int);
179 static void	bnx_miibus_statchg(device_t);
180 
181 static int	bnx_handle_status(struct bnx_softc *);
182 #ifdef IFPOLL_ENABLE
183 static void	bnx_npoll(struct ifnet *, struct ifpoll_info *);
184 static void	bnx_npoll_rx(struct ifnet *, void *, int);
185 static void	bnx_npoll_tx(struct ifnet *, void *, int);
186 static void	bnx_npoll_tx_notag(struct ifnet *, void *, int);
187 static void	bnx_npoll_status(struct ifnet *);
188 static void	bnx_npoll_status_notag(struct ifnet *);
189 #endif
190 static void	bnx_intr_legacy(void *);
191 static void	bnx_msi(void *);
192 static void	bnx_intr(struct bnx_softc *);
193 static void	bnx_msix_status(void *);
194 static void	bnx_msix_tx_status(void *);
195 static void	bnx_msix_rx(void *);
196 static void	bnx_msix_rxtx(void *);
197 static void	bnx_enable_intr(struct bnx_softc *);
198 static void	bnx_disable_intr(struct bnx_softc *);
199 static void	bnx_txeof(struct bnx_tx_ring *, uint16_t);
200 static void	bnx_rxeof(struct bnx_rx_ret_ring *, uint16_t, int);
201 static int	bnx_alloc_intr(struct bnx_softc *);
202 static int	bnx_setup_intr(struct bnx_softc *);
203 static void	bnx_free_intr(struct bnx_softc *);
204 static void	bnx_teardown_intr(struct bnx_softc *, int);
205 static int	bnx_alloc_msix(struct bnx_softc *);
206 static void	bnx_free_msix(struct bnx_softc *, boolean_t);
207 static void	bnx_check_intr_rxtx(void *);
208 static void	bnx_check_intr_rx(void *);
209 static void	bnx_check_intr_tx(void *);
210 static void	bnx_rx_std_refill_ithread(void *);
211 static void	bnx_rx_std_refill(void *, void *);
212 static void	bnx_rx_std_refill_sched_ipi(void *);
213 static void	bnx_rx_std_refill_stop(void *);
214 static void	bnx_rx_std_refill_sched(struct bnx_rx_ret_ring *,
215 		    struct bnx_rx_std_ring *);
216 
217 static void	bnx_start(struct ifnet *, struct ifaltq_subque *);
218 static int	bnx_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
219 static void	bnx_init(void *);
220 static void	bnx_stop(struct bnx_softc *);
221 static void	bnx_watchdog(struct ifaltq_subque *);
222 static int	bnx_ifmedia_upd(struct ifnet *);
223 static void	bnx_ifmedia_sts(struct ifnet *, struct ifmediareq *);
224 static void	bnx_tick(void *);
225 static void	bnx_serialize(struct ifnet *, enum ifnet_serialize);
226 static void	bnx_deserialize(struct ifnet *, enum ifnet_serialize);
227 static int	bnx_tryserialize(struct ifnet *, enum ifnet_serialize);
228 #ifdef INVARIANTS
229 static void	bnx_serialize_assert(struct ifnet *, enum ifnet_serialize,
230 		    boolean_t);
231 #endif
232 static void	bnx_serialize_skipmain(struct bnx_softc *);
233 static void	bnx_deserialize_skipmain(struct bnx_softc *sc);
234 
235 static int	bnx_alloc_jumbo_mem(struct bnx_softc *);
236 static void	bnx_free_jumbo_mem(struct bnx_softc *);
237 static struct bnx_jslot
238 		*bnx_jalloc(struct bnx_softc *);
239 static void	bnx_jfree(void *);
240 static void	bnx_jref(void *);
241 static int	bnx_newbuf_std(struct bnx_rx_ret_ring *, int, int);
242 static int	bnx_newbuf_jumbo(struct bnx_softc *, int, int);
243 static void	bnx_setup_rxdesc_std(struct bnx_rx_std_ring *, int);
244 static void	bnx_setup_rxdesc_jumbo(struct bnx_softc *, int);
245 static int	bnx_init_rx_ring_std(struct bnx_rx_std_ring *);
246 static void	bnx_free_rx_ring_std(struct bnx_rx_std_ring *);
247 static int	bnx_init_rx_ring_jumbo(struct bnx_softc *);
248 static void	bnx_free_rx_ring_jumbo(struct bnx_softc *);
249 static void	bnx_free_tx_ring(struct bnx_tx_ring *);
250 static int	bnx_init_tx_ring(struct bnx_tx_ring *);
251 static int	bnx_create_tx_ring(struct bnx_tx_ring *);
252 static void	bnx_destroy_tx_ring(struct bnx_tx_ring *);
253 static int	bnx_create_rx_ret_ring(struct bnx_rx_ret_ring *);
254 static void	bnx_destroy_rx_ret_ring(struct bnx_rx_ret_ring *);
255 static int	bnx_dma_alloc(device_t);
256 static void	bnx_dma_free(struct bnx_softc *);
257 static int	bnx_dma_block_alloc(struct bnx_softc *, bus_size_t,
258 		    bus_dma_tag_t *, bus_dmamap_t *, void **, bus_addr_t *);
259 static void	bnx_dma_block_free(bus_dma_tag_t, bus_dmamap_t, void *);
260 static struct mbuf *
261 		bnx_defrag_shortdma(struct mbuf *);
262 static int	bnx_encap(struct bnx_tx_ring *, struct mbuf **,
263 		    uint32_t *, int *);
264 static int	bnx_setup_tso(struct bnx_tx_ring *, struct mbuf **,
265 		    uint16_t *, uint16_t *);
266 static void	bnx_setup_serialize(struct bnx_softc *);
267 static void	bnx_set_tick_cpuid(struct bnx_softc *, boolean_t);
268 static void	bnx_setup_ring_cnt(struct bnx_softc *);
269 
270 static struct pktinfo *bnx_rss_info(struct pktinfo *,
271 		    const struct bge_rx_bd *);
272 static void	bnx_init_rss(struct bnx_softc *);
273 static void	bnx_reset(struct bnx_softc *);
274 static int	bnx_chipinit(struct bnx_softc *);
275 static int	bnx_blockinit(struct bnx_softc *);
276 static void	bnx_stop_block(struct bnx_softc *, bus_size_t, uint32_t);
277 static void	bnx_enable_msi(struct bnx_softc *, boolean_t);
278 static void	bnx_setmulti(struct bnx_softc *);
279 static void	bnx_setpromisc(struct bnx_softc *);
280 static void	bnx_stats_update_regs(struct bnx_softc *);
281 static uint32_t	bnx_dma_swap_options(struct bnx_softc *);
282 
283 static uint32_t	bnx_readmem_ind(struct bnx_softc *, uint32_t);
284 static void	bnx_writemem_ind(struct bnx_softc *, uint32_t, uint32_t);
285 #ifdef notdef
286 static uint32_t	bnx_readreg_ind(struct bnx_softc *, uint32_t);
287 #endif
288 static void	bnx_writemem_direct(struct bnx_softc *, uint32_t, uint32_t);
289 static void	bnx_writembx(struct bnx_softc *, int, int);
290 static int	bnx_read_nvram(struct bnx_softc *, caddr_t, int, int);
291 static uint8_t	bnx_eeprom_getbyte(struct bnx_softc *, uint32_t, uint8_t *);
292 static int	bnx_read_eeprom(struct bnx_softc *, caddr_t, uint32_t, size_t);
293 
294 static void	bnx_tbi_link_upd(struct bnx_softc *, uint32_t);
295 static void	bnx_copper_link_upd(struct bnx_softc *, uint32_t);
296 static void	bnx_autopoll_link_upd(struct bnx_softc *, uint32_t);
297 static void	bnx_link_poll(struct bnx_softc *);
298 
299 static int	bnx_get_eaddr_mem(struct bnx_softc *, uint8_t[]);
300 static int	bnx_get_eaddr_nvram(struct bnx_softc *, uint8_t[]);
301 static int	bnx_get_eaddr_eeprom(struct bnx_softc *, uint8_t[]);
302 static int	bnx_get_eaddr(struct bnx_softc *, uint8_t[]);
303 
304 static void	bnx_coal_change(struct bnx_softc *);
305 static int	bnx_sysctl_force_defrag(SYSCTL_HANDLER_ARGS);
306 static int	bnx_sysctl_tx_wreg(SYSCTL_HANDLER_ARGS);
307 static int	bnx_sysctl_rx_coal_ticks(SYSCTL_HANDLER_ARGS);
308 static int	bnx_sysctl_tx_coal_ticks(SYSCTL_HANDLER_ARGS);
309 static int	bnx_sysctl_rx_coal_bds(SYSCTL_HANDLER_ARGS);
310 static int	bnx_sysctl_rx_coal_bds_poll(SYSCTL_HANDLER_ARGS);
311 static int	bnx_sysctl_tx_coal_bds(SYSCTL_HANDLER_ARGS);
312 static int	bnx_sysctl_tx_coal_bds_poll(SYSCTL_HANDLER_ARGS);
313 static int	bnx_sysctl_rx_coal_bds_int(SYSCTL_HANDLER_ARGS);
314 static int	bnx_sysctl_tx_coal_bds_int(SYSCTL_HANDLER_ARGS);
315 static int	bnx_sysctl_coal_chg(SYSCTL_HANDLER_ARGS, uint32_t *,
316 		    int, int, uint32_t);
317 static int	bnx_sysctl_std_refill(SYSCTL_HANDLER_ARGS);
318 
319 static void	bnx_sig_post_reset(struct bnx_softc *, int);
320 static void	bnx_sig_pre_reset(struct bnx_softc *, int);
321 static void	bnx_ape_lock_init(struct bnx_softc *);
322 static void	bnx_ape_read_fw_ver(struct bnx_softc *);
323 static int	bnx_ape_lock(struct bnx_softc *, int);
324 static void	bnx_ape_unlock(struct bnx_softc *, int);
325 static void	bnx_ape_send_event(struct bnx_softc *, uint32_t);
326 static void	bnx_ape_driver_state_change(struct bnx_softc *, int);
327 
328 static int	bnx_msi_enable = 1;
329 static int	bnx_msix_enable = 1;
330 
331 static int	bnx_rx_rings = 0; /* auto */
332 static int	bnx_tx_rings = 0; /* auto */
333 
334 TUNABLE_INT("hw.bnx.msi.enable", &bnx_msi_enable);
335 TUNABLE_INT("hw.bnx.msix.enable", &bnx_msix_enable);
336 TUNABLE_INT("hw.bnx.rx_rings", &bnx_rx_rings);
337 TUNABLE_INT("hw.bnx.tx_rings", &bnx_tx_rings);
338 
339 static device_method_t bnx_methods[] = {
340 	/* Device interface */
341 	DEVMETHOD(device_probe,		bnx_probe),
342 	DEVMETHOD(device_attach,	bnx_attach),
343 	DEVMETHOD(device_detach,	bnx_detach),
344 	DEVMETHOD(device_shutdown,	bnx_shutdown),
345 	DEVMETHOD(device_suspend,	bnx_suspend),
346 	DEVMETHOD(device_resume,	bnx_resume),
347 
348 	/* bus interface */
349 	DEVMETHOD(bus_print_child,	bus_generic_print_child),
350 	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
351 
352 	/* MII interface */
353 	DEVMETHOD(miibus_readreg,	bnx_miibus_readreg),
354 	DEVMETHOD(miibus_writereg,	bnx_miibus_writereg),
355 	DEVMETHOD(miibus_statchg,	bnx_miibus_statchg),
356 
357 	DEVMETHOD_END
358 };
359 
360 static DEFINE_CLASS_0(bnx, bnx_driver, bnx_methods, sizeof(struct bnx_softc));
361 static devclass_t bnx_devclass;
362 
363 DECLARE_DUMMY_MODULE(if_bnx);
364 MODULE_DEPEND(if_bnx, miibus, 1, 1, 1);
365 DRIVER_MODULE(if_bnx, pci, bnx_driver, bnx_devclass, NULL, NULL);
366 DRIVER_MODULE(miibus, bnx, miibus_driver, miibus_devclass, NULL, NULL);
367 
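/*
 * Indirectly read a 32-bit word of NIC-internal memory through the PCI
 * memory window: program the window base, read the data register, then
 * restore the base to 0.
 */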
368 static uint32_t
369 bnx_readmem_ind(struct bnx_softc *sc, uint32_t off)
370 {
371 	device_t dev = sc->bnx_dev;
372 	uint32_t val;
373 
374 	pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, off, 4);
375 	val = pci_read_config(dev, BGE_PCI_MEMWIN_DATA, 4);
376 	pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, 0, 4);
377 	return (val);
378 }
379 
380 static void
381 bnx_writemem_ind(struct bnx_softc *sc, uint32_t off, uint32_t val)
382 {
383 	device_t dev = sc->bnx_dev;
384 
385 	pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, off, 4);
386 	pci_write_config(dev, BGE_PCI_MEMWIN_DATA, val, 4);
387 	pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, 0, 4);
388 }
389 
390 static void
391 bnx_writemem_direct(struct bnx_softc *sc, uint32_t off, uint32_t val)
392 {
393 	CSR_WRITE_4(sc, off, val);
394 }
395 
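/*
 * Write a value to a mailbox register via a normal register access.
 */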
396 static void
397 bnx_writembx(struct bnx_softc *sc, int off, int val)
398 {
399 	CSR_WRITE_4(sc, off, val);
400 }
401 
402 /*
403  * Read a sequence of bytes from NVRAM.
404  */
405 static int
406 bnx_read_nvram(struct bnx_softc *sc, caddr_t dest, int off, int cnt)
407 {
408 	return (1);
409 }
410 
411 /*
412  * Read a byte of data stored in the EEPROM at address 'addr.' The
413  * BCM570x supports both the traditional bitbang interface and an
414  * auto access interface for reading the EEPROM. We use the auto
415  * access method.
416  */
417 static uint8_t
418 bnx_eeprom_getbyte(struct bnx_softc *sc, uint32_t addr, uint8_t *dest)
419 {
420 	int i;
421 	uint32_t byte = 0;
422 
423 	/*
424 	 * Enable use of auto EEPROM access so we can avoid
425 	 * having to use the bitbang method.
426 	 */
427 	BNX_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_AUTO_EEPROM);
428 
429 	/* Reset the EEPROM, load the clock period. */
430 	CSR_WRITE_4(sc, BGE_EE_ADDR,
431 	    BGE_EEADDR_RESET|BGE_EEHALFCLK(BGE_HALFCLK_384SCL));
432 	DELAY(20);
433 
434 	/* Issue the read EEPROM command. */
435 	CSR_WRITE_4(sc, BGE_EE_ADDR, BGE_EE_READCMD | addr);
436 
437 	/* Wait for completion */
438 	for (i = 0; i < BNX_TIMEOUT * 10; i++) {

439 		DELAY(10);
440 		if (CSR_READ_4(sc, BGE_EE_ADDR) & BGE_EEADDR_DONE)
441 			break;
442 	}
443 
444 	if (i == BNX_TIMEOUT * 10) {
445 		if_printf(&sc->arpcom.ac_if, "eeprom read timed out\n");
446 		return(1);
447 	}
448 
449 	/* Get result. */
450 	byte = CSR_READ_4(sc, BGE_EE_DATA);
451 
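	/*
	 * BGE_EE_DATA returns a 32-bit word; pick out the byte that
	 * corresponds to the requested address (addr % 4).
	 */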
452 	*dest = (byte >> ((addr % 4) * 8)) & 0xFF;
453 
454 	return(0);
455 }
456 
457 /*
458  * Read a sequence of bytes from the EEPROM.
459  */
460 static int
461 bnx_read_eeprom(struct bnx_softc *sc, caddr_t dest, uint32_t off, size_t len)
462 {
463 	size_t i;
464 	int err;
465 	uint8_t byte;
466 
467 	for (byte = 0, err = 0, i = 0; i < len; i++) {
468 		err = bnx_eeprom_getbyte(sc, off + i, &byte);
469 		if (err)
470 			break;
471 		*(dest + i) = byte;
472 	}
473 
474 	return(err ? 1 : 0);
475 }
476 
477 static int
478 bnx_miibus_readreg(device_t dev, int phy, int reg)
479 {
480 	struct bnx_softc *sc = device_get_softc(dev);
481 	uint32_t val;
482 	int i;
483 
484 	KASSERT(phy == sc->bnx_phyno,
485 	    ("invalid phyno %d, should be %d", phy, sc->bnx_phyno));
486 
487 	if (bnx_ape_lock(sc, sc->bnx_phy_ape_lock) != 0)
488 		return 0;
489 
490 	/* Clear the autopoll bit if set, otherwise may trigger PCI errors. */
491 	if (sc->bnx_mi_mode & BGE_MIMODE_AUTOPOLL) {
492 		CSR_WRITE_4(sc, BGE_MI_MODE,
493 		    sc->bnx_mi_mode & ~BGE_MIMODE_AUTOPOLL);
494 		DELAY(80);
495 	}
496 
497 	CSR_WRITE_4(sc, BGE_MI_COMM, BGE_MICMD_READ | BGE_MICOMM_BUSY |
498 	    BGE_MIPHY(phy) | BGE_MIREG(reg));
499 
500 	/* Poll for the PHY register access to complete. */
501 	for (i = 0; i < BNX_TIMEOUT; i++) {
502 		DELAY(10);
503 		val = CSR_READ_4(sc, BGE_MI_COMM);
504 		if ((val & BGE_MICOMM_BUSY) == 0) {
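			/*
			 * BUSY just cleared; wait briefly and re-read so
			 * the returned value is stable.
			 */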
505 			DELAY(5);
506 			val = CSR_READ_4(sc, BGE_MI_COMM);
507 			break;
508 		}
509 	}
510 	if (i == BNX_TIMEOUT) {
511 		if_printf(&sc->arpcom.ac_if, "PHY read timed out "
512 		    "(phy %d, reg %d, val 0x%08x)\n", phy, reg, val);
513 		val = 0;
514 	}
515 
516 	/* Restore the autopoll bit if necessary. */
517 	if (sc->bnx_mi_mode & BGE_MIMODE_AUTOPOLL) {
518 		CSR_WRITE_4(sc, BGE_MI_MODE, sc->bnx_mi_mode);
519 		DELAY(80);
520 	}
521 
522 	bnx_ape_unlock(sc, sc->bnx_phy_ape_lock);
523 
524 	if (val & BGE_MICOMM_READFAIL)
525 		return 0;
526 
527 	return (val & 0xFFFF);
528 }
529 
530 static int
531 bnx_miibus_writereg(device_t dev, int phy, int reg, int val)
532 {
533 	struct bnx_softc *sc = device_get_softc(dev);
534 	int i;
535 
536 	KASSERT(phy == sc->bnx_phyno,
537 	    ("invalid phyno %d, should be %d", phy, sc->bnx_phyno));
538 
539 	if (bnx_ape_lock(sc, sc->bnx_phy_ape_lock) != 0)
540 		return 0;
541 
542 	/* Clear the autopoll bit if set, otherwise may trigger PCI errors. */
543 	if (sc->bnx_mi_mode & BGE_MIMODE_AUTOPOLL) {
544 		CSR_WRITE_4(sc, BGE_MI_MODE,
545 		    sc->bnx_mi_mode & ~BGE_MIMODE_AUTOPOLL);
546 		DELAY(80);
547 	}
548 
549 	CSR_WRITE_4(sc, BGE_MI_COMM, BGE_MICMD_WRITE | BGE_MICOMM_BUSY |
550 	    BGE_MIPHY(phy) | BGE_MIREG(reg) | val);
551 
552 	for (i = 0; i < BNX_TIMEOUT; i++) {
553 		DELAY(10);
554 		if (!(CSR_READ_4(sc, BGE_MI_COMM) & BGE_MICOMM_BUSY)) {
555 			DELAY(5);
556 			CSR_READ_4(sc, BGE_MI_COMM); /* dummy read */
557 			break;
558 		}
559 	}
560 	if (i == BNX_TIMEOUT) {
561 		if_printf(&sc->arpcom.ac_if, "PHY write timed out "
562 		    "(phy %d, reg %d, val %d)\n", phy, reg, val);
563 	}
564 
565 	/* Restore the autopoll bit if necessary. */
566 	if (sc->bnx_mi_mode & BGE_MIMODE_AUTOPOLL) {
567 		CSR_WRITE_4(sc, BGE_MI_MODE, sc->bnx_mi_mode);
568 		DELAY(80);
569 	}
570 
571 	bnx_ape_unlock(sc, sc->bnx_phy_ape_lock);
572 
573 	return 0;
574 }
575 
576 static void
577 bnx_miibus_statchg(device_t dev)
578 {
579 	struct bnx_softc *sc;
580 	struct mii_data *mii;
581 	uint32_t mac_mode;
582 
583 	sc = device_get_softc(dev);
584 	if ((sc->arpcom.ac_if.if_flags & IFF_RUNNING) == 0)
585 		return;
586 
587 	mii = device_get_softc(sc->bnx_miibus);
588 
589 	if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) ==
590 	    (IFM_ACTIVE | IFM_AVALID)) {
591 		switch (IFM_SUBTYPE(mii->mii_media_active)) {
592 		case IFM_10_T:
593 		case IFM_100_TX:
594 			sc->bnx_link = 1;
595 			break;
596 		case IFM_1000_T:
597 		case IFM_1000_SX:
598 		case IFM_2500_SX:
599 			sc->bnx_link = 1;
600 			break;
601 		default:
602 			sc->bnx_link = 0;
603 			break;
604 		}
605 	} else {
606 		sc->bnx_link = 0;
607 	}
608 	if (sc->bnx_link == 0)
609 		return;
610 
611 	/*
612 	 * APE firmware touches these registers to keep the MAC
613 	 * connected to the outside world.  Try to keep the
614 	 * accesses atomic.
615 	 */
616 
617 	mac_mode = CSR_READ_4(sc, BGE_MAC_MODE) &
618 	    ~(BGE_MACMODE_PORTMODE | BGE_MACMODE_HALF_DUPLEX);
619 
620 	if (IFM_SUBTYPE(mii->mii_media_active) == IFM_1000_T ||
621 	    IFM_SUBTYPE(mii->mii_media_active) == IFM_1000_SX)
622 		mac_mode |= BGE_PORTMODE_GMII;
623 	else
624 		mac_mode |= BGE_PORTMODE_MII;
625 
626 	if ((mii->mii_media_active & IFM_GMASK) != IFM_FDX)
627 		mac_mode |= BGE_MACMODE_HALF_DUPLEX;
628 
629 	CSR_WRITE_4(sc, BGE_MAC_MODE, mac_mode);
630 	DELAY(40);
631 }
632 
633 /*
634  * Memory management for jumbo frames.
635  */
636 static int
637 bnx_alloc_jumbo_mem(struct bnx_softc *sc)
638 {
639 	struct ifnet *ifp = &sc->arpcom.ac_if;
640 	struct bnx_jslot *entry;
641 	uint8_t *ptr;
642 	bus_addr_t paddr;
643 	int i, error;
644 
645 	/*
646 	 * Create tag for jumbo mbufs.
647 	 * This is really a bit of a kludge. We allocate a special
648 	 * jumbo buffer pool which (thanks to the way our DMA
649 	 * memory allocation works) will consist of contiguous
650 	 * pages. This means that even though a jumbo buffer might
651 	 * be larger than a page size, we don't really need to
652 	 * map it into more than one DMA segment. However, the
653 	 * default mbuf tag will result in multi-segment mappings,
654 	 * so we have to create a special jumbo mbuf tag that
655 	 * lets us get away with mapping the jumbo buffers as
656 	 * a single segment. I think eventually the driver should
657 	 * be changed so that it uses ordinary mbufs and cluster
658 	 * buffers, i.e. jumbo frames can span multiple DMA
659 	 * descriptors. But that's a project for another day.
660 	 */
661 
662 	/*
663 	 * Create DMA stuffs for jumbo RX ring.
664 	 */
665 	error = bnx_dma_block_alloc(sc, BGE_JUMBO_RX_RING_SZ,
666 				    &sc->bnx_cdata.bnx_rx_jumbo_ring_tag,
667 				    &sc->bnx_cdata.bnx_rx_jumbo_ring_map,
668 				    (void *)&sc->bnx_ldata.bnx_rx_jumbo_ring,
669 				    &sc->bnx_ldata.bnx_rx_jumbo_ring_paddr);
670 	if (error) {
671 		if_printf(ifp, "could not create jumbo RX ring\n");
672 		return error;
673 	}
674 
675 	/*
676 	 * Create DMA stuffs for jumbo buffer block.
677 	 */
678 	error = bnx_dma_block_alloc(sc, BNX_JMEM,
679 				    &sc->bnx_cdata.bnx_jumbo_tag,
680 				    &sc->bnx_cdata.bnx_jumbo_map,
681 				    (void **)&sc->bnx_ldata.bnx_jumbo_buf,
682 				    &paddr);
683 	if (error) {
684 		if_printf(ifp, "could not create jumbo buffer\n");
685 		return error;
686 	}
687 
688 	SLIST_INIT(&sc->bnx_jfree_listhead);
689 
690 	/*
691 	 * Now divide it up into 9K pieces and save the addresses
692 	 * in an array. Note that we play an evil trick here by using
693  * the first few bytes in the buffer to hold the address
694 	 * of the softc structure for this interface. This is because
695 	 * bnx_jfree() needs it, but it is called by the mbuf management
696 	 * code which will not pass it to us explicitly.
697 	 */
698 	for (i = 0, ptr = sc->bnx_ldata.bnx_jumbo_buf; i < BNX_JSLOTS; i++) {
699 		entry = &sc->bnx_cdata.bnx_jslots[i];
700 		entry->bnx_sc = sc;
701 		entry->bnx_buf = ptr;
702 		entry->bnx_paddr = paddr;
703 		entry->bnx_inuse = 0;
704 		entry->bnx_slot = i;
705 		SLIST_INSERT_HEAD(&sc->bnx_jfree_listhead, entry, jslot_link);
706 
707 		ptr += BNX_JLEN;
708 		paddr += BNX_JLEN;
709 	}
710 	return 0;
711 }
712 
713 static void
714 bnx_free_jumbo_mem(struct bnx_softc *sc)
715 {
716 	/* Destroy jumbo RX ring. */
717 	bnx_dma_block_free(sc->bnx_cdata.bnx_rx_jumbo_ring_tag,
718 			   sc->bnx_cdata.bnx_rx_jumbo_ring_map,
719 			   sc->bnx_ldata.bnx_rx_jumbo_ring);
720 
721 	/* Destroy jumbo buffer block. */
722 	bnx_dma_block_free(sc->bnx_cdata.bnx_jumbo_tag,
723 			   sc->bnx_cdata.bnx_jumbo_map,
724 			   sc->bnx_ldata.bnx_jumbo_buf);
725 }
726 
727 /*
728  * Allocate a jumbo buffer.
729  */
730 static struct bnx_jslot *
731 bnx_jalloc(struct bnx_softc *sc)
732 {
733 	struct bnx_jslot *entry;
734 
735 	lwkt_serialize_enter(&sc->bnx_jslot_serializer);
736 	entry = SLIST_FIRST(&sc->bnx_jfree_listhead);
737 	if (entry) {
738 		SLIST_REMOVE_HEAD(&sc->bnx_jfree_listhead, jslot_link);
739 		entry->bnx_inuse = 1;
740 	} else {
741 		if_printf(&sc->arpcom.ac_if, "no free jumbo buffers\n");
742 	}
743 	lwkt_serialize_exit(&sc->bnx_jslot_serializer);
744 	return(entry);
745 }
746 
747 /*
748  * Adjust usage count on a jumbo buffer.
749  */
750 static void
751 bnx_jref(void *arg)
752 {
753 	struct bnx_jslot *entry = (struct bnx_jslot *)arg;
754 	struct bnx_softc *sc = entry->bnx_sc;
755 
756 	if (sc == NULL)
757 		panic("bnx_jref: can't find softc pointer!");
758 
759 	if (&sc->bnx_cdata.bnx_jslots[entry->bnx_slot] != entry) {
760 		panic("bnx_jref: asked to reference buffer "
761 		    "that we don't manage!");
762 	} else if (entry->bnx_inuse == 0) {
763 		panic("bnx_jref: buffer already free!");
764 	} else {
765 		atomic_add_int(&entry->bnx_inuse, 1);
766 	}
767 }
768 
769 /*
770  * Release a jumbo buffer.
771  */
772 static void
773 bnx_jfree(void *arg)
774 {
775 	struct bnx_jslot *entry = (struct bnx_jslot *)arg;
776 	struct bnx_softc *sc = entry->bnx_sc;
777 
778 	if (sc == NULL)
779 		panic("bnx_jfree: can't find softc pointer!");
780 
781 	if (&sc->bnx_cdata.bnx_jslots[entry->bnx_slot] != entry) {
782 		panic("bnx_jfree: asked to free buffer that we don't manage!");
783 	} else if (entry->bnx_inuse == 0) {
784 		panic("bnx_jfree: buffer already free!");
785 	} else {
786 		/*
787 		 * Possible MP race to 0, use the serializer.  The atomic insn
788 		 * is still needed for races against bnx_jref().
789 		 */
790 		lwkt_serialize_enter(&sc->bnx_jslot_serializer);
791 		atomic_subtract_int(&entry->bnx_inuse, 1);
792 		if (entry->bnx_inuse == 0) {
793 			SLIST_INSERT_HEAD(&sc->bnx_jfree_listhead,
794 					  entry, jslot_link);
795 		}
796 		lwkt_serialize_exit(&sc->bnx_jslot_serializer);
797 	}
798 }
799 
800 
801 /*
802  * Initialize a standard receive ring descriptor.
803  */
804 static int
805 bnx_newbuf_std(struct bnx_rx_ret_ring *ret, int i, int init)
806 {
807 	struct mbuf *m_new = NULL;
808 	bus_dma_segment_t seg;
809 	bus_dmamap_t map;
810 	int error, nsegs;
811 	struct bnx_rx_buf *rb;
812 
813 	rb = &ret->bnx_std->bnx_rx_std_buf[i];
814 	KASSERT(!rb->bnx_rx_refilled, ("RX buf %dth has been refilled", i));
815 
816 	m_new = m_getcl(init ? M_WAITOK : M_NOWAIT, MT_DATA, M_PKTHDR);
817 	if (m_new == NULL) {
818 		error = ENOBUFS;
819 		goto back;
820 	}
821 	m_new->m_len = m_new->m_pkthdr.len = MCLBYTES;
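	/*
	 * Shift the payload by ETHER_ALIGN (2 bytes) so that the IP
	 * header following the Ethernet header is 32-bit aligned.
	 */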
822 	m_adj(m_new, ETHER_ALIGN);
823 
824 	error = bus_dmamap_load_mbuf_segment(ret->bnx_rx_mtag,
825 	    ret->bnx_rx_tmpmap, m_new, &seg, 1, &nsegs, BUS_DMA_NOWAIT);
826 	if (error) {
827 		m_freem(m_new);
828 		goto back;
829 	}
830 
831 	if (!init) {
832 		bus_dmamap_sync(ret->bnx_rx_mtag, rb->bnx_rx_dmamap,
833 		    BUS_DMASYNC_POSTREAD);
834 		bus_dmamap_unload(ret->bnx_rx_mtag, rb->bnx_rx_dmamap);
835 	}
836 
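	/*
	 * The mbuf was loaded into the ring's spare DMA map; swap that
	 * map with the slot's map so the slot now owns the mapping and
	 * the old map becomes the spare for the next refill.
	 */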
837 	map = ret->bnx_rx_tmpmap;
838 	ret->bnx_rx_tmpmap = rb->bnx_rx_dmamap;
839 
840 	rb->bnx_rx_dmamap = map;
841 	rb->bnx_rx_mbuf = m_new;
842 	rb->bnx_rx_paddr = seg.ds_addr;
843 	rb->bnx_rx_len = m_new->m_len;
844 back:
845 	cpu_sfence();
846 	rb->bnx_rx_refilled = 1;
847 	return error;
848 }
849 
850 static void
851 bnx_setup_rxdesc_std(struct bnx_rx_std_ring *std, int i)
852 {
853 	struct bnx_rx_buf *rb;
854 	struct bge_rx_bd *r;
855 	bus_addr_t paddr;
856 	int len;
857 
858 	rb = &std->bnx_rx_std_buf[i];
859 	KASSERT(rb->bnx_rx_refilled, ("RX buf %dth is not refilled", i));
860 
861 	paddr = rb->bnx_rx_paddr;
862 	len = rb->bnx_rx_len;
863 
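	/*
	 * Order the reads of the refilled address/length above against
	 * clearing bnx_rx_refilled below; pairs with the cpu_sfence()
	 * in bnx_newbuf_std().
	 */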
864 	cpu_mfence();
865 
866 	rb->bnx_rx_refilled = 0;
867 
868 	r = &std->bnx_rx_std_ring[i];
869 	r->bge_addr.bge_addr_lo = BGE_ADDR_LO(paddr);
870 	r->bge_addr.bge_addr_hi = BGE_ADDR_HI(paddr);
871 	r->bge_len = len;
872 	r->bge_idx = i;
873 	r->bge_flags = BGE_RXBDFLAG_END;
874 }
875 
876 /*
877  * Initialize a jumbo receive ring descriptor. This allocates
878  * a jumbo buffer from the pool managed internally by the driver.
879  */
880 static int
881 bnx_newbuf_jumbo(struct bnx_softc *sc, int i, int init)
882 {
883 	struct mbuf *m_new = NULL;
884 	struct bnx_jslot *buf;
885 	bus_addr_t paddr;
886 
887 	/* Allocate the mbuf. */
888 	MGETHDR(m_new, init ? M_WAITOK : M_NOWAIT, MT_DATA);
889 	if (m_new == NULL)
890 		return ENOBUFS;
891 
892 	/* Allocate the jumbo buffer */
893 	buf = bnx_jalloc(sc);
894 	if (buf == NULL) {
895 		m_freem(m_new);
896 		return ENOBUFS;
897 	}
898 
899 	/* Attach the buffer to the mbuf. */
900 	m_new->m_ext.ext_arg = buf;
901 	m_new->m_ext.ext_buf = buf->bnx_buf;
902 	m_new->m_ext.ext_free = bnx_jfree;
903 	m_new->m_ext.ext_ref = bnx_jref;
904 	m_new->m_ext.ext_size = BNX_JUMBO_FRAMELEN;
905 
906 	m_new->m_flags |= M_EXT;
907 
908 	m_new->m_data = m_new->m_ext.ext_buf;
909 	m_new->m_len = m_new->m_pkthdr.len = m_new->m_ext.ext_size;
910 
911 	paddr = buf->bnx_paddr;
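	/*
	 * Apply the same ETHER_ALIGN adjustment to both the mbuf data
	 * pointer and the DMA address so the two stay in sync.
	 */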
912 	m_adj(m_new, ETHER_ALIGN);
913 	paddr += ETHER_ALIGN;
914 
915 	/* Save necessary information */
916 	sc->bnx_cdata.bnx_rx_jumbo_chain[i].bnx_rx_mbuf = m_new;
917 	sc->bnx_cdata.bnx_rx_jumbo_chain[i].bnx_rx_paddr = paddr;
918 
919 	/* Set up the descriptor. */
920 	bnx_setup_rxdesc_jumbo(sc, i);
921 	return 0;
922 }
923 
924 static void
925 bnx_setup_rxdesc_jumbo(struct bnx_softc *sc, int i)
926 {
927 	struct bge_rx_bd *r;
928 	struct bnx_rx_buf *rc;
929 
930 	r = &sc->bnx_ldata.bnx_rx_jumbo_ring[i];
931 	rc = &sc->bnx_cdata.bnx_rx_jumbo_chain[i];
932 
933 	r->bge_addr.bge_addr_lo = BGE_ADDR_LO(rc->bnx_rx_paddr);
934 	r->bge_addr.bge_addr_hi = BGE_ADDR_HI(rc->bnx_rx_paddr);
935 	r->bge_len = rc->bnx_rx_mbuf->m_len;
936 	r->bge_idx = i;
937 	r->bge_flags = BGE_RXBDFLAG_END|BGE_RXBDFLAG_JUMBO_RING;
938 }
939 
940 static int
941 bnx_init_rx_ring_std(struct bnx_rx_std_ring *std)
942 {
943 	int i, error;
944 
945 	for (i = 0; i < BGE_STD_RX_RING_CNT; i++) {
946 		/* Use the first RX return ring's tmp RX mbuf DMA map */
947 		error = bnx_newbuf_std(&std->bnx_sc->bnx_rx_ret_ring[0], i, 1);
948 		if (error)
949 			return error;
950 		bnx_setup_rxdesc_std(std, i);
951 	}
952 
953 	std->bnx_rx_std_used = 0;
954 	std->bnx_rx_std_refill = 0;
955 	std->bnx_rx_std_running = 0;
956 	cpu_sfence();
957 	lwkt_serialize_handler_enable(&std->bnx_rx_std_serialize);
958 
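	/*
	 * All BGE_STD_RX_RING_CNT buffers are populated; advertise the
	 * last index through the standard ring's producer mailbox.
	 */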
959 	std->bnx_rx_std = BGE_STD_RX_RING_CNT - 1;
960 	bnx_writembx(std->bnx_sc, BGE_MBX_RX_STD_PROD_LO, std->bnx_rx_std);
961 
962 	return(0);
963 }
964 
965 static void
966 bnx_free_rx_ring_std(struct bnx_rx_std_ring *std)
967 {
968 	int i;
969 
970 	lwkt_serialize_handler_disable(&std->bnx_rx_std_serialize);
971 
972 	for (i = 0; i < BGE_STD_RX_RING_CNT; i++) {
973 		struct bnx_rx_buf *rb = &std->bnx_rx_std_buf[i];
974 
975 		rb->bnx_rx_refilled = 0;
976 		if (rb->bnx_rx_mbuf != NULL) {
977 			bus_dmamap_unload(std->bnx_rx_mtag, rb->bnx_rx_dmamap);
978 			m_freem(rb->bnx_rx_mbuf);
979 			rb->bnx_rx_mbuf = NULL;
980 		}
981 		bzero(&std->bnx_rx_std_ring[i], sizeof(struct bge_rx_bd));
982 	}
983 }
984 
985 static int
986 bnx_init_rx_ring_jumbo(struct bnx_softc *sc)
987 {
988 	struct bge_rcb *rcb;
989 	int i, error;
990 
991 	for (i = 0; i < BGE_JUMBO_RX_RING_CNT; i++) {
992 		error = bnx_newbuf_jumbo(sc, i, 1);
993 		if (error)
994 			return error;
995 	}
996 
997 	sc->bnx_jumbo = BGE_JUMBO_RX_RING_CNT - 1;
998 
999 	rcb = &sc->bnx_ldata.bnx_info.bnx_jumbo_rx_rcb;
1000 	rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(0, 0);
1001 	CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_MAXLEN_FLAGS, rcb->bge_maxlen_flags);
1002 
1003 	bnx_writembx(sc, BGE_MBX_RX_JUMBO_PROD_LO, sc->bnx_jumbo);
1004 
1005 	return(0);
1006 }
1007 
1008 static void
1009 bnx_free_rx_ring_jumbo(struct bnx_softc *sc)
1010 {
1011 	int i;
1012 
1013 	for (i = 0; i < BGE_JUMBO_RX_RING_CNT; i++) {
1014 		struct bnx_rx_buf *rc = &sc->bnx_cdata.bnx_rx_jumbo_chain[i];
1015 
1016 		if (rc->bnx_rx_mbuf != NULL) {
1017 			m_freem(rc->bnx_rx_mbuf);
1018 			rc->bnx_rx_mbuf = NULL;
1019 		}
1020 		bzero(&sc->bnx_ldata.bnx_rx_jumbo_ring[i],
1021 		    sizeof(struct bge_rx_bd));
1022 	}
1023 }
1024 
1025 static void
1026 bnx_free_tx_ring(struct bnx_tx_ring *txr)
1027 {
1028 	int i;
1029 
1030 	for (i = 0; i < BGE_TX_RING_CNT; i++) {
1031 		struct bnx_tx_buf *buf = &txr->bnx_tx_buf[i];
1032 
1033 		if (buf->bnx_tx_mbuf != NULL) {
1034 			bus_dmamap_unload(txr->bnx_tx_mtag,
1035 			    buf->bnx_tx_dmamap);
1036 			m_freem(buf->bnx_tx_mbuf);
1037 			buf->bnx_tx_mbuf = NULL;
1038 		}
1039 		bzero(&txr->bnx_tx_ring[i], sizeof(struct bge_tx_bd));
1040 	}
1041 	txr->bnx_tx_saved_considx = BNX_TXCONS_UNSET;
1042 }
1043 
1044 static int
1045 bnx_init_tx_ring(struct bnx_tx_ring *txr)
1046 {
1047 	txr->bnx_tx_cnt = 0;
1048 	txr->bnx_tx_saved_considx = 0;
1049 	txr->bnx_tx_prodidx = 0;
1050 
1051 	/* Initialize transmit producer index for host-memory send ring. */
1052 	bnx_writembx(txr->bnx_sc, txr->bnx_tx_mbx, txr->bnx_tx_prodidx);
1053 
1054 	return(0);
1055 }
1056 
1057 static void
1058 bnx_setmulti(struct bnx_softc *sc)
1059 {
1060 	struct ifnet *ifp;
1061 	struct ifmultiaddr *ifma;
1062 	uint32_t hashes[4] = { 0, 0, 0, 0 };
1063 	int h, i;
1064 
1065 	ifp = &sc->arpcom.ac_if;
1066 
1067 	if (ifp->if_flags & IFF_ALLMULTI || ifp->if_flags & IFF_PROMISC) {
1068 		for (i = 0; i < 4; i++)
1069 			CSR_WRITE_4(sc, BGE_MAR0 + (i * 4), 0xFFFFFFFF);
1070 		return;
1071 	}
1072 
1073 	/* First, zot all the existing filters. */
1074 	for (i = 0; i < 4; i++)
1075 		CSR_WRITE_4(sc, BGE_MAR0 + (i * 4), 0);
1076 
1077 	/* Now program new ones. */
1078 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1079 		if (ifma->ifma_addr->sa_family != AF_LINK)
1080 			continue;
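		/*
		 * Use the low 7 bits of the little-endian CRC32 of the
		 * address: bits 6-5 select one of the four MAR registers,
		 * bits 4-0 select the bit within that register.
		 */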
1081 		h = ether_crc32_le(
1082 		    LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1083 		    ETHER_ADDR_LEN) & 0x7f;
1084 		hashes[(h & 0x60) >> 5] |= 1 << (h & 0x1F);
1085 	}
1086 
1087 	for (i = 0; i < 4; i++)
1088 		CSR_WRITE_4(sc, BGE_MAR0 + (i * 4), hashes[i]);
1089 }
1090 
1091 /*
1092  * Do endian, PCI and DMA initialization. Also check the on-board ROM
1093  * self-test results.
1094  */
1095 static int
1096 bnx_chipinit(struct bnx_softc *sc)
1097 {
1098 	uint32_t dma_rw_ctl, mode_ctl;
1099 	int i;
1100 
1101 	/* Set endian type before we access any non-PCI registers. */
1102 	pci_write_config(sc->bnx_dev, BGE_PCI_MISC_CTL,
1103 	    BGE_INIT | BGE_PCIMISCCTL_TAGGED_STATUS, 4);
1104 
1105 	/*
1106 	 * Clear the MAC statistics block in the NIC's
1107 	 * internal memory.
1108 	 */
1109 	for (i = BGE_STATS_BLOCK;
1110 	    i < BGE_STATS_BLOCK_END + 1; i += sizeof(uint32_t))
1111 		BNX_MEMWIN_WRITE(sc, i, 0);
1112 
1113 	for (i = BGE_STATUS_BLOCK;
1114 	    i < BGE_STATUS_BLOCK_END + 1; i += sizeof(uint32_t))
1115 		BNX_MEMWIN_WRITE(sc, i, 0);
1116 
1117 	if (BNX_IS_57765_FAMILY(sc)) {
1118 		uint32_t val;
1119 
1120 		if (sc->bnx_chipid == BGE_CHIPID_BCM57765_A0) {
1121 			mode_ctl = CSR_READ_4(sc, BGE_MODE_CTL);
1122 			val = mode_ctl & ~BGE_MODECTL_PCIE_PORTS;
1123 
1124 			/* Access the lower 1K of PL PCI-E block registers. */
1125 			CSR_WRITE_4(sc, BGE_MODE_CTL,
1126 			    val | BGE_MODECTL_PCIE_PL_SEL);
1127 
1128 			val = CSR_READ_4(sc, BGE_PCIE_PL_LO_PHYCTL5);
1129 			val |= BGE_PCIE_PL_LO_PHYCTL5_DIS_L2CLKREQ;
1130 			CSR_WRITE_4(sc, BGE_PCIE_PL_LO_PHYCTL5, val);
1131 
1132 			CSR_WRITE_4(sc, BGE_MODE_CTL, mode_ctl);
1133 		}
1134 		if (sc->bnx_chiprev != BGE_CHIPREV_57765_AX) {
1135 			/* Fix transmit hangs */
1136 			val = CSR_READ_4(sc, BGE_CPMU_PADRNG_CTL);
1137 			val |= BGE_CPMU_PADRNG_CTL_RDIV2;
1138 			CSR_WRITE_4(sc, BGE_CPMU_PADRNG_CTL, val);
1139 
1140 			mode_ctl = CSR_READ_4(sc, BGE_MODE_CTL);
1141 			val = mode_ctl & ~BGE_MODECTL_PCIE_PORTS;
1142 
1143 			/* Access the lower 1K of DL PCI-E block registers. */
1144 			CSR_WRITE_4(sc, BGE_MODE_CTL,
1145 			    val | BGE_MODECTL_PCIE_DL_SEL);
1146 
1147 			val = CSR_READ_4(sc, BGE_PCIE_DL_LO_FTSMAX);
1148 			val &= ~BGE_PCIE_DL_LO_FTSMAX_MASK;
1149 			val |= BGE_PCIE_DL_LO_FTSMAX_VAL;
1150 			CSR_WRITE_4(sc, BGE_PCIE_DL_LO_FTSMAX, val);
1151 
1152 			CSR_WRITE_4(sc, BGE_MODE_CTL, mode_ctl);
1153 		}
1154 
1155 		val = CSR_READ_4(sc, BGE_CPMU_LSPD_10MB_CLK);
1156 		val &= ~BGE_CPMU_LSPD_10MB_MACCLK_MASK;
1157 		val |= BGE_CPMU_LSPD_10MB_MACCLK_6_25;
1158 		CSR_WRITE_4(sc, BGE_CPMU_LSPD_10MB_CLK, val);
1159 	}
1160 
1161 	/*
1162 	 * Set up the PCI DMA control register.
1163 	 */
1164 	dma_rw_ctl = pci_read_config(sc->bnx_dev, BGE_PCI_DMA_RW_CTL, 4);
1165 	/*
1166 	 * Disable 32-byte cache alignment for DMA writes to host memory
1167 	 *
1168 	 * NOTE:
1169 	 * 64-byte cache alignment for DMA writes to host memory is still
1170 	 * enabled.
1171 	 */
1172 	dma_rw_ctl |= BGE_PCIDMARWCTL_DIS_CACHE_ALIGNMENT;
1173 	if (sc->bnx_chipid == BGE_CHIPID_BCM57765_A0)
1174 		dma_rw_ctl &= ~BGE_PCIDMARWCTL_CRDRDR_RDMA_MRRS_MSK;
1175 	/*
1176 	 * Enable HW workaround for controllers that misinterpret
1177 	 * a status tag update and leave interrupts permanently
1178 	 * disabled.
1179 	 */
1180 	if (sc->bnx_asicrev != BGE_ASICREV_BCM5717 &&
1181 	    sc->bnx_asicrev != BGE_ASICREV_BCM5762 &&
1182 	    !BNX_IS_57765_FAMILY(sc))
1183 		dma_rw_ctl |= BGE_PCIDMARWCTL_TAGGED_STATUS_WA;
1184 	if (bootverbose) {
1185 		if_printf(&sc->arpcom.ac_if, "DMA read/write %#x\n",
1186 		    dma_rw_ctl);
1187 	}
1188 	pci_write_config(sc->bnx_dev, BGE_PCI_DMA_RW_CTL, dma_rw_ctl, 4);
1189 
1190 	/*
1191 	 * Set up general mode register.
1192 	 */
1193 	mode_ctl = bnx_dma_swap_options(sc);
1194 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5720 ||
1195 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762) {
1196 		/* Retain Host-2-BMC settings written by APE firmware. */
1197 		mode_ctl |= CSR_READ_4(sc, BGE_MODE_CTL) &
1198 		    (BGE_MODECTL_BYTESWAP_B2HRX_DATA |
1199 		    BGE_MODECTL_WORDSWAP_B2HRX_DATA |
1200 		    BGE_MODECTL_B2HRX_ENABLE | BGE_MODECTL_HTX2B_ENABLE);
1201 	}
1202 	mode_ctl |= BGE_MODECTL_MAC_ATTN_INTR |
1203 	    BGE_MODECTL_HOST_SEND_BDS | BGE_MODECTL_TX_NO_PHDR_CSUM;
1204 	CSR_WRITE_4(sc, BGE_MODE_CTL, mode_ctl);
1205 
1206 	/*
1207 	 * Disable memory write invalidate.  Apparently it is not supported
1208 	 * properly by these devices.  Also ensure that INTx isn't disabled,
1209 	 * as these chips need it even when using MSI.
1210 	 */
1211 	PCI_CLRBIT(sc->bnx_dev, BGE_PCI_CMD,
1212 	    (PCIM_CMD_MWRICEN | PCIM_CMD_INTxDIS), 4);
1213 
1214 	/* Set the timer prescaler (always 66MHz) */
1215 	CSR_WRITE_4(sc, BGE_MISC_CFG, 65 << 1/*BGE_32BITTIME_66MHZ*/);
1216 
1217 	return(0);
1218 }
1219 
1220 static int
1221 bnx_blockinit(struct bnx_softc *sc)
1222 {
1223 	struct bnx_intr_data *intr;
1224 	struct bge_rcb *rcb;
1225 	bus_size_t vrcb;
1226 	bge_hostaddr taddr;
1227 	uint32_t val;
1228 	int i, limit;
1229 
1230 	/*
1231 	 * Initialize the memory window pointer register so that
1232 	 * we can access the first 32K of internal NIC RAM. This will
1233 	 * allow us to set up the TX send ring RCBs and the RX return
1234 	 * ring RCBs, plus other things which live in NIC memory.
1235 	 */
1236 	CSR_WRITE_4(sc, BGE_PCI_MEMWIN_BASEADDR, 0);
1237 
1238 	/* Configure mbuf pool watermarks */
1239 	if (BNX_IS_57765_PLUS(sc)) {
1240 		CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_READDMA_LOWAT, 0x0);
1241 		if (sc->arpcom.ac_if.if_mtu > ETHERMTU) {
1242 			CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_MACRX_LOWAT, 0x7e);
1243 			CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_HIWAT, 0xea);
1244 		} else {
1245 			CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_MACRX_LOWAT, 0x2a);
1246 			CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_HIWAT, 0xa0);
1247 		}
1248 	} else {
1249 		CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_READDMA_LOWAT, 0x0);
1250 		CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_MACRX_LOWAT, 0x10);
1251 		CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_HIWAT, 0x60);
1252 	}
1253 
1254 	/* Configure DMA resource watermarks */
1255 	CSR_WRITE_4(sc, BGE_BMAN_DMA_DESCPOOL_LOWAT, 5);
1256 	CSR_WRITE_4(sc, BGE_BMAN_DMA_DESCPOOL_HIWAT, 10);
1257 
1258 	/* Enable buffer manager */
1259 	val = BGE_BMANMODE_ENABLE | BGE_BMANMODE_LOMBUF_ATTN;
1260 	/*
1261 	 * Change the arbitration algorithm of TXMBUF read request to
1262 	 * round-robin instead of priority based for BCM5719.  When
1263 	 * TXFIFO is almost empty, RDMA will hold its request until
1264 	 * TXFIFO is not almost empty.
1265 	 */
1266 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5719)
1267 		val |= BGE_BMANMODE_NO_TX_UNDERRUN;
1268 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5717 ||
1269 	    sc->bnx_chipid == BGE_CHIPID_BCM5719_A0 ||
1270 	    sc->bnx_chipid == BGE_CHIPID_BCM5720_A0)
1271 		val |= BGE_BMANMODE_LOMBUF_ATTN;
1272 	CSR_WRITE_4(sc, BGE_BMAN_MODE, val);
1273 
1274 	/* Poll for buffer manager start indication */
1275 	for (i = 0; i < BNX_TIMEOUT; i++) {
1276 		if (CSR_READ_4(sc, BGE_BMAN_MODE) & BGE_BMANMODE_ENABLE)
1277 			break;
1278 		DELAY(10);
1279 	}
1280 
1281 	if (i == BNX_TIMEOUT) {
1282 		if_printf(&sc->arpcom.ac_if,
1283 			  "buffer manager failed to start\n");
1284 		return(ENXIO);
1285 	}
1286 
1287 	/* Enable flow-through queues */
1288 	CSR_WRITE_4(sc, BGE_FTQ_RESET, 0xFFFFFFFF);
1289 	CSR_WRITE_4(sc, BGE_FTQ_RESET, 0);
1290 
1291 	/* Wait until queue initialization is complete */
1292 	for (i = 0; i < BNX_TIMEOUT; i++) {
1293 		if (CSR_READ_4(sc, BGE_FTQ_RESET) == 0)
1294 			break;
1295 		DELAY(10);
1296 	}
1297 
1298 	if (i == BNX_TIMEOUT) {
1299 		if_printf(&sc->arpcom.ac_if,
1300 			  "flow-through queue init failed\n");
1301 		return(ENXIO);
1302 	}
1303 
1304 	/*
1305 	 * Summary of rings supported by the controller:
1306 	 *
1307 	 * Standard Receive Producer Ring
1308 	 * - This ring is used to feed receive buffers for "standard"
1309 	 *   sized frames (typically 1536 bytes) to the controller.
1310 	 *
1311 	 * Jumbo Receive Producer Ring
1312 	 * - This ring is used to feed receive buffers for jumbo sized
1313 	 *   frames (i.e. anything bigger than the "standard" frames)
1314 	 *   to the controller.
1315 	 *
1316 	 * Mini Receive Producer Ring
1317 	 * - This ring is used to feed receive buffers for "mini"
1318 	 *   sized frames to the controller.
1319 	 * - This feature required external memory for the controller
1320 	 *   but was never used in a production system.  Should always
1321 	 *   be disabled.
1322 	 *
1323 	 * Receive Return Ring
1324 	 * - After the controller has placed an incoming frame into a
1325 	 *   receive buffer that buffer is moved into a receive return
1326 	 *   ring.  The driver is then responsible for passing the
1327 	 *   buffer up to the stack.  BCM5718/BCM57785 families support
1328 	 *   multiple receive return rings.
1329 	 *
1330 	 * Send Ring
1331 	 * - This ring is used for outgoing frames.  BCM5719/BCM5720
1332 	 *   support multiple send rings.
1333 	 */
1334 
1335 	/* Initialize the standard receive producer ring control block. */
1336 	rcb = &sc->bnx_ldata.bnx_info.bnx_std_rx_rcb;
1337 	rcb->bge_hostaddr.bge_addr_lo =
1338 	    BGE_ADDR_LO(sc->bnx_rx_std_ring.bnx_rx_std_ring_paddr);
1339 	rcb->bge_hostaddr.bge_addr_hi =
1340 	    BGE_ADDR_HI(sc->bnx_rx_std_ring.bnx_rx_std_ring_paddr);
1341 	if (BNX_IS_57765_PLUS(sc)) {
1342 		/*
1343 		 * Bits 31-16: Programmable ring size (2048, 1024, 512, .., 32)
1344 		 * Bits 15-2 : Maximum RX frame size
1345 	 * Bit 1     : 1 = Ring Disabled, 0 = Ring Enabled
1346 		 * Bit 0     : Reserved
1347 		 */
1348 		rcb->bge_maxlen_flags =
1349 		    BGE_RCB_MAXLEN_FLAGS(512, BNX_MAX_FRAMELEN << 2);
1350 	} else {
1351 		/*
1352 		 * Bits 31-16: Programmable ring size (512, 256, 128, 64, 32)
1353 		 * Bits 15-2 : Reserved (should be 0)
1354 		 * Bit 1     : 1 = Ring Disabled, 0 = Ring Enabled
1355 		 * Bit 0     : Reserved
1356 		 */
1357 		rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(512, 0);
1358 	}
1359 	if (BNX_IS_5717_PLUS(sc))
1360 		rcb->bge_nicaddr = BGE_STD_RX_RINGS_5717;
1361 	else
1362 		rcb->bge_nicaddr = BGE_STD_RX_RINGS;
1363 	/* Write the standard receive producer ring control block. */
1364 	CSR_WRITE_4(sc, BGE_RX_STD_RCB_HADDR_HI, rcb->bge_hostaddr.bge_addr_hi);
1365 	CSR_WRITE_4(sc, BGE_RX_STD_RCB_HADDR_LO, rcb->bge_hostaddr.bge_addr_lo);
1366 	CSR_WRITE_4(sc, BGE_RX_STD_RCB_MAXLEN_FLAGS, rcb->bge_maxlen_flags);
1367 	if (!BNX_IS_5717_PLUS(sc))
1368 		CSR_WRITE_4(sc, BGE_RX_STD_RCB_NICADDR, rcb->bge_nicaddr);
1369 	/* Reset the standard receive producer ring producer index. */
1370 	bnx_writembx(sc, BGE_MBX_RX_STD_PROD_LO, 0);
1371 
1372 	/*
1373 	 * Initialize the jumbo RX producer ring control
1374 	 * block.  We set the 'ring disabled' bit in the
1375 	 * flags field until we're actually ready to start
1376 	 * using this ring (i.e. once we set the MTU
1377 	 * high enough to require it).
1378 	 */
1379 	if (BNX_IS_JUMBO_CAPABLE(sc)) {
1380 		rcb = &sc->bnx_ldata.bnx_info.bnx_jumbo_rx_rcb;
1381 		/* Get the jumbo receive producer ring RCB parameters. */
1382 		rcb->bge_hostaddr.bge_addr_lo =
1383 		    BGE_ADDR_LO(sc->bnx_ldata.bnx_rx_jumbo_ring_paddr);
1384 		rcb->bge_hostaddr.bge_addr_hi =
1385 		    BGE_ADDR_HI(sc->bnx_ldata.bnx_rx_jumbo_ring_paddr);
1386 		rcb->bge_maxlen_flags =
1387 		    BGE_RCB_MAXLEN_FLAGS(BNX_MAX_FRAMELEN,
1388 		    BGE_RCB_FLAG_RING_DISABLED);
1389 		if (BNX_IS_5717_PLUS(sc))
1390 			rcb->bge_nicaddr = BGE_JUMBO_RX_RINGS_5717;
1391 		else
1392 			rcb->bge_nicaddr = BGE_JUMBO_RX_RINGS;
1393 		CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_HADDR_HI,
1394 		    rcb->bge_hostaddr.bge_addr_hi);
1395 		CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_HADDR_LO,
1396 		    rcb->bge_hostaddr.bge_addr_lo);
1397 		/* Program the jumbo receive producer ring RCB parameters. */
1398 		CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_MAXLEN_FLAGS,
1399 		    rcb->bge_maxlen_flags);
1400 		CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_NICADDR, rcb->bge_nicaddr);
1401 		/* Reset the jumbo receive producer ring producer index. */
1402 		bnx_writembx(sc, BGE_MBX_RX_JUMBO_PROD_LO, 0);
1403 	}
1404 
1405 	/*
1406 	 * The BD ring replenish thresholds control how often the
1407 	 * hardware fetches new BD's from the producer rings in host
1408 	 * memory.  Setting the value too low on a busy system can
1409 	 * starve the hardware and reduce the throughput.
1410 	 *
1411 	 * Set the BD ring replenish thresholds. The recommended
1412 	 * values are 1/8th the number of descriptors allocated to
1413 	 * each ring.
1414 	 */
1415 	val = 8;
1416 	CSR_WRITE_4(sc, BGE_RBDI_STD_REPL_THRESH, val);
1417 	if (BNX_IS_JUMBO_CAPABLE(sc)) {
1418 		CSR_WRITE_4(sc, BGE_RBDI_JUMBO_REPL_THRESH,
1419 		    BGE_JUMBO_RX_RING_CNT/8);
1420 	}
1421 	if (BNX_IS_57765_PLUS(sc)) {
1422 		CSR_WRITE_4(sc, BGE_STD_REPLENISH_LWM, 32);
1423 		CSR_WRITE_4(sc, BGE_JMB_REPLENISH_LWM, 16);
1424 	}
1425 
1426 	/*
1427 	 * Disable all send rings by setting the 'ring disabled' bit
1428 	 * in the flags field of all the TX send ring control blocks,
1429 	 * located in NIC memory.
1430 	 */
1431 	if (BNX_IS_5717_PLUS(sc))
1432 		limit = 4;
1433 	else if (BNX_IS_57765_FAMILY(sc) ||
1434 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762)
1435 		limit = 2;
1436 	else
1437 		limit = 1;
1438 	vrcb = BGE_MEMWIN_START + BGE_SEND_RING_RCB;
1439 	for (i = 0; i < limit; i++) {
1440 		RCB_WRITE_4(sc, vrcb, bge_maxlen_flags,
1441 		    BGE_RCB_MAXLEN_FLAGS(0, BGE_RCB_FLAG_RING_DISABLED));
1442 		vrcb += sizeof(struct bge_rcb);
1443 	}
1444 
1445 	/*
1446 	 * Configure send ring RCBs
1447 	 */
1448 	vrcb = BGE_MEMWIN_START + BGE_SEND_RING_RCB;
1449 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
1450 		struct bnx_tx_ring *txr = &sc->bnx_tx_ring[i];
1451 
1452 		BGE_HOSTADDR(taddr, txr->bnx_tx_ring_paddr);
1453 		RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_hi,
1454 		    taddr.bge_addr_hi);
1455 		RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_lo,
1456 		    taddr.bge_addr_lo);
1457 		RCB_WRITE_4(sc, vrcb, bge_maxlen_flags,
1458 		    BGE_RCB_MAXLEN_FLAGS(BGE_TX_RING_CNT, 0));
1459 		vrcb += sizeof(struct bge_rcb);
1460 	}
1461 
1462 	/*
1463 	 * Disable all receive return rings by setting the
1464 	 * 'ring disabled' bit in the flags field of all the receive
1465 	 * return ring control blocks, located in NIC memory.
1466 	 */
1467 	if (BNX_IS_5717_PLUS(sc)) {
1468 		/* Should be 17, use 16 until we get an SRAM map. */
1469 		limit = 16;
1470 	} else if (BNX_IS_57765_FAMILY(sc) ||
1471 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762) {
1472 		limit = 4;
1473 	} else {
1474 		limit = 1;
1475 	}
1476 	/* Disable all receive return rings. */
1477 	vrcb = BGE_MEMWIN_START + BGE_RX_RETURN_RING_RCB;
1478 	for (i = 0; i < limit; i++) {
1479 		RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_hi, 0);
1480 		RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_lo, 0);
1481 		RCB_WRITE_4(sc, vrcb, bge_maxlen_flags,
1482 		    BGE_RCB_FLAG_RING_DISABLED);
1483 		bnx_writembx(sc, BGE_MBX_RX_CONS0_LO +
1484 		    (i * (sizeof(uint64_t))), 0);
1485 		vrcb += sizeof(struct bge_rcb);
1486 	}
1487 
1488 	/*
1489 	 * Set up receive return rings.
1490 	 */
1491 	vrcb = BGE_MEMWIN_START + BGE_RX_RETURN_RING_RCB;
1492 	for (i = 0; i < sc->bnx_rx_retcnt; ++i) {
1493 		struct bnx_rx_ret_ring *ret = &sc->bnx_rx_ret_ring[i];
1494 
1495 		BGE_HOSTADDR(taddr, ret->bnx_rx_ret_ring_paddr);
1496 		RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_hi,
1497 		    taddr.bge_addr_hi);
1498 		RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_lo,
1499 		    taddr.bge_addr_lo);
1500 		RCB_WRITE_4(sc, vrcb, bge_maxlen_flags,
1501 		    BGE_RCB_MAXLEN_FLAGS(BNX_RETURN_RING_CNT, 0));
1502 		vrcb += sizeof(struct bge_rcb);
1503 	}
1504 
1505 	/* Set random backoff seed for TX */
1506 	CSR_WRITE_4(sc, BGE_TX_RANDOM_BACKOFF,
1507 	    (sc->arpcom.ac_enaddr[0] + sc->arpcom.ac_enaddr[1] +
1508 	     sc->arpcom.ac_enaddr[2] + sc->arpcom.ac_enaddr[3] +
1509 	     sc->arpcom.ac_enaddr[4] + sc->arpcom.ac_enaddr[5]) &
1510 	    BGE_TX_BACKOFF_SEED_MASK);
1511 
1512 	/* Set inter-packet gap */
1513 	val = 0x2620;
1514 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5720 ||
1515 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762) {
1516 		val |= CSR_READ_4(sc, BGE_TX_LENGTHS) &
1517 		    (BGE_TXLEN_JMB_FRM_LEN_MSK | BGE_TXLEN_CNT_DN_VAL_MSK);
1518 	}
1519 	CSR_WRITE_4(sc, BGE_TX_LENGTHS, val);
1520 
1521 	/*
1522 	 * Specify which ring to use for packets that don't match
1523 	 * any RX rules.
1524 	 */
1525 	CSR_WRITE_4(sc, BGE_RX_RULES_CFG, 0x08);
1526 
1527 	/*
1528 	 * Configure number of RX lists. One interrupt distribution
1529 	 * list, sixteen active lists, one bad frames class.
1530 	 */
1531 	CSR_WRITE_4(sc, BGE_RXLP_CFG, 0x181);
1532 
1533 	/* Initialize RX list placement stats mask. */
1534 	CSR_WRITE_4(sc, BGE_RXLP_STATS_ENABLE_MASK, 0x007FFFFF);
1535 	CSR_WRITE_4(sc, BGE_RXLP_STATS_CTL, 0x1);
1536 
1537 	/* Disable host coalescing until we get it set up */
1538 	CSR_WRITE_4(sc, BGE_HCC_MODE, 0x00000000);
1539 
1540 	/* Poll to make sure it's shut down. */
1541 	for (i = 0; i < BNX_TIMEOUT; i++) {
1542 		if (!(CSR_READ_4(sc, BGE_HCC_MODE) & BGE_HCCMODE_ENABLE))
1543 			break;
1544 		DELAY(10);
1545 	}
1546 
1547 	if (i == BNX_TIMEOUT) {
1548 		if_printf(&sc->arpcom.ac_if,
1549 			  "host coalescing engine failed to idle\n");
1550 		return(ENXIO);
1551 	}
1552 
1553 	/* Set up host coalescing defaults */
1554 	sc->bnx_coal_chg = BNX_RX_COAL_TICKS_CHG |
1555 	    BNX_TX_COAL_TICKS_CHG |
1556 	    BNX_RX_COAL_BDS_CHG |
1557 	    BNX_TX_COAL_BDS_CHG |
1558 	    BNX_RX_COAL_BDS_INT_CHG |
1559 	    BNX_TX_COAL_BDS_INT_CHG;
1560 	bnx_coal_change(sc);
1561 
1562 	/*
1563 	 * Set up addresses of status blocks
1564 	 */
1565 	intr = &sc->bnx_intr_data[0];
1566 	bzero(intr->bnx_status_block, BGE_STATUS_BLK_SZ);
1567 	CSR_WRITE_4(sc, BGE_HCC_STATUSBLK_ADDR_HI,
1568 	    BGE_ADDR_HI(intr->bnx_status_block_paddr));
1569 	CSR_WRITE_4(sc, BGE_HCC_STATUSBLK_ADDR_LO,
1570 	    BGE_ADDR_LO(intr->bnx_status_block_paddr));
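	/*
	 * Each additional interrupt vector has its own status block; the
	 * address registers for vector N are BGE_VEC1_STATUSBLK_ADDR_HI/LO
	 * plus (N - 1) * 8.
	 */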
1571 	for (i = 1; i < sc->bnx_intr_cnt; ++i) {
1572 		intr = &sc->bnx_intr_data[i];
1573 		bzero(intr->bnx_status_block, BGE_STATUS_BLK_SZ);
1574 		CSR_WRITE_4(sc, BGE_VEC1_STATUSBLK_ADDR_HI + ((i - 1) * 8),
1575 		    BGE_ADDR_HI(intr->bnx_status_block_paddr));
1576 		CSR_WRITE_4(sc, BGE_VEC1_STATUSBLK_ADDR_LO + ((i - 1) * 8),
1577 		    BGE_ADDR_LO(intr->bnx_status_block_paddr));
1578 	}
1579 
1580 	/* Set up status block partial update size. */
1581 	val = BGE_STATBLKSZ_32BYTE;
1582 #if 0
1583 	/*
1584 	 * Does not seem to have a visible effect in either
1585 	 * bulk data (1472B UDP datagram) or tiny data
1586 	 * (18B UDP datagram) TX tests.
1587 	 */
1588 	val |= BGE_HCCMODE_CLRTICK_TX;
1589 #endif
1590 	/* Turn on host coalescing state machine */
1591 	CSR_WRITE_4(sc, BGE_HCC_MODE, val | BGE_HCCMODE_ENABLE);
1592 
1593 	/* Turn on RX BD completion state machine and enable attentions */
1594 	CSR_WRITE_4(sc, BGE_RBDC_MODE,
1595 	    BGE_RBDCMODE_ENABLE|BGE_RBDCMODE_ATTN);
1596 
1597 	/* Turn on RX list placement state machine */
1598 	CSR_WRITE_4(sc, BGE_RXLP_MODE, BGE_RXLPMODE_ENABLE);
1599 
1600 	val = BGE_MACMODE_TXDMA_ENB | BGE_MACMODE_RXDMA_ENB |
1601 	    BGE_MACMODE_RX_STATS_CLEAR | BGE_MACMODE_TX_STATS_CLEAR |
1602 	    BGE_MACMODE_RX_STATS_ENB | BGE_MACMODE_TX_STATS_ENB |
1603 	    BGE_MACMODE_FRMHDR_DMA_ENB;
1604 
1605 	if (sc->bnx_flags & BNX_FLAG_TBI)
1606 		val |= BGE_PORTMODE_TBI;
1607 	else if (sc->bnx_flags & BNX_FLAG_MII_SERDES)
1608 		val |= BGE_PORTMODE_GMII;
1609 	else
1610 		val |= BGE_PORTMODE_MII;
1611 
1612 	/* Allow APE to send/receive frames. */
1613 	if (sc->bnx_mfw_flags & BNX_MFW_ON_APE)
1614 		val |= BGE_MACMODE_APE_RX_EN | BGE_MACMODE_APE_TX_EN;
1615 
1616 	/* Turn on DMA, clear stats */
1617 	CSR_WRITE_4(sc, BGE_MAC_MODE, val);
1618 	DELAY(40);
1619 
1620 	/* Set misc. local control, enable interrupts on attentions */
1621 	BNX_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_INTR_ONATTN);
1622 
1623 #ifdef notdef
1624 	/* Assert GPIO pins for PHY reset */
1625 	BNX_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_MISCIO_OUT0|
1626 	    BGE_MLC_MISCIO_OUT1|BGE_MLC_MISCIO_OUT2);
1627 	BNX_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_MISCIO_OUTEN0|
1628 	    BGE_MLC_MISCIO_OUTEN1|BGE_MLC_MISCIO_OUTEN2);
1629 #endif
1630 
1631 	if (sc->bnx_intr_type == PCI_INTR_TYPE_MSIX)
1632 		bnx_enable_msi(sc, TRUE);
1633 
1634 	/* Turn on write DMA state machine */
1635 	val = BGE_WDMAMODE_ENABLE|BGE_WDMAMODE_ALL_ATTNS;
1636 	/* Enable host coalescing bug fix. */
1637 	val |= BGE_WDMAMODE_STATUS_TAG_FIX;
1638 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5785) {
1639 		/* Request larger DMA burst size to get better performance. */
1640 		val |= BGE_WDMAMODE_BURST_ALL_DATA;
1641 	}
1642 	CSR_WRITE_4(sc, BGE_WDMA_MODE, val);
1643 	DELAY(40);
1644 
1645 	if (BNX_IS_57765_PLUS(sc)) {
1646 		uint32_t dmactl, dmactl_reg;
1647 
1648 		if (sc->bnx_asicrev == BGE_ASICREV_BCM5762)
1649 			dmactl_reg = BGE_RDMA_RSRVCTRL2;
1650 		else
1651 			dmactl_reg = BGE_RDMA_RSRVCTRL;
1652 
1653 		dmactl = CSR_READ_4(sc, dmactl_reg);
1654 		/*
1655 		 * Adjust tx margin to prevent TX data corruption and
1656 		 * fix internal FIFO overflow.
1657 		 */
1658 		if (sc->bnx_asicrev == BGE_ASICREV_BCM5719 ||
1659 		    sc->bnx_asicrev == BGE_ASICREV_BCM5720 ||
1660 		    sc->bnx_asicrev == BGE_ASICREV_BCM5762) {
1661 			dmactl &= ~(BGE_RDMA_RSRVCTRL_FIFO_LWM_MASK |
1662 			    BGE_RDMA_RSRVCTRL_FIFO_HWM_MASK |
1663 			    BGE_RDMA_RSRVCTRL_TXMRGN_MASK);
1664 			dmactl |= BGE_RDMA_RSRVCTRL_FIFO_LWM_1_5K |
1665 			    BGE_RDMA_RSRVCTRL_FIFO_HWM_1_5K |
1666 			    BGE_RDMA_RSRVCTRL_TXMRGN_320B;
1667 		}
1668 		/*
1669 		 * Enable fix for read DMA FIFO overruns.
1670 		 * The fix is to limit the number of RX BDs
1671 		 * the hardware fetches at a time.
1672 		 */
1673 		CSR_WRITE_4(sc, dmactl_reg,
1674 		    dmactl | BGE_RDMA_RSRVCTRL_FIFO_OFLW_FIX);
1675 	}
1676 
1677 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5719) {
1678 		CSR_WRITE_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL,
1679 		    CSR_READ_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL) |
1680 		    BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_BD_4K |
1681 		    BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_LSO_4K);
1682 	} else if (sc->bnx_asicrev == BGE_ASICREV_BCM5720 ||
1683 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762) {
1684 		uint32_t ctrl_reg;
1685 
1686 		if (sc->bnx_asicrev == BGE_ASICREV_BCM5762)
1687 			ctrl_reg = BGE_RDMA_LSO_CRPTEN_CTRL2;
1688 		else
1689 			ctrl_reg = BGE_RDMA_LSO_CRPTEN_CTRL;
1690 
1691 		/*
1692 		 * Allow 4KB burst length reads for non-LSO frames.
1693 		 * Enable 512B burst length reads for buffer descriptors.
1694 		 */
1695 		CSR_WRITE_4(sc, ctrl_reg,
1696 		    CSR_READ_4(sc, ctrl_reg) |
1697 		    BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_BD_512 |
1698 		    BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_LSO_4K);
1699 	}
1700 
1701 	/* Turn on read DMA state machine */
1702 	val = BGE_RDMAMODE_ENABLE | BGE_RDMAMODE_ALL_ATTNS;
1703 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5717)
1704 		val |= BGE_RDMAMODE_MULT_DMA_RD_DIS;
1705 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5784 ||
1706 	    sc->bnx_asicrev == BGE_ASICREV_BCM5785 ||
1707 	    sc->bnx_asicrev == BGE_ASICREV_BCM57780) {
1708 		val |= BGE_RDMAMODE_BD_SBD_CRPT_ATTN |
1709 		    BGE_RDMAMODE_MBUF_RBD_CRPT_ATTN |
1710 		    BGE_RDMAMODE_MBUF_SBD_CRPT_ATTN;
1711 	}
1712 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5720 ||
1713 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762) {
1714 		val |= CSR_READ_4(sc, BGE_RDMA_MODE) &
1715 		    BGE_RDMAMODE_H2BNC_VLAN_DET;
1716 		/*
1717 		 * Allow multiple outstanding read requests from
1718 		 * non-LSO read DMA engine.
1719 		 */
1720 		val &= ~BGE_RDMAMODE_MULT_DMA_RD_DIS;
1721 	}
1722 	if (sc->bnx_asicrev == BGE_ASICREV_BCM57766)
1723 		val |= BGE_RDMAMODE_JMB_2K_MMRR;
1724 	if (sc->bnx_flags & BNX_FLAG_TSO)
1725 		val |= BGE_RDMAMODE_TSO4_ENABLE;
1726 	val |= BGE_RDMAMODE_FIFO_LONG_BURST;
1727 	CSR_WRITE_4(sc, BGE_RDMA_MODE, val);
1728 	DELAY(40);
1729 
1730 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5719 ||
1731 	    sc->bnx_asicrev == BGE_ASICREV_BCM5720) {
1732 		uint32_t thresh;
1733 
1734 		thresh = ETHERMTU_JUMBO;
1735 		if (sc->bnx_chipid == BGE_CHIPID_BCM5719_A0)
1736 			thresh = ETHERMTU;
1737 
1738 		for (i = 0; i < BGE_RDMA_NCHAN; ++i) {
1739 			if (CSR_READ_4(sc, BGE_RDMA_LENGTH + (i << 2)) > thresh)
1740 				break;
1741 		}
1742 		if (i < BGE_RDMA_NCHAN) {
1743 			if (bootverbose) {
1744 				if_printf(&sc->arpcom.ac_if,
1745 				    "enable RDMA WA\n");
1746 			}
1747 			if (sc->bnx_asicrev == BGE_ASICREV_BCM5719)
1748 				sc->bnx_rdma_wa = BGE_RDMA_TX_LENGTH_WA_5719;
1749 			else
1750 				sc->bnx_rdma_wa = BGE_RDMA_TX_LENGTH_WA_5720;
1751 			CSR_WRITE_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL,
1752 			    CSR_READ_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL) |
1753 			    sc->bnx_rdma_wa);
1754 		} else {
1755 			sc->bnx_rdma_wa = 0;
1756 		}
1757 	}
1758 
1759 	/* Turn on RX data completion state machine */
1760 	CSR_WRITE_4(sc, BGE_RDC_MODE, BGE_RDCMODE_ENABLE);
1761 
1762 	/* Turn on RX BD initiator state machine */
1763 	CSR_WRITE_4(sc, BGE_RBDI_MODE, BGE_RBDIMODE_ENABLE);
1764 
1765 	/* Turn on RX data and RX BD initiator state machine */
1766 	CSR_WRITE_4(sc, BGE_RDBDI_MODE, BGE_RDBDIMODE_ENABLE);
1767 
1768 	/* Turn on send BD completion state machine */
1769 	CSR_WRITE_4(sc, BGE_SBDC_MODE, BGE_SBDCMODE_ENABLE);
1770 
1771 	/* Turn on send data completion state machine */
1772 	val = BGE_SDCMODE_ENABLE;
1773 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5761)
1774 		val |= BGE_SDCMODE_CDELAY;
1775 	CSR_WRITE_4(sc, BGE_SDC_MODE, val);
1776 
1777 	/* Turn on send data initiator state machine */
1778 	if (sc->bnx_flags & BNX_FLAG_TSO) {
1779 		CSR_WRITE_4(sc, BGE_SDI_MODE, BGE_SDIMODE_ENABLE |
1780 		    BGE_SDIMODE_HW_LSO_PRE_DMA);
1781 	} else {
1782 		CSR_WRITE_4(sc, BGE_SDI_MODE, BGE_SDIMODE_ENABLE);
1783 	}
1784 
1785 	/* Turn on send BD initiator state machine */
1786 	val = BGE_SBDIMODE_ENABLE;
1787 	if (sc->bnx_tx_ringcnt > 1)
1788 		val |= BGE_SBDIMODE_MULTI_TXR;
1789 	CSR_WRITE_4(sc, BGE_SBDI_MODE, val);
1790 
1791 	/* Turn on send BD selector state machine */
1792 	CSR_WRITE_4(sc, BGE_SRS_MODE, BGE_SRSMODE_ENABLE);
1793 
1794 	CSR_WRITE_4(sc, BGE_SDI_STATS_ENABLE_MASK, 0x007FFFFF);
1795 	CSR_WRITE_4(sc, BGE_SDI_STATS_CTL,
1796 	    BGE_SDISTATSCTL_ENABLE|BGE_SDISTATSCTL_FASTER);
1797 
1798 	/* ack/clear link change events */
1799 	CSR_WRITE_4(sc, BGE_MAC_STS, BGE_MACSTAT_SYNC_CHANGED|
1800 	    BGE_MACSTAT_CFG_CHANGED|BGE_MACSTAT_MI_COMPLETE|
1801 	    BGE_MACSTAT_LINK_CHANGED);
1802 	CSR_WRITE_4(sc, BGE_MI_STS, 0);
1803 
1804 	/*
1805 	 * Enable attention when the link has changed state for
1806 	 * devices that use auto polling.
1807 	 */
1808 	if (sc->bnx_flags & BNX_FLAG_TBI) {
1809 		CSR_WRITE_4(sc, BGE_MI_STS, BGE_MISTS_LINK);
1810 	} else {
1811 		if (sc->bnx_mi_mode & BGE_MIMODE_AUTOPOLL) {
1812 			CSR_WRITE_4(sc, BGE_MI_MODE, sc->bnx_mi_mode);
1813 			DELAY(80);
1814 		}
1815 	}
1816 
1817 	/*
1818 	 * Clear any pending link state attention.
1819 	 * Otherwise some link state change events may be lost until attention
1820 	 * is cleared by the bnx_intr() -> bnx_softc.bnx_link_upd() sequence.
1821 	 * It's not necessary on newer BCM chips - perhaps enabling link
1822 	 * state change attentions implies clearing pending attention.
1823 	 */
1824 	CSR_WRITE_4(sc, BGE_MAC_STS, BGE_MACSTAT_SYNC_CHANGED|
1825 	    BGE_MACSTAT_CFG_CHANGED|BGE_MACSTAT_MI_COMPLETE|
1826 	    BGE_MACSTAT_LINK_CHANGED);
1827 
1828 	/* Enable link state change attentions. */
1829 	BNX_SETBIT(sc, BGE_MAC_EVT_ENB, BGE_EVTENB_LINK_CHANGED);
1830 
1831 	return(0);
1832 }
1833 
1834 /*
1835  * Probe for a Broadcom chip. Check the PCI vendor and device IDs
1836  * against our list and return its name if we find a match. Note
1837  * that although the Broadcom controller contains VPD support, we
1838  * do not read it here; the compiled-in device name string is used
1839  * instead, which is faster and still announces the right product
1840  * name for every entry in the table.
1841  */
1842 static int
1843 bnx_probe(device_t dev)
1844 {
1845 	const struct bnx_type *t;
1846 	uint16_t product, vendor;
1847 
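	/* bnx(4) only supports PCI Express controllers. */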
1848 	if (!pci_is_pcie(dev))
1849 		return ENXIO;
1850 
1851 	product = pci_get_device(dev);
1852 	vendor = pci_get_vendor(dev);
1853 
1854 	for (t = bnx_devs; t->bnx_name != NULL; t++) {
1855 		if (vendor == t->bnx_vid && product == t->bnx_did)
1856 			break;
1857 	}
1858 	if (t->bnx_name == NULL)
1859 		return ENXIO;
1860 
1861 	device_set_desc(dev, t->bnx_name);
1862 	return 0;
1863 }
1864 
1865 static int
1866 bnx_attach(device_t dev)
1867 {
1868 	struct ifnet *ifp;
1869 	struct bnx_softc *sc;
1870 	struct bnx_rx_std_ring *std;
1871 	struct sysctl_ctx_list *ctx;
1872 	struct sysctl_oid_list *tree;
1873 	uint32_t hwcfg = 0;
1874 	int error = 0, rid, capmask, i, std_cpuid, std_cpuid_def;
1875 	uint8_t ether_addr[ETHER_ADDR_LEN];
1876 	uint16_t product;
1877 	uintptr_t mii_priv = 0;
1878 #if defined(BNX_TSO_DEBUG) || defined(BNX_RSS_DEBUG) || defined(BNX_TSS_DEBUG)
1879 	char desc[32];
1880 #endif
1881 
1882 	sc = device_get_softc(dev);
1883 	sc->bnx_dev = dev;
1884 	callout_init_mp(&sc->bnx_tick_timer);
1885 	lwkt_serialize_init(&sc->bnx_jslot_serializer);
1886 	lwkt_serialize_init(&sc->bnx_main_serialize);
1887 
1888 	/* Always setup interrupt mailboxes */
1889 	for (i = 0; i < BNX_INTR_MAX; ++i) {
1890 		callout_init_mp(&sc->bnx_intr_data[i].bnx_intr_timer);
1891 		sc->bnx_intr_data[i].bnx_sc = sc;
1892 		sc->bnx_intr_data[i].bnx_intr_mbx = BGE_MBX_IRQ0_LO + (i * 8);
1893 		sc->bnx_intr_data[i].bnx_intr_rid = -1;
1894 		sc->bnx_intr_data[i].bnx_intr_cpuid = -1;
1895 	}
1896 
1897 	sc->bnx_func_addr = pci_get_function(dev);
1898 	product = pci_get_device(dev);
1899 
1900 #ifndef BURN_BRIDGES
1901 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
1902 		uint32_t irq, mem;
1903 
1904 		irq = pci_read_config(dev, PCIR_INTLINE, 4);
1905 		mem = pci_read_config(dev, BGE_PCI_BAR0, 4);
1906 
1907 		device_printf(dev, "chip is in D%d power mode "
1908 		    "-- setting to D0\n", pci_get_powerstate(dev));
1909 
1910 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
1911 
1912 		pci_write_config(dev, PCIR_INTLINE, irq, 4);
1913 		pci_write_config(dev, BGE_PCI_BAR0, mem, 4);
1914 	}
1915 #endif	/* !BURN_BRIDGES */
1916 
1917 	/*
1918 	 * Map control/status registers.
1919 	 */
1920 	pci_enable_busmaster(dev);
1921 
1922 	rid = BGE_PCI_BAR0;
1923 	sc->bnx_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
1924 	    RF_ACTIVE);
1925 
1926 	if (sc->bnx_res == NULL) {
1927 		device_printf(dev, "couldn't map memory\n");
1928 		return ENXIO;
1929 	}
1930 
1931 	sc->bnx_btag = rman_get_bustag(sc->bnx_res);
1932 	sc->bnx_bhandle = rman_get_bushandle(sc->bnx_res);
1933 
1934 	/* Save various chip information */
1935 	sc->bnx_chipid =
1936 	    pci_read_config(dev, BGE_PCI_MISC_CTL, 4) >>
1937 	    BGE_PCIMISCCTL_ASICREV_SHIFT;
1938 	if (BGE_ASICREV(sc->bnx_chipid) == BGE_ASICREV_USE_PRODID_REG) {
1939 		/* All chips having dedicated ASICREV register have CPMU */
1940 		sc->bnx_flags |= BNX_FLAG_CPMU;
1941 
1942 		switch (product) {
1943 		case PCI_PRODUCT_BROADCOM_BCM5717:
1944 		case PCI_PRODUCT_BROADCOM_BCM5717C:
1945 		case PCI_PRODUCT_BROADCOM_BCM5718:
1946 		case PCI_PRODUCT_BROADCOM_BCM5719:
1947 		case PCI_PRODUCT_BROADCOM_BCM5720_ALT:
1948 		case PCI_PRODUCT_BROADCOM_BCM5725:
1949 		case PCI_PRODUCT_BROADCOM_BCM5727:
1950 		case PCI_PRODUCT_BROADCOM_BCM5762:
1951 		case PCI_PRODUCT_BROADCOM_BCM57764:
1952 		case PCI_PRODUCT_BROADCOM_BCM57767:
1953 		case PCI_PRODUCT_BROADCOM_BCM57787:
1954 			sc->bnx_chipid = pci_read_config(dev,
1955 			    BGE_PCI_GEN2_PRODID_ASICREV, 4);
1956 			break;
1957 
1958 		case PCI_PRODUCT_BROADCOM_BCM57761:
1959 		case PCI_PRODUCT_BROADCOM_BCM57762:
1960 		case PCI_PRODUCT_BROADCOM_BCM57765:
1961 		case PCI_PRODUCT_BROADCOM_BCM57766:
1962 		case PCI_PRODUCT_BROADCOM_BCM57781:
1963 		case PCI_PRODUCT_BROADCOM_BCM57782:
1964 		case PCI_PRODUCT_BROADCOM_BCM57785:
1965 		case PCI_PRODUCT_BROADCOM_BCM57786:
1966 		case PCI_PRODUCT_BROADCOM_BCM57791:
1967 		case PCI_PRODUCT_BROADCOM_BCM57795:
1968 			sc->bnx_chipid = pci_read_config(dev,
1969 			    BGE_PCI_GEN15_PRODID_ASICREV, 4);
1970 			break;
1971 
1972 		default:
1973 			sc->bnx_chipid = pci_read_config(dev,
1974 			    BGE_PCI_PRODID_ASICREV, 4);
1975 			break;
1976 		}
1977 	}
1978 	if (sc->bnx_chipid == BGE_CHIPID_BCM5717_C0)
1979 		sc->bnx_chipid = BGE_CHIPID_BCM5720_A0;
1980 
1981 	sc->bnx_asicrev = BGE_ASICREV(sc->bnx_chipid);
1982 	sc->bnx_chiprev = BGE_CHIPREV(sc->bnx_chipid);
1983 
1984 	switch (sc->bnx_asicrev) {
1985 	case BGE_ASICREV_BCM5717:
1986 	case BGE_ASICREV_BCM5719:
1987 	case BGE_ASICREV_BCM5720:
1988 		sc->bnx_flags |= BNX_FLAG_5717_PLUS | BNX_FLAG_57765_PLUS;
1989 		break;
1990 
1991 	case BGE_ASICREV_BCM5762:
1992 		sc->bnx_flags |= BNX_FLAG_57765_PLUS;
1993 		break;
1994 
1995 	case BGE_ASICREV_BCM57765:
1996 	case BGE_ASICREV_BCM57766:
1997 		sc->bnx_flags |= BNX_FLAG_57765_FAMILY | BNX_FLAG_57765_PLUS;
1998 		break;
1999 	}
2000 
2001 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5717 ||
2002 	    sc->bnx_asicrev == BGE_ASICREV_BCM5719 ||
2003 	    sc->bnx_asicrev == BGE_ASICREV_BCM5720 ||
2004 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762)
2005 		sc->bnx_flags |= BNX_FLAG_APE;
2006 
2007 	sc->bnx_flags |= BNX_FLAG_TSO;
2008 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5719 &&
2009 	    sc->bnx_chipid == BGE_CHIPID_BCM5719_A0)
2010 		sc->bnx_flags &= ~BNX_FLAG_TSO;
2011 
2012 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5717 ||
2013 	    BNX_IS_57765_FAMILY(sc)) {
2014 		/*
2015 		 * All chips in the BCM57785 and BCM5718 families have a bug:
2016 		 * under certain situations the interrupt will not be enabled
2017 		 * even if the status tag is written to the interrupt mailbox.
2018 		 *
2019 		 * BCM5719 and BCM5720 have a hardware workaround that fixes
2020 		 * the above bug.
2021 		 * See the comment near BGE_PCIDMARWCTL_TAGGED_STATUS_WA in
2022 		 * bnx_chipinit().
2023 		 *
2024 		 * For the rest of the chips in these two families, we have
2025 		 * to poll the status block at a high rate (10ms currently)
2026 		 * to check whether the interrupt is hosed or not.
2027 		 * See bnx_check_intr_*() for details.
2028 		 */
2029 		sc->bnx_flags |= BNX_FLAG_STATUSTAG_BUG;
2030 	}
2031 
2032 	sc->bnx_pciecap = pci_get_pciecap_ptr(sc->bnx_dev);
2033 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5719 ||
2034 	    sc->bnx_asicrev == BGE_ASICREV_BCM5720)
2035 		pcie_set_max_readrq(dev, PCIEM_DEVCTL_MAX_READRQ_2048);
2036 	else
2037 		pcie_set_max_readrq(dev, PCIEM_DEVCTL_MAX_READRQ_4096);
2038 	device_printf(dev, "CHIP ID 0x%08x; "
2039 		      "ASIC REV 0x%02x; CHIP REV 0x%02x\n",
2040 		      sc->bnx_chipid, sc->bnx_asicrev, sc->bnx_chiprev);
2041 
2042 	/*
2043 	 * Set various PHY quirk flags.
2044 	 */
2045 
2046 	capmask = MII_CAPMASK_DEFAULT;
2047 	if (product == PCI_PRODUCT_BROADCOM_BCM57791 ||
2048 	    product == PCI_PRODUCT_BROADCOM_BCM57795) {
2049 		/* 10/100 only */
2050 		capmask &= ~BMSR_EXTSTAT;
2051 	}
2052 
2053 	mii_priv |= BRGPHY_FLAG_WIRESPEED;
2054 	if (sc->bnx_chipid == BGE_CHIPID_BCM5762_A0)
2055 		mii_priv |= BRGPHY_FLAG_5762_A0;
2056 
2057 	/*
2058 	 * Chips with APE need BAR2 access for APE registers/memory.
2059 	 */
2060 	if (sc->bnx_flags & BNX_FLAG_APE) {
2061 		uint32_t pcistate;
2062 
2063 		rid = PCIR_BAR(2);
2064 		sc->bnx_res2 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
2065 		    RF_ACTIVE);
2066 		if (sc->bnx_res2 == NULL) {
2067 			device_printf(dev, "couldn't map BAR2 memory\n");
2068 			error = ENXIO;
2069 			goto fail;
2070 		}
2071 
2072 		/* Enable APE register/memory access by host driver. */
2073 		pcistate = pci_read_config(dev, BGE_PCI_PCISTATE, 4);
2074 		pcistate |= BGE_PCISTATE_ALLOW_APE_CTLSPC_WR |
2075 		    BGE_PCISTATE_ALLOW_APE_SHMEM_WR |
2076 		    BGE_PCISTATE_ALLOW_APE_PSPACE_WR;
2077 		pci_write_config(dev, BGE_PCI_PCISTATE, pcistate, 4);
2078 
2079 		bnx_ape_lock_init(sc);
2080 		bnx_ape_read_fw_ver(sc);
2081 	}
2082 
2083 	/* Initialize if_name earlier, so if_printf can be used */
2084 	ifp = &sc->arpcom.ac_if;
2085 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2086 
2087 	/*
2088 	 * Try to reset the chip.
2089 	 */
2090 	bnx_sig_pre_reset(sc, BNX_RESET_SHUTDOWN);
2091 	bnx_reset(sc);
2092 	bnx_sig_post_reset(sc, BNX_RESET_SHUTDOWN);
2093 
2094 	if (bnx_chipinit(sc)) {
2095 		device_printf(dev, "chip initialization failed\n");
2096 		error = ENXIO;
2097 		goto fail;
2098 	}
2099 
2100 	/*
2101 	 * Get station address
2102 	 */
2103 	error = bnx_get_eaddr(sc, ether_addr);
2104 	if (error) {
2105 		device_printf(dev, "failed to read station address\n");
2106 		goto fail;
2107 	}
2108 
2109 	/* Setup RX/TX and interrupt count */
2110 	bnx_setup_ring_cnt(sc);
2111 
2112 	if ((sc->bnx_rx_retcnt == 1 && sc->bnx_tx_ringcnt == 1) ||
2113 	    (sc->bnx_rx_retcnt > 1 && sc->bnx_tx_ringcnt > 1)) {
2114 	    	/*
2115 		 * The RX ring and the corresponding TX ring processing
2116 		 * should be on the same CPU, since they share the same
2117 		 * status block.
2118 		 */
2119 		sc->bnx_flags |= BNX_FLAG_RXTX_BUNDLE;
2120 		if (bootverbose)
2121 			device_printf(dev, "RX/TX bundle\n");
2122 		if (sc->bnx_tx_ringcnt > 1) {
2123 			/*
2124 			 * Multiple TX rings do not share the status block
2125 			 * with the link status, so the link status update
2126 			 * will have to save its own status_tag.
2127 			 */
2128 			sc->bnx_flags |= BNX_FLAG_STATUS_HASTAG;
2129 			if (bootverbose)
2130 				device_printf(dev, "status needs tag\n");
2131 		}
2132 	} else {
2133 		KKASSERT(sc->bnx_rx_retcnt > 1 && sc->bnx_tx_ringcnt == 1);
2134 		if (bootverbose)
2135 			device_printf(dev, "RX/TX not bundled\n");
2136 	}
2137 
2138 	error = bnx_dma_alloc(dev);
2139 	if (error)
2140 		goto fail;
2141 
2142 	/*
2143 	 * Allocate interrupt
2144 	 */
2145 	error = bnx_alloc_intr(sc);
2146 	if (error)
2147 		goto fail;
2148 
2149 	/* Setup serializers */
2150 	bnx_setup_serialize(sc);
2151 
2152 	/* Set default tuneable values. */
2153 	sc->bnx_rx_coal_ticks = BNX_RX_COAL_TICKS_DEF;
2154 	sc->bnx_tx_coal_ticks = BNX_TX_COAL_TICKS_DEF;
2155 	sc->bnx_rx_coal_bds = BNX_RX_COAL_BDS_DEF;
2156 	sc->bnx_rx_coal_bds_poll = sc->bnx_rx_ret_ring[0].bnx_rx_cntmax;
2157 	sc->bnx_tx_coal_bds = BNX_TX_COAL_BDS_DEF;
2158 	sc->bnx_tx_coal_bds_poll = BNX_TX_COAL_BDS_POLL_DEF;
2159 	sc->bnx_rx_coal_bds_int = BNX_RX_COAL_BDS_INT_DEF;
2160 	sc->bnx_tx_coal_bds_int = BNX_TX_COAL_BDS_INT_DEF;
2161 
2162 	/* Set up ifnet structure */
2163 	ifp->if_softc = sc;
2164 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2165 	ifp->if_ioctl = bnx_ioctl;
2166 	ifp->if_start = bnx_start;
2167 #ifdef IFPOLL_ENABLE
2168 	ifp->if_npoll = bnx_npoll;
2169 #endif
2170 	ifp->if_init = bnx_init;
2171 	ifp->if_serialize = bnx_serialize;
2172 	ifp->if_deserialize = bnx_deserialize;
2173 	ifp->if_tryserialize = bnx_tryserialize;
2174 #ifdef INVARIANTS
2175 	ifp->if_serialize_assert = bnx_serialize_assert;
2176 #endif
2177 	ifp->if_mtu = ETHERMTU;
2178 	ifp->if_capabilities = IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2179 
2180 	ifp->if_capabilities |= IFCAP_HWCSUM;
2181 	ifp->if_hwassist = BNX_CSUM_FEATURES;
2182 	if (sc->bnx_flags & BNX_FLAG_TSO) {
2183 		ifp->if_capabilities |= IFCAP_TSO;
2184 		ifp->if_hwassist |= CSUM_TSO;
2185 	}
2186 	if (BNX_RSS_ENABLED(sc))
2187 		ifp->if_capabilities |= IFCAP_RSS;
2188 	ifp->if_capenable = ifp->if_capabilities;
2189 
2190 	ifp->if_nmbclusters = BGE_STD_RX_RING_CNT;
2191 
2192 	ifq_set_maxlen(&ifp->if_snd, BGE_TX_RING_CNT - 1);
2193 	ifq_set_ready(&ifp->if_snd);
2194 	ifq_set_subq_cnt(&ifp->if_snd, sc->bnx_tx_ringcnt);
2195 
2196 	if (sc->bnx_tx_ringcnt > 1) {
2197 		ifp->if_mapsubq = ifq_mapsubq_modulo;
2198 		ifq_set_subq_divisor(&ifp->if_snd, sc->bnx_tx_ringcnt);
2199 	}
2200 
2201 	/*
2202 	 * Figure out what sort of media we have by checking the
2203 	 * hardware config word in the first 32k of NIC internal memory,
2204 	 * or fall back to examining the EEPROM if necessary.
2205 	 * Note: on some BCM5700 cards, this value appears to be unset.
2206 	 * If that's the case, we have to rely on identifying the NIC
2207 	 * by its PCI subsystem ID, as we do below for the SysKonnect
2208 	 * SK-9D41.
2209 	 */
2210 	if (bnx_readmem_ind(sc, BGE_SRAM_DATA_SIG) == BGE_SRAM_DATA_SIG_MAGIC) {
2211 		hwcfg = bnx_readmem_ind(sc, BGE_SRAM_DATA_CFG);
2212 	} else {
2213 		if (bnx_read_eeprom(sc, (caddr_t)&hwcfg, BGE_EE_HWCFG_OFFSET,
2214 				    sizeof(hwcfg))) {
2215 			device_printf(dev, "failed to read EEPROM\n");
2216 			error = ENXIO;
2217 			goto fail;
2218 		}
2219 		hwcfg = ntohl(hwcfg);
2220 	}
2221 
2222 	/* The SysKonnect SK-9D41 is a 1000baseSX card. */
2223 	if (pci_get_subvendor(dev) == PCI_PRODUCT_SCHNEIDERKOCH_SK_9D41 ||
2224 	    (hwcfg & BGE_HWCFG_MEDIA) == BGE_MEDIA_FIBER)
2225 		sc->bnx_flags |= BNX_FLAG_TBI;
2226 
2227 	/* Setup MI MODE */
2228 	if (sc->bnx_flags & BNX_FLAG_CPMU)
2229 		sc->bnx_mi_mode = BGE_MIMODE_500KHZ_CONST;
2230 	else
2231 		sc->bnx_mi_mode = BGE_MIMODE_BASE;
2232 
2233 	/* Setup link status update stuffs */
2234 	if (sc->bnx_flags & BNX_FLAG_TBI) {
2235 		sc->bnx_link_upd = bnx_tbi_link_upd;
2236 		sc->bnx_link_chg = BGE_MACSTAT_LINK_CHANGED;
2237 	} else if (sc->bnx_mi_mode & BGE_MIMODE_AUTOPOLL) {
2238 		sc->bnx_link_upd = bnx_autopoll_link_upd;
2239 		sc->bnx_link_chg = BGE_MACSTAT_LINK_CHANGED;
2240 	} else {
2241 		sc->bnx_link_upd = bnx_copper_link_upd;
2242 		sc->bnx_link_chg = BGE_MACSTAT_LINK_CHANGED;
2243 	}
2244 
2245 	/* Set default PHY address */
2246 	sc->bnx_phyno = 1;
2247 
2248 	/*
2249 	 * PHY address mapping for various devices.
2250 	 *
2251 	 *          | F0 Cu | F0 Sr | F1 Cu | F1 Sr |
2252 	 * ---------+-------+-------+-------+-------+
2253 	 * BCM57XX  |   1   |   X   |   X   |   X   |
2254 	 * BCM5717  |   1   |   8   |   2   |   9   |
2255 	 * BCM5719  |   1   |   8   |   2   |   9   |
2256 	 * BCM5720  |   1   |   8   |   2   |   9   |
2257 	 *
2258 	 *          | F2 Cu | F2 Sr | F3 Cu | F3 Sr |
2259 	 * ---------+-------+-------+-------+-------+
2260 	 * BCM57XX  |   X   |   X   |   X   |   X   |
2261 	 * BCM5717  |   X   |   X   |   X   |   X   |
2262 	 * BCM5719  |   3   |   10  |   4   |   11  |
2263 	 * BCM5720  |   X   |   X   |   X   |   X   |
2264 	 *
2265 	 * Other addresses may respond but they are not
2266 	 * IEEE compliant PHYs and should be ignored.
2267 	 */
2268 	if (BNX_IS_5717_PLUS(sc)) {
2269 		if (sc->bnx_chipid == BGE_CHIPID_BCM5717_A0) {
2270 			if (CSR_READ_4(sc, BGE_SGDIG_STS) &
2271 			    BGE_SGDIGSTS_IS_SERDES)
2272 				sc->bnx_phyno = sc->bnx_func_addr + 8;
2273 			else
2274 				sc->bnx_phyno = sc->bnx_func_addr + 1;
2275 		} else {
2276 			if (CSR_READ_4(sc, BGE_CPMU_PHY_STRAP) &
2277 			    BGE_CPMU_PHY_STRAP_IS_SERDES)
2278 				sc->bnx_phyno = sc->bnx_func_addr + 8;
2279 			else
2280 				sc->bnx_phyno = sc->bnx_func_addr + 1;
2281 		}
2282 	}
2283 
2284 	if (sc->bnx_flags & BNX_FLAG_TBI) {
2285 		ifmedia_init(&sc->bnx_ifmedia, IFM_IMASK,
2286 		    bnx_ifmedia_upd, bnx_ifmedia_sts);
2287 		ifmedia_add(&sc->bnx_ifmedia, IFM_ETHER|IFM_1000_SX, 0, NULL);
2288 		ifmedia_add(&sc->bnx_ifmedia,
2289 		    IFM_ETHER|IFM_1000_SX|IFM_FDX, 0, NULL);
2290 		ifmedia_add(&sc->bnx_ifmedia, IFM_ETHER|IFM_AUTO, 0, NULL);
2291 		ifmedia_set(&sc->bnx_ifmedia, IFM_ETHER|IFM_AUTO);
2292 		sc->bnx_ifmedia.ifm_media = sc->bnx_ifmedia.ifm_cur->ifm_media;
2293 	} else {
2294 		struct mii_probe_args mii_args;
2295 
2296 		mii_probe_args_init(&mii_args, bnx_ifmedia_upd, bnx_ifmedia_sts);
2297 		mii_args.mii_probemask = 1 << sc->bnx_phyno;
2298 		mii_args.mii_capmask = capmask;
2299 		mii_args.mii_privtag = MII_PRIVTAG_BRGPHY;
2300 		mii_args.mii_priv = mii_priv;
2301 
2302 		error = mii_probe(dev, &sc->bnx_miibus, &mii_args);
2303 		if (error) {
2304 			device_printf(dev, "MII without any PHY!\n");
2305 			goto fail;
2306 		}
2307 	}
2308 
2309 	ctx = device_get_sysctl_ctx(sc->bnx_dev);
2310 	tree = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->bnx_dev));
2311 
2312 	SYSCTL_ADD_INT(ctx, tree, OID_AUTO,
2313 	    "rx_rings", CTLFLAG_RD, &sc->bnx_rx_retcnt, 0, "# of RX rings");
2314 	SYSCTL_ADD_INT(ctx, tree, OID_AUTO,
2315 	    "tx_rings", CTLFLAG_RD, &sc->bnx_tx_ringcnt, 0, "# of TX rings");
2316 
2317 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO, "rx_coal_ticks",
2318 			CTLTYPE_INT | CTLFLAG_RW,
2319 			sc, 0, bnx_sysctl_rx_coal_ticks, "I",
2320 			"Receive coalescing ticks (usec).");
2321 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO, "tx_coal_ticks",
2322 			CTLTYPE_INT | CTLFLAG_RW,
2323 			sc, 0, bnx_sysctl_tx_coal_ticks, "I",
2324 			"Transmit coalescing ticks (usec).");
2325 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO, "rx_coal_bds",
2326 			CTLTYPE_INT | CTLFLAG_RW,
2327 			sc, 0, bnx_sysctl_rx_coal_bds, "I",
2328 			"Receive max coalesced BD count.");
2329 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO, "rx_coal_bds_poll",
2330 			CTLTYPE_INT | CTLFLAG_RW,
2331 			sc, 0, bnx_sysctl_rx_coal_bds_poll, "I",
2332 			"Receive max coalesced BD count in polling.");
2333 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO, "tx_coal_bds",
2334 			CTLTYPE_INT | CTLFLAG_RW,
2335 			sc, 0, bnx_sysctl_tx_coal_bds, "I",
2336 			"Transmit max coalesced BD count.");
2337 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO, "tx_coal_bds_poll",
2338 			CTLTYPE_INT | CTLFLAG_RW,
2339 			sc, 0, bnx_sysctl_tx_coal_bds_poll, "I",
2340 			"Transmit max coalesced BD count in polling.");
2341 	/*
2342 	 * A common design characteristic for many Broadcom
2343 	 * client controllers is that they only support a
2344 	 * single outstanding DMA read operation on the PCIe
2345 	 * bus. This means that it will take twice as long to
2346 	 * fetch a TX frame that is split into header and
2347 	 * payload buffers as it does to fetch a single,
2348 	 * contiguous TX frame (2 reads vs. 1 read). For these
2349 	 * controllers, coalescing buffers to reduce the number
2350 	 * of memory reads is an effective way to get maximum
2351 	 * performance (about 940Mbps).  Without collapsing TX
2352 	 * buffers the maximum TCP bulk transfer performance
2353 	 * is about 850Mbps. However, forcing mbuf coalescing
2354 	 * consumes a lot of CPU cycles, so leave it off by
2355 	 * default.
2356 	 */
2357 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO,
2358 	    "force_defrag", CTLTYPE_INT | CTLFLAG_RW,
2359 	    sc, 0, bnx_sysctl_force_defrag, "I",
2360 	    "Force defragment on TX path");
2361 
2362 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO,
2363 	    "tx_wreg", CTLTYPE_INT | CTLFLAG_RW,
2364 	    sc, 0, bnx_sysctl_tx_wreg, "I",
2365 	    "# of segments before writing to hardware register");
2366 
2367 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO,
2368 	    "std_refill", CTLTYPE_INT | CTLFLAG_RW,
2369 	    sc, 0, bnx_sysctl_std_refill, "I",
2370 	    "# of packets received before scheduling standard refilling");
2371 
2372 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO,
2373 	    "rx_coal_bds_int", CTLTYPE_INT | CTLFLAG_RW,
2374 	    sc, 0, bnx_sysctl_rx_coal_bds_int, "I",
2375 	    "Receive max coalesced BD count during interrupt.");
2376 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO,
2377 	    "tx_coal_bds_int", CTLTYPE_INT | CTLFLAG_RW,
2378 	    sc, 0, bnx_sysctl_tx_coal_bds_int, "I",
2379 	    "Transmit max coalesced BD count during interrupt.");
2380 
2381 	if (sc->bnx_intr_type == PCI_INTR_TYPE_MSIX) {
2382 		SYSCTL_ADD_PROC(ctx, tree, OID_AUTO, "tx_cpumap",
2383 		    CTLTYPE_OPAQUE | CTLFLAG_RD,
2384 		    sc->bnx_tx_rmap, 0, if_ringmap_cpumap_sysctl, "I",
2385 		    "TX ring CPU map");
2386 		SYSCTL_ADD_PROC(ctx, tree, OID_AUTO, "rx_cpumap",
2387 		    CTLTYPE_OPAQUE | CTLFLAG_RD,
2388 		    sc->bnx_rx_rmap, 0, if_ringmap_cpumap_sysctl, "I",
2389 		    "RX ring CPU map");
2390 	} else {
2391 #ifdef IFPOLL_ENABLE
2392 		SYSCTL_ADD_PROC(ctx, tree, OID_AUTO, "tx_poll_cpumap",
2393 		    CTLTYPE_OPAQUE | CTLFLAG_RD,
2394 		    sc->bnx_tx_rmap, 0, if_ringmap_cpumap_sysctl, "I",
2395 		    "TX poll CPU map");
2396 		SYSCTL_ADD_PROC(ctx, tree, OID_AUTO, "rx_poll_cpumap",
2397 		    CTLTYPE_OPAQUE | CTLFLAG_RD,
2398 		    sc->bnx_rx_rmap, 0, if_ringmap_cpumap_sysctl, "I",
2399 		    "RX poll CPU map");
2400 #endif
2401 	}
2402 
2403 #ifdef BNX_RSS_DEBUG
2404 	SYSCTL_ADD_INT(ctx, tree, OID_AUTO,
2405 	    "std_refill_mask", CTLFLAG_RD,
2406 	    &sc->bnx_rx_std_ring.bnx_rx_std_refill, 0, "");
2407 	SYSCTL_ADD_INT(ctx, tree, OID_AUTO,
2408 	    "std_used", CTLFLAG_RD,
2409 	    &sc->bnx_rx_std_ring.bnx_rx_std_used, 0, "");
2410 	SYSCTL_ADD_INT(ctx, tree, OID_AUTO,
2411 	    "rss_debug", CTLFLAG_RW, &sc->bnx_rss_debug, 0, "");
2412 	for (i = 0; i < sc->bnx_rx_retcnt; ++i) {
2413 		ksnprintf(desc, sizeof(desc), "rx_pkt%d", i);
2414 		SYSCTL_ADD_ULONG(ctx, tree, OID_AUTO,
2415 		    desc, CTLFLAG_RW, &sc->bnx_rx_ret_ring[i].bnx_rx_pkt, "");
2416 
2417 		ksnprintf(desc, sizeof(desc), "rx_force_sched%d", i);
2418 		SYSCTL_ADD_ULONG(ctx, tree, OID_AUTO,
2419 		    desc, CTLFLAG_RW,
2420 		    &sc->bnx_rx_ret_ring[i].bnx_rx_force_sched, "");
2421 	}
2422 #endif
2423 #ifdef BNX_TSS_DEBUG
2424 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
2425 		ksnprintf(desc, sizeof(desc), "tx_pkt%d", i);
2426 		SYSCTL_ADD_ULONG(ctx, tree, OID_AUTO,
2427 		    desc, CTLFLAG_RW, &sc->bnx_tx_ring[i].bnx_tx_pkt, "");
2428 	}
2429 #endif
2430 
2431 	SYSCTL_ADD_ULONG(ctx, tree, OID_AUTO,
2432 	    "norxbds", CTLFLAG_RW, &sc->bnx_norxbds, "");
2433 
2434 	SYSCTL_ADD_ULONG(ctx, tree, OID_AUTO,
2435 	    "errors", CTLFLAG_RW, &sc->bnx_errors, "");
2436 
2437 #ifdef BNX_TSO_DEBUG
2438 	for (i = 0; i < BNX_TSO_NSTATS; ++i) {
2439 		ksnprintf(desc, sizeof(desc), "tso%d", i + 1);
2440 		SYSCTL_ADD_ULONG(ctx, tree, OID_AUTO,
2441 		    desc, CTLFLAG_RW, &sc->bnx_tsosegs[i], "");
2442 	}
2443 #endif
2444 
2445 	/*
2446 	 * Call MI attach routine.
2447 	 */
2448 	ether_ifattach(ifp, ether_addr, NULL);
2449 
2450 	/* Setup TX rings and subqueues */
2451 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
2452 		struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
2453 		struct bnx_tx_ring *txr = &sc->bnx_tx_ring[i];
2454 
2455 		ifsq_set_cpuid(ifsq, txr->bnx_tx_cpuid);
2456 		ifsq_set_hw_serialize(ifsq, &txr->bnx_tx_serialize);
2457 		ifsq_set_priv(ifsq, txr);
2458 		txr->bnx_ifsq = ifsq;
2459 
2460 		ifsq_watchdog_init(&txr->bnx_tx_watchdog, ifsq,
2461 				   bnx_watchdog, 0);
2462 
2463 		if (bootverbose) {
2464 			device_printf(dev, "txr %d -> cpu%d\n", i,
2465 			    txr->bnx_tx_cpuid);
2466 		}
2467 	}
2468 
2469 	error = bnx_setup_intr(sc);
2470 	if (error) {
2471 		ether_ifdetach(ifp);
2472 		goto fail;
2473 	}
2474 	bnx_set_tick_cpuid(sc, FALSE);
2475 
2476 	/*
2477 	 * Create RX standard ring refilling thread
2478 	 */
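	/*
	 * The refill thread defaults to the CPU of RX ring 0; this can be
	 * overridden with the std.cpuid device tunable.
	 */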
2479 	std_cpuid_def = if_ringmap_cpumap(sc->bnx_rx_rmap, 0);
2480 	std_cpuid = device_getenv_int(dev, "std.cpuid", std_cpuid_def);
2481 	if (std_cpuid < 0 || std_cpuid >= ncpus) {
2482 		device_printf(dev, "invalid std.cpuid %d, use %d\n",
2483 		    std_cpuid, std_cpuid_def);
2484 		std_cpuid = std_cpuid_def;
2485 	}
2486 
2487 	std = &sc->bnx_rx_std_ring;
2488 	lwkt_create(bnx_rx_std_refill_ithread, std, &std->bnx_rx_std_ithread,
2489 	    NULL, TDF_NOSTART | TDF_INTTHREAD, std_cpuid,
2490 	    "%s std", device_get_nameunit(dev));
2491 	lwkt_setpri(std->bnx_rx_std_ithread, TDPRI_INT_MED);
2492 	std->bnx_rx_std_ithread->td_preemptable = lwkt_preempt;
2493 
2494 	return(0);
2495 fail:
2496 	bnx_detach(dev);
2497 	return(error);
2498 }
2499 
2500 static int
2501 bnx_detach(device_t dev)
2502 {
2503 	struct bnx_softc *sc = device_get_softc(dev);
2504 	struct bnx_rx_std_ring *std = &sc->bnx_rx_std_ring;
2505 
2506 	if (device_is_attached(dev)) {
2507 		struct ifnet *ifp = &sc->arpcom.ac_if;
2508 
2509 		ifnet_serialize_all(ifp);
2510 		bnx_stop(sc);
2511 		bnx_teardown_intr(sc, sc->bnx_intr_cnt);
2512 		ifnet_deserialize_all(ifp);
2513 
2514 		ether_ifdetach(ifp);
2515 	}
2516 
2517 	if (std->bnx_rx_std_ithread != NULL) {
2518 		tsleep_interlock(std, 0);
2519 
2520 		if (std->bnx_rx_std_ithread->td_gd == mycpu) {
2521 			bnx_rx_std_refill_stop(std);
2522 		} else {
2523 			lwkt_send_ipiq(std->bnx_rx_std_ithread->td_gd,
2524 			    bnx_rx_std_refill_stop, std);
2525 		}
2526 
2527 		tsleep(std, PINTERLOCKED, "bnx_detach", 0);
2528 		if (bootverbose)
2529 			device_printf(dev, "RX std ithread exited\n");
2530 
2531 		lwkt_synchronize_ipiqs("bnx_detach_ipiq");
2532 	}
2533 
2534 	if (sc->bnx_flags & BNX_FLAG_TBI)
2535 		ifmedia_removeall(&sc->bnx_ifmedia);
2536 	if (sc->bnx_miibus)
2537 		device_delete_child(dev, sc->bnx_miibus);
2538 	bus_generic_detach(dev);
2539 
2540 	bnx_free_intr(sc);
2541 
2542 	if (sc->bnx_msix_mem_res != NULL) {
2543 		bus_release_resource(dev, SYS_RES_MEMORY, sc->bnx_msix_mem_rid,
2544 		    sc->bnx_msix_mem_res);
2545 	}
2546 	if (sc->bnx_res != NULL) {
2547 		bus_release_resource(dev, SYS_RES_MEMORY,
2548 		    BGE_PCI_BAR0, sc->bnx_res);
2549 	}
2550 	if (sc->bnx_res2 != NULL) {
2551 		bus_release_resource(dev, SYS_RES_MEMORY,
2552 		    PCIR_BAR(2), sc->bnx_res2);
2553 	}
2554 
2555 	bnx_dma_free(sc);
2556 
2557 	if (sc->bnx_serialize != NULL)
2558 		kfree(sc->bnx_serialize, M_DEVBUF);
2559 
2560 	if (sc->bnx_rx_rmap != NULL)
2561 		if_ringmap_free(sc->bnx_rx_rmap);
2562 	if (sc->bnx_tx_rmap != NULL)
2563 		if_ringmap_free(sc->bnx_tx_rmap);
2564 
2565 	return 0;
2566 }
2567 
2568 static void
2569 bnx_reset(struct bnx_softc *sc)
2570 {
2571 	device_t dev = sc->bnx_dev;
2572 	uint32_t cachesize, command, reset, mac_mode, mac_mode_mask;
2573 	void (*write_op)(struct bnx_softc *, uint32_t, uint32_t);
2574 	int i, val = 0;
2575 	uint16_t devctl;
2576 
2577 	mac_mode_mask = BGE_MACMODE_HALF_DUPLEX | BGE_MACMODE_PORTMODE;
2578 	if (sc->bnx_mfw_flags & BNX_MFW_ON_APE)
2579 		mac_mode_mask |= BGE_MACMODE_APE_RX_EN | BGE_MACMODE_APE_TX_EN;
2580 	mac_mode = CSR_READ_4(sc, BGE_MAC_MODE) & mac_mode_mask;
2581 
2582 	write_op = bnx_writemem_direct;
2583 
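	/*
	 * Acquire the NVRAM software arbitration lock (request SET1, then
	 * wait for GNT1) before issuing the reset.
	 */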
2584 	CSR_WRITE_4(sc, BGE_NVRAM_SWARB, BGE_NVRAMSWARB_SET1);
2585 	for (i = 0; i < 8000; i++) {
2586 		if (CSR_READ_4(sc, BGE_NVRAM_SWARB) & BGE_NVRAMSWARB_GNT1)
2587 			break;
2588 		DELAY(20);
2589 	}
2590 	if (i == 8000)
2591 		if_printf(&sc->arpcom.ac_if, "NVRAM lock timed out!\n");
2592 
2593 	/* Take APE lock when performing reset. */
2594 	bnx_ape_lock(sc, BGE_APE_LOCK_GRC);
2595 
2596 	/* Save some important PCI state. */
2597 	cachesize = pci_read_config(dev, BGE_PCI_CACHESZ, 4);
2598 	command = pci_read_config(dev, BGE_PCI_CMD, 4);
2599 
2600 	pci_write_config(dev, BGE_PCI_MISC_CTL,
2601 	    BGE_PCIMISCCTL_INDIRECT_ACCESS|BGE_PCIMISCCTL_MASK_PCI_INTR|
2602 	    BGE_HIF_SWAP_OPTIONS|BGE_PCIMISCCTL_PCISTATE_RW|
2603 	    BGE_PCIMISCCTL_TAGGED_STATUS, 4);
2604 
2605 	/* Disable fastboot on controllers that support it. */
2606 	if (bootverbose)
2607 		if_printf(&sc->arpcom.ac_if, "Disabling fastboot\n");
2608 	CSR_WRITE_4(sc, BGE_FASTBOOT_PC, 0x0);
2609 
2610 	/*
2611 	 * Write the magic number to SRAM at offset 0xB50.
2612 	 * When firmware finishes its initialization it will
2613 	 * write ~BGE_SRAM_FW_MB_MAGIC to the same location.
2614 	 */
2615 	bnx_writemem_ind(sc, BGE_SRAM_FW_MB, BGE_SRAM_FW_MB_MAGIC);
2616 
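	/*
	 * (65<<1) matches BGE_32BITTIME_66MHZ: program the 32-bit timer
	 * prescaler for a 66MHz core clock.
	 */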
2617 	reset = BGE_MISCCFG_RESET_CORE_CLOCKS|(65<<1);
2618 
2619 	/* XXX: Broadcom Linux driver. */
2620 	/* Force PCI-E 1.0a mode */
2621 	if (!BNX_IS_57765_PLUS(sc) &&
2622 	    CSR_READ_4(sc, BGE_PCIE_PHY_TSTCTL) ==
2623 	    (BGE_PCIE_PHY_TSTCTL_PSCRAM |
2624 	     BGE_PCIE_PHY_TSTCTL_PCIE10)) {
2625 		CSR_WRITE_4(sc, BGE_PCIE_PHY_TSTCTL,
2626 		    BGE_PCIE_PHY_TSTCTL_PSCRAM);
2627 	}
2628 	if (sc->bnx_chipid != BGE_CHIPID_BCM5750_A0) {
2629 		/* Prevent PCIE link training during global reset */
2630 		CSR_WRITE_4(sc, BGE_MISC_CFG, (1<<29));
2631 		reset |= (1<<29);
2632 	}
2633 
2634 	/*
2635 	 * Set the clock to the highest frequency to avoid timeout.
2636 	 */
2637 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5717) {
2638 		BNX_SETBIT(sc, BGE_CPMU_CLCK_ORIDE_ENABLE,
2639 		    BGE_CPMU_MAC_ORIDE_ENABLE);
2640 	} else if (sc->bnx_asicrev == BGE_ASICREV_BCM5719 ||
2641 	    sc->bnx_asicrev ==  BGE_ASICREV_BCM5720) {
2642 		BNX_SETBIT(sc, BGE_CPMU_CLCK_ORIDE,
2643 		    CPMU_CLCK_ORIDE_MAC_ORIDE_EN);
2644 	}
2645 
2646 	/*
2647 	 * Set GPHY Power Down Override to leave GPHY
2648 	 * powered up in D0 uninitialized.
2649 	 */
2650 	if ((sc->bnx_flags & BNX_FLAG_CPMU) == 0)
2651 		reset |= BGE_MISCCFG_GPHY_PD_OVERRIDE;
2652 
2653 	/* Issue global reset */
2654 	write_op(sc, BGE_MISC_CFG, reset);
2655 
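	/* Give the chip 100ms to come out of the global reset. */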
2656 	DELAY(100 * 1000);
2657 
2658 	/* XXX: Broadcom Linux driver. */
2659 	if (sc->bnx_chipid == BGE_CHIPID_BCM5750_A0) {
2660 		uint32_t v;
2661 
2662 		DELAY(500000); /* wait for link training to complete */
2663 		v = pci_read_config(dev, 0xc4, 4);
2664 		pci_write_config(dev, 0xc4, v | (1<<15), 4);
2665 	}
2666 
2667 	devctl = pci_read_config(dev, sc->bnx_pciecap + PCIER_DEVCTRL, 2);
2668 
2669 	/* Disable no snoop and disable relaxed ordering. */
2670 	devctl &= ~(PCIEM_DEVCTL_RELAX_ORDER | PCIEM_DEVCTL_NOSNOOP);
2671 
2672 	/* Old PCI-E chips only support 128 bytes Max PayLoad Size. */
2673 	if ((sc->bnx_flags & BNX_FLAG_CPMU) == 0) {
2674 		devctl &= ~PCIEM_DEVCTL_MAX_PAYLOAD_MASK;
2675 		devctl |= PCIEM_DEVCTL_MAX_PAYLOAD_128;
2676 	}
2677 
2678 	pci_write_config(dev, sc->bnx_pciecap + PCIER_DEVCTRL,
2679 	    devctl, 2);
2680 
2681 	/* Clear error status. */
2682 	pci_write_config(dev, sc->bnx_pciecap + PCIER_DEVSTS,
2683 	    PCIEM_DEVSTS_CORR_ERR |
2684 	    PCIEM_DEVSTS_NFATAL_ERR |
2685 	    PCIEM_DEVSTS_FATAL_ERR |
2686 	    PCIEM_DEVSTS_UNSUPP_REQ, 2);
2687 
2688 	/* Reset some of the PCI state that got zapped by reset */
2689 	pci_write_config(dev, BGE_PCI_MISC_CTL,
2690 	    BGE_PCIMISCCTL_INDIRECT_ACCESS|BGE_PCIMISCCTL_MASK_PCI_INTR|
2691 	    BGE_HIF_SWAP_OPTIONS|BGE_PCIMISCCTL_PCISTATE_RW|
2692 	    BGE_PCIMISCCTL_TAGGED_STATUS, 4);
2693 	val = BGE_PCISTATE_ROM_ENABLE | BGE_PCISTATE_ROM_RETRY_ENABLE;
2694 	if (sc->bnx_mfw_flags & BNX_MFW_ON_APE) {
2695 		val |= BGE_PCISTATE_ALLOW_APE_CTLSPC_WR |
2696 		    BGE_PCISTATE_ALLOW_APE_SHMEM_WR |
2697 		    BGE_PCISTATE_ALLOW_APE_PSPACE_WR;
2698 	}
2699 	pci_write_config(dev, BGE_PCI_PCISTATE, val, 4);
2700 	pci_write_config(dev, BGE_PCI_CACHESZ, cachesize, 4);
2701 	pci_write_config(dev, BGE_PCI_CMD, command, 4);
2702 
2703 	/* Enable memory arbiter */
2704 	CSR_WRITE_4(sc, BGE_MARB_MODE, BGE_MARBMODE_ENABLE);
2705 
2706 	/* Fix up byte swapping */
2707 	CSR_WRITE_4(sc, BGE_MODE_CTL, bnx_dma_swap_options(sc));
2708 
2709 	val = CSR_READ_4(sc, BGE_MAC_MODE);
2710 	val = (val & ~mac_mode_mask) | mac_mode;
2711 	CSR_WRITE_4(sc, BGE_MAC_MODE, val);
2712 	DELAY(40);
2713 
2714 	bnx_ape_unlock(sc, BGE_APE_LOCK_GRC);
2715 
2716 	/*
2717 	 * Poll until we see the 1's complement of the magic number.
2718 	 * This indicates that the firmware initialization is complete.
2719 	 */
2720 	for (i = 0; i < BNX_FIRMWARE_TIMEOUT; i++) {
2721 		val = bnx_readmem_ind(sc, BGE_SRAM_FW_MB);
2722 		if (val == ~BGE_SRAM_FW_MB_MAGIC)
2723 			break;
2724 		DELAY(10);
2725 	}
2726 	if (i == BNX_FIRMWARE_TIMEOUT) {
2727 		if_printf(&sc->arpcom.ac_if, "firmware handshake "
2728 			  "timed out, found 0x%08x\n", val);
2729 	}
2730 
2731 	/* BCM57765 A0 needs additional time before accessing. */
2732 	if (sc->bnx_chipid == BGE_CHIPID_BCM57765_A0)
2733 		DELAY(10 * 1000);
2734 
2735 	/*
2736 	 * The 5704 in TBI mode apparently needs some special
2737 	 * adjustment to ensure the SERDES drive level is set
2738 	 * to 1.2V.
2739 	 */
2740 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5704 &&
2741 	    (sc->bnx_flags & BNX_FLAG_TBI)) {
2742 		uint32_t serdescfg;
2743 
2744 		serdescfg = CSR_READ_4(sc, BGE_SERDES_CFG);
2745 		serdescfg = (serdescfg & ~0xFFF) | 0x880;
2746 		CSR_WRITE_4(sc, BGE_SERDES_CFG, serdescfg);
2747 	}
2748 
2749 	CSR_WRITE_4(sc, BGE_MI_MODE,
2750 	    sc->bnx_mi_mode & ~BGE_MIMODE_AUTOPOLL);
2751 	DELAY(80);
2752 
2753 	/* XXX: Broadcom Linux driver. */
2754 	if (!BNX_IS_57765_PLUS(sc)) {
2755 		uint32_t v;
2756 
2757 		/* Enable Data FIFO protection. */
2758 		v = CSR_READ_4(sc, BGE_PCIE_TLDLPL_PORT);
2759 		CSR_WRITE_4(sc, BGE_PCIE_TLDLPL_PORT, v | (1 << 25));
2760 	}
2761 
2762 	DELAY(10000);
2763 
2764 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5717) {
2765 		BNX_CLRBIT(sc, BGE_CPMU_CLCK_ORIDE_ENABLE,
2766 		    BGE_CPMU_MAC_ORIDE_ENABLE);
2767 	} else if (sc->bnx_asicrev == BGE_ASICREV_BCM5719 ||
2768 	    sc->bnx_asicrev ==  BGE_ASICREV_BCM5720) {
2769 		BNX_CLRBIT(sc, BGE_CPMU_CLCK_ORIDE,
2770 		    CPMU_CLCK_ORIDE_MAC_ORIDE_EN);
2771 	} else if (sc->bnx_asicrev == BGE_ASICREV_BCM5762) {
2772 		/*
2773 		 * Increase the core clock speed to fix the TX timeout for
2774 		 * the 5762 on a 100Mbps link.
2775 		 */
2776 		BNX_SETBIT(sc, BGE_CPMU_CLCK_ORIDE_ENABLE,
2777 		    BGE_CPMU_MAC_ORIDE_ENABLE);
2778 	}
2779 }
2780 
2781 /*
2782  * Frame reception handling. This is called if there's a frame
2783  * on the receive return list.
2784  *
2785  * Note: we have to be able to handle two possibilities here:
2786  * 1) the frame is from the jumbo receive ring
2787  * 2) the frame is from the standard receive ring
2788  */
2789 
2790 static void
2791 bnx_rxeof(struct bnx_rx_ret_ring *ret, uint16_t rx_prod, int count)
2792 {
2793 	struct bnx_softc *sc = ret->bnx_sc;
2794 	struct bnx_rx_std_ring *std = ret->bnx_std;
2795 	struct ifnet *ifp = &sc->arpcom.ac_if;
2796 	int std_used = 0, cpuid = mycpuid;
2797 
2798 	while (ret->bnx_rx_saved_considx != rx_prod && count != 0) {
2799 		struct pktinfo pi0, *pi = NULL;
2800 		struct bge_rx_bd *cur_rx;
2801 		struct bnx_rx_buf *rb;
2802 		uint32_t rxidx;
2803 		struct mbuf *m = NULL;
2804 		uint16_t vlan_tag = 0;
2805 		int have_tag = 0;
2806 
2807 		--count;
2808 
2809 		cur_rx = &ret->bnx_rx_ret_ring[ret->bnx_rx_saved_considx];
2810 
2811 		rxidx = cur_rx->bge_idx;
2812 		KKASSERT(rxidx < BGE_STD_RX_RING_CNT);
2813 
2814 		BNX_INC(ret->bnx_rx_saved_considx, BNX_RETURN_RING_CNT);
2815 #ifdef BNX_RSS_DEBUG
2816 		ret->bnx_rx_pkt++;
2817 #endif
2818 
2819 		if (cur_rx->bge_flags & BGE_RXBDFLAG_VLAN_TAG) {
2820 			have_tag = 1;
2821 			vlan_tag = cur_rx->bge_vlan_tag;
2822 		}
2823 
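		/*
		 * Once enough frames have been processed, publish the
		 * consumed standard ring slots and kick the refill thread.
		 */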
2824 		if (ret->bnx_rx_cnt >= ret->bnx_rx_cntmax) {
2825 			atomic_add_int(&std->bnx_rx_std_used, std_used);
2826 			std_used = 0;
2827 
2828 			bnx_rx_std_refill_sched(ret, std);
2829 		}
2830 		ret->bnx_rx_cnt++;
2831 		++std_used;
2832 
2833 		rb = &std->bnx_rx_std_buf[rxidx];
2834 		m = rb->bnx_rx_mbuf;
2835 		if (cur_rx->bge_flags & BGE_RXBDFLAG_ERROR) {
2836 			IFNET_STAT_INC(ifp, ierrors, 1);
2837 			cpu_sfence();
2838 			rb->bnx_rx_refilled = 1;
2839 			continue;
2840 		}
2841 		if (bnx_newbuf_std(ret, rxidx, 0)) {
2842 			IFNET_STAT_INC(ifp, ierrors, 1);
2843 			continue;
2844 		}
2845 
2846 		IFNET_STAT_INC(ifp, ipackets, 1);
2847 		m->m_pkthdr.len = m->m_len = cur_rx->bge_len - ETHER_CRC_LEN;
2848 		m->m_pkthdr.rcvif = ifp;
2849 
2850 		if ((ifp->if_capenable & IFCAP_RXCSUM) &&
2851 		    (cur_rx->bge_flags & BGE_RXBDFLAG_IPV6) == 0) {
2852 			if (cur_rx->bge_flags & BGE_RXBDFLAG_IP_CSUM) {
2853 				m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
2854 				if ((cur_rx->bge_error_flag &
2855 				    BGE_RXERRFLAG_IP_CSUM_NOK) == 0)
2856 					m->m_pkthdr.csum_flags |= CSUM_IP_VALID;
2857 			}
2858 			if (cur_rx->bge_flags & BGE_RXBDFLAG_TCP_UDP_CSUM) {
2859 				m->m_pkthdr.csum_data =
2860 				    cur_rx->bge_tcp_udp_csum;
2861 				m->m_pkthdr.csum_flags |= CSUM_DATA_VALID |
2862 				    CSUM_PSEUDO_HDR;
2863 			}
2864 		}
2865 		if (ifp->if_capenable & IFCAP_RSS) {
2866 			pi = bnx_rss_info(&pi0, cur_rx);
2867 			if (pi != NULL &&
2868 			    (cur_rx->bge_flags & BGE_RXBDFLAG_RSS_HASH))
2869 				m_sethash(m, toeplitz_hash(cur_rx->bge_hash));
2870 		}
2871 
2872 		/*
2873 		 * If we received a packet with a vlan tag, pass it
2874 		 * to vlan_input() instead of ether_input().
2875 		 */
2876 		if (have_tag) {
2877 			m->m_flags |= M_VLANTAG;
2878 			m->m_pkthdr.ether_vlantag = vlan_tag;
2879 		}
2880 		ifp->if_input(ifp, m, pi, cpuid);
2881 	}
2882 	bnx_writembx(sc, ret->bnx_rx_mbx, ret->bnx_rx_saved_considx);
2883 
2884 	if (std_used > 0) {
2885 		int cur_std_used;
2886 
2887 		cur_std_used = atomic_fetchadd_int(&std->bnx_rx_std_used,
2888 		    std_used);
2889 		if (cur_std_used + std_used >= (BGE_STD_RX_RING_CNT / 2)) {
2890 #ifdef BNX_RSS_DEBUG
2891 			ret->bnx_rx_force_sched++;
2892 #endif
2893 			bnx_rx_std_refill_sched(ret, std);
2894 		}
2895 	}
2896 }
2897 
2898 static void
2899 bnx_txeof(struct bnx_tx_ring *txr, uint16_t tx_cons)
2900 {
2901 	struct ifnet *ifp = &txr->bnx_sc->arpcom.ac_if;
2902 
2903 	/*
2904 	 * Go through our tx ring and free mbufs for those
2905 	 * frames that have been sent.
2906 	 */
2907 	while (txr->bnx_tx_saved_considx != tx_cons) {
2908 		struct bnx_tx_buf *buf;
2909 		uint32_t idx = 0;
2910 
2911 		idx = txr->bnx_tx_saved_considx;
2912 		buf = &txr->bnx_tx_buf[idx];
2913 		if (buf->bnx_tx_mbuf != NULL) {
2914 			IFNET_STAT_INC(ifp, opackets, 1);
2915 #ifdef BNX_TSS_DEBUG
2916 			txr->bnx_tx_pkt++;
2917 #endif
2918 			bus_dmamap_unload(txr->bnx_tx_mtag,
2919 			    buf->bnx_tx_dmamap);
2920 			m_freem(buf->bnx_tx_mbuf);
2921 			buf->bnx_tx_mbuf = NULL;
2922 		}
2923 		txr->bnx_tx_cnt--;
2924 		BNX_INC(txr->bnx_tx_saved_considx, BGE_TX_RING_CNT);
2925 	}
2926 
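	/*
	 * Clear the output-active flag once enough TX descriptors are free
	 * to hold a maximally fragmented frame plus the reserved slots.
	 */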
2927 	if ((BGE_TX_RING_CNT - txr->bnx_tx_cnt) >=
2928 	    (BNX_NSEG_RSVD + BNX_NSEG_SPARE))
2929 		ifsq_clr_oactive(txr->bnx_ifsq);
2930 
2931 	if (txr->bnx_tx_cnt == 0)
2932 		ifsq_watchdog_set_count(&txr->bnx_tx_watchdog, 0);
2933 
2934 	if (!ifsq_is_empty(txr->bnx_ifsq))
2935 		ifsq_devstart(txr->bnx_ifsq);
2936 }
2937 
2938 static int
2939 bnx_handle_status(struct bnx_softc *sc)
2940 {
2941 	uint32_t status;
2942 	int handle = 0;
2943 
2944 	status = *sc->bnx_hw_status;
2945 
2946 	if (status & BGE_STATFLAG_ERROR) {
2947 		uint32_t val;
2948 		int reset = 0;
2949 
2950 		sc->bnx_errors++;
2951 
2952 		val = CSR_READ_4(sc, BGE_FLOW_ATTN);
2953 		if (val & ~BGE_FLOWATTN_MB_LOWAT) {
2954 			if_printf(&sc->arpcom.ac_if,
2955 			    "flow attn 0x%08x\n", val);
2956 			reset = 1;
2957 		}
2958 
2959 		val = CSR_READ_4(sc, BGE_MSI_STATUS);
2960 		if (val & ~BGE_MSISTAT_MSI_PCI_REQ) {
2961 			if_printf(&sc->arpcom.ac_if,
2962 			    "msi status 0x%08x\n", val);
2963 			reset = 1;
2964 		}
2965 
2966 		val = CSR_READ_4(sc, BGE_RDMA_STATUS);
2967 		if (val) {
2968 			    "rdma status 0x%08x\n", val);
2969 			    "rmda status 0x%08x\n", val);
2970 			reset = 1;
2971 		}
2972 
2973 		val = CSR_READ_4(sc, BGE_WDMA_STATUS);
2974 		if (val) {
2975 			if_printf(&sc->arpcom.ac_if,
2976 			    "wdma status 0x%08x\n", val);
2977 			reset = 1;
2978 		}
2979 
2980 		if (reset) {
2981 			bnx_serialize_skipmain(sc);
2982 			bnx_init(sc);
2983 			bnx_deserialize_skipmain(sc);
2984 		}
2985 		handle = 1;
2986 	}
2987 
2988 	if ((status & BGE_STATFLAG_LINKSTATE_CHANGED) || sc->bnx_link_evt) {
2989 		if (bootverbose) {
2990 			if_printf(&sc->arpcom.ac_if, "link change, "
2991 			    "link_evt %d\n", sc->bnx_link_evt);
2992 		}
2993 		bnx_link_poll(sc);
2994 		handle = 1;
2995 	}
2996 
2997 	return handle;
2998 }
2999 
3000 #ifdef IFPOLL_ENABLE
3001 
3002 static void
3003 bnx_npoll_rx(struct ifnet *ifp __unused, void *xret, int cycle)
3004 {
3005 	struct bnx_rx_ret_ring *ret = xret;
3006 	uint16_t rx_prod;
3007 
3008 	ASSERT_SERIALIZED(&ret->bnx_rx_ret_serialize);
3009 
3010 	ret->bnx_saved_status_tag = *ret->bnx_hw_status_tag;
3011 	cpu_lfence();
3012 
3013 	rx_prod = *ret->bnx_rx_considx;
3014 	if (ret->bnx_rx_saved_considx != rx_prod)
3015 		bnx_rxeof(ret, rx_prod, cycle);
3016 }
3017 
3018 static void
3019 bnx_npoll_tx_notag(struct ifnet *ifp __unused, void *xtxr, int cycle __unused)
3020 {
3021 	struct bnx_tx_ring *txr = xtxr;
3022 	uint16_t tx_cons;
3023 
3024 	ASSERT_SERIALIZED(&txr->bnx_tx_serialize);
3025 
3026 	tx_cons = *txr->bnx_tx_considx;
3027 	if (txr->bnx_tx_saved_considx != tx_cons)
3028 		bnx_txeof(txr, tx_cons);
3029 }
3030 
3031 static void
3032 bnx_npoll_tx(struct ifnet *ifp, void *xtxr, int cycle)
3033 {
3034 	struct bnx_tx_ring *txr = xtxr;
3035 
3036 	ASSERT_SERIALIZED(&txr->bnx_tx_serialize);
3037 
3038 	txr->bnx_saved_status_tag = *txr->bnx_hw_status_tag;
3039 	cpu_lfence();
3040 	bnx_npoll_tx_notag(ifp, txr, cycle);
3041 }
3042 
3043 static void
3044 bnx_npoll_status_notag(struct ifnet *ifp)
3045 {
3046 	struct bnx_softc *sc = ifp->if_softc;
3047 
3048 	ASSERT_SERIALIZED(&sc->bnx_main_serialize);
3049 
3050 	if (bnx_handle_status(sc)) {
3051 		/*
3052 		 * Status changes are handled; force the chip to
3053 		 * update the status block to reflect whether there
3054 		 * are more status changes or not, else stale status
3055 		 * changes are always seen.
3056 		 */
3057 		BNX_SETBIT(sc, BGE_HCC_MODE, BGE_HCCMODE_COAL_NOW);
3058 	}
3059 }
3060 
3061 static void
3062 bnx_npoll_status(struct ifnet *ifp)
3063 {
3064 	struct bnx_softc *sc = ifp->if_softc;
3065 
3066 	ASSERT_SERIALIZED(&sc->bnx_main_serialize);
3067 
3068 	sc->bnx_saved_status_tag = *sc->bnx_hw_status_tag;
3069 	cpu_lfence();
3070 	bnx_npoll_status_notag(ifp);
3071 }
3072 
3073 static void
3074 bnx_npoll(struct ifnet *ifp, struct ifpoll_info *info)
3075 {
3076 	struct bnx_softc *sc = ifp->if_softc;
3077 	int i;
3078 
3079 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3080 
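	/*
	 * A non-NULL info means polling is being enabled: register the
	 * status, TX and RX handlers and their serializers.  A NULL info
	 * means polling is being disabled: fall back to interrupts.
	 */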
3081 	if (info != NULL) {
3082 		if (sc->bnx_flags & BNX_FLAG_STATUS_HASTAG)
3083 			info->ifpi_status.status_func = bnx_npoll_status;
3084 		else
3085 			info->ifpi_status.status_func = bnx_npoll_status_notag;
3086 		info->ifpi_status.serializer = &sc->bnx_main_serialize;
3087 
3088 		for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
3089 			struct bnx_tx_ring *txr = &sc->bnx_tx_ring[i];
3090 			int cpu = if_ringmap_cpumap(sc->bnx_tx_rmap, i);
3091 
3092 			KKASSERT(cpu < netisr_ncpus);
3093 			if (sc->bnx_flags & BNX_FLAG_RXTX_BUNDLE) {
3094 				info->ifpi_tx[cpu].poll_func =
3095 				    bnx_npoll_tx_notag;
3096 			} else {
3097 				info->ifpi_tx[cpu].poll_func = bnx_npoll_tx;
3098 			}
3099 			info->ifpi_tx[cpu].arg = txr;
3100 			info->ifpi_tx[cpu].serializer = &txr->bnx_tx_serialize;
3101 			ifsq_set_cpuid(txr->bnx_ifsq, cpu);
3102 		}
3103 
3104 		for (i = 0; i < sc->bnx_rx_retcnt; ++i) {
3105 			struct bnx_rx_ret_ring *ret = &sc->bnx_rx_ret_ring[i];
3106 			int cpu = if_ringmap_cpumap(sc->bnx_rx_rmap, i);
3107 
3108 			KKASSERT(cpu < netisr_ncpus);
3109 			info->ifpi_rx[cpu].poll_func = bnx_npoll_rx;
3110 			info->ifpi_rx[cpu].arg = ret;
3111 			info->ifpi_rx[cpu].serializer =
3112 			    &ret->bnx_rx_ret_serialize;
3113 		}
3114 
3115 		if (ifp->if_flags & IFF_RUNNING) {
3116 			bnx_disable_intr(sc);
3117 			bnx_set_tick_cpuid(sc, TRUE);
3118 
3119 			sc->bnx_coal_chg = BNX_TX_COAL_BDS_CHG |
3120 			    BNX_RX_COAL_BDS_CHG;
3121 			bnx_coal_change(sc);
3122 		}
3123 	} else {
3124 		for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
3125 			ifsq_set_cpuid(sc->bnx_tx_ring[i].bnx_ifsq,
3126 			    sc->bnx_tx_ring[i].bnx_tx_cpuid);
3127 		}
3128 		if (ifp->if_flags & IFF_RUNNING) {
3129 			sc->bnx_coal_chg = BNX_TX_COAL_BDS_CHG |
3130 			    BNX_RX_COAL_BDS_CHG;
3131 			bnx_coal_change(sc);
3132 
3133 			bnx_enable_intr(sc);
3134 			bnx_set_tick_cpuid(sc, FALSE);
3135 		}
3136 	}
3137 }
3138 
3139 #endif	/* IFPOLL_ENABLE */
3140 
3141 static void
3142 bnx_intr_legacy(void *xsc)
3143 {
3144 	struct bnx_softc *sc = xsc;
3145 	struct bnx_rx_ret_ring *ret = &sc->bnx_rx_ret_ring[0];
3146 
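	/*
	 * If the status tag has not changed, consult the PCI state register
	 * to see whether the interrupt line is actually asserted; if it is
	 * not, this shared legacy interrupt belongs to another device.
	 */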
3147 	if (ret->bnx_saved_status_tag == *ret->bnx_hw_status_tag) {
3148 		uint32_t val;
3149 
3150 		val = pci_read_config(sc->bnx_dev, BGE_PCI_PCISTATE, 4);
3151 		if (val & BGE_PCISTAT_INTR_NOTACT)
3152 			return;
3153 	}
3154 
3155 	/*
3156 	 * NOTE:
3157 	 * The interrupt will have to be disabled if tagged status
3158 	 * is used, else the interrupt will always be asserted on
3159 	 * certain chips (at least on BCM5750 AX/BX).
3160 	 */
3161 	bnx_writembx(sc, BGE_MBX_IRQ0_LO, 1);
3162 
3163 	bnx_intr(sc);
3164 }
3165 
3166 static void
3167 bnx_msi(void *xsc)
3168 {
3169 	bnx_intr(xsc);
3170 }
3171 
3172 static void
3173 bnx_intr(struct bnx_softc *sc)
3174 {
3175 	struct ifnet *ifp = &sc->arpcom.ac_if;
3176 	struct bnx_rx_ret_ring *ret = &sc->bnx_rx_ret_ring[0];
3177 
3178 	ASSERT_SERIALIZED(&sc->bnx_main_serialize);
3179 
3180 	ret->bnx_saved_status_tag = *ret->bnx_hw_status_tag;
3181 	/*
3182 	 * Use a load fence to ensure that status_tag is saved
3183 	 * before rx_prod, tx_cons and status.
3184 	 */
3185 	cpu_lfence();
3186 
3187 	bnx_handle_status(sc);
3188 
3189 	if (ifp->if_flags & IFF_RUNNING) {
3190 		struct bnx_tx_ring *txr = &sc->bnx_tx_ring[0];
3191 		uint16_t rx_prod, tx_cons;
3192 
3193 		lwkt_serialize_enter(&ret->bnx_rx_ret_serialize);
3194 		rx_prod = *ret->bnx_rx_considx;
3195 		if (ret->bnx_rx_saved_considx != rx_prod)
3196 			bnx_rxeof(ret, rx_prod, -1);
3197 		lwkt_serialize_exit(&ret->bnx_rx_ret_serialize);
3198 
3199 		lwkt_serialize_enter(&txr->bnx_tx_serialize);
3200 		tx_cons = *txr->bnx_tx_considx;
3201 		if (txr->bnx_tx_saved_considx != tx_cons)
3202 			bnx_txeof(txr, tx_cons);
3203 		lwkt_serialize_exit(&txr->bnx_tx_serialize);
3204 	}
3205 
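	/*
	 * Write the saved status tag back to the interrupt mailbox; this
	 * acknowledges the processed status block and re-enables the
	 * interrupt.
	 */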
3206 	bnx_writembx(sc, BGE_MBX_IRQ0_LO, ret->bnx_saved_status_tag << 24);
3207 }
3208 
3209 static void
3210 bnx_msix_tx_status(void *xtxr)
3211 {
3212 	struct bnx_tx_ring *txr = xtxr;
3213 	struct bnx_softc *sc = txr->bnx_sc;
3214 	struct ifnet *ifp = &sc->arpcom.ac_if;
3215 
3216 	ASSERT_SERIALIZED(&sc->bnx_main_serialize);
3217 
3218 	txr->bnx_saved_status_tag = *txr->bnx_hw_status_tag;
3219 	/*
3220 	 * Use a load fence to ensure that status_tag is saved
3221 	 * before tx_cons and status.
3222 	 */
3223 	cpu_lfence();
3224 
3225 	bnx_handle_status(sc);
3226 
3227 	if (ifp->if_flags & IFF_RUNNING) {
3228 		uint16_t tx_cons;
3229 
3230 		lwkt_serialize_enter(&txr->bnx_tx_serialize);
3231 		tx_cons = *txr->bnx_tx_considx;
3232 		if (txr->bnx_tx_saved_considx != tx_cons)
3233 			bnx_txeof(txr, tx_cons);
3234 		lwkt_serialize_exit(&txr->bnx_tx_serialize);
3235 	}
3236 
3237 	bnx_writembx(sc, BGE_MBX_IRQ0_LO, txr->bnx_saved_status_tag << 24);
3238 }
3239 
3240 static void
3241 bnx_msix_rx(void *xret)
3242 {
3243 	struct bnx_rx_ret_ring *ret = xret;
3244 	uint16_t rx_prod;
3245 
3246 	ASSERT_SERIALIZED(&ret->bnx_rx_ret_serialize);
3247 
3248 	ret->bnx_saved_status_tag = *ret->bnx_hw_status_tag;
3249 	/*
3250 	 * Use a load fence to ensure that status_tag is saved
3251 	 * before rx_prod.
3252 	 */
3253 	cpu_lfence();
3254 
3255 	rx_prod = *ret->bnx_rx_considx;
3256 	if (ret->bnx_rx_saved_considx != rx_prod)
3257 		bnx_rxeof(ret, rx_prod, -1);
3258 
3259 	bnx_writembx(ret->bnx_sc, ret->bnx_msix_mbx,
3260 	    ret->bnx_saved_status_tag << 24);
3261 }
3262 
3263 static void
3264 bnx_msix_rxtx(void *xret)
3265 {
3266 	struct bnx_rx_ret_ring *ret = xret;
3267 	struct bnx_tx_ring *txr = ret->bnx_txr;
3268 	uint16_t rx_prod, tx_cons;
3269 
3270 	ASSERT_SERIALIZED(&ret->bnx_rx_ret_serialize);
3271 
3272 	ret->bnx_saved_status_tag = *ret->bnx_hw_status_tag;
3273 	/*
3274 	 * Use a load fence to ensure that status_tag is saved
3275 	 * before rx_prod and tx_cons.
3276 	 */
3277 	cpu_lfence();
3278 
3279 	rx_prod = *ret->bnx_rx_considx;
3280 	if (ret->bnx_rx_saved_considx != rx_prod)
3281 		bnx_rxeof(ret, rx_prod, -1);
3282 
3283 	lwkt_serialize_enter(&txr->bnx_tx_serialize);
3284 	tx_cons = *txr->bnx_tx_considx;
3285 	if (txr->bnx_tx_saved_considx != tx_cons)
3286 		bnx_txeof(txr, tx_cons);
3287 	lwkt_serialize_exit(&txr->bnx_tx_serialize);
3288 
3289 	bnx_writembx(ret->bnx_sc, ret->bnx_msix_mbx,
3290 	    ret->bnx_saved_status_tag << 24);
3291 }
3292 
3293 static void
3294 bnx_msix_status(void *xsc)
3295 {
3296 	struct bnx_softc *sc = xsc;
3297 
3298 	ASSERT_SERIALIZED(&sc->bnx_main_serialize);
3299 
3300 	sc->bnx_saved_status_tag = *sc->bnx_hw_status_tag;
3301 	/*
3302 	 * Use a load fence to ensure that status_tag is saved
3303 	 * before status.
3304 	 */
3305 	cpu_lfence();
3306 
3307 	bnx_handle_status(sc);
3308 
3309 	bnx_writembx(sc, BGE_MBX_IRQ0_LO, sc->bnx_saved_status_tag << 24);
3310 }
3311 
3312 static void
3313 bnx_tick(void *xsc)
3314 {
3315 	struct bnx_softc *sc = xsc;
3316 
3317 	lwkt_serialize_enter(&sc->bnx_main_serialize);
3318 
3319 	bnx_stats_update_regs(sc);
3320 
3321 	if (sc->bnx_flags & BNX_FLAG_TBI) {
3322 		/*
3323 		 * Since auto-polling can't be used in TBI mode, we poll the
3324 		 * link status manually. Here we register a pending link event
3325 		 * and trigger an interrupt.
3326 		 */
3327 		sc->bnx_link_evt++;
3328 		BNX_SETBIT(sc, BGE_HCC_MODE, BGE_HCCMODE_COAL_NOW);
3329 	} else if (!sc->bnx_link) {
3330 		mii_tick(device_get_softc(sc->bnx_miibus));
3331 	}
3332 
3333 	callout_reset_bycpu(&sc->bnx_tick_timer, hz, bnx_tick, sc,
3334 	    sc->bnx_tick_cpuid);
3335 
3336 	lwkt_serialize_exit(&sc->bnx_main_serialize);
3337 }
3338 
3339 static void
3340 bnx_stats_update_regs(struct bnx_softc *sc)
3341 {
3342 	struct ifnet *ifp = &sc->arpcom.ac_if;
3343 	struct bge_mac_stats_regs stats;
3344 	uint32_t *s, val;
3345 	int i;
3346 
3347 	s = (uint32_t *)&stats;
3348 	for (i = 0; i < sizeof(struct bge_mac_stats_regs); i += 4) {
3349 		*s = CSR_READ_4(sc, BGE_RX_STATS + i);
3350 		s++;
3351 	}
3352 
3353 	IFNET_STAT_SET(ifp, collisions,
3354 	   (stats.dot3StatsSingleCollisionFrames +
3355 	   stats.dot3StatsMultipleCollisionFrames +
3356 	   stats.dot3StatsExcessiveCollisions +
3357 	   stats.dot3StatsLateCollisions));
3358 
3359 	val = CSR_READ_4(sc, BGE_RXLP_LOCSTAT_OUT_OF_BDS);
3360 	sc->bnx_norxbds += val;
3361 
3362 	if (sc->bnx_rdma_wa != 0) {
3363 		if (stats.ifHCOutUcastPkts + stats.ifHCOutMulticastPkts +
3364 		    stats.ifHCOutBroadcastPkts > BGE_RDMA_NCHAN) {
3365 			CSR_WRITE_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL,
3366 			    CSR_READ_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL) &
3367 			    ~sc->bnx_rdma_wa);
3368 			sc->bnx_rdma_wa = 0;
3369 			if (bootverbose)
3370 				if_printf(ifp, "disable RDMA WA\n");
3371 		}
3372 	}
3373 }
3374 
3375 /*
3376  * Encapsulate an mbuf chain in the tx ring by coupling the mbuf data
3377  * pointers to descriptors.
3378  */
3379 static int
3380 bnx_encap(struct bnx_tx_ring *txr, struct mbuf **m_head0, uint32_t *txidx,
3381     int *segs_used)
3382 {
3383 	struct bge_tx_bd *d = NULL;
3384 	uint16_t csum_flags = 0, vlan_tag = 0, mss = 0;
3385 	bus_dma_segment_t segs[BNX_NSEG_NEW];
3386 	bus_dmamap_t map;
3387 	int error, maxsegs, nsegs, idx, i;
3388 	struct mbuf *m_head = *m_head0, *m_new;
3389 
3390 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
3391 #ifdef BNX_TSO_DEBUG
3392 		int tso_nsegs;
3393 #endif
3394 
3395 		error = bnx_setup_tso(txr, m_head0, &mss, &csum_flags);
3396 		if (error)
3397 			return error;
3398 		m_head = *m_head0;
3399 
3400 #ifdef BNX_TSO_DEBUG
3401 		tso_nsegs = (m_head->m_pkthdr.len /
3402 		    m_head->m_pkthdr.tso_segsz) - 1;
3403 		if (tso_nsegs > (BNX_TSO_NSTATS - 1))
3404 			tso_nsegs = BNX_TSO_NSTATS - 1;
3405 		else if (tso_nsegs < 0)
3406 			tso_nsegs = 0;
3407 		txr->bnx_sc->bnx_tsosegs[tso_nsegs]++;
3408 #endif
3409 	} else if (m_head->m_pkthdr.csum_flags & BNX_CSUM_FEATURES) {
3410 		if (m_head->m_pkthdr.csum_flags & CSUM_IP)
3411 			csum_flags |= BGE_TXBDFLAG_IP_CSUM;
3412 		if (m_head->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))
3413 			csum_flags |= BGE_TXBDFLAG_TCP_UDP_CSUM;
3414 		if (m_head->m_flags & M_LASTFRAG)
3415 			csum_flags |= BGE_TXBDFLAG_IP_FRAG_END;
3416 		else if (m_head->m_flags & M_FRAG)
3417 			csum_flags |= BGE_TXBDFLAG_IP_FRAG;
3418 	}
3419 	if (m_head->m_flags & M_VLANTAG) {
3420 		csum_flags |= BGE_TXBDFLAG_VLAN_TAG;
3421 		vlan_tag = m_head->m_pkthdr.ether_vlantag;
3422 	}
3423 
3424 	idx = *txidx;
3425 	map = txr->bnx_tx_buf[idx].bnx_tx_dmamap;
3426 
3427 	maxsegs = (BGE_TX_RING_CNT - txr->bnx_tx_cnt) - BNX_NSEG_RSVD;
3428 	KASSERT(maxsegs >= BNX_NSEG_SPARE,
3429 		("not enough segments %d", maxsegs));
3430 
3431 	if (maxsegs > BNX_NSEG_NEW)
3432 		maxsegs = BNX_NSEG_NEW;
3433 
3434 	/*
3435 	 * Pad outbound frame to BNX_MIN_FRAMELEN for an unusual reason.
3436 	 * The bge hardware will pad out Tx runts to BNX_MIN_FRAMELEN,
3437 	 * but when such padded frames employ the bge IP/TCP checksum
3438 	 * offload, the hardware checksum assist gives incorrect results
3439 	 * (possibly from incorporating its own padding into the UDP/TCP
3440 	 * checksum; who knows).  If we pad such runts with zeros, the
3441 	 * onboard checksum comes out correct.
3442 	 */
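	/*
	 * For example (illustrative only): a short UDP datagram whose
	 * total length is below BNX_MIN_FRAMELEN is extended with zero
	 * bytes by m_devpad() below, so the hardware checksum engine
	 * computes over deterministic padding instead of whatever the
	 * chip would have appended on its own.
	 */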
3443 	if ((csum_flags & BGE_TXBDFLAG_TCP_UDP_CSUM) &&
3444 	    m_head->m_pkthdr.len < BNX_MIN_FRAMELEN) {
3445 		error = m_devpad(m_head, BNX_MIN_FRAMELEN);
3446 		if (error)
3447 			goto back;
3448 	}
3449 
3450 	if ((txr->bnx_tx_flags & BNX_TX_FLAG_SHORTDMA) &&
3451 	    m_head->m_next != NULL) {
3452 		m_new = bnx_defrag_shortdma(m_head);
3453 		if (m_new == NULL) {
3454 			error = ENOBUFS;
3455 			goto back;
3456 		}
3457 		*m_head0 = m_head = m_new;
3458 	}
3459 	if ((m_head->m_pkthdr.csum_flags & CSUM_TSO) == 0 &&
3460 	    (txr->bnx_tx_flags & BNX_TX_FLAG_FORCE_DEFRAG) &&
3461 	    m_head->m_next != NULL) {
3462 		/*
3463 		 * Forcefully defragment the mbuf chain to work around a
3464 		 * hardware limitation that allows only a single outstanding
3465 		 * DMA read operation.  If defragmentation fails, keep going
3466 		 * with the original mbuf chain.
3467 		 */
3468 		m_new = m_defrag(m_head, M_NOWAIT);
3469 		if (m_new != NULL)
3470 			*m_head0 = m_head = m_new;
3471 	}
3472 
3473 	error = bus_dmamap_load_mbuf_defrag(txr->bnx_tx_mtag, map,
3474 	    m_head0, segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
3475 	if (error)
3476 		goto back;
3477 	*segs_used += nsegs;
3478 
3479 	m_head = *m_head0;
3480 	bus_dmamap_sync(txr->bnx_tx_mtag, map, BUS_DMASYNC_PREWRITE);
3481 
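	/*
	 * Each DMA segment returned by the load above consumes one send
	 * BD.  Every BD of this frame carries the same checksum/VLAN/MSS
	 * attributes; only the last one is marked BGE_TXBDFLAG_END below.
	 */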
3482 	for (i = 0; ; i++) {
3483 		d = &txr->bnx_tx_ring[idx];
3484 
3485 		d->bge_addr.bge_addr_lo = BGE_ADDR_LO(segs[i].ds_addr);
3486 		d->bge_addr.bge_addr_hi = BGE_ADDR_HI(segs[i].ds_addr);
3487 		d->bge_len = segs[i].ds_len;
3488 		d->bge_flags = csum_flags;
3489 		d->bge_vlan_tag = vlan_tag;
3490 		d->bge_mss = mss;
3491 
3492 		if (i == nsegs - 1)
3493 			break;
3494 		BNX_INC(idx, BGE_TX_RING_CNT);
3495 	}
3496 	/* Mark the last segment as end of packet... */
3497 	d->bge_flags |= BGE_TXBDFLAG_END;
3498 
3499 	/*
3500 	 * Ensure that the map for this transmission is placed at
3501 	 * the array index of the last descriptor in this chain.
3502 	 */
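	/*
	 * The mbuf pointer for this frame is stored at the index of its
	 * last descriptor (just below), and TX completion is expected to
	 * unload the DMA map found at that same index, so the map that
	 * was actually used to load the frame has to end up there; the
	 * starting slot inherits the idle map in exchange.
	 */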
3503 	txr->bnx_tx_buf[*txidx].bnx_tx_dmamap = txr->bnx_tx_buf[idx].bnx_tx_dmamap;
3504 	txr->bnx_tx_buf[idx].bnx_tx_dmamap = map;
3505 	txr->bnx_tx_buf[idx].bnx_tx_mbuf = m_head;
3506 	txr->bnx_tx_cnt += nsegs;
3507 
3508 	BNX_INC(idx, BGE_TX_RING_CNT);
3509 	*txidx = idx;
3510 back:
3511 	if (error) {
3512 		m_freem(*m_head0);
3513 		*m_head0 = NULL;
3514 	}
3515 	return error;
3516 }
3517 
3518 /*
3519  * Main transmit routine. To avoid having to do mbuf copies, we put pointers
3520  * to the mbuf data regions directly in the transmit descriptors.
3521  */
3522 static void
3523 bnx_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
3524 {
3525 	struct bnx_tx_ring *txr = ifsq_get_priv(ifsq);
3526 	struct mbuf *m_head = NULL;
3527 	uint32_t prodidx;
3528 	int nsegs = 0;
3529 
3530 	KKASSERT(txr->bnx_ifsq == ifsq);
3531 	ASSERT_SERIALIZED(&txr->bnx_tx_serialize);
3532 
3533 	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
3534 		return;
3535 
3536 	prodidx = txr->bnx_tx_prodidx;
3537 
3538 	while (txr->bnx_tx_buf[prodidx].bnx_tx_mbuf == NULL) {
3539 		/*
3540 		 * Sanity check: avoid coming within BNX_NSEG_RSVD
3541 		 * descriptors of the end of the ring.  Also make
3542 		 * sure there are BNX_NSEG_SPARE descriptors for
3543 		 * jumbo buffers' or TSO segments' defragmentation.
3544 		 */
3545 		if ((BGE_TX_RING_CNT - txr->bnx_tx_cnt) <
3546 		    (BNX_NSEG_RSVD + BNX_NSEG_SPARE)) {
3547 			ifsq_set_oactive(ifsq);
3548 			break;
3549 		}
3550 
3551 		m_head = ifsq_dequeue(ifsq);
3552 		if (m_head == NULL)
3553 			break;
3554 
3555 		/*
3556 		 * Pack the data into the transmit ring. If we
3557 		 * don't have room, set the OACTIVE flag and wait
3558 		 * for the NIC to drain the ring.
3559 		 */
3560 		if (bnx_encap(txr, &m_head, &prodidx, &nsegs)) {
3561 			ifsq_set_oactive(ifsq);
3562 			IFNET_STAT_INC(ifp, oerrors, 1);
3563 			break;
3564 		}
3565 
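		/*
		 * Batch doorbell writes: the TX producer index is pushed
		 * to the chip only after at least bnx_tx_wreg segments
		 * have been queued (or when the loop exits below), which
		 * reduces the number of mailbox writes per packet burst.
		 */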
3566 		if (nsegs >= txr->bnx_tx_wreg) {
3567 			/* Transmit */
3568 			bnx_writembx(txr->bnx_sc, txr->bnx_tx_mbx, prodidx);
3569 			nsegs = 0;
3570 		}
3571 
3572 		ETHER_BPF_MTAP(ifp, m_head);
3573 
3574 		/*
3575 		 * Set a timeout in case the chip goes out to lunch.
3576 		 */
3577 		ifsq_watchdog_set_count(&txr->bnx_tx_watchdog, 5);
3578 	}
3579 
3580 	if (nsegs > 0) {
3581 		/* Transmit */
3582 		bnx_writembx(txr->bnx_sc, txr->bnx_tx_mbx, prodidx);
3583 	}
3584 	txr->bnx_tx_prodidx = prodidx;
3585 }
3586 
3587 static void
3588 bnx_init(void *xsc)
3589 {
3590 	struct bnx_softc *sc = xsc;
3591 	struct ifnet *ifp = &sc->arpcom.ac_if;
3592 	uint16_t *m;
3593 	uint32_t mode;
3594 	int i;
3595 	boolean_t polling;
3596 
3597 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3598 
3599 	/* Cancel pending I/O and flush buffers. */
3600 	bnx_stop(sc);
3601 
3602 	bnx_sig_pre_reset(sc, BNX_RESET_START);
3603 	bnx_reset(sc);
3604 	bnx_sig_post_reset(sc, BNX_RESET_START);
3605 
3606 	bnx_chipinit(sc);
3607 
3608 	/*
3609 	 * Init the various state machines, ring
3610 	 * control blocks and firmware.
3611 	 */
3612 	if (bnx_blockinit(sc)) {
3613 		if_printf(ifp, "initialization failure\n");
3614 		bnx_stop(sc);
3615 		return;
3616 	}
3617 
3618 	/* Specify MTU. */
3619 	CSR_WRITE_4(sc, BGE_RX_MTU, ifp->if_mtu +
3620 	    ETHER_HDR_LEN + ETHER_CRC_LEN + EVL_ENCAPLEN);
3621 
3622 	/* Load our MAC address. */
3623 	m = (uint16_t *)&sc->arpcom.ac_enaddr[0];
3624 	CSR_WRITE_4(sc, BGE_MAC_ADDR1_LO, htons(m[0]));
3625 	CSR_WRITE_4(sc, BGE_MAC_ADDR1_HI, (htons(m[1]) << 16) | htons(m[2]));
3626 
3627 	/* Enable or disable promiscuous mode as needed. */
3628 	bnx_setpromisc(sc);
3629 
3630 	/* Program multicast filter. */
3631 	bnx_setmulti(sc);
3632 
3633 	/* Init RX ring. */
3634 	if (bnx_init_rx_ring_std(&sc->bnx_rx_std_ring)) {
3635 		if_printf(ifp, "RX ring initialization failed\n");
3636 		bnx_stop(sc);
3637 		return;
3638 	}
3639 
3640 	/* Init jumbo RX ring. */
3641 	if (ifp->if_mtu > (ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN)) {
3642 		if (bnx_init_rx_ring_jumbo(sc)) {
3643 			if_printf(ifp, "Jumbo RX ring initialization failed\n");
3644 			bnx_stop(sc);
3645 			return;
3646 		}
3647 	}
3648 
3649 	/* Init our RX return ring index */
3650 	for (i = 0; i < sc->bnx_rx_retcnt; ++i) {
3651 		struct bnx_rx_ret_ring *ret = &sc->bnx_rx_ret_ring[i];
3652 
3653 		ret->bnx_rx_saved_considx = 0;
3654 		ret->bnx_rx_cnt = 0;
3655 	}
3656 
3657 	/* Init TX ring. */
3658 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i)
3659 		bnx_init_tx_ring(&sc->bnx_tx_ring[i]);
3660 
3661 	/* Enable TX MAC state machine lockup fix. */
3662 	mode = CSR_READ_4(sc, BGE_TX_MODE);
3663 	mode |= BGE_TXMODE_MBUF_LOCKUP_FIX;
3664 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5720 ||
3665 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762) {
3666 		mode &= ~(BGE_TXMODE_JMB_FRM_LEN | BGE_TXMODE_CNT_DN_MODE);
3667 		mode |= CSR_READ_4(sc, BGE_TX_MODE) &
3668 		    (BGE_TXMODE_JMB_FRM_LEN | BGE_TXMODE_CNT_DN_MODE);
3669 	}
3670 	/* Turn on transmitter */
3671 	CSR_WRITE_4(sc, BGE_TX_MODE, mode | BGE_TXMODE_ENABLE);
3672 	DELAY(100);
3673 
3674 	/* Initialize RSS */
3675 	mode = BGE_RXMODE_ENABLE | BGE_RXMODE_IPV6_ENABLE;
3676 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5762)
3677 		mode |= BGE_RXMODE_IPV4_FRAG_FIX;
3678 	if (BNX_RSS_ENABLED(sc)) {
3679 		bnx_init_rss(sc);
3680 		mode |= BGE_RXMODE_RSS_ENABLE |
3681 		    BGE_RXMODE_RSS_HASH_MASK_BITS |
3682 		    BGE_RXMODE_RSS_IPV4_HASH |
3683 		    BGE_RXMODE_RSS_TCP_IPV4_HASH;
3684 	}
3685 	/* Turn on receiver */
3686 	BNX_SETBIT(sc, BGE_RX_MODE, mode);
3687 	DELAY(10);
3688 
3689 	/*
3690 	 * Set the number of good frames to receive after RX MBUF
3691 	 * Low Watermark has been reached.  After the RX MAC receives
3692 	 * this number of frames, it will drop subsequent incoming
3693 	 * frames until the MBUF High Watermark is reached.
3694 	 */
3695 	if (BNX_IS_57765_FAMILY(sc))
3696 		CSR_WRITE_4(sc, BGE_MAX_RX_FRAME_LOWAT, 1);
3697 	else
3698 		CSR_WRITE_4(sc, BGE_MAX_RX_FRAME_LOWAT, 2);
3699 
3700 	if (sc->bnx_intr_type == PCI_INTR_TYPE_MSI ||
3701 	    sc->bnx_intr_type == PCI_INTR_TYPE_MSIX) {
3702 		if (bootverbose) {
3703 			if_printf(ifp, "MSI_MODE: %#x\n",
3704 			    CSR_READ_4(sc, BGE_MSI_MODE));
3705 		}
3706 	}
3707 
3708 	/* Tell firmware we're alive. */
3709 	BNX_SETBIT(sc, BGE_MODE_CTL, BGE_MODECTL_STACKUP);
3710 
3711 	/* Enable host interrupts if polling(4) is not enabled. */
3712 	PCI_SETBIT(sc->bnx_dev, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_CLEAR_INTA, 4);
3713 
3714 	polling = FALSE;
3715 #ifdef IFPOLL_ENABLE
3716 	if (ifp->if_flags & IFF_NPOLLING)
3717 		polling = TRUE;
3718 #endif
3719 	if (polling)
3720 		bnx_disable_intr(sc);
3721 	else
3722 		bnx_enable_intr(sc);
3723 	bnx_set_tick_cpuid(sc, polling);
3724 
3725 	ifp->if_flags |= IFF_RUNNING;
3726 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
3727 		struct bnx_tx_ring *txr = &sc->bnx_tx_ring[i];
3728 
3729 		ifsq_clr_oactive(txr->bnx_ifsq);
3730 		ifsq_watchdog_start(&txr->bnx_tx_watchdog);
3731 	}
3732 
3733 	bnx_ifmedia_upd(ifp);
3734 
3735 	callout_reset_bycpu(&sc->bnx_tick_timer, hz, bnx_tick, sc,
3736 	    sc->bnx_tick_cpuid);
3737 }
3738 
3739 /*
3740  * Set media options.
3741  */
3742 static int
3743 bnx_ifmedia_upd(struct ifnet *ifp)
3744 {
3745 	struct bnx_softc *sc = ifp->if_softc;
3746 
3747 	/* If this is a 1000baseX NIC, enable the TBI port. */
3748 	if (sc->bnx_flags & BNX_FLAG_TBI) {
3749 		struct ifmedia *ifm = &sc->bnx_ifmedia;
3750 
3751 		if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
3752 			return(EINVAL);
3753 
3754 		switch(IFM_SUBTYPE(ifm->ifm_media)) {
3755 		case IFM_AUTO:
3756 			break;
3757 
3758 		case IFM_1000_SX:
3759 			if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) {
3760 				BNX_CLRBIT(sc, BGE_MAC_MODE,
3761 				    BGE_MACMODE_HALF_DUPLEX);
3762 			} else {
3763 				BNX_SETBIT(sc, BGE_MAC_MODE,
3764 				    BGE_MACMODE_HALF_DUPLEX);
3765 			}
3766 			DELAY(40);
3767 			break;
3768 		default:
3769 			return(EINVAL);
3770 		}
3771 	} else {
3772 		struct mii_data *mii = device_get_softc(sc->bnx_miibus);
3773 
3774 		sc->bnx_link_evt++;
3775 		sc->bnx_link = 0;
3776 		if (mii->mii_instance) {
3777 			struct mii_softc *miisc;
3778 
3779 			LIST_FOREACH(miisc, &mii->mii_phys, mii_list)
3780 				mii_phy_reset(miisc);
3781 		}
3782 		mii_mediachg(mii);
3783 
3784 		/*
3785 		 * Force an interrupt so that we will call bnx_link_upd
3786 		 * if needed and clear any pending link state attention.
3787 		 * Without this we would not get any further interrupts
3788 		 * for link state changes, would never bring the link UP,
3789 		 * and would be unable to send in bnx_start.  The only way
3790 		 * to get things working again would be to receive a packet
3791 		 * and take an RX interrupt.
3792 		 *
3793 		 * bnx_tick should help for fiber cards, so we might not
3794 		 * need to do this here if BNX_FLAG_TBI is set; but since
3795 		 * we poll for fiber anyway, it should not hurt.
3796 		 */
3797 		BNX_SETBIT(sc, BGE_HCC_MODE, BGE_HCCMODE_COAL_NOW);
3798 	}
3799 	return(0);
3800 }
3801 
3802 /*
3803  * Report current media status.
3804  */
3805 static void
3806 bnx_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
3807 {
3808 	struct bnx_softc *sc = ifp->if_softc;
3809 
3810 	if ((ifp->if_flags & IFF_RUNNING) == 0)
3811 		return;
3812 
3813 	if (sc->bnx_flags & BNX_FLAG_TBI) {
3814 		ifmr->ifm_status = IFM_AVALID;
3815 		ifmr->ifm_active = IFM_ETHER;
3816 		if (CSR_READ_4(sc, BGE_MAC_STS) &
3817 		    BGE_MACSTAT_TBI_PCS_SYNCHED) {
3818 			ifmr->ifm_status |= IFM_ACTIVE;
3819 		} else {
3820 			ifmr->ifm_active |= IFM_NONE;
3821 			return;
3822 		}
3823 
3824 		ifmr->ifm_active |= IFM_1000_SX;
3825 		if (CSR_READ_4(sc, BGE_MAC_MODE) & BGE_MACMODE_HALF_DUPLEX)
3826 			ifmr->ifm_active |= IFM_HDX;
3827 		else
3828 			ifmr->ifm_active |= IFM_FDX;
3829 	} else {
3830 		struct mii_data *mii = device_get_softc(sc->bnx_miibus);
3831 
3832 		mii_pollstat(mii);
3833 		ifmr->ifm_active = mii->mii_media_active;
3834 		ifmr->ifm_status = mii->mii_media_status;
3835 	}
3836 }
3837 
3838 static int
3839 bnx_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
3840 {
3841 	struct bnx_softc *sc = ifp->if_softc;
3842 	struct ifreq *ifr = (struct ifreq *)data;
3843 	int mask, error = 0;
3844 
3845 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3846 
3847 	switch (command) {
3848 	case SIOCSIFMTU:
3849 		if ((!BNX_IS_JUMBO_CAPABLE(sc) && ifr->ifr_mtu > ETHERMTU) ||
3850 		    (BNX_IS_JUMBO_CAPABLE(sc) &&
3851 		     ifr->ifr_mtu > BNX_JUMBO_MTU)) {
3852 			error = EINVAL;
3853 		} else if (ifp->if_mtu != ifr->ifr_mtu) {
3854 			ifp->if_mtu = ifr->ifr_mtu;
3855 			if (ifp->if_flags & IFF_RUNNING)
3856 				bnx_init(sc);
3857 		}
3858 		break;
3859 	case SIOCSIFFLAGS:
3860 		if (ifp->if_flags & IFF_UP) {
3861 			if (ifp->if_flags & IFF_RUNNING) {
3862 				mask = ifp->if_flags ^ sc->bnx_if_flags;
3863 
3864 				/*
3865 				 * If only the state of the PROMISC flag
3866 				 * changed, then just use the 'set promisc
3867 				 * mode' command instead of reinitializing
3868 				 * the entire NIC. Doing a full re-init
3869 				 * means reloading the firmware and waiting
3870 				 * for it to start up, which may take a
3871 				 * second or two.  Similarly for ALLMULTI.
3872 				 */
3873 				if (mask & IFF_PROMISC)
3874 					bnx_setpromisc(sc);
3875 				if (mask & IFF_ALLMULTI)
3876 					bnx_setmulti(sc);
3877 			} else {
3878 				bnx_init(sc);
3879 			}
3880 		} else if (ifp->if_flags & IFF_RUNNING) {
3881 			bnx_stop(sc);
3882 		}
3883 		sc->bnx_if_flags = ifp->if_flags;
3884 		break;
3885 	case SIOCADDMULTI:
3886 	case SIOCDELMULTI:
3887 		if (ifp->if_flags & IFF_RUNNING)
3888 			bnx_setmulti(sc);
3889 		break;
3890 	case SIOCSIFMEDIA:
3891 	case SIOCGIFMEDIA:
3892 		if (sc->bnx_flags & BNX_FLAG_TBI) {
3893 			error = ifmedia_ioctl(ifp, ifr,
3894 			    &sc->bnx_ifmedia, command);
3895 		} else {
3896 			struct mii_data *mii;
3897 
3898 			mii = device_get_softc(sc->bnx_miibus);
3899 			error = ifmedia_ioctl(ifp, ifr,
3900 					      &mii->mii_media, command);
3901 		}
3902 		break;
3903         case SIOCSIFCAP:
3904 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3905 		if (mask & IFCAP_HWCSUM) {
3906 			ifp->if_capenable ^= (mask & IFCAP_HWCSUM);
3907 			if (ifp->if_capenable & IFCAP_TXCSUM)
3908 				ifp->if_hwassist |= BNX_CSUM_FEATURES;
3909 			else
3910 				ifp->if_hwassist &= ~BNX_CSUM_FEATURES;
3911 		}
3912 		if (mask & IFCAP_TSO) {
3913 			ifp->if_capenable ^= (mask & IFCAP_TSO);
3914 			if (ifp->if_capenable & IFCAP_TSO)
3915 				ifp->if_hwassist |= CSUM_TSO;
3916 			else
3917 				ifp->if_hwassist &= ~CSUM_TSO;
3918 		}
3919 		if (mask & IFCAP_RSS)
3920 			ifp->if_capenable ^= IFCAP_RSS;
3921 		break;
3922 	default:
3923 		error = ether_ioctl(ifp, command, data);
3924 		break;
3925 	}
3926 	return error;
3927 }
3928 
3929 static void
3930 bnx_watchdog(struct ifaltq_subque *ifsq)
3931 {
3932 	struct ifnet *ifp = ifsq_get_ifp(ifsq);
3933 	struct bnx_softc *sc = ifp->if_softc;
3934 	int i;
3935 
3936 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3937 
3938 	if_printf(ifp, "watchdog timeout -- resetting\n");
3939 
3940 	bnx_init(sc);
3941 
3942 	IFNET_STAT_INC(ifp, oerrors, 1);
3943 
3944 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i)
3945 		ifsq_devstart_sched(sc->bnx_tx_ring[i].bnx_ifsq);
3946 }
3947 
3948 /*
3949  * Stop the adapter and free any mbufs allocated to the
3950  * RX and TX lists.
3951  */
3952 static void
3953 bnx_stop(struct bnx_softc *sc)
3954 {
3955 	struct ifnet *ifp = &sc->arpcom.ac_if;
3956 	int i;
3957 
3958 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3959 
3960 	callout_stop(&sc->bnx_tick_timer);
3961 
3962 	/* Disable host interrupts. */
3963 	bnx_disable_intr(sc);
3964 
3965 	/*
3966 	 * Tell firmware we're shutting down.
3967 	 */
3968 	bnx_sig_pre_reset(sc, BNX_RESET_SHUTDOWN);
3969 
3970 	/*
3971 	 * Disable all of the receiver blocks
3972 	 */
3973 	bnx_stop_block(sc, BGE_RX_MODE, BGE_RXMODE_ENABLE);
3974 	bnx_stop_block(sc, BGE_RBDI_MODE, BGE_RBDIMODE_ENABLE);
3975 	bnx_stop_block(sc, BGE_RXLP_MODE, BGE_RXLPMODE_ENABLE);
3976 	bnx_stop_block(sc, BGE_RDBDI_MODE, BGE_RBDIMODE_ENABLE);
3977 	bnx_stop_block(sc, BGE_RDC_MODE, BGE_RDCMODE_ENABLE);
3978 	bnx_stop_block(sc, BGE_RBDC_MODE, BGE_RBDCMODE_ENABLE);
3979 
3980 	/*
3981 	 * Disable all of the transmit blocks
3982 	 */
3983 	bnx_stop_block(sc, BGE_SRS_MODE, BGE_SRSMODE_ENABLE);
3984 	bnx_stop_block(sc, BGE_SBDI_MODE, BGE_SBDIMODE_ENABLE);
3985 	bnx_stop_block(sc, BGE_SDI_MODE, BGE_SDIMODE_ENABLE);
3986 	bnx_stop_block(sc, BGE_RDMA_MODE, BGE_RDMAMODE_ENABLE);
3987 	bnx_stop_block(sc, BGE_SDC_MODE, BGE_SDCMODE_ENABLE);
3988 	bnx_stop_block(sc, BGE_SBDC_MODE, BGE_SBDCMODE_ENABLE);
3989 
3990 	/*
3991 	 * Shut down all of the memory managers and related
3992 	 * state machines.
3993 	 */
3994 	bnx_stop_block(sc, BGE_HCC_MODE, BGE_HCCMODE_ENABLE);
3995 	bnx_stop_block(sc, BGE_WDMA_MODE, BGE_WDMAMODE_ENABLE);
3996 	CSR_WRITE_4(sc, BGE_FTQ_RESET, 0xFFFFFFFF);
3997 	CSR_WRITE_4(sc, BGE_FTQ_RESET, 0);
3998 
3999 	bnx_reset(sc);
4000 	bnx_sig_post_reset(sc, BNX_RESET_SHUTDOWN);
4001 
4002 	/*
4003 	 * Tell the firmware the driver is no longer running.
4004 	 */
4005 	BNX_CLRBIT(sc, BGE_MODE_CTL, BGE_MODECTL_STACKUP);
4006 
4007 	/* Free the RX lists. */
4008 	bnx_free_rx_ring_std(&sc->bnx_rx_std_ring);
4009 
4010 	/* Free jumbo RX list. */
4011 	if (BNX_IS_JUMBO_CAPABLE(sc))
4012 		bnx_free_rx_ring_jumbo(sc);
4013 
4014 	/* Free TX buffers. */
4015 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
4016 		struct bnx_tx_ring *txr = &sc->bnx_tx_ring[i];
4017 
4018 		txr->bnx_saved_status_tag = 0;
4019 		bnx_free_tx_ring(txr);
4020 	}
4021 
4022 	/* Clear saved status tag */
4023 	for (i = 0; i < sc->bnx_rx_retcnt; ++i)
4024 		sc->bnx_rx_ret_ring[i].bnx_saved_status_tag = 0;
4025 
4026 	sc->bnx_link = 0;
4027 	sc->bnx_coal_chg = 0;
4028 
4029 	ifp->if_flags &= ~IFF_RUNNING;
4030 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
4031 		struct bnx_tx_ring *txr = &sc->bnx_tx_ring[i];
4032 
4033 		ifsq_clr_oactive(txr->bnx_ifsq);
4034 		ifsq_watchdog_stop(&txr->bnx_tx_watchdog);
4035 	}
4036 }
4037 
4038 /*
4039  * Stop all chip I/O so that the kernel's probe routines don't
4040  * get confused by errant DMAs when rebooting.
4041  */
4042 static void
4043 bnx_shutdown(device_t dev)
4044 {
4045 	struct bnx_softc *sc = device_get_softc(dev);
4046 	struct ifnet *ifp = &sc->arpcom.ac_if;
4047 
4048 	ifnet_serialize_all(ifp);
4049 	bnx_stop(sc);
4050 	ifnet_deserialize_all(ifp);
4051 }
4052 
4053 static int
4054 bnx_suspend(device_t dev)
4055 {
4056 	struct bnx_softc *sc = device_get_softc(dev);
4057 	struct ifnet *ifp = &sc->arpcom.ac_if;
4058 
4059 	ifnet_serialize_all(ifp);
4060 	bnx_stop(sc);
4061 	ifnet_deserialize_all(ifp);
4062 
4063 	return 0;
4064 }
4065 
4066 static int
4067 bnx_resume(device_t dev)
4068 {
4069 	struct bnx_softc *sc = device_get_softc(dev);
4070 	struct ifnet *ifp = &sc->arpcom.ac_if;
4071 
4072 	ifnet_serialize_all(ifp);
4073 
4074 	if (ifp->if_flags & IFF_UP) {
4075 		int i;
4076 
4077 		bnx_init(sc);
4078 		for (i = 0; i < sc->bnx_tx_ringcnt; ++i)
4079 			ifsq_devstart_sched(sc->bnx_tx_ring[i].bnx_ifsq);
4080 	}
4081 
4082 	ifnet_deserialize_all(ifp);
4083 
4084 	return 0;
4085 }
4086 
4087 static void
4088 bnx_setpromisc(struct bnx_softc *sc)
4089 {
4090 	struct ifnet *ifp = &sc->arpcom.ac_if;
4091 
4092 	if (ifp->if_flags & IFF_PROMISC)
4093 		BNX_SETBIT(sc, BGE_RX_MODE, BGE_RXMODE_RX_PROMISC);
4094 	else
4095 		BNX_CLRBIT(sc, BGE_RX_MODE, BGE_RXMODE_RX_PROMISC);
4096 }
4097 
4098 static void
4099 bnx_dma_free(struct bnx_softc *sc)
4100 {
4101 	struct bnx_rx_std_ring *std = &sc->bnx_rx_std_ring;
4102 	int i;
4103 
4104 	/* Destroy RX return rings */
4105 	if (sc->bnx_rx_ret_ring != NULL) {
4106 		for (i = 0; i < sc->bnx_rx_retcnt; ++i)
4107 			bnx_destroy_rx_ret_ring(&sc->bnx_rx_ret_ring[i]);
4108 		kfree(sc->bnx_rx_ret_ring, M_DEVBUF);
4109 	}
4110 
4111 	/* Destroy RX mbuf DMA stuffs. */
4112 	if (std->bnx_rx_mtag != NULL) {
4113 		for (i = 0; i < BGE_STD_RX_RING_CNT; i++) {
4114 			KKASSERT(std->bnx_rx_std_buf[i].bnx_rx_mbuf == NULL);
4115 			bus_dmamap_destroy(std->bnx_rx_mtag,
4116 			    std->bnx_rx_std_buf[i].bnx_rx_dmamap);
4117 		}
4118 		bus_dma_tag_destroy(std->bnx_rx_mtag);
4119 	}
4120 
4121 	/* Destroy standard RX ring */
4122 	bnx_dma_block_free(std->bnx_rx_std_ring_tag,
4123 	    std->bnx_rx_std_ring_map, std->bnx_rx_std_ring);
4124 
4125 	/* Destroy TX rings */
4126 	if (sc->bnx_tx_ring != NULL) {
4127 		for (i = 0; i < sc->bnx_tx_ringcnt; ++i)
4128 			bnx_destroy_tx_ring(&sc->bnx_tx_ring[i]);
4129 		kfree(sc->bnx_tx_ring, M_DEVBUF);
4130 	}
4131 
4132 	if (BNX_IS_JUMBO_CAPABLE(sc))
4133 		bnx_free_jumbo_mem(sc);
4134 
4135 	/* Destroy status blocks */
4136 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
4137 		struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
4138 
4139 		bnx_dma_block_free(intr->bnx_status_tag,
4140 		    intr->bnx_status_map, intr->bnx_status_block);
4141 	}
4142 
4143 	/* Destroy the parent tag */
4144 	if (sc->bnx_cdata.bnx_parent_tag != NULL)
4145 		bus_dma_tag_destroy(sc->bnx_cdata.bnx_parent_tag);
4146 }
4147 
4148 static int
4149 bnx_dma_alloc(device_t dev)
4150 {
4151 	struct bnx_softc *sc = device_get_softc(dev);
4152 	struct bnx_rx_std_ring *std = &sc->bnx_rx_std_ring;
4153 	int i, error, mbx;
4154 
4155 	/*
4156 	 * Allocate the parent bus DMA tag appropriate for PCI.
4157 	 *
4158 	 * All of the NetExtreme/NetLink controllers have a 4GB boundary
4159 	 * DMA bug.
4160 	 * Whenever an address crosses a multiple of the 4GB boundary
4161 	 * (including 4GB, 8GB, 12GB, etc.) and makes the transition
4162 	 * from 0xX_FFFF_FFFF to 0x(X+1)_0000_0000, an internal DMA
4163 	 * state machine will lock up and cause the device to hang.
4164 	 */
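	/*
	 * Passing BGE_DMA_BOUNDARY_4G as the boundary argument below
	 * makes busdma split any DMA segment that would otherwise
	 * straddle a 4GB multiple, so the problematic address
	 * transition never appears on the bus.
	 */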
4165 	error = bus_dma_tag_create(NULL, 1, BGE_DMA_BOUNDARY_4G,
4166 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
4167 	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT,
4168 	    0, &sc->bnx_cdata.bnx_parent_tag);
4169 	if (error) {
4170 		device_printf(dev, "could not create parent DMA tag\n");
4171 		return error;
4172 	}
4173 
4174 	/*
4175 	 * Create DMA stuffs for status blocks.
4176 	 */
4177 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
4178 		struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
4179 
4180 		error = bnx_dma_block_alloc(sc,
4181 		    __VM_CACHELINE_ALIGN(BGE_STATUS_BLK_SZ),
4182 		    &intr->bnx_status_tag, &intr->bnx_status_map,
4183 		    (void *)&intr->bnx_status_block,
4184 		    &intr->bnx_status_block_paddr);
4185 		if (error) {
4186 			device_printf(dev,
4187 			    "could not create %dth status block\n", i);
4188 			return error;
4189 		}
4190 	}
4191 	sc->bnx_hw_status = &sc->bnx_intr_data[0].bnx_status_block->bge_status;
4192 	if (sc->bnx_flags & BNX_FLAG_STATUS_HASTAG) {
4193 		sc->bnx_hw_status_tag =
4194 		    &sc->bnx_intr_data[0].bnx_status_block->bge_status_tag;
4195 	}
4196 
4197 	/*
4198 	 * Create DMA tag and maps for RX mbufs.
4199 	 */
4200 	std->bnx_sc = sc;
4201 	lwkt_serialize_init(&std->bnx_rx_std_serialize);
4202 	error = bus_dma_tag_create(sc->bnx_cdata.bnx_parent_tag, 1, 0,
4203 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
4204 	    MCLBYTES, 1, MCLBYTES,
4205 	    BUS_DMA_ALLOCNOW | BUS_DMA_WAITOK, &std->bnx_rx_mtag);
4206 	if (error) {
4207 		device_printf(dev, "could not create RX mbuf DMA tag\n");
4208 		return error;
4209 	}
4210 
4211 	for (i = 0; i < BGE_STD_RX_RING_CNT; ++i) {
4212 		error = bus_dmamap_create(std->bnx_rx_mtag, BUS_DMA_WAITOK,
4213 		    &std->bnx_rx_std_buf[i].bnx_rx_dmamap);
4214 		if (error) {
4215 			int j;
4216 
4217 			for (j = 0; j < i; ++j) {
4218 				bus_dmamap_destroy(std->bnx_rx_mtag,
4219 				    std->bnx_rx_std_buf[j].bnx_rx_dmamap);
4220 			}
4221 			bus_dma_tag_destroy(std->bnx_rx_mtag);
4222 			std->bnx_rx_mtag = NULL;
4223 
4224 			device_printf(dev,
4225 			    "could not create %dth RX mbuf DMA map\n", i);
4226 			return error;
4227 		}
4228 	}
4229 
4230 	/*
4231 	 * Create DMA stuffs for standard RX ring.
4232 	 */
4233 	error = bnx_dma_block_alloc(sc, BGE_STD_RX_RING_SZ,
4234 	    &std->bnx_rx_std_ring_tag,
4235 	    &std->bnx_rx_std_ring_map,
4236 	    (void *)&std->bnx_rx_std_ring,
4237 	    &std->bnx_rx_std_ring_paddr);
4238 	if (error) {
4239 		device_printf(dev, "could not create std RX ring\n");
4240 		return error;
4241 	}
4242 
4243 	/*
4244 	 * Create RX return rings
4245 	 */
4246 	mbx = BGE_MBX_RX_CONS0_LO;
4247 	sc->bnx_rx_ret_ring =
4248 		kmalloc(sizeof(struct bnx_rx_ret_ring) * sc->bnx_rx_retcnt,
4249 			M_DEVBUF,
4250 			M_WAITOK | M_ZERO | M_CACHEALIGN);
4251 	for (i = 0; i < sc->bnx_rx_retcnt; ++i) {
4252 		struct bnx_rx_ret_ring *ret = &sc->bnx_rx_ret_ring[i];
4253 		struct bnx_intr_data *intr;
4254 
4255 		ret->bnx_sc = sc;
4256 		ret->bnx_std = std;
4257 		ret->bnx_rx_mbx = mbx;
4258 		ret->bnx_rx_cntmax = (BGE_STD_RX_RING_CNT / 4) /
4259 		    sc->bnx_rx_retcnt;
4260 		ret->bnx_rx_mask = 1 << i;
4261 
4262 		if (!BNX_RSS_ENABLED(sc)) {
4263 			intr = &sc->bnx_intr_data[0];
4264 		} else {
4265 			KKASSERT(i + 1 < sc->bnx_intr_cnt);
4266 			intr = &sc->bnx_intr_data[i + 1];
4267 		}
4268 
4269 		if (i == 0) {
4270 			ret->bnx_rx_considx =
4271 			    &intr->bnx_status_block->bge_idx[0].bge_rx_prod_idx;
4272 		} else if (i == 1) {
4273 			ret->bnx_rx_considx =
4274 			    &intr->bnx_status_block->bge_rx_jumbo_cons_idx;
4275 		} else if (i == 2) {
4276 			ret->bnx_rx_considx =
4277 			    &intr->bnx_status_block->bge_rsvd1;
4278 		} else if (i == 3) {
4279 			ret->bnx_rx_considx =
4280 			    &intr->bnx_status_block->bge_rx_mini_cons_idx;
4281 		} else {
4282 			panic("unknown RX return ring %d", i);
4283 		}
4284 		ret->bnx_hw_status_tag =
4285 		    &intr->bnx_status_block->bge_status_tag;
4286 
4287 		error = bnx_create_rx_ret_ring(ret);
4288 		if (error) {
4289 			device_printf(dev,
4290 			    "could not create %dth RX ret ring\n", i);
4291 			return error;
4292 		}
4293 		mbx += 8;
4294 	}
4295 
4296 	/*
4297 	 * Create TX rings
4298 	 */
4299 	sc->bnx_tx_ring =
4300 		kmalloc(sizeof(struct bnx_tx_ring) * sc->bnx_tx_ringcnt,
4301 			M_DEVBUF,
4302 			M_WAITOK | M_ZERO | M_CACHEALIGN);
4303 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
4304 		struct bnx_tx_ring *txr = &sc->bnx_tx_ring[i];
4305 		struct bnx_intr_data *intr;
4306 
4307 		txr->bnx_sc = sc;
4308 		txr->bnx_tx_mbx = bnx_tx_mailbox[i];
4309 
4310 		if (sc->bnx_tx_ringcnt == 1) {
4311 			intr = &sc->bnx_intr_data[0];
4312 		} else {
4313 			KKASSERT(i + 1 < sc->bnx_intr_cnt);
4314 			intr = &sc->bnx_intr_data[i + 1];
4315 		}
4316 
4317 		if ((sc->bnx_flags & BNX_FLAG_RXTX_BUNDLE) == 0) {
4318 			txr->bnx_hw_status_tag =
4319 			    &intr->bnx_status_block->bge_status_tag;
4320 		}
4321 		txr->bnx_tx_considx =
4322 		    &intr->bnx_status_block->bge_idx[0].bge_tx_cons_idx;
4323 
4324 		error = bnx_create_tx_ring(txr);
4325 		if (error) {
4326 			device_printf(dev,
4327 			    "could not create %dth TX ring\n", i);
4328 			return error;
4329 		}
4330 	}
4331 
4332 	/*
4333 	 * Create jumbo buffer pool.
4334 	 */
4335 	if (BNX_IS_JUMBO_CAPABLE(sc)) {
4336 		error = bnx_alloc_jumbo_mem(sc);
4337 		if (error) {
4338 			device_printf(dev,
4339 			    "could not create jumbo buffer pool\n");
4340 			return error;
4341 		}
4342 	}
4343 
4344 	return 0;
4345 }
4346 
4347 static int
4348 bnx_dma_block_alloc(struct bnx_softc *sc, bus_size_t size, bus_dma_tag_t *tag,
4349 		    bus_dmamap_t *map, void **addr, bus_addr_t *paddr)
4350 {
4351 	bus_dmamem_t dmem;
4352 	int error;
4353 
4354 	error = bus_dmamem_coherent(sc->bnx_cdata.bnx_parent_tag, PAGE_SIZE, 0,
4355 				    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
4356 				    size, BUS_DMA_WAITOK | BUS_DMA_ZERO, &dmem);
4357 	if (error)
4358 		return error;
4359 
4360 	*tag = dmem.dmem_tag;
4361 	*map = dmem.dmem_map;
4362 	*addr = dmem.dmem_addr;
4363 	*paddr = dmem.dmem_busaddr;
4364 
4365 	return 0;
4366 }
4367 
4368 static void
4369 bnx_dma_block_free(bus_dma_tag_t tag, bus_dmamap_t map, void *addr)
4370 {
4371 	if (tag != NULL) {
4372 		bus_dmamap_unload(tag, map);
4373 		bus_dmamem_free(tag, addr, map);
4374 		bus_dma_tag_destroy(tag);
4375 	}
4376 }
4377 
4378 static void
4379 bnx_tbi_link_upd(struct bnx_softc *sc, uint32_t status)
4380 {
4381 	struct ifnet *ifp = &sc->arpcom.ac_if;
4382 
4383 #define PCS_ENCODE_ERR	(BGE_MACSTAT_PORT_DECODE_ERROR|BGE_MACSTAT_MI_COMPLETE)
4384 
4385 	/*
4386 	 * Sometimes PCS encoding errors are detected in
4387 	 * TBI mode (on fiber NICs), and for some reason
4388 	 * the chip will signal them as link changes.
4389 	 * If we get a link change event, but the 'PCS
4390 	 * encoding error' bit in the MAC status register
4391 	 * is set, don't bother doing a link check.
4392 	 * This avoids spurious "gigabit link up" messages
4393 	 * that sometimes appear on fiber NICs during
4394 	 * periods of heavy traffic.
4395 	 */
4396 	if (status & BGE_MACSTAT_TBI_PCS_SYNCHED) {
4397 		if (!sc->bnx_link) {
4398 			sc->bnx_link++;
4399 			if (sc->bnx_asicrev == BGE_ASICREV_BCM5704) {
4400 				BNX_CLRBIT(sc, BGE_MAC_MODE,
4401 				    BGE_MACMODE_TBI_SEND_CFGS);
4402 				DELAY(40);
4403 			}
4404 			CSR_WRITE_4(sc, BGE_MAC_STS, 0xFFFFFFFF);
4405 
4406 			if (bootverbose)
4407 				if_printf(ifp, "link UP\n");
4408 
4409 			ifp->if_link_state = LINK_STATE_UP;
4410 			if_link_state_change(ifp);
4411 		}
4412 	} else if ((status & PCS_ENCODE_ERR) != PCS_ENCODE_ERR) {
4413 		if (sc->bnx_link) {
4414 			sc->bnx_link = 0;
4415 
4416 			if (bootverbose)
4417 				if_printf(ifp, "link DOWN\n");
4418 
4419 			ifp->if_link_state = LINK_STATE_DOWN;
4420 			if_link_state_change(ifp);
4421 		}
4422 	}
4423 
4424 #undef PCS_ENCODE_ERR
4425 
4426 	/* Clear the attention. */
4427 	CSR_WRITE_4(sc, BGE_MAC_STS, BGE_MACSTAT_SYNC_CHANGED |
4428 	    BGE_MACSTAT_CFG_CHANGED | BGE_MACSTAT_MI_COMPLETE |
4429 	    BGE_MACSTAT_LINK_CHANGED);
4430 }
4431 
4432 static void
4433 bnx_copper_link_upd(struct bnx_softc *sc, uint32_t status __unused)
4434 {
4435 	struct ifnet *ifp = &sc->arpcom.ac_if;
4436 	struct mii_data *mii = device_get_softc(sc->bnx_miibus);
4437 
4438 	mii_pollstat(mii);
4439 	bnx_miibus_statchg(sc->bnx_dev);
4440 
4441 	if (bootverbose) {
4442 		if (sc->bnx_link)
4443 			if_printf(ifp, "link UP\n");
4444 		else
4445 			if_printf(ifp, "link DOWN\n");
4446 	}
4447 
4448 	/* Clear the attention. */
4449 	CSR_WRITE_4(sc, BGE_MAC_STS, BGE_MACSTAT_SYNC_CHANGED |
4450 	    BGE_MACSTAT_CFG_CHANGED | BGE_MACSTAT_MI_COMPLETE |
4451 	    BGE_MACSTAT_LINK_CHANGED);
4452 }
4453 
4454 static void
4455 bnx_autopoll_link_upd(struct bnx_softc *sc, uint32_t status __unused)
4456 {
4457 	struct ifnet *ifp = &sc->arpcom.ac_if;
4458 	struct mii_data *mii = device_get_softc(sc->bnx_miibus);
4459 
4460 	mii_pollstat(mii);
4461 
4462 	if (!sc->bnx_link &&
4463 	    (mii->mii_media_status & IFM_ACTIVE) &&
4464 	    IFM_SUBTYPE(mii->mii_media_active) != IFM_NONE) {
4465 		sc->bnx_link++;
4466 		if (bootverbose)
4467 			if_printf(ifp, "link UP\n");
4468 	} else if (sc->bnx_link &&
4469 	    (!(mii->mii_media_status & IFM_ACTIVE) ||
4470 	    IFM_SUBTYPE(mii->mii_media_active) == IFM_NONE)) {
4471 		sc->bnx_link = 0;
4472 		if (bootverbose)
4473 			if_printf(ifp, "link DOWN\n");
4474 	}
4475 
4476 	/* Clear the attention. */
4477 	CSR_WRITE_4(sc, BGE_MAC_STS, BGE_MACSTAT_SYNC_CHANGED |
4478 	    BGE_MACSTAT_CFG_CHANGED | BGE_MACSTAT_MI_COMPLETE |
4479 	    BGE_MACSTAT_LINK_CHANGED);
4480 }
4481 
4482 static int
4483 bnx_sysctl_rx_coal_ticks(SYSCTL_HANDLER_ARGS)
4484 {
4485 	struct bnx_softc *sc = arg1;
4486 
4487 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
4488 	    &sc->bnx_rx_coal_ticks,
4489 	    BNX_RX_COAL_TICKS_MIN, BNX_RX_COAL_TICKS_MAX,
4490 	    BNX_RX_COAL_TICKS_CHG);
4491 }
4492 
4493 static int
4494 bnx_sysctl_tx_coal_ticks(SYSCTL_HANDLER_ARGS)
4495 {
4496 	struct bnx_softc *sc = arg1;
4497 
4498 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
4499 	    &sc->bnx_tx_coal_ticks,
4500 	    BNX_TX_COAL_TICKS_MIN, BNX_TX_COAL_TICKS_MAX,
4501 	    BNX_TX_COAL_TICKS_CHG);
4502 }
4503 
4504 static int
4505 bnx_sysctl_rx_coal_bds(SYSCTL_HANDLER_ARGS)
4506 {
4507 	struct bnx_softc *sc = arg1;
4508 
4509 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
4510 	    &sc->bnx_rx_coal_bds,
4511 	    BNX_RX_COAL_BDS_MIN, BNX_RX_COAL_BDS_MAX,
4512 	    BNX_RX_COAL_BDS_CHG);
4513 }
4514 
4515 static int
4516 bnx_sysctl_rx_coal_bds_poll(SYSCTL_HANDLER_ARGS)
4517 {
4518 	struct bnx_softc *sc = arg1;
4519 
4520 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
4521 	    &sc->bnx_rx_coal_bds_poll,
4522 	    BNX_RX_COAL_BDS_MIN, BNX_RX_COAL_BDS_MAX,
4523 	    BNX_RX_COAL_BDS_CHG);
4524 }
4525 
4526 static int
4527 bnx_sysctl_tx_coal_bds(SYSCTL_HANDLER_ARGS)
4528 {
4529 	struct bnx_softc *sc = arg1;
4530 
4531 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
4532 	    &sc->bnx_tx_coal_bds,
4533 	    BNX_TX_COAL_BDS_MIN, BNX_TX_COAL_BDS_MAX,
4534 	    BNX_TX_COAL_BDS_CHG);
4535 }
4536 
4537 static int
4538 bnx_sysctl_tx_coal_bds_poll(SYSCTL_HANDLER_ARGS)
4539 {
4540 	struct bnx_softc *sc = arg1;
4541 
4542 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
4543 	    &sc->bnx_tx_coal_bds_poll,
4544 	    BNX_TX_COAL_BDS_MIN, BNX_TX_COAL_BDS_MAX,
4545 	    BNX_TX_COAL_BDS_CHG);
4546 }
4547 
4548 static int
4549 bnx_sysctl_rx_coal_bds_int(SYSCTL_HANDLER_ARGS)
4550 {
4551 	struct bnx_softc *sc = arg1;
4552 
4553 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
4554 	    &sc->bnx_rx_coal_bds_int,
4555 	    BNX_RX_COAL_BDS_MIN, BNX_RX_COAL_BDS_MAX,
4556 	    BNX_RX_COAL_BDS_INT_CHG);
4557 }
4558 
4559 static int
4560 bnx_sysctl_tx_coal_bds_int(SYSCTL_HANDLER_ARGS)
4561 {
4562 	struct bnx_softc *sc = arg1;
4563 
4564 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
4565 	    &sc->bnx_tx_coal_bds_int,
4566 	    BNX_TX_COAL_BDS_MIN, BNX_TX_COAL_BDS_MAX,
4567 	    BNX_TX_COAL_BDS_INT_CHG);
4568 }
4569 
4570 static int
4571 bnx_sysctl_coal_chg(SYSCTL_HANDLER_ARGS, uint32_t *coal,
4572     int coal_min, int coal_max, uint32_t coal_chg_mask)
4573 {
4574 	struct bnx_softc *sc = arg1;
4575 	struct ifnet *ifp = &sc->arpcom.ac_if;
4576 	int error = 0, v;
4577 
4578 	ifnet_serialize_all(ifp);
4579 
4580 	v = *coal;
4581 	error = sysctl_handle_int(oidp, &v, 0, req);
4582 	if (!error && req->newptr != NULL) {
4583 		if (v < coal_min || v > coal_max) {
4584 			error = EINVAL;
4585 		} else {
4586 			*coal = v;
4587 			sc->bnx_coal_chg |= coal_chg_mask;
4588 
4589 			/* Commit changes */
4590 			bnx_coal_change(sc);
4591 		}
4592 	}
4593 
4594 	ifnet_deserialize_all(ifp);
4595 	return error;
4596 }
4597 
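/*
 * Illustrative example (not taken from this file): the coalescing
 * parameters handled by the sysctl procedures above are normally tuned
 * from userland, e.g. something along the lines of
 *
 *	sysctl hw.bnx0.rx_coal_ticks=150
 *	sysctl hw.bnx0.tx_coal_bds=64
 *
 * assuming the usual hw.<ifname> sysctl tree created at attach time.
 * Each write funnels through bnx_sysctl_coal_chg(), which validates the
 * range, records the change mask and calls bnx_coal_change() below to
 * program the corresponding host coalescing registers.
 */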
4598 static void
4599 bnx_coal_change(struct bnx_softc *sc)
4600 {
4601 	struct ifnet *ifp = &sc->arpcom.ac_if;
4602 	int i;
4603 
4604 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
4605 
4606 	if (sc->bnx_coal_chg & BNX_RX_COAL_TICKS_CHG) {
4607 		if (sc->bnx_rx_retcnt == 1) {
4608 			CSR_WRITE_4(sc, BGE_HCC_RX_COAL_TICKS,
4609 			    sc->bnx_rx_coal_ticks);
4610 			i = 0;
4611 		} else {
4612 			CSR_WRITE_4(sc, BGE_HCC_RX_COAL_TICKS, 0);
4613 			for (i = 0; i < sc->bnx_rx_retcnt; ++i) {
4614 				CSR_WRITE_4(sc, BGE_VEC1_RX_COAL_TICKS +
4615 				    (i * BGE_VEC_COALSET_SIZE),
4616 				    sc->bnx_rx_coal_ticks);
4617 			}
4618 		}
4619 		for (; i < BNX_INTR_MAX - 1; ++i) {
4620 			CSR_WRITE_4(sc, BGE_VEC1_RX_COAL_TICKS +
4621 			    (i * BGE_VEC_COALSET_SIZE), 0);
4622 		}
4623 		if (bootverbose) {
4624 			if_printf(ifp, "rx_coal_ticks -> %u\n",
4625 			    sc->bnx_rx_coal_ticks);
4626 		}
4627 	}
4628 
4629 	if (sc->bnx_coal_chg & BNX_TX_COAL_TICKS_CHG) {
4630 		if (sc->bnx_tx_ringcnt == 1) {
4631 			CSR_WRITE_4(sc, BGE_HCC_TX_COAL_TICKS,
4632 			    sc->bnx_tx_coal_ticks);
4633 			i = 0;
4634 		} else {
4635 			CSR_WRITE_4(sc, BGE_HCC_TX_COAL_TICKS, 0);
4636 			for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
4637 				CSR_WRITE_4(sc, BGE_VEC1_TX_COAL_TICKS +
4638 				    (i * BGE_VEC_COALSET_SIZE),
4639 				    sc->bnx_tx_coal_ticks);
4640 			}
4641 		}
4642 		for (; i < BNX_INTR_MAX - 1; ++i) {
4643 			CSR_WRITE_4(sc, BGE_VEC1_TX_COAL_TICKS +
4644 			    (i * BGE_VEC_COALSET_SIZE), 0);
4645 		}
4646 		if (bootverbose) {
4647 			if_printf(ifp, "tx_coal_ticks -> %u\n",
4648 			    sc->bnx_tx_coal_ticks);
4649 		}
4650 	}
4651 
4652 	if (sc->bnx_coal_chg & BNX_RX_COAL_BDS_CHG) {
4653 		uint32_t rx_coal_bds;
4654 
4655 		if (ifp->if_flags & IFF_NPOLLING)
4656 			rx_coal_bds = sc->bnx_rx_coal_bds_poll;
4657 		else
4658 			rx_coal_bds = sc->bnx_rx_coal_bds;
4659 
4660 		if (sc->bnx_rx_retcnt == 1) {
4661 			CSR_WRITE_4(sc, BGE_HCC_RX_MAX_COAL_BDS, rx_coal_bds);
4662 			i = 0;
4663 		} else {
4664 			CSR_WRITE_4(sc, BGE_HCC_RX_MAX_COAL_BDS, 0);
4665 			for (i = 0; i < sc->bnx_rx_retcnt; ++i) {
4666 				CSR_WRITE_4(sc, BGE_VEC1_RX_MAX_COAL_BDS +
4667 				    (i * BGE_VEC_COALSET_SIZE), rx_coal_bds);
4668 			}
4669 		}
4670 		for (; i < BNX_INTR_MAX - 1; ++i) {
4671 			CSR_WRITE_4(sc, BGE_VEC1_RX_MAX_COAL_BDS +
4672 			    (i * BGE_VEC_COALSET_SIZE), 0);
4673 		}
4674 		if (bootverbose) {
4675 			if_printf(ifp, "%srx_coal_bds -> %u\n",
4676 			    (ifp->if_flags & IFF_NPOLLING) ? "polling " : "",
4677 			    rx_coal_bds);
4678 		}
4679 	}
4680 
4681 	if (sc->bnx_coal_chg & BNX_TX_COAL_BDS_CHG) {
4682 		uint32_t tx_coal_bds;
4683 
4684 		if (ifp->if_flags & IFF_NPOLLING)
4685 			tx_coal_bds = sc->bnx_tx_coal_bds_poll;
4686 		else
4687 			tx_coal_bds = sc->bnx_tx_coal_bds;
4688 
4689 		if (sc->bnx_tx_ringcnt == 1) {
4690 			CSR_WRITE_4(sc, BGE_HCC_TX_MAX_COAL_BDS, tx_coal_bds);
4691 			i = 0;
4692 		} else {
4693 			CSR_WRITE_4(sc, BGE_HCC_TX_MAX_COAL_BDS, 0);
4694 			for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
4695 				CSR_WRITE_4(sc, BGE_VEC1_TX_MAX_COAL_BDS +
4696 				    (i * BGE_VEC_COALSET_SIZE), tx_coal_bds);
4697 			}
4698 		}
4699 		for (; i < BNX_INTR_MAX - 1; ++i) {
4700 			CSR_WRITE_4(sc, BGE_VEC1_TX_MAX_COAL_BDS +
4701 			    (i * BGE_VEC_COALSET_SIZE), 0);
4702 		}
4703 		if (bootverbose) {
4704 			if_printf(ifp, "%stx_coal_bds -> %u\n",
4705 			    (ifp->if_flags & IFF_NPOLLING) ? "polling " : "",
4706 			    tx_coal_bds);
4707 		}
4708 	}
4709 
4710 	if (sc->bnx_coal_chg & BNX_RX_COAL_BDS_INT_CHG) {
4711 		if (sc->bnx_rx_retcnt == 1) {
4712 			CSR_WRITE_4(sc, BGE_HCC_RX_MAX_COAL_BDS_INT,
4713 			    sc->bnx_rx_coal_bds_int);
4714 			i = 0;
4715 		} else {
4716 			CSR_WRITE_4(sc, BGE_HCC_RX_MAX_COAL_BDS_INT, 0);
4717 			for (i = 0; i < sc->bnx_rx_retcnt; ++i) {
4718 				CSR_WRITE_4(sc, BGE_VEC1_RX_MAX_COAL_BDS_INT +
4719 				    (i * BGE_VEC_COALSET_SIZE),
4720 				    sc->bnx_rx_coal_bds_int);
4721 			}
4722 		}
4723 		for (; i < BNX_INTR_MAX - 1; ++i) {
4724 			CSR_WRITE_4(sc, BGE_VEC1_RX_MAX_COAL_BDS_INT +
4725 			    (i * BGE_VEC_COALSET_SIZE), 0);
4726 		}
4727 		if (bootverbose) {
4728 			if_printf(ifp, "rx_coal_bds_int -> %u\n",
4729 			    sc->bnx_rx_coal_bds_int);
4730 		}
4731 	}
4732 
4733 	if (sc->bnx_coal_chg & BNX_TX_COAL_BDS_INT_CHG) {
4734 		if (sc->bnx_tx_ringcnt == 1) {
4735 			CSR_WRITE_4(sc, BGE_HCC_TX_MAX_COAL_BDS_INT,
4736 			    sc->bnx_tx_coal_bds_int);
4737 			i = 0;
4738 		} else {
4739 			CSR_WRITE_4(sc, BGE_HCC_TX_MAX_COAL_BDS_INT, 0);
4740 			for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
4741 				CSR_WRITE_4(sc, BGE_VEC1_TX_MAX_COAL_BDS_INT +
4742 				    (i * BGE_VEC_COALSET_SIZE),
4743 				    sc->bnx_tx_coal_bds_int);
4744 			}
4745 		}
4746 		for (; i < BNX_INTR_MAX - 1; ++i) {
4747 			CSR_WRITE_4(sc, BGE_VEC1_TX_MAX_COAL_BDS_INT +
4748 			    (i * BGE_VEC_COALSET_SIZE), 0);
4749 		}
4750 		if (bootverbose) {
4751 			if_printf(ifp, "tx_coal_bds_int -> %u\n",
4752 			    sc->bnx_tx_coal_bds_int);
4753 		}
4754 	}
4755 
4756 	sc->bnx_coal_chg = 0;
4757 }
4758 
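/*
 * Lost-interrupt watchdogs for chips with the status tag bug: a periodic
 * callout compares the hardware consumer indices against the values the
 * driver last processed.  If the hardware has advanced but the driver has
 * made no progress for two consecutive checks, the interrupt is assumed
 * lost and the interrupt handler is invoked by hand.
 */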
4759 static void
4760 bnx_check_intr_rxtx(void *xintr)
4761 {
4762 	struct bnx_intr_data *intr = xintr;
4763 	struct bnx_rx_ret_ring *ret;
4764 	struct bnx_tx_ring *txr;
4765 	struct ifnet *ifp;
4766 
4767 	lwkt_serialize_enter(intr->bnx_intr_serialize);
4768 
4769 	KKASSERT(mycpuid == intr->bnx_intr_cpuid);
4770 
4771 	ifp = &intr->bnx_sc->arpcom.ac_if;
4772 	if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) != IFF_RUNNING) {
4773 		lwkt_serialize_exit(intr->bnx_intr_serialize);
4774 		return;
4775 	}
4776 
4777 	txr = intr->bnx_txr;
4778 	ret = intr->bnx_ret;
4779 
4780 	if (*ret->bnx_rx_considx != ret->bnx_rx_saved_considx ||
4781 	    *txr->bnx_tx_considx != txr->bnx_tx_saved_considx) {
4782 		if (intr->bnx_rx_check_considx == ret->bnx_rx_saved_considx &&
4783 		    intr->bnx_tx_check_considx == txr->bnx_tx_saved_considx) {
4784 			if (!intr->bnx_intr_maylose) {
4785 				intr->bnx_intr_maylose = TRUE;
4786 				goto done;
4787 			}
4788 			if (bootverbose)
4789 				if_printf(ifp, "lost interrupt\n");
4790 			intr->bnx_intr_func(intr->bnx_intr_arg);
4791 		}
4792 	}
4793 	intr->bnx_intr_maylose = FALSE;
4794 	intr->bnx_rx_check_considx = ret->bnx_rx_saved_considx;
4795 	intr->bnx_tx_check_considx = txr->bnx_tx_saved_considx;
4796 
4797 done:
4798 	callout_reset(&intr->bnx_intr_timer, BNX_INTR_CKINTVL,
4799 	    intr->bnx_intr_check, intr);
4800 	lwkt_serialize_exit(intr->bnx_intr_serialize);
4801 }
4802 
4803 static void
4804 bnx_check_intr_tx(void *xintr)
4805 {
4806 	struct bnx_intr_data *intr = xintr;
4807 	struct bnx_tx_ring *txr;
4808 	struct ifnet *ifp;
4809 
4810 	lwkt_serialize_enter(intr->bnx_intr_serialize);
4811 
4812 	KKASSERT(mycpuid == intr->bnx_intr_cpuid);
4813 
4814 	ifp = &intr->bnx_sc->arpcom.ac_if;
4815 	if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) != IFF_RUNNING) {
4816 		lwkt_serialize_exit(intr->bnx_intr_serialize);
4817 		return;
4818 	}
4819 
4820 	txr = intr->bnx_txr;
4821 
4822 	if (*txr->bnx_tx_considx != txr->bnx_tx_saved_considx) {
4823 		if (intr->bnx_tx_check_considx == txr->bnx_tx_saved_considx) {
4824 			if (!intr->bnx_intr_maylose) {
4825 				intr->bnx_intr_maylose = TRUE;
4826 				goto done;
4827 			}
4828 			if (bootverbose)
4829 				if_printf(ifp, "lost interrupt\n");
4830 			intr->bnx_intr_func(intr->bnx_intr_arg);
4831 		}
4832 	}
4833 	intr->bnx_intr_maylose = FALSE;
4834 	intr->bnx_tx_check_considx = txr->bnx_tx_saved_considx;
4835 
4836 done:
4837 	callout_reset(&intr->bnx_intr_timer, BNX_INTR_CKINTVL,
4838 	    intr->bnx_intr_check, intr);
4839 	lwkt_serialize_exit(intr->bnx_intr_serialize);
4840 }
4841 
4842 static void
4843 bnx_check_intr_rx(void *xintr)
4844 {
4845 	struct bnx_intr_data *intr = xintr;
4846 	struct bnx_rx_ret_ring *ret;
4847 	struct ifnet *ifp;
4848 
4849 	lwkt_serialize_enter(intr->bnx_intr_serialize);
4850 
4851 	KKASSERT(mycpuid == intr->bnx_intr_cpuid);
4852 
4853 	ifp = &intr->bnx_sc->arpcom.ac_if;
4854 	if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) != IFF_RUNNING) {
4855 		lwkt_serialize_exit(intr->bnx_intr_serialize);
4856 		return;
4857 	}
4858 
4859 	ret = intr->bnx_ret;
4860 
4861 	if (*ret->bnx_rx_considx != ret->bnx_rx_saved_considx) {
4862 		if (intr->bnx_rx_check_considx == ret->bnx_rx_saved_considx) {
4863 			if (!intr->bnx_intr_maylose) {
4864 				intr->bnx_intr_maylose = TRUE;
4865 				goto done;
4866 			}
4867 			if (bootverbose)
4868 				if_printf(ifp, "lost interrupt\n");
4869 			intr->bnx_intr_func(intr->bnx_intr_arg);
4870 		}
4871 	}
4872 	intr->bnx_intr_maylose = FALSE;
4873 	intr->bnx_rx_check_considx = ret->bnx_rx_saved_considx;
4874 
4875 done:
4876 	callout_reset(&intr->bnx_intr_timer, BNX_INTR_CKINTVL,
4877 	    intr->bnx_intr_check, intr);
4878 	lwkt_serialize_exit(intr->bnx_intr_serialize);
4879 }
4880 
4881 static void
4882 bnx_enable_intr(struct bnx_softc *sc)
4883 {
4884 	struct ifnet *ifp = &sc->arpcom.ac_if;
4885 	int i;
4886 
4887 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
4888 		lwkt_serialize_handler_enable(
4889 		    sc->bnx_intr_data[i].bnx_intr_serialize);
4890 	}
4891 
4892 	/*
4893 	 * Enable interrupt.
4894 	 */
4895 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
4896 		struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
4897 
4898 		bnx_writembx(sc, intr->bnx_intr_mbx,
4899 		    (*intr->bnx_saved_status_tag) << 24);
4900 		/* XXX Linux driver */
4901 		bnx_writembx(sc, intr->bnx_intr_mbx,
4902 		    (*intr->bnx_saved_status_tag) << 24);
4903 	}
4904 
4905 	/*
4906 	 * Unmask the interrupt when we stop polling.
4907 	 */
4908 	PCI_CLRBIT(sc->bnx_dev, BGE_PCI_MISC_CTL,
4909 	    BGE_PCIMISCCTL_MASK_PCI_INTR, 4);
4910 
4911 	/*
4912 	 * Trigger another interrupt, since the above write
4913 	 * to interrupt mailbox 0 may have acknowledged a
4914 	 * pending interrupt.
4915 	 */
4916 	BNX_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_INTR_SET);
4917 
4918 	if (sc->bnx_flags & BNX_FLAG_STATUSTAG_BUG) {
4919 		if (bootverbose)
4920 			if_printf(ifp, "status tag bug workaround\n");
4921 
4922 		for (i = 0; i < sc->bnx_intr_cnt; ++i) {
4923 			struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
4924 
4925 			if (intr->bnx_intr_check == NULL)
4926 				continue;
4927 			intr->bnx_intr_maylose = FALSE;
4928 			intr->bnx_rx_check_considx = 0;
4929 			intr->bnx_tx_check_considx = 0;
4930 			callout_reset_bycpu(&intr->bnx_intr_timer,
4931 			    BNX_INTR_CKINTVL, intr->bnx_intr_check, intr,
4932 			    intr->bnx_intr_cpuid);
4933 		}
4934 	}
4935 }
4936 
4937 static void
4938 bnx_disable_intr(struct bnx_softc *sc)
4939 {
4940 	int i;
4941 
4942 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
4943 		struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
4944 
4945 		callout_stop(&intr->bnx_intr_timer);
4946 		intr->bnx_intr_maylose = FALSE;
4947 		intr->bnx_rx_check_considx = 0;
4948 		intr->bnx_tx_check_considx = 0;
4949 	}
4950 
4951 	/*
4952 	 * Mask the interrupt when we start polling.
4953 	 */
4954 	PCI_SETBIT(sc->bnx_dev, BGE_PCI_MISC_CTL,
4955 	    BGE_PCIMISCCTL_MASK_PCI_INTR, 4);
4956 
4957 	/*
4958 	 * Acknowledge possible asserted interrupt.
4959 	 */
4960 	for (i = 0; i < BNX_INTR_MAX; ++i)
4961 		bnx_writembx(sc, sc->bnx_intr_data[i].bnx_intr_mbx, 1);
4962 
4963 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
4964 		lwkt_serialize_handler_disable(
4965 		    sc->bnx_intr_data[i].bnx_intr_serialize);
4966 	}
4967 }
4968 
4969 static int
4970 bnx_get_eaddr_mem(struct bnx_softc *sc, uint8_t ether_addr[])
4971 {
4972 	uint32_t mac_addr;
4973 	int ret = 1;
4974 
4975 	mac_addr = bnx_readmem_ind(sc, 0x0c14);
4976 	if ((mac_addr >> 16) == 0x484b) {
4977 		ether_addr[0] = (uint8_t)(mac_addr >> 8);
4978 		ether_addr[1] = (uint8_t)mac_addr;
4979 		mac_addr = bnx_readmem_ind(sc, 0x0c18);
4980 		ether_addr[2] = (uint8_t)(mac_addr >> 24);
4981 		ether_addr[3] = (uint8_t)(mac_addr >> 16);
4982 		ether_addr[4] = (uint8_t)(mac_addr >> 8);
4983 		ether_addr[5] = (uint8_t)mac_addr;
4984 		ret = 0;
4985 	}
4986 	return ret;
4987 }
4988 
4989 static int
4990 bnx_get_eaddr_nvram(struct bnx_softc *sc, uint8_t ether_addr[])
4991 {
4992 	int mac_offset = BGE_EE_MAC_OFFSET;
4993 
4994 	if (BNX_IS_5717_PLUS(sc)) {
4995 		int f;
4996 
4997 		f = pci_get_function(sc->bnx_dev);
4998 		if (f & 1)
4999 			mac_offset = BGE_EE_MAC_OFFSET_5717;
5000 		if (f > 1)
5001 			mac_offset += BGE_EE_MAC_OFFSET_5717_OFF;
5002 	}
5003 
5004 	return bnx_read_nvram(sc, ether_addr, mac_offset + 2, ETHER_ADDR_LEN);
5005 }
5006 
5007 static int
5008 bnx_get_eaddr_eeprom(struct bnx_softc *sc, uint8_t ether_addr[])
5009 {
5010 	if (sc->bnx_flags & BNX_FLAG_NO_EEPROM)
5011 		return 1;
5012 
5013 	return bnx_read_eeprom(sc, ether_addr, BGE_EE_MAC_OFFSET + 2,
5014 			       ETHER_ADDR_LEN);
5015 }
5016 
5017 static int
5018 bnx_get_eaddr(struct bnx_softc *sc, uint8_t eaddr[])
5019 {
5020 	static const bnx_eaddr_fcn_t bnx_eaddr_funcs[] = {
5021 		/* NOTE: Order is critical */
5022 		bnx_get_eaddr_mem,
5023 		bnx_get_eaddr_nvram,
5024 		bnx_get_eaddr_eeprom,
5025 		NULL
5026 	};
5027 	const bnx_eaddr_fcn_t *func;
5028 
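	/*
	 * Try each source in the order listed above; the first function
	 * that returns 0 supplies the station address.
	 */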
5029 	for (func = bnx_eaddr_funcs; *func != NULL; ++func) {
5030 		if ((*func)(sc, eaddr) == 0)
5031 			break;
5032 	}
5033 	return (*func == NULL ? ENXIO : 0);
5034 }
5035 
5036 /*
5037  * NOTE: 'm' is not freed upon failure
5038  */
5039 static struct mbuf *
5040 bnx_defrag_shortdma(struct mbuf *m)
5041 {
5042 	struct mbuf *n;
5043 	int found;
5044 
5045 	/*
5046 	 * If the device receives two back-to-back send BDs with less
5047 	 * than or equal to 8 total bytes, the device may hang.  The two
5048 	 * back-to-back send BDs must be in the same frame for this
5049 	 * failure to occur.  Scan the mbuf chain and see whether two
5050 	 * back-to-back send BDs are there.  If this is the case, allocate
5051 	 * a new mbuf chain and copy the frame to work around the silicon bug.
5052 	 */
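	/*
	 * Illustrative example: a chain such as
	 *	... -> mbuf(m_len = 4) -> mbuf(m_len = 6) -> ...
	 * has two consecutive sub-8-byte segments, so the scan below
	 * ends with found > 1 and the frame is copied into a freshly
	 * defragmented chain by m_defrag().
	 */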
5053 	for (n = m, found = 0; n != NULL; n = n->m_next) {
5054 		if (n->m_len < 8) {
5055 			found++;
5056 			if (found > 1)
5057 				break;
5058 			continue;
5059 		}
5060 		found = 0;
5061 	}
5062 
5063 	if (found > 1)
5064 		n = m_defrag(m, M_NOWAIT);
5065 	else
5066 		n = m;
5067 	return n;
5068 }
5069 
5070 static void
5071 bnx_stop_block(struct bnx_softc *sc, bus_size_t reg, uint32_t bit)
5072 {
5073 	int i;
5074 
5075 	BNX_CLRBIT(sc, reg, bit);
5076 	for (i = 0; i < BNX_TIMEOUT; i++) {
5077 		if ((CSR_READ_4(sc, reg) & bit) == 0)
5078 			return;
5079 		DELAY(100);
5080 	}
5081 }
5082 
5083 static void
5084 bnx_link_poll(struct bnx_softc *sc)
5085 {
5086 	uint32_t status;
5087 
5088 	status = CSR_READ_4(sc, BGE_MAC_STS);
5089 	if ((status & sc->bnx_link_chg) || sc->bnx_link_evt) {
5090 		sc->bnx_link_evt = 0;
5091 		sc->bnx_link_upd(sc, status);
5092 	}
5093 }
5094 
5095 static void
5096 bnx_enable_msi(struct bnx_softc *sc, boolean_t is_msix)
5097 {
5098 	uint32_t msi_mode;
5099 
5100 	msi_mode = CSR_READ_4(sc, BGE_MSI_MODE);
5101 	msi_mode |= BGE_MSIMODE_ENABLE;
5102 	/*
5103 	 * NOTE:
5104 	 * 5718-PG105-R says that "one shot" mode does not work
5105 	 * if MSI is used; however, it obviously works.
5106 	 */
5107 	msi_mode &= ~BGE_MSIMODE_ONESHOT_DISABLE;
5108 	if (is_msix)
5109 		msi_mode |= BGE_MSIMODE_MSIX_MULTIMODE;
5110 	else
5111 		msi_mode &= ~BGE_MSIMODE_MSIX_MULTIMODE;
5112 	CSR_WRITE_4(sc, BGE_MSI_MODE, msi_mode);
5113 }
5114 
5115 static uint32_t
5116 bnx_dma_swap_options(struct bnx_softc *sc)
5117 {
5118 	uint32_t dma_options;
5119 
5120 	dma_options = BGE_MODECTL_WORDSWAP_NONFRAME |
5121 	    BGE_MODECTL_BYTESWAP_DATA | BGE_MODECTL_WORDSWAP_DATA;
5122 #if BYTE_ORDER == BIG_ENDIAN
5123 	dma_options |= BGE_MODECTL_BYTESWAP_NONFRAME;
5124 #endif
5125 	return dma_options;
5126 }
5127 
5128 static int
5129 bnx_setup_tso(struct bnx_tx_ring *txr, struct mbuf **mp,
5130     uint16_t *mss0, uint16_t *flags0)
5131 {
5132 	struct mbuf *m;
5133 	struct ip *ip;
5134 	struct tcphdr *th;
5135 	int thoff, iphlen, hoff, hlen;
5136 	uint16_t flags, mss;
5137 
5138 	m = *mp;
5139 	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));
5140 
5141 	hoff = m->m_pkthdr.csum_lhlen;
5142 	iphlen = m->m_pkthdr.csum_iphlen;
5143 	thoff = m->m_pkthdr.csum_thlen;
5144 
5145 	KASSERT(hoff > 0, ("invalid ether header len"));
5146 	KASSERT(iphlen > 0, ("invalid ip header len"));
5147 	KASSERT(thoff > 0, ("invalid tcp header len"));
5148 
5149 	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
5150 		m = m_pullup(m, hoff + iphlen + thoff);
5151 		if (m == NULL) {
5152 			*mp = NULL;
5153 			return ENOBUFS;
5154 		}
5155 		*mp = m;
5156 	}
5157 	ip = mtodoff(m, struct ip *, hoff);
5158 	th = mtodoff(m, struct tcphdr *, hoff + iphlen);
5159 
5160 	mss = m->m_pkthdr.tso_segsz;
5161 	flags = BGE_TXBDFLAG_CPU_PRE_DMA | BGE_TXBDFLAG_CPU_POST_DMA;
5162 
5163 	ip->ip_len = htons(mss + iphlen + thoff);
5164 	th->th_sum = 0;
5165 
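	/*
	 * hlen is the combined IP + TCP header length in 32-bit words.
	 * Its low two bits are folded into bits 15:14 of the MSS field,
	 * bit 2 into bit 4 of the BD flags, and bits 7:3 into bits 14:10
	 * of the BD flags.  Worked example (20-byte IP + 20-byte TCP
	 * headers): hlen = 10, so mss |= 0x8000 and flags |= 0x0400.
	 */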
5166 	hlen = (iphlen + thoff) >> 2;
5167 	mss |= ((hlen & 0x3) << 14);
5168 	flags |= ((hlen & 0xf8) << 7) | ((hlen & 0x4) << 2);
5169 
5170 	*mss0 = mss;
5171 	*flags0 = flags;
5172 
5173 	return 0;
5174 }
5175 
5176 static int
5177 bnx_create_tx_ring(struct bnx_tx_ring *txr)
5178 {
5179 	bus_size_t txmaxsz, txmaxsegsz;
5180 	int i, error;
5181 
5182 	lwkt_serialize_init(&txr->bnx_tx_serialize);
5183 
5184 	/*
5185 	 * Create DMA tag and maps for TX mbufs.
5186 	 */
5187 	if (txr->bnx_sc->bnx_flags & BNX_FLAG_TSO)
5188 		txmaxsz = IP_MAXPACKET + sizeof(struct ether_vlan_header);
5189 	else
5190 		txmaxsz = BNX_JUMBO_FRAMELEN;
5191 	if (txr->bnx_sc->bnx_asicrev == BGE_ASICREV_BCM57766)
5192 		txmaxsegsz = MCLBYTES;
5193 	else
5194 		txmaxsegsz = PAGE_SIZE;
5195 	error = bus_dma_tag_create(txr->bnx_sc->bnx_cdata.bnx_parent_tag,
5196 	    1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
5197 	    txmaxsz, BNX_NSEG_NEW, txmaxsegsz,
5198 	    BUS_DMA_ALLOCNOW | BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE,
5199 	    &txr->bnx_tx_mtag);
5200 	if (error) {
5201 		device_printf(txr->bnx_sc->bnx_dev,
5202 		    "could not create TX mbuf DMA tag\n");
5203 		return error;
5204 	}
5205 
5206 	for (i = 0; i < BGE_TX_RING_CNT; i++) {
5207 		error = bus_dmamap_create(txr->bnx_tx_mtag,
5208 		    BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE,
5209 		    &txr->bnx_tx_buf[i].bnx_tx_dmamap);
5210 		if (error) {
5211 			int j;
5212 
5213 			for (j = 0; j < i; ++j) {
5214 				bus_dmamap_destroy(txr->bnx_tx_mtag,
5215 				    txr->bnx_tx_buf[j].bnx_tx_dmamap);
5216 			}
5217 			bus_dma_tag_destroy(txr->bnx_tx_mtag);
5218 			txr->bnx_tx_mtag = NULL;
5219 
5220 			device_printf(txr->bnx_sc->bnx_dev,
5221 			    "could not create TX mbuf DMA map\n");
5222 			return error;
5223 		}
5224 	}
5225 
5226 	/*
5227 	 * Create DMA stuffs for TX ring.
5228 	 */
5229 	error = bnx_dma_block_alloc(txr->bnx_sc, BGE_TX_RING_SZ,
5230 	    &txr->bnx_tx_ring_tag,
5231 	    &txr->bnx_tx_ring_map,
5232 	    (void *)&txr->bnx_tx_ring,
5233 	    &txr->bnx_tx_ring_paddr);
5234 	if (error) {
5235 		device_printf(txr->bnx_sc->bnx_dev,
5236 		    "could not create TX ring\n");
5237 		return error;
5238 	}
5239 
5240 	txr->bnx_tx_flags |= BNX_TX_FLAG_SHORTDMA;
5241 	txr->bnx_tx_wreg = BNX_TX_WREG_NSEGS;
5242 
5243 	return 0;
5244 }
5245 
5246 static void
5247 bnx_destroy_tx_ring(struct bnx_tx_ring *txr)
5248 {
5249 	/* Destroy TX mbuf DMA stuffs. */
5250 	if (txr->bnx_tx_mtag != NULL) {
5251 		int i;
5252 
5253 		for (i = 0; i < BGE_TX_RING_CNT; i++) {
5254 			KKASSERT(txr->bnx_tx_buf[i].bnx_tx_mbuf == NULL);
5255 			bus_dmamap_destroy(txr->bnx_tx_mtag,
5256 			    txr->bnx_tx_buf[i].bnx_tx_dmamap);
5257 		}
5258 		bus_dma_tag_destroy(txr->bnx_tx_mtag);
5259 	}
5260 
5261 	/* Destroy TX ring */
5262 	bnx_dma_block_free(txr->bnx_tx_ring_tag,
5263 	    txr->bnx_tx_ring_map, txr->bnx_tx_ring);
5264 }
5265 
5266 static int
5267 bnx_sysctl_force_defrag(SYSCTL_HANDLER_ARGS)
5268 {
5269 	struct bnx_softc *sc = (void *)arg1;
5270 	struct ifnet *ifp = &sc->arpcom.ac_if;
5271 	struct bnx_tx_ring *txr = &sc->bnx_tx_ring[0];
5272 	int error, defrag, i;
5273 
5274 	if (txr->bnx_tx_flags & BNX_TX_FLAG_FORCE_DEFRAG)
5275 		defrag = 1;
5276 	else
5277 		defrag = 0;
5278 
5279 	error = sysctl_handle_int(oidp, &defrag, 0, req);
5280 	if (error || req->newptr == NULL)
5281 		return error;
5282 
5283 	ifnet_serialize_all(ifp);
5284 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
5285 		txr = &sc->bnx_tx_ring[i];
5286 		if (defrag)
5287 			txr->bnx_tx_flags |= BNX_TX_FLAG_FORCE_DEFRAG;
5288 		else
5289 			txr->bnx_tx_flags &= ~BNX_TX_FLAG_FORCE_DEFRAG;
5290 	}
5291 	ifnet_deserialize_all(ifp);
5292 
5293 	return 0;
5294 }
5295 
5296 static int
5297 bnx_sysctl_tx_wreg(SYSCTL_HANDLER_ARGS)
5298 {
5299 	struct bnx_softc *sc = (void *)arg1;
5300 	struct ifnet *ifp = &sc->arpcom.ac_if;
5301 	struct bnx_tx_ring *txr = &sc->bnx_tx_ring[0];
5302 	int error, tx_wreg, i;
5303 
5304 	tx_wreg = txr->bnx_tx_wreg;
5305 	error = sysctl_handle_int(oidp, &tx_wreg, 0, req);
5306 	if (error || req->newptr == NULL)
5307 		return error;
5308 
5309 	ifnet_serialize_all(ifp);
5310 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i)
5311 		sc->bnx_tx_ring[i].bnx_tx_wreg = tx_wreg;
5312 	ifnet_deserialize_all(ifp);
5313 
5314 	return 0;
5315 }
5316 
5317 static int
5318 bnx_create_rx_ret_ring(struct bnx_rx_ret_ring *ret)
5319 {
5320 	int error;
5321 
5322 	lwkt_serialize_init(&ret->bnx_rx_ret_serialize);
5323 
5324 	/*
5325 	 * Create DMA resources for the RX return ring.
5326 	 */
5327 	error = bnx_dma_block_alloc(ret->bnx_sc,
5328 	    BGE_RX_RTN_RING_SZ(BNX_RETURN_RING_CNT),
5329 	    &ret->bnx_rx_ret_ring_tag,
5330 	    &ret->bnx_rx_ret_ring_map,
5331 	    (void *)&ret->bnx_rx_ret_ring,
5332 	    &ret->bnx_rx_ret_ring_paddr);
5333 	if (error) {
5334 		device_printf(ret->bnx_sc->bnx_dev,
5335 		    "could not create RX ret ring\n");
5336 		return error;
5337 	}
5338 
5339 	/* Shadow standard ring's RX mbuf DMA tag */
5340 	ret->bnx_rx_mtag = ret->bnx_std->bnx_rx_mtag;
5341 
5342 	/*
5343 	 * Create tmp DMA map for RX mbufs.
5344 	 */
5345 	error = bus_dmamap_create(ret->bnx_rx_mtag, BUS_DMA_WAITOK,
5346 	    &ret->bnx_rx_tmpmap);
5347 	if (error) {
5348 		device_printf(ret->bnx_sc->bnx_dev,
5349 		    "could not create tmp RX mbuf DMA map\n");
5350 		ret->bnx_rx_mtag = NULL;
5351 		return error;
5352 	}
5353 	return 0;
5354 }
5355 
5356 static void
5357 bnx_destroy_rx_ret_ring(struct bnx_rx_ret_ring *ret)
5358 {
5359 	/* Destroy tmp RX mbuf DMA map */
5360 	if (ret->bnx_rx_mtag != NULL)
5361 		bus_dmamap_destroy(ret->bnx_rx_mtag, ret->bnx_rx_tmpmap);
5362 
5363 	/* Destroy RX return ring */
5364 	bnx_dma_block_free(ret->bnx_rx_ret_ring_tag,
5365 	    ret->bnx_rx_ret_ring_map, ret->bnx_rx_ret_ring);
5366 }
5367 
5368 static int
5369 bnx_alloc_intr(struct bnx_softc *sc)
5370 {
5371 	struct bnx_intr_data *intr;
5372 	u_int intr_flags;
5373 	int error;
5374 
5375 	if (sc->bnx_intr_cnt > 1) {
5376 		error = bnx_alloc_msix(sc);
5377 		if (error)
5378 			return error;
5379 		KKASSERT(sc->bnx_intr_type == PCI_INTR_TYPE_MSIX);
5380 		return 0;
5381 	}
5382 
5383 	KKASSERT(sc->bnx_intr_cnt == 1);
5384 
5385 	intr = &sc->bnx_intr_data[0];
5386 	intr->bnx_ret = &sc->bnx_rx_ret_ring[0];
5387 	intr->bnx_txr = &sc->bnx_tx_ring[0];
5388 	intr->bnx_intr_serialize = &sc->bnx_main_serialize;
5389 	intr->bnx_intr_check = bnx_check_intr_rxtx;
5390 	intr->bnx_saved_status_tag = &intr->bnx_ret->bnx_saved_status_tag;
5391 
5392 	sc->bnx_intr_type = pci_alloc_1intr(sc->bnx_dev, bnx_msi_enable,
5393 	    &intr->bnx_intr_rid, &intr_flags);
5394 
5395 	intr->bnx_intr_res = bus_alloc_resource_any(sc->bnx_dev, SYS_RES_IRQ,
5396 	    &intr->bnx_intr_rid, intr_flags);
5397 	if (intr->bnx_intr_res == NULL) {
5398 		device_printf(sc->bnx_dev, "could not alloc interrupt\n");
5399 		return ENXIO;
5400 	}
5401 
5402 	if (sc->bnx_intr_type == PCI_INTR_TYPE_MSI) {
5403 		bnx_enable_msi(sc, FALSE);
5404 		intr->bnx_intr_func = bnx_msi;
5405 		if (bootverbose)
5406 			device_printf(sc->bnx_dev, "oneshot MSI\n");
5407 	} else {
5408 		intr->bnx_intr_func = bnx_intr_legacy;
5409 	}
5410 	intr->bnx_intr_arg = sc;
5411 	intr->bnx_intr_cpuid = rman_get_cpuid(intr->bnx_intr_res);
5412 
5413 	intr->bnx_txr->bnx_tx_cpuid = intr->bnx_intr_cpuid;
5414 
5415 	return 0;
5416 }
5417 
5418 static int
5419 bnx_setup_intr(struct bnx_softc *sc)
5420 {
5421 	int error, i;
5422 
5423 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
5424 		struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
5425 
5426 		error = bus_setup_intr_descr(sc->bnx_dev, intr->bnx_intr_res,
5427 		    INTR_MPSAFE, intr->bnx_intr_func, intr->bnx_intr_arg,
5428 		    &intr->bnx_intr_hand, intr->bnx_intr_serialize,
5429 		    intr->bnx_intr_desc);
5430 		if (error) {
5431 			device_printf(sc->bnx_dev,
5432 			    "could not set up intr %d\n", i);
5433 			bnx_teardown_intr(sc, i);
5434 			return error;
5435 		}
5436 	}
5437 	return 0;
5438 }
5439 
5440 static void
5441 bnx_teardown_intr(struct bnx_softc *sc, int cnt)
5442 {
5443 	int i;
5444 
5445 	for (i = 0; i < cnt; ++i) {
5446 		struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
5447 
5448 		bus_teardown_intr(sc->bnx_dev, intr->bnx_intr_res,
5449 		    intr->bnx_intr_hand);
5450 	}
5451 }
5452 
5453 static void
5454 bnx_free_intr(struct bnx_softc *sc)
5455 {
5456 	if (sc->bnx_intr_type != PCI_INTR_TYPE_MSIX) {
5457 		struct bnx_intr_data *intr;
5458 
5459 		KKASSERT(sc->bnx_intr_cnt <= 1);
5460 		intr = &sc->bnx_intr_data[0];
5461 
5462 		if (intr->bnx_intr_res != NULL) {
5463 			bus_release_resource(sc->bnx_dev, SYS_RES_IRQ,
5464 			    intr->bnx_intr_rid, intr->bnx_intr_res);
5465 		}
5466 		if (sc->bnx_intr_type == PCI_INTR_TYPE_MSI)
5467 			pci_release_msi(sc->bnx_dev);
5468 	} else {
5469 		bnx_free_msix(sc, TRUE);
5470 	}
5471 }
5472 
5473 static void
5474 bnx_setup_serialize(struct bnx_softc *sc)
5475 {
5476 	int i, j;
5477 
5478 	/*
5479 	 * Allocate serializer array
5480 	 */
5481 
5482 	/* Main + RX STD + TX + RX RET */
5483 	sc->bnx_serialize_cnt = 1 + 1 + sc->bnx_tx_ringcnt + sc->bnx_rx_retcnt;
5484 
5485 	sc->bnx_serialize =
5486 	    kmalloc(sc->bnx_serialize_cnt * sizeof(struct lwkt_serialize *),
5487 	        M_DEVBUF, M_WAITOK | M_ZERO);
5488 
5489 	/*
5490 	 * Setup serializers
5491 	 *
5492 	 * NOTE: Order is critical
5493 	 */
5494 
5495 	i = 0;
5496 
5497 	KKASSERT(i < sc->bnx_serialize_cnt);
5498 	sc->bnx_serialize[i++] = &sc->bnx_main_serialize;
5499 
5500 	KKASSERT(i < sc->bnx_serialize_cnt);
5501 	sc->bnx_serialize[i++] = &sc->bnx_rx_std_ring.bnx_rx_std_serialize;
5502 
5503 	for (j = 0; j < sc->bnx_rx_retcnt; ++j) {
5504 		KKASSERT(i < sc->bnx_serialize_cnt);
5505 		sc->bnx_serialize[i++] =
5506 		    &sc->bnx_rx_ret_ring[j].bnx_rx_ret_serialize;
5507 	}
5508 
5509 	for (j = 0; j < sc->bnx_tx_ringcnt; ++j) {
5510 		KKASSERT(i < sc->bnx_serialize_cnt);
5511 		sc->bnx_serialize[i++] =
5512 		    &sc->bnx_tx_ring[j].bnx_tx_serialize;
5513 	}
5514 
5515 	KKASSERT(i == sc->bnx_serialize_cnt);
5516 }
5517 
5518 static void
5519 bnx_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
5520 {
5521 	struct bnx_softc *sc = ifp->if_softc;
5522 
5523 	ifnet_serialize_array_enter(sc->bnx_serialize,
5524 	    sc->bnx_serialize_cnt, slz);
5525 }
5526 
5527 static void
5528 bnx_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
5529 {
5530 	struct bnx_softc *sc = ifp->if_softc;
5531 
5532 	ifnet_serialize_array_exit(sc->bnx_serialize,
5533 	    sc->bnx_serialize_cnt, slz);
5534 }
5535 
5536 static int
5537 bnx_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
5538 {
5539 	struct bnx_softc *sc = ifp->if_softc;
5540 
5541 	return ifnet_serialize_array_try(sc->bnx_serialize,
5542 	    sc->bnx_serialize_cnt, slz);
5543 }
5544 
5545 #ifdef INVARIANTS
5546 
5547 static void
5548 bnx_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
5549     boolean_t serialized)
5550 {
5551 	struct bnx_softc *sc = ifp->if_softc;
5552 
5553 	ifnet_serialize_array_assert(sc->bnx_serialize, sc->bnx_serialize_cnt,
5554 	    slz, serialized);
5555 }
5556 
5557 #endif	/* INVARIANTS */
5558 
5559 static void
5560 bnx_set_tick_cpuid(struct bnx_softc *sc, boolean_t polling)
5561 {
5562 	if (polling)
5563 		sc->bnx_tick_cpuid = 0; /* XXX */
5564 	else
5565 		sc->bnx_tick_cpuid = sc->bnx_intr_data[0].bnx_intr_cpuid;
5566 }
5567 
5568 static void
5569 bnx_rx_std_refill_ithread(void *xstd)
5570 {
5571 	struct bnx_rx_std_ring *std = xstd;
5572 	struct globaldata *gd = mycpu;
5573 
5574 	crit_enter_gd(gd);
5575 
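	/*
	 * Service refill requests until asked to stop.  After each pass
	 * the running flag is dropped; if no new request is pending, the
	 * thread deschedules itself until the next wakeup.
	 */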
5576 	while (!std->bnx_rx_std_stop) {
5577 		if (std->bnx_rx_std_refill) {
5578 			lwkt_serialize_handler_call(
5579 			    &std->bnx_rx_std_serialize,
5580 			    bnx_rx_std_refill, std, NULL);
5581 		}
5582 
5583 		crit_exit_gd(gd);
5584 		crit_enter_gd(gd);
5585 
5586 		atomic_poll_release_int(&std->bnx_rx_std_running);
5587 		cpu_mfence();
5588 
5589 		if (!std->bnx_rx_std_refill && !std->bnx_rx_std_stop) {
5590 			lwkt_deschedule_self(gd->gd_curthread);
5591 			lwkt_switch();
5592 		}
5593 	}
5594 
5595 	crit_exit_gd(gd);
5596 
5597 	wakeup(std);
5598 
5599 	lwkt_exit();
5600 }
5601 
5602 static void
5603 bnx_rx_std_refill(void *xstd, void *frame __unused)
5604 {
5605 	struct bnx_rx_std_ring *std = xstd;
5606 	int cnt, refill_mask;
5607 
5608 again:
5609 	cnt = 0;
5610 
5611 	cpu_lfence();
5612 	refill_mask = std->bnx_rx_std_refill;
5613 	atomic_clear_int(&std->bnx_rx_std_refill, refill_mask);
5614 
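	/*
	 * Each set bit identifies a return ring that has handed buffers
	 * back.  For every requesting ring, walk the standard ring from
	 * the current producer index, re-post descriptors whose mbufs
	 * have been refilled, and push the producer index to the chip
	 * in batches of 8.
	 */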
5615 	while (refill_mask) {
5616 		uint16_t check_idx = std->bnx_rx_std;
5617 		int ret_idx;
5618 
5619 		ret_idx = bsfl(refill_mask);
5620 		for (;;) {
5621 			struct bnx_rx_buf *rb;
5622 			int refilled;
5623 
5624 			BNX_INC(check_idx, BGE_STD_RX_RING_CNT);
5625 			rb = &std->bnx_rx_std_buf[check_idx];
5626 			refilled = rb->bnx_rx_refilled;
5627 			cpu_lfence();
5628 			if (refilled) {
5629 				bnx_setup_rxdesc_std(std, check_idx);
5630 				std->bnx_rx_std = check_idx;
5631 				++cnt;
5632 				if (cnt >= 8) {
5633 					atomic_subtract_int(
5634 					    &std->bnx_rx_std_used, cnt);
5635 					bnx_writembx(std->bnx_sc,
5636 					    BGE_MBX_RX_STD_PROD_LO,
5637 					    std->bnx_rx_std);
5638 					cnt = 0;
5639 				}
5640 			} else {
5641 				break;
5642 			}
5643 		}
5644 		refill_mask &= ~(1 << ret_idx);
5645 	}
5646 
5647 	if (cnt) {
5648 		atomic_subtract_int(&std->bnx_rx_std_used, cnt);
5649 		bnx_writembx(std->bnx_sc, BGE_MBX_RX_STD_PROD_LO,
5650 		    std->bnx_rx_std);
5651 	}
5652 
5653 	if (std->bnx_rx_std_refill)
5654 		goto again;
5655 
5656 	atomic_poll_release_int(&std->bnx_rx_std_running);
5657 	cpu_mfence();
5658 
5659 	if (std->bnx_rx_std_refill)
5660 		goto again;
5661 }
5662 
5663 static int
5664 bnx_sysctl_std_refill(SYSCTL_HANDLER_ARGS)
5665 {
5666 	struct bnx_softc *sc = (void *)arg1;
5667 	struct ifnet *ifp = &sc->arpcom.ac_if;
5668 	struct bnx_rx_ret_ring *ret = &sc->bnx_rx_ret_ring[0];
5669 	int error, cntmax, i;
5670 
5671 	cntmax = ret->bnx_rx_cntmax;
5672 	error = sysctl_handle_int(oidp, &cntmax, 0, req);
5673 	if (error || req->newptr == NULL)
5674 		return error;
5675 
5676 	ifnet_serialize_all(ifp);
5677 
5678 	if ((cntmax * sc->bnx_rx_retcnt) >= BGE_STD_RX_RING_CNT / 2) {
5679 		error = EINVAL;
5680 		goto back;
5681 	}
5682 
5683 	for (i = 0; i < sc->bnx_rx_retcnt; ++i)
5684 		sc->bnx_rx_ret_ring[i].bnx_rx_cntmax = cntmax;
5685 	error = 0;
5686 
5687 back:
5688 	ifnet_deserialize_all(ifp);
5689 
5690 	return error;
5691 }
5692 
5693 static void
5694 bnx_init_rss(struct bnx_softc *sc)
5695 {
5696 	uint8_t key[BGE_RSS_KEYREG_CNT * BGE_RSS_KEYREG_SIZE];
5697 	int i, j, r;
5698 
5699 	KKASSERT(BNX_RSS_ENABLED(sc));
5700 
5701 	/*
5702 	 * Configure RSS redirect table.
5703 	 */
5704 	if_ringmap_rdrtable(sc->bnx_rx_rmap, sc->bnx_rdr_table,
5705 	    BNX_RDRTABLE_SIZE);
5706 	r = 0;
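	/*
	 * Pack the redirect table into the indirection registers,
	 * BGE_RSS_INDIR_TBLENT_CNT entries per 32-bit register, first
	 * entry in the most significant position.
	 */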
5707 	for (j = 0; j < BGE_RSS_INDIR_TBL_CNT; ++j) {
5708 		uint32_t tbl = 0;
5709 
5710 		for (i = 0; i < BGE_RSS_INDIR_TBLENT_CNT; ++i) {
5711 			uint32_t q;
5712 
5713 			q = sc->bnx_rdr_table[r];
5714 			tbl |= q << (BGE_RSS_INDIR_TBLENT_SHIFT *
5715 			    (BGE_RSS_INDIR_TBLENT_CNT - i - 1));
5716 			++r;
5717 		}
5718 
5719 		BNX_RSS_DPRINTF(sc, 1, "tbl%d %08x\n", j, tbl);
5720 		CSR_WRITE_4(sc, BGE_RSS_INDIR_TBL(j), tbl);
5721 	}
5722 
5723 	toeplitz_get_key(key, sizeof(key));
5724 	for (i = 0; i < BGE_RSS_KEYREG_CNT; ++i) {
5725 		uint32_t keyreg;
5726 
5727 		keyreg = BGE_RSS_KEYREG_VAL(key, i);
5728 
5729 		BNX_RSS_DPRINTF(sc, 1, "key%d %08x\n", i, keyreg);
5730 		CSR_WRITE_4(sc, BGE_RSS_KEYREG(i), keyreg);
5731 	}
5732 }
5733 
5734 static void
5735 bnx_setup_ring_cnt(struct bnx_softc *sc)
5736 {
5737 	int msix_enable, msix_cnt, msix_ring, ring_max, ring_cnt;
5738 
5739 	/* One RX ring. */
5740 	sc->bnx_rx_rmap = if_ringmap_alloc(sc->bnx_dev, 1, 1);
5741 
5742 	if (netisr_ncpus == 1)
5743 		goto skip_rx;
5744 
5745 	msix_enable = device_getenv_int(sc->bnx_dev, "msix.enable",
5746 	    bnx_msix_enable);
5747 	if (!msix_enable)
5748 		goto skip_rx;
5749 
5750 	/*
5751 	 * One MSI-X vector is dedicated to link status or to a single
5752 	 * TX queue, so make sure that there are enough MSI-X vectors.
5753 	 */
5754 	msix_cnt = pci_msix_count(sc->bnx_dev);
5755 	if (msix_cnt <= 1)
5756 		goto skip_rx;
5757 	if (bootverbose)
5758 		device_printf(sc->bnx_dev, "MSI-X count %d\n", msix_cnt);
5759 	msix_ring = msix_cnt - 1;
5760 
5761 	/*
5762 	 * Setup RX ring count
5763 	 */
5764 	ring_max = BNX_RX_RING_MAX;
5765 	if (ring_max > msix_ring)
5766 		ring_max = msix_ring;
5767 	ring_cnt = device_getenv_int(sc->bnx_dev, "rx_rings", bnx_rx_rings);
5768 
5769 	if_ringmap_free(sc->bnx_rx_rmap);
5770 	sc->bnx_rx_rmap = if_ringmap_alloc(sc->bnx_dev, ring_cnt, ring_max);
5771 
5772 skip_rx:
5773 	sc->bnx_rx_retcnt = if_ringmap_count(sc->bnx_rx_rmap);
5774 
5775 	/*
5776 	 * Setup TX ring count
5777 	 *
5778 	 * Currently only BCM5719 and BCM5720 support multiple TX rings
5779 	 * and the TX ring count must not exceed the RX ring count.
5780 	 */
5781 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5719 ||
5782 	    sc->bnx_asicrev == BGE_ASICREV_BCM5720) {
5783 		ring_max = BNX_TX_RING_MAX;
5784 		if (ring_max > sc->bnx_rx_retcnt)
5785 			ring_max = sc->bnx_rx_retcnt;
5786 		ring_cnt = device_getenv_int(sc->bnx_dev, "tx_rings",
5787 		    bnx_tx_rings);
5788 	} else {
5789 		ring_max = 1;
5790 		ring_cnt = 1;
5791 	}
5792 	sc->bnx_tx_rmap = if_ringmap_alloc(sc->bnx_dev, ring_cnt, ring_max);
5793 	if_ringmap_align(sc->bnx_dev, sc->bnx_rx_rmap, sc->bnx_tx_rmap);
5794 
5795 	sc->bnx_tx_ringcnt = if_ringmap_count(sc->bnx_tx_rmap);
5796 	KASSERT(sc->bnx_tx_ringcnt <= sc->bnx_rx_retcnt,
5797 	    ("invalid TX ring count %d and RX ring count %d",
5798 	     sc->bnx_tx_ringcnt, sc->bnx_rx_retcnt));
5799 
5800 	/*
5801 	 * Setup interrupt count.
5802 	 */
5803 	if (sc->bnx_rx_retcnt == 1) {
5804 		sc->bnx_intr_cnt = 1;
5805 	} else {
5806 		/*
5807 		 * We need one extra MSI-X vector for link status or
5808 		 * TX ring (if only one TX ring is enabled).
5809 		 */
5810 		sc->bnx_intr_cnt = sc->bnx_rx_retcnt + 1;
5811 	}
5812 	KKASSERT(sc->bnx_intr_cnt <= BNX_INTR_MAX);
5813 
5814 	if (bootverbose) {
5815 		device_printf(sc->bnx_dev, "intr count %d, "
5816 		    "RX ring %d, TX ring %d\n", sc->bnx_intr_cnt,
5817 		    sc->bnx_rx_retcnt, sc->bnx_tx_ringcnt);
5818 	}
5819 }
5820 
5821 static int
5822 bnx_alloc_msix(struct bnx_softc *sc)
5823 {
5824 	struct bnx_intr_data *intr;
5825 	boolean_t setup = FALSE;
5826 	int error, i;
5827 
5828 	KKASSERT(sc->bnx_intr_cnt > 1);
5829 	KKASSERT(sc->bnx_intr_cnt == sc->bnx_rx_retcnt + 1);
5830 
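	/*
	 * Two vector layouts are used.  With RX/TX bundling, vector 0
	 * handles link status only and each remaining vector serves an
	 * RX return ring, paired with a TX ring where one exists.
	 * Without bundling, vector 0 handles TX ring 0 plus link status
	 * and the remaining vectors serve RX return rings only.
	 */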
5831 	if (sc->bnx_flags & BNX_FLAG_RXTX_BUNDLE) {
5832 		/*
5833 		 * Link status
5834 		 */
5835 		intr = &sc->bnx_intr_data[0];
5836 
5837 		intr->bnx_intr_serialize = &sc->bnx_main_serialize;
5838 		intr->bnx_saved_status_tag = &sc->bnx_saved_status_tag;
5839 
5840 		intr->bnx_intr_func = bnx_msix_status;
5841 		intr->bnx_intr_arg = sc;
5842 		intr->bnx_intr_cpuid = 0; /* XXX */
5843 
5844 		ksnprintf(intr->bnx_intr_desc0, sizeof(intr->bnx_intr_desc0),
5845 		    "%s sts", device_get_nameunit(sc->bnx_dev));
5846 		intr->bnx_intr_desc = intr->bnx_intr_desc0;
5847 
5848 		/*
5849 		 * RX/TX rings
5850 		 */
5851 		for (i = 1; i < sc->bnx_intr_cnt; ++i) {
5852 			int idx = i - 1;
5853 
5854 			intr = &sc->bnx_intr_data[i];
5855 
5856 			KKASSERT(idx < sc->bnx_rx_retcnt);
5857 			intr->bnx_ret = &sc->bnx_rx_ret_ring[idx];
5858 			if (idx < sc->bnx_tx_ringcnt) {
5859 				intr->bnx_txr = &sc->bnx_tx_ring[idx];
5860 				intr->bnx_ret->bnx_txr = intr->bnx_txr;
5861 			}
5862 
5863 			intr->bnx_intr_serialize =
5864 			    &intr->bnx_ret->bnx_rx_ret_serialize;
5865 			intr->bnx_saved_status_tag =
5866 			    &intr->bnx_ret->bnx_saved_status_tag;
5867 
5868 			intr->bnx_intr_arg = intr->bnx_ret;
5869 			intr->bnx_intr_cpuid =
5870 			    if_ringmap_cpumap(sc->bnx_rx_rmap, idx);
5871 			KKASSERT(intr->bnx_intr_cpuid < netisr_ncpus);
5872 
5873 			if (intr->bnx_txr == NULL) {
5874 				intr->bnx_intr_check = bnx_check_intr_rx;
5875 				intr->bnx_intr_func = bnx_msix_rx;
5876 				ksnprintf(intr->bnx_intr_desc0,
5877 				    sizeof(intr->bnx_intr_desc0), "%s rx%d",
5878 				    device_get_nameunit(sc->bnx_dev), idx);
5879 			} else {
5880 #ifdef INVARIANTS
5881 				int tx_cpuid;
5882 #endif
5883 
5884 				intr->bnx_intr_check = bnx_check_intr_rxtx;
5885 				intr->bnx_intr_func = bnx_msix_rxtx;
5886 				ksnprintf(intr->bnx_intr_desc0,
5887 				    sizeof(intr->bnx_intr_desc0), "%s rxtx%d",
5888 				    device_get_nameunit(sc->bnx_dev), idx);
5889 
5890 #ifdef INVARIANTS
5891 				tx_cpuid = if_ringmap_cpumap(sc->bnx_tx_rmap,
5892 				    idx);
5893 				KASSERT(intr->bnx_intr_cpuid == tx_cpuid,
5894 				    ("RX intr cpu%d, TX intr cpu%d, mismatch",
5895 				     intr->bnx_intr_cpuid, tx_cpuid));
5896 #endif
5897 				intr->bnx_txr->bnx_tx_cpuid =
5898 				    intr->bnx_intr_cpuid;
5899 			}
5900 			intr->bnx_intr_desc = intr->bnx_intr_desc0;
5901 
5902 			intr->bnx_ret->bnx_msix_mbx = intr->bnx_intr_mbx;
5903 		}
5904 	} else {
5905 		/*
5906 		 * TX ring0 and link status
5907 		 */
5908 		intr = &sc->bnx_intr_data[0];
5909 
5910 		intr->bnx_txr = &sc->bnx_tx_ring[0];
5911 		intr->bnx_intr_serialize = &sc->bnx_main_serialize;
5912 		intr->bnx_intr_check = bnx_check_intr_tx;
5913 		intr->bnx_saved_status_tag =
5914 		    &intr->bnx_txr->bnx_saved_status_tag;
5915 
5916 		intr->bnx_intr_func = bnx_msix_tx_status;
5917 		intr->bnx_intr_arg = intr->bnx_txr;
5918 		intr->bnx_intr_cpuid = if_ringmap_cpumap(sc->bnx_tx_rmap, 0);
5919 		KKASSERT(intr->bnx_intr_cpuid < netisr_ncpus);
5920 
5921 		ksnprintf(intr->bnx_intr_desc0, sizeof(intr->bnx_intr_desc0),
5922 		    "%s ststx", device_get_nameunit(sc->bnx_dev));
5923 		intr->bnx_intr_desc = intr->bnx_intr_desc0;
5924 
5925 		intr->bnx_txr->bnx_tx_cpuid = intr->bnx_intr_cpuid;
5926 
5927 		/*
5928 		 * RX rings
5929 		 */
5930 		for (i = 1; i < sc->bnx_intr_cnt; ++i) {
5931 			int idx = i - 1;
5932 
5933 			intr = &sc->bnx_intr_data[i];
5934 
5935 			KKASSERT(idx < sc->bnx_rx_retcnt);
5936 			intr->bnx_ret = &sc->bnx_rx_ret_ring[idx];
5937 			intr->bnx_intr_serialize =
5938 			    &intr->bnx_ret->bnx_rx_ret_serialize;
5939 			intr->bnx_intr_check = bnx_check_intr_rx;
5940 			intr->bnx_saved_status_tag =
5941 			    &intr->bnx_ret->bnx_saved_status_tag;
5942 
5943 			intr->bnx_intr_func = bnx_msix_rx;
5944 			intr->bnx_intr_arg = intr->bnx_ret;
5945 			intr->bnx_intr_cpuid =
5946 			    if_ringmap_cpumap(sc->bnx_rx_rmap, idx);
5947 			KKASSERT(intr->bnx_intr_cpuid < netisr_ncpus);
5948 
5949 			ksnprintf(intr->bnx_intr_desc0,
5950 			    sizeof(intr->bnx_intr_desc0), "%s rx%d",
5951 			    device_get_nameunit(sc->bnx_dev), idx);
5952 			intr->bnx_intr_desc = intr->bnx_intr_desc0;
5953 
5954 			intr->bnx_ret->bnx_msix_mbx = intr->bnx_intr_mbx;
5955 		}
5956 	}
5957 
5958 	if (BNX_IS_5717_PLUS(sc)) {
5959 		sc->bnx_msix_mem_rid = PCIR_BAR(4);
5960 	} else {
5961 		if (sc->bnx_res2 == NULL)
5962 			sc->bnx_msix_mem_rid = PCIR_BAR(2);
5963 	}
5964 	if (sc->bnx_msix_mem_rid != 0) {
5965 		sc->bnx_msix_mem_res = bus_alloc_resource_any(sc->bnx_dev,
5966 		    SYS_RES_MEMORY, &sc->bnx_msix_mem_rid, RF_ACTIVE);
5967 		if (sc->bnx_msix_mem_res == NULL) {
5968 			device_printf(sc->bnx_dev,
5969 			    "could not alloc MSI-X table\n");
5970 			return ENXIO;
5971 		}
5972 	}
5973 
5974 	bnx_enable_msi(sc, TRUE);
5975 
5976 	error = pci_setup_msix(sc->bnx_dev);
5977 	if (error) {
5978 		device_printf(sc->bnx_dev, "could not setup MSI-X\n");
5979 		goto back;
5980 	}
5981 	setup = TRUE;
5982 
5983 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
5984 		intr = &sc->bnx_intr_data[i];
5985 
5986 		error = pci_alloc_msix_vector(sc->bnx_dev, i,
5987 		    &intr->bnx_intr_rid, intr->bnx_intr_cpuid);
5988 		if (error) {
5989 			device_printf(sc->bnx_dev,
5990 			    "could not alloc MSI-X %d on cpu%d\n",
5991 			    i, intr->bnx_intr_cpuid);
5992 			goto back;
5993 		}
5994 
5995 		intr->bnx_intr_res = bus_alloc_resource_any(sc->bnx_dev,
5996 		    SYS_RES_IRQ, &intr->bnx_intr_rid, RF_ACTIVE);
5997 		if (intr->bnx_intr_res == NULL) {
5998 			device_printf(sc->bnx_dev,
5999 			    "could not alloc MSI-X %d resource\n", i);
6000 			error = ENXIO;
6001 			goto back;
6002 		}
6003 	}
6004 
6005 	pci_enable_msix(sc->bnx_dev);
6006 	sc->bnx_intr_type = PCI_INTR_TYPE_MSIX;
6007 back:
6008 	if (error)
6009 		bnx_free_msix(sc, setup);
6010 	return error;
6011 }
6012 
6013 static void
6014 bnx_free_msix(struct bnx_softc *sc, boolean_t setup)
6015 {
6016 	int i;
6017 
6018 	KKASSERT(sc->bnx_intr_cnt > 1);
6019 
6020 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
6021 		struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
6022 
6023 		if (intr->bnx_intr_res != NULL) {
6024 			bus_release_resource(sc->bnx_dev, SYS_RES_IRQ,
6025 			    intr->bnx_intr_rid, intr->bnx_intr_res);
6026 		}
6027 		if (intr->bnx_intr_rid >= 0) {
6028 			pci_release_msix_vector(sc->bnx_dev,
6029 			    intr->bnx_intr_rid);
6030 		}
6031 	}
6032 	if (setup)
6033 		pci_teardown_msix(sc->bnx_dev);
6034 }
6035 
6036 static void
6037 bnx_rx_std_refill_sched_ipi(void *xret)
6038 {
6039 	struct bnx_rx_ret_ring *ret = xret;
6040 	struct bnx_rx_std_ring *std = ret->bnx_std;
6041 	struct globaldata *gd = mycpu;
6042 
6043 	crit_enter_gd(gd);
6044 
6045 	atomic_set_int(&std->bnx_rx_std_refill, ret->bnx_rx_mask);
6046 	cpu_sfence();
6047 
6048 	KKASSERT(std->bnx_rx_std_ithread->td_gd == gd);
6049 	lwkt_schedule(std->bnx_rx_std_ithread);
6050 
6051 	crit_exit_gd(gd);
6052 }
6053 
6054 static void
6055 bnx_rx_std_refill_stop(void *xstd)
6056 {
6057 	struct bnx_rx_std_ring *std = xstd;
6058 	struct globaldata *gd = mycpu;
6059 
6060 	crit_enter_gd(gd);
6061 
6062 	std->bnx_rx_std_stop = 1;
6063 	cpu_sfence();
6064 
6065 	KKASSERT(std->bnx_rx_std_ithread->td_gd == gd);
6066 	lwkt_schedule(std->bnx_rx_std_ithread);
6067 
6068 	crit_exit_gd(gd);
6069 }
6070 
6071 static void
6072 bnx_serialize_skipmain(struct bnx_softc *sc)
6073 {
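	/* Enter all serializers except the main one at index 0. */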
6074 	lwkt_serialize_array_enter(sc->bnx_serialize,
6075 	    sc->bnx_serialize_cnt, 1);
6076 }
6077 
6078 static void
6079 bnx_deserialize_skipmain(struct bnx_softc *sc)
6080 {
6081 	lwkt_serialize_array_exit(sc->bnx_serialize,
6082 	    sc->bnx_serialize_cnt, 1);
6083 }
6084 
6085 static void
6086 bnx_rx_std_refill_sched(struct bnx_rx_ret_ring *ret,
6087     struct bnx_rx_std_ring *std)
6088 {
6089 	struct globaldata *gd = mycpu;
6090 
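	/*
	 * Mark this return ring as needing a refill and, if the refill
	 * ithread is not already running, wake it up; when the ithread
	 * lives on another CPU the wakeup is handed off through an IPI.
	 */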
6091 	ret->bnx_rx_cnt = 0;
6092 	cpu_sfence();
6093 
6094 	crit_enter_gd(gd);
6095 
6096 	atomic_set_int(&std->bnx_rx_std_refill, ret->bnx_rx_mask);
6097 	cpu_sfence();
6098 	if (atomic_poll_acquire_int(&std->bnx_rx_std_running)) {
6099 		if (std->bnx_rx_std_ithread->td_gd == gd) {
6100 			lwkt_schedule(std->bnx_rx_std_ithread);
6101 		} else {
6102 			lwkt_send_ipiq(std->bnx_rx_std_ithread->td_gd,
6103 			    bnx_rx_std_refill_sched_ipi, ret);
6104 		}
6105 	}
6106 
6107 	crit_exit_gd(gd);
6108 }
6109 
6110 static struct pktinfo *
6111 bnx_rss_info(struct pktinfo *pi, const struct bge_rx_bd *cur_rx)
6112 {
6113 	/* Don't pick up IPv6 packet */
6114 	if (cur_rx->bge_flags & BGE_RXBDFLAG_IPV6)
6115 		return NULL;
6116 
6117 	/* Don't pick up IP packet w/o IP checksum */
6118 	if ((cur_rx->bge_flags & BGE_RXBDFLAG_IP_CSUM) == 0 ||
6119 	    (cur_rx->bge_error_flag & BGE_RXERRFLAG_IP_CSUM_NOK))
6120 		return NULL;
6121 
6122 	/* Don't pick up IP packet w/o TCP/UDP checksum */
6123 	if ((cur_rx->bge_flags & BGE_RXBDFLAG_TCP_UDP_CSUM) == 0)
6124 		return NULL;
6125 
6126 	/* May be IP fragment */
6127 	if (cur_rx->bge_tcp_udp_csum != 0xffff)
6128 		return NULL;
6129 
6130 	if (cur_rx->bge_flags & BGE_RXBDFLAG_TCP_UDP_IS_TCP)
6131 		pi->pi_l3proto = IPPROTO_TCP;
6132 	else
6133 		pi->pi_l3proto = IPPROTO_UDP;
6134 	pi->pi_netisr = NETISR_IP;
6135 	pi->pi_flags = 0;
6136 
6137 	return pi;
6138 }
6139 
6140 static void
6141 bnx_sig_pre_reset(struct bnx_softc *sc, int type)
6142 {
6143 	if (type == BNX_RESET_START || type == BNX_RESET_SUSPEND)
6144 		bnx_ape_driver_state_change(sc, type);
6145 }
6146 
6147 static void
6148 bnx_sig_post_reset(struct bnx_softc *sc, int type)
6149 {
6150 	if (type == BNX_RESET_SHUTDOWN)
6151 		bnx_ape_driver_state_change(sc, type);
6152 }
6153 
6154 /*
6155  * Clear all stale locks and select the lock for this driver instance.
6156  */
6157 static void
6158 bnx_ape_lock_init(struct bnx_softc *sc)
6159 {
6160 	uint32_t bit, regbase;
6161 	int i;
6162 
6163 	regbase = BGE_APE_PER_LOCK_GRANT;
6164 
6165 	/* Clear any stale locks. */
6166 	for (i = BGE_APE_LOCK_PHY0; i <= BGE_APE_LOCK_GPIO; i++) {
6167 		switch (i) {
6168 		case BGE_APE_LOCK_PHY0:
6169 		case BGE_APE_LOCK_PHY1:
6170 		case BGE_APE_LOCK_PHY2:
6171 		case BGE_APE_LOCK_PHY3:
6172 			bit = BGE_APE_LOCK_GRANT_DRIVER0;
6173 			break;
6174 
6175 		default:
6176 			if (sc->bnx_func_addr == 0)
6177 				bit = BGE_APE_LOCK_GRANT_DRIVER0;
6178 			else
6179 				bit = 1 << sc->bnx_func_addr;
6180 			break;
6181 		}
6182 		APE_WRITE_4(sc, regbase + 4 * i, bit);
6183 	}
6184 
6185 	/* Select the PHY lock based on the device's function number. */
6186 	switch (sc->bnx_func_addr) {
6187 	case 0:
6188 		sc->bnx_phy_ape_lock = BGE_APE_LOCK_PHY0;
6189 		break;
6190 
6191 	case 1:
6192 		sc->bnx_phy_ape_lock = BGE_APE_LOCK_PHY1;
6193 		break;
6194 
6195 	case 2:
6196 		sc->bnx_phy_ape_lock = BGE_APE_LOCK_PHY2;
6197 		break;
6198 
6199 	case 3:
6200 		sc->bnx_phy_ape_lock = BGE_APE_LOCK_PHY3;
6201 		break;
6202 
6203 	default:
6204 		device_printf(sc->bnx_dev,
6205 		    "PHY lock not supported on this function\n");
6206 		break;
6207 	}
6208 }
6209 
6210 /*
6211  * Check for APE firmware, set flags, and print version info.
6212  */
6213 static void
6214 bnx_ape_read_fw_ver(struct bnx_softc *sc)
6215 {
6216 	const char *fwtype;
6217 	uint32_t apedata, features;
6218 
6219 	/* Check for a valid APE signature in shared memory. */
6220 	apedata = APE_READ_4(sc, BGE_APE_SEG_SIG);
6221 	if (apedata != BGE_APE_SEG_SIG_MAGIC) {
6222 		device_printf(sc->bnx_dev, "no APE signature\n");
6223 		sc->bnx_mfw_flags &= ~BNX_MFW_ON_APE;
6224 		return;
6225 	}
6226 
6227 	/* Check if APE firmware is running. */
6228 	apedata = APE_READ_4(sc, BGE_APE_FW_STATUS);
6229 	if ((apedata & BGE_APE_FW_STATUS_READY) == 0) {
6230 		device_printf(sc->bnx_dev, "APE signature found "
6231 		    "but FW status not ready! 0x%08x\n", apedata);
6232 		return;
6233 	}
6234 
6235 	sc->bnx_mfw_flags |= BNX_MFW_ON_APE;
6236 
6237 	/* Fetch the APE firmware type and version. */
6238 	apedata = APE_READ_4(sc, BGE_APE_FW_VERSION);
6239 	features = APE_READ_4(sc, BGE_APE_FW_FEATURES);
6240 	if (features & BGE_APE_FW_FEATURE_NCSI) {
6241 		sc->bnx_mfw_flags |= BNX_MFW_TYPE_NCSI;
6242 		fwtype = "NCSI";
6243 	} else if (features & BGE_APE_FW_FEATURE_DASH) {
6244 		sc->bnx_mfw_flags |= BNX_MFW_TYPE_DASH;
6245 		fwtype = "DASH";
6246 	} else {
6247 		fwtype = "UNKN";
6248 	}
6249 
6250 	/* Print the APE firmware version. */
6251 	device_printf(sc->bnx_dev, "APE FW version: %s v%d.%d.%d.%d\n",
6252 	    fwtype,
6253 	    (apedata & BGE_APE_FW_VERSION_MAJMSK) >> BGE_APE_FW_VERSION_MAJSFT,
6254 	    (apedata & BGE_APE_FW_VERSION_MINMSK) >> BGE_APE_FW_VERSION_MINSFT,
6255 	    (apedata & BGE_APE_FW_VERSION_REVMSK) >> BGE_APE_FW_VERSION_REVSFT,
6256 	    (apedata & BGE_APE_FW_VERSION_BLDMSK));
6257 }
6258 
6259 static int
6260 bnx_ape_lock(struct bnx_softc *sc, int locknum)
6261 {
6262 	uint32_t bit, gnt, req, status;
6263 	int i, off;
6264 
6265 	if ((sc->bnx_mfw_flags & BNX_MFW_ON_APE) == 0)
6266 		return 0;
6267 
6268 	/* Lock request/grant registers have different bases. */
6269 	req = BGE_APE_PER_LOCK_REQ;
6270 	gnt = BGE_APE_PER_LOCK_GRANT;
6271 
6272 	off = 4 * locknum;
6273 
6274 	switch (locknum) {
6275 	case BGE_APE_LOCK_GPIO:
6276 		/* Lock required when using GPIO. */
6277 		if (sc->bnx_func_addr == 0)
6278 			bit = BGE_APE_LOCK_REQ_DRIVER0;
6279 		else
6280 			bit = 1 << sc->bnx_func_addr;
6281 		break;
6282 
6283 	case BGE_APE_LOCK_GRC:
6284 		/* Lock required to reset the device. */
6285 		if (sc->bnx_func_addr == 0)
6286 			bit = BGE_APE_LOCK_REQ_DRIVER0;
6287 		else
6288 			bit = 1 << sc->bnx_func_addr;
6289 		break;
6290 
6291 	case BGE_APE_LOCK_MEM:
6292 		/* Lock required when accessing certain APE memory. */
6293 		if (sc->bnx_func_addr == 0)
6294 			bit = BGE_APE_LOCK_REQ_DRIVER0;
6295 		else
6296 			bit = 1 << sc->bnx_func_addr;
6297 		break;
6298 
6299 	case BGE_APE_LOCK_PHY0:
6300 	case BGE_APE_LOCK_PHY1:
6301 	case BGE_APE_LOCK_PHY2:
6302 	case BGE_APE_LOCK_PHY3:
6303 		/* Lock required when accessing PHYs. */
6304 		bit = BGE_APE_LOCK_REQ_DRIVER0;
6305 		break;
6306 
6307 	default:
6308 		return EINVAL;
6309 	}
6310 
6311 	/* Request a lock. */
6312 	APE_WRITE_4(sc, req + off, bit);
6313 
6314 	/* Wait up to 1 second to acquire lock. */
6315 	for (i = 0; i < 20000; i++) {
6316 		status = APE_READ_4(sc, gnt + off);
6317 		if (status == bit)
6318 			break;
6319 		DELAY(50);
6320 	}
6321 
6322 	/* Handle any errors. */
6323 	if (status != bit) {
6324 		if_printf(&sc->arpcom.ac_if, "APE lock %d request failed! "
6325 		    "request = 0x%04x[0x%04x], status = 0x%04x[0x%04x]\n",
6326 		    locknum, req + off, bit & 0xFFFF, gnt + off,
6327 		    status & 0xFFFF);
6328 		/* Revoke the lock request. */
6329 		APE_WRITE_4(sc, gnt + off, bit);
6330 		return EBUSY;
6331 	}
6332 
6333 	return 0;
6334 }
6335 
6336 static void
6337 bnx_ape_unlock(struct bnx_softc *sc, int locknum)
6338 {
6339 	uint32_t bit, gnt;
6340 	int off;
6341 
6342 	if ((sc->bnx_mfw_flags & BNX_MFW_ON_APE) == 0)
6343 		return;
6344 
6345 	gnt = BGE_APE_PER_LOCK_GRANT;
6346 
6347 	off = 4 * locknum;
6348 
6349 	switch (locknum) {
6350 	case BGE_APE_LOCK_GPIO:
6351 		if (sc->bnx_func_addr == 0)
6352 			bit = BGE_APE_LOCK_GRANT_DRIVER0;
6353 		else
6354 			bit = 1 << sc->bnx_func_addr;
6355 		break;
6356 
6357 	case BGE_APE_LOCK_GRC:
6358 		if (sc->bnx_func_addr == 0)
6359 			bit = BGE_APE_LOCK_GRANT_DRIVER0;
6360 		else
6361 			bit = 1 << sc->bnx_func_addr;
6362 		break;
6363 
6364 	case BGE_APE_LOCK_MEM:
6365 		if (sc->bnx_func_addr == 0)
6366 			bit = BGE_APE_LOCK_GRANT_DRIVER0;
6367 		else
6368 			bit = 1 << sc->bnx_func_addr;
6369 		break;
6370 
6371 	case BGE_APE_LOCK_PHY0:
6372 	case BGE_APE_LOCK_PHY1:
6373 	case BGE_APE_LOCK_PHY2:
6374 	case BGE_APE_LOCK_PHY3:
6375 		bit = BGE_APE_LOCK_GRANT_DRIVER0;
6376 		break;
6377 
6378 	default:
6379 		return;
6380 	}
6381 
6382 	APE_WRITE_4(sc, gnt + off, bit);
6383 }
6384 
6385 /*
6386  * Send an event to the APE firmware.
6387  */
6388 static void
6389 bnx_ape_send_event(struct bnx_softc *sc, uint32_t event)
6390 {
6391 	uint32_t apedata;
6392 	int i;
6393 
6394 	/* NCSI does not support APE events. */
6395 	if ((sc->bnx_mfw_flags & BNX_MFW_ON_APE) == 0)
6396 		return;
6397 
6398 	/* Wait up to 1ms for APE to service previous event. */
6399 	for (i = 10; i > 0; i--) {
6400 		if (bnx_ape_lock(sc, BGE_APE_LOCK_MEM) != 0)
6401 			break;
6402 		apedata = APE_READ_4(sc, BGE_APE_EVENT_STATUS);
6403 		if ((apedata & BGE_APE_EVENT_STATUS_EVENT_PENDING) == 0) {
6404 			APE_WRITE_4(sc, BGE_APE_EVENT_STATUS, event |
6405 			    BGE_APE_EVENT_STATUS_EVENT_PENDING);
6406 			bnx_ape_unlock(sc, BGE_APE_LOCK_MEM);
6407 			APE_WRITE_4(sc, BGE_APE_EVENT, BGE_APE_EVENT_1);
6408 			break;
6409 		}
6410 		bnx_ape_unlock(sc, BGE_APE_LOCK_MEM);
6411 		DELAY(100);
6412 	}
6413 	if (i == 0) {
6414 		if_printf(&sc->arpcom.ac_if,
6415 		    "APE event 0x%08x send timed out\n", event);
6416 	}
6417 }
6418 
6419 static void
6420 bnx_ape_driver_state_change(struct bnx_softc *sc, int kind)
6421 {
6422 	uint32_t apedata, event;
6423 
6424 	if ((sc->bnx_mfw_flags & BNX_MFW_ON_APE) == 0)
6425 		return;
6426 
6427 	switch (kind) {
6428 	case BNX_RESET_START:
6429 		/* If this is the first load, clear the load counter. */
6430 		apedata = APE_READ_4(sc, BGE_APE_HOST_SEG_SIG);
6431 		if (apedata != BGE_APE_HOST_SEG_SIG_MAGIC) {
6432 			APE_WRITE_4(sc, BGE_APE_HOST_INIT_COUNT, 0);
6433 		} else {
6434 			apedata = APE_READ_4(sc, BGE_APE_HOST_INIT_COUNT);
6435 			APE_WRITE_4(sc, BGE_APE_HOST_INIT_COUNT, ++apedata);
6436 		}
6437 		APE_WRITE_4(sc, BGE_APE_HOST_SEG_SIG,
6438 		    BGE_APE_HOST_SEG_SIG_MAGIC);
6439 		APE_WRITE_4(sc, BGE_APE_HOST_SEG_LEN,
6440 		    BGE_APE_HOST_SEG_LEN_MAGIC);
6441 
6442 		/* Add some version info if bnx(4) supports it. */
6443 		APE_WRITE_4(sc, BGE_APE_HOST_DRIVER_ID,
6444 		    BGE_APE_HOST_DRIVER_ID_MAGIC(1, 0));
6445 		APE_WRITE_4(sc, BGE_APE_HOST_BEHAVIOR,
6446 		    BGE_APE_HOST_BEHAV_NO_PHYLOCK);
6447 		APE_WRITE_4(sc, BGE_APE_HOST_HEARTBEAT_INT_MS,
6448 		    BGE_APE_HOST_HEARTBEAT_INT_DISABLE);
6449 		APE_WRITE_4(sc, BGE_APE_HOST_DRVR_STATE,
6450 		    BGE_APE_HOST_DRVR_STATE_START);
6451 		event = BGE_APE_EVENT_STATUS_STATE_START;
6452 		break;
6453 
6454 	case BNX_RESET_SHUTDOWN:
6455 		APE_WRITE_4(sc, BGE_APE_HOST_DRVR_STATE,
6456 		    BGE_APE_HOST_DRVR_STATE_UNLOAD);
6457 		event = BGE_APE_EVENT_STATUS_STATE_UNLOAD;
6458 		break;
6459 
6460 	case BNX_RESET_SUSPEND:
6461 		event = BGE_APE_EVENT_STATUS_STATE_SUSPEND;
6462 		break;
6463 
6464 	default:
6465 		return;
6466 	}
6467 
6468 	bnx_ape_send_event(sc, event | BGE_APE_EVENT_STATUS_DRIVER_EVNT |
6469 	    BGE_APE_EVENT_STATUS_STATE_CHNGE);
6470 }
6471