xref: /dragonfly/sys/dev/netif/bnx/if_bnx.c (revision 38b5d46c)
1 /*
2  * Copyright (c) 2001 Wind River Systems
3  * Copyright (c) 1997, 1998, 1999, 2001
4  *	Bill Paul <wpaul@windriver.com>.  All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. All advertising materials mentioning features or use of this software
15  *    must display the following acknowledgement:
16  *	This product includes software developed by Bill Paul.
17  * 4. Neither the name of the author nor the names of any co-contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31  * THE POSSIBILITY OF SUCH DAMAGE.
32  *
33  * $FreeBSD: src/sys/dev/bge/if_bge.c,v 1.3.2.39 2005/07/03 03:41:18 silby Exp $
34  */
35 
36 #include "opt_bnx.h"
37 #include "opt_ifpoll.h"
38 
39 #include <sys/param.h>
40 #include <sys/bus.h>
41 #include <sys/endian.h>
42 #include <sys/kernel.h>
43 #include <sys/interrupt.h>
44 #include <sys/mbuf.h>
45 #include <sys/malloc.h>
46 #include <sys/queue.h>
47 #include <sys/rman.h>
48 #include <sys/serialize.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/sysctl.h>
52 
53 #include <netinet/ip.h>
54 #include <netinet/tcp.h>
55 
56 #include <net/bpf.h>
57 #include <net/ethernet.h>
58 #include <net/if.h>
59 #include <net/if_arp.h>
60 #include <net/if_dl.h>
61 #include <net/if_media.h>
62 #include <net/if_poll.h>
63 #include <net/if_types.h>
64 #include <net/ifq_var.h>
65 #include <net/toeplitz.h>
66 #include <net/toeplitz2.h>
67 #include <net/vlan/if_vlan_var.h>
68 #include <net/vlan/if_vlan_ether.h>
69 
70 #include <dev/netif/mii_layer/mii.h>
71 #include <dev/netif/mii_layer/miivar.h>
72 #include <dev/netif/mii_layer/brgphyreg.h>
73 
74 #include "pcidevs.h"
75 #include <bus/pci/pcireg.h>
76 #include <bus/pci/pcivar.h>
77 
78 #include <dev/netif/bge/if_bgereg.h>
79 #include <dev/netif/bnx/if_bnxvar.h>
80 
81 /* "device miibus" required.  See GENERIC if you get errors here. */
82 #include "miibus_if.h"
83 
84 #define BNX_CSUM_FEATURES	(CSUM_IP | CSUM_TCP | CSUM_UDP)
85 
86 #define	BNX_RESET_SHUTDOWN	0
87 #define	BNX_RESET_START		1
88 #define	BNX_RESET_SUSPEND	2
89 
90 #define BNX_INTR_CKINTVL	((10 * hz) / 1000)	/* 10ms */
91 
92 #ifdef BNX_RSS_DEBUG
93 #define BNX_RSS_DPRINTF(sc, lvl, fmt, ...) \
94 do { \
95 	if (sc->bnx_rss_debug >= lvl) \
96 		if_printf(&sc->arpcom.ac_if, fmt, __VA_ARGS__); \
97 } while (0)
98 #else	/* !BNX_RSS_DEBUG */
99 #define BNX_RSS_DPRINTF(sc, lvl, fmt, ...)	((void)0)
100 #endif	/* BNX_RSS_DEBUG */
101 
102 static const struct bnx_type {
103 	uint16_t		bnx_vid;
104 	uint16_t		bnx_did;
105 	char			*bnx_name;
106 } bnx_devs[] = {
107 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5717,
108 		"Broadcom BCM5717 Gigabit Ethernet" },
109 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5717C,
110 		"Broadcom BCM5717C Gigabit Ethernet" },
111 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5718,
112 		"Broadcom BCM5718 Gigabit Ethernet" },
113 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5719,
114 		"Broadcom BCM5719 Gigabit Ethernet" },
115 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5720_ALT,
116 		"Broadcom BCM5720 Gigabit Ethernet" },
117 
118 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5725,
119 		"Broadcom BCM5725 Gigabit Ethernet" },
120 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5727,
121 		"Broadcom BCM5727 Gigabit Ethernet" },
122 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM5762,
123 		"Broadcom BCM5762 Gigabit Ethernet" },
124 
125 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57761,
126 		"Broadcom BCM57761 Gigabit Ethernet" },
127 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57762,
128 		"Broadcom BCM57762 Gigabit Ethernet" },
129 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57765,
130 		"Broadcom BCM57765 Gigabit Ethernet" },
131 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57766,
132 		"Broadcom BCM57766 Gigabit Ethernet" },
133 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57781,
134 		"Broadcom BCM57781 Gigabit Ethernet" },
135 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57782,
136 		"Broadcom BCM57782 Gigabit Ethernet" },
137 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57785,
138 		"Broadcom BCM57785 Gigabit Ethernet" },
139 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57786,
140 		"Broadcom BCM57786 Gigabit Ethernet" },
141 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57791,
142 		"Broadcom BCM57791 Fast Ethernet" },
143 	{ PCI_VENDOR_BROADCOM, PCI_PRODUCT_BROADCOM_BCM57795,
144 		"Broadcom BCM57795 Fast Ethernet" },
145 
146 	{ 0, 0, NULL }
147 };
148 
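/*
 * Host TX producer index mailboxes, one per TX ring; a ring notifies the
 * chip of new descriptors by writing its producer index to its mailbox.
 */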
149 static const int bnx_tx_mailbox[BNX_TX_RING_MAX] = {
150 	BGE_MBX_TX_HOST_PROD0_LO,
151 	BGE_MBX_TX_HOST_PROD0_HI,
152 	BGE_MBX_TX_HOST_PROD1_LO,
153 	BGE_MBX_TX_HOST_PROD1_HI
154 };
155 
156 #define BNX_IS_JUMBO_CAPABLE(sc)	((sc)->bnx_flags & BNX_FLAG_JUMBO)
157 #define BNX_IS_5717_PLUS(sc)		((sc)->bnx_flags & BNX_FLAG_5717_PLUS)
158 #define BNX_IS_57765_PLUS(sc)		((sc)->bnx_flags & BNX_FLAG_57765_PLUS)
159 #define BNX_IS_57765_FAMILY(sc)	 \
160 	((sc)->bnx_flags & BNX_FLAG_57765_FAMILY)
161 
162 typedef int	(*bnx_eaddr_fcn_t)(struct bnx_softc *, uint8_t[]);
163 
164 static int	bnx_probe(device_t);
165 static int	bnx_attach(device_t);
166 static int	bnx_detach(device_t);
167 static void	bnx_shutdown(device_t);
168 static int	bnx_suspend(device_t);
169 static int	bnx_resume(device_t);
170 static int	bnx_miibus_readreg(device_t, int, int);
171 static int	bnx_miibus_writereg(device_t, int, int, int);
172 static void	bnx_miibus_statchg(device_t);
173 
174 static int	bnx_handle_status(struct bnx_softc *);
175 #ifdef IFPOLL_ENABLE
176 static void	bnx_npoll(struct ifnet *, struct ifpoll_info *);
177 static void	bnx_npoll_rx(struct ifnet *, void *, int);
178 static void	bnx_npoll_tx(struct ifnet *, void *, int);
179 static void	bnx_npoll_tx_notag(struct ifnet *, void *, int);
180 static void	bnx_npoll_status(struct ifnet *);
181 static void	bnx_npoll_status_notag(struct ifnet *);
182 #endif
183 static void	bnx_intr_legacy(void *);
184 static void	bnx_msi(void *);
185 static void	bnx_intr(struct bnx_softc *);
186 static void	bnx_msix_status(void *);
187 static void	bnx_msix_tx_status(void *);
188 static void	bnx_msix_rx(void *);
189 static void	bnx_msix_rxtx(void *);
190 static void	bnx_enable_intr(struct bnx_softc *);
191 static void	bnx_disable_intr(struct bnx_softc *);
192 static void	bnx_txeof(struct bnx_tx_ring *, uint16_t);
193 static void	bnx_rxeof(struct bnx_rx_ret_ring *, uint16_t, int);
194 static int	bnx_alloc_intr(struct bnx_softc *);
195 static int	bnx_setup_intr(struct bnx_softc *);
196 static void	bnx_free_intr(struct bnx_softc *);
197 static void	bnx_teardown_intr(struct bnx_softc *, int);
198 static int	bnx_alloc_msix(struct bnx_softc *);
199 static void	bnx_free_msix(struct bnx_softc *, boolean_t);
200 static void	bnx_check_intr_rxtx(void *);
201 static void	bnx_check_intr_rx(void *);
202 static void	bnx_check_intr_tx(void *);
203 static void	bnx_rx_std_refill_ithread(void *);
204 static void	bnx_rx_std_refill(void *, void *);
205 static void	bnx_rx_std_refill_sched_ipi(void *);
206 static void	bnx_rx_std_refill_stop(void *);
207 static void	bnx_rx_std_refill_sched(struct bnx_rx_ret_ring *,
208 		    struct bnx_rx_std_ring *);
209 
210 static void	bnx_start(struct ifnet *, struct ifaltq_subque *);
211 static int	bnx_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
212 static void	bnx_init(void *);
213 static void	bnx_stop(struct bnx_softc *);
214 static void	bnx_watchdog(struct ifaltq_subque *);
215 static int	bnx_ifmedia_upd(struct ifnet *);
216 static void	bnx_ifmedia_sts(struct ifnet *, struct ifmediareq *);
217 static void	bnx_tick(void *);
218 static void	bnx_serialize(struct ifnet *, enum ifnet_serialize);
219 static void	bnx_deserialize(struct ifnet *, enum ifnet_serialize);
220 static int	bnx_tryserialize(struct ifnet *, enum ifnet_serialize);
221 #ifdef INVARIANTS
222 static void	bnx_serialize_assert(struct ifnet *, enum ifnet_serialize,
223 		    boolean_t);
224 #endif
225 static void	bnx_serialize_skipmain(struct bnx_softc *);
226 static void	bnx_deserialize_skipmain(struct bnx_softc *sc);
227 
228 static int	bnx_alloc_jumbo_mem(struct bnx_softc *);
229 static void	bnx_free_jumbo_mem(struct bnx_softc *);
230 static struct bnx_jslot
231 		*bnx_jalloc(struct bnx_softc *);
232 static void	bnx_jfree(void *);
233 static void	bnx_jref(void *);
234 static int	bnx_newbuf_std(struct bnx_rx_ret_ring *, int, int);
235 static int	bnx_newbuf_jumbo(struct bnx_softc *, int, int);
236 static void	bnx_setup_rxdesc_std(struct bnx_rx_std_ring *, int);
237 static void	bnx_setup_rxdesc_jumbo(struct bnx_softc *, int);
238 static int	bnx_init_rx_ring_std(struct bnx_rx_std_ring *);
239 static void	bnx_free_rx_ring_std(struct bnx_rx_std_ring *);
240 static int	bnx_init_rx_ring_jumbo(struct bnx_softc *);
241 static void	bnx_free_rx_ring_jumbo(struct bnx_softc *);
242 static void	bnx_free_tx_ring(struct bnx_tx_ring *);
243 static int	bnx_init_tx_ring(struct bnx_tx_ring *);
244 static int	bnx_create_tx_ring(struct bnx_tx_ring *);
245 static void	bnx_destroy_tx_ring(struct bnx_tx_ring *);
246 static int	bnx_create_rx_ret_ring(struct bnx_rx_ret_ring *);
247 static void	bnx_destroy_rx_ret_ring(struct bnx_rx_ret_ring *);
248 static int	bnx_dma_alloc(device_t);
249 static void	bnx_dma_free(struct bnx_softc *);
250 static int	bnx_dma_block_alloc(struct bnx_softc *, bus_size_t,
251 		    bus_dma_tag_t *, bus_dmamap_t *, void **, bus_addr_t *);
252 static void	bnx_dma_block_free(bus_dma_tag_t, bus_dmamap_t, void *);
253 static struct mbuf *
254 		bnx_defrag_shortdma(struct mbuf *);
255 static int	bnx_encap(struct bnx_tx_ring *, struct mbuf **,
256 		    uint32_t *, int *);
257 static int	bnx_setup_tso(struct bnx_tx_ring *, struct mbuf **,
258 		    uint16_t *, uint16_t *);
259 static void	bnx_setup_serialize(struct bnx_softc *);
260 static void	bnx_set_tick_cpuid(struct bnx_softc *, boolean_t);
261 static void	bnx_setup_ring_cnt(struct bnx_softc *);
262 
263 static struct pktinfo *bnx_rss_info(struct pktinfo *,
264 		    const struct bge_rx_bd *);
265 static void	bnx_init_rss(struct bnx_softc *);
266 static void	bnx_reset(struct bnx_softc *);
267 static int	bnx_chipinit(struct bnx_softc *);
268 static int	bnx_blockinit(struct bnx_softc *);
269 static void	bnx_stop_block(struct bnx_softc *, bus_size_t, uint32_t);
270 static void	bnx_enable_msi(struct bnx_softc *, boolean_t);
271 static void	bnx_setmulti(struct bnx_softc *);
272 static void	bnx_setpromisc(struct bnx_softc *);
273 static void	bnx_stats_update_regs(struct bnx_softc *);
274 static uint32_t	bnx_dma_swap_options(struct bnx_softc *);
275 
276 static uint32_t	bnx_readmem_ind(struct bnx_softc *, uint32_t);
277 static void	bnx_writemem_ind(struct bnx_softc *, uint32_t, uint32_t);
278 #ifdef notdef
279 static uint32_t	bnx_readreg_ind(struct bnx_softc *, uint32_t);
280 #endif
281 static void	bnx_writemem_direct(struct bnx_softc *, uint32_t, uint32_t);
282 static void	bnx_writembx(struct bnx_softc *, int, int);
283 static int	bnx_read_nvram(struct bnx_softc *, caddr_t, int, int);
284 static uint8_t	bnx_eeprom_getbyte(struct bnx_softc *, uint32_t, uint8_t *);
285 static int	bnx_read_eeprom(struct bnx_softc *, caddr_t, uint32_t, size_t);
286 
287 static void	bnx_tbi_link_upd(struct bnx_softc *, uint32_t);
288 static void	bnx_copper_link_upd(struct bnx_softc *, uint32_t);
289 static void	bnx_autopoll_link_upd(struct bnx_softc *, uint32_t);
290 static void	bnx_link_poll(struct bnx_softc *);
291 
292 static int	bnx_get_eaddr_mem(struct bnx_softc *, uint8_t[]);
293 static int	bnx_get_eaddr_nvram(struct bnx_softc *, uint8_t[]);
294 static int	bnx_get_eaddr_eeprom(struct bnx_softc *, uint8_t[]);
295 static int	bnx_get_eaddr(struct bnx_softc *, uint8_t[]);
296 
297 static void	bnx_coal_change(struct bnx_softc *);
298 static int	bnx_sysctl_force_defrag(SYSCTL_HANDLER_ARGS);
299 static int	bnx_sysctl_tx_wreg(SYSCTL_HANDLER_ARGS);
300 static int	bnx_sysctl_rx_coal_ticks(SYSCTL_HANDLER_ARGS);
301 static int	bnx_sysctl_tx_coal_ticks(SYSCTL_HANDLER_ARGS);
302 static int	bnx_sysctl_rx_coal_bds(SYSCTL_HANDLER_ARGS);
303 static int	bnx_sysctl_rx_coal_bds_poll(SYSCTL_HANDLER_ARGS);
304 static int	bnx_sysctl_tx_coal_bds(SYSCTL_HANDLER_ARGS);
305 static int	bnx_sysctl_tx_coal_bds_poll(SYSCTL_HANDLER_ARGS);
306 static int	bnx_sysctl_rx_coal_bds_int(SYSCTL_HANDLER_ARGS);
307 static int	bnx_sysctl_tx_coal_bds_int(SYSCTL_HANDLER_ARGS);
308 static int	bnx_sysctl_coal_chg(SYSCTL_HANDLER_ARGS, uint32_t *,
309 		    int, int, uint32_t);
310 #ifdef IFPOLL_ENABLE
311 static int	bnx_sysctl_npoll_offset(SYSCTL_HANDLER_ARGS);
312 static int	bnx_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS);
313 static int	bnx_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS);
314 #endif
315 static int	bnx_sysctl_std_refill(SYSCTL_HANDLER_ARGS);
316 
317 static void	bnx_sig_post_reset(struct bnx_softc *, int);
318 static void	bnx_sig_pre_reset(struct bnx_softc *, int);
319 static void	bnx_ape_lock_init(struct bnx_softc *);
320 static void	bnx_ape_read_fw_ver(struct bnx_softc *);
321 static int	bnx_ape_lock(struct bnx_softc *, int);
322 static void	bnx_ape_unlock(struct bnx_softc *, int);
323 static void	bnx_ape_send_event(struct bnx_softc *, uint32_t);
324 static void	bnx_ape_driver_state_change(struct bnx_softc *, int);
325 
326 static int	bnx_msi_enable = 1;
327 static int	bnx_msix_enable = 1;
328 
329 static int	bnx_rx_rings = 0; /* auto */
330 static int	bnx_tx_rings = 0; /* auto */
331 
332 TUNABLE_INT("hw.bnx.msi.enable", &bnx_msi_enable);
333 TUNABLE_INT("hw.bnx.msix.enable", &bnx_msix_enable);
334 TUNABLE_INT("hw.bnx.rx_rings", &bnx_rx_rings);
335 TUNABLE_INT("hw.bnx.tx_rings", &bnx_tx_rings);
336 
337 static device_method_t bnx_methods[] = {
338 	/* Device interface */
339 	DEVMETHOD(device_probe,		bnx_probe),
340 	DEVMETHOD(device_attach,	bnx_attach),
341 	DEVMETHOD(device_detach,	bnx_detach),
342 	DEVMETHOD(device_shutdown,	bnx_shutdown),
343 	DEVMETHOD(device_suspend,	bnx_suspend),
344 	DEVMETHOD(device_resume,	bnx_resume),
345 
346 	/* bus interface */
347 	DEVMETHOD(bus_print_child,	bus_generic_print_child),
348 	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
349 
350 	/* MII interface */
351 	DEVMETHOD(miibus_readreg,	bnx_miibus_readreg),
352 	DEVMETHOD(miibus_writereg,	bnx_miibus_writereg),
353 	DEVMETHOD(miibus_statchg,	bnx_miibus_statchg),
354 
355 	DEVMETHOD_END
356 };
357 
358 static DEFINE_CLASS_0(bnx, bnx_driver, bnx_methods, sizeof(struct bnx_softc));
359 static devclass_t bnx_devclass;
360 
361 DECLARE_DUMMY_MODULE(if_bnx);
362 MODULE_DEPEND(if_bnx, miibus, 1, 1, 1);
363 DRIVER_MODULE(if_bnx, pci, bnx_driver, bnx_devclass, NULL, NULL);
364 DRIVER_MODULE(miibus, bnx, miibus_driver, miibus_devclass, NULL, NULL);
365 
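/*
 * Indirect access to the chip's internal memory: program the PCI memory
 * window base register with the target offset, transfer the word through
 * the window data register, then restore the base to 0.
 */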
366 static uint32_t
367 bnx_readmem_ind(struct bnx_softc *sc, uint32_t off)
368 {
369 	device_t dev = sc->bnx_dev;
370 	uint32_t val;
371 
372 	pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, off, 4);
373 	val = pci_read_config(dev, BGE_PCI_MEMWIN_DATA, 4);
374 	pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, 0, 4);
375 	return (val);
376 }
377 
378 static void
379 bnx_writemem_ind(struct bnx_softc *sc, uint32_t off, uint32_t val)
380 {
381 	device_t dev = sc->bnx_dev;
382 
383 	pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, off, 4);
384 	pci_write_config(dev, BGE_PCI_MEMWIN_DATA, val, 4);
385 	pci_write_config(dev, BGE_PCI_MEMWIN_BASEADDR, 0, 4);
386 }
387 
388 static void
389 bnx_writemem_direct(struct bnx_softc *sc, uint32_t off, uint32_t val)
390 {
391 	CSR_WRITE_4(sc, off, val);
392 }
393 
394 static void
395 bnx_writembx(struct bnx_softc *sc, int off, int val)
396 {
397 	CSR_WRITE_4(sc, off, val);
398 }
399 
400 /*
401  * Read a sequence of bytes from NVRAM.
402  */
403 static int
404 bnx_read_nvram(struct bnx_softc *sc, caddr_t dest, int off, int cnt)
405 {
406 	return (1);
407 }
408 
409 /*
410  * Read a byte of data stored in the EEPROM at address 'addr.' The
411  * BCM570x supports both the traditional bitbang interface and an
412  * auto access interface for reading the EEPROM. We use the auto
413  * access method.
414  */
415 static uint8_t
416 bnx_eeprom_getbyte(struct bnx_softc *sc, uint32_t addr, uint8_t *dest)
417 {
418 	int i;
419 	uint32_t byte = 0;
420 
421 	/*
422 	 * Enable use of auto EEPROM access so we can avoid
423 	 * having to use the bitbang method.
424 	 */
425 	BNX_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_AUTO_EEPROM);
426 
427 	/* Reset the EEPROM, load the clock period. */
428 	CSR_WRITE_4(sc, BGE_EE_ADDR,
429 	    BGE_EEADDR_RESET|BGE_EEHALFCLK(BGE_HALFCLK_384SCL));
430 	DELAY(20);
431 
432 	/* Issue the read EEPROM command. */
433 	CSR_WRITE_4(sc, BGE_EE_ADDR, BGE_EE_READCMD | addr);
434 
435 	/* Wait for completion */
436 	for (i = 0; i < BNX_TIMEOUT * 10; i++) {
437 		DELAY(10);
438 		if (CSR_READ_4(sc, BGE_EE_ADDR) & BGE_EEADDR_DONE)
439 			break;
440 	}
441 
442 	if (i == BNX_TIMEOUT * 10) {
443 		if_printf(&sc->arpcom.ac_if, "eeprom read timed out\n");
444 		return(1);
445 	}
446 
447 	/* Get result. */
448 	byte = CSR_READ_4(sc, BGE_EE_DATA);
449 
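	/*
	 * The auto-access engine returns a full 32-bit word; pick out the
	 * byte lane selected by the low two bits of the address.
	 */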
450 	*dest = (byte >> ((addr % 4) * 8)) & 0xFF;
451 
452 	return(0);
453 }
454 
455 /*
456  * Read a sequence of bytes from the EEPROM.
457  */
458 static int
459 bnx_read_eeprom(struct bnx_softc *sc, caddr_t dest, uint32_t off, size_t len)
460 {
461 	size_t i;
462 	int err;
463 	uint8_t byte;
464 
465 	for (byte = 0, err = 0, i = 0; i < len; i++) {
466 		err = bnx_eeprom_getbyte(sc, off + i, &byte);
467 		if (err)
468 			break;
469 		*(dest + i) = byte;
470 	}
471 
472 	return(err ? 1 : 0);
473 }
474 
475 static int
476 bnx_miibus_readreg(device_t dev, int phy, int reg)
477 {
478 	struct bnx_softc *sc = device_get_softc(dev);
479 	uint32_t val;
480 	int i;
481 
482 	KASSERT(phy == sc->bnx_phyno,
483 	    ("invalid phyno %d, should be %d", phy, sc->bnx_phyno));
484 
485 	if (bnx_ape_lock(sc, sc->bnx_phy_ape_lock) != 0)
486 		return 0;
487 
488 	/* Clear the autopoll bit if set; otherwise it may trigger PCI errors. */
489 	if (sc->bnx_mi_mode & BGE_MIMODE_AUTOPOLL) {
490 		CSR_WRITE_4(sc, BGE_MI_MODE,
491 		    sc->bnx_mi_mode & ~BGE_MIMODE_AUTOPOLL);
492 		DELAY(80);
493 	}
494 
495 	CSR_WRITE_4(sc, BGE_MI_COMM, BGE_MICMD_READ | BGE_MICOMM_BUSY |
496 	    BGE_MIPHY(phy) | BGE_MIREG(reg));
497 
498 	/* Poll for the PHY register access to complete. */
499 	for (i = 0; i < BNX_TIMEOUT; i++) {
500 		DELAY(10);
501 		val = CSR_READ_4(sc, BGE_MI_COMM);
502 		if ((val & BGE_MICOMM_BUSY) == 0) {
503 			DELAY(5);
504 			val = CSR_READ_4(sc, BGE_MI_COMM);
505 			break;
506 		}
507 	}
508 	if (i == BNX_TIMEOUT) {
509 		if_printf(&sc->arpcom.ac_if, "PHY read timed out "
510 		    "(phy %d, reg %d, val 0x%08x)\n", phy, reg, val);
511 		val = 0;
512 	}
513 
514 	/* Restore the autopoll bit if necessary. */
515 	if (sc->bnx_mi_mode & BGE_MIMODE_AUTOPOLL) {
516 		CSR_WRITE_4(sc, BGE_MI_MODE, sc->bnx_mi_mode);
517 		DELAY(80);
518 	}
519 
520 	bnx_ape_unlock(sc, sc->bnx_phy_ape_lock);
521 
522 	if (val & BGE_MICOMM_READFAIL)
523 		return 0;
524 
525 	return (val & 0xFFFF);
526 }
527 
528 static int
529 bnx_miibus_writereg(device_t dev, int phy, int reg, int val)
530 {
531 	struct bnx_softc *sc = device_get_softc(dev);
532 	int i;
533 
534 	KASSERT(phy == sc->bnx_phyno,
535 	    ("invalid phyno %d, should be %d", phy, sc->bnx_phyno));
536 
537 	if (bnx_ape_lock(sc, sc->bnx_phy_ape_lock) != 0)
538 		return 0;
539 
540 	/* Clear the autopoll bit if set; otherwise it may trigger PCI errors. */
541 	if (sc->bnx_mi_mode & BGE_MIMODE_AUTOPOLL) {
542 		CSR_WRITE_4(sc, BGE_MI_MODE,
543 		    sc->bnx_mi_mode & ~BGE_MIMODE_AUTOPOLL);
544 		DELAY(80);
545 	}
546 
547 	CSR_WRITE_4(sc, BGE_MI_COMM, BGE_MICMD_WRITE | BGE_MICOMM_BUSY |
548 	    BGE_MIPHY(phy) | BGE_MIREG(reg) | val);
549 
550 	for (i = 0; i < BNX_TIMEOUT; i++) {
551 		DELAY(10);
552 		if (!(CSR_READ_4(sc, BGE_MI_COMM) & BGE_MICOMM_BUSY)) {
553 			DELAY(5);
554 			CSR_READ_4(sc, BGE_MI_COMM); /* dummy read */
555 			break;
556 		}
557 	}
558 	if (i == BNX_TIMEOUT) {
559 		if_printf(&sc->arpcom.ac_if, "PHY write timed out "
560 		    "(phy %d, reg %d, val %d)\n", phy, reg, val);
561 	}
562 
563 	/* Restore the autopoll bit if necessary. */
564 	if (sc->bnx_mi_mode & BGE_MIMODE_AUTOPOLL) {
565 		CSR_WRITE_4(sc, BGE_MI_MODE, sc->bnx_mi_mode);
566 		DELAY(80);
567 	}
568 
569 	bnx_ape_unlock(sc, sc->bnx_phy_ape_lock);
570 
571 	return 0;
572 }
573 
574 static void
575 bnx_miibus_statchg(device_t dev)
576 {
577 	struct bnx_softc *sc;
578 	struct mii_data *mii;
579 	uint32_t mac_mode;
580 
581 	sc = device_get_softc(dev);
582 	if ((sc->arpcom.ac_if.if_flags & IFF_RUNNING) == 0)
583 		return;
584 
585 	mii = device_get_softc(sc->bnx_miibus);
586 
587 	if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) ==
588 	    (IFM_ACTIVE | IFM_AVALID)) {
589 		switch (IFM_SUBTYPE(mii->mii_media_active)) {
590 		case IFM_10_T:
591 		case IFM_100_TX:
592 			sc->bnx_link = 1;
593 			break;
594 		case IFM_1000_T:
595 		case IFM_1000_SX:
596 		case IFM_2500_SX:
597 			sc->bnx_link = 1;
598 			break;
599 		default:
600 			sc->bnx_link = 0;
601 			break;
602 		}
603 	} else {
604 		sc->bnx_link = 0;
605 	}
606 	if (sc->bnx_link == 0)
607 		return;
608 
609 	/*
610 	 * APE firmware touches these registers to keep the MAC
611 	 * connected to the outside world.  Try to keep the
612 	 * accesses atomic.
613 	 */
614 
615 	mac_mode = CSR_READ_4(sc, BGE_MAC_MODE) &
616 	    ~(BGE_MACMODE_PORTMODE | BGE_MACMODE_HALF_DUPLEX);
617 
618 	if (IFM_SUBTYPE(mii->mii_media_active) == IFM_1000_T ||
619 	    IFM_SUBTYPE(mii->mii_media_active) == IFM_1000_SX)
620 		mac_mode |= BGE_PORTMODE_GMII;
621 	else
622 		mac_mode |= BGE_PORTMODE_MII;
623 
624 	if ((mii->mii_media_active & IFM_GMASK) != IFM_FDX)
625 		mac_mode |= BGE_MACMODE_HALF_DUPLEX;
626 
627 	CSR_WRITE_4(sc, BGE_MAC_MODE, mac_mode);
628 	DELAY(40);
629 }
630 
631 /*
632  * Memory management for jumbo frames.
633  */
634 static int
635 bnx_alloc_jumbo_mem(struct bnx_softc *sc)
636 {
637 	struct ifnet *ifp = &sc->arpcom.ac_if;
638 	struct bnx_jslot *entry;
639 	uint8_t *ptr;
640 	bus_addr_t paddr;
641 	int i, error;
642 
643 	/*
644 	 * Create tag for jumbo mbufs.
645 	 * This is really a bit of a kludge. We allocate a special
646 	 * jumbo buffer pool which (thanks to the way our DMA
647 	 * memory allocation works) will consist of contiguous
648 	 * pages. This means that even though a jumbo buffer might
649 	 * be larger than a page size, we don't really need to
650 	 * map it into more than one DMA segment. However, the
651 	 * default mbuf tag will result in multi-segment mappings,
652 	 * so we have to create a special jumbo mbuf tag that
653 	 * lets us get away with mapping the jumbo buffers as
654 	 * a single segment. I think eventually the driver should
655 	 * be changed so that it uses ordinary mbufs and cluster
656 	 * buffers, i.e. jumbo frames can span multiple DMA
657 	 * descriptors. But that's a project for another day.
658 	 */
659 
660 	/*
661 	 * Create DMA stuffs for jumbo RX ring.
662 	 */
663 	error = bnx_dma_block_alloc(sc, BGE_JUMBO_RX_RING_SZ,
664 				    &sc->bnx_cdata.bnx_rx_jumbo_ring_tag,
665 				    &sc->bnx_cdata.bnx_rx_jumbo_ring_map,
666 				    (void *)&sc->bnx_ldata.bnx_rx_jumbo_ring,
667 				    &sc->bnx_ldata.bnx_rx_jumbo_ring_paddr);
668 	if (error) {
669 		if_printf(ifp, "could not create jumbo RX ring\n");
670 		return error;
671 	}
672 
673 	/*
674 	 * Create DMA stuffs for jumbo buffer block.
675 	 */
676 	error = bnx_dma_block_alloc(sc, BNX_JMEM,
677 				    &sc->bnx_cdata.bnx_jumbo_tag,
678 				    &sc->bnx_cdata.bnx_jumbo_map,
679 				    (void **)&sc->bnx_ldata.bnx_jumbo_buf,
680 				    &paddr);
681 	if (error) {
682 		if_printf(ifp, "could not create jumbo buffer\n");
683 		return error;
684 	}
685 
686 	SLIST_INIT(&sc->bnx_jfree_listhead);
687 
688 	/*
689 	 * Now divide it up into 9K pieces and save the addresses
690 	 * in an array. Note that we play an evil trick here by using
691 	 * the first few bytes in the buffer to hold the address
692 	 * of the softc structure for this interface. This is because
693 	 * bnx_jfree() needs it, but it is called by the mbuf management
694 	 * code which will not pass it to us explicitly.
695 	 */
696 	for (i = 0, ptr = sc->bnx_ldata.bnx_jumbo_buf; i < BNX_JSLOTS; i++) {
697 		entry = &sc->bnx_cdata.bnx_jslots[i];
698 		entry->bnx_sc = sc;
699 		entry->bnx_buf = ptr;
700 		entry->bnx_paddr = paddr;
701 		entry->bnx_inuse = 0;
702 		entry->bnx_slot = i;
703 		SLIST_INSERT_HEAD(&sc->bnx_jfree_listhead, entry, jslot_link);
704 
705 		ptr += BNX_JLEN;
706 		paddr += BNX_JLEN;
707 	}
708 	return 0;
709 }
710 
711 static void
712 bnx_free_jumbo_mem(struct bnx_softc *sc)
713 {
714 	/* Destroy jumbo RX ring. */
715 	bnx_dma_block_free(sc->bnx_cdata.bnx_rx_jumbo_ring_tag,
716 			   sc->bnx_cdata.bnx_rx_jumbo_ring_map,
717 			   sc->bnx_ldata.bnx_rx_jumbo_ring);
718 
719 	/* Destroy jumbo buffer block. */
720 	bnx_dma_block_free(sc->bnx_cdata.bnx_jumbo_tag,
721 			   sc->bnx_cdata.bnx_jumbo_map,
722 			   sc->bnx_ldata.bnx_jumbo_buf);
723 }
724 
725 /*
726  * Allocate a jumbo buffer.
727  */
728 static struct bnx_jslot *
729 bnx_jalloc(struct bnx_softc *sc)
730 {
731 	struct bnx_jslot *entry;
732 
733 	lwkt_serialize_enter(&sc->bnx_jslot_serializer);
734 	entry = SLIST_FIRST(&sc->bnx_jfree_listhead);
735 	if (entry) {
736 		SLIST_REMOVE_HEAD(&sc->bnx_jfree_listhead, jslot_link);
737 		entry->bnx_inuse = 1;
738 	} else {
739 		if_printf(&sc->arpcom.ac_if, "no free jumbo buffers\n");
740 	}
741 	lwkt_serialize_exit(&sc->bnx_jslot_serializer);
742 	return(entry);
743 }
744 
745 /*
746  * Adjust usage count on a jumbo buffer.
747  */
748 static void
749 bnx_jref(void *arg)
750 {
751 	struct bnx_jslot *entry = (struct bnx_jslot *)arg;
752 	struct bnx_softc *sc = entry->bnx_sc;
753 
754 	if (sc == NULL)
755 		panic("bnx_jref: can't find softc pointer!");
756 
757 	if (&sc->bnx_cdata.bnx_jslots[entry->bnx_slot] != entry) {
758 		panic("bnx_jref: asked to reference buffer "
759 		    "that we don't manage!");
760 	} else if (entry->bnx_inuse == 0) {
761 		panic("bnx_jref: buffer already free!");
762 	} else {
763 		atomic_add_int(&entry->bnx_inuse, 1);
764 	}
765 }
766 
767 /*
768  * Release a jumbo buffer.
769  */
770 static void
771 bnx_jfree(void *arg)
772 {
773 	struct bnx_jslot *entry = (struct bnx_jslot *)arg;
774 	struct bnx_softc *sc = entry->bnx_sc;
775 
776 	if (sc == NULL)
777 		panic("bnx_jfree: can't find softc pointer!");
778 
779 	if (&sc->bnx_cdata.bnx_jslots[entry->bnx_slot] != entry) {
780 		panic("bnx_jfree: asked to free buffer that we don't manage!");
781 	} else if (entry->bnx_inuse == 0) {
782 		panic("bnx_jfree: buffer already free!");
783 	} else {
784 		/*
785 		 * Possible MP race to 0, use the serializer.  The atomic insn
786 		 * is still needed for races against bnx_jref().
787 		 */
788 		lwkt_serialize_enter(&sc->bnx_jslot_serializer);
789 		atomic_subtract_int(&entry->bnx_inuse, 1);
790 		if (entry->bnx_inuse == 0) {
791 			SLIST_INSERT_HEAD(&sc->bnx_jfree_listhead,
792 					  entry, jslot_link);
793 		}
794 		lwkt_serialize_exit(&sc->bnx_jslot_serializer);
795 	}
796 }
797 
798 
799 /*
800  * Initialize a standard receive ring descriptor.
801  */
802 static int
803 bnx_newbuf_std(struct bnx_rx_ret_ring *ret, int i, int init)
804 {
805 	struct mbuf *m_new = NULL;
806 	bus_dma_segment_t seg;
807 	bus_dmamap_t map;
808 	int error, nsegs;
809 	struct bnx_rx_buf *rb;
810 
811 	rb = &ret->bnx_std->bnx_rx_std_buf[i];
812 	KASSERT(!rb->bnx_rx_refilled, ("RX buf %dth has been refilled", i));
813 
814 	m_new = m_getcl(init ? M_WAITOK : M_NOWAIT, MT_DATA, M_PKTHDR);
815 	if (m_new == NULL) {
816 		error = ENOBUFS;
817 		goto back;
818 	}
819 	m_new->m_len = m_new->m_pkthdr.len = MCLBYTES;
820 	m_adj(m_new, ETHER_ALIGN);
821 
822 	error = bus_dmamap_load_mbuf_segment(ret->bnx_rx_mtag,
823 	    ret->bnx_rx_tmpmap, m_new, &seg, 1, &nsegs, BUS_DMA_NOWAIT);
824 	if (error) {
825 		m_freem(m_new);
826 		goto back;
827 	}
828 
829 	if (!init) {
830 		bus_dmamap_sync(ret->bnx_rx_mtag, rb->bnx_rx_dmamap,
831 		    BUS_DMASYNC_POSTREAD);
832 		bus_dmamap_unload(ret->bnx_rx_mtag, rb->bnx_rx_dmamap);
833 	}
834 
835 	map = ret->bnx_rx_tmpmap;
836 	ret->bnx_rx_tmpmap = rb->bnx_rx_dmamap;
837 
838 	rb->bnx_rx_dmamap = map;
839 	rb->bnx_rx_mbuf = m_new;
840 	rb->bnx_rx_paddr = seg.ds_addr;
841 	rb->bnx_rx_len = m_new->m_len;
842 back:
843 	cpu_sfence();
844 	rb->bnx_rx_refilled = 1;
845 	return error;
846 }
847 
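/*
 * Publish a refilled buffer to the standard RX ring.  bnx_newbuf_std()
 * stores the new mbuf, physical address and length, issues a store fence
 * and only then sets bnx_rx_refilled; here the fields are read back, a
 * fence is issued and bnx_rx_refilled is cleared before the hardware
 * descriptor is written, so a subsequent refill cannot overwrite fields
 * that are still being consumed.
 */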
848 static void
849 bnx_setup_rxdesc_std(struct bnx_rx_std_ring *std, int i)
850 {
851 	struct bnx_rx_buf *rb;
852 	struct bge_rx_bd *r;
853 	bus_addr_t paddr;
854 	int len;
855 
856 	rb = &std->bnx_rx_std_buf[i];
857 	KASSERT(rb->bnx_rx_refilled, ("RX buf %dth is not refilled", i));
858 
859 	paddr = rb->bnx_rx_paddr;
860 	len = rb->bnx_rx_len;
861 
862 	cpu_mfence();
863 
864 	rb->bnx_rx_refilled = 0;
865 
866 	r = &std->bnx_rx_std_ring[i];
867 	r->bge_addr.bge_addr_lo = BGE_ADDR_LO(paddr);
868 	r->bge_addr.bge_addr_hi = BGE_ADDR_HI(paddr);
869 	r->bge_len = len;
870 	r->bge_idx = i;
871 	r->bge_flags = BGE_RXBDFLAG_END;
872 }
873 
874 /*
875  * Initialize a jumbo receive ring descriptor. This allocates
876  * a jumbo buffer from the pool managed internally by the driver.
877  */
878 static int
879 bnx_newbuf_jumbo(struct bnx_softc *sc, int i, int init)
880 {
881 	struct mbuf *m_new = NULL;
882 	struct bnx_jslot *buf;
883 	bus_addr_t paddr;
884 
885 	/* Allocate the mbuf. */
886 	MGETHDR(m_new, init ? M_WAITOK : M_NOWAIT, MT_DATA);
887 	if (m_new == NULL)
888 		return ENOBUFS;
889 
890 	/* Allocate the jumbo buffer */
891 	buf = bnx_jalloc(sc);
892 	if (buf == NULL) {
893 		m_freem(m_new);
894 		return ENOBUFS;
895 	}
896 
897 	/* Attach the buffer to the mbuf. */
898 	m_new->m_ext.ext_arg = buf;
899 	m_new->m_ext.ext_buf = buf->bnx_buf;
900 	m_new->m_ext.ext_free = bnx_jfree;
901 	m_new->m_ext.ext_ref = bnx_jref;
902 	m_new->m_ext.ext_size = BNX_JUMBO_FRAMELEN;
903 
904 	m_new->m_flags |= M_EXT;
905 
906 	m_new->m_data = m_new->m_ext.ext_buf;
907 	m_new->m_len = m_new->m_pkthdr.len = m_new->m_ext.ext_size;
908 
909 	paddr = buf->bnx_paddr;
910 	m_adj(m_new, ETHER_ALIGN);
911 	paddr += ETHER_ALIGN;
912 
913 	/* Save necessary information */
914 	sc->bnx_cdata.bnx_rx_jumbo_chain[i].bnx_rx_mbuf = m_new;
915 	sc->bnx_cdata.bnx_rx_jumbo_chain[i].bnx_rx_paddr = paddr;
916 
917 	/* Set up the descriptor. */
918 	bnx_setup_rxdesc_jumbo(sc, i);
919 	return 0;
920 }
921 
922 static void
923 bnx_setup_rxdesc_jumbo(struct bnx_softc *sc, int i)
924 {
925 	struct bge_rx_bd *r;
926 	struct bnx_rx_buf *rc;
927 
928 	r = &sc->bnx_ldata.bnx_rx_jumbo_ring[i];
929 	rc = &sc->bnx_cdata.bnx_rx_jumbo_chain[i];
930 
931 	r->bge_addr.bge_addr_lo = BGE_ADDR_LO(rc->bnx_rx_paddr);
932 	r->bge_addr.bge_addr_hi = BGE_ADDR_HI(rc->bnx_rx_paddr);
933 	r->bge_len = rc->bnx_rx_mbuf->m_len;
934 	r->bge_idx = i;
935 	r->bge_flags = BGE_RXBDFLAG_END|BGE_RXBDFLAG_JUMBO_RING;
936 }
937 
938 static int
939 bnx_init_rx_ring_std(struct bnx_rx_std_ring *std)
940 {
941 	int i, error;
942 
943 	for (i = 0; i < BGE_STD_RX_RING_CNT; i++) {
944 		/* Use the first RX return ring's tmp RX mbuf DMA map */
945 		error = bnx_newbuf_std(&std->bnx_sc->bnx_rx_ret_ring[0], i, 1);
946 		if (error)
947 			return error;
948 		bnx_setup_rxdesc_std(std, i);
949 	}
950 
951 	std->bnx_rx_std_used = 0;
952 	std->bnx_rx_std_refill = 0;
953 	std->bnx_rx_std_running = 0;
954 	cpu_sfence();
955 	lwkt_serialize_handler_enable(&std->bnx_rx_std_serialize);
956 
957 	std->bnx_rx_std = BGE_STD_RX_RING_CNT - 1;
958 	bnx_writembx(std->bnx_sc, BGE_MBX_RX_STD_PROD_LO, std->bnx_rx_std);
959 
960 	return(0);
961 }
962 
963 static void
964 bnx_free_rx_ring_std(struct bnx_rx_std_ring *std)
965 {
966 	int i;
967 
968 	lwkt_serialize_handler_disable(&std->bnx_rx_std_serialize);
969 
970 	for (i = 0; i < BGE_STD_RX_RING_CNT; i++) {
971 		struct bnx_rx_buf *rb = &std->bnx_rx_std_buf[i];
972 
973 		rb->bnx_rx_refilled = 0;
974 		if (rb->bnx_rx_mbuf != NULL) {
975 			bus_dmamap_unload(std->bnx_rx_mtag, rb->bnx_rx_dmamap);
976 			m_freem(rb->bnx_rx_mbuf);
977 			rb->bnx_rx_mbuf = NULL;
978 		}
979 		bzero(&std->bnx_rx_std_ring[i], sizeof(struct bge_rx_bd));
980 	}
981 }
982 
983 static int
984 bnx_init_rx_ring_jumbo(struct bnx_softc *sc)
985 {
986 	struct bge_rcb *rcb;
987 	int i, error;
988 
989 	for (i = 0; i < BGE_JUMBO_RX_RING_CNT; i++) {
990 		error = bnx_newbuf_jumbo(sc, i, 1);
991 		if (error)
992 			return error;
993 	}
994 
995 	sc->bnx_jumbo = BGE_JUMBO_RX_RING_CNT - 1;
996 
997 	rcb = &sc->bnx_ldata.bnx_info.bnx_jumbo_rx_rcb;
998 	rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(0, 0);
999 	CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_MAXLEN_FLAGS, rcb->bge_maxlen_flags);
1000 
1001 	bnx_writembx(sc, BGE_MBX_RX_JUMBO_PROD_LO, sc->bnx_jumbo);
1002 
1003 	return(0);
1004 }
1005 
1006 static void
1007 bnx_free_rx_ring_jumbo(struct bnx_softc *sc)
1008 {
1009 	int i;
1010 
1011 	for (i = 0; i < BGE_JUMBO_RX_RING_CNT; i++) {
1012 		struct bnx_rx_buf *rc = &sc->bnx_cdata.bnx_rx_jumbo_chain[i];
1013 
1014 		if (rc->bnx_rx_mbuf != NULL) {
1015 			m_freem(rc->bnx_rx_mbuf);
1016 			rc->bnx_rx_mbuf = NULL;
1017 		}
1018 		bzero(&sc->bnx_ldata.bnx_rx_jumbo_ring[i],
1019 		    sizeof(struct bge_rx_bd));
1020 	}
1021 }
1022 
1023 static void
1024 bnx_free_tx_ring(struct bnx_tx_ring *txr)
1025 {
1026 	int i;
1027 
1028 	for (i = 0; i < BGE_TX_RING_CNT; i++) {
1029 		struct bnx_tx_buf *buf = &txr->bnx_tx_buf[i];
1030 
1031 		if (buf->bnx_tx_mbuf != NULL) {
1032 			bus_dmamap_unload(txr->bnx_tx_mtag,
1033 			    buf->bnx_tx_dmamap);
1034 			m_freem(buf->bnx_tx_mbuf);
1035 			buf->bnx_tx_mbuf = NULL;
1036 		}
1037 		bzero(&txr->bnx_tx_ring[i], sizeof(struct bge_tx_bd));
1038 	}
1039 	txr->bnx_tx_saved_considx = BNX_TXCONS_UNSET;
1040 }
1041 
1042 static int
1043 bnx_init_tx_ring(struct bnx_tx_ring *txr)
1044 {
1045 	txr->bnx_tx_cnt = 0;
1046 	txr->bnx_tx_saved_considx = 0;
1047 	txr->bnx_tx_prodidx = 0;
1048 
1049 	/* Initialize transmit producer index for host-memory send ring. */
1050 	bnx_writembx(txr->bnx_sc, txr->bnx_tx_mbx, txr->bnx_tx_prodidx);
1051 
1052 	return(0);
1053 }
1054 
1055 static void
1056 bnx_setmulti(struct bnx_softc *sc)
1057 {
1058 	struct ifnet *ifp;
1059 	struct ifmultiaddr *ifma;
1060 	uint32_t hashes[4] = { 0, 0, 0, 0 };
1061 	int h, i;
1062 
1063 	ifp = &sc->arpcom.ac_if;
1064 
1065 	if (ifp->if_flags & IFF_ALLMULTI || ifp->if_flags & IFF_PROMISC) {
1066 		for (i = 0; i < 4; i++)
1067 			CSR_WRITE_4(sc, BGE_MAR0 + (i * 4), 0xFFFFFFFF);
1068 		return;
1069 	}
1070 
1071 	/* First, zot all the existing filters. */
1072 	for (i = 0; i < 4; i++)
1073 		CSR_WRITE_4(sc, BGE_MAR0 + (i * 4), 0);
1074 
1075 	/* Now program new ones. */
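	/*
	 * Hash each multicast address down to 7 bits of CRC32; bits 6-5
	 * select one of the four BGE_MAR registers and bits 4-0 select
	 * the bit within it.
	 */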
1076 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1077 		if (ifma->ifma_addr->sa_family != AF_LINK)
1078 			continue;
1079 		h = ether_crc32_le(
1080 		    LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1081 		    ETHER_ADDR_LEN) & 0x7f;
1082 		hashes[(h & 0x60) >> 5] |= 1 << (h & 0x1F);
1083 	}
1084 
1085 	for (i = 0; i < 4; i++)
1086 		CSR_WRITE_4(sc, BGE_MAR0 + (i * 4), hashes[i]);
1087 }
1088 
1089 /*
1090  * Do endian, PCI and DMA initialization. Also check the on-board ROM
1091  * self-test results.
1092  */
1093 static int
1094 bnx_chipinit(struct bnx_softc *sc)
1095 {
1096 	uint32_t dma_rw_ctl, mode_ctl;
1097 	int i;
1098 
1099 	/* Set endian type before we access any non-PCI registers. */
1100 	pci_write_config(sc->bnx_dev, BGE_PCI_MISC_CTL,
1101 	    BGE_INIT | BGE_PCIMISCCTL_TAGGED_STATUS, 4);
1102 
1103 	/*
1104 	 * Clear the MAC statistics block in the NIC's
1105 	 * internal memory.
1106 	 */
1107 	for (i = BGE_STATS_BLOCK;
1108 	    i < BGE_STATS_BLOCK_END + 1; i += sizeof(uint32_t))
1109 		BNX_MEMWIN_WRITE(sc, i, 0);
1110 
1111 	for (i = BGE_STATUS_BLOCK;
1112 	    i < BGE_STATUS_BLOCK_END + 1; i += sizeof(uint32_t))
1113 		BNX_MEMWIN_WRITE(sc, i, 0);
1114 
1115 	if (BNX_IS_57765_FAMILY(sc)) {
1116 		uint32_t val;
1117 
1118 		if (sc->bnx_chipid == BGE_CHIPID_BCM57765_A0) {
1119 			mode_ctl = CSR_READ_4(sc, BGE_MODE_CTL);
1120 			val = mode_ctl & ~BGE_MODECTL_PCIE_PORTS;
1121 
1122 			/* Access the lower 1K of PL PCI-E block registers. */
1123 			CSR_WRITE_4(sc, BGE_MODE_CTL,
1124 			    val | BGE_MODECTL_PCIE_PL_SEL);
1125 
1126 			val = CSR_READ_4(sc, BGE_PCIE_PL_LO_PHYCTL5);
1127 			val |= BGE_PCIE_PL_LO_PHYCTL5_DIS_L2CLKREQ;
1128 			CSR_WRITE_4(sc, BGE_PCIE_PL_LO_PHYCTL5, val);
1129 
1130 			CSR_WRITE_4(sc, BGE_MODE_CTL, mode_ctl);
1131 		}
1132 		if (sc->bnx_chiprev != BGE_CHIPREV_57765_AX) {
1133 			/* Fix transmit hangs */
1134 			val = CSR_READ_4(sc, BGE_CPMU_PADRNG_CTL);
1135 			val |= BGE_CPMU_PADRNG_CTL_RDIV2;
1136 			CSR_WRITE_4(sc, BGE_CPMU_PADRNG_CTL, val);
1137 
1138 			mode_ctl = CSR_READ_4(sc, BGE_MODE_CTL);
1139 			val = mode_ctl & ~BGE_MODECTL_PCIE_PORTS;
1140 
1141 			/* Access the lower 1K of DL PCI-E block registers. */
1142 			CSR_WRITE_4(sc, BGE_MODE_CTL,
1143 			    val | BGE_MODECTL_PCIE_DL_SEL);
1144 
1145 			val = CSR_READ_4(sc, BGE_PCIE_DL_LO_FTSMAX);
1146 			val &= ~BGE_PCIE_DL_LO_FTSMAX_MASK;
1147 			val |= BGE_PCIE_DL_LO_FTSMAX_VAL;
1148 			CSR_WRITE_4(sc, BGE_PCIE_DL_LO_FTSMAX, val);
1149 
1150 			CSR_WRITE_4(sc, BGE_MODE_CTL, mode_ctl);
1151 		}
1152 
1153 		val = CSR_READ_4(sc, BGE_CPMU_LSPD_10MB_CLK);
1154 		val &= ~BGE_CPMU_LSPD_10MB_MACCLK_MASK;
1155 		val |= BGE_CPMU_LSPD_10MB_MACCLK_6_25;
1156 		CSR_WRITE_4(sc, BGE_CPMU_LSPD_10MB_CLK, val);
1157 	}
1158 
1159 	/*
1160 	 * Set up the PCI DMA control register.
1161 	 */
1162 	dma_rw_ctl = pci_read_config(sc->bnx_dev, BGE_PCI_DMA_RW_CTL, 4);
1163 	/*
1164 	 * Disable 32-byte cache alignment for DMA writes to host memory
1165 	 *
1166 	 * NOTE:
1167 	 * 64-byte cache alignment for DMA writes to host memory is still
1168 	 * enabled.
1169 	 */
1170 	dma_rw_ctl |= BGE_PCIDMARWCTL_DIS_CACHE_ALIGNMENT;
1171 	if (sc->bnx_chipid == BGE_CHIPID_BCM57765_A0)
1172 		dma_rw_ctl &= ~BGE_PCIDMARWCTL_CRDRDR_RDMA_MRRS_MSK;
1173 	/*
1174 	 * Enable HW workaround for controllers that misinterpret
1175 	 * a status tag update and leave interrupts permanently
1176 	 * disabled.
1177 	 */
1178 	if (sc->bnx_asicrev != BGE_ASICREV_BCM5717 &&
1179 	    sc->bnx_asicrev != BGE_ASICREV_BCM5762 &&
1180 	    !BNX_IS_57765_FAMILY(sc))
1181 		dma_rw_ctl |= BGE_PCIDMARWCTL_TAGGED_STATUS_WA;
1182 	if (bootverbose) {
1183 		if_printf(&sc->arpcom.ac_if, "DMA read/write %#x\n",
1184 		    dma_rw_ctl);
1185 	}
1186 	pci_write_config(sc->bnx_dev, BGE_PCI_DMA_RW_CTL, dma_rw_ctl, 4);
1187 
1188 	/*
1189 	 * Set up general mode register.
1190 	 */
1191 	mode_ctl = bnx_dma_swap_options(sc);
1192 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5720 ||
1193 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762) {
1194 		/* Retain Host-2-BMC settings written by APE firmware. */
1195 		mode_ctl |= CSR_READ_4(sc, BGE_MODE_CTL) &
1196 		    (BGE_MODECTL_BYTESWAP_B2HRX_DATA |
1197 		    BGE_MODECTL_WORDSWAP_B2HRX_DATA |
1198 		    BGE_MODECTL_B2HRX_ENABLE | BGE_MODECTL_HTX2B_ENABLE);
1199 	}
1200 	mode_ctl |= BGE_MODECTL_MAC_ATTN_INTR |
1201 	    BGE_MODECTL_HOST_SEND_BDS | BGE_MODECTL_TX_NO_PHDR_CSUM;
1202 	CSR_WRITE_4(sc, BGE_MODE_CTL, mode_ctl);
1203 
1204 	/*
1205 	 * Disable memory write invalidate.  Apparently it is not supported
1206 	 * properly by these devices.  Also ensure that INTx isn't disabled,
1207 	 * as these chips need it even when using MSI.
1208 	 */
1209 	PCI_CLRBIT(sc->bnx_dev, BGE_PCI_CMD,
1210 	    (PCIM_CMD_MWRICEN | PCIM_CMD_INTxDIS), 4);
1211 
1212 	/* Set the timer prescaler (always 66MHz) */
1213 	CSR_WRITE_4(sc, BGE_MISC_CFG, 65 << 1/*BGE_32BITTIME_66MHZ*/);
1214 
1215 	return(0);
1216 }
1217 
1218 static int
1219 bnx_blockinit(struct bnx_softc *sc)
1220 {
1221 	struct bnx_intr_data *intr;
1222 	struct bge_rcb *rcb;
1223 	bus_size_t vrcb;
1224 	bge_hostaddr taddr;
1225 	uint32_t val;
1226 	int i, limit;
1227 
1228 	/*
1229 	 * Initialize the memory window pointer register so that
1230 	 * we can access the first 32K of internal NIC RAM. This will
1231 	 * allow us to set up the TX send ring RCBs and the RX return
1232 	 * ring RCBs, plus other things which live in NIC memory.
1233 	 */
1234 	CSR_WRITE_4(sc, BGE_PCI_MEMWIN_BASEADDR, 0);
1235 
1236 	/* Configure mbuf pool watermarks */
1237 	if (BNX_IS_57765_PLUS(sc)) {
1238 		CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_READDMA_LOWAT, 0x0);
1239 		if (sc->arpcom.ac_if.if_mtu > ETHERMTU) {
1240 			CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_MACRX_LOWAT, 0x7e);
1241 			CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_HIWAT, 0xea);
1242 		} else {
1243 			CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_MACRX_LOWAT, 0x2a);
1244 			CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_HIWAT, 0xa0);
1245 		}
1246 	} else {
1247 		CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_READDMA_LOWAT, 0x0);
1248 		CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_MACRX_LOWAT, 0x10);
1249 		CSR_WRITE_4(sc, BGE_BMAN_MBUFPOOL_HIWAT, 0x60);
1250 	}
1251 
1252 	/* Configure DMA resource watermarks */
1253 	CSR_WRITE_4(sc, BGE_BMAN_DMA_DESCPOOL_LOWAT, 5);
1254 	CSR_WRITE_4(sc, BGE_BMAN_DMA_DESCPOOL_HIWAT, 10);
1255 
1256 	/* Enable buffer manager */
1257 	val = BGE_BMANMODE_ENABLE | BGE_BMANMODE_LOMBUF_ATTN;
1258 	/*
1259 	 * Change the arbitration algorithm of TXMBUF read request to
1260 	 * round-robin instead of priority based for BCM5719.  When
1261 	 * TXFIFO is almost empty, RDMA will hold its request until
1262 	 * TXFIFO is not almost empty.
1263 	 */
1264 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5719)
1265 		val |= BGE_BMANMODE_NO_TX_UNDERRUN;
1266 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5717 ||
1267 	    sc->bnx_chipid == BGE_CHIPID_BCM5719_A0 ||
1268 	    sc->bnx_chipid == BGE_CHIPID_BCM5720_A0)
1269 		val |= BGE_BMANMODE_LOMBUF_ATTN;
1270 	CSR_WRITE_4(sc, BGE_BMAN_MODE, val);
1271 
1272 	/* Poll for buffer manager start indication */
1273 	for (i = 0; i < BNX_TIMEOUT; i++) {
1274 		if (CSR_READ_4(sc, BGE_BMAN_MODE) & BGE_BMANMODE_ENABLE)
1275 			break;
1276 		DELAY(10);
1277 	}
1278 
1279 	if (i == BNX_TIMEOUT) {
1280 		if_printf(&sc->arpcom.ac_if,
1281 			  "buffer manager failed to start\n");
1282 		return(ENXIO);
1283 	}
1284 
1285 	/* Enable flow-through queues */
1286 	CSR_WRITE_4(sc, BGE_FTQ_RESET, 0xFFFFFFFF);
1287 	CSR_WRITE_4(sc, BGE_FTQ_RESET, 0);
1288 
1289 	/* Wait until queue initialization is complete */
1290 	for (i = 0; i < BNX_TIMEOUT; i++) {
1291 		if (CSR_READ_4(sc, BGE_FTQ_RESET) == 0)
1292 			break;
1293 		DELAY(10);
1294 	}
1295 
1296 	if (i == BNX_TIMEOUT) {
1297 		if_printf(&sc->arpcom.ac_if,
1298 			  "flow-through queue init failed\n");
1299 		return(ENXIO);
1300 	}
1301 
1302 	/*
1303 	 * Summary of rings supported by the controller:
1304 	 *
1305 	 * Standard Receive Producer Ring
1306 	 * - This ring is used to feed receive buffers for "standard"
1307 	 *   sized frames (typically 1536 bytes) to the controller.
1308 	 *
1309 	 * Jumbo Receive Producer Ring
1310 	 * - This ring is used to feed receive buffers for jumbo sized
1311 	 *   frames (i.e. anything bigger than the "standard" frames)
1312 	 *   to the controller.
1313 	 *
1314 	 * Mini Receive Producer Ring
1315 	 * - This ring is used to feed receive buffers for "mini"
1316 	 *   sized frames to the controller.
1317 	 * - This feature required external memory for the controller
1318 	 *   but was never used in a production system.  Should always
1319 	 *   be disabled.
1320 	 *
1321 	 * Receive Return Ring
1322 	 * - After the controller has placed an incoming frame into a
1323 	 *   receive buffer that buffer is moved into a receive return
1324 	 *   ring.  The driver is then responsible for passing the
1325 	 *   buffer up to the stack.  BCM5718/BCM57785 families support
1326 	 *   multiple receive return rings.
1327 	 *
1328 	 * Send Ring
1329 	 * - This ring is used for outgoing frames.  BCM5719/BCM5720
1330 	 *   support multiple send rings.
1331 	 */
1332 
1333 	/* Initialize the standard receive producer ring control block. */
1334 	rcb = &sc->bnx_ldata.bnx_info.bnx_std_rx_rcb;
1335 	rcb->bge_hostaddr.bge_addr_lo =
1336 	    BGE_ADDR_LO(sc->bnx_rx_std_ring.bnx_rx_std_ring_paddr);
1337 	rcb->bge_hostaddr.bge_addr_hi =
1338 	    BGE_ADDR_HI(sc->bnx_rx_std_ring.bnx_rx_std_ring_paddr);
1339 	if (BNX_IS_57765_PLUS(sc)) {
1340 		/*
1341 		 * Bits 31-16: Programmable ring size (2048, 1024, 512, .., 32)
1342 		 * Bits 15-2 : Maximum RX frame size
1343 	 * Bit 1     : 1 = Ring Disabled, 0 = Ring Enabled
1344 		 * Bit 0     : Reserved
1345 		 */
1346 		rcb->bge_maxlen_flags =
1347 		    BGE_RCB_MAXLEN_FLAGS(512, BNX_MAX_FRAMELEN << 2);
1348 	} else {
1349 		/*
1350 		 * Bits 31-16: Programmable ring size (512, 256, 128, 64, 32)
1351 		 * Bits 15-2 : Reserved (should be 0)
1352 		 * Bit 1     : 1 = Ring Disabled, 0 = Ring Enabled
1353 		 * Bit 0     : Reserved
1354 		 */
1355 		rcb->bge_maxlen_flags = BGE_RCB_MAXLEN_FLAGS(512, 0);
1356 	}
1357 	if (BNX_IS_5717_PLUS(sc))
1358 		rcb->bge_nicaddr = BGE_STD_RX_RINGS_5717;
1359 	else
1360 		rcb->bge_nicaddr = BGE_STD_RX_RINGS;
1361 	/* Write the standard receive producer ring control block. */
1362 	CSR_WRITE_4(sc, BGE_RX_STD_RCB_HADDR_HI, rcb->bge_hostaddr.bge_addr_hi);
1363 	CSR_WRITE_4(sc, BGE_RX_STD_RCB_HADDR_LO, rcb->bge_hostaddr.bge_addr_lo);
1364 	CSR_WRITE_4(sc, BGE_RX_STD_RCB_MAXLEN_FLAGS, rcb->bge_maxlen_flags);
1365 	if (!BNX_IS_5717_PLUS(sc))
1366 		CSR_WRITE_4(sc, BGE_RX_STD_RCB_NICADDR, rcb->bge_nicaddr);
1367 	/* Reset the standard receive producer ring producer index. */
1368 	bnx_writembx(sc, BGE_MBX_RX_STD_PROD_LO, 0);
1369 
1370 	/*
1371 	 * Initialize the jumbo RX producer ring control
1372 	 * block.  We set the 'ring disabled' bit in the
1373 	 * flags field until we're actually ready to start
1374 	 * using this ring (i.e. once we set the MTU
1375 	 * high enough to require it).
1376 	 */
1377 	if (BNX_IS_JUMBO_CAPABLE(sc)) {
1378 		rcb = &sc->bnx_ldata.bnx_info.bnx_jumbo_rx_rcb;
1379 		/* Get the jumbo receive producer ring RCB parameters. */
1380 		rcb->bge_hostaddr.bge_addr_lo =
1381 		    BGE_ADDR_LO(sc->bnx_ldata.bnx_rx_jumbo_ring_paddr);
1382 		rcb->bge_hostaddr.bge_addr_hi =
1383 		    BGE_ADDR_HI(sc->bnx_ldata.bnx_rx_jumbo_ring_paddr);
1384 		rcb->bge_maxlen_flags =
1385 		    BGE_RCB_MAXLEN_FLAGS(BNX_MAX_FRAMELEN,
1386 		    BGE_RCB_FLAG_RING_DISABLED);
1387 		if (BNX_IS_5717_PLUS(sc))
1388 			rcb->bge_nicaddr = BGE_JUMBO_RX_RINGS_5717;
1389 		else
1390 			rcb->bge_nicaddr = BGE_JUMBO_RX_RINGS;
1391 		CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_HADDR_HI,
1392 		    rcb->bge_hostaddr.bge_addr_hi);
1393 		CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_HADDR_LO,
1394 		    rcb->bge_hostaddr.bge_addr_lo);
1395 		/* Program the jumbo receive producer ring RCB parameters. */
1396 		CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_MAXLEN_FLAGS,
1397 		    rcb->bge_maxlen_flags);
1398 		CSR_WRITE_4(sc, BGE_RX_JUMBO_RCB_NICADDR, rcb->bge_nicaddr);
1399 		/* Reset the jumbo receive producer ring producer index. */
1400 		bnx_writembx(sc, BGE_MBX_RX_JUMBO_PROD_LO, 0);
1401 	}
1402 
1403 	/*
1404 	 * The BD ring replenish thresholds control how often the
1405 	 * hardware fetches new BD's from the producer rings in host
1406 	 * memory.  Setting the value too low on a busy system can
1407 	 * starve the hardware and reduce the throughput.
1408 	 *
1409 	 * Set the BD ring replenish thresholds. The recommended
1410 	 * values are 1/8th the number of descriptors allocated to
1411 	 * each ring.
1412 	 */
1413 	val = 8;
1414 	CSR_WRITE_4(sc, BGE_RBDI_STD_REPL_THRESH, val);
1415 	if (BNX_IS_JUMBO_CAPABLE(sc)) {
1416 		CSR_WRITE_4(sc, BGE_RBDI_JUMBO_REPL_THRESH,
1417 		    BGE_JUMBO_RX_RING_CNT/8);
1418 	}
1419 	if (BNX_IS_57765_PLUS(sc)) {
1420 		CSR_WRITE_4(sc, BGE_STD_REPLENISH_LWM, 32);
1421 		CSR_WRITE_4(sc, BGE_JMB_REPLENISH_LWM, 16);
1422 	}
1423 
1424 	/*
1425 	 * Disable all send rings by setting the 'ring disabled' bit
1426 	 * in the flags field of all the TX send ring control blocks,
1427 	 * located in NIC memory.
1428 	 */
1429 	if (BNX_IS_5717_PLUS(sc))
1430 		limit = 4;
1431 	else if (BNX_IS_57765_FAMILY(sc) ||
1432 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762)
1433 		limit = 2;
1434 	else
1435 		limit = 1;
1436 	vrcb = BGE_MEMWIN_START + BGE_SEND_RING_RCB;
1437 	for (i = 0; i < limit; i++) {
1438 		RCB_WRITE_4(sc, vrcb, bge_maxlen_flags,
1439 		    BGE_RCB_MAXLEN_FLAGS(0, BGE_RCB_FLAG_RING_DISABLED));
1440 		vrcb += sizeof(struct bge_rcb);
1441 	}
1442 
1443 	/*
1444 	 * Configure send ring RCBs
1445 	 */
1446 	vrcb = BGE_MEMWIN_START + BGE_SEND_RING_RCB;
1447 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
1448 		struct bnx_tx_ring *txr = &sc->bnx_tx_ring[i];
1449 
1450 		BGE_HOSTADDR(taddr, txr->bnx_tx_ring_paddr);
1451 		RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_hi,
1452 		    taddr.bge_addr_hi);
1453 		RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_lo,
1454 		    taddr.bge_addr_lo);
1455 		RCB_WRITE_4(sc, vrcb, bge_maxlen_flags,
1456 		    BGE_RCB_MAXLEN_FLAGS(BGE_TX_RING_CNT, 0));
1457 		vrcb += sizeof(struct bge_rcb);
1458 	}
1459 
1460 	/*
1461 	 * Disable all receive return rings by setting the
1462 	 * 'ring disabled' bit in the flags field of all the receive
1463 	 * return ring control blocks, located in NIC memory.
1464 	 */
1465 	if (BNX_IS_5717_PLUS(sc)) {
1466 		/* Should be 17, use 16 until we get an SRAM map. */
1467 		limit = 16;
1468 	} else if (BNX_IS_57765_FAMILY(sc) ||
1469 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762) {
1470 		limit = 4;
1471 	} else {
1472 		limit = 1;
1473 	}
1474 	/* Disable all receive return rings. */
1475 	vrcb = BGE_MEMWIN_START + BGE_RX_RETURN_RING_RCB;
1476 	for (i = 0; i < limit; i++) {
1477 		RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_hi, 0);
1478 		RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_lo, 0);
1479 		RCB_WRITE_4(sc, vrcb, bge_maxlen_flags,
1480 		    BGE_RCB_FLAG_RING_DISABLED);
1481 		bnx_writembx(sc, BGE_MBX_RX_CONS0_LO +
1482 		    (i * (sizeof(uint64_t))), 0);
1483 		vrcb += sizeof(struct bge_rcb);
1484 	}
1485 
1486 	/*
1487 	 * Set up receive return rings.
1488 	 */
1489 	vrcb = BGE_MEMWIN_START + BGE_RX_RETURN_RING_RCB;
1490 	for (i = 0; i < sc->bnx_rx_retcnt; ++i) {
1491 		struct bnx_rx_ret_ring *ret = &sc->bnx_rx_ret_ring[i];
1492 
1493 		BGE_HOSTADDR(taddr, ret->bnx_rx_ret_ring_paddr);
1494 		RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_hi,
1495 		    taddr.bge_addr_hi);
1496 		RCB_WRITE_4(sc, vrcb, bge_hostaddr.bge_addr_lo,
1497 		    taddr.bge_addr_lo);
1498 		RCB_WRITE_4(sc, vrcb, bge_maxlen_flags,
1499 		    BGE_RCB_MAXLEN_FLAGS(BNX_RETURN_RING_CNT, 0));
1500 		vrcb += sizeof(struct bge_rcb);
1501 	}
1502 
1503 	/* Set random backoff seed for TX */
1504 	CSR_WRITE_4(sc, BGE_TX_RANDOM_BACKOFF,
1505 	    (sc->arpcom.ac_enaddr[0] + sc->arpcom.ac_enaddr[1] +
1506 	     sc->arpcom.ac_enaddr[2] + sc->arpcom.ac_enaddr[3] +
1507 	     sc->arpcom.ac_enaddr[4] + sc->arpcom.ac_enaddr[5]) &
1508 	    BGE_TX_BACKOFF_SEED_MASK);
1509 
1510 	/* Set inter-packet gap */
1511 	val = 0x2620;
1512 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5720 ||
1513 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762) {
1514 		val |= CSR_READ_4(sc, BGE_TX_LENGTHS) &
1515 		    (BGE_TXLEN_JMB_FRM_LEN_MSK | BGE_TXLEN_CNT_DN_VAL_MSK);
1516 	}
1517 	CSR_WRITE_4(sc, BGE_TX_LENGTHS, val);
1518 
1519 	/*
1520 	 * Specify which ring to use for packets that don't match
1521 	 * any RX rules.
1522 	 */
1523 	CSR_WRITE_4(sc, BGE_RX_RULES_CFG, 0x08);
1524 
1525 	/*
1526 	 * Configure number of RX lists. One interrupt distribution
1527 	 * list, sixteen active lists, one bad frames class.
1528 	 */
1529 	CSR_WRITE_4(sc, BGE_RXLP_CFG, 0x181);
1530 
1531 	/* Initialize RX list placement stats mask. */
1532 	CSR_WRITE_4(sc, BGE_RXLP_STATS_ENABLE_MASK, 0x007FFFFF);
1533 	CSR_WRITE_4(sc, BGE_RXLP_STATS_CTL, 0x1);
1534 
1535 	/* Disable host coalescing until we get it set up */
1536 	CSR_WRITE_4(sc, BGE_HCC_MODE, 0x00000000);
1537 
1538 	/* Poll to make sure it's shut down. */
1539 	for (i = 0; i < BNX_TIMEOUT; i++) {
1540 		if (!(CSR_READ_4(sc, BGE_HCC_MODE) & BGE_HCCMODE_ENABLE))
1541 			break;
1542 		DELAY(10);
1543 	}
1544 
1545 	if (i == BNX_TIMEOUT) {
1546 		if_printf(&sc->arpcom.ac_if,
1547 			  "host coalescing engine failed to idle\n");
1548 		return(ENXIO);
1549 	}
1550 
1551 	/* Set up host coalescing defaults */
1552 	sc->bnx_coal_chg = BNX_RX_COAL_TICKS_CHG |
1553 	    BNX_TX_COAL_TICKS_CHG |
1554 	    BNX_RX_COAL_BDS_CHG |
1555 	    BNX_TX_COAL_BDS_CHG |
1556 	    BNX_RX_COAL_BDS_INT_CHG |
1557 	    BNX_TX_COAL_BDS_INT_CHG;
1558 	bnx_coal_change(sc);
1559 
1560 	/*
1561 	 * Set up addresses of status blocks
1562 	 */
1563 	intr = &sc->bnx_intr_data[0];
1564 	bzero(intr->bnx_status_block, BGE_STATUS_BLK_SZ);
1565 	CSR_WRITE_4(sc, BGE_HCC_STATUSBLK_ADDR_HI,
1566 	    BGE_ADDR_HI(intr->bnx_status_block_paddr));
1567 	CSR_WRITE_4(sc, BGE_HCC_STATUSBLK_ADDR_LO,
1568 	    BGE_ADDR_LO(intr->bnx_status_block_paddr));
1569 	for (i = 1; i < sc->bnx_intr_cnt; ++i) {
1570 		intr = &sc->bnx_intr_data[i];
1571 		bzero(intr->bnx_status_block, BGE_STATUS_BLK_SZ);
1572 		CSR_WRITE_4(sc, BGE_VEC1_STATUSBLK_ADDR_HI + ((i - 1) * 8),
1573 		    BGE_ADDR_HI(intr->bnx_status_block_paddr));
1574 		CSR_WRITE_4(sc, BGE_VEC1_STATUSBLK_ADDR_LO + ((i - 1) * 8),
1575 		    BGE_ADDR_LO(intr->bnx_status_block_paddr));
1576 	}
1577 
1578 	/* Set up status block partial update size. */
1579 	val = BGE_STATBLKSZ_32BYTE;
1580 #if 0
1581 	/*
1582 	 * Does not seem to have a visible effect in either
1583 	 * bulk data (1472B UDP datagram) or tiny data
1584 	 * (18B UDP datagram) TX tests.
1585 	 */
1586 	val |= BGE_HCCMODE_CLRTICK_TX;
1587 #endif
1588 	/* Turn on host coalescing state machine */
1589 	CSR_WRITE_4(sc, BGE_HCC_MODE, val | BGE_HCCMODE_ENABLE);
1590 
1591 	/* Turn on RX BD completion state machine and enable attentions */
1592 	CSR_WRITE_4(sc, BGE_RBDC_MODE,
1593 	    BGE_RBDCMODE_ENABLE|BGE_RBDCMODE_ATTN);
1594 
1595 	/* Turn on RX list placement state machine */
1596 	CSR_WRITE_4(sc, BGE_RXLP_MODE, BGE_RXLPMODE_ENABLE);
1597 
1598 	val = BGE_MACMODE_TXDMA_ENB | BGE_MACMODE_RXDMA_ENB |
1599 	    BGE_MACMODE_RX_STATS_CLEAR | BGE_MACMODE_TX_STATS_CLEAR |
1600 	    BGE_MACMODE_RX_STATS_ENB | BGE_MACMODE_TX_STATS_ENB |
1601 	    BGE_MACMODE_FRMHDR_DMA_ENB;
1602 
1603 	if (sc->bnx_flags & BNX_FLAG_TBI)
1604 		val |= BGE_PORTMODE_TBI;
1605 	else if (sc->bnx_flags & BNX_FLAG_MII_SERDES)
1606 		val |= BGE_PORTMODE_GMII;
1607 	else
1608 		val |= BGE_PORTMODE_MII;
1609 
1610 	/* Allow APE to send/receive frames. */
1611 	if (sc->bnx_mfw_flags & BNX_MFW_ON_APE)
1612 		val |= BGE_MACMODE_APE_RX_EN | BGE_MACMODE_APE_TX_EN;
1613 
1614 	/* Turn on DMA, clear stats */
1615 	CSR_WRITE_4(sc, BGE_MAC_MODE, val);
1616 	DELAY(40);
1617 
1618 	/* Set misc. local control, enable interrupts on attentions */
1619 	BNX_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_INTR_ONATTN);
1620 
1621 #ifdef notdef
1622 	/* Assert GPIO pins for PHY reset */
1623 	BNX_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_MISCIO_OUT0|
1624 	    BGE_MLC_MISCIO_OUT1|BGE_MLC_MISCIO_OUT2);
1625 	BNX_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_MISCIO_OUTEN0|
1626 	    BGE_MLC_MISCIO_OUTEN1|BGE_MLC_MISCIO_OUTEN2);
1627 #endif
1628 
1629 	if (sc->bnx_intr_type == PCI_INTR_TYPE_MSIX)
1630 		bnx_enable_msi(sc, TRUE);
1631 
1632 	/* Turn on write DMA state machine */
1633 	val = BGE_WDMAMODE_ENABLE|BGE_WDMAMODE_ALL_ATTNS;
1634 	/* Enable host coalescing bug fix. */
1635 	val |= BGE_WDMAMODE_STATUS_TAG_FIX;
1636 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5785) {
1637 		/* Request larger DMA burst size to get better performance. */
1638 		val |= BGE_WDMAMODE_BURST_ALL_DATA;
1639 	}
1640 	CSR_WRITE_4(sc, BGE_WDMA_MODE, val);
1641 	DELAY(40);
1642 
1643 	if (BNX_IS_57765_PLUS(sc)) {
1644 		uint32_t dmactl, dmactl_reg;
1645 
1646 		if (sc->bnx_asicrev == BGE_ASICREV_BCM5762)
1647 			dmactl_reg = BGE_RDMA_RSRVCTRL2;
1648 		else
1649 			dmactl_reg = BGE_RDMA_RSRVCTRL;
1650 
1651 		dmactl = CSR_READ_4(sc, dmactl_reg);
1652 		/*
1653 		 * Adjust tx margin to prevent TX data corruption and
1654 		 * fix internal FIFO overflow.
1655 		 */
1656 		if (sc->bnx_asicrev == BGE_ASICREV_BCM5719 ||
1657 		    sc->bnx_asicrev == BGE_ASICREV_BCM5720 ||
1658 		    sc->bnx_asicrev == BGE_ASICREV_BCM5762) {
1659 			dmactl &= ~(BGE_RDMA_RSRVCTRL_FIFO_LWM_MASK |
1660 			    BGE_RDMA_RSRVCTRL_FIFO_HWM_MASK |
1661 			    BGE_RDMA_RSRVCTRL_TXMRGN_MASK);
1662 			dmactl |= BGE_RDMA_RSRVCTRL_FIFO_LWM_1_5K |
1663 			    BGE_RDMA_RSRVCTRL_FIFO_HWM_1_5K |
1664 			    BGE_RDMA_RSRVCTRL_TXMRGN_320B;
1665 		}
1666 		/*
1667 		 * Enable fix for read DMA FIFO overruns.
1668 		 * The fix is to limit the number of RX BDs
1669 		 * the hardware would fetch at a fime.
1670 		 */
1671 		CSR_WRITE_4(sc, dmactl_reg,
1672 		    dmactl | BGE_RDMA_RSRVCTRL_FIFO_OFLW_FIX);
1673 	}
1674 
1675 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5719) {
1676 		CSR_WRITE_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL,
1677 		    CSR_READ_4(sc, BGE_RDMA_LSO_CRPTEN_CTRL) |
1678 		    BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_BD_4K |
1679 		    BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_LSO_4K);
1680 	} else if (sc->bnx_asicrev == BGE_ASICREV_BCM5720 ||
1681 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762) {
1682 		uint32_t ctrl_reg;
1683 
1684 		if (sc->bnx_asicrev == BGE_ASICREV_BCM5762)
1685 			ctrl_reg = BGE_RDMA_LSO_CRPTEN_CTRL2;
1686 		else
1687 			ctrl_reg = BGE_RDMA_LSO_CRPTEN_CTRL;
1688 
1689 		/*
1690 		 * Allow 4KB burst length reads for non-LSO frames.
1691 		 * Enable 512B burst length reads for buffer descriptors.
1692 		 */
1693 		CSR_WRITE_4(sc, ctrl_reg,
1694 		    CSR_READ_4(sc, ctrl_reg) |
1695 		    BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_BD_512 |
1696 		    BGE_RDMA_LSO_CRPTEN_CTRL_BLEN_LSO_4K);
1697 	}
1698 
1699 	/* Turn on read DMA state machine */
1700 	val = BGE_RDMAMODE_ENABLE | BGE_RDMAMODE_ALL_ATTNS;
1701 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5717)
1702 		val |= BGE_RDMAMODE_MULT_DMA_RD_DIS;
1703 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5784 ||
1704 	    sc->bnx_asicrev == BGE_ASICREV_BCM5785 ||
1705 	    sc->bnx_asicrev == BGE_ASICREV_BCM57780) {
1706 		val |= BGE_RDMAMODE_BD_SBD_CRPT_ATTN |
1707 		    BGE_RDMAMODE_MBUF_RBD_CRPT_ATTN |
1708 		    BGE_RDMAMODE_MBUF_SBD_CRPT_ATTN;
1709 	}
1710 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5720 ||
1711 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762) {
1712 		val |= CSR_READ_4(sc, BGE_RDMA_MODE) &
1713 		    BGE_RDMAMODE_H2BNC_VLAN_DET;
1714 		/*
1715 		 * Allow multiple outstanding read requests from
1716 		 * non-LSO read DMA engine.
1717 		 */
1718 		val &= ~BGE_RDMAMODE_MULT_DMA_RD_DIS;
1719 	}
1720 	if (sc->bnx_asicrev == BGE_ASICREV_BCM57766)
1721 		val |= BGE_RDMAMODE_JMB_2K_MMRR;
1722 	if (sc->bnx_flags & BNX_FLAG_TSO)
1723 		val |= BGE_RDMAMODE_TSO4_ENABLE;
1724 	val |= BGE_RDMAMODE_FIFO_LONG_BURST;
1725 	CSR_WRITE_4(sc, BGE_RDMA_MODE, val);
1726 	DELAY(40);
1727 
1728 	/* Turn on RX data completion state machine */
1729 	CSR_WRITE_4(sc, BGE_RDC_MODE, BGE_RDCMODE_ENABLE);
1730 
1731 	/* Turn on RX BD initiator state machine */
1732 	CSR_WRITE_4(sc, BGE_RBDI_MODE, BGE_RBDIMODE_ENABLE);
1733 
1734 	/* Turn on RX data and RX BD initiator state machine */
1735 	CSR_WRITE_4(sc, BGE_RDBDI_MODE, BGE_RDBDIMODE_ENABLE);
1736 
1737 	/* Turn on send BD completion state machine */
1738 	CSR_WRITE_4(sc, BGE_SBDC_MODE, BGE_SBDCMODE_ENABLE);
1739 
1740 	/* Turn on send data completion state machine */
1741 	val = BGE_SDCMODE_ENABLE;
1742 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5761)
1743 		val |= BGE_SDCMODE_CDELAY;
1744 	CSR_WRITE_4(sc, BGE_SDC_MODE, val);
1745 
1746 	/* Turn on send data initiator state machine */
1747 	if (sc->bnx_flags & BNX_FLAG_TSO) {
1748 		CSR_WRITE_4(sc, BGE_SDI_MODE, BGE_SDIMODE_ENABLE |
1749 		    BGE_SDIMODE_HW_LSO_PRE_DMA);
1750 	} else {
1751 		CSR_WRITE_4(sc, BGE_SDI_MODE, BGE_SDIMODE_ENABLE);
1752 	}
1753 
1754 	/* Turn on send BD initiator state machine */
1755 	val = BGE_SBDIMODE_ENABLE;
1756 	if (sc->bnx_tx_ringcnt > 1)
1757 		val |= BGE_SBDIMODE_MULTI_TXR;
1758 	CSR_WRITE_4(sc, BGE_SBDI_MODE, val);
1759 
1760 	/* Turn on send BD selector state machine */
1761 	CSR_WRITE_4(sc, BGE_SRS_MODE, BGE_SRSMODE_ENABLE);
1762 
1763 	CSR_WRITE_4(sc, BGE_SDI_STATS_ENABLE_MASK, 0x007FFFFF);
1764 	CSR_WRITE_4(sc, BGE_SDI_STATS_CTL,
1765 	    BGE_SDISTATSCTL_ENABLE|BGE_SDISTATSCTL_FASTER);
1766 
1767 	/* ack/clear link change events */
1768 	CSR_WRITE_4(sc, BGE_MAC_STS, BGE_MACSTAT_SYNC_CHANGED|
1769 	    BGE_MACSTAT_CFG_CHANGED|BGE_MACSTAT_MI_COMPLETE|
1770 	    BGE_MACSTAT_LINK_CHANGED);
1771 	CSR_WRITE_4(sc, BGE_MI_STS, 0);
1772 
1773 	/*
1774 	 * Enable attention when the link has changed state for
1775 	 * devices that use auto polling.
1776 	 */
1777 	if (sc->bnx_flags & BNX_FLAG_TBI) {
1778 		CSR_WRITE_4(sc, BGE_MI_STS, BGE_MISTS_LINK);
1779  	} else {
1780 		if (sc->bnx_mi_mode & BGE_MIMODE_AUTOPOLL) {
1781 			CSR_WRITE_4(sc, BGE_MI_MODE, sc->bnx_mi_mode);
1782 			DELAY(80);
1783 		}
1784 	}
1785 
1786 	/*
1787 	 * Clear any pending link state attention.
1788 	 * Otherwise some link state change events may be lost until attention
1789 	 * is cleared by bnx_intr() -> bnx_softc.bnx_link_upd() sequence.
1790 	 * It's not necessary on newer BCM chips - perhaps enabling link
1791 	 * state change attentions implies clearing pending attention.
1792 	 */
1793 	CSR_WRITE_4(sc, BGE_MAC_STS, BGE_MACSTAT_SYNC_CHANGED|
1794 	    BGE_MACSTAT_CFG_CHANGED|BGE_MACSTAT_MI_COMPLETE|
1795 	    BGE_MACSTAT_LINK_CHANGED);
1796 
1797 	/* Enable link state change attentions. */
1798 	BNX_SETBIT(sc, BGE_MAC_EVT_ENB, BGE_EVTENB_LINK_CHANGED);
1799 
1800 	return(0);
1801 }
1802 
1803 /*
1804  * Probe for a Broadcom chip. Check the PCI vendor and device IDs
1805  * against our list and return its name if we find a match.  The
1806  * name is taken from the compiled-in device table rather than
1807  * from the controller's VPD, which keeps the probe cheap.
1808  * Devices that are not PCIe are rejected outright, since this
1809  * driver only supports PCIe controllers.
1810  */
1811 static int
1812 bnx_probe(device_t dev)
1813 {
1814 	const struct bnx_type *t;
1815 	uint16_t product, vendor;
1816 
1817 	if (!pci_is_pcie(dev))
1818 		return ENXIO;
1819 
1820 	product = pci_get_device(dev);
1821 	vendor = pci_get_vendor(dev);
1822 
1823 	for (t = bnx_devs; t->bnx_name != NULL; t++) {
1824 		if (vendor == t->bnx_vid && product == t->bnx_did)
1825 			break;
1826 	}
1827 	if (t->bnx_name == NULL)
1828 		return ENXIO;
1829 
1830 	device_set_desc(dev, t->bnx_name);
1831 	return 0;
1832 }
1833 
1834 static int
1835 bnx_attach(device_t dev)
1836 {
1837 	struct ifnet *ifp;
1838 	struct bnx_softc *sc;
1839 	struct bnx_rx_std_ring *std;
1840 	struct sysctl_ctx_list *ctx;
1841 	struct sysctl_oid_list *tree;
1842 	uint32_t hwcfg = 0;
1843 	int error = 0, rid, capmask, i, std_cpuid, std_cpuid_def;
1844 	uint8_t ether_addr[ETHER_ADDR_LEN];
1845 	uint16_t product;
1846 	uintptr_t mii_priv = 0;
1847 #if defined(BNX_TSO_DEBUG) || defined(BNX_RSS_DEBUG) || defined(BNX_TSS_DEBUG)
1848 	char desc[32];
1849 #endif
1850 #ifdef IFPOLL_ENABLE
1851 	int offset, offset_def;
1852 #endif
1853 
1854 	sc = device_get_softc(dev);
1855 	sc->bnx_dev = dev;
1856 	callout_init_mp(&sc->bnx_tick_timer);
1857 	lwkt_serialize_init(&sc->bnx_jslot_serializer);
1858 	lwkt_serialize_init(&sc->bnx_main_serialize);
1859 
1860 	/* Always setup interrupt mailboxes */
1861 	for (i = 0; i < BNX_INTR_MAX; ++i) {
1862 		callout_init_mp(&sc->bnx_intr_data[i].bnx_intr_timer);
1863 		sc->bnx_intr_data[i].bnx_sc = sc;
1864 		sc->bnx_intr_data[i].bnx_intr_mbx = BGE_MBX_IRQ0_LO + (i * 8);
1865 		sc->bnx_intr_data[i].bnx_intr_rid = -1;
1866 		sc->bnx_intr_data[i].bnx_intr_cpuid = -1;
1867 	}
1868 
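	/*
	 * The PCI function number is used later to derive the PHY
	 * address on BCM5717 and newer chips.
	 */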
1869 	sc->bnx_func_addr = pci_get_function(dev);
1870 	product = pci_get_device(dev);
1871 
1872 #ifndef BURN_BRIDGES
1873 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
1874 		uint32_t irq, mem;
1875 
1876 		irq = pci_read_config(dev, PCIR_INTLINE, 4);
1877 		mem = pci_read_config(dev, BGE_PCI_BAR0, 4);
1878 
1879 		device_printf(dev, "chip is in D%d power mode "
1880 		    "-- setting to D0\n", pci_get_powerstate(dev));
1881 
1882 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
1883 
1884 		pci_write_config(dev, PCIR_INTLINE, irq, 4);
1885 		pci_write_config(dev, BGE_PCI_BAR0, mem, 4);
1886 	}
1887 #endif	/* !BURN_BRIDGES */
1888 
1889 	/*
1890 	 * Map control/status registers.
1891 	 */
1892 	pci_enable_busmaster(dev);
1893 
1894 	rid = BGE_PCI_BAR0;
1895 	sc->bnx_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
1896 	    RF_ACTIVE);
1897 
1898 	if (sc->bnx_res == NULL) {
1899 		device_printf(dev, "couldn't map memory\n");
1900 		return ENXIO;
1901 	}
1902 
1903 	sc->bnx_btag = rman_get_bustag(sc->bnx_res);
1904 	sc->bnx_bhandle = rman_get_bushandle(sc->bnx_res);
1905 
1906 	/* Save various chip information */
1907 	sc->bnx_chipid =
1908 	    pci_read_config(dev, BGE_PCI_MISC_CTL, 4) >>
1909 	    BGE_PCIMISCCTL_ASICREV_SHIFT;
1910 	if (BGE_ASICREV(sc->bnx_chipid) == BGE_ASICREV_USE_PRODID_REG) {
1911 		/* All chips having dedicated ASICREV register have CPMU */
1912 		sc->bnx_flags |= BNX_FLAG_CPMU;
1913 
1914 		switch (product) {
1915 		case PCI_PRODUCT_BROADCOM_BCM5717:
1916 		case PCI_PRODUCT_BROADCOM_BCM5717C:
1917 		case PCI_PRODUCT_BROADCOM_BCM5718:
1918 		case PCI_PRODUCT_BROADCOM_BCM5719:
1919 		case PCI_PRODUCT_BROADCOM_BCM5720_ALT:
1920 		case PCI_PRODUCT_BROADCOM_BCM5725:
1921 		case PCI_PRODUCT_BROADCOM_BCM5727:
1922 		case PCI_PRODUCT_BROADCOM_BCM5762:
1923 			sc->bnx_chipid = pci_read_config(dev,
1924 			    BGE_PCI_GEN2_PRODID_ASICREV, 4);
1925 			break;
1926 
1927 		case PCI_PRODUCT_BROADCOM_BCM57761:
1928 		case PCI_PRODUCT_BROADCOM_BCM57762:
1929 		case PCI_PRODUCT_BROADCOM_BCM57765:
1930 		case PCI_PRODUCT_BROADCOM_BCM57766:
1931 		case PCI_PRODUCT_BROADCOM_BCM57781:
1932 		case PCI_PRODUCT_BROADCOM_BCM57782:
1933 		case PCI_PRODUCT_BROADCOM_BCM57785:
1934 		case PCI_PRODUCT_BROADCOM_BCM57786:
1935 		case PCI_PRODUCT_BROADCOM_BCM57791:
1936 		case PCI_PRODUCT_BROADCOM_BCM57795:
1937 			sc->bnx_chipid = pci_read_config(dev,
1938 			    BGE_PCI_GEN15_PRODID_ASICREV, 4);
1939 			break;
1940 
1941 		default:
1942 			sc->bnx_chipid = pci_read_config(dev,
1943 			    BGE_PCI_PRODID_ASICREV, 4);
1944 			break;
1945 		}
1946 	}
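	/* BCM5717 C0 is handled as BCM5720 A0 by the rest of the driver. */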
1947 	if (sc->bnx_chipid == BGE_CHIPID_BCM5717_C0)
1948 		sc->bnx_chipid = BGE_CHIPID_BCM5720_A0;
1949 
1950 	sc->bnx_asicrev = BGE_ASICREV(sc->bnx_chipid);
1951 	sc->bnx_chiprev = BGE_CHIPREV(sc->bnx_chipid);
1952 
1953 	switch (sc->bnx_asicrev) {
1954 	case BGE_ASICREV_BCM5717:
1955 	case BGE_ASICREV_BCM5719:
1956 	case BGE_ASICREV_BCM5720:
1957 		sc->bnx_flags |= BNX_FLAG_5717_PLUS | BNX_FLAG_57765_PLUS;
1958 		break;
1959 
1960 	case BGE_ASICREV_BCM5762:
1961 		sc->bnx_flags |= BNX_FLAG_57765_PLUS;
1962 		break;
1963 
1964 	case BGE_ASICREV_BCM57765:
1965 	case BGE_ASICREV_BCM57766:
1966 		sc->bnx_flags |= BNX_FLAG_57765_FAMILY | BNX_FLAG_57765_PLUS;
1967 		break;
1968 	}
1969 
1970 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5717 ||
1971 	    sc->bnx_asicrev == BGE_ASICREV_BCM5719 ||
1972 	    sc->bnx_asicrev == BGE_ASICREV_BCM5720 ||
1973 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762)
1974 		sc->bnx_flags |= BNX_FLAG_APE;
1975 
1976 	sc->bnx_flags |= BNX_FLAG_TSO;
1977 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5719 &&
1978 	    sc->bnx_chipid == BGE_CHIPID_BCM5719_A0)
1979 		sc->bnx_flags &= ~BNX_FLAG_TSO;
1980 
1981 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5717 ||
1982 	    BNX_IS_57765_FAMILY(sc)) {
1983 		/*
1984 		 * All BCM57785 and BCM5718 families chips have a bug that
1985 		 * under certain situation interrupt will not be enabled
1986 		 * even if status tag is written to interrupt mailbox.
1987 		 *
1988 		 * While BCM5719 and BCM5720 have a hardware workaround
1989 		 * which could fix the above bug.
1990 		 * See the comment near BGE_PCIDMARWCTL_TAGGED_STATUS_WA in
1991 		 * bnx_chipinit().
1992 		 *
1993 		 * For the rest of the chips in these two families, we will
1994 		 * have to poll the status block at high rate (10ms currently)
1995 		 * to check whether the interrupt is hosed or not.
1996 		 * See bnx_check_intr_*() for details.
1997 		 */
1998 		sc->bnx_flags |= BNX_FLAG_STATUSTAG_BUG;
1999 	}
2000 
2001 	sc->bnx_pciecap = pci_get_pciecap_ptr(sc->bnx_dev);
2002 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5719 ||
2003 	    sc->bnx_asicrev == BGE_ASICREV_BCM5720)
2004 		pcie_set_max_readrq(dev, PCIEM_DEVCTL_MAX_READRQ_2048);
2005 	else
2006 		pcie_set_max_readrq(dev, PCIEM_DEVCTL_MAX_READRQ_4096);
2007 	device_printf(dev, "CHIP ID 0x%08x; "
2008 		      "ASIC REV 0x%02x; CHIP REV 0x%02x\n",
2009 		      sc->bnx_chipid, sc->bnx_asicrev, sc->bnx_chiprev);
2010 
2011 	/*
2012 	 * Set various PHY quirk flags.
2013 	 */
2014 
2015 	capmask = MII_CAPMASK_DEFAULT;
2016 	if (product == PCI_PRODUCT_BROADCOM_BCM57791 ||
2017 	    product == PCI_PRODUCT_BROADCOM_BCM57795) {
2018 		/* 10/100 only */
2019 		capmask &= ~BMSR_EXTSTAT;
2020 	}
2021 
2022 	mii_priv |= BRGPHY_FLAG_WIRESPEED;
2023 	if (sc->bnx_chipid == BGE_CHIPID_BCM5762_A0)
2024 		mii_priv |= BRGPHY_FLAG_5762_A0;
2025 
2026 	/*
2027 	 * Chips with APE need BAR2 access for APE registers/memory.
2028 	 */
2029 	if (sc->bnx_flags & BNX_FLAG_APE) {
2030 		uint32_t pcistate;
2031 
2032 		rid = PCIR_BAR(2);
2033 		sc->bnx_res2 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
2034 		    RF_ACTIVE);
2035 		if (sc->bnx_res2 == NULL) {
2036 			device_printf(dev, "couldn't map BAR2 memory\n");
2037 			error = ENXIO;
2038 			goto fail;
2039 		}
2040 
2041 		/* Enable APE register/memory access by host driver. */
2042 		pcistate = pci_read_config(dev, BGE_PCI_PCISTATE, 4);
2043 		pcistate |= BGE_PCISTATE_ALLOW_APE_CTLSPC_WR |
2044 		    BGE_PCISTATE_ALLOW_APE_SHMEM_WR |
2045 		    BGE_PCISTATE_ALLOW_APE_PSPACE_WR;
2046 		pci_write_config(dev, BGE_PCI_PCISTATE, pcistate, 4);
2047 
2048 		bnx_ape_lock_init(sc);
2049 		bnx_ape_read_fw_ver(sc);
2050 	}
2051 
2052 	/* Initialize if_name earlier, so if_printf could be used */
2053 	ifp = &sc->arpcom.ac_if;
2054 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2055 
2056 	/*
2057 	 * Try to reset the chip.
2058 	 */
2059 	bnx_sig_pre_reset(sc, BNX_RESET_SHUTDOWN);
2060 	bnx_reset(sc);
2061 	bnx_sig_post_reset(sc, BNX_RESET_SHUTDOWN);
2062 
2063 	if (bnx_chipinit(sc)) {
2064 		device_printf(dev, "chip initialization failed\n");
2065 		error = ENXIO;
2066 		goto fail;
2067 	}
2068 
2069 	/*
2070 	 * Get station address
2071 	 */
2072 	error = bnx_get_eaddr(sc, ether_addr);
2073 	if (error) {
2074 		device_printf(dev, "failed to read station address\n");
2075 		goto fail;
2076 	}
2077 
2078 	/* Setup RX/TX and interrupt count */
2079 	bnx_setup_ring_cnt(sc);
2080 
2081 	if ((sc->bnx_rx_retcnt == 1 && sc->bnx_tx_ringcnt == 1) ||
2082 	    (sc->bnx_rx_retcnt > 1 && sc->bnx_tx_ringcnt > 1)) {
2083 	    	/*
2084 		 * The RX ring and the corresponding TX ring processing
2085 		 * should be on the same CPU, since they share the same
2086 		 * status block.
2087 		 */
2088 		sc->bnx_flags |= BNX_FLAG_RXTX_BUNDLE;
2089 		if (bootverbose)
2090 			device_printf(dev, "RX/TX bundle\n");
2091 		if (sc->bnx_tx_ringcnt > 1) {
2092 			/*
2093 			 * Multiple TX rings do not share the status block
2094 			 * carrying the link status, so the link status
2095 			 * handling has to save its own status_tag.
2096 			 */
2097 			sc->bnx_flags |= BNX_FLAG_STATUS_HASTAG;
2098 			if (bootverbose)
2099 				device_printf(dev, "status needs tag\n");
2100 		}
2101 	} else {
2102 		KKASSERT(sc->bnx_rx_retcnt > 1 && sc->bnx_tx_ringcnt == 1);
2103 		if (bootverbose)
2104 			device_printf(dev, "RX/TX not bundled\n");
2105 	}
2106 
2107 	error = bnx_dma_alloc(dev);
2108 	if (error)
2109 		goto fail;
2110 
2111 #ifdef IFPOLL_ENABLE
2112 	if (sc->bnx_flags & BNX_FLAG_RXTX_BUNDLE) {
2113 		/*
2114 		 * NPOLLING RX/TX CPU offset
2115 		 */
2116 		if (sc->bnx_rx_retcnt == ncpus2) {
2117 			offset = 0;
2118 		} else {
2119 			offset_def =
2120 			(sc->bnx_rx_retcnt * device_get_unit(dev)) % ncpus2;
2121 			offset = device_getenv_int(dev, "npoll.offset",
2122 			    offset_def);
2123 			if (offset >= ncpus2 ||
2124 			    offset % sc->bnx_rx_retcnt != 0) {
2125 				device_printf(dev, "invalid npoll.offset %d, "
2126 				    "use %d\n", offset, offset_def);
2127 				offset = offset_def;
2128 			}
2129 		}
2130 		sc->bnx_npoll_rxoff = offset;
2131 		sc->bnx_npoll_txoff = offset;
2132 	} else {
2133 		/*
2134 		 * NPOLLING RX CPU offset
2135 		 */
2136 		if (sc->bnx_rx_retcnt == ncpus2) {
2137 			offset = 0;
2138 		} else {
2139 			offset_def =
2140 			(sc->bnx_rx_retcnt * device_get_unit(dev)) % ncpus2;
2141 			offset = device_getenv_int(dev, "npoll.rxoff",
2142 			    offset_def);
2143 			if (offset >= ncpus2 ||
2144 			    offset % sc->bnx_rx_retcnt != 0) {
2145 				device_printf(dev, "invalid npoll.rxoff %d, "
2146 				    "use %d\n", offset, offset_def);
2147 				offset = offset_def;
2148 			}
2149 		}
2150 		sc->bnx_npoll_rxoff = offset;
2151 
2152 		/*
2153 		 * NPOLLING TX CPU offset
2154 		 */
2155 		offset_def = device_get_unit(dev) % ncpus2;
2156 		offset = device_getenv_int(dev, "npoll.txoff", offset_def);
2157 		if (offset >= ncpus2) {
2158 			device_printf(dev, "invalid npoll.txoff %d, use %d\n",
2159 			    offset, offset_def);
2160 			offset = offset_def;
2161 		}
2162 		sc->bnx_npoll_txoff = offset;
2163 	}
2164 #endif	/* IFPOLL_ENABLE */
2165 
2166 	/*
2167 	 * Allocate interrupt
2168 	 */
2169 	error = bnx_alloc_intr(sc);
2170 	if (error)
2171 		goto fail;
2172 
2173 	/* Setup serializers */
2174 	bnx_setup_serialize(sc);
2175 
2176 	/* Set default tuneable values. */
2177 	sc->bnx_rx_coal_ticks = BNX_RX_COAL_TICKS_DEF;
2178 	sc->bnx_tx_coal_ticks = BNX_TX_COAL_TICKS_DEF;
2179 	sc->bnx_rx_coal_bds = BNX_RX_COAL_BDS_DEF;
2180 	sc->bnx_rx_coal_bds_poll = sc->bnx_rx_ret_ring[0].bnx_rx_cntmax;
2181 	sc->bnx_tx_coal_bds = BNX_TX_COAL_BDS_DEF;
2182 	sc->bnx_tx_coal_bds_poll = BNX_TX_COAL_BDS_POLL_DEF;
2183 	sc->bnx_rx_coal_bds_int = BNX_RX_COAL_BDS_INT_DEF;
2184 	sc->bnx_tx_coal_bds_int = BNX_TX_COAL_BDS_INT_DEF;
2185 
2186 	/* Set up ifnet structure */
2187 	ifp->if_softc = sc;
2188 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2189 	ifp->if_ioctl = bnx_ioctl;
2190 	ifp->if_start = bnx_start;
2191 #ifdef IFPOLL_ENABLE
2192 	ifp->if_npoll = bnx_npoll;
2193 #endif
2194 	ifp->if_init = bnx_init;
2195 	ifp->if_serialize = bnx_serialize;
2196 	ifp->if_deserialize = bnx_deserialize;
2197 	ifp->if_tryserialize = bnx_tryserialize;
2198 #ifdef INVARIANTS
2199 	ifp->if_serialize_assert = bnx_serialize_assert;
2200 #endif
2201 	ifp->if_mtu = ETHERMTU;
2202 	ifp->if_capabilities = IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2203 
2204 	ifp->if_capabilities |= IFCAP_HWCSUM;
2205 	ifp->if_hwassist = BNX_CSUM_FEATURES;
2206 	if (sc->bnx_flags & BNX_FLAG_TSO) {
2207 		ifp->if_capabilities |= IFCAP_TSO;
2208 		ifp->if_hwassist |= CSUM_TSO;
2209 	}
2210 	if (BNX_RSS_ENABLED(sc))
2211 		ifp->if_capabilities |= IFCAP_RSS;
2212 	ifp->if_capenable = ifp->if_capabilities;
2213 
2214 	ifp->if_nmbclusters = BGE_STD_RX_RING_CNT;
2215 
2216 	ifq_set_maxlen(&ifp->if_snd, BGE_TX_RING_CNT - 1);
2217 	ifq_set_ready(&ifp->if_snd);
2218 	ifq_set_subq_cnt(&ifp->if_snd, sc->bnx_tx_ringcnt);
2219 
2220 	if (sc->bnx_tx_ringcnt > 1) {
2221 		ifp->if_mapsubq = ifq_mapsubq_mask;
2222 		ifq_set_subq_mask(&ifp->if_snd, sc->bnx_tx_ringcnt - 1);
2223 	}
2224 
2225 	/*
2226 	 * Figure out what sort of media we have by checking the
2227 	 * hardware config word in the first 32k of NIC internal memory,
2228 	 * or fall back to examining the EEPROM if necessary.
2229 	 * Note: on some BCM5700 cards, this value appears to be unset.
2230 	 * If that's the case, we have to rely on identifying the NIC
2231 	 * by its PCI subsystem ID, as we do below for the SysKonnect
2232 	 * SK-9D41.
2233 	 */
2234 	if (bnx_readmem_ind(sc, BGE_SRAM_DATA_SIG) == BGE_SRAM_DATA_SIG_MAGIC) {
2235 		hwcfg = bnx_readmem_ind(sc, BGE_SRAM_DATA_CFG);
2236 	} else {
2237 		if (bnx_read_eeprom(sc, (caddr_t)&hwcfg, BGE_EE_HWCFG_OFFSET,
2238 				    sizeof(hwcfg))) {
2239 			device_printf(dev, "failed to read EEPROM\n");
2240 			error = ENXIO;
2241 			goto fail;
2242 		}
2243 		hwcfg = ntohl(hwcfg);
2244 	}
2245 
2246 	/* The SysKonnect SK-9D41 is a 1000baseSX card. */
2247 	if (pci_get_subvendor(dev) == PCI_PRODUCT_SCHNEIDERKOCH_SK_9D41 ||
2248 	    (hwcfg & BGE_HWCFG_MEDIA) == BGE_MEDIA_FIBER)
2249 		sc->bnx_flags |= BNX_FLAG_TBI;
2250 
2251 	/* Setup MI MODE */
2252 	if (sc->bnx_flags & BNX_FLAG_CPMU)
2253 		sc->bnx_mi_mode = BGE_MIMODE_500KHZ_CONST;
2254 	else
2255 		sc->bnx_mi_mode = BGE_MIMODE_BASE;
2256 
2257 	/* Setup link status update stuffs */
2258 	if (sc->bnx_flags & BNX_FLAG_TBI) {
2259 		sc->bnx_link_upd = bnx_tbi_link_upd;
2260 		sc->bnx_link_chg = BGE_MACSTAT_LINK_CHANGED;
2261 	} else if (sc->bnx_mi_mode & BGE_MIMODE_AUTOPOLL) {
2262 		sc->bnx_link_upd = bnx_autopoll_link_upd;
2263 		sc->bnx_link_chg = BGE_MACSTAT_LINK_CHANGED;
2264 	} else {
2265 		sc->bnx_link_upd = bnx_copper_link_upd;
2266 		sc->bnx_link_chg = BGE_MACSTAT_LINK_CHANGED;
2267 	}
2268 
2269 	/* Set default PHY address */
2270 	sc->bnx_phyno = 1;
2271 
2272 	/*
2273 	 * PHY address mapping for various devices.
2274 	 *
2275 	 *          | F0 Cu | F0 Sr | F1 Cu | F1 Sr |
2276 	 * ---------+-------+-------+-------+-------+
2277 	 * BCM57XX  |   1   |   X   |   X   |   X   |
2278 	 * BCM5717  |   1   |   8   |   2   |   9   |
2279 	 * BCM5719  |   1   |   8   |   2   |   9   |
2280 	 * BCM5720  |   1   |   8   |   2   |   9   |
2281 	 *
2282 	 *          | F2 Cu | F2 Sr | F3 Cu | F3 Sr |
2283 	 * ---------+-------+-------+-------+-------+
2284 	 * BCM57XX  |   X   |   X   |   X   |   X   |
2285 	 * BCM5717  |   X   |   X   |   X   |   X   |
2286 	 * BCM5719  |   3   |   10  |   4   |   11  |
2287 	 * BCM5720  |   X   |   X   |   X   |   X   |
2288 	 *
2289 	 * Other addresses may respond but they are not
2290 	 * IEEE compliant PHYs and should be ignored.
2291 	 */
2292 	if (BNX_IS_5717_PLUS(sc)) {
2293 		if (sc->bnx_chipid == BGE_CHIPID_BCM5717_A0) {
2294 			if (CSR_READ_4(sc, BGE_SGDIG_STS) &
2295 			    BGE_SGDIGSTS_IS_SERDES)
2296 				sc->bnx_phyno = sc->bnx_func_addr + 8;
2297 			else
2298 				sc->bnx_phyno = sc->bnx_func_addr + 1;
2299 		} else {
2300 			if (CSR_READ_4(sc, BGE_CPMU_PHY_STRAP) &
2301 			    BGE_CPMU_PHY_STRAP_IS_SERDES)
2302 				sc->bnx_phyno = sc->bnx_func_addr + 8;
2303 			else
2304 				sc->bnx_phyno = sc->bnx_func_addr + 1;
2305 		}
2306 	}
2307 
2308 	if (sc->bnx_flags & BNX_FLAG_TBI) {
2309 		ifmedia_init(&sc->bnx_ifmedia, IFM_IMASK,
2310 		    bnx_ifmedia_upd, bnx_ifmedia_sts);
2311 		ifmedia_add(&sc->bnx_ifmedia, IFM_ETHER|IFM_1000_SX, 0, NULL);
2312 		ifmedia_add(&sc->bnx_ifmedia,
2313 		    IFM_ETHER|IFM_1000_SX|IFM_FDX, 0, NULL);
2314 		ifmedia_add(&sc->bnx_ifmedia, IFM_ETHER|IFM_AUTO, 0, NULL);
2315 		ifmedia_set(&sc->bnx_ifmedia, IFM_ETHER|IFM_AUTO);
2316 		sc->bnx_ifmedia.ifm_media = sc->bnx_ifmedia.ifm_cur->ifm_media;
2317 	} else {
2318 		struct mii_probe_args mii_args;
2319 
2320 		mii_probe_args_init(&mii_args, bnx_ifmedia_upd, bnx_ifmedia_sts);
2321 		mii_args.mii_probemask = 1 << sc->bnx_phyno;
2322 		mii_args.mii_capmask = capmask;
2323 		mii_args.mii_privtag = MII_PRIVTAG_BRGPHY;
2324 		mii_args.mii_priv = mii_priv;
2325 
2326 		error = mii_probe(dev, &sc->bnx_miibus, &mii_args);
2327 		if (error) {
2328 			device_printf(dev, "MII without any PHY!\n");
2329 			goto fail;
2330 		}
2331 	}
2332 
2333 	ctx = device_get_sysctl_ctx(sc->bnx_dev);
2334 	tree = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->bnx_dev));
2335 
2336 	SYSCTL_ADD_INT(ctx, tree, OID_AUTO,
2337 	    "rx_rings", CTLFLAG_RD, &sc->bnx_rx_retcnt, 0, "# of RX rings");
2338 	SYSCTL_ADD_INT(ctx, tree, OID_AUTO,
2339 	    "tx_rings", CTLFLAG_RD, &sc->bnx_tx_ringcnt, 0, "# of TX rings");
2340 
2341 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO, "rx_coal_ticks",
2342 			CTLTYPE_INT | CTLFLAG_RW,
2343 			sc, 0, bnx_sysctl_rx_coal_ticks, "I",
2344 			"Receive coalescing ticks (usec).");
2345 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO, "tx_coal_ticks",
2346 			CTLTYPE_INT | CTLFLAG_RW,
2347 			sc, 0, bnx_sysctl_tx_coal_ticks, "I",
2348 			"Transmit coalescing ticks (usec).");
2349 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO, "rx_coal_bds",
2350 			CTLTYPE_INT | CTLFLAG_RW,
2351 			sc, 0, bnx_sysctl_rx_coal_bds, "I",
2352 			"Receive max coalesced BD count.");
2353 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO, "rx_coal_bds_poll",
2354 			CTLTYPE_INT | CTLFLAG_RW,
2355 			sc, 0, bnx_sysctl_rx_coal_bds_poll, "I",
2356 			"Receive max coalesced BD count in polling.");
2357 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO, "tx_coal_bds",
2358 			CTLTYPE_INT | CTLFLAG_RW,
2359 			sc, 0, bnx_sysctl_tx_coal_bds, "I",
2360 			"Transmit max coalesced BD count.");
2361 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO, "tx_coal_bds_poll",
2362 			CTLTYPE_INT | CTLFLAG_RW,
2363 			sc, 0, bnx_sysctl_tx_coal_bds_poll, "I",
2364 			"Transmit max coalesced BD count in polling.");
2365 	/*
2366 	 * A common design characteristic for many Broadcom
2367 	 * client controllers is that they only support a
2368 	 * single outstanding DMA read operation on the PCIe
2369 	 * bus. This means that it will take twice as long to
2370 	 * fetch a TX frame that is split into header and
2371 	 * payload buffers as it does to fetch a single,
2372 	 * contiguous TX frame (2 reads vs. 1 read). For these
2373 	 * controllers, coalescing buffers to reduce the number
2374 	 * of memory reads is an effective way to get maximum
2375 	 * performance (about 940Mbps).  Without collapsing TX
2376 	 * buffers the maximum TCP bulk transfer performance
2377 	 * is about 850Mbps.  However, forcing mbuf coalescing
2378 	 * consumes a lot of CPU cycles, so leave it off by
2379 	 * default.
2380 	 */
2381 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO,
2382 	    "force_defrag", CTLTYPE_INT | CTLFLAG_RW,
2383 	    sc, 0, bnx_sysctl_force_defrag, "I",
2384 	    "Force defragment on TX path");
2385 
2386 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO,
2387 	    "tx_wreg", CTLTYPE_INT | CTLFLAG_RW,
2388 	    sc, 0, bnx_sysctl_tx_wreg, "I",
2389 	    "# of segments before writing to hardware register");
2390 
2391 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO,
2392 	    "std_refill", CTLTYPE_INT | CTLFLAG_RW,
2393 	    sc, 0, bnx_sysctl_std_refill, "I",
2394 	    "# of packets received before scheduling standard refilling");
2395 
2396 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO,
2397 	    "rx_coal_bds_int", CTLTYPE_INT | CTLFLAG_RW,
2398 	    sc, 0, bnx_sysctl_rx_coal_bds_int, "I",
2399 	    "Receive max coalesced BD count during interrupt.");
2400 	SYSCTL_ADD_PROC(ctx, tree, OID_AUTO,
2401 	    "tx_coal_bds_int", CTLTYPE_INT | CTLFLAG_RW,
2402 	    sc, 0, bnx_sysctl_tx_coal_bds_int, "I",
2403 	    "Transmit max coalesced BD count during interrupt.");
2404 
2405 #ifdef IFPOLL_ENABLE
2406 	if (sc->bnx_flags & BNX_FLAG_RXTX_BUNDLE) {
2407 		SYSCTL_ADD_PROC(ctx, tree, OID_AUTO,
2408 		    "npoll_offset", CTLTYPE_INT | CTLFLAG_RW,
2409 		    sc, 0, bnx_sysctl_npoll_offset, "I",
2410 		    "NPOLLING cpu offset");
2411 	} else {
2412 		SYSCTL_ADD_PROC(ctx, tree, OID_AUTO,
2413 		    "npoll_rxoff", CTLTYPE_INT | CTLFLAG_RW,
2414 		    sc, 0, bnx_sysctl_npoll_rxoff, "I",
2415 		    "NPOLLING RX cpu offset");
2416 		SYSCTL_ADD_PROC(ctx, tree, OID_AUTO,
2417 		    "npoll_txoff", CTLTYPE_INT | CTLFLAG_RW,
2418 		    sc, 0, bnx_sysctl_npoll_txoff, "I",
2419 		    "NPOLLING TX cpu offset");
2420 	}
2421 #endif
2422 
2423 #ifdef BNX_RSS_DEBUG
2424 	SYSCTL_ADD_INT(ctx, tree, OID_AUTO,
2425 	    "std_refill_mask", CTLFLAG_RD,
2426 	    &sc->bnx_rx_std_ring.bnx_rx_std_refill, 0, "");
2427 	SYSCTL_ADD_INT(ctx, tree, OID_AUTO,
2428 	    "std_used", CTLFLAG_RD,
2429 	    &sc->bnx_rx_std_ring.bnx_rx_std_used, 0, "");
2430 	SYSCTL_ADD_INT(ctx, tree, OID_AUTO,
2431 	    "rss_debug", CTLFLAG_RW, &sc->bnx_rss_debug, 0, "");
2432 	for (i = 0; i < sc->bnx_rx_retcnt; ++i) {
2433 		ksnprintf(desc, sizeof(desc), "rx_pkt%d", i);
2434 		SYSCTL_ADD_ULONG(ctx, tree, OID_AUTO,
2435 		    desc, CTLFLAG_RW, &sc->bnx_rx_ret_ring[i].bnx_rx_pkt, "");
2436 
2437 		ksnprintf(desc, sizeof(desc), "rx_force_sched%d", i);
2438 		SYSCTL_ADD_ULONG(ctx, tree, OID_AUTO,
2439 		    desc, CTLFLAG_RW,
2440 		    &sc->bnx_rx_ret_ring[i].bnx_rx_force_sched, "");
2441 	}
2442 #endif
2443 #ifdef BNX_TSS_DEBUG
2444 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
2445 		ksnprintf(desc, sizeof(desc), "tx_pkt%d", i);
2446 		SYSCTL_ADD_ULONG(ctx, tree, OID_AUTO,
2447 		    desc, CTLFLAG_RW, &sc->bnx_tx_ring[i].bnx_tx_pkt, "");
2448 	}
2449 #endif
2450 
2451 	SYSCTL_ADD_ULONG(ctx, tree, OID_AUTO,
2452 	    "norxbds", CTLFLAG_RW, &sc->bnx_norxbds, "");
2453 
2454 	SYSCTL_ADD_ULONG(ctx, tree, OID_AUTO,
2455 	    "errors", CTLFLAG_RW, &sc->bnx_errors, "");
2456 
2457 #ifdef BNX_TSO_DEBUG
2458 	for (i = 0; i < BNX_TSO_NSTATS; ++i) {
2459 		ksnprintf(desc, sizeof(desc), "tso%d", i + 1);
2460 		SYSCTL_ADD_ULONG(ctx, tree, OID_AUTO,
2461 		    desc, CTLFLAG_RW, &sc->bnx_tsosegs[i], "");
2462 	}
2463 #endif
2464 
2465 	/*
2466 	 * Call MI attach routine.
2467 	 */
2468 	ether_ifattach(ifp, ether_addr, NULL);
2469 
2470 	/* Setup TX rings and subqueues */
2471 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
2472 		struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
2473 		struct bnx_tx_ring *txr = &sc->bnx_tx_ring[i];
2474 
2475 		ifsq_set_cpuid(ifsq, txr->bnx_tx_cpuid);
2476 		ifsq_set_hw_serialize(ifsq, &txr->bnx_tx_serialize);
2477 		ifsq_set_priv(ifsq, txr);
2478 		txr->bnx_ifsq = ifsq;
2479 
2480 		ifsq_watchdog_init(&txr->bnx_tx_watchdog, ifsq, bnx_watchdog);
2481 
2482 		if (bootverbose) {
2483 			device_printf(dev, "txr %d -> cpu%d\n", i,
2484 			    txr->bnx_tx_cpuid);
2485 		}
2486 	}
2487 
2488 	error = bnx_setup_intr(sc);
2489 	if (error) {
2490 		ether_ifdetach(ifp);
2491 		goto fail;
2492 	}
2493 	bnx_set_tick_cpuid(sc, FALSE);
2494 
2495 	/*
2496 	 * Create RX standard ring refilling thread
2497 	 */
2498 	std_cpuid_def = device_get_unit(dev) % ncpus;
2499 	std_cpuid = device_getenv_int(dev, "std.cpuid", std_cpuid_def);
2500 	if (std_cpuid < 0 || std_cpuid >= ncpus) {
2501 		device_printf(dev, "invalid std.cpuid %d, use %d\n",
2502 		    std_cpuid, std_cpuid_def);
2503 		std_cpuid = std_cpuid_def;
2504 	}
2505 
2506 	std = &sc->bnx_rx_std_ring;
2507 	lwkt_create(bnx_rx_std_refill_ithread, std, &std->bnx_rx_std_ithread,
2508 	    NULL, TDF_NOSTART | TDF_INTTHREAD, std_cpuid,
2509 	    "%s std", device_get_nameunit(dev));
2510 	lwkt_setpri(std->bnx_rx_std_ithread, TDPRI_INT_MED);
2511 	std->bnx_rx_std_ithread->td_preemptable = lwkt_preempt;
2512 
2513 	return(0);
2514 fail:
2515 	bnx_detach(dev);
2516 	return(error);
2517 }
2518 
2519 static int
2520 bnx_detach(device_t dev)
2521 {
2522 	struct bnx_softc *sc = device_get_softc(dev);
2523 	struct bnx_rx_std_ring *std = &sc->bnx_rx_std_ring;
2524 
2525 	if (device_is_attached(dev)) {
2526 		struct ifnet *ifp = &sc->arpcom.ac_if;
2527 
2528 		ifnet_serialize_all(ifp);
2529 		bnx_stop(sc);
2530 		bnx_teardown_intr(sc, sc->bnx_intr_cnt);
2531 		ifnet_deserialize_all(ifp);
2532 
2533 		ether_ifdetach(ifp);
2534 	}
2535 
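	/*
	 * Terminate the RX standard ring refilling thread.  The stop
	 * request must run on the thread's own CPU, so send an IPI if
	 * needed, then wait for the thread to exit.
	 */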
2536 	if (std->bnx_rx_std_ithread != NULL) {
2537 		tsleep_interlock(std, 0);
2538 
2539 		if (std->bnx_rx_std_ithread->td_gd == mycpu) {
2540 			bnx_rx_std_refill_stop(std);
2541 		} else {
2542 			lwkt_send_ipiq(std->bnx_rx_std_ithread->td_gd,
2543 			    bnx_rx_std_refill_stop, std);
2544 		}
2545 
2546 		tsleep(std, PINTERLOCKED, "bnx_detach", 0);
2547 		if (bootverbose)
2548 			device_printf(dev, "RX std ithread exited\n");
2549 
2550 		lwkt_synchronize_ipiqs("bnx_detach_ipiq");
2551 	}
2552 
2553 	if (sc->bnx_flags & BNX_FLAG_TBI)
2554 		ifmedia_removeall(&sc->bnx_ifmedia);
2555 	if (sc->bnx_miibus)
2556 		device_delete_child(dev, sc->bnx_miibus);
2557 	bus_generic_detach(dev);
2558 
2559 	bnx_free_intr(sc);
2560 
2561 	if (sc->bnx_msix_mem_res != NULL) {
2562 		bus_release_resource(dev, SYS_RES_MEMORY, sc->bnx_msix_mem_rid,
2563 		    sc->bnx_msix_mem_res);
2564 	}
2565 	if (sc->bnx_res != NULL) {
2566 		bus_release_resource(dev, SYS_RES_MEMORY,
2567 		    BGE_PCI_BAR0, sc->bnx_res);
2568 	}
2569 	if (sc->bnx_res2 != NULL) {
2570 		bus_release_resource(dev, SYS_RES_MEMORY,
2571 		    PCIR_BAR(2), sc->bnx_res2);
2572 	}
2573 
2574 	bnx_dma_free(sc);
2575 
2576 	if (sc->bnx_serialize != NULL)
2577 		kfree(sc->bnx_serialize, M_DEVBUF);
2578 
2579 	return 0;
2580 }
2581 
2582 static void
2583 bnx_reset(struct bnx_softc *sc)
2584 {
2585 	device_t dev = sc->bnx_dev;
2586 	uint32_t cachesize, command, reset, mac_mode, mac_mode_mask;
2587 	void (*write_op)(struct bnx_softc *, uint32_t, uint32_t);
2588 	int i, val = 0;
2589 	uint16_t devctl;
2590 
2591 	mac_mode_mask = BGE_MACMODE_HALF_DUPLEX | BGE_MACMODE_PORTMODE;
2592 	if (sc->bnx_mfw_flags & BNX_MFW_ON_APE)
2593 		mac_mode_mask |= BGE_MACMODE_APE_RX_EN | BGE_MACMODE_APE_TX_EN;
2594 	mac_mode = CSR_READ_4(sc, BGE_MAC_MODE) & mac_mode_mask;
2595 
2596 	write_op = bnx_writemem_direct;
2597 
2598 	CSR_WRITE_4(sc, BGE_NVRAM_SWARB, BGE_NVRAMSWARB_SET1);
2599 	for (i = 0; i < 8000; i++) {
2600 		if (CSR_READ_4(sc, BGE_NVRAM_SWARB) & BGE_NVRAMSWARB_GNT1)
2601 			break;
2602 		DELAY(20);
2603 	}
2604 	if (i == 8000)
2605 		if_printf(&sc->arpcom.ac_if, "NVRAM lock timed out!\n");
2606 
2607 	/* Take APE lock when performing reset. */
2608 	bnx_ape_lock(sc, BGE_APE_LOCK_GRC);
2609 
2610 	/* Save some important PCI state. */
2611 	cachesize = pci_read_config(dev, BGE_PCI_CACHESZ, 4);
2612 	command = pci_read_config(dev, BGE_PCI_CMD, 4);
2613 
2614 	pci_write_config(dev, BGE_PCI_MISC_CTL,
2615 	    BGE_PCIMISCCTL_INDIRECT_ACCESS|BGE_PCIMISCCTL_MASK_PCI_INTR|
2616 	    BGE_HIF_SWAP_OPTIONS|BGE_PCIMISCCTL_PCISTATE_RW|
2617 	    BGE_PCIMISCCTL_TAGGED_STATUS, 4);
2618 
2619 	/* Disable fastboot on controllers that support it. */
2620 	if (bootverbose)
2621 		if_printf(&sc->arpcom.ac_if, "Disabling fastboot\n");
2622 	CSR_WRITE_4(sc, BGE_FASTBOOT_PC, 0x0);
2623 
2624 	/*
2625 	 * Write the magic number to SRAM at offset 0xB50.
2626 	 * When firmware finishes its initialization it will
2627 	 * write ~BGE_SRAM_FW_MB_MAGIC to the same location.
2628 	 */
2629 	bnx_writemem_ind(sc, BGE_SRAM_FW_MB, BGE_SRAM_FW_MB_MAGIC);
2630 
2631 	reset = BGE_MISCCFG_RESET_CORE_CLOCKS|(65<<1);
2632 
2633 	/* XXX: Broadcom Linux driver. */
2634 	/* Force PCI-E 1.0a mode */
2635 	if (!BNX_IS_57765_PLUS(sc) &&
2636 	    CSR_READ_4(sc, BGE_PCIE_PHY_TSTCTL) ==
2637 	    (BGE_PCIE_PHY_TSTCTL_PSCRAM |
2638 	     BGE_PCIE_PHY_TSTCTL_PCIE10)) {
2639 		CSR_WRITE_4(sc, BGE_PCIE_PHY_TSTCTL,
2640 		    BGE_PCIE_PHY_TSTCTL_PSCRAM);
2641 	}
2642 	if (sc->bnx_chipid != BGE_CHIPID_BCM5750_A0) {
2643 		/* Prevent PCIE link training during global reset */
2644 		CSR_WRITE_4(sc, BGE_MISC_CFG, (1<<29));
2645 		reset |= (1<<29);
2646 	}
2647 
2648 	/*
2649 	 * Set GPHY Power Down Override to leave GPHY
2650 	 * powered up in D0 uninitialized.
2651 	 */
2652 	if ((sc->bnx_flags & BNX_FLAG_CPMU) == 0)
2653 		reset |= BGE_MISCCFG_GPHY_PD_OVERRIDE;
2654 
2655 	/* Issue global reset */
2656 	write_op(sc, BGE_MISC_CFG, reset);
2657 
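	/* Give the chip time to complete the global reset. */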
2658 	DELAY(100 * 1000);
2659 
2660 	/* XXX: Broadcom Linux driver. */
2661 	if (sc->bnx_chipid == BGE_CHIPID_BCM5750_A0) {
2662 		uint32_t v;
2663 
2664 		DELAY(500000); /* wait for link training to complete */
2665 		v = pci_read_config(dev, 0xc4, 4);
2666 		pci_write_config(dev, 0xc4, v | (1<<15), 4);
2667 	}
2668 
2669 	devctl = pci_read_config(dev, sc->bnx_pciecap + PCIER_DEVCTRL, 2);
2670 
2671 	/* Disable no snoop and disable relaxed ordering. */
2672 	devctl &= ~(PCIEM_DEVCTL_RELAX_ORDER | PCIEM_DEVCTL_NOSNOOP);
2673 
2674 	/* Old PCI-E chips only support 128 bytes Max PayLoad Size. */
2675 	if ((sc->bnx_flags & BNX_FLAG_CPMU) == 0) {
2676 		devctl &= ~PCIEM_DEVCTL_MAX_PAYLOAD_MASK;
2677 		devctl |= PCIEM_DEVCTL_MAX_PAYLOAD_128;
2678 	}
2679 
2680 	pci_write_config(dev, sc->bnx_pciecap + PCIER_DEVCTRL,
2681 	    devctl, 2);
2682 
2683 	/* Clear error status. */
2684 	pci_write_config(dev, sc->bnx_pciecap + PCIER_DEVSTS,
2685 	    PCIEM_DEVSTS_CORR_ERR |
2686 	    PCIEM_DEVSTS_NFATAL_ERR |
2687 	    PCIEM_DEVSTS_FATAL_ERR |
2688 	    PCIEM_DEVSTS_UNSUPP_REQ, 2);
2689 
2690 	/* Reset some of the PCI state that got zapped by reset */
2691 	pci_write_config(dev, BGE_PCI_MISC_CTL,
2692 	    BGE_PCIMISCCTL_INDIRECT_ACCESS|BGE_PCIMISCCTL_MASK_PCI_INTR|
2693 	    BGE_HIF_SWAP_OPTIONS|BGE_PCIMISCCTL_PCISTATE_RW|
2694 	    BGE_PCIMISCCTL_TAGGED_STATUS, 4);
2695 	val = BGE_PCISTATE_ROM_ENABLE | BGE_PCISTATE_ROM_RETRY_ENABLE;
2696 	if (sc->bnx_mfw_flags & BNX_MFW_ON_APE) {
2697 		val |= BGE_PCISTATE_ALLOW_APE_CTLSPC_WR |
2698 		    BGE_PCISTATE_ALLOW_APE_SHMEM_WR |
2699 		    BGE_PCISTATE_ALLOW_APE_PSPACE_WR;
2700 	}
2701 	pci_write_config(dev, BGE_PCI_PCISTATE, val, 4);
2702 	pci_write_config(dev, BGE_PCI_CACHESZ, cachesize, 4);
2703 	pci_write_config(dev, BGE_PCI_CMD, command, 4);
2704 
2705 	/* Enable memory arbiter */
2706 	CSR_WRITE_4(sc, BGE_MARB_MODE, BGE_MARBMODE_ENABLE);
2707 
2708 	/* Fix up byte swapping */
2709 	CSR_WRITE_4(sc, BGE_MODE_CTL, bnx_dma_swap_options(sc));
2710 
2711 	val = CSR_READ_4(sc, BGE_MAC_MODE);
2712 	val = (val & ~mac_mode_mask) | mac_mode;
2713 	CSR_WRITE_4(sc, BGE_MAC_MODE, val);
2714 	DELAY(40);
2715 
2716 	bnx_ape_unlock(sc, BGE_APE_LOCK_GRC);
2717 
2718 	/*
2719 	 * Poll until we see the 1's complement of the magic number.
2720 	 * This indicates that the firmware initialization is complete.
2721 	 */
2722 	for (i = 0; i < BNX_FIRMWARE_TIMEOUT; i++) {
2723 		val = bnx_readmem_ind(sc, BGE_SRAM_FW_MB);
2724 		if (val == ~BGE_SRAM_FW_MB_MAGIC)
2725 			break;
2726 		DELAY(10);
2727 	}
2728 	if (i == BNX_FIRMWARE_TIMEOUT) {
2729 		if_printf(&sc->arpcom.ac_if, "firmware handshake "
2730 			  "timed out, found 0x%08x\n", val);
2731 	}
2732 
2733 	/* BCM57765 A0 needs additional time before accessing. */
2734 	if (sc->bnx_chipid == BGE_CHIPID_BCM57765_A0)
2735 		DELAY(10 * 1000);
2736 
2737 	/*
2738 	 * The 5704 in TBI mode apparently needs some special
2739 	 * adjustment to ensure the SERDES drive level is set
2740 	 * to 1.2V.
2741 	 */
2742 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5704 &&
2743 	    (sc->bnx_flags & BNX_FLAG_TBI)) {
2744 		uint32_t serdescfg;
2745 
2746 		serdescfg = CSR_READ_4(sc, BGE_SERDES_CFG);
2747 		serdescfg = (serdescfg & ~0xFFF) | 0x880;
2748 		CSR_WRITE_4(sc, BGE_SERDES_CFG, serdescfg);
2749 	}
2750 
2751 	CSR_WRITE_4(sc, BGE_MI_MODE,
2752 	    sc->bnx_mi_mode & ~BGE_MIMODE_AUTOPOLL);
2753 	DELAY(80);
2754 
2755 	/* XXX: Broadcom Linux driver. */
2756 	if (!BNX_IS_57765_PLUS(sc)) {
2757 		uint32_t v;
2758 
2759 		/* Enable Data FIFO protection. */
2760 		v = CSR_READ_4(sc, BGE_PCIE_TLDLPL_PORT);
2761 		CSR_WRITE_4(sc, BGE_PCIE_TLDLPL_PORT, v | (1 << 25));
2762 	}
2763 
2764 	DELAY(10000);
2765 
2766 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5720) {
2767 		BNX_CLRBIT(sc, BGE_CPMU_CLCK_ORIDE,
2768 		    CPMU_CLCK_ORIDE_MAC_ORIDE_EN);
2769 	}
2770 }
2771 
2772 /*
2773  * Frame reception handling. This is called if there's a frame
2774  * on the receive return list.
2775  *
2776  * Note: we have to be able to handle two possibilities here:
2777  * 1) the frame is from the jumbo receive ring
2778  * 2) the frame is from the standard receive ring
2779  */
2780 
2781 static void
2782 bnx_rxeof(struct bnx_rx_ret_ring *ret, uint16_t rx_prod, int count)
2783 {
2784 	struct bnx_softc *sc = ret->bnx_sc;
2785 	struct bnx_rx_std_ring *std = ret->bnx_std;
2786 	struct ifnet *ifp = &sc->arpcom.ac_if;
2787 	int std_used = 0, cpuid = mycpuid;
2788 
2789 	while (ret->bnx_rx_saved_considx != rx_prod && count != 0) {
2790 		struct pktinfo pi0, *pi = NULL;
2791 		struct bge_rx_bd *cur_rx;
2792 		struct bnx_rx_buf *rb;
2793 		uint32_t rxidx;
2794 		struct mbuf *m = NULL;
2795 		uint16_t vlan_tag = 0;
2796 		int have_tag = 0;
2797 
2798 		--count;
2799 
2800 		cur_rx = &ret->bnx_rx_ret_ring[ret->bnx_rx_saved_considx];
2801 
2802 		rxidx = cur_rx->bge_idx;
2803 		KKASSERT(rxidx < BGE_STD_RX_RING_CNT);
2804 
2805 		BNX_INC(ret->bnx_rx_saved_considx, BNX_RETURN_RING_CNT);
2806 #ifdef BNX_RSS_DEBUG
2807 		ret->bnx_rx_pkt++;
2808 #endif
2809 
2810 		if (cur_rx->bge_flags & BGE_RXBDFLAG_VLAN_TAG) {
2811 			have_tag = 1;
2812 			vlan_tag = cur_rx->bge_vlan_tag;
2813 		}
2814 
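		/*
		 * Once enough RX buffers have been consumed in this pass,
		 * publish the used count and kick the standard ring
		 * refilling thread.
		 */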
2815 		if (ret->bnx_rx_cnt >= ret->bnx_rx_cntmax) {
2816 			atomic_add_int(&std->bnx_rx_std_used, std_used);
2817 			std_used = 0;
2818 
2819 			bnx_rx_std_refill_sched(ret, std);
2820 		}
2821 		ret->bnx_rx_cnt++;
2822 		++std_used;
2823 
2824 		rb = &std->bnx_rx_std_buf[rxidx];
2825 		m = rb->bnx_rx_mbuf;
2826 		if (cur_rx->bge_flags & BGE_RXBDFLAG_ERROR) {
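			/*
			 * Leave the current mbuf installed in this slot and
			 * simply mark the slot as refilled.
			 */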
2827 			IFNET_STAT_INC(ifp, ierrors, 1);
2828 			cpu_sfence();
2829 			rb->bnx_rx_refilled = 1;
2830 			continue;
2831 		}
2832 		if (bnx_newbuf_std(ret, rxidx, 0)) {
2833 			IFNET_STAT_INC(ifp, ierrors, 1);
2834 			continue;
2835 		}
2836 
2837 		IFNET_STAT_INC(ifp, ipackets, 1);
2838 		m->m_pkthdr.len = m->m_len = cur_rx->bge_len - ETHER_CRC_LEN;
2839 		m->m_pkthdr.rcvif = ifp;
2840 
2841 		if ((ifp->if_capenable & IFCAP_RXCSUM) &&
2842 		    (cur_rx->bge_flags & BGE_RXBDFLAG_IPV6) == 0) {
2843 			if (cur_rx->bge_flags & BGE_RXBDFLAG_IP_CSUM) {
2844 				m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
2845 				if ((cur_rx->bge_error_flag &
2846 				    BGE_RXERRFLAG_IP_CSUM_NOK) == 0)
2847 					m->m_pkthdr.csum_flags |= CSUM_IP_VALID;
2848 			}
2849 			if (cur_rx->bge_flags & BGE_RXBDFLAG_TCP_UDP_CSUM) {
2850 				m->m_pkthdr.csum_data =
2851 				    cur_rx->bge_tcp_udp_csum;
2852 				m->m_pkthdr.csum_flags |= CSUM_DATA_VALID |
2853 				    CSUM_PSEUDO_HDR;
2854 			}
2855 		}
2856 		if (ifp->if_capenable & IFCAP_RSS) {
2857 			pi = bnx_rss_info(&pi0, cur_rx);
2858 			if (pi != NULL &&
2859 			    (cur_rx->bge_flags & BGE_RXBDFLAG_RSS_HASH))
2860 				m_sethash(m, toeplitz_hash(cur_rx->bge_hash));
2861 		}
2862 
2863 		/*
2864 		 * If we received a packet with a vlan tag, pass it
2865 		 * to vlan_input() instead of ether_input().
2866 		 */
2867 		if (have_tag) {
2868 			m->m_flags |= M_VLANTAG;
2869 			m->m_pkthdr.ether_vlantag = vlan_tag;
2870 		}
2871 		ifp->if_input(ifp, m, pi, cpuid);
2872 	}
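	/* Tell the chip how far we have processed the RX return ring. */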
2873 	bnx_writembx(sc, ret->bnx_rx_mbx, ret->bnx_rx_saved_considx);
2874 
2875 	if (std_used > 0) {
2876 		int cur_std_used;
2877 
2878 		cur_std_used = atomic_fetchadd_int(&std->bnx_rx_std_used,
2879 		    std_used);
2880 		if (cur_std_used + std_used >= (BGE_STD_RX_RING_CNT / 2)) {
2881 #ifdef BNX_RSS_DEBUG
2882 			ret->bnx_rx_force_sched++;
2883 #endif
2884 			bnx_rx_std_refill_sched(ret, std);
2885 		}
2886 	}
2887 }
2888 
2889 static void
2890 bnx_txeof(struct bnx_tx_ring *txr, uint16_t tx_cons)
2891 {
2892 	struct ifnet *ifp = &txr->bnx_sc->arpcom.ac_if;
2893 
2894 	/*
2895 	 * Go through our tx ring and free mbufs for those
2896 	 * frames that have been sent.
2897 	 */
2898 	while (txr->bnx_tx_saved_considx != tx_cons) {
2899 		struct bnx_tx_buf *buf;
2900 		uint32_t idx = 0;
2901 
2902 		idx = txr->bnx_tx_saved_considx;
2903 		buf = &txr->bnx_tx_buf[idx];
2904 		if (buf->bnx_tx_mbuf != NULL) {
2905 			IFNET_STAT_INC(ifp, opackets, 1);
2906 #ifdef BNX_TSS_DEBUG
2907 			txr->bnx_tx_pkt++;
2908 #endif
2909 			bus_dmamap_unload(txr->bnx_tx_mtag,
2910 			    buf->bnx_tx_dmamap);
2911 			m_freem(buf->bnx_tx_mbuf);
2912 			buf->bnx_tx_mbuf = NULL;
2913 		}
2914 		txr->bnx_tx_cnt--;
2915 		BNX_INC(txr->bnx_tx_saved_considx, BGE_TX_RING_CNT);
2916 	}
2917 
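	/*
	 * Unblock the TX subqueue once enough TX descriptors have been
	 * freed to accept another frame.
	 */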
2918 	if ((BGE_TX_RING_CNT - txr->bnx_tx_cnt) >=
2919 	    (BNX_NSEG_RSVD + BNX_NSEG_SPARE))
2920 		ifsq_clr_oactive(txr->bnx_ifsq);
2921 
2922 	if (txr->bnx_tx_cnt == 0)
2923 		txr->bnx_tx_watchdog.wd_timer = 0;
2924 
2925 	if (!ifsq_is_empty(txr->bnx_ifsq))
2926 		ifsq_devstart(txr->bnx_ifsq);
2927 }
2928 
2929 static int
2930 bnx_handle_status(struct bnx_softc *sc)
2931 {
2932 	uint32_t status;
2933 	int handle = 0;
2934 
2935 	status = *sc->bnx_hw_status;
2936 
2937 	if (status & BGE_STATFLAG_ERROR) {
2938 		uint32_t val;
2939 		int reset = 0;
2940 
2941 		sc->bnx_errors++;
2942 
2943 		val = CSR_READ_4(sc, BGE_FLOW_ATTN);
2944 		if (val & ~BGE_FLOWATTN_MB_LOWAT) {
2945 			if_printf(&sc->arpcom.ac_if,
2946 			    "flow attn 0x%08x\n", val);
2947 			reset = 1;
2948 		}
2949 
2950 		val = CSR_READ_4(sc, BGE_MSI_STATUS);
2951 		if (val & ~BGE_MSISTAT_MSI_PCI_REQ) {
2952 			if_printf(&sc->arpcom.ac_if,
2953 			    "msi status 0x%08x\n", val);
2954 			reset = 1;
2955 		}
2956 
2957 		val = CSR_READ_4(sc, BGE_RDMA_STATUS);
2958 		if (val) {
2959 			if_printf(&sc->arpcom.ac_if,
2960 			    "rdma status 0x%08x\n", val);
2961 			reset = 1;
2962 		}
2963 
2964 		val = CSR_READ_4(sc, BGE_WDMA_STATUS);
2965 		if (val) {
2966 			if_printf(&sc->arpcom.ac_if,
2967 			    "wdma status 0x%08x\n", val);
2968 			reset = 1;
2969 		}
2970 
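		/*
		 * Any of the above error conditions requires
		 * reinitializing the chip.
		 */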
2971 		if (reset) {
2972 			bnx_serialize_skipmain(sc);
2973 			bnx_init(sc);
2974 			bnx_deserialize_skipmain(sc);
2975 		}
2976 		handle = 1;
2977 	}
2978 
2979 	if ((status & BGE_STATFLAG_LINKSTATE_CHANGED) || sc->bnx_link_evt) {
2980 		if (bootverbose) {
2981 			if_printf(&sc->arpcom.ac_if, "link change, "
2982 			    "link_evt %d\n", sc->bnx_link_evt);
2983 		}
2984 		bnx_link_poll(sc);
2985 		handle = 1;
2986 	}
2987 
2988 	return handle;
2989 }
2990 
2991 #ifdef IFPOLL_ENABLE
2992 
2993 static void
2994 bnx_npoll_rx(struct ifnet *ifp __unused, void *xret, int cycle)
2995 {
2996 	struct bnx_rx_ret_ring *ret = xret;
2997 	uint16_t rx_prod;
2998 
2999 	ASSERT_SERIALIZED(&ret->bnx_rx_ret_serialize);
3000 
3001 	ret->bnx_saved_status_tag = *ret->bnx_hw_status_tag;
3002 	cpu_lfence();
3003 
3004 	rx_prod = *ret->bnx_rx_considx;
3005 	if (ret->bnx_rx_saved_considx != rx_prod)
3006 		bnx_rxeof(ret, rx_prod, cycle);
3007 }
3008 
3009 static void
3010 bnx_npoll_tx_notag(struct ifnet *ifp __unused, void *xtxr, int cycle __unused)
3011 {
3012 	struct bnx_tx_ring *txr = xtxr;
3013 	uint16_t tx_cons;
3014 
3015 	ASSERT_SERIALIZED(&txr->bnx_tx_serialize);
3016 
3017 	tx_cons = *txr->bnx_tx_considx;
3018 	if (txr->bnx_tx_saved_considx != tx_cons)
3019 		bnx_txeof(txr, tx_cons);
3020 }
3021 
3022 static void
3023 bnx_npoll_tx(struct ifnet *ifp, void *xtxr, int cycle)
3024 {
3025 	struct bnx_tx_ring *txr = xtxr;
3026 
3027 	ASSERT_SERIALIZED(&txr->bnx_tx_serialize);
3028 
3029 	txr->bnx_saved_status_tag = *txr->bnx_hw_status_tag;
3030 	cpu_lfence();
3031 	bnx_npoll_tx_notag(ifp, txr, cycle);
3032 }
3033 
3034 static void
3035 bnx_npoll_status_notag(struct ifnet *ifp)
3036 {
3037 	struct bnx_softc *sc = ifp->if_softc;
3038 
3039 	ASSERT_SERIALIZED(&sc->bnx_main_serialize);
3040 
3041 	if (bnx_handle_status(sc)) {
3042 		/*
3043 		 * Status changes are handled; force the chip to
3044 		 * update the status block to reflect whether there
3045 		 * are more status changes or not; otherwise stale status
3046 		 * changes would be seen over and over again.
3047 		 */
3048 		BNX_SETBIT(sc, BGE_HCC_MODE, BGE_HCCMODE_COAL_NOW);
3049 	}
3050 }
3051 
3052 static void
3053 bnx_npoll_status(struct ifnet *ifp)
3054 {
3055 	struct bnx_softc *sc = ifp->if_softc;
3056 
3057 	ASSERT_SERIALIZED(&sc->bnx_main_serialize);
3058 
3059 	sc->bnx_saved_status_tag = *sc->bnx_hw_status_tag;
3060 	cpu_lfence();
3061 	bnx_npoll_status_notag(ifp);
3062 }
3063 
3064 static void
3065 bnx_npoll(struct ifnet *ifp, struct ifpoll_info *info)
3066 {
3067 	struct bnx_softc *sc = ifp->if_softc;
3068 	int i;
3069 
3070 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3071 
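	/*
	 * info != NULL means polling is being enabled: register the
	 * per-ring poll handlers.  info == NULL means polling is being
	 * disabled: switch back to interrupt driven operation.
	 */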
3072 	if (info != NULL) {
3073 		if (sc->bnx_flags & BNX_FLAG_STATUS_HASTAG)
3074 			info->ifpi_status.status_func = bnx_npoll_status;
3075 		else
3076 			info->ifpi_status.status_func = bnx_npoll_status_notag;
3077 		info->ifpi_status.serializer = &sc->bnx_main_serialize;
3078 
3079 		for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
3080 			struct bnx_tx_ring *txr = &sc->bnx_tx_ring[i];
3081 			int idx = i + sc->bnx_npoll_txoff;
3082 
3083 			KKASSERT(idx < ncpus2);
3084 			if (sc->bnx_flags & BNX_FLAG_RXTX_BUNDLE) {
3085 				info->ifpi_tx[idx].poll_func =
3086 				    bnx_npoll_tx_notag;
3087 			} else {
3088 				info->ifpi_tx[idx].poll_func = bnx_npoll_tx;
3089 			}
3090 			info->ifpi_tx[idx].arg = txr;
3091 			info->ifpi_tx[idx].serializer = &txr->bnx_tx_serialize;
3092 			ifsq_set_cpuid(txr->bnx_ifsq, idx);
3093 		}
3094 
3095 		for (i = 0; i < sc->bnx_rx_retcnt; ++i) {
3096 			struct bnx_rx_ret_ring *ret = &sc->bnx_rx_ret_ring[i];
3097 			int idx = i + sc->bnx_npoll_rxoff;
3098 
3099 			KKASSERT(idx < ncpus2);
3100 			info->ifpi_rx[idx].poll_func = bnx_npoll_rx;
3101 			info->ifpi_rx[idx].arg = ret;
3102 			info->ifpi_rx[idx].serializer =
3103 			    &ret->bnx_rx_ret_serialize;
3104 		}
3105 
3106 		if (ifp->if_flags & IFF_RUNNING) {
3107 			bnx_disable_intr(sc);
3108 			bnx_set_tick_cpuid(sc, TRUE);
3109 
3110 			sc->bnx_coal_chg = BNX_TX_COAL_BDS_CHG |
3111 			    BNX_RX_COAL_BDS_CHG;
3112 			bnx_coal_change(sc);
3113 		}
3114 	} else {
3115 		for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
3116 			ifsq_set_cpuid(sc->bnx_tx_ring[i].bnx_ifsq,
3117 			    sc->bnx_tx_ring[i].bnx_tx_cpuid);
3118 		}
3119 		if (ifp->if_flags & IFF_RUNNING) {
3120 			sc->bnx_coal_chg = BNX_TX_COAL_BDS_CHG |
3121 			    BNX_RX_COAL_BDS_CHG;
3122 			bnx_coal_change(sc);
3123 
3124 			bnx_enable_intr(sc);
3125 			bnx_set_tick_cpuid(sc, FALSE);
3126 		}
3127 	}
3128 }
3129 
3130 #endif	/* IFPOLL_ENABLE */
3131 
3132 static void
3133 bnx_intr_legacy(void *xsc)
3134 {
3135 	struct bnx_softc *sc = xsc;
3136 	struct bnx_rx_ret_ring *ret = &sc->bnx_rx_ret_ring[0];
3137 
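	/*
	 * If the status tag has not changed, consult the PCI state
	 * register to determine whether this (possibly shared)
	 * interrupt was actually raised by us.
	 */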
3138 	if (ret->bnx_saved_status_tag == *ret->bnx_hw_status_tag) {
3139 		uint32_t val;
3140 
3141 		val = pci_read_config(sc->bnx_dev, BGE_PCI_PCISTATE, 4);
3142 		if (val & BGE_PCISTAT_INTR_NOTACT)
3143 			return;
3144 	}
3145 
3146 	/*
3147 	 * NOTE:
3148 	 * The interrupt will have to be disabled if tagged status
3149 	 * is used, else the interrupt will always be asserted on
3150 	 * certain chips (at least on BCM5750 AX/BX).
3151 	 */
3152 	bnx_writembx(sc, BGE_MBX_IRQ0_LO, 1);
3153 
3154 	bnx_intr(sc);
3155 }
3156 
3157 static void
3158 bnx_msi(void *xsc)
3159 {
3160 	bnx_intr(xsc);
3161 }
3162 
3163 static void
3164 bnx_intr(struct bnx_softc *sc)
3165 {
3166 	struct ifnet *ifp = &sc->arpcom.ac_if;
3167 	struct bnx_rx_ret_ring *ret = &sc->bnx_rx_ret_ring[0];
3168 
3169 	ASSERT_SERIALIZED(&sc->bnx_main_serialize);
3170 
3171 	ret->bnx_saved_status_tag = *ret->bnx_hw_status_tag;
3172 	/*
3173 	 * Use a load fence to ensure that status_tag is saved
3174 	 * before rx_prod, tx_cons and status.
3175 	 */
3176 	cpu_lfence();
3177 
3178 	bnx_handle_status(sc);
3179 
3180 	if (ifp->if_flags & IFF_RUNNING) {
3181 		struct bnx_tx_ring *txr = &sc->bnx_tx_ring[0];
3182 		uint16_t rx_prod, tx_cons;
3183 
3184 		lwkt_serialize_enter(&ret->bnx_rx_ret_serialize);
3185 		rx_prod = *ret->bnx_rx_considx;
3186 		if (ret->bnx_rx_saved_considx != rx_prod)
3187 			bnx_rxeof(ret, rx_prod, -1);
3188 		lwkt_serialize_exit(&ret->bnx_rx_ret_serialize);
3189 
3190 		lwkt_serialize_enter(&txr->bnx_tx_serialize);
3191 		tx_cons = *txr->bnx_tx_considx;
3192 		if (txr->bnx_tx_saved_considx != tx_cons)
3193 			bnx_txeof(txr, tx_cons);
3194 		lwkt_serialize_exit(&txr->bnx_tx_serialize);
3195 	}
3196 
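	/*
	 * Writing the saved status tag back to the interrupt mailbox
	 * re-enables the interrupt and acknowledges the status update
	 * we have just processed.
	 */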
3197 	bnx_writembx(sc, BGE_MBX_IRQ0_LO, ret->bnx_saved_status_tag << 24);
3198 }
3199 
3200 static void
3201 bnx_msix_tx_status(void *xtxr)
3202 {
3203 	struct bnx_tx_ring *txr = xtxr;
3204 	struct bnx_softc *sc = txr->bnx_sc;
3205 	struct ifnet *ifp = &sc->arpcom.ac_if;
3206 
3207 	ASSERT_SERIALIZED(&sc->bnx_main_serialize);
3208 
3209 	txr->bnx_saved_status_tag = *txr->bnx_hw_status_tag;
3210 	/*
3211 	 * Use a load fence to ensure that status_tag is saved
3212 	 * before tx_cons and status.
3213 	 */
3214 	cpu_lfence();
3215 
3216 	bnx_handle_status(sc);
3217 
3218 	if (ifp->if_flags & IFF_RUNNING) {
3219 		uint16_t tx_cons;
3220 
3221 		lwkt_serialize_enter(&txr->bnx_tx_serialize);
3222 		tx_cons = *txr->bnx_tx_considx;
3223 		if (txr->bnx_tx_saved_considx != tx_cons)
3224 			bnx_txeof(txr, tx_cons);
3225 		lwkt_serialize_exit(&txr->bnx_tx_serialize);
3226 	}
3227 
3228 	bnx_writembx(sc, BGE_MBX_IRQ0_LO, txr->bnx_saved_status_tag << 24);
3229 }
3230 
3231 static void
3232 bnx_msix_rx(void *xret)
3233 {
3234 	struct bnx_rx_ret_ring *ret = xret;
3235 	uint16_t rx_prod;
3236 
3237 	ASSERT_SERIALIZED(&ret->bnx_rx_ret_serialize);
3238 
3239 	ret->bnx_saved_status_tag = *ret->bnx_hw_status_tag;
3240 	/*
3241 	 * Use a load fence to ensure that status_tag is saved
3242 	 * before rx_prod.
3243 	 */
3244 	cpu_lfence();
3245 
3246 	rx_prod = *ret->bnx_rx_considx;
3247 	if (ret->bnx_rx_saved_considx != rx_prod)
3248 		bnx_rxeof(ret, rx_prod, -1);
3249 
3250 	bnx_writembx(ret->bnx_sc, ret->bnx_msix_mbx,
3251 	    ret->bnx_saved_status_tag << 24);
3252 }
3253 
3254 static void
3255 bnx_msix_rxtx(void *xret)
3256 {
3257 	struct bnx_rx_ret_ring *ret = xret;
3258 	struct bnx_tx_ring *txr = ret->bnx_txr;
3259 	uint16_t rx_prod, tx_cons;
3260 
3261 	ASSERT_SERIALIZED(&ret->bnx_rx_ret_serialize);
3262 
3263 	ret->bnx_saved_status_tag = *ret->bnx_hw_status_tag;
3264 	/*
3265 	 * Use a load fence to ensure that status_tag is saved
3266 	 * before rx_prod and tx_cons.
3267 	 */
3268 	cpu_lfence();
3269 
3270 	rx_prod = *ret->bnx_rx_considx;
3271 	if (ret->bnx_rx_saved_considx != rx_prod)
3272 		bnx_rxeof(ret, rx_prod, -1);
3273 
3274 	lwkt_serialize_enter(&txr->bnx_tx_serialize);
3275 	tx_cons = *txr->bnx_tx_considx;
3276 	if (txr->bnx_tx_saved_considx != tx_cons)
3277 		bnx_txeof(txr, tx_cons);
3278 	lwkt_serialize_exit(&txr->bnx_tx_serialize);
3279 
3280 	bnx_writembx(ret->bnx_sc, ret->bnx_msix_mbx,
3281 	    ret->bnx_saved_status_tag << 24);
3282 }
3283 
3284 static void
3285 bnx_msix_status(void *xsc)
3286 {
3287 	struct bnx_softc *sc = xsc;
3288 
3289 	ASSERT_SERIALIZED(&sc->bnx_main_serialize);
3290 
3291 	sc->bnx_saved_status_tag = *sc->bnx_hw_status_tag;
3292 	/*
3293 	 * Use a load fence to ensure that status_tag is saved
3294 	 * before status.
3295 	 */
3296 	cpu_lfence();
3297 
3298 	bnx_handle_status(sc);
3299 
3300 	bnx_writembx(sc, BGE_MBX_IRQ0_LO, sc->bnx_saved_status_tag << 24);
3301 }
3302 
3303 static void
3304 bnx_tick(void *xsc)
3305 {
3306 	struct bnx_softc *sc = xsc;
3307 
3308 	lwkt_serialize_enter(&sc->bnx_main_serialize);
3309 
3310 	bnx_stats_update_regs(sc);
3311 
3312 	if (sc->bnx_flags & BNX_FLAG_TBI) {
3313 		/*
3314 		 * Since auto-polling can't be used in TBI mode, we have to
3315 		 * poll the link status manually.  Here we register a pending
3316 		 * link event and trigger an interrupt.
3317 		 */
3318 		sc->bnx_link_evt++;
3319 		BNX_SETBIT(sc, BGE_HCC_MODE, BGE_HCCMODE_COAL_NOW);
3320 	} else if (!sc->bnx_link) {
3321 		mii_tick(device_get_softc(sc->bnx_miibus));
3322 	}
3323 
3324 	callout_reset_bycpu(&sc->bnx_tick_timer, hz, bnx_tick, sc,
3325 	    sc->bnx_tick_cpuid);
3326 
3327 	lwkt_serialize_exit(&sc->bnx_main_serialize);
3328 }
3329 
3330 static void
3331 bnx_stats_update_regs(struct bnx_softc *sc)
3332 {
3333 	struct ifnet *ifp = &sc->arpcom.ac_if;
3334 	struct bge_mac_stats_regs stats;
3335 	uint32_t *s, val;
3336 	int i;
3337 
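	/* Copy the MAC statistics registers into a local structure. */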
3338 	s = (uint32_t *)&stats;
3339 	for (i = 0; i < sizeof(struct bge_mac_stats_regs); i += 4) {
3340 		*s = CSR_READ_4(sc, BGE_RX_STATS + i);
3341 		s++;
3342 	}
3343 
3344 	IFNET_STAT_SET(ifp, collisions,
3345 	   (stats.dot3StatsSingleCollisionFrames +
3346 	   stats.dot3StatsMultipleCollisionFrames +
3347 	   stats.dot3StatsExcessiveCollisions +
3348 	   stats.dot3StatsLateCollisions));
3349 
3350 	val = CSR_READ_4(sc, BGE_RXLP_LOCSTAT_OUT_OF_BDS);
3351 	sc->bnx_norxbds += val;
3352 }
3353 
3354 /*
3355  * Encapsulate an mbuf chain in the tx ring by coupling the mbuf data
3356  * pointers to descriptors.
3357  */
3358 static int
3359 bnx_encap(struct bnx_tx_ring *txr, struct mbuf **m_head0, uint32_t *txidx,
3360     int *segs_used)
3361 {
3362 	struct bge_tx_bd *d = NULL;
3363 	uint16_t csum_flags = 0, vlan_tag = 0, mss = 0;
3364 	bus_dma_segment_t segs[BNX_NSEG_NEW];
3365 	bus_dmamap_t map;
3366 	int error, maxsegs, nsegs, idx, i;
3367 	struct mbuf *m_head = *m_head0, *m_new;
3368 
3369 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
3370 #ifdef BNX_TSO_DEBUG
3371 		int tso_nsegs;
3372 #endif
3373 
3374 		error = bnx_setup_tso(txr, m_head0, &mss, &csum_flags);
3375 		if (error)
3376 			return error;
3377 		m_head = *m_head0;
3378 
3379 #ifdef BNX_TSO_DEBUG
3380 		tso_nsegs = (m_head->m_pkthdr.len /
3381 		    m_head->m_pkthdr.tso_segsz) - 1;
3382 		if (tso_nsegs > (BNX_TSO_NSTATS - 1))
3383 			tso_nsegs = BNX_TSO_NSTATS - 1;
3384 		else if (tso_nsegs < 0)
3385 			tso_nsegs = 0;
3386 		txr->bnx_sc->bnx_tsosegs[tso_nsegs]++;
3387 #endif
3388 	} else if (m_head->m_pkthdr.csum_flags & BNX_CSUM_FEATURES) {
3389 		if (m_head->m_pkthdr.csum_flags & CSUM_IP)
3390 			csum_flags |= BGE_TXBDFLAG_IP_CSUM;
3391 		if (m_head->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))
3392 			csum_flags |= BGE_TXBDFLAG_TCP_UDP_CSUM;
3393 		if (m_head->m_flags & M_LASTFRAG)
3394 			csum_flags |= BGE_TXBDFLAG_IP_FRAG_END;
3395 		else if (m_head->m_flags & M_FRAG)
3396 			csum_flags |= BGE_TXBDFLAG_IP_FRAG;
3397 	}
3398 	if (m_head->m_flags & M_VLANTAG) {
3399 		csum_flags |= BGE_TXBDFLAG_VLAN_TAG;
3400 		vlan_tag = m_head->m_pkthdr.ether_vlantag;
3401 	}
3402 
3403 	idx = *txidx;
3404 	map = txr->bnx_tx_buf[idx].bnx_tx_dmamap;
3405 
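	/*
	 * Reserve BNX_NSEG_RSVD descriptors so the TX ring is never
	 * filled completely, and cap the segment count at BNX_NSEG_NEW,
	 * the size of the local segment array.
	 */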
3406 	maxsegs = (BGE_TX_RING_CNT - txr->bnx_tx_cnt) - BNX_NSEG_RSVD;
3407 	KASSERT(maxsegs >= BNX_NSEG_SPARE,
3408 		("not enough segments %d", maxsegs));
3409 
3410 	if (maxsegs > BNX_NSEG_NEW)
3411 		maxsegs = BNX_NSEG_NEW;
3412 
3413 	/*
3414 	 * Pad outbound frame to BGE_MIN_FRAMELEN for an unusual reason.
3415 	 * The bge hardware will pad out Tx runts to BGE_MIN_FRAMELEN,
3416 	 * but when such padded frames employ the bge IP/TCP checksum
3417 	 * offload, the hardware checksum assist gives incorrect results
3418 	 * (possibly from incorporating its own padding into the UDP/TCP
3419 	 * checksum; who knows).  If we pad such runts with zeros, the
3420 	 * onboard checksum comes out correct.
3421 	 */
3422 	if ((csum_flags & BGE_TXBDFLAG_TCP_UDP_CSUM) &&
3423 	    m_head->m_pkthdr.len < BNX_MIN_FRAMELEN) {
3424 		error = m_devpad(m_head, BNX_MIN_FRAMELEN);
3425 		if (error)
3426 			goto back;
3427 	}
3428 
3429 	if ((txr->bnx_tx_flags & BNX_TX_FLAG_SHORTDMA) &&
3430 	    m_head->m_next != NULL) {
3431 		m_new = bnx_defrag_shortdma(m_head);
3432 		if (m_new == NULL) {
3433 			error = ENOBUFS;
3434 			goto back;
3435 		}
3436 		*m_head0 = m_head = m_new;
3437 	}
3438 	if ((m_head->m_pkthdr.csum_flags & CSUM_TSO) == 0 &&
3439 	    (txr->bnx_tx_flags & BNX_TX_FLAG_FORCE_DEFRAG) &&
3440 	    m_head->m_next != NULL) {
3441 		/*
3442 		 * Forcefully defragment the mbuf chain to overcome a hardware
3443 		 * limitation which only supports a single outstanding
3444 		 * DMA read operation.  If defragmentation fails, keep going
3445 		 * with the original mbuf chain.
3446 		 */
3447 		m_new = m_defrag(m_head, M_NOWAIT);
3448 		if (m_new != NULL)
3449 			*m_head0 = m_head = m_new;
3450 	}
3451 
3452 	error = bus_dmamap_load_mbuf_defrag(txr->bnx_tx_mtag, map,
3453 	    m_head0, segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
3454 	if (error)
3455 		goto back;
3456 	*segs_used += nsegs;
3457 
3458 	m_head = *m_head0;
3459 	bus_dmamap_sync(txr->bnx_tx_mtag, map, BUS_DMASYNC_PREWRITE);
3460 
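	/*
	 * Fill one send BD per DMA segment.  Every BD in the chain
	 * carries the same checksum/VLAN/mss settings; the last BD is
	 * marked with BGE_TXBDFLAG_END below.
	 */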
3461 	for (i = 0; ; i++) {
3462 		d = &txr->bnx_tx_ring[idx];
3463 
3464 		d->bge_addr.bge_addr_lo = BGE_ADDR_LO(segs[i].ds_addr);
3465 		d->bge_addr.bge_addr_hi = BGE_ADDR_HI(segs[i].ds_addr);
3466 		d->bge_len = segs[i].ds_len;
3467 		d->bge_flags = csum_flags;
3468 		d->bge_vlan_tag = vlan_tag;
3469 		d->bge_mss = mss;
3470 
3471 		if (i == nsegs - 1)
3472 			break;
3473 		BNX_INC(idx, BGE_TX_RING_CNT);
3474 	}
3475 	/* Mark the last segment as end of packet... */
3476 	d->bge_flags |= BGE_TXBDFLAG_END;
3477 
3478 	/*
3479 	 * Ensure that the map for this transmission is placed at
3480 	 * the array index of the last descriptor in this chain.
3481 	 */
3482 	txr->bnx_tx_buf[*txidx].bnx_tx_dmamap = txr->bnx_tx_buf[idx].bnx_tx_dmamap;
3483 	txr->bnx_tx_buf[idx].bnx_tx_dmamap = map;
3484 	txr->bnx_tx_buf[idx].bnx_tx_mbuf = m_head;
3485 	txr->bnx_tx_cnt += nsegs;
3486 
3487 	BNX_INC(idx, BGE_TX_RING_CNT);
3488 	*txidx = idx;
3489 back:
3490 	if (error) {
3491 		m_freem(*m_head0);
3492 		*m_head0 = NULL;
3493 	}
3494 	return error;
3495 }
3496 
3497 /*
3498  * Main transmit routine. To avoid having to do mbuf copies, we put pointers
3499  * to the mbuf data regions directly in the transmit descriptors.
3500  */
3501 static void
3502 bnx_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
3503 {
3504 	struct bnx_tx_ring *txr = ifsq_get_priv(ifsq);
3505 	struct mbuf *m_head = NULL;
3506 	uint32_t prodidx;
3507 	int nsegs = 0;
3508 
3509 	KKASSERT(txr->bnx_ifsq == ifsq);
3510 	ASSERT_SERIALIZED(&txr->bnx_tx_serialize);
3511 
3512 	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
3513 		return;
3514 
3515 	prodidx = txr->bnx_tx_prodidx;
3516 
3517 	while (txr->bnx_tx_buf[prodidx].bnx_tx_mbuf == NULL) {
3518 		/*
3519 		 * Sanity check: avoid coming within BNX_NSEG_RSVD
3520 		 * descriptors of the end of the ring.  Also make
3521 		 * sure there are BNX_NSEG_SPARE descriptors for
3522 		 * jumbo buffers' or TSO segments' defragmentation.
3523 		 */
3524 		if ((BGE_TX_RING_CNT - txr->bnx_tx_cnt) <
3525 		    (BNX_NSEG_RSVD + BNX_NSEG_SPARE)) {
3526 			ifsq_set_oactive(ifsq);
3527 			break;
3528 		}
3529 
3530 		m_head = ifsq_dequeue(ifsq);
3531 		if (m_head == NULL)
3532 			break;
3533 
3534 		/*
3535 		 * Pack the data into the transmit ring. If we
3536 		 * don't have room, set the OACTIVE flag and wait
3537 		 * for the NIC to drain the ring.
3538 		 */
3539 		if (bnx_encap(txr, &m_head, &prodidx, &nsegs)) {
3540 			ifsq_set_oactive(ifsq);
3541 			IFNET_STAT_INC(ifp, oerrors, 1);
3542 			break;
3543 		}
3544 
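		/*
		 * Only ring the TX doorbell once at least bnx_tx_wreg
		 * segments have been queued, to reduce the number of
		 * mailbox register writes.
		 */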
3545 		if (nsegs >= txr->bnx_tx_wreg) {
3546 			/* Transmit */
3547 			bnx_writembx(txr->bnx_sc, txr->bnx_tx_mbx, prodidx);
3548 			nsegs = 0;
3549 		}
3550 
3551 		ETHER_BPF_MTAP(ifp, m_head);
3552 
3553 		/*
3554 		 * Set a timeout in case the chip goes out to lunch.
3555 		 */
3556 		txr->bnx_tx_watchdog.wd_timer = 5;
3557 	}
3558 
3559 	if (nsegs > 0) {
3560 		/* Transmit */
3561 		bnx_writembx(txr->bnx_sc, txr->bnx_tx_mbx, prodidx);
3562 	}
3563 	txr->bnx_tx_prodidx = prodidx;
3564 }
3565 
3566 static void
3567 bnx_init(void *xsc)
3568 {
3569 	struct bnx_softc *sc = xsc;
3570 	struct ifnet *ifp = &sc->arpcom.ac_if;
3571 	uint16_t *m;
3572 	uint32_t mode;
3573 	int i;
3574 	boolean_t polling;
3575 
3576 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3577 
3578 	/* Cancel pending I/O and flush buffers. */
3579 	bnx_stop(sc);
3580 
3581 	bnx_sig_pre_reset(sc, BNX_RESET_START);
3582 	bnx_reset(sc);
3583 	bnx_sig_post_reset(sc, BNX_RESET_START);
3584 
3585 	bnx_chipinit(sc);
3586 
3587 	/*
3588 	 * Init the various state machines, ring
3589 	 * control blocks and firmware.
3590 	 */
3591 	if (bnx_blockinit(sc)) {
3592 		if_printf(ifp, "initialization failure\n");
3593 		bnx_stop(sc);
3594 		return;
3595 	}
3596 
3597 	/* Specify MTU. */
3598 	CSR_WRITE_4(sc, BGE_RX_MTU, ifp->if_mtu +
3599 	    ETHER_HDR_LEN + ETHER_CRC_LEN + EVL_ENCAPLEN);
3600 
3601 	/* Load our MAC address. */
3602 	m = (uint16_t *)&sc->arpcom.ac_enaddr[0];
3603 	CSR_WRITE_4(sc, BGE_MAC_ADDR1_LO, htons(m[0]));
3604 	CSR_WRITE_4(sc, BGE_MAC_ADDR1_HI, (htons(m[1]) << 16) | htons(m[2]));
3605 
3606 	/* Enable or disable promiscuous mode as needed. */
3607 	bnx_setpromisc(sc);
3608 
3609 	/* Program multicast filter. */
3610 	bnx_setmulti(sc);
3611 
3612 	/* Init RX ring. */
3613 	if (bnx_init_rx_ring_std(&sc->bnx_rx_std_ring)) {
3614 		if_printf(ifp, "RX ring initialization failed\n");
3615 		bnx_stop(sc);
3616 		return;
3617 	}
3618 
3619 	/* Init jumbo RX ring. */
3620 	if (ifp->if_mtu > (ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN)) {
3621 		if (bnx_init_rx_ring_jumbo(sc)) {
3622 			if_printf(ifp, "Jumbo RX ring initialization failed\n");
3623 			bnx_stop(sc);
3624 			return;
3625 		}
3626 	}
3627 
3628 	/* Init our RX return ring index */
3629 	for (i = 0; i < sc->bnx_rx_retcnt; ++i) {
3630 		struct bnx_rx_ret_ring *ret = &sc->bnx_rx_ret_ring[i];
3631 
3632 		ret->bnx_rx_saved_considx = 0;
3633 		ret->bnx_rx_cnt = 0;
3634 	}
3635 
3636 	/* Init TX ring. */
3637 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i)
3638 		bnx_init_tx_ring(&sc->bnx_tx_ring[i]);
3639 
3640 	/* Enable TX MAC state machine lockup fix. */
3641 	mode = CSR_READ_4(sc, BGE_TX_MODE);
3642 	mode |= BGE_TXMODE_MBUF_LOCKUP_FIX;
3643 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5720 ||
3644 	    sc->bnx_asicrev == BGE_ASICREV_BCM5762) {
3645 		mode &= ~(BGE_TXMODE_JMB_FRM_LEN | BGE_TXMODE_CNT_DN_MODE);
3646 		mode |= CSR_READ_4(sc, BGE_TX_MODE) &
3647 		    (BGE_TXMODE_JMB_FRM_LEN | BGE_TXMODE_CNT_DN_MODE);
3648 	}
3649 	/* Turn on transmitter */
3650 	CSR_WRITE_4(sc, BGE_TX_MODE, mode | BGE_TXMODE_ENABLE);
3651 	DELAY(100);
3652 
3653 	/* Initialize RSS */
3654 	mode = BGE_RXMODE_ENABLE | BGE_RXMODE_IPV6_ENABLE;
3655 	if (BNX_RSS_ENABLED(sc)) {
3656 		bnx_init_rss(sc);
3657 		mode |= BGE_RXMODE_RSS_ENABLE |
3658 		    BGE_RXMODE_RSS_HASH_MASK_BITS |
3659 		    BGE_RXMODE_RSS_IPV4_HASH |
3660 		    BGE_RXMODE_RSS_TCP_IPV4_HASH;
3661 	}
3662 	/* Turn on receiver */
3663 	BNX_SETBIT(sc, BGE_RX_MODE, mode);
3664 	DELAY(10);
3665 
3666 	/*
3667 	 * Set the number of good frames to receive after RX MBUF
3668 	 * Low Watermark has been reached.  After the RX MAC receives
3669 	 * this number of frames, it will drop subsequent incoming
3670 	 * frames until the MBUF High Watermark is reached.
3671 	 */
3672 	if (BNX_IS_57765_FAMILY(sc))
3673 		CSR_WRITE_4(sc, BGE_MAX_RX_FRAME_LOWAT, 1);
3674 	else
3675 		CSR_WRITE_4(sc, BGE_MAX_RX_FRAME_LOWAT, 2);
3676 
3677 	if (sc->bnx_intr_type == PCI_INTR_TYPE_MSI ||
3678 	    sc->bnx_intr_type == PCI_INTR_TYPE_MSIX) {
3679 		if (bootverbose) {
3680 			if_printf(ifp, "MSI_MODE: %#x\n",
3681 			    CSR_READ_4(sc, BGE_MSI_MODE));
3682 		}
3683 	}
3684 
3685 	/* Tell firmware we're alive. */
3686 	BNX_SETBIT(sc, BGE_MODE_CTL, BGE_MODECTL_STACKUP);
3687 
3688 	/* Enable host interrupts if polling(4) is not enabled. */
3689 	PCI_SETBIT(sc->bnx_dev, BGE_PCI_MISC_CTL, BGE_PCIMISCCTL_CLEAR_INTA, 4);
3690 
3691 	polling = FALSE;
3692 #ifdef IFPOLL_ENABLE
3693 	if (ifp->if_flags & IFF_NPOLLING)
3694 		polling = TRUE;
3695 #endif
3696 	if (polling)
3697 		bnx_disable_intr(sc);
3698 	else
3699 		bnx_enable_intr(sc);
3700 	bnx_set_tick_cpuid(sc, polling);
3701 
3702 	ifp->if_flags |= IFF_RUNNING;
3703 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
3704 		struct bnx_tx_ring *txr = &sc->bnx_tx_ring[i];
3705 
3706 		ifsq_clr_oactive(txr->bnx_ifsq);
3707 		ifsq_watchdog_start(&txr->bnx_tx_watchdog);
3708 	}
3709 
3710 	bnx_ifmedia_upd(ifp);
3711 
3712 	callout_reset_bycpu(&sc->bnx_tick_timer, hz, bnx_tick, sc,
3713 	    sc->bnx_tick_cpuid);
3714 }
3715 
3716 /*
3717  * Set media options.
3718  */
3719 static int
3720 bnx_ifmedia_upd(struct ifnet *ifp)
3721 {
3722 	struct bnx_softc *sc = ifp->if_softc;
3723 
3724 	/* If this is a 1000baseX NIC, enable the TBI port. */
3725 	if (sc->bnx_flags & BNX_FLAG_TBI) {
3726 		struct ifmedia *ifm = &sc->bnx_ifmedia;
3727 
3728 		if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
3729 			return(EINVAL);
3730 
3731 		switch(IFM_SUBTYPE(ifm->ifm_media)) {
3732 		case IFM_AUTO:
3733 			break;
3734 
3735 		case IFM_1000_SX:
3736 			if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) {
3737 				BNX_CLRBIT(sc, BGE_MAC_MODE,
3738 				    BGE_MACMODE_HALF_DUPLEX);
3739 			} else {
3740 				BNX_SETBIT(sc, BGE_MAC_MODE,
3741 				    BGE_MACMODE_HALF_DUPLEX);
3742 			}
3743 			DELAY(40);
3744 			break;
3745 		default:
3746 			return(EINVAL);
3747 		}
3748 	} else {
3749 		struct mii_data *mii = device_get_softc(sc->bnx_miibus);
3750 
3751 		sc->bnx_link_evt++;
3752 		sc->bnx_link = 0;
3753 		if (mii->mii_instance) {
3754 			struct mii_softc *miisc;
3755 
3756 			LIST_FOREACH(miisc, &mii->mii_phys, mii_list)
3757 				mii_phy_reset(miisc);
3758 		}
3759 		mii_mediachg(mii);
3760 
3761 		/*
3762 		 * Force an interrupt so that we will call bnx_link_upd
3763 		 * if needed and clear any pending link state attention.
3764 		 * Without this we would not get any further interrupts
3765 		 * for link state changes, and thus would never bring the
3766 		 * link UP or be able to send in bnx_start.  The only way
3767 		 * to get things working was to receive a packet and get
3768 		 * an RX intr.
3769 		 *
3770 		 * bnx_tick should help for fiber cards and we might not
3771 		 * need to do this here if BNX_FLAG_TBI is set, but as
3772 		 * we poll for fiber anyway it should not harm.
3773 		 */
3774 		BNX_SETBIT(sc, BGE_HCC_MODE, BGE_HCCMODE_COAL_NOW);
3775 	}
3776 	return(0);
3777 }
3778 
3779 /*
3780  * Report current media status.
3781  */
3782 static void
3783 bnx_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
3784 {
3785 	struct bnx_softc *sc = ifp->if_softc;
3786 
3787 	if ((ifp->if_flags & IFF_RUNNING) == 0)
3788 		return;
3789 
3790 	if (sc->bnx_flags & BNX_FLAG_TBI) {
3791 		ifmr->ifm_status = IFM_AVALID;
3792 		ifmr->ifm_active = IFM_ETHER;
3793 		if (CSR_READ_4(sc, BGE_MAC_STS) &
3794 		    BGE_MACSTAT_TBI_PCS_SYNCHED) {
3795 			ifmr->ifm_status |= IFM_ACTIVE;
3796 		} else {
3797 			ifmr->ifm_active |= IFM_NONE;
3798 			return;
3799 		}
3800 
3801 		ifmr->ifm_active |= IFM_1000_SX;
3802 		if (CSR_READ_4(sc, BGE_MAC_MODE) & BGE_MACMODE_HALF_DUPLEX)
3803 			ifmr->ifm_active |= IFM_HDX;
3804 		else
3805 			ifmr->ifm_active |= IFM_FDX;
3806 	} else {
3807 		struct mii_data *mii = device_get_softc(sc->bnx_miibus);
3808 
3809 		mii_pollstat(mii);
3810 		ifmr->ifm_active = mii->mii_media_active;
3811 		ifmr->ifm_status = mii->mii_media_status;
3812 	}
3813 }
3814 
3815 static int
3816 bnx_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
3817 {
3818 	struct bnx_softc *sc = ifp->if_softc;
3819 	struct ifreq *ifr = (struct ifreq *)data;
3820 	int mask, error = 0;
3821 
3822 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3823 
3824 	switch (command) {
3825 	case SIOCSIFMTU:
3826 		if ((!BNX_IS_JUMBO_CAPABLE(sc) && ifr->ifr_mtu > ETHERMTU) ||
3827 		    (BNX_IS_JUMBO_CAPABLE(sc) &&
3828 		     ifr->ifr_mtu > BNX_JUMBO_MTU)) {
3829 			error = EINVAL;
3830 		} else if (ifp->if_mtu != ifr->ifr_mtu) {
3831 			ifp->if_mtu = ifr->ifr_mtu;
3832 			if (ifp->if_flags & IFF_RUNNING)
3833 				bnx_init(sc);
3834 		}
3835 		break;
3836 	case SIOCSIFFLAGS:
3837 		if (ifp->if_flags & IFF_UP) {
3838 			if (ifp->if_flags & IFF_RUNNING) {
3839 				mask = ifp->if_flags ^ sc->bnx_if_flags;
3840 
3841 				/*
3842 				 * If only the state of the PROMISC flag
3843 				 * changed, then just use the 'set promisc
3844 				 * mode' command instead of reinitializing
3845 				 * the entire NIC. Doing a full re-init
3846 				 * means reloading the firmware and waiting
3847 				 * for it to start up, which may take a
3848 				 * second or two.  Similarly for ALLMULTI.
3849 				 */
3850 				if (mask & IFF_PROMISC)
3851 					bnx_setpromisc(sc);
3852 				if (mask & IFF_ALLMULTI)
3853 					bnx_setmulti(sc);
3854 			} else {
3855 				bnx_init(sc);
3856 			}
3857 		} else if (ifp->if_flags & IFF_RUNNING) {
3858 			bnx_stop(sc);
3859 		}
3860 		sc->bnx_if_flags = ifp->if_flags;
3861 		break;
3862 	case SIOCADDMULTI:
3863 	case SIOCDELMULTI:
3864 		if (ifp->if_flags & IFF_RUNNING)
3865 			bnx_setmulti(sc);
3866 		break;
3867 	case SIOCSIFMEDIA:
3868 	case SIOCGIFMEDIA:
3869 		if (sc->bnx_flags & BNX_FLAG_TBI) {
3870 			error = ifmedia_ioctl(ifp, ifr,
3871 			    &sc->bnx_ifmedia, command);
3872 		} else {
3873 			struct mii_data *mii;
3874 
3875 			mii = device_get_softc(sc->bnx_miibus);
3876 			error = ifmedia_ioctl(ifp, ifr,
3877 					      &mii->mii_media, command);
3878 		}
3879 		break;
3880         case SIOCSIFCAP:
3881 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3882 		if (mask & IFCAP_HWCSUM) {
3883 			ifp->if_capenable ^= (mask & IFCAP_HWCSUM);
3884 			if (ifp->if_capenable & IFCAP_TXCSUM)
3885 				ifp->if_hwassist |= BNX_CSUM_FEATURES;
3886 			else
3887 				ifp->if_hwassist &= ~BNX_CSUM_FEATURES;
3888 		}
3889 		if (mask & IFCAP_TSO) {
3890 			ifp->if_capenable ^= (mask & IFCAP_TSO);
3891 			if (ifp->if_capenable & IFCAP_TSO)
3892 				ifp->if_hwassist |= CSUM_TSO;
3893 			else
3894 				ifp->if_hwassist &= ~CSUM_TSO;
3895 		}
3896 		if (mask & IFCAP_RSS)
3897 			ifp->if_capenable ^= IFCAP_RSS;
3898 		break;
3899 	default:
3900 		error = ether_ioctl(ifp, command, data);
3901 		break;
3902 	}
3903 	return error;
3904 }
3905 
3906 static void
3907 bnx_watchdog(struct ifaltq_subque *ifsq)
3908 {
3909 	struct ifnet *ifp = ifsq_get_ifp(ifsq);
3910 	struct bnx_softc *sc = ifp->if_softc;
3911 	int i;
3912 
3913 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3914 
3915 	if_printf(ifp, "watchdog timeout -- resetting\n");
3916 
3917 	bnx_init(sc);
3918 
3919 	IFNET_STAT_INC(ifp, oerrors, 1);
3920 
3921 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i)
3922 		ifsq_devstart_sched(sc->bnx_tx_ring[i].bnx_ifsq);
3923 }
3924 
3925 /*
3926  * Stop the adapter and free any mbufs allocated to the
3927  * RX and TX lists.
3928  */
3929 static void
3930 bnx_stop(struct bnx_softc *sc)
3931 {
3932 	struct ifnet *ifp = &sc->arpcom.ac_if;
3933 	int i;
3934 
3935 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
3936 
3937 	callout_stop(&sc->bnx_tick_timer);
3938 
3939 	/* Disable host interrupts. */
3940 	bnx_disable_intr(sc);
3941 
3942 	/*
3943 	 * Tell firmware we're shutting down.
3944 	 */
3945 	bnx_sig_pre_reset(sc, BNX_RESET_SHUTDOWN);
3946 
3947 	/*
3948 	 * Disable all of the receiver blocks
3949 	 */
3950 	bnx_stop_block(sc, BGE_RX_MODE, BGE_RXMODE_ENABLE);
3951 	bnx_stop_block(sc, BGE_RBDI_MODE, BGE_RBDIMODE_ENABLE);
3952 	bnx_stop_block(sc, BGE_RXLP_MODE, BGE_RXLPMODE_ENABLE);
3953 	bnx_stop_block(sc, BGE_RDBDI_MODE, BGE_RBDIMODE_ENABLE);
3954 	bnx_stop_block(sc, BGE_RDC_MODE, BGE_RDCMODE_ENABLE);
3955 	bnx_stop_block(sc, BGE_RBDC_MODE, BGE_RBDCMODE_ENABLE);
3956 
3957 	/*
3958 	 * Disable all of the transmit blocks
3959 	 */
3960 	bnx_stop_block(sc, BGE_SRS_MODE, BGE_SRSMODE_ENABLE);
3961 	bnx_stop_block(sc, BGE_SBDI_MODE, BGE_SBDIMODE_ENABLE);
3962 	bnx_stop_block(sc, BGE_SDI_MODE, BGE_SDIMODE_ENABLE);
3963 	bnx_stop_block(sc, BGE_RDMA_MODE, BGE_RDMAMODE_ENABLE);
3964 	bnx_stop_block(sc, BGE_SDC_MODE, BGE_SDCMODE_ENABLE);
3965 	bnx_stop_block(sc, BGE_SBDC_MODE, BGE_SBDCMODE_ENABLE);
3966 
3967 	/*
3968 	 * Shut down all of the memory managers and related
3969 	 * state machines.
3970 	 */
3971 	bnx_stop_block(sc, BGE_HCC_MODE, BGE_HCCMODE_ENABLE);
3972 	bnx_stop_block(sc, BGE_WDMA_MODE, BGE_WDMAMODE_ENABLE);
3973 	CSR_WRITE_4(sc, BGE_FTQ_RESET, 0xFFFFFFFF);
3974 	CSR_WRITE_4(sc, BGE_FTQ_RESET, 0);
3975 
3976 	bnx_reset(sc);
3977 	bnx_sig_post_reset(sc, BNX_RESET_SHUTDOWN);
3978 
3979 	/*
3980 	 * Tell firmware we're shutting down.
3981 	 */
3982 	BNX_CLRBIT(sc, BGE_MODE_CTL, BGE_MODECTL_STACKUP);
3983 
3984 	/* Free the RX lists. */
3985 	bnx_free_rx_ring_std(&sc->bnx_rx_std_ring);
3986 
3987 	/* Free jumbo RX list. */
3988 	if (BNX_IS_JUMBO_CAPABLE(sc))
3989 		bnx_free_rx_ring_jumbo(sc);
3990 
3991 	/* Free TX buffers. */
3992 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
3993 		struct bnx_tx_ring *txr = &sc->bnx_tx_ring[i];
3994 
3995 		txr->bnx_saved_status_tag = 0;
3996 		bnx_free_tx_ring(txr);
3997 	}
3998 
3999 	/* Clear saved status tag */
4000 	for (i = 0; i < sc->bnx_rx_retcnt; ++i)
4001 		sc->bnx_rx_ret_ring[i].bnx_saved_status_tag = 0;
4002 
4003 	sc->bnx_link = 0;
4004 	sc->bnx_coal_chg = 0;
4005 
4006 	ifp->if_flags &= ~IFF_RUNNING;
4007 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
4008 		struct bnx_tx_ring *txr = &sc->bnx_tx_ring[i];
4009 
4010 		ifsq_clr_oactive(txr->bnx_ifsq);
4011 		ifsq_watchdog_stop(&txr->bnx_tx_watchdog);
4012 	}
4013 }
4014 
4015 /*
4016  * Stop all chip I/O so that the kernel's probe routines don't
4017  * get confused by errant DMAs when rebooting.
4018  */
4019 static void
4020 bnx_shutdown(device_t dev)
4021 {
4022 	struct bnx_softc *sc = device_get_softc(dev);
4023 	struct ifnet *ifp = &sc->arpcom.ac_if;
4024 
4025 	ifnet_serialize_all(ifp);
4026 	bnx_stop(sc);
4027 	ifnet_deserialize_all(ifp);
4028 }
4029 
4030 static int
4031 bnx_suspend(device_t dev)
4032 {
4033 	struct bnx_softc *sc = device_get_softc(dev);
4034 	struct ifnet *ifp = &sc->arpcom.ac_if;
4035 
4036 	ifnet_serialize_all(ifp);
4037 	bnx_stop(sc);
4038 	ifnet_deserialize_all(ifp);
4039 
4040 	return 0;
4041 }
4042 
4043 static int
4044 bnx_resume(device_t dev)
4045 {
4046 	struct bnx_softc *sc = device_get_softc(dev);
4047 	struct ifnet *ifp = &sc->arpcom.ac_if;
4048 
4049 	ifnet_serialize_all(ifp);
4050 
4051 	if (ifp->if_flags & IFF_UP) {
4052 		int i;
4053 
4054 		bnx_init(sc);
4055 		for (i = 0; i < sc->bnx_tx_ringcnt; ++i)
4056 			ifsq_devstart_sched(sc->bnx_tx_ring[i].bnx_ifsq);
4057 	}
4058 
4059 	ifnet_deserialize_all(ifp);
4060 
4061 	return 0;
4062 }
4063 
4064 static void
4065 bnx_setpromisc(struct bnx_softc *sc)
4066 {
4067 	struct ifnet *ifp = &sc->arpcom.ac_if;
4068 
4069 	if (ifp->if_flags & IFF_PROMISC)
4070 		BNX_SETBIT(sc, BGE_RX_MODE, BGE_RXMODE_RX_PROMISC);
4071 	else
4072 		BNX_CLRBIT(sc, BGE_RX_MODE, BGE_RXMODE_RX_PROMISC);
4073 }
4074 
4075 static void
4076 bnx_dma_free(struct bnx_softc *sc)
4077 {
4078 	struct bnx_rx_std_ring *std = &sc->bnx_rx_std_ring;
4079 	int i;
4080 
4081 	/* Destroy RX return rings */
4082 	if (sc->bnx_rx_ret_ring != NULL) {
4083 		for (i = 0; i < sc->bnx_rx_retcnt; ++i)
4084 			bnx_destroy_rx_ret_ring(&sc->bnx_rx_ret_ring[i]);
4085 		kfree(sc->bnx_rx_ret_ring, M_DEVBUF);
4086 	}
4087 
4088 	/* Destroy RX mbuf DMA stuffs. */
4089 	if (std->bnx_rx_mtag != NULL) {
4090 		for (i = 0; i < BGE_STD_RX_RING_CNT; i++) {
4091 			KKASSERT(std->bnx_rx_std_buf[i].bnx_rx_mbuf == NULL);
4092 			bus_dmamap_destroy(std->bnx_rx_mtag,
4093 			    std->bnx_rx_std_buf[i].bnx_rx_dmamap);
4094 		}
4095 		bus_dma_tag_destroy(std->bnx_rx_mtag);
4096 	}
4097 
4098 	/* Destroy standard RX ring */
4099 	bnx_dma_block_free(std->bnx_rx_std_ring_tag,
4100 	    std->bnx_rx_std_ring_map, std->bnx_rx_std_ring);
4101 
4102 	/* Destroy TX rings */
4103 	if (sc->bnx_tx_ring != NULL) {
4104 		for (i = 0; i < sc->bnx_tx_ringcnt; ++i)
4105 			bnx_destroy_tx_ring(&sc->bnx_tx_ring[i]);
4106 		kfree(sc->bnx_tx_ring, M_DEVBUF);
4107 	}
4108 
4109 	if (BNX_IS_JUMBO_CAPABLE(sc))
4110 		bnx_free_jumbo_mem(sc);
4111 
4112 	/* Destroy status blocks */
4113 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
4114 		struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
4115 
4116 		bnx_dma_block_free(intr->bnx_status_tag,
4117 		    intr->bnx_status_map, intr->bnx_status_block);
4118 	}
4119 
4120 	/* Destroy the parent tag */
4121 	if (sc->bnx_cdata.bnx_parent_tag != NULL)
4122 		bus_dma_tag_destroy(sc->bnx_cdata.bnx_parent_tag);
4123 }
4124 
4125 static int
4126 bnx_dma_alloc(device_t dev)
4127 {
4128 	struct bnx_softc *sc = device_get_softc(dev);
4129 	struct bnx_rx_std_ring *std = &sc->bnx_rx_std_ring;
4130 	int i, error, mbx;
4131 
4132 	/*
4133 	 * Allocate the parent bus DMA tag appropriate for PCI.
4134 	 *
4135 	 * All of the NetExtreme/NetLink controllers have a 4GB boundary
4136 	 * DMA bug.
4137 	 * Whenever an address crosses a multiple of the 4GB boundary
4138 	 * (4GB, 8GB, 12GB, etc.), i.e. makes the transition from
4139 	 * 0xX_FFFF_FFFF to 0x(X+1)_0000_0000, an internal DMA
4140 	 * state machine will lock up and cause the device to hang.
4141 	 */
4142 	error = bus_dma_tag_create(NULL, 1, BGE_DMA_BOUNDARY_4G,
4143 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
4144 	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT,
4145 	    0, &sc->bnx_cdata.bnx_parent_tag);
4146 	if (error) {
4147 		device_printf(dev, "could not create parent DMA tag\n");
4148 		return error;
4149 	}
4150 
4151 	/*
4152 	 * Create DMA stuffs for status blocks.
4153 	 */
4154 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
4155 		struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
4156 
4157 		error = bnx_dma_block_alloc(sc,
4158 		    __VM_CACHELINE_ALIGN(BGE_STATUS_BLK_SZ),
4159 		    &intr->bnx_status_tag, &intr->bnx_status_map,
4160 		    (void *)&intr->bnx_status_block,
4161 		    &intr->bnx_status_block_paddr);
4162 		if (error) {
4163 			device_printf(dev,
4164 			    "could not create %dth status block\n", i);
4165 			return error;
4166 		}
4167 	}
4168 	sc->bnx_hw_status = &sc->bnx_intr_data[0].bnx_status_block->bge_status;
4169 	if (sc->bnx_flags & BNX_FLAG_STATUS_HASTAG) {
4170 		sc->bnx_hw_status_tag =
4171 		    &sc->bnx_intr_data[0].bnx_status_block->bge_status_tag;
4172 	}
4173 
4174 	/*
4175 	 * Create DMA tag and maps for RX mbufs.
4176 	 */
4177 	std->bnx_sc = sc;
4178 	lwkt_serialize_init(&std->bnx_rx_std_serialize);
4179 	error = bus_dma_tag_create(sc->bnx_cdata.bnx_parent_tag, 1, 0,
4180 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
4181 	    NULL, NULL, MCLBYTES, 1, MCLBYTES,
4182 	    BUS_DMA_ALLOCNOW | BUS_DMA_WAITOK, &std->bnx_rx_mtag);
4183 	if (error) {
4184 		device_printf(dev, "could not create RX mbuf DMA tag\n");
4185 		return error;
4186 	}
4187 
4188 	for (i = 0; i < BGE_STD_RX_RING_CNT; ++i) {
4189 		error = bus_dmamap_create(std->bnx_rx_mtag, BUS_DMA_WAITOK,
4190 		    &std->bnx_rx_std_buf[i].bnx_rx_dmamap);
4191 		if (error) {
4192 			int j;
4193 
4194 			for (j = 0; j < i; ++j) {
4195 				bus_dmamap_destroy(std->bnx_rx_mtag,
4196 				    std->bnx_rx_std_buf[j].bnx_rx_dmamap);
4197 			}
4198 			bus_dma_tag_destroy(std->bnx_rx_mtag);
4199 			std->bnx_rx_mtag = NULL;
4200 
4201 			device_printf(dev,
4202 			    "could not create %dth RX mbuf DMA map\n", i);
4203 			return error;
4204 		}
4205 	}
4206 
4207 	/*
4208 	 * Create DMA stuffs for standard RX ring.
4209 	 */
4210 	error = bnx_dma_block_alloc(sc, BGE_STD_RX_RING_SZ,
4211 	    &std->bnx_rx_std_ring_tag,
4212 	    &std->bnx_rx_std_ring_map,
4213 	    (void *)&std->bnx_rx_std_ring,
4214 	    &std->bnx_rx_std_ring_paddr);
4215 	if (error) {
4216 		device_printf(dev, "could not create std RX ring\n");
4217 		return error;
4218 	}
4219 
4220 	/*
4221 	 * Create RX return rings
4222 	 */
4223 	mbx = BGE_MBX_RX_CONS0_LO;
4224 	sc->bnx_rx_ret_ring = kmalloc_cachealign(
4225 	    sizeof(struct bnx_rx_ret_ring) * sc->bnx_rx_retcnt, M_DEVBUF,
4226 	    M_WAITOK | M_ZERO);
4227 	for (i = 0; i < sc->bnx_rx_retcnt; ++i) {
4228 		struct bnx_rx_ret_ring *ret = &sc->bnx_rx_ret_ring[i];
4229 		struct bnx_intr_data *intr;
4230 
4231 		ret->bnx_sc = sc;
4232 		ret->bnx_std = std;
4233 		ret->bnx_rx_mbx = mbx;
4234 		ret->bnx_rx_cntmax = (BGE_STD_RX_RING_CNT / 4) /
4235 		    sc->bnx_rx_retcnt;
4236 		ret->bnx_rx_mask = 1 << i;
4237 
4238 		if (!BNX_RSS_ENABLED(sc)) {
4239 			intr = &sc->bnx_intr_data[0];
4240 		} else {
4241 			KKASSERT(i + 1 < sc->bnx_intr_cnt);
4242 			intr = &sc->bnx_intr_data[i + 1];
4243 		}
4244 
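		/*
		 * Each RX return ring reads its consumer index from a
		 * different status block field; the additional RSS return
		 * rings reuse the jumbo/reserved/mini consumer index slots.
		 */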
4245 		if (i == 0) {
4246 			ret->bnx_rx_considx =
4247 			    &intr->bnx_status_block->bge_idx[0].bge_rx_prod_idx;
4248 		} else if (i == 1) {
4249 			ret->bnx_rx_considx =
4250 			    &intr->bnx_status_block->bge_rx_jumbo_cons_idx;
4251 		} else if (i == 2) {
4252 			ret->bnx_rx_considx =
4253 			    &intr->bnx_status_block->bge_rsvd1;
4254 		} else if (i == 3) {
4255 			ret->bnx_rx_considx =
4256 			    &intr->bnx_status_block->bge_rx_mini_cons_idx;
4257 		} else {
4258 			panic("unknown RX return ring %d\n", i);
4259 		}
4260 		ret->bnx_hw_status_tag =
4261 		    &intr->bnx_status_block->bge_status_tag;
4262 
4263 		error = bnx_create_rx_ret_ring(ret);
4264 		if (error) {
4265 			device_printf(dev,
4266 			    "could not create %dth RX ret ring\n", i);
4267 			return error;
4268 		}
4269 		mbx += 8;
4270 	}
4271 
4272 	/*
4273 	 * Create TX rings
4274 	 */
4275 	sc->bnx_tx_ring = kmalloc_cachealign(
4276 	    sizeof(struct bnx_tx_ring) * sc->bnx_tx_ringcnt, M_DEVBUF,
4277 	    M_WAITOK | M_ZERO);
4278 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
4279 		struct bnx_tx_ring *txr = &sc->bnx_tx_ring[i];
4280 		struct bnx_intr_data *intr;
4281 
4282 		txr->bnx_sc = sc;
4283 		txr->bnx_tx_mbx = bnx_tx_mailbox[i];
4284 
4285 		if (sc->bnx_tx_ringcnt == 1) {
4286 			intr = &sc->bnx_intr_data[0];
4287 		} else {
4288 			KKASSERT(i + 1 < sc->bnx_intr_cnt);
4289 			intr = &sc->bnx_intr_data[i + 1];
4290 		}
4291 
4292 		if ((sc->bnx_flags & BNX_FLAG_RXTX_BUNDLE) == 0) {
4293 			txr->bnx_hw_status_tag =
4294 			    &intr->bnx_status_block->bge_status_tag;
4295 		}
4296 		txr->bnx_tx_considx =
4297 		    &intr->bnx_status_block->bge_idx[0].bge_tx_cons_idx;
4298 
4299 		error = bnx_create_tx_ring(txr);
4300 		if (error) {
4301 			device_printf(dev,
4302 			    "could not create %dth TX ring\n", i);
4303 			return error;
4304 		}
4305 	}
4306 
4307 	/*
4308 	 * Create jumbo buffer pool.
4309 	 */
4310 	if (BNX_IS_JUMBO_CAPABLE(sc)) {
4311 		error = bnx_alloc_jumbo_mem(sc);
4312 		if (error) {
4313 			device_printf(dev,
4314 			    "could not create jumbo buffer pool\n");
4315 			return error;
4316 		}
4317 	}
4318 
4319 	return 0;
4320 }
4321 
4322 static int
4323 bnx_dma_block_alloc(struct bnx_softc *sc, bus_size_t size, bus_dma_tag_t *tag,
4324 		    bus_dmamap_t *map, void **addr, bus_addr_t *paddr)
4325 {
4326 	bus_dmamem_t dmem;
4327 	int error;
4328 
4329 	error = bus_dmamem_coherent(sc->bnx_cdata.bnx_parent_tag, PAGE_SIZE, 0,
4330 				    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
4331 				    size, BUS_DMA_WAITOK | BUS_DMA_ZERO, &dmem);
4332 	if (error)
4333 		return error;
4334 
4335 	*tag = dmem.dmem_tag;
4336 	*map = dmem.dmem_map;
4337 	*addr = dmem.dmem_addr;
4338 	*paddr = dmem.dmem_busaddr;
4339 
4340 	return 0;
4341 }
4342 
4343 static void
4344 bnx_dma_block_free(bus_dma_tag_t tag, bus_dmamap_t map, void *addr)
4345 {
4346 	if (tag != NULL) {
4347 		bus_dmamap_unload(tag, map);
4348 		bus_dmamem_free(tag, addr, map);
4349 		bus_dma_tag_destroy(tag);
4350 	}
4351 }
4352 
4353 static void
4354 bnx_tbi_link_upd(struct bnx_softc *sc, uint32_t status)
4355 {
4356 	struct ifnet *ifp = &sc->arpcom.ac_if;
4357 
4358 #define PCS_ENCODE_ERR	(BGE_MACSTAT_PORT_DECODE_ERROR|BGE_MACSTAT_MI_COMPLETE)
4359 
4360 	/*
4361 	 * Sometimes PCS encoding errors are detected in
4362 	 * TBI mode (on fiber NICs), and for some reason
4363 	 * the chip will signal them as link changes.
4364 	 * If we get a link change event, but the 'PCS
4365 	 * encoding error' bit in the MAC status register
4366 	 * is set, don't bother doing a link check.
4367 	 * This avoids spurious "gigabit link up" messages
4368 	 * that sometimes appear on fiber NICs during
4369 	 * periods of heavy traffic.
4370 	 */
4371 	if (status & BGE_MACSTAT_TBI_PCS_SYNCHED) {
4372 		if (!sc->bnx_link) {
4373 			sc->bnx_link++;
4374 			if (sc->bnx_asicrev == BGE_ASICREV_BCM5704) {
4375 				BNX_CLRBIT(sc, BGE_MAC_MODE,
4376 				    BGE_MACMODE_TBI_SEND_CFGS);
4377 				DELAY(40);
4378 			}
4379 			CSR_WRITE_4(sc, BGE_MAC_STS, 0xFFFFFFFF);
4380 
4381 			if (bootverbose)
4382 				if_printf(ifp, "link UP\n");
4383 
4384 			ifp->if_link_state = LINK_STATE_UP;
4385 			if_link_state_change(ifp);
4386 		}
4387 	} else if ((status & PCS_ENCODE_ERR) != PCS_ENCODE_ERR) {
4388 		if (sc->bnx_link) {
4389 			sc->bnx_link = 0;
4390 
4391 			if (bootverbose)
4392 				if_printf(ifp, "link DOWN\n");
4393 
4394 			ifp->if_link_state = LINK_STATE_DOWN;
4395 			if_link_state_change(ifp);
4396 		}
4397 	}
4398 
4399 #undef PCS_ENCODE_ERR
4400 
4401 	/* Clear the attention. */
4402 	CSR_WRITE_4(sc, BGE_MAC_STS, BGE_MACSTAT_SYNC_CHANGED |
4403 	    BGE_MACSTAT_CFG_CHANGED | BGE_MACSTAT_MI_COMPLETE |
4404 	    BGE_MACSTAT_LINK_CHANGED);
4405 }
4406 
4407 static void
4408 bnx_copper_link_upd(struct bnx_softc *sc, uint32_t status __unused)
4409 {
4410 	struct ifnet *ifp = &sc->arpcom.ac_if;
4411 	struct mii_data *mii = device_get_softc(sc->bnx_miibus);
4412 
4413 	mii_pollstat(mii);
4414 	bnx_miibus_statchg(sc->bnx_dev);
4415 
4416 	if (bootverbose) {
4417 		if (sc->bnx_link)
4418 			if_printf(ifp, "link UP\n");
4419 		else
4420 			if_printf(ifp, "link DOWN\n");
4421 	}
4422 
4423 	/* Clear the attention. */
4424 	CSR_WRITE_4(sc, BGE_MAC_STS, BGE_MACSTAT_SYNC_CHANGED |
4425 	    BGE_MACSTAT_CFG_CHANGED | BGE_MACSTAT_MI_COMPLETE |
4426 	    BGE_MACSTAT_LINK_CHANGED);
4427 }
4428 
4429 static void
4430 bnx_autopoll_link_upd(struct bnx_softc *sc, uint32_t status __unused)
4431 {
4432 	struct ifnet *ifp = &sc->arpcom.ac_if;
4433 	struct mii_data *mii = device_get_softc(sc->bnx_miibus);
4434 
4435 	mii_pollstat(mii);
4436 
4437 	if (!sc->bnx_link &&
4438 	    (mii->mii_media_status & IFM_ACTIVE) &&
4439 	    IFM_SUBTYPE(mii->mii_media_active) != IFM_NONE) {
4440 		sc->bnx_link++;
4441 		if (bootverbose)
4442 			if_printf(ifp, "link UP\n");
4443 	} else if (sc->bnx_link &&
4444 	    (!(mii->mii_media_status & IFM_ACTIVE) ||
4445 	    IFM_SUBTYPE(mii->mii_media_active) == IFM_NONE)) {
4446 		sc->bnx_link = 0;
4447 		if (bootverbose)
4448 			if_printf(ifp, "link DOWN\n");
4449 	}
4450 
4451 	/* Clear the attention. */
4452 	CSR_WRITE_4(sc, BGE_MAC_STS, BGE_MACSTAT_SYNC_CHANGED |
4453 	    BGE_MACSTAT_CFG_CHANGED | BGE_MACSTAT_MI_COMPLETE |
4454 	    BGE_MACSTAT_LINK_CHANGED);
4455 }
4456 
4457 static int
4458 bnx_sysctl_rx_coal_ticks(SYSCTL_HANDLER_ARGS)
4459 {
4460 	struct bnx_softc *sc = arg1;
4461 
4462 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
4463 	    &sc->bnx_rx_coal_ticks,
4464 	    BNX_RX_COAL_TICKS_MIN, BNX_RX_COAL_TICKS_MAX,
4465 	    BNX_RX_COAL_TICKS_CHG);
4466 }
4467 
4468 static int
4469 bnx_sysctl_tx_coal_ticks(SYSCTL_HANDLER_ARGS)
4470 {
4471 	struct bnx_softc *sc = arg1;
4472 
4473 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
4474 	    &sc->bnx_tx_coal_ticks,
4475 	    BNX_TX_COAL_TICKS_MIN, BNX_TX_COAL_TICKS_MAX,
4476 	    BNX_TX_COAL_TICKS_CHG);
4477 }
4478 
4479 static int
4480 bnx_sysctl_rx_coal_bds(SYSCTL_HANDLER_ARGS)
4481 {
4482 	struct bnx_softc *sc = arg1;
4483 
4484 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
4485 	    &sc->bnx_rx_coal_bds,
4486 	    BNX_RX_COAL_BDS_MIN, BNX_RX_COAL_BDS_MAX,
4487 	    BNX_RX_COAL_BDS_CHG);
4488 }
4489 
4490 static int
4491 bnx_sysctl_rx_coal_bds_poll(SYSCTL_HANDLER_ARGS)
4492 {
4493 	struct bnx_softc *sc = arg1;
4494 
4495 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
4496 	    &sc->bnx_rx_coal_bds_poll,
4497 	    BNX_RX_COAL_BDS_MIN, BNX_RX_COAL_BDS_MAX,
4498 	    BNX_RX_COAL_BDS_CHG);
4499 }
4500 
4501 static int
4502 bnx_sysctl_tx_coal_bds(SYSCTL_HANDLER_ARGS)
4503 {
4504 	struct bnx_softc *sc = arg1;
4505 
4506 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
4507 	    &sc->bnx_tx_coal_bds,
4508 	    BNX_TX_COAL_BDS_MIN, BNX_TX_COAL_BDS_MAX,
4509 	    BNX_TX_COAL_BDS_CHG);
4510 }
4511 
4512 static int
4513 bnx_sysctl_tx_coal_bds_poll(SYSCTL_HANDLER_ARGS)
4514 {
4515 	struct bnx_softc *sc = arg1;
4516 
4517 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
4518 	    &sc->bnx_tx_coal_bds_poll,
4519 	    BNX_TX_COAL_BDS_MIN, BNX_TX_COAL_BDS_MAX,
4520 	    BNX_TX_COAL_BDS_CHG);
4521 }
4522 
4523 static int
4524 bnx_sysctl_rx_coal_bds_int(SYSCTL_HANDLER_ARGS)
4525 {
4526 	struct bnx_softc *sc = arg1;
4527 
4528 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
4529 	    &sc->bnx_rx_coal_bds_int,
4530 	    BNX_RX_COAL_BDS_MIN, BNX_RX_COAL_BDS_MAX,
4531 	    BNX_RX_COAL_BDS_INT_CHG);
4532 }
4533 
4534 static int
4535 bnx_sysctl_tx_coal_bds_int(SYSCTL_HANDLER_ARGS)
4536 {
4537 	struct bnx_softc *sc = arg1;
4538 
4539 	return bnx_sysctl_coal_chg(oidp, arg1, arg2, req,
4540 	    &sc->bnx_tx_coal_bds_int,
4541 	    BNX_TX_COAL_BDS_MIN, BNX_TX_COAL_BDS_MAX,
4542 	    BNX_TX_COAL_BDS_INT_CHG);
4543 }
4544 
4545 static int
4546 bnx_sysctl_coal_chg(SYSCTL_HANDLER_ARGS, uint32_t *coal,
4547     int coal_min, int coal_max, uint32_t coal_chg_mask)
4548 {
4549 	struct bnx_softc *sc = arg1;
4550 	struct ifnet *ifp = &sc->arpcom.ac_if;
4551 	int error = 0, v;
4552 
4553 	ifnet_serialize_all(ifp);
4554 
4555 	v = *coal;
4556 	error = sysctl_handle_int(oidp, &v, 0, req);
4557 	if (!error && req->newptr != NULL) {
4558 		if (v < coal_min || v > coal_max) {
4559 			error = EINVAL;
4560 		} else {
4561 			*coal = v;
4562 			sc->bnx_coal_chg |= coal_chg_mask;
4563 
4564 			/* Commit changes */
4565 			bnx_coal_change(sc);
4566 		}
4567 	}
4568 
4569 	ifnet_deserialize_all(ifp);
4570 	return error;
4571 }
4572 
4573 static void
4574 bnx_coal_change(struct bnx_softc *sc)
4575 {
4576 	struct ifnet *ifp = &sc->arpcom.ac_if;
4577 	int i;
4578 
4579 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
4580 
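	/*
	 * For each coalescing parameter below: with a single RX return
	 * ring (or TX ring) only the host coalescing block's base
	 * register is programmed; with multiple rings the base register
	 * is zeroed and the per-vector registers are used instead.
	 * Per-vector registers of unused vectors are always cleared.
	 */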
4581 	if (sc->bnx_coal_chg & BNX_RX_COAL_TICKS_CHG) {
4582 		if (sc->bnx_rx_retcnt == 1) {
4583 			CSR_WRITE_4(sc, BGE_HCC_RX_COAL_TICKS,
4584 			    sc->bnx_rx_coal_ticks);
4585 			i = 0;
4586 		} else {
4587 			CSR_WRITE_4(sc, BGE_HCC_RX_COAL_TICKS, 0);
4588 			for (i = 0; i < sc->bnx_rx_retcnt; ++i) {
4589 				CSR_WRITE_4(sc, BGE_VEC1_RX_COAL_TICKS +
4590 				    (i * BGE_VEC_COALSET_SIZE),
4591 				    sc->bnx_rx_coal_ticks);
4592 			}
4593 		}
4594 		for (; i < BNX_INTR_MAX - 1; ++i) {
4595 			CSR_WRITE_4(sc, BGE_VEC1_RX_COAL_TICKS +
4596 			    (i * BGE_VEC_COALSET_SIZE), 0);
4597 		}
4598 		if (bootverbose) {
4599 			if_printf(ifp, "rx_coal_ticks -> %u\n",
4600 			    sc->bnx_rx_coal_ticks);
4601 		}
4602 	}
4603 
4604 	if (sc->bnx_coal_chg & BNX_TX_COAL_TICKS_CHG) {
4605 		if (sc->bnx_tx_ringcnt == 1) {
4606 			CSR_WRITE_4(sc, BGE_HCC_TX_COAL_TICKS,
4607 			    sc->bnx_tx_coal_ticks);
4608 			i = 0;
4609 		} else {
4610 			CSR_WRITE_4(sc, BGE_HCC_TX_COAL_TICKS, 0);
4611 			for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
4612 				CSR_WRITE_4(sc, BGE_VEC1_TX_COAL_TICKS +
4613 				    (i * BGE_VEC_COALSET_SIZE),
4614 				    sc->bnx_tx_coal_ticks);
4615 			}
4616 		}
4617 		for (; i < BNX_INTR_MAX - 1; ++i) {
4618 			CSR_WRITE_4(sc, BGE_VEC1_TX_COAL_TICKS +
4619 			    (i * BGE_VEC_COALSET_SIZE), 0);
4620 		}
4621 		if (bootverbose) {
4622 			if_printf(ifp, "tx_coal_ticks -> %u\n",
4623 			    sc->bnx_tx_coal_ticks);
4624 		}
4625 	}
4626 
4627 	if (sc->bnx_coal_chg & BNX_RX_COAL_BDS_CHG) {
4628 		uint32_t rx_coal_bds;
4629 
4630 		if (ifp->if_flags & IFF_NPOLLING)
4631 			rx_coal_bds = sc->bnx_rx_coal_bds_poll;
4632 		else
4633 			rx_coal_bds = sc->bnx_rx_coal_bds;
4634 
4635 		if (sc->bnx_rx_retcnt == 1) {
4636 			CSR_WRITE_4(sc, BGE_HCC_RX_MAX_COAL_BDS, rx_coal_bds);
4637 			i = 0;
4638 		} else {
4639 			CSR_WRITE_4(sc, BGE_HCC_RX_MAX_COAL_BDS, 0);
4640 			for (i = 0; i < sc->bnx_rx_retcnt; ++i) {
4641 				CSR_WRITE_4(sc, BGE_VEC1_RX_MAX_COAL_BDS +
4642 				    (i * BGE_VEC_COALSET_SIZE), rx_coal_bds);
4643 			}
4644 		}
4645 		for (; i < BNX_INTR_MAX - 1; ++i) {
4646 			CSR_WRITE_4(sc, BGE_VEC1_RX_MAX_COAL_BDS +
4647 			    (i * BGE_VEC_COALSET_SIZE), 0);
4648 		}
4649 		if (bootverbose) {
4650 			if_printf(ifp, "%srx_coal_bds -> %u\n",
4651 			    (ifp->if_flags & IFF_NPOLLING) ? "polling " : "",
4652 			    rx_coal_bds);
4653 		}
4654 	}
4655 
4656 	if (sc->bnx_coal_chg & BNX_TX_COAL_BDS_CHG) {
4657 		uint32_t tx_coal_bds;
4658 
4659 		if (ifp->if_flags & IFF_NPOLLING)
4660 			tx_coal_bds = sc->bnx_tx_coal_bds_poll;
4661 		else
4662 			tx_coal_bds = sc->bnx_tx_coal_bds;
4663 
4664 		if (sc->bnx_tx_ringcnt == 1) {
4665 			CSR_WRITE_4(sc, BGE_HCC_TX_MAX_COAL_BDS, tx_coal_bds);
4666 			i = 0;
4667 		} else {
4668 			CSR_WRITE_4(sc, BGE_HCC_TX_MAX_COAL_BDS, 0);
4669 			for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
4670 				CSR_WRITE_4(sc, BGE_VEC1_TX_MAX_COAL_BDS +
4671 				    (i * BGE_VEC_COALSET_SIZE), tx_coal_bds);
4672 			}
4673 		}
4674 		for (; i < BNX_INTR_MAX - 1; ++i) {
4675 			CSR_WRITE_4(sc, BGE_VEC1_TX_MAX_COAL_BDS +
4676 			    (i * BGE_VEC_COALSET_SIZE), 0);
4677 		}
4678 		if (bootverbose) {
4679 			if_printf(ifp, "%stx_coal_bds -> %u\n",
4680 			    (ifp->if_flags & IFF_NPOLLING) ? "polling " : "",
4681 			    tx_coal_bds);
4682 		}
4683 	}
4684 
4685 	if (sc->bnx_coal_chg & BNX_RX_COAL_BDS_INT_CHG) {
4686 		if (sc->bnx_rx_retcnt == 1) {
4687 			CSR_WRITE_4(sc, BGE_HCC_RX_MAX_COAL_BDS_INT,
4688 			    sc->bnx_rx_coal_bds_int);
4689 			i = 0;
4690 		} else {
4691 			CSR_WRITE_4(sc, BGE_HCC_RX_MAX_COAL_BDS_INT, 0);
4692 			for (i = 0; i < sc->bnx_rx_retcnt; ++i) {
4693 				CSR_WRITE_4(sc, BGE_VEC1_RX_MAX_COAL_BDS_INT +
4694 				    (i * BGE_VEC_COALSET_SIZE),
4695 				    sc->bnx_rx_coal_bds_int);
4696 			}
4697 		}
4698 		for (; i < BNX_INTR_MAX - 1; ++i) {
4699 			CSR_WRITE_4(sc, BGE_VEC1_RX_MAX_COAL_BDS_INT +
4700 			    (i * BGE_VEC_COALSET_SIZE), 0);
4701 		}
4702 		if (bootverbose) {
4703 			if_printf(ifp, "rx_coal_bds_int -> %u\n",
4704 			    sc->bnx_rx_coal_bds_int);
4705 		}
4706 	}
4707 
4708 	if (sc->bnx_coal_chg & BNX_TX_COAL_BDS_INT_CHG) {
4709 		if (sc->bnx_tx_ringcnt == 1) {
4710 			CSR_WRITE_4(sc, BGE_HCC_TX_MAX_COAL_BDS_INT,
4711 			    sc->bnx_tx_coal_bds_int);
4712 			i = 0;
4713 		} else {
4714 			CSR_WRITE_4(sc, BGE_HCC_TX_MAX_COAL_BDS_INT, 0);
4715 			for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
4716 				CSR_WRITE_4(sc, BGE_VEC1_TX_MAX_COAL_BDS_INT +
4717 				    (i * BGE_VEC_COALSET_SIZE),
4718 				    sc->bnx_tx_coal_bds_int);
4719 			}
4720 		}
4721 		for (; i < BNX_INTR_MAX - 1; ++i) {
4722 			CSR_WRITE_4(sc, BGE_VEC1_TX_MAX_COAL_BDS_INT +
4723 			    (i * BGE_VEC_COALSET_SIZE), 0);
4724 		}
4725 		if (bootverbose) {
4726 			if_printf(ifp, "tx_coal_bds_int -> %u\n",
4727 			    sc->bnx_tx_coal_bds_int);
4728 		}
4729 	}
4730 
4731 	sc->bnx_coal_chg = 0;
4732 }
4733 
4734 static void
4735 bnx_check_intr_rxtx(void *xintr)
4736 {
4737 	struct bnx_intr_data *intr = xintr;
4738 	struct bnx_rx_ret_ring *ret;
4739 	struct bnx_tx_ring *txr;
4740 	struct ifnet *ifp;
4741 
4742 	lwkt_serialize_enter(intr->bnx_intr_serialize);
4743 
4744 	KKASSERT(mycpuid == intr->bnx_intr_cpuid);
4745 
4746 	ifp = &intr->bnx_sc->arpcom.ac_if;
4747 	if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) != IFF_RUNNING) {
4748 		lwkt_serialize_exit(intr->bnx_intr_serialize);
4749 		return;
4750 	}
4751 
4752 	txr = intr->bnx_txr;
4753 	ret = intr->bnx_ret;
4754 
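	/*
	 * If the hardware consumer indices differ from what the driver
	 * last processed, but nothing has been processed since the
	 * previous check, an interrupt was probably lost; after two
	 * consecutive such observations, run the interrupt handler
	 * manually.
	 */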
4755 	if (*ret->bnx_rx_considx != ret->bnx_rx_saved_considx ||
4756 	    *txr->bnx_tx_considx != txr->bnx_tx_saved_considx) {
4757 		if (intr->bnx_rx_check_considx == ret->bnx_rx_saved_considx &&
4758 		    intr->bnx_tx_check_considx == txr->bnx_tx_saved_considx) {
4759 			if (!intr->bnx_intr_maylose) {
4760 				intr->bnx_intr_maylose = TRUE;
4761 				goto done;
4762 			}
4763 			if (bootverbose)
4764 				if_printf(ifp, "lost interrupt\n");
4765 			intr->bnx_intr_func(intr->bnx_intr_arg);
4766 		}
4767 	}
4768 	intr->bnx_intr_maylose = FALSE;
4769 	intr->bnx_rx_check_considx = ret->bnx_rx_saved_considx;
4770 	intr->bnx_tx_check_considx = txr->bnx_tx_saved_considx;
4771 
4772 done:
4773 	callout_reset(&intr->bnx_intr_timer, BNX_INTR_CKINTVL,
4774 	    intr->bnx_intr_check, intr);
4775 	lwkt_serialize_exit(intr->bnx_intr_serialize);
4776 }
4777 
4778 static void
4779 bnx_check_intr_tx(void *xintr)
4780 {
4781 	struct bnx_intr_data *intr = xintr;
4782 	struct bnx_tx_ring *txr;
4783 	struct ifnet *ifp;
4784 
4785 	lwkt_serialize_enter(intr->bnx_intr_serialize);
4786 
4787 	KKASSERT(mycpuid == intr->bnx_intr_cpuid);
4788 
4789 	ifp = &intr->bnx_sc->arpcom.ac_if;
4790 	if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) != IFF_RUNNING) {
4791 		lwkt_serialize_exit(intr->bnx_intr_serialize);
4792 		return;
4793 	}
4794 
4795 	txr = intr->bnx_txr;
4796 
4797 	if (*txr->bnx_tx_considx != txr->bnx_tx_saved_considx) {
4798 		if (intr->bnx_tx_check_considx == txr->bnx_tx_saved_considx) {
4799 			if (!intr->bnx_intr_maylose) {
4800 				intr->bnx_intr_maylose = TRUE;
4801 				goto done;
4802 			}
4803 			if (bootverbose)
4804 				if_printf(ifp, "lost interrupt\n");
4805 			intr->bnx_intr_func(intr->bnx_intr_arg);
4806 		}
4807 	}
4808 	intr->bnx_intr_maylose = FALSE;
4809 	intr->bnx_tx_check_considx = txr->bnx_tx_saved_considx;
4810 
4811 done:
4812 	callout_reset(&intr->bnx_intr_timer, BNX_INTR_CKINTVL,
4813 	    intr->bnx_intr_check, intr);
4814 	lwkt_serialize_exit(intr->bnx_intr_serialize);
4815 }
4816 
4817 static void
4818 bnx_check_intr_rx(void *xintr)
4819 {
4820 	struct bnx_intr_data *intr = xintr;
4821 	struct bnx_rx_ret_ring *ret;
4822 	struct ifnet *ifp;
4823 
4824 	lwkt_serialize_enter(intr->bnx_intr_serialize);
4825 
4826 	KKASSERT(mycpuid == intr->bnx_intr_cpuid);
4827 
4828 	ifp = &intr->bnx_sc->arpcom.ac_if;
4829 	if ((ifp->if_flags & (IFF_RUNNING | IFF_NPOLLING)) != IFF_RUNNING) {
4830 		lwkt_serialize_exit(intr->bnx_intr_serialize);
4831 		return;
4832 	}
4833 
4834 	ret = intr->bnx_ret;
4835 
4836 	if (*ret->bnx_rx_considx != ret->bnx_rx_saved_considx) {
4837 		if (intr->bnx_rx_check_considx == ret->bnx_rx_saved_considx) {
4838 			if (!intr->bnx_intr_maylose) {
4839 				intr->bnx_intr_maylose = TRUE;
4840 				goto done;
4841 			}
4842 			if (bootverbose)
4843 				if_printf(ifp, "lost interrupt\n");
4844 			intr->bnx_intr_func(intr->bnx_intr_arg);
4845 		}
4846 	}
4847 	intr->bnx_intr_maylose = FALSE;
4848 	intr->bnx_rx_check_considx = ret->bnx_rx_saved_considx;
4849 
4850 done:
4851 	callout_reset(&intr->bnx_intr_timer, BNX_INTR_CKINTVL,
4852 	    intr->bnx_intr_check, intr);
4853 	lwkt_serialize_exit(intr->bnx_intr_serialize);
4854 }
4855 
4856 static void
4857 bnx_enable_intr(struct bnx_softc *sc)
4858 {
4859 	struct ifnet *ifp = &sc->arpcom.ac_if;
4860 	int i;
4861 
4862 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
4863 		lwkt_serialize_handler_enable(
4864 		    sc->bnx_intr_data[i].bnx_intr_serialize);
4865 	}
4866 
4867 	/*
4868 	 * Enable interrupt.
4869 	 */
4870 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
4871 		struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
4872 
4873 		bnx_writembx(sc, intr->bnx_intr_mbx,
4874 		    (*intr->bnx_saved_status_tag) << 24);
4875 		/* XXX Linux driver */
4876 		bnx_writembx(sc, intr->bnx_intr_mbx,
4877 		    (*intr->bnx_saved_status_tag) << 24);
4878 	}
4879 
4880 	/*
4881 	 * Unmask the interrupt when we stop polling.
4882 	 */
4883 	PCI_CLRBIT(sc->bnx_dev, BGE_PCI_MISC_CTL,
4884 	    BGE_PCIMISCCTL_MASK_PCI_INTR, 4);
4885 
4886 	/*
4887 	 * Trigger another interrupt, since the above write to
4888 	 * interrupt mailbox 0 may have acknowledged a pending
4889 	 * interrupt.
4890 	 */
4891 	BNX_SETBIT(sc, BGE_MISC_LOCAL_CTL, BGE_MLC_INTR_SET);
4892 
4893 	if (sc->bnx_flags & BNX_FLAG_STATUSTAG_BUG) {
4894 		if (bootverbose)
4895 			if_printf(ifp, "status tag bug workaround\n");
4896 
4897 		for (i = 0; i < sc->bnx_intr_cnt; ++i) {
4898 			struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
4899 
4900 			if (intr->bnx_intr_check == NULL)
4901 				continue;
4902 			intr->bnx_intr_maylose = FALSE;
4903 			intr->bnx_rx_check_considx = 0;
4904 			intr->bnx_tx_check_considx = 0;
4905 			callout_reset_bycpu(&intr->bnx_intr_timer,
4906 			    BNX_INTR_CKINTVL, intr->bnx_intr_check, intr,
4907 			    intr->bnx_intr_cpuid);
4908 		}
4909 	}
4910 }
4911 
4912 static void
4913 bnx_disable_intr(struct bnx_softc *sc)
4914 {
4915 	int i;
4916 
4917 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
4918 		struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
4919 
4920 		callout_stop(&intr->bnx_intr_timer);
4921 		intr->bnx_intr_maylose = FALSE;
4922 		intr->bnx_rx_check_considx = 0;
4923 		intr->bnx_tx_check_considx = 0;
4924 	}
4925 
4926 	/*
4927 	 * Mask the interrupt when we start polling.
4928 	 */
4929 	PCI_SETBIT(sc->bnx_dev, BGE_PCI_MISC_CTL,
4930 	    BGE_PCIMISCCTL_MASK_PCI_INTR, 4);
4931 
4932 	/*
4933 	 * Acknowledge possibly asserted interrupts.
4934 	 */
4935 	for (i = 0; i < BNX_INTR_MAX; ++i)
4936 		bnx_writembx(sc, sc->bnx_intr_data[i].bnx_intr_mbx, 1);
4937 
4938 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
4939 		lwkt_serialize_handler_disable(
4940 		    sc->bnx_intr_data[i].bnx_intr_serialize);
4941 	}
4942 }
4943 
4944 static int
4945 bnx_get_eaddr_mem(struct bnx_softc *sc, uint8_t ether_addr[])
4946 {
4947 	uint32_t mac_addr;
4948 	int ret = 1;
4949 
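	/*
	 * The MAC address may have been stashed in NIC memory by the
	 * bootcode; 0x484b ("HK") in the upper 16 bits appears to act
	 * as a validity signature for the stored address.
	 */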
4950 	mac_addr = bnx_readmem_ind(sc, 0x0c14);
4951 	if ((mac_addr >> 16) == 0x484b) {
4952 		ether_addr[0] = (uint8_t)(mac_addr >> 8);
4953 		ether_addr[1] = (uint8_t)mac_addr;
4954 		mac_addr = bnx_readmem_ind(sc, 0x0c18);
4955 		ether_addr[2] = (uint8_t)(mac_addr >> 24);
4956 		ether_addr[3] = (uint8_t)(mac_addr >> 16);
4957 		ether_addr[4] = (uint8_t)(mac_addr >> 8);
4958 		ether_addr[5] = (uint8_t)mac_addr;
4959 		ret = 0;
4960 	}
4961 	return ret;
4962 }
4963 
4964 static int
4965 bnx_get_eaddr_nvram(struct bnx_softc *sc, uint8_t ether_addr[])
4966 {
4967 	int mac_offset = BGE_EE_MAC_OFFSET;
4968 
4969 	if (BNX_IS_5717_PLUS(sc)) {
4970 		int f;
4971 
4972 		f = pci_get_function(sc->bnx_dev);
4973 		if (f & 1)
4974 			mac_offset = BGE_EE_MAC_OFFSET_5717;
4975 		if (f > 1)
4976 			mac_offset += BGE_EE_MAC_OFFSET_5717_OFF;
4977 	}
4978 
4979 	return bnx_read_nvram(sc, ether_addr, mac_offset + 2, ETHER_ADDR_LEN);
4980 }
4981 
4982 static int
4983 bnx_get_eaddr_eeprom(struct bnx_softc *sc, uint8_t ether_addr[])
4984 {
4985 	if (sc->bnx_flags & BNX_FLAG_NO_EEPROM)
4986 		return 1;
4987 
4988 	return bnx_read_eeprom(sc, ether_addr, BGE_EE_MAC_OFFSET + 2,
4989 			       ETHER_ADDR_LEN);
4990 }
4991 
4992 static int
4993 bnx_get_eaddr(struct bnx_softc *sc, uint8_t eaddr[])
4994 {
4995 	static const bnx_eaddr_fcn_t bnx_eaddr_funcs[] = {
4996 		/* NOTE: Order is critical */
4997 		bnx_get_eaddr_mem,
4998 		bnx_get_eaddr_nvram,
4999 		bnx_get_eaddr_eeprom,
5000 		NULL
5001 	};
5002 	const bnx_eaddr_fcn_t *func;
5003 
5004 	for (func = bnx_eaddr_funcs; *func != NULL; ++func) {
5005 		if ((*func)(sc, eaddr) == 0)
5006 			break;
5007 	}
5008 	return (*func == NULL ? ENXIO : 0);
5009 }
5010 
5011 /*
5012  * NOTE: 'm' is not freed upon failure
5013  */
5014 struct mbuf *
5015 bnx_defrag_shortdma(struct mbuf *m)
5016 {
5017 	struct mbuf *n;
5018 	int found;
5019 
5020 	/*
5021 	 * If the device receives two back-to-back send BDs with less than
5022 	 * or equal to 8 total bytes then the device may hang.  The two
5023 	 * back-to-back send BDs must be in the same frame for this failure
5024 	 * to occur.  Scan the mbuf chain and see whether two back-to-back
5025 	 * send BDs are there.  If this is the case, allocate a new mbuf
5026 	 * and copy the frame to work around the silicon bug.
5027 	 */
5028 	for (n = m, found = 0; n != NULL; n = n->m_next) {
5029 		if (n->m_len < 8) {
5030 			found++;
5031 			if (found > 1)
5032 				break;
5033 			continue;
5034 		}
5035 		found = 0;
5036 	}
5037 
5038 	if (found > 1)
5039 		n = m_defrag(m, M_NOWAIT);
5040 	else
5041 		n = m;
5042 	return n;
5043 }
5044 
5045 static void
5046 bnx_stop_block(struct bnx_softc *sc, bus_size_t reg, uint32_t bit)
5047 {
5048 	int i;
5049 
5050 	BNX_CLRBIT(sc, reg, bit);
5051 	for (i = 0; i < BNX_TIMEOUT; i++) {
5052 		if ((CSR_READ_4(sc, reg) & bit) == 0)
5053 			return;
5054 		DELAY(100);
5055 	}
5056 }
5057 
5058 static void
5059 bnx_link_poll(struct bnx_softc *sc)
5060 {
5061 	uint32_t status;
5062 
5063 	status = CSR_READ_4(sc, BGE_MAC_STS);
5064 	if ((status & sc->bnx_link_chg) || sc->bnx_link_evt) {
5065 		sc->bnx_link_evt = 0;
5066 		sc->bnx_link_upd(sc, status);
5067 	}
5068 }
5069 
5070 static void
5071 bnx_enable_msi(struct bnx_softc *sc, boolean_t is_msix)
5072 {
5073 	uint32_t msi_mode;
5074 
5075 	msi_mode = CSR_READ_4(sc, BGE_MSI_MODE);
5076 	msi_mode |= BGE_MSIMODE_ENABLE;
5077 	/*
5078 	 * NOTE:
5079 	 * 5718-PG105-R says that "one shot" mode does not work
5080 	 * if MSI is used; however, it obviously does work.
5081 	 */
5082 	msi_mode &= ~BGE_MSIMODE_ONESHOT_DISABLE;
5083 	if (is_msix)
5084 		msi_mode |= BGE_MSIMODE_MSIX_MULTIMODE;
5085 	else
5086 		msi_mode &= ~BGE_MSIMODE_MSIX_MULTIMODE;
5087 	CSR_WRITE_4(sc, BGE_MSI_MODE, msi_mode);
5088 }
5089 
5090 static uint32_t
5091 bnx_dma_swap_options(struct bnx_softc *sc)
5092 {
5093 	uint32_t dma_options;
5094 
5095 	dma_options = BGE_MODECTL_WORDSWAP_NONFRAME |
5096 	    BGE_MODECTL_BYTESWAP_DATA | BGE_MODECTL_WORDSWAP_DATA;
5097 #if BYTE_ORDER == BIG_ENDIAN
5098 	dma_options |= BGE_MODECTL_BYTESWAP_NONFRAME;
5099 #endif
5100 	return dma_options;
5101 }
5102 
5103 static int
5104 bnx_setup_tso(struct bnx_tx_ring *txr, struct mbuf **mp,
5105     uint16_t *mss0, uint16_t *flags0)
5106 {
5107 	struct mbuf *m;
5108 	struct ip *ip;
5109 	struct tcphdr *th;
5110 	int thoff, iphlen, hoff, hlen;
5111 	uint16_t flags, mss;
5112 
5113 	m = *mp;
5114 	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));
5115 
5116 	hoff = m->m_pkthdr.csum_lhlen;
5117 	iphlen = m->m_pkthdr.csum_iphlen;
5118 	thoff = m->m_pkthdr.csum_thlen;
5119 
5120 	KASSERT(hoff > 0, ("invalid ether header len"));
5121 	KASSERT(iphlen > 0, ("invalid ip header len"));
5122 	KASSERT(thoff > 0, ("invalid tcp header len"));
5123 
5124 	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
5125 		m = m_pullup(m, hoff + iphlen + thoff);
5126 		if (m == NULL) {
5127 			*mp = NULL;
5128 			return ENOBUFS;
5129 		}
5130 		*mp = m;
5131 	}
5132 	ip = mtodoff(m, struct ip *, hoff);
5133 	th = mtodoff(m, struct tcphdr *, hoff + iphlen);
5134 
5135 	mss = m->m_pkthdr.tso_segsz;
5136 	flags = BGE_TXBDFLAG_CPU_PRE_DMA | BGE_TXBDFLAG_CPU_POST_DMA;
5137 
5138 	ip->ip_len = htons(mss + iphlen + thoff);
5139 	th->th_sum = 0;
5140 
5141 	hlen = (iphlen + thoff) >> 2;
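	/*
	 * Encode the combined IP + TCP header length, in 32-bit words,
	 * into the send BD: the low two bits go into the top of the mss
	 * field, the remaining bits into the flags field.
	 */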
5142 	mss |= ((hlen & 0x3) << 14);
5143 	flags |= ((hlen & 0xf8) << 7) | ((hlen & 0x4) << 2);
5144 
5145 	*mss0 = mss;
5146 	*flags0 = flags;
5147 
5148 	return 0;
5149 }
5150 
5151 static int
5152 bnx_create_tx_ring(struct bnx_tx_ring *txr)
5153 {
5154 	bus_size_t txmaxsz, txmaxsegsz;
5155 	int i, error;
5156 
5157 	lwkt_serialize_init(&txr->bnx_tx_serialize);
5158 
5159 	/*
5160 	 * Create DMA tag and maps for TX mbufs.
5161 	 */
5162 	if (txr->bnx_sc->bnx_flags & BNX_FLAG_TSO)
5163 		txmaxsz = IP_MAXPACKET + sizeof(struct ether_vlan_header);
5164 	else
5165 		txmaxsz = BNX_JUMBO_FRAMELEN;
5166 	if (txr->bnx_sc->bnx_asicrev == BGE_ASICREV_BCM57766)
5167 		txmaxsegsz = MCLBYTES;
5168 	else
5169 		txmaxsegsz = PAGE_SIZE;
5170 	error = bus_dma_tag_create(txr->bnx_sc->bnx_cdata.bnx_parent_tag,
5171 	    1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
5172 	    txmaxsz, BNX_NSEG_NEW, txmaxsegsz,
5173 	    BUS_DMA_ALLOCNOW | BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE,
5174 	    &txr->bnx_tx_mtag);
5175 	if (error) {
5176 		device_printf(txr->bnx_sc->bnx_dev,
5177 		    "could not create TX mbuf DMA tag\n");
5178 		return error;
5179 	}
5180 
5181 	for (i = 0; i < BGE_TX_RING_CNT; i++) {
5182 		error = bus_dmamap_create(txr->bnx_tx_mtag,
5183 		    BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE,
5184 		    &txr->bnx_tx_buf[i].bnx_tx_dmamap);
5185 		if (error) {
5186 			int j;
5187 
5188 			for (j = 0; j < i; ++j) {
5189 				bus_dmamap_destroy(txr->bnx_tx_mtag,
5190 				    txr->bnx_tx_buf[j].bnx_tx_dmamap);
5191 			}
5192 			bus_dma_tag_destroy(txr->bnx_tx_mtag);
5193 			txr->bnx_tx_mtag = NULL;
5194 
5195 			device_printf(txr->bnx_sc->bnx_dev,
5196 			    "could not create TX mbuf DMA map\n");
5197 			return error;
5198 		}
5199 	}
5200 
5201 	/*
5202 	 * Create DMA stuffs for TX ring.
5203 	 */
5204 	error = bnx_dma_block_alloc(txr->bnx_sc, BGE_TX_RING_SZ,
5205 	    &txr->bnx_tx_ring_tag,
5206 	    &txr->bnx_tx_ring_map,
5207 	    (void *)&txr->bnx_tx_ring,
5208 	    &txr->bnx_tx_ring_paddr);
5209 	if (error) {
5210 		device_printf(txr->bnx_sc->bnx_dev,
5211 		    "could not create TX ring\n");
5212 		return error;
5213 	}
5214 
5215 	txr->bnx_tx_flags |= BNX_TX_FLAG_SHORTDMA;
5216 	txr->bnx_tx_wreg = BNX_TX_WREG_NSEGS;
5217 
5218 	return 0;
5219 }
5220 
5221 static void
5222 bnx_destroy_tx_ring(struct bnx_tx_ring *txr)
5223 {
5224 	/* Destroy TX mbuf DMA stuffs. */
5225 	if (txr->bnx_tx_mtag != NULL) {
5226 		int i;
5227 
5228 		for (i = 0; i < BGE_TX_RING_CNT; i++) {
5229 			KKASSERT(txr->bnx_tx_buf[i].bnx_tx_mbuf == NULL);
5230 			bus_dmamap_destroy(txr->bnx_tx_mtag,
5231 			    txr->bnx_tx_buf[i].bnx_tx_dmamap);
5232 		}
5233 		bus_dma_tag_destroy(txr->bnx_tx_mtag);
5234 	}
5235 
5236 	/* Destroy TX ring */
5237 	bnx_dma_block_free(txr->bnx_tx_ring_tag,
5238 	    txr->bnx_tx_ring_map, txr->bnx_tx_ring);
5239 }
5240 
5241 static int
5242 bnx_sysctl_force_defrag(SYSCTL_HANDLER_ARGS)
5243 {
5244 	struct bnx_softc *sc = (void *)arg1;
5245 	struct ifnet *ifp = &sc->arpcom.ac_if;
5246 	struct bnx_tx_ring *txr = &sc->bnx_tx_ring[0];
5247 	int error, defrag, i;
5248 
5249 	if (txr->bnx_tx_flags & BNX_TX_FLAG_FORCE_DEFRAG)
5250 		defrag = 1;
5251 	else
5252 		defrag = 0;
5253 
5254 	error = sysctl_handle_int(oidp, &defrag, 0, req);
5255 	if (error || req->newptr == NULL)
5256 		return error;
5257 
5258 	ifnet_serialize_all(ifp);
5259 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i) {
5260 		txr = &sc->bnx_tx_ring[i];
5261 		if (defrag)
5262 			txr->bnx_tx_flags |= BNX_TX_FLAG_FORCE_DEFRAG;
5263 		else
5264 			txr->bnx_tx_flags &= ~BNX_TX_FLAG_FORCE_DEFRAG;
5265 	}
5266 	ifnet_deserialize_all(ifp);
5267 
5268 	return 0;
5269 }
5270 
5271 static int
5272 bnx_sysctl_tx_wreg(SYSCTL_HANDLER_ARGS)
5273 {
5274 	struct bnx_softc *sc = (void *)arg1;
5275 	struct ifnet *ifp = &sc->arpcom.ac_if;
5276 	struct bnx_tx_ring *txr = &sc->bnx_tx_ring[0];
5277 	int error, tx_wreg, i;
5278 
5279 	tx_wreg = txr->bnx_tx_wreg;
5280 	error = sysctl_handle_int(oidp, &tx_wreg, 0, req);
5281 	if (error || req->newptr == NULL)
5282 		return error;
5283 
5284 	ifnet_serialize_all(ifp);
5285 	for (i = 0; i < sc->bnx_tx_ringcnt; ++i)
5286 		sc->bnx_tx_ring[i].bnx_tx_wreg = tx_wreg;
5287 	ifnet_deserialize_all(ifp);
5288 
5289 	return 0;
5290 }
5291 
5292 static int
5293 bnx_create_rx_ret_ring(struct bnx_rx_ret_ring *ret)
5294 {
5295 	int error;
5296 
5297 	lwkt_serialize_init(&ret->bnx_rx_ret_serialize);
5298 
5299 	/*
5300 	 * Create the DMA resources for the RX return ring.
5301 	 */
5302 	error = bnx_dma_block_alloc(ret->bnx_sc,
5303 	    BGE_RX_RTN_RING_SZ(BNX_RETURN_RING_CNT),
5304 	    &ret->bnx_rx_ret_ring_tag,
5305 	    &ret->bnx_rx_ret_ring_map,
5306 	    (void *)&ret->bnx_rx_ret_ring,
5307 	    &ret->bnx_rx_ret_ring_paddr);
5308 	if (error) {
5309 		device_printf(ret->bnx_sc->bnx_dev,
5310 		    "could not create RX ret ring\n");
5311 		return error;
5312 	}
5313 
5314 	/* Shadow standard ring's RX mbuf DMA tag */
5315 	ret->bnx_rx_mtag = ret->bnx_std->bnx_rx_mtag;
5316 
5317 	/*
5318 	 * Create tmp DMA map for RX mbufs.
5319 	 */
5320 	error = bus_dmamap_create(ret->bnx_rx_mtag, BUS_DMA_WAITOK,
5321 	    &ret->bnx_rx_tmpmap);
5322 	if (error) {
5323 		device_printf(ret->bnx_sc->bnx_dev,
5324 		    "could not create tmp RX mbuf DMA map\n");
5325 		ret->bnx_rx_mtag = NULL;
5326 		return error;
5327 	}
5328 	return 0;
5329 }
5330 
5331 static void
5332 bnx_destroy_rx_ret_ring(struct bnx_rx_ret_ring *ret)
5333 {
5334 	/* Destroy tmp RX mbuf DMA map */
5335 	if (ret->bnx_rx_mtag != NULL)
5336 		bus_dmamap_destroy(ret->bnx_rx_mtag, ret->bnx_rx_tmpmap);
5337 
5338 	/* Destroy RX return ring */
5339 	bnx_dma_block_free(ret->bnx_rx_ret_ring_tag,
5340 	    ret->bnx_rx_ret_ring_map, ret->bnx_rx_ret_ring);
5341 }
5342 
5343 static int
5344 bnx_alloc_intr(struct bnx_softc *sc)
5345 {
5346 	struct bnx_intr_data *intr;
5347 	u_int intr_flags;
5348 	int error;
5349 
5350 	if (sc->bnx_intr_cnt > 1) {
5351 		error = bnx_alloc_msix(sc);
5352 		if (error)
5353 			return error;
5354 		KKASSERT(sc->bnx_intr_type == PCI_INTR_TYPE_MSIX);
5355 		return 0;
5356 	}
5357 
5358 	KKASSERT(sc->bnx_intr_cnt == 1);
5359 
5360 	intr = &sc->bnx_intr_data[0];
5361 	intr->bnx_ret = &sc->bnx_rx_ret_ring[0];
5362 	intr->bnx_txr = &sc->bnx_tx_ring[0];
5363 	intr->bnx_intr_serialize = &sc->bnx_main_serialize;
5364 	intr->bnx_intr_check = bnx_check_intr_rxtx;
5365 	intr->bnx_saved_status_tag = &intr->bnx_ret->bnx_saved_status_tag;
5366 
5367 	sc->bnx_intr_type = pci_alloc_1intr(sc->bnx_dev, bnx_msi_enable,
5368 	    &intr->bnx_intr_rid, &intr_flags);
5369 
5370 	intr->bnx_intr_res = bus_alloc_resource_any(sc->bnx_dev, SYS_RES_IRQ,
5371 	    &intr->bnx_intr_rid, intr_flags);
5372 	if (intr->bnx_intr_res == NULL) {
5373 		device_printf(sc->bnx_dev, "could not alloc interrupt\n");
5374 		return ENXIO;
5375 	}
5376 
5377 	if (sc->bnx_intr_type == PCI_INTR_TYPE_MSI) {
5378 		bnx_enable_msi(sc, FALSE);
5379 		intr->bnx_intr_func = bnx_msi;
5380 		if (bootverbose)
5381 			device_printf(sc->bnx_dev, "oneshot MSI\n");
5382 	} else {
5383 		intr->bnx_intr_func = bnx_intr_legacy;
5384 	}
5385 	intr->bnx_intr_arg = sc;
5386 	intr->bnx_intr_cpuid = rman_get_cpuid(intr->bnx_intr_res);
5387 
5388 	intr->bnx_txr->bnx_tx_cpuid = intr->bnx_intr_cpuid;
5389 
5390 	return 0;
5391 }
5392 
5393 static int
5394 bnx_setup_intr(struct bnx_softc *sc)
5395 {
5396 	int error, i;
5397 
5398 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
5399 		struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
5400 
5401 		error = bus_setup_intr_descr(sc->bnx_dev, intr->bnx_intr_res,
5402 		    INTR_MPSAFE, intr->bnx_intr_func, intr->bnx_intr_arg,
5403 		    &intr->bnx_intr_hand, intr->bnx_intr_serialize,
5404 		    intr->bnx_intr_desc);
5405 		if (error) {
5406 			device_printf(sc->bnx_dev,
5407 			    "could not set up intr %d\n", i);
5408 			bnx_teardown_intr(sc, i);
5409 			return error;
5410 		}
5411 	}
5412 	return 0;
5413 }
5414 
5415 static void
5416 bnx_teardown_intr(struct bnx_softc *sc, int cnt)
5417 {
5418 	int i;
5419 
5420 	for (i = 0; i < cnt; ++i) {
5421 		struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
5422 
5423 		bus_teardown_intr(sc->bnx_dev, intr->bnx_intr_res,
5424 		    intr->bnx_intr_hand);
5425 	}
5426 }
5427 
5428 static void
5429 bnx_free_intr(struct bnx_softc *sc)
5430 {
5431 	if (sc->bnx_intr_type != PCI_INTR_TYPE_MSIX) {
5432 		struct bnx_intr_data *intr;
5433 
5434 		KKASSERT(sc->bnx_intr_cnt <= 1);
5435 		intr = &sc->bnx_intr_data[0];
5436 
5437 		if (intr->bnx_intr_res != NULL) {
5438 			bus_release_resource(sc->bnx_dev, SYS_RES_IRQ,
5439 			    intr->bnx_intr_rid, intr->bnx_intr_res);
5440 		}
5441 		if (sc->bnx_intr_type == PCI_INTR_TYPE_MSI)
5442 			pci_release_msi(sc->bnx_dev);
5443 	} else {
5444 		bnx_free_msix(sc, TRUE);
5445 	}
5446 }
5447 
5448 static void
5449 bnx_setup_serialize(struct bnx_softc *sc)
5450 {
5451 	int i, j;
5452 
5453 	/*
5454 	 * Allocate serializer array
5455 	 */
5456 
5457 	/* Main + RX STD + TX + RX RET */
5458 	sc->bnx_serialize_cnt = 1 + 1 + sc->bnx_tx_ringcnt + sc->bnx_rx_retcnt;
5459 
5460 	sc->bnx_serialize =
5461 	    kmalloc(sc->bnx_serialize_cnt * sizeof(struct lwkt_serialize *),
5462 	        M_DEVBUF, M_WAITOK | M_ZERO);
5463 
5464 	/*
5465 	 * Setup serializers
5466 	 *
5467 	 * NOTE: Order is critical
5468 	 */
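	/*
	 * The main serializer must stay at index 0;
	 * bnx_serialize_skipmain() relies on it being the single leading
	 * entry that it skips.
	 */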
5469 
5470 	i = 0;
5471 
5472 	KKASSERT(i < sc->bnx_serialize_cnt);
5473 	sc->bnx_serialize[i++] = &sc->bnx_main_serialize;
5474 
5475 	KKASSERT(i < sc->bnx_serialize_cnt);
5476 	sc->bnx_serialize[i++] = &sc->bnx_rx_std_ring.bnx_rx_std_serialize;
5477 
5478 	for (j = 0; j < sc->bnx_rx_retcnt; ++j) {
5479 		KKASSERT(i < sc->bnx_serialize_cnt);
5480 		sc->bnx_serialize[i++] =
5481 		    &sc->bnx_rx_ret_ring[j].bnx_rx_ret_serialize;
5482 	}
5483 
5484 	for (j = 0; j < sc->bnx_tx_ringcnt; ++j) {
5485 		KKASSERT(i < sc->bnx_serialize_cnt);
5486 		sc->bnx_serialize[i++] =
5487 		    &sc->bnx_tx_ring[j].bnx_tx_serialize;
5488 	}
5489 
5490 	KKASSERT(i == sc->bnx_serialize_cnt);
5491 }
5492 
5493 static void
5494 bnx_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
5495 {
5496 	struct bnx_softc *sc = ifp->if_softc;
5497 
5498 	ifnet_serialize_array_enter(sc->bnx_serialize,
5499 	    sc->bnx_serialize_cnt, slz);
5500 }
5501 
5502 static void
5503 bnx_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
5504 {
5505 	struct bnx_softc *sc = ifp->if_softc;
5506 
5507 	ifnet_serialize_array_exit(sc->bnx_serialize,
5508 	    sc->bnx_serialize_cnt, slz);
5509 }
5510 
5511 static int
5512 bnx_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
5513 {
5514 	struct bnx_softc *sc = ifp->if_softc;
5515 
5516 	return ifnet_serialize_array_try(sc->bnx_serialize,
5517 	    sc->bnx_serialize_cnt, slz);
5518 }
5519 
5520 #ifdef INVARIANTS
5521 
5522 static void
5523 bnx_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
5524     boolean_t serialized)
5525 {
5526 	struct bnx_softc *sc = ifp->if_softc;
5527 
5528 	ifnet_serialize_array_assert(sc->bnx_serialize, sc->bnx_serialize_cnt,
5529 	    slz, serialized);
5530 }
5531 
5532 #endif	/* INVARIANTS */
5533 
5534 #ifdef IFPOLL_ENABLE
5535 
5536 static int
5537 bnx_sysctl_npoll_offset(SYSCTL_HANDLER_ARGS)
5538 {
5539 	struct bnx_softc *sc = (void *)arg1;
5540 	struct ifnet *ifp = &sc->arpcom.ac_if;
5541 	int error, off;
5542 
5543 	off = sc->bnx_npoll_rxoff;
5544 	error = sysctl_handle_int(oidp, &off, 0, req);
5545 	if (error || req->newptr == NULL)
5546 		return error;
5547 	if (off < 0)
5548 		return EINVAL;
5549 
5550 	ifnet_serialize_all(ifp);
5551 	if (off >= ncpus2 || off % sc->bnx_rx_retcnt != 0) {
5552 		error = EINVAL;
5553 	} else {
5554 		error = 0;
5555 		sc->bnx_npoll_txoff = off;
5556 		sc->bnx_npoll_rxoff = off;
5557 	}
5558 	ifnet_deserialize_all(ifp);
5559 
5560 	return error;
5561 }
5562 
5563 static int
5564 bnx_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS)
5565 {
5566 	struct bnx_softc *sc = (void *)arg1;
5567 	struct ifnet *ifp = &sc->arpcom.ac_if;
5568 	int error, off;
5569 
5570 	off = sc->bnx_npoll_rxoff;
5571 	error = sysctl_handle_int(oidp, &off, 0, req);
5572 	if (error || req->newptr == NULL)
5573 		return error;
5574 	if (off < 0)
5575 		return EINVAL;
5576 
5577 	ifnet_serialize_all(ifp);
5578 	if (off >= ncpus2 || off % sc->bnx_rx_retcnt != 0) {
5579 		error = EINVAL;
5580 	} else {
5581 		error = 0;
5582 		sc->bnx_npoll_rxoff = off;
5583 	}
5584 	ifnet_deserialize_all(ifp);
5585 
5586 	return error;
5587 }
5588 
5589 static int
5590 bnx_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS)
5591 {
5592 	struct bnx_softc *sc = (void *)arg1;
5593 	struct ifnet *ifp = &sc->arpcom.ac_if;
5594 	int error, off;
5595 
5596 	off = sc->bnx_npoll_txoff;
5597 	error = sysctl_handle_int(oidp, &off, 0, req);
5598 	if (error || req->newptr == NULL)
5599 		return error;
5600 	if (off < 0)
5601 		return EINVAL;
5602 
5603 	ifnet_serialize_all(ifp);
5604 	if (off >= ncpus2) {
5605 		error = EINVAL;
5606 	} else {
5607 		error = 0;
5608 		sc->bnx_npoll_txoff = off;
5609 	}
5610 	ifnet_deserialize_all(ifp);
5611 
5612 	return error;
5613 }
5614 
5615 #endif	/* IFPOLL_ENABLE */
5616 
5617 static void
5618 bnx_set_tick_cpuid(struct bnx_softc *sc, boolean_t polling)
5619 {
5620 	if (polling)
5621 		sc->bnx_tick_cpuid = 0; /* XXX */
5622 	else
5623 		sc->bnx_tick_cpuid = sc->bnx_intr_data[0].bnx_intr_cpuid;
5624 }
5625 
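/*
 * Standard RX ring refill interrupt thread.
 *
 * RX return rings request refills by setting their bits in
 * bnx_rx_std_refill (see bnx_rx_std_refill_sched()), while
 * bnx_rx_std_running marks a refill pass as being in flight.  This
 * thread runs the refill under the standard ring serializer, drops the
 * running flag, and deschedules itself until more work or a stop
 * request arrives; on stop it wakes any waiter and exits.
 */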
5626 static void
5627 bnx_rx_std_refill_ithread(void *xstd)
5628 {
5629 	struct bnx_rx_std_ring *std = xstd;
5630 	struct globaldata *gd = mycpu;
5631 
5632 	crit_enter_gd(gd);
5633 
5634 	while (!std->bnx_rx_std_stop) {
5635 		if (std->bnx_rx_std_refill) {
5636 			lwkt_serialize_handler_call(
5637 			    &std->bnx_rx_std_serialize,
5638 			    bnx_rx_std_refill, std, NULL);
5639 		}
5640 
5641 		crit_exit_gd(gd);
5642 		crit_enter_gd(gd);
5643 
5644 		atomic_poll_release_int(&std->bnx_rx_std_running);
5645 		cpu_mfence();
5646 
5647 		if (!std->bnx_rx_std_refill && !std->bnx_rx_std_stop) {
5648 			lwkt_deschedule_self(gd->gd_curthread);
5649 			lwkt_switch();
5650 		}
5651 	}
5652 
5653 	crit_exit_gd(gd);
5654 
5655 	wakeup(std);
5656 
5657 	lwkt_exit();
5658 }
5659 
5660 static void
5661 bnx_rx_std_refill(void *xstd, void *frame __unused)
5662 {
5663 	struct bnx_rx_std_ring *std = xstd;
5664 	int cnt, refill_mask;
5665 
5666 again:
5667 	cnt = 0;
5668 
5669 	cpu_lfence();
5670 	refill_mask = std->bnx_rx_std_refill;
5671 	atomic_clear_int(&std->bnx_rx_std_refill, refill_mask);
5672 
5673 	while (refill_mask) {
5674 		uint16_t check_idx = std->bnx_rx_std;
5675 		int ret_idx;
5676 
5677 		ret_idx = bsfl(refill_mask);
5678 		for (;;) {
5679 			struct bnx_rx_buf *rb;
5680 			int refilled;
5681 
5682 			BNX_INC(check_idx, BGE_STD_RX_RING_CNT);
5683 			rb = &std->bnx_rx_std_buf[check_idx];
5684 			refilled = rb->bnx_rx_refilled;
5685 			cpu_lfence();
5686 			if (refilled) {
5687 				bnx_setup_rxdesc_std(std, check_idx);
5688 				std->bnx_rx_std = check_idx;
5689 				++cnt;
5690 				if (cnt >= 8) {
5691 					atomic_subtract_int(
5692 					    &std->bnx_rx_std_used, cnt);
5693 					bnx_writembx(std->bnx_sc,
5694 					    BGE_MBX_RX_STD_PROD_LO,
5695 					    std->bnx_rx_std);
5696 					cnt = 0;
5697 				}
5698 			} else {
5699 				break;
5700 			}
5701 		}
5702 		refill_mask &= ~(1 << ret_idx);
5703 	}
5704 
5705 	if (cnt) {
5706 		atomic_subtract_int(&std->bnx_rx_std_used, cnt);
5707 		bnx_writembx(std->bnx_sc, BGE_MBX_RX_STD_PROD_LO,
5708 		    std->bnx_rx_std);
5709 	}
5710 
5711 	if (std->bnx_rx_std_refill)
5712 		goto again;
5713 
5714 	atomic_poll_release_int(&std->bnx_rx_std_running);
5715 	cpu_mfence();
5716 
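	/*
	 * Re-check after dropping bnx_rx_std_running: a requester that
	 * set its refill bit between the check above and the release
	 * would have seen the flag still held and skipped rescheduling
	 * us, so pick that work up here instead of losing it.
	 */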
5717 	if (std->bnx_rx_std_refill)
5718 		goto again;
5719 }
5720 
5721 static int
5722 bnx_sysctl_std_refill(SYSCTL_HANDLER_ARGS)
5723 {
5724 	struct bnx_softc *sc = (void *)arg1;
5725 	struct ifnet *ifp = &sc->arpcom.ac_if;
5726 	struct bnx_rx_ret_ring *ret = &sc->bnx_rx_ret_ring[0];
5727 	int error, cntmax, i;
5728 
5729 	cntmax = ret->bnx_rx_cntmax;
5730 	error = sysctl_handle_int(oidp, &cntmax, 0, req);
5731 	if (error || req->newptr == NULL)
5732 		return error;
5733 
5734 	ifnet_serialize_all(ifp);
5735 
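	/*
	 * The per-ring threshold times the number of RX return rings
	 * must stay below half of the standard RX ring.
	 */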
5736 	if ((cntmax * sc->bnx_rx_retcnt) >= BGE_STD_RX_RING_CNT / 2) {
5737 		error = EINVAL;
5738 		goto back;
5739 	}
5740 
5741 	for (i = 0; i < sc->bnx_rx_retcnt; ++i)
5742 		sc->bnx_rx_ret_ring[i].bnx_rx_cntmax = cntmax;
5743 	error = 0;
5744 
5745 back:
5746 	ifnet_deserialize_all(ifp);
5747 
5748 	return error;
5749 }
5750 
5751 static void
5752 bnx_init_rss(struct bnx_softc *sc)
5753 {
5754 	uint8_t key[BGE_RSS_KEYREG_CNT * BGE_RSS_KEYREG_SIZE];
5755 	int i, j, r;
5756 
5757 	KKASSERT(BNX_RSS_ENABLED(sc));
5758 
5759 	/*
5760 	 * Configure the RSS redirect table in the following fashion:
5761 	 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
5762 	 */
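	/*
	 * For example, with 4 RX return rings each indirection entry k
	 * simply holds (k % 4), so a hash value's low two bits pick the
	 * return ring directly.
	 */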
5763 	r = 0;
5764 	for (j = 0; j < BGE_RSS_INDIR_TBL_CNT; ++j) {
5765 		uint32_t tbl = 0;
5766 
5767 		for (i = 0; i < BGE_RSS_INDIR_TBLENT_CNT; ++i) {
5768 			uint32_t q;
5769 
5770 			q = r % sc->bnx_rx_retcnt;
5771 			tbl |= q << (BGE_RSS_INDIR_TBLENT_SHIFT *
5772 			    (BGE_RSS_INDIR_TBLENT_CNT - i - 1));
5773 			++r;
5774 		}
5775 
5776 		BNX_RSS_DPRINTF(sc, 1, "tbl%d %08x\n", j, tbl);
5777 		CSR_WRITE_4(sc, BGE_RSS_INDIR_TBL(j), tbl);
5778 	}
5779 
5780 	toeplitz_get_key(key, sizeof(key));
5781 	for (i = 0; i < BGE_RSS_KEYREG_CNT; ++i) {
5782 		uint32_t keyreg;
5783 
5784 		keyreg = BGE_RSS_KEYREG_VAL(key, i);
5785 
5786 		BNX_RSS_DPRINTF(sc, 1, "key%d %08x\n", i, keyreg);
5787 		CSR_WRITE_4(sc, BGE_RSS_KEYREG(i), keyreg);
5788 	}
5789 }
5790 
5791 static void
5792 bnx_setup_ring_cnt(struct bnx_softc *sc)
5793 {
5794 	int msix_enable, i, msix_cnt, msix_cnt2, ring_max;
5795 
5796 	sc->bnx_tx_ringcnt = 1;
5797 	sc->bnx_rx_retcnt = 1;
5798 	sc->bnx_intr_cnt = 1;
5799 
5800 	msix_enable = device_getenv_int(sc->bnx_dev, "msix.enable",
5801 	    bnx_msix_enable);
5802 	if (!msix_enable)
5803 		return;
5804 
5805 	if (ncpus2 == 1)
5806 		return;
5807 
5808 	msix_cnt = pci_msix_count(sc->bnx_dev);
5809 	if (msix_cnt <= 1)
5810 		return;
5811 
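	/*
	 * Round the vector count down to the largest power of 2 it
	 * contains (e.g. 5 -> 4, 17 -> 16); the ring counts below are
	 * derived from that value.
	 */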
5812 	i = 0;
5813 	while ((1 << (i + 1)) <= msix_cnt)
5814 		++i;
5815 	msix_cnt2 = 1 << i;
5816 
5817 	/*
5818 	 * One MSI-X vector is dedicated to the link status or the single
5819 	 * TX queue, so make sure enough vectors remain for the RX rings.
5820 	 */
5821 	if (msix_cnt == msix_cnt2) {
5822 		/*
5823 		 * XXX
5824 		 * This probably will not happen; 57785/5718 families
5825 		 * come with at least 5 MSI-X vectors.
5826 		 */
5827 		msix_cnt2 >>= 1;
5828 		if (msix_cnt2 <= 1) {
5829 			device_printf(sc->bnx_dev,
5830 			    "MSI-X count %d could not be used\n", msix_cnt);
5831 			return;
5832 		}
5833 		device_printf(sc->bnx_dev, "MSI-X count %d is a power of 2\n",
5834 		    msix_cnt);
5835 	}
5836 
5837 	/*
5838 	 * Setup RX ring count
5839 	 */
5840 	ring_max = BNX_RX_RING_MAX;
5841 	if (ring_max > msix_cnt2)
5842 		ring_max = msix_cnt2;
5843 	sc->bnx_rx_retcnt = device_getenv_int(sc->bnx_dev, "rx_rings",
5844 	    bnx_rx_rings);
5845 	sc->bnx_rx_retcnt = if_ring_count2(sc->bnx_rx_retcnt, ring_max);
5846 
5847 	if (sc->bnx_rx_retcnt == 1)
5848 		return;
5849 
5850 	/*
5851 	 * We need one extra MSI-X vector for the link status, or for the
5852 	 * TX ring when only one TX ring is enabled.
5853 	 */
5854 	sc->bnx_intr_cnt = sc->bnx_rx_retcnt + 1;
5855 
5856 	/*
5857 	 * Setup TX ring count
5858 	 *
5859 	 * Currently only BCM5719 and BCM5720 support multiple TX rings
5860 	 * and the TX ring count must be less than the RX ring count.
5861 	 */
5862 	if (sc->bnx_asicrev == BGE_ASICREV_BCM5719 ||
5863 	    sc->bnx_asicrev == BGE_ASICREV_BCM5720) {
5864 		ring_max = BNX_TX_RING_MAX;
5865 		if (ring_max > msix_cnt2)
5866 			ring_max = msix_cnt2;
5867 		if (ring_max > sc->bnx_rx_retcnt)
5868 			ring_max = sc->bnx_rx_retcnt;
5869 		sc->bnx_tx_ringcnt = device_getenv_int(sc->bnx_dev, "tx_rings",
5870 		    bnx_tx_rings);
5871 		sc->bnx_tx_ringcnt = if_ring_count2(sc->bnx_tx_ringcnt,
5872 		    ring_max);
5873 	}
5874 }
5875 
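/*
 * MSI-X vector layout:
 *
 * With BNX_FLAG_RXTX_BUNDLE, vector 0 handles link status only and
 * vectors 1..n serve the RX return rings, with the TX rings bundled
 * onto the first bnx_tx_ringcnt of those vectors.
 *
 * Without it, vector 0 handles link status plus the single TX ring
 * and vectors 1..n serve the RX return rings.
 */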
5876 static int
5877 bnx_alloc_msix(struct bnx_softc *sc)
5878 {
5879 	struct bnx_intr_data *intr;
5880 	boolean_t setup = FALSE;
5881 	int error, i, offset, offset_def;
5882 
5883 	KKASSERT(sc->bnx_intr_cnt > 1);
5884 	KKASSERT(sc->bnx_intr_cnt == sc->bnx_rx_retcnt + 1);
5885 
5886 	if (sc->bnx_flags & BNX_FLAG_RXTX_BUNDLE) {
5887 		/*
5888 		 * Link status
5889 		 */
5890 		intr = &sc->bnx_intr_data[0];
5891 
5892 		intr->bnx_intr_serialize = &sc->bnx_main_serialize;
5893 		intr->bnx_saved_status_tag = &sc->bnx_saved_status_tag;
5894 
5895 		intr->bnx_intr_func = bnx_msix_status;
5896 		intr->bnx_intr_arg = sc;
5897 		intr->bnx_intr_cpuid = 0; /* XXX */
5898 
5899 		ksnprintf(intr->bnx_intr_desc0, sizeof(intr->bnx_intr_desc0),
5900 		    "%s sts", device_get_nameunit(sc->bnx_dev));
5901 		intr->bnx_intr_desc = intr->bnx_intr_desc0;
5902 
5903 		/*
5904 		 * RX/TX rings
5905 		 */
5906 		if (sc->bnx_rx_retcnt == ncpus2) {
5907 			offset = 0;
5908 		} else {
5909 			offset_def = (sc->bnx_rx_retcnt *
5910 			    device_get_unit(sc->bnx_dev)) % ncpus2;
5911 
5912 			offset = device_getenv_int(sc->bnx_dev,
5913 			    "msix.offset", offset_def);
5914 			if (offset >= ncpus2 ||
5915 			    offset % sc->bnx_rx_retcnt != 0) {
5916 				device_printf(sc->bnx_dev,
5917 				    "invalid msix.offset %d, use %d\n",
5918 				    offset, offset_def);
5919 				offset = offset_def;
5920 			}
5921 		}
5922 
5923 		for (i = 1; i < sc->bnx_intr_cnt; ++i) {
5924 			int idx = i - 1;
5925 
5926 			intr = &sc->bnx_intr_data[i];
5927 
5928 			KKASSERT(idx < sc->bnx_rx_retcnt);
5929 			intr->bnx_ret = &sc->bnx_rx_ret_ring[idx];
5930 			if (idx < sc->bnx_tx_ringcnt) {
5931 				intr->bnx_txr = &sc->bnx_tx_ring[idx];
5932 				intr->bnx_ret->bnx_txr = intr->bnx_txr;
5933 			}
5934 
5935 			intr->bnx_intr_serialize =
5936 			    &intr->bnx_ret->bnx_rx_ret_serialize;
5937 			intr->bnx_saved_status_tag =
5938 			    &intr->bnx_ret->bnx_saved_status_tag;
5939 
5940 			intr->bnx_intr_arg = intr->bnx_ret;
5941 			KKASSERT(idx + offset < ncpus2);
5942 			intr->bnx_intr_cpuid = idx + offset;
5943 
5944 			if (intr->bnx_txr == NULL) {
5945 				intr->bnx_intr_check = bnx_check_intr_rx;
5946 				intr->bnx_intr_func = bnx_msix_rx;
5947 				ksnprintf(intr->bnx_intr_desc0,
5948 				    sizeof(intr->bnx_intr_desc0), "%s rx%d",
5949 				    device_get_nameunit(sc->bnx_dev), idx);
5950 			} else {
5951 				intr->bnx_intr_check = bnx_check_intr_rxtx;
5952 				intr->bnx_intr_func = bnx_msix_rxtx;
5953 				ksnprintf(intr->bnx_intr_desc0,
5954 				    sizeof(intr->bnx_intr_desc0), "%s rxtx%d",
5955 				    device_get_nameunit(sc->bnx_dev), idx);
5956 
5957 				intr->bnx_txr->bnx_tx_cpuid =
5958 				    intr->bnx_intr_cpuid;
5959 			}
5960 			intr->bnx_intr_desc = intr->bnx_intr_desc0;
5961 
5962 			intr->bnx_ret->bnx_msix_mbx = intr->bnx_intr_mbx;
5963 		}
5964 	} else {
5965 		/*
5966 		 * TX ring and link status
5967 		 */
5968 		offset_def = device_get_unit(sc->bnx_dev) % ncpus2;
5969 		offset = device_getenv_int(sc->bnx_dev, "msix.txoff",
5970 		    offset_def);
5971 		if (offset >= ncpus2) {
5972 			device_printf(sc->bnx_dev,
5973 			    "invalid msix.txoff %d, use %d\n",
5974 			    offset, offset_def);
5975 			offset = offset_def;
5976 		}
5977 
5978 		intr = &sc->bnx_intr_data[0];
5979 
5980 		intr->bnx_txr = &sc->bnx_tx_ring[0];
5981 		intr->bnx_intr_serialize = &sc->bnx_main_serialize;
5982 		intr->bnx_intr_check = bnx_check_intr_tx;
5983 		intr->bnx_saved_status_tag =
5984 		    &intr->bnx_txr->bnx_saved_status_tag;
5985 
5986 		intr->bnx_intr_func = bnx_msix_tx_status;
5987 		intr->bnx_intr_arg = intr->bnx_txr;
5988 		intr->bnx_intr_cpuid = offset;
5989 
5990 		ksnprintf(intr->bnx_intr_desc0, sizeof(intr->bnx_intr_desc0),
5991 		    "%s ststx", device_get_nameunit(sc->bnx_dev));
5992 		intr->bnx_intr_desc = intr->bnx_intr_desc0;
5993 
5994 		intr->bnx_txr->bnx_tx_cpuid = intr->bnx_intr_cpuid;
5995 
5996 		/*
5997 		 * RX rings
5998 		 */
5999 		if (sc->bnx_rx_retcnt == ncpus2) {
6000 			offset = 0;
6001 		} else {
6002 			offset_def = (sc->bnx_rx_retcnt *
6003 			    device_get_unit(sc->bnx_dev)) % ncpus2;
6004 
6005 			offset = device_getenv_int(sc->bnx_dev,
6006 			    "msix.rxoff", offset_def);
6007 			if (offset >= ncpus2 ||
6008 			    offset % sc->bnx_rx_retcnt != 0) {
6009 				device_printf(sc->bnx_dev,
6010 				    "invalid msix.rxoff %d, use %d\n",
6011 				    offset, offset_def);
6012 				offset = offset_def;
6013 			}
6014 		}
6015 
6016 		for (i = 1; i < sc->bnx_intr_cnt; ++i) {
6017 			int idx = i - 1;
6018 
6019 			intr = &sc->bnx_intr_data[i];
6020 
6021 			KKASSERT(idx < sc->bnx_rx_retcnt);
6022 			intr->bnx_ret = &sc->bnx_rx_ret_ring[idx];
6023 			intr->bnx_intr_serialize =
6024 			    &intr->bnx_ret->bnx_rx_ret_serialize;
6025 			intr->bnx_intr_check = bnx_check_intr_rx;
6026 			intr->bnx_saved_status_tag =
6027 			    &intr->bnx_ret->bnx_saved_status_tag;
6028 
6029 			intr->bnx_intr_func = bnx_msix_rx;
6030 			intr->bnx_intr_arg = intr->bnx_ret;
6031 			KKASSERT(idx + offset < ncpus2);
6032 			intr->bnx_intr_cpuid = idx + offset;
6033 
6034 			ksnprintf(intr->bnx_intr_desc0,
6035 			    sizeof(intr->bnx_intr_desc0), "%s rx%d",
6036 			    device_get_nameunit(sc->bnx_dev), idx);
6037 			intr->bnx_intr_desc = intr->bnx_intr_desc0;
6038 
6039 			intr->bnx_ret->bnx_msix_mbx = intr->bnx_intr_mbx;
6040 		}
6041 	}
6042 
6043 	if (BNX_IS_5717_PLUS(sc)) {
6044 		sc->bnx_msix_mem_rid = PCIR_BAR(4);
6045 	} else {
6046 		if (sc->bnx_res2 == NULL)
6047 			sc->bnx_msix_mem_rid = PCIR_BAR(2);
6048 	}
6049 	if (sc->bnx_msix_mem_rid != 0) {
6050 		sc->bnx_msix_mem_res = bus_alloc_resource_any(sc->bnx_dev,
6051 		    SYS_RES_MEMORY, &sc->bnx_msix_mem_rid, RF_ACTIVE);
6052 		if (sc->bnx_msix_mem_res == NULL) {
6053 			device_printf(sc->bnx_dev,
6054 			    "could not alloc MSI-X table\n");
6055 			return ENXIO;
6056 		}
6057 	}
6058 
6059 	bnx_enable_msi(sc, TRUE);
6060 
6061 	error = pci_setup_msix(sc->bnx_dev);
6062 	if (error) {
6063 		device_printf(sc->bnx_dev, "could not setup MSI-X\n");
6064 		goto back;
6065 	}
6066 	setup = TRUE;
6067 
6068 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
6069 		intr = &sc->bnx_intr_data[i];
6070 
6071 		error = pci_alloc_msix_vector(sc->bnx_dev, i,
6072 		    &intr->bnx_intr_rid, intr->bnx_intr_cpuid);
6073 		if (error) {
6074 			device_printf(sc->bnx_dev,
6075 			    "could not alloc MSI-X %d on cpu%d\n",
6076 			    i, intr->bnx_intr_cpuid);
6077 			goto back;
6078 		}
6079 
6080 		intr->bnx_intr_res = bus_alloc_resource_any(sc->bnx_dev,
6081 		    SYS_RES_IRQ, &intr->bnx_intr_rid, RF_ACTIVE);
6082 		if (intr->bnx_intr_res == NULL) {
6083 			device_printf(sc->bnx_dev,
6084 			    "could not alloc MSI-X %d resource\n", i);
6085 			error = ENXIO;
6086 			goto back;
6087 		}
6088 	}
6089 
6090 	pci_enable_msix(sc->bnx_dev);
6091 	sc->bnx_intr_type = PCI_INTR_TYPE_MSIX;
6092 back:
6093 	if (error)
6094 		bnx_free_msix(sc, setup);
6095 	return error;
6096 }
6097 
6098 static void
6099 bnx_free_msix(struct bnx_softc *sc, boolean_t setup)
6100 {
6101 	int i;
6102 
6103 	KKASSERT(sc->bnx_intr_cnt > 1);
6104 
6105 	for (i = 0; i < sc->bnx_intr_cnt; ++i) {
6106 		struct bnx_intr_data *intr = &sc->bnx_intr_data[i];
6107 
6108 		if (intr->bnx_intr_res != NULL) {
6109 			bus_release_resource(sc->bnx_dev, SYS_RES_IRQ,
6110 			    intr->bnx_intr_rid, intr->bnx_intr_res);
6111 		}
6112 		if (intr->bnx_intr_rid >= 0) {
6113 			pci_release_msix_vector(sc->bnx_dev,
6114 			    intr->bnx_intr_rid);
6115 		}
6116 	}
6117 	if (setup)
6118 		pci_teardown_msix(sc->bnx_dev);
6119 }
6120 
6121 static void
6122 bnx_rx_std_refill_sched_ipi(void *xret)
6123 {
6124 	struct bnx_rx_ret_ring *ret = xret;
6125 	struct bnx_rx_std_ring *std = ret->bnx_std;
6126 	struct globaldata *gd = mycpu;
6127 
6128 	crit_enter_gd(gd);
6129 
6130 	atomic_set_int(&std->bnx_rx_std_refill, ret->bnx_rx_mask);
6131 	cpu_sfence();
6132 
6133 	KKASSERT(std->bnx_rx_std_ithread->td_gd == gd);
6134 	lwkt_schedule(std->bnx_rx_std_ithread);
6135 
6136 	crit_exit_gd(gd);
6137 }
6138 
6139 static void
6140 bnx_rx_std_refill_stop(void *xstd)
6141 {
6142 	struct bnx_rx_std_ring *std = xstd;
6143 	struct globaldata *gd = mycpu;
6144 
6145 	crit_enter_gd(gd);
6146 
6147 	std->bnx_rx_std_stop = 1;
6148 	cpu_sfence();
6149 
6150 	KKASSERT(std->bnx_rx_std_ithread->td_gd == gd);
6151 	lwkt_schedule(std->bnx_rx_std_ithread);
6152 
6153 	crit_exit_gd(gd);
6154 }
6155 
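/*
 * Enter/exit every serializer except the main one: bnx_setup_serialize()
 * put the main serializer at index 0, and the trailing "1" below starts
 * the array enter/exit past that single leading entry.
 */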
6156 static void
6157 bnx_serialize_skipmain(struct bnx_softc *sc)
6158 {
6159 	lwkt_serialize_array_enter(sc->bnx_serialize,
6160 	    sc->bnx_serialize_cnt, 1);
6161 }
6162 
6163 static void
6164 bnx_deserialize_skipmain(struct bnx_softc *sc)
6165 {
6166 	lwkt_serialize_array_exit(sc->bnx_serialize,
6167 	    sc->bnx_serialize_cnt, 1);
6168 }
6169 
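/*
 * Hand this return ring's refill mask to the refill ithread.  The mask
 * is published first; only the caller that wins bnx_rx_std_running
 * actually (re)schedules the ithread, directly when it already runs on
 * this CPU and via an IPI to its home CPU otherwise.
 */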
6170 static void
6171 bnx_rx_std_refill_sched(struct bnx_rx_ret_ring *ret,
6172     struct bnx_rx_std_ring *std)
6173 {
6174 	struct globaldata *gd = mycpu;
6175 
6176 	ret->bnx_rx_cnt = 0;
6177 	cpu_sfence();
6178 
6179 	crit_enter_gd(gd);
6180 
6181 	atomic_set_int(&std->bnx_rx_std_refill, ret->bnx_rx_mask);
6182 	cpu_sfence();
6183 	if (atomic_poll_acquire_int(&std->bnx_rx_std_running)) {
6184 		if (std->bnx_rx_std_ithread->td_gd == gd) {
6185 			lwkt_schedule(std->bnx_rx_std_ithread);
6186 		} else {
6187 			lwkt_send_ipiq(std->bnx_rx_std_ithread->td_gd,
6188 			    bnx_rx_std_refill_sched_ipi, ret);
6189 		}
6190 	}
6191 
6192 	crit_exit_gd(gd);
6193 }
6194 
6195 static struct pktinfo *
6196 bnx_rss_info(struct pktinfo *pi, const struct bge_rx_bd *cur_rx)
6197 {
6198 	/* Don't pick up IPv6 packet */
6199 	if (cur_rx->bge_flags & BGE_RXBDFLAG_IPV6)
6200 		return NULL;
6201 
6202 	/* Don't pick up IP packet w/o IP checksum */
6203 	if ((cur_rx->bge_flags & BGE_RXBDFLAG_IP_CSUM) == 0 ||
6204 	    (cur_rx->bge_error_flag & BGE_RXERRFLAG_IP_CSUM_NOK))
6205 		return NULL;
6206 
6207 	/* Don't pick up IP packet w/o TCP/UDP checksum */
6208 	if ((cur_rx->bge_flags & BGE_RXBDFLAG_TCP_UDP_CSUM) == 0)
6209 		return NULL;
6210 
6211 	/* May be IP fragment */
6212 	if (cur_rx->bge_tcp_udp_csum != 0xffff)
6213 		return NULL;
6214 
6215 	if (cur_rx->bge_flags & BGE_RXBDFLAG_TCP_UDP_IS_TCP)
6216 		pi->pi_l3proto = IPPROTO_TCP;
6217 	else
6218 		pi->pi_l3proto = IPPROTO_UDP;
6219 	pi->pi_netisr = NETISR_IP;
6220 	pi->pi_flags = 0;
6221 
6222 	return pi;
6223 }
6224 
6225 static void
6226 bnx_sig_pre_reset(struct bnx_softc *sc, int type)
6227 {
6228 	if (type == BNX_RESET_START || type == BNX_RESET_SUSPEND)
6229 		bnx_ape_driver_state_change(sc, type);
6230 }
6231 
6232 static void
6233 bnx_sig_post_reset(struct bnx_softc *sc, int type)
6234 {
6235 	if (type == BNX_RESET_SHUTDOWN)
6236 		bnx_ape_driver_state_change(sc, type);
6237 }
6238 
6239 /*
6240  * Clear all stale locks and select the lock for this driver instance.
6241  */
6242 static void
6243 bnx_ape_lock_init(struct bnx_softc *sc)
6244 {
6245 	uint32_t bit, regbase;
6246 	int i;
6247 
6248 	regbase = BGE_APE_PER_LOCK_GRANT;
6249 
6250 	/* Clear any stale locks. */
6251 	for (i = BGE_APE_LOCK_PHY0; i <= BGE_APE_LOCK_GPIO; i++) {
6252 		switch (i) {
6253 		case BGE_APE_LOCK_PHY0:
6254 		case BGE_APE_LOCK_PHY1:
6255 		case BGE_APE_LOCK_PHY2:
6256 		case BGE_APE_LOCK_PHY3:
6257 			bit = BGE_APE_LOCK_GRANT_DRIVER0;
6258 			break;
6259 
6260 		default:
6261 			if (sc->bnx_func_addr == 0)
6262 				bit = BGE_APE_LOCK_GRANT_DRIVER0;
6263 			else
6264 				bit = 1 << sc->bnx_func_addr;
6265 			break;
6266 		}
6267 		APE_WRITE_4(sc, regbase + 4 * i, bit);
6268 	}
6269 
6270 	/* Select the PHY lock based on the device's function number. */
6271 	switch (sc->bnx_func_addr) {
6272 	case 0:
6273 		sc->bnx_phy_ape_lock = BGE_APE_LOCK_PHY0;
6274 		break;
6275 
6276 	case 1:
6277 		sc->bnx_phy_ape_lock = BGE_APE_LOCK_PHY1;
6278 		break;
6279 
6280 	case 2:
6281 		sc->bnx_phy_ape_lock = BGE_APE_LOCK_PHY2;
6282 		break;
6283 
6284 	case 3:
6285 		sc->bnx_phy_ape_lock = BGE_APE_LOCK_PHY3;
6286 		break;
6287 
6288 	default:
6289 		device_printf(sc->bnx_dev,
6290 		    "PHY lock not supported on this function\n");
6291 		break;
6292 	}
6293 }
6294 
6295 /*
6296  * Check for APE firmware, set flags, and print version info.
6297  */
6298 static void
6299 bnx_ape_read_fw_ver(struct bnx_softc *sc)
6300 {
6301 	const char *fwtype;
6302 	uint32_t apedata, features;
6303 
6304 	/* Check for a valid APE signature in shared memory. */
6305 	apedata = APE_READ_4(sc, BGE_APE_SEG_SIG);
6306 	if (apedata != BGE_APE_SEG_SIG_MAGIC) {
6307 		device_printf(sc->bnx_dev, "no APE signature\n");
6308 		sc->bnx_mfw_flags &= ~BNX_MFW_ON_APE;
6309 		return;
6310 	}
6311 
6312 	/* Check if APE firmware is running. */
6313 	apedata = APE_READ_4(sc, BGE_APE_FW_STATUS);
6314 	if ((apedata & BGE_APE_FW_STATUS_READY) == 0) {
6315 		device_printf(sc->bnx_dev, "APE signature found "
6316 		    "but FW status not ready! 0x%08x\n", apedata);
6317 		return;
6318 	}
6319 
6320 	sc->bnx_mfw_flags |= BNX_MFW_ON_APE;
6321 
6322 	/* Fetch the APE firmware type and version. */
6323 	apedata = APE_READ_4(sc, BGE_APE_FW_VERSION);
6324 	features = APE_READ_4(sc, BGE_APE_FW_FEATURES);
6325 	if (features & BGE_APE_FW_FEATURE_NCSI) {
6326 		sc->bnx_mfw_flags |= BNX_MFW_TYPE_NCSI;
6327 		fwtype = "NCSI";
6328 	} else if (features & BGE_APE_FW_FEATURE_DASH) {
6329 		sc->bnx_mfw_flags |= BNX_MFW_TYPE_DASH;
6330 		fwtype = "DASH";
6331 	} else {
6332 		fwtype = "UNKN";
6333 	}
6334 
6335 	/* Print the APE firmware version. */
6336 	device_printf(sc->bnx_dev, "APE FW version: %s v%d.%d.%d.%d\n",
6337 	    fwtype,
6338 	    (apedata & BGE_APE_FW_VERSION_MAJMSK) >> BGE_APE_FW_VERSION_MAJSFT,
6339 	    (apedata & BGE_APE_FW_VERSION_MINMSK) >> BGE_APE_FW_VERSION_MINSFT,
6340 	    (apedata & BGE_APE_FW_VERSION_REVMSK) >> BGE_APE_FW_VERSION_REVSFT,
6341 	    (apedata & BGE_APE_FW_VERSION_BLDMSK));
6342 }
6343 
6344 static int
6345 bnx_ape_lock(struct bnx_softc *sc, int locknum)
6346 {
6347 	uint32_t bit, gnt, req, status;
6348 	int i, off;
6349 
6350 	if ((sc->bnx_mfw_flags & BNX_MFW_ON_APE) == 0)
6351 		return 0;
6352 
6353 	/* Lock request/grant registers have different bases. */
6354 	req = BGE_APE_PER_LOCK_REQ;
6355 	gnt = BGE_APE_PER_LOCK_GRANT;
6356 
6357 	off = 4 * locknum;
6358 
6359 	switch (locknum) {
6360 	case BGE_APE_LOCK_GPIO:
6361 		/* Lock required when using GPIO. */
6362 		if (sc->bnx_func_addr == 0)
6363 			bit = BGE_APE_LOCK_REQ_DRIVER0;
6364 		else
6365 			bit = 1 << sc->bnx_func_addr;
6366 		break;
6367 
6368 	case BGE_APE_LOCK_GRC:
6369 		/* Lock required to reset the device. */
6370 		if (sc->bnx_func_addr == 0)
6371 			bit = BGE_APE_LOCK_REQ_DRIVER0;
6372 		else
6373 			bit = 1 << sc->bnx_func_addr;
6374 		break;
6375 
6376 	case BGE_APE_LOCK_MEM:
6377 		/* Lock required when accessing certain APE memory. */
6378 		if (sc->bnx_func_addr == 0)
6379 			bit = BGE_APE_LOCK_REQ_DRIVER0;
6380 		else
6381 			bit = 1 << sc->bnx_func_addr;
6382 		break;
6383 
6384 	case BGE_APE_LOCK_PHY0:
6385 	case BGE_APE_LOCK_PHY1:
6386 	case BGE_APE_LOCK_PHY2:
6387 	case BGE_APE_LOCK_PHY3:
6388 		/* Lock required when accessing PHYs. */
6389 		bit = BGE_APE_LOCK_REQ_DRIVER0;
6390 		break;
6391 
6392 	default:
6393 		return EINVAL;
6394 	}
6395 
6396 	/* Request a lock. */
6397 	APE_WRITE_4(sc, req + off, bit);
6398 
6399 	/* Wait up to 1 second to acquire lock. */
6400 	for (i = 0; i < 20000; i++) {
6401 		status = APE_READ_4(sc, gnt + off);
6402 		if (status == bit)
6403 			break;
6404 		DELAY(50);
6405 	}
6406 
6407 	/* Handle any errors. */
6408 	if (status != bit) {
6409 		if_printf(&sc->arpcom.ac_if, "APE lock %d request failed! "
6410 		    "request = 0x%04x[0x%04x], status = 0x%04x[0x%04x]\n",
6411 		    locknum, req + off, bit & 0xFFFF, gnt + off,
6412 		    status & 0xFFFF);
6413 		/* Revoke the lock request. */
6414 		APE_WRITE_4(sc, gnt + off, bit);
6415 		return EBUSY;
6416 	}
6417 
6418 	return 0;
6419 }
6420 
6421 static void
6422 bnx_ape_unlock(struct bnx_softc *sc, int locknum)
6423 {
6424 	uint32_t bit, gnt;
6425 	int off;
6426 
6427 	if ((sc->bnx_mfw_flags & BNX_MFW_ON_APE) == 0)
6428 		return;
6429 
6430 	gnt = BGE_APE_PER_LOCK_GRANT;
6431 
6432 	off = 4 * locknum;
6433 
6434 	switch (locknum) {
6435 	case BGE_APE_LOCK_GPIO:
6436 		if (sc->bnx_func_addr == 0)
6437 			bit = BGE_APE_LOCK_GRANT_DRIVER0;
6438 		else
6439 			bit = 1 << sc->bnx_func_addr;
6440 		break;
6441 
6442 	case BGE_APE_LOCK_GRC:
6443 		if (sc->bnx_func_addr == 0)
6444 			bit = BGE_APE_LOCK_GRANT_DRIVER0;
6445 		else
6446 			bit = 1 << sc->bnx_func_addr;
6447 		break;
6448 
6449 	case BGE_APE_LOCK_MEM:
6450 		if (sc->bnx_func_addr == 0)
6451 			bit = BGE_APE_LOCK_GRANT_DRIVER0;
6452 		else
6453 			bit = 1 << sc->bnx_func_addr;
6454 		break;
6455 
6456 	case BGE_APE_LOCK_PHY0:
6457 	case BGE_APE_LOCK_PHY1:
6458 	case BGE_APE_LOCK_PHY2:
6459 	case BGE_APE_LOCK_PHY3:
6460 		bit = BGE_APE_LOCK_GRANT_DRIVER0;
6461 		break;
6462 
6463 	default:
6464 		return;
6465 	}
6466 
6467 	APE_WRITE_4(sc, gnt + off, bit);
6468 }
6469 
6470 /*
6471  * Send an event to the APE firmware.
6472  */
6473 static void
6474 bnx_ape_send_event(struct bnx_softc *sc, uint32_t event)
6475 {
6476 	uint32_t apedata;
6477 	int i;
6478 
6479 	/* NCSI does not support APE events. */
6480 	if ((sc->bnx_mfw_flags & BNX_MFW_ON_APE) == 0)
6481 		return;
6482 
6483 	/* Wait up to 1ms for APE to service previous event. */
6484 	for (i = 10; i > 0; i--) {
6485 		if (bnx_ape_lock(sc, BGE_APE_LOCK_MEM) != 0)
6486 			break;
6487 		apedata = APE_READ_4(sc, BGE_APE_EVENT_STATUS);
6488 		if ((apedata & BGE_APE_EVENT_STATUS_EVENT_PENDING) == 0) {
6489 			APE_WRITE_4(sc, BGE_APE_EVENT_STATUS, event |
6490 			    BGE_APE_EVENT_STATUS_EVENT_PENDING);
6491 			bnx_ape_unlock(sc, BGE_APE_LOCK_MEM);
6492 			APE_WRITE_4(sc, BGE_APE_EVENT, BGE_APE_EVENT_1);
6493 			break;
6494 		}
6495 		bnx_ape_unlock(sc, BGE_APE_LOCK_MEM);
6496 		DELAY(100);
6497 	}
6498 	if (i == 0) {
6499 		if_printf(&sc->arpcom.ac_if,
6500 		    "APE event 0x%08x send timed out\n", event);
6501 	}
6502 }
6503 
6504 static void
6505 bnx_ape_driver_state_change(struct bnx_softc *sc, int kind)
6506 {
6507 	uint32_t apedata, event;
6508 
6509 	if ((sc->bnx_mfw_flags & BNX_MFW_ON_APE) == 0)
6510 		return;
6511 
6512 	switch (kind) {
6513 	case BNX_RESET_START:
6514 		/* If this is the first load, clear the load counter. */
6515 		apedata = APE_READ_4(sc, BGE_APE_HOST_SEG_SIG);
6516 		if (apedata != BGE_APE_HOST_SEG_SIG_MAGIC) {
6517 			APE_WRITE_4(sc, BGE_APE_HOST_INIT_COUNT, 0);
6518 		} else {
6519 			apedata = APE_READ_4(sc, BGE_APE_HOST_INIT_COUNT);
6520 			APE_WRITE_4(sc, BGE_APE_HOST_INIT_COUNT, ++apedata);
6521 		}
6522 		APE_WRITE_4(sc, BGE_APE_HOST_SEG_SIG,
6523 		    BGE_APE_HOST_SEG_SIG_MAGIC);
6524 		APE_WRITE_4(sc, BGE_APE_HOST_SEG_LEN,
6525 		    BGE_APE_HOST_SEG_LEN_MAGIC);
6526 
6527 		/* Add some version info if bnx(4) supports it. */
6528 		APE_WRITE_4(sc, BGE_APE_HOST_DRIVER_ID,
6529 		    BGE_APE_HOST_DRIVER_ID_MAGIC(1, 0));
6530 		APE_WRITE_4(sc, BGE_APE_HOST_BEHAVIOR,
6531 		    BGE_APE_HOST_BEHAV_NO_PHYLOCK);
6532 		APE_WRITE_4(sc, BGE_APE_HOST_HEARTBEAT_INT_MS,
6533 		    BGE_APE_HOST_HEARTBEAT_INT_DISABLE);
6534 		APE_WRITE_4(sc, BGE_APE_HOST_DRVR_STATE,
6535 		    BGE_APE_HOST_DRVR_STATE_START);
6536 		event = BGE_APE_EVENT_STATUS_STATE_START;
6537 		break;
6538 
6539 	case BNX_RESET_SHUTDOWN:
6540 		APE_WRITE_4(sc, BGE_APE_HOST_DRVR_STATE,
6541 		    BGE_APE_HOST_DRVR_STATE_UNLOAD);
6542 		event = BGE_APE_EVENT_STATUS_STATE_UNLOAD;
6543 		break;
6544 
6545 	case BNX_RESET_SUSPEND:
6546 		event = BGE_APE_EVENT_STATUS_STATE_SUSPEND;
6547 		break;
6548 
6549 	default:
6550 		return;
6551 	}
6552 
6553 	bnx_ape_send_event(sc, event | BGE_APE_EVENT_STATUS_DRIVER_EVNT |
6554 	    BGE_APE_EVENT_STATUS_STATE_CHNGE);
6555 }
6556