xref: /freebsd/sys/dev/e1000/if_em.c (revision aa0a1e58)
/******************************************************************************

  Copyright (c) 2001-2010, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.2.2";
/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by em_probe to select which devices to attach to.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/
107 
108 static em_vendor_info_t em_vendor_info_array[] =
109 {
110 	/* Intel(R) PRO/1000 Network Connection */
111 	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
112 	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
113 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
114 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
115 						PCI_ANY_ID, PCI_ANY_ID, 0},
116 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
117 						PCI_ANY_ID, PCI_ANY_ID, 0},
118 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
119 						PCI_ANY_ID, PCI_ANY_ID, 0},
120 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
121 						PCI_ANY_ID, PCI_ANY_ID, 0},
122 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
123 						PCI_ANY_ID, PCI_ANY_ID, 0},
124 	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
125 						PCI_ANY_ID, PCI_ANY_ID, 0},
126 	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
127 	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
128 	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
129 	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
130 
131 	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
132 	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
133 	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
134 	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
135 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
136 						PCI_ANY_ID, PCI_ANY_ID, 0},
137 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
138 						PCI_ANY_ID, PCI_ANY_ID, 0},
139 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
140 						PCI_ANY_ID, PCI_ANY_ID, 0},
141 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
142 						PCI_ANY_ID, PCI_ANY_ID, 0},
143 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
144 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
145 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
146 	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
147 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
148 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
149 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
150 	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
151 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
152 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
153 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
154 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
155 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
156 	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
157 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
158 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
159 	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
160 	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
161 	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
162 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
163 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
164 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
165 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
166 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
167 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
168 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
169 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
170 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
171 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
172 	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
173 	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
174 	/* required last entry */
175 	{ 0, 0, 0, 0, 0}
176 };

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static bool	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66
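
/*
 * Illustrative editor's note on the macros above: the hardware delay
 * registers count in units of 1.024 usecs, so a register value of 64
 * ticks converts as EM_TICKS_TO_USECS(64) = (1024 * 64 + 500) / 1000
 * = 66 usecs, and back as EM_USECS_TO_TICKS(66) = (1000 * 66 + 512) /
 * 1024 = 64 ticks; the +500 and +512 terms round to the nearest unit
 * under integer division.
 */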

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);

/* Flow control setting - default to FULL */
static int em_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);

/* Energy efficient ethernet - default to OFF */
static int eee_setting = 0;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on the
 *  adapter based on its PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");
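
	/*
	 * These handlers hang off the device's sysctl tree, so
	 * (assuming unit 0) the dumps can be triggered from userland
	 * with e.g. "sysctl dev.em.0.nvm=1" or "sysctl dev.em.0.debug=1"
	 * (illustrative editor's note).
	 */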

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/* Sysctl for setting the interface flow control */
	em_set_sysctl_value(adapter, "flow_control",
	    "configure flow control",
	    &adapter->fc_setting, em_fc_setting);

	/*
	 * Validate the number of transmit and receive descriptors. It
	 * must not exceed the hardware maximum, and the total ring size
	 * must be a multiple of EM_DBA_ALIGN.
	 */
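	/*
	 * Illustrative editor's note: a legacy descriptor is 16 bytes,
	 * so with EM_DBA_ALIGN at 128 the ring stays aligned whenever
	 * the descriptor count is a multiple of 8; the actual bounds
	 * come from EM_MIN_TXD/EM_MAX_TXD and EM_MIN_RXD/EM_MAX_RXD
	 * in if_em.h.
	 */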
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
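	/*
	 * With the standard values this works out to 1500 + 14 + 4 =
	 * 1518 bytes max and 60 + 4 = 64 bytes min (editor's note).
	 */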

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	hw->mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	em_set_sysctl_value(adapter, "eee_control",
	    "enable Energy Efficient Ethernet",
	    &hw->dev_spec.ich8lan.eee_disable, eee_setting);

	/*
	** Start from a known state; this is
	** important for reading the NVM and
	** MAC address from it.
	*/
	e1000_reset_hw(hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again.
		** If it fails a second time, it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev, "Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	em_init_manageability(adapter);
	EM_CORE_UNLOCK(adapter);
	em_start(ifp);

	return bus_generic_resume(dev);
}

/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

#ifdef EM_MULTIQUEUE
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		next = drbr_dequeue(ifp, txr->br);
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status = EM_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}
	return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	int		error;

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}

#endif /* EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		/* Call cleanup if number of TX descriptors low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->queue_status = EM_QUEUE_WORKING;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_CORE_LOCK(adapter);
				em_init_locked(adapter);
				EM_CORE_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
#endif
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_82574:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_82583:
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/*
		** As the speed/duplex settings are being
		** changed, we need to reset the PHY.
		*/
		adapter->hw.phy.reset_disable = FALSE;
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	u32		pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 */
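	/*
	 * For example, on an 82571 (48K total) the E1000_PBA_32K value
	 * chosen below leaves 32K for receive and the remaining 16K
	 * for transmit (illustrative editor's note).
	 */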
	switch (adapter->hw.mac.type) {
	/* Total Packet Buffer on these is 48K */
	case e1000_82571:
	case e1000_82572:
	case e1000_80003es2lan:
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case e1000_82574:
	case e1000_82583:
		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
		break;
	case e1000_ich8lan:
		pba = E1000_PBA_8K;
		break;
	case e1000_ich9lan:
	case e1000_ich10lan:
		pba = E1000_PBA_10K;
		break;
	case e1000_pchlan:
	case e1000_pch2lan:
		pba = E1000_PBA_26K;
		break;
	default:
		if (adapter->max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset. We make a duplicate
	 * in RAR[14] for that eventuality, which assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with a single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */


/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more || (ifp->if_drv_flags & IFF_DRV_OACTIVE)) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}


/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	bool		more;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	more = em_txeof(txr);
	EM_TX_UNLOCK(txr);
	if (more)
		taskqueue_enqueue(txr->tq, &txr->tx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	em_start_locked(ifp, txr);
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_CORE_LOCK(adapter);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia	*ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter		*adapter = txr->adapter;
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
	struct e1000_tx_desc	*ctxd = NULL;
	struct mbuf		*m_head;
	struct ether_header	*eh;
	struct ip		*ip = NULL;
	struct tcphdr		*tp = NULL;
	u32			txd_upper, txd_lower, txd_used, txd_saved;
	int			ip_off, poff;
	int			nsegs, i, j, first, last = 0;
	int			error, do_tso, tso_desc = 0;

	m_head = *m_headp;
	txd_upper = txd_lower = txd_used = txd_saved = 0;
	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
	ip_off = poff = 0;

1771 	/*
1772 	 * Intel recommends entire IP/TCP header length reside in a single
1773 	 * buffer. If multiple descriptors are used to describe the IP and
1774 	 * TCP header, each descriptor should describe one or more
1775 	 * complete headers; descriptors referencing only parts of headers
1776 	 * are not supported. If all layer headers are not coalesced into
1777 	 * a single buffer, each buffer should not cross a 4KB boundary,
1778 	 * or be larger than the maximum read request size.
1779 	 * The controller also requires modifying the IP/TCP header for
1780 	 * TSO to work, so we first get a writable mbuf chain, then
1781 	 * coalesce the ethernet/IP/TCP headers into a single buffer to
1782 	 * meet the controller's requirement. This also simplifies
1783 	 * IP/TCP/UDP checksum offloading, which has similar restrictions.
1784 	 */
1785 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1786 		if (do_tso || (m_head->m_next != NULL &&
1787 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1788 			if (M_WRITABLE(*m_headp) == 0) {
1789 				m_head = m_dup(*m_headp, M_DONTWAIT);
1790 				m_freem(*m_headp);
1791 				if (m_head == NULL) {
1792 					*m_headp = NULL;
1793 					return (ENOBUFS);
1794 				}
1795 				*m_headp = m_head;
1796 			}
1797 		}
1798 		/*
1799 		 * XXX
1800 		 * Assume IPv4, we don't have TSO/checksum offload support
1801 		 * for IPv6 yet.
1802 		 */
1803 		ip_off = sizeof(struct ether_header);
1804 		m_head = m_pullup(m_head, ip_off);
1805 		if (m_head == NULL) {
1806 			*m_headp = NULL;
1807 			return (ENOBUFS);
1808 		}
1809 		eh = mtod(m_head, struct ether_header *);
1810 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1811 			ip_off = sizeof(struct ether_vlan_header);
1812 			m_head = m_pullup(m_head, ip_off);
1813 			if (m_head == NULL) {
1814 				*m_headp = NULL;
1815 				return (ENOBUFS);
1816 			}
1817 		}
1818 		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1819 		if (m_head == NULL) {
1820 			*m_headp = NULL;
1821 			return (ENOBUFS);
1822 		}
1823 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1824 		poff = ip_off + (ip->ip_hl << 2);
1825 		if (do_tso) {
1826 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1827 			if (m_head == NULL) {
1828 				*m_headp = NULL;
1829 				return (ENOBUFS);
1830 			}
1831 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1832 			/*
1833 			 * TSO workaround: pull 4 more bytes of
1834 			 *   payload into the header mbuf.
1835 			 */
1836 			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1837 			if (m_head == NULL) {
1838 				*m_headp = NULL;
1839 				return (ENOBUFS);
1840 			}
1841 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1842 			ip->ip_len = 0;
1843 			ip->ip_sum = 0;
1844 			/*
1845 			 * The pseudo TCP checksum does not include the TCP
1846 			 * payload length, so the driver must recompute it
1847 			 * here as the hardware expects to see it, per
1848 			 * Microsoft's Large Send specification.
1849 			 */
1850 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1851 			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1852 			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1853 		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1854 			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1855 			if (m_head == NULL) {
1856 				*m_headp = NULL;
1857 				return (ENOBUFS);
1858 			}
1859 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1860 			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1861 			if (m_head == NULL) {
1862 				*m_headp = NULL;
1863 				return (ENOBUFS);
1864 			}
1865 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1866 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1867 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1868 			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1869 			if (m_head == NULL) {
1870 				*m_headp = NULL;
1871 				return (ENOBUFS);
1872 			}
1873 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1874 		}
1875 		*m_headp = m_head;
1876 	}
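
	/*
	 * Illustrative worked example (editorial, not part of the driver):
	 * the offset arithmetic above for a plain untagged IPv4/TCP frame
	 * with minimal 20-byte IP and TCP headers (ip_hl == 5, th_off == 5):
	 *
	 *   ip_off = sizeof(struct ether_header)       = 14
	 *   poff   = ip_off + (ip->ip_hl << 2)         = 14 + 20 = 34
	 *   TSO pullup = poff + (tp->th_off << 2) + 4  = 34 + 20 + 4 = 58
	 *
	 * ip_hl and th_off count 32-bit words, hence the << 2 to convert
	 * to bytes; the extra 4 bytes feed the sentinel-descriptor split
	 * performed later in this function.
	 */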
1877 
1878 	/*
1879 	 * Map the packet for DMA
1880 	 *
1881 	 * Capture the first descriptor index,
1882 	 * this descriptor will have the index
1883 	 * of the EOP which is the only one that
1884 	 * now gets a DONE bit writeback.
1885 	 */
1886 	first = txr->next_avail_desc;
1887 	tx_buffer = &txr->tx_buffers[first];
1888 	tx_buffer_mapped = tx_buffer;
1889 	map = tx_buffer->map;
1890 
1891 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1892 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1893 
1894 	/*
1895 	 * There are two types of errors we can (try) to handle:
1896 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1897 	 *   out of segments.  Defragment the mbuf chain and try again.
1898 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1899 	 *   at this point in time.  Defer sending and try again later.
1900 	 * All other errors, in particular EINVAL, are fatal and prevent the
1901 	 * mbuf chain from ever going through.  Drop it and report error.
1902 	 */
1903 	if (error == EFBIG) {
1904 		struct mbuf *m;
1905 
1906 		m = m_defrag(*m_headp, M_DONTWAIT);
1907 		if (m == NULL) {
1908 			adapter->mbuf_alloc_failed++;
1909 			m_freem(*m_headp);
1910 			*m_headp = NULL;
1911 			return (ENOBUFS);
1912 		}
1913 		*m_headp = m;
1914 
1915 		/* Try it again */
1916 		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1917 		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1918 
1919 		if (error == ENOMEM) {
1920 			adapter->no_tx_dma_setup++;
1921 			return (error);
1922 		} else if (error != 0) {
1923 			adapter->no_tx_dma_setup++;
1924 			m_freem(*m_headp);
1925 			*m_headp = NULL;
1926 			return (error);
1927 		}
1928 
1929 	} else if (error == ENOMEM) {
1930 		adapter->no_tx_dma_setup++;
1931 		return (error);
1932 	} else if (error != 0) {
1933 		adapter->no_tx_dma_setup++;
1934 		m_freem(*m_headp);
1935 		*m_headp = NULL;
1936 		return (error);
1937 	}
1938 
1939 	/*
1940 	 * TSO Hardware workaround, if this packet is not
1941 	 * TSO, and is only a single descriptor long, and
1942 	 * it follows a TSO burst, then we need to add a
1943 	 * sentinel descriptor to prevent premature writeback.
1944 	 */
1945 	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1946 		if (nsegs == 1)
1947 			tso_desc = TRUE;
1948 		txr->tx_tso = FALSE;
1949 	}
1950 
1951 	if (nsegs > (txr->tx_avail - 2)) {
1952 		txr->no_desc_avail++;
1953 		bus_dmamap_unload(txr->txtag, map);
1954 		return (ENOBUFS);
1955 	}
1956 	m_head = *m_headp;
1957 
1958 	/* Do hardware assists */
1959 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1960 		em_tso_setup(txr, m_head, ip_off, ip, tp,
1961 		    &txd_upper, &txd_lower);
1962 		/* we need to make a final sentinel transmit desc */
1963 		tso_desc = TRUE;
1964 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1965 		em_transmit_checksum_setup(txr, m_head,
1966 		    ip_off, ip, &txd_upper, &txd_lower);
1967 
1968 	i = txr->next_avail_desc;
1969 
1970 	/* Set up our transmit descriptors */
1971 	for (j = 0; j < nsegs; j++) {
1972 		bus_size_t seg_len;
1973 		bus_addr_t seg_addr;
1974 
1975 		tx_buffer = &txr->tx_buffers[i];
1976 		ctxd = &txr->tx_base[i];
1977 		seg_addr = segs[j].ds_addr;
1978 		seg_len  = segs[j].ds_len;
1979 		/*
1980 		** TSO Workaround:
1981 		** If this is the last descriptor, we want to
1982 		** split it so we have a small final sentinel
1983 		*/
1984 		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
1985 			seg_len -= 4;
1986 			ctxd->buffer_addr = htole64(seg_addr);
1987 			ctxd->lower.data = htole32(
1988 			    adapter->txd_cmd | txd_lower | seg_len);
1989 			ctxd->upper.data =
1990 			    htole32(txd_upper);
1991 			if (++i == adapter->num_tx_desc)
1992 				i = 0;
1993 			/* Now make the sentinel */
1994 			++txd_used; /* using an extra txd */
1995 			ctxd = &txr->tx_base[i];
1996 			tx_buffer = &txr->tx_buffers[i];
1997 			ctxd->buffer_addr =
1998 			    htole64(seg_addr + seg_len);
1999 			ctxd->lower.data = htole32(
2000 			    adapter->txd_cmd | txd_lower | 4);
2001 			ctxd->upper.data =
2002 			    htole32(txd_upper);
2003 			last = i;
2004 			if (++i == adapter->num_tx_desc)
2005 				i = 0;
2006 		} else {
2007 			ctxd->buffer_addr = htole64(seg_addr);
2008 			ctxd->lower.data = htole32(
2009 			    adapter->txd_cmd | txd_lower | seg_len);
2010 			ctxd->upper.data =
2011 			    htole32(txd_upper);
2012 			last = i;
2013 			if (++i == adapter->num_tx_desc)
2014 				i = 0;
2015 		}
2016 		tx_buffer->m_head = NULL;
2017 		tx_buffer->next_eop = -1;
2018 	}
2019 
2020 	txr->next_avail_desc = i;
2021 	txr->tx_avail -= nsegs;
2022 	if (tso_desc) /* TSO used an extra for sentinel */
2023 		txr->tx_avail -= txd_used;
2024 
2025 	if (m_head->m_flags & M_VLANTAG) {
2026 		/* Set the vlan id. */
2027 		ctxd->upper.fields.special =
2028 		    htole16(m_head->m_pkthdr.ether_vtag);
2029 		/* Tell the hardware to add the tag */
2030 		ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
2031 	}
2032 
2033 	tx_buffer->m_head = m_head;
2034 	tx_buffer_mapped->map = tx_buffer->map;
2035 	tx_buffer->map = map;
2036 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2037 
2038 	/*
2039 	 * The last descriptor of the packet needs
2040 	 * End Of Packet (EOP) and
2041 	 * Report Status (RS).
2042 	 */
2043 	ctxd->lower.data |=
2044 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2045 	/*
2046 	 * Keep track in the first buffer which
2047 	 * descriptor will be written back
2048 	 */
2049 	tx_buffer = &txr->tx_buffers[first];
2050 	tx_buffer->next_eop = last;
2051 	/* Update the watchdog time early and often */
2052 	txr->watchdog_time = ticks;
2053 
2054 	/*
2055 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2056 	 * that this frame is available to transmit.
2057 	 */
2058 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2059 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2060 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2061 
2062 	return (0);
2063 }
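
/*
 * Illustrative sketch (editorial; not compiled, names hypothetical): the
 * caller contract of em_xmit() above. The mbuf is passed by reference
 * because em_xmit() may replace the chain (m_dup/m_defrag/m_pullup), or
 * free it and NULL the pointer on fatal errors, so the caller must
 * re-check the pointer on failure.
 */
#if 0
static void
example_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
	struct mbuf *m_head;

	/* The TX ring lock is assumed to be held by the caller. */
	for (;;) {
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		if (em_xmit(txr, &m_head) != 0) {
			/* NULL means the chain was already freed */
			if (m_head != NULL)
				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}
		ETHER_BPF_MTAP(ifp, m_head);
	}
}
#endif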
2064 
2065 static void
2066 em_set_promisc(struct adapter *adapter)
2067 {
2068 	struct ifnet	*ifp = adapter->ifp;
2069 	u32		reg_rctl;
2070 
2071 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2072 
2073 	if (ifp->if_flags & IFF_PROMISC) {
2074 		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2075 		/* Turn this on if you want to see bad packets */
2076 		if (em_debug_sbp)
2077 			reg_rctl |= E1000_RCTL_SBP;
2078 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2079 	} else if (ifp->if_flags & IFF_ALLMULTI) {
2080 		reg_rctl |= E1000_RCTL_MPE;
2081 		reg_rctl &= ~E1000_RCTL_UPE;
2082 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2083 	}
2084 }
2085 
2086 static void
2087 em_disable_promisc(struct adapter *adapter)
2088 {
2089 	u32	reg_rctl;
2090 
2091 	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2092 
2093 	reg_rctl &=  (~E1000_RCTL_UPE);
2094 	reg_rctl &=  (~E1000_RCTL_MPE);
2095 	reg_rctl &=  (~E1000_RCTL_SBP);
2096 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2097 }
2098 
2099 
2100 /*********************************************************************
2101  *  Multicast Update
2102  *
2103  *  This routine is called whenever the multicast address list is updated.
2104  *
2105  **********************************************************************/
2106 
2107 static void
2108 em_set_multi(struct adapter *adapter)
2109 {
2110 	struct ifnet	*ifp = adapter->ifp;
2111 	struct ifmultiaddr *ifma;
2112 	u32 reg_rctl = 0;
2113 	u8  *mta; /* Multicast array memory */
2114 	int mcnt = 0;
2115 
2116 	IOCTL_DEBUGOUT("em_set_multi: begin");
2117 
2118 	mta = adapter->mta;
2119 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2120 
2121 	if (adapter->hw.mac.type == e1000_82542 &&
2122 	    adapter->hw.revision_id == E1000_REVISION_2) {
2123 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2124 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2125 			e1000_pci_clear_mwi(&adapter->hw);
2126 		reg_rctl |= E1000_RCTL_RST;
2127 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2128 		msec_delay(5);
2129 	}
2130 
2131 #if __FreeBSD_version < 800000
2132 	IF_ADDR_LOCK(ifp);
2133 #else
2134 	if_maddr_rlock(ifp);
2135 #endif
2136 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2137 		if (ifma->ifma_addr->sa_family != AF_LINK)
2138 			continue;
2139 
2140 		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2141 			break;
2142 
2143 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2144 		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2145 		mcnt++;
2146 	}
2147 #if __FreeBSD_version < 800000
2148 	IF_ADDR_UNLOCK(ifp);
2149 #else
2150 	if_maddr_runlock(ifp);
2151 #endif
2152 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2153 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2154 		reg_rctl |= E1000_RCTL_MPE;
2155 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2156 	} else
2157 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2158 
2159 	if (adapter->hw.mac.type == e1000_82542 &&
2160 	    adapter->hw.revision_id == E1000_REVISION_2) {
2161 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2162 		reg_rctl &= ~E1000_RCTL_RST;
2163 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2164 		msec_delay(5);
2165 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2166 			e1000_pci_set_mwi(&adapter->hw);
2167 	}
2168 }
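
/*
 * Illustrative sketch (editorial; not compiled): the multicast table used
 * above is a flat byte array of MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN
 * bytes, with the i-th 6-byte address starting at mta[i * ETH_ADDR_LEN].
 * A hypothetical dump helper makes the layout explicit:
 */
#if 0
static void
example_dump_mta(u8 *mta, int mcnt)
{
	int i;

	for (i = 0; i < mcnt; i++) {
		u8 *addr = &mta[i * ETH_ADDR_LEN];

		printf("mc[%d] = %02x:%02x:%02x:%02x:%02x:%02x\n", i,
		    addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
	}
}
#endif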
2169 
2170 
2171 /*********************************************************************
2172  *  Timer routine
2173  *
2174  *  This routine checks for link status and updates statistics.
2175  *
2176  **********************************************************************/
2177 
2178 static void
2179 em_local_timer(void *arg)
2180 {
2181 	struct adapter	*adapter = arg;
2182 	struct ifnet	*ifp = adapter->ifp;
2183 	struct tx_ring	*txr = adapter->tx_rings;
2184 	struct rx_ring	*rxr = adapter->rx_rings;
2185 
2186 	EM_CORE_LOCK_ASSERT(adapter);
2187 
2188 	em_update_link_status(adapter);
2189 	em_update_stats_counters(adapter);
2190 
2191 	/* Reset LAA into RAR[0] on 82571 */
2192 	if ((adapter->hw.mac.type == e1000_82571) &&
2193 	    e1000_get_laa_state_82571(&adapter->hw))
2194 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2195 
2196 	/* Trigger the RX taskqueue to refill the ring if it is empty */
2197 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
2198 		if (rxr->next_to_check == rxr->next_to_refresh) {
2199 			taskqueue_enqueue(rxr->tq, &rxr->rx_task);
2200 		}
2201 	}
2202 
2203 	/*
2204 	** Don't do TX watchdog check if we've been paused
2205 	*/
2206 	if (adapter->pause_frames) {
2207 		adapter->pause_frames = 0;
2208 		goto out;
2209 	}
2210 	/*
2211 	** Check on the state of the TX queue(s); this
2212 	** can be done without the lock because it's read-only
2213 	** and the HUNG state will be static if set.
2214 	*/
2215 	for (int i = 0; i < adapter->num_queues; i++, txr++)
2216 		if (txr->queue_status == EM_QUEUE_HUNG)
2217 			goto hung;
2218 out:
2219 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2220 	return;
2221 hung:
2222 	/* Looks like we're hung */
2223 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2224 	device_printf(adapter->dev,
2225 	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2226 	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2227 	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2228 	device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2229 	    "Next TX to Clean = %d\n",
2230 	    txr->me, txr->tx_avail, txr->next_to_clean);
2231 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2232 	adapter->watchdog_events++;
2233 	em_init_locked(adapter);
2234 }
2235 
2236 
2237 static void
2238 em_update_link_status(struct adapter *adapter)
2239 {
2240 	struct e1000_hw *hw = &adapter->hw;
2241 	struct ifnet *ifp = adapter->ifp;
2242 	device_t dev = adapter->dev;
2243 	struct tx_ring *txr = adapter->tx_rings;
2244 	u32 link_check = 0;
2245 
2246 	/* Get the cached link value or read phy for real */
2247 	switch (hw->phy.media_type) {
2248 	case e1000_media_type_copper:
2249 		if (hw->mac.get_link_status) {
2250 			/* Do the work to read phy */
2251 			e1000_check_for_link(hw);
2252 			link_check = !hw->mac.get_link_status;
2253 			if (link_check) /* ESB2 fix */
2254 				e1000_cfg_on_link_up(hw);
2255 		} else
2256 			link_check = TRUE;
2257 		break;
2258 	case e1000_media_type_fiber:
2259 		e1000_check_for_link(hw);
2260 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2261 		    E1000_STATUS_LU);
2262 		break;
2263 	case e1000_media_type_internal_serdes:
2264 		e1000_check_for_link(hw);
2265 		link_check = adapter->hw.mac.serdes_has_link;
2266 		break;
2267 	default:
2268 	case e1000_media_type_unknown:
2269 		break;
2270 	}
2271 
2272 	/* Now check for a transition */
2273 	if (link_check && (adapter->link_active == 0)) {
2274 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2275 		    &adapter->link_duplex);
2276 		/* Check if we must disable SPEED_MODE bit on PCI-E */
2277 		if ((adapter->link_speed != SPEED_1000) &&
2278 		    ((hw->mac.type == e1000_82571) ||
2279 		    (hw->mac.type == e1000_82572))) {
2280 			int tarc0;
2281 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2282 			tarc0 &= ~SPEED_MODE_BIT;
2283 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2284 		}
2285 		if (bootverbose)
2286 			device_printf(dev, "Link is up %d Mbps %s\n",
2287 			    adapter->link_speed,
2288 			    ((adapter->link_duplex == FULL_DUPLEX) ?
2289 			    "Full Duplex" : "Half Duplex"));
2290 		adapter->link_active = 1;
2291 		adapter->smartspeed = 0;
2292 		ifp->if_baudrate = adapter->link_speed * 1000000;
2293 		if_link_state_change(ifp, LINK_STATE_UP);
2294 	} else if (!link_check && (adapter->link_active == 1)) {
2295 		ifp->if_baudrate = adapter->link_speed = 0;
2296 		adapter->link_duplex = 0;
2297 		if (bootverbose)
2298 			device_printf(dev, "Link is Down\n");
2299 		adapter->link_active = 0;
2300 		/* Link down, disable watchdog */
2301 		for (int i = 0; i < adapter->num_queues; i++, txr++)
2302 			txr->queue_status = EM_QUEUE_IDLE;
2303 		if_link_state_change(ifp, LINK_STATE_DOWN);
2304 	}
2305 }
2306 
2307 /*********************************************************************
2308  *
2309  *  This routine disables all traffic on the adapter by issuing a
2310  *  global reset on the MAC and deallocates TX/RX buffers.
2311  *
2312  *  This routine should always be called with BOTH the CORE
2313  *  and TX locks.
2314  **********************************************************************/
2315 
2316 static void
2317 em_stop(void *arg)
2318 {
2319 	struct adapter	*adapter = arg;
2320 	struct ifnet	*ifp = adapter->ifp;
2321 	struct tx_ring	*txr = adapter->tx_rings;
2322 
2323 	EM_CORE_LOCK_ASSERT(adapter);
2324 
2325 	INIT_DEBUGOUT("em_stop: begin");
2326 
2327 	em_disable_intr(adapter);
2328 	callout_stop(&adapter->timer);
2329 
2330 	/* Tell the stack that the interface is no longer active */
2331 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2332 
2333 	/* Unarm watchdog timer. */
2334 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2335 		EM_TX_LOCK(txr);
2336 		txr->queue_status = EM_QUEUE_IDLE;
2337 		EM_TX_UNLOCK(txr);
2338 	}
2339 
2340 	e1000_reset_hw(&adapter->hw);
2341 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2342 
2343 	e1000_led_off(&adapter->hw);
2344 	e1000_cleanup_led(&adapter->hw);
2345 }
2346 
2347 
2348 /*********************************************************************
2349  *
2350  *  Determine hardware revision.
2351  *
2352  **********************************************************************/
2353 static void
2354 em_identify_hardware(struct adapter *adapter)
2355 {
2356 	device_t dev = adapter->dev;
2357 
2358 	/* Make sure our PCI config space has the necessary stuff set */
2359 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2360 	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2361 	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2362 		device_printf(dev, "Memory Access and/or Bus Master bits "
2363 		    "were not set!\n");
2364 		adapter->hw.bus.pci_cmd_word |=
2365 		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2366 		pci_write_config(dev, PCIR_COMMAND,
2367 		    adapter->hw.bus.pci_cmd_word, 2);
2368 	}
2369 
2370 	/* Save off the information about this board */
2371 	adapter->hw.vendor_id = pci_get_vendor(dev);
2372 	adapter->hw.device_id = pci_get_device(dev);
2373 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2374 	adapter->hw.subsystem_vendor_id =
2375 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2376 	adapter->hw.subsystem_device_id =
2377 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2378 
2379 	/* Do Shared Code Init and Setup */
2380 	if (e1000_set_mac_type(&adapter->hw)) {
2381 		device_printf(dev, "Setup init failure\n");
2382 		return;
2383 	}
2384 }
2385 
2386 static int
2387 em_allocate_pci_resources(struct adapter *adapter)
2388 {
2389 	device_t	dev = adapter->dev;
2390 	int		rid;
2391 
2392 	rid = PCIR_BAR(0);
2393 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2394 	    &rid, RF_ACTIVE);
2395 	if (adapter->memory == NULL) {
2396 		device_printf(dev, "Unable to allocate bus resource: memory\n");
2397 		return (ENXIO);
2398 	}
2399 	adapter->osdep.mem_bus_space_tag =
2400 	    rman_get_bustag(adapter->memory);
2401 	adapter->osdep.mem_bus_space_handle =
2402 	    rman_get_bushandle(adapter->memory);
2403 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2404 
2405 	/* Default to a single queue */
2406 	adapter->num_queues = 1;
2407 
2408 	/*
2409 	 * Setup MSI/X or MSI if PCI Express
2410 	 */
2411 	adapter->msix = em_setup_msix(adapter);
2412 
2413 	adapter->hw.back = &adapter->osdep;
2414 
2415 	return (0);
2416 }
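
/*
 * Editorial note: hw_addr is pointed at the bus-space handle rather than a
 * direct KVA mapping; the shared code's E1000_READ_REG/E1000_WRITE_REG
 * macros reach the registers through the osdep tag and handle saved above
 * (via hw->back), so all register access goes through bus_space_read/write.
 * Roughly what a register read amounts to (illustrative, not compiled):
 */
#if 0
static u32
example_read_reg(struct adapter *adapter, u32 reg)
{
	return (bus_space_read_4(adapter->osdep.mem_bus_space_tag,
	    adapter->osdep.mem_bus_space_handle, reg));
}
#endif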
2417 
2418 /*********************************************************************
2419  *
2420  *  Setup the Legacy or MSI Interrupt handler
2421  *
2422  **********************************************************************/
2423 int
2424 em_allocate_legacy(struct adapter *adapter)
2425 {
2426 	device_t dev = adapter->dev;
2427 	int error, rid = 0;
2428 
2429 	/* Manually turn off all interrupts */
2430 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2431 
2432 	if (adapter->msix == 1) /* using MSI */
2433 		rid = 1;
2434 	/* We allocate a single interrupt resource */
2435 	adapter->res = bus_alloc_resource_any(dev,
2436 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2437 	if (adapter->res == NULL) {
2438 		device_printf(dev, "Unable to allocate bus resource: "
2439 		    "interrupt\n");
2440 		return (ENXIO);
2441 	}
2442 
2443 	/*
2444 	 * Allocate a fast interrupt and the associated
2445 	 * deferred processing contexts.
2446 	 */
2447 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2448 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2449 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2450 	    taskqueue_thread_enqueue, &adapter->tq);
2451 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2452 	    device_get_nameunit(adapter->dev));
2453 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2454 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2455 		device_printf(dev, "Failed to register fast interrupt "
2456 			    "handler: %d\n", error);
2457 		taskqueue_free(adapter->tq);
2458 		adapter->tq = NULL;
2459 		return (error);
2460 	}
2461 
2462 	return (0);
2463 }
2464 
2465 /*********************************************************************
2466  *
2467  *  Setup the MSIX Interrupt handlers
2468  *  Setup the MSIX Interrupt handlers.
2469  *   This is not really multiqueue, rather
2470  *   it's just multiple interrupt vectors.
2471  **********************************************************************/
2472 int
2473 em_allocate_msix(struct adapter *adapter)
2474 {
2475 	device_t	dev = adapter->dev;
2476 	struct		tx_ring *txr = adapter->tx_rings;
2477 	struct		rx_ring *rxr = adapter->rx_rings;
2478 	int		error, rid, vector = 0;
2479 
2480 
2481 	/* Make sure all interrupts are disabled */
2482 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2483 
2484 	/* First set up ring resources */
2485 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2486 
2487 		/* RX ring */
2488 		rid = vector + 1;
2489 
2490 		rxr->res = bus_alloc_resource_any(dev,
2491 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2492 		if (rxr->res == NULL) {
2493 			device_printf(dev,
2494 			    "Unable to allocate bus resource: "
2495 			    "RX MSIX Interrupt %d\n", i);
2496 			return (ENXIO);
2497 		}
2498 		if ((error = bus_setup_intr(dev, rxr->res,
2499 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2500 		    rxr, &rxr->tag)) != 0) {
2501 			device_printf(dev, "Failed to register RX handler\n");
2502 			return (error);
2503 		}
2504 #if __FreeBSD_version >= 800504
2505 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2506 #endif
2507 		rxr->msix = vector++; /* NOTE increment vector for TX */
2508 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2509 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2510 		    taskqueue_thread_enqueue, &rxr->tq);
2511 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2512 		    device_get_nameunit(adapter->dev));
2513 		/*
2514 		** Set the bit to enable interrupt
2515 		** in E1000_IMS -- bits 20 and 21
2516 		** are for RX0 and RX1, note this has
2517 		** NOTHING to do with the MSIX vector
2518 		*/
2519 		rxr->ims = 1 << (20 + i);
2520 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2521 
2522 		/* TX ring */
2523 		rid = vector + 1;
2524 		txr->res = bus_alloc_resource_any(dev,
2525 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2526 		if (txr->res == NULL) {
2527 			device_printf(dev,
2528 			    "Unable to allocate bus resource: "
2529 			    "TX MSIX Interrupt %d\n", i);
2530 			return (ENXIO);
2531 		}
2532 		if ((error = bus_setup_intr(dev, txr->res,
2533 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2534 		    txr, &txr->tag)) != 0) {
2535 			device_printf(dev, "Failed to register TX handler\n");
2536 			return (error);
2537 		}
2538 #if __FreeBSD_version >= 800504
2539 		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2540 #endif
2541 		txr->msix = vector++; /* Increment vector for next pass */
2542 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2543 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2544 		    taskqueue_thread_enqueue, &txr->tq);
2545 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2546 		    device_get_nameunit(adapter->dev));
2547 		/*
2548 		** Set the bit to enable interrupt
2549 		** in E1000_IMS -- bits 22 and 23
2550 		** are for TX0 and TX1, note this has
2551 		** NOTHING to do with the MSIX vector
2552 		*/
2553 		txr->ims = 1 << (22 + i);
2554 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2555 	}
2556 
2557 	/* Link interrupt */
2558 	++rid;
2559 	adapter->res = bus_alloc_resource_any(dev,
2560 	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2561 	if (!adapter->res) {
2562 		device_printf(dev, "Unable to allocate "
2563 		    "bus resource: Link interrupt [%d]\n", rid);
2564 		return (ENXIO);
2565 	}
2566 	/* Set the link handler function */
2567 	error = bus_setup_intr(dev, adapter->res,
2568 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2569 	    em_msix_link, adapter, &adapter->tag);
2570 	if (error) {
2571 		adapter->res = NULL;
2572 		device_printf(dev, "Failed to register LINK handler\n");
2573 		return (error);
2574 	}
2575 #if __FreeBSD_version >= 800504
2576 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2577 #endif
2578 	adapter->linkvec = vector;
2579 	adapter->ivars |=  (8 | vector) << 16;
2580 	adapter->ivars |= 0x80000000;
2581 
2582 	return (0);
2583 }
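
/*
 * Illustrative worked example (editorial): the 82574 IVAR value assembled
 * above for the single-queue case (RX vector 0, TX vector 1, link
 * vector 2). Each cause gets a 4-bit nibble of (8 | vector), where the
 * 8 bit marks the entry valid:
 *
 *   ivars  = (8 | 0) << 0         RX0  -> 0x00000008
 *          | (8 | 1) << 8         TX0  -> 0x00000900
 *          | (8 | 2) << 16        link -> 0x000a0000
 *          | 0x80000000           (flag set by the driver above)
 *          = 0x800a0908
 */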
2584 
2585 
2586 static void
2587 em_free_pci_resources(struct adapter *adapter)
2588 {
2589 	device_t	dev = adapter->dev;
2590 	struct tx_ring	*txr;
2591 	struct rx_ring	*rxr;
2592 	int		rid;
2593 
2594 
2595 	/*
2596 	** Release all the queue interrupt resources:
2597 	*/
2598 	for (int i = 0; i < adapter->num_queues; i++) {
2599 		txr = &adapter->tx_rings[i];
2600 		rxr = &adapter->rx_rings[i];
2601 		/* an early abort? */
2602 		if ((txr == NULL) || (rxr == NULL))
2603 			break;
2604 		rid = txr->msix + 1;
2605 		if (txr->tag != NULL) {
2606 			bus_teardown_intr(dev, txr->res, txr->tag);
2607 			txr->tag = NULL;
2608 		}
2609 		if (txr->res != NULL)
2610 			bus_release_resource(dev, SYS_RES_IRQ,
2611 			    rid, txr->res);
2612 		rid = rxr->msix + 1;
2613 		if (rxr->tag != NULL) {
2614 			bus_teardown_intr(dev, rxr->res, rxr->tag);
2615 			rxr->tag = NULL;
2616 		}
2617 		if (rxr->res != NULL)
2618 			bus_release_resource(dev, SYS_RES_IRQ,
2619 			    rid, rxr->res);
2620 	}
2621 
2622 	if (adapter->linkvec) /* we are doing MSIX */
2623 		rid = adapter->linkvec + 1;
2624 	else
2625 		rid = (adapter->msix != 0) ? 1 : 0;
2626 
2627 	if (adapter->tag != NULL) {
2628 		bus_teardown_intr(dev, adapter->res, adapter->tag);
2629 		adapter->tag = NULL;
2630 	}
2631 
2632 	if (adapter->res != NULL)
2633 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2634 
2635 
2636 	if (adapter->msix)
2637 		pci_release_msi(dev);
2638 
2639 	if (adapter->msix_mem != NULL)
2640 		bus_release_resource(dev, SYS_RES_MEMORY,
2641 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2642 
2643 	if (adapter->memory != NULL)
2644 		bus_release_resource(dev, SYS_RES_MEMORY,
2645 		    PCIR_BAR(0), adapter->memory);
2646 
2647 	if (adapter->flash != NULL)
2648 		bus_release_resource(dev, SYS_RES_MEMORY,
2649 		    EM_FLASH, adapter->flash);
2650 }
2651 
2652 /*
2653  * Setup MSI or MSI/X
2654  */
2655 static int
2656 em_setup_msix(struct adapter *adapter)
2657 {
2658 	device_t dev = adapter->dev;
2659 	int val = 0;
2660 
2661 
2662 	/*
2663 	** Setup MSI/X for Hartwell: tests have shown
2664 	** use of two queues to be unstable, and to
2665 	** provide no great gain anyway, so we simply
2666 	** separate the interrupts and use a single queue.
2667 	*/
2668 	if ((adapter->hw.mac.type == e1000_82574) &&
2669 	    (em_enable_msix == TRUE)) {
2670 		/* Map the MSIX BAR */
2671 		int rid = PCIR_BAR(EM_MSIX_BAR);
2672 		adapter->msix_mem = bus_alloc_resource_any(dev,
2673 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2674 		if (!adapter->msix_mem) {
2675 			/* May not be enabled */
2676 			device_printf(adapter->dev,
2677 			    "Unable to map MSIX table\n");
2678 			goto msi;
2679 		}
2680 		val = pci_msix_count(dev);
2681 		if (val < 3) {
2682 			bus_release_resource(dev, SYS_RES_MEMORY,
2683 			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2684 			adapter->msix_mem = NULL;
2685 			device_printf(adapter->dev,
2686 			    "MSIX: insufficient vectors, using MSI\n");
2687 			goto msi;
2688 		}
2689 		val = 3;
2690 		adapter->num_queues = 1;
2691 		if (pci_alloc_msix(dev, &val) == 0) {
2692 			device_printf(adapter->dev,
2693 			    "Using MSIX interrupts "
2694 			    "with %d vectors\n", val);
2695 		}
2696 
2697 		return (val);
2698 	}
2699 msi:
2700 	val = pci_msi_count(dev);
2701 	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2702 		adapter->msix = 1;
2703 		device_printf(adapter->dev, "Using an MSI interrupt\n");
2704 		return (val);
2705 	}
2706 	/* Should only happen due to manual configuration */
2707 	device_printf(adapter->dev, "No MSI/MSI-X, using a legacy IRQ\n");
2708 	return (0);
2709 }
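
/*
 * Editorial note: the 82574 path above needs three MSI-X vectors -- one
 * each for RX, TX, and link -- which is why pci_msix_count() reporting
 * fewer than 3 forces the MSI fallback. A hypothetical helper expressing
 * the budget (illustrative, not compiled):
 */
#if 0
static int
example_vectors_needed(struct adapter *adapter)
{
	/* One RX and one TX vector per queue, plus the link vector */
	return (adapter->num_queues * 2 + 1);
}
#endif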
2710 
2711 
2712 /*********************************************************************
2713  *
2714  *  Initialize the hardware to a configuration
2715  *  as specified by the adapter structure.
2716  *
2717  **********************************************************************/
2718 static void
2719 em_reset(struct adapter *adapter)
2720 {
2721 	device_t	dev = adapter->dev;
2722 	struct ifnet	*ifp = adapter->ifp;
2723 	struct e1000_hw	*hw = &adapter->hw;
2724 	u16		rx_buffer_size;
2725 
2726 	INIT_DEBUGOUT("em_reset: begin");
2727 
2728 	/* Set up smart power down as default off on newer adapters. */
2729 	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2730 	    hw->mac.type == e1000_82572)) {
2731 		u16 phy_tmp = 0;
2732 
2733 		/* Speed up time to link by disabling smart power down. */
2734 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2735 		phy_tmp &= ~IGP02E1000_PM_SPD;
2736 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2737 	}
2738 
2739 	/*
2740 	 * These parameters control the automatic generation (Tx) and
2741 	 * response (Rx) to Ethernet PAUSE frames.
2742 	 * - High water mark should allow for at least two frames to be
2743 	 *   received after sending an XOFF.
2744 	 * - Low water mark works best when it is very near the high water mark.
2745 	 *   This allows the receiver to restart by sending XON when it has
2746 	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2747 	 *   restart after one full frame is pulled from the buffer. There
2748 	 *   could be several smaller frames in the buffer and if so they will
2749 	 *   not trigger the XON until their total number reduces the buffer
2750 	 *   not trigger the XON until their total size reduces the buffer
2751 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2752 	 */
2753 	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2754 
2755 	hw->fc.high_water = rx_buffer_size -
2756 	    roundup2(adapter->max_frame_size, 1024);
2757 	hw->fc.low_water = hw->fc.high_water - 1500;
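
	/*
	 * Illustrative worked example (editorial, assumed values): with the
	 * low 16 bits of PBA reading 0x0030 and a standard 1518-byte
	 * max_frame_size,
	 *
	 *   rx_buffer_size = 0x0030 << 10                 = 49152
	 *   high_water     = 49152 - roundup2(1518, 1024) = 49152 - 2048
	 *                                                 = 47104
	 *   low_water      = 47104 - 1500                 = 45604
	 *
	 * leaving 2048 bytes of headroom above the high water mark and a
	 * 1500-byte gap before XON is sent.
	 */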
2758 
2759 	if (hw->mac.type == e1000_80003es2lan)
2760 		hw->fc.pause_time = 0xFFFF;
2761 	else
2762 		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2763 
2764 	hw->fc.send_xon = TRUE;
2765 
2766 	/* Set flow control; use the tunable value if it is sane */
2767 	hw->fc.requested_mode = adapter->fc_setting;
2768 
2769 	/* Workaround: no TX flow ctrl for PCH */
2770 	if (hw->mac.type == e1000_pchlan)
2771 		hw->fc.requested_mode = e1000_fc_rx_pause;
2772 
2773 	/* Override settings for PCH2LAN; yes, they're magic values :) */
2774 	if (hw->mac.type == e1000_pch2lan) {
2775 		hw->fc.high_water = 0x5C20;
2776 		hw->fc.low_water = 0x5048;
2777 		hw->fc.pause_time = 0x0650;
2778 		hw->fc.refresh_time = 0x0400;
2779 		/* Jumbos need adjusted PBA */
2780 		if (ifp->if_mtu > ETHERMTU)
2781 			E1000_WRITE_REG(hw, E1000_PBA, 12);
2782 		else
2783 			E1000_WRITE_REG(hw, E1000_PBA, 26);
2784 	}
2785 
2786 	/* Issue a global reset */
2787 	e1000_reset_hw(hw);
2788 	E1000_WRITE_REG(hw, E1000_WUC, 0);
2789 	em_disable_aspm(adapter);
2790 
2791 	if (e1000_init_hw(hw) < 0) {
2792 		device_printf(dev, "Hardware Initialization Failed\n");
2793 		return;
2794 	}
2795 
2796 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2797 	e1000_get_phy_info(hw);
2798 	e1000_check_for_link(hw);
2799 	return;
2800 }
2801 
2802 /*********************************************************************
2803  *
2804  *  Setup networking device structure and register an interface.
2805  *
2806  **********************************************************************/
2807 static int
2808 em_setup_interface(device_t dev, struct adapter *adapter)
2809 {
2810 	struct ifnet   *ifp;
2811 
2812 	INIT_DEBUGOUT("em_setup_interface: begin");
2813 
2814 	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2815 	if (ifp == NULL) {
2816 		device_printf(dev, "cannot allocate ifnet structure\n");
2817 		return (-1);
2818 	}
2819 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2820 	ifp->if_mtu = ETHERMTU;
2821 	ifp->if_init =  em_init;
2822 	ifp->if_softc = adapter;
2823 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2824 	ifp->if_ioctl = em_ioctl;
2825 	ifp->if_start = em_start;
2826 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2827 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2828 	IFQ_SET_READY(&ifp->if_snd);
2829 
2830 	ether_ifattach(ifp, adapter->hw.mac.addr);
2831 
2832 	ifp->if_capabilities = ifp->if_capenable = 0;
2833 
2834 #ifdef EM_MULTIQUEUE
2835 	/* Multiqueue tx functions */
2836 	ifp->if_transmit = em_mq_start;
2837 	ifp->if_qflush = em_qflush;
2838 #endif
2839 
2840 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2841 	ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2842 
2843 	/* Enable TSO by default, can disable with ifconfig */
2844 	ifp->if_capabilities |= IFCAP_TSO4;
2845 	ifp->if_capenable |= IFCAP_TSO4;
2846 
2847 	/*
2848 	 * Tell the upper layer(s) we
2849 	 * support full VLAN capability
2850 	 */
2851 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2852 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2853 	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2854 
2855 	/*
2856 	** Don't turn this on by default: if vlans are
2857 	** created on another pseudo device (e.g. lagg),
2858 	** then vlan events are not passed through, breaking
2859 	** operation, but with HW FILTER off it works. If
2860 	** using vlans directly on the em driver you can
2861 	** enable this and get full hardware tag filtering.
2862 	*/
2863 	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2864 
2865 #ifdef DEVICE_POLLING
2866 	ifp->if_capabilities |= IFCAP_POLLING;
2867 #endif
2868 
2869 	/* Enable only WOL MAGIC by default */
2870 	if (adapter->wol) {
2871 		ifp->if_capabilities |= IFCAP_WOL;
2872 		ifp->if_capenable |= IFCAP_WOL_MAGIC;
2873 	}
2874 
2875 	/*
2876 	 * Specify the media types supported by this adapter and register
2877 	 * callbacks to update media and link information
2878 	 */
2879 	ifmedia_init(&adapter->media, IFM_IMASK,
2880 	    em_media_change, em_media_status);
2881 	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2882 	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2883 		u_char fiber_type = IFM_1000_SX;	/* default type */
2884 
2885 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2886 			    0, NULL);
2887 		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2888 	} else {
2889 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2890 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2891 			    0, NULL);
2892 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2893 			    0, NULL);
2894 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2895 			    0, NULL);
2896 		if (adapter->hw.phy.type != e1000_phy_ife) {
2897 			ifmedia_add(&adapter->media,
2898 				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2899 			ifmedia_add(&adapter->media,
2900 				IFM_ETHER | IFM_1000_T, 0, NULL);
2901 		}
2902 	}
2903 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2904 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2905 	return (0);
2906 }
2907 
2908 
2909 /*
2910  * Manage DMA'able memory.
2911  */
2912 static void
2913 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2914 {
2915 	if (error)
2916 		return;
2917 	*(bus_addr_t *) arg = segs[0].ds_addr;
2918 }
2919 
2920 static int
2921 em_dma_malloc(struct adapter *adapter, bus_size_t size,
2922         struct em_dma_alloc *dma, int mapflags)
2923 {
2924 	int error;
2925 
2926 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2927 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2928 				BUS_SPACE_MAXADDR,	/* lowaddr */
2929 				BUS_SPACE_MAXADDR,	/* highaddr */
2930 				NULL, NULL,		/* filter, filterarg */
2931 				size,			/* maxsize */
2932 				1,			/* nsegments */
2933 				size,			/* maxsegsize */
2934 				0,			/* flags */
2935 				NULL,			/* lockfunc */
2936 				NULL,			/* lockarg */
2937 				&dma->dma_tag);
2938 	if (error) {
2939 		device_printf(adapter->dev,
2940 		    "%s: bus_dma_tag_create failed: %d\n",
2941 		    __func__, error);
2942 		goto fail_0;
2943 	}
2944 
2945 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2946 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2947 	if (error) {
2948 		device_printf(adapter->dev,
2949 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2950 		    __func__, (uintmax_t)size, error);
2951 		goto fail_2;
2952 	}
2953 
2954 	dma->dma_paddr = 0;
2955 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2956 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2957 	if (error || dma->dma_paddr == 0) {
2958 		device_printf(adapter->dev,
2959 		    "%s: bus_dmamap_load failed: %d\n",
2960 		    __func__, error);
2961 		goto fail_3;
2962 	}
2963 
2964 	return (0);
2965 
2966 fail_3:
2967 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2968 fail_2:
2969 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2970 	bus_dma_tag_destroy(dma->dma_tag);
2971 fail_0:
2972 	dma->dma_map = NULL;
2973 	dma->dma_tag = NULL;
2974 
2975 	return (error);
2976 }
2977 
2978 static void
2979 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2980 {
2981 	if (dma->dma_tag == NULL)
2982 		return;
2983 	if (dma->dma_map != NULL) {
2984 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2985 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2986 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2987 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2988 		dma->dma_map = NULL;
2989 	}
2990 	bus_dma_tag_destroy(dma->dma_tag);
2991 	dma->dma_tag = NULL;
2992 }
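
/*
 * Illustrative sketch (editorial; not compiled, size hypothetical): the
 * intended pairing of em_dma_malloc()/em_dma_free() above, as used for
 * descriptor rings. Real callers round the size up to EM_DBA_ALIGN first.
 */
#if 0
static int
example_ring_dma(struct adapter *adapter)
{
	struct em_dma_alloc dma;
	int error;

	error = em_dma_malloc(adapter, 4096, &dma, BUS_DMA_NOWAIT);
	if (error != 0)
		return (error);
	/* dma.dma_vaddr is the KVA; dma.dma_paddr is the bus address */
	em_dma_free(adapter, &dma);
	return (0);
}
#endif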
2993 
2994 
2995 /*********************************************************************
2996  *
2997  *  Allocate memory for the transmit and receive rings, and then
2998  *  the descriptors associated with each, called only once at attach.
2999  *
3000  **********************************************************************/
3001 static int
3002 em_allocate_queues(struct adapter *adapter)
3003 {
3004 	device_t		dev = adapter->dev;
3005 	struct tx_ring		*txr = NULL;
3006 	struct rx_ring		*rxr = NULL;
3007 	int rsize, tsize, error = E1000_SUCCESS;
3008 	int txconf = 0, rxconf = 0;
3009 
3010 
3011 	/* Allocate the TX ring struct memory */
3012 	if (!(adapter->tx_rings =
3013 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3014 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3015 		device_printf(dev, "Unable to allocate TX ring memory\n");
3016 		error = ENOMEM;
3017 		goto fail;
3018 	}
3019 
3020 	/* Now allocate the RX */
3021 	if (!(adapter->rx_rings =
3022 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3023 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3024 		device_printf(dev, "Unable to allocate RX ring memory\n");
3025 		error = ENOMEM;
3026 		goto rx_fail;
3027 	}
3028 
3029 	tsize = roundup2(adapter->num_tx_desc *
3030 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3031 	/*
3032 	 * Now set up the TX queues, txconf is needed to handle the
3033 	 * possibility that things fail midcourse and we need to
3034 	 * undo memory gracefully
3035 	 */
3036 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3037 		/* Set up some basics */
3038 		txr = &adapter->tx_rings[i];
3039 		txr->adapter = adapter;
3040 		txr->me = i;
3041 
3042 		/* Initialize the TX lock */
3043 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3044 		    device_get_nameunit(dev), txr->me);
3045 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3046 
3047 		if (em_dma_malloc(adapter, tsize,
3048 			&txr->txdma, BUS_DMA_NOWAIT)) {
3049 			device_printf(dev,
3050 			    "Unable to allocate TX Descriptor memory\n");
3051 			error = ENOMEM;
3052 			goto err_tx_desc;
3053 		}
3054 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3055 		bzero((void *)txr->tx_base, tsize);
3056 
3057 		if (em_allocate_transmit_buffers(txr)) {
3058 			device_printf(dev,
3059 			    "Critical Failure setting up transmit buffers\n");
3060 			error = ENOMEM;
3061 			goto err_tx_desc;
3062 		}
3063 #if __FreeBSD_version >= 800000
3064 		/* Allocate a buf ring */
3065 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3066 		    M_WAITOK, &txr->tx_mtx);
3067 #endif
3068 	}
3069 
3070 	/*
3071 	 * Next the RX queues...
3072 	 */
3073 	rsize = roundup2(adapter->num_rx_desc *
3074 	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3075 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3076 		rxr = &adapter->rx_rings[i];
3077 		rxr->adapter = adapter;
3078 		rxr->me = i;
3079 
3080 		/* Initialize the RX lock */
3081 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3082 		    device_get_nameunit(dev), rxr->me);
3083 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3084 
3085 		if (em_dma_malloc(adapter, rsize,
3086 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3087 			device_printf(dev,
3088 			    "Unable to allocate RX Descriptor memory\n");
3089 			error = ENOMEM;
3090 			goto err_rx_desc;
3091 		}
3092 		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3093 		bzero((void *)rxr->rx_base, rsize);
3094 
3095 		/* Allocate receive buffers for the ring */
3096 		if (em_allocate_receive_buffers(rxr)) {
3097 			device_printf(dev,
3098 			    "Critical Failure setting up receive buffers\n");
3099 			error = ENOMEM;
3100 			goto err_rx_desc;
3101 		}
3102 	}
3103 
3104 	return (0);
3105 
3106 err_rx_desc:
3107 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3108 		em_dma_free(adapter, &rxr->rxdma);
3109 err_tx_desc:
3110 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3111 		em_dma_free(adapter, &txr->txdma);
3112 	free(adapter->rx_rings, M_DEVBUF);
3113 rx_fail:
3114 #if __FreeBSD_version >= 800000
3115 	buf_ring_free(txr->br, M_DEVBUF);
3116 #endif
3117 	free(adapter->tx_rings, M_DEVBUF);
3118 fail:
3119 	return (error);
3120 }
3121 
3122 
3123 /*********************************************************************
3124  *
3125  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3126  *  the information needed to transmit a packet on the wire. This is
3127  *  called only once at attach; setup is done on every reset.
3128  *
3129  **********************************************************************/
3130 static int
3131 em_allocate_transmit_buffers(struct tx_ring *txr)
3132 {
3133 	struct adapter *adapter = txr->adapter;
3134 	device_t dev = adapter->dev;
3135 	struct em_buffer *txbuf;
3136 	int error, i;
3137 
3138 	/*
3139 	 * Setup DMA descriptor areas.
3140 	 */
3141 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3142 			       1, 0,			/* alignment, bounds */
3143 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3144 			       BUS_SPACE_MAXADDR,	/* highaddr */
3145 			       NULL, NULL,		/* filter, filterarg */
3146 			       EM_TSO_SIZE,		/* maxsize */
3147 			       EM_MAX_SCATTER,		/* nsegments */
3148 			       PAGE_SIZE,		/* maxsegsize */
3149 			       0,			/* flags */
3150 			       NULL,			/* lockfunc */
3151 			       NULL,			/* lockfuncarg */
3152 			       &txr->txtag))) {
3153 		device_printf(dev, "Unable to allocate TX DMA tag\n");
3154 		goto fail;
3155 	}
3156 
3157 	if (!(txr->tx_buffers =
3158 	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3159 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3160 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3161 		error = ENOMEM;
3162 		goto fail;
3163 	}
3164 
3165 	/* Create the descriptor buffer dma maps */
3166 	txbuf = txr->tx_buffers;
3167 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3168 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3169 		if (error != 0) {
3170 			device_printf(dev, "Unable to create TX DMA map\n");
3171 			goto fail;
3172 		}
3173 	}
3174 
3175 	return (0);
3176 fail:
3177 	/* We free everything; this handles the case where we failed midway */
3178 	em_free_transmit_structures(adapter);
3179 	return (error);
3180 }
3181 
3182 /*********************************************************************
3183  *
3184  *  Initialize a transmit ring.
3185  *
3186  **********************************************************************/
3187 static void
3188 em_setup_transmit_ring(struct tx_ring *txr)
3189 {
3190 	struct adapter *adapter = txr->adapter;
3191 	struct em_buffer *txbuf;
3192 	int i;
3193 
3194 	/* Clear the old descriptor contents */
3195 	EM_TX_LOCK(txr);
3196 	bzero((void *)txr->tx_base,
3197 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3198 	/* Reset indices */
3199 	txr->next_avail_desc = 0;
3200 	txr->next_to_clean = 0;
3201 
3202 	/* Free any existing tx buffers. */
3203 	txbuf = txr->tx_buffers;
3204 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3205 		if (txbuf->m_head != NULL) {
3206 			bus_dmamap_sync(txr->txtag, txbuf->map,
3207 			    BUS_DMASYNC_POSTWRITE);
3208 			bus_dmamap_unload(txr->txtag, txbuf->map);
3209 			m_freem(txbuf->m_head);
3210 			txbuf->m_head = NULL;
3211 		}
3212 		/* clear the watch index */
3213 		txbuf->next_eop = -1;
3214 	}
3215 
3216 	/* Set number of descriptors available */
3217 	txr->tx_avail = adapter->num_tx_desc;
3218 	txr->queue_status = EM_QUEUE_IDLE;
3219 
3220 	/* Clear checksum offload context. */
3221 	txr->last_hw_offload = 0;
3222 	txr->last_hw_ipcss = 0;
3223 	txr->last_hw_ipcso = 0;
3224 	txr->last_hw_tucss = 0;
3225 	txr->last_hw_tucso = 0;
3226 
3227 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3228 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3229 	EM_TX_UNLOCK(txr);
3230 }
3231 
3232 /*********************************************************************
3233  *
3234  *  Initialize all transmit rings.
3235  *
3236  **********************************************************************/
3237 static void
3238 em_setup_transmit_structures(struct adapter *adapter)
3239 {
3240 	struct tx_ring *txr = adapter->tx_rings;
3241 
3242 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3243 		em_setup_transmit_ring(txr);
3244 
3245 	return;
3246 }
3247 
3248 /*********************************************************************
3249  *
3250  *  Enable transmit unit.
3251  *
3252  **********************************************************************/
3253 static void
3254 em_initialize_transmit_unit(struct adapter *adapter)
3255 {
3256 	struct tx_ring	*txr = adapter->tx_rings;
3257 	struct e1000_hw	*hw = &adapter->hw;
3258 	u32	tctl, tarc, tipg = 0;
3259 
3260 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3261 
3262 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3263 		u64 bus_addr = txr->txdma.dma_paddr;
3264 		/* Base and Len of TX Ring */
3265 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3266 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3267 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3268 	    	    (u32)(bus_addr >> 32));
3269 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3270 	    	    (u32)bus_addr);
3271 		/* Init the HEAD/TAIL indices */
3272 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3273 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3274 
3275 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3276 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3277 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3278 
3279 		txr->queue_status = EM_QUEUE_IDLE;
3280 	}
3281 
3282 	/* Set the default values for the Tx Inter Packet Gap timer */
3283 	switch (adapter->hw.mac.type) {
3284 	case e1000_82542:
3285 		tipg = DEFAULT_82542_TIPG_IPGT;
3286 		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3287 		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3288 		break;
3289 	case e1000_80003es2lan:
3290 		tipg = DEFAULT_82543_TIPG_IPGR1;
3291 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3292 		    E1000_TIPG_IPGR2_SHIFT;
3293 		break;
3294 	default:
3295 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3296 		    (adapter->hw.phy.media_type ==
3297 		    e1000_media_type_internal_serdes))
3298 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3299 		else
3300 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3301 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3302 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3303 	}
3304 
3305 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3306 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3307 
3308 	if (adapter->hw.mac.type >= e1000_82540)
3309 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3310 		    adapter->tx_abs_int_delay.value);
3311 
3312 	if ((adapter->hw.mac.type == e1000_82571) ||
3313 	    (adapter->hw.mac.type == e1000_82572)) {
3314 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3315 		tarc |= SPEED_MODE_BIT;
3316 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3317 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3318 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3319 		tarc |= 1;
3320 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3321 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3322 		tarc |= 1;
3323 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3324 	}
3325 
3326 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3327 	if (adapter->tx_int_delay.value > 0)
3328 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3329 
3330 	/* Program the Transmit Control Register */
3331 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3332 	tctl &= ~E1000_TCTL_CT;
3333 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3334 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3335 
3336 	if (adapter->hw.mac.type >= e1000_82571)
3337 		tctl |= E1000_TCTL_MULR;
3338 
3339 	/* This write will effectively turn on the transmit unit. */
3340 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3341 
3342 }
3343 
3344 
3345 /*********************************************************************
3346  *
3347  *  Free all transmit rings.
3348  *
3349  **********************************************************************/
3350 static void
3351 em_free_transmit_structures(struct adapter *adapter)
3352 {
3353 	struct tx_ring *txr = adapter->tx_rings;
3354 
3355 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3356 		EM_TX_LOCK(txr);
3357 		em_free_transmit_buffers(txr);
3358 		em_dma_free(adapter, &txr->txdma);
3359 		EM_TX_UNLOCK(txr);
3360 		EM_TX_LOCK_DESTROY(txr);
3361 	}
3362 
3363 	free(adapter->tx_rings, M_DEVBUF);
3364 }
3365 
3366 /*********************************************************************
3367  *
3368  *  Free transmit ring related data structures.
3369  *
3370  **********************************************************************/
3371 static void
3372 em_free_transmit_buffers(struct tx_ring *txr)
3373 {
3374 	struct adapter		*adapter = txr->adapter;
3375 	struct em_buffer	*txbuf;
3376 
3377 	INIT_DEBUGOUT("free_transmit_ring: begin");
3378 
3379 	if (txr->tx_buffers == NULL)
3380 		return;
3381 
3382 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3383 		txbuf = &txr->tx_buffers[i];
3384 		if (txbuf->m_head != NULL) {
3385 			bus_dmamap_sync(txr->txtag, txbuf->map,
3386 			    BUS_DMASYNC_POSTWRITE);
3387 			bus_dmamap_unload(txr->txtag,
3388 			    txbuf->map);
3389 			m_freem(txbuf->m_head);
3390 			txbuf->m_head = NULL;
3391 			if (txbuf->map != NULL) {
3392 				bus_dmamap_destroy(txr->txtag,
3393 				    txbuf->map);
3394 				txbuf->map = NULL;
3395 			}
3396 		} else if (txbuf->map != NULL) {
3397 			bus_dmamap_unload(txr->txtag,
3398 			    txbuf->map);
3399 			bus_dmamap_destroy(txr->txtag,
3400 			    txbuf->map);
3401 			txbuf->map = NULL;
3402 		}
3403 	}
3404 #if __FreeBSD_version >= 800000
3405 	if (txr->br != NULL)
3406 		buf_ring_free(txr->br, M_DEVBUF);
3407 #endif
3408 	if (txr->tx_buffers != NULL) {
3409 		free(txr->tx_buffers, M_DEVBUF);
3410 		txr->tx_buffers = NULL;
3411 	}
3412 	if (txr->txtag != NULL) {
3413 		bus_dma_tag_destroy(txr->txtag);
3414 		txr->txtag = NULL;
3415 	}
3416 	return;
3417 }
3418 
3419 
3420 /*********************************************************************
3421  *  The offload context is protocol specific (TCP/UDP) and thus
3422  *  only needs to be set when the protocol changes. A context
3423  *  change can be a performance detriment, however, and might be
3424  *  better just disabled. The reason lies in the way the
3425  *  controller pipelines requests from the Tx data DMA: up to
3426  *  four requests can be pipelined, and they may belong to the
3427  *  same packet or to multiple packets. However, all requests for
3428  *  one packet are issued before any request for a subsequent
3429  *  packet, and if a request for the next packet requires a
3430  *  context change, that request is stalled until the previous
3431  *  request completes. Setting up a new context therefore
3432  *  effectively disables pipelined Tx data DMA, which in turn
3433  *  greatly slows down the transmission of small frames. (See
3434  *  the illustrative sketch after this function.)
3435  **********************************************************************/
3436 static void
3437 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3438     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3439 {
3440 	struct adapter			*adapter = txr->adapter;
3441 	struct e1000_context_desc	*TXD = NULL;
3442 	struct em_buffer		*tx_buffer;
3443 	int				cur, hdr_len;
3444 	u32				cmd = 0;
3445 	u16				offload = 0;
3446 	u8				ipcso, ipcss, tucso, tucss;
3447 
3448 	ipcss = ipcso = tucss = tucso = 0;
3449 	hdr_len = ip_off + (ip->ip_hl << 2);
3450 	cur = txr->next_avail_desc;
3451 
3452 	/* Setup of IP header checksum. */
3453 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3454 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3455 		offload |= CSUM_IP;
3456 		ipcss = ip_off;
3457 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3458 		/*
3459 		 * Start offset for header checksum calculation.
3460 		 * End offset for header checksum calculation.
3461 		 * Offset of place to put the checksum.
3462 		 */
3463 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3464 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3465 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3466 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3467 		cmd |= E1000_TXD_CMD_IP;
3468 	}
3469 
3470 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3471  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3472  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3473  		offload |= CSUM_TCP;
3474  		tucss = hdr_len;
3475  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3476  		/*
3477  		 * Setting up a new checksum offload context for every
3478  		 * frame takes a lot of hardware processing time. It also
3479  		 * hurts performance badly for small frames, so avoid it
3480  		 * when the driver can reuse the previously configured
3481  		 * checksum offload context.
3482  		 */
3483  		if (txr->last_hw_offload == offload) {
3484  			if (offload & CSUM_IP) {
3485  				if (txr->last_hw_ipcss == ipcss &&
3486  				    txr->last_hw_ipcso == ipcso &&
3487  				    txr->last_hw_tucss == tucss &&
3488  				    txr->last_hw_tucso == tucso)
3489  					return;
3490  			} else {
3491  				if (txr->last_hw_tucss == tucss &&
3492  				    txr->last_hw_tucso == tucso)
3493  					return;
3494  			}
3495   		}
3496  		txr->last_hw_offload = offload;
3497  		txr->last_hw_tucss = tucss;
3498  		txr->last_hw_tucso = tucso;
3499  		/*
3500  		 * Start offset for payload checksum calculation.
3501  		 * End offset for payload checksum calculation.
3502  		 * Offset of place to put the checksum.
3503  		 */
3504 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3505  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3506  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3507  		TXD->upper_setup.tcp_fields.tucso = tucso;
3508  		cmd |= E1000_TXD_CMD_TCP;
3509  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3510  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3511  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
 		offload |= CSUM_UDP;
3512  		tucss = hdr_len;
3513  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3514  		/*
3515  		 * Setting up a new checksum offload context for every
3516  		 * frame takes a lot of hardware processing time. It also
3517  		 * hurts performance badly for small frames, so avoid it
3518  		 * when the driver can reuse the previously configured
3519  		 * checksum offload context.
3520  		 */
3521  		if (txr->last_hw_offload == offload) {
3522  			if (offload & CSUM_IP) {
3523  				if (txr->last_hw_ipcss == ipcss &&
3524  				    txr->last_hw_ipcso == ipcso &&
3525  				    txr->last_hw_tucss == tucss &&
3526  				    txr->last_hw_tucso == tucso)
3527  					return;
3528  			} else {
3529  				if (txr->last_hw_tucss == tucss &&
3530  				    txr->last_hw_tucso == tucso)
3531  					return;
3532  			}
3533  		}
3534  		txr->last_hw_offload = offload;
3535  		txr->last_hw_tucss = tucss;
3536  		txr->last_hw_tucso = tucso;
3537  		/*
3538  		 * Start offset for payload checksum calculation.
3539  		 * End offset for payload checksum calculation.
3540  		 * Offset of place to put the checksum.
3541  		 */
3542 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3543  		TXD->upper_setup.tcp_fields.tucss = tucss;
3544  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3545  		TXD->upper_setup.tcp_fields.tucso = tucso;
3546   	}
3547 
3548  	if (offload & CSUM_IP) {
3549  		txr->last_hw_ipcss = ipcss;
3550  		txr->last_hw_ipcso = ipcso;
3551   	}
3552 
3553 	TXD->tcp_seg_setup.data = htole32(0);
3554 	TXD->cmd_and_length =
3555 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3556 	tx_buffer = &txr->tx_buffers[cur];
3557 	tx_buffer->m_head = NULL;
3558 	tx_buffer->next_eop = -1;
3559 
3560 	if (++cur == adapter->num_tx_desc)
3561 		cur = 0;
3562 
3563 	txr->tx_avail--;
3564 	txr->next_avail_desc = cur;
3565 }
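
/*
 * The context-reuse test above amounts to caching the last
 * (offload, tucss, tucso) tuple and skipping the context descriptor
 * write when it repeats.  A minimal standalone model of that cache,
 * with hypothetical names; illustrative sketch only, not part of the
 * driver:
 */
#if 0	/* example only */
#include <stdbool.h>
#include <stdint.h>

struct csum_ctx_cache {			/* models txr->last_hw_* */
	uint16_t	offload;	/* CSUM_IP / CSUM_TCP / CSUM_UDP */
	uint8_t		tucss, tucso;	/* payload csum start/put offsets */
};

/* True when the cached context matches and no new descriptor is needed. */
static bool
csum_ctx_reusable(const struct csum_ctx_cache *c,
    uint16_t offload, uint8_t tucss, uint8_t tucso)
{
	return (c->offload == offload &&
	    c->tucss == tucss && c->tucso == tucso);
}
#endif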
3566 
3567 
3568 /**********************************************************************
3569  *
3570  *  Setup work for hardware segmentation offload (TSO)
3571  *
3572  **********************************************************************/
3573 static void
3574 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3575     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3576 {
3577 	struct adapter			*adapter = txr->adapter;
3578 	struct e1000_context_desc	*TXD;
3579 	struct em_buffer		*tx_buffer;
3580 	int cur, hdr_len;
3581 
3582 	/*
3583 	 * In theory we could reuse the same TSO context if and only if
3584 	 * the frame is the same type (IP/TCP) and has the same MSS.
3585 	 * However, checking whether a frame has the same IP/TCP
3586 	 * structure is hard, so just ignore that and always establish
3587 	 * a new TSO context.
3588 	 */
3589 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3590 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3591 		      E1000_TXD_DTYP_D |	/* Data descr type */
3592 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3593 
3594 	/* IP and/or TCP header checksum calculation and insertion. */
3595 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3596 
3597 	cur = txr->next_avail_desc;
3598 	tx_buffer = &txr->tx_buffers[cur];
3599 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3600 
3601 	/*
3602 	 * Start offset for header checksum calculation.
3603 	 * End offset for header checksum calculation.
3604 	 * Offset of place to put the checksum.
3605 	 */
3606 	TXD->lower_setup.ip_fields.ipcss = ip_off;
3607 	TXD->lower_setup.ip_fields.ipcse =
3608 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3609 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3610 	/*
3611 	 * Start offset for payload checksum calculation.
3612 	 * End offset for payload checksum calculation.
3613 	 * Offset of place to put the checksum.
3614 	 */
3615 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3616 	TXD->upper_setup.tcp_fields.tucse = 0;
3617 	TXD->upper_setup.tcp_fields.tucso =
3618 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3619 	/*
3620 	 * Payload size per packet w/o any headers.
3621 	 * Length of all headers up to payload.
3622 	 */
3623 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3624 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3625 
3626 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3627 				E1000_TXD_CMD_DEXT |	/* Extended descr */
3628 				E1000_TXD_CMD_TSE |	/* TSE context */
3629 				E1000_TXD_CMD_IP |	/* Do IP csum */
3630 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3631 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3632 
3633 	tx_buffer->m_head = NULL;
3634 	tx_buffer->next_eop = -1;
3635 
3636 	if (++cur == adapter->num_tx_desc)
3637 		cur = 0;
3638 
3639 	txr->tx_avail--;
3640 	txr->next_avail_desc = cur;
3641 	txr->tx_tso = TRUE;
3642 }
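
/*
 * Worked example (assuming a typical IPv4/TCP frame with no options):
 * ip_off = 14 (ethernet header), ip->ip_hl = 5 and tp->th_off = 5
 * (both in 32-bit words), so
 *	hdr_len = 14 + (5 << 2) + (5 << 2) = 54 bytes.
 * The MSS field then carries m_pkthdr.tso_segsz, and the length in
 * cmd_and_length is the payload only, m_pkthdr.len - 54.
 */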
3643 
3644 
3645 /**********************************************************************
3646  *
3647  *  Examine each tx_buffer in the used queue. If the hardware is done
3648  *  processing the packet then free associated resources. The
3649  *  tx_buffer is put back on the free queue.
3650  *
3651  **********************************************************************/
3652 static bool
3653 em_txeof(struct tx_ring *txr)
3654 {
3655 	struct adapter	*adapter = txr->adapter;
3656 	int first, last, done, processed;
3657 	struct em_buffer *tx_buffer;
3658 	struct e1000_tx_desc	*tx_desc, *eop_desc;
3659 	struct ifnet   *ifp = adapter->ifp;
3660 
3661 	EM_TX_LOCK_ASSERT(txr);
3662 
3663 	/* No work, make sure watchdog is off */
3664 	if (txr->tx_avail == adapter->num_tx_desc) {
3665 		txr->queue_status = EM_QUEUE_IDLE;
3666 		return (FALSE);
3667 	}
3668 
3669 	processed = 0;
3670 	first = txr->next_to_clean;
3671 	tx_desc = &txr->tx_base[first];
3672 	tx_buffer = &txr->tx_buffers[first];
3673 	last = tx_buffer->next_eop;
3674 	eop_desc = &txr->tx_base[last];
3675 
3676 	/*
3677 	 * What this does is get the index of the
3678 	 * first descriptor AFTER the EOP of the
3679 	 * first packet, that way we can do the
3680 	 * simple comparison on the inner while loop.
3681 	 */
3682 	if (++last == adapter->num_tx_desc)
3683 		last = 0;
3684 	done = last;
3685 
3686 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3687 	    BUS_DMASYNC_POSTREAD);
3688 
3689 	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3690 		/* We clean the range of the packet */
3691 		while (first != done) {
3692 			tx_desc->upper.data = 0;
3693 			tx_desc->lower.data = 0;
3694 			tx_desc->buffer_addr = 0;
3695 			++txr->tx_avail;
3696 			++processed;
3697 
3698 			if (tx_buffer->m_head) {
3699 				bus_dmamap_sync(txr->txtag,
3700 				    tx_buffer->map,
3701 				    BUS_DMASYNC_POSTWRITE);
3702 				bus_dmamap_unload(txr->txtag,
3703 				    tx_buffer->map);
3704 				m_freem(tx_buffer->m_head);
3705 				tx_buffer->m_head = NULL;
3706 			}
3707 			tx_buffer->next_eop = -1;
3708 			txr->watchdog_time = ticks;
3709 
3710 			if (++first == adapter->num_tx_desc)
3711 				first = 0;
3712 
3713 			tx_buffer = &txr->tx_buffers[first];
3714 			tx_desc = &txr->tx_base[first];
3715 		}
3716 		++ifp->if_opackets;
3717 		/* See if we can continue to the next packet */
3718 		last = tx_buffer->next_eop;
3719 		if (last != -1) {
3720         		eop_desc = &txr->tx_base[last];
3721 			/* Get new done point */
3722 			if (++last == adapter->num_tx_desc)
				last = 0;
3723 			done = last;
3724 		} else
3725 			break;
3726 	}
3727 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3728 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3729 
3730 	txr->next_to_clean = first;
3731 
3732 	/*
3733 	** Watchdog calculation: we know there's work
3734 	** outstanding or the first return would have
3735 	** been taken, so nothing processed for too
3736 	** long indicates a hang. The local timer will
3737 	** examine this and do a reset if needed.
3738 	*/
3739 	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3740 		txr->queue_status = EM_QUEUE_HUNG;
3741 
3742 	/*
3743 	 * If we have a minimum free, clear IFF_DRV_OACTIVE
3744 	 * to tell the stack that it is OK to send packets.
3745 	 */
3746 	if (txr->tx_avail > EM_MAX_SCATTER)
3747 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3748 
3749 	/* Disable watchdog if all clean */
3750 	if (txr->tx_avail == adapter->num_tx_desc) {
3751 		txr->queue_status = EM_QUEUE_IDLE;
3752 		return (FALSE);
3753 	}
3754 
3755 	return (TRUE);
3756 }
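
/*
 * The "first descriptor AFTER the EOP" trick above is plain modular
 * ring arithmetic.  A standalone model, illustrative only:
 */
#if 0	/* example only */
/* Index of the slot following 'eop' on a ring of 'n' descriptors. */
static int
ring_next(int eop, int n)
{
	return ((eop + 1 == n) ? 0 : eop + 1);
}
/* e.g. n = 256: ring_next(254, 256) == 255, ring_next(255, 256) == 0 */
#endif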
3757 
3758 
3759 /*********************************************************************
3760  *
3761  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3762  *
3763  **********************************************************************/
3764 static void
3765 em_refresh_mbufs(struct rx_ring *rxr, int limit)
3766 {
3767 	struct adapter		*adapter = rxr->adapter;
3768 	struct mbuf		*m;
3769 	bus_dma_segment_t	segs[1];
3770 	struct em_buffer	*rxbuf;
3771 	int			i, j, error, nsegs;
3772 	bool			cleaned = FALSE;
3773 
3774 	i = j = rxr->next_to_refresh;
3775 	/*
3776 	** Get one descriptor beyond
3777 	** our work mark to control
3778 	** the loop.
3779 	*/
3780 	if (++j == adapter->num_rx_desc)
3781 		j = 0;
3782 
3783 	while (j != limit) {
3784 		rxbuf = &rxr->rx_buffers[i];
3785 		if (rxbuf->m_head == NULL) {
3786 			m = m_getjcl(M_DONTWAIT, MT_DATA,
3787 			    M_PKTHDR, adapter->rx_mbuf_sz);
3788 			/*
3789 			** If we have a temporary resource shortage
3790 			** that causes a failure, just abort the
3791 			** refresh for now; we will return to this
3792 			** point when reinvoked from em_rxeof.
3793 			*/
3794 			if (m == NULL)
3795 				goto update;
3796 		} else
3797 			m = rxbuf->m_head;
3798 
3799 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3800 		m->m_flags |= M_PKTHDR;
3801 		m->m_data = m->m_ext.ext_buf;
3802 
3803 		/* Use bus_dma machinery to setup the memory mapping  */
3804 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3805 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3806 		if (error != 0) {
3807 			printf("Refresh mbufs: hdr dmamap load"
3808 			    " failure - %d\n", error);
3809 			m_free(m);
3810 			rxbuf->m_head = NULL;
3811 			goto update;
3812 		}
3813 		rxbuf->m_head = m;
3814 		bus_dmamap_sync(rxr->rxtag,
3815 		    rxbuf->map, BUS_DMASYNC_PREREAD);
3816 		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3817 		cleaned = TRUE;
3818 
3819 		i = j; /* Next is precalculated for us */
3820 		rxr->next_to_refresh = i;
3821 		/* Calculate next controlling index */
3822 		if (++j == adapter->num_rx_desc)
3823 			j = 0;
3824 	}
3825 update:
3826 	/*
3827 	** Update the tail pointer only if,
3828 	** and only as far as, we have refreshed.
3829 	*/
3830 	if (cleaned)
3831 		E1000_WRITE_REG(&adapter->hw,
3832 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
3833 
3834 	return;
3835 }
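
/*
 * Worked example of the two-cursor walk above: j always leads i by one
 * slot, so the loop stops one descriptor short of 'limit'.  With
 * num_rx_desc = 8, next_to_refresh = 6 and limit = 2, the refreshed
 * slots are 6, 7 and 0, and next_to_refresh ends up at 1, which is
 * what gets written to RDT.
 */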
3836 
3837 
3838 /*********************************************************************
3839  *
3840  *  Allocate memory for rx_buffer structures. Since we use one
3841  *  rx_buffer per received packet, the maximum number of rx_buffer's
3842  *  that we'll need is equal to the number of receive descriptors
3843  *  that we've allocated.
3844  *
3845  **********************************************************************/
3846 static int
3847 em_allocate_receive_buffers(struct rx_ring *rxr)
3848 {
3849 	struct adapter		*adapter = rxr->adapter;
3850 	device_t		dev = adapter->dev;
3851 	struct em_buffer	*rxbuf;
3852 	int			error;
3853 
3854 	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3855 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3856 	if (rxr->rx_buffers == NULL) {
3857 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3858 		return (ENOMEM);
3859 	}
3860 
3861 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3862 				1, 0,			/* alignment, bounds */
3863 				BUS_SPACE_MAXADDR,	/* lowaddr */
3864 				BUS_SPACE_MAXADDR,	/* highaddr */
3865 				NULL, NULL,		/* filter, filterarg */
3866 				MJUM9BYTES,		/* maxsize */
3867 				1,			/* nsegments */
3868 				MJUM9BYTES,		/* maxsegsize */
3869 				0,			/* flags */
3870 				NULL,			/* lockfunc */
3871 				NULL,			/* lockarg */
3872 				&rxr->rxtag);
3873 	if (error) {
3874 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3875 		    __func__, error);
3876 		goto fail;
3877 	}
3878 
3879 	rxbuf = rxr->rx_buffers;
3880 	for (int i = 0; i < adapter->num_rx_desc; i++) {
3881 		rxbuf = &rxr->rx_buffers[i];
3882 		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3883 		    &rxbuf->map);
3884 		if (error) {
3885 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3886 			    __func__, error);
3887 			goto fail;
3888 		}
3889 	}
3890 
3891 	return (0);
3892 
3893 fail:
3894 	em_free_receive_structures(adapter);
3895 	return (error);
3896 }
3897 
3898 
3899 /*********************************************************************
3900  *
3901  *  Initialize a receive ring and its buffers.
3902  *
3903  **********************************************************************/
3904 static int
3905 em_setup_receive_ring(struct rx_ring *rxr)
3906 {
3907 	struct	adapter 	*adapter = rxr->adapter;
3908 	struct em_buffer	*rxbuf;
3909 	bus_dma_segment_t	seg[1];
3910 	int			i, j, nsegs, error = 0;
3911 
3912 
3913 	/* Clear the ring contents */
3914 	EM_RX_LOCK(rxr);
3915 
3916 	/* Invalidate all descriptors */
3917 	for (i = 0; i < adapter->num_rx_desc; i++) {
3918 		struct e1000_rx_desc* cur;
3919 		cur = &rxr->rx_base[i];
3920 		cur->status = 0;
3921 	}
3922 
3923 	/* Now replenish the mbufs */
3924 	i = j = rxr->next_to_refresh;
3925 	if (++j == adapter->num_rx_desc)
3926 		j = 0;
3927 
3928 	while (j != rxr->next_to_check) {
3929 		rxbuf = &rxr->rx_buffers[i];
3930 		rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
3931 		    M_PKTHDR, adapter->rx_mbuf_sz);
3932 		if (rxbuf->m_head == NULL) {
3933 			error = ENOBUFS;
3934 			goto fail;
3935 		}
3936 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
3937 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3938 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
3939 
3940 		/* Get the memory mapping */
3941 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3942 		    rxbuf->map, rxbuf->m_head, seg,
3943 		    &nsegs, BUS_DMA_NOWAIT);
3944 		if (error != 0) {
3945 			m_freem(rxbuf->m_head);
3946 			rxbuf->m_head = NULL;
3947 			goto fail;
3948 		}
3949 		bus_dmamap_sync(rxr->rxtag,
3950 		    rxbuf->map, BUS_DMASYNC_PREREAD);
3951 
3952 		/* Update descriptor */
3953 		rxr->rx_base[i].buffer_addr = htole64(seg[0].ds_addr);
3954 		i = j;
3955 		if (++j == adapter->num_rx_desc)
3956 			j = 0;
3957 	}
3958 
3959 fail:
3960 	rxr->next_to_refresh = i;
3961 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3962 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3963 	EM_RX_UNLOCK(rxr);
3964 	return (error);
3965 }
3966 
3967 /*********************************************************************
3968  *
3969  *  Initialize all receive rings.
3970  *
3971  **********************************************************************/
3972 static int
3973 em_setup_receive_structures(struct adapter *adapter)
3974 {
3975 	struct rx_ring *rxr = adapter->rx_rings;
3976 	int q;
3977 
3978 	for (q = 0; q < adapter->num_queues; q++, rxr++)
3979 		if (em_setup_receive_ring(rxr))
3980 			goto fail;
3981 
3982 	return (0);
3983 fail:
3984 	/*
3985 	 * Free RX buffers allocated so far, we will only handle
3986 	 * the rings that completed, the failing case will have
3987 	 * cleaned up for itself. 'q' failed, so it's the terminus.
3988 	 */
3989 	for (int i = 0, n = 0; i < q; ++i) {
3990 		rxr = &adapter->rx_rings[i];
3991 		n = rxr->next_to_check;
3992 		while (n != rxr->next_to_refresh) {
3993 			struct em_buffer *rxbuf;
3994 			rxbuf = &rxr->rx_buffers[n];
3995 			if (rxbuf->m_head != NULL) {
3996 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3997 			  	  BUS_DMASYNC_POSTREAD);
3998 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3999 				m_freem(rxbuf->m_head);
4000 				rxbuf->m_head = NULL;
4001 			}
4002 			if (++n == adapter->num_rx_desc)
4003 				n = 0;
4004 		}
4005 		rxr->next_to_check = 0;
4006 		rxr->next_to_refresh = 0;
4007 	}
4008 
4009 	return (ENOBUFS);
4010 }
4011 
4012 /*********************************************************************
4013  *
4014  *  Free all receive rings.
4015  *
4016  **********************************************************************/
4017 static void
4018 em_free_receive_structures(struct adapter *adapter)
4019 {
4020 	struct rx_ring *rxr = adapter->rx_rings;
4021 
4022 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4023 		em_free_receive_buffers(rxr);
4024 		/* Free the ring memory as well */
4025 		em_dma_free(adapter, &rxr->rxdma);
4026 		EM_RX_LOCK_DESTROY(rxr);
4027 	}
4028 
4029 	free(adapter->rx_rings, M_DEVBUF);
4030 }
4031 
4032 
4033 /*********************************************************************
4034  *
4035  *  Free receive ring data structures
4036  *
4037  **********************************************************************/
4038 static void
4039 em_free_receive_buffers(struct rx_ring *rxr)
4040 {
4041 	struct adapter		*adapter = rxr->adapter;
4042 	struct em_buffer	*rxbuf = NULL;
4043 
4044 	INIT_DEBUGOUT("free_receive_buffers: begin");
4045 
4046 	if (rxr->rx_buffers != NULL) {
4047 		int i = rxr->next_to_check;
4048 		while (i != rxr->next_to_refresh) {
4049 			rxbuf = &rxr->rx_buffers[i];
4050 			if (rxbuf->map != NULL) {
4051 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4052 				    BUS_DMASYNC_POSTREAD);
4053 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4054 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4055 			}
4056 			if (rxbuf->m_head != NULL) {
4057 				m_freem(rxbuf->m_head);
4058 				rxbuf->m_head = NULL;
4059 			}
4060 			if (++i == adapter->num_rx_desc)
4061 				i = 0;
4062 		}
4063 		free(rxr->rx_buffers, M_DEVBUF);
4064 		rxr->rx_buffers = NULL;
4065 		rxr->next_to_check = 0;
4066 		rxr->next_to_refresh = 0;
4067 	}
4068 
4069 	if (rxr->rxtag != NULL) {
4070 		bus_dma_tag_destroy(rxr->rxtag);
4071 		rxr->rxtag = NULL;
4072 	}
4073 
4074 	return;
4075 }
4076 
4077 
4078 /*********************************************************************
4079  *
4080  *  Enable receive unit.
4081  *
4082  **********************************************************************/
4083 #define MAX_INTS_PER_SEC	8000
4084 #define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
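/*
 * Worked example: the ITR register counts in units of 256 ns, so
 *	DEFAULT_ITR = 1000000000 / (8000 * 256) = 488
 * which throttles the adapter to roughly 8000 interrupts per second.
 */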
4085 
4086 static void
4087 em_initialize_receive_unit(struct adapter *adapter)
4088 {
4089 	struct rx_ring	*rxr = adapter->rx_rings;
4090 	struct ifnet	*ifp = adapter->ifp;
4091 	struct e1000_hw	*hw = &adapter->hw;
4092 	u64	bus_addr;
4093 	u32	rctl, rxcsum;
4094 
4095 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4096 
4097 	/*
4098 	 * Make sure receives are disabled while setting
4099 	 * up the descriptor ring
4100 	 */
4101 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4102 	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4103 
4104 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4105 	    adapter->rx_abs_int_delay.value);
4106 	/*
4107 	 * Set the interrupt throttling rate. Value is calculated
4108 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4109 	 */
4110 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4111 
4112 	/*
4113 	** When using MSIX interrupts we need to throttle
4114 	** using the EITR register (82574 only)
4115 	*/
4116 	if (hw->mac.type == e1000_82574)
4117 		for (int i = 0; i < 4; i++)
4118 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4119 			    DEFAULT_ITR);
4120 
4121 	/* Disable accelerated acknowledgement */
4122 	if (adapter->hw.mac.type == e1000_82574)
4123 		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4124 
4125 	if (ifp->if_capenable & IFCAP_RXCSUM) {
4126 		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4127 		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4128 		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4129 	}
4130 
4131 	/*
4132 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4133 	** long latencies are observed, like Lenovo X60. This
4134 	** change eliminates the problem, but since having positive
4135 	** values in RDTR is a known source of problems on other
4136 	** platforms another solution is being sought.
4137 	*/
4138 	if (hw->mac.type == e1000_82573)
4139 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4140 
4141 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4142 		/* Setup the Base and Length of the Rx Descriptor Ring */
4143 		bus_addr = rxr->rxdma.dma_paddr;
4144 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4145 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4146 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4147 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4148 		/* Setup the Head and Tail Descriptor Pointers */
4149 		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4150 		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4151 	}
4152 
4153 	/* Set early receive threshold on appropriate hw */
4154 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4155 	    (adapter->hw.mac.type == e1000_pch2lan) ||
4156 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4157 	    (ifp->if_mtu > ETHERMTU)) {
4158 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4159 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4160 		E1000_WRITE_REG(hw, E1000_ERT, 0x100 | (1 << 13));
4161 	}
4162 
4163 	if (adapter->hw.mac.type == e1000_pch2lan) {
4164 		if (ifp->if_mtu > ETHERMTU)
4165 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4166 		else
4167 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4168 	}
4169 
4170 	/* Setup the Receive Control Register */
4171 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4172 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4173 	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4174 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4175 
4176 	/* Strip the CRC */
4177 	rctl |= E1000_RCTL_SECRC;
4178 
4179 	/* Make sure VLAN Filters are off */
4180 	rctl &= ~E1000_RCTL_VFE;
4181 	rctl &= ~E1000_RCTL_SBP;
4182 
4183 	if (adapter->rx_mbuf_sz == MCLBYTES)
4184 		rctl |= E1000_RCTL_SZ_2048;
4185 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4186 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4187 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4188 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4189 
4190 	if (ifp->if_mtu > ETHERMTU)
4191 		rctl |= E1000_RCTL_LPE;
4192 	else
4193 		rctl &= ~E1000_RCTL_LPE;
4194 
4195 	/* Write out the settings */
4196 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4197 
4198 	return;
4199 }
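
/*
 * Sketch of the RCTL buffer-size selection above (assuming 4 KB pages,
 * where MJUMPAGESIZE == 4096):
 *	rx_mbuf_sz == MCLBYTES (2048)     -> E1000_RCTL_SZ_2048
 *	rx_mbuf_sz == MJUMPAGESIZE (4096) -> E1000_RCTL_SZ_4096 | BSEX
 *	rx_mbuf_sz  > MJUMPAGESIZE        -> E1000_RCTL_SZ_8192 | BSEX
 * BSEX (buffer size extension) scales the encoded base size by 16,
 * which is how the 4096 and 8192 encodings are reached.
 */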
4200 
4201 
4202 /*********************************************************************
4203  *
4204  *  This routine executes in interrupt context. It replenishes
4205  *  the mbufs in the descriptor ring and passes data that has been
4206  *  DMA'ed into host memory up to the upper layer.
4207  *
4208  *  We loop at most count times if count is > 0, or until done if
4209  *  count < 0.
4210  *
4211  *  For polling we also now return the number of cleaned packets
4212  *********************************************************************/
4213 static bool
4214 em_rxeof(struct rx_ring *rxr, int count, int *done)
4215 {
4216 	struct adapter		*adapter = rxr->adapter;
4217 	struct ifnet		*ifp = adapter->ifp;
4218 	struct mbuf		*mp, *sendmp;
4219 	u8			status = 0;
4220 	u16 			len;
4221 	int			i, processed, rxdone = 0;
4222 	bool			eop;
4223 	struct e1000_rx_desc	*cur;
4224 
4225 	EM_RX_LOCK(rxr);
4226 
4227 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4228 
4229 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4230 			break;
4231 
4232 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4233 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4234 
4235 		cur = &rxr->rx_base[i];
4236 		status = cur->status;
4237 		mp = sendmp = NULL;
4238 
4239 		if ((status & E1000_RXD_STAT_DD) == 0)
4240 			break;
4241 
4242 		len = le16toh(cur->length);
4243 		eop = (status & E1000_RXD_STAT_EOP) != 0;
4244 
4245 		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4246 		    (rxr->discard == TRUE)) {
4247 			ifp->if_ierrors++;
4248 			++rxr->rx_discarded;
4249 			if (!eop) /* Catch subsequent segs */
4250 				rxr->discard = TRUE;
4251 			else
4252 				rxr->discard = FALSE;
4253 			em_rx_discard(rxr, i);
4254 			goto next_desc;
4255 		}
4256 
4257 		/* Assign correct length to the current fragment */
4258 		mp = rxr->rx_buffers[i].m_head;
4259 		mp->m_len = len;
4260 
4261 		/* Trigger for refresh */
4262 		rxr->rx_buffers[i].m_head = NULL;
4263 
4264 		/* First segment? */
4265 		if (rxr->fmp == NULL) {
4266 			mp->m_pkthdr.len = len;
4267 			rxr->fmp = rxr->lmp = mp;
4268 		} else {
4269 			/* Chain mbuf's together */
4270 			mp->m_flags &= ~M_PKTHDR;
4271 			rxr->lmp->m_next = mp;
4272 			rxr->lmp = mp;
4273 			rxr->fmp->m_pkthdr.len += len;
4274 		}
4275 
4276 		if (eop) {
4277 			--count;
4278 			sendmp = rxr->fmp;
4279 			sendmp->m_pkthdr.rcvif = ifp;
4280 			ifp->if_ipackets++;
4281 			em_receive_checksum(cur, sendmp);
4282 #ifndef __NO_STRICT_ALIGNMENT
4283 			if (adapter->max_frame_size >
4284 			    (MCLBYTES - ETHER_ALIGN) &&
4285 			    em_fixup_rx(rxr) != 0)
4286 				goto skip;
4287 #endif
4288 			if (status & E1000_RXD_STAT_VP) {
4289 				sendmp->m_pkthdr.ether_vtag =
4290 				    (le16toh(cur->special) &
4291 				    E1000_RXD_SPC_VLAN_MASK);
4292 				sendmp->m_flags |= M_VLANTAG;
4293 			}
4294 #ifdef EM_MULTIQUEUE
4295 			sendmp->m_pkthdr.flowid = rxr->msix;
4296 			sendmp->m_flags |= M_FLOWID;
4297 #endif
4298 #ifndef __NO_STRICT_ALIGNMENT
4299 skip:
4300 #endif
4301 			rxr->fmp = rxr->lmp = NULL;
4302 		}
4303 next_desc:
4304 		/* Zero out the receive descriptor's status. */
4305 		cur->status = 0;
4306 		++rxdone;	/* cumulative for POLL */
4307 		++processed;
4308 
4309 		/* Advance our pointers to the next descriptor. */
4310 		if (++i == adapter->num_rx_desc)
4311 			i = 0;
4312 
4313 		/* Send to the stack */
4314 		if (sendmp != NULL) {
4315 			rxr->next_to_check = i;
4316 			EM_RX_UNLOCK(rxr);
4317 			(*ifp->if_input)(ifp, sendmp);
4318 			EM_RX_LOCK(rxr);
4319 			i = rxr->next_to_check;
4320 		}
4321 
4322 		/* Only refresh mbufs every 8 descriptors */
4323 		if (processed == 8) {
4324 			em_refresh_mbufs(rxr, i);
4325 			processed = 0;
4326 		}
4327 	}
4328 
4329 	/* Catch any remaining refresh work */
4330 	if (processed != 0 || i == rxr->next_to_refresh)
4331 		em_refresh_mbufs(rxr, i);
4332 
4333 	rxr->next_to_check = i;
4334 	if (done != NULL)
4335 		*done = rxdone;
4336 	EM_RX_UNLOCK(rxr);
4337 
4338 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4339 }
4340 
4341 static __inline void
4342 em_rx_discard(struct rx_ring *rxr, int i)
4343 {
4344 	struct em_buffer	*rbuf;
4345 
4346 	rbuf = &rxr->rx_buffers[i];
4347 	/* Free any previous pieces */
4348 	if (rxr->fmp != NULL) {
4349 		rxr->fmp->m_flags |= M_PKTHDR;
4350 		m_freem(rxr->fmp);
4351 		rxr->fmp = NULL;
4352 		rxr->lmp = NULL;
4353 	}
4354 	/*
4355 	** Free buffer and allow em_refresh_mbufs()
4356 	** to clean up and recharge buffer.
4357 	*/
4358 	if (rbuf->m_head) {
4359 		m_free(rbuf->m_head);
4360 		rbuf->m_head = NULL;
4361 	}
4362 	return;
4363 }
4364 
4365 #ifndef __NO_STRICT_ALIGNMENT
4366 /*
4367  * When jumbo frames are enabled we should realign the entire payload on
4368  * architectures with strict alignment. This is a serious design mistake
4369  * of the 8254x, as it nullifies the benefit of DMA. The 8254x only allows
4370  * RX buffer sizes of 2048/4096/8192/16384; what we really want is
4371  * 2048 - ETHER_ALIGN, which would leave the payload aligned. On
4372  * architectures without strict alignment restrictions the 8254x still
4373  * performs unaligned accesses, which reduce performance there too. To
4374  * avoid copying an entire frame to realign it, we allocate a new mbuf,
4375  * copy the ethernet header into it, and prepend it to the existing chain.
4376  *
4377  * Be aware that the best performance of the 8254x is achieved only when
4378  * jumbo frames are not used at all on architectures with strict alignment.
4379  */
4380 static int
4381 em_fixup_rx(struct rx_ring *rxr)
4382 {
4383 	struct adapter *adapter = rxr->adapter;
4384 	struct mbuf *m, *n;
4385 	int error;
4386 
4387 	error = 0;
4388 	m = rxr->fmp;
4389 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4390 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4391 		m->m_data += ETHER_HDR_LEN;
4392 	} else {
4393 		MGETHDR(n, M_DONTWAIT, MT_DATA);
4394 		if (n != NULL) {
4395 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4396 			m->m_data += ETHER_HDR_LEN;
4397 			m->m_len -= ETHER_HDR_LEN;
4398 			n->m_len = ETHER_HDR_LEN;
4399 			M_MOVE_PKTHDR(n, m);
4400 			n->m_next = m;
4401 			rxr->fmp = n;
4402 		} else {
4403 			adapter->dropped_pkts++;
4404 			m_freem(rxr->fmp);
4405 			rxr->fmp = NULL;
4406 			error = ENOMEM;
4407 		}
4408 	}
4409 
4410 	return (error);
4411 }
4412 #endif
4413 
4414 /*********************************************************************
4415  *
4416  *  Verify that the hardware indicated that the checksum is valid.
4417  *  Inform the stack about the status of the checksum so that the
4418  *  stack doesn't spend time verifying it.
4419  *
4420  *********************************************************************/
4421 static void
4422 em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4423 {
4424 	/* Ignore Checksum bit is set */
4425 	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4426 		mp->m_pkthdr.csum_flags = 0;
4427 		return;
4428 	}
4429 
4430 	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4431 		/* Did it pass? */
4432 		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4433 			/* IP Checksum Good */
4434 			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4435 			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4436 
4437 		} else {
4438 			mp->m_pkthdr.csum_flags = 0;
4439 		}
4440 	}
4441 
4442 	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4443 		/* Did it pass? */
4444 		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4445 			mp->m_pkthdr.csum_flags |=
4446 			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4447 			mp->m_pkthdr.csum_data = htons(0xffff);
4448 		}
4449 	}
4450 }
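
/*
 * Example: after em_receive_checksum() a fully verified TCP/IPv4 frame
 * carries
 *	csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID |
 *	    CSUM_DATA_VALID | CSUM_PSEUDO_HDR
 *	csum_data  = 0xffff
 * so the stack skips both the IP header and TCP checksum computations.
 */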
4451 
4452 /*
4453  * This routine is run via a vlan
4454  * config EVENT
4455  */
4456 static void
4457 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4458 {
4459 	struct adapter	*adapter = ifp->if_softc;
4460 	u32		index, bit;
4461 
4462 	if (ifp->if_softc !=  arg)   /* Not our event */
4463 		return;
4464 
4465 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4466 		return;
4467 
4468 	EM_CORE_LOCK(adapter);
4469 	index = (vtag >> 5) & 0x7F;
4470 	bit = vtag & 0x1F;
4471 	adapter->shadow_vfta[index] |= (1 << bit);
4472 	++adapter->num_vlans;
4473 	/* Re-init to load the changes */
4474 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4475 		em_init_locked(adapter);
4476 	EM_CORE_UNLOCK(adapter);
4477 }
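
/*
 * Worked example: the 4096 possible VLAN IDs map onto 128 32-bit VFTA
 * words.  vtag = 100 gives index = (100 >> 5) & 0x7F = 3 and
 * bit = 100 & 0x1F = 4, i.e. bit 4 of shadow_vfta[3] is set.
 */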
4478 
4479 /*
4480  * This routine is run via a vlan
4481  * unconfig EVENT
4482  */
4483 static void
4484 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4485 {
4486 	struct adapter	*adapter = ifp->if_softc;
4487 	u32		index, bit;
4488 
4489 	if (ifp->if_softc !=  arg)
4490 		return;
4491 
4492 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4493 		return;
4494 
4495 	EM_CORE_LOCK(adapter);
4496 	index = (vtag >> 5) & 0x7F;
4497 	bit = vtag & 0x1F;
4498 	adapter->shadow_vfta[index] &= ~(1 << bit);
4499 	--adapter->num_vlans;
4500 	/* Re-init to load the changes */
4501 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4502 		em_init_locked(adapter);
4503 	EM_CORE_UNLOCK(adapter);
4504 }
4505 
4506 static void
4507 em_setup_vlan_hw_support(struct adapter *adapter)
4508 {
4509 	struct e1000_hw *hw = &adapter->hw;
4510 	u32             reg;
4511 
4512 	/*
4513 	** We get here thru init_locked, meaning a
4514 	** soft reset; that has already cleared the
4515 	** VFTA and other state, so if no vlans have
4516 	** been registered there is nothing to do.
4517 	*/
4518 	if (adapter->num_vlans == 0)
4519 		return;
4520 
4521 	/*
4522 	** A soft reset zeroes out the VFTA, so
4523 	** we need to repopulate it now.
4524 	*/
4525 	for (int i = 0; i < EM_VFTA_SIZE; i++)
4526 		if (adapter->shadow_vfta[i] != 0)
4527 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4528 			    i, adapter->shadow_vfta[i]);
4529 
4530 	reg = E1000_READ_REG(hw, E1000_CTRL);
4531 	reg |= E1000_CTRL_VME;
4532 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4533 
4534 	/* Enable the Filter Table */
4535 	reg = E1000_READ_REG(hw, E1000_RCTL);
4536 	reg &= ~E1000_RCTL_CFIEN;
4537 	reg |= E1000_RCTL_VFE;
4538 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4539 }
4540 
4541 static void
4542 em_enable_intr(struct adapter *adapter)
4543 {
4544 	struct e1000_hw *hw = &adapter->hw;
4545 	u32 ims_mask = IMS_ENABLE_MASK;
4546 
4547 	if (hw->mac.type == e1000_82574) {
4548 		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4549 		ims_mask |= EM_MSIX_MASK;
4550 	}
4551 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4552 }
4553 
4554 static void
4555 em_disable_intr(struct adapter *adapter)
4556 {
4557 	struct e1000_hw *hw = &adapter->hw;
4558 
4559 	if (hw->mac.type == e1000_82574)
4560 		E1000_WRITE_REG(hw, EM_EIAC, 0);
4561 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4562 }
4563 
4564 /*
4565  * Bit of a misnomer: what this really means is
4566  * to enable OS management of the system, i.e.
4567  * to disable special hardware management features.
4568  */
4569 static void
4570 em_init_manageability(struct adapter *adapter)
4571 {
4572 	/* A shared code workaround */
4573 #define E1000_82542_MANC2H E1000_MANC2H
4574 	if (adapter->has_manage) {
4575 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4576 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4577 
4578 		/* disable hardware interception of ARP */
4579 		manc &= ~(E1000_MANC_ARP_EN);
4580 
4581 		/* enable receiving management packets to the host */
4582 		manc |= E1000_MANC_EN_MNG2HOST;
4583 #define E1000_MNG2HOST_PORT_623 (1 << 5)
4584 #define E1000_MNG2HOST_PORT_664 (1 << 6)
4585 		manc2h |= E1000_MNG2HOST_PORT_623;
4586 		manc2h |= E1000_MNG2HOST_PORT_664;
4587 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4588 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4589 	}
4590 }
4591 
4592 /*
4593  * Give control back to hardware management
4594  * controller if there is one.
4595  */
4596 static void
4597 em_release_manageability(struct adapter *adapter)
4598 {
4599 	if (adapter->has_manage) {
4600 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4601 
4602 		/* re-enable hardware interception of ARP */
4603 		manc |= E1000_MANC_ARP_EN;
4604 		manc &= ~E1000_MANC_EN_MNG2HOST;
4605 
4606 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4607 	}
4608 }
4609 
4610 /*
4611  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4612  * For ASF and Pass Through versions of f/w this means
4613  * that the driver is loaded. For AMT version type f/w
4614  * this means that the network i/f is open.
4615  */
4616 static void
4617 em_get_hw_control(struct adapter *adapter)
4618 {
4619 	u32 ctrl_ext, swsm;
4620 
4621 	if (adapter->hw.mac.type == e1000_82573) {
4622 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4623 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4624 		    swsm | E1000_SWSM_DRV_LOAD);
4625 		return;
4626 	}
4627 	/* else */
4628 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4629 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4630 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4631 	return;
4632 }
4633 
4634 /*
4635  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4636  * For ASF and Pass Through versions of f/w this means that
4637  * the driver is no longer loaded. For AMT versions of the
4638  * f/w this means that the network i/f is closed.
4639  */
4640 static void
4641 em_release_hw_control(struct adapter *adapter)
4642 {
4643 	u32 ctrl_ext, swsm;
4644 
4645 	if (!adapter->has_manage)
4646 		return;
4647 
4648 	if (adapter->hw.mac.type == e1000_82573) {
4649 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4650 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4651 		    swsm & ~E1000_SWSM_DRV_LOAD);
4652 		return;
4653 	}
4654 	/* else */
4655 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4656 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4657 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4658 	return;
4659 }
4660 
4661 static int
4662 em_is_valid_ether_addr(u8 *addr)
4663 {
4664 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4665 
4666 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4667 		return (FALSE);
4668 	}
4669 
4670 	return (TRUE);
4671 }
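
/*
 * Example: addr[0] & 1 tests the IEEE group (multicast) bit, so this
 * rejects multicast and broadcast (ff:ff:ff:ff:ff:ff) addresses as
 * well as all-zeros; a unicast address such as 00:a0:c9:xx:xx:xx
 * passes because 0x00 has bit 0 clear.
 */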
4672 
4673 /*
4674 ** Parse the interface capabilities with regard
4675 ** to both system management and wake-on-lan for
4676 ** later use.
4677 */
4678 static void
4679 em_get_wakeup(device_t dev)
4680 {
4681 	struct adapter	*adapter = device_get_softc(dev);
4682 	u16		eeprom_data = 0, device_id, apme_mask;
4683 
4684 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4685 	apme_mask = EM_EEPROM_APME;
4686 
4687 	switch (adapter->hw.mac.type) {
4688 	case e1000_82573:
4689 	case e1000_82583:
4690 		adapter->has_amt = TRUE;
4691 		/* Falls thru */
4692 	case e1000_82571:
4693 	case e1000_82572:
4694 	case e1000_80003es2lan:
4695 		if (adapter->hw.bus.func == 1) {
4696 			e1000_read_nvm(&adapter->hw,
4697 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4698 			break;
4699 		} else
4700 			e1000_read_nvm(&adapter->hw,
4701 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4702 		break;
4703 	case e1000_ich8lan:
4704 	case e1000_ich9lan:
4705 	case e1000_ich10lan:
4706 	case e1000_pchlan:
4707 	case e1000_pch2lan:
4708 		apme_mask = E1000_WUC_APME;
4709 		adapter->has_amt = TRUE;
4710 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4711 		break;
4712 	default:
4713 		e1000_read_nvm(&adapter->hw,
4714 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4715 		break;
4716 	}
4717 	if (eeprom_data & apme_mask)
4718 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4719 	/*
4720 	 * We have the eeprom settings; now apply the special cases
4721 	 * where the eeprom may be wrong or the board won't support
4722 	 * wake on lan on a particular port.
4723 	 */
4724 	device_id = pci_get_device(dev);
4725 	switch (device_id) {
4726 	case E1000_DEV_ID_82571EB_FIBER:
4727 		/* Wake events only supported on port A for dual fiber
4728 		 * regardless of eeprom setting */
4729 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4730 		    E1000_STATUS_FUNC_1)
4731 			adapter->wol = 0;
4732 		break;
4733 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4734 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4735 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4736 		/* if quad port adapter, disable WoL on all but port A */
4737 		if (global_quad_port_a != 0)
4738 			adapter->wol = 0;
4739 		/* Reset for multiple quad port adapters */
4740 		if (++global_quad_port_a == 4)
4741 			global_quad_port_a = 0;
4742 		break;
4743 	}
4744 	return;
4745 }
4746 
4747 
4748 /*
4749  * Enable PCI Wake On Lan capability
4750  */
4751 static void
4752 em_enable_wakeup(device_t dev)
4753 {
4754 	struct adapter	*adapter = device_get_softc(dev);
4755 	struct ifnet	*ifp = adapter->ifp;
4756 	u32		pmc, ctrl, ctrl_ext, rctl;
4757 	u16     	status;
4758 
4759 	if ((pci_find_extcap(dev, PCIY_PMG, &pmc) != 0))
4760 		return;
4761 
4762 	/* Advertise the wakeup capability */
4763 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4764 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4765 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4766 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4767 
4768 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4769 	    (adapter->hw.mac.type == e1000_pchlan) ||
4770 	    (adapter->hw.mac.type == e1000_ich9lan) ||
4771 	    (adapter->hw.mac.type == e1000_ich10lan))
4772 		e1000_disable_gig_wol_ich8lan(&adapter->hw);
4773 
4774 	/* Keep the laser running on Fiber adapters */
4775 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4776 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4777 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4778 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4779 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4780 	}
4781 
4782 	/*
4783 	** Determine type of Wakeup: note that wol
4784 	** is set with all bits on by default.
4785 	*/
4786 	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4787 		adapter->wol &= ~E1000_WUFC_MAG;
4788 
4789 	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4790 		adapter->wol &= ~E1000_WUFC_MC;
4791 	else {
4792 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4793 		rctl |= E1000_RCTL_MPE;
4794 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4795 	}
4796 
4797 	if ((adapter->hw.mac.type == e1000_pchlan) ||
4798 	    (adapter->hw.mac.type == e1000_pch2lan)) {
4799 		if (em_enable_phy_wakeup(adapter))
4800 			return;
4801 	} else {
4802 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4803 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4804 	}
4805 
4806 	if (adapter->hw.phy.type == e1000_phy_igp_3)
4807 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4808 
4809 	/* Request PME */
4810 	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4811 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4812 	if (ifp->if_capenable & IFCAP_WOL)
4813 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4814 	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4815 
4816 	return;
4817 }
4818 
4819 /*
4820 ** WOL in the newer chipset interfaces (pchlan)
4821 ** requires things to be copied into the PHY.
4822 */
4823 static int
4824 em_enable_phy_wakeup(struct adapter *adapter)
4825 {
4826 	struct e1000_hw *hw = &adapter->hw;
4827 	u32 mreg, ret = 0;
4828 	u16 preg;
4829 
4830 	/* copy MAC RARs to PHY RARs */
4831 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
4832 
4833 	/* copy MAC MTA to PHY MTA */
4834 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4835 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4836 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4837 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4838 		    (u16)((mreg >> 16) & 0xFFFF));
4839 	}
4840 
4841 	/* configure PHY Rx Control register */
4842 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4843 	mreg = E1000_READ_REG(hw, E1000_RCTL);
4844 	if (mreg & E1000_RCTL_UPE)
4845 		preg |= BM_RCTL_UPE;
4846 	if (mreg & E1000_RCTL_MPE)
4847 		preg |= BM_RCTL_MPE;
4848 	preg &= ~(BM_RCTL_MO_MASK);
4849 	if (mreg & E1000_RCTL_MO_3)
4850 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4851 				<< BM_RCTL_MO_SHIFT);
4852 	if (mreg & E1000_RCTL_BAM)
4853 		preg |= BM_RCTL_BAM;
4854 	if (mreg & E1000_RCTL_PMCF)
4855 		preg |= BM_RCTL_PMCF;
4856 	mreg = E1000_READ_REG(hw, E1000_CTRL);
4857 	if (mreg & E1000_CTRL_RFCE)
4858 		preg |= BM_RCTL_RFCE;
4859 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4860 
4861 	/* enable PHY wakeup in MAC register */
4862 	E1000_WRITE_REG(hw, E1000_WUC,
4863 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4864 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4865 
4866 	/* configure and enable PHY wakeup in PHY registers */
4867 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4868 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4869 
4870 	/* activate PHY wakeup */
4871 	ret = hw->phy.ops.acquire(hw);
4872 	if (ret) {
4873 		printf("Could not acquire PHY\n");
4874 		return ret;
4875 	}
4876 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4877 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4878 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4879 	if (ret) {
4880 		printf("Could not read PHY page 769\n");
4881 		goto out;
4882 	}
4883 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4884 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4885 	if (ret)
4886 		printf("Could not set PHY Host Wakeup bit\n");
4887 out:
4888 	hw->phy.ops.release(hw);
4889 
4890 	return ret;
4891 }
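
/*
 * Worked example for the MTA copy above: each 32-bit MTA word is
 * mirrored into two consecutive 16-bit PHY registers, low half first.
 * For mreg = 0x8001000f the two writes are
 *	BM_MTA(i)     <- 0x000f	(mreg & 0xFFFF)
 *	BM_MTA(i) + 1 <- 0x8001	((mreg >> 16) & 0xFFFF)
 */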
4892 
4893 static void
4894 em_led_func(void *arg, int onoff)
4895 {
4896 	struct adapter	*adapter = arg;
4897 
4898 	EM_CORE_LOCK(adapter);
4899 	if (onoff) {
4900 		e1000_setup_led(&adapter->hw);
4901 		e1000_led_on(&adapter->hw);
4902 	} else {
4903 		e1000_led_off(&adapter->hw);
4904 		e1000_cleanup_led(&adapter->hw);
4905 	}
4906 	EM_CORE_UNLOCK(adapter);
4907 }
4908 
4909 /*
4910 ** Disable the L0S and L1 LINK states
4911 */
4912 static void
4913 em_disable_aspm(struct adapter *adapter)
4914 {
4915 	int		base, reg;
4916 	u16		link_cap, link_ctrl;
4917 	device_t	dev = adapter->dev;
4918 
4919 	switch (adapter->hw.mac.type) {
4920 		case e1000_82573:
4921 		case e1000_82574:
4922 		case e1000_82583:
4923 			break;
4924 		default:
4925 			return;
4926 	}
4927 	if (pci_find_extcap(dev, PCIY_EXPRESS, &base) != 0)
4928 		return;
4929 	reg = base + PCIR_EXPRESS_LINK_CAP;
4930 	link_cap = pci_read_config(dev, reg, 2);
4931 	if ((link_cap & PCIM_LINK_CAP_ASPM) == 0)
4932 		return;
4933 	reg = base + PCIR_EXPRESS_LINK_CTL;
4934 	link_ctrl = pci_read_config(dev, reg, 2);
4935 	link_ctrl &= 0xFFFC; /* turn off bits 0 and 1 (L0s and L1) */
4936 	pci_write_config(dev, reg, link_ctrl, 2);
4937 	return;
4938 }
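
/*
 * Worked example: the ASPM Control field is bits 1:0 of the PCIe Link
 * Control register (bit 0 = L0s, bit 1 = L1), so
 *	link_ctrl &= 0xFFFC
 * clears both, e.g. turning 0x0043 into 0x0040.
 */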
4939 
4940 /**********************************************************************
4941  *
4942  *  Update the board statistics counters.
4943  *
4944  **********************************************************************/
4945 static void
4946 em_update_stats_counters(struct adapter *adapter)
4947 {
4948 	struct ifnet   *ifp;
4949 
4950 	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4951 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4952 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4953 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4954 	}
4955 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4956 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4957 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4958 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4959 
4960 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4961 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4962 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4963 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4964 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4965 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4966 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4967 	/*
4968 	** For watchdog management we need to know if we have been
4969 	** paused during the last interval, so capture that here.
4970 	*/
4971 	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4972 	adapter->stats.xoffrxc += adapter->pause_frames;
4973 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4974 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4975 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4976 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4977 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4978 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4979 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4980 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4981 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4982 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4983 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4984 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4985 
4986 	/* For the 64-bit byte counters the low dword must be read first. */
4987 	/* Both registers clear on the read of the high dword */
4988 
4989 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
4990 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4991 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
4992 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4993 
4994 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4995 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4996 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4997 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4998 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4999 
5000 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
5001 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
5002 
5003 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5004 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5005 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5006 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5007 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5008 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5009 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5010 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5011 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5012 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5013 
5014 	/* Interrupt Counts */
5015 
5016 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5017 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5018 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5019 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5020 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5021 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5022 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5023 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5024 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5025 
5026 	if (adapter->hw.mac.type >= e1000_82543) {
5027 		adapter->stats.algnerrc +=
5028 		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5029 		adapter->stats.rxerrc +=
5030 		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5031 		adapter->stats.tncrs +=
5032 		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5033 		adapter->stats.cexterr +=
5034 		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5035 		adapter->stats.tsctc +=
5036 		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5037 		adapter->stats.tsctfc +=
5038 		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5039 	}
5040 	ifp = adapter->ifp;
5041 
5042 	ifp->if_collisions = adapter->stats.colc;
5043 
5044 	/* Rx Errors */
5045 	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5046 	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5047 	    adapter->stats.ruc + adapter->stats.roc +
5048 	    adapter->stats.mpc + adapter->stats.cexterr;
5049 
5050 	/* Tx Errors */
5051 	ifp->if_oerrors = adapter->stats.ecol +
5052 	    adapter->stats.latecol + adapter->watchdog_events;
5053 }
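
/*
 * Example of the 64-bit counter read order above: GORCL/GORCH form one
 * 64-bit octet counter that clears when the high dword is read, hence
 * low dword first.  If the hardware holds 0x123456789 octets, the two
 * reads return GORCL = 0x23456789 and GORCH = 0x1, and the accumulated
 * value is 0x23456789 + ((u64)0x1 << 32).
 */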
5054 
5055 /* Export a single 32-bit register via a read-only sysctl. */
5056 static int
5057 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5058 {
5059 	struct adapter *adapter;
5060 	u_int val;
5061 
5062 	adapter = oidp->oid_arg1;
5063 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5064 	return (sysctl_handle_int(oidp, &val, 0, req));
5065 }
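
/*
 * Illustrative usage (unit number 0 assumed): this handler backs the
 * read-only register OIDs registered in em_add_hw_stats() below, such
 * as "device_control", "rx_control" and the per-queue head/tail OIDs,
 * so from userland:
 *
 *	# sysctl dev.em.0.device_control
 *	dev.em.0.device_control: <current contents of E1000_CTRL>
 *
 * The register is re-read from the hardware on every query.
 */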
5066 
5067 /*
5068  * Add sysctl variables, one per statistic, to the system.
5069  */
5070 static void
5071 em_add_hw_stats(struct adapter *adapter)
5072 {
5073 	device_t dev = adapter->dev;
5074 
5075 	struct tx_ring *txr = adapter->tx_rings;
5076 	struct rx_ring *rxr = adapter->rx_rings;
5077 
5078 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5079 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5080 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5081 	struct e1000_hw_stats *stats = &adapter->stats;
5082 
5083 	struct sysctl_oid *stat_node, *queue_node, *int_node;
5084 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5085 
5086 #define QUEUE_NAME_LEN 32
5087 	char namebuf[QUEUE_NAME_LEN];
5088 
5089 	/* Driver Statistics */
5090 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5091 			CTLFLAG_RD, &adapter->link_irq,
			"Link MSI-X IRQs handled");
5093 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5094 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
			 "Standard mbuf allocation failed");
5096 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5097 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
			 "Standard mbuf cluster allocation failed");
5099 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5100 			CTLFLAG_RD, &adapter->dropped_pkts,
5101 			"Driver dropped packets");
5102 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5103 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
			"Driver TX DMA setup failures in transmit");
5105 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5106 			CTLFLAG_RD, &adapter->rx_overruns,
5107 			"RX overruns");
5108 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5109 			CTLFLAG_RD, &adapter->watchdog_events,
5110 			"Watchdog timeouts");
5111 
5112 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5113 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5114 			em_sysctl_reg_handler, "IU",
5115 			"Device Control Register");
5116 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5117 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5118 			em_sysctl_reg_handler, "IU",
5119 			"Receiver Control Register");
5120 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5121 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5122 			"Flow Control High Watermark");
5123 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5124 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5125 			"Flow Control Low Watermark");
5126 
5127 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5128 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5129 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5130 					    CTLFLAG_RD, NULL, "Queue Name");
5131 		queue_list = SYSCTL_CHILDREN(queue_node);
5132 
5133 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5134 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5135 				E1000_TDH(txr->me),
5136 				em_sysctl_reg_handler, "IU",
				"Transmit Descriptor Head");
5138 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5139 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5140 				E1000_TDT(txr->me),
5141 				em_sysctl_reg_handler, "IU",
				"Transmit Descriptor Tail");
5143 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5144 				CTLFLAG_RD, &txr->tx_irq,
5145 				"Queue MSI-X Transmit Interrupts");
5146 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5147 				CTLFLAG_RD, &txr->no_desc_avail,
5148 				"Queue No Descriptor Available");
5149 
5150 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5151 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5152 				E1000_RDH(rxr->me),
5153 				em_sysctl_reg_handler, "IU",
5154 				"Receive Descriptor Head");
5155 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5156 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5157 				E1000_RDT(rxr->me),
5158 				em_sysctl_reg_handler, "IU",
5159 				"Receive Descriptor Tail");
5160 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5161 				CTLFLAG_RD, &rxr->rx_irq,
5162 				"Queue MSI-X Receive Interrupts");
5163 	}
5164 
5165 	/* MAC stats get their own sub node */
5166 
5167 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5168 				    CTLFLAG_RD, NULL, "Statistics");
5169 	stat_list = SYSCTL_CHILDREN(stat_node);
5170 
5171 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5172 			CTLFLAG_RD, &stats->ecol,
5173 			"Excessive collisions");
5174 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5175 			CTLFLAG_RD, &stats->scc,
5176 			"Single collisions");
5177 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5178 			CTLFLAG_RD, &stats->mcc,
5179 			"Multiple collisions");
5180 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5181 			CTLFLAG_RD, &stats->latecol,
5182 			"Late collisions");
5183 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5184 			CTLFLAG_RD, &stats->colc,
5185 			"Collision Count");
5186 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5187 			CTLFLAG_RD, &adapter->stats.symerrs,
5188 			"Symbol Errors");
5189 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5190 			CTLFLAG_RD, &adapter->stats.sec,
5191 			"Sequence Errors");
5192 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5193 			CTLFLAG_RD, &adapter->stats.dc,
5194 			"Defer Count");
5195 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5196 			CTLFLAG_RD, &adapter->stats.mpc,
5197 			"Missed Packets");
5198 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5199 			CTLFLAG_RD, &adapter->stats.rnbc,
5200 			"Receive No Buffers");
5201 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5202 			CTLFLAG_RD, &adapter->stats.ruc,
5203 			"Receive Undersize");
5204 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5205 			CTLFLAG_RD, &adapter->stats.rfc,
			"Fragmented Packets Received");
5207 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5208 			CTLFLAG_RD, &adapter->stats.roc,
5209 			"Oversized Packets Received");
5210 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5211 			CTLFLAG_RD, &adapter->stats.rjc,
			"Received Jabber");
5213 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5214 			CTLFLAG_RD, &adapter->stats.rxerrc,
5215 			"Receive Errors");
5216 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5217 			CTLFLAG_RD, &adapter->stats.crcerrs,
5218 			"CRC errors");
5219 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5220 			CTLFLAG_RD, &adapter->stats.algnerrc,
5221 			"Alignment Errors");
5222 	/* On 82575 these are collision counts */
5223 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5224 			CTLFLAG_RD, &adapter->stats.cexterr,
5225 			"Collision/Carrier extension errors");
5226 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5227 			CTLFLAG_RD, &adapter->stats.xonrxc,
5228 			"XON Received");
5229 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5230 			CTLFLAG_RD, &adapter->stats.xontxc,
5231 			"XON Transmitted");
5232 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5233 			CTLFLAG_RD, &adapter->stats.xoffrxc,
5234 			"XOFF Received");
5235 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5236 			CTLFLAG_RD, &adapter->stats.xofftxc,
5237 			"XOFF Transmitted");
5238 
5239 	/* Packet Reception Stats */
5240 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5241 			CTLFLAG_RD, &adapter->stats.tpr,
			"Total Packets Received");
5243 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5244 			CTLFLAG_RD, &adapter->stats.gprc,
5245 			"Good Packets Received");
5246 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5247 			CTLFLAG_RD, &adapter->stats.bprc,
5248 			"Broadcast Packets Received");
5249 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5250 			CTLFLAG_RD, &adapter->stats.mprc,
5251 			"Multicast Packets Received");
5252 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5253 			CTLFLAG_RD, &adapter->stats.prc64,
			"64 byte frames received");
5255 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5256 			CTLFLAG_RD, &adapter->stats.prc127,
5257 			"65-127 byte frames received");
5258 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5259 			CTLFLAG_RD, &adapter->stats.prc255,
5260 			"128-255 byte frames received");
5261 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5262 			CTLFLAG_RD, &adapter->stats.prc511,
5263 			"256-511 byte frames received");
5264 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5265 			CTLFLAG_RD, &adapter->stats.prc1023,
5266 			"512-1023 byte frames received");
5267 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5268 			CTLFLAG_RD, &adapter->stats.prc1522,
			"1024-1522 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
			CTLFLAG_RD, &adapter->stats.gorc,
			"Good Octets Received");
5273 
5274 	/* Packet Transmission Stats */
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
			CTLFLAG_RD, &adapter->stats.gotc,
			"Good Octets Transmitted");
5278 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5279 			CTLFLAG_RD, &adapter->stats.tpt,
5280 			"Total Packets Transmitted");
5281 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5282 			CTLFLAG_RD, &adapter->stats.gptc,
5283 			"Good Packets Transmitted");
5284 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5285 			CTLFLAG_RD, &adapter->stats.bptc,
5286 			"Broadcast Packets Transmitted");
5287 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5288 			CTLFLAG_RD, &adapter->stats.mptc,
5289 			"Multicast Packets Transmitted");
5290 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5291 			CTLFLAG_RD, &adapter->stats.ptc64,
			"64 byte frames transmitted");
5293 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5294 			CTLFLAG_RD, &adapter->stats.ptc127,
5295 			"65-127 byte frames transmitted");
5296 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5297 			CTLFLAG_RD, &adapter->stats.ptc255,
5298 			"128-255 byte frames transmitted");
5299 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5300 			CTLFLAG_RD, &adapter->stats.ptc511,
5301 			"256-511 byte frames transmitted");
5302 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5303 			CTLFLAG_RD, &adapter->stats.ptc1023,
5304 			"512-1023 byte frames transmitted");
5305 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5306 			CTLFLAG_RD, &adapter->stats.ptc1522,
5307 			"1024-1522 byte frames transmitted");
5308 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5309 			CTLFLAG_RD, &adapter->stats.tsctc,
5310 			"TSO Contexts Transmitted");
5311 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5312 			CTLFLAG_RD, &adapter->stats.tsctfc,
5313 			"TSO Contexts Failed");
5314 
5316 	/* Interrupt Stats */
5317 
5318 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5319 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5320 	int_list = SYSCTL_CHILDREN(int_node);
5321 
5322 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5323 			CTLFLAG_RD, &adapter->stats.iac,
5324 			"Interrupt Assertion Count");
5325 
5326 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5327 			CTLFLAG_RD, &adapter->stats.icrxptc,
5328 			"Interrupt Cause Rx Pkt Timer Expire Count");
5329 
5330 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5331 			CTLFLAG_RD, &adapter->stats.icrxatc,
5332 			"Interrupt Cause Rx Abs Timer Expire Count");
5333 
5334 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5335 			CTLFLAG_RD, &adapter->stats.ictxptc,
5336 			"Interrupt Cause Tx Pkt Timer Expire Count");
5337 
5338 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5339 			CTLFLAG_RD, &adapter->stats.ictxatc,
5340 			"Interrupt Cause Tx Abs Timer Expire Count");
5341 
5342 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5343 			CTLFLAG_RD, &adapter->stats.ictxqec,
5344 			"Interrupt Cause Tx Queue Empty Count");
5345 
5346 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5347 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5348 			"Interrupt Cause Tx Queue Min Thresh Count");
5349 
5350 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5351 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5352 			"Interrupt Cause Rx Desc Min Thresh Count");
5353 
5354 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5355 			CTLFLAG_RD, &adapter->stats.icrxoc,
5356 			"Interrupt Cause Receiver Overrun Count");
5357 }
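
/*
 * A sketch of the resulting sysctl layout (names taken from the calls
 * above; unit number 0 assumed for illustration):
 *
 *	dev.em.0.dropped		- flat driver statistics
 *	dev.em.0.queue0.txd_head	- per-queue ring state and IRQ counts
 *	dev.em.0.mac_stats.crc_errs	- hardware MAC counters
 *	dev.em.0.interrupts.asserts	- interrupt cause counters
 *
 * All nodes are read-only; e.g. "sysctl dev.em.0.mac_stats" dumps the
 * whole MAC counter subtree at once.
 */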
5358 
5359 /**********************************************************************
5360  *
 *  This routine provides a way to dump out the adapter's EEPROM,
 *  often a useful debug/service tool. Only the first 32 words are
 *  dumped; everything that matters is within that extent.
5364  *
5365  **********************************************************************/
5366 static int
5367 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5368 {
5369 	struct adapter *adapter;
5370 	int error;
5371 	int result;
5372 
5373 	result = -1;
5374 	error = sysctl_handle_int(oidp, &result, 0, req);
5375 
5376 	if (error || !req->newptr)
5377 		return (error);
5378 
5379 	/*
5380 	 * This value will cause a hex dump of the
5381 	 * first 32 16-bit words of the EEPROM to
5382 	 * the screen.
5383 	 */
5384 	if (result == 1) {
5385 		adapter = (struct adapter *)arg1;
5386 		em_print_nvm_info(adapter);
	}
5388 
5389 	return (error);
5390 }
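
/*
 * Illustrative trigger, assuming this handler is registered under an
 * OID named "nvm" during attach (the registration lives elsewhere in
 * this file): writing 1 dumps the EEPROM to the console, any other
 * value is ignored:
 *
 *	# sysctl dev.em.0.nvm=1
 */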
5391 
5392 static void
5393 em_print_nvm_info(struct adapter *adapter)
5394 {
5395 	u16	eeprom_data;
5396 	int	i, j, row = 0;
5397 
	/* It's a bit crude, but it gets the job done */
5399 	printf("\nInterface EEPROM Dump:\n");
5400 	printf("Offset\n0x0000  ");
5401 	for (i = 0, j = 0; i < 32; i++, j++) {
5402 		if (j == 8) { /* Make the offset block */
5403 			j = 0; ++row;
			printf("\n0x00%x0  ", row);
5405 		}
5406 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5407 		printf("%04x ", eeprom_data);
5408 	}
5409 	printf("\n");
5410 }
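
/*
 * Output format, as produced by the printfs above: eight 16-bit words
 * per row with the byte offset on the left (word values shown here as
 * xxxx placeholders):
 *
 *	Interface EEPROM Dump:
 *	Offset
 *	0x0000  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *	0x0010  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *	...
 */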
5411 
5412 static int
5413 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5414 {
5415 	struct em_int_delay_info *info;
5416 	struct adapter *adapter;
5417 	u32 regval;
5418 	int error, usecs, ticks;
5419 
5420 	info = (struct em_int_delay_info *)arg1;
5421 	usecs = info->value;
5422 	error = sysctl_handle_int(oidp, &usecs, 0, req);
5423 	if (error != 0 || req->newptr == NULL)
5424 		return (error);
5425 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5426 		return (EINVAL);
5427 	info->value = usecs;
5428 	ticks = EM_USECS_TO_TICKS(usecs);
5429 
5430 	adapter = info->adapter;
5431 
5432 	EM_CORE_LOCK(adapter);
5433 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5434 	regval = (regval & ~0xffff) | (ticks & 0xffff);
5435 	/* Handle a few special cases. */
5436 	switch (info->offset) {
5437 	case E1000_RDTR:
5438 		break;
5439 	case E1000_TIDV:
5440 		if (ticks == 0) {
5441 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5442 			/* Don't write 0 into the TIDV register. */
5443 			regval++;
5444 		} else
5445 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5446 		break;
5447 	}
5448 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5449 	EM_CORE_UNLOCK(adapter);
5450 	return (0);
5451 }
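
/*
 * Worked example (a sketch; assumes the ~1.024us-per-tick granularity
 * that EM_USECS_TO_TICKS()/EM_TICKS_TO_USECS() in if_em.h encode):
 * writing 100 to an interrupt-delay OID stores usecs = 100, which
 * converts to about 98 ticks; those ticks replace the low 16 bits of
 * the delay register while the upper bits are preserved.
 */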
5452 
5453 static void
5454 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5455 	const char *description, struct em_int_delay_info *info,
5456 	int offset, int value)
5457 {
5458 	info->adapter = adapter;
5459 	info->offset = offset;
5460 	info->value = value;
5461 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5462 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5463 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5464 	    info, 0, em_sysctl_int_delay, "I", description);
5465 }
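
/*
 * Typical use (illustrative; the actual registrations happen at attach
 * time elsewhere in this file), e.g. for the receive delay timer:
 *
 *	em_add_int_delay_sysctl(adapter, "rx_int_delay",
 *	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
 *	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
 */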
5466 
5467 static void
5468 em_set_sysctl_value(struct adapter *adapter, const char *name,
5469 	const char *description, int *limit, int value)
5470 {
5471 	*limit = value;
5472 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5473 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5475 }
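
/*
 * Typical use (illustrative): exposing a tunable limit with a default,
 * e.g. the RX processing limit set up during attach:
 *
 *	em_set_sysctl_value(adapter, "rx_processing_limit",
 *	    "max number of rx packets to process",
 *	    &adapter->rx_process_limit, em_rx_process_limit);
 */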
5476 
5477 static int
5478 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5479 {
5480 	struct adapter *adapter;
5481 	int error;
5482 	int result;
5483 
5484 	result = -1;
5485 	error = sysctl_handle_int(oidp, &result, 0, req);
5486 
5487 	if (error || !req->newptr)
5488 		return (error);
5489 
5490 	if (result == 1) {
5491 		adapter = (struct adapter *)arg1;
5492 		em_print_debug_info(adapter);
	}
5494 
5495 	return (error);
5496 }
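
/*
 * Like the NVM handler above, this is write-to-trigger (illustrative,
 * assuming registration under an OID named "debug"):
 *
 *	# sysctl dev.em.0.debug=1
 *
 * prints the state dump from em_print_debug_info() to the console.
 */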
5497 
5498 /*
** This routine is meant to be fluid; add whatever is
5500 ** needed for debugging a problem.  -jfv
5501 */
5502 static void
5503 em_print_debug_info(struct adapter *adapter)
5504 {
5505 	device_t dev = adapter->dev;
5506 	struct tx_ring *txr = adapter->tx_rings;
5507 	struct rx_ring *rxr = adapter->rx_rings;
5508 
	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
		printf("Interface is RUNNING ");
	else
		printf("Interface is NOT RUNNING ");
	/* IFF_DRV_OACTIVE set means the transmit queue is full */
	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
		printf("and INACTIVE\n");
	else
		printf("and ACTIVE\n");
5517 
5518 	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5519 	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5520 	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5521 	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5522 	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5523 	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5524 	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5525 	device_printf(dev, "TX descriptors avail = %d\n",
5526 	    txr->tx_avail);
5527 	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5528 	    txr->no_desc_avail);
5529 	device_printf(dev, "RX discarded packets = %ld\n",
5530 	    rxr->rx_discarded);
5531 	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5532 	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5533 }
5534