1 /**************************************************************************
2 
3 Copyright (c) 2001-2003, Intel Corporation
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Redistributions in binary form must reproduce the above copyright
13     notice, this list of conditions and the following disclaimer in the
14     documentation and/or other materials provided with the distribution.
15 
16  3. Neither the name of the Intel Corporation nor the names of its
17     contributors may be used to endorse or promote products derived from
18     this software without specific prior written permission.
19 
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31 
32 ***************************************************************************/
33 
34 /* $OpenBSD: if_em.c,v 1.367 2023/11/10 15:51:20 bluhm Exp $ */
35 /* $FreeBSD: if_em.c,v 1.46 2004/09/29 18:28:28 mlaier Exp $ */
36 
37 #include <dev/pci/if_em.h>
38 #include <dev/pci/if_em_soc.h>
39 
40 #include <netinet/ip6.h>
41 
42 /*********************************************************************
43  *  Driver version
44  *********************************************************************/
45 
46 #define EM_DRIVER_VERSION	"6.2.9"
47 
48 /*********************************************************************
49  *  PCI Device ID Table
50  *********************************************************************/
51 const struct pci_matchid em_devices[] = {
52 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_CPR_DPT },
53 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_SDS_DPT },
54 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_CPR_SPT },
55 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_80003ES2LAN_SDS_SPT },
56 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EM },
57 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EM_LOM },
58 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EP },
59 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EP_LOM },
60 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82540EP_LP },
61 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541EI },
62 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541EI_MOBILE },
63 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541ER },
64 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541ER_LOM },
65 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541GI },
66 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541GI_LF },
67 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82541GI_MOBILE },
68 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82542 },
69 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82543GC_COPPER },
70 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82543GC_FIBER },
71 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544EI_COPPER },
72 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544EI_FIBER },
73 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544GC_COPPER },
74 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82544GC_LOM },
75 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545EM_COPPER },
76 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545EM_FIBER },
77 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545GM_COPPER },
78 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545GM_FIBER },
79 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82545GM_SERDES },
80 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546EB_COPPER },
81 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546EB_FIBER },
82 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546EB_QUAD_CPR },
83 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_COPPER },
84 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_FIBER },
85 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_PCIE },
86 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_QUAD_CPR },
87 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_QUAD_CPR_K },
88 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_SERDES },
89 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82546GB_2 },
90 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82547EI },
91 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82547EI_MOBILE },
92 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82547GI },
93 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_AF },
94 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_AT },
95 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_COPPER },
96 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_FIBER },
97 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_QUAD_CPR },
98 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_QUAD_CPR_LP },
99 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_QUAD_FBR },
100 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_SERDES },
101 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_SDS_DUAL },
102 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571EB_SDS_QUAD },
103 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82571PT_QUAD_CPR },
104 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI_COPPER },
105 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI_FIBER },
106 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI_SERDES },
107 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82572EI },
108 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573E },
109 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573E_IAMT },
110 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573E_PM },
111 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573L },
112 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573L_PL_1 },
113 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573L_PL_2 },
114 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82573V_PM },
115 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82574L },
116 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82574LA },
117 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575EB_COPPER },
118 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575EB_SERDES },
119 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575GB_QUAD_CPR },
120 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82575GB_QP_PM },
121 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576 },
122 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_FIBER },
123 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_SERDES },
124 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_QUAD_COPPER },
125 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_QUAD_CU_ET2 },
126 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_NS },
127 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_NS_SERDES },
128 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82576_SERDES_QUAD },
129 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82577LC },
130 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82577LM },
131 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82578DC },
132 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82578DM },
133 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82579LM },
134 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82579V },
135 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER },
136 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER_OEM1 },
137 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER_IT },
138 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_FIBER },
139 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_SERDES },
140 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_SGMII },
141 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_COPPER_NF },
142 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I210_SERDES_NF },
143 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I211_COPPER },
144 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I217_LM },
145 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I217_V },
146 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_LM },
147 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_LM_2 },
148 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_LM_3 },
149 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_V },
150 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_V_2 },
151 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I218_V_3 },
152 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM },
153 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM2 },
154 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM3 },
155 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM4 },
156 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM5 },
157 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM6 },
158 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM7 },
159 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM8 },
160 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM9 },
161 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM10 },
162 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM11 },
163 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM12 },
164 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM13 },
165 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM14 },
166 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM15 },
167 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM16 },
168 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM17 },
169 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM18 },
170 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_LM19 },
171 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V },
172 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V2 },
173 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V4 },
174 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V5 },
175 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V6 },
176 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V7 },
177 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V8 },
178 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V9 },
179 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V10 },
180 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V11 },
181 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V12 },
182 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V13 },
183 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V14 },
184 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V15 },
185 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V16 },
186 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V17 },
187 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V18 },
188 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I219_V19 },
189 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_COPPER },
190 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_FIBER },
191 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_SERDES },
192 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_SGMII },
193 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_COPPER_DUAL },
194 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82580_QUAD_FIBER },
195 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_SGMII },
196 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_SERDES },
197 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_BPLANE },
198 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DH89XXCC_SFP },
199 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82583V },
200 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_COPPER },
201 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_FIBER },
202 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_SERDES },
203 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I350_SGMII },
204 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I354_BP_1GBPS },
205 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I354_BP_2_5GBPS },
206 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I354_SGMII },
207 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_82567V_3 },
208 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IFE },
209 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IFE_G },
210 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IFE_GT },
211 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_AMT },
212 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_C },
213 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_M },
214 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH8_IGP_M_AMT },
215 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_BM },
216 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IFE },
217 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IFE_G },
218 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IFE_GT },
219 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_AMT },
220 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_C },
221 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_M },
222 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_M_AMT },
223 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH9_IGP_M_V },
224 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_D_BM_LF },
225 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_D_BM_LM },
226 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_D_BM_V },
227 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_R_BM_LF },
228 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_R_BM_LM },
229 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_ICH10_R_BM_V },
230 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_1 },
231 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_2 },
232 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_3 },
233 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_4 },
234 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_5 },
235 	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_EP80579_LAN_6 }
236 };
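/*
 * em_probe() matches an adapter against this table with pci_matchbyid();
 * any device listed here is claimed and attached as em(4).
 */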
237 
238 /*********************************************************************
239  *  Function prototypes
240  *********************************************************************/
241 int  em_probe(struct device *, void *, void *);
242 void em_attach(struct device *, struct device *, void *);
243 void em_defer_attach(struct device*);
244 int  em_detach(struct device *, int);
245 int  em_activate(struct device *, int);
246 int  em_intr(void *);
247 int  em_allocate_legacy(struct em_softc *);
248 void em_start(struct ifqueue *);
249 int  em_ioctl(struct ifnet *, u_long, caddr_t);
250 void em_watchdog(struct ifnet *);
251 void em_init(void *);
252 void em_stop(void *, int);
253 void em_media_status(struct ifnet *, struct ifmediareq *);
254 int  em_media_change(struct ifnet *);
255 uint64_t  em_flowstatus(struct em_softc *);
256 void em_identify_hardware(struct em_softc *);
257 int  em_allocate_pci_resources(struct em_softc *);
258 void em_free_pci_resources(struct em_softc *);
259 void em_local_timer(void *);
260 int  em_hardware_init(struct em_softc *);
261 void em_setup_interface(struct em_softc *);
262 int  em_setup_transmit_structures(struct em_softc *);
263 void em_initialize_transmit_unit(struct em_softc *);
264 int  em_setup_receive_structures(struct em_softc *);
265 void em_initialize_receive_unit(struct em_softc *);
266 void em_enable_intr(struct em_softc *);
267 void em_disable_intr(struct em_softc *);
268 void em_free_transmit_structures(struct em_softc *);
269 void em_free_receive_structures(struct em_softc *);
270 void em_update_stats_counters(struct em_softc *);
271 void em_disable_aspm(struct em_softc *);
272 void em_txeof(struct em_queue *);
273 int  em_allocate_receive_structures(struct em_softc *);
274 int  em_allocate_transmit_structures(struct em_softc *);
275 int  em_allocate_desc_rings(struct em_softc *);
276 int  em_rxfill(struct em_queue *);
277 void em_rxrefill(void *);
278 int  em_rxeof(struct em_queue *);
279 void em_receive_checksum(struct em_softc *, struct em_rx_desc *,
280 			 struct mbuf *);
281 u_int	em_transmit_checksum_setup(struct em_queue *, struct mbuf *, u_int,
282 	    u_int32_t *, u_int32_t *);
283 u_int	em_tx_ctx_setup(struct em_queue *, struct mbuf *, u_int, u_int32_t *,
284 	    u_int32_t *);
285 void em_iff(struct em_softc *);
286 void em_update_link_status(struct em_softc *);
287 int  em_get_buf(struct em_queue *, int);
288 void em_enable_hw_vlans(struct em_softc *);
289 u_int em_encap(struct em_queue *, struct mbuf *);
290 void em_smartspeed(struct em_softc *);
291 int  em_82547_fifo_workaround(struct em_softc *, int);
292 void em_82547_update_fifo_head(struct em_softc *, int);
293 int  em_82547_tx_fifo_reset(struct em_softc *);
294 void em_82547_move_tail(void *arg);
295 void em_82547_move_tail_locked(struct em_softc *);
296 int  em_dma_malloc(struct em_softc *, bus_size_t, struct em_dma_alloc *);
297 void em_dma_free(struct em_softc *, struct em_dma_alloc *);
298 u_int32_t em_fill_descriptors(u_int64_t address, u_int32_t length,
299 			      PDESC_ARRAY desc_array);
300 void em_flush_tx_ring(struct em_queue *);
301 void em_flush_rx_ring(struct em_queue *);
302 void em_flush_desc_rings(struct em_softc *);
303 int em_get_sffpage(struct em_softc *, struct if_sffpage *);
304 
305 #ifndef SMALL_KERNEL
306 /* MSIX/Multiqueue functions */
307 int  em_allocate_msix(struct em_softc *);
308 int  em_setup_queues_msix(struct em_softc *);
309 int  em_queue_intr_msix(void *);
310 int  em_link_intr_msix(void *);
311 void em_enable_queue_intr_msix(struct em_queue *);
312 #else
313 #define em_allocate_msix(_sc) 	(-1)
314 #endif
315 
316 #if NKSTAT > 0
317 void	em_kstat_attach(struct em_softc *);
318 int	em_kstat_read(struct kstat *);
319 void	em_tbi_adjust_stats(struct em_softc *, uint32_t, uint8_t *);
320 #endif
321 
322 /*********************************************************************
323  *  OpenBSD Device Interface Entry Points
324  *********************************************************************/
325 
326 const struct cfattach em_ca = {
327 	sizeof(struct em_softc), em_probe, em_attach, em_detach,
328 	em_activate
329 };
330 
331 struct cfdriver em_cd = {
332 	NULL, "em", DV_IFNET
333 };
334 
335 static int em_smart_pwr_down = FALSE;
336 int em_enable_msix = 0;
337 
338 /*********************************************************************
339  *  Device identification routine
340  *
341  *  em_probe determines if the driver should be loaded on
342  *  adapter based on PCI vendor/device id of the adapter.
343  *  em_probe determines if the driver should be loaded on the
344  *  adapter, based on the PCI vendor/device id of the adapter.
345  *********************************************************************/
346 
347 int
348 em_probe(struct device *parent, void *match, void *aux)
349 {
350 	INIT_DEBUGOUT("em_probe: begin");
351 
352 	return (pci_matchbyid((struct pci_attach_args *)aux, em_devices,
353 	    nitems(em_devices)));
354 }
355 
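/*
 * Deferred attachment: em_attach() schedules this routine with
 * config_defer(9) when em_hardware_init() returns EAGAIN, and it
 * finishes bringing the interface up once the GCU device can be
 * located with em_lookup_gcu().
 */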
356 void
357 em_defer_attach(struct device *self)
358 {
359 	struct em_softc *sc = (struct em_softc *)self;
360 	struct pci_attach_args *pa = &sc->osdep.em_pa;
361 	pci_chipset_tag_t	pc = pa->pa_pc;
362 	void *gcu;
363 
364 	INIT_DEBUGOUT("em_defer_attach: begin");
365 
366 	if ((gcu = em_lookup_gcu(self)) == 0) {
367 		printf("%s: No GCU found, deferred attachment failed\n",
368 		    DEVNAME(sc));
369 
370 		if (sc->sc_intrhand)
371 			pci_intr_disestablish(pc, sc->sc_intrhand);
372 		sc->sc_intrhand = 0;
373 
374 		em_stop(sc, 1);
375 
376 		em_free_pci_resources(sc);
377 
378 		return;
379 	}
380 
381 	sc->hw.gcu = gcu;
382 
383 	em_attach_miibus(self);
384 
385 	em_setup_interface(sc);
386 
387 	em_setup_link(&sc->hw);
388 
389 	em_update_link_status(sc);
390 }
391 
392 /*********************************************************************
393  *  Device initialization routine
394  *
395  *  The attach entry point is called when the driver is being loaded.
396  *  This routine identifies the type of hardware, allocates all resources
397  *  and initializes the hardware.
398  *
399  *********************************************************************/
400 
401 void
402 em_attach(struct device *parent, struct device *self, void *aux)
403 {
404 	struct pci_attach_args *pa = aux;
405 	struct em_softc *sc;
406 	int defer = 0;
407 
408 	INIT_DEBUGOUT("em_attach: begin");
409 
410 	sc = (struct em_softc *)self;
411 	sc->sc_dmat = pa->pa_dmat;
412 	sc->osdep.em_pa = *pa;
413 
414 	timeout_set(&sc->timer_handle, em_local_timer, sc);
415 	timeout_set(&sc->tx_fifo_timer_handle, em_82547_move_tail, sc);
416 
417 	rw_init(&sc->sfflock, "emsff");
418 
419 	/* Determine hardware revision */
420 	em_identify_hardware(sc);
421 
422 	/*
423 	 * Only use MSI on the newer PCIe parts, with the exception
424 	 * of 82571/82572 due to "Byte Enables 2 and 3 Are Not Set" errata
425 	 */
426 	if (sc->hw.mac_type <= em_82572)
427 		sc->osdep.em_pa.pa_flags &= ~PCI_FLAGS_MSI_ENABLED;
428 
429 	/* Parameters (to be read from user) */
430 	if (sc->hw.mac_type >= em_82544) {
431 		sc->sc_tx_slots = EM_MAX_TXD;
432 		sc->sc_rx_slots = EM_MAX_RXD;
433 	} else {
434 		sc->sc_tx_slots = EM_MAX_TXD_82543;
435 		sc->sc_rx_slots = EM_MAX_RXD_82543;
436 	}
437 	sc->tx_int_delay = EM_TIDV;
438 	sc->tx_abs_int_delay = EM_TADV;
439 	sc->rx_int_delay = EM_RDTR;
440 	sc->rx_abs_int_delay = EM_RADV;
441 	sc->hw.autoneg = DO_AUTO_NEG;
442 	sc->hw.wait_autoneg_complete = WAIT_FOR_AUTO_NEG_DEFAULT;
443 	sc->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
444 	sc->hw.tbi_compatibility_en = TRUE;
445 	sc->sc_rx_buffer_len = EM_RXBUFFER_2048;
446 
447 	sc->hw.phy_init_script = 1;
448 	sc->hw.phy_reset_disable = FALSE;
449 
450 #ifndef EM_MASTER_SLAVE
451 	sc->hw.master_slave = em_ms_hw_default;
452 #else
453 	sc->hw.master_slave = EM_MASTER_SLAVE;
454 #endif
455 
456 	/*
457 	 * This controls when hardware reports transmit completion
458 	 * status.
459 	 */
460 	sc->hw.report_tx_early = 1;
461 
462 	if (em_allocate_pci_resources(sc))
463 		goto err_pci;
464 
465 	/* Initialize eeprom parameters */
466 	em_init_eeprom_params(&sc->hw);
467 
468 	/*
469 	 * Set the max frame size assuming standard Ethernet
470 	 * sized frames.
471 	 */
472 	switch (sc->hw.mac_type) {
473 		case em_82573:
474 		{
475 			uint16_t	eeprom_data = 0;
476 
477 			/*
478 			 * 82573 only supports Jumbo frames
479 			 * if ASPM is disabled.
480 			 */
481 			em_read_eeprom(&sc->hw, EEPROM_INIT_3GIO_3,
482 			    1, &eeprom_data);
483 			if (eeprom_data & EEPROM_WORD1A_ASPM_MASK) {
484 				sc->hw.max_frame_size = ETHER_MAX_LEN;
485 				break;
486 			}
487 			/* Allow Jumbo frames */
488 			/* FALLTHROUGH */
489 		}
490 		case em_82571:
491 		case em_82572:
492 		case em_82574:
493 		case em_82575:
494 		case em_82576:
495 		case em_82580:
496 		case em_i210:
497 		case em_i350:
498 		case em_ich9lan:
499 		case em_ich10lan:
500 		case em_pch2lan:
501 		case em_pch_lpt:
502 		case em_pch_spt:
503 		case em_pch_cnp:
504 		case em_pch_tgp:
505 		case em_pch_adp:
506 		case em_80003es2lan:
507 			/* 9K Jumbo Frame size */
508 			sc->hw.max_frame_size = 9234;
509 			break;
510 		case em_pchlan:
511 			sc->hw.max_frame_size = 4096;
512 			break;
513 		case em_82542_rev2_0:
514 		case em_82542_rev2_1:
515 		case em_ich8lan:
516 			/* Adapters that do not support Jumbo frames */
517 			sc->hw.max_frame_size = ETHER_MAX_LEN;
518 			break;
519 		default:
520 			sc->hw.max_frame_size =
521 			    MAX_JUMBO_FRAME_SIZE;
522 	}
523 
524 	sc->hw.min_frame_size =
525 	    ETHER_MIN_LEN + ETHER_CRC_LEN;
526 
527 	if (em_allocate_desc_rings(sc) != 0) {
528 		printf("%s: Unable to allocate descriptor ring memory\n",
529 		    DEVNAME(sc));
530 		goto err_pci;
531 	}
532 
533 	/* Initialize the hardware */
534 	if ((defer = em_hardware_init(sc))) {
535 		if (defer == EAGAIN)
536 			config_defer(self, em_defer_attach);
537 		else {
538 			printf("%s: Unable to initialize the hardware\n",
539 			    DEVNAME(sc));
540 			goto err_pci;
541 		}
542 	}
543 
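	/*
	 * On these multi-port controllers PHY access is arbitrated through
	 * a software/firmware semaphore; pick the E1000_SWFW_PHYn_SM bit
	 * matching this port's PCI function number, read from the STATUS
	 * register below.
	 */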
544 	if (sc->hw.mac_type == em_80003es2lan || sc->hw.mac_type == em_82575 ||
545 	    sc->hw.mac_type == em_82576 ||
546 	    sc->hw.mac_type == em_82580 || sc->hw.mac_type == em_i210 ||
547 	    sc->hw.mac_type == em_i350) {
548 		uint32_t reg = EM_READ_REG(&sc->hw, E1000_STATUS);
549 		sc->hw.bus_func = (reg & E1000_STATUS_FUNC_MASK) >>
550 		    E1000_STATUS_FUNC_SHIFT;
551 
552 		switch (sc->hw.bus_func) {
553 		case 0:
554 			sc->hw.swfw = E1000_SWFW_PHY0_SM;
555 			break;
556 		case 1:
557 			sc->hw.swfw = E1000_SWFW_PHY1_SM;
558 			break;
559 		case 2:
560 			sc->hw.swfw = E1000_SWFW_PHY2_SM;
561 			break;
562 		case 3:
563 			sc->hw.swfw = E1000_SWFW_PHY3_SM;
564 			break;
565 		}
566 	} else {
567 		sc->hw.bus_func = 0;
568 	}
569 
570 	/* Copy the permanent MAC address out of the EEPROM */
571 	if (em_read_mac_addr(&sc->hw) < 0) {
572 		printf("%s: EEPROM read error while reading mac address\n",
573 		       DEVNAME(sc));
574 		goto err_pci;
575 	}
576 
577 	bcopy(sc->hw.mac_addr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN);
578 
579 	/* Setup OS specific network interface */
580 	if (!defer)
581 		em_setup_interface(sc);
582 
583 	/* Initialize statistics */
584 	em_clear_hw_cntrs(&sc->hw);
585 #if NKSTAT > 0
586 	em_kstat_attach(sc);
587 #endif
588 	sc->hw.get_link_status = 1;
589 	if (!defer)
590 		em_update_link_status(sc);
591 
592 #ifdef EM_DEBUG
593 	printf(", mac %#x phy %#x", sc->hw.mac_type, sc->hw.phy_type);
594 #endif
595 	printf(", address %s\n", ether_sprintf(sc->sc_ac.ac_enaddr));
596 
597 	/* Indicate SOL/IDER usage */
598 	if (em_check_phy_reset_block(&sc->hw))
599 		printf("%s: PHY reset is blocked due to SOL/IDER session.\n",
600 		    DEVNAME(sc));
601 
602 	/* Identify 82544 on PCI-X */
603 	em_get_bus_info(&sc->hw);
604 	if (sc->hw.bus_type == em_bus_type_pcix &&
605 	    sc->hw.mac_type == em_82544)
606 		sc->pcix_82544 = TRUE;
607 	else
608 		sc->pcix_82544 = FALSE;
609 
610 	sc->hw.icp_xxxx_is_link_up = FALSE;
611 
612 	INIT_DEBUGOUT("em_attach: end");
613 	return;
614 
615 err_pci:
616 	em_free_pci_resources(sc);
617 }
618 
619 /*********************************************************************
620  *  Transmit entry point
621  *
622  *  em_start is called by the stack to initiate a transmit.
623  *  The driver will remain in this routine as long as there are
624  *  packets to transmit and transmit resources are available.
625  *  If resources are not available, the stack is notified and
626  *  the packet is requeued.
627  **********************************************************************/
628 
629 void
630 em_start(struct ifqueue *ifq)
631 {
632 	struct ifnet *ifp = ifq->ifq_if;
633 	struct em_softc *sc = ifp->if_softc;
634 	u_int head, free, used;
635 	struct mbuf *m;
636 	int post = 0;
637 	struct em_queue *que = sc->queues; /* Use only first queue. */
638 
639 	if (!sc->link_active) {
640 		ifq_purge(ifq);
641 		return;
642 	}
643 
644 	/* calculate free space */
645 	head = que->tx.sc_tx_desc_head;
646 	free = que->tx.sc_tx_desc_tail;
647 	if (free <= head)
648 		free += sc->sc_tx_slots;
649 	free -= head;
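	/*
	 * The descriptor ring is circular: head is the next slot to fill,
	 * tail is the oldest slot not yet reclaimed by em_txeof().  E.g.
	 * with 512 slots, head 500 and tail 10: 10 <= 500, so
	 * free = 10 + 512 - 500 = 22 descriptors are still available.
	 */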
650 
651 	if (sc->hw.mac_type != em_82547) {
652 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
653 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
654 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
655 	}
656 
657 	for (;;) {
658 		/* use 2 because cksum setup can use an extra slot */
659 		if (EM_MAX_SCATTER + 2 > free) {
660 			ifq_set_oactive(ifq);
661 			break;
662 		}
663 
664 		m = ifq_dequeue(ifq);
665 		if (m == NULL)
666 			break;
667 
668 		used = em_encap(que, m);
669 		if (used == 0) {
670 			m_freem(m);
671 			continue;
672 		}
673 
674 		KASSERT(used <= free);
675 
676 		free -= used;
677 
678 #if NBPFILTER > 0
679 		/* Send a copy of the frame to the BPF listener */
680 		if (ifp->if_bpf)
681 			bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
682 #endif
683 
684 		/* Set timeout in case hardware has problems transmitting */
685 		ifp->if_timer = EM_TX_TIMEOUT;
686 
687 		if (sc->hw.mac_type == em_82547) {
688 			int len = m->m_pkthdr.len;
689 
690 			if (sc->link_duplex == HALF_DUPLEX)
691 				em_82547_move_tail_locked(sc);
692 			else {
693 				E1000_WRITE_REG(&sc->hw, TDT(que->me),
694 				    que->tx.sc_tx_desc_head);
695 				em_82547_update_fifo_head(sc, len);
696 			}
697 		}
698 
699 		post = 1;
700 	}
701 
702 	if (sc->hw.mac_type != em_82547) {
703 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
704 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
705 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
706 		/*
707 		 * Advance the Transmit Descriptor Tail (Tdt),
708 		 * this tells the E1000 that this frame is
709 		 * available to transmit.
710 		 */
711 		if (post)
712 			E1000_WRITE_REG(&sc->hw, TDT(que->me),
713 			    que->tx.sc_tx_desc_head);
714 	}
715 }
716 
717 /*********************************************************************
718  *  Ioctl entry point
719  *
720  *  em_ioctl is called when the user wants to configure the
721  *  interface.
722  *
723  *  return 0 on success, positive on failure
724  **********************************************************************/
725 
726 int
727 em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
728 {
729 	int		error = 0;
730 	struct ifreq   *ifr = (struct ifreq *) data;
731 	struct em_softc *sc = ifp->if_softc;
732 	int s;
733 
734 	s = splnet();
735 
736 	switch (command) {
737 	case SIOCSIFADDR:
738 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFADDR (Set Interface "
739 			       "Addr)");
740 		if (!(ifp->if_flags & IFF_UP)) {
741 			ifp->if_flags |= IFF_UP;
742 			em_init(sc);
743 		}
744 		break;
745 
746 	case SIOCSIFFLAGS:
747 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
748 		if (ifp->if_flags & IFF_UP) {
749 			if (ifp->if_flags & IFF_RUNNING)
750 				error = ENETRESET;
751 			else
752 				em_init(sc);
753 		} else {
754 			if (ifp->if_flags & IFF_RUNNING)
755 				em_stop(sc, 0);
756 		}
757 		break;
758 
759 	case SIOCSIFMEDIA:
760 		/* Check SOL/IDER usage */
761 		if (em_check_phy_reset_block(&sc->hw)) {
762 			printf("%s: Media change is blocked due to SOL/IDER session.\n",
763 			    DEVNAME(sc));
764 			break;
765 		}
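		/* FALLTHROUGH */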
766 	case SIOCGIFMEDIA:
767 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
768 		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
769 		break;
770 
771 	case SIOCGIFRXR:
772 		error = if_rxr_ioctl((struct if_rxrinfo *)ifr->ifr_data,
773 		    NULL, EM_MCLBYTES, &sc->queues->rx.sc_rx_ring);
774 		break;
775 
776 	case SIOCGIFSFFPAGE:
777 		error = rw_enter(&sc->sfflock, RW_WRITE|RW_INTR);
778 		if (error != 0)
779 			break;
780 
781 		error = em_get_sffpage(sc, (struct if_sffpage *)data);
782 		rw_exit(&sc->sfflock);
783 		break;
784 
785 	default:
786 		error = ether_ioctl(ifp, &sc->sc_ac, command, data);
787 	}
788 
789 	if (error == ENETRESET) {
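	/*
	 * ENETRESET from the cases above (or from ether_ioctl()) means only
	 * the receive filter needs updating: reprogram it with em_iff()
	 * instead of doing a full reinit.  82542 rev 2.0 also needs its
	 * receive unit reinitialized afterwards.
	 */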
790 		if (ifp->if_flags & IFF_RUNNING) {
791 			em_disable_intr(sc);
792 			em_iff(sc);
793 			if (sc->hw.mac_type == em_82542_rev2_0)
794 				em_initialize_receive_unit(sc);
795 			em_enable_intr(sc);
796 		}
797 		error = 0;
798 	}
799 
800 	splx(s);
801 	return (error);
802 }
803 
804 /*********************************************************************
805  *  Watchdog entry point
806  *
807  *  This routine is called whenever hardware quits transmitting.
808  *
809  **********************************************************************/
810 
811 void
812 em_watchdog(struct ifnet *ifp)
813 {
814 	struct em_softc *sc = ifp->if_softc;
815 	struct em_queue *que = sc->queues; /* Use only first queue. */
816 
817 
818 	/* If we are in this routine because of pause frames, then
819 	 * don't reset the hardware.
820 	 */
821 	if (E1000_READ_REG(&sc->hw, STATUS) & E1000_STATUS_TXOFF) {
822 		ifp->if_timer = EM_TX_TIMEOUT;
823 		return;
824 	}
825 	printf("%s: watchdog: head %u tail %u TDH %u TDT %u\n",
826 	    DEVNAME(sc),
827 	    que->tx.sc_tx_desc_head, que->tx.sc_tx_desc_tail,
828 	    E1000_READ_REG(&sc->hw, TDH(que->me)),
829 	    E1000_READ_REG(&sc->hw, TDT(que->me)));
830 
831 	em_init(sc);
832 
833 	sc->watchdog_events++;
834 }
835 
836 /*********************************************************************
837  *  Init entry point
838  *
839  *  This routine is used in two ways. It is used by the stack as
840  *  the init entry point in the network interface structure. It is also used
841  *  by the driver as a hw/sw initialization routine to get to a
842  *  consistent state.
843  *
844  **********************************************************************/
845 
846 void
847 em_init(void *arg)
848 {
849 	struct em_softc *sc = arg;
850 	struct ifnet   *ifp = &sc->sc_ac.ac_if;
851 	uint32_t	pba;
852 	int s;
853 
854 	s = splnet();
855 
856 	INIT_DEBUGOUT("em_init: begin");
857 
858 	em_stop(sc, 0);
859 
860 	/*
861 	 * Packet Buffer Allocation (PBA)
862 	 * Writing PBA sets the receive portion of the buffer;
863 	 * the remainder is used for the transmit buffer.
864 	 *
865 	 * Devices before the 82547 had a Packet Buffer of 64K.
866 	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
867 	 * After the 82547 the buffer was reduced to 40K.
868 	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
869 	 *   Note: default does not leave enough room for Jumbo Frame >10k.
870 	 */
871 	switch (sc->hw.mac_type) {
872 	case em_82547:
873 	case em_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
874 		if (sc->hw.max_frame_size > EM_RXBUFFER_8192)
875 			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
876 		else
877 			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
878 		sc->tx_fifo_head = 0;
879 		sc->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
880 		sc->tx_fifo_size = (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
881 		break;
882 	case em_82571:
883 	case em_82572: /* Total Packet Buffer on these is 48k */
884 	case em_82575:
885 	case em_82576:
886 	case em_82580:
887 	case em_80003es2lan:
888 	case em_i350:
889 		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
890 		break;
891 	case em_i210:
892 		pba = E1000_PBA_34K;
893 		break;
894 	case em_82573: /* 82573: Total Packet Buffer is 32K */
895 		/* Jumbo frames not supported */
896 		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
897 		break;
898 	case em_82574: /* Total Packet Buffer is 40k */
899 		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
900 		break;
901 	case em_ich8lan:
902 		pba = E1000_PBA_8K;
903 		break;
904 	case em_ich9lan:
905 	case em_ich10lan:
906 		/* Boost Receive side for jumbo frames */
907 		if (sc->hw.max_frame_size > EM_RXBUFFER_4096)
908 			pba = E1000_PBA_14K;
909 		else
910 			pba = E1000_PBA_10K;
911 		break;
912 	case em_pchlan:
913 	case em_pch2lan:
914 	case em_pch_lpt:
915 	case em_pch_spt:
916 	case em_pch_cnp:
917 	case em_pch_tgp:
918 	case em_pch_adp:
919 		pba = E1000_PBA_26K;
920 		break;
921 	default:
922 		/* Devices before 82547 had a Packet Buffer of 64K.   */
923 		if (sc->hw.max_frame_size > EM_RXBUFFER_8192)
924 			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
925 		else
926 			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
927 	}
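	/*
	 * On the 82547 the tx_fifo_* values computed above are consumed by
	 * the Tx FIFO workaround (em_82547_move_tail() and friends) that
	 * avoids the half-duplex hang described later in this file.
	 */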
928 	INIT_DEBUGOUT1("em_init: pba=%dK",pba);
929 	E1000_WRITE_REG(&sc->hw, PBA, pba);
930 
931 	/* Get the latest mac address; the user can use a LAA */
932 	bcopy(sc->sc_ac.ac_enaddr, sc->hw.mac_addr, ETHER_ADDR_LEN);
933 
934 	/* Initialize the hardware */
935 	if (em_hardware_init(sc)) {
936 		printf("%s: Unable to initialize the hardware\n",
937 		       DEVNAME(sc));
938 		splx(s);
939 		return;
940 	}
941 	em_update_link_status(sc);
942 
943 	E1000_WRITE_REG(&sc->hw, VET, ETHERTYPE_VLAN);
944 	if (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING)
945 		em_enable_hw_vlans(sc);
946 
947 	/* Prepare transmit descriptors and buffers */
948 	if (em_setup_transmit_structures(sc)) {
949 		printf("%s: Could not setup transmit structures\n",
950 		       DEVNAME(sc));
951 		em_stop(sc, 0);
952 		splx(s);
953 		return;
954 	}
955 	em_initialize_transmit_unit(sc);
956 
957 	/* Prepare receive descriptors and buffers */
958 	if (em_setup_receive_structures(sc)) {
959 		printf("%s: Could not setup receive structures\n",
960 		       DEVNAME(sc));
961 		em_stop(sc, 0);
962 		splx(s);
963 		return;
964 	}
965 	em_initialize_receive_unit(sc);
966 
967 #ifndef SMALL_KERNEL
968 	if (sc->msix) {
969 		if (em_setup_queues_msix(sc)) {
970 			printf("%s: Can't setup msix queues\n", DEVNAME(sc));
971 			splx(s);
972 			return;
973 		}
974 	}
975 #endif
976 
977 	/* Program promiscuous mode and multicast filters. */
978 	em_iff(sc);
979 
980 	ifp->if_flags |= IFF_RUNNING;
981 	ifq_clr_oactive(&ifp->if_snd);
982 
983 	timeout_add_sec(&sc->timer_handle, 1);
984 	em_clear_hw_cntrs(&sc->hw);
985 	em_enable_intr(sc);
986 
987 	/* Don't reset the phy next time init gets called */
988 	sc->hw.phy_reset_disable = TRUE;
989 
990 	splx(s);
991 }
992 
993 /*********************************************************************
994  *
995  *  Interrupt Service routine
996  *
997  **********************************************************************/
998 int
999 em_intr(void *arg)
1000 {
1001 	struct em_softc	*sc = arg;
1002 	struct em_queue *que = sc->queues; /* single queue */
1003 	struct ifnet	*ifp = &sc->sc_ac.ac_if;
1004 	u_int32_t	reg_icr, test_icr;
1005 
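	/*
	 * On 82571 and newer the interrupt line may be shared, so only
	 * claim it when INT_ASSERTED is set; older parts claim any
	 * non-zero cause in ICR.
	 */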
1006 	test_icr = reg_icr = E1000_READ_REG(&sc->hw, ICR);
1007 	if (sc->hw.mac_type >= em_82571)
1008 		test_icr = (reg_icr & E1000_ICR_INT_ASSERTED);
1009 	if (!test_icr)
1010 		return (0);
1011 
1012 	if (ifp->if_flags & IFF_RUNNING) {
1013 		em_txeof(que);
1014 		if (em_rxeof(que))
1015 			em_rxrefill(que);
1016 	}
1017 
1018 	/* Link status change */
1019 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1020 		KERNEL_LOCK();
1021 		sc->hw.get_link_status = 1;
1022 		em_check_for_link(&sc->hw);
1023 		em_update_link_status(sc);
1024 		KERNEL_UNLOCK();
1025 	}
1026 
1027 	return (1);
1028 }
1029 
1030 /*********************************************************************
1031  *
1032  *  Media Ioctl callback
1033  *
1034  *  This routine is called whenever the user queries the status of
1035  *  the interface using ifconfig.
1036  *
1037  **********************************************************************/
1038 void
1039 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1040 {
1041 	struct em_softc *sc = ifp->if_softc;
1042 	uint64_t fiber_type = IFM_1000_SX;
1043 	u_int16_t gsr;
1044 
1045 	INIT_DEBUGOUT("em_media_status: begin");
1046 
1047 	em_check_for_link(&sc->hw);
1048 	em_update_link_status(sc);
1049 
1050 	ifmr->ifm_status = IFM_AVALID;
1051 	ifmr->ifm_active = IFM_ETHER;
1052 
1053 	if (!sc->link_active) {
1054 		ifmr->ifm_active |= IFM_NONE;
1055 		return;
1056 	}
1057 
1058 	ifmr->ifm_status |= IFM_ACTIVE;
1059 
1060 	if (sc->hw.media_type == em_media_type_fiber ||
1061 	    sc->hw.media_type == em_media_type_internal_serdes) {
1062 		if (sc->hw.mac_type == em_82545)
1063 			fiber_type = IFM_1000_LX;
1064 		ifmr->ifm_active |= fiber_type | IFM_FDX;
1065 	} else {
1066 		switch (sc->link_speed) {
1067 		case 10:
1068 			ifmr->ifm_active |= IFM_10_T;
1069 			break;
1070 		case 100:
1071 			ifmr->ifm_active |= IFM_100_TX;
1072 			break;
1073 		case 1000:
1074 			ifmr->ifm_active |= IFM_1000_T;
1075 			break;
1076 		}
1077 
1078 		if (sc->link_duplex == FULL_DUPLEX)
1079 			ifmr->ifm_active |= em_flowstatus(sc) | IFM_FDX;
1080 		else
1081 			ifmr->ifm_active |= IFM_HDX;
1082 
1083 		if (IFM_SUBTYPE(ifmr->ifm_active) == IFM_1000_T) {
1084 			em_read_phy_reg(&sc->hw, PHY_1000T_STATUS, &gsr);
1085 			if (gsr & SR_1000T_MS_CONFIG_RES)
1086 				ifmr->ifm_active |= IFM_ETH_MASTER;
1087 		}
1088 	}
1089 }
1090 
1091 /*********************************************************************
1092  *
1093  *  Media Ioctl callback
1094  *
1095  *  This routine is called when the user changes speed/duplex using
1096  *  the media/mediaopt options with ifconfig.
1097  *
1098  **********************************************************************/
1099 int
1100 em_media_change(struct ifnet *ifp)
1101 {
1102 	struct em_softc *sc = ifp->if_softc;
1103 	struct ifmedia	*ifm = &sc->media;
1104 
1105 	INIT_DEBUGOUT("em_media_change: begin");
1106 
1107 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1108 		return (EINVAL);
1109 
1110 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1111 	case IFM_AUTO:
1112 		sc->hw.autoneg = DO_AUTO_NEG;
1113 		sc->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1114 		break;
1115 	case IFM_1000_LX:
1116 	case IFM_1000_SX:
1117 	case IFM_1000_T:
1118 		sc->hw.autoneg = DO_AUTO_NEG;
1119 		sc->hw.autoneg_advertised = ADVERTISE_1000_FULL;
1120 		break;
1121 	case IFM_100_TX:
1122 		sc->hw.autoneg = FALSE;
1123 		sc->hw.autoneg_advertised = 0;
1124 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1125 			sc->hw.forced_speed_duplex = em_100_full;
1126 		else
1127 			sc->hw.forced_speed_duplex = em_100_half;
1128 		break;
1129 	case IFM_10_T:
1130 		sc->hw.autoneg = FALSE;
1131 		sc->hw.autoneg_advertised = 0;
1132 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1133 			sc->hw.forced_speed_duplex = em_10_full;
1134 		else
1135 			sc->hw.forced_speed_duplex = em_10_half;
1136 		break;
1137 	default:
1138 		printf("%s: Unsupported media type\n", DEVNAME(sc));
1139 	}
1140 
1141 	/*
1142 	 * As the speed/duplex settings may have changed, we need to
1143 	 * reset the PHY.
1144 	 */
1145 	sc->hw.phy_reset_disable = FALSE;
1146 
1147 	em_init(sc);
1148 
1149 	return (0);
1150 }
1151 
1152 uint64_t
1153 em_flowstatus(struct em_softc *sc)
1154 {
1155 	u_int16_t ar, lpar;
1156 
1157 	if (sc->hw.media_type == em_media_type_fiber ||
1158 	    sc->hw.media_type == em_media_type_internal_serdes)
1159 		return (0);
1160 
1161 	em_read_phy_reg(&sc->hw, PHY_AUTONEG_ADV, &ar);
1162 	em_read_phy_reg(&sc->hw, PHY_LP_ABILITY, &lpar);
1163 
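	/*
	 * Resolve the negotiated pause bits: if both link partners advertise
	 * PAUSE the link runs symmetric flow control; the asymmetric
	 * PAUSE/ASM_DIR combinations below yield Tx-only or Rx-only pause.
	 */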
1164 	if ((ar & NWAY_AR_PAUSE) && (lpar & NWAY_LPAR_PAUSE))
1165 		return (IFM_FLOW|IFM_ETH_TXPAUSE|IFM_ETH_RXPAUSE);
1166 	else if (!(ar & NWAY_AR_PAUSE) && (ar & NWAY_AR_ASM_DIR) &&
1167 		(lpar & NWAY_LPAR_PAUSE) && (lpar & NWAY_LPAR_ASM_DIR))
1168 		return (IFM_FLOW|IFM_ETH_TXPAUSE);
1169 	else if ((ar & NWAY_AR_PAUSE) && (ar & NWAY_AR_ASM_DIR) &&
1170 		!(lpar & NWAY_LPAR_PAUSE) && (lpar & NWAY_LPAR_ASM_DIR))
1171 		return (IFM_FLOW|IFM_ETH_RXPAUSE);
1172 
1173 	return (0);
1174 }
1175 
1176 /*********************************************************************
1177  *
1178  *  This routine maps the mbufs to tx descriptors.
1179  *
1180  *  return the number of descriptors used, or 0 on failure
1181  **********************************************************************/
1182 u_int
1183 em_encap(struct em_queue *que, struct mbuf *m)
1184 {
1185 	struct em_softc *sc = que->sc;
1186 	struct em_packet *pkt;
1187 	struct em_tx_desc *desc;
1188 	bus_dmamap_t map;
1189 	u_int32_t txd_upper, txd_lower;
1190 	u_int head, last, used = 0;
1191 	int i, j;
1192 
1193 	/* For 82544 Workaround */
1194 	DESC_ARRAY		desc_array;
1195 	u_int32_t		array_elements;
1196 
1197 	/* get a dmamap for this packet from the next free slot */
1198 	head = que->tx.sc_tx_desc_head;
1199 	pkt = &que->tx.sc_tx_pkts_ring[head];
1200 	map = pkt->pkt_map;
1201 
1202 	switch (bus_dmamap_load_mbuf(sc->sc_dmat, map, m, BUS_DMA_NOWAIT)) {
1203 	case 0:
1204 		break;
1205 	case EFBIG:
1206 		if (m_defrag(m, M_DONTWAIT) == 0 &&
1207 		    bus_dmamap_load_mbuf(sc->sc_dmat, map, m,
1208 		     BUS_DMA_NOWAIT) == 0)
1209 			break;
1210 
1211 		/* FALLTHROUGH */
1212 	default:
1213 		sc->no_tx_dma_setup++;
1214 		return (0);
1215 	}
1216 
1217 	bus_dmamap_sync(sc->sc_dmat, map,
1218 	    0, map->dm_mapsize,
1219 	    BUS_DMASYNC_PREWRITE);
1220 
1221 	if (sc->hw.mac_type == em_82547) {
1222 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
1223 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
1224 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1225 	}
1226 
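	/*
	 * 82575 through i210 parts use an advanced Tx context descriptor for
	 * offload setup; 82543 and later legacy parts use a checksum context.
	 * Either may consume extra descriptors, which is reflected in "used"
	 * and in the head advance below.
	 */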
1227 	if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) {
1228 		used += em_tx_ctx_setup(que, m, head, &txd_upper, &txd_lower);
1229 	} else if (sc->hw.mac_type >= em_82543) {
1230 		used += em_transmit_checksum_setup(que, m, head,
1231 		    &txd_upper, &txd_lower);
1232 	} else {
1233 		txd_upper = txd_lower = 0;
1234 	}
1235 
1236 	head += used;
1237 	if (head >= sc->sc_tx_slots)
1238 		head -= sc->sc_tx_slots;
1239 
1240 	for (i = 0; i < map->dm_nsegs; i++) {
1241 		/* If sc is 82544 and on PCI-X bus */
1242 		if (sc->pcix_82544) {
1243 			/*
1244 			 * Check the Address and Length combination and
1245 			 * split the data accordingly
1246 			 */
1247 			array_elements = em_fill_descriptors(
1248 			    map->dm_segs[i].ds_addr, map->dm_segs[i].ds_len,
1249 			    &desc_array);
1250 			for (j = 0; j < array_elements; j++) {
1251 				desc = &que->tx.sc_tx_desc_ring[head];
1252 
1253 				desc->buffer_addr = htole64(
1254 					desc_array.descriptor[j].address);
1255 				desc->lower.data = htole32(
1256 					(que->tx.sc_txd_cmd | txd_lower |
1257 					 (u_int16_t)desc_array.descriptor[j].length));
1258 				desc->upper.data = htole32(txd_upper);
1259 
1260 				last = head;
1261 				if (++head == sc->sc_tx_slots)
1262 					head = 0;
1263 
1264 				used++;
1265 			}
1266 		} else {
1267 			desc = &que->tx.sc_tx_desc_ring[head];
1268 
1269 			desc->buffer_addr = htole64(map->dm_segs[i].ds_addr);
1270 			desc->lower.data = htole32(que->tx.sc_txd_cmd |
1271 			    txd_lower | map->dm_segs[i].ds_len);
1272 			desc->upper.data = htole32(txd_upper);
1273 
1274 			last = head;
1275 			if (++head == sc->sc_tx_slots)
1276 				head = 0;
1277 
1278 			used++;
1279 		}
1280 	}
1281 
1282 #if NVLAN > 0
1283 	/* Find out if we are in VLAN mode */
1284 	if (m->m_flags & M_VLANTAG && (sc->hw.mac_type < em_82575 ||
1285 	    sc->hw.mac_type > em_i210)) {
1286 		/* Set the VLAN id */
1287 		desc->upper.fields.special = htole16(m->m_pkthdr.ether_vtag);
1288 
1289 		/* Tell hardware to add tag */
1290 		desc->lower.data |= htole32(E1000_TXD_CMD_VLE);
1291 	}
1292 #endif
1293 
1294 	/* mark the packet with the mbuf and last desc slot */
1295 	pkt->pkt_m = m;
1296 	pkt->pkt_eop = last;
1297 
1298 	que->tx.sc_tx_desc_head = head;
1299 
1300 	/*
1301 	 * Last Descriptor of Packet
1302 	 * needs End Of Packet (EOP)
1303 	 * and Report Status (RS)
1304 	 */
1305 	desc->lower.data |= htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1306 
1307 	if (sc->hw.mac_type == em_82547) {
1308 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
1309 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
1310 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1311 	}
1312 
1313 	return (used);
1314 }
1315 
1316 /*********************************************************************
1317  *
1318  * 82547 workaround to avoid controller hang in half-duplex environment.
1319  * The workaround is to avoid queuing a large packet that would span
1320  * the internal Tx FIFO ring boundary. We need to reset the FIFO pointers
1321  *  in this case. We do that only when the FIFO is quiescent.
1322  *
1323  **********************************************************************/
1324 void
1325 em_82547_move_tail_locked(struct em_softc *sc)
1326 {
1327 	uint16_t hw_tdt;
1328 	uint16_t sw_tdt;
1329 	struct em_tx_desc *tx_desc;
1330 	uint16_t length = 0;
1331 	boolean_t eop = 0;
1332 	struct em_queue *que = sc->queues; /* single queue chip */
1333 
1334 	hw_tdt = E1000_READ_REG(&sc->hw, TDT(que->me));
1335 	sw_tdt = que->tx.sc_tx_desc_head;
1336 
1337 	while (hw_tdt != sw_tdt) {
1338 		tx_desc = &que->tx.sc_tx_desc_ring[hw_tdt];
1339 		length += tx_desc->lower.flags.length;
1340 		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
1341 		if (++hw_tdt == sc->sc_tx_slots)
1342 			hw_tdt = 0;
1343 
1344 		if (eop) {
1345 			if (em_82547_fifo_workaround(sc, length)) {
1346 				sc->tx_fifo_wrk_cnt++;
1347 				timeout_add(&sc->tx_fifo_timer_handle, 1);
1348 				break;
1349 			}
1350 			E1000_WRITE_REG(&sc->hw, TDT(que->me), hw_tdt);
1351 			em_82547_update_fifo_head(sc, length);
1352 			length = 0;
1353 		}
1354 	}
1355 }
1356 
1357 void
1358 em_82547_move_tail(void *arg)
1359 {
1360 	struct em_softc *sc = arg;
1361 	int s;
1362 
1363 	s = splnet();
1364 	em_82547_move_tail_locked(sc);
1365 	splx(s);
1366 }
1367 
1368 int
1369 em_82547_fifo_workaround(struct em_softc *sc, int len)
1370 {
1371 	int fifo_space, fifo_pkt_len;
1372 
1373 	fifo_pkt_len = EM_ROUNDUP(len + EM_FIFO_HDR, EM_FIFO_HDR);
1374 
1375 	if (sc->link_duplex == HALF_DUPLEX) {
1376 		fifo_space = sc->tx_fifo_size - sc->tx_fifo_head;
1377 
1378 		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
1379 			if (em_82547_tx_fifo_reset(sc))
1380 				return (0);
1381 			else
1382 				return (1);
1383 		}
1384 	}
1385 
1386 	return (0);
1387 }
1388 
1389 void
1390 em_82547_update_fifo_head(struct em_softc *sc, int len)
1391 {
1392 	int fifo_pkt_len = EM_ROUNDUP(len + EM_FIFO_HDR, EM_FIFO_HDR);
1393 
1394 	/* tx_fifo_head is always 16 byte aligned */
1395 	sc->tx_fifo_head += fifo_pkt_len;
1396 	if (sc->tx_fifo_head >= sc->tx_fifo_size)
1397 		sc->tx_fifo_head -= sc->tx_fifo_size;
1398 }
1399 
1400 int
1401 em_82547_tx_fifo_reset(struct em_softc *sc)
1402 {
1403 	uint32_t tctl;
1404 	struct em_queue *que = sc->queues; /* single queue chip */
1405 
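	/*
	 * Only reset the FIFO when the transmit path is idle: the descriptor
	 * ring is empty (TDT == TDH), the FIFO head and tail pointers agree
	 * (TDFT == TDFH and TDFTS == TDFHS) and no packets remain buffered
	 * (TDFPC == 0).
	 */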
1406 	if ((E1000_READ_REG(&sc->hw, TDT(que->me)) ==
1407 	     E1000_READ_REG(&sc->hw, TDH(que->me))) &&
1408 	    (E1000_READ_REG(&sc->hw, TDFT) ==
1409 	     E1000_READ_REG(&sc->hw, TDFH)) &&
1410 	    (E1000_READ_REG(&sc->hw, TDFTS) ==
1411 	     E1000_READ_REG(&sc->hw, TDFHS)) &&
1412 	    (E1000_READ_REG(&sc->hw, TDFPC) == 0)) {
1413 
1414 		/* Disable TX unit */
1415 		tctl = E1000_READ_REG(&sc->hw, TCTL);
1416 		E1000_WRITE_REG(&sc->hw, TCTL, tctl & ~E1000_TCTL_EN);
1417 
1418 		/* Reset FIFO pointers */
1419 		E1000_WRITE_REG(&sc->hw, TDFT, sc->tx_head_addr);
1420 		E1000_WRITE_REG(&sc->hw, TDFH, sc->tx_head_addr);
1421 		E1000_WRITE_REG(&sc->hw, TDFTS, sc->tx_head_addr);
1422 		E1000_WRITE_REG(&sc->hw, TDFHS, sc->tx_head_addr);
1423 
1424 		/* Re-enable TX unit */
1425 		E1000_WRITE_REG(&sc->hw, TCTL, tctl);
1426 		E1000_WRITE_FLUSH(&sc->hw);
1427 
1428 		sc->tx_fifo_head = 0;
1429 		sc->tx_fifo_reset_cnt++;
1430 
1431 		return (TRUE);
1432 	} else
1433 		return (FALSE);
1434 }
1435 
1436 void
1437 em_iff(struct em_softc *sc)
1438 {
1439 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1440 	struct arpcom *ac = &sc->sc_ac;
1441 	u_int32_t reg_rctl = 0;
1442 	u_int8_t  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_LENGTH_OF_ADDRESS];
1443 	struct ether_multi *enm;
1444 	struct ether_multistep step;
1445 	int i = 0;
1446 
1447 	IOCTL_DEBUGOUT("em_iff: begin");
1448 
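	/*
	 * The 82542 rev 2.0 needs the receiver held in reset (and MWI turned
	 * off) while the receive filter is rewritten; it is taken back out
	 * of reset at the end of this function.
	 */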
1449 	if (sc->hw.mac_type == em_82542_rev2_0) {
1450 		reg_rctl = E1000_READ_REG(&sc->hw, RCTL);
1451 		if (sc->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1452 			em_pci_clear_mwi(&sc->hw);
1453 		reg_rctl |= E1000_RCTL_RST;
1454 		E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
1455 		msec_delay(5);
1456 	}
1457 
1458 	reg_rctl = E1000_READ_REG(&sc->hw, RCTL);
1459 	reg_rctl &= ~(E1000_RCTL_MPE | E1000_RCTL_UPE);
1460 	ifp->if_flags &= ~IFF_ALLMULTI;
1461 
1462 	if (ifp->if_flags & IFF_PROMISC || ac->ac_multirangecnt > 0 ||
1463 	    ac->ac_multicnt > MAX_NUM_MULTICAST_ADDRESSES) {
1464 		ifp->if_flags |= IFF_ALLMULTI;
1465 		reg_rctl |= E1000_RCTL_MPE;
1466 		if (ifp->if_flags & IFF_PROMISC)
1467 			reg_rctl |= E1000_RCTL_UPE;
1468 	} else {
1469 		ETHER_FIRST_MULTI(step, ac, enm);
1470 		while (enm != NULL) {
1471 			bcopy(enm->enm_addrlo, mta + i, ETH_LENGTH_OF_ADDRESS);
1472 			i += ETH_LENGTH_OF_ADDRESS;
1473 
1474 			ETHER_NEXT_MULTI(step, enm);
1475 		}
1476 
1477 		em_mc_addr_list_update(&sc->hw, mta, ac->ac_multicnt, 0);
1478 	}
1479 
1480 	E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
1481 
1482 	if (sc->hw.mac_type == em_82542_rev2_0) {
1483 		reg_rctl = E1000_READ_REG(&sc->hw, RCTL);
1484 		reg_rctl &= ~E1000_RCTL_RST;
1485 		E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
1486 		msec_delay(5);
1487 		if (sc->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1488 			em_pci_set_mwi(&sc->hw);
1489 	}
1490 }
1491 
1492 /*********************************************************************
1493  *  Timer routine
1494  *
1495  *  This routine runs the smartspeed workaround and updates statistics.
1496  *
1497  **********************************************************************/
1498 
1499 void
1500 em_local_timer(void *arg)
1501 {
1502 	struct em_softc *sc = arg;
1503 	int s;
1504 
1505 	timeout_add_sec(&sc->timer_handle, 1);
1506 
1507 	s = splnet();
1508 	em_smartspeed(sc);
1509 	splx(s);
1510 
1511 #if NKSTAT > 0
1512 	if (sc->kstat != NULL && mtx_enter_try(&sc->kstat_mtx)) {
1513 		em_kstat_read(sc->kstat);
1514 		mtx_leave(&sc->kstat_mtx);
1515 	}
1516 #endif
1517 }
1518 
1519 void
1520 em_update_link_status(struct em_softc *sc)
1521 {
1522 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1523 	u_char link_state;
1524 
1525 	if (E1000_READ_REG(&sc->hw, STATUS) & E1000_STATUS_LU) {
1526 		if (sc->link_active == 0) {
1527 			em_get_speed_and_duplex(&sc->hw,
1528 						&sc->link_speed,
1529 						&sc->link_duplex);
1530 			/* Check if we may set SPEED_MODE bit on PCI-E */
1531 			if ((sc->link_speed == SPEED_1000) &&
1532 			    ((sc->hw.mac_type == em_82571) ||
1533 			    (sc->hw.mac_type == em_82572) ||
1534 			    (sc->hw.mac_type == em_82575) ||
1535 			    (sc->hw.mac_type == em_82576) ||
1536 			    (sc->hw.mac_type == em_82580))) {
1537 				int tarc0;
1538 
1539 				tarc0 = E1000_READ_REG(&sc->hw, TARC0);
1540 				tarc0 |= SPEED_MODE_BIT;
1541 				E1000_WRITE_REG(&sc->hw, TARC0, tarc0);
1542 			}
1543 			sc->link_active = 1;
1544 			sc->smartspeed = 0;
1545 			ifp->if_baudrate = IF_Mbps(sc->link_speed);
1546 		}
1547 		link_state = (sc->link_duplex == FULL_DUPLEX) ?
1548 		    LINK_STATE_FULL_DUPLEX : LINK_STATE_HALF_DUPLEX;
1549 	} else {
1550 		if (sc->link_active == 1) {
1551 			ifp->if_baudrate = sc->link_speed = 0;
1552 			sc->link_duplex = 0;
1553 			sc->link_active = 0;
1554 		}
1555 		link_state = LINK_STATE_DOWN;
1556 	}
1557 	if (ifp->if_link_state != link_state) {
1558 		ifp->if_link_state = link_state;
1559 		if_link_state_change(ifp);
1560 	}
1561 }
1562 
1563 /*********************************************************************
1564  *
1565  *  This routine disables all traffic on the adapter by issuing a
1566  *  global reset on the MAC and deallocates TX/RX buffers.
1567  *
1568  **********************************************************************/
1569 
1570 void
1571 em_stop(void *arg, int softonly)
1572 {
1573 	struct em_softc *sc = arg;
1574 	struct em_queue *que = sc->queues; /* Use only first queue. */
1575 	struct ifnet   *ifp = &sc->sc_ac.ac_if;
1576 
1577 	/* Tell the stack that the interface is no longer active */
1578 	ifp->if_flags &= ~IFF_RUNNING;
1579 
1580 	INIT_DEBUGOUT("em_stop: begin");
1581 
1582 	timeout_del(&que->rx_refill);
1583 	timeout_del(&sc->timer_handle);
1584 	timeout_del(&sc->tx_fifo_timer_handle);
1585 
1586 	if (!softonly)
1587 		em_disable_intr(sc);
1588 	if (sc->hw.mac_type >= em_pch_spt)
1589 		em_flush_desc_rings(sc);
1590 	if (!softonly)
1591 		em_reset_hw(&sc->hw);
1592 
1593 	intr_barrier(sc->sc_intrhand);
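	/*
	 * Wait for any interrupt handler and any transmit still in progress
	 * to finish before the descriptor rings are torn down below.
	 */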
1594 	ifq_barrier(&ifp->if_snd);
1595 
1596 	KASSERT((ifp->if_flags & IFF_RUNNING) == 0);
1597 
1598 	ifq_clr_oactive(&ifp->if_snd);
1599 	ifp->if_timer = 0;
1600 
1601 	em_free_transmit_structures(sc);
1602 	em_free_receive_structures(sc);
1603 }
1604 
1605 /*********************************************************************
1606  *
1607  *  Determine hardware revision.
1608  *
1609  **********************************************************************/
1610 void
1611 em_identify_hardware(struct em_softc *sc)
1612 {
1613 	u_int32_t reg;
1614 	struct pci_attach_args *pa = &sc->osdep.em_pa;
1615 
1616 	/* Make sure our PCI config space has the necessary stuff set */
1617 	sc->hw.pci_cmd_word = pci_conf_read(pa->pa_pc, pa->pa_tag,
1618 					    PCI_COMMAND_STATUS_REG);
1619 
1620 	/* Save off the information about this board */
1621 	sc->hw.vendor_id = PCI_VENDOR(pa->pa_id);
1622 	sc->hw.device_id = PCI_PRODUCT(pa->pa_id);
1623 
1624 	reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_CLASS_REG);
1625 	sc->hw.revision_id = PCI_REVISION(reg);
1626 
1627 	reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_SUBSYS_ID_REG);
1628 	sc->hw.subsystem_vendor_id = PCI_VENDOR(reg);
1629 	sc->hw.subsystem_id = PCI_PRODUCT(reg);
1630 
1631 	/* Identify the MAC */
1632 	if (em_set_mac_type(&sc->hw))
1633 		printf("%s: Unknown MAC Type\n", DEVNAME(sc));
1634 
1635 	if (sc->hw.mac_type == em_pchlan)
1636 		sc->hw.revision_id = PCI_PRODUCT(pa->pa_id) & 0x0f;
1637 
1638 	if (sc->hw.mac_type == em_82541 ||
1639 	    sc->hw.mac_type == em_82541_rev_2 ||
1640 	    sc->hw.mac_type == em_82547 ||
1641 	    sc->hw.mac_type == em_82547_rev_2)
1642 		sc->hw.phy_init_script = TRUE;
1643 }
1644 
1645 void
1646 em_legacy_irq_quirk_spt(struct em_softc *sc)
1647 {
1648 	uint32_t	reg;
1649 
1650 	/* Legacy interrupt: SPT needs a quirk. */
1651 	if (sc->hw.mac_type != em_pch_spt && sc->hw.mac_type != em_pch_cnp &&
1652 	    sc->hw.mac_type != em_pch_tgp && sc->hw.mac_type != em_pch_adp)
1653 		return;
1654 	if (sc->legacy_irq == 0)
1655 		return;
1656 
1657 	reg = EM_READ_REG(&sc->hw, E1000_FEXTNVM7);
1658 	reg |= E1000_FEXTNVM7_SIDE_CLK_UNGATE;
1659 	EM_WRITE_REG(&sc->hw, E1000_FEXTNVM7, reg);
1660 
1661 	reg = EM_READ_REG(&sc->hw, E1000_FEXTNVM9);
1662 	reg |= E1000_FEXTNVM9_IOSFSB_CLKGATE_DIS |
1663 	    E1000_FEXTNVM9_IOSFSB_CLKREQ_DIS;
1664 	EM_WRITE_REG(&sc->hw, E1000_FEXTNVM9, reg);
1665 }
1666 
1667 int
1668 em_allocate_pci_resources(struct em_softc *sc)
1669 {
1670 	int		val, rid;
1671 	struct pci_attach_args *pa = &sc->osdep.em_pa;
1672 	struct em_queue	       *que = NULL;
1673 
1674 	val = pci_conf_read(pa->pa_pc, pa->pa_tag, EM_MMBA);
1675 	if (PCI_MAPREG_TYPE(val) != PCI_MAPREG_TYPE_MEM) {
1676 		printf(": mmba is not mem space\n");
1677 		return (ENXIO);
1678 	}
1679 	if (pci_mapreg_map(pa, EM_MMBA, PCI_MAPREG_MEM_TYPE(val), 0,
1680 	    &sc->osdep.mem_bus_space_tag, &sc->osdep.mem_bus_space_handle,
1681 	    &sc->osdep.em_membase, &sc->osdep.em_memsize, 0)) {
1682 		printf(": cannot find mem space\n");
1683 		return (ENXIO);
1684 	}
1685 
1686 	switch (sc->hw.mac_type) {
1687 	case em_82544:
1688 	case em_82540:
1689 	case em_82545:
1690 	case em_82546:
1691 	case em_82541:
1692 	case em_82541_rev_2:
1693 		/* Figure out where our I/O BAR is. */
1694 		for (rid = PCI_MAPREG_START; rid < PCI_MAPREG_END;) {
1695 			val = pci_conf_read(pa->pa_pc, pa->pa_tag, rid);
1696 			if (PCI_MAPREG_TYPE(val) == PCI_MAPREG_TYPE_IO) {
1697 				sc->io_rid = rid;
1698 				break;
1699 			}
1700 			rid += 4;
1701 			if (PCI_MAPREG_MEM_TYPE(val) ==
1702 			    PCI_MAPREG_MEM_TYPE_64BIT)
1703 				rid += 4;	/* skip high bits, too */
1704 		}
1705 
1706 		if (pci_mapreg_map(pa, rid, PCI_MAPREG_TYPE_IO, 0,
1707 		    &sc->osdep.io_bus_space_tag, &sc->osdep.io_bus_space_handle,
1708 		    &sc->osdep.em_iobase, &sc->osdep.em_iosize, 0)) {
1709 			printf(": cannot find i/o space\n");
1710 			return (ENXIO);
1711 		}
1712 
1713 		sc->hw.io_base = 0;
1714 		break;
1715 	default:
1716 		break;
1717 	}
1718 
1719 	sc->osdep.em_flashoffset = 0;
1720 	/* For ICH8 and later families we need to locate the flash memory. */
1721 	if (sc->hw.mac_type >= em_pch_spt) {
1722 		sc->osdep.flash_bus_space_tag = sc->osdep.mem_bus_space_tag;
1723 		sc->osdep.flash_bus_space_handle = sc->osdep.mem_bus_space_handle;
1724 		sc->osdep.em_flashbase = 0;
1725 		sc->osdep.em_flashsize = 0;
1726 		sc->osdep.em_flashoffset = 0xe000;
1727 	} else if (IS_ICH8(sc->hw.mac_type)) {
1728 		val = pci_conf_read(pa->pa_pc, pa->pa_tag, EM_FLASH);
1729 		if (PCI_MAPREG_TYPE(val) != PCI_MAPREG_TYPE_MEM) {
1730 			printf(": flash is not mem space\n");
1731 			return (ENXIO);
1732 		}
1733 
1734 		if (pci_mapreg_map(pa, EM_FLASH, PCI_MAPREG_MEM_TYPE(val), 0,
1735 		    &sc->osdep.flash_bus_space_tag, &sc->osdep.flash_bus_space_handle,
1736 		    &sc->osdep.em_flashbase, &sc->osdep.em_flashsize, 0)) {
1737 			printf(": cannot find mem space\n");
1738 			return (ENXIO);
1739 		}
1740 	}
1741 
1742 	sc->osdep.dev = (struct device *)sc;
1743 	sc->hw.back = &sc->osdep;
1744 
1745 	/* Only one queue for the moment. */
1746 	que = malloc(sizeof(struct em_queue), M_DEVBUF, M_NOWAIT | M_ZERO);
1747 	if (que == NULL) {
1748 		printf(": unable to allocate queue memory\n");
1749 		return (ENOMEM);
1750 	}
1751 	que->me = 0;
1752 	que->sc = sc;
1753 	timeout_set(&que->rx_refill, em_rxrefill, que);
1754 
1755 	sc->queues = que;
1756 	sc->num_queues = 1;
1757 	sc->msix = 0;
1758 	sc->legacy_irq = 0;
1759 	if (em_allocate_msix(sc) && em_allocate_legacy(sc))
1760 		return (ENXIO);
1761 
1762 	/*
1763 	 * The ICP_xxxx device has multiple, duplicate register sets for
1764 	 * use when it is being used as a network processor.  Disable those
1765 	 * registers here, as they are not necessary in this context and
1766 	 * can confuse the system.
1767 	 */
1768 	if (sc->hw.mac_type == em_icp_xxxx) {
1769 		int offset;
1770 		pcireg_t val;
1771 
1772 		if (!pci_get_capability(sc->osdep.em_pa.pa_pc,
1773 		    sc->osdep.em_pa.pa_tag, PCI_CAP_ID_ST, &offset, &val)) {
1774 			return (0);
1775 		}
1776 		offset += PCI_ST_SMIA_OFFSET;
1777 		pci_conf_write(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
1778 		    offset, 0x06);
1779 		E1000_WRITE_REG(&sc->hw, IMC1, ~0x0);
1780 		E1000_WRITE_REG(&sc->hw, IMC2, ~0x0);
1781 	}
1782 	return (0);
1783 }
1784 
1785 void
1786 em_free_pci_resources(struct em_softc *sc)
1787 {
1788 	struct pci_attach_args *pa = &sc->osdep.em_pa;
1789 	pci_chipset_tag_t	pc = pa->pa_pc;
1790 	struct em_queue	       *que = NULL;
1791 	if (sc->sc_intrhand)
1792 		pci_intr_disestablish(pc, sc->sc_intrhand);
1793 	sc->sc_intrhand = 0;
1794 
1795 	if (sc->osdep.em_flashbase)
1796 		bus_space_unmap(sc->osdep.flash_bus_space_tag, sc->osdep.flash_bus_space_handle,
1797 				sc->osdep.em_flashsize);
1798 	sc->osdep.em_flashbase = 0;
1799 
1800 	if (sc->osdep.em_iobase)
1801 		bus_space_unmap(sc->osdep.io_bus_space_tag, sc->osdep.io_bus_space_handle,
1802 				sc->osdep.em_iosize);
1803 	sc->osdep.em_iobase = 0;
1804 
1805 	if (sc->osdep.em_membase)
1806 		bus_space_unmap(sc->osdep.mem_bus_space_tag, sc->osdep.mem_bus_space_handle,
1807 				sc->osdep.em_memsize);
1808 	sc->osdep.em_membase = 0;
1809 
1810 	FOREACH_QUEUE(sc, que) {
1811 		if (que->rx.sc_rx_desc_ring != NULL) {
1812 			que->rx.sc_rx_desc_ring = NULL;
1813 			em_dma_free(sc, &que->rx.sc_rx_dma);
1814 		}
1815 		if (que->tx.sc_tx_desc_ring != NULL) {
1816 			que->tx.sc_tx_desc_ring = NULL;
1817 			em_dma_free(sc, &que->tx.sc_tx_dma);
1818 		}
1819 		if (que->tag)
1820 			pci_intr_disestablish(pc, que->tag);
1821 		que->tag = NULL;
1822 		que->eims = 0;
1823 		que->me = 0;
1824 		que->sc = NULL;
1825 	}
1826 	sc->legacy_irq = 0;
1827 	sc->msix_linkvec = 0;
1828 	sc->msix_queuesmask = 0;
1829 	if (sc->queues)
1830 		free(sc->queues, M_DEVBUF,
1831 		    sc->num_queues * sizeof(struct em_queue));
1832 	sc->num_queues = 0;
1833 	sc->queues = NULL;
1834 }
1835 
1836 /*********************************************************************
1837  *
1838  *  Initialize the hardware to a configuration as specified by the
1839  *  em_softc structure. The controller is reset, the EEPROM is
1840  *  verified, the MAC address is set, then the shared initialization
1841  *  routines are called.
1842  *
1843  **********************************************************************/
1844 int
1845 em_hardware_init(struct em_softc *sc)
1846 {
1847 	uint32_t ret_val;
1848 	u_int16_t rx_buffer_size;
1849 
1850 	INIT_DEBUGOUT("em_hardware_init: begin");
1851 	if (sc->hw.mac_type >= em_pch_spt)
1852 		em_flush_desc_rings(sc);
1853 	/* Issue a global reset */
1854 	em_reset_hw(&sc->hw);
1855 
1856 	/* When hardware is reset, fifo_head is also reset */
1857 	sc->tx_fifo_head = 0;
1858 
1859 	/* Make sure we have a good EEPROM before we read from it */
1860 	if (em_get_flash_presence_i210(&sc->hw) &&
1861 	    em_validate_eeprom_checksum(&sc->hw) < 0) {
1862 		/*
1863 		 * Some PCIe parts fail the first check due to
1864 		 * the link being in a sleep state; call it again.
1865 		 * If it fails a second time, it is a real issue.
1866 		 */
1867 		if (em_validate_eeprom_checksum(&sc->hw) < 0) {
1868 			printf("%s: The EEPROM Checksum Is Not Valid\n",
1869 			       DEVNAME(sc));
1870 			return (EIO);
1871 		}
1872 	}
1873 
1874 	if (em_get_flash_presence_i210(&sc->hw) &&
1875 	    em_read_part_num(&sc->hw, &(sc->part_num)) < 0) {
1876 		printf("%s: EEPROM read error while reading part number\n",
1877 		       DEVNAME(sc));
1878 		return (EIO);
1879 	}
1880 
1881 	/* Set up smart power down as default off on newer adapters */
1882 	if (!em_smart_pwr_down &&
1883 	     (sc->hw.mac_type == em_82571 ||
1884 	      sc->hw.mac_type == em_82572 ||
1885 	      sc->hw.mac_type == em_82575 ||
1886 	      sc->hw.mac_type == em_82576 ||
1887 	      sc->hw.mac_type == em_82580 ||
1888 	      sc->hw.mac_type == em_i210 ||
1889 	      sc->hw.mac_type == em_i350 )) {
1890 		uint16_t phy_tmp = 0;
1891 
1892 		/* Speed up time to link by disabling smart power down */
1893 		em_read_phy_reg(&sc->hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
1894 		phy_tmp &= ~IGP02E1000_PM_SPD;
1895 		em_write_phy_reg(&sc->hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
1896 	}
1897 
1898 	em_legacy_irq_quirk_spt(sc);
1899 
1900 	/*
1901 	 * These parameters control the automatic generation (Tx) and
1902 	 * response (Rx) to Ethernet PAUSE frames.
1903 	 * - High water mark should allow for at least two frames to be
1904 	 *   received after sending an XOFF.
1905 	 * - Low water mark works best when it is very near the high water mark.
1906 	 *   This allows the receiver to restart by sending XON when it has
1907 	 *   drained a bit.  Here we use an arbitrary value of 1500 which will
1908 	 *   restart after one full frame is pulled from the buffer.  There
1909 	 *   could be several smaller frames in the buffer and if so they will
1910 	 *   not trigger the XON until their total number reduces the buffer
1911 	 *   by 1500.
1912 	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
1913 	 */
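	/*
	 * Illustrative example with hypothetical values (not read from any
	 * particular adapter): if PBA reports 48 KB of Rx packet buffer
	 * (0x30) and max_frame_size is 1518, then
	 *   rx_buffer_size = 0x30 << 10 = 49152
	 *   fc_high_water  = 49152 - EM_ROUNDUP(1518, 1024) = 47104
	 *   fc_low_water   = 47104 - 1500 = 45604
	 */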
1914 	rx_buffer_size = ((E1000_READ_REG(&sc->hw, PBA) & 0xffff) << 10 );
1915 
1916 	sc->hw.fc_high_water = rx_buffer_size -
1917 	    EM_ROUNDUP(sc->hw.max_frame_size, 1024);
1918 	sc->hw.fc_low_water = sc->hw.fc_high_water - 1500;
1919 	if (sc->hw.mac_type == em_80003es2lan)
1920 		sc->hw.fc_pause_time = 0xFFFF;
1921 	else
1922 		sc->hw.fc_pause_time = 1000;
1923 	sc->hw.fc_send_xon = TRUE;
1924 	sc->hw.fc = E1000_FC_FULL;
1925 
1926 	em_disable_aspm(sc);
1927 
1928 	if ((ret_val = em_init_hw(sc)) != 0) {
1929 		if (ret_val == E1000_DEFER_INIT) {
1930 			INIT_DEBUGOUT("\nHardware Initialization Deferred ");
1931 			return (EAGAIN);
1932 		}
1933 		printf("\n%s: Hardware Initialization Failed: %d\n",
1934 		       DEVNAME(sc), ret_val);
1935 		return (EIO);
1936 	}
1937 
1938 	em_check_for_link(&sc->hw);
1939 
1940 	return (0);
1941 }
1942 
1943 /*********************************************************************
1944  *
1945  *  Setup networking device structure and register an interface.
1946  *
1947  **********************************************************************/
1948 void
1949 em_setup_interface(struct em_softc *sc)
1950 {
1951 	struct ifnet   *ifp;
1952 	uint64_t fiber_type = IFM_1000_SX;
1953 
1954 	INIT_DEBUGOUT("em_setup_interface: begin");
1955 
1956 	ifp = &sc->sc_ac.ac_if;
1957 	strlcpy(ifp->if_xname, DEVNAME(sc), IFNAMSIZ);
1958 	ifp->if_softc = sc;
1959 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1960 	ifp->if_xflags = IFXF_MPSAFE;
1961 	ifp->if_ioctl = em_ioctl;
1962 	ifp->if_qstart = em_start;
1963 	ifp->if_watchdog = em_watchdog;
1964 	ifp->if_hardmtu =
1965 		sc->hw.max_frame_size - ETHER_HDR_LEN - ETHER_CRC_LEN;
1966 	ifq_init_maxlen(&ifp->if_snd, sc->sc_tx_slots - 1);
1967 
1968 	ifp->if_capabilities = IFCAP_VLAN_MTU;
1969 
1970 #if NVLAN > 0
1971 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
1972 #endif
1973 
1974 	if (sc->hw.mac_type >= em_82543) {
1975 		ifp->if_capabilities |= IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4;
1976 	}
1977 	if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) {
1978 		ifp->if_capabilities |= IFCAP_CSUM_IPv4;
1979 		ifp->if_capabilities |= IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
1980 	}
1981 
1982 	/*
1983 	 * Specify the media types supported by this adapter and register
1984 	 * callbacks to update media and link information
1985 	 */
1986 	ifmedia_init(&sc->media, IFM_IMASK, em_media_change,
1987 		     em_media_status);
1988 	if (sc->hw.media_type == em_media_type_fiber ||
1989 	    sc->hw.media_type == em_media_type_internal_serdes) {
1990 		if (sc->hw.mac_type == em_82545)
1991 			fiber_type = IFM_1000_LX;
1992 		ifmedia_add(&sc->media, IFM_ETHER | fiber_type | IFM_FDX,
1993 			    0, NULL);
1994 		ifmedia_add(&sc->media, IFM_ETHER | fiber_type,
1995 			    0, NULL);
1996 	} else {
1997 		ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T, 0, NULL);
1998 		ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T | IFM_FDX,
1999 			    0, NULL);
2000 		ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX,
2001 			    0, NULL);
2002 		ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2003 			    0, NULL);
2004 		if (sc->hw.phy_type != em_phy_ife) {
2005 			ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
2006 				    0, NULL);
2007 			ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_T, 0, NULL);
2008 		}
2009 	}
2010 	ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2011 	ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO);
2012 
2013 	if_attach(ifp);
2014 	ether_ifattach(ifp);
2015 	em_enable_intr(sc);
2016 }
2017 
2018 int
2019 em_detach(struct device *self, int flags)
2020 {
2021 	struct em_softc *sc = (struct em_softc *)self;
2022 	struct ifnet *ifp = &sc->sc_ac.ac_if;
2023 	struct pci_attach_args *pa = &sc->osdep.em_pa;
2024 	pci_chipset_tag_t	pc = pa->pa_pc;
2025 
2026 	if (sc->sc_intrhand)
2027 		pci_intr_disestablish(pc, sc->sc_intrhand);
2028 	sc->sc_intrhand = 0;
2029 
2030 	em_stop(sc, 1);
2031 
2032 	em_free_pci_resources(sc);
2033 
2034 	ether_ifdetach(ifp);
2035 	if_detach(ifp);
2036 
2037 	return (0);
2038 }
2039 
2040 int
2041 em_activate(struct device *self, int act)
2042 {
2043 	struct em_softc *sc = (struct em_softc *)self;
2044 	struct ifnet *ifp = &sc->sc_ac.ac_if;
2045 	int rv = 0;
2046 
2047 	switch (act) {
2048 	case DVACT_SUSPEND:
2049 		if (ifp->if_flags & IFF_RUNNING)
2050 			em_stop(sc, 0);
2051 		/* We have no children at the moment, but we will soon. */
2052 		rv = config_activate_children(self, act);
2053 		break;
2054 	case DVACT_RESUME:
2055 		if (ifp->if_flags & IFF_UP)
2056 			em_init(sc);
2057 		break;
2058 	default:
2059 		rv = config_activate_children(self, act);
2060 		break;
2061 	}
2062 	return (rv);
2063 }
2064 
2065 /*********************************************************************
2066  *
2067  *  Workaround for SmartSpeed on 82541 and 82547 controllers
2068  *
2069  **********************************************************************/
2070 void
2071 em_smartspeed(struct em_softc *sc)
2072 {
2073 	uint16_t phy_tmp;
2074 
2075 	if (sc->link_active || (sc->hw.phy_type != em_phy_igp) ||
2076 	    !sc->hw.autoneg || !(sc->hw.autoneg_advertised & ADVERTISE_1000_FULL))
2077 		return;
2078 
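	/*
	 * sc->smartspeed acts as a simple poll counter for this workaround:
	 * at 0 we look for repeated master/slave config faults and clear the
	 * manual master/slave setting; once the counter reaches
	 * EM_SMARTSPEED_DOWNSHIFT we retry with the manual setting enabled,
	 * and after EM_SMARTSPEED_MAX polls it wraps back to 0.
	 */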
2079 	if (sc->smartspeed == 0) {
2080 		/* If the Master/Slave config fault is asserted twice,
2081 		 * we assume the faults are back-to-back. */
2082 		em_read_phy_reg(&sc->hw, PHY_1000T_STATUS, &phy_tmp);
2083 		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
2084 			return;
2085 		em_read_phy_reg(&sc->hw, PHY_1000T_STATUS, &phy_tmp);
2086 		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
2087 			em_read_phy_reg(&sc->hw, PHY_1000T_CTRL,
2088 					&phy_tmp);
2089 			if (phy_tmp & CR_1000T_MS_ENABLE) {
2090 				phy_tmp &= ~CR_1000T_MS_ENABLE;
2091 				em_write_phy_reg(&sc->hw,
2092 						    PHY_1000T_CTRL, phy_tmp);
2093 				sc->smartspeed++;
2094 				if (sc->hw.autoneg &&
2095 				    !em_phy_setup_autoneg(&sc->hw) &&
2096 				    !em_read_phy_reg(&sc->hw, PHY_CTRL,
2097 						       &phy_tmp)) {
2098 					phy_tmp |= (MII_CR_AUTO_NEG_EN |
2099 						    MII_CR_RESTART_AUTO_NEG);
2100 					em_write_phy_reg(&sc->hw,
2101 							 PHY_CTRL, phy_tmp);
2102 				}
2103 			}
2104 		}
2105 		return;
2106 	} else if (sc->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
2107 		/* If still no link, perhaps using 2/3 pair cable */
2108 		em_read_phy_reg(&sc->hw, PHY_1000T_CTRL, &phy_tmp);
2109 		phy_tmp |= CR_1000T_MS_ENABLE;
2110 		em_write_phy_reg(&sc->hw, PHY_1000T_CTRL, phy_tmp);
2111 		if (sc->hw.autoneg &&
2112 		    !em_phy_setup_autoneg(&sc->hw) &&
2113 		    !em_read_phy_reg(&sc->hw, PHY_CTRL, &phy_tmp)) {
2114 			phy_tmp |= (MII_CR_AUTO_NEG_EN |
2115 				    MII_CR_RESTART_AUTO_NEG);
2116 			em_write_phy_reg(&sc->hw, PHY_CTRL, phy_tmp);
2117 		}
2118 	}
2119 	/* Restart process after EM_SMARTSPEED_MAX iterations */
2120 	if (sc->smartspeed++ == EM_SMARTSPEED_MAX)
2121 		sc->smartspeed = 0;
2122 }
2123 
2124 /*
2125  * Manage DMA'able memory.
2126  */
2127 int
2128 em_dma_malloc(struct em_softc *sc, bus_size_t size, struct em_dma_alloc *dma)
2129 {
2130 	int r;
2131 
2132 	r = bus_dmamap_create(sc->sc_dmat, size, 1,
2133 	    size, 0, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &dma->dma_map);
2134 	if (r != 0)
2135 		return (r);
2136 
2137 	r = bus_dmamem_alloc(sc->sc_dmat, size, PAGE_SIZE, 0, &dma->dma_seg,
2138 	    1, &dma->dma_nseg, BUS_DMA_WAITOK | BUS_DMA_ZERO);
2139 	if (r != 0)
2140 		goto destroy;
2141 
2142 	r = bus_dmamem_map(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg, size,
2143 	    &dma->dma_vaddr, BUS_DMA_WAITOK | BUS_DMA_COHERENT);
2144 	if (r != 0)
2145 		goto free;
2146 
2147 	r = bus_dmamap_load(sc->sc_dmat, dma->dma_map, dma->dma_vaddr, size,
2148 	    NULL, BUS_DMA_WAITOK);
2149 	if (r != 0)
2150 		goto unmap;
2151 
2152 	dma->dma_size = size;
2153 	return (0);
2154 
2155 unmap:
2156 	bus_dmamem_unmap(sc->sc_dmat, dma->dma_vaddr, size);
2157 free:
2158 	bus_dmamem_free(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg);
2159 destroy:
2160 	bus_dmamap_destroy(sc->sc_dmat, dma->dma_map);
2161 
2162 	return (r);
2163 }
2164 
2165 void
2166 em_dma_free(struct em_softc *sc, struct em_dma_alloc *dma)
2167 {
2168 	bus_dmamap_unload(sc->sc_dmat, dma->dma_map);
2169 	bus_dmamem_unmap(sc->sc_dmat, dma->dma_vaddr, dma->dma_size);
2170 	bus_dmamem_free(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg);
2171 	bus_dmamap_destroy(sc->sc_dmat, dma->dma_map);
2172 }
2173 
2174 /*********************************************************************
2175  *
2176  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2177  *  the information needed to transmit a packet on the wire.
2178  *
2179  **********************************************************************/
2180 int
2181 em_allocate_transmit_structures(struct em_softc *sc)
2182 {
2183 	struct em_queue *que;
2184 
2185 	FOREACH_QUEUE(sc, que) {
2186 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2187 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2188 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2189 
2190 		que->tx.sc_tx_pkts_ring = mallocarray(sc->sc_tx_slots,
2191 		    sizeof(*que->tx.sc_tx_pkts_ring), M_DEVBUF, M_NOWAIT | M_ZERO);
2192 		if (que->tx.sc_tx_pkts_ring == NULL) {
2193 			printf("%s: Unable to allocate tx_buffer memory\n",
2194 			    DEVNAME(sc));
2195 			return (ENOMEM);
2196 		}
2197 	}
2198 
2199 	return (0);
2200 }
2201 
2202 /*********************************************************************
2203  *
2204  *  Allocate and initialize transmit structures.
2205  *
2206  **********************************************************************/
2207 int
2208 em_setup_transmit_structures(struct em_softc *sc)
2209 {
2210 	struct em_queue *que;
2211 	struct em_packet *pkt;
2212 	int error, i;
2213 
2214 	if ((error = em_allocate_transmit_structures(sc)) != 0)
2215 		goto fail;
2216 
2217 	FOREACH_QUEUE(sc, que) {
2218 		bzero((void *) que->tx.sc_tx_desc_ring,
2219 		    (sizeof(struct em_tx_desc)) * sc->sc_tx_slots);
2220 
2221 		for (i = 0; i < sc->sc_tx_slots; i++) {
2222 			pkt = &que->tx.sc_tx_pkts_ring[i];
2223 			error = bus_dmamap_create(sc->sc_dmat, MAX_JUMBO_FRAME_SIZE,
2224 			    EM_MAX_SCATTER / (sc->pcix_82544 ? 2 : 1),
2225 			    MAX_JUMBO_FRAME_SIZE, 0, BUS_DMA_NOWAIT, &pkt->pkt_map);
2226 			if (error != 0) {
2227 				printf("%s: Unable to create TX DMA map\n",
2228 				    DEVNAME(sc));
2229 				goto fail;
2230 			}
2231 		}
2232 
2233 		que->tx.sc_tx_desc_head = 0;
2234 		que->tx.sc_tx_desc_tail = 0;
2235 
2236 		/* Set checksum context */
2237 		que->tx.active_checksum_context = OFFLOAD_NONE;
2238 	}
2239 
2240 	return (0);
2241 
2242 fail:
2243 	em_free_transmit_structures(sc);
2244 	return (error);
2245 }
2246 
2247 /*********************************************************************
2248  *
2249  *  Enable transmit unit.
2250  *
2251  **********************************************************************/
2252 void
2253 em_initialize_transmit_unit(struct em_softc *sc)
2254 {
2255 	u_int32_t	reg_tctl, reg_tipg = 0;
2256 	u_int64_t	bus_addr;
2257 	struct em_queue *que;
2258 
2259 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
2260 
2261 	FOREACH_QUEUE(sc, que) {
2262 		/* Setup the Base and Length of the Tx Descriptor Ring */
2263 		bus_addr = que->tx.sc_tx_dma.dma_map->dm_segs[0].ds_addr;
2264 		E1000_WRITE_REG(&sc->hw, TDLEN(que->me),
2265 		    sc->sc_tx_slots *
2266 		    sizeof(struct em_tx_desc));
2267 		E1000_WRITE_REG(&sc->hw, TDBAH(que->me), (u_int32_t)(bus_addr >> 32));
2268 		E1000_WRITE_REG(&sc->hw, TDBAL(que->me), (u_int32_t)bus_addr);
2269 
2270 		/* Setup the HW Tx Head and Tail descriptor pointers */
2271 		E1000_WRITE_REG(&sc->hw, TDT(que->me), 0);
2272 		E1000_WRITE_REG(&sc->hw, TDH(que->me), 0);
2273 
2274 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
2275 		    E1000_READ_REG(&sc->hw, TDBAL(que->me)),
2276 		    E1000_READ_REG(&sc->hw, TDLEN(que->me)));
2277 
2278 		/* Set the default values for the Tx Inter Packet Gap timer */
2279 		switch (sc->hw.mac_type) {
2280 		case em_82542_rev2_0:
2281 		case em_82542_rev2_1:
2282 			reg_tipg = DEFAULT_82542_TIPG_IPGT;
2283 			reg_tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2284 			reg_tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2285 			break;
2286 		case em_80003es2lan:
2287 			reg_tipg = DEFAULT_82543_TIPG_IPGR1;
2288 			reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2289 			break;
2290 		default:
2291 			if (sc->hw.media_type == em_media_type_fiber ||
2292 			    sc->hw.media_type == em_media_type_internal_serdes)
2293 				reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
2294 			else
2295 				reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
2296 			reg_tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2297 			reg_tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2298 		}
2299 
2300 
2301 		E1000_WRITE_REG(&sc->hw, TIPG, reg_tipg);
2302 		E1000_WRITE_REG(&sc->hw, TIDV, sc->tx_int_delay);
2303 		if (sc->hw.mac_type >= em_82540)
2304 			E1000_WRITE_REG(&sc->hw, TADV, sc->tx_abs_int_delay);
2305 
2306 		/* Setup Transmit Descriptor Base Settings */
2307 		que->tx.sc_txd_cmd = E1000_TXD_CMD_IFCS;
2308 
2309 		if (sc->hw.mac_type == em_82575 || sc->hw.mac_type == em_82580 ||
2310 		    sc->hw.mac_type == em_82576 ||
2311 		    sc->hw.mac_type == em_i210 || sc->hw.mac_type == em_i350) {
2312 			/* 82575/6 need to enable the TX queue and lack the IDE bit */
2313 			reg_tctl = E1000_READ_REG(&sc->hw, TXDCTL(que->me));
2314 			reg_tctl |= E1000_TXDCTL_QUEUE_ENABLE;
2315 			E1000_WRITE_REG(&sc->hw, TXDCTL(que->me), reg_tctl);
2316 		} else if (sc->tx_int_delay > 0)
2317 			que->tx.sc_txd_cmd |= E1000_TXD_CMD_IDE;
2318 	}
2319 
2320 	/* Program the Transmit Control Register */
2321 	reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
2322 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2323 	if (sc->hw.mac_type >= em_82571)
2324 		reg_tctl |= E1000_TCTL_MULR;
2325 	if (sc->link_duplex == FULL_DUPLEX)
2326 		reg_tctl |= E1000_FDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2327 	else
2328 		reg_tctl |= E1000_HDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2329 	/* This write will effectively turn on the transmit unit */
2330 	E1000_WRITE_REG(&sc->hw, TCTL, reg_tctl);
2331 
2332 	/* SPT Si errata workaround to avoid data corruption */
2333 
2334 	if (sc->hw.mac_type == em_pch_spt) {
2335 		uint32_t	reg_val;
2336 
2337 		reg_val = EM_READ_REG(&sc->hw, E1000_IOSFPC);
2338 		reg_val |= E1000_RCTL_RDMTS_HEX;
2339 		EM_WRITE_REG(&sc->hw, E1000_IOSFPC, reg_val);
2340 
2341 		reg_val = E1000_READ_REG(&sc->hw, TARC0);
2342 		/* i218-i219 Specification Update 1.5.4.5 */
2343 		reg_val &= ~E1000_TARC0_CB_MULTIQ_3_REQ;
2344 		reg_val |= E1000_TARC0_CB_MULTIQ_2_REQ;
2345 		E1000_WRITE_REG(&sc->hw, TARC0, reg_val);
2346 	}
2347 }
2348 
2349 /*********************************************************************
2350  *
2351  *  Free all transmit related data structures.
2352  *
2353  **********************************************************************/
2354 void
2355 em_free_transmit_structures(struct em_softc *sc)
2356 {
2357 	struct em_queue *que;
2358 	struct em_packet *pkt;
2359 	int i;
2360 
2361 	INIT_DEBUGOUT("free_transmit_structures: begin");
2362 
2363 	FOREACH_QUEUE(sc, que) {
2364 		if (que->tx.sc_tx_pkts_ring != NULL) {
2365 			for (i = 0; i < sc->sc_tx_slots; i++) {
2366 				pkt = &que->tx.sc_tx_pkts_ring[i];
2367 
2368 				if (pkt->pkt_m != NULL) {
2369 					bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
2370 					    0, pkt->pkt_map->dm_mapsize,
2371 					    BUS_DMASYNC_POSTWRITE);
2372 					bus_dmamap_unload(sc->sc_dmat,
2373 					    pkt->pkt_map);
2374 
2375 					m_freem(pkt->pkt_m);
2376 					pkt->pkt_m = NULL;
2377 				}
2378 
2379 				if (pkt->pkt_map != NULL) {
2380 					bus_dmamap_destroy(sc->sc_dmat,
2381 					    pkt->pkt_map);
2382 					pkt->pkt_map = NULL;
2383 				}
2384 			}
2385 
2386 			free(que->tx.sc_tx_pkts_ring, M_DEVBUF,
2387 			    sc->sc_tx_slots * sizeof(*que->tx.sc_tx_pkts_ring));
2388 			que->tx.sc_tx_pkts_ring = NULL;
2389 		}
2390 
2391 		bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2392 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2393 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2394 	}
2395 }
2396 
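/*********************************************************************
 *
 *  Build an advanced Tx context descriptor (VLAN tag and checksum
 *  offload hints) in the ring slot at "head".  Returns 1 if a context
 *  descriptor was written, 0 if no offload is required.
 *
 **********************************************************************/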
2397 u_int
2398 em_tx_ctx_setup(struct em_queue *que, struct mbuf *mp, u_int head,
2399     u_int32_t *olinfo_status, u_int32_t *cmd_type_len)
2400 {
2401 	struct ether_extracted ext;
2402 	struct e1000_adv_tx_context_desc *TD;
2403 	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
2404 	int off = 0;
2405 	uint8_t iphlen;
2406 
2407 	*olinfo_status = 0;
2408 	*cmd_type_len = 0;
2409 	TD = (struct e1000_adv_tx_context_desc *)&que->tx.sc_tx_desc_ring[head];
2410 
2411 #if NVLAN > 0
2412 	if (ISSET(mp->m_flags, M_VLANTAG)) {
2413 		uint32_t vtag = mp->m_pkthdr.ether_vtag;
2414 		vlan_macip_lens |= vtag << E1000_ADVTXD_VLAN_SHIFT;
2415 		*cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
2416 		off = 1;
2417 	}
2418 #endif
2419 
2420 	ether_extract_headers(mp, &ext);
2421 
2422 	vlan_macip_lens |= (sizeof(*ext.eh) << E1000_ADVTXD_MACLEN_SHIFT);
2423 
2424 	if (ext.ip4) {
2425 		iphlen = ext.ip4->ip_hl << 2;
2426 
2427 		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
2428 		if (ISSET(mp->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT)) {
2429 			*olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
2430 			off = 1;
2431 		}
2432 #ifdef INET6
2433 	} else if (ext.ip6) {
2434 		iphlen = sizeof(*ext.ip6);
2435 
2436 		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
2437 #endif
2438 	} else {
2439 		iphlen = 0;
2440 	}
2441 
2442 	*cmd_type_len |= E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS;
2443 	*cmd_type_len |= E1000_ADVTXD_DCMD_DEXT;
2444 	*olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
2445 	vlan_macip_lens |= iphlen;
2446 	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
2447 
2448 	if (ext.tcp) {
2449 		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
2450 		if (ISSET(mp->m_pkthdr.csum_flags, M_TCP_CSUM_OUT)) {
2451 			*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2452 			off = 1;
2453 		}
2454 	} else if (ext.udp) {
2455 		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
2456 		if (ISSET(mp->m_pkthdr.csum_flags, M_UDP_CSUM_OUT)) {
2457 			*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2458 			off = 1;
2459 		}
2460 	}
2461 
2462 	if (!off)
2463 		return (0);
2464 
2465 	/* 82575 needs the queue index added */
2466 	if (que->sc->hw.mac_type == em_82575)
2467 		mss_l4len_idx |= (que->me & 0xff) << 4;
2468 
2469 	htolem32(&TD->vlan_macip_lens, vlan_macip_lens);
2470 	htolem32(&TD->type_tucmd_mlhl, type_tucmd_mlhl);
2471 	htolem32(&TD->u.seqnum_seed, 0);
2472 	htolem32(&TD->mss_l4len_idx, mss_l4len_idx);
2473 
2474 	return (1);
2475 }
2476 
2477 /*********************************************************************
2478  *
2479  *  The offload context needs to be set when we transfer the first
2480  *  packet of a particular protocol (TCP/UDP). We change the
2481  *  context only if the protocol type changes.
2482  *
2483  **********************************************************************/
2484 u_int
2485 em_transmit_checksum_setup(struct em_queue *que, struct mbuf *mp, u_int head,
2486     u_int32_t *txd_upper, u_int32_t *txd_lower)
2487 {
2488 	struct em_context_desc *TXD;
2489 
2490 	if (mp->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) {
2491 		*txd_upper = E1000_TXD_POPTS_TXSM << 8;
2492 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
2493 		if (que->tx.active_checksum_context == OFFLOAD_TCP_IP)
2494 			return (0);
2495 		else
2496 			que->tx.active_checksum_context = OFFLOAD_TCP_IP;
2497 	} else if (mp->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) {
2498 		*txd_upper = E1000_TXD_POPTS_TXSM << 8;
2499 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
2500 		if (que->tx.active_checksum_context == OFFLOAD_UDP_IP)
2501 			return (0);
2502 		else
2503 			que->tx.active_checksum_context = OFFLOAD_UDP_IP;
2504 	} else {
2505 		*txd_upper = 0;
2506 		*txd_lower = 0;
2507 		return (0);
2508 	}
2509 
2510 	/* If we reach this point, the checksum offload context
2511 	 * needs to be reset.
2512 	 */
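	/*
	 * For a plain IPv4 header without options these fixed offsets work
	 * out to: ipcso = 14 + 10 = 24 (IP checksum), tucss = 34, and
	 * tucso = 34 + 16 = 50 for TCP or 34 + 6 = 40 for UDP.
	 */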
2513 	TXD = (struct em_context_desc *)&que->tx.sc_tx_desc_ring[head];
2514 
2515 	TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
2516 	TXD->lower_setup.ip_fields.ipcso =
2517 	    ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
2518 	TXD->lower_setup.ip_fields.ipcse =
2519 	    htole16(ETHER_HDR_LEN + sizeof(struct ip) - 1);
2520 
2521 	TXD->upper_setup.tcp_fields.tucss =
2522 	    ETHER_HDR_LEN + sizeof(struct ip);
2523 	TXD->upper_setup.tcp_fields.tucse = htole16(0);
2524 
2525 	if (que->tx.active_checksum_context == OFFLOAD_TCP_IP) {
2526 		TXD->upper_setup.tcp_fields.tucso =
2527 		    ETHER_HDR_LEN + sizeof(struct ip) +
2528 		    offsetof(struct tcphdr, th_sum);
2529 	} else if (que->tx.active_checksum_context == OFFLOAD_UDP_IP) {
2530 		TXD->upper_setup.tcp_fields.tucso =
2531 		    ETHER_HDR_LEN + sizeof(struct ip) +
2532 		    offsetof(struct udphdr, uh_sum);
2533 	}
2534 
2535 	TXD->tcp_seg_setup.data = htole32(0);
2536 	TXD->cmd_and_length = htole32(que->tx.sc_txd_cmd | E1000_TXD_CMD_DEXT);
2537 
2538 	return (1);
2539 }
2540 
2541 /**********************************************************************
2542  *
2543  *  Examine each tx_buffer in the used queue. If the hardware is done
2544  *  processing the packet then free associated resources. The
2545  *  tx_buffer is put back on the free queue.
2546  *
2547  **********************************************************************/
2548 void
2549 em_txeof(struct em_queue *que)
2550 {
2551 	struct em_softc *sc = que->sc;
2552 	struct ifnet *ifp = &sc->sc_ac.ac_if;
2553 	struct em_packet *pkt;
2554 	struct em_tx_desc *desc;
2555 	u_int head, tail;
2556 	u_int free = 0;
2557 
2558 	head = que->tx.sc_tx_desc_head;
2559 	tail = que->tx.sc_tx_desc_tail;
2560 
2561 	if (head == tail)
2562 		return;
2563 
2564 	bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2565 	    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2566 	    BUS_DMASYNC_POSTREAD);
2567 
2568 	do {
2569 		pkt = &que->tx.sc_tx_pkts_ring[tail];
2570 		desc = &que->tx.sc_tx_desc_ring[pkt->pkt_eop];
2571 
2572 		if (!ISSET(desc->upper.fields.status, E1000_TXD_STAT_DD))
2573 			break;
2574 
2575 		bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
2576 		    0, pkt->pkt_map->dm_mapsize,
2577 		    BUS_DMASYNC_POSTWRITE);
2578 		bus_dmamap_unload(sc->sc_dmat, pkt->pkt_map);
2579 
2580 		KASSERT(pkt->pkt_m != NULL);
2581 
2582 		m_freem(pkt->pkt_m);
2583 		pkt->pkt_m = NULL;
2584 
2585 		tail = pkt->pkt_eop;
2586 
2587 		if (++tail == sc->sc_tx_slots)
2588 			tail = 0;
2589 
2590 		free++;
2591 	} while (tail != head);
2592 
2593 	bus_dmamap_sync(sc->sc_dmat, que->tx.sc_tx_dma.dma_map,
2594 	    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
2595 	    BUS_DMASYNC_PREREAD);
2596 
2597 	if (free == 0)
2598 		return;
2599 
2600 	que->tx.sc_tx_desc_tail = tail;
2601 
2602 	if (ifq_is_oactive(&ifp->if_snd))
2603 		ifq_restart(&ifp->if_snd);
2604 	else if (tail == head)
2605 		ifp->if_timer = 0;
2606 }
2607 
2608 /*********************************************************************
2609  *
2610  *  Get a buffer from the system mbuf cluster pool.
2611  *
2612  **********************************************************************/
2613 int
2614 em_get_buf(struct em_queue *que, int i)
2615 {
2616 	struct em_softc *sc = que->sc;
2617 	struct mbuf    *m;
2618 	struct em_packet *pkt;
2619 	struct em_rx_desc *desc;
2620 	int error;
2621 
2622 	pkt = &que->rx.sc_rx_pkts_ring[i];
2623 	desc = &que->rx.sc_rx_desc_ring[i];
2624 
2625 	KASSERT(pkt->pkt_m == NULL);
2626 
2627 	m = MCLGETL(NULL, M_DONTWAIT, EM_MCLBYTES);
2628 	if (m == NULL) {
2629 		sc->mbuf_cluster_failed++;
2630 		return (ENOBUFS);
2631 	}
2632 	m->m_len = m->m_pkthdr.len = EM_MCLBYTES;
2633 	m_adj(m, ETHER_ALIGN);
2634 
2635 	error = bus_dmamap_load_mbuf(sc->sc_dmat, pkt->pkt_map,
2636 	    m, BUS_DMA_NOWAIT);
2637 	if (error) {
2638 		m_freem(m);
2639 		return (error);
2640 	}
2641 
2642 	bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
2643 	    0, pkt->pkt_map->dm_mapsize,
2644 	    BUS_DMASYNC_PREREAD);
2645 	pkt->pkt_m = m;
2646 
2647 	memset(desc, 0, sizeof(*desc));
2648 	htolem64(&desc->buffer_addr, pkt->pkt_map->dm_segs[0].ds_addr);
2649 
2650 	return (0);
2651 }
2652 
2653 /*********************************************************************
2654  *
2655  *  Allocate memory for rx_buffer structures. Since we use one
2656  *  rx_buffer per received packet, the maximum number of rx_buffer's
2657  *  rx_buffer per received packet, the maximum number of rx_buffers
2658  *  that we've allocated.
2659  *
2660  **********************************************************************/
2661 int
2662 em_allocate_receive_structures(struct em_softc *sc)
2663 {
2664 	struct em_queue *que;
2665 	struct em_packet *pkt;
2666 	int i;
2667 	int error;
2668 
2669 	FOREACH_QUEUE(sc, que) {
2670 		que->rx.sc_rx_pkts_ring = mallocarray(sc->sc_rx_slots,
2671 		    sizeof(*que->rx.sc_rx_pkts_ring),
2672 		    M_DEVBUF, M_NOWAIT | M_ZERO);
2673 		if (que->rx.sc_rx_pkts_ring == NULL) {
2674 			printf("%s: Unable to allocate rx_buffer memory\n",
2675 			    DEVNAME(sc));
2676 			return (ENOMEM);
2677 		}
2678 
2679 		bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2680 		    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2681 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2682 
2683 		for (i = 0; i < sc->sc_rx_slots; i++) {
2684 			pkt = &que->rx.sc_rx_pkts_ring[i];
2685 
2686 			error = bus_dmamap_create(sc->sc_dmat, EM_MCLBYTES, 1,
2687 			    EM_MCLBYTES, 0, BUS_DMA_NOWAIT, &pkt->pkt_map);
2688 			if (error != 0) {
2689 				printf("%s: em_allocate_receive_structures: "
2690 				    "bus_dmamap_create failed; error %u\n",
2691 				    DEVNAME(sc), error);
2692 				goto fail;
2693 			}
2694 
2695 			pkt->pkt_m = NULL;
2696 		}
2697 	}
2698 
2699 	return (0);
2700 
2701 fail:
2702 	em_free_receive_structures(sc);
2703 	return (error);
2704 }
2705 
2706 /*********************************************************************
2707  *
2708  *  Allocate and initialize receive structures.
2709  *
2710  **********************************************************************/
2711 int
2712 em_setup_receive_structures(struct em_softc *sc)
2713 {
2714 	struct ifnet *ifp = &sc->sc_ac.ac_if;
2715 	struct em_queue *que;
2716 	u_int lwm;
2717 
2718 	if (em_allocate_receive_structures(sc))
2719 		return (ENOMEM);
2720 
2721 	FOREACH_QUEUE(sc, que) {
2722 		memset(que->rx.sc_rx_desc_ring, 0,
2723 		    sc->sc_rx_slots * sizeof(*que->rx.sc_rx_desc_ring));
2724 
2725 		/* Setup our descriptor pointers */
2726 		que->rx.sc_rx_desc_tail = 0;
2727 		que->rx.sc_rx_desc_head = sc->sc_rx_slots - 1;
2728 
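		/*
		 * Low water mark: keep enough clusters on the ring for at
		 * least two maximum-sized frames, and never fewer than 4.
		 */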
2729 		lwm = max(4, 2 * ((ifp->if_hardmtu / MCLBYTES) + 1));
2730 		if_rxr_init(&que->rx.sc_rx_ring, lwm, sc->sc_rx_slots);
2731 
2732 		if (em_rxfill(que) == 0) {
2733 			printf("%s: unable to fill any rx descriptors\n",
2734 			    DEVNAME(sc));
2735 			return (ENOMEM);
2736 		}
2737 	}
2738 
2739 	return (0);
2740 }
2741 
2742 /*********************************************************************
2743  *
2744  *  Enable receive unit.
2745  *
2746  **********************************************************************/
2747 void
2748 em_initialize_receive_unit(struct em_softc *sc)
2749 {
2750 	struct em_queue *que;
2751 	u_int32_t	reg_rctl;
2752 	u_int32_t	reg_rxcsum;
2753 	u_int32_t	reg_srrctl;
2754 	u_int64_t	bus_addr;
2755 
2756 	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
2757 
2758 	/* Make sure receives are disabled while setting up the descriptor ring */
2759 	E1000_WRITE_REG(&sc->hw, RCTL, 0);
2760 
2761 	/* Set the Receive Delay Timer Register */
2762 	E1000_WRITE_REG(&sc->hw, RDTR,
2763 			sc->rx_int_delay | E1000_RDT_FPDB);
2764 
2765 	if (sc->hw.mac_type >= em_82540) {
2766 		if (sc->rx_int_delay)
2767 			E1000_WRITE_REG(&sc->hw, RADV, sc->rx_abs_int_delay);
2768 
2769 		/* Set the interrupt throttling rate.  Value is calculated
2770 		 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) */
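		/*
		 * For example, a hypothetical target of 8000 interrupts per
		 * second yields 1 / (8000 * 256ns) ~= 488 as the register
		 * value, i.e. roughly 125us between interrupts.
		 */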
2771 		E1000_WRITE_REG(&sc->hw, ITR, DEFAULT_ITR);
2772 	}
2773 
2774 	/* Setup the Receive Control Register */
2775 	reg_rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
2776 	    E1000_RCTL_RDMTS_HALF |
2777 	    (sc->hw.mc_filter_type << E1000_RCTL_MO_SHIFT);
2778 
2779 	if (sc->hw.tbi_compatibility_on == TRUE)
2780 		reg_rctl |= E1000_RCTL_SBP;
2781 
2782 	/*
2783 	 * The i350 has a bug where it always strips the CRC whether
2784 	 * asked to or not.  So ask for stripped CRC here and
2785 	 * cope in rxeof.
2786 	 */
2787 	if (sc->hw.mac_type == em_i210 || sc->hw.mac_type == em_i350)
2788 		reg_rctl |= E1000_RCTL_SECRC;
2789 
2790 	switch (sc->sc_rx_buffer_len) {
2791 	default:
2792 	case EM_RXBUFFER_2048:
2793 		reg_rctl |= E1000_RCTL_SZ_2048;
2794 		break;
2795 	case EM_RXBUFFER_4096:
2796 		reg_rctl |= E1000_RCTL_SZ_4096|E1000_RCTL_BSEX|E1000_RCTL_LPE;
2797 		break;
2798 	case EM_RXBUFFER_8192:
2799 		reg_rctl |= E1000_RCTL_SZ_8192|E1000_RCTL_BSEX|E1000_RCTL_LPE;
2800 		break;
2801 	case EM_RXBUFFER_16384:
2802 		reg_rctl |= E1000_RCTL_SZ_16384|E1000_RCTL_BSEX|E1000_RCTL_LPE;
2803 		break;
2804 	}
2805 
2806 	if (sc->hw.max_frame_size != ETHER_MAX_LEN)
2807 		reg_rctl |= E1000_RCTL_LPE;
2808 
2809 	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
2810 	if (sc->hw.mac_type >= em_82543) {
2811 		reg_rxcsum = E1000_READ_REG(&sc->hw, RXCSUM);
2812 		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
2813 		E1000_WRITE_REG(&sc->hw, RXCSUM, reg_rxcsum);
2814 	}
2815 
2816 	/*
2817 	 * XXX TEMPORARY WORKAROUND: on some systems with the 82573,
2818 	 * long latencies are observed, e.g. on the Lenovo X60.
2819 	 */
2820 	if (sc->hw.mac_type == em_82573)
2821 		E1000_WRITE_REG(&sc->hw, RDTR, 0x20);
2822 
2823 	FOREACH_QUEUE(sc, que) {
2824 		if (sc->num_queues > 1) {
2825 			/*
2826 			 * Disable Drop Enable for every queue; the default has
2827 			 * it enabled for queues > 0.
2828 			 */
2829 			reg_srrctl = E1000_READ_REG(&sc->hw, SRRCTL(que->me));
2830 			reg_srrctl &= ~E1000_SRRCTL_DROP_EN;
2831 			E1000_WRITE_REG(&sc->hw, SRRCTL(que->me), reg_srrctl);
2832 		}
2833 
2834 		/* Setup the Base and Length of the Rx Descriptor Ring */
2835 		bus_addr = que->rx.sc_rx_dma.dma_map->dm_segs[0].ds_addr;
2836 		E1000_WRITE_REG(&sc->hw, RDLEN(que->me),
2837 		    sc->sc_rx_slots * sizeof(*que->rx.sc_rx_desc_ring));
2838 		E1000_WRITE_REG(&sc->hw, RDBAH(que->me), (u_int32_t)(bus_addr >> 32));
2839 		E1000_WRITE_REG(&sc->hw, RDBAL(que->me), (u_int32_t)bus_addr);
2840 
2841 		if (sc->hw.mac_type == em_82575 || sc->hw.mac_type == em_82580 ||
2842 		    sc->hw.mac_type == em_82576 ||
2843 		    sc->hw.mac_type == em_i210 || sc->hw.mac_type == em_i350) {
2844 			/* 82575/6 need to enable the RX queue */
2845 			uint32_t reg;
2846 			reg = E1000_READ_REG(&sc->hw, RXDCTL(que->me));
2847 			reg |= E1000_RXDCTL_QUEUE_ENABLE;
2848 			E1000_WRITE_REG(&sc->hw, RXDCTL(que->me), reg);
2849 		}
2850 	}
2851 
2852 	/* Enable Receives */
2853 	E1000_WRITE_REG(&sc->hw, RCTL, reg_rctl);
2854 
2855 	/* Setup the HW Rx Head and Tail Descriptor Pointers */
2856 	FOREACH_QUEUE(sc, que) {
2857 		E1000_WRITE_REG(&sc->hw, RDH(que->me), 0);
2858 		E1000_WRITE_REG(&sc->hw, RDT(que->me), que->rx.sc_rx_desc_head);
2859 	}
2860 }
2861 
2862 /*********************************************************************
2863  *
2864  *  Free receive related data structures.
2865  *
2866  **********************************************************************/
2867 void
2868 em_free_receive_structures(struct em_softc *sc)
2869 {
2870 	struct em_queue *que;
2871 	struct em_packet *pkt;
2872 	int i;
2873 
2874 	INIT_DEBUGOUT("free_receive_structures: begin");
2875 
2876 	FOREACH_QUEUE(sc, que) {
2877 		if_rxr_init(&que->rx.sc_rx_ring, 0, 0);
2878 
2879 		bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2880 		    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2881 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2882 
2883 		if (que->rx.sc_rx_pkts_ring != NULL) {
2884 			for (i = 0; i < sc->sc_rx_slots; i++) {
2885 				pkt = &que->rx.sc_rx_pkts_ring[i];
2886 				if (pkt->pkt_m != NULL) {
2887 					bus_dmamap_sync(sc->sc_dmat,
2888 					    pkt->pkt_map,
2889 					    0, pkt->pkt_map->dm_mapsize,
2890 					    BUS_DMASYNC_POSTREAD);
2891 					bus_dmamap_unload(sc->sc_dmat,
2892 					    pkt->pkt_map);
2893 					m_freem(pkt->pkt_m);
2894 					pkt->pkt_m = NULL;
2895 				}
2896 				bus_dmamap_destroy(sc->sc_dmat, pkt->pkt_map);
2897 			}
2898 
2899 			free(que->rx.sc_rx_pkts_ring, M_DEVBUF,
2900 			    sc->sc_rx_slots * sizeof(*que->rx.sc_rx_pkts_ring));
2901 			que->rx.sc_rx_pkts_ring = NULL;
2902 		}
2903 
2904 		if (que->rx.fmp != NULL) {
2905 			m_freem(que->rx.fmp);
2906 			que->rx.fmp = NULL;
2907 			que->rx.lmp = NULL;
2908 		}
2909 	}
2910 }
2911 
2912 int
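/*********************************************************************
 *
 *  Fill empty Rx descriptor slots with fresh mbuf clusters.  Returns
 *  nonzero if at least one descriptor was posted, so the caller knows
 *  to update the hardware tail pointer.
 *
 **********************************************************************/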
2913 em_rxfill(struct em_queue *que)
2914 {
2915 	struct em_softc *sc = que->sc;
2916 	u_int slots;
2917 	int post = 0;
2918 	int i;
2919 
2920 	i = que->rx.sc_rx_desc_head;
2921 
2922 	bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2923 	    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2924 	    BUS_DMASYNC_POSTWRITE);
2925 
2926 	for (slots = if_rxr_get(&que->rx.sc_rx_ring, sc->sc_rx_slots);
2927 	    slots > 0; slots--) {
2928 		if (++i == sc->sc_rx_slots)
2929 			i = 0;
2930 
2931 		if (em_get_buf(que, i) != 0)
2932 			break;
2933 
2934 		que->rx.sc_rx_desc_head = i;
2935 		post = 1;
2936 	}
2937 
2938 	if_rxr_put(&que->rx.sc_rx_ring, slots);
2939 
2940 	bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2941 	    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2942 	    BUS_DMASYNC_PREWRITE);
2943 
2944 	return (post);
2945 }
2946 
2947 void
2948 em_rxrefill(void *arg)
2949 {
2950 	struct em_queue *que = arg;
2951 	struct em_softc *sc = que->sc;
2952 
2953 	if (em_rxfill(que))
2954 		E1000_WRITE_REG(&sc->hw, RDT(que->me), que->rx.sc_rx_desc_head);
2955 	else if (if_rxr_needrefill(&que->rx.sc_rx_ring))
2956 		timeout_add(&que->rx_refill, 1);
2957 }
2958 
2959 /*********************************************************************
2960  *
2961  *  This routine executes in interrupt context.  It pulls received
2962  *  mbufs off the descriptor ring and sends the data, which has been
2963  *  DMA'ed into host memory, to the upper layer.
2964  *
2965  *********************************************************************/
2966 int
2967 em_rxeof(struct em_queue *que)
2968 {
2969 	struct em_softc	    *sc = que->sc;
2970 	struct ifnet	    *ifp = &sc->sc_ac.ac_if;
2971 	struct mbuf_list    ml = MBUF_LIST_INITIALIZER();
2972 	struct mbuf	    *m;
2973 	u_int8_t	    accept_frame = 0;
2974 	u_int8_t	    eop = 0;
2975 	u_int16_t	    len, desc_len, prev_len_adj;
2976 	int		    i, rv = 0;
2977 
2978 	/* Pointer to the receive descriptor being examined. */
2979 	struct em_rx_desc   *desc;
2980 	struct em_packet    *pkt;
2981 	u_int8_t	    status;
2982 
2983 	if (if_rxr_inuse(&que->rx.sc_rx_ring) == 0)
2984 		return (0);
2985 
2986 	i = que->rx.sc_rx_desc_tail;
2987 
2988 	bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
2989 	    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
2990 	    BUS_DMASYNC_POSTREAD);
2991 
2992 	do {
2993 		m = NULL;
2994 
2995 		pkt = &que->rx.sc_rx_pkts_ring[i];
2996 		desc = &que->rx.sc_rx_desc_ring[i];
2997 
2998 		status = desc->status;
2999 		if (!ISSET(status, E1000_RXD_STAT_DD))
3000 			break;
3001 
3002 		/* pull the mbuf off the ring */
3003 		bus_dmamap_sync(sc->sc_dmat, pkt->pkt_map,
3004 		    0, pkt->pkt_map->dm_mapsize,
3005 		    BUS_DMASYNC_POSTREAD);
3006 		bus_dmamap_unload(sc->sc_dmat, pkt->pkt_map);
3007 		m = pkt->pkt_m;
3008 		pkt->pkt_m = NULL;
3009 
3010 		KASSERT(m != NULL);
3011 
3012 		if_rxr_put(&que->rx.sc_rx_ring, 1);
3013 		rv = 1;
3014 
3015 		accept_frame = 1;
3016 		prev_len_adj = 0;
3017 		desc_len = letoh16(desc->length);
3018 
3019 		if (status & E1000_RXD_STAT_EOP) {
3020 			eop = 1;
3021 			if (desc_len < ETHER_CRC_LEN) {
3022 				len = 0;
3023 				prev_len_adj = ETHER_CRC_LEN - desc_len;
3024 			} else if (sc->hw.mac_type == em_i210 ||
3025 			    sc->hw.mac_type == em_i350)
3026 				len = desc_len;
3027 			else
3028 				len = desc_len - ETHER_CRC_LEN;
3029 		} else {
3030 			eop = 0;
3031 			len = desc_len;
3032 		}
3033 
3034 		if (desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
3035 			u_int8_t last_byte;
3036 			u_int32_t pkt_len = desc_len;
3037 
3038 			if (que->rx.fmp != NULL)
3039 				pkt_len += que->rx.fmp->m_pkthdr.len;
3040 
3041 			last_byte = *(mtod(m, caddr_t) + desc_len - 1);
3042 			if (TBI_ACCEPT(&sc->hw, status, desc->errors,
3043 			    pkt_len, last_byte)) {
3044 #if NKSTAT > 0
3045 				em_tbi_adjust_stats(sc,
3046 				    pkt_len, sc->hw.mac_addr);
3047 #endif
3048 				if (len > 0)
3049 					len--;
3050 			} else
3051 				accept_frame = 0;
3052 		}
3053 
3054 		if (accept_frame) {
3055 			/* Assign correct length to the current fragment */
3056 			m->m_len = len;
3057 
3058 			if (que->rx.fmp == NULL) {
3059 				m->m_pkthdr.len = m->m_len;
3060 				que->rx.fmp = m;	 /* Store the first mbuf */
3061 				que->rx.lmp = m;
3062 			} else {
3063 				/* Chain mbuf's together */
3064 				m->m_flags &= ~M_PKTHDR;
3065 				/*
3066 				 * Adjust length of previous mbuf in chain if
3067 				 * we received less than 4 bytes in the last
3068 				 * descriptor.
3069 				 */
3070 				if (prev_len_adj > 0) {
3071 					que->rx.lmp->m_len -= prev_len_adj;
3072 					que->rx.fmp->m_pkthdr.len -= prev_len_adj;
3073 				}
3074 				que->rx.lmp->m_next = m;
3075 				que->rx.lmp = m;
3076 				que->rx.fmp->m_pkthdr.len += m->m_len;
3077 			}
3078 
3079 			if (eop) {
3080 				m = que->rx.fmp;
3081 
3082 				em_receive_checksum(sc, desc, m);
3083 #if NVLAN > 0
3084 				if (desc->status & E1000_RXD_STAT_VP) {
3085 					m->m_pkthdr.ether_vtag =
3086 					    letoh16(desc->special);
3087 					m->m_flags |= M_VLANTAG;
3088 				}
3089 #endif
3090 				ml_enqueue(&ml, m);
3091 
3092 				que->rx.fmp = NULL;
3093 				que->rx.lmp = NULL;
3094 			}
3095 		} else {
3096 			que->rx.dropped_pkts++;
3097 
3098 			if (que->rx.fmp != NULL) {
3099 				m_freem(que->rx.fmp);
3100 				que->rx.fmp = NULL;
3101 				que->rx.lmp = NULL;
3102 			}
3103 
3104 			m_freem(m);
3105 		}
3106 
3107 		/* Advance our pointers to the next descriptor. */
3108 		if (++i == sc->sc_rx_slots)
3109 			i = 0;
3110 	} while (if_rxr_inuse(&que->rx.sc_rx_ring) > 0);
3111 
3112 	bus_dmamap_sync(sc->sc_dmat, que->rx.sc_rx_dma.dma_map,
3113 	    0, que->rx.sc_rx_dma.dma_map->dm_mapsize,
3114 	    BUS_DMASYNC_PREREAD);
3115 
3116 	que->rx.sc_rx_desc_tail = i;
3117 
3118 	if (ifiq_input(&ifp->if_rcv, &ml))
3119 		if_rxr_livelocked(&que->rx.sc_rx_ring);
3120 
3121 	return (rv);
3122 }
3123 
3124 /*********************************************************************
3125  *
3126  *  Verify that the hardware indicated that the checksum is valid.
3127  *  Inform the stack about the status of the checksum so that the
3128  *  stack doesn't spend time verifying the checksum.
3129  *
3130  *********************************************************************/
3131 void
3132 em_receive_checksum(struct em_softc *sc, struct em_rx_desc *rx_desc,
3133     struct mbuf *mp)
3134 {
3135 	/* 82543 or newer only */
3136 	if ((sc->hw.mac_type < em_82543) ||
3137 	    /* Ignore Checksum bit is set */
3138 	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
3139 		mp->m_pkthdr.csum_flags = 0;
3140 		return;
3141 	}
3142 
3143 	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
3144 		/* Did it pass? */
3145 		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
3146 			/* IP Checksum Good */
3147 			mp->m_pkthdr.csum_flags = M_IPV4_CSUM_IN_OK;
3148 
3149 		} else
3150 			mp->m_pkthdr.csum_flags = 0;
3151 	}
3152 
3153 	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
3154 		/* Did it pass? */
3155 		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE))
3156 			mp->m_pkthdr.csum_flags |=
3157 				M_TCP_CSUM_IN_OK | M_UDP_CSUM_IN_OK;
3158 	}
3159 }
3160 
3161 /*
3162  * This turns on the hardware offload of the VLAN
3163  * tag insertion and stripping.
3164  */
3165 void
3166 em_enable_hw_vlans(struct em_softc *sc)
3167 {
3168 	uint32_t ctrl;
3169 
3170 	ctrl = E1000_READ_REG(&sc->hw, CTRL);
3171 	ctrl |= E1000_CTRL_VME;
3172 	E1000_WRITE_REG(&sc->hw, CTRL, ctrl);
3173 }
3174 
3175 void
3176 em_enable_intr(struct em_softc *sc)
3177 {
3178 	uint32_t mask;
3179 
3180 	if (sc->msix) {
3181 		mask = sc->msix_queuesmask | sc->msix_linkmask;
3182 		E1000_WRITE_REG(&sc->hw, EIAC, mask);
3183 		E1000_WRITE_REG(&sc->hw, EIAM, mask);
3184 		E1000_WRITE_REG(&sc->hw, EIMS, mask);
3185 		E1000_WRITE_REG(&sc->hw, IMS, E1000_IMS_LSC);
3186 	} else
3187 		E1000_WRITE_REG(&sc->hw, IMS, (IMS_ENABLE_MASK));
3188 }
3189 
3190 void
3191 em_disable_intr(struct em_softc *sc)
3192 {
3193 	/*
3194 	 * The first version of the 82542 had an erratum where, when link
3195 	 * was forced, it would stay up even if the cable was disconnected.
3196 	 * Sequence errors were used to detect the disconnect and then
3197 	 * the driver would unforce the link.  This code is in the ISR.
3198 	 * For this to work correctly the Sequence error interrupt had
3199 	 * to be enabled all the time.
3200 	 */
3201 	if (sc->msix) {
3202 		E1000_WRITE_REG(&sc->hw, EIMC, ~0);
3203 		E1000_WRITE_REG(&sc->hw, EIAC, 0);
3204 	} else if (sc->hw.mac_type == em_82542_rev2_0)
3205 		E1000_WRITE_REG(&sc->hw, IMC, (0xffffffff & ~E1000_IMC_RXSEQ));
3206 	else
3207 		E1000_WRITE_REG(&sc->hw, IMC, 0xffffffff);
3208 }
3209 
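/*
 * PCI config space is accessed in 32-bit dwords, so these helpers read
 * the containing dword and merge or extract the requested 16-bit half
 * based on bit 1 of the register offset.
 */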
3210 void
3211 em_write_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3212 {
3213 	struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3214 	pcireg_t val;
3215 
3216 	val = pci_conf_read(pa->pa_pc, pa->pa_tag, reg & ~0x3);
3217 	if (reg & 0x2) {
3218 		val &= 0x0000ffff;
3219 		val |= (*value << 16);
3220 	} else {
3221 		val &= 0xffff0000;
3222 		val |= *value;
3223 	}
3224 	pci_conf_write(pa->pa_pc, pa->pa_tag, reg & ~0x3, val);
3225 }
3226 
3227 void
3228 em_read_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3229 {
3230 	struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3231 	pcireg_t val;
3232 
3233 	val = pci_conf_read(pa->pa_pc, pa->pa_tag, reg & ~0x3);
3234 	if (reg & 0x2)
3235 		*value = (val >> 16) & 0xffff;
3236 	else
3237 		*value = val & 0xffff;
3238 }
3239 
3240 void
3241 em_pci_set_mwi(struct em_hw *hw)
3242 {
3243 	struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3244 
3245 	pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG,
3246 		(hw->pci_cmd_word | CMD_MEM_WRT_INVALIDATE));
3247 }
3248 
3249 void
3250 em_pci_clear_mwi(struct em_hw *hw)
3251 {
3252 	struct pci_attach_args *pa = &((struct em_osdep *)hw->back)->em_pa;
3253 
3254 	pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG,
3255 		(hw->pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE));
3256 }
3257 
3258 /*
3259  * We may eventually really do this, but it's unnecessary
3260  * for now, so we just return unsupported.
3261  */
3262 int32_t
3263 em_read_pcie_cap_reg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3264 {
3265 	return -E1000_NOT_IMPLEMENTED;
3266 }
3267 
3268 /*********************************************************************
3269 * 82544 Coexistence issue workaround.
3270 *    There are two issues.
3271 *       1. Transmit Hang issue.
3272 *    To detect this issue, the following equation can be used:
3273 *          SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3274 *          If SUM[3:0] is between 1 and 4, we will have this issue.
3275 *
3276 *       2. DAC issue.
3277 *    To detect this issue, the following equation can be used:
3278 *          SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3279 *          If SUM[3:0] is between 9 and c, we will have this issue.
3280 *
3281 *
3282 *    WORKAROUND:
3283 *          Make sure we do not have an ending address of 1,2,3,4 (Hang) or
3284 *          9,a,b,c (DAC); see the worked example below.
3285 **********************************************************************/
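/*
 * Illustrative example (hypothetical buffer): a segment whose address
 * ends in 0x6 with length 0xE gives SUM[3:0] = (0x6 + 0xE) & 0xF = 0x4,
 * which falls in the Hang range, so the segment is split into a
 * (length - 4)-byte descriptor followed by a trailing 4-byte descriptor.
 */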
3286 u_int32_t
3287 em_fill_descriptors(u_int64_t address, u_int32_t length,
3288     PDESC_ARRAY desc_array)
3289 {
3290 	/* The issue is sensitive to both length and address. */
3291 	/* Check the address first. */
3292 	u_int32_t safe_terminator;
3293 	if (length <= 4) {
3294 		desc_array->descriptor[0].address = address;
3295 		desc_array->descriptor[0].length = length;
3296 		desc_array->elements = 1;
3297 		return desc_array->elements;
3298 	}
3299 	safe_terminator = (u_int32_t)((((u_int32_t)address & 0x7) + (length & 0xF)) & 0xF);
3300 	/* If it does not fall within 0x1-0x4 or 0x9-0xC, one descriptor is safe. */
3301 	if (safe_terminator == 0 ||
3302 	    (safe_terminator > 4 &&
3303 	    safe_terminator < 9) ||
3304 	    (safe_terminator > 0xC &&
3305 	    safe_terminator <= 0xF)) {
3306 		desc_array->descriptor[0].address = address;
3307 		desc_array->descriptor[0].length = length;
3308 		desc_array->elements = 1;
3309 		return desc_array->elements;
3310 	}
3311 
3312 	desc_array->descriptor[0].address = address;
3313 	desc_array->descriptor[0].length = length - 4;
3314 	desc_array->descriptor[1].address = address + (length - 4);
3315 	desc_array->descriptor[1].length = 4;
3316 	desc_array->elements = 2;
3317 	return desc_array->elements;
3318 }
3319 
3320 /*
3321  * Disable the L0S and L1 LINK states.
3322  */
3323 void
3324 em_disable_aspm(struct em_softc *sc)
3325 {
3326 	int offset;
3327 	pcireg_t val;
3328 
3329 	switch (sc->hw.mac_type) {
3330 		case em_82571:
3331 		case em_82572:
3332 		case em_82573:
3333 		case em_82574:
3334 			break;
3335 		default:
3336 			return;
3337 	}
3338 
3339 	if (!pci_get_capability(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
3340 	    PCI_CAP_PCIEXPRESS, &offset, NULL))
3341 		return;
3342 
3343 	/* Disable PCIe Active State Power Management (ASPM). */
3344 	val = pci_conf_read(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
3345 	    offset + PCI_PCIE_LCSR);
3346 
3347 	switch (sc->hw.mac_type) {
3348 		case em_82571:
3349 		case em_82572:
3350 			val &= ~PCI_PCIE_LCSR_ASPM_L1;
3351 			break;
3352 		case em_82573:
3353 		case em_82574:
3354 			val &= ~(PCI_PCIE_LCSR_ASPM_L0S |
3355 			    PCI_PCIE_LCSR_ASPM_L1);
3356 			break;
3357 		default:
3358 			break;
3359 	}
3360 
3361 	pci_conf_write(sc->osdep.em_pa.pa_pc, sc->osdep.em_pa.pa_tag,
3362 	    offset + PCI_PCIE_LCSR, val);
3363 }
3364 
3365 /*
3366  * em_flush_tx_ring - remove all descriptors from the tx_ring
3367  *
3368  * We want to clear all pending descriptors from the TX ring.
3369  * Zeroing happens when the HW reads the regs.  We assign the ring itself as
3370  * the data of the next descriptor.  We don't care about the data since we
3371  * are about to reset the HW.
3372  */
3373 void
3374 em_flush_tx_ring(struct em_queue *que)
3375 {
3376 	struct em_softc		*sc = que->sc;
3377 	uint32_t		 tctl, txd_lower = E1000_TXD_CMD_IFCS;
3378 	uint16_t		 size = 512;
3379 	struct em_tx_desc	*txd;
3380 
3381 	KASSERT(que->tx.sc_tx_desc_ring != NULL);
3382 
3383 	tctl = EM_READ_REG(&sc->hw, E1000_TCTL);
3384 	EM_WRITE_REG(&sc->hw, E1000_TCTL, tctl | E1000_TCTL_EN);
3385 
3386 	KASSERT(EM_READ_REG(&sc->hw, E1000_TDT(que->me)) == que->tx.sc_tx_desc_head);
3387 
3388 	txd = &que->tx.sc_tx_desc_ring[que->tx.sc_tx_desc_head];
3389 	txd->buffer_addr = que->tx.sc_tx_dma.dma_map->dm_segs[0].ds_addr;
3390 	txd->lower.data = htole32(txd_lower | size);
3391 	txd->upper.data = 0;
3392 
3393 	/* flush descriptors to memory before notifying the HW */
3394 	bus_space_barrier(sc->osdep.mem_bus_space_tag,
3395 	    sc->osdep.mem_bus_space_handle, 0, 0, BUS_SPACE_BARRIER_WRITE);
3396 
3397 	if (++que->tx.sc_tx_desc_head == sc->sc_tx_slots)
3398 		que->tx.sc_tx_desc_head = 0;
3399 
3400 	EM_WRITE_REG(&sc->hw, E1000_TDT(que->me), que->tx.sc_tx_desc_head);
3401 	bus_space_barrier(sc->osdep.mem_bus_space_tag, sc->osdep.mem_bus_space_handle,
3402 	    0, 0, BUS_SPACE_BARRIER_READ|BUS_SPACE_BARRIER_WRITE);
3403 	usec_delay(250);
3404 }
3405 
3406 /*
3407  * em_flush_rx_ring - remove all descriptors from the rx_ring
3408  *
3409  * Mark all descriptors in the RX ring as consumed and disable the rx ring
3410  */
3411 void
3412 em_flush_rx_ring(struct em_queue *que)
3413 {
3414 	uint32_t	rctl, rxdctl;
3415 	struct em_softc	*sc = que->sc;
3416 
3417 	rctl = EM_READ_REG(&sc->hw, E1000_RCTL);
3418 	EM_WRITE_REG(&sc->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3419 	E1000_WRITE_FLUSH(&sc->hw);
3420 	usec_delay(150);
3421 
3422 	rxdctl = EM_READ_REG(&sc->hw, E1000_RXDCTL(que->me));
3423 	/* zero the lower 14 bits (prefetch and host thresholds) */
3424 	rxdctl &= 0xffffc000;
3425 	/*
3426 	 * update thresholds: prefetch threshold to 31, host threshold to 1
3427 	 * and make sure the granularity is "descriptors" and not "cache lines"
3428 	 */
3429 	rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
3430 	EM_WRITE_REG(&sc->hw, E1000_RXDCTL(que->me), rxdctl);
3431 
3432 	/* momentarily enable the RX ring for the changes to take effect */
3433 	EM_WRITE_REG(&sc->hw, E1000_RCTL, rctl | E1000_RCTL_EN);
3434 	E1000_WRITE_FLUSH(&sc->hw);
3435 	usec_delay(150);
3436 	EM_WRITE_REG(&sc->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3437 }
3438 
3439 /*
3440  * em_flush_desc_rings - remove all descriptors from the descriptor rings
3441  *
3442  * On the i219, the descriptor rings must be emptied before resetting the
3443  * HW or before changing the device state to D3 during runtime (runtime PM).
3444  *
3445  * Failure to do so causes the HW to enter a unit hang state which can
3446  * only be released by a PCI reset of the device.
3447  *
3448  */
3449 void
3450 em_flush_desc_rings(struct em_softc *sc)
3451 {
3452 	struct em_queue		*que = sc->queues; /* Use only first queue. */
3453 	struct pci_attach_args	*pa = &sc->osdep.em_pa;
3454 	uint32_t		 fextnvm11, tdlen;
3455 	uint16_t		 hang_state;
3456 
3457 	/* First, disable MULR fix in FEXTNVM11 */
3458 	fextnvm11 = EM_READ_REG(&sc->hw, E1000_FEXTNVM11);
3459 	fextnvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
3460 	EM_WRITE_REG(&sc->hw, E1000_FEXTNVM11, fextnvm11);
3461 
3462 	/* do nothing if we're not in a faulty state or if the queue is empty */
3463 	tdlen = EM_READ_REG(&sc->hw, E1000_TDLEN(que->me));
3464 	hang_state = pci_conf_read(pa->pa_pc, pa->pa_tag, PCICFG_DESC_RING_STATUS);
3465 	if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
3466 		return;
3467 	em_flush_tx_ring(que);
3468 
3469 	/* recheck, maybe the fault is caused by the rx ring */
3470 	hang_state = pci_conf_read(pa->pa_pc, pa->pa_tag, PCICFG_DESC_RING_STATUS);
3471 	if (hang_state & FLUSH_DESC_REQUIRED)
3472 		em_flush_rx_ring(que);
3473 }
3474 
3475 int
3476 em_allocate_legacy(struct em_softc *sc)
3477 {
3478 	pci_intr_handle_t	 ih;
3479 	const char		*intrstr = NULL;
3480 	struct pci_attach_args	*pa = &sc->osdep.em_pa;
3481 	pci_chipset_tag_t	 pc = pa->pa_pc;
3482 
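	/* Try MSI first; fall back to a legacy INTx interrupt if that fails. */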
3483 	if (pci_intr_map_msi(pa, &ih)) {
3484 		if (pci_intr_map(pa, &ih)) {
3485 			printf(": couldn't map interrupt\n");
3486 			return (ENXIO);
3487 		}
3488 		sc->legacy_irq = 1;
3489 	}
3490 
3491 	intrstr = pci_intr_string(pc, ih);
3492 	sc->sc_intrhand = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
3493 	    em_intr, sc, DEVNAME(sc));
3494 	if (sc->sc_intrhand == NULL) {
3495 		printf(": couldn't establish interrupt");
3496 		if (intrstr != NULL)
3497 			printf(" at %s", intrstr);
3498 		printf("\n");
3499 		return (ENXIO);
3500 	}
3501 	printf(": %s", intrstr);
3502 
3503 	return (0);
3504 }
3505 
3506 #if NKSTAT > 0
3507 /* this is used to look up the array of kstats quickly */
3508 enum em_stat {
3509 	em_stat_crcerrs,
3510 	em_stat_algnerrc,
3511 	em_stat_symerrs,
3512 	em_stat_rxerrc,
3513 	em_stat_mpc,
3514 	em_stat_scc,
3515 	em_stat_ecol,
3516 	em_stat_mcc,
3517 	em_stat_latecol,
3518 	em_stat_colc,
3519 	em_stat_dc,
3520 	em_stat_tncrs,
3521 	em_stat_sec,
3522 	em_stat_cexterr,
3523 	em_stat_rlec,
3524 	em_stat_xonrxc,
3525 	em_stat_xontxc,
3526 	em_stat_xoffrxc,
3527 	em_stat_xofftxc,
3528 	em_stat_fcruc,
3529 	em_stat_prc64,
3530 	em_stat_prc127,
3531 	em_stat_prc255,
3532 	em_stat_prc511,
3533 	em_stat_prc1023,
3534 	em_stat_prc1522,
3535 	em_stat_gprc,
3536 	em_stat_bprc,
3537 	em_stat_mprc,
3538 	em_stat_gptc,
3539 	em_stat_gorc,
3540 	em_stat_gotc,
3541 	em_stat_rnbc,
3542 	em_stat_ruc,
3543 	em_stat_rfc,
3544 	em_stat_roc,
3545 	em_stat_rjc,
3546 	em_stat_mgtprc,
3547 	em_stat_mgtpdc,
3548 	em_stat_mgtptc,
3549 	em_stat_tor,
3550 	em_stat_tot,
3551 	em_stat_tpr,
3552 	em_stat_tpt,
3553 	em_stat_ptc64,
3554 	em_stat_ptc127,
3555 	em_stat_ptc255,
3556 	em_stat_ptc511,
3557 	em_stat_ptc1023,
3558 	em_stat_ptc1522,
3559 	em_stat_mptc,
3560 	em_stat_bptc,
3561 #if 0
3562 	em_stat_tsctc,
3563 	em_stat_tsctf,
3564 #endif
3565 
3566 	em_stat_count,
3567 };
3568 
3569 struct em_counter {
3570 	const char		*name;
3571 	enum kstat_kv_unit	 unit;
3572 	uint32_t		 reg;
3573 };
3574 
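/*
 * Entries with a register offset of 0 are skipped by the generic read loop
 * in em_kstat_read(); they are either 64-bit counters or counters that only
 * exist on >= em_82543 parts, and are handled there as exceptions.
 */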
3575 static const struct em_counter em_counters[em_stat_count] = {
3576 	[em_stat_crcerrs] =
3577 	    { "rx crc errs",	KSTAT_KV_U_PACKETS,	E1000_CRCERRS },
3578 	[em_stat_algnerrc] = /* >= em_82543 */
3579 	    { "rx align errs",	KSTAT_KV_U_PACKETS,	0 },
3580 	[em_stat_symerrs] = /* >= em_82543 */
3581 	    { "rx symbol errs",	KSTAT_KV_U_PACKETS,	0 },
3582 	[em_stat_rxerrc] =
3583 	    { "rx errs",	KSTAT_KV_U_PACKETS,	E1000_RXERRC },
3584 	[em_stat_mpc] =
3585 	    { "rx missed",	KSTAT_KV_U_PACKETS,	E1000_MPC },
3586 	[em_stat_scc] =
3587 	    { "tx single coll",	KSTAT_KV_U_PACKETS,	E1000_SCC },
3588 	[em_stat_ecol] =
3589 	    { "tx excess coll",	KSTAT_KV_U_PACKETS,	E1000_ECOL },
3590 	[em_stat_mcc] =
3591 	    { "tx multi coll",	KSTAT_KV_U_PACKETS,	E1000_MCC },
3592 	[em_stat_latecol] =
3593 	    { "tx late coll",	KSTAT_KV_U_PACKETS,	E1000_LATECOL },
3594 	[em_stat_colc] =
3595 	    { "tx coll",	KSTAT_KV_U_NONE,	E1000_COLC },
3596 	[em_stat_dc] =
3597 	    { "tx defers",	KSTAT_KV_U_NONE,	E1000_DC },
3598 	[em_stat_tncrs] = /* >= em_82543 */
3599 	    { "tx no CRS",	KSTAT_KV_U_PACKETS,	0 },
3600 	[em_stat_sec] =
3601 	    { "seq errs",	KSTAT_KV_U_NONE,	E1000_SEC },
3602 	[em_stat_cexterr] = /* >= em_82543 */
3603 	    { "carr ext errs",	KSTAT_KV_U_PACKETS,	0 },
3604 	[em_stat_rlec] =
3605 	    { "rx len errs",	KSTAT_KV_U_PACKETS,	E1000_RLEC },
3606 	[em_stat_xonrxc] =
3607 	    { "rx xon",		KSTAT_KV_U_PACKETS,	E1000_XONRXC },
3608 	[em_stat_xontxc] =
3609 	    { "tx xon",		KSTAT_KV_U_PACKETS,	E1000_XONTXC },
3610 	[em_stat_xoffrxc] =
3611 	    { "rx xoff",	KSTAT_KV_U_PACKETS,	E1000_XOFFRXC },
3612 	[em_stat_xofftxc] =
3613 	    { "tx xoff",	KSTAT_KV_U_PACKETS,	E1000_XOFFTXC },
3614 	[em_stat_fcruc] =
3615 	    { "FC unsupported",	KSTAT_KV_U_PACKETS,	E1000_FCRUC },
3616 	[em_stat_prc64] =
3617 	    { "rx 64B",		KSTAT_KV_U_PACKETS,	E1000_PRC64 },
3618 	[em_stat_prc127] =
3619 	    { "rx 65-127B",	KSTAT_KV_U_PACKETS,	E1000_PRC127 },
3620 	[em_stat_prc255] =
3621 	    { "rx 128-255B",	KSTAT_KV_U_PACKETS,	E1000_PRC255 },
3622 	[em_stat_prc511] =
3623 	    { "rx 256-511B",	KSTAT_KV_U_PACKETS,	E1000_PRC511 },
3624 	[em_stat_prc1023] =
3625 	    { "rx 512-1023B",	KSTAT_KV_U_PACKETS,	E1000_PRC1023 },
3626 	[em_stat_prc1522] =
3627 	    { "rx 1024-maxB",	KSTAT_KV_U_PACKETS,	E1000_PRC1522 },
3628 	[em_stat_gprc] =
3629 	    { "rx good",	KSTAT_KV_U_PACKETS,	E1000_GPRC },
3630 	[em_stat_bprc] =
3631 	    { "rx bcast",	KSTAT_KV_U_PACKETS,	E1000_BPRC },
3632 	[em_stat_mprc] =
3633 	    { "rx mcast",	KSTAT_KV_U_PACKETS,	E1000_MPRC },
3634 	[em_stat_gptc] =
3635 	    { "tx good",	KSTAT_KV_U_PACKETS,	E1000_GPTC },
3636 	[em_stat_gorc] = /* 64bit */
3637 	    { "rx good",	KSTAT_KV_U_BYTES,	0 },
3638 	[em_stat_gotc] = /* 64bit */
3639 	    { "tx good",	KSTAT_KV_U_BYTES,	0 },
3640 	[em_stat_rnbc] =
3641 	    { "rx no buffers",	KSTAT_KV_U_PACKETS,	E1000_RNBC },
3642 	[em_stat_ruc] =
3643 	    { "rx undersize",	KSTAT_KV_U_PACKETS,	E1000_RUC },
3644 	[em_stat_rfc] =
3645 	    { "rx fragments",	KSTAT_KV_U_PACKETS,	E1000_RFC },
3646 	[em_stat_roc] =
3647 	    { "rx oversize",	KSTAT_KV_U_PACKETS,	E1000_ROC },
3648 	[em_stat_rjc] =
3649 	    { "rx jabbers",	KSTAT_KV_U_PACKETS,	E1000_RJC },
3650 	[em_stat_mgtprc] =
3651 	    { "rx mgmt",	KSTAT_KV_U_PACKETS,	E1000_MGTPRC },
3652 	[em_stat_mgtpdc] =
3653 	    { "rx mgmt drops",	KSTAT_KV_U_PACKETS,	E1000_MGTPDC },
3654 	[em_stat_mgtptc] =
3655 	    { "tx mgmt",	KSTAT_KV_U_PACKETS,	E1000_MGTPTC },
3656 	[em_stat_tor] = /* 64bit */
3657 	    { "rx total",	KSTAT_KV_U_BYTES,	0 },
3658 	[em_stat_tot] = /* 64bit */
3659 	    { "tx total",	KSTAT_KV_U_BYTES,	0 },
3660 	[em_stat_tpr] =
3661 	    { "rx total",	KSTAT_KV_U_PACKETS,	E1000_TPR },
3662 	[em_stat_tpt] =
3663 	    { "tx total",	KSTAT_KV_U_PACKETS,	E1000_TPT },
3664 	[em_stat_ptc64] =
3665 	    { "tx 64B",		KSTAT_KV_U_PACKETS,	E1000_PTC64 },
3666 	[em_stat_ptc127] =
3667 	    { "tx 65-127B",	KSTAT_KV_U_PACKETS,	E1000_PTC127 },
3668 	[em_stat_ptc255] =
3669 	    { "tx 128-255B",	KSTAT_KV_U_PACKETS,	E1000_PTC255 },
3670 	[em_stat_ptc511] =
3671 	    { "tx 256-511B",	KSTAT_KV_U_PACKETS,	E1000_PTC511 },
3672 	[em_stat_ptc1023] =
3673 	    { "tx 512-1023B",	KSTAT_KV_U_PACKETS,	E1000_PTC1023 },
3674 	[em_stat_ptc1522] =
3675 	    { "tx 1024-maxB",	KSTAT_KV_U_PACKETS,	E1000_PTC1522 },
3676 	[em_stat_mptc] =
3677 	    { "tx mcast",	KSTAT_KV_U_PACKETS,	E1000_MPTC },
3678 	[em_stat_bptc] =
3679 	    { "tx bcast",	KSTAT_KV_U_PACKETS,	E1000_BPTC },
3680 };
3681 
3682 /**********************************************************************
3683  *
3684  *  Update the board statistics counters.
3685  *
3686  **********************************************************************/
3687 int
3688 em_kstat_read(struct kstat *ks)
3689 {
3690 	struct em_softc *sc = ks->ks_softc;
3691 	struct em_hw *hw = &sc->hw;
3692 	struct kstat_kv *kvs = ks->ks_data;
3693 	uint32_t lo, hi;
3694 	unsigned int i;
3695 
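	/*
	 * The statistics registers clear on read, so accumulate their
	 * current values into the kstat counters.
	 */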
3696 	for (i = 0; i < nitems(em_counters); i++) {
3697 		const struct em_counter *c = &em_counters[i];
3698 		if (c->reg == 0)
3699 			continue;
3700 
3701 		kstat_kv_u64(&kvs[i]) += EM_READ_REG(hw,
3702 		    E1000_REG_TR(hw, c->reg)); /* translated register offset */
3703 	}
3704 
3705 	/* Handle the exceptions. */
3706 
3707 	if (sc->hw.mac_type >= em_82543) {
3708 		kstat_kv_u64(&kvs[em_stat_algnerrc]) +=
3709 		    E1000_READ_REG(hw, ALGNERRC);
3710 		kstat_kv_u64(&kvs[em_stat_symerrs]) +=
3711 		    E1000_READ_REG(hw, SYMERRS);
3712 		kstat_kv_u64(&kvs[em_stat_cexterr]) +=
3713 		    E1000_READ_REG(hw, CEXTERR);
3714 		kstat_kv_u64(&kvs[em_stat_tncrs]) +=
3715 		    E1000_READ_REG(hw, TNCRS);
3716 #if 0
3717 		sc->stats.tsctc +=
3718 		E1000_READ_REG(hw, TSCTC);
3719 		sc->stats.tsctfc +=
3720 		E1000_READ_REG(hw, TSCTFC);
3721 #endif
3722 	}
3723 
3724 	/* For the 64-bit byte counters the low dword must be read first. */
3725 	/* Both registers clear on the read of the high dword */
3726 
3727 	lo = E1000_READ_REG(hw, GORCL);
3728 	hi = E1000_READ_REG(hw, GORCH);
3729 	kstat_kv_u64(&kvs[em_stat_gorc]) +=
3730 	    ((uint64_t)hi << 32) | (uint64_t)lo;
3731 
3732 	lo = E1000_READ_REG(hw, GOTCL);
3733 	hi = E1000_READ_REG(hw, GOTCH);
3734 	kstat_kv_u64(&kvs[em_stat_gotc]) +=
3735 	    ((uint64_t)hi << 32) | (uint64_t)lo;
3736 
3737 	lo = E1000_READ_REG(hw, TORL);
3738 	hi = E1000_READ_REG(hw, TORH);
3739 	kstat_kv_u64(&kvs[em_stat_tor]) +=
3740 	    ((uint64_t)hi << 32) | (uint64_t)lo;
3741 
3742 	lo = E1000_READ_REG(hw, TOTL);
3743 	hi = E1000_READ_REG(hw, TOTH);
3744 	kstat_kv_u64(&kvs[em_stat_tot]) +=
3745 	    ((uint64_t)hi << 32) | (uint64_t)lo;
3746 
3747 	getnanouptime(&ks->ks_updated);
3748 
3749 	return (0);
3750 }
3751 
3752 void
3753 em_kstat_attach(struct em_softc *sc)
3754 {
3755 	struct kstat *ks;
3756 	struct kstat_kv *kvs;
3757 	unsigned int i;
3758 
3759 	mtx_init(&sc->kstat_mtx, IPL_SOFTCLOCK);
3760 
3761 	ks = kstat_create(DEVNAME(sc), 0, "em-stats", 0,
3762 	    KSTAT_T_KV, 0);
3763 	if (ks == NULL)
3764 		return;
3765 
3766 	kvs = mallocarray(nitems(em_counters), sizeof(*kvs),
3767 	    M_DEVBUF, M_WAITOK|M_ZERO);
3768 	for (i = 0; i < nitems(em_counters); i++) {
3769 		const struct em_counter *c = &em_counters[i];
3770 		kstat_kv_unit_init(&kvs[i], c->name,
3771 		    KSTAT_KV_T_COUNTER64, c->unit);
3772 	}
3773 
3774 	ks->ks_softc = sc;
3775 	ks->ks_data = kvs;
3776 	ks->ks_datalen = nitems(em_counters) * sizeof(*kvs);
3777 	ks->ks_read = em_kstat_read;
3778 	kstat_set_mutex(ks, &sc->kstat_mtx);
3779 
3780 	kstat_install(ks);
3781 }
3782 
3783 /******************************************************************************
3784  * Adjusts the statistic counters when a frame is accepted by TBI_ACCEPT
3785  *****************************************************************************/
3786 void
3787 em_tbi_adjust_stats(struct em_softc *sc, uint32_t frame_len, uint8_t *mac_addr)
3788 {
3789 	struct em_hw *hw = &sc->hw;
3790 	struct kstat *ks = sc->kstat;
3791 	struct kstat_kv *kvs;
3792 
3793 	if (ks == NULL)
3794 		return;
3795 
3796 	/* First adjust the frame length. */
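	/*
	 * The length presumably still includes the stray carrier-extend byte
	 * that triggered the TBI_ACCEPT workaround, so drop it from the count.
	 */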
3797 	frame_len--;
3798 
3799 	mtx_enter(&sc->kstat_mtx);
3800 	kvs = ks->ks_data;
3801 
3802 	/*
3803 	 * We need to adjust the statistics counters, since the hardware
3804 	 * counters overcount this packet as a CRC error and undercount the
3805 	 * packet as a good packet
3806 	 */
3807 
3808 	/* This packet should not be counted as a CRC error.	*/
3809 	kstat_kv_u64(&kvs[em_stat_crcerrs])--;
3810 	/* This packet does count as a Good Packet Received.	*/
3811 	kstat_kv_u64(&kvs[em_stat_gprc])++;
3812 
3813 	/* Adjust the Good Octets received counters		*/
3814 	kstat_kv_u64(&kvs[em_stat_gorc]) += frame_len;
3815 
3816 	/*
3817 	 * Is this a broadcast or multicast?  Check broadcast first, since
3818 	 * the test for a multicast frame will test positive on a broadcast
3819 	 * frame.
3820 	 */
3821 	if (ETHER_IS_BROADCAST(mac_addr)) {
3822 		/* Broadcast packet */
3823 		kstat_kv_u64(&kvs[em_stat_bprc])++;
3824 	} else if (ETHER_IS_MULTICAST(mac_addr)) {
3825 		/* Multicast packet */
3826 		kstat_kv_u64(&kvs[em_stat_mprc])++;
3827 	}
3828 
3829 	if (frame_len == hw->max_frame_size) {
3830 		/*
3831 		 * In this case, the hardware has overcounted the number of
3832 		 * oversize frames.
3833 		 */
3834 		kstat_kv_u64(&kvs[em_stat_roc])--;
3835 	}
3836 
3837 	/*
3838 	 * Adjust the bin counters when the extra byte put the frame in the
3839 	 * wrong bin. Remember that the frame_len was adjusted above.
3840 	 */
3841 	if (frame_len == 64) {
3842 		kstat_kv_u64(&kvs[em_stat_prc64])++;
3843 		kstat_kv_u64(&kvs[em_stat_prc127])--;
3844 	} else if (frame_len == 127) {
3845 		kstat_kv_u64(&kvs[em_stat_prc127])++;
3846 		kstat_kv_u64(&kvs[em_stat_prc255])--;
3847 	} else if (frame_len == 255) {
3848 		kstat_kv_u64(&kvs[em_stat_prc255])++;
3849 		kstat_kv_u64(&kvs[em_stat_prc511])--;
3850 	} else if (frame_len == 511) {
3851 		kstat_kv_u64(&kvs[em_stat_prc511])++;
3852 		kstat_kv_u64(&kvs[em_stat_prc1023])--;
3853 	} else if (frame_len == 1023) {
3854 		kstat_kv_u64(&kvs[em_stat_prc1023])++;
3855 		kstat_kv_u64(&kvs[em_stat_prc1522])--;
3856 	} else if (frame_len == 1522) {
3857 		kstat_kv_u64(&kvs[em_stat_prc1522])++;
3858 	}
3859 
3860 	mtx_leave(&sc->kstat_mtx);
3861 }
3862 #endif /* NKSTAT > 0 */
3863 
3864 #ifndef SMALL_KERNEL
3865 int
3866 em_allocate_msix(struct em_softc *sc)
3867 {
3868 	pci_intr_handle_t	 ih;
3869 	const char		*intrstr = NULL;
3870 	struct pci_attach_args	*pa = &sc->osdep.em_pa;
3871 	pci_chipset_tag_t	 pc = pa->pa_pc;
3872 	struct em_queue		*que = sc->queues; /* Use only first queue. */
3873 	int			 vec;
3874 
3875 	if (!em_enable_msix)
3876 		return (ENODEV);
3877 
3878 	switch (sc->hw.mac_type) {
3879 	case em_82576:
3880 	case em_82580:
3881 	case em_i350:
3882 	case em_i210:
3883 		break;
3884 	default:
3885 		return (ENODEV);
3886 	}
3887 
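	/*
	 * Only a single queue vector (vector 0) is set up here; a separate
	 * vector (queue vector + 1) handles link state changes.
	 */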
3888 	vec = 0;
3889 	if (pci_intr_map_msix(pa, vec, &ih))
3890 		return (ENODEV);
3891 	sc->msix = 1;
3892 
3893 	que->me = vec;
3894 	que->eims = 1 << vec;
3895 	snprintf(que->name, sizeof(que->name), "%s:%d", DEVNAME(sc), vec);
3896 
3897 	intrstr = pci_intr_string(pc, ih);
3898 	que->tag = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
3899 	    em_queue_intr_msix, que, que->name);
3900 	if (que->tag == NULL) {
3901 		printf(": couldn't establish interrupt");
3902 		if (intrstr != NULL)
3903 			printf(" at %s", intrstr);
3904 		printf("\n");
3905 		return (ENXIO);
3906 	}
3907 
3908 	/* Set up the link vector; use the last queue vector + 1 */
3909 	vec++;
3910 	sc->msix_linkvec = vec;
3911 	if (pci_intr_map_msix(pa, sc->msix_linkvec, &ih)) {
3912 		printf(": couldn't map link vector\n");
3913 		return (ENXIO);
3914 	}
3915 
3916 	intrstr = pci_intr_string(pc, ih);
3917 	sc->sc_intrhand = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
3918 	    em_link_intr_msix, sc, DEVNAME(sc));
3919 	if (sc->sc_intrhand == NULL) {
3920 		printf(": couldn't establish interrupt");
3921 		if (intrstr != NULL)
3922 			printf(" at %s", intrstr);
3923 		printf("\n");
3924 		return (ENXIO);
3925 	}
3926 	printf(", %s, %d queue%s", intrstr, vec, (vec > 1) ? "s" : "");
3927 
3928 	return (0);
3929 }
3930 
3931 /*
3932  * Interrupt handler for a specific queue (not link interrupts). The EICR bit
3933  * which maps to the EIMS bit covers both RX and TX, therefore we can't
3934  * distinguish an RX completion from a TX completion and must handle both.
3935  * The bits in EICR are autocleared and we _cannot_ read EICR.
3936  */
3937 int
3938 em_queue_intr_msix(void *vque)
3939 {
3940 	struct em_queue *que = vque;
3941 	struct em_softc *sc = que->sc;
3942 	struct ifnet   *ifp = &sc->sc_ac.ac_if;
3943 
3944 	if (ifp->if_flags & IFF_RUNNING) {
3945 		em_txeof(que);
3946 		if (em_rxeof(que))
3947 			em_rxrefill(que);
3948 	}
3949 
3950 	em_enable_queue_intr_msix(que);
3951 
3952 	return (1);
3953 }
3954 
3955 int
3956 em_link_intr_msix(void *arg)
3957 {
3958 	struct em_softc *sc = arg;
3959 	uint32_t icr;
3960 
3961 	icr = E1000_READ_REG(&sc->hw, ICR);
3962 
3963 	/* Link status change */
3964 	if (icr & E1000_ICR_LSC) {
3965 		KERNEL_LOCK();
3966 		sc->hw.get_link_status = 1;
3967 		em_check_for_link(&sc->hw);
3968 		em_update_link_status(sc);
3969 		KERNEL_UNLOCK();
3970 	}
3971 
3972 	/* Re-arm unconditionally */
3973 	E1000_WRITE_REG(&sc->hw, IMS, E1000_ICR_LSC);
3974 	E1000_WRITE_REG(&sc->hw, EIMS, sc->msix_linkmask);
3975 
3976 	return (1);
3977 }
3978 
3979 /*
3980  * Maps queues into MSI-X interrupt vectors.
3981  */
3982 int
3983 em_setup_queues_msix(struct em_softc *sc)
3984 {
3985 	uint32_t ivar, newitr, index;
3986 	struct em_queue *que;
3987 
3988 	KASSERT(sc->msix);
3989 
3990 	/* First switch the hardware into MSI-X mode via GPIE */
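	/*
	 * GPIE_MSIX_MODE selects multiple MSI-X vectors; the EIAME, PBA and
	 * NSICR bits tune interrupt auto-masking and clear-on-read behaviour
	 * (see the datasheet for the details).
	 */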
3991 	if (sc->hw.mac_type != em_82575)
3992 		E1000_WRITE_REG(&sc->hw, GPIE,
3993 		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
3994 		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
3995 
3996 	/* Turn on MSIX */
3997 	switch (sc->hw.mac_type) {
3998 	case em_82580:
3999 	case em_i350:
4000 	case em_i210:
4001 		/* RX entries */
4002 		/*
4003 		 * Note: this maps queues into MSI-X vectors, and it works
4004 		 * fine. The funky offset calculation and the check whether
4005 		 * que->me is odd are due to the register layout; the
4006 		 * datasheet explains it well.
4007 		 */
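		/*
		 * Each 32-bit IVAR register covers two queues: bytes 0 and 2
		 * hold the RX vectors and bytes 1 and 3 the TX vectors for
		 * the even and odd queue respectively.
		 */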
4008 		FOREACH_QUEUE(sc, que) {
4009 			index = que->me >> 1;
4010 			ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
4011 			if (que->me & 1) {
4012 				ivar &= 0xFF00FFFF;
4013 				ivar |= (que->me | E1000_IVAR_VALID) << 16;
4014 			} else {
4015 				ivar &= 0xFFFFFF00;
4016 				ivar |= que->me | E1000_IVAR_VALID;
4017 			}
4018 			E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
4019 		}
4020 
4021 		/* TX entries */
4022 		FOREACH_QUEUE(sc, que) {
4023 			index = que->me >> 1;
4024 			ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
4025 			if (que->me & 1) {
4026 				ivar &= 0x00FFFFFF;
4027 				ivar |= (que->me | E1000_IVAR_VALID) << 24;
4028 			} else {
4029 				ivar &= 0xFFFF00FF;
4030 				ivar |= (que->me | E1000_IVAR_VALID) << 8;
4031 			}
4032 			E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
4033 			sc->msix_queuesmask |= que->eims;
4034 		}
4035 
4036 		/* And for the link interrupt */
4037 		ivar = (sc->msix_linkvec | E1000_IVAR_VALID) << 8;
4038 		sc->msix_linkmask = 1 << sc->msix_linkvec;
4039 		E1000_WRITE_REG(&sc->hw, IVAR_MISC, ivar);
4040 		break;
4041 	case em_82576:
4042 		/* RX entries */
4043 		FOREACH_QUEUE(sc, que) {
4044 			index = que->me & 0x7; /* Each IVAR has two entries */
4045 			ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
4046 			if (que->me < 8) {
4047 				ivar &= 0xFFFFFF00;
4048 				ivar |= que->me | E1000_IVAR_VALID;
4049 			} else {
4050 				ivar &= 0xFF00FFFF;
4051 				ivar |= (que->me | E1000_IVAR_VALID) << 16;
4052 			}
4053 			E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
4054 			sc->msix_queuesmask |= que->eims;
4055 		}
4056 		/* TX entries */
4057 		FOREACH_QUEUE(sc, que) {
4058 			index = que->me & 0x7; /* Each IVAR has two entries */
4059 			ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
4060 			if (que->me < 8) {
4061 				ivar &= 0xFFFF00FF;
4062 				ivar |= (que->me | E1000_IVAR_VALID) << 8;
4063 			} else {
4064 				ivar &= 0x00FFFFFF;
4065 				ivar |= (que->me | E1000_IVAR_VALID) << 24;
4066 			}
4067 			E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
4068 			sc->msix_queuesmask |= que->eims;
4069 		}
4070 
4071 		/* And for the link interrupt */
4072 		ivar = (sc->msix_linkvec | E1000_IVAR_VALID) << 8;
4073 		sc->msix_linkmask = 1 << sc->msix_linkvec;
4074 		E1000_WRITE_REG(&sc->hw, IVAR_MISC, ivar);
4075 		break;
4076 	default:
4077 		panic("unsupported mac");
4078 		break;
4079 	}
4080 
4081 	/* Set the starting interrupt rate */
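	/*
	 * The EITR interval is expressed in hardware ticks; the 4000000
	 * constant below assumes roughly 250 ns per tick, and the mask keeps
	 * the value aligned to the interval field.
	 */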
4082 	newitr = (4000000 / MAX_INTS_PER_SEC) & 0x7FFC;
4083 
4084 	if (sc->hw.mac_type == em_82575)
4085 		newitr |= newitr << 16;
4086 	else
4087 		newitr |= E1000_EITR_CNT_IGNR;
4088 
4089 	FOREACH_QUEUE(sc, que)
4090 		E1000_WRITE_REG(&sc->hw, EITR(que->me), newitr);
4091 
4092 	return (0);
4093 }
4094 
4095 void
4096 em_enable_queue_intr_msix(struct em_queue *que)
4097 {
4098 	E1000_WRITE_REG(&que->sc->hw, EIMS, que->eims);
4099 }
4100 #endif /* !SMALL_KERNEL */
4101 
4102 int
4103 em_allocate_desc_rings(struct em_softc *sc)
4104 {
4105 	struct em_queue *que;
4106 
4107 	FOREACH_QUEUE(sc, que) {
4108 		/* Allocate Transmit Descriptor ring */
4109 		if (em_dma_malloc(sc, sc->sc_tx_slots * sizeof(struct em_tx_desc),
4110 		    &que->tx.sc_tx_dma) != 0) {
4111 			printf("%s: Unable to allocate tx_desc memory\n",
4112 			    DEVNAME(sc));
4113 			return (ENOMEM);
4114 		}
4115 		que->tx.sc_tx_desc_ring =
4116 		    (struct em_tx_desc *)que->tx.sc_tx_dma.dma_vaddr;
4117 
4118 		/* Allocate Receive Descriptor ring */
4119 		if (em_dma_malloc(sc, sc->sc_rx_slots * sizeof(struct em_rx_desc),
4120 		    &que->rx.sc_rx_dma) != 0) {
4121 			printf("%s: Unable to allocate rx_desc memory\n",
4122 			    DEVNAME(sc));
4123 			return (ENOMEM);
4124 		}
4125 		que->rx.sc_rx_desc_ring =
4126 		    (struct em_rx_desc *)que->rx.sc_rx_dma.dma_vaddr;
4127 	}
4128 
4129 	return (0);
4130 }
4131 
4132 int
4133 em_get_sffpage(struct em_softc *sc, struct if_sffpage *sff)
4134 {
4135 	struct em_hw *hw = &sc->hw;
4136 	size_t i;
4137 	int off;
4138 
4139 	if (hw->mac_type != em_82575 && hw->mac_type != em_82580 &&
4140 	    hw->mac_type != em_82576 &&
4141 	    hw->mac_type != em_i210 && hw->mac_type != em_i350)
4142 		return (ENODEV);
4143 
4144 	if (sff->sff_addr == IFSFF_ADDR_EEPROM)
4145 		off = E1000_I2CCMD_SFP_DATA_ADDR(0);
4146 	else if (sff->sff_addr == IFSFF_ADDR_DDM)
4147 		off = E1000_I2CCMD_SFP_DIAG_ADDR(0);
4148 	else
4149 		return (EIO);
4150 
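	/* Read the requested SFF page one byte at a time over I2C. */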
4151 	for (i = 0; i < sizeof(sff->sff_data); i++) {
4152 		if (em_read_sfp_data_byte(hw, off + i,
4153 		    &sff->sff_data[i]) != E1000_SUCCESS)
4154 			return (EIO);
4155 	}
4156 
4157 	return (0);
4158 }
4159