xref: /freebsd/sys/dev/cxgb/cxgb_main.c (revision 0957b409)
1 /**************************************************************************
2 SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 
4 Copyright (c) 2007-2009, Chelsio Inc.
5 All rights reserved.
6 
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
9 
10  1. Redistributions of source code must retain the above copyright notice,
11     this list of conditions and the following disclaimer.
12 
13  2. Neither the name of the Chelsio Corporation nor the names of its
14     contributors may be used to endorse or promote products derived from
15     this software without specific prior written permission.
16 
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.
28 
29 ***************************************************************************/
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include "opt_inet.h"
35 
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/bus.h>
40 #include <sys/module.h>
41 #include <sys/pciio.h>
42 #include <sys/conf.h>
43 #include <machine/bus.h>
44 #include <machine/resource.h>
45 #include <sys/ktr.h>
46 #include <sys/rman.h>
47 #include <sys/ioccom.h>
48 #include <sys/mbuf.h>
49 #include <sys/linker.h>
50 #include <sys/firmware.h>
51 #include <sys/socket.h>
52 #include <sys/sockio.h>
53 #include <sys/smp.h>
54 #include <sys/sysctl.h>
55 #include <sys/syslog.h>
56 #include <sys/queue.h>
57 #include <sys/taskqueue.h>
58 #include <sys/proc.h>
59 
60 #include <net/bpf.h>
61 #include <net/ethernet.h>
62 #include <net/if.h>
63 #include <net/if_var.h>
64 #include <net/if_arp.h>
65 #include <net/if_dl.h>
66 #include <net/if_media.h>
67 #include <net/if_types.h>
68 #include <net/if_vlan_var.h>
69 
70 #include <netinet/in_systm.h>
71 #include <netinet/in.h>
72 #include <netinet/if_ether.h>
73 #include <netinet/ip.h>
75 #include <netinet/tcp.h>
76 #include <netinet/udp.h>
77 #include <netinet/netdump/netdump.h>
78 
79 #include <dev/pci/pcireg.h>
80 #include <dev/pci/pcivar.h>
81 #include <dev/pci/pci_private.h>
82 
83 #include <cxgb_include.h>
84 
85 #ifdef PRIV_SUPPORTED
86 #include <sys/priv.h>
87 #endif
88 
89 static int cxgb_setup_interrupts(adapter_t *);
90 static void cxgb_teardown_interrupts(adapter_t *);
91 static void cxgb_init(void *);
92 static int cxgb_init_locked(struct port_info *);
93 static int cxgb_uninit_locked(struct port_info *);
94 static int cxgb_uninit_synchronized(struct port_info *);
95 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
96 static int cxgb_media_change(struct ifnet *);
97 static int cxgb_ifm_type(int);
98 static void cxgb_build_medialist(struct port_info *);
99 static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
100 static uint64_t cxgb_get_counter(struct ifnet *, ift_counter);
101 static int setup_sge_qsets(adapter_t *);
102 static void cxgb_async_intr(void *);
103 static void cxgb_tick_handler(void *, int);
104 static void cxgb_tick(void *);
105 static void link_check_callout(void *);
106 static void check_link_status(void *, int);
107 static void setup_rss(adapter_t *sc);
108 static int alloc_filters(struct adapter *);
109 static int setup_hw_filters(struct adapter *);
110 static int set_filter(struct adapter *, int, const struct filter_info *);
111 static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
112     unsigned int, u64, u64);
113 static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
114     unsigned int, u64, u64);
115 #ifdef TCP_OFFLOAD
116 static int cpl_not_handled(struct sge_qset *, struct rsp_desc *, struct mbuf *);
117 #endif
118 
119 /* Attachment glue for the PCI controller end of the device.  Each port of
120  * the device is attached separately, as defined later.
121  */
122 static int cxgb_controller_probe(device_t);
123 static int cxgb_controller_attach(device_t);
124 static int cxgb_controller_detach(device_t);
125 static void cxgb_free(struct adapter *);
126 static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
127     unsigned int end);
128 static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
129 static int cxgb_get_regs_len(void);
130 static void touch_bars(device_t dev);
131 static void cxgb_update_mac_settings(struct port_info *p);
132 #ifdef TCP_OFFLOAD
133 static int toe_capability(struct port_info *, int);
134 #endif
135 
136 /* Table for probing the cards.  The desc field isn't actually used */
137 struct cxgb_ident {
138 	uint16_t	vendor;
139 	uint16_t	device;
140 	int		index;
141 	char		*desc;
142 } cxgb_identifiers[] = {
143 	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
144 	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
145 	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
146 	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
147 	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
148 	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
149 	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
150 	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
151 	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
152 	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
153 	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
154 	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
155 	{PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
156 	{PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
157 	{0, 0, 0, NULL}
158 };
159 
160 static device_method_t cxgb_controller_methods[] = {
161 	DEVMETHOD(device_probe,		cxgb_controller_probe),
162 	DEVMETHOD(device_attach,	cxgb_controller_attach),
163 	DEVMETHOD(device_detach,	cxgb_controller_detach),
164 
165 	DEVMETHOD_END
166 };
167 
168 static driver_t cxgb_controller_driver = {
169 	"cxgbc",
170 	cxgb_controller_methods,
171 	sizeof(struct adapter)
172 };
173 
174 static int cxgbc_mod_event(module_t, int, void *);
175 static devclass_t	cxgb_controller_devclass;
176 DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass,
177     cxgbc_mod_event, 0);
178 MODULE_PNP_INFO("U16:vendor;U16:device", pci, cxgbc, cxgb_identifiers,
179     nitems(cxgb_identifiers) - 1);
180 MODULE_VERSION(cxgbc, 1);
181 MODULE_DEPEND(cxgbc, firmware, 1, 1, 1);
182 
183 /*
184  * Attachment glue for the ports.  Attachment is done directly to the
185  * controller device.
186  */
187 static int cxgb_port_probe(device_t);
188 static int cxgb_port_attach(device_t);
189 static int cxgb_port_detach(device_t);
190 
191 static device_method_t cxgb_port_methods[] = {
192 	DEVMETHOD(device_probe,		cxgb_port_probe),
193 	DEVMETHOD(device_attach,	cxgb_port_attach),
194 	DEVMETHOD(device_detach,	cxgb_port_detach),
195 	{ 0, 0 }
196 };
197 
198 static driver_t cxgb_port_driver = {
199 	"cxgb",
200 	cxgb_port_methods,
201 	0
202 };
203 
204 static d_ioctl_t cxgb_extension_ioctl;
205 static d_open_t cxgb_extension_open;
206 static d_close_t cxgb_extension_close;
207 
208 static struct cdevsw cxgb_cdevsw = {
209        .d_version =    D_VERSION,
210        .d_flags =      0,
211        .d_open =       cxgb_extension_open,
212        .d_close =      cxgb_extension_close,
213        .d_ioctl =      cxgb_extension_ioctl,
214        .d_name =       "cxgb",
215 };
216 
217 static devclass_t	cxgb_port_devclass;
218 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
219 MODULE_VERSION(cxgb, 1);
220 
221 NETDUMP_DEFINE(cxgb);
222 
223 static struct mtx t3_list_lock;
224 static SLIST_HEAD(, adapter) t3_list;
225 #ifdef TCP_OFFLOAD
226 static struct mtx t3_uld_list_lock;
227 static SLIST_HEAD(, uld_info) t3_uld_list;
228 #endif
229 
230 /*
231  * The driver uses the best interrupt scheme available on a platform in the
232  * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
233  * of these schemes the driver may consider as follows:
234  *
235  * msi = 2: choose from among all three options
236  * msi = 1: only consider MSI and pin interrupts
237  * msi = 0: force pin interrupts
238  */
239 static int msi_allowed = 2;
240 
241 SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
242 SYSCTL_INT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
243     "MSI-X, MSI, INTx selector");
244 
245 /*
246  * The driver uses an auto-queue algorithm by default.
247  * To disable it and force a single queue-set per port, use multiq = 0
248  */
249 static int multiq = 1;
250 SYSCTL_INT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
251     "use min(ncpus/ports, 8) queue-sets per port");
252 
253 /*
254  * By default the driver will not update the firmware unless
255  * it was compiled against a newer version.
257  */
258 static int force_fw_update = 0;
259 SYSCTL_INT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
260     "update firmware even if up to date");
261 
262 int cxgb_use_16k_clusters = -1;
263 SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
264     &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");
265 
266 static int nfilters = -1;
267 SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
268     &nfilters, 0, "max number of entries in the filter table");
269 
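/*
 * Upper and lower bounds on the sizes of the SGE descriptor rings and
 * free lists.
 */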
270 enum {
271 	MAX_TXQ_ENTRIES      = 16384,
272 	MAX_CTRL_TXQ_ENTRIES = 1024,
273 	MAX_RSPQ_ENTRIES     = 16384,
274 	MAX_RX_BUFFERS       = 16384,
275 	MAX_RX_JUMBO_BUFFERS = 16384,
276 	MIN_TXQ_ENTRIES      = 4,
277 	MIN_CTRL_TXQ_ENTRIES = 4,
278 	MIN_RSPQ_ENTRIES     = 32,
279 	MIN_FL_ENTRIES       = 32,
280 	MIN_FL_JUMBO_ENTRIES = 32
281 };
282 
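/*
 * Software state for one hardware packet filter.  alloc_filters() allocates
 * the table and setup_hw_filters()/set_filter() program the entries into the
 * adapter.
 */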
283 struct filter_info {
284 	u32 sip;
285 	u32 sip_mask;
286 	u32 dip;
287 	u16 sport;
288 	u16 dport;
289 	u32 vlan:12;
290 	u32 vlan_prio:3;
291 	u32 mac_hit:1;
292 	u32 mac_idx:4;
293 	u32 mac_vld:1;
294 	u32 pkt_type:2;
295 	u32 report_filter_id:1;
296 	u32 pass:1;
297 	u32 rss:1;
298 	u32 qset:3;
299 	u32 locked:1;
300 	u32 valid:1;
301 };
302 
303 enum { FILTER_NO_VLAN_PRI = 7 };
304 
305 #define EEPROM_MAGIC 0x38E2F10C
306 
307 #define PORT_MASK ((1 << MAX_NPORTS) - 1)
308 
309 
310 static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
311 
312 
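/* Map the T3 chip revision to the letter used in firmware/SRAM file names. */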
313 static __inline char
314 t3rev2char(struct adapter *adapter)
315 {
316 	char rev = 'z';
317 
318 	switch(adapter->params.rev) {
319 	case T3_REV_A:
320 		rev = 'a';
321 		break;
322 	case T3_REV_B:
323 	case T3_REV_B2:
324 		rev = 'b';
325 		break;
326 	case T3_REV_C:
327 		rev = 'c';
328 		break;
329 	}
330 	return rev;
331 }
332 
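/* Look up the cxgb_identifiers entry matching this device's PCI IDs. */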
333 static struct cxgb_ident *
334 cxgb_get_ident(device_t dev)
335 {
336 	struct cxgb_ident *id;
337 
338 	for (id = cxgb_identifiers; id->desc != NULL; id++) {
339 		if ((id->vendor == pci_get_vendor(dev)) &&
340 		    (id->device == pci_get_device(dev))) {
341 			return (id);
342 		}
343 	}
344 	return (NULL);
345 }
346 
347 static const struct adapter_info *
348 cxgb_get_adapter_info(device_t dev)
349 {
350 	struct cxgb_ident *id;
351 	const struct adapter_info *ai;
352 
353 	id = cxgb_get_ident(dev);
354 	if (id == NULL)
355 		return (NULL);
356 
357 	ai = t3_get_adapter_info(id->index);
358 
359 	return (ai);
360 }
361 
362 static int
363 cxgb_controller_probe(device_t dev)
364 {
365 	const struct adapter_info *ai;
366 	char *ports, buf[80];
367 	int nports;
368 
369 	ai = cxgb_get_adapter_info(dev);
370 	if (ai == NULL)
371 		return (ENXIO);
372 
373 	nports = ai->nports0 + ai->nports1;
374 	if (nports == 1)
375 		ports = "port";
376 	else
377 		ports = "ports";
378 
379 	snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
380 	device_set_desc_copy(dev, buf);
381 	return (BUS_PROBE_DEFAULT);
382 }
383 
384 #define FW_FNAME "cxgb_t3fw"
385 #define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
386 #define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
387 
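/*
 * Load the cxgb_t3fw firmware module, write the image to the card, and
 * record the version that is now running.
 */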
388 static int
389 upgrade_fw(adapter_t *sc)
390 {
391 	const struct firmware *fw;
392 	int status;
393 	u32 vers;
394 
395 	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
396 		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
397 		return (ENOENT);
398 	} else
399 		device_printf(sc->dev, "installing firmware on card\n");
400 	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
401 
402 	if (status != 0) {
403 		device_printf(sc->dev, "failed to install firmware: %d\n",
404 		    status);
405 	} else {
406 		t3_get_fw_version(sc, &vers);
407 		snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
408 		    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
409 		    G_FW_VERSION_MICRO(vers));
410 	}
411 
412 	firmware_put(fw, FIRMWARE_UNLOAD);
413 
414 	return (status);
415 }
416 
417 /*
418  * The cxgb_controller_attach function is responsible for the initial
419  * bringup of the device.  Its responsibilities include:
420  *
421  *  1. Determine if the device supports MSI or MSI-X.
422  *  2. Allocate bus resources so that we can access the Base Address Register
423  *  3. Create and initialize mutexes for the controller and its control
424  *     logic such as SGE and MDIO.
425  *  4. Call hardware specific setup routine for the adapter as a whole.
426  *  5. Allocate the BAR for doing MSI-X.
427  *  6. Setup the line interrupt iff MSI-X is not supported.
428  *  7. Create the driver's taskq.
429  *  8. Start one task queue service thread.
430  *  9. Check if the firmware and SRAM are up-to-date.  They will be
431  *     auto-updated later (before FULL_INIT_DONE), if required.
432  * 10. Create a child device for each MAC (port).
433  * 11. Initialize T3 private state.
434  * 12. Trigger the LED.
435  * 13. Set up offload iff supported.
436  * 14. Reset/restart the tick callout.
437  * 15. Attach sysctls.
438  *
439  * NOTE: Any modification or deviation from this list MUST be reflected in
440  * the above comment.  Failure to do so will result in problems on various
441  * error conditions including link flapping.
442  */
443 static int
444 cxgb_controller_attach(device_t dev)
445 {
446 	device_t child;
447 	const struct adapter_info *ai;
448 	struct adapter *sc;
449 	int i, error = 0;
450 	uint32_t vers;
451 	int port_qsets = 1;
452 	int msi_needed, reg;
453 	char buf[80];
454 
455 	sc = device_get_softc(dev);
456 	sc->dev = dev;
457 	sc->msi_count = 0;
458 	ai = cxgb_get_adapter_info(dev);
459 
460 	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
461 	    device_get_unit(dev));
462 	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
463 
464 	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
465 	    device_get_unit(dev));
466 	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
467 	    device_get_unit(dev));
468 	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
469 	    device_get_unit(dev));
470 
471 	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
472 	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
473 	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
474 
475 	mtx_lock(&t3_list_lock);
476 	SLIST_INSERT_HEAD(&t3_list, sc, link);
477 	mtx_unlock(&t3_list_lock);
478 
479 	/* Find the PCIe link width and set max read request to 4KB. */
480 	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
481 		uint16_t lnk;
482 
483 		lnk = pci_read_config(dev, reg + PCIER_LINK_STA, 2);
484 		sc->link_width = (lnk & PCIEM_LINK_STA_WIDTH) >> 4;
485 		if (sc->link_width < 8 &&
486 		    (ai->caps & SUPPORTED_10000baseT_Full)) {
487 			device_printf(sc->dev,
488 			    "PCIe x%d Link, expect reduced performance\n",
489 			    sc->link_width);
490 		}
491 
492 		pci_set_max_read_req(dev, 4096);
493 	}
494 
495 	touch_bars(dev);
496 	pci_enable_busmaster(dev);
497 	/*
498 	 * Allocate the registers and make them available to the driver.
499 	 * The registers that we care about for NIC mode are in BAR 0
500 	 */
501 	sc->regs_rid = PCIR_BAR(0);
502 	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
503 	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
504 		device_printf(dev, "Cannot allocate BAR region 0\n");
505 		error = ENXIO;
506 		goto out;
507 	}
508 
509 	sc->bt = rman_get_bustag(sc->regs_res);
510 	sc->bh = rman_get_bushandle(sc->regs_res);
511 	sc->mmio_len = rman_get_size(sc->regs_res);
512 
513 	for (i = 0; i < MAX_NPORTS; i++)
514 		sc->port[i].adapter = sc;
515 
516 	if (t3_prep_adapter(sc, ai, 1) < 0) {
517 		printf("prep adapter failed\n");
518 		error = ENODEV;
519 		goto out;
520 	}
521 
522 	sc->udbs_rid = PCIR_BAR(2);
523 	sc->udbs_res = NULL;
524 	if (is_offload(sc) &&
525 	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
526 		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
527 		device_printf(dev, "Cannot allocate BAR region 1\n");
528 		error = ENXIO;
529 		goto out;
530 	}
531 
532         /* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
533 	 * enough messages for the queue sets.  If that fails, try falling
534 	 * back to MSI.  If that fails, then try falling back to the legacy
535 	 * interrupt pin model.
536 	 */
537 	sc->msix_regs_rid = 0x20;
538 	if ((msi_allowed >= 2) &&
539 	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
540 	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
541 
542 		if (multiq)
543 			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
544 		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
545 
546 		if (pci_msix_count(dev) == 0 ||
547 		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
548 		    sc->msi_count != msi_needed) {
549 			device_printf(dev, "alloc msix failed - "
550 				      "msi_count=%d, msi_needed=%d, err=%d; "
551 				      "will try MSI\n", sc->msi_count,
552 				      msi_needed, error);
553 			sc->msi_count = 0;
554 			port_qsets = 1;
555 			pci_release_msi(dev);
556 			bus_release_resource(dev, SYS_RES_MEMORY,
557 			    sc->msix_regs_rid, sc->msix_regs_res);
558 			sc->msix_regs_res = NULL;
559 		} else {
560 			sc->flags |= USING_MSIX;
561 			sc->cxgb_intr = cxgb_async_intr;
562 			device_printf(dev,
563 				      "using MSI-X interrupts (%u vectors)\n",
564 				      sc->msi_count);
565 		}
566 	}
567 
568 	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
569 		sc->msi_count = 1;
570 		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
571 			device_printf(dev, "alloc msi failed - "
572 				      "err=%d; will try INTx\n", error);
573 			sc->msi_count = 0;
574 			port_qsets = 1;
575 			pci_release_msi(dev);
576 		} else {
577 			sc->flags |= USING_MSI;
578 			sc->cxgb_intr = t3_intr_msi;
579 			device_printf(dev, "using MSI interrupts\n");
580 		}
581 	}
582 	if (sc->msi_count == 0) {
583 		device_printf(dev, "using line interrupts\n");
584 		sc->cxgb_intr = t3b_intr;
585 	}
586 
587 	/* Create a private taskqueue thread for handling driver events */
588 	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
589 	    taskqueue_thread_enqueue, &sc->tq);
590 	if (sc->tq == NULL) {
591 		device_printf(dev, "failed to allocate controller task queue\n");
		error = ENOMEM;
592 		goto out;
593 	}
594 
595 	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
596 	    device_get_nameunit(dev));
597 	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
598 
599 
600 	/* Create a periodic callout for checking adapter status */
601 	callout_init(&sc->cxgb_tick_ch, 1);
602 
603 	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
604 		/*
605 		 * Warn user that a firmware update will be attempted in init.
606 		 */
607 		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
608 		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
609 		sc->flags &= ~FW_UPTODATE;
610 	} else {
611 		sc->flags |= FW_UPTODATE;
612 	}
613 
614 	if (t3_check_tpsram_version(sc) < 0) {
615 		/*
616 		 * Warn user that a firmware update will be attempted in init.
617 		 */
618 		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
619 		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
620 		sc->flags &= ~TPS_UPTODATE;
621 	} else {
622 		sc->flags |= TPS_UPTODATE;
623 	}
624 
625 	/*
626 	 * Create a child device for each MAC.  The ethernet attachment
627 	 * will be done in these children.
628 	 */
629 	for (i = 0; i < (sc)->params.nports; i++) {
630 		struct port_info *pi;
631 
632 		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
633 			device_printf(dev, "failed to add child port\n");
634 			error = EINVAL;
635 			goto out;
636 		}
637 		pi = &sc->port[i];
638 		pi->adapter = sc;
639 		pi->nqsets = port_qsets;
640 		pi->first_qset = i*port_qsets;
641 		pi->port_id = i;
642 		pi->tx_chan = i >= ai->nports0;
643 		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
644 		sc->rxpkt_map[pi->txpkt_intf] = i;
645 		sc->port[i].tx_chan = i >= ai->nports0;
646 		sc->portdev[i] = child;
647 		device_set_softc(child, pi);
648 	}
649 	if ((error = bus_generic_attach(dev)) != 0)
650 		goto out;
651 
652 	/* initialize sge private state */
653 	t3_sge_init_adapter(sc);
654 
655 	t3_led_ready(sc);
656 
657 	error = t3_get_fw_version(sc, &vers);
658 	if (error)
659 		goto out;
660 
661 	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
662 	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
663 	    G_FW_VERSION_MICRO(vers));
664 
665 	snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
666 		 ai->desc, is_offload(sc) ? "R" : "",
667 		 sc->params.vpd.ec, sc->params.vpd.sn);
668 	device_set_desc_copy(dev, buf);
669 
670 	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
671 		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
672 		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
673 
674 	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
675 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
676 	t3_add_attach_sysctls(sc);
677 
678 #ifdef TCP_OFFLOAD
679 	for (i = 0; i < NUM_CPL_HANDLERS; i++)
680 		sc->cpl_handler[i] = cpl_not_handled;
681 #endif
682 
683 	t3_intr_clear(sc);
684 	error = cxgb_setup_interrupts(sc);
685 out:
686 	if (error)
687 		cxgb_free(sc);
688 
689 	return (error);
690 }
691 
692 /*
693  * The cxgb_controller_detach routine is called when the device is
694  * unloaded from the system.
695  */
696 
697 static int
698 cxgb_controller_detach(device_t dev)
699 {
700 	struct adapter *sc;
701 
702 	sc = device_get_softc(dev);
703 
704 	cxgb_free(sc);
705 
706 	return (0);
707 }
708 
709 /*
710  * cxgb_free() is called by the cxgb_controller_detach() routine to tear
711  * down the structures that were built up in cxgb_controller_attach(), and
712  * should be the final piece of work done when fully unloading the driver.
713  * Its responsibilities include:
714  *
716  *  1. Shutting down the threads started by the cxgb_controller_attach()
717  *     routine.
718  *  2. Stopping the lower level device and all callouts (cxgb_down()).
719  *  3. Detaching all of the port devices created during the
720  *     cxgb_controller_attach() routine.
721  *  4. Removing the device children created via cxgb_controller_attach().
722  *  5. Releasing PCI resources associated with the device.
723  *  6. Turning off the offload support, iff it was turned on.
724  *  7. Destroying the mutexes created in cxgb_controller_attach().
725  *
726  */
727 static void
728 cxgb_free(struct adapter *sc)
729 {
730 	int i, nqsets = 0;
731 
732 	ADAPTER_LOCK(sc);
733 	sc->flags |= CXGB_SHUTDOWN;
734 	ADAPTER_UNLOCK(sc);
735 
736 	/*
737 	 * Make sure all child devices are gone.
738 	 */
739 	bus_generic_detach(sc->dev);
740 	for (i = 0; i < (sc)->params.nports; i++) {
741 		if (sc->portdev[i] &&
742 		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
743 			device_printf(sc->dev, "failed to delete child port\n");
744 		nqsets += sc->port[i].nqsets;
745 	}
746 
747 	/*
748 	 * At this point, it is as if cxgb_port_detach has run on all ports, and
749 	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
750 	 * all open devices have been closed.
751 	 */
752 	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
753 					   __func__, sc->open_device_map));
754 	for (i = 0; i < sc->params.nports; i++) {
755 		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
756 						  __func__, i));
757 	}
758 
759 	/*
760 	 * Finish off the adapter's callouts.
761 	 */
762 	callout_drain(&sc->cxgb_tick_ch);
763 	callout_drain(&sc->sge_timer_ch);
764 
765 	/*
766 	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
767 	 * sysctls are cleaned up by the kernel linker.
768 	 */
769 	if (sc->flags & FULL_INIT_DONE) {
770  		t3_free_sge_resources(sc, nqsets);
771  		sc->flags &= ~FULL_INIT_DONE;
772  	}
773 
774 	/*
775 	 * Release all interrupt resources.
776 	 */
777 	cxgb_teardown_interrupts(sc);
778 	if (sc->flags & (USING_MSI | USING_MSIX)) {
779 		device_printf(sc->dev, "releasing msi message(s)\n");
780 		pci_release_msi(sc->dev);
781 	} else {
782 		device_printf(sc->dev, "no msi message to release\n");
783 	}
784 
785 	if (sc->msix_regs_res != NULL) {
786 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
787 		    sc->msix_regs_res);
788 	}
789 
790 	/*
791 	 * Free the adapter's taskqueue.
792 	 */
793 	if (sc->tq != NULL) {
794 		taskqueue_free(sc->tq);
795 		sc->tq = NULL;
796 	}
797 
798 	free(sc->filters, M_DEVBUF);
799 	t3_sge_free(sc);
800 
801 	if (sc->udbs_res != NULL)
802 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
803 		    sc->udbs_res);
804 
805 	if (sc->regs_res != NULL)
806 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
807 		    sc->regs_res);
808 
809 	MTX_DESTROY(&sc->mdio_lock);
810 	MTX_DESTROY(&sc->sge.reg_lock);
811 	MTX_DESTROY(&sc->elmer_lock);
812 	mtx_lock(&t3_list_lock);
813 	SLIST_REMOVE(&t3_list, sc, adapter, link);
814 	mtx_unlock(&t3_list_lock);
815 	ADAPTER_LOCK_DEINIT(sc);
816 }
817 
818 /**
819  *	setup_sge_qsets - configure SGE Tx/Rx/response queues
820  *	@sc: the controller softc
821  *
822  *	Determines how many sets of SGE queues to use and initializes them.
823  *	We support multiple queue sets per port if we have MSI-X, otherwise
824  *	just one queue set per port.
825  */
826 static int
827 setup_sge_qsets(adapter_t *sc)
828 {
829 	int i, j, err, irq_idx = 0, qset_idx = 0;
830 	u_int ntxq = SGE_TXQ_PER_SET;
831 
832 	if ((err = t3_sge_alloc(sc)) != 0) {
833 		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
834 		return (err);
835 	}
836 
837 	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
838 		irq_idx = -1;
839 
840 	for (i = 0; i < (sc)->params.nports; i++) {
841 		struct port_info *pi = &sc->port[i];
842 
843 		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
844 			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
845 			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
846 			    &sc->params.sge.qset[qset_idx], ntxq, pi);
847 			if (err) {
848 				t3_free_sge_resources(sc, qset_idx);
849 				device_printf(sc->dev,
850 				    "t3_sge_alloc_qset failed with %d\n", err);
851 				return (err);
852 			}
853 		}
854 	}
855 
856 	return (0);
857 }
858 
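/*
 * Undo cxgb_setup_interrupts: tear down each MSI-X vector's handler and
 * release its IRQ resource, then do the same for the INTx/MSI interrupt if
 * one was established.
 */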
859 static void
860 cxgb_teardown_interrupts(adapter_t *sc)
861 {
862 	int i;
863 
864 	for (i = 0; i < SGE_QSETS; i++) {
865 		if (sc->msix_intr_tag[i] == NULL) {
866 
867 			/* Should have been setup fully or not at all */
868 			KASSERT(sc->msix_irq_res[i] == NULL &&
869 				sc->msix_irq_rid[i] == 0,
870 				("%s: half-done interrupt (%d).", __func__, i));
871 
872 			continue;
873 		}
874 
875 		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
876 				  sc->msix_intr_tag[i]);
877 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
878 				     sc->msix_irq_res[i]);
879 
880 		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
881 		sc->msix_irq_rid[i] = 0;
882 	}
883 
884 	if (sc->intr_tag) {
885 		KASSERT(sc->irq_res != NULL,
886 			("%s: half-done interrupt.", __func__));
887 
888 		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
889 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
890 				     sc->irq_res);
891 
892 		sc->irq_res = sc->intr_tag = NULL;
893 		sc->irq_rid = 0;
894 	}
895 }
896 
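/*
 * Hook up the adapter's interrupts.  Resource ID 0 is the INTx line, ID 1 is
 * the first MSI/MSI-X message (used for the async/error interrupt), and IDs
 * 2 and up are the per queue-set MSI-X vectors.
 */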
897 static int
898 cxgb_setup_interrupts(adapter_t *sc)
899 {
900 	struct resource *res;
901 	void *tag;
902 	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
903 
904 	sc->irq_rid = intr_flag ? 1 : 0;
905 	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
906 					     RF_SHAREABLE | RF_ACTIVE);
907 	if (sc->irq_res == NULL) {
908 		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
909 			      intr_flag, sc->irq_rid);
910 		err = EINVAL;
911 		sc->irq_rid = 0;
912 	} else {
913 		err = bus_setup_intr(sc->dev, sc->irq_res,
914 		    INTR_MPSAFE | INTR_TYPE_NET, NULL,
915 		    sc->cxgb_intr, sc, &sc->intr_tag);
916 
917 		if (err) {
918 			device_printf(sc->dev,
919 				      "Cannot set up interrupt (%x, %u, %d)\n",
920 				      intr_flag, sc->irq_rid, err);
921 			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
922 					     sc->irq_res);
923 			sc->irq_res = sc->intr_tag = NULL;
924 			sc->irq_rid = 0;
925 		}
926 	}
927 
928 	/* That's all for INTx or MSI */
929 	if (!(intr_flag & USING_MSIX) || err)
930 		return (err);
931 
932 	bus_describe_intr(sc->dev, sc->irq_res, sc->intr_tag, "err");
933 	for (i = 0; i < sc->msi_count - 1; i++) {
934 		rid = i + 2;
935 		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
936 					     RF_SHAREABLE | RF_ACTIVE);
937 		if (res == NULL) {
938 			device_printf(sc->dev, "Cannot allocate interrupt "
939 				      "for message %d\n", rid);
940 			err = EINVAL;
941 			break;
942 		}
943 
944 		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
945 				     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
946 		if (err) {
947 			device_printf(sc->dev, "Cannot set up interrupt "
948 				      "for message %d (%d)\n", rid, err);
949 			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
950 			break;
951 		}
952 
953 		sc->msix_irq_rid[i] = rid;
954 		sc->msix_irq_res[i] = res;
955 		sc->msix_intr_tag[i] = tag;
956 		bus_describe_intr(sc->dev, res, tag, "qs%d", i);
957 	}
958 
959 	if (err)
960 		cxgb_teardown_interrupts(sc);
961 
962 	return (err);
963 }
964 
965 
966 static int
967 cxgb_port_probe(device_t dev)
968 {
969 	struct port_info *p;
970 	char buf[80];
971 	const char *desc;
972 
973 	p = device_get_softc(dev);
974 	desc = p->phy.desc;
975 	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
976 	device_set_desc_copy(dev, buf);
977 	return (0);
978 }
979 
980 
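/*
 * Create the per-port character device that exports the driver's extension
 * ioctls (cxgb_extension_ioctl) to userland.
 */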
981 static int
982 cxgb_makedev(struct port_info *pi)
983 {
984 
985 	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
986 	    UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));
987 
988 	if (pi->port_cdev == NULL)
989 		return (ENOMEM);
990 
991 	pi->port_cdev->si_drv1 = (void *)pi;
992 
993 	return (0);
994 }
995 
996 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
997     IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
998     IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6)
999 #define CXGB_CAP_ENABLE CXGB_CAP
1000 
1001 static int
1002 cxgb_port_attach(device_t dev)
1003 {
1004 	struct port_info *p;
1005 	struct ifnet *ifp;
1006 	int err;
1007 	struct adapter *sc;
1008 
1009 	p = device_get_softc(dev);
1010 	sc = p->adapter;
1011 	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
1012 	    device_get_unit(device_get_parent(dev)), p->port_id);
1013 	PORT_LOCK_INIT(p, p->lockbuf);
1014 
1015 	callout_init(&p->link_check_ch, 1);
1016 	TASK_INIT(&p->link_check_task, 0, check_link_status, p);
1017 
1018 	/* Allocate an ifnet object and set it up */
1019 	ifp = p->ifp = if_alloc(IFT_ETHER);
1020 	if (ifp == NULL) {
1021 		device_printf(dev, "Cannot allocate ifnet\n");
1022 		return (ENOMEM);
1023 	}
1024 
1025 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1026 	ifp->if_init = cxgb_init;
1027 	ifp->if_softc = p;
1028 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1029 	ifp->if_ioctl = cxgb_ioctl;
1030 	ifp->if_transmit = cxgb_transmit;
1031 	ifp->if_qflush = cxgb_qflush;
1032 	ifp->if_get_counter = cxgb_get_counter;
1033 
1034 	ifp->if_capabilities = CXGB_CAP;
1035 #ifdef TCP_OFFLOAD
1036 	if (is_offload(sc))
1037 		ifp->if_capabilities |= IFCAP_TOE4;
1038 #endif
1039 	ifp->if_capenable = CXGB_CAP_ENABLE;
1040 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
1041 	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6;
1042 
1043 	/*
1044 	 * Disable TSO on 4-port - it isn't supported by the firmware.
1045 	 */
1046 	if (sc->params.nports > 2) {
1047 		ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1048 		ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1049 		ifp->if_hwassist &= ~CSUM_TSO;
1050 	}
1051 
1052 	ether_ifattach(ifp, p->hw_addr);
1053 
1054 	/* Attach driver netdump methods. */
1055 	NETDUMP_SET(ifp, cxgb);
1056 
1057 #ifdef DEFAULT_JUMBO
1058 	if (sc->params.nports <= 2)
1059 		ifp->if_mtu = ETHERMTU_JUMBO;
1060 #endif
1061 	if ((err = cxgb_makedev(p)) != 0) {
1062 		printf("makedev failed %d\n", err);
1063 		return (err);
1064 	}
1065 
1066 	/* Create a list of media supported by this port */
1067 	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1068 	    cxgb_media_status);
1069 	cxgb_build_medialist(p);
1070 
1071 	t3_sge_init_port(p);
1072 
1073 	return (err);
1074 }
1075 
1076 /*
1077  * cxgb_port_detach() is called via the device_detach methods when
1078  * cxgb_free() calls the bus_generic_detach.  It is responsible for
1079  * removing the device from the view of the kernel, i.e. from all
1080  * interfaces lists etc.  This routine is only called when the driver is
1081  * being unloaded, not when the link goes down.
1082  */
1083 static int
1084 cxgb_port_detach(device_t dev)
1085 {
1086 	struct port_info *p;
1087 	struct adapter *sc;
1088 	int i;
1089 
1090 	p = device_get_softc(dev);
1091 	sc = p->adapter;
1092 
1093 	/* Tell cxgb_ioctl and if_init that the port is going away */
1094 	ADAPTER_LOCK(sc);
1095 	SET_DOOMED(p);
1096 	wakeup(&sc->flags);
1097 	while (IS_BUSY(sc))
1098 		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1099 	SET_BUSY(sc);
1100 	ADAPTER_UNLOCK(sc);
1101 
1102 	if (p->port_cdev != NULL)
1103 		destroy_dev(p->port_cdev);
1104 
1105 	cxgb_uninit_synchronized(p);
1106 	ether_ifdetach(p->ifp);
1107 
1108 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1109 		struct sge_qset *qs = &sc->sge.qs[i];
1110 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1111 
1112 		callout_drain(&txq->txq_watchdog);
1113 		callout_drain(&txq->txq_timer);
1114 	}
1115 
1116 	PORT_LOCK_DEINIT(p);
1117 	if_free(p->ifp);
1118 	p->ifp = NULL;
1119 
1120 	ADAPTER_LOCK(sc);
1121 	CLR_BUSY(sc);
1122 	wakeup_one(&sc->flags);
1123 	ADAPTER_UNLOCK(sc);
1124 	return (0);
1125 }
1126 
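/*
 * Handle a fatal hardware error: quiesce the SGE and both MACs, mask further
 * interrupts, and log the firmware status registers.
 */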
1127 void
1128 t3_fatal_err(struct adapter *sc)
1129 {
1130 	u_int fw_status[4];
1131 
1132 	if (sc->flags & FULL_INIT_DONE) {
1133 		t3_sge_stop(sc);
1134 		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1135 		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1136 		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1137 		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1138 		t3_intr_disable(sc);
1139 	}
1140 	device_printf(sc->dev, "encountered fatal error, operation suspended\n");
1141 	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1142 		device_printf(sc->dev, "FW status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1143 		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1144 }
1145 
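/*
 * Walk the PCI capability list by hand and return the offset of the requested
 * capability, or 0 if the device does not have it.
 */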
1146 int
1147 t3_os_find_pci_capability(adapter_t *sc, int cap)
1148 {
1149 	device_t dev;
1150 	struct pci_devinfo *dinfo;
1151 	pcicfgregs *cfg;
1152 	uint32_t status;
1153 	uint8_t ptr;
1154 
1155 	dev = sc->dev;
1156 	dinfo = device_get_ivars(dev);
1157 	cfg = &dinfo->cfg;
1158 
1159 	status = pci_read_config(dev, PCIR_STATUS, 2);
1160 	if (!(status & PCIM_STATUS_CAPPRESENT))
1161 		return (0);
1162 
1163 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1164 	case 0:
1165 	case 1:
1166 		ptr = PCIR_CAP_PTR;
1167 		break;
1168 	case 2:
1169 		ptr = PCIR_CAP_PTR_2;
1170 		break;
1171 	default:
1172 		return (0);
1174 	}
1175 	ptr = pci_read_config(dev, ptr, 1);
1176 
1177 	while (ptr != 0) {
1178 		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1179 			return (ptr);
1180 		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1181 	}
1182 
1183 	return (0);
1184 }
1185 
1186 int
1187 t3_os_pci_save_state(struct adapter *sc)
1188 {
1189 	device_t dev;
1190 	struct pci_devinfo *dinfo;
1191 
1192 	dev = sc->dev;
1193 	dinfo = device_get_ivars(dev);
1194 
1195 	pci_cfg_save(dev, dinfo, 0);
1196 	return (0);
1197 }
1198 
1199 int
1200 t3_os_pci_restore_state(struct adapter *sc)
1201 {
1202 	device_t dev;
1203 	struct pci_devinfo *dinfo;
1204 
1205 	dev = sc->dev;
1206 	dinfo = device_get_ivars(dev);
1207 
1208 	pci_cfg_restore(dev, dinfo);
1209 	return (0);
1210 }
1211 
1212 /**
1213  *	t3_os_link_changed - handle link status changes
1214  *	@adapter: the adapter associated with the link change
1215  *	@port_id: the port index whose link status has changed
1216  *	@link_status: the new status of the link
1217  *	@speed: the new speed setting
1218  *	@duplex: the new duplex setting
1219  *	@fc: the new flow-control setting
 *	@mac_was_reset: non-zero if the MAC settings were lost due to a reset
1220  *
1221  *	This is the OS-dependent handler for link status changes.  The OS
1222  *	neutral handler takes care of most of the processing for these events,
1223  *	then calls this handler for any OS-specific processing.
1224  */
1225 void
1226 t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1227      int duplex, int fc, int mac_was_reset)
1228 {
1229 	struct port_info *pi = &adapter->port[port_id];
1230 	struct ifnet *ifp = pi->ifp;
1231 
1232 	/* no race with detach, so ifp should always be good */
1233 	KASSERT(ifp, ("%s: if detached.", __func__));
1234 
1235 	/* Reapply mac settings if they were lost due to a reset */
1236 	if (mac_was_reset) {
1237 		PORT_LOCK(pi);
1238 		cxgb_update_mac_settings(pi);
1239 		PORT_UNLOCK(pi);
1240 	}
1241 
1242 	if (link_status) {
1243 		ifp->if_baudrate = IF_Mbps(speed);
1244 		if_link_state_change(ifp, LINK_STATE_UP);
1245 	} else
1246 		if_link_state_change(ifp, LINK_STATE_DOWN);
1247 }
1248 
1249 /**
1250  *	t3_os_phymod_changed - handle PHY module changes
1251  *	@adap: the adapter whose PHY module changed
1252  *	@port_id: the index of the port with the new module
1253  *
1254  *	This is the OS-dependent handler for PHY module changes.  It is
1255  *	invoked when a PHY module is removed or inserted for any OS-specific
1256  *	processing.
1257  */
1258 void t3_os_phymod_changed(struct adapter *adap, int port_id)
1259 {
1260 	static const char *mod_str[] = {
1261 		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1262 	};
1263 	struct port_info *pi = &adap->port[port_id];
1264 	int mod = pi->phy.modtype;
1265 
1266 	if (mod != pi->media.ifm_cur->ifm_data)
1267 		cxgb_build_medialist(pi);
1268 
1269 	if (mod == phy_modtype_none)
1270 		if_printf(pi->ifp, "PHY module unplugged\n");
1271 	else {
1272 		KASSERT(mod < ARRAY_SIZE(mod_str),
1273 			("invalid PHY module type %d", mod));
1274 		if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1275 	}
1276 }
1277 
1278 void
1279 t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1280 {
1281 
1282 	/*
1283 	 * The ifnet might not be allocated when this gets called, as this is
1284 	 * called early in attach by t3_prep_adapter, so just save the address
1285 	 * in the port structure.
1286 	 */
1287 	if (cxgb_debug)
1288 		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1289 	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1290 }
1291 
1292 /*
1293  * Programs the XGMAC based on the settings in the ifnet.  These settings
1294  * include MTU, MAC address, mcast addresses, etc.
1295  */
1296 static void
1297 cxgb_update_mac_settings(struct port_info *p)
1298 {
1299 	struct ifnet *ifp = p->ifp;
1300 	struct t3_rx_mode rm;
1301 	struct cmac *mac = &p->mac;
1302 	int mtu, hwtagging;
1303 
1304 	PORT_LOCK_ASSERT_OWNED(p);
1305 
1306 	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1307 
1308 	mtu = ifp->if_mtu;
1309 	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1310 		mtu += ETHER_VLAN_ENCAP_LEN;
1311 
1312 	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1313 
1314 	t3_mac_set_mtu(mac, mtu);
1315 	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1316 	t3_mac_set_address(mac, 0, p->hw_addr);
1317 	t3_init_rx_mode(&rm, p);
1318 	t3_mac_set_rx_mode(mac, &rm);
1319 }
1320 
1321 
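/*
 * Wait, with a bounded number of 10ms sleeps, for the replies to n
 * outstanding management commands to show up on response queue 0.
 */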
1322 static int
1323 await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1324 			      unsigned long n)
1325 {
1326 	int attempts = 5;
1327 
1328 	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1329 		if (!--attempts)
1330 			return (ETIMEDOUT);
1331 		t3_os_sleep(10);
1332 	}
1333 	return (0);
1334 }
1335 
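/*
 * Initialize the parity-protected TP memories (SMT, L2T, routing table, and
 * a TCB) by writing benign entries to each, then wait for the firmware to
 * acknowledge all of the management messages.
 */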
1336 static int
1337 init_tp_parity(struct adapter *adap)
1338 {
1339 	int i;
1340 	struct mbuf *m;
1341 	struct cpl_set_tcb_field *greq;
1342 	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1343 
1344 	t3_tp_set_offload_mode(adap, 1);
1345 
1346 	for (i = 0; i < 16; i++) {
1347 		struct cpl_smt_write_req *req;
1348 
1349 		m = m_gethdr(M_WAITOK, MT_DATA);
1350 		req = mtod(m, struct cpl_smt_write_req *);
1351 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1352 		memset(req, 0, sizeof(*req));
1353 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1354 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1355 		req->iff = i;
1356 		t3_mgmt_tx(adap, m);
1357 	}
1358 
1359 	for (i = 0; i < 2048; i++) {
1360 		struct cpl_l2t_write_req *req;
1361 
1362 		m = m_gethdr(M_WAITOK, MT_DATA);
1363 		req = mtod(m, struct cpl_l2t_write_req *);
1364 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1365 		memset(req, 0, sizeof(*req));
1366 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1367 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1368 		req->params = htonl(V_L2T_W_IDX(i));
1369 		t3_mgmt_tx(adap, m);
1370 	}
1371 
1372 	for (i = 0; i < 2048; i++) {
1373 		struct cpl_rte_write_req *req;
1374 
1375 		m = m_gethdr(M_WAITOK, MT_DATA);
1376 		req = mtod(m, struct cpl_rte_write_req *);
1377 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1378 		memset(req, 0, sizeof(*req));
1379 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1380 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1381 		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1382 		t3_mgmt_tx(adap, m);
1383 	}
1384 
1385 	m = m_gethdr(M_WAITOK, MT_DATA);
1386 	greq = mtod(m, struct cpl_set_tcb_field *);
1387 	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1388 	memset(greq, 0, sizeof(*greq));
1389 	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1390 	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1391 	greq->mask = htobe64(1);
1392 	t3_mgmt_tx(adap, m);
1393 
1394 	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1395 	t3_tp_set_offload_mode(adap, 0);
1396 	return (i);
1397 }
1398 
1399 /**
1400  *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1401  *	@adap: the adapter
1402  *
1403  *	Sets up RSS to distribute packets to multiple receive queues.  We
1404  *	configure the RSS CPU lookup table to distribute to the number of HW
1405  *	receive queues, and the response queue lookup table to narrow that
1406  *	down to the response queues actually configured for each port.
1407  *	We always configure the RSS mapping for two ports since the mapping
1408  *	table has plenty of entries.
1409  */
1410 static void
1411 setup_rss(adapter_t *adap)
1412 {
1413 	int i;
1414 	u_int nq[2];
1415 	uint8_t cpus[SGE_QSETS + 1];
1416 	uint16_t rspq_map[RSS_TABLE_SIZE];
1417 
1418 	for (i = 0; i < SGE_QSETS; ++i)
1419 		cpus[i] = i;
1420 	cpus[SGE_QSETS] = 0xff;
1421 
1422 	nq[0] = nq[1] = 0;
1423 	for_each_port(adap, i) {
1424 		const struct port_info *pi = adap2pinfo(adap, i);
1425 
1426 		nq[pi->tx_chan] += pi->nqsets;
1427 	}
1428 	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1429 		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1430 		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1431 	}
1432 
1433 	/* Calculate the reverse RSS map table */
1434 	for (i = 0; i < SGE_QSETS; ++i)
1435 		adap->rrss_map[i] = 0xff;
1436 	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1437 		if (adap->rrss_map[rspq_map[i]] == 0xff)
1438 			adap->rrss_map[rspq_map[i]] = i;
1439 
1440 	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1441 		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1442 	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1443 	              cpus, rspq_map);
1444 
1445 }
1446 static void
1447 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1448 			      int hi, int port)
1449 {
1450 	struct mbuf *m;
1451 	struct mngt_pktsched_wr *req;
1452 
1453 	m = m_gethdr(M_NOWAIT, MT_DATA);
1454 	if (m) {
1455 		req = mtod(m, struct mngt_pktsched_wr *);
1456 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1457 		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1458 		req->sched = sched;
1459 		req->idx = qidx;
1460 		req->min = lo;
1461 		req->max = hi;
1462 		req->binding = port;
1463 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1464 		t3_mgmt_tx(adap, m);
1465 	}
1466 }
1467 
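/*
 * Bind each port's queue sets to that port's TX channel with packet scheduler
 * management commands so the firmware transmits each queue on the right port.
 */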
1468 static void
1469 bind_qsets(adapter_t *sc)
1470 {
1471 	int i, j;
1472 
1473 	for (i = 0; i < (sc)->params.nports; ++i) {
1474 		const struct port_info *pi = adap2pinfo(sc, i);
1475 
1476 		for (j = 0; j < pi->nqsets; ++j) {
1477 			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1478 					  -1, pi->tx_chan);
1479 
1480 		}
1481 	}
1482 }
1483 
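/*
 * Refresh the protocol SRAM image stored in the adapter's EEPROM if it does
 * not match the version this driver was built against.
 */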
1484 static void
1485 update_tpeeprom(struct adapter *adap)
1486 {
1487 	const struct firmware *tpeeprom;
1488 
1489 	uint32_t version;
1490 	unsigned int major, minor;
1491 	int ret, len;
1492 	char rev, name[32];
1493 
1494 	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1495 
1496 	major = G_TP_VERSION_MAJOR(version);
1497 	minor = G_TP_VERSION_MINOR(version);
1498 	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1499 		return;
1500 
1501 	rev = t3rev2char(adap);
1502 	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1503 
1504 	tpeeprom = firmware_get(name);
1505 	if (tpeeprom == NULL) {
1506 		device_printf(adap->dev,
1507 			      "could not load TP EEPROM: unable to load %s\n",
1508 			      name);
1509 		return;
1510 	}
1511 
1512 	len = tpeeprom->datasize - 4;
1513 
1514 	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1515 	if (ret)
1516 		goto release_tpeeprom;
1517 
1518 	if (len != TP_SRAM_LEN) {
1519 		device_printf(adap->dev,
1520 			      "%s length is wrong len=%d expected=%d\n", name,
1521 			      len, TP_SRAM_LEN);
1522 		goto release_tpeeprom;	/* don't leak the firmware reference */
1523 	}
1524 
1525 	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1526 	    TP_SRAM_OFFSET);
1527 
1528 	if (!ret) {
1529 		device_printf(adap->dev,
1530 			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1531 			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1532 	} else
1533 		device_printf(adap->dev,
1534 			      "Protocol SRAM image update in EEPROM failed\n");
1535 
1536 release_tpeeprom:
1537 	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1538 
1539 	return;
1540 }
1541 
1542 static int
1543 update_tpsram(struct adapter *adap)
1544 {
1545 	const struct firmware *tpsram;
1546 	int ret;
1547 	char rev, name[32];
1548 
1549 	rev = t3rev2char(adap);
1550 	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1551 
1552 	update_tpeeprom(adap);
1553 
1554 	tpsram = firmware_get(name);
1555 	if (tpsram == NULL) {
1556 		device_printf(adap->dev, "could not load TP SRAM\n");
1557 		return (EINVAL);
1558 	} else
1559 		device_printf(adap->dev, "updating TP SRAM\n");
1560 
1561 	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1562 	if (ret)
1563 		goto release_tpsram;
1564 
1565 	ret = t3_set_proto_sram(adap, tpsram->data);
1566 	if (ret)
1567 		device_printf(adap->dev, "loading protocol SRAM failed\n");
1568 
1569 release_tpsram:
1570 	firmware_put(tpsram, FIRMWARE_UNLOAD);
1571 
1572 	return (ret);
1573 }
1574 
1575 /**
1576  *	cxgb_up - enable the adapter
1577  *	@sc: adapter being enabled
1578  *
1579  *	Called when the first port is enabled, this function performs the
1580  *	actions necessary to make an adapter operational, such as completing
1581  *	the initialization of HW modules, and enabling interrupts.
1582  */
1583 static int
1584 cxgb_up(struct adapter *sc)
1585 {
1586 	int err = 0;
1587 	unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;
1588 
1589 	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1590 					   __func__, sc->open_device_map));
1591 
1592 	if ((sc->flags & FULL_INIT_DONE) == 0) {
1593 
1594 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1595 
1596 		if ((sc->flags & FW_UPTODATE) == 0)
1597 			if ((err = upgrade_fw(sc)))
1598 				goto out;
1599 
1600 		if ((sc->flags & TPS_UPTODATE) == 0)
1601 			if ((err = update_tpsram(sc)))
1602 				goto out;
1603 
1604 		if (is_offload(sc) && nfilters != 0) {
1605 			sc->params.mc5.nservers = 0;
1606 
1607 			if (nfilters < 0)
1608 				sc->params.mc5.nfilters = mxf;
1609 			else
1610 				sc->params.mc5.nfilters = min(nfilters, mxf);
1611 		}
1612 
1613 		err = t3_init_hw(sc, 0);
1614 		if (err)
1615 			goto out;
1616 
1617 		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1618 		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1619 
1620 		err = setup_sge_qsets(sc);
1621 		if (err)
1622 			goto out;
1623 
1624 		alloc_filters(sc);
1625 		setup_rss(sc);
1626 
1627 		t3_add_configured_sysctls(sc);
1628 		sc->flags |= FULL_INIT_DONE;
1629 	}
1630 
1631 	t3_intr_clear(sc);
1632 	t3_sge_start(sc);
1633 	t3_intr_enable(sc);
1634 
1635 	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1636 	    is_offload(sc) && init_tp_parity(sc) == 0)
1637 		sc->flags |= TP_PARITY_INIT;
1638 
1639 	if (sc->flags & TP_PARITY_INIT) {
1640 		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1641 		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1642 	}
1643 
1644 	if (!(sc->flags & QUEUES_BOUND)) {
1645 		bind_qsets(sc);
1646 		setup_hw_filters(sc);
1647 		sc->flags |= QUEUES_BOUND;
1648 	}
1649 
1650 	t3_sge_reset_adapter(sc);
1651 out:
1652 	return (err);
1653 }
1654 
1655 /*
1656  * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1657  * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1658  * during controller_detach, not here.
1659  */
1660 static void
1661 cxgb_down(struct adapter *sc)
1662 {
1663 	t3_sge_stop(sc);
1664 	t3_intr_disable(sc);
1665 }
1666 
1667 /*
1668  * if_init for cxgb ports.
1669  */
1670 static void
1671 cxgb_init(void *arg)
1672 {
1673 	struct port_info *p = arg;
1674 	struct adapter *sc = p->adapter;
1675 
1676 	ADAPTER_LOCK(sc);
1677 	cxgb_init_locked(p); /* releases adapter lock */
1678 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1679 }
1680 
1681 static int
1682 cxgb_init_locked(struct port_info *p)
1683 {
1684 	struct adapter *sc = p->adapter;
1685 	struct ifnet *ifp = p->ifp;
1686 	struct cmac *mac = &p->mac;
1687 	int i, rc = 0, may_sleep = 0, gave_up_lock = 0;
1688 
1689 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1690 
1691 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1692 		gave_up_lock = 1;
1693 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1694 			rc = EINTR;
1695 			goto done;
1696 		}
1697 	}
1698 	if (IS_DOOMED(p)) {
1699 		rc = ENXIO;
1700 		goto done;
1701 	}
1702 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1703 
1704 	/*
1705 	 * The code that runs during one-time adapter initialization can sleep
1706 	 * so it's important not to hold any locks across it.
1707 	 */
1708 	may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1709 
1710 	if (may_sleep) {
1711 		SET_BUSY(sc);
1712 		gave_up_lock = 1;
1713 		ADAPTER_UNLOCK(sc);
1714 	}
1715 
1716 	if (sc->open_device_map == 0 && ((rc = cxgb_up(sc)) != 0))
1717 		goto done;
1718 
1719 	PORT_LOCK(p);
1720 	if (isset(&sc->open_device_map, p->port_id) &&
1721 	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1722 		PORT_UNLOCK(p);
1723 		goto done;
1724 	}
1725 	t3_port_intr_enable(sc, p->port_id);
1726 	if (!mac->multiport)
1727 		t3_mac_init(mac);
1728 	cxgb_update_mac_settings(p);
1729 	t3_link_start(&p->phy, mac, &p->link_config);
1730 	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1731 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1732 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1733 	PORT_UNLOCK(p);
1734 
1735 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1736 		struct sge_qset *qs = &sc->sge.qs[i];
1737 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1738 
1739 		callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1740 				 txq->txq_watchdog.c_cpu);
1741 	}
1742 
1743 	/* all ok */
1744 	setbit(&sc->open_device_map, p->port_id);
1745 	callout_reset(&p->link_check_ch,
1746 	    p->phy.caps & SUPPORTED_LINK_IRQ ?  hz * 3 : hz / 4,
1747 	    link_check_callout, p);
1748 
1749 done:
1750 	if (may_sleep) {
1751 		ADAPTER_LOCK(sc);
1752 		KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1753 		CLR_BUSY(sc);
1754 	}
1755 	if (gave_up_lock)
1756 		wakeup_one(&sc->flags);
1757 	ADAPTER_UNLOCK(sc);
1758 	return (rc);
1759 }
1760 
1761 static int
1762 cxgb_uninit_locked(struct port_info *p)
1763 {
1764 	struct adapter *sc = p->adapter;
1765 	int rc;
1766 
1767 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1768 
1769 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1770 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1771 			rc = EINTR;
1772 			goto done;
1773 		}
1774 	}
1775 	if (IS_DOOMED(p)) {
1776 		rc = ENXIO;
1777 		goto done;
1778 	}
1779 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1780 	SET_BUSY(sc);
1781 	ADAPTER_UNLOCK(sc);
1782 
1783 	rc = cxgb_uninit_synchronized(p);
1784 
1785 	ADAPTER_LOCK(sc);
1786 	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1787 	CLR_BUSY(sc);
1788 	wakeup_one(&sc->flags);
1789 done:
1790 	ADAPTER_UNLOCK(sc);
1791 	return (rc);
1792 }
1793 
1794 /*
1795  * Called on "ifconfig down", and from port_detach
1796  */
1797 static int
1798 cxgb_uninit_synchronized(struct port_info *pi)
1799 {
1800 	struct adapter *sc = pi->adapter;
1801 	struct ifnet *ifp = pi->ifp;
1802 
1803 	/*
1804 	 * taskqueue_drain may cause a deadlock if the adapter lock is held.
1805 	 */
1806 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1807 
1808 	/*
1809 	 * Clear this port's bit from the open device map, and then drain all
1810 	 * the tasks that can access/manipulate this port's port_info or ifp.
1811 	 * We disable this port's interrupts here and so the slow/ext
1812 	 * interrupt tasks won't be enqueued.  The tick task will continue to
1813 	 * be enqueued every second but the runs after this drain will not see
1814 	 * this port in the open device map.
1815 	 *
1816 	 * A well behaved task must take open_device_map into account and ignore
1817 	 * ports that are not open.
1818 	 */
1819 	clrbit(&sc->open_device_map, pi->port_id);
1820 	t3_port_intr_disable(sc, pi->port_id);
1821 	taskqueue_drain(sc->tq, &sc->slow_intr_task);
1822 	taskqueue_drain(sc->tq, &sc->tick_task);
1823 
1824 	callout_drain(&pi->link_check_ch);
1825 	taskqueue_drain(sc->tq, &pi->link_check_task);
1826 
1827 	PORT_LOCK(pi);
1828 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1829 
1830 	/* disable pause frames */
1831 	t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1832 
1833 	/* Reset RX FIFO HWM */
1834 	t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1835 			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1836 
1837 	DELAY(100 * 1000);
1838 
1839 	/* Wait for TXFIFO empty */
1840 	t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1841 			F_TXFIFO_EMPTY, 1, 20, 5);
1842 
1843 	DELAY(100 * 1000);
1844 	t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1845 
1846 	pi->phy.ops->power_down(&pi->phy, 1);
1847 
1848 	PORT_UNLOCK(pi);
1849 
1850 	pi->link_config.link_ok = 0;
1851 	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1852 
1853 	if (sc->open_device_map == 0)
1854 		cxgb_down(pi->adapter);
1855 
1856 	return (0);
1857 }
1858 
1859 /*
1860  * Mark lro enabled or disabled in all qsets for this port
1861  */
1862 static int
1863 cxgb_set_lro(struct port_info *p, int enabled)
1864 {
1865 	int i;
1866 	struct adapter *adp = p->adapter;
1867 	struct sge_qset *q;
1868 
1869 	for (i = 0; i < p->nqsets; i++) {
1870 		q = &adp->sge.qs[p->first_qset + i];
1871 		q->lro.enabled = (enabled != 0);
1872 	}
1873 	return (0);
1874 }
1875 
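/*
 * Interface ioctl handler.  Requests that touch the hardware take the adapter
 * lock and bail out with ENXIO/EBUSY if the port has been doomed or the
 * controller is busy; media requests are handed to ifmedia and everything
 * else falls through to ether_ioctl().
 */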
1876 static int
1877 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1878 {
1879 	struct port_info *p = ifp->if_softc;
1880 	struct adapter *sc = p->adapter;
1881 	struct ifreq *ifr = (struct ifreq *)data;
1882 	int flags, error = 0, mtu;
1883 	uint32_t mask;
1884 
1885 	switch (command) {
1886 	case SIOCSIFMTU:
1887 		ADAPTER_LOCK(sc);
1888 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1889 		if (error) {
1890 fail:
1891 			ADAPTER_UNLOCK(sc);
1892 			return (error);
1893 		}
1894 
1895 		mtu = ifr->ifr_mtu;
1896 		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
1897 			error = EINVAL;
1898 		} else {
1899 			ifp->if_mtu = mtu;
1900 			PORT_LOCK(p);
1901 			cxgb_update_mac_settings(p);
1902 			PORT_UNLOCK(p);
1903 		}
1904 		ADAPTER_UNLOCK(sc);
1905 		break;
1906 	case SIOCSIFFLAGS:
1907 		ADAPTER_LOCK(sc);
1908 		if (IS_DOOMED(p)) {
1909 			error = ENXIO;
1910 			goto fail;
1911 		}
1912 		if (ifp->if_flags & IFF_UP) {
1913 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1914 				flags = p->if_flags;
1915 				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
1916 				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
1917 					if (IS_BUSY(sc)) {
1918 						error = EBUSY;
1919 						goto fail;
1920 					}
1921 					PORT_LOCK(p);
1922 					cxgb_update_mac_settings(p);
1923 					PORT_UNLOCK(p);
1924 				}
1925 				ADAPTER_UNLOCK(sc);
1926 			} else
1927 				error = cxgb_init_locked(p);
1928 			p->if_flags = ifp->if_flags;
1929 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1930 			error = cxgb_uninit_locked(p);
1931 		else
1932 			ADAPTER_UNLOCK(sc);
1933 
1934 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1935 		break;
1936 	case SIOCADDMULTI:
1937 	case SIOCDELMULTI:
1938 		ADAPTER_LOCK(sc);
1939 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1940 		if (error)
1941 			goto fail;
1942 
1943 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1944 			PORT_LOCK(p);
1945 			cxgb_update_mac_settings(p);
1946 			PORT_UNLOCK(p);
1947 		}
1948 		ADAPTER_UNLOCK(sc);
1949 
1950 		break;
1951 	case SIOCSIFCAP:
1952 		ADAPTER_LOCK(sc);
1953 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1954 		if (error)
1955 			goto fail;
1956 
1957 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1958 		if (mask & IFCAP_TXCSUM) {
1959 			ifp->if_capenable ^= IFCAP_TXCSUM;
1960 			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
1961 
1962 			if (IFCAP_TSO4 & ifp->if_capenable &&
1963 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1964 				ifp->if_capenable &= ~IFCAP_TSO4;
1965 				if_printf(ifp,
1966 				    "tso4 disabled due to -txcsum.\n");
1967 			}
1968 		}
1969 		if (mask & IFCAP_TXCSUM_IPV6) {
1970 			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1971 			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
1972 
1973 			if (IFCAP_TSO6 & ifp->if_capenable &&
1974 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1975 				ifp->if_capenable &= ~IFCAP_TSO6;
1976 				if_printf(ifp,
1977 				    "tso6 disabled due to -txcsum6.\n");
1978 			}
1979 		}
1980 		if (mask & IFCAP_RXCSUM)
1981 			ifp->if_capenable ^= IFCAP_RXCSUM;
1982 		if (mask & IFCAP_RXCSUM_IPV6)
1983 			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1984 
1985 		/*
1986 		 * Note that we leave CSUM_TSO alone (it is always set).  The
1987 		 * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
1988 		 * sending a TSO request our way, so it's sufficient to toggle
1989 		 * IFCAP_TSOx only.
1990 		 */
1991 		if (mask & IFCAP_TSO4) {
1992 			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
1993 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1994 				if_printf(ifp, "enable txcsum first.\n");
1995 				error = EAGAIN;
1996 				goto fail;
1997 			}
1998 			ifp->if_capenable ^= IFCAP_TSO4;
1999 		}
2000 		if (mask & IFCAP_TSO6) {
2001 			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
2002 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2003 				if_printf(ifp, "enable txcsum6 first.\n");
2004 				error = EAGAIN;
2005 				goto fail;
2006 			}
2007 			ifp->if_capenable ^= IFCAP_TSO6;
2008 		}
2009 		if (mask & IFCAP_LRO) {
2010 			ifp->if_capenable ^= IFCAP_LRO;
2011 
2012 			/* Safe to do this even if cxgb_up has not been called yet */
2013 			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2014 		}
2015 #ifdef TCP_OFFLOAD
2016 		if (mask & IFCAP_TOE4) {
2017 			int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE4;
2018 
2019 			error = toe_capability(p, enable);
2020 			if (error == 0)
2021 				ifp->if_capenable ^= mask;
2022 		}
2023 #endif
2024 		if (mask & IFCAP_VLAN_HWTAGGING) {
2025 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2026 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2027 				PORT_LOCK(p);
2028 				cxgb_update_mac_settings(p);
2029 				PORT_UNLOCK(p);
2030 			}
2031 		}
2032 		if (mask & IFCAP_VLAN_MTU) {
2033 			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2034 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2035 				PORT_LOCK(p);
2036 				cxgb_update_mac_settings(p);
2037 				PORT_UNLOCK(p);
2038 			}
2039 		}
2040 		if (mask & IFCAP_VLAN_HWTSO)
2041 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2042 		if (mask & IFCAP_VLAN_HWCSUM)
2043 			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2044 
2045 #ifdef VLAN_CAPABILITIES
2046 		VLAN_CAPABILITIES(ifp);
2047 #endif
2048 		ADAPTER_UNLOCK(sc);
2049 		break;
2050 	case SIOCSIFMEDIA:
2051 	case SIOCGIFMEDIA:
2052 		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2053 		break;
2054 	default:
2055 		error = ether_ioctl(ifp, command, data);
2056 	}
2057 
2058 	return (error);
2059 }
2060 
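/*
 * The media type is dictated by the PHY/module that is present and cannot be
 * changed from software, so media change requests are rejected.  The ifmedia
 * list built by cxgb_build_medialist() always reflects the hardware.
 */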
2061 static int
2062 cxgb_media_change(struct ifnet *ifp)
2063 {
2064 	return (EOPNOTSUPP);
2065 }
2066 
2067 /*
2068  * Translates phy->modtype to the correct Ethernet media subtype.
2069  */
2070 static int
2071 cxgb_ifm_type(int mod)
2072 {
2073 	switch (mod) {
2074 	case phy_modtype_sr:
2075 		return (IFM_10G_SR);
2076 	case phy_modtype_lr:
2077 		return (IFM_10G_LR);
2078 	case phy_modtype_lrm:
2079 		return (IFM_10G_LRM);
2080 	case phy_modtype_twinax:
2081 		return (IFM_10G_TWINAX);
2082 	case phy_modtype_twinax_long:
2083 		return (IFM_10G_TWINAX_LONG);
2084 	case phy_modtype_none:
2085 		return (IFM_NONE);
2086 	case phy_modtype_unknown:
2087 		return (IFM_UNKNOWN);
2088 	}
2089 
2090 	KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2091 	return (IFM_UNKNOWN);
2092 }
2093 
2094 /*
2095  * Rebuilds the ifmedia list for this port, and sets the current media.
2096  */
2097 static void
2098 cxgb_build_medialist(struct port_info *p)
2099 {
2100 	struct cphy *phy = &p->phy;
2101 	struct ifmedia *media = &p->media;
2102 	int mod = phy->modtype;
2103 	int m = IFM_ETHER | IFM_FDX;
2104 
2105 	PORT_LOCK(p);
2106 
2107 	ifmedia_removeall(media);
2108 	if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2109 		/* Copper (RJ45) */
2110 
2111 		if (phy->caps & SUPPORTED_10000baseT_Full)
2112 			ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2113 
2114 		if (phy->caps & SUPPORTED_1000baseT_Full)
2115 			ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2116 
2117 		if (phy->caps & SUPPORTED_100baseT_Full)
2118 			ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2119 
2120 		if (phy->caps & SUPPORTED_10baseT_Full)
2121 			ifmedia_add(media, m | IFM_10_T, mod, NULL);
2122 
2123 		ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2124 		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2125 
2126 	} else if (phy->caps & SUPPORTED_TP) {
2127 		/* Copper (CX4) */
2128 
2129 		KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2130 			("%s: unexpected cap 0x%x", __func__, phy->caps));
2131 
2132 		ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2133 		ifmedia_set(media, m | IFM_10G_CX4);
2134 
2135 	} else if (phy->caps & SUPPORTED_FIBRE &&
2136 		   phy->caps & SUPPORTED_10000baseT_Full) {
2137 		/* 10G optical (but includes SFP+ twinax) */
2138 
2139 		m |= cxgb_ifm_type(mod);
2140 		if (IFM_SUBTYPE(m) == IFM_NONE)
2141 			m &= ~IFM_FDX;
2142 
2143 		ifmedia_add(media, m, mod, NULL);
2144 		ifmedia_set(media, m);
2145 
2146 	} else if (phy->caps & SUPPORTED_FIBRE &&
2147 		   phy->caps & SUPPORTED_1000baseT_Full) {
2148 		/* 1G optical */
2149 
2150 		/* XXX: Lie and claim to be SX, could actually be any 1G-X */
2151 		ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2152 		ifmedia_set(media, m | IFM_1000_SX);
2153 
2154 	} else {
2155 		KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2156 			    phy->caps));
2157 	}
2158 
2159 	PORT_UNLOCK(p);
2160 }
2161 
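/*
 * Report link state and active media.  The media list is rebuilt first if the
 * module type has changed since the list was last built.  For autoselect
 * (copper RJ45) the negotiated speed is reported as the active subtype.
 */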
2162 static void
2163 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2164 {
2165 	struct port_info *p = ifp->if_softc;
2166 	struct ifmedia_entry *cur = p->media.ifm_cur;
2167 	int speed = p->link_config.speed;
2168 
2169 	if (cur->ifm_data != p->phy.modtype) {
2170 		cxgb_build_medialist(p);
2171 		cur = p->media.ifm_cur;
2172 	}
2173 
2174 	ifmr->ifm_status = IFM_AVALID;
2175 	if (!p->link_config.link_ok)
2176 		return;
2177 
2178 	ifmr->ifm_status |= IFM_ACTIVE;
2179 
2180 	/*
2181 	 * active and current will differ iff current media is autoselect.  That
2182 	 * can happen only for copper RJ45.
2183 	 */
2184 	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2185 		return;
2186 	KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2187 		("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2188 
2189 	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2190 	if (speed == SPEED_10000)
2191 		ifmr->ifm_active |= IFM_10G_T;
2192 	else if (speed == SPEED_1000)
2193 		ifmr->ifm_active |= IFM_1000_T;
2194 	else if (speed == SPEED_100)
2195 		ifmr->ifm_active |= IFM_100_TX;
2196 	else if (speed == SPEED_10)
2197 		ifmr->ifm_active |= IFM_10_T;
2198 	else
2199 		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2200 			    speed));
2201 }
2202 
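/*
 * if_get_counter method.  All values come from the MAC statistics, refreshed
 * by cxgb_refresh_stats() at most once every 250ms; TX queue drops are summed
 * over this port's queue sets.
 */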
2203 static uint64_t
2204 cxgb_get_counter(struct ifnet *ifp, ift_counter c)
2205 {
2206 	struct port_info *pi = ifp->if_softc;
2207 	struct adapter *sc = pi->adapter;
2208 	struct cmac *mac = &pi->mac;
2209 	struct mac_stats *mstats = &mac->stats;
2210 
2211 	cxgb_refresh_stats(pi);
2212 
2213 	switch (c) {
2214 	case IFCOUNTER_IPACKETS:
2215 		return (mstats->rx_frames);
2216 
2217 	case IFCOUNTER_IERRORS:
2218 		return (mstats->rx_jabber + mstats->rx_data_errs +
2219 		    mstats->rx_sequence_errs + mstats->rx_runt +
2220 		    mstats->rx_too_long + mstats->rx_mac_internal_errs +
2221 		    mstats->rx_short + mstats->rx_fcs_errs);
2222 
2223 	case IFCOUNTER_OPACKETS:
2224 		return (mstats->tx_frames);
2225 
2226 	case IFCOUNTER_OERRORS:
2227 		return (mstats->tx_excess_collisions + mstats->tx_underrun +
2228 		    mstats->tx_len_errs + mstats->tx_mac_internal_errs +
2229 		    mstats->tx_excess_deferral + mstats->tx_fcs_errs);
2230 
2231 	case IFCOUNTER_COLLISIONS:
2232 		return (mstats->tx_total_collisions);
2233 
2234 	case IFCOUNTER_IBYTES:
2235 		return (mstats->rx_octets);
2236 
2237 	case IFCOUNTER_OBYTES:
2238 		return (mstats->tx_octets);
2239 
2240 	case IFCOUNTER_IMCASTS:
2241 		return (mstats->rx_mcast_frames);
2242 
2243 	case IFCOUNTER_OMCASTS:
2244 		return (mstats->tx_mcast_frames);
2245 
2246 	case IFCOUNTER_IQDROPS:
2247 		return (mstats->rx_cong_drops);
2248 
2249 	case IFCOUNTER_OQDROPS: {
2250 		int i;
2251 		uint64_t drops;
2252 
2253 		drops = 0;
2254 		if (sc->flags & FULL_INIT_DONE) {
2255 			for (i = pi->first_qset; i < pi->first_qset + pi->nqsets; i++)
2256 				drops += sc->sge.qs[i].txq[TXQ_ETH].txq_mr->br_drops;
2257 		}
2258 
2259 		return (drops);
2260 
2261 	}
2262 
2263 	default:
2264 		return (if_get_counter_default(ifp, c));
2265 	}
2266 }
2267 
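/*
 * Interrupt handler for slow-path events: mask further PL interrupts (the
 * read-back flushes the write) and defer the work to the slow interrupt task.
 */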
2268 static void
2269 cxgb_async_intr(void *data)
2270 {
2271 	adapter_t *sc = data;
2272 
2273 	t3_write_reg(sc, A_PL_INT_ENABLE0, 0);
2274 	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
2275 	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2276 }
2277 
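/*
 * Periodic link check.  The callout only schedules link_check_task;
 * check_link_status() does the real work and re-arms the callout while the
 * link is down, faulted, or serviced by a PHY that cannot interrupt on link
 * changes.
 */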
2278 static void
2279 link_check_callout(void *arg)
2280 {
2281 	struct port_info *pi = arg;
2282 	struct adapter *sc = pi->adapter;
2283 
2284 	if (!isset(&sc->open_device_map, pi->port_id))
2285 		return;
2286 
2287 	taskqueue_enqueue(sc->tq, &pi->link_check_task);
2288 }
2289 
2290 static void
2291 check_link_status(void *arg, int pending)
2292 {
2293 	struct port_info *pi = arg;
2294 	struct adapter *sc = pi->adapter;
2295 
2296 	if (!isset(&sc->open_device_map, pi->port_id))
2297 		return;
2298 
2299 	t3_link_changed(sc, pi->port_id);
2300 
2301 	if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ) ||
2302 	    pi->link_config.link_ok == 0)
2303 		callout_reset(&pi->link_check_ch, hz, link_check_callout, pi);
2304 }
2305 
2306 void
2307 t3_os_link_intr(struct port_info *pi)
2308 {
2309 	/*
2310 	 * Schedule a link check in the near future.  If the link is flapping
2311 	 * rapidly we'll keep resetting the callout and delaying the check until
2312 	 * things stabilize a bit.
2313 	 */
2314 	callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
2315 }
2316 
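/*
 * T3B2 MAC watchdog, run from the tick handler.  For every open port with a
 * healthy link the shared code decides whether the MAC merely had to be
 * toggled or needed a full reset, in which case the MAC and link are brought
 * back up here.
 */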
2317 static void
2318 check_t3b2_mac(struct adapter *sc)
2319 {
2320 	int i;
2321 
2322 	if (sc->flags & CXGB_SHUTDOWN)
2323 		return;
2324 
2325 	for_each_port(sc, i) {
2326 		struct port_info *p = &sc->port[i];
2327 		int status;
2328 #ifdef INVARIANTS
2329 		struct ifnet *ifp = p->ifp;
2330 #endif
2331 
2332 		if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2333 		    !p->link_config.link_ok)
2334 			continue;
2335 
2336 		KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2337 			("%s: state mismatch (drv_flags %x, device_map %x)",
2338 			 __func__, ifp->if_drv_flags, sc->open_device_map));
2339 
2340 		PORT_LOCK(p);
2341 		status = t3b2_mac_watchdog_task(&p->mac);
2342 		if (status == 1)
2343 			p->mac.stats.num_toggled++;
2344 		else if (status == 2) {
2345 			struct cmac *mac = &p->mac;
2346 
2347 			cxgb_update_mac_settings(p);
2348 			t3_link_start(&p->phy, mac, &p->link_config);
2349 			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2350 			t3_port_intr_enable(sc, p->port_id);
2351 			p->mac.stats.num_resets++;
2352 		}
2353 		PORT_UNLOCK(p);
2354 	}
2355 }
2356 
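/*
 * Once-a-second callout.  The actual work is done by cxgb_tick_handler() on
 * the adapter taskqueue.
 */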
2357 static void
2358 cxgb_tick(void *arg)
2359 {
2360 	adapter_t *sc = (adapter_t *)arg;
2361 
2362 	if (sc->flags & CXGB_SHUTDOWN)
2363 		return;
2364 
2365 	taskqueue_enqueue(sc->tq, &sc->tick_task);
2366 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2367 }
2368 
2369 void
2370 cxgb_refresh_stats(struct port_info *pi)
2371 {
2372 	struct timeval tv;
2373 	const struct timeval interval = {0, 250000};    /* 250ms */
2374 
2375 	getmicrotime(&tv);
2376 	timevalsub(&tv, &interval);
2377 	if (timevalcmp(&tv, &pi->last_refreshed, <))
2378 		return;
2379 
2380 	PORT_LOCK(pi);
2381 	t3_mac_update_stats(&pi->mac);
2382 	PORT_UNLOCK(pi);
2383 	getmicrotime(&pi->last_refreshed);
2384 }
2385 
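/*
 * Per-second housekeeping: T3B2 MAC watchdog, accounting of starved response
 * queues and empty free lists, per-port MAC statistics refresh, and RX FIFO
 * overflow counting.
 */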
2386 static void
2387 cxgb_tick_handler(void *arg, int count)
2388 {
2389 	adapter_t *sc = (adapter_t *)arg;
2390 	const struct adapter_params *p = &sc->params;
2391 	int i;
2392 	uint32_t cause, reset;
2393 
2394 	if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2395 		return;
2396 
2397 	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2398 		check_t3b2_mac(sc);
2399 
2400 	cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2401 	if (cause) {
2402 		struct sge_qset *qs = &sc->sge.qs[0];
2403 		uint32_t mask, v;
2404 
2405 		v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2406 
2407 		mask = 1;
2408 		for (i = 0; i < SGE_QSETS; i++) {
2409 			if (v & mask)
2410 				qs[i].rspq.starved++;
2411 			mask <<= 1;
2412 		}
2413 
2414 		mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2415 
2416 		for (i = 0; i < SGE_QSETS * 2; i++) {
2417 			if (v & mask) {
2418 				qs[i / 2].fl[i % 2].empty++;
2419 			}
2420 			mask <<= 1;
2421 		}
2422 
2423 		/* clear */
2424 		t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2425 		t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2426 	}
2427 
2428 	for (i = 0; i < sc->params.nports; i++) {
2429 		struct port_info *pi = &sc->port[i];
2430 		struct cmac *mac = &pi->mac;
2431 
2432 		if (!isset(&sc->open_device_map, pi->port_id))
2433 			continue;
2434 
2435 		cxgb_refresh_stats(pi);
2436 
2437 		if (mac->multiport)
2438 			continue;
2439 
2440 		/* Count rx fifo overflows, once per second */
2441 		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2442 		reset = 0;
2443 		if (cause & F_RXFIFO_OVERFLOW) {
2444 			mac->stats.rx_fifo_ovfl++;
2445 			reset |= F_RXFIFO_OVERFLOW;
2446 		}
2447 		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2448 	}
2449 }
2450 
2451 static void
2452 touch_bars(device_t dev)
2453 {
2454 	/*
2455 	 * Don't enable yet
2456 	 */
2457 #if !defined(__LP64__) && 0
2458 	u32 v;
2459 
2460 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2461 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2462 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2463 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2464 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2465 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2466 #endif
2467 }
2468 
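/*
 * Write a byte range to the serial EEPROM.  The EEPROM is accessed in 32-bit
 * words, so unaligned requests go through a bounce buffer seeded with the
 * existing contents of the first and last words.  Write protection is lifted
 * only for the duration of the update.
 */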
2469 static int
2470 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2471 {
2472 	uint8_t *buf;
2473 	int err = 0;
2474 	u32 aligned_offset, aligned_len, *p;
2475 	struct adapter *adapter = pi->adapter;
2476 
2477 
2478 	aligned_offset = offset & ~3;
2479 	aligned_len = (len + (offset & 3) + 3) & ~3;
2480 
2481 	if (aligned_offset != offset || aligned_len != len) {
2482 		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2483 		if (!buf)
2484 			return (ENOMEM);
2485 		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2486 		if (!err && aligned_len > 4)
2487 			err = t3_seeprom_read(adapter,
2488 					      aligned_offset + aligned_len - 4,
2489 					      (u32 *)&buf[aligned_len - 4]);
2490 		if (err)
2491 			goto out;
2492 		memcpy(buf + (offset & 3), data, len);
2493 	} else
2494 		buf = (uint8_t *)(uintptr_t)data;
2495 
2496 	err = t3_seeprom_wp(adapter, 0);
2497 	if (err)
2498 		goto out;
2499 
2500 	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2501 		err = t3_seeprom_write(adapter, aligned_offset, *p);
2502 		aligned_offset += 4;
2503 	}
2504 
2505 	if (!err)
2506 		err = t3_seeprom_wp(adapter, 1);
2507 out:
2508 	if (buf != data)
2509 		free(buf, M_DEVBUF);
2510 	return (err);
2511 }
2512 
2513 
2514 static int
2515 in_range(int val, int lo, int hi)
2516 {
2517 	return (val < 0 || (val <= hi && val >= lo));
2518 }
2519 
2520 static int
2521 cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
2522 {
2523 	return (0);
2524 }
2525 
2526 static int
2527 cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2528 {
2529 	return (0);
2530 }
2531 
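/*
 * ioctl handler for the adapter's control device node.  Every command
 * requires driver privileges.
 */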
2532 static int
2533 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2534     int fflag, struct thread *td)
2535 {
2536 	int mmd, error = 0;
2537 	struct port_info *pi = dev->si_drv1;
2538 	adapter_t *sc = pi->adapter;
2539 
2540 #ifdef PRIV_SUPPORTED
2541 	if (priv_check(td, PRIV_DRIVER)) {
2542 		if (cxgb_debug)
2543 			printf("user does not have access to privileged ioctls\n");
2544 		return (EPERM);
2545 	}
2546 #else
2547 	if (suser(td)) {
2548 		if (cxgb_debug)
2549 			printf("user does not have access to privileged ioctls\n");
2550 		return (EPERM);
2551 	}
2552 #endif
2553 
2554 	switch (cmd) {
2555 	case CHELSIO_GET_MIIREG: {
2556 		uint32_t val;
2557 		struct cphy *phy = &pi->phy;
2558 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2559 
2560 		if (!phy->mdio_read)
2561 			return (EOPNOTSUPP);
2562 		if (is_10G(sc)) {
2563 			mmd = mid->phy_id >> 8;
2564 			if (!mmd)
2565 				mmd = MDIO_DEV_PCS;
2566 			else if (mmd > MDIO_DEV_VEND2)
2567 				return (EINVAL);
2568 
2569 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2570 					     mid->reg_num, &val);
2571 		} else
2572 		        error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2573 					     mid->reg_num & 0x1f, &val);
2574 		if (error == 0)
2575 			mid->val_out = val;
2576 		break;
2577 	}
2578 	case CHELSIO_SET_MIIREG: {
2579 		struct cphy *phy = &pi->phy;
2580 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2581 
2582 		if (!phy->mdio_write)
2583 			return (EOPNOTSUPP);
2584 		if (is_10G(sc)) {
2585 			mmd = mid->phy_id >> 8;
2586 			if (!mmd)
2587 				mmd = MDIO_DEV_PCS;
2588 			else if (mmd > MDIO_DEV_VEND2)
2589 				return (EINVAL);
2590 
2591 			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2592 					      mmd, mid->reg_num, mid->val_in);
2593 		} else
2594 			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2595 					      mid->reg_num & 0x1f,
2596 					      mid->val_in);
2597 		break;
2598 	}
2599 	case CHELSIO_SETREG: {
2600 		struct ch_reg *edata = (struct ch_reg *)data;
2601 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2602 			return (EFAULT);
2603 		t3_write_reg(sc, edata->addr, edata->val);
2604 		break;
2605 	}
2606 	case CHELSIO_GETREG: {
2607 		struct ch_reg *edata = (struct ch_reg *)data;
2608 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2609 			return (EFAULT);
2610 		edata->val = t3_read_reg(sc, edata->addr);
2611 		break;
2612 	}
2613 	case CHELSIO_GET_SGE_CONTEXT: {
2614 		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2615 		mtx_lock_spin(&sc->sge.reg_lock);
2616 		switch (ecntxt->cntxt_type) {
2617 		case CNTXT_TYPE_EGRESS:
2618 			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2619 			    ecntxt->data);
2620 			break;
2621 		case CNTXT_TYPE_FL:
2622 			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2623 			    ecntxt->data);
2624 			break;
2625 		case CNTXT_TYPE_RSP:
2626 			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2627 			    ecntxt->data);
2628 			break;
2629 		case CNTXT_TYPE_CQ:
2630 			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2631 			    ecntxt->data);
2632 			break;
2633 		default:
2634 			error = EINVAL;
2635 			break;
2636 		}
2637 		mtx_unlock_spin(&sc->sge.reg_lock);
2638 		break;
2639 	}
2640 	case CHELSIO_GET_SGE_DESC: {
2641 		struct ch_desc *edesc = (struct ch_desc *)data;
2642 		int ret;
2643 		if (edesc->queue_num >= SGE_QSETS * 6)
2644 			return (EINVAL);
2645 		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2646 		    edesc->queue_num % 6, edesc->idx, edesc->data);
2647 		if (ret < 0)
2648 			return (EINVAL);
2649 		edesc->size = ret;
2650 		break;
2651 	}
2652 	case CHELSIO_GET_QSET_PARAMS: {
2653 		struct qset_params *q;
2654 		struct ch_qset_params *t = (struct ch_qset_params *)data;
2655 		int q1 = pi->first_qset;
2656 		int nqsets = pi->nqsets;
2657 		int i;
2658 
2659 		if (t->qset_idx >= nqsets)
2660 			return (EINVAL);
2661 
2662 		i = q1 + t->qset_idx;
2663 		q = &sc->params.sge.qset[i];
2664 		t->rspq_size   = q->rspq_size;
2665 		t->txq_size[0] = q->txq_size[0];
2666 		t->txq_size[1] = q->txq_size[1];
2667 		t->txq_size[2] = q->txq_size[2];
2668 		t->fl_size[0]  = q->fl_size;
2669 		t->fl_size[1]  = q->jumbo_size;
2670 		t->polling     = q->polling;
2671 		t->lro         = q->lro;
2672 		t->intr_lat    = q->coalesce_usecs;
2673 		t->cong_thres  = q->cong_thres;
2674 		t->qnum        = i;
2675 
2676 		if ((sc->flags & FULL_INIT_DONE) == 0)
2677 			t->vector = 0;
2678 		else if (sc->flags & USING_MSIX)
2679 			t->vector = rman_get_start(sc->msix_irq_res[i]);
2680 		else
2681 			t->vector = rman_get_start(sc->irq_res);
2682 
2683 		break;
2684 	}
2685 	case CHELSIO_GET_QSET_NUM: {
2686 		struct ch_reg *edata = (struct ch_reg *)data;
2687 		edata->val = pi->nqsets;
2688 		break;
2689 	}
2690 	case CHELSIO_LOAD_FW: {
2691 		uint8_t *fw_data;
2692 		uint32_t vers;
2693 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2694 
2695 		/*
2696 		 * You're allowed to load a firmware image only before FULL_INIT_DONE.
2697 		 *
2698 		 * FW_UPTODATE is also set so the rest of the initialization
2699 		 * will not overwrite what was loaded here.  This gives you the
2700 		 * flexibility to load any firmware (and maybe shoot yourself in
2701 		 * the foot).
2702 		 */
2703 
2704 		ADAPTER_LOCK(sc);
2705 		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2706 			ADAPTER_UNLOCK(sc);
2707 			return (EBUSY);
2708 		}
2709 
2710 		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2711 		if (!fw_data)
2712 			error = ENOMEM;
2713 		else
2714 			error = copyin(t->buf, fw_data, t->len);
2715 
2716 		if (!error)
2717 			error = -t3_load_fw(sc, fw_data, t->len);
2718 
2719 		if (t3_get_fw_version(sc, &vers) == 0) {
2720 			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2721 			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2722 			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2723 		}
2724 
2725 		if (!error)
2726 			sc->flags |= FW_UPTODATE;
2727 
2728 		free(fw_data, M_DEVBUF);
2729 		ADAPTER_UNLOCK(sc);
2730 		break;
2731 	}
2732 	case CHELSIO_LOAD_BOOT: {
2733 		uint8_t *boot_data;
2734 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2735 
2736 		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2737 		if (!boot_data)
2738 			return (ENOMEM);
2739 
2740 		error = copyin(t->buf, boot_data, t->len);
2741 		if (!error)
2742 			error = -t3_load_boot(sc, boot_data, t->len);
2743 
2744 		free(boot_data, M_DEVBUF);
2745 		break;
2746 	}
2747 	case CHELSIO_GET_PM: {
2748 		struct ch_pm *m = (struct ch_pm *)data;
2749 		struct tp_params *p = &sc->params.tp;
2750 
2751 		if (!is_offload(sc))
2752 			return (EOPNOTSUPP);
2753 
2754 		m->tx_pg_sz = p->tx_pg_size;
2755 		m->tx_num_pg = p->tx_num_pgs;
2756 		m->rx_pg_sz  = p->rx_pg_size;
2757 		m->rx_num_pg = p->rx_num_pgs;
2758 		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2759 
2760 		break;
2761 	}
2762 	case CHELSIO_SET_PM: {
2763 		struct ch_pm *m = (struct ch_pm *)data;
2764 		struct tp_params *p = &sc->params.tp;
2765 
2766 		if (!is_offload(sc))
2767 			return (EOPNOTSUPP);
2768 		if (sc->flags & FULL_INIT_DONE)
2769 			return (EBUSY);
2770 
2771 		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2772 		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2773 			return (EINVAL);	/* not power of 2 */
2774 		if (!(m->rx_pg_sz & 0x14000))
2775 			return (EINVAL);	/* not 16KB or 64KB */
2776 		if (!(m->tx_pg_sz & 0x1554000))
2777 			return (EINVAL);
2778 		if (m->tx_num_pg == -1)
2779 			m->tx_num_pg = p->tx_num_pgs;
2780 		if (m->rx_num_pg == -1)
2781 			m->rx_num_pg = p->rx_num_pgs;
2782 		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2783 			return (EINVAL);
2784 		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2785 		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2786 			return (EINVAL);
2787 
2788 		p->rx_pg_size = m->rx_pg_sz;
2789 		p->tx_pg_size = m->tx_pg_sz;
2790 		p->rx_num_pgs = m->rx_num_pg;
2791 		p->tx_num_pgs = m->tx_num_pg;
2792 		break;
2793 	}
2794 	case CHELSIO_SETMTUTAB: {
2795 		struct ch_mtus *m = (struct ch_mtus *)data;
2796 		int i;
2797 
2798 		if (!is_offload(sc))
2799 			return (EOPNOTSUPP);
2800 		if (offload_running(sc))
2801 			return (EBUSY);
2802 		if (m->nmtus != NMTUS)
2803 			return (EINVAL);
2804 		if (m->mtus[0] < 81)         /* accommodate SACK */
2805 			return (EINVAL);
2806 
2807 		/*
2808 		 * MTUs must be in ascending order
2809 		 */
2810 		for (i = 1; i < NMTUS; ++i)
2811 			if (m->mtus[i] < m->mtus[i - 1])
2812 				return (EINVAL);
2813 
2814 		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2815 		break;
2816 	}
2817 	case CHELSIO_GETMTUTAB: {
2818 		struct ch_mtus *m = (struct ch_mtus *)data;
2819 
2820 		if (!is_offload(sc))
2821 			return (EOPNOTSUPP);
2822 
2823 		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2824 		m->nmtus = NMTUS;
2825 		break;
2826 	}
2827 	case CHELSIO_GET_MEM: {
2828 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2829 		struct mc7 *mem;
2830 		uint8_t *useraddr;
2831 		u64 buf[32];
2832 
2833 		/*
2834 		 * Use these to avoid modifying len/addr in the return
2835 		 * struct
2836 		 */
2837 		uint32_t len = t->len, addr = t->addr;
2838 
2839 		if (!is_offload(sc))
2840 			return (EOPNOTSUPP);
2841 		if (!(sc->flags & FULL_INIT_DONE))
2842 			return (EIO);         /* need the memory controllers */
2843 		if ((addr & 0x7) || (len & 0x7))
2844 			return (EINVAL);
2845 		if (t->mem_id == MEM_CM)
2846 			mem = &sc->cm;
2847 		else if (t->mem_id == MEM_PMRX)
2848 			mem = &sc->pmrx;
2849 		else if (t->mem_id == MEM_PMTX)
2850 			mem = &sc->pmtx;
2851 		else
2852 			return (EINVAL);
2853 
2854 		/*
2855 		 * Version scheme:
2856 		 * bits 0..9: chip version
2857 		 * bits 10..15: chip revision
2858 		 */
2859 		t->version = 3 | (sc->params.rev << 10);
2860 
2861 		/*
2862 		 * Read 256 bytes at a time as len can be large and we don't
2863 		 * want to use huge intermediate buffers.
2864 		 */
2865 		useraddr = (uint8_t *)t->buf;
2866 		while (len) {
2867 			unsigned int chunk = min(len, sizeof(buf));
2868 
2869 			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2870 			if (error)
2871 				return (-error);
2872 			if (copyout(buf, useraddr, chunk))
2873 				return (EFAULT);
2874 			useraddr += chunk;
2875 			addr += chunk;
2876 			len -= chunk;
2877 		}
2878 		break;
2879 	}
2880 	case CHELSIO_READ_TCAM_WORD: {
2881 		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2882 
2883 		if (!is_offload(sc))
2884 			return (EOPNOTSUPP);
2885 		if (!(sc->flags & FULL_INIT_DONE))
2886 			return (EIO);         /* need MC5 */
2887 		return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
2888 		break;
2889 	}
2890 	case CHELSIO_SET_TRACE_FILTER: {
2891 		struct ch_trace *t = (struct ch_trace *)data;
2892 		const struct trace_params *tp;
2893 
2894 		tp = (const struct trace_params *)&t->sip;
2895 		if (t->config_tx)
2896 			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2897 					       t->trace_tx);
2898 		if (t->config_rx)
2899 			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2900 					       t->trace_rx);
2901 		break;
2902 	}
2903 	case CHELSIO_SET_PKTSCHED: {
2904 		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2905 		if (sc->open_device_map == 0)
2906 			return (EAGAIN);
2907 		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2908 		    p->binding);
2909 		break;
2910 	}
2911 	case CHELSIO_IFCONF_GETREGS: {
2912 		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2913 		int reglen = cxgb_get_regs_len();
2914 		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2915 		if (buf == NULL) {
2916 			return (ENOMEM);
2917 		}
2918 		if (regs->len > reglen)
2919 			regs->len = reglen;
2920 		else if (regs->len < reglen)
2921 			error = ENOBUFS;
2922 
2923 		if (!error) {
2924 			cxgb_get_regs(sc, regs, buf);
2925 			error = copyout(buf, regs->data, reglen);
2926 		}
2927 		free(buf, M_DEVBUF);
2928 
2929 		break;
2930 	}
2931 	case CHELSIO_SET_HW_SCHED: {
2932 		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2933 		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2934 
2935 		if ((sc->flags & FULL_INIT_DONE) == 0)
2936 			return (EAGAIN);       /* need TP to be initialized */
2937 		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2938 		    !in_range(t->channel, 0, 1) ||
2939 		    !in_range(t->kbps, 0, 10000000) ||
2940 		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2941 		    !in_range(t->flow_ipg, 0,
2942 			      dack_ticks_to_usec(sc, 0x7ff)))
2943 			return (EINVAL);
2944 
2945 		if (t->kbps >= 0) {
2946 			error = t3_config_sched(sc, t->kbps, t->sched);
2947 			if (error < 0)
2948 				return (-error);
2949 		}
2950 		if (t->class_ipg >= 0)
2951 			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2952 		if (t->flow_ipg >= 0) {
2953 			t->flow_ipg *= 1000;     /* us -> ns */
2954 			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2955 		}
2956 		if (t->mode >= 0) {
2957 			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2958 
2959 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2960 					 bit, t->mode ? bit : 0);
2961 		}
2962 		if (t->channel >= 0)
2963 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2964 					 1 << t->sched, t->channel << t->sched);
2965 		break;
2966 	}
2967 	case CHELSIO_GET_EEPROM: {
2968 		int i;
2969 		struct ch_eeprom *e = (struct ch_eeprom *)data;
2970 		uint8_t *buf;
2971 
2972 		if (e->offset & 3 || e->offset >= EEPROMSIZE ||
2973 		    e->len > EEPROMSIZE || e->offset + e->len > EEPROMSIZE) {
2974 			return (EINVAL);
2975 		}
2976 
2977 		buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
2978 		if (buf == NULL) {
2979 			return (ENOMEM);
2980 		}
2981 		e->magic = EEPROM_MAGIC;
2982 		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
2983 			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
2984 
2985 		if (!error)
2986 			error = copyout(buf + e->offset, e->data, e->len);
2987 
2988 		free(buf, M_DEVBUF);
2989 		break;
2990 	}
2991 	case CHELSIO_CLEAR_STATS: {
2992 		if (!(sc->flags & FULL_INIT_DONE))
2993 			return (EAGAIN);
2994 
2995 		PORT_LOCK(pi);
2996 		t3_mac_update_stats(&pi->mac);
2997 		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
2998 		PORT_UNLOCK(pi);
2999 		break;
3000 	}
3001 	case CHELSIO_GET_UP_LA: {
3002 		struct ch_up_la *la = (struct ch_up_la *)data;
3003 		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3004 		if (buf == NULL) {
3005 			return (ENOMEM);
3006 		}
3007 		if (la->bufsize < LA_BUFSIZE)
3008 			error = ENOBUFS;
3009 
3010 		if (!error)
3011 			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3012 					      &la->bufsize, buf);
3013 		if (!error)
3014 			error = copyout(buf, la->data, la->bufsize);
3015 
3016 		free(buf, M_DEVBUF);
3017 		break;
3018 	}
3019 	case CHELSIO_GET_UP_IOQS: {
3020 		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3021 		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3022 		uint32_t *v;
3023 
3024 		if (buf == NULL) {
3025 			return (ENOMEM);
3026 		}
3027 		if (ioqs->bufsize < IOQS_BUFSIZE)
3028 			error = ENOBUFS;
3029 
3030 		if (!error)
3031 			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3032 
3033 		if (!error) {
3034 			v = (uint32_t *)buf;
3035 
3036 			ioqs->ioq_rx_enable = *v++;
3037 			ioqs->ioq_tx_enable = *v++;
3038 			ioqs->ioq_rx_status = *v++;
3039 			ioqs->ioq_tx_status = *v++;
3040 
3041 			error = copyout(v, ioqs->data, ioqs->bufsize);
3042 		}
3043 
3044 		free(buf, M_DEVBUF);
3045 		break;
3046 	}
3047 	case CHELSIO_SET_FILTER: {
3048 		struct ch_filter *f = (struct ch_filter *)data;
3049 		struct filter_info *p;
3050 		unsigned int nfilters = sc->params.mc5.nfilters;
3051 
3052 		if (!is_offload(sc))
3053 			return (EOPNOTSUPP);	/* No TCAM */
3054 		if (!(sc->flags & FULL_INIT_DONE))
3055 			return (EAGAIN);	/* mc5 not setup yet */
3056 		if (nfilters == 0)
3057 			return (EBUSY);		/* TOE will use TCAM */
3058 
3059 		/* sanity checks */
3060 		if (f->filter_id >= nfilters ||
3061 		    (f->val.dip && f->mask.dip != 0xffffffff) ||
3062 		    (f->val.sport && f->mask.sport != 0xffff) ||
3063 		    (f->val.dport && f->mask.dport != 0xffff) ||
3064 		    (f->val.vlan && f->mask.vlan != 0xfff) ||
3065 		    (f->val.vlan_prio &&
3066 			f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3067 		    (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3068 		    f->qset >= SGE_QSETS ||
3069 		    sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3070 			return (EINVAL);
3071 
3072 		/* Was allocated with M_WAITOK */
3073 		KASSERT(sc->filters, ("filter table NULL\n"));
3074 
3075 		p = &sc->filters[f->filter_id];
3076 		if (p->locked)
3077 			return (EPERM);
3078 
3079 		bzero(p, sizeof(*p));
3080 		p->sip = f->val.sip;
3081 		p->sip_mask = f->mask.sip;
3082 		p->dip = f->val.dip;
3083 		p->sport = f->val.sport;
3084 		p->dport = f->val.dport;
3085 		p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3086 		p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3087 		    FILTER_NO_VLAN_PRI;
3088 		p->mac_hit = f->mac_hit;
3089 		p->mac_vld = f->mac_addr_idx != 0xffff;
3090 		p->mac_idx = f->mac_addr_idx;
3091 		p->pkt_type = f->proto;
3092 		p->report_filter_id = f->want_filter_id;
3093 		p->pass = f->pass;
3094 		p->rss = f->rss;
3095 		p->qset = f->qset;
3096 
3097 		error = set_filter(sc, f->filter_id, p);
3098 		if (error == 0)
3099 			p->valid = 1;
3100 		break;
3101 	}
3102 	case CHELSIO_DEL_FILTER: {
3103 		struct ch_filter *f = (struct ch_filter *)data;
3104 		struct filter_info *p;
3105 		unsigned int nfilters = sc->params.mc5.nfilters;
3106 
3107 		if (!is_offload(sc))
3108 			return (EOPNOTSUPP);
3109 		if (!(sc->flags & FULL_INIT_DONE))
3110 			return (EAGAIN);
3111 		if (nfilters == 0 || sc->filters == NULL)
3112 			return (EINVAL);
3113 		if (f->filter_id >= nfilters)
3114 			return (EINVAL);
3115 
3116 		p = &sc->filters[f->filter_id];
3117 		if (p->locked)
3118 			return (EPERM);
3119 		if (!p->valid)
3120 			return (EFAULT); /* Read "Bad address" as "Bad index" */
3121 
3122 		bzero(p, sizeof(*p));
3123 		p->sip = p->sip_mask = 0xffffffff;
3124 		p->vlan = 0xfff;
3125 		p->vlan_prio = FILTER_NO_VLAN_PRI;
3126 		p->pkt_type = 1;
3127 		error = set_filter(sc, f->filter_id, p);
3128 		break;
3129 	}
3130 	case CHELSIO_GET_FILTER: {
3131 		struct ch_filter *f = (struct ch_filter *)data;
3132 		struct filter_info *p;
3133 		unsigned int i, nfilters = sc->params.mc5.nfilters;
3134 
3135 		if (!is_offload(sc))
3136 			return (EOPNOTSUPP);
3137 		if (!(sc->flags & FULL_INIT_DONE))
3138 			return (EAGAIN);
3139 		if (nfilters == 0 || sc->filters == NULL)
3140 			return (EINVAL);
3141 
3142 		i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3143 		for (; i < nfilters; i++) {
3144 			p = &sc->filters[i];
3145 			if (!p->valid)
3146 				continue;
3147 
3148 			bzero(f, sizeof(*f));
3149 
3150 			f->filter_id = i;
3151 			f->val.sip = p->sip;
3152 			f->mask.sip = p->sip_mask;
3153 			f->val.dip = p->dip;
3154 			f->mask.dip = p->dip ? 0xffffffff : 0;
3155 			f->val.sport = p->sport;
3156 			f->mask.sport = p->sport ? 0xffff : 0;
3157 			f->val.dport = p->dport;
3158 			f->mask.dport = p->dport ? 0xffff : 0;
3159 			f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3160 			f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3161 			f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3162 			    0 : p->vlan_prio;
3163 			f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3164 			    0 : FILTER_NO_VLAN_PRI;
3165 			f->mac_hit = p->mac_hit;
3166 			f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3167 			f->proto = p->pkt_type;
3168 			f->want_filter_id = p->report_filter_id;
3169 			f->pass = p->pass;
3170 			f->rss = p->rss;
3171 			f->qset = p->qset;
3172 
3173 			break;
3174 		}
3175 
3176 		if (i == nfilters)
3177 			f->filter_id = 0xffffffff;
3178 		break;
3179 	}
3180 	default:
3181 		return (EOPNOTSUPP);
3182 		break;
3183 	}
3184 
3185 	return (error);
3186 }
3187 
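/*
 * Copy the registers in [start, end] into buf at their register offsets, so
 * the resulting dump can be indexed by register address.
 */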
3188 static __inline void
3189 reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3190     unsigned int end)
3191 {
3192 	uint32_t *p = (uint32_t *)(buf + start);
3193 
3194 	for ( ; start <= end; start += sizeof(uint32_t))
3195 		*p++ = t3_read_reg(ap, start);
3196 }
3197 
3198 #define T3_REGMAP_SIZE (3 * 1024)
3199 static int
3200 cxgb_get_regs_len(void)
3201 {
3202 	return (T3_REGMAP_SIZE);
3203 }
3204 
3205 static void
3206 cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3207 {
3208 
3209 	/*
3210 	 * Version scheme:
3211 	 * bits 0..9: chip version
3212 	 * bits 10..15: chip revision
3213 	 * bit 31: set for PCIe cards
3214 	 */
3215 	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
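	/*
	 * For illustration, a consumer of this dump could decode the version
	 * word along these lines (the names below are made up here):
	 *
	 *	chip = version & 0x3ff;
	 *	rev  = (version >> 10) & 0x3f;
	 *	pcie = (version >> 31) & 1;
	 */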
3216 
3217 	/*
3218 	 * We skip the MAC statistics registers because they are clear-on-read.
3219 	 * Also reading multi-register stats would need to synchronize with the
3220 	 * periodic mac stats accumulation.  Hard to justify the complexity.
3221 	 */
3222 	memset(buf, 0, cxgb_get_regs_len());
3223 	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3224 	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3225 	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3226 	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3227 	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3228 	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3229 		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3230 	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3231 		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3232 }
3233 
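/*
 * Allocate the software state for the TCAM filters.  The last entry is a
 * locked, always-valid default that passes packets and leaves RSS enabled.
 */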
3234 static int
3235 alloc_filters(struct adapter *sc)
3236 {
3237 	struct filter_info *p;
3238 	unsigned int nfilters = sc->params.mc5.nfilters;
3239 
3240 	if (nfilters == 0)
3241 		return (0);
3242 
3243 	p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3244 	sc->filters = p;
3245 
3246 	p = &sc->filters[nfilters - 1];
3247 	p->vlan = 0xfff;
3248 	p->vlan_prio = FILTER_NO_VLAN_PRI;
3249 	p->pass = p->rss = p->valid = p->locked = 1;
3250 
3251 	return (0);
3252 }
3253 
3254 static int
3255 setup_hw_filters(struct adapter *sc)
3256 {
3257 	int i, rc;
3258 	unsigned int nfilters = sc->params.mc5.nfilters;
3259 
3260 	if (!sc->filters)
3261 		return (0);
3262 
3263 	t3_enable_filters(sc);
3264 
3265 	for (i = rc = 0; i < nfilters && !rc; i++) {
3266 		if (sc->filters[i].locked)
3267 			rc = set_filter(sc, i, &sc->filters[i]);
3268 	}
3269 
3270 	return (rc);
3271 }
3272 
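/*
 * Program one TCAM filter.  The filter is sent as a management packet holding
 * a CPL_PASS_OPEN_REQ (the match criteria) followed by two CPL_SET_TCB_FIELD
 * requests that configure the action; when the filter passes packets without
 * RSS, a third request steers matches to the chosen queue set.
 */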
3273 static int
3274 set_filter(struct adapter *sc, int id, const struct filter_info *f)
3275 {
3276 	int len;
3277 	struct mbuf *m;
3278 	struct ulp_txpkt *txpkt;
3279 	struct work_request_hdr *wr;
3280 	struct cpl_pass_open_req *oreq;
3281 	struct cpl_set_tcb_field *sreq;
3282 
3283 	len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3284 	KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3285 
3286 	id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3287 	      sc->params.mc5.nfilters;
3288 
3289 	m = m_gethdr(M_WAITOK, MT_DATA);
3290 	m->m_len = m->m_pkthdr.len = len;
3291 	bzero(mtod(m, char *), len);
3292 
3293 	wr = mtod(m, struct work_request_hdr *);
3294 	wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3295 
3296 	oreq = (struct cpl_pass_open_req *)(wr + 1);
3297 	txpkt = (struct ulp_txpkt *)oreq;
3298 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3299 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3300 	OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3301 	oreq->local_port = htons(f->dport);
3302 	oreq->peer_port = htons(f->sport);
3303 	oreq->local_ip = htonl(f->dip);
3304 	oreq->peer_ip = htonl(f->sip);
3305 	oreq->peer_netmask = htonl(f->sip_mask);
3306 	oreq->opt0h = 0;
3307 	oreq->opt0l = htonl(F_NO_OFFLOAD);
3308 	oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3309 			 V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3310 			 V_VLAN_PRI(f->vlan_prio >> 1) |
3311 			 V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3312 			 V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3313 			 V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3314 
3315 	sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3316 	set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3317 			  (f->report_filter_id << 15) | (1 << 23) |
3318 			  ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3319 	set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3320 	t3_mgmt_tx(sc, m);
3321 
3322 	if (f->pass && !f->rss) {
3323 		len = sizeof(*sreq);
3324 		m = m_gethdr(M_WAITOK, MT_DATA);
3325 		m->m_len = m->m_pkthdr.len = len;
3326 		bzero(mtod(m, char *), len);
3327 		sreq = mtod(m, struct cpl_set_tcb_field *);
3328 		sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3329 		mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3330 				 (u64)sc->rrss_map[f->qset] << 19);
3331 		t3_mgmt_tx(sc, m);
3332 	}
3333 	return (0);
3334 }
3335 
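/*
 * mk_set_tcb_field() fills in a CPL_SET_TCB_FIELD request with no reply
 * requested; set_tcb_field_ulp() additionally wraps it in a ULP_TXPKT header
 * so it can be embedded in a larger management work request.
 */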
3336 static inline void
3337 mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3338     unsigned int word, u64 mask, u64 val)
3339 {
3340 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3341 	req->reply = V_NO_REPLY(1);
3342 	req->cpu_idx = 0;
3343 	req->word = htons(word);
3344 	req->mask = htobe64(mask);
3345 	req->val = htobe64(val);
3346 }
3347 
3348 static inline void
3349 set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3350     unsigned int word, u64 mask, u64 val)
3351 {
3352 	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3353 
3354 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3355 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3356 	mk_set_tcb_field(req, tid, word, mask, val);
3357 }
3358 
3359 void
3360 t3_iterate(void (*func)(struct adapter *, void *), void *arg)
3361 {
3362 	struct adapter *sc;
3363 
3364 	mtx_lock(&t3_list_lock);
3365 	SLIST_FOREACH(sc, &t3_list, link) {
3366 		/*
3367 		 * func should not make any assumptions about what state sc is
3368 		 * in - the only guarantee is that sc->sc_lock is a valid lock.
3369 		 */
3370 		func(sc, arg);
3371 	}
3372 	mtx_unlock(&t3_list_lock);
3373 }
3374 
3375 #ifdef TCP_OFFLOAD
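/*
 * Enable or disable TOE on a port.  The first enable activates the TOM upper
 * layer driver (t3_tom.ko must already be loaded) and also makes a one-shot
 * attempt to bring up iWARP; disable simply clears the port's bit in the
 * offload map.
 */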
3376 static int
3377 toe_capability(struct port_info *pi, int enable)
3378 {
3379 	int rc;
3380 	struct adapter *sc = pi->adapter;
3381 
3382 	ADAPTER_LOCK_ASSERT_OWNED(sc);
3383 
3384 	if (!is_offload(sc))
3385 		return (ENODEV);
3386 
3387 	if (enable) {
3388 		if (!(sc->flags & FULL_INIT_DONE)) {
3389 			log(LOG_WARNING,
3390 			    "You must enable a cxgb interface first\n");
3391 			return (EAGAIN);
3392 		}
3393 
3394 		if (isset(&sc->offload_map, pi->port_id))
3395 			return (0);
3396 
3397 		if (!(sc->flags & TOM_INIT_DONE)) {
3398 			rc = t3_activate_uld(sc, ULD_TOM);
3399 			if (rc == EAGAIN) {
3400 				log(LOG_WARNING,
3401 				    "You must kldload t3_tom.ko before trying "
3402 				    "to enable TOE on a cxgb interface.\n");
3403 			}
3404 			if (rc != 0)
3405 				return (rc);
3406 			KASSERT(sc->tom_softc != NULL,
3407 			    ("%s: TOM activated but softc NULL", __func__));
3408 			KASSERT(sc->flags & TOM_INIT_DONE,
3409 			    ("%s: TOM activated but flag not set", __func__));
3410 		}
3411 
3412 		setbit(&sc->offload_map, pi->port_id);
3413 
3414 		/*
3415 		 * XXX: Temporary code to allow iWARP to be enabled when TOE is
3416 		 * enabled on any port.  Need to figure out how to enable,
3417 		 * disable, load, and unload iWARP cleanly.
3418 		 */
3419 		if (!isset(&sc->offload_map, MAX_NPORTS) &&
3420 		    t3_activate_uld(sc, ULD_IWARP) == 0)
3421 			setbit(&sc->offload_map, MAX_NPORTS);
3422 	} else {
3423 		if (!isset(&sc->offload_map, pi->port_id))
3424 			return (0);
3425 
3426 		KASSERT(sc->flags & TOM_INIT_DONE,
3427 		    ("%s: TOM never initialized?", __func__));
3428 		clrbit(&sc->offload_map, pi->port_id);
3429 	}
3430 
3431 	return (0);
3432 }
3433 
3434 /*
3435  * Add an upper layer driver to the global list.
3436  */
3437 int
3438 t3_register_uld(struct uld_info *ui)
3439 {
3440 	int rc = 0;
3441 	struct uld_info *u;
3442 
3443 	mtx_lock(&t3_uld_list_lock);
3444 	SLIST_FOREACH(u, &t3_uld_list, link) {
3445 		if (u->uld_id == ui->uld_id) {
3446 			rc = EEXIST;
3447 			goto done;
3448 		}
3449 	}
3450 
3451 	SLIST_INSERT_HEAD(&t3_uld_list, ui, link);
3452 	ui->refcount = 0;
3453 done:
3454 	mtx_unlock(&t3_uld_list_lock);
3455 	return (rc);
3456 }
3457 
3458 int
3459 t3_unregister_uld(struct uld_info *ui)
3460 {
3461 	int rc = EINVAL;
3462 	struct uld_info *u;
3463 
3464 	mtx_lock(&t3_uld_list_lock);
3465 
3466 	SLIST_FOREACH(u, &t3_uld_list, link) {
3467 		if (u == ui) {
3468 			if (ui->refcount > 0) {
3469 				rc = EBUSY;
3470 				goto done;
3471 			}
3472 
3473 			SLIST_REMOVE(&t3_uld_list, ui, uld_info, link);
3474 			rc = 0;
3475 			goto done;
3476 		}
3477 	}
3478 done:
3479 	mtx_unlock(&t3_uld_list_lock);
3480 	return (rc);
3481 }
3482 
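/*
 * Activate/deactivate an upper layer driver by id, adjusting its reference
 * count on success.  EAGAIN from t3_activate_uld() means no such ULD is
 * registered, i.e. its module has not been loaded.
 */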
3483 int
3484 t3_activate_uld(struct adapter *sc, int id)
3485 {
3486 	int rc = EAGAIN;
3487 	struct uld_info *ui;
3488 
3489 	mtx_lock(&t3_uld_list_lock);
3490 
3491 	SLIST_FOREACH(ui, &t3_uld_list, link) {
3492 		if (ui->uld_id == id) {
3493 			rc = ui->activate(sc);
3494 			if (rc == 0)
3495 				ui->refcount++;
3496 			goto done;
3497 		}
3498 	}
3499 done:
3500 	mtx_unlock(&t3_uld_list_lock);
3501 
3502 	return (rc);
3503 }
3504 
3505 int
3506 t3_deactivate_uld(struct adapter *sc, int id)
3507 {
3508 	int rc = EINVAL;
3509 	struct uld_info *ui;
3510 
3511 	mtx_lock(&t3_uld_list_lock);
3512 
3513 	SLIST_FOREACH(ui, &t3_uld_list, link) {
3514 		if (ui->uld_id == id) {
3515 			rc = ui->deactivate(sc);
3516 			if (rc == 0)
3517 				ui->refcount--;
3518 			goto done;
3519 		}
3520 	}
3521 done:
3522 	mtx_unlock(&t3_uld_list_lock);
3523 
3524 	return (rc);
3525 }
3526 
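/*
 * CPL handler dispatch.  Opcodes without a registered handler fall back to
 * cpl_not_handled(), which drops the message; passing h == NULL to
 * t3_register_cpl_handler() restores that default.
 */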
3527 static int
3528 cpl_not_handled(struct sge_qset *qs __unused, struct rsp_desc *r __unused,
3529     struct mbuf *m)
3530 {
3531 	m_freem(m);
3532 	return (EDOOFUS);
3533 }
3534 
3535 int
3536 t3_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h)
3537 {
3538 	uintptr_t *loc, new;
3539 
3540 	if (opcode >= NUM_CPL_HANDLERS)
3541 		return (EINVAL);
3542 
3543 	new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled;
3544 	loc = (uintptr_t *) &sc->cpl_handler[opcode];
3545 	atomic_store_rel_ptr(loc, new);
3546 
3547 	return (0);
3548 }
3549 #endif
3550 
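/*
 * Module event handler: set up the global adapter and ULD lists on load and
 * tear them down on unload.  Unload is refused while any adapter or ULD is
 * still registered.
 */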
3551 static int
3552 cxgbc_mod_event(module_t mod, int cmd, void *arg)
3553 {
3554 	int rc = 0;
3555 
3556 	switch (cmd) {
3557 	case MOD_LOAD:
3558 		mtx_init(&t3_list_lock, "T3 adapters", 0, MTX_DEF);
3559 		SLIST_INIT(&t3_list);
3560 #ifdef TCP_OFFLOAD
3561 		mtx_init(&t3_uld_list_lock, "T3 ULDs", 0, MTX_DEF);
3562 		SLIST_INIT(&t3_uld_list);
3563 #endif
3564 		break;
3565 
3566 	case MOD_UNLOAD:
3567 #ifdef TCP_OFFLOAD
3568 		mtx_lock(&t3_uld_list_lock);
3569 		if (!SLIST_EMPTY(&t3_uld_list)) {
3570 			rc = EBUSY;
3571 			mtx_unlock(&t3_uld_list_lock);
3572 			break;
3573 		}
3574 		mtx_unlock(&t3_uld_list_lock);
3575 		mtx_destroy(&t3_uld_list_lock);
3576 #endif
3577 		mtx_lock(&t3_list_lock);
3578 		if (!SLIST_EMPTY(&t3_list)) {
3579 			rc = EBUSY;
3580 			mtx_unlock(&t3_list_lock);
3581 			break;
3582 		}
3583 		mtx_unlock(&t3_list_lock);
3584 		mtx_destroy(&t3_list_lock);
3585 		break;
3586 	}
3587 
3588 	return (rc);
3589 }
3590 
3591 #ifdef NETDUMP
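/*
 * netdump(4) support: report the receive ring geometry, re-initialize the
 * free-list zones (and disable LRO) when a dump starts, and provide minimal
 * polled transmit and receive paths.
 */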
3592 static void
3593 cxgb_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize)
3594 {
3595 	struct port_info *pi;
3596 	adapter_t *adap;
3597 
3598 	pi = if_getsoftc(ifp);
3599 	adap = pi->adapter;
3600 	ADAPTER_LOCK(adap);
3601 	*nrxr = SGE_QSETS;
3602 	*ncl = adap->sge.qs[0].fl[1].size;
3603 	*clsize = adap->sge.qs[0].fl[1].buf_size;
3604 	ADAPTER_UNLOCK(adap);
3605 }
3606 
3607 static void
3608 cxgb_netdump_event(struct ifnet *ifp, enum netdump_ev event)
3609 {
3610 	struct port_info *pi;
3611 	struct sge_qset *qs;
3612 	int i;
3613 
3614 	pi = if_getsoftc(ifp);
3615 	if (event == NETDUMP_START)
3616 		for (i = 0; i < SGE_QSETS; i++) {
3617 			qs = &pi->adapter->sge.qs[i];
3618 
3619 			/* Need to reinit after netdump_mbuf_dump(). */
3620 			qs->fl[0].zone = zone_pack;
3621 			qs->fl[1].zone = zone_clust;
3622 			qs->lro.enabled = 0;
3623 		}
3624 }
3625 
3626 static int
3627 cxgb_netdump_transmit(struct ifnet *ifp, struct mbuf *m)
3628 {
3629 	struct port_info *pi;
3630 	struct sge_qset *qs;
3631 
3632 	pi = if_getsoftc(ifp);
3633 	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
3634 	    IFF_DRV_RUNNING)
3635 		return (ENOENT);
3636 
3637 	qs = &pi->adapter->sge.qs[pi->first_qset];
3638 	return (cxgb_netdump_encap(qs, &m));
3639 }
3640 
3641 static int
3642 cxgb_netdump_poll(struct ifnet *ifp, int count)
3643 {
3644 	struct port_info *pi;
3645 	adapter_t *adap;
3646 	int i;
3647 
3648 	pi = if_getsoftc(ifp);
3649 	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
3650 		return (ENOENT);
3651 
3652 	adap = pi->adapter;
3653 	for (i = 0; i < SGE_QSETS; i++)
3654 		(void)cxgb_netdump_poll_rx(adap, &adap->sge.qs[i]);
3655 	(void)cxgb_netdump_poll_tx(&adap->sge.qs[pi->first_qset]);
3656 	return (0);
3657 }
3658 #endif /* NETDUMP */
3659