xref: /freebsd/sys/dev/cxgb/cxgb_main.c (revision 9768746b)
1 /**************************************************************************
2 SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 
4 Copyright (c) 2007-2009, Chelsio Inc.
5 All rights reserved.
6 
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
9 
10  1. Redistributions of source code must retain the above copyright notice,
11     this list of conditions and the following disclaimer.
12 
13  2. Neither the name of the Chelsio Corporation nor the names of its
14     contributors may be used to endorse or promote products derived from
15     this software without specific prior written permission.
16 
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.
28 
29 ***************************************************************************/
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include "opt_inet.h"
35 
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/bus.h>
40 #include <sys/module.h>
41 #include <sys/pciio.h>
42 #include <sys/conf.h>
43 #include <machine/bus.h>
44 #include <machine/resource.h>
45 #include <sys/ktr.h>
46 #include <sys/rman.h>
47 #include <sys/ioccom.h>
48 #include <sys/mbuf.h>
49 #include <sys/linker.h>
50 #include <sys/firmware.h>
51 #include <sys/socket.h>
52 #include <sys/sockio.h>
53 #include <sys/smp.h>
54 #include <sys/sysctl.h>
55 #include <sys/syslog.h>
56 #include <sys/queue.h>
57 #include <sys/taskqueue.h>
58 #include <sys/proc.h>
59 
60 #include <net/bpf.h>
61 #include <net/debugnet.h>
62 #include <net/ethernet.h>
63 #include <net/if.h>
64 #include <net/if_var.h>
65 #include <net/if_arp.h>
66 #include <net/if_dl.h>
67 #include <net/if_media.h>
68 #include <net/if_types.h>
69 #include <net/if_vlan_var.h>
70 
71 #include <netinet/in_systm.h>
72 #include <netinet/in.h>
73 #include <netinet/if_ether.h>
74 #include <netinet/ip.h>
75 #include <netinet/ip.h>
76 #include <netinet/tcp.h>
77 #include <netinet/udp.h>
78 
79 #include <dev/pci/pcireg.h>
80 #include <dev/pci/pcivar.h>
81 #include <dev/pci/pci_private.h>
82 
83 #include <cxgb_include.h>
84 
85 #ifdef PRIV_SUPPORTED
86 #include <sys/priv.h>
87 #endif
88 
89 static int cxgb_setup_interrupts(adapter_t *);
90 static void cxgb_teardown_interrupts(adapter_t *);
91 static void cxgb_init(void *);
92 static int cxgb_init_locked(struct port_info *);
93 static int cxgb_uninit_locked(struct port_info *);
94 static int cxgb_uninit_synchronized(struct port_info *);
95 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
96 static int cxgb_media_change(struct ifnet *);
97 static int cxgb_ifm_type(int);
98 static void cxgb_build_medialist(struct port_info *);
99 static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
100 static uint64_t cxgb_get_counter(struct ifnet *, ift_counter);
101 static int setup_sge_qsets(adapter_t *);
102 static void cxgb_async_intr(void *);
103 static void cxgb_tick_handler(void *, int);
104 static void cxgb_tick(void *);
105 static void link_check_callout(void *);
106 static void check_link_status(void *, int);
107 static void setup_rss(adapter_t *sc);
108 static int alloc_filters(struct adapter *);
109 static int setup_hw_filters(struct adapter *);
110 static int set_filter(struct adapter *, int, const struct filter_info *);
111 static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
112     unsigned int, u64, u64);
113 static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
114     unsigned int, u64, u64);
115 #ifdef TCP_OFFLOAD
116 static int cpl_not_handled(struct sge_qset *, struct rsp_desc *, struct mbuf *);
117 #endif
118 
119 /* Attachment glue for the PCI controller end of the device.  Each port of
120  * the device is attached separately, as defined later.
121  */
122 static int cxgb_controller_probe(device_t);
123 static int cxgb_controller_attach(device_t);
124 static int cxgb_controller_detach(device_t);
125 static void cxgb_free(struct adapter *);
126 static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
127     unsigned int end);
128 static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
129 static int cxgb_get_regs_len(void);
130 static void touch_bars(device_t dev);
131 static void cxgb_update_mac_settings(struct port_info *p);
132 #ifdef TCP_OFFLOAD
133 static int toe_capability(struct port_info *, int);
134 #endif
135 
136 /* Table for probing the cards.  In the code visible here the desc field
 * is never printed; cxgb_get_ident() only tests it against NULL to detect
 * the end of the table. */
137 struct cxgb_ident {
138 	uint16_t	vendor;	/* compared with pci_get_vendor() */
139 	uint16_t	device;	/* compared with pci_get_device() */
140 	int		index;	/* handed to t3_get_adapter_info() */
141 	char		*desc;	/* NULL marks the terminating entry */
142 } cxgb_identifiers[] = {
143 	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
144 	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
145 	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
146 	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
147 	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
148 	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
149 	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
150 	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
151 	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
152 	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
153 	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
154 	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
155 	{PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
156 	{PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
157 	{0, 0, 0, NULL}	/* sentinel: terminates the cxgb_get_ident() scan */
158 };
159 
160 static device_method_t cxgb_controller_methods[] = {
161 	DEVMETHOD(device_probe,		cxgb_controller_probe),
162 	DEVMETHOD(device_attach,	cxgb_controller_attach),
163 	DEVMETHOD(device_detach,	cxgb_controller_detach),
164 
165 	DEVMETHOD_END
166 };
167 
168 static driver_t cxgb_controller_driver = {
169 	"cxgbc",
170 	cxgb_controller_methods,
171 	sizeof(struct adapter)
172 };
173 
174 static int cxgbc_mod_event(module_t, int, void *);
175 
176 DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgbc_mod_event, NULL);
177 MODULE_PNP_INFO("U16:vendor;U16:device", pci, cxgbc, cxgb_identifiers,
178     nitems(cxgb_identifiers) - 1);
179 MODULE_VERSION(cxgbc, 1);
180 MODULE_DEPEND(cxgbc, firmware, 1, 1, 1);
181 
182 /*
183  * Attachment glue for the ports.  Attachment is done directly to the
184  * controller device.
185  */
186 static int cxgb_port_probe(device_t);
187 static int cxgb_port_attach(device_t);
188 static int cxgb_port_detach(device_t);
189 
190 static device_method_t cxgb_port_methods[] = {
191 	DEVMETHOD(device_probe,		cxgb_port_probe),
192 	DEVMETHOD(device_attach,	cxgb_port_attach),
193 	DEVMETHOD(device_detach,	cxgb_port_detach),
194 	{ 0, 0 }
195 };
196 
197 static driver_t cxgb_port_driver = {
198 	"cxgb",
199 	cxgb_port_methods,
200 	0
201 };
202 
203 static d_ioctl_t cxgb_extension_ioctl;
204 static d_open_t cxgb_extension_open;
205 static d_close_t cxgb_extension_close;
206 
207 static struct cdevsw cxgb_cdevsw = {
208        .d_version =    D_VERSION,
209        .d_flags =      0,
210        .d_open =       cxgb_extension_open,
211        .d_close =      cxgb_extension_close,
212        .d_ioctl =      cxgb_extension_ioctl,
213        .d_name =       "cxgb",
214 };
215 
216 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, 0, 0);
217 MODULE_VERSION(cxgb, 1);
218 
219 DEBUGNET_DEFINE(cxgb);
220 
221 static struct mtx t3_list_lock;
222 static SLIST_HEAD(, adapter) t3_list;
223 #ifdef TCP_OFFLOAD
224 static struct mtx t3_uld_list_lock;
225 static SLIST_HEAD(, uld_info) t3_uld_list;
226 #endif
227 
228 /*
229  * The driver uses the best interrupt scheme available on a platform in the
230  * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
231  * of these schemes the driver may consider as follows:
232  *
233  * msi_allowed = 2: choose from among all three options
234  * msi_allowed = 1: only consider MSI and pin interrupts
235  * msi_allowed = 0: force pin interrupts
236  */
237 static int msi_allowed = 2;
238 
239 SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
240     "CXGB driver parameters");
241 SYSCTL_INT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
242     "MSI-X, MSI, INTx selector");
243 
244 /*
245  * The driver uses an auto-queue algorithm by default.
246  * To disable it and force a single queue-set per port, use multiq = 0
247  */
248 static int multiq = 1;
249 SYSCTL_INT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
250     "use min(ncpus/ports, 8) queue-sets per port");
251 
252 /*
253  * By default the driver will not update the firmware unless
254  * it was compiled against a newer version
255  *
256  */
257 static int force_fw_update = 0;
258 SYSCTL_INT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
259     "update firmware even if up to date");
260 
261 int cxgb_use_16k_clusters = -1;
262 SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
263     &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue ");
264 
265 static int nfilters = -1;
266 SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
267     &nfilters, 0, "max number of entries in the filter table");
268 
269 enum {
270 	MAX_TXQ_ENTRIES      = 16384,
271 	MAX_CTRL_TXQ_ENTRIES = 1024,
272 	MAX_RSPQ_ENTRIES     = 16384,
273 	MAX_RX_BUFFERS       = 16384,
274 	MAX_RX_JUMBO_BUFFERS = 16384,
275 	MIN_TXQ_ENTRIES      = 4,
276 	MIN_CTRL_TXQ_ENTRIES = 4,
277 	MIN_RSPQ_ENTRIES     = 32,
278 	MIN_FL_ENTRIES       = 32,
279 	MIN_FL_JUMBO_ENTRIES = 32
280 };
281 
282 struct filter_info {
283 	u32 sip;
284 	u32 sip_mask;
285 	u32 dip;
286 	u16 sport;
287 	u16 dport;
288 	u32 vlan:12;
289 	u32 vlan_prio:3;
290 	u32 mac_hit:1;
291 	u32 mac_idx:4;
292 	u32 mac_vld:1;
293 	u32 pkt_type:2;
294 	u32 report_filter_id:1;
295 	u32 pass:1;
296 	u32 rss:1;
297 	u32 qset:3;
298 	u32 locked:1;
299 	u32 valid:1;
300 };
301 
302 enum { FILTER_NO_VLAN_PRI = 7 };
303 
304 #define EEPROM_MAGIC 0x38E2F10C
305 
306 #define PORT_MASK ((1 << MAX_NPORTS) - 1)
307 
308 
309 static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
310 
311 
312 static __inline char
313 t3rev2char(struct adapter *adapter)
314 {
315 	char rev = 'z';
316 
317 	switch(adapter->params.rev) {
318 	case T3_REV_A:
319 		rev = 'a';
320 		break;
321 	case T3_REV_B:
322 	case T3_REV_B2:
323 		rev = 'b';
324 		break;
325 	case T3_REV_C:
326 		rev = 'c';
327 		break;
328 	}
329 	return rev;
330 }
331 
332 static struct cxgb_ident *
333 cxgb_get_ident(device_t dev)
334 {
335 	struct cxgb_ident *id;
336 
337 	for (id = cxgb_identifiers; id->desc != NULL; id++) {
338 		if ((id->vendor == pci_get_vendor(dev)) &&
339 		    (id->device == pci_get_device(dev))) {
340 			return (id);
341 		}
342 	}
343 	return (NULL);
344 }
345 
346 static const struct adapter_info *
347 cxgb_get_adapter_info(device_t dev)
348 {
349 	struct cxgb_ident *id;
350 	const struct adapter_info *ai;
351 
352 	id = cxgb_get_ident(dev);
353 	if (id == NULL)
354 		return (NULL);
355 
356 	ai = t3_get_adapter_info(id->index);
357 
358 	return (ai);
359 }
360 
361 static int
362 cxgb_controller_probe(device_t dev)
363 {
364 	const struct adapter_info *ai;
365 	char *ports, buf[80];
366 	int nports;
367 
368 	ai = cxgb_get_adapter_info(dev);
369 	if (ai == NULL)
370 		return (ENXIO);
371 
372 	nports = ai->nports0 + ai->nports1;
373 	if (nports == 1)
374 		ports = "port";
375 	else
376 		ports = "ports";
377 
378 	snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
379 	device_set_desc_copy(dev, buf);
380 	return (BUS_PROBE_DEFAULT);
381 }
382 
383 #define FW_FNAME "cxgb_t3fw"
384 #define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
385 #define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
386 
387 static int
388 upgrade_fw(adapter_t *sc)
389 {
390 	const struct firmware *fw;
391 	int status;
392 	u32 vers;
393 
394 	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
395 		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
396 		return (ENOENT);
397 	} else
398 		device_printf(sc->dev, "installing firmware on card\n");
399 	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
400 
401 	if (status != 0) {
402 		device_printf(sc->dev, "failed to install firmware: %d\n",
403 		    status);
404 	} else {
405 		t3_get_fw_version(sc, &vers);
406 		snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
407 		    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
408 		    G_FW_VERSION_MICRO(vers));
409 	}
410 
411 	firmware_put(fw, FIRMWARE_UNLOAD);
412 
413 	return (status);
414 }
415 
416 /*
417  * The cxgb_controller_attach function is responsible for the initial
418  * bringup of the device.  Its responsibilities include:
419  *
420  *  1. Determine if the device supports MSI or MSI-X.
421  *  2. Allocate bus resources so that we can access the Base Address Register
422  *  3. Create and initialize mutexes for the controller and its control
423  *     logic such as SGE and MDIO.
424  *  4. Call hardware specific setup routine for the adapter as a whole.
425  *  5. Allocate the BAR for doing MSI-X.
426  *  6. Setup the line interrupt iff MSI-X is not supported.
427  *  7. Create the driver's taskq.
428  *  8. Start one task queue service thread.
429  *  9. Check if the firmware and SRAM are up-to-date.  They will be
430  *     auto-updated later (before FULL_INIT_DONE), if required.
431  * 10. Create a child device for each MAC (port)
432  * 11. Initialize T3 private state.
433  * 12. Trigger the LED
434  * 13. Setup offload iff supported.
435  * 14. Reset/restart the tick callout.
436  * 15. Attach sysctls
437  *
438  * NOTE: Any modification or deviation from this list MUST be reflected in
439  * the above comment.  Failure to do so will result in problems on various
440  * error conditions including link flapping.
441  */
442 static int
443 cxgb_controller_attach(device_t dev)
444 {
445 	device_t child;
446 	const struct adapter_info *ai;
447 	struct adapter *sc;
448 	int i, error = 0;
449 	uint32_t vers;
450 	int port_qsets = 1;
451 	int msi_needed, reg;
452 	char buf[80];
453 
454 	sc = device_get_softc(dev);
455 	sc->dev = dev;
456 	sc->msi_count = 0;
457 	ai = cxgb_get_adapter_info(dev);
458 
459 	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
460 	    device_get_unit(dev));
461 	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
462 
463 	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
464 	    device_get_unit(dev));
465 	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
466 	    device_get_unit(dev));
467 	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
468 	    device_get_unit(dev));
469 
470 	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
471 	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
472 	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
473 
474 	mtx_lock(&t3_list_lock);
475 	SLIST_INSERT_HEAD(&t3_list, sc, link);
476 	mtx_unlock(&t3_list_lock);
477 
478 	/* find the PCIe link width and set max read request to 4KB*/
479 	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
480 		uint16_t lnk;
481 
482 		lnk = pci_read_config(dev, reg + PCIER_LINK_STA, 2);
483 		sc->link_width = (lnk & PCIEM_LINK_STA_WIDTH) >> 4;
484 		if (sc->link_width < 8 &&
485 		    (ai->caps & SUPPORTED_10000baseT_Full)) {
486 			device_printf(sc->dev,
487 			    "PCIe x%d Link, expect reduced performance\n",
488 			    sc->link_width);
489 		}
490 
491 		pci_set_max_read_req(dev, 4096);
492 	}
493 
494 	touch_bars(dev);
495 	pci_enable_busmaster(dev);
496 	/*
497 	 * Allocate the registers and make them available to the driver.
498 	 * The registers that we care about for NIC mode are in BAR 0
499 	 */
500 	sc->regs_rid = PCIR_BAR(0);
501 	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
502 	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
503 		device_printf(dev, "Cannot allocate BAR region 0\n");
504 		error = ENXIO;
505 		goto out;
506 	}
507 
508 	sc->bt = rman_get_bustag(sc->regs_res);
509 	sc->bh = rman_get_bushandle(sc->regs_res);
510 	sc->mmio_len = rman_get_size(sc->regs_res);
511 
512 	for (i = 0; i < MAX_NPORTS; i++)
513 		sc->port[i].adapter = sc;
514 
515 	if (t3_prep_adapter(sc, ai, 1) < 0) {
516 		printf("prep adapter failed\n");
517 		error = ENODEV;
518 		goto out;
519 	}
520 
521 	sc->udbs_rid = PCIR_BAR(2);
522 	sc->udbs_res = NULL;
523 	if (is_offload(sc) &&
524 	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
525 		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
526 		device_printf(dev, "Cannot allocate BAR region 1\n");
527 		error = ENXIO;
528 		goto out;
529 	}
530 
531         /* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
532 	 * enough messages for the queue sets.  If that fails, try falling
533 	 * back to MSI.  If that fails, then try falling back to the legacy
534 	 * interrupt pin model.
535 	 */
536 	sc->msix_regs_rid = 0x20;
537 	if ((msi_allowed >= 2) &&
538 	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
539 	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
540 
541 		if (multiq)
542 			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
543 		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
544 
545 		if (pci_msix_count(dev) == 0 ||
546 		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
547 		    sc->msi_count != msi_needed) {
548 			device_printf(dev, "alloc msix failed - "
549 				      "msi_count=%d, msi_needed=%d, err=%d; "
550 				      "will try MSI\n", sc->msi_count,
551 				      msi_needed, error);
552 			sc->msi_count = 0;
553 			port_qsets = 1;
554 			pci_release_msi(dev);
555 			bus_release_resource(dev, SYS_RES_MEMORY,
556 			    sc->msix_regs_rid, sc->msix_regs_res);
557 			sc->msix_regs_res = NULL;
558 		} else {
559 			sc->flags |= USING_MSIX;
560 			sc->cxgb_intr = cxgb_async_intr;
561 			device_printf(dev,
562 				      "using MSI-X interrupts (%u vectors)\n",
563 				      sc->msi_count);
564 		}
565 	}
566 
567 	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
568 		sc->msi_count = 1;
569 		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
570 			device_printf(dev, "alloc msi failed - "
571 				      "err=%d; will try INTx\n", error);
572 			sc->msi_count = 0;
573 			port_qsets = 1;
574 			pci_release_msi(dev);
575 		} else {
576 			sc->flags |= USING_MSI;
577 			sc->cxgb_intr = t3_intr_msi;
578 			device_printf(dev, "using MSI interrupts\n");
579 		}
580 	}
581 	if (sc->msi_count == 0) {
582 		device_printf(dev, "using line interrupts\n");
583 		sc->cxgb_intr = t3b_intr;
584 	}
585 
586 	/* Create a private taskqueue thread for handling driver events */
587 	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
588 	    taskqueue_thread_enqueue, &sc->tq);
589 	if (sc->tq == NULL) {
590 		device_printf(dev, "failed to allocate controller task queue\n");
591 		goto out;
592 	}
593 
594 	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
595 	    device_get_nameunit(dev));
596 	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
597 
598 
599 	/* Create a periodic callout for checking adapter status */
600 	callout_init(&sc->cxgb_tick_ch, 1);
601 
602 	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
603 		/*
604 		 * Warn user that a firmware update will be attempted in init.
605 		 */
606 		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
607 		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
608 		sc->flags &= ~FW_UPTODATE;
609 	} else {
610 		sc->flags |= FW_UPTODATE;
611 	}
612 
613 	if (t3_check_tpsram_version(sc) < 0) {
614 		/*
615 		 * Warn user that a firmware update will be attempted in init.
616 		 */
617 		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
618 		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
619 		sc->flags &= ~TPS_UPTODATE;
620 	} else {
621 		sc->flags |= TPS_UPTODATE;
622 	}
623 
624 	/*
625 	 * Create a child device for each MAC.  The ethernet attachment
626 	 * will be done in these children.
627 	 */
628 	for (i = 0; i < (sc)->params.nports; i++) {
629 		struct port_info *pi;
630 
631 		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
632 			device_printf(dev, "failed to add child port\n");
633 			error = EINVAL;
634 			goto out;
635 		}
636 		pi = &sc->port[i];
637 		pi->adapter = sc;
638 		pi->nqsets = port_qsets;
639 		pi->first_qset = i*port_qsets;
640 		pi->port_id = i;
641 		pi->tx_chan = i >= ai->nports0;
642 		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
643 		sc->rxpkt_map[pi->txpkt_intf] = i;
644 		sc->port[i].tx_chan = i >= ai->nports0;
645 		sc->portdev[i] = child;
646 		device_set_softc(child, pi);
647 	}
648 	if ((error = bus_generic_attach(dev)) != 0)
649 		goto out;
650 
651 	/* initialize sge private state */
652 	t3_sge_init_adapter(sc);
653 
654 	t3_led_ready(sc);
655 
656 	error = t3_get_fw_version(sc, &vers);
657 	if (error)
658 		goto out;
659 
660 	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
661 	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
662 	    G_FW_VERSION_MICRO(vers));
663 
664 	snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
665 		 ai->desc, is_offload(sc) ? "R" : "",
666 		 sc->params.vpd.ec, sc->params.vpd.sn);
667 	device_set_desc_copy(dev, buf);
668 
669 	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
670 		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
671 		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
672 
673 	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
674 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
675 	t3_add_attach_sysctls(sc);
676 
677 #ifdef TCP_OFFLOAD
678 	for (i = 0; i < NUM_CPL_HANDLERS; i++)
679 		sc->cpl_handler[i] = cpl_not_handled;
680 #endif
681 
682 	t3_intr_clear(sc);
683 	error = cxgb_setup_interrupts(sc);
684 out:
685 	if (error)
686 		cxgb_free(sc);
687 
688 	return (error);
689 }
690 
691 /*
692  * The cxgb_controller_detach routine is called with the device is
693  * unloaded from the system.
694  */
695 
696 static int
697 cxgb_controller_detach(device_t dev)
698 {
699 	struct adapter *sc;
700 
701 	sc = device_get_softc(dev);
702 
703 	cxgb_free(sc);
704 
705 	return (0);
706 }
707 
708 /*
709  * The cxgb_free() is called by the cxgb_controller_detach() routine
710  * to tear down the structures that were built up in
711  * cxgb_controller_attach(), and should be the final piece of work
712  * done when fully unloading the driver.
713  *
714  *
715  *  1. Shutting down the threads started by the cxgb_controller_attach()
716  *     routine.
717  *  2. Stopping the lower level device and all callouts (cxgb_down_locked()).
718  *  3. Detaching all of the port devices created during the
719  *     cxgb_controller_attach() routine.
720  *  4. Removing the device children created via cxgb_controller_attach().
721  *  5. Releasing PCI resources associated with the device.
722  *  6. Turning off the offload support, iff it was turned on.
723  *  7. Destroying the mutexes created in cxgb_controller_attach().
724  *
725  */
726 static void
727 cxgb_free(struct adapter *sc)
728 {
729 	int i, nqsets = 0;
730 
731 	ADAPTER_LOCK(sc);
732 	sc->flags |= CXGB_SHUTDOWN;
733 	ADAPTER_UNLOCK(sc);
734 
735 	/*
736 	 * Make sure all child devices are gone.
737 	 */
738 	bus_generic_detach(sc->dev);
739 	for (i = 0; i < (sc)->params.nports; i++) {
740 		if (sc->portdev[i] &&
741 		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
742 			device_printf(sc->dev, "failed to delete child port\n");
743 		nqsets += sc->port[i].nqsets;
744 	}
745 
746 	/*
747 	 * At this point, it is as if cxgb_port_detach has run on all ports, and
748 	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
749 	 * all open devices have been closed.
750 	 */
751 	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
752 					   __func__, sc->open_device_map));
753 	for (i = 0; i < sc->params.nports; i++) {
754 		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
755 						  __func__, i));
756 	}
757 
758 	/*
759 	 * Finish off the adapter's callouts.
760 	 */
761 	callout_drain(&sc->cxgb_tick_ch);
762 	callout_drain(&sc->sge_timer_ch);
763 
764 	/*
765 	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
766 	 * sysctls are cleaned up by the kernel linker.
767 	 */
768 	if (sc->flags & FULL_INIT_DONE) {
769  		t3_free_sge_resources(sc, nqsets);
770  		sc->flags &= ~FULL_INIT_DONE;
771  	}
772 
773 	/*
774 	 * Release all interrupt resources.
775 	 */
776 	cxgb_teardown_interrupts(sc);
777 	if (sc->flags & (USING_MSI | USING_MSIX)) {
778 		device_printf(sc->dev, "releasing msi message(s)\n");
779 		pci_release_msi(sc->dev);
780 	} else {
781 		device_printf(sc->dev, "no msi message to release\n");
782 	}
783 
784 	if (sc->msix_regs_res != NULL) {
785 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
786 		    sc->msix_regs_res);
787 	}
788 
789 	/*
790 	 * Free the adapter's taskqueue.
791 	 */
792 	if (sc->tq != NULL) {
793 		taskqueue_free(sc->tq);
794 		sc->tq = NULL;
795 	}
796 
797 	free(sc->filters, M_DEVBUF);
798 	t3_sge_free(sc);
799 
800 	if (sc->udbs_res != NULL)
801 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
802 		    sc->udbs_res);
803 
804 	if (sc->regs_res != NULL)
805 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
806 		    sc->regs_res);
807 
808 	MTX_DESTROY(&sc->mdio_lock);
809 	MTX_DESTROY(&sc->sge.reg_lock);
810 	MTX_DESTROY(&sc->elmer_lock);
811 	mtx_lock(&t3_list_lock);
812 	SLIST_REMOVE(&t3_list, sc, adapter, link);
813 	mtx_unlock(&t3_list_lock);
814 	ADAPTER_LOCK_DEINIT(sc);
815 }
816 
817 /**
818  *	setup_sge_qsets - configure SGE Tx/Rx/response queues
819  *	@sc: the controller softc
820  *
821  *	Determines how many sets of SGE queues to use and initializes them.
822  *	We support multiple queue sets per port if we have MSI-X, otherwise
823  *	just one queue set per port.
824  */
825 static int
826 setup_sge_qsets(adapter_t *sc)
827 {
828 	int i, j, err, irq_idx = 0, qset_idx = 0;
829 	u_int ntxq = SGE_TXQ_PER_SET;
830 
831 	if ((err = t3_sge_alloc(sc)) != 0) {
832 		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
833 		return (err);
834 	}
835 
836 	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
837 		irq_idx = -1;
838 
839 	for (i = 0; i < (sc)->params.nports; i++) {
840 		struct port_info *pi = &sc->port[i];
841 
842 		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
843 			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
844 			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
845 			    &sc->params.sge.qset[qset_idx], ntxq, pi);
846 			if (err) {
847 				t3_free_sge_resources(sc, qset_idx);
848 				device_printf(sc->dev,
849 				    "t3_sge_alloc_qset failed with %d\n", err);
850 				return (err);
851 			}
852 		}
853 	}
854 
855 	sc->nqsets = qset_idx;
856 
857 	return (0);
858 }
859 
860 static void
861 cxgb_teardown_interrupts(adapter_t *sc)
862 {
863 	int i;
864 
865 	for (i = 0; i < SGE_QSETS; i++) {
866 		if (sc->msix_intr_tag[i] == NULL) {
867 
868 			/* Should have been setup fully or not at all */
869 			KASSERT(sc->msix_irq_res[i] == NULL &&
870 				sc->msix_irq_rid[i] == 0,
871 				("%s: half-done interrupt (%d).", __func__, i));
872 
873 			continue;
874 		}
875 
876 		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
877 				  sc->msix_intr_tag[i]);
878 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
879 				     sc->msix_irq_res[i]);
880 
881 		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
882 		sc->msix_irq_rid[i] = 0;
883 	}
884 
885 	if (sc->intr_tag) {
886 		KASSERT(sc->irq_res != NULL,
887 			("%s: half-done interrupt.", __func__));
888 
889 		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
890 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
891 				     sc->irq_res);
892 
893 		sc->irq_res = sc->intr_tag = NULL;
894 		sc->irq_rid = 0;
895 	}
896 }
897 
898 static int
899 cxgb_setup_interrupts(adapter_t *sc)
900 {
901 	struct resource *res;
902 	void *tag;
903 	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
904 
905 	sc->irq_rid = intr_flag ? 1 : 0;
906 	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
907 					     RF_SHAREABLE | RF_ACTIVE);
908 	if (sc->irq_res == NULL) {
909 		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
910 			      intr_flag, sc->irq_rid);
911 		err = EINVAL;
912 		sc->irq_rid = 0;
913 	} else {
914 		err = bus_setup_intr(sc->dev, sc->irq_res,
915 		    INTR_MPSAFE | INTR_TYPE_NET, NULL,
916 		    sc->cxgb_intr, sc, &sc->intr_tag);
917 
918 		if (err) {
919 			device_printf(sc->dev,
920 				      "Cannot set up interrupt (%x, %u, %d)\n",
921 				      intr_flag, sc->irq_rid, err);
922 			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
923 					     sc->irq_res);
924 			sc->irq_res = sc->intr_tag = NULL;
925 			sc->irq_rid = 0;
926 		}
927 	}
928 
929 	/* That's all for INTx or MSI */
930 	if (!(intr_flag & USING_MSIX) || err)
931 		return (err);
932 
933 	bus_describe_intr(sc->dev, sc->irq_res, sc->intr_tag, "err");
934 	for (i = 0; i < sc->msi_count - 1; i++) {
935 		rid = i + 2;
936 		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
937 					     RF_SHAREABLE | RF_ACTIVE);
938 		if (res == NULL) {
939 			device_printf(sc->dev, "Cannot allocate interrupt "
940 				      "for message %d\n", rid);
941 			err = EINVAL;
942 			break;
943 		}
944 
945 		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
946 				     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
947 		if (err) {
948 			device_printf(sc->dev, "Cannot set up interrupt "
949 				      "for message %d (%d)\n", rid, err);
950 			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
951 			break;
952 		}
953 
954 		sc->msix_irq_rid[i] = rid;
955 		sc->msix_irq_res[i] = res;
956 		sc->msix_intr_tag[i] = tag;
957 		bus_describe_intr(sc->dev, res, tag, "qs%d", i);
958 	}
959 
960 	if (err)
961 		cxgb_teardown_interrupts(sc);
962 
963 	return (err);
964 }
965 
966 
967 static int
968 cxgb_port_probe(device_t dev)
969 {
970 	struct port_info *p;
971 	char buf[80];
972 	const char *desc;
973 
974 	p = device_get_softc(dev);
975 	desc = p->phy.desc;
976 	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
977 	device_set_desc_copy(dev, buf);
978 	return (0);
979 }
980 
981 
982 static int
983 cxgb_makedev(struct port_info *pi)
984 {
985 
986 	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
987 	    UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));
988 
989 	if (pi->port_cdev == NULL)
990 		return (ENOMEM);
991 
992 	pi->port_cdev->si_drv1 = (void *)pi;
993 
994 	return (0);
995 }
996 
997 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
998     IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
999     IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6)
1000 #define CXGB_CAP_ENABLE CXGB_CAP
1001 
1002 static int
1003 cxgb_port_attach(device_t dev)
1004 {
1005 	struct port_info *p;
1006 	struct ifnet *ifp;
1007 	int err;
1008 	struct adapter *sc;
1009 
1010 	p = device_get_softc(dev);
1011 	sc = p->adapter;
1012 	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
1013 	    device_get_unit(device_get_parent(dev)), p->port_id);
1014 	PORT_LOCK_INIT(p, p->lockbuf);
1015 
1016 	callout_init(&p->link_check_ch, 1);
1017 	TASK_INIT(&p->link_check_task, 0, check_link_status, p);
1018 
1019 	/* Allocate an ifnet object and set it up */
1020 	ifp = p->ifp = if_alloc(IFT_ETHER);
1021 	if (ifp == NULL) {
1022 		device_printf(dev, "Cannot allocate ifnet\n");
1023 		return (ENOMEM);
1024 	}
1025 
1026 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1027 	ifp->if_init = cxgb_init;
1028 	ifp->if_softc = p;
1029 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1030 	ifp->if_ioctl = cxgb_ioctl;
1031 	ifp->if_transmit = cxgb_transmit;
1032 	ifp->if_qflush = cxgb_qflush;
1033 	ifp->if_get_counter = cxgb_get_counter;
1034 
1035 	ifp->if_capabilities = CXGB_CAP;
1036 #ifdef TCP_OFFLOAD
1037 	if (is_offload(sc))
1038 		ifp->if_capabilities |= IFCAP_TOE4;
1039 #endif
1040 	ifp->if_capenable = CXGB_CAP_ENABLE;
1041 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
1042 	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6;
1043 	ifp->if_hw_tsomax = IP_MAXPACKET;
1044 	ifp->if_hw_tsomaxsegcount = 36;
1045 	ifp->if_hw_tsomaxsegsize = 65536;
1046 
1047 	/*
1048 	 * Disable TSO on 4-port - it isn't supported by the firmware.
1049 	 */
1050 	if (sc->params.nports > 2) {
1051 		ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1052 		ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1053 		ifp->if_hwassist &= ~CSUM_TSO;
1054 	}
1055 
1056 	ether_ifattach(ifp, p->hw_addr);
1057 
1058 	/* Attach driver debugnet methods. */
1059 	DEBUGNET_SET(ifp, cxgb);
1060 
1061 #ifdef DEFAULT_JUMBO
1062 	if (sc->params.nports <= 2)
1063 		ifp->if_mtu = ETHERMTU_JUMBO;
1064 #endif
1065 	if ((err = cxgb_makedev(p)) != 0) {
1066 		printf("makedev failed %d\n", err);
1067 		return (err);
1068 	}
1069 
1070 	/* Create a list of media supported by this port */
1071 	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1072 	    cxgb_media_status);
1073 	cxgb_build_medialist(p);
1074 
1075 	t3_sge_init_port(p);
1076 
1077 	return (err);
1078 }
1079 
/*
 * cxgb_port_detach() is called via the device_detach methods when
 * cxgb_free() calls the bus_generic_detach.  It is responsible for
 * removing the device from the view of the kernel, i.e. from all
 * interfaces lists etc.  This routine is only called when the driver is
 * being unloaded, not when the link goes down.
 *
 * The port is first marked doomed and the adapter marked busy so that
 * concurrent init/uninit/ioctl paths back off; the busy flag is cleared
 * again once the port has been torn down.  Always returns 0.
 */
static int
cxgb_port_detach(device_t dev)
{
	struct port_info *p;
	struct adapter *sc;
	int i;

	p = device_get_softc(dev);
	sc = p->adapter;

	/* Tell cxgb_ioctl and if_init that the port is going away */
	ADAPTER_LOCK(sc);
	SET_DOOMED(p);
	/* Wake sleepers on sc->flags so they can notice the doomed port. */
	wakeup(&sc->flags);
	/* Wait (uninterruptibly) for any in-progress operation to finish. */
	while (IS_BUSY(sc))
		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
	SET_BUSY(sc);
	ADAPTER_UNLOCK(sc);

	if (p->port_cdev != NULL)
		destroy_dev(p->port_cdev);

	/* Runs without the adapter lock held, as the callee requires. */
	cxgb_uninit_synchronized(p);
	ether_ifdetach(p->ifp);

	/* Drain the per-queue-set Ethernet TX callouts owned by this port. */
	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
		struct sge_qset *qs = &sc->sge.qs[i];
		struct sge_txq *txq = &qs->txq[TXQ_ETH];

		callout_drain(&txq->txq_watchdog);
		callout_drain(&txq->txq_timer);
	}

	PORT_LOCK_DEINIT(p);
	if_free(p->ifp);
	p->ifp = NULL;

	/* Release the busy flag and let one waiter proceed. */
	ADAPTER_LOCK(sc);
	CLR_BUSY(sc);
	wakeup_one(&sc->flags);
	ADAPTER_UNLOCK(sc);
	return (0);
}
1130 
1131 void
1132 t3_fatal_err(struct adapter *sc)
1133 {
1134 	u_int fw_status[4];
1135 
1136 	if (sc->flags & FULL_INIT_DONE) {
1137 		t3_sge_stop(sc);
1138 		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1139 		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1140 		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1141 		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1142 		t3_intr_disable(sc);
1143 	}
1144 	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1145 	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1146 		device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1147 		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1148 }
1149 
1150 int
1151 t3_os_find_pci_capability(adapter_t *sc, int cap)
1152 {
1153 	device_t dev;
1154 	struct pci_devinfo *dinfo;
1155 	pcicfgregs *cfg;
1156 	uint32_t status;
1157 	uint8_t ptr;
1158 
1159 	dev = sc->dev;
1160 	dinfo = device_get_ivars(dev);
1161 	cfg = &dinfo->cfg;
1162 
1163 	status = pci_read_config(dev, PCIR_STATUS, 2);
1164 	if (!(status & PCIM_STATUS_CAPPRESENT))
1165 		return (0);
1166 
1167 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1168 	case 0:
1169 	case 1:
1170 		ptr = PCIR_CAP_PTR;
1171 		break;
1172 	case 2:
1173 		ptr = PCIR_CAP_PTR_2;
1174 		break;
1175 	default:
1176 		return (0);
1177 		break;
1178 	}
1179 	ptr = pci_read_config(dev, ptr, 1);
1180 
1181 	while (ptr != 0) {
1182 		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1183 			return (ptr);
1184 		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1185 	}
1186 
1187 	return (0);
1188 }
1189 
1190 int
1191 t3_os_pci_save_state(struct adapter *sc)
1192 {
1193 	device_t dev;
1194 	struct pci_devinfo *dinfo;
1195 
1196 	dev = sc->dev;
1197 	dinfo = device_get_ivars(dev);
1198 
1199 	pci_cfg_save(dev, dinfo, 0);
1200 	return (0);
1201 }
1202 
1203 int
1204 t3_os_pci_restore_state(struct adapter *sc)
1205 {
1206 	device_t dev;
1207 	struct pci_devinfo *dinfo;
1208 
1209 	dev = sc->dev;
1210 	dinfo = device_get_ivars(dev);
1211 
1212 	pci_cfg_restore(dev, dinfo);
1213 	return (0);
1214 }
1215 
1216 /**
1217  *	t3_os_link_changed - handle link status changes
1218  *	@sc: the adapter associated with the link change
1219  *	@port_id: the port index whose link status has changed
1220  *	@link_status: the new status of the link
1221  *	@speed: the new speed setting
1222  *	@duplex: the new duplex setting
1223  *	@fc: the new flow-control setting
1224  *
1225  *	This is the OS-dependent handler for link status changes.  The OS
1226  *	neutral handler takes care of most of the processing for these events,
1227  *	then calls this handler for any OS-specific processing.
1228  */
1229 void
1230 t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1231      int duplex, int fc, int mac_was_reset)
1232 {
1233 	struct port_info *pi = &adapter->port[port_id];
1234 	struct ifnet *ifp = pi->ifp;
1235 
1236 	/* no race with detach, so ifp should always be good */
1237 	KASSERT(ifp, ("%s: if detached.", __func__));
1238 
1239 	/* Reapply mac settings if they were lost due to a reset */
1240 	if (mac_was_reset) {
1241 		PORT_LOCK(pi);
1242 		cxgb_update_mac_settings(pi);
1243 		PORT_UNLOCK(pi);
1244 	}
1245 
1246 	if (link_status) {
1247 		ifp->if_baudrate = IF_Mbps(speed);
1248 		if_link_state_change(ifp, LINK_STATE_UP);
1249 	} else
1250 		if_link_state_change(ifp, LINK_STATE_DOWN);
1251 }
1252 
/**
 *	t3_os_phymod_changed - handle PHY module changes
 *	@adap: the adapter whose port reported the module change
 *	@port_id: index of the port whose PHY module changed
 *
 *	This is the OS-dependent handler for PHY module changes.  It is
 *	invoked when a PHY module is removed or inserted for any OS-specific
 *	processing.  The new module type is read from the port's phy.modtype;
 *	the media list is rebuilt if it differs from the type recorded in the
 *	current media entry, and the change is logged on the interface.
 */
void t3_os_phymod_changed(struct adapter *adap, int port_id)
{
	/* Printable names indexed by module type; slot 0 is never printed. */
	static const char *mod_str[] = {
		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
	};
	struct port_info *pi = &adap->port[port_id];
	int mod = pi->phy.modtype;

	/* ifm_data holds the module type the media list was built with. */
	if (mod != pi->media.ifm_cur->ifm_data)
		cxgb_build_medialist(pi);

	if (mod == phy_modtype_none)
		if_printf(pi->ifp, "PHY module unplugged\n");
	else {
		KASSERT(mod < ARRAY_SIZE(mod_str),
			("invalid PHY module type %d", mod));
		if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
	}
}
1281 
1282 void
1283 t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1284 {
1285 
1286 	/*
1287 	 * The ifnet might not be allocated before this gets called,
1288 	 * as this is called early on in attach by t3_prep_adapter
1289 	 * save the address off in the port structure
1290 	 */
1291 	if (cxgb_debug)
1292 		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1293 	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1294 }
1295 
1296 /*
1297  * Programs the XGMAC based on the settings in the ifnet.  These settings
1298  * include MTU, MAC address, mcast addresses, etc.
1299  */
1300 static void
1301 cxgb_update_mac_settings(struct port_info *p)
1302 {
1303 	struct ifnet *ifp = p->ifp;
1304 	struct t3_rx_mode rm;
1305 	struct cmac *mac = &p->mac;
1306 	int mtu, hwtagging;
1307 
1308 	PORT_LOCK_ASSERT_OWNED(p);
1309 
1310 	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1311 
1312 	mtu = ifp->if_mtu;
1313 	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1314 		mtu += ETHER_VLAN_ENCAP_LEN;
1315 
1316 	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1317 
1318 	t3_mac_set_mtu(mac, mtu);
1319 	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1320 	t3_mac_set_address(mac, 0, p->hw_addr);
1321 	t3_init_rx_mode(&rm, p);
1322 	t3_mac_set_rx_mode(mac, &rm);
1323 }
1324 
1325 
1326 static int
1327 await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1328 			      unsigned long n)
1329 {
1330 	int attempts = 5;
1331 
1332 	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1333 		if (!--attempts)
1334 			return (ETIMEDOUT);
1335 		t3_os_sleep(10);
1336 	}
1337 	return 0;
1338 }
1339 
1340 static int
1341 init_tp_parity(struct adapter *adap)
1342 {
1343 	int i;
1344 	struct mbuf *m;
1345 	struct cpl_set_tcb_field *greq;
1346 	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1347 
1348 	t3_tp_set_offload_mode(adap, 1);
1349 
1350 	for (i = 0; i < 16; i++) {
1351 		struct cpl_smt_write_req *req;
1352 
1353 		m = m_gethdr(M_WAITOK, MT_DATA);
1354 		req = mtod(m, struct cpl_smt_write_req *);
1355 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1356 		memset(req, 0, sizeof(*req));
1357 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1358 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1359 		req->iff = i;
1360 		t3_mgmt_tx(adap, m);
1361 	}
1362 
1363 	for (i = 0; i < 2048; i++) {
1364 		struct cpl_l2t_write_req *req;
1365 
1366 		m = m_gethdr(M_WAITOK, MT_DATA);
1367 		req = mtod(m, struct cpl_l2t_write_req *);
1368 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1369 		memset(req, 0, sizeof(*req));
1370 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1371 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1372 		req->params = htonl(V_L2T_W_IDX(i));
1373 		t3_mgmt_tx(adap, m);
1374 	}
1375 
1376 	for (i = 0; i < 2048; i++) {
1377 		struct cpl_rte_write_req *req;
1378 
1379 		m = m_gethdr(M_WAITOK, MT_DATA);
1380 		req = mtod(m, struct cpl_rte_write_req *);
1381 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1382 		memset(req, 0, sizeof(*req));
1383 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1384 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1385 		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1386 		t3_mgmt_tx(adap, m);
1387 	}
1388 
1389 	m = m_gethdr(M_WAITOK, MT_DATA);
1390 	greq = mtod(m, struct cpl_set_tcb_field *);
1391 	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1392 	memset(greq, 0, sizeof(*greq));
1393 	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1394 	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1395 	greq->mask = htobe64(1);
1396 	t3_mgmt_tx(adap, m);
1397 
1398 	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1399 	t3_tp_set_offload_mode(adap, 0);
1400 	return (i);
1401 }
1402 
1403 /**
1404  *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1405  *	@adap: the adapter
1406  *
1407  *	Sets up RSS to distribute packets to multiple receive queues.  We
1408  *	configure the RSS CPU lookup table to distribute to the number of HW
1409  *	receive queues, and the response queue lookup table to narrow that
1410  *	down to the response queues actually configured for each port.
1411  *	We always configure the RSS mapping for two ports since the mapping
1412  *	table has plenty of entries.
1413  */
1414 static void
1415 setup_rss(adapter_t *adap)
1416 {
1417 	int i;
1418 	u_int nq[2];
1419 	uint8_t cpus[SGE_QSETS + 1];
1420 	uint16_t rspq_map[RSS_TABLE_SIZE];
1421 
1422 	for (i = 0; i < SGE_QSETS; ++i)
1423 		cpus[i] = i;
1424 	cpus[SGE_QSETS] = 0xff;
1425 
1426 	nq[0] = nq[1] = 0;
1427 	for_each_port(adap, i) {
1428 		const struct port_info *pi = adap2pinfo(adap, i);
1429 
1430 		nq[pi->tx_chan] += pi->nqsets;
1431 	}
1432 	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1433 		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1434 		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1435 	}
1436 
1437 	/* Calculate the reverse RSS map table */
1438 	for (i = 0; i < SGE_QSETS; ++i)
1439 		adap->rrss_map[i] = 0xff;
1440 	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1441 		if (adap->rrss_map[rspq_map[i]] == 0xff)
1442 			adap->rrss_map[rspq_map[i]] = i;
1443 
1444 	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1445 		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1446 	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1447 	              cpus, rspq_map);
1448 
1449 }
1450 static void
1451 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1452 			      int hi, int port)
1453 {
1454 	struct mbuf *m;
1455 	struct mngt_pktsched_wr *req;
1456 
1457 	m = m_gethdr(M_NOWAIT, MT_DATA);
1458 	if (m) {
1459 		req = mtod(m, struct mngt_pktsched_wr *);
1460 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1461 		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1462 		req->sched = sched;
1463 		req->idx = qidx;
1464 		req->min = lo;
1465 		req->max = hi;
1466 		req->binding = port;
1467 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1468 		t3_mgmt_tx(adap, m);
1469 	}
1470 }
1471 
1472 static void
1473 bind_qsets(adapter_t *sc)
1474 {
1475 	int i, j;
1476 
1477 	for (i = 0; i < (sc)->params.nports; ++i) {
1478 		const struct port_info *pi = adap2pinfo(sc, i);
1479 
1480 		for (j = 0; j < pi->nqsets; ++j) {
1481 			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1482 					  -1, pi->tx_chan);
1483 
1484 		}
1485 	}
1486 }
1487 
1488 static void
1489 update_tpeeprom(struct adapter *adap)
1490 {
1491 	const struct firmware *tpeeprom;
1492 
1493 	uint32_t version;
1494 	unsigned int major, minor;
1495 	int ret, len;
1496 	char rev, name[32];
1497 
1498 	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1499 
1500 	major = G_TP_VERSION_MAJOR(version);
1501 	minor = G_TP_VERSION_MINOR(version);
1502 	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1503 		return;
1504 
1505 	rev = t3rev2char(adap);
1506 	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1507 
1508 	tpeeprom = firmware_get(name);
1509 	if (tpeeprom == NULL) {
1510 		device_printf(adap->dev,
1511 			      "could not load TP EEPROM: unable to load %s\n",
1512 			      name);
1513 		return;
1514 	}
1515 
1516 	len = tpeeprom->datasize - 4;
1517 
1518 	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1519 	if (ret)
1520 		goto release_tpeeprom;
1521 
1522 	if (len != TP_SRAM_LEN) {
1523 		device_printf(adap->dev,
1524 			      "%s length is wrong len=%d expected=%d\n", name,
1525 			      len, TP_SRAM_LEN);
1526 		return;
1527 	}
1528 
1529 	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1530 	    TP_SRAM_OFFSET);
1531 
1532 	if (!ret) {
1533 		device_printf(adap->dev,
1534 			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1535 			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1536 	} else
1537 		device_printf(adap->dev,
1538 			      "Protocol SRAM image update in EEPROM failed\n");
1539 
1540 release_tpeeprom:
1541 	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1542 
1543 	return;
1544 }
1545 
1546 static int
1547 update_tpsram(struct adapter *adap)
1548 {
1549 	const struct firmware *tpsram;
1550 	int ret;
1551 	char rev, name[32];
1552 
1553 	rev = t3rev2char(adap);
1554 	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1555 
1556 	update_tpeeprom(adap);
1557 
1558 	tpsram = firmware_get(name);
1559 	if (tpsram == NULL){
1560 		device_printf(adap->dev, "could not load TP SRAM\n");
1561 		return (EINVAL);
1562 	} else
1563 		device_printf(adap->dev, "updating TP SRAM\n");
1564 
1565 	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1566 	if (ret)
1567 		goto release_tpsram;
1568 
1569 	ret = t3_set_proto_sram(adap, tpsram->data);
1570 	if (ret)
1571 		device_printf(adap->dev, "loading protocol SRAM failed\n");
1572 
1573 release_tpsram:
1574 	firmware_put(tpsram, FIRMWARE_UNLOAD);
1575 
1576 	return ret;
1577 }
1578 
/**
 *	cxgb_up - enable the adapter
 *	@sc: adapter being enabled
 *
 *	Called when the first port is enabled, this function performs the
 *	actions necessary to make an adapter operational, such as completing
 *	the initialization of HW modules, and enabling interrupts.
 *
 *	The one-time part (firmware/TP SRAM update, hardware init, queue set
 *	setup, filters, RSS, sysctls) runs only while FULL_INIT_DONE is not
 *	set.  Returns 0 on success or the error from the step that failed.
 */
static int
cxgb_up(struct adapter *sc)
{
	int err = 0;
	/* Upper bound on the number of filters that can be configured. */
	unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;

	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
					   __func__, sc->open_device_map));

	if ((sc->flags & FULL_INIT_DONE) == 0) {

		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);

		if ((sc->flags & FW_UPTODATE) == 0)
			if ((err = upgrade_fw(sc)))
				goto out;

		if ((sc->flags & TPS_UPTODATE) == 0)
			if ((err = update_tpsram(sc)))
				goto out;

		/*
		 * A negative nfilters means "as many as possible"; otherwise
		 * the requested count is capped at mxf.
		 */
		if (is_offload(sc) && nfilters != 0) {
			sc->params.mc5.nservers = 0;

			if (nfilters < 0)
				sc->params.mc5.nfilters = mxf;
			else
				sc->params.mc5.nfilters = min(nfilters, mxf);
		}

		err = t3_init_hw(sc, 0);
		if (err)
			goto out;

		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));

		err = setup_sge_qsets(sc);
		if (err)
			goto out;

		alloc_filters(sc);
		setup_rss(sc);

		t3_add_configured_sysctls(sc);
		sc->flags |= FULL_INIT_DONE;
	}

	t3_intr_clear(sc);
	t3_sge_start(sc);
	t3_intr_enable(sc);

	/*
	 * On rev C and later offload-capable adapters, run the TP parity
	 * initialization once; the flag is only set if it succeeded.
	 */
	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
	    is_offload(sc) && init_tp_parity(sc) == 0)
		sc->flags |= TP_PARITY_INIT;

	if (sc->flags & TP_PARITY_INIT) {
		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
	}

	/* Queue set binding and hardware filters are set up only once. */
	if (!(sc->flags & QUEUES_BOUND)) {
		bind_qsets(sc);
		setup_hw_filters(sc);
		sc->flags |= QUEUES_BOUND;
	}

	t3_sge_reset_adapter(sc);
out:
	return (err);
}
1658 
/*
 * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
 * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
 * during controller_detach, not here.
 *
 * Only stops the SGE and disables adapter interrupts.
 */
static void
cxgb_down(struct adapter *sc)
{
	t3_sge_stop(sc);
	t3_intr_disable(sc);
}
1670 
/*
 * if_init for cxgb ports.
 *
 * Takes the adapter lock and hands off to cxgb_init_locked(), which drops
 * the lock before returning.  The result of the init is discarded.
 */
static void
cxgb_init(void *arg)
{
	struct port_info *p = arg;
	struct adapter *sc = p->adapter;

	ADAPTER_LOCK(sc);
	cxgb_init_locked(p); /* releases adapter lock */
	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
}
1684 
/*
 * Brings a port up.  Must be called with the adapter lock held and always
 * returns with it released.
 *
 * Waits (interruptibly) while the adapter is busy, bails out if the port is
 * being detached, runs cxgb_up() if this is the first port to open, then
 * enables the port's interrupts, MAC and link, marks the interface running,
 * starts the TX watchdogs and schedules the link check.
 *
 * Returns 0 on success (including when the port is already running), EINTR
 * if the wait was interrupted, ENXIO if the port is doomed, or the error
 * from cxgb_up().
 */
static int
cxgb_init_locked(struct port_info *p)
{
	struct adapter *sc = p->adapter;
	struct ifnet *ifp = p->ifp;
	struct cmac *mac = &p->mac;
	int i, rc = 0, may_sleep = 0, gave_up_lock = 0;

	ADAPTER_LOCK_ASSERT_OWNED(sc);

	/* mtx_sleep drops the adapter lock while waiting, so note that. */
	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
		gave_up_lock = 1;
		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
			rc = EINTR;
			goto done;
		}
	}
	if (IS_DOOMED(p)) {
		rc = ENXIO;
		goto done;
	}
	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));

	/*
	 * The code that runs during one-time adapter initialization can sleep
	 * so it's important not to hold any locks across it.
	 */
	may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;

	/*
	 * In the may_sleep case the busy flag stands in for the lock until
	 * 'done'; otherwise the adapter lock stays held for the rest of the
	 * function.
	 */
	if (may_sleep) {
		SET_BUSY(sc);
		gave_up_lock = 1;
		ADAPTER_UNLOCK(sc);
	}

	/* First port to open brings the whole adapter up. */
	if (sc->open_device_map == 0 && ((rc = cxgb_up(sc)) != 0))
			goto done;

	PORT_LOCK(p);
	/* Nothing to do if this port is already open and running. */
	if (isset(&sc->open_device_map, p->port_id) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
		PORT_UNLOCK(p);
		goto done;
	}
	t3_port_intr_enable(sc, p->port_id);
	if (!mac->multiport)
		t3_mac_init(mac);
	cxgb_update_mac_settings(p);
	t3_link_start(&p->phy, mac, &p->link_config);
	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
	PORT_UNLOCK(p);

	/* Arm the TX watchdog of every queue set owned by this port. */
	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
		struct sge_qset *qs = &sc->sge.qs[i];
		struct sge_txq *txq = &qs->txq[TXQ_ETH];

		callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
				 txq->txq_watchdog.c_cpu);
	}

	/* all ok */
	setbit(&sc->open_device_map, p->port_id);
	/* The link check interval depends on whether the PHY has a link IRQ. */
	callout_reset(&p->link_check_ch,
	    p->phy.caps & SUPPORTED_LINK_IRQ ?  hz * 3 : hz / 4,
	    link_check_callout, p);

done:
	/* Retake the lock if it was dropped above, and clear the busy flag. */
	if (may_sleep) {
		ADAPTER_LOCK(sc);
		KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
		CLR_BUSY(sc);
	}
	/* Others may have queued up behind us while the lock was released. */
	if (gave_up_lock)
		wakeup_one(&sc->flags);
	ADAPTER_UNLOCK(sc);
	return (rc);
}
1764 
/*
 * Takes a port down.  Must be called with the adapter lock held and always
 * returns with it released.
 *
 * Waits (interruptibly) while the adapter is busy, then marks it busy,
 * drops the lock and runs cxgb_uninit_synchronized() before clearing the
 * busy flag again.
 *
 * Returns the result of cxgb_uninit_synchronized(), EINTR if the wait was
 * interrupted, or ENXIO if the port is being detached.
 */
static int
cxgb_uninit_locked(struct port_info *p)
{
	struct adapter *sc = p->adapter;
	int rc;

	ADAPTER_LOCK_ASSERT_OWNED(sc);

	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
			rc = EINTR;
			goto done;
		}
	}
	if (IS_DOOMED(p)) {
		rc = ENXIO;
		goto done;
	}
	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
	/* The busy flag keeps others out while the lock is released. */
	SET_BUSY(sc);
	ADAPTER_UNLOCK(sc);

	rc = cxgb_uninit_synchronized(p);

	ADAPTER_LOCK(sc);
	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
	CLR_BUSY(sc);
	wakeup_one(&sc->flags);
done:
	ADAPTER_UNLOCK(sc);
	return (rc);
}
1797 
/*
 * Called on "ifconfig down", and from port_detach
 *
 * Quiesces one port: removes it from the open device map, drains the tasks
 * and callout that may touch it, stops the MAC and powers down the PHY,
 * reports link down, and shuts the adapter down via cxgb_down() if no port
 * remains open.  Must be called without the adapter lock held.  Always
 * returns 0.
 */
static int
cxgb_uninit_synchronized(struct port_info *pi)
{
	struct adapter *sc = pi->adapter;
	struct ifnet *ifp = pi->ifp;

	/*
	 * taskqueue_drain may cause a deadlock if the adapter lock is held.
	 */
	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);

	/*
	 * Clear this port's bit from the open device map, and then drain all
	 * the tasks that can access/manipulate this port's port_info or ifp.
	 * We disable this port's interrupts here and so the slow/ext
	 * interrupt tasks won't be enqueued.  The tick task will continue to
	 * be enqueued every second but the runs after this drain will not see
	 * this port in the open device map.
	 *
	 * A well behaved task must take open_device_map into account and ignore
	 * ports that are not open.
	 */
	clrbit(&sc->open_device_map, pi->port_id);
	t3_port_intr_disable(sc, pi->port_id);
	taskqueue_drain(sc->tq, &sc->slow_intr_task);
	taskqueue_drain(sc->tq, &sc->tick_task);

	/* Stop the periodic link check and wait out any run in progress. */
	callout_drain(&pi->link_check_ch);
	taskqueue_drain(sc->tq, &pi->link_check_task);

	PORT_LOCK(pi);
	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	/* disable pause frames */
	t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);

	/* Reset RX FIFO HWM */
	t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);

	DELAY(100 * 1000);

	/* Wait for TXFIFO empty */
	t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
			F_TXFIFO_EMPTY, 1, 20, 5);

	DELAY(100 * 1000);
	t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);

	pi->phy.ops->power_down(&pi->phy, 1);

	PORT_UNLOCK(pi);

	/* Report the link as down to the rest of the system. */
	pi->link_config.link_ok = 0;
	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);

	/* Last port closed: stop the adapter as well. */
	if (sc->open_device_map == 0)
		cxgb_down(pi->adapter);

	return (0);
}
1862 
1863 /*
1864  * Mark lro enabled or disabled in all qsets for this port
1865  */
1866 static int
1867 cxgb_set_lro(struct port_info *p, int enabled)
1868 {
1869 	int i;
1870 	struct adapter *adp = p->adapter;
1871 	struct sge_qset *q;
1872 
1873 	for (i = 0; i < p->nqsets; i++) {
1874 		q = &adp->sge.qs[p->first_qset + i];
1875 		q->lro.enabled = (enabled != 0);
1876 	}
1877 	return (0);
1878 }
1879 
1880 static int
1881 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1882 {
1883 	struct port_info *p = ifp->if_softc;
1884 	struct adapter *sc = p->adapter;
1885 	struct ifreq *ifr = (struct ifreq *)data;
1886 	int flags, error = 0, mtu;
1887 	uint32_t mask;
1888 
1889 	switch (command) {
1890 	case SIOCSIFMTU:
1891 		ADAPTER_LOCK(sc);
1892 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1893 		if (error) {
1894 fail:
1895 			ADAPTER_UNLOCK(sc);
1896 			return (error);
1897 		}
1898 
1899 		mtu = ifr->ifr_mtu;
1900 		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
1901 			error = EINVAL;
1902 		} else {
1903 			ifp->if_mtu = mtu;
1904 			PORT_LOCK(p);
1905 			cxgb_update_mac_settings(p);
1906 			PORT_UNLOCK(p);
1907 		}
1908 		ADAPTER_UNLOCK(sc);
1909 		break;
1910 	case SIOCSIFFLAGS:
1911 		ADAPTER_LOCK(sc);
1912 		if (IS_DOOMED(p)) {
1913 			error = ENXIO;
1914 			goto fail;
1915 		}
1916 		if (ifp->if_flags & IFF_UP) {
1917 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1918 				flags = p->if_flags;
1919 				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
1920 				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
1921 					if (IS_BUSY(sc)) {
1922 						error = EBUSY;
1923 						goto fail;
1924 					}
1925 					PORT_LOCK(p);
1926 					cxgb_update_mac_settings(p);
1927 					PORT_UNLOCK(p);
1928 				}
1929 				ADAPTER_UNLOCK(sc);
1930 			} else
1931 				error = cxgb_init_locked(p);
1932 			p->if_flags = ifp->if_flags;
1933 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1934 			error = cxgb_uninit_locked(p);
1935 		else
1936 			ADAPTER_UNLOCK(sc);
1937 
1938 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1939 		break;
1940 	case SIOCADDMULTI:
1941 	case SIOCDELMULTI:
1942 		ADAPTER_LOCK(sc);
1943 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1944 		if (error)
1945 			goto fail;
1946 
1947 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1948 			PORT_LOCK(p);
1949 			cxgb_update_mac_settings(p);
1950 			PORT_UNLOCK(p);
1951 		}
1952 		ADAPTER_UNLOCK(sc);
1953 
1954 		break;
1955 	case SIOCSIFCAP:
1956 		ADAPTER_LOCK(sc);
1957 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1958 		if (error)
1959 			goto fail;
1960 
1961 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1962 		if (mask & IFCAP_TXCSUM) {
1963 			ifp->if_capenable ^= IFCAP_TXCSUM;
1964 			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
1965 
1966 			if (IFCAP_TSO4 & ifp->if_capenable &&
1967 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1968 				mask &= ~IFCAP_TSO4;
1969 				ifp->if_capenable &= ~IFCAP_TSO4;
1970 				if_printf(ifp,
1971 				    "tso4 disabled due to -txcsum.\n");
1972 			}
1973 		}
1974 		if (mask & IFCAP_TXCSUM_IPV6) {
1975 			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1976 			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
1977 
1978 			if (IFCAP_TSO6 & ifp->if_capenable &&
1979 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1980 				mask &= ~IFCAP_TSO6;
1981 				ifp->if_capenable &= ~IFCAP_TSO6;
1982 				if_printf(ifp,
1983 				    "tso6 disabled due to -txcsum6.\n");
1984 			}
1985 		}
1986 		if (mask & IFCAP_RXCSUM)
1987 			ifp->if_capenable ^= IFCAP_RXCSUM;
1988 		if (mask & IFCAP_RXCSUM_IPV6)
1989 			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1990 
1991 		/*
1992 		 * Note that we leave CSUM_TSO alone (it is always set).  The
1993 		 * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
1994 		 * sending a TSO request our way, so it's sufficient to toggle
1995 		 * IFCAP_TSOx only.
1996 		 */
1997 		if (mask & IFCAP_TSO4) {
1998 			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
1999 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2000 				if_printf(ifp, "enable txcsum first.\n");
2001 				error = EAGAIN;
2002 				goto fail;
2003 			}
2004 			ifp->if_capenable ^= IFCAP_TSO4;
2005 		}
2006 		if (mask & IFCAP_TSO6) {
2007 			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
2008 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2009 				if_printf(ifp, "enable txcsum6 first.\n");
2010 				error = EAGAIN;
2011 				goto fail;
2012 			}
2013 			ifp->if_capenable ^= IFCAP_TSO6;
2014 		}
2015 		if (mask & IFCAP_LRO) {
2016 			ifp->if_capenable ^= IFCAP_LRO;
2017 
2018 			/* Safe to do this even if cxgb_up not called yet */
2019 			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2020 		}
2021 #ifdef TCP_OFFLOAD
2022 		if (mask & IFCAP_TOE4) {
2023 			int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE4;
2024 
2025 			error = toe_capability(p, enable);
2026 			if (error == 0)
2027 				ifp->if_capenable ^= mask;
2028 		}
2029 #endif
2030 		if (mask & IFCAP_VLAN_HWTAGGING) {
2031 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2032 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2033 				PORT_LOCK(p);
2034 				cxgb_update_mac_settings(p);
2035 				PORT_UNLOCK(p);
2036 			}
2037 		}
2038 		if (mask & IFCAP_VLAN_MTU) {
2039 			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2040 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2041 				PORT_LOCK(p);
2042 				cxgb_update_mac_settings(p);
2043 				PORT_UNLOCK(p);
2044 			}
2045 		}
2046 		if (mask & IFCAP_VLAN_HWTSO)
2047 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2048 		if (mask & IFCAP_VLAN_HWCSUM)
2049 			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2050 
2051 #ifdef VLAN_CAPABILITIES
2052 		VLAN_CAPABILITIES(ifp);
2053 #endif
2054 		ADAPTER_UNLOCK(sc);
2055 		break;
2056 	case SIOCSIFMEDIA:
2057 	case SIOCGIFMEDIA:
2058 		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2059 		break;
2060 	default:
2061 		error = ether_ioctl(ifp, command, data);
2062 	}
2063 
2064 	return (error);
2065 }
2066 
/*
 * ifmedia change callback (registered in cxgb_port_attach).  Changing the
 * media is not supported, so this always returns EOPNOTSUPP.
 */
static int
cxgb_media_change(struct ifnet *ifp)
{
	return (EOPNOTSUPP);
}
2072 
2073 /*
2074  * Translates phy->modtype to the correct Ethernet media subtype.
2075  */
2076 static int
2077 cxgb_ifm_type(int mod)
2078 {
2079 	switch (mod) {
2080 	case phy_modtype_sr:
2081 		return (IFM_10G_SR);
2082 	case phy_modtype_lr:
2083 		return (IFM_10G_LR);
2084 	case phy_modtype_lrm:
2085 		return (IFM_10G_LRM);
2086 	case phy_modtype_twinax:
2087 		return (IFM_10G_TWINAX);
2088 	case phy_modtype_twinax_long:
2089 		return (IFM_10G_TWINAX_LONG);
2090 	case phy_modtype_none:
2091 		return (IFM_NONE);
2092 	case phy_modtype_unknown:
2093 		return (IFM_UNKNOWN);
2094 	}
2095 
2096 	KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2097 	return (IFM_UNKNOWN);
2098 }
2099 
2100 /*
2101  * Rebuilds the ifmedia list for this port, and sets the current media.
2102  */
2103 static void
2104 cxgb_build_medialist(struct port_info *p)
2105 {
2106 	struct cphy *phy = &p->phy;
2107 	struct ifmedia *media = &p->media;
2108 	int mod = phy->modtype;
2109 	int m = IFM_ETHER | IFM_FDX;
2110 
2111 	PORT_LOCK(p);
2112 
2113 	ifmedia_removeall(media);
2114 	if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2115 		/* Copper (RJ45) */
2116 
2117 		if (phy->caps & SUPPORTED_10000baseT_Full)
2118 			ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2119 
2120 		if (phy->caps & SUPPORTED_1000baseT_Full)
2121 			ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2122 
2123 		if (phy->caps & SUPPORTED_100baseT_Full)
2124 			ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2125 
2126 		if (phy->caps & SUPPORTED_10baseT_Full)
2127 			ifmedia_add(media, m | IFM_10_T, mod, NULL);
2128 
2129 		ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2130 		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2131 
2132 	} else if (phy->caps & SUPPORTED_TP) {
2133 		/* Copper (CX4) */
2134 
2135 		KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2136 			("%s: unexpected cap 0x%x", __func__, phy->caps));
2137 
2138 		ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2139 		ifmedia_set(media, m | IFM_10G_CX4);
2140 
2141 	} else if (phy->caps & SUPPORTED_FIBRE &&
2142 		   phy->caps & SUPPORTED_10000baseT_Full) {
2143 		/* 10G optical (but includes SFP+ twinax) */
2144 
2145 		m |= cxgb_ifm_type(mod);
2146 		if (IFM_SUBTYPE(m) == IFM_NONE)
2147 			m &= ~IFM_FDX;
2148 
2149 		ifmedia_add(media, m, mod, NULL);
2150 		ifmedia_set(media, m);
2151 
2152 	} else if (phy->caps & SUPPORTED_FIBRE &&
2153 		   phy->caps & SUPPORTED_1000baseT_Full) {
2154 		/* 1G optical */
2155 
2156 		/* XXX: Lie and claim to be SX, could actually be any 1G-X */
2157 		ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2158 		ifmedia_set(media, m | IFM_1000_SX);
2159 
2160 	} else {
2161 		KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2162 			    phy->caps));
2163 	}
2164 
2165 	PORT_UNLOCK(p);
2166 }
2167 
2168 static void
2169 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2170 {
2171 	struct port_info *p = ifp->if_softc;
2172 	struct ifmedia_entry *cur = p->media.ifm_cur;
2173 	int speed = p->link_config.speed;
2174 
2175 	if (cur->ifm_data != p->phy.modtype) {
2176 		cxgb_build_medialist(p);
2177 		cur = p->media.ifm_cur;
2178 	}
2179 
2180 	ifmr->ifm_status = IFM_AVALID;
2181 	if (!p->link_config.link_ok)
2182 		return;
2183 
2184 	ifmr->ifm_status |= IFM_ACTIVE;
2185 
2186 	/*
2187 	 * active and current will differ iff current media is autoselect.  That
2188 	 * can happen only for copper RJ45.
2189 	 */
2190 	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2191 		return;
2192 	KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2193 		("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2194 
2195 	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2196 	if (speed == SPEED_10000)
2197 		ifmr->ifm_active |= IFM_10G_T;
2198 	else if (speed == SPEED_1000)
2199 		ifmr->ifm_active |= IFM_1000_T;
2200 	else if (speed == SPEED_100)
2201 		ifmr->ifm_active |= IFM_100_TX;
2202 	else if (speed == SPEED_10)
2203 		ifmr->ifm_active |= IFM_10_T;
2204 	else
2205 		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2206 			    speed));
2207 }
2208 
2209 static uint64_t
2210 cxgb_get_counter(struct ifnet *ifp, ift_counter c)
2211 {
2212 	struct port_info *pi = ifp->if_softc;
2213 	struct adapter *sc = pi->adapter;
2214 	struct cmac *mac = &pi->mac;
2215 	struct mac_stats *mstats = &mac->stats;
2216 
2217 	cxgb_refresh_stats(pi);
2218 
2219 	switch (c) {
2220 	case IFCOUNTER_IPACKETS:
2221 		return (mstats->rx_frames);
2222 
2223 	case IFCOUNTER_IERRORS:
2224 		return (mstats->rx_jabber + mstats->rx_data_errs +
2225 		    mstats->rx_sequence_errs + mstats->rx_runt +
2226 		    mstats->rx_too_long + mstats->rx_mac_internal_errs +
2227 		    mstats->rx_short + mstats->rx_fcs_errs);
2228 
2229 	case IFCOUNTER_OPACKETS:
2230 		return (mstats->tx_frames);
2231 
2232 	case IFCOUNTER_OERRORS:
2233 		return (mstats->tx_excess_collisions + mstats->tx_underrun +
2234 		    mstats->tx_len_errs + mstats->tx_mac_internal_errs +
2235 		    mstats->tx_excess_deferral + mstats->tx_fcs_errs);
2236 
2237 	case IFCOUNTER_COLLISIONS:
2238 		return (mstats->tx_total_collisions);
2239 
2240 	case IFCOUNTER_IBYTES:
2241 		return (mstats->rx_octets);
2242 
2243 	case IFCOUNTER_OBYTES:
2244 		return (mstats->tx_octets);
2245 
2246 	case IFCOUNTER_IMCASTS:
2247 		return (mstats->rx_mcast_frames);
2248 
2249 	case IFCOUNTER_OMCASTS:
2250 		return (mstats->tx_mcast_frames);
2251 
2252 	case IFCOUNTER_IQDROPS:
2253 		return (mstats->rx_cong_drops);
2254 
2255 	case IFCOUNTER_OQDROPS: {
2256 		int i;
2257 		uint64_t drops;
2258 
2259 		drops = 0;
2260 		if (sc->flags & FULL_INIT_DONE) {
2261 			for (i = pi->first_qset; i < pi->first_qset + pi->nqsets; i++)
2262 				drops += sc->sge.qs[i].txq[TXQ_ETH].txq_mr->br_drops;
2263 		}
2264 
2265 		return (drops);
2266 
2267 	}
2268 
2269 	default:
2270 		return (if_get_counter_default(ifp, c));
2271 	}
2272 }
2273 
2274 static void
2275 cxgb_async_intr(void *data)
2276 {
2277 	adapter_t *sc = data;
2278 
2279 	t3_write_reg(sc, A_PL_INT_ENABLE0, 0);
2280 	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
2281 	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2282 }
2283 
2284 static void
2285 link_check_callout(void *arg)
2286 {
2287 	struct port_info *pi = arg;
2288 	struct adapter *sc = pi->adapter;
2289 
2290 	if (!isset(&sc->open_device_map, pi->port_id))
2291 		return;
2292 
2293 	taskqueue_enqueue(sc->tq, &pi->link_check_task);
2294 }
2295 
2296 static void
2297 check_link_status(void *arg, int pending)
2298 {
2299 	struct port_info *pi = arg;
2300 	struct adapter *sc = pi->adapter;
2301 
2302 	if (!isset(&sc->open_device_map, pi->port_id))
2303 		return;
2304 
2305 	t3_link_changed(sc, pi->port_id);
2306 
2307 	if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ) ||
2308 	    pi->link_config.link_ok == 0)
2309 		callout_reset(&pi->link_check_ch, hz, link_check_callout, pi);
2310 }
2311 
2312 void
2313 t3_os_link_intr(struct port_info *pi)
2314 {
2315 	/*
2316 	 * Schedule a link check in the near future.  If the link is flapping
2317 	 * rapidly we'll keep resetting the callout and delaying the check until
2318 	 * things stabilize a bit.
2319 	 */
2320 	callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
2321 }
2322 
2323 static void
2324 check_t3b2_mac(struct adapter *sc)
2325 {
2326 	int i;
2327 
2328 	if (sc->flags & CXGB_SHUTDOWN)
2329 		return;
2330 
2331 	for_each_port(sc, i) {
2332 		struct port_info *p = &sc->port[i];
2333 		int status;
2334 #ifdef INVARIANTS
2335 		struct ifnet *ifp = p->ifp;
2336 #endif
2337 
2338 		if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2339 		    !p->link_config.link_ok)
2340 			continue;
2341 
2342 		KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2343 			("%s: state mismatch (drv_flags %x, device_map %x)",
2344 			 __func__, ifp->if_drv_flags, sc->open_device_map));
2345 
2346 		PORT_LOCK(p);
2347 		status = t3b2_mac_watchdog_task(&p->mac);
2348 		if (status == 1)
2349 			p->mac.stats.num_toggled++;
2350 		else if (status == 2) {
2351 			struct cmac *mac = &p->mac;
2352 
2353 			cxgb_update_mac_settings(p);
2354 			t3_link_start(&p->phy, mac, &p->link_config);
2355 			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2356 			t3_port_intr_enable(sc, p->port_id);
2357 			p->mac.stats.num_resets++;
2358 		}
2359 		PORT_UNLOCK(p);
2360 	}
2361 }
2362 
2363 static void
2364 cxgb_tick(void *arg)
2365 {
2366 	adapter_t *sc = (adapter_t *)arg;
2367 
2368 	if (sc->flags & CXGB_SHUTDOWN)
2369 		return;
2370 
2371 	taskqueue_enqueue(sc->tq, &sc->tick_task);
2372 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2373 }
2374 
2375 void
2376 cxgb_refresh_stats(struct port_info *pi)
2377 {
2378 	struct timeval tv;
2379 	const struct timeval interval = {0, 250000};    /* 250ms */
2380 
2381 	getmicrotime(&tv);
2382 	timevalsub(&tv, &interval);
2383 	if (timevalcmp(&tv, &pi->last_refreshed, <))
2384 		return;
2385 
2386 	PORT_LOCK(pi);
2387 	t3_mac_update_stats(&pi->mac);
2388 	PORT_UNLOCK(pi);
2389 	getmicrotime(&pi->last_refreshed);
2390 }
2391 
2392 static void
2393 cxgb_tick_handler(void *arg, int count)
2394 {
2395 	adapter_t *sc = (adapter_t *)arg;
2396 	const struct adapter_params *p = &sc->params;
2397 	int i;
2398 	uint32_t cause, reset;
2399 
2400 	if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2401 		return;
2402 
2403 	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2404 		check_t3b2_mac(sc);
2405 
2406 	cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2407 	if (cause) {
2408 		struct sge_qset *qs = &sc->sge.qs[0];
2409 		uint32_t mask, v;
2410 
2411 		v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2412 
2413 		mask = 1;
2414 		for (i = 0; i < SGE_QSETS; i++) {
2415 			if (v & mask)
2416 				qs[i].rspq.starved++;
2417 			mask <<= 1;
2418 		}
2419 
2420 		mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2421 
2422 		for (i = 0; i < SGE_QSETS * 2; i++) {
2423 			if (v & mask) {
2424 				qs[i / 2].fl[i % 2].empty++;
2425 			}
2426 			mask <<= 1;
2427 		}
2428 
2429 		/* clear */
2430 		t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2431 		t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2432 	}
2433 
2434 	for (i = 0; i < sc->params.nports; i++) {
2435 		struct port_info *pi = &sc->port[i];
2436 		struct cmac *mac = &pi->mac;
2437 
2438 		if (!isset(&sc->open_device_map, pi->port_id))
2439 			continue;
2440 
2441 		cxgb_refresh_stats(pi);
2442 
2443 		if (mac->multiport)
2444 			continue;
2445 
2446 		/* Count rx fifo overflows, once per second */
2447 		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2448 		reset = 0;
2449 		if (cause & F_RXFIFO_OVERFLOW) {
2450 			mac->stats.rx_fifo_ovfl++;
2451 			reset |= F_RXFIFO_OVERFLOW;
2452 		}
2453 		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2454 	}
2455 }
2456 
2457 static void
2458 touch_bars(device_t dev)
2459 {
2460 	/*
2461 	 * Don't enable yet
2462 	 */
2463 #if !defined(__LP64__) && 0
2464 	u32 v;
2465 
2466 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2467 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2468 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2469 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2470 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2471 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2472 #endif
2473 }
2474 
2475 static int
2476 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2477 {
2478 	uint8_t *buf;
2479 	int err = 0;
2480 	u32 aligned_offset, aligned_len, *p;
2481 	struct adapter *adapter = pi->adapter;
2482 
2483 
2484 	aligned_offset = offset & ~3;
2485 	aligned_len = (len + (offset & 3) + 3) & ~3;
2486 
2487 	if (aligned_offset != offset || aligned_len != len) {
2488 		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2489 		if (!buf)
2490 			return (ENOMEM);
2491 		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2492 		if (!err && aligned_len > 4)
2493 			err = t3_seeprom_read(adapter,
2494 					      aligned_offset + aligned_len - 4,
2495 					      (u32 *)&buf[aligned_len - 4]);
2496 		if (err)
2497 			goto out;
2498 		memcpy(buf + (offset & 3), data, len);
2499 	} else
2500 		buf = (uint8_t *)(uintptr_t)data;
2501 
2502 	err = t3_seeprom_wp(adapter, 0);
2503 	if (err)
2504 		goto out;
2505 
2506 	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2507 		err = t3_seeprom_write(adapter, aligned_offset, *p);
2508 		aligned_offset += 4;
2509 	}
2510 
2511 	if (!err)
2512 		err = t3_seeprom_wp(adapter, 1);
2513 out:
2514 	if (buf != data)
2515 		free(buf, M_DEVBUF);
2516 	return err;
2517 }
2518 
2519 
/*
 * Range check used when validating ioctl parameters.  Returns non-zero when
 * val lies within [lo, hi].  Any negative val is also accepted regardless of
 * the bounds.
 */
static int
in_range(int val, int lo, int hi)
{
	if (val < 0)
		return (1);

	return (lo <= val && val <= hi);
}
2525 
/*
 * open() handler for the control device: nothing to set up, always
 * succeeds.
 */
static int
cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
{
	const int rc = 0;

	return (rc);
}
2531 
/*
 * close() handler for the control device: nothing to tear down, always
 * succeeds.
 */
static int
cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	const int rc = 0;

	return (rc);
}
2537 
2538 static int
2539 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2540     int fflag, struct thread *td)
2541 {
2542 	int mmd, error = 0;
2543 	struct port_info *pi = dev->si_drv1;
2544 	adapter_t *sc = pi->adapter;
2545 
2546 #ifdef PRIV_SUPPORTED
2547 	if (priv_check(td, PRIV_DRIVER)) {
2548 		if (cxgb_debug)
2549 			printf("user does not have access to privileged ioctls\n");
2550 		return (EPERM);
2551 	}
2552 #else
2553 	if (suser(td)) {
2554 		if (cxgb_debug)
2555 			printf("user does not have access to privileged ioctls\n");
2556 		return (EPERM);
2557 	}
2558 #endif
2559 
2560 	switch (cmd) {
2561 	case CHELSIO_GET_MIIREG: {
2562 		uint32_t val;
2563 		struct cphy *phy = &pi->phy;
2564 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2565 
2566 		if (!phy->mdio_read)
2567 			return (EOPNOTSUPP);
2568 		if (is_10G(sc)) {
2569 			mmd = mid->phy_id >> 8;
2570 			if (!mmd)
2571 				mmd = MDIO_DEV_PCS;
2572 			else if (mmd > MDIO_DEV_VEND2)
2573 				return (EINVAL);
2574 
2575 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2576 					     mid->reg_num, &val);
2577 		} else
2578 		        error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2579 					     mid->reg_num & 0x1f, &val);
2580 		if (error == 0)
2581 			mid->val_out = val;
2582 		break;
2583 	}
2584 	case CHELSIO_SET_MIIREG: {
2585 		struct cphy *phy = &pi->phy;
2586 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2587 
2588 		if (!phy->mdio_write)
2589 			return (EOPNOTSUPP);
2590 		if (is_10G(sc)) {
2591 			mmd = mid->phy_id >> 8;
2592 			if (!mmd)
2593 				mmd = MDIO_DEV_PCS;
2594 			else if (mmd > MDIO_DEV_VEND2)
2595 				return (EINVAL);
2596 
2597 			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2598 					      mmd, mid->reg_num, mid->val_in);
2599 		} else
2600 			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2601 					      mid->reg_num & 0x1f,
2602 					      mid->val_in);
2603 		break;
2604 	}
2605 	case CHELSIO_SETREG: {
2606 		struct ch_reg *edata = (struct ch_reg *)data;
2607 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2608 			return (EFAULT);
2609 		t3_write_reg(sc, edata->addr, edata->val);
2610 		break;
2611 	}
2612 	case CHELSIO_GETREG: {
2613 		struct ch_reg *edata = (struct ch_reg *)data;
2614 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2615 			return (EFAULT);
2616 		edata->val = t3_read_reg(sc, edata->addr);
2617 		break;
2618 	}
2619 	case CHELSIO_GET_SGE_CONTEXT: {
2620 		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2621 		mtx_lock_spin(&sc->sge.reg_lock);
2622 		switch (ecntxt->cntxt_type) {
2623 		case CNTXT_TYPE_EGRESS:
2624 			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2625 			    ecntxt->data);
2626 			break;
2627 		case CNTXT_TYPE_FL:
2628 			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2629 			    ecntxt->data);
2630 			break;
2631 		case CNTXT_TYPE_RSP:
2632 			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2633 			    ecntxt->data);
2634 			break;
2635 		case CNTXT_TYPE_CQ:
2636 			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2637 			    ecntxt->data);
2638 			break;
2639 		default:
2640 			error = EINVAL;
2641 			break;
2642 		}
2643 		mtx_unlock_spin(&sc->sge.reg_lock);
2644 		break;
2645 	}
2646 	case CHELSIO_GET_SGE_DESC: {
2647 		struct ch_desc *edesc = (struct ch_desc *)data;
2648 		int ret;
2649 		if (edesc->queue_num >= SGE_QSETS * 6)
2650 			return (EINVAL);
2651 		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2652 		    edesc->queue_num % 6, edesc->idx, edesc->data);
2653 		if (ret < 0)
2654 			return (EINVAL);
2655 		edesc->size = ret;
2656 		break;
2657 	}
2658 	case CHELSIO_GET_QSET_PARAMS: {
2659 		struct qset_params *q;
2660 		struct ch_qset_params *t = (struct ch_qset_params *)data;
2661 		int q1 = pi->first_qset;
2662 		int nqsets = pi->nqsets;
2663 		int i;
2664 
2665 		if (t->qset_idx >= nqsets)
2666 			return EINVAL;
2667 
2668 		i = q1 + t->qset_idx;
2669 		q = &sc->params.sge.qset[i];
2670 		t->rspq_size   = q->rspq_size;
2671 		t->txq_size[0] = q->txq_size[0];
2672 		t->txq_size[1] = q->txq_size[1];
2673 		t->txq_size[2] = q->txq_size[2];
2674 		t->fl_size[0]  = q->fl_size;
2675 		t->fl_size[1]  = q->jumbo_size;
2676 		t->polling     = q->polling;
2677 		t->lro         = q->lro;
2678 		t->intr_lat    = q->coalesce_usecs;
2679 		t->cong_thres  = q->cong_thres;
2680 		t->qnum        = i;
2681 
2682 		if ((sc->flags & FULL_INIT_DONE) == 0)
2683 			t->vector = 0;
2684 		else if (sc->flags & USING_MSIX)
2685 			t->vector = rman_get_start(sc->msix_irq_res[i]);
2686 		else
2687 			t->vector = rman_get_start(sc->irq_res);
2688 
2689 		break;
2690 	}
2691 	case CHELSIO_GET_QSET_NUM: {
2692 		struct ch_reg *edata = (struct ch_reg *)data;
2693 		edata->val = pi->nqsets;
2694 		break;
2695 	}
2696 	case CHELSIO_LOAD_FW: {
2697 		uint8_t *fw_data;
2698 		uint32_t vers;
2699 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2700 
2701 		/*
2702 		 * You're allowed to load a firmware only before FULL_INIT_DONE
2703 		 *
2704 		 * FW_UPTODATE is also set so the rest of the initialization
2705 		 * will not overwrite what was loaded here.  This gives you the
2706 		 * flexibility to load any firmware (and maybe shoot yourself in
2707 		 * the foot).
2708 		 */
2709 
2710 		ADAPTER_LOCK(sc);
2711 		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2712 			ADAPTER_UNLOCK(sc);
2713 			return (EBUSY);
2714 		}
2715 
2716 		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2717 		if (!fw_data)
2718 			error = ENOMEM;
2719 		else
2720 			error = copyin(t->buf, fw_data, t->len);
2721 
2722 		if (!error)
2723 			error = -t3_load_fw(sc, fw_data, t->len);
2724 
2725 		if (t3_get_fw_version(sc, &vers) == 0) {
2726 			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2727 			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2728 			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2729 		}
2730 
2731 		if (!error)
2732 			sc->flags |= FW_UPTODATE;
2733 
2734 		free(fw_data, M_DEVBUF);
2735 		ADAPTER_UNLOCK(sc);
2736 		break;
2737 	}
2738 	case CHELSIO_LOAD_BOOT: {
2739 		uint8_t *boot_data;
2740 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2741 
2742 		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2743 		if (!boot_data)
2744 			return ENOMEM;
2745 
2746 		error = copyin(t->buf, boot_data, t->len);
2747 		if (!error)
2748 			error = -t3_load_boot(sc, boot_data, t->len);
2749 
2750 		free(boot_data, M_DEVBUF);
2751 		break;
2752 	}
2753 	case CHELSIO_GET_PM: {
2754 		struct ch_pm *m = (struct ch_pm *)data;
2755 		struct tp_params *p = &sc->params.tp;
2756 
2757 		if (!is_offload(sc))
2758 			return (EOPNOTSUPP);
2759 
2760 		m->tx_pg_sz = p->tx_pg_size;
2761 		m->tx_num_pg = p->tx_num_pgs;
2762 		m->rx_pg_sz  = p->rx_pg_size;
2763 		m->rx_num_pg = p->rx_num_pgs;
2764 		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2765 
2766 		break;
2767 	}
2768 	case CHELSIO_SET_PM: {
2769 		struct ch_pm *m = (struct ch_pm *)data;
2770 		struct tp_params *p = &sc->params.tp;
2771 
2772 		if (!is_offload(sc))
2773 			return (EOPNOTSUPP);
2774 		if (sc->flags & FULL_INIT_DONE)
2775 			return (EBUSY);
2776 
2777 		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2778 		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2779 			return (EINVAL);	/* not power of 2 */
2780 		if (!(m->rx_pg_sz & 0x14000))
2781 			return (EINVAL);	/* not 16KB or 64KB */
2782 		if (!(m->tx_pg_sz & 0x1554000))
2783 			return (EINVAL);
2784 		if (m->tx_num_pg == -1)
2785 			m->tx_num_pg = p->tx_num_pgs;
2786 		if (m->rx_num_pg == -1)
2787 			m->rx_num_pg = p->rx_num_pgs;
2788 		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2789 			return (EINVAL);
2790 		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2791 		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2792 			return (EINVAL);
2793 
2794 		p->rx_pg_size = m->rx_pg_sz;
2795 		p->tx_pg_size = m->tx_pg_sz;
2796 		p->rx_num_pgs = m->rx_num_pg;
2797 		p->tx_num_pgs = m->tx_num_pg;
2798 		break;
2799 	}
2800 	case CHELSIO_SETMTUTAB: {
2801 		struct ch_mtus *m = (struct ch_mtus *)data;
2802 		int i;
2803 
2804 		if (!is_offload(sc))
2805 			return (EOPNOTSUPP);
2806 		if (offload_running(sc))
2807 			return (EBUSY);
2808 		if (m->nmtus != NMTUS)
2809 			return (EINVAL);
2810 		if (m->mtus[0] < 81)         /* accommodate SACK */
2811 			return (EINVAL);
2812 
2813 		/*
2814 		 * MTUs must be in ascending order
2815 		 */
2816 		for (i = 1; i < NMTUS; ++i)
2817 			if (m->mtus[i] < m->mtus[i - 1])
2818 				return (EINVAL);
2819 
2820 		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2821 		break;
2822 	}
2823 	case CHELSIO_GETMTUTAB: {
2824 		struct ch_mtus *m = (struct ch_mtus *)data;
2825 
2826 		if (!is_offload(sc))
2827 			return (EOPNOTSUPP);
2828 
2829 		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2830 		m->nmtus = NMTUS;
2831 		break;
2832 	}
2833 	case CHELSIO_GET_MEM: {
2834 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2835 		struct mc7 *mem;
2836 		uint8_t *useraddr;
2837 		u64 buf[32];
2838 
2839 		/*
2840 		 * Use these to avoid modifying len/addr in the return
2841 		 * struct
2842 		 */
2843 		uint32_t len = t->len, addr = t->addr;
2844 
2845 		if (!is_offload(sc))
2846 			return (EOPNOTSUPP);
2847 		if (!(sc->flags & FULL_INIT_DONE))
2848 			return (EIO);         /* need the memory controllers */
2849 		if ((addr & 0x7) || (len & 0x7))
2850 			return (EINVAL);
2851 		if (t->mem_id == MEM_CM)
2852 			mem = &sc->cm;
2853 		else if (t->mem_id == MEM_PMRX)
2854 			mem = &sc->pmrx;
2855 		else if (t->mem_id == MEM_PMTX)
2856 			mem = &sc->pmtx;
2857 		else
2858 			return (EINVAL);
2859 
2860 		/*
2861 		 * Version scheme:
2862 		 * bits 0..9: chip version
2863 		 * bits 10..15: chip revision
2864 		 */
2865 		t->version = 3 | (sc->params.rev << 10);
2866 
2867 		/*
2868 		 * Read 256 bytes at a time as len can be large and we don't
2869 		 * want to use huge intermediate buffers.
2870 		 */
2871 		useraddr = (uint8_t *)t->buf;
2872 		while (len) {
2873 			unsigned int chunk = min(len, sizeof(buf));
2874 
2875 			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2876 			if (error)
2877 				return (-error);
2878 			if (copyout(buf, useraddr, chunk))
2879 				return (EFAULT);
2880 			useraddr += chunk;
2881 			addr += chunk;
2882 			len -= chunk;
2883 		}
2884 		break;
2885 	}
2886 	case CHELSIO_READ_TCAM_WORD: {
2887 		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2888 
2889 		if (!is_offload(sc))
2890 			return (EOPNOTSUPP);
2891 		if (!(sc->flags & FULL_INIT_DONE))
2892 			return (EIO);         /* need MC5 */
2893 		return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf);
2894 		break;
2895 	}
2896 	case CHELSIO_SET_TRACE_FILTER: {
2897 		struct ch_trace *t = (struct ch_trace *)data;
2898 		const struct trace_params *tp;
2899 
2900 		tp = (const struct trace_params *)&t->sip;
2901 		if (t->config_tx)
2902 			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2903 					       t->trace_tx);
2904 		if (t->config_rx)
2905 			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2906 					       t->trace_rx);
2907 		break;
2908 	}
2909 	case CHELSIO_SET_PKTSCHED: {
2910 		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2911 		if (sc->open_device_map == 0)
2912 			return (EAGAIN);
2913 		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2914 		    p->binding);
2915 		break;
2916 	}
2917 	case CHELSIO_IFCONF_GETREGS: {
2918 		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2919 		int reglen = cxgb_get_regs_len();
2920 		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2921 		if (buf == NULL) {
2922 			return (ENOMEM);
2923 		}
2924 		if (regs->len > reglen)
2925 			regs->len = reglen;
2926 		else if (regs->len < reglen)
2927 			error = ENOBUFS;
2928 
2929 		if (!error) {
2930 			cxgb_get_regs(sc, regs, buf);
2931 			error = copyout(buf, regs->data, reglen);
2932 		}
2933 		free(buf, M_DEVBUF);
2934 
2935 		break;
2936 	}
2937 	case CHELSIO_SET_HW_SCHED: {
2938 		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2939 		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2940 
2941 		if ((sc->flags & FULL_INIT_DONE) == 0)
2942 			return (EAGAIN);       /* need TP to be initialized */
2943 		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2944 		    !in_range(t->channel, 0, 1) ||
2945 		    !in_range(t->kbps, 0, 10000000) ||
2946 		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2947 		    !in_range(t->flow_ipg, 0,
2948 			      dack_ticks_to_usec(sc, 0x7ff)))
2949 			return (EINVAL);
2950 
2951 		if (t->kbps >= 0) {
2952 			error = t3_config_sched(sc, t->kbps, t->sched);
2953 			if (error < 0)
2954 				return (-error);
2955 		}
2956 		if (t->class_ipg >= 0)
2957 			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2958 		if (t->flow_ipg >= 0) {
2959 			t->flow_ipg *= 1000;     /* us -> ns */
2960 			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2961 		}
2962 		if (t->mode >= 0) {
2963 			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2964 
2965 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2966 					 bit, t->mode ? bit : 0);
2967 		}
2968 		if (t->channel >= 0)
2969 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2970 					 1 << t->sched, t->channel << t->sched);
2971 		break;
2972 	}
2973 	case CHELSIO_GET_EEPROM: {
2974 		int i;
2975 		struct ch_eeprom *e = (struct ch_eeprom *)data;
2976 		uint8_t *buf;
2977 
2978 		if (e->offset & 3 || e->offset >= EEPROMSIZE ||
2979 		    e->len > EEPROMSIZE || e->offset + e->len > EEPROMSIZE) {
2980 			return (EINVAL);
2981 		}
2982 
2983 		buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
2984 		if (buf == NULL) {
2985 			return (ENOMEM);
2986 		}
2987 		e->magic = EEPROM_MAGIC;
2988 		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
2989 			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
2990 
2991 		if (!error)
2992 			error = copyout(buf + e->offset, e->data, e->len);
2993 
2994 		free(buf, M_DEVBUF);
2995 		break;
2996 	}
2997 	case CHELSIO_CLEAR_STATS: {
2998 		if (!(sc->flags & FULL_INIT_DONE))
2999 			return EAGAIN;
3000 
3001 		PORT_LOCK(pi);
3002 		t3_mac_update_stats(&pi->mac);
3003 		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
3004 		PORT_UNLOCK(pi);
3005 		break;
3006 	}
3007 	case CHELSIO_GET_UP_LA: {
3008 		struct ch_up_la *la = (struct ch_up_la *)data;
3009 		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3010 		if (buf == NULL) {
3011 			return (ENOMEM);
3012 		}
3013 		if (la->bufsize < LA_BUFSIZE)
3014 			error = ENOBUFS;
3015 
3016 		if (!error)
3017 			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3018 					      &la->bufsize, buf);
3019 		if (!error)
3020 			error = copyout(buf, la->data, la->bufsize);
3021 
3022 		free(buf, M_DEVBUF);
3023 		break;
3024 	}
3025 	case CHELSIO_GET_UP_IOQS: {
3026 		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3027 		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3028 		uint32_t *v;
3029 
3030 		if (buf == NULL) {
3031 			return (ENOMEM);
3032 		}
3033 		if (ioqs->bufsize < IOQS_BUFSIZE)
3034 			error = ENOBUFS;
3035 
3036 		if (!error)
3037 			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3038 
3039 		if (!error) {
3040 			v = (uint32_t *)buf;
3041 
3042 			ioqs->ioq_rx_enable = *v++;
3043 			ioqs->ioq_tx_enable = *v++;
3044 			ioqs->ioq_rx_status = *v++;
3045 			ioqs->ioq_tx_status = *v++;
3046 
3047 			error = copyout(v, ioqs->data, ioqs->bufsize);
3048 		}
3049 
3050 		free(buf, M_DEVBUF);
3051 		break;
3052 	}
3053 	case CHELSIO_SET_FILTER: {
3054 		struct ch_filter *f = (struct ch_filter *)data;
3055 		struct filter_info *p;
3056 		unsigned int nfilters = sc->params.mc5.nfilters;
3057 
3058 		if (!is_offload(sc))
3059 			return (EOPNOTSUPP);	/* No TCAM */
3060 		if (!(sc->flags & FULL_INIT_DONE))
3061 			return (EAGAIN);	/* mc5 not setup yet */
3062 		if (nfilters == 0)
3063 			return (EBUSY);		/* TOE will use TCAM */
3064 
3065 		/* sanity checks */
3066 		if (f->filter_id >= nfilters ||
3067 		    (f->val.dip && f->mask.dip != 0xffffffff) ||
3068 		    (f->val.sport && f->mask.sport != 0xffff) ||
3069 		    (f->val.dport && f->mask.dport != 0xffff) ||
3070 		    (f->val.vlan && f->mask.vlan != 0xfff) ||
3071 		    (f->val.vlan_prio &&
3072 			f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3073 		    (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3074 		    f->qset >= SGE_QSETS ||
3075 		    sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3076 			return (EINVAL);
3077 
3078 		/* Was allocated with M_WAITOK */
3079 		KASSERT(sc->filters, ("filter table NULL\n"));
3080 
3081 		p = &sc->filters[f->filter_id];
3082 		if (p->locked)
3083 			return (EPERM);
3084 
3085 		bzero(p, sizeof(*p));
3086 		p->sip = f->val.sip;
3087 		p->sip_mask = f->mask.sip;
3088 		p->dip = f->val.dip;
3089 		p->sport = f->val.sport;
3090 		p->dport = f->val.dport;
3091 		p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3092 		p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3093 		    FILTER_NO_VLAN_PRI;
3094 		p->mac_hit = f->mac_hit;
3095 		p->mac_vld = f->mac_addr_idx != 0xffff;
3096 		p->mac_idx = f->mac_addr_idx;
3097 		p->pkt_type = f->proto;
3098 		p->report_filter_id = f->want_filter_id;
3099 		p->pass = f->pass;
3100 		p->rss = f->rss;
3101 		p->qset = f->qset;
3102 
3103 		error = set_filter(sc, f->filter_id, p);
3104 		if (error == 0)
3105 			p->valid = 1;
3106 		break;
3107 	}
3108 	case CHELSIO_DEL_FILTER: {
3109 		struct ch_filter *f = (struct ch_filter *)data;
3110 		struct filter_info *p;
3111 		unsigned int nfilters = sc->params.mc5.nfilters;
3112 
3113 		if (!is_offload(sc))
3114 			return (EOPNOTSUPP);
3115 		if (!(sc->flags & FULL_INIT_DONE))
3116 			return (EAGAIN);
3117 		if (nfilters == 0 || sc->filters == NULL)
3118 			return (EINVAL);
3119 		if (f->filter_id >= nfilters)
3120 		       return (EINVAL);
3121 
3122 		p = &sc->filters[f->filter_id];
3123 		if (p->locked)
3124 			return (EPERM);
3125 		if (!p->valid)
3126 			return (EFAULT); /* Read "Bad address" as "Bad index" */
3127 
3128 		bzero(p, sizeof(*p));
3129 		p->sip = p->sip_mask = 0xffffffff;
3130 		p->vlan = 0xfff;
3131 		p->vlan_prio = FILTER_NO_VLAN_PRI;
3132 		p->pkt_type = 1;
3133 		error = set_filter(sc, f->filter_id, p);
3134 		break;
3135 	}
3136 	case CHELSIO_GET_FILTER: {
3137 		struct ch_filter *f = (struct ch_filter *)data;
3138 		struct filter_info *p;
3139 		unsigned int i, nfilters = sc->params.mc5.nfilters;
3140 
3141 		if (!is_offload(sc))
3142 			return (EOPNOTSUPP);
3143 		if (!(sc->flags & FULL_INIT_DONE))
3144 			return (EAGAIN);
3145 		if (nfilters == 0 || sc->filters == NULL)
3146 			return (EINVAL);
3147 
3148 		i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3149 		for (; i < nfilters; i++) {
3150 			p = &sc->filters[i];
3151 			if (!p->valid)
3152 				continue;
3153 
3154 			bzero(f, sizeof(*f));
3155 
3156 			f->filter_id = i;
3157 			f->val.sip = p->sip;
3158 			f->mask.sip = p->sip_mask;
3159 			f->val.dip = p->dip;
3160 			f->mask.dip = p->dip ? 0xffffffff : 0;
3161 			f->val.sport = p->sport;
3162 			f->mask.sport = p->sport ? 0xffff : 0;
3163 			f->val.dport = p->dport;
3164 			f->mask.dport = p->dport ? 0xffff : 0;
3165 			f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3166 			f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3167 			f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3168 			    0 : p->vlan_prio;
3169 			f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3170 			    0 : FILTER_NO_VLAN_PRI;
3171 			f->mac_hit = p->mac_hit;
3172 			f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3173 			f->proto = p->pkt_type;
3174 			f->want_filter_id = p->report_filter_id;
3175 			f->pass = p->pass;
3176 			f->rss = p->rss;
3177 			f->qset = p->qset;
3178 
3179 			break;
3180 		}
3181 
3182 		if (i == nfilters)
3183 			f->filter_id = 0xffffffff;
3184 		break;
3185 	}
3186 	default:
3187 		return (EOPNOTSUPP);
3188 		break;
3189 	}
3190 
3191 	return (error);
3192 }
3193 
3194 static __inline void
3195 reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3196     unsigned int end)
3197 {
3198 	uint32_t *p = (uint32_t *)(buf + start);
3199 
3200 	for ( ; start <= end; start += sizeof(uint32_t))
3201 		*p++ = t3_read_reg(ap, start);
3202 }
3203 
/* Size, in bytes, of a register dump buffer. */
#define T3_REGMAP_SIZE (3 * 1024)

/*
 * Return the number of bytes occupied by a register dump.
 */
static int
cxgb_get_regs_len(void)
{

	return (T3_REGMAP_SIZE);
}
3210 
3211 static void
3212 cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3213 {
3214 
3215 	/*
3216 	 * Version scheme:
3217 	 * bits 0..9: chip version
3218 	 * bits 10..15: chip revision
3219 	 * bit 31: set for PCIe cards
3220 	 */
3221 	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3222 
3223 	/*
3224 	 * We skip the MAC statistics registers because they are clear-on-read.
3225 	 * Also reading multi-register stats would need to synchronize with the
3226 	 * periodic mac stats accumulation.  Hard to justify the complexity.
3227 	 */
3228 	memset(buf, 0, cxgb_get_regs_len());
3229 	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3230 	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3231 	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3232 	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3233 	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3234 	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3235 		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3236 	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3237 		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3238 }
3239 
3240 static int
3241 alloc_filters(struct adapter *sc)
3242 {
3243 	struct filter_info *p;
3244 	unsigned int nfilters = sc->params.mc5.nfilters;
3245 
3246 	if (nfilters == 0)
3247 		return (0);
3248 
3249 	p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3250 	sc->filters = p;
3251 
3252 	p = &sc->filters[nfilters - 1];
3253 	p->vlan = 0xfff;
3254 	p->vlan_prio = FILTER_NO_VLAN_PRI;
3255 	p->pass = p->rss = p->valid = p->locked = 1;
3256 
3257 	return (0);
3258 }
3259 
3260 static int
3261 setup_hw_filters(struct adapter *sc)
3262 {
3263 	int i, rc;
3264 	unsigned int nfilters = sc->params.mc5.nfilters;
3265 
3266 	if (!sc->filters)
3267 		return (0);
3268 
3269 	t3_enable_filters(sc);
3270 
3271 	for (i = rc = 0; i < nfilters && !rc; i++) {
3272 		if (sc->filters[i].locked)
3273 			rc = set_filter(sc, i, &sc->filters[i]);
3274 	}
3275 
3276 	return (rc);
3277 }
3278 
3279 static int
3280 set_filter(struct adapter *sc, int id, const struct filter_info *f)
3281 {
3282 	int len;
3283 	struct mbuf *m;
3284 	struct ulp_txpkt *txpkt;
3285 	struct work_request_hdr *wr;
3286 	struct cpl_pass_open_req *oreq;
3287 	struct cpl_set_tcb_field *sreq;
3288 
3289 	len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3290 	KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3291 
3292 	id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3293 	      sc->params.mc5.nfilters;
3294 
3295 	m = m_gethdr(M_WAITOK, MT_DATA);
3296 	m->m_len = m->m_pkthdr.len = len;
3297 	bzero(mtod(m, char *), len);
3298 
3299 	wr = mtod(m, struct work_request_hdr *);
3300 	wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3301 
3302 	oreq = (struct cpl_pass_open_req *)(wr + 1);
3303 	txpkt = (struct ulp_txpkt *)oreq;
3304 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3305 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3306 	OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3307 	oreq->local_port = htons(f->dport);
3308 	oreq->peer_port = htons(f->sport);
3309 	oreq->local_ip = htonl(f->dip);
3310 	oreq->peer_ip = htonl(f->sip);
3311 	oreq->peer_netmask = htonl(f->sip_mask);
3312 	oreq->opt0h = 0;
3313 	oreq->opt0l = htonl(F_NO_OFFLOAD);
3314 	oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3315 			 V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3316 			 V_VLAN_PRI(f->vlan_prio >> 1) |
3317 			 V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3318 			 V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3319 			 V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3320 
3321 	sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3322 	set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3323 			  (f->report_filter_id << 15) | (1 << 23) |
3324 			  ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3325 	set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3326 	t3_mgmt_tx(sc, m);
3327 
3328 	if (f->pass && !f->rss) {
3329 		len = sizeof(*sreq);
3330 		m = m_gethdr(M_WAITOK, MT_DATA);
3331 		m->m_len = m->m_pkthdr.len = len;
3332 		bzero(mtod(m, char *), len);
3333 		sreq = mtod(m, struct cpl_set_tcb_field *);
3334 		sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3335 		mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3336 				 (u64)sc->rrss_map[f->qset] << 19);
3337 		t3_mgmt_tx(sc, m);
3338 	}
3339 	return 0;
3340 }
3341 
3342 static inline void
3343 mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3344     unsigned int word, u64 mask, u64 val)
3345 {
3346 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3347 	req->reply = V_NO_REPLY(1);
3348 	req->cpu_idx = 0;
3349 	req->word = htons(word);
3350 	req->mask = htobe64(mask);
3351 	req->val = htobe64(val);
3352 }
3353 
3354 static inline void
3355 set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3356     unsigned int word, u64 mask, u64 val)
3357 {
3358 	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3359 
3360 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3361 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3362 	mk_set_tcb_field(req, tid, word, mask, val);
3363 }
3364 
3365 void
3366 t3_iterate(void (*func)(struct adapter *, void *), void *arg)
3367 {
3368 	struct adapter *sc;
3369 
3370 	mtx_lock(&t3_list_lock);
3371 	SLIST_FOREACH(sc, &t3_list, link) {
3372 		/*
3373 		 * func should not make any assumptions about what state sc is
3374 		 * in - the only guarantee is that sc->sc_lock is a valid lock.
3375 		 */
3376 		func(sc, arg);
3377 	}
3378 	mtx_unlock(&t3_list_lock);
3379 }
3380 
3381 #ifdef TCP_OFFLOAD
3382 static int
3383 toe_capability(struct port_info *pi, int enable)
3384 {
3385 	int rc;
3386 	struct adapter *sc = pi->adapter;
3387 
3388 	ADAPTER_LOCK_ASSERT_OWNED(sc);
3389 
3390 	if (!is_offload(sc))
3391 		return (ENODEV);
3392 
3393 	if (enable) {
3394 		if (!(sc->flags & FULL_INIT_DONE)) {
3395 			log(LOG_WARNING,
3396 			    "You must enable a cxgb interface first\n");
3397 			return (EAGAIN);
3398 		}
3399 
3400 		if (isset(&sc->offload_map, pi->port_id))
3401 			return (0);
3402 
3403 		if (!(sc->flags & TOM_INIT_DONE)) {
3404 			rc = t3_activate_uld(sc, ULD_TOM);
3405 			if (rc == EAGAIN) {
3406 				log(LOG_WARNING,
3407 				    "You must kldload t3_tom.ko before trying "
3408 				    "to enable TOE on a cxgb interface.\n");
3409 			}
3410 			if (rc != 0)
3411 				return (rc);
3412 			KASSERT(sc->tom_softc != NULL,
3413 			    ("%s: TOM activated but softc NULL", __func__));
3414 			KASSERT(sc->flags & TOM_INIT_DONE,
3415 			    ("%s: TOM activated but flag not set", __func__));
3416 		}
3417 
3418 		setbit(&sc->offload_map, pi->port_id);
3419 
3420 		/*
3421 		 * XXX: Temporary code to allow iWARP to be enabled when TOE is
3422 		 * enabled on any port.  Need to figure out how to enable,
3423 		 * disable, load, and unload iWARP cleanly.
3424 		 */
3425 		if (!isset(&sc->offload_map, MAX_NPORTS) &&
3426 		    t3_activate_uld(sc, ULD_IWARP) == 0)
3427 			setbit(&sc->offload_map, MAX_NPORTS);
3428 	} else {
3429 		if (!isset(&sc->offload_map, pi->port_id))
3430 			return (0);
3431 
3432 		KASSERT(sc->flags & TOM_INIT_DONE,
3433 		    ("%s: TOM never initialized?", __func__));
3434 		clrbit(&sc->offload_map, pi->port_id);
3435 	}
3436 
3437 	return (0);
3438 }
3439 
3440 /*
3441  * Add an upper layer driver to the global list.
3442  */
3443 int
3444 t3_register_uld(struct uld_info *ui)
3445 {
3446 	int rc = 0;
3447 	struct uld_info *u;
3448 
3449 	mtx_lock(&t3_uld_list_lock);
3450 	SLIST_FOREACH(u, &t3_uld_list, link) {
3451 	    if (u->uld_id == ui->uld_id) {
3452 		    rc = EEXIST;
3453 		    goto done;
3454 	    }
3455 	}
3456 
3457 	SLIST_INSERT_HEAD(&t3_uld_list, ui, link);
3458 	ui->refcount = 0;
3459 done:
3460 	mtx_unlock(&t3_uld_list_lock);
3461 	return (rc);
3462 }
3463 
3464 int
3465 t3_unregister_uld(struct uld_info *ui)
3466 {
3467 	int rc = EINVAL;
3468 	struct uld_info *u;
3469 
3470 	mtx_lock(&t3_uld_list_lock);
3471 
3472 	SLIST_FOREACH(u, &t3_uld_list, link) {
3473 	    if (u == ui) {
3474 		    if (ui->refcount > 0) {
3475 			    rc = EBUSY;
3476 			    goto done;
3477 		    }
3478 
3479 		    SLIST_REMOVE(&t3_uld_list, ui, uld_info, link);
3480 		    rc = 0;
3481 		    goto done;
3482 	    }
3483 	}
3484 done:
3485 	mtx_unlock(&t3_uld_list_lock);
3486 	return (rc);
3487 }
3488 
3489 int
3490 t3_activate_uld(struct adapter *sc, int id)
3491 {
3492 	int rc = EAGAIN;
3493 	struct uld_info *ui;
3494 
3495 	mtx_lock(&t3_uld_list_lock);
3496 
3497 	SLIST_FOREACH(ui, &t3_uld_list, link) {
3498 		if (ui->uld_id == id) {
3499 			rc = ui->activate(sc);
3500 			if (rc == 0)
3501 				ui->refcount++;
3502 			goto done;
3503 		}
3504 	}
3505 done:
3506 	mtx_unlock(&t3_uld_list_lock);
3507 
3508 	return (rc);
3509 }
3510 
3511 int
3512 t3_deactivate_uld(struct adapter *sc, int id)
3513 {
3514 	int rc = EINVAL;
3515 	struct uld_info *ui;
3516 
3517 	mtx_lock(&t3_uld_list_lock);
3518 
3519 	SLIST_FOREACH(ui, &t3_uld_list, link) {
3520 		if (ui->uld_id == id) {
3521 			rc = ui->deactivate(sc);
3522 			if (rc == 0)
3523 				ui->refcount--;
3524 			goto done;
3525 		}
3526 	}
3527 done:
3528 	mtx_unlock(&t3_uld_list_lock);
3529 
3530 	return (rc);
3531 }
3532 
3533 static int
3534 cpl_not_handled(struct sge_qset *qs __unused, struct rsp_desc *r __unused,
3535     struct mbuf *m)
3536 {
3537 	m_freem(m);
3538 	return (EDOOFUS);
3539 }
3540 
3541 int
3542 t3_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h)
3543 {
3544 	uintptr_t *loc, new;
3545 
3546 	if (opcode >= NUM_CPL_HANDLERS)
3547 		return (EINVAL);
3548 
3549 	new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled;
3550 	loc = (uintptr_t *) &sc->cpl_handler[opcode];
3551 	atomic_store_rel_ptr(loc, new);
3552 
3553 	return (0);
3554 }
3555 #endif
3556 
3557 static int
3558 cxgbc_mod_event(module_t mod, int cmd, void *arg)
3559 {
3560 	int rc = 0;
3561 
3562 	switch (cmd) {
3563 	case MOD_LOAD:
3564 		mtx_init(&t3_list_lock, "T3 adapters", 0, MTX_DEF);
3565 		SLIST_INIT(&t3_list);
3566 #ifdef TCP_OFFLOAD
3567 		mtx_init(&t3_uld_list_lock, "T3 ULDs", 0, MTX_DEF);
3568 		SLIST_INIT(&t3_uld_list);
3569 #endif
3570 		break;
3571 
3572 	case MOD_UNLOAD:
3573 #ifdef TCP_OFFLOAD
3574 		mtx_lock(&t3_uld_list_lock);
3575 		if (!SLIST_EMPTY(&t3_uld_list)) {
3576 			rc = EBUSY;
3577 			mtx_unlock(&t3_uld_list_lock);
3578 			break;
3579 		}
3580 		mtx_unlock(&t3_uld_list_lock);
3581 		mtx_destroy(&t3_uld_list_lock);
3582 #endif
3583 		mtx_lock(&t3_list_lock);
3584 		if (!SLIST_EMPTY(&t3_list)) {
3585 			rc = EBUSY;
3586 			mtx_unlock(&t3_list_lock);
3587 			break;
3588 		}
3589 		mtx_unlock(&t3_list_lock);
3590 		mtx_destroy(&t3_list_lock);
3591 		break;
3592 	}
3593 
3594 	return (rc);
3595 }
3596 
3597 #ifdef DEBUGNET
3598 static void
3599 cxgb_debugnet_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize)
3600 {
3601 	struct port_info *pi;
3602 	adapter_t *adap;
3603 
3604 	pi = if_getsoftc(ifp);
3605 	adap = pi->adapter;
3606 	ADAPTER_LOCK(adap);
3607 	*nrxr = adap->nqsets;
3608 	*ncl = adap->sge.qs[0].fl[1].size;
3609 	*clsize = adap->sge.qs[0].fl[1].buf_size;
3610 	ADAPTER_UNLOCK(adap);
3611 }
3612 
3613 static void
3614 cxgb_debugnet_event(struct ifnet *ifp, enum debugnet_ev event)
3615 {
3616 	struct port_info *pi;
3617 	struct sge_qset *qs;
3618 	int i;
3619 
3620 	pi = if_getsoftc(ifp);
3621 	if (event == DEBUGNET_START)
3622 		for (i = 0; i < pi->adapter->nqsets; i++) {
3623 			qs = &pi->adapter->sge.qs[i];
3624 
3625 			/* Need to reinit after debugnet_mbuf_start(). */
3626 			qs->fl[0].zone = zone_pack;
3627 			qs->fl[1].zone = zone_clust;
3628 			qs->lro.enabled = 0;
3629 		}
3630 }
3631 
3632 static int
3633 cxgb_debugnet_transmit(struct ifnet *ifp, struct mbuf *m)
3634 {
3635 	struct port_info *pi;
3636 	struct sge_qset *qs;
3637 
3638 	pi = if_getsoftc(ifp);
3639 	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
3640 	    IFF_DRV_RUNNING)
3641 		return (ENOENT);
3642 
3643 	qs = &pi->adapter->sge.qs[pi->first_qset];
3644 	return (cxgb_debugnet_encap(qs, &m));
3645 }
3646 
3647 static int
3648 cxgb_debugnet_poll(struct ifnet *ifp, int count)
3649 {
3650 	struct port_info *pi;
3651 	adapter_t *adap;
3652 	int i;
3653 
3654 	pi = if_getsoftc(ifp);
3655 	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
3656 		return (ENOENT);
3657 
3658 	adap = pi->adapter;
3659 	for (i = 0; i < adap->nqsets; i++)
3660 		(void)cxgb_debugnet_poll_rx(adap, &adap->sge.qs[i]);
3661 	(void)cxgb_debugnet_poll_tx(&adap->sge.qs[pi->first_qset]);
3662 	return (0);
3663 }
3664 #endif /* DEBUGNET */
3665