xref: /freebsd/sys/dev/cxgb/cxgb_main.c (revision 3157ba21)
1 /**************************************************************************
2 
3 Copyright (c) 2007-2009, Chelsio Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27 
28 ***************************************************************************/
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/bus.h>
37 #include <sys/module.h>
38 #include <sys/pciio.h>
39 #include <sys/conf.h>
40 #include <machine/bus.h>
41 #include <machine/resource.h>
42 #include <sys/bus_dma.h>
43 #include <sys/ktr.h>
44 #include <sys/rman.h>
45 #include <sys/ioccom.h>
46 #include <sys/mbuf.h>
47 #include <sys/linker.h>
48 #include <sys/firmware.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/smp.h>
52 #include <sys/sysctl.h>
53 #include <sys/syslog.h>
54 #include <sys/queue.h>
55 #include <sys/taskqueue.h>
56 #include <sys/proc.h>
57 
58 #include <net/bpf.h>
59 #include <net/ethernet.h>
60 #include <net/if.h>
61 #include <net/if_arp.h>
62 #include <net/if_dl.h>
63 #include <net/if_media.h>
64 #include <net/if_types.h>
65 #include <net/if_vlan_var.h>
66 
67 #include <netinet/in_systm.h>
68 #include <netinet/in.h>
69 #include <netinet/if_ether.h>
70 #include <netinet/ip.h>
72 #include <netinet/tcp.h>
73 #include <netinet/udp.h>
74 
75 #include <dev/pci/pcireg.h>
76 #include <dev/pci/pcivar.h>
77 #include <dev/pci/pci_private.h>
78 
79 #include <cxgb_include.h>
80 
81 #ifdef PRIV_SUPPORTED
82 #include <sys/priv.h>
83 #endif
84 
85 static int cxgb_setup_interrupts(adapter_t *);
86 static void cxgb_teardown_interrupts(adapter_t *);
87 static void cxgb_init(void *);
88 static int cxgb_init_locked(struct port_info *);
89 static int cxgb_uninit_locked(struct port_info *);
90 static int cxgb_uninit_synchronized(struct port_info *);
91 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
92 static int cxgb_media_change(struct ifnet *);
93 static int cxgb_ifm_type(int);
94 static void cxgb_build_medialist(struct port_info *);
95 static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
96 static int setup_sge_qsets(adapter_t *);
97 static void cxgb_async_intr(void *);
98 static void cxgb_ext_intr_handler(void *, int);
99 static void cxgb_tick_handler(void *, int);
100 static void cxgb_tick(void *);
101 static void setup_rss(adapter_t *sc);
102 static int alloc_filters(struct adapter *);
103 static int setup_hw_filters(struct adapter *);
104 static int set_filter(struct adapter *, int, const struct filter_info *);
105 static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
106     unsigned int, u64, u64);
107 static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
108     unsigned int, u64, u64);
109 
110 /* Attachment glue for the PCI controller end of the device.  Each port of
111  * the device is attached separately, as defined later.
112  */
113 static int cxgb_controller_probe(device_t);
114 static int cxgb_controller_attach(device_t);
115 static int cxgb_controller_detach(device_t);
116 static void cxgb_free(struct adapter *);
117 static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
118     unsigned int end);
119 static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
120 static int cxgb_get_regs_len(void);
121 static int offload_open(struct port_info *pi);
122 static void touch_bars(device_t dev);
123 static int offload_close(struct t3cdev *tdev);
124 static void cxgb_update_mac_settings(struct port_info *p);
125 
126 static device_method_t cxgb_controller_methods[] = {
127 	DEVMETHOD(device_probe,		cxgb_controller_probe),
128 	DEVMETHOD(device_attach,	cxgb_controller_attach),
129 	DEVMETHOD(device_detach,	cxgb_controller_detach),
130 
131 	/* bus interface */
132 	DEVMETHOD(bus_print_child,	bus_generic_print_child),
133 	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
134 
135 	{ 0, 0 }
136 };
137 
138 static driver_t cxgb_controller_driver = {
139 	"cxgbc",
140 	cxgb_controller_methods,
141 	sizeof(struct adapter)
142 };
143 
144 static devclass_t	cxgb_controller_devclass;
145 DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
146 
147 /*
148  * Attachment glue for the ports.  Attachment is done directly to the
149  * controller device.
150  */
151 static int cxgb_port_probe(device_t);
152 static int cxgb_port_attach(device_t);
153 static int cxgb_port_detach(device_t);
154 
155 static device_method_t cxgb_port_methods[] = {
156 	DEVMETHOD(device_probe,		cxgb_port_probe),
157 	DEVMETHOD(device_attach,	cxgb_port_attach),
158 	DEVMETHOD(device_detach,	cxgb_port_detach),
159 	{ 0, 0 }
160 };
161 
162 static driver_t cxgb_port_driver = {
163 	"cxgb",
164 	cxgb_port_methods,
165 	0
166 };
167 
168 static d_ioctl_t cxgb_extension_ioctl;
169 static d_open_t cxgb_extension_open;
170 static d_close_t cxgb_extension_close;
171 
172 static struct cdevsw cxgb_cdevsw = {
173        .d_version =    D_VERSION,
174        .d_flags =      0,
175        .d_open =       cxgb_extension_open,
176        .d_close =      cxgb_extension_close,
177        .d_ioctl =      cxgb_extension_ioctl,
178        .d_name =       "cxgb",
179 };
180 
181 static devclass_t	cxgb_port_devclass;
182 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
183 
184 /*
185  * The driver uses the best interrupt scheme available on a platform in the
186  * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
187  * of these schemes the driver may consider as follows:
188  *
189  * msi = 2: choose from among all three options
190  * msi = 1: only consider MSI and pin interrupts
191  * msi = 0: force pin interrupts
192  */
193 static int msi_allowed = 2;
194 
195 TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
196 SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
197 SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
198     "MSI-X, MSI, INTx selector");
199 
200 /*
201  * The driver enables offload as a default.
202  * To disable it, use ofld_disable = 1.
203  */
204 static int ofld_disable = 0;
205 TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
206 SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
207     "disable ULP offload");
208 
209 /*
210  * The driver uses an auto-queue algorithm by default.
211  * To disable it and force a single queue-set per port, use multiq = 0
212  */
213 static int multiq = 1;
214 TUNABLE_INT("hw.cxgb.multiq", &multiq);
215 SYSCTL_UINT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
216     "use min(ncpus/ports, 8) queue-sets per port");
217 
218 /*
219  * By default the driver will not update the firmware unless
220  * it was compiled against a newer version
221  *
222  */
223 static int force_fw_update = 0;
224 TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
225 SYSCTL_UINT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
226     "update firmware even if up to date");
227 
228 int cxgb_use_16k_clusters = -1;
229 TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
230 SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
231     &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");
232 
233 /*
234  * Tune the size of the output queue.
235  */
236 int cxgb_snd_queue_len = IFQ_MAXLEN;
237 TUNABLE_INT("hw.cxgb.snd_queue_len", &cxgb_snd_queue_len);
238 SYSCTL_UINT(_hw_cxgb, OID_AUTO, snd_queue_len, CTLFLAG_RDTUN,
239     &cxgb_snd_queue_len, 0, "send queue size");
240 
241 static int nfilters = -1;
242 TUNABLE_INT("hw.cxgb.nfilters", &nfilters);
243 SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
244     &nfilters, 0, "max number of entries in the filter table");
245 
246 enum {
247 	MAX_TXQ_ENTRIES      = 16384,
248 	MAX_CTRL_TXQ_ENTRIES = 1024,
249 	MAX_RSPQ_ENTRIES     = 16384,
250 	MAX_RX_BUFFERS       = 16384,
251 	MAX_RX_JUMBO_BUFFERS = 16384,
252 	MIN_TXQ_ENTRIES      = 4,
253 	MIN_CTRL_TXQ_ENTRIES = 4,
254 	MIN_RSPQ_ENTRIES     = 32,
255 	MIN_FL_ENTRIES       = 32,
256 	MIN_FL_JUMBO_ENTRIES = 32
257 };
258 
259 struct filter_info {
260 	u32 sip;
261 	u32 sip_mask;
262 	u32 dip;
263 	u16 sport;
264 	u16 dport;
265 	u32 vlan:12;
266 	u32 vlan_prio:3;
267 	u32 mac_hit:1;
268 	u32 mac_idx:4;
269 	u32 mac_vld:1;
270 	u32 pkt_type:2;
271 	u32 report_filter_id:1;
272 	u32 pass:1;
273 	u32 rss:1;
274 	u32 qset:3;
275 	u32 locked:1;
276 	u32 valid:1;
277 };
278 
279 enum { FILTER_NO_VLAN_PRI = 7 };
280 
281 #define EEPROM_MAGIC 0x38E2F10C
282 
283 #define PORT_MASK ((1 << MAX_NPORTS) - 1)
284 
285 /* Table for probing the cards.  The desc field isn't actually used */
286 struct cxgb_ident {
287 	uint16_t	vendor;
288 	uint16_t	device;
289 	int		index;
290 	char		*desc;
291 } cxgb_identifiers[] = {
292 	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
293 	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
294 	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
295 	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
296 	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
297 	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
298 	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
299 	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
300 	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
301 	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
302 	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
303 	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
304 	{PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
305 	{PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
306 	{0, 0, 0, NULL}
307 };
308 
309 static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
310 
311 
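/*
 * Map the adapter's hardware revision to the single character used in the
 * TP EEPROM/SRAM firmware image names (e.g. cxgb_t3b_protocol_sram).
 */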
312 static __inline char
313 t3rev2char(struct adapter *adapter)
314 {
315 	char rev = 'z';
316 
317 	switch(adapter->params.rev) {
318 	case T3_REV_A:
319 		rev = 'a';
320 		break;
321 	case T3_REV_B:
322 	case T3_REV_B2:
323 		rev = 'b';
324 		break;
325 	case T3_REV_C:
326 		rev = 'c';
327 		break;
328 	}
329 	return rev;
330 }
331 
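/*
 * Match the PCI vendor/device ID against cxgb_identifiers; returns the
 * table entry for a supported card or NULL if the device is unknown.
 */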
332 static struct cxgb_ident *
333 cxgb_get_ident(device_t dev)
334 {
335 	struct cxgb_ident *id;
336 
337 	for (id = cxgb_identifiers; id->desc != NULL; id++) {
338 		if ((id->vendor == pci_get_vendor(dev)) &&
339 		    (id->device == pci_get_device(dev))) {
340 			return (id);
341 		}
342 	}
343 	return (NULL);
344 }
345 
346 static const struct adapter_info *
347 cxgb_get_adapter_info(device_t dev)
348 {
349 	struct cxgb_ident *id;
350 	const struct adapter_info *ai;
351 
352 	id = cxgb_get_ident(dev);
353 	if (id == NULL)
354 		return (NULL);
355 
356 	ai = t3_get_adapter_info(id->index);
357 
358 	return (ai);
359 }
360 
361 static int
362 cxgb_controller_probe(device_t dev)
363 {
364 	const struct adapter_info *ai;
365 	char *ports, buf[80];
366 	int nports;
367 
368 	ai = cxgb_get_adapter_info(dev);
369 	if (ai == NULL)
370 		return (ENXIO);
371 
372 	nports = ai->nports0 + ai->nports1;
373 	if (nports == 1)
374 		ports = "port";
375 	else
376 		ports = "ports";
377 
378 	snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
379 	device_set_desc_copy(dev, buf);
380 	return (BUS_PROBE_DEFAULT);
381 }
382 
383 #define FW_FNAME "cxgb_t3fw"
384 #define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
385 #define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
386 
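/*
 * Load the cxgb_t3fw image with firmware(9) and write it to the card, then
 * refresh the cached firmware version string on success.
 */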
387 static int
388 upgrade_fw(adapter_t *sc)
389 {
390 	const struct firmware *fw;
391 	int status;
392 	u32 vers;
393 
394 	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
395 		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
396 		return (ENOENT);
397 	} else
398 		device_printf(sc->dev, "installing firmware on card\n");
399 	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
400 
401 	if (status != 0) {
402 		device_printf(sc->dev, "failed to install firmware: %d\n",
403 		    status);
404 	} else {
405 		t3_get_fw_version(sc, &vers);
406 		snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
407 		    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
408 		    G_FW_VERSION_MICRO(vers));
409 	}
410 
411 	firmware_put(fw, FIRMWARE_UNLOAD);
412 
413 	return (status);
414 }
415 
416 /*
417  * The cxgb_controller_attach function is responsible for the initial
418  * bringup of the device.  Its responsibilities include:
419  *
420  *  1. Determine if the device supports MSI or MSI-X.
421  *  2. Allocate bus resources so that we can access the Base Address Register
422  *  3. Create and initialize mutexes for the controller and its control
423  *     logic such as SGE and MDIO.
424  *  4. Call hardware specific setup routine for the adapter as a whole.
425  *  5. Allocate the BAR for doing MSI-X.
426  *  6. Setup the line interrupt iff MSI-X is not supported.
427  *  7. Create the driver's taskq.
428  *  8. Start one task queue service thread.
429  *  9. Check if the firmware and SRAM are up-to-date.  They will be
430  *     auto-updated later (before FULL_INIT_DONE), if required.
431  * 10. Create a child device for each MAC (port)
432  * 11. Initialize T3 private state.
433  * 12. Trigger the LED
434  * 13. Setup offload iff supported.
435  * 14. Reset/restart the tick callout.
436  * 15. Attach sysctls
437  *
438  * NOTE: Any modification or deviation from this list MUST be reflected in
439  * the above comment.  Failure to do so will result in problems on various
440  * error conditions including link flapping.
441  */
442 static int
443 cxgb_controller_attach(device_t dev)
444 {
445 	device_t child;
446 	const struct adapter_info *ai;
447 	struct adapter *sc;
448 	int i, error = 0;
449 	uint32_t vers;
450 	int port_qsets = 1;
451 	int msi_needed, reg;
452 	char buf[80];
453 
454 	sc = device_get_softc(dev);
455 	sc->dev = dev;
456 	sc->msi_count = 0;
457 	ai = cxgb_get_adapter_info(dev);
458 
459 	/* Find the PCIe link width and set max read request to 4KB. */
460 	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
461 		uint16_t lnk, pectl;
462 		lnk = pci_read_config(dev, reg + 0x12, 2);
463 		sc->link_width = (lnk >> 4) & 0x3f;
464 
465 		pectl = pci_read_config(dev, reg + 0x8, 2);
466 		pectl = (pectl & ~0x7000) | (5 << 12);
467 		pci_write_config(dev, reg + 0x8, pectl, 2);
468 	}
469 
470 	if (sc->link_width != 0 && sc->link_width <= 4 &&
471 	    (ai->nports0 + ai->nports1) <= 2) {
472 		device_printf(sc->dev,
473 		    "PCIe x%d Link, expect reduced performance\n",
474 		    sc->link_width);
475 	}
476 
477 	touch_bars(dev);
478 	pci_enable_busmaster(dev);
479 	/*
480 	 * Allocate the registers and make them available to the driver.
481 	 * The registers that we care about for NIC mode are in BAR 0
482 	 */
483 	sc->regs_rid = PCIR_BAR(0);
484 	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
485 	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
486 		device_printf(dev, "Cannot allocate BAR region 0\n");
487 		return (ENXIO);
488 	}
489 	sc->udbs_rid = PCIR_BAR(2);
490 	sc->udbs_res = NULL;
491 	if (is_offload(sc) &&
492 	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
493 		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
494 		device_printf(dev, "Cannot allocate BAR region 1\n");
495 		error = ENXIO;
496 		goto out;
497 	}
498 
499 	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
500 	    device_get_unit(dev));
501 	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
502 
503 	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
504 	    device_get_unit(dev));
505 	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
506 	    device_get_unit(dev));
507 	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
508 	    device_get_unit(dev));
509 
510 	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
511 	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
512 	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
513 
514 	sc->bt = rman_get_bustag(sc->regs_res);
515 	sc->bh = rman_get_bushandle(sc->regs_res);
516 	sc->mmio_len = rman_get_size(sc->regs_res);
517 
518 	for (i = 0; i < MAX_NPORTS; i++)
519 		sc->port[i].adapter = sc;
520 
521 	if (t3_prep_adapter(sc, ai, 1) < 0) {
522 		printf("prep adapter failed\n");
523 		error = ENODEV;
524 		goto out;
525 	}
526         /* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
527 	 * enough messages for the queue sets.  If that fails, try falling
528 	 * back to MSI.  If that fails, then try falling back to the legacy
529 	 * interrupt pin model.
530 	 */
531 	sc->msix_regs_rid = 0x20;
532 	if ((msi_allowed >= 2) &&
533 	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
534 	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
535 
536 		if (multiq)
537 			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
538 		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
539 
540 		if (pci_msix_count(dev) == 0 ||
541 		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
542 		    sc->msi_count != msi_needed) {
543 			device_printf(dev, "alloc msix failed - "
544 				      "msi_count=%d, msi_needed=%d, err=%d; "
545 				      "will try MSI\n", sc->msi_count,
546 				      msi_needed, error);
547 			sc->msi_count = 0;
548 			port_qsets = 1;
549 			pci_release_msi(dev);
550 			bus_release_resource(dev, SYS_RES_MEMORY,
551 			    sc->msix_regs_rid, sc->msix_regs_res);
552 			sc->msix_regs_res = NULL;
553 		} else {
554 			sc->flags |= USING_MSIX;
555 			sc->cxgb_intr = cxgb_async_intr;
556 			device_printf(dev,
557 				      "using MSI-X interrupts (%u vectors)\n",
558 				      sc->msi_count);
559 		}
560 	}
561 
562 	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
563 		sc->msi_count = 1;
564 		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
565 			device_printf(dev, "alloc msi failed - "
566 				      "err=%d; will try INTx\n", error);
567 			sc->msi_count = 0;
568 			port_qsets = 1;
569 			pci_release_msi(dev);
570 		} else {
571 			sc->flags |= USING_MSI;
572 			sc->cxgb_intr = t3_intr_msi;
573 			device_printf(dev, "using MSI interrupts\n");
574 		}
575 	}
576 	if (sc->msi_count == 0) {
577 		device_printf(dev, "using line interrupts\n");
578 		sc->cxgb_intr = t3b_intr;
579 	}
580 
581 	/* Create a private taskqueue thread for handling driver events */
582 	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
583 	    taskqueue_thread_enqueue, &sc->tq);
584 	if (sc->tq == NULL) {
585 		device_printf(dev, "failed to allocate controller task queue\n");
586 		goto out;
587 	}
588 
589 	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
590 	    device_get_nameunit(dev));
591 	TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);
592 	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
593 
594 
595 	/* Create a periodic callout for checking adapter status */
596 	callout_init(&sc->cxgb_tick_ch, TRUE);
597 
598 	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
599 		/*
600 		 * Warn user that a firmware update will be attempted in init.
601 		 */
602 		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
603 		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
604 		sc->flags &= ~FW_UPTODATE;
605 	} else {
606 		sc->flags |= FW_UPTODATE;
607 	}
608 
609 	if (t3_check_tpsram_version(sc) < 0) {
610 		/*
611 		 * Warn user that a firmware update will be attempted in init.
612 		 * Warn user that an SRAM update will be attempted in init.
613 		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
614 		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
615 		sc->flags &= ~TPS_UPTODATE;
616 	} else {
617 		sc->flags |= TPS_UPTODATE;
618 	}
619 
620 	/*
621 	 * Create a child device for each MAC.  The ethernet attachment
622 	 * will be done in these children.
623 	 */
624 	for (i = 0; i < (sc)->params.nports; i++) {
625 		struct port_info *pi;
626 
627 		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
628 			device_printf(dev, "failed to add child port\n");
629 			error = EINVAL;
630 			goto out;
631 		}
632 		pi = &sc->port[i];
633 		pi->adapter = sc;
634 		pi->nqsets = port_qsets;
635 		pi->first_qset = i*port_qsets;
636 		pi->port_id = i;
637 		pi->tx_chan = i >= ai->nports0;
638 		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
639 		sc->rxpkt_map[pi->txpkt_intf] = i;
640 		sc->port[i].tx_chan = i >= ai->nports0;
641 		sc->portdev[i] = child;
642 		device_set_softc(child, pi);
643 	}
644 	if ((error = bus_generic_attach(dev)) != 0)
645 		goto out;
646 
647 	/* initialize sge private state */
648 	t3_sge_init_adapter(sc);
649 
650 	t3_led_ready(sc);
651 
652 	cxgb_offload_init();
653 	if (is_offload(sc)) {
654 		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
655 		cxgb_adapter_ofld(sc);
656         }
657 	error = t3_get_fw_version(sc, &vers);
658 	if (error)
659 		goto out;
660 
661 	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
662 	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
663 	    G_FW_VERSION_MICRO(vers));
664 
665 	snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
666 		 ai->desc, is_offload(sc) ? "R" : "",
667 		 sc->params.vpd.ec, sc->params.vpd.sn);
668 	device_set_desc_copy(dev, buf);
669 
670 	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
671 		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
672 		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
673 
674 	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
675 	callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
676 	t3_add_attach_sysctls(sc);
677 out:
678 	if (error)
679 		cxgb_free(sc);
680 
681 	return (error);
682 }
683 
684 /*
685  * The cxgb_controller_detach routine is called when the device is
686  * unloaded from the system.
687  */
688 
689 static int
690 cxgb_controller_detach(device_t dev)
691 {
692 	struct adapter *sc;
693 
694 	sc = device_get_softc(dev);
695 
696 	cxgb_free(sc);
697 
698 	return (0);
699 }
700 
701 /*
702  * cxgb_free() is called by the cxgb_controller_detach() routine to tear
703  * down the structures that were built up in cxgb_controller_attach().
704  * It should be the final piece of work done when fully unloading the
705  * driver, and consists of:
706  *
707  *
708  *  1. Shutting down the threads started by the cxgb_controller_attach()
709  *     routine.
710  *  2. Stopping the lower level device and all callouts (cxgb_down()).
711  *  3. Detaching all of the port devices created during the
712  *     cxgb_controller_attach() routine.
713  *  4. Removing the device children created via cxgb_controller_attach().
714  *  5. Releasing PCI resources associated with the device.
715  *  6. Turning off the offload support, iff it was turned on.
716  *  7. Destroying the mutexes created in cxgb_controller_attach().
717  *
718  */
719 static void
720 cxgb_free(struct adapter *sc)
721 {
722 	int i;
723 
724 	ADAPTER_LOCK(sc);
725 	sc->flags |= CXGB_SHUTDOWN;
726 	ADAPTER_UNLOCK(sc);
727 
728 	/*
729 	 * Make sure all child devices are gone.
730 	 */
731 	bus_generic_detach(sc->dev);
732 	for (i = 0; i < (sc)->params.nports; i++) {
733 		if (sc->portdev[i] &&
734 		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
735 			device_printf(sc->dev, "failed to delete child port\n");
736 	}
737 
738 	/*
739 	 * At this point, it is as if cxgb_port_detach has run on all ports, and
740 	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
741 	 * all open devices have been closed.
742 	 */
743 	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
744 					   __func__, sc->open_device_map));
745 	for (i = 0; i < sc->params.nports; i++) {
746 		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
747 						  __func__, i));
748 	}
749 
750 	/*
751 	 * Finish off the adapter's callouts.
752 	 */
753 	callout_drain(&sc->cxgb_tick_ch);
754 	callout_drain(&sc->sge_timer_ch);
755 
756 	/*
757 	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
758 	 * sysctls are cleaned up by the kernel linker.
759 	 */
760 	if (sc->flags & FULL_INIT_DONE) {
761  		t3_free_sge_resources(sc);
762  		sc->flags &= ~FULL_INIT_DONE;
763  	}
764 
765 	/*
766 	 * Release all interrupt resources.
767 	 */
768 	cxgb_teardown_interrupts(sc);
769 	if (sc->flags & (USING_MSI | USING_MSIX)) {
770 		device_printf(sc->dev, "releasing msi message(s)\n");
771 		pci_release_msi(sc->dev);
772 	} else {
773 		device_printf(sc->dev, "no msi message to release\n");
774 	}
775 
776 	if (sc->msix_regs_res != NULL) {
777 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
778 		    sc->msix_regs_res);
779 	}
780 
781 	/*
782 	 * Free the adapter's taskqueue.
783 	 */
784 	if (sc->tq != NULL) {
785 		taskqueue_free(sc->tq);
786 		sc->tq = NULL;
787 	}
788 
789 	if (is_offload(sc)) {
790 		clrbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
791 		cxgb_adapter_unofld(sc);
792 	}
793 
794 #ifdef notyet
795 	if (sc->flags & CXGB_OFLD_INIT)
796 		cxgb_offload_deactivate(sc);
797 #endif
798 	free(sc->filters, M_DEVBUF);
799 	t3_sge_free(sc);
800 
801 	cxgb_offload_exit();
802 
803 	if (sc->udbs_res != NULL)
804 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
805 		    sc->udbs_res);
806 
807 	if (sc->regs_res != NULL)
808 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
809 		    sc->regs_res);
810 
811 	MTX_DESTROY(&sc->mdio_lock);
812 	MTX_DESTROY(&sc->sge.reg_lock);
813 	MTX_DESTROY(&sc->elmer_lock);
814 	ADAPTER_LOCK_DEINIT(sc);
815 }
816 
817 /**
818  *	setup_sge_qsets - configure SGE Tx/Rx/response queues
819  *	@sc: the controller softc
820  *
821  *	Determines how many sets of SGE queues to use and initializes them.
822  *	We support multiple queue sets per port if we have MSI-X, otherwise
823  *	just one queue set per port.
824  */
825 static int
826 setup_sge_qsets(adapter_t *sc)
827 {
828 	int i, j, err, irq_idx = 0, qset_idx = 0;
829 	u_int ntxq = SGE_TXQ_PER_SET;
830 
831 	if ((err = t3_sge_alloc(sc)) != 0) {
832 		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
833 		return (err);
834 	}
835 
836 	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
837 		irq_idx = -1;
838 
839 	for (i = 0; i < (sc)->params.nports; i++) {
840 		struct port_info *pi = &sc->port[i];
841 
842 		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
843 			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
844 			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
845 			    &sc->params.sge.qset[qset_idx], ntxq, pi);
846 			if (err) {
847 				t3_free_sge_resources(sc);
848 				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n",
849 				    err);
850 				return (err);
851 			}
852 		}
853 	}
854 
855 	return (0);
856 }
857 
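/*
 * Tear down and release every MSI-X vector that was set up, and then the
 * INTx/MSI interrupt if one was established.
 */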
858 static void
859 cxgb_teardown_interrupts(adapter_t *sc)
860 {
861 	int i;
862 
863 	for (i = 0; i < SGE_QSETS; i++) {
864 		if (sc->msix_intr_tag[i] == NULL) {
865 
866 			/* Should have been set up fully or not at all */
867 			KASSERT(sc->msix_irq_res[i] == NULL &&
868 				sc->msix_irq_rid[i] == 0,
869 				("%s: half-done interrupt (%d).", __func__, i));
870 
871 			continue;
872 		}
873 
874 		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
875 				  sc->msix_intr_tag[i]);
876 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
877 				     sc->msix_irq_res[i]);
878 
879 		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
880 		sc->msix_irq_rid[i] = 0;
881 	}
882 
883 	if (sc->intr_tag) {
884 		KASSERT(sc->irq_res != NULL,
885 			("%s: half-done interrupt.", __func__));
886 
887 		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
888 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
889 				     sc->irq_res);
890 
891 		sc->irq_res = sc->intr_tag = NULL;
892 		sc->irq_rid = 0;
893 	}
894 }
895 
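/*
 * Set up the main interrupt (INTx, MSI, or the MSI-X async vector) and,
 * when MSI-X is in use, one additional vector for each queue set.
 */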
896 static int
897 cxgb_setup_interrupts(adapter_t *sc)
898 {
899 	struct resource *res;
900 	void *tag;
901 	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
902 
903 	sc->irq_rid = intr_flag ? 1 : 0;
904 	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
905 					     RF_SHAREABLE | RF_ACTIVE);
906 	if (sc->irq_res == NULL) {
907 		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
908 			      intr_flag, sc->irq_rid);
909 		err = EINVAL;
910 		sc->irq_rid = 0;
911 	} else {
912 		err = bus_setup_intr(sc->dev, sc->irq_res,
913 		    INTR_MPSAFE | INTR_TYPE_NET, NULL,
914 		    sc->cxgb_intr, sc, &sc->intr_tag);
915 
916 		if (err) {
917 			device_printf(sc->dev,
918 				      "Cannot set up interrupt (%x, %u, %d)\n",
919 				      intr_flag, sc->irq_rid, err);
920 			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
921 					     sc->irq_res);
922 			sc->irq_res = sc->intr_tag = NULL;
923 			sc->irq_rid = 0;
924 		}
925 	}
926 
927 	/* That's all for INTx or MSI */
928 	if (!(intr_flag & USING_MSIX) || err)
929 		return (err);
930 
931 	for (i = 0; i < sc->msi_count - 1; i++) {
932 		rid = i + 2;
933 		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
934 					     RF_SHAREABLE | RF_ACTIVE);
935 		if (res == NULL) {
936 			device_printf(sc->dev, "Cannot allocate interrupt "
937 				      "for message %d\n", rid);
938 			err = EINVAL;
939 			break;
940 		}
941 
942 		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
943 				     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
944 		if (err) {
945 			device_printf(sc->dev, "Cannot set up interrupt "
946 				      "for message %d (%d)\n", rid, err);
947 			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
948 			break;
949 		}
950 
951 		sc->msix_irq_rid[i] = rid;
952 		sc->msix_irq_res[i] = res;
953 		sc->msix_intr_tag[i] = tag;
954 	}
955 
956 	if (err)
957 		cxgb_teardown_interrupts(sc);
958 
959 	return (err);
960 }
961 
962 
963 static int
964 cxgb_port_probe(device_t dev)
965 {
966 	struct port_info *p;
967 	char buf[80];
968 	const char *desc;
969 
970 	p = device_get_softc(dev);
971 	desc = p->phy.desc;
972 	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
973 	device_set_desc_copy(dev, buf);
974 	return (0);
975 }
976 
977 
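/*
 * Create the /dev/cxgbN character device that exports the driver's
 * extension ioctl interface for this port.
 */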
978 static int
979 cxgb_makedev(struct port_info *pi)
980 {
981 
982 	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
983 	    UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));
984 
985 	if (pi->port_cdev == NULL)
986 		return (ENOMEM);
987 
988 	pi->port_cdev->si_drv1 = (void *)pi;
989 
990 	return (0);
991 }
992 
993 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
994     IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
995     IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE)
996 #define CXGB_CAP_ENABLE (CXGB_CAP & ~IFCAP_TSO6)
997 
998 static int
999 cxgb_port_attach(device_t dev)
1000 {
1001 	struct port_info *p;
1002 	struct ifnet *ifp;
1003 	int err;
1004 	struct adapter *sc;
1005 
1006 	p = device_get_softc(dev);
1007 	sc = p->adapter;
1008 	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
1009 	    device_get_unit(device_get_parent(dev)), p->port_id);
1010 	PORT_LOCK_INIT(p, p->lockbuf);
1011 
1012 	/* Allocate an ifnet object and set it up */
1013 	ifp = p->ifp = if_alloc(IFT_ETHER);
1014 	if (ifp == NULL) {
1015 		device_printf(dev, "Cannot allocate ifnet\n");
1016 		return (ENOMEM);
1017 	}
1018 
1019 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1020 	ifp->if_init = cxgb_init;
1021 	ifp->if_softc = p;
1022 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1023 	ifp->if_ioctl = cxgb_ioctl;
1024 	ifp->if_start = cxgb_start;
1025 
1026 	ifp->if_snd.ifq_drv_maxlen = max(cxgb_snd_queue_len, ifqmaxlen);
1027 	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
1028 	IFQ_SET_READY(&ifp->if_snd);
1029 
1030 	ifp->if_capabilities = CXGB_CAP;
1031 	ifp->if_capenable = CXGB_CAP_ENABLE;
1032 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO;
1033 
1034 	/*
1035 	 * Disable TSO on 4-port - it isn't supported by the firmware.
1036 	 */
1037 	if (sc->params.nports > 2) {
1038 		ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1039 		ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1040 		ifp->if_hwassist &= ~CSUM_TSO;
1041 	}
1042 
1043 	ether_ifattach(ifp, p->hw_addr);
1044 	ifp->if_transmit = cxgb_transmit;
1045 	ifp->if_qflush = cxgb_qflush;
1046 
1047 #ifdef DEFAULT_JUMBO
1048 	if (sc->params.nports <= 2)
1049 		ifp->if_mtu = ETHERMTU_JUMBO;
1050 #endif
1051 	if ((err = cxgb_makedev(p)) != 0) {
1052 		printf("makedev failed %d\n", err);
1053 		return (err);
1054 	}
1055 
1056 	/* Create a list of media supported by this port */
1057 	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1058 	    cxgb_media_status);
1059 	cxgb_build_medialist(p);
1060 
1061 	t3_sge_init_port(p);
1062 
1063 	return (err);
1064 }
1065 
1066 /*
1067  * cxgb_port_detach() is called via the device_detach methods when
1068  * cxgb_free() calls the bus_generic_detach.  It is responsible for
1069  * removing the device from the view of the kernel, i.e. from all
1070  * interfaces lists etc.  This routine is only called when the driver is
1071  * being unloaded, not when the link goes down.
1072  */
1073 static int
1074 cxgb_port_detach(device_t dev)
1075 {
1076 	struct port_info *p;
1077 	struct adapter *sc;
1078 	int i;
1079 
1080 	p = device_get_softc(dev);
1081 	sc = p->adapter;
1082 
1083 	/* Tell cxgb_ioctl and if_init that the port is going away */
1084 	ADAPTER_LOCK(sc);
1085 	SET_DOOMED(p);
1086 	wakeup(&sc->flags);
1087 	while (IS_BUSY(sc))
1088 		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1089 	SET_BUSY(sc);
1090 	ADAPTER_UNLOCK(sc);
1091 
1092 	if (p->port_cdev != NULL)
1093 		destroy_dev(p->port_cdev);
1094 
1095 	cxgb_uninit_synchronized(p);
1096 	ether_ifdetach(p->ifp);
1097 
1098 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1099 		struct sge_qset *qs = &sc->sge.qs[i];
1100 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1101 
1102 		callout_drain(&txq->txq_watchdog);
1103 		callout_drain(&txq->txq_timer);
1104 	}
1105 
1106 	PORT_LOCK_DEINIT(p);
1107 	if_free(p->ifp);
1108 	p->ifp = NULL;
1109 
1110 	ADAPTER_LOCK(sc);
1111 	CLR_BUSY(sc);
1112 	wakeup_one(&sc->flags);
1113 	ADAPTER_UNLOCK(sc);
1114 	return (0);
1115 }
1116 
1117 void
1118 t3_fatal_err(struct adapter *sc)
1119 {
1120 	u_int fw_status[4];
1121 
1122 	if (sc->flags & FULL_INIT_DONE) {
1123 		t3_sge_stop(sc);
1124 		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1125 		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1126 		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1127 		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1128 		t3_intr_disable(sc);
1129 	}
1130 	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1131 	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1132 		device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1133 		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1134 }
1135 
1136 int
1137 t3_os_find_pci_capability(adapter_t *sc, int cap)
1138 {
1139 	device_t dev;
1140 	struct pci_devinfo *dinfo;
1141 	pcicfgregs *cfg;
1142 	uint32_t status;
1143 	uint8_t ptr;
1144 
1145 	dev = sc->dev;
1146 	dinfo = device_get_ivars(dev);
1147 	cfg = &dinfo->cfg;
1148 
1149 	status = pci_read_config(dev, PCIR_STATUS, 2);
1150 	if (!(status & PCIM_STATUS_CAPPRESENT))
1151 		return (0);
1152 
1153 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1154 	case 0:
1155 	case 1:
1156 		ptr = PCIR_CAP_PTR;
1157 		break;
1158 	case 2:
1159 		ptr = PCIR_CAP_PTR_2;
1160 		break;
1161 	default:
1162 		return (0);
1163 		break;
1164 	}
1165 	ptr = pci_read_config(dev, ptr, 1);
1166 
1167 	while (ptr != 0) {
1168 		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1169 			return (ptr);
1170 		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1171 	}
1172 
1173 	return (0);
1174 }
1175 
1176 int
1177 t3_os_pci_save_state(struct adapter *sc)
1178 {
1179 	device_t dev;
1180 	struct pci_devinfo *dinfo;
1181 
1182 	dev = sc->dev;
1183 	dinfo = device_get_ivars(dev);
1184 
1185 	pci_cfg_save(dev, dinfo, 0);
1186 	return (0);
1187 }
1188 
1189 int
1190 t3_os_pci_restore_state(struct adapter *sc)
1191 {
1192 	device_t dev;
1193 	struct pci_devinfo *dinfo;
1194 
1195 	dev = sc->dev;
1196 	dinfo = device_get_ivars(dev);
1197 
1198 	pci_cfg_restore(dev, dinfo);
1199 	return (0);
1200 }
1201 
1202 /**
1203  *	t3_os_link_changed - handle link status changes
1204  *	@adapter: the adapter associated with the link change
1205  *	@port_id: the port index whose link status has changed
1206  *	@link_status: the new status of the link
1207  *	@speed: the new speed setting
1208  *	@duplex: the new duplex setting
1209  *	@fc: the new flow-control setting
 *	@mac_was_reset: whether the MAC was reset during the link change
1210  *
1211  *	This is the OS-dependent handler for link status changes.  The OS
1212  *	neutral handler takes care of most of the processing for these events,
1213  *	then calls this handler for any OS-specific processing.
1214  */
1215 void
1216 t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1217      int duplex, int fc, int mac_was_reset)
1218 {
1219 	struct port_info *pi = &adapter->port[port_id];
1220 	struct ifnet *ifp = pi->ifp;
1221 
1222 	/* no race with detach, so ifp should always be good */
1223 	KASSERT(ifp, ("%s: if detached.", __func__));
1224 
1225 	/* Reapply mac settings if they were lost due to a reset */
1226 	if (mac_was_reset) {
1227 		PORT_LOCK(pi);
1228 		cxgb_update_mac_settings(pi);
1229 		PORT_UNLOCK(pi);
1230 	}
1231 
1232 	if (link_status) {
1233 		ifp->if_baudrate = IF_Mbps(speed);
1234 		if_link_state_change(ifp, LINK_STATE_UP);
1235 	} else
1236 		if_link_state_change(ifp, LINK_STATE_DOWN);
1237 }
1238 
1239 /**
1240  *	t3_os_phymod_changed - handle PHY module changes
1241  *	@adap: the adapter whose PHY module changed
1242  *	@port_id: the index of the port with the changed module
1243  *
1244  *	This is the OS-dependent handler for PHY module changes.  It is
1245  *	invoked when a PHY module is removed or inserted for any OS-specific
1246  *	processing.
1247  */
1248 void t3_os_phymod_changed(struct adapter *adap, int port_id)
1249 {
1250 	static const char *mod_str[] = {
1251 		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1252 	};
1253 	struct port_info *pi = &adap->port[port_id];
1254 	int mod = pi->phy.modtype;
1255 
1256 	if (mod != pi->media.ifm_cur->ifm_data)
1257 		cxgb_build_medialist(pi);
1258 
1259 	if (mod == phy_modtype_none)
1260 		if_printf(pi->ifp, "PHY module unplugged\n");
1261 	else {
1262 		KASSERT(mod < ARRAY_SIZE(mod_str),
1263 			("invalid PHY module type %d", mod));
1264 		if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1265 	}
1266 }
1267 
1268 /*
1269  * Interrupt-context handler for external (PHY) interrupts.
1270  */
1271 void
1272 t3_os_ext_intr_handler(adapter_t *sc)
1273 {
1274 	if (cxgb_debug)
1275 		printf("t3_os_ext_intr_handler\n");
1276 	/*
1277 	 * Schedule a task to handle external interrupts as they may be slow
1278 	 * and we use a mutex to protect MDIO registers.  We disable PHY
1279 	 * interrupts in the meantime and let the task reenable them when
1280 	 * it's done.
1281 	 */
1282 	if (sc->slow_intr_mask) {
1283 		ADAPTER_LOCK(sc);
1284 		sc->slow_intr_mask &= ~F_T3DBG;
1285 		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
1286 		taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
1287 		ADAPTER_UNLOCK(sc);
1288 	}
1289 }
1290 
1291 void
1292 t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1293 {
1294 
1295 	/*
1296 	 * The ifnet might not be allocated before this gets called,
1297 	 * as this is called early on in attach by t3_prep_adapter
1298 	 * save the address off in the port structure
1299 	 */
1300 	if (cxgb_debug)
1301 		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1302 	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1303 }
1304 
1305 /*
1306  * Programs the XGMAC based on the settings in the ifnet.  These settings
1307  * include MTU, MAC address, mcast addresses, etc.
1308  */
1309 static void
1310 cxgb_update_mac_settings(struct port_info *p)
1311 {
1312 	struct ifnet *ifp = p->ifp;
1313 	struct t3_rx_mode rm;
1314 	struct cmac *mac = &p->mac;
1315 	int mtu, hwtagging;
1316 
1317 	PORT_LOCK_ASSERT_OWNED(p);
1318 
1319 	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1320 
1321 	mtu = ifp->if_mtu;
1322 	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1323 		mtu += ETHER_VLAN_ENCAP_LEN;
1324 
1325 	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1326 
1327 	t3_mac_set_mtu(mac, mtu);
1328 	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1329 	t3_mac_set_address(mac, 0, p->hw_addr);
1330 	t3_init_rx_mode(&rm, p);
1331 	t3_mac_set_rx_mode(mac, &rm);
1332 }
1333 
1334 
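/*
 * Wait (polling, up to 50ms) until the offload response queue has seen
 * init_cnt + n packets, i.e. replies to n management requests.
 */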
1335 static int
1336 await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1337 			      unsigned long n)
1338 {
1339 	int attempts = 5;
1340 
1341 	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1342 		if (!--attempts)
1343 			return (ETIMEDOUT);
1344 		t3_os_sleep(10);
1345 	}
1346 	return 0;
1347 }
1348 
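/*
 * Initialize the parity of the TP's SMT, L2T and routing-table memories by
 * writing every entry once via management requests, then wait for all of
 * the replies.
 */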
1349 static int
1350 init_tp_parity(struct adapter *adap)
1351 {
1352 	int i;
1353 	struct mbuf *m;
1354 	struct cpl_set_tcb_field *greq;
1355 	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1356 
1357 	t3_tp_set_offload_mode(adap, 1);
1358 
1359 	for (i = 0; i < 16; i++) {
1360 		struct cpl_smt_write_req *req;
1361 
1362 		m = m_gethdr(M_WAITOK, MT_DATA);
1363 		req = mtod(m, struct cpl_smt_write_req *);
1364 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1365 		memset(req, 0, sizeof(*req));
1366 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1367 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1368 		req->iff = i;
1369 		t3_mgmt_tx(adap, m);
1370 	}
1371 
1372 	for (i = 0; i < 2048; i++) {
1373 		struct cpl_l2t_write_req *req;
1374 
1375 		m = m_gethdr(M_WAITOK, MT_DATA);
1376 		req = mtod(m, struct cpl_l2t_write_req *);
1377 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1378 		memset(req, 0, sizeof(*req));
1379 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1380 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1381 		req->params = htonl(V_L2T_W_IDX(i));
1382 		t3_mgmt_tx(adap, m);
1383 	}
1384 
1385 	for (i = 0; i < 2048; i++) {
1386 		struct cpl_rte_write_req *req;
1387 
1388 		m = m_gethdr(M_WAITOK, MT_DATA);
1389 		req = mtod(m, struct cpl_rte_write_req *);
1390 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1391 		memset(req, 0, sizeof(*req));
1392 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1393 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1394 		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1395 		t3_mgmt_tx(adap, m);
1396 	}
1397 
1398 	m = m_gethdr(M_WAITOK, MT_DATA);
1399 	greq = mtod(m, struct cpl_set_tcb_field *);
1400 	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1401 	memset(greq, 0, sizeof(*greq));
1402 	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1403 	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1404 	greq->mask = htobe64(1);
1405 	t3_mgmt_tx(adap, m);
1406 
1407 	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1408 	t3_tp_set_offload_mode(adap, 0);
1409 	return (i);
1410 }
1411 
1412 /**
1413  *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1414  *	@adap: the adapter
1415  *
1416  *	Sets up RSS to distribute packets to multiple receive queues.  We
1417  *	configure the RSS CPU lookup table to distribute to the number of HW
1418  *	receive queues, and the response queue lookup table to narrow that
1419  *	down to the response queues actually configured for each port.
1420  *	We always configure the RSS mapping for two ports since the mapping
1421  *	table has plenty of entries.
1422  */
1423 static void
1424 setup_rss(adapter_t *adap)
1425 {
1426 	int i;
1427 	u_int nq[2];
1428 	uint8_t cpus[SGE_QSETS + 1];
1429 	uint16_t rspq_map[RSS_TABLE_SIZE];
1430 
1431 	for (i = 0; i < SGE_QSETS; ++i)
1432 		cpus[i] = i;
1433 	cpus[SGE_QSETS] = 0xff;
1434 
1435 	nq[0] = nq[1] = 0;
1436 	for_each_port(adap, i) {
1437 		const struct port_info *pi = adap2pinfo(adap, i);
1438 
1439 		nq[pi->tx_chan] += pi->nqsets;
1440 	}
1441 	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1442 		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1443 		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1444 	}
1445 
1446 	/* Calculate the reverse RSS map table */
1447 	for (i = 0; i < SGE_QSETS; ++i)
1448 		adap->rrss_map[i] = 0xff;
1449 	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1450 		if (adap->rrss_map[rspq_map[i]] == 0xff)
1451 			adap->rrss_map[rspq_map[i]] = i;
1452 
1453 	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1454 		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1455 	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1456 	              cpus, rspq_map);
1457 
1458 }
1459 
1460 /*
1461  * Sends an mbuf to the offload queue driver
1462  * via t3_offload_tx().
1463  */
1464 static inline int
1465 offload_tx(struct t3cdev *tdev, struct mbuf *m)
1466 {
1467 	int ret;
1468 
1469 	ret = t3_offload_tx(tdev, m);
1470 	return (ret);
1471 }
1472 
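/*
 * Program the source MAC table (SMT) entry for interface idx with the
 * port's MAC address using a CPL_SMT_WRITE_REQ over the offload queue.
 */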
1473 static int
1474 write_smt_entry(struct adapter *adapter, int idx)
1475 {
1476 	struct port_info *pi = &adapter->port[idx];
1477 	struct cpl_smt_write_req *req;
1478 	struct mbuf *m;
1479 
1480 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1481 		return (ENOMEM);
1482 
1483 	req = mtod(m, struct cpl_smt_write_req *);
1484 	m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
1485 
1486 	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1487 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1488 	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1489 	req->iff = idx;
1490 	memset(req->src_mac1, 0, sizeof(req->src_mac1));
1491 	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1492 
1493 	m_set_priority(m, 1);
1494 
1495 	offload_tx(&adapter->tdev, m);
1496 
1497 	return (0);
1498 }
1499 
1500 static int
1501 init_smt(struct adapter *adapter)
1502 {
1503 	int i;
1504 
1505 	for_each_port(adapter, i)
1506 		write_smt_entry(adapter, i);
1507 	return 0;
1508 }
1509 
1510 static void
1511 init_port_mtus(adapter_t *adapter)
1512 {
1513 	unsigned int mtus = ETHERMTU | (ETHERMTU << 16);
1514 
1515 	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1516 }
1517 
1518 static void
1519 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1520 			      int hi, int port)
1521 {
1522 	struct mbuf *m;
1523 	struct mngt_pktsched_wr *req;
1524 
1525 	m = m_gethdr(M_DONTWAIT, MT_DATA);
1526 	if (m) {
1527 		req = mtod(m, struct mngt_pktsched_wr *);
1528 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1529 		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1530 		req->sched = sched;
1531 		req->idx = qidx;
1532 		req->min = lo;
1533 		req->max = hi;
1534 		req->binding = port;
1535 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1536 		t3_mgmt_tx(adap, m);
1537 	}
1538 }
1539 
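/*
 * Bind each port's queue sets to that port's TX channel in the firmware
 * packet scheduler.
 */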
1540 static void
1541 bind_qsets(adapter_t *sc)
1542 {
1543 	int i, j;
1544 
1545 	for (i = 0; i < (sc)->params.nports; ++i) {
1546 		const struct port_info *pi = adap2pinfo(sc, i);
1547 
1548 		for (j = 0; j < pi->nqsets; ++j) {
1549 			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1550 					  -1, pi->tx_chan);
1551 
1552 		}
1553 	}
1554 }
1555 
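/*
 * If the protocol SRAM image stored in the EEPROM is out of date, load the
 * cxgb_t3<rev>_tp_eeprom firmware module and write it to the EEPROM.
 */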
1556 static void
1557 update_tpeeprom(struct adapter *adap)
1558 {
1559 	const struct firmware *tpeeprom;
1560 
1561 	uint32_t version;
1562 	unsigned int major, minor;
1563 	int ret, len;
1564 	char rev, name[32];
1565 
1566 	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1567 
1568 	major = G_TP_VERSION_MAJOR(version);
1569 	minor = G_TP_VERSION_MINOR(version);
1570 	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1571 		return;
1572 
1573 	rev = t3rev2char(adap);
1574 	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1575 
1576 	tpeeprom = firmware_get(name);
1577 	if (tpeeprom == NULL) {
1578 		device_printf(adap->dev,
1579 			      "could not load TP EEPROM: unable to load %s\n",
1580 			      name);
1581 		return;
1582 	}
1583 
1584 	len = tpeeprom->datasize - 4;
1585 
1586 	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1587 	if (ret)
1588 		goto release_tpeeprom;
1589 
1590 	if (len != TP_SRAM_LEN) {
1591 		device_printf(adap->dev,
1592 			      "%s length is wrong len=%d expected=%d\n", name,
1593 			      len, TP_SRAM_LEN);
1594 		return;
1595 	}
1596 
1597 	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1598 	    TP_SRAM_OFFSET);
1599 
1600 	if (!ret) {
1601 		device_printf(adap->dev,
1602 			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1603 			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1604 	} else
1605 		device_printf(adap->dev,
1606 			      "Protocol SRAM image update in EEPROM failed\n");
1607 
1608 release_tpeeprom:
1609 	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1610 
1611 	return;
1612 }
1613 
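/*
 * Refresh the EEPROM copy of the protocol SRAM if needed, then load the
 * cxgb_t3<rev>_protocol_sram module and program the on-chip protocol SRAM.
 */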
1614 static int
1615 update_tpsram(struct adapter *adap)
1616 {
1617 	const struct firmware *tpsram;
1618 	int ret;
1619 	char rev, name[32];
1620 
1621 	rev = t3rev2char(adap);
1622 	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1623 
1624 	update_tpeeprom(adap);
1625 
1626 	tpsram = firmware_get(name);
1627 	if (tpsram == NULL){
1628 		device_printf(adap->dev, "could not load TP SRAM\n");
1629 		return (EINVAL);
1630 	} else
1631 		device_printf(adap->dev, "updating TP SRAM\n");
1632 
1633 	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1634 	if (ret)
1635 		goto release_tpsram;
1636 
1637 	ret = t3_set_proto_sram(adap, tpsram->data);
1638 	if (ret)
1639 		device_printf(adap->dev, "loading protocol SRAM failed\n");
1640 
1641 release_tpsram:
1642 	firmware_put(tpsram, FIRMWARE_UNLOAD);
1643 
1644 	return ret;
1645 }
1646 
1647 /**
1648  *	cxgb_up - enable the adapter
1649  *	@adap: adapter being enabled
1650  *
1651  *	Called when the first port is enabled, this function performs the
1652  *	actions necessary to make an adapter operational, such as completing
1653  *	the initialization of HW modules, and enabling interrupts.
1654  */
1655 static int
1656 cxgb_up(struct adapter *sc)
1657 {
1658 	int err = 0;
1659 	unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;
1660 
1661 	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1662 					   __func__, sc->open_device_map));
1663 
1664 	if ((sc->flags & FULL_INIT_DONE) == 0) {
1665 
1666 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1667 
1668 		if ((sc->flags & FW_UPTODATE) == 0)
1669 			if ((err = upgrade_fw(sc)))
1670 				goto out;
1671 
1672 		if ((sc->flags & TPS_UPTODATE) == 0)
1673 			if ((err = update_tpsram(sc)))
1674 				goto out;
1675 
1676 		if (is_offload(sc) && nfilters != 0) {
1677 			sc->params.mc5.nservers = 0;
1678 
1679 			if (nfilters < 0)
1680 				sc->params.mc5.nfilters = mxf;
1681 			else
1682 				sc->params.mc5.nfilters = min(nfilters, mxf);
1683 		}
1684 
1685 		err = t3_init_hw(sc, 0);
1686 		if (err)
1687 			goto out;
1688 
1689 		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1690 		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1691 
1692 		err = setup_sge_qsets(sc);
1693 		if (err)
1694 			goto out;
1695 
1696 		alloc_filters(sc);
1697 		setup_rss(sc);
1698 
1699 		t3_intr_clear(sc);
1700 		err = cxgb_setup_interrupts(sc);
1701 		if (err)
1702 			goto out;
1703 
1704 		t3_add_configured_sysctls(sc);
1705 		sc->flags |= FULL_INIT_DONE;
1706 	}
1707 
1708 	t3_intr_clear(sc);
1709 	t3_sge_start(sc);
1710 	t3_intr_enable(sc);
1711 
1712 	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1713 	    is_offload(sc) && init_tp_parity(sc) == 0)
1714 		sc->flags |= TP_PARITY_INIT;
1715 
1716 	if (sc->flags & TP_PARITY_INIT) {
1717 		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1718 		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1719 	}
1720 
1721 	if (!(sc->flags & QUEUES_BOUND)) {
1722 		bind_qsets(sc);
1723 		setup_hw_filters(sc);
1724 		sc->flags |= QUEUES_BOUND;
1725 	}
1726 
1727 	t3_sge_reset_adapter(sc);
1728 out:
1729 	return (err);
1730 }
1731 
1732 /*
1733  * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1734  * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1735  * during controller_detach, not here.
1736  */
1737 static void
1738 cxgb_down(struct adapter *sc)
1739 {
1740 	t3_sge_stop(sc);
1741 	t3_intr_disable(sc);
1742 }
1743 
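/*
 * Mark the offload device open, program the MTU table and SMT entries, and
 * notify all registered offload (ULP) clients that this adapter is ready.
 */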
1744 static int
1745 offload_open(struct port_info *pi)
1746 {
1747 	struct adapter *sc = pi->adapter;
1748 	struct t3cdev *tdev = &sc->tdev;
1749 
1750 	setbit(&sc->open_device_map, OFFLOAD_DEVMAP_BIT);
1751 
1752 	t3_tp_set_offload_mode(sc, 1);
1753 	tdev->lldev = pi->ifp;
1754 	init_port_mtus(sc);
1755 	t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd,
1756 		     sc->params.rev == 0 ?  sc->port[0].ifp->if_mtu : 0xffff);
1757 	init_smt(sc);
1758 	cxgb_add_clients(tdev);
1759 
1760 	return (0);
1761 }
1762 
1763 static int
1764 offload_close(struct t3cdev *tdev)
1765 {
1766 	struct adapter *adapter = tdev2adap(tdev);
1767 
1768 	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1769 		return (0);
1770 
1771 	/* Call back all registered clients */
1772 	cxgb_remove_clients(tdev);
1773 
1774 	tdev->lldev = NULL;
1775 	cxgb_set_dummy_ops(tdev);
1776 	t3_tp_set_offload_mode(adapter, 0);
1777 
1778 	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1779 
1780 	return (0);
1781 }
1782 
1783 /*
1784  * if_init for cxgb ports.
1785  */
1786 static void
1787 cxgb_init(void *arg)
1788 {
1789 	struct port_info *p = arg;
1790 	struct adapter *sc = p->adapter;
1791 
1792 	ADAPTER_LOCK(sc);
1793 	cxgb_init_locked(p); /* releases adapter lock */
1794 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1795 }
1796 
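/*
 * Bring a port up: perform one-time adapter initialization via cxgb_up() if
 * needed, program the MAC, start the link, and arm the TX watchdogs.
 * Called with the adapter lock held; the lock is released before returning.
 */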
1797 static int
1798 cxgb_init_locked(struct port_info *p)
1799 {
1800 	struct adapter *sc = p->adapter;
1801 	struct ifnet *ifp = p->ifp;
1802 	struct cmac *mac = &p->mac;
1803 	int i, rc = 0, may_sleep = 0;
1804 
1805 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1806 
1807 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1808 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1809 			rc = EINTR;
1810 			goto done;
1811 		}
1812 	}
1813 	if (IS_DOOMED(p)) {
1814 		rc = ENXIO;
1815 		goto done;
1816 	}
1817 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1818 
1819 	/*
1820 	 * The code that runs during one-time adapter initialization can sleep
1821 	 * so it's important not to hold any locks across it.
1822 	 */
1823 	may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1824 
1825 	if (may_sleep) {
1826 		SET_BUSY(sc);
1827 		ADAPTER_UNLOCK(sc);
1828 	}
1829 
1830 	if (sc->open_device_map == 0) {
1831 		if ((rc = cxgb_up(sc)) != 0)
1832 			goto done;
1833 
1834 		if (is_offload(sc) && !ofld_disable && offload_open(p))
1835 			log(LOG_WARNING,
1836 			    "Could not initialize offload capabilities\n");
1837 	}
1838 
1839 	PORT_LOCK(p);
1840 	if (isset(&sc->open_device_map, p->port_id) &&
1841 	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1842 		PORT_UNLOCK(p);
1843 		goto done;
1844 	}
1845 	t3_port_intr_enable(sc, p->port_id);
1846 	if (!mac->multiport)
1847 		t3_mac_init(mac);
1848 	cxgb_update_mac_settings(p);
1849 	t3_link_start(&p->phy, mac, &p->link_config);
1850 	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1851 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1852 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1853 	PORT_UNLOCK(p);
1854 
1855 	t3_link_changed(sc, p->port_id);
1856 
1857 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1858 		struct sge_qset *qs = &sc->sge.qs[i];
1859 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1860 
1861 		callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1862 				 txq->txq_watchdog.c_cpu);
1863 	}
1864 
1865 	/* all ok */
1866 	setbit(&sc->open_device_map, p->port_id);
1867 
1868 done:
1869 	if (may_sleep) {
1870 		ADAPTER_LOCK(sc);
1871 		KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1872 		CLR_BUSY(sc);
1873 		wakeup_one(&sc->flags);
1874 	}
1875 	ADAPTER_UNLOCK(sc);
1876 	return (rc);
1877 }
1878 
1879 static int
1880 cxgb_uninit_locked(struct port_info *p)
1881 {
1882 	struct adapter *sc = p->adapter;
1883 	int rc;
1884 
1885 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1886 
1887 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1888 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1889 			rc = EINTR;
1890 			goto done;
1891 		}
1892 	}
1893 	if (IS_DOOMED(p)) {
1894 		rc = ENXIO;
1895 		goto done;
1896 	}
1897 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1898 	SET_BUSY(sc);
1899 	ADAPTER_UNLOCK(sc);
1900 
1901 	rc = cxgb_uninit_synchronized(p);
1902 
1903 	ADAPTER_LOCK(sc);
1904 	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1905 	CLR_BUSY(sc);
1906 	wakeup_one(&sc->flags);
1907 done:
1908 	ADAPTER_UNLOCK(sc);
1909 	return (rc);
1910 }
1911 
1912 /*
1913  * Called on "ifconfig down", and from port_detach
1914  */
1915 static int
1916 cxgb_uninit_synchronized(struct port_info *pi)
1917 {
1918 	struct adapter *sc = pi->adapter;
1919 	struct ifnet *ifp = pi->ifp;
1920 
1921 	/*
1922 	 * taskqueue_drain may cause a deadlock if the adapter lock is held.
1923 	 */
1924 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1925 
1926 	/*
1927 	 * Clear this port's bit from the open device map, and then drain all
1928 	 * the tasks that can access/manipulate this port's port_info or ifp.
1929 	 * We disable this port's interrupts here so that the slow/ext
1930 	 * interrupt tasks won't be enqueued.  The tick task will continue to
1931 	 * be enqueued every second but the runs after this drain will not see
1932 	 * this port in the open device map.
1933 	 *
1934 	 * A well-behaved task must take open_device_map into account and ignore
1935 	 * ports that are not open.
1936 	 */
1937 	clrbit(&sc->open_device_map, pi->port_id);
1938 	t3_port_intr_disable(sc, pi->port_id);
1939 	taskqueue_drain(sc->tq, &sc->slow_intr_task);
1940 	taskqueue_drain(sc->tq, &sc->ext_intr_task);
1941 	taskqueue_drain(sc->tq, &sc->tick_task);
1942 
1943 	PORT_LOCK(pi);
1944 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1945 
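	/*
	 * Quiesce the MAC: stop pause frames, let the TX FIFO drain, then
	 * disable the receive path and power down the PHY.
	 */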
1946 	/* disable pause frames */
1947 	t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1948 
1949 	/* Reset RX FIFO HWM */
1950 	t3_set_reg_field(sc, A_XGM_RXFIFO_CFG + pi->mac.offset,
1951 			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1952 
1953 	DELAY(100 * 1000);
1954 
1955 	/* Wait for TXFIFO empty */
1956 	t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1957 			F_TXFIFO_EMPTY, 1, 20, 5);
1958 
1959 	DELAY(100 * 1000);
1960 	t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1961 
1962 
1963 	pi->phy.ops->power_down(&pi->phy, 1);
1964 
1965 	PORT_UNLOCK(pi);
1966 
1967 	pi->link_config.link_ok = 0;
1968 	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1969 
1970 	if ((sc->open_device_map & PORT_MASK) == 0)
1971 		offload_close(&sc->tdev);
1972 
1973 	if (sc->open_device_map == 0)
1974 		cxgb_down(pi->adapter);
1975 
1976 	return (0);
1977 }
1978 
1979 /*
1980  * Mark lro enabled or disabled in all qsets for this port
1981  */
1982 static int
1983 cxgb_set_lro(struct port_info *p, int enabled)
1984 {
1985 	int i;
1986 	struct adapter *adp = p->adapter;
1987 	struct sge_qset *q;
1988 
1989 	for (i = 0; i < p->nqsets; i++) {
1990 		q = &adp->sge.qs[p->first_qset + i];
1991 		q->lro.enabled = (enabled != 0);
1992 	}
1993 	return (0);
1994 }
1995 
1996 static int
1997 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1998 {
1999 	struct port_info *p = ifp->if_softc;
2000 	struct adapter *sc = p->adapter;
2001 	struct ifreq *ifr = (struct ifreq *)data;
2002 	int flags, error = 0, mtu;
2003 	uint32_t mask;
2004 
2005 	switch (command) {
2006 	case SIOCSIFMTU:
2007 		ADAPTER_LOCK(sc);
2008 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2009 		if (error) {
2010 fail:
2011 			ADAPTER_UNLOCK(sc);
2012 			return (error);
2013 		}
2014 
2015 		mtu = ifr->ifr_mtu;
2016 		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
2017 			error = EINVAL;
2018 		} else {
2019 			ifp->if_mtu = mtu;
2020 			PORT_LOCK(p);
2021 			cxgb_update_mac_settings(p);
2022 			PORT_UNLOCK(p);
2023 		}
2024 		ADAPTER_UNLOCK(sc);
2025 		break;
2026 	case SIOCSIFFLAGS:
2027 		ADAPTER_LOCK(sc);
2028 		if (IS_DOOMED(p)) {
2029 			error = ENXIO;
2030 			goto fail;
2031 		}
2032 		if (ifp->if_flags & IFF_UP) {
2033 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2034 				flags = p->if_flags;
2035 				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
2036 				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
2037 					if (IS_BUSY(sc)) {
2038 						error = EBUSY;
2039 						goto fail;
2040 					}
2041 					PORT_LOCK(p);
2042 					cxgb_update_mac_settings(p);
2043 					PORT_UNLOCK(p);
2044 				}
2045 				ADAPTER_UNLOCK(sc);
2046 			} else
2047 				error = cxgb_init_locked(p);
2048 			p->if_flags = ifp->if_flags;
2049 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2050 			error = cxgb_uninit_locked(p);
2051 		else
2052 			ADAPTER_UNLOCK(sc);
2053 
2054 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
2055 		break;
2056 	case SIOCADDMULTI:
2057 	case SIOCDELMULTI:
2058 		ADAPTER_LOCK(sc);
2059 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2060 		if (error)
2061 			goto fail;
2062 
2063 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2064 			PORT_LOCK(p);
2065 			cxgb_update_mac_settings(p);
2066 			PORT_UNLOCK(p);
2067 		}
2068 		ADAPTER_UNLOCK(sc);
2069 
2070 		break;
2071 	case SIOCSIFCAP:
2072 		ADAPTER_LOCK(sc);
2073 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2074 		if (error)
2075 			goto fail;
2076 
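		/* Each bit set in mask is a capability being toggled. */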
2077 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2078 		if (mask & IFCAP_TXCSUM) {
2079 			ifp->if_capenable ^= IFCAP_TXCSUM;
2080 			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2081 
2082 			if (IFCAP_TSO & ifp->if_capenable &&
2083 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2084 				ifp->if_capenable &= ~IFCAP_TSO;
2085 				ifp->if_hwassist &= ~CSUM_TSO;
2086 				if_printf(ifp,
2087 				    "tso disabled due to -txcsum.\n");
2088 			}
2089 		}
2090 		if (mask & IFCAP_RXCSUM)
2091 			ifp->if_capenable ^= IFCAP_RXCSUM;
2092 		if (mask & IFCAP_TSO4) {
2093 			ifp->if_capenable ^= IFCAP_TSO4;
2094 
2095 			if (IFCAP_TSO & ifp->if_capenable) {
2096 				if (IFCAP_TXCSUM & ifp->if_capenable)
2097 					ifp->if_hwassist |= CSUM_TSO;
2098 				else {
2099 					ifp->if_capenable &= ~IFCAP_TSO;
2100 					ifp->if_hwassist &= ~CSUM_TSO;
2101 					if_printf(ifp,
2102 					    "enable txcsum first.\n");
2103 					error = EAGAIN;
2104 				}
2105 			} else
2106 				ifp->if_hwassist &= ~CSUM_TSO;
2107 		}
2108 		if (mask & IFCAP_LRO) {
2109 			ifp->if_capenable ^= IFCAP_LRO;
2110 
2111 			/* Safe to do this even if cxgb_up has not been called yet */
2112 			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2113 		}
2114 		if (mask & IFCAP_VLAN_HWTAGGING) {
2115 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2116 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2117 				PORT_LOCK(p);
2118 				cxgb_update_mac_settings(p);
2119 				PORT_UNLOCK(p);
2120 			}
2121 		}
2122 		if (mask & IFCAP_VLAN_MTU) {
2123 			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2124 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2125 				PORT_LOCK(p);
2126 				cxgb_update_mac_settings(p);
2127 				PORT_UNLOCK(p);
2128 			}
2129 		}
2130 		if (mask & IFCAP_VLAN_HWTSO)
2131 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2132 		if (mask & IFCAP_VLAN_HWCSUM)
2133 			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2134 
2135 #ifdef VLAN_CAPABILITIES
2136 		VLAN_CAPABILITIES(ifp);
2137 #endif
2138 		ADAPTER_UNLOCK(sc);
2139 		break;
2140 	case SIOCSIFMEDIA:
2141 	case SIOCGIFMEDIA:
2142 		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2143 		break;
2144 	default:
2145 		error = ether_ioctl(ifp, command, data);
2146 	}
2147 
2148 	return (error);
2149 }
2150 
2151 static int
2152 cxgb_media_change(struct ifnet *ifp)
2153 {
2154 	return (EOPNOTSUPP);
2155 }
2156 
2157 /*
2158  * Translates phy->modtype to the correct Ethernet media subtype.
2159  */
2160 static int
2161 cxgb_ifm_type(int mod)
2162 {
2163 	switch (mod) {
2164 	case phy_modtype_sr:
2165 		return (IFM_10G_SR);
2166 	case phy_modtype_lr:
2167 		return (IFM_10G_LR);
2168 	case phy_modtype_lrm:
2169 		return (IFM_10G_LRM);
2170 	case phy_modtype_twinax:
2171 		return (IFM_10G_TWINAX);
2172 	case phy_modtype_twinax_long:
2173 		return (IFM_10G_TWINAX_LONG);
2174 	case phy_modtype_none:
2175 		return (IFM_NONE);
2176 	case phy_modtype_unknown:
2177 		return (IFM_UNKNOWN);
2178 	}
2179 
2180 	KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2181 	return (IFM_UNKNOWN);
2182 }
2183 
2184 /*
2185  * Rebuilds the ifmedia list for this port, and sets the current media.
2186  */
2187 static void
2188 cxgb_build_medialist(struct port_info *p)
2189 {
2190 	struct cphy *phy = &p->phy;
2191 	struct ifmedia *media = &p->media;
2192 	int mod = phy->modtype;
2193 	int m = IFM_ETHER | IFM_FDX;
2194 
2195 	PORT_LOCK(p);
2196 
2197 	ifmedia_removeall(media);
2198 	if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2199 		/* Copper (RJ45) */
2200 
2201 		if (phy->caps & SUPPORTED_10000baseT_Full)
2202 			ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2203 
2204 		if (phy->caps & SUPPORTED_1000baseT_Full)
2205 			ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2206 
2207 		if (phy->caps & SUPPORTED_100baseT_Full)
2208 			ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2209 
2210 		if (phy->caps & SUPPORTED_10baseT_Full)
2211 			ifmedia_add(media, m | IFM_10_T, mod, NULL);
2212 
2213 		ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2214 		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2215 
2216 	} else if (phy->caps & SUPPORTED_TP) {
2217 		/* Copper (CX4) */
2218 
2219 		KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2220 			("%s: unexpected cap 0x%x", __func__, phy->caps));
2221 
2222 		ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2223 		ifmedia_set(media, m | IFM_10G_CX4);
2224 
2225 	} else if (phy->caps & SUPPORTED_FIBRE &&
2226 		   phy->caps & SUPPORTED_10000baseT_Full) {
2227 		/* 10G optical (but includes SFP+ twinax) */
2228 
2229 		m |= cxgb_ifm_type(mod);
2230 		if (IFM_SUBTYPE(m) == IFM_NONE)
2231 			m &= ~IFM_FDX;
2232 
2233 		ifmedia_add(media, m, mod, NULL);
2234 		ifmedia_set(media, m);
2235 
2236 	} else if (phy->caps & SUPPORTED_FIBRE &&
2237 		   phy->caps & SUPPORTED_1000baseT_Full) {
2238 		/* 1G optical */
2239 
2240 		/* XXX: Lie and claim to be SX, could actually be any 1G-X */
2241 		ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2242 		ifmedia_set(media, m | IFM_1000_SX);
2243 
2244 	} else {
2245 		KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2246 			    phy->caps));
2247 	}
2248 
2249 	PORT_UNLOCK(p);
2250 }
2251 
2252 static void
2253 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2254 {
2255 	struct port_info *p = ifp->if_softc;
2256 	struct ifmedia_entry *cur = p->media.ifm_cur;
2257 	int speed = p->link_config.speed;
2258 
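	/* Rebuild the media list if the transceiver module type has changed. */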
2259 	if (cur->ifm_data != p->phy.modtype) {
2260 		cxgb_build_medialist(p);
2261 		cur = p->media.ifm_cur;
2262 	}
2263 
2264 	ifmr->ifm_status = IFM_AVALID;
2265 	if (!p->link_config.link_ok)
2266 		return;
2267 
2268 	ifmr->ifm_status |= IFM_ACTIVE;
2269 
2270 	/*
2271 	 * active and current will differ iff current media is autoselect.  That
2272 	 * can happen only for copper RJ45.
2273 	 */
2274 	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2275 		return;
2276 	KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2277 		("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2278 
2279 	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2280 	if (speed == SPEED_10000)
2281 		ifmr->ifm_active |= IFM_10G_T;
2282 	else if (speed == SPEED_1000)
2283 		ifmr->ifm_active |= IFM_1000_T;
2284 	else if (speed == SPEED_100)
2285 		ifmr->ifm_active |= IFM_100_TX;
2286 	else if (speed == SPEED_10)
2287 		ifmr->ifm_active |= IFM_10_T;
2288 	else
2289 		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2290 			    speed));
2291 }
2292 
2293 static void
2294 cxgb_async_intr(void *data)
2295 {
2296 	adapter_t *sc = data;
2297 
2298 	if (cxgb_debug)
2299 		device_printf(sc->dev, "cxgb_async_intr\n");
2300 	/*
2301 	 * May need to sleep - defer to taskqueue
2302 	 */
2303 	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2304 }
2305 
2306 static void
2307 cxgb_ext_intr_handler(void *arg, int count)
2308 {
2309 	adapter_t *sc = (adapter_t *)arg;
2310 
2311 	if (cxgb_debug)
2312 		printf("cxgb_ext_intr_handler\n");
2313 
2314 	t3_phy_intr_handler(sc);
2315 
2316 	/* Now reenable external interrupts */
2317 	ADAPTER_LOCK(sc);
2318 	if (sc->slow_intr_mask) {
2319 		sc->slow_intr_mask |= F_T3DBG;
2320 		t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
2321 		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
2322 	}
2323 	ADAPTER_UNLOCK(sc);
2324 }
2325 
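/*
 * Decide whether the link state must be polled: once when POLL_LINK_1ST_TIME
 * is set, and thereafter whenever a link fault is pending or the PHY cannot
 * generate link interrupts.
 */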
2326 static inline int
2327 link_poll_needed(struct port_info *p)
2328 {
2329 	struct cphy *phy = &p->phy;
2330 
2331 	if (phy->caps & POLL_LINK_1ST_TIME) {
2332 		p->phy.caps &= ~POLL_LINK_1ST_TIME;
2333 		return (1);
2334 	}
2335 
2336 	return (p->link_fault || !(phy->caps & SUPPORTED_LINK_IRQ));
2337 }
2338 
2339 static void
2340 check_link_status(adapter_t *sc)
2341 {
2342 	int i;
2343 
2344 	for (i = 0; i < sc->params.nports; ++i) {
2345 		struct port_info *p = &sc->port[i];
2346 
2347 		if (!isset(&sc->open_device_map, p->port_id))
2348 			continue;
2349 
2350 		if (link_poll_needed(p))
2351 			t3_link_changed(sc, i);
2352 	}
2353 }
2354 
2355 static void
2356 check_t3b2_mac(struct adapter *sc)
2357 {
2358 	int i;
2359 
2360 	if (sc->flags & CXGB_SHUTDOWN)
2361 		return;
2362 
2363 	for_each_port(sc, i) {
2364 		struct port_info *p = &sc->port[i];
2365 		int status;
2366 #ifdef INVARIANTS
2367 		struct ifnet *ifp = p->ifp;
2368 #endif
2369 
2370 		if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2371 		    !p->link_config.link_ok)
2372 			continue;
2373 
2374 		KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2375 			("%s: state mismatch (drv_flags %x, device_map %x)",
2376 			 __func__, ifp->if_drv_flags, sc->open_device_map));
2377 
2378 		PORT_LOCK(p);
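		/*
		 * A return of 1 means the watchdog toggled the MAC; 2 means
		 * the MAC must be reinitialized and restarted (done below).
		 */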
2379 		status = t3b2_mac_watchdog_task(&p->mac);
2380 		if (status == 1)
2381 			p->mac.stats.num_toggled++;
2382 		else if (status == 2) {
2383 			struct cmac *mac = &p->mac;
2384 
2385 			cxgb_update_mac_settings(p);
2386 			t3_link_start(&p->phy, mac, &p->link_config);
2387 			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2388 			t3_port_intr_enable(sc, p->port_id);
2389 			p->mac.stats.num_resets++;
2390 		}
2391 		PORT_UNLOCK(p);
2392 	}
2393 }
2394 
2395 static void
2396 cxgb_tick(void *arg)
2397 {
2398 	adapter_t *sc = (adapter_t *)arg;
2399 
2400 	if (sc->flags & CXGB_SHUTDOWN)
2401 		return;
2402 
2403 	taskqueue_enqueue(sc->tq, &sc->tick_task);
2404 	callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
2405 }
2406 
2407 static void
2408 cxgb_tick_handler(void *arg, int count)
2409 {
2410 	adapter_t *sc = (adapter_t *)arg;
2411 	const struct adapter_params *p = &sc->params;
2412 	int i;
2413 	uint32_t cause, reset;
2414 
2415 	if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2416 		return;
2417 
2418 	check_link_status(sc);
2419 
2420 	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2421 		check_t3b2_mac(sc);
2422 
2423 	cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2424 	if (cause) {
2425 		struct sge_qset *qs = &sc->sge.qs[0];
2426 		uint32_t mask, v;
2427 
2428 		v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2429 
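		/*
		 * The low SGE_QSETS bits flag starved response queues, the
		 * next SGE_QSETS bits (RSPQXDISABLED) are skipped, and the
		 * following 2 * SGE_QSETS bits flag empty free lists (two
		 * per qset).
		 */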
2430 		mask = 1;
2431 		for (i = 0; i < SGE_QSETS; i++) {
2432 			if (v & mask)
2433 				qs[i].rspq.starved++;
2434 			mask <<= 1;
2435 		}
2436 
2437 		mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2438 
2439 		for (i = 0; i < SGE_QSETS * 2; i++) {
2440 			if (v & mask) {
2441 				qs[i / 2].fl[i % 2].empty++;
2442 			}
2443 			mask <<= 1;
2444 		}
2445 
2446 		/* clear */
2447 		t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2448 		t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2449 	}
2450 
2451 	for (i = 0; i < sc->params.nports; i++) {
2452 		struct port_info *pi = &sc->port[i];
2453 		struct ifnet *ifp = pi->ifp;
2454 		struct cmac *mac = &pi->mac;
2455 		struct mac_stats *mstats = &mac->stats;
2456 		int drops, j;
2457 
2458 		if (!isset(&sc->open_device_map, pi->port_id))
2459 			continue;
2460 
2461 		PORT_LOCK(pi);
2462 		t3_mac_update_stats(mac);
2463 		PORT_UNLOCK(pi);
2464 
2465 		ifp->if_opackets = mstats->tx_frames;
2466 		ifp->if_ipackets = mstats->rx_frames;
2467 		ifp->if_obytes = mstats->tx_octets;
2468 		ifp->if_ibytes = mstats->rx_octets;
2469 		ifp->if_omcasts = mstats->tx_mcast_frames;
2470 		ifp->if_imcasts = mstats->rx_mcast_frames;
2471 		ifp->if_collisions = mstats->tx_total_collisions;
2472 		ifp->if_iqdrops = mstats->rx_cong_drops;
2473 
2474 		drops = 0;
2475 		for (j = pi->first_qset; j < pi->first_qset + pi->nqsets; j++)
2476 			drops += sc->sge.qs[j].txq[TXQ_ETH].txq_mr->br_drops;
2477 		ifp->if_snd.ifq_drops = drops;
2478 
2479 		ifp->if_oerrors =
2480 		    mstats->tx_excess_collisions +
2481 		    mstats->tx_underrun +
2482 		    mstats->tx_len_errs +
2483 		    mstats->tx_mac_internal_errs +
2484 		    mstats->tx_excess_deferral +
2485 		    mstats->tx_fcs_errs;
2486 		ifp->if_ierrors =
2487 		    mstats->rx_jabber +
2488 		    mstats->rx_data_errs +
2489 		    mstats->rx_sequence_errs +
2490 		    mstats->rx_runt +
2491 		    mstats->rx_too_long +
2492 		    mstats->rx_mac_internal_errs +
2493 		    mstats->rx_short +
2494 		    mstats->rx_fcs_errs;
2495 
2496 		if (mac->multiport)
2497 			continue;
2498 
2499 		/* Count rx fifo overflows, once per second */
2500 		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2501 		reset = 0;
2502 		if (cause & F_RXFIFO_OVERFLOW) {
2503 			mac->stats.rx_fifo_ovfl++;
2504 			reset |= F_RXFIFO_OVERFLOW;
2505 		}
2506 		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2507 	}
2508 }
2509 
2510 static void
2511 touch_bars(device_t dev)
2512 {
2513 	/*
2514 	 * Don't enable yet
2515 	 */
2516 #if !defined(__LP64__) && 0
2517 	u32 v;
2518 
2519 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2520 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2521 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2522 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2523 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2524 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2525 #endif
2526 }
2527 
2528 static int
2529 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2530 {
2531 	uint8_t *buf;
2532 	int err = 0;
2533 	u32 aligned_offset, aligned_len, *p;
2534 	struct adapter *adapter = pi->adapter;
2535 
2536 
2537 	aligned_offset = offset & ~3;
2538 	aligned_len = (len + (offset & 3) + 3) & ~3;
2539 
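	/*
	 * The EEPROM is written in 4-byte words.  For an unaligned request,
	 * read the words that straddle the region into a bounce buffer so
	 * the surrounding bytes are preserved, merge in the caller's data,
	 * and write the whole aligned range back.
	 */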
2540 	if (aligned_offset != offset || aligned_len != len) {
2541 		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2542 		if (!buf)
2543 			return (ENOMEM);
2544 		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2545 		if (!err && aligned_len > 4)
2546 			err = t3_seeprom_read(adapter,
2547 					      aligned_offset + aligned_len - 4,
2548 					      (u32 *)&buf[aligned_len - 4]);
2549 		if (err)
2550 			goto out;
2551 		memcpy(buf + (offset & 3), data, len);
2552 	} else
2553 		buf = (uint8_t *)(uintptr_t)data;
2554 
2555 	err = t3_seeprom_wp(adapter, 0);
2556 	if (err)
2557 		goto out;
2558 
2559 	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2560 		err = t3_seeprom_write(adapter, aligned_offset, *p);
2561 		aligned_offset += 4;
2562 	}
2563 
2564 	if (!err)
2565 		err = t3_seeprom_wp(adapter, 1);
2566 out:
2567 	if (buf != data)
2568 		free(buf, M_DEVBUF);
2569 	return (err);
2570 }
2571 
2572 
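/*
 * A negative value means "parameter not supplied" and is always accepted;
 * otherwise the value must lie within [lo, hi].
 */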
2573 static int
2574 in_range(int val, int lo, int hi)
2575 {
2576 	return (val < 0 || (val <= hi && val >= lo));
2577 }
2578 
2579 static int
2580 cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
2581 {
2582 	return (0);
2583 }
2584 
2585 static int
2586 cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2587 {
2588 	return (0);
2589 }
2590 
2591 static int
2592 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2593     int fflag, struct thread *td)
2594 {
2595 	int mmd, error = 0;
2596 	struct port_info *pi = dev->si_drv1;
2597 	adapter_t *sc = pi->adapter;
2598 
2599 #ifdef PRIV_SUPPORTED
2600 	if (priv_check(td, PRIV_DRIVER)) {
2601 		if (cxgb_debug)
2602 			printf("user does not have access to privileged ioctls\n");
2603 		return (EPERM);
2604 	}
2605 #else
2606 	if (suser(td)) {
2607 		if (cxgb_debug)
2608 			printf("user does not have access to privileged ioctls\n");
2609 		return (EPERM);
2610 	}
2611 #endif
2612 
2613 	switch (cmd) {
2614 	case CHELSIO_GET_MIIREG: {
2615 		uint32_t val;
2616 		struct cphy *phy = &pi->phy;
2617 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2618 
2619 		if (!phy->mdio_read)
2620 			return (EOPNOTSUPP);
2621 		if (is_10G(sc)) {
2622 			mmd = mid->phy_id >> 8;
2623 			if (!mmd)
2624 				mmd = MDIO_DEV_PCS;
2625 			else if (mmd > MDIO_DEV_VEND2)
2626 				return (EINVAL);
2627 
2628 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2629 					     mid->reg_num, &val);
2630 		} else
2631 		        error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2632 					     mid->reg_num & 0x1f, &val);
2633 		if (error == 0)
2634 			mid->val_out = val;
2635 		break;
2636 	}
2637 	case CHELSIO_SET_MIIREG: {
2638 		struct cphy *phy = &pi->phy;
2639 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2640 
2641 		if (!phy->mdio_write)
2642 			return (EOPNOTSUPP);
2643 		if (is_10G(sc)) {
2644 			mmd = mid->phy_id >> 8;
2645 			if (!mmd)
2646 				mmd = MDIO_DEV_PCS;
2647 			else if (mmd > MDIO_DEV_VEND2)
2648 				return (EINVAL);
2649 
2650 			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2651 					      mmd, mid->reg_num, mid->val_in);
2652 		} else
2653 			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2654 					      mid->reg_num & 0x1f,
2655 					      mid->val_in);
2656 		break;
2657 	}
2658 	case CHELSIO_SETREG: {
2659 		struct ch_reg *edata = (struct ch_reg *)data;
2660 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2661 			return (EFAULT);
2662 		t3_write_reg(sc, edata->addr, edata->val);
2663 		break;
2664 	}
2665 	case CHELSIO_GETREG: {
2666 		struct ch_reg *edata = (struct ch_reg *)data;
2667 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2668 			return (EFAULT);
2669 		edata->val = t3_read_reg(sc, edata->addr);
2670 		break;
2671 	}
2672 	case CHELSIO_GET_SGE_CONTEXT: {
2673 		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2674 		mtx_lock_spin(&sc->sge.reg_lock);
2675 		switch (ecntxt->cntxt_type) {
2676 		case CNTXT_TYPE_EGRESS:
2677 			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2678 			    ecntxt->data);
2679 			break;
2680 		case CNTXT_TYPE_FL:
2681 			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2682 			    ecntxt->data);
2683 			break;
2684 		case CNTXT_TYPE_RSP:
2685 			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2686 			    ecntxt->data);
2687 			break;
2688 		case CNTXT_TYPE_CQ:
2689 			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2690 			    ecntxt->data);
2691 			break;
2692 		default:
2693 			error = EINVAL;
2694 			break;
2695 		}
2696 		mtx_unlock_spin(&sc->sge.reg_lock);
2697 		break;
2698 	}
2699 	case CHELSIO_GET_SGE_DESC: {
2700 		struct ch_desc *edesc = (struct ch_desc *)data;
2701 		int ret;
2702 		if (edesc->queue_num >= SGE_QSETS * 6)
2703 			return (EINVAL);
2704 		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2705 		    edesc->queue_num % 6, edesc->idx, edesc->data);
2706 		if (ret < 0)
2707 			return (EINVAL);
2708 		edesc->size = ret;
2709 		break;
2710 	}
2711 	case CHELSIO_GET_QSET_PARAMS: {
2712 		struct qset_params *q;
2713 		struct ch_qset_params *t = (struct ch_qset_params *)data;
2714 		int q1 = pi->first_qset;
2715 		int nqsets = pi->nqsets;
2716 		int i;
2717 
2718 		if (t->qset_idx >= nqsets)
2719 			return (EINVAL);
2720 
2721 		i = q1 + t->qset_idx;
2722 		q = &sc->params.sge.qset[i];
2723 		t->rspq_size   = q->rspq_size;
2724 		t->txq_size[0] = q->txq_size[0];
2725 		t->txq_size[1] = q->txq_size[1];
2726 		t->txq_size[2] = q->txq_size[2];
2727 		t->fl_size[0]  = q->fl_size;
2728 		t->fl_size[1]  = q->jumbo_size;
2729 		t->polling     = q->polling;
2730 		t->lro         = q->lro;
2731 		t->intr_lat    = q->coalesce_usecs;
2732 		t->cong_thres  = q->cong_thres;
2733 		t->qnum        = i;
2734 
2735 		if ((sc->flags & FULL_INIT_DONE) == 0)
2736 			t->vector = 0;
2737 		else if (sc->flags & USING_MSIX)
2738 			t->vector = rman_get_start(sc->msix_irq_res[i]);
2739 		else
2740 			t->vector = rman_get_start(sc->irq_res);
2741 
2742 		break;
2743 	}
2744 	case CHELSIO_GET_QSET_NUM: {
2745 		struct ch_reg *edata = (struct ch_reg *)data;
2746 		edata->val = pi->nqsets;
2747 		break;
2748 	}
2749 	case CHELSIO_LOAD_FW: {
2750 		uint8_t *fw_data;
2751 		uint32_t vers;
2752 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2753 
2754 		/*
2755 		 * You're allowed to load a firmware image only before FULL_INIT_DONE
2756 		 *
2757 		 * FW_UPTODATE is also set so the rest of the initialization
2758 		 * will not overwrite what was loaded here.  This gives you the
2759 		 * flexibility to load any firmware (and maybe shoot yourself in
2760 		 * the foot).
2761 		 */
2762 
2763 		ADAPTER_LOCK(sc);
2764 		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2765 			ADAPTER_UNLOCK(sc);
2766 			return (EBUSY);
2767 		}
2768 
2769 		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2770 		if (!fw_data)
2771 			error = ENOMEM;
2772 		else
2773 			error = copyin(t->buf, fw_data, t->len);
2774 
2775 		if (!error)
2776 			error = -t3_load_fw(sc, fw_data, t->len);
2777 
2778 		if (t3_get_fw_version(sc, &vers) == 0) {
2779 			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2780 			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2781 			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2782 		}
2783 
2784 		if (!error)
2785 			sc->flags |= FW_UPTODATE;
2786 
2787 		free(fw_data, M_DEVBUF);
2788 		ADAPTER_UNLOCK(sc);
2789 		break;
2790 	}
2791 	case CHELSIO_LOAD_BOOT: {
2792 		uint8_t *boot_data;
2793 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2794 
2795 		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2796 		if (!boot_data)
2797 			return (ENOMEM);
2798 
2799 		error = copyin(t->buf, boot_data, t->len);
2800 		if (!error)
2801 			error = -t3_load_boot(sc, boot_data, t->len);
2802 
2803 		free(boot_data, M_DEVBUF);
2804 		break;
2805 	}
2806 	case CHELSIO_GET_PM: {
2807 		struct ch_pm *m = (struct ch_pm *)data;
2808 		struct tp_params *p = &sc->params.tp;
2809 
2810 		if (!is_offload(sc))
2811 			return (EOPNOTSUPP);
2812 
2813 		m->tx_pg_sz = p->tx_pg_size;
2814 		m->tx_num_pg = p->tx_num_pgs;
2815 		m->rx_pg_sz  = p->rx_pg_size;
2816 		m->rx_num_pg = p->rx_num_pgs;
2817 		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2818 
2819 		break;
2820 	}
2821 	case CHELSIO_SET_PM: {
2822 		struct ch_pm *m = (struct ch_pm *)data;
2823 		struct tp_params *p = &sc->params.tp;
2824 
2825 		if (!is_offload(sc))
2826 			return (EOPNOTSUPP);
2827 		if (sc->flags & FULL_INIT_DONE)
2828 			return (EBUSY);
2829 
2830 		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2831 		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2832 			return (EINVAL);	/* not power of 2 */
2833 		if (!(m->rx_pg_sz & 0x14000))
2834 			return (EINVAL);	/* not 16KB or 64KB */
2835 		if (!(m->tx_pg_sz & 0x1554000))
2836 			return (EINVAL);	/* not a power of 4 from 16KB to 16MB */
2837 		if (m->tx_num_pg == -1)
2838 			m->tx_num_pg = p->tx_num_pgs;
2839 		if (m->rx_num_pg == -1)
2840 			m->rx_num_pg = p->rx_num_pgs;
2841 		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2842 			return (EINVAL);
2843 		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2844 		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2845 			return (EINVAL);
2846 
2847 		p->rx_pg_size = m->rx_pg_sz;
2848 		p->tx_pg_size = m->tx_pg_sz;
2849 		p->rx_num_pgs = m->rx_num_pg;
2850 		p->tx_num_pgs = m->tx_num_pg;
2851 		break;
2852 	}
2853 	case CHELSIO_SETMTUTAB: {
2854 		struct ch_mtus *m = (struct ch_mtus *)data;
2855 		int i;
2856 
2857 		if (!is_offload(sc))
2858 			return (EOPNOTSUPP);
2859 		if (offload_running(sc))
2860 			return (EBUSY);
2861 		if (m->nmtus != NMTUS)
2862 			return (EINVAL);
2863 		if (m->mtus[0] < 81)         /* accommodate SACK */
2864 			return (EINVAL);
2865 
2866 		/*
2867 		 * MTUs must be in ascending order
2868 		 */
2869 		for (i = 1; i < NMTUS; ++i)
2870 			if (m->mtus[i] < m->mtus[i - 1])
2871 				return (EINVAL);
2872 
2873 		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2874 		break;
2875 	}
2876 	case CHELSIO_GETMTUTAB: {
2877 		struct ch_mtus *m = (struct ch_mtus *)data;
2878 
2879 		if (!is_offload(sc))
2880 			return (EOPNOTSUPP);
2881 
2882 		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2883 		m->nmtus = NMTUS;
2884 		break;
2885 	}
2886 	case CHELSIO_GET_MEM: {
2887 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2888 		struct mc7 *mem;
2889 		uint8_t *useraddr;
2890 		u64 buf[32];
2891 
2892 		/*
2893 		 * Use these to avoid modifying len/addr in the return
2894 		 * struct
2895 		 */
2896 		uint32_t len = t->len, addr = t->addr;
2897 
2898 		if (!is_offload(sc))
2899 			return (EOPNOTSUPP);
2900 		if (!(sc->flags & FULL_INIT_DONE))
2901 			return (EIO);         /* need the memory controllers */
2902 		if ((addr & 0x7) || (len & 0x7))
2903 			return (EINVAL);
2904 		if (t->mem_id == MEM_CM)
2905 			mem = &sc->cm;
2906 		else if (t->mem_id == MEM_PMRX)
2907 			mem = &sc->pmrx;
2908 		else if (t->mem_id == MEM_PMTX)
2909 			mem = &sc->pmtx;
2910 		else
2911 			return (EINVAL);
2912 
2913 		/*
2914 		 * Version scheme:
2915 		 * bits 0..9: chip version
2916 		 * bits 10..15: chip revision
2917 		 */
2918 		t->version = 3 | (sc->params.rev << 10);
2919 
2920 		/*
2921 		 * Read 256 bytes at a time as len can be large and we don't
2922 		 * want to use huge intermediate buffers.
2923 		 */
2924 		useraddr = (uint8_t *)t->buf;
2925 		while (len) {
2926 			unsigned int chunk = min(len, sizeof(buf));
2927 
2928 			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2929 			if (error)
2930 				return (-error);
2931 			if (copyout(buf, useraddr, chunk))
2932 				return (EFAULT);
2933 			useraddr += chunk;
2934 			addr += chunk;
2935 			len -= chunk;
2936 		}
2937 		break;
2938 	}
2939 	case CHELSIO_READ_TCAM_WORD: {
2940 		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2941 
2942 		if (!is_offload(sc))
2943 			return (EOPNOTSUPP);
2944 		if (!(sc->flags & FULL_INIT_DONE))
2945 			return (EIO);         /* need MC5 */
2946 		return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
2947 		break;
2948 	}
2949 	case CHELSIO_SET_TRACE_FILTER: {
2950 		struct ch_trace *t = (struct ch_trace *)data;
2951 		const struct trace_params *tp;
2952 
2953 		tp = (const struct trace_params *)&t->sip;
2954 		if (t->config_tx)
2955 			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2956 					       t->trace_tx);
2957 		if (t->config_rx)
2958 			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2959 					       t->trace_rx);
2960 		break;
2961 	}
2962 	case CHELSIO_SET_PKTSCHED: {
2963 		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2964 		if (sc->open_device_map == 0)
2965 			return (EAGAIN);
2966 		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2967 		    p->binding);
2968 		break;
2969 	}
2970 	case CHELSIO_IFCONF_GETREGS: {
2971 		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2972 		int reglen = cxgb_get_regs_len();
2973 		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2974 		if (buf == NULL) {
2975 			return (ENOMEM);
2976 		}
2977 		if (regs->len > reglen)
2978 			regs->len = reglen;
2979 		else if (regs->len < reglen)
2980 			error = ENOBUFS;
2981 
2982 		if (!error) {
2983 			cxgb_get_regs(sc, regs, buf);
2984 			error = copyout(buf, regs->data, reglen);
2985 		}
2986 		free(buf, M_DEVBUF);
2987 
2988 		break;
2989 	}
2990 	case CHELSIO_SET_HW_SCHED: {
2991 		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2992 		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2993 
2994 		if ((sc->flags & FULL_INIT_DONE) == 0)
2995 			return (EAGAIN);       /* need TP to be initialized */
2996 		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2997 		    !in_range(t->channel, 0, 1) ||
2998 		    !in_range(t->kbps, 0, 10000000) ||
2999 		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
3000 		    !in_range(t->flow_ipg, 0,
3001 			      dack_ticks_to_usec(sc, 0x7ff)))
3002 			return (EINVAL);
3003 
3004 		if (t->kbps >= 0) {
3005 			error = t3_config_sched(sc, t->kbps, t->sched);
3006 			if (error < 0)
3007 				return (-error);
3008 		}
3009 		if (t->class_ipg >= 0)
3010 			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
3011 		if (t->flow_ipg >= 0) {
3012 			t->flow_ipg *= 1000;     /* us -> ns */
3013 			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
3014 		}
3015 		if (t->mode >= 0) {
3016 			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
3017 
3018 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
3019 					 bit, t->mode ? bit : 0);
3020 		}
3021 		if (t->channel >= 0)
3022 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
3023 					 1 << t->sched, t->channel << t->sched);
3024 		break;
3025 	}
3026 	case CHELSIO_GET_EEPROM: {
3027 		int i;
3028 		struct ch_eeprom *e = (struct ch_eeprom *)data;
3029 		uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
3030 
3031 		if (buf == NULL) {
3032 			return (ENOMEM);
3033 		}
3034 		e->magic = EEPROM_MAGIC;
3035 		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
3036 			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
3037 
3038 		if (!error)
3039 			error = copyout(buf + e->offset, e->data, e->len);
3040 
3041 		free(buf, M_DEVBUF);
3042 		break;
3043 	}
3044 	case CHELSIO_CLEAR_STATS: {
3045 		if (!(sc->flags & FULL_INIT_DONE))
3046 			return (EAGAIN);
3047 
3048 		PORT_LOCK(pi);
3049 		t3_mac_update_stats(&pi->mac);
3050 		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
3051 		PORT_UNLOCK(pi);
3052 		break;
3053 	}
3054 	case CHELSIO_GET_UP_LA: {
3055 		struct ch_up_la *la = (struct ch_up_la *)data;
3056 		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3057 		if (buf == NULL) {
3058 			return (ENOMEM);
3059 		}
3060 		if (la->bufsize < LA_BUFSIZE)
3061 			error = ENOBUFS;
3062 
3063 		if (!error)
3064 			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3065 					      &la->bufsize, buf);
3066 		if (!error)
3067 			error = copyout(buf, la->data, la->bufsize);
3068 
3069 		free(buf, M_DEVBUF);
3070 		break;
3071 	}
3072 	case CHELSIO_GET_UP_IOQS: {
3073 		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3074 		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3075 		uint32_t *v;
3076 
3077 		if (buf == NULL) {
3078 			return (ENOMEM);
3079 		}
3080 		if (ioqs->bufsize < IOQS_BUFSIZE)
3081 			error = ENOBUFS;
3082 
3083 		if (!error)
3084 			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3085 
3086 		if (!error) {
3087 			v = (uint32_t *)buf;
3088 
3089 			ioqs->bufsize -= 4 * sizeof(uint32_t);
3090 			ioqs->ioq_rx_enable = *v++;
3091 			ioqs->ioq_tx_enable = *v++;
3092 			ioqs->ioq_rx_status = *v++;
3093 			ioqs->ioq_tx_status = *v++;
3094 
3095 			error = copyout(v, ioqs->data, ioqs->bufsize);
3096 		}
3097 
3098 		free(buf, M_DEVBUF);
3099 		break;
3100 	}
3101 	case CHELSIO_SET_FILTER: {
3102 		struct ch_filter *f = (struct ch_filter *)data;
3103 		struct filter_info *p;
3104 		unsigned int nfilters = sc->params.mc5.nfilters;
3105 
3106 		if (!is_offload(sc))
3107 			return (EOPNOTSUPP);	/* No TCAM */
3108 		if (!(sc->flags & FULL_INIT_DONE))
3109 			return (EAGAIN);	/* mc5 not setup yet */
3110 		if (nfilters == 0)
3111 			return (EBUSY);		/* TOE will use TCAM */
3112 
3113 		/* sanity checks */
3114 		if (f->filter_id >= nfilters ||
3115 		    (f->val.dip && f->mask.dip != 0xffffffff) ||
3116 		    (f->val.sport && f->mask.sport != 0xffff) ||
3117 		    (f->val.dport && f->mask.dport != 0xffff) ||
3118 		    (f->val.vlan && f->mask.vlan != 0xfff) ||
3119 		    (f->val.vlan_prio &&
3120 			f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3121 		    (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3122 		    f->qset >= SGE_QSETS ||
3123 		    sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3124 			return (EINVAL);
3125 
3126 		/* Was allocated with M_WAITOK */
3127 		KASSERT(sc->filters, ("filter table NULL\n"));
3128 
3129 		p = &sc->filters[f->filter_id];
3130 		if (p->locked)
3131 			return (EPERM);
3132 
3133 		bzero(p, sizeof(*p));
3134 		p->sip = f->val.sip;
3135 		p->sip_mask = f->mask.sip;
3136 		p->dip = f->val.dip;
3137 		p->sport = f->val.sport;
3138 		p->dport = f->val.dport;
3139 		p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3140 		p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3141 		    FILTER_NO_VLAN_PRI;
3142 		p->mac_hit = f->mac_hit;
3143 		p->mac_vld = f->mac_addr_idx != 0xffff;
3144 		p->mac_idx = f->mac_addr_idx;
3145 		p->pkt_type = f->proto;
3146 		p->report_filter_id = f->want_filter_id;
3147 		p->pass = f->pass;
3148 		p->rss = f->rss;
3149 		p->qset = f->qset;
3150 
3151 		error = set_filter(sc, f->filter_id, p);
3152 		if (error == 0)
3153 			p->valid = 1;
3154 		break;
3155 	}
3156 	case CHELSIO_DEL_FILTER: {
3157 		struct ch_filter *f = (struct ch_filter *)data;
3158 		struct filter_info *p;
3159 		unsigned int nfilters = sc->params.mc5.nfilters;
3160 
3161 		if (!is_offload(sc))
3162 			return (EOPNOTSUPP);
3163 		if (!(sc->flags & FULL_INIT_DONE))
3164 			return (EAGAIN);
3165 		if (nfilters == 0 || sc->filters == NULL)
3166 			return (EINVAL);
3167 		if (f->filter_id >= nfilters)
3168 			return (EINVAL);
3169 
3170 		p = &sc->filters[f->filter_id];
3171 		if (p->locked)
3172 			return (EPERM);
3173 		if (!p->valid)
3174 			return (EFAULT); /* Read "Bad address" as "Bad index" */
3175 
3176 		bzero(p, sizeof(*p));
3177 		p->sip = p->sip_mask = 0xffffffff;
3178 		p->vlan = 0xfff;
3179 		p->vlan_prio = FILTER_NO_VLAN_PRI;
3180 		p->pkt_type = 1;
3181 		error = set_filter(sc, f->filter_id, p);
3182 		break;
3183 	}
3184 	case CHELSIO_GET_FILTER: {
3185 		struct ch_filter *f = (struct ch_filter *)data;
3186 		struct filter_info *p;
3187 		unsigned int i, nfilters = sc->params.mc5.nfilters;
3188 
3189 		if (!is_offload(sc))
3190 			return (EOPNOTSUPP);
3191 		if (!(sc->flags & FULL_INIT_DONE))
3192 			return (EAGAIN);
3193 		if (nfilters == 0 || sc->filters == NULL)
3194 			return (EINVAL);
3195 
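		/*
		 * A filter_id of 0xffffffff starts the scan at 0; otherwise
		 * return the first valid filter after the given id, or
		 * 0xffffffff if there is none.
		 */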
3196 		i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3197 		for (; i < nfilters; i++) {
3198 			p = &sc->filters[i];
3199 			if (!p->valid)
3200 				continue;
3201 
3202 			bzero(f, sizeof(*f));
3203 
3204 			f->filter_id = i;
3205 			f->val.sip = p->sip;
3206 			f->mask.sip = p->sip_mask;
3207 			f->val.dip = p->dip;
3208 			f->mask.dip = p->dip ? 0xffffffff : 0;
3209 			f->val.sport = p->sport;
3210 			f->mask.sport = p->sport ? 0xffff : 0;
3211 			f->val.dport = p->dport;
3212 			f->mask.dport = p->dport ? 0xffff : 0;
3213 			f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3214 			f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3215 			f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3216 			    0 : p->vlan_prio;
3217 			f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3218 			    0 : FILTER_NO_VLAN_PRI;
3219 			f->mac_hit = p->mac_hit;
3220 			f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3221 			f->proto = p->pkt_type;
3222 			f->want_filter_id = p->report_filter_id;
3223 			f->pass = p->pass;
3224 			f->rss = p->rss;
3225 			f->qset = p->qset;
3226 
3227 			break;
3228 		}
3229 
3230 		if (i == nfilters)
3231 			f->filter_id = 0xffffffff;
3232 		break;
3233 	}
3234 	default:
3235 		return (EOPNOTSUPP);
3236 		break;
3237 	}
3238 
3239 	return (error);
3240 }
3241 
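/*
 * Copy the registers in [start, end] into the dump buffer at their own byte
 * offsets, so the dump mirrors the chip's register address map.
 */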
3242 static __inline void
3243 reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3244     unsigned int end)
3245 {
3246 	uint32_t *p = (uint32_t *)(buf + start);
3247 
3248 	for ( ; start <= end; start += sizeof(uint32_t))
3249 		*p++ = t3_read_reg(ap, start);
3250 }
3251 
3252 #define T3_REGMAP_SIZE (3 * 1024)
3253 static int
3254 cxgb_get_regs_len(void)
3255 {
3256 	return (T3_REGMAP_SIZE);
3257 }
3258 
3259 static void
3260 cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3261 {
3262 
3263 	/*
3264 	 * Version scheme:
3265 	 * bits 0..9: chip version
3266 	 * bits 10..15: chip revision
3267 	 * bit 31: set for PCIe cards
3268 	 */
3269 	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3270 
3271 	/*
3272 	 * We skip the MAC statistics registers because they are clear-on-read.
3273 	 * Also reading multi-register stats would need to synchronize with the
3274 	 * periodic mac stats accumulation.  Hard to justify the complexity.
3275 	 */
3276 	memset(buf, 0, cxgb_get_regs_len());
3277 	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3278 	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3279 	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3280 	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3281 	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3282 	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3283 		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3284 	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3285 		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3286 }
3287 
3288 static int
3289 alloc_filters(struct adapter *sc)
3290 {
3291 	struct filter_info *p;
3292 	unsigned int nfilters = sc->params.mc5.nfilters;
3293 
3294 	if (nfilters == 0)
3295 		return (0);
3296 
3297 	p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3298 	sc->filters = p;
3299 
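	/*
	 * Reserve the last entry as a locked catch-all filter (wildcard
	 * match, pass with RSS); it is programmed in setup_hw_filters().
	 */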
3300 	p = &sc->filters[nfilters - 1];
3301 	p->vlan = 0xfff;
3302 	p->vlan_prio = FILTER_NO_VLAN_PRI;
3303 	p->pass = p->rss = p->valid = p->locked = 1;
3304 
3305 	return (0);
3306 }
3307 
3308 static int
3309 setup_hw_filters(struct adapter *sc)
3310 {
3311 	int i, rc;
3312 	unsigned int nfilters = sc->params.mc5.nfilters;
3313 
3314 	if (!sc->filters)
3315 		return (0);
3316 
3317 	t3_enable_filters(sc);
3318 
3319 	for (i = rc = 0; i < nfilters && !rc; i++) {
3320 		if (sc->filters[i].locked)
3321 			rc = set_filter(sc, i, &sc->filters[i]);
3322 	}
3323 
3324 	return (rc);
3325 }
3326 
3327 static int
3328 set_filter(struct adapter *sc, int id, const struct filter_info *f)
3329 {
3330 	int len;
3331 	struct mbuf *m;
3332 	struct ulp_txpkt *txpkt;
3333 	struct work_request_hdr *wr;
3334 	struct cpl_pass_open_req *oreq;
3335 	struct cpl_set_tcb_field *sreq;
3336 
3337 	len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3338 	KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3339 
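	/* Map the filter index to its TCAM entry: filters sit just below the
	 * routing region at the top of the TCAM. */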
3340 	id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3341 	      sc->params.mc5.nfilters;
3342 
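	/*
	 * Build one atomic management work request: a CPL_PASS_OPEN_REQ that
	 * programs the match fields, followed by two CPL_SET_TCB_FIELD
	 * commands that set the filter's action (pass/drop, RSS, whether the
	 * filter id is reported).
	 */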
3343 	m = m_gethdr(M_WAITOK, MT_DATA);
3344 	m->m_len = m->m_pkthdr.len = len;
3345 	bzero(mtod(m, char *), len);
3346 
3347 	wr = mtod(m, struct work_request_hdr *);
3348 	wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3349 
3350 	oreq = (struct cpl_pass_open_req *)(wr + 1);
3351 	txpkt = (struct ulp_txpkt *)oreq;
3352 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3353 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3354 	OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3355 	oreq->local_port = htons(f->dport);
3356 	oreq->peer_port = htons(f->sport);
3357 	oreq->local_ip = htonl(f->dip);
3358 	oreq->peer_ip = htonl(f->sip);
3359 	oreq->peer_netmask = htonl(f->sip_mask);
3360 	oreq->opt0h = 0;
3361 	oreq->opt0l = htonl(F_NO_OFFLOAD);
3362 	oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3363 			 V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3364 			 V_VLAN_PRI(f->vlan_prio >> 1) |
3365 			 V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3366 			 V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3367 			 V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3368 
3369 	sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3370 	set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3371 			  (f->report_filter_id << 15) | (1 << 23) |
3372 			  ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3373 	set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3374 	t3_mgmt_tx(sc, m);
3375 
3376 	if (f->pass && !f->rss) {
3377 		len = sizeof(*sreq);
3378 		m = m_gethdr(M_WAITOK, MT_DATA);
3379 		m->m_len = m->m_pkthdr.len = len;
3380 		bzero(mtod(m, char *), len);
3381 		sreq = mtod(m, struct cpl_set_tcb_field *);
3382 		sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3383 		mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3384 				 (u64)sc->rrss_map[f->qset] << 19);
3385 		t3_mgmt_tx(sc, m);
3386 	}
3387 	return (0);
3388 }
3389 
3390 static inline void
3391 mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3392     unsigned int word, u64 mask, u64 val)
3393 {
3394 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3395 	req->reply = V_NO_REPLY(1);
3396 	req->cpu_idx = 0;
3397 	req->word = htons(word);
3398 	req->mask = htobe64(mask);
3399 	req->val = htobe64(val);
3400 }
3401 
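/*
 * Same as mk_set_tcb_field() but prefixed with a ULP_TXPKT header so the
 * request can be embedded in a larger management work request.
 */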
3402 static inline void
3403 set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3404     unsigned int word, u64 mask, u64 val)
3405 {
3406 	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3407 
3408 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3409 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3410 	mk_set_tcb_field(req, tid, word, mask, val);
3411 }
3412