/******************************************************************************

Copyright (c) 2006-2009, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/sx.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <net/zlib.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <machine/bus.h>
#include <machine/in_cksum.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>
#include <sys/smp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>	/* XXX for pci_cfg_restore */

#include <vm/vm.h>			/* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif

#include <dev/mxge/mxge_mcp.h>
#include <dev/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/mxge/if_mxge_var.h>

/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_lro_cnt = 8;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
static int mxge_always_promisc = 0;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";

static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),
	{0, 0}
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);

static int
mxge_probe(device_t dev)
{
	int rev;


	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		rev = pci_get_revid(dev);
		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n",
				      rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}

static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386) || defined(__amd64)
	vm_offset_t len;
	int err;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram,
			       len, PAT_WRITE_COMBINING);
	if (err != 0) {
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
			      err);
		sc->wc = 0;
	}
#endif
}


/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	if (error == 0) {
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}

static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}


static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */

static int
mxge_parse_strings(mxge_softc_t *sc)
{
#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)

	char *ptr, *limit;
	int i, found_mac;

	ptr = sc->eeprom_strings;
	limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
	found_mac = 0;
	while (ptr < limit && *ptr != '\0') {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			ptr += 1;
			sc->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				ptr += 3;
				if ((ptr + 2) > limit)
					goto abort;
				sc->mac_addr[i] = strtoul(ptr, NULL, 16);
				found_mac = 1;
			}
		} else if (memcmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strncpy(sc->product_code_string, ptr,
				sizeof (sc->product_code_string) - 1);
		} else if (memcmp(ptr, "SN=", 3) == 0) {
			ptr += 3;
			strncpy(sc->serial_number_string, ptr,
				sizeof (sc->serial_number_string) - 1);
		}
		MXGE_NEXT_STRING(ptr);
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}

#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;


	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function.
	   Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves.  This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	off = base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);


	if (va == NULL) {
		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif


static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";


	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */

	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}

/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary.  Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB.  If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */

static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
				      pectl);
			sc->tx_boundary = 2048;
		}
	}

	/*
	 * load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0) {
		return status;
	}

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen.
	 */

	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS)
		device_printf(dev, "Falling back to ethp! "
			      "Please install up to date fw\n");
	return status;
}

static int
mxge_select_firmware(mxge_softc_t *sc)
{
	int aligned = 0;


	if (mxge_force_firmware != 0) {
		if (mxge_force_firmware == 1)
			aligned = 1;
		else
			aligned = 0;
		if (mxge_verbose)
			device_printf(sc->dev,
				      "Assuming %s completions (forced)\n",
				      aligned ? "aligned" : "unaligned");
		goto abort;
	}

	/* if the PCIe link width is 4 or less, we can use the aligned
	   firmware and skip any checks */
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev,
			      "PCIe x%d Link, expect reduced performance\n",
			      sc->link_width);
		aligned = 1;
		goto abort;
	}

	if (0 == mxge_firmware_probe(sc))
		return 0;

abort:
	if (aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx_boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
	}
	return (mxge_load_firmware(sc, 0));
}

/* used to shed a const qualifier without a cast warning */
union qualhack
{
	const char *ro_char;
	char *rw_char;
};

static int
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{


	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
			      be32toh(hdr->mcp_type));
		return EIO;
	}

	/* save firmware version for sysctl */
	strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
	if (mxge_verbose)
		device_printf(sc->dev, "firmware id: %s\n", hdr->version);

	sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
	       &sc->fw_ver_minor, &sc->fw_ver_tiny);

	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
	      && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		device_printf(sc->dev, "Found firmware version %s\n",
			      sc->fw_version);
		device_printf(sc->dev, "Driver needs %d.%d\n",
			      MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
		return EINVAL;
	}
	return 0;

}

/* zlib allocator callbacks backed by kernel malloc(9) */
static void *
z_alloc(void *nil, u_int items, u_int size)
{
	void *ptr;

	ptr = malloc(items * size, M_TEMP, M_NOWAIT);
	return ptr;
}

static void
z_free(void *nil, void *ptr)
{
	free(ptr, M_TEMP);
}


static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
	z_stream zs;
	char *inflate_buffer;
	const struct firmware *fw;
	const mcp_gen_header_t *hdr;
	unsigned hdr_offset;
	int status;
	unsigned int i;
	char dummy;
	size_t fw_len;

	fw = firmware_get(sc->fw_name);
	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n",
			      sc->fw_name);
		return ENOENT;
	}



	/* setup zlib and decompress f/w */
	bzero(&zs, sizeof (zs));
	zs.zalloc = z_alloc;
	zs.zfree = z_free;
	status = inflateInit(&zs);
	if (status != Z_OK) {
		status = EIO;
		goto abort_with_zs;
	}

	/* the uncompressed size is stored as the firmware version,
	   which would otherwise go unused */
	fw_len = (size_t) fw->version;
	inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
	if (inflate_buffer == NULL) {
		status = ENOMEM;
		goto abort_with_zs;
	}
	zs.avail_in = fw->datasize;
	zs.next_in = __DECONST(char *, fw->data);
	zs.avail_out = fw_len;
	zs.next_out = inflate_buffer;
	status = inflate(&zs, Z_FINISH);
	if (status != Z_STREAM_END) {
		device_printf(sc->dev, "zlib %d\n", status);
		status = EIO;
		goto abort_with_buffer;
	}

	/* check id */
	hdr_offset =
		htobe32(*(const uint32_t *)
			(inflate_buffer + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
		device_printf(sc->dev, "Bad firmware file");
		status = EIO;
		goto abort_with_buffer;
	}
	hdr = (const void*)(inflate_buffer + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_buffer;

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < fw_len; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
			      inflate_buffer + i,
			      min(256U, (unsigned)(fw_len - i)));
		wmb();
		dummy = *sc->sram;
		wmb();
	}

	*limit = fw_len;
	status = 0;
abort_with_buffer:
	free(inflate_buffer, M_TEMP);
abort_with_zs:
	inflateEnd(&zs);
abort_with_fw:
	firmware_put(fw, FIRMWARE_UNLOAD);
	return status;
}

/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);		/* confirm addr MSW */
	buf[1] = htobe32(dma_low);		/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);		/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high);		/* dummy addr MSW */
	buf[4] = htobe32(dma_low);		/* dummy addr LSW */
	buf[5] = htobe32(enable);		/* enable? */


	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}

static int
mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof(*buf) + 8];
	volatile mcp_cmd_response_t *response = sc->cmd;
	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
	uint32_t dma_low, dma_high;
	int err, sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	buf->data0 = htobe32(data->data0);
	buf->data1 = htobe32(data->data1);
	buf->data2 = htobe32(data->data2);
	buf->cmd = htobe32(cmd);
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf->response_addr.low = htobe32(dma_low);
	buf->response_addr.high = htobe32(dma_high);
	mtx_lock(&sc->cmd_mtx);
	response->result = 0xffffffff;
	wmb();
	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	err = EAGAIN;
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
		wmb();
		switch (be32toh(response->result)) {
		case 0:
			data->data0 = be32toh(response->data);
			err = 0;
			break;
		case 0xffffffff:
			DELAY(1000);
			break;
		case MXGEFW_CMD_UNKNOWN:
			err = ENOSYS;
			break;
		case MXGEFW_CMD_ERROR_UNALIGNED:
			err = E2BIG;
			break;
		case MXGEFW_CMD_ERROR_BUSY:
			err = EBUSY;
			break;
		default:
			device_printf(sc->dev,
				      "mxge: command %d "
				      "failed, result = %d\n",
				      cmd, be32toh(response->result));
			err = ENXIO;
			break;
		}
		if (err != EAGAIN)
			break;
	}
	if (err == EAGAIN)
		device_printf(sc->dev, "mxge: command %d timed out, "
			      "result = %d\n",
			      cmd, be32toh(response->result));
	mtx_unlock(&sc->cmd_mtx);
	return err;
}

static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not malloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	free(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}

	return status;
}


static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
				      "Using firmware currently running on NIC"
				      ".  For optimal\n");
			device_printf(sc->dev,
				      "performance consider loading optimized "
				      "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
					/* where the code starts*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8);	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev,"handoff failed (%p = 0x%x)",
			      confirm, *confirm);

		return ENXIO;
	}
	return 0;
}

static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;


	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}

static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set promisc mode\n");
	}
}

static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */

	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */

	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		      &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		      &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "Failed "
				      "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
				      "%d\t", err);
			/* abort, leaving multicast filtering off */
			IF_ADDR_UNLOCK(ifp);
			return;
		}
	}
	IF_ADDR_UNLOCK(ifp);
	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}

static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}

static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);


	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}


	if (interrupts_setup) {
		/* Now exchange information about interrupts  */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);


	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);


	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed to set interrupt parameters\n");
		return status;
	}


	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);


	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lro_bad_csum = 0;
		ss->lro_queued = 0;
		ss->lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			ss->fw_stats->valid = 0;
			ss->fw_stats->send_done_count = 0;
		}
	}
	sc->rdma_tags_available = 15;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, 0);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	return status;
}

static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0) {
		return err;
	}
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

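	/* the delay is in microseconds; reject zero and anything over
	   one second */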
	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0) {
		return err;
	}
	if (enabled == sc->pause)
		return 0;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_pause(sc, enabled);
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
{
	struct ifnet *ifp;
	int err = 0;

	ifp = sc->ifp;
	if (lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	else
		ifp->if_capenable |= IFCAP_LRO;
	sc->lro_cnt = lro_cnt;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		mxge_close(sc);
		err = mxge_open(sc);
	}
	return err;
}

static int
mxge_change_lro(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int lro_cnt;
	int err;

	sc = arg1;
	lro_cnt = sc->lro_cnt;
	err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
	if (err != 0)
		return err;

	if (lro_cnt == sc->lro_cnt)
		return 0;

	if (lro_cnt > 128)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_lro_locked(sc, lro_cnt);
	mtx_unlock(&sc->driver_mtx);
	return err;
}

/* sysctl handler: export a big-endian firmware counter as a
   host-order int */
static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}

static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int slice;

	if (sc->slice_sysctl_tree == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		if (ss == NULL || ss->sysctl_tree == NULL)
			continue;
		sysctl_ctx_free(&ss->sysctl_ctx);
		ss->sysctl_tree = NULL;
	}
	sysctl_ctx_free(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = NULL;
}

static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
	fw = sc->ss[0].fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "firmware_version",
			  CTLFLAG_RD, &sc->fw_version,
			  0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "serial_number",
			  CTLFLAG_RD, &sc->serial_number_string,
			  0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "product_code",
			  CTLFLAG_RD, &sc->product_code_string,
			  0, "product_code");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "pcie_link_width",
		       CTLFLAG_RD, &sc->link_width,
		       0, "PCIe link width");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_boundary",
		       CTLFLAG_RD, &sc->tx_boundary,
		       0, "tx_boundary");
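	/* write-combining state and the DMA benchmark results
	   gathered by mxge_dma_test() */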
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_combine",
		       CTLFLAG_RD, &sc->wc,
		       0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_dma_MBs",
		       CTLFLAG_RD, &sc->read_dma,
		       0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_dma_MBs",
		       CTLFLAG_RD, &sc->write_dma,
		       0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_write_dma_MBs",
		       CTLFLAG_RD, &sc->read_write_dma,
		       0, "DMA concurrent Read/Write speed in MB/s");


	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"intr_coal_delay",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_intr_coal,
			"I", "interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"flow_control_enabled",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_flow_control,
			"I", "flow control enabled");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "deassert_wait",
		       CTLFLAG_RW, &mxge_deassert_wait,
		       0, "Wait for IRQ line to go low in ihandler");

	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"link_up",
			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
			0, mxge_handle_be32,
			"I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"rdma_tags_available",
			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
			0, mxge_handle_be32,
			"I", "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_crc32",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_crc32,
			0, mxge_handle_be32,
			"I", "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_phy",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_phy,
			0, mxge_handle_be32,
			"I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_error_or_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_link_error_or_filtered,
			0, mxge_handle_be32,
			"I", "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_overflow",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
			0, mxge_handle_be32,
			"I", "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_multicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_multicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_big_buffer",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_small_buffer",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_no_small_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_overrun",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
			0, mxge_handle_be32,
			"I", "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_pause",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_pause,
			0, mxge_handle_be32,
			"I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_runt",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
			0, mxge_handle_be32,
			"I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_unicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_unicast_filtered");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "verbose",
		       CTLFLAG_RW, &mxge_verbose,
		       0, "verbose printing");

	/* lro */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"lro_cnt",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_lro,
			"I", "number of lro merge queues");


	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree =
		SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
				"slice", CTLFLAG_RD, 0, "");

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		sprintf(slice_num, "%d", slice);
		ss->sysctl_tree =
			SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
					CTLFLAG_RD, 0, "");
		children = SYSCTL_CHILDREN(ss->sysctl_tree);
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_small_cnt",
			       CTLFLAG_RD, &ss->rx_small.cnt,
			       0, "rx_small_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_big_cnt",
			       CTLFLAG_RD, &ss->rx_big.cnt,
			       0, "rx_big_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_req",
			       CTLFLAG_RD, &ss->tx.req,
			       0, "tx_req");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
			       0, "number of lro merge queues flushed");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_queued", CTLFLAG_RD, &ss->lro_queued,
			       0, "number of frames appended to lro merge "
			       "queues");

		/* only transmit from slice 0 for now */
		if (slice > 0)
			continue;

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_done",
			       CTLFLAG_RD, &ss->tx.done,
			       0, "tx_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_pkt_done",
			       CTLFLAG_RD, &ss->tx.pkt_done,
			       0, "tx_pkt_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_stall",
			       CTLFLAG_RD, &ss->tx.stall,
			       0, "tx_stall");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_wake",
			       CTLFLAG_RD, &ss->tx.wake,
			       0, "tx_wake");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_defrag",
			       CTLFLAG_RD, &ss->tx.defrag,
			       0, "tx_defrag");
	}
}

/* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
   backwards one at a time and handle ring wraps */

static inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
			  mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
		wmb();
	}
}

/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */

static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
		int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}

#if IFCAP_TSO4

static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, int ip_off)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct ip *ip;
	struct tcphdr *tcp;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	/* ensure we have the ethernet, IP and TCP
	   header together in the first mbuf, copy
	   it to a scratch buffer if not */
	if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
		m_copydata(m, 0, ip_off + sizeof (*ip),
			   ss->scratch);
		ip = (struct ip *)(ss->scratch + ip_off);
	} else {
		ip = (struct ip *)(mtod(m, char *) + ip_off);
	}
	if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2)
			    + sizeof (*tcp))) {
		m_copydata(m, 0, ip_off + (ip->ip_hl << 2)
			   + sizeof (*tcp), ss->scratch);
		ip = (struct ip *)(mtod(m, char *) + ip_off);
	}

	tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
	cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2));

	/* TSO implies checksum offload on this hardware */
	cksum_offset = ip_off + (ip->ip_hl << 2);
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;


	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */

	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces*/
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (__predict_false(cksum_offset > seglen))
				cksum_offset -= seglen;
			else
				cksum_offset = 0;
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req-rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	ss->sc->ifp->if_oerrors++;
	if (!once) {
		printf("tx->max_desc exceeded via TSO!\n");
		printf("mss = %d, %ld, %d!\n", mss,
		       (long)seg - (long)tx->seg_list, tx->max_desc);
		once = 1;
	}
	return;

}

#endif /* IFCAP_TSO4 */

#ifdef MXGE_NEW_VLAN_API
/*
 * We reproduce the software vlan tag insertion from
 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
 * vlan tag insertion. We need to advertise this in order to have the
 * vlan interface respect our csum offload flags.
 */
static struct mbuf *
mxge_vlan_tag_insert(struct mbuf *m)
{
	struct ether_vlan_header *evl;

	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT);
	if (__predict_false(m == NULL))
		return NULL;
	if (m->m_len < sizeof(*evl)) {
		m = m_pullup(m, sizeof(*evl));
		if (__predict_false(m == NULL))
			return NULL;
	}
	/*
	 * Transform the Ethernet header into an Ethernet header
	 * with 802.1Q encapsulation.
	 */
	evl = mtod(m, struct ether_vlan_header *);
	bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
	      (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
	evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
	evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
	m->m_flags &= ~M_VLANTAG;
	return m;
}
#endif /* MXGE_NEW_VLAN_API */

static void
mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
{
	mxge_softc_t *sc;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct mbuf *m_tmp;
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;
	struct ip *ip;
	int cnt, cum_len, err, i, idx, odd_flag, ip_off;
	uint16_t pseudo_hdr_offset;
	uint8_t flags, cksum_offset;


	sc = ss->sc;
	ifp = sc->ifp;
	tx = &ss->tx;

	ip_off = sizeof (struct ether_header);
#ifdef MXGE_NEW_VLAN_API
	if (m->m_flags & M_VLANTAG) {
		m = mxge_vlan_tag_insert(m);
		if (__predict_false(m == NULL))
			goto drop;
		ip_off += ETHER_VLAN_ENCAP_LEN;
	}
#endif
	/* (try to) map the frame for DMA */
	idx = tx->req & tx->mask;
	err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
				      m, tx->seg_list, &cnt,
				      BUS_DMA_NOWAIT);
	if (__predict_false(err == EFBIG)) {
		/* Too many segments in the chain.  Try
		   to defrag */
		m_tmp = m_defrag(m, M_NOWAIT);
		if (m_tmp == NULL) {
			goto drop;
		}
		ss->tx.defrag++;
		m = m_tmp;
		err = bus_dmamap_load_mbuf_sg(tx->dmat,
					      tx->info[idx].map,
					      m, tx->seg_list, &cnt,
					      BUS_DMA_NOWAIT);
	}
	if (__predict_false(err != 0)) {
		device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
			      " packet len = %d\n", err, m->m_pkthdr.len);
		goto drop;
	}
	bus_dmamap_sync(tx->dmat, tx->info[idx].map,
			BUS_DMASYNC_PREWRITE);
	tx->info[idx].m = m;

#if IFCAP_TSO4
	/* TSO is different enough, we handle it in another routine */
	if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
		mxge_encap_tso(ss, m, cnt, ip_off);
		return;
	}
#endif

	req = tx->req_list;
	cksum_offset = 0;
	pseudo_hdr_offset = 0;
	flags = MXGEFW_FLAGS_NO_TSO;

	/* checksum offloading? */
*/ 1998 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) { 1999 /* ensure ip header is in first mbuf, copy 2000 it to a scratch buffer if not */ 2001 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 2002 m_copydata(m, 0, ip_off + sizeof (*ip), 2003 ss->scratch); 2004 ip = (struct ip *)(ss->scratch + ip_off); 2005 } else { 2006 ip = (struct ip *)(mtod(m, char *) + ip_off); 2007 } 2008 cksum_offset = ip_off + (ip->ip_hl << 2); 2009 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2010 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2011 req->cksum_offset = cksum_offset; 2012 flags |= MXGEFW_FLAGS_CKSUM; 2013 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2014 } else { 2015 odd_flag = 0; 2016 } 2017 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2018 flags |= MXGEFW_FLAGS_SMALL; 2019 2020 /* convert segments into a request list */ 2021 cum_len = 0; 2022 seg = tx->seg_list; 2023 req->flags = MXGEFW_FLAGS_FIRST; 2024 for (i = 0; i < cnt; i++) { 2025 req->addr_low = 2026 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2027 req->addr_high = 2028 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2029 req->length = htobe16(seg->ds_len); 2030 req->cksum_offset = cksum_offset; 2031 if (cksum_offset > seg->ds_len) 2032 cksum_offset -= seg->ds_len; 2033 else 2034 cksum_offset = 0; 2035 req->pseudo_hdr_offset = pseudo_hdr_offset; 2036 req->pad = 0; /* complete solid 16-byte block */ 2037 req->rdma_count = 1; 2038 req->flags |= flags | ((cum_len & 1) * odd_flag); 2039 cum_len += seg->ds_len; 2040 seg++; 2041 req++; 2042 req->flags = 0; 2043 } 2044 req--; 2045 /* pad runts to 60 bytes */ 2046 if (cum_len < 60) { 2047 req++; 2048 req->addr_low = 2049 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2050 req->addr_high = 2051 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2052 req->length = htobe16(60 - cum_len); 2053 req->cksum_offset = 0; 2054 req->pseudo_hdr_offset = pseudo_hdr_offset; 2055 req->pad = 0; /* complete solid 16-byte block */ 2056 req->rdma_count = 1; 2057 req->flags |= flags | ((cum_len & 1) * odd_flag); 2058 cnt++; 2059 } 2060 2061 tx->req_list[0].rdma_count = cnt; 2062 #if 0 2063 /* print what the firmware will see */ 2064 for (i = 0; i < cnt; i++) { 2065 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2066 "cso:%d, flags:0x%x, rdma:%d\n", 2067 i, (int)ntohl(tx->req_list[i].addr_high), 2068 (int)ntohl(tx->req_list[i].addr_low), 2069 (int)ntohs(tx->req_list[i].length), 2070 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2071 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2072 tx->req_list[i].rdma_count); 2073 } 2074 printf("--------------\n"); 2075 #endif 2076 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2077 mxge_submit_req(tx, tx->req_list, cnt); 2078 return; 2079 2080 drop: 2081 m_freem(m); 2082 ifp->if_oerrors++; 2083 return; 2084 } 2085 2086 2087 2088 2089 static inline void 2090 mxge_start_locked(struct mxge_slice_state *ss) 2091 { 2092 mxge_softc_t *sc; 2093 struct mbuf *m; 2094 struct ifnet *ifp; 2095 mxge_tx_ring_t *tx; 2096 2097 sc = ss->sc; 2098 ifp = sc->ifp; 2099 tx = &ss->tx; 2100 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2101 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 2102 if (m == NULL) { 2103 return; 2104 } 2105 /* let BPF see it */ 2106 BPF_MTAP(ifp, m); 2107 2108 /* give it to the nic */ 2109 mxge_encap(ss, m); 2110 } 2111 /* ran out of transmit slots */ 2112 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 2113 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2114 tx->stall++; 2115 } 2116 } 2117 2118 static void 2119 mxge_start(struct ifnet 
*ifp) 2120 { 2121 mxge_softc_t *sc = ifp->if_softc; 2122 struct mxge_slice_state *ss; 2123 2124 /* only use the first slice for now */ 2125 ss = &sc->ss[0]; 2126 mtx_lock(&ss->tx.mtx); 2127 mxge_start_locked(ss); 2128 mtx_unlock(&ss->tx.mtx); 2129 } 2130 2131 /* 2132 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2133 * at most 32 bytes at a time, so as to avoid involving the software 2134 * pio handler in the nic. We re-write the first segment's low 2135 * DMA address to mark it valid only after we write the entire chunk 2136 * in a burst 2137 */ 2138 static inline void 2139 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2140 mcp_kreq_ether_recv_t *src) 2141 { 2142 uint32_t low; 2143 2144 low = src->addr_low; 2145 src->addr_low = 0xffffffff; 2146 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2147 wmb(); 2148 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2149 wmb(); 2150 src->addr_low = low; 2151 dst->addr_low = low; 2152 wmb(); 2153 } 2154 2155 static int 2156 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2157 { 2158 bus_dma_segment_t seg; 2159 struct mbuf *m; 2160 mxge_rx_ring_t *rx = &ss->rx_small; 2161 int cnt, err; 2162 2163 m = m_gethdr(M_DONTWAIT, MT_DATA); 2164 if (m == NULL) { 2165 rx->alloc_fail++; 2166 err = ENOBUFS; 2167 goto done; 2168 } 2169 m->m_len = MHLEN; 2170 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2171 &seg, &cnt, BUS_DMA_NOWAIT); 2172 if (err != 0) { 2173 m_free(m); 2174 goto done; 2175 } 2176 rx->info[idx].m = m; 2177 rx->shadow[idx].addr_low = 2178 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2179 rx->shadow[idx].addr_high = 2180 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2181 2182 done: 2183 if ((idx & 7) == 7) 2184 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2185 return err; 2186 } 2187 2188 static int 2189 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2190 { 2191 bus_dma_segment_t seg[3]; 2192 struct mbuf *m; 2193 mxge_rx_ring_t *rx = &ss->rx_big; 2194 int cnt, err, i; 2195 2196 if (rx->cl_size == MCLBYTES) 2197 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); 2198 else 2199 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2200 if (m == NULL) { 2201 rx->alloc_fail++; 2202 err = ENOBUFS; 2203 goto done; 2204 } 2205 m->m_len = rx->cl_size; 2206 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2207 seg, &cnt, BUS_DMA_NOWAIT); 2208 if (err != 0) { 2209 m_free(m); 2210 goto done; 2211 } 2212 rx->info[idx].m = m; 2213 rx->shadow[idx].addr_low = 2214 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2215 rx->shadow[idx].addr_high = 2216 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2217 2218 #if MXGE_VIRT_JUMBOS 2219 for (i = 1; i < cnt; i++) { 2220 rx->shadow[idx + i].addr_low = 2221 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2222 rx->shadow[idx + i].addr_high = 2223 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2224 } 2225 #endif 2226 2227 done: 2228 for (i = 0; i < rx->nbufs; i++) { 2229 if ((idx & 7) == 7) { 2230 mxge_submit_8rx(&rx->lanai[idx - 7], 2231 &rx->shadow[idx - 7]); 2232 } 2233 idx++; 2234 } 2235 return err; 2236 } 2237 2238 /* 2239 * Myri10GE hardware checksums are not valid if the sender 2240 * padded the frame with non-zero padding. This is because 2241 * the firmware just does a simple 16-bit 1s complement 2242 * checksum across the entire frame, excluding the first 14 2243 * bytes. 
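 * (For each received frame the firmware hands the driver its raw
 * 16-bit sum; mxge_rx_csum() below folds in the IPv4 pseudo-header
 * and returns zero only when the full TCP/UDP checksum verifies.)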
It is best to simply check the checksum and
2244  * tell the stack about it only if the checksum is good
2245  */
2246 
2247 static inline uint16_t
2248 mxge_rx_csum(struct mbuf *m, int csum)
2249 {
2250 	struct ether_header *eh;
2251 	struct ip *ip;
2252 	uint16_t c;
2253 
2254 	eh = mtod(m, struct ether_header *);
2255 
2256 	/* only deal with IPv4 TCP & UDP for now */
2257 	if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
2258 		return 1;
2259 	ip = (struct ip *)(eh + 1);
2260 	if (__predict_false(ip->ip_p != IPPROTO_TCP &&
2261 			    ip->ip_p != IPPROTO_UDP))
2262 		return 1;
2263 
2264 	c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2265 		      htonl(ntohs(csum) + ntohs(ip->ip_len) +
2266 			    - (ip->ip_hl << 2) + ip->ip_p));
2267 	c ^= 0xffff;
2268 	return (c);
2269 }
2270 
2271 static void
2272 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2273 {
2274 	struct ether_vlan_header *evl;
2275 	struct ether_header *eh;
2276 	uint32_t partial;
2277 
2278 	evl = mtod(m, struct ether_vlan_header *);
2279 	eh = mtod(m, struct ether_header *);
2280 
2281 	/*
2282 	 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
2283 	 * after what the firmware thought was the end of the ethernet
2284 	 * header.
2285 	 */
2286 
2287 	/* put checksum into host byte order */
2288 	*csum = ntohs(*csum);
2289 	partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2290 	(*csum) += ~partial;
2291 	(*csum) += ((*csum) < ~partial);
2292 	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2293 	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2294 
2295 	/* restore checksum to network byte order;
2296 	   later consumers expect this */
2297 	*csum = htons(*csum);
2298 
2299 	/* save the tag */
2300 #ifdef MXGE_NEW_VLAN_API
2301 	m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
2302 #else
2303 	{
2304 		struct m_tag *mtag;
2305 		mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2306 				   M_NOWAIT);
2307 		if (mtag == NULL)
2308 			return;
2309 		VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2310 		m_tag_prepend(m, mtag);
2311 	}
2312 
2313 #endif
2314 	m->m_flags |= M_VLANTAG;
2315 
2316 	/*
2317 	 * Remove the 802.1q header by copying the Ethernet
2318 	 * addresses over it and adjusting the beginning of
2319 	 * the data in the mbuf. The encapsulated Ethernet
2320 	 * type field is already in place.
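 * The checksum handed back through *csum was already adjusted above,
 * so it still corresponds to the frame once the encapsulation bytes
 * are stripped by m_adj() below.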
2321 */ 2322 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2323 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2324 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2325 } 2326 2327 2328 static inline void 2329 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2330 { 2331 mxge_softc_t *sc; 2332 struct ifnet *ifp; 2333 struct mbuf *m; 2334 struct ether_header *eh; 2335 mxge_rx_ring_t *rx; 2336 bus_dmamap_t old_map; 2337 int idx; 2338 uint16_t tcpudp_csum; 2339 2340 sc = ss->sc; 2341 ifp = sc->ifp; 2342 rx = &ss->rx_big; 2343 idx = rx->cnt & rx->mask; 2344 rx->cnt += rx->nbufs; 2345 /* save a pointer to the received mbuf */ 2346 m = rx->info[idx].m; 2347 /* try to replace the received mbuf */ 2348 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2349 /* drop the frame -- the old mbuf is re-cycled */ 2350 ifp->if_ierrors++; 2351 return; 2352 } 2353 2354 /* unmap the received buffer */ 2355 old_map = rx->info[idx].map; 2356 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2357 bus_dmamap_unload(rx->dmat, old_map); 2358 2359 /* swap the bus_dmamap_t's */ 2360 rx->info[idx].map = rx->extra_map; 2361 rx->extra_map = old_map; 2362 2363 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2364 * aligned */ 2365 m->m_data += MXGEFW_PAD; 2366 2367 m->m_pkthdr.rcvif = ifp; 2368 m->m_len = m->m_pkthdr.len = len; 2369 ss->ipackets++; 2370 eh = mtod(m, struct ether_header *); 2371 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2372 mxge_vlan_tag_remove(m, &csum); 2373 } 2374 /* if the checksum is valid, mark it in the mbuf header */ 2375 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2376 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2377 return; 2378 /* otherwise, it was a UDP frame, or a TCP frame which 2379 we could not do LRO on. 
Tell the stack that the 2380 checksum is good */ 2381 m->m_pkthdr.csum_data = 0xffff; 2382 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2383 } 2384 /* pass the frame up the stack */ 2385 (*ifp->if_input)(ifp, m); 2386 } 2387 2388 static inline void 2389 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2390 { 2391 mxge_softc_t *sc; 2392 struct ifnet *ifp; 2393 struct ether_header *eh; 2394 struct mbuf *m; 2395 mxge_rx_ring_t *rx; 2396 bus_dmamap_t old_map; 2397 int idx; 2398 uint16_t tcpudp_csum; 2399 2400 sc = ss->sc; 2401 ifp = sc->ifp; 2402 rx = &ss->rx_small; 2403 idx = rx->cnt & rx->mask; 2404 rx->cnt++; 2405 /* save a pointer to the received mbuf */ 2406 m = rx->info[idx].m; 2407 /* try to replace the received mbuf */ 2408 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2409 /* drop the frame -- the old mbuf is re-cycled */ 2410 ifp->if_ierrors++; 2411 return; 2412 } 2413 2414 /* unmap the received buffer */ 2415 old_map = rx->info[idx].map; 2416 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2417 bus_dmamap_unload(rx->dmat, old_map); 2418 2419 /* swap the bus_dmamap_t's */ 2420 rx->info[idx].map = rx->extra_map; 2421 rx->extra_map = old_map; 2422 2423 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2424 * aligned */ 2425 m->m_data += MXGEFW_PAD; 2426 2427 m->m_pkthdr.rcvif = ifp; 2428 m->m_len = m->m_pkthdr.len = len; 2429 ss->ipackets++; 2430 eh = mtod(m, struct ether_header *); 2431 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2432 mxge_vlan_tag_remove(m, &csum); 2433 } 2434 /* if the checksum is valid, mark it in the mbuf header */ 2435 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2436 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2437 return; 2438 /* otherwise, it was a UDP frame, or a TCP frame which 2439 we could not do LRO on. 
Tell the stack that the 2440 checksum is good */ 2441 m->m_pkthdr.csum_data = 0xffff; 2442 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2443 } 2444 /* pass the frame up the stack */ 2445 (*ifp->if_input)(ifp, m); 2446 } 2447 2448 static inline void 2449 mxge_clean_rx_done(struct mxge_slice_state *ss) 2450 { 2451 mxge_rx_done_t *rx_done = &ss->rx_done; 2452 struct lro_entry *lro; 2453 int limit = 0; 2454 uint16_t length; 2455 uint16_t checksum; 2456 2457 2458 while (rx_done->entry[rx_done->idx].length != 0) { 2459 length = ntohs(rx_done->entry[rx_done->idx].length); 2460 rx_done->entry[rx_done->idx].length = 0; 2461 checksum = rx_done->entry[rx_done->idx].checksum; 2462 if (length <= (MHLEN - MXGEFW_PAD)) 2463 mxge_rx_done_small(ss, length, checksum); 2464 else 2465 mxge_rx_done_big(ss, length, checksum); 2466 rx_done->cnt++; 2467 rx_done->idx = rx_done->cnt & rx_done->mask; 2468 2469 /* limit potential for livelock */ 2470 if (__predict_false(++limit > rx_done->mask / 2)) 2471 break; 2472 } 2473 while (!SLIST_EMPTY(&ss->lro_active)) { 2474 lro = SLIST_FIRST(&ss->lro_active); 2475 SLIST_REMOVE_HEAD(&ss->lro_active, next); 2476 mxge_lro_flush(ss, lro); 2477 } 2478 } 2479 2480 2481 static inline void 2482 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2483 { 2484 struct ifnet *ifp; 2485 mxge_tx_ring_t *tx; 2486 struct mbuf *m; 2487 bus_dmamap_t map; 2488 int idx; 2489 2490 tx = &ss->tx; 2491 ifp = ss->sc->ifp; 2492 while (tx->pkt_done != mcp_idx) { 2493 idx = tx->done & tx->mask; 2494 tx->done++; 2495 m = tx->info[idx].m; 2496 /* mbuf and DMA map only attached to the first 2497 segment per-mbuf */ 2498 if (m != NULL) { 2499 ifp->if_opackets++; 2500 tx->info[idx].m = NULL; 2501 map = tx->info[idx].map; 2502 bus_dmamap_unload(tx->dmat, map); 2503 m_freem(m); 2504 } 2505 if (tx->info[idx].flag) { 2506 tx->info[idx].flag = 0; 2507 tx->pkt_done++; 2508 } 2509 } 2510 2511 /* If we have space, clear IFF_OACTIVE to tell the stack that 2512 its OK to send packets */ 2513 2514 if (ifp->if_drv_flags & IFF_DRV_OACTIVE && 2515 tx->req - tx->done < (tx->mask + 1)/4) { 2516 mtx_lock(&ss->tx.mtx); 2517 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 2518 ss->tx.wake++; 2519 mxge_start_locked(ss); 2520 mtx_unlock(&ss->tx.mtx); 2521 } 2522 } 2523 2524 static struct mxge_media_type mxge_xfp_media_types[] = 2525 { 2526 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2527 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2528 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2529 {0, (1 << 5), "10GBASE-ER"}, 2530 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2531 {0, (1 << 3), "10GBASE-SW"}, 2532 {0, (1 << 2), "10GBASE-LW"}, 2533 {0, (1 << 1), "10GBASE-EW"}, 2534 {0, (1 << 0), "Reserved"} 2535 }; 2536 static struct mxge_media_type mxge_sfp_media_types[] = 2537 { 2538 {0, (1 << 7), "Reserved"}, 2539 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2540 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2541 {IFM_10G_SR, (1 << 4), "10GBASE-SR"} 2542 }; 2543 2544 static void 2545 mxge_set_media(mxge_softc_t *sc, int type) 2546 { 2547 sc->media_flags |= type; 2548 ifmedia_add(&sc->media, sc->media_flags, 0, NULL); 2549 ifmedia_set(&sc->media, sc->media_flags); 2550 } 2551 2552 2553 /* 2554 * Determine the media type for a NIC. Some XFPs will identify 2555 * themselves only when their link is up, so this is initiated via a 2556 * link up interrupt. However, this can potentially take up to 2557 * several milliseconds, so it is run via the watchdog routine, rather 2558 * than in the interrupt handler itself. 
This need only be done
2559  * once, not each time the link is up.
2560  */
2561 static void
2562 mxge_media_probe(mxge_softc_t *sc)
2563 {
2564 	mxge_cmd_t cmd;
2565 	char *cage_type;
2566 	char *ptr;
2567 	struct mxge_media_type *mxge_media_types = NULL;
2568 	int i, err, ms, mxge_media_type_entries;
2569 	uint32_t byte;
2570 
2571 	sc->need_media_probe = 0;
2572 
2573 	/* if we've already set a media type, we're done */
2574 	if (sc->media_flags != (IFM_ETHER | IFM_AUTO))
2575 		return;
2576 
2577 	/*
2578 	 * parse the product code to determine the interface type
2579 	 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2580 	 * after the 3rd dash in the driver's cached copy of the
2581 	 * EEPROM's product code string.
2582 	 */
2583 	ptr = sc->product_code_string;
2584 	if (ptr == NULL) {
2585 		device_printf(sc->dev, "Missing product code\n");
2586 		return;
2587 	}
2588 	for (i = 0; i < 3; i++, ptr++) {
2589 		ptr = index(ptr, '-');
2590 		if (ptr == NULL) {
2591 			device_printf(sc->dev,
2592 				      "only %d dashes in PC?!?\n", i);
2593 			return;
2594 		}
2595 	}
2596 	if (*ptr == 'C') {
2597 		/* -C is CX4 */
2598 		mxge_set_media(sc, IFM_10G_CX4);
2599 		return;
2600 	}
2601 	else if (*ptr == 'Q') {
2602 		/* -Q is Quad Ribbon Fiber */
2603 		device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2604 		/* FreeBSD has no media type for Quad ribbon fiber */
2605 		return;
2606 	}
2607 
2608 	if (*ptr == 'R') {
2609 		/* -R is XFP */
2610 		mxge_media_types = mxge_xfp_media_types;
2611 		mxge_media_type_entries =
2612 			sizeof (mxge_xfp_media_types) /
2613 			sizeof (mxge_xfp_media_types[0]);
2614 		byte = MXGE_XFP_COMPLIANCE_BYTE;
2615 		cage_type = "XFP";
2616 	}
2617 
2618 	if (*ptr == 'S' || *(ptr +1) == 'S') {
2619 		/* -S or -2S is SFP+ */
2620 		mxge_media_types = mxge_sfp_media_types;
2621 		mxge_media_type_entries =
2622 			sizeof (mxge_sfp_media_types) /
2623 			sizeof (mxge_sfp_media_types[0]);
2624 		cage_type = "SFP+";
2625 		byte = 3;
2626 	}
2627 
2628 	if (mxge_media_types == NULL) {
2629 		device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2630 		return;
2631 	}
2632 
2633 	/*
2634 	 * At this point we know the NIC has an XFP cage, so now we
2635 	 * try to determine what is in the cage by using the
2636 	 * firmware's XFP I2C commands to read the XFP 10GbE compliance
2637 	 * register.
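 * (Reading it takes two firmware commands: MXGEFW_CMD_I2C_READ starts
 * the I2C transaction, and MXGEFW_CMD_I2C_BYTE is then polled until
 * the requested byte has been cached.)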
We read just one byte, which may take over 2638 * a millisecond 2639 */ 2640 2641 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2642 cmd.data1 = byte; 2643 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2644 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 2645 device_printf(sc->dev, "failed to read XFP\n"); 2646 } 2647 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 2648 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 2649 } 2650 if (err != MXGEFW_CMD_OK) { 2651 return; 2652 } 2653 2654 /* now we wait for the data to be cached */ 2655 cmd.data0 = byte; 2656 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2657 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 2658 DELAY(1000); 2659 cmd.data0 = byte; 2660 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2661 } 2662 if (err != MXGEFW_CMD_OK) { 2663 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 2664 cage_type, err, ms); 2665 return; 2666 } 2667 2668 if (cmd.data0 == mxge_media_types[0].bitmask) { 2669 if (mxge_verbose) 2670 device_printf(sc->dev, "%s:%s\n", cage_type, 2671 mxge_media_types[0].name); 2672 mxge_set_media(sc, IFM_10G_CX4); 2673 return; 2674 } 2675 for (i = 1; i < mxge_media_type_entries; i++) { 2676 if (cmd.data0 & mxge_media_types[i].bitmask) { 2677 if (mxge_verbose) 2678 device_printf(sc->dev, "%s:%s\n", 2679 cage_type, 2680 mxge_media_types[i].name); 2681 2682 mxge_set_media(sc, mxge_media_types[i].flag); 2683 return; 2684 } 2685 } 2686 device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type, 2687 cmd.data0); 2688 2689 return; 2690 } 2691 2692 static void 2693 mxge_intr(void *arg) 2694 { 2695 struct mxge_slice_state *ss = arg; 2696 mxge_softc_t *sc = ss->sc; 2697 mcp_irq_data_t *stats = ss->fw_stats; 2698 mxge_tx_ring_t *tx = &ss->tx; 2699 mxge_rx_done_t *rx_done = &ss->rx_done; 2700 uint32_t send_done_count; 2701 uint8_t valid; 2702 2703 2704 /* an interrupt on a non-zero slice is implicitly valid 2705 since MSI-X irqs are not shared */ 2706 if (ss != sc->ss) { 2707 mxge_clean_rx_done(ss); 2708 *ss->irq_claim = be32toh(3); 2709 return; 2710 } 2711 2712 /* make sure the DMA has finished */ 2713 if (!stats->valid) { 2714 return; 2715 } 2716 valid = stats->valid; 2717 2718 if (sc->legacy_irq) { 2719 /* lower legacy IRQ */ 2720 *sc->irq_deassert = 0; 2721 if (!mxge_deassert_wait) 2722 /* don't wait for conf. 
that irq is low */ 2723 stats->valid = 0; 2724 } else { 2725 stats->valid = 0; 2726 } 2727 2728 /* loop while waiting for legacy irq deassertion */ 2729 do { 2730 /* check for transmit completes and receives */ 2731 send_done_count = be32toh(stats->send_done_count); 2732 while ((send_done_count != tx->pkt_done) || 2733 (rx_done->entry[rx_done->idx].length != 0)) { 2734 mxge_tx_done(ss, (int)send_done_count); 2735 mxge_clean_rx_done(ss); 2736 send_done_count = be32toh(stats->send_done_count); 2737 } 2738 if (sc->legacy_irq && mxge_deassert_wait) 2739 wmb(); 2740 } while (*((volatile uint8_t *) &stats->valid)); 2741 2742 if (__predict_false(stats->stats_updated)) { 2743 if (sc->link_state != stats->link_up) { 2744 sc->link_state = stats->link_up; 2745 if (sc->link_state) { 2746 if_link_state_change(sc->ifp, LINK_STATE_UP); 2747 if (mxge_verbose) 2748 device_printf(sc->dev, "link up\n"); 2749 } else { 2750 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 2751 if (mxge_verbose) 2752 device_printf(sc->dev, "link down\n"); 2753 } 2754 sc->need_media_probe = 1; 2755 } 2756 if (sc->rdma_tags_available != 2757 be32toh(stats->rdma_tags_available)) { 2758 sc->rdma_tags_available = 2759 be32toh(stats->rdma_tags_available); 2760 device_printf(sc->dev, "RDMA timed out! %d tags " 2761 "left\n", sc->rdma_tags_available); 2762 } 2763 2764 if (stats->link_down) { 2765 sc->down_cnt += stats->link_down; 2766 sc->link_state = 0; 2767 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 2768 } 2769 } 2770 2771 /* check to see if we have rx token to pass back */ 2772 if (valid & 0x1) 2773 *ss->irq_claim = be32toh(3); 2774 *(ss->irq_claim + 1) = be32toh(3); 2775 } 2776 2777 static void 2778 mxge_init(void *arg) 2779 { 2780 } 2781 2782 2783 2784 static void 2785 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 2786 { 2787 struct lro_entry *lro_entry; 2788 int i; 2789 2790 while (!SLIST_EMPTY(&ss->lro_free)) { 2791 lro_entry = SLIST_FIRST(&ss->lro_free); 2792 SLIST_REMOVE_HEAD(&ss->lro_free, next); 2793 free(lro_entry, M_DEVBUF); 2794 } 2795 2796 for (i = 0; i <= ss->rx_big.mask; i++) { 2797 if (ss->rx_big.info[i].m == NULL) 2798 continue; 2799 bus_dmamap_unload(ss->rx_big.dmat, 2800 ss->rx_big.info[i].map); 2801 m_freem(ss->rx_big.info[i].m); 2802 ss->rx_big.info[i].m = NULL; 2803 } 2804 2805 for (i = 0; i <= ss->rx_small.mask; i++) { 2806 if (ss->rx_small.info[i].m == NULL) 2807 continue; 2808 bus_dmamap_unload(ss->rx_small.dmat, 2809 ss->rx_small.info[i].map); 2810 m_freem(ss->rx_small.info[i].m); 2811 ss->rx_small.info[i].m = NULL; 2812 } 2813 2814 /* transmit ring used only on the first slice */ 2815 if (ss->tx.info == NULL) 2816 return; 2817 2818 for (i = 0; i <= ss->tx.mask; i++) { 2819 ss->tx.info[i].flag = 0; 2820 if (ss->tx.info[i].m == NULL) 2821 continue; 2822 bus_dmamap_unload(ss->tx.dmat, 2823 ss->tx.info[i].map); 2824 m_freem(ss->tx.info[i].m); 2825 ss->tx.info[i].m = NULL; 2826 } 2827 } 2828 2829 static void 2830 mxge_free_mbufs(mxge_softc_t *sc) 2831 { 2832 int slice; 2833 2834 for (slice = 0; slice < sc->num_slices; slice++) 2835 mxge_free_slice_mbufs(&sc->ss[slice]); 2836 } 2837 2838 static void 2839 mxge_free_slice_rings(struct mxge_slice_state *ss) 2840 { 2841 int i; 2842 2843 2844 if (ss->rx_done.entry != NULL) 2845 mxge_dma_free(&ss->rx_done.dma); 2846 ss->rx_done.entry = NULL; 2847 2848 if (ss->tx.req_bytes != NULL) 2849 free(ss->tx.req_bytes, M_DEVBUF); 2850 ss->tx.req_bytes = NULL; 2851 2852 if (ss->tx.seg_list != NULL) 2853 free(ss->tx.seg_list, M_DEVBUF); 2854 ss->tx.seg_list = NULL; 2855 
2856 if (ss->rx_small.shadow != NULL) 2857 free(ss->rx_small.shadow, M_DEVBUF); 2858 ss->rx_small.shadow = NULL; 2859 2860 if (ss->rx_big.shadow != NULL) 2861 free(ss->rx_big.shadow, M_DEVBUF); 2862 ss->rx_big.shadow = NULL; 2863 2864 if (ss->tx.info != NULL) { 2865 if (ss->tx.dmat != NULL) { 2866 for (i = 0; i <= ss->tx.mask; i++) { 2867 bus_dmamap_destroy(ss->tx.dmat, 2868 ss->tx.info[i].map); 2869 } 2870 bus_dma_tag_destroy(ss->tx.dmat); 2871 } 2872 free(ss->tx.info, M_DEVBUF); 2873 } 2874 ss->tx.info = NULL; 2875 2876 if (ss->rx_small.info != NULL) { 2877 if (ss->rx_small.dmat != NULL) { 2878 for (i = 0; i <= ss->rx_small.mask; i++) { 2879 bus_dmamap_destroy(ss->rx_small.dmat, 2880 ss->rx_small.info[i].map); 2881 } 2882 bus_dmamap_destroy(ss->rx_small.dmat, 2883 ss->rx_small.extra_map); 2884 bus_dma_tag_destroy(ss->rx_small.dmat); 2885 } 2886 free(ss->rx_small.info, M_DEVBUF); 2887 } 2888 ss->rx_small.info = NULL; 2889 2890 if (ss->rx_big.info != NULL) { 2891 if (ss->rx_big.dmat != NULL) { 2892 for (i = 0; i <= ss->rx_big.mask; i++) { 2893 bus_dmamap_destroy(ss->rx_big.dmat, 2894 ss->rx_big.info[i].map); 2895 } 2896 bus_dmamap_destroy(ss->rx_big.dmat, 2897 ss->rx_big.extra_map); 2898 bus_dma_tag_destroy(ss->rx_big.dmat); 2899 } 2900 free(ss->rx_big.info, M_DEVBUF); 2901 } 2902 ss->rx_big.info = NULL; 2903 } 2904 2905 static void 2906 mxge_free_rings(mxge_softc_t *sc) 2907 { 2908 int slice; 2909 2910 for (slice = 0; slice < sc->num_slices; slice++) 2911 mxge_free_slice_rings(&sc->ss[slice]); 2912 } 2913 2914 static int 2915 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 2916 int tx_ring_entries) 2917 { 2918 mxge_softc_t *sc = ss->sc; 2919 size_t bytes; 2920 int err, i; 2921 2922 err = ENOMEM; 2923 2924 /* allocate per-slice receive resources */ 2925 2926 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 2927 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 2928 2929 /* allocate the rx shadow rings */ 2930 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 2931 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2932 if (ss->rx_small.shadow == NULL) 2933 return err;; 2934 2935 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 2936 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2937 if (ss->rx_big.shadow == NULL) 2938 return err;; 2939 2940 /* allocate the rx host info rings */ 2941 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 2942 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2943 if (ss->rx_small.info == NULL) 2944 return err;; 2945 2946 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 2947 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2948 if (ss->rx_big.info == NULL) 2949 return err;; 2950 2951 /* allocate the rx busdma resources */ 2952 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 2953 1, /* alignment */ 2954 4096, /* boundary */ 2955 BUS_SPACE_MAXADDR, /* low */ 2956 BUS_SPACE_MAXADDR, /* high */ 2957 NULL, NULL, /* filter */ 2958 MHLEN, /* maxsize */ 2959 1, /* num segs */ 2960 MHLEN, /* maxsegsize */ 2961 BUS_DMA_ALLOCNOW, /* flags */ 2962 NULL, NULL, /* lock */ 2963 &ss->rx_small.dmat); /* tag */ 2964 if (err != 0) { 2965 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 2966 err); 2967 return err;; 2968 } 2969 2970 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 2971 1, /* alignment */ 2972 #if MXGE_VIRT_JUMBOS 2973 4096, /* boundary */ 2974 #else 2975 0, /* boundary */ 2976 #endif 2977 BUS_SPACE_MAXADDR, /* low */ 2978 BUS_SPACE_MAXADDR, 
/* high */ 2979 NULL, NULL, /* filter */ 2980 3*4096, /* maxsize */ 2981 #if MXGE_VIRT_JUMBOS 2982 3, /* num segs */ 2983 4096, /* maxsegsize*/ 2984 #else 2985 1, /* num segs */ 2986 MJUM9BYTES, /* maxsegsize*/ 2987 #endif 2988 BUS_DMA_ALLOCNOW, /* flags */ 2989 NULL, NULL, /* lock */ 2990 &ss->rx_big.dmat); /* tag */ 2991 if (err != 0) { 2992 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 2993 err); 2994 return err;; 2995 } 2996 for (i = 0; i <= ss->rx_small.mask; i++) { 2997 err = bus_dmamap_create(ss->rx_small.dmat, 0, 2998 &ss->rx_small.info[i].map); 2999 if (err != 0) { 3000 device_printf(sc->dev, "Err %d rx_small dmamap\n", 3001 err); 3002 return err;; 3003 } 3004 } 3005 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3006 &ss->rx_small.extra_map); 3007 if (err != 0) { 3008 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 3009 err); 3010 return err;; 3011 } 3012 3013 for (i = 0; i <= ss->rx_big.mask; i++) { 3014 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3015 &ss->rx_big.info[i].map); 3016 if (err != 0) { 3017 device_printf(sc->dev, "Err %d rx_big dmamap\n", 3018 err); 3019 return err;; 3020 } 3021 } 3022 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3023 &ss->rx_big.extra_map); 3024 if (err != 0) { 3025 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 3026 err); 3027 return err;; 3028 } 3029 3030 /* now allocate TX resouces */ 3031 3032 /* only use a single TX ring for now */ 3033 if (ss != ss->sc->ss) 3034 return 0; 3035 3036 ss->tx.mask = tx_ring_entries - 1; 3037 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3038 3039 3040 /* allocate the tx request copy block */ 3041 bytes = 8 + 3042 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3043 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 3044 if (ss->tx.req_bytes == NULL) 3045 return err;; 3046 /* ensure req_list entries are aligned to 8 bytes */ 3047 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3048 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL); 3049 3050 /* allocate the tx busdma segment list */ 3051 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3052 ss->tx.seg_list = (bus_dma_segment_t *) 3053 malloc(bytes, M_DEVBUF, M_WAITOK); 3054 if (ss->tx.seg_list == NULL) 3055 return err;; 3056 3057 /* allocate the tx host info ring */ 3058 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3059 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3060 if (ss->tx.info == NULL) 3061 return err;; 3062 3063 /* allocate the tx busdma resources */ 3064 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3065 1, /* alignment */ 3066 sc->tx_boundary, /* boundary */ 3067 BUS_SPACE_MAXADDR, /* low */ 3068 BUS_SPACE_MAXADDR, /* high */ 3069 NULL, NULL, /* filter */ 3070 65536 + 256, /* maxsize */ 3071 ss->tx.max_desc - 2, /* num segs */ 3072 sc->tx_boundary, /* maxsegsz */ 3073 BUS_DMA_ALLOCNOW, /* flags */ 3074 NULL, NULL, /* lock */ 3075 &ss->tx.dmat); /* tag */ 3076 3077 if (err != 0) { 3078 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3079 err); 3080 return err;; 3081 } 3082 3083 /* now use these tags to setup dmamaps for each slot 3084 in the ring */ 3085 for (i = 0; i <= ss->tx.mask; i++) { 3086 err = bus_dmamap_create(ss->tx.dmat, 0, 3087 &ss->tx.info[i].map); 3088 if (err != 0) { 3089 device_printf(sc->dev, "Err %d tx dmamap\n", 3090 err); 3091 return err;; 3092 } 3093 } 3094 return 0; 3095 3096 } 3097 3098 static int 3099 mxge_alloc_rings(mxge_softc_t *sc) 3100 { 3101 mxge_cmd_t cmd; 3102 int tx_ring_size; 3103 int tx_ring_entries, rx_ring_entries; 3104 int err, slice; 3105 
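	/*
	 * The send ring size is queried from the firmware below; the
	 * receive ring size was already cached in sc->rx_ring_size by
	 * mxge_alloc_slices().  The ifnet send queue depth is sized to
	 * match the tx ring.
	 */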
3106 /* get ring sizes */ 3107 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3108 tx_ring_size = cmd.data0; 3109 if (err != 0) { 3110 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3111 goto abort; 3112 } 3113 3114 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3115 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3116 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 3117 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 3118 IFQ_SET_READY(&sc->ifp->if_snd); 3119 3120 for (slice = 0; slice < sc->num_slices; slice++) { 3121 err = mxge_alloc_slice_rings(&sc->ss[slice], 3122 rx_ring_entries, 3123 tx_ring_entries); 3124 if (err != 0) 3125 goto abort; 3126 } 3127 return 0; 3128 3129 abort: 3130 mxge_free_rings(sc); 3131 return err; 3132 3133 } 3134 3135 3136 static void 3137 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3138 { 3139 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3140 3141 if (bufsize < MCLBYTES) { 3142 /* easy, everything fits in a single buffer */ 3143 *big_buf_size = MCLBYTES; 3144 *cl_size = MCLBYTES; 3145 *nbufs = 1; 3146 return; 3147 } 3148 3149 if (bufsize < MJUMPAGESIZE) { 3150 /* still easy, everything still fits in a single buffer */ 3151 *big_buf_size = MJUMPAGESIZE; 3152 *cl_size = MJUMPAGESIZE; 3153 *nbufs = 1; 3154 return; 3155 } 3156 #if MXGE_VIRT_JUMBOS 3157 /* now we need to use virtually contiguous buffers */ 3158 *cl_size = MJUM9BYTES; 3159 *big_buf_size = 4096; 3160 *nbufs = mtu / 4096 + 1; 3161 /* needs to be a power of two, so round up */ 3162 if (*nbufs == 3) 3163 *nbufs = 4; 3164 #else 3165 *cl_size = MJUM9BYTES; 3166 *big_buf_size = MJUM9BYTES; 3167 *nbufs = 1; 3168 #endif 3169 } 3170 3171 static int 3172 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3173 { 3174 mxge_softc_t *sc; 3175 mxge_cmd_t cmd; 3176 bus_dmamap_t map; 3177 struct lro_entry *lro_entry; 3178 int err, i, slice; 3179 3180 3181 sc = ss->sc; 3182 slice = ss - sc->ss; 3183 3184 SLIST_INIT(&ss->lro_free); 3185 SLIST_INIT(&ss->lro_active); 3186 3187 for (i = 0; i < sc->lro_cnt; i++) { 3188 lro_entry = (struct lro_entry *) 3189 malloc(sizeof (*lro_entry), M_DEVBUF, 3190 M_NOWAIT | M_ZERO); 3191 if (lro_entry == NULL) { 3192 sc->lro_cnt = i; 3193 break; 3194 } 3195 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next); 3196 } 3197 /* get the lanai pointers to the send and receive rings */ 3198 3199 err = 0; 3200 /* We currently only send from the first slice */ 3201 if (slice == 0) { 3202 cmd.data0 = slice; 3203 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3204 ss->tx.lanai = 3205 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3206 } 3207 cmd.data0 = slice; 3208 err |= mxge_send_cmd(sc, 3209 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3210 ss->rx_small.lanai = 3211 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3212 cmd.data0 = slice; 3213 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3214 ss->rx_big.lanai = 3215 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3216 3217 if (err != 0) { 3218 device_printf(sc->dev, 3219 "failed to get ring sizes or locations\n"); 3220 return EIO; 3221 } 3222 3223 /* stock receive rings */ 3224 for (i = 0; i <= ss->rx_small.mask; i++) { 3225 map = ss->rx_small.info[i].map; 3226 err = mxge_get_buf_small(ss, map, i); 3227 if (err) { 3228 device_printf(sc->dev, "alloced %d/%d smalls\n", 3229 i, ss->rx_small.mask + 1); 3230 return ENOMEM; 3231 } 3232 } 3233 for (i = 
0; i <= ss->rx_big.mask; i++) { 3234 ss->rx_big.shadow[i].addr_low = 0xffffffff; 3235 ss->rx_big.shadow[i].addr_high = 0xffffffff; 3236 } 3237 ss->rx_big.nbufs = nbufs; 3238 ss->rx_big.cl_size = cl_size; 3239 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { 3240 map = ss->rx_big.info[i].map; 3241 err = mxge_get_buf_big(ss, map, i); 3242 if (err) { 3243 device_printf(sc->dev, "alloced %d/%d bigs\n", 3244 i, ss->rx_big.mask + 1); 3245 return ENOMEM; 3246 } 3247 } 3248 return 0; 3249 } 3250 3251 static int 3252 mxge_open(mxge_softc_t *sc) 3253 { 3254 mxge_cmd_t cmd; 3255 int err, big_bytes, nbufs, slice, cl_size, i; 3256 bus_addr_t bus; 3257 volatile uint8_t *itable; 3258 3259 /* Copy the MAC address in case it was overridden */ 3260 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 3261 3262 err = mxge_reset(sc, 1); 3263 if (err != 0) { 3264 device_printf(sc->dev, "failed to reset\n"); 3265 return EIO; 3266 } 3267 3268 if (sc->num_slices > 1) { 3269 /* setup the indirection table */ 3270 cmd.data0 = sc->num_slices; 3271 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 3272 &cmd); 3273 3274 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 3275 &cmd); 3276 if (err != 0) { 3277 device_printf(sc->dev, 3278 "failed to setup rss tables\n"); 3279 return err; 3280 } 3281 3282 /* just enable an identity mapping */ 3283 itable = sc->sram + cmd.data0; 3284 for (i = 0; i < sc->num_slices; i++) 3285 itable[i] = (uint8_t)i; 3286 3287 cmd.data0 = 1; 3288 cmd.data1 = mxge_rss_hash_type; 3289 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3290 if (err != 0) { 3291 device_printf(sc->dev, "failed to enable slices\n"); 3292 return err; 3293 } 3294 } 3295 3296 3297 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); 3298 3299 cmd.data0 = nbufs; 3300 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 3301 &cmd); 3302 /* error is only meaningful if we're trying to set 3303 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 3304 if (err && nbufs > 1) { 3305 device_printf(sc->dev, 3306 "Failed to set alway-use-n to %d\n", 3307 nbufs); 3308 return EIO; 3309 } 3310 /* Give the firmware the mtu and the big and small buffer 3311 sizes. The firmware wants the big buf size to be a power 3312 of two. 
Luckily, FreeBSD's clusters are powers of two */ 3313 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3314 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3315 cmd.data0 = MHLEN - MXGEFW_PAD; 3316 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3317 &cmd); 3318 cmd.data0 = big_bytes; 3319 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3320 3321 if (err != 0) { 3322 device_printf(sc->dev, "failed to setup params\n"); 3323 goto abort; 3324 } 3325 3326 /* Now give him the pointer to the stats block */ 3327 cmd.data0 = MXGE_LOWPART_TO_U32(sc->ss->fw_stats_dma.bus_addr); 3328 cmd.data1 = MXGE_HIGHPART_TO_U32(sc->ss->fw_stats_dma.bus_addr); 3329 cmd.data2 = sizeof(struct mcp_irq_data); 3330 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3331 3332 if (err != 0) { 3333 bus = sc->ss->fw_stats_dma.bus_addr; 3334 bus += offsetof(struct mcp_irq_data, send_done_count); 3335 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3336 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3337 err = mxge_send_cmd(sc, 3338 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3339 &cmd); 3340 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3341 sc->fw_multicast_support = 0; 3342 } else { 3343 sc->fw_multicast_support = 1; 3344 } 3345 3346 if (err != 0) { 3347 device_printf(sc->dev, "failed to setup params\n"); 3348 goto abort; 3349 } 3350 3351 for (slice = 0; slice < sc->num_slices; slice++) { 3352 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3353 if (err != 0) { 3354 device_printf(sc->dev, "couldn't open slice %d\n", 3355 slice); 3356 goto abort; 3357 } 3358 } 3359 3360 /* Finally, start the firmware running */ 3361 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3362 if (err) { 3363 device_printf(sc->dev, "Couldn't bring up link\n"); 3364 goto abort; 3365 } 3366 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3367 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3368 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3369 3370 return 0; 3371 3372 3373 abort: 3374 mxge_free_mbufs(sc); 3375 3376 return err; 3377 } 3378 3379 static int 3380 mxge_close(mxge_softc_t *sc) 3381 { 3382 mxge_cmd_t cmd; 3383 int err, old_down_cnt; 3384 3385 callout_stop(&sc->co_hdl); 3386 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3387 old_down_cnt = sc->down_cnt; 3388 wmb(); 3389 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3390 if (err) { 3391 device_printf(sc->dev, "Couldn't bring down link\n"); 3392 } 3393 if (old_down_cnt == sc->down_cnt) { 3394 /* wait for down irq */ 3395 DELAY(10 * sc->intr_coal_delay); 3396 } 3397 wmb(); 3398 if (old_down_cnt == sc->down_cnt) { 3399 device_printf(sc->dev, "never got down irq\n"); 3400 } 3401 3402 mxge_free_mbufs(sc); 3403 3404 return 0; 3405 } 3406 3407 static void 3408 mxge_setup_cfg_space(mxge_softc_t *sc) 3409 { 3410 device_t dev = sc->dev; 3411 int reg; 3412 uint16_t cmd, lnk, pectl; 3413 3414 /* find the PCIe link width and set max read request to 4KB*/ 3415 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 3416 lnk = pci_read_config(dev, reg + 0x12, 2); 3417 sc->link_width = (lnk >> 4) & 0x3f; 3418 3419 pectl = pci_read_config(dev, reg + 0x8, 2); 3420 pectl = (pectl & ~0x7000) | (5 << 12); 3421 pci_write_config(dev, reg + 0x8, pectl, 2); 3422 } 3423 3424 /* Enable DMA and Memory space access */ 3425 pci_enable_busmaster(dev); 3426 cmd = pci_read_config(dev, PCIR_COMMAND, 2); 3427 cmd |= PCIM_CMD_MEMEN; 3428 pci_write_config(dev, PCIR_COMMAND, cmd, 2); 3429 } 3430 3431 static uint32_t 3432 mxge_read_reboot(mxge_softc_t *sc) 3433 { 3434 
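	/*
	 * Read the firmware reboot status through the vendor-specific
	 * PCI capability's indirect register window; the watchdog uses
	 * this to report why the NIC rebooted.
	 */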
device_t dev = sc->dev; 3435 uint32_t vs; 3436 3437 /* find the vendor specific offset */ 3438 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) { 3439 device_printf(sc->dev, 3440 "could not find vendor specific offset\n"); 3441 return (uint32_t)-1; 3442 } 3443 /* enable read32 mode */ 3444 pci_write_config(dev, vs + 0x10, 0x3, 1); 3445 /* tell NIC which register to read */ 3446 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3447 return (pci_read_config(dev, vs + 0x14, 4)); 3448 } 3449 3450 static int 3451 mxge_watchdog_reset(mxge_softc_t *sc) 3452 { 3453 struct pci_devinfo *dinfo; 3454 int err; 3455 uint32_t reboot; 3456 uint16_t cmd; 3457 3458 err = ENXIO; 3459 3460 device_printf(sc->dev, "Watchdog reset!\n"); 3461 3462 /* 3463 * check to see if the NIC rebooted. If it did, then all of 3464 * PCI config space has been reset, and things like the 3465 * busmaster bit will be zero. If this is the case, then we 3466 * must restore PCI config space before the NIC can be used 3467 * again 3468 */ 3469 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3470 if (cmd == 0xffff) { 3471 /* 3472 * maybe the watchdog caught the NIC rebooting; wait 3473 * up to 100ms for it to finish. If it does not come 3474 * back, then give up 3475 */ 3476 DELAY(1000*100); 3477 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3478 if (cmd == 0xffff) { 3479 device_printf(sc->dev, "NIC disappeared!\n"); 3480 return (err); 3481 } 3482 } 3483 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3484 /* print the reboot status */ 3485 reboot = mxge_read_reboot(sc); 3486 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3487 reboot); 3488 /* restore PCI configuration space */ 3489 dinfo = device_get_ivars(sc->dev); 3490 pci_cfg_restore(sc->dev, dinfo); 3491 3492 /* and redo any changes we made to our config space */ 3493 mxge_setup_cfg_space(sc); 3494 3495 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) { 3496 mxge_close(sc); 3497 err = mxge_open(sc); 3498 } 3499 } else { 3500 device_printf(sc->dev, "NIC did not reboot, ring state:\n"); 3501 device_printf(sc->dev, "tx.req=%d tx.done=%d\n", 3502 sc->ss->tx.req, sc->ss->tx.done); 3503 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3504 sc->ss->tx.pkt_done, 3505 be32toh(sc->ss->fw_stats->send_done_count)); 3506 device_printf(sc->dev, "not resetting\n"); 3507 } 3508 return (err); 3509 } 3510 3511 static int 3512 mxge_watchdog(mxge_softc_t *sc) 3513 { 3514 mxge_tx_ring_t *tx = &sc->ss->tx; 3515 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3516 int err = 0; 3517 3518 /* see if we have outstanding transmits, which 3519 have been pending for more than mxge_ticks */ 3520 if (tx->req != tx->done && 3521 tx->watchdog_req != tx->watchdog_done && 3522 tx->done == tx->watchdog_done) { 3523 /* check for pause blocking before resetting */ 3524 if (tx->watchdog_rx_pause == rx_pause) 3525 err = mxge_watchdog_reset(sc); 3526 else 3527 device_printf(sc->dev, "Flow control blocking " 3528 "xmits, check link partner\n"); 3529 } 3530 3531 tx->watchdog_req = tx->req; 3532 tx->watchdog_done = tx->done; 3533 tx->watchdog_rx_pause = rx_pause; 3534 3535 if (sc->need_media_probe) 3536 mxge_media_probe(sc); 3537 return (err); 3538 } 3539 3540 static void 3541 mxge_update_stats(mxge_softc_t *sc) 3542 { 3543 struct mxge_slice_state *ss; 3544 u_long ipackets = 0; 3545 int slice; 3546 3547 for(slice = 0; slice < sc->num_slices; slice++) { 3548 ss = &sc->ss[slice]; 3549 ipackets += ss->ipackets; 3550 } 3551 sc->ifp->if_ipackets = ipackets; 3552 3553 } 3554 static void 3555 mxge_tick(void *arg) 3556 
{ 3557 mxge_softc_t *sc = arg; 3558 int err = 0; 3559 3560 /* aggregate stats from different slices */ 3561 mxge_update_stats(sc); 3562 if (!sc->watchdog_countdown) { 3563 err = mxge_watchdog(sc); 3564 sc->watchdog_countdown = 4; 3565 } 3566 sc->watchdog_countdown--; 3567 if (err == 0) 3568 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3569 3570 } 3571 3572 static int 3573 mxge_media_change(struct ifnet *ifp) 3574 { 3575 return EINVAL; 3576 } 3577 3578 static int 3579 mxge_change_mtu(mxge_softc_t *sc, int mtu) 3580 { 3581 struct ifnet *ifp = sc->ifp; 3582 int real_mtu, old_mtu; 3583 int err = 0; 3584 3585 3586 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3587 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 3588 return EINVAL; 3589 mtx_lock(&sc->driver_mtx); 3590 old_mtu = ifp->if_mtu; 3591 ifp->if_mtu = mtu; 3592 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3593 mxge_close(sc); 3594 err = mxge_open(sc); 3595 if (err != 0) { 3596 ifp->if_mtu = old_mtu; 3597 mxge_close(sc); 3598 (void) mxge_open(sc); 3599 } 3600 } 3601 mtx_unlock(&sc->driver_mtx); 3602 return err; 3603 } 3604 3605 static void 3606 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 3607 { 3608 mxge_softc_t *sc = ifp->if_softc; 3609 3610 3611 if (sc == NULL) 3612 return; 3613 ifmr->ifm_status = IFM_AVALID; 3614 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 3615 ifmr->ifm_active = IFM_AUTO | IFM_ETHER; 3616 ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0; 3617 } 3618 3619 static int 3620 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 3621 { 3622 mxge_softc_t *sc = ifp->if_softc; 3623 struct ifreq *ifr = (struct ifreq *)data; 3624 int err, mask; 3625 3626 err = 0; 3627 switch (command) { 3628 case SIOCSIFADDR: 3629 case SIOCGIFADDR: 3630 err = ether_ioctl(ifp, command, data); 3631 break; 3632 3633 case SIOCSIFMTU: 3634 err = mxge_change_mtu(sc, ifr->ifr_mtu); 3635 break; 3636 3637 case SIOCSIFFLAGS: 3638 mtx_lock(&sc->driver_mtx); 3639 if (ifp->if_flags & IFF_UP) { 3640 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 3641 err = mxge_open(sc); 3642 } else { 3643 /* take care of promis can allmulti 3644 flag chages */ 3645 mxge_change_promisc(sc, 3646 ifp->if_flags & IFF_PROMISC); 3647 mxge_set_multicast_list(sc); 3648 } 3649 } else { 3650 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3651 mxge_close(sc); 3652 } 3653 } 3654 mtx_unlock(&sc->driver_mtx); 3655 break; 3656 3657 case SIOCADDMULTI: 3658 case SIOCDELMULTI: 3659 mtx_lock(&sc->driver_mtx); 3660 mxge_set_multicast_list(sc); 3661 mtx_unlock(&sc->driver_mtx); 3662 break; 3663 3664 case SIOCSIFCAP: 3665 mtx_lock(&sc->driver_mtx); 3666 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 3667 if (mask & IFCAP_TXCSUM) { 3668 if (IFCAP_TXCSUM & ifp->if_capenable) { 3669 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 3670 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP 3671 | CSUM_TSO); 3672 } else { 3673 ifp->if_capenable |= IFCAP_TXCSUM; 3674 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 3675 } 3676 } else if (mask & IFCAP_RXCSUM) { 3677 if (IFCAP_RXCSUM & ifp->if_capenable) { 3678 ifp->if_capenable &= ~IFCAP_RXCSUM; 3679 sc->csum_flag = 0; 3680 } else { 3681 ifp->if_capenable |= IFCAP_RXCSUM; 3682 sc->csum_flag = 1; 3683 } 3684 } 3685 if (mask & IFCAP_TSO4) { 3686 if (IFCAP_TSO4 & ifp->if_capenable) { 3687 ifp->if_capenable &= ~IFCAP_TSO4; 3688 ifp->if_hwassist &= ~CSUM_TSO; 3689 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 3690 ifp->if_capenable |= IFCAP_TSO4; 3691 ifp->if_hwassist |= CSUM_TSO; 3692 } else { 3693 printf("mxge requires tx checksum 
offload" 3694 " be enabled to use TSO\n"); 3695 err = EINVAL; 3696 } 3697 } 3698 if (mask & IFCAP_LRO) { 3699 if (IFCAP_LRO & ifp->if_capenable) 3700 err = mxge_change_lro_locked(sc, 0); 3701 else 3702 err = mxge_change_lro_locked(sc, mxge_lro_cnt); 3703 } 3704 if (mask & IFCAP_VLAN_HWTAGGING) 3705 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 3706 mtx_unlock(&sc->driver_mtx); 3707 VLAN_CAPABILITIES(ifp); 3708 3709 break; 3710 3711 case SIOCGIFMEDIA: 3712 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 3713 &sc->media, command); 3714 break; 3715 3716 default: 3717 err = ENOTTY; 3718 } 3719 return err; 3720 } 3721 3722 static void 3723 mxge_fetch_tunables(mxge_softc_t *sc) 3724 { 3725 3726 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); 3727 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 3728 &mxge_flow_control); 3729 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 3730 &mxge_intr_coal_delay); 3731 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 3732 &mxge_nvidia_ecrc_enable); 3733 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 3734 &mxge_force_firmware); 3735 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 3736 &mxge_deassert_wait); 3737 TUNABLE_INT_FETCH("hw.mxge.verbose", 3738 &mxge_verbose); 3739 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 3740 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt); 3741 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); 3742 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); 3743 if (sc->lro_cnt != 0) 3744 mxge_lro_cnt = sc->lro_cnt; 3745 3746 if (bootverbose) 3747 mxge_verbose = 1; 3748 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 3749 mxge_intr_coal_delay = 30; 3750 if (mxge_ticks == 0) 3751 mxge_ticks = hz / 2; 3752 sc->pause = mxge_flow_control; 3753 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 3754 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_SRC_PORT) { 3755 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT; 3756 } 3757 } 3758 3759 3760 static void 3761 mxge_free_slices(mxge_softc_t *sc) 3762 { 3763 struct mxge_slice_state *ss; 3764 int i; 3765 3766 3767 if (sc->ss == NULL) 3768 return; 3769 3770 for (i = 0; i < sc->num_slices; i++) { 3771 ss = &sc->ss[i]; 3772 if (ss->fw_stats != NULL) { 3773 mxge_dma_free(&ss->fw_stats_dma); 3774 ss->fw_stats = NULL; 3775 mtx_destroy(&ss->tx.mtx); 3776 } 3777 if (ss->rx_done.entry != NULL) { 3778 mxge_dma_free(&ss->rx_done.dma); 3779 ss->rx_done.entry = NULL; 3780 } 3781 } 3782 free(sc->ss, M_DEVBUF); 3783 sc->ss = NULL; 3784 } 3785 3786 static int 3787 mxge_alloc_slices(mxge_softc_t *sc) 3788 { 3789 mxge_cmd_t cmd; 3790 struct mxge_slice_state *ss; 3791 size_t bytes; 3792 int err, i, max_intr_slots; 3793 3794 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 3795 if (err != 0) { 3796 device_printf(sc->dev, "Cannot determine rx ring size\n"); 3797 return err; 3798 } 3799 sc->rx_ring_size = cmd.data0; 3800 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); 3801 3802 bytes = sizeof (*sc->ss) * sc->num_slices; 3803 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO); 3804 if (sc->ss == NULL) 3805 return (ENOMEM); 3806 for (i = 0; i < sc->num_slices; i++) { 3807 ss = &sc->ss[i]; 3808 3809 ss->sc = sc; 3810 3811 /* allocate per-slice rx interrupt queues */ 3812 3813 bytes = max_intr_slots * sizeof (*ss->rx_done.entry); 3814 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096); 3815 if (err != 0) 3816 goto abort; 3817 ss->rx_done.entry = ss->rx_done.dma.addr; 3818 bzero(ss->rx_done.entry, bytes); 3819 3820 /* 3821 * allocate the 
per-slice firmware stats; stats 3822 * (including tx) are used used only on the first 3823 * slice for now 3824 */ 3825 if (i > 0) 3826 continue; 3827 3828 bytes = sizeof (*ss->fw_stats); 3829 err = mxge_dma_alloc(sc, &ss->fw_stats_dma, 3830 sizeof (*ss->fw_stats), 64); 3831 if (err != 0) 3832 goto abort; 3833 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr; 3834 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name), 3835 "%s:tx(%d)", device_get_nameunit(sc->dev), i); 3836 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF); 3837 } 3838 3839 return (0); 3840 3841 abort: 3842 mxge_free_slices(sc); 3843 return (ENOMEM); 3844 } 3845 3846 static void 3847 mxge_slice_probe(mxge_softc_t *sc) 3848 { 3849 mxge_cmd_t cmd; 3850 char *old_fw; 3851 int msix_cnt, status, max_intr_slots; 3852 3853 sc->num_slices = 1; 3854 /* 3855 * don't enable multiple slices if they are not enabled, 3856 * or if this is not an SMP system 3857 */ 3858 3859 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2) 3860 return; 3861 3862 /* see how many MSI-X interrupts are available */ 3863 msix_cnt = pci_msix_count(sc->dev); 3864 if (msix_cnt < 2) 3865 return; 3866 3867 /* now load the slice aware firmware see what it supports */ 3868 old_fw = sc->fw_name; 3869 if (old_fw == mxge_fw_aligned) 3870 sc->fw_name = mxge_fw_rss_aligned; 3871 else 3872 sc->fw_name = mxge_fw_rss_unaligned; 3873 status = mxge_load_firmware(sc, 0); 3874 if (status != 0) { 3875 device_printf(sc->dev, "Falling back to a single slice\n"); 3876 return; 3877 } 3878 3879 /* try to send a reset command to the card to see if it 3880 is alive */ 3881 memset(&cmd, 0, sizeof (cmd)); 3882 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 3883 if (status != 0) { 3884 device_printf(sc->dev, "failed reset\n"); 3885 goto abort_with_fw; 3886 } 3887 3888 /* get rx ring size */ 3889 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 3890 if (status != 0) { 3891 device_printf(sc->dev, "Cannot determine rx ring size\n"); 3892 goto abort_with_fw; 3893 } 3894 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t)); 3895 3896 /* tell it the size of the interrupt queues */ 3897 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot); 3898 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 3899 if (status != 0) { 3900 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 3901 goto abort_with_fw; 3902 } 3903 3904 /* ask the maximum number of slices it supports */ 3905 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 3906 if (status != 0) { 3907 device_printf(sc->dev, 3908 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); 3909 goto abort_with_fw; 3910 } 3911 sc->num_slices = cmd.data0; 3912 if (sc->num_slices > msix_cnt) 3913 sc->num_slices = msix_cnt; 3914 3915 if (mxge_max_slices == -1) { 3916 /* cap to number of CPUs in system */ 3917 if (sc->num_slices > mp_ncpus) 3918 sc->num_slices = mp_ncpus; 3919 } else { 3920 if (sc->num_slices > mxge_max_slices) 3921 sc->num_slices = mxge_max_slices; 3922 } 3923 /* make sure it is a power of two */ 3924 while (sc->num_slices & (sc->num_slices - 1)) 3925 sc->num_slices--; 3926 3927 if (mxge_verbose) 3928 device_printf(sc->dev, "using %d slices\n", 3929 sc->num_slices); 3930 3931 return; 3932 3933 abort_with_fw: 3934 sc->fw_name = old_fw; 3935 (void) mxge_load_firmware(sc, 0); 3936 } 3937 3938 static int 3939 mxge_add_msix_irqs(mxge_softc_t *sc) 3940 { 3941 size_t bytes; 3942 int count, err, i, rid; 3943 3944 rid = PCIR_BAR(2); 3945 sc->msix_table_res = 
bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 3946 &rid, RF_ACTIVE); 3947 3948 if (sc->msix_table_res == NULL) { 3949 device_printf(sc->dev, "couldn't alloc MSIX table res\n"); 3950 return ENXIO; 3951 } 3952 3953 count = sc->num_slices; 3954 err = pci_alloc_msix(sc->dev, &count); 3955 if (err != 0) { 3956 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d" 3957 "err = %d \n", sc->num_slices, err); 3958 goto abort_with_msix_table; 3959 } 3960 if (count < sc->num_slices) { 3961 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n", 3962 count, sc->num_slices); 3963 device_printf(sc->dev, 3964 "Try setting hw.mxge.max_slices to %d\n", 3965 count); 3966 err = ENOSPC; 3967 goto abort_with_msix; 3968 } 3969 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices; 3970 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 3971 if (sc->msix_irq_res == NULL) { 3972 err = ENOMEM; 3973 goto abort_with_msix; 3974 } 3975 3976 for (i = 0; i < sc->num_slices; i++) { 3977 rid = i + 1; 3978 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev, 3979 SYS_RES_IRQ, 3980 &rid, RF_ACTIVE); 3981 if (sc->msix_irq_res[i] == NULL) { 3982 device_printf(sc->dev, "couldn't allocate IRQ res" 3983 " for message %d\n", i); 3984 err = ENXIO; 3985 goto abort_with_res; 3986 } 3987 } 3988 3989 bytes = sizeof (*sc->msix_ih) * sc->num_slices; 3990 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 3991 3992 for (i = 0; i < sc->num_slices; i++) { 3993 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i], 3994 INTR_TYPE_NET | INTR_MPSAFE, 3995 #if __FreeBSD_version > 700030 3996 NULL, 3997 #endif 3998 mxge_intr, &sc->ss[i], &sc->msix_ih[i]); 3999 if (err != 0) { 4000 device_printf(sc->dev, "couldn't setup intr for " 4001 "message %d\n", i); 4002 goto abort_with_intr; 4003 } 4004 } 4005 4006 if (mxge_verbose) { 4007 device_printf(sc->dev, "using %d msix IRQs:", 4008 sc->num_slices); 4009 for (i = 0; i < sc->num_slices; i++) 4010 printf(" %ld", rman_get_start(sc->msix_irq_res[i])); 4011 printf("\n"); 4012 } 4013 return (0); 4014 4015 abort_with_intr: 4016 for (i = 0; i < sc->num_slices; i++) { 4017 if (sc->msix_ih[i] != NULL) { 4018 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4019 sc->msix_ih[i]); 4020 sc->msix_ih[i] = NULL; 4021 } 4022 } 4023 free(sc->msix_ih, M_DEVBUF); 4024 4025 4026 abort_with_res: 4027 for (i = 0; i < sc->num_slices; i++) { 4028 rid = i + 1; 4029 if (sc->msix_irq_res[i] != NULL) 4030 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4031 sc->msix_irq_res[i]); 4032 sc->msix_irq_res[i] = NULL; 4033 } 4034 free(sc->msix_irq_res, M_DEVBUF); 4035 4036 4037 abort_with_msix: 4038 pci_release_msi(sc->dev); 4039 4040 abort_with_msix_table: 4041 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4042 sc->msix_table_res); 4043 4044 return err; 4045 } 4046 4047 static int 4048 mxge_add_single_irq(mxge_softc_t *sc) 4049 { 4050 int count, err, rid; 4051 4052 count = pci_msi_count(sc->dev); 4053 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) { 4054 rid = 1; 4055 } else { 4056 rid = 0; 4057 sc->legacy_irq = 1; 4058 } 4059 sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0, 4060 1, RF_SHAREABLE | RF_ACTIVE); 4061 if (sc->irq_res == NULL) { 4062 device_printf(sc->dev, "could not alloc interrupt\n"); 4063 return ENXIO; 4064 } 4065 if (mxge_verbose) 4066 device_printf(sc->dev, "using %s irq %ld\n", 4067 sc->legacy_irq ? 
"INTx" : "MSI", 4068 rman_get_start(sc->irq_res)); 4069 err = bus_setup_intr(sc->dev, sc->irq_res, 4070 INTR_TYPE_NET | INTR_MPSAFE, 4071 #if __FreeBSD_version > 700030 4072 NULL, 4073 #endif 4074 mxge_intr, &sc->ss[0], &sc->ih); 4075 if (err != 0) { 4076 bus_release_resource(sc->dev, SYS_RES_IRQ, 4077 sc->legacy_irq ? 0 : 1, sc->irq_res); 4078 if (!sc->legacy_irq) 4079 pci_release_msi(sc->dev); 4080 } 4081 return err; 4082 } 4083 4084 static void 4085 mxge_rem_msix_irqs(mxge_softc_t *sc) 4086 { 4087 int i, rid; 4088 4089 for (i = 0; i < sc->num_slices; i++) { 4090 if (sc->msix_ih[i] != NULL) { 4091 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4092 sc->msix_ih[i]); 4093 sc->msix_ih[i] = NULL; 4094 } 4095 } 4096 free(sc->msix_ih, M_DEVBUF); 4097 4098 for (i = 0; i < sc->num_slices; i++) { 4099 rid = i + 1; 4100 if (sc->msix_irq_res[i] != NULL) 4101 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4102 sc->msix_irq_res[i]); 4103 sc->msix_irq_res[i] = NULL; 4104 } 4105 free(sc->msix_irq_res, M_DEVBUF); 4106 4107 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4108 sc->msix_table_res); 4109 4110 pci_release_msi(sc->dev); 4111 return; 4112 } 4113 4114 static void 4115 mxge_rem_single_irq(mxge_softc_t *sc) 4116 { 4117 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 4118 bus_release_resource(sc->dev, SYS_RES_IRQ, 4119 sc->legacy_irq ? 0 : 1, sc->irq_res); 4120 if (!sc->legacy_irq) 4121 pci_release_msi(sc->dev); 4122 } 4123 4124 static void 4125 mxge_rem_irq(mxge_softc_t *sc) 4126 { 4127 if (sc->num_slices > 1) 4128 mxge_rem_msix_irqs(sc); 4129 else 4130 mxge_rem_single_irq(sc); 4131 } 4132 4133 static int 4134 mxge_add_irq(mxge_softc_t *sc) 4135 { 4136 int err; 4137 4138 if (sc->num_slices > 1) 4139 err = mxge_add_msix_irqs(sc); 4140 else 4141 err = mxge_add_single_irq(sc); 4142 4143 if (0 && err == 0 && sc->num_slices > 1) { 4144 mxge_rem_msix_irqs(sc); 4145 err = mxge_add_msix_irqs(sc); 4146 } 4147 return err; 4148 } 4149 4150 4151 static int 4152 mxge_attach(device_t dev) 4153 { 4154 mxge_softc_t *sc = device_get_softc(dev); 4155 struct ifnet *ifp; 4156 int err, rid; 4157 4158 sc->dev = dev; 4159 mxge_fetch_tunables(sc); 4160 4161 err = bus_dma_tag_create(NULL, /* parent */ 4162 1, /* alignment */ 4163 0, /* boundary */ 4164 BUS_SPACE_MAXADDR, /* low */ 4165 BUS_SPACE_MAXADDR, /* high */ 4166 NULL, NULL, /* filter */ 4167 65536 + 256, /* maxsize */ 4168 MXGE_MAX_SEND_DESC, /* num segs */ 4169 65536, /* maxsegsize */ 4170 0, /* flags */ 4171 NULL, NULL, /* lock */ 4172 &sc->parent_dmat); /* tag */ 4173 4174 if (err != 0) { 4175 device_printf(sc->dev, "Err %d allocating parent dmat\n", 4176 err); 4177 goto abort_with_nothing; 4178 } 4179 4180 ifp = sc->ifp = if_alloc(IFT_ETHER); 4181 if (ifp == NULL) { 4182 device_printf(dev, "can not if_alloc()\n"); 4183 err = ENOSPC; 4184 goto abort_with_parent_dmat; 4185 } 4186 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4187 4188 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", 4189 device_get_nameunit(dev)); 4190 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF); 4191 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), 4192 "%s:drv", device_get_nameunit(dev)); 4193 mtx_init(&sc->driver_mtx, sc->driver_mtx_name, 4194 MTX_NETWORK_LOCK, MTX_DEF); 4195 4196 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); 4197 4198 mxge_setup_cfg_space(sc); 4199 4200 /* Map the board into the kernel */ 4201 rid = PCIR_BARS; 4202 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, 4203 ~0, 1, RF_ACTIVE); 
static int
mxge_attach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp;
	int err, rid;

	sc->dev = dev;
	mxge_fetch_tunables(sc);

	err = bus_dma_tag_create(NULL,			/* parent */
				 1,			/* alignment */
				 0,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 MXGE_MAX_SEND_DESC,	/* num segs */
				 65536,			/* maxsegsize */
				 0,			/* flags */
				 NULL, NULL,		/* lock */
				 &sc->parent_dmat);	/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating parent dmat\n",
			      err);
		goto abort_with_nothing;
	}

	ifp = sc->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "can not if_alloc()\n");
		err = ENOSPC;
		goto abort_with_parent_dmat;
	}
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
		 device_get_nameunit(dev));
	mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
	snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
		 "%s:drv", device_get_nameunit(dev));
	mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
		 MTX_NETWORK_LOCK, MTX_DEF);

	callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);

	mxge_setup_cfg_space(sc);

	/* Map the board into the kernel */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
					 ~0, 1, RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto abort_with_lock;
	}
	sc->sram = rman_get_virtual(sc->mem_res);
	/* only part of the 2MB of LANai SRAM is usable by the host;
	   the remainder is reserved */
	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
	if (sc->sram_size > rman_get_size(sc->mem_res)) {
		device_printf(dev, "impossible memory region size %ld\n",
			      rman_get_size(sc->mem_res));
		err = ENXIO;
		goto abort_with_mem_res;
	}

	/* make a NUL-terminated copy of the EEPROM strings section of
	   LANai SRAM */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
				sc->eeprom_strings,
				MXGE_EEPROM_STRINGS_SIZE - 2);
	err = mxge_parse_strings(sc);
	if (err != 0)
		goto abort_with_mem_res;

	/* Enable write combining for efficient use of PCIe bus */
	mxge_enable_wc(sc);

	/* Allocate the out-of-band DMA memory */
	err = mxge_dma_alloc(sc, &sc->cmd_dma,
			     sizeof (mxge_cmd_t), 64);
	if (err != 0)
		goto abort_with_mem_res;
	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0)
		goto abort_with_cmd_dma;

	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0)
		goto abort_with_zeropad_dma;

	/* select & load the firmware */
	err = mxge_select_firmware(sc);
	if (err != 0)
		goto abort_with_dmabench;
	sc->intr_coal_delay = mxge_intr_coal_delay;

	mxge_slice_probe(sc);
	err = mxge_alloc_slices(sc);
	if (err != 0)
		goto abort_with_dmabench;

	err = mxge_reset(sc, 0);
	if (err != 0)
		goto abort_with_slices;

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to allocate rings\n");
		goto abort_with_slices;
	}

	err = mxge_add_irq(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to add irq\n");
		goto abort_with_rings;
	}

	ifp->if_baudrate = IF_Gbps(10UL);
	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
		IFCAP_VLAN_MTU | IFCAP_LRO;

#ifdef MXGE_NEW_VLAN_API
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
#endif
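
	/*
	 * Jumbo frames are advertised only when the firmware reports an
	 * MTU of at least 9000 bytes; with older firmware the interface
	 * stays limited to the smaller MTU printed below.
	 */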
	sc->max_mtu = mxge_max_mtu(sc);
	if (sc->max_mtu >= 9000)
		ifp->if_capabilities |= IFCAP_JUMBO_MTU;
	else
		device_printf(dev, "MTU limited to %d.  Install "
			      "latest firmware for 9000 byte jumbo support\n",
			      sc->max_mtu - ETHER_HDR_LEN);
	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
	ifp->if_capenable = ifp->if_capabilities;
	if (sc->lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	sc->csum_flag = 1;
	ifp->if_init = mxge_init;
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = mxge_ioctl;
	ifp->if_start = mxge_start;
	/* Initialise the ifmedia structure */
	ifmedia_init(&sc->media, 0, mxge_media_change,
		     mxge_media_status);
	mxge_set_media(sc, IFM_ETHER | IFM_AUTO);
	mxge_media_probe(sc);
	ether_ifattach(ifp, sc->mac_addr);
	/* ether_ifattach sets mtu to 1500 */
	if (ifp->if_capabilities & IFCAP_JUMBO_MTU)
		ifp->if_mtu = 9000;

	mxge_add_sysctls(sc);
	return 0;

abort_with_rings:
	mxge_free_rings(sc);
abort_with_slices:
	mxge_free_slices(sc);
abort_with_dmabench:
	mxge_dma_free(&sc->dmabench_dma);
abort_with_zeropad_dma:
	mxge_dma_free(&sc->zeropad_dma);
abort_with_cmd_dma:
	mxge_dma_free(&sc->cmd_dma);
abort_with_mem_res:
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
abort_with_lock:
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(ifp);
abort_with_parent_dmat:
	bus_dma_tag_destroy(sc->parent_dmat);

abort_with_nothing:
	return err;
}

static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	if (mxge_vlans_active(sc)) {
		device_printf(sc->dev,
			      "Detach vlans before removing module\n");
		return EBUSY;
	}
	mtx_lock(&sc->driver_mtx);
	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
		mxge_close(sc);
	mtx_unlock(&sc->driver_mtx);
	ether_ifdetach(sc->ifp);
	callout_drain(&sc->co_hdl);
	ifmedia_removeall(&sc->media);
	mxge_dummy_rdma(sc, 0);
	mxge_rem_sysctls(sc);
	mxge_rem_irq(sc);
	mxge_free_rings(sc);
	mxge_free_slices(sc);
	mxge_dma_free(&sc->dmabench_dma);
	mxge_dma_free(&sc->zeropad_dma);
	mxge_dma_free(&sc->cmd_dma);
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(sc->ifp);
	bus_dma_tag_destroy(sc->parent_dmat);
	return 0;
}

static int
mxge_shutdown(device_t dev)
{
	return 0;
}

/*
  This file uses Myri10GE driver indentation.

  Local Variables:
  c-file-style:"linux"
  tab-width:8
  End:
*/