1 /****************************************************************************** 2 3 Copyright (c) 2006-2013, Myricom Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Myricom Inc, nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 
27 28 $FreeBSD: head/sys/dev/mxge/if_mxge.c 254263 2013-08-12 23:30:01Z scottl $ 29 30 ***************************************************************************/ 31 32 #include "opt_ifpoll.h" 33 #include "opt_inet.h" 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/linker.h> 38 #include <sys/firmware.h> 39 #include <sys/endian.h> 40 #include <sys/in_cksum.h> 41 #include <sys/sockio.h> 42 #include <sys/mbuf.h> 43 #include <sys/malloc.h> 44 #include <sys/kernel.h> 45 #include <sys/module.h> 46 #include <sys/serialize.h> 47 #include <sys/socket.h> 48 #include <sys/sysctl.h> 49 50 #include <net/if.h> 51 #include <net/if_arp.h> 52 #include <net/ifq_var.h> 53 #include <net/ethernet.h> 54 #include <net/if_dl.h> 55 #include <net/if_media.h> 56 #include <net/if_poll.h> 57 58 #include <net/bpf.h> 59 60 #include <net/if_types.h> 61 #include <net/vlan/if_vlan_var.h> 62 #include <net/zlib.h> 63 #include <net/toeplitz.h> 64 65 #include <netinet/in_systm.h> 66 #include <netinet/in.h> 67 #include <netinet/ip.h> 68 #include <netinet/tcp.h> 69 70 #include <sys/bus.h> 71 #include <sys/rman.h> 72 73 #include <bus/pci/pcireg.h> 74 #include <bus/pci/pcivar.h> 75 #include <bus/pci/pci_private.h> /* XXX for pci_cfg_restore */ 76 77 #include <vm/vm.h> /* for pmap_mapdev() */ 78 #include <vm/pmap.h> 79 80 #if defined(__x86_64__) 81 #include <machine/specialreg.h> 82 #endif 83 84 #include <dev/netif/mxge/mxge_mcp.h> 85 #include <dev/netif/mxge/mcp_gen_header.h> 86 #include <dev/netif/mxge/if_mxge_var.h> 87 88 #define MXGE_IFM (IFM_ETHER | IFM_FDX | IFM_ETH_FORCEPAUSE) 89 90 #define MXGE_RX_SMALL_BUFLEN (MHLEN - MXGEFW_PAD) 91 #define MXGE_HWRSS_KEYLEN 16 92 93 /* Tunable params */ 94 static int mxge_nvidia_ecrc_enable = 1; 95 static int mxge_force_firmware = 0; 96 static int mxge_intr_coal_delay = MXGE_INTR_COAL_DELAY; 97 static int mxge_deassert_wait = 1; 98 static int mxge_ticks; 99 static int mxge_num_slices = 0; 100 static int mxge_always_promisc = 0; 101 static 
int mxge_throttle = 0; 102 static int mxge_msi_enable = 1; 103 static int mxge_msix_enable = 1; 104 static int mxge_multi_tx = 1; 105 /* 106 * Don't use RSS by default, its just too slow 107 */ 108 static int mxge_use_rss = 0; 109 110 static char mxge_flowctrl[IFM_ETH_FC_STRLEN] = IFM_ETH_FC_FORCE_FULL; 111 112 static const char *mxge_fw_unaligned = "mxge_ethp_z8e"; 113 static const char *mxge_fw_aligned = "mxge_eth_z8e"; 114 static const char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; 115 static const char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; 116 117 TUNABLE_INT("hw.mxge.num_slices", &mxge_num_slices); 118 TUNABLE_INT("hw.mxge.intr_coal_delay", &mxge_intr_coal_delay); 119 TUNABLE_INT("hw.mxge.nvidia_ecrc_enable", &mxge_nvidia_ecrc_enable); 120 TUNABLE_INT("hw.mxge.force_firmware", &mxge_force_firmware); 121 TUNABLE_INT("hw.mxge.deassert_wait", &mxge_deassert_wait); 122 TUNABLE_INT("hw.mxge.ticks", &mxge_ticks); 123 TUNABLE_INT("hw.mxge.always_promisc", &mxge_always_promisc); 124 TUNABLE_INT("hw.mxge.throttle", &mxge_throttle); 125 TUNABLE_INT("hw.mxge.multi_tx", &mxge_multi_tx); 126 TUNABLE_INT("hw.mxge.use_rss", &mxge_use_rss); 127 TUNABLE_INT("hw.mxge.msi.enable", &mxge_msi_enable); 128 TUNABLE_INT("hw.mxge.msix.enable", &mxge_msix_enable); 129 TUNABLE_STR("hw.mxge.flow_ctrl", mxge_flowctrl, sizeof(mxge_flowctrl)); 130 131 static int mxge_probe(device_t dev); 132 static int mxge_attach(device_t dev); 133 static int mxge_detach(device_t dev); 134 static int mxge_shutdown(device_t dev); 135 136 static int mxge_alloc_intr(struct mxge_softc *sc); 137 static void mxge_free_intr(struct mxge_softc *sc); 138 static int mxge_setup_intr(struct mxge_softc *sc); 139 static void mxge_teardown_intr(struct mxge_softc *sc, int cnt); 140 141 static device_method_t mxge_methods[] = { 142 /* Device interface */ 143 DEVMETHOD(device_probe, mxge_probe), 144 DEVMETHOD(device_attach, mxge_attach), 145 DEVMETHOD(device_detach, mxge_detach), 146 DEVMETHOD(device_shutdown, 
mxge_shutdown), 147 DEVMETHOD_END 148 }; 149 150 static driver_t mxge_driver = { 151 "mxge", 152 mxge_methods, 153 sizeof(mxge_softc_t), 154 }; 155 156 static devclass_t mxge_devclass; 157 158 /* Declare ourselves to be a child of the PCI bus.*/ 159 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, NULL, NULL); 160 MODULE_DEPEND(mxge, firmware, 1, 1, 1); 161 MODULE_DEPEND(mxge, zlib, 1, 1, 1); 162 163 static int mxge_load_firmware(mxge_softc_t *sc, int adopt); 164 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 165 static void mxge_close(mxge_softc_t *sc, int down); 166 static int mxge_open(mxge_softc_t *sc); 167 static void mxge_tick(void *arg); 168 static void mxge_watchdog_reset(mxge_softc_t *sc); 169 static void mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice); 170 171 static int 172 mxge_probe(device_t dev) 173 { 174 if (pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM && 175 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E || 176 pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9)) { 177 int rev = pci_get_revid(dev); 178 179 switch (rev) { 180 case MXGE_PCI_REV_Z8E: 181 device_set_desc(dev, "Myri10G-PCIE-8A"); 182 break; 183 case MXGE_PCI_REV_Z8ES: 184 device_set_desc(dev, "Myri10G-PCIE-8B"); 185 break; 186 default: 187 device_set_desc(dev, "Myri10G-PCIE-8??"); 188 device_printf(dev, "Unrecognized rev %d NIC\n", rev); 189 break; 190 } 191 return 0; 192 } 193 return ENXIO; 194 } 195 196 static void 197 mxge_enable_wc(mxge_softc_t *sc) 198 { 199 #if defined(__x86_64__) 200 vm_offset_t len; 201 202 sc->wc = 1; 203 len = rman_get_size(sc->mem_res); 204 pmap_change_attr((vm_offset_t) sc->sram, len / PAGE_SIZE, 205 PAT_WRITE_COMBINING); 206 #endif 207 } 208 209 static int 210 mxge_dma_alloc(mxge_softc_t *sc, bus_dmamem_t *dma, size_t bytes, 211 bus_size_t alignment) 212 { 213 bus_size_t boundary; 214 int err; 215 216 if (bytes > 4096 && alignment == 4096) 217 boundary = 0; 218 else 219 boundary = 4096; 220 221 err = 
bus_dmamem_coherent(sc->parent_dmat, alignment, boundary, 222 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, bytes, 223 BUS_DMA_WAITOK | BUS_DMA_ZERO, dma); 224 if (err != 0) { 225 device_printf(sc->dev, "bus_dmamem_coherent failed: %d\n", err); 226 return err; 227 } 228 return 0; 229 } 230 231 static void 232 mxge_dma_free(bus_dmamem_t *dma) 233 { 234 bus_dmamap_unload(dma->dmem_tag, dma->dmem_map); 235 bus_dmamem_free(dma->dmem_tag, dma->dmem_addr, dma->dmem_map); 236 bus_dma_tag_destroy(dma->dmem_tag); 237 } 238 239 /* 240 * The eeprom strings on the lanaiX have the format 241 * SN=x\0 242 * MAC=x:x:x:x:x:x\0 243 * PC=text\0 244 */ 245 static int 246 mxge_parse_strings(mxge_softc_t *sc) 247 { 248 const char *ptr; 249 int i, found_mac, found_sn2; 250 char *endptr; 251 252 ptr = sc->eeprom_strings; 253 found_mac = 0; 254 found_sn2 = 0; 255 while (*ptr != '\0') { 256 if (strncmp(ptr, "MAC=", 4) == 0) { 257 ptr += 4; 258 for (i = 0;;) { 259 sc->mac_addr[i] = strtoul(ptr, &endptr, 16); 260 if (endptr - ptr != 2) 261 goto abort; 262 ptr = endptr; 263 if (++i == 6) 264 break; 265 if (*ptr++ != ':') 266 goto abort; 267 } 268 found_mac = 1; 269 } else if (strncmp(ptr, "PC=", 3) == 0) { 270 ptr += 3; 271 strlcpy(sc->product_code_string, ptr, 272 sizeof(sc->product_code_string)); 273 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) { 274 ptr += 3; 275 strlcpy(sc->serial_number_string, ptr, 276 sizeof(sc->serial_number_string)); 277 } else if (strncmp(ptr, "SN2=", 4) == 0) { 278 /* SN2 takes precedence over SN */ 279 ptr += 4; 280 found_sn2 = 1; 281 strlcpy(sc->serial_number_string, ptr, 282 sizeof(sc->serial_number_string)); 283 } 284 while (*ptr++ != '\0') {} 285 } 286 287 if (found_mac) 288 return 0; 289 290 abort: 291 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 292 return ENXIO; 293 } 294 295 #if defined(__x86_64__) 296 297 static void 298 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 299 { 300 uint32_t val; 301 unsigned long base, off; 302 char *va, *cfgptr; 
303 device_t pdev, mcp55; 304 uint16_t vendor_id, device_id, word; 305 uintptr_t bus, slot, func, ivend, idev; 306 uint32_t *ptr32; 307 308 if (!mxge_nvidia_ecrc_enable) 309 return; 310 311 pdev = device_get_parent(device_get_parent(sc->dev)); 312 if (pdev == NULL) { 313 device_printf(sc->dev, "could not find parent?\n"); 314 return; 315 } 316 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 317 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 318 319 if (vendor_id != 0x10de) 320 return; 321 322 base = 0; 323 324 if (device_id == 0x005d) { 325 /* ck804, base address is magic */ 326 base = 0xe0000000UL; 327 } else if (device_id >= 0x0374 && device_id <= 0x378) { 328 /* mcp55, base address stored in chipset */ 329 mcp55 = pci_find_bsf(0, 0, 0); 330 if (mcp55 && 331 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 332 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 333 word = pci_read_config(mcp55, 0x90, 2); 334 base = ((unsigned long)word & 0x7ffeU) << 25; 335 } 336 } 337 if (!base) 338 return; 339 340 /* 341 * XXXX 342 * Test below is commented because it is believed that doing 343 * config read/write beyond 0xff will access the config space 344 * for the next larger function. Uncomment this and remove 345 * the hacky pmap_mapdev() way of accessing config space when 346 * DragonFly grows support for extended pcie config space access. 347 */ 348 #if 0 349 /* 350 * See if we can, by some miracle, access the extended 351 * config space 352 */ 353 val = pci_read_config(pdev, 0x178, 4); 354 if (val != 0xffffffff) { 355 val |= 0x40; 356 pci_write_config(pdev, 0x178, val, 4); 357 return; 358 } 359 #endif 360 /* 361 * Rather than using normal pci config space writes, we must 362 * map the Nvidia config space ourselves. 
This is because on 363 * opteron/nvidia class machine the 0xe000000 mapping is 364 * handled by the nvidia chipset, that means the internal PCI 365 * device (the on-chip northbridge), or the amd-8131 bridge 366 * and things behind them are not visible by this method. 367 */ 368 369 BUS_READ_IVAR(device_get_parent(pdev), pdev, 370 PCI_IVAR_BUS, &bus); 371 BUS_READ_IVAR(device_get_parent(pdev), pdev, 372 PCI_IVAR_SLOT, &slot); 373 BUS_READ_IVAR(device_get_parent(pdev), pdev, 374 PCI_IVAR_FUNCTION, &func); 375 BUS_READ_IVAR(device_get_parent(pdev), pdev, 376 PCI_IVAR_VENDOR, &ivend); 377 BUS_READ_IVAR(device_get_parent(pdev), pdev, 378 PCI_IVAR_DEVICE, &idev); 379 380 off = base + 0x00100000UL * (unsigned long)bus + 381 0x00001000UL * (unsigned long)(func + 8 * slot); 382 383 /* map it into the kernel */ 384 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 385 if (va == NULL) { 386 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 387 return; 388 } 389 /* get a pointer to the config space mapped into the kernel */ 390 cfgptr = va + (off & PAGE_MASK); 391 392 /* make sure that we can really access it */ 393 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 394 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 395 if (!(vendor_id == ivend && device_id == idev)) { 396 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 397 vendor_id, device_id); 398 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 399 return; 400 } 401 402 ptr32 = (uint32_t*)(cfgptr + 0x178); 403 val = *ptr32; 404 405 if (val == 0xffffffff) { 406 device_printf(sc->dev, "extended mapping failed\n"); 407 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 408 return; 409 } 410 *ptr32 = val | 0x40; 411 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 412 if (bootverbose) { 413 device_printf(sc->dev, "Enabled ECRC on upstream " 414 "Nvidia bridge at %d:%d:%d\n", 415 (int)bus, (int)slot, (int)func); 416 } 417 } 418 419 #else /* __x86_64__ */ 420 421 static void 422 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 423 { 
424 device_printf(sc->dev, "Nforce 4 chipset on non-x86/x86_64!?!?!\n"); 425 } 426 427 #endif 428 429 static int 430 mxge_dma_test(mxge_softc_t *sc, int test_type) 431 { 432 mxge_cmd_t cmd; 433 bus_addr_t dmatest_bus = sc->dmabench_dma.dmem_busaddr; 434 int status; 435 uint32_t len; 436 const char *test = " "; 437 438 /* 439 * Run a small DMA test. 440 * The magic multipliers to the length tell the firmware 441 * to do DMA read, write, or read+write tests. The 442 * results are returned in cmd.data0. The upper 16 443 * bits of the return is the number of transfers completed. 444 * The lower 16 bits is the time in 0.5us ticks that the 445 * transfers took to complete. 446 */ 447 448 len = sc->tx_boundary; 449 450 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 451 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 452 cmd.data2 = len * 0x10000; 453 status = mxge_send_cmd(sc, test_type, &cmd); 454 if (status != 0) { 455 test = "read"; 456 goto abort; 457 } 458 sc->read_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff); 459 460 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 461 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 462 cmd.data2 = len * 0x1; 463 status = mxge_send_cmd(sc, test_type, &cmd); 464 if (status != 0) { 465 test = "write"; 466 goto abort; 467 } 468 sc->write_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff); 469 470 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 471 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 472 cmd.data2 = len * 0x10001; 473 status = mxge_send_cmd(sc, test_type, &cmd); 474 if (status != 0) { 475 test = "read/write"; 476 goto abort; 477 } 478 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 479 (cmd.data0 & 0xffff); 480 481 abort: 482 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) { 483 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 484 test, status); 485 } 486 return status; 487 } 488 489 /* 490 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 491 * when the PCI-E Completion 
packets are aligned on an 8-byte 492 * boundary. Some PCI-E chip sets always align Completion packets; on 493 * the ones that do not, the alignment can be enforced by enabling 494 * ECRC generation (if supported). 495 * 496 * When PCI-E Completion packets are not aligned, it is actually more 497 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 498 * 499 * If the driver can neither enable ECRC nor verify that it has 500 * already been enabled, then it must use a firmware image which works 501 * around unaligned completion packets (ethp_z8e.dat), and it should 502 * also ensure that it never gives the device a Read-DMA which is 503 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 504 * enabled, then the driver should use the aligned (eth_z8e.dat) 505 * firmware image, and set tx_boundary to 4KB. 506 */ 507 static int 508 mxge_firmware_probe(mxge_softc_t *sc) 509 { 510 device_t dev = sc->dev; 511 int reg, status; 512 uint16_t pectl; 513 514 sc->tx_boundary = 4096; 515 516 /* 517 * Verify the max read request size was set to 4KB 518 * before trying the test with 4KB. 519 */ 520 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 521 pectl = pci_read_config(dev, reg + 0x8, 2); 522 if ((pectl & (5 << 12)) != (5 << 12)) { 523 device_printf(dev, "Max Read Req. size != 4k (0x%x)\n", 524 pectl); 525 sc->tx_boundary = 2048; 526 } 527 } 528 529 /* 530 * Load the optimized firmware (which assumes aligned PCIe 531 * completions) in order to see if it works on this host. 532 */ 533 sc->fw_name = mxge_fw_aligned; 534 status = mxge_load_firmware(sc, 1); 535 if (status != 0) 536 return status; 537 538 /* 539 * Enable ECRC if possible 540 */ 541 mxge_enable_nvidia_ecrc(sc); 542 543 /* 544 * Run a DMA test which watches for unaligned completions and 545 * aborts on the first one seen. Not required on Z8ES or newer. 
546 */ 547 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES) 548 return 0; 549 550 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 551 if (status == 0) 552 return 0; /* keep the aligned firmware */ 553 554 if (status != E2BIG) 555 device_printf(dev, "DMA test failed: %d\n", status); 556 if (status == ENOSYS) { 557 device_printf(dev, "Falling back to ethp! " 558 "Please install up to date fw\n"); 559 } 560 return status; 561 } 562 563 static int 564 mxge_select_firmware(mxge_softc_t *sc) 565 { 566 int aligned = 0; 567 int force_firmware = mxge_force_firmware; 568 569 if (sc->throttle) 570 force_firmware = sc->throttle; 571 572 if (force_firmware != 0) { 573 if (force_firmware == 1) 574 aligned = 1; 575 else 576 aligned = 0; 577 if (bootverbose) { 578 device_printf(sc->dev, 579 "Assuming %s completions (forced)\n", 580 aligned ? "aligned" : "unaligned"); 581 } 582 goto abort; 583 } 584 585 /* 586 * If the PCIe link width is 4 or less, we can use the aligned 587 * firmware and skip any checks 588 */ 589 if (sc->link_width != 0 && sc->link_width <= 4) { 590 device_printf(sc->dev, "PCIe x%d Link, " 591 "expect reduced performance\n", sc->link_width); 592 aligned = 1; 593 goto abort; 594 } 595 596 if (mxge_firmware_probe(sc) == 0) 597 return 0; 598 599 abort: 600 if (aligned) { 601 sc->fw_name = mxge_fw_aligned; 602 sc->tx_boundary = 4096; 603 } else { 604 sc->fw_name = mxge_fw_unaligned; 605 sc->tx_boundary = 2048; 606 } 607 return mxge_load_firmware(sc, 0); 608 } 609 610 static int 611 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 612 { 613 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 614 if_printf(sc->ifp, "Bad firmware type: 0x%x\n", 615 be32toh(hdr->mcp_type)); 616 return EIO; 617 } 618 619 /* Save firmware version for sysctl */ 620 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version)); 621 if (bootverbose) 622 if_printf(sc->ifp, "firmware id: %s\n", hdr->version); 623 624 ksscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 
625 &sc->fw_ver_minor, &sc->fw_ver_tiny); 626 627 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR && 628 sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 629 if_printf(sc->ifp, "Found firmware version %s\n", 630 sc->fw_version); 631 if_printf(sc->ifp, "Driver needs %d.%d\n", 632 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 633 return EINVAL; 634 } 635 return 0; 636 } 637 638 static void * 639 z_alloc(void *nil, u_int items, u_int size) 640 { 641 return kmalloc(items * size, M_TEMP, M_WAITOK); 642 } 643 644 static void 645 z_free(void *nil, void *ptr) 646 { 647 kfree(ptr, M_TEMP); 648 } 649 650 static int 651 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 652 { 653 z_stream zs; 654 char *inflate_buffer; 655 const struct firmware *fw; 656 const mcp_gen_header_t *hdr; 657 unsigned hdr_offset; 658 int status; 659 unsigned int i; 660 char dummy; 661 size_t fw_len; 662 663 fw = firmware_get(sc->fw_name); 664 if (fw == NULL) { 665 if_printf(sc->ifp, "Could not find firmware image %s\n", 666 sc->fw_name); 667 return ENOENT; 668 } 669 670 /* Setup zlib and decompress f/w */ 671 bzero(&zs, sizeof(zs)); 672 zs.zalloc = z_alloc; 673 zs.zfree = z_free; 674 status = inflateInit(&zs); 675 if (status != Z_OK) { 676 status = EIO; 677 goto abort_with_fw; 678 } 679 680 /* 681 * The uncompressed size is stored as the firmware version, 682 * which would otherwise go unused 683 */ 684 fw_len = (size_t)fw->version; 685 inflate_buffer = kmalloc(fw_len, M_TEMP, M_WAITOK); 686 zs.avail_in = fw->datasize; 687 zs.next_in = __DECONST(char *, fw->data); 688 zs.avail_out = fw_len; 689 zs.next_out = inflate_buffer; 690 status = inflate(&zs, Z_FINISH); 691 if (status != Z_STREAM_END) { 692 if_printf(sc->ifp, "zlib %d\n", status); 693 status = EIO; 694 goto abort_with_buffer; 695 } 696 697 /* Check id */ 698 hdr_offset = 699 htobe32(*(const uint32_t *)(inflate_buffer + MCP_HEADER_PTR_OFFSET)); 700 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 701 if_printf(sc->ifp, "Bad 
firmware file"); 702 status = EIO; 703 goto abort_with_buffer; 704 } 705 hdr = (const void*)(inflate_buffer + hdr_offset); 706 707 status = mxge_validate_firmware(sc, hdr); 708 if (status != 0) 709 goto abort_with_buffer; 710 711 /* Copy the inflated firmware to NIC SRAM. */ 712 for (i = 0; i < fw_len; i += 256) { 713 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, inflate_buffer + i, 714 min(256U, (unsigned)(fw_len - i))); 715 wmb(); 716 dummy = *sc->sram; 717 wmb(); 718 } 719 720 *limit = fw_len; 721 status = 0; 722 abort_with_buffer: 723 kfree(inflate_buffer, M_TEMP); 724 inflateEnd(&zs); 725 abort_with_fw: 726 firmware_put(fw, FIRMWARE_UNLOAD); 727 return status; 728 } 729 730 /* 731 * Enable or disable periodic RDMAs from the host to make certain 732 * chipsets resend dropped PCIe messages 733 */ 734 static void 735 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 736 { 737 char buf_bytes[72]; 738 volatile uint32_t *confirm; 739 volatile char *submit; 740 uint32_t *buf, dma_low, dma_high; 741 int i; 742 743 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 744 745 /* Clear confirmation addr */ 746 confirm = (volatile uint32_t *)sc->cmd; 747 *confirm = 0; 748 wmb(); 749 750 /* 751 * Send an rdma command to the PCIe engine, and wait for the 752 * response in the confirmation address. The firmware should 753 * write a -1 there to indicate it is alive and well 754 */ 755 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr); 756 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr); 757 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 758 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 759 buf[2] = htobe32(0xffffffff); /* confirm data */ 760 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.dmem_busaddr); 761 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.dmem_busaddr); 762 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 763 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 764 buf[5] = htobe32(enable); /* enable? 
*/ 765 766 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 767 768 mxge_pio_copy(submit, buf, 64); 769 wmb(); 770 DELAY(1000); 771 wmb(); 772 i = 0; 773 while (*confirm != 0xffffffff && i < 20) { 774 DELAY(1000); 775 i++; 776 } 777 if (*confirm != 0xffffffff) { 778 if_printf(sc->ifp, "dummy rdma %s failed (%p = 0x%x)", 779 (enable ? "enable" : "disable"), confirm, *confirm); 780 } 781 } 782 783 static int 784 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 785 { 786 mcp_cmd_t *buf; 787 char buf_bytes[sizeof(*buf) + 8]; 788 volatile mcp_cmd_response_t *response = sc->cmd; 789 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 790 uint32_t dma_low, dma_high; 791 int err, sleep_total = 0; 792 793 /* Ensure buf is aligned to 8 bytes */ 794 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 795 796 buf->data0 = htobe32(data->data0); 797 buf->data1 = htobe32(data->data1); 798 buf->data2 = htobe32(data->data2); 799 buf->cmd = htobe32(cmd); 800 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr); 801 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr); 802 803 buf->response_addr.low = htobe32(dma_low); 804 buf->response_addr.high = htobe32(dma_high); 805 806 response->result = 0xffffffff; 807 wmb(); 808 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 809 810 /* 811 * Wait up to 20ms 812 */ 813 err = EAGAIN; 814 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 815 wmb(); 816 switch (be32toh(response->result)) { 817 case 0: 818 data->data0 = be32toh(response->data); 819 err = 0; 820 break; 821 case 0xffffffff: 822 DELAY(1000); 823 break; 824 case MXGEFW_CMD_UNKNOWN: 825 err = ENOSYS; 826 break; 827 case MXGEFW_CMD_ERROR_UNALIGNED: 828 err = E2BIG; 829 break; 830 case MXGEFW_CMD_ERROR_BUSY: 831 err = EBUSY; 832 break; 833 case MXGEFW_CMD_ERROR_I2C_ABSENT: 834 err = ENXIO; 835 break; 836 default: 837 if_printf(sc->ifp, "command %d failed, result = %d\n", 838 cmd, be32toh(response->result)); 839 err = 
ENXIO; 840 break; 841 } 842 if (err != EAGAIN) 843 break; 844 } 845 if (err == EAGAIN) { 846 if_printf(sc->ifp, "command %d timed out result = %d\n", 847 cmd, be32toh(response->result)); 848 } 849 return err; 850 } 851 852 static int 853 mxge_adopt_running_firmware(mxge_softc_t *sc) 854 { 855 struct mcp_gen_header *hdr; 856 const size_t bytes = sizeof(struct mcp_gen_header); 857 size_t hdr_offset; 858 int status; 859 860 /* 861 * Find running firmware header 862 */ 863 hdr_offset = 864 htobe32(*(volatile uint32_t *)(sc->sram + MCP_HEADER_PTR_OFFSET)); 865 866 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 867 if_printf(sc->ifp, "Running firmware has bad header offset " 868 "(%zu)\n", hdr_offset); 869 return EIO; 870 } 871 872 /* 873 * Copy header of running firmware from SRAM to host memory to 874 * validate firmware 875 */ 876 hdr = kmalloc(bytes, M_DEVBUF, M_WAITOK); 877 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 878 rman_get_bushandle(sc->mem_res), hdr_offset, (char *)hdr, bytes); 879 status = mxge_validate_firmware(sc, hdr); 880 kfree(hdr, M_DEVBUF); 881 882 /* 883 * Check to see if adopted firmware has bug where adopting 884 * it will cause broadcasts to be filtered unless the NIC 885 * is kept in ALLMULTI mode 886 */ 887 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 888 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 889 sc->adopted_rx_filter_bug = 1; 890 if_printf(sc->ifp, "Adopting fw %d.%d.%d: " 891 "working around rx filter bug\n", 892 sc->fw_ver_major, sc->fw_ver_minor, sc->fw_ver_tiny); 893 } 894 895 return status; 896 } 897 898 static int 899 mxge_load_firmware(mxge_softc_t *sc, int adopt) 900 { 901 volatile uint32_t *confirm; 902 volatile char *submit; 903 char buf_bytes[72]; 904 uint32_t *buf, size, dma_low, dma_high; 905 int status, i; 906 907 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 908 909 size = sc->sram_size; 910 status = mxge_load_firmware_helper(sc, &size); 911 if (status) { 912 if 
(!adopt) 913 return status; 914 915 /* 916 * Try to use the currently running firmware, if 917 * it is new enough 918 */ 919 status = mxge_adopt_running_firmware(sc); 920 if (status) { 921 if_printf(sc->ifp, 922 "failed to adopt running firmware\n"); 923 return status; 924 } 925 if_printf(sc->ifp, "Successfully adopted running firmware\n"); 926 927 if (sc->tx_boundary == 4096) { 928 if_printf(sc->ifp, 929 "Using firmware currently running on NIC. " 930 "For optimal\n"); 931 if_printf(sc->ifp, "performance consider loading " 932 "optimized firmware\n"); 933 } 934 sc->fw_name = mxge_fw_unaligned; 935 sc->tx_boundary = 2048; 936 return 0; 937 } 938 939 /* Clear confirmation addr */ 940 confirm = (volatile uint32_t *)sc->cmd; 941 *confirm = 0; 942 wmb(); 943 944 /* 945 * Send a reload command to the bootstrap MCP, and wait for the 946 * response in the confirmation address. The firmware should 947 * write a -1 there to indicate it is alive and well 948 */ 949 950 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr); 951 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr); 952 953 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 954 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 955 buf[2] = htobe32(0xffffffff); /* confirm data */ 956 957 /* 958 * FIX: All newest firmware should un-protect the bottom of 959 * the sram before handoff. However, the very first interfaces 960 * do not. 
Therefore the handoff copy must skip the first 8 bytes 961 */ 962 /* where the code starts*/ 963 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 964 buf[4] = htobe32(size - 8); /* length of code */ 965 buf[5] = htobe32(8); /* where to copy to */ 966 buf[6] = htobe32(0); /* where to jump to */ 967 968 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 969 mxge_pio_copy(submit, buf, 64); 970 wmb(); 971 DELAY(1000); 972 wmb(); 973 i = 0; 974 while (*confirm != 0xffffffff && i < 20) { 975 DELAY(1000*10); 976 i++; 977 } 978 if (*confirm != 0xffffffff) { 979 if_printf(sc->ifp,"handoff failed (%p = 0x%x)", 980 confirm, *confirm); 981 return ENXIO; 982 } 983 return 0; 984 } 985 986 static int 987 mxge_update_mac_address(mxge_softc_t *sc) 988 { 989 mxge_cmd_t cmd; 990 uint8_t *addr = sc->mac_addr; 991 992 cmd.data0 = (addr[0] << 24) | (addr[1] << 16) | 993 (addr[2] << 8) | addr[3]; 994 cmd.data1 = (addr[4] << 8) | (addr[5]); 995 return mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 996 } 997 998 static int 999 mxge_change_pause(mxge_softc_t *sc, int pause) 1000 { 1001 mxge_cmd_t cmd; 1002 int status; 1003 1004 bzero(&cmd, sizeof(cmd)); /* silence gcc warning */ 1005 if (pause) 1006 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, &cmd); 1007 else 1008 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, &cmd); 1009 if (status) { 1010 if_printf(sc->ifp, "Failed to set flow control mode\n"); 1011 return ENXIO; 1012 } 1013 sc->pause = pause; 1014 return 0; 1015 } 1016 1017 static void 1018 mxge_change_promisc(mxge_softc_t *sc, int promisc) 1019 { 1020 mxge_cmd_t cmd; 1021 int status; 1022 1023 bzero(&cmd, sizeof(cmd)); /* avoid gcc warning */ 1024 if (mxge_always_promisc) 1025 promisc = 1; 1026 1027 if (promisc) 1028 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, &cmd); 1029 else 1030 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, &cmd); 1031 if (status) 1032 if_printf(sc->ifp, "Failed to set promisc mode\n"); 1033 } 1034 1035 static void 1036 
mxge_set_multicast_list(mxge_softc_t *sc) 1037 { 1038 mxge_cmd_t cmd; 1039 struct ifmultiaddr *ifma; 1040 struct ifnet *ifp = sc->ifp; 1041 int err; 1042 1043 /* This firmware is known to not support multicast */ 1044 if (!sc->fw_multicast_support) 1045 return; 1046 1047 /* Disable multicast filtering while we play with the lists*/ 1048 bzero(&cmd, sizeof(cmd)); /* silence gcc warning */ 1049 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1050 if (err != 0) { 1051 if_printf(ifp, "Failed MXGEFW_ENABLE_ALLMULTI, " 1052 "error status: %d\n", err); 1053 return; 1054 } 1055 1056 if (sc->adopted_rx_filter_bug) 1057 return; 1058 1059 if (ifp->if_flags & IFF_ALLMULTI) { 1060 /* Request to disable multicast filtering, so quit here */ 1061 return; 1062 } 1063 1064 /* Flush all the filters */ 1065 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1066 if (err != 0) { 1067 if_printf(ifp, "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, " 1068 "error status: %d\n", err); 1069 return; 1070 } 1071 1072 /* 1073 * Walk the multicast list, and add each address 1074 */ 1075 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1076 if (ifma->ifma_addr->sa_family != AF_LINK) 1077 continue; 1078 1079 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1080 &cmd.data0, 4); 1081 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, 1082 &cmd.data1, 2); 1083 cmd.data0 = htonl(cmd.data0); 1084 cmd.data1 = htonl(cmd.data1); 1085 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1086 if (err != 0) { 1087 if_printf(ifp, "Failed MXGEFW_JOIN_MULTICAST_GROUP, " 1088 "error status: %d\n", err); 1089 /* Abort, leaving multicast filtering off */ 1090 return; 1091 } 1092 } 1093 1094 /* Enable multicast filtering */ 1095 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1096 if (err != 0) { 1097 if_printf(ifp, "Failed MXGEFW_DISABLE_ALLMULTI, " 1098 "error status: %d\n", err); 1099 } 1100 } 1101 1102 #if 0 1103 static int 1104 mxge_max_mtu(mxge_softc_t *sc) 1105 
{ 1106 mxge_cmd_t cmd; 1107 int status; 1108 1109 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) 1110 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1111 1112 /* try to set nbufs to see if it we can 1113 use virtually contiguous jumbos */ 1114 cmd.data0 = 0; 1115 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 1116 &cmd); 1117 if (status == 0) 1118 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1119 1120 /* otherwise, we're limited to MJUMPAGESIZE */ 1121 return MJUMPAGESIZE - MXGEFW_PAD; 1122 } 1123 #endif 1124 1125 static int 1126 mxge_reset(mxge_softc_t *sc, int interrupts_setup) 1127 { 1128 struct mxge_slice_state *ss; 1129 mxge_rx_done_t *rx_done; 1130 volatile uint32_t *irq_claim; 1131 mxge_cmd_t cmd; 1132 int slice, status, rx_intr_size; 1133 1134 /* 1135 * Try to send a reset command to the card to see if it 1136 * is alive 1137 */ 1138 memset(&cmd, 0, sizeof (cmd)); 1139 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1140 if (status != 0) { 1141 if_printf(sc->ifp, "failed reset\n"); 1142 return ENXIO; 1143 } 1144 1145 mxge_dummy_rdma(sc, 1); 1146 1147 /* 1148 * Set the intrq size 1149 * XXX assume 4byte mcp_slot 1150 */ 1151 rx_intr_size = sc->rx_intr_slots * sizeof(mcp_slot_t); 1152 cmd.data0 = rx_intr_size; 1153 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1154 1155 /* 1156 * Even though we already know how many slices are supported 1157 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES 1158 * has magic side effects, and must be called after a reset. 1159 * It must be called prior to calling any RSS related cmds, 1160 * including assigning an interrupt queue for anything but 1161 * slice 0. It must also be called *after* 1162 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 1163 * the firmware to compute offsets. 
1164 */ 1165 if (sc->num_slices > 1) { 1166 /* Ask the maximum number of slices it supports */ 1167 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 1168 if (status != 0) { 1169 if_printf(sc->ifp, "failed to get number of slices\n"); 1170 return status; 1171 } 1172 1173 /* 1174 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 1175 * to setting up the interrupt queue DMA 1176 */ 1177 cmd.data0 = sc->num_slices; 1178 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 1179 if (sc->num_tx_rings > 1) 1180 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 1181 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, &cmd); 1182 if (status != 0) { 1183 if_printf(sc->ifp, "failed to set number of slices\n"); 1184 return status; 1185 } 1186 } 1187 1188 if (interrupts_setup) { 1189 /* Now exchange information about interrupts */ 1190 for (slice = 0; slice < sc->num_slices; slice++) { 1191 ss = &sc->ss[slice]; 1192 1193 rx_done = &ss->rx_data.rx_done; 1194 memset(rx_done->entry, 0, rx_intr_size); 1195 1196 cmd.data0 = 1197 MXGE_LOWPART_TO_U32(ss->rx_done_dma.dmem_busaddr); 1198 cmd.data1 = 1199 MXGE_HIGHPART_TO_U32(ss->rx_done_dma.dmem_busaddr); 1200 cmd.data2 = slice; 1201 status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA, 1202 &cmd); 1203 } 1204 } 1205 1206 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, 1207 &cmd); 1208 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1209 1210 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1211 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1212 1213 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd); 1214 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1215 1216 if (status != 0) { 1217 if_printf(sc->ifp, "failed set interrupt parameters\n"); 1218 return status; 1219 } 1220 1221 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1222 1223 /* Run a DMA benchmark */ 1224 mxge_dma_test(sc, MXGEFW_DMA_TEST); 
1225 1226 for (slice = 0; slice < sc->num_slices; slice++) { 1227 ss = &sc->ss[slice]; 1228 1229 ss->irq_claim = irq_claim + (2 * slice); 1230 1231 /* Reset mcp/driver shared state back to 0 */ 1232 ss->rx_data.rx_done.idx = 0; 1233 ss->tx.req = 0; 1234 ss->tx.done = 0; 1235 ss->tx.pkt_done = 0; 1236 ss->tx.queue_active = 0; 1237 ss->tx.activate = 0; 1238 ss->tx.deactivate = 0; 1239 ss->rx_data.rx_big.cnt = 0; 1240 ss->rx_data.rx_small.cnt = 0; 1241 if (ss->fw_stats != NULL) 1242 bzero(ss->fw_stats, sizeof(*ss->fw_stats)); 1243 } 1244 sc->rdma_tags_available = 15; 1245 1246 status = mxge_update_mac_address(sc); 1247 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC); 1248 mxge_change_pause(sc, sc->pause); 1249 mxge_set_multicast_list(sc); 1250 1251 if (sc->throttle) { 1252 cmd.data0 = sc->throttle; 1253 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd)) 1254 if_printf(sc->ifp, "can't enable throttle\n"); 1255 } 1256 return status; 1257 } 1258 1259 static int 1260 mxge_change_throttle(SYSCTL_HANDLER_ARGS) 1261 { 1262 mxge_cmd_t cmd; 1263 mxge_softc_t *sc; 1264 int err; 1265 unsigned int throttle; 1266 1267 sc = arg1; 1268 throttle = sc->throttle; 1269 err = sysctl_handle_int(oidp, &throttle, arg2, req); 1270 if (err != 0) 1271 return err; 1272 1273 if (throttle == sc->throttle) 1274 return 0; 1275 1276 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE) 1277 return EINVAL; 1278 1279 ifnet_serialize_all(sc->ifp); 1280 1281 cmd.data0 = throttle; 1282 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd); 1283 if (err == 0) 1284 sc->throttle = throttle; 1285 1286 ifnet_deserialize_all(sc->ifp); 1287 return err; 1288 } 1289 1290 static int 1291 mxge_change_use_rss(SYSCTL_HANDLER_ARGS) 1292 { 1293 mxge_softc_t *sc; 1294 int err, use_rss; 1295 1296 sc = arg1; 1297 use_rss = sc->use_rss; 1298 err = sysctl_handle_int(oidp, &use_rss, arg2, req); 1299 if (err != 0) 1300 return err; 1301 1302 if (use_rss == sc->use_rss) 1303 return 0; 1304 
1305 ifnet_serialize_all(sc->ifp); 1306 1307 sc->use_rss = use_rss; 1308 if (sc->ifp->if_flags & IFF_RUNNING) { 1309 mxge_close(sc, 0); 1310 mxge_open(sc); 1311 } 1312 1313 ifnet_deserialize_all(sc->ifp); 1314 return err; 1315 } 1316 1317 static int 1318 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1319 { 1320 mxge_softc_t *sc; 1321 unsigned int intr_coal_delay; 1322 int err; 1323 1324 sc = arg1; 1325 intr_coal_delay = sc->intr_coal_delay; 1326 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1327 if (err != 0) 1328 return err; 1329 1330 if (intr_coal_delay == sc->intr_coal_delay) 1331 return 0; 1332 1333 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1334 return EINVAL; 1335 1336 ifnet_serialize_all(sc->ifp); 1337 1338 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1339 sc->intr_coal_delay = intr_coal_delay; 1340 1341 ifnet_deserialize_all(sc->ifp); 1342 return err; 1343 } 1344 1345 static int 1346 mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1347 { 1348 int err; 1349 1350 if (arg1 == NULL) 1351 return EFAULT; 1352 arg2 = be32toh(*(int *)arg1); 1353 arg1 = NULL; 1354 err = sysctl_handle_int(oidp, arg1, arg2, req); 1355 1356 return err; 1357 } 1358 1359 static void 1360 mxge_rem_sysctls(mxge_softc_t *sc) 1361 { 1362 if (sc->ss != NULL) { 1363 struct mxge_slice_state *ss; 1364 int slice; 1365 1366 for (slice = 0; slice < sc->num_slices; slice++) { 1367 ss = &sc->ss[slice]; 1368 if (ss->sysctl_tree != NULL) { 1369 sysctl_ctx_free(&ss->sysctl_ctx); 1370 ss->sysctl_tree = NULL; 1371 } 1372 } 1373 } 1374 1375 if (sc->slice_sysctl_tree != NULL) { 1376 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1377 sc->slice_sysctl_tree = NULL; 1378 } 1379 } 1380 1381 static void 1382 mxge_add_sysctls(mxge_softc_t *sc) 1383 { 1384 struct sysctl_ctx_list *ctx; 1385 struct sysctl_oid_list *children; 1386 mcp_irq_data_t *fw; 1387 struct mxge_slice_state *ss; 1388 int slice; 1389 char slice_num[8]; 1390 1391 ctx = device_get_sysctl_ctx(sc->dev); 1392 children = 
SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1393 fw = sc->ss[0].fw_stats; 1394 1395 /* 1396 * Random information 1397 */ 1398 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version", 1399 CTLFLAG_RD, &sc->fw_version, 0, "firmware version"); 1400 1401 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "serial_number", 1402 CTLFLAG_RD, &sc->serial_number_string, 0, "serial number"); 1403 1404 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "product_code", 1405 CTLFLAG_RD, &sc->product_code_string, 0, "product code"); 1406 1407 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "pcie_link_width", 1408 CTLFLAG_RD, &sc->link_width, 0, "link width"); 1409 1410 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_boundary", 1411 CTLFLAG_RD, &sc->tx_boundary, 0, "tx boundary"); 1412 1413 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_combine", 1414 CTLFLAG_RD, &sc->wc, 0, "write combining PIO"); 1415 1416 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_dma_MBs", 1417 CTLFLAG_RD, &sc->read_dma, 0, "DMA Read speed in MB/s"); 1418 1419 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_dma_MBs", 1420 CTLFLAG_RD, &sc->write_dma, 0, "DMA Write speed in MB/s"); 1421 1422 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_write_dma_MBs", 1423 CTLFLAG_RD, &sc->read_write_dma, 0, 1424 "DMA concurrent Read/Write speed in MB/s"); 1425 1426 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "watchdog_resets", 1427 CTLFLAG_RD, &sc->watchdog_resets, 0, 1428 "Number of times NIC was reset"); 1429 1430 /* 1431 * Performance related tunables 1432 */ 1433 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_coal_delay", 1434 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_intr_coal, "I", 1435 "Interrupt coalescing delay in usecs"); 1436 1437 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "throttle", 1438 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_throttle, "I", 1439 "Transmit throttling"); 1440 1441 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "use_rss", 1442 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_use_rss, "I", 1443 "Use RSS"); 1444 1445 
SYSCTL_ADD_INT(ctx, children, OID_AUTO, "deassert_wait", 1446 CTLFLAG_RW, &mxge_deassert_wait, 0, 1447 "Wait for IRQ line to go low in ihandler"); 1448 1449 /* 1450 * Stats block from firmware is in network byte order. 1451 * Need to swap it 1452 */ 1453 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "link_up", 1454 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 0, 1455 mxge_handle_be32, "I", "link up"); 1456 1457 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_tags_available", 1458 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 0, 1459 mxge_handle_be32, "I", "rdma_tags_available"); 1460 1461 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_crc32", 1462 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_crc32, 0, 1463 mxge_handle_be32, "I", "dropped_bad_crc32"); 1464 1465 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_phy", 1466 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_phy, 0, 1467 mxge_handle_be32, "I", "dropped_bad_phy"); 1468 1469 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_error_or_filtered", 1470 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_error_or_filtered, 0, 1471 mxge_handle_be32, "I", "dropped_link_error_or_filtered"); 1472 1473 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_overflow", 1474 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 0, 1475 mxge_handle_be32, "I", "dropped_link_overflow"); 1476 1477 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_multicast_filtered", 1478 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_multicast_filtered, 0, 1479 mxge_handle_be32, "I", "dropped_multicast_filtered"); 1480 1481 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_no_big_buffer", 1482 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 0, 1483 mxge_handle_be32, "I", "dropped_no_big_buffer"); 1484 1485 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_no_small_buffer", 1486 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_small_buffer, 0, 1487 mxge_handle_be32, "I", "dropped_no_small_buffer"); 1488 1489 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_overrun", 1490 
CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 0, 1491 mxge_handle_be32, "I", "dropped_overrun"); 1492 1493 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_pause", 1494 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_pause, 0, 1495 mxge_handle_be32, "I", "dropped_pause"); 1496 1497 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_runt", 1498 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 0, 1499 mxge_handle_be32, "I", "dropped_runt"); 1500 1501 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_unicast_filtered", 1502 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 0, 1503 mxge_handle_be32, "I", "dropped_unicast_filtered"); 1504 1505 /* add counters exported for debugging from all slices */ 1506 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1507 sc->slice_sysctl_tree = SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, 1508 children, OID_AUTO, "slice", CTLFLAG_RD, 0, ""); 1509 if (sc->slice_sysctl_tree == NULL) { 1510 device_printf(sc->dev, "can't add slice sysctl node\n"); 1511 return; 1512 } 1513 1514 for (slice = 0; slice < sc->num_slices; slice++) { 1515 ss = &sc->ss[slice]; 1516 sysctl_ctx_init(&ss->sysctl_ctx); 1517 ctx = &ss->sysctl_ctx; 1518 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1519 ksprintf(slice_num, "%d", slice); 1520 ss->sysctl_tree = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 1521 slice_num, CTLFLAG_RD, 0, ""); 1522 if (ss->sysctl_tree == NULL) { 1523 device_printf(sc->dev, 1524 "can't add %d slice sysctl node\n", slice); 1525 return; /* XXX continue? 
*/ 1526 } 1527 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1528 1529 /* 1530 * XXX change to ULONG 1531 */ 1532 1533 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_small_cnt", 1534 CTLFLAG_RD, &ss->rx_data.rx_small.cnt, 0, "rx_small_cnt"); 1535 1536 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_big_cnt", 1537 CTLFLAG_RD, &ss->rx_data.rx_big.cnt, 0, "rx_small_cnt"); 1538 1539 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_req", 1540 CTLFLAG_RD, &ss->tx.req, 0, "tx_req"); 1541 1542 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_done", 1543 CTLFLAG_RD, &ss->tx.done, 0, "tx_done"); 1544 1545 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_pkt_done", 1546 CTLFLAG_RD, &ss->tx.pkt_done, 0, "tx_done"); 1547 1548 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_queue_active", 1549 CTLFLAG_RD, &ss->tx.queue_active, 0, "tx_queue_active"); 1550 1551 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_activate", 1552 CTLFLAG_RD, &ss->tx.activate, 0, "tx_activate"); 1553 1554 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_deactivate", 1555 CTLFLAG_RD, &ss->tx.deactivate, 0, "tx_deactivate"); 1556 } 1557 } 1558 1559 /* 1560 * Copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1561 * backwards one at a time and handle ring wraps 1562 */ 1563 static __inline void 1564 mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1565 mcp_kreq_ether_send_t *src, int cnt) 1566 { 1567 int idx, starting_slot; 1568 1569 starting_slot = tx->req; 1570 while (cnt > 1) { 1571 cnt--; 1572 idx = (starting_slot + cnt) & tx->mask; 1573 mxge_pio_copy(&tx->lanai[idx], &src[cnt], sizeof(*src)); 1574 wmb(); 1575 } 1576 } 1577 1578 /* 1579 * Copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1580 * at most 32 bytes at a time, so as to avoid involving the software 1581 * pio handler in the nic. 
We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */
static __inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	/*
	 * Stash the first request's flags and clear them in the source,
	 * so the copy below lands in the NIC with flags == 0; the real
	 * flags are written last.
	 */
	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		/* No ring wrap: copy the requests forward, two at a time */
		for (i = 0; i < cnt - 1; i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/*
		 * Submit all but the first request, and ensure
		 * that it is submitted below
		 */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/*
		 * Submit the first request
		 *
		 * (On the wrap path i is 0 and srcp/dstp still point at
		 * request 0.  On the pairwise path this is reached only
		 * when cnt is odd, and srcp/dstp then point at the one
		 * request the loop did not copy.)
		 */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* Re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints+=3;	/* fourth 32-bit word of the first request */
	dst_ints = (volatile uint32_t *)dst;
	dst_ints+=3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}

/*
 * Make sure the link, IP and TCP headers of a TSO packet are contiguous
 * in the first mbuf.  The header lengths are taken from the packet
 * header's csum_lhlen, csum_iphlen and csum_thlen fields.
 *
 * On success returns 0 and *mp points at the (possibly replaced) mbuf.
 * If m_pullup() fails, *mp is set to NULL and ENOBUFS is returned.
 */
static int
mxge_pullup_tso(struct mbuf **mp)
{
	int hoff, iphlen, thoff;
	struct mbuf *m;

	m = *mp;
	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));

	iphlen = m->m_pkthdr.csum_iphlen;
	thoff = m->m_pkthdr.csum_thlen;	/* a length, despite the name */
	hoff = m->m_pkthdr.csum_lhlen;

	KASSERT(iphlen > 0, ("invalid ip hlen"));
	KASSERT(thoff > 0, ("invalid tcp hlen"));
	KASSERT(hoff > 0, ("invalid ether hlen"));

	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
		m = m_pullup(m, hoff + iphlen + thoff);
		if (m == NULL) {
			*mp = NULL;
			return ENOBUFS;
		}
		*mp = m;
	}
	return 0;
}

/*
 * Build and submit the send requests for a TSO packet.
 *
 * The mbuf has already been loaded for DMA by the caller: the segments
 * are in tx->seg_list (busdma_seg_cnt of them) and info_map is the
 * tx->info[] entry whose DMA map was used for the load.
 *
 * Returns 0 once the requests have been handed to the NIC.  If more
 * than tx->max_desc requests would be needed, the DMA map is unloaded,
 * the mbuf is freed and ENOBUFS is returned.
 */
static int
mxge_encap_tso(mxge_tx_ring_t *tx, struct mxge_buffer_state *info_map,
    struct mbuf *m, int busdma_seg_cnt)
{
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss;
	uint8_t flags, flags_next;
	struct mxge_buffer_state *info_last;
	bus_dmamap_t map = info_map->map;

	mss = m->m_pkthdr.tso_segsz;

	/*
	 * Negative cum_len signifies to the send loop that we are
	 * still in the header portion of the TSO packet.
	 */
	cum_len = -(m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen +
	    m->m_pkthdr.csum_thlen);

	/*
	 * TSO implies checksum offload on this hardware
	 */
	cksum_offset = m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen;
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/*
	 * For TSO, pseudo_hdr_offset holds mss.  The firmware figures
	 * out where to put the checksum by parsing the header.
	 */
	pseudo_hdr_offset = htobe16(mss);

	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;

	/*
	 * "rdma_count" is the number of RDMAs belonging to the current
	 * packet BEFORE the current send request.  For non-TSO packets,
	 * this is equal to "count".
	 *
	 * For TSO packets, rdma_count needs to be reset to 0 after a
	 * segment cut.
	 *
	 * The rdma_count field of the send request is the number of
	 * RDMAs of the packet starting at that request.  For TSO send
	 * requests with one or more cuts in the middle, this is the
	 * number of RDMAs starting after the last cut in the request.
	 * All previous segments before the last cut implicitly have 1
	 * RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand, it must be
	 * filled-in retroactively - after each segmentation cut or at
	 * the end of the entire packet.
	 */

	while (busdma_seg_cnt) {
		/*
		 * Break the busdma segment up into pieces
		 */
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			/*
			 * Retroactively update the count stored in the
			 * request rdma_count entries back from this one.
			 */
			(req - rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* Payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |=
				    next_is_first * MXGEFW_FLAGS_FIRST;
				/*
				 * Branch-free update: rdma_count becomes
				 * -1 when either chop or next_is_first is
				 * set, then is bumped to 0 when chop is
				 * set without next_is_first.  It is
				 * incremented again after the request is
				 * filled in below.
				 */
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* Header ends */
				rdma_count = -1;
				cum_len_next = 0;
				/* Only the rest of the header goes in this request */
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
				    MXGEFW_FLAGS_FIRST |
				    (small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags =
			    flags | ((cum_len & 1) * MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			/*
			 * The remaining checksum offset shrinks by the
			 * bytes consumed by this request and becomes 0
			 * once it has been reached.
			 */
			if (__predict_false(cksum_offset > seglen))
				cksum_offset -= seglen;
			else
				cksum_offset = 0;
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req - rdma_count)->rdma_count = rdma_count;

	/*
	 * Walk backwards from the final request, tagging requests with
	 * TSO_LAST; stop after tagging the first one that carries
	 * TSO_CHOP or FIRST.
	 */
	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	/*
	 * The mbuf and its loaded DMA map are recorded in the info slot
	 * of the last request; that slot's map is moved to the first
	 * slot in exchange.
	 */
	info_last = &tx->info[((cnt - 1) + tx->req) & tx->mask];

	info_map->map = info_last->map;
	info_last->map = map;
	info_last->m = m;

	mxge_submit_req(tx, tx->req_list, cnt);

	if (tx->send_go != NULL && tx->queue_active == 0) {
		/* Tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
	return 0;

drop:
	/*
	 * info_map has not been modified yet on this path, so this is
	 * the map the caller loaded the mbuf into.
	 */
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	return ENOBUFS;
}

/*
 * Map an mbuf chain for DMA, translate it into firmware send requests
 * and submit them on the given tx ring.  TSO packets are pulled up
 * first and then handed to mxge_encap_tso() after the DMA load.
 *
 * zeropad is the bus address of a zero-filled buffer used to pad
 * frames shorter than 60 bytes.
 *
 * Returns 0 on success, otherwise the error from mxge_pullup_tso(),
 * bus_dmamap_load_mbuf_defrag() or mxge_encap_tso().  On a DMA load
 * failure the mbuf is freed here.
 * NOTE(review): on the pullup failure path nothing is freed here;
 * presumably m_pullup() has already released the chain -- confirm.
 */
static int
mxge_encap(mxge_tx_ring_t *tx, struct mbuf *m, bus_addr_t zeropad)
{
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	bus_dmamap_t map;
	int cnt, cum_len, err, i, idx, odd_flag;
	uint16_t pseudo_hdr_offset;
	uint8_t flags, cksum_offset;
	struct mxge_buffer_state *info_map, *info_last;

	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
		err = mxge_pullup_tso(&m);
		if (__predict_false(err))
			return err;
	}

	/*
	 * Map the frame for DMA
	 */
	idx = tx->req & tx->mask;
	info_map = &tx->info[idx];
	map = info_map->map;

	/*
	 * The segment limit is max_desc - 2, below the max_desc limit
	 * on requests; the runt padding further down adds one request.
	 */
	err = bus_dmamap_load_mbuf_defrag(tx->dmat, map, &m,
	    tx->seg_list, tx->max_desc - 2, &cnt, BUS_DMA_NOWAIT);
	if (__predict_false(err != 0))
		goto drop;
	bus_dmamap_sync(tx->dmat, map, BUS_DMASYNC_PREWRITE);

	/*
	 * TSO is different enough, we handle it in another routine
	 */
	if (m->m_pkthdr.csum_flags & CSUM_TSO)
		return mxge_encap_tso(tx, info_map, m, cnt);

	req = tx->req_list;
	cksum_offset = 0;
	pseudo_hdr_offset = 0;
	flags = MXGEFW_FLAGS_NO_TSO;

	/*
	 * Checksum offloading
	 */
	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
		cksum_offset = m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen;
		pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
		pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
		req->cksum_offset = cksum_offset;
		flags |= MXGEFW_FLAGS_CKSUM;
		odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
	} else {
		odd_flag = 0;
	}
	if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
		flags |= MXGEFW_FLAGS_SMALL;

	/*
	 * Convert segments into a request list
	 */
	cum_len = 0;
	seg = tx->seg_list;
	req->flags = MXGEFW_FLAGS_FIRST;
	for (i = 0; i < cnt; i++) {
		req->addr_low = htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
		req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		req->length = htobe16(seg->ds_len);
		req->cksum_offset = cksum_offset;
		if (cksum_offset > seg->ds_len)
			cksum_offset -= seg->ds_len;
		else
			cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cum_len += seg->ds_len;
		seg++;
		req++;
		/* Clear the next slot's flags; they are OR-ed into above */
		req->flags = 0;
	}
	req--;

	/*
	 * Pad runt to 60 bytes
	 */
	if (cum_len < 60) {
		/* This slot's flags were zeroed by the loop above */
		req++;
		req->addr_low = htobe32(MXGE_LOWPART_TO_U32(zeropad));
		req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(zeropad));
		req->length = htobe16(60 - cum_len);
		req->cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cnt++;
	}

	/* The first request carries the request count for the whole packet */
	tx->req_list[0].rdma_count = cnt;
#if 0
	/* print what the firmware will see */
	for (i = 0; i < cnt; i++) {
		kprintf("%d: addr: 0x%x 0x%x len:%d pso%d,"
		    "cso:%d, flags:0x%x, rdma:%d\n",
		    i, (int)ntohl(tx->req_list[i].addr_high),
		    (int)ntohl(tx->req_list[i].addr_low),
		    (int)ntohs(tx->req_list[i].length),
		    (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
		    tx->req_list[i].cksum_offset, tx->req_list[i].flags,
		    tx->req_list[i].rdma_count);
	}
	kprintf("--------------\n");
#endif
	/*
	 * The mbuf and its loaded DMA map are recorded in the info slot
	 * of the last request; that slot's map is moved to the first
	 * slot in exchange.
	 */
	info_last = &tx->info[((cnt - 1) + tx->req) & tx->mask];

	info_map->map = info_last->map;
	info_last->map = map;
	info_last->m = m;

	mxge_submit_req(tx, tx->req_list, cnt);

	if (tx->send_go != NULL && tx->queue_active == 0) {
		/* Tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
	return 0;

drop:
	m_freem(m);
	return err;
}

/*
 * Transmit start routine for one subqueue / tx ring.
 *
 * Packets are dequeued and encapsulated for as long as the ring has
 * more than tx->max_desc free slots.  When the ring fills up the
 * subqueue is marked oactive.  If at least one packet was queued to
 * the NIC, the ring's watchdog timer is set to 5.
 */
static void
mxge_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
{
	mxge_softc_t *sc = ifp->if_softc;
	mxge_tx_ring_t *tx = ifsq_get_priv(ifsq);
	bus_addr_t zeropad;
	int encap = 0;

	KKASSERT(tx->ifsq == ifsq);
	ASSERT_SERIALIZED(&tx->tx_serialize);

	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
		return;

	zeropad = sc->zeropad_dma.dmem_busaddr;
	while (tx->mask - (tx->req - tx->done) > tx->max_desc) {
		struct mbuf *m;
		int error;

		m = ifsq_dequeue(ifsq);
		if (m == NULL)
			goto done;

		BPF_MTAP(ifp, m);
		error = mxge_encap(tx, m, zeropad);
		if (!error)
			encap = 1;
		else
			IFNET_STAT_INC(ifp, oerrors, 1);
	}

	/* Ran out of transmit slots */
	ifsq_set_oactive(ifsq);
done:
	if (encap)
		tx->watchdog.wd_timer = 5;
}

/*
 * Transmit watchdog for one subqueue.
 *
 * Compares the firmware's dropped_pause counter with the value seen at
 * the previous watchdog run.  If it has not changed, the ring is
 * reported as stuck and the NIC is reset; otherwise a flow control
 * message is printed and the new counter value is remembered.
 */
static void
mxge_watchdog(struct ifaltq_subque *ifsq)
{
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	struct mxge_softc *sc = ifp->if_softc;
	uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
	mxge_tx_ring_t *tx = ifsq_get_priv(ifsq);

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	/* Check for pause blocking before resetting */
	if (tx->watchdog_rx_pause == rx_pause) {
		mxge_warn_stuck(sc, tx, 0);
		mxge_watchdog_reset(sc);
		return;
	} else {
		if_printf(ifp, "Flow control blocking xmits, "
		    "check link partner\n");
	}
	tx->watchdog_rx_pause = rx_pause;
}

/*
 * Copy an array of mcp_kreq_ether_recv_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's low
 * DMA address to mark it valid only after we write the entire chunk
 * in a burst
 */
static __inline void
mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
    mcp_kreq_ether_recv_t *src)
{
	uint32_t low;

	/* Temporarily replace the first entry's low address with all ones */
	low = src->addr_low;
	src->addr_low = 0xffffffff;
	mxge_pio_copy(dst, src, 4 * sizeof (*src));
	wmb();
	mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
	wmb();
	/* Restore the real address in the shadow copy and on the NIC */
	src->addr_low = low;
	dst->addr_low = low;
	wmb();
}

/*
 * Allocate a small (mbuf header sized) receive buffer for ring slot
 * idx, load it into 'map' and record its bus address in the shadow
 * ring.
 *
 * With init set, allocation uses M_WAITOK and any failure returns
 * immediately.  Otherwise M_NOWAIT is used and, on failure, the slot's
 * existing mbuf and shadow entry are left untouched.  In the non-init
 * failure case and on success, a group of eight shadow entries is
 * pushed to the NIC whenever idx is the last slot of its group.
 *
 * Returns 0 on success, ENOBUFS if no mbuf was available, or the
 * error from bus_dmamap_load_mbuf_segment().
 */
static int
mxge_get_buf_small(mxge_rx_ring_t *rx, bus_dmamap_t map, int idx,
    boolean_t init)
{
	bus_dma_segment_t seg;
	struct mbuf *m;
	int cnt, err, mflag;

	mflag = M_NOWAIT;
	if (__predict_false(init))
		mflag = M_WAITOK;

	m = m_gethdr(mflag, MT_DATA);
	if (m == NULL) {
		err = ENOBUFS;
		if (__predict_false(init)) {
			/*
			 * During initialization, there
			 * is nothing to setup; bail out
			 */
			return err;
		}
		goto done;
	}
	m->m_len = m->m_pkthdr.len = MHLEN;

	err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m,
	    &seg, 1, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_freem(m);
		if (__predict_false(init)) {
			/*
			 * During initialization, there
			 * is nothing to setup; bail out
			 */
			return err;
		}
		goto done;
	}

	rx->info[idx].m = m;
	rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
	rx->shadow[idx].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));

done:
	/* Buffers are handed to the NIC in groups of eight */
	if ((idx & 7) == 7)
		mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
	return err;
}

/*
 * Allocate a big receive buffer for ring slot idx, load it into 'map'
 * and record its bus address in the shadow ring.  The buffer is a
 * regular cluster when rx->cl_size is MCLBYTES, otherwise a
 * MJUMPAGESIZE jumbo cluster.
 *
 * The init flag, the failure handling and the return values are the
 * same as for mxge_get_buf_small().
 */
static int
mxge_get_buf_big(mxge_rx_ring_t *rx, bus_dmamap_t map, int idx,
    boolean_t init)
{
	bus_dma_segment_t seg;
	struct mbuf *m;
	int cnt, err, mflag;

	mflag = M_NOWAIT;
	if (__predict_false(init))
		mflag = M_WAITOK;

	if (rx->cl_size == MCLBYTES)
		m = m_getcl(mflag, MT_DATA, M_PKTHDR);
	else
		m = m_getjcl(mflag, MT_DATA, M_PKTHDR, MJUMPAGESIZE);
	if (m == NULL) {
		err = ENOBUFS;
		if (__predict_false(init)) {
			/*
			 * During initialization, there
			 * is nothing to setup; bail out
			 */
			return err;
		}
		goto done;
	}
	m->m_len = m->m_pkthdr.len = rx->cl_size;

	err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m,
	    &seg, 1, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_freem(m);
		if (__predict_false(init)) {
			/*
			 * During initialization, there
			 * is nothing to setup; bail out
			 */
			return err;
		}
		goto done;
	}

	rx->info[idx].m = m;
	rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
	rx->shadow[idx].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));

done:
	/* Buffers are handed to the NIC in groups of eight */
	if ((idx & 7) == 7)
		mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
	return err;
}

/*
 * Myri10GE hardware checksums are not valid if the sender
 * padded the frame with non-zero padding.  This is because
 * the firmware just does a simple 16-bit 1s complement
 * checksum across the entire frame, excluding the first 14
 * bytes.
It is best to simply to check the checksum and 2141 * tell the stack about it only if the checksum is good 2142 */ 2143 static __inline uint16_t 2144 mxge_rx_csum(struct mbuf *m, int csum) 2145 { 2146 const struct ether_header *eh; 2147 const struct ip *ip; 2148 uint16_t c; 2149 2150 eh = mtod(m, const struct ether_header *); 2151 2152 /* Only deal with IPv4 TCP & UDP for now */ 2153 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP))) 2154 return 1; 2155 2156 ip = (const struct ip *)(eh + 1); 2157 if (__predict_false(ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)) 2158 return 1; 2159 2160 #ifdef INET 2161 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2162 htonl(ntohs(csum) + ntohs(ip->ip_len) + 2163 - (ip->ip_hl << 2) + ip->ip_p)); 2164 #else 2165 c = 1; 2166 #endif 2167 c ^= 0xffff; 2168 return c; 2169 } 2170 2171 static void 2172 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2173 { 2174 struct ether_vlan_header *evl; 2175 uint32_t partial; 2176 2177 evl = mtod(m, struct ether_vlan_header *); 2178 2179 /* 2180 * Fix checksum by subtracting EVL_ENCAPLEN bytes after 2181 * what the firmware thought was the end of the ethernet 2182 * header. 2183 */ 2184 2185 /* Put checksum into host byte order */ 2186 *csum = ntohs(*csum); 2187 2188 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2189 *csum += ~partial; 2190 *csum += ((*csum) < ~partial); 2191 *csum = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2192 *csum = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2193 2194 /* 2195 * Restore checksum to network byte order; 2196 * later consumers expect this 2197 */ 2198 *csum = htons(*csum); 2199 2200 /* save the tag */ 2201 m->m_pkthdr.ether_vlantag = ntohs(evl->evl_tag); 2202 m->m_flags |= M_VLANTAG; 2203 2204 /* 2205 * Remove the 802.1q header by copying the Ethernet 2206 * addresses over it and adjusting the beginning of 2207 * the data in the mbuf. The encapsulated Ethernet 2208 * type field is already in place. 
2209 */ 2210 bcopy((char *)evl, (char *)evl + EVL_ENCAPLEN, 2211 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2212 m_adj(m, EVL_ENCAPLEN); 2213 } 2214 2215 2216 static __inline void 2217 mxge_rx_done_big(struct ifnet *ifp, mxge_rx_ring_t *rx, 2218 uint32_t len, uint32_t csum) 2219 { 2220 struct mbuf *m; 2221 const struct ether_header *eh; 2222 bus_dmamap_t old_map; 2223 int idx; 2224 2225 idx = rx->cnt & rx->mask; 2226 rx->cnt++; 2227 2228 /* Save a pointer to the received mbuf */ 2229 m = rx->info[idx].m; 2230 2231 /* Try to replace the received mbuf */ 2232 if (mxge_get_buf_big(rx, rx->extra_map, idx, FALSE)) { 2233 /* Drop the frame -- the old mbuf is re-cycled */ 2234 IFNET_STAT_INC(ifp, ierrors, 1); 2235 return; 2236 } 2237 2238 /* Unmap the received buffer */ 2239 old_map = rx->info[idx].map; 2240 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2241 bus_dmamap_unload(rx->dmat, old_map); 2242 2243 /* Swap the bus_dmamap_t's */ 2244 rx->info[idx].map = rx->extra_map; 2245 rx->extra_map = old_map; 2246 2247 /* 2248 * mcp implicitly skips 1st 2 bytes so that packet is properly 2249 * aligned 2250 */ 2251 m->m_data += MXGEFW_PAD; 2252 2253 m->m_pkthdr.rcvif = ifp; 2254 m->m_len = m->m_pkthdr.len = len; 2255 2256 IFNET_STAT_INC(ifp, ipackets, 1); 2257 2258 eh = mtod(m, const struct ether_header *); 2259 if (eh->ether_type == htons(ETHERTYPE_VLAN)) 2260 mxge_vlan_tag_remove(m, &csum); 2261 2262 /* If the checksum is valid, mark it in the mbuf header */ 2263 if ((ifp->if_capenable & IFCAP_RXCSUM) && 2264 mxge_rx_csum(m, csum) == 0) { 2265 /* Tell the stack that the checksum is good */ 2266 m->m_pkthdr.csum_data = 0xffff; 2267 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2268 CSUM_DATA_VALID; 2269 } 2270 ifp->if_input(ifp, m, NULL, -1); 2271 } 2272 2273 static __inline void 2274 mxge_rx_done_small(struct ifnet *ifp, mxge_rx_ring_t *rx, 2275 uint32_t len, uint32_t csum) 2276 { 2277 const struct ether_header *eh; 2278 struct mbuf *m; 2279 bus_dmamap_t old_map; 2280 int idx; 
2281 2282 idx = rx->cnt & rx->mask; 2283 rx->cnt++; 2284 2285 /* Save a pointer to the received mbuf */ 2286 m = rx->info[idx].m; 2287 2288 /* Try to replace the received mbuf */ 2289 if (mxge_get_buf_small(rx, rx->extra_map, idx, FALSE)) { 2290 /* Drop the frame -- the old mbuf is re-cycled */ 2291 IFNET_STAT_INC(ifp, ierrors, 1); 2292 return; 2293 } 2294 2295 /* Unmap the received buffer */ 2296 old_map = rx->info[idx].map; 2297 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2298 bus_dmamap_unload(rx->dmat, old_map); 2299 2300 /* Swap the bus_dmamap_t's */ 2301 rx->info[idx].map = rx->extra_map; 2302 rx->extra_map = old_map; 2303 2304 /* 2305 * mcp implicitly skips 1st 2 bytes so that packet is properly 2306 * aligned 2307 */ 2308 m->m_data += MXGEFW_PAD; 2309 2310 m->m_pkthdr.rcvif = ifp; 2311 m->m_len = m->m_pkthdr.len = len; 2312 2313 IFNET_STAT_INC(ifp, ipackets, 1); 2314 2315 eh = mtod(m, const struct ether_header *); 2316 if (eh->ether_type == htons(ETHERTYPE_VLAN)) 2317 mxge_vlan_tag_remove(m, &csum); 2318 2319 /* If the checksum is valid, mark it in the mbuf header */ 2320 if ((ifp->if_capenable & IFCAP_RXCSUM) && 2321 mxge_rx_csum(m, csum) == 0) { 2322 /* Tell the stack that the checksum is good */ 2323 m->m_pkthdr.csum_data = 0xffff; 2324 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2325 CSUM_DATA_VALID; 2326 } 2327 ifp->if_input(ifp, m, NULL, -1); 2328 } 2329 2330 static __inline void 2331 mxge_clean_rx_done(struct ifnet *ifp, struct mxge_rx_data *rx_data, int cycle) 2332 { 2333 mxge_rx_done_t *rx_done = &rx_data->rx_done; 2334 2335 while (rx_done->entry[rx_done->idx].length != 0 && cycle != 0) { 2336 uint16_t length, checksum; 2337 2338 length = ntohs(rx_done->entry[rx_done->idx].length); 2339 rx_done->entry[rx_done->idx].length = 0; 2340 2341 checksum = rx_done->entry[rx_done->idx].checksum; 2342 2343 if (length <= MXGE_RX_SMALL_BUFLEN) { 2344 mxge_rx_done_small(ifp, &rx_data->rx_small, 2345 length, checksum); 2346 } else { 2347 
mxge_rx_done_big(ifp, &rx_data->rx_big, 2348 length, checksum); 2349 } 2350 2351 rx_done->idx++; 2352 rx_done->idx &= rx_done->mask; 2353 --cycle; 2354 } 2355 } 2356 2357 static __inline void 2358 mxge_tx_done(struct ifnet *ifp, mxge_tx_ring_t *tx, uint32_t mcp_idx) 2359 { 2360 ASSERT_SERIALIZED(&tx->tx_serialize); 2361 2362 while (tx->pkt_done != mcp_idx) { 2363 struct mbuf *m; 2364 int idx; 2365 2366 idx = tx->done & tx->mask; 2367 tx->done++; 2368 2369 m = tx->info[idx].m; 2370 /* 2371 * mbuf and DMA map only attached to the first 2372 * segment per-mbuf. 2373 */ 2374 if (m != NULL) { 2375 tx->pkt_done++; 2376 IFNET_STAT_INC(ifp, opackets, 1); 2377 tx->info[idx].m = NULL; 2378 bus_dmamap_unload(tx->dmat, tx->info[idx].map); 2379 m_freem(m); 2380 } 2381 } 2382 2383 /* 2384 * If we have space, clear OACTIVE to tell the stack that 2385 * its OK to send packets 2386 */ 2387 if (tx->req - tx->done < (tx->mask + 1) / 2) { 2388 ifsq_clr_oactive(tx->ifsq); 2389 if (tx->req == tx->done) { 2390 /* Reset watchdog */ 2391 tx->watchdog.wd_timer = 0; 2392 } 2393 } 2394 2395 if (!ifsq_is_empty(tx->ifsq)) 2396 ifsq_devstart(tx->ifsq); 2397 2398 if (tx->send_stop != NULL && tx->req == tx->done) { 2399 /* 2400 * Let the NIC stop polling this queue, since there 2401 * are no more transmits pending 2402 */ 2403 *tx->send_stop = 1; 2404 tx->queue_active = 0; 2405 tx->deactivate++; 2406 wmb(); 2407 } 2408 } 2409 2410 static struct mxge_media_type mxge_xfp_media_types[] = { 2411 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2412 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2413 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2414 {IFM_NONE, (1 << 5), "10GBASE-ER"}, 2415 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2416 {IFM_NONE, (1 << 3), "10GBASE-SW"}, 2417 {IFM_NONE, (1 << 2), "10GBASE-LW"}, 2418 {IFM_NONE, (1 << 1), "10GBASE-EW"}, 2419 {IFM_NONE, (1 << 0), "Reserved"} 2420 }; 2421 2422 static struct mxge_media_type mxge_sfp_media_types[] = { 2423 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"}, 2424 {IFM_NONE, (1 
<< 7), "Reserved"}, 2425 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2426 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2427 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}, 2428 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"} 2429 }; 2430 2431 static void 2432 mxge_media_set(mxge_softc_t *sc, int media_type) 2433 { 2434 int fc_opt = 0; 2435 2436 if (media_type == IFM_NONE) 2437 return; 2438 2439 if (sc->pause) 2440 fc_opt = IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE; 2441 2442 ifmedia_add(&sc->media, MXGE_IFM | media_type, 0, NULL); 2443 ifmedia_set(&sc->media, MXGE_IFM | media_type | fc_opt); 2444 2445 sc->current_media = media_type; 2446 } 2447 2448 static void 2449 mxge_media_unset(mxge_softc_t *sc) 2450 { 2451 ifmedia_removeall(&sc->media); 2452 sc->current_media = IFM_NONE; 2453 } 2454 2455 static void 2456 mxge_media_init(mxge_softc_t *sc) 2457 { 2458 const char *ptr; 2459 int i; 2460 2461 mxge_media_unset(sc); 2462 2463 /* 2464 * Parse the product code to deterimine the interface type 2465 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2466 * after the 3rd dash in the driver's cached copy of the 2467 * EEPROM's product code string. 
2468 */ 2469 ptr = sc->product_code_string; 2470 if (ptr == NULL) { 2471 if_printf(sc->ifp, "Missing product code\n"); 2472 return; 2473 } 2474 2475 for (i = 0; i < 3; i++, ptr++) { 2476 ptr = strchr(ptr, '-'); 2477 if (ptr == NULL) { 2478 if_printf(sc->ifp, "only %d dashes in PC?!?\n", i); 2479 return; 2480 } 2481 } 2482 if (*ptr == 'C' || *(ptr +1) == 'C') { 2483 /* -C is CX4 */ 2484 sc->connector = MXGE_CX4; 2485 mxge_media_set(sc, IFM_10G_CX4); 2486 } else if (*ptr == 'Q') { 2487 /* -Q is Quad Ribbon Fiber */ 2488 sc->connector = MXGE_QRF; 2489 if_printf(sc->ifp, "Quad Ribbon Fiber Media\n"); 2490 /* DragonFly has no media type for Quad ribbon fiber */ 2491 } else if (*ptr == 'R') { 2492 /* -R is XFP */ 2493 sc->connector = MXGE_XFP; 2494 /* NOTE: ifmedia will be installed later */ 2495 } else if (*ptr == 'S' || *(ptr +1) == 'S') { 2496 /* -S or -2S is SFP+ */ 2497 sc->connector = MXGE_SFP; 2498 /* NOTE: ifmedia will be installed later */ 2499 } else { 2500 sc->connector = MXGE_UNK; 2501 if_printf(sc->ifp, "Unknown media type: %c\n", *ptr); 2502 } 2503 } 2504 2505 /* 2506 * Determine the media type for a NIC. Some XFPs will identify 2507 * themselves only when their link is up, so this is initiated via a 2508 * link up interrupt. However, this can potentially take up to 2509 * several milliseconds, so it is run via the watchdog routine, rather 2510 * than in the interrupt handler itself. 
2511 */ 2512 static void 2513 mxge_media_probe(mxge_softc_t *sc) 2514 { 2515 mxge_cmd_t cmd; 2516 const char *cage_type; 2517 struct mxge_media_type *mxge_media_types = NULL; 2518 int i, err, ms, mxge_media_type_entries; 2519 uint32_t byte; 2520 2521 sc->need_media_probe = 0; 2522 2523 if (sc->connector == MXGE_XFP) { 2524 /* -R is XFP */ 2525 mxge_media_types = mxge_xfp_media_types; 2526 mxge_media_type_entries = NELEM(mxge_xfp_media_types); 2527 byte = MXGE_XFP_COMPLIANCE_BYTE; 2528 cage_type = "XFP"; 2529 } else if (sc->connector == MXGE_SFP) { 2530 /* -S or -2S is SFP+ */ 2531 mxge_media_types = mxge_sfp_media_types; 2532 mxge_media_type_entries = NELEM(mxge_sfp_media_types); 2533 cage_type = "SFP+"; 2534 byte = 3; 2535 } else { 2536 /* nothing to do; media type cannot change */ 2537 return; 2538 } 2539 2540 /* 2541 * At this point we know the NIC has an XFP cage, so now we 2542 * try to determine what is in the cage by using the 2543 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2544 * register. 
We read just one byte, which may take over 2545 * a millisecond 2546 */ 2547 2548 bzero(&cmd, sizeof(cmd)); /* silence gcc warning */ 2549 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2550 cmd.data1 = byte; 2551 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2552 if (err != MXGEFW_CMD_OK) { 2553 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) 2554 if_printf(sc->ifp, "failed to read XFP\n"); 2555 else if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) 2556 if_printf(sc->ifp, "Type R/S with no XFP!?!?\n"); 2557 else 2558 if_printf(sc->ifp, "I2C read failed, err: %d", err); 2559 mxge_media_unset(sc); 2560 return; 2561 } 2562 2563 /* Now we wait for the data to be cached */ 2564 cmd.data0 = byte; 2565 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2566 for (ms = 0; err == EBUSY && ms < 50; ms++) { 2567 DELAY(1000); 2568 cmd.data0 = byte; 2569 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2570 } 2571 if (err != MXGEFW_CMD_OK) { 2572 if_printf(sc->ifp, "failed to read %s (%d, %dms)\n", 2573 cage_type, err, ms); 2574 mxge_media_unset(sc); 2575 return; 2576 } 2577 2578 if (cmd.data0 == mxge_media_types[0].bitmask) { 2579 if (bootverbose) { 2580 if_printf(sc->ifp, "%s:%s\n", cage_type, 2581 mxge_media_types[0].name); 2582 } 2583 if (sc->current_media != mxge_media_types[0].flag) { 2584 mxge_media_unset(sc); 2585 mxge_media_set(sc, mxge_media_types[0].flag); 2586 } 2587 return; 2588 } 2589 for (i = 1; i < mxge_media_type_entries; i++) { 2590 if (cmd.data0 & mxge_media_types[i].bitmask) { 2591 if (bootverbose) { 2592 if_printf(sc->ifp, "%s:%s\n", cage_type, 2593 mxge_media_types[i].name); 2594 } 2595 2596 if (sc->current_media != mxge_media_types[i].flag) { 2597 mxge_media_unset(sc); 2598 mxge_media_set(sc, mxge_media_types[i].flag); 2599 } 2600 return; 2601 } 2602 } 2603 mxge_media_unset(sc); 2604 if (bootverbose) { 2605 if_printf(sc->ifp, "%s media 0x%x unknown\n", cage_type, 2606 cmd.data0); 2607 } 2608 } 2609 2610 static void 2611 mxge_intr_status(struct mxge_softc 
*sc, const mcp_irq_data_t *stats) 2612 { 2613 if (sc->link_state != stats->link_up) { 2614 sc->link_state = stats->link_up; 2615 if (sc->link_state) { 2616 sc->ifp->if_link_state = LINK_STATE_UP; 2617 if_link_state_change(sc->ifp); 2618 if (bootverbose) 2619 if_printf(sc->ifp, "link up\n"); 2620 } else { 2621 sc->ifp->if_link_state = LINK_STATE_DOWN; 2622 if_link_state_change(sc->ifp); 2623 if (bootverbose) 2624 if_printf(sc->ifp, "link down\n"); 2625 } 2626 sc->need_media_probe = 1; 2627 } 2628 2629 if (sc->rdma_tags_available != be32toh(stats->rdma_tags_available)) { 2630 sc->rdma_tags_available = be32toh(stats->rdma_tags_available); 2631 if_printf(sc->ifp, "RDMA timed out! %d tags left\n", 2632 sc->rdma_tags_available); 2633 } 2634 2635 if (stats->link_down) { 2636 sc->down_cnt += stats->link_down; 2637 sc->link_state = 0; 2638 sc->ifp->if_link_state = LINK_STATE_DOWN; 2639 if_link_state_change(sc->ifp); 2640 } 2641 } 2642 2643 static void 2644 mxge_serialize_skipmain(struct mxge_softc *sc) 2645 { 2646 lwkt_serialize_array_enter(sc->serializes, sc->nserialize, 1); 2647 } 2648 2649 static void 2650 mxge_deserialize_skipmain(struct mxge_softc *sc) 2651 { 2652 lwkt_serialize_array_exit(sc->serializes, sc->nserialize, 1); 2653 } 2654 2655 static void 2656 mxge_legacy(void *arg) 2657 { 2658 struct mxge_slice_state *ss = arg; 2659 mxge_softc_t *sc = ss->sc; 2660 mcp_irq_data_t *stats = ss->fw_stats; 2661 mxge_tx_ring_t *tx = &ss->tx; 2662 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done; 2663 uint32_t send_done_count; 2664 uint8_t valid; 2665 2666 ASSERT_SERIALIZED(&sc->main_serialize); 2667 2668 /* Make sure the DMA has finished */ 2669 if (!stats->valid) 2670 return; 2671 valid = stats->valid; 2672 2673 /* Lower legacy IRQ */ 2674 *sc->irq_deassert = 0; 2675 if (!mxge_deassert_wait) { 2676 /* Don't wait for conf. 
that irq is low */ 2677 stats->valid = 0; 2678 } 2679 2680 mxge_serialize_skipmain(sc); 2681 2682 /* 2683 * Loop while waiting for legacy irq deassertion 2684 * XXX do we really want to loop? 2685 */ 2686 do { 2687 /* Check for transmit completes and receives */ 2688 send_done_count = be32toh(stats->send_done_count); 2689 while ((send_done_count != tx->pkt_done) || 2690 (rx_done->entry[rx_done->idx].length != 0)) { 2691 if (send_done_count != tx->pkt_done) { 2692 mxge_tx_done(&sc->arpcom.ac_if, tx, 2693 (int)send_done_count); 2694 } 2695 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1); 2696 send_done_count = be32toh(stats->send_done_count); 2697 } 2698 if (mxge_deassert_wait) 2699 wmb(); 2700 } while (*((volatile uint8_t *)&stats->valid)); 2701 2702 mxge_deserialize_skipmain(sc); 2703 2704 /* Fw link & error stats meaningful only on the first slice */ 2705 if (__predict_false(stats->stats_updated)) 2706 mxge_intr_status(sc, stats); 2707 2708 /* Check to see if we have rx token to pass back */ 2709 if (valid & 0x1) 2710 *ss->irq_claim = be32toh(3); 2711 *(ss->irq_claim + 1) = be32toh(3); 2712 } 2713 2714 static void 2715 mxge_msi(void *arg) 2716 { 2717 struct mxge_slice_state *ss = arg; 2718 mxge_softc_t *sc = ss->sc; 2719 mcp_irq_data_t *stats = ss->fw_stats; 2720 mxge_tx_ring_t *tx = &ss->tx; 2721 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done; 2722 uint32_t send_done_count; 2723 uint8_t valid; 2724 #ifndef IFPOLL_ENABLE 2725 const boolean_t polling = FALSE; 2726 #else 2727 boolean_t polling = FALSE; 2728 #endif 2729 2730 ASSERT_SERIALIZED(&sc->main_serialize); 2731 2732 /* Make sure the DMA has finished */ 2733 if (__predict_false(!stats->valid)) 2734 return; 2735 2736 valid = stats->valid; 2737 stats->valid = 0; 2738 2739 #ifdef IFPOLL_ENABLE 2740 if (sc->arpcom.ac_if.if_flags & IFF_NPOLLING) 2741 polling = TRUE; 2742 #endif 2743 2744 if (!polling) { 2745 /* Check for receives */ 2746 lwkt_serialize_enter(&ss->rx_data.rx_serialize); 2747 if 
(rx_done->entry[rx_done->idx].length != 0) 2748 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1); 2749 lwkt_serialize_exit(&ss->rx_data.rx_serialize); 2750 } 2751 2752 /* 2753 * Check for transmit completes 2754 * 2755 * NOTE: 2756 * Since pkt_done is only changed by mxge_tx_done(), 2757 * which is called only in interrupt handler, the 2758 * check w/o holding tx serializer is MPSAFE. 2759 */ 2760 send_done_count = be32toh(stats->send_done_count); 2761 if (send_done_count != tx->pkt_done) { 2762 lwkt_serialize_enter(&tx->tx_serialize); 2763 mxge_tx_done(&sc->arpcom.ac_if, tx, (int)send_done_count); 2764 lwkt_serialize_exit(&tx->tx_serialize); 2765 } 2766 2767 if (__predict_false(stats->stats_updated)) 2768 mxge_intr_status(sc, stats); 2769 2770 /* Check to see if we have rx token to pass back */ 2771 if (!polling && (valid & 0x1)) 2772 *ss->irq_claim = be32toh(3); 2773 *(ss->irq_claim + 1) = be32toh(3); 2774 } 2775 2776 static void 2777 mxge_msix_rx(void *arg) 2778 { 2779 struct mxge_slice_state *ss = arg; 2780 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done; 2781 2782 #ifdef IFPOLL_ENABLE 2783 if (ss->sc->arpcom.ac_if.if_flags & IFF_NPOLLING) 2784 return; 2785 #endif 2786 2787 ASSERT_SERIALIZED(&ss->rx_data.rx_serialize); 2788 2789 if (rx_done->entry[rx_done->idx].length != 0) 2790 mxge_clean_rx_done(&ss->sc->arpcom.ac_if, &ss->rx_data, -1); 2791 2792 *ss->irq_claim = be32toh(3); 2793 } 2794 2795 static void 2796 mxge_msix_rxtx(void *arg) 2797 { 2798 struct mxge_slice_state *ss = arg; 2799 mxge_softc_t *sc = ss->sc; 2800 mcp_irq_data_t *stats = ss->fw_stats; 2801 mxge_tx_ring_t *tx = &ss->tx; 2802 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done; 2803 uint32_t send_done_count; 2804 uint8_t valid; 2805 #ifndef IFPOLL_ENABLE 2806 const boolean_t polling = FALSE; 2807 #else 2808 boolean_t polling = FALSE; 2809 #endif 2810 2811 ASSERT_SERIALIZED(&ss->rx_data.rx_serialize); 2812 2813 /* Make sure the DMA has finished */ 2814 if (__predict_false(!stats->valid)) 2815 
return; 2816 2817 valid = stats->valid; 2818 stats->valid = 0; 2819 2820 #ifdef IFPOLL_ENABLE 2821 if (sc->arpcom.ac_if.if_flags & IFF_NPOLLING) 2822 polling = TRUE; 2823 #endif 2824 2825 /* Check for receives */ 2826 if (!polling && rx_done->entry[rx_done->idx].length != 0) 2827 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1); 2828 2829 /* 2830 * Check for transmit completes 2831 * 2832 * NOTE: 2833 * Since pkt_done is only changed by mxge_tx_done(), 2834 * which is called only in interrupt handler, the 2835 * check w/o holding tx serializer is MPSAFE. 2836 */ 2837 send_done_count = be32toh(stats->send_done_count); 2838 if (send_done_count != tx->pkt_done) { 2839 lwkt_serialize_enter(&tx->tx_serialize); 2840 mxge_tx_done(&sc->arpcom.ac_if, tx, (int)send_done_count); 2841 lwkt_serialize_exit(&tx->tx_serialize); 2842 } 2843 2844 /* Check to see if we have rx token to pass back */ 2845 if (!polling && (valid & 0x1)) 2846 *ss->irq_claim = be32toh(3); 2847 *(ss->irq_claim + 1) = be32toh(3); 2848 } 2849 2850 static void 2851 mxge_init(void *arg) 2852 { 2853 struct mxge_softc *sc = arg; 2854 2855 ASSERT_IFNET_SERIALIZED_ALL(sc->ifp); 2856 if ((sc->ifp->if_flags & IFF_RUNNING) == 0) 2857 mxge_open(sc); 2858 } 2859 2860 static void 2861 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 2862 { 2863 int i; 2864 2865 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 2866 if (ss->rx_data.rx_big.info[i].m == NULL) 2867 continue; 2868 bus_dmamap_unload(ss->rx_data.rx_big.dmat, 2869 ss->rx_data.rx_big.info[i].map); 2870 m_freem(ss->rx_data.rx_big.info[i].m); 2871 ss->rx_data.rx_big.info[i].m = NULL; 2872 } 2873 2874 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) { 2875 if (ss->rx_data.rx_small.info[i].m == NULL) 2876 continue; 2877 bus_dmamap_unload(ss->rx_data.rx_small.dmat, 2878 ss->rx_data.rx_small.info[i].map); 2879 m_freem(ss->rx_data.rx_small.info[i].m); 2880 ss->rx_data.rx_small.info[i].m = NULL; 2881 } 2882 2883 /* Transmit ring used only on the first slice */ 
2884 if (ss->tx.info == NULL) 2885 return; 2886 2887 for (i = 0; i <= ss->tx.mask; i++) { 2888 if (ss->tx.info[i].m == NULL) 2889 continue; 2890 bus_dmamap_unload(ss->tx.dmat, ss->tx.info[i].map); 2891 m_freem(ss->tx.info[i].m); 2892 ss->tx.info[i].m = NULL; 2893 } 2894 } 2895 2896 static void 2897 mxge_free_mbufs(mxge_softc_t *sc) 2898 { 2899 int slice; 2900 2901 for (slice = 0; slice < sc->num_slices; slice++) 2902 mxge_free_slice_mbufs(&sc->ss[slice]); 2903 } 2904 2905 static void 2906 mxge_free_slice_rings(struct mxge_slice_state *ss) 2907 { 2908 int i; 2909 2910 if (ss->rx_data.rx_done.entry != NULL) { 2911 mxge_dma_free(&ss->rx_done_dma); 2912 ss->rx_data.rx_done.entry = NULL; 2913 } 2914 2915 if (ss->tx.req_list != NULL) { 2916 kfree(ss->tx.req_list, M_DEVBUF); 2917 ss->tx.req_list = NULL; 2918 } 2919 2920 if (ss->tx.seg_list != NULL) { 2921 kfree(ss->tx.seg_list, M_DEVBUF); 2922 ss->tx.seg_list = NULL; 2923 } 2924 2925 if (ss->rx_data.rx_small.shadow != NULL) { 2926 kfree(ss->rx_data.rx_small.shadow, M_DEVBUF); 2927 ss->rx_data.rx_small.shadow = NULL; 2928 } 2929 2930 if (ss->rx_data.rx_big.shadow != NULL) { 2931 kfree(ss->rx_data.rx_big.shadow, M_DEVBUF); 2932 ss->rx_data.rx_big.shadow = NULL; 2933 } 2934 2935 if (ss->tx.info != NULL) { 2936 if (ss->tx.dmat != NULL) { 2937 for (i = 0; i <= ss->tx.mask; i++) { 2938 bus_dmamap_destroy(ss->tx.dmat, 2939 ss->tx.info[i].map); 2940 } 2941 bus_dma_tag_destroy(ss->tx.dmat); 2942 } 2943 kfree(ss->tx.info, M_DEVBUF); 2944 ss->tx.info = NULL; 2945 } 2946 2947 if (ss->rx_data.rx_small.info != NULL) { 2948 if (ss->rx_data.rx_small.dmat != NULL) { 2949 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) { 2950 bus_dmamap_destroy(ss->rx_data.rx_small.dmat, 2951 ss->rx_data.rx_small.info[i].map); 2952 } 2953 bus_dmamap_destroy(ss->rx_data.rx_small.dmat, 2954 ss->rx_data.rx_small.extra_map); 2955 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat); 2956 } 2957 kfree(ss->rx_data.rx_small.info, M_DEVBUF); 2958 
ss->rx_data.rx_small.info = NULL; 2959 } 2960 2961 if (ss->rx_data.rx_big.info != NULL) { 2962 if (ss->rx_data.rx_big.dmat != NULL) { 2963 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 2964 bus_dmamap_destroy(ss->rx_data.rx_big.dmat, 2965 ss->rx_data.rx_big.info[i].map); 2966 } 2967 bus_dmamap_destroy(ss->rx_data.rx_big.dmat, 2968 ss->rx_data.rx_big.extra_map); 2969 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat); 2970 } 2971 kfree(ss->rx_data.rx_big.info, M_DEVBUF); 2972 ss->rx_data.rx_big.info = NULL; 2973 } 2974 } 2975 2976 static void 2977 mxge_free_rings(mxge_softc_t *sc) 2978 { 2979 int slice; 2980 2981 if (sc->ss == NULL) 2982 return; 2983 2984 for (slice = 0; slice < sc->num_slices; slice++) 2985 mxge_free_slice_rings(&sc->ss[slice]); 2986 } 2987 2988 static int 2989 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 2990 int tx_ring_entries) 2991 { 2992 mxge_softc_t *sc = ss->sc; 2993 size_t bytes; 2994 int err, i; 2995 2996 /* 2997 * Allocate per-slice receive resources 2998 */ 2999 3000 ss->rx_data.rx_small.mask = ss->rx_data.rx_big.mask = 3001 rx_ring_entries - 1; 3002 ss->rx_data.rx_done.mask = (2 * rx_ring_entries) - 1; 3003 3004 /* Allocate the rx shadow rings */ 3005 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_small.shadow); 3006 ss->rx_data.rx_small.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3007 3008 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_big.shadow); 3009 ss->rx_data.rx_big.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3010 3011 /* Allocate the rx host info rings */ 3012 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_small.info); 3013 ss->rx_data.rx_small.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3014 3015 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_big.info); 3016 ss->rx_data.rx_big.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3017 3018 /* Allocate the rx busdma resources */ 3019 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3020 1, /* alignment */ 3021 4096, /* 
boundary */ 3022 BUS_SPACE_MAXADDR, /* low */ 3023 BUS_SPACE_MAXADDR, /* high */ 3024 NULL, NULL, /* filter */ 3025 MHLEN, /* maxsize */ 3026 1, /* num segs */ 3027 MHLEN, /* maxsegsize */ 3028 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, 3029 /* flags */ 3030 &ss->rx_data.rx_small.dmat); /* tag */ 3031 if (err != 0) { 3032 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3033 err); 3034 return err; 3035 } 3036 3037 err = bus_dmamap_create(ss->rx_data.rx_small.dmat, BUS_DMA_WAITOK, 3038 &ss->rx_data.rx_small.extra_map); 3039 if (err != 0) { 3040 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", err); 3041 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat); 3042 ss->rx_data.rx_small.dmat = NULL; 3043 return err; 3044 } 3045 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) { 3046 err = bus_dmamap_create(ss->rx_data.rx_small.dmat, 3047 BUS_DMA_WAITOK, &ss->rx_data.rx_small.info[i].map); 3048 if (err != 0) { 3049 int j; 3050 3051 device_printf(sc->dev, "Err %d rx_small dmamap\n", err); 3052 3053 for (j = 0; j < i; ++j) { 3054 bus_dmamap_destroy(ss->rx_data.rx_small.dmat, 3055 ss->rx_data.rx_small.info[j].map); 3056 } 3057 bus_dmamap_destroy(ss->rx_data.rx_small.dmat, 3058 ss->rx_data.rx_small.extra_map); 3059 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat); 3060 ss->rx_data.rx_small.dmat = NULL; 3061 return err; 3062 } 3063 } 3064 3065 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3066 1, /* alignment */ 3067 4096, /* boundary */ 3068 BUS_SPACE_MAXADDR, /* low */ 3069 BUS_SPACE_MAXADDR, /* high */ 3070 NULL, NULL, /* filter */ 3071 4096, /* maxsize */ 3072 1, /* num segs */ 3073 4096, /* maxsegsize*/ 3074 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, 3075 /* flags */ 3076 &ss->rx_data.rx_big.dmat); /* tag */ 3077 if (err != 0) { 3078 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3079 err); 3080 return err; 3081 } 3082 3083 err = bus_dmamap_create(ss->rx_data.rx_big.dmat, BUS_DMA_WAITOK, 3084 &ss->rx_data.rx_big.extra_map); 3085 if (err != 0) { 3086 
device_printf(sc->dev, "Err %d extra rx_big dmamap\n", err); 3087 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat); 3088 ss->rx_data.rx_big.dmat = NULL; 3089 return err; 3090 } 3091 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 3092 err = bus_dmamap_create(ss->rx_data.rx_big.dmat, BUS_DMA_WAITOK, 3093 &ss->rx_data.rx_big.info[i].map); 3094 if (err != 0) { 3095 int j; 3096 3097 device_printf(sc->dev, "Err %d rx_big dmamap\n", err); 3098 for (j = 0; j < i; ++j) { 3099 bus_dmamap_destroy(ss->rx_data.rx_big.dmat, 3100 ss->rx_data.rx_big.info[j].map); 3101 } 3102 bus_dmamap_destroy(ss->rx_data.rx_big.dmat, 3103 ss->rx_data.rx_big.extra_map); 3104 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat); 3105 ss->rx_data.rx_big.dmat = NULL; 3106 return err; 3107 } 3108 } 3109 3110 /* 3111 * Now allocate TX resources 3112 */ 3113 3114 ss->tx.mask = tx_ring_entries - 1; 3115 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3116 3117 /* 3118 * Allocate the tx request copy block; MUST be at least 8 bytes 3119 * aligned 3120 */ 3121 bytes = sizeof(*ss->tx.req_list) * (ss->tx.max_desc + 4); 3122 ss->tx.req_list = kmalloc_cachealign(__VM_CACHELINE_ALIGN(bytes), 3123 M_DEVBUF, M_WAITOK); 3124 3125 /* Allocate the tx busdma segment list */ 3126 bytes = sizeof(*ss->tx.seg_list) * ss->tx.max_desc; 3127 ss->tx.seg_list = kmalloc(bytes, M_DEVBUF, M_WAITOK); 3128 3129 /* Allocate the tx host info ring */ 3130 bytes = tx_ring_entries * sizeof(*ss->tx.info); 3131 ss->tx.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3132 3133 /* Allocate the tx busdma resources */ 3134 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3135 1, /* alignment */ 3136 sc->tx_boundary, /* boundary */ 3137 BUS_SPACE_MAXADDR, /* low */ 3138 BUS_SPACE_MAXADDR, /* high */ 3139 NULL, NULL, /* filter */ 3140 IP_MAXPACKET + 3141 sizeof(struct ether_vlan_header), 3142 /* maxsize */ 3143 ss->tx.max_desc - 2, /* num segs */ 3144 sc->tx_boundary, /* maxsegsz */ 3145 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | 3146 
BUS_DMA_ONEBPAGE, /* flags */ 3147 &ss->tx.dmat); /* tag */ 3148 if (err != 0) { 3149 device_printf(sc->dev, "Err %d allocating tx dmat\n", err); 3150 return err; 3151 } 3152 3153 /* 3154 * Now use these tags to setup DMA maps for each slot in the ring 3155 */ 3156 for (i = 0; i <= ss->tx.mask; i++) { 3157 err = bus_dmamap_create(ss->tx.dmat, 3158 BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &ss->tx.info[i].map); 3159 if (err != 0) { 3160 int j; 3161 3162 device_printf(sc->dev, "Err %d tx dmamap\n", err); 3163 for (j = 0; j < i; ++j) { 3164 bus_dmamap_destroy(ss->tx.dmat, 3165 ss->tx.info[j].map); 3166 } 3167 bus_dma_tag_destroy(ss->tx.dmat); 3168 ss->tx.dmat = NULL; 3169 return err; 3170 } 3171 } 3172 return 0; 3173 } 3174 3175 static int 3176 mxge_alloc_rings(mxge_softc_t *sc) 3177 { 3178 mxge_cmd_t cmd; 3179 int tx_ring_size; 3180 int tx_ring_entries, rx_ring_entries; 3181 int err, slice; 3182 3183 /* Get ring sizes */ 3184 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3185 if (err != 0) { 3186 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3187 return err; 3188 } 3189 tx_ring_size = cmd.data0; 3190 3191 tx_ring_entries = tx_ring_size / sizeof(mcp_kreq_ether_send_t); 3192 rx_ring_entries = sc->rx_intr_slots / 2; 3193 3194 if (bootverbose) { 3195 device_printf(sc->dev, "tx desc %d, rx desc %d\n", 3196 tx_ring_entries, rx_ring_entries); 3197 } 3198 3199 sc->ifp->if_nmbclusters = rx_ring_entries * sc->num_slices; 3200 sc->ifp->if_nmbjclusters = sc->ifp->if_nmbclusters; 3201 3202 ifq_set_maxlen(&sc->ifp->if_snd, tx_ring_entries - 1); 3203 ifq_set_ready(&sc->ifp->if_snd); 3204 ifq_set_subq_cnt(&sc->ifp->if_snd, sc->num_tx_rings); 3205 3206 if (sc->num_tx_rings > 1) { 3207 sc->ifp->if_mapsubq = ifq_mapsubq_mask; 3208 ifq_set_subq_mask(&sc->ifp->if_snd, sc->num_tx_rings - 1); 3209 } 3210 3211 for (slice = 0; slice < sc->num_slices; slice++) { 3212 err = mxge_alloc_slice_rings(&sc->ss[slice], 3213 rx_ring_entries, tx_ring_entries); 3214 if (err != 0) 
{ 3215 device_printf(sc->dev, 3216 "alloc %d slice rings failed\n", slice); 3217 return err; 3218 } 3219 } 3220 return 0; 3221 } 3222 3223 static void 3224 mxge_choose_params(int mtu, int *cl_size) 3225 { 3226 int bufsize = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN + MXGEFW_PAD; 3227 3228 if (bufsize < MCLBYTES) { 3229 *cl_size = MCLBYTES; 3230 } else { 3231 KASSERT(bufsize < MJUMPAGESIZE, ("invalid MTU %d", mtu)); 3232 *cl_size = MJUMPAGESIZE; 3233 } 3234 } 3235 3236 static int 3237 mxge_slice_open(struct mxge_slice_state *ss, int cl_size) 3238 { 3239 mxge_cmd_t cmd; 3240 int err, i, slice; 3241 3242 slice = ss - ss->sc->ss; 3243 3244 /* 3245 * Get the lanai pointers to the send and receive rings 3246 */ 3247 err = 0; 3248 3249 bzero(&cmd, sizeof(cmd)); /* silence gcc warning */ 3250 if (ss->sc->num_tx_rings == 1) { 3251 if (slice == 0) { 3252 cmd.data0 = slice; 3253 err = mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SEND_OFFSET, 3254 &cmd); 3255 ss->tx.lanai = (volatile mcp_kreq_ether_send_t *) 3256 (ss->sc->sram + cmd.data0); 3257 /* Leave send_go and send_stop as NULL */ 3258 } 3259 } else { 3260 cmd.data0 = slice; 3261 err = mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3262 ss->tx.lanai = (volatile mcp_kreq_ether_send_t *) 3263 (ss->sc->sram + cmd.data0); 3264 ss->tx.send_go = (volatile uint32_t *) 3265 (ss->sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3266 ss->tx.send_stop = (volatile uint32_t *) 3267 (ss->sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3268 } 3269 3270 cmd.data0 = slice; 3271 err |= mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3272 ss->rx_data.rx_small.lanai = 3273 (volatile mcp_kreq_ether_recv_t *)(ss->sc->sram + cmd.data0); 3274 3275 cmd.data0 = slice; 3276 err |= mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3277 ss->rx_data.rx_big.lanai = 3278 (volatile mcp_kreq_ether_recv_t *)(ss->sc->sram + cmd.data0); 3279 3280 if (err != 0) { 3281 if_printf(ss->sc->ifp, 3282 "failed to get ring sizes or locations\n"); 3283 
return EIO; 3284 } 3285 3286 /* 3287 * Stock small receive ring 3288 */ 3289 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) { 3290 err = mxge_get_buf_small(&ss->rx_data.rx_small, 3291 ss->rx_data.rx_small.info[i].map, i, TRUE); 3292 if (err) { 3293 if_printf(ss->sc->ifp, "alloced %d/%d smalls\n", i, 3294 ss->rx_data.rx_small.mask + 1); 3295 return ENOMEM; 3296 } 3297 } 3298 3299 /* 3300 * Stock big receive ring 3301 */ 3302 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 3303 ss->rx_data.rx_big.shadow[i].addr_low = 0xffffffff; 3304 ss->rx_data.rx_big.shadow[i].addr_high = 0xffffffff; 3305 } 3306 3307 ss->rx_data.rx_big.cl_size = cl_size; 3308 3309 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 3310 err = mxge_get_buf_big(&ss->rx_data.rx_big, 3311 ss->rx_data.rx_big.info[i].map, i, TRUE); 3312 if (err) { 3313 if_printf(ss->sc->ifp, "alloced %d/%d bigs\n", i, 3314 ss->rx_data.rx_big.mask + 1); 3315 return ENOMEM; 3316 } 3317 } 3318 return 0; 3319 } 3320 3321 static int 3322 mxge_open(mxge_softc_t *sc) 3323 { 3324 struct ifnet *ifp = sc->ifp; 3325 mxge_cmd_t cmd; 3326 int err, slice, cl_size, i; 3327 bus_addr_t bus; 3328 volatile uint8_t *itable; 3329 struct mxge_slice_state *ss; 3330 3331 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3332 3333 /* Copy the MAC address in case it was overridden */ 3334 bcopy(IF_LLADDR(ifp), sc->mac_addr, ETHER_ADDR_LEN); 3335 3336 err = mxge_reset(sc, 1); 3337 if (err != 0) { 3338 if_printf(ifp, "failed to reset\n"); 3339 return EIO; 3340 } 3341 3342 if (sc->num_slices > 1) { 3343 /* Setup the indirection table */ 3344 cmd.data0 = sc->num_slices; 3345 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, &cmd); 3346 3347 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, &cmd); 3348 if (err != 0) { 3349 if_printf(ifp, "failed to setup rss tables\n"); 3350 return err; 3351 } 3352 3353 /* Just enable an identity mapping */ 3354 itable = sc->sram + cmd.data0; 3355 for (i = 0; i < sc->num_slices; i++) 3356 itable[i] = (uint8_t)i; 3357 
3358 if (sc->use_rss) { 3359 volatile uint8_t *hwkey; 3360 uint8_t swkey[MXGE_HWRSS_KEYLEN]; 3361 3362 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_KEY_OFFSET, 3363 &cmd); 3364 if (err != 0) { 3365 if_printf(ifp, "failed to get rsskey\n"); 3366 return err; 3367 } 3368 hwkey = sc->sram + cmd.data0; 3369 3370 toeplitz_get_key(swkey, MXGE_HWRSS_KEYLEN); 3371 for (i = 0; i < MXGE_HWRSS_KEYLEN; ++i) 3372 hwkey[i] = swkey[i]; 3373 wmb(); 3374 3375 err = mxge_send_cmd(sc, MXGEFW_CMD_RSS_KEY_UPDATED, 3376 &cmd); 3377 if (err != 0) { 3378 if_printf(ifp, "failed to update rsskey\n"); 3379 return err; 3380 } 3381 if (bootverbose) 3382 if_printf(ifp, "RSS key updated\n"); 3383 } 3384 3385 cmd.data0 = 1; 3386 if (sc->use_rss) { 3387 if (bootverbose) 3388 if_printf(ifp, "input hash: RSS\n"); 3389 cmd.data1 = MXGEFW_RSS_HASH_TYPE_IPV4 | 3390 MXGEFW_RSS_HASH_TYPE_TCP_IPV4; 3391 } else { 3392 if (bootverbose) 3393 if_printf(ifp, "input hash: SRC_DST_PORT\n"); 3394 cmd.data1 = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 3395 } 3396 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3397 if (err != 0) { 3398 if_printf(ifp, "failed to enable slices\n"); 3399 return err; 3400 } 3401 } 3402 3403 cmd.data0 = MXGEFW_TSO_MODE_NDIS; 3404 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_TSO_MODE, &cmd); 3405 if (err) { 3406 /* 3407 * Can't change TSO mode to NDIS, never allow TSO then 3408 */ 3409 if_printf(ifp, "failed to set TSO mode\n"); 3410 ifp->if_capenable &= ~IFCAP_TSO; 3411 ifp->if_capabilities &= ~IFCAP_TSO; 3412 ifp->if_hwassist &= ~CSUM_TSO; 3413 } 3414 3415 mxge_choose_params(ifp->if_mtu, &cl_size); 3416 3417 cmd.data0 = 1; 3418 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, &cmd); 3419 /* 3420 * Error is only meaningful if we're trying to set 3421 * MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 3422 */ 3423 3424 /* 3425 * Give the firmware the mtu and the big and small buffer 3426 * sizes. The firmware wants the big buf size to be a power 3427 * of two. 
Luckily, DragonFly's clusters are powers of two 3428 */ 3429 cmd.data0 = ifp->if_mtu + ETHER_HDR_LEN + EVL_ENCAPLEN; 3430 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3431 3432 cmd.data0 = MXGE_RX_SMALL_BUFLEN; 3433 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd); 3434 3435 cmd.data0 = cl_size; 3436 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3437 3438 if (err != 0) { 3439 if_printf(ifp, "failed to setup params\n"); 3440 goto abort; 3441 } 3442 3443 /* Now give him the pointer to the stats block */ 3444 for (slice = 0; slice < sc->num_slices; slice++) { 3445 ss = &sc->ss[slice]; 3446 cmd.data0 = MXGE_LOWPART_TO_U32(ss->fw_stats_dma.dmem_busaddr); 3447 cmd.data1 = MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.dmem_busaddr); 3448 cmd.data2 = sizeof(struct mcp_irq_data); 3449 cmd.data2 |= (slice << 16); 3450 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3451 } 3452 3453 if (err != 0) { 3454 bus = sc->ss->fw_stats_dma.dmem_busaddr; 3455 bus += offsetof(struct mcp_irq_data, send_done_count); 3456 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3457 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3458 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3459 &cmd); 3460 3461 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3462 sc->fw_multicast_support = 0; 3463 } else { 3464 sc->fw_multicast_support = 1; 3465 } 3466 3467 if (err != 0) { 3468 if_printf(ifp, "failed to setup params\n"); 3469 goto abort; 3470 } 3471 3472 for (slice = 0; slice < sc->num_slices; slice++) { 3473 err = mxge_slice_open(&sc->ss[slice], cl_size); 3474 if (err != 0) { 3475 if_printf(ifp, "couldn't open slice %d\n", slice); 3476 goto abort; 3477 } 3478 } 3479 3480 /* Finally, start the firmware running */ 3481 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3482 if (err) { 3483 if_printf(ifp, "Couldn't bring up link\n"); 3484 goto abort; 3485 } 3486 3487 ifp->if_flags |= IFF_RUNNING; 3488 for (i = 0; i < sc->num_tx_rings; ++i) { 3489 
mxge_tx_ring_t *tx = &sc->ss[i].tx; 3490 3491 ifsq_clr_oactive(tx->ifsq); 3492 ifsq_watchdog_start(&tx->watchdog); 3493 } 3494 3495 return 0; 3496 3497 abort: 3498 mxge_free_mbufs(sc); 3499 return err; 3500 } 3501 3502 static void 3503 mxge_close(mxge_softc_t *sc, int down) 3504 { 3505 struct ifnet *ifp = sc->ifp; 3506 mxge_cmd_t cmd; 3507 int err, old_down_cnt, i; 3508 3509 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3510 3511 if (!down) { 3512 old_down_cnt = sc->down_cnt; 3513 wmb(); 3514 3515 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3516 if (err) 3517 if_printf(ifp, "Couldn't bring down link\n"); 3518 3519 if (old_down_cnt == sc->down_cnt) { 3520 /* 3521 * Wait for down irq 3522 * XXX racy 3523 */ 3524 ifnet_deserialize_all(ifp); 3525 DELAY(10 * sc->intr_coal_delay); 3526 ifnet_serialize_all(ifp); 3527 } 3528 3529 wmb(); 3530 if (old_down_cnt == sc->down_cnt) 3531 if_printf(ifp, "never got down irq\n"); 3532 } 3533 mxge_free_mbufs(sc); 3534 3535 ifp->if_flags &= ~IFF_RUNNING; 3536 for (i = 0; i < sc->num_tx_rings; ++i) { 3537 mxge_tx_ring_t *tx = &sc->ss[i].tx; 3538 3539 ifsq_clr_oactive(tx->ifsq); 3540 ifsq_watchdog_stop(&tx->watchdog); 3541 } 3542 } 3543 3544 static void 3545 mxge_setup_cfg_space(mxge_softc_t *sc) 3546 { 3547 device_t dev = sc->dev; 3548 int reg; 3549 uint16_t lnk, pectl; 3550 3551 /* Find the PCIe link width and set max read request to 4KB */ 3552 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 3553 lnk = pci_read_config(dev, reg + 0x12, 2); 3554 sc->link_width = (lnk >> 4) & 0x3f; 3555 3556 if (sc->pectl == 0) { 3557 pectl = pci_read_config(dev, reg + 0x8, 2); 3558 pectl = (pectl & ~0x7000) | (5 << 12); 3559 pci_write_config(dev, reg + 0x8, pectl, 2); 3560 sc->pectl = pectl; 3561 } else { 3562 /* Restore saved pectl after watchdog reset */ 3563 pci_write_config(dev, reg + 0x8, sc->pectl, 2); 3564 } 3565 } 3566 3567 /* Enable DMA and memory space access */ 3568 pci_enable_busmaster(dev); 3569 } 3570 3571 static uint32_t 3572 
mxge_read_reboot(mxge_softc_t *sc) 3573 { 3574 device_t dev = sc->dev; 3575 uint32_t vs; 3576 3577 /* Find the vendor specific offset */ 3578 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) { 3579 if_printf(sc->ifp, "could not find vendor specific offset\n"); 3580 return (uint32_t)-1; 3581 } 3582 /* Enable read32 mode */ 3583 pci_write_config(dev, vs + 0x10, 0x3, 1); 3584 /* Tell NIC which register to read */ 3585 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3586 return pci_read_config(dev, vs + 0x14, 4); 3587 } 3588 3589 static void 3590 mxge_watchdog_reset(mxge_softc_t *sc) 3591 { 3592 struct pci_devinfo *dinfo; 3593 int err, running; 3594 uint32_t reboot; 3595 uint16_t cmd; 3596 3597 err = ENXIO; 3598 3599 if_printf(sc->ifp, "Watchdog reset!\n"); 3600 3601 /* 3602 * Check to see if the NIC rebooted. If it did, then all of 3603 * PCI config space has been reset, and things like the 3604 * busmaster bit will be zero. If this is the case, then we 3605 * must restore PCI config space before the NIC can be used 3606 * again 3607 */ 3608 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3609 if (cmd == 0xffff) { 3610 /* 3611 * Maybe the watchdog caught the NIC rebooting; wait 3612 * up to 100ms for it to finish. 
If it does not come 3613 * back, then give up 3614 */ 3615 DELAY(1000*100); 3616 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3617 if (cmd == 0xffff) 3618 if_printf(sc->ifp, "NIC disappeared!\n"); 3619 } 3620 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3621 /* Print the reboot status */ 3622 reboot = mxge_read_reboot(sc); 3623 if_printf(sc->ifp, "NIC rebooted, status = 0x%x\n", reboot); 3624 3625 running = sc->ifp->if_flags & IFF_RUNNING; 3626 if (running) { 3627 /* 3628 * Quiesce NIC so that TX routines will not try to 3629 * xmit after restoration of BAR 3630 */ 3631 3632 /* Mark the link as down */ 3633 if (sc->link_state) { 3634 sc->ifp->if_link_state = LINK_STATE_DOWN; 3635 if_link_state_change(sc->ifp); 3636 } 3637 mxge_close(sc, 1); 3638 } 3639 /* Restore PCI configuration space */ 3640 dinfo = device_get_ivars(sc->dev); 3641 pci_cfg_restore(sc->dev, dinfo); 3642 3643 /* And redo any changes we made to our config space */ 3644 mxge_setup_cfg_space(sc); 3645 3646 /* Reload f/w */ 3647 err = mxge_load_firmware(sc, 0); 3648 if (err) 3649 if_printf(sc->ifp, "Unable to re-load f/w\n"); 3650 if (running && !err) { 3651 int i; 3652 3653 err = mxge_open(sc); 3654 3655 for (i = 0; i < sc->num_tx_rings; ++i) 3656 ifsq_devstart_sched(sc->ss[i].tx.ifsq); 3657 } 3658 sc->watchdog_resets++; 3659 } else { 3660 if_printf(sc->ifp, "NIC did not reboot, not resetting\n"); 3661 err = 0; 3662 } 3663 if (err) { 3664 if_printf(sc->ifp, "watchdog reset failed\n"); 3665 } else { 3666 if (sc->dying == 2) 3667 sc->dying = 0; 3668 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3669 } 3670 } 3671 3672 static void 3673 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice) 3674 { 3675 if_printf(sc->ifp, "slice %d struck? 
ring state:\n", slice); 3676 if_printf(sc->ifp, "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3677 tx->req, tx->done, tx->queue_active); 3678 if_printf(sc->ifp, "tx.activate=%d tx.deactivate=%d\n", 3679 tx->activate, tx->deactivate); 3680 if_printf(sc->ifp, "pkt_done=%d fw=%d\n", 3681 tx->pkt_done, be32toh(sc->ss->fw_stats->send_done_count)); 3682 } 3683 3684 static u_long 3685 mxge_update_stats(mxge_softc_t *sc) 3686 { 3687 u_long ipackets, opackets, pkts; 3688 3689 IFNET_STAT_GET(sc->ifp, ipackets, ipackets); 3690 IFNET_STAT_GET(sc->ifp, opackets, opackets); 3691 3692 pkts = ipackets - sc->ipackets; 3693 pkts += opackets - sc->opackets; 3694 3695 sc->ipackets = ipackets; 3696 sc->opackets = opackets; 3697 3698 return pkts; 3699 } 3700 3701 static void 3702 mxge_tick(void *arg) 3703 { 3704 mxge_softc_t *sc = arg; 3705 u_long pkts = 0; 3706 int err = 0; 3707 int ticks; 3708 3709 lwkt_serialize_enter(&sc->main_serialize); 3710 3711 ticks = mxge_ticks; 3712 if (sc->ifp->if_flags & IFF_RUNNING) { 3713 /* Aggregate stats from different slices */ 3714 pkts = mxge_update_stats(sc); 3715 if (sc->need_media_probe) 3716 mxge_media_probe(sc); 3717 } 3718 if (pkts == 0) { 3719 uint16_t cmd; 3720 3721 /* Ensure NIC did not suffer h/w fault while idle */ 3722 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3723 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3724 sc->dying = 2; 3725 mxge_serialize_skipmain(sc); 3726 mxge_watchdog_reset(sc); 3727 mxge_deserialize_skipmain(sc); 3728 err = ENXIO; 3729 } 3730 3731 /* Look less often if NIC is idle */ 3732 ticks *= 4; 3733 } 3734 3735 if (err == 0) 3736 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 3737 3738 lwkt_serialize_exit(&sc->main_serialize); 3739 } 3740 3741 static int 3742 mxge_media_change(struct ifnet *ifp) 3743 { 3744 mxge_softc_t *sc = ifp->if_softc; 3745 const struct ifmedia *ifm = &sc->media; 3746 int pause; 3747 3748 if (IFM_OPTIONS(ifm->ifm_media) & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) { 3749 if (sc->pause) 3750 
return 0; 3751 pause = 1; 3752 } else { 3753 if (!sc->pause) 3754 return 0; 3755 pause = 0; 3756 } 3757 return mxge_change_pause(sc, pause); 3758 } 3759 3760 static int 3761 mxge_change_mtu(mxge_softc_t *sc, int mtu) 3762 { 3763 struct ifnet *ifp = sc->ifp; 3764 int real_mtu, old_mtu; 3765 int err = 0; 3766 3767 real_mtu = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN; 3768 if (mtu > sc->max_mtu || real_mtu < 60) 3769 return EINVAL; 3770 3771 old_mtu = ifp->if_mtu; 3772 ifp->if_mtu = mtu; 3773 if (ifp->if_flags & IFF_RUNNING) { 3774 mxge_close(sc, 0); 3775 err = mxge_open(sc); 3776 if (err != 0) { 3777 ifp->if_mtu = old_mtu; 3778 mxge_close(sc, 0); 3779 mxge_open(sc); 3780 } 3781 } 3782 return err; 3783 } 3784 3785 static void 3786 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 3787 { 3788 mxge_softc_t *sc = ifp->if_softc; 3789 3790 ifmr->ifm_status = IFM_AVALID; 3791 ifmr->ifm_active = IFM_ETHER; 3792 3793 if (sc->link_state) 3794 ifmr->ifm_status |= IFM_ACTIVE; 3795 3796 /* 3797 * Autoselect is not supported, so the current media 3798 * should be delivered. 
3799 */ 3800 ifmr->ifm_active |= sc->current_media; 3801 if (sc->current_media != IFM_NONE) { 3802 ifmr->ifm_active |= MXGE_IFM; 3803 if (sc->pause) 3804 ifmr->ifm_active |= IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE; 3805 } 3806 } 3807 3808 static int 3809 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data, 3810 struct ucred *cr __unused) 3811 { 3812 mxge_softc_t *sc = ifp->if_softc; 3813 struct ifreq *ifr = (struct ifreq *)data; 3814 int err, mask; 3815 3816 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3817 err = 0; 3818 3819 switch (command) { 3820 case SIOCSIFMTU: 3821 err = mxge_change_mtu(sc, ifr->ifr_mtu); 3822 break; 3823 3824 case SIOCSIFFLAGS: 3825 if (sc->dying) 3826 return EINVAL; 3827 3828 if (ifp->if_flags & IFF_UP) { 3829 if (!(ifp->if_flags & IFF_RUNNING)) { 3830 err = mxge_open(sc); 3831 } else { 3832 /* 3833 * Take care of PROMISC and ALLMULTI 3834 * flag changes 3835 */ 3836 mxge_change_promisc(sc, 3837 ifp->if_flags & IFF_PROMISC); 3838 mxge_set_multicast_list(sc); 3839 } 3840 } else { 3841 if (ifp->if_flags & IFF_RUNNING) 3842 mxge_close(sc, 0); 3843 } 3844 break; 3845 3846 case SIOCADDMULTI: 3847 case SIOCDELMULTI: 3848 mxge_set_multicast_list(sc); 3849 break; 3850 3851 case SIOCSIFCAP: 3852 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 3853 if (mask & IFCAP_TXCSUM) { 3854 ifp->if_capenable ^= IFCAP_TXCSUM; 3855 if (ifp->if_capenable & IFCAP_TXCSUM) 3856 ifp->if_hwassist |= CSUM_TCP | CSUM_UDP; 3857 else 3858 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP); 3859 } 3860 if (mask & IFCAP_TSO) { 3861 ifp->if_capenable ^= IFCAP_TSO; 3862 if (ifp->if_capenable & IFCAP_TSO) 3863 ifp->if_hwassist |= CSUM_TSO; 3864 else 3865 ifp->if_hwassist &= ~CSUM_TSO; 3866 } 3867 if (mask & IFCAP_RXCSUM) 3868 ifp->if_capenable ^= IFCAP_RXCSUM; 3869 if (mask & IFCAP_VLAN_HWTAGGING) 3870 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 3871 break; 3872 3873 case SIOCGIFMEDIA: 3874 case SIOCSIFMEDIA: 3875 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 3876 &sc->media, command); 3877 break; 
3878 3879 default: 3880 err = ether_ioctl(ifp, command, data); 3881 break; 3882 } 3883 return err; 3884 } 3885 3886 static void 3887 mxge_fetch_tunables(mxge_softc_t *sc) 3888 { 3889 int ifm; 3890 3891 sc->intr_coal_delay = mxge_intr_coal_delay; 3892 if (sc->intr_coal_delay < 0 || sc->intr_coal_delay > (10 * 1000)) 3893 sc->intr_coal_delay = MXGE_INTR_COAL_DELAY; 3894 3895 /* XXX */ 3896 if (mxge_ticks == 0) 3897 mxge_ticks = hz / 2; 3898 3899 ifm = ifmedia_str2ethfc(mxge_flowctrl); 3900 if (ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) 3901 sc->pause = 1; 3902 3903 sc->use_rss = mxge_use_rss; 3904 3905 sc->throttle = mxge_throttle; 3906 if (sc->throttle && sc->throttle > MXGE_MAX_THROTTLE) 3907 sc->throttle = MXGE_MAX_THROTTLE; 3908 if (sc->throttle && sc->throttle < MXGE_MIN_THROTTLE) 3909 sc->throttle = MXGE_MIN_THROTTLE; 3910 } 3911 3912 static void 3913 mxge_free_slices(mxge_softc_t *sc) 3914 { 3915 struct mxge_slice_state *ss; 3916 int i; 3917 3918 if (sc->ss == NULL) 3919 return; 3920 3921 for (i = 0; i < sc->num_slices; i++) { 3922 ss = &sc->ss[i]; 3923 if (ss->fw_stats != NULL) { 3924 mxge_dma_free(&ss->fw_stats_dma); 3925 ss->fw_stats = NULL; 3926 } 3927 if (ss->rx_data.rx_done.entry != NULL) { 3928 mxge_dma_free(&ss->rx_done_dma); 3929 ss->rx_data.rx_done.entry = NULL; 3930 } 3931 } 3932 kfree(sc->ss, M_DEVBUF); 3933 sc->ss = NULL; 3934 } 3935 3936 static int 3937 mxge_alloc_slices(mxge_softc_t *sc) 3938 { 3939 mxge_cmd_t cmd; 3940 struct mxge_slice_state *ss; 3941 size_t bytes; 3942 int err, i, rx_ring_size; 3943 3944 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 3945 if (err != 0) { 3946 device_printf(sc->dev, "Cannot determine rx ring size\n"); 3947 return err; 3948 } 3949 rx_ring_size = cmd.data0; 3950 sc->rx_intr_slots = 2 * (rx_ring_size / sizeof (mcp_dma_addr_t)); 3951 3952 bytes = sizeof(*sc->ss) * sc->num_slices; 3953 sc->ss = kmalloc_cachealign(bytes, M_DEVBUF, M_WAITOK | M_ZERO); 3954 3955 for (i = 0; i < sc->num_slices; i++) { 3956 
ss = &sc->ss[i]; 3957 3958 ss->sc = sc; 3959 3960 lwkt_serialize_init(&ss->rx_data.rx_serialize); 3961 lwkt_serialize_init(&ss->tx.tx_serialize); 3962 ss->intr_rid = -1; 3963 3964 /* 3965 * Allocate per-slice rx interrupt queue 3966 * XXX assume 4bytes mcp_slot 3967 */ 3968 bytes = sc->rx_intr_slots * sizeof(mcp_slot_t); 3969 err = mxge_dma_alloc(sc, &ss->rx_done_dma, bytes, 4096); 3970 if (err != 0) { 3971 device_printf(sc->dev, 3972 "alloc %d slice rx_done failed\n", i); 3973 return err; 3974 } 3975 ss->rx_data.rx_done.entry = ss->rx_done_dma.dmem_addr; 3976 3977 /* 3978 * Allocate the per-slice firmware stats 3979 */ 3980 bytes = sizeof(*ss->fw_stats); 3981 err = mxge_dma_alloc(sc, &ss->fw_stats_dma, 3982 sizeof(*ss->fw_stats), 64); 3983 if (err != 0) { 3984 device_printf(sc->dev, 3985 "alloc %d fw_stats failed\n", i); 3986 return err; 3987 } 3988 ss->fw_stats = ss->fw_stats_dma.dmem_addr; 3989 } 3990 return 0; 3991 } 3992 3993 static void 3994 mxge_slice_probe(mxge_softc_t *sc) 3995 { 3996 int status, max_intr_slots, max_slices, num_slices; 3997 int msix_cnt, msix_enable, i, multi_tx; 3998 mxge_cmd_t cmd; 3999 const char *old_fw; 4000 4001 sc->num_slices = 1; 4002 sc->num_tx_rings = 1; 4003 4004 num_slices = device_getenv_int(sc->dev, "num_slices", mxge_num_slices); 4005 if (num_slices == 1) 4006 return; 4007 4008 if (ncpus2 == 1) 4009 return; 4010 4011 msix_enable = device_getenv_int(sc->dev, "msix.enable", 4012 mxge_msix_enable); 4013 if (!msix_enable) 4014 return; 4015 4016 msix_cnt = pci_msix_count(sc->dev); 4017 if (msix_cnt < 2) 4018 return; 4019 4020 /* 4021 * Round down MSI-X vector count to the nearest power of 2 4022 */ 4023 i = 0; 4024 while ((1 << (i + 1)) <= msix_cnt) 4025 ++i; 4026 msix_cnt = 1 << i; 4027 4028 /* 4029 * Now load the slice aware firmware see what it supports 4030 */ 4031 old_fw = sc->fw_name; 4032 if (old_fw == mxge_fw_aligned) 4033 sc->fw_name = mxge_fw_rss_aligned; 4034 else 4035 sc->fw_name = mxge_fw_rss_unaligned; 4036 status = 
mxge_load_firmware(sc, 0); 4037 if (status != 0) { 4038 device_printf(sc->dev, "Falling back to a single slice\n"); 4039 return; 4040 } 4041 4042 /* 4043 * Try to send a reset command to the card to see if it is alive 4044 */ 4045 memset(&cmd, 0, sizeof(cmd)); 4046 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 4047 if (status != 0) { 4048 device_printf(sc->dev, "failed reset\n"); 4049 goto abort_with_fw; 4050 } 4051 4052 /* 4053 * Get rx ring size to calculate rx interrupt queue size 4054 */ 4055 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4056 if (status != 0) { 4057 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4058 goto abort_with_fw; 4059 } 4060 max_intr_slots = 2 * (cmd.data0 / sizeof(mcp_dma_addr_t)); 4061 4062 /* 4063 * Tell it the size of the rx interrupt queue 4064 */ 4065 cmd.data0 = max_intr_slots * sizeof(struct mcp_slot); 4066 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 4067 if (status != 0) { 4068 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 4069 goto abort_with_fw; 4070 } 4071 4072 /* 4073 * Ask the maximum number of slices it supports 4074 */ 4075 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 4076 if (status != 0) { 4077 device_printf(sc->dev, 4078 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); 4079 goto abort_with_fw; 4080 } 4081 max_slices = cmd.data0; 4082 4083 /* 4084 * Round down max slices count to the nearest power of 2 4085 */ 4086 i = 0; 4087 while ((1 << (i + 1)) <= max_slices) 4088 ++i; 4089 max_slices = 1 << i; 4090 4091 if (max_slices > msix_cnt) 4092 max_slices = msix_cnt; 4093 4094 sc->num_slices = num_slices; 4095 sc->num_slices = if_ring_count2(sc->num_slices, max_slices); 4096 4097 multi_tx = device_getenv_int(sc->dev, "multi_tx", mxge_multi_tx); 4098 if (multi_tx) 4099 sc->num_tx_rings = sc->num_slices; 4100 4101 if (bootverbose) { 4102 device_printf(sc->dev, "using %d slices, max %d\n", 4103 sc->num_slices, max_slices); 4104 } 4105 4106 if 
(sc->num_slices == 1) 4107 goto abort_with_fw; 4108 return; 4109 4110 abort_with_fw: 4111 sc->fw_name = old_fw; 4112 mxge_load_firmware(sc, 0); 4113 } 4114 4115 static void 4116 mxge_setup_serialize(struct mxge_softc *sc) 4117 { 4118 int i = 0, slice; 4119 4120 /* Main + rx + tx */ 4121 sc->nserialize = (2 * sc->num_slices) + 1; 4122 sc->serializes = 4123 kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *), 4124 M_DEVBUF, M_WAITOK | M_ZERO); 4125 4126 /* 4127 * Setup serializes 4128 * 4129 * NOTE: Order is critical 4130 */ 4131 4132 KKASSERT(i < sc->nserialize); 4133 sc->serializes[i++] = &sc->main_serialize; 4134 4135 for (slice = 0; slice < sc->num_slices; ++slice) { 4136 KKASSERT(i < sc->nserialize); 4137 sc->serializes[i++] = &sc->ss[slice].rx_data.rx_serialize; 4138 } 4139 4140 for (slice = 0; slice < sc->num_slices; ++slice) { 4141 KKASSERT(i < sc->nserialize); 4142 sc->serializes[i++] = &sc->ss[slice].tx.tx_serialize; 4143 } 4144 4145 KKASSERT(i == sc->nserialize); 4146 } 4147 4148 static void 4149 mxge_serialize(struct ifnet *ifp, enum ifnet_serialize slz) 4150 { 4151 struct mxge_softc *sc = ifp->if_softc; 4152 4153 ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz); 4154 } 4155 4156 static void 4157 mxge_deserialize(struct ifnet *ifp, enum ifnet_serialize slz) 4158 { 4159 struct mxge_softc *sc = ifp->if_softc; 4160 4161 ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz); 4162 } 4163 4164 static int 4165 mxge_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz) 4166 { 4167 struct mxge_softc *sc = ifp->if_softc; 4168 4169 return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz); 4170 } 4171 4172 #ifdef INVARIANTS 4173 4174 static void 4175 mxge_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz, 4176 boolean_t serialized) 4177 { 4178 struct mxge_softc *sc = ifp->if_softc; 4179 4180 ifnet_serialize_array_assert(sc->serializes, sc->nserialize, 4181 slz, serialized); 4182 } 4183 4184 #endif /* 
INVARIANTS */ 4185 4186 #ifdef IFPOLL_ENABLE 4187 4188 static void 4189 mxge_npoll_rx(struct ifnet *ifp, void *xss, int cycle) 4190 { 4191 struct mxge_slice_state *ss = xss; 4192 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done; 4193 4194 ASSERT_SERIALIZED(&ss->rx_data.rx_serialize); 4195 4196 if (rx_done->entry[rx_done->idx].length != 0) { 4197 mxge_clean_rx_done(&ss->sc->arpcom.ac_if, &ss->rx_data, cycle); 4198 } else { 4199 /* 4200 * XXX 4201 * This register writting obviously has cost, 4202 * however, if we don't hand back the rx token, 4203 * the upcoming packets may suffer rediculously 4204 * large delay, as observed on 8AL-C using ping(8). 4205 */ 4206 *ss->irq_claim = be32toh(3); 4207 } 4208 } 4209 4210 static void 4211 mxge_npoll(struct ifnet *ifp, struct ifpoll_info *info) 4212 { 4213 struct mxge_softc *sc = ifp->if_softc; 4214 int i; 4215 4216 if (info == NULL) 4217 return; 4218 4219 /* 4220 * Only poll rx; polling tx and status don't seem to work 4221 */ 4222 for (i = 0; i < sc->num_slices; ++i) { 4223 struct mxge_slice_state *ss = &sc->ss[i]; 4224 int idx = ss->intr_cpuid; 4225 4226 KKASSERT(idx < ncpus2); 4227 info->ifpi_rx[idx].poll_func = mxge_npoll_rx; 4228 info->ifpi_rx[idx].arg = ss; 4229 info->ifpi_rx[idx].serializer = &ss->rx_data.rx_serialize; 4230 } 4231 } 4232 4233 #endif /* IFPOLL_ENABLE */ 4234 4235 static int 4236 mxge_attach(device_t dev) 4237 { 4238 mxge_softc_t *sc = device_get_softc(dev); 4239 struct ifnet *ifp = &sc->arpcom.ac_if; 4240 int err, rid, i; 4241 4242 /* 4243 * Avoid rewriting half the lines in this file to use 4244 * &sc->arpcom.ac_if instead 4245 */ 4246 sc->ifp = ifp; 4247 sc->dev = dev; 4248 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4249 4250 /* IFM_ETH_FORCEPAUSE can't be changed */ 4251 ifmedia_init(&sc->media, IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 4252 mxge_media_change, mxge_media_status); 4253 4254 lwkt_serialize_init(&sc->main_serialize); 4255 4256 mxge_fetch_tunables(sc); 4257 4258 err = 
bus_dma_tag_create(NULL, /* parent */ 4259 1, /* alignment */ 4260 0, /* boundary */ 4261 BUS_SPACE_MAXADDR, /* low */ 4262 BUS_SPACE_MAXADDR, /* high */ 4263 NULL, NULL, /* filter */ 4264 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 4265 0, /* num segs */ 4266 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 4267 0, /* flags */ 4268 &sc->parent_dmat); /* tag */ 4269 if (err != 0) { 4270 device_printf(dev, "Err %d allocating parent dmat\n", err); 4271 goto failed; 4272 } 4273 4274 callout_init_mp(&sc->co_hdl); 4275 4276 mxge_setup_cfg_space(sc); 4277 4278 /* 4279 * Map the board into the kernel 4280 */ 4281 rid = PCIR_BARS; 4282 sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, 4283 &rid, RF_ACTIVE); 4284 if (sc->mem_res == NULL) { 4285 device_printf(dev, "could not map memory\n"); 4286 err = ENXIO; 4287 goto failed; 4288 } 4289 4290 sc->sram = rman_get_virtual(sc->mem_res); 4291 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4292 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4293 device_printf(dev, "impossible memory region size %ld\n", 4294 rman_get_size(sc->mem_res)); 4295 err = ENXIO; 4296 goto failed; 4297 } 4298 4299 /* 4300 * Make NULL terminated copy of the EEPROM strings section of 4301 * lanai SRAM 4302 */ 4303 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4304 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4305 rman_get_bushandle(sc->mem_res), 4306 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4307 sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE - 2); 4308 err = mxge_parse_strings(sc); 4309 if (err != 0) { 4310 device_printf(dev, "parse EEPROM string failed\n"); 4311 goto failed; 4312 } 4313 4314 /* 4315 * Enable write combining for efficient use of PCIe bus 4316 */ 4317 mxge_enable_wc(sc); 4318 4319 /* 4320 * Allocate the out of band DMA memory 4321 */ 4322 err = mxge_dma_alloc(sc, &sc->cmd_dma, sizeof(mxge_cmd_t), 64); 4323 if (err != 0) { 4324 device_printf(dev, "alloc cmd DMA buf failed\n"); 4325 goto failed; 4326 } 4327 sc->cmd = 
sc->cmd_dma.dmem_addr; 4328 4329 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4330 if (err != 0) { 4331 device_printf(dev, "alloc zeropad DMA buf failed\n"); 4332 goto failed; 4333 } 4334 4335 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4336 if (err != 0) { 4337 device_printf(dev, "alloc dmabench DMA buf failed\n"); 4338 goto failed; 4339 } 4340 4341 /* Select & load the firmware */ 4342 err = mxge_select_firmware(sc); 4343 if (err != 0) { 4344 device_printf(dev, "select firmware failed\n"); 4345 goto failed; 4346 } 4347 4348 mxge_slice_probe(sc); 4349 err = mxge_alloc_slices(sc); 4350 if (err != 0) { 4351 device_printf(dev, "alloc slices failed\n"); 4352 goto failed; 4353 } 4354 4355 err = mxge_alloc_intr(sc); 4356 if (err != 0) { 4357 device_printf(dev, "alloc intr failed\n"); 4358 goto failed; 4359 } 4360 4361 /* Setup serializes */ 4362 mxge_setup_serialize(sc); 4363 4364 err = mxge_reset(sc, 0); 4365 if (err != 0) { 4366 device_printf(dev, "reset failed\n"); 4367 goto failed; 4368 } 4369 4370 err = mxge_alloc_rings(sc); 4371 if (err != 0) { 4372 device_printf(dev, "failed to allocate rings\n"); 4373 goto failed; 4374 } 4375 4376 ifp->if_baudrate = IF_Gbps(10UL); 4377 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO; 4378 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4379 4380 ifp->if_capabilities |= IFCAP_VLAN_MTU; 4381 #if 0 4382 /* Well, its software, sigh */ 4383 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING; 4384 #endif 4385 ifp->if_capenable = ifp->if_capabilities; 4386 4387 ifp->if_softc = sc; 4388 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4389 ifp->if_init = mxge_init; 4390 ifp->if_ioctl = mxge_ioctl; 4391 ifp->if_start = mxge_start; 4392 #ifdef IFPOLL_ENABLE 4393 if (sc->intr_type != PCI_INTR_TYPE_LEGACY) 4394 ifp->if_npoll = mxge_npoll; 4395 #endif 4396 ifp->if_serialize = mxge_serialize; 4397 ifp->if_deserialize = mxge_deserialize; 4398 ifp->if_tryserialize = mxge_tryserialize; 4399 #ifdef 
INVARIANTS 4400 ifp->if_serialize_assert = mxge_serialize_assert; 4401 #endif 4402 4403 /* Increase TSO burst length */ 4404 ifp->if_tsolen = (32 * ETHERMTU); 4405 4406 /* Initialise the ifmedia structure */ 4407 mxge_media_init(sc); 4408 mxge_media_probe(sc); 4409 4410 ether_ifattach(ifp, sc->mac_addr, NULL); 4411 4412 /* Setup TX rings and subqueues */ 4413 for (i = 0; i < sc->num_tx_rings; ++i) { 4414 struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i); 4415 struct mxge_slice_state *ss = &sc->ss[i]; 4416 4417 ifsq_set_cpuid(ifsq, ss->intr_cpuid); 4418 ifsq_set_hw_serialize(ifsq, &ss->tx.tx_serialize); 4419 ifsq_set_priv(ifsq, &ss->tx); 4420 ss->tx.ifsq = ifsq; 4421 4422 ifsq_watchdog_init(&ss->tx.watchdog, ifsq, mxge_watchdog); 4423 } 4424 4425 /* 4426 * XXX 4427 * We are not ready to do "gather" jumbo frame, so 4428 * limit MTU to MJUMPAGESIZE 4429 */ 4430 sc->max_mtu = MJUMPAGESIZE - 4431 ETHER_HDR_LEN - EVL_ENCAPLEN - MXGEFW_PAD - 1; 4432 sc->dying = 0; 4433 4434 err = mxge_setup_intr(sc); 4435 if (err != 0) { 4436 device_printf(dev, "alloc and setup intr failed\n"); 4437 ether_ifdetach(ifp); 4438 goto failed; 4439 } 4440 4441 mxge_add_sysctls(sc); 4442 4443 /* Increase non-cluster mbuf limit; used by small RX rings */ 4444 mb_inclimit(ifp->if_nmbclusters); 4445 4446 callout_reset_bycpu(&sc->co_hdl, mxge_ticks, mxge_tick, sc, 4447 sc->ss[0].intr_cpuid); 4448 return 0; 4449 4450 failed: 4451 mxge_detach(dev); 4452 return err; 4453 } 4454 4455 static int 4456 mxge_detach(device_t dev) 4457 { 4458 mxge_softc_t *sc = device_get_softc(dev); 4459 4460 if (device_is_attached(dev)) { 4461 struct ifnet *ifp = sc->ifp; 4462 int mblimit = ifp->if_nmbclusters; 4463 4464 ifnet_serialize_all(ifp); 4465 4466 sc->dying = 1; 4467 if (ifp->if_flags & IFF_RUNNING) 4468 mxge_close(sc, 1); 4469 callout_stop(&sc->co_hdl); 4470 4471 mxge_teardown_intr(sc, sc->num_slices); 4472 4473 ifnet_deserialize_all(ifp); 4474 4475 callout_terminate(&sc->co_hdl); 4476 4477 
ether_ifdetach(ifp); 4478 4479 /* Decrease non-cluster mbuf limit increased by us */ 4480 mb_inclimit(-mblimit); 4481 } 4482 ifmedia_removeall(&sc->media); 4483 4484 if (sc->cmd != NULL && sc->zeropad_dma.dmem_addr != NULL && 4485 sc->sram != NULL) 4486 mxge_dummy_rdma(sc, 0); 4487 4488 mxge_free_intr(sc); 4489 mxge_rem_sysctls(sc); 4490 mxge_free_rings(sc); 4491 4492 /* MUST after sysctls, intr and rings are freed */ 4493 mxge_free_slices(sc); 4494 4495 if (sc->dmabench_dma.dmem_addr != NULL) 4496 mxge_dma_free(&sc->dmabench_dma); 4497 if (sc->zeropad_dma.dmem_addr != NULL) 4498 mxge_dma_free(&sc->zeropad_dma); 4499 if (sc->cmd_dma.dmem_addr != NULL) 4500 mxge_dma_free(&sc->cmd_dma); 4501 4502 if (sc->msix_table_res != NULL) { 4503 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(2), 4504 sc->msix_table_res); 4505 } 4506 if (sc->mem_res != NULL) { 4507 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, 4508 sc->mem_res); 4509 } 4510 4511 if (sc->parent_dmat != NULL) 4512 bus_dma_tag_destroy(sc->parent_dmat); 4513 4514 return 0; 4515 } 4516 4517 static int 4518 mxge_shutdown(device_t dev) 4519 { 4520 return 0; 4521 } 4522 4523 static void 4524 mxge_free_msix(struct mxge_softc *sc, boolean_t setup) 4525 { 4526 int i; 4527 4528 KKASSERT(sc->num_slices > 1); 4529 4530 for (i = 0; i < sc->num_slices; ++i) { 4531 struct mxge_slice_state *ss = &sc->ss[i]; 4532 4533 if (ss->intr_res != NULL) { 4534 bus_release_resource(sc->dev, SYS_RES_IRQ, 4535 ss->intr_rid, ss->intr_res); 4536 } 4537 if (ss->intr_rid >= 0) 4538 pci_release_msix_vector(sc->dev, ss->intr_rid); 4539 } 4540 if (setup) 4541 pci_teardown_msix(sc->dev); 4542 } 4543 4544 static int 4545 mxge_alloc_msix(struct mxge_softc *sc) 4546 { 4547 struct mxge_slice_state *ss; 4548 int offset, rid, error, i; 4549 boolean_t setup = FALSE; 4550 4551 KKASSERT(sc->num_slices > 1); 4552 4553 if (sc->num_slices == ncpus2) { 4554 offset = 0; 4555 } else { 4556 int offset_def; 4557 4558 offset_def = (sc->num_slices * 
device_get_unit(sc->dev)) % 4559 ncpus2; 4560 4561 offset = device_getenv_int(sc->dev, "msix.offset", offset_def); 4562 if (offset >= ncpus2 || 4563 offset % sc->num_slices != 0) { 4564 device_printf(sc->dev, "invalid msix.offset %d, " 4565 "use %d\n", offset, offset_def); 4566 offset = offset_def; 4567 } 4568 } 4569 4570 ss = &sc->ss[0]; 4571 4572 ss->intr_serialize = &sc->main_serialize; 4573 ss->intr_func = mxge_msi; 4574 ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0), 4575 "%s comb", device_get_nameunit(sc->dev)); 4576 ss->intr_desc = ss->intr_desc0; 4577 ss->intr_cpuid = offset; 4578 4579 for (i = 1; i < sc->num_slices; ++i) { 4580 ss = &sc->ss[i]; 4581 4582 ss->intr_serialize = &ss->rx_data.rx_serialize; 4583 if (sc->num_tx_rings == 1) { 4584 ss->intr_func = mxge_msix_rx; 4585 ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0), 4586 "%s rx", device_get_nameunit(sc->dev)); 4587 } else { 4588 ss->intr_func = mxge_msix_rxtx; 4589 ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0), 4590 "%s rxtx", device_get_nameunit(sc->dev)); 4591 } 4592 ss->intr_desc = ss->intr_desc0; 4593 ss->intr_cpuid = offset + i; 4594 } 4595 4596 rid = PCIR_BAR(2); 4597 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 4598 &rid, RF_ACTIVE); 4599 if (sc->msix_table_res == NULL) { 4600 device_printf(sc->dev, "couldn't alloc MSI-X table res\n"); 4601 return ENXIO; 4602 } 4603 4604 error = pci_setup_msix(sc->dev); 4605 if (error) { 4606 device_printf(sc->dev, "could not setup MSI-X\n"); 4607 goto back; 4608 } 4609 setup = TRUE; 4610 4611 for (i = 0; i < sc->num_slices; ++i) { 4612 ss = &sc->ss[i]; 4613 4614 error = pci_alloc_msix_vector(sc->dev, i, &ss->intr_rid, 4615 ss->intr_cpuid); 4616 if (error) { 4617 device_printf(sc->dev, "could not alloc " 4618 "MSI-X %d on cpu%d\n", i, ss->intr_cpuid); 4619 goto back; 4620 } 4621 4622 ss->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, 4623 &ss->intr_rid, RF_ACTIVE); 4624 if (ss->intr_res == NULL) { 4625 
device_printf(sc->dev, "could not alloc " 4626 "MSI-X %d resource\n", i); 4627 error = ENXIO; 4628 goto back; 4629 } 4630 } 4631 4632 pci_enable_msix(sc->dev); 4633 sc->intr_type = PCI_INTR_TYPE_MSIX; 4634 back: 4635 if (error) 4636 mxge_free_msix(sc, setup); 4637 return error; 4638 } 4639 4640 static int 4641 mxge_alloc_intr(struct mxge_softc *sc) 4642 { 4643 struct mxge_slice_state *ss; 4644 u_int irq_flags; 4645 4646 if (sc->num_slices > 1) { 4647 int error; 4648 4649 error = mxge_alloc_msix(sc); 4650 if (error) 4651 return error; 4652 KKASSERT(sc->intr_type == PCI_INTR_TYPE_MSIX); 4653 return 0; 4654 } 4655 4656 ss = &sc->ss[0]; 4657 4658 sc->intr_type = pci_alloc_1intr(sc->dev, mxge_msi_enable, 4659 &ss->intr_rid, &irq_flags); 4660 4661 ss->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, 4662 &ss->intr_rid, irq_flags); 4663 if (ss->intr_res == NULL) { 4664 device_printf(sc->dev, "could not alloc interrupt\n"); 4665 return ENXIO; 4666 } 4667 4668 if (sc->intr_type == PCI_INTR_TYPE_LEGACY) 4669 ss->intr_func = mxge_legacy; 4670 else 4671 ss->intr_func = mxge_msi; 4672 ss->intr_serialize = &sc->main_serialize; 4673 ss->intr_cpuid = rman_get_cpuid(ss->intr_res); 4674 4675 return 0; 4676 } 4677 4678 static int 4679 mxge_setup_intr(struct mxge_softc *sc) 4680 { 4681 int i; 4682 4683 for (i = 0; i < sc->num_slices; ++i) { 4684 struct mxge_slice_state *ss = &sc->ss[i]; 4685 int error; 4686 4687 error = bus_setup_intr_descr(sc->dev, ss->intr_res, 4688 INTR_MPSAFE, ss->intr_func, ss, &ss->intr_hand, 4689 ss->intr_serialize, ss->intr_desc); 4690 if (error) { 4691 device_printf(sc->dev, "can't setup %dth intr\n", i); 4692 mxge_teardown_intr(sc, i); 4693 return error; 4694 } 4695 } 4696 return 0; 4697 } 4698 4699 static void 4700 mxge_teardown_intr(struct mxge_softc *sc, int cnt) 4701 { 4702 int i; 4703 4704 if (sc->ss == NULL) 4705 return; 4706 4707 for (i = 0; i < cnt; ++i) { 4708 struct mxge_slice_state *ss = &sc->ss[i]; 4709 4710 bus_teardown_intr(sc->dev, 
ss->intr_res, ss->intr_hand); 4711 } 4712 } 4713 4714 static void 4715 mxge_free_intr(struct mxge_softc *sc) 4716 { 4717 if (sc->ss == NULL) 4718 return; 4719 4720 if (sc->intr_type != PCI_INTR_TYPE_MSIX) { 4721 struct mxge_slice_state *ss = &sc->ss[0]; 4722 4723 if (ss->intr_res != NULL) { 4724 bus_release_resource(sc->dev, SYS_RES_IRQ, 4725 ss->intr_rid, ss->intr_res); 4726 } 4727 if (sc->intr_type == PCI_INTR_TYPE_MSI) 4728 pci_release_msi(sc->dev); 4729 } else { 4730 mxge_free_msix(sc, TRUE); 4731 } 4732 } 4733