1 /****************************************************************************** 2 3 Copyright (c) 2006-2013, Myricom Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Myricom Inc, nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 27 28 $FreeBSD: head/sys/dev/mxge/if_mxge.c 254263 2013-08-12 23:30:01Z scottl $ 29 30 ***************************************************************************/ 31 32 #include "opt_ifpoll.h" 33 #include "opt_inet.h" 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/linker.h> 38 #include <sys/firmware.h> 39 #include <sys/endian.h> 40 #include <sys/in_cksum.h> 41 #include <sys/sockio.h> 42 #include <sys/mbuf.h> 43 #include <sys/malloc.h> 44 #include <sys/kernel.h> 45 #include <sys/module.h> 46 #include <sys/serialize.h> 47 #include <sys/socket.h> 48 #include <sys/sysctl.h> 49 50 #include <net/if.h> 51 #include <net/if_arp.h> 52 #include <net/ifq_var.h> 53 #include <net/ethernet.h> 54 #include <net/if_dl.h> 55 #include <net/if_media.h> 56 #include <net/if_poll.h> 57 58 #include <net/bpf.h> 59 60 #include <net/if_types.h> 61 #include <net/vlan/if_vlan_var.h> 62 #include <net/zlib.h> 63 #include <net/toeplitz.h> 64 65 #include <netinet/in_systm.h> 66 #include <netinet/in.h> 67 #include <netinet/ip.h> 68 #include <netinet/tcp.h> 69 70 #include <sys/bus.h> 71 #include <sys/rman.h> 72 73 #include <bus/pci/pcireg.h> 74 #include <bus/pci/pcivar.h> 75 #include <bus/pci/pci_private.h> /* XXX for pci_cfg_restore */ 76 77 #include <vm/vm.h> /* for pmap_mapdev() */ 78 #include <vm/pmap.h> 79 80 #if defined(__i386__) || defined(__x86_64__) 81 #include <machine/specialreg.h> 82 #endif 83 84 #include <dev/netif/mxge/mxge_mcp.h> 85 #include <dev/netif/mxge/mcp_gen_header.h> 86 #include <dev/netif/mxge/if_mxge_var.h> 87 88 #define MXGE_RX_SMALL_BUFLEN (MHLEN - MXGEFW_PAD) 89 #define MXGE_HWRSS_KEYLEN 16 90 91 /* Tunable params */ 92 static int mxge_nvidia_ecrc_enable = 1; 93 static int mxge_force_firmware = 0; 94 static int mxge_intr_coal_delay = MXGE_INTR_COAL_DELAY; 95 static int mxge_deassert_wait = 1; 96 static int mxge_flow_control = 1; 97 static int mxge_ticks; 98 static int mxge_num_slices = 0; 99 static int mxge_always_promisc = 0; 100 static int mxge_throttle = 0; 101 static int mxge_msi_enable = 1; 102 static int mxge_msix_enable = 1; 103 static int mxge_multi_tx = 1; 104 /* 105 * Don't use RSS by default, its just too slow 106 */ 107 static int mxge_use_rss = 0; 108 109 static const char *mxge_fw_unaligned = "mxge_ethp_z8e"; 110 static const char *mxge_fw_aligned = "mxge_eth_z8e"; 111 static const char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; 112 static const char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; 113 114 TUNABLE_INT("hw.mxge.num_slices", &mxge_num_slices); 115 TUNABLE_INT("hw.mxge.flow_control_enabled", &mxge_flow_control); 116 TUNABLE_INT("hw.mxge.intr_coal_delay", &mxge_intr_coal_delay); 117 TUNABLE_INT("hw.mxge.nvidia_ecrc_enable", &mxge_nvidia_ecrc_enable); 118 TUNABLE_INT("hw.mxge.force_firmware", &mxge_force_firmware); 119 TUNABLE_INT("hw.mxge.deassert_wait", &mxge_deassert_wait); 120 TUNABLE_INT("hw.mxge.ticks", &mxge_ticks); 121 TUNABLE_INT("hw.mxge.always_promisc", &mxge_always_promisc); 122 TUNABLE_INT("hw.mxge.throttle", &mxge_throttle); 123 TUNABLE_INT("hw.mxge.multi_tx", &mxge_multi_tx); 124 TUNABLE_INT("hw.mxge.use_rss", &mxge_use_rss); 125 TUNABLE_INT("hw.mxge.msi.enable", &mxge_msi_enable); 126 TUNABLE_INT("hw.mxge.msix.enable", &mxge_msix_enable); 127 128 static int mxge_probe(device_t dev); 129 static int mxge_attach(device_t dev); 130 static int mxge_detach(device_t dev); 131 static int mxge_shutdown(device_t dev); 132 133 static int mxge_alloc_intr(struct mxge_softc *sc); 134 static void mxge_free_intr(struct mxge_softc *sc); 135 static int mxge_setup_intr(struct mxge_softc *sc); 136 static void mxge_teardown_intr(struct mxge_softc *sc, int cnt); 137 138 static device_method_t mxge_methods[] = { 139 /* Device interface */ 140 DEVMETHOD(device_probe, mxge_probe), 141 DEVMETHOD(device_attach, mxge_attach), 142 DEVMETHOD(device_detach, mxge_detach), 143 DEVMETHOD(device_shutdown, mxge_shutdown), 144 DEVMETHOD_END 145 }; 146 147 static driver_t mxge_driver = { 148 "mxge", 149 mxge_methods, 150 sizeof(mxge_softc_t), 151 }; 152 153 static devclass_t mxge_devclass; 154 155 /* Declare ourselves to be a child of the PCI bus.*/ 156 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, NULL, NULL); 157 MODULE_DEPEND(mxge, firmware, 1, 1, 1); 158 MODULE_DEPEND(mxge, zlib, 1, 1, 1); 159 160 static int mxge_load_firmware(mxge_softc_t *sc, int adopt); 161 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 162 static void mxge_close(mxge_softc_t *sc, int down); 163 static int mxge_open(mxge_softc_t *sc); 164 static void mxge_tick(void *arg); 165 static void mxge_watchdog_reset(mxge_softc_t *sc); 166 static void mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice); 167 168 static int 169 mxge_probe(device_t dev) 170 { 171 if (pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM && 172 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E || 173 pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9)) { 174 int rev = pci_get_revid(dev); 175 176 switch (rev) { 177 case MXGE_PCI_REV_Z8E: 178 device_set_desc(dev, "Myri10G-PCIE-8A"); 179 break; 180 case MXGE_PCI_REV_Z8ES: 181 device_set_desc(dev, "Myri10G-PCIE-8B"); 182 break; 183 default: 184 device_set_desc(dev, "Myri10G-PCIE-8??"); 185 device_printf(dev, "Unrecognized rev %d NIC\n", rev); 186 break; 187 } 188 return 0; 189 } 190 return ENXIO; 191 } 192 193 static void 194 mxge_enable_wc(mxge_softc_t *sc) 195 { 196 #if defined(__i386__) || defined(__x86_64__) 197 vm_offset_t len; 198 199 sc->wc = 1; 200 len = rman_get_size(sc->mem_res); 201 pmap_change_attr((vm_offset_t) sc->sram, len / PAGE_SIZE, 202 PAT_WRITE_COMBINING); 203 #endif 204 } 205 206 static int 207 mxge_dma_alloc(mxge_softc_t *sc, bus_dmamem_t *dma, size_t bytes, 208 bus_size_t alignment) 209 { 210 bus_size_t boundary; 211 int err; 212 213 if (bytes > 4096 && alignment == 4096) 214 boundary = 0; 215 else 216 boundary = 4096; 217 218 err = bus_dmamem_coherent(sc->parent_dmat, alignment, boundary, 219 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, bytes, 220 BUS_DMA_WAITOK | BUS_DMA_ZERO, dma); 221 if (err != 0) { 222 device_printf(sc->dev, "bus_dmamem_coherent failed: %d\n", err); 223 return err; 224 } 225 return 0; 226 } 227 228 static void 229 mxge_dma_free(bus_dmamem_t *dma) 230 { 231 bus_dmamap_unload(dma->dmem_tag, dma->dmem_map); 232 bus_dmamem_free(dma->dmem_tag, dma->dmem_addr, dma->dmem_map); 233 bus_dma_tag_destroy(dma->dmem_tag); 234 } 235 236 /* 237 * The eeprom strings on the lanaiX have the format 238 * SN=x\0 239 * MAC=x:x:x:x:x:x\0 240 * PC=text\0 241 */ 242 static int 243 mxge_parse_strings(mxge_softc_t *sc) 244 { 245 const char *ptr; 246 int i, found_mac, found_sn2; 247 char *endptr; 248 249 ptr = sc->eeprom_strings; 250 found_mac = 0; 251 found_sn2 = 0; 252 while (*ptr != '\0') { 253 if (strncmp(ptr, "MAC=", 4) == 0) { 254 ptr += 4; 255 for (i = 0;;) { 256 sc->mac_addr[i] = strtoul(ptr, &endptr, 16); 257 if (endptr - ptr != 2) 258 goto abort; 259 ptr = endptr; 260 if (++i == 6) 261 break; 262 if (*ptr++ != ':') 263 goto abort; 264 } 265 found_mac = 1; 266 } else if (strncmp(ptr, "PC=", 3) == 0) { 267 ptr += 3; 268 strlcpy(sc->product_code_string, ptr, 269 sizeof(sc->product_code_string)); 270 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) { 271 ptr += 3; 272 strlcpy(sc->serial_number_string, ptr, 273 sizeof(sc->serial_number_string)); 274 } else if (strncmp(ptr, "SN2=", 4) == 0) { 275 /* SN2 takes precedence over SN */ 276 ptr += 4; 277 found_sn2 = 1; 278 strlcpy(sc->serial_number_string, ptr, 279 sizeof(sc->serial_number_string)); 280 } 281 while (*ptr++ != '\0') {} 282 } 283 284 if (found_mac) 285 return 0; 286 287 abort: 288 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 289 return ENXIO; 290 } 291 292 #if defined(__i386__) || defined(__x86_64__) 293 294 static void 295 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 296 { 297 uint32_t val; 298 unsigned long base, off; 299 char *va, *cfgptr; 300 device_t pdev, mcp55; 301 uint16_t vendor_id, device_id, word; 302 uintptr_t bus, slot, func, ivend, idev; 303 uint32_t *ptr32; 304 305 if (!mxge_nvidia_ecrc_enable) 306 return; 307 308 pdev = device_get_parent(device_get_parent(sc->dev)); 309 if (pdev == NULL) { 310 device_printf(sc->dev, "could not find parent?\n"); 311 return; 312 } 313 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 314 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 315 316 if (vendor_id != 0x10de) 317 return; 318 319 base = 0; 320 321 if (device_id == 0x005d) { 322 /* ck804, base address is magic */ 323 base = 0xe0000000UL; 324 } else if (device_id >= 0x0374 && device_id <= 0x378) { 325 /* mcp55, base address stored in chipset */ 326 mcp55 = pci_find_bsf(0, 0, 0); 327 if (mcp55 && 328 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 329 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 330 word = pci_read_config(mcp55, 0x90, 2); 331 base = ((unsigned long)word & 0x7ffeU) << 25; 332 } 333 } 334 if (!base) 335 return; 336 337 /* 338 * XXXX 339 * Test below is commented because it is believed that doing 340 * config read/write beyond 0xff will access the config space 341 * for the next larger function. Uncomment this and remove 342 * the hacky pmap_mapdev() way of accessing config space when 343 * DragonFly grows support for extended pcie config space access. 344 */ 345 #if 0 346 /* 347 * See if we can, by some miracle, access the extended 348 * config space 349 */ 350 val = pci_read_config(pdev, 0x178, 4); 351 if (val != 0xffffffff) { 352 val |= 0x40; 353 pci_write_config(pdev, 0x178, val, 4); 354 return; 355 } 356 #endif 357 /* 358 * Rather than using normal pci config space writes, we must 359 * map the Nvidia config space ourselves. This is because on 360 * opteron/nvidia class machine the 0xe000000 mapping is 361 * handled by the nvidia chipset, that means the internal PCI 362 * device (the on-chip northbridge), or the amd-8131 bridge 363 * and things behind them are not visible by this method. 364 */ 365 366 BUS_READ_IVAR(device_get_parent(pdev), pdev, 367 PCI_IVAR_BUS, &bus); 368 BUS_READ_IVAR(device_get_parent(pdev), pdev, 369 PCI_IVAR_SLOT, &slot); 370 BUS_READ_IVAR(device_get_parent(pdev), pdev, 371 PCI_IVAR_FUNCTION, &func); 372 BUS_READ_IVAR(device_get_parent(pdev), pdev, 373 PCI_IVAR_VENDOR, &ivend); 374 BUS_READ_IVAR(device_get_parent(pdev), pdev, 375 PCI_IVAR_DEVICE, &idev); 376 377 off = base + 0x00100000UL * (unsigned long)bus + 378 0x00001000UL * (unsigned long)(func + 8 * slot); 379 380 /* map it into the kernel */ 381 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 382 if (va == NULL) { 383 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 384 return; 385 } 386 /* get a pointer to the config space mapped into the kernel */ 387 cfgptr = va + (off & PAGE_MASK); 388 389 /* make sure that we can really access it */ 390 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 391 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 392 if (!(vendor_id == ivend && device_id == idev)) { 393 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 394 vendor_id, device_id); 395 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 396 return; 397 } 398 399 ptr32 = (uint32_t*)(cfgptr + 0x178); 400 val = *ptr32; 401 402 if (val == 0xffffffff) { 403 device_printf(sc->dev, "extended mapping failed\n"); 404 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 405 return; 406 } 407 *ptr32 = val | 0x40; 408 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 409 if (bootverbose) { 410 device_printf(sc->dev, "Enabled ECRC on upstream " 411 "Nvidia bridge at %d:%d:%d\n", 412 (int)bus, (int)slot, (int)func); 413 } 414 } 415 416 #else /* __i386__ || __x86_64__ */ 417 418 static void 419 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 420 { 421 device_printf(sc->dev, "Nforce 4 chipset on non-x86/x86_64!?!?!\n"); 422 } 423 424 #endif 425 426 static int 427 mxge_dma_test(mxge_softc_t *sc, int test_type) 428 { 429 mxge_cmd_t cmd; 430 bus_addr_t dmatest_bus = sc->dmabench_dma.dmem_busaddr; 431 int status; 432 uint32_t len; 433 const char *test = " "; 434 435 /* 436 * Run a small DMA test. 437 * The magic multipliers to the length tell the firmware 438 * to do DMA read, write, or read+write tests. The 439 * results are returned in cmd.data0. The upper 16 440 * bits of the return is the number of transfers completed. 441 * The lower 16 bits is the time in 0.5us ticks that the 442 * transfers took to complete. 443 */ 444 445 len = sc->tx_boundary; 446 447 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 448 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 449 cmd.data2 = len * 0x10000; 450 status = mxge_send_cmd(sc, test_type, &cmd); 451 if (status != 0) { 452 test = "read"; 453 goto abort; 454 } 455 sc->read_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff); 456 457 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 458 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 459 cmd.data2 = len * 0x1; 460 status = mxge_send_cmd(sc, test_type, &cmd); 461 if (status != 0) { 462 test = "write"; 463 goto abort; 464 } 465 sc->write_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff); 466 467 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 468 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 469 cmd.data2 = len * 0x10001; 470 status = mxge_send_cmd(sc, test_type, &cmd); 471 if (status != 0) { 472 test = "read/write"; 473 goto abort; 474 } 475 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 476 (cmd.data0 & 0xffff); 477 478 abort: 479 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) { 480 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 481 test, status); 482 } 483 return status; 484 } 485 486 /* 487 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 488 * when the PCI-E Completion packets are aligned on an 8-byte 489 * boundary. Some PCI-E chip sets always align Completion packets; on 490 * the ones that do not, the alignment can be enforced by enabling 491 * ECRC generation (if supported). 492 * 493 * When PCI-E Completion packets are not aligned, it is actually more 494 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 495 * 496 * If the driver can neither enable ECRC nor verify that it has 497 * already been enabled, then it must use a firmware image which works 498 * around unaligned completion packets (ethp_z8e.dat), and it should 499 * also ensure that it never gives the device a Read-DMA which is 500 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 501 * enabled, then the driver should use the aligned (eth_z8e.dat) 502 * firmware image, and set tx_boundary to 4KB. 503 */ 504 static int 505 mxge_firmware_probe(mxge_softc_t *sc) 506 { 507 device_t dev = sc->dev; 508 int reg, status; 509 uint16_t pectl; 510 511 sc->tx_boundary = 4096; 512 513 /* 514 * Verify the max read request size was set to 4KB 515 * before trying the test with 4KB. 516 */ 517 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 518 pectl = pci_read_config(dev, reg + 0x8, 2); 519 if ((pectl & (5 << 12)) != (5 << 12)) { 520 device_printf(dev, "Max Read Req. size != 4k (0x%x)\n", 521 pectl); 522 sc->tx_boundary = 2048; 523 } 524 } 525 526 /* 527 * Load the optimized firmware (which assumes aligned PCIe 528 * completions) in order to see if it works on this host. 529 */ 530 sc->fw_name = mxge_fw_aligned; 531 status = mxge_load_firmware(sc, 1); 532 if (status != 0) 533 return status; 534 535 /* 536 * Enable ECRC if possible 537 */ 538 mxge_enable_nvidia_ecrc(sc); 539 540 /* 541 * Run a DMA test which watches for unaligned completions and 542 * aborts on the first one seen. Not required on Z8ES or newer. 543 */ 544 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES) 545 return 0; 546 547 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 548 if (status == 0) 549 return 0; /* keep the aligned firmware */ 550 551 if (status != E2BIG) 552 device_printf(dev, "DMA test failed: %d\n", status); 553 if (status == ENOSYS) { 554 device_printf(dev, "Falling back to ethp! " 555 "Please install up to date fw\n"); 556 } 557 return status; 558 } 559 560 static int 561 mxge_select_firmware(mxge_softc_t *sc) 562 { 563 int aligned = 0; 564 int force_firmware = mxge_force_firmware; 565 566 if (sc->throttle) 567 force_firmware = sc->throttle; 568 569 if (force_firmware != 0) { 570 if (force_firmware == 1) 571 aligned = 1; 572 else 573 aligned = 0; 574 if (bootverbose) { 575 device_printf(sc->dev, 576 "Assuming %s completions (forced)\n", 577 aligned ? "aligned" : "unaligned"); 578 } 579 goto abort; 580 } 581 582 /* 583 * If the PCIe link width is 4 or less, we can use the aligned 584 * firmware and skip any checks 585 */ 586 if (sc->link_width != 0 && sc->link_width <= 4) { 587 device_printf(sc->dev, "PCIe x%d Link, " 588 "expect reduced performance\n", sc->link_width); 589 aligned = 1; 590 goto abort; 591 } 592 593 if (mxge_firmware_probe(sc) == 0) 594 return 0; 595 596 abort: 597 if (aligned) { 598 sc->fw_name = mxge_fw_aligned; 599 sc->tx_boundary = 4096; 600 } else { 601 sc->fw_name = mxge_fw_unaligned; 602 sc->tx_boundary = 2048; 603 } 604 return mxge_load_firmware(sc, 0); 605 } 606 607 static int 608 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 609 { 610 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 611 if_printf(sc->ifp, "Bad firmware type: 0x%x\n", 612 be32toh(hdr->mcp_type)); 613 return EIO; 614 } 615 616 /* Save firmware version for sysctl */ 617 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version)); 618 if (bootverbose) 619 if_printf(sc->ifp, "firmware id: %s\n", hdr->version); 620 621 ksscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 622 &sc->fw_ver_minor, &sc->fw_ver_tiny); 623 624 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR && 625 sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 626 if_printf(sc->ifp, "Found firmware version %s\n", 627 sc->fw_version); 628 if_printf(sc->ifp, "Driver needs %d.%d\n", 629 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 630 return EINVAL; 631 } 632 return 0; 633 } 634 635 static void * 636 z_alloc(void *nil, u_int items, u_int size) 637 { 638 return kmalloc(items * size, M_TEMP, M_WAITOK); 639 } 640 641 static void 642 z_free(void *nil, void *ptr) 643 { 644 kfree(ptr, M_TEMP); 645 } 646 647 static int 648 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 649 { 650 z_stream zs; 651 char *inflate_buffer; 652 const struct firmware *fw; 653 const mcp_gen_header_t *hdr; 654 unsigned hdr_offset; 655 int status; 656 unsigned int i; 657 char dummy; 658 size_t fw_len; 659 660 fw = firmware_get(sc->fw_name); 661 if (fw == NULL) { 662 if_printf(sc->ifp, "Could not find firmware image %s\n", 663 sc->fw_name); 664 return ENOENT; 665 } 666 667 /* Setup zlib and decompress f/w */ 668 bzero(&zs, sizeof(zs)); 669 zs.zalloc = z_alloc; 670 zs.zfree = z_free; 671 status = inflateInit(&zs); 672 if (status != Z_OK) { 673 status = EIO; 674 goto abort_with_fw; 675 } 676 677 /* 678 * The uncompressed size is stored as the firmware version, 679 * which would otherwise go unused 680 */ 681 fw_len = (size_t)fw->version; 682 inflate_buffer = kmalloc(fw_len, M_TEMP, M_WAITOK); 683 zs.avail_in = fw->datasize; 684 zs.next_in = __DECONST(char *, fw->data); 685 zs.avail_out = fw_len; 686 zs.next_out = inflate_buffer; 687 status = inflate(&zs, Z_FINISH); 688 if (status != Z_STREAM_END) { 689 if_printf(sc->ifp, "zlib %d\n", status); 690 status = EIO; 691 goto abort_with_buffer; 692 } 693 694 /* Check id */ 695 hdr_offset = 696 htobe32(*(const uint32_t *)(inflate_buffer + MCP_HEADER_PTR_OFFSET)); 697 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 698 if_printf(sc->ifp, "Bad firmware file"); 699 status = EIO; 700 goto abort_with_buffer; 701 } 702 hdr = (const void*)(inflate_buffer + hdr_offset); 703 704 status = mxge_validate_firmware(sc, hdr); 705 if (status != 0) 706 goto abort_with_buffer; 707 708 /* Copy the inflated firmware to NIC SRAM. */ 709 for (i = 0; i < fw_len; i += 256) { 710 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, inflate_buffer + i, 711 min(256U, (unsigned)(fw_len - i))); 712 wmb(); 713 dummy = *sc->sram; 714 wmb(); 715 } 716 717 *limit = fw_len; 718 status = 0; 719 abort_with_buffer: 720 kfree(inflate_buffer, M_TEMP); 721 inflateEnd(&zs); 722 abort_with_fw: 723 firmware_put(fw, FIRMWARE_UNLOAD); 724 return status; 725 } 726 727 /* 728 * Enable or disable periodic RDMAs from the host to make certain 729 * chipsets resend dropped PCIe messages 730 */ 731 static void 732 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 733 { 734 char buf_bytes[72]; 735 volatile uint32_t *confirm; 736 volatile char *submit; 737 uint32_t *buf, dma_low, dma_high; 738 int i; 739 740 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 741 742 /* Clear confirmation addr */ 743 confirm = (volatile uint32_t *)sc->cmd; 744 *confirm = 0; 745 wmb(); 746 747 /* 748 * Send an rdma command to the PCIe engine, and wait for the 749 * response in the confirmation address. The firmware should 750 * write a -1 there to indicate it is alive and well 751 */ 752 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr); 753 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr); 754 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 755 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 756 buf[2] = htobe32(0xffffffff); /* confirm data */ 757 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.dmem_busaddr); 758 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.dmem_busaddr); 759 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 760 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 761 buf[5] = htobe32(enable); /* enable? */ 762 763 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 764 765 mxge_pio_copy(submit, buf, 64); 766 wmb(); 767 DELAY(1000); 768 wmb(); 769 i = 0; 770 while (*confirm != 0xffffffff && i < 20) { 771 DELAY(1000); 772 i++; 773 } 774 if (*confirm != 0xffffffff) { 775 if_printf(sc->ifp, "dummy rdma %s failed (%p = 0x%x)", 776 (enable ? "enable" : "disable"), confirm, *confirm); 777 } 778 } 779 780 static int 781 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 782 { 783 mcp_cmd_t *buf; 784 char buf_bytes[sizeof(*buf) + 8]; 785 volatile mcp_cmd_response_t *response = sc->cmd; 786 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 787 uint32_t dma_low, dma_high; 788 int err, sleep_total = 0; 789 790 /* Ensure buf is aligned to 8 bytes */ 791 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 792 793 buf->data0 = htobe32(data->data0); 794 buf->data1 = htobe32(data->data1); 795 buf->data2 = htobe32(data->data2); 796 buf->cmd = htobe32(cmd); 797 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr); 798 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr); 799 800 buf->response_addr.low = htobe32(dma_low); 801 buf->response_addr.high = htobe32(dma_high); 802 803 response->result = 0xffffffff; 804 wmb(); 805 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 806 807 /* 808 * Wait up to 20ms 809 */ 810 err = EAGAIN; 811 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 812 wmb(); 813 switch (be32toh(response->result)) { 814 case 0: 815 data->data0 = be32toh(response->data); 816 err = 0; 817 break; 818 case 0xffffffff: 819 DELAY(1000); 820 break; 821 case MXGEFW_CMD_UNKNOWN: 822 err = ENOSYS; 823 break; 824 case MXGEFW_CMD_ERROR_UNALIGNED: 825 err = E2BIG; 826 break; 827 case MXGEFW_CMD_ERROR_BUSY: 828 err = EBUSY; 829 break; 830 case MXGEFW_CMD_ERROR_I2C_ABSENT: 831 err = ENXIO; 832 break; 833 default: 834 if_printf(sc->ifp, "command %d failed, result = %d\n", 835 cmd, be32toh(response->result)); 836 err = ENXIO; 837 break; 838 } 839 if (err != EAGAIN) 840 break; 841 } 842 if (err == EAGAIN) { 843 if_printf(sc->ifp, "command %d timed out result = %d\n", 844 cmd, be32toh(response->result)); 845 } 846 return err; 847 } 848 849 static int 850 mxge_adopt_running_firmware(mxge_softc_t *sc) 851 { 852 struct mcp_gen_header *hdr; 853 const size_t bytes = sizeof(struct mcp_gen_header); 854 size_t hdr_offset; 855 int status; 856 857 /* 858 * Find running firmware header 859 */ 860 hdr_offset = 861 htobe32(*(volatile uint32_t *)(sc->sram + MCP_HEADER_PTR_OFFSET)); 862 863 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 864 if_printf(sc->ifp, "Running firmware has bad header offset " 865 "(%zu)\n", hdr_offset); 866 return EIO; 867 } 868 869 /* 870 * Copy header of running firmware from SRAM to host memory to 871 * validate firmware 872 */ 873 hdr = kmalloc(bytes, M_DEVBUF, M_WAITOK); 874 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 875 rman_get_bushandle(sc->mem_res), hdr_offset, (char *)hdr, bytes); 876 status = mxge_validate_firmware(sc, hdr); 877 kfree(hdr, M_DEVBUF); 878 879 /* 880 * Check to see if adopted firmware has bug where adopting 881 * it will cause broadcasts to be filtered unless the NIC 882 * is kept in ALLMULTI mode 883 */ 884 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 885 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 886 sc->adopted_rx_filter_bug = 1; 887 if_printf(sc->ifp, "Adopting fw %d.%d.%d: " 888 "working around rx filter bug\n", 889 sc->fw_ver_major, sc->fw_ver_minor, sc->fw_ver_tiny); 890 } 891 892 return status; 893 } 894 895 static int 896 mxge_load_firmware(mxge_softc_t *sc, int adopt) 897 { 898 volatile uint32_t *confirm; 899 volatile char *submit; 900 char buf_bytes[72]; 901 uint32_t *buf, size, dma_low, dma_high; 902 int status, i; 903 904 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 905 906 size = sc->sram_size; 907 status = mxge_load_firmware_helper(sc, &size); 908 if (status) { 909 if (!adopt) 910 return status; 911 912 /* 913 * Try to use the currently running firmware, if 914 * it is new enough 915 */ 916 status = mxge_adopt_running_firmware(sc); 917 if (status) { 918 if_printf(sc->ifp, 919 "failed to adopt running firmware\n"); 920 return status; 921 } 922 if_printf(sc->ifp, "Successfully adopted running firmware\n"); 923 924 if (sc->tx_boundary == 4096) { 925 if_printf(sc->ifp, 926 "Using firmware currently running on NIC. " 927 "For optimal\n"); 928 if_printf(sc->ifp, "performance consider loading " 929 "optimized firmware\n"); 930 } 931 sc->fw_name = mxge_fw_unaligned; 932 sc->tx_boundary = 2048; 933 return 0; 934 } 935 936 /* Clear confirmation addr */ 937 confirm = (volatile uint32_t *)sc->cmd; 938 *confirm = 0; 939 wmb(); 940 941 /* 942 * Send a reload command to the bootstrap MCP, and wait for the 943 * response in the confirmation address. The firmware should 944 * write a -1 there to indicate it is alive and well 945 */ 946 947 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr); 948 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr); 949 950 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 951 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 952 buf[2] = htobe32(0xffffffff); /* confirm data */ 953 954 /* 955 * FIX: All newest firmware should un-protect the bottom of 956 * the sram before handoff. However, the very first interfaces 957 * do not. Therefore the handoff copy must skip the first 8 bytes 958 */ 959 /* where the code starts*/ 960 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 961 buf[4] = htobe32(size - 8); /* length of code */ 962 buf[5] = htobe32(8); /* where to copy to */ 963 buf[6] = htobe32(0); /* where to jump to */ 964 965 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 966 mxge_pio_copy(submit, buf, 64); 967 wmb(); 968 DELAY(1000); 969 wmb(); 970 i = 0; 971 while (*confirm != 0xffffffff && i < 20) { 972 DELAY(1000*10); 973 i++; 974 } 975 if (*confirm != 0xffffffff) { 976 if_printf(sc->ifp,"handoff failed (%p = 0x%x)", 977 confirm, *confirm); 978 return ENXIO; 979 } 980 return 0; 981 } 982 983 static int 984 mxge_update_mac_address(mxge_softc_t *sc) 985 { 986 mxge_cmd_t cmd; 987 uint8_t *addr = sc->mac_addr; 988 989 cmd.data0 = (addr[0] << 24) | (addr[1] << 16) | 990 (addr[2] << 8) | addr[3]; 991 cmd.data1 = (addr[4] << 8) | (addr[5]); 992 return mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 993 } 994 995 static int 996 mxge_change_pause(mxge_softc_t *sc, int pause) 997 { 998 mxge_cmd_t cmd; 999 int status; 1000 1001 if (pause) 1002 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, &cmd); 1003 else 1004 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, &cmd); 1005 if (status) { 1006 if_printf(sc->ifp, "Failed to set flow control mode\n"); 1007 return ENXIO; 1008 } 1009 sc->pause = pause; 1010 return 0; 1011 } 1012 1013 static void 1014 mxge_change_promisc(mxge_softc_t *sc, int promisc) 1015 { 1016 mxge_cmd_t cmd; 1017 int status; 1018 1019 if (mxge_always_promisc) 1020 promisc = 1; 1021 1022 if (promisc) 1023 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, &cmd); 1024 else 1025 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, &cmd); 1026 if (status) 1027 if_printf(sc->ifp, "Failed to set promisc mode\n"); 1028 } 1029 1030 static void 1031 mxge_set_multicast_list(mxge_softc_t *sc) 1032 { 1033 mxge_cmd_t cmd; 1034 struct ifmultiaddr *ifma; 1035 struct ifnet *ifp = sc->ifp; 1036 int err; 1037 1038 /* This firmware is known to not support multicast */ 1039 if (!sc->fw_multicast_support) 1040 return; 1041 1042 /* Disable multicast filtering while we play with the lists*/ 1043 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1044 if (err != 0) { 1045 if_printf(ifp, "Failed MXGEFW_ENABLE_ALLMULTI, " 1046 "error status: %d\n", err); 1047 return; 1048 } 1049 1050 if (sc->adopted_rx_filter_bug) 1051 return; 1052 1053 if (ifp->if_flags & IFF_ALLMULTI) { 1054 /* Request to disable multicast filtering, so quit here */ 1055 return; 1056 } 1057 1058 /* Flush all the filters */ 1059 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1060 if (err != 0) { 1061 if_printf(ifp, "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, " 1062 "error status: %d\n", err); 1063 return; 1064 } 1065 1066 /* 1067 * Walk the multicast list, and add each address 1068 */ 1069 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1070 if (ifma->ifma_addr->sa_family != AF_LINK) 1071 continue; 1072 1073 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1074 &cmd.data0, 4); 1075 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, 1076 &cmd.data1, 2); 1077 cmd.data0 = htonl(cmd.data0); 1078 cmd.data1 = htonl(cmd.data1); 1079 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1080 if (err != 0) { 1081 if_printf(ifp, "Failed MXGEFW_JOIN_MULTICAST_GROUP, " 1082 "error status: %d\n", err); 1083 /* Abort, leaving multicast filtering off */ 1084 return; 1085 } 1086 } 1087 1088 /* Enable multicast filtering */ 1089 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1090 if (err != 0) { 1091 if_printf(ifp, "Failed MXGEFW_DISABLE_ALLMULTI, " 1092 "error status: %d\n", err); 1093 } 1094 } 1095 1096 #if 0 1097 static int 1098 mxge_max_mtu(mxge_softc_t *sc) 1099 { 1100 mxge_cmd_t cmd; 1101 int status; 1102 1103 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) 1104 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1105 1106 /* try to set nbufs to see if it we can 1107 use virtually contiguous jumbos */ 1108 cmd.data0 = 0; 1109 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 1110 &cmd); 1111 if (status == 0) 1112 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1113 1114 /* otherwise, we're limited to MJUMPAGESIZE */ 1115 return MJUMPAGESIZE - MXGEFW_PAD; 1116 } 1117 #endif 1118 1119 static int 1120 mxge_reset(mxge_softc_t *sc, int interrupts_setup) 1121 { 1122 struct mxge_slice_state *ss; 1123 mxge_rx_done_t *rx_done; 1124 volatile uint32_t *irq_claim; 1125 mxge_cmd_t cmd; 1126 int slice, status, rx_intr_size; 1127 1128 /* 1129 * Try to send a reset command to the card to see if it 1130 * is alive 1131 */ 1132 memset(&cmd, 0, sizeof (cmd)); 1133 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1134 if (status != 0) { 1135 if_printf(sc->ifp, "failed reset\n"); 1136 return ENXIO; 1137 } 1138 1139 mxge_dummy_rdma(sc, 1); 1140 1141 /* 1142 * Set the intrq size 1143 * XXX assume 4byte mcp_slot 1144 */ 1145 rx_intr_size = sc->rx_intr_slots * sizeof(mcp_slot_t); 1146 cmd.data0 = rx_intr_size; 1147 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1148 1149 /* 1150 * Even though we already know how many slices are supported 1151 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES 1152 * has magic side effects, and must be called after a reset. 1153 * It must be called prior to calling any RSS related cmds, 1154 * including assigning an interrupt queue for anything but 1155 * slice 0. It must also be called *after* 1156 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 1157 * the firmware to compute offsets. 1158 */ 1159 if (sc->num_slices > 1) { 1160 /* Ask the maximum number of slices it supports */ 1161 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 1162 if (status != 0) { 1163 if_printf(sc->ifp, "failed to get number of slices\n"); 1164 return status; 1165 } 1166 1167 /* 1168 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 1169 * to setting up the interrupt queue DMA 1170 */ 1171 cmd.data0 = sc->num_slices; 1172 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 1173 if (sc->num_tx_rings > 1) 1174 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 1175 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, &cmd); 1176 if (status != 0) { 1177 if_printf(sc->ifp, "failed to set number of slices\n"); 1178 return status; 1179 } 1180 } 1181 1182 if (interrupts_setup) { 1183 /* Now exchange information about interrupts */ 1184 for (slice = 0; slice < sc->num_slices; slice++) { 1185 ss = &sc->ss[slice]; 1186 1187 rx_done = &ss->rx_data.rx_done; 1188 memset(rx_done->entry, 0, rx_intr_size); 1189 1190 cmd.data0 = 1191 MXGE_LOWPART_TO_U32(ss->rx_done_dma.dmem_busaddr); 1192 cmd.data1 = 1193 MXGE_HIGHPART_TO_U32(ss->rx_done_dma.dmem_busaddr); 1194 cmd.data2 = slice; 1195 status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA, 1196 &cmd); 1197 } 1198 } 1199 1200 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, 1201 &cmd); 1202 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1203 1204 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1205 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1206 1207 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd); 1208 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1209 1210 if (status != 0) { 1211 if_printf(sc->ifp, "failed set interrupt parameters\n"); 1212 return status; 1213 } 1214 1215 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1216 1217 /* Run a DMA benchmark */ 1218 mxge_dma_test(sc, MXGEFW_DMA_TEST); 1219 1220 for (slice = 0; slice < sc->num_slices; slice++) { 1221 ss = &sc->ss[slice]; 1222 1223 ss->irq_claim = irq_claim + (2 * slice); 1224 1225 /* Reset mcp/driver shared state back to 0 */ 1226 ss->rx_data.rx_done.idx = 0; 1227 ss->tx.req = 0; 1228 ss->tx.done = 0; 1229 ss->tx.pkt_done = 0; 1230 ss->tx.queue_active = 0; 1231 ss->tx.activate = 0; 1232 ss->tx.deactivate = 0; 1233 ss->rx_data.rx_big.cnt = 0; 1234 ss->rx_data.rx_small.cnt = 0; 1235 if (ss->fw_stats != NULL) 1236 bzero(ss->fw_stats, sizeof(*ss->fw_stats)); 1237 } 1238 sc->rdma_tags_available = 15; 1239 1240 status = mxge_update_mac_address(sc); 1241 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC); 1242 mxge_change_pause(sc, sc->pause); 1243 mxge_set_multicast_list(sc); 1244 1245 if (sc->throttle) { 1246 cmd.data0 = sc->throttle; 1247 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd)) 1248 if_printf(sc->ifp, "can't enable throttle\n"); 1249 } 1250 return status; 1251 } 1252 1253 static int 1254 mxge_change_throttle(SYSCTL_HANDLER_ARGS) 1255 { 1256 mxge_cmd_t cmd; 1257 mxge_softc_t *sc; 1258 int err; 1259 unsigned int throttle; 1260 1261 sc = arg1; 1262 throttle = sc->throttle; 1263 err = sysctl_handle_int(oidp, &throttle, arg2, req); 1264 if (err != 0) 1265 return err; 1266 1267 if (throttle == sc->throttle) 1268 return 0; 1269 1270 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE) 1271 return EINVAL; 1272 1273 ifnet_serialize_all(sc->ifp); 1274 1275 cmd.data0 = throttle; 1276 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd); 1277 if (err == 0) 1278 sc->throttle = throttle; 1279 1280 ifnet_deserialize_all(sc->ifp); 1281 return err; 1282 } 1283 1284 static int 1285 mxge_change_use_rss(SYSCTL_HANDLER_ARGS) 1286 { 1287 mxge_softc_t *sc; 1288 int err, use_rss; 1289 1290 sc = arg1; 1291 use_rss = sc->use_rss; 1292 err = sysctl_handle_int(oidp, &use_rss, arg2, req); 1293 if (err != 0) 1294 return err; 1295 1296 if (use_rss == sc->use_rss) 1297 return 0; 1298 1299 ifnet_serialize_all(sc->ifp); 1300 1301 sc->use_rss = use_rss; 1302 if (sc->ifp->if_flags & IFF_RUNNING) { 1303 mxge_close(sc, 0); 1304 mxge_open(sc); 1305 } 1306 1307 ifnet_deserialize_all(sc->ifp); 1308 return err; 1309 } 1310 1311 static int 1312 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1313 { 1314 mxge_softc_t *sc; 1315 unsigned int intr_coal_delay; 1316 int err; 1317 1318 sc = arg1; 1319 intr_coal_delay = sc->intr_coal_delay; 1320 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1321 if (err != 0) 1322 return err; 1323 1324 if (intr_coal_delay == sc->intr_coal_delay) 1325 return 0; 1326 1327 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1328 return EINVAL; 1329 1330 ifnet_serialize_all(sc->ifp); 1331 1332 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1333 sc->intr_coal_delay = intr_coal_delay; 1334 1335 ifnet_deserialize_all(sc->ifp); 1336 return err; 1337 } 1338 1339 static int 1340 mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1341 { 1342 mxge_softc_t *sc; 1343 unsigned int enabled; 1344 int err; 1345 1346 sc = arg1; 1347 enabled = sc->pause; 1348 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1349 if (err != 0) 1350 return err; 1351 1352 if (enabled == sc->pause) 1353 return 0; 1354 1355 ifnet_serialize_all(sc->ifp); 1356 err = mxge_change_pause(sc, enabled); 1357 ifnet_deserialize_all(sc->ifp); 1358 1359 return err; 1360 } 1361 1362 static int 1363 mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1364 { 1365 int err; 1366 1367 if (arg1 == NULL) 1368 return EFAULT; 1369 arg2 = be32toh(*(int *)arg1); 1370 arg1 = NULL; 1371 err = sysctl_handle_int(oidp, arg1, arg2, req); 1372 1373 return err; 1374 } 1375 1376 static void 1377 mxge_rem_sysctls(mxge_softc_t *sc) 1378 { 1379 if (sc->ss != NULL) { 1380 struct mxge_slice_state *ss; 1381 int slice; 1382 1383 for (slice = 0; slice < sc->num_slices; slice++) { 1384 ss = &sc->ss[slice]; 1385 if (ss->sysctl_tree != NULL) { 1386 sysctl_ctx_free(&ss->sysctl_ctx); 1387 ss->sysctl_tree = NULL; 1388 } 1389 } 1390 } 1391 1392 if (sc->slice_sysctl_tree != NULL) { 1393 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1394 sc->slice_sysctl_tree = NULL; 1395 } 1396 1397 if (sc->sysctl_tree != NULL) { 1398 sysctl_ctx_free(&sc->sysctl_ctx); 1399 sc->sysctl_tree = NULL; 1400 } 1401 } 1402 1403 static void 1404 mxge_add_sysctls(mxge_softc_t *sc) 1405 { 1406 struct sysctl_ctx_list *ctx; 1407 struct sysctl_oid_list *children; 1408 mcp_irq_data_t *fw; 1409 struct mxge_slice_state *ss; 1410 int slice; 1411 char slice_num[8]; 1412 1413 ctx = &sc->sysctl_ctx; 1414 sysctl_ctx_init(ctx); 1415 sc->sysctl_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_hw), 1416 OID_AUTO, device_get_nameunit(sc->dev), CTLFLAG_RD, 0, ""); 1417 if (sc->sysctl_tree == NULL) { 1418 device_printf(sc->dev, "can't add sysctl node\n"); 1419 return; 1420 } 1421 1422 children = SYSCTL_CHILDREN(sc->sysctl_tree); 1423 fw = sc->ss[0].fw_stats; 1424 1425 /* 1426 * Random information 1427 */ 1428 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version", 1429 CTLFLAG_RD, &sc->fw_version, 0, "firmware version"); 1430 1431 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "serial_number", 1432 CTLFLAG_RD, &sc->serial_number_string, 0, "serial number"); 1433 1434 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "product_code", 1435 CTLFLAG_RD, &sc->product_code_string, 0, "product code"); 1436 1437 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "pcie_link_width", 1438 CTLFLAG_RD, &sc->link_width, 0, "link width"); 1439 1440 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_boundary", 1441 CTLFLAG_RD, &sc->tx_boundary, 0, "tx boundary"); 1442 1443 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_combine", 1444 CTLFLAG_RD, &sc->wc, 0, "write combining PIO"); 1445 1446 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_dma_MBs", 1447 CTLFLAG_RD, &sc->read_dma, 0, "DMA Read speed in MB/s"); 1448 1449 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_dma_MBs", 1450 CTLFLAG_RD, &sc->write_dma, 0, "DMA Write speed in MB/s"); 1451 1452 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_write_dma_MBs", 1453 CTLFLAG_RD, &sc->read_write_dma, 0, 1454 "DMA concurrent Read/Write speed in MB/s"); 1455 1456 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "watchdog_resets", 1457 CTLFLAG_RD, &sc->watchdog_resets, 0, 1458 "Number of times NIC was reset"); 1459 1460 /* 1461 * Performance related tunables 1462 */ 1463 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_coal_delay", 1464 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_intr_coal, "I", 1465 "Interrupt coalescing delay in usecs"); 1466 1467 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "throttle", 1468 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_throttle, "I", 1469 "Transmit throttling"); 1470 1471 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "flow_control_enabled", 1472 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_flow_control, "I", 1473 "Interrupt coalescing delay in usecs"); 1474 1475 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "use_rss", 1476 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_use_rss, "I", 1477 "Use RSS"); 1478 1479 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "deassert_wait", 1480 CTLFLAG_RW, &mxge_deassert_wait, 0, 1481 "Wait for IRQ line to go low in ihandler"); 1482 1483 /* 1484 * Stats block from firmware is in network byte order. 1485 * Need to swap it 1486 */ 1487 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "link_up", 1488 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 0, 1489 mxge_handle_be32, "I", "link up"); 1490 1491 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_tags_available", 1492 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 0, 1493 mxge_handle_be32, "I", "rdma_tags_available"); 1494 1495 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_crc32", 1496 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_crc32, 0, 1497 mxge_handle_be32, "I", "dropped_bad_crc32"); 1498 1499 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_phy", 1500 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_phy, 0, 1501 mxge_handle_be32, "I", "dropped_bad_phy"); 1502 1503 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_error_or_filtered", 1504 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_error_or_filtered, 0, 1505 mxge_handle_be32, "I", "dropped_link_error_or_filtered"); 1506 1507 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_overflow", 1508 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 0, 1509 mxge_handle_be32, "I", "dropped_link_overflow"); 1510 1511 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_multicast_filtered", 1512 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_multicast_filtered, 0, 1513 mxge_handle_be32, "I", "dropped_multicast_filtered"); 1514 1515 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_no_big_buffer", 1516 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 0, 1517 mxge_handle_be32, "I", "dropped_no_big_buffer"); 1518 1519 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_no_small_buffer", 1520 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_small_buffer, 0, 1521 mxge_handle_be32, "I", "dropped_no_small_buffer"); 1522 1523 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_overrun", 1524 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 0, 1525 mxge_handle_be32, "I", "dropped_overrun"); 1526 1527 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_pause", 1528 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_pause, 0, 1529 mxge_handle_be32, "I", "dropped_pause"); 1530 1531 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_runt", 1532 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 0, 1533 mxge_handle_be32, "I", "dropped_runt"); 1534 1535 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_unicast_filtered", 1536 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 0, 1537 mxge_handle_be32, "I", "dropped_unicast_filtered"); 1538 1539 /* add counters exported for debugging from all slices */ 1540 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1541 sc->slice_sysctl_tree = SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, 1542 children, OID_AUTO, "slice", CTLFLAG_RD, 0, ""); 1543 if (sc->slice_sysctl_tree == NULL) { 1544 device_printf(sc->dev, "can't add slice sysctl node\n"); 1545 return; 1546 } 1547 1548 for (slice = 0; slice < sc->num_slices; slice++) { 1549 ss = &sc->ss[slice]; 1550 sysctl_ctx_init(&ss->sysctl_ctx); 1551 ctx = &ss->sysctl_ctx; 1552 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1553 ksprintf(slice_num, "%d", slice); 1554 ss->sysctl_tree = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 1555 slice_num, CTLFLAG_RD, 0, ""); 1556 if (ss->sysctl_tree == NULL) { 1557 device_printf(sc->dev, 1558 "can't add %d slice sysctl node\n", slice); 1559 return; /* XXX continue? */ 1560 } 1561 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1562 1563 /* 1564 * XXX change to ULONG 1565 */ 1566 1567 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_small_cnt", 1568 CTLFLAG_RD, &ss->rx_data.rx_small.cnt, 0, "rx_small_cnt"); 1569 1570 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_big_cnt", 1571 CTLFLAG_RD, &ss->rx_data.rx_big.cnt, 0, "rx_small_cnt"); 1572 1573 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_req", 1574 CTLFLAG_RD, &ss->tx.req, 0, "tx_req"); 1575 1576 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_done", 1577 CTLFLAG_RD, &ss->tx.done, 0, "tx_done"); 1578 1579 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_pkt_done", 1580 CTLFLAG_RD, &ss->tx.pkt_done, 0, "tx_done"); 1581 1582 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_queue_active", 1583 CTLFLAG_RD, &ss->tx.queue_active, 0, "tx_queue_active"); 1584 1585 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_activate", 1586 CTLFLAG_RD, &ss->tx.activate, 0, "tx_activate"); 1587 1588 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_deactivate", 1589 CTLFLAG_RD, &ss->tx.deactivate, 0, "tx_deactivate"); 1590 } 1591 } 1592 1593 /* 1594 * Copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1595 * backwards one at a time and handle ring wraps 1596 */ 1597 static __inline void 1598 mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1599 mcp_kreq_ether_send_t *src, int cnt) 1600 { 1601 int idx, starting_slot; 1602 1603 starting_slot = tx->req; 1604 while (cnt > 1) { 1605 cnt--; 1606 idx = (starting_slot + cnt) & tx->mask; 1607 mxge_pio_copy(&tx->lanai[idx], &src[cnt], sizeof(*src)); 1608 wmb(); 1609 } 1610 } 1611 1612 /* 1613 * Copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1614 * at most 32 bytes at a time, so as to avoid involving the software 1615 * pio handler in the nic. We re-write the first segment's flags 1616 * to mark them valid only after writing the entire chain 1617 */ 1618 static __inline void 1619 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, int cnt) 1620 { 1621 int idx, i; 1622 uint32_t *src_ints; 1623 volatile uint32_t *dst_ints; 1624 mcp_kreq_ether_send_t *srcp; 1625 volatile mcp_kreq_ether_send_t *dstp, *dst; 1626 uint8_t last_flags; 1627 1628 idx = tx->req & tx->mask; 1629 1630 last_flags = src->flags; 1631 src->flags = 0; 1632 wmb(); 1633 dst = dstp = &tx->lanai[idx]; 1634 srcp = src; 1635 1636 if ((idx + cnt) < tx->mask) { 1637 for (i = 0; i < cnt - 1; i += 2) { 1638 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1639 wmb(); /* force write every 32 bytes */ 1640 srcp += 2; 1641 dstp += 2; 1642 } 1643 } else { 1644 /* 1645 * Submit all but the first request, and ensure 1646 * that it is submitted below 1647 */ 1648 mxge_submit_req_backwards(tx, src, cnt); 1649 i = 0; 1650 } 1651 if (i < cnt) { 1652 /* Submit the first request */ 1653 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1654 wmb(); /* barrier before setting valid flag */ 1655 } 1656 1657 /* Re-write the last 32-bits with the valid flags */ 1658 src->flags = last_flags; 1659 src_ints = (uint32_t *)src; 1660 src_ints+=3; 1661 dst_ints = (volatile uint32_t *)dst; 1662 dst_ints+=3; 1663 *dst_ints = *src_ints; 1664 tx->req += cnt; 1665 wmb(); 1666 } 1667 1668 static int 1669 mxge_pullup_tso(struct mbuf **mp) 1670 { 1671 int hoff, iphlen, thoff; 1672 struct mbuf *m; 1673 1674 m = *mp; 1675 KASSERT(M_WRITABLE(m), ("TSO mbuf not writable")); 1676 1677 iphlen = m->m_pkthdr.csum_iphlen; 1678 thoff = m->m_pkthdr.csum_thlen; 1679 hoff = m->m_pkthdr.csum_lhlen; 1680 1681 KASSERT(iphlen > 0, ("invalid ip hlen")); 1682 KASSERT(thoff > 0, ("invalid tcp hlen")); 1683 KASSERT(hoff > 0, ("invalid ether hlen")); 1684 1685 if (__predict_false(m->m_len < hoff + iphlen + thoff)) { 1686 m = m_pullup(m, hoff + iphlen + thoff); 1687 if (m == NULL) { 1688 *mp = NULL; 1689 return ENOBUFS; 1690 } 1691 *mp = m; 1692 } 1693 return 0; 1694 } 1695 1696 static int 1697 mxge_encap_tso(mxge_tx_ring_t *tx, struct mxge_buffer_state *info_map, 1698 struct mbuf *m, int busdma_seg_cnt) 1699 { 1700 mcp_kreq_ether_send_t *req; 1701 bus_dma_segment_t *seg; 1702 uint32_t low, high_swapped; 1703 int len, seglen, cum_len, cum_len_next; 1704 int next_is_first, chop, cnt, rdma_count, small; 1705 uint16_t pseudo_hdr_offset, cksum_offset, mss; 1706 uint8_t flags, flags_next; 1707 struct mxge_buffer_state *info_last; 1708 bus_dmamap_t map = info_map->map; 1709 1710 mss = m->m_pkthdr.tso_segsz; 1711 1712 /* 1713 * Negative cum_len signifies to the send loop that we are 1714 * still in the header portion of the TSO packet. 1715 */ 1716 cum_len = -(m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen + 1717 m->m_pkthdr.csum_thlen); 1718 1719 /* 1720 * TSO implies checksum offload on this hardware 1721 */ 1722 cksum_offset = m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen; 1723 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1724 1725 /* 1726 * For TSO, pseudo_hdr_offset holds mss. The firmware figures 1727 * out where to put the checksum by parsing the header. 1728 */ 1729 pseudo_hdr_offset = htobe16(mss); 1730 1731 req = tx->req_list; 1732 seg = tx->seg_list; 1733 cnt = 0; 1734 rdma_count = 0; 1735 1736 /* 1737 * "rdma_count" is the number of RDMAs belonging to the current 1738 * packet BEFORE the current send request. For non-TSO packets, 1739 * this is equal to "count". 1740 * 1741 * For TSO packets, rdma_count needs to be reset to 0 after a 1742 * segment cut. 1743 * 1744 * The rdma_count field of the send request is the number of 1745 * RDMAs of the packet starting at that request. For TSO send 1746 * requests with one ore more cuts in the middle, this is the 1747 * number of RDMAs starting after the last cut in the request. 1748 * All previous segments before the last cut implicitly have 1 1749 * RDMA. 1750 * 1751 * Since the number of RDMAs is not known beforehand, it must be 1752 * filled-in retroactively - after each segmentation cut or at 1753 * the end of the entire packet. 1754 */ 1755 1756 while (busdma_seg_cnt) { 1757 /* 1758 * Break the busdma segment up into pieces 1759 */ 1760 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1761 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1762 len = seg->ds_len; 1763 1764 while (len) { 1765 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1766 seglen = len; 1767 cum_len_next = cum_len + seglen; 1768 (req - rdma_count)->rdma_count = rdma_count + 1; 1769 if (__predict_true(cum_len >= 0)) { 1770 /* Payload */ 1771 chop = (cum_len_next > mss); 1772 cum_len_next = cum_len_next % mss; 1773 next_is_first = (cum_len_next == 0); 1774 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1775 flags_next |= 1776 next_is_first * MXGEFW_FLAGS_FIRST; 1777 rdma_count |= -(chop | next_is_first); 1778 rdma_count += chop & !next_is_first; 1779 } else if (cum_len_next >= 0) { 1780 /* Header ends */ 1781 rdma_count = -1; 1782 cum_len_next = 0; 1783 seglen = -cum_len; 1784 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1785 flags_next = MXGEFW_FLAGS_TSO_PLD | 1786 MXGEFW_FLAGS_FIRST | 1787 (small * MXGEFW_FLAGS_SMALL); 1788 } 1789 1790 req->addr_high = high_swapped; 1791 req->addr_low = htobe32(low); 1792 req->pseudo_hdr_offset = pseudo_hdr_offset; 1793 req->pad = 0; 1794 req->rdma_count = 1; 1795 req->length = htobe16(seglen); 1796 req->cksum_offset = cksum_offset; 1797 req->flags = 1798 flags | ((cum_len & 1) * MXGEFW_FLAGS_ALIGN_ODD); 1799 low += seglen; 1800 len -= seglen; 1801 cum_len = cum_len_next; 1802 flags = flags_next; 1803 req++; 1804 cnt++; 1805 rdma_count++; 1806 if (__predict_false(cksum_offset > seglen)) 1807 cksum_offset -= seglen; 1808 else 1809 cksum_offset = 0; 1810 if (__predict_false(cnt > tx->max_desc)) 1811 goto drop; 1812 } 1813 busdma_seg_cnt--; 1814 seg++; 1815 } 1816 (req - rdma_count)->rdma_count = rdma_count; 1817 1818 do { 1819 req--; 1820 req->flags |= MXGEFW_FLAGS_TSO_LAST; 1821 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 1822 1823 info_last = &tx->info[((cnt - 1) + tx->req) & tx->mask]; 1824 1825 info_map->map = info_last->map; 1826 info_last->map = map; 1827 info_last->m = m; 1828 1829 mxge_submit_req(tx, tx->req_list, cnt); 1830 1831 if (tx->send_go != NULL && tx->queue_active == 0) { 1832 /* Tell the NIC to start polling this slice */ 1833 *tx->send_go = 1; 1834 tx->queue_active = 1; 1835 tx->activate++; 1836 wmb(); 1837 } 1838 return 0; 1839 1840 drop: 1841 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 1842 m_freem(m); 1843 return ENOBUFS; 1844 } 1845 1846 static int 1847 mxge_encap(mxge_tx_ring_t *tx, struct mbuf *m, bus_addr_t zeropad) 1848 { 1849 mcp_kreq_ether_send_t *req; 1850 bus_dma_segment_t *seg; 1851 bus_dmamap_t map; 1852 int cnt, cum_len, err, i, idx, odd_flag; 1853 uint16_t pseudo_hdr_offset; 1854 uint8_t flags, cksum_offset; 1855 struct mxge_buffer_state *info_map, *info_last; 1856 1857 if (m->m_pkthdr.csum_flags & CSUM_TSO) { 1858 err = mxge_pullup_tso(&m); 1859 if (__predict_false(err)) 1860 return err; 1861 } 1862 1863 /* 1864 * Map the frame for DMA 1865 */ 1866 idx = tx->req & tx->mask; 1867 info_map = &tx->info[idx]; 1868 map = info_map->map; 1869 1870 err = bus_dmamap_load_mbuf_defrag(tx->dmat, map, &m, 1871 tx->seg_list, tx->max_desc - 2, &cnt, BUS_DMA_NOWAIT); 1872 if (__predict_false(err != 0)) 1873 goto drop; 1874 bus_dmamap_sync(tx->dmat, map, BUS_DMASYNC_PREWRITE); 1875 1876 /* 1877 * TSO is different enough, we handle it in another routine 1878 */ 1879 if (m->m_pkthdr.csum_flags & CSUM_TSO) 1880 return mxge_encap_tso(tx, info_map, m, cnt); 1881 1882 req = tx->req_list; 1883 cksum_offset = 0; 1884 pseudo_hdr_offset = 0; 1885 flags = MXGEFW_FLAGS_NO_TSO; 1886 1887 /* 1888 * Checksum offloading 1889 */ 1890 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 1891 cksum_offset = m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen; 1892 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 1893 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 1894 req->cksum_offset = cksum_offset; 1895 flags |= MXGEFW_FLAGS_CKSUM; 1896 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 1897 } else { 1898 odd_flag = 0; 1899 } 1900 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 1901 flags |= MXGEFW_FLAGS_SMALL; 1902 1903 /* 1904 * Convert segments into a request list 1905 */ 1906 cum_len = 0; 1907 seg = tx->seg_list; 1908 req->flags = MXGEFW_FLAGS_FIRST; 1909 for (i = 0; i < cnt; i++) { 1910 req->addr_low = htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 1911 req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1912 req->length = htobe16(seg->ds_len); 1913 req->cksum_offset = cksum_offset; 1914 if (cksum_offset > seg->ds_len) 1915 cksum_offset -= seg->ds_len; 1916 else 1917 cksum_offset = 0; 1918 req->pseudo_hdr_offset = pseudo_hdr_offset; 1919 req->pad = 0; /* complete solid 16-byte block */ 1920 req->rdma_count = 1; 1921 req->flags |= flags | ((cum_len & 1) * odd_flag); 1922 cum_len += seg->ds_len; 1923 seg++; 1924 req++; 1925 req->flags = 0; 1926 } 1927 req--; 1928 1929 /* 1930 * Pad runt to 60 bytes 1931 */ 1932 if (cum_len < 60) { 1933 req++; 1934 req->addr_low = htobe32(MXGE_LOWPART_TO_U32(zeropad)); 1935 req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(zeropad)); 1936 req->length = htobe16(60 - cum_len); 1937 req->cksum_offset = 0; 1938 req->pseudo_hdr_offset = pseudo_hdr_offset; 1939 req->pad = 0; /* complete solid 16-byte block */ 1940 req->rdma_count = 1; 1941 req->flags |= flags | ((cum_len & 1) * odd_flag); 1942 cnt++; 1943 } 1944 1945 tx->req_list[0].rdma_count = cnt; 1946 #if 0 1947 /* print what the firmware will see */ 1948 for (i = 0; i < cnt; i++) { 1949 kprintf("%d: addr: 0x%x 0x%x len:%d pso%d," 1950 "cso:%d, flags:0x%x, rdma:%d\n", 1951 i, (int)ntohl(tx->req_list[i].addr_high), 1952 (int)ntohl(tx->req_list[i].addr_low), 1953 (int)ntohs(tx->req_list[i].length), 1954 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 1955 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 1956 tx->req_list[i].rdma_count); 1957 } 1958 kprintf("--------------\n"); 1959 #endif 1960 info_last = &tx->info[((cnt - 1) + tx->req) & tx->mask]; 1961 1962 info_map->map = info_last->map; 1963 info_last->map = map; 1964 info_last->m = m; 1965 1966 mxge_submit_req(tx, tx->req_list, cnt); 1967 1968 if (tx->send_go != NULL && tx->queue_active == 0) { 1969 /* Tell the NIC to start polling this slice */ 1970 *tx->send_go = 1; 1971 tx->queue_active = 1; 1972 tx->activate++; 1973 wmb(); 1974 } 1975 return 0; 1976 1977 drop: 1978 m_freem(m); 1979 return err; 1980 } 1981 1982 static void 1983 mxge_start(struct ifnet *ifp, struct ifaltq_subque *ifsq) 1984 { 1985 mxge_softc_t *sc = ifp->if_softc; 1986 mxge_tx_ring_t *tx = ifsq_get_priv(ifsq); 1987 bus_addr_t zeropad; 1988 int encap = 0; 1989 1990 KKASSERT(tx->ifsq == ifsq); 1991 ASSERT_SERIALIZED(&tx->tx_serialize); 1992 1993 if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq)) 1994 return; 1995 1996 zeropad = sc->zeropad_dma.dmem_busaddr; 1997 while (tx->mask - (tx->req - tx->done) > tx->max_desc) { 1998 struct mbuf *m; 1999 int error; 2000 2001 m = ifsq_dequeue(ifsq); 2002 if (m == NULL) 2003 goto done; 2004 2005 BPF_MTAP(ifp, m); 2006 error = mxge_encap(tx, m, zeropad); 2007 if (!error) 2008 encap = 1; 2009 else 2010 IFNET_STAT_INC(ifp, oerrors, 1); 2011 } 2012 2013 /* Ran out of transmit slots */ 2014 ifsq_set_oactive(ifsq); 2015 done: 2016 if (encap) 2017 tx->watchdog.wd_timer = 5; 2018 } 2019 2020 static void 2021 mxge_watchdog(struct ifaltq_subque *ifsq) 2022 { 2023 struct ifnet *ifp = ifsq_get_ifp(ifsq); 2024 struct mxge_softc *sc = ifp->if_softc; 2025 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 2026 mxge_tx_ring_t *tx = ifsq_get_priv(ifsq); 2027 2028 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2029 2030 /* Check for pause blocking before resetting */ 2031 if (tx->watchdog_rx_pause == rx_pause) { 2032 mxge_warn_stuck(sc, tx, 0); 2033 mxge_watchdog_reset(sc); 2034 return; 2035 } else { 2036 if_printf(ifp, "Flow control blocking xmits, " 2037 "check link partner\n"); 2038 } 2039 tx->watchdog_rx_pause = rx_pause; 2040 } 2041 2042 /* 2043 * Copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2044 * at most 32 bytes at a time, so as to avoid involving the software 2045 * pio handler in the nic. We re-write the first segment's low 2046 * DMA address to mark it valid only after we write the entire chunk 2047 * in a burst 2048 */ 2049 static __inline void 2050 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2051 mcp_kreq_ether_recv_t *src) 2052 { 2053 uint32_t low; 2054 2055 low = src->addr_low; 2056 src->addr_low = 0xffffffff; 2057 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2058 wmb(); 2059 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2060 wmb(); 2061 src->addr_low = low; 2062 dst->addr_low = low; 2063 wmb(); 2064 } 2065 2066 static int 2067 mxge_get_buf_small(mxge_rx_ring_t *rx, bus_dmamap_t map, int idx, 2068 boolean_t init) 2069 { 2070 bus_dma_segment_t seg; 2071 struct mbuf *m; 2072 int cnt, err, mflag; 2073 2074 mflag = MB_DONTWAIT; 2075 if (__predict_false(init)) 2076 mflag = MB_WAIT; 2077 2078 m = m_gethdr(mflag, MT_DATA); 2079 if (m == NULL) { 2080 err = ENOBUFS; 2081 if (__predict_false(init)) { 2082 /* 2083 * During initialization, there 2084 * is nothing to setup; bail out 2085 */ 2086 return err; 2087 } 2088 goto done; 2089 } 2090 m->m_len = m->m_pkthdr.len = MHLEN; 2091 2092 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m, 2093 &seg, 1, &cnt, BUS_DMA_NOWAIT); 2094 if (err != 0) { 2095 m_freem(m); 2096 if (__predict_false(init)) { 2097 /* 2098 * During initialization, there 2099 * is nothing to setup; bail out 2100 */ 2101 return err; 2102 } 2103 goto done; 2104 } 2105 2106 rx->info[idx].m = m; 2107 rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2108 rx->shadow[idx].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2109 2110 done: 2111 if ((idx & 7) == 7) 2112 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2113 return err; 2114 } 2115 2116 static int 2117 mxge_get_buf_big(mxge_rx_ring_t *rx, bus_dmamap_t map, int idx, 2118 boolean_t init) 2119 { 2120 bus_dma_segment_t seg; 2121 struct mbuf *m; 2122 int cnt, err, mflag; 2123 2124 mflag = MB_DONTWAIT; 2125 if (__predict_false(init)) 2126 mflag = MB_WAIT; 2127 2128 if (rx->cl_size == MCLBYTES) 2129 m = m_getcl(mflag, MT_DATA, M_PKTHDR); 2130 else 2131 m = m_getjcl(mflag, MT_DATA, M_PKTHDR, MJUMPAGESIZE); 2132 if (m == NULL) { 2133 err = ENOBUFS; 2134 if (__predict_false(init)) { 2135 /* 2136 * During initialization, there 2137 * is nothing to setup; bail out 2138 */ 2139 return err; 2140 } 2141 goto done; 2142 } 2143 m->m_len = m->m_pkthdr.len = rx->cl_size; 2144 2145 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m, 2146 &seg, 1, &cnt, BUS_DMA_NOWAIT); 2147 if (err != 0) { 2148 m_freem(m); 2149 if (__predict_false(init)) { 2150 /* 2151 * During initialization, there 2152 * is nothing to setup; bail out 2153 */ 2154 return err; 2155 } 2156 goto done; 2157 } 2158 2159 rx->info[idx].m = m; 2160 rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2161 rx->shadow[idx].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2162 2163 done: 2164 if ((idx & 7) == 7) 2165 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2166 return err; 2167 } 2168 2169 /* 2170 * Myri10GE hardware checksums are not valid if the sender 2171 * padded the frame with non-zero padding. This is because 2172 * the firmware just does a simple 16-bit 1s complement 2173 * checksum across the entire frame, excluding the first 14 2174 * bytes. It is best to simply to check the checksum and 2175 * tell the stack about it only if the checksum is good 2176 */ 2177 static __inline uint16_t 2178 mxge_rx_csum(struct mbuf *m, int csum) 2179 { 2180 const struct ether_header *eh; 2181 const struct ip *ip; 2182 uint16_t c; 2183 2184 eh = mtod(m, const struct ether_header *); 2185 2186 /* Only deal with IPv4 TCP & UDP for now */ 2187 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP))) 2188 return 1; 2189 2190 ip = (const struct ip *)(eh + 1); 2191 if (__predict_false(ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)) 2192 return 1; 2193 2194 #ifdef INET 2195 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2196 htonl(ntohs(csum) + ntohs(ip->ip_len) + 2197 - (ip->ip_hl << 2) + ip->ip_p)); 2198 #else 2199 c = 1; 2200 #endif 2201 c ^= 0xffff; 2202 return c; 2203 } 2204 2205 static void 2206 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2207 { 2208 struct ether_vlan_header *evl; 2209 uint32_t partial; 2210 2211 evl = mtod(m, struct ether_vlan_header *); 2212 2213 /* 2214 * Fix checksum by subtracting EVL_ENCAPLEN bytes after 2215 * what the firmware thought was the end of the ethernet 2216 * header. 2217 */ 2218 2219 /* Put checksum into host byte order */ 2220 *csum = ntohs(*csum); 2221 2222 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2223 *csum += ~partial; 2224 *csum += ((*csum) < ~partial); 2225 *csum = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2226 *csum = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2227 2228 /* 2229 * Restore checksum to network byte order; 2230 * later consumers expect this 2231 */ 2232 *csum = htons(*csum); 2233 2234 /* save the tag */ 2235 m->m_pkthdr.ether_vlantag = ntohs(evl->evl_tag); 2236 m->m_flags |= M_VLANTAG; 2237 2238 /* 2239 * Remove the 802.1q header by copying the Ethernet 2240 * addresses over it and adjusting the beginning of 2241 * the data in the mbuf. The encapsulated Ethernet 2242 * type field is already in place. 2243 */ 2244 bcopy((char *)evl, (char *)evl + EVL_ENCAPLEN, 2245 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2246 m_adj(m, EVL_ENCAPLEN); 2247 } 2248 2249 2250 static __inline void 2251 mxge_rx_done_big(struct ifnet *ifp, mxge_rx_ring_t *rx, 2252 uint32_t len, uint32_t csum) 2253 { 2254 struct mbuf *m; 2255 const struct ether_header *eh; 2256 bus_dmamap_t old_map; 2257 int idx; 2258 2259 idx = rx->cnt & rx->mask; 2260 rx->cnt++; 2261 2262 /* Save a pointer to the received mbuf */ 2263 m = rx->info[idx].m; 2264 2265 /* Try to replace the received mbuf */ 2266 if (mxge_get_buf_big(rx, rx->extra_map, idx, FALSE)) { 2267 /* Drop the frame -- the old mbuf is re-cycled */ 2268 IFNET_STAT_INC(ifp, ierrors, 1); 2269 return; 2270 } 2271 2272 /* Unmap the received buffer */ 2273 old_map = rx->info[idx].map; 2274 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2275 bus_dmamap_unload(rx->dmat, old_map); 2276 2277 /* Swap the bus_dmamap_t's */ 2278 rx->info[idx].map = rx->extra_map; 2279 rx->extra_map = old_map; 2280 2281 /* 2282 * mcp implicitly skips 1st 2 bytes so that packet is properly 2283 * aligned 2284 */ 2285 m->m_data += MXGEFW_PAD; 2286 2287 m->m_pkthdr.rcvif = ifp; 2288 m->m_len = m->m_pkthdr.len = len; 2289 2290 IFNET_STAT_INC(ifp, ipackets, 1); 2291 2292 eh = mtod(m, const struct ether_header *); 2293 if (eh->ether_type == htons(ETHERTYPE_VLAN)) 2294 mxge_vlan_tag_remove(m, &csum); 2295 2296 /* If the checksum is valid, mark it in the mbuf header */ 2297 if ((ifp->if_capenable & IFCAP_RXCSUM) && 2298 mxge_rx_csum(m, csum) == 0) { 2299 /* Tell the stack that the checksum is good */ 2300 m->m_pkthdr.csum_data = 0xffff; 2301 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2302 CSUM_DATA_VALID; 2303 } 2304 ifp->if_input(ifp, m, NULL, -1); 2305 } 2306 2307 static __inline void 2308 mxge_rx_done_small(struct ifnet *ifp, mxge_rx_ring_t *rx, 2309 uint32_t len, uint32_t csum) 2310 { 2311 const struct ether_header *eh; 2312 struct mbuf *m; 2313 bus_dmamap_t old_map; 2314 int idx; 2315 2316 idx = rx->cnt & rx->mask; 2317 rx->cnt++; 2318 2319 /* Save a pointer to the received mbuf */ 2320 m = rx->info[idx].m; 2321 2322 /* Try to replace the received mbuf */ 2323 if (mxge_get_buf_small(rx, rx->extra_map, idx, FALSE)) { 2324 /* Drop the frame -- the old mbuf is re-cycled */ 2325 IFNET_STAT_INC(ifp, ierrors, 1); 2326 return; 2327 } 2328 2329 /* Unmap the received buffer */ 2330 old_map = rx->info[idx].map; 2331 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2332 bus_dmamap_unload(rx->dmat, old_map); 2333 2334 /* Swap the bus_dmamap_t's */ 2335 rx->info[idx].map = rx->extra_map; 2336 rx->extra_map = old_map; 2337 2338 /* 2339 * mcp implicitly skips 1st 2 bytes so that packet is properly 2340 * aligned 2341 */ 2342 m->m_data += MXGEFW_PAD; 2343 2344 m->m_pkthdr.rcvif = ifp; 2345 m->m_len = m->m_pkthdr.len = len; 2346 2347 IFNET_STAT_INC(ifp, ipackets, 1); 2348 2349 eh = mtod(m, const struct ether_header *); 2350 if (eh->ether_type == htons(ETHERTYPE_VLAN)) 2351 mxge_vlan_tag_remove(m, &csum); 2352 2353 /* If the checksum is valid, mark it in the mbuf header */ 2354 if ((ifp->if_capenable & IFCAP_RXCSUM) && 2355 mxge_rx_csum(m, csum) == 0) { 2356 /* Tell the stack that the checksum is good */ 2357 m->m_pkthdr.csum_data = 0xffff; 2358 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2359 CSUM_DATA_VALID; 2360 } 2361 ifp->if_input(ifp, m, NULL, -1); 2362 } 2363 2364 static __inline void 2365 mxge_clean_rx_done(struct ifnet *ifp, struct mxge_rx_data *rx_data, int cycle) 2366 { 2367 mxge_rx_done_t *rx_done = &rx_data->rx_done; 2368 2369 while (rx_done->entry[rx_done->idx].length != 0 && cycle != 0) { 2370 uint16_t length, checksum; 2371 2372 length = ntohs(rx_done->entry[rx_done->idx].length); 2373 rx_done->entry[rx_done->idx].length = 0; 2374 2375 checksum = rx_done->entry[rx_done->idx].checksum; 2376 2377 if (length <= MXGE_RX_SMALL_BUFLEN) { 2378 mxge_rx_done_small(ifp, &rx_data->rx_small, 2379 length, checksum); 2380 } else { 2381 mxge_rx_done_big(ifp, &rx_data->rx_big, 2382 length, checksum); 2383 } 2384 2385 rx_done->idx++; 2386 rx_done->idx &= rx_done->mask; 2387 --cycle; 2388 } 2389 } 2390 2391 static __inline void 2392 mxge_tx_done(struct ifnet *ifp, mxge_tx_ring_t *tx, uint32_t mcp_idx) 2393 { 2394 ASSERT_SERIALIZED(&tx->tx_serialize); 2395 2396 while (tx->pkt_done != mcp_idx) { 2397 struct mbuf *m; 2398 int idx; 2399 2400 idx = tx->done & tx->mask; 2401 tx->done++; 2402 2403 m = tx->info[idx].m; 2404 /* 2405 * mbuf and DMA map only attached to the first 2406 * segment per-mbuf. 2407 */ 2408 if (m != NULL) { 2409 tx->pkt_done++; 2410 IFNET_STAT_INC(ifp, opackets, 1); 2411 tx->info[idx].m = NULL; 2412 bus_dmamap_unload(tx->dmat, tx->info[idx].map); 2413 m_freem(m); 2414 } 2415 } 2416 2417 /* 2418 * If we have space, clear OACTIVE to tell the stack that 2419 * its OK to send packets 2420 */ 2421 if (tx->req - tx->done < (tx->mask + 1) / 2) { 2422 ifsq_clr_oactive(tx->ifsq); 2423 if (tx->req == tx->done) { 2424 /* Reset watchdog */ 2425 tx->watchdog.wd_timer = 0; 2426 } 2427 } 2428 2429 if (!ifsq_is_empty(tx->ifsq)) 2430 ifsq_devstart(tx->ifsq); 2431 2432 if (tx->send_stop != NULL && tx->req == tx->done) { 2433 /* 2434 * Let the NIC stop polling this queue, since there 2435 * are no more transmits pending 2436 */ 2437 *tx->send_stop = 1; 2438 tx->queue_active = 0; 2439 tx->deactivate++; 2440 wmb(); 2441 } 2442 } 2443 2444 static struct mxge_media_type mxge_xfp_media_types[] = { 2445 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2446 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2447 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2448 {0, (1 << 5), "10GBASE-ER"}, 2449 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2450 {0, (1 << 3), "10GBASE-SW"}, 2451 {0, (1 << 2), "10GBASE-LW"}, 2452 {0, (1 << 1), "10GBASE-EW"}, 2453 {0, (1 << 0), "Reserved"} 2454 }; 2455 2456 static struct mxge_media_type mxge_sfp_media_types[] = { 2457 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"}, 2458 {0, (1 << 7), "Reserved"}, 2459 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2460 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2461 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}, 2462 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"} 2463 }; 2464 2465 static void 2466 mxge_media_set(mxge_softc_t *sc, int media_type) 2467 { 2468 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type, 0, NULL); 2469 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type); 2470 sc->current_media = media_type; 2471 sc->media.ifm_media = sc->media.ifm_cur->ifm_media; 2472 } 2473 2474 static void 2475 mxge_media_init(mxge_softc_t *sc) 2476 { 2477 const char *ptr; 2478 int i; 2479 2480 ifmedia_removeall(&sc->media); 2481 mxge_media_set(sc, IFM_AUTO); 2482 2483 /* 2484 * Parse the product code to deterimine the interface type 2485 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2486 * after the 3rd dash in the driver's cached copy of the 2487 * EEPROM's product code string. 2488 */ 2489 ptr = sc->product_code_string; 2490 if (ptr == NULL) { 2491 if_printf(sc->ifp, "Missing product code\n"); 2492 return; 2493 } 2494 2495 for (i = 0; i < 3; i++, ptr++) { 2496 ptr = strchr(ptr, '-'); 2497 if (ptr == NULL) { 2498 if_printf(sc->ifp, "only %d dashes in PC?!?\n", i); 2499 return; 2500 } 2501 } 2502 if (*ptr == 'C' || *(ptr +1) == 'C') { 2503 /* -C is CX4 */ 2504 sc->connector = MXGE_CX4; 2505 mxge_media_set(sc, IFM_10G_CX4); 2506 } else if (*ptr == 'Q') { 2507 /* -Q is Quad Ribbon Fiber */ 2508 sc->connector = MXGE_QRF; 2509 if_printf(sc->ifp, "Quad Ribbon Fiber Media\n"); 2510 /* DragonFly has no media type for Quad ribbon fiber */ 2511 } else if (*ptr == 'R') { 2512 /* -R is XFP */ 2513 sc->connector = MXGE_XFP; 2514 } else if (*ptr == 'S' || *(ptr +1) == 'S') { 2515 /* -S or -2S is SFP+ */ 2516 sc->connector = MXGE_SFP; 2517 } else { 2518 if_printf(sc->ifp, "Unknown media type: %c\n", *ptr); 2519 } 2520 } 2521 2522 /* 2523 * Determine the media type for a NIC. Some XFPs will identify 2524 * themselves only when their link is up, so this is initiated via a 2525 * link up interrupt. However, this can potentially take up to 2526 * several milliseconds, so it is run via the watchdog routine, rather 2527 * than in the interrupt handler itself. 2528 */ 2529 static void 2530 mxge_media_probe(mxge_softc_t *sc) 2531 { 2532 mxge_cmd_t cmd; 2533 const char *cage_type; 2534 struct mxge_media_type *mxge_media_types = NULL; 2535 int i, err, ms, mxge_media_type_entries; 2536 uint32_t byte; 2537 2538 sc->need_media_probe = 0; 2539 2540 if (sc->connector == MXGE_XFP) { 2541 /* -R is XFP */ 2542 mxge_media_types = mxge_xfp_media_types; 2543 mxge_media_type_entries = NELEM(mxge_xfp_media_types); 2544 byte = MXGE_XFP_COMPLIANCE_BYTE; 2545 cage_type = "XFP"; 2546 } else if (sc->connector == MXGE_SFP) { 2547 /* -S or -2S is SFP+ */ 2548 mxge_media_types = mxge_sfp_media_types; 2549 mxge_media_type_entries = NELEM(mxge_sfp_media_types); 2550 cage_type = "SFP+"; 2551 byte = 3; 2552 } else { 2553 /* nothing to do; media type cannot change */ 2554 return; 2555 } 2556 2557 /* 2558 * At this point we know the NIC has an XFP cage, so now we 2559 * try to determine what is in the cage by using the 2560 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2561 * register. We read just one byte, which may take over 2562 * a millisecond 2563 */ 2564 2565 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2566 cmd.data1 = byte; 2567 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2568 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) 2569 if_printf(sc->ifp, "failed to read XFP\n"); 2570 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) 2571 if_printf(sc->ifp, "Type R/S with no XFP!?!?\n"); 2572 if (err != MXGEFW_CMD_OK) 2573 return; 2574 2575 /* Now we wait for the data to be cached */ 2576 cmd.data0 = byte; 2577 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2578 for (ms = 0; err == EBUSY && ms < 50; ms++) { 2579 DELAY(1000); 2580 cmd.data0 = byte; 2581 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2582 } 2583 if (err != MXGEFW_CMD_OK) { 2584 if_printf(sc->ifp, "failed to read %s (%d, %dms)\n", 2585 cage_type, err, ms); 2586 return; 2587 } 2588 2589 if (cmd.data0 == mxge_media_types[0].bitmask) { 2590 if (bootverbose) { 2591 if_printf(sc->ifp, "%s:%s\n", cage_type, 2592 mxge_media_types[0].name); 2593 } 2594 if (sc->current_media != mxge_media_types[0].flag) { 2595 mxge_media_init(sc); 2596 mxge_media_set(sc, mxge_media_types[0].flag); 2597 } 2598 return; 2599 } 2600 for (i = 1; i < mxge_media_type_entries; i++) { 2601 if (cmd.data0 & mxge_media_types[i].bitmask) { 2602 if (bootverbose) { 2603 if_printf(sc->ifp, "%s:%s\n", cage_type, 2604 mxge_media_types[i].name); 2605 } 2606 2607 if (sc->current_media != mxge_media_types[i].flag) { 2608 mxge_media_init(sc); 2609 mxge_media_set(sc, mxge_media_types[i].flag); 2610 } 2611 return; 2612 } 2613 } 2614 if (bootverbose) { 2615 if_printf(sc->ifp, "%s media 0x%x unknown\n", cage_type, 2616 cmd.data0); 2617 } 2618 } 2619 2620 static void 2621 mxge_intr_status(struct mxge_softc *sc, const mcp_irq_data_t *stats) 2622 { 2623 if (sc->link_state != stats->link_up) { 2624 sc->link_state = stats->link_up; 2625 if (sc->link_state) { 2626 sc->ifp->if_link_state = LINK_STATE_UP; 2627 if_link_state_change(sc->ifp); 2628 if (bootverbose) 2629 if_printf(sc->ifp, "link up\n"); 2630 } else { 2631 sc->ifp->if_link_state = LINK_STATE_DOWN; 2632 if_link_state_change(sc->ifp); 2633 if (bootverbose) 2634 if_printf(sc->ifp, "link down\n"); 2635 } 2636 sc->need_media_probe = 1; 2637 } 2638 2639 if (sc->rdma_tags_available != be32toh(stats->rdma_tags_available)) { 2640 sc->rdma_tags_available = be32toh(stats->rdma_tags_available); 2641 if_printf(sc->ifp, "RDMA timed out! %d tags left\n", 2642 sc->rdma_tags_available); 2643 } 2644 2645 if (stats->link_down) { 2646 sc->down_cnt += stats->link_down; 2647 sc->link_state = 0; 2648 sc->ifp->if_link_state = LINK_STATE_DOWN; 2649 if_link_state_change(sc->ifp); 2650 } 2651 } 2652 2653 static void 2654 mxge_serialize_skipmain(struct mxge_softc *sc) 2655 { 2656 lwkt_serialize_array_enter(sc->serializes, sc->nserialize, 1); 2657 } 2658 2659 static void 2660 mxge_deserialize_skipmain(struct mxge_softc *sc) 2661 { 2662 lwkt_serialize_array_exit(sc->serializes, sc->nserialize, 1); 2663 } 2664 2665 static void 2666 mxge_legacy(void *arg) 2667 { 2668 struct mxge_slice_state *ss = arg; 2669 mxge_softc_t *sc = ss->sc; 2670 mcp_irq_data_t *stats = ss->fw_stats; 2671 mxge_tx_ring_t *tx = &ss->tx; 2672 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done; 2673 uint32_t send_done_count; 2674 uint8_t valid; 2675 2676 ASSERT_SERIALIZED(&sc->main_serialize); 2677 2678 /* Make sure the DMA has finished */ 2679 if (!stats->valid) 2680 return; 2681 valid = stats->valid; 2682 2683 /* Lower legacy IRQ */ 2684 *sc->irq_deassert = 0; 2685 if (!mxge_deassert_wait) { 2686 /* Don't wait for conf. that irq is low */ 2687 stats->valid = 0; 2688 } 2689 2690 mxge_serialize_skipmain(sc); 2691 2692 /* 2693 * Loop while waiting for legacy irq deassertion 2694 * XXX do we really want to loop? 2695 */ 2696 do { 2697 /* Check for transmit completes and receives */ 2698 send_done_count = be32toh(stats->send_done_count); 2699 while ((send_done_count != tx->pkt_done) || 2700 (rx_done->entry[rx_done->idx].length != 0)) { 2701 if (send_done_count != tx->pkt_done) { 2702 mxge_tx_done(&sc->arpcom.ac_if, tx, 2703 (int)send_done_count); 2704 } 2705 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1); 2706 send_done_count = be32toh(stats->send_done_count); 2707 } 2708 if (mxge_deassert_wait) 2709 wmb(); 2710 } while (*((volatile uint8_t *)&stats->valid)); 2711 2712 mxge_deserialize_skipmain(sc); 2713 2714 /* Fw link & error stats meaningful only on the first slice */ 2715 if (__predict_false(stats->stats_updated)) 2716 mxge_intr_status(sc, stats); 2717 2718 /* Check to see if we have rx token to pass back */ 2719 if (valid & 0x1) 2720 *ss->irq_claim = be32toh(3); 2721 *(ss->irq_claim + 1) = be32toh(3); 2722 } 2723 2724 static void 2725 mxge_msi(void *arg) 2726 { 2727 struct mxge_slice_state *ss = arg; 2728 mxge_softc_t *sc = ss->sc; 2729 mcp_irq_data_t *stats = ss->fw_stats; 2730 mxge_tx_ring_t *tx = &ss->tx; 2731 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done; 2732 uint32_t send_done_count; 2733 uint8_t valid; 2734 #ifndef IFPOLL_ENABLE 2735 const boolean_t polling = FALSE; 2736 #else 2737 boolean_t polling = FALSE; 2738 #endif 2739 2740 ASSERT_SERIALIZED(&sc->main_serialize); 2741 2742 /* Make sure the DMA has finished */ 2743 if (__predict_false(!stats->valid)) 2744 return; 2745 2746 valid = stats->valid; 2747 stats->valid = 0; 2748 2749 #ifdef IFPOLL_ENABLE 2750 if (sc->arpcom.ac_if.if_flags & IFF_NPOLLING) 2751 polling = TRUE; 2752 #endif 2753 2754 if (!polling) { 2755 /* Check for receives */ 2756 lwkt_serialize_enter(&ss->rx_data.rx_serialize); 2757 if (rx_done->entry[rx_done->idx].length != 0) 2758 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1); 2759 lwkt_serialize_exit(&ss->rx_data.rx_serialize); 2760 } 2761 2762 /* 2763 * Check for transmit completes 2764 * 2765 * NOTE: 2766 * Since pkt_done is only changed by mxge_tx_done(), 2767 * which is called only in interrupt handler, the 2768 * check w/o holding tx serializer is MPSAFE. 2769 */ 2770 send_done_count = be32toh(stats->send_done_count); 2771 if (send_done_count != tx->pkt_done) { 2772 lwkt_serialize_enter(&tx->tx_serialize); 2773 mxge_tx_done(&sc->arpcom.ac_if, tx, (int)send_done_count); 2774 lwkt_serialize_exit(&tx->tx_serialize); 2775 } 2776 2777 if (__predict_false(stats->stats_updated)) 2778 mxge_intr_status(sc, stats); 2779 2780 /* Check to see if we have rx token to pass back */ 2781 if (!polling && (valid & 0x1)) 2782 *ss->irq_claim = be32toh(3); 2783 *(ss->irq_claim + 1) = be32toh(3); 2784 } 2785 2786 static void 2787 mxge_msix_rx(void *arg) 2788 { 2789 struct mxge_slice_state *ss = arg; 2790 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done; 2791 2792 #ifdef IFPOLL_ENABLE 2793 if (ss->sc->arpcom.ac_if.if_flags & IFF_NPOLLING) 2794 return; 2795 #endif 2796 2797 ASSERT_SERIALIZED(&ss->rx_data.rx_serialize); 2798 2799 if (rx_done->entry[rx_done->idx].length != 0) 2800 mxge_clean_rx_done(&ss->sc->arpcom.ac_if, &ss->rx_data, -1); 2801 2802 *ss->irq_claim = be32toh(3); 2803 } 2804 2805 static void 2806 mxge_msix_rxtx(void *arg) 2807 { 2808 struct mxge_slice_state *ss = arg; 2809 mxge_softc_t *sc = ss->sc; 2810 mcp_irq_data_t *stats = ss->fw_stats; 2811 mxge_tx_ring_t *tx = &ss->tx; 2812 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done; 2813 uint32_t send_done_count; 2814 uint8_t valid; 2815 #ifndef IFPOLL_ENABLE 2816 const boolean_t polling = FALSE; 2817 #else 2818 boolean_t polling = FALSE; 2819 #endif 2820 2821 ASSERT_SERIALIZED(&ss->rx_data.rx_serialize); 2822 2823 /* Make sure the DMA has finished */ 2824 if (__predict_false(!stats->valid)) 2825 return; 2826 2827 valid = stats->valid; 2828 stats->valid = 0; 2829 2830 #ifdef IFPOLL_ENABLE 2831 if (sc->arpcom.ac_if.if_flags & IFF_NPOLLING) 2832 polling = TRUE; 2833 #endif 2834 2835 /* Check for receives */ 2836 if (!polling && rx_done->entry[rx_done->idx].length != 0) 2837 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1); 2838 2839 /* 2840 * Check for transmit completes 2841 * 2842 * NOTE: 2843 * Since pkt_done is only changed by mxge_tx_done(), 2844 * which is called only in interrupt handler, the 2845 * check w/o holding tx serializer is MPSAFE. 2846 */ 2847 send_done_count = be32toh(stats->send_done_count); 2848 if (send_done_count != tx->pkt_done) { 2849 lwkt_serialize_enter(&tx->tx_serialize); 2850 mxge_tx_done(&sc->arpcom.ac_if, tx, (int)send_done_count); 2851 lwkt_serialize_exit(&tx->tx_serialize); 2852 } 2853 2854 /* Check to see if we have rx token to pass back */ 2855 if (!polling && (valid & 0x1)) 2856 *ss->irq_claim = be32toh(3); 2857 *(ss->irq_claim + 1) = be32toh(3); 2858 } 2859 2860 static void 2861 mxge_init(void *arg) 2862 { 2863 struct mxge_softc *sc = arg; 2864 2865 ASSERT_IFNET_SERIALIZED_ALL(sc->ifp); 2866 if ((sc->ifp->if_flags & IFF_RUNNING) == 0) 2867 mxge_open(sc); 2868 } 2869 2870 static void 2871 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 2872 { 2873 int i; 2874 2875 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 2876 if (ss->rx_data.rx_big.info[i].m == NULL) 2877 continue; 2878 bus_dmamap_unload(ss->rx_data.rx_big.dmat, 2879 ss->rx_data.rx_big.info[i].map); 2880 m_freem(ss->rx_data.rx_big.info[i].m); 2881 ss->rx_data.rx_big.info[i].m = NULL; 2882 } 2883 2884 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) { 2885 if (ss->rx_data.rx_small.info[i].m == NULL) 2886 continue; 2887 bus_dmamap_unload(ss->rx_data.rx_small.dmat, 2888 ss->rx_data.rx_small.info[i].map); 2889 m_freem(ss->rx_data.rx_small.info[i].m); 2890 ss->rx_data.rx_small.info[i].m = NULL; 2891 } 2892 2893 /* Transmit ring used only on the first slice */ 2894 if (ss->tx.info == NULL) 2895 return; 2896 2897 for (i = 0; i <= ss->tx.mask; i++) { 2898 if (ss->tx.info[i].m == NULL) 2899 continue; 2900 bus_dmamap_unload(ss->tx.dmat, ss->tx.info[i].map); 2901 m_freem(ss->tx.info[i].m); 2902 ss->tx.info[i].m = NULL; 2903 } 2904 } 2905 2906 static void 2907 mxge_free_mbufs(mxge_softc_t *sc) 2908 { 2909 int slice; 2910 2911 for (slice = 0; slice < sc->num_slices; slice++) 2912 mxge_free_slice_mbufs(&sc->ss[slice]); 2913 } 2914 2915 static void 2916 mxge_free_slice_rings(struct mxge_slice_state *ss) 2917 { 2918 int i; 2919 2920 if (ss->rx_data.rx_done.entry != NULL) { 2921 mxge_dma_free(&ss->rx_done_dma); 2922 ss->rx_data.rx_done.entry = NULL; 2923 } 2924 2925 if (ss->tx.req_list != NULL) { 2926 kfree(ss->tx.req_list, M_DEVBUF); 2927 ss->tx.req_list = NULL; 2928 } 2929 2930 if (ss->tx.seg_list != NULL) { 2931 kfree(ss->tx.seg_list, M_DEVBUF); 2932 ss->tx.seg_list = NULL; 2933 } 2934 2935 if (ss->rx_data.rx_small.shadow != NULL) { 2936 kfree(ss->rx_data.rx_small.shadow, M_DEVBUF); 2937 ss->rx_data.rx_small.shadow = NULL; 2938 } 2939 2940 if (ss->rx_data.rx_big.shadow != NULL) { 2941 kfree(ss->rx_data.rx_big.shadow, M_DEVBUF); 2942 ss->rx_data.rx_big.shadow = NULL; 2943 } 2944 2945 if (ss->tx.info != NULL) { 2946 if (ss->tx.dmat != NULL) { 2947 for (i = 0; i <= ss->tx.mask; i++) { 2948 bus_dmamap_destroy(ss->tx.dmat, 2949 ss->tx.info[i].map); 2950 } 2951 bus_dma_tag_destroy(ss->tx.dmat); 2952 } 2953 kfree(ss->tx.info, M_DEVBUF); 2954 ss->tx.info = NULL; 2955 } 2956 2957 if (ss->rx_data.rx_small.info != NULL) { 2958 if (ss->rx_data.rx_small.dmat != NULL) { 2959 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) { 2960 bus_dmamap_destroy(ss->rx_data.rx_small.dmat, 2961 ss->rx_data.rx_small.info[i].map); 2962 } 2963 bus_dmamap_destroy(ss->rx_data.rx_small.dmat, 2964 ss->rx_data.rx_small.extra_map); 2965 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat); 2966 } 2967 kfree(ss->rx_data.rx_small.info, M_DEVBUF); 2968 ss->rx_data.rx_small.info = NULL; 2969 } 2970 2971 if (ss->rx_data.rx_big.info != NULL) { 2972 if (ss->rx_data.rx_big.dmat != NULL) { 2973 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 2974 bus_dmamap_destroy(ss->rx_data.rx_big.dmat, 2975 ss->rx_data.rx_big.info[i].map); 2976 } 2977 bus_dmamap_destroy(ss->rx_data.rx_big.dmat, 2978 ss->rx_data.rx_big.extra_map); 2979 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat); 2980 } 2981 kfree(ss->rx_data.rx_big.info, M_DEVBUF); 2982 ss->rx_data.rx_big.info = NULL; 2983 } 2984 } 2985 2986 static void 2987 mxge_free_rings(mxge_softc_t *sc) 2988 { 2989 int slice; 2990 2991 if (sc->ss == NULL) 2992 return; 2993 2994 for (slice = 0; slice < sc->num_slices; slice++) 2995 mxge_free_slice_rings(&sc->ss[slice]); 2996 } 2997 2998 static int 2999 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3000 int tx_ring_entries) 3001 { 3002 mxge_softc_t *sc = ss->sc; 3003 size_t bytes; 3004 int err, i; 3005 3006 /* 3007 * Allocate per-slice receive resources 3008 */ 3009 3010 ss->rx_data.rx_small.mask = ss->rx_data.rx_big.mask = 3011 rx_ring_entries - 1; 3012 ss->rx_data.rx_done.mask = (2 * rx_ring_entries) - 1; 3013 3014 /* Allocate the rx shadow rings */ 3015 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_small.shadow); 3016 ss->rx_data.rx_small.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3017 3018 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_big.shadow); 3019 ss->rx_data.rx_big.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3020 3021 /* Allocate the rx host info rings */ 3022 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_small.info); 3023 ss->rx_data.rx_small.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3024 3025 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_big.info); 3026 ss->rx_data.rx_big.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3027 3028 /* Allocate the rx busdma resources */ 3029 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3030 1, /* alignment */ 3031 4096, /* boundary */ 3032 BUS_SPACE_MAXADDR, /* low */ 3033 BUS_SPACE_MAXADDR, /* high */ 3034 NULL, NULL, /* filter */ 3035 MHLEN, /* maxsize */ 3036 1, /* num segs */ 3037 MHLEN, /* maxsegsize */ 3038 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, 3039 /* flags */ 3040 &ss->rx_data.rx_small.dmat); /* tag */ 3041 if (err != 0) { 3042 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3043 err); 3044 return err; 3045 } 3046 3047 err = bus_dmamap_create(ss->rx_data.rx_small.dmat, BUS_DMA_WAITOK, 3048 &ss->rx_data.rx_small.extra_map); 3049 if (err != 0) { 3050 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", err); 3051 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat); 3052 ss->rx_data.rx_small.dmat = NULL; 3053 return err; 3054 } 3055 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) { 3056 err = bus_dmamap_create(ss->rx_data.rx_small.dmat, 3057 BUS_DMA_WAITOK, &ss->rx_data.rx_small.info[i].map); 3058 if (err != 0) { 3059 int j; 3060 3061 device_printf(sc->dev, "Err %d rx_small dmamap\n", err); 3062 3063 for (j = 0; j < i; ++j) { 3064 bus_dmamap_destroy(ss->rx_data.rx_small.dmat, 3065 ss->rx_data.rx_small.info[j].map); 3066 } 3067 bus_dmamap_destroy(ss->rx_data.rx_small.dmat, 3068 ss->rx_data.rx_small.extra_map); 3069 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat); 3070 ss->rx_data.rx_small.dmat = NULL; 3071 return err; 3072 } 3073 } 3074 3075 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3076 1, /* alignment */ 3077 4096, /* boundary */ 3078 BUS_SPACE_MAXADDR, /* low */ 3079 BUS_SPACE_MAXADDR, /* high */ 3080 NULL, NULL, /* filter */ 3081 4096, /* maxsize */ 3082 1, /* num segs */ 3083 4096, /* maxsegsize*/ 3084 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, 3085 /* flags */ 3086 &ss->rx_data.rx_big.dmat); /* tag */ 3087 if (err != 0) { 3088 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3089 err); 3090 return err; 3091 } 3092 3093 err = bus_dmamap_create(ss->rx_data.rx_big.dmat, BUS_DMA_WAITOK, 3094 &ss->rx_data.rx_big.extra_map); 3095 if (err != 0) { 3096 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", err); 3097 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat); 3098 ss->rx_data.rx_big.dmat = NULL; 3099 return err; 3100 } 3101 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 3102 err = bus_dmamap_create(ss->rx_data.rx_big.dmat, BUS_DMA_WAITOK, 3103 &ss->rx_data.rx_big.info[i].map); 3104 if (err != 0) { 3105 int j; 3106 3107 device_printf(sc->dev, "Err %d rx_big dmamap\n", err); 3108 for (j = 0; j < i; ++j) { 3109 bus_dmamap_destroy(ss->rx_data.rx_big.dmat, 3110 ss->rx_data.rx_big.info[j].map); 3111 } 3112 bus_dmamap_destroy(ss->rx_data.rx_big.dmat, 3113 ss->rx_data.rx_big.extra_map); 3114 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat); 3115 ss->rx_data.rx_big.dmat = NULL; 3116 return err; 3117 } 3118 } 3119 3120 /* 3121 * Now allocate TX resources 3122 */ 3123 3124 ss->tx.mask = tx_ring_entries - 1; 3125 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3126 3127 /* 3128 * Allocate the tx request copy block; MUST be at least 8 bytes 3129 * aligned 3130 */ 3131 bytes = sizeof(*ss->tx.req_list) * (ss->tx.max_desc + 4); 3132 ss->tx.req_list = kmalloc_cachealign(__VM_CACHELINE_ALIGN(bytes), 3133 M_DEVBUF, M_WAITOK); 3134 3135 /* Allocate the tx busdma segment list */ 3136 bytes = sizeof(*ss->tx.seg_list) * ss->tx.max_desc; 3137 ss->tx.seg_list = kmalloc(bytes, M_DEVBUF, M_WAITOK); 3138 3139 /* Allocate the tx host info ring */ 3140 bytes = tx_ring_entries * sizeof(*ss->tx.info); 3141 ss->tx.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3142 3143 /* Allocate the tx busdma resources */ 3144 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3145 1, /* alignment */ 3146 sc->tx_boundary, /* boundary */ 3147 BUS_SPACE_MAXADDR, /* low */ 3148 BUS_SPACE_MAXADDR, /* high */ 3149 NULL, NULL, /* filter */ 3150 IP_MAXPACKET + 3151 sizeof(struct ether_vlan_header), 3152 /* maxsize */ 3153 ss->tx.max_desc - 2, /* num segs */ 3154 sc->tx_boundary, /* maxsegsz */ 3155 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | 3156 BUS_DMA_ONEBPAGE, /* flags */ 3157 &ss->tx.dmat); /* tag */ 3158 if (err != 0) { 3159 device_printf(sc->dev, "Err %d allocating tx dmat\n", err); 3160 return err; 3161 } 3162 3163 /* 3164 * Now use these tags to setup DMA maps for each slot in the ring 3165 */ 3166 for (i = 0; i <= ss->tx.mask; i++) { 3167 err = bus_dmamap_create(ss->tx.dmat, 3168 BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &ss->tx.info[i].map); 3169 if (err != 0) { 3170 int j; 3171 3172 device_printf(sc->dev, "Err %d tx dmamap\n", err); 3173 for (j = 0; j < i; ++j) { 3174 bus_dmamap_destroy(ss->tx.dmat, 3175 ss->tx.info[j].map); 3176 } 3177 bus_dma_tag_destroy(ss->tx.dmat); 3178 ss->tx.dmat = NULL; 3179 return err; 3180 } 3181 } 3182 return 0; 3183 } 3184 3185 static int 3186 mxge_alloc_rings(mxge_softc_t *sc) 3187 { 3188 mxge_cmd_t cmd; 3189 int tx_ring_size; 3190 int tx_ring_entries, rx_ring_entries; 3191 int err, slice; 3192 3193 /* Get ring sizes */ 3194 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3195 if (err != 0) { 3196 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3197 return err; 3198 } 3199 tx_ring_size = cmd.data0; 3200 3201 tx_ring_entries = tx_ring_size / sizeof(mcp_kreq_ether_send_t); 3202 rx_ring_entries = sc->rx_intr_slots / 2; 3203 3204 if (bootverbose) { 3205 device_printf(sc->dev, "tx desc %d, rx desc %d\n", 3206 tx_ring_entries, rx_ring_entries); 3207 } 3208 3209 ifq_set_maxlen(&sc->ifp->if_snd, tx_ring_entries - 1); 3210 ifq_set_ready(&sc->ifp->if_snd); 3211 ifq_set_subq_cnt(&sc->ifp->if_snd, sc->num_tx_rings); 3212 3213 if (sc->num_tx_rings > 1) { 3214 sc->ifp->if_mapsubq = ifq_mapsubq_mask; 3215 ifq_set_subq_mask(&sc->ifp->if_snd, sc->num_tx_rings - 1); 3216 } 3217 3218 for (slice = 0; slice < sc->num_slices; slice++) { 3219 err = mxge_alloc_slice_rings(&sc->ss[slice], 3220 rx_ring_entries, tx_ring_entries); 3221 if (err != 0) { 3222 device_printf(sc->dev, 3223 "alloc %d slice rings failed\n", slice); 3224 return err; 3225 } 3226 } 3227 return 0; 3228 } 3229 3230 static void 3231 mxge_choose_params(int mtu, int *cl_size) 3232 { 3233 int bufsize = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN + MXGEFW_PAD; 3234 3235 if (bufsize < MCLBYTES) { 3236 *cl_size = MCLBYTES; 3237 } else { 3238 KASSERT(bufsize < MJUMPAGESIZE, ("invalid MTU %d", mtu)); 3239 *cl_size = MJUMPAGESIZE; 3240 } 3241 } 3242 3243 static int 3244 mxge_slice_open(struct mxge_slice_state *ss, int cl_size) 3245 { 3246 mxge_cmd_t cmd; 3247 int err, i, slice; 3248 3249 slice = ss - ss->sc->ss; 3250 3251 /* 3252 * Get the lanai pointers to the send and receive rings 3253 */ 3254 err = 0; 3255 3256 if (ss->sc->num_tx_rings == 1) { 3257 if (slice == 0) { 3258 cmd.data0 = slice; 3259 err = mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SEND_OFFSET, 3260 &cmd); 3261 ss->tx.lanai = (volatile mcp_kreq_ether_send_t *) 3262 (ss->sc->sram + cmd.data0); 3263 /* Leave send_go and send_stop as NULL */ 3264 } 3265 } else { 3266 cmd.data0 = slice; 3267 err = mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3268 ss->tx.lanai = (volatile mcp_kreq_ether_send_t *) 3269 (ss->sc->sram + cmd.data0); 3270 ss->tx.send_go = (volatile uint32_t *) 3271 (ss->sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3272 ss->tx.send_stop = (volatile uint32_t *) 3273 (ss->sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3274 } 3275 3276 cmd.data0 = slice; 3277 err |= mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3278 ss->rx_data.rx_small.lanai = 3279 (volatile mcp_kreq_ether_recv_t *)(ss->sc->sram + cmd.data0); 3280 3281 cmd.data0 = slice; 3282 err |= mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3283 ss->rx_data.rx_big.lanai = 3284 (volatile mcp_kreq_ether_recv_t *)(ss->sc->sram + cmd.data0); 3285 3286 if (err != 0) { 3287 if_printf(ss->sc->ifp, 3288 "failed to get ring sizes or locations\n"); 3289 return EIO; 3290 } 3291 3292 /* 3293 * Stock small receive ring 3294 */ 3295 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) { 3296 err = mxge_get_buf_small(&ss->rx_data.rx_small, 3297 ss->rx_data.rx_small.info[i].map, i, TRUE); 3298 if (err) { 3299 if_printf(ss->sc->ifp, "alloced %d/%d smalls\n", i, 3300 ss->rx_data.rx_small.mask + 1); 3301 return ENOMEM; 3302 } 3303 } 3304 3305 /* 3306 * Stock big receive ring 3307 */ 3308 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 3309 ss->rx_data.rx_big.shadow[i].addr_low = 0xffffffff; 3310 ss->rx_data.rx_big.shadow[i].addr_high = 0xffffffff; 3311 } 3312 3313 ss->rx_data.rx_big.cl_size = cl_size; 3314 3315 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 3316 err = mxge_get_buf_big(&ss->rx_data.rx_big, 3317 ss->rx_data.rx_big.info[i].map, i, TRUE); 3318 if (err) { 3319 if_printf(ss->sc->ifp, "alloced %d/%d bigs\n", i, 3320 ss->rx_data.rx_big.mask + 1); 3321 return ENOMEM; 3322 } 3323 } 3324 return 0; 3325 } 3326 3327 static int 3328 mxge_open(mxge_softc_t *sc) 3329 { 3330 struct ifnet *ifp = sc->ifp; 3331 mxge_cmd_t cmd; 3332 int err, slice, cl_size, i; 3333 bus_addr_t bus; 3334 volatile uint8_t *itable; 3335 struct mxge_slice_state *ss; 3336 3337 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3338 3339 /* Copy the MAC address in case it was overridden */ 3340 bcopy(IF_LLADDR(ifp), sc->mac_addr, ETHER_ADDR_LEN); 3341 3342 err = mxge_reset(sc, 1); 3343 if (err != 0) { 3344 if_printf(ifp, "failed to reset\n"); 3345 return EIO; 3346 } 3347 3348 if (sc->num_slices > 1) { 3349 /* Setup the indirection table */ 3350 cmd.data0 = sc->num_slices; 3351 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, &cmd); 3352 3353 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, &cmd); 3354 if (err != 0) { 3355 if_printf(ifp, "failed to setup rss tables\n"); 3356 return err; 3357 } 3358 3359 /* Just enable an identity mapping */ 3360 itable = sc->sram + cmd.data0; 3361 for (i = 0; i < sc->num_slices; i++) 3362 itable[i] = (uint8_t)i; 3363 3364 if (sc->use_rss) { 3365 volatile uint8_t *hwkey; 3366 uint8_t swkey[MXGE_HWRSS_KEYLEN]; 3367 3368 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_KEY_OFFSET, 3369 &cmd); 3370 if (err != 0) { 3371 if_printf(ifp, "failed to get rsskey\n"); 3372 return err; 3373 } 3374 hwkey = sc->sram + cmd.data0; 3375 3376 toeplitz_get_key(swkey, MXGE_HWRSS_KEYLEN); 3377 for (i = 0; i < MXGE_HWRSS_KEYLEN; ++i) 3378 hwkey[i] = swkey[i]; 3379 wmb(); 3380 3381 err = mxge_send_cmd(sc, MXGEFW_CMD_RSS_KEY_UPDATED, 3382 &cmd); 3383 if (err != 0) { 3384 if_printf(ifp, "failed to update rsskey\n"); 3385 return err; 3386 } 3387 if (bootverbose) 3388 if_printf(ifp, "RSS key updated\n"); 3389 } 3390 3391 cmd.data0 = 1; 3392 if (sc->use_rss) { 3393 if (bootverbose) 3394 if_printf(ifp, "input hash: RSS\n"); 3395 cmd.data1 = MXGEFW_RSS_HASH_TYPE_IPV4 | 3396 MXGEFW_RSS_HASH_TYPE_TCP_IPV4; 3397 } else { 3398 if (bootverbose) 3399 if_printf(ifp, "input hash: SRC_DST_PORT\n"); 3400 cmd.data1 = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 3401 } 3402 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3403 if (err != 0) { 3404 if_printf(ifp, "failed to enable slices\n"); 3405 return err; 3406 } 3407 } 3408 3409 cmd.data0 = MXGEFW_TSO_MODE_NDIS; 3410 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_TSO_MODE, &cmd); 3411 if (err) { 3412 /* 3413 * Can't change TSO mode to NDIS, never allow TSO then 3414 */ 3415 if_printf(ifp, "failed to set TSO mode\n"); 3416 ifp->if_capenable &= ~IFCAP_TSO; 3417 ifp->if_capabilities &= ~IFCAP_TSO; 3418 ifp->if_hwassist &= ~CSUM_TSO; 3419 } 3420 3421 mxge_choose_params(ifp->if_mtu, &cl_size); 3422 3423 cmd.data0 = 1; 3424 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, &cmd); 3425 /* 3426 * Error is only meaningful if we're trying to set 3427 * MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 3428 */ 3429 3430 /* 3431 * Give the firmware the mtu and the big and small buffer 3432 * sizes. The firmware wants the big buf size to be a power 3433 * of two. Luckily, DragonFly's clusters are powers of two 3434 */ 3435 cmd.data0 = ifp->if_mtu + ETHER_HDR_LEN + EVL_ENCAPLEN; 3436 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3437 3438 cmd.data0 = MXGE_RX_SMALL_BUFLEN; 3439 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd); 3440 3441 cmd.data0 = cl_size; 3442 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3443 3444 if (err != 0) { 3445 if_printf(ifp, "failed to setup params\n"); 3446 goto abort; 3447 } 3448 3449 /* Now give him the pointer to the stats block */ 3450 for (slice = 0; slice < sc->num_slices; slice++) { 3451 ss = &sc->ss[slice]; 3452 cmd.data0 = MXGE_LOWPART_TO_U32(ss->fw_stats_dma.dmem_busaddr); 3453 cmd.data1 = MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.dmem_busaddr); 3454 cmd.data2 = sizeof(struct mcp_irq_data); 3455 cmd.data2 |= (slice << 16); 3456 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3457 } 3458 3459 if (err != 0) { 3460 bus = sc->ss->fw_stats_dma.dmem_busaddr; 3461 bus += offsetof(struct mcp_irq_data, send_done_count); 3462 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3463 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3464 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3465 &cmd); 3466 3467 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3468 sc->fw_multicast_support = 0; 3469 } else { 3470 sc->fw_multicast_support = 1; 3471 } 3472 3473 if (err != 0) { 3474 if_printf(ifp, "failed to setup params\n"); 3475 goto abort; 3476 } 3477 3478 for (slice = 0; slice < sc->num_slices; slice++) { 3479 err = mxge_slice_open(&sc->ss[slice], cl_size); 3480 if (err != 0) { 3481 if_printf(ifp, "couldn't open slice %d\n", slice); 3482 goto abort; 3483 } 3484 } 3485 3486 /* Finally, start the firmware running */ 3487 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3488 if (err) { 3489 if_printf(ifp, "Couldn't bring up link\n"); 3490 goto abort; 3491 } 3492 3493 ifp->if_flags |= IFF_RUNNING; 3494 for (i = 0; i < sc->num_tx_rings; ++i) { 3495 mxge_tx_ring_t *tx = &sc->ss[i].tx; 3496 3497 ifsq_clr_oactive(tx->ifsq); 3498 ifsq_watchdog_start(&tx->watchdog); 3499 } 3500 3501 return 0; 3502 3503 abort: 3504 mxge_free_mbufs(sc); 3505 return err; 3506 } 3507 3508 static void 3509 mxge_close(mxge_softc_t *sc, int down) 3510 { 3511 struct ifnet *ifp = sc->ifp; 3512 mxge_cmd_t cmd; 3513 int err, old_down_cnt, i; 3514 3515 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3516 3517 if (!down) { 3518 old_down_cnt = sc->down_cnt; 3519 wmb(); 3520 3521 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3522 if (err) 3523 if_printf(ifp, "Couldn't bring down link\n"); 3524 3525 if (old_down_cnt == sc->down_cnt) { 3526 /* 3527 * Wait for down irq 3528 * XXX racy 3529 */ 3530 ifnet_deserialize_all(ifp); 3531 DELAY(10 * sc->intr_coal_delay); 3532 ifnet_serialize_all(ifp); 3533 } 3534 3535 wmb(); 3536 if (old_down_cnt == sc->down_cnt) 3537 if_printf(ifp, "never got down irq\n"); 3538 } 3539 mxge_free_mbufs(sc); 3540 3541 ifp->if_flags &= ~IFF_RUNNING; 3542 for (i = 0; i < sc->num_tx_rings; ++i) { 3543 mxge_tx_ring_t *tx = &sc->ss[i].tx; 3544 3545 ifsq_clr_oactive(tx->ifsq); 3546 ifsq_watchdog_stop(&tx->watchdog); 3547 } 3548 } 3549 3550 static void 3551 mxge_setup_cfg_space(mxge_softc_t *sc) 3552 { 3553 device_t dev = sc->dev; 3554 int reg; 3555 uint16_t lnk, pectl; 3556 3557 /* Find the PCIe link width and set max read request to 4KB */ 3558 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 3559 lnk = pci_read_config(dev, reg + 0x12, 2); 3560 sc->link_width = (lnk >> 4) & 0x3f; 3561 3562 if (sc->pectl == 0) { 3563 pectl = pci_read_config(dev, reg + 0x8, 2); 3564 pectl = (pectl & ~0x7000) | (5 << 12); 3565 pci_write_config(dev, reg + 0x8, pectl, 2); 3566 sc->pectl = pectl; 3567 } else { 3568 /* Restore saved pectl after watchdog reset */ 3569 pci_write_config(dev, reg + 0x8, sc->pectl, 2); 3570 } 3571 } 3572 3573 /* Enable DMA and memory space access */ 3574 pci_enable_busmaster(dev); 3575 } 3576 3577 static uint32_t 3578 mxge_read_reboot(mxge_softc_t *sc) 3579 { 3580 device_t dev = sc->dev; 3581 uint32_t vs; 3582 3583 /* Find the vendor specific offset */ 3584 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) { 3585 if_printf(sc->ifp, "could not find vendor specific offset\n"); 3586 return (uint32_t)-1; 3587 } 3588 /* Enable read32 mode */ 3589 pci_write_config(dev, vs + 0x10, 0x3, 1); 3590 /* Tell NIC which register to read */ 3591 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3592 return pci_read_config(dev, vs + 0x14, 4); 3593 } 3594 3595 static void 3596 mxge_watchdog_reset(mxge_softc_t *sc) 3597 { 3598 struct pci_devinfo *dinfo; 3599 int err, running; 3600 uint32_t reboot; 3601 uint16_t cmd; 3602 3603 err = ENXIO; 3604 3605 if_printf(sc->ifp, "Watchdog reset!\n"); 3606 3607 /* 3608 * Check to see if the NIC rebooted. If it did, then all of 3609 * PCI config space has been reset, and things like the 3610 * busmaster bit will be zero. If this is the case, then we 3611 * must restore PCI config space before the NIC can be used 3612 * again 3613 */ 3614 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3615 if (cmd == 0xffff) { 3616 /* 3617 * Maybe the watchdog caught the NIC rebooting; wait 3618 * up to 100ms for it to finish. If it does not come 3619 * back, then give up 3620 */ 3621 DELAY(1000*100); 3622 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3623 if (cmd == 0xffff) 3624 if_printf(sc->ifp, "NIC disappeared!\n"); 3625 } 3626 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3627 /* Print the reboot status */ 3628 reboot = mxge_read_reboot(sc); 3629 if_printf(sc->ifp, "NIC rebooted, status = 0x%x\n", reboot); 3630 3631 running = sc->ifp->if_flags & IFF_RUNNING; 3632 if (running) { 3633 /* 3634 * Quiesce NIC so that TX routines will not try to 3635 * xmit after restoration of BAR 3636 */ 3637 3638 /* Mark the link as down */ 3639 if (sc->link_state) { 3640 sc->ifp->if_link_state = LINK_STATE_DOWN; 3641 if_link_state_change(sc->ifp); 3642 } 3643 mxge_close(sc, 1); 3644 } 3645 /* Restore PCI configuration space */ 3646 dinfo = device_get_ivars(sc->dev); 3647 pci_cfg_restore(sc->dev, dinfo); 3648 3649 /* And redo any changes we made to our config space */ 3650 mxge_setup_cfg_space(sc); 3651 3652 /* Reload f/w */ 3653 err = mxge_load_firmware(sc, 0); 3654 if (err) 3655 if_printf(sc->ifp, "Unable to re-load f/w\n"); 3656 if (running && !err) { 3657 int i; 3658 3659 err = mxge_open(sc); 3660 3661 for (i = 0; i < sc->num_tx_rings; ++i) 3662 ifsq_devstart_sched(sc->ss[i].tx.ifsq); 3663 } 3664 sc->watchdog_resets++; 3665 } else { 3666 if_printf(sc->ifp, "NIC did not reboot, not resetting\n"); 3667 err = 0; 3668 } 3669 if (err) { 3670 if_printf(sc->ifp, "watchdog reset failed\n"); 3671 } else { 3672 if (sc->dying == 2) 3673 sc->dying = 0; 3674 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3675 } 3676 } 3677 3678 static void 3679 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice) 3680 { 3681 if_printf(sc->ifp, "slice %d struck? ring state:\n", slice); 3682 if_printf(sc->ifp, "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3683 tx->req, tx->done, tx->queue_active); 3684 if_printf(sc->ifp, "tx.activate=%d tx.deactivate=%d\n", 3685 tx->activate, tx->deactivate); 3686 if_printf(sc->ifp, "pkt_done=%d fw=%d\n", 3687 tx->pkt_done, be32toh(sc->ss->fw_stats->send_done_count)); 3688 } 3689 3690 static u_long 3691 mxge_update_stats(mxge_softc_t *sc) 3692 { 3693 u_long ipackets, opackets, pkts; 3694 3695 IFNET_STAT_GET(sc->ifp, ipackets, ipackets); 3696 IFNET_STAT_GET(sc->ifp, opackets, opackets); 3697 3698 pkts = ipackets - sc->ipackets; 3699 pkts += opackets - sc->opackets; 3700 3701 sc->ipackets = ipackets; 3702 sc->opackets = opackets; 3703 3704 return pkts; 3705 } 3706 3707 static void 3708 mxge_tick(void *arg) 3709 { 3710 mxge_softc_t *sc = arg; 3711 u_long pkts = 0; 3712 int err = 0; 3713 int ticks; 3714 3715 lwkt_serialize_enter(&sc->main_serialize); 3716 3717 ticks = mxge_ticks; 3718 if (sc->ifp->if_flags & IFF_RUNNING) { 3719 /* Aggregate stats from different slices */ 3720 pkts = mxge_update_stats(sc); 3721 if (sc->need_media_probe) 3722 mxge_media_probe(sc); 3723 } 3724 if (pkts == 0) { 3725 uint16_t cmd; 3726 3727 /* Ensure NIC did not suffer h/w fault while idle */ 3728 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3729 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3730 sc->dying = 2; 3731 mxge_serialize_skipmain(sc); 3732 mxge_watchdog_reset(sc); 3733 mxge_deserialize_skipmain(sc); 3734 err = ENXIO; 3735 } 3736 3737 /* Look less often if NIC is idle */ 3738 ticks *= 4; 3739 } 3740 3741 if (err == 0) 3742 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 3743 3744 lwkt_serialize_exit(&sc->main_serialize); 3745 } 3746 3747 static int 3748 mxge_media_change(struct ifnet *ifp) 3749 { 3750 return EINVAL; 3751 } 3752 3753 static int 3754 mxge_change_mtu(mxge_softc_t *sc, int mtu) 3755 { 3756 struct ifnet *ifp = sc->ifp; 3757 int real_mtu, old_mtu; 3758 int err = 0; 3759 3760 real_mtu = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN; 3761 if (mtu > sc->max_mtu || real_mtu < 60) 3762 return EINVAL; 3763 3764 old_mtu = ifp->if_mtu; 3765 ifp->if_mtu = mtu; 3766 if (ifp->if_flags & IFF_RUNNING) { 3767 mxge_close(sc, 0); 3768 err = mxge_open(sc); 3769 if (err != 0) { 3770 ifp->if_mtu = old_mtu; 3771 mxge_close(sc, 0); 3772 mxge_open(sc); 3773 } 3774 } 3775 return err; 3776 } 3777 3778 static void 3779 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 3780 { 3781 mxge_softc_t *sc = ifp->if_softc; 3782 3783 3784 if (sc == NULL) 3785 return; 3786 ifmr->ifm_status = IFM_AVALID; 3787 ifmr->ifm_active = IFM_ETHER | IFM_FDX; 3788 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 3789 ifmr->ifm_active |= sc->current_media; 3790 } 3791 3792 static int 3793 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data, 3794 struct ucred *cr __unused) 3795 { 3796 mxge_softc_t *sc = ifp->if_softc; 3797 struct ifreq *ifr = (struct ifreq *)data; 3798 int err, mask; 3799 3800 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3801 err = 0; 3802 3803 switch (command) { 3804 case SIOCSIFMTU: 3805 err = mxge_change_mtu(sc, ifr->ifr_mtu); 3806 break; 3807 3808 case SIOCSIFFLAGS: 3809 if (sc->dying) 3810 return EINVAL; 3811 3812 if (ifp->if_flags & IFF_UP) { 3813 if (!(ifp->if_flags & IFF_RUNNING)) { 3814 err = mxge_open(sc); 3815 } else { 3816 /* 3817 * Take care of PROMISC and ALLMULTI 3818 * flag changes 3819 */ 3820 mxge_change_promisc(sc, 3821 ifp->if_flags & IFF_PROMISC); 3822 mxge_set_multicast_list(sc); 3823 } 3824 } else { 3825 if (ifp->if_flags & IFF_RUNNING) 3826 mxge_close(sc, 0); 3827 } 3828 break; 3829 3830 case SIOCADDMULTI: 3831 case SIOCDELMULTI: 3832 mxge_set_multicast_list(sc); 3833 break; 3834 3835 case SIOCSIFCAP: 3836 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 3837 if (mask & IFCAP_TXCSUM) { 3838 ifp->if_capenable ^= IFCAP_TXCSUM; 3839 if (ifp->if_capenable & IFCAP_TXCSUM) 3840 ifp->if_hwassist |= CSUM_TCP | CSUM_UDP; 3841 else 3842 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP); 3843 } 3844 if (mask & IFCAP_TSO) { 3845 ifp->if_capenable ^= IFCAP_TSO; 3846 if (ifp->if_capenable & IFCAP_TSO) 3847 ifp->if_hwassist |= CSUM_TSO; 3848 else 3849 ifp->if_hwassist &= ~CSUM_TSO; 3850 } 3851 if (mask & IFCAP_RXCSUM) 3852 ifp->if_capenable ^= IFCAP_RXCSUM; 3853 if (mask & IFCAP_VLAN_HWTAGGING) 3854 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 3855 break; 3856 3857 case SIOCGIFMEDIA: 3858 mxge_media_probe(sc); 3859 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 3860 &sc->media, command); 3861 break; 3862 3863 default: 3864 err = ether_ioctl(ifp, command, data); 3865 break; 3866 } 3867 return err; 3868 } 3869 3870 static void 3871 mxge_fetch_tunables(mxge_softc_t *sc) 3872 { 3873 sc->intr_coal_delay = mxge_intr_coal_delay; 3874 if (sc->intr_coal_delay < 0 || sc->intr_coal_delay > (10 * 1000)) 3875 sc->intr_coal_delay = MXGE_INTR_COAL_DELAY; 3876 3877 /* XXX */ 3878 if (mxge_ticks == 0) 3879 mxge_ticks = hz / 2; 3880 3881 sc->pause = mxge_flow_control; 3882 sc->use_rss = mxge_use_rss; 3883 3884 sc->throttle = mxge_throttle; 3885 if (sc->throttle && sc->throttle > MXGE_MAX_THROTTLE) 3886 sc->throttle = MXGE_MAX_THROTTLE; 3887 if (sc->throttle && sc->throttle < MXGE_MIN_THROTTLE) 3888 sc->throttle = MXGE_MIN_THROTTLE; 3889 } 3890 3891 static void 3892 mxge_free_slices(mxge_softc_t *sc) 3893 { 3894 struct mxge_slice_state *ss; 3895 int i; 3896 3897 if (sc->ss == NULL) 3898 return; 3899 3900 for (i = 0; i < sc->num_slices; i++) { 3901 ss = &sc->ss[i]; 3902 if (ss->fw_stats != NULL) { 3903 mxge_dma_free(&ss->fw_stats_dma); 3904 ss->fw_stats = NULL; 3905 } 3906 if (ss->rx_data.rx_done.entry != NULL) { 3907 mxge_dma_free(&ss->rx_done_dma); 3908 ss->rx_data.rx_done.entry = NULL; 3909 } 3910 } 3911 kfree(sc->ss, M_DEVBUF); 3912 sc->ss = NULL; 3913 } 3914 3915 static int 3916 mxge_alloc_slices(mxge_softc_t *sc) 3917 { 3918 mxge_cmd_t cmd; 3919 struct mxge_slice_state *ss; 3920 size_t bytes; 3921 int err, i, rx_ring_size; 3922 3923 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 3924 if (err != 0) { 3925 device_printf(sc->dev, "Cannot determine rx ring size\n"); 3926 return err; 3927 } 3928 rx_ring_size = cmd.data0; 3929 sc->rx_intr_slots = 2 * (rx_ring_size / sizeof (mcp_dma_addr_t)); 3930 3931 bytes = sizeof(*sc->ss) * sc->num_slices; 3932 sc->ss = kmalloc_cachealign(bytes, M_DEVBUF, M_WAITOK | M_ZERO); 3933 3934 for (i = 0; i < sc->num_slices; i++) { 3935 ss = &sc->ss[i]; 3936 3937 ss->sc = sc; 3938 3939 lwkt_serialize_init(&ss->rx_data.rx_serialize); 3940 lwkt_serialize_init(&ss->tx.tx_serialize); 3941 ss->intr_rid = -1; 3942 3943 /* 3944 * Allocate per-slice rx interrupt queue 3945 * XXX assume 4bytes mcp_slot 3946 */ 3947 bytes = sc->rx_intr_slots * sizeof(mcp_slot_t); 3948 err = mxge_dma_alloc(sc, &ss->rx_done_dma, bytes, 4096); 3949 if (err != 0) { 3950 device_printf(sc->dev, 3951 "alloc %d slice rx_done failed\n", i); 3952 return err; 3953 } 3954 ss->rx_data.rx_done.entry = ss->rx_done_dma.dmem_addr; 3955 3956 /* 3957 * Allocate the per-slice firmware stats 3958 */ 3959 bytes = sizeof(*ss->fw_stats); 3960 err = mxge_dma_alloc(sc, &ss->fw_stats_dma, 3961 sizeof(*ss->fw_stats), 64); 3962 if (err != 0) { 3963 device_printf(sc->dev, 3964 "alloc %d fw_stats failed\n", i); 3965 return err; 3966 } 3967 ss->fw_stats = ss->fw_stats_dma.dmem_addr; 3968 } 3969 return 0; 3970 } 3971 3972 static void 3973 mxge_slice_probe(mxge_softc_t *sc) 3974 { 3975 int status, max_intr_slots, max_slices, num_slices; 3976 int msix_cnt, msix_enable, i, multi_tx; 3977 mxge_cmd_t cmd; 3978 const char *old_fw; 3979 3980 sc->num_slices = 1; 3981 sc->num_tx_rings = 1; 3982 3983 num_slices = device_getenv_int(sc->dev, "num_slices", mxge_num_slices); 3984 if (num_slices == 1) 3985 return; 3986 3987 if (ncpus2 == 1) 3988 return; 3989 3990 msix_enable = device_getenv_int(sc->dev, "msix.enable", 3991 mxge_msix_enable); 3992 if (!msix_enable) 3993 return; 3994 3995 msix_cnt = pci_msix_count(sc->dev); 3996 if (msix_cnt < 2) 3997 return; 3998 3999 /* 4000 * Round down MSI-X vector count to the nearest power of 2 4001 */ 4002 i = 0; 4003 while ((1 << (i + 1)) <= msix_cnt) 4004 ++i; 4005 msix_cnt = 1 << i; 4006 4007 /* 4008 * Now load the slice aware firmware see what it supports 4009 */ 4010 old_fw = sc->fw_name; 4011 if (old_fw == mxge_fw_aligned) 4012 sc->fw_name = mxge_fw_rss_aligned; 4013 else 4014 sc->fw_name = mxge_fw_rss_unaligned; 4015 status = mxge_load_firmware(sc, 0); 4016 if (status != 0) { 4017 device_printf(sc->dev, "Falling back to a single slice\n"); 4018 return; 4019 } 4020 4021 /* 4022 * Try to send a reset command to the card to see if it is alive 4023 */ 4024 memset(&cmd, 0, sizeof(cmd)); 4025 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 4026 if (status != 0) { 4027 device_printf(sc->dev, "failed reset\n"); 4028 goto abort_with_fw; 4029 } 4030 4031 /* 4032 * Get rx ring size to calculate rx interrupt queue size 4033 */ 4034 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4035 if (status != 0) { 4036 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4037 goto abort_with_fw; 4038 } 4039 max_intr_slots = 2 * (cmd.data0 / sizeof(mcp_dma_addr_t)); 4040 4041 /* 4042 * Tell it the size of the rx interrupt queue 4043 */ 4044 cmd.data0 = max_intr_slots * sizeof(struct mcp_slot); 4045 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 4046 if (status != 0) { 4047 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 4048 goto abort_with_fw; 4049 } 4050 4051 /* 4052 * Ask the maximum number of slices it supports 4053 */ 4054 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 4055 if (status != 0) { 4056 device_printf(sc->dev, 4057 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); 4058 goto abort_with_fw; 4059 } 4060 max_slices = cmd.data0; 4061 4062 /* 4063 * Round down max slices count to the nearest power of 2 4064 */ 4065 i = 0; 4066 while ((1 << (i + 1)) <= max_slices) 4067 ++i; 4068 max_slices = 1 << i; 4069 4070 if (max_slices > msix_cnt) 4071 max_slices = msix_cnt; 4072 4073 sc->num_slices = num_slices; 4074 sc->num_slices = if_ring_count2(sc->num_slices, max_slices); 4075 4076 multi_tx = device_getenv_int(sc->dev, "multi_tx", mxge_multi_tx); 4077 if (multi_tx) 4078 sc->num_tx_rings = sc->num_slices; 4079 4080 if (bootverbose) { 4081 device_printf(sc->dev, "using %d slices, max %d\n", 4082 sc->num_slices, max_slices); 4083 } 4084 4085 if (sc->num_slices == 1) 4086 goto abort_with_fw; 4087 return; 4088 4089 abort_with_fw: 4090 sc->fw_name = old_fw; 4091 mxge_load_firmware(sc, 0); 4092 } 4093 4094 static void 4095 mxge_setup_serialize(struct mxge_softc *sc) 4096 { 4097 int i = 0, slice; 4098 4099 /* Main + rx + tx */ 4100 sc->nserialize = (2 * sc->num_slices) + 1; 4101 sc->serializes = 4102 kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *), 4103 M_DEVBUF, M_WAITOK | M_ZERO); 4104 4105 /* 4106 * Setup serializes 4107 * 4108 * NOTE: Order is critical 4109 */ 4110 4111 KKASSERT(i < sc->nserialize); 4112 sc->serializes[i++] = &sc->main_serialize; 4113 4114 for (slice = 0; slice < sc->num_slices; ++slice) { 4115 KKASSERT(i < sc->nserialize); 4116 sc->serializes[i++] = &sc->ss[slice].rx_data.rx_serialize; 4117 } 4118 4119 for (slice = 0; slice < sc->num_slices; ++slice) { 4120 KKASSERT(i < sc->nserialize); 4121 sc->serializes[i++] = &sc->ss[slice].tx.tx_serialize; 4122 } 4123 4124 KKASSERT(i == sc->nserialize); 4125 } 4126 4127 static void 4128 mxge_serialize(struct ifnet *ifp, enum ifnet_serialize slz) 4129 { 4130 struct mxge_softc *sc = ifp->if_softc; 4131 4132 ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz); 4133 } 4134 4135 static void 4136 mxge_deserialize(struct ifnet *ifp, enum ifnet_serialize slz) 4137 { 4138 struct mxge_softc *sc = ifp->if_softc; 4139 4140 ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz); 4141 } 4142 4143 static int 4144 mxge_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz) 4145 { 4146 struct mxge_softc *sc = ifp->if_softc; 4147 4148 return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz); 4149 } 4150 4151 #ifdef INVARIANTS 4152 4153 static void 4154 mxge_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz, 4155 boolean_t serialized) 4156 { 4157 struct mxge_softc *sc = ifp->if_softc; 4158 4159 ifnet_serialize_array_assert(sc->serializes, sc->nserialize, 4160 slz, serialized); 4161 } 4162 4163 #endif /* INVARIANTS */ 4164 4165 #ifdef IFPOLL_ENABLE 4166 4167 static void 4168 mxge_npoll_rx(struct ifnet *ifp, void *xss, int cycle) 4169 { 4170 struct mxge_slice_state *ss = xss; 4171 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done; 4172 4173 ASSERT_SERIALIZED(&ss->rx_data.rx_serialize); 4174 4175 if (rx_done->entry[rx_done->idx].length != 0) { 4176 mxge_clean_rx_done(&ss->sc->arpcom.ac_if, &ss->rx_data, cycle); 4177 } else { 4178 /* 4179 * XXX 4180 * This register writting obviously has cost, 4181 * however, if we don't hand back the rx token, 4182 * the upcoming packets may suffer rediculously 4183 * large delay, as observed on 8AL-C using ping(8). 4184 */ 4185 *ss->irq_claim = be32toh(3); 4186 } 4187 } 4188 4189 static void 4190 mxge_npoll(struct ifnet *ifp, struct ifpoll_info *info) 4191 { 4192 struct mxge_softc *sc = ifp->if_softc; 4193 int i; 4194 4195 if (info == NULL) 4196 return; 4197 4198 /* 4199 * Only poll rx; polling tx and status don't seem to work 4200 */ 4201 for (i = 0; i < sc->num_slices; ++i) { 4202 struct mxge_slice_state *ss = &sc->ss[i]; 4203 int idx = ss->intr_cpuid; 4204 4205 KKASSERT(idx < ncpus2); 4206 info->ifpi_rx[idx].poll_func = mxge_npoll_rx; 4207 info->ifpi_rx[idx].arg = ss; 4208 info->ifpi_rx[idx].serializer = &ss->rx_data.rx_serialize; 4209 } 4210 } 4211 4212 #endif /* IFPOLL_ENABLE */ 4213 4214 static int 4215 mxge_attach(device_t dev) 4216 { 4217 mxge_softc_t *sc = device_get_softc(dev); 4218 struct ifnet *ifp = &sc->arpcom.ac_if; 4219 int err, rid, i; 4220 4221 /* 4222 * Avoid rewriting half the lines in this file to use 4223 * &sc->arpcom.ac_if instead 4224 */ 4225 sc->ifp = ifp; 4226 sc->dev = dev; 4227 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4228 ifmedia_init(&sc->media, 0, mxge_media_change, mxge_media_status); 4229 4230 lwkt_serialize_init(&sc->main_serialize); 4231 4232 mxge_fetch_tunables(sc); 4233 4234 err = bus_dma_tag_create(NULL, /* parent */ 4235 1, /* alignment */ 4236 0, /* boundary */ 4237 BUS_SPACE_MAXADDR, /* low */ 4238 BUS_SPACE_MAXADDR, /* high */ 4239 NULL, NULL, /* filter */ 4240 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 4241 0, /* num segs */ 4242 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 4243 0, /* flags */ 4244 &sc->parent_dmat); /* tag */ 4245 if (err != 0) { 4246 device_printf(dev, "Err %d allocating parent dmat\n", err); 4247 goto failed; 4248 } 4249 4250 callout_init_mp(&sc->co_hdl); 4251 4252 mxge_setup_cfg_space(sc); 4253 4254 /* 4255 * Map the board into the kernel 4256 */ 4257 rid = PCIR_BARS; 4258 sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, 4259 &rid, RF_ACTIVE); 4260 if (sc->mem_res == NULL) { 4261 device_printf(dev, "could not map memory\n"); 4262 err = ENXIO; 4263 goto failed; 4264 } 4265 4266 sc->sram = rman_get_virtual(sc->mem_res); 4267 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4268 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4269 device_printf(dev, "impossible memory region size %ld\n", 4270 rman_get_size(sc->mem_res)); 4271 err = ENXIO; 4272 goto failed; 4273 } 4274 4275 /* 4276 * Make NULL terminated copy of the EEPROM strings section of 4277 * lanai SRAM 4278 */ 4279 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4280 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4281 rman_get_bushandle(sc->mem_res), 4282 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4283 sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE - 2); 4284 err = mxge_parse_strings(sc); 4285 if (err != 0) { 4286 device_printf(dev, "parse EEPROM string failed\n"); 4287 goto failed; 4288 } 4289 4290 /* 4291 * Enable write combining for efficient use of PCIe bus 4292 */ 4293 mxge_enable_wc(sc); 4294 4295 /* 4296 * Allocate the out of band DMA memory 4297 */ 4298 err = mxge_dma_alloc(sc, &sc->cmd_dma, sizeof(mxge_cmd_t), 64); 4299 if (err != 0) { 4300 device_printf(dev, "alloc cmd DMA buf failed\n"); 4301 goto failed; 4302 } 4303 sc->cmd = sc->cmd_dma.dmem_addr; 4304 4305 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4306 if (err != 0) { 4307 device_printf(dev, "alloc zeropad DMA buf failed\n"); 4308 goto failed; 4309 } 4310 4311 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4312 if (err != 0) { 4313 device_printf(dev, "alloc dmabench DMA buf failed\n"); 4314 goto failed; 4315 } 4316 4317 /* Select & load the firmware */ 4318 err = mxge_select_firmware(sc); 4319 if (err != 0) { 4320 device_printf(dev, "select firmware failed\n"); 4321 goto failed; 4322 } 4323 4324 mxge_slice_probe(sc); 4325 err = mxge_alloc_slices(sc); 4326 if (err != 0) { 4327 device_printf(dev, "alloc slices failed\n"); 4328 goto failed; 4329 } 4330 4331 err = mxge_alloc_intr(sc); 4332 if (err != 0) { 4333 device_printf(dev, "alloc intr failed\n"); 4334 goto failed; 4335 } 4336 4337 /* Setup serializes */ 4338 mxge_setup_serialize(sc); 4339 4340 err = mxge_reset(sc, 0); 4341 if (err != 0) { 4342 device_printf(dev, "reset failed\n"); 4343 goto failed; 4344 } 4345 4346 err = mxge_alloc_rings(sc); 4347 if (err != 0) { 4348 device_printf(dev, "failed to allocate rings\n"); 4349 goto failed; 4350 } 4351 4352 ifp->if_baudrate = IF_Gbps(10UL); 4353 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO; 4354 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4355 4356 ifp->if_capabilities |= IFCAP_VLAN_MTU; 4357 #if 0 4358 /* Well, its software, sigh */ 4359 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING; 4360 #endif 4361 ifp->if_capenable = ifp->if_capabilities; 4362 4363 ifp->if_softc = sc; 4364 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4365 ifp->if_init = mxge_init; 4366 ifp->if_ioctl = mxge_ioctl; 4367 ifp->if_start = mxge_start; 4368 #ifdef IFPOLL_ENABLE 4369 if (sc->intr_type != PCI_INTR_TYPE_LEGACY) 4370 ifp->if_npoll = mxge_npoll; 4371 #endif 4372 ifp->if_serialize = mxge_serialize; 4373 ifp->if_deserialize = mxge_deserialize; 4374 ifp->if_tryserialize = mxge_tryserialize; 4375 #ifdef INVARIANTS 4376 ifp->if_serialize_assert = mxge_serialize_assert; 4377 #endif 4378 4379 /* Increase TSO burst length */ 4380 ifp->if_tsolen = (32 * ETHERMTU); 4381 4382 /* Initialise the ifmedia structure */ 4383 mxge_media_init(sc); 4384 mxge_media_probe(sc); 4385 4386 ether_ifattach(ifp, sc->mac_addr, NULL); 4387 4388 /* Setup TX rings and subqueues */ 4389 for (i = 0; i < sc->num_tx_rings; ++i) { 4390 struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i); 4391 struct mxge_slice_state *ss = &sc->ss[i]; 4392 4393 ifsq_set_cpuid(ifsq, ss->intr_cpuid); 4394 ifsq_set_hw_serialize(ifsq, &ss->tx.tx_serialize); 4395 ifsq_set_priv(ifsq, &ss->tx); 4396 ss->tx.ifsq = ifsq; 4397 4398 ifsq_watchdog_init(&ss->tx.watchdog, ifsq, mxge_watchdog); 4399 } 4400 4401 /* 4402 * XXX 4403 * We are not ready to do "gather" jumbo frame, so 4404 * limit MTU to MJUMPAGESIZE 4405 */ 4406 sc->max_mtu = MJUMPAGESIZE - 4407 ETHER_HDR_LEN - EVL_ENCAPLEN - MXGEFW_PAD - 1; 4408 sc->dying = 0; 4409 4410 err = mxge_setup_intr(sc); 4411 if (err != 0) { 4412 device_printf(dev, "alloc and setup intr failed\n"); 4413 ether_ifdetach(ifp); 4414 goto failed; 4415 } 4416 4417 mxge_add_sysctls(sc); 4418 4419 callout_reset_bycpu(&sc->co_hdl, mxge_ticks, mxge_tick, sc, 4420 sc->ss[0].intr_cpuid); 4421 return 0; 4422 4423 failed: 4424 mxge_detach(dev); 4425 return err; 4426 } 4427 4428 static int 4429 mxge_detach(device_t dev) 4430 { 4431 mxge_softc_t *sc = device_get_softc(dev); 4432 4433 if (device_is_attached(dev)) { 4434 struct ifnet *ifp = sc->ifp; 4435 4436 ifnet_serialize_all(ifp); 4437 4438 sc->dying = 1; 4439 if (ifp->if_flags & IFF_RUNNING) 4440 mxge_close(sc, 1); 4441 callout_stop(&sc->co_hdl); 4442 4443 mxge_teardown_intr(sc, sc->num_slices); 4444 4445 ifnet_deserialize_all(ifp); 4446 4447 callout_terminate(&sc->co_hdl); 4448 4449 ether_ifdetach(ifp); 4450 } 4451 ifmedia_removeall(&sc->media); 4452 4453 if (sc->cmd != NULL && sc->zeropad_dma.dmem_addr != NULL && 4454 sc->sram != NULL) 4455 mxge_dummy_rdma(sc, 0); 4456 4457 mxge_free_intr(sc); 4458 mxge_rem_sysctls(sc); 4459 mxge_free_rings(sc); 4460 4461 /* MUST after sysctls, intr and rings are freed */ 4462 mxge_free_slices(sc); 4463 4464 if (sc->dmabench_dma.dmem_addr != NULL) 4465 mxge_dma_free(&sc->dmabench_dma); 4466 if (sc->zeropad_dma.dmem_addr != NULL) 4467 mxge_dma_free(&sc->zeropad_dma); 4468 if (sc->cmd_dma.dmem_addr != NULL) 4469 mxge_dma_free(&sc->cmd_dma); 4470 4471 if (sc->msix_table_res != NULL) { 4472 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(2), 4473 sc->msix_table_res); 4474 } 4475 if (sc->mem_res != NULL) { 4476 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, 4477 sc->mem_res); 4478 } 4479 4480 if (sc->parent_dmat != NULL) 4481 bus_dma_tag_destroy(sc->parent_dmat); 4482 4483 return 0; 4484 } 4485 4486 static int 4487 mxge_shutdown(device_t dev) 4488 { 4489 return 0; 4490 } 4491 4492 static void 4493 mxge_free_msix(struct mxge_softc *sc, boolean_t setup) 4494 { 4495 int i; 4496 4497 KKASSERT(sc->num_slices > 1); 4498 4499 for (i = 0; i < sc->num_slices; ++i) { 4500 struct mxge_slice_state *ss = &sc->ss[i]; 4501 4502 if (ss->intr_res != NULL) { 4503 bus_release_resource(sc->dev, SYS_RES_IRQ, 4504 ss->intr_rid, ss->intr_res); 4505 } 4506 if (ss->intr_rid >= 0) 4507 pci_release_msix_vector(sc->dev, ss->intr_rid); 4508 } 4509 if (setup) 4510 pci_teardown_msix(sc->dev); 4511 } 4512 4513 static int 4514 mxge_alloc_msix(struct mxge_softc *sc) 4515 { 4516 struct mxge_slice_state *ss; 4517 int offset, rid, error, i; 4518 boolean_t setup = FALSE; 4519 4520 KKASSERT(sc->num_slices > 1); 4521 4522 if (sc->num_slices == ncpus2) { 4523 offset = 0; 4524 } else { 4525 int offset_def; 4526 4527 offset_def = (sc->num_slices * device_get_unit(sc->dev)) % 4528 ncpus2; 4529 4530 offset = device_getenv_int(sc->dev, "msix.offset", offset_def); 4531 if (offset >= ncpus2 || 4532 offset % sc->num_slices != 0) { 4533 device_printf(sc->dev, "invalid msix.offset %d, " 4534 "use %d\n", offset, offset_def); 4535 offset = offset_def; 4536 } 4537 } 4538 4539 ss = &sc->ss[0]; 4540 4541 ss->intr_serialize = &sc->main_serialize; 4542 ss->intr_func = mxge_msi; 4543 ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0), 4544 "%s comb", device_get_nameunit(sc->dev)); 4545 ss->intr_desc = ss->intr_desc0; 4546 ss->intr_cpuid = offset; 4547 4548 for (i = 1; i < sc->num_slices; ++i) { 4549 ss = &sc->ss[i]; 4550 4551 ss->intr_serialize = &ss->rx_data.rx_serialize; 4552 if (sc->num_tx_rings == 1) { 4553 ss->intr_func = mxge_msix_rx; 4554 ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0), 4555 "%s rx", device_get_nameunit(sc->dev)); 4556 } else { 4557 ss->intr_func = mxge_msix_rxtx; 4558 ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0), 4559 "%s rxtx", device_get_nameunit(sc->dev)); 4560 } 4561 ss->intr_desc = ss->intr_desc0; 4562 ss->intr_cpuid = offset + i; 4563 } 4564 4565 rid = PCIR_BAR(2); 4566 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 4567 &rid, RF_ACTIVE); 4568 if (sc->msix_table_res == NULL) { 4569 device_printf(sc->dev, "couldn't alloc MSI-X table res\n"); 4570 return ENXIO; 4571 } 4572 4573 error = pci_setup_msix(sc->dev); 4574 if (error) { 4575 device_printf(sc->dev, "could not setup MSI-X\n"); 4576 goto back; 4577 } 4578 setup = TRUE; 4579 4580 for (i = 0; i < sc->num_slices; ++i) { 4581 ss = &sc->ss[i]; 4582 4583 error = pci_alloc_msix_vector(sc->dev, i, &ss->intr_rid, 4584 ss->intr_cpuid); 4585 if (error) { 4586 device_printf(sc->dev, "could not alloc " 4587 "MSI-X %d on cpu%d\n", i, ss->intr_cpuid); 4588 goto back; 4589 } 4590 4591 ss->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, 4592 &ss->intr_rid, RF_ACTIVE); 4593 if (ss->intr_res == NULL) { 4594 device_printf(sc->dev, "could not alloc " 4595 "MSI-X %d resource\n", i); 4596 error = ENXIO; 4597 goto back; 4598 } 4599 } 4600 4601 pci_enable_msix(sc->dev); 4602 sc->intr_type = PCI_INTR_TYPE_MSIX; 4603 back: 4604 if (error) 4605 mxge_free_msix(sc, setup); 4606 return error; 4607 } 4608 4609 static int 4610 mxge_alloc_intr(struct mxge_softc *sc) 4611 { 4612 struct mxge_slice_state *ss; 4613 u_int irq_flags; 4614 4615 if (sc->num_slices > 1) { 4616 int error; 4617 4618 error = mxge_alloc_msix(sc); 4619 if (error) 4620 return error; 4621 KKASSERT(sc->intr_type == PCI_INTR_TYPE_MSIX); 4622 return 0; 4623 } 4624 4625 ss = &sc->ss[0]; 4626 4627 sc->intr_type = pci_alloc_1intr(sc->dev, mxge_msi_enable, 4628 &ss->intr_rid, &irq_flags); 4629 4630 ss->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, 4631 &ss->intr_rid, irq_flags); 4632 if (ss->intr_res == NULL) { 4633 device_printf(sc->dev, "could not alloc interrupt\n"); 4634 return ENXIO; 4635 } 4636 4637 if (sc->intr_type == PCI_INTR_TYPE_LEGACY) 4638 ss->intr_func = mxge_legacy; 4639 else 4640 ss->intr_func = mxge_msi; 4641 ss->intr_serialize = &sc->main_serialize; 4642 ss->intr_cpuid = rman_get_cpuid(ss->intr_res); 4643 4644 return 0; 4645 } 4646 4647 static int 4648 mxge_setup_intr(struct mxge_softc *sc) 4649 { 4650 int i; 4651 4652 for (i = 0; i < sc->num_slices; ++i) { 4653 struct mxge_slice_state *ss = &sc->ss[i]; 4654 int error; 4655 4656 error = bus_setup_intr_descr(sc->dev, ss->intr_res, 4657 INTR_MPSAFE, ss->intr_func, ss, &ss->intr_hand, 4658 ss->intr_serialize, ss->intr_desc); 4659 if (error) { 4660 device_printf(sc->dev, "can't setup %dth intr\n", i); 4661 mxge_teardown_intr(sc, i); 4662 return error; 4663 } 4664 } 4665 return 0; 4666 } 4667 4668 static void 4669 mxge_teardown_intr(struct mxge_softc *sc, int cnt) 4670 { 4671 int i; 4672 4673 if (sc->ss == NULL) 4674 return; 4675 4676 for (i = 0; i < cnt; ++i) { 4677 struct mxge_slice_state *ss = &sc->ss[i]; 4678 4679 bus_teardown_intr(sc->dev, ss->intr_res, ss->intr_hand); 4680 } 4681 } 4682 4683 static void 4684 mxge_free_intr(struct mxge_softc *sc) 4685 { 4686 if (sc->ss == NULL) 4687 return; 4688 4689 if (sc->intr_type != PCI_INTR_TYPE_MSIX) { 4690 struct mxge_slice_state *ss = &sc->ss[0]; 4691 4692 if (ss->intr_res != NULL) { 4693 bus_release_resource(sc->dev, SYS_RES_IRQ, 4694 ss->intr_rid, ss->intr_res); 4695 } 4696 if (sc->intr_type == PCI_INTR_TYPE_MSI) 4697 pci_release_msi(sc->dev); 4698 } else { 4699 mxge_free_msix(sc, TRUE); 4700 } 4701 } 4702