1 /****************************************************************************** 2 3 Copyright (c) 2006-2013, Myricom Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Myricom Inc, nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 
27 28 $FreeBSD: head/sys/dev/mxge/if_mxge.c 254263 2013-08-12 23:30:01Z scottl $ 29 30 ***************************************************************************/ 31 32 #include "opt_ifpoll.h" 33 #include "opt_inet.h" 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/linker.h> 38 #include <sys/firmware.h> 39 #include <sys/endian.h> 40 #include <sys/in_cksum.h> 41 #include <sys/sockio.h> 42 #include <sys/mbuf.h> 43 #include <sys/malloc.h> 44 #include <sys/kernel.h> 45 #include <sys/module.h> 46 #include <sys/serialize.h> 47 #include <sys/socket.h> 48 #include <sys/sysctl.h> 49 50 #include <net/if.h> 51 #include <net/if_arp.h> 52 #include <net/ifq_var.h> 53 #include <net/if_ringmap.h> 54 #include <net/ethernet.h> 55 #include <net/if_dl.h> 56 #include <net/if_media.h> 57 #include <net/if_poll.h> 58 59 #include <net/bpf.h> 60 61 #include <net/if_types.h> 62 #include <net/vlan/if_vlan_var.h> 63 #include <net/zlib.h> 64 #include <net/toeplitz.h> 65 66 #include <netinet/in_systm.h> 67 #include <netinet/in.h> 68 #include <netinet/ip.h> 69 #include <netinet/tcp.h> 70 71 #include <sys/bus.h> 72 #include <sys/rman.h> 73 74 #include <bus/pci/pcireg.h> 75 #include <bus/pci/pcivar.h> 76 #include <bus/pci/pci_private.h> /* XXX for pci_cfg_restore */ 77 78 #include <vm/vm.h> /* for pmap_mapdev() */ 79 #include <vm/pmap.h> 80 81 #if defined(__x86_64__) 82 #include <machine/specialreg.h> 83 #endif 84 85 #include <dev/netif/mxge/mxge_mcp.h> 86 #include <dev/netif/mxge/mcp_gen_header.h> 87 #include <dev/netif/mxge/if_mxge_var.h> 88 89 #define MXGE_IFM (IFM_ETHER | IFM_FDX | IFM_ETH_FORCEPAUSE) 90 91 #define MXGE_RX_SMALL_BUFLEN (MHLEN - MXGEFW_PAD) 92 #define MXGE_HWRSS_KEYLEN 16 93 94 /* Tunable params */ 95 static int mxge_nvidia_ecrc_enable = 1; 96 static int mxge_force_firmware = 0; 97 static int mxge_intr_coal_delay = MXGE_INTR_COAL_DELAY; 98 static int mxge_deassert_wait = 1; 99 static int mxge_ticks; 100 static int mxge_num_slices = 0; 101 static int 
mxge_always_promisc = 0; 102 static int mxge_throttle = 0; 103 static int mxge_msi_enable = 1; 104 static int mxge_msix_enable = 1; 105 static int mxge_multi_tx = 1; 106 /* 107 * Don't use RSS by default, its just too slow 108 */ 109 static int mxge_use_rss = 0; 110 111 static char mxge_flowctrl[IFM_ETH_FC_STRLEN] = IFM_ETH_FC_FORCE_NONE; 112 113 static const char *mxge_fw_unaligned = "mxge_ethp_z8e"; 114 static const char *mxge_fw_aligned = "mxge_eth_z8e"; 115 static const char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; 116 static const char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; 117 118 TUNABLE_INT("hw.mxge.num_slices", &mxge_num_slices); 119 TUNABLE_INT("hw.mxge.intr_coal_delay", &mxge_intr_coal_delay); 120 TUNABLE_INT("hw.mxge.nvidia_ecrc_enable", &mxge_nvidia_ecrc_enable); 121 TUNABLE_INT("hw.mxge.force_firmware", &mxge_force_firmware); 122 TUNABLE_INT("hw.mxge.deassert_wait", &mxge_deassert_wait); 123 TUNABLE_INT("hw.mxge.ticks", &mxge_ticks); 124 TUNABLE_INT("hw.mxge.always_promisc", &mxge_always_promisc); 125 TUNABLE_INT("hw.mxge.throttle", &mxge_throttle); 126 TUNABLE_INT("hw.mxge.multi_tx", &mxge_multi_tx); 127 TUNABLE_INT("hw.mxge.use_rss", &mxge_use_rss); 128 TUNABLE_INT("hw.mxge.msi.enable", &mxge_msi_enable); 129 TUNABLE_INT("hw.mxge.msix.enable", &mxge_msix_enable); 130 TUNABLE_STR("hw.mxge.flow_ctrl", mxge_flowctrl, sizeof(mxge_flowctrl)); 131 132 static int mxge_probe(device_t dev); 133 static int mxge_attach(device_t dev); 134 static int mxge_detach(device_t dev); 135 static int mxge_shutdown(device_t dev); 136 137 static int mxge_alloc_intr(struct mxge_softc *sc); 138 static void mxge_free_intr(struct mxge_softc *sc); 139 static int mxge_setup_intr(struct mxge_softc *sc); 140 static void mxge_teardown_intr(struct mxge_softc *sc, int cnt); 141 142 static device_method_t mxge_methods[] = { 143 /* Device interface */ 144 DEVMETHOD(device_probe, mxge_probe), 145 DEVMETHOD(device_attach, mxge_attach), 146 DEVMETHOD(device_detach, mxge_detach), 
147 DEVMETHOD(device_shutdown, mxge_shutdown), 148 DEVMETHOD_END 149 }; 150 151 static driver_t mxge_driver = { 152 "mxge", 153 mxge_methods, 154 sizeof(mxge_softc_t), 155 }; 156 157 static devclass_t mxge_devclass; 158 159 /* Declare ourselves to be a child of the PCI bus.*/ 160 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, NULL, NULL); 161 MODULE_DEPEND(mxge, firmware, 1, 1, 1); 162 MODULE_DEPEND(mxge, zlib, 1, 1, 1); 163 164 static int mxge_load_firmware(mxge_softc_t *sc, int adopt); 165 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 166 static void mxge_close(mxge_softc_t *sc, int down); 167 static int mxge_open(mxge_softc_t *sc); 168 static void mxge_tick(void *arg); 169 static void mxge_watchdog_reset(mxge_softc_t *sc); 170 static void mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice); 171 172 static int 173 mxge_probe(device_t dev) 174 { 175 if (pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM && 176 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E || 177 pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9)) { 178 int rev = pci_get_revid(dev); 179 180 switch (rev) { 181 case MXGE_PCI_REV_Z8E: 182 device_set_desc(dev, "Myri10G-PCIE-8A"); 183 break; 184 case MXGE_PCI_REV_Z8ES: 185 device_set_desc(dev, "Myri10G-PCIE-8B"); 186 break; 187 default: 188 device_set_desc(dev, "Myri10G-PCIE-8??"); 189 device_printf(dev, "Unrecognized rev %d NIC\n", rev); 190 break; 191 } 192 return 0; 193 } 194 return ENXIO; 195 } 196 197 static void 198 mxge_enable_wc(mxge_softc_t *sc) 199 { 200 #if defined(__x86_64__) 201 vm_offset_t len; 202 203 sc->wc = 1; 204 len = rman_get_size(sc->mem_res); 205 pmap_change_attr((vm_offset_t) sc->sram, len / PAGE_SIZE, 206 PAT_WRITE_COMBINING); 207 #endif 208 } 209 210 static int 211 mxge_dma_alloc(mxge_softc_t *sc, bus_dmamem_t *dma, size_t bytes, 212 bus_size_t alignment) 213 { 214 bus_size_t boundary; 215 int err; 216 217 if (bytes > 4096 && alignment == 4096) 218 boundary = 0; 219 else 220 boundary = 
4096; 221 222 err = bus_dmamem_coherent(sc->parent_dmat, alignment, boundary, 223 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, bytes, 224 BUS_DMA_WAITOK | BUS_DMA_ZERO, dma); 225 if (err != 0) { 226 device_printf(sc->dev, "bus_dmamem_coherent failed: %d\n", err); 227 return err; 228 } 229 return 0; 230 } 231 232 static void 233 mxge_dma_free(bus_dmamem_t *dma) 234 { 235 bus_dmamap_unload(dma->dmem_tag, dma->dmem_map); 236 bus_dmamem_free(dma->dmem_tag, dma->dmem_addr, dma->dmem_map); 237 bus_dma_tag_destroy(dma->dmem_tag); 238 } 239 240 /* 241 * The eeprom strings on the lanaiX have the format 242 * SN=x\0 243 * MAC=x:x:x:x:x:x\0 244 * PC=text\0 245 */ 246 static int 247 mxge_parse_strings(mxge_softc_t *sc) 248 { 249 const char *ptr; 250 int i, found_mac, found_sn2; 251 char *endptr; 252 253 ptr = sc->eeprom_strings; 254 found_mac = 0; 255 found_sn2 = 0; 256 while (*ptr != '\0') { 257 if (strncmp(ptr, "MAC=", 4) == 0) { 258 ptr += 4; 259 for (i = 0;;) { 260 sc->mac_addr[i] = strtoul(ptr, &endptr, 16); 261 if (endptr - ptr != 2) 262 goto abort; 263 ptr = endptr; 264 if (++i == 6) 265 break; 266 if (*ptr++ != ':') 267 goto abort; 268 } 269 found_mac = 1; 270 } else if (strncmp(ptr, "PC=", 3) == 0) { 271 ptr += 3; 272 strlcpy(sc->product_code_string, ptr, 273 sizeof(sc->product_code_string)); 274 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) { 275 ptr += 3; 276 strlcpy(sc->serial_number_string, ptr, 277 sizeof(sc->serial_number_string)); 278 } else if (strncmp(ptr, "SN2=", 4) == 0) { 279 /* SN2 takes precedence over SN */ 280 ptr += 4; 281 found_sn2 = 1; 282 strlcpy(sc->serial_number_string, ptr, 283 sizeof(sc->serial_number_string)); 284 } 285 while (*ptr++ != '\0') {} 286 } 287 288 if (found_mac) 289 return 0; 290 291 abort: 292 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 293 return ENXIO; 294 } 295 296 #if defined(__x86_64__) 297 298 static void 299 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 300 { 301 uint32_t val; 302 unsigned long base, off; 303 
char *va, *cfgptr; 304 device_t pdev, mcp55; 305 uint16_t vendor_id, device_id, word; 306 uintptr_t bus, slot, func, ivend, idev; 307 uint32_t *ptr32; 308 309 if (!mxge_nvidia_ecrc_enable) 310 return; 311 312 pdev = device_get_parent(device_get_parent(sc->dev)); 313 if (pdev == NULL) { 314 device_printf(sc->dev, "could not find parent?\n"); 315 return; 316 } 317 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 318 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 319 320 if (vendor_id != 0x10de) 321 return; 322 323 base = 0; 324 325 if (device_id == 0x005d) { 326 /* ck804, base address is magic */ 327 base = 0xe0000000UL; 328 } else if (device_id >= 0x0374 && device_id <= 0x378) { 329 /* mcp55, base address stored in chipset */ 330 mcp55 = pci_find_bsf(0, 0, 0); 331 if (mcp55 && 332 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 333 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 334 word = pci_read_config(mcp55, 0x90, 2); 335 base = ((unsigned long)word & 0x7ffeU) << 25; 336 } 337 } 338 if (!base) 339 return; 340 341 /* 342 * XXXX 343 * Test below is commented because it is believed that doing 344 * config read/write beyond 0xff will access the config space 345 * for the next larger function. Uncomment this and remove 346 * the hacky pmap_mapdev() way of accessing config space when 347 * DragonFly grows support for extended pcie config space access. 348 */ 349 #if 0 350 /* 351 * See if we can, by some miracle, access the extended 352 * config space 353 */ 354 val = pci_read_config(pdev, 0x178, 4); 355 if (val != 0xffffffff) { 356 val |= 0x40; 357 pci_write_config(pdev, 0x178, val, 4); 358 return; 359 } 360 #endif 361 /* 362 * Rather than using normal pci config space writes, we must 363 * map the Nvidia config space ourselves. 
This is because on 364 * opteron/nvidia class machine the 0xe000000 mapping is 365 * handled by the nvidia chipset, that means the internal PCI 366 * device (the on-chip northbridge), or the amd-8131 bridge 367 * and things behind them are not visible by this method. 368 */ 369 370 BUS_READ_IVAR(device_get_parent(pdev), pdev, 371 PCI_IVAR_BUS, &bus); 372 BUS_READ_IVAR(device_get_parent(pdev), pdev, 373 PCI_IVAR_SLOT, &slot); 374 BUS_READ_IVAR(device_get_parent(pdev), pdev, 375 PCI_IVAR_FUNCTION, &func); 376 BUS_READ_IVAR(device_get_parent(pdev), pdev, 377 PCI_IVAR_VENDOR, &ivend); 378 BUS_READ_IVAR(device_get_parent(pdev), pdev, 379 PCI_IVAR_DEVICE, &idev); 380 381 off = base + 0x00100000UL * (unsigned long)bus + 382 0x00001000UL * (unsigned long)(func + 8 * slot); 383 384 /* map it into the kernel */ 385 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 386 if (va == NULL) { 387 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 388 return; 389 } 390 /* get a pointer to the config space mapped into the kernel */ 391 cfgptr = va + (off & PAGE_MASK); 392 393 /* make sure that we can really access it */ 394 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 395 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 396 if (!(vendor_id == ivend && device_id == idev)) { 397 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 398 vendor_id, device_id); 399 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 400 return; 401 } 402 403 ptr32 = (uint32_t*)(cfgptr + 0x178); 404 val = *ptr32; 405 406 if (val == 0xffffffff) { 407 device_printf(sc->dev, "extended mapping failed\n"); 408 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 409 return; 410 } 411 *ptr32 = val | 0x40; 412 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 413 if (bootverbose) { 414 device_printf(sc->dev, "Enabled ECRC on upstream " 415 "Nvidia bridge at %d:%d:%d\n", 416 (int)bus, (int)slot, (int)func); 417 } 418 } 419 420 #else /* __x86_64__ */ 421 422 static void 423 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 424 { 
425 device_printf(sc->dev, "Nforce 4 chipset on non-x86/x86_64!?!?!\n"); 426 } 427 428 #endif 429 430 static int 431 mxge_dma_test(mxge_softc_t *sc, int test_type) 432 { 433 mxge_cmd_t cmd; 434 bus_addr_t dmatest_bus = sc->dmabench_dma.dmem_busaddr; 435 int status; 436 uint32_t len; 437 const char *test = " "; 438 439 /* 440 * Run a small DMA test. 441 * The magic multipliers to the length tell the firmware 442 * to do DMA read, write, or read+write tests. The 443 * results are returned in cmd.data0. The upper 16 444 * bits of the return is the number of transfers completed. 445 * The lower 16 bits is the time in 0.5us ticks that the 446 * transfers took to complete. 447 */ 448 449 len = sc->tx_boundary; 450 451 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 452 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 453 cmd.data2 = len * 0x10000; 454 status = mxge_send_cmd(sc, test_type, &cmd); 455 if (status != 0) { 456 test = "read"; 457 goto abort; 458 } 459 sc->read_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff); 460 461 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 462 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 463 cmd.data2 = len * 0x1; 464 status = mxge_send_cmd(sc, test_type, &cmd); 465 if (status != 0) { 466 test = "write"; 467 goto abort; 468 } 469 sc->write_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff); 470 471 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 472 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 473 cmd.data2 = len * 0x10001; 474 status = mxge_send_cmd(sc, test_type, &cmd); 475 if (status != 0) { 476 test = "read/write"; 477 goto abort; 478 } 479 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 480 (cmd.data0 & 0xffff); 481 482 abort: 483 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) { 484 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 485 test, status); 486 } 487 return status; 488 } 489 490 /* 491 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 492 * when the PCI-E Completion 
packets are aligned on an 8-byte 493 * boundary. Some PCI-E chip sets always align Completion packets; on 494 * the ones that do not, the alignment can be enforced by enabling 495 * ECRC generation (if supported). 496 * 497 * When PCI-E Completion packets are not aligned, it is actually more 498 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 499 * 500 * If the driver can neither enable ECRC nor verify that it has 501 * already been enabled, then it must use a firmware image which works 502 * around unaligned completion packets (ethp_z8e.dat), and it should 503 * also ensure that it never gives the device a Read-DMA which is 504 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 505 * enabled, then the driver should use the aligned (eth_z8e.dat) 506 * firmware image, and set tx_boundary to 4KB. 507 */ 508 static int 509 mxge_firmware_probe(mxge_softc_t *sc) 510 { 511 device_t dev = sc->dev; 512 int reg, status; 513 uint16_t pectl; 514 515 sc->tx_boundary = 4096; 516 517 /* 518 * Verify the max read request size was set to 4KB 519 * before trying the test with 4KB. 520 */ 521 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 522 pectl = pci_read_config(dev, reg + 0x8, 2); 523 if ((pectl & (5 << 12)) != (5 << 12)) { 524 device_printf(dev, "Max Read Req. size != 4k (0x%x)\n", 525 pectl); 526 sc->tx_boundary = 2048; 527 } 528 } 529 530 /* 531 * Load the optimized firmware (which assumes aligned PCIe 532 * completions) in order to see if it works on this host. 533 */ 534 sc->fw_name = mxge_fw_aligned; 535 status = mxge_load_firmware(sc, 1); 536 if (status != 0) 537 return status; 538 539 /* 540 * Enable ECRC if possible 541 */ 542 mxge_enable_nvidia_ecrc(sc); 543 544 /* 545 * Run a DMA test which watches for unaligned completions and 546 * aborts on the first one seen. Not required on Z8ES or newer. 
547 */ 548 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES) 549 return 0; 550 551 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 552 if (status == 0) 553 return 0; /* keep the aligned firmware */ 554 555 if (status != E2BIG) 556 device_printf(dev, "DMA test failed: %d\n", status); 557 if (status == ENOSYS) { 558 device_printf(dev, "Falling back to ethp! " 559 "Please install up to date fw\n"); 560 } 561 return status; 562 } 563 564 static int 565 mxge_select_firmware(mxge_softc_t *sc) 566 { 567 int aligned = 0; 568 int force_firmware = mxge_force_firmware; 569 570 if (sc->throttle) 571 force_firmware = sc->throttle; 572 573 if (force_firmware != 0) { 574 if (force_firmware == 1) 575 aligned = 1; 576 else 577 aligned = 0; 578 if (bootverbose) { 579 device_printf(sc->dev, 580 "Assuming %s completions (forced)\n", 581 aligned ? "aligned" : "unaligned"); 582 } 583 goto abort; 584 } 585 586 /* 587 * If the PCIe link width is 4 or less, we can use the aligned 588 * firmware and skip any checks 589 */ 590 if (sc->link_width != 0 && sc->link_width <= 4) { 591 device_printf(sc->dev, "PCIe x%d Link, " 592 "expect reduced performance\n", sc->link_width); 593 aligned = 1; 594 goto abort; 595 } 596 597 if (mxge_firmware_probe(sc) == 0) 598 return 0; 599 600 abort: 601 if (aligned) { 602 sc->fw_name = mxge_fw_aligned; 603 sc->tx_boundary = 4096; 604 } else { 605 sc->fw_name = mxge_fw_unaligned; 606 sc->tx_boundary = 2048; 607 } 608 return mxge_load_firmware(sc, 0); 609 } 610 611 static int 612 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 613 { 614 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 615 if_printf(sc->ifp, "Bad firmware type: 0x%x\n", 616 be32toh(hdr->mcp_type)); 617 return EIO; 618 } 619 620 /* Save firmware version for sysctl */ 621 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version)); 622 if (bootverbose) 623 if_printf(sc->ifp, "firmware id: %s\n", hdr->version); 624 625 ksscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 
626 &sc->fw_ver_minor, &sc->fw_ver_tiny); 627 628 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR && 629 sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 630 if_printf(sc->ifp, "Found firmware version %s\n", 631 sc->fw_version); 632 if_printf(sc->ifp, "Driver needs %d.%d\n", 633 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 634 return EINVAL; 635 } 636 return 0; 637 } 638 639 static void * 640 z_alloc(void *nil, u_int items, u_int size) 641 { 642 return kmalloc(items * size, M_TEMP, M_WAITOK); 643 } 644 645 static void 646 z_free(void *nil, void *ptr) 647 { 648 kfree(ptr, M_TEMP); 649 } 650 651 static int 652 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 653 { 654 z_stream zs; 655 char *inflate_buffer; 656 const struct firmware *fw; 657 const mcp_gen_header_t *hdr; 658 unsigned hdr_offset; 659 int status; 660 unsigned int i; 661 char dummy; 662 size_t fw_len; 663 664 fw = firmware_get(sc->fw_name); 665 if (fw == NULL) { 666 if_printf(sc->ifp, "Could not find firmware image %s\n", 667 sc->fw_name); 668 return ENOENT; 669 } 670 671 /* Setup zlib and decompress f/w */ 672 bzero(&zs, sizeof(zs)); 673 zs.zalloc = z_alloc; 674 zs.zfree = z_free; 675 status = inflateInit(&zs); 676 if (status != Z_OK) { 677 status = EIO; 678 goto abort_with_fw; 679 } 680 681 /* 682 * The uncompressed size is stored as the firmware version, 683 * which would otherwise go unused 684 */ 685 fw_len = (size_t)fw->version; 686 inflate_buffer = kmalloc(fw_len, M_TEMP, M_WAITOK); 687 zs.avail_in = fw->datasize; 688 zs.next_in = __DECONST(char *, fw->data); 689 zs.avail_out = fw_len; 690 zs.next_out = inflate_buffer; 691 status = inflate(&zs, Z_FINISH); 692 if (status != Z_STREAM_END) { 693 if_printf(sc->ifp, "zlib %d\n", status); 694 status = EIO; 695 goto abort_with_buffer; 696 } 697 698 /* Check id */ 699 hdr_offset = 700 htobe32(*(const uint32_t *)(inflate_buffer + MCP_HEADER_PTR_OFFSET)); 701 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 702 if_printf(sc->ifp, "Bad 
firmware file"); 703 status = EIO; 704 goto abort_with_buffer; 705 } 706 hdr = (const void*)(inflate_buffer + hdr_offset); 707 708 status = mxge_validate_firmware(sc, hdr); 709 if (status != 0) 710 goto abort_with_buffer; 711 712 /* Copy the inflated firmware to NIC SRAM. */ 713 for (i = 0; i < fw_len; i += 256) { 714 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, inflate_buffer + i, 715 min(256U, (unsigned)(fw_len - i))); 716 wmb(); 717 dummy = *sc->sram; 718 wmb(); 719 } 720 721 *limit = fw_len; 722 status = 0; 723 abort_with_buffer: 724 kfree(inflate_buffer, M_TEMP); 725 inflateEnd(&zs); 726 abort_with_fw: 727 firmware_put(fw, FIRMWARE_UNLOAD); 728 return status; 729 } 730 731 /* 732 * Enable or disable periodic RDMAs from the host to make certain 733 * chipsets resend dropped PCIe messages 734 */ 735 static void 736 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 737 { 738 char buf_bytes[72]; 739 volatile uint32_t *confirm; 740 volatile char *submit; 741 uint32_t *buf, dma_low, dma_high; 742 int i; 743 744 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 745 746 /* Clear confirmation addr */ 747 confirm = (volatile uint32_t *)sc->cmd; 748 *confirm = 0; 749 wmb(); 750 751 /* 752 * Send an rdma command to the PCIe engine, and wait for the 753 * response in the confirmation address. The firmware should 754 * write a -1 there to indicate it is alive and well 755 */ 756 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr); 757 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr); 758 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 759 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 760 buf[2] = htobe32(0xffffffff); /* confirm data */ 761 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.dmem_busaddr); 762 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.dmem_busaddr); 763 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 764 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 765 buf[5] = htobe32(enable); /* enable? 
*/ 766 767 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 768 769 mxge_pio_copy(submit, buf, 64); 770 wmb(); 771 DELAY(1000); 772 wmb(); 773 i = 0; 774 while (*confirm != 0xffffffff && i < 20) { 775 DELAY(1000); 776 i++; 777 } 778 if (*confirm != 0xffffffff) { 779 if_printf(sc->ifp, "dummy rdma %s failed (%p = 0x%x)", 780 (enable ? "enable" : "disable"), confirm, *confirm); 781 } 782 } 783 784 static int 785 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 786 { 787 mcp_cmd_t *buf; 788 char buf_bytes[sizeof(*buf) + 8]; 789 volatile mcp_cmd_response_t *response = sc->cmd; 790 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 791 uint32_t dma_low, dma_high; 792 int err, sleep_total = 0; 793 794 /* Ensure buf is aligned to 8 bytes */ 795 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 796 797 buf->data0 = htobe32(data->data0); 798 buf->data1 = htobe32(data->data1); 799 buf->data2 = htobe32(data->data2); 800 buf->cmd = htobe32(cmd); 801 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr); 802 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr); 803 804 buf->response_addr.low = htobe32(dma_low); 805 buf->response_addr.high = htobe32(dma_high); 806 807 response->result = 0xffffffff; 808 wmb(); 809 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 810 811 /* 812 * Wait up to 20ms 813 */ 814 err = EAGAIN; 815 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 816 wmb(); 817 switch (be32toh(response->result)) { 818 case 0: 819 data->data0 = be32toh(response->data); 820 err = 0; 821 break; 822 case 0xffffffff: 823 DELAY(1000); 824 break; 825 case MXGEFW_CMD_UNKNOWN: 826 err = ENOSYS; 827 break; 828 case MXGEFW_CMD_ERROR_UNALIGNED: 829 err = E2BIG; 830 break; 831 case MXGEFW_CMD_ERROR_BUSY: 832 err = EBUSY; 833 break; 834 case MXGEFW_CMD_ERROR_I2C_ABSENT: 835 err = ENXIO; 836 break; 837 default: 838 if_printf(sc->ifp, "command %d failed, result = %d\n", 839 cmd, be32toh(response->result)); 840 err = 
ENXIO; 841 break; 842 } 843 if (err != EAGAIN) 844 break; 845 } 846 if (err == EAGAIN) { 847 if_printf(sc->ifp, "command %d timed out result = %d\n", 848 cmd, be32toh(response->result)); 849 } 850 return err; 851 } 852 853 static int 854 mxge_adopt_running_firmware(mxge_softc_t *sc) 855 { 856 struct mcp_gen_header *hdr; 857 const size_t bytes = sizeof(struct mcp_gen_header); 858 size_t hdr_offset; 859 int status; 860 861 /* 862 * Find running firmware header 863 */ 864 hdr_offset = 865 htobe32(*(volatile uint32_t *)(sc->sram + MCP_HEADER_PTR_OFFSET)); 866 867 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 868 if_printf(sc->ifp, "Running firmware has bad header offset " 869 "(%zu)\n", hdr_offset); 870 return EIO; 871 } 872 873 /* 874 * Copy header of running firmware from SRAM to host memory to 875 * validate firmware 876 */ 877 hdr = kmalloc(bytes, M_DEVBUF, M_WAITOK); 878 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 879 rman_get_bushandle(sc->mem_res), hdr_offset, (char *)hdr, bytes); 880 status = mxge_validate_firmware(sc, hdr); 881 kfree(hdr, M_DEVBUF); 882 883 /* 884 * Check to see if adopted firmware has bug where adopting 885 * it will cause broadcasts to be filtered unless the NIC 886 * is kept in ALLMULTI mode 887 */ 888 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 889 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 890 sc->adopted_rx_filter_bug = 1; 891 if_printf(sc->ifp, "Adopting fw %d.%d.%d: " 892 "working around rx filter bug\n", 893 sc->fw_ver_major, sc->fw_ver_minor, sc->fw_ver_tiny); 894 } 895 896 return status; 897 } 898 899 static int 900 mxge_load_firmware(mxge_softc_t *sc, int adopt) 901 { 902 volatile uint32_t *confirm; 903 volatile char *submit; 904 char buf_bytes[72]; 905 uint32_t *buf, size, dma_low, dma_high; 906 int status, i; 907 908 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 909 910 size = sc->sram_size; 911 status = mxge_load_firmware_helper(sc, &size); 912 if (status) { 913 if 
(!adopt) 914 return status; 915 916 /* 917 * Try to use the currently running firmware, if 918 * it is new enough 919 */ 920 status = mxge_adopt_running_firmware(sc); 921 if (status) { 922 if_printf(sc->ifp, 923 "failed to adopt running firmware\n"); 924 return status; 925 } 926 if_printf(sc->ifp, "Successfully adopted running firmware\n"); 927 928 if (sc->tx_boundary == 4096) { 929 if_printf(sc->ifp, 930 "Using firmware currently running on NIC. " 931 "For optimal\n"); 932 if_printf(sc->ifp, "performance consider loading " 933 "optimized firmware\n"); 934 } 935 sc->fw_name = mxge_fw_unaligned; 936 sc->tx_boundary = 2048; 937 return 0; 938 } 939 940 /* Clear confirmation addr */ 941 confirm = (volatile uint32_t *)sc->cmd; 942 *confirm = 0; 943 wmb(); 944 945 /* 946 * Send a reload command to the bootstrap MCP, and wait for the 947 * response in the confirmation address. The firmware should 948 * write a -1 there to indicate it is alive and well 949 */ 950 951 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr); 952 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr); 953 954 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 955 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 956 buf[2] = htobe32(0xffffffff); /* confirm data */ 957 958 /* 959 * FIX: All newest firmware should un-protect the bottom of 960 * the sram before handoff. However, the very first interfaces 961 * do not. 
Therefore the handoff copy must skip the first 8 bytes 962 */ 963 /* where the code starts*/ 964 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 965 buf[4] = htobe32(size - 8); /* length of code */ 966 buf[5] = htobe32(8); /* where to copy to */ 967 buf[6] = htobe32(0); /* where to jump to */ 968 969 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 970 mxge_pio_copy(submit, buf, 64); 971 wmb(); 972 DELAY(1000); 973 wmb(); 974 i = 0; 975 while (*confirm != 0xffffffff && i < 20) { 976 DELAY(1000*10); 977 i++; 978 } 979 if (*confirm != 0xffffffff) { 980 if_printf(sc->ifp,"handoff failed (%p = 0x%x)", 981 confirm, *confirm); 982 return ENXIO; 983 } 984 return 0; 985 } 986 987 static int 988 mxge_update_mac_address(mxge_softc_t *sc) 989 { 990 mxge_cmd_t cmd; 991 uint8_t *addr = sc->mac_addr; 992 993 cmd.data0 = (addr[0] << 24) | (addr[1] << 16) | 994 (addr[2] << 8) | addr[3]; 995 cmd.data1 = (addr[4] << 8) | (addr[5]); 996 return mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 997 } 998 999 static int 1000 mxge_change_pause(mxge_softc_t *sc, int pause) 1001 { 1002 mxge_cmd_t cmd; 1003 int status; 1004 1005 bzero(&cmd, sizeof(cmd)); /* silence gcc warning */ 1006 if (pause) 1007 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, &cmd); 1008 else 1009 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, &cmd); 1010 if (status) { 1011 if_printf(sc->ifp, "Failed to set flow control mode\n"); 1012 return ENXIO; 1013 } 1014 sc->pause = pause; 1015 return 0; 1016 } 1017 1018 static void 1019 mxge_change_promisc(mxge_softc_t *sc, int promisc) 1020 { 1021 mxge_cmd_t cmd; 1022 int status; 1023 1024 bzero(&cmd, sizeof(cmd)); /* avoid gcc warning */ 1025 if (mxge_always_promisc) 1026 promisc = 1; 1027 1028 if (promisc) 1029 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, &cmd); 1030 else 1031 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, &cmd); 1032 if (status) 1033 if_printf(sc->ifp, "Failed to set promisc mode\n"); 1034 } 1035 1036 static void 1037 
mxge_set_multicast_list(mxge_softc_t *sc) 1038 { 1039 mxge_cmd_t cmd; 1040 struct ifmultiaddr *ifma; 1041 struct ifnet *ifp = sc->ifp; 1042 int err; 1043 1044 /* This firmware is known to not support multicast */ 1045 if (!sc->fw_multicast_support) 1046 return; 1047 1048 /* Disable multicast filtering while we play with the lists*/ 1049 bzero(&cmd, sizeof(cmd)); /* silence gcc warning */ 1050 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1051 if (err != 0) { 1052 if_printf(ifp, "Failed MXGEFW_ENABLE_ALLMULTI, " 1053 "error status: %d\n", err); 1054 return; 1055 } 1056 1057 if (sc->adopted_rx_filter_bug) 1058 return; 1059 1060 if (ifp->if_flags & IFF_ALLMULTI) { 1061 /* Request to disable multicast filtering, so quit here */ 1062 return; 1063 } 1064 1065 /* Flush all the filters */ 1066 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1067 if (err != 0) { 1068 if_printf(ifp, "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, " 1069 "error status: %d\n", err); 1070 return; 1071 } 1072 1073 /* 1074 * Walk the multicast list, and add each address 1075 */ 1076 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1077 if (ifma->ifma_addr->sa_family != AF_LINK) 1078 continue; 1079 1080 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1081 &cmd.data0, 4); 1082 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, 1083 &cmd.data1, 2); 1084 cmd.data0 = htonl(cmd.data0); 1085 cmd.data1 = htonl(cmd.data1); 1086 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1087 if (err != 0) { 1088 if_printf(ifp, "Failed MXGEFW_JOIN_MULTICAST_GROUP, " 1089 "error status: %d\n", err); 1090 /* Abort, leaving multicast filtering off */ 1091 return; 1092 } 1093 } 1094 1095 /* Enable multicast filtering */ 1096 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1097 if (err != 0) { 1098 if_printf(ifp, "Failed MXGEFW_DISABLE_ALLMULTI, " 1099 "error status: %d\n", err); 1100 } 1101 } 1102 1103 #if 0 1104 static int 1105 mxge_max_mtu(mxge_softc_t *sc) 1106 
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if it we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
	    &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}
#endif

/*
 * Reset the NIC firmware and re-establish all driver/firmware shared
 * state: interrupt queue size and DMA addresses, RSS slice setup,
 * interrupt coalescing/ack/deassert offsets, and the RX filters.
 * Returns 0 on success or an errno on failure.
 */
static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status, rx_intr_size;

	/*
	 * Try to send a reset command to the card to see if it
	 * is alive
	 */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		if_printf(sc->ifp, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/*
	 * Set the intrq size
	 * XXX assume 4byte mcp_slot
	 */
	rx_intr_size = sc->rx_intr_slots * sizeof(mcp_slot_t);
	cmd.data0 = rx_intr_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */
	if (sc->num_slices > 1) {
		/* Ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
		if (status != 0) {
			if_printf(sc->ifp, "failed to get number of slices\n");
			return status;
		}

		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
		if (sc->num_tx_rings > 1)
			cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, &cmd);
		if (status != 0) {
			if_printf(sc->ifp, "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts */
		for (slice = 0; slice < sc->num_slices; slice++) {
			ss = &sc->ss[slice];

			rx_done = &ss->rx_data.rx_done;
			memset(rx_done->entry, 0, rx_intr_size);

			cmd.data0 =
			    MXGE_LOWPART_TO_U32(ss->rx_done_dma.dmem_busaddr);
			cmd.data1 =
			    MXGE_HIGHPART_TO_U32(ss->rx_done_dma.dmem_busaddr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA,
			    &cmd);
		}
	}

	/* Fetch the SRAM offsets of the shared interrupt control words */
	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET,
	    &cmd);
	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);

	if (status != 0) {
		if_printf(sc->ifp, "failed set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* Run a DMA benchmark */
	mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);

		/* Reset mcp/driver shared state back to 0 */
		ss->rx_data.rx_done.idx = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->rx_data.rx_big.cnt = 0;
		ss->rx_data.rx_small.cnt = 0;
		if (ss->fw_stats != NULL)
			bzero(ss->fw_stats, sizeof(*ss->fw_stats));
	}
	sc->rdma_tags_available = 15;

	/* Reprogram MAC address, promisc/pause state and RX filters */
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);

	if (sc->throttle) {
		cmd.data0 = sc->throttle;
		if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd))
			if_printf(sc->ifp, "can't enable throttle\n");
	}
	return status;
}

/*
 * Sysctl handler: set the transmit throttle factor.  Validates the
 * range and pushes the new value to the firmware under serialization.
 */
static int
mxge_change_throttle(SYSCTL_HANDLER_ARGS)
{
	mxge_cmd_t cmd;
	mxge_softc_t *sc;
	int err;
	unsigned int throttle;

	sc = arg1;
	throttle = sc->throttle;
	err = sysctl_handle_int(oidp, &throttle, arg2, req);
	if (err != 0)
		return err;

	if (throttle == sc->throttle)
		return 0;

	if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
		return EINVAL;

	ifnet_serialize_all(sc->ifp);

	cmd.data0 = throttle;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
	if (err == 0)
		sc->throttle = throttle;

	ifnet_deserialize_all(sc->ifp);
	return err;
}

/*
 * Sysctl handler: toggle RSS.  Takes effect by restarting the
 * interface when it is running.
 */
static int
mxge_change_use_rss(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	int err, use_rss;

	sc = arg1;
	use_rss = sc->use_rss;
	err = sysctl_handle_int(oidp, &use_rss, arg2, req);
	if (err != 0)
		return err;

	if (use_rss == sc->use_rss)
		return 0;

1306 ifnet_serialize_all(sc->ifp); 1307 1308 sc->use_rss = use_rss; 1309 if (sc->ifp->if_flags & IFF_RUNNING) { 1310 mxge_close(sc, 0); 1311 mxge_open(sc); 1312 } 1313 1314 ifnet_deserialize_all(sc->ifp); 1315 return err; 1316 } 1317 1318 static int 1319 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1320 { 1321 mxge_softc_t *sc; 1322 unsigned int intr_coal_delay; 1323 int err; 1324 1325 sc = arg1; 1326 intr_coal_delay = sc->intr_coal_delay; 1327 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1328 if (err != 0) 1329 return err; 1330 1331 if (intr_coal_delay == sc->intr_coal_delay) 1332 return 0; 1333 1334 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1335 return EINVAL; 1336 1337 ifnet_serialize_all(sc->ifp); 1338 1339 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1340 sc->intr_coal_delay = intr_coal_delay; 1341 1342 ifnet_deserialize_all(sc->ifp); 1343 return err; 1344 } 1345 1346 static int 1347 mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1348 { 1349 int err; 1350 1351 if (arg1 == NULL) 1352 return EFAULT; 1353 arg2 = be32toh(*(int *)arg1); 1354 arg1 = NULL; 1355 err = sysctl_handle_int(oidp, arg1, arg2, req); 1356 1357 return err; 1358 } 1359 1360 static void 1361 mxge_rem_sysctls(mxge_softc_t *sc) 1362 { 1363 if (sc->ss != NULL) { 1364 struct mxge_slice_state *ss; 1365 int slice; 1366 1367 for (slice = 0; slice < sc->num_slices; slice++) { 1368 ss = &sc->ss[slice]; 1369 if (ss->sysctl_tree != NULL) { 1370 sysctl_ctx_free(&ss->sysctl_ctx); 1371 ss->sysctl_tree = NULL; 1372 } 1373 } 1374 } 1375 1376 if (sc->slice_sysctl_tree != NULL) { 1377 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1378 sc->slice_sysctl_tree = NULL; 1379 } 1380 } 1381 1382 static void 1383 mxge_add_sysctls(mxge_softc_t *sc) 1384 { 1385 struct sysctl_ctx_list *ctx; 1386 struct sysctl_oid_list *children; 1387 mcp_irq_data_t *fw; 1388 struct mxge_slice_state *ss; 1389 int slice; 1390 char slice_num[8]; 1391 1392 ctx = device_get_sysctl_ctx(sc->dev); 1393 children = 
SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1394 fw = sc->ss[0].fw_stats; 1395 1396 /* 1397 * Random information 1398 */ 1399 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version", 1400 CTLFLAG_RD, &sc->fw_version, 0, "firmware version"); 1401 1402 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "serial_number", 1403 CTLFLAG_RD, &sc->serial_number_string, 0, "serial number"); 1404 1405 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "product_code", 1406 CTLFLAG_RD, &sc->product_code_string, 0, "product code"); 1407 1408 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "pcie_link_width", 1409 CTLFLAG_RD, &sc->link_width, 0, "link width"); 1410 1411 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_boundary", 1412 CTLFLAG_RD, &sc->tx_boundary, 0, "tx boundary"); 1413 1414 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_combine", 1415 CTLFLAG_RD, &sc->wc, 0, "write combining PIO"); 1416 1417 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_dma_MBs", 1418 CTLFLAG_RD, &sc->read_dma, 0, "DMA Read speed in MB/s"); 1419 1420 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_dma_MBs", 1421 CTLFLAG_RD, &sc->write_dma, 0, "DMA Write speed in MB/s"); 1422 1423 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_write_dma_MBs", 1424 CTLFLAG_RD, &sc->read_write_dma, 0, 1425 "DMA concurrent Read/Write speed in MB/s"); 1426 1427 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "watchdog_resets", 1428 CTLFLAG_RD, &sc->watchdog_resets, 0, 1429 "Number of times NIC was reset"); 1430 1431 if (sc->num_slices > 1) { 1432 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "slice_cpumap", 1433 CTLTYPE_OPAQUE | CTLFLAG_RD, sc->ring_map, 0, 1434 if_ringmap_cpumap_sysctl, "I", "slice CPU map"); 1435 } 1436 1437 /* 1438 * Performance related tunables 1439 */ 1440 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_coal_delay", 1441 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_intr_coal, "I", 1442 "Interrupt coalescing delay in usecs"); 1443 1444 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "throttle", 1445 CTLTYPE_INT|CTLFLAG_RW, sc, 0, 
mxge_change_throttle, "I", 1446 "Transmit throttling"); 1447 1448 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "use_rss", 1449 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_use_rss, "I", 1450 "Use RSS"); 1451 1452 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "deassert_wait", 1453 CTLFLAG_RW, &mxge_deassert_wait, 0, 1454 "Wait for IRQ line to go low in ihandler"); 1455 1456 /* 1457 * Stats block from firmware is in network byte order. 1458 * Need to swap it 1459 */ 1460 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "link_up", 1461 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 0, 1462 mxge_handle_be32, "I", "link up"); 1463 1464 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_tags_available", 1465 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 0, 1466 mxge_handle_be32, "I", "rdma_tags_available"); 1467 1468 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_crc32", 1469 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_crc32, 0, 1470 mxge_handle_be32, "I", "dropped_bad_crc32"); 1471 1472 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_phy", 1473 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_phy, 0, 1474 mxge_handle_be32, "I", "dropped_bad_phy"); 1475 1476 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_error_or_filtered", 1477 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_error_or_filtered, 0, 1478 mxge_handle_be32, "I", "dropped_link_error_or_filtered"); 1479 1480 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_overflow", 1481 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 0, 1482 mxge_handle_be32, "I", "dropped_link_overflow"); 1483 1484 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_multicast_filtered", 1485 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_multicast_filtered, 0, 1486 mxge_handle_be32, "I", "dropped_multicast_filtered"); 1487 1488 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_no_big_buffer", 1489 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 0, 1490 mxge_handle_be32, "I", "dropped_no_big_buffer"); 1491 1492 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 
"dropped_no_small_buffer", 1493 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_small_buffer, 0, 1494 mxge_handle_be32, "I", "dropped_no_small_buffer"); 1495 1496 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_overrun", 1497 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 0, 1498 mxge_handle_be32, "I", "dropped_overrun"); 1499 1500 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_pause", 1501 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_pause, 0, 1502 mxge_handle_be32, "I", "dropped_pause"); 1503 1504 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_runt", 1505 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 0, 1506 mxge_handle_be32, "I", "dropped_runt"); 1507 1508 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_unicast_filtered", 1509 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 0, 1510 mxge_handle_be32, "I", "dropped_unicast_filtered"); 1511 1512 /* add counters exported for debugging from all slices */ 1513 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1514 sc->slice_sysctl_tree = SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, 1515 children, OID_AUTO, "slice", CTLFLAG_RD, 0, ""); 1516 if (sc->slice_sysctl_tree == NULL) { 1517 device_printf(sc->dev, "can't add slice sysctl node\n"); 1518 return; 1519 } 1520 1521 for (slice = 0; slice < sc->num_slices; slice++) { 1522 ss = &sc->ss[slice]; 1523 sysctl_ctx_init(&ss->sysctl_ctx); 1524 ctx = &ss->sysctl_ctx; 1525 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1526 ksprintf(slice_num, "%d", slice); 1527 ss->sysctl_tree = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 1528 slice_num, CTLFLAG_RD, 0, ""); 1529 if (ss->sysctl_tree == NULL) { 1530 device_printf(sc->dev, 1531 "can't add %d slice sysctl node\n", slice); 1532 return; /* XXX continue? 
*/ 1533 } 1534 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1535 1536 /* 1537 * XXX change to ULONG 1538 */ 1539 1540 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_small_cnt", 1541 CTLFLAG_RD, &ss->rx_data.rx_small.cnt, 0, "rx_small_cnt"); 1542 1543 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_big_cnt", 1544 CTLFLAG_RD, &ss->rx_data.rx_big.cnt, 0, "rx_small_cnt"); 1545 1546 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_req", 1547 CTLFLAG_RD, &ss->tx.req, 0, "tx_req"); 1548 1549 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_done", 1550 CTLFLAG_RD, &ss->tx.done, 0, "tx_done"); 1551 1552 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_pkt_done", 1553 CTLFLAG_RD, &ss->tx.pkt_done, 0, "tx_done"); 1554 1555 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_queue_active", 1556 CTLFLAG_RD, &ss->tx.queue_active, 0, "tx_queue_active"); 1557 1558 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_activate", 1559 CTLFLAG_RD, &ss->tx.activate, 0, "tx_activate"); 1560 1561 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_deactivate", 1562 CTLFLAG_RD, &ss->tx.deactivate, 0, "tx_deactivate"); 1563 } 1564 } 1565 1566 /* 1567 * Copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1568 * backwards one at a time and handle ring wraps 1569 */ 1570 static __inline void 1571 mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1572 mcp_kreq_ether_send_t *src, int cnt) 1573 { 1574 int idx, starting_slot; 1575 1576 starting_slot = tx->req; 1577 while (cnt > 1) { 1578 cnt--; 1579 idx = (starting_slot + cnt) & tx->mask; 1580 mxge_pio_copy(&tx->lanai[idx], &src[cnt], sizeof(*src)); 1581 wmb(); 1582 } 1583 } 1584 1585 /* 1586 * Copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1587 * at most 32 bytes at a time, so as to avoid involving the software 1588 * pio handler in the nic. 
We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */
static __inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	/* Clear the valid flags until the whole chain is written out */
	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		/* No ring wrap: copy two requests (32 bytes) per burst */
		for (i = 0; i < cnt - 1; i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/*
		 * Submit all but the first request, and ensure
		 * that it is submitted below
		 */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* Submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* Re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints+=3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints+=3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}

/*
 * Make sure the complete L2/L3/L4 header of a TSO frame resides in
 * the first mbuf, pulling it up if necessary.  Returns 0 or ENOBUFS
 * (in which case *mp is set to NULL and the chain has been freed by
 * m_pullup()).
 */
static int
mxge_pullup_tso(struct mbuf **mp)
{
	int hoff, iphlen, thoff;
	struct mbuf *m;

	m = *mp;
	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));

	iphlen = m->m_pkthdr.csum_iphlen;
	thoff = m->m_pkthdr.csum_thlen;
	hoff = m->m_pkthdr.csum_lhlen;

	KASSERT(iphlen > 0, ("invalid ip hlen"));
	KASSERT(thoff > 0, ("invalid tcp hlen"));
	KASSERT(hoff > 0, ("invalid ether hlen"));

	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
		m = m_pullup(m, hoff + iphlen + thoff);
		if (m == NULL) {
			*mp = NULL;
			return ENOBUFS;
		}
		*mp = m;
	}
	return 0;
}

/*
 * Build and submit the firmware send-request chain for a TSO frame.
 * Splits busdma segments at MSS boundaries and fills in rdma_count
 * retroactively (see the long comment below).  Returns 0 or ENOBUFS.
 */
static int
mxge_encap_tso(mxge_tx_ring_t *tx, struct mxge_buffer_state *info_map,
    struct mbuf *m, int busdma_seg_cnt)
{
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss;
	uint8_t flags, flags_next;
	struct mxge_buffer_state *info_last;
	bus_dmamap_t map = info_map->map;

	mss = m->m_pkthdr.tso_segsz;

	/*
	 * Negative cum_len signifies to the send loop that we are
	 * still in the header portion of the TSO packet.
	 */
	cum_len = -(m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen +
	    m->m_pkthdr.csum_thlen);

	/*
	 * TSO implies checksum offload on this hardware
	 */
	cksum_offset = m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen;
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/*
	 * For TSO, pseudo_hdr_offset holds mss.  The firmware figures
	 * out where to put the checksum by parsing the header.
	 */
	pseudo_hdr_offset = htobe16(mss);

	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;

	/*
	 * "rdma_count" is the number of RDMAs belonging to the current
	 * packet BEFORE the current send request.  For non-TSO packets,
	 * this is equal to "count".
	 *
	 * For TSO packets, rdma_count needs to be reset to 0 after a
	 * segment cut.
	 *
	 * The rdma_count field of the send request is the number of
	 * RDMAs of the packet starting at that request.  For TSO send
	 * requests with one ore more cuts in the middle, this is the
	 * number of RDMAs starting after the last cut in the request.
	 * All previous segments before the last cut implicitly have 1
	 * RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand, it must be
	 * filled-in retroactively - after each segmentation cut or at
	 * the end of the entire packet.
	 */

	while (busdma_seg_cnt) {
		/*
		 * Break the busdma segment up into pieces
		 */
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req - rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* Payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |=
				    next_is_first * MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* Header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
				    MXGEFW_FLAGS_FIRST |
				    (small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags =
			    flags | ((cum_len & 1) * MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (__predict_false(cksum_offset > seglen))
				cksum_offset -= seglen;
			else
				cksum_offset = 0;
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req - rdma_count)->rdma_count = rdma_count;

	/* Mark every request back to the last CHOP/FIRST as TSO_LAST */
	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	info_last = &tx->info[((cnt - 1) + tx->req) & tx->mask];

	/* Park the mbuf/map on the LAST slot of the chain */
	info_map->map = info_last->map;
	info_last->map = map;
	info_last->m = m;

	mxge_submit_req(tx, tx->req_list, cnt);

	if (tx->send_go != NULL && tx->queue_active == 0) {
		/* Tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
	return 0;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	return ENOBUFS;
}

/*
 * Map an outgoing mbuf chain for DMA and hand it to the firmware as
 * a send-request chain.  TSO frames are delegated to mxge_encap_tso().
 * Runts are padded to 60 bytes using the shared zero page.  Returns 0
 * on success or an errno (the mbuf is freed on failure).
 */
static int
mxge_encap(mxge_tx_ring_t *tx, struct mbuf *m, bus_addr_t zeropad)
{
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	bus_dmamap_t map;
	int cnt, cum_len, err, i, idx, odd_flag;
	uint16_t pseudo_hdr_offset;
	uint8_t flags, cksum_offset;
	struct mxge_buffer_state *info_map, *info_last;

	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
		err = mxge_pullup_tso(&m);
		if (__predict_false(err))
			return err;
	}

	/*
	 * Map the frame for DMA
	 */
	idx = tx->req & tx->mask;
	info_map = &tx->info[idx];
	map = info_map->map;

	err = bus_dmamap_load_mbuf_defrag(tx->dmat, map, &m,
	    tx->seg_list, tx->max_desc - 2, &cnt, BUS_DMA_NOWAIT);
	if (__predict_false(err != 0))
		goto drop;
	bus_dmamap_sync(tx->dmat, map, BUS_DMASYNC_PREWRITE);

	/*
	 * TSO is different enough, we handle it in another routine
	 */
	if (m->m_pkthdr.csum_flags & CSUM_TSO)
		return mxge_encap_tso(tx, info_map, m, cnt);

	req = tx->req_list;
	cksum_offset = 0;
	pseudo_hdr_offset = 0;
	flags = MXGEFW_FLAGS_NO_TSO;

	/*
	 * Checksum offloading
	 */
	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
		cksum_offset = m->m_pkthdr.csum_lhlen +
		    m->m_pkthdr.csum_iphlen;
		pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
		pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
		req->cksum_offset = cksum_offset;
		flags |= MXGEFW_FLAGS_CKSUM;
		odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
	} else {
		odd_flag = 0;
	}
	if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
		flags |= MXGEFW_FLAGS_SMALL;

	/*
	 * Convert segments into a request list
	 */
	cum_len = 0;
	seg = tx->seg_list;
	req->flags = MXGEFW_FLAGS_FIRST;
	for (i = 0; i < cnt; i++) {
		req->addr_low = htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
		req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		req->length = htobe16(seg->ds_len);
		req->cksum_offset = cksum_offset;
		if (cksum_offset > seg->ds_len)
			cksum_offset -= seg->ds_len;
		else
			cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cum_len += seg->ds_len;
		seg++;
		req++;
		req->flags = 0;	/* pre-clear the next slot's flags */
	}
	req--;

	/*
	 * Pad runt to 60 bytes
	 */
	if (cum_len < 60) {
		req++;
		req->addr_low = htobe32(MXGE_LOWPART_TO_U32(zeropad));
		req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(zeropad));
		req->length = htobe16(60 - cum_len);
		req->cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cnt++;
	}

	tx->req_list[0].rdma_count = cnt;
#if 0
	/* print what the firmware will see */
	for (i = 0; i < cnt; i++) {
		kprintf("%d: addr: 0x%x 0x%x len:%d pso%d,"
		    "cso:%d, flags:0x%x, rdma:%d\n",
		    i, (int)ntohl(tx->req_list[i].addr_high),
		    (int)ntohl(tx->req_list[i].addr_low),
		    (int)ntohs(tx->req_list[i].length),
		    (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
		    tx->req_list[i].cksum_offset, tx->req_list[i].flags,
		    tx->req_list[i].rdma_count);
	}
	kprintf("--------------\n");
#endif
	info_last = &tx->info[((cnt - 1) + tx->req) & tx->mask];

	/* Park the mbuf/map on the LAST slot of the chain */
	info_map->map = info_last->map;
	info_last->map = map;
	info_last->m = m;

	mxge_submit_req(tx, tx->req_list, cnt);

	if (tx->send_go != NULL && tx->queue_active == 0) {
		/* Tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
	return 0;

drop:
	m_freem(m);
	return err;
}

/*
 * ifnet transmit-start handler for one TX subqueue: drain the
 * subqueue into the slice's TX ring until it runs dry or the ring
 * has fewer than max_desc free slots.
 */
static void
mxge_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
{
	mxge_softc_t *sc = ifp->if_softc;
	mxge_tx_ring_t *tx = ifsq_get_priv(ifsq);
	bus_addr_t zeropad;
	int encap = 0;

	KKASSERT(tx->ifsq == ifsq);
	ASSERT_SERIALIZED(&tx->tx_serialize);

	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
		return;

	zeropad = sc->zeropad_dma.dmem_busaddr;
	while (tx->mask - (tx->req - tx->done) > tx->max_desc) {
		struct mbuf *m;
		int error;

		m = ifsq_dequeue(ifsq);
		if (m == NULL)
			goto done;

		BPF_MTAP(ifp, m);
		error = mxge_encap(tx, m, zeropad);
		if (!error)
			encap = 1;
		else
			IFNET_STAT_INC(ifp, oerrors, 1);
	}

	/* Ran out of transmit slots */
	ifsq_set_oactive(ifsq);
done:
	if (encap)
		ifsq_watchdog_set_count(&tx->watchdog, 5);
}

/*
 * TX watchdog: if the firmware's pause-frame counter has not moved
 * since last time, the NIC looks stuck, so reset it; otherwise the
 * stall is attributed to flow control from the link partner.
 */
static void
mxge_watchdog(struct ifaltq_subque *ifsq)
{
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	struct mxge_softc *sc = ifp->if_softc;
	uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
	mxge_tx_ring_t *tx = ifsq_get_priv(ifsq);

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	/* Check for pause blocking before resetting */
	if (tx->watchdog_rx_pause == rx_pause) {
		mxge_warn_stuck(sc, tx, 0);
		mxge_watchdog_reset(sc);
		return;
	} else {
		if_printf(ifp, "Flow control blocking xmits, "
		    "check link partner\n");
	}
	tx->watchdog_rx_pause = rx_pause;
}

/*
 * Copy an array of mcp_kreq_ether_recv_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's low
 * DMA address to mark it valid only after we write the entire chunk
 * in a burst
 */
static __inline void
mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
    mcp_kreq_ether_recv_t *src)
{
	uint32_t low;

	low = src->addr_low;
	src->addr_low = 0xffffffff;
	mxge_pio_copy(dst, src, 4 * sizeof (*src));
	wmb();
	mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
	wmb();
	src->addr_low = low;
	dst->addr_low = low;
	wmb();
}

/*
 * Allocate and DMA-map a small (MHLEN) RX buffer for ring slot idx.
 * During initialization (init==TRUE) failures are returned directly;
 * at runtime failures fall through so a full group of 8 slots is
 * still pushed to the NIC.
 */
static int
mxge_get_buf_small(mxge_rx_ring_t *rx, bus_dmamap_t map, int idx,
    boolean_t init)
{
	bus_dma_segment_t seg;
	struct mbuf *m;
	int cnt, err, mflag;

	mflag = M_NOWAIT;
	if (__predict_false(init))
		mflag = M_WAITOK;

	m = m_gethdr(mflag, MT_DATA);
	if (m == NULL) {
		err = ENOBUFS;
		if (__predict_false(init)) {
			/*
			 * During initialization, there
			 * is nothing to setup; bail out
			 */
			return err;
		}
		goto done;
	}
	m->m_len = m->m_pkthdr.len = MHLEN;

	err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m,
	    &seg, 1, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_freem(m);
		if (__predict_false(init)) {
			/*
			 * During initialization, there
			 * is nothing to setup; bail out
			 */
			return err;
		}
		goto done;
	}

	rx->info[idx].m = m;
	rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
rx->shadow[idx].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2082 2083 done: 2084 if ((idx & 7) == 7) 2085 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2086 return err; 2087 } 2088 2089 static int 2090 mxge_get_buf_big(mxge_rx_ring_t *rx, bus_dmamap_t map, int idx, 2091 boolean_t init) 2092 { 2093 bus_dma_segment_t seg; 2094 struct mbuf *m; 2095 int cnt, err, mflag; 2096 2097 mflag = M_NOWAIT; 2098 if (__predict_false(init)) 2099 mflag = M_WAITOK; 2100 2101 if (rx->cl_size == MCLBYTES) 2102 m = m_getcl(mflag, MT_DATA, M_PKTHDR); 2103 else 2104 m = m_getjcl(mflag, MT_DATA, M_PKTHDR, MJUMPAGESIZE); 2105 if (m == NULL) { 2106 err = ENOBUFS; 2107 if (__predict_false(init)) { 2108 /* 2109 * During initialization, there 2110 * is nothing to setup; bail out 2111 */ 2112 return err; 2113 } 2114 goto done; 2115 } 2116 m->m_len = m->m_pkthdr.len = rx->cl_size; 2117 2118 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m, 2119 &seg, 1, &cnt, BUS_DMA_NOWAIT); 2120 if (err != 0) { 2121 m_freem(m); 2122 if (__predict_false(init)) { 2123 /* 2124 * During initialization, there 2125 * is nothing to setup; bail out 2126 */ 2127 return err; 2128 } 2129 goto done; 2130 } 2131 2132 rx->info[idx].m = m; 2133 rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2134 rx->shadow[idx].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2135 2136 done: 2137 if ((idx & 7) == 7) 2138 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2139 return err; 2140 } 2141 2142 /* 2143 * Myri10GE hardware checksums are not valid if the sender 2144 * padded the frame with non-zero padding. This is because 2145 * the firmware just does a simple 16-bit 1s complement 2146 * checksum across the entire frame, excluding the first 14 2147 * bytes. 
 * It is best to simply check the checksum and
 * tell the stack about it only if the checksum is good
 */
static __inline uint16_t
mxge_rx_csum(struct mbuf *m, int csum)
{
	const struct ether_header *eh;
	const struct ip *ip;
	uint16_t c;

	eh = mtod(m, const struct ether_header *);

	/* Only deal with IPv4 TCP & UDP for now */
	if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
		return 1;

	ip = (const struct ip *)(eh + 1);
	if (__predict_false(ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP))
		return 1;

#ifdef INET
	/*
	 * Fold the pseudo-header into the firmware's raw sum over the
	 * frame past the Ethernet header; callers treat a return of 0
	 * (after the final inversion below) as "checksum good".
	 */
	c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
	    htonl(ntohs(csum) + ntohs(ip->ip_len) +
	    - (ip->ip_hl << 2) + ip->ip_p));
#else
	c = 1;
#endif
	c ^= 0xffff;
	return c;
}

/*
 * Strip the 802.1q encapsulation from 'm', saving the tag in the mbuf
 * packet header (M_VLANTAG), and adjust the firmware's partial
 * checksum so it no longer covers the removed EVL_ENCAPLEN bytes.
 */
static void
mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
{
	struct ether_vlan_header *evl;
	uint32_t partial;

	evl = mtod(m, struct ether_vlan_header *);

	/*
	 * Fix checksum by subtracting EVL_ENCAPLEN bytes after
	 * what the firmware thought was the end of the ethernet
	 * header.
	 */

	/* Put checksum into host byte order */
	*csum = ntohs(*csum);

	/* 1s-complement subtraction of the 4 encapsulation bytes */
	partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
	*csum += ~partial;
	*csum += ((*csum) < ~partial);

	/* Fold the carries back into 16 bits (twice for the 2nd carry) */
	*csum = ((*csum) >> 16) + ((*csum) & 0xFFFF);
	*csum = ((*csum) >> 16) + ((*csum) & 0xFFFF);

	/*
	 * Restore checksum to network byte order;
	 * later consumers expect this
	 */
	*csum = htons(*csum);

	/* save the tag */
	m->m_pkthdr.ether_vlantag = ntohs(evl->evl_tag);
	m->m_flags |= M_VLANTAG;

	/*
	 * Remove the 802.1q header by copying the Ethernet
	 * addresses over it and adjusting the beginning of
	 * the data in the mbuf.  The encapsulated Ethernet
	 * type field is already in place.
	 */
	bcopy((char *)evl, (char *)evl + EVL_ENCAPLEN,
	    ETHER_HDR_LEN - ETHER_TYPE_LEN);
	m_adj(m, EVL_ENCAPLEN);
}

/*
 * Pass one received frame from a big-ring slot up the stack.  The
 * slot is refilled with a fresh cluster first; if the refill fails
 * the frame is dropped and the old mbuf stays in the ring.
 */
static __inline void
mxge_rx_done_big(struct ifnet *ifp, mxge_rx_ring_t *rx,
    uint32_t len, uint32_t csum)
{
	struct mbuf *m;
	const struct ether_header *eh;
	bus_dmamap_t old_map;
	int idx;

	idx = rx->cnt & rx->mask;
	rx->cnt++;

	/* Save a pointer to the received mbuf */
	m = rx->info[idx].m;

	/* Try to replace the received mbuf */
	if (mxge_get_buf_big(rx, rx->extra_map, idx, FALSE)) {
		/* Drop the frame -- the old mbuf is re-cycled */
		IFNET_STAT_INC(ifp, ierrors, 1);
		return;
	}

	/* Unmap the received buffer */
	old_map = rx->info[idx].map;
	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(rx->dmat, old_map);

	/* Swap the bus_dmamap_t's */
	rx->info[idx].map = rx->extra_map;
	rx->extra_map = old_map;

	/*
	 * mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned
	 */
	m->m_data += MXGEFW_PAD;

	m->m_pkthdr.rcvif = ifp;
	m->m_len = m->m_pkthdr.len = len;

	IFNET_STAT_INC(ifp, ipackets, 1);

	eh = mtod(m, const struct ether_header *);
	if (eh->ether_type == htons(ETHERTYPE_VLAN))
		mxge_vlan_tag_remove(m, &csum);

	/* If the checksum is valid, mark it in the mbuf header */
	if ((ifp->if_capenable & IFCAP_RXCSUM) &&
	    mxge_rx_csum(m, csum) == 0) {
		/* Tell the stack that the checksum is good */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
		    CSUM_DATA_VALID;
	}
	ifp->if_input(ifp, m, NULL, -1);
}

/*
 * Same as mxge_rx_done_big(), but for the small receive ring.
 */
static __inline void
mxge_rx_done_small(struct ifnet *ifp, mxge_rx_ring_t *rx,
    uint32_t len, uint32_t csum)
{
	const struct ether_header *eh;
	struct mbuf *m;
	bus_dmamap_t old_map;
	int idx;

	idx = rx->cnt & rx->mask;
	rx->cnt++;

	/* Save a pointer to the received mbuf */
	m = rx->info[idx].m;

	/* Try to replace the received mbuf */
	if (mxge_get_buf_small(rx, rx->extra_map, idx, FALSE)) {
		/* Drop the frame -- the old mbuf is re-cycled */
		IFNET_STAT_INC(ifp, ierrors, 1);
		return;
	}

	/* Unmap the received buffer */
	old_map = rx->info[idx].map;
	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(rx->dmat, old_map);

	/* Swap the bus_dmamap_t's */
	rx->info[idx].map = rx->extra_map;
	rx->extra_map = old_map;

	/*
	 * mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned
	 */
	m->m_data += MXGEFW_PAD;

	m->m_pkthdr.rcvif = ifp;
	m->m_len = m->m_pkthdr.len = len;

	IFNET_STAT_INC(ifp, ipackets, 1);

	eh = mtod(m, const struct ether_header *);
	if (eh->ether_type == htons(ETHERTYPE_VLAN))
		mxge_vlan_tag_remove(m, &csum);

	/* If the checksum is valid, mark it in the mbuf header */
	if ((ifp->if_capenable & IFCAP_RXCSUM) &&
	    mxge_rx_csum(m, csum) == 0) {
		/* Tell the stack that the checksum is good */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
		    CSUM_DATA_VALID;
	}
	ifp->if_input(ifp, m, NULL, -1);
}

/*
 * Drain the receive completion ring, dispatching each entry to the
 * small- or big-ring handler based on its length.  'cycle' bounds the
 * number of entries processed; pass -1 to drain until the ring is
 * empty.
 */
static __inline void
mxge_clean_rx_done(struct ifnet *ifp, struct mxge_rx_data *rx_data, int cycle)
{
	mxge_rx_done_t *rx_done = &rx_data->rx_done;

	while (rx_done->entry[rx_done->idx].length != 0 && cycle != 0) {
		uint16_t length, checksum;

		length = ntohs(rx_done->entry[rx_done->idx].length);
		/* Zero the length to mark this entry consumed */
		rx_done->entry[rx_done->idx].length = 0;

		checksum = rx_done->entry[rx_done->idx].checksum;

		if (length <= MXGE_RX_SMALL_BUFLEN) {
			mxge_rx_done_small(ifp, &rx_data->rx_small,
			    length, checksum);
		} else {
			mxge_rx_done_big(ifp, &rx_data->rx_big,
			    length, checksum);
		}

		rx_done->idx++;
		rx_done->idx &= rx_done->mask;
		--cycle;
	}
}

/*
 * Reclaim transmit descriptors completed by the firmware, up to
 * 'mcp_idx': free mbufs and unload their DMA maps, clear OACTIVE once
 * the ring drains below half full, restart the send queue, and tell
 * the NIC to stop polling a fully drained queue.
 */
static __inline void
mxge_tx_done(struct ifnet *ifp, mxge_tx_ring_t *tx, uint32_t mcp_idx)
{
	ASSERT_SERIALIZED(&tx->tx_serialize);

	while (tx->pkt_done != mcp_idx) {
		struct mbuf *m;
		int idx;

		idx = tx->done & tx->mask;
		tx->done++;

		m = tx->info[idx].m;
		/*
		 * mbuf and DMA map only attached to the first
		 * segment per-mbuf.
		 */
		if (m != NULL) {
			tx->pkt_done++;
			IFNET_STAT_INC(ifp, opackets, 1);
			tx->info[idx].m = NULL;
			bus_dmamap_unload(tx->dmat, tx->info[idx].map);
			m_freem(m);
		}
	}

	/*
	 * If we have space, clear OACTIVE to tell the stack that
	 * its OK to send packets
	 */
	if (tx->req - tx->done < (tx->mask + 1) / 2) {
		ifsq_clr_oactive(tx->ifsq);
		if (tx->req == tx->done) {
			/* Reset watchdog */
			ifsq_watchdog_set_count(&tx->watchdog, 0);
		}
	}

	if (!ifsq_is_empty(tx->ifsq))
		ifsq_devstart(tx->ifsq);

	if (tx->send_stop != NULL && tx->req == tx->done) {
		/*
		 * Let the NIC stop polling this queue, since there
		 * are no more transmits pending
		 */
		*tx->send_stop = 1;
		tx->queue_active = 0;
		tx->deactivate++;
		wmb();
	}
}

/* XFP module media, keyed by bits read from the module */
static struct mxge_media_type mxge_xfp_media_types[] = {
	{IFM_10G_CX4,	0x7f,		"10GBASE-CX4 (module)"},
	{IFM_10G_SR,	(1 << 7),	"10GBASE-SR"},
	{IFM_10G_LR,	(1 << 6),	"10GBASE-LR"},
	{IFM_NONE,	(1 << 5),	"10GBASE-ER"},
	{IFM_10G_LRM,	(1 << 4),	"10GBASE-LRM"},
	{IFM_NONE,	(1 << 3),	"10GBASE-SW"},
	{IFM_NONE,	(1 << 2),	"10GBASE-LW"},
	{IFM_NONE,	(1 << 1),	"10GBASE-EW"},
	{IFM_NONE,	(1 << 0),	"Reserved"}
};

/* SFP+ module media, keyed by bits read from the module */
static struct mxge_media_type mxge_sfp_media_types[] = {
	{IFM_10G_TWINAX,	0,	"10GBASE-Twinax"},
	{IFM_NONE,	(1 << 7),	"Reserved"},
	{IFM_10G_LRM,	(1 << 6),	"10GBASE-LRM"},
	{IFM_10G_LR,	(1 << 5),	"10GBASE-LR"},
	{IFM_10G_SR,	(1 << 4),	"10GBASE-SR"},
	{IFM_10G_TWINAX,(1 << 0),	"10GBASE-Twinax"}
};

/*
 * Install 'media_type' as the interface's media and record it as
 * current.  Pause options are advertised when flow control is
 * enabled; IFM_NONE entries are silently skipped.
 */
static void
mxge_media_set(mxge_softc_t *sc, int media_type)
{
	int fc_opt = 0;

	if (media_type == IFM_NONE)
		return;

	if (sc->pause)
		fc_opt = IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE;

	ifmedia_add(&sc->media, MXGE_IFM | media_type, 0, NULL);
	ifmedia_set(&sc->media, MXGE_IFM | media_type | fc_opt);

	sc->current_media = media_type;
}

/* Remove all installed media and mark the current media unknown */
static void
mxge_media_unset(mxge_softc_t *sc)
{
	ifmedia_removeall(&sc->media);
	sc->current_media = IFM_NONE;
}

/*
 * Classify the NIC's connector (CX4/QRF/XFP/SFP+) from its product
 * code string; for pluggable cages the actual ifmedia is installed
 * later by mxge_media_probe().
 */
static void
mxge_media_init(mxge_softc_t *sc)
{
	const char *ptr;
	int i;

	mxge_media_unset(sc);

	/*
	 * Parse the product code to determine the interface type
	 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
	 * after the 3rd dash in the driver's cached copy of the
	 * EEPROM's product code string.
	 */
	ptr = sc->product_code_string;
	if (ptr == NULL) {
		if_printf(sc->ifp, "Missing product code\n");
		return;
	}

	/* Advance past the 3rd dash */
	for (i = 0; i < 3; i++, ptr++) {
		ptr = strchr(ptr, '-');
		if (ptr == NULL) {
			if_printf(sc->ifp, "only %d dashes in PC?!?\n", i);
			return;
		}
	}
	if (*ptr == 'C' || *(ptr +1) == 'C') {
		/* -C is CX4 */
		sc->connector = MXGE_CX4;
		mxge_media_set(sc, IFM_10G_CX4);
	} else if (*ptr == 'Q') {
		/* -Q is Quad Ribbon Fiber */
		sc->connector = MXGE_QRF;
		if_printf(sc->ifp, "Quad Ribbon Fiber Media\n");
		/* DragonFly has no media type for Quad ribbon fiber */
	} else if (*ptr == 'R') {
		/* -R is XFP */
		sc->connector = MXGE_XFP;
		/* NOTE: ifmedia will be installed later */
	} else if (*ptr == 'S' || *(ptr +1) == 'S') {
		/* -S or -2S is SFP+ */
		sc->connector = MXGE_SFP;
		/* NOTE: ifmedia will be installed later */
	} else {
		sc->connector = MXGE_UNK;
		if_printf(sc->ifp, "Unknown media type: %c\n", *ptr);
	}
}

/*
 * Determine the media type for a NIC.  Some XFPs will identify
 * themselves only when their link is up, so this is initiated via a
 * link up interrupt.  However, this can potentially take up to
 * several milliseconds, so it is run via the watchdog routine, rather
 * than in the interrupt handler itself.
 */
static void
mxge_media_probe(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	const char *cage_type;
	struct mxge_media_type *mxge_media_types = NULL;
	int i, err, ms, mxge_media_type_entries;
	uint32_t byte;

	sc->need_media_probe = 0;

	if (sc->connector == MXGE_XFP) {
		/* -R is XFP */
		mxge_media_types = mxge_xfp_media_types;
		mxge_media_type_entries = NELEM(mxge_xfp_media_types);
		byte = MXGE_XFP_COMPLIANCE_BYTE;
		cage_type = "XFP";
	} else if (sc->connector == MXGE_SFP) {
		/* -S or -2S is SFP+ */
		mxge_media_types = mxge_sfp_media_types;
		mxge_media_type_entries = NELEM(mxge_sfp_media_types);
		cage_type = "SFP+";
		byte = 3;
	} else {
		/* nothing to do; media type cannot change */
		return;
	}

	/*
	 * At this point we know the NIC has an XFP cage, so now we
	 * try to determine what is in the cage by using the
	 * firmware's XFP I2C commands to read the XFP 10GbE compliance
	 * register.  We read just one byte, which may take over
	 * a millisecond
	 */

	bzero(&cmd, sizeof(cmd));	/* silence gcc warning */
	cmd.data0 = 0;	/* just fetch 1 byte, not all 256 */
	cmd.data1 = byte;
	err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
	if (err != MXGEFW_CMD_OK) {
		if (err == MXGEFW_CMD_ERROR_I2C_FAILURE)
			if_printf(sc->ifp, "failed to read XFP\n");
		else if (err == MXGEFW_CMD_ERROR_I2C_ABSENT)
			if_printf(sc->ifp, "Type R/S with no XFP!?!?\n");
		else
			if_printf(sc->ifp, "I2C read failed, err: %d", err);
		mxge_media_unset(sc);
		return;
	}

	/* Now we wait for the data to be cached */
	cmd.data0 = byte;
	err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
	for (ms = 0; err == EBUSY && ms < 50; ms++) {
		DELAY(1000);
		cmd.data0 = byte;
		err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
	}
	if (err != MXGEFW_CMD_OK) {
		if_printf(sc->ifp, "failed to read %s (%d, %dms)\n",
		    cage_type, err, ms);
		mxge_media_unset(sc);
		return;
	}

	/*
	 * The first table entry is matched by equality against the
	 * whole byte; the remaining entries are matched bit-by-bit.
	 */
	if (cmd.data0 == mxge_media_types[0].bitmask) {
		if (bootverbose) {
			if_printf(sc->ifp, "%s:%s\n", cage_type,
			    mxge_media_types[0].name);
		}
		if (sc->current_media != mxge_media_types[0].flag) {
			mxge_media_unset(sc);
			mxge_media_set(sc, mxge_media_types[0].flag);
		}
		return;
	}
	for (i = 1; i < mxge_media_type_entries; i++) {
		if (cmd.data0 & mxge_media_types[i].bitmask) {
			if (bootverbose) {
				if_printf(sc->ifp, "%s:%s\n", cage_type,
				    mxge_media_types[i].name);
			}

			if (sc->current_media != mxge_media_types[i].flag) {
				mxge_media_unset(sc);
				mxge_media_set(sc, mxge_media_types[i].flag);
			}
			return;
		}
	}
	mxge_media_unset(sc);
	if (bootverbose) {
		if_printf(sc->ifp, "%s media 0x%x unknown\n", cage_type,
		    cmd.data0);
	}
}

/*
 * Consume the firmware's DMAed interrupt status block: propagate link
 * state changes to the stack and report RDMA tag timeouts.
 */
static void
mxge_intr_status(struct mxge_softc
    *sc, const mcp_irq_data_t *stats)
{
	if (sc->link_state != stats->link_up) {
		sc->link_state = stats->link_up;
		if (sc->link_state) {
			sc->ifp->if_link_state = LINK_STATE_UP;
			if_link_state_change(sc->ifp);
			if (bootverbose)
				if_printf(sc->ifp, "link up\n");
		} else {
			sc->ifp->if_link_state = LINK_STATE_DOWN;
			if_link_state_change(sc->ifp);
			if (bootverbose)
				if_printf(sc->ifp, "link down\n");
		}
		/* Re-identify the pluggable module on the next probe */
		sc->need_media_probe = 1;
	}

	if (sc->rdma_tags_available != be32toh(stats->rdma_tags_available)) {
		sc->rdma_tags_available = be32toh(stats->rdma_tags_available);
		if_printf(sc->ifp, "RDMA timed out! %d tags left\n",
		    sc->rdma_tags_available);
	}

	if (stats->link_down) {
		sc->down_cnt += stats->link_down;
		sc->link_state = 0;
		sc->ifp->if_link_state = LINK_STATE_DOWN;
		if_link_state_change(sc->ifp);
	}
}

/* Enter all slice serializers, skipping the main one (start index 1) */
static void
mxge_serialize_skipmain(struct mxge_softc *sc)
{
	lwkt_serialize_array_enter(sc->serializes, sc->nserialize, 1);
}

/* Exit all slice serializers, skipping the main one (start index 1) */
static void
mxge_deserialize_skipmain(struct mxge_softc *sc)
{
	lwkt_serialize_array_exit(sc->serializes, sc->nserialize, 1);
}

/*
 * Legacy (INTx) interrupt handler.  Runs with the main serializer
 * held; loops reaping TX completions and RX frames until the
 * firmware clears stats->valid (interrupt deasserted).
 */
static void
mxge_legacy(void *arg)
{
	struct mxge_slice_state *ss = arg;
	mxge_softc_t *sc = ss->sc;
	mcp_irq_data_t *stats = ss->fw_stats;
	mxge_tx_ring_t *tx = &ss->tx;
	mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
	uint32_t send_done_count;
	uint8_t valid;

	ASSERT_SERIALIZED(&sc->main_serialize);

	/* Make sure the DMA has finished */
	if (!stats->valid)
		return;
	valid = stats->valid;

	/* Lower legacy IRQ */
	*sc->irq_deassert = 0;
	if (!mxge_deassert_wait) {
		/* Don't wait for conf.
		   that irq is low */
		stats->valid = 0;
	}

	mxge_serialize_skipmain(sc);

	/*
	 * Loop while waiting for legacy irq deassertion
	 * XXX do we really want to loop?
	 */
	do {
		/* Check for transmit completes and receives */
		send_done_count = be32toh(stats->send_done_count);
		while ((send_done_count != tx->pkt_done) ||
		       (rx_done->entry[rx_done->idx].length != 0)) {
			if (send_done_count != tx->pkt_done) {
				mxge_tx_done(&sc->arpcom.ac_if, tx,
				    (int)send_done_count);
			}
			mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1);
			send_done_count = be32toh(stats->send_done_count);
		}
		if (mxge_deassert_wait)
			wmb();
	} while (*((volatile uint8_t *)&stats->valid));

	mxge_deserialize_skipmain(sc);

	/* Fw link & error stats meaningful only on the first slice */
	if (__predict_false(stats->stats_updated))
		mxge_intr_status(sc, stats);

	/* Check to see if we have rx token to pass back */
	if (valid & 0x1)
		*ss->irq_claim = be32toh(3);
	*(ss->irq_claim + 1) = be32toh(3);
}

/*
 * MSI interrupt handler.  Unlike the legacy handler there is no
 * deassert loop; RX work is skipped while IFF_NPOLLING is set.
 */
static void
mxge_msi(void *arg)
{
	struct mxge_slice_state *ss = arg;
	mxge_softc_t *sc = ss->sc;
	mcp_irq_data_t *stats = ss->fw_stats;
	mxge_tx_ring_t *tx = &ss->tx;
	mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
	uint32_t send_done_count;
	uint8_t valid;
#ifndef IFPOLL_ENABLE
	const boolean_t polling = FALSE;
#else
	boolean_t polling = FALSE;
#endif

	ASSERT_SERIALIZED(&sc->main_serialize);

	/* Make sure the DMA has finished */
	if (__predict_false(!stats->valid))
		return;

	valid = stats->valid;
	stats->valid = 0;

#ifdef IFPOLL_ENABLE
	if (sc->arpcom.ac_if.if_flags & IFF_NPOLLING)
		polling = TRUE;
#endif

	if (!polling) {
		/* Check for receives */
		lwkt_serialize_enter(&ss->rx_data.rx_serialize);
		if
		    (rx_done->entry[rx_done->idx].length != 0)
			mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1);
		lwkt_serialize_exit(&ss->rx_data.rx_serialize);
	}

	/*
	 * Check for transmit completes
	 *
	 * NOTE:
	 * Since pkt_done is only changed by mxge_tx_done(),
	 * which is called only in interrupt handler, the
	 * check w/o holding tx serializer is MPSAFE.
	 */
	send_done_count = be32toh(stats->send_done_count);
	if (send_done_count != tx->pkt_done) {
		lwkt_serialize_enter(&tx->tx_serialize);
		mxge_tx_done(&sc->arpcom.ac_if, tx, (int)send_done_count);
		lwkt_serialize_exit(&tx->tx_serialize);
	}

	if (__predict_false(stats->stats_updated))
		mxge_intr_status(sc, stats);

	/* Check to see if we have rx token to pass back */
	if (!polling && (valid & 0x1))
		*ss->irq_claim = be32toh(3);
	*(ss->irq_claim + 1) = be32toh(3);
}

/*
 * MSI-X handler for an RX-only slice.  Runs with the slice's RX
 * serializer held; does nothing while IFF_NPOLLING is set.
 */
static void
mxge_msix_rx(void *arg)
{
	struct mxge_slice_state *ss = arg;
	mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;

#ifdef IFPOLL_ENABLE
	if (ss->sc->arpcom.ac_if.if_flags & IFF_NPOLLING)
		return;
#endif

	ASSERT_SERIALIZED(&ss->rx_data.rx_serialize);

	if (rx_done->entry[rx_done->idx].length != 0)
		mxge_clean_rx_done(&ss->sc->arpcom.ac_if, &ss->rx_data, -1);

	*ss->irq_claim = be32toh(3);
}

/*
 * MSI-X handler for the slice that services both an RX ring and a
 * TX ring.
 */
static void
mxge_msix_rxtx(void *arg)
{
	struct mxge_slice_state *ss = arg;
	mxge_softc_t *sc = ss->sc;
	mcp_irq_data_t *stats = ss->fw_stats;
	mxge_tx_ring_t *tx = &ss->tx;
	mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
	uint32_t send_done_count;
	uint8_t valid;
#ifndef IFPOLL_ENABLE
	const boolean_t polling = FALSE;
#else
	boolean_t polling = FALSE;
#endif

	ASSERT_SERIALIZED(&ss->rx_data.rx_serialize);

	/* Make sure the DMA has finished */
	if (__predict_false(!stats->valid))
		return;

	valid = stats->valid;
	stats->valid = 0;

#ifdef IFPOLL_ENABLE
	if (sc->arpcom.ac_if.if_flags & IFF_NPOLLING)
		polling = TRUE;
#endif

	/* Check for receives */
	if (!polling && rx_done->entry[rx_done->idx].length != 0)
		mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1);

	/*
	 * Check for transmit completes
	 *
	 * NOTE:
	 * Since pkt_done is only changed by mxge_tx_done(),
	 * which is called only in interrupt handler, the
	 * check w/o holding tx serializer is MPSAFE.
	 */
	send_done_count = be32toh(stats->send_done_count);
	if (send_done_count != tx->pkt_done) {
		lwkt_serialize_enter(&tx->tx_serialize);
		mxge_tx_done(&sc->arpcom.ac_if, tx, (int)send_done_count);
		lwkt_serialize_exit(&tx->tx_serialize);
	}

	/* Check to see if we have rx token to pass back */
	if (!polling && (valid & 0x1))
		*ss->irq_claim = be32toh(3);
	*(ss->irq_claim + 1) = be32toh(3);
}

/* if_init handler: bring the interface up if it is not yet running */
static void
mxge_init(void *arg)
{
	struct mxge_softc *sc = arg;

	ASSERT_IFNET_SERIALIZED_ALL(sc->ifp);
	if ((sc->ifp->if_flags & IFF_RUNNING) == 0)
		mxge_open(sc);
}

/*
 * Unload the DMA maps and free any mbufs still attached to a slice's
 * RX rings and (on the first slice) its TX ring.
 */
static void
mxge_free_slice_mbufs(struct mxge_slice_state *ss)
{
	int i;

	for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
		if (ss->rx_data.rx_big.info[i].m == NULL)
			continue;
		bus_dmamap_unload(ss->rx_data.rx_big.dmat,
		    ss->rx_data.rx_big.info[i].map);
		m_freem(ss->rx_data.rx_big.info[i].m);
		ss->rx_data.rx_big.info[i].m = NULL;
	}

	for (i = 0; i <= ss->rx_data.rx_small.mask; i++) {
		if (ss->rx_data.rx_small.info[i].m == NULL)
			continue;
		bus_dmamap_unload(ss->rx_data.rx_small.dmat,
		    ss->rx_data.rx_small.info[i].map);
		m_freem(ss->rx_data.rx_small.info[i].m);
		ss->rx_data.rx_small.info[i].m = NULL;
	}

	/* Transmit ring used only on the first slice */
	if (ss->tx.info == NULL)
		return;

	for (i = 0; i <= ss->tx.mask; i++) {
		if (ss->tx.info[i].m == NULL)
			continue;
		bus_dmamap_unload(ss->tx.dmat, ss->tx.info[i].map);
		m_freem(ss->tx.info[i].m);
		ss->tx.info[i].m = NULL;
	}
}

/* Free leftover ring mbufs on every slice */
static void
mxge_free_mbufs(mxge_softc_t *sc)
{
	int slice;

	for (slice = 0; slice < sc->num_slices; slice++)
		mxge_free_slice_mbufs(&sc->ss[slice]);
}

/*
 * Release all ring state of one slice: completion ring DMA memory,
 * TX request/segment scratch lists, RX shadow rings, per-slot DMA
 * maps and the busdma tags.  Every pointer is checked and NULLed, so
 * this is safe on a partially initialized slice.
 */
static void
mxge_free_slice_rings(struct mxge_slice_state *ss)
{
	int i;

	if (ss->rx_data.rx_done.entry != NULL) {
		mxge_dma_free(&ss->rx_done_dma);
		ss->rx_data.rx_done.entry = NULL;
	}

	if (ss->tx.req_list != NULL) {
		kfree(ss->tx.req_list, M_DEVBUF);
		ss->tx.req_list = NULL;
	}

	if (ss->tx.seg_list != NULL) {
		kfree(ss->tx.seg_list, M_DEVBUF);
		ss->tx.seg_list = NULL;
	}

	if (ss->rx_data.rx_small.shadow != NULL) {
		kfree(ss->rx_data.rx_small.shadow, M_DEVBUF);
		ss->rx_data.rx_small.shadow = NULL;
	}

	if (ss->rx_data.rx_big.shadow != NULL) {
		kfree(ss->rx_data.rx_big.shadow, M_DEVBUF);
		ss->rx_data.rx_big.shadow = NULL;
	}

	if (ss->tx.info != NULL) {
		if (ss->tx.dmat != NULL) {
			for (i = 0; i <= ss->tx.mask; i++) {
				bus_dmamap_destroy(ss->tx.dmat,
				    ss->tx.info[i].map);
			}
			bus_dma_tag_destroy(ss->tx.dmat);
		}
		kfree(ss->tx.info, M_DEVBUF);
		ss->tx.info = NULL;
	}

	if (ss->rx_data.rx_small.info != NULL) {
		if (ss->rx_data.rx_small.dmat != NULL) {
			for (i = 0; i <= ss->rx_data.rx_small.mask; i++) {
				bus_dmamap_destroy(ss->rx_data.rx_small.dmat,
				    ss->rx_data.rx_small.info[i].map);
			}
			bus_dmamap_destroy(ss->rx_data.rx_small.dmat,
			    ss->rx_data.rx_small.extra_map);
			bus_dma_tag_destroy(ss->rx_data.rx_small.dmat);
		}
		kfree(ss->rx_data.rx_small.info, M_DEVBUF);
		ss->rx_data.rx_small.info = NULL;
	}

	if (ss->rx_data.rx_big.info != NULL) {
		if (ss->rx_data.rx_big.dmat != NULL) {
			for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
				bus_dmamap_destroy(ss->rx_data.rx_big.dmat,
				    ss->rx_data.rx_big.info[i].map);
			}
			bus_dmamap_destroy(ss->rx_data.rx_big.dmat,
			    ss->rx_data.rx_big.extra_map);
			bus_dma_tag_destroy(ss->rx_data.rx_big.dmat);
		}
		kfree(ss->rx_data.rx_big.info, M_DEVBUF);
		ss->rx_data.rx_big.info = NULL;
	}
}

/* Free ring state of every slice */
static void
mxge_free_rings(mxge_softc_t *sc)
{
	int slice;

	if (sc->ss == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++)
		mxge_free_slice_rings(&sc->ss[slice]);
}

/*
 * Allocate all per-slice ring resources: RX shadow and host-info
 * rings, busdma tags plus per-slot DMA maps for both RX rings and
 * the TX ring.  On failure the resources created in the failing
 * stage are destroyed before the error is returned.
 */
static int
mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
    int tx_ring_entries)
{
	mxge_softc_t *sc = ss->sc;
	size_t bytes;
	int err, i;

	/*
	 * Allocate per-slice receive resources
	 */

	ss->rx_data.rx_small.mask = ss->rx_data.rx_big.mask =
	    rx_ring_entries - 1;
	ss->rx_data.rx_done.mask = (2 * rx_ring_entries) - 1;

	/* Allocate the rx shadow rings */
	bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_small.shadow);
	ss->rx_data.rx_small.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_big.shadow);
	ss->rx_data.rx_big.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	/* Allocate the rx host info rings */
	bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_small.info);
	ss->rx_data.rx_small.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_big.info);
	ss->rx_data.rx_big.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	/* Allocate the rx busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
	    1,				/* alignment */
	    4096,			/*
	    boundary */
	    BUS_SPACE_MAXADDR,		/* low */
	    BUS_SPACE_MAXADDR,		/* high */
	    NULL, NULL,			/* filter */
	    MHLEN,			/* maxsize */
	    1,				/* num segs */
	    MHLEN,			/* maxsegsize */
	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
					/* flags */
	    &ss->rx_data.rx_small.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
		    err);
		return err;
	}

	err = bus_dmamap_create(ss->rx_data.rx_small.dmat, BUS_DMA_WAITOK,
	    &ss->rx_data.rx_small.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_small dmamap\n", err);
		bus_dma_tag_destroy(ss->rx_data.rx_small.dmat);
		ss->rx_data.rx_small.dmat = NULL;
		return err;
	}
	for (i = 0; i <= ss->rx_data.rx_small.mask; i++) {
		err = bus_dmamap_create(ss->rx_data.rx_small.dmat,
		    BUS_DMA_WAITOK, &ss->rx_data.rx_small.info[i].map);
		if (err != 0) {
			int j;

			device_printf(sc->dev, "Err %d rx_small dmamap\n", err);

			/* Unwind the maps created so far */
			for (j = 0; j < i; ++j) {
				bus_dmamap_destroy(ss->rx_data.rx_small.dmat,
				    ss->rx_data.rx_small.info[j].map);
			}
			bus_dmamap_destroy(ss->rx_data.rx_small.dmat,
			    ss->rx_data.rx_small.extra_map);
			bus_dma_tag_destroy(ss->rx_data.rx_small.dmat);
			ss->rx_data.rx_small.dmat = NULL;
			return err;
		}
	}

	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
	    1,				/* alignment */
	    4096,			/* boundary */
	    BUS_SPACE_MAXADDR,		/* low */
	    BUS_SPACE_MAXADDR,		/* high */
	    NULL, NULL,			/* filter */
	    4096,			/* maxsize */
	    1,				/* num segs */
	    4096,			/* maxsegsize*/
	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
					/* flags */
	    &ss->rx_data.rx_big.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
		    err);
		return err;
	}

	err = bus_dmamap_create(ss->rx_data.rx_big.dmat, BUS_DMA_WAITOK,
	    &ss->rx_data.rx_big.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_big dmamap\n", err);
		bus_dma_tag_destroy(ss->rx_data.rx_big.dmat);
		ss->rx_data.rx_big.dmat = NULL;
		return err;
	}
	for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
		err = bus_dmamap_create(ss->rx_data.rx_big.dmat, BUS_DMA_WAITOK,
		    &ss->rx_data.rx_big.info[i].map);
		if (err != 0) {
			int j;

			device_printf(sc->dev, "Err %d rx_big dmamap\n", err);
			/* Unwind the maps created so far */
			for (j = 0; j < i; ++j) {
				bus_dmamap_destroy(ss->rx_data.rx_big.dmat,
				    ss->rx_data.rx_big.info[j].map);
			}
			bus_dmamap_destroy(ss->rx_data.rx_big.dmat,
			    ss->rx_data.rx_big.extra_map);
			bus_dma_tag_destroy(ss->rx_data.rx_big.dmat);
			ss->rx_data.rx_big.dmat = NULL;
			return err;
		}
	}

	/*
	 * Now allocate TX resources
	 */

	ss->tx.mask = tx_ring_entries - 1;
	ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);

	/*
	 * Allocate the tx request copy block; MUST be at least 8 bytes
	 * aligned
	 */
	bytes = sizeof(*ss->tx.req_list) * (ss->tx.max_desc + 4);
	ss->tx.req_list = kmalloc(__VM_CACHELINE_ALIGN(bytes),
	    M_DEVBUF,
	    M_WAITOK | M_CACHEALIGN);

	/* Allocate the tx busdma segment list */
	bytes = sizeof(*ss->tx.seg_list) * ss->tx.max_desc;
	ss->tx.seg_list = kmalloc(bytes, M_DEVBUF, M_WAITOK);

	/* Allocate the tx host info ring */
	bytes = tx_ring_entries * sizeof(*ss->tx.info);
	ss->tx.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	/* Allocate the tx busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
	    1,				/* alignment */
	    sc->tx_boundary,		/* boundary */
	    BUS_SPACE_MAXADDR,		/* low */
	    BUS_SPACE_MAXADDR,		/* high */
	    NULL, NULL,			/* filter */
	    IP_MAXPACKET +
	    sizeof(struct ether_vlan_header),
					/* maxsize */
	    ss->tx.max_desc - 2,	/* num segs */
	    sc->tx_boundary,		/* maxsegsz */
	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
	    BUS_DMA_ONEBPAGE,		/* flags */
	    &ss->tx.dmat);		/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating tx dmat\n", err);
		return err;
	}

	/*
	 * Now use these tags to setup DMA maps for each slot in the ring
	 */
	for (i = 0; i <= ss->tx.mask; i++) {
		err = bus_dmamap_create(ss->tx.dmat,
		    BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &ss->tx.info[i].map);
		if (err != 0) {
			int j;

			device_printf(sc->dev, "Err %d tx dmamap\n", err);
			/* Unwind the maps created so far */
			for (j = 0; j < i; ++j) {
				bus_dmamap_destroy(ss->tx.dmat,
				    ss->tx.info[j].map);
			}
			bus_dma_tag_destroy(ss->tx.dmat);
			ss->tx.dmat = NULL;
			return err;
		}
	}
	return 0;
}

/*
 * Query the firmware for the ring sizes, then allocate ring state on
 * every slice and size the ifnet send queue(s) accordingly.
 */
static int
mxge_alloc_rings(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int tx_ring_size;
	int tx_ring_entries, rx_ring_entries;
	int err, slice;

	/* Get ring sizes */
	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine tx ring sizes\n");
		return err;
	}
	tx_ring_size = cmd.data0;

	tx_ring_entries = tx_ring_size / sizeof(mcp_kreq_ether_send_t);
	rx_ring_entries = sc->rx_intr_slots / 2;

	if (bootverbose) {
		device_printf(sc->dev, "tx desc %d, rx desc %d\n",
		    tx_ring_entries, rx_ring_entries);
	}

	sc->ifp->if_nmbclusters = rx_ring_entries * sc->num_slices;
	sc->ifp->if_nmbjclusters = sc->ifp->if_nmbclusters;

	ifq_set_maxlen(&sc->ifp->if_snd, tx_ring_entries - 1);
	ifq_set_ready(&sc->ifp->if_snd);
	ifq_set_subq_cnt(&sc->ifp->if_snd, sc->num_tx_rings);

	if (sc->num_tx_rings > 1) {
		sc->ifp->if_mapsubq = ifq_mapsubq_modulo;
		ifq_set_subq_divisor(&sc->ifp->if_snd, sc->num_tx_rings);
	}

	for (slice = 0; slice < sc->num_slices; slice++) {
		err = mxge_alloc_slice_rings(&sc->ss[slice],
		    rx_ring_entries, tx_ring_entries);
		if (err
		    != 0) {
			device_printf(sc->dev,
			    "alloc %d slice rings failed\n", slice);
			return err;
		}
	}
	return 0;
}

/*
 * Pick the big-ring cluster size for 'mtu': a standard cluster when
 * the padded frame fits, otherwise a jumbo-page cluster.
 */
static void
mxge_choose_params(int mtu, int *cl_size)
{
	int bufsize = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN + MXGEFW_PAD;

	if (bufsize < MCLBYTES) {
		*cl_size = MCLBYTES;
	} else {
		KASSERT(bufsize < MJUMPAGESIZE, ("invalid MTU %d", mtu));
		*cl_size = MJUMPAGESIZE;
	}
}

/*
 * Per-slice open: fetch the lanai pointers to the slice's send and
 * receive rings from the firmware and stock both receive rings with
 * buffers.
 */
static int
mxge_slice_open(struct mxge_slice_state *ss, int cl_size)
{
	mxge_cmd_t cmd;
	int err, i, slice;

	slice = ss - ss->sc->ss;

	/*
	 * Get the lanai pointers to the send and receive rings
	 */
	err = 0;

	bzero(&cmd, sizeof(cmd));	/* silence gcc warning */
	if (ss->sc->num_tx_rings == 1) {
		if (slice == 0) {
			cmd.data0 = slice;
			err = mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SEND_OFFSET,
			    &cmd);
			ss->tx.lanai = (volatile mcp_kreq_ether_send_t *)
			    (ss->sc->sram + cmd.data0);
			/* Leave send_go and send_stop as NULL */
		}
	} else {
		cmd.data0 = slice;
		err = mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
		ss->tx.lanai = (volatile mcp_kreq_ether_send_t *)
		    (ss->sc->sram + cmd.data0);
		ss->tx.send_go = (volatile uint32_t *)
		    (ss->sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
		ss->tx.send_stop = (volatile uint32_t *)
		    (ss->sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
	}

	cmd.data0 = slice;
	err |= mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
	ss->rx_data.rx_small.lanai =
	    (volatile mcp_kreq_ether_recv_t *)(ss->sc->sram + cmd.data0);

	cmd.data0 = slice;
	err |= mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
	ss->rx_data.rx_big.lanai =
	    (volatile mcp_kreq_ether_recv_t *)(ss->sc->sram + cmd.data0);

	if (err != 0) {
		if_printf(ss->sc->ifp,
		    "failed to get ring sizes or locations\n");
		return EIO;
	}

	/*
	 * Stock small receive ring
	 */
	for (i = 0; i <= ss->rx_data.rx_small.mask; i++) {
		err = mxge_get_buf_small(&ss->rx_data.rx_small,
		    ss->rx_data.rx_small.info[i].map, i, TRUE);
		if (err) {
			if_printf(ss->sc->ifp, "alloced %d/%d smalls\n", i,
			    ss->rx_data.rx_small.mask + 1);
			return ENOMEM;
		}
	}

	/*
	 * Stock big receive ring
	 */
	for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
		ss->rx_data.rx_big.shadow[i].addr_low = 0xffffffff;
		ss->rx_data.rx_big.shadow[i].addr_high = 0xffffffff;
	}

	ss->rx_data.rx_big.cl_size = cl_size;

	for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
		err = mxge_get_buf_big(&ss->rx_data.rx_big,
		    ss->rx_data.rx_big.info[i].map, i, TRUE);
		if (err) {
			if_printf(ss->sc->ifp, "alloced %d/%d bigs\n", i,
			    ss->rx_data.rx_big.mask + 1);
			return ENOMEM;
		}
	}
	return 0;
}

static int
mxge_open(mxge_softc_t *sc)
{
	struct ifnet *ifp = sc->ifp;
	mxge_cmd_t cmd;
	int err, slice, cl_size, i;
	bus_addr_t bus;
	volatile uint8_t *itable;
	struct mxge_slice_state *ss;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	/* Copy the MAC address in case it was overridden */
	bcopy(IF_LLADDR(ifp), sc->mac_addr, ETHER_ADDR_LEN);

	err = mxge_reset(sc, 1);
	if (err != 0) {
		if_printf(ifp, "failed to reset\n");
		return EIO;
	}

	if (sc->num_slices > 1) {
		/*
		 * Setup the indirect table.
3353 */ 3354 if_ringmap_rdrtable(sc->ring_map, sc->rdr_table, NETISR_CPUMAX); 3355 3356 cmd.data0 = NETISR_CPUMAX; 3357 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, &cmd); 3358 3359 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, &cmd); 3360 if (err != 0) { 3361 if_printf(ifp, "failed to setup rss tables\n"); 3362 return err; 3363 } 3364 3365 itable = sc->sram + cmd.data0; 3366 for (i = 0; i < NETISR_CPUMAX; i++) 3367 itable[i] = sc->rdr_table[i]; 3368 3369 if (sc->use_rss) { 3370 volatile uint8_t *hwkey; 3371 uint8_t swkey[MXGE_HWRSS_KEYLEN]; 3372 3373 /* 3374 * Setup Toeplitz key. 3375 */ 3376 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_KEY_OFFSET, 3377 &cmd); 3378 if (err != 0) { 3379 if_printf(ifp, "failed to get rsskey\n"); 3380 return err; 3381 } 3382 hwkey = sc->sram + cmd.data0; 3383 3384 toeplitz_get_key(swkey, MXGE_HWRSS_KEYLEN); 3385 for (i = 0; i < MXGE_HWRSS_KEYLEN; ++i) 3386 hwkey[i] = swkey[i]; 3387 wmb(); 3388 3389 err = mxge_send_cmd(sc, MXGEFW_CMD_RSS_KEY_UPDATED, 3390 &cmd); 3391 if (err != 0) { 3392 if_printf(ifp, "failed to update rsskey\n"); 3393 return err; 3394 } 3395 if (bootverbose) 3396 if_printf(ifp, "RSS key updated\n"); 3397 } 3398 3399 cmd.data0 = 1; 3400 if (sc->use_rss) { 3401 if (bootverbose) 3402 if_printf(ifp, "input hash: RSS\n"); 3403 cmd.data1 = MXGEFW_RSS_HASH_TYPE_IPV4 | 3404 MXGEFW_RSS_HASH_TYPE_TCP_IPV4; 3405 } else { 3406 if (bootverbose) 3407 if_printf(ifp, "input hash: SRC_DST_PORT\n"); 3408 cmd.data1 = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 3409 } 3410 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3411 if (err != 0) { 3412 if_printf(ifp, "failed to enable slices\n"); 3413 return err; 3414 } 3415 } 3416 3417 cmd.data0 = MXGEFW_TSO_MODE_NDIS; 3418 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_TSO_MODE, &cmd); 3419 if (err) { 3420 /* 3421 * Can't change TSO mode to NDIS, never allow TSO then 3422 */ 3423 if_printf(ifp, "failed to set TSO mode\n"); 3424 ifp->if_capenable &= ~IFCAP_TSO; 3425 
ifp->if_capabilities &= ~IFCAP_TSO; 3426 ifp->if_hwassist &= ~CSUM_TSO; 3427 } 3428 3429 mxge_choose_params(ifp->if_mtu, &cl_size); 3430 3431 cmd.data0 = 1; 3432 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, &cmd); 3433 /* 3434 * Error is only meaningful if we're trying to set 3435 * MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 3436 */ 3437 3438 /* 3439 * Give the firmware the mtu and the big and small buffer 3440 * sizes. The firmware wants the big buf size to be a power 3441 * of two. Luckily, DragonFly's clusters are powers of two 3442 */ 3443 cmd.data0 = ifp->if_mtu + ETHER_HDR_LEN + EVL_ENCAPLEN; 3444 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3445 3446 cmd.data0 = MXGE_RX_SMALL_BUFLEN; 3447 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd); 3448 3449 cmd.data0 = cl_size; 3450 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3451 3452 if (err != 0) { 3453 if_printf(ifp, "failed to setup params\n"); 3454 goto abort; 3455 } 3456 3457 /* Now give him the pointer to the stats block */ 3458 for (slice = 0; slice < sc->num_slices; slice++) { 3459 ss = &sc->ss[slice]; 3460 cmd.data0 = MXGE_LOWPART_TO_U32(ss->fw_stats_dma.dmem_busaddr); 3461 cmd.data1 = MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.dmem_busaddr); 3462 cmd.data2 = sizeof(struct mcp_irq_data); 3463 cmd.data2 |= (slice << 16); 3464 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3465 } 3466 3467 if (err != 0) { 3468 bus = sc->ss->fw_stats_dma.dmem_busaddr; 3469 bus += offsetof(struct mcp_irq_data, send_done_count); 3470 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3471 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3472 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3473 &cmd); 3474 3475 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3476 sc->fw_multicast_support = 0; 3477 } else { 3478 sc->fw_multicast_support = 1; 3479 } 3480 3481 if (err != 0) { 3482 if_printf(ifp, "failed to setup params\n"); 3483 goto abort; 3484 } 3485 3486 for (slice 
= 0; slice < sc->num_slices; slice++) { 3487 err = mxge_slice_open(&sc->ss[slice], cl_size); 3488 if (err != 0) { 3489 if_printf(ifp, "couldn't open slice %d\n", slice); 3490 goto abort; 3491 } 3492 } 3493 3494 /* Finally, start the firmware running */ 3495 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3496 if (err) { 3497 if_printf(ifp, "Couldn't bring up link\n"); 3498 goto abort; 3499 } 3500 3501 ifp->if_flags |= IFF_RUNNING; 3502 for (i = 0; i < sc->num_tx_rings; ++i) { 3503 mxge_tx_ring_t *tx = &sc->ss[i].tx; 3504 3505 ifsq_clr_oactive(tx->ifsq); 3506 ifsq_watchdog_start(&tx->watchdog); 3507 } 3508 3509 return 0; 3510 3511 abort: 3512 mxge_free_mbufs(sc); 3513 return err; 3514 } 3515 3516 static void 3517 mxge_close(mxge_softc_t *sc, int down) 3518 { 3519 struct ifnet *ifp = sc->ifp; 3520 mxge_cmd_t cmd; 3521 int err, old_down_cnt, i; 3522 3523 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3524 3525 if (!down) { 3526 old_down_cnt = sc->down_cnt; 3527 wmb(); 3528 3529 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3530 if (err) 3531 if_printf(ifp, "Couldn't bring down link\n"); 3532 3533 if (old_down_cnt == sc->down_cnt) { 3534 /* 3535 * Wait for down irq 3536 * XXX racy 3537 */ 3538 ifnet_deserialize_all(ifp); 3539 DELAY(10 * sc->intr_coal_delay); 3540 ifnet_serialize_all(ifp); 3541 } 3542 3543 wmb(); 3544 if (old_down_cnt == sc->down_cnt) 3545 if_printf(ifp, "never got down irq\n"); 3546 } 3547 mxge_free_mbufs(sc); 3548 3549 ifp->if_flags &= ~IFF_RUNNING; 3550 for (i = 0; i < sc->num_tx_rings; ++i) { 3551 mxge_tx_ring_t *tx = &sc->ss[i].tx; 3552 3553 ifsq_clr_oactive(tx->ifsq); 3554 ifsq_watchdog_stop(&tx->watchdog); 3555 } 3556 } 3557 3558 static void 3559 mxge_setup_cfg_space(mxge_softc_t *sc) 3560 { 3561 device_t dev = sc->dev; 3562 int reg; 3563 uint16_t lnk, pectl; 3564 3565 /* Find the PCIe link width and set max read request to 4KB */ 3566 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 3567 lnk = pci_read_config(dev, reg + 0x12, 2); 3568 
		sc->link_width = (lnk >> 4) & 0x3f;

		if (sc->pectl == 0) {
			/* reg + 0x8 is the PCIe Device Control register;
			 * bits 14:12 select the max read request size (5 = 4KB) */
			pectl = pci_read_config(dev, reg + 0x8, 2);
			pectl = (pectl & ~0x7000) | (5 << 12);
			pci_write_config(dev, reg + 0x8, pectl, 2);
			sc->pectl = pectl;
		} else {
			/* Restore saved pectl after watchdog reset */
			pci_write_config(dev, reg + 0x8, sc->pectl, 2);
		}
	}

	/* Enable DMA and memory space access */
	pci_enable_busmaster(dev);
}

/*
 * Read the NIC's reboot status register through the vendor-specific
 * config-space window; returns (uint32_t)-1 when the vendor capability
 * cannot be found.
 */
static uint32_t
mxge_read_reboot(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	uint32_t vs;

	/* Find the vendor specific offset */
	if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
		if_printf(sc->ifp, "could not find vendor specific offset\n");
		return (uint32_t)-1;
	}
	/* Enable read32 mode */
	pci_write_config(dev, vs + 0x10, 0x3, 1);
	/* Tell NIC which register to read */
	pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
	return pci_read_config(dev, vs + 0x14, 4);
}

/*
 * TX watchdog handler: detect whether the NIC spontaneously rebooted
 * (its config space — notably the busmaster bit — reads back reset),
 * and if so restore config space, reload the firmware and reopen the
 * interface.  Reschedules the tick callout on success.
 */
static void
mxge_watchdog_reset(mxge_softc_t *sc)
{
	struct pci_devinfo *dinfo;
	int err, running;
	uint32_t reboot;
	uint16_t cmd;

	err = ENXIO;

	if_printf(sc->ifp, "Watchdog reset!\n");

	/*
	 * Check to see if the NIC rebooted.  If it did, then all of
	 * PCI config space has been reset, and things like the
	 * busmaster bit will be zero.  If this is the case, then we
	 * must restore PCI config space before the NIC can be used
	 * again
	 */
	cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
	if (cmd == 0xffff) {
		/*
		 * Maybe the watchdog caught the NIC rebooting; wait
		 * up to 100ms for it to finish.  If it does not come
		 * back, then give up
		 */
		DELAY(1000*100);
		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
		if (cmd == 0xffff)
			if_printf(sc->ifp, "NIC disappeared!\n");
	}
	if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
		/* Print the reboot status */
		reboot = mxge_read_reboot(sc);
		if_printf(sc->ifp, "NIC rebooted, status = 0x%x\n", reboot);

		running = sc->ifp->if_flags & IFF_RUNNING;
		if (running) {
			/*
			 * Quiesce NIC so that TX routines will not try to
			 * xmit after restoration of BAR
			 */

			/* Mark the link as down */
			if (sc->link_state) {
				sc->ifp->if_link_state = LINK_STATE_DOWN;
				if_link_state_change(sc->ifp);
			}
			mxge_close(sc, 1);
		}
		/* Restore PCI configuration space */
		dinfo = device_get_ivars(sc->dev);
		pci_cfg_restore(sc->dev, dinfo);

		/* And redo any changes we made to our config space */
		mxge_setup_cfg_space(sc);

		/* Reload f/w */
		err = mxge_load_firmware(sc, 0);
		if (err)
			if_printf(sc->ifp, "Unable to re-load f/w\n");
		if (running && !err) {
			int i;

			err = mxge_open(sc);

			for (i = 0; i < sc->num_tx_rings; ++i)
				ifsq_devstart_sched(sc->ss[i].tx.ifsq);
		}
		sc->watchdog_resets++;
	} else {
		if_printf(sc->ifp, "NIC did not reboot, not resetting\n");
		err = 0;
	}
	if (err) {
		if_printf(sc->ifp, "watchdog reset failed\n");
	} else {
		if (sc->dying == 2)
			sc->dying = 0;
		callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	}
}

/*
 * Dump the state of a TX ring that appears wedged, for debugging.
 */
static void
mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
{
	if_printf(sc->ifp, "slice %d struck? ring state:\n", slice);
	if_printf(sc->ifp, "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
	    tx->req, tx->done, tx->queue_active);
	if_printf(sc->ifp, "tx.activate=%d tx.deactivate=%d\n",
	    tx->activate, tx->deactivate);
	if_printf(sc->ifp, "pkt_done=%d fw=%d\n",
	    tx->pkt_done, be32toh(sc->ss->fw_stats->send_done_count));
}

/*
 * Snapshot the interface packet counters and return the number of
 * packets (RX + TX) seen since the previous call.
 */
static u_long
mxge_update_stats(mxge_softc_t *sc)
{
	u_long ipackets, opackets, pkts;

	IFNET_STAT_GET(sc->ifp, ipackets, ipackets);
	IFNET_STAT_GET(sc->ifp, opackets, opackets);

	pkts = ipackets - sc->ipackets;
	pkts += opackets - sc->opackets;

	sc->ipackets = ipackets;
	sc->opackets = opackets;

	return pkts;
}

/*
 * Periodic callout: aggregate stats, re-probe media if requested, and
 * when the NIC has been idle verify it has not suffered a hardware
 * fault (busmaster bit cleared).  Idle NICs are polled 4x less often.
 */
static void
mxge_tick(void *arg)
{
	mxge_softc_t *sc = arg;
	u_long pkts = 0;
	int err = 0;
	int ticks;

	lwkt_serialize_enter(&sc->main_serialize);

	ticks = mxge_ticks;
	if (sc->ifp->if_flags & IFF_RUNNING) {
		/* Aggregate stats from different slices */
		pkts = mxge_update_stats(sc);
		if (sc->need_media_probe)
			mxge_media_probe(sc);
	}
	if (pkts == 0) {
		uint16_t cmd;

		/* Ensure NIC did not suffer h/w fault while idle */
		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
		if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
			sc->dying = 2;
			mxge_serialize_skipmain(sc);
			mxge_watchdog_reset(sc);
			mxge_deserialize_skipmain(sc);
			err = ENXIO;	/* reset path reschedules itself */
		}

		/* Look less often if NIC is idle */
		ticks *= 4;
	}

	if (err == 0)
		callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);

	lwkt_serialize_exit(&sc->main_serialize);
}

/*
 * ifmedia change callback: the only controllable knob is flow control
 * (pause); everything else is forced.  No-op when the requested pause
 * state matches the current one.
 */
static int
mxge_media_change(struct ifnet *ifp)
{
	mxge_softc_t *sc = ifp->if_softc;
	const struct ifmedia *ifm = &sc->media;
	int pause;

	if (IFM_OPTIONS(ifm->ifm_media) & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) {
		if (sc->pause)
			return 0;
		pause = 1;
	} else {
		if (!sc->pause)
			return 0;
		pause = 0;
	}
	return mxge_change_pause(sc, pause);
}

/*
 * Change the MTU.  A running interface is closed and reopened with the
 * new value; if the reopen fails the old MTU is restored and the
 * interface is reopened again with it.
 */
static int
mxge_change_mtu(mxge_softc_t *sc, int mtu)
{
	struct ifnet *ifp = sc->ifp;
	int real_mtu, old_mtu;
	int err = 0;

	real_mtu = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN;
	if (mtu > sc->max_mtu || real_mtu < 60)
		return EINVAL;

	old_mtu = ifp->if_mtu;
	ifp->if_mtu = mtu;
	if (ifp->if_flags & IFF_RUNNING) {
		mxge_close(sc, 0);
		err = mxge_open(sc);
		if (err != 0) {
			/* Reopen with the previous MTU on failure */
			ifp->if_mtu = old_mtu;
			mxge_close(sc, 0);
			mxge_open(sc);
		}
	}
	return err;
}

/*
 * ifmedia status callback: report link state and the (fixed) current
 * media, including the pause options when flow control is on.
 */
static void
mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	mxge_softc_t *sc = ifp->if_softc;

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (sc->link_state)
		ifmr->ifm_status |= IFM_ACTIVE;

	/*
	 * Autoselect is not supported, so the current media
	 * should be delivered.
	 */
	ifmr->ifm_active |= sc->current_media;
	if (sc->current_media != IFM_NONE) {
		ifmr->ifm_active |= MXGE_IFM;
		if (sc->pause)
			ifmr->ifm_active |= IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE;
	}
}

/*
 * ioctl handler; runs with the ifnet fully serialized.  Handles MTU,
 * flag, multicast, capability and media requests, deferring everything
 * else to ether_ioctl().
 */
static int
mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data,
    struct ucred *cr __unused)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int err, mask;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);
	err = 0;

	switch (command) {
	case SIOCSIFMTU:
		err = mxge_change_mtu(sc, ifr->ifr_mtu);
		break;

	case SIOCSIFFLAGS:
		if (sc->dying)
			return EINVAL;

		if (ifp->if_flags & IFF_UP) {
			if (!(ifp->if_flags & IFF_RUNNING)) {
				err = mxge_open(sc);
			} else {
				/*
				 * Take care of PROMISC and ALLMULTI
				 * flag changes
				 */
				mxge_change_promisc(sc,
				    ifp->if_flags & IFF_PROMISC);
				mxge_set_multicast_list(sc);
			}
		} else {
			if (ifp->if_flags & IFF_RUNNING)
				mxge_close(sc, 0);
		}
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		mxge_set_multicast_list(sc);
		break;

	case SIOCSIFCAP:
		/* Toggle only the capabilities that changed */
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			ifp->if_capenable ^= IFCAP_TXCSUM;
			if (ifp->if_capenable & IFCAP_TXCSUM)
				ifp->if_hwassist |= CSUM_TCP | CSUM_UDP;
			else
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
		}
		if (mask & IFCAP_TSO) {
			ifp->if_capenable ^= IFCAP_TSO;
			if (ifp->if_capenable & IFCAP_TSO)
				ifp->if_hwassist |= CSUM_TSO;
			else
				ifp->if_hwassist &= ~CSUM_TSO;
		}
		if (mask & IFCAP_RXCSUM)
			ifp->if_capenable ^= IFCAP_RXCSUM;
		if (mask & IFCAP_VLAN_HWTAGGING)
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
		break;

	case SIOCGIFMEDIA:
	case SIOCSIFMEDIA:
		err = ifmedia_ioctl(ifp, (struct ifreq *)data,
		    &sc->media, command);
		break;

	default:
		err = ether_ioctl(ifp, command, data);
		break;
	}
	return err;
}

/*
 * Copy the module tunables into the softc, clamping them to sane
 * ranges.
 */
static void
mxge_fetch_tunables(mxge_softc_t *sc)
{
	int ifm;

	sc->intr_coal_delay = mxge_intr_coal_delay;
	if (sc->intr_coal_delay < 0 || sc->intr_coal_delay > (10 * 1000))
		sc->intr_coal_delay = MXGE_INTR_COAL_DELAY;

	/* XXX */
	if (mxge_ticks == 0)
		mxge_ticks = hz / 2;

	ifm = ifmedia_str2ethfc(mxge_flowctrl);
	if (ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE))
		sc->pause = 1;

	sc->use_rss = mxge_use_rss;

	sc->throttle = mxge_throttle;
	if (sc->throttle && sc->throttle > MXGE_MAX_THROTTLE)
		sc->throttle = MXGE_MAX_THROTTLE;
	if (sc->throttle && sc->throttle < MXGE_MIN_THROTTLE)
		sc->throttle = MXGE_MIN_THROTTLE;
}

/*
 * Free the per-slice DMA memory (firmware stats and rx interrupt
 * queue) and the slice array itself.  Safe to call when slices were
 * never allocated.
 */
static void
mxge_free_slices(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int i;

	if (sc->ss == NULL)
		return;

	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];
		if (ss->fw_stats != NULL) {
			mxge_dma_free(&ss->fw_stats_dma);
			ss->fw_stats = NULL;
		}
		if (ss->rx_data.rx_done.entry != NULL) {
			mxge_dma_free(&ss->rx_done_dma);
			ss->rx_data.rx_done.entry = NULL;
		}
	}
	kfree(sc->ss, M_DEVBUF);
	sc->ss = NULL;
}

/*
 * Allocate the slice array plus each slice's rx interrupt queue and
 * firmware stats block.  Partial allocations are cleaned up later by
 * mxge_free_slices() in the caller's error path.
 */
static int
mxge_alloc_slices(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct mxge_slice_state *ss;
	size_t bytes;
	int err, i, rx_ring_size;

	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		return err;
	}
	rx_ring_size = cmd.data0;
	sc->rx_intr_slots = 2 * (rx_ring_size / sizeof (mcp_dma_addr_t));

	bytes = sizeof(*sc->ss) * sc->num_slices;
	sc->ss = kmalloc(bytes, M_DEVBUF,
	    M_WAITOK | M_ZERO | M_CACHEALIGN);

	for (i = 0; i < sc->num_slices;
	    i++) {
		ss = &sc->ss[i];

		ss->sc = sc;

		lwkt_serialize_init(&ss->rx_data.rx_serialize);
		lwkt_serialize_init(&ss->tx.tx_serialize);
		ss->intr_rid = -1;	/* marks "no MSI-X vector allocated" */

		/*
		 * Allocate per-slice rx interrupt queue
		 * XXX assume 4bytes mcp_slot
		 */
		bytes = sc->rx_intr_slots * sizeof(mcp_slot_t);
		err = mxge_dma_alloc(sc, &ss->rx_done_dma, bytes, 4096);
		if (err != 0) {
			device_printf(sc->dev,
			    "alloc %d slice rx_done failed\n", i);
			return err;
		}
		ss->rx_data.rx_done.entry = ss->rx_done_dma.dmem_addr;

		/*
		 * Allocate the per-slice firmware stats
		 */
		bytes = sizeof(*ss->fw_stats);
		err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
		    sizeof(*ss->fw_stats), 64);
		if (err != 0) {
			device_printf(sc->dev,
			    "alloc %d fw_stats failed\n", i);
			return err;
		}
		ss->fw_stats = ss->fw_stats_dma.dmem_addr;
	}
	return 0;
}

/*
 * Decide how many slices (RX queues) and TX rings to use.  Requires
 * multiple netisr cpus, MSI-X, and the RSS-capable firmware; falls
 * back to a single slice (restoring the original firmware) whenever
 * any step fails.
 */
static void
mxge_slice_probe(mxge_softc_t *sc)
{
	int status, max_intr_slots, max_slices, num_slices;
	int msix_cnt, msix_enable, multi_tx;
	mxge_cmd_t cmd;
	const char *old_fw;

	sc->num_slices = 1;
	sc->num_tx_rings = 1;

	num_slices = device_getenv_int(sc->dev, "num_slices", mxge_num_slices);
	if (num_slices == 1)
		return;

	if (netisr_ncpus == 1)
		return;

	msix_enable = device_getenv_int(sc->dev, "msix.enable",
	    mxge_msix_enable);
	if (!msix_enable)
		return;

	msix_cnt = pci_msix_count(sc->dev);
	if (msix_cnt < 2)
		return;
	if (bootverbose)
		device_printf(sc->dev, "MSI-X count %d\n", msix_cnt);

	/*
	 * Now load the slice aware firmware see what it supports
	 */
	old_fw = sc->fw_name;
	if (old_fw == mxge_fw_aligned)
		sc->fw_name = mxge_fw_rss_aligned;
	else
		sc->fw_name = mxge_fw_rss_unaligned;
	status = mxge_load_firmware(sc, 0);
	if (status != 0) {
		device_printf(sc->dev, "Falling back to a single slice\n");
		return;
	}

	/*
	 * Try to send a reset command to the card to see if it is alive
	 */
	memset(&cmd, 0, sizeof(cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		goto abort_with_fw;
	}

	/*
	 * Get rx ring size to calculate rx interrupt queue size
	 */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		goto abort_with_fw;
	}
	max_intr_slots = 2 * (cmd.data0 / sizeof(mcp_dma_addr_t));

	/*
	 * Tell it the size of the rx interrupt queue
	 */
	cmd.data0 = max_intr_slots * sizeof(struct mcp_slot);
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
		goto abort_with_fw;
	}

	/*
	 * Ask the maximum number of slices it supports
	 */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
	if (status != 0) {
		device_printf(sc->dev,
		    "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
		goto abort_with_fw;
	}
	max_slices = cmd.data0;
	if (bootverbose)
		device_printf(sc->dev, "max slices %d\n", max_slices);

	/* Each slice needs its own MSI-X vector */
	if (max_slices > msix_cnt)
		max_slices = msix_cnt;

	sc->ring_map = if_ringmap_alloc(sc->dev, num_slices, max_slices);
	sc->num_slices = if_ringmap_count(sc->ring_map);

	multi_tx = device_getenv_int(sc->dev, "multi_tx", mxge_multi_tx);
	if (multi_tx)
		sc->num_tx_rings = sc->num_slices;

	if (bootverbose) {
		device_printf(sc->dev, "using %d slices, max %d\n",
		    sc->num_slices, max_slices);
	}

	if (sc->num_slices == 1)
		goto abort_with_fw;
	return;

abort_with_fw:
	/* Revert to the non-RSS firmware we started with */
	sc->fw_name = old_fw;
	mxge_load_firmware(sc, 0);
}

/*
 * Build the serializer array used by the ifnet serialize callbacks:
 * main serializer first, then every slice's rx serializer, then every
 * slice's tx serializer.  The order is relied upon elsewhere.
 */
static void
mxge_setup_serialize(struct mxge_softc *sc)
{
	int i = 0, slice;

	/* Main + rx + tx */
	sc->nserialize = (2 * sc->num_slices) + 1;
	sc->serializes =
	    kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *),
	        M_DEVBUF, M_WAITOK | M_ZERO);

	/*
	 * Setup serializes
	 *
	 * NOTE: Order is critical
	 */

	KKASSERT(i < sc->nserialize);
	sc->serializes[i++] = &sc->main_serialize;

	for (slice = 0; slice < sc->num_slices; ++slice) {
		KKASSERT(i < sc->nserialize);
		sc->serializes[i++] = &sc->ss[slice].rx_data.rx_serialize;
	}

	for (slice = 0; slice < sc->num_slices; ++slice) {
		KKASSERT(i < sc->nserialize);
		sc->serializes[i++] = &sc->ss[slice].tx.tx_serialize;
	}

	KKASSERT(i == sc->nserialize);
}

/* ifnet serialize callback: enter all driver serializers */
static void
mxge_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
{
	struct mxge_softc *sc = ifp->if_softc;

	ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz);
}

/* ifnet deserialize callback: exit all driver serializers */
static void
mxge_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
{
	struct mxge_softc *sc = ifp->if_softc;

	ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz);
}

/* ifnet tryserialize callback: non-blocking variant of mxge_serialize */
static int
mxge_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
{
	struct mxge_softc *sc = ifp->if_softc;

	return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz);
}

#ifdef INVARIANTS

/* Debug-only assertion that the requested serializers are (not) held */
static void
mxge_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
    boolean_t serialized)
{
	struct mxge_softc *sc = ifp->if_softc;

	ifnet_serialize_array_assert(sc->serializes, sc->nserialize,
	    slz, serialized);
}

#endif	/* INVARIANTS */

#ifdef IFPOLL_ENABLE

/*
 * Polling handler for one slice's RX ring; called with the slice's rx
 * serializer held.
 */
static void
mxge_npoll_rx(struct ifnet *ifp, void *xss, int
    cycle)
{
	struct mxge_slice_state *ss = xss;
	mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;

	ASSERT_SERIALIZED(&ss->rx_data.rx_serialize);

	if (rx_done->entry[rx_done->idx].length != 0) {
		mxge_clean_rx_done(&ss->sc->arpcom.ac_if, &ss->rx_data, cycle);
	} else {
		/*
		 * XXX
		 * This register writing obviously has cost,
		 * however, if we don't hand back the rx token,
		 * the upcoming packets may suffer ridiculously
		 * large delay, as observed on 8AL-C using ping(8).
		 */
		*ss->irq_claim = be32toh(3);
	}
}

/*
 * ifpoll registration callback: register one RX poll handler per
 * slice on that slice's interrupt cpu.
 */
static void
mxge_npoll(struct ifnet *ifp, struct ifpoll_info *info)
{
	struct mxge_softc *sc = ifp->if_softc;
	int i;

	if (info == NULL)
		return;

	/*
	 * Only poll rx; polling tx and status don't seem to work
	 */
	for (i = 0; i < sc->num_slices; ++i) {
		struct mxge_slice_state *ss = &sc->ss[i];
		int cpu = ss->intr_cpuid;

		KKASSERT(cpu < netisr_ncpus);
		info->ifpi_rx[cpu].poll_func = mxge_npoll_rx;
		info->ifpi_rx[cpu].arg = ss;
		info->ifpi_rx[cpu].serializer = &ss->rx_data.rx_serialize;
	}
}

#endif	/* IFPOLL_ENABLE */

/*
 * Device attach: map the board, parse the EEPROM strings, load the
 * firmware, size and allocate the slices, interrupts and rings, then
 * register the ifnet.  All failures funnel through mxge_detach().
 */
static int
mxge_attach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int err, rid, i;

	/*
	 * Avoid rewriting half the lines in this file to use
	 * &sc->arpcom.ac_if instead
	 */
	sc->ifp = ifp;
	sc->dev = dev;
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	/* IFM_ETH_FORCEPAUSE can't be changed */
	ifmedia_init(&sc->media, IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE,
	    mxge_media_change, mxge_media_status);

	lwkt_serialize_init(&sc->main_serialize);

	mxge_fetch_tunables(sc);

	err = bus_dma_tag_create(NULL,		/* parent */
	    1,					/* alignment */
	    0,					/* boundary */
	    BUS_SPACE_MAXADDR,			/* low */
	    BUS_SPACE_MAXADDR,			/* high */
	    NULL, NULL,				/* filter */
	    BUS_SPACE_MAXSIZE_32BIT,		/* maxsize */
	    0,					/* num segs */
	    BUS_SPACE_MAXSIZE_32BIT,		/* maxsegsize */
	    0,					/* flags */
	    &sc->parent_dmat);			/* tag */
	if (err != 0) {
		device_printf(dev, "Err %d allocating parent dmat\n", err);
		goto failed;
	}

	callout_init_mp(&sc->co_hdl);

	mxge_setup_cfg_space(sc);

	/*
	 * Map the board into the kernel
	 */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto failed;
	}

	sc->sram = rman_get_virtual(sc->mem_res);
	/* 2MB SRAM minus firmware/scratch regions and a guard area */
	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
	if (sc->sram_size > rman_get_size(sc->mem_res)) {
		device_printf(dev, "impossible memory region size %ld\n",
		    rman_get_size(sc->mem_res));
		err = ENXIO;
		goto failed;
	}

	/*
	 * Make NULL terminated copy of the EEPROM strings section of
	 * lanai SRAM
	 */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
	    rman_get_bushandle(sc->mem_res),
	    sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
	    sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE - 2);
	err = mxge_parse_strings(sc);
	if (err != 0) {
		device_printf(dev, "parse EEPROM string failed\n");
		goto failed;
	}

	/*
	 * Enable write combining for efficient use of PCIe bus
	 */
	mxge_enable_wc(sc);

	/*
	 * Allocate the out of band DMA memory
	 */
	err = mxge_dma_alloc(sc, &sc->cmd_dma, sizeof(mxge_cmd_t), 64);
	if (err != 0) {
		device_printf(dev, "alloc cmd DMA buf failed\n");
		goto failed;
	}
	sc->cmd = sc->cmd_dma.dmem_addr;

	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0) {
		device_printf(dev, "alloc zeropad DMA buf failed\n");
		goto failed;
	}

	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0) {
		device_printf(dev, "alloc dmabench DMA buf failed\n");
		goto failed;
	}

	/* Select & load the firmware */
	err = mxge_select_firmware(sc);
	if (err != 0) {
		device_printf(dev, "select firmware failed\n");
		goto failed;
	}

	mxge_slice_probe(sc);
	err = mxge_alloc_slices(sc);
	if (err != 0) {
		device_printf(dev, "alloc slices failed\n");
		goto failed;
	}

	err = mxge_alloc_intr(sc);
	if (err != 0) {
		device_printf(dev, "alloc intr failed\n");
		goto failed;
	}

	/* Setup serializes */
	mxge_setup_serialize(sc);

	err = mxge_reset(sc, 0);
	if (err != 0) {
		device_printf(dev, "reset failed\n");
		goto failed;
	}

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(dev, "failed to allocate rings\n");
		goto failed;
	}

	ifp->if_baudrate = IF_Gbps(10UL);
	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO;
	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;

	ifp->if_capabilities |= IFCAP_VLAN_MTU;
#if 0
	/* Well, its software, sigh */
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
#endif
	ifp->if_capenable = ifp->if_capabilities;

	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_init = mxge_init;
	ifp->if_ioctl = mxge_ioctl;
	ifp->if_start = mxge_start;
#ifdef IFPOLL_ENABLE
	if (sc->intr_type != PCI_INTR_TYPE_LEGACY)
		ifp->if_npoll = mxge_npoll;
#endif
	ifp->if_serialize = mxge_serialize;
	ifp->if_deserialize = mxge_deserialize;
	ifp->if_tryserialize = mxge_tryserialize;
#ifdef INVARIANTS
	ifp->if_serialize_assert = mxge_serialize_assert;
#endif

	/* Increase TSO burst length */
	ifp->if_tsolen = (32 * ETHERMTU);

	/* Initialise the ifmedia structure */
	mxge_media_init(sc);
	mxge_media_probe(sc);

	ether_ifattach(ifp, sc->mac_addr, NULL);

	/* Setup TX rings and subqueues */
	for (i = 0; i < sc->num_tx_rings; ++i) {
		struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
		struct mxge_slice_state *ss = &sc->ss[i];

		ifsq_set_cpuid(ifsq, ss->intr_cpuid);
		ifsq_set_hw_serialize(ifsq, &ss->tx.tx_serialize);
		ifsq_set_priv(ifsq, &ss->tx);
		ss->tx.ifsq = ifsq;

		ifsq_watchdog_init(&ss->tx.watchdog, ifsq, mxge_watchdog, 0);
	}

	/*
	 * XXX
	 * We are not ready to do "gather" jumbo frame, so
	 * limit MTU to MJUMPAGESIZE
	 */
	sc->max_mtu = MJUMPAGESIZE -
	    ETHER_HDR_LEN - EVL_ENCAPLEN - MXGEFW_PAD - 1;
	sc->dying = 0;

	err = mxge_setup_intr(sc);
	if (err != 0) {
		device_printf(dev, "alloc and setup intr failed\n");
		ether_ifdetach(ifp);
		goto failed;
	}

	mxge_add_sysctls(sc);

	/* Increase non-cluster mbuf limit; used by small RX rings */
	mb_inclimit(ifp->if_nmbclusters);

	callout_reset_bycpu(&sc->co_hdl, mxge_ticks, mxge_tick, sc,
	    sc->ss[0].intr_cpuid);
	return 0;

failed:
	mxge_detach(dev);
	return err;
}

/*
 * Device detach; also used as the error-unwind path of mxge_attach(),
 * so every teardown step must tolerate resources that were never
 * allocated.
 */
static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	if (device_is_attached(dev)) {
		struct ifnet *ifp = sc->ifp;
		int mblimit = ifp->if_nmbclusters;

		ifnet_serialize_all(ifp);

		sc->dying = 1;
		if (ifp->if_flags & IFF_RUNNING)
			mxge_close(sc, 1);
		callout_stop(&sc->co_hdl);

		mxge_teardown_intr(sc, sc->num_slices);

		ifnet_deserialize_all(ifp);

		callout_terminate(&sc->co_hdl);

		ether_ifdetach(ifp);

		/* Decrease non-cluster mbuf limit increased by us */
		mb_inclimit(-mblimit);
	}
	ifmedia_removeall(&sc->media);

	/* Quiesce the NIC's dummy RDMA engine before unmapping SRAM */
	if (sc->cmd != NULL && sc->zeropad_dma.dmem_addr != NULL &&
	    sc->sram != NULL)
		mxge_dummy_rdma(sc, 0);

	mxge_free_intr(sc);
	mxge_rem_sysctls(sc);
	mxge_free_rings(sc);

	/* MUST after sysctls, intr and rings are freed */
	mxge_free_slices(sc);

	if (sc->dmabench_dma.dmem_addr != NULL)
		mxge_dma_free(&sc->dmabench_dma);
	if (sc->zeropad_dma.dmem_addr != NULL)
		mxge_dma_free(&sc->zeropad_dma);
	if (sc->cmd_dma.dmem_addr != NULL)
		mxge_dma_free(&sc->cmd_dma);

	if (sc->msix_table_res != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(2),
		    sc->msix_table_res);
	}
	if (sc->mem_res != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS,
		    sc->mem_res);
	}

	if (sc->parent_dmat != NULL)
		bus_dma_tag_destroy(sc->parent_dmat);

	if (sc->ring_map != NULL)
		if_ringmap_free(sc->ring_map);

	return 0;
}

/* Nothing to do on shutdown */
static int
mxge_shutdown(device_t dev)
{
	return 0;
}

/*
 * Release the per-slice MSI-X resources; 'setup' tells whether
 * pci_setup_msix() succeeded and must be undone.  Only used in
 * multi-slice (MSI-X) mode.
 */
static void
mxge_free_msix(struct mxge_softc *sc, boolean_t setup)
{
	int i;

	KKASSERT(sc->num_slices > 1);

	for (i = 0; i < sc->num_slices; ++i) {
		struct mxge_slice_state *ss = &sc->ss[i];

		if (ss->intr_res != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ,
			    ss->intr_rid, ss->intr_res);
		}
		/* intr_rid < 0 means no vector was ever allocated */
		if (ss->intr_rid >= 0)
			pci_release_msix_vector(sc->dev, ss->intr_rid);
	}
	if (setup)
		pci_teardown_msix(sc->dev);
}

/*
 * Allocate one MSI-X vector per slice, each targeted at that slice's
 * cpu.  Vector 0 handles tx/status plus slice 0 rx ("comb"); the rest
 * handle rx (or rx+tx in multi-TX mode) for their slice.  On any
 * failure everything allocated so far is released.
 */
static int
mxge_alloc_msix(struct mxge_softc *sc)
{
	struct mxge_slice_state *ss;
	int rid, error, i;
	boolean_t setup = FALSE;

	KKASSERT(sc->num_slices > 1);

	ss = &sc->ss[0];

	ss->intr_serialize = &sc->main_serialize;
	ss->intr_func = mxge_msi;
	ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0),
	    "%s comb", device_get_nameunit(sc->dev));
	ss->intr_desc = ss->intr_desc0;
	ss->intr_cpuid = if_ringmap_cpumap(sc->ring_map, 0);

	for (i = 1; i < sc->num_slices; ++i) {
		ss = &sc->ss[i];

		ss->intr_serialize = &ss->rx_data.rx_serialize;
		if (sc->num_tx_rings == 1) {
			ss->intr_func = mxge_msix_rx;
			ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0),
			    "%s rx%d", device_get_nameunit(sc->dev), i);
		} else {
			ss->intr_func = mxge_msix_rxtx;
			ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0),
			    "%s rxtx%d", device_get_nameunit(sc->dev), i);
		}
		ss->intr_desc = ss->intr_desc0;
		ss->intr_cpuid = if_ringmap_cpumap(sc->ring_map, i);
	}

	/* The MSI-X table lives behind BAR 2 */
	rid = PCIR_BAR(2);
	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (sc->msix_table_res == NULL) {
		device_printf(sc->dev, "couldn't alloc MSI-X table res\n");
		return ENXIO;
	}

	error = pci_setup_msix(sc->dev);
	if (error) {
		device_printf(sc->dev, "could not setup MSI-X\n");
		goto back;
	}
	setup = TRUE;

	for (i = 0; i < sc->num_slices; ++i) {
		ss = &sc->ss[i];

		error = pci_alloc_msix_vector(sc->dev, i, &ss->intr_rid,
		    ss->intr_cpuid);
		if (error) {
			device_printf(sc->dev, "could not alloc "
			    "MSI-X %d on cpu%d\n", i, ss->intr_cpuid);
			goto back;
		}

		ss->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
		    &ss->intr_rid, RF_ACTIVE);
		if (ss->intr_res == NULL) {
			device_printf(sc->dev, "could not alloc "
			    "MSI-X %d resource\n", i);
			error = ENXIO;
			goto back;
		}
	}

	pci_enable_msix(sc->dev);
	sc->intr_type = PCI_INTR_TYPE_MSIX;
back:
	if (error)
		mxge_free_msix(sc, setup);
	return error;
}

/*
 * Allocate the interrupt resources: MSI-X in multi-slice mode,
 * otherwise a single MSI or legacy interrupt on slice 0.
 */
static int
mxge_alloc_intr(struct mxge_softc *sc)
{
	struct mxge_slice_state *ss;
	u_int irq_flags;

	if (sc->num_slices > 1) {
		int error;

		error = mxge_alloc_msix(sc);
		if (error)
			return error;
		KKASSERT(sc->intr_type == PCI_INTR_TYPE_MSIX);
		return 0;
	}

	ss = &sc->ss[0];

	sc->intr_type = pci_alloc_1intr(sc->dev, mxge_msi_enable,
	    &ss->intr_rid, &irq_flags);

	ss->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
	    &ss->intr_rid, irq_flags);
	if (ss->intr_res == NULL) {
		device_printf(sc->dev, "could not alloc interrupt\n");
		return ENXIO;
	}

	if (sc->intr_type == PCI_INTR_TYPE_LEGACY)
		ss->intr_func = mxge_legacy;
	else
		ss->intr_func = mxge_msi;
	ss->intr_serialize = &sc->main_serialize;
	ss->intr_cpuid = rman_get_cpuid(ss->intr_res);

	return 0;
}

/*
 * Hook up the interrupt handler of every slice; on failure, tear down
 * the handlers installed so far.
 */
static int
mxge_setup_intr(struct mxge_softc *sc)
{
	int i;

	for (i = 0; i < sc->num_slices; ++i) {
		struct mxge_slice_state *ss = &sc->ss[i];
		int error;

		error = bus_setup_intr_descr(sc->dev, ss->intr_res,
		    INTR_MPSAFE, ss->intr_func, ss, &ss->intr_hand,
		    ss->intr_serialize, ss->intr_desc);
		if (error) {
			device_printf(sc->dev, "can't setup %dth intr\n", i);
			mxge_teardown_intr(sc, i);
			return error;
		}
	}
	return 0;
}

/*
 * Disconnect the interrupt handlers of the first 'cnt' slices.
 */
static void
mxge_teardown_intr(struct mxge_softc *sc, int cnt)
{
	int i;

	if (sc->ss == NULL)
		return;

	for (i = 0; i < cnt; ++i) {
		struct mxge_slice_state *ss = &sc->ss[i];

		bus_teardown_intr(sc->dev, ss->intr_res, ss->intr_hand);
	}
}

/*
 * Release the interrupt resources allocated by mxge_alloc_intr().
 */
static void
mxge_free_intr(struct mxge_softc *sc)
{
	if (sc->ss == NULL)
		return;

	if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
		struct mxge_slice_state *ss = &sc->ss[0];

		if (ss->intr_res != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ,
			    ss->intr_rid, ss->intr_res);
		}
		if (sc->intr_type == PCI_INTR_TYPE_MSI)
pci_release_msi(sc->dev); 4718 } else { 4719 mxge_free_msix(sc, TRUE); 4720 } 4721 } 4722