/******************************************************************************

Copyright (c) 2006-2013, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
27 28 $FreeBSD: head/sys/dev/mxge/if_mxge.c 254263 2013-08-12 23:30:01Z scottl $ 29 30 ***************************************************************************/ 31 32 #include "opt_ifpoll.h" 33 #include "opt_inet.h" 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/linker.h> 38 #include <sys/firmware.h> 39 #include <sys/endian.h> 40 #include <sys/in_cksum.h> 41 #include <sys/sockio.h> 42 #include <sys/mbuf.h> 43 #include <sys/malloc.h> 44 #include <sys/kernel.h> 45 #include <sys/module.h> 46 #include <sys/serialize.h> 47 #include <sys/socket.h> 48 #include <sys/sysctl.h> 49 50 #include <net/if.h> 51 #include <net/if_arp.h> 52 #include <net/ifq_var.h> 53 #include <net/if_ringmap.h> 54 #include <net/ethernet.h> 55 #include <net/if_dl.h> 56 #include <net/if_media.h> 57 #include <net/if_poll.h> 58 59 #include <net/bpf.h> 60 61 #include <net/if_types.h> 62 #include <net/vlan/if_vlan_var.h> 63 #include <net/zlib.h> 64 #include <net/toeplitz.h> 65 66 #include <netinet/in_systm.h> 67 #include <netinet/in.h> 68 #include <netinet/ip.h> 69 #include <netinet/tcp.h> 70 71 #include <sys/bus.h> 72 #include <sys/rman.h> 73 74 #include <bus/pci/pcireg.h> 75 #include <bus/pci/pcivar.h> 76 #include <bus/pci/pci_private.h> /* XXX for pci_cfg_restore */ 77 78 #include <vm/vm.h> /* for pmap_mapdev() */ 79 #include <vm/pmap.h> 80 81 #if defined(__x86_64__) 82 #include <machine/specialreg.h> 83 #endif 84 85 #include <dev/netif/mxge/mxge_mcp.h> 86 #include <dev/netif/mxge/mcp_gen_header.h> 87 #include <dev/netif/mxge/if_mxge_var.h> 88 89 #define MXGE_IFM (IFM_ETHER | IFM_FDX | IFM_ETH_FORCEPAUSE) 90 91 #define MXGE_RX_SMALL_BUFLEN (MHLEN - MXGEFW_PAD) 92 #define MXGE_HWRSS_KEYLEN 16 93 94 /* Tunable params */ 95 static int mxge_nvidia_ecrc_enable = 1; 96 static int mxge_force_firmware = 0; 97 static int mxge_intr_coal_delay = MXGE_INTR_COAL_DELAY; 98 static int mxge_deassert_wait = 1; 99 static int mxge_ticks; 100 static int mxge_num_slices = 0; 101 static int 
mxge_always_promisc = 0; 102 static int mxge_throttle = 0; 103 static int mxge_msi_enable = 1; 104 static int mxge_msix_enable = 1; 105 static int mxge_multi_tx = 1; 106 /* 107 * Don't use RSS by default, its just too slow 108 */ 109 static int mxge_use_rss = 0; 110 111 static char mxge_flowctrl[IFM_ETH_FC_STRLEN] = IFM_ETH_FC_FORCE_NONE; 112 113 static const char *mxge_fw_unaligned = "mxge_ethp_z8e"; 114 static const char *mxge_fw_aligned = "mxge_eth_z8e"; 115 static const char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; 116 static const char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; 117 118 TUNABLE_INT("hw.mxge.num_slices", &mxge_num_slices); 119 TUNABLE_INT("hw.mxge.intr_coal_delay", &mxge_intr_coal_delay); 120 TUNABLE_INT("hw.mxge.nvidia_ecrc_enable", &mxge_nvidia_ecrc_enable); 121 TUNABLE_INT("hw.mxge.force_firmware", &mxge_force_firmware); 122 TUNABLE_INT("hw.mxge.deassert_wait", &mxge_deassert_wait); 123 TUNABLE_INT("hw.mxge.ticks", &mxge_ticks); 124 TUNABLE_INT("hw.mxge.always_promisc", &mxge_always_promisc); 125 TUNABLE_INT("hw.mxge.throttle", &mxge_throttle); 126 TUNABLE_INT("hw.mxge.multi_tx", &mxge_multi_tx); 127 TUNABLE_INT("hw.mxge.use_rss", &mxge_use_rss); 128 TUNABLE_INT("hw.mxge.msi.enable", &mxge_msi_enable); 129 TUNABLE_INT("hw.mxge.msix.enable", &mxge_msix_enable); 130 TUNABLE_STR("hw.mxge.flow_ctrl", mxge_flowctrl, sizeof(mxge_flowctrl)); 131 132 static int mxge_probe(device_t dev); 133 static int mxge_attach(device_t dev); 134 static int mxge_detach(device_t dev); 135 static int mxge_shutdown(device_t dev); 136 137 static int mxge_alloc_intr(struct mxge_softc *sc); 138 static void mxge_free_intr(struct mxge_softc *sc); 139 static int mxge_setup_intr(struct mxge_softc *sc); 140 static void mxge_teardown_intr(struct mxge_softc *sc, int cnt); 141 142 static device_method_t mxge_methods[] = { 143 /* Device interface */ 144 DEVMETHOD(device_probe, mxge_probe), 145 DEVMETHOD(device_attach, mxge_attach), 146 DEVMETHOD(device_detach, mxge_detach), 
147 DEVMETHOD(device_shutdown, mxge_shutdown), 148 DEVMETHOD_END 149 }; 150 151 static driver_t mxge_driver = { 152 "mxge", 153 mxge_methods, 154 sizeof(mxge_softc_t), 155 }; 156 157 static devclass_t mxge_devclass; 158 159 /* Declare ourselves to be a child of the PCI bus.*/ 160 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, NULL, NULL); 161 MODULE_DEPEND(mxge, firmware, 1, 1, 1); 162 MODULE_DEPEND(mxge, zlib, 1, 1, 1); 163 164 static int mxge_load_firmware(mxge_softc_t *sc, int adopt); 165 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 166 static void mxge_close(mxge_softc_t *sc, int down); 167 static int mxge_open(mxge_softc_t *sc); 168 static void mxge_tick(void *arg); 169 static void mxge_watchdog_reset(mxge_softc_t *sc); 170 static void mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice); 171 172 static int 173 mxge_probe(device_t dev) 174 { 175 if (pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM && 176 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E || 177 pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9)) { 178 int rev = pci_get_revid(dev); 179 180 switch (rev) { 181 case MXGE_PCI_REV_Z8E: 182 device_set_desc(dev, "Myri10G-PCIE-8A"); 183 break; 184 case MXGE_PCI_REV_Z8ES: 185 device_set_desc(dev, "Myri10G-PCIE-8B"); 186 break; 187 default: 188 device_set_desc(dev, "Myri10G-PCIE-8??"); 189 device_printf(dev, "Unrecognized rev %d NIC\n", rev); 190 break; 191 } 192 return 0; 193 } 194 return ENXIO; 195 } 196 197 static void 198 mxge_enable_wc(mxge_softc_t *sc) 199 { 200 #if defined(__x86_64__) 201 vm_offset_t len; 202 203 sc->wc = 1; 204 len = rman_get_size(sc->mem_res); 205 pmap_change_attr((vm_offset_t) sc->sram, len / PAGE_SIZE, 206 PAT_WRITE_COMBINING); 207 #endif 208 } 209 210 static int 211 mxge_dma_alloc(mxge_softc_t *sc, bus_dmamem_t *dma, size_t bytes, 212 bus_size_t alignment) 213 { 214 bus_size_t boundary; 215 int err; 216 217 if (bytes > 4096 && alignment == 4096) 218 boundary = 0; 219 else 220 boundary = 
4096; 221 222 err = bus_dmamem_coherent(sc->parent_dmat, alignment, boundary, 223 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, bytes, 224 BUS_DMA_WAITOK | BUS_DMA_ZERO, dma); 225 if (err != 0) { 226 device_printf(sc->dev, "bus_dmamem_coherent failed: %d\n", err); 227 return err; 228 } 229 return 0; 230 } 231 232 static void 233 mxge_dma_free(bus_dmamem_t *dma) 234 { 235 bus_dmamap_unload(dma->dmem_tag, dma->dmem_map); 236 bus_dmamem_free(dma->dmem_tag, dma->dmem_addr, dma->dmem_map); 237 bus_dma_tag_destroy(dma->dmem_tag); 238 } 239 240 /* 241 * The eeprom strings on the lanaiX have the format 242 * SN=x\0 243 * MAC=x:x:x:x:x:x\0 244 * PC=text\0 245 */ 246 static int 247 mxge_parse_strings(mxge_softc_t *sc) 248 { 249 const char *ptr; 250 int i, found_mac, found_sn2; 251 char *endptr; 252 253 ptr = sc->eeprom_strings; 254 found_mac = 0; 255 found_sn2 = 0; 256 while (*ptr != '\0') { 257 if (strncmp(ptr, "MAC=", 4) == 0) { 258 ptr += 4; 259 for (i = 0;;) { 260 sc->mac_addr[i] = strtoul(ptr, &endptr, 16); 261 if (endptr - ptr != 2) 262 goto abort; 263 ptr = endptr; 264 if (++i == 6) 265 break; 266 if (*ptr++ != ':') 267 goto abort; 268 } 269 found_mac = 1; 270 } else if (strncmp(ptr, "PC=", 3) == 0) { 271 ptr += 3; 272 strlcpy(sc->product_code_string, ptr, 273 sizeof(sc->product_code_string)); 274 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) { 275 ptr += 3; 276 strlcpy(sc->serial_number_string, ptr, 277 sizeof(sc->serial_number_string)); 278 } else if (strncmp(ptr, "SN2=", 4) == 0) { 279 /* SN2 takes precedence over SN */ 280 ptr += 4; 281 found_sn2 = 1; 282 strlcpy(sc->serial_number_string, ptr, 283 sizeof(sc->serial_number_string)); 284 } 285 while (*ptr++ != '\0') {} 286 } 287 288 if (found_mac) 289 return 0; 290 291 abort: 292 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 293 return ENXIO; 294 } 295 296 #if defined(__x86_64__) 297 298 static void 299 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 300 { 301 uint32_t val; 302 unsigned long base, off; 303 
char *va, *cfgptr; 304 device_t pdev, mcp55; 305 uint16_t vendor_id, device_id, word; 306 uintptr_t bus, slot, func, ivend, idev; 307 uint32_t *ptr32; 308 309 if (!mxge_nvidia_ecrc_enable) 310 return; 311 312 pdev = device_get_parent(device_get_parent(sc->dev)); 313 if (pdev == NULL) { 314 device_printf(sc->dev, "could not find parent?\n"); 315 return; 316 } 317 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 318 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 319 320 if (vendor_id != 0x10de) 321 return; 322 323 base = 0; 324 325 if (device_id == 0x005d) { 326 /* ck804, base address is magic */ 327 base = 0xe0000000UL; 328 } else if (device_id >= 0x0374 && device_id <= 0x378) { 329 /* mcp55, base address stored in chipset */ 330 mcp55 = pci_find_bsf(0, 0, 0); 331 if (mcp55 && 332 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 333 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 334 word = pci_read_config(mcp55, 0x90, 2); 335 base = ((unsigned long)word & 0x7ffeU) << 25; 336 } 337 } 338 if (!base) 339 return; 340 341 /* 342 * XXXX 343 * Test below is commented because it is believed that doing 344 * config read/write beyond 0xff will access the config space 345 * for the next larger function. Uncomment this and remove 346 * the hacky pmap_mapdev() way of accessing config space when 347 * DragonFly grows support for extended pcie config space access. 348 */ 349 #if 0 350 /* 351 * See if we can, by some miracle, access the extended 352 * config space 353 */ 354 val = pci_read_config(pdev, 0x178, 4); 355 if (val != 0xffffffff) { 356 val |= 0x40; 357 pci_write_config(pdev, 0x178, val, 4); 358 return; 359 } 360 #endif 361 /* 362 * Rather than using normal pci config space writes, we must 363 * map the Nvidia config space ourselves. 
This is because on 364 * opteron/nvidia class machine the 0xe000000 mapping is 365 * handled by the nvidia chipset, that means the internal PCI 366 * device (the on-chip northbridge), or the amd-8131 bridge 367 * and things behind them are not visible by this method. 368 */ 369 370 BUS_READ_IVAR(device_get_parent(pdev), pdev, 371 PCI_IVAR_BUS, &bus); 372 BUS_READ_IVAR(device_get_parent(pdev), pdev, 373 PCI_IVAR_SLOT, &slot); 374 BUS_READ_IVAR(device_get_parent(pdev), pdev, 375 PCI_IVAR_FUNCTION, &func); 376 BUS_READ_IVAR(device_get_parent(pdev), pdev, 377 PCI_IVAR_VENDOR, &ivend); 378 BUS_READ_IVAR(device_get_parent(pdev), pdev, 379 PCI_IVAR_DEVICE, &idev); 380 381 off = base + 0x00100000UL * (unsigned long)bus + 382 0x00001000UL * (unsigned long)(func + 8 * slot); 383 384 /* map it into the kernel */ 385 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 386 if (va == NULL) { 387 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 388 return; 389 } 390 /* get a pointer to the config space mapped into the kernel */ 391 cfgptr = va + (off & PAGE_MASK); 392 393 /* make sure that we can really access it */ 394 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 395 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 396 if (!(vendor_id == ivend && device_id == idev)) { 397 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 398 vendor_id, device_id); 399 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 400 return; 401 } 402 403 ptr32 = (uint32_t*)(cfgptr + 0x178); 404 val = *ptr32; 405 406 if (val == 0xffffffff) { 407 device_printf(sc->dev, "extended mapping failed\n"); 408 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 409 return; 410 } 411 *ptr32 = val | 0x40; 412 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 413 if (bootverbose) { 414 device_printf(sc->dev, "Enabled ECRC on upstream " 415 "Nvidia bridge at %d:%d:%d\n", 416 (int)bus, (int)slot, (int)func); 417 } 418 } 419 420 #else /* __x86_64__ */ 421 422 static void 423 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 424 { 
425 device_printf(sc->dev, "Nforce 4 chipset on non-x86/x86_64!?!?!\n"); 426 } 427 428 #endif 429 430 static int 431 mxge_dma_test(mxge_softc_t *sc, int test_type) 432 { 433 mxge_cmd_t cmd; 434 bus_addr_t dmatest_bus = sc->dmabench_dma.dmem_busaddr; 435 int status; 436 uint32_t len; 437 const char *test = " "; 438 439 /* 440 * Run a small DMA test. 441 * The magic multipliers to the length tell the firmware 442 * to do DMA read, write, or read+write tests. The 443 * results are returned in cmd.data0. The upper 16 444 * bits of the return is the number of transfers completed. 445 * The lower 16 bits is the time in 0.5us ticks that the 446 * transfers took to complete. 447 */ 448 449 len = sc->tx_boundary; 450 451 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 452 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 453 cmd.data2 = len * 0x10000; 454 status = mxge_send_cmd(sc, test_type, &cmd); 455 if (status != 0) { 456 test = "read"; 457 goto abort; 458 } 459 sc->read_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff); 460 461 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 462 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 463 cmd.data2 = len * 0x1; 464 status = mxge_send_cmd(sc, test_type, &cmd); 465 if (status != 0) { 466 test = "write"; 467 goto abort; 468 } 469 sc->write_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff); 470 471 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 472 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 473 cmd.data2 = len * 0x10001; 474 status = mxge_send_cmd(sc, test_type, &cmd); 475 if (status != 0) { 476 test = "read/write"; 477 goto abort; 478 } 479 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 480 (cmd.data0 & 0xffff); 481 482 abort: 483 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) { 484 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 485 test, status); 486 } 487 return status; 488 } 489 490 /* 491 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 492 * when the PCI-E Completion 
packets are aligned on an 8-byte 493 * boundary. Some PCI-E chip sets always align Completion packets; on 494 * the ones that do not, the alignment can be enforced by enabling 495 * ECRC generation (if supported). 496 * 497 * When PCI-E Completion packets are not aligned, it is actually more 498 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 499 * 500 * If the driver can neither enable ECRC nor verify that it has 501 * already been enabled, then it must use a firmware image which works 502 * around unaligned completion packets (ethp_z8e.dat), and it should 503 * also ensure that it never gives the device a Read-DMA which is 504 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 505 * enabled, then the driver should use the aligned (eth_z8e.dat) 506 * firmware image, and set tx_boundary to 4KB. 507 */ 508 static int 509 mxge_firmware_probe(mxge_softc_t *sc) 510 { 511 device_t dev = sc->dev; 512 int reg, status; 513 uint16_t pectl; 514 515 sc->tx_boundary = 4096; 516 517 /* 518 * Verify the max read request size was set to 4KB 519 * before trying the test with 4KB. 520 */ 521 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 522 pectl = pci_read_config(dev, reg + 0x8, 2); 523 if ((pectl & (5 << 12)) != (5 << 12)) { 524 device_printf(dev, "Max Read Req. size != 4k (0x%x)\n", 525 pectl); 526 sc->tx_boundary = 2048; 527 } 528 } 529 530 /* 531 * Load the optimized firmware (which assumes aligned PCIe 532 * completions) in order to see if it works on this host. 533 */ 534 sc->fw_name = mxge_fw_aligned; 535 status = mxge_load_firmware(sc, 1); 536 if (status != 0) 537 return status; 538 539 /* 540 * Enable ECRC if possible 541 */ 542 mxge_enable_nvidia_ecrc(sc); 543 544 /* 545 * Run a DMA test which watches for unaligned completions and 546 * aborts on the first one seen. Not required on Z8ES or newer. 
547 */ 548 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES) 549 return 0; 550 551 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 552 if (status == 0) 553 return 0; /* keep the aligned firmware */ 554 555 if (status != E2BIG) 556 device_printf(dev, "DMA test failed: %d\n", status); 557 if (status == ENOSYS) { 558 device_printf(dev, "Falling back to ethp! " 559 "Please install up to date fw\n"); 560 } 561 return status; 562 } 563 564 static int 565 mxge_select_firmware(mxge_softc_t *sc) 566 { 567 int aligned = 0; 568 int force_firmware = mxge_force_firmware; 569 570 if (sc->throttle) 571 force_firmware = sc->throttle; 572 573 if (force_firmware != 0) { 574 if (force_firmware == 1) 575 aligned = 1; 576 else 577 aligned = 0; 578 if (bootverbose) { 579 device_printf(sc->dev, 580 "Assuming %s completions (forced)\n", 581 aligned ? "aligned" : "unaligned"); 582 } 583 goto abort; 584 } 585 586 /* 587 * If the PCIe link width is 4 or less, we can use the aligned 588 * firmware and skip any checks 589 */ 590 if (sc->link_width != 0 && sc->link_width <= 4) { 591 device_printf(sc->dev, "PCIe x%d Link, " 592 "expect reduced performance\n", sc->link_width); 593 aligned = 1; 594 goto abort; 595 } 596 597 if (mxge_firmware_probe(sc) == 0) 598 return 0; 599 600 abort: 601 if (aligned) { 602 sc->fw_name = mxge_fw_aligned; 603 sc->tx_boundary = 4096; 604 } else { 605 sc->fw_name = mxge_fw_unaligned; 606 sc->tx_boundary = 2048; 607 } 608 return mxge_load_firmware(sc, 0); 609 } 610 611 static int 612 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 613 { 614 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 615 if_printf(sc->ifp, "Bad firmware type: 0x%x\n", 616 be32toh(hdr->mcp_type)); 617 return EIO; 618 } 619 620 /* Save firmware version for sysctl */ 621 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version)); 622 if (bootverbose) 623 if_printf(sc->ifp, "firmware id: %s\n", hdr->version); 624 625 ksscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 
626 &sc->fw_ver_minor, &sc->fw_ver_tiny); 627 628 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR && 629 sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 630 if_printf(sc->ifp, "Found firmware version %s\n", 631 sc->fw_version); 632 if_printf(sc->ifp, "Driver needs %d.%d\n", 633 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 634 return EINVAL; 635 } 636 return 0; 637 } 638 639 static void * 640 z_alloc(void *nil, u_int items, u_int size) 641 { 642 return kmalloc(items * size, M_TEMP, M_WAITOK); 643 } 644 645 static void 646 z_free(void *nil, void *ptr) 647 { 648 kfree(ptr, M_TEMP); 649 } 650 651 static int 652 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 653 { 654 z_stream zs; 655 char *inflate_buffer; 656 const struct firmware *fw; 657 const mcp_gen_header_t *hdr; 658 unsigned hdr_offset; 659 int status; 660 unsigned int i; 661 char dummy; 662 size_t fw_len; 663 664 fw = firmware_get(sc->fw_name); 665 if (fw == NULL) { 666 if_printf(sc->ifp, "Could not find firmware image %s\n", 667 sc->fw_name); 668 return ENOENT; 669 } 670 671 /* Setup zlib and decompress f/w */ 672 bzero(&zs, sizeof(zs)); 673 zs.zalloc = z_alloc; 674 zs.zfree = z_free; 675 status = inflateInit(&zs); 676 if (status != Z_OK) { 677 status = EIO; 678 goto abort_with_fw; 679 } 680 681 /* 682 * The uncompressed size is stored as the firmware version, 683 * which would otherwise go unused 684 */ 685 fw_len = (size_t)fw->version; 686 inflate_buffer = kmalloc(fw_len, M_TEMP, M_WAITOK); 687 zs.avail_in = fw->datasize; 688 zs.next_in = __DECONST(char *, fw->data); 689 zs.avail_out = fw_len; 690 zs.next_out = inflate_buffer; 691 status = inflate(&zs, Z_FINISH); 692 if (status != Z_STREAM_END) { 693 if_printf(sc->ifp, "zlib %d\n", status); 694 status = EIO; 695 goto abort_with_buffer; 696 } 697 698 /* Check id */ 699 hdr_offset = 700 htobe32(*(const uint32_t *)(inflate_buffer + MCP_HEADER_PTR_OFFSET)); 701 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 702 if_printf(sc->ifp, "Bad 
firmware file"); 703 status = EIO; 704 goto abort_with_buffer; 705 } 706 hdr = (const void*)(inflate_buffer + hdr_offset); 707 708 status = mxge_validate_firmware(sc, hdr); 709 if (status != 0) 710 goto abort_with_buffer; 711 712 /* Copy the inflated firmware to NIC SRAM. */ 713 for (i = 0; i < fw_len; i += 256) { 714 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, inflate_buffer + i, 715 min(256U, (unsigned)(fw_len - i))); 716 wmb(); 717 dummy = *sc->sram; 718 wmb(); 719 } 720 721 *limit = fw_len; 722 status = 0; 723 abort_with_buffer: 724 kfree(inflate_buffer, M_TEMP); 725 inflateEnd(&zs); 726 abort_with_fw: 727 firmware_put(fw, FIRMWARE_UNLOAD); 728 return status; 729 } 730 731 /* 732 * Enable or disable periodic RDMAs from the host to make certain 733 * chipsets resend dropped PCIe messages 734 */ 735 static void 736 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 737 { 738 char buf_bytes[72]; 739 volatile uint32_t *confirm; 740 volatile char *submit; 741 uint32_t *buf, dma_low, dma_high; 742 int i; 743 744 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 745 746 /* Clear confirmation addr */ 747 confirm = (volatile uint32_t *)sc->cmd; 748 *confirm = 0; 749 wmb(); 750 751 /* 752 * Send an rdma command to the PCIe engine, and wait for the 753 * response in the confirmation address. The firmware should 754 * write a -1 there to indicate it is alive and well 755 */ 756 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr); 757 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr); 758 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 759 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 760 buf[2] = htobe32(0xffffffff); /* confirm data */ 761 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.dmem_busaddr); 762 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.dmem_busaddr); 763 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 764 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 765 buf[5] = htobe32(enable); /* enable? 
*/ 766 767 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 768 769 mxge_pio_copy(submit, buf, 64); 770 wmb(); 771 DELAY(1000); 772 wmb(); 773 i = 0; 774 while (*confirm != 0xffffffff && i < 20) { 775 DELAY(1000); 776 i++; 777 } 778 if (*confirm != 0xffffffff) { 779 if_printf(sc->ifp, "dummy rdma %s failed (%p = 0x%x)", 780 (enable ? "enable" : "disable"), confirm, *confirm); 781 } 782 } 783 784 static int 785 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 786 { 787 mcp_cmd_t *buf; 788 char buf_bytes[sizeof(*buf) + 8]; 789 volatile mcp_cmd_response_t *response = sc->cmd; 790 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 791 uint32_t dma_low, dma_high; 792 int err, sleep_total = 0; 793 794 /* Ensure buf is aligned to 8 bytes */ 795 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 796 797 buf->data0 = htobe32(data->data0); 798 buf->data1 = htobe32(data->data1); 799 buf->data2 = htobe32(data->data2); 800 buf->cmd = htobe32(cmd); 801 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr); 802 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr); 803 804 buf->response_addr.low = htobe32(dma_low); 805 buf->response_addr.high = htobe32(dma_high); 806 807 response->result = 0xffffffff; 808 wmb(); 809 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 810 811 /* 812 * Wait up to 20ms 813 */ 814 err = EAGAIN; 815 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 816 wmb(); 817 switch (be32toh(response->result)) { 818 case 0: 819 data->data0 = be32toh(response->data); 820 err = 0; 821 break; 822 case 0xffffffff: 823 DELAY(1000); 824 break; 825 case MXGEFW_CMD_UNKNOWN: 826 err = ENOSYS; 827 break; 828 case MXGEFW_CMD_ERROR_UNALIGNED: 829 err = E2BIG; 830 break; 831 case MXGEFW_CMD_ERROR_BUSY: 832 err = EBUSY; 833 break; 834 case MXGEFW_CMD_ERROR_I2C_ABSENT: 835 err = ENXIO; 836 break; 837 default: 838 if_printf(sc->ifp, "command %d failed, result = %d\n", 839 cmd, be32toh(response->result)); 840 err = 
ENXIO; 841 break; 842 } 843 if (err != EAGAIN) 844 break; 845 } 846 if (err == EAGAIN) { 847 if_printf(sc->ifp, "command %d timed out result = %d\n", 848 cmd, be32toh(response->result)); 849 } 850 return err; 851 } 852 853 static int 854 mxge_adopt_running_firmware(mxge_softc_t *sc) 855 { 856 struct mcp_gen_header *hdr; 857 const size_t bytes = sizeof(struct mcp_gen_header); 858 size_t hdr_offset; 859 int status; 860 861 /* 862 * Find running firmware header 863 */ 864 hdr_offset = 865 htobe32(*(volatile uint32_t *)(sc->sram + MCP_HEADER_PTR_OFFSET)); 866 867 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 868 if_printf(sc->ifp, "Running firmware has bad header offset " 869 "(%zu)\n", hdr_offset); 870 return EIO; 871 } 872 873 /* 874 * Copy header of running firmware from SRAM to host memory to 875 * validate firmware 876 */ 877 hdr = kmalloc(bytes, M_DEVBUF, M_WAITOK); 878 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 879 rman_get_bushandle(sc->mem_res), hdr_offset, (char *)hdr, bytes); 880 status = mxge_validate_firmware(sc, hdr); 881 kfree(hdr, M_DEVBUF); 882 883 /* 884 * Check to see if adopted firmware has bug where adopting 885 * it will cause broadcasts to be filtered unless the NIC 886 * is kept in ALLMULTI mode 887 */ 888 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 889 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 890 sc->adopted_rx_filter_bug = 1; 891 if_printf(sc->ifp, "Adopting fw %d.%d.%d: " 892 "working around rx filter bug\n", 893 sc->fw_ver_major, sc->fw_ver_minor, sc->fw_ver_tiny); 894 } 895 896 return status; 897 } 898 899 static int 900 mxge_load_firmware(mxge_softc_t *sc, int adopt) 901 { 902 volatile uint32_t *confirm; 903 volatile char *submit; 904 char buf_bytes[72]; 905 uint32_t *buf, size, dma_low, dma_high; 906 int status, i; 907 908 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 909 910 size = sc->sram_size; 911 status = mxge_load_firmware_helper(sc, &size); 912 if (status) { 913 if 
(!adopt) 914 return status; 915 916 /* 917 * Try to use the currently running firmware, if 918 * it is new enough 919 */ 920 status = mxge_adopt_running_firmware(sc); 921 if (status) { 922 if_printf(sc->ifp, 923 "failed to adopt running firmware\n"); 924 return status; 925 } 926 if_printf(sc->ifp, "Successfully adopted running firmware\n"); 927 928 if (sc->tx_boundary == 4096) { 929 if_printf(sc->ifp, 930 "Using firmware currently running on NIC. " 931 "For optimal\n"); 932 if_printf(sc->ifp, "performance consider loading " 933 "optimized firmware\n"); 934 } 935 sc->fw_name = mxge_fw_unaligned; 936 sc->tx_boundary = 2048; 937 return 0; 938 } 939 940 /* Clear confirmation addr */ 941 confirm = (volatile uint32_t *)sc->cmd; 942 *confirm = 0; 943 wmb(); 944 945 /* 946 * Send a reload command to the bootstrap MCP, and wait for the 947 * response in the confirmation address. The firmware should 948 * write a -1 there to indicate it is alive and well 949 */ 950 951 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr); 952 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr); 953 954 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 955 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 956 buf[2] = htobe32(0xffffffff); /* confirm data */ 957 958 /* 959 * FIX: All newest firmware should un-protect the bottom of 960 * the sram before handoff. However, the very first interfaces 961 * do not. 
Therefore the handoff copy must skip the first 8 bytes 962 */ 963 /* where the code starts*/ 964 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 965 buf[4] = htobe32(size - 8); /* length of code */ 966 buf[5] = htobe32(8); /* where to copy to */ 967 buf[6] = htobe32(0); /* where to jump to */ 968 969 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 970 mxge_pio_copy(submit, buf, 64); 971 wmb(); 972 DELAY(1000); 973 wmb(); 974 i = 0; 975 while (*confirm != 0xffffffff && i < 20) { 976 DELAY(1000*10); 977 i++; 978 } 979 if (*confirm != 0xffffffff) { 980 if_printf(sc->ifp,"handoff failed (%p = 0x%x)", 981 confirm, *confirm); 982 return ENXIO; 983 } 984 return 0; 985 } 986 987 static int 988 mxge_update_mac_address(mxge_softc_t *sc) 989 { 990 mxge_cmd_t cmd; 991 uint8_t *addr = sc->mac_addr; 992 993 cmd.data0 = (addr[0] << 24) | (addr[1] << 16) | 994 (addr[2] << 8) | addr[3]; 995 cmd.data1 = (addr[4] << 8) | (addr[5]); 996 return mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 997 } 998 999 static int 1000 mxge_change_pause(mxge_softc_t *sc, int pause) 1001 { 1002 mxge_cmd_t cmd; 1003 int status; 1004 1005 bzero(&cmd, sizeof(cmd)); /* silence gcc warning */ 1006 if (pause) 1007 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, &cmd); 1008 else 1009 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, &cmd); 1010 if (status) { 1011 if_printf(sc->ifp, "Failed to set flow control mode\n"); 1012 return ENXIO; 1013 } 1014 sc->pause = pause; 1015 return 0; 1016 } 1017 1018 static void 1019 mxge_change_promisc(mxge_softc_t *sc, int promisc) 1020 { 1021 mxge_cmd_t cmd; 1022 int status; 1023 1024 bzero(&cmd, sizeof(cmd)); /* avoid gcc warning */ 1025 if (mxge_always_promisc) 1026 promisc = 1; 1027 1028 if (promisc) 1029 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, &cmd); 1030 else 1031 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, &cmd); 1032 if (status) 1033 if_printf(sc->ifp, "Failed to set promisc mode\n"); 1034 } 1035 1036 static void 1037 
mxge_set_multicast_list(mxge_softc_t *sc) 1038 { 1039 mxge_cmd_t cmd; 1040 struct ifmultiaddr *ifma; 1041 struct ifnet *ifp = sc->ifp; 1042 int err; 1043 1044 /* This firmware is known to not support multicast */ 1045 if (!sc->fw_multicast_support) 1046 return; 1047 1048 /* Disable multicast filtering while we play with the lists*/ 1049 bzero(&cmd, sizeof(cmd)); /* silence gcc warning */ 1050 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1051 if (err != 0) { 1052 if_printf(ifp, "Failed MXGEFW_ENABLE_ALLMULTI, " 1053 "error status: %d\n", err); 1054 return; 1055 } 1056 1057 if (sc->adopted_rx_filter_bug) 1058 return; 1059 1060 if (ifp->if_flags & IFF_ALLMULTI) { 1061 /* Request to disable multicast filtering, so quit here */ 1062 return; 1063 } 1064 1065 /* Flush all the filters */ 1066 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1067 if (err != 0) { 1068 if_printf(ifp, "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, " 1069 "error status: %d\n", err); 1070 return; 1071 } 1072 1073 /* 1074 * Walk the multicast list, and add each address 1075 */ 1076 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1077 if (ifma->ifma_addr->sa_family != AF_LINK) 1078 continue; 1079 1080 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1081 &cmd.data0, 4); 1082 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, 1083 &cmd.data1, 2); 1084 cmd.data0 = htonl(cmd.data0); 1085 cmd.data1 = htonl(cmd.data1); 1086 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1087 if (err != 0) { 1088 if_printf(ifp, "Failed MXGEFW_JOIN_MULTICAST_GROUP, " 1089 "error status: %d\n", err); 1090 /* Abort, leaving multicast filtering off */ 1091 return; 1092 } 1093 } 1094 1095 /* Enable multicast filtering */ 1096 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1097 if (err != 0) { 1098 if_printf(ifp, "Failed MXGEFW_DISABLE_ALLMULTI, " 1099 "error status: %d\n", err); 1100 } 1101 } 1102 1103 #if 0 1104 static int 1105 mxge_max_mtu(mxge_softc_t *sc) 1106 
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if it we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
	    &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}
#endif

/*
 * Reset the NIC firmware and re-establish the driver/firmware shared
 * state: interrupt queue size and DMA addresses, interrupt coalescing
 * and claim/deassert register pointers, per-slice ring indices, MAC
 * address, promiscuous/pause mode and the multicast filter.
 *
 * interrupts_setup: when non-zero, also (re)program the per-slice
 * interrupt queue DMA addresses into the firmware.
 *
 * Returns 0 on success, ENXIO if the firmware does not answer the
 * reset command, or a firmware command error status.
 */
static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status, rx_intr_size;

	/*
	 * Try to send a reset command to the card to see if it
	 * is alive
	 */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		if_printf(sc->ifp, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/*
	 * Set the intrq size
	 * XXX assume 4byte mcp_slot
	 */
	rx_intr_size = sc->rx_intr_slots * sizeof(mcp_slot_t);
	cmd.data0 = rx_intr_size;
	/*
	 * NOTE(review): this status is overwritten below when
	 * num_slices > 1, so a SET_INTRQ_SIZE failure can be lost —
	 * TODO confirm whether that is intentional.
	 */
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */
	if (sc->num_slices > 1) {
		/* Ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
		if (status != 0) {
			if_printf(sc->ifp, "failed to get number of slices\n");
			return status;
		}

		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
		if (sc->num_tx_rings > 1)
			cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, &cmd);
		if (status != 0) {
			if_printf(sc->ifp, "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts */
		for (slice = 0; slice < sc->num_slices; slice++) {
			ss = &sc->ss[slice];

			rx_done = &ss->rx_data.rx_done;
			memset(rx_done->entry, 0, rx_intr_size);

			cmd.data0 =
			    MXGE_LOWPART_TO_U32(ss->rx_done_dma.dmem_busaddr);
			cmd.data1 =
			    MXGE_HIGHPART_TO_U32(ss->rx_done_dma.dmem_busaddr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA,
			    &cmd);
		}
	}

	/* Fetch SRAM offsets of the firmware's interrupt control words */
	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET,
	    &cmd);
	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);

	if (status != 0) {
		if_printf(sc->ifp, "failed set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* Run a DMA benchmark */
	mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		/* Each slice gets its own pair of irq claim registers */
		ss->irq_claim = irq_claim + (2 * slice);

		/* Reset mcp/driver shared state back to 0 */
		ss->rx_data.rx_done.idx = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->rx_data.rx_big.cnt = 0;
		ss->rx_data.rx_small.cnt = 0;
		if (ss->fw_stats != NULL)
			bzero(ss->fw_stats, sizeof(*ss->fw_stats));
	}
	sc->rdma_tags_available = 15;

	/* Re-apply host-side configuration lost by the firmware reset */
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);

	if (sc->throttle) {
		cmd.data0 = sc->throttle;
		if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd))
			if_printf(sc->ifp, "can't enable throttle\n");
	}
	return status;
}

/*
 * Sysctl handler: change the transmit throttle factor.  The new value
 * is range-checked and pushed to the firmware before being committed
 * to the softc.
 */
static int
mxge_change_throttle(SYSCTL_HANDLER_ARGS)
{
	mxge_cmd_t cmd;
	mxge_softc_t *sc;
	int err;
	unsigned int throttle;

	sc = arg1;
	throttle = sc->throttle;
	err = sysctl_handle_int(oidp, &throttle, arg2, req);
	if (err != 0)
		return err;

	if (throttle == sc->throttle)
		return 0;

	if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
		return EINVAL;

	ifnet_serialize_all(sc->ifp);

	cmd.data0 = throttle;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
	if (err == 0)
		sc->throttle = throttle;

	ifnet_deserialize_all(sc->ifp);
	return err;
}

/*
 * Sysctl handler: toggle RSS use.  Requires reopening the interface
 * when it is running, since the slice setup depends on the RSS mode.
 */
static int
mxge_change_use_rss(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	int err, use_rss;

	sc = arg1;
	use_rss = sc->use_rss;
	err = sysctl_handle_int(oidp, &use_rss, arg2, req);
	if (err != 0)
		return err;

	if (use_rss == sc->use_rss)
		return 0;
1306 ifnet_serialize_all(sc->ifp); 1307 1308 sc->use_rss = use_rss; 1309 if (sc->ifp->if_flags & IFF_RUNNING) { 1310 mxge_close(sc, 0); 1311 mxge_open(sc); 1312 } 1313 1314 ifnet_deserialize_all(sc->ifp); 1315 return err; 1316 } 1317 1318 static int 1319 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1320 { 1321 mxge_softc_t *sc; 1322 unsigned int intr_coal_delay; 1323 int err; 1324 1325 sc = arg1; 1326 intr_coal_delay = sc->intr_coal_delay; 1327 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1328 if (err != 0) 1329 return err; 1330 1331 if (intr_coal_delay == sc->intr_coal_delay) 1332 return 0; 1333 1334 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1335 return EINVAL; 1336 1337 ifnet_serialize_all(sc->ifp); 1338 1339 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1340 sc->intr_coal_delay = intr_coal_delay; 1341 1342 ifnet_deserialize_all(sc->ifp); 1343 return err; 1344 } 1345 1346 static int 1347 mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1348 { 1349 int err; 1350 1351 if (arg1 == NULL) 1352 return EFAULT; 1353 arg2 = be32toh(*(int *)arg1); 1354 arg1 = NULL; 1355 err = sysctl_handle_int(oidp, arg1, arg2, req); 1356 1357 return err; 1358 } 1359 1360 static void 1361 mxge_rem_sysctls(mxge_softc_t *sc) 1362 { 1363 if (sc->ss != NULL) { 1364 struct mxge_slice_state *ss; 1365 int slice; 1366 1367 for (slice = 0; slice < sc->num_slices; slice++) { 1368 ss = &sc->ss[slice]; 1369 if (ss->sysctl_tree != NULL) { 1370 sysctl_ctx_free(&ss->sysctl_ctx); 1371 ss->sysctl_tree = NULL; 1372 } 1373 } 1374 } 1375 1376 if (sc->slice_sysctl_tree != NULL) { 1377 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1378 sc->slice_sysctl_tree = NULL; 1379 } 1380 } 1381 1382 static void 1383 mxge_add_sysctls(mxge_softc_t *sc) 1384 { 1385 struct sysctl_ctx_list *ctx; 1386 struct sysctl_oid_list *children; 1387 mcp_irq_data_t *fw; 1388 struct mxge_slice_state *ss; 1389 int slice; 1390 char slice_num[8]; 1391 1392 ctx = device_get_sysctl_ctx(sc->dev); 1393 children = 
SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1394 fw = sc->ss[0].fw_stats; 1395 1396 /* 1397 * Random information 1398 */ 1399 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version", 1400 CTLFLAG_RD, &sc->fw_version, 0, "firmware version"); 1401 1402 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "serial_number", 1403 CTLFLAG_RD, &sc->serial_number_string, 0, "serial number"); 1404 1405 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "product_code", 1406 CTLFLAG_RD, &sc->product_code_string, 0, "product code"); 1407 1408 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "pcie_link_width", 1409 CTLFLAG_RD, &sc->link_width, 0, "link width"); 1410 1411 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_boundary", 1412 CTLFLAG_RD, &sc->tx_boundary, 0, "tx boundary"); 1413 1414 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_combine", 1415 CTLFLAG_RD, &sc->wc, 0, "write combining PIO"); 1416 1417 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_dma_MBs", 1418 CTLFLAG_RD, &sc->read_dma, 0, "DMA Read speed in MB/s"); 1419 1420 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_dma_MBs", 1421 CTLFLAG_RD, &sc->write_dma, 0, "DMA Write speed in MB/s"); 1422 1423 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_write_dma_MBs", 1424 CTLFLAG_RD, &sc->read_write_dma, 0, 1425 "DMA concurrent Read/Write speed in MB/s"); 1426 1427 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "watchdog_resets", 1428 CTLFLAG_RD, &sc->watchdog_resets, 0, 1429 "Number of times NIC was reset"); 1430 1431 if (sc->num_slices > 1) { 1432 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "slice_cpumap", 1433 CTLTYPE_OPAQUE | CTLFLAG_RD, sc->ring_map, 0, 1434 if_ringmap_cpumap_sysctl, "I", "slice CPU map"); 1435 } 1436 1437 /* 1438 * Performance related tunables 1439 */ 1440 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_coal_delay", 1441 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_intr_coal, "I", 1442 "Interrupt coalescing delay in usecs"); 1443 1444 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "throttle", 1445 CTLTYPE_INT|CTLFLAG_RW, sc, 0, 
mxge_change_throttle, "I", 1446 "Transmit throttling"); 1447 1448 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "use_rss", 1449 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_use_rss, "I", 1450 "Use RSS"); 1451 1452 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "deassert_wait", 1453 CTLFLAG_RW, &mxge_deassert_wait, 0, 1454 "Wait for IRQ line to go low in ihandler"); 1455 1456 /* 1457 * Stats block from firmware is in network byte order. 1458 * Need to swap it 1459 */ 1460 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "link_up", 1461 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 0, 1462 mxge_handle_be32, "I", "link up"); 1463 1464 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_tags_available", 1465 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 0, 1466 mxge_handle_be32, "I", "rdma_tags_available"); 1467 1468 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_crc32", 1469 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_crc32, 0, 1470 mxge_handle_be32, "I", "dropped_bad_crc32"); 1471 1472 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_phy", 1473 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_phy, 0, 1474 mxge_handle_be32, "I", "dropped_bad_phy"); 1475 1476 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_error_or_filtered", 1477 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_error_or_filtered, 0, 1478 mxge_handle_be32, "I", "dropped_link_error_or_filtered"); 1479 1480 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_overflow", 1481 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 0, 1482 mxge_handle_be32, "I", "dropped_link_overflow"); 1483 1484 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_multicast_filtered", 1485 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_multicast_filtered, 0, 1486 mxge_handle_be32, "I", "dropped_multicast_filtered"); 1487 1488 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_no_big_buffer", 1489 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 0, 1490 mxge_handle_be32, "I", "dropped_no_big_buffer"); 1491 1492 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 
"dropped_no_small_buffer", 1493 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_small_buffer, 0, 1494 mxge_handle_be32, "I", "dropped_no_small_buffer"); 1495 1496 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_overrun", 1497 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 0, 1498 mxge_handle_be32, "I", "dropped_overrun"); 1499 1500 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_pause", 1501 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_pause, 0, 1502 mxge_handle_be32, "I", "dropped_pause"); 1503 1504 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_runt", 1505 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 0, 1506 mxge_handle_be32, "I", "dropped_runt"); 1507 1508 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_unicast_filtered", 1509 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 0, 1510 mxge_handle_be32, "I", "dropped_unicast_filtered"); 1511 1512 /* add counters exported for debugging from all slices */ 1513 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1514 sc->slice_sysctl_tree = SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, 1515 children, OID_AUTO, "slice", CTLFLAG_RD, 0, ""); 1516 if (sc->slice_sysctl_tree == NULL) { 1517 device_printf(sc->dev, "can't add slice sysctl node\n"); 1518 return; 1519 } 1520 1521 for (slice = 0; slice < sc->num_slices; slice++) { 1522 ss = &sc->ss[slice]; 1523 sysctl_ctx_init(&ss->sysctl_ctx); 1524 ctx = &ss->sysctl_ctx; 1525 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1526 ksprintf(slice_num, "%d", slice); 1527 ss->sysctl_tree = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 1528 slice_num, CTLFLAG_RD, 0, ""); 1529 if (ss->sysctl_tree == NULL) { 1530 device_printf(sc->dev, 1531 "can't add %d slice sysctl node\n", slice); 1532 return; /* XXX continue? 
*/ 1533 } 1534 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1535 1536 /* 1537 * XXX change to ULONG 1538 */ 1539 1540 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_small_cnt", 1541 CTLFLAG_RD, &ss->rx_data.rx_small.cnt, 0, "rx_small_cnt"); 1542 1543 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_big_cnt", 1544 CTLFLAG_RD, &ss->rx_data.rx_big.cnt, 0, "rx_small_cnt"); 1545 1546 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_req", 1547 CTLFLAG_RD, &ss->tx.req, 0, "tx_req"); 1548 1549 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_done", 1550 CTLFLAG_RD, &ss->tx.done, 0, "tx_done"); 1551 1552 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_pkt_done", 1553 CTLFLAG_RD, &ss->tx.pkt_done, 0, "tx_done"); 1554 1555 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_queue_active", 1556 CTLFLAG_RD, &ss->tx.queue_active, 0, "tx_queue_active"); 1557 1558 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_activate", 1559 CTLFLAG_RD, &ss->tx.activate, 0, "tx_activate"); 1560 1561 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_deactivate", 1562 CTLFLAG_RD, &ss->tx.deactivate, 0, "tx_deactivate"); 1563 } 1564 } 1565 1566 /* 1567 * Copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1568 * backwards one at a time and handle ring wraps 1569 */ 1570 static __inline void 1571 mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1572 mcp_kreq_ether_send_t *src, int cnt) 1573 { 1574 int idx, starting_slot; 1575 1576 starting_slot = tx->req; 1577 while (cnt > 1) { 1578 cnt--; 1579 idx = (starting_slot + cnt) & tx->mask; 1580 mxge_pio_copy(&tx->lanai[idx], &src[cnt], sizeof(*src)); 1581 wmb(); 1582 } 1583 } 1584 1585 /* 1586 * Copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1587 * at most 32 bytes at a time, so as to avoid involving the software 1588 * pio handler in the nic. 
We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */
static __inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	/* Clear the first request's flags so the NIC ignores the chain
	 * until the final flag write below makes it valid. */
	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		/* No ring wrap: copy two requests (32 bytes) per burst */
		for (i = 0; i < cnt - 1; i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/*
		 * Submit all but the first request, and ensure
		 * that it is submitted below
		 */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* Submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* Re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints+=3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints+=3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}

/*
 * Make sure the ethernet, IP and TCP headers of a TSO packet are in
 * the first mbuf so the encap path can parse them contiguously.
 * Returns 0 on success or ENOBUFS (freeing the chain and NULLing *mp)
 * if m_pullup() fails.
 */
static int
mxge_pullup_tso(struct mbuf **mp)
{
	int hoff, iphlen, thoff;
	struct mbuf *m;

	m = *mp;
	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));

	iphlen = m->m_pkthdr.csum_iphlen;
	thoff = m->m_pkthdr.csum_thlen;
	hoff = m->m_pkthdr.csum_lhlen;

	KASSERT(iphlen > 0, ("invalid ip hlen"));
	KASSERT(thoff > 0, ("invalid tcp hlen"));
	KASSERT(hoff > 0, ("invalid ether hlen"));

	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
		m = m_pullup(m, hoff + iphlen + thoff);
		if (m == NULL) {
			*mp = NULL;
			return ENOBUFS;
		}
		*mp = m;
	}
	return 0;
}

/*
 * Build and submit the firmware send-request chain for a TSO packet.
 * The busdma segments in tx->seg_list are chopped at mss boundaries
 * and the per-request rdma_count fields are back-filled as segment
 * cuts are discovered (see the long comment below).
 * Returns 0 on success, ENOBUFS (freeing the mbuf) if the chain would
 * exceed tx->max_desc descriptors.
 */
static int
mxge_encap_tso(mxge_tx_ring_t *tx, struct mxge_buffer_state *info_map,
    struct mbuf *m, int busdma_seg_cnt)
{
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss;
	uint8_t flags, flags_next;
	struct mxge_buffer_state *info_last;
	bus_dmamap_t map = info_map->map;

	mss = m->m_pkthdr.tso_segsz;

	/*
	 * Negative cum_len signifies to the send loop that we are
	 * still in the header portion of the TSO packet.
	 */
	cum_len = -(m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen +
	    m->m_pkthdr.csum_thlen);

	/*
	 * TSO implies checksum offload on this hardware
	 */
	cksum_offset = m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen;
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/*
	 * For TSO, pseudo_hdr_offset holds mss.  The firmware figures
	 * out where to put the checksum by parsing the header.
	 */
	pseudo_hdr_offset = htobe16(mss);

	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;

	/*
	 * "rdma_count" is the number of RDMAs belonging to the current
	 * packet BEFORE the current send request.  For non-TSO packets,
	 * this is equal to "count".
	 *
	 * For TSO packets, rdma_count needs to be reset to 0 after a
	 * segment cut.
	 *
	 * The rdma_count field of the send request is the number of
	 * RDMAs of the packet starting at that request.  For TSO send
	 * requests with one ore more cuts in the middle, this is the
	 * number of RDMAs starting after the last cut in the request.
	 * All previous segments before the last cut implicitly have 1
	 * RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand, it must be
	 * filled-in retroactively - after each segmentation cut or at
	 * the end of the entire packet.
	 */

	while (busdma_seg_cnt) {
		/*
		 * Break the busdma segment up into pieces
		 */
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			/* Back-fill the RDMA count of the previous cut */
			(req - rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* Payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |=
				    next_is_first * MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* Header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
				    MXGEFW_FLAGS_FIRST |
				    (small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags =
			    flags | ((cum_len & 1) * MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (__predict_false(cksum_offset > seglen))
				cksum_offset -= seglen;
			else
				cksum_offset = 0;
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	/* Back-fill the RDMA count of the final run of requests */
	(req - rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	info_last = &tx->info[((cnt - 1) + tx->req) & tx->mask];

	/* Park the loaded dmamap on the last slot of the chain */
	info_map->map = info_last->map;
	info_last->map = map;
	info_last->m = m;

	mxge_submit_req(tx, tx->req_list, cnt);

	if (tx->send_go != NULL && tx->queue_active == 0) {
		/* Tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
	return 0;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	return ENOBUFS;
}

/*
 * Map an outgoing mbuf chain for DMA and submit it to the firmware.
 * TSO frames are pulled up and handed to mxge_encap_tso(); all other
 * frames get a plain send-request list with optional checksum offload
 * and a zero-pad descriptor for runt frames.
 * Returns 0 on success or an errno (the mbuf is freed on failure).
 */
static int
mxge_encap(mxge_tx_ring_t *tx, struct mbuf *m, bus_addr_t zeropad)
{
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	bus_dmamap_t map;
	int cnt, cum_len, err, i, idx, odd_flag;
	uint16_t pseudo_hdr_offset;
	uint8_t flags, cksum_offset;
	struct mxge_buffer_state *info_map, *info_last;

	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
		err = mxge_pullup_tso(&m);
		if (__predict_false(err))
			return err;
	}

	/*
	 * Map the frame for DMA
	 */
	idx = tx->req & tx->mask;
	info_map = &tx->info[idx];
	map = info_map->map;

	err = bus_dmamap_load_mbuf_defrag(tx->dmat, map, &m,
	    tx->seg_list, tx->max_desc - 2, &cnt, BUS_DMA_NOWAIT);
	if (__predict_false(err != 0))
		goto drop;
	bus_dmamap_sync(tx->dmat, map, BUS_DMASYNC_PREWRITE);

	/*
	 * TSO is different enough, we handle it in another routine
	 */
	if (m->m_pkthdr.csum_flags & CSUM_TSO)
		return mxge_encap_tso(tx, info_map, m, cnt);

	req = tx->req_list;
	cksum_offset = 0;
	pseudo_hdr_offset = 0;
	flags = MXGEFW_FLAGS_NO_TSO;

	/*
	 * Checksum offloading
	 */
	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
		cksum_offset = m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen;
		pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
		pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
		req->cksum_offset = cksum_offset;
		flags |= MXGEFW_FLAGS_CKSUM;
		odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
	} else {
		odd_flag = 0;
	}
	if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
		flags |= MXGEFW_FLAGS_SMALL;

	/*
	 * Convert segments into a request list
	 */
	cum_len = 0;
	seg = tx->seg_list;
	req->flags = MXGEFW_FLAGS_FIRST;
	for (i = 0; i < cnt; i++) {
		req->addr_low = htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
		req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		req->length = htobe16(seg->ds_len);
		req->cksum_offset = cksum_offset;
		if (cksum_offset > seg->ds_len)
			cksum_offset -= seg->ds_len;
		else
			cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cum_len += seg->ds_len;
		seg++;
		req++;
		req->flags = 0;
	}
	req--;

	/*
	 * Pad runt to 60 bytes
	 */
	if (cum_len < 60) {
		req++;
		req->addr_low = htobe32(MXGE_LOWPART_TO_U32(zeropad));
		req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(zeropad));
		req->length = htobe16(60 - cum_len);
		req->cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cnt++;
	}

	tx->req_list[0].rdma_count = cnt;
#if 0
	/* print what the firmware will see */
	for (i = 0; i < cnt; i++) {
		kprintf("%d: addr: 0x%x 0x%x len:%d pso%d,"
		    "cso:%d, flags:0x%x, rdma:%d\n",
		    i, (int)ntohl(tx->req_list[i].addr_high),
		    (int)ntohl(tx->req_list[i].addr_low),
		    (int)ntohs(tx->req_list[i].length),
		    (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
		    tx->req_list[i].cksum_offset, tx->req_list[i].flags,
		    tx->req_list[i].rdma_count);
	}
	kprintf("--------------\n");
#endif
	info_last = &tx->info[((cnt - 1) + tx->req) & tx->mask];

	/* Park the loaded dmamap on the last slot of the chain */
	info_map->map = info_last->map;
	info_last->map = map;
	info_last->m = m;

	mxge_submit_req(tx, tx->req_list, cnt);

	if (tx->send_go != NULL && tx->queue_active == 0) {
		/* Tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
	return 0;

drop:
	m_freem(m);
	return err;
}

/*
 * ifnet if_start method: drain the subqueue into the slice's TX ring
 * while descriptor space remains, then mark the queue active-blocked
 * when the ring fills.  Arms the watchdog when anything was queued.
 */
static void
mxge_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
{
	mxge_softc_t *sc = ifp->if_softc;
	mxge_tx_ring_t *tx = ifsq_get_priv(ifsq);
	bus_addr_t zeropad;
	int encap = 0;

	KKASSERT(tx->ifsq == ifsq);
	ASSERT_SERIALIZED(&tx->tx_serialize);

	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
		return;

	zeropad = sc->zeropad_dma.dmem_busaddr;
	while (tx->mask - (tx->req - tx->done) > tx->max_desc) {
		struct mbuf *m;
		int error;

		m = ifsq_dequeue(ifsq);
		if (m == NULL)
			goto done;

		BPF_MTAP(ifp, m);
		error = mxge_encap(tx, m, zeropad);
		if (!error)
			encap = 1;
		else
			IFNET_STAT_INC(ifp, oerrors, 1);
	}

	/* Ran out of transmit slots */
	ifsq_set_oactive(ifsq);
done:
	if (encap)
		tx->watchdog.wd_timer = 5;
}

/*
 * TX watchdog: if the firmware's pause-drop counter has not moved
 * since the last timeout, the ring is genuinely stuck and the NIC is
 * reset; otherwise the stall is blamed on flow control from the link
 * partner and only logged.
 */
static void
mxge_watchdog(struct ifaltq_subque *ifsq)
{
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	struct mxge_softc *sc = ifp->if_softc;
	uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
	mxge_tx_ring_t *tx = ifsq_get_priv(ifsq);

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	/* Check for pause blocking before resetting */
	if (tx->watchdog_rx_pause ==
rx_pause) { 2005 mxge_warn_stuck(sc, tx, 0); 2006 mxge_watchdog_reset(sc); 2007 return; 2008 } else { 2009 if_printf(ifp, "Flow control blocking xmits, " 2010 "check link partner\n"); 2011 } 2012 tx->watchdog_rx_pause = rx_pause; 2013 } 2014 2015 /* 2016 * Copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2017 * at most 32 bytes at a time, so as to avoid involving the software 2018 * pio handler in the nic. We re-write the first segment's low 2019 * DMA address to mark it valid only after we write the entire chunk 2020 * in a burst 2021 */ 2022 static __inline void 2023 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2024 mcp_kreq_ether_recv_t *src) 2025 { 2026 uint32_t low; 2027 2028 low = src->addr_low; 2029 src->addr_low = 0xffffffff; 2030 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2031 wmb(); 2032 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2033 wmb(); 2034 src->addr_low = low; 2035 dst->addr_low = low; 2036 wmb(); 2037 } 2038 2039 static int 2040 mxge_get_buf_small(mxge_rx_ring_t *rx, bus_dmamap_t map, int idx, 2041 boolean_t init) 2042 { 2043 bus_dma_segment_t seg; 2044 struct mbuf *m; 2045 int cnt, err, mflag; 2046 2047 mflag = M_NOWAIT; 2048 if (__predict_false(init)) 2049 mflag = M_WAITOK; 2050 2051 m = m_gethdr(mflag, MT_DATA); 2052 if (m == NULL) { 2053 err = ENOBUFS; 2054 if (__predict_false(init)) { 2055 /* 2056 * During initialization, there 2057 * is nothing to setup; bail out 2058 */ 2059 return err; 2060 } 2061 goto done; 2062 } 2063 m->m_len = m->m_pkthdr.len = MHLEN; 2064 2065 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m, 2066 &seg, 1, &cnt, BUS_DMA_NOWAIT); 2067 if (err != 0) { 2068 m_freem(m); 2069 if (__predict_false(init)) { 2070 /* 2071 * During initialization, there 2072 * is nothing to setup; bail out 2073 */ 2074 return err; 2075 } 2076 goto done; 2077 } 2078 2079 rx->info[idx].m = m; 2080 rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2081 rx->shadow[idx].addr_high = 
htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2082 2083 done: 2084 if ((idx & 7) == 7) 2085 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2086 return err; 2087 } 2088 2089 static int 2090 mxge_get_buf_big(mxge_rx_ring_t *rx, bus_dmamap_t map, int idx, 2091 boolean_t init) 2092 { 2093 bus_dma_segment_t seg; 2094 struct mbuf *m; 2095 int cnt, err, mflag; 2096 2097 mflag = M_NOWAIT; 2098 if (__predict_false(init)) 2099 mflag = M_WAITOK; 2100 2101 if (rx->cl_size == MCLBYTES) 2102 m = m_getcl(mflag, MT_DATA, M_PKTHDR); 2103 else 2104 m = m_getjcl(mflag, MT_DATA, M_PKTHDR, MJUMPAGESIZE); 2105 if (m == NULL) { 2106 err = ENOBUFS; 2107 if (__predict_false(init)) { 2108 /* 2109 * During initialization, there 2110 * is nothing to setup; bail out 2111 */ 2112 return err; 2113 } 2114 goto done; 2115 } 2116 m->m_len = m->m_pkthdr.len = rx->cl_size; 2117 2118 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m, 2119 &seg, 1, &cnt, BUS_DMA_NOWAIT); 2120 if (err != 0) { 2121 m_freem(m); 2122 if (__predict_false(init)) { 2123 /* 2124 * During initialization, there 2125 * is nothing to setup; bail out 2126 */ 2127 return err; 2128 } 2129 goto done; 2130 } 2131 2132 rx->info[idx].m = m; 2133 rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2134 rx->shadow[idx].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2135 2136 done: 2137 if ((idx & 7) == 7) 2138 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2139 return err; 2140 } 2141 2142 /* 2143 * Myri10GE hardware checksums are not valid if the sender 2144 * padded the frame with non-zero padding. This is because 2145 * the firmware just does a simple 16-bit 1s complement 2146 * checksum across the entire frame, excluding the first 14 2147 * bytes. 
  It is best to simply to check the checksum and
 * tell the stack about it only if the checksum is good
 */

/*
 * Verify the firmware's whole-frame checksum `csum` against the IPv4
 * pseudo-header.  Returns 0 iff the TCP/UDP checksum verifies;
 * non-zero for non-IPv4 or non-TCP/UDP frames, or on a bad checksum.
 */
static __inline uint16_t
mxge_rx_csum(struct mbuf *m, int csum)
{
	const struct ether_header *eh;
	const struct ip *ip;
	uint16_t c;

	eh = mtod(m, const struct ether_header *);

	/* Only deal with IPv4 TCP & UDP for now */
	if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
		return 1;

	ip = (const struct ip *)(eh + 1);
	if (__predict_false(ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP))
		return 1;

#ifdef INET
	/*
	 * Fold the pseudo-header into the firmware checksum; the IP
	 * header bytes (ip_hl << 2) are backed out since the firmware
	 * summed them but the pseudo-header excludes them.
	 */
	c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
	    htonl(ntohs(csum) + ntohs(ip->ip_len) +
	    - (ip->ip_hl << 2) + ip->ip_p));
#else
	c = 1;
#endif
	c ^= 0xffff;
	return c;
}

/*
 * Strip an 802.1q header from the mbuf in place and adjust the
 * firmware's frame checksum (*csum, network byte order on entry and
 * exit) for the EVL_ENCAPLEN bytes removed.  The VLAN tag is saved in
 * the mbuf packet header and M_VLANTAG is set.
 */
static void
mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
{
	struct ether_vlan_header *evl;
	uint32_t partial;

	evl = mtod(m, struct ether_vlan_header *);

	/*
	 * Fix checksum by subtracting EVL_ENCAPLEN bytes after
	 * what the firmware thought was the end of the ethernet
	 * header.
	 */

	/* Put checksum into host byte order */
	*csum = ntohs(*csum);

	/* 1s-complement subtract the removed 4 bytes, then fold twice */
	partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
	*csum += ~partial;
	*csum += ((*csum) < ~partial);
	*csum = ((*csum) >> 16) + ((*csum) & 0xFFFF);
	*csum = ((*csum) >> 16) + ((*csum) & 0xFFFF);

	/*
	 * Restore checksum to network byte order;
	 * later consumers expect this
	 */
	*csum = htons(*csum);

	/* save the tag */
	m->m_pkthdr.ether_vlantag = ntohs(evl->evl_tag);
	m->m_flags |= M_VLANTAG;

	/*
	 * Remove the 802.1q header by copying the Ethernet
	 * addresses over it and adjusting the beginning of
	 * the data in the mbuf.  The encapsulated Ethernet
	 * type field is already in place.
	 */
	bcopy((char *)evl, (char *)evl + EVL_ENCAPLEN,
	    ETHER_HDR_LEN - ETHER_TYPE_LEN);
	m_adj(m, EVL_ENCAPLEN);
}

/*
 * Hand one received "big"-ring frame of `len` bytes up the stack.
 * The ring slot is refilled first; if that fails the frame is dropped
 * and the old mbuf is re-used.  `csum` is the firmware's whole-frame
 * checksum for this packet.
 */
static __inline void
mxge_rx_done_big(struct ifnet *ifp, mxge_rx_ring_t *rx,
    uint32_t len, uint32_t csum)
{
	struct mbuf *m;
	const struct ether_header *eh;
	bus_dmamap_t old_map;
	int idx;

	idx = rx->cnt & rx->mask;
	rx->cnt++;

	/* Save a pointer to the received mbuf */
	m = rx->info[idx].m;

	/* Try to replace the received mbuf */
	if (mxge_get_buf_big(rx, rx->extra_map, idx, FALSE)) {
		/* Drop the frame -- the old mbuf is re-cycled */
		IFNET_STAT_INC(ifp, ierrors, 1);
		return;
	}

	/* Unmap the received buffer */
	old_map = rx->info[idx].map;
	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(rx->dmat, old_map);

	/* Swap the bus_dmamap_t's */
	rx->info[idx].map = rx->extra_map;
	rx->extra_map = old_map;

	/*
	 * mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned
	 */
	m->m_data += MXGEFW_PAD;

	m->m_pkthdr.rcvif = ifp;
	m->m_len = m->m_pkthdr.len = len;

	IFNET_STAT_INC(ifp, ipackets, 1);

	eh = mtod(m, const struct ether_header *);
	if (eh->ether_type == htons(ETHERTYPE_VLAN))
		mxge_vlan_tag_remove(m, &csum);

	/* If the checksum is valid, mark it in the mbuf header */
	if ((ifp->if_capenable & IFCAP_RXCSUM) &&
	    mxge_rx_csum(m, csum) == 0) {
		/* Tell the stack that the checksum is good */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
		    CSUM_DATA_VALID;
	}
	ifp->if_input(ifp, m, NULL, -1);
}

/*
 * Same as mxge_rx_done_big() but for the small-buffer receive ring
 * (frames up to MXGE_RX_SMALL_BUFLEN bytes).
 */
static __inline void
mxge_rx_done_small(struct ifnet *ifp, mxge_rx_ring_t *rx,
    uint32_t len, uint32_t csum)
{
	const struct ether_header *eh;
	struct mbuf *m;
	bus_dmamap_t old_map;
	int idx;

	idx = rx->cnt & rx->mask;
	rx->cnt++;

	/* Save a pointer to the received mbuf */
	m = rx->info[idx].m;

	/* Try to replace the received mbuf */
	if (mxge_get_buf_small(rx, rx->extra_map, idx, FALSE)) {
		/* Drop the frame -- the old mbuf is re-cycled */
		IFNET_STAT_INC(ifp, ierrors, 1);
		return;
	}

	/* Unmap the received buffer */
	old_map = rx->info[idx].map;
	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(rx->dmat, old_map);

	/* Swap the bus_dmamap_t's */
	rx->info[idx].map = rx->extra_map;
	rx->extra_map = old_map;

	/*
	 * mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned
	 */
	m->m_data += MXGEFW_PAD;

	m->m_pkthdr.rcvif = ifp;
	m->m_len = m->m_pkthdr.len = len;

	IFNET_STAT_INC(ifp, ipackets, 1);

	eh = mtod(m, const struct ether_header *);
	if (eh->ether_type == htons(ETHERTYPE_VLAN))
		mxge_vlan_tag_remove(m, &csum);

	/* If the checksum is valid, mark it in the mbuf header */
	if ((ifp->if_capenable & IFCAP_RXCSUM) &&
	    mxge_rx_csum(m, csum) == 0) {
		/* Tell the stack that the checksum is good */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
		    CSUM_DATA_VALID;
	}
	ifp->if_input(ifp, m, NULL, -1);
}

/*
 * Drain completed receive descriptors, dispatching each frame to the
 * small- or big-buffer handler based on its length.  `cycle` caps the
 * number of frames processed per call; callers in this file pass -1,
 * which effectively means "no limit" (the counter merely decrements
 * past zero).
 */
static __inline void
mxge_clean_rx_done(struct ifnet *ifp, struct mxge_rx_data *rx_data, int cycle)
{
	mxge_rx_done_t *rx_done = &rx_data->rx_done;

	while (rx_done->entry[rx_done->idx].length != 0 && cycle != 0) {
		uint16_t length, checksum;

		length = ntohs(rx_done->entry[rx_done->idx].length);
		/* Clearing the length hands the slot back to the firmware */
		rx_done->entry[rx_done->idx].length = 0;

		checksum = rx_done->entry[rx_done->idx].checksum;

		if (length <= MXGE_RX_SMALL_BUFLEN) {
			mxge_rx_done_small(ifp, &rx_data->rx_small,
			    length, checksum);
		} else {
			mxge_rx_done_big(ifp, &rx_data->rx_big,
			    length, checksum);
		}

		rx_done->idx++;
		rx_done->idx &= rx_done->mask;
		--cycle;
	}
}

/*
 * Reclaim transmit descriptors up to the firmware's completion index
 * `mcp_idx`: unload DMA maps and free mbufs, un-throttle the subqueue
 * when half the ring is free, restart queued output, and let the NIC
 * stop polling a fully drained queue (multi-tx-ring case).
 */
static __inline void
mxge_tx_done(struct ifnet *ifp, mxge_tx_ring_t *tx, uint32_t mcp_idx)
{
	ASSERT_SERIALIZED(&tx->tx_serialize);

	while (tx->pkt_done != mcp_idx) {
		struct mbuf *m;
		int idx;

		idx = tx->done & tx->mask;
		tx->done++;

		m = tx->info[idx].m;
		/*
		 * mbuf and DMA map only attached to the first
		 * segment per-mbuf.
		 */
		if (m != NULL) {
			tx->pkt_done++;
			IFNET_STAT_INC(ifp, opackets, 1);
			tx->info[idx].m = NULL;
			bus_dmamap_unload(tx->dmat, tx->info[idx].map);
			m_freem(m);
		}
	}

	/*
	 * If we have space, clear OACTIVE to tell the stack that
	 * its OK to send packets
	 */
	if (tx->req - tx->done < (tx->mask + 1) / 2) {
		ifsq_clr_oactive(tx->ifsq);
		if (tx->req == tx->done) {
			/* Reset watchdog */
			tx->watchdog.wd_timer = 0;
		}
	}

	if (!ifsq_is_empty(tx->ifsq))
		ifsq_devstart(tx->ifsq);

	if (tx->send_stop != NULL && tx->req == tx->done) {
		/*
		 * Let the NIC stop polling this queue, since there
		 * are no more transmits pending
		 */
		*tx->send_stop = 1;
		tx->queue_active = 0;
		tx->deactivate++;
		wmb();
	}
}

/*
 * Compliance-byte bit -> ifmedia type for XFP cages; consumed by
 * mxge_media_probe().  IFM_NONE entries are media DragonFly has no
 * ifmedia type for.
 */
static struct mxge_media_type mxge_xfp_media_types[] = {
	{IFM_10G_CX4,	0x7f,		"10GBASE-CX4 (module)"},
	{IFM_10G_SR,	(1 << 7),	"10GBASE-SR"},
	{IFM_10G_LR,	(1 << 6),	"10GBASE-LR"},
	{IFM_NONE,	(1 << 5),	"10GBASE-ER"},
	{IFM_10G_LRM,	(1 << 4),	"10GBASE-LRM"},
	{IFM_NONE,	(1 << 3),	"10GBASE-SW"},
	{IFM_NONE,	(1 << 2),	"10GBASE-LW"},
	{IFM_NONE,	(1 << 1),	"10GBASE-EW"},
	{IFM_NONE,	(1 << 0),	"Reserved"}
};

/*
 * Same table for SFP+ cages.  The first entry's bitmask of 0 is
 * special-cased in mxge_media_probe() with an exact-match compare.
 */
static struct mxge_media_type mxge_sfp_media_types[] = {
	{IFM_10G_TWINAX,      0,	"10GBASE-Twinax"},
	{IFM_NONE,	(1 << 7),	"Reserved"},
	{IFM_10G_LRM,	(1 << 6),	"10GBASE-LRM"},
	{IFM_10G_LR,	(1 << 5),	"10GBASE-LR"},
	{IFM_10G_SR,	(1 << 4),	"10GBASE-SR"},
	{IFM_10G_TWINAX,(1 << 0),	"10GBASE-Twinax"}
};

/*
 * Install `media_type` as the single ifmedia entry and make it
 * current, honoring the configured pause (flow control) setting.
 * IFM_NONE is silently ignored.
 */
static void
mxge_media_set(mxge_softc_t *sc, int media_type)
{
	int fc_opt = 0;

	if (media_type == IFM_NONE)
		return;

	if (sc->pause)
		fc_opt = IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE;

	ifmedia_add(&sc->media, MXGE_IFM | media_type, 0, NULL);
	ifmedia_set(&sc->media, MXGE_IFM | media_type | fc_opt);

	sc->current_media = media_type;
}

/* Remove all ifmedia entries and mark the current media unknown */
static void
mxge_media_unset(mxge_softc_t *sc)
{
	ifmedia_removeall(&sc->media);
	sc->current_media = IFM_NONE;
}

/*
 * Classify the NIC's connector (CX4/QRF/XFP/SFP+) from the product
 * code string and, for fixed-media boards, install the ifmedia entry.
 * For XFP/SFP+ the actual media is probed later in mxge_media_probe().
 */
static void
mxge_media_init(mxge_softc_t *sc)
{
	const char *ptr;
	int i;

	mxge_media_unset(sc);

	/*
	 * Parse the product code to determine the interface type
	 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
	 * after the 3rd dash in the driver's cached copy of the
	 * EEPROM's product code string.
 */
	ptr = sc->product_code_string;
	if (ptr == NULL) {
		if_printf(sc->ifp, "Missing product code\n");
		return;
	}

	/* Advance past the 3rd '-'; bail if the string is malformed */
	for (i = 0; i < 3; i++, ptr++) {
		ptr = strchr(ptr, '-');
		if (ptr == NULL) {
			if_printf(sc->ifp, "only %d dashes in PC?!?\n", i);
			return;
		}
	}
	if (*ptr == 'C' || *(ptr +1) == 'C') {
		/* -C is CX4 */
		sc->connector = MXGE_CX4;
		mxge_media_set(sc, IFM_10G_CX4);
	} else if (*ptr == 'Q') {
		/* -Q is Quad Ribbon Fiber */
		sc->connector = MXGE_QRF;
		if_printf(sc->ifp, "Quad Ribbon Fiber Media\n");
		/* DragonFly has no media type for Quad ribbon fiber */
	} else if (*ptr == 'R') {
		/* -R is XFP */
		sc->connector = MXGE_XFP;
		/* NOTE: ifmedia will be installed later */
	} else if (*ptr == 'S' || *(ptr +1) == 'S') {
		/* -S or -2S is SFP+ */
		sc->connector = MXGE_SFP;
		/* NOTE: ifmedia will be installed later */
	} else {
		sc->connector = MXGE_UNK;
		if_printf(sc->ifp, "Unknown media type: %c\n", *ptr);
	}
}

/*
 * Determine the media type for a NIC.  Some XFPs will identify
 * themselves only when their link is up, so this is initiated via a
 * link up interrupt.  However, this can potentially take up to
 * several milliseconds, so it is run via the watchdog routine, rather
 * than in the interrupt handler itself.
 */
static void
mxge_media_probe(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	const char *cage_type;
	struct mxge_media_type *mxge_media_types = NULL;
	int i, err, ms, mxge_media_type_entries;
	uint32_t byte;

	sc->need_media_probe = 0;

	/* Pick the compliance-byte table matching the cage type */
	if (sc->connector == MXGE_XFP) {
		/* -R is XFP */
		mxge_media_types = mxge_xfp_media_types;
		mxge_media_type_entries = NELEM(mxge_xfp_media_types);
		byte = MXGE_XFP_COMPLIANCE_BYTE;
		cage_type = "XFP";
	} else if (sc->connector == MXGE_SFP) {
		/* -S or -2S is SFP+ */
		mxge_media_types = mxge_sfp_media_types;
		mxge_media_type_entries = NELEM(mxge_sfp_media_types);
		cage_type = "SFP+";
		byte = 3;
	} else {
		/* nothing to do; media type cannot change */
		return;
	}

	/*
	 * At this point we know the NIC has an XFP cage, so now we
	 * try to determine what is in the cage by using the
	 * firmware's XFP I2C commands to read the XFP 10GbE compliance
	 * register.  We read just one byte, which may take over
	 * a millisecond
	 */

	bzero(&cmd, sizeof(cmd));	/* silence gcc warning */
	cmd.data0 = 0;	/* just fetch 1 byte, not all 256 */
	cmd.data1 = byte;
	err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
	if (err != MXGEFW_CMD_OK) {
		if (err == MXGEFW_CMD_ERROR_I2C_FAILURE)
			if_printf(sc->ifp, "failed to read XFP\n");
		else if (err == MXGEFW_CMD_ERROR_I2C_ABSENT)
			if_printf(sc->ifp, "Type R/S with no XFP!?!?\n");
		else
			if_printf(sc->ifp, "I2C read failed, err: %d", err);
		mxge_media_unset(sc);
		return;
	}

	/* Now we wait for the data to be cached */
	cmd.data0 = byte;
	err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
	for (ms = 0; err == EBUSY && ms < 50; ms++) {
		/* Poll up to 50 times at 1ms intervals */
		DELAY(1000);
		cmd.data0 = byte;
		err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
	}
	if (err != MXGEFW_CMD_OK) {
		if_printf(sc->ifp, "failed to read %s (%d, %dms)\n",
		    cage_type, err, ms);
		mxge_media_unset(sc);
		return;
	}

	/*
	 * Table entry 0 is an exact-match value (bitmask 0 for SFP+
	 * Twinax would otherwise never match with '&'); the remaining
	 * entries are single-bit masks.
	 */
	if (cmd.data0 == mxge_media_types[0].bitmask) {
		if (bootverbose) {
			if_printf(sc->ifp, "%s:%s\n", cage_type,
			    mxge_media_types[0].name);
		}
		if (sc->current_media != mxge_media_types[0].flag) {
			mxge_media_unset(sc);
			mxge_media_set(sc, mxge_media_types[0].flag);
		}
		return;
	}
	for (i = 1; i < mxge_media_type_entries; i++) {
		if (cmd.data0 & mxge_media_types[i].bitmask) {
			if (bootverbose) {
				if_printf(sc->ifp, "%s:%s\n", cage_type,
				    mxge_media_types[i].name);
			}

			if (sc->current_media != mxge_media_types[i].flag) {
				mxge_media_unset(sc);
				mxge_media_set(sc, mxge_media_types[i].flag);
			}
			return;
		}
	}
	mxge_media_unset(sc);
	if (bootverbose) {
		if_printf(sc->ifp, "%s media 0x%x unknown\n", cage_type,
		    cmd.data0);
	}
}

/*
 * Process the firmware's DMA'd interrupt stats block: propagate link
 * state changes to the ifnet (and schedule a media probe), report RDMA
 * tag exhaustion, and account link-down events.  Called from the
 * interrupt handlers when stats->stats_updated is set.
 */
static void
mxge_intr_status(struct mxge_softc *sc, const mcp_irq_data_t *stats)
{
	if (sc->link_state != stats->link_up) {
		sc->link_state = stats->link_up;
		if (sc->link_state) {
			sc->ifp->if_link_state = LINK_STATE_UP;
			if_link_state_change(sc->ifp);
			if (bootverbose)
				if_printf(sc->ifp, "link up\n");
		} else {
			sc->ifp->if_link_state = LINK_STATE_DOWN;
			if_link_state_change(sc->ifp);
			if (bootverbose)
				if_printf(sc->ifp, "link down\n");
		}
		/* Media may have changed with the link; re-probe later */
		sc->need_media_probe = 1;
	}

	if (sc->rdma_tags_available != be32toh(stats->rdma_tags_available)) {
		sc->rdma_tags_available = be32toh(stats->rdma_tags_available);
		if_printf(sc->ifp, "RDMA timed out! %d tags left\n",
		    sc->rdma_tags_available);
	}

	if (stats->link_down) {
		sc->down_cnt += stats->link_down;
		sc->link_state = 0;
		sc->ifp->if_link_state = LINK_STATE_DOWN;
		if_link_state_change(sc->ifp);
	}
}

/* Enter every slice serializer except the first (main) one */
static void
mxge_serialize_skipmain(struct mxge_softc *sc)
{
	lwkt_serialize_array_enter(sc->serializes, sc->nserialize, 1);
}

/* Exit every slice serializer except the first (main) one */
static void
mxge_deserialize_skipmain(struct mxge_softc *sc)
{
	lwkt_serialize_array_exit(sc->serializes, sc->nserialize, 1);
}

/*
 * Legacy (INTx) interrupt handler: deassert the IRQ, then loop
 * draining tx completions and rx descriptors until the firmware
 * clears stats->valid, finally returning the rx token(s).
 */
static void
mxge_legacy(void *arg)
{
	struct mxge_slice_state *ss = arg;
	mxge_softc_t *sc = ss->sc;
	mcp_irq_data_t *stats = ss->fw_stats;
	mxge_tx_ring_t *tx = &ss->tx;
	mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
	uint32_t send_done_count;
	uint8_t valid;

	ASSERT_SERIALIZED(&sc->main_serialize);

	/* Make sure the DMA has finished */
	if (!stats->valid)
		return;
	valid = stats->valid;

	/* Lower legacy IRQ */
	*sc->irq_deassert = 0;
	if (!mxge_deassert_wait) {
		/* Don't wait for conf.
 that irq is low */
		stats->valid = 0;
	}

	/* Hold all slice serializers while draining every ring */
	mxge_serialize_skipmain(sc);

	/*
	 * Loop while waiting for legacy irq deassertion
	 * XXX do we really want to loop?
	 */
	do {
		/* Check for transmit completes and receives */
		send_done_count = be32toh(stats->send_done_count);
		while ((send_done_count != tx->pkt_done) ||
		       (rx_done->entry[rx_done->idx].length != 0)) {
			if (send_done_count != tx->pkt_done) {
				mxge_tx_done(&sc->arpcom.ac_if, tx,
				    (int)send_done_count);
			}
			mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1);
			send_done_count = be32toh(stats->send_done_count);
		}
		if (mxge_deassert_wait)
			wmb();
	} while (*((volatile uint8_t *)&stats->valid));

	mxge_deserialize_skipmain(sc);

	/* Fw link & error stats meaningful only on the first slice */
	if (__predict_false(stats->stats_updated))
		mxge_intr_status(sc, stats);

	/* Check to see if we have rx token to pass back */
	if (valid & 0x1)
		*ss->irq_claim = be32toh(3);
	*(ss->irq_claim + 1) = be32toh(3);
}

/*
 * Per-slice MSI interrupt handler: drain rx (unless if_poll is
 * active) and tx completions once, then return the rx token(s) to the
 * firmware.  Runs under the main serializer.
 */
static void
mxge_msi(void *arg)
{
	struct mxge_slice_state *ss = arg;
	mxge_softc_t *sc = ss->sc;
	mcp_irq_data_t *stats = ss->fw_stats;
	mxge_tx_ring_t *tx = &ss->tx;
	mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
	uint32_t send_done_count;
	uint8_t valid;
#ifndef IFPOLL_ENABLE
	const boolean_t polling = FALSE;
#else
	boolean_t polling = FALSE;
#endif

	ASSERT_SERIALIZED(&sc->main_serialize);

	/* Make sure the DMA has finished */
	if (__predict_false(!stats->valid))
		return;

	valid = stats->valid;
	stats->valid = 0;

#ifdef IFPOLL_ENABLE
	if (sc->arpcom.ac_if.if_flags & IFF_NPOLLING)
		polling = TRUE;
#endif

	if (!polling) {
		/* Check for receives */
		lwkt_serialize_enter(&ss->rx_data.rx_serialize);
		if (rx_done->entry[rx_done->idx].length != 0)
			mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1);
		lwkt_serialize_exit(&ss->rx_data.rx_serialize);
	}

	/*
	 * Check for transmit completes
	 *
	 * NOTE:
	 * Since pkt_done is only changed by mxge_tx_done(),
	 * which is called only in interrupt handler, the
	 * check w/o holding tx serializer is MPSAFE.
	 */
	send_done_count = be32toh(stats->send_done_count);
	if (send_done_count != tx->pkt_done) {
		lwkt_serialize_enter(&tx->tx_serialize);
		mxge_tx_done(&sc->arpcom.ac_if, tx, (int)send_done_count);
		lwkt_serialize_exit(&tx->tx_serialize);
	}

	if (__predict_false(stats->stats_updated))
		mxge_intr_status(sc, stats);

	/* Check to see if we have rx token to pass back */
	if (!polling && (valid & 0x1))
		*ss->irq_claim = be32toh(3);
	*(ss->irq_claim + 1) = be32toh(3);
}

/*
 * MSI-X handler for rx-only vectors: drain the slice's receive ring
 * and return the rx token.  Skipped entirely while if_poll owns rx.
 */
static void
mxge_msix_rx(void *arg)
{
	struct mxge_slice_state *ss = arg;
	mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;

#ifdef IFPOLL_ENABLE
	if (ss->sc->arpcom.ac_if.if_flags & IFF_NPOLLING)
		return;
#endif

	ASSERT_SERIALIZED(&ss->rx_data.rx_serialize);

	if (rx_done->entry[rx_done->idx].length != 0)
		mxge_clean_rx_done(&ss->sc->arpcom.ac_if, &ss->rx_data, -1);

	*ss->irq_claim = be32toh(3);
}

/*
 * MSI-X handler for combined rx+tx vectors; like mxge_msi() but runs
 * under the slice's rx serializer instead of the main serializer.
 */
static void
mxge_msix_rxtx(void *arg)
{
	struct mxge_slice_state *ss = arg;
	mxge_softc_t *sc = ss->sc;
	mcp_irq_data_t *stats = ss->fw_stats;
	mxge_tx_ring_t *tx = &ss->tx;
	mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
	uint32_t send_done_count;
	uint8_t valid;
#ifndef IFPOLL_ENABLE
	const boolean_t polling = FALSE;
#else
	boolean_t polling = FALSE;
#endif

	ASSERT_SERIALIZED(&ss->rx_data.rx_serialize);

	/* Make sure the DMA has finished */
	if (__predict_false(!stats->valid))
		return;

	valid = stats->valid;
	stats->valid = 0;

#ifdef IFPOLL_ENABLE
	if (sc->arpcom.ac_if.if_flags & IFF_NPOLLING)
		polling = TRUE;
#endif

	/* Check for receives */
	if (!polling && rx_done->entry[rx_done->idx].length != 0)
		mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1);

	/*
	 * Check for transmit completes
	 *
	 * NOTE:
	 * Since pkt_done is only changed by mxge_tx_done(),
	 * which is called only in interrupt handler, the
	 * check w/o holding tx serializer is MPSAFE.
	 */
	send_done_count = be32toh(stats->send_done_count);
	if (send_done_count != tx->pkt_done) {
		lwkt_serialize_enter(&tx->tx_serialize);
		mxge_tx_done(&sc->arpcom.ac_if, tx, (int)send_done_count);
		lwkt_serialize_exit(&tx->tx_serialize);
	}

	/* Check to see if we have rx token to pass back */
	if (!polling && (valid & 0x1))
		*ss->irq_claim = be32toh(3);
	*(ss->irq_claim + 1) = be32toh(3);
}

/* if_init callback: open the interface if it is not already running */
static void
mxge_init(void *arg)
{
	struct mxge_softc *sc = arg;

	ASSERT_IFNET_SERIALIZED_ALL(sc->ifp);
	if ((sc->ifp->if_flags & IFF_RUNNING) == 0)
		mxge_open(sc);
}

/*
 * Free every mbuf (and unload its DMA map) still attached to a
 * slice's big/small receive rings and, where present, its transmit
 * ring.
 */
static void
mxge_free_slice_mbufs(struct mxge_slice_state *ss)
{
	int i;

	for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
		if (ss->rx_data.rx_big.info[i].m == NULL)
			continue;
		bus_dmamap_unload(ss->rx_data.rx_big.dmat,
		    ss->rx_data.rx_big.info[i].map);
		m_freem(ss->rx_data.rx_big.info[i].m);
		ss->rx_data.rx_big.info[i].m = NULL;
	}

	for (i = 0; i <= ss->rx_data.rx_small.mask; i++) {
		if (ss->rx_data.rx_small.info[i].m == NULL)
			continue;
		bus_dmamap_unload(ss->rx_data.rx_small.dmat,
		    ss->rx_data.rx_small.info[i].map);
		m_freem(ss->rx_data.rx_small.info[i].m);
		ss->rx_data.rx_small.info[i].m = NULL;
	}

	/* Transmit ring used only on the first slice */
	if (ss->tx.info == NULL)
		return;

	for (i = 0; i <= ss->tx.mask; i++) {
		if (ss->tx.info[i].m == NULL)
			continue;
		bus_dmamap_unload(ss->tx.dmat, ss->tx.info[i].map);
		m_freem(ss->tx.info[i].m);
		ss->tx.info[i].m = NULL;
	}
}

/* Free the in-flight mbufs of every slice */
static void
mxge_free_mbufs(mxge_softc_t *sc)
{
	int slice;

	for (slice = 0; slice < sc->num_slices; slice++)
		mxge_free_slice_mbufs(&sc->ss[slice]);
}

/*
 * Tear down a slice's host-side ring state: completion ring DMA
 * memory, tx request/segment copy blocks, rx shadow rings, and the
 * per-slot DMA maps + tags for tx and both rx rings.  Every pointer
 * is NULL-checked and cleared, so this is safe on partially
 * constructed slices.
 */
static void
mxge_free_slice_rings(struct mxge_slice_state *ss)
{
	int i;

	if (ss->rx_data.rx_done.entry != NULL) {
		mxge_dma_free(&ss->rx_done_dma);
		ss->rx_data.rx_done.entry = NULL;
	}

	if (ss->tx.req_list != NULL) {
		kfree(ss->tx.req_list, M_DEVBUF);
		ss->tx.req_list = NULL;
	}

	if (ss->tx.seg_list != NULL) {
		kfree(ss->tx.seg_list, M_DEVBUF);
		ss->tx.seg_list = NULL;
	}

	if (ss->rx_data.rx_small.shadow != NULL) {
		kfree(ss->rx_data.rx_small.shadow, M_DEVBUF);
		ss->rx_data.rx_small.shadow = NULL;
	}

	if (ss->rx_data.rx_big.shadow != NULL) {
		kfree(ss->rx_data.rx_big.shadow, M_DEVBUF);
		ss->rx_data.rx_big.shadow = NULL;
	}

	if (ss->tx.info != NULL) {
		if (ss->tx.dmat != NULL) {
			for (i = 0; i <= ss->tx.mask; i++) {
				bus_dmamap_destroy(ss->tx.dmat,
				    ss->tx.info[i].map);
			}
			bus_dma_tag_destroy(ss->tx.dmat);
		}
		kfree(ss->tx.info, M_DEVBUF);
		ss->tx.info = NULL;
	}

	if (ss->rx_data.rx_small.info != NULL) {
		if (ss->rx_data.rx_small.dmat != NULL) {
			for (i = 0; i <= ss->rx_data.rx_small.mask; i++) {
				bus_dmamap_destroy(ss->rx_data.rx_small.dmat,
				    ss->rx_data.rx_small.info[i].map);
			}
			bus_dmamap_destroy(ss->rx_data.rx_small.dmat,
			    ss->rx_data.rx_small.extra_map);
			bus_dma_tag_destroy(ss->rx_data.rx_small.dmat);
		}
		kfree(ss->rx_data.rx_small.info, M_DEVBUF);
		ss->rx_data.rx_small.info = NULL;
	}

	if (ss->rx_data.rx_big.info != NULL) {
		if (ss->rx_data.rx_big.dmat != NULL) {
			for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
				bus_dmamap_destroy(ss->rx_data.rx_big.dmat,
				    ss->rx_data.rx_big.info[i].map);
			}
			bus_dmamap_destroy(ss->rx_data.rx_big.dmat,
			    ss->rx_data.rx_big.extra_map);
			bus_dma_tag_destroy(ss->rx_data.rx_big.dmat);
		}
		kfree(ss->rx_data.rx_big.info, M_DEVBUF);
		ss->rx_data.rx_big.info = NULL;
	}
}

/* Free the rings of every slice */
static void
mxge_free_rings(mxge_softc_t *sc)
{
	int slice;

	if (sc->ss == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++)
		mxge_free_slice_rings(&sc->ss[slice]);
}

/*
 * Allocate one slice's host-side ring state: rx shadow rings, rx/tx
 * info arrays, DMA tags, and one DMA map per ring slot (plus a spare
 * "extra" map per rx ring for the buffer-replacement swap).  Returns
 * 0 or a busdma errno; on map-creation failure the maps and tag
 * created in that loop are destroyed before returning.
 */
static int
mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
    int tx_ring_entries)
{
	mxge_softc_t *sc = ss->sc;
	size_t bytes;
	int err, i;

	/*
	 * Allocate per-slice receive resources
	 */

	ss->rx_data.rx_small.mask = ss->rx_data.rx_big.mask =
	    rx_ring_entries - 1;
	/* Completion ring is shared by both rx rings, hence 2x */
	ss->rx_data.rx_done.mask = (2 * rx_ring_entries) - 1;

	/* Allocate the rx shadow rings */
	bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_small.shadow);
	ss->rx_data.rx_small.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_big.shadow);
	ss->rx_data.rx_big.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	/* Allocate the rx host info rings */
	bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_small.info);
	ss->rx_data.rx_small.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_big.info);
	ss->rx_data.rx_big.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	/* Allocate the rx busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
	    1,				/* alignment */
	    4096,			/*
 boundary */
	    BUS_SPACE_MAXADDR,		/* low */
	    BUS_SPACE_MAXADDR,		/* high */
	    NULL, NULL,			/* filter */
	    MHLEN,			/* maxsize */
	    1,				/* num segs */
	    MHLEN,			/* maxsegsize */
	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,	/* flags */
	    &ss->rx_data.rx_small.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
		    err);
		return err;
	}

	/* Spare map used by the rx buffer-replacement swap */
	err = bus_dmamap_create(ss->rx_data.rx_small.dmat, BUS_DMA_WAITOK,
	    &ss->rx_data.rx_small.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_small dmamap\n", err);
		bus_dma_tag_destroy(ss->rx_data.rx_small.dmat);
		ss->rx_data.rx_small.dmat = NULL;
		return err;
	}
	for (i = 0; i <= ss->rx_data.rx_small.mask; i++) {
		err = bus_dmamap_create(ss->rx_data.rx_small.dmat,
		    BUS_DMA_WAITOK, &ss->rx_data.rx_small.info[i].map);
		if (err != 0) {
			int j;

			device_printf(sc->dev, "Err %d rx_small dmamap\n", err);

			/* Unwind the maps created so far, then the tag */
			for (j = 0; j < i; ++j) {
				bus_dmamap_destroy(ss->rx_data.rx_small.dmat,
				    ss->rx_data.rx_small.info[j].map);
			}
			bus_dmamap_destroy(ss->rx_data.rx_small.dmat,
			    ss->rx_data.rx_small.extra_map);
			bus_dma_tag_destroy(ss->rx_data.rx_small.dmat);
			ss->rx_data.rx_small.dmat = NULL;
			return err;
		}
	}

	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
	    1,				/* alignment */
	    4096,			/* boundary */
	    BUS_SPACE_MAXADDR,		/* low */
	    BUS_SPACE_MAXADDR,		/* high */
	    NULL, NULL,			/* filter */
	    4096,			/* maxsize */
	    1,				/* num segs */
	    4096,			/* maxsegsize*/
	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,	/* flags */
	    &ss->rx_data.rx_big.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
		    err);
		return err;
	}

	err = bus_dmamap_create(ss->rx_data.rx_big.dmat, BUS_DMA_WAITOK,
	    &ss->rx_data.rx_big.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_big dmamap\n", err);
		bus_dma_tag_destroy(ss->rx_data.rx_big.dmat);
		ss->rx_data.rx_big.dmat = NULL;
		return err;
	}
	for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
		err = bus_dmamap_create(ss->rx_data.rx_big.dmat, BUS_DMA_WAITOK,
		    &ss->rx_data.rx_big.info[i].map);
		if (err != 0) {
			int j;

			device_printf(sc->dev, "Err %d rx_big dmamap\n", err);
			for (j = 0; j < i; ++j) {
				bus_dmamap_destroy(ss->rx_data.rx_big.dmat,
				    ss->rx_data.rx_big.info[j].map);
			}
			bus_dmamap_destroy(ss->rx_data.rx_big.dmat,
			    ss->rx_data.rx_big.extra_map);
			bus_dma_tag_destroy(ss->rx_data.rx_big.dmat);
			ss->rx_data.rx_big.dmat = NULL;
			return err;
		}
	}

	/*
	 * Now allocate TX resources
	 */

	ss->tx.mask = tx_ring_entries - 1;
	ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);

	/*
	 * Allocate the tx request copy block; MUST be at least 8 bytes
	 * aligned
	 */
	bytes = sizeof(*ss->tx.req_list) * (ss->tx.max_desc + 4);
	ss->tx.req_list = kmalloc_cachealign(__VM_CACHELINE_ALIGN(bytes),
	    M_DEVBUF, M_WAITOK);

	/* Allocate the tx busdma segment list */
	bytes = sizeof(*ss->tx.seg_list) * ss->tx.max_desc;
	ss->tx.seg_list = kmalloc(bytes, M_DEVBUF, M_WAITOK);

	/* Allocate the tx host info ring */
	bytes = tx_ring_entries * sizeof(*ss->tx.info);
	ss->tx.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	/* Allocate the tx busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
	    1,				/* alignment */
	    sc->tx_boundary,		/* boundary */
	    BUS_SPACE_MAXADDR,		/* low */
	    BUS_SPACE_MAXADDR,		/* high */
	    NULL, NULL,			/* filter */
	    IP_MAXPACKET +
	    sizeof(struct ether_vlan_header),	/* maxsize */
	    ss->tx.max_desc - 2,	/* num segs */
	    sc->tx_boundary,		/* maxsegsz */
	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
	    BUS_DMA_ONEBPAGE,		/* flags */
	    &ss->tx.dmat);		/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating tx dmat\n", err);
		return err;
	}

	/*
	 * Now use these tags to setup DMA maps for each slot in the ring
	 */
	for (i = 0; i <= ss->tx.mask; i++) {
		err = bus_dmamap_create(ss->tx.dmat,
		    BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &ss->tx.info[i].map);
		if (err != 0) {
			int j;

			device_printf(sc->dev, "Err %d tx dmamap\n", err);
			for (j = 0; j < i; ++j) {
				bus_dmamap_destroy(ss->tx.dmat,
				    ss->tx.info[j].map);
			}
			bus_dma_tag_destroy(ss->tx.dmat);
			ss->tx.dmat = NULL;
			return err;
		}
	}
	return 0;
}

/*
 * Query the firmware for the send ring size, derive the rx ring size
 * from the allotted interrupt slots, size the ifnet send queue(s),
 * and allocate rings for every slice.
 */
static int
mxge_alloc_rings(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int tx_ring_size;
	int tx_ring_entries, rx_ring_entries;
	int err, slice;

	/* Get ring sizes */
	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine tx ring sizes\n");
		return err;
	}
	tx_ring_size = cmd.data0;

	tx_ring_entries = tx_ring_size / sizeof(mcp_kreq_ether_send_t);
	rx_ring_entries = sc->rx_intr_slots / 2;

	if (bootverbose) {
		device_printf(sc->dev, "tx desc %d, rx desc %d\n",
		    tx_ring_entries, rx_ring_entries);
	}

	sc->ifp->if_nmbclusters = rx_ring_entries * sc->num_slices;
	sc->ifp->if_nmbjclusters = sc->ifp->if_nmbclusters;

	ifq_set_maxlen(&sc->ifp->if_snd, tx_ring_entries - 1);
	ifq_set_ready(&sc->ifp->if_snd);
	ifq_set_subq_cnt(&sc->ifp->if_snd, sc->num_tx_rings);

	if (sc->num_tx_rings > 1) {
		sc->ifp->if_mapsubq = ifq_mapsubq_modulo;
		ifq_set_subq_divisor(&sc->ifp->if_snd, sc->num_tx_rings);
	}

	for (slice = 0; slice < sc->num_slices; slice++) {
		err = mxge_alloc_slice_rings(&sc->ss[slice],
		    rx_ring_entries, tx_ring_entries);
		if (err != 0) {
			device_printf(sc->dev,
			    "alloc %d slice rings failed\n", slice);
			return err;
		}
	}
	return 0;
}

/*
 * Pick the big-ring receive cluster size for `mtu`: a standard 2K
 * cluster when a worst-case (VLAN + pad) frame fits, otherwise a
 * page-sized jumbo cluster.
 */
static void
mxge_choose_params(int mtu, int *cl_size)
{
	int bufsize = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN + MXGEFW_PAD;

	if (bufsize < MCLBYTES) {
		*cl_size = MCLBYTES;
	} else {
		KASSERT(bufsize < MJUMPAGESIZE, ("invalid MTU %d", mtu));
		*cl_size = MJUMPAGESIZE;
	}
}

/*
 * Per-slice open: fetch the lanai (NIC SRAM) send/receive ring
 * pointers from the firmware and stock both receive rings with
 * buffers.  Returns 0, EIO, or ENOMEM.
 */
static int
mxge_slice_open(struct mxge_slice_state *ss, int cl_size)
{
	mxge_cmd_t cmd;
	int err, i, slice;

	slice = ss - ss->sc->ss;

	/*
	 * Get the lanai pointers to the send and receive rings
	 */
	err = 0;

	bzero(&cmd, sizeof(cmd));	/* silence gcc warning */
	if (ss->sc->num_tx_rings == 1) {
		if (slice == 0) {
			cmd.data0 = slice;
			err = mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SEND_OFFSET,
			    &cmd);
			ss->tx.lanai = (volatile mcp_kreq_ether_send_t *)
			    (ss->sc->sram + cmd.data0);
			/* Leave send_go and send_stop as NULL */
		}
	} else {
		cmd.data0 = slice;
		err = mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
		ss->tx.lanai = (volatile mcp_kreq_ether_send_t *)
		    (ss->sc->sram + cmd.data0);
		ss->tx.send_go = (volatile uint32_t *)
		    (ss->sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
		ss->tx.send_stop = (volatile uint32_t *)
		    (ss->sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
	}

	cmd.data0 = slice;
	err |= mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
	ss->rx_data.rx_small.lanai =
	    (volatile mcp_kreq_ether_recv_t *)(ss->sc->sram + cmd.data0);

	cmd.data0 = slice;
	err |= mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
	ss->rx_data.rx_big.lanai =
	    (volatile mcp_kreq_ether_recv_t *)(ss->sc->sram + cmd.data0);

	if (err != 0) {
		if_printf(ss->sc->ifp,
		    "failed to get ring sizes or locations\n");
		return EIO;
	}

	/*
	 * Stock small receive ring
	 */
	for (i = 0; i <= ss->rx_data.rx_small.mask; i++) {
		err = mxge_get_buf_small(&ss->rx_data.rx_small,
		    ss->rx_data.rx_small.info[i].map, i, TRUE);
		if (err) {
			if_printf(ss->sc->ifp, "alloced %d/%d smalls\n", i,
			    ss->rx_data.rx_small.mask + 1);
			return ENOMEM;
		}
	}

	/*
	 * Stock big receive ring
	 */
	/* Pre-poison shadow addresses before filling the ring */
	for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
		ss->rx_data.rx_big.shadow[i].addr_low = 0xffffffff;
		ss->rx_data.rx_big.shadow[i].addr_high = 0xffffffff;
	}

	ss->rx_data.rx_big.cl_size = cl_size;

	for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
		err = mxge_get_buf_big(&ss->rx_data.rx_big,
		    ss->rx_data.rx_big.info[i].map, i, TRUE);
		if (err) {
			if_printf(ss->sc->ifp, "alloced %d/%d bigs\n", i,
			    ss->rx_data.rx_big.mask + 1);
			return ENOMEM;
		}
	}
	return 0;
}

/*
 * Bring the hardware up (the if_init path; see mxge_init()).  Resets
 * the NIC and, with multiple slices, programs the RSS indirection
 * table and hash key before opening each slice.  Caller holds all
 * ifnet serializers.
 */
static int
mxge_open(mxge_softc_t *sc)
{
	struct ifnet *ifp = sc->ifp;
	mxge_cmd_t cmd;
	int err, slice, cl_size, i;
	bus_addr_t bus;
	volatile uint8_t *itable;
	struct mxge_slice_state *ss;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	/* Copy the MAC address in case it was overridden */
	bcopy(IF_LLADDR(ifp), sc->mac_addr, ETHER_ADDR_LEN);

	err = mxge_reset(sc, 1);
	if (err != 0) {
		if_printf(ifp, "failed to reset\n");
		return EIO;
	}

	if (sc->num_slices > 1) {
		/*
		 * Setup the indirect table.
3352 */ 3353 if_ringmap_rdrtable(sc->ring_map, sc->rdr_table, NETISR_CPUMAX); 3354 3355 cmd.data0 = NETISR_CPUMAX; 3356 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, &cmd); 3357 3358 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, &cmd); 3359 if (err != 0) { 3360 if_printf(ifp, "failed to setup rss tables\n"); 3361 return err; 3362 } 3363 3364 itable = sc->sram + cmd.data0; 3365 for (i = 0; i < NETISR_CPUMAX; i++) 3366 itable[i] = sc->rdr_table[i]; 3367 3368 if (sc->use_rss) { 3369 volatile uint8_t *hwkey; 3370 uint8_t swkey[MXGE_HWRSS_KEYLEN]; 3371 3372 /* 3373 * Setup Toeplitz key. 3374 */ 3375 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_KEY_OFFSET, 3376 &cmd); 3377 if (err != 0) { 3378 if_printf(ifp, "failed to get rsskey\n"); 3379 return err; 3380 } 3381 hwkey = sc->sram + cmd.data0; 3382 3383 toeplitz_get_key(swkey, MXGE_HWRSS_KEYLEN); 3384 for (i = 0; i < MXGE_HWRSS_KEYLEN; ++i) 3385 hwkey[i] = swkey[i]; 3386 wmb(); 3387 3388 err = mxge_send_cmd(sc, MXGEFW_CMD_RSS_KEY_UPDATED, 3389 &cmd); 3390 if (err != 0) { 3391 if_printf(ifp, "failed to update rsskey\n"); 3392 return err; 3393 } 3394 if (bootverbose) 3395 if_printf(ifp, "RSS key updated\n"); 3396 } 3397 3398 cmd.data0 = 1; 3399 if (sc->use_rss) { 3400 if (bootverbose) 3401 if_printf(ifp, "input hash: RSS\n"); 3402 cmd.data1 = MXGEFW_RSS_HASH_TYPE_IPV4 | 3403 MXGEFW_RSS_HASH_TYPE_TCP_IPV4; 3404 } else { 3405 if (bootverbose) 3406 if_printf(ifp, "input hash: SRC_DST_PORT\n"); 3407 cmd.data1 = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 3408 } 3409 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3410 if (err != 0) { 3411 if_printf(ifp, "failed to enable slices\n"); 3412 return err; 3413 } 3414 } 3415 3416 cmd.data0 = MXGEFW_TSO_MODE_NDIS; 3417 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_TSO_MODE, &cmd); 3418 if (err) { 3419 /* 3420 * Can't change TSO mode to NDIS, never allow TSO then 3421 */ 3422 if_printf(ifp, "failed to set TSO mode\n"); 3423 ifp->if_capenable &= ~IFCAP_TSO; 3424 
ifp->if_capabilities &= ~IFCAP_TSO; 3425 ifp->if_hwassist &= ~CSUM_TSO; 3426 } 3427 3428 mxge_choose_params(ifp->if_mtu, &cl_size); 3429 3430 cmd.data0 = 1; 3431 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, &cmd); 3432 /* 3433 * Error is only meaningful if we're trying to set 3434 * MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 3435 */ 3436 3437 /* 3438 * Give the firmware the mtu and the big and small buffer 3439 * sizes. The firmware wants the big buf size to be a power 3440 * of two. Luckily, DragonFly's clusters are powers of two 3441 */ 3442 cmd.data0 = ifp->if_mtu + ETHER_HDR_LEN + EVL_ENCAPLEN; 3443 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3444 3445 cmd.data0 = MXGE_RX_SMALL_BUFLEN; 3446 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd); 3447 3448 cmd.data0 = cl_size; 3449 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3450 3451 if (err != 0) { 3452 if_printf(ifp, "failed to setup params\n"); 3453 goto abort; 3454 } 3455 3456 /* Now give him the pointer to the stats block */ 3457 for (slice = 0; slice < sc->num_slices; slice++) { 3458 ss = &sc->ss[slice]; 3459 cmd.data0 = MXGE_LOWPART_TO_U32(ss->fw_stats_dma.dmem_busaddr); 3460 cmd.data1 = MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.dmem_busaddr); 3461 cmd.data2 = sizeof(struct mcp_irq_data); 3462 cmd.data2 |= (slice << 16); 3463 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3464 } 3465 3466 if (err != 0) { 3467 bus = sc->ss->fw_stats_dma.dmem_busaddr; 3468 bus += offsetof(struct mcp_irq_data, send_done_count); 3469 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3470 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3471 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3472 &cmd); 3473 3474 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3475 sc->fw_multicast_support = 0; 3476 } else { 3477 sc->fw_multicast_support = 1; 3478 } 3479 3480 if (err != 0) { 3481 if_printf(ifp, "failed to setup params\n"); 3482 goto abort; 3483 } 3484 3485 for (slice 
= 0; slice < sc->num_slices; slice++) { 3486 err = mxge_slice_open(&sc->ss[slice], cl_size); 3487 if (err != 0) { 3488 if_printf(ifp, "couldn't open slice %d\n", slice); 3489 goto abort; 3490 } 3491 } 3492 3493 /* Finally, start the firmware running */ 3494 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3495 if (err) { 3496 if_printf(ifp, "Couldn't bring up link\n"); 3497 goto abort; 3498 } 3499 3500 ifp->if_flags |= IFF_RUNNING; 3501 for (i = 0; i < sc->num_tx_rings; ++i) { 3502 mxge_tx_ring_t *tx = &sc->ss[i].tx; 3503 3504 ifsq_clr_oactive(tx->ifsq); 3505 ifsq_watchdog_start(&tx->watchdog); 3506 } 3507 3508 return 0; 3509 3510 abort: 3511 mxge_free_mbufs(sc); 3512 return err; 3513 } 3514 3515 static void 3516 mxge_close(mxge_softc_t *sc, int down) 3517 { 3518 struct ifnet *ifp = sc->ifp; 3519 mxge_cmd_t cmd; 3520 int err, old_down_cnt, i; 3521 3522 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3523 3524 if (!down) { 3525 old_down_cnt = sc->down_cnt; 3526 wmb(); 3527 3528 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3529 if (err) 3530 if_printf(ifp, "Couldn't bring down link\n"); 3531 3532 if (old_down_cnt == sc->down_cnt) { 3533 /* 3534 * Wait for down irq 3535 * XXX racy 3536 */ 3537 ifnet_deserialize_all(ifp); 3538 DELAY(10 * sc->intr_coal_delay); 3539 ifnet_serialize_all(ifp); 3540 } 3541 3542 wmb(); 3543 if (old_down_cnt == sc->down_cnt) 3544 if_printf(ifp, "never got down irq\n"); 3545 } 3546 mxge_free_mbufs(sc); 3547 3548 ifp->if_flags &= ~IFF_RUNNING; 3549 for (i = 0; i < sc->num_tx_rings; ++i) { 3550 mxge_tx_ring_t *tx = &sc->ss[i].tx; 3551 3552 ifsq_clr_oactive(tx->ifsq); 3553 ifsq_watchdog_stop(&tx->watchdog); 3554 } 3555 } 3556 3557 static void 3558 mxge_setup_cfg_space(mxge_softc_t *sc) 3559 { 3560 device_t dev = sc->dev; 3561 int reg; 3562 uint16_t lnk, pectl; 3563 3564 /* Find the PCIe link width and set max read request to 4KB */ 3565 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 3566 lnk = pci_read_config(dev, reg + 0x12, 2); 3567 
		sc->link_width = (lnk >> 4) & 0x3f;

		if (sc->pectl == 0) {
			/* First call: set max read request to 4KB (5 << 12)
			 * in the PCIe Device Control register (+0x8) and
			 * remember the value for later restoration. */
			pectl = pci_read_config(dev, reg + 0x8, 2);
			pectl = (pectl & ~0x7000) | (5 << 12);
			pci_write_config(dev, reg + 0x8, pectl, 2);
			sc->pectl = pectl;
		} else {
			/* Restore saved pectl after watchdog reset */
			pci_write_config(dev, reg + 0x8, sc->pectl, 2);
		}
	}

	/* Enable DMA and memory space access */
	pci_enable_busmaster(dev);
}

/*
 * Read the NIC's reboot status register through the vendor-specific
 * PCI capability window.  Returns (uint32_t)-1 if the capability
 * cannot be located.
 */
static uint32_t
mxge_read_reboot(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	uint32_t vs;

	/* Find the vendor specific offset */
	/* NOTE(review): &vs is uint32_t* where pci_find_extcap() takes an
	 * int pointer — works on this platform, but worth confirming. */
	if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
		if_printf(sc->ifp, "could not find vendor specific offset\n");
		return (uint32_t)-1;
	}
	/* Enable read32 mode */
	pci_write_config(dev, vs + 0x10, 0x3, 1);
	/* Tell NIC which register to read */
	pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
	return pci_read_config(dev, vs + 0x14, 4);
}

/*
 * Handle a TX watchdog timeout: detect whether the NIC spontaneously
 * rebooted (PCI config space wiped, busmaster bit cleared), and if so
 * quiesce, restore config space, reload firmware and reopen.  On
 * success the periodic tick callout is rearmed; on failure the device
 * is left down.
 */
static void
mxge_watchdog_reset(mxge_softc_t *sc)
{
	struct pci_devinfo *dinfo;
	int err, running;
	uint32_t reboot;
	uint16_t cmd;

	err = ENXIO;

	if_printf(sc->ifp, "Watchdog reset!\n");

	/*
	 * Check to see if the NIC rebooted.  If it did, then all of
	 * PCI config space has been reset, and things like the
	 * busmaster bit will be zero.  If this is the case, then we
	 * must restore PCI config space before the NIC can be used
	 * again
	 */
	cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
	if (cmd == 0xffff) {
		/*
		 * Maybe the watchdog caught the NIC rebooting; wait
		 * up to 100ms for it to finish.  If it does not come
		 * back, then give up
		 */
		DELAY(1000*100);
		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
		if (cmd == 0xffff)
			if_printf(sc->ifp, "NIC disappeared!\n");
	}
	if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
		/* Print the reboot status */
		reboot = mxge_read_reboot(sc);
		if_printf(sc->ifp, "NIC rebooted, status = 0x%x\n", reboot);

		running = sc->ifp->if_flags & IFF_RUNNING;
		if (running) {
			/*
			 * Quiesce NIC so that TX routines will not try to
			 * xmit after restoration of BAR
			 */

			/* Mark the link as down */
			if (sc->link_state) {
				sc->ifp->if_link_state = LINK_STATE_DOWN;
				if_link_state_change(sc->ifp);
			}
			/* down == 1: NIC already halted, skip DOWN command */
			mxge_close(sc, 1);
		}
		/* Restore PCI configuration space */
		dinfo = device_get_ivars(sc->dev);
		pci_cfg_restore(sc->dev, dinfo);

		/* And redo any changes we made to our config space */
		mxge_setup_cfg_space(sc);

		/* Reload f/w */
		err = mxge_load_firmware(sc, 0);
		if (err)
			if_printf(sc->ifp, "Unable to re-load f/w\n");
		if (running && !err) {
			int i;

			err = mxge_open(sc);

			for (i = 0; i < sc->num_tx_rings; ++i)
				ifsq_devstart_sched(sc->ss[i].tx.ifsq);
		}
		sc->watchdog_resets++;
	} else {
		if_printf(sc->ifp, "NIC did not reboot, not resetting\n");
		err = 0;
	}
	if (err) {
		if_printf(sc->ifp, "watchdog reset failed\n");
	} else {
		/* Recovered from the h/w fault noticed by mxge_tick() */
		if (sc->dying == 2)
			sc->dying = 0;
		callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	}
}

/*
 * Dump a TX ring's bookkeeping counters when it appears wedged.
 * NOTE(review): "struck" in the message below looks like an upstream
 * typo for "stuck" (the function is named mxge_warn_stuck).
 */
static void
mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
{
	if_printf(sc->ifp, "slice %d struck? ring state:\n", slice);
	if_printf(sc->ifp, "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
	    tx->req, tx->done, tx->queue_active);
	if_printf(sc->ifp, "tx.activate=%d tx.deactivate=%d\n",
	    tx->activate, tx->deactivate);
	if_printf(sc->ifp, "pkt_done=%d fw=%d\n",
	    tx->pkt_done, be32toh(sc->ss->fw_stats->send_done_count));
}

/*
 * Snapshot the interface packet counters and return how many packets
 * (rx + tx) moved since the previous call; used by mxge_tick() to
 * decide whether the NIC has been idle.
 */
static u_long
mxge_update_stats(mxge_softc_t *sc)
{
	u_long ipackets, opackets, pkts;

	IFNET_STAT_GET(sc->ifp, ipackets, ipackets);
	IFNET_STAT_GET(sc->ifp, opackets, opackets);

	pkts = ipackets - sc->ipackets;
	pkts += opackets - sc->opackets;

	sc->ipackets = ipackets;
	sc->opackets = opackets;

	return pkts;
}

/*
 * Periodic callout: update stats, re-probe media when requested, and
 * while the NIC is idle check for a silent h/w fault (busmaster bit
 * lost) — on fault, hand off to mxge_watchdog_reset(), which then owns
 * rearming the callout, so we deliberately skip rearming here (err).
 */
static void
mxge_tick(void *arg)
{
	mxge_softc_t *sc = arg;
	u_long pkts = 0;
	int err = 0;
	int ticks;

	lwkt_serialize_enter(&sc->main_serialize);

	ticks = mxge_ticks;
	if (sc->ifp->if_flags & IFF_RUNNING) {
		/* Aggregate stats from different slices */
		pkts = mxge_update_stats(sc);
		if (sc->need_media_probe)
			mxge_media_probe(sc);
	}
	if (pkts == 0) {
		uint16_t cmd;

		/* Ensure NIC did not suffer h/w fault while idle */
		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
		if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
			sc->dying = 2;
			mxge_serialize_skipmain(sc);
			mxge_watchdog_reset(sc);
			mxge_deserialize_skipmain(sc);
			err = ENXIO;
		}

		/* Look less often if NIC is idle */
		ticks *= 4;
	}

	if (err == 0)
		callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);

	lwkt_serialize_exit(&sc->main_serialize);
}

/*
 * ifmedia change callback: the only user-changeable knob is flow
 * control (rx/tx pause); toggle it through the firmware when the
 * requested state differs from the current one.
 */
static int
mxge_media_change(struct ifnet *ifp)
{
	mxge_softc_t *sc = ifp->if_softc;
	const struct ifmedia *ifm = &sc->media;
	int pause;

	if (IFM_OPTIONS(ifm->ifm_media) & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) {
		if (sc->pause)
			return 0;
		pause = 1;
	} else {
		if (!sc->pause)
			return 0;
		pause = 0;
	}
	return mxge_change_pause(sc, pause);
}

/*
 * Change the MTU.  A running interface is closed and reopened with the
 * new value; if the reopen fails the old MTU is restored and the
 * interface reopened again (best effort).
 */
static int
mxge_change_mtu(mxge_softc_t *sc, int mtu)
{
	struct ifnet *ifp = sc->ifp;
	int real_mtu, old_mtu;
	int err = 0;

	real_mtu = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN;
	if (mtu > sc->max_mtu || real_mtu < 60)
		return EINVAL;

	old_mtu = ifp->if_mtu;
	ifp->if_mtu = mtu;
	if (ifp->if_flags & IFF_RUNNING) {
		mxge_close(sc, 0);
		err = mxge_open(sc);
		if (err != 0) {
			/* Roll back and try to recover with the old MTU */
			ifp->if_mtu = old_mtu;
			mxge_close(sc, 0);
			mxge_open(sc);
		}
	}
	return err;
}

/*
 * ifmedia status callback: report link state, the fixed media type
 * detected at probe time, and the current pause settings.
 */
static void
mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	mxge_softc_t *sc = ifp->if_softc;

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (sc->link_state)
		ifmr->ifm_status |= IFM_ACTIVE;

	/*
	 * Autoselect is not supported, so the current media
	 * should be delivered.
	 */
	ifmr->ifm_active |= sc->current_media;
	if (sc->current_media != IFM_NONE) {
		ifmr->ifm_active |= MXGE_IFM;
		if (sc->pause)
			ifmr->ifm_active |= IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE;
	}
}

/*
 * Interface ioctl handler.  Called with the ifnet fully serialized;
 * unhandled commands fall through to ether_ioctl().
 */
static int
mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data,
    struct ucred *cr __unused)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int err, mask;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);
	err = 0;

	switch (command) {
	case SIOCSIFMTU:
		err = mxge_change_mtu(sc, ifr->ifr_mtu);
		break;

	case SIOCSIFFLAGS:
		/* Refuse to come up while detaching or after h/w fault */
		if (sc->dying)
			return EINVAL;

		if (ifp->if_flags & IFF_UP) {
			if (!(ifp->if_flags & IFF_RUNNING)) {
				err = mxge_open(sc);
			} else {
				/*
				 * Take care of PROMISC and ALLMULTI
				 * flag changes
				 */
				mxge_change_promisc(sc,
				    ifp->if_flags & IFF_PROMISC);
				mxge_set_multicast_list(sc);
			}
		} else {
			if (ifp->if_flags & IFF_RUNNING)
				mxge_close(sc, 0);
		}
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		mxge_set_multicast_list(sc);
		break;

	case SIOCSIFCAP:
		/* Toggle only the capabilities that actually changed */
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			ifp->if_capenable ^= IFCAP_TXCSUM;
			if (ifp->if_capenable & IFCAP_TXCSUM)
				ifp->if_hwassist |= CSUM_TCP | CSUM_UDP;
			else
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
		}
		if (mask & IFCAP_TSO) {
			ifp->if_capenable ^= IFCAP_TSO;
			if (ifp->if_capenable & IFCAP_TSO)
				ifp->if_hwassist |= CSUM_TSO;
			else
				ifp->if_hwassist &= ~CSUM_TSO;
		}
		if (mask & IFCAP_RXCSUM)
			ifp->if_capenable ^= IFCAP_RXCSUM;
		if (mask & IFCAP_VLAN_HWTAGGING)
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
		break;

	case SIOCGIFMEDIA:
	case SIOCSIFMEDIA:
		err = ifmedia_ioctl(ifp, (struct ifreq *)data,
		    &sc->media, command);
		break;

	default:
		err = ether_ioctl(ifp, command, data);
		break;
	}
	return err;
}

/*
 * Copy the module tunables into the softc, clamping out-of-range
 * values (interrupt coalescing delay, tick period, throttle).
 */
static void
mxge_fetch_tunables(mxge_softc_t *sc)
{
	int ifm;

	sc->intr_coal_delay = mxge_intr_coal_delay;
	if (sc->intr_coal_delay < 0 || sc->intr_coal_delay > (10 * 1000))
		sc->intr_coal_delay = MXGE_INTR_COAL_DELAY;

	/* XXX */
	if (mxge_ticks == 0)
		mxge_ticks = hz / 2;

	ifm = ifmedia_str2ethfc(mxge_flowctrl);
	if (ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE))
		sc->pause = 1;

	sc->use_rss = mxge_use_rss;

	sc->throttle = mxge_throttle;
	if (sc->throttle && sc->throttle > MXGE_MAX_THROTTLE)
		sc->throttle = MXGE_MAX_THROTTLE;
	if (sc->throttle && sc->throttle < MXGE_MIN_THROTTLE)
		sc->throttle = MXGE_MIN_THROTTLE;
}

/*
 * Free the per-slice DMA memory (firmware stats and rx completion
 * queue) and the slice array itself.  Safe to call when slices were
 * never allocated.
 */
static void
mxge_free_slices(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int i;

	if (sc->ss == NULL)
		return;

	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];
		if (ss->fw_stats != NULL) {
			mxge_dma_free(&ss->fw_stats_dma);
			ss->fw_stats = NULL;
		}
		if (ss->rx_data.rx_done.entry != NULL) {
			mxge_dma_free(&ss->rx_done_dma);
			ss->rx_data.rx_done.entry = NULL;
		}
	}
	kfree(sc->ss, M_DEVBUF);
	sc->ss = NULL;
}

/*
 * Allocate the slice array plus each slice's rx interrupt queue and
 * firmware stats DMA block.  On partial failure, cleanup is left to
 * mxge_free_slices() via the caller.
 */
static int
mxge_alloc_slices(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct mxge_slice_state *ss;
	size_t bytes;
	int err, i, rx_ring_size;

	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		return err;
	}
	rx_ring_size = cmd.data0;
	sc->rx_intr_slots = 2 * (rx_ring_size / sizeof (mcp_dma_addr_t));

	bytes = sizeof(*sc->ss) * sc->num_slices;
	sc->ss = kmalloc_cachealign(bytes, M_DEVBUF, M_WAITOK | M_ZERO);

	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];

		ss->sc = sc;

		lwkt_serialize_init(&ss->rx_data.rx_serialize);
		lwkt_serialize_init(&ss->tx.tx_serialize);
		/* -1 marks "no MSI-X vector allocated" for mxge_free_msix() */
		ss->intr_rid = -1;

		/*
		 * Allocate per-slice rx interrupt queue
		 * XXX assume 4bytes mcp_slot
		 */
		bytes = sc->rx_intr_slots * sizeof(mcp_slot_t);
		err = mxge_dma_alloc(sc, &ss->rx_done_dma, bytes, 4096);
		if (err != 0) {
			device_printf(sc->dev,
			    "alloc %d slice rx_done failed\n", i);
			return err;
		}
		ss->rx_data.rx_done.entry = ss->rx_done_dma.dmem_addr;

		/*
		 * Allocate the per-slice firmware stats
		 * NOTE(review): this 'bytes' assignment is a dead store;
		 * the size is recomputed inline in the call below.
		 */
		bytes = sizeof(*ss->fw_stats);
		err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
		    sizeof(*ss->fw_stats), 64);
		if (err != 0) {
			device_printf(sc->dev,
			    "alloc %d fw_stats failed\n", i);
			return err;
		}
		ss->fw_stats = ss->fw_stats_dma.dmem_addr;
	}
	return 0;
}

/*
 * Decide how many slices (rx queues) and tx rings to use.  Defaults
 * to 1; multiple slices require the user tunable, >1 netisr cpus,
 * MSI-X with at least 2 vectors, and the RSS-capable firmware.  On any
 * failure after the RSS firmware was loaded, the original firmware is
 * restored (abort_with_fw).
 */
static void
mxge_slice_probe(mxge_softc_t *sc)
{
	int status, max_intr_slots, max_slices, num_slices;
	int msix_cnt, msix_enable, multi_tx;
	mxge_cmd_t cmd;
	const char *old_fw;

	sc->num_slices = 1;
	sc->num_tx_rings = 1;

	num_slices = device_getenv_int(sc->dev, "num_slices", mxge_num_slices);
	if (num_slices == 1)
		return;

	if (netisr_ncpus == 1)
		return;

	msix_enable = device_getenv_int(sc->dev, "msix.enable",
	    mxge_msix_enable);
	if (!msix_enable)
		return;

	msix_cnt = pci_msix_count(sc->dev);
	if (msix_cnt < 2)
		return;
	if (bootverbose)
		device_printf(sc->dev, "MSI-X count %d\n", msix_cnt);

	/*
	 * Now load the slice aware firmware see what it supports
	 */
	old_fw = sc->fw_name;
	if (old_fw == mxge_fw_aligned)
		sc->fw_name = mxge_fw_rss_aligned;
	else
		sc->fw_name = mxge_fw_rss_unaligned;
	status = mxge_load_firmware(sc, 0);
	if (status != 0) {
		device_printf(sc->dev, "Falling back to a single slice\n");
		return;
	}

	/*
	 * Try to send a reset command to the card to see if it is alive
	 */
	memset(&cmd, 0, sizeof(cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		goto abort_with_fw;
	}

	/*
	 * Get rx ring size to calculate rx interrupt queue size
	 */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		goto abort_with_fw;
	}
	max_intr_slots = 2 * (cmd.data0 / sizeof(mcp_dma_addr_t));

	/*
	 * Tell it the size of the rx interrupt queue
	 */
	cmd.data0 = max_intr_slots * sizeof(struct mcp_slot);
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
		goto abort_with_fw;
	}

	/*
	 * Ask the maximum number of slices it supports
	 */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
	if (status != 0) {
		device_printf(sc->dev,
		    "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
		goto abort_with_fw;
	}
	max_slices = cmd.data0;
	if (bootverbose)
		device_printf(sc->dev, "max slices %d\n", max_slices);

	if (max_slices > msix_cnt)
		max_slices = msix_cnt;

	sc->ring_map = if_ringmap_alloc(sc->dev, num_slices, max_slices);
	sc->num_slices = if_ringmap_count(sc->ring_map);

	multi_tx = device_getenv_int(sc->dev, "multi_tx", mxge_multi_tx);
	if (multi_tx)
		sc->num_tx_rings = sc->num_slices;

	if (bootverbose) {
		device_printf(sc->dev, "using %d slices, max %d\n",
		    sc->num_slices, max_slices);
	}

	if (sc->num_slices == 1)
		goto abort_with_fw;
	return;

abort_with_fw:
	sc->fw_name = old_fw;
	mxge_load_firmware(sc, 0);
}

/*
 * Build the ordered serializer array used by the ifnet serialize
 * callbacks: main serializer first, then all rx serializers, then all
 * tx serializers.  The order must be identical for every caller to
 * avoid lock-order reversals.
 */
static void
mxge_setup_serialize(struct mxge_softc *sc)
{
	int i = 0, slice;

	/* Main + rx + tx */
	sc->nserialize = (2 * sc->num_slices) + 1;
	sc->serializes =
	    kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *),
	        M_DEVBUF, M_WAITOK | M_ZERO);

	/*
	 * Setup serializes
	 *
	 * NOTE: Order is critical
	 */

	KKASSERT(i < sc->nserialize);
	sc->serializes[i++] = &sc->main_serialize;

	for (slice = 0; slice < sc->num_slices; ++slice) {
		KKASSERT(i < sc->nserialize);
		sc->serializes[i++] = &sc->ss[slice].rx_data.rx_serialize;
	}

	for (slice = 0; slice < sc->num_slices; ++slice) {
		KKASSERT(i < sc->nserialize);
		sc->serializes[i++] = &sc->ss[slice].tx.tx_serialize;
	}

	KKASSERT(i == sc->nserialize);
}

/* ifnet serialize callback: enter the whole serializer array in order */
static void
mxge_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
{
	struct mxge_softc *sc = ifp->if_softc;

	ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz);
}

/* ifnet deserialize callback: release the serializer array */
static void
mxge_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
{
	struct mxge_softc *sc = ifp->if_softc;

	ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz);
}

/* ifnet tryserialize callback: non-blocking variant of mxge_serialize */
static int
mxge_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
{
	struct mxge_softc *sc = ifp->if_softc;

	return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz);
}

#ifdef INVARIANTS

/* Debug-only assertion that the serializer array is (not) held */
static void
mxge_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
    boolean_t serialized)
{
	struct mxge_softc *sc = ifp->if_softc;

	ifnet_serialize_array_assert(sc->serializes, sc->nserialize,
	    slz, serialized);
}

#endif	/* INVARIANTS */

#ifdef IFPOLL_ENABLE

/*
 * Polling handler for one slice: drain the rx completion queue if it
 * has work, otherwise hand the interrupt token back to the NIC.
 */
static void
mxge_npoll_rx(struct ifnet *ifp, void *xss, int cycle)
{
	struct mxge_slice_state *ss = xss;
	mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;

	ASSERT_SERIALIZED(&ss->rx_data.rx_serialize);

	if (rx_done->entry[rx_done->idx].length != 0) {
		mxge_clean_rx_done(&ss->sc->arpcom.ac_if, &ss->rx_data, cycle);
	} else {
		/*
		 * XXX
		 * This register writting obviously has cost,
		 * however, if we don't hand back the rx token,
		 * the upcoming packets may suffer rediculously
		 * large delay, as observed on 8AL-C using ping(8).
		 */
		*ss->irq_claim = be32toh(3);
	}
}

/*
 * ifpoll registration callback: register one rx poll handler per
 * slice on that slice's interrupt cpu.  TX/status polling is not
 * registered (see comment below).
 */
static void
mxge_npoll(struct ifnet *ifp, struct ifpoll_info *info)
{
	struct mxge_softc *sc = ifp->if_softc;
	int i;

	if (info == NULL)
		return;

	/*
	 * Only poll rx; polling tx and status don't seem to work
	 */
	for (i = 0; i < sc->num_slices; ++i) {
		struct mxge_slice_state *ss = &sc->ss[i];
		int cpu = ss->intr_cpuid;

		KKASSERT(cpu < netisr_ncpus);
		info->ifpi_rx[cpu].poll_func = mxge_npoll_rx;
		info->ifpi_rx[cpu].arg = ss;
		info->ifpi_rx[cpu].serializer = &ss->rx_data.rx_serialize;
	}
}

#endif	/* IFPOLL_ENABLE */

/*
 * Device attach: map the BAR, parse EEPROM strings, load firmware,
 * probe/allocate slices, interrupts and rings, then register the
 * ifnet and start the periodic tick.  Any failure jumps to 'failed',
 * which funnels through mxge_detach() for cleanup.
 */
static int
mxge_attach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int err, rid, i;

	/*
	 * Avoid rewriting half the lines in this file to use
	 * &sc->arpcom.ac_if instead
	 */
	sc->ifp = ifp;
	sc->dev = dev;
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	/* IFM_ETH_FORCEPAUSE can't be changed */
	ifmedia_init(&sc->media, IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE,
	    mxge_media_change, mxge_media_status);

	lwkt_serialize_init(&sc->main_serialize);

	mxge_fetch_tunables(sc);

	err = bus_dma_tag_create(NULL,		/* parent */
	    1,					/* alignment */
	    0,					/* boundary */
	    BUS_SPACE_MAXADDR,			/* low */
	    BUS_SPACE_MAXADDR,			/* high */
	    NULL, NULL,				/* filter */
	    BUS_SPACE_MAXSIZE_32BIT,		/* maxsize */
	    0,					/* num segs */
	    BUS_SPACE_MAXSIZE_32BIT,		/* maxsegsize */
	    0,					/* flags */
	    &sc->parent_dmat);			/* tag */
	if (err != 0) {
		device_printf(dev, "Err %d allocating parent dmat\n", err);
		goto failed;
	}

	callout_init_mp(&sc->co_hdl);

	mxge_setup_cfg_space(sc);

	/*
	 * Map the board into the kernel
	 */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto failed;
	}

	sc->sram = rman_get_virtual(sc->mem_res);
	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
	if (sc->sram_size > rman_get_size(sc->mem_res)) {
		device_printf(dev, "impossible memory region size %ld\n",
		    rman_get_size(sc->mem_res));
		err = ENXIO;
		goto failed;
	}

	/*
	 * Make NULL terminated copy of the EEPROM strings section of
	 * lanai SRAM
	 */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
	    rman_get_bushandle(sc->mem_res),
	    sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
	    sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE - 2);
	err = mxge_parse_strings(sc);
	if (err != 0) {
		device_printf(dev, "parse EEPROM string failed\n");
		goto failed;
	}

	/*
	 * Enable write combining for efficient use of PCIe bus
	 */
	mxge_enable_wc(sc);

	/*
	 * Allocate the out of band DMA memory
	 */
	err = mxge_dma_alloc(sc, &sc->cmd_dma, sizeof(mxge_cmd_t), 64);
	if (err != 0) {
		device_printf(dev, "alloc cmd DMA buf failed\n");
		goto failed;
	}
	sc->cmd = sc->cmd_dma.dmem_addr;

	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0) {
		device_printf(dev, "alloc zeropad DMA buf failed\n");
		goto failed;
	}

	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0) {
		device_printf(dev, "alloc dmabench DMA buf failed\n");
		goto failed;
	}

	/* Select & load the firmware */
	err = mxge_select_firmware(sc);
	if (err != 0) {
		device_printf(dev, "select firmware failed\n");
		goto failed;
	}

	mxge_slice_probe(sc);
	err = mxge_alloc_slices(sc);
	if (err != 0) {
		device_printf(dev, "alloc slices failed\n");
		goto failed;
	}

	err = mxge_alloc_intr(sc);
	if (err != 0) {
		device_printf(dev, "alloc intr failed\n");
		goto failed;
	}

	/* Setup serializes */
	mxge_setup_serialize(sc);

	err = mxge_reset(sc, 0);
	if (err != 0) {
		device_printf(dev, "reset failed\n");
		goto failed;
	}

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(dev, "failed to allocate rings\n");
		goto failed;
	}

	ifp->if_baudrate = IF_Gbps(10UL);
	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO;
	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;

	ifp->if_capabilities |= IFCAP_VLAN_MTU;
#if 0
	/* Well, its software, sigh */
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
#endif
	ifp->if_capenable = ifp->if_capabilities;

	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_init = mxge_init;
	ifp->if_ioctl = mxge_ioctl;
	ifp->if_start = mxge_start;
#ifdef IFPOLL_ENABLE
	if (sc->intr_type != PCI_INTR_TYPE_LEGACY)
		ifp->if_npoll = mxge_npoll;
#endif
	ifp->if_serialize = mxge_serialize;
	ifp->if_deserialize = mxge_deserialize;
	ifp->if_tryserialize = mxge_tryserialize;
#ifdef INVARIANTS
	ifp->if_serialize_assert = mxge_serialize_assert;
#endif

	/* Increase TSO burst length */
	ifp->if_tsolen = (32 * ETHERMTU);

	/* Initialise the ifmedia structure */
	mxge_media_init(sc);
	mxge_media_probe(sc);

	ether_ifattach(ifp, sc->mac_addr, NULL);

	/* Setup TX rings and subqueues */
	for (i = 0; i < sc->num_tx_rings; ++i) {
		struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
		struct mxge_slice_state *ss = &sc->ss[i];

		ifsq_set_cpuid(ifsq, ss->intr_cpuid);
		ifsq_set_hw_serialize(ifsq, &ss->tx.tx_serialize);
		ifsq_set_priv(ifsq, &ss->tx);
		ss->tx.ifsq = ifsq;

		ifsq_watchdog_init(&ss->tx.watchdog, ifsq, mxge_watchdog);
	}

	/*
	 * XXX
	 * We are not ready to do "gather" jumbo frame, so
	 * limit MTU to MJUMPAGESIZE
	 */
	sc->max_mtu = MJUMPAGESIZE -
	    ETHER_HDR_LEN - EVL_ENCAPLEN - MXGEFW_PAD - 1;
	sc->dying = 0;

	err = mxge_setup_intr(sc);
	if (err != 0) {
		device_printf(dev, "alloc and setup intr failed\n");
		ether_ifdetach(ifp);
		goto failed;
	}

	mxge_add_sysctls(sc);

	/* Increase non-cluster mbuf limit; used by small RX rings */
	mb_inclimit(ifp->if_nmbclusters);

	callout_reset_bycpu(&sc->co_hdl, mxge_ticks, mxge_tick, sc,
	    sc->ss[0].intr_cpuid);
	return 0;

failed:
	mxge_detach(dev);
	return err;
}

/*
 * Device detach.  Also serves as the error-unwind path for
 * mxge_attach(), so every teardown step must tolerate partially
 * initialized state.
 */
static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	if (device_is_attached(dev)) {
		struct ifnet *ifp = sc->ifp;
		int mblimit = ifp->if_nmbclusters;

		ifnet_serialize_all(ifp);

		sc->dying = 1;
		if (ifp->if_flags & IFF_RUNNING)
			mxge_close(sc, 1);
		callout_stop(&sc->co_hdl);

		mxge_teardown_intr(sc, sc->num_slices);

		ifnet_deserialize_all(ifp);

		callout_terminate(&sc->co_hdl);

		ether_ifdetach(ifp);

		/* Decrease non-cluster mbuf limit increased by us */
		mb_inclimit(-mblimit);
	}
	ifmedia_removeall(&sc->media);
	/* Quiesce the NIC's dummy RDMA engine before freeing its memory */
	if (sc->cmd != NULL && sc->zeropad_dma.dmem_addr != NULL &&
	    sc->sram != NULL)
		mxge_dummy_rdma(sc, 0);

	mxge_free_intr(sc);
	mxge_rem_sysctls(sc);
	mxge_free_rings(sc);

	/* MUST after sysctls, intr and rings are freed */
	mxge_free_slices(sc);

	if (sc->dmabench_dma.dmem_addr != NULL)
		mxge_dma_free(&sc->dmabench_dma);
	if (sc->zeropad_dma.dmem_addr != NULL)
		mxge_dma_free(&sc->zeropad_dma);
	if (sc->cmd_dma.dmem_addr != NULL)
		mxge_dma_free(&sc->cmd_dma);

	if (sc->msix_table_res != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(2),
		    sc->msix_table_res);
	}
	if (sc->mem_res != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS,
		    sc->mem_res);
	}

	if (sc->parent_dmat != NULL)
		bus_dma_tag_destroy(sc->parent_dmat);

	if (sc->ring_map != NULL)
		if_ringmap_free(sc->ring_map);

	return 0;
}

/* Nothing to do on shutdown; the watchdog/firmware state is volatile */
static int
mxge_shutdown(device_t dev)
{
	return 0;
}

/*
 * Release all MSI-X vectors and their IRQ resources; if 'setup' the
 * MSI-X infrastructure itself is torn down as well.  intr_rid == -1
 * means the vector was never allocated (see mxge_alloc_slices()).
 */
static void
mxge_free_msix(struct mxge_softc *sc, boolean_t setup)
{
	int i;

	KKASSERT(sc->num_slices > 1);

	for (i = 0; i < sc->num_slices; ++i) {
		struct mxge_slice_state *ss = &sc->ss[i];

		if (ss->intr_res != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ,
			    ss->intr_rid, ss->intr_res);
		}
		if (ss->intr_rid >= 0)
			pci_release_msix_vector(sc->dev, ss->intr_rid);
	}
	if (setup)
		pci_teardown_msix(sc->dev);
}

/*
 * Allocate one MSI-X vector per slice: slice 0 gets the combined
 * (tx/rx/link) handler on the main serializer, the remaining slices
 * get rx-only or rx+tx handlers on their rx serializer, each bound to
 * its ring-mapped cpu.  On any failure everything allocated so far is
 * released via mxge_free_msix().
 */
static int
mxge_alloc_msix(struct mxge_softc *sc)
{
	struct mxge_slice_state *ss;
	int rid, error, i;
	boolean_t setup = FALSE;

	KKASSERT(sc->num_slices > 1);

	ss = &sc->ss[0];

	ss->intr_serialize = &sc->main_serialize;
	ss->intr_func = mxge_msi;
	ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0),
	    "%s comb", device_get_nameunit(sc->dev));
	ss->intr_desc = ss->intr_desc0;
	ss->intr_cpuid = if_ringmap_cpumap(sc->ring_map, 0);

	for (i = 1; i < sc->num_slices; ++i) {
		ss = &sc->ss[i];

		ss->intr_serialize = &ss->rx_data.rx_serialize;
		if (sc->num_tx_rings == 1) {
			ss->intr_func = mxge_msix_rx;
			ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0),
			    "%s rx%d", device_get_nameunit(sc->dev), i);
		} else {
			ss->intr_func = mxge_msix_rxtx;
			ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0),
			    "%s rxtx%d", device_get_nameunit(sc->dev), i);
		}
		ss->intr_desc = ss->intr_desc0;
		ss->intr_cpuid = if_ringmap_cpumap(sc->ring_map, i);
	}

	rid = PCIR_BAR(2);
	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (sc->msix_table_res == NULL) {
		device_printf(sc->dev, "couldn't alloc MSI-X table res\n");
		return ENXIO;
	}

	error = pci_setup_msix(sc->dev);
	if (error) {
		device_printf(sc->dev, "could not setup MSI-X\n");
		goto back;
	}
	setup = TRUE;

	for (i = 0; i < sc->num_slices; ++i) {
		ss = &sc->ss[i];

		error = pci_alloc_msix_vector(sc->dev, i, &ss->intr_rid,
		    ss->intr_cpuid);
		if (error) {
			device_printf(sc->dev, "could not alloc "
			    "MSI-X %d on cpu%d\n", i, ss->intr_cpuid);
			goto back;
		}

		ss->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
		    &ss->intr_rid, RF_ACTIVE);
		if (ss->intr_res == NULL) {
			device_printf(sc->dev, "could not alloc "
			    "MSI-X %d resource\n", i);
			error = ENXIO;
			goto back;
		}
	}

	pci_enable_msix(sc->dev);
	sc->intr_type = PCI_INTR_TYPE_MSIX;
back:
	if (error)
		mxge_free_msix(sc, setup);
	return error;
}

/*
 * Allocate interrupt resources: MSI-X when multiple slices are in
 * use, otherwise a single legacy/MSI interrupt.
 */
static int
mxge_alloc_intr(struct mxge_softc *sc)
{
	struct mxge_slice_state *ss;
	u_int irq_flags;

	if (sc->num_slices > 1) {
		int error;

		error = mxge_alloc_msix(sc);
4637 if (error) 4638 return error; 4639 KKASSERT(sc->intr_type == PCI_INTR_TYPE_MSIX); 4640 return 0; 4641 } 4642 4643 ss = &sc->ss[0]; 4644 4645 sc->intr_type = pci_alloc_1intr(sc->dev, mxge_msi_enable, 4646 &ss->intr_rid, &irq_flags); 4647 4648 ss->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, 4649 &ss->intr_rid, irq_flags); 4650 if (ss->intr_res == NULL) { 4651 device_printf(sc->dev, "could not alloc interrupt\n"); 4652 return ENXIO; 4653 } 4654 4655 if (sc->intr_type == PCI_INTR_TYPE_LEGACY) 4656 ss->intr_func = mxge_legacy; 4657 else 4658 ss->intr_func = mxge_msi; 4659 ss->intr_serialize = &sc->main_serialize; 4660 ss->intr_cpuid = rman_get_cpuid(ss->intr_res); 4661 4662 return 0; 4663 } 4664 4665 static int 4666 mxge_setup_intr(struct mxge_softc *sc) 4667 { 4668 int i; 4669 4670 for (i = 0; i < sc->num_slices; ++i) { 4671 struct mxge_slice_state *ss = &sc->ss[i]; 4672 int error; 4673 4674 error = bus_setup_intr_descr(sc->dev, ss->intr_res, 4675 INTR_MPSAFE, ss->intr_func, ss, &ss->intr_hand, 4676 ss->intr_serialize, ss->intr_desc); 4677 if (error) { 4678 device_printf(sc->dev, "can't setup %dth intr\n", i); 4679 mxge_teardown_intr(sc, i); 4680 return error; 4681 } 4682 } 4683 return 0; 4684 } 4685 4686 static void 4687 mxge_teardown_intr(struct mxge_softc *sc, int cnt) 4688 { 4689 int i; 4690 4691 if (sc->ss == NULL) 4692 return; 4693 4694 for (i = 0; i < cnt; ++i) { 4695 struct mxge_slice_state *ss = &sc->ss[i]; 4696 4697 bus_teardown_intr(sc->dev, ss->intr_res, ss->intr_hand); 4698 } 4699 } 4700 4701 static void 4702 mxge_free_intr(struct mxge_softc *sc) 4703 { 4704 if (sc->ss == NULL) 4705 return; 4706 4707 if (sc->intr_type != PCI_INTR_TYPE_MSIX) { 4708 struct mxge_slice_state *ss = &sc->ss[0]; 4709 4710 if (ss->intr_res != NULL) { 4711 bus_release_resource(sc->dev, SYS_RES_IRQ, 4712 ss->intr_rid, ss->intr_res); 4713 } 4714 if (sc->intr_type == PCI_INTR_TYPE_MSI) 4715 pci_release_msi(sc->dev); 4716 } else { 4717 mxge_free_msix(sc, TRUE); 4718 } 
4719 } 4720