1 /****************************************************************************** 2 3 Copyright (c) 2006-2013, Myricom Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Myricom Inc, nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 27 28 $FreeBSD: head/sys/dev/mxge/if_mxge.c 254263 2013-08-12 23:30:01Z scottl $ 29 30 ***************************************************************************/ 31 32 #include "opt_ifpoll.h" 33 #include "opt_inet.h" 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/linker.h> 38 #include <sys/firmware.h> 39 #include <sys/endian.h> 40 #include <sys/in_cksum.h> 41 #include <sys/sockio.h> 42 #include <sys/mbuf.h> 43 #include <sys/malloc.h> 44 #include <sys/kernel.h> 45 #include <sys/module.h> 46 #include <sys/serialize.h> 47 #include <sys/socket.h> 48 #include <sys/sysctl.h> 49 50 #include <net/if.h> 51 #include <net/if_arp.h> 52 #include <net/ifq_var.h> 53 #include <net/ethernet.h> 54 #include <net/if_dl.h> 55 #include <net/if_media.h> 56 #include <net/if_poll.h> 57 58 #include <net/bpf.h> 59 60 #include <net/if_types.h> 61 #include <net/vlan/if_vlan_var.h> 62 #include <net/zlib.h> 63 #include <net/toeplitz.h> 64 65 #include <netinet/in_systm.h> 66 #include <netinet/in.h> 67 #include <netinet/ip.h> 68 #include <netinet/tcp.h> 69 70 #include <sys/bus.h> 71 #include <sys/rman.h> 72 73 #include <bus/pci/pcireg.h> 74 #include <bus/pci/pcivar.h> 75 #include <bus/pci/pci_private.h> /* XXX for pci_cfg_restore */ 76 77 #include <vm/vm.h> /* for pmap_mapdev() */ 78 #include <vm/pmap.h> 79 80 #if defined(__i386__) || defined(__x86_64__) 81 #include <machine/specialreg.h> 82 #endif 83 84 #include <dev/netif/mxge/mxge_mcp.h> 85 #include <dev/netif/mxge/mcp_gen_header.h> 86 #include <dev/netif/mxge/if_mxge_var.h> 87 88 #define MXGE_IFM (IFM_ETHER | IFM_FDX | IFM_ETH_FORCEPAUSE) 89 90 #define MXGE_RX_SMALL_BUFLEN (MHLEN - MXGEFW_PAD) 91 #define MXGE_HWRSS_KEYLEN 16 92 93 /* Tunable params */ 94 static int mxge_nvidia_ecrc_enable = 1; 95 static int mxge_force_firmware = 0; 96 static int mxge_intr_coal_delay = MXGE_INTR_COAL_DELAY; 97 static int mxge_deassert_wait = 1; 98 static int mxge_ticks; 99 static int mxge_num_slices = 0; 100 static int mxge_always_promisc = 0; 101 static int mxge_throttle = 0; 102 static int mxge_msi_enable = 1; 103 static int mxge_msix_enable = 1; 104 static int mxge_multi_tx = 1; 105 /* 106 * Don't use RSS by default, its just too slow 107 */ 108 static int mxge_use_rss = 0; 109 110 static char mxge_flowctrl[IFM_ETH_FC_STRLEN] = IFM_ETH_FC_FORCE_FULL; 111 112 static const char *mxge_fw_unaligned = "mxge_ethp_z8e"; 113 static const char *mxge_fw_aligned = "mxge_eth_z8e"; 114 static const char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; 115 static const char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; 116 117 TUNABLE_INT("hw.mxge.num_slices", &mxge_num_slices); 118 TUNABLE_INT("hw.mxge.intr_coal_delay", &mxge_intr_coal_delay); 119 TUNABLE_INT("hw.mxge.nvidia_ecrc_enable", &mxge_nvidia_ecrc_enable); 120 TUNABLE_INT("hw.mxge.force_firmware", &mxge_force_firmware); 121 TUNABLE_INT("hw.mxge.deassert_wait", &mxge_deassert_wait); 122 TUNABLE_INT("hw.mxge.ticks", &mxge_ticks); 123 TUNABLE_INT("hw.mxge.always_promisc", &mxge_always_promisc); 124 TUNABLE_INT("hw.mxge.throttle", &mxge_throttle); 125 TUNABLE_INT("hw.mxge.multi_tx", &mxge_multi_tx); 126 TUNABLE_INT("hw.mxge.use_rss", &mxge_use_rss); 127 TUNABLE_INT("hw.mxge.msi.enable", &mxge_msi_enable); 128 TUNABLE_INT("hw.mxge.msix.enable", &mxge_msix_enable); 129 TUNABLE_STR("hw.mxge.flow_ctrl", mxge_flowctrl, sizeof(mxge_flowctrl)); 130 131 static int mxge_probe(device_t dev); 132 static int mxge_attach(device_t dev); 133 static int mxge_detach(device_t dev); 134 static int mxge_shutdown(device_t dev); 135 136 static int mxge_alloc_intr(struct mxge_softc *sc); 137 static void mxge_free_intr(struct mxge_softc *sc); 138 static int mxge_setup_intr(struct mxge_softc *sc); 139 static void mxge_teardown_intr(struct mxge_softc *sc, int cnt); 140 141 static device_method_t mxge_methods[] = { 142 /* Device interface */ 143 DEVMETHOD(device_probe, mxge_probe), 144 DEVMETHOD(device_attach, mxge_attach), 145 DEVMETHOD(device_detach, mxge_detach), 146 DEVMETHOD(device_shutdown, mxge_shutdown), 147 DEVMETHOD_END 148 }; 149 150 static driver_t mxge_driver = { 151 "mxge", 152 mxge_methods, 153 sizeof(mxge_softc_t), 154 }; 155 156 static devclass_t mxge_devclass; 157 158 /* Declare ourselves to be a child of the PCI bus.*/ 159 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, NULL, NULL); 160 MODULE_DEPEND(mxge, firmware, 1, 1, 1); 161 MODULE_DEPEND(mxge, zlib, 1, 1, 1); 162 163 static int mxge_load_firmware(mxge_softc_t *sc, int adopt); 164 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 165 static void mxge_close(mxge_softc_t *sc, int down); 166 static int mxge_open(mxge_softc_t *sc); 167 static void mxge_tick(void *arg); 168 static void mxge_watchdog_reset(mxge_softc_t *sc); 169 static void mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice); 170 171 static int 172 mxge_probe(device_t dev) 173 { 174 if (pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM && 175 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E || 176 pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9)) { 177 int rev = pci_get_revid(dev); 178 179 switch (rev) { 180 case MXGE_PCI_REV_Z8E: 181 device_set_desc(dev, "Myri10G-PCIE-8A"); 182 break; 183 case MXGE_PCI_REV_Z8ES: 184 device_set_desc(dev, "Myri10G-PCIE-8B"); 185 break; 186 default: 187 device_set_desc(dev, "Myri10G-PCIE-8??"); 188 device_printf(dev, "Unrecognized rev %d NIC\n", rev); 189 break; 190 } 191 return 0; 192 } 193 return ENXIO; 194 } 195 196 static void 197 mxge_enable_wc(mxge_softc_t *sc) 198 { 199 #if defined(__i386__) || defined(__x86_64__) 200 vm_offset_t len; 201 202 sc->wc = 1; 203 len = rman_get_size(sc->mem_res); 204 pmap_change_attr((vm_offset_t) sc->sram, len / PAGE_SIZE, 205 PAT_WRITE_COMBINING); 206 #endif 207 } 208 209 static int 210 mxge_dma_alloc(mxge_softc_t *sc, bus_dmamem_t *dma, size_t bytes, 211 bus_size_t alignment) 212 { 213 bus_size_t boundary; 214 int err; 215 216 if (bytes > 4096 && alignment == 4096) 217 boundary = 0; 218 else 219 boundary = 4096; 220 221 err = bus_dmamem_coherent(sc->parent_dmat, alignment, boundary, 222 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, bytes, 223 BUS_DMA_WAITOK | BUS_DMA_ZERO, dma); 224 if (err != 0) { 225 device_printf(sc->dev, "bus_dmamem_coherent failed: %d\n", err); 226 return err; 227 } 228 return 0; 229 } 230 231 static void 232 mxge_dma_free(bus_dmamem_t *dma) 233 { 234 bus_dmamap_unload(dma->dmem_tag, dma->dmem_map); 235 bus_dmamem_free(dma->dmem_tag, dma->dmem_addr, dma->dmem_map); 236 bus_dma_tag_destroy(dma->dmem_tag); 237 } 238 239 /* 240 * The eeprom strings on the lanaiX have the format 241 * SN=x\0 242 * MAC=x:x:x:x:x:x\0 243 * PC=text\0 244 */ 245 static int 246 mxge_parse_strings(mxge_softc_t *sc) 247 { 248 const char *ptr; 249 int i, found_mac, found_sn2; 250 char *endptr; 251 252 ptr = sc->eeprom_strings; 253 found_mac = 0; 254 found_sn2 = 0; 255 while (*ptr != '\0') { 256 if (strncmp(ptr, "MAC=", 4) == 0) { 257 ptr += 4; 258 for (i = 0;;) { 259 sc->mac_addr[i] = strtoul(ptr, &endptr, 16); 260 if (endptr - ptr != 2) 261 goto abort; 262 ptr = endptr; 263 if (++i == 6) 264 break; 265 if (*ptr++ != ':') 266 goto abort; 267 } 268 found_mac = 1; 269 } else if (strncmp(ptr, "PC=", 3) == 0) { 270 ptr += 3; 271 strlcpy(sc->product_code_string, ptr, 272 sizeof(sc->product_code_string)); 273 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) { 274 ptr += 3; 275 strlcpy(sc->serial_number_string, ptr, 276 sizeof(sc->serial_number_string)); 277 } else if (strncmp(ptr, "SN2=", 4) == 0) { 278 /* SN2 takes precedence over SN */ 279 ptr += 4; 280 found_sn2 = 1; 281 strlcpy(sc->serial_number_string, ptr, 282 sizeof(sc->serial_number_string)); 283 } 284 while (*ptr++ != '\0') {} 285 } 286 287 if (found_mac) 288 return 0; 289 290 abort: 291 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 292 return ENXIO; 293 } 294 295 #if defined(__i386__) || defined(__x86_64__) 296 297 static void 298 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 299 { 300 uint32_t val; 301 unsigned long base, off; 302 char *va, *cfgptr; 303 device_t pdev, mcp55; 304 uint16_t vendor_id, device_id, word; 305 uintptr_t bus, slot, func, ivend, idev; 306 uint32_t *ptr32; 307 308 if (!mxge_nvidia_ecrc_enable) 309 return; 310 311 pdev = device_get_parent(device_get_parent(sc->dev)); 312 if (pdev == NULL) { 313 device_printf(sc->dev, "could not find parent?\n"); 314 return; 315 } 316 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 317 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 318 319 if (vendor_id != 0x10de) 320 return; 321 322 base = 0; 323 324 if (device_id == 0x005d) { 325 /* ck804, base address is magic */ 326 base = 0xe0000000UL; 327 } else if (device_id >= 0x0374 && device_id <= 0x378) { 328 /* mcp55, base address stored in chipset */ 329 mcp55 = pci_find_bsf(0, 0, 0); 330 if (mcp55 && 331 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 332 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 333 word = pci_read_config(mcp55, 0x90, 2); 334 base = ((unsigned long)word & 0x7ffeU) << 25; 335 } 336 } 337 if (!base) 338 return; 339 340 /* 341 * XXXX 342 * Test below is commented because it is believed that doing 343 * config read/write beyond 0xff will access the config space 344 * for the next larger function. Uncomment this and remove 345 * the hacky pmap_mapdev() way of accessing config space when 346 * DragonFly grows support for extended pcie config space access. 347 */ 348 #if 0 349 /* 350 * See if we can, by some miracle, access the extended 351 * config space 352 */ 353 val = pci_read_config(pdev, 0x178, 4); 354 if (val != 0xffffffff) { 355 val |= 0x40; 356 pci_write_config(pdev, 0x178, val, 4); 357 return; 358 } 359 #endif 360 /* 361 * Rather than using normal pci config space writes, we must 362 * map the Nvidia config space ourselves. This is because on 363 * opteron/nvidia class machine the 0xe000000 mapping is 364 * handled by the nvidia chipset, that means the internal PCI 365 * device (the on-chip northbridge), or the amd-8131 bridge 366 * and things behind them are not visible by this method. 367 */ 368 369 BUS_READ_IVAR(device_get_parent(pdev), pdev, 370 PCI_IVAR_BUS, &bus); 371 BUS_READ_IVAR(device_get_parent(pdev), pdev, 372 PCI_IVAR_SLOT, &slot); 373 BUS_READ_IVAR(device_get_parent(pdev), pdev, 374 PCI_IVAR_FUNCTION, &func); 375 BUS_READ_IVAR(device_get_parent(pdev), pdev, 376 PCI_IVAR_VENDOR, &ivend); 377 BUS_READ_IVAR(device_get_parent(pdev), pdev, 378 PCI_IVAR_DEVICE, &idev); 379 380 off = base + 0x00100000UL * (unsigned long)bus + 381 0x00001000UL * (unsigned long)(func + 8 * slot); 382 383 /* map it into the kernel */ 384 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 385 if (va == NULL) { 386 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 387 return; 388 } 389 /* get a pointer to the config space mapped into the kernel */ 390 cfgptr = va + (off & PAGE_MASK); 391 392 /* make sure that we can really access it */ 393 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 394 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 395 if (!(vendor_id == ivend && device_id == idev)) { 396 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 397 vendor_id, device_id); 398 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 399 return; 400 } 401 402 ptr32 = (uint32_t*)(cfgptr + 0x178); 403 val = *ptr32; 404 405 if (val == 0xffffffff) { 406 device_printf(sc->dev, "extended mapping failed\n"); 407 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 408 return; 409 } 410 *ptr32 = val | 0x40; 411 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 412 if (bootverbose) { 413 device_printf(sc->dev, "Enabled ECRC on upstream " 414 "Nvidia bridge at %d:%d:%d\n", 415 (int)bus, (int)slot, (int)func); 416 } 417 } 418 419 #else /* __i386__ || __x86_64__ */ 420 421 static void 422 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 423 { 424 device_printf(sc->dev, "Nforce 4 chipset on non-x86/x86_64!?!?!\n"); 425 } 426 427 #endif 428 429 static int 430 mxge_dma_test(mxge_softc_t *sc, int test_type) 431 { 432 mxge_cmd_t cmd; 433 bus_addr_t dmatest_bus = sc->dmabench_dma.dmem_busaddr; 434 int status; 435 uint32_t len; 436 const char *test = " "; 437 438 /* 439 * Run a small DMA test. 440 * The magic multipliers to the length tell the firmware 441 * to do DMA read, write, or read+write tests. The 442 * results are returned in cmd.data0. The upper 16 443 * bits of the return is the number of transfers completed. 444 * The lower 16 bits is the time in 0.5us ticks that the 445 * transfers took to complete. 446 */ 447 448 len = sc->tx_boundary; 449 450 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 451 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 452 cmd.data2 = len * 0x10000; 453 status = mxge_send_cmd(sc, test_type, &cmd); 454 if (status != 0) { 455 test = "read"; 456 goto abort; 457 } 458 sc->read_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff); 459 460 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 461 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 462 cmd.data2 = len * 0x1; 463 status = mxge_send_cmd(sc, test_type, &cmd); 464 if (status != 0) { 465 test = "write"; 466 goto abort; 467 } 468 sc->write_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff); 469 470 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 471 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 472 cmd.data2 = len * 0x10001; 473 status = mxge_send_cmd(sc, test_type, &cmd); 474 if (status != 0) { 475 test = "read/write"; 476 goto abort; 477 } 478 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 479 (cmd.data0 & 0xffff); 480 481 abort: 482 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) { 483 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 484 test, status); 485 } 486 return status; 487 } 488 489 /* 490 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 491 * when the PCI-E Completion packets are aligned on an 8-byte 492 * boundary. Some PCI-E chip sets always align Completion packets; on 493 * the ones that do not, the alignment can be enforced by enabling 494 * ECRC generation (if supported). 495 * 496 * When PCI-E Completion packets are not aligned, it is actually more 497 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 498 * 499 * If the driver can neither enable ECRC nor verify that it has 500 * already been enabled, then it must use a firmware image which works 501 * around unaligned completion packets (ethp_z8e.dat), and it should 502 * also ensure that it never gives the device a Read-DMA which is 503 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 504 * enabled, then the driver should use the aligned (eth_z8e.dat) 505 * firmware image, and set tx_boundary to 4KB. 506 */ 507 static int 508 mxge_firmware_probe(mxge_softc_t *sc) 509 { 510 device_t dev = sc->dev; 511 int reg, status; 512 uint16_t pectl; 513 514 sc->tx_boundary = 4096; 515 516 /* 517 * Verify the max read request size was set to 4KB 518 * before trying the test with 4KB. 519 */ 520 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 521 pectl = pci_read_config(dev, reg + 0x8, 2); 522 if ((pectl & (5 << 12)) != (5 << 12)) { 523 device_printf(dev, "Max Read Req. size != 4k (0x%x)\n", 524 pectl); 525 sc->tx_boundary = 2048; 526 } 527 } 528 529 /* 530 * Load the optimized firmware (which assumes aligned PCIe 531 * completions) in order to see if it works on this host. 532 */ 533 sc->fw_name = mxge_fw_aligned; 534 status = mxge_load_firmware(sc, 1); 535 if (status != 0) 536 return status; 537 538 /* 539 * Enable ECRC if possible 540 */ 541 mxge_enable_nvidia_ecrc(sc); 542 543 /* 544 * Run a DMA test which watches for unaligned completions and 545 * aborts on the first one seen. Not required on Z8ES or newer. 546 */ 547 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES) 548 return 0; 549 550 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 551 if (status == 0) 552 return 0; /* keep the aligned firmware */ 553 554 if (status != E2BIG) 555 device_printf(dev, "DMA test failed: %d\n", status); 556 if (status == ENOSYS) { 557 device_printf(dev, "Falling back to ethp! " 558 "Please install up to date fw\n"); 559 } 560 return status; 561 } 562 563 static int 564 mxge_select_firmware(mxge_softc_t *sc) 565 { 566 int aligned = 0; 567 int force_firmware = mxge_force_firmware; 568 569 if (sc->throttle) 570 force_firmware = sc->throttle; 571 572 if (force_firmware != 0) { 573 if (force_firmware == 1) 574 aligned = 1; 575 else 576 aligned = 0; 577 if (bootverbose) { 578 device_printf(sc->dev, 579 "Assuming %s completions (forced)\n", 580 aligned ? "aligned" : "unaligned"); 581 } 582 goto abort; 583 } 584 585 /* 586 * If the PCIe link width is 4 or less, we can use the aligned 587 * firmware and skip any checks 588 */ 589 if (sc->link_width != 0 && sc->link_width <= 4) { 590 device_printf(sc->dev, "PCIe x%d Link, " 591 "expect reduced performance\n", sc->link_width); 592 aligned = 1; 593 goto abort; 594 } 595 596 if (mxge_firmware_probe(sc) == 0) 597 return 0; 598 599 abort: 600 if (aligned) { 601 sc->fw_name = mxge_fw_aligned; 602 sc->tx_boundary = 4096; 603 } else { 604 sc->fw_name = mxge_fw_unaligned; 605 sc->tx_boundary = 2048; 606 } 607 return mxge_load_firmware(sc, 0); 608 } 609 610 static int 611 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 612 { 613 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 614 if_printf(sc->ifp, "Bad firmware type: 0x%x\n", 615 be32toh(hdr->mcp_type)); 616 return EIO; 617 } 618 619 /* Save firmware version for sysctl */ 620 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version)); 621 if (bootverbose) 622 if_printf(sc->ifp, "firmware id: %s\n", hdr->version); 623 624 ksscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 625 &sc->fw_ver_minor, &sc->fw_ver_tiny); 626 627 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR && 628 sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 629 if_printf(sc->ifp, "Found firmware version %s\n", 630 sc->fw_version); 631 if_printf(sc->ifp, "Driver needs %d.%d\n", 632 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 633 return EINVAL; 634 } 635 return 0; 636 } 637 638 static void * 639 z_alloc(void *nil, u_int items, u_int size) 640 { 641 return kmalloc(items * size, M_TEMP, M_WAITOK); 642 } 643 644 static void 645 z_free(void *nil, void *ptr) 646 { 647 kfree(ptr, M_TEMP); 648 } 649 650 static int 651 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 652 { 653 z_stream zs; 654 char *inflate_buffer; 655 const struct firmware *fw; 656 const mcp_gen_header_t *hdr; 657 unsigned hdr_offset; 658 int status; 659 unsigned int i; 660 char dummy; 661 size_t fw_len; 662 663 fw = firmware_get(sc->fw_name); 664 if (fw == NULL) { 665 if_printf(sc->ifp, "Could not find firmware image %s\n", 666 sc->fw_name); 667 return ENOENT; 668 } 669 670 /* Setup zlib and decompress f/w */ 671 bzero(&zs, sizeof(zs)); 672 zs.zalloc = z_alloc; 673 zs.zfree = z_free; 674 status = inflateInit(&zs); 675 if (status != Z_OK) { 676 status = EIO; 677 goto abort_with_fw; 678 } 679 680 /* 681 * The uncompressed size is stored as the firmware version, 682 * which would otherwise go unused 683 */ 684 fw_len = (size_t)fw->version; 685 inflate_buffer = kmalloc(fw_len, M_TEMP, M_WAITOK); 686 zs.avail_in = fw->datasize; 687 zs.next_in = __DECONST(char *, fw->data); 688 zs.avail_out = fw_len; 689 zs.next_out = inflate_buffer; 690 status = inflate(&zs, Z_FINISH); 691 if (status != Z_STREAM_END) { 692 if_printf(sc->ifp, "zlib %d\n", status); 693 status = EIO; 694 goto abort_with_buffer; 695 } 696 697 /* Check id */ 698 hdr_offset = 699 htobe32(*(const uint32_t *)(inflate_buffer + MCP_HEADER_PTR_OFFSET)); 700 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 701 if_printf(sc->ifp, "Bad firmware file"); 702 status = EIO; 703 goto abort_with_buffer; 704 } 705 hdr = (const void*)(inflate_buffer + hdr_offset); 706 707 status = mxge_validate_firmware(sc, hdr); 708 if (status != 0) 709 goto abort_with_buffer; 710 711 /* Copy the inflated firmware to NIC SRAM. */ 712 for (i = 0; i < fw_len; i += 256) { 713 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, inflate_buffer + i, 714 min(256U, (unsigned)(fw_len - i))); 715 wmb(); 716 dummy = *sc->sram; 717 wmb(); 718 } 719 720 *limit = fw_len; 721 status = 0; 722 abort_with_buffer: 723 kfree(inflate_buffer, M_TEMP); 724 inflateEnd(&zs); 725 abort_with_fw: 726 firmware_put(fw, FIRMWARE_UNLOAD); 727 return status; 728 } 729 730 /* 731 * Enable or disable periodic RDMAs from the host to make certain 732 * chipsets resend dropped PCIe messages 733 */ 734 static void 735 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 736 { 737 char buf_bytes[72]; 738 volatile uint32_t *confirm; 739 volatile char *submit; 740 uint32_t *buf, dma_low, dma_high; 741 int i; 742 743 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 744 745 /* Clear confirmation addr */ 746 confirm = (volatile uint32_t *)sc->cmd; 747 *confirm = 0; 748 wmb(); 749 750 /* 751 * Send an rdma command to the PCIe engine, and wait for the 752 * response in the confirmation address. The firmware should 753 * write a -1 there to indicate it is alive and well 754 */ 755 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr); 756 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr); 757 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 758 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 759 buf[2] = htobe32(0xffffffff); /* confirm data */ 760 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.dmem_busaddr); 761 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.dmem_busaddr); 762 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 763 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 764 buf[5] = htobe32(enable); /* enable? */ 765 766 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 767 768 mxge_pio_copy(submit, buf, 64); 769 wmb(); 770 DELAY(1000); 771 wmb(); 772 i = 0; 773 while (*confirm != 0xffffffff && i < 20) { 774 DELAY(1000); 775 i++; 776 } 777 if (*confirm != 0xffffffff) { 778 if_printf(sc->ifp, "dummy rdma %s failed (%p = 0x%x)", 779 (enable ? "enable" : "disable"), confirm, *confirm); 780 } 781 } 782 783 static int 784 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 785 { 786 mcp_cmd_t *buf; 787 char buf_bytes[sizeof(*buf) + 8]; 788 volatile mcp_cmd_response_t *response = sc->cmd; 789 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 790 uint32_t dma_low, dma_high; 791 int err, sleep_total = 0; 792 793 /* Ensure buf is aligned to 8 bytes */ 794 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 795 796 buf->data0 = htobe32(data->data0); 797 buf->data1 = htobe32(data->data1); 798 buf->data2 = htobe32(data->data2); 799 buf->cmd = htobe32(cmd); 800 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr); 801 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr); 802 803 buf->response_addr.low = htobe32(dma_low); 804 buf->response_addr.high = htobe32(dma_high); 805 806 response->result = 0xffffffff; 807 wmb(); 808 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 809 810 /* 811 * Wait up to 20ms 812 */ 813 err = EAGAIN; 814 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 815 wmb(); 816 switch (be32toh(response->result)) { 817 case 0: 818 data->data0 = be32toh(response->data); 819 err = 0; 820 break; 821 case 0xffffffff: 822 DELAY(1000); 823 break; 824 case MXGEFW_CMD_UNKNOWN: 825 err = ENOSYS; 826 break; 827 case MXGEFW_CMD_ERROR_UNALIGNED: 828 err = E2BIG; 829 break; 830 case MXGEFW_CMD_ERROR_BUSY: 831 err = EBUSY; 832 break; 833 case MXGEFW_CMD_ERROR_I2C_ABSENT: 834 err = ENXIO; 835 break; 836 default: 837 if_printf(sc->ifp, "command %d failed, result = %d\n", 838 cmd, be32toh(response->result)); 839 err = ENXIO; 840 break; 841 } 842 if (err != EAGAIN) 843 break; 844 } 845 if (err == EAGAIN) { 846 if_printf(sc->ifp, "command %d timed out result = %d\n", 847 cmd, be32toh(response->result)); 848 } 849 return err; 850 } 851 852 static int 853 mxge_adopt_running_firmware(mxge_softc_t *sc) 854 { 855 struct mcp_gen_header *hdr; 856 const size_t bytes = sizeof(struct mcp_gen_header); 857 size_t hdr_offset; 858 int status; 859 860 /* 861 * Find running firmware header 862 */ 863 hdr_offset = 864 htobe32(*(volatile uint32_t *)(sc->sram + MCP_HEADER_PTR_OFFSET)); 865 866 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 867 if_printf(sc->ifp, "Running firmware has bad header offset " 868 "(%zu)\n", hdr_offset); 869 return EIO; 870 } 871 872 /* 873 * Copy header of running firmware from SRAM to host memory to 874 * validate firmware 875 */ 876 hdr = kmalloc(bytes, M_DEVBUF, M_WAITOK); 877 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 878 rman_get_bushandle(sc->mem_res), hdr_offset, (char *)hdr, bytes); 879 status = mxge_validate_firmware(sc, hdr); 880 kfree(hdr, M_DEVBUF); 881 882 /* 883 * Check to see if adopted firmware has bug where adopting 884 * it will cause broadcasts to be filtered unless the NIC 885 * is kept in ALLMULTI mode 886 */ 887 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 888 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 889 sc->adopted_rx_filter_bug = 1; 890 if_printf(sc->ifp, "Adopting fw %d.%d.%d: " 891 "working around rx filter bug\n", 892 sc->fw_ver_major, sc->fw_ver_minor, sc->fw_ver_tiny); 893 } 894 895 return status; 896 } 897 898 static int 899 mxge_load_firmware(mxge_softc_t *sc, int adopt) 900 { 901 volatile uint32_t *confirm; 902 volatile char *submit; 903 char buf_bytes[72]; 904 uint32_t *buf, size, dma_low, dma_high; 905 int status, i; 906 907 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 908 909 size = sc->sram_size; 910 status = mxge_load_firmware_helper(sc, &size); 911 if (status) { 912 if (!adopt) 913 return status; 914 915 /* 916 * Try to use the currently running firmware, if 917 * it is new enough 918 */ 919 status = mxge_adopt_running_firmware(sc); 920 if (status) { 921 if_printf(sc->ifp, 922 "failed to adopt running firmware\n"); 923 return status; 924 } 925 if_printf(sc->ifp, "Successfully adopted running firmware\n"); 926 927 if (sc->tx_boundary == 4096) { 928 if_printf(sc->ifp, 929 "Using firmware currently running on NIC. " 930 "For optimal\n"); 931 if_printf(sc->ifp, "performance consider loading " 932 "optimized firmware\n"); 933 } 934 sc->fw_name = mxge_fw_unaligned; 935 sc->tx_boundary = 2048; 936 return 0; 937 } 938 939 /* Clear confirmation addr */ 940 confirm = (volatile uint32_t *)sc->cmd; 941 *confirm = 0; 942 wmb(); 943 944 /* 945 * Send a reload command to the bootstrap MCP, and wait for the 946 * response in the confirmation address. The firmware should 947 * write a -1 there to indicate it is alive and well 948 */ 949 950 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr); 951 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr); 952 953 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 954 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 955 buf[2] = htobe32(0xffffffff); /* confirm data */ 956 957 /* 958 * FIX: All newest firmware should un-protect the bottom of 959 * the sram before handoff. However, the very first interfaces 960 * do not. Therefore the handoff copy must skip the first 8 bytes 961 */ 962 /* where the code starts*/ 963 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 964 buf[4] = htobe32(size - 8); /* length of code */ 965 buf[5] = htobe32(8); /* where to copy to */ 966 buf[6] = htobe32(0); /* where to jump to */ 967 968 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 969 mxge_pio_copy(submit, buf, 64); 970 wmb(); 971 DELAY(1000); 972 wmb(); 973 i = 0; 974 while (*confirm != 0xffffffff && i < 20) { 975 DELAY(1000*10); 976 i++; 977 } 978 if (*confirm != 0xffffffff) { 979 if_printf(sc->ifp,"handoff failed (%p = 0x%x)", 980 confirm, *confirm); 981 return ENXIO; 982 } 983 return 0; 984 } 985 986 static int 987 mxge_update_mac_address(mxge_softc_t *sc) 988 { 989 mxge_cmd_t cmd; 990 uint8_t *addr = sc->mac_addr; 991 992 cmd.data0 = (addr[0] << 24) | (addr[1] << 16) | 993 (addr[2] << 8) | addr[3]; 994 cmd.data1 = (addr[4] << 8) | (addr[5]); 995 return mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 996 } 997 998 static int 999 mxge_change_pause(mxge_softc_t *sc, int pause) 1000 { 1001 mxge_cmd_t cmd; 1002 int status; 1003 1004 if (pause) 1005 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, &cmd); 1006 else 1007 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, &cmd); 1008 if (status) { 1009 if_printf(sc->ifp, "Failed to set flow control mode\n"); 1010 return ENXIO; 1011 } 1012 sc->pause = pause; 1013 return 0; 1014 } 1015 1016 static void 1017 mxge_change_promisc(mxge_softc_t *sc, int promisc) 1018 { 1019 mxge_cmd_t cmd; 1020 int status; 1021 1022 if (mxge_always_promisc) 1023 promisc = 1; 1024 1025 if (promisc) 1026 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, &cmd); 1027 else 1028 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, &cmd); 1029 if (status) 1030 if_printf(sc->ifp, "Failed to set promisc mode\n"); 1031 } 1032 1033 static void 1034 mxge_set_multicast_list(mxge_softc_t *sc) 1035 { 1036 mxge_cmd_t cmd; 1037 struct ifmultiaddr *ifma; 1038 struct ifnet *ifp = sc->ifp; 1039 int err; 1040 1041 /* This firmware is known to not support multicast */ 1042 if (!sc->fw_multicast_support) 1043 return; 1044 1045 /* Disable multicast filtering while we play with the lists*/ 1046 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1047 if (err != 0) { 1048 if_printf(ifp, "Failed MXGEFW_ENABLE_ALLMULTI, " 1049 "error status: %d\n", err); 1050 return; 1051 } 1052 1053 if (sc->adopted_rx_filter_bug) 1054 return; 1055 1056 if (ifp->if_flags & IFF_ALLMULTI) { 1057 /* Request to disable multicast filtering, so quit here */ 1058 return; 1059 } 1060 1061 /* Flush all the filters */ 1062 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1063 if (err != 0) { 1064 if_printf(ifp, "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, " 1065 "error status: %d\n", err); 1066 return; 1067 } 1068 1069 /* 1070 * Walk the multicast list, and add each address 1071 */ 1072 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1073 if (ifma->ifma_addr->sa_family != AF_LINK) 1074 continue; 1075 1076 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1077 &cmd.data0, 4); 1078 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, 1079 &cmd.data1, 2); 1080 cmd.data0 = htonl(cmd.data0); 1081 cmd.data1 = htonl(cmd.data1); 1082 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1083 if (err != 0) { 1084 if_printf(ifp, "Failed MXGEFW_JOIN_MULTICAST_GROUP, " 1085 "error status: %d\n", err); 1086 /* Abort, leaving multicast filtering off */ 1087 return; 1088 } 1089 } 1090 1091 /* Enable multicast filtering */ 1092 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1093 if (err != 0) { 1094 if_printf(ifp, "Failed MXGEFW_DISABLE_ALLMULTI, " 1095 "error status: %d\n", err); 1096 } 1097 } 1098 1099 #if 0 1100 static int 1101 mxge_max_mtu(mxge_softc_t *sc) 1102 { 1103 mxge_cmd_t cmd; 1104 int status; 1105 1106 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) 1107 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1108 1109 /* try to set nbufs to see if it we can 1110 use virtually contiguous jumbos */ 1111 cmd.data0 = 0; 1112 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 1113 &cmd); 1114 if (status == 0) 1115 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1116 1117 /* otherwise, we're limited to MJUMPAGESIZE */ 1118 return MJUMPAGESIZE - MXGEFW_PAD; 1119 } 1120 #endif 1121 1122 static int 1123 mxge_reset(mxge_softc_t *sc, int interrupts_setup) 1124 { 1125 struct mxge_slice_state *ss; 1126 mxge_rx_done_t *rx_done; 1127 volatile uint32_t *irq_claim; 1128 mxge_cmd_t cmd; 1129 int slice, status, rx_intr_size; 1130 1131 /* 1132 * Try to send a reset command to the card to see if it 1133 * is alive 1134 */ 1135 memset(&cmd, 0, sizeof (cmd)); 1136 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1137 if (status != 0) { 1138 if_printf(sc->ifp, "failed reset\n"); 1139 return ENXIO; 1140 } 1141 1142 mxge_dummy_rdma(sc, 1); 1143 1144 /* 1145 * Set the intrq size 1146 * XXX assume 4byte mcp_slot 1147 */ 1148 rx_intr_size = sc->rx_intr_slots * sizeof(mcp_slot_t); 1149 cmd.data0 = rx_intr_size; 1150 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1151 1152 /* 1153 * Even though we already know how many slices are supported 1154 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES 1155 * has magic side effects, and must be called after a reset. 1156 * It must be called prior to calling any RSS related cmds, 1157 * including assigning an interrupt queue for anything but 1158 * slice 0. It must also be called *after* 1159 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 1160 * the firmware to compute offsets. 1161 */ 1162 if (sc->num_slices > 1) { 1163 /* Ask the maximum number of slices it supports */ 1164 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 1165 if (status != 0) { 1166 if_printf(sc->ifp, "failed to get number of slices\n"); 1167 return status; 1168 } 1169 1170 /* 1171 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 1172 * to setting up the interrupt queue DMA 1173 */ 1174 cmd.data0 = sc->num_slices; 1175 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 1176 if (sc->num_tx_rings > 1) 1177 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 1178 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, &cmd); 1179 if (status != 0) { 1180 if_printf(sc->ifp, "failed to set number of slices\n"); 1181 return status; 1182 } 1183 } 1184 1185 if (interrupts_setup) { 1186 /* Now exchange information about interrupts */ 1187 for (slice = 0; slice < sc->num_slices; slice++) { 1188 ss = &sc->ss[slice]; 1189 1190 rx_done = &ss->rx_data.rx_done; 1191 memset(rx_done->entry, 0, rx_intr_size); 1192 1193 cmd.data0 = 1194 MXGE_LOWPART_TO_U32(ss->rx_done_dma.dmem_busaddr); 1195 cmd.data1 = 1196 MXGE_HIGHPART_TO_U32(ss->rx_done_dma.dmem_busaddr); 1197 cmd.data2 = slice; 1198 status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA, 1199 &cmd); 1200 } 1201 } 1202 1203 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, 1204 &cmd); 1205 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1206 1207 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1208 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1209 1210 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd); 1211 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1212 1213 if (status != 0) { 1214 if_printf(sc->ifp, "failed set interrupt parameters\n"); 1215 return status; 1216 } 1217 1218 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1219 1220 /* Run a DMA benchmark */ 1221 mxge_dma_test(sc, MXGEFW_DMA_TEST); 1222 1223 for (slice = 0; slice < sc->num_slices; slice++) { 1224 ss = &sc->ss[slice]; 1225 1226 ss->irq_claim = irq_claim + (2 * slice); 1227 1228 /* Reset mcp/driver shared state back to 0 */ 1229 ss->rx_data.rx_done.idx = 0; 1230 ss->tx.req = 0; 1231 ss->tx.done = 0; 1232 ss->tx.pkt_done = 0; 1233 ss->tx.queue_active = 0; 1234 ss->tx.activate = 0; 1235 ss->tx.deactivate = 0; 1236 ss->rx_data.rx_big.cnt = 0; 1237 ss->rx_data.rx_small.cnt = 0; 1238 if (ss->fw_stats != NULL) 1239 bzero(ss->fw_stats, sizeof(*ss->fw_stats)); 1240 } 1241 sc->rdma_tags_available = 15; 1242 1243 status = mxge_update_mac_address(sc); 1244 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC); 1245 mxge_change_pause(sc, sc->pause); 1246 mxge_set_multicast_list(sc); 1247 1248 if (sc->throttle) { 1249 cmd.data0 = sc->throttle; 1250 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd)) 1251 if_printf(sc->ifp, "can't enable throttle\n"); 1252 } 1253 return status; 1254 } 1255 1256 static int 1257 mxge_change_throttle(SYSCTL_HANDLER_ARGS) 1258 { 1259 mxge_cmd_t cmd; 1260 mxge_softc_t *sc; 1261 int err; 1262 unsigned int throttle; 1263 1264 sc = arg1; 1265 throttle = sc->throttle; 1266 err = sysctl_handle_int(oidp, &throttle, arg2, req); 1267 if (err != 0) 1268 return err; 1269 1270 if (throttle == sc->throttle) 1271 return 0; 1272 1273 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE) 1274 return EINVAL; 1275 1276 ifnet_serialize_all(sc->ifp); 1277 1278 cmd.data0 = throttle; 1279 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd); 1280 if (err == 0) 1281 sc->throttle = throttle; 1282 1283 ifnet_deserialize_all(sc->ifp); 1284 return err; 1285 } 1286 1287 static int 1288 mxge_change_use_rss(SYSCTL_HANDLER_ARGS) 1289 { 1290 mxge_softc_t *sc; 1291 int err, use_rss; 1292 1293 sc = arg1; 1294 use_rss = sc->use_rss; 1295 err = sysctl_handle_int(oidp, &use_rss, arg2, req); 1296 if (err != 0) 1297 return err; 1298 1299 if (use_rss == sc->use_rss) 1300 return 0; 1301 1302 ifnet_serialize_all(sc->ifp); 1303 1304 sc->use_rss = use_rss; 1305 if (sc->ifp->if_flags & IFF_RUNNING) { 1306 mxge_close(sc, 0); 1307 mxge_open(sc); 1308 } 1309 1310 ifnet_deserialize_all(sc->ifp); 1311 return err; 1312 } 1313 1314 static int 1315 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1316 { 1317 mxge_softc_t *sc; 1318 unsigned int intr_coal_delay; 1319 int err; 1320 1321 sc = arg1; 1322 intr_coal_delay = sc->intr_coal_delay; 1323 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1324 if (err != 0) 1325 return err; 1326 1327 if (intr_coal_delay == sc->intr_coal_delay) 1328 return 0; 1329 1330 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1331 return EINVAL; 1332 1333 ifnet_serialize_all(sc->ifp); 1334 1335 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1336 sc->intr_coal_delay = intr_coal_delay; 1337 1338 ifnet_deserialize_all(sc->ifp); 1339 return err; 1340 } 1341 1342 static int 1343 mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1344 { 1345 int err; 1346 1347 if (arg1 == NULL) 1348 return EFAULT; 1349 arg2 = be32toh(*(int *)arg1); 1350 arg1 = NULL; 1351 err = sysctl_handle_int(oidp, arg1, arg2, req); 1352 1353 return err; 1354 } 1355 1356 static void 1357 mxge_rem_sysctls(mxge_softc_t *sc) 1358 { 1359 if (sc->ss != NULL) { 1360 struct mxge_slice_state *ss; 1361 int slice; 1362 1363 for (slice = 0; slice < sc->num_slices; slice++) { 1364 ss = &sc->ss[slice]; 1365 if (ss->sysctl_tree != NULL) { 1366 sysctl_ctx_free(&ss->sysctl_ctx); 1367 ss->sysctl_tree = NULL; 1368 } 1369 } 1370 } 1371 1372 if (sc->slice_sysctl_tree != NULL) { 1373 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1374 sc->slice_sysctl_tree = NULL; 1375 } 1376 } 1377 1378 static void 1379 mxge_add_sysctls(mxge_softc_t *sc) 1380 { 1381 struct sysctl_ctx_list *ctx; 1382 struct sysctl_oid_list *children; 1383 mcp_irq_data_t *fw; 1384 struct mxge_slice_state *ss; 1385 int slice; 1386 char slice_num[8]; 1387 1388 ctx = device_get_sysctl_ctx(sc->dev); 1389 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1390 fw = sc->ss[0].fw_stats; 1391 1392 /* 1393 * Random information 1394 */ 1395 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version", 1396 CTLFLAG_RD, &sc->fw_version, 0, "firmware version"); 1397 1398 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "serial_number", 1399 CTLFLAG_RD, &sc->serial_number_string, 0, "serial number"); 1400 1401 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "product_code", 1402 CTLFLAG_RD, &sc->product_code_string, 0, "product code"); 1403 1404 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "pcie_link_width", 1405 CTLFLAG_RD, &sc->link_width, 0, "link width"); 1406 1407 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_boundary", 1408 CTLFLAG_RD, &sc->tx_boundary, 0, "tx boundary"); 1409 1410 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_combine", 1411 CTLFLAG_RD, &sc->wc, 0, "write combining PIO"); 1412 1413 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_dma_MBs", 1414 CTLFLAG_RD, &sc->read_dma, 0, "DMA Read speed in MB/s"); 1415 1416 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_dma_MBs", 1417 CTLFLAG_RD, &sc->write_dma, 0, "DMA Write speed in MB/s"); 1418 1419 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_write_dma_MBs", 1420 CTLFLAG_RD, &sc->read_write_dma, 0, 1421 "DMA concurrent Read/Write speed in MB/s"); 1422 1423 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "watchdog_resets", 1424 CTLFLAG_RD, &sc->watchdog_resets, 0, 1425 "Number of times NIC was reset"); 1426 1427 /* 1428 * Performance related tunables 1429 */ 1430 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_coal_delay", 1431 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_intr_coal, "I", 1432 "Interrupt coalescing delay in usecs"); 1433 1434 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "throttle", 1435 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_throttle, "I", 1436 "Transmit throttling"); 1437 1438 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "use_rss", 1439 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_use_rss, "I", 1440 "Use RSS"); 1441 1442 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "deassert_wait", 1443 CTLFLAG_RW, &mxge_deassert_wait, 0, 1444 "Wait for IRQ line to go low in ihandler"); 1445 1446 /* 1447 * Stats block from firmware is in network byte order. 1448 * Need to swap it 1449 */ 1450 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "link_up", 1451 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 0, 1452 mxge_handle_be32, "I", "link up"); 1453 1454 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_tags_available", 1455 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 0, 1456 mxge_handle_be32, "I", "rdma_tags_available"); 1457 1458 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_crc32", 1459 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_crc32, 0, 1460 mxge_handle_be32, "I", "dropped_bad_crc32"); 1461 1462 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_phy", 1463 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_phy, 0, 1464 mxge_handle_be32, "I", "dropped_bad_phy"); 1465 1466 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_error_or_filtered", 1467 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_error_or_filtered, 0, 1468 mxge_handle_be32, "I", "dropped_link_error_or_filtered"); 1469 1470 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_overflow", 1471 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 0, 1472 mxge_handle_be32, "I", "dropped_link_overflow"); 1473 1474 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_multicast_filtered", 1475 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_multicast_filtered, 0, 1476 mxge_handle_be32, "I", "dropped_multicast_filtered"); 1477 1478 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_no_big_buffer", 1479 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 0, 1480 mxge_handle_be32, "I", "dropped_no_big_buffer"); 1481 1482 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_no_small_buffer", 1483 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_small_buffer, 0, 1484 mxge_handle_be32, "I", "dropped_no_small_buffer"); 1485 1486 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_overrun", 1487 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 0, 1488 mxge_handle_be32, "I", "dropped_overrun"); 1489 1490 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_pause", 1491 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_pause, 0, 1492 mxge_handle_be32, "I", "dropped_pause"); 1493 1494 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_runt", 1495 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 0, 1496 mxge_handle_be32, "I", "dropped_runt"); 1497 1498 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_unicast_filtered", 1499 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 0, 1500 mxge_handle_be32, "I", "dropped_unicast_filtered"); 1501 1502 /* add counters exported for debugging from all slices */ 1503 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1504 sc->slice_sysctl_tree = SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, 1505 children, OID_AUTO, "slice", CTLFLAG_RD, 0, ""); 1506 if (sc->slice_sysctl_tree == NULL) { 1507 device_printf(sc->dev, "can't add slice sysctl node\n"); 1508 return; 1509 } 1510 1511 for (slice = 0; slice < sc->num_slices; slice++) { 1512 ss = &sc->ss[slice]; 1513 sysctl_ctx_init(&ss->sysctl_ctx); 1514 ctx = &ss->sysctl_ctx; 1515 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1516 ksprintf(slice_num, "%d", slice); 1517 ss->sysctl_tree = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 1518 slice_num, CTLFLAG_RD, 0, ""); 1519 if (ss->sysctl_tree == NULL) { 1520 device_printf(sc->dev, 1521 "can't add %d slice sysctl node\n", slice); 1522 return; /* XXX continue? */ 1523 } 1524 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1525 1526 /* 1527 * XXX change to ULONG 1528 */ 1529 1530 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_small_cnt", 1531 CTLFLAG_RD, &ss->rx_data.rx_small.cnt, 0, "rx_small_cnt"); 1532 1533 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_big_cnt", 1534 CTLFLAG_RD, &ss->rx_data.rx_big.cnt, 0, "rx_small_cnt"); 1535 1536 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_req", 1537 CTLFLAG_RD, &ss->tx.req, 0, "tx_req"); 1538 1539 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_done", 1540 CTLFLAG_RD, &ss->tx.done, 0, "tx_done"); 1541 1542 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_pkt_done", 1543 CTLFLAG_RD, &ss->tx.pkt_done, 0, "tx_done"); 1544 1545 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_queue_active", 1546 CTLFLAG_RD, &ss->tx.queue_active, 0, "tx_queue_active"); 1547 1548 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_activate", 1549 CTLFLAG_RD, &ss->tx.activate, 0, "tx_activate"); 1550 1551 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_deactivate", 1552 CTLFLAG_RD, &ss->tx.deactivate, 0, "tx_deactivate"); 1553 } 1554 } 1555 1556 /* 1557 * Copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1558 * backwards one at a time and handle ring wraps 1559 */ 1560 static __inline void 1561 mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1562 mcp_kreq_ether_send_t *src, int cnt) 1563 { 1564 int idx, starting_slot; 1565 1566 starting_slot = tx->req; 1567 while (cnt > 1) { 1568 cnt--; 1569 idx = (starting_slot + cnt) & tx->mask; 1570 mxge_pio_copy(&tx->lanai[idx], &src[cnt], sizeof(*src)); 1571 wmb(); 1572 } 1573 } 1574 1575 /* 1576 * Copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1577 * at most 32 bytes at a time, so as to avoid involving the software 1578 * pio handler in the nic. We re-write the first segment's flags 1579 * to mark them valid only after writing the entire chain 1580 */ 1581 static __inline void 1582 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, int cnt) 1583 { 1584 int idx, i; 1585 uint32_t *src_ints; 1586 volatile uint32_t *dst_ints; 1587 mcp_kreq_ether_send_t *srcp; 1588 volatile mcp_kreq_ether_send_t *dstp, *dst; 1589 uint8_t last_flags; 1590 1591 idx = tx->req & tx->mask; 1592 1593 last_flags = src->flags; 1594 src->flags = 0; 1595 wmb(); 1596 dst = dstp = &tx->lanai[idx]; 1597 srcp = src; 1598 1599 if ((idx + cnt) < tx->mask) { 1600 for (i = 0; i < cnt - 1; i += 2) { 1601 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1602 wmb(); /* force write every 32 bytes */ 1603 srcp += 2; 1604 dstp += 2; 1605 } 1606 } else { 1607 /* 1608 * Submit all but the first request, and ensure 1609 * that it is submitted below 1610 */ 1611 mxge_submit_req_backwards(tx, src, cnt); 1612 i = 0; 1613 } 1614 if (i < cnt) { 1615 /* Submit the first request */ 1616 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1617 wmb(); /* barrier before setting valid flag */ 1618 } 1619 1620 /* Re-write the last 32-bits with the valid flags */ 1621 src->flags = last_flags; 1622 src_ints = (uint32_t *)src; 1623 src_ints+=3; 1624 dst_ints = (volatile uint32_t *)dst; 1625 dst_ints+=3; 1626 *dst_ints = *src_ints; 1627 tx->req += cnt; 1628 wmb(); 1629 } 1630 1631 static int 1632 mxge_pullup_tso(struct mbuf **mp) 1633 { 1634 int hoff, iphlen, thoff; 1635 struct mbuf *m; 1636 1637 m = *mp; 1638 KASSERT(M_WRITABLE(m), ("TSO mbuf not writable")); 1639 1640 iphlen = m->m_pkthdr.csum_iphlen; 1641 thoff = m->m_pkthdr.csum_thlen; 1642 hoff = m->m_pkthdr.csum_lhlen; 1643 1644 KASSERT(iphlen > 0, ("invalid ip hlen")); 1645 KASSERT(thoff > 0, ("invalid tcp hlen")); 1646 KASSERT(hoff > 0, ("invalid ether hlen")); 1647 1648 if (__predict_false(m->m_len < hoff + iphlen + thoff)) { 1649 m = m_pullup(m, hoff + iphlen + thoff); 1650 if (m == NULL) { 1651 *mp = NULL; 1652 return ENOBUFS; 1653 } 1654 *mp = m; 1655 } 1656 return 0; 1657 } 1658 1659 static int 1660 mxge_encap_tso(mxge_tx_ring_t *tx, struct mxge_buffer_state *info_map, 1661 struct mbuf *m, int busdma_seg_cnt) 1662 { 1663 mcp_kreq_ether_send_t *req; 1664 bus_dma_segment_t *seg; 1665 uint32_t low, high_swapped; 1666 int len, seglen, cum_len, cum_len_next; 1667 int next_is_first, chop, cnt, rdma_count, small; 1668 uint16_t pseudo_hdr_offset, cksum_offset, mss; 1669 uint8_t flags, flags_next; 1670 struct mxge_buffer_state *info_last; 1671 bus_dmamap_t map = info_map->map; 1672 1673 mss = m->m_pkthdr.tso_segsz; 1674 1675 /* 1676 * Negative cum_len signifies to the send loop that we are 1677 * still in the header portion of the TSO packet. 1678 */ 1679 cum_len = -(m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen + 1680 m->m_pkthdr.csum_thlen); 1681 1682 /* 1683 * TSO implies checksum offload on this hardware 1684 */ 1685 cksum_offset = m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen; 1686 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1687 1688 /* 1689 * For TSO, pseudo_hdr_offset holds mss. The firmware figures 1690 * out where to put the checksum by parsing the header. 1691 */ 1692 pseudo_hdr_offset = htobe16(mss); 1693 1694 req = tx->req_list; 1695 seg = tx->seg_list; 1696 cnt = 0; 1697 rdma_count = 0; 1698 1699 /* 1700 * "rdma_count" is the number of RDMAs belonging to the current 1701 * packet BEFORE the current send request. For non-TSO packets, 1702 * this is equal to "count". 1703 * 1704 * For TSO packets, rdma_count needs to be reset to 0 after a 1705 * segment cut. 1706 * 1707 * The rdma_count field of the send request is the number of 1708 * RDMAs of the packet starting at that request. For TSO send 1709 * requests with one ore more cuts in the middle, this is the 1710 * number of RDMAs starting after the last cut in the request. 1711 * All previous segments before the last cut implicitly have 1 1712 * RDMA. 1713 * 1714 * Since the number of RDMAs is not known beforehand, it must be 1715 * filled-in retroactively - after each segmentation cut or at 1716 * the end of the entire packet. 1717 */ 1718 1719 while (busdma_seg_cnt) { 1720 /* 1721 * Break the busdma segment up into pieces 1722 */ 1723 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1724 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1725 len = seg->ds_len; 1726 1727 while (len) { 1728 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1729 seglen = len; 1730 cum_len_next = cum_len + seglen; 1731 (req - rdma_count)->rdma_count = rdma_count + 1; 1732 if (__predict_true(cum_len >= 0)) { 1733 /* Payload */ 1734 chop = (cum_len_next > mss); 1735 cum_len_next = cum_len_next % mss; 1736 next_is_first = (cum_len_next == 0); 1737 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1738 flags_next |= 1739 next_is_first * MXGEFW_FLAGS_FIRST; 1740 rdma_count |= -(chop | next_is_first); 1741 rdma_count += chop & !next_is_first; 1742 } else if (cum_len_next >= 0) { 1743 /* Header ends */ 1744 rdma_count = -1; 1745 cum_len_next = 0; 1746 seglen = -cum_len; 1747 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1748 flags_next = MXGEFW_FLAGS_TSO_PLD | 1749 MXGEFW_FLAGS_FIRST | 1750 (small * MXGEFW_FLAGS_SMALL); 1751 } 1752 1753 req->addr_high = high_swapped; 1754 req->addr_low = htobe32(low); 1755 req->pseudo_hdr_offset = pseudo_hdr_offset; 1756 req->pad = 0; 1757 req->rdma_count = 1; 1758 req->length = htobe16(seglen); 1759 req->cksum_offset = cksum_offset; 1760 req->flags = 1761 flags | ((cum_len & 1) * MXGEFW_FLAGS_ALIGN_ODD); 1762 low += seglen; 1763 len -= seglen; 1764 cum_len = cum_len_next; 1765 flags = flags_next; 1766 req++; 1767 cnt++; 1768 rdma_count++; 1769 if (__predict_false(cksum_offset > seglen)) 1770 cksum_offset -= seglen; 1771 else 1772 cksum_offset = 0; 1773 if (__predict_false(cnt > tx->max_desc)) 1774 goto drop; 1775 } 1776 busdma_seg_cnt--; 1777 seg++; 1778 } 1779 (req - rdma_count)->rdma_count = rdma_count; 1780 1781 do { 1782 req--; 1783 req->flags |= MXGEFW_FLAGS_TSO_LAST; 1784 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 1785 1786 info_last = &tx->info[((cnt - 1) + tx->req) & tx->mask]; 1787 1788 info_map->map = info_last->map; 1789 info_last->map = map; 1790 info_last->m = m; 1791 1792 mxge_submit_req(tx, tx->req_list, cnt); 1793 1794 if (tx->send_go != NULL && tx->queue_active == 0) { 1795 /* Tell the NIC to start polling this slice */ 1796 *tx->send_go = 1; 1797 tx->queue_active = 1; 1798 tx->activate++; 1799 wmb(); 1800 } 1801 return 0; 1802 1803 drop: 1804 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 1805 m_freem(m); 1806 return ENOBUFS; 1807 } 1808 1809 static int 1810 mxge_encap(mxge_tx_ring_t *tx, struct mbuf *m, bus_addr_t zeropad) 1811 { 1812 mcp_kreq_ether_send_t *req; 1813 bus_dma_segment_t *seg; 1814 bus_dmamap_t map; 1815 int cnt, cum_len, err, i, idx, odd_flag; 1816 uint16_t pseudo_hdr_offset; 1817 uint8_t flags, cksum_offset; 1818 struct mxge_buffer_state *info_map, *info_last; 1819 1820 if (m->m_pkthdr.csum_flags & CSUM_TSO) { 1821 err = mxge_pullup_tso(&m); 1822 if (__predict_false(err)) 1823 return err; 1824 } 1825 1826 /* 1827 * Map the frame for DMA 1828 */ 1829 idx = tx->req & tx->mask; 1830 info_map = &tx->info[idx]; 1831 map = info_map->map; 1832 1833 err = bus_dmamap_load_mbuf_defrag(tx->dmat, map, &m, 1834 tx->seg_list, tx->max_desc - 2, &cnt, BUS_DMA_NOWAIT); 1835 if (__predict_false(err != 0)) 1836 goto drop; 1837 bus_dmamap_sync(tx->dmat, map, BUS_DMASYNC_PREWRITE); 1838 1839 /* 1840 * TSO is different enough, we handle it in another routine 1841 */ 1842 if (m->m_pkthdr.csum_flags & CSUM_TSO) 1843 return mxge_encap_tso(tx, info_map, m, cnt); 1844 1845 req = tx->req_list; 1846 cksum_offset = 0; 1847 pseudo_hdr_offset = 0; 1848 flags = MXGEFW_FLAGS_NO_TSO; 1849 1850 /* 1851 * Checksum offloading 1852 */ 1853 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 1854 cksum_offset = m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen; 1855 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 1856 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 1857 req->cksum_offset = cksum_offset; 1858 flags |= MXGEFW_FLAGS_CKSUM; 1859 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 1860 } else { 1861 odd_flag = 0; 1862 } 1863 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 1864 flags |= MXGEFW_FLAGS_SMALL; 1865 1866 /* 1867 * Convert segments into a request list 1868 */ 1869 cum_len = 0; 1870 seg = tx->seg_list; 1871 req->flags = MXGEFW_FLAGS_FIRST; 1872 for (i = 0; i < cnt; i++) { 1873 req->addr_low = htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 1874 req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1875 req->length = htobe16(seg->ds_len); 1876 req->cksum_offset = cksum_offset; 1877 if (cksum_offset > seg->ds_len) 1878 cksum_offset -= seg->ds_len; 1879 else 1880 cksum_offset = 0; 1881 req->pseudo_hdr_offset = pseudo_hdr_offset; 1882 req->pad = 0; /* complete solid 16-byte block */ 1883 req->rdma_count = 1; 1884 req->flags |= flags | ((cum_len & 1) * odd_flag); 1885 cum_len += seg->ds_len; 1886 seg++; 1887 req++; 1888 req->flags = 0; 1889 } 1890 req--; 1891 1892 /* 1893 * Pad runt to 60 bytes 1894 */ 1895 if (cum_len < 60) { 1896 req++; 1897 req->addr_low = htobe32(MXGE_LOWPART_TO_U32(zeropad)); 1898 req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(zeropad)); 1899 req->length = htobe16(60 - cum_len); 1900 req->cksum_offset = 0; 1901 req->pseudo_hdr_offset = pseudo_hdr_offset; 1902 req->pad = 0; /* complete solid 16-byte block */ 1903 req->rdma_count = 1; 1904 req->flags |= flags | ((cum_len & 1) * odd_flag); 1905 cnt++; 1906 } 1907 1908 tx->req_list[0].rdma_count = cnt; 1909 #if 0 1910 /* print what the firmware will see */ 1911 for (i = 0; i < cnt; i++) { 1912 kprintf("%d: addr: 0x%x 0x%x len:%d pso%d," 1913 "cso:%d, flags:0x%x, rdma:%d\n", 1914 i, (int)ntohl(tx->req_list[i].addr_high), 1915 (int)ntohl(tx->req_list[i].addr_low), 1916 (int)ntohs(tx->req_list[i].length), 1917 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 1918 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 1919 tx->req_list[i].rdma_count); 1920 } 1921 kprintf("--------------\n"); 1922 #endif 1923 info_last = &tx->info[((cnt - 1) + tx->req) & tx->mask]; 1924 1925 info_map->map = info_last->map; 1926 info_last->map = map; 1927 info_last->m = m; 1928 1929 mxge_submit_req(tx, tx->req_list, cnt); 1930 1931 if (tx->send_go != NULL && tx->queue_active == 0) { 1932 /* Tell the NIC to start polling this slice */ 1933 *tx->send_go = 1; 1934 tx->queue_active = 1; 1935 tx->activate++; 1936 wmb(); 1937 } 1938 return 0; 1939 1940 drop: 1941 m_freem(m); 1942 return err; 1943 } 1944 1945 static void 1946 mxge_start(struct ifnet *ifp, struct ifaltq_subque *ifsq) 1947 { 1948 mxge_softc_t *sc = ifp->if_softc; 1949 mxge_tx_ring_t *tx = ifsq_get_priv(ifsq); 1950 bus_addr_t zeropad; 1951 int encap = 0; 1952 1953 KKASSERT(tx->ifsq == ifsq); 1954 ASSERT_SERIALIZED(&tx->tx_serialize); 1955 1956 if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq)) 1957 return; 1958 1959 zeropad = sc->zeropad_dma.dmem_busaddr; 1960 while (tx->mask - (tx->req - tx->done) > tx->max_desc) { 1961 struct mbuf *m; 1962 int error; 1963 1964 m = ifsq_dequeue(ifsq); 1965 if (m == NULL) 1966 goto done; 1967 1968 BPF_MTAP(ifp, m); 1969 error = mxge_encap(tx, m, zeropad); 1970 if (!error) 1971 encap = 1; 1972 else 1973 IFNET_STAT_INC(ifp, oerrors, 1); 1974 } 1975 1976 /* Ran out of transmit slots */ 1977 ifsq_set_oactive(ifsq); 1978 done: 1979 if (encap) 1980 tx->watchdog.wd_timer = 5; 1981 } 1982 1983 static void 1984 mxge_watchdog(struct ifaltq_subque *ifsq) 1985 { 1986 struct ifnet *ifp = ifsq_get_ifp(ifsq); 1987 struct mxge_softc *sc = ifp->if_softc; 1988 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 1989 mxge_tx_ring_t *tx = ifsq_get_priv(ifsq); 1990 1991 ASSERT_IFNET_SERIALIZED_ALL(ifp); 1992 1993 /* Check for pause blocking before resetting */ 1994 if (tx->watchdog_rx_pause == rx_pause) { 1995 mxge_warn_stuck(sc, tx, 0); 1996 mxge_watchdog_reset(sc); 1997 return; 1998 } else { 1999 if_printf(ifp, "Flow control blocking xmits, " 2000 "check link partner\n"); 2001 } 2002 tx->watchdog_rx_pause = rx_pause; 2003 } 2004 2005 /* 2006 * Copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2007 * at most 32 bytes at a time, so as to avoid involving the software 2008 * pio handler in the nic. We re-write the first segment's low 2009 * DMA address to mark it valid only after we write the entire chunk 2010 * in a burst 2011 */ 2012 static __inline void 2013 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2014 mcp_kreq_ether_recv_t *src) 2015 { 2016 uint32_t low; 2017 2018 low = src->addr_low; 2019 src->addr_low = 0xffffffff; 2020 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2021 wmb(); 2022 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2023 wmb(); 2024 src->addr_low = low; 2025 dst->addr_low = low; 2026 wmb(); 2027 } 2028 2029 static int 2030 mxge_get_buf_small(mxge_rx_ring_t *rx, bus_dmamap_t map, int idx, 2031 boolean_t init) 2032 { 2033 bus_dma_segment_t seg; 2034 struct mbuf *m; 2035 int cnt, err, mflag; 2036 2037 mflag = M_NOWAIT; 2038 if (__predict_false(init)) 2039 mflag = M_WAITOK; 2040 2041 m = m_gethdr(mflag, MT_DATA); 2042 if (m == NULL) { 2043 err = ENOBUFS; 2044 if (__predict_false(init)) { 2045 /* 2046 * During initialization, there 2047 * is nothing to setup; bail out 2048 */ 2049 return err; 2050 } 2051 goto done; 2052 } 2053 m->m_len = m->m_pkthdr.len = MHLEN; 2054 2055 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m, 2056 &seg, 1, &cnt, BUS_DMA_NOWAIT); 2057 if (err != 0) { 2058 m_freem(m); 2059 if (__predict_false(init)) { 2060 /* 2061 * During initialization, there 2062 * is nothing to setup; bail out 2063 */ 2064 return err; 2065 } 2066 goto done; 2067 } 2068 2069 rx->info[idx].m = m; 2070 rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2071 rx->shadow[idx].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2072 2073 done: 2074 if ((idx & 7) == 7) 2075 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2076 return err; 2077 } 2078 2079 static int 2080 mxge_get_buf_big(mxge_rx_ring_t *rx, bus_dmamap_t map, int idx, 2081 boolean_t init) 2082 { 2083 bus_dma_segment_t seg; 2084 struct mbuf *m; 2085 int cnt, err, mflag; 2086 2087 mflag = M_NOWAIT; 2088 if (__predict_false(init)) 2089 mflag = M_WAITOK; 2090 2091 if (rx->cl_size == MCLBYTES) 2092 m = m_getcl(mflag, MT_DATA, M_PKTHDR); 2093 else 2094 m = m_getjcl(mflag, MT_DATA, M_PKTHDR, MJUMPAGESIZE); 2095 if (m == NULL) { 2096 err = ENOBUFS; 2097 if (__predict_false(init)) { 2098 /* 2099 * During initialization, there 2100 * is nothing to setup; bail out 2101 */ 2102 return err; 2103 } 2104 goto done; 2105 } 2106 m->m_len = m->m_pkthdr.len = rx->cl_size; 2107 2108 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m, 2109 &seg, 1, &cnt, BUS_DMA_NOWAIT); 2110 if (err != 0) { 2111 m_freem(m); 2112 if (__predict_false(init)) { 2113 /* 2114 * During initialization, there 2115 * is nothing to setup; bail out 2116 */ 2117 return err; 2118 } 2119 goto done; 2120 } 2121 2122 rx->info[idx].m = m; 2123 rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2124 rx->shadow[idx].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2125 2126 done: 2127 if ((idx & 7) == 7) 2128 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2129 return err; 2130 } 2131 2132 /* 2133 * Myri10GE hardware checksums are not valid if the sender 2134 * padded the frame with non-zero padding. This is because 2135 * the firmware just does a simple 16-bit 1s complement 2136 * checksum across the entire frame, excluding the first 14 2137 * bytes. It is best to simply to check the checksum and 2138 * tell the stack about it only if the checksum is good 2139 */ 2140 static __inline uint16_t 2141 mxge_rx_csum(struct mbuf *m, int csum) 2142 { 2143 const struct ether_header *eh; 2144 const struct ip *ip; 2145 uint16_t c; 2146 2147 eh = mtod(m, const struct ether_header *); 2148 2149 /* Only deal with IPv4 TCP & UDP for now */ 2150 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP))) 2151 return 1; 2152 2153 ip = (const struct ip *)(eh + 1); 2154 if (__predict_false(ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)) 2155 return 1; 2156 2157 #ifdef INET 2158 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2159 htonl(ntohs(csum) + ntohs(ip->ip_len) + 2160 - (ip->ip_hl << 2) + ip->ip_p)); 2161 #else 2162 c = 1; 2163 #endif 2164 c ^= 0xffff; 2165 return c; 2166 } 2167 2168 static void 2169 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2170 { 2171 struct ether_vlan_header *evl; 2172 uint32_t partial; 2173 2174 evl = mtod(m, struct ether_vlan_header *); 2175 2176 /* 2177 * Fix checksum by subtracting EVL_ENCAPLEN bytes after 2178 * what the firmware thought was the end of the ethernet 2179 * header. 2180 */ 2181 2182 /* Put checksum into host byte order */ 2183 *csum = ntohs(*csum); 2184 2185 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2186 *csum += ~partial; 2187 *csum += ((*csum) < ~partial); 2188 *csum = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2189 *csum = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2190 2191 /* 2192 * Restore checksum to network byte order; 2193 * later consumers expect this 2194 */ 2195 *csum = htons(*csum); 2196 2197 /* save the tag */ 2198 m->m_pkthdr.ether_vlantag = ntohs(evl->evl_tag); 2199 m->m_flags |= M_VLANTAG; 2200 2201 /* 2202 * Remove the 802.1q header by copying the Ethernet 2203 * addresses over it and adjusting the beginning of 2204 * the data in the mbuf. The encapsulated Ethernet 2205 * type field is already in place. 2206 */ 2207 bcopy((char *)evl, (char *)evl + EVL_ENCAPLEN, 2208 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2209 m_adj(m, EVL_ENCAPLEN); 2210 } 2211 2212 2213 static __inline void 2214 mxge_rx_done_big(struct ifnet *ifp, mxge_rx_ring_t *rx, 2215 uint32_t len, uint32_t csum) 2216 { 2217 struct mbuf *m; 2218 const struct ether_header *eh; 2219 bus_dmamap_t old_map; 2220 int idx; 2221 2222 idx = rx->cnt & rx->mask; 2223 rx->cnt++; 2224 2225 /* Save a pointer to the received mbuf */ 2226 m = rx->info[idx].m; 2227 2228 /* Try to replace the received mbuf */ 2229 if (mxge_get_buf_big(rx, rx->extra_map, idx, FALSE)) { 2230 /* Drop the frame -- the old mbuf is re-cycled */ 2231 IFNET_STAT_INC(ifp, ierrors, 1); 2232 return; 2233 } 2234 2235 /* Unmap the received buffer */ 2236 old_map = rx->info[idx].map; 2237 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2238 bus_dmamap_unload(rx->dmat, old_map); 2239 2240 /* Swap the bus_dmamap_t's */ 2241 rx->info[idx].map = rx->extra_map; 2242 rx->extra_map = old_map; 2243 2244 /* 2245 * mcp implicitly skips 1st 2 bytes so that packet is properly 2246 * aligned 2247 */ 2248 m->m_data += MXGEFW_PAD; 2249 2250 m->m_pkthdr.rcvif = ifp; 2251 m->m_len = m->m_pkthdr.len = len; 2252 2253 IFNET_STAT_INC(ifp, ipackets, 1); 2254 2255 eh = mtod(m, const struct ether_header *); 2256 if (eh->ether_type == htons(ETHERTYPE_VLAN)) 2257 mxge_vlan_tag_remove(m, &csum); 2258 2259 /* If the checksum is valid, mark it in the mbuf header */ 2260 if ((ifp->if_capenable & IFCAP_RXCSUM) && 2261 mxge_rx_csum(m, csum) == 0) { 2262 /* Tell the stack that the checksum is good */ 2263 m->m_pkthdr.csum_data = 0xffff; 2264 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2265 CSUM_DATA_VALID; 2266 } 2267 ifp->if_input(ifp, m, NULL, -1); 2268 } 2269 2270 static __inline void 2271 mxge_rx_done_small(struct ifnet *ifp, mxge_rx_ring_t *rx, 2272 uint32_t len, uint32_t csum) 2273 { 2274 const struct ether_header *eh; 2275 struct mbuf *m; 2276 bus_dmamap_t old_map; 2277 int idx; 2278 2279 idx = rx->cnt & rx->mask; 2280 rx->cnt++; 2281 2282 /* Save a pointer to the received mbuf */ 2283 m = rx->info[idx].m; 2284 2285 /* Try to replace the received mbuf */ 2286 if (mxge_get_buf_small(rx, rx->extra_map, idx, FALSE)) { 2287 /* Drop the frame -- the old mbuf is re-cycled */ 2288 IFNET_STAT_INC(ifp, ierrors, 1); 2289 return; 2290 } 2291 2292 /* Unmap the received buffer */ 2293 old_map = rx->info[idx].map; 2294 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2295 bus_dmamap_unload(rx->dmat, old_map); 2296 2297 /* Swap the bus_dmamap_t's */ 2298 rx->info[idx].map = rx->extra_map; 2299 rx->extra_map = old_map; 2300 2301 /* 2302 * mcp implicitly skips 1st 2 bytes so that packet is properly 2303 * aligned 2304 */ 2305 m->m_data += MXGEFW_PAD; 2306 2307 m->m_pkthdr.rcvif = ifp; 2308 m->m_len = m->m_pkthdr.len = len; 2309 2310 IFNET_STAT_INC(ifp, ipackets, 1); 2311 2312 eh = mtod(m, const struct ether_header *); 2313 if (eh->ether_type == htons(ETHERTYPE_VLAN)) 2314 mxge_vlan_tag_remove(m, &csum); 2315 2316 /* If the checksum is valid, mark it in the mbuf header */ 2317 if ((ifp->if_capenable & IFCAP_RXCSUM) && 2318 mxge_rx_csum(m, csum) == 0) { 2319 /* Tell the stack that the checksum is good */ 2320 m->m_pkthdr.csum_data = 0xffff; 2321 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2322 CSUM_DATA_VALID; 2323 } 2324 ifp->if_input(ifp, m, NULL, -1); 2325 } 2326 2327 static __inline void 2328 mxge_clean_rx_done(struct ifnet *ifp, struct mxge_rx_data *rx_data, int cycle) 2329 { 2330 mxge_rx_done_t *rx_done = &rx_data->rx_done; 2331 2332 while (rx_done->entry[rx_done->idx].length != 0 && cycle != 0) { 2333 uint16_t length, checksum; 2334 2335 length = ntohs(rx_done->entry[rx_done->idx].length); 2336 rx_done->entry[rx_done->idx].length = 0; 2337 2338 checksum = rx_done->entry[rx_done->idx].checksum; 2339 2340 if (length <= MXGE_RX_SMALL_BUFLEN) { 2341 mxge_rx_done_small(ifp, &rx_data->rx_small, 2342 length, checksum); 2343 } else { 2344 mxge_rx_done_big(ifp, &rx_data->rx_big, 2345 length, checksum); 2346 } 2347 2348 rx_done->idx++; 2349 rx_done->idx &= rx_done->mask; 2350 --cycle; 2351 } 2352 } 2353 2354 static __inline void 2355 mxge_tx_done(struct ifnet *ifp, mxge_tx_ring_t *tx, uint32_t mcp_idx) 2356 { 2357 ASSERT_SERIALIZED(&tx->tx_serialize); 2358 2359 while (tx->pkt_done != mcp_idx) { 2360 struct mbuf *m; 2361 int idx; 2362 2363 idx = tx->done & tx->mask; 2364 tx->done++; 2365 2366 m = tx->info[idx].m; 2367 /* 2368 * mbuf and DMA map only attached to the first 2369 * segment per-mbuf. 2370 */ 2371 if (m != NULL) { 2372 tx->pkt_done++; 2373 IFNET_STAT_INC(ifp, opackets, 1); 2374 tx->info[idx].m = NULL; 2375 bus_dmamap_unload(tx->dmat, tx->info[idx].map); 2376 m_freem(m); 2377 } 2378 } 2379 2380 /* 2381 * If we have space, clear OACTIVE to tell the stack that 2382 * its OK to send packets 2383 */ 2384 if (tx->req - tx->done < (tx->mask + 1) / 2) { 2385 ifsq_clr_oactive(tx->ifsq); 2386 if (tx->req == tx->done) { 2387 /* Reset watchdog */ 2388 tx->watchdog.wd_timer = 0; 2389 } 2390 } 2391 2392 if (!ifsq_is_empty(tx->ifsq)) 2393 ifsq_devstart(tx->ifsq); 2394 2395 if (tx->send_stop != NULL && tx->req == tx->done) { 2396 /* 2397 * Let the NIC stop polling this queue, since there 2398 * are no more transmits pending 2399 */ 2400 *tx->send_stop = 1; 2401 tx->queue_active = 0; 2402 tx->deactivate++; 2403 wmb(); 2404 } 2405 } 2406 2407 static struct mxge_media_type mxge_xfp_media_types[] = { 2408 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2409 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2410 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2411 {IFM_NONE, (1 << 5), "10GBASE-ER"}, 2412 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2413 {IFM_NONE, (1 << 3), "10GBASE-SW"}, 2414 {IFM_NONE, (1 << 2), "10GBASE-LW"}, 2415 {IFM_NONE, (1 << 1), "10GBASE-EW"}, 2416 {IFM_NONE, (1 << 0), "Reserved"} 2417 }; 2418 2419 static struct mxge_media_type mxge_sfp_media_types[] = { 2420 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"}, 2421 {IFM_NONE, (1 << 7), "Reserved"}, 2422 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2423 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2424 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}, 2425 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"} 2426 }; 2427 2428 static void 2429 mxge_media_set(mxge_softc_t *sc, int media_type) 2430 { 2431 int fc_opt = 0; 2432 2433 if (media_type == IFM_NONE) 2434 return; 2435 2436 if (sc->pause) 2437 fc_opt = IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE; 2438 2439 ifmedia_add(&sc->media, MXGE_IFM | media_type, 0, NULL); 2440 ifmedia_set(&sc->media, MXGE_IFM | media_type | fc_opt); 2441 2442 sc->current_media = media_type; 2443 } 2444 2445 static void 2446 mxge_media_unset(mxge_softc_t *sc) 2447 { 2448 ifmedia_removeall(&sc->media); 2449 sc->current_media = IFM_NONE; 2450 } 2451 2452 static void 2453 mxge_media_init(mxge_softc_t *sc) 2454 { 2455 const char *ptr; 2456 int i; 2457 2458 mxge_media_unset(sc); 2459 2460 /* 2461 * Parse the product code to deterimine the interface type 2462 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2463 * after the 3rd dash in the driver's cached copy of the 2464 * EEPROM's product code string. 2465 */ 2466 ptr = sc->product_code_string; 2467 if (ptr == NULL) { 2468 if_printf(sc->ifp, "Missing product code\n"); 2469 return; 2470 } 2471 2472 for (i = 0; i < 3; i++, ptr++) { 2473 ptr = strchr(ptr, '-'); 2474 if (ptr == NULL) { 2475 if_printf(sc->ifp, "only %d dashes in PC?!?\n", i); 2476 return; 2477 } 2478 } 2479 if (*ptr == 'C' || *(ptr +1) == 'C') { 2480 /* -C is CX4 */ 2481 sc->connector = MXGE_CX4; 2482 mxge_media_set(sc, IFM_10G_CX4); 2483 } else if (*ptr == 'Q') { 2484 /* -Q is Quad Ribbon Fiber */ 2485 sc->connector = MXGE_QRF; 2486 if_printf(sc->ifp, "Quad Ribbon Fiber Media\n"); 2487 /* DragonFly has no media type for Quad ribbon fiber */ 2488 } else if (*ptr == 'R') { 2489 /* -R is XFP */ 2490 sc->connector = MXGE_XFP; 2491 /* NOTE: ifmedia will be installed later */ 2492 } else if (*ptr == 'S' || *(ptr +1) == 'S') { 2493 /* -S or -2S is SFP+ */ 2494 sc->connector = MXGE_SFP; 2495 /* NOTE: ifmedia will be installed later */ 2496 } else { 2497 sc->connector = MXGE_UNK; 2498 if_printf(sc->ifp, "Unknown media type: %c\n", *ptr); 2499 } 2500 } 2501 2502 /* 2503 * Determine the media type for a NIC. Some XFPs will identify 2504 * themselves only when their link is up, so this is initiated via a 2505 * link up interrupt. However, this can potentially take up to 2506 * several milliseconds, so it is run via the watchdog routine, rather 2507 * than in the interrupt handler itself. 2508 */ 2509 static void 2510 mxge_media_probe(mxge_softc_t *sc) 2511 { 2512 mxge_cmd_t cmd; 2513 const char *cage_type; 2514 struct mxge_media_type *mxge_media_types = NULL; 2515 int i, err, ms, mxge_media_type_entries; 2516 uint32_t byte; 2517 2518 sc->need_media_probe = 0; 2519 2520 if (sc->connector == MXGE_XFP) { 2521 /* -R is XFP */ 2522 mxge_media_types = mxge_xfp_media_types; 2523 mxge_media_type_entries = NELEM(mxge_xfp_media_types); 2524 byte = MXGE_XFP_COMPLIANCE_BYTE; 2525 cage_type = "XFP"; 2526 } else if (sc->connector == MXGE_SFP) { 2527 /* -S or -2S is SFP+ */ 2528 mxge_media_types = mxge_sfp_media_types; 2529 mxge_media_type_entries = NELEM(mxge_sfp_media_types); 2530 cage_type = "SFP+"; 2531 byte = 3; 2532 } else { 2533 /* nothing to do; media type cannot change */ 2534 return; 2535 } 2536 2537 /* 2538 * At this point we know the NIC has an XFP cage, so now we 2539 * try to determine what is in the cage by using the 2540 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2541 * register. We read just one byte, which may take over 2542 * a millisecond 2543 */ 2544 2545 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2546 cmd.data1 = byte; 2547 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2548 if (err != MXGEFW_CMD_OK) { 2549 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) 2550 if_printf(sc->ifp, "failed to read XFP\n"); 2551 else if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) 2552 if_printf(sc->ifp, "Type R/S with no XFP!?!?\n"); 2553 else 2554 if_printf(sc->ifp, "I2C read failed, err: %d", err); 2555 mxge_media_unset(sc); 2556 return; 2557 } 2558 2559 /* Now we wait for the data to be cached */ 2560 cmd.data0 = byte; 2561 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2562 for (ms = 0; err == EBUSY && ms < 50; ms++) { 2563 DELAY(1000); 2564 cmd.data0 = byte; 2565 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2566 } 2567 if (err != MXGEFW_CMD_OK) { 2568 if_printf(sc->ifp, "failed to read %s (%d, %dms)\n", 2569 cage_type, err, ms); 2570 mxge_media_unset(sc); 2571 return; 2572 } 2573 2574 if (cmd.data0 == mxge_media_types[0].bitmask) { 2575 if (bootverbose) { 2576 if_printf(sc->ifp, "%s:%s\n", cage_type, 2577 mxge_media_types[0].name); 2578 } 2579 if (sc->current_media != mxge_media_types[0].flag) { 2580 mxge_media_unset(sc); 2581 mxge_media_set(sc, mxge_media_types[0].flag); 2582 } 2583 return; 2584 } 2585 for (i = 1; i < mxge_media_type_entries; i++) { 2586 if (cmd.data0 & mxge_media_types[i].bitmask) { 2587 if (bootverbose) { 2588 if_printf(sc->ifp, "%s:%s\n", cage_type, 2589 mxge_media_types[i].name); 2590 } 2591 2592 if (sc->current_media != mxge_media_types[i].flag) { 2593 mxge_media_unset(sc); 2594 mxge_media_set(sc, mxge_media_types[i].flag); 2595 } 2596 return; 2597 } 2598 } 2599 mxge_media_unset(sc); 2600 if (bootverbose) { 2601 if_printf(sc->ifp, "%s media 0x%x unknown\n", cage_type, 2602 cmd.data0); 2603 } 2604 } 2605 2606 static void 2607 mxge_intr_status(struct mxge_softc *sc, const mcp_irq_data_t *stats) 2608 { 2609 if (sc->link_state != stats->link_up) { 2610 sc->link_state = stats->link_up; 2611 if (sc->link_state) { 2612 sc->ifp->if_link_state = LINK_STATE_UP; 2613 if_link_state_change(sc->ifp); 2614 if (bootverbose) 2615 if_printf(sc->ifp, "link up\n"); 2616 } else { 2617 sc->ifp->if_link_state = LINK_STATE_DOWN; 2618 if_link_state_change(sc->ifp); 2619 if (bootverbose) 2620 if_printf(sc->ifp, "link down\n"); 2621 } 2622 sc->need_media_probe = 1; 2623 } 2624 2625 if (sc->rdma_tags_available != be32toh(stats->rdma_tags_available)) { 2626 sc->rdma_tags_available = be32toh(stats->rdma_tags_available); 2627 if_printf(sc->ifp, "RDMA timed out! %d tags left\n", 2628 sc->rdma_tags_available); 2629 } 2630 2631 if (stats->link_down) { 2632 sc->down_cnt += stats->link_down; 2633 sc->link_state = 0; 2634 sc->ifp->if_link_state = LINK_STATE_DOWN; 2635 if_link_state_change(sc->ifp); 2636 } 2637 } 2638 2639 static void 2640 mxge_serialize_skipmain(struct mxge_softc *sc) 2641 { 2642 lwkt_serialize_array_enter(sc->serializes, sc->nserialize, 1); 2643 } 2644 2645 static void 2646 mxge_deserialize_skipmain(struct mxge_softc *sc) 2647 { 2648 lwkt_serialize_array_exit(sc->serializes, sc->nserialize, 1); 2649 } 2650 2651 static void 2652 mxge_legacy(void *arg) 2653 { 2654 struct mxge_slice_state *ss = arg; 2655 mxge_softc_t *sc = ss->sc; 2656 mcp_irq_data_t *stats = ss->fw_stats; 2657 mxge_tx_ring_t *tx = &ss->tx; 2658 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done; 2659 uint32_t send_done_count; 2660 uint8_t valid; 2661 2662 ASSERT_SERIALIZED(&sc->main_serialize); 2663 2664 /* Make sure the DMA has finished */ 2665 if (!stats->valid) 2666 return; 2667 valid = stats->valid; 2668 2669 /* Lower legacy IRQ */ 2670 *sc->irq_deassert = 0; 2671 if (!mxge_deassert_wait) { 2672 /* Don't wait for conf. that irq is low */ 2673 stats->valid = 0; 2674 } 2675 2676 mxge_serialize_skipmain(sc); 2677 2678 /* 2679 * Loop while waiting for legacy irq deassertion 2680 * XXX do we really want to loop? 2681 */ 2682 do { 2683 /* Check for transmit completes and receives */ 2684 send_done_count = be32toh(stats->send_done_count); 2685 while ((send_done_count != tx->pkt_done) || 2686 (rx_done->entry[rx_done->idx].length != 0)) { 2687 if (send_done_count != tx->pkt_done) { 2688 mxge_tx_done(&sc->arpcom.ac_if, tx, 2689 (int)send_done_count); 2690 } 2691 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1); 2692 send_done_count = be32toh(stats->send_done_count); 2693 } 2694 if (mxge_deassert_wait) 2695 wmb(); 2696 } while (*((volatile uint8_t *)&stats->valid)); 2697 2698 mxge_deserialize_skipmain(sc); 2699 2700 /* Fw link & error stats meaningful only on the first slice */ 2701 if (__predict_false(stats->stats_updated)) 2702 mxge_intr_status(sc, stats); 2703 2704 /* Check to see if we have rx token to pass back */ 2705 if (valid & 0x1) 2706 *ss->irq_claim = be32toh(3); 2707 *(ss->irq_claim + 1) = be32toh(3); 2708 } 2709 2710 static void 2711 mxge_msi(void *arg) 2712 { 2713 struct mxge_slice_state *ss = arg; 2714 mxge_softc_t *sc = ss->sc; 2715 mcp_irq_data_t *stats = ss->fw_stats; 2716 mxge_tx_ring_t *tx = &ss->tx; 2717 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done; 2718 uint32_t send_done_count; 2719 uint8_t valid; 2720 #ifndef IFPOLL_ENABLE 2721 const boolean_t polling = FALSE; 2722 #else 2723 boolean_t polling = FALSE; 2724 #endif 2725 2726 ASSERT_SERIALIZED(&sc->main_serialize); 2727 2728 /* Make sure the DMA has finished */ 2729 if (__predict_false(!stats->valid)) 2730 return; 2731 2732 valid = stats->valid; 2733 stats->valid = 0; 2734 2735 #ifdef IFPOLL_ENABLE 2736 if (sc->arpcom.ac_if.if_flags & IFF_NPOLLING) 2737 polling = TRUE; 2738 #endif 2739 2740 if (!polling) { 2741 /* Check for receives */ 2742 lwkt_serialize_enter(&ss->rx_data.rx_serialize); 2743 if (rx_done->entry[rx_done->idx].length != 0) 2744 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1); 2745 lwkt_serialize_exit(&ss->rx_data.rx_serialize); 2746 } 2747 2748 /* 2749 * Check for transmit completes 2750 * 2751 * NOTE: 2752 * Since pkt_done is only changed by mxge_tx_done(), 2753 * which is called only in interrupt handler, the 2754 * check w/o holding tx serializer is MPSAFE. 2755 */ 2756 send_done_count = be32toh(stats->send_done_count); 2757 if (send_done_count != tx->pkt_done) { 2758 lwkt_serialize_enter(&tx->tx_serialize); 2759 mxge_tx_done(&sc->arpcom.ac_if, tx, (int)send_done_count); 2760 lwkt_serialize_exit(&tx->tx_serialize); 2761 } 2762 2763 if (__predict_false(stats->stats_updated)) 2764 mxge_intr_status(sc, stats); 2765 2766 /* Check to see if we have rx token to pass back */ 2767 if (!polling && (valid & 0x1)) 2768 *ss->irq_claim = be32toh(3); 2769 *(ss->irq_claim + 1) = be32toh(3); 2770 } 2771 2772 static void 2773 mxge_msix_rx(void *arg) 2774 { 2775 struct mxge_slice_state *ss = arg; 2776 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done; 2777 2778 #ifdef IFPOLL_ENABLE 2779 if (ss->sc->arpcom.ac_if.if_flags & IFF_NPOLLING) 2780 return; 2781 #endif 2782 2783 ASSERT_SERIALIZED(&ss->rx_data.rx_serialize); 2784 2785 if (rx_done->entry[rx_done->idx].length != 0) 2786 mxge_clean_rx_done(&ss->sc->arpcom.ac_if, &ss->rx_data, -1); 2787 2788 *ss->irq_claim = be32toh(3); 2789 } 2790 2791 static void 2792 mxge_msix_rxtx(void *arg) 2793 { 2794 struct mxge_slice_state *ss = arg; 2795 mxge_softc_t *sc = ss->sc; 2796 mcp_irq_data_t *stats = ss->fw_stats; 2797 mxge_tx_ring_t *tx = &ss->tx; 2798 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done; 2799 uint32_t send_done_count; 2800 uint8_t valid; 2801 #ifndef IFPOLL_ENABLE 2802 const boolean_t polling = FALSE; 2803 #else 2804 boolean_t polling = FALSE; 2805 #endif 2806 2807 ASSERT_SERIALIZED(&ss->rx_data.rx_serialize); 2808 2809 /* Make sure the DMA has finished */ 2810 if (__predict_false(!stats->valid)) 2811 return; 2812 2813 valid = stats->valid; 2814 stats->valid = 0; 2815 2816 #ifdef IFPOLL_ENABLE 2817 if (sc->arpcom.ac_if.if_flags & IFF_NPOLLING) 2818 polling = TRUE; 2819 #endif 2820 2821 /* Check for receives */ 2822 if (!polling && rx_done->entry[rx_done->idx].length != 0) 2823 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1); 2824 2825 /* 2826 * Check for transmit completes 2827 * 2828 * NOTE: 2829 * Since pkt_done is only changed by mxge_tx_done(), 2830 * which is called only in interrupt handler, the 2831 * check w/o holding tx serializer is MPSAFE. 2832 */ 2833 send_done_count = be32toh(stats->send_done_count); 2834 if (send_done_count != tx->pkt_done) { 2835 lwkt_serialize_enter(&tx->tx_serialize); 2836 mxge_tx_done(&sc->arpcom.ac_if, tx, (int)send_done_count); 2837 lwkt_serialize_exit(&tx->tx_serialize); 2838 } 2839 2840 /* Check to see if we have rx token to pass back */ 2841 if (!polling && (valid & 0x1)) 2842 *ss->irq_claim = be32toh(3); 2843 *(ss->irq_claim + 1) = be32toh(3); 2844 } 2845 2846 static void 2847 mxge_init(void *arg) 2848 { 2849 struct mxge_softc *sc = arg; 2850 2851 ASSERT_IFNET_SERIALIZED_ALL(sc->ifp); 2852 if ((sc->ifp->if_flags & IFF_RUNNING) == 0) 2853 mxge_open(sc); 2854 } 2855 2856 static void 2857 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 2858 { 2859 int i; 2860 2861 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 2862 if (ss->rx_data.rx_big.info[i].m == NULL) 2863 continue; 2864 bus_dmamap_unload(ss->rx_data.rx_big.dmat, 2865 ss->rx_data.rx_big.info[i].map); 2866 m_freem(ss->rx_data.rx_big.info[i].m); 2867 ss->rx_data.rx_big.info[i].m = NULL; 2868 } 2869 2870 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) { 2871 if (ss->rx_data.rx_small.info[i].m == NULL) 2872 continue; 2873 bus_dmamap_unload(ss->rx_data.rx_small.dmat, 2874 ss->rx_data.rx_small.info[i].map); 2875 m_freem(ss->rx_data.rx_small.info[i].m); 2876 ss->rx_data.rx_small.info[i].m = NULL; 2877 } 2878 2879 /* Transmit ring used only on the first slice */ 2880 if (ss->tx.info == NULL) 2881 return; 2882 2883 for (i = 0; i <= ss->tx.mask; i++) { 2884 if (ss->tx.info[i].m == NULL) 2885 continue; 2886 bus_dmamap_unload(ss->tx.dmat, ss->tx.info[i].map); 2887 m_freem(ss->tx.info[i].m); 2888 ss->tx.info[i].m = NULL; 2889 } 2890 } 2891 2892 static void 2893 mxge_free_mbufs(mxge_softc_t *sc) 2894 { 2895 int slice; 2896 2897 for (slice = 0; slice < sc->num_slices; slice++) 2898 mxge_free_slice_mbufs(&sc->ss[slice]); 2899 } 2900 2901 static void 2902 mxge_free_slice_rings(struct mxge_slice_state *ss) 2903 { 2904 int i; 2905 2906 if (ss->rx_data.rx_done.entry != NULL) { 2907 mxge_dma_free(&ss->rx_done_dma); 2908 ss->rx_data.rx_done.entry = NULL; 2909 } 2910 2911 if (ss->tx.req_list != NULL) { 2912 kfree(ss->tx.req_list, M_DEVBUF); 2913 ss->tx.req_list = NULL; 2914 } 2915 2916 if (ss->tx.seg_list != NULL) { 2917 kfree(ss->tx.seg_list, M_DEVBUF); 2918 ss->tx.seg_list = NULL; 2919 } 2920 2921 if (ss->rx_data.rx_small.shadow != NULL) { 2922 kfree(ss->rx_data.rx_small.shadow, M_DEVBUF); 2923 ss->rx_data.rx_small.shadow = NULL; 2924 } 2925 2926 if (ss->rx_data.rx_big.shadow != NULL) { 2927 kfree(ss->rx_data.rx_big.shadow, M_DEVBUF); 2928 ss->rx_data.rx_big.shadow = NULL; 2929 } 2930 2931 if (ss->tx.info != NULL) { 2932 if (ss->tx.dmat != NULL) { 2933 for (i = 0; i <= ss->tx.mask; i++) { 2934 bus_dmamap_destroy(ss->tx.dmat, 2935 ss->tx.info[i].map); 2936 } 2937 bus_dma_tag_destroy(ss->tx.dmat); 2938 } 2939 kfree(ss->tx.info, M_DEVBUF); 2940 ss->tx.info = NULL; 2941 } 2942 2943 if (ss->rx_data.rx_small.info != NULL) { 2944 if (ss->rx_data.rx_small.dmat != NULL) { 2945 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) { 2946 bus_dmamap_destroy(ss->rx_data.rx_small.dmat, 2947 ss->rx_data.rx_small.info[i].map); 2948 } 2949 bus_dmamap_destroy(ss->rx_data.rx_small.dmat, 2950 ss->rx_data.rx_small.extra_map); 2951 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat); 2952 } 2953 kfree(ss->rx_data.rx_small.info, M_DEVBUF); 2954 ss->rx_data.rx_small.info = NULL; 2955 } 2956 2957 if (ss->rx_data.rx_big.info != NULL) { 2958 if (ss->rx_data.rx_big.dmat != NULL) { 2959 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 2960 bus_dmamap_destroy(ss->rx_data.rx_big.dmat, 2961 ss->rx_data.rx_big.info[i].map); 2962 } 2963 bus_dmamap_destroy(ss->rx_data.rx_big.dmat, 2964 ss->rx_data.rx_big.extra_map); 2965 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat); 2966 } 2967 kfree(ss->rx_data.rx_big.info, M_DEVBUF); 2968 ss->rx_data.rx_big.info = NULL; 2969 } 2970 } 2971 2972 static void 2973 mxge_free_rings(mxge_softc_t *sc) 2974 { 2975 int slice; 2976 2977 if (sc->ss == NULL) 2978 return; 2979 2980 for (slice = 0; slice < sc->num_slices; slice++) 2981 mxge_free_slice_rings(&sc->ss[slice]); 2982 } 2983 2984 static int 2985 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 2986 int tx_ring_entries) 2987 { 2988 mxge_softc_t *sc = ss->sc; 2989 size_t bytes; 2990 int err, i; 2991 2992 /* 2993 * Allocate per-slice receive resources 2994 */ 2995 2996 ss->rx_data.rx_small.mask = ss->rx_data.rx_big.mask = 2997 rx_ring_entries - 1; 2998 ss->rx_data.rx_done.mask = (2 * rx_ring_entries) - 1; 2999 3000 /* Allocate the rx shadow rings */ 3001 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_small.shadow); 3002 ss->rx_data.rx_small.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3003 3004 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_big.shadow); 3005 ss->rx_data.rx_big.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3006 3007 /* Allocate the rx host info rings */ 3008 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_small.info); 3009 ss->rx_data.rx_small.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3010 3011 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_big.info); 3012 ss->rx_data.rx_big.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3013 3014 /* Allocate the rx busdma resources */ 3015 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3016 1, /* alignment */ 3017 4096, /* boundary */ 3018 BUS_SPACE_MAXADDR, /* low */ 3019 BUS_SPACE_MAXADDR, /* high */ 3020 NULL, NULL, /* filter */ 3021 MHLEN, /* maxsize */ 3022 1, /* num segs */ 3023 MHLEN, /* maxsegsize */ 3024 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, 3025 /* flags */ 3026 &ss->rx_data.rx_small.dmat); /* tag */ 3027 if (err != 0) { 3028 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3029 err); 3030 return err; 3031 } 3032 3033 err = bus_dmamap_create(ss->rx_data.rx_small.dmat, BUS_DMA_WAITOK, 3034 &ss->rx_data.rx_small.extra_map); 3035 if (err != 0) { 3036 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", err); 3037 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat); 3038 ss->rx_data.rx_small.dmat = NULL; 3039 return err; 3040 } 3041 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) { 3042 err = bus_dmamap_create(ss->rx_data.rx_small.dmat, 3043 BUS_DMA_WAITOK, &ss->rx_data.rx_small.info[i].map); 3044 if (err != 0) { 3045 int j; 3046 3047 device_printf(sc->dev, "Err %d rx_small dmamap\n", err); 3048 3049 for (j = 0; j < i; ++j) { 3050 bus_dmamap_destroy(ss->rx_data.rx_small.dmat, 3051 ss->rx_data.rx_small.info[j].map); 3052 } 3053 bus_dmamap_destroy(ss->rx_data.rx_small.dmat, 3054 ss->rx_data.rx_small.extra_map); 3055 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat); 3056 ss->rx_data.rx_small.dmat = NULL; 3057 return err; 3058 } 3059 } 3060 3061 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3062 1, /* alignment */ 3063 4096, /* boundary */ 3064 BUS_SPACE_MAXADDR, /* low */ 3065 BUS_SPACE_MAXADDR, /* high */ 3066 NULL, NULL, /* filter */ 3067 4096, /* maxsize */ 3068 1, /* num segs */ 3069 4096, /* maxsegsize*/ 3070 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, 3071 /* flags */ 3072 &ss->rx_data.rx_big.dmat); /* tag */ 3073 if (err != 0) { 3074 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3075 err); 3076 return err; 3077 } 3078 3079 err = bus_dmamap_create(ss->rx_data.rx_big.dmat, BUS_DMA_WAITOK, 3080 &ss->rx_data.rx_big.extra_map); 3081 if (err != 0) { 3082 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", err); 3083 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat); 3084 ss->rx_data.rx_big.dmat = NULL; 3085 return err; 3086 } 3087 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 3088 err = bus_dmamap_create(ss->rx_data.rx_big.dmat, BUS_DMA_WAITOK, 3089 &ss->rx_data.rx_big.info[i].map); 3090 if (err != 0) { 3091 int j; 3092 3093 device_printf(sc->dev, "Err %d rx_big dmamap\n", err); 3094 for (j = 0; j < i; ++j) { 3095 bus_dmamap_destroy(ss->rx_data.rx_big.dmat, 3096 ss->rx_data.rx_big.info[j].map); 3097 } 3098 bus_dmamap_destroy(ss->rx_data.rx_big.dmat, 3099 ss->rx_data.rx_big.extra_map); 3100 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat); 3101 ss->rx_data.rx_big.dmat = NULL; 3102 return err; 3103 } 3104 } 3105 3106 /* 3107 * Now allocate TX resources 3108 */ 3109 3110 ss->tx.mask = tx_ring_entries - 1; 3111 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3112 3113 /* 3114 * Allocate the tx request copy block; MUST be at least 8 bytes 3115 * aligned 3116 */ 3117 bytes = sizeof(*ss->tx.req_list) * (ss->tx.max_desc + 4); 3118 ss->tx.req_list = kmalloc_cachealign(__VM_CACHELINE_ALIGN(bytes), 3119 M_DEVBUF, M_WAITOK); 3120 3121 /* Allocate the tx busdma segment list */ 3122 bytes = sizeof(*ss->tx.seg_list) * ss->tx.max_desc; 3123 ss->tx.seg_list = kmalloc(bytes, M_DEVBUF, M_WAITOK); 3124 3125 /* Allocate the tx host info ring */ 3126 bytes = tx_ring_entries * sizeof(*ss->tx.info); 3127 ss->tx.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3128 3129 /* Allocate the tx busdma resources */ 3130 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3131 1, /* alignment */ 3132 sc->tx_boundary, /* boundary */ 3133 BUS_SPACE_MAXADDR, /* low */ 3134 BUS_SPACE_MAXADDR, /* high */ 3135 NULL, NULL, /* filter */ 3136 IP_MAXPACKET + 3137 sizeof(struct ether_vlan_header), 3138 /* maxsize */ 3139 ss->tx.max_desc - 2, /* num segs */ 3140 sc->tx_boundary, /* maxsegsz */ 3141 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | 3142 BUS_DMA_ONEBPAGE, /* flags */ 3143 &ss->tx.dmat); /* tag */ 3144 if (err != 0) { 3145 device_printf(sc->dev, "Err %d allocating tx dmat\n", err); 3146 return err; 3147 } 3148 3149 /* 3150 * Now use these tags to setup DMA maps for each slot in the ring 3151 */ 3152 for (i = 0; i <= ss->tx.mask; i++) { 3153 err = bus_dmamap_create(ss->tx.dmat, 3154 BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &ss->tx.info[i].map); 3155 if (err != 0) { 3156 int j; 3157 3158 device_printf(sc->dev, "Err %d tx dmamap\n", err); 3159 for (j = 0; j < i; ++j) { 3160 bus_dmamap_destroy(ss->tx.dmat, 3161 ss->tx.info[j].map); 3162 } 3163 bus_dma_tag_destroy(ss->tx.dmat); 3164 ss->tx.dmat = NULL; 3165 return err; 3166 } 3167 } 3168 return 0; 3169 } 3170 3171 static int 3172 mxge_alloc_rings(mxge_softc_t *sc) 3173 { 3174 mxge_cmd_t cmd; 3175 int tx_ring_size; 3176 int tx_ring_entries, rx_ring_entries; 3177 int err, slice; 3178 3179 /* Get ring sizes */ 3180 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3181 if (err != 0) { 3182 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3183 return err; 3184 } 3185 tx_ring_size = cmd.data0; 3186 3187 tx_ring_entries = tx_ring_size / sizeof(mcp_kreq_ether_send_t); 3188 rx_ring_entries = sc->rx_intr_slots / 2; 3189 3190 if (bootverbose) { 3191 device_printf(sc->dev, "tx desc %d, rx desc %d\n", 3192 tx_ring_entries, rx_ring_entries); 3193 } 3194 3195 sc->ifp->if_nmbclusters = rx_ring_entries * sc->num_slices; 3196 sc->ifp->if_nmbjclusters = sc->ifp->if_nmbclusters; 3197 3198 ifq_set_maxlen(&sc->ifp->if_snd, tx_ring_entries - 1); 3199 ifq_set_ready(&sc->ifp->if_snd); 3200 ifq_set_subq_cnt(&sc->ifp->if_snd, sc->num_tx_rings); 3201 3202 if (sc->num_tx_rings > 1) { 3203 sc->ifp->if_mapsubq = ifq_mapsubq_mask; 3204 ifq_set_subq_mask(&sc->ifp->if_snd, sc->num_tx_rings - 1); 3205 } 3206 3207 for (slice = 0; slice < sc->num_slices; slice++) { 3208 err = mxge_alloc_slice_rings(&sc->ss[slice], 3209 rx_ring_entries, tx_ring_entries); 3210 if (err != 0) { 3211 device_printf(sc->dev, 3212 "alloc %d slice rings failed\n", slice); 3213 return err; 3214 } 3215 } 3216 return 0; 3217 } 3218 3219 static void 3220 mxge_choose_params(int mtu, int *cl_size) 3221 { 3222 int bufsize = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN + MXGEFW_PAD; 3223 3224 if (bufsize < MCLBYTES) { 3225 *cl_size = MCLBYTES; 3226 } else { 3227 KASSERT(bufsize < MJUMPAGESIZE, ("invalid MTU %d", mtu)); 3228 *cl_size = MJUMPAGESIZE; 3229 } 3230 } 3231 3232 static int 3233 mxge_slice_open(struct mxge_slice_state *ss, int cl_size) 3234 { 3235 mxge_cmd_t cmd; 3236 int err, i, slice; 3237 3238 slice = ss - ss->sc->ss; 3239 3240 /* 3241 * Get the lanai pointers to the send and receive rings 3242 */ 3243 err = 0; 3244 3245 if (ss->sc->num_tx_rings == 1) { 3246 if (slice == 0) { 3247 cmd.data0 = slice; 3248 err = mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SEND_OFFSET, 3249 &cmd); 3250 ss->tx.lanai = (volatile mcp_kreq_ether_send_t *) 3251 (ss->sc->sram + cmd.data0); 3252 /* Leave send_go and send_stop as NULL */ 3253 } 3254 } else { 3255 cmd.data0 = slice; 3256 err = mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3257 ss->tx.lanai = (volatile mcp_kreq_ether_send_t *) 3258 (ss->sc->sram + cmd.data0); 3259 ss->tx.send_go = (volatile uint32_t *) 3260 (ss->sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3261 ss->tx.send_stop = (volatile uint32_t *) 3262 (ss->sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3263 } 3264 3265 cmd.data0 = slice; 3266 err |= mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3267 ss->rx_data.rx_small.lanai = 3268 (volatile mcp_kreq_ether_recv_t *)(ss->sc->sram + cmd.data0); 3269 3270 cmd.data0 = slice; 3271 err |= mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3272 ss->rx_data.rx_big.lanai = 3273 (volatile mcp_kreq_ether_recv_t *)(ss->sc->sram + cmd.data0); 3274 3275 if (err != 0) { 3276 if_printf(ss->sc->ifp, 3277 "failed to get ring sizes or locations\n"); 3278 return EIO; 3279 } 3280 3281 /* 3282 * Stock small receive ring 3283 */ 3284 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) { 3285 err = mxge_get_buf_small(&ss->rx_data.rx_small, 3286 ss->rx_data.rx_small.info[i].map, i, TRUE); 3287 if (err) { 3288 if_printf(ss->sc->ifp, "alloced %d/%d smalls\n", i, 3289 ss->rx_data.rx_small.mask + 1); 3290 return ENOMEM; 3291 } 3292 } 3293 3294 /* 3295 * Stock big receive ring 3296 */ 3297 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 3298 ss->rx_data.rx_big.shadow[i].addr_low = 0xffffffff; 3299 ss->rx_data.rx_big.shadow[i].addr_high = 0xffffffff; 3300 } 3301 3302 ss->rx_data.rx_big.cl_size = cl_size; 3303 3304 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 3305 err = mxge_get_buf_big(&ss->rx_data.rx_big, 3306 ss->rx_data.rx_big.info[i].map, i, TRUE); 3307 if (err) { 3308 if_printf(ss->sc->ifp, "alloced %d/%d bigs\n", i, 3309 ss->rx_data.rx_big.mask + 1); 3310 return ENOMEM; 3311 } 3312 } 3313 return 0; 3314 } 3315 3316 static int 3317 mxge_open(mxge_softc_t *sc) 3318 { 3319 struct ifnet *ifp = sc->ifp; 3320 mxge_cmd_t cmd; 3321 int err, slice, cl_size, i; 3322 bus_addr_t bus; 3323 volatile uint8_t *itable; 3324 struct mxge_slice_state *ss; 3325 3326 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3327 3328 /* Copy the MAC address in case it was overridden */ 3329 bcopy(IF_LLADDR(ifp), sc->mac_addr, ETHER_ADDR_LEN); 3330 3331 err = mxge_reset(sc, 1); 3332 if (err != 0) { 3333 if_printf(ifp, "failed to reset\n"); 3334 return EIO; 3335 } 3336 3337 if (sc->num_slices > 1) { 3338 /* Setup the indirection table */ 3339 cmd.data0 = sc->num_slices; 3340 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, &cmd); 3341 3342 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, &cmd); 3343 if (err != 0) { 3344 if_printf(ifp, "failed to setup rss tables\n"); 3345 return err; 3346 } 3347 3348 /* Just enable an identity mapping */ 3349 itable = sc->sram + cmd.data0; 3350 for (i = 0; i < sc->num_slices; i++) 3351 itable[i] = (uint8_t)i; 3352 3353 if (sc->use_rss) { 3354 volatile uint8_t *hwkey; 3355 uint8_t swkey[MXGE_HWRSS_KEYLEN]; 3356 3357 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_KEY_OFFSET, 3358 &cmd); 3359 if (err != 0) { 3360 if_printf(ifp, "failed to get rsskey\n"); 3361 return err; 3362 } 3363 hwkey = sc->sram + cmd.data0; 3364 3365 toeplitz_get_key(swkey, MXGE_HWRSS_KEYLEN); 3366 for (i = 0; i < MXGE_HWRSS_KEYLEN; ++i) 3367 hwkey[i] = swkey[i]; 3368 wmb(); 3369 3370 err = mxge_send_cmd(sc, MXGEFW_CMD_RSS_KEY_UPDATED, 3371 &cmd); 3372 if (err != 0) { 3373 if_printf(ifp, "failed to update rsskey\n"); 3374 return err; 3375 } 3376 if (bootverbose) 3377 if_printf(ifp, "RSS key updated\n"); 3378 } 3379 3380 cmd.data0 = 1; 3381 if (sc->use_rss) { 3382 if (bootverbose) 3383 if_printf(ifp, "input hash: RSS\n"); 3384 cmd.data1 = MXGEFW_RSS_HASH_TYPE_IPV4 | 3385 MXGEFW_RSS_HASH_TYPE_TCP_IPV4; 3386 } else { 3387 if (bootverbose) 3388 if_printf(ifp, "input hash: SRC_DST_PORT\n"); 3389 cmd.data1 = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 3390 } 3391 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3392 if (err != 0) { 3393 if_printf(ifp, "failed to enable slices\n"); 3394 return err; 3395 } 3396 } 3397 3398 cmd.data0 = MXGEFW_TSO_MODE_NDIS; 3399 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_TSO_MODE, &cmd); 3400 if (err) { 3401 /* 3402 * Can't change TSO mode to NDIS, never allow TSO then 3403 */ 3404 if_printf(ifp, "failed to set TSO mode\n"); 3405 ifp->if_capenable &= ~IFCAP_TSO; 3406 ifp->if_capabilities &= ~IFCAP_TSO; 3407 ifp->if_hwassist &= ~CSUM_TSO; 3408 } 3409 3410 mxge_choose_params(ifp->if_mtu, &cl_size); 3411 3412 cmd.data0 = 1; 3413 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, &cmd); 3414 /* 3415 * Error is only meaningful if we're trying to set 3416 * MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 3417 */ 3418 3419 /* 3420 * Give the firmware the mtu and the big and small buffer 3421 * sizes. The firmware wants the big buf size to be a power 3422 * of two. Luckily, DragonFly's clusters are powers of two 3423 */ 3424 cmd.data0 = ifp->if_mtu + ETHER_HDR_LEN + EVL_ENCAPLEN; 3425 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3426 3427 cmd.data0 = MXGE_RX_SMALL_BUFLEN; 3428 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd); 3429 3430 cmd.data0 = cl_size; 3431 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3432 3433 if (err != 0) { 3434 if_printf(ifp, "failed to setup params\n"); 3435 goto abort; 3436 } 3437 3438 /* Now give him the pointer to the stats block */ 3439 for (slice = 0; slice < sc->num_slices; slice++) { 3440 ss = &sc->ss[slice]; 3441 cmd.data0 = MXGE_LOWPART_TO_U32(ss->fw_stats_dma.dmem_busaddr); 3442 cmd.data1 = MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.dmem_busaddr); 3443 cmd.data2 = sizeof(struct mcp_irq_data); 3444 cmd.data2 |= (slice << 16); 3445 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3446 } 3447 3448 if (err != 0) { 3449 bus = sc->ss->fw_stats_dma.dmem_busaddr; 3450 bus += offsetof(struct mcp_irq_data, send_done_count); 3451 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3452 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3453 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3454 &cmd); 3455 3456 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3457 sc->fw_multicast_support = 0; 3458 } else { 3459 sc->fw_multicast_support = 1; 3460 } 3461 3462 if (err != 0) { 3463 if_printf(ifp, "failed to setup params\n"); 3464 goto abort; 3465 } 3466 3467 for (slice = 0; slice < sc->num_slices; slice++) { 3468 err = mxge_slice_open(&sc->ss[slice], cl_size); 3469 if (err != 0) { 3470 if_printf(ifp, "couldn't open slice %d\n", slice); 3471 goto abort; 3472 } 3473 } 3474 3475 /* Finally, start the firmware running */ 3476 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3477 if (err) { 3478 if_printf(ifp, "Couldn't bring up link\n"); 3479 goto abort; 3480 } 3481 3482 ifp->if_flags |= IFF_RUNNING; 3483 for (i = 0; i < sc->num_tx_rings; ++i) { 3484 mxge_tx_ring_t *tx = &sc->ss[i].tx; 3485 3486 ifsq_clr_oactive(tx->ifsq); 3487 ifsq_watchdog_start(&tx->watchdog); 3488 } 3489 3490 return 0; 3491 3492 abort: 3493 mxge_free_mbufs(sc); 3494 return err; 3495 } 3496 3497 static void 3498 mxge_close(mxge_softc_t *sc, int down) 3499 { 3500 struct ifnet *ifp = sc->ifp; 3501 mxge_cmd_t cmd; 3502 int err, old_down_cnt, i; 3503 3504 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3505 3506 if (!down) { 3507 old_down_cnt = sc->down_cnt; 3508 wmb(); 3509 3510 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3511 if (err) 3512 if_printf(ifp, "Couldn't bring down link\n"); 3513 3514 if (old_down_cnt == sc->down_cnt) { 3515 /* 3516 * Wait for down irq 3517 * XXX racy 3518 */ 3519 ifnet_deserialize_all(ifp); 3520 DELAY(10 * sc->intr_coal_delay); 3521 ifnet_serialize_all(ifp); 3522 } 3523 3524 wmb(); 3525 if (old_down_cnt == sc->down_cnt) 3526 if_printf(ifp, "never got down irq\n"); 3527 } 3528 mxge_free_mbufs(sc); 3529 3530 ifp->if_flags &= ~IFF_RUNNING; 3531 for (i = 0; i < sc->num_tx_rings; ++i) { 3532 mxge_tx_ring_t *tx = &sc->ss[i].tx; 3533 3534 ifsq_clr_oactive(tx->ifsq); 3535 ifsq_watchdog_stop(&tx->watchdog); 3536 } 3537 } 3538 3539 static void 3540 mxge_setup_cfg_space(mxge_softc_t *sc) 3541 { 3542 device_t dev = sc->dev; 3543 int reg; 3544 uint16_t lnk, pectl; 3545 3546 /* Find the PCIe link width and set max read request to 4KB */ 3547 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 3548 lnk = pci_read_config(dev, reg + 0x12, 2); 3549 sc->link_width = (lnk >> 4) & 0x3f; 3550 3551 if (sc->pectl == 0) { 3552 pectl = pci_read_config(dev, reg + 0x8, 2); 3553 pectl = (pectl & ~0x7000) | (5 << 12); 3554 pci_write_config(dev, reg + 0x8, pectl, 2); 3555 sc->pectl = pectl; 3556 } else { 3557 /* Restore saved pectl after watchdog reset */ 3558 pci_write_config(dev, reg + 0x8, sc->pectl, 2); 3559 } 3560 } 3561 3562 /* Enable DMA and memory space access */ 3563 pci_enable_busmaster(dev); 3564 } 3565 3566 static uint32_t 3567 mxge_read_reboot(mxge_softc_t *sc) 3568 { 3569 device_t dev = sc->dev; 3570 uint32_t vs; 3571 3572 /* Find the vendor specific offset */ 3573 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) { 3574 if_printf(sc->ifp, "could not find vendor specific offset\n"); 3575 return (uint32_t)-1; 3576 } 3577 /* Enable read32 mode */ 3578 pci_write_config(dev, vs + 0x10, 0x3, 1); 3579 /* Tell NIC which register to read */ 3580 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3581 return pci_read_config(dev, vs + 0x14, 4); 3582 } 3583 3584 static void 3585 mxge_watchdog_reset(mxge_softc_t *sc) 3586 { 3587 struct pci_devinfo *dinfo; 3588 int err, running; 3589 uint32_t reboot; 3590 uint16_t cmd; 3591 3592 err = ENXIO; 3593 3594 if_printf(sc->ifp, "Watchdog reset!\n"); 3595 3596 /* 3597 * Check to see if the NIC rebooted. If it did, then all of 3598 * PCI config space has been reset, and things like the 3599 * busmaster bit will be zero. If this is the case, then we 3600 * must restore PCI config space before the NIC can be used 3601 * again 3602 */ 3603 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3604 if (cmd == 0xffff) { 3605 /* 3606 * Maybe the watchdog caught the NIC rebooting; wait 3607 * up to 100ms for it to finish. If it does not come 3608 * back, then give up 3609 */ 3610 DELAY(1000*100); 3611 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3612 if (cmd == 0xffff) 3613 if_printf(sc->ifp, "NIC disappeared!\n"); 3614 } 3615 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3616 /* Print the reboot status */ 3617 reboot = mxge_read_reboot(sc); 3618 if_printf(sc->ifp, "NIC rebooted, status = 0x%x\n", reboot); 3619 3620 running = sc->ifp->if_flags & IFF_RUNNING; 3621 if (running) { 3622 /* 3623 * Quiesce NIC so that TX routines will not try to 3624 * xmit after restoration of BAR 3625 */ 3626 3627 /* Mark the link as down */ 3628 if (sc->link_state) { 3629 sc->ifp->if_link_state = LINK_STATE_DOWN; 3630 if_link_state_change(sc->ifp); 3631 } 3632 mxge_close(sc, 1); 3633 } 3634 /* Restore PCI configuration space */ 3635 dinfo = device_get_ivars(sc->dev); 3636 pci_cfg_restore(sc->dev, dinfo); 3637 3638 /* And redo any changes we made to our config space */ 3639 mxge_setup_cfg_space(sc); 3640 3641 /* Reload f/w */ 3642 err = mxge_load_firmware(sc, 0); 3643 if (err) 3644 if_printf(sc->ifp, "Unable to re-load f/w\n"); 3645 if (running && !err) { 3646 int i; 3647 3648 err = mxge_open(sc); 3649 3650 for (i = 0; i < sc->num_tx_rings; ++i) 3651 ifsq_devstart_sched(sc->ss[i].tx.ifsq); 3652 } 3653 sc->watchdog_resets++; 3654 } else { 3655 if_printf(sc->ifp, "NIC did not reboot, not resetting\n"); 3656 err = 0; 3657 } 3658 if (err) { 3659 if_printf(sc->ifp, "watchdog reset failed\n"); 3660 } else { 3661 if (sc->dying == 2) 3662 sc->dying = 0; 3663 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3664 } 3665 } 3666 3667 static void 3668 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice) 3669 { 3670 if_printf(sc->ifp, "slice %d struck? ring state:\n", slice); 3671 if_printf(sc->ifp, "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3672 tx->req, tx->done, tx->queue_active); 3673 if_printf(sc->ifp, "tx.activate=%d tx.deactivate=%d\n", 3674 tx->activate, tx->deactivate); 3675 if_printf(sc->ifp, "pkt_done=%d fw=%d\n", 3676 tx->pkt_done, be32toh(sc->ss->fw_stats->send_done_count)); 3677 } 3678 3679 static u_long 3680 mxge_update_stats(mxge_softc_t *sc) 3681 { 3682 u_long ipackets, opackets, pkts; 3683 3684 IFNET_STAT_GET(sc->ifp, ipackets, ipackets); 3685 IFNET_STAT_GET(sc->ifp, opackets, opackets); 3686 3687 pkts = ipackets - sc->ipackets; 3688 pkts += opackets - sc->opackets; 3689 3690 sc->ipackets = ipackets; 3691 sc->opackets = opackets; 3692 3693 return pkts; 3694 } 3695 3696 static void 3697 mxge_tick(void *arg) 3698 { 3699 mxge_softc_t *sc = arg; 3700 u_long pkts = 0; 3701 int err = 0; 3702 int ticks; 3703 3704 lwkt_serialize_enter(&sc->main_serialize); 3705 3706 ticks = mxge_ticks; 3707 if (sc->ifp->if_flags & IFF_RUNNING) { 3708 /* Aggregate stats from different slices */ 3709 pkts = mxge_update_stats(sc); 3710 if (sc->need_media_probe) 3711 mxge_media_probe(sc); 3712 } 3713 if (pkts == 0) { 3714 uint16_t cmd; 3715 3716 /* Ensure NIC did not suffer h/w fault while idle */ 3717 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3718 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3719 sc->dying = 2; 3720 mxge_serialize_skipmain(sc); 3721 mxge_watchdog_reset(sc); 3722 mxge_deserialize_skipmain(sc); 3723 err = ENXIO; 3724 } 3725 3726 /* Look less often if NIC is idle */ 3727 ticks *= 4; 3728 } 3729 3730 if (err == 0) 3731 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 3732 3733 lwkt_serialize_exit(&sc->main_serialize); 3734 } 3735 3736 static int 3737 mxge_media_change(struct ifnet *ifp) 3738 { 3739 mxge_softc_t *sc = ifp->if_softc; 3740 const struct ifmedia *ifm = &sc->media; 3741 int pause; 3742 3743 if (IFM_OPTIONS(ifm->ifm_media) & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) { 3744 if (sc->pause) 3745 return 0; 3746 pause = 1; 3747 } else { 3748 if (!sc->pause) 3749 return 0; 3750 pause = 0; 3751 } 3752 return mxge_change_pause(sc, pause); 3753 } 3754 3755 static int 3756 mxge_change_mtu(mxge_softc_t *sc, int mtu) 3757 { 3758 struct ifnet *ifp = sc->ifp; 3759 int real_mtu, old_mtu; 3760 int err = 0; 3761 3762 real_mtu = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN; 3763 if (mtu > sc->max_mtu || real_mtu < 60) 3764 return EINVAL; 3765 3766 old_mtu = ifp->if_mtu; 3767 ifp->if_mtu = mtu; 3768 if (ifp->if_flags & IFF_RUNNING) { 3769 mxge_close(sc, 0); 3770 err = mxge_open(sc); 3771 if (err != 0) { 3772 ifp->if_mtu = old_mtu; 3773 mxge_close(sc, 0); 3774 mxge_open(sc); 3775 } 3776 } 3777 return err; 3778 } 3779 3780 static void 3781 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 3782 { 3783 mxge_softc_t *sc = ifp->if_softc; 3784 3785 ifmr->ifm_status = IFM_AVALID; 3786 ifmr->ifm_active = IFM_ETHER; 3787 3788 if (sc->link_state) 3789 ifmr->ifm_status |= IFM_ACTIVE; 3790 3791 /* 3792 * Autoselect is not supported, so the current media 3793 * should be delivered. 3794 */ 3795 ifmr->ifm_active |= sc->current_media; 3796 if (sc->current_media != IFM_NONE) { 3797 ifmr->ifm_active |= MXGE_IFM; 3798 if (sc->pause) 3799 ifmr->ifm_active |= IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE; 3800 } 3801 } 3802 3803 static int 3804 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data, 3805 struct ucred *cr __unused) 3806 { 3807 mxge_softc_t *sc = ifp->if_softc; 3808 struct ifreq *ifr = (struct ifreq *)data; 3809 int err, mask; 3810 3811 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3812 err = 0; 3813 3814 switch (command) { 3815 case SIOCSIFMTU: 3816 err = mxge_change_mtu(sc, ifr->ifr_mtu); 3817 break; 3818 3819 case SIOCSIFFLAGS: 3820 if (sc->dying) 3821 return EINVAL; 3822 3823 if (ifp->if_flags & IFF_UP) { 3824 if (!(ifp->if_flags & IFF_RUNNING)) { 3825 err = mxge_open(sc); 3826 } else { 3827 /* 3828 * Take care of PROMISC and ALLMULTI 3829 * flag changes 3830 */ 3831 mxge_change_promisc(sc, 3832 ifp->if_flags & IFF_PROMISC); 3833 mxge_set_multicast_list(sc); 3834 } 3835 } else { 3836 if (ifp->if_flags & IFF_RUNNING) 3837 mxge_close(sc, 0); 3838 } 3839 break; 3840 3841 case SIOCADDMULTI: 3842 case SIOCDELMULTI: 3843 mxge_set_multicast_list(sc); 3844 break; 3845 3846 case SIOCSIFCAP: 3847 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 3848 if (mask & IFCAP_TXCSUM) { 3849 ifp->if_capenable ^= IFCAP_TXCSUM; 3850 if (ifp->if_capenable & IFCAP_TXCSUM) 3851 ifp->if_hwassist |= CSUM_TCP | CSUM_UDP; 3852 else 3853 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP); 3854 } 3855 if (mask & IFCAP_TSO) { 3856 ifp->if_capenable ^= IFCAP_TSO; 3857 if (ifp->if_capenable & IFCAP_TSO) 3858 ifp->if_hwassist |= CSUM_TSO; 3859 else 3860 ifp->if_hwassist &= ~CSUM_TSO; 3861 } 3862 if (mask & IFCAP_RXCSUM) 3863 ifp->if_capenable ^= IFCAP_RXCSUM; 3864 if (mask & IFCAP_VLAN_HWTAGGING) 3865 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 3866 break; 3867 3868 case SIOCGIFMEDIA: 3869 case SIOCSIFMEDIA: 3870 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 3871 &sc->media, command); 3872 break; 3873 3874 default: 3875 err = ether_ioctl(ifp, command, data); 3876 break; 3877 } 3878 return err; 3879 } 3880 3881 static void 3882 mxge_fetch_tunables(mxge_softc_t *sc) 3883 { 3884 int ifm; 3885 3886 sc->intr_coal_delay = mxge_intr_coal_delay; 3887 if (sc->intr_coal_delay < 0 || sc->intr_coal_delay > (10 * 1000)) 3888 sc->intr_coal_delay = MXGE_INTR_COAL_DELAY; 3889 3890 /* XXX */ 3891 if (mxge_ticks == 0) 3892 mxge_ticks = hz / 2; 3893 3894 ifm = ifmedia_str2ethfc(mxge_flowctrl); 3895 if (ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) 3896 sc->pause = 1; 3897 3898 sc->use_rss = mxge_use_rss; 3899 3900 sc->throttle = mxge_throttle; 3901 if (sc->throttle && sc->throttle > MXGE_MAX_THROTTLE) 3902 sc->throttle = MXGE_MAX_THROTTLE; 3903 if (sc->throttle && sc->throttle < MXGE_MIN_THROTTLE) 3904 sc->throttle = MXGE_MIN_THROTTLE; 3905 } 3906 3907 static void 3908 mxge_free_slices(mxge_softc_t *sc) 3909 { 3910 struct mxge_slice_state *ss; 3911 int i; 3912 3913 if (sc->ss == NULL) 3914 return; 3915 3916 for (i = 0; i < sc->num_slices; i++) { 3917 ss = &sc->ss[i]; 3918 if (ss->fw_stats != NULL) { 3919 mxge_dma_free(&ss->fw_stats_dma); 3920 ss->fw_stats = NULL; 3921 } 3922 if (ss->rx_data.rx_done.entry != NULL) { 3923 mxge_dma_free(&ss->rx_done_dma); 3924 ss->rx_data.rx_done.entry = NULL; 3925 } 3926 } 3927 kfree(sc->ss, M_DEVBUF); 3928 sc->ss = NULL; 3929 } 3930 3931 static int 3932 mxge_alloc_slices(mxge_softc_t *sc) 3933 { 3934 mxge_cmd_t cmd; 3935 struct mxge_slice_state *ss; 3936 size_t bytes; 3937 int err, i, rx_ring_size; 3938 3939 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 3940 if (err != 0) { 3941 device_printf(sc->dev, "Cannot determine rx ring size\n"); 3942 return err; 3943 } 3944 rx_ring_size = cmd.data0; 3945 sc->rx_intr_slots = 2 * (rx_ring_size / sizeof (mcp_dma_addr_t)); 3946 3947 bytes = sizeof(*sc->ss) * sc->num_slices; 3948 sc->ss = kmalloc_cachealign(bytes, M_DEVBUF, M_WAITOK | M_ZERO); 3949 3950 for (i = 0; i < sc->num_slices; i++) { 3951 ss = &sc->ss[i]; 3952 3953 ss->sc = sc; 3954 3955 lwkt_serialize_init(&ss->rx_data.rx_serialize); 3956 lwkt_serialize_init(&ss->tx.tx_serialize); 3957 ss->intr_rid = -1; 3958 3959 /* 3960 * Allocate per-slice rx interrupt queue 3961 * XXX assume 4bytes mcp_slot 3962 */ 3963 bytes = sc->rx_intr_slots * sizeof(mcp_slot_t); 3964 err = mxge_dma_alloc(sc, &ss->rx_done_dma, bytes, 4096); 3965 if (err != 0) { 3966 device_printf(sc->dev, 3967 "alloc %d slice rx_done failed\n", i); 3968 return err; 3969 } 3970 ss->rx_data.rx_done.entry = ss->rx_done_dma.dmem_addr; 3971 3972 /* 3973 * Allocate the per-slice firmware stats 3974 */ 3975 bytes = sizeof(*ss->fw_stats); 3976 err = mxge_dma_alloc(sc, &ss->fw_stats_dma, 3977 sizeof(*ss->fw_stats), 64); 3978 if (err != 0) { 3979 device_printf(sc->dev, 3980 "alloc %d fw_stats failed\n", i); 3981 return err; 3982 } 3983 ss->fw_stats = ss->fw_stats_dma.dmem_addr; 3984 } 3985 return 0; 3986 } 3987 3988 static void 3989 mxge_slice_probe(mxge_softc_t *sc) 3990 { 3991 int status, max_intr_slots, max_slices, num_slices; 3992 int msix_cnt, msix_enable, i, multi_tx; 3993 mxge_cmd_t cmd; 3994 const char *old_fw; 3995 3996 sc->num_slices = 1; 3997 sc->num_tx_rings = 1; 3998 3999 num_slices = device_getenv_int(sc->dev, "num_slices", mxge_num_slices); 4000 if (num_slices == 1) 4001 return; 4002 4003 if (ncpus2 == 1) 4004 return; 4005 4006 msix_enable = device_getenv_int(sc->dev, "msix.enable", 4007 mxge_msix_enable); 4008 if (!msix_enable) 4009 return; 4010 4011 msix_cnt = pci_msix_count(sc->dev); 4012 if (msix_cnt < 2) 4013 return; 4014 4015 /* 4016 * Round down MSI-X vector count to the nearest power of 2 4017 */ 4018 i = 0; 4019 while ((1 << (i + 1)) <= msix_cnt) 4020 ++i; 4021 msix_cnt = 1 << i; 4022 4023 /* 4024 * Now load the slice aware firmware see what it supports 4025 */ 4026 old_fw = sc->fw_name; 4027 if (old_fw == mxge_fw_aligned) 4028 sc->fw_name = mxge_fw_rss_aligned; 4029 else 4030 sc->fw_name = mxge_fw_rss_unaligned; 4031 status = mxge_load_firmware(sc, 0); 4032 if (status != 0) { 4033 device_printf(sc->dev, "Falling back to a single slice\n"); 4034 return; 4035 } 4036 4037 /* 4038 * Try to send a reset command to the card to see if it is alive 4039 */ 4040 memset(&cmd, 0, sizeof(cmd)); 4041 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 4042 if (status != 0) { 4043 device_printf(sc->dev, "failed reset\n"); 4044 goto abort_with_fw; 4045 } 4046 4047 /* 4048 * Get rx ring size to calculate rx interrupt queue size 4049 */ 4050 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4051 if (status != 0) { 4052 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4053 goto abort_with_fw; 4054 } 4055 max_intr_slots = 2 * (cmd.data0 / sizeof(mcp_dma_addr_t)); 4056 4057 /* 4058 * Tell it the size of the rx interrupt queue 4059 */ 4060 cmd.data0 = max_intr_slots * sizeof(struct mcp_slot); 4061 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 4062 if (status != 0) { 4063 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 4064 goto abort_with_fw; 4065 } 4066 4067 /* 4068 * Ask the maximum number of slices it supports 4069 */ 4070 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 4071 if (status != 0) { 4072 device_printf(sc->dev, 4073 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); 4074 goto abort_with_fw; 4075 } 4076 max_slices = cmd.data0; 4077 4078 /* 4079 * Round down max slices count to the nearest power of 2 4080 */ 4081 i = 0; 4082 while ((1 << (i + 1)) <= max_slices) 4083 ++i; 4084 max_slices = 1 << i; 4085 4086 if (max_slices > msix_cnt) 4087 max_slices = msix_cnt; 4088 4089 sc->num_slices = num_slices; 4090 sc->num_slices = if_ring_count2(sc->num_slices, max_slices); 4091 4092 multi_tx = device_getenv_int(sc->dev, "multi_tx", mxge_multi_tx); 4093 if (multi_tx) 4094 sc->num_tx_rings = sc->num_slices; 4095 4096 if (bootverbose) { 4097 device_printf(sc->dev, "using %d slices, max %d\n", 4098 sc->num_slices, max_slices); 4099 } 4100 4101 if (sc->num_slices == 1) 4102 goto abort_with_fw; 4103 return; 4104 4105 abort_with_fw: 4106 sc->fw_name = old_fw; 4107 mxge_load_firmware(sc, 0); 4108 } 4109 4110 static void 4111 mxge_setup_serialize(struct mxge_softc *sc) 4112 { 4113 int i = 0, slice; 4114 4115 /* Main + rx + tx */ 4116 sc->nserialize = (2 * sc->num_slices) + 1; 4117 sc->serializes = 4118 kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *), 4119 M_DEVBUF, M_WAITOK | M_ZERO); 4120 4121 /* 4122 * Setup serializes 4123 * 4124 * NOTE: Order is critical 4125 */ 4126 4127 KKASSERT(i < sc->nserialize); 4128 sc->serializes[i++] = &sc->main_serialize; 4129 4130 for (slice = 0; slice < sc->num_slices; ++slice) { 4131 KKASSERT(i < sc->nserialize); 4132 sc->serializes[i++] = &sc->ss[slice].rx_data.rx_serialize; 4133 } 4134 4135 for (slice = 0; slice < sc->num_slices; ++slice) { 4136 KKASSERT(i < sc->nserialize); 4137 sc->serializes[i++] = &sc->ss[slice].tx.tx_serialize; 4138 } 4139 4140 KKASSERT(i == sc->nserialize); 4141 } 4142 4143 static void 4144 mxge_serialize(struct ifnet *ifp, enum ifnet_serialize slz) 4145 { 4146 struct mxge_softc *sc = ifp->if_softc; 4147 4148 ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz); 4149 } 4150 4151 static void 4152 mxge_deserialize(struct ifnet *ifp, enum ifnet_serialize slz) 4153 { 4154 struct mxge_softc *sc = ifp->if_softc; 4155 4156 ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz); 4157 } 4158 4159 static int 4160 mxge_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz) 4161 { 4162 struct mxge_softc *sc = ifp->if_softc; 4163 4164 return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz); 4165 } 4166 4167 #ifdef INVARIANTS 4168 4169 static void 4170 mxge_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz, 4171 boolean_t serialized) 4172 { 4173 struct mxge_softc *sc = ifp->if_softc; 4174 4175 ifnet_serialize_array_assert(sc->serializes, sc->nserialize, 4176 slz, serialized); 4177 } 4178 4179 #endif /* INVARIANTS */ 4180 4181 #ifdef IFPOLL_ENABLE 4182 4183 static void 4184 mxge_npoll_rx(struct ifnet *ifp, void *xss, int cycle) 4185 { 4186 struct mxge_slice_state *ss = xss; 4187 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done; 4188 4189 ASSERT_SERIALIZED(&ss->rx_data.rx_serialize); 4190 4191 if (rx_done->entry[rx_done->idx].length != 0) { 4192 mxge_clean_rx_done(&ss->sc->arpcom.ac_if, &ss->rx_data, cycle); 4193 } else { 4194 /* 4195 * XXX 4196 * This register writting obviously has cost, 4197 * however, if we don't hand back the rx token, 4198 * the upcoming packets may suffer rediculously 4199 * large delay, as observed on 8AL-C using ping(8). 4200 */ 4201 *ss->irq_claim = be32toh(3); 4202 } 4203 } 4204 4205 static void 4206 mxge_npoll(struct ifnet *ifp, struct ifpoll_info *info) 4207 { 4208 struct mxge_softc *sc = ifp->if_softc; 4209 int i; 4210 4211 if (info == NULL) 4212 return; 4213 4214 /* 4215 * Only poll rx; polling tx and status don't seem to work 4216 */ 4217 for (i = 0; i < sc->num_slices; ++i) { 4218 struct mxge_slice_state *ss = &sc->ss[i]; 4219 int idx = ss->intr_cpuid; 4220 4221 KKASSERT(idx < ncpus2); 4222 info->ifpi_rx[idx].poll_func = mxge_npoll_rx; 4223 info->ifpi_rx[idx].arg = ss; 4224 info->ifpi_rx[idx].serializer = &ss->rx_data.rx_serialize; 4225 } 4226 } 4227 4228 #endif /* IFPOLL_ENABLE */ 4229 4230 static int 4231 mxge_attach(device_t dev) 4232 { 4233 mxge_softc_t *sc = device_get_softc(dev); 4234 struct ifnet *ifp = &sc->arpcom.ac_if; 4235 int err, rid, i; 4236 4237 /* 4238 * Avoid rewriting half the lines in this file to use 4239 * &sc->arpcom.ac_if instead 4240 */ 4241 sc->ifp = ifp; 4242 sc->dev = dev; 4243 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4244 4245 /* IFM_ETH_FORCEPAUSE can't be changed */ 4246 ifmedia_init(&sc->media, IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 4247 mxge_media_change, mxge_media_status); 4248 4249 lwkt_serialize_init(&sc->main_serialize); 4250 4251 mxge_fetch_tunables(sc); 4252 4253 err = bus_dma_tag_create(NULL, /* parent */ 4254 1, /* alignment */ 4255 0, /* boundary */ 4256 BUS_SPACE_MAXADDR, /* low */ 4257 BUS_SPACE_MAXADDR, /* high */ 4258 NULL, NULL, /* filter */ 4259 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 4260 0, /* num segs */ 4261 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 4262 0, /* flags */ 4263 &sc->parent_dmat); /* tag */ 4264 if (err != 0) { 4265 device_printf(dev, "Err %d allocating parent dmat\n", err); 4266 goto failed; 4267 } 4268 4269 callout_init_mp(&sc->co_hdl); 4270 4271 mxge_setup_cfg_space(sc); 4272 4273 /* 4274 * Map the board into the kernel 4275 */ 4276 rid = PCIR_BARS; 4277 sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, 4278 &rid, RF_ACTIVE); 4279 if (sc->mem_res == NULL) { 4280 device_printf(dev, "could not map memory\n"); 4281 err = ENXIO; 4282 goto failed; 4283 } 4284 4285 sc->sram = rman_get_virtual(sc->mem_res); 4286 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4287 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4288 device_printf(dev, "impossible memory region size %ld\n", 4289 rman_get_size(sc->mem_res)); 4290 err = ENXIO; 4291 goto failed; 4292 } 4293 4294 /* 4295 * Make NULL terminated copy of the EEPROM strings section of 4296 * lanai SRAM 4297 */ 4298 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4299 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4300 rman_get_bushandle(sc->mem_res), 4301 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4302 sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE - 2); 4303 err = mxge_parse_strings(sc); 4304 if (err != 0) { 4305 device_printf(dev, "parse EEPROM string failed\n"); 4306 goto failed; 4307 } 4308 4309 /* 4310 * Enable write combining for efficient use of PCIe bus 4311 */ 4312 mxge_enable_wc(sc); 4313 4314 /* 4315 * Allocate the out of band DMA memory 4316 */ 4317 err = mxge_dma_alloc(sc, &sc->cmd_dma, sizeof(mxge_cmd_t), 64); 4318 if (err != 0) { 4319 device_printf(dev, "alloc cmd DMA buf failed\n"); 4320 goto failed; 4321 } 4322 sc->cmd = sc->cmd_dma.dmem_addr; 4323 4324 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4325 if (err != 0) { 4326 device_printf(dev, "alloc zeropad DMA buf failed\n"); 4327 goto failed; 4328 } 4329 4330 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4331 if (err != 0) { 4332 device_printf(dev, "alloc dmabench DMA buf failed\n"); 4333 goto failed; 4334 } 4335 4336 /* Select & load the firmware */ 4337 err = mxge_select_firmware(sc); 4338 if (err != 0) { 4339 device_printf(dev, "select firmware failed\n"); 4340 goto failed; 4341 } 4342 4343 mxge_slice_probe(sc); 4344 err = mxge_alloc_slices(sc); 4345 if (err != 0) { 4346 device_printf(dev, "alloc slices failed\n"); 4347 goto failed; 4348 } 4349 4350 err = mxge_alloc_intr(sc); 4351 if (err != 0) { 4352 device_printf(dev, "alloc intr failed\n"); 4353 goto failed; 4354 } 4355 4356 /* Setup serializes */ 4357 mxge_setup_serialize(sc); 4358 4359 err = mxge_reset(sc, 0); 4360 if (err != 0) { 4361 device_printf(dev, "reset failed\n"); 4362 goto failed; 4363 } 4364 4365 err = mxge_alloc_rings(sc); 4366 if (err != 0) { 4367 device_printf(dev, "failed to allocate rings\n"); 4368 goto failed; 4369 } 4370 4371 ifp->if_baudrate = IF_Gbps(10UL); 4372 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO; 4373 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4374 4375 ifp->if_capabilities |= IFCAP_VLAN_MTU; 4376 #if 0 4377 /* Well, its software, sigh */ 4378 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING; 4379 #endif 4380 ifp->if_capenable = ifp->if_capabilities; 4381 4382 ifp->if_softc = sc; 4383 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4384 ifp->if_init = mxge_init; 4385 ifp->if_ioctl = mxge_ioctl; 4386 ifp->if_start = mxge_start; 4387 #ifdef IFPOLL_ENABLE 4388 if (sc->intr_type != PCI_INTR_TYPE_LEGACY) 4389 ifp->if_npoll = mxge_npoll; 4390 #endif 4391 ifp->if_serialize = mxge_serialize; 4392 ifp->if_deserialize = mxge_deserialize; 4393 ifp->if_tryserialize = mxge_tryserialize; 4394 #ifdef INVARIANTS 4395 ifp->if_serialize_assert = mxge_serialize_assert; 4396 #endif 4397 4398 /* Increase TSO burst length */ 4399 ifp->if_tsolen = (32 * ETHERMTU); 4400 4401 /* Initialise the ifmedia structure */ 4402 mxge_media_init(sc); 4403 mxge_media_probe(sc); 4404 4405 ether_ifattach(ifp, sc->mac_addr, NULL); 4406 4407 /* Setup TX rings and subqueues */ 4408 for (i = 0; i < sc->num_tx_rings; ++i) { 4409 struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i); 4410 struct mxge_slice_state *ss = &sc->ss[i]; 4411 4412 ifsq_set_cpuid(ifsq, ss->intr_cpuid); 4413 ifsq_set_hw_serialize(ifsq, &ss->tx.tx_serialize); 4414 ifsq_set_priv(ifsq, &ss->tx); 4415 ss->tx.ifsq = ifsq; 4416 4417 ifsq_watchdog_init(&ss->tx.watchdog, ifsq, mxge_watchdog); 4418 } 4419 4420 /* 4421 * XXX 4422 * We are not ready to do "gather" jumbo frame, so 4423 * limit MTU to MJUMPAGESIZE 4424 */ 4425 sc->max_mtu = MJUMPAGESIZE - 4426 ETHER_HDR_LEN - EVL_ENCAPLEN - MXGEFW_PAD - 1; 4427 sc->dying = 0; 4428 4429 err = mxge_setup_intr(sc); 4430 if (err != 0) { 4431 device_printf(dev, "alloc and setup intr failed\n"); 4432 ether_ifdetach(ifp); 4433 goto failed; 4434 } 4435 4436 mxge_add_sysctls(sc); 4437 4438 /* Increase non-cluster mbuf limit; used by small RX rings */ 4439 mb_inclimit(ifp->if_nmbclusters); 4440 4441 callout_reset_bycpu(&sc->co_hdl, mxge_ticks, mxge_tick, sc, 4442 sc->ss[0].intr_cpuid); 4443 return 0; 4444 4445 failed: 4446 mxge_detach(dev); 4447 return err; 4448 } 4449 4450 static int 4451 mxge_detach(device_t dev) 4452 { 4453 mxge_softc_t *sc = device_get_softc(dev); 4454 4455 if (device_is_attached(dev)) { 4456 struct ifnet *ifp = sc->ifp; 4457 int mblimit = ifp->if_nmbclusters; 4458 4459 ifnet_serialize_all(ifp); 4460 4461 sc->dying = 1; 4462 if (ifp->if_flags & IFF_RUNNING) 4463 mxge_close(sc, 1); 4464 callout_stop(&sc->co_hdl); 4465 4466 mxge_teardown_intr(sc, sc->num_slices); 4467 4468 ifnet_deserialize_all(ifp); 4469 4470 callout_terminate(&sc->co_hdl); 4471 4472 ether_ifdetach(ifp); 4473 4474 /* Decrease non-cluster mbuf limit increased by us */ 4475 mb_inclimit(-mblimit); 4476 } 4477 ifmedia_removeall(&sc->media); 4478 4479 if (sc->cmd != NULL && sc->zeropad_dma.dmem_addr != NULL && 4480 sc->sram != NULL) 4481 mxge_dummy_rdma(sc, 0); 4482 4483 mxge_free_intr(sc); 4484 mxge_rem_sysctls(sc); 4485 mxge_free_rings(sc); 4486 4487 /* MUST after sysctls, intr and rings are freed */ 4488 mxge_free_slices(sc); 4489 4490 if (sc->dmabench_dma.dmem_addr != NULL) 4491 mxge_dma_free(&sc->dmabench_dma); 4492 if (sc->zeropad_dma.dmem_addr != NULL) 4493 mxge_dma_free(&sc->zeropad_dma); 4494 if (sc->cmd_dma.dmem_addr != NULL) 4495 mxge_dma_free(&sc->cmd_dma); 4496 4497 if (sc->msix_table_res != NULL) { 4498 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(2), 4499 sc->msix_table_res); 4500 } 4501 if (sc->mem_res != NULL) { 4502 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, 4503 sc->mem_res); 4504 } 4505 4506 if (sc->parent_dmat != NULL) 4507 bus_dma_tag_destroy(sc->parent_dmat); 4508 4509 return 0; 4510 } 4511 4512 static int 4513 mxge_shutdown(device_t dev) 4514 { 4515 return 0; 4516 } 4517 4518 static void 4519 mxge_free_msix(struct mxge_softc *sc, boolean_t setup) 4520 { 4521 int i; 4522 4523 KKASSERT(sc->num_slices > 1); 4524 4525 for (i = 0; i < sc->num_slices; ++i) { 4526 struct mxge_slice_state *ss = &sc->ss[i]; 4527 4528 if (ss->intr_res != NULL) { 4529 bus_release_resource(sc->dev, SYS_RES_IRQ, 4530 ss->intr_rid, ss->intr_res); 4531 } 4532 if (ss->intr_rid >= 0) 4533 pci_release_msix_vector(sc->dev, ss->intr_rid); 4534 } 4535 if (setup) 4536 pci_teardown_msix(sc->dev); 4537 } 4538 4539 static int 4540 mxge_alloc_msix(struct mxge_softc *sc) 4541 { 4542 struct mxge_slice_state *ss; 4543 int offset, rid, error, i; 4544 boolean_t setup = FALSE; 4545 4546 KKASSERT(sc->num_slices > 1); 4547 4548 if (sc->num_slices == ncpus2) { 4549 offset = 0; 4550 } else { 4551 int offset_def; 4552 4553 offset_def = (sc->num_slices * device_get_unit(sc->dev)) % 4554 ncpus2; 4555 4556 offset = device_getenv_int(sc->dev, "msix.offset", offset_def); 4557 if (offset >= ncpus2 || 4558 offset % sc->num_slices != 0) { 4559 device_printf(sc->dev, "invalid msix.offset %d, " 4560 "use %d\n", offset, offset_def); 4561 offset = offset_def; 4562 } 4563 } 4564 4565 ss = &sc->ss[0]; 4566 4567 ss->intr_serialize = &sc->main_serialize; 4568 ss->intr_func = mxge_msi; 4569 ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0), 4570 "%s comb", device_get_nameunit(sc->dev)); 4571 ss->intr_desc = ss->intr_desc0; 4572 ss->intr_cpuid = offset; 4573 4574 for (i = 1; i < sc->num_slices; ++i) { 4575 ss = &sc->ss[i]; 4576 4577 ss->intr_serialize = &ss->rx_data.rx_serialize; 4578 if (sc->num_tx_rings == 1) { 4579 ss->intr_func = mxge_msix_rx; 4580 ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0), 4581 "%s rx", device_get_nameunit(sc->dev)); 4582 } else { 4583 ss->intr_func = mxge_msix_rxtx; 4584 ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0), 4585 "%s rxtx", device_get_nameunit(sc->dev)); 4586 } 4587 ss->intr_desc = ss->intr_desc0; 4588 ss->intr_cpuid = offset + i; 4589 } 4590 4591 rid = PCIR_BAR(2); 4592 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 4593 &rid, RF_ACTIVE); 4594 if (sc->msix_table_res == NULL) { 4595 device_printf(sc->dev, "couldn't alloc MSI-X table res\n"); 4596 return ENXIO; 4597 } 4598 4599 error = pci_setup_msix(sc->dev); 4600 if (error) { 4601 device_printf(sc->dev, "could not setup MSI-X\n"); 4602 goto back; 4603 } 4604 setup = TRUE; 4605 4606 for (i = 0; i < sc->num_slices; ++i) { 4607 ss = &sc->ss[i]; 4608 4609 error = pci_alloc_msix_vector(sc->dev, i, &ss->intr_rid, 4610 ss->intr_cpuid); 4611 if (error) { 4612 device_printf(sc->dev, "could not alloc " 4613 "MSI-X %d on cpu%d\n", i, ss->intr_cpuid); 4614 goto back; 4615 } 4616 4617 ss->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, 4618 &ss->intr_rid, RF_ACTIVE); 4619 if (ss->intr_res == NULL) { 4620 device_printf(sc->dev, "could not alloc " 4621 "MSI-X %d resource\n", i); 4622 error = ENXIO; 4623 goto back; 4624 } 4625 } 4626 4627 pci_enable_msix(sc->dev); 4628 sc->intr_type = PCI_INTR_TYPE_MSIX; 4629 back: 4630 if (error) 4631 mxge_free_msix(sc, setup); 4632 return error; 4633 } 4634 4635 static int 4636 mxge_alloc_intr(struct mxge_softc *sc) 4637 { 4638 struct mxge_slice_state *ss; 4639 u_int irq_flags; 4640 4641 if (sc->num_slices > 1) { 4642 int error; 4643 4644 error = mxge_alloc_msix(sc); 4645 if (error) 4646 return error; 4647 KKASSERT(sc->intr_type == PCI_INTR_TYPE_MSIX); 4648 return 0; 4649 } 4650 4651 ss = &sc->ss[0]; 4652 4653 sc->intr_type = pci_alloc_1intr(sc->dev, mxge_msi_enable, 4654 &ss->intr_rid, &irq_flags); 4655 4656 ss->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, 4657 &ss->intr_rid, irq_flags); 4658 if (ss->intr_res == NULL) { 4659 device_printf(sc->dev, "could not alloc interrupt\n"); 4660 return ENXIO; 4661 } 4662 4663 if (sc->intr_type == PCI_INTR_TYPE_LEGACY) 4664 ss->intr_func = mxge_legacy; 4665 else 4666 ss->intr_func = mxge_msi; 4667 ss->intr_serialize = &sc->main_serialize; 4668 ss->intr_cpuid = rman_get_cpuid(ss->intr_res); 4669 4670 return 0; 4671 } 4672 4673 static int 4674 mxge_setup_intr(struct mxge_softc *sc) 4675 { 4676 int i; 4677 4678 for (i = 0; i < sc->num_slices; ++i) { 4679 struct mxge_slice_state *ss = &sc->ss[i]; 4680 int error; 4681 4682 error = bus_setup_intr_descr(sc->dev, ss->intr_res, 4683 INTR_MPSAFE, ss->intr_func, ss, &ss->intr_hand, 4684 ss->intr_serialize, ss->intr_desc); 4685 if (error) { 4686 device_printf(sc->dev, "can't setup %dth intr\n", i); 4687 mxge_teardown_intr(sc, i); 4688 return error; 4689 } 4690 } 4691 return 0; 4692 } 4693 4694 static void 4695 mxge_teardown_intr(struct mxge_softc *sc, int cnt) 4696 { 4697 int i; 4698 4699 if (sc->ss == NULL) 4700 return; 4701 4702 for (i = 0; i < cnt; ++i) { 4703 struct mxge_slice_state *ss = &sc->ss[i]; 4704 4705 bus_teardown_intr(sc->dev, ss->intr_res, ss->intr_hand); 4706 } 4707 } 4708 4709 static void 4710 mxge_free_intr(struct mxge_softc *sc) 4711 { 4712 if (sc->ss == NULL) 4713 return; 4714 4715 if (sc->intr_type != PCI_INTR_TYPE_MSIX) { 4716 struct mxge_slice_state *ss = &sc->ss[0]; 4717 4718 if (ss->intr_res != NULL) { 4719 bus_release_resource(sc->dev, SYS_RES_IRQ, 4720 ss->intr_rid, ss->intr_res); 4721 } 4722 if (sc->intr_type == PCI_INTR_TYPE_MSI) 4723 pci_release_msi(sc->dev); 4724 } else { 4725 mxge_free_msix(sc, TRUE); 4726 } 4727 } 4728