/******************************************************************************

Copyright (c) 2006-2013, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

$FreeBSD: head/sys/dev/mxge/if_mxge.c 254263 2013-08-12 23:30:01Z scottl $

***************************************************************************/

#include "opt_ifpoll.h"
#include "opt_inet.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/in_cksum.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/serialize.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/ifq_var.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_poll.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/vlan/if_vlan_var.h>
#include <net/zlib.h>
#include <net/toeplitz.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <sys/bus.h>
#include <sys/rman.h>

#include <bus/pci/pcireg.h>
#include <bus/pci/pcivar.h>
#include <bus/pci/pci_private.h>	/* XXX for pci_cfg_restore */

#include <vm/vm.h>			/* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386__) || defined(__x86_64__)
#include <machine/specialreg.h>
#endif

#include <dev/netif/mxge/mxge_mcp.h>
#include <dev/netif/mxge/mcp_gen_header.h>
#include <dev/netif/mxge/if_mxge_var.h>

#define MXGE_RX_SMALL_BUFLEN		(MHLEN - MXGEFW_PAD)
#define MXGE_HWRSS_KEYLEN		16

/* Tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = MXGE_INTR_COAL_DELAY;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_ticks;
static int mxge_num_slices = 0;
static int mxge_always_promisc = 0;
static int mxge_throttle = 0;
static int mxge_msi_enable = 1;
static int mxge_msix_enable = 1;
static int mxge_multi_tx = 1;
/*
 * Don't use RSS by default, it's just too slow
 */
static int mxge_use_rss = 0;

static const char *mxge_fw_unaligned = "mxge_ethp_z8e";
static const char *mxge_fw_aligned = "mxge_eth_z8e";
static const char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static const char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";

TUNABLE_INT("hw.mxge.num_slices", &mxge_num_slices);
TUNABLE_INT("hw.mxge.flow_control_enabled", &mxge_flow_control);
TUNABLE_INT("hw.mxge.intr_coal_delay", &mxge_intr_coal_delay);
TUNABLE_INT("hw.mxge.nvidia_ecrc_enable", &mxge_nvidia_ecrc_enable);
TUNABLE_INT("hw.mxge.force_firmware", &mxge_force_firmware);
TUNABLE_INT("hw.mxge.deassert_wait", &mxge_deassert_wait);
TUNABLE_INT("hw.mxge.ticks", &mxge_ticks);
TUNABLE_INT("hw.mxge.always_promisc", &mxge_always_promisc);
TUNABLE_INT("hw.mxge.throttle", &mxge_throttle);
TUNABLE_INT("hw.mxge.multi_tx", &mxge_multi_tx);
TUNABLE_INT("hw.mxge.use_rss", &mxge_use_rss);
TUNABLE_INT("hw.mxge.msi.enable", &mxge_msi_enable);
TUNABLE_INT("hw.mxge.msix.enable", &mxge_msix_enable);
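/*
 * For reference, the knobs above are boot-time loader tunables, so they
 * are normally set from /boot/loader.conf before the module attaches.
 * A hypothetical example (values purely illustrative, not tuning
 * advice):
 *
 *	hw.mxge.num_slices="4"			# cap the slice count
 *	hw.mxge.intr_coal_delay="30"		# usecs of intr coalescing
 *	hw.mxge.flow_control_enabled="0"	# disable pause frames
 */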
static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);

static int mxge_alloc_intr(struct mxge_softc *sc);
static void mxge_free_intr(struct mxge_softc *sc);
static int mxge_setup_intr(struct mxge_softc *sc);
static void mxge_teardown_intr(struct mxge_softc *sc, int cnt);

static device_method_t mxge_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),
	DEVMETHOD_END
};

static driver_t mxge_driver = {
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus. */
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, NULL, NULL);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static void mxge_close(mxge_softc_t *sc, int down);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);
static void mxge_watchdog_reset(mxge_softc_t *sc);
static void mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice);

static int
mxge_probe(device_t dev)
{
	if (pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM &&
	    (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E ||
	     pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9)) {
		int rev = pci_get_revid(dev);

		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n", rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}

static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386__) || defined(__x86_64__)
	vm_offset_t len;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	pmap_change_attr((vm_offset_t)sc->sram, len / PAGE_SIZE,
	    PAT_WRITE_COMBINING);
#endif
}
static int
mxge_dma_alloc(mxge_softc_t *sc, bus_dmamem_t *dma, size_t bytes,
    bus_size_t alignment)
{
	bus_size_t boundary;
	int err;

	if (bytes > 4096 && alignment == 4096)
		boundary = 0;
	else
		boundary = 4096;

	err = bus_dmamem_coherent(sc->parent_dmat, alignment, boundary,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, bytes,
	    BUS_DMA_WAITOK | BUS_DMA_ZERO, dma);
	if (err != 0) {
		device_printf(sc->dev, "bus_dmamem_coherent failed: %d\n", err);
		return err;
	}
	return 0;
}

static void
mxge_dma_free(bus_dmamem_t *dma)
{
	bus_dmamap_unload(dma->dmem_tag, dma->dmem_map);
	bus_dmamem_free(dma->dmem_tag, dma->dmem_addr, dma->dmem_map);
	bus_dma_tag_destroy(dma->dmem_tag);
}

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */
static int
mxge_parse_strings(mxge_softc_t *sc)
{
	const char *ptr;
	int i, found_mac, found_sn2;
	char *endptr;

	ptr = sc->eeprom_strings;
	found_mac = 0;
	found_sn2 = 0;
	while (*ptr != '\0') {
		if (strncmp(ptr, "MAC=", 4) == 0) {
			ptr += 4;
			for (i = 0;;) {
				sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
				if (endptr - ptr != 2)
					goto abort;
				ptr = endptr;
				if (++i == 6)
					break;
				if (*ptr++ != ':')
					goto abort;
			}
			found_mac = 1;
		} else if (strncmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strlcpy(sc->product_code_string, ptr,
			    sizeof(sc->product_code_string));
		} else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
			ptr += 3;
			strlcpy(sc->serial_number_string, ptr,
			    sizeof(sc->serial_number_string));
		} else if (strncmp(ptr, "SN2=", 4) == 0) {
			/* SN2 takes precedence over SN */
			ptr += 4;
			found_sn2 = 1;
			strlcpy(sc->serial_number_string, ptr,
			    sizeof(sc->serial_number_string));
		}
		while (*ptr++ != '\0') {}
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");
	return ENXIO;
}
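/*
 * For example, a (made-up, not captured from real hardware) EEPROM
 * blob for the parser above might contain the back-to-back
 * NUL-terminated strings
 *
 *	"MAC=00:60:dd:47:ab:cd\0SN=123456\0PC=10G-PCIE-8A-C\0\0"
 *
 * which would yield the station address 00:60:dd:47:ab:cd, the serial
 * number "123456" (overridden if an SN2= string follows), and the
 * product code "10G-PCIE-8A-C".
 */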
#if defined(__i386__) || defined(__x86_64__)

static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/*
	 * XXXX
	 * Test below is commented out because it is believed that doing
	 * config read/write beyond 0xff will access the config space
	 * for the next larger function.  Uncomment this and remove
	 * the hacky pmap_mapdev() way of accessing config space when
	 * DragonFly grows support for extended pcie config space access.
	 */
#if 0
	/*
	 * See if we can, by some miracle, access the extended
	 * config space
	 */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/*
	 * Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves.  This is because on
	 * opteron/nvidia class machines the 0xe0000000 mapping is
	 * handled by the nvidia chipset, which means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
	    PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
	    PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
	    PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
	    PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
	    PCI_IVAR_DEVICE, &idev);

	off = base + 0x00100000UL * (unsigned long)bus +
	    0x00001000UL * (unsigned long)(func + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
	if (va == NULL) {
		device_printf(sc->dev, "pmap_mapdev failed\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (!(vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
		    vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t *)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (bootverbose) {
		device_printf(sc->dev, "Enabled ECRC on upstream "
		    "Nvidia bridge at %d:%d:%d\n",
		    (int)bus, (int)slot, (int)func);
	}
}

#else	/* __i386__ || __x86_64__ */

static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev, "Nforce 4 chipset on non-x86/x86_64!?!?!\n");
}

#endif

static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.dmem_busaddr;
	int status;
	uint32_t len;
	const char *test = " ";

	/*
	 * Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */

	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0 >> 16) * len * 2) / (cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0 >> 16) * len * 2) / (cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0 >> 16) * len * 2 * 2) /
	    (cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) {
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
		    test, status);
	}
	return status;
}
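/*
 * Worked example of the benchmark arithmetic above, with illustrative
 * numbers: if the firmware returns cmd.data0 = (1000 << 16) | 2000,
 * i.e. 1000 transfers of len = 4096 bytes in 2000 half-microsecond
 * ticks (1ms), then read_dma = (1000 * 4096 * 2) / 2000 = 4096.  The
 * "* 2" converts 0.5us ticks into bytes per microsecond, which is the
 * same number as MB/s.
 */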
/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary.  Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB.  If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */
static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;

	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
			    pectl);
			sc->tx_boundary = 2048;
		}
	}

	/*
	 * Load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0)
		return status;

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen.  Not required on Z8ES or newer.
	 */
	if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
		return 0;

	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0;	/* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS) {
		device_printf(dev, "Falling back to ethp! "
		    "Please install up-to-date fw\n");
	}
	return status;
}

static int
mxge_select_firmware(mxge_softc_t *sc)
{
	int aligned = 0;
	int force_firmware = mxge_force_firmware;

	if (sc->throttle)
		force_firmware = sc->throttle;

	if (force_firmware != 0) {
		if (force_firmware == 1)
			aligned = 1;
		else
			aligned = 0;
		if (bootverbose) {
			device_printf(sc->dev,
			    "Assuming %s completions (forced)\n",
			    aligned ? "aligned" : "unaligned");
		}
		goto abort;
	}

	/*
	 * If the PCIe link width is 4 or less, we can use the aligned
	 * firmware and skip any checks
	 */
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev, "PCIe x%d Link, "
		    "expect reduced performance\n", sc->link_width);
		aligned = 1;
		goto abort;
	}

	if (mxge_firmware_probe(sc) == 0)
		return 0;

abort:
	if (aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx_boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
	}
	return mxge_load_firmware(sc, 0);
}
" 555 "Please install up to date fw\n"); 556 } 557 return status; 558 } 559 560 static int 561 mxge_select_firmware(mxge_softc_t *sc) 562 { 563 int aligned = 0; 564 int force_firmware = mxge_force_firmware; 565 566 if (sc->throttle) 567 force_firmware = sc->throttle; 568 569 if (force_firmware != 0) { 570 if (force_firmware == 1) 571 aligned = 1; 572 else 573 aligned = 0; 574 if (bootverbose) { 575 device_printf(sc->dev, 576 "Assuming %s completions (forced)\n", 577 aligned ? "aligned" : "unaligned"); 578 } 579 goto abort; 580 } 581 582 /* 583 * If the PCIe link width is 4 or less, we can use the aligned 584 * firmware and skip any checks 585 */ 586 if (sc->link_width != 0 && sc->link_width <= 4) { 587 device_printf(sc->dev, "PCIe x%d Link, " 588 "expect reduced performance\n", sc->link_width); 589 aligned = 1; 590 goto abort; 591 } 592 593 if (mxge_firmware_probe(sc) == 0) 594 return 0; 595 596 abort: 597 if (aligned) { 598 sc->fw_name = mxge_fw_aligned; 599 sc->tx_boundary = 4096; 600 } else { 601 sc->fw_name = mxge_fw_unaligned; 602 sc->tx_boundary = 2048; 603 } 604 return mxge_load_firmware(sc, 0); 605 } 606 607 static int 608 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 609 { 610 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 611 if_printf(sc->ifp, "Bad firmware type: 0x%x\n", 612 be32toh(hdr->mcp_type)); 613 return EIO; 614 } 615 616 /* Save firmware version for sysctl */ 617 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version)); 618 if (bootverbose) 619 if_printf(sc->ifp, "firmware id: %s\n", hdr->version); 620 621 ksscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 622 &sc->fw_ver_minor, &sc->fw_ver_tiny); 623 624 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR && 625 sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 626 if_printf(sc->ifp, "Found firmware version %s\n", 627 sc->fw_version); 628 if_printf(sc->ifp, "Driver needs %d.%d\n", 629 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 630 return EINVAL; 631 } 632 return 0; 633 } 634 635 static void * 636 z_alloc(void *nil, u_int items, u_int size) 637 { 638 return kmalloc(items * size, M_TEMP, M_WAITOK); 639 } 640 641 static void 642 z_free(void *nil, void *ptr) 643 { 644 kfree(ptr, M_TEMP); 645 } 646 647 static int 648 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 649 { 650 z_stream zs; 651 char *inflate_buffer; 652 const struct firmware *fw; 653 const mcp_gen_header_t *hdr; 654 unsigned hdr_offset; 655 int status; 656 unsigned int i; 657 char dummy; 658 size_t fw_len; 659 660 fw = firmware_get(sc->fw_name); 661 if (fw == NULL) { 662 if_printf(sc->ifp, "Could not find firmware image %s\n", 663 sc->fw_name); 664 return ENOENT; 665 } 666 667 /* Setup zlib and decompress f/w */ 668 bzero(&zs, sizeof(zs)); 669 zs.zalloc = z_alloc; 670 zs.zfree = z_free; 671 status = inflateInit(&zs); 672 if (status != Z_OK) { 673 status = EIO; 674 goto abort_with_fw; 675 } 676 677 /* 678 * The uncompressed size is stored as the firmware version, 679 * which would otherwise go unused 680 */ 681 fw_len = (size_t)fw->version; 682 inflate_buffer = kmalloc(fw_len, M_TEMP, M_WAITOK); 683 zs.avail_in = fw->datasize; 684 zs.next_in = __DECONST(char *, fw->data); 685 zs.avail_out = fw_len; 686 zs.next_out = inflate_buffer; 687 status = inflate(&zs, Z_FINISH); 688 if (status != Z_STREAM_END) { 689 if_printf(sc->ifp, "zlib %d\n", status); 690 status = EIO; 691 goto abort_with_buffer; 692 } 693 694 /* Check id */ 695 hdr_offset = 696 htobe32(*(const uint32_t *)(inflate_buffer + MCP_HEADER_PTR_OFFSET)); 697 if 
/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */
static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* Clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/*
	 * Send an rdma command to the PCIe engine, and wait for the
	 * response in the confirmation address.  The firmware should
	 * write a -1 there to indicate it is alive and well
	 */
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr);
	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.dmem_busaddr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.dmem_busaddr);
	buf[3] = htobe32(dma_high);	/* dummy addr MSW */
	buf[4] = htobe32(dma_low);	/* dummy addr LSW */
	buf[5] = htobe32(enable);	/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		if_printf(sc->ifp, "dummy rdma %s failed (%p = 0x%x)\n",
		    (enable ? "enable" : "disable"), confirm, *confirm);
	}
}

static int
mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof(*buf) + 8];
	volatile mcp_cmd_response_t *response = sc->cmd;
	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
	uint32_t dma_low, dma_high;
	int err, sleep_total = 0;

	/* Ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	buf->data0 = htobe32(data->data0);
	buf->data1 = htobe32(data->data1);
	buf->data2 = htobe32(data->data2);
	buf->cmd = htobe32(cmd);
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr);

	buf->response_addr.low = htobe32(dma_low);
	buf->response_addr.high = htobe32(dma_high);

	response->result = 0xffffffff;
	wmb();
	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof(*buf));

	/*
	 * Wait up to 20ms
	 */
	err = EAGAIN;
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		wmb();
		switch (be32toh(response->result)) {
		case 0:
			data->data0 = be32toh(response->data);
			err = 0;
			break;
		case 0xffffffff:
			DELAY(1000);
			break;
		case MXGEFW_CMD_UNKNOWN:
			err = ENOSYS;
			break;
		case MXGEFW_CMD_ERROR_UNALIGNED:
			err = E2BIG;
			break;
		case MXGEFW_CMD_ERROR_BUSY:
			err = EBUSY;
			break;
		case MXGEFW_CMD_ERROR_I2C_ABSENT:
			err = ENXIO;
			break;
		default:
			if_printf(sc->ifp, "command %d failed, result = %d\n",
			    cmd, be32toh(response->result));
			err = ENXIO;
			break;
		}
		if (err != EAGAIN)
			break;
	}
	if (err == EAGAIN) {
		if_printf(sc->ifp, "command %d timed out, result = %d\n",
		    cmd, be32toh(response->result));
	}
	return err;
}
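/*
 * A minimal usage sketch for the command interface above (hypothetical
 * caller, shown for illustration only): arguments go in data0..data2,
 * and any result comes back in data0, already byte-swapped:
 *
 *	mxge_cmd_t cmd;
 *	int err;
 *
 *	cmd.data0 = new_mtu;
 *	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
 *	if (err != 0)
 *		;	(ENOSYS, EBUSY, etc., as mapped above)
 */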
"enable" : "disable"), confirm, *confirm); 777 } 778 } 779 780 static int 781 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 782 { 783 mcp_cmd_t *buf; 784 char buf_bytes[sizeof(*buf) + 8]; 785 volatile mcp_cmd_response_t *response = sc->cmd; 786 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 787 uint32_t dma_low, dma_high; 788 int err, sleep_total = 0; 789 790 /* Ensure buf is aligned to 8 bytes */ 791 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 792 793 buf->data0 = htobe32(data->data0); 794 buf->data1 = htobe32(data->data1); 795 buf->data2 = htobe32(data->data2); 796 buf->cmd = htobe32(cmd); 797 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr); 798 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr); 799 800 buf->response_addr.low = htobe32(dma_low); 801 buf->response_addr.high = htobe32(dma_high); 802 803 response->result = 0xffffffff; 804 wmb(); 805 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 806 807 /* 808 * Wait up to 20ms 809 */ 810 err = EAGAIN; 811 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 812 wmb(); 813 switch (be32toh(response->result)) { 814 case 0: 815 data->data0 = be32toh(response->data); 816 err = 0; 817 break; 818 case 0xffffffff: 819 DELAY(1000); 820 break; 821 case MXGEFW_CMD_UNKNOWN: 822 err = ENOSYS; 823 break; 824 case MXGEFW_CMD_ERROR_UNALIGNED: 825 err = E2BIG; 826 break; 827 case MXGEFW_CMD_ERROR_BUSY: 828 err = EBUSY; 829 break; 830 case MXGEFW_CMD_ERROR_I2C_ABSENT: 831 err = ENXIO; 832 break; 833 default: 834 if_printf(sc->ifp, "command %d failed, result = %d\n", 835 cmd, be32toh(response->result)); 836 err = ENXIO; 837 break; 838 } 839 if (err != EAGAIN) 840 break; 841 } 842 if (err == EAGAIN) { 843 if_printf(sc->ifp, "command %d timed out result = %d\n", 844 cmd, be32toh(response->result)); 845 } 846 return err; 847 } 848 849 static int 850 mxge_adopt_running_firmware(mxge_softc_t *sc) 851 { 852 struct mcp_gen_header *hdr; 853 const size_t bytes = sizeof(struct mcp_gen_header); 854 size_t hdr_offset; 855 int status; 856 857 /* 858 * Find running firmware header 859 */ 860 hdr_offset = 861 htobe32(*(volatile uint32_t *)(sc->sram + MCP_HEADER_PTR_OFFSET)); 862 863 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 864 if_printf(sc->ifp, "Running firmware has bad header offset " 865 "(%zu)\n", hdr_offset); 866 return EIO; 867 } 868 869 /* 870 * Copy header of running firmware from SRAM to host memory to 871 * validate firmware 872 */ 873 hdr = kmalloc(bytes, M_DEVBUF, M_WAITOK); 874 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 875 rman_get_bushandle(sc->mem_res), hdr_offset, (char *)hdr, bytes); 876 status = mxge_validate_firmware(sc, hdr); 877 kfree(hdr, M_DEVBUF); 878 879 /* 880 * Check to see if adopted firmware has bug where adopting 881 * it will cause broadcasts to be filtered unless the NIC 882 * is kept in ALLMULTI mode 883 */ 884 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 885 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 886 sc->adopted_rx_filter_bug = 1; 887 if_printf(sc->ifp, "Adopting fw %d.%d.%d: " 888 "working around rx filter bug\n", 889 sc->fw_ver_major, sc->fw_ver_minor, sc->fw_ver_tiny); 890 } 891 892 return status; 893 } 894 895 static int 896 mxge_load_firmware(mxge_softc_t *sc, int adopt) 897 { 898 volatile uint32_t *confirm; 899 volatile char *submit; 900 char buf_bytes[72]; 901 uint32_t *buf, size, dma_low, dma_high; 902 int status, i; 903 904 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 905 906 size = 
static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;

		/*
		 * Try to use the currently running firmware, if
		 * it is new enough
		 */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			if_printf(sc->ifp,
			    "failed to adopt running firmware\n");
			return status;
		}
		if_printf(sc->ifp, "Successfully adopted running firmware\n");

		if (sc->tx_boundary == 4096) {
			if_printf(sc->ifp,
			    "Using firmware currently running on NIC.  "
			    "For optimal\n");
			if_printf(sc->ifp, "performance consider loading "
			    "optimized firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}

	/* Clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/*
	 * Send a reload command to the bootstrap MCP, and wait for the
	 * response in the confirmation address.  The firmware should
	 * write a -1 there to indicate it is alive and well
	 */

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/*
	 * FIX: All newest firmware should un-protect the bottom of
	 * the sram before handoff.  However, the very first interfaces
	 * do not.  Therefore the handoff copy must skip the first 8 bytes
	 */
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);	/* where the code starts */
	buf[4] = htobe32(size - 8);		/* length of code */
	buf[5] = htobe32(8);			/* where to copy to */
	buf[6] = htobe32(0);			/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000 * 10);
		i++;
	}
	if (*confirm != 0xffffffff) {
		if_printf(sc->ifp, "handoff failed (%p = 0x%x)\n",
		    confirm, *confirm);
		return ENXIO;
	}
	return 0;
}
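/*
 * For reference, the 7-word handoff block built above is laid out as
 * follows (summarized from the per-word comments in the code):
 *
 *	word 0/1: confirmation address (MSW/LSW)
 *	word 2:   confirmation data the firmware writes back (-1)
 *	word 3:   SRAM offset where the code starts (first 8 bytes skipped)
 *	word 4:   length of the code
 *	word 5:   offset to copy the code to
 *	word 6:   entry point to jump to
 */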
static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;

	cmd.data0 = (addr[0] << 24) | (addr[1] << 16) |
	    (addr[2] << 8) | addr[3];
	cmd.data1 = (addr[4] << 8) | (addr[5]);
	return mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
}

static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, &cmd);
	if (status) {
		if_printf(sc->ifp, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, &cmd);
	if (status)
		if_printf(sc->ifp, "Failed to set promisc mode\n");
}

static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists */
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		if_printf(ifp, "Failed MXGEFW_ENABLE_ALLMULTI, "
		    "error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI) {
		/* Request to disable multicast filtering, so quit here */
		return;
	}

	/* Flush all the filters */
	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		if_printf(ifp, "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, "
		    "error status: %d\n", err);
		return;
	}

	/*
	 * Walk the multicast list, and add each address
	 */
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;

		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		    &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		    &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			if_printf(ifp, "Failed MXGEFW_JOIN_MULTICAST_GROUP, "
			    "error status: %d\n", err);
			/* Abort, leaving multicast filtering off */
			return;
		}
	}

	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		if_printf(ifp, "Failed MXGEFW_DISABLE_ALLMULTI, "
		    "error status: %d\n", err);
	}
}
#if 0
static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/*
	 * Try to set nbufs to see if we can
	 * use virtually contiguous jumbos
	 */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
	    &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}
#endif

static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status, rx_intr_size;

	/*
	 * Try to send a reset command to the card to see if it
	 * is alive
	 */
	memset(&cmd, 0, sizeof(cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		if_printf(sc->ifp, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/*
	 * Set the intrq size
	 * XXX assume 4byte mcp_slot
	 */
	rx_intr_size = sc->rx_intr_slots * sizeof(mcp_slot_t);
	cmd.data0 = rx_intr_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */
	if (sc->num_slices > 1) {
		/* Ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
		if (status != 0) {
			if_printf(sc->ifp, "failed to get number of slices\n");
			return status;
		}

		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
		if (sc->num_tx_rings > 1)
			cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, &cmd);
		if (status != 0) {
			if_printf(sc->ifp, "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts */
		for (slice = 0; slice < sc->num_slices; slice++) {
			ss = &sc->ss[slice];

			rx_done = &ss->rx_data.rx_done;
			memset(rx_done->entry, 0, rx_intr_size);

			cmd.data0 =
			    MXGE_LOWPART_TO_U32(ss->rx_done_dma.dmem_busaddr);
			cmd.data1 =
			    MXGE_HIGHPART_TO_U32(ss->rx_done_dma.dmem_busaddr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA,
			    &cmd);
		}
	}

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET,
	    &cmd);
	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);

	if (status != 0) {
		if_printf(sc->ifp, "failed to set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* Run a DMA benchmark */
	mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);

		/* Reset mcp/driver shared state back to 0 */
		ss->rx_data.rx_done.idx = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->rx_data.rx_big.cnt = 0;
		ss->rx_data.rx_small.cnt = 0;
		if (ss->fw_stats != NULL)
			bzero(ss->fw_stats, sizeof(*ss->fw_stats));
	}
	sc->rdma_tags_available = 15;

	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);

	if (sc->throttle) {
		cmd.data0 = sc->throttle;
		if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd))
			if_printf(sc->ifp, "can't enable throttle\n");
	}
	return status;
}
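/*
 * To recap the firmware command ordering constraints encoded in
 * mxge_reset() above (summarized from the comments in the function):
 *
 *	MXGEFW_CMD_RESET
 *	MXGEFW_CMD_SET_INTRQ_SIZE
 *	MXGEFW_CMD_GET_MAX_RSS_QUEUES	(magic side effects; must follow
 *					 the reset and the intrq size)
 *	MXGEFW_CMD_ENABLE_RSS_QUEUES	(before any intr queue DMA setup)
 *	MXGEFW_CMD_SET_INTRQ_DMA	(once per slice)
 */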
static int
mxge_change_throttle(SYSCTL_HANDLER_ARGS)
{
	mxge_cmd_t cmd;
	mxge_softc_t *sc;
	int err;
	unsigned int throttle;

	sc = arg1;
	throttle = sc->throttle;
	err = sysctl_handle_int(oidp, &throttle, arg2, req);
	if (err != 0)
		return err;

	if (throttle == sc->throttle)
		return 0;

	if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
		return EINVAL;

	ifnet_serialize_all(sc->ifp);

	cmd.data0 = throttle;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
	if (err == 0)
		sc->throttle = throttle;

	ifnet_deserialize_all(sc->ifp);
	return err;
}

static int
mxge_change_use_rss(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	int err, use_rss;

	sc = arg1;
	use_rss = sc->use_rss;
	err = sysctl_handle_int(oidp, &use_rss, arg2, req);
	if (err != 0)
		return err;

	if (use_rss == sc->use_rss)
		return 0;

	ifnet_serialize_all(sc->ifp);

	sc->use_rss = use_rss;
	if (sc->ifp->if_flags & IFF_RUNNING) {
		mxge_close(sc, 0);
		mxge_open(sc);
	}

	ifnet_deserialize_all(sc->ifp);
	return err;
}

static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0)
		return err;

	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000 * 1000)
		return EINVAL;

	ifnet_serialize_all(sc->ifp);

	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	ifnet_deserialize_all(sc->ifp);
	return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0)
		return err;

	if (enabled == sc->pause)
		return 0;

	ifnet_serialize_all(sc->ifp);
	err = mxge_change_pause(sc, enabled);
	ifnet_deserialize_all(sc->ifp);

	return err;
}
version"); 1430 1431 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "serial_number", 1432 CTLFLAG_RD, &sc->serial_number_string, 0, "serial number"); 1433 1434 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "product_code", 1435 CTLFLAG_RD, &sc->product_code_string, 0, "product code"); 1436 1437 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "pcie_link_width", 1438 CTLFLAG_RD, &sc->link_width, 0, "link width"); 1439 1440 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_boundary", 1441 CTLFLAG_RD, &sc->tx_boundary, 0, "tx boundary"); 1442 1443 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_combine", 1444 CTLFLAG_RD, &sc->wc, 0, "write combining PIO"); 1445 1446 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_dma_MBs", 1447 CTLFLAG_RD, &sc->read_dma, 0, "DMA Read speed in MB/s"); 1448 1449 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_dma_MBs", 1450 CTLFLAG_RD, &sc->write_dma, 0, "DMA Write speed in MB/s"); 1451 1452 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_write_dma_MBs", 1453 CTLFLAG_RD, &sc->read_write_dma, 0, 1454 "DMA concurrent Read/Write speed in MB/s"); 1455 1456 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "watchdog_resets", 1457 CTLFLAG_RD, &sc->watchdog_resets, 0, 1458 "Number of times NIC was reset"); 1459 1460 /* 1461 * Performance related tunables 1462 */ 1463 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_coal_delay", 1464 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_intr_coal, "I", 1465 "Interrupt coalescing delay in usecs"); 1466 1467 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "throttle", 1468 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_throttle, "I", 1469 "Transmit throttling"); 1470 1471 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "flow_control_enabled", 1472 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_flow_control, "I", 1473 "Interrupt coalescing delay in usecs"); 1474 1475 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "use_rss", 1476 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_use_rss, "I", 1477 "Use RSS"); 1478 1479 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "deassert_wait", 1480 CTLFLAG_RW, &mxge_deassert_wait, 0, 1481 "Wait for IRQ line to go low in ihandler"); 1482 1483 /* 1484 * Stats block from firmware is in network byte order. 
static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = &sc->sysctl_ctx;
	sysctl_ctx_init(ctx);
	sc->sysctl_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_hw),
	    OID_AUTO, device_get_nameunit(sc->dev), CTLFLAG_RD, 0, "");
	if (sc->sysctl_tree == NULL) {
		device_printf(sc->dev, "can't add sysctl node\n");
		return;
	}

	children = SYSCTL_CHILDREN(sc->sysctl_tree);
	fw = sc->ss[0].fw_stats;

	/*
	 * Random information
	 */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version",
	    CTLFLAG_RD, &sc->fw_version, 0, "firmware version");

	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "serial_number",
	    CTLFLAG_RD, &sc->serial_number_string, 0, "serial number");

	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "product_code",
	    CTLFLAG_RD, &sc->product_code_string, 0, "product code");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "pcie_link_width",
	    CTLFLAG_RD, &sc->link_width, 0, "link width");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_boundary",
	    CTLFLAG_RD, &sc->tx_boundary, 0, "tx boundary");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_combine",
	    CTLFLAG_RD, &sc->wc, 0, "write combining PIO");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_dma_MBs",
	    CTLFLAG_RD, &sc->read_dma, 0, "DMA Read speed in MB/s");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_dma_MBs",
	    CTLFLAG_RD, &sc->write_dma, 0, "DMA Write speed in MB/s");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_write_dma_MBs",
	    CTLFLAG_RD, &sc->read_write_dma, 0,
	    "DMA concurrent Read/Write speed in MB/s");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "watchdog_resets",
	    CTLFLAG_RD, &sc->watchdog_resets, 0,
	    "Number of times NIC was reset");

	/*
	 * Performance related tunables
	 */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_coal_delay",
	    CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_intr_coal, "I",
	    "Interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "throttle",
	    CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_throttle, "I",
	    "Transmit throttling");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "flow_control_enabled",
	    CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_flow_control, "I",
	    "Enable flow control (pause frames)");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "use_rss",
	    CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_use_rss, "I",
	    "Use RSS");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "deassert_wait",
	    CTLFLAG_RW, &mxge_deassert_wait, 0,
	    "Wait for IRQ line to go low in ihandler");

	/*
	 * Stats block from firmware is in network byte order.
	 * Need to swap it
	 */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "link_up",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 0,
	    mxge_handle_be32, "I", "link up");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_tags_available",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 0,
	    mxge_handle_be32, "I", "rdma_tags_available");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_crc32",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_crc32, 0,
	    mxge_handle_be32, "I", "dropped_bad_crc32");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_phy",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_phy, 0,
	    mxge_handle_be32, "I", "dropped_bad_phy");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_error_or_filtered",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_error_or_filtered, 0,
	    mxge_handle_be32, "I", "dropped_link_error_or_filtered");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_overflow",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 0,
	    mxge_handle_be32, "I", "dropped_link_overflow");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_multicast_filtered",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_multicast_filtered, 0,
	    mxge_handle_be32, "I", "dropped_multicast_filtered");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_no_big_buffer",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 0,
	    mxge_handle_be32, "I", "dropped_no_big_buffer");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_no_small_buffer",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_small_buffer, 0,
	    mxge_handle_be32, "I", "dropped_no_small_buffer");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_overrun",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 0,
	    mxge_handle_be32, "I", "dropped_overrun");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_pause",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_pause, 0,
	    mxge_handle_be32, "I", "dropped_pause");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_runt",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 0,
	    mxge_handle_be32, "I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_unicast_filtered",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 0,
	    mxge_handle_be32, "I", "dropped_unicast_filtered");

	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx,
	    children, OID_AUTO, "slice", CTLFLAG_RD, 0, "");
	if (sc->slice_sysctl_tree == NULL) {
		device_printf(sc->dev, "can't add slice sysctl node\n");
		return;
	}

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		ksprintf(slice_num, "%d", slice);
		ss->sysctl_tree = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
		    slice_num, CTLFLAG_RD, 0, "");
		if (ss->sysctl_tree == NULL) {
			device_printf(sc->dev,
			    "can't add slice %d sysctl node\n", slice);
			return;	/* XXX continue? */
		}
		children = SYSCTL_CHILDREN(ss->sysctl_tree);

		/*
		 * XXX change to ULONG
		 */

		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_small_cnt",
		    CTLFLAG_RD, &ss->rx_data.rx_small.cnt, 0, "rx_small_cnt");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_big_cnt",
		    CTLFLAG_RD, &ss->rx_data.rx_big.cnt, 0, "rx_big_cnt");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_req",
		    CTLFLAG_RD, &ss->tx.req, 0, "tx_req");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_done",
		    CTLFLAG_RD, &ss->tx.done, 0, "tx_done");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_pkt_done",
		    CTLFLAG_RD, &ss->tx.pkt_done, 0, "tx_pkt_done");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_queue_active",
		    CTLFLAG_RD, &ss->tx.queue_active, 0, "tx_queue_active");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_activate",
		    CTLFLAG_RD, &ss->tx.activate, 0, "tx_activate");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_deactivate",
		    CTLFLAG_RD, &ss->tx.deactivate, 0, "tx_deactivate");
	}
}
/*
 * Copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * backwards one at a time and handle ring wraps
 */
static __inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
    mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;

	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx], &src[cnt], sizeof(*src));
		wmb();
	}
}

/*
 * Copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */
static __inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < cnt - 1; i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb();	/* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/*
		 * Submit all but the first request, and ensure
		 * that it is submitted below
		 */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* Submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb();	/* barrier before setting valid flag */
	}

	/* Re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}

static int
mxge_pullup_tso(struct mbuf **mp)
{
	int hoff, iphlen, thoff;
	struct mbuf *m;

	m = *mp;
	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));

	iphlen = m->m_pkthdr.csum_iphlen;
	thoff = m->m_pkthdr.csum_thlen;
	hoff = m->m_pkthdr.csum_lhlen;

	KASSERT(iphlen > 0, ("invalid ip hlen"));
	KASSERT(thoff > 0, ("invalid tcp hlen"));
	KASSERT(hoff > 0, ("invalid ether hlen"));

	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
		m = m_pullup(m, hoff + iphlen + thoff);
		if (m == NULL) {
			*mp = NULL;
			return ENOBUFS;
		}
		*mp = m;
	}
	return 0;
}
static int
mxge_encap_tso(mxge_tx_ring_t *tx, struct mxge_buffer_state *info_map,
    struct mbuf *m, int busdma_seg_cnt)
{
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss;
	uint8_t flags, flags_next;
	struct mxge_buffer_state *info_last;
	bus_dmamap_t map = info_map->map;

	mss = m->m_pkthdr.tso_segsz;

	/*
	 * Negative cum_len signifies to the send loop that we are
	 * still in the header portion of the TSO packet.
	 */
	cum_len = -(m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen +
	    m->m_pkthdr.csum_thlen);

	/*
	 * TSO implies checksum offload on this hardware
	 */
	cksum_offset = m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen;
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/*
	 * For TSO, pseudo_hdr_offset holds mss.  The firmware figures
	 * out where to put the checksum by parsing the header.
	 */
	pseudo_hdr_offset = htobe16(mss);

	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;

	/*
	 * "rdma_count" is the number of RDMAs belonging to the current
	 * packet BEFORE the current send request.  For non-TSO packets,
	 * this is equal to "count".
	 *
	 * For TSO packets, rdma_count needs to be reset to 0 after a
	 * segment cut.
	 *
	 * The rdma_count field of the send request is the number of
	 * RDMAs of the packet starting at that request.  For TSO send
	 * requests with one or more cuts in the middle, this is the
	 * number of RDMAs starting after the last cut in the request.
	 * All previous segments before the last cut implicitly have 1
	 * RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand, it must be
	 * filled-in retroactively - after each segmentation cut or at
	 * the end of the entire packet.
	 */

	while (busdma_seg_cnt) {
		/*
		 * Break the busdma segment up into pieces
		 */
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req - rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* Payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |=
				    next_is_first * MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* Header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
				    MXGEFW_FLAGS_FIRST |
				    (small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags =
			    flags | ((cum_len & 1) * MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (__predict_false(cksum_offset > seglen))
				cksum_offset -= seglen;
			else
				cksum_offset = 0;
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req - rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	info_last = &tx->info[((cnt - 1) + tx->req) & tx->mask];

	info_map->map = info_last->map;
	info_last->map = map;
	info_last->m = m;

	mxge_submit_req(tx, tx->req_list, cnt);

	if (tx->send_go != NULL && tx->queue_active == 0) {
		/* Tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
	return 0;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	return ENOBUFS;
}
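/*
 * To illustrate the retroactive rdma_count fixup in mxge_encap_tso()
 * with made-up numbers: if a packet maps to descriptors D0..D3 with no
 * MSS cut, each descriptor is written with a provisional rdma_count of
 * 1, and the final "(req - rdma_count)->rdma_count = rdma_count" stores
 * the true run length (4) back into D0, while D1..D3 keep their
 * provisional 1.  Had an MSS boundary been crossed at D2, rdma_count
 * would have been reset there (via the -(chop | next_is_first) mask)
 * and the run would restart at the descriptor beginning the next
 * segment, per the comment block above.
 */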
static int
mxge_encap(mxge_tx_ring_t *tx, struct mbuf *m, bus_addr_t zeropad)
{
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	bus_dmamap_t map;
	int cnt, cum_len, err, i, idx, odd_flag;
	uint16_t pseudo_hdr_offset;
	uint8_t flags, cksum_offset;
	struct mxge_buffer_state *info_map, *info_last;

	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
		err = mxge_pullup_tso(&m);
		if (__predict_false(err))
			return err;
	}

	/*
	 * Map the frame for DMA
	 */
	idx = tx->req & tx->mask;
	info_map = &tx->info[idx];
	map = info_map->map;

	err = bus_dmamap_load_mbuf_defrag(tx->dmat, map, &m,
	    tx->seg_list, tx->max_desc - 2, &cnt, BUS_DMA_NOWAIT);
	if (__predict_false(err != 0))
		goto drop;
	bus_dmamap_sync(tx->dmat, map, BUS_DMASYNC_PREWRITE);

	/*
	 * TSO is different enough, we handle it in another routine
	 */
	if (m->m_pkthdr.csum_flags & CSUM_TSO)
		return mxge_encap_tso(tx, info_map, m, cnt);

	req = tx->req_list;
	cksum_offset = 0;
	pseudo_hdr_offset = 0;
	flags = MXGEFW_FLAGS_NO_TSO;

	/*
	 * Checksum offloading
	 */
	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
		cksum_offset = m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen;
		pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
		pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
		req->cksum_offset = cksum_offset;
		flags |= MXGEFW_FLAGS_CKSUM;
		odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
	} else {
		odd_flag = 0;
	}
	if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
		flags |= MXGEFW_FLAGS_SMALL;

	/*
	 * Convert segments into a request list
	 */
	cum_len = 0;
	seg = tx->seg_list;
	req->flags = MXGEFW_FLAGS_FIRST;
	for (i = 0; i < cnt; i++) {
		req->addr_low = htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
		req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		req->length = htobe16(seg->ds_len);
		req->cksum_offset = cksum_offset;
		if (cksum_offset > seg->ds_len)
			cksum_offset -= seg->ds_len;
		else
			cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0;	/* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cum_len += seg->ds_len;
		seg++;
		req++;
		req->flags = 0;
	}
	req--;

	/*
	 * Pad runt to 60 bytes
	 */
	if (cum_len < 60) {
		req++;
		req->addr_low = htobe32(MXGE_LOWPART_TO_U32(zeropad));
		req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(zeropad));
		req->length = htobe16(60 - cum_len);
		req->cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0;	/* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cnt++;
	}

	tx->req_list[0].rdma_count = cnt;
#if 0
	/* print what the firmware will see */
	for (i = 0; i < cnt; i++) {
		kprintf("%d: addr: 0x%x 0x%x len:%d pso%d,"
		    "cso:%d, flags:0x%x, rdma:%d\n",
		    i, (int)ntohl(tx->req_list[i].addr_high),
		    (int)ntohl(tx->req_list[i].addr_low),
		    (int)ntohs(tx->req_list[i].length),
		    (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
		    tx->req_list[i].cksum_offset, tx->req_list[i].flags,
		    tx->req_list[i].rdma_count);
	}
	kprintf("--------------\n");
#endif
	info_last = &tx->info[((cnt - 1) + tx->req) & tx->mask];

	info_map->map = info_last->map;
	info_last->map = map;
	info_last->m = m;

	mxge_submit_req(tx, tx->req_list, cnt);

	if (tx->send_go != NULL && tx->queue_active == 0) {
		/* Tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
	return 0;

drop:
	m_freem(m);
	return err;
}
static void
mxge_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
{
	mxge_softc_t *sc = ifp->if_softc;
	mxge_tx_ring_t *tx = ifsq_get_priv(ifsq);
	bus_addr_t zeropad;
	int encap = 0;

	KKASSERT(tx->ifsq == ifsq);
	ASSERT_SERIALIZED(&tx->tx_serialize);

	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
		return;

	zeropad = sc->zeropad_dma.dmem_busaddr;
	while (tx->mask - (tx->req - tx->done) > tx->max_desc) {
		struct mbuf *m;
		int error;

		m = ifsq_dequeue(ifsq);
		if (m == NULL)
			goto done;

		BPF_MTAP(ifp, m);
		error = mxge_encap(tx, m, zeropad);
		if (!error)
			encap = 1;
		else
			IFNET_STAT_INC(ifp, oerrors, 1);
	}

	/* Ran out of transmit slots */
	ifsq_set_oactive(ifsq);
done:
	if (encap)
		tx->watchdog.wd_timer = 5;
}

static void
mxge_watchdog(struct ifaltq_subque *ifsq)
{
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	struct mxge_softc *sc = ifp->if_softc;
	uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
	mxge_tx_ring_t *tx = ifsq_get_priv(ifsq);

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	/* Check for pause blocking before resetting */
	if (tx->watchdog_rx_pause == rx_pause) {
		mxge_warn_stuck(sc, tx, 0);
		mxge_watchdog_reset(sc);
		return;
	} else {
		if_printf(ifp, "Flow control blocking xmits, "
		    "check link partner\n");
	}
	tx->watchdog_rx_pause = rx_pause;
}
error = mxge_encap(tx, m, zeropad); 2007 if (!error) 2008 encap = 1; 2009 else 2010 IFNET_STAT_INC(ifp, oerrors, 1); 2011 } 2012 2013 /* Ran out of transmit slots */ 2014 ifsq_set_oactive(ifsq); 2015 done: 2016 if (encap) 2017 tx->watchdog.wd_timer = 5; 2018 } 2019 2020 static void 2021 mxge_watchdog(struct ifaltq_subque *ifsq) 2022 { 2023 struct ifnet *ifp = ifsq_get_ifp(ifsq); 2024 struct mxge_softc *sc = ifp->if_softc; 2025 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 2026 mxge_tx_ring_t *tx = ifsq_get_priv(ifsq); 2027 2028 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2029 2030 /* Check for pause blocking before resetting */ 2031 if (tx->watchdog_rx_pause == rx_pause) { 2032 mxge_warn_stuck(sc, tx, 0); 2033 mxge_watchdog_reset(sc); 2034 return; 2035 } else { 2036 if_printf(ifp, "Flow control blocking xmits, " 2037 "check link partner\n"); 2038 } 2039 tx->watchdog_rx_pause = rx_pause; 2040 } 2041 2042 /* 2043 * Copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2044 * at most 32 bytes at a time, so as to avoid involving the software 2045 * pio handler in the nic. We re-write the first segment's low 2046 * DMA address to mark it valid only after we write the entire chunk 2047 * in a burst 2048 */ 2049 static __inline void 2050 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2051 mcp_kreq_ether_recv_t *src) 2052 { 2053 uint32_t low; 2054 2055 low = src->addr_low; 2056 src->addr_low = 0xffffffff; 2057 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2058 wmb(); 2059 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2060 wmb(); 2061 src->addr_low = low; 2062 dst->addr_low = low; 2063 wmb(); 2064 } 2065 2066 static int 2067 mxge_get_buf_small(mxge_rx_ring_t *rx, bus_dmamap_t map, int idx, 2068 boolean_t init) 2069 { 2070 bus_dma_segment_t seg; 2071 struct mbuf *m; 2072 int cnt, err, mflag; 2073 2074 mflag = MB_DONTWAIT; 2075 if (__predict_false(init)) 2076 mflag = MB_WAIT; 2077 2078 m = m_gethdr(mflag, MT_DATA); 2079 if (m == NULL) { 2080 err = ENOBUFS; 2081 if (__predict_false(init)) { 2082 /* 2083 * During initialization, there 2084 * is nothing to setup; bail out 2085 */ 2086 return err; 2087 } 2088 goto done; 2089 } 2090 m->m_len = m->m_pkthdr.len = MHLEN; 2091 2092 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m, 2093 &seg, 1, &cnt, BUS_DMA_NOWAIT); 2094 if (err != 0) { 2095 m_freem(m); 2096 if (__predict_false(init)) { 2097 /* 2098 * During initialization, there 2099 * is nothing to setup; bail out 2100 */ 2101 return err; 2102 } 2103 goto done; 2104 } 2105 2106 rx->info[idx].m = m; 2107 rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2108 rx->shadow[idx].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2109 2110 done: 2111 if ((idx & 7) == 7) 2112 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2113 return err; 2114 } 2115 2116 static int 2117 mxge_get_buf_big(mxge_rx_ring_t *rx, bus_dmamap_t map, int idx, 2118 boolean_t init) 2119 { 2120 bus_dma_segment_t seg; 2121 struct mbuf *m; 2122 int cnt, err, mflag; 2123 2124 mflag = MB_DONTWAIT; 2125 if (__predict_false(init)) 2126 mflag = MB_WAIT; 2127 2128 if (rx->cl_size == MCLBYTES) 2129 m = m_getcl(mflag, MT_DATA, M_PKTHDR); 2130 else 2131 m = m_getjcl(mflag, MT_DATA, M_PKTHDR, MJUMPAGESIZE); 2132 if (m == NULL) { 2133 err = ENOBUFS; 2134 if (__predict_false(init)) { 2135 /* 2136 * During initialization, there 2137 * is nothing to setup; bail out 2138 */ 2139 return err; 2140 } 2141 goto done; 2142 } 2143 m->m_len = m->m_pkthdr.len = rx->cl_size; 2144 2145 err = 
bus_dmamap_load_mbuf_segment(rx->dmat, map, m, 2146 &seg, 1, &cnt, BUS_DMA_NOWAIT); 2147 if (err != 0) { 2148 m_freem(m); 2149 if (__predict_false(init)) { 2150 /* 2151 * During initialization, there 2152 * is nothing to setup; bail out 2153 */ 2154 return err; 2155 } 2156 goto done; 2157 } 2158 2159 rx->info[idx].m = m; 2160 rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2161 rx->shadow[idx].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2162 2163 done: 2164 if ((idx & 7) == 7) 2165 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2166 return err; 2167 } 2168 2169 /* 2170 * Myri10GE hardware checksums are not valid if the sender 2171 * padded the frame with non-zero padding. This is because 2172 * the firmware just does a simple 16-bit 1s complement 2173 * checksum across the entire frame, excluding the first 14 2174 * bytes. It is best to simply check the checksum and 2175 * tell the stack about it only if the checksum is good. 2176 */ 2177 static __inline uint16_t 2178 mxge_rx_csum(struct mbuf *m, int csum) 2179 { 2180 const struct ether_header *eh; 2181 const struct ip *ip; 2182 uint16_t c; 2183 2184 eh = mtod(m, const struct ether_header *); 2185 2186 /* Only deal with IPv4 TCP & UDP for now */ 2187 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP))) 2188 return 1; 2189 2190 ip = (const struct ip *)(eh + 1); 2191 if (__predict_false(ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)) 2192 return 1; 2193 2194 #ifdef INET 2195 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2196 htonl(ntohs(csum) + ntohs(ip->ip_len) 2197 - (ip->ip_hl << 2) + ip->ip_p)); 2198 #else 2199 c = 1; 2200 #endif 2201 c ^= 0xffff; 2202 return c; 2203 } 2204 2205 static void 2206 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2207 { 2208 struct ether_vlan_header *evl; 2209 uint32_t partial; 2210 2211 evl = mtod(m, struct ether_vlan_header *); 2212 2213 /* 2214 * Fix checksum by subtracting EVL_ENCAPLEN bytes after 2215 * what the firmware thought was the end of the ethernet 2216 * header. 2217 */ 2218 2219 /* Put checksum into host byte order */ 2220 *csum = ntohs(*csum); 2221 2222 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2223 *csum += ~partial; 2224 *csum += ((*csum) < ~partial); 2225 *csum = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2226 *csum = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2227 2228 /* 2229 * Restore checksum to network byte order; 2230 * later consumers expect this 2231 */ 2232 *csum = htons(*csum); 2233 2234 /* save the tag */ 2235 m->m_pkthdr.ether_vlantag = ntohs(evl->evl_tag); 2236 m->m_flags |= M_VLANTAG; 2237 2238 /* 2239 * Remove the 802.1q header by copying the Ethernet 2240 * addresses over it and adjusting the beginning of 2241 * the data in the mbuf. The encapsulated Ethernet 2242 * type field is already in place. 
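 * After the bcopy() below, m_adj() strips EVL_ENCAPLEN bytes from
 * the front of the mbuf, so the rest of the stack sees an ordinary
 * untagged frame while the tag itself travels in the pkthdr.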
2243 */ 2244 bcopy((char *)evl, (char *)evl + EVL_ENCAPLEN, 2245 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2246 m_adj(m, EVL_ENCAPLEN); 2247 } 2248 2249 2250 static __inline void 2251 mxge_rx_done_big(struct ifnet *ifp, mxge_rx_ring_t *rx, 2252 uint32_t len, uint32_t csum) 2253 { 2254 struct mbuf *m; 2255 const struct ether_header *eh; 2256 bus_dmamap_t old_map; 2257 int idx; 2258 2259 idx = rx->cnt & rx->mask; 2260 rx->cnt++; 2261 2262 /* Save a pointer to the received mbuf */ 2263 m = rx->info[idx].m; 2264 2265 /* Try to replace the received mbuf */ 2266 if (mxge_get_buf_big(rx, rx->extra_map, idx, FALSE)) { 2267 /* Drop the frame -- the old mbuf is re-cycled */ 2268 IFNET_STAT_INC(ifp, ierrors, 1); 2269 return; 2270 } 2271 2272 /* Unmap the received buffer */ 2273 old_map = rx->info[idx].map; 2274 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2275 bus_dmamap_unload(rx->dmat, old_map); 2276 2277 /* Swap the bus_dmamap_t's */ 2278 rx->info[idx].map = rx->extra_map; 2279 rx->extra_map = old_map; 2280 2281 /* 2282 * mcp implicitly skips 1st 2 bytes so that packet is properly 2283 * aligned 2284 */ 2285 m->m_data += MXGEFW_PAD; 2286 2287 m->m_pkthdr.rcvif = ifp; 2288 m->m_len = m->m_pkthdr.len = len; 2289 2290 IFNET_STAT_INC(ifp, ipackets, 1); 2291 2292 eh = mtod(m, const struct ether_header *); 2293 if (eh->ether_type == htons(ETHERTYPE_VLAN)) 2294 mxge_vlan_tag_remove(m, &csum); 2295 2296 /* If the checksum is valid, mark it in the mbuf header */ 2297 if ((ifp->if_capenable & IFCAP_RXCSUM) && 2298 mxge_rx_csum(m, csum) == 0) { 2299 /* Tell the stack that the checksum is good */ 2300 m->m_pkthdr.csum_data = 0xffff; 2301 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2302 CSUM_DATA_VALID; 2303 } 2304 ifp->if_input(ifp, m); 2305 } 2306 2307 static __inline void 2308 mxge_rx_done_small(struct ifnet *ifp, mxge_rx_ring_t *rx, 2309 uint32_t len, uint32_t csum) 2310 { 2311 const struct ether_header *eh; 2312 struct mbuf *m; 2313 bus_dmamap_t old_map; 2314 int idx; 2315 2316 idx = rx->cnt & rx->mask; 2317 rx->cnt++; 2318 2319 /* Save a pointer to the received mbuf */ 2320 m = rx->info[idx].m; 2321 2322 /* Try to replace the received mbuf */ 2323 if (mxge_get_buf_small(rx, rx->extra_map, idx, FALSE)) { 2324 /* Drop the frame -- the old mbuf is re-cycled */ 2325 IFNET_STAT_INC(ifp, ierrors, 1); 2326 return; 2327 } 2328 2329 /* Unmap the received buffer */ 2330 old_map = rx->info[idx].map; 2331 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2332 bus_dmamap_unload(rx->dmat, old_map); 2333 2334 /* Swap the bus_dmamap_t's */ 2335 rx->info[idx].map = rx->extra_map; 2336 rx->extra_map = old_map; 2337 2338 /* 2339 * mcp implicitly skips 1st 2 bytes so that packet is properly 2340 * aligned 2341 */ 2342 m->m_data += MXGEFW_PAD; 2343 2344 m->m_pkthdr.rcvif = ifp; 2345 m->m_len = m->m_pkthdr.len = len; 2346 2347 IFNET_STAT_INC(ifp, ipackets, 1); 2348 2349 eh = mtod(m, const struct ether_header *); 2350 if (eh->ether_type == htons(ETHERTYPE_VLAN)) 2351 mxge_vlan_tag_remove(m, &csum); 2352 2353 /* If the checksum is valid, mark it in the mbuf header */ 2354 if ((ifp->if_capenable & IFCAP_RXCSUM) && 2355 mxge_rx_csum(m, csum) == 0) { 2356 /* Tell the stack that the checksum is good */ 2357 m->m_pkthdr.csum_data = 0xffff; 2358 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2359 CSUM_DATA_VALID; 2360 } 2361 ifp->if_input(ifp, m); 2362 } 2363 2364 static __inline void 2365 mxge_clean_rx_done(struct ifnet *ifp, struct mxge_rx_data *rx_data, int cycle) 2366 { 2367 mxge_rx_done_t *rx_done = &rx_data->rx_done; 
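	/*
	 * A non-zero length marks a slot the firmware has filled in.
	 * The callers differ only in the budget they pass:
	 *   interrupt paths: mxge_clean_rx_done(ifp, rx_data, -1),
	 *   i.e. drain everything;
	 *   polling path: a positive cycle count, counted down below.
	 */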
2368 2369 while (rx_done->entry[rx_done->idx].length != 0 && cycle != 0) { 2370 uint16_t length, checksum; 2371 2372 length = ntohs(rx_done->entry[rx_done->idx].length); 2373 rx_done->entry[rx_done->idx].length = 0; 2374 2375 checksum = rx_done->entry[rx_done->idx].checksum; 2376 2377 if (length <= MXGE_RX_SMALL_BUFLEN) { 2378 mxge_rx_done_small(ifp, &rx_data->rx_small, 2379 length, checksum); 2380 } else { 2381 mxge_rx_done_big(ifp, &rx_data->rx_big, 2382 length, checksum); 2383 } 2384 2385 rx_done->idx++; 2386 rx_done->idx &= rx_done->mask; 2387 --cycle; 2388 } 2389 } 2390 2391 static __inline void 2392 mxge_tx_done(struct ifnet *ifp, mxge_tx_ring_t *tx, uint32_t mcp_idx) 2393 { 2394 ASSERT_SERIALIZED(&tx->tx_serialize); 2395 2396 while (tx->pkt_done != mcp_idx) { 2397 struct mbuf *m; 2398 int idx; 2399 2400 idx = tx->done & tx->mask; 2401 tx->done++; 2402 2403 m = tx->info[idx].m; 2404 /* 2405 * The mbuf and DMA map are attached to only one 2406 * descriptor per packet; the other slots carry NULL. 2407 */ 2408 if (m != NULL) { 2409 tx->pkt_done++; 2410 IFNET_STAT_INC(ifp, opackets, 1); 2411 tx->info[idx].m = NULL; 2412 bus_dmamap_unload(tx->dmat, tx->info[idx].map); 2413 m_freem(m); 2414 } 2415 } 2416 2417 /* 2418 * If we have space, clear OACTIVE to tell the stack that 2419 * it's OK to send packets 2420 */ 2421 if (tx->req - tx->done < (tx->mask + 1) / 2) { 2422 ifsq_clr_oactive(tx->ifsq); 2423 if (tx->req == tx->done) { 2424 /* Reset watchdog */ 2425 tx->watchdog.wd_timer = 0; 2426 } 2427 } 2428 2429 if (!ifsq_is_empty(tx->ifsq)) 2430 ifsq_devstart(tx->ifsq); 2431 2432 if (tx->send_stop != NULL && tx->req == tx->done) { 2433 /* 2434 * Let the NIC stop polling this queue, since there 2435 * are no more transmits pending 2436 */ 2437 *tx->send_stop = 1; 2438 tx->queue_active = 0; 2439 tx->deactivate++; 2440 wmb(); 2441 } 2442 } 2443 2444 static struct mxge_media_type mxge_xfp_media_types[] = { 2445 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2446 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2447 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2448 {0, (1 << 5), "10GBASE-ER"}, 2449 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2450 {0, (1 << 3), "10GBASE-SW"}, 2451 {0, (1 << 2), "10GBASE-LW"}, 2452 {0, (1 << 1), "10GBASE-EW"}, 2453 {0, (1 << 0), "Reserved"} 2454 }; 2455 2456 static struct mxge_media_type mxge_sfp_media_types[] = { 2457 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"}, 2458 {0, (1 << 7), "Reserved"}, 2459 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2460 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2461 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}, 2462 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"} 2463 }; 2464 2465 static void 2466 mxge_media_set(mxge_softc_t *sc, int media_type) 2467 { 2468 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type, 0, NULL); 2469 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type); 2470 sc->current_media = media_type; 2471 sc->media.ifm_media = sc->media.ifm_cur->ifm_media; 2472 } 2473 2474 static void 2475 mxge_media_init(mxge_softc_t *sc) 2476 { 2477 const char *ptr; 2478 int i; 2479 2480 ifmedia_removeall(&sc->media); 2481 mxge_media_set(sc, IFM_AUTO); 2482 2483 /* 2484 * Parse the product code to determine the interface type 2485 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2486 * after the 3rd dash in the driver's cached copy of the 2487 * EEPROM's product code string. 
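 * For example, a product code shaped like "10G-PCIE-8B-C" (a made-up
 * value, shown only to illustrate the format) would select CX4 in
 * the tests below.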
2488 */ 2489 ptr = sc->product_code_string; 2490 if (ptr == NULL) { 2491 if_printf(sc->ifp, "Missing product code\n"); 2492 return; 2493 } 2494 2495 for (i = 0; i < 3; i++, ptr++) { 2496 ptr = strchr(ptr, '-'); 2497 if (ptr == NULL) { 2498 if_printf(sc->ifp, "only %d dashes in PC?!?\n", i); 2499 return; 2500 } 2501 } 2502 if (*ptr == 'C' || *(ptr + 1) == 'C') { 2503 /* -C is CX4 */ 2504 sc->connector = MXGE_CX4; 2505 mxge_media_set(sc, IFM_10G_CX4); 2506 } else if (*ptr == 'Q') { 2507 /* -Q is Quad Ribbon Fiber */ 2508 sc->connector = MXGE_QRF; 2509 if_printf(sc->ifp, "Quad Ribbon Fiber Media\n"); 2510 /* DragonFly has no media type for Quad ribbon fiber */ 2511 } else if (*ptr == 'R') { 2512 /* -R is XFP */ 2513 sc->connector = MXGE_XFP; 2514 } else if (*ptr == 'S' || *(ptr + 1) == 'S') { 2515 /* -S or -2S is SFP+ */ 2516 sc->connector = MXGE_SFP; 2517 } else { 2518 if_printf(sc->ifp, "Unknown media type: %c\n", *ptr); 2519 } 2520 } 2521 2522 /* 2523 * Determine the media type for a NIC. Some XFPs will identify 2524 * themselves only when their link is up, so this is initiated via a 2525 * link up interrupt. However, this can potentially take up to 2526 * several milliseconds, so it is run via the watchdog routine, rather 2527 * than in the interrupt handler itself. 2528 */ 2529 static void 2530 mxge_media_probe(mxge_softc_t *sc) 2531 { 2532 mxge_cmd_t cmd; 2533 const char *cage_type; 2534 struct mxge_media_type *mxge_media_types = NULL; 2535 int i, err, ms, mxge_media_type_entries; 2536 uint32_t byte; 2537 2538 sc->need_media_probe = 0; 2539 2540 if (sc->connector == MXGE_XFP) { 2541 /* -R is XFP */ 2542 mxge_media_types = mxge_xfp_media_types; 2543 mxge_media_type_entries = sizeof(mxge_xfp_media_types) / 2544 sizeof(mxge_xfp_media_types[0]); 2545 byte = MXGE_XFP_COMPLIANCE_BYTE; 2546 cage_type = "XFP"; 2547 } else if (sc->connector == MXGE_SFP) { 2548 /* -S or -2S is SFP+ */ 2549 mxge_media_types = mxge_sfp_media_types; 2550 mxge_media_type_entries = sizeof(mxge_sfp_media_types) / 2551 sizeof(mxge_sfp_media_types[0]); 2552 cage_type = "SFP+"; 2553 byte = 3; 2554 } else { 2555 /* nothing to do; media type cannot change */ 2556 return; 2557 } 2558 2559 /* 2560 * At this point we know the NIC has an XFP cage, so now we 2561 * try to determine what is in the cage by using the 2562 * firmware's XFP I2C commands to read the XFP 10GbE compliance 2563 * register. 
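 * (For an SFP+ cage the same flow applies, with the identifying
 * byte at offset 3, as selected above.)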
We read just one byte, which may take over 2564 * a millisecond 2565 */ 2566 2567 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2568 cmd.data1 = byte; 2569 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2570 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) 2571 if_printf(sc->ifp, "failed to read XFP\n"); 2572 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) 2573 if_printf(sc->ifp, "Type R/S with no XFP!?!?\n"); 2574 if (err != MXGEFW_CMD_OK) 2575 return; 2576 2577 /* Now we wait for the data to be cached */ 2578 cmd.data0 = byte; 2579 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2580 for (ms = 0; err == EBUSY && ms < 50; ms++) { 2581 DELAY(1000); 2582 cmd.data0 = byte; 2583 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2584 } 2585 if (err != MXGEFW_CMD_OK) { 2586 if_printf(sc->ifp, "failed to read %s (%d, %dms)\n", 2587 cage_type, err, ms); 2588 return; 2589 } 2590 2591 if (cmd.data0 == mxge_media_types[0].bitmask) { 2592 if (bootverbose) { 2593 if_printf(sc->ifp, "%s:%s\n", cage_type, 2594 mxge_media_types[0].name); 2595 } 2596 if (sc->current_media != mxge_media_types[0].flag) { 2597 mxge_media_init(sc); 2598 mxge_media_set(sc, mxge_media_types[0].flag); 2599 } 2600 return; 2601 } 2602 for (i = 1; i < mxge_media_type_entries; i++) { 2603 if (cmd.data0 & mxge_media_types[i].bitmask) { 2604 if (bootverbose) { 2605 if_printf(sc->ifp, "%s:%s\n", cage_type, 2606 mxge_media_types[i].name); 2607 } 2608 2609 if (sc->current_media != mxge_media_types[i].flag) { 2610 mxge_media_init(sc); 2611 mxge_media_set(sc, mxge_media_types[i].flag); 2612 } 2613 return; 2614 } 2615 } 2616 if (bootverbose) { 2617 if_printf(sc->ifp, "%s media 0x%x unknown\n", cage_type, 2618 cmd.data0); 2619 } 2620 } 2621 2622 static void 2623 mxge_intr_status(struct mxge_softc *sc, const mcp_irq_data_t *stats) 2624 { 2625 if (sc->link_state != stats->link_up) { 2626 sc->link_state = stats->link_up; 2627 if (sc->link_state) { 2628 sc->ifp->if_link_state = LINK_STATE_UP; 2629 if_link_state_change(sc->ifp); 2630 if (bootverbose) 2631 if_printf(sc->ifp, "link up\n"); 2632 } else { 2633 sc->ifp->if_link_state = LINK_STATE_DOWN; 2634 if_link_state_change(sc->ifp); 2635 if (bootverbose) 2636 if_printf(sc->ifp, "link down\n"); 2637 } 2638 sc->need_media_probe = 1; 2639 } 2640 2641 if (sc->rdma_tags_available != be32toh(stats->rdma_tags_available)) { 2642 sc->rdma_tags_available = be32toh(stats->rdma_tags_available); 2643 if_printf(sc->ifp, "RDMA timed out! 
%d tags left\n", 2644 sc->rdma_tags_available); 2645 } 2646 2647 if (stats->link_down) { 2648 sc->down_cnt += stats->link_down; 2649 sc->link_state = 0; 2650 sc->ifp->if_link_state = LINK_STATE_DOWN; 2651 if_link_state_change(sc->ifp); 2652 } 2653 } 2654 2655 static void 2656 mxge_serialize_skipmain(struct mxge_softc *sc) 2657 { 2658 lwkt_serialize_array_enter(sc->serializes, sc->nserialize, 1); 2659 } 2660 2661 static void 2662 mxge_deserialize_skipmain(struct mxge_softc *sc) 2663 { 2664 lwkt_serialize_array_exit(sc->serializes, sc->nserialize, 1); 2665 } 2666 2667 static void 2668 mxge_legacy(void *arg) 2669 { 2670 struct mxge_slice_state *ss = arg; 2671 mxge_softc_t *sc = ss->sc; 2672 mcp_irq_data_t *stats = ss->fw_stats; 2673 mxge_tx_ring_t *tx = &ss->tx; 2674 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done; 2675 uint32_t send_done_count; 2676 uint8_t valid; 2677 2678 ASSERT_SERIALIZED(&sc->main_serialize); 2679 2680 /* Make sure the DMA has finished */ 2681 if (!stats->valid) 2682 return; 2683 valid = stats->valid; 2684 2685 /* Lower legacy IRQ */ 2686 *sc->irq_deassert = 0; 2687 if (!mxge_deassert_wait) { 2688 /* Don't wait for conf. that irq is low */ 2689 stats->valid = 0; 2690 } 2691 2692 mxge_serialize_skipmain(sc); 2693 2694 /* 2695 * Loop while waiting for legacy irq deassertion 2696 * XXX do we really want to loop? 2697 */ 2698 do { 2699 /* Check for transmit completes and receives */ 2700 send_done_count = be32toh(stats->send_done_count); 2701 while ((send_done_count != tx->pkt_done) || 2702 (rx_done->entry[rx_done->idx].length != 0)) { 2703 if (send_done_count != tx->pkt_done) { 2704 mxge_tx_done(&sc->arpcom.ac_if, tx, 2705 (int)send_done_count); 2706 } 2707 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1); 2708 send_done_count = be32toh(stats->send_done_count); 2709 } 2710 if (mxge_deassert_wait) 2711 wmb(); 2712 } while (*((volatile uint8_t *)&stats->valid)); 2713 2714 mxge_deserialize_skipmain(sc); 2715 2716 /* Fw link & error stats meaningful only on the first slice */ 2717 if (__predict_false(stats->stats_updated)) 2718 mxge_intr_status(sc, stats); 2719 2720 /* Check to see if we have rx token to pass back */ 2721 if (valid & 0x1) 2722 *ss->irq_claim = be32toh(3); 2723 *(ss->irq_claim + 1) = be32toh(3); 2724 } 2725 2726 static void 2727 mxge_msi(void *arg) 2728 { 2729 struct mxge_slice_state *ss = arg; 2730 mxge_softc_t *sc = ss->sc; 2731 mcp_irq_data_t *stats = ss->fw_stats; 2732 mxge_tx_ring_t *tx = &ss->tx; 2733 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done; 2734 uint32_t send_done_count; 2735 uint8_t valid; 2736 #ifndef IFPOLL_ENABLE 2737 const boolean_t polling = FALSE; 2738 #else 2739 boolean_t polling = FALSE; 2740 #endif 2741 2742 ASSERT_SERIALIZED(&sc->main_serialize); 2743 2744 /* Make sure the DMA has finished */ 2745 if (__predict_false(!stats->valid)) 2746 return; 2747 2748 valid = stats->valid; 2749 stats->valid = 0; 2750 2751 #ifdef IFPOLL_ENABLE 2752 if (sc->arpcom.ac_if.if_flags & IFF_NPOLLING) 2753 polling = TRUE; 2754 #endif 2755 2756 if (!polling) { 2757 /* Check for receives */ 2758 lwkt_serialize_enter(&ss->rx_data.rx_serialize); 2759 if (rx_done->entry[rx_done->idx].length != 0) 2760 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1); 2761 lwkt_serialize_exit(&ss->rx_data.rx_serialize); 2762 } 2763 2764 /* 2765 * Check for transmit completes 2766 * 2767 * NOTE: 2768 * Since pkt_done is only changed by mxge_tx_done(), 2769 * which is called only in interrupt handler, the 2770 * check w/o holding tx serializer is MPSAFE. 
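 * The tx serializer is still taken around mxge_tx_done() itself,
 * since that routine updates the ring indices.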
2771 */ 2772 send_done_count = be32toh(stats->send_done_count); 2773 if (send_done_count != tx->pkt_done) { 2774 lwkt_serialize_enter(&tx->tx_serialize); 2775 mxge_tx_done(&sc->arpcom.ac_if, tx, (int)send_done_count); 2776 lwkt_serialize_exit(&tx->tx_serialize); 2777 } 2778 2779 if (__predict_false(stats->stats_updated)) 2780 mxge_intr_status(sc, stats); 2781 2782 /* Check to see if we have rx token to pass back */ 2783 if (!polling && (valid & 0x1)) 2784 *ss->irq_claim = be32toh(3); 2785 *(ss->irq_claim + 1) = be32toh(3); 2786 } 2787 2788 static void 2789 mxge_msix_rx(void *arg) 2790 { 2791 struct mxge_slice_state *ss = arg; 2792 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done; 2793 2794 #ifdef IFPOLL_ENABLE 2795 if (ss->sc->arpcom.ac_if.if_flags & IFF_NPOLLING) 2796 return; 2797 #endif 2798 2799 ASSERT_SERIALIZED(&ss->rx_data.rx_serialize); 2800 2801 if (rx_done->entry[rx_done->idx].length != 0) 2802 mxge_clean_rx_done(&ss->sc->arpcom.ac_if, &ss->rx_data, -1); 2803 2804 *ss->irq_claim = be32toh(3); 2805 } 2806 2807 static void 2808 mxge_msix_rxtx(void *arg) 2809 { 2810 struct mxge_slice_state *ss = arg; 2811 mxge_softc_t *sc = ss->sc; 2812 mcp_irq_data_t *stats = ss->fw_stats; 2813 mxge_tx_ring_t *tx = &ss->tx; 2814 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done; 2815 uint32_t send_done_count; 2816 uint8_t valid; 2817 #ifndef IFPOLL_ENABLE 2818 const boolean_t polling = FALSE; 2819 #else 2820 boolean_t polling = FALSE; 2821 #endif 2822 2823 ASSERT_SERIALIZED(&ss->rx_data.rx_serialize); 2824 2825 /* Make sure the DMA has finished */ 2826 if (__predict_false(!stats->valid)) 2827 return; 2828 2829 valid = stats->valid; 2830 stats->valid = 0; 2831 2832 #ifdef IFPOLL_ENABLE 2833 if (sc->arpcom.ac_if.if_flags & IFF_NPOLLING) 2834 polling = TRUE; 2835 #endif 2836 2837 /* Check for receives */ 2838 if (!polling && rx_done->entry[rx_done->idx].length != 0) 2839 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1); 2840 2841 /* 2842 * Check for transmit completes 2843 * 2844 * NOTE: 2845 * Since pkt_done is only changed by mxge_tx_done(), 2846 * which is called only in interrupt handler, the 2847 * check w/o holding tx serializer is MPSAFE. 
2848 */ 2849 send_done_count = be32toh(stats->send_done_count); 2850 if (send_done_count != tx->pkt_done) { 2851 lwkt_serialize_enter(&tx->tx_serialize); 2852 mxge_tx_done(&sc->arpcom.ac_if, tx, (int)send_done_count); 2853 lwkt_serialize_exit(&tx->tx_serialize); 2854 } 2855 2856 /* Check to see if we have rx token to pass back */ 2857 if (!polling && (valid & 0x1)) 2858 *ss->irq_claim = be32toh(3); 2859 *(ss->irq_claim + 1) = be32toh(3); 2860 } 2861 2862 static void 2863 mxge_init(void *arg) 2864 { 2865 struct mxge_softc *sc = arg; 2866 2867 ASSERT_IFNET_SERIALIZED_ALL(sc->ifp); 2868 if ((sc->ifp->if_flags & IFF_RUNNING) == 0) 2869 mxge_open(sc); 2870 } 2871 2872 static void 2873 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 2874 { 2875 int i; 2876 2877 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 2878 if (ss->rx_data.rx_big.info[i].m == NULL) 2879 continue; 2880 bus_dmamap_unload(ss->rx_data.rx_big.dmat, 2881 ss->rx_data.rx_big.info[i].map); 2882 m_freem(ss->rx_data.rx_big.info[i].m); 2883 ss->rx_data.rx_big.info[i].m = NULL; 2884 } 2885 2886 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) { 2887 if (ss->rx_data.rx_small.info[i].m == NULL) 2888 continue; 2889 bus_dmamap_unload(ss->rx_data.rx_small.dmat, 2890 ss->rx_data.rx_small.info[i].map); 2891 m_freem(ss->rx_data.rx_small.info[i].m); 2892 ss->rx_data.rx_small.info[i].m = NULL; 2893 } 2894 2895 /* Transmit ring used only on the first slice */ 2896 if (ss->tx.info == NULL) 2897 return; 2898 2899 for (i = 0; i <= ss->tx.mask; i++) { 2900 if (ss->tx.info[i].m == NULL) 2901 continue; 2902 bus_dmamap_unload(ss->tx.dmat, ss->tx.info[i].map); 2903 m_freem(ss->tx.info[i].m); 2904 ss->tx.info[i].m = NULL; 2905 } 2906 } 2907 2908 static void 2909 mxge_free_mbufs(mxge_softc_t *sc) 2910 { 2911 int slice; 2912 2913 for (slice = 0; slice < sc->num_slices; slice++) 2914 mxge_free_slice_mbufs(&sc->ss[slice]); 2915 } 2916 2917 static void 2918 mxge_free_slice_rings(struct mxge_slice_state *ss) 2919 { 2920 int i; 2921 2922 if (ss->rx_data.rx_done.entry != NULL) { 2923 mxge_dma_free(&ss->rx_done_dma); 2924 ss->rx_data.rx_done.entry = NULL; 2925 } 2926 2927 if (ss->tx.req_list != NULL) { 2928 kfree(ss->tx.req_list, M_DEVBUF); 2929 ss->tx.req_list = NULL; 2930 } 2931 2932 if (ss->tx.seg_list != NULL) { 2933 kfree(ss->tx.seg_list, M_DEVBUF); 2934 ss->tx.seg_list = NULL; 2935 } 2936 2937 if (ss->rx_data.rx_small.shadow != NULL) { 2938 kfree(ss->rx_data.rx_small.shadow, M_DEVBUF); 2939 ss->rx_data.rx_small.shadow = NULL; 2940 } 2941 2942 if (ss->rx_data.rx_big.shadow != NULL) { 2943 kfree(ss->rx_data.rx_big.shadow, M_DEVBUF); 2944 ss->rx_data.rx_big.shadow = NULL; 2945 } 2946 2947 if (ss->tx.info != NULL) { 2948 if (ss->tx.dmat != NULL) { 2949 for (i = 0; i <= ss->tx.mask; i++) { 2950 bus_dmamap_destroy(ss->tx.dmat, 2951 ss->tx.info[i].map); 2952 } 2953 bus_dma_tag_destroy(ss->tx.dmat); 2954 } 2955 kfree(ss->tx.info, M_DEVBUF); 2956 ss->tx.info = NULL; 2957 } 2958 2959 if (ss->rx_data.rx_small.info != NULL) { 2960 if (ss->rx_data.rx_small.dmat != NULL) { 2961 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) { 2962 bus_dmamap_destroy(ss->rx_data.rx_small.dmat, 2963 ss->rx_data.rx_small.info[i].map); 2964 } 2965 bus_dmamap_destroy(ss->rx_data.rx_small.dmat, 2966 ss->rx_data.rx_small.extra_map); 2967 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat); 2968 } 2969 kfree(ss->rx_data.rx_small.info, M_DEVBUF); 2970 ss->rx_data.rx_small.info = NULL; 2971 } 2972 2973 if (ss->rx_data.rx_big.info != NULL) { 2974 if (ss->rx_data.rx_big.dmat != NULL) { 2975 for (i = 
0; i <= ss->rx_data.rx_big.mask; i++) { 2976 bus_dmamap_destroy(ss->rx_data.rx_big.dmat, 2977 ss->rx_data.rx_big.info[i].map); 2978 } 2979 bus_dmamap_destroy(ss->rx_data.rx_big.dmat, 2980 ss->rx_data.rx_big.extra_map); 2981 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat); 2982 } 2983 kfree(ss->rx_data.rx_big.info, M_DEVBUF); 2984 ss->rx_data.rx_big.info = NULL; 2985 } 2986 } 2987 2988 static void 2989 mxge_free_rings(mxge_softc_t *sc) 2990 { 2991 int slice; 2992 2993 if (sc->ss == NULL) 2994 return; 2995 2996 for (slice = 0; slice < sc->num_slices; slice++) 2997 mxge_free_slice_rings(&sc->ss[slice]); 2998 } 2999 3000 static int 3001 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3002 int tx_ring_entries) 3003 { 3004 mxge_softc_t *sc = ss->sc; 3005 size_t bytes; 3006 int err, i; 3007 3008 /* 3009 * Allocate per-slice receive resources 3010 */ 3011 3012 ss->rx_data.rx_small.mask = ss->rx_data.rx_big.mask = 3013 rx_ring_entries - 1; 3014 ss->rx_data.rx_done.mask = (2 * rx_ring_entries) - 1; 3015 3016 /* Allocate the rx shadow rings */ 3017 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_small.shadow); 3018 ss->rx_data.rx_small.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3019 3020 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_big.shadow); 3021 ss->rx_data.rx_big.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3022 3023 /* Allocate the rx host info rings */ 3024 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_small.info); 3025 ss->rx_data.rx_small.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3026 3027 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_big.info); 3028 ss->rx_data.rx_big.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3029 3030 /* Allocate the rx busdma resources */ 3031 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3032 1, /* alignment */ 3033 4096, /* boundary */ 3034 BUS_SPACE_MAXADDR, /* low */ 3035 BUS_SPACE_MAXADDR, /* high */ 3036 NULL, NULL, /* filter */ 3037 MHLEN, /* maxsize */ 3038 1, /* num segs */ 3039 MHLEN, /* maxsegsize */ 3040 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, 3041 /* flags */ 3042 &ss->rx_data.rx_small.dmat); /* tag */ 3043 if (err != 0) { 3044 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3045 err); 3046 return err; 3047 } 3048 3049 err = bus_dmamap_create(ss->rx_data.rx_small.dmat, BUS_DMA_WAITOK, 3050 &ss->rx_data.rx_small.extra_map); 3051 if (err != 0) { 3052 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", err); 3053 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat); 3054 ss->rx_data.rx_small.dmat = NULL; 3055 return err; 3056 } 3057 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) { 3058 err = bus_dmamap_create(ss->rx_data.rx_small.dmat, 3059 BUS_DMA_WAITOK, &ss->rx_data.rx_small.info[i].map); 3060 if (err != 0) { 3061 int j; 3062 3063 device_printf(sc->dev, "Err %d rx_small dmamap\n", err); 3064 3065 for (j = 0; j < i; ++j) { 3066 bus_dmamap_destroy(ss->rx_data.rx_small.dmat, 3067 ss->rx_data.rx_small.info[j].map); 3068 } 3069 bus_dmamap_destroy(ss->rx_data.rx_small.dmat, 3070 ss->rx_data.rx_small.extra_map); 3071 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat); 3072 ss->rx_data.rx_small.dmat = NULL; 3073 return err; 3074 } 3075 } 3076 3077 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3078 1, /* alignment */ 3079 4096, /* boundary */ 3080 BUS_SPACE_MAXADDR, /* low */ 3081 BUS_SPACE_MAXADDR, /* high */ 3082 NULL, NULL, /* filter */ 3083 4096, /* maxsize */ 3084 1, /* num segs */ 3085 4096, /* maxsegsize*/ 3086 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, 3087 /* flags */ 3088 
&ss->rx_data.rx_big.dmat); /* tag */ 3089 if (err != 0) { 3090 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3091 err); 3092 return err; 3093 } 3094 3095 err = bus_dmamap_create(ss->rx_data.rx_big.dmat, BUS_DMA_WAITOK, 3096 &ss->rx_data.rx_big.extra_map); 3097 if (err != 0) { 3098 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", err); 3099 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat); 3100 ss->rx_data.rx_big.dmat = NULL; 3101 return err; 3102 } 3103 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 3104 err = bus_dmamap_create(ss->rx_data.rx_big.dmat, BUS_DMA_WAITOK, 3105 &ss->rx_data.rx_big.info[i].map); 3106 if (err != 0) { 3107 int j; 3108 3109 device_printf(sc->dev, "Err %d rx_big dmamap\n", err); 3110 for (j = 0; j < i; ++j) { 3111 bus_dmamap_destroy(ss->rx_data.rx_big.dmat, 3112 ss->rx_data.rx_big.info[j].map); 3113 } 3114 bus_dmamap_destroy(ss->rx_data.rx_big.dmat, 3115 ss->rx_data.rx_big.extra_map); 3116 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat); 3117 ss->rx_data.rx_big.dmat = NULL; 3118 return err; 3119 } 3120 } 3121 3122 /* 3123 * Now allocate TX resources 3124 */ 3125 3126 ss->tx.mask = tx_ring_entries - 1; 3127 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3128 3129 /* 3130 * Allocate the tx request copy block; MUST be at least 8 bytes 3131 * aligned 3132 */ 3133 bytes = sizeof(*ss->tx.req_list) * (ss->tx.max_desc + 4); 3134 ss->tx.req_list = kmalloc_cachealign(__VM_CACHELINE_ALIGN(bytes), 3135 M_DEVBUF, M_WAITOK); 3136 3137 /* Allocate the tx busdma segment list */ 3138 bytes = sizeof(*ss->tx.seg_list) * ss->tx.max_desc; 3139 ss->tx.seg_list = kmalloc(bytes, M_DEVBUF, M_WAITOK); 3140 3141 /* Allocate the tx host info ring */ 3142 bytes = tx_ring_entries * sizeof(*ss->tx.info); 3143 ss->tx.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3144 3145 /* Allocate the tx busdma resources */ 3146 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3147 1, /* alignment */ 3148 sc->tx_boundary, /* boundary */ 3149 BUS_SPACE_MAXADDR, /* low */ 3150 BUS_SPACE_MAXADDR, /* high */ 3151 NULL, NULL, /* filter */ 3152 IP_MAXPACKET + 3153 sizeof(struct ether_vlan_header), 3154 /* maxsize */ 3155 ss->tx.max_desc - 2, /* num segs */ 3156 sc->tx_boundary, /* maxsegsz */ 3157 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | 3158 BUS_DMA_ONEBPAGE, /* flags */ 3159 &ss->tx.dmat); /* tag */ 3160 if (err != 0) { 3161 device_printf(sc->dev, "Err %d allocating tx dmat\n", err); 3162 return err; 3163 } 3164 3165 /* 3166 * Now use these tags to setup DMA maps for each slot in the ring 3167 */ 3168 for (i = 0; i <= ss->tx.mask; i++) { 3169 err = bus_dmamap_create(ss->tx.dmat, 3170 BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &ss->tx.info[i].map); 3171 if (err != 0) { 3172 int j; 3173 3174 device_printf(sc->dev, "Err %d tx dmamap\n", err); 3175 for (j = 0; j < i; ++j) { 3176 bus_dmamap_destroy(ss->tx.dmat, 3177 ss->tx.info[j].map); 3178 } 3179 bus_dma_tag_destroy(ss->tx.dmat); 3180 ss->tx.dmat = NULL; 3181 return err; 3182 } 3183 } 3184 return 0; 3185 } 3186 3187 static int 3188 mxge_alloc_rings(mxge_softc_t *sc) 3189 { 3190 mxge_cmd_t cmd; 3191 int tx_ring_size; 3192 int tx_ring_entries, rx_ring_entries; 3193 int err, slice; 3194 3195 /* Get ring sizes */ 3196 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3197 if (err != 0) { 3198 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3199 return err; 3200 } 3201 tx_ring_size = cmd.data0; 3202 3203 tx_ring_entries = tx_ring_size / sizeof(mcp_kreq_ether_send_t); 3204 rx_ring_entries = sc->rx_intr_slots / 2; 3205 3206 
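	/*
	 * sc->rx_intr_slots was sized at twice the rx ring entry count
	 * (see mxge_alloc_slices), so halving it recovers the number
	 * of receive descriptors per ring.
	 */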
if (bootverbose) { 3207 device_printf(sc->dev, "tx desc %d, rx desc %d\n", 3208 tx_ring_entries, rx_ring_entries); 3209 } 3210 3211 ifq_set_maxlen(&sc->ifp->if_snd, tx_ring_entries - 1); 3212 ifq_set_ready(&sc->ifp->if_snd); 3213 ifq_set_subq_cnt(&sc->ifp->if_snd, sc->num_tx_rings); 3214 3215 if (sc->num_tx_rings > 1) { 3216 sc->ifp->if_mapsubq = ifq_mapsubq_mask; 3217 ifq_set_subq_mask(&sc->ifp->if_snd, sc->num_tx_rings - 1); 3218 } 3219 3220 for (slice = 0; slice < sc->num_slices; slice++) { 3221 err = mxge_alloc_slice_rings(&sc->ss[slice], 3222 rx_ring_entries, tx_ring_entries); 3223 if (err != 0) { 3224 device_printf(sc->dev, 3225 "alloc %d slice rings failed\n", slice); 3226 return err; 3227 } 3228 } 3229 return 0; 3230 } 3231 3232 static void 3233 mxge_choose_params(int mtu, int *cl_size) 3234 { 3235 int bufsize = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN + MXGEFW_PAD; 3236 3237 if (bufsize < MCLBYTES) { 3238 *cl_size = MCLBYTES; 3239 } else { 3240 KASSERT(bufsize < MJUMPAGESIZE, ("invalid MTU %d", mtu)); 3241 *cl_size = MJUMPAGESIZE; 3242 } 3243 } 3244 3245 static int 3246 mxge_slice_open(struct mxge_slice_state *ss, int cl_size) 3247 { 3248 mxge_cmd_t cmd; 3249 int err, i, slice; 3250 3251 slice = ss - ss->sc->ss; 3252 3253 /* 3254 * Get the lanai pointers to the send and receive rings 3255 */ 3256 err = 0; 3257 3258 if (ss->sc->num_tx_rings == 1) { 3259 if (slice == 0) { 3260 cmd.data0 = slice; 3261 err = mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SEND_OFFSET, 3262 &cmd); 3263 ss->tx.lanai = (volatile mcp_kreq_ether_send_t *) 3264 (ss->sc->sram + cmd.data0); 3265 /* Leave send_go and send_stop as NULL */ 3266 } 3267 } else { 3268 cmd.data0 = slice; 3269 err = mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3270 ss->tx.lanai = (volatile mcp_kreq_ether_send_t *) 3271 (ss->sc->sram + cmd.data0); 3272 ss->tx.send_go = (volatile uint32_t *) 3273 (ss->sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3274 ss->tx.send_stop = (volatile uint32_t *) 3275 (ss->sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3276 } 3277 3278 cmd.data0 = slice; 3279 err |= mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3280 ss->rx_data.rx_small.lanai = 3281 (volatile mcp_kreq_ether_recv_t *)(ss->sc->sram + cmd.data0); 3282 3283 cmd.data0 = slice; 3284 err |= mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3285 ss->rx_data.rx_big.lanai = 3286 (volatile mcp_kreq_ether_recv_t *)(ss->sc->sram + cmd.data0); 3287 3288 if (err != 0) { 3289 if_printf(ss->sc->ifp, 3290 "failed to get ring sizes or locations\n"); 3291 return EIO; 3292 } 3293 3294 /* 3295 * Stock small receive ring 3296 */ 3297 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) { 3298 err = mxge_get_buf_small(&ss->rx_data.rx_small, 3299 ss->rx_data.rx_small.info[i].map, i, TRUE); 3300 if (err) { 3301 if_printf(ss->sc->ifp, "alloced %d/%d smalls\n", i, 3302 ss->rx_data.rx_small.mask + 1); 3303 return ENOMEM; 3304 } 3305 } 3306 3307 /* 3308 * Stock big receive ring 3309 */ 3310 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 3311 ss->rx_data.rx_big.shadow[i].addr_low = 0xffffffff; 3312 ss->rx_data.rx_big.shadow[i].addr_high = 0xffffffff; 3313 } 3314 3315 ss->rx_data.rx_big.cl_size = cl_size; 3316 3317 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 3318 err = mxge_get_buf_big(&ss->rx_data.rx_big, 3319 ss->rx_data.rx_big.info[i].map, i, TRUE); 3320 if (err) { 3321 if_printf(ss->sc->ifp, "alloced %d/%d bigs\n", i, 3322 ss->rx_data.rx_big.mask + 1); 3323 return ENOMEM; 3324 } 3325 } 3326 return 0; 3327 } 3328 3329 static int 3330 
mxge_open(mxge_softc_t *sc) 3331 { 3332 struct ifnet *ifp = sc->ifp; 3333 mxge_cmd_t cmd; 3334 int err, slice, cl_size, i; 3335 bus_addr_t bus; 3336 volatile uint8_t *itable; 3337 struct mxge_slice_state *ss; 3338 3339 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3340 3341 /* Copy the MAC address in case it was overridden */ 3342 bcopy(IF_LLADDR(ifp), sc->mac_addr, ETHER_ADDR_LEN); 3343 3344 err = mxge_reset(sc, 1); 3345 if (err != 0) { 3346 if_printf(ifp, "failed to reset\n"); 3347 return EIO; 3348 } 3349 3350 if (sc->num_slices > 1) { 3351 /* Setup the indirection table */ 3352 cmd.data0 = sc->num_slices; 3353 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, &cmd); 3354 3355 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, &cmd); 3356 if (err != 0) { 3357 if_printf(ifp, "failed to setup rss tables\n"); 3358 return err; 3359 } 3360 3361 /* Just enable an identity mapping */ 3362 itable = sc->sram + cmd.data0; 3363 for (i = 0; i < sc->num_slices; i++) 3364 itable[i] = (uint8_t)i; 3365 3366 if (sc->use_rss) { 3367 volatile uint8_t *hwkey; 3368 uint8_t swkey[MXGE_HWRSS_KEYLEN]; 3369 3370 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_KEY_OFFSET, 3371 &cmd); 3372 if (err != 0) { 3373 if_printf(ifp, "failed to get rsskey\n"); 3374 return err; 3375 } 3376 hwkey = sc->sram + cmd.data0; 3377 3378 toeplitz_get_key(swkey, MXGE_HWRSS_KEYLEN); 3379 for (i = 0; i < MXGE_HWRSS_KEYLEN; ++i) 3380 hwkey[i] = swkey[i]; 3381 wmb(); 3382 3383 err = mxge_send_cmd(sc, MXGEFW_CMD_RSS_KEY_UPDATED, 3384 &cmd); 3385 if (err != 0) { 3386 if_printf(ifp, "failed to update rsskey\n"); 3387 return err; 3388 } 3389 if (bootverbose) 3390 if_printf(ifp, "RSS key updated\n"); 3391 } 3392 3393 cmd.data0 = 1; 3394 if (sc->use_rss) { 3395 if (bootverbose) 3396 if_printf(ifp, "input hash: RSS\n"); 3397 cmd.data1 = MXGEFW_RSS_HASH_TYPE_IPV4 | 3398 MXGEFW_RSS_HASH_TYPE_TCP_IPV4; 3399 } else { 3400 if (bootverbose) 3401 if_printf(ifp, "input hash: SRC_DST_PORT\n"); 3402 cmd.data1 = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 3403 } 3404 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3405 if (err != 0) { 3406 if_printf(ifp, "failed to enable slices\n"); 3407 return err; 3408 } 3409 } 3410 3411 cmd.data0 = MXGEFW_TSO_MODE_NDIS; 3412 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_TSO_MODE, &cmd); 3413 if (err) { 3414 /* 3415 * Can't change TSO mode to NDIS, never allow TSO then 3416 */ 3417 if_printf(ifp, "failed to set TSO mode\n"); 3418 ifp->if_capenable &= ~IFCAP_TSO; 3419 ifp->if_capabilities &= ~IFCAP_TSO; 3420 ifp->if_hwassist &= ~CSUM_TSO; 3421 } 3422 3423 mxge_choose_params(ifp->if_mtu, &cl_size); 3424 3425 cmd.data0 = 1; 3426 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, &cmd); 3427 /* 3428 * Error is only meaningful if we're trying to set 3429 * MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 3430 */ 3431 3432 /* 3433 * Give the firmware the mtu and the big and small buffer 3434 * sizes. The firmware wants the big buf size to be a power 3435 * of two. 
Luckily, DragonFly's clusters are powers of two 3436 */ 3437 cmd.data0 = ifp->if_mtu + ETHER_HDR_LEN + EVL_ENCAPLEN; 3438 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3439 3440 cmd.data0 = MXGE_RX_SMALL_BUFLEN; 3441 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd); 3442 3443 cmd.data0 = cl_size; 3444 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3445 3446 if (err != 0) { 3447 if_printf(ifp, "failed to setup params\n"); 3448 goto abort; 3449 } 3450 3451 /* Now give him the pointer to the stats block */ 3452 for (slice = 0; slice < sc->num_slices; slice++) { 3453 ss = &sc->ss[slice]; 3454 cmd.data0 = MXGE_LOWPART_TO_U32(ss->fw_stats_dma.dmem_busaddr); 3455 cmd.data1 = MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.dmem_busaddr); 3456 cmd.data2 = sizeof(struct mcp_irq_data); 3457 cmd.data2 |= (slice << 16); 3458 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3459 } 3460 3461 if (err != 0) { 3462 bus = sc->ss->fw_stats_dma.dmem_busaddr; 3463 bus += offsetof(struct mcp_irq_data, send_done_count); 3464 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3465 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3466 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3467 &cmd); 3468 3469 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3470 sc->fw_multicast_support = 0; 3471 } else { 3472 sc->fw_multicast_support = 1; 3473 } 3474 3475 if (err != 0) { 3476 if_printf(ifp, "failed to setup params\n"); 3477 goto abort; 3478 } 3479 3480 for (slice = 0; slice < sc->num_slices; slice++) { 3481 err = mxge_slice_open(&sc->ss[slice], cl_size); 3482 if (err != 0) { 3483 if_printf(ifp, "couldn't open slice %d\n", slice); 3484 goto abort; 3485 } 3486 } 3487 3488 /* Finally, start the firmware running */ 3489 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3490 if (err) { 3491 if_printf(ifp, "Couldn't bring up link\n"); 3492 goto abort; 3493 } 3494 3495 ifp->if_flags |= IFF_RUNNING; 3496 for (i = 0; i < sc->num_tx_rings; ++i) { 3497 mxge_tx_ring_t *tx = &sc->ss[i].tx; 3498 3499 ifsq_clr_oactive(tx->ifsq); 3500 ifsq_watchdog_start(&tx->watchdog); 3501 } 3502 3503 return 0; 3504 3505 abort: 3506 mxge_free_mbufs(sc); 3507 return err; 3508 } 3509 3510 static void 3511 mxge_close(mxge_softc_t *sc, int down) 3512 { 3513 struct ifnet *ifp = sc->ifp; 3514 mxge_cmd_t cmd; 3515 int err, old_down_cnt, i; 3516 3517 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3518 3519 if (!down) { 3520 old_down_cnt = sc->down_cnt; 3521 wmb(); 3522 3523 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3524 if (err) 3525 if_printf(ifp, "Couldn't bring down link\n"); 3526 3527 if (old_down_cnt == sc->down_cnt) { 3528 /* 3529 * Wait for down irq 3530 * XXX racy 3531 */ 3532 ifnet_deserialize_all(ifp); 3533 DELAY(10 * sc->intr_coal_delay); 3534 ifnet_serialize_all(ifp); 3535 } 3536 3537 wmb(); 3538 if (old_down_cnt == sc->down_cnt) 3539 if_printf(ifp, "never got down irq\n"); 3540 } 3541 mxge_free_mbufs(sc); 3542 3543 ifp->if_flags &= ~IFF_RUNNING; 3544 for (i = 0; i < sc->num_tx_rings; ++i) { 3545 mxge_tx_ring_t *tx = &sc->ss[i].tx; 3546 3547 ifsq_clr_oactive(tx->ifsq); 3548 ifsq_watchdog_stop(&tx->watchdog); 3549 } 3550 } 3551 3552 static void 3553 mxge_setup_cfg_space(mxge_softc_t *sc) 3554 { 3555 device_t dev = sc->dev; 3556 int reg; 3557 uint16_t lnk, pectl; 3558 3559 /* Find the PCIe link width and set max read request to 4KB */ 3560 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 3561 lnk = pci_read_config(dev, reg + 0x12, 2); 3562 sc->link_width = (lnk >> 4) & 0x3f; 3563 3564 if (sc->pectl == 
0) { 3565 pectl = pci_read_config(dev, reg + 0x8, 2); 3566 pectl = (pectl & ~0x7000) | (5 << 12); 3567 pci_write_config(dev, reg + 0x8, pectl, 2); 3568 sc->pectl = pectl; 3569 } else { 3570 /* Restore saved pectl after watchdog reset */ 3571 pci_write_config(dev, reg + 0x8, sc->pectl, 2); 3572 } 3573 } 3574 3575 /* Enable DMA and memory space access */ 3576 pci_enable_busmaster(dev); 3577 } 3578 3579 static uint32_t 3580 mxge_read_reboot(mxge_softc_t *sc) 3581 { 3582 device_t dev = sc->dev; 3583 uint32_t vs; 3584 3585 /* Find the vendor specific offset */ 3586 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) { 3587 if_printf(sc->ifp, "could not find vendor specific offset\n"); 3588 return (uint32_t)-1; 3589 } 3590 /* Enable read32 mode */ 3591 pci_write_config(dev, vs + 0x10, 0x3, 1); 3592 /* Tell NIC which register to read */ 3593 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3594 return pci_read_config(dev, vs + 0x14, 4); 3595 } 3596 3597 static void 3598 mxge_watchdog_reset(mxge_softc_t *sc) 3599 { 3600 struct pci_devinfo *dinfo; 3601 int err, running; 3602 uint32_t reboot; 3603 uint16_t cmd; 3604 3605 err = ENXIO; 3606 3607 if_printf(sc->ifp, "Watchdog reset!\n"); 3608 3609 /* 3610 * Check to see if the NIC rebooted. If it did, then all of 3611 * PCI config space has been reset, and things like the 3612 * busmaster bit will be zero. If this is the case, then we 3613 * must restore PCI config space before the NIC can be used 3614 * again 3615 */ 3616 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3617 if (cmd == 0xffff) { 3618 /* 3619 * Maybe the watchdog caught the NIC rebooting; wait 3620 * up to 100ms for it to finish. If it does not come 3621 * back, then give up 3622 */ 3623 DELAY(1000*100); 3624 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3625 if (cmd == 0xffff) 3626 if_printf(sc->ifp, "NIC disappeared!\n"); 3627 } 3628 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3629 /* Print the reboot status */ 3630 reboot = mxge_read_reboot(sc); 3631 if_printf(sc->ifp, "NIC rebooted, status = 0x%x\n", reboot); 3632 3633 running = sc->ifp->if_flags & IFF_RUNNING; 3634 if (running) { 3635 /* 3636 * Quiesce NIC so that TX routines will not try to 3637 * xmit after restoration of BAR 3638 */ 3639 3640 /* Mark the link as down */ 3641 if (sc->link_state) { 3642 sc->ifp->if_link_state = LINK_STATE_DOWN; 3643 if_link_state_change(sc->ifp); 3644 } 3645 mxge_close(sc, 1); 3646 } 3647 /* Restore PCI configuration space */ 3648 dinfo = device_get_ivars(sc->dev); 3649 pci_cfg_restore(sc->dev, dinfo); 3650 3651 /* And redo any changes we made to our config space */ 3652 mxge_setup_cfg_space(sc); 3653 3654 /* Reload f/w */ 3655 err = mxge_load_firmware(sc, 0); 3656 if (err) 3657 if_printf(sc->ifp, "Unable to re-load f/w\n"); 3658 if (running && !err) { 3659 int i; 3660 3661 err = mxge_open(sc); 3662 3663 for (i = 0; i < sc->num_tx_rings; ++i) 3664 ifsq_devstart_sched(sc->ss[i].tx.ifsq); 3665 } 3666 sc->watchdog_resets++; 3667 } else { 3668 if_printf(sc->ifp, "NIC did not reboot, not resetting\n"); 3669 err = 0; 3670 } 3671 if (err) { 3672 if_printf(sc->ifp, "watchdog reset failed\n"); 3673 } else { 3674 if (sc->dying == 2) 3675 sc->dying = 0; 3676 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3677 } 3678 } 3679 3680 static void 3681 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice) 3682 { 3683 if_printf(sc->ifp, "slice %d stuck? 
ring state:\n", slice); 3684 if_printf(sc->ifp, "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3685 tx->req, tx->done, tx->queue_active); 3686 if_printf(sc->ifp, "tx.activate=%d tx.deactivate=%d\n", 3687 tx->activate, tx->deactivate); 3688 if_printf(sc->ifp, "pkt_done=%d fw=%d\n", 3689 tx->pkt_done, be32toh(sc->ss->fw_stats->send_done_count)); 3690 } 3691 3692 static u_long 3693 mxge_update_stats(mxge_softc_t *sc) 3694 { 3695 u_long ipackets, opackets, pkts; 3696 3697 IFNET_STAT_GET(sc->ifp, ipackets, ipackets); 3698 IFNET_STAT_GET(sc->ifp, opackets, opackets); 3699 3700 pkts = ipackets - sc->ipackets; 3701 pkts += opackets - sc->opackets; 3702 3703 sc->ipackets = ipackets; 3704 sc->opackets = opackets; 3705 3706 return pkts; 3707 } 3708 3709 static void 3710 mxge_tick(void *arg) 3711 { 3712 mxge_softc_t *sc = arg; 3713 u_long pkts = 0; 3714 int err = 0; 3715 int ticks; 3716 3717 lwkt_serialize_enter(&sc->main_serialize); 3718 3719 ticks = mxge_ticks; 3720 if (sc->ifp->if_flags & IFF_RUNNING) { 3721 /* Aggregate stats from different slices */ 3722 pkts = mxge_update_stats(sc); 3723 if (sc->need_media_probe) 3724 mxge_media_probe(sc); 3725 } 3726 if (pkts == 0) { 3727 uint16_t cmd; 3728 3729 /* Ensure NIC did not suffer h/w fault while idle */ 3730 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3731 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3732 sc->dying = 2; 3733 mxge_serialize_skipmain(sc); 3734 mxge_watchdog_reset(sc); 3735 mxge_deserialize_skipmain(sc); 3736 err = ENXIO; 3737 } 3738 3739 /* Look less often if NIC is idle */ 3740 ticks *= 4; 3741 } 3742 3743 if (err == 0) 3744 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 3745 3746 lwkt_serialize_exit(&sc->main_serialize); 3747 } 3748 3749 static int 3750 mxge_media_change(struct ifnet *ifp) 3751 { 3752 return EINVAL; 3753 } 3754 3755 static int 3756 mxge_change_mtu(mxge_softc_t *sc, int mtu) 3757 { 3758 struct ifnet *ifp = sc->ifp; 3759 int real_mtu, old_mtu; 3760 int err = 0; 3761 3762 real_mtu = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN; 3763 if (mtu > sc->max_mtu || real_mtu < 60) 3764 return EINVAL; 3765 3766 old_mtu = ifp->if_mtu; 3767 ifp->if_mtu = mtu; 3768 if (ifp->if_flags & IFF_RUNNING) { 3769 mxge_close(sc, 0); 3770 err = mxge_open(sc); 3771 if (err != 0) { 3772 ifp->if_mtu = old_mtu; 3773 mxge_close(sc, 0); 3774 mxge_open(sc); 3775 } 3776 } 3777 return err; 3778 } 3779 3780 static void 3781 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 3782 { 3783 mxge_softc_t *sc = ifp->if_softc; 3784 3785 3786 if (sc == NULL) 3787 return; 3788 ifmr->ifm_status = IFM_AVALID; 3789 ifmr->ifm_active = IFM_ETHER | IFM_FDX; 3790 ifmr->ifm_status |= sc->link_state ? 
IFM_ACTIVE : 0; 3791 ifmr->ifm_active |= sc->current_media; 3792 } 3793 3794 static int 3795 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data, 3796 struct ucred *cr __unused) 3797 { 3798 mxge_softc_t *sc = ifp->if_softc; 3799 struct ifreq *ifr = (struct ifreq *)data; 3800 int err, mask; 3801 3802 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3803 err = 0; 3804 3805 switch (command) { 3806 case SIOCSIFMTU: 3807 err = mxge_change_mtu(sc, ifr->ifr_mtu); 3808 break; 3809 3810 case SIOCSIFFLAGS: 3811 if (sc->dying) 3812 return EINVAL; 3813 3814 if (ifp->if_flags & IFF_UP) { 3815 if (!(ifp->if_flags & IFF_RUNNING)) { 3816 err = mxge_open(sc); 3817 } else { 3818 /* 3819 * Take care of PROMISC and ALLMULTI 3820 * flag changes 3821 */ 3822 mxge_change_promisc(sc, 3823 ifp->if_flags & IFF_PROMISC); 3824 mxge_set_multicast_list(sc); 3825 } 3826 } else { 3827 if (ifp->if_flags & IFF_RUNNING) 3828 mxge_close(sc, 0); 3829 } 3830 break; 3831 3832 case SIOCADDMULTI: 3833 case SIOCDELMULTI: 3834 mxge_set_multicast_list(sc); 3835 break; 3836 3837 case SIOCSIFCAP: 3838 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 3839 if (mask & IFCAP_TXCSUM) { 3840 ifp->if_capenable ^= IFCAP_TXCSUM; 3841 if (ifp->if_capenable & IFCAP_TXCSUM) 3842 ifp->if_hwassist |= CSUM_TCP | CSUM_UDP; 3843 else 3844 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP); 3845 } 3846 if (mask & IFCAP_TSO) { 3847 ifp->if_capenable ^= IFCAP_TSO; 3848 if (ifp->if_capenable & IFCAP_TSO) 3849 ifp->if_hwassist |= CSUM_TSO; 3850 else 3851 ifp->if_hwassist &= ~CSUM_TSO; 3852 } 3853 if (mask & IFCAP_RXCSUM) 3854 ifp->if_capenable ^= IFCAP_RXCSUM; 3855 if (mask & IFCAP_VLAN_HWTAGGING) 3856 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 3857 break; 3858 3859 case SIOCGIFMEDIA: 3860 mxge_media_probe(sc); 3861 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 3862 &sc->media, command); 3863 break; 3864 3865 default: 3866 err = ether_ioctl(ifp, command, data); 3867 break; 3868 } 3869 return err; 3870 } 3871 3872 static void 3873 mxge_fetch_tunables(mxge_softc_t *sc) 3874 { 3875 sc->intr_coal_delay = mxge_intr_coal_delay; 3876 if (sc->intr_coal_delay < 0 || sc->intr_coal_delay > (10 * 1000)) 3877 sc->intr_coal_delay = MXGE_INTR_COAL_DELAY; 3878 3879 /* XXX */ 3880 if (mxge_ticks == 0) 3881 mxge_ticks = hz / 2; 3882 3883 sc->pause = mxge_flow_control; 3884 sc->use_rss = mxge_use_rss; 3885 3886 sc->throttle = mxge_throttle; 3887 if (sc->throttle && sc->throttle > MXGE_MAX_THROTTLE) 3888 sc->throttle = MXGE_MAX_THROTTLE; 3889 if (sc->throttle && sc->throttle < MXGE_MIN_THROTTLE) 3890 sc->throttle = MXGE_MIN_THROTTLE; 3891 } 3892 3893 static void 3894 mxge_free_slices(mxge_softc_t *sc) 3895 { 3896 struct mxge_slice_state *ss; 3897 int i; 3898 3899 if (sc->ss == NULL) 3900 return; 3901 3902 for (i = 0; i < sc->num_slices; i++) { 3903 ss = &sc->ss[i]; 3904 if (ss->fw_stats != NULL) { 3905 mxge_dma_free(&ss->fw_stats_dma); 3906 ss->fw_stats = NULL; 3907 } 3908 if (ss->rx_data.rx_done.entry != NULL) { 3909 mxge_dma_free(&ss->rx_done_dma); 3910 ss->rx_data.rx_done.entry = NULL; 3911 } 3912 } 3913 kfree(sc->ss, M_DEVBUF); 3914 sc->ss = NULL; 3915 } 3916 3917 static int 3918 mxge_alloc_slices(mxge_softc_t *sc) 3919 { 3920 mxge_cmd_t cmd; 3921 struct mxge_slice_state *ss; 3922 size_t bytes; 3923 int err, i, rx_ring_size; 3924 3925 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 3926 if (err != 0) { 3927 device_printf(sc->dev, "Cannot determine rx ring size\n"); 3928 return err; 3929 } 3930 rx_ring_size = cmd.data0; 3931 sc->rx_intr_slots = 2 * (rx_ring_size / sizeof 
(mcp_dma_addr_t)); 3932 3933 bytes = sizeof(*sc->ss) * sc->num_slices; 3934 sc->ss = kmalloc_cachealign(bytes, M_DEVBUF, M_WAITOK | M_ZERO); 3935 3936 for (i = 0; i < sc->num_slices; i++) { 3937 ss = &sc->ss[i]; 3938 3939 ss->sc = sc; 3940 3941 lwkt_serialize_init(&ss->rx_data.rx_serialize); 3942 lwkt_serialize_init(&ss->tx.tx_serialize); 3943 ss->intr_rid = -1; 3944 3945 /* 3946 * Allocate per-slice rx interrupt queue 3947 * XXX assume 4-byte mcp_slot 3948 */ 3949 bytes = sc->rx_intr_slots * sizeof(mcp_slot_t); 3950 err = mxge_dma_alloc(sc, &ss->rx_done_dma, bytes, 4096); 3951 if (err != 0) { 3952 device_printf(sc->dev, 3953 "alloc %d slice rx_done failed\n", i); 3954 return err; 3955 } 3956 ss->rx_data.rx_done.entry = ss->rx_done_dma.dmem_addr; 3957 3958 /* 3959 * Allocate the per-slice firmware stats 3960 */ 3961 bytes = sizeof(*ss->fw_stats); 3962 err = mxge_dma_alloc(sc, &ss->fw_stats_dma, 3963 sizeof(*ss->fw_stats), 64); 3964 if (err != 0) { 3965 device_printf(sc->dev, 3966 "alloc %d fw_stats failed\n", i); 3967 return err; 3968 } 3969 ss->fw_stats = ss->fw_stats_dma.dmem_addr; 3970 } 3971 return 0; 3972 } 3973 3974 static void 3975 mxge_slice_probe(mxge_softc_t *sc) 3976 { 3977 int status, max_intr_slots, max_slices, num_slices; 3978 int msix_cnt, msix_enable, i, multi_tx; 3979 mxge_cmd_t cmd; 3980 const char *old_fw; 3981 3982 sc->num_slices = 1; 3983 sc->num_tx_rings = 1; 3984 3985 num_slices = device_getenv_int(sc->dev, "num_slices", mxge_num_slices); 3986 if (num_slices == 1) 3987 return; 3988 3989 if (ncpus2 == 1) 3990 return; 3991 3992 msix_enable = device_getenv_int(sc->dev, "msix.enable", 3993 mxge_msix_enable); 3994 if (!msix_enable) 3995 return; 3996 3997 msix_cnt = pci_msix_count(sc->dev); 3998 if (msix_cnt < 2) 3999 return; 4000 4001 /* 4002 * Round down MSI-X vector count to the nearest power of 2 4003 */ 4004 i = 0; 4005 while ((1 << (i + 1)) <= msix_cnt) 4006 ++i; 4007 msix_cnt = 1 << i; 4008 4009 /* 4010 * Now load the slice-aware firmware and see what it supports 4011 */ 4012 old_fw = sc->fw_name; 4013 if (old_fw == mxge_fw_aligned) 4014 sc->fw_name = mxge_fw_rss_aligned; 4015 else 4016 sc->fw_name = mxge_fw_rss_unaligned; 4017 status = mxge_load_firmware(sc, 0); 4018 if (status != 0) { 4019 device_printf(sc->dev, "Falling back to a single slice\n"); 4020 return; 4021 } 4022 4023 /* 4024 * Try to send a reset command to the card to see if it is alive 4025 */ 4026 memset(&cmd, 0, sizeof(cmd)); 4027 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 4028 if (status != 0) { 4029 device_printf(sc->dev, "failed reset\n"); 4030 goto abort_with_fw; 4031 } 4032 4033 /* 4034 * Get rx ring size to calculate rx interrupt queue size 4035 */ 4036 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4037 if (status != 0) { 4038 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4039 goto abort_with_fw; 4040 } 4041 max_intr_slots = 2 * (cmd.data0 / sizeof(mcp_dma_addr_t)); 4042 4043 /* 4044 * Tell it the size of the rx interrupt queue 4045 */ 4046 cmd.data0 = max_intr_slots * sizeof(struct mcp_slot); 4047 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 4048 if (status != 0) { 4049 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 4050 goto abort_with_fw; 4051 } 4052 4053 /* 4054 * Ask for the maximum number of slices it supports 4055 */ 4056 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 4057 if (status != 0) { 4058 device_printf(sc->dev, 4059 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); 4060 goto abort_with_fw; 4061 } 4062 
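/*
 * data0 of the reply carries the firmware's RSS queue limit; it is
 * rounded down to a power of 2 and clipped to the MSI-X vector
 * count below.
 */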
static void
mxge_slice_probe(mxge_softc_t *sc)
{
	int status, max_intr_slots, max_slices, num_slices;
	int msix_cnt, msix_enable, i, multi_tx;
	mxge_cmd_t cmd;
	const char *old_fw;

	sc->num_slices = 1;
	sc->num_tx_rings = 1;

	num_slices = device_getenv_int(sc->dev, "num_slices", mxge_num_slices);
	if (num_slices == 1)
		return;

	if (ncpus2 == 1)
		return;

	msix_enable = device_getenv_int(sc->dev, "msix.enable",
	    mxge_msix_enable);
	if (!msix_enable)
		return;

	msix_cnt = pci_msix_count(sc->dev);
	if (msix_cnt < 2)
		return;

	/*
	 * Round down the MSI-X vector count to the nearest power of 2
	 */
	i = 0;
	while ((1 << (i + 1)) <= msix_cnt)
		++i;
	msix_cnt = 1 << i;

	/*
	 * Now load the slice-aware firmware and see what it supports
	 */
	old_fw = sc->fw_name;
	if (old_fw == mxge_fw_aligned)
		sc->fw_name = mxge_fw_rss_aligned;
	else
		sc->fw_name = mxge_fw_rss_unaligned;
	status = mxge_load_firmware(sc, 0);
	if (status != 0) {
		device_printf(sc->dev, "Falling back to a single slice\n");
		return;
	}

	/*
	 * Try to send a reset command to the card to see if it is alive
	 */
	memset(&cmd, 0, sizeof(cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		goto abort_with_fw;
	}

	/*
	 * Get the rx ring size to calculate the rx interrupt queue size
	 */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		goto abort_with_fw;
	}
	max_intr_slots = 2 * (cmd.data0 / sizeof(mcp_dma_addr_t));

	/*
	 * Tell it the size of the rx interrupt queue
	 */
	cmd.data0 = max_intr_slots * sizeof(struct mcp_slot);
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
		goto abort_with_fw;
	}

	/*
	 * Ask for the maximum number of slices it supports
	 */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
	if (status != 0) {
		device_printf(sc->dev,
		    "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
		goto abort_with_fw;
	}
	max_slices = cmd.data0;

	/*
	 * Round down the max slices count to the nearest power of 2
	 */
	i = 0;
	while ((1 << (i + 1)) <= max_slices)
		++i;
	max_slices = 1 << i;

	if (max_slices > msix_cnt)
		max_slices = msix_cnt;

	sc->num_slices = if_ring_count2(num_slices, max_slices);

	multi_tx = device_getenv_int(sc->dev, "multi_tx", mxge_multi_tx);
	if (multi_tx)
		sc->num_tx_rings = sc->num_slices;

	if (bootverbose) {
		device_printf(sc->dev, "using %d slices, max %d\n",
		    sc->num_slices, max_slices);
	}

	if (sc->num_slices == 1)
		goto abort_with_fw;
	return;

abort_with_fw:
	sc->fw_name = old_fw;
	mxge_load_firmware(sc, 0);
}

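/*
 * The flattened serializer array built below is handed to the
 * ifnet_serialize_array_*() helpers by the serialize methods that
 * follow.  The order is critical; for num_slices == n the layout is:
 *
 *	serializes[0]		main_serialize
 *	serializes[1 .. n]	per-slice rx serializers
 *	serializes[n+1 .. 2n]	per-slice tx serializers
 */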
static void
mxge_setup_serialize(struct mxge_softc *sc)
{
	int i = 0, slice;

	/* Main + rx + tx */
	sc->nserialize = (2 * sc->num_slices) + 1;
	sc->serializes =
	    kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *),
	        M_DEVBUF, M_WAITOK | M_ZERO);

	/*
	 * Setup serializes
	 *
	 * NOTE: Order is critical
	 */

	KKASSERT(i < sc->nserialize);
	sc->serializes[i++] = &sc->main_serialize;

	for (slice = 0; slice < sc->num_slices; ++slice) {
		KKASSERT(i < sc->nserialize);
		sc->serializes[i++] = &sc->ss[slice].rx_data.rx_serialize;
	}

	for (slice = 0; slice < sc->num_slices; ++slice) {
		KKASSERT(i < sc->nserialize);
		sc->serializes[i++] = &sc->ss[slice].tx.tx_serialize;
	}

	KKASSERT(i == sc->nserialize);
}

static void
mxge_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
{
	struct mxge_softc *sc = ifp->if_softc;

	ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz);
}

static void
mxge_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
{
	struct mxge_softc *sc = ifp->if_softc;

	ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz);
}

static int
mxge_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
{
	struct mxge_softc *sc = ifp->if_softc;

	return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz);
}

#ifdef INVARIANTS

static void
mxge_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
    boolean_t serialized)
{
	struct mxge_softc *sc = ifp->if_softc;

	ifnet_serialize_array_assert(sc->serializes, sc->nserialize,
	    slz, serialized);
}

#endif /* INVARIANTS */

#ifdef IFPOLL_ENABLE

static void
mxge_npoll_rx(struct ifnet *ifp, void *xss, int cycle)
{
	struct mxge_slice_state *ss = xss;
	mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;

	ASSERT_SERIALIZED(&ss->rx_data.rx_serialize);

	if (rx_done->entry[rx_done->idx].length != 0) {
		mxge_clean_rx_done(&ss->sc->arpcom.ac_if, &ss->rx_data, cycle);
	} else {
		/*
		 * XXX
		 * This register write obviously has a cost; however,
		 * if we don't hand back the rx token, the upcoming
		 * packets may suffer a ridiculously large delay, as
		 * observed on 8AL-C using ping(8).
		 */
		*ss->irq_claim = be32toh(3);
	}
}

static void
mxge_npoll(struct ifnet *ifp, struct ifpoll_info *info)
{
	struct mxge_softc *sc = ifp->if_softc;
	int i;

	if (info == NULL)
		return;

	/*
	 * Only poll rx; polling tx and status doesn't seem to work
	 */
	for (i = 0; i < sc->num_slices; ++i) {
		struct mxge_slice_state *ss = &sc->ss[i];
		int idx = ss->intr_cpuid;

		KKASSERT(idx < ncpus2);
		info->ifpi_rx[idx].poll_func = mxge_npoll_rx;
		info->ifpi_rx[idx].arg = ss;
		info->ifpi_rx[idx].serializer = &ss->rx_data.rx_serialize;
	}
}

#endif /* IFPOLL_ENABLE */

/*
 * Device attach: map the board, parse the EEPROM strings, load the
 * firmware, size and allocate the slices, wire up interrupts and the
 * serializer array, then attach the ifnet.  Any failure funnels into
 * mxge_detach(), which can unwind a partially attached device.
 */
static int
mxge_attach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int err, rid, i;

	/*
	 * Avoid rewriting half the lines in this file to use
	 * &sc->arpcom.ac_if instead
	 */
	sc->ifp = ifp;
	sc->dev = dev;
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifmedia_init(&sc->media, 0, mxge_media_change, mxge_media_status);

	lwkt_serialize_init(&sc->main_serialize);

	mxge_fetch_tunables(sc);

	err = bus_dma_tag_create(NULL,		/* parent */
	    1,					/* alignment */
	    0,					/* boundary */
	    BUS_SPACE_MAXADDR,			/* low */
	    BUS_SPACE_MAXADDR,			/* high */
	    NULL, NULL,				/* filter */
	    BUS_SPACE_MAXSIZE_32BIT,		/* maxsize */
	    0,					/* num segs */
	    BUS_SPACE_MAXSIZE_32BIT,		/* maxsegsize */
	    0,					/* flags */
	    &sc->parent_dmat);			/* tag */
	if (err != 0) {
		device_printf(dev, "Err %d allocating parent dmat\n", err);
		goto failed;
	}

	callout_init_mp(&sc->co_hdl);

	mxge_setup_cfg_space(sc);

	/*
	 * Map the board into the kernel
	 */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto failed;
	}

	sc->sram = rman_get_virtual(sc->mem_res);
	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
	if (sc->sram_size > rman_get_size(sc->mem_res)) {
		device_printf(dev, "impossible memory region size %ld\n",
		    rman_get_size(sc->mem_res));
		err = ENXIO;
		goto failed;
	}

	/*
	 * Make a NULL-terminated copy of the EEPROM strings section of
	 * the lanai SRAM
	 */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
	    rman_get_bushandle(sc->mem_res),
	    sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
	    sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE - 2);
	err = mxge_parse_strings(sc);
	if (err != 0) {
		device_printf(dev, "parse EEPROM string failed\n");
		goto failed;
	}

	/*
	 * Enable write combining for efficient use of the PCIe bus
	 */
	mxge_enable_wc(sc);

	/*
	 * Allocate the out-of-band DMA memory
	 */
	err = mxge_dma_alloc(sc, &sc->cmd_dma, sizeof(mxge_cmd_t), 64);
	if (err != 0) {
		device_printf(dev, "alloc cmd DMA buf failed\n");
		goto failed;
	}
	sc->cmd = sc->cmd_dma.dmem_addr;

	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0) {
		device_printf(dev, "alloc zeropad DMA buf failed\n");
		goto failed;
	}

	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0) {
		device_printf(sc->dev, "alloc dmabench DMA buf failed\n");
		goto failed;
	}

	/* Select & load the firmware */
	err = mxge_select_firmware(sc);
	if (err != 0) {
		device_printf(dev, "select firmware failed\n");
		goto failed;
	}

	mxge_slice_probe(sc);
	err = mxge_alloc_slices(sc);
	if (err != 0) {
		device_printf(dev, "alloc slices failed\n");
		goto failed;
	}

	err = mxge_alloc_intr(sc);
	if (err != 0) {
		device_printf(dev, "alloc intr failed\n");
		goto failed;
	}

	/* Setup serializes */
	mxge_setup_serialize(sc);

	err = mxge_reset(sc, 0);
	if (err != 0) {
		device_printf(dev, "reset failed\n");
		goto failed;
	}

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(dev, "failed to allocate rings\n");
		goto failed;
	}

	ifp->if_baudrate = IF_Gbps(10UL);
	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO;
	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;

	ifp->if_capabilities |= IFCAP_VLAN_MTU;
#if 0
	/* Well, it's software, sigh */
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
#endif
	ifp->if_capenable = ifp->if_capabilities;

	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_init = mxge_init;
	ifp->if_ioctl = mxge_ioctl;
	ifp->if_start = mxge_start;
#ifdef IFPOLL_ENABLE
	if (sc->intr_type != PCI_INTR_TYPE_LEGACY)
		ifp->if_npoll = mxge_npoll;
#endif
	ifp->if_serialize = mxge_serialize;
	ifp->if_deserialize = mxge_deserialize;
	ifp->if_tryserialize = mxge_tryserialize;
#ifdef INVARIANTS
	ifp->if_serialize_assert = mxge_serialize_assert;
#endif

	/* Increase the TSO burst length */
	ifp->if_tsolen = (32 * ETHERMTU);

	/* Initialise the ifmedia structure */
	mxge_media_init(sc);
	mxge_media_probe(sc);

	ether_ifattach(ifp, sc->mac_addr, NULL);

	/* Setup TX rings and subqueues */
	for (i = 0; i < sc->num_tx_rings; ++i) {
		struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
		struct mxge_slice_state *ss = &sc->ss[i];

		ifsq_set_cpuid(ifsq, ss->intr_cpuid);
		ifsq_set_hw_serialize(ifsq, &ss->tx.tx_serialize);
		ifsq_set_priv(ifsq, &ss->tx);
		ss->tx.ifsq = ifsq;

		ifsq_watchdog_init(&ss->tx.watchdog, ifsq, mxge_watchdog);
	}

	/*
	 * XXX
	 * We are not ready to do "gather" jumbo frames, so
	 * limit the MTU to MJUMPAGESIZE
	 */
	sc->max_mtu = MJUMPAGESIZE -
	    ETHER_HDR_LEN - EVL_ENCAPLEN - MXGEFW_PAD - 1;
	sc->dying = 0;

	err = mxge_setup_intr(sc);
	if (err != 0) {
		device_printf(dev, "alloc and setup intr failed\n");
		ether_ifdetach(ifp);
		goto failed;
	}

	mxge_add_sysctls(sc);

	callout_reset_bycpu(&sc->co_hdl, mxge_ticks, mxge_tick, sc,
	    sc->ss[0].intr_cpuid);
	return 0;

failed:
	mxge_detach(dev);
	return err;
}

/*
 * Device detach; also serves as the error unwind path for mxge_attach(),
 * so every release below is guarded against state that was never set up.
 */
static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	if (device_is_attached(dev)) {
		struct ifnet *ifp = sc->ifp;

		ifnet_serialize_all(ifp);

		sc->dying = 1;
		if (ifp->if_flags & IFF_RUNNING)
			mxge_close(sc, 1);
		callout_stop(&sc->co_hdl);

		mxge_teardown_intr(sc, sc->num_slices);

		ifnet_deserialize_all(ifp);

		callout_terminate(&sc->co_hdl);

		ether_ifdetach(ifp);
	}
	ifmedia_removeall(&sc->media);

	if (sc->cmd != NULL && sc->zeropad_dma.dmem_addr != NULL &&
	    sc->sram != NULL)
		mxge_dummy_rdma(sc, 0);

	mxge_free_intr(sc);
	mxge_rem_sysctls(sc);
	mxge_free_rings(sc);

	/* MUST be done after the sysctls, intr and rings are freed */
	mxge_free_slices(sc);

	if (sc->dmabench_dma.dmem_addr != NULL)
		mxge_dma_free(&sc->dmabench_dma);
	if (sc->zeropad_dma.dmem_addr != NULL)
		mxge_dma_free(&sc->zeropad_dma);
	if (sc->cmd_dma.dmem_addr != NULL)
		mxge_dma_free(&sc->cmd_dma);

	if (sc->msix_table_res != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(2),
		    sc->msix_table_res);
	}
	if (sc->mem_res != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS,
		    sc->mem_res);
	}

	if (sc->parent_dmat != NULL)
		bus_dma_tag_destroy(sc->parent_dmat);

	return 0;
}

static int
mxge_shutdown(device_t dev)
{
	return 0;
}

static void
mxge_free_msix(struct mxge_softc *sc, boolean_t setup)
{
	int i;

	KKASSERT(sc->num_slices > 1);

	for (i = 0; i < sc->num_slices; ++i) {
		struct mxge_slice_state *ss = &sc->ss[i];

		if (ss->intr_res != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ,
			    ss->intr_rid, ss->intr_res);
		}
		if (ss->intr_rid >= 0)
			pci_release_msix_vector(sc->dev, ss->intr_rid);
	}
	if (setup)
		pci_teardown_msix(sc->dev);
}

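/*
 * Allocate one MSI-X vector per slice and bind each vector to a CPU,
 * starting at a per-device offset so that multiple ports do not pile
 * onto the same set of CPUs.  A worked example of the default offset
 * (hypothetical numbers):
 *
 *	num_slices = 4, device unit = 1, ncpus2 = 8
 *	offset_def = (4 * 1) % 8 = 4	(slices land on cpu4 .. cpu7)
 *
 * The msix.offset tunable may override the default, but it must be a
 * multiple of num_slices and smaller than ncpus2.
 */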
static int
mxge_alloc_msix(struct mxge_softc *sc)
{
	struct mxge_slice_state *ss;
	int offset, rid, error, i;
	boolean_t setup = FALSE;

	KKASSERT(sc->num_slices > 1);

	if (sc->num_slices == ncpus2) {
		offset = 0;
	} else {
		int offset_def;

		offset_def = (sc->num_slices * device_get_unit(sc->dev)) %
		    ncpus2;

		offset = device_getenv_int(sc->dev, "msix.offset", offset_def);
		if (offset >= ncpus2 ||
		    offset % sc->num_slices != 0) {
			device_printf(sc->dev, "invalid msix.offset %d, "
			    "use %d\n", offset, offset_def);
			offset = offset_def;
		}
	}

	ss = &sc->ss[0];

	ss->intr_serialize = &sc->main_serialize;
	ss->intr_func = mxge_msi;
	ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0),
	    "%s comb", device_get_nameunit(sc->dev));
	ss->intr_desc = ss->intr_desc0;
	ss->intr_cpuid = offset;

	for (i = 1; i < sc->num_slices; ++i) {
		ss = &sc->ss[i];

		ss->intr_serialize = &ss->rx_data.rx_serialize;
		if (sc->num_tx_rings == 1) {
			ss->intr_func = mxge_msix_rx;
			ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0),
			    "%s rx", device_get_nameunit(sc->dev));
		} else {
			ss->intr_func = mxge_msix_rxtx;
			ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0),
			    "%s rxtx", device_get_nameunit(sc->dev));
		}
		ss->intr_desc = ss->intr_desc0;
		ss->intr_cpuid = offset + i;
	}

	rid = PCIR_BAR(2);
	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (sc->msix_table_res == NULL) {
		device_printf(sc->dev, "couldn't alloc MSI-X table res\n");
		return ENXIO;
	}

	error = pci_setup_msix(sc->dev);
	if (error) {
		device_printf(sc->dev, "could not setup MSI-X\n");
		goto back;
	}
	setup = TRUE;

	for (i = 0; i < sc->num_slices; ++i) {
		ss = &sc->ss[i];

		error = pci_alloc_msix_vector(sc->dev, i, &ss->intr_rid,
		    ss->intr_cpuid);
		if (error) {
			device_printf(sc->dev, "could not alloc "
			    "MSI-X %d on cpu%d\n", i, ss->intr_cpuid);
			goto back;
		}

		ss->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
		    &ss->intr_rid, RF_ACTIVE);
		if (ss->intr_res == NULL) {
			device_printf(sc->dev, "could not alloc "
			    "MSI-X %d resource\n", i);
			error = ENXIO;
			goto back;
		}
	}

	pci_enable_msix(sc->dev);
	sc->intr_type = PCI_INTR_TYPE_MSIX;
back:
	if (error)
		mxge_free_msix(sc, setup);
	return error;
}

static int
mxge_alloc_intr(struct mxge_softc *sc)
{
	struct mxge_slice_state *ss;
	u_int irq_flags;

	if (sc->num_slices > 1) {
		int error;

		error = mxge_alloc_msix(sc);
		if (error)
			return error;
		KKASSERT(sc->intr_type == PCI_INTR_TYPE_MSIX);
		return 0;
	}

	ss = &sc->ss[0];

	sc->intr_type = pci_alloc_1intr(sc->dev, mxge_msi_enable,
	    &ss->intr_rid, &irq_flags);

	ss->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
	    &ss->intr_rid, irq_flags);
	if (ss->intr_res == NULL) {
		device_printf(sc->dev, "could not alloc interrupt\n");
		return ENXIO;
	}

	if (sc->intr_type == PCI_INTR_TYPE_LEGACY)
		ss->intr_func = mxge_legacy;
	else
		ss->intr_func = mxge_msi;
	ss->intr_serialize = &sc->main_serialize;
	ss->intr_cpuid = rman_get_cpuid(ss->intr_res);

	return 0;
}

static int
mxge_setup_intr(struct mxge_softc *sc)
{
	int i;

	for (i = 0; i < sc->num_slices; ++i) {
		struct mxge_slice_state *ss = &sc->ss[i];
		int error;

		error = bus_setup_intr_descr(sc->dev, ss->intr_res,
		    INTR_MPSAFE, ss->intr_func, ss, &ss->intr_hand,
		    ss->intr_serialize, ss->intr_desc);
		if (error) {
			device_printf(sc->dev, "can't setup %dth intr\n", i);
			mxge_teardown_intr(sc, i);
			return error;
		}
	}
	return 0;
}

static void
mxge_teardown_intr(struct mxge_softc *sc, int cnt)
{
	int i;

	if (sc->ss == NULL)
		return;

	for (i = 0; i < cnt; ++i) {
		struct mxge_slice_state *ss = &sc->ss[i];

		bus_teardown_intr(sc->dev, ss->intr_res, ss->intr_hand);
	}
}

static void
mxge_free_intr(struct mxge_softc *sc)
{
	if (sc->ss == NULL)
		return;

	if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
		struct mxge_slice_state *ss = &sc->ss[0];

		if (ss->intr_res != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ,
			    ss->intr_rid, ss->intr_res);
		}
		if (sc->intr_type == PCI_INTR_TYPE_MSI)
			pci_release_msi(sc->dev);
	} else {
		mxge_free_msix(sc, TRUE);
	}
}