1 /****************************************************************************** 2 3 Copyright (c) 2006-2013, Myricom Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Myricom Inc, nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 27 28 $FreeBSD: head/sys/dev/mxge/if_mxge.c 254263 2013-08-12 23:30:01Z scottl $ 29 30 ***************************************************************************/ 31 32 #include "opt_ifpoll.h" 33 #include "opt_inet.h" 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/linker.h> 38 #include <sys/firmware.h> 39 #include <sys/endian.h> 40 #include <sys/in_cksum.h> 41 #include <sys/sockio.h> 42 #include <sys/mbuf.h> 43 #include <sys/malloc.h> 44 #include <sys/kernel.h> 45 #include <sys/module.h> 46 #include <sys/serialize.h> 47 #include <sys/socket.h> 48 #include <sys/sysctl.h> 49 50 #include <net/if.h> 51 #include <net/if_arp.h> 52 #include <net/ifq_var.h> 53 #include <net/ethernet.h> 54 #include <net/if_dl.h> 55 #include <net/if_media.h> 56 #include <net/if_poll.h> 57 58 #include <net/bpf.h> 59 60 #include <net/if_types.h> 61 #include <net/vlan/if_vlan_var.h> 62 #include <net/zlib.h> 63 #include <net/toeplitz.h> 64 65 #include <netinet/in_systm.h> 66 #include <netinet/in.h> 67 #include <netinet/ip.h> 68 #include <netinet/tcp.h> 69 70 #include <sys/bus.h> 71 #include <sys/rman.h> 72 73 #include <bus/pci/pcireg.h> 74 #include <bus/pci/pcivar.h> 75 #include <bus/pci/pci_private.h> /* XXX for pci_cfg_restore */ 76 77 #include <vm/vm.h> /* for pmap_mapdev() */ 78 #include <vm/pmap.h> 79 80 #if defined(__i386__) || defined(__x86_64__) 81 #include <machine/specialreg.h> 82 #endif 83 84 #include <dev/netif/mxge/mxge_mcp.h> 85 #include <dev/netif/mxge/mcp_gen_header.h> 86 #include <dev/netif/mxge/if_mxge_var.h> 87 88 #define MXGE_RX_SMALL_BUFLEN (MHLEN - MXGEFW_PAD) 89 #define MXGE_HWRSS_KEYLEN 16 90 91 /* Tunable params */ 92 static int mxge_nvidia_ecrc_enable = 1; 93 static int mxge_force_firmware = 0; 94 static int mxge_intr_coal_delay = MXGE_INTR_COAL_DELAY; 95 static int mxge_deassert_wait = 1; 96 static int mxge_flow_control = 1; 97 static int mxge_ticks; 98 static int mxge_num_slices = 0; 99 static int mxge_always_promisc = 0; 100 static int mxge_throttle = 0; 101 static int mxge_msi_enable = 1; 102 static int mxge_msix_enable = 1; 103 static int mxge_multi_tx = 1; 104 /* 105 * 
Don't use RSS by default, it's just too slow
106 */
107 static int mxge_use_rss = 0;
108
109 static const char *mxge_fw_unaligned = "mxge_ethp_z8e";
110 static const char *mxge_fw_aligned = "mxge_eth_z8e";
111 static const char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
112 static const char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
113
114 TUNABLE_INT("hw.mxge.num_slices", &mxge_num_slices);
115 TUNABLE_INT("hw.mxge.flow_control_enabled", &mxge_flow_control);
116 TUNABLE_INT("hw.mxge.intr_coal_delay", &mxge_intr_coal_delay);
117 TUNABLE_INT("hw.mxge.nvidia_ecrc_enable", &mxge_nvidia_ecrc_enable);
118 TUNABLE_INT("hw.mxge.force_firmware", &mxge_force_firmware);
119 TUNABLE_INT("hw.mxge.deassert_wait", &mxge_deassert_wait);
120 TUNABLE_INT("hw.mxge.ticks", &mxge_ticks);
121 TUNABLE_INT("hw.mxge.always_promisc", &mxge_always_promisc);
122 TUNABLE_INT("hw.mxge.throttle", &mxge_throttle);
123 TUNABLE_INT("hw.mxge.multi_tx", &mxge_multi_tx);
124 TUNABLE_INT("hw.mxge.use_rss", &mxge_use_rss);
125 TUNABLE_INT("hw.mxge.msi.enable", &mxge_msi_enable);
126 TUNABLE_INT("hw.mxge.msix.enable", &mxge_msix_enable);
127
128 static int mxge_probe(device_t dev);
129 static int mxge_attach(device_t dev);
130 static int mxge_detach(device_t dev);
131 static int mxge_shutdown(device_t dev);
132
133 static int mxge_alloc_intr(struct mxge_softc *sc);
134 static void mxge_free_intr(struct mxge_softc *sc);
135 static int mxge_setup_intr(struct mxge_softc *sc);
136 static void mxge_teardown_intr(struct mxge_softc *sc, int cnt);
137
138 static device_method_t mxge_methods[] = {
139 /* Device interface */
140 DEVMETHOD(device_probe, mxge_probe),
141 DEVMETHOD(device_attach, mxge_attach),
142 DEVMETHOD(device_detach, mxge_detach),
143 DEVMETHOD(device_shutdown, mxge_shutdown),
144 DEVMETHOD_END
145 };
146
147 static driver_t mxge_driver = {
148 "mxge",
149 mxge_methods,
150 sizeof(mxge_softc_t),
151 };
152
153 static devclass_t mxge_devclass;
154
155 /* Declare ourselves to be a child of the PCI bus. */
156 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, NULL, NULL);
157 MODULE_DEPEND(mxge, firmware, 1, 1, 1);
158 MODULE_DEPEND(mxge, zlib, 1, 1, 1);
159
160 static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
161 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
162 static void mxge_close(mxge_softc_t *sc, int down);
163 static int mxge_open(mxge_softc_t *sc);
164 static void mxge_tick(void *arg);
165 static void mxge_watchdog_reset(mxge_softc_t *sc);
166 static void mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice);
167
168 static int
169 mxge_probe(device_t dev)
170 {
171 if (pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM &&
172 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E ||
173 pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9)) {
174 int rev = pci_get_revid(dev);
175
176 switch (rev) {
177 case MXGE_PCI_REV_Z8E:
178 device_set_desc(dev, "Myri10G-PCIE-8A");
179 break;
180 case MXGE_PCI_REV_Z8ES:
181 device_set_desc(dev, "Myri10G-PCIE-8B");
182 break;
183 default:
184 device_set_desc(dev, "Myri10G-PCIE-8??");
185 device_printf(dev, "Unrecognized rev %d NIC\n", rev);
186 break;
187 }
188 return 0;
189 }
190 return ENXIO;
191 }
192
193 static void
194 mxge_enable_wc(mxge_softc_t *sc)
195 {
196 #if defined(__i386__) || defined(__x86_64__)
197 vm_offset_t len;
198
199 sc->wc = 1;
200 len = rman_get_size(sc->mem_res);
201 pmap_change_attr((vm_offset_t) sc->sram, len / PAGE_SIZE,
202 PAT_WRITE_COMBINING);
203 #endif
204 }
205
206 static int
207 mxge_dma_alloc(mxge_softc_t 
*sc, bus_dmamem_t *dma, size_t bytes, 208 bus_size_t alignment) 209 { 210 bus_size_t boundary; 211 int err; 212 213 if (bytes > 4096 && alignment == 4096) 214 boundary = 0; 215 else 216 boundary = 4096; 217 218 err = bus_dmamem_coherent(sc->parent_dmat, alignment, boundary, 219 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, bytes, 220 BUS_DMA_WAITOK | BUS_DMA_ZERO, dma); 221 if (err != 0) { 222 device_printf(sc->dev, "bus_dmamem_coherent failed: %d\n", err); 223 return err; 224 } 225 return 0; 226 } 227 228 static void 229 mxge_dma_free(bus_dmamem_t *dma) 230 { 231 bus_dmamap_unload(dma->dmem_tag, dma->dmem_map); 232 bus_dmamem_free(dma->dmem_tag, dma->dmem_addr, dma->dmem_map); 233 bus_dma_tag_destroy(dma->dmem_tag); 234 } 235 236 /* 237 * The eeprom strings on the lanaiX have the format 238 * SN=x\0 239 * MAC=x:x:x:x:x:x\0 240 * PC=text\0 241 */ 242 static int 243 mxge_parse_strings(mxge_softc_t *sc) 244 { 245 const char *ptr; 246 int i, found_mac, found_sn2; 247 char *endptr; 248 249 ptr = sc->eeprom_strings; 250 found_mac = 0; 251 found_sn2 = 0; 252 while (*ptr != '\0') { 253 if (strncmp(ptr, "MAC=", 4) == 0) { 254 ptr += 4; 255 for (i = 0;;) { 256 sc->mac_addr[i] = strtoul(ptr, &endptr, 16); 257 if (endptr - ptr != 2) 258 goto abort; 259 ptr = endptr; 260 if (++i == 6) 261 break; 262 if (*ptr++ != ':') 263 goto abort; 264 } 265 found_mac = 1; 266 } else if (strncmp(ptr, "PC=", 3) == 0) { 267 ptr += 3; 268 strlcpy(sc->product_code_string, ptr, 269 sizeof(sc->product_code_string)); 270 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) { 271 ptr += 3; 272 strlcpy(sc->serial_number_string, ptr, 273 sizeof(sc->serial_number_string)); 274 } else if (strncmp(ptr, "SN2=", 4) == 0) { 275 /* SN2 takes precedence over SN */ 276 ptr += 4; 277 found_sn2 = 1; 278 strlcpy(sc->serial_number_string, ptr, 279 sizeof(sc->serial_number_string)); 280 } 281 while (*ptr++ != '\0') {} 282 } 283 284 if (found_mac) 285 return 0; 286 287 abort: 288 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 289 return ENXIO; 290 } 291 292 #if defined(__i386__) || defined(__x86_64__) 293 294 static void 295 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 296 { 297 uint32_t val; 298 unsigned long base, off; 299 char *va, *cfgptr; 300 device_t pdev, mcp55; 301 uint16_t vendor_id, device_id, word; 302 uintptr_t bus, slot, func, ivend, idev; 303 uint32_t *ptr32; 304 305 if (!mxge_nvidia_ecrc_enable) 306 return; 307 308 pdev = device_get_parent(device_get_parent(sc->dev)); 309 if (pdev == NULL) { 310 device_printf(sc->dev, "could not find parent?\n"); 311 return; 312 } 313 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 314 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 315 316 if (vendor_id != 0x10de) 317 return; 318 319 base = 0; 320 321 if (device_id == 0x005d) { 322 /* ck804, base address is magic */ 323 base = 0xe0000000UL; 324 } else if (device_id >= 0x0374 && device_id <= 0x378) { 325 /* mcp55, base address stored in chipset */ 326 mcp55 = pci_find_bsf(0, 0, 0); 327 if (mcp55 && 328 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 329 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 330 word = pci_read_config(mcp55, 0x90, 2); 331 base = ((unsigned long)word & 0x7ffeU) << 25; 332 } 333 } 334 if (!base) 335 return; 336 337 /* 338 * XXXX 339 * Test below is commented because it is believed that doing 340 * config read/write beyond 0xff will access the config space 341 * for the next larger function. 
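(Plausibly this is because the legacy 0xcf8/0xcfc config mechanism
* decodes only 8 register-offset bits, so bit 8 of an offset such as
* 0x178 would spill into the function-number field, turning the access
* into one at offset 0x78 of function N+1. This is an assumption, not
* something the original comment states.)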
Uncomment this and remove
342 * the hacky pmap_mapdev() way of accessing config space when
343 * DragonFly grows support for extended pcie config space access.
344 */
345 #if 0
346 /*
347 * See if we can, by some miracle, access the extended
348 * config space
349 */
350 val = pci_read_config(pdev, 0x178, 4);
351 if (val != 0xffffffff) {
352 val |= 0x40;
353 pci_write_config(pdev, 0x178, val, 4);
354 return;
355 }
356 #endif
357 /*
358 * Rather than using normal pci config space writes, we must
359 * map the Nvidia config space ourselves. This is because on
360 * opteron/nvidia class machines the 0xe0000000 mapping is
361 * handled by the nvidia chipset; that means the internal PCI
362 * device (the on-chip northbridge), or the amd-8131 bridge
363 * and things behind them are not visible by this method.
364 */
365
366 BUS_READ_IVAR(device_get_parent(pdev), pdev,
367 PCI_IVAR_BUS, &bus);
368 BUS_READ_IVAR(device_get_parent(pdev), pdev,
369 PCI_IVAR_SLOT, &slot);
370 BUS_READ_IVAR(device_get_parent(pdev), pdev,
371 PCI_IVAR_FUNCTION, &func);
372 BUS_READ_IVAR(device_get_parent(pdev), pdev,
373 PCI_IVAR_VENDOR, &ivend);
374 BUS_READ_IVAR(device_get_parent(pdev), pdev,
375 PCI_IVAR_DEVICE, &idev);
376
377 off = base + 0x00100000UL * (unsigned long)bus +
378 0x00001000UL * (unsigned long)(func + 8 * slot);
379
380 /* map it into the kernel */
381 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
382 if (va == NULL) {
383 device_printf(sc->dev, "pmap_mapdev() failed\n");
384 return;
385 }
386 /* get a pointer to the config space mapped into the kernel */
387 cfgptr = va + (off & PAGE_MASK);
388
389 /* make sure that we can really access it */
390 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
391 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
392 if (!(vendor_id == ivend && device_id == idev)) {
393 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
394 vendor_id, device_id);
395 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
396 return;
397 }
398
399 ptr32 = (uint32_t*)(cfgptr + 0x178);
400 val = *ptr32;
401
402 if (val == 0xffffffff) {
403 device_printf(sc->dev, "extended mapping failed\n");
404 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
405 return;
406 }
407 *ptr32 = val | 0x40;
408 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
409 if (bootverbose) {
410 device_printf(sc->dev, "Enabled ECRC on upstream "
411 "Nvidia bridge at %d:%d:%d\n",
412 (int)bus, (int)slot, (int)func);
413 }
414 }
415
416 #else /* __i386__ || __x86_64__ */
417
418 static void
419 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
420 {
421 device_printf(sc->dev, "Nforce 4 chipset on non-x86/x86_64!?!?!\n");
422 }
423
424 #endif
425
426 static int
427 mxge_dma_test(mxge_softc_t *sc, int test_type)
428 {
429 mxge_cmd_t cmd;
430 bus_addr_t dmatest_bus = sc->dmabench_dma.dmem_busaddr;
431 int status;
432 uint32_t len;
433 const char *test = " ";
434
435 /*
436 * Run a small DMA test.
437 * The magic multipliers to the length tell the firmware
438 * to do DMA read, write, or read+write tests. The
439 * results are returned in cmd.data0. The upper 16
440 * bits of the return are the number of transfers completed.
441 * The lower 16 bits are the time in 0.5us ticks that the
442 * transfers took to complete.
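*
* As a worked illustration (hypothetical numbers, not a measurement):
* with len = tx_boundary = 4096, a read-test result of cmd.data0 =
* 0x03e81000 would mean 0x03e8 (1000) transfers completed in 0x1000
* (4096) half-microsecond ticks, so the computation below yields
* read_dma = (1000 * 4096 * 2) / 4096 = 2000, i.e. roughly 2000 MB/s
* (bytes moved per microsecond).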
443 */
444
445 len = sc->tx_boundary;
446
447 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
448 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
449 cmd.data2 = len * 0x10000;
450 status = mxge_send_cmd(sc, test_type, &cmd);
451 if (status != 0) {
452 test = "read";
453 goto abort;
454 }
455 sc->read_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);
456
457 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
458 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
459 cmd.data2 = len * 0x1;
460 status = mxge_send_cmd(sc, test_type, &cmd);
461 if (status != 0) {
462 test = "write";
463 goto abort;
464 }
465 sc->write_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);
466
467 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
468 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
469 cmd.data2 = len * 0x10001;
470 status = mxge_send_cmd(sc, test_type, &cmd);
471 if (status != 0) {
472 test = "read/write";
473 goto abort;
474 }
475 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
476 (cmd.data0 & 0xffff);
477
478 abort:
479 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) {
480 device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
481 test, status);
482 }
483 return status;
484 }
485
486 /*
487 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
488 * when the PCI-E Completion packets are aligned on an 8-byte
489 * boundary. Some PCI-E chip sets always align Completion packets; on
490 * the ones that do not, the alignment can be enforced by enabling
491 * ECRC generation (if supported).
492 *
493 * When PCI-E Completion packets are not aligned, it is actually more
494 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
495 *
496 * If the driver can neither enable ECRC nor verify that it has
497 * already been enabled, then it must use a firmware image which works
498 * around unaligned completion packets (ethp_z8e.dat), and it should
499 * also ensure that it never gives the device a Read-DMA which is
500 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
501 * enabled, then the driver should use the aligned (eth_z8e.dat)
502 * firmware image, and set tx_boundary to 4KB.
503 */
504 static int
505 mxge_firmware_probe(mxge_softc_t *sc)
506 {
507 device_t dev = sc->dev;
508 int reg, status;
509 uint16_t pectl;
510
511 sc->tx_boundary = 4096;
512
513 /*
514 * Verify the max read request size was set to 4KB
515 * before trying the test with 4KB.
516 */
517 if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
518 pectl = pci_read_config(dev, reg + 0x8, 2);
519 if ((pectl & (5 << 12)) != (5 << 12)) {
520 device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
521 pectl);
522 sc->tx_boundary = 2048;
523 }
524 }
525
526 /*
527 * Load the optimized firmware (which assumes aligned PCIe
528 * completions) in order to see if it works on this host.
529 */
530 sc->fw_name = mxge_fw_aligned;
531 status = mxge_load_firmware(sc, 1);
532 if (status != 0)
533 return status;
534
535 /*
536 * Enable ECRC if possible
537 */
538 mxge_enable_nvidia_ecrc(sc);
539
540 /*
541 * Run a DMA test which watches for unaligned completions and
542 * aborts on the first one seen. Not required on Z8ES or newer.
543 */
544 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
545 return 0;
546
547 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
548 if (status == 0)
549 return 0; /* keep the aligned firmware */
550
551 if (status != E2BIG)
552 device_printf(dev, "DMA test failed: %d\n", status);
553 if (status == ENOSYS) {
554 device_printf(dev, "Falling back to ethp! 
" 555 "Please install up to date fw\n"); 556 } 557 return status; 558 } 559 560 static int 561 mxge_select_firmware(mxge_softc_t *sc) 562 { 563 int aligned = 0; 564 int force_firmware = mxge_force_firmware; 565 566 if (sc->throttle) 567 force_firmware = sc->throttle; 568 569 if (force_firmware != 0) { 570 if (force_firmware == 1) 571 aligned = 1; 572 else 573 aligned = 0; 574 if (bootverbose) { 575 device_printf(sc->dev, 576 "Assuming %s completions (forced)\n", 577 aligned ? "aligned" : "unaligned"); 578 } 579 goto abort; 580 } 581 582 /* 583 * If the PCIe link width is 4 or less, we can use the aligned 584 * firmware and skip any checks 585 */ 586 if (sc->link_width != 0 && sc->link_width <= 4) { 587 device_printf(sc->dev, "PCIe x%d Link, " 588 "expect reduced performance\n", sc->link_width); 589 aligned = 1; 590 goto abort; 591 } 592 593 if (mxge_firmware_probe(sc) == 0) 594 return 0; 595 596 abort: 597 if (aligned) { 598 sc->fw_name = mxge_fw_aligned; 599 sc->tx_boundary = 4096; 600 } else { 601 sc->fw_name = mxge_fw_unaligned; 602 sc->tx_boundary = 2048; 603 } 604 return mxge_load_firmware(sc, 0); 605 } 606 607 static int 608 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 609 { 610 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 611 if_printf(sc->ifp, "Bad firmware type: 0x%x\n", 612 be32toh(hdr->mcp_type)); 613 return EIO; 614 } 615 616 /* Save firmware version for sysctl */ 617 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version)); 618 if (bootverbose) 619 if_printf(sc->ifp, "firmware id: %s\n", hdr->version); 620 621 ksscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 622 &sc->fw_ver_minor, &sc->fw_ver_tiny); 623 624 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR && 625 sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 626 if_printf(sc->ifp, "Found firmware version %s\n", 627 sc->fw_version); 628 if_printf(sc->ifp, "Driver needs %d.%d\n", 629 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 630 return EINVAL; 631 } 632 return 0; 633 } 634 635 static void * 636 z_alloc(void *nil, u_int items, u_int size) 637 { 638 return kmalloc(items * size, M_TEMP, M_WAITOK); 639 } 640 641 static void 642 z_free(void *nil, void *ptr) 643 { 644 kfree(ptr, M_TEMP); 645 } 646 647 static int 648 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 649 { 650 z_stream zs; 651 char *inflate_buffer; 652 const struct firmware *fw; 653 const mcp_gen_header_t *hdr; 654 unsigned hdr_offset; 655 int status; 656 unsigned int i; 657 char dummy; 658 size_t fw_len; 659 660 fw = firmware_get(sc->fw_name); 661 if (fw == NULL) { 662 if_printf(sc->ifp, "Could not find firmware image %s\n", 663 sc->fw_name); 664 return ENOENT; 665 } 666 667 /* Setup zlib and decompress f/w */ 668 bzero(&zs, sizeof(zs)); 669 zs.zalloc = z_alloc; 670 zs.zfree = z_free; 671 status = inflateInit(&zs); 672 if (status != Z_OK) { 673 status = EIO; 674 goto abort_with_fw; 675 } 676 677 /* 678 * The uncompressed size is stored as the firmware version, 679 * which would otherwise go unused 680 */ 681 fw_len = (size_t)fw->version; 682 inflate_buffer = kmalloc(fw_len, M_TEMP, M_WAITOK); 683 zs.avail_in = fw->datasize; 684 zs.next_in = __DECONST(char *, fw->data); 685 zs.avail_out = fw_len; 686 zs.next_out = inflate_buffer; 687 status = inflate(&zs, Z_FINISH); 688 if (status != Z_STREAM_END) { 689 if_printf(sc->ifp, "zlib %d\n", status); 690 status = EIO; 691 goto abort_with_buffer; 692 } 693 694 /* Check id */ 695 hdr_offset = 696 htobe32(*(const uint32_t *)(inflate_buffer + MCP_HEADER_PTR_OFFSET)); 697 if 
((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 698 if_printf(sc->ifp, "Bad firmware file"); 699 status = EIO; 700 goto abort_with_buffer; 701 } 702 hdr = (const void*)(inflate_buffer + hdr_offset); 703 704 status = mxge_validate_firmware(sc, hdr); 705 if (status != 0) 706 goto abort_with_buffer; 707 708 /* Copy the inflated firmware to NIC SRAM. */ 709 for (i = 0; i < fw_len; i += 256) { 710 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, inflate_buffer + i, 711 min(256U, (unsigned)(fw_len - i))); 712 wmb(); 713 dummy = *sc->sram; 714 wmb(); 715 } 716 717 *limit = fw_len; 718 status = 0; 719 abort_with_buffer: 720 kfree(inflate_buffer, M_TEMP); 721 inflateEnd(&zs); 722 abort_with_fw: 723 firmware_put(fw, FIRMWARE_UNLOAD); 724 return status; 725 } 726 727 /* 728 * Enable or disable periodic RDMAs from the host to make certain 729 * chipsets resend dropped PCIe messages 730 */ 731 static void 732 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 733 { 734 char buf_bytes[72]; 735 volatile uint32_t *confirm; 736 volatile char *submit; 737 uint32_t *buf, dma_low, dma_high; 738 int i; 739 740 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 741 742 /* Clear confirmation addr */ 743 confirm = (volatile uint32_t *)sc->cmd; 744 *confirm = 0; 745 wmb(); 746 747 /* 748 * Send an rdma command to the PCIe engine, and wait for the 749 * response in the confirmation address. The firmware should 750 * write a -1 there to indicate it is alive and well 751 */ 752 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr); 753 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr); 754 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 755 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 756 buf[2] = htobe32(0xffffffff); /* confirm data */ 757 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.dmem_busaddr); 758 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.dmem_busaddr); 759 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 760 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 761 buf[5] = htobe32(enable); /* enable? */ 762 763 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 764 765 mxge_pio_copy(submit, buf, 64); 766 wmb(); 767 DELAY(1000); 768 wmb(); 769 i = 0; 770 while (*confirm != 0xffffffff && i < 20) { 771 DELAY(1000); 772 i++; 773 } 774 if (*confirm != 0xffffffff) { 775 if_printf(sc->ifp, "dummy rdma %s failed (%p = 0x%x)", 776 (enable ? 
"enable" : "disable"), confirm, *confirm); 777 } 778 } 779 780 static int 781 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 782 { 783 mcp_cmd_t *buf; 784 char buf_bytes[sizeof(*buf) + 8]; 785 volatile mcp_cmd_response_t *response = sc->cmd; 786 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 787 uint32_t dma_low, dma_high; 788 int err, sleep_total = 0; 789 790 /* Ensure buf is aligned to 8 bytes */ 791 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 792 793 buf->data0 = htobe32(data->data0); 794 buf->data1 = htobe32(data->data1); 795 buf->data2 = htobe32(data->data2); 796 buf->cmd = htobe32(cmd); 797 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr); 798 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr); 799 800 buf->response_addr.low = htobe32(dma_low); 801 buf->response_addr.high = htobe32(dma_high); 802 803 response->result = 0xffffffff; 804 wmb(); 805 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 806 807 /* 808 * Wait up to 20ms 809 */ 810 err = EAGAIN; 811 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 812 wmb(); 813 switch (be32toh(response->result)) { 814 case 0: 815 data->data0 = be32toh(response->data); 816 err = 0; 817 break; 818 case 0xffffffff: 819 DELAY(1000); 820 break; 821 case MXGEFW_CMD_UNKNOWN: 822 err = ENOSYS; 823 break; 824 case MXGEFW_CMD_ERROR_UNALIGNED: 825 err = E2BIG; 826 break; 827 case MXGEFW_CMD_ERROR_BUSY: 828 err = EBUSY; 829 break; 830 case MXGEFW_CMD_ERROR_I2C_ABSENT: 831 err = ENXIO; 832 break; 833 default: 834 if_printf(sc->ifp, "command %d failed, result = %d\n", 835 cmd, be32toh(response->result)); 836 err = ENXIO; 837 break; 838 } 839 if (err != EAGAIN) 840 break; 841 } 842 if (err == EAGAIN) { 843 if_printf(sc->ifp, "command %d timed out result = %d\n", 844 cmd, be32toh(response->result)); 845 } 846 return err; 847 } 848 849 static int 850 mxge_adopt_running_firmware(mxge_softc_t *sc) 851 { 852 struct mcp_gen_header *hdr; 853 const size_t bytes = sizeof(struct mcp_gen_header); 854 size_t hdr_offset; 855 int status; 856 857 /* 858 * Find running firmware header 859 */ 860 hdr_offset = 861 htobe32(*(volatile uint32_t *)(sc->sram + MCP_HEADER_PTR_OFFSET)); 862 863 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 864 if_printf(sc->ifp, "Running firmware has bad header offset " 865 "(%zu)\n", hdr_offset); 866 return EIO; 867 } 868 869 /* 870 * Copy header of running firmware from SRAM to host memory to 871 * validate firmware 872 */ 873 hdr = kmalloc(bytes, M_DEVBUF, M_WAITOK); 874 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 875 rman_get_bushandle(sc->mem_res), hdr_offset, (char *)hdr, bytes); 876 status = mxge_validate_firmware(sc, hdr); 877 kfree(hdr, M_DEVBUF); 878 879 /* 880 * Check to see if adopted firmware has bug where adopting 881 * it will cause broadcasts to be filtered unless the NIC 882 * is kept in ALLMULTI mode 883 */ 884 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 885 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 886 sc->adopted_rx_filter_bug = 1; 887 if_printf(sc->ifp, "Adopting fw %d.%d.%d: " 888 "working around rx filter bug\n", 889 sc->fw_ver_major, sc->fw_ver_minor, sc->fw_ver_tiny); 890 } 891 892 return status; 893 } 894 895 static int 896 mxge_load_firmware(mxge_softc_t *sc, int adopt) 897 { 898 volatile uint32_t *confirm; 899 volatile char *submit; 900 char buf_bytes[72]; 901 uint32_t *buf, size, dma_low, dma_high; 902 int status, i; 903 904 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 905 906 size = 
sc->sram_size; 907 status = mxge_load_firmware_helper(sc, &size); 908 if (status) { 909 if (!adopt) 910 return status; 911 912 /* 913 * Try to use the currently running firmware, if 914 * it is new enough 915 */ 916 status = mxge_adopt_running_firmware(sc); 917 if (status) { 918 if_printf(sc->ifp, 919 "failed to adopt running firmware\n"); 920 return status; 921 } 922 if_printf(sc->ifp, "Successfully adopted running firmware\n"); 923 924 if (sc->tx_boundary == 4096) { 925 if_printf(sc->ifp, 926 "Using firmware currently running on NIC. " 927 "For optimal\n"); 928 if_printf(sc->ifp, "performance consider loading " 929 "optimized firmware\n"); 930 } 931 sc->fw_name = mxge_fw_unaligned; 932 sc->tx_boundary = 2048; 933 return 0; 934 } 935 936 /* Clear confirmation addr */ 937 confirm = (volatile uint32_t *)sc->cmd; 938 *confirm = 0; 939 wmb(); 940 941 /* 942 * Send a reload command to the bootstrap MCP, and wait for the 943 * response in the confirmation address. The firmware should 944 * write a -1 there to indicate it is alive and well 945 */ 946 947 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr); 948 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr); 949 950 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 951 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 952 buf[2] = htobe32(0xffffffff); /* confirm data */ 953 954 /* 955 * FIX: All newest firmware should un-protect the bottom of 956 * the sram before handoff. However, the very first interfaces 957 * do not. Therefore the handoff copy must skip the first 8 bytes 958 */ 959 /* where the code starts*/ 960 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 961 buf[4] = htobe32(size - 8); /* length of code */ 962 buf[5] = htobe32(8); /* where to copy to */ 963 buf[6] = htobe32(0); /* where to jump to */ 964 965 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 966 mxge_pio_copy(submit, buf, 64); 967 wmb(); 968 DELAY(1000); 969 wmb(); 970 i = 0; 971 while (*confirm != 0xffffffff && i < 20) { 972 DELAY(1000*10); 973 i++; 974 } 975 if (*confirm != 0xffffffff) { 976 if_printf(sc->ifp,"handoff failed (%p = 0x%x)", 977 confirm, *confirm); 978 return ENXIO; 979 } 980 return 0; 981 } 982 983 static int 984 mxge_update_mac_address(mxge_softc_t *sc) 985 { 986 mxge_cmd_t cmd; 987 uint8_t *addr = sc->mac_addr; 988 989 cmd.data0 = (addr[0] << 24) | (addr[1] << 16) | 990 (addr[2] << 8) | addr[3]; 991 cmd.data1 = (addr[4] << 8) | (addr[5]); 992 return mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 993 } 994 995 static int 996 mxge_change_pause(mxge_softc_t *sc, int pause) 997 { 998 mxge_cmd_t cmd; 999 int status; 1000 1001 if (pause) 1002 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, &cmd); 1003 else 1004 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, &cmd); 1005 if (status) { 1006 if_printf(sc->ifp, "Failed to set flow control mode\n"); 1007 return ENXIO; 1008 } 1009 sc->pause = pause; 1010 return 0; 1011 } 1012 1013 static void 1014 mxge_change_promisc(mxge_softc_t *sc, int promisc) 1015 { 1016 mxge_cmd_t cmd; 1017 int status; 1018 1019 if (mxge_always_promisc) 1020 promisc = 1; 1021 1022 if (promisc) 1023 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, &cmd); 1024 else 1025 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, &cmd); 1026 if (status) 1027 if_printf(sc->ifp, "Failed to set promisc mode\n"); 1028 } 1029 1030 static void 1031 mxge_set_multicast_list(mxge_softc_t *sc) 1032 { 1033 mxge_cmd_t cmd; 1034 struct ifmultiaddr *ifma; 1035 struct ifnet *ifp = sc->ifp; 1036 int err; 1037 1038 /* This firmware is 
known to not support multicast */
1039 if (!sc->fw_multicast_support)
1040 return;
1041
1042 /* Disable multicast filtering while we play with the lists */
1043 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
1044 if (err != 0) {
1045 if_printf(ifp, "Failed MXGEFW_ENABLE_ALLMULTI, "
1046 "error status: %d\n", err);
1047 return;
1048 }
1049
1050 if (sc->adopted_rx_filter_bug)
1051 return;
1052
1053 if (ifp->if_flags & IFF_ALLMULTI) {
1054 /* Request to disable multicast filtering, so quit here */
1055 return;
1056 }
1057
1058 /* Flush all the filters */
1059 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
1060 if (err != 0) {
1061 if_printf(ifp, "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, "
1062 "error status: %d\n", err);
1063 return;
1064 }
1065
1066 /*
1067 * Walk the multicast list, and add each address
1068 */
1069 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1070 if (ifma->ifma_addr->sa_family != AF_LINK)
1071 continue;
1072
1073 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1074 &cmd.data0, 4);
1075 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
1076 &cmd.data1, 2);
1077 cmd.data0 = htonl(cmd.data0);
1078 cmd.data1 = htonl(cmd.data1);
1079 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
1080 if (err != 0) {
1081 if_printf(ifp, "Failed MXGEFW_JOIN_MULTICAST_GROUP, "
1082 "error status: %d\n", err);
1083 /* Abort, leaving multicast filtering off */
1084 return;
1085 }
1086 }
1087
1088 /* Enable multicast filtering */
1089 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
1090 if (err != 0) {
1091 if_printf(ifp, "Failed MXGEFW_DISABLE_ALLMULTI, "
1092 "error status: %d\n", err);
1093 }
1094 }
1095
1096 #if 0
1097 static int
1098 mxge_max_mtu(mxge_softc_t *sc)
1099 {
1100 mxge_cmd_t cmd;
1101 int status;
1102
1103 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
1104 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1105
1106 /* try to set nbufs to see if we can
1107 use virtually contiguous jumbos */
1108 cmd.data0 = 0;
1109 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
1110 &cmd);
1111 if (status == 0)
1112 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1113
1114 /* otherwise, we're limited to MJUMPAGESIZE */
1115 return MJUMPAGESIZE - MXGEFW_PAD;
1116 }
1117 #endif
1118
1119 static int
1120 mxge_reset(mxge_softc_t *sc, int interrupts_setup)
1121 {
1122 struct mxge_slice_state *ss;
1123 mxge_rx_done_t *rx_done;
1124 volatile uint32_t *irq_claim;
1125 mxge_cmd_t cmd;
1126 int slice, status, rx_intr_size;
1127
1128 /*
1129 * Try to send a reset command to the card to see if it
1130 * is alive
1131 */
1132 memset(&cmd, 0, sizeof (cmd));
1133 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
1134 if (status != 0) {
1135 if_printf(sc->ifp, "failed reset\n");
1136 return ENXIO;
1137 }
1138
1139 mxge_dummy_rdma(sc, 1);
1140
1141 /*
1142 * Set the intrq size
1143 * XXX assume 4byte mcp_slot
1144 */
1145 rx_intr_size = sc->rx_intr_slots * sizeof(mcp_slot_t);
1146 cmd.data0 = rx_intr_size;
1147 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
1148
1149 /*
1150 * Even though we already know how many slices are supported
1151 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
1152 * has magic side effects, and must be called after a reset.
1153 * It must be called prior to calling any RSS related cmds,
1154 * including assigning an interrupt queue for anything but
1155 * slice 0. It must also be called *after*
1156 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1157 * the firmware to compute offsets.
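*
* In short, the order implemented below is: MXGEFW_CMD_RESET, then
* MXGEFW_CMD_SET_INTRQ_SIZE, then MXGEFW_CMD_GET_MAX_RSS_QUEUES and
* MXGEFW_CMD_ENABLE_RSS_QUEUES, and only then the per-slice
* MXGEFW_CMD_SET_INTRQ_DMA setup.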
1158 */
1159 if (sc->num_slices > 1) {
1160 /* Ask for the maximum number of slices it supports */
1161 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
1162 if (status != 0) {
1163 if_printf(sc->ifp, "failed to get number of slices\n");
1164 return status;
1165 }
1166
1167 /*
1168 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1169 * to setting up the interrupt queue DMA
1170 */
1171 cmd.data0 = sc->num_slices;
1172 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
1173 if (sc->num_tx_rings > 1)
1174 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
1175 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, &cmd);
1176 if (status != 0) {
1177 if_printf(sc->ifp, "failed to set number of slices\n");
1178 return status;
1179 }
1180 }
1181
1182 if (interrupts_setup) {
1183 /* Now exchange information about interrupts */
1184 for (slice = 0; slice < sc->num_slices; slice++) {
1185 ss = &sc->ss[slice];
1186
1187 rx_done = &ss->rx_data.rx_done;
1188 memset(rx_done->entry, 0, rx_intr_size);
1189
1190 cmd.data0 =
1191 MXGE_LOWPART_TO_U32(ss->rx_done_dma.dmem_busaddr);
1192 cmd.data1 =
1193 MXGE_HIGHPART_TO_U32(ss->rx_done_dma.dmem_busaddr);
1194 cmd.data2 = slice;
1195 status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA,
1196 &cmd);
1197 }
1198 }
1199
1200 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET,
1201 &cmd);
1202 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);
1203
1204 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
1205 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
1206
1207 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd);
1208 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
1209
1210 if (status != 0) {
1211 if_printf(sc->ifp, "failed to set interrupt parameters\n");
1212 return status;
1213 }
1214
1215 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);
1216
1217 /* Run a DMA benchmark */
1218 mxge_dma_test(sc, MXGEFW_DMA_TEST);
1219
1220 for (slice = 0; slice < sc->num_slices; slice++) {
1221 ss = &sc->ss[slice];
1222
1223 ss->irq_claim = irq_claim + (2 * slice);
1224
1225 /* Reset mcp/driver shared state back to 0 */
1226 ss->rx_data.rx_done.idx = 0;
1227 ss->tx.req = 0;
1228 ss->tx.done = 0;
1229 ss->tx.pkt_done = 0;
1230 ss->tx.queue_active = 0;
1231 ss->tx.activate = 0;
1232 ss->tx.deactivate = 0;
1233 ss->rx_data.rx_big.cnt = 0;
1234 ss->rx_data.rx_small.cnt = 0;
1235 if (ss->fw_stats != NULL)
1236 bzero(ss->fw_stats, sizeof(*ss->fw_stats));
1237 }
1238 sc->rdma_tags_available = 15;
1239
1240 status = mxge_update_mac_address(sc);
1241 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
1242 mxge_change_pause(sc, sc->pause);
1243 mxge_set_multicast_list(sc);
1244
1245 if (sc->throttle) {
1246 cmd.data0 = sc->throttle;
1247 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd))
1248 if_printf(sc->ifp, "can't enable throttle\n");
1249 }
1250 return status;
1251 }
1252
1253 static int
1254 mxge_change_throttle(SYSCTL_HANDLER_ARGS)
1255 {
1256 mxge_cmd_t cmd;
1257 mxge_softc_t *sc;
1258 int err;
1259 unsigned int throttle;
1260
1261 sc = arg1;
1262 throttle = sc->throttle;
1263 err = sysctl_handle_int(oidp, &throttle, arg2, req);
1264 if (err != 0)
1265 return err;
1266
1267 if (throttle == sc->throttle)
1268 return 0;
1269
1270 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
1271 return EINVAL;
1272
1273 ifnet_serialize_all(sc->ifp);
1274
1275 cmd.data0 = throttle;
1276 err = mxge_send_cmd(sc, 
MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd); 1277 if (err == 0) 1278 sc->throttle = throttle; 1279 1280 ifnet_deserialize_all(sc->ifp); 1281 return err; 1282 } 1283 1284 static int 1285 mxge_change_use_rss(SYSCTL_HANDLER_ARGS) 1286 { 1287 mxge_softc_t *sc; 1288 int err, use_rss; 1289 1290 sc = arg1; 1291 use_rss = sc->use_rss; 1292 err = sysctl_handle_int(oidp, &use_rss, arg2, req); 1293 if (err != 0) 1294 return err; 1295 1296 if (use_rss == sc->use_rss) 1297 return 0; 1298 1299 ifnet_serialize_all(sc->ifp); 1300 1301 sc->use_rss = use_rss; 1302 if (sc->ifp->if_flags & IFF_RUNNING) { 1303 mxge_close(sc, 0); 1304 mxge_open(sc); 1305 } 1306 1307 ifnet_deserialize_all(sc->ifp); 1308 return err; 1309 } 1310 1311 static int 1312 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1313 { 1314 mxge_softc_t *sc; 1315 unsigned int intr_coal_delay; 1316 int err; 1317 1318 sc = arg1; 1319 intr_coal_delay = sc->intr_coal_delay; 1320 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1321 if (err != 0) 1322 return err; 1323 1324 if (intr_coal_delay == sc->intr_coal_delay) 1325 return 0; 1326 1327 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1328 return EINVAL; 1329 1330 ifnet_serialize_all(sc->ifp); 1331 1332 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1333 sc->intr_coal_delay = intr_coal_delay; 1334 1335 ifnet_deserialize_all(sc->ifp); 1336 return err; 1337 } 1338 1339 static int 1340 mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1341 { 1342 mxge_softc_t *sc; 1343 unsigned int enabled; 1344 int err; 1345 1346 sc = arg1; 1347 enabled = sc->pause; 1348 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1349 if (err != 0) 1350 return err; 1351 1352 if (enabled == sc->pause) 1353 return 0; 1354 1355 ifnet_serialize_all(sc->ifp); 1356 err = mxge_change_pause(sc, enabled); 1357 ifnet_deserialize_all(sc->ifp); 1358 1359 return err; 1360 } 1361 1362 static int 1363 mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1364 { 1365 int err; 1366 1367 if (arg1 == NULL) 1368 return EFAULT; 1369 arg2 = be32toh(*(int *)arg1); 1370 arg1 = NULL; 1371 err = sysctl_handle_int(oidp, arg1, arg2, req); 1372 1373 return err; 1374 } 1375 1376 static void 1377 mxge_rem_sysctls(mxge_softc_t *sc) 1378 { 1379 if (sc->ss != NULL) { 1380 struct mxge_slice_state *ss; 1381 int slice; 1382 1383 for (slice = 0; slice < sc->num_slices; slice++) { 1384 ss = &sc->ss[slice]; 1385 if (ss->sysctl_tree != NULL) { 1386 sysctl_ctx_free(&ss->sysctl_ctx); 1387 ss->sysctl_tree = NULL; 1388 } 1389 } 1390 } 1391 1392 if (sc->slice_sysctl_tree != NULL) { 1393 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1394 sc->slice_sysctl_tree = NULL; 1395 } 1396 } 1397 1398 static void 1399 mxge_add_sysctls(mxge_softc_t *sc) 1400 { 1401 struct sysctl_ctx_list *ctx; 1402 struct sysctl_oid_list *children; 1403 mcp_irq_data_t *fw; 1404 struct mxge_slice_state *ss; 1405 int slice; 1406 char slice_num[8]; 1407 1408 ctx = device_get_sysctl_ctx(sc->dev); 1409 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1410 fw = sc->ss[0].fw_stats; 1411 1412 /* 1413 * Random information 1414 */ 1415 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version", 1416 CTLFLAG_RD, &sc->fw_version, 0, "firmware version"); 1417 1418 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "serial_number", 1419 CTLFLAG_RD, &sc->serial_number_string, 0, "serial number"); 1420 1421 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "product_code", 1422 CTLFLAG_RD, &sc->product_code_string, 0, "product code"); 1423 1424 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "pcie_link_width", 1425 CTLFLAG_RD, 
&sc->link_width, 0, "link width");
1426
1427 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_boundary",
1428 CTLFLAG_RD, &sc->tx_boundary, 0, "tx boundary");
1429
1430 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_combine",
1431 CTLFLAG_RD, &sc->wc, 0, "write combining PIO");
1432
1433 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_dma_MBs",
1434 CTLFLAG_RD, &sc->read_dma, 0, "DMA Read speed in MB/s");
1435
1436 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_dma_MBs",
1437 CTLFLAG_RD, &sc->write_dma, 0, "DMA Write speed in MB/s");
1438
1439 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_write_dma_MBs",
1440 CTLFLAG_RD, &sc->read_write_dma, 0,
1441 "DMA concurrent Read/Write speed in MB/s");
1442
1443 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "watchdog_resets",
1444 CTLFLAG_RD, &sc->watchdog_resets, 0,
1445 "Number of times NIC was reset");
1446
1447 /*
1448 * Performance related tunables
1449 */
1450 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_coal_delay",
1451 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_intr_coal, "I",
1452 "Interrupt coalescing delay in usecs");
1453
1454 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "throttle",
1455 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_throttle, "I",
1456 "Transmit throttling");
1457
1458 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "flow_control_enabled",
1459 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_flow_control, "I",
1460 "Flow control (pause frames) enabled");
1461
1462 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "use_rss",
1463 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_use_rss, "I",
1464 "Use RSS");
1465
1466 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "deassert_wait",
1467 CTLFLAG_RW, &mxge_deassert_wait, 0,
1468 "Wait for IRQ line to go low in ihandler");
1469
1470 /*
1471 * Stats block from firmware is in network byte order.
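(mxge_handle_be32() above does the conversion: it reads the
* big-endian counter, applies be32toh(), and passes the host-order
* value to sysctl_handle_int() as a read-only value.)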
1472 * Need to swap it 1473 */ 1474 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "link_up", 1475 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 0, 1476 mxge_handle_be32, "I", "link up"); 1477 1478 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_tags_available", 1479 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 0, 1480 mxge_handle_be32, "I", "rdma_tags_available"); 1481 1482 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_crc32", 1483 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_crc32, 0, 1484 mxge_handle_be32, "I", "dropped_bad_crc32"); 1485 1486 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_phy", 1487 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_phy, 0, 1488 mxge_handle_be32, "I", "dropped_bad_phy"); 1489 1490 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_error_or_filtered", 1491 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_error_or_filtered, 0, 1492 mxge_handle_be32, "I", "dropped_link_error_or_filtered"); 1493 1494 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_overflow", 1495 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 0, 1496 mxge_handle_be32, "I", "dropped_link_overflow"); 1497 1498 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_multicast_filtered", 1499 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_multicast_filtered, 0, 1500 mxge_handle_be32, "I", "dropped_multicast_filtered"); 1501 1502 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_no_big_buffer", 1503 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 0, 1504 mxge_handle_be32, "I", "dropped_no_big_buffer"); 1505 1506 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_no_small_buffer", 1507 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_small_buffer, 0, 1508 mxge_handle_be32, "I", "dropped_no_small_buffer"); 1509 1510 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_overrun", 1511 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 0, 1512 mxge_handle_be32, "I", "dropped_overrun"); 1513 1514 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_pause", 1515 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_pause, 0, 1516 mxge_handle_be32, "I", "dropped_pause"); 1517 1518 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_runt", 1519 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 0, 1520 mxge_handle_be32, "I", "dropped_runt"); 1521 1522 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_unicast_filtered", 1523 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 0, 1524 mxge_handle_be32, "I", "dropped_unicast_filtered"); 1525 1526 /* add counters exported for debugging from all slices */ 1527 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1528 sc->slice_sysctl_tree = SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, 1529 children, OID_AUTO, "slice", CTLFLAG_RD, 0, ""); 1530 if (sc->slice_sysctl_tree == NULL) { 1531 device_printf(sc->dev, "can't add slice sysctl node\n"); 1532 return; 1533 } 1534 1535 for (slice = 0; slice < sc->num_slices; slice++) { 1536 ss = &sc->ss[slice]; 1537 sysctl_ctx_init(&ss->sysctl_ctx); 1538 ctx = &ss->sysctl_ctx; 1539 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1540 ksprintf(slice_num, "%d", slice); 1541 ss->sysctl_tree = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 1542 slice_num, CTLFLAG_RD, 0, ""); 1543 if (ss->sysctl_tree == NULL) { 1544 device_printf(sc->dev, 1545 "can't add %d slice sysctl node\n", slice); 1546 return; /* XXX continue? 
*/
1547 }
1548 children = SYSCTL_CHILDREN(ss->sysctl_tree);
1549
1550 /*
1551 * XXX change to ULONG
1552 */
1553
1554 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_small_cnt",
1555 CTLFLAG_RD, &ss->rx_data.rx_small.cnt, 0, "rx_small_cnt");
1556
1557 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_big_cnt",
1558 CTLFLAG_RD, &ss->rx_data.rx_big.cnt, 0, "rx_big_cnt");
1559
1560 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_req",
1561 CTLFLAG_RD, &ss->tx.req, 0, "tx_req");
1562
1563 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_done",
1564 CTLFLAG_RD, &ss->tx.done, 0, "tx_done");
1565
1566 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_pkt_done",
1567 CTLFLAG_RD, &ss->tx.pkt_done, 0, "tx_pkt_done");
1568
1569 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_queue_active",
1570 CTLFLAG_RD, &ss->tx.queue_active, 0, "tx_queue_active");
1571
1572 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_activate",
1573 CTLFLAG_RD, &ss->tx.activate, 0, "tx_activate");
1574
1575 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_deactivate",
1576 CTLFLAG_RD, &ss->tx.deactivate, 0, "tx_deactivate");
1577 }
1578 }
1579
1580 /*
1581 * Copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1582 * backwards one at a time and handle ring wraps
1583 */
1584 static __inline void
1585 mxge_submit_req_backwards(mxge_tx_ring_t *tx,
1586 mcp_kreq_ether_send_t *src, int cnt)
1587 {
1588 int idx, starting_slot;
1589
1590 starting_slot = tx->req;
1591 while (cnt > 1) {
1592 cnt--;
1593 idx = (starting_slot + cnt) & tx->mask;
1594 mxge_pio_copy(&tx->lanai[idx], &src[cnt], sizeof(*src));
1595 wmb();
1596 }
1597 }
1598
1599 /*
1600 * Copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1601 * at most 32 bytes at a time, so as to avoid involving the software
1602 * pio handler in the nic. We re-write the first segment's flags
1603 * to mark them valid only after writing the entire chain
1604 */
1605 static __inline void
1606 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, int cnt)
1607 {
1608 int idx, i;
1609 uint32_t *src_ints;
1610 volatile uint32_t *dst_ints;
1611 mcp_kreq_ether_send_t *srcp;
1612 volatile mcp_kreq_ether_send_t *dstp, *dst;
1613 uint8_t last_flags;
1614
1615 idx = tx->req & tx->mask;
1616
1617 last_flags = src->flags;
1618 src->flags = 0;
1619 wmb();
1620 dst = dstp = &tx->lanai[idx];
1621 srcp = src;
1622
1623 if ((idx + cnt) < tx->mask) {
1624 for (i = 0; i < cnt - 1; i += 2) {
1625 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
1626 wmb(); /* force write every 32 bytes */
1627 srcp += 2;
1628 dstp += 2;
1629 }
1630 } else {
1631 /*
1632 * Submit all but the first request, and ensure
1633 * that it is submitted below
1634 */
1635 mxge_submit_req_backwards(tx, src, cnt);
1636 i = 0;
1637 }
1638 if (i < cnt) {
1639 /* Submit the first request */
1640 mxge_pio_copy(dstp, srcp, sizeof(*src));
1641 wmb(); /* barrier before setting valid flag */
1642 }
1643
1644 /* Re-write the last 32-bits with the valid flags */
1645 src->flags = last_flags;
1646 src_ints = (uint32_t *)src;
1647 src_ints += 3;
1648 dst_ints = (volatile uint32_t *)dst;
1649 dst_ints += 3;
1650 *dst_ints = *src_ints;
1651 tx->req += cnt;
1652 wmb();
1653 }
1654
1655 static int
1656 mxge_pullup_tso(struct mbuf **mp)
1657 {
1658 int hoff, iphlen, thoff;
1659 struct mbuf *m;
1660
1661 m = *mp;
1662 KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));
1663
1664 iphlen = m->m_pkthdr.csum_iphlen;
1665 thoff = m->m_pkthdr.csum_thlen;
1666 hoff = m->m_pkthdr.csum_lhlen;
1667
1668 KASSERT(iphlen > 0, ("invalid ip hlen"));
1669 KASSERT(thoff > 0, ("invalid tcp hlen"));
1670 KASSERT(hoff > 0, ("invalid ether hlen"));
1671
1672 if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
1673 m = m_pullup(m, hoff + iphlen + thoff);
1674 if (m == NULL) {
1675 *mp = NULL;
1676 return ENOBUFS;
1677 }
1678 *mp = m;
1679 }
1680 return 0;
1681 }
1682
1683 static int
1684 mxge_encap_tso(mxge_tx_ring_t *tx, struct mxge_buffer_state *info_map,
1685 struct mbuf *m, int busdma_seg_cnt)
1686 {
1687 mcp_kreq_ether_send_t *req;
1688 bus_dma_segment_t *seg;
1689 uint32_t low, high_swapped;
1690 int len, seglen, cum_len, cum_len_next;
1691 int next_is_first, chop, cnt, rdma_count, small;
1692 uint16_t pseudo_hdr_offset, cksum_offset, mss;
1693 uint8_t flags, flags_next;
1694 struct mxge_buffer_state *info_last;
1695 bus_dmamap_t map = info_map->map;
1696
1697 mss = m->m_pkthdr.tso_segsz;
1698
1699 /*
1700 * Negative cum_len signifies to the send loop that we are
1701 * still in the header portion of the TSO packet.
1702 */
1703 cum_len = -(m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen +
1704 m->m_pkthdr.csum_thlen);
1705
1706 /*
1707 * TSO implies checksum offload on this hardware
1708 */
1709 cksum_offset = m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen;
1710 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;
1711
1712 /*
1713 * For TSO, pseudo_hdr_offset holds mss. The firmware figures
1714 * out where to put the checksum by parsing the header.
1715 */
1716 pseudo_hdr_offset = htobe16(mss);
1717
1718 req = tx->req_list;
1719 seg = tx->seg_list;
1720 cnt = 0;
1721 rdma_count = 0;
1722
1723 /*
1724 * "rdma_count" is the number of RDMAs belonging to the current
1725 * packet BEFORE the current send request. For non-TSO packets,
1726 * this is equal to "count".
1727 *
1728 * For TSO packets, rdma_count needs to be reset to 0 after a
1729 * segment cut.
1730 *
1731 * The rdma_count field of the send request is the number of
1732 * RDMAs of the packet starting at that request. For TSO send
1733 * requests with one or more cuts in the middle, this is the
1734 * number of RDMAs starting after the last cut in the request.
1735 * All previous segments before the last cut implicitly have 1
1736 * RDMA.
1737 *
1738 * Since the number of RDMAs is not known beforehand, it must be
1739 * filled in retroactively - after each segmentation cut or at
1740 * the end of the entire packet.
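*
* A small worked example (hypothetical segment layout): if one MSS
* worth of payload spans three send requests with no cut in between,
* each request is first written with rdma_count = 1; once the next
* cut (or the end of the packet) is reached, the backfill
* "(req - rdma_count)->rdma_count = rdma_count" rewrites the first
* request of that group to carry the true count of 3.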
1741 */ 1742 1743 while (busdma_seg_cnt) { 1744 /* 1745 * Break the busdma segment up into pieces 1746 */ 1747 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1748 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1749 len = seg->ds_len; 1750 1751 while (len) { 1752 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1753 seglen = len; 1754 cum_len_next = cum_len + seglen; 1755 (req - rdma_count)->rdma_count = rdma_count + 1; 1756 if (__predict_true(cum_len >= 0)) { 1757 /* Payload */ 1758 chop = (cum_len_next > mss); 1759 cum_len_next = cum_len_next % mss; 1760 next_is_first = (cum_len_next == 0); 1761 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1762 flags_next |= 1763 next_is_first * MXGEFW_FLAGS_FIRST; 1764 rdma_count |= -(chop | next_is_first); 1765 rdma_count += chop & !next_is_first; 1766 } else if (cum_len_next >= 0) { 1767 /* Header ends */ 1768 rdma_count = -1; 1769 cum_len_next = 0; 1770 seglen = -cum_len; 1771 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1772 flags_next = MXGEFW_FLAGS_TSO_PLD | 1773 MXGEFW_FLAGS_FIRST | 1774 (small * MXGEFW_FLAGS_SMALL); 1775 } 1776 1777 req->addr_high = high_swapped; 1778 req->addr_low = htobe32(low); 1779 req->pseudo_hdr_offset = pseudo_hdr_offset; 1780 req->pad = 0; 1781 req->rdma_count = 1; 1782 req->length = htobe16(seglen); 1783 req->cksum_offset = cksum_offset; 1784 req->flags = 1785 flags | ((cum_len & 1) * MXGEFW_FLAGS_ALIGN_ODD); 1786 low += seglen; 1787 len -= seglen; 1788 cum_len = cum_len_next; 1789 flags = flags_next; 1790 req++; 1791 cnt++; 1792 rdma_count++; 1793 if (__predict_false(cksum_offset > seglen)) 1794 cksum_offset -= seglen; 1795 else 1796 cksum_offset = 0; 1797 if (__predict_false(cnt > tx->max_desc)) 1798 goto drop; 1799 } 1800 busdma_seg_cnt--; 1801 seg++; 1802 } 1803 (req - rdma_count)->rdma_count = rdma_count; 1804 1805 do { 1806 req--; 1807 req->flags |= MXGEFW_FLAGS_TSO_LAST; 1808 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 1809 1810 info_last = &tx->info[((cnt - 1) + tx->req) & tx->mask]; 1811 1812 info_map->map = info_last->map; 1813 info_last->map = map; 1814 info_last->m = m; 1815 1816 mxge_submit_req(tx, tx->req_list, cnt); 1817 1818 if (tx->send_go != NULL && tx->queue_active == 0) { 1819 /* Tell the NIC to start polling this slice */ 1820 *tx->send_go = 1; 1821 tx->queue_active = 1; 1822 tx->activate++; 1823 wmb(); 1824 } 1825 return 0; 1826 1827 drop: 1828 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 1829 m_freem(m); 1830 return ENOBUFS; 1831 } 1832 1833 static int 1834 mxge_encap(mxge_tx_ring_t *tx, struct mbuf *m, bus_addr_t zeropad) 1835 { 1836 mcp_kreq_ether_send_t *req; 1837 bus_dma_segment_t *seg; 1838 bus_dmamap_t map; 1839 int cnt, cum_len, err, i, idx, odd_flag; 1840 uint16_t pseudo_hdr_offset; 1841 uint8_t flags, cksum_offset; 1842 struct mxge_buffer_state *info_map, *info_last; 1843 1844 if (m->m_pkthdr.csum_flags & CSUM_TSO) { 1845 err = mxge_pullup_tso(&m); 1846 if (__predict_false(err)) 1847 return err; 1848 } 1849 1850 /* 1851 * Map the frame for DMA 1852 */ 1853 idx = tx->req & tx->mask; 1854 info_map = &tx->info[idx]; 1855 map = info_map->map; 1856 1857 err = bus_dmamap_load_mbuf_defrag(tx->dmat, map, &m, 1858 tx->seg_list, tx->max_desc - 2, &cnt, BUS_DMA_NOWAIT); 1859 if (__predict_false(err != 0)) 1860 goto drop; 1861 bus_dmamap_sync(tx->dmat, map, BUS_DMASYNC_PREWRITE); 1862 1863 /* 1864 * TSO is different enough, we handle it in another routine 1865 */ 1866 if (m->m_pkthdr.csum_flags & CSUM_TSO) 1867 return mxge_encap_tso(tx, info_map, m, cnt); 1868 1869 
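/* Non-TSO path: one send request is built per busdma segment below. */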
req = tx->req_list; 1870 cksum_offset = 0; 1871 pseudo_hdr_offset = 0; 1872 flags = MXGEFW_FLAGS_NO_TSO; 1873 1874 /* 1875 * Checksum offloading 1876 */ 1877 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 1878 cksum_offset = m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen; 1879 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 1880 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 1881 req->cksum_offset = cksum_offset; 1882 flags |= MXGEFW_FLAGS_CKSUM; 1883 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 1884 } else { 1885 odd_flag = 0; 1886 } 1887 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 1888 flags |= MXGEFW_FLAGS_SMALL; 1889 1890 /* 1891 * Convert segments into a request list 1892 */ 1893 cum_len = 0; 1894 seg = tx->seg_list; 1895 req->flags = MXGEFW_FLAGS_FIRST; 1896 for (i = 0; i < cnt; i++) { 1897 req->addr_low = htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 1898 req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1899 req->length = htobe16(seg->ds_len); 1900 req->cksum_offset = cksum_offset; 1901 if (cksum_offset > seg->ds_len) 1902 cksum_offset -= seg->ds_len; 1903 else 1904 cksum_offset = 0; 1905 req->pseudo_hdr_offset = pseudo_hdr_offset; 1906 req->pad = 0; /* complete solid 16-byte block */ 1907 req->rdma_count = 1; 1908 req->flags |= flags | ((cum_len & 1) * odd_flag); 1909 cum_len += seg->ds_len; 1910 seg++; 1911 req++; 1912 req->flags = 0; 1913 } 1914 req--; 1915 1916 /* 1917 * Pad runt to 60 bytes 1918 */ 1919 if (cum_len < 60) { 1920 req++; 1921 req->addr_low = htobe32(MXGE_LOWPART_TO_U32(zeropad)); 1922 req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(zeropad)); 1923 req->length = htobe16(60 - cum_len); 1924 req->cksum_offset = 0; 1925 req->pseudo_hdr_offset = pseudo_hdr_offset; 1926 req->pad = 0; /* complete solid 16-byte block */ 1927 req->rdma_count = 1; 1928 req->flags |= flags | ((cum_len & 1) * odd_flag); 1929 cnt++; 1930 } 1931 1932 tx->req_list[0].rdma_count = cnt; 1933 #if 0 1934 /* print what the firmware will see */ 1935 for (i = 0; i < cnt; i++) { 1936 kprintf("%d: addr: 0x%x 0x%x len:%d pso%d," 1937 "cso:%d, flags:0x%x, rdma:%d\n", 1938 i, (int)ntohl(tx->req_list[i].addr_high), 1939 (int)ntohl(tx->req_list[i].addr_low), 1940 (int)ntohs(tx->req_list[i].length), 1941 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 1942 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 1943 tx->req_list[i].rdma_count); 1944 } 1945 kprintf("--------------\n"); 1946 #endif 1947 info_last = &tx->info[((cnt - 1) + tx->req) & tx->mask]; 1948 1949 info_map->map = info_last->map; 1950 info_last->map = map; 1951 info_last->m = m; 1952 1953 mxge_submit_req(tx, tx->req_list, cnt); 1954 1955 if (tx->send_go != NULL && tx->queue_active == 0) { 1956 /* Tell the NIC to start polling this slice */ 1957 *tx->send_go = 1; 1958 tx->queue_active = 1; 1959 tx->activate++; 1960 wmb(); 1961 } 1962 return 0; 1963 1964 drop: 1965 m_freem(m); 1966 return err; 1967 } 1968 1969 static void 1970 mxge_start(struct ifnet *ifp, struct ifaltq_subque *ifsq) 1971 { 1972 mxge_softc_t *sc = ifp->if_softc; 1973 mxge_tx_ring_t *tx = ifsq_get_priv(ifsq); 1974 bus_addr_t zeropad; 1975 int encap = 0; 1976 1977 KKASSERT(tx->ifsq == ifsq); 1978 ASSERT_SERIALIZED(&tx->tx_serialize); 1979 1980 if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq)) 1981 return; 1982 1983 zeropad = sc->zeropad_dma.dmem_busaddr; 1984 while (tx->mask - (tx->req - tx->done) > tx->max_desc) { 1985 struct mbuf *m; 1986 int error; 1987 1988 m = ifsq_dequeue(ifsq); 1989 if (m == NULL) 1990 goto done; 1991 1992 BPF_MTAP(ifp, m); 1993 
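/*
* Note: mxge_encap() frees the mbuf itself on failure, so the
* caller only bumps the output error counter here.
*/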
error = mxge_encap(tx, m, zeropad); 1994 if (!error) 1995 encap = 1; 1996 else 1997 IFNET_STAT_INC(ifp, oerrors, 1); 1998 } 1999 2000 /* Ran out of transmit slots */ 2001 ifsq_set_oactive(ifsq); 2002 done: 2003 if (encap) 2004 tx->watchdog.wd_timer = 5; 2005 } 2006 2007 static void 2008 mxge_watchdog(struct ifaltq_subque *ifsq) 2009 { 2010 struct ifnet *ifp = ifsq_get_ifp(ifsq); 2011 struct mxge_softc *sc = ifp->if_softc; 2012 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 2013 mxge_tx_ring_t *tx = ifsq_get_priv(ifsq); 2014 2015 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2016 2017 /* Check for pause blocking before resetting */ 2018 if (tx->watchdog_rx_pause == rx_pause) { 2019 mxge_warn_stuck(sc, tx, 0); 2020 mxge_watchdog_reset(sc); 2021 return; 2022 } else { 2023 if_printf(ifp, "Flow control blocking xmits, " 2024 "check link partner\n"); 2025 } 2026 tx->watchdog_rx_pause = rx_pause; 2027 } 2028 2029 /* 2030 * Copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2031 * at most 32 bytes at a time, so as to avoid involving the software 2032 * pio handler in the nic. We re-write the first segment's low 2033 * DMA address to mark it valid only after we write the entire chunk 2034 * in a burst 2035 */ 2036 static __inline void 2037 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2038 mcp_kreq_ether_recv_t *src) 2039 { 2040 uint32_t low; 2041 2042 low = src->addr_low; 2043 src->addr_low = 0xffffffff; 2044 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2045 wmb(); 2046 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2047 wmb(); 2048 src->addr_low = low; 2049 dst->addr_low = low; 2050 wmb(); 2051 } 2052 2053 static int 2054 mxge_get_buf_small(mxge_rx_ring_t *rx, bus_dmamap_t map, int idx, 2055 boolean_t init) 2056 { 2057 bus_dma_segment_t seg; 2058 struct mbuf *m; 2059 int cnt, err, mflag; 2060 2061 mflag = M_NOWAIT; 2062 if (__predict_false(init)) 2063 mflag = M_WAITOK; 2064 2065 m = m_gethdr(mflag, MT_DATA); 2066 if (m == NULL) { 2067 err = ENOBUFS; 2068 if (__predict_false(init)) { 2069 /* 2070 * During initialization, there 2071 * is nothing to setup; bail out 2072 */ 2073 return err; 2074 } 2075 goto done; 2076 } 2077 m->m_len = m->m_pkthdr.len = MHLEN; 2078 2079 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m, 2080 &seg, 1, &cnt, BUS_DMA_NOWAIT); 2081 if (err != 0) { 2082 m_freem(m); 2083 if (__predict_false(init)) { 2084 /* 2085 * During initialization, there 2086 * is nothing to setup; bail out 2087 */ 2088 return err; 2089 } 2090 goto done; 2091 } 2092 2093 rx->info[idx].m = m; 2094 rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2095 rx->shadow[idx].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2096 2097 done: 2098 if ((idx & 7) == 7) 2099 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2100 return err; 2101 } 2102 2103 static int 2104 mxge_get_buf_big(mxge_rx_ring_t *rx, bus_dmamap_t map, int idx, 2105 boolean_t init) 2106 { 2107 bus_dma_segment_t seg; 2108 struct mbuf *m; 2109 int cnt, err, mflag; 2110 2111 mflag = M_NOWAIT; 2112 if (__predict_false(init)) 2113 mflag = M_WAITOK; 2114 2115 if (rx->cl_size == MCLBYTES) 2116 m = m_getcl(mflag, MT_DATA, M_PKTHDR); 2117 else 2118 m = m_getjcl(mflag, MT_DATA, M_PKTHDR, MJUMPAGESIZE); 2119 if (m == NULL) { 2120 err = ENOBUFS; 2121 if (__predict_false(init)) { 2122 /* 2123 * During initialization, there 2124 * is nothing to setup; bail out 2125 */ 2126 return err; 2127 } 2128 goto done; 2129 } 2130 m->m_len = m->m_pkthdr.len = rx->cl_size; 2131 2132 err = 
bus_dmamap_load_mbuf_segment(rx->dmat, map, m,
2133	    &seg, 1, &cnt, BUS_DMA_NOWAIT);
2134	if (err != 0) {
2135		m_freem(m);
2136		if (__predict_false(init)) {
2137			/*
2138			 * During initialization, there
2139			 * is nothing to setup; bail out
2140			 */
2141			return err;
2142		}
2143		goto done;
2144	}
2145
2146	rx->info[idx].m = m;
2147	rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2148	rx->shadow[idx].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2149
2150	done:
2151	if ((idx & 7) == 7)
2152		mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2153	return err;
2154	}
2155
2156	/*
2157	 * Myri10GE hardware checksums are not valid if the sender
2158	 * padded the frame with non-zero padding.  This is because
2159	 * the firmware just does a simple 16-bit 1s complement
2160	 * checksum across the entire frame, excluding the first 14
2161	 * bytes.  It is best to simply check the checksum and
2162	 * tell the stack about it only if the checksum is good.
2163	 */
2164	static __inline uint16_t
2165	mxge_rx_csum(struct mbuf *m, int csum)
2166	{
2167		const struct ether_header *eh;
2168		const struct ip *ip;
2169		uint16_t c;
2170
2171		eh = mtod(m, const struct ether_header *);
2172
2173		/* Only deal with IPv4 TCP & UDP for now */
2174		if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
2175			return 1;
2176
2177		ip = (const struct ip *)(eh + 1);
2178		if (__predict_false(ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP))
2179			return 1;
2180
2181	#ifdef INET
2182		c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2183		    htonl(ntohs(csum) + ntohs(ip->ip_len) -
2184		    (ip->ip_hl << 2) + ip->ip_p));
2185	#else
2186		c = 1;
2187	#endif
2188		c ^= 0xffff;
2189		return c;
2190	}
2191
2192	static void
2193	mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2194	{
2195		struct ether_vlan_header *evl;
2196		uint32_t partial;
2197
2198		evl = mtod(m, struct ether_vlan_header *);
2199
2200		/*
2201		 * Fix checksum by subtracting EVL_ENCAPLEN bytes after
2202		 * what the firmware thought was the end of the ethernet
2203		 * header.
2204		 */
2205
2206		/* Put checksum into host byte order */
2207		*csum = ntohs(*csum);
2208
2209		partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2210		*csum += ~partial;
2211		*csum += ((*csum) < ~partial);
2212		*csum = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2213		*csum = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2214
2215		/*
2216		 * Restore checksum to network byte order;
2217		 * later consumers expect this
2218		 */
2219		*csum = htons(*csum);
2220
2221		/* Save the tag */
2222		m->m_pkthdr.ether_vlantag = ntohs(evl->evl_tag);
2223		m->m_flags |= M_VLANTAG;
2224
2225		/*
2226		 * Remove the 802.1q header by copying the Ethernet
2227		 * addresses over it and adjusting the beginning of
2228		 * the data in the mbuf.  The encapsulated Ethernet
2229		 * type field is already in place.
2230 */ 2231 bcopy((char *)evl, (char *)evl + EVL_ENCAPLEN, 2232 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2233 m_adj(m, EVL_ENCAPLEN); 2234 } 2235 2236 2237 static __inline void 2238 mxge_rx_done_big(struct ifnet *ifp, mxge_rx_ring_t *rx, 2239 uint32_t len, uint32_t csum) 2240 { 2241 struct mbuf *m; 2242 const struct ether_header *eh; 2243 bus_dmamap_t old_map; 2244 int idx; 2245 2246 idx = rx->cnt & rx->mask; 2247 rx->cnt++; 2248 2249 /* Save a pointer to the received mbuf */ 2250 m = rx->info[idx].m; 2251 2252 /* Try to replace the received mbuf */ 2253 if (mxge_get_buf_big(rx, rx->extra_map, idx, FALSE)) { 2254 /* Drop the frame -- the old mbuf is re-cycled */ 2255 IFNET_STAT_INC(ifp, ierrors, 1); 2256 return; 2257 } 2258 2259 /* Unmap the received buffer */ 2260 old_map = rx->info[idx].map; 2261 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2262 bus_dmamap_unload(rx->dmat, old_map); 2263 2264 /* Swap the bus_dmamap_t's */ 2265 rx->info[idx].map = rx->extra_map; 2266 rx->extra_map = old_map; 2267 2268 /* 2269 * mcp implicitly skips 1st 2 bytes so that packet is properly 2270 * aligned 2271 */ 2272 m->m_data += MXGEFW_PAD; 2273 2274 m->m_pkthdr.rcvif = ifp; 2275 m->m_len = m->m_pkthdr.len = len; 2276 2277 IFNET_STAT_INC(ifp, ipackets, 1); 2278 2279 eh = mtod(m, const struct ether_header *); 2280 if (eh->ether_type == htons(ETHERTYPE_VLAN)) 2281 mxge_vlan_tag_remove(m, &csum); 2282 2283 /* If the checksum is valid, mark it in the mbuf header */ 2284 if ((ifp->if_capenable & IFCAP_RXCSUM) && 2285 mxge_rx_csum(m, csum) == 0) { 2286 /* Tell the stack that the checksum is good */ 2287 m->m_pkthdr.csum_data = 0xffff; 2288 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2289 CSUM_DATA_VALID; 2290 } 2291 ifp->if_input(ifp, m, NULL, -1); 2292 } 2293 2294 static __inline void 2295 mxge_rx_done_small(struct ifnet *ifp, mxge_rx_ring_t *rx, 2296 uint32_t len, uint32_t csum) 2297 { 2298 const struct ether_header *eh; 2299 struct mbuf *m; 2300 bus_dmamap_t old_map; 2301 int idx; 2302 2303 idx = rx->cnt & rx->mask; 2304 rx->cnt++; 2305 2306 /* Save a pointer to the received mbuf */ 2307 m = rx->info[idx].m; 2308 2309 /* Try to replace the received mbuf */ 2310 if (mxge_get_buf_small(rx, rx->extra_map, idx, FALSE)) { 2311 /* Drop the frame -- the old mbuf is re-cycled */ 2312 IFNET_STAT_INC(ifp, ierrors, 1); 2313 return; 2314 } 2315 2316 /* Unmap the received buffer */ 2317 old_map = rx->info[idx].map; 2318 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2319 bus_dmamap_unload(rx->dmat, old_map); 2320 2321 /* Swap the bus_dmamap_t's */ 2322 rx->info[idx].map = rx->extra_map; 2323 rx->extra_map = old_map; 2324 2325 /* 2326 * mcp implicitly skips 1st 2 bytes so that packet is properly 2327 * aligned 2328 */ 2329 m->m_data += MXGEFW_PAD; 2330 2331 m->m_pkthdr.rcvif = ifp; 2332 m->m_len = m->m_pkthdr.len = len; 2333 2334 IFNET_STAT_INC(ifp, ipackets, 1); 2335 2336 eh = mtod(m, const struct ether_header *); 2337 if (eh->ether_type == htons(ETHERTYPE_VLAN)) 2338 mxge_vlan_tag_remove(m, &csum); 2339 2340 /* If the checksum is valid, mark it in the mbuf header */ 2341 if ((ifp->if_capenable & IFCAP_RXCSUM) && 2342 mxge_rx_csum(m, csum) == 0) { 2343 /* Tell the stack that the checksum is good */ 2344 m->m_pkthdr.csum_data = 0xffff; 2345 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2346 CSUM_DATA_VALID; 2347 } 2348 ifp->if_input(ifp, m, NULL, -1); 2349 } 2350 2351 static __inline void 2352 mxge_clean_rx_done(struct ifnet *ifp, struct mxge_rx_data *rx_data, int cycle) 2353 { 2354 mxge_rx_done_t *rx_done = 
&rx_data->rx_done;
2355
2356	while (rx_done->entry[rx_done->idx].length != 0 && cycle != 0) {
2357		uint16_t length, checksum;
2358
2359		length = ntohs(rx_done->entry[rx_done->idx].length);
2360		rx_done->entry[rx_done->idx].length = 0;
2361
2362		checksum = rx_done->entry[rx_done->idx].checksum;
2363
2364		if (length <= MXGE_RX_SMALL_BUFLEN) {
2365			mxge_rx_done_small(ifp, &rx_data->rx_small,
2366			    length, checksum);
2367		} else {
2368			mxge_rx_done_big(ifp, &rx_data->rx_big,
2369			    length, checksum);
2370		}
2371
2372		rx_done->idx++;
2373		rx_done->idx &= rx_done->mask;
2374		--cycle;
2375	}
2376	}
2377
2378	static __inline void
2379	mxge_tx_done(struct ifnet *ifp, mxge_tx_ring_t *tx, uint32_t mcp_idx)
2380	{
2381		ASSERT_SERIALIZED(&tx->tx_serialize);
2382
2383		while (tx->pkt_done != mcp_idx) {
2384			struct mbuf *m;
2385			int idx;
2386
2387			idx = tx->done & tx->mask;
2388			tx->done++;
2389
2390			m = tx->info[idx].m;
2391			/*
2392			 * mbuf and DMA map only attached to the first
2393			 * segment per-mbuf.
2394			 */
2395			if (m != NULL) {
2396				tx->pkt_done++;
2397				IFNET_STAT_INC(ifp, opackets, 1);
2398				tx->info[idx].m = NULL;
2399				bus_dmamap_unload(tx->dmat, tx->info[idx].map);
2400				m_freem(m);
2401			}
2402		}
2403
2404		/*
2405		 * If we have space, clear OACTIVE to tell the stack that
2406		 * it's OK to send packets
2407		 */
2408		if (tx->req - tx->done < (tx->mask + 1) / 2) {
2409			ifsq_clr_oactive(tx->ifsq);
2410			if (tx->req == tx->done) {
2411				/* Reset watchdog */
2412				tx->watchdog.wd_timer = 0;
2413			}
2414		}
2415
2416		if (!ifsq_is_empty(tx->ifsq))
2417			ifsq_devstart(tx->ifsq);
2418
2419		if (tx->send_stop != NULL && tx->req == tx->done) {
2420			/*
2421			 * Let the NIC stop polling this queue, since there
2422			 * are no more transmits pending
2423			 */
2424			*tx->send_stop = 1;
2425			tx->queue_active = 0;
2426			tx->deactivate++;
2427			wmb();
2428		}
2429	}
2430
2431	static struct mxge_media_type mxge_xfp_media_types[] = {
2432		{IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2433		{IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2434		{IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2435		{0, (1 << 5), "10GBASE-ER"},
2436		{IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2437		{0, (1 << 3), "10GBASE-SW"},
2438		{0, (1 << 2), "10GBASE-LW"},
2439		{0, (1 << 1), "10GBASE-EW"},
2440		{0, (1 << 0), "Reserved"}
2441	};
2442
2443	static struct mxge_media_type mxge_sfp_media_types[] = {
2444		{IFM_10G_TWINAX, 0, "10GBASE-Twinax"},
2445		{0, (1 << 7), "Reserved"},
2446		{IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2447		{IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2448		{IFM_10G_SR, (1 << 4), "10GBASE-SR"},
2449		{IFM_10G_TWINAX, (1 << 0), "10GBASE-Twinax"}
2450	};
2451
2452	static void
2453	mxge_media_set(mxge_softc_t *sc, int media_type)
2454	{
2455		ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type, 0, NULL);
2456		ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type);
2457		sc->current_media = media_type;
2458		sc->media.ifm_media = sc->media.ifm_cur->ifm_media;
2459	}
2460
2461	static void
2462	mxge_media_init(mxge_softc_t *sc)
2463	{
2464		const char *ptr;
2465		int i;
2466
2467		ifmedia_removeall(&sc->media);
2468		mxge_media_set(sc, IFM_AUTO);
2469
2470		/*
2471		 * Parse the product code to determine the interface type
2472		 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2473		 * after the 3rd dash in the driver's cached copy of the
2474		 * EEPROM's product code string.
2475		 */
2476		ptr = sc->product_code_string;
2477		if (ptr == NULL) {
2478			if_printf(sc->ifp, "Missing product code\n");
2479			return;
2480		}
2481
2482		for (i = 0; i < 3; i++, ptr++) {
2483			ptr = strchr(ptr, '-');
2484			if (ptr == NULL) {
2485				if_printf(sc->ifp, "only %d dashes in PC?!?\n", i);
2486				return;
2487			}
2488		}
2489		if (*ptr == 'C' || *(ptr + 1) == 'C') {
2490			/* -C is CX4 */
2491			sc->connector = MXGE_CX4;
2492			mxge_media_set(sc, IFM_10G_CX4);
2493		} else if (*ptr == 'Q') {
2494			/* -Q is Quad Ribbon Fiber */
2495			sc->connector = MXGE_QRF;
2496			if_printf(sc->ifp, "Quad Ribbon Fiber Media\n");
2497			/* DragonFly has no media type for Quad ribbon fiber */
2498		} else if (*ptr == 'R') {
2499			/* -R is XFP */
2500			sc->connector = MXGE_XFP;
2501		} else if (*ptr == 'S' || *(ptr + 1) == 'S') {
2502			/* -S or -2S is SFP+ */
2503			sc->connector = MXGE_SFP;
2504		} else {
2505			if_printf(sc->ifp, "Unknown media type: %c\n", *ptr);
2506		}
2507	}
2508
2509	/*
2510	 * Determine the media type for a NIC.  Some XFPs will identify
2511	 * themselves only when their link is up, so this is initiated via a
2512	 * link up interrupt.  However, this can potentially take up to
2513	 * several milliseconds, so it is run via the watchdog routine, rather
2514	 * than in the interrupt handler itself.
2515	 */
2516	static void
2517	mxge_media_probe(mxge_softc_t *sc)
2518	{
2519		mxge_cmd_t cmd;
2520		const char *cage_type;
2521		struct mxge_media_type *mxge_media_types = NULL;
2522		int i, err, ms, mxge_media_type_entries;
2523		uint32_t byte;
2524
2525		sc->need_media_probe = 0;
2526
2527		if (sc->connector == MXGE_XFP) {
2528			/* -R is XFP */
2529			mxge_media_types = mxge_xfp_media_types;
2530			mxge_media_type_entries = NELEM(mxge_xfp_media_types);
2531			byte = MXGE_XFP_COMPLIANCE_BYTE;
2532			cage_type = "XFP";
2533		} else if (sc->connector == MXGE_SFP) {
2534			/* -S or -2S is SFP+ */
2535			mxge_media_types = mxge_sfp_media_types;
2536			mxge_media_type_entries = NELEM(mxge_sfp_media_types);
2537			cage_type = "SFP+";
2538			byte = 3;
2539		} else {
2540			/* Nothing to do; media type cannot change */
2541			return;
2542		}
2543
2544		/*
2545		 * At this point we know the NIC has an XFP cage, so now we
2546		 * try to determine what is in the cage by using the
2547		 * firmware's XFP I2C commands to read the XFP 10GbE compliance
2548		 * register.
We read just one byte, which may take over 2549 * a millisecond 2550 */ 2551 2552 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2553 cmd.data1 = byte; 2554 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2555 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) 2556 if_printf(sc->ifp, "failed to read XFP\n"); 2557 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) 2558 if_printf(sc->ifp, "Type R/S with no XFP!?!?\n"); 2559 if (err != MXGEFW_CMD_OK) 2560 return; 2561 2562 /* Now we wait for the data to be cached */ 2563 cmd.data0 = byte; 2564 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2565 for (ms = 0; err == EBUSY && ms < 50; ms++) { 2566 DELAY(1000); 2567 cmd.data0 = byte; 2568 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2569 } 2570 if (err != MXGEFW_CMD_OK) { 2571 if_printf(sc->ifp, "failed to read %s (%d, %dms)\n", 2572 cage_type, err, ms); 2573 return; 2574 } 2575 2576 if (cmd.data0 == mxge_media_types[0].bitmask) { 2577 if (bootverbose) { 2578 if_printf(sc->ifp, "%s:%s\n", cage_type, 2579 mxge_media_types[0].name); 2580 } 2581 if (sc->current_media != mxge_media_types[0].flag) { 2582 mxge_media_init(sc); 2583 mxge_media_set(sc, mxge_media_types[0].flag); 2584 } 2585 return; 2586 } 2587 for (i = 1; i < mxge_media_type_entries; i++) { 2588 if (cmd.data0 & mxge_media_types[i].bitmask) { 2589 if (bootverbose) { 2590 if_printf(sc->ifp, "%s:%s\n", cage_type, 2591 mxge_media_types[i].name); 2592 } 2593 2594 if (sc->current_media != mxge_media_types[i].flag) { 2595 mxge_media_init(sc); 2596 mxge_media_set(sc, mxge_media_types[i].flag); 2597 } 2598 return; 2599 } 2600 } 2601 if (bootverbose) { 2602 if_printf(sc->ifp, "%s media 0x%x unknown\n", cage_type, 2603 cmd.data0); 2604 } 2605 } 2606 2607 static void 2608 mxge_intr_status(struct mxge_softc *sc, const mcp_irq_data_t *stats) 2609 { 2610 if (sc->link_state != stats->link_up) { 2611 sc->link_state = stats->link_up; 2612 if (sc->link_state) { 2613 sc->ifp->if_link_state = LINK_STATE_UP; 2614 if_link_state_change(sc->ifp); 2615 if (bootverbose) 2616 if_printf(sc->ifp, "link up\n"); 2617 } else { 2618 sc->ifp->if_link_state = LINK_STATE_DOWN; 2619 if_link_state_change(sc->ifp); 2620 if (bootverbose) 2621 if_printf(sc->ifp, "link down\n"); 2622 } 2623 sc->need_media_probe = 1; 2624 } 2625 2626 if (sc->rdma_tags_available != be32toh(stats->rdma_tags_available)) { 2627 sc->rdma_tags_available = be32toh(stats->rdma_tags_available); 2628 if_printf(sc->ifp, "RDMA timed out! 
%d tags left\n", 2629 sc->rdma_tags_available); 2630 } 2631 2632 if (stats->link_down) { 2633 sc->down_cnt += stats->link_down; 2634 sc->link_state = 0; 2635 sc->ifp->if_link_state = LINK_STATE_DOWN; 2636 if_link_state_change(sc->ifp); 2637 } 2638 } 2639 2640 static void 2641 mxge_serialize_skipmain(struct mxge_softc *sc) 2642 { 2643 lwkt_serialize_array_enter(sc->serializes, sc->nserialize, 1); 2644 } 2645 2646 static void 2647 mxge_deserialize_skipmain(struct mxge_softc *sc) 2648 { 2649 lwkt_serialize_array_exit(sc->serializes, sc->nserialize, 1); 2650 } 2651 2652 static void 2653 mxge_legacy(void *arg) 2654 { 2655 struct mxge_slice_state *ss = arg; 2656 mxge_softc_t *sc = ss->sc; 2657 mcp_irq_data_t *stats = ss->fw_stats; 2658 mxge_tx_ring_t *tx = &ss->tx; 2659 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done; 2660 uint32_t send_done_count; 2661 uint8_t valid; 2662 2663 ASSERT_SERIALIZED(&sc->main_serialize); 2664 2665 /* Make sure the DMA has finished */ 2666 if (!stats->valid) 2667 return; 2668 valid = stats->valid; 2669 2670 /* Lower legacy IRQ */ 2671 *sc->irq_deassert = 0; 2672 if (!mxge_deassert_wait) { 2673 /* Don't wait for conf. that irq is low */ 2674 stats->valid = 0; 2675 } 2676 2677 mxge_serialize_skipmain(sc); 2678 2679 /* 2680 * Loop while waiting for legacy irq deassertion 2681 * XXX do we really want to loop? 2682 */ 2683 do { 2684 /* Check for transmit completes and receives */ 2685 send_done_count = be32toh(stats->send_done_count); 2686 while ((send_done_count != tx->pkt_done) || 2687 (rx_done->entry[rx_done->idx].length != 0)) { 2688 if (send_done_count != tx->pkt_done) { 2689 mxge_tx_done(&sc->arpcom.ac_if, tx, 2690 (int)send_done_count); 2691 } 2692 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1); 2693 send_done_count = be32toh(stats->send_done_count); 2694 } 2695 if (mxge_deassert_wait) 2696 wmb(); 2697 } while (*((volatile uint8_t *)&stats->valid)); 2698 2699 mxge_deserialize_skipmain(sc); 2700 2701 /* Fw link & error stats meaningful only on the first slice */ 2702 if (__predict_false(stats->stats_updated)) 2703 mxge_intr_status(sc, stats); 2704 2705 /* Check to see if we have rx token to pass back */ 2706 if (valid & 0x1) 2707 *ss->irq_claim = be32toh(3); 2708 *(ss->irq_claim + 1) = be32toh(3); 2709 } 2710 2711 static void 2712 mxge_msi(void *arg) 2713 { 2714 struct mxge_slice_state *ss = arg; 2715 mxge_softc_t *sc = ss->sc; 2716 mcp_irq_data_t *stats = ss->fw_stats; 2717 mxge_tx_ring_t *tx = &ss->tx; 2718 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done; 2719 uint32_t send_done_count; 2720 uint8_t valid; 2721 #ifndef IFPOLL_ENABLE 2722 const boolean_t polling = FALSE; 2723 #else 2724 boolean_t polling = FALSE; 2725 #endif 2726 2727 ASSERT_SERIALIZED(&sc->main_serialize); 2728 2729 /* Make sure the DMA has finished */ 2730 if (__predict_false(!stats->valid)) 2731 return; 2732 2733 valid = stats->valid; 2734 stats->valid = 0; 2735 2736 #ifdef IFPOLL_ENABLE 2737 if (sc->arpcom.ac_if.if_flags & IFF_NPOLLING) 2738 polling = TRUE; 2739 #endif 2740 2741 if (!polling) { 2742 /* Check for receives */ 2743 lwkt_serialize_enter(&ss->rx_data.rx_serialize); 2744 if (rx_done->entry[rx_done->idx].length != 0) 2745 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1); 2746 lwkt_serialize_exit(&ss->rx_data.rx_serialize); 2747 } 2748 2749 /* 2750 * Check for transmit completes 2751 * 2752 * NOTE: 2753 * Since pkt_done is only changed by mxge_tx_done(), 2754 * which is called only in interrupt handler, the 2755 * check w/o holding tx serializer is MPSAFE. 
2756 */ 2757 send_done_count = be32toh(stats->send_done_count); 2758 if (send_done_count != tx->pkt_done) { 2759 lwkt_serialize_enter(&tx->tx_serialize); 2760 mxge_tx_done(&sc->arpcom.ac_if, tx, (int)send_done_count); 2761 lwkt_serialize_exit(&tx->tx_serialize); 2762 } 2763 2764 if (__predict_false(stats->stats_updated)) 2765 mxge_intr_status(sc, stats); 2766 2767 /* Check to see if we have rx token to pass back */ 2768 if (!polling && (valid & 0x1)) 2769 *ss->irq_claim = be32toh(3); 2770 *(ss->irq_claim + 1) = be32toh(3); 2771 } 2772 2773 static void 2774 mxge_msix_rx(void *arg) 2775 { 2776 struct mxge_slice_state *ss = arg; 2777 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done; 2778 2779 #ifdef IFPOLL_ENABLE 2780 if (ss->sc->arpcom.ac_if.if_flags & IFF_NPOLLING) 2781 return; 2782 #endif 2783 2784 ASSERT_SERIALIZED(&ss->rx_data.rx_serialize); 2785 2786 if (rx_done->entry[rx_done->idx].length != 0) 2787 mxge_clean_rx_done(&ss->sc->arpcom.ac_if, &ss->rx_data, -1); 2788 2789 *ss->irq_claim = be32toh(3); 2790 } 2791 2792 static void 2793 mxge_msix_rxtx(void *arg) 2794 { 2795 struct mxge_slice_state *ss = arg; 2796 mxge_softc_t *sc = ss->sc; 2797 mcp_irq_data_t *stats = ss->fw_stats; 2798 mxge_tx_ring_t *tx = &ss->tx; 2799 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done; 2800 uint32_t send_done_count; 2801 uint8_t valid; 2802 #ifndef IFPOLL_ENABLE 2803 const boolean_t polling = FALSE; 2804 #else 2805 boolean_t polling = FALSE; 2806 #endif 2807 2808 ASSERT_SERIALIZED(&ss->rx_data.rx_serialize); 2809 2810 /* Make sure the DMA has finished */ 2811 if (__predict_false(!stats->valid)) 2812 return; 2813 2814 valid = stats->valid; 2815 stats->valid = 0; 2816 2817 #ifdef IFPOLL_ENABLE 2818 if (sc->arpcom.ac_if.if_flags & IFF_NPOLLING) 2819 polling = TRUE; 2820 #endif 2821 2822 /* Check for receives */ 2823 if (!polling && rx_done->entry[rx_done->idx].length != 0) 2824 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1); 2825 2826 /* 2827 * Check for transmit completes 2828 * 2829 * NOTE: 2830 * Since pkt_done is only changed by mxge_tx_done(), 2831 * which is called only in interrupt handler, the 2832 * check w/o holding tx serializer is MPSAFE. 
2833 */ 2834 send_done_count = be32toh(stats->send_done_count); 2835 if (send_done_count != tx->pkt_done) { 2836 lwkt_serialize_enter(&tx->tx_serialize); 2837 mxge_tx_done(&sc->arpcom.ac_if, tx, (int)send_done_count); 2838 lwkt_serialize_exit(&tx->tx_serialize); 2839 } 2840 2841 /* Check to see if we have rx token to pass back */ 2842 if (!polling && (valid & 0x1)) 2843 *ss->irq_claim = be32toh(3); 2844 *(ss->irq_claim + 1) = be32toh(3); 2845 } 2846 2847 static void 2848 mxge_init(void *arg) 2849 { 2850 struct mxge_softc *sc = arg; 2851 2852 ASSERT_IFNET_SERIALIZED_ALL(sc->ifp); 2853 if ((sc->ifp->if_flags & IFF_RUNNING) == 0) 2854 mxge_open(sc); 2855 } 2856 2857 static void 2858 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 2859 { 2860 int i; 2861 2862 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 2863 if (ss->rx_data.rx_big.info[i].m == NULL) 2864 continue; 2865 bus_dmamap_unload(ss->rx_data.rx_big.dmat, 2866 ss->rx_data.rx_big.info[i].map); 2867 m_freem(ss->rx_data.rx_big.info[i].m); 2868 ss->rx_data.rx_big.info[i].m = NULL; 2869 } 2870 2871 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) { 2872 if (ss->rx_data.rx_small.info[i].m == NULL) 2873 continue; 2874 bus_dmamap_unload(ss->rx_data.rx_small.dmat, 2875 ss->rx_data.rx_small.info[i].map); 2876 m_freem(ss->rx_data.rx_small.info[i].m); 2877 ss->rx_data.rx_small.info[i].m = NULL; 2878 } 2879 2880 /* Transmit ring used only on the first slice */ 2881 if (ss->tx.info == NULL) 2882 return; 2883 2884 for (i = 0; i <= ss->tx.mask; i++) { 2885 if (ss->tx.info[i].m == NULL) 2886 continue; 2887 bus_dmamap_unload(ss->tx.dmat, ss->tx.info[i].map); 2888 m_freem(ss->tx.info[i].m); 2889 ss->tx.info[i].m = NULL; 2890 } 2891 } 2892 2893 static void 2894 mxge_free_mbufs(mxge_softc_t *sc) 2895 { 2896 int slice; 2897 2898 for (slice = 0; slice < sc->num_slices; slice++) 2899 mxge_free_slice_mbufs(&sc->ss[slice]); 2900 } 2901 2902 static void 2903 mxge_free_slice_rings(struct mxge_slice_state *ss) 2904 { 2905 int i; 2906 2907 if (ss->rx_data.rx_done.entry != NULL) { 2908 mxge_dma_free(&ss->rx_done_dma); 2909 ss->rx_data.rx_done.entry = NULL; 2910 } 2911 2912 if (ss->tx.req_list != NULL) { 2913 kfree(ss->tx.req_list, M_DEVBUF); 2914 ss->tx.req_list = NULL; 2915 } 2916 2917 if (ss->tx.seg_list != NULL) { 2918 kfree(ss->tx.seg_list, M_DEVBUF); 2919 ss->tx.seg_list = NULL; 2920 } 2921 2922 if (ss->rx_data.rx_small.shadow != NULL) { 2923 kfree(ss->rx_data.rx_small.shadow, M_DEVBUF); 2924 ss->rx_data.rx_small.shadow = NULL; 2925 } 2926 2927 if (ss->rx_data.rx_big.shadow != NULL) { 2928 kfree(ss->rx_data.rx_big.shadow, M_DEVBUF); 2929 ss->rx_data.rx_big.shadow = NULL; 2930 } 2931 2932 if (ss->tx.info != NULL) { 2933 if (ss->tx.dmat != NULL) { 2934 for (i = 0; i <= ss->tx.mask; i++) { 2935 bus_dmamap_destroy(ss->tx.dmat, 2936 ss->tx.info[i].map); 2937 } 2938 bus_dma_tag_destroy(ss->tx.dmat); 2939 } 2940 kfree(ss->tx.info, M_DEVBUF); 2941 ss->tx.info = NULL; 2942 } 2943 2944 if (ss->rx_data.rx_small.info != NULL) { 2945 if (ss->rx_data.rx_small.dmat != NULL) { 2946 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) { 2947 bus_dmamap_destroy(ss->rx_data.rx_small.dmat, 2948 ss->rx_data.rx_small.info[i].map); 2949 } 2950 bus_dmamap_destroy(ss->rx_data.rx_small.dmat, 2951 ss->rx_data.rx_small.extra_map); 2952 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat); 2953 } 2954 kfree(ss->rx_data.rx_small.info, M_DEVBUF); 2955 ss->rx_data.rx_small.info = NULL; 2956 } 2957 2958 if (ss->rx_data.rx_big.info != NULL) { 2959 if (ss->rx_data.rx_big.dmat != NULL) { 2960 for (i = 
0; i <= ss->rx_data.rx_big.mask; i++) { 2961 bus_dmamap_destroy(ss->rx_data.rx_big.dmat, 2962 ss->rx_data.rx_big.info[i].map); 2963 } 2964 bus_dmamap_destroy(ss->rx_data.rx_big.dmat, 2965 ss->rx_data.rx_big.extra_map); 2966 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat); 2967 } 2968 kfree(ss->rx_data.rx_big.info, M_DEVBUF); 2969 ss->rx_data.rx_big.info = NULL; 2970 } 2971 } 2972 2973 static void 2974 mxge_free_rings(mxge_softc_t *sc) 2975 { 2976 int slice; 2977 2978 if (sc->ss == NULL) 2979 return; 2980 2981 for (slice = 0; slice < sc->num_slices; slice++) 2982 mxge_free_slice_rings(&sc->ss[slice]); 2983 } 2984 2985 static int 2986 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 2987 int tx_ring_entries) 2988 { 2989 mxge_softc_t *sc = ss->sc; 2990 size_t bytes; 2991 int err, i; 2992 2993 /* 2994 * Allocate per-slice receive resources 2995 */ 2996 2997 ss->rx_data.rx_small.mask = ss->rx_data.rx_big.mask = 2998 rx_ring_entries - 1; 2999 ss->rx_data.rx_done.mask = (2 * rx_ring_entries) - 1; 3000 3001 /* Allocate the rx shadow rings */ 3002 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_small.shadow); 3003 ss->rx_data.rx_small.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3004 3005 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_big.shadow); 3006 ss->rx_data.rx_big.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3007 3008 /* Allocate the rx host info rings */ 3009 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_small.info); 3010 ss->rx_data.rx_small.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3011 3012 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_big.info); 3013 ss->rx_data.rx_big.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3014 3015 /* Allocate the rx busdma resources */ 3016 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3017 1, /* alignment */ 3018 4096, /* boundary */ 3019 BUS_SPACE_MAXADDR, /* low */ 3020 BUS_SPACE_MAXADDR, /* high */ 3021 NULL, NULL, /* filter */ 3022 MHLEN, /* maxsize */ 3023 1, /* num segs */ 3024 MHLEN, /* maxsegsize */ 3025 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, 3026 /* flags */ 3027 &ss->rx_data.rx_small.dmat); /* tag */ 3028 if (err != 0) { 3029 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3030 err); 3031 return err; 3032 } 3033 3034 err = bus_dmamap_create(ss->rx_data.rx_small.dmat, BUS_DMA_WAITOK, 3035 &ss->rx_data.rx_small.extra_map); 3036 if (err != 0) { 3037 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", err); 3038 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat); 3039 ss->rx_data.rx_small.dmat = NULL; 3040 return err; 3041 } 3042 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) { 3043 err = bus_dmamap_create(ss->rx_data.rx_small.dmat, 3044 BUS_DMA_WAITOK, &ss->rx_data.rx_small.info[i].map); 3045 if (err != 0) { 3046 int j; 3047 3048 device_printf(sc->dev, "Err %d rx_small dmamap\n", err); 3049 3050 for (j = 0; j < i; ++j) { 3051 bus_dmamap_destroy(ss->rx_data.rx_small.dmat, 3052 ss->rx_data.rx_small.info[j].map); 3053 } 3054 bus_dmamap_destroy(ss->rx_data.rx_small.dmat, 3055 ss->rx_data.rx_small.extra_map); 3056 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat); 3057 ss->rx_data.rx_small.dmat = NULL; 3058 return err; 3059 } 3060 } 3061 3062 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3063 1, /* alignment */ 3064 4096, /* boundary */ 3065 BUS_SPACE_MAXADDR, /* low */ 3066 BUS_SPACE_MAXADDR, /* high */ 3067 NULL, NULL, /* filter */ 3068 4096, /* maxsize */ 3069 1, /* num segs */ 3070 4096, /* maxsegsize*/ 3071 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, 3072 /* flags */ 3073 
&ss->rx_data.rx_big.dmat); /* tag */ 3074 if (err != 0) { 3075 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3076 err); 3077 return err; 3078 } 3079 3080 err = bus_dmamap_create(ss->rx_data.rx_big.dmat, BUS_DMA_WAITOK, 3081 &ss->rx_data.rx_big.extra_map); 3082 if (err != 0) { 3083 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", err); 3084 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat); 3085 ss->rx_data.rx_big.dmat = NULL; 3086 return err; 3087 } 3088 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 3089 err = bus_dmamap_create(ss->rx_data.rx_big.dmat, BUS_DMA_WAITOK, 3090 &ss->rx_data.rx_big.info[i].map); 3091 if (err != 0) { 3092 int j; 3093 3094 device_printf(sc->dev, "Err %d rx_big dmamap\n", err); 3095 for (j = 0; j < i; ++j) { 3096 bus_dmamap_destroy(ss->rx_data.rx_big.dmat, 3097 ss->rx_data.rx_big.info[j].map); 3098 } 3099 bus_dmamap_destroy(ss->rx_data.rx_big.dmat, 3100 ss->rx_data.rx_big.extra_map); 3101 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat); 3102 ss->rx_data.rx_big.dmat = NULL; 3103 return err; 3104 } 3105 } 3106 3107 /* 3108 * Now allocate TX resources 3109 */ 3110 3111 ss->tx.mask = tx_ring_entries - 1; 3112 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3113 3114 /* 3115 * Allocate the tx request copy block; MUST be at least 8 bytes 3116 * aligned 3117 */ 3118 bytes = sizeof(*ss->tx.req_list) * (ss->tx.max_desc + 4); 3119 ss->tx.req_list = kmalloc_cachealign(__VM_CACHELINE_ALIGN(bytes), 3120 M_DEVBUF, M_WAITOK); 3121 3122 /* Allocate the tx busdma segment list */ 3123 bytes = sizeof(*ss->tx.seg_list) * ss->tx.max_desc; 3124 ss->tx.seg_list = kmalloc(bytes, M_DEVBUF, M_WAITOK); 3125 3126 /* Allocate the tx host info ring */ 3127 bytes = tx_ring_entries * sizeof(*ss->tx.info); 3128 ss->tx.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3129 3130 /* Allocate the tx busdma resources */ 3131 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3132 1, /* alignment */ 3133 sc->tx_boundary, /* boundary */ 3134 BUS_SPACE_MAXADDR, /* low */ 3135 BUS_SPACE_MAXADDR, /* high */ 3136 NULL, NULL, /* filter */ 3137 IP_MAXPACKET + 3138 sizeof(struct ether_vlan_header), 3139 /* maxsize */ 3140 ss->tx.max_desc - 2, /* num segs */ 3141 sc->tx_boundary, /* maxsegsz */ 3142 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | 3143 BUS_DMA_ONEBPAGE, /* flags */ 3144 &ss->tx.dmat); /* tag */ 3145 if (err != 0) { 3146 device_printf(sc->dev, "Err %d allocating tx dmat\n", err); 3147 return err; 3148 } 3149 3150 /* 3151 * Now use these tags to setup DMA maps for each slot in the ring 3152 */ 3153 for (i = 0; i <= ss->tx.mask; i++) { 3154 err = bus_dmamap_create(ss->tx.dmat, 3155 BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &ss->tx.info[i].map); 3156 if (err != 0) { 3157 int j; 3158 3159 device_printf(sc->dev, "Err %d tx dmamap\n", err); 3160 for (j = 0; j < i; ++j) { 3161 bus_dmamap_destroy(ss->tx.dmat, 3162 ss->tx.info[j].map); 3163 } 3164 bus_dma_tag_destroy(ss->tx.dmat); 3165 ss->tx.dmat = NULL; 3166 return err; 3167 } 3168 } 3169 return 0; 3170 } 3171 3172 static int 3173 mxge_alloc_rings(mxge_softc_t *sc) 3174 { 3175 mxge_cmd_t cmd; 3176 int tx_ring_size; 3177 int tx_ring_entries, rx_ring_entries; 3178 int err, slice; 3179 3180 /* Get ring sizes */ 3181 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3182 if (err != 0) { 3183 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3184 return err; 3185 } 3186 tx_ring_size = cmd.data0; 3187 3188 tx_ring_entries = tx_ring_size / sizeof(mcp_kreq_ether_send_t); 3189 rx_ring_entries = sc->rx_intr_slots / 2; 3190 3191 
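	/*
	 * NOTE: rx_intr_slots was sized in mxge_alloc_slices() as twice
	 * the firmware rx ring size, so that completions from both the
	 * small and big rings fit; halving it here recovers the
	 * per-ring entry count.
	 */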
if (bootverbose) { 3192 device_printf(sc->dev, "tx desc %d, rx desc %d\n", 3193 tx_ring_entries, rx_ring_entries); 3194 } 3195 3196 sc->ifp->if_nmbclusters = rx_ring_entries * sc->num_slices; 3197 sc->ifp->if_nmbjclusters = sc->ifp->if_nmbclusters; 3198 3199 ifq_set_maxlen(&sc->ifp->if_snd, tx_ring_entries - 1); 3200 ifq_set_ready(&sc->ifp->if_snd); 3201 ifq_set_subq_cnt(&sc->ifp->if_snd, sc->num_tx_rings); 3202 3203 if (sc->num_tx_rings > 1) { 3204 sc->ifp->if_mapsubq = ifq_mapsubq_mask; 3205 ifq_set_subq_mask(&sc->ifp->if_snd, sc->num_tx_rings - 1); 3206 } 3207 3208 for (slice = 0; slice < sc->num_slices; slice++) { 3209 err = mxge_alloc_slice_rings(&sc->ss[slice], 3210 rx_ring_entries, tx_ring_entries); 3211 if (err != 0) { 3212 device_printf(sc->dev, 3213 "alloc %d slice rings failed\n", slice); 3214 return err; 3215 } 3216 } 3217 return 0; 3218 } 3219 3220 static void 3221 mxge_choose_params(int mtu, int *cl_size) 3222 { 3223 int bufsize = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN + MXGEFW_PAD; 3224 3225 if (bufsize < MCLBYTES) { 3226 *cl_size = MCLBYTES; 3227 } else { 3228 KASSERT(bufsize < MJUMPAGESIZE, ("invalid MTU %d", mtu)); 3229 *cl_size = MJUMPAGESIZE; 3230 } 3231 } 3232 3233 static int 3234 mxge_slice_open(struct mxge_slice_state *ss, int cl_size) 3235 { 3236 mxge_cmd_t cmd; 3237 int err, i, slice; 3238 3239 slice = ss - ss->sc->ss; 3240 3241 /* 3242 * Get the lanai pointers to the send and receive rings 3243 */ 3244 err = 0; 3245 3246 if (ss->sc->num_tx_rings == 1) { 3247 if (slice == 0) { 3248 cmd.data0 = slice; 3249 err = mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SEND_OFFSET, 3250 &cmd); 3251 ss->tx.lanai = (volatile mcp_kreq_ether_send_t *) 3252 (ss->sc->sram + cmd.data0); 3253 /* Leave send_go and send_stop as NULL */ 3254 } 3255 } else { 3256 cmd.data0 = slice; 3257 err = mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3258 ss->tx.lanai = (volatile mcp_kreq_ether_send_t *) 3259 (ss->sc->sram + cmd.data0); 3260 ss->tx.send_go = (volatile uint32_t *) 3261 (ss->sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3262 ss->tx.send_stop = (volatile uint32_t *) 3263 (ss->sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3264 } 3265 3266 cmd.data0 = slice; 3267 err |= mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3268 ss->rx_data.rx_small.lanai = 3269 (volatile mcp_kreq_ether_recv_t *)(ss->sc->sram + cmd.data0); 3270 3271 cmd.data0 = slice; 3272 err |= mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3273 ss->rx_data.rx_big.lanai = 3274 (volatile mcp_kreq_ether_recv_t *)(ss->sc->sram + cmd.data0); 3275 3276 if (err != 0) { 3277 if_printf(ss->sc->ifp, 3278 "failed to get ring sizes or locations\n"); 3279 return EIO; 3280 } 3281 3282 /* 3283 * Stock small receive ring 3284 */ 3285 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) { 3286 err = mxge_get_buf_small(&ss->rx_data.rx_small, 3287 ss->rx_data.rx_small.info[i].map, i, TRUE); 3288 if (err) { 3289 if_printf(ss->sc->ifp, "alloced %d/%d smalls\n", i, 3290 ss->rx_data.rx_small.mask + 1); 3291 return ENOMEM; 3292 } 3293 } 3294 3295 /* 3296 * Stock big receive ring 3297 */ 3298 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 3299 ss->rx_data.rx_big.shadow[i].addr_low = 0xffffffff; 3300 ss->rx_data.rx_big.shadow[i].addr_high = 0xffffffff; 3301 } 3302 3303 ss->rx_data.rx_big.cl_size = cl_size; 3304 3305 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 3306 err = mxge_get_buf_big(&ss->rx_data.rx_big, 3307 ss->rx_data.rx_big.info[i].map, i, TRUE); 3308 if (err) { 3309 if_printf(ss->sc->ifp, "alloced %d/%d bigs\n", i, 3310 
ss->rx_data.rx_big.mask + 1); 3311 return ENOMEM; 3312 } 3313 } 3314 return 0; 3315 } 3316 3317 static int 3318 mxge_open(mxge_softc_t *sc) 3319 { 3320 struct ifnet *ifp = sc->ifp; 3321 mxge_cmd_t cmd; 3322 int err, slice, cl_size, i; 3323 bus_addr_t bus; 3324 volatile uint8_t *itable; 3325 struct mxge_slice_state *ss; 3326 3327 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3328 3329 /* Copy the MAC address in case it was overridden */ 3330 bcopy(IF_LLADDR(ifp), sc->mac_addr, ETHER_ADDR_LEN); 3331 3332 err = mxge_reset(sc, 1); 3333 if (err != 0) { 3334 if_printf(ifp, "failed to reset\n"); 3335 return EIO; 3336 } 3337 3338 if (sc->num_slices > 1) { 3339 /* Setup the indirection table */ 3340 cmd.data0 = sc->num_slices; 3341 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, &cmd); 3342 3343 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, &cmd); 3344 if (err != 0) { 3345 if_printf(ifp, "failed to setup rss tables\n"); 3346 return err; 3347 } 3348 3349 /* Just enable an identity mapping */ 3350 itable = sc->sram + cmd.data0; 3351 for (i = 0; i < sc->num_slices; i++) 3352 itable[i] = (uint8_t)i; 3353 3354 if (sc->use_rss) { 3355 volatile uint8_t *hwkey; 3356 uint8_t swkey[MXGE_HWRSS_KEYLEN]; 3357 3358 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_KEY_OFFSET, 3359 &cmd); 3360 if (err != 0) { 3361 if_printf(ifp, "failed to get rsskey\n"); 3362 return err; 3363 } 3364 hwkey = sc->sram + cmd.data0; 3365 3366 toeplitz_get_key(swkey, MXGE_HWRSS_KEYLEN); 3367 for (i = 0; i < MXGE_HWRSS_KEYLEN; ++i) 3368 hwkey[i] = swkey[i]; 3369 wmb(); 3370 3371 err = mxge_send_cmd(sc, MXGEFW_CMD_RSS_KEY_UPDATED, 3372 &cmd); 3373 if (err != 0) { 3374 if_printf(ifp, "failed to update rsskey\n"); 3375 return err; 3376 } 3377 if (bootverbose) 3378 if_printf(ifp, "RSS key updated\n"); 3379 } 3380 3381 cmd.data0 = 1; 3382 if (sc->use_rss) { 3383 if (bootverbose) 3384 if_printf(ifp, "input hash: RSS\n"); 3385 cmd.data1 = MXGEFW_RSS_HASH_TYPE_IPV4 | 3386 MXGEFW_RSS_HASH_TYPE_TCP_IPV4; 3387 } else { 3388 if (bootverbose) 3389 if_printf(ifp, "input hash: SRC_DST_PORT\n"); 3390 cmd.data1 = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 3391 } 3392 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3393 if (err != 0) { 3394 if_printf(ifp, "failed to enable slices\n"); 3395 return err; 3396 } 3397 } 3398 3399 cmd.data0 = MXGEFW_TSO_MODE_NDIS; 3400 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_TSO_MODE, &cmd); 3401 if (err) { 3402 /* 3403 * Can't change TSO mode to NDIS, never allow TSO then 3404 */ 3405 if_printf(ifp, "failed to set TSO mode\n"); 3406 ifp->if_capenable &= ~IFCAP_TSO; 3407 ifp->if_capabilities &= ~IFCAP_TSO; 3408 ifp->if_hwassist &= ~CSUM_TSO; 3409 } 3410 3411 mxge_choose_params(ifp->if_mtu, &cl_size); 3412 3413 cmd.data0 = 1; 3414 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, &cmd); 3415 /* 3416 * Error is only meaningful if we're trying to set 3417 * MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 3418 */ 3419 3420 /* 3421 * Give the firmware the mtu and the big and small buffer 3422 * sizes. The firmware wants the big buf size to be a power 3423 * of two. 
Luckily, DragonFly's clusters are powers of two 3424 */ 3425 cmd.data0 = ifp->if_mtu + ETHER_HDR_LEN + EVL_ENCAPLEN; 3426 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3427 3428 cmd.data0 = MXGE_RX_SMALL_BUFLEN; 3429 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd); 3430 3431 cmd.data0 = cl_size; 3432 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3433 3434 if (err != 0) { 3435 if_printf(ifp, "failed to setup params\n"); 3436 goto abort; 3437 } 3438 3439 /* Now give him the pointer to the stats block */ 3440 for (slice = 0; slice < sc->num_slices; slice++) { 3441 ss = &sc->ss[slice]; 3442 cmd.data0 = MXGE_LOWPART_TO_U32(ss->fw_stats_dma.dmem_busaddr); 3443 cmd.data1 = MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.dmem_busaddr); 3444 cmd.data2 = sizeof(struct mcp_irq_data); 3445 cmd.data2 |= (slice << 16); 3446 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3447 } 3448 3449 if (err != 0) { 3450 bus = sc->ss->fw_stats_dma.dmem_busaddr; 3451 bus += offsetof(struct mcp_irq_data, send_done_count); 3452 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3453 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3454 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3455 &cmd); 3456 3457 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3458 sc->fw_multicast_support = 0; 3459 } else { 3460 sc->fw_multicast_support = 1; 3461 } 3462 3463 if (err != 0) { 3464 if_printf(ifp, "failed to setup params\n"); 3465 goto abort; 3466 } 3467 3468 for (slice = 0; slice < sc->num_slices; slice++) { 3469 err = mxge_slice_open(&sc->ss[slice], cl_size); 3470 if (err != 0) { 3471 if_printf(ifp, "couldn't open slice %d\n", slice); 3472 goto abort; 3473 } 3474 } 3475 3476 /* Finally, start the firmware running */ 3477 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3478 if (err) { 3479 if_printf(ifp, "Couldn't bring up link\n"); 3480 goto abort; 3481 } 3482 3483 ifp->if_flags |= IFF_RUNNING; 3484 for (i = 0; i < sc->num_tx_rings; ++i) { 3485 mxge_tx_ring_t *tx = &sc->ss[i].tx; 3486 3487 ifsq_clr_oactive(tx->ifsq); 3488 ifsq_watchdog_start(&tx->watchdog); 3489 } 3490 3491 return 0; 3492 3493 abort: 3494 mxge_free_mbufs(sc); 3495 return err; 3496 } 3497 3498 static void 3499 mxge_close(mxge_softc_t *sc, int down) 3500 { 3501 struct ifnet *ifp = sc->ifp; 3502 mxge_cmd_t cmd; 3503 int err, old_down_cnt, i; 3504 3505 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3506 3507 if (!down) { 3508 old_down_cnt = sc->down_cnt; 3509 wmb(); 3510 3511 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3512 if (err) 3513 if_printf(ifp, "Couldn't bring down link\n"); 3514 3515 if (old_down_cnt == sc->down_cnt) { 3516 /* 3517 * Wait for down irq 3518 * XXX racy 3519 */ 3520 ifnet_deserialize_all(ifp); 3521 DELAY(10 * sc->intr_coal_delay); 3522 ifnet_serialize_all(ifp); 3523 } 3524 3525 wmb(); 3526 if (old_down_cnt == sc->down_cnt) 3527 if_printf(ifp, "never got down irq\n"); 3528 } 3529 mxge_free_mbufs(sc); 3530 3531 ifp->if_flags &= ~IFF_RUNNING; 3532 for (i = 0; i < sc->num_tx_rings; ++i) { 3533 mxge_tx_ring_t *tx = &sc->ss[i].tx; 3534 3535 ifsq_clr_oactive(tx->ifsq); 3536 ifsq_watchdog_stop(&tx->watchdog); 3537 } 3538 } 3539 3540 static void 3541 mxge_setup_cfg_space(mxge_softc_t *sc) 3542 { 3543 device_t dev = sc->dev; 3544 int reg; 3545 uint16_t lnk, pectl; 3546 3547 /* Find the PCIe link width and set max read request to 4KB */ 3548 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 3549 lnk = pci_read_config(dev, reg + 0x12, 2); 3550 sc->link_width = (lnk >> 4) & 0x3f; 3551 3552 if (sc->pectl == 
0) {
3553				pectl = pci_read_config(dev, reg + 0x8, 2);
3554				pectl = (pectl & ~0x7000) | (5 << 12);
3555				pci_write_config(dev, reg + 0x8, pectl, 2);
3556				sc->pectl = pectl;
3557			} else {
3558				/* Restore saved pectl after watchdog reset */
3559				pci_write_config(dev, reg + 0x8, sc->pectl, 2);
3560			}
3561	}
3562
3563		/* Enable DMA and memory space access */
3564		pci_enable_busmaster(dev);
3565	}
3566
3567	static uint32_t
3568	mxge_read_reboot(mxge_softc_t *sc)
3569	{
3570		device_t dev = sc->dev;
3571		uint32_t vs;
3572
3573		/* Find the vendor specific offset */
3574		if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
3575			if_printf(sc->ifp, "could not find vendor specific offset\n");
3576			return (uint32_t)-1;
3577		}
3578		/* Enable read32 mode */
3579		pci_write_config(dev, vs + 0x10, 0x3, 1);
3580		/* Tell NIC which register to read */
3581		pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3582		return pci_read_config(dev, vs + 0x14, 4);
3583	}
3584
3585	static void
3586	mxge_watchdog_reset(mxge_softc_t *sc)
3587	{
3588		struct pci_devinfo *dinfo;
3589		int err, running;
3590		uint32_t reboot;
3591		uint16_t cmd;
3592
3593		err = ENXIO;
3594
3595		if_printf(sc->ifp, "Watchdog reset!\n");
3596
3597		/*
3598		 * Check to see if the NIC rebooted.  If it did, then all of
3599		 * PCI config space has been reset, and things like the
3600		 * busmaster bit will be zero.  If this is the case, then we
3601		 * must restore PCI config space before the NIC can be used
3602		 * again.
3603		 */
3604		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3605		if (cmd == 0xffff) {
3606			/*
3607			 * Maybe the watchdog caught the NIC rebooting; wait
3608			 * up to 100ms for it to finish.  If it does not come
3609			 * back, then give up.
3610			 */
3611			DELAY(1000 * 100);
3612			cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3613			if (cmd == 0xffff)
3614				if_printf(sc->ifp, "NIC disappeared!\n");
3615		}
3616		if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3617			/* Print the reboot status */
3618			reboot = mxge_read_reboot(sc);
3619			if_printf(sc->ifp, "NIC rebooted, status = 0x%x\n", reboot);
3620
3621			running = sc->ifp->if_flags & IFF_RUNNING;
3622			if (running) {
3623				/*
3624				 * Quiesce NIC so that TX routines will not try to
3625				 * xmit after restoration of BAR
3626				 */
3627
3628				/* Mark the link as down */
3629				if (sc->link_state) {
3630					sc->ifp->if_link_state = LINK_STATE_DOWN;
3631					if_link_state_change(sc->ifp);
3632				}
3633				mxge_close(sc, 1);
3634			}
3635			/* Restore PCI configuration space */
3636			dinfo = device_get_ivars(sc->dev);
3637			pci_cfg_restore(sc->dev, dinfo);
3638
3639			/* And redo any changes we made to our config space */
3640			mxge_setup_cfg_space(sc);
3641
3642			/* Reload f/w */
3643			err = mxge_load_firmware(sc, 0);
3644			if (err)
3645				if_printf(sc->ifp, "Unable to re-load f/w\n");
3646			if (running && !err) {
3647				int i;
3648
3649				err = mxge_open(sc);
3650
3651				for (i = 0; i < sc->num_tx_rings; ++i)
3652					ifsq_devstart_sched(sc->ss[i].tx.ifsq);
3653			}
3654			sc->watchdog_resets++;
3655		} else {
3656			if_printf(sc->ifp, "NIC did not reboot, not resetting\n");
3657			err = 0;
3658		}
3659		if (err) {
3660			if_printf(sc->ifp, "watchdog reset failed\n");
3661		} else {
3662			if (sc->dying == 2)
3663				sc->dying = 0;
3664			callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3665		}
3666	}
3667
3668	static void
3669	mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3670	{
3671		if_printf(sc->ifp, "slice %d stuck? 
ring state:\n", slice); 3672 if_printf(sc->ifp, "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3673 tx->req, tx->done, tx->queue_active); 3674 if_printf(sc->ifp, "tx.activate=%d tx.deactivate=%d\n", 3675 tx->activate, tx->deactivate); 3676 if_printf(sc->ifp, "pkt_done=%d fw=%d\n", 3677 tx->pkt_done, be32toh(sc->ss->fw_stats->send_done_count)); 3678 } 3679 3680 static u_long 3681 mxge_update_stats(mxge_softc_t *sc) 3682 { 3683 u_long ipackets, opackets, pkts; 3684 3685 IFNET_STAT_GET(sc->ifp, ipackets, ipackets); 3686 IFNET_STAT_GET(sc->ifp, opackets, opackets); 3687 3688 pkts = ipackets - sc->ipackets; 3689 pkts += opackets - sc->opackets; 3690 3691 sc->ipackets = ipackets; 3692 sc->opackets = opackets; 3693 3694 return pkts; 3695 } 3696 3697 static void 3698 mxge_tick(void *arg) 3699 { 3700 mxge_softc_t *sc = arg; 3701 u_long pkts = 0; 3702 int err = 0; 3703 int ticks; 3704 3705 lwkt_serialize_enter(&sc->main_serialize); 3706 3707 ticks = mxge_ticks; 3708 if (sc->ifp->if_flags & IFF_RUNNING) { 3709 /* Aggregate stats from different slices */ 3710 pkts = mxge_update_stats(sc); 3711 if (sc->need_media_probe) 3712 mxge_media_probe(sc); 3713 } 3714 if (pkts == 0) { 3715 uint16_t cmd; 3716 3717 /* Ensure NIC did not suffer h/w fault while idle */ 3718 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3719 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3720 sc->dying = 2; 3721 mxge_serialize_skipmain(sc); 3722 mxge_watchdog_reset(sc); 3723 mxge_deserialize_skipmain(sc); 3724 err = ENXIO; 3725 } 3726 3727 /* Look less often if NIC is idle */ 3728 ticks *= 4; 3729 } 3730 3731 if (err == 0) 3732 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 3733 3734 lwkt_serialize_exit(&sc->main_serialize); 3735 } 3736 3737 static int 3738 mxge_media_change(struct ifnet *ifp) 3739 { 3740 return EINVAL; 3741 } 3742 3743 static int 3744 mxge_change_mtu(mxge_softc_t *sc, int mtu) 3745 { 3746 struct ifnet *ifp = sc->ifp; 3747 int real_mtu, old_mtu; 3748 int err = 0; 3749 3750 real_mtu = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN; 3751 if (mtu > sc->max_mtu || real_mtu < 60) 3752 return EINVAL; 3753 3754 old_mtu = ifp->if_mtu; 3755 ifp->if_mtu = mtu; 3756 if (ifp->if_flags & IFF_RUNNING) { 3757 mxge_close(sc, 0); 3758 err = mxge_open(sc); 3759 if (err != 0) { 3760 ifp->if_mtu = old_mtu; 3761 mxge_close(sc, 0); 3762 mxge_open(sc); 3763 } 3764 } 3765 return err; 3766 } 3767 3768 static void 3769 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 3770 { 3771 mxge_softc_t *sc = ifp->if_softc; 3772 3773 3774 if (sc == NULL) 3775 return; 3776 ifmr->ifm_status = IFM_AVALID; 3777 ifmr->ifm_active = IFM_ETHER | IFM_FDX; 3778 ifmr->ifm_status |= sc->link_state ? 
IFM_ACTIVE : 0; 3779 ifmr->ifm_active |= sc->current_media; 3780 } 3781 3782 static int 3783 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data, 3784 struct ucred *cr __unused) 3785 { 3786 mxge_softc_t *sc = ifp->if_softc; 3787 struct ifreq *ifr = (struct ifreq *)data; 3788 int err, mask; 3789 3790 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3791 err = 0; 3792 3793 switch (command) { 3794 case SIOCSIFMTU: 3795 err = mxge_change_mtu(sc, ifr->ifr_mtu); 3796 break; 3797 3798 case SIOCSIFFLAGS: 3799 if (sc->dying) 3800 return EINVAL; 3801 3802 if (ifp->if_flags & IFF_UP) { 3803 if (!(ifp->if_flags & IFF_RUNNING)) { 3804 err = mxge_open(sc); 3805 } else { 3806 /* 3807 * Take care of PROMISC and ALLMULTI 3808 * flag changes 3809 */ 3810 mxge_change_promisc(sc, 3811 ifp->if_flags & IFF_PROMISC); 3812 mxge_set_multicast_list(sc); 3813 } 3814 } else { 3815 if (ifp->if_flags & IFF_RUNNING) 3816 mxge_close(sc, 0); 3817 } 3818 break; 3819 3820 case SIOCADDMULTI: 3821 case SIOCDELMULTI: 3822 mxge_set_multicast_list(sc); 3823 break; 3824 3825 case SIOCSIFCAP: 3826 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 3827 if (mask & IFCAP_TXCSUM) { 3828 ifp->if_capenable ^= IFCAP_TXCSUM; 3829 if (ifp->if_capenable & IFCAP_TXCSUM) 3830 ifp->if_hwassist |= CSUM_TCP | CSUM_UDP; 3831 else 3832 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP); 3833 } 3834 if (mask & IFCAP_TSO) { 3835 ifp->if_capenable ^= IFCAP_TSO; 3836 if (ifp->if_capenable & IFCAP_TSO) 3837 ifp->if_hwassist |= CSUM_TSO; 3838 else 3839 ifp->if_hwassist &= ~CSUM_TSO; 3840 } 3841 if (mask & IFCAP_RXCSUM) 3842 ifp->if_capenable ^= IFCAP_RXCSUM; 3843 if (mask & IFCAP_VLAN_HWTAGGING) 3844 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 3845 break; 3846 3847 case SIOCGIFMEDIA: 3848 mxge_media_probe(sc); 3849 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 3850 &sc->media, command); 3851 break; 3852 3853 default: 3854 err = ether_ioctl(ifp, command, data); 3855 break; 3856 } 3857 return err; 3858 } 3859 3860 static void 3861 mxge_fetch_tunables(mxge_softc_t *sc) 3862 { 3863 sc->intr_coal_delay = mxge_intr_coal_delay; 3864 if (sc->intr_coal_delay < 0 || sc->intr_coal_delay > (10 * 1000)) 3865 sc->intr_coal_delay = MXGE_INTR_COAL_DELAY; 3866 3867 /* XXX */ 3868 if (mxge_ticks == 0) 3869 mxge_ticks = hz / 2; 3870 3871 sc->pause = mxge_flow_control; 3872 sc->use_rss = mxge_use_rss; 3873 3874 sc->throttle = mxge_throttle; 3875 if (sc->throttle && sc->throttle > MXGE_MAX_THROTTLE) 3876 sc->throttle = MXGE_MAX_THROTTLE; 3877 if (sc->throttle && sc->throttle < MXGE_MIN_THROTTLE) 3878 sc->throttle = MXGE_MIN_THROTTLE; 3879 } 3880 3881 static void 3882 mxge_free_slices(mxge_softc_t *sc) 3883 { 3884 struct mxge_slice_state *ss; 3885 int i; 3886 3887 if (sc->ss == NULL) 3888 return; 3889 3890 for (i = 0; i < sc->num_slices; i++) { 3891 ss = &sc->ss[i]; 3892 if (ss->fw_stats != NULL) { 3893 mxge_dma_free(&ss->fw_stats_dma); 3894 ss->fw_stats = NULL; 3895 } 3896 if (ss->rx_data.rx_done.entry != NULL) { 3897 mxge_dma_free(&ss->rx_done_dma); 3898 ss->rx_data.rx_done.entry = NULL; 3899 } 3900 } 3901 kfree(sc->ss, M_DEVBUF); 3902 sc->ss = NULL; 3903 } 3904 3905 static int 3906 mxge_alloc_slices(mxge_softc_t *sc) 3907 { 3908 mxge_cmd_t cmd; 3909 struct mxge_slice_state *ss; 3910 size_t bytes; 3911 int err, i, rx_ring_size; 3912 3913 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 3914 if (err != 0) { 3915 device_printf(sc->dev, "Cannot determine rx ring size\n"); 3916 return err; 3917 } 3918 rx_ring_size = cmd.data0; 3919 sc->rx_intr_slots = 2 * (rx_ring_size / sizeof 
(mcp_dma_addr_t));
3920
3921		bytes = sizeof(*sc->ss) * sc->num_slices;
3922		sc->ss = kmalloc_cachealign(bytes, M_DEVBUF, M_WAITOK | M_ZERO);
3923
3924		for (i = 0; i < sc->num_slices; i++) {
3925			ss = &sc->ss[i];
3926
3927			ss->sc = sc;
3928
3929			lwkt_serialize_init(&ss->rx_data.rx_serialize);
3930			lwkt_serialize_init(&ss->tx.tx_serialize);
3931			ss->intr_rid = -1;
3932
3933			/*
3934			 * Allocate per-slice rx interrupt queue
3935			 * XXX assume 4-byte mcp_slot
3936			 */
3937			bytes = sc->rx_intr_slots * sizeof(mcp_slot_t);
3938			err = mxge_dma_alloc(sc, &ss->rx_done_dma, bytes, 4096);
3939			if (err != 0) {
3940				device_printf(sc->dev,
3941				    "alloc %d slice rx_done failed\n", i);
3942				return err;
3943			}
3944			ss->rx_data.rx_done.entry = ss->rx_done_dma.dmem_addr;
3945
3946			/*
3947			 * Allocate the per-slice firmware stats
3948			 */
3949			bytes = sizeof(*ss->fw_stats);
3950			err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
3951			    sizeof(*ss->fw_stats), 64);
3952			if (err != 0) {
3953				device_printf(sc->dev,
3954				    "alloc %d fw_stats failed\n", i);
3955				return err;
3956			}
3957			ss->fw_stats = ss->fw_stats_dma.dmem_addr;
3958		}
3959		return 0;
3960	}
3961
3962	static void
3963	mxge_slice_probe(mxge_softc_t *sc)
3964	{
3965		int status, max_intr_slots, max_slices, num_slices;
3966		int msix_cnt, msix_enable, i, multi_tx;
3967		mxge_cmd_t cmd;
3968		const char *old_fw;
3969
3970		sc->num_slices = 1;
3971		sc->num_tx_rings = 1;
3972
3973		num_slices = device_getenv_int(sc->dev, "num_slices", mxge_num_slices);
3974		if (num_slices == 1)
3975			return;
3976
3977		if (ncpus2 == 1)
3978			return;
3979
3980		msix_enable = device_getenv_int(sc->dev, "msix.enable",
3981		    mxge_msix_enable);
3982		if (!msix_enable)
3983			return;
3984
3985		msix_cnt = pci_msix_count(sc->dev);
3986		if (msix_cnt < 2)
3987			return;
3988
3989		/*
3990		 * Round down MSI-X vector count to the nearest power of 2
3991		 */
3992		i = 0;
3993		while ((1 << (i + 1)) <= msix_cnt)
3994			++i;
3995		msix_cnt = 1 << i;
3996
3997		/*
3998		 * Now load the slice-aware firmware to see what it supports
3999		 */
4000		old_fw = sc->fw_name;
4001		if (old_fw == mxge_fw_aligned)
4002			sc->fw_name = mxge_fw_rss_aligned;
4003		else
4004			sc->fw_name = mxge_fw_rss_unaligned;
4005		status = mxge_load_firmware(sc, 0);
4006		if (status != 0) {
4007			device_printf(sc->dev, "Falling back to a single slice\n");
4008			return;
4009		}
4010
4011		/*
4012		 * Try to send a reset command to the card to see if it is alive
4013		 */
4014		memset(&cmd, 0, sizeof(cmd));
4015		status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4016		if (status != 0) {
4017			device_printf(sc->dev, "failed reset\n");
4018			goto abort_with_fw;
4019		}
4020
4021		/*
4022		 * Get rx ring size to calculate rx interrupt queue size
4023		 */
4024		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4025		if (status != 0) {
4026			device_printf(sc->dev, "Cannot determine rx ring size\n");
4027			goto abort_with_fw;
4028		}
4029		max_intr_slots = 2 * (cmd.data0 / sizeof(mcp_dma_addr_t));
4030
4031		/*
4032		 * Tell it the size of the rx interrupt queue
4033		 */
4034		cmd.data0 = max_intr_slots * sizeof(struct mcp_slot);
4035		status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4036		if (status != 0) {
4037			device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4038			goto abort_with_fw;
4039		}
4040
4041		/*
4042		 * Ask for the maximum number of slices it supports
4043		 */
4044		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4045		if (status != 0) {
4046			device_printf(sc->dev,
4047			    "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4048			goto abort_with_fw;
4049		}
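	/*
	 * The firmware replies with the number of RSS queues it can
	 * support; round that down to a power of 2 and clamp it to
	 * the usable MSI-X vector count below.
	 */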
	max_slices = cmd.data0;

	/*
	 * Round down the max slice count to the nearest power of 2
	 */
	i = 0;
	while ((1 << (i + 1)) <= max_slices)
		++i;
	max_slices = 1 << i;

	if (max_slices > msix_cnt)
		max_slices = msix_cnt;

	sc->num_slices = if_ring_count2(num_slices, max_slices);

	multi_tx = device_getenv_int(sc->dev, "multi_tx", mxge_multi_tx);
	if (multi_tx)
		sc->num_tx_rings = sc->num_slices;

	if (bootverbose) {
		device_printf(sc->dev, "using %d slices, max %d\n",
		    sc->num_slices, max_slices);
	}

	if (sc->num_slices == 1)
		goto abort_with_fw;
	return;

abort_with_fw:
	sc->fw_name = old_fw;
	mxge_load_firmware(sc, 0);
}

static void
mxge_setup_serialize(struct mxge_softc *sc)
{
	int i = 0, slice;

	/* Main + rx + tx */
	sc->nserialize = (2 * sc->num_slices) + 1;
	sc->serializes =
	    kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *),
	        M_DEVBUF, M_WAITOK | M_ZERO);

	/*
	 * Set up the serializer array
	 *
	 * NOTE: Order is critical
	 */

	KKASSERT(i < sc->nserialize);
	sc->serializes[i++] = &sc->main_serialize;

	for (slice = 0; slice < sc->num_slices; ++slice) {
		KKASSERT(i < sc->nserialize);
		sc->serializes[i++] = &sc->ss[slice].rx_data.rx_serialize;
	}

	for (slice = 0; slice < sc->num_slices; ++slice) {
		KKASSERT(i < sc->nserialize);
		sc->serializes[i++] = &sc->ss[slice].tx.tx_serialize;
	}

	KKASSERT(i == sc->nserialize);
}

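/*
 * The ifnet serializer methods below simply map onto the array built by
 * mxge_setup_serialize(), so ifnet_serialize_all(ifp) enters the main
 * serializer, then every rx serializer, then every tx serializer, in
 * that fixed order.
 */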
static void
mxge_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
{
	struct mxge_softc *sc = ifp->if_softc;

	ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz);
}

static void
mxge_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
{
	struct mxge_softc *sc = ifp->if_softc;

	ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz);
}

static int
mxge_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
{
	struct mxge_softc *sc = ifp->if_softc;

	return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz);
}

#ifdef INVARIANTS

static void
mxge_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
    boolean_t serialized)
{
	struct mxge_softc *sc = ifp->if_softc;

	ifnet_serialize_array_assert(sc->serializes, sc->nserialize,
	    slz, serialized);
}

#endif /* INVARIANTS */

#ifdef IFPOLL_ENABLE

static void
mxge_npoll_rx(struct ifnet *ifp, void *xss, int cycle)
{
	struct mxge_slice_state *ss = xss;
	mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;

	ASSERT_SERIALIZED(&ss->rx_data.rx_serialize);

	if (rx_done->entry[rx_done->idx].length != 0) {
		mxge_clean_rx_done(&ss->sc->arpcom.ac_if, &ss->rx_data, cycle);
	} else {
		/*
		 * XXX
		 * Writing this register obviously has a cost; however,
		 * if we don't hand back the rx token, upcoming packets
		 * may suffer a ridiculously large delay, as observed on
		 * 8AL-C using ping(8).
		 */
		*ss->irq_claim = be32toh(3);
	}
}

static void
mxge_npoll(struct ifnet *ifp, struct ifpoll_info *info)
{
	struct mxge_softc *sc = ifp->if_softc;
	int i;

	if (info == NULL)
		return;

	/*
	 * Only poll rx; polling tx and status doesn't seem to work
	 */
	for (i = 0; i < sc->num_slices; ++i) {
		struct mxge_slice_state *ss = &sc->ss[i];
		int idx = ss->intr_cpuid;

		KKASSERT(idx < ncpus2);
		info->ifpi_rx[idx].poll_func = mxge_npoll_rx;
		info->ifpi_rx[idx].arg = ss;
		info->ifpi_rx[idx].serializer = &ss->rx_data.rx_serialize;
	}
}

#endif /* IFPOLL_ENABLE */

static int
mxge_attach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int err, rid, i;

	/*
	 * Avoid rewriting half the lines in this file to use
	 * &sc->arpcom.ac_if instead
	 */
	sc->ifp = ifp;
	sc->dev = dev;
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifmedia_init(&sc->media, 0, mxge_media_change, mxge_media_status);

	lwkt_serialize_init(&sc->main_serialize);

	mxge_fetch_tunables(sc);

	err = bus_dma_tag_create(NULL,		/* parent */
	    1,					/* alignment */
	    0,					/* boundary */
	    BUS_SPACE_MAXADDR,			/* low */
	    BUS_SPACE_MAXADDR,			/* high */
	    NULL, NULL,				/* filter */
	    BUS_SPACE_MAXSIZE_32BIT,		/* maxsize */
	    0,					/* num segs */
	    BUS_SPACE_MAXSIZE_32BIT,		/* maxsegsize */
	    0,					/* flags */
	    &sc->parent_dmat);			/* tag */
	if (err != 0) {
		device_printf(dev, "Err %d allocating parent dmat\n", err);
		goto failed;
	}

	callout_init_mp(&sc->co_hdl);

	mxge_setup_cfg_space(sc);

	/*
	 * Map the board into the kernel
	 */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto failed;
	}

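	/*
	 * The NIC exposes a 2MB SRAM window through this BAR.  The usable
	 * size computed below is 2MB less 2*48KB, 32KB and 0x100 bytes,
	 * which are presumably regions at the top of SRAM reserved for
	 * firmware use; the EEPROM strings, for one, live at the very end
	 * of the usable window and are read out just below.
	 */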
"alloc dmabench DMA buf failed\n"); 4304 goto failed; 4305 } 4306 4307 /* Select & load the firmware */ 4308 err = mxge_select_firmware(sc); 4309 if (err != 0) { 4310 device_printf(dev, "select firmware failed\n"); 4311 goto failed; 4312 } 4313 4314 mxge_slice_probe(sc); 4315 err = mxge_alloc_slices(sc); 4316 if (err != 0) { 4317 device_printf(dev, "alloc slices failed\n"); 4318 goto failed; 4319 } 4320 4321 err = mxge_alloc_intr(sc); 4322 if (err != 0) { 4323 device_printf(dev, "alloc intr failed\n"); 4324 goto failed; 4325 } 4326 4327 /* Setup serializes */ 4328 mxge_setup_serialize(sc); 4329 4330 err = mxge_reset(sc, 0); 4331 if (err != 0) { 4332 device_printf(dev, "reset failed\n"); 4333 goto failed; 4334 } 4335 4336 err = mxge_alloc_rings(sc); 4337 if (err != 0) { 4338 device_printf(dev, "failed to allocate rings\n"); 4339 goto failed; 4340 } 4341 4342 ifp->if_baudrate = IF_Gbps(10UL); 4343 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO; 4344 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4345 4346 ifp->if_capabilities |= IFCAP_VLAN_MTU; 4347 #if 0 4348 /* Well, its software, sigh */ 4349 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING; 4350 #endif 4351 ifp->if_capenable = ifp->if_capabilities; 4352 4353 ifp->if_softc = sc; 4354 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4355 ifp->if_init = mxge_init; 4356 ifp->if_ioctl = mxge_ioctl; 4357 ifp->if_start = mxge_start; 4358 #ifdef IFPOLL_ENABLE 4359 if (sc->intr_type != PCI_INTR_TYPE_LEGACY) 4360 ifp->if_npoll = mxge_npoll; 4361 #endif 4362 ifp->if_serialize = mxge_serialize; 4363 ifp->if_deserialize = mxge_deserialize; 4364 ifp->if_tryserialize = mxge_tryserialize; 4365 #ifdef INVARIANTS 4366 ifp->if_serialize_assert = mxge_serialize_assert; 4367 #endif 4368 4369 /* Increase TSO burst length */ 4370 ifp->if_tsolen = (32 * ETHERMTU); 4371 4372 /* Initialise the ifmedia structure */ 4373 mxge_media_init(sc); 4374 mxge_media_probe(sc); 4375 4376 ether_ifattach(ifp, sc->mac_addr, NULL); 4377 4378 /* Setup TX rings and subqueues */ 4379 for (i = 0; i < sc->num_tx_rings; ++i) { 4380 struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i); 4381 struct mxge_slice_state *ss = &sc->ss[i]; 4382 4383 ifsq_set_cpuid(ifsq, ss->intr_cpuid); 4384 ifsq_set_hw_serialize(ifsq, &ss->tx.tx_serialize); 4385 ifsq_set_priv(ifsq, &ss->tx); 4386 ss->tx.ifsq = ifsq; 4387 4388 ifsq_watchdog_init(&ss->tx.watchdog, ifsq, mxge_watchdog); 4389 } 4390 4391 /* 4392 * XXX 4393 * We are not ready to do "gather" jumbo frame, so 4394 * limit MTU to MJUMPAGESIZE 4395 */ 4396 sc->max_mtu = MJUMPAGESIZE - 4397 ETHER_HDR_LEN - EVL_ENCAPLEN - MXGEFW_PAD - 1; 4398 sc->dying = 0; 4399 4400 err = mxge_setup_intr(sc); 4401 if (err != 0) { 4402 device_printf(dev, "alloc and setup intr failed\n"); 4403 ether_ifdetach(ifp); 4404 goto failed; 4405 } 4406 4407 mxge_add_sysctls(sc); 4408 4409 /* Increase non-cluster mbuf limit; used by small RX rings */ 4410 mb_inclimit(ifp->if_nmbclusters); 4411 4412 callout_reset_bycpu(&sc->co_hdl, mxge_ticks, mxge_tick, sc, 4413 sc->ss[0].intr_cpuid); 4414 return 0; 4415 4416 failed: 4417 mxge_detach(dev); 4418 return err; 4419 } 4420 4421 static int 4422 mxge_detach(device_t dev) 4423 { 4424 mxge_softc_t *sc = device_get_softc(dev); 4425 4426 if (device_is_attached(dev)) { 4427 struct ifnet *ifp = sc->ifp; 4428 int mblimit = ifp->if_nmbclusters; 4429 4430 ifnet_serialize_all(ifp); 4431 4432 sc->dying = 1; 4433 if (ifp->if_flags & IFF_RUNNING) 4434 mxge_close(sc, 1); 4435 callout_stop(&sc->co_hdl); 4436 4437 
		mxge_teardown_intr(sc, sc->num_slices);

		ifnet_deserialize_all(ifp);

		callout_terminate(&sc->co_hdl);

		ether_ifdetach(ifp);

		/* Restore the non-cluster mbuf limit we increased */
		mb_inclimit(-mblimit);
	}
	ifmedia_removeall(&sc->media);

	if (sc->cmd != NULL && sc->zeropad_dma.dmem_addr != NULL &&
	    sc->sram != NULL)
		mxge_dummy_rdma(sc, 0);

	mxge_free_intr(sc);
	mxge_rem_sysctls(sc);
	mxge_free_rings(sc);

	/* MUST come after the sysctls, intr and rings are freed */
	mxge_free_slices(sc);

	if (sc->dmabench_dma.dmem_addr != NULL)
		mxge_dma_free(&sc->dmabench_dma);
	if (sc->zeropad_dma.dmem_addr != NULL)
		mxge_dma_free(&sc->zeropad_dma);
	if (sc->cmd_dma.dmem_addr != NULL)
		mxge_dma_free(&sc->cmd_dma);

	if (sc->msix_table_res != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(2),
		    sc->msix_table_res);
	}
	if (sc->mem_res != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS,
		    sc->mem_res);
	}

	if (sc->parent_dmat != NULL)
		bus_dma_tag_destroy(sc->parent_dmat);

	return 0;
}

static int
mxge_shutdown(device_t dev)
{
	return 0;
}

static void
mxge_free_msix(struct mxge_softc *sc, boolean_t setup)
{
	int i;

	KKASSERT(sc->num_slices > 1);

	for (i = 0; i < sc->num_slices; ++i) {
		struct mxge_slice_state *ss = &sc->ss[i];

		if (ss->intr_res != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ,
			    ss->intr_rid, ss->intr_res);
		}
		if (ss->intr_rid >= 0)
			pci_release_msix_vector(sc->dev, ss->intr_rid);
	}
	if (setup)
		pci_teardown_msix(sc->dev);
}

static int
mxge_alloc_msix(struct mxge_softc *sc)
{
	struct mxge_slice_state *ss;
	int offset, rid, error, i;
	boolean_t setup = FALSE;

	KKASSERT(sc->num_slices > 1);

	if (sc->num_slices == ncpus2) {
		offset = 0;
	} else {
		int offset_def;

		offset_def = (sc->num_slices * device_get_unit(sc->dev)) %
		    ncpus2;

		offset = device_getenv_int(sc->dev, "msix.offset", offset_def);
		if (offset >= ncpus2 ||
		    offset % sc->num_slices != 0) {
			device_printf(sc->dev, "invalid msix.offset %d, "
			    "use %d\n", offset, offset_def);
			offset = offset_def;
		}
	}

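	/*
	 * Vector layout: slice 0's vector is the combined one (desc
	 * "comb", serviced by mxge_msi on the main serializer), while
	 * each remaining slice gets its own vector servicing that
	 * slice's rx ring, or rx+tx when multiple tx rings are enabled.
	 */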
	ss = &sc->ss[0];

	ss->intr_serialize = &sc->main_serialize;
	ss->intr_func = mxge_msi;
	ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0),
	    "%s comb", device_get_nameunit(sc->dev));
	ss->intr_desc = ss->intr_desc0;
	ss->intr_cpuid = offset;

	for (i = 1; i < sc->num_slices; ++i) {
		ss = &sc->ss[i];

		ss->intr_serialize = &ss->rx_data.rx_serialize;
		if (sc->num_tx_rings == 1) {
			ss->intr_func = mxge_msix_rx;
			ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0),
			    "%s rx", device_get_nameunit(sc->dev));
		} else {
			ss->intr_func = mxge_msix_rxtx;
			ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0),
			    "%s rxtx", device_get_nameunit(sc->dev));
		}
		ss->intr_desc = ss->intr_desc0;
		ss->intr_cpuid = offset + i;
	}

	rid = PCIR_BAR(2);
	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (sc->msix_table_res == NULL) {
		device_printf(sc->dev, "couldn't alloc MSI-X table res\n");
		return ENXIO;
	}

	error = pci_setup_msix(sc->dev);
	if (error) {
		device_printf(sc->dev, "could not set up MSI-X\n");
		goto back;
	}
	setup = TRUE;

	for (i = 0; i < sc->num_slices; ++i) {
		ss = &sc->ss[i];

		error = pci_alloc_msix_vector(sc->dev, i, &ss->intr_rid,
		    ss->intr_cpuid);
		if (error) {
			device_printf(sc->dev, "could not alloc "
			    "MSI-X %d on cpu%d\n", i, ss->intr_cpuid);
			goto back;
		}

		ss->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
		    &ss->intr_rid, RF_ACTIVE);
		if (ss->intr_res == NULL) {
			device_printf(sc->dev, "could not alloc "
			    "MSI-X %d resource\n", i);
			error = ENXIO;
			goto back;
		}
	}

	pci_enable_msix(sc->dev);
	sc->intr_type = PCI_INTR_TYPE_MSIX;
back:
	if (error)
		mxge_free_msix(sc, setup);
	return error;
}

static int
mxge_alloc_intr(struct mxge_softc *sc)
{
	struct mxge_slice_state *ss;
	u_int irq_flags;

	if (sc->num_slices > 1) {
		int error;

		error = mxge_alloc_msix(sc);
		if (error)
			return error;
		KKASSERT(sc->intr_type == PCI_INTR_TYPE_MSIX);
		return 0;
	}

	ss = &sc->ss[0];

	sc->intr_type = pci_alloc_1intr(sc->dev, mxge_msi_enable,
	    &ss->intr_rid, &irq_flags);

	ss->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
	    &ss->intr_rid, irq_flags);
	if (ss->intr_res == NULL) {
		device_printf(sc->dev, "could not alloc interrupt\n");
		return ENXIO;
	}

	if (sc->intr_type == PCI_INTR_TYPE_LEGACY)
		ss->intr_func = mxge_legacy;
	else
		ss->intr_func = mxge_msi;
	ss->intr_serialize = &sc->main_serialize;
	ss->intr_cpuid = rman_get_cpuid(ss->intr_res);

	return 0;
}

static int
mxge_setup_intr(struct mxge_softc *sc)
{
	int i;

	for (i = 0; i < sc->num_slices; ++i) {
		struct mxge_slice_state *ss = &sc->ss[i];
		int error;

		error = bus_setup_intr_descr(sc->dev, ss->intr_res,
		    INTR_MPSAFE, ss->intr_func, ss, &ss->intr_hand,
		    ss->intr_serialize, ss->intr_desc);
		if (error) {
			device_printf(sc->dev, "can't set up intr %d\n", i);
			mxge_teardown_intr(sc, i);
			return error;
		}
	}
	return 0;
}

static void
mxge_teardown_intr(struct mxge_softc *sc, int cnt)
{
	int i;

	if (sc->ss == NULL)
		return;

	for (i = 0; i < cnt; ++i) {
		struct mxge_slice_state *ss = &sc->ss[i];

		bus_teardown_intr(sc->dev, ss->intr_res, ss->intr_hand);
	}
}

static void
mxge_free_intr(struct mxge_softc *sc)
{
	if (sc->ss == NULL)
		return;

	if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
		struct mxge_slice_state *ss = &sc->ss[0];

		if (ss->intr_res != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ,
			    ss->intr_rid, ss->intr_res);
		}
		if (sc->intr_type == PCI_INTR_TYPE_MSI)
			pci_release_msi(sc->dev);
	} else {
		mxge_free_msix(sc, TRUE);
	}
}