/******************************************************************************

Copyright (c) 2006-2013, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

$FreeBSD: head/sys/dev/mxge/if_mxge.c 254263 2013-08-12 23:30:01Z scottl $

***************************************************************************/

#include "opt_inet.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/in_cksum.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/serialize.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/ifq_var.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/vlan/if_vlan_var.h>
#include <net/zlib.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <sys/bus.h>
#include <sys/rman.h>

#include <bus/pci/pcireg.h>
#include <bus/pci/pcivar.h>
#include <bus/pci/pci_private.h>	/* XXX for pci_cfg_restore */

#include <vm/vm.h>			/* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386__) || defined(__x86_64__)
#include <machine/specialreg.h>
#endif

#include <dev/netif/mxge/mxge_mcp.h>
#include <dev/netif/mxge/mcp_gen_header.h>
#include <dev/netif/mxge/if_mxge_var.h>

#define MXGE_RX_SMALL_BUFLEN	(MHLEN - MXGEFW_PAD)

/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = MXGE_INTR_COAL_DELAY;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_always_promisc = 0;
static int mxge_throttle = 0;
static int mxge_msi_enable = 1;

static const char *mxge_fw_unaligned = "mxge_ethp_z8e";
static const char *mxge_fw_aligned = "mxge_eth_z8e";
static const char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static const char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
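/*
 * Firmware image naming (per the selection logic further below): the
 * "_rss_" images support multiple slices, and the "ethp" images
 * tolerate unaligned PCI-E completions, while the plain "eth" images
 * require aligned completions but allow larger (4KB) Read-DMAs.
 */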
TUNABLE_INT("hw.mxge.max_slices", &mxge_max_slices);
TUNABLE_INT("hw.mxge.flow_control_enabled", &mxge_flow_control);
TUNABLE_INT("hw.mxge.intr_coal_delay", &mxge_intr_coal_delay);
TUNABLE_INT("hw.mxge.nvidia_ecrc_enable", &mxge_nvidia_ecrc_enable);
TUNABLE_INT("hw.mxge.force_firmware", &mxge_force_firmware);
TUNABLE_INT("hw.mxge.deassert_wait", &mxge_deassert_wait);
TUNABLE_INT("hw.mxge.ticks", &mxge_ticks);
TUNABLE_INT("hw.mxge.always_promisc", &mxge_always_promisc);
TUNABLE_INT("hw.mxge.throttle", &mxge_throttle);
TUNABLE_INT("hw.mxge.msi.enable", &mxge_msi_enable);

static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);

static device_method_t mxge_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),
	DEVMETHOD_END
};

static driver_t mxge_driver = {
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus. */
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, NULL, NULL);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static void mxge_close(mxge_softc_t *sc, int down);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);
static void mxge_watchdog_reset(mxge_softc_t *sc);
static void mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice);

static int
mxge_probe(device_t dev)
{
	if (pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM &&
	    (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E ||
	     pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9)) {
		int rev = pci_get_revid(dev);

		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n", rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}

static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386__) || defined(__x86_64__)
	vm_offset_t len;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	pmap_change_attr((vm_offset_t)sc->sram, len / PAGE_SIZE,
	    PAT_WRITE_COMBINING);
#endif
}

static int
mxge_dma_alloc(mxge_softc_t *sc, bus_dmamem_t *dma, size_t bytes,
    bus_size_t alignment)
{
	bus_size_t boundary;
	int err;

	if (bytes > 4096 && alignment == 4096)
		boundary = 0;
	else
		boundary = 4096;

	err = bus_dmamem_coherent(sc->parent_dmat, alignment, boundary,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, bytes,
	    BUS_DMA_WAITOK | BUS_DMA_ZERO, dma);
	if (err != 0) {
		device_printf(sc->dev, "bus_dmamem_coherent failed: %d\n", err);
		return err;
	}
	return 0;
}

static void
mxge_dma_free(bus_dmamem_t *dma)
{
	bus_dmamap_unload(dma->dmem_tag, dma->dmem_map);
	bus_dmamem_free(dma->dmem_tag, dma->dmem_addr, dma->dmem_map);
	bus_dma_tag_destroy(dma->dmem_tag);
}

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */
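/*
 * For example (values illustrative only), a string set of
 *   "SN=123456\0MAC=00:60:dd:43:a2:1f\0PC=10G-PCIE-8A-C\0\0"
 * would yield a MAC address of 00:60:dd:43:a2:1f, product code
 * "10G-PCIE-8A-C" and serial number "123456".
 */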
static int
mxge_parse_strings(mxge_softc_t *sc)
{
	const char *ptr;
	int i, found_mac, found_sn2;
	char *endptr;

	ptr = sc->eeprom_strings;
	found_mac = 0;
	found_sn2 = 0;
	while (*ptr != '\0') {
		if (strncmp(ptr, "MAC=", 4) == 0) {
			ptr += 4;
			for (i = 0;;) {
				sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
				if (endptr - ptr != 2)
					goto abort;
				ptr = endptr;
				if (++i == 6)
					break;
				if (*ptr++ != ':')
					goto abort;
			}
			found_mac = 1;
		} else if (strncmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strlcpy(sc->product_code_string, ptr,
			    sizeof(sc->product_code_string));
		} else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
			ptr += 3;
			strlcpy(sc->serial_number_string, ptr,
			    sizeof(sc->serial_number_string));
		} else if (strncmp(ptr, "SN2=", 4) == 0) {
			/* SN2 takes precedence over SN */
			ptr += 4;
			found_sn2 = 1;
			strlcpy(sc->serial_number_string, ptr,
			    sizeof(sc->serial_number_string));
		}
		while (*ptr++ != '\0') {}
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");
	return ENXIO;
}

#if defined(__i386__) || defined(__x86_64__)

static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/*
	 * XXXX
	 * Test below is commented because it is believed that doing
	 * config read/write beyond 0xff will access the config space
	 * for the next larger function.  Uncomment this and remove
	 * the hacky pmap_mapdev() way of accessing config space when
	 * DragonFly grows support for extended pcie config space access.
	 */
#if 0
	/*
	 * See if we can, by some miracle, access the extended
	 * config space
	 */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/*
	 * Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves.  This is because on
	 * opteron/nvidia class machines the 0xe0000000 mapping is
	 * handled by the nvidia chipset; that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */
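	/*
	 * The offset computed below follows the standard PCI-E extended
	 * config (ECAM) layout, bus << 20 | slot << 15 | func << 12,
	 * relative to the chipset's memory-mapped config base:
	 * 0x00100000UL * bus is bus << 20, and 0x00001000UL *
	 * (func + 8 * slot) is func << 12 | slot << 15.
	 */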
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
	    PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
	    PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
	    PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
	    PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
	    PCI_IVAR_DEVICE, &idev);

	off = base + 0x00100000UL * (unsigned long)bus +
	    0x00001000UL * (unsigned long)(func + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
	if (va == NULL) {
		device_printf(sc->dev, "pmap_mapdev() failed\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (!(vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
		    vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t *)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (bootverbose) {
		device_printf(sc->dev, "Enabled ECRC on upstream "
		    "Nvidia bridge at %d:%d:%d\n",
		    (int)bus, (int)slot, (int)func);
	}
}

#else	/* __i386__ || __x86_64__ */

static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev, "Nforce 4 chipset on non-x86/x86_64!?!?!\n");
}

#endif

static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.dmem_busaddr;
	int status;
	uint32_t len;
	const char *test = " ";

	/*
	 * Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return are the number of transfers completed.
	 * The lower 16 bits are the time in 0.5us ticks that the
	 * transfers took to complete.
	 */
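	/*
	 * The MB/s figures below fall out of the units: (transfers *
	 * len bytes) / (ticks * 0.5us) = 2 * transfers * len / ticks
	 * bytes/us, and one byte/us is one MB/s.  The read/write test
	 * moves data in both directions, hence its extra factor of two.
	 */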
	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0 >> 16) * len * 2) / (cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0 >> 16) * len * 2) / (cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0 >> 16) * len * 2 * 2) /
	    (cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) {
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
		    test, status);
	}
	return status;
}

/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary.  Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB.  If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */
static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;

	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
			    pectl);
			sc->tx_boundary = 2048;
		}
	}

	/*
	 * Load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0)
		return status;

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen.  Not required on Z8ES or newer.
	 */
	if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
		return 0;

	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS) {
		device_printf(dev, "Falling back to ethp! "
		    "Please install up to date fw\n");
	}
	return status;
}
" 537 "Please install up to date fw\n"); 538 } 539 return status; 540 } 541 542 static int 543 mxge_select_firmware(mxge_softc_t *sc) 544 { 545 int aligned = 0; 546 int force_firmware = mxge_force_firmware; 547 548 if (sc->throttle) 549 force_firmware = sc->throttle; 550 551 if (force_firmware != 0) { 552 if (force_firmware == 1) 553 aligned = 1; 554 else 555 aligned = 0; 556 if (bootverbose) { 557 device_printf(sc->dev, 558 "Assuming %s completions (forced)\n", 559 aligned ? "aligned" : "unaligned"); 560 } 561 goto abort; 562 } 563 564 /* 565 * If the PCIe link width is 4 or less, we can use the aligned 566 * firmware and skip any checks 567 */ 568 if (sc->link_width != 0 && sc->link_width <= 4) { 569 device_printf(sc->dev, "PCIe x%d Link, " 570 "expect reduced performance\n", sc->link_width); 571 aligned = 1; 572 goto abort; 573 } 574 575 if (mxge_firmware_probe(sc) == 0) 576 return 0; 577 578 abort: 579 if (aligned) { 580 sc->fw_name = mxge_fw_aligned; 581 sc->tx_boundary = 4096; 582 } else { 583 sc->fw_name = mxge_fw_unaligned; 584 sc->tx_boundary = 2048; 585 } 586 return mxge_load_firmware(sc, 0); 587 } 588 589 static int 590 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 591 { 592 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 593 if_printf(sc->ifp, "Bad firmware type: 0x%x\n", 594 be32toh(hdr->mcp_type)); 595 return EIO; 596 } 597 598 /* Save firmware version for sysctl */ 599 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version)); 600 if (bootverbose) 601 if_printf(sc->ifp, "firmware id: %s\n", hdr->version); 602 603 ksscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 604 &sc->fw_ver_minor, &sc->fw_ver_tiny); 605 606 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR && 607 sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 608 if_printf(sc->ifp, "Found firmware version %s\n", 609 sc->fw_version); 610 if_printf(sc->ifp, "Driver needs %d.%d\n", 611 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 612 return EINVAL; 613 } 614 return 0; 615 } 616 617 static void * 618 z_alloc(void *nil, u_int items, u_int size) 619 { 620 return kmalloc(items * size, M_TEMP, M_WAITOK); 621 } 622 623 static void 624 z_free(void *nil, void *ptr) 625 { 626 kfree(ptr, M_TEMP); 627 } 628 629 static int 630 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 631 { 632 z_stream zs; 633 char *inflate_buffer; 634 const struct firmware *fw; 635 const mcp_gen_header_t *hdr; 636 unsigned hdr_offset; 637 int status; 638 unsigned int i; 639 char dummy; 640 size_t fw_len; 641 642 fw = firmware_get(sc->fw_name); 643 if (fw == NULL) { 644 if_printf(sc->ifp, "Could not find firmware image %s\n", 645 sc->fw_name); 646 return ENOENT; 647 } 648 649 /* Setup zlib and decompress f/w */ 650 bzero(&zs, sizeof(zs)); 651 zs.zalloc = z_alloc; 652 zs.zfree = z_free; 653 status = inflateInit(&zs); 654 if (status != Z_OK) { 655 status = EIO; 656 goto abort_with_fw; 657 } 658 659 /* 660 * The uncompressed size is stored as the firmware version, 661 * which would otherwise go unused 662 */ 663 fw_len = (size_t)fw->version; 664 inflate_buffer = kmalloc(fw_len, M_TEMP, M_WAITOK); 665 zs.avail_in = fw->datasize; 666 zs.next_in = __DECONST(char *, fw->data); 667 zs.avail_out = fw_len; 668 zs.next_out = inflate_buffer; 669 status = inflate(&zs, Z_FINISH); 670 if (status != Z_STREAM_END) { 671 if_printf(sc->ifp, "zlib %d\n", status); 672 status = EIO; 673 goto abort_with_buffer; 674 } 675 676 /* Check id */ 677 hdr_offset = 678 htobe32(*(const uint32_t *)(inflate_buffer + MCP_HEADER_PTR_OFFSET)); 679 if 
((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 680 if_printf(sc->ifp, "Bad firmware file"); 681 status = EIO; 682 goto abort_with_buffer; 683 } 684 hdr = (const void*)(inflate_buffer + hdr_offset); 685 686 status = mxge_validate_firmware(sc, hdr); 687 if (status != 0) 688 goto abort_with_buffer; 689 690 /* Copy the inflated firmware to NIC SRAM. */ 691 for (i = 0; i < fw_len; i += 256) { 692 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, inflate_buffer + i, 693 min(256U, (unsigned)(fw_len - i))); 694 wmb(); 695 dummy = *sc->sram; 696 wmb(); 697 } 698 699 *limit = fw_len; 700 status = 0; 701 abort_with_buffer: 702 kfree(inflate_buffer, M_TEMP); 703 inflateEnd(&zs); 704 abort_with_fw: 705 firmware_put(fw, FIRMWARE_UNLOAD); 706 return status; 707 } 708 709 /* 710 * Enable or disable periodic RDMAs from the host to make certain 711 * chipsets resend dropped PCIe messages 712 */ 713 static void 714 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 715 { 716 char buf_bytes[72]; 717 volatile uint32_t *confirm; 718 volatile char *submit; 719 uint32_t *buf, dma_low, dma_high; 720 int i; 721 722 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 723 724 /* Clear confirmation addr */ 725 confirm = (volatile uint32_t *)sc->cmd; 726 *confirm = 0; 727 wmb(); 728 729 /* 730 * Send an rdma command to the PCIe engine, and wait for the 731 * response in the confirmation address. The firmware should 732 * write a -1 there to indicate it is alive and well 733 */ 734 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr); 735 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr); 736 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 737 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 738 buf[2] = htobe32(0xffffffff); /* confirm data */ 739 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.dmem_busaddr); 740 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.dmem_busaddr); 741 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 742 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 743 buf[5] = htobe32(enable); /* enable? */ 744 745 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 746 747 mxge_pio_copy(submit, buf, 64); 748 wmb(); 749 DELAY(1000); 750 wmb(); 751 i = 0; 752 while (*confirm != 0xffffffff && i < 20) { 753 DELAY(1000); 754 i++; 755 } 756 if (*confirm != 0xffffffff) { 757 if_printf(sc->ifp, "dummy rdma %s failed (%p = 0x%x)", 758 (enable ? 
"enable" : "disable"), confirm, *confirm); 759 } 760 } 761 762 static int 763 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 764 { 765 mcp_cmd_t *buf; 766 char buf_bytes[sizeof(*buf) + 8]; 767 volatile mcp_cmd_response_t *response = sc->cmd; 768 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 769 uint32_t dma_low, dma_high; 770 int err, sleep_total = 0; 771 772 /* Ensure buf is aligned to 8 bytes */ 773 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 774 775 buf->data0 = htobe32(data->data0); 776 buf->data1 = htobe32(data->data1); 777 buf->data2 = htobe32(data->data2); 778 buf->cmd = htobe32(cmd); 779 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr); 780 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr); 781 782 buf->response_addr.low = htobe32(dma_low); 783 buf->response_addr.high = htobe32(dma_high); 784 785 response->result = 0xffffffff; 786 wmb(); 787 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 788 789 /* 790 * Wait up to 20ms 791 */ 792 err = EAGAIN; 793 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 794 wmb(); 795 switch (be32toh(response->result)) { 796 case 0: 797 data->data0 = be32toh(response->data); 798 err = 0; 799 break; 800 case 0xffffffff: 801 DELAY(1000); 802 break; 803 case MXGEFW_CMD_UNKNOWN: 804 err = ENOSYS; 805 break; 806 case MXGEFW_CMD_ERROR_UNALIGNED: 807 err = E2BIG; 808 break; 809 case MXGEFW_CMD_ERROR_BUSY: 810 err = EBUSY; 811 break; 812 case MXGEFW_CMD_ERROR_I2C_ABSENT: 813 err = ENXIO; 814 break; 815 default: 816 if_printf(sc->ifp, "command %d failed, result = %d\n", 817 cmd, be32toh(response->result)); 818 err = ENXIO; 819 break; 820 } 821 if (err != EAGAIN) 822 break; 823 } 824 if (err == EAGAIN) { 825 if_printf(sc->ifp, "command %d timed out result = %d\n", 826 cmd, be32toh(response->result)); 827 } 828 return err; 829 } 830 831 static int 832 mxge_adopt_running_firmware(mxge_softc_t *sc) 833 { 834 struct mcp_gen_header *hdr; 835 const size_t bytes = sizeof(struct mcp_gen_header); 836 size_t hdr_offset; 837 int status; 838 839 /* 840 * Find running firmware header 841 */ 842 hdr_offset = 843 htobe32(*(volatile uint32_t *)(sc->sram + MCP_HEADER_PTR_OFFSET)); 844 845 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 846 if_printf(sc->ifp, "Running firmware has bad header offset " 847 "(%zu)\n", hdr_offset); 848 return EIO; 849 } 850 851 /* 852 * Copy header of running firmware from SRAM to host memory to 853 * validate firmware 854 */ 855 hdr = kmalloc(bytes, M_DEVBUF, M_WAITOK); 856 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 857 rman_get_bushandle(sc->mem_res), hdr_offset, (char *)hdr, bytes); 858 status = mxge_validate_firmware(sc, hdr); 859 kfree(hdr, M_DEVBUF); 860 861 /* 862 * Check to see if adopted firmware has bug where adopting 863 * it will cause broadcasts to be filtered unless the NIC 864 * is kept in ALLMULTI mode 865 */ 866 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 867 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 868 sc->adopted_rx_filter_bug = 1; 869 if_printf(sc->ifp, "Adopting fw %d.%d.%d: " 870 "working around rx filter bug\n", 871 sc->fw_ver_major, sc->fw_ver_minor, sc->fw_ver_tiny); 872 } 873 874 return status; 875 } 876 877 static int 878 mxge_load_firmware(mxge_softc_t *sc, int adopt) 879 { 880 volatile uint32_t *confirm; 881 volatile char *submit; 882 char buf_bytes[72]; 883 uint32_t *buf, size, dma_low, dma_high; 884 int status, i; 885 886 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 887 888 size = 
static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;

		/*
		 * Try to use the currently running firmware, if
		 * it is new enough
		 */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			if_printf(sc->ifp,
			    "failed to adopt running firmware\n");
			return status;
		}
		if_printf(sc->ifp, "Successfully adopted running firmware\n");

		if (sc->tx_boundary == 4096) {
			if_printf(sc->ifp,
			    "Using firmware currently running on NIC.  "
			    "For optimal\n");
			if_printf(sc->ifp, "performance consider loading "
			    "optimized firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}

	/* Clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/*
	 * Send a reload command to the bootstrap MCP, and wait for the
	 * response in the confirmation address.  The firmware should
	 * write a -1 there to indicate it is alive and well
	 */
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/*
	 * FIX: All newest firmware should un-protect the bottom of
	 * the sram before handoff.  However, the very first interfaces
	 * do not.  Therefore the handoff copy must skip the first 8 bytes
	 */
	/* where the code starts */
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8);	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000 * 10);
		i++;
	}
	if (*confirm != 0xffffffff) {
		if_printf(sc->ifp, "handoff failed (%p = 0x%x)\n",
		    confirm, *confirm);
		return ENXIO;
	}
	return 0;
}

static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;

	cmd.data0 = (addr[0] << 24) | (addr[1] << 16) |
	    (addr[2] << 8) | addr[3];
	cmd.data1 = (addr[4] << 8) | (addr[5]);
	return mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
}

static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, &cmd);
	if (status) {
		if_printf(sc->ifp, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, &cmd);
	if (status)
		if_printf(sc->ifp, "Failed to set promisc mode\n");
}
static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists */
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		if_printf(ifp, "Failed MXGEFW_ENABLE_ALLMULTI, "
		    "error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI) {
		/* Request to disable multicast filtering, so quit here */
		return;
	}

	/* Flush all the filters */
	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		if_printf(ifp, "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, "
		    "error status: %d\n", err);
		return;
	}

	/*
	 * Walk the multicast list, and add each address
	 */
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;

		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		    &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		    &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			if_printf(ifp, "Failed MXGEFW_JOIN_MULTICAST_GROUP, "
			    "error status: %d\n", err);
			/* Abort, leaving multicast filtering off */
			return;
		}
	}

	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		if_printf(ifp, "Failed MXGEFW_DISABLE_ALLMULTI, "
		    "error status: %d\n", err);
	}
}

#if 0
static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/*
	 * Try to set nbufs to see if we can
	 * use virtually contiguous jumbos
	 */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
	    &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}
#endif

static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/*
	 * Try to send a reset command to the card to see if it
	 * is alive
	 */
	memset(&cmd, 0, sizeof(cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		if_printf(sc->ifp, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/* Set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */
	if (sc->num_slices > 1) {
		/* Ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
		if (status != 0) {
			if_printf(sc->ifp, "failed to get number of slices\n");
			return status;
		}

		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, &cmd);
		if (status != 0) {
			if_printf(sc->ifp, "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts */
		for (slice = 0; slice < sc->num_slices; slice++) {
			ss = &sc->ss[slice];

			rx_done = &ss->rx_data.rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);

			cmd.data0 =
			    MXGE_LOWPART_TO_U32(ss->rx_done_dma.dmem_busaddr);
			cmd.data1 =
			    MXGE_HIGHPART_TO_U32(ss->rx_done_dma.dmem_busaddr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA,
			    &cmd);
		}
	}

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET,
	    &cmd);
	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);

	if (status != 0) {
		if_printf(sc->ifp, "failed to set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* Run a DMA benchmark */
	mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);

		/* Reset mcp/driver shared state back to 0 */
		ss->rx_data.rx_done.idx = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->rx_data.rx_big.cnt = 0;
		ss->rx_data.rx_small.cnt = 0;
		if (ss->fw_stats != NULL)
			bzero(ss->fw_stats, sizeof(*ss->fw_stats));
	}
	sc->rdma_tags_available = 15;

	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);

	if (sc->throttle) {
		cmd.data0 = sc->throttle;
		if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd))
			if_printf(sc->ifp, "can't enable throttle\n");
	}
	return status;
}
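/*
 * The sysctl handlers below share a pattern: snapshot the current
 * value, let sysctl_handle_int() perform the userland read/write,
 * and push accepted changes to the NIC only while holding the ifnet
 * serializers.
 */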
static int
mxge_change_throttle(SYSCTL_HANDLER_ARGS)
{
	mxge_cmd_t cmd;
	mxge_softc_t *sc;
	int err;
	unsigned int throttle;

	sc = arg1;
	throttle = sc->throttle;
	err = sysctl_handle_int(oidp, &throttle, arg2, req);
	if (err != 0)
		return err;

	if (throttle == sc->throttle)
		return 0;

	if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
		return EINVAL;

	ifnet_serialize_all(sc->ifp);

	cmd.data0 = throttle;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
	if (err == 0)
		sc->throttle = throttle;

	ifnet_deserialize_all(sc->ifp);
	return err;
}

static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0)
		return err;

	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000 * 1000)
		return EINVAL;

	ifnet_serialize_all(sc->ifp);

	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	ifnet_deserialize_all(sc->ifp);
	return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0)
		return err;

	if (enabled == sc->pause)
		return 0;

	ifnet_serialize_all(sc->ifp);
	err = mxge_change_pause(sc, enabled);
	ifnet_deserialize_all(sc->ifp);

	return err;
}

static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}

static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	if (sc->ss != NULL) {
		struct mxge_slice_state *ss;
		int slice;

		for (slice = 0; slice < sc->num_slices; slice++) {
			ss = &sc->ss[slice];
			if (ss->sysctl_tree != NULL) {
				sysctl_ctx_free(&ss->sysctl_ctx);
				ss->sysctl_tree = NULL;
			}
		}
	}

	if (sc->slice_sysctl_tree != NULL) {
		sysctl_ctx_free(&sc->slice_sysctl_ctx);
		sc->slice_sysctl_tree = NULL;
	}

	if (sc->sysctl_tree != NULL) {
		sysctl_ctx_free(&sc->sysctl_ctx);
		sc->sysctl_tree = NULL;
	}
}

static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = &sc->sysctl_ctx;
	sysctl_ctx_init(ctx);
	sc->sysctl_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_hw),
	    OID_AUTO, device_get_nameunit(sc->dev), CTLFLAG_RD, 0, "");
	if (sc->sysctl_tree == NULL) {
		device_printf(sc->dev, "can't add sysctl node\n");
		return;
	}

	children = SYSCTL_CHILDREN(sc->sysctl_tree);
	fw = sc->ss[0].fw_stats;

	/*
	 * Random information
	 */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version",
	    CTLFLAG_RD, &sc->fw_version, 0, "firmware version");

	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "serial_number",
	    CTLFLAG_RD, &sc->serial_number_string, 0, "serial number");

	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "product_code",
	    CTLFLAG_RD, &sc->product_code_string, 0, "product code");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "pcie_link_width",
	    CTLFLAG_RD, &sc->link_width, 0, "link width");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_boundary",
	    CTLFLAG_RD, &sc->tx_boundary, 0, "tx boundary");

	SYSCTL_ADD_INT(ctx,
	    children, OID_AUTO, "write_combine",
	    CTLFLAG_RD, &sc->wc, 0, "write combining PIO");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_dma_MBs",
	    CTLFLAG_RD, &sc->read_dma, 0, "DMA Read speed in MB/s");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_dma_MBs",
	    CTLFLAG_RD, &sc->write_dma, 0, "DMA Write speed in MB/s");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_write_dma_MBs",
	    CTLFLAG_RD, &sc->read_write_dma, 0,
	    "DMA concurrent Read/Write speed in MB/s");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "watchdog_resets",
	    CTLFLAG_RD, &sc->watchdog_resets, 0,
	    "Number of times NIC was reset");

	/*
	 * Performance related tunables
	 */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_coal_delay",
	    CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_intr_coal, "I",
	    "Interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "throttle",
	    CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_throttle, "I",
	    "Transmit throttling");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "flow_control_enabled",
	    CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_flow_control, "I",
	    "Whether flow control is enabled");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "deassert_wait",
	    CTLFLAG_RW, &mxge_deassert_wait, 0,
	    "Wait for IRQ line to go low in ihandler");

	/*
	 * Stats block from firmware is in network byte order.
	 * Need to swap it
	 */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "link_up",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 0,
	    mxge_handle_be32, "I", "link up");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_tags_available",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 0,
	    mxge_handle_be32, "I", "rdma_tags_available");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_crc32",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_crc32, 0,
	    mxge_handle_be32, "I", "dropped_bad_crc32");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_phy",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_phy, 0,
	    mxge_handle_be32, "I", "dropped_bad_phy");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_error_or_filtered",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_error_or_filtered, 0,
	    mxge_handle_be32, "I", "dropped_link_error_or_filtered");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_overflow",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 0,
	    mxge_handle_be32, "I", "dropped_link_overflow");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_multicast_filtered",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_multicast_filtered, 0,
	    mxge_handle_be32, "I", "dropped_multicast_filtered");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_no_big_buffer",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 0,
	    mxge_handle_be32, "I", "dropped_no_big_buffer");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_no_small_buffer",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_small_buffer, 0,
	    mxge_handle_be32, "I", "dropped_no_small_buffer");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_overrun",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 0,
	    mxge_handle_be32, "I", "dropped_overrun");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_pause",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_pause, 0,
	    mxge_handle_be32, "I", "dropped_pause");

	SYSCTL_ADD_PROC(ctx, children,
OID_AUTO, "dropped_runt", 1480 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 0, 1481 mxge_handle_be32, "I", "dropped_runt"); 1482 1483 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_unicast_filtered", 1484 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 0, 1485 mxge_handle_be32, "I", "dropped_unicast_filtered"); 1486 1487 /* add counters exported for debugging from all slices */ 1488 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1489 sc->slice_sysctl_tree = SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, 1490 children, OID_AUTO, "slice", CTLFLAG_RD, 0, ""); 1491 if (sc->slice_sysctl_tree == NULL) { 1492 device_printf(sc->dev, "can't add slice sysctl node\n"); 1493 return; 1494 } 1495 1496 for (slice = 0; slice < sc->num_slices; slice++) { 1497 ss = &sc->ss[slice]; 1498 sysctl_ctx_init(&ss->sysctl_ctx); 1499 ctx = &ss->sysctl_ctx; 1500 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1501 ksprintf(slice_num, "%d", slice); 1502 ss->sysctl_tree = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 1503 slice_num, CTLFLAG_RD, 0, ""); 1504 if (ss->sysctl_tree == NULL) { 1505 device_printf(sc->dev, 1506 "can't add %d slice sysctl node\n", slice); 1507 return; /* XXX continue? */ 1508 } 1509 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1510 1511 /* 1512 * XXX change to ULONG 1513 */ 1514 1515 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_small_cnt", 1516 CTLFLAG_RD, &ss->rx_data.rx_small.cnt, 0, "rx_small_cnt"); 1517 1518 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_big_cnt", 1519 CTLFLAG_RD, &ss->rx_data.rx_big.cnt, 0, "rx_small_cnt"); 1520 1521 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_req", 1522 CTLFLAG_RD, &ss->tx.req, 0, "tx_req"); 1523 1524 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_done", 1525 CTLFLAG_RD, &ss->tx.done, 0, "tx_done"); 1526 1527 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_pkt_done", 1528 CTLFLAG_RD, &ss->tx.pkt_done, 0, "tx_done"); 1529 1530 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_queue_active", 1531 CTLFLAG_RD, &ss->tx.queue_active, 0, "tx_queue_active"); 1532 1533 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_activate", 1534 CTLFLAG_RD, &ss->tx.activate, 0, "tx_activate"); 1535 1536 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_deactivate", 1537 CTLFLAG_RD, &ss->tx.deactivate, 0, "tx_deactivate"); 1538 } 1539 } 1540 1541 /* 1542 * Copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1543 * backwards one at a time and handle ring wraps 1544 */ 1545 static __inline void 1546 mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1547 mcp_kreq_ether_send_t *src, int cnt) 1548 { 1549 int idx, starting_slot; 1550 1551 starting_slot = tx->req; 1552 while (cnt > 1) { 1553 cnt--; 1554 idx = (starting_slot + cnt) & tx->mask; 1555 mxge_pio_copy(&tx->lanai[idx], &src[cnt], sizeof(*src)); 1556 wmb(); 1557 } 1558 } 1559 1560 /* 1561 * Copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1562 * at most 32 bytes at a time, so as to avoid involving the software 1563 * pio handler in the nic. 
/*
 * Copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */
static __inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < cnt - 1; i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/*
		 * Submit all but the first request, and ensure
		 * that it is submitted below
		 */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* Submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* Re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}

static int
mxge_pullup_tso(struct mbuf **mp)
{
	int hoff, iphlen, thoff;
	struct mbuf *m;

	m = *mp;
	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));

	iphlen = m->m_pkthdr.csum_iphlen;
	thoff = m->m_pkthdr.csum_thlen;
	hoff = m->m_pkthdr.csum_lhlen;

	KASSERT(iphlen > 0, ("invalid ip hlen"));
	KASSERT(thoff > 0, ("invalid tcp hlen"));
	KASSERT(hoff > 0, ("invalid ether hlen"));

	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
		m = m_pullup(m, hoff + iphlen + thoff);
		if (m == NULL) {
			*mp = NULL;
			return ENOBUFS;
		}
		*mp = m;
	}
	return 0;
}

static int
mxge_encap_tso(mxge_tx_ring_t *tx, struct mxge_buffer_state *info_map,
    struct mbuf *m, int busdma_seg_cnt)
{
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss;
	uint8_t flags, flags_next;
	struct mxge_buffer_state *info_last;
	bus_dmamap_t map = info_map->map;

	mss = m->m_pkthdr.tso_segsz;

	/*
	 * Negative cum_len signifies to the send loop that we are
	 * still in the header portion of the TSO packet.
	 */
	cum_len = -(m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen +
	    m->m_pkthdr.csum_thlen);

	/*
	 * TSO implies checksum offload on this hardware
	 */
	cksum_offset = m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen;
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/*
	 * For TSO, pseudo_hdr_offset holds mss.  The firmware figures
	 * out where to put the checksum by parsing the header.
	 */
	pseudo_hdr_offset = htobe16(mss);

	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;

	/*
	 * "rdma_count" is the number of RDMAs belonging to the current
	 * packet BEFORE the current send request.  For non-TSO packets,
	 * this is equal to "count".
	 *
	 * For TSO packets, rdma_count needs to be reset to 0 after a
	 * segment cut.
	 *
	 * The rdma_count field of the send request is the number of
	 * RDMAs of the packet starting at that request.  For TSO send
	 * requests with one or more cuts in the middle, this is the
	 * number of RDMAs starting after the last cut in the request.
	 * All previous segments before the last cut implicitly have 1
	 * RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand, it must be
	 * filled-in retroactively - after each segmentation cut or at
	 * the end of the entire packet.
	 */
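	/*
	 * Illustrative example: with mss = 1448, a request whose
	 * payload crosses an mss boundary is flagged
	 * MXGEFW_FLAGS_TSO_CHOP; when the running payload count lands
	 * exactly on a multiple of mss, the following request is
	 * flagged MXGEFW_FLAGS_FIRST and the retroactive rdma_count
	 * fixup via (req - rdma_count)->rdma_count starts over.
	 */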
	while (busdma_seg_cnt) {
		/*
		 * Break the busdma segment up into pieces
		 */
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req - rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* Payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |=
				    next_is_first * MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* Header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
				    MXGEFW_FLAGS_FIRST |
				    (small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags =
			    flags | ((cum_len & 1) * MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (__predict_false(cksum_offset > seglen))
				cksum_offset -= seglen;
			else
				cksum_offset = 0;
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req - rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	info_last = &tx->info[((cnt - 1) + tx->req) & tx->mask];

	info_map->map = info_last->map;
	info_last->map = map;
	info_last->m = m;

	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return 0;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	return ENOBUFS;
}
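/*
 * mxge_encap() turns one mbuf chain into a chain of send requests:
 * map it for DMA (defragmenting if needed), hand TSO frames to
 * mxge_encap_tso(), otherwise fill in checksum offload flags, and
 * pad runts out to 60 bytes using the shared zero pad buffer.
 */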
& CSUM_TSO) { 1807 err = mxge_pullup_tso(&m); 1808 if (__predict_false(err)) 1809 return err; 1810 } 1811 1812 /* 1813 * Map the frame for DMA 1814 */ 1815 idx = tx->req & tx->mask; 1816 info_map = &tx->info[idx]; 1817 map = info_map->map; 1818 1819 err = bus_dmamap_load_mbuf_defrag(tx->dmat, map, &m, 1820 tx->seg_list, tx->max_desc - 2, &cnt, BUS_DMA_NOWAIT); 1821 if (__predict_false(err != 0)) 1822 goto drop; 1823 bus_dmamap_sync(tx->dmat, map, BUS_DMASYNC_PREWRITE); 1824 1825 /* 1826 * TSO is different enough, we handle it in another routine 1827 */ 1828 if (m->m_pkthdr.csum_flags & CSUM_TSO) 1829 return mxge_encap_tso(tx, info_map, m, cnt); 1830 1831 req = tx->req_list; 1832 cksum_offset = 0; 1833 pseudo_hdr_offset = 0; 1834 flags = MXGEFW_FLAGS_NO_TSO; 1835 1836 /* 1837 * Checksum offloading 1838 */ 1839 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 1840 cksum_offset = m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen; 1841 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 1842 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 1843 req->cksum_offset = cksum_offset; 1844 flags |= MXGEFW_FLAGS_CKSUM; 1845 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 1846 } else { 1847 odd_flag = 0; 1848 } 1849 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 1850 flags |= MXGEFW_FLAGS_SMALL; 1851 1852 /* 1853 * Convert segments into a request list 1854 */ 1855 cum_len = 0; 1856 seg = tx->seg_list; 1857 req->flags = MXGEFW_FLAGS_FIRST; 1858 for (i = 0; i < cnt; i++) { 1859 req->addr_low = htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 1860 req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1861 req->length = htobe16(seg->ds_len); 1862 req->cksum_offset = cksum_offset; 1863 if (cksum_offset > seg->ds_len) 1864 cksum_offset -= seg->ds_len; 1865 else 1866 cksum_offset = 0; 1867 req->pseudo_hdr_offset = pseudo_hdr_offset; 1868 req->pad = 0; /* complete solid 16-byte block */ 1869 req->rdma_count = 1; 1870 req->flags |= flags | ((cum_len & 1) * odd_flag); 1871 cum_len += seg->ds_len; 1872 seg++; 1873 req++; 1874 req->flags = 0; 1875 } 1876 req--; 1877 1878 /* 1879 * Pad runt to 60 bytes 1880 */ 1881 if (cum_len < 60) { 1882 req++; 1883 req->addr_low = htobe32(MXGE_LOWPART_TO_U32(zeropad)); 1884 req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(zeropad)); 1885 req->length = htobe16(60 - cum_len); 1886 req->cksum_offset = 0; 1887 req->pseudo_hdr_offset = pseudo_hdr_offset; 1888 req->pad = 0; /* complete solid 16-byte block */ 1889 req->rdma_count = 1; 1890 req->flags |= flags | ((cum_len & 1) * odd_flag); 1891 cnt++; 1892 } 1893 1894 tx->req_list[0].rdma_count = cnt; 1895 #if 0 1896 /* print what the firmware will see */ 1897 for (i = 0; i < cnt; i++) { 1898 kprintf("%d: addr: 0x%x 0x%x len:%d pso%d," 1899 "cso:%d, flags:0x%x, rdma:%d\n", 1900 i, (int)ntohl(tx->req_list[i].addr_high), 1901 (int)ntohl(tx->req_list[i].addr_low), 1902 (int)ntohs(tx->req_list[i].length), 1903 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 1904 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 1905 tx->req_list[i].rdma_count); 1906 } 1907 kprintf("--------------\n"); 1908 #endif 1909 info_last = &tx->info[((cnt - 1) + tx->req) & tx->mask]; 1910 1911 info_map->map = info_last->map; 1912 info_last->map = map; 1913 info_last->m = m; 1914 1915 mxge_submit_req(tx, tx->req_list, cnt); 1916 #ifdef IFNET_BUF_RING 1917 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 1918 /* tell the NIC to start polling this slice */ 1919 *tx->send_go = 1; 1920 tx->queue_active = 1; 1921 tx->activate++; 1922 wmb(); 1923 } 1924 #endif 1925 return 0; 
1926 1927 drop: 1928 m_freem(m); 1929 return err; 1930 } 1931 1932 static void 1933 mxge_start(struct ifnet *ifp, struct ifaltq_subque *ifsq) 1934 { 1935 mxge_softc_t *sc = ifp->if_softc; 1936 mxge_tx_ring_t *tx; 1937 bus_addr_t zeropad; 1938 int encap = 0; 1939 1940 /* XXX Only use the first slice for now */ 1941 tx = &sc->ss[0].tx; 1942 1943 ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq); 1944 ASSERT_SERIALIZED(&tx->tx_serialize); 1945 1946 if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq)) 1947 return; 1948 1949 zeropad = sc->zeropad_dma.dmem_busaddr; 1950 while (tx->mask - (tx->req - tx->done) > tx->max_desc) { 1951 struct mbuf *m; 1952 int error; 1953 1954 m = ifsq_dequeue(ifsq); 1955 if (m == NULL) 1956 goto done; 1957 1958 BPF_MTAP(ifp, m); 1959 error = mxge_encap(tx, m, zeropad); 1960 if (!error) 1961 encap = 1; 1962 else 1963 IFNET_STAT_INC(ifp, oerrors, 1); 1964 } 1965 1966 /* Ran out of transmit slots */ 1967 ifsq_set_oactive(ifsq); 1968 done: 1969 if (encap) 1970 ifp->if_timer = 5; 1971 } 1972 1973 static void 1974 mxge_watchdog(struct ifnet *ifp) 1975 { 1976 struct mxge_softc *sc = ifp->if_softc; 1977 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 1978 mxge_tx_ring_t *tx = &sc->ss[0].tx; 1979 1980 ASSERT_IFNET_SERIALIZED_ALL(ifp); 1981 1982 /* Check for pause blocking before resetting */ 1983 if (tx->watchdog_rx_pause == rx_pause) { 1984 mxge_warn_stuck(sc, tx, 0); 1985 mxge_watchdog_reset(sc); 1986 return; 1987 } else { 1988 if_printf(ifp, "Flow control blocking xmits, " 1989 "check link partner\n"); 1990 } 1991 tx->watchdog_rx_pause = rx_pause; 1992 } 1993 1994 /* 1995 * Copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 1996 * at most 32 bytes at a time, so as to avoid involving the software 1997 * pio handler in the nic. 
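 * (Each mcp_kreq_ether_recv_t is an 8-byte DMA address pair, so the
 * eight entries are pushed as two 32-byte bursts below.)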
We re-write the first segment's low 1998 * DMA address to mark it valid only after we write the entire chunk 1999 * in a burst 2000 */ 2001 static __inline void 2002 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2003 mcp_kreq_ether_recv_t *src) 2004 { 2005 uint32_t low; 2006 2007 low = src->addr_low; 2008 src->addr_low = 0xffffffff; 2009 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2010 wmb(); 2011 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2012 wmb(); 2013 src->addr_low = low; 2014 dst->addr_low = low; 2015 wmb(); 2016 } 2017 2018 static int 2019 mxge_get_buf_small(mxge_rx_ring_t *rx, bus_dmamap_t map, int idx, 2020 boolean_t init) 2021 { 2022 bus_dma_segment_t seg; 2023 struct mbuf *m; 2024 int cnt, err, mflag; 2025 2026 mflag = MB_DONTWAIT; 2027 if (__predict_false(init)) 2028 mflag = MB_WAIT; 2029 2030 m = m_gethdr(mflag, MT_DATA); 2031 if (m == NULL) { 2032 err = ENOBUFS; 2033 if (__predict_false(init)) { 2034 /* 2035 * During initialization, there 2036 * is nothing to setup; bail out 2037 */ 2038 return err; 2039 } 2040 goto done; 2041 } 2042 m->m_len = m->m_pkthdr.len = MHLEN; 2043 2044 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m, 2045 &seg, 1, &cnt, BUS_DMA_NOWAIT); 2046 if (err != 0) { 2047 m_freem(m); 2048 if (__predict_false(init)) { 2049 /* 2050 * During initialization, there 2051 * is nothing to setup; bail out 2052 */ 2053 return err; 2054 } 2055 goto done; 2056 } 2057 2058 rx->info[idx].m = m; 2059 rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2060 rx->shadow[idx].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2061 2062 done: 2063 if ((idx & 7) == 7) 2064 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2065 return err; 2066 } 2067 2068 static int 2069 mxge_get_buf_big(mxge_rx_ring_t *rx, bus_dmamap_t map, int idx, 2070 boolean_t init) 2071 { 2072 bus_dma_segment_t seg; 2073 struct mbuf *m; 2074 int cnt, err, mflag; 2075 2076 mflag = MB_DONTWAIT; 2077 if (__predict_false(init)) 2078 mflag = MB_WAIT; 2079 2080 if (rx->cl_size == MCLBYTES) 2081 m = m_getcl(mflag, MT_DATA, M_PKTHDR); 2082 else 2083 m = m_getjcl(mflag, MT_DATA, M_PKTHDR, MJUMPAGESIZE); 2084 if (m == NULL) { 2085 err = ENOBUFS; 2086 if (__predict_false(init)) { 2087 /* 2088 * During initialization, there 2089 * is nothing to setup; bail out 2090 */ 2091 return err; 2092 } 2093 goto done; 2094 } 2095 m->m_len = m->m_pkthdr.len = rx->cl_size; 2096 2097 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m, 2098 &seg, 1, &cnt, BUS_DMA_NOWAIT); 2099 if (err != 0) { 2100 m_freem(m); 2101 if (__predict_false(init)) { 2102 /* 2103 * During initialization, there 2104 * is nothing to setup; bail out 2105 */ 2106 return err; 2107 } 2108 goto done; 2109 } 2110 2111 rx->info[idx].m = m; 2112 rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2113 rx->shadow[idx].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2114 2115 done: 2116 if ((idx & 7) == 7) 2117 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2118 return err; 2119 } 2120 2121 /* 2122 * Myri10GE hardware checksums are not valid if the sender 2123 * padded the frame with non-zero padding. This is because 2124 * the firmware just does a simple 16-bit 1s complement 2125 * checksum across the entire frame, excluding the first 14 2126 * bytes. 
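 * (The check below folds the firmware's partial sum together with the
 * IPv4 pseudo-header; a frame carrying a valid TCP or UDP checksum
 * then comes out to exactly zero.)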
It is best to simply check the checksum and
 * tell the stack about it only if the checksum is good.
 */
static __inline uint16_t
mxge_rx_csum(struct mbuf *m, int csum)
{
	const struct ether_header *eh;
	const struct ip *ip;
	uint16_t c;

	eh = mtod(m, const struct ether_header *);

	/* Only deal with IPv4 TCP & UDP for now */
	if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
		return 1;

	ip = (const struct ip *)(eh + 1);
	if (__predict_false(ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP))
		return 1;

#ifdef INET
	c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
	    htonl(ntohs(csum) + ntohs(ip->ip_len) -
	    (ip->ip_hl << 2) + ip->ip_p));
#else
	c = 1;
#endif
	c ^= 0xffff;
	return c;
}

static void
mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
{
	struct ether_vlan_header *evl;
	uint32_t partial;

	evl = mtod(m, struct ether_vlan_header *);

	/*
	 * Fix checksum by subtracting EVL_ENCAPLEN bytes after
	 * what the firmware thought was the end of the ethernet
	 * header.
	 */

	/* Put checksum into host byte order */
	*csum = ntohs(*csum);

	partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
	*csum += ~partial;
	*csum += ((*csum) < ~partial);
	*csum = ((*csum) >> 16) + ((*csum) & 0xFFFF);
	*csum = ((*csum) >> 16) + ((*csum) & 0xFFFF);

	/*
	 * Restore checksum to network byte order;
	 * later consumers expect this
	 */
	*csum = htons(*csum);

	/* save the tag */
	m->m_pkthdr.ether_vlantag = ntohs(evl->evl_tag);
	m->m_flags |= M_VLANTAG;

	/*
	 * Remove the 802.1q header by copying the Ethernet
	 * addresses over it and adjusting the beginning of
	 * the data in the mbuf.  The encapsulated Ethernet
	 * type field is already in place.
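	 * Concretely: the 12 bytes of destination and source address
	 * are copied EVL_ENCAPLEN (4) bytes deeper into the frame by
	 * the bcopy() below, and m_adj() then trims the 4 stale bytes
	 * off the front of the mbuf.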
2195 */ 2196 bcopy((char *)evl, (char *)evl + EVL_ENCAPLEN, 2197 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2198 m_adj(m, EVL_ENCAPLEN); 2199 } 2200 2201 2202 static __inline void 2203 mxge_rx_done_big(struct ifnet *ifp, mxge_rx_ring_t *rx, 2204 uint32_t len, uint32_t csum) 2205 { 2206 struct mbuf *m; 2207 const struct ether_header *eh; 2208 bus_dmamap_t old_map; 2209 int idx; 2210 2211 idx = rx->cnt & rx->mask; 2212 rx->cnt++; 2213 2214 /* Save a pointer to the received mbuf */ 2215 m = rx->info[idx].m; 2216 2217 /* Try to replace the received mbuf */ 2218 if (mxge_get_buf_big(rx, rx->extra_map, idx, FALSE)) { 2219 /* Drop the frame -- the old mbuf is re-cycled */ 2220 IFNET_STAT_INC(ifp, ierrors, 1); 2221 return; 2222 } 2223 2224 /* Unmap the received buffer */ 2225 old_map = rx->info[idx].map; 2226 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2227 bus_dmamap_unload(rx->dmat, old_map); 2228 2229 /* Swap the bus_dmamap_t's */ 2230 rx->info[idx].map = rx->extra_map; 2231 rx->extra_map = old_map; 2232 2233 /* 2234 * mcp implicitly skips 1st 2 bytes so that packet is properly 2235 * aligned 2236 */ 2237 m->m_data += MXGEFW_PAD; 2238 2239 m->m_pkthdr.rcvif = ifp; 2240 m->m_len = m->m_pkthdr.len = len; 2241 2242 IFNET_STAT_INC(ifp, ipackets, 1); 2243 2244 eh = mtod(m, const struct ether_header *); 2245 if (eh->ether_type == htons(ETHERTYPE_VLAN)) 2246 mxge_vlan_tag_remove(m, &csum); 2247 2248 /* If the checksum is valid, mark it in the mbuf header */ 2249 if ((ifp->if_capenable & IFCAP_RXCSUM) && 2250 mxge_rx_csum(m, csum) == 0) { 2251 /* Tell the stack that the checksum is good */ 2252 m->m_pkthdr.csum_data = 0xffff; 2253 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2254 CSUM_DATA_VALID; 2255 } 2256 ifp->if_input(ifp, m); 2257 } 2258 2259 static __inline void 2260 mxge_rx_done_small(struct ifnet *ifp, mxge_rx_ring_t *rx, 2261 uint32_t len, uint32_t csum) 2262 { 2263 const struct ether_header *eh; 2264 struct mbuf *m; 2265 bus_dmamap_t old_map; 2266 int idx; 2267 2268 idx = rx->cnt & rx->mask; 2269 rx->cnt++; 2270 2271 /* Save a pointer to the received mbuf */ 2272 m = rx->info[idx].m; 2273 2274 /* Try to replace the received mbuf */ 2275 if (mxge_get_buf_small(rx, rx->extra_map, idx, FALSE)) { 2276 /* Drop the frame -- the old mbuf is re-cycled */ 2277 IFNET_STAT_INC(ifp, ierrors, 1); 2278 return; 2279 } 2280 2281 /* Unmap the received buffer */ 2282 old_map = rx->info[idx].map; 2283 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2284 bus_dmamap_unload(rx->dmat, old_map); 2285 2286 /* Swap the bus_dmamap_t's */ 2287 rx->info[idx].map = rx->extra_map; 2288 rx->extra_map = old_map; 2289 2290 /* 2291 * mcp implicitly skips 1st 2 bytes so that packet is properly 2292 * aligned 2293 */ 2294 m->m_data += MXGEFW_PAD; 2295 2296 m->m_pkthdr.rcvif = ifp; 2297 m->m_len = m->m_pkthdr.len = len; 2298 2299 IFNET_STAT_INC(ifp, ipackets, 1); 2300 2301 eh = mtod(m, const struct ether_header *); 2302 if (eh->ether_type == htons(ETHERTYPE_VLAN)) 2303 mxge_vlan_tag_remove(m, &csum); 2304 2305 /* If the checksum is valid, mark it in the mbuf header */ 2306 if ((ifp->if_capenable & IFCAP_RXCSUM) && 2307 mxge_rx_csum(m, csum) == 0) { 2308 /* Tell the stack that the checksum is good */ 2309 m->m_pkthdr.csum_data = 0xffff; 2310 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2311 CSUM_DATA_VALID; 2312 } 2313 ifp->if_input(ifp, m); 2314 } 2315 2316 static __inline void 2317 mxge_clean_rx_done(struct ifnet *ifp, struct mxge_rx_data *rx_data) 2318 { 2319 mxge_rx_done_t *rx_done = &rx_data->rx_done; 2320 2321 
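	/*
	 * The firmware posts each completion by writing a non-zero
	 * length (plus its partial checksum) into the next rx_done
	 * slot; the length is cleared again below so that a recycled
	 * slot is not mistaken for a new completion.
	 */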
	while (rx_done->entry[rx_done->idx].length != 0) {
		uint16_t length, checksum;

		length = ntohs(rx_done->entry[rx_done->idx].length);
		rx_done->entry[rx_done->idx].length = 0;

		checksum = rx_done->entry[rx_done->idx].checksum;

		if (length <= MXGE_RX_SMALL_BUFLEN) {
			mxge_rx_done_small(ifp, &rx_data->rx_small,
			    length, checksum);
		} else {
			mxge_rx_done_big(ifp, &rx_data->rx_big,
			    length, checksum);
		}

		rx_done->idx++;
		rx_done->idx &= rx_done->mask;
	}
}

static __inline void
mxge_tx_done(struct ifnet *ifp, mxge_tx_ring_t *tx, uint32_t mcp_idx)
{
	ASSERT_SERIALIZED(&tx->tx_serialize);

	while (tx->pkt_done != mcp_idx) {
		struct mbuf *m;
		int idx;

		idx = tx->done & tx->mask;
		tx->done++;

		m = tx->info[idx].m;
		/*
		 * mbuf and DMA map only attached to the first
		 * segment per-mbuf.
		 */
		if (m != NULL) {
			tx->pkt_done++;
			IFNET_STAT_INC(ifp, opackets, 1);
			tx->info[idx].m = NULL;
			bus_dmamap_unload(tx->dmat, tx->info[idx].map);
			m_freem(m);
		}
	}

	/*
	 * If we have space, clear OACTIVE to tell the stack that
	 * it's OK to send packets
	 */
	if (tx->req - tx->done < (tx->mask + 1) / 2) {
		ifq_clr_oactive(&ifp->if_snd);
		if (tx->req == tx->done)
			ifp->if_timer = 0;
	}

	if (!ifq_is_empty(&ifp->if_snd))
		if_devstart(ifp);

#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
		/*
		 * Let the NIC stop polling this queue, since there
		 * are no more transmits pending
		 */
		if (tx->req == tx->done) {
			*tx->send_stop = 1;
			tx->queue_active = 0;
			tx->deactivate++;
			wmb();
		}
	}
#endif
}

static struct mxge_media_type mxge_xfp_media_types[] = {
	{IFM_10G_CX4,	0x7f,		"10GBASE-CX4 (module)"},
	{IFM_10G_SR,	(1 << 7),	"10GBASE-SR"},
	{IFM_10G_LR,	(1 << 6),	"10GBASE-LR"},
	{0,		(1 << 5),	"10GBASE-ER"},
	{IFM_10G_LRM,	(1 << 4),	"10GBASE-LRM"},
	{0,		(1 << 3),	"10GBASE-SW"},
	{0,		(1 << 2),	"10GBASE-LW"},
	{0,		(1 << 1),	"10GBASE-EW"},
	{0,		(1 << 0),	"Reserved"}
};

static struct mxge_media_type mxge_sfp_media_types[] = {
	{IFM_10G_TWINAX,	0,	"10GBASE-Twinax"},
	{0,		(1 << 7),	"Reserved"},
	{IFM_10G_LRM,	(1 << 6),	"10GBASE-LRM"},
	{IFM_10G_LR,	(1 << 5),	"10GBASE-LR"},
	{IFM_10G_SR,	(1 << 4),	"10GBASE-SR"},
	{IFM_10G_TWINAX,(1 << 0),	"10GBASE-Twinax"}
};

static void
mxge_media_set(mxge_softc_t *sc, int media_type)
{
	ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type, 0, NULL);
	ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type);
	sc->current_media = media_type;
	sc->media.ifm_media = sc->media.ifm_cur->ifm_media;
}

static void
mxge_media_init(mxge_softc_t *sc)
{
	const char *ptr;
	int i;

	ifmedia_removeall(&sc->media);
	mxge_media_set(sc, IFM_AUTO);

	/*
	 * Parse the product code to determine the interface type
	 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
	 * after the 3rd dash in the driver's cached copy of the
	 * EEPROM's product code string.
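	 * For example, a part number of the form "10G-PCIE-8B-S" has
	 * an 'S' after its third dash and would be classified as SFP+
	 * below.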
2439 */ 2440 ptr = sc->product_code_string; 2441 if (ptr == NULL) { 2442 if_printf(sc->ifp, "Missing product code\n"); 2443 return; 2444 } 2445 2446 for (i = 0; i < 3; i++, ptr++) { 2447 ptr = strchr(ptr, '-'); 2448 if (ptr == NULL) { 2449 if_printf(sc->ifp, "only %d dashes in PC?!?\n", i); 2450 return; 2451 } 2452 } 2453 if (*ptr == 'C' || *(ptr +1) == 'C') { 2454 /* -C is CX4 */ 2455 sc->connector = MXGE_CX4; 2456 mxge_media_set(sc, IFM_10G_CX4); 2457 } else if (*ptr == 'Q') { 2458 /* -Q is Quad Ribbon Fiber */ 2459 sc->connector = MXGE_QRF; 2460 if_printf(sc->ifp, "Quad Ribbon Fiber Media\n"); 2461 /* DragonFly has no media type for Quad ribbon fiber */ 2462 } else if (*ptr == 'R') { 2463 /* -R is XFP */ 2464 sc->connector = MXGE_XFP; 2465 } else if (*ptr == 'S' || *(ptr +1) == 'S') { 2466 /* -S or -2S is SFP+ */ 2467 sc->connector = MXGE_SFP; 2468 } else { 2469 if_printf(sc->ifp, "Unknown media type: %c\n", *ptr); 2470 } 2471 } 2472 2473 /* 2474 * Determine the media type for a NIC. Some XFPs will identify 2475 * themselves only when their link is up, so this is initiated via a 2476 * link up interrupt. However, this can potentially take up to 2477 * several milliseconds, so it is run via the watchdog routine, rather 2478 * than in the interrupt handler itself. 2479 */ 2480 static void 2481 mxge_media_probe(mxge_softc_t *sc) 2482 { 2483 mxge_cmd_t cmd; 2484 const char *cage_type; 2485 struct mxge_media_type *mxge_media_types = NULL; 2486 int i, err, ms, mxge_media_type_entries; 2487 uint32_t byte; 2488 2489 sc->need_media_probe = 0; 2490 2491 if (sc->connector == MXGE_XFP) { 2492 /* -R is XFP */ 2493 mxge_media_types = mxge_xfp_media_types; 2494 mxge_media_type_entries = sizeof(mxge_xfp_media_types) / 2495 sizeof(mxge_xfp_media_types[0]); 2496 byte = MXGE_XFP_COMPLIANCE_BYTE; 2497 cage_type = "XFP"; 2498 } else if (sc->connector == MXGE_SFP) { 2499 /* -S or -2S is SFP+ */ 2500 mxge_media_types = mxge_sfp_media_types; 2501 mxge_media_type_entries = sizeof(mxge_sfp_media_types) / 2502 sizeof(mxge_sfp_media_types[0]); 2503 cage_type = "SFP+"; 2504 byte = 3; 2505 } else { 2506 /* nothing to do; media type cannot change */ 2507 return; 2508 } 2509 2510 /* 2511 * At this point we know the NIC has an XFP cage, so now we 2512 * try to determine what is in the cage by using the 2513 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2514 * register. 
We read just one byte, which may take over 2515 * a millisecond 2516 */ 2517 2518 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2519 cmd.data1 = byte; 2520 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2521 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) 2522 if_printf(sc->ifp, "failed to read XFP\n"); 2523 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) 2524 if_printf(sc->ifp, "Type R/S with no XFP!?!?\n"); 2525 if (err != MXGEFW_CMD_OK) 2526 return; 2527 2528 /* Now we wait for the data to be cached */ 2529 cmd.data0 = byte; 2530 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2531 for (ms = 0; err == EBUSY && ms < 50; ms++) { 2532 DELAY(1000); 2533 cmd.data0 = byte; 2534 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2535 } 2536 if (err != MXGEFW_CMD_OK) { 2537 if_printf(sc->ifp, "failed to read %s (%d, %dms)\n", 2538 cage_type, err, ms); 2539 return; 2540 } 2541 2542 if (cmd.data0 == mxge_media_types[0].bitmask) { 2543 if (bootverbose) { 2544 if_printf(sc->ifp, "%s:%s\n", cage_type, 2545 mxge_media_types[0].name); 2546 } 2547 if (sc->current_media != mxge_media_types[0].flag) { 2548 mxge_media_init(sc); 2549 mxge_media_set(sc, mxge_media_types[0].flag); 2550 } 2551 return; 2552 } 2553 for (i = 1; i < mxge_media_type_entries; i++) { 2554 if (cmd.data0 & mxge_media_types[i].bitmask) { 2555 if (bootverbose) { 2556 if_printf(sc->ifp, "%s:%s\n", cage_type, 2557 mxge_media_types[i].name); 2558 } 2559 2560 if (sc->current_media != mxge_media_types[i].flag) { 2561 mxge_media_init(sc); 2562 mxge_media_set(sc, mxge_media_types[i].flag); 2563 } 2564 return; 2565 } 2566 } 2567 if (bootverbose) { 2568 if_printf(sc->ifp, "%s media 0x%x unknown\n", cage_type, 2569 cmd.data0); 2570 } 2571 } 2572 2573 static void 2574 mxge_intr_status(struct mxge_softc *sc, const mcp_irq_data_t *stats) 2575 { 2576 if (sc->link_state != stats->link_up) { 2577 sc->link_state = stats->link_up; 2578 if (sc->link_state) { 2579 sc->ifp->if_link_state = LINK_STATE_UP; 2580 if_link_state_change(sc->ifp); 2581 if (bootverbose) 2582 if_printf(sc->ifp, "link up\n"); 2583 } else { 2584 sc->ifp->if_link_state = LINK_STATE_DOWN; 2585 if_link_state_change(sc->ifp); 2586 if (bootverbose) 2587 if_printf(sc->ifp, "link down\n"); 2588 } 2589 sc->need_media_probe = 1; 2590 } 2591 2592 if (sc->rdma_tags_available != be32toh(stats->rdma_tags_available)) { 2593 sc->rdma_tags_available = be32toh(stats->rdma_tags_available); 2594 if_printf(sc->ifp, "RDMA timed out! 
%d tags left\n", 2595 sc->rdma_tags_available); 2596 } 2597 2598 if (stats->link_down) { 2599 sc->down_cnt += stats->link_down; 2600 sc->link_state = 0; 2601 sc->ifp->if_link_state = LINK_STATE_DOWN; 2602 if_link_state_change(sc->ifp); 2603 } 2604 } 2605 2606 static void 2607 mxge_serialize_skipmain(struct mxge_softc *sc) 2608 { 2609 lwkt_serialize_array_enter(sc->serializes, sc->nserialize, 1); 2610 } 2611 2612 static void 2613 mxge_deserialize_skipmain(struct mxge_softc *sc) 2614 { 2615 lwkt_serialize_array_exit(sc->serializes, sc->nserialize, 1); 2616 } 2617 2618 static void 2619 mxge_legacy(void *arg) 2620 { 2621 struct mxge_slice_state *ss = arg; 2622 mxge_softc_t *sc = ss->sc; 2623 mcp_irq_data_t *stats = ss->fw_stats; 2624 mxge_tx_ring_t *tx = &ss->tx; 2625 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done; 2626 uint32_t send_done_count; 2627 uint8_t valid; 2628 2629 ASSERT_SERIALIZED(&sc->main_serialize); 2630 2631 #if 0 2632 /* an interrupt on a non-zero slice is implicitly valid 2633 since MSI-X irqs are not shared */ 2634 if (ss != sc->ss) { 2635 mxge_clean_rx_done(rx_done); 2636 *ss->irq_claim = be32toh(3); 2637 return; 2638 } 2639 #endif 2640 2641 /* Make sure the DMA has finished */ 2642 if (!stats->valid) 2643 return; 2644 valid = stats->valid; 2645 2646 /* Lower legacy IRQ */ 2647 *sc->irq_deassert = 0; 2648 if (!mxge_deassert_wait) { 2649 /* Don't wait for conf. that irq is low */ 2650 stats->valid = 0; 2651 } 2652 2653 mxge_serialize_skipmain(sc); 2654 2655 /* 2656 * Loop while waiting for legacy irq deassertion 2657 * XXX do we really want to loop? 2658 */ 2659 do { 2660 /* Check for transmit completes and receives */ 2661 send_done_count = be32toh(stats->send_done_count); 2662 while ((send_done_count != tx->pkt_done) || 2663 (rx_done->entry[rx_done->idx].length != 0)) { 2664 if (send_done_count != tx->pkt_done) { 2665 mxge_tx_done(&sc->arpcom.ac_if, tx, 2666 (int)send_done_count); 2667 } 2668 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data); 2669 send_done_count = be32toh(stats->send_done_count); 2670 } 2671 if (mxge_deassert_wait) 2672 wmb(); 2673 } while (*((volatile uint8_t *)&stats->valid)); 2674 2675 mxge_deserialize_skipmain(sc); 2676 2677 /* Fw link & error stats meaningful only on the first slice */ 2678 if (__predict_false(stats->stats_updated)) 2679 mxge_intr_status(sc, stats); 2680 2681 /* Check to see if we have rx token to pass back */ 2682 if (valid & 0x1) 2683 *ss->irq_claim = be32toh(3); 2684 *(ss->irq_claim + 1) = be32toh(3); 2685 } 2686 2687 static void 2688 mxge_msi(void *arg) 2689 { 2690 struct mxge_slice_state *ss = arg; 2691 mxge_softc_t *sc = ss->sc; 2692 mcp_irq_data_t *stats = ss->fw_stats; 2693 mxge_tx_ring_t *tx = &ss->tx; 2694 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done; 2695 uint32_t send_done_count; 2696 uint8_t valid; 2697 2698 ASSERT_SERIALIZED(&sc->main_serialize); 2699 2700 /* Make sure the DMA has finished */ 2701 if (__predict_false(!stats->valid)) 2702 return; 2703 2704 valid = stats->valid; 2705 stats->valid = 0; 2706 2707 /* Check for receives */ 2708 lwkt_serialize_enter(&ss->rx_data.rx_serialize); 2709 if (rx_done->entry[rx_done->idx].length != 0) 2710 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data); 2711 lwkt_serialize_exit(&ss->rx_data.rx_serialize); 2712 2713 /* 2714 * Check for transmit completes 2715 * 2716 * NOTE: 2717 * Since pkt_done is only changed by mxge_tx_done(), 2718 * which is called only in interrupt handler, the 2719 * check w/o holding tx serializer is MPSAFE. 
2720 */ 2721 send_done_count = be32toh(stats->send_done_count); 2722 if (send_done_count != tx->pkt_done) { 2723 lwkt_serialize_enter(&tx->tx_serialize); 2724 mxge_tx_done(&sc->arpcom.ac_if, tx, (int)send_done_count); 2725 lwkt_serialize_exit(&tx->tx_serialize); 2726 } 2727 2728 if (__predict_false(stats->stats_updated)) 2729 mxge_intr_status(sc, stats); 2730 2731 /* Check to see if we have rx token to pass back */ 2732 if (valid & 0x1) 2733 *ss->irq_claim = be32toh(3); 2734 *(ss->irq_claim + 1) = be32toh(3); 2735 } 2736 2737 static void 2738 mxge_init(void *arg) 2739 { 2740 struct mxge_softc *sc = arg; 2741 2742 ASSERT_IFNET_SERIALIZED_ALL(sc->ifp); 2743 if ((sc->ifp->if_flags & IFF_RUNNING) == 0) 2744 mxge_open(sc); 2745 } 2746 2747 static void 2748 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 2749 { 2750 int i; 2751 2752 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 2753 if (ss->rx_data.rx_big.info[i].m == NULL) 2754 continue; 2755 bus_dmamap_unload(ss->rx_data.rx_big.dmat, 2756 ss->rx_data.rx_big.info[i].map); 2757 m_freem(ss->rx_data.rx_big.info[i].m); 2758 ss->rx_data.rx_big.info[i].m = NULL; 2759 } 2760 2761 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) { 2762 if (ss->rx_data.rx_small.info[i].m == NULL) 2763 continue; 2764 bus_dmamap_unload(ss->rx_data.rx_small.dmat, 2765 ss->rx_data.rx_small.info[i].map); 2766 m_freem(ss->rx_data.rx_small.info[i].m); 2767 ss->rx_data.rx_small.info[i].m = NULL; 2768 } 2769 2770 /* Transmit ring used only on the first slice */ 2771 if (ss->tx.info == NULL) 2772 return; 2773 2774 for (i = 0; i <= ss->tx.mask; i++) { 2775 if (ss->tx.info[i].m == NULL) 2776 continue; 2777 bus_dmamap_unload(ss->tx.dmat, ss->tx.info[i].map); 2778 m_freem(ss->tx.info[i].m); 2779 ss->tx.info[i].m = NULL; 2780 } 2781 } 2782 2783 static void 2784 mxge_free_mbufs(mxge_softc_t *sc) 2785 { 2786 int slice; 2787 2788 for (slice = 0; slice < sc->num_slices; slice++) 2789 mxge_free_slice_mbufs(&sc->ss[slice]); 2790 } 2791 2792 static void 2793 mxge_free_slice_rings(struct mxge_slice_state *ss) 2794 { 2795 int i; 2796 2797 if (ss->rx_data.rx_done.entry != NULL) { 2798 mxge_dma_free(&ss->rx_done_dma); 2799 ss->rx_data.rx_done.entry = NULL; 2800 } 2801 2802 if (ss->tx.req_list != NULL) { 2803 kfree(ss->tx.req_list, M_DEVBUF); 2804 ss->tx.req_list = NULL; 2805 } 2806 2807 if (ss->tx.seg_list != NULL) { 2808 kfree(ss->tx.seg_list, M_DEVBUF); 2809 ss->tx.seg_list = NULL; 2810 } 2811 2812 if (ss->rx_data.rx_small.shadow != NULL) { 2813 kfree(ss->rx_data.rx_small.shadow, M_DEVBUF); 2814 ss->rx_data.rx_small.shadow = NULL; 2815 } 2816 2817 if (ss->rx_data.rx_big.shadow != NULL) { 2818 kfree(ss->rx_data.rx_big.shadow, M_DEVBUF); 2819 ss->rx_data.rx_big.shadow = NULL; 2820 } 2821 2822 if (ss->tx.info != NULL) { 2823 if (ss->tx.dmat != NULL) { 2824 for (i = 0; i <= ss->tx.mask; i++) { 2825 bus_dmamap_destroy(ss->tx.dmat, 2826 ss->tx.info[i].map); 2827 } 2828 bus_dma_tag_destroy(ss->tx.dmat); 2829 } 2830 kfree(ss->tx.info, M_DEVBUF); 2831 ss->tx.info = NULL; 2832 } 2833 2834 if (ss->rx_data.rx_small.info != NULL) { 2835 if (ss->rx_data.rx_small.dmat != NULL) { 2836 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) { 2837 bus_dmamap_destroy(ss->rx_data.rx_small.dmat, 2838 ss->rx_data.rx_small.info[i].map); 2839 } 2840 bus_dmamap_destroy(ss->rx_data.rx_small.dmat, 2841 ss->rx_data.rx_small.extra_map); 2842 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat); 2843 } 2844 kfree(ss->rx_data.rx_small.info, M_DEVBUF); 2845 ss->rx_data.rx_small.info = NULL; 2846 } 2847 2848 if 
(ss->rx_data.rx_big.info != NULL) { 2849 if (ss->rx_data.rx_big.dmat != NULL) { 2850 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 2851 bus_dmamap_destroy(ss->rx_data.rx_big.dmat, 2852 ss->rx_data.rx_big.info[i].map); 2853 } 2854 bus_dmamap_destroy(ss->rx_data.rx_big.dmat, 2855 ss->rx_data.rx_big.extra_map); 2856 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat); 2857 } 2858 kfree(ss->rx_data.rx_big.info, M_DEVBUF); 2859 ss->rx_data.rx_big.info = NULL; 2860 } 2861 } 2862 2863 static void 2864 mxge_free_rings(mxge_softc_t *sc) 2865 { 2866 int slice; 2867 2868 if (sc->ss == NULL) 2869 return; 2870 2871 for (slice = 0; slice < sc->num_slices; slice++) 2872 mxge_free_slice_rings(&sc->ss[slice]); 2873 } 2874 2875 static int 2876 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 2877 int tx_ring_entries) 2878 { 2879 mxge_softc_t *sc = ss->sc; 2880 size_t bytes; 2881 int err, i; 2882 2883 /* 2884 * Allocate per-slice receive resources 2885 */ 2886 2887 ss->rx_data.rx_small.mask = ss->rx_data.rx_big.mask = 2888 rx_ring_entries - 1; 2889 ss->rx_data.rx_done.mask = (2 * rx_ring_entries) - 1; 2890 2891 /* Allocate the rx shadow rings */ 2892 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_small.shadow); 2893 ss->rx_data.rx_small.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2894 2895 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_big.shadow); 2896 ss->rx_data.rx_big.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2897 2898 /* Allocate the rx host info rings */ 2899 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_small.info); 2900 ss->rx_data.rx_small.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2901 2902 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_big.info); 2903 ss->rx_data.rx_big.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2904 2905 /* Allocate the rx busdma resources */ 2906 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 2907 1, /* alignment */ 2908 4096, /* boundary */ 2909 BUS_SPACE_MAXADDR, /* low */ 2910 BUS_SPACE_MAXADDR, /* high */ 2911 NULL, NULL, /* filter */ 2912 MHLEN, /* maxsize */ 2913 1, /* num segs */ 2914 MHLEN, /* maxsegsize */ 2915 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, 2916 /* flags */ 2917 &ss->rx_data.rx_small.dmat); /* tag */ 2918 if (err != 0) { 2919 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 2920 err); 2921 return err; 2922 } 2923 2924 err = bus_dmamap_create(ss->rx_data.rx_small.dmat, BUS_DMA_WAITOK, 2925 &ss->rx_data.rx_small.extra_map); 2926 if (err != 0) { 2927 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", err); 2928 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat); 2929 ss->rx_data.rx_small.dmat = NULL; 2930 return err; 2931 } 2932 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) { 2933 err = bus_dmamap_create(ss->rx_data.rx_small.dmat, 2934 BUS_DMA_WAITOK, &ss->rx_data.rx_small.info[i].map); 2935 if (err != 0) { 2936 int j; 2937 2938 device_printf(sc->dev, "Err %d rx_small dmamap\n", err); 2939 2940 for (j = 0; j < i; ++j) { 2941 bus_dmamap_destroy(ss->rx_data.rx_small.dmat, 2942 ss->rx_data.rx_small.info[j].map); 2943 } 2944 bus_dmamap_destroy(ss->rx_data.rx_small.dmat, 2945 ss->rx_data.rx_small.extra_map); 2946 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat); 2947 ss->rx_data.rx_small.dmat = NULL; 2948 return err; 2949 } 2950 } 2951 2952 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 2953 1, /* alignment */ 2954 4096, /* boundary */ 2955 BUS_SPACE_MAXADDR, /* low */ 2956 BUS_SPACE_MAXADDR, /* high */ 2957 NULL, NULL, /* filter */ 2958 4096, /* maxsize */ 2959 1, /* num segs */ 
2960 4096, /* maxsegsize*/ 2961 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, 2962 /* flags */ 2963 &ss->rx_data.rx_big.dmat); /* tag */ 2964 if (err != 0) { 2965 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 2966 err); 2967 return err; 2968 } 2969 2970 err = bus_dmamap_create(ss->rx_data.rx_big.dmat, BUS_DMA_WAITOK, 2971 &ss->rx_data.rx_big.extra_map); 2972 if (err != 0) { 2973 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", err); 2974 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat); 2975 ss->rx_data.rx_big.dmat = NULL; 2976 return err; 2977 } 2978 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 2979 err = bus_dmamap_create(ss->rx_data.rx_big.dmat, BUS_DMA_WAITOK, 2980 &ss->rx_data.rx_big.info[i].map); 2981 if (err != 0) { 2982 int j; 2983 2984 device_printf(sc->dev, "Err %d rx_big dmamap\n", err); 2985 for (j = 0; j < i; ++j) { 2986 bus_dmamap_destroy(ss->rx_data.rx_big.dmat, 2987 ss->rx_data.rx_big.info[j].map); 2988 } 2989 bus_dmamap_destroy(ss->rx_data.rx_big.dmat, 2990 ss->rx_data.rx_big.extra_map); 2991 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat); 2992 ss->rx_data.rx_big.dmat = NULL; 2993 return err; 2994 } 2995 } 2996 2997 /* 2998 * Now allocate TX resources 2999 */ 3000 3001 ss->tx.mask = tx_ring_entries - 1; 3002 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3003 3004 /* 3005 * Allocate the tx request copy block; MUST be at least 8 bytes 3006 * aligned 3007 */ 3008 bytes = sizeof(*ss->tx.req_list) * (ss->tx.max_desc + 4); 3009 ss->tx.req_list = kmalloc_cachealign(__VM_CACHELINE_ALIGN(bytes), 3010 M_DEVBUF, M_WAITOK); 3011 3012 /* Allocate the tx busdma segment list */ 3013 bytes = sizeof(*ss->tx.seg_list) * ss->tx.max_desc; 3014 ss->tx.seg_list = kmalloc(bytes, M_DEVBUF, M_WAITOK); 3015 3016 /* Allocate the tx host info ring */ 3017 bytes = tx_ring_entries * sizeof(*ss->tx.info); 3018 ss->tx.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3019 3020 /* Allocate the tx busdma resources */ 3021 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3022 1, /* alignment */ 3023 sc->tx_boundary, /* boundary */ 3024 BUS_SPACE_MAXADDR, /* low */ 3025 BUS_SPACE_MAXADDR, /* high */ 3026 NULL, NULL, /* filter */ 3027 IP_MAXPACKET + 3028 sizeof(struct ether_vlan_header), 3029 /* maxsize */ 3030 ss->tx.max_desc - 2, /* num segs */ 3031 sc->tx_boundary, /* maxsegsz */ 3032 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | 3033 BUS_DMA_ONEBPAGE, /* flags */ 3034 &ss->tx.dmat); /* tag */ 3035 if (err != 0) { 3036 device_printf(sc->dev, "Err %d allocating tx dmat\n", err); 3037 return err; 3038 } 3039 3040 /* 3041 * Now use these tags to setup DMA maps for each slot in the ring 3042 */ 3043 for (i = 0; i <= ss->tx.mask; i++) { 3044 err = bus_dmamap_create(ss->tx.dmat, 3045 BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &ss->tx.info[i].map); 3046 if (err != 0) { 3047 int j; 3048 3049 device_printf(sc->dev, "Err %d tx dmamap\n", err); 3050 for (j = 0; j < i; ++j) { 3051 bus_dmamap_destroy(ss->tx.dmat, 3052 ss->tx.info[j].map); 3053 } 3054 bus_dma_tag_destroy(ss->tx.dmat); 3055 ss->tx.dmat = NULL; 3056 return err; 3057 } 3058 } 3059 return 0; 3060 } 3061 3062 static int 3063 mxge_alloc_rings(mxge_softc_t *sc) 3064 { 3065 mxge_cmd_t cmd; 3066 int tx_ring_size; 3067 int tx_ring_entries, rx_ring_entries; 3068 int err, slice; 3069 3070 /* Get ring sizes */ 3071 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3072 if (err != 0) { 3073 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3074 return err; 3075 } 3076 tx_ring_size = cmd.data0; 3077 3078 tx_ring_entries = tx_ring_size 
/ sizeof(mcp_kreq_ether_send_t); 3079 rx_ring_entries = sc->rx_ring_size / sizeof(mcp_dma_addr_t); 3080 ifq_set_maxlen(&sc->ifp->if_snd, tx_ring_entries - 1); 3081 ifq_set_ready(&sc->ifp->if_snd); 3082 3083 for (slice = 0; slice < sc->num_slices; slice++) { 3084 err = mxge_alloc_slice_rings(&sc->ss[slice], 3085 rx_ring_entries, tx_ring_entries); 3086 if (err != 0) { 3087 device_printf(sc->dev, 3088 "alloc %d slice rings failed\n", slice); 3089 return err; 3090 } 3091 } 3092 return 0; 3093 } 3094 3095 static void 3096 mxge_choose_params(int mtu, int *cl_size) 3097 { 3098 int bufsize = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN + MXGEFW_PAD; 3099 3100 if (bufsize < MCLBYTES) { 3101 *cl_size = MCLBYTES; 3102 } else { 3103 KASSERT(bufsize < MJUMPAGESIZE, ("invalid MTU %d", mtu)); 3104 *cl_size = MJUMPAGESIZE; 3105 } 3106 } 3107 3108 static int 3109 mxge_slice_open(struct mxge_slice_state *ss, int cl_size) 3110 { 3111 mxge_cmd_t cmd; 3112 int err, i, slice; 3113 3114 slice = ss - ss->sc->ss; 3115 3116 /* 3117 * Get the lanai pointers to the send and receive rings 3118 */ 3119 err = 0; 3120 #ifndef IFNET_BUF_RING 3121 /* We currently only send from the first slice */ 3122 if (slice == 0) { 3123 #endif 3124 cmd.data0 = slice; 3125 err = mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3126 ss->tx.lanai = (volatile mcp_kreq_ether_send_t *) 3127 (ss->sc->sram + cmd.data0); 3128 ss->tx.send_go = (volatile uint32_t *) 3129 (ss->sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3130 ss->tx.send_stop = (volatile uint32_t *) 3131 (ss->sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3132 #ifndef IFNET_BUF_RING 3133 } 3134 #endif 3135 3136 cmd.data0 = slice; 3137 err |= mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3138 ss->rx_data.rx_small.lanai = 3139 (volatile mcp_kreq_ether_recv_t *)(ss->sc->sram + cmd.data0); 3140 3141 cmd.data0 = slice; 3142 err |= mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3143 ss->rx_data.rx_big.lanai = 3144 (volatile mcp_kreq_ether_recv_t *)(ss->sc->sram + cmd.data0); 3145 3146 if (err != 0) { 3147 if_printf(ss->sc->ifp, 3148 "failed to get ring sizes or locations\n"); 3149 return EIO; 3150 } 3151 3152 /* 3153 * Stock small receive ring 3154 */ 3155 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) { 3156 err = mxge_get_buf_small(&ss->rx_data.rx_small, 3157 ss->rx_data.rx_small.info[i].map, i, TRUE); 3158 if (err) { 3159 if_printf(ss->sc->ifp, "alloced %d/%d smalls\n", i, 3160 ss->rx_data.rx_small.mask + 1); 3161 return ENOMEM; 3162 } 3163 } 3164 3165 /* 3166 * Stock big receive ring 3167 */ 3168 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 3169 ss->rx_data.rx_big.shadow[i].addr_low = 0xffffffff; 3170 ss->rx_data.rx_big.shadow[i].addr_high = 0xffffffff; 3171 } 3172 3173 ss->rx_data.rx_big.cl_size = cl_size; 3174 3175 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 3176 err = mxge_get_buf_big(&ss->rx_data.rx_big, 3177 ss->rx_data.rx_big.info[i].map, i, TRUE); 3178 if (err) { 3179 if_printf(ss->sc->ifp, "alloced %d/%d bigs\n", i, 3180 ss->rx_data.rx_big.mask + 1); 3181 return ENOMEM; 3182 } 3183 } 3184 return 0; 3185 } 3186 3187 static int 3188 mxge_open(mxge_softc_t *sc) 3189 { 3190 struct ifnet *ifp = sc->ifp; 3191 mxge_cmd_t cmd; 3192 int err, slice, cl_size, i; 3193 bus_addr_t bus; 3194 volatile uint8_t *itable; 3195 struct mxge_slice_state *ss; 3196 3197 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3198 3199 /* Copy the MAC address in case it was overridden */ 3200 bcopy(IF_LLADDR(ifp), sc->mac_addr, ETHER_ADDR_LEN); 3201 3202 err = mxge_reset(sc, 1); 3203 if 
(err != 0) { 3204 if_printf(ifp, "failed to reset\n"); 3205 return EIO; 3206 } 3207 3208 if (sc->num_slices > 1) { 3209 /* Setup the indirection table */ 3210 cmd.data0 = sc->num_slices; 3211 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, &cmd); 3212 3213 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, &cmd); 3214 if (err != 0) { 3215 if_printf(ifp, "failed to setup rss tables\n"); 3216 return err; 3217 } 3218 3219 /* Just enable an identity mapping */ 3220 itable = sc->sram + cmd.data0; 3221 for (i = 0; i < sc->num_slices; i++) 3222 itable[i] = (uint8_t)i; 3223 3224 cmd.data0 = 1; 3225 cmd.data1 = MXGEFW_RSS_HASH_TYPE_TCP_IPV4; 3226 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3227 if (err != 0) { 3228 if_printf(ifp, "failed to enable slices\n"); 3229 return err; 3230 } 3231 } 3232 3233 cmd.data0 = MXGEFW_TSO_MODE_NDIS; 3234 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_TSO_MODE, &cmd); 3235 if (err) { 3236 /* 3237 * Can't change TSO mode to NDIS, never allow TSO then 3238 */ 3239 if_printf(ifp, "failed to set TSO mode\n"); 3240 ifp->if_capenable &= ~IFCAP_TSO; 3241 ifp->if_capabilities &= ~IFCAP_TSO; 3242 ifp->if_hwassist &= ~CSUM_TSO; 3243 } 3244 3245 mxge_choose_params(ifp->if_mtu, &cl_size); 3246 3247 cmd.data0 = 1; 3248 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, &cmd); 3249 /* 3250 * Error is only meaningful if we're trying to set 3251 * MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 3252 */ 3253 3254 /* 3255 * Give the firmware the mtu and the big and small buffer 3256 * sizes. The firmware wants the big buf size to be a power 3257 * of two. Luckily, DragonFly's clusters are powers of two 3258 */ 3259 cmd.data0 = ifp->if_mtu + ETHER_HDR_LEN + EVL_ENCAPLEN; 3260 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3261 3262 cmd.data0 = MXGE_RX_SMALL_BUFLEN; 3263 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd); 3264 3265 cmd.data0 = cl_size; 3266 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3267 3268 if (err != 0) { 3269 if_printf(ifp, "failed to setup params\n"); 3270 goto abort; 3271 } 3272 3273 /* Now give him the pointer to the stats block */ 3274 for (slice = 0; slice < sc->num_slices; slice++) { 3275 ss = &sc->ss[slice]; 3276 cmd.data0 = MXGE_LOWPART_TO_U32(ss->fw_stats_dma.dmem_busaddr); 3277 cmd.data1 = MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.dmem_busaddr); 3278 cmd.data2 = sizeof(struct mcp_irq_data); 3279 cmd.data2 |= (slice << 16); 3280 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3281 } 3282 3283 if (err != 0) { 3284 bus = sc->ss->fw_stats_dma.dmem_busaddr; 3285 bus += offsetof(struct mcp_irq_data, send_done_count); 3286 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3287 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3288 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3289 &cmd); 3290 3291 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3292 sc->fw_multicast_support = 0; 3293 } else { 3294 sc->fw_multicast_support = 1; 3295 } 3296 3297 if (err != 0) { 3298 if_printf(ifp, "failed to setup params\n"); 3299 goto abort; 3300 } 3301 3302 for (slice = 0; slice < sc->num_slices; slice++) { 3303 err = mxge_slice_open(&sc->ss[slice], cl_size); 3304 if (err != 0) { 3305 if_printf(ifp, "couldn't open slice %d\n", slice); 3306 goto abort; 3307 } 3308 } 3309 3310 /* Finally, start the firmware running */ 3311 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3312 if (err) { 3313 if_printf(ifp, "Couldn't bring up link\n"); 3314 goto abort; 3315 } 3316 ifp->if_flags |= IFF_RUNNING; 3317 
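	/* Up and running; let the stack start handing us frames */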
ifq_clr_oactive(&ifp->if_snd); 3318 ifp->if_timer = 0; 3319 3320 return 0; 3321 3322 abort: 3323 mxge_free_mbufs(sc); 3324 return err; 3325 } 3326 3327 static void 3328 mxge_close(mxge_softc_t *sc, int down) 3329 { 3330 struct ifnet *ifp = sc->ifp; 3331 mxge_cmd_t cmd; 3332 int err, old_down_cnt; 3333 3334 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3335 3336 ifp->if_flags &= ~IFF_RUNNING; 3337 ifq_clr_oactive(&ifp->if_snd); 3338 ifp->if_timer = 0; 3339 3340 if (!down) { 3341 old_down_cnt = sc->down_cnt; 3342 wmb(); 3343 3344 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3345 if (err) 3346 if_printf(ifp, "Couldn't bring down link\n"); 3347 3348 if (old_down_cnt == sc->down_cnt) { 3349 /* Wait for down irq */ 3350 ifnet_deserialize_all(ifp); 3351 DELAY(10 * sc->intr_coal_delay); 3352 ifnet_serialize_all(ifp); 3353 } 3354 3355 wmb(); 3356 if (old_down_cnt == sc->down_cnt) 3357 if_printf(ifp, "never got down irq\n"); 3358 } 3359 mxge_free_mbufs(sc); 3360 } 3361 3362 static void 3363 mxge_setup_cfg_space(mxge_softc_t *sc) 3364 { 3365 device_t dev = sc->dev; 3366 int reg; 3367 uint16_t lnk, pectl; 3368 3369 /* Find the PCIe link width and set max read request to 4KB */ 3370 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 3371 lnk = pci_read_config(dev, reg + 0x12, 2); 3372 sc->link_width = (lnk >> 4) & 0x3f; 3373 3374 if (sc->pectl == 0) { 3375 pectl = pci_read_config(dev, reg + 0x8, 2); 3376 pectl = (pectl & ~0x7000) | (5 << 12); 3377 pci_write_config(dev, reg + 0x8, pectl, 2); 3378 sc->pectl = pectl; 3379 } else { 3380 /* Restore saved pectl after watchdog reset */ 3381 pci_write_config(dev, reg + 0x8, sc->pectl, 2); 3382 } 3383 } 3384 3385 /* Enable DMA and memory space access */ 3386 pci_enable_busmaster(dev); 3387 } 3388 3389 static uint32_t 3390 mxge_read_reboot(mxge_softc_t *sc) 3391 { 3392 device_t dev = sc->dev; 3393 uint32_t vs; 3394 3395 /* Find the vendor specific offset */ 3396 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) { 3397 if_printf(sc->ifp, "could not find vendor specific offset\n"); 3398 return (uint32_t)-1; 3399 } 3400 /* Enable read32 mode */ 3401 pci_write_config(dev, vs + 0x10, 0x3, 1); 3402 /* Tell NIC which register to read */ 3403 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3404 return pci_read_config(dev, vs + 0x14, 4); 3405 } 3406 3407 static void 3408 mxge_watchdog_reset(mxge_softc_t *sc) 3409 { 3410 struct pci_devinfo *dinfo; 3411 int err, running; 3412 uint32_t reboot; 3413 uint16_t cmd; 3414 3415 err = ENXIO; 3416 3417 if_printf(sc->ifp, "Watchdog reset!\n"); 3418 3419 /* 3420 * Check to see if the NIC rebooted. If it did, then all of 3421 * PCI config space has been reset, and things like the 3422 * busmaster bit will be zero. If this is the case, then we 3423 * must restore PCI config space before the NIC can be used 3424 * again 3425 */ 3426 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3427 if (cmd == 0xffff) { 3428 /* 3429 * Maybe the watchdog caught the NIC rebooting; wait 3430 * up to 100ms for it to finish. 
If it does not come
		 * back, then give up.
		 */
		DELAY(1000*100);
		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
		if (cmd == 0xffff)
			if_printf(sc->ifp, "NIC disappeared!\n");
	}
	if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
		/* Print the reboot status */
		reboot = mxge_read_reboot(sc);
		if_printf(sc->ifp, "NIC rebooted, status = 0x%x\n", reboot);

		running = sc->ifp->if_flags & IFF_RUNNING;
		if (running) {
			/*
			 * Quiesce NIC so that TX routines will not try to
			 * xmit after restoration of BAR
			 */

			/* Mark the link as down */
			if (sc->link_state) {
				sc->ifp->if_link_state = LINK_STATE_DOWN;
				if_link_state_change(sc->ifp);
			}
			mxge_close(sc, 1);
		}
		/* Restore PCI configuration space */
		dinfo = device_get_ivars(sc->dev);
		pci_cfg_restore(sc->dev, dinfo);

		/* And redo any changes we made to our config space */
		mxge_setup_cfg_space(sc);

		/* Reload f/w */
		err = mxge_load_firmware(sc, 0);
		if (err)
			if_printf(sc->ifp, "Unable to re-load f/w\n");
		if (running && !err) {
			err = mxge_open(sc);
			if_devstart_sched(sc->ifp);
		}
		sc->watchdog_resets++;
	} else {
		if_printf(sc->ifp, "NIC did not reboot, not resetting\n");
		err = 0;
	}
	if (err) {
		if_printf(sc->ifp, "watchdog reset failed\n");
	} else {
		if (sc->dying == 2)
			sc->dying = 0;
		callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	}
}

static void
mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
{
	if_printf(sc->ifp, "slice %d stuck? ring state:\n", slice);
	if_printf(sc->ifp, "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
	    tx->req, tx->done, tx->queue_active);
	if_printf(sc->ifp, "tx.activate=%d tx.deactivate=%d\n",
	    tx->activate, tx->deactivate);
	if_printf(sc->ifp, "pkt_done=%d fw=%d\n",
	    tx->pkt_done, be32toh(sc->ss->fw_stats->send_done_count));
}

static u_long
mxge_update_stats(mxge_softc_t *sc)
{
	u_long ipackets, opackets, pkts;

	IFNET_STAT_GET(sc->ifp, ipackets, ipackets);
	IFNET_STAT_GET(sc->ifp, opackets, opackets);

	pkts = ipackets - sc->ipackets;
	pkts += opackets - sc->opackets;

	sc->ipackets = ipackets;
	sc->opackets = opackets;

	return pkts;
}

static void
mxge_tick(void *arg)
{
	mxge_softc_t *sc = arg;
	u_long pkts = 0;
	int err = 0;
	int ticks;

	lwkt_serialize_enter(&sc->main_serialize);

	ticks = mxge_ticks;
	if (sc->ifp->if_flags & IFF_RUNNING) {
		/* Aggregate stats from different slices */
		pkts = mxge_update_stats(sc);
		if (sc->need_media_probe)
			mxge_media_probe(sc);
	}
	if (pkts == 0) {
		uint16_t cmd;

		/* Ensure NIC did not suffer h/w fault while idle */
		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
		if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
			sc->dying = 2;
			mxge_serialize_skipmain(sc);
			mxge_watchdog_reset(sc);
			mxge_deserialize_skipmain(sc);
			err = ENXIO;
		}

		/* Look less often if NIC is idle */
		ticks *= 4;
	}

	if (err == 0)
		callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);

	lwkt_serialize_exit(&sc->main_serialize);
}

static int
mxge_media_change(struct ifnet *ifp)
{
	return EINVAL;
}

static int
mxge_change_mtu(mxge_softc_t *sc, int mtu)
3563 { 3564 struct ifnet *ifp = sc->ifp; 3565 int real_mtu, old_mtu; 3566 int err = 0; 3567 3568 real_mtu = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN; 3569 if (mtu > sc->max_mtu || real_mtu < 60) 3570 return EINVAL; 3571 3572 old_mtu = ifp->if_mtu; 3573 ifp->if_mtu = mtu; 3574 if (ifp->if_flags & IFF_RUNNING) { 3575 mxge_close(sc, 0); 3576 err = mxge_open(sc); 3577 if (err != 0) { 3578 ifp->if_mtu = old_mtu; 3579 mxge_close(sc, 0); 3580 mxge_open(sc); 3581 } 3582 } 3583 return err; 3584 } 3585 3586 static void 3587 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 3588 { 3589 mxge_softc_t *sc = ifp->if_softc; 3590 3591 3592 if (sc == NULL) 3593 return; 3594 ifmr->ifm_status = IFM_AVALID; 3595 ifmr->ifm_active = IFM_ETHER | IFM_FDX; 3596 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 3597 ifmr->ifm_active |= sc->current_media; 3598 } 3599 3600 static int 3601 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data, 3602 struct ucred *cr __unused) 3603 { 3604 mxge_softc_t *sc = ifp->if_softc; 3605 struct ifreq *ifr = (struct ifreq *)data; 3606 int err, mask; 3607 3608 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3609 err = 0; 3610 3611 switch (command) { 3612 case SIOCSIFMTU: 3613 err = mxge_change_mtu(sc, ifr->ifr_mtu); 3614 break; 3615 3616 case SIOCSIFFLAGS: 3617 if (sc->dying) 3618 return EINVAL; 3619 3620 if (ifp->if_flags & IFF_UP) { 3621 if (!(ifp->if_flags & IFF_RUNNING)) { 3622 err = mxge_open(sc); 3623 } else { 3624 /* 3625 * Take care of PROMISC and ALLMULTI 3626 * flag changes 3627 */ 3628 mxge_change_promisc(sc, 3629 ifp->if_flags & IFF_PROMISC); 3630 mxge_set_multicast_list(sc); 3631 } 3632 } else { 3633 if (ifp->if_flags & IFF_RUNNING) 3634 mxge_close(sc, 0); 3635 } 3636 break; 3637 3638 case SIOCADDMULTI: 3639 case SIOCDELMULTI: 3640 mxge_set_multicast_list(sc); 3641 break; 3642 3643 case SIOCSIFCAP: 3644 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 3645 if (mask & IFCAP_TXCSUM) { 3646 ifp->if_capenable ^= IFCAP_TXCSUM; 3647 if (ifp->if_capenable & IFCAP_TXCSUM) 3648 ifp->if_hwassist |= CSUM_TCP | CSUM_UDP; 3649 else 3650 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP); 3651 } 3652 if (mask & IFCAP_TSO) { 3653 ifp->if_capenable ^= IFCAP_TSO; 3654 if (ifp->if_capenable & IFCAP_TSO) 3655 ifp->if_hwassist |= CSUM_TSO; 3656 else 3657 ifp->if_hwassist &= ~CSUM_TSO; 3658 } 3659 if (mask & IFCAP_RXCSUM) 3660 ifp->if_capenable ^= IFCAP_RXCSUM; 3661 if (mask & IFCAP_VLAN_HWTAGGING) 3662 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 3663 break; 3664 3665 case SIOCGIFMEDIA: 3666 mxge_media_probe(sc); 3667 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 3668 &sc->media, command); 3669 break; 3670 3671 default: 3672 err = ether_ioctl(ifp, command, data); 3673 break; 3674 } 3675 return err; 3676 } 3677 3678 static void 3679 mxge_fetch_tunables(mxge_softc_t *sc) 3680 { 3681 sc->intr_coal_delay = mxge_intr_coal_delay; 3682 if (sc->intr_coal_delay < 0 || sc->intr_coal_delay > (10 * 1000)) 3683 sc->intr_coal_delay = MXGE_INTR_COAL_DELAY; 3684 3685 /* XXX */ 3686 if (mxge_ticks == 0) 3687 mxge_ticks = hz / 2; 3688 3689 sc->pause = mxge_flow_control; 3690 3691 sc->throttle = mxge_throttle; 3692 if (sc->throttle && sc->throttle > MXGE_MAX_THROTTLE) 3693 sc->throttle = MXGE_MAX_THROTTLE; 3694 if (sc->throttle && sc->throttle < MXGE_MIN_THROTTLE) 3695 sc->throttle = MXGE_MIN_THROTTLE; 3696 } 3697 3698 static void 3699 mxge_free_slices(mxge_softc_t *sc) 3700 { 3701 struct mxge_slice_state *ss; 3702 int i; 3703 3704 if (sc->ss == NULL) 3705 return; 3706 3707 for (i = 0; i < sc->num_slices; i++) { 3708 ss = 
&sc->ss[i]; 3709 if (ss->fw_stats != NULL) { 3710 mxge_dma_free(&ss->fw_stats_dma); 3711 ss->fw_stats = NULL; 3712 } 3713 if (ss->rx_data.rx_done.entry != NULL) { 3714 mxge_dma_free(&ss->rx_done_dma); 3715 ss->rx_data.rx_done.entry = NULL; 3716 } 3717 } 3718 kfree(sc->ss, M_DEVBUF); 3719 sc->ss = NULL; 3720 } 3721 3722 static int 3723 mxge_alloc_slices(mxge_softc_t *sc) 3724 { 3725 mxge_cmd_t cmd; 3726 struct mxge_slice_state *ss; 3727 size_t bytes; 3728 int err, i, max_intr_slots; 3729 3730 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 3731 if (err != 0) { 3732 device_printf(sc->dev, "Cannot determine rx ring size\n"); 3733 return err; 3734 } 3735 sc->rx_ring_size = cmd.data0; 3736 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); 3737 3738 bytes = sizeof(*sc->ss) * sc->num_slices; 3739 sc->ss = kmalloc_cachealign(bytes, M_DEVBUF, M_WAITOK | M_ZERO); 3740 3741 for (i = 0; i < sc->num_slices; i++) { 3742 ss = &sc->ss[i]; 3743 3744 ss->sc = sc; 3745 3746 lwkt_serialize_init(&ss->rx_data.rx_serialize); 3747 lwkt_serialize_init(&ss->tx.tx_serialize); 3748 3749 /* 3750 * Allocate per-slice rx interrupt queues 3751 */ 3752 bytes = max_intr_slots * sizeof(*ss->rx_data.rx_done.entry); 3753 err = mxge_dma_alloc(sc, &ss->rx_done_dma, bytes, 4096); 3754 if (err != 0) { 3755 device_printf(sc->dev, 3756 "alloc %d slice rx_done failed\n", i); 3757 return err; 3758 } 3759 ss->rx_data.rx_done.entry = ss->rx_done_dma.dmem_addr; 3760 3761 /* 3762 * Allocate the per-slice firmware stats 3763 */ 3764 bytes = sizeof(*ss->fw_stats); 3765 err = mxge_dma_alloc(sc, &ss->fw_stats_dma, 3766 sizeof(*ss->fw_stats), 64); 3767 if (err != 0) { 3768 device_printf(sc->dev, 3769 "alloc %d fw_stats failed\n", i); 3770 return err; 3771 } 3772 ss->fw_stats = ss->fw_stats_dma.dmem_addr; 3773 } 3774 return 0; 3775 } 3776 3777 static void 3778 mxge_slice_probe(mxge_softc_t *sc) 3779 { 3780 mxge_cmd_t cmd; 3781 const char *old_fw; 3782 int msix_cnt, status, max_intr_slots; 3783 3784 sc->num_slices = 1; 3785 3786 /* 3787 * XXX 3788 * 3789 * Don't enable multiple slices if they are not enabled, 3790 * or if this is not an SMP system 3791 */ 3792 if (mxge_max_slices == 0 || mxge_max_slices == 1 || ncpus < 2) 3793 return; 3794 3795 /* see how many MSI-X interrupts are available */ 3796 msix_cnt = pci_msix_count(sc->dev); 3797 if (msix_cnt < 2) 3798 return; 3799 3800 /* now load the slice aware firmware see what it supports */ 3801 old_fw = sc->fw_name; 3802 if (old_fw == mxge_fw_aligned) 3803 sc->fw_name = mxge_fw_rss_aligned; 3804 else 3805 sc->fw_name = mxge_fw_rss_unaligned; 3806 status = mxge_load_firmware(sc, 0); 3807 if (status != 0) { 3808 device_printf(sc->dev, "Falling back to a single slice\n"); 3809 return; 3810 } 3811 3812 /* try to send a reset command to the card to see if it 3813 is alive */ 3814 memset(&cmd, 0, sizeof (cmd)); 3815 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 3816 if (status != 0) { 3817 device_printf(sc->dev, "failed reset\n"); 3818 goto abort_with_fw; 3819 } 3820 3821 /* get rx ring size */ 3822 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 3823 if (status != 0) { 3824 device_printf(sc->dev, "Cannot determine rx ring size\n"); 3825 goto abort_with_fw; 3826 } 3827 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t)); 3828 3829 /* tell it the size of the interrupt queues */ 3830 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot); 3831 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 3832 if (status != 0) { 3833 
device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 3834 goto abort_with_fw; 3835 } 3836 3837 /* ask the maximum number of slices it supports */ 3838 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 3839 if (status != 0) { 3840 device_printf(sc->dev, 3841 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); 3842 goto abort_with_fw; 3843 } 3844 sc->num_slices = cmd.data0; 3845 if (sc->num_slices > msix_cnt) 3846 sc->num_slices = msix_cnt; 3847 3848 if (mxge_max_slices == -1) { 3849 /* cap to number of CPUs in system */ 3850 if (sc->num_slices > ncpus) 3851 sc->num_slices = ncpus; 3852 } else { 3853 if (sc->num_slices > mxge_max_slices) 3854 sc->num_slices = mxge_max_slices; 3855 } 3856 /* make sure it is a power of two */ 3857 while (sc->num_slices & (sc->num_slices - 1)) 3858 sc->num_slices--; 3859 3860 if (bootverbose) 3861 device_printf(sc->dev, "using %d slices\n", 3862 sc->num_slices); 3863 3864 return; 3865 3866 abort_with_fw: 3867 sc->fw_name = old_fw; 3868 (void) mxge_load_firmware(sc, 0); 3869 } 3870 3871 #if 0 3872 static int 3873 mxge_add_msix_irqs(mxge_softc_t *sc) 3874 { 3875 size_t bytes; 3876 int count, err, i, rid; 3877 3878 rid = PCIR_BAR(2); 3879 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 3880 &rid, RF_ACTIVE); 3881 3882 if (sc->msix_table_res == NULL) { 3883 device_printf(sc->dev, "couldn't alloc MSIX table res\n"); 3884 return ENXIO; 3885 } 3886 3887 count = sc->num_slices; 3888 err = pci_alloc_msix(sc->dev, &count); 3889 if (err != 0) { 3890 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d" 3891 "err = %d \n", sc->num_slices, err); 3892 goto abort_with_msix_table; 3893 } 3894 if (count < sc->num_slices) { 3895 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n", 3896 count, sc->num_slices); 3897 device_printf(sc->dev, 3898 "Try setting hw.mxge.max_slices to %d\n", 3899 count); 3900 err = ENOSPC; 3901 goto abort_with_msix; 3902 } 3903 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices; 3904 sc->msix_irq_res = kmalloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 3905 if (sc->msix_irq_res == NULL) { 3906 err = ENOMEM; 3907 goto abort_with_msix; 3908 } 3909 3910 for (i = 0; i < sc->num_slices; i++) { 3911 rid = i + 1; 3912 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev, 3913 SYS_RES_IRQ, 3914 &rid, RF_ACTIVE); 3915 if (sc->msix_irq_res[i] == NULL) { 3916 device_printf(sc->dev, "couldn't allocate IRQ res" 3917 " for message %d\n", i); 3918 err = ENXIO; 3919 goto abort_with_res; 3920 } 3921 } 3922 3923 bytes = sizeof (*sc->msix_ih) * sc->num_slices; 3924 sc->msix_ih = kmalloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 3925 3926 for (i = 0; i < sc->num_slices; i++) { 3927 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i], 3928 INTR_MPSAFE, 3929 mxge_intr, &sc->ss[i], &sc->msix_ih[i], 3930 sc->ifp->if_serializer); 3931 if (err != 0) { 3932 device_printf(sc->dev, "couldn't setup intr for " 3933 "message %d\n", i); 3934 goto abort_with_intr; 3935 } 3936 } 3937 3938 if (bootverbose) { 3939 device_printf(sc->dev, "using %d msix IRQs:", 3940 sc->num_slices); 3941 for (i = 0; i < sc->num_slices; i++) 3942 kprintf(" %ld", rman_get_start(sc->msix_irq_res[i])); 3943 kprintf("\n"); 3944 } 3945 return (0); 3946 3947 abort_with_intr: 3948 for (i = 0; i < sc->num_slices; i++) { 3949 if (sc->msix_ih[i] != NULL) { 3950 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 3951 sc->msix_ih[i]); 3952 sc->msix_ih[i] = NULL; 3953 } 3954 } 3955 kfree(sc->msix_ih, M_DEVBUF); 3956 3957 3958 abort_with_res: 3959 for (i = 0; i < sc->num_slices; i++) { 3960 rid 
abort_with_res:
	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
			    sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	kfree(sc->msix_irq_res, M_DEVBUF);

abort_with_msix:
	pci_release_msi(sc->dev);

abort_with_msix_table:
	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
	    sc->msix_table_res);

	return err;
}
#endif

static int
mxge_add_single_irq(mxge_softc_t *sc)
{
	driver_intr_t *intr_func;
	u_int irq_flags;

	sc->irq_type = pci_alloc_1intr(sc->dev, mxge_msi_enable,
	    &sc->irq_rid, &irq_flags);

	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
	    &sc->irq_rid, irq_flags);
	if (sc->irq_res == NULL) {
		device_printf(sc->dev, "could not alloc interrupt\n");
		return ENXIO;
	}

	if (sc->irq_type == PCI_INTR_TYPE_LEGACY)
		intr_func = mxge_legacy;
	else
		intr_func = mxge_msi;

	return bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE,
	    intr_func, &sc->ss[0], &sc->ih, &sc->main_serialize);
}

#if 0
static void
mxge_rem_msix_irqs(mxge_softc_t *sc)
{
	int i, rid;

	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
			    sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	kfree(sc->msix_ih, M_DEVBUF);

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
			    sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	kfree(sc->msix_irq_res, M_DEVBUF);

	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
	    sc->msix_table_res);

	pci_release_msi(sc->dev);
}
#endif

static int
mxge_add_irq(mxge_softc_t *sc)
{
#if 0
	int err;

	if (sc->num_slices > 1)
		err = mxge_add_msix_irqs(sc);
	else
		err = mxge_add_single_irq(sc);

	if (0 && err == 0 && sc->num_slices > 1) {
		mxge_rem_msix_irqs(sc);
		err = mxge_add_msix_irqs(sc);
	}
	return err;
#else
	return mxge_add_single_irq(sc);
#endif
}

static void
mxge_setup_serialize(struct mxge_softc *sc)
{
	int i = 0, slice;

	/* Main + rx + tx */
	sc->nserialize = (2 * sc->num_slices) + 1;
	sc->serializes =
	    kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *),
	    M_DEVBUF, M_WAITOK | M_ZERO);

	/*
	 * Setup serializes
	 *
	 * NOTE: Order is critical
	 */

	KKASSERT(i < sc->nserialize);
	sc->serializes[i++] = &sc->main_serialize;

	for (slice = 0; slice < sc->num_slices; ++slice) {
		KKASSERT(i < sc->nserialize);
		sc->serializes[i++] = &sc->ss[slice].rx_data.rx_serialize;
	}

	for (slice = 0; slice < sc->num_slices; ++slice) {
		KKASSERT(i < sc->nserialize);
		sc->serializes[i++] = &sc->ss[slice].tx.tx_serialize;
	}

	KKASSERT(i == sc->nserialize);
}
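
/*
 * ifnet serializer methods. Each hands the serializer array built by
 * mxge_setup_serialize() to the generic ifnet_serialize_array_*()
 * helpers, so the main, RX and TX serializers are always acquired in
 * one consistent order.
 */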
static void
mxge_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
{
	struct mxge_softc *sc = ifp->if_softc;

	ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz);
}

static void
mxge_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
{
	struct mxge_softc *sc = ifp->if_softc;

	ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz);
}

static int
mxge_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
{
	struct mxge_softc *sc = ifp->if_softc;

	return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz);
}

#ifdef INVARIANTS

static void
mxge_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
    boolean_t serialized)
{
	struct mxge_softc *sc = ifp->if_softc;

	ifnet_serialize_array_assert(sc->serializes, sc->nserialize,
	    slz, serialized);
}

#endif /* INVARIANTS */

static int
mxge_attach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int err, rid;

	/*
	 * Avoid rewriting half the lines in this file to use
	 * &sc->arpcom.ac_if instead.
	 */
	sc->ifp = ifp;
	sc->dev = dev;
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifmedia_init(&sc->media, 0, mxge_media_change, mxge_media_status);

	lwkt_serialize_init(&sc->main_serialize);

	mxge_fetch_tunables(sc);

	err = bus_dma_tag_create(NULL,		/* parent */
	    1,					/* alignment */
	    0,					/* boundary */
	    BUS_SPACE_MAXADDR,			/* low */
	    BUS_SPACE_MAXADDR,			/* high */
	    NULL, NULL,				/* filter */
	    BUS_SPACE_MAXSIZE_32BIT,		/* maxsize */
	    0,					/* num segs */
	    BUS_SPACE_MAXSIZE_32BIT,		/* maxsegsize */
	    0,					/* flags */
	    &sc->parent_dmat);			/* tag */
	if (err != 0) {
		device_printf(dev, "Err %d allocating parent dmat\n", err);
		goto failed;
	}

	callout_init_mp(&sc->co_hdl);

	mxge_setup_cfg_space(sc);

	/*
	 * Map the board into the kernel
	 */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto failed;
	}

	sc->sram = rman_get_virtual(sc->mem_res);
	sc->sram_size = 2*1024*1024 - (2*(48*1024) + (32*1024)) - 0x100;
	if (sc->sram_size > rman_get_size(sc->mem_res)) {
		device_printf(dev, "impossible memory region size %ld\n",
		    rman_get_size(sc->mem_res));
		err = ENXIO;
		goto failed;
	}

	/*
	 * Make a NUL-terminated copy of the EEPROM strings section of
	 * the LANai SRAM.
	 */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
	    rman_get_bushandle(sc->mem_res),
	    sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
	    sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE - 2);
	err = mxge_parse_strings(sc);
	if (err != 0) {
		device_printf(dev, "failed to parse EEPROM strings\n");
		goto failed;
	}

	/*
	 * Enable write combining for efficient use of the PCIe bus.
	 */
	mxge_enable_wc(sc);

	/*
	 * Allocate the out of band DMA memory.
	 */
	err = mxge_dma_alloc(sc, &sc->cmd_dma, sizeof(mxge_cmd_t), 64);
	if (err != 0) {
		device_printf(dev, "alloc cmd DMA buf failed\n");
		goto failed;
	}
	sc->cmd = sc->cmd_dma.dmem_addr;

	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0) {
		device_printf(dev, "alloc zeropad DMA buf failed\n");
		goto failed;
	}

	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0) {
		device_printf(dev, "alloc dmabench DMA buf failed\n");
		goto failed;
	}
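
	/*
	 * Of the three out-of-band buffers above, cmd_dma receives
	 * command responses from the firmware, zeropad_dma gives the
	 * NIC a 64-byte source of zeroes to DMA from (per its name,
	 * for padding), and dmabench_dma is a 4KB scratch page for
	 * the firmware's PCIe DMA benchmark.
	 */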

	/* Select & load the firmware */
	err = mxge_select_firmware(sc);
	if (err != 0) {
		device_printf(dev, "select firmware failed\n");
		goto failed;
	}

	mxge_slice_probe(sc);
	err = mxge_alloc_slices(sc);
	if (err != 0) {
		device_printf(dev, "alloc slices failed\n");
		goto failed;
	}

	/* Setup serializes */
	mxge_setup_serialize(sc);

	err = mxge_reset(sc, 0);
	if (err != 0) {
		device_printf(dev, "reset failed\n");
		goto failed;
	}

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(dev, "failed to allocate rings\n");
		goto failed;
	}

	ifp->if_baudrate = IF_Gbps(10UL);
	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO;
	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;

	ifp->if_capabilities |= IFCAP_VLAN_MTU;
#if 0
	/* Well, it's software, sigh */
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
#endif
	ifp->if_capenable = ifp->if_capabilities;

	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_init = mxge_init;
	ifp->if_ioctl = mxge_ioctl;
	ifp->if_start = mxge_start;
	ifp->if_watchdog = mxge_watchdog;
	ifp->if_serialize = mxge_serialize;
	ifp->if_deserialize = mxge_deserialize;
	ifp->if_tryserialize = mxge_tryserialize;
#ifdef INVARIANTS
	ifp->if_serialize_assert = mxge_serialize_assert;
#endif

	/* Increase TSO burst length */
	ifp->if_tsolen = 32 * ETHERMTU;

	/* Initialise the ifmedia structure */
	mxge_media_init(sc);
	mxge_media_probe(sc);

	ether_ifattach(ifp, sc->mac_addr, NULL);

	/*
	 * XXX
	 * We are not ready to do "gather" jumbo frames, so
	 * limit the MTU to MJUMPAGESIZE.
	 */
	sc->max_mtu = MJUMPAGESIZE -
	    ETHER_HDR_LEN - EVL_ENCAPLEN - MXGEFW_PAD - 1;
	sc->dying = 0;

	/* Must come after ether_ifattach() */
	err = mxge_add_irq(sc);
	if (err != 0) {
		device_printf(dev, "alloc and setup intr failed\n");
		ether_ifdetach(ifp);
		goto failed;
	}

	ifq_set_cpuid(&ifp->if_snd, rman_get_cpuid(sc->irq_res));
	ifq_set_hw_serialize(&ifp->if_snd, &sc->ss[0].tx.tx_serialize);

	mxge_add_sysctls(sc);

	callout_reset_bycpu(&sc->co_hdl, mxge_ticks, mxge_tick, sc,
	    rman_get_cpuid(sc->irq_res));
	return 0;

failed:
	mxge_detach(dev);
	return err;
}

static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	if (device_is_attached(dev)) {
		struct ifnet *ifp = sc->ifp;

		ifnet_serialize_all(ifp);

		sc->dying = 1;
		if (ifp->if_flags & IFF_RUNNING)
			mxge_close(sc, 1);
		callout_stop(&sc->co_hdl);

		bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);

		ifnet_deserialize_all(ifp);

		callout_terminate(&sc->co_hdl);

		ether_ifdetach(ifp);
	}
	ifmedia_removeall(&sc->media);

	if (sc->cmd != NULL && sc->zeropad_dma.dmem_addr != NULL &&
	    sc->sram != NULL)
		mxge_dummy_rdma(sc, 0);

	mxge_rem_sysctls(sc);
	mxge_free_rings(sc);

	/* MUST come after sysctls and rings are freed */
	mxge_free_slices(sc);

	if (sc->dmabench_dma.dmem_addr != NULL)
		mxge_dma_free(&sc->dmabench_dma);
	if (sc->zeropad_dma.dmem_addr != NULL)
		mxge_dma_free(&sc->zeropad_dma);
	if (sc->cmd_dma.dmem_addr != NULL)
		mxge_dma_free(&sc->cmd_dma);
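
	/*
	 * Bus-level resources go last: the interrupt and any MSI
	 * allocation, then the memory BAR, then the parent DMA tag
	 * that all other DMA memory was carved out of.
	 */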
	if (sc->irq_res != NULL) {
		bus_release_resource(dev, SYS_RES_IRQ, sc->irq_rid,
		    sc->irq_res);
	}
	if (sc->irq_type == PCI_INTR_TYPE_MSI)
		pci_release_msi(dev);

	if (sc->mem_res != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS,
		    sc->mem_res);
	}

	if (sc->parent_dmat != NULL)
		bus_dma_tag_destroy(sc->parent_dmat);

	return 0;
}

static int
mxge_shutdown(device_t dev)
{
	/* Nothing to do on shutdown */
	return 0;
}