1 /****************************************************************************** 2 3 Copyright (c) 2006-2009, Myricom Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Myricom Inc, nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 
27 28 $FreeBSD: src/sys/dev/mxge/if_mxge.c,v 1.63 2009/06/26 11:45:06 rwatson Exp $ 29 30 ***************************************************************************/ 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/linker.h> 35 #include <sys/firmware.h> 36 #include <sys/endian.h> 37 #include <sys/in_cksum.h> 38 #include <sys/sockio.h> 39 #include <sys/mbuf.h> 40 #include <sys/malloc.h> 41 #include <sys/kernel.h> 42 #include <sys/module.h> 43 #include <sys/serialize.h> 44 #include <sys/socket.h> 45 #include <sys/sysctl.h> 46 47 /* count xmits ourselves, rather than via drbr */ 48 #define NO_SLOW_STATS 49 #include <net/if.h> 50 #include <net/if_arp.h> 51 #include <net/ifq_var.h> 52 #include <net/ethernet.h> 53 #include <net/if_dl.h> 54 #include <net/if_media.h> 55 56 #include <net/bpf.h> 57 58 #include <net/if_types.h> 59 #include <net/vlan/if_vlan_var.h> 60 #include <net/zlib.h> 61 62 #include <netinet/in_systm.h> 63 #include <netinet/in.h> 64 #include <netinet/ip.h> 65 #include <netinet/tcp.h> 66 67 #include <sys/bus.h> 68 #include <sys/rman.h> 69 70 #include <bus/pci/pcireg.h> 71 #include <bus/pci/pcivar.h> 72 #include <bus/pci/pci_private.h> /* XXX for pci_cfg_restore */ 73 74 #include <vm/vm.h> /* for pmap_mapdev() */ 75 #include <vm/pmap.h> 76 77 #if defined(__i386) || defined(__x86_64) 78 #include <machine/specialreg.h> 79 #endif 80 81 #include <dev/netif/mxge/mxge_mcp.h> 82 #include <dev/netif/mxge/mcp_gen_header.h> 83 /*#define MXGE_FAKE_IFP*/ 84 #include <dev/netif/mxge/if_mxge_var.h> 85 #ifdef IFNET_BUF_RING 86 #include <sys/buf_ring.h> 87 #endif 88 89 #include "opt_inet.h" 90 91 /* tunable params */ 92 static int mxge_nvidia_ecrc_enable = 1; 93 static int mxge_force_firmware = 0; 94 static int mxge_intr_coal_delay = 30; 95 static int mxge_deassert_wait = 1; 96 static int mxge_flow_control = 1; 97 static int mxge_verbose = 0; 98 static int mxge_lro_cnt = 8; 99 static int mxge_ticks; 100 static int mxge_max_slices = 1; 101 static int 
mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT; 102 static int mxge_always_promisc = 0; 103 /* XXX: not yet */ 104 /* static int mxge_initial_mtu = ETHERMTU_JUMBO; */ 105 static int mxge_initial_mtu = ETHERMTU; 106 static char *mxge_fw_unaligned = "mxge_ethp_z8e"; 107 static char *mxge_fw_aligned = "mxge_eth_z8e"; 108 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; 109 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; 110 111 static int mxge_probe(device_t dev); 112 static int mxge_attach(device_t dev); 113 static int mxge_detach(device_t dev); 114 static int mxge_shutdown(device_t dev); 115 static void mxge_intr(void *arg); 116 117 static device_method_t mxge_methods[] = 118 { 119 /* Device interface */ 120 DEVMETHOD(device_probe, mxge_probe), 121 DEVMETHOD(device_attach, mxge_attach), 122 DEVMETHOD(device_detach, mxge_detach), 123 DEVMETHOD(device_shutdown, mxge_shutdown), 124 DEVMETHOD_END 125 }; 126 127 static driver_t mxge_driver = 128 { 129 "mxge", 130 mxge_methods, 131 sizeof(mxge_softc_t), 132 }; 133 134 static devclass_t mxge_devclass; 135 136 /* Declare ourselves to be a child of the PCI bus.*/ 137 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, NULL, NULL); 138 MODULE_DEPEND(mxge, firmware, 1, 1, 1); 139 MODULE_DEPEND(mxge, zlib, 1, 1, 1); 140 141 static int mxge_load_firmware(mxge_softc_t *sc, int adopt); 142 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 143 static int mxge_close(mxge_softc_t *sc); 144 static int mxge_open(mxge_softc_t *sc); 145 static void mxge_tick(void *arg); 146 147 /* XXX: we don't have Large Receive Offload support yet */ 148 inline int 149 mxge_lro_rx(struct mxge_slice_state *ss, struct mbuf *m_head, uint32_t csum) 150 { 151 (void)ss; 152 (void)m_head; 153 (void)csum; 154 return 1; 155 } 156 157 inline void 158 mxge_lro_flush(struct mxge_slice_state *ss, struct lro_entry *lro) 159 { 160 (void)ss; 161 (void)lro; 162 } 163 164 static int 165 mxge_probe(device_t dev) 166 { 167 
int rev; 168 169 170 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && 171 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) || 172 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) { 173 rev = pci_get_revid(dev); 174 switch (rev) { 175 case MXGE_PCI_REV_Z8E: 176 device_set_desc(dev, "Myri10G-PCIE-8A"); 177 break; 178 case MXGE_PCI_REV_Z8ES: 179 device_set_desc(dev, "Myri10G-PCIE-8B"); 180 break; 181 default: 182 device_set_desc(dev, "Myri10G-PCIE-8??"); 183 device_printf(dev, "Unrecognized rev %d NIC\n", 184 rev); 185 break; 186 } 187 return 0; 188 } 189 return ENXIO; 190 } 191 192 static void 193 mxge_enable_wc(mxge_softc_t *sc) 194 { 195 #if 0 196 #if defined(__i386) || defined(__x86_64) 197 vm_offset_t len; 198 int err; 199 200 sc->wc = 1; 201 len = rman_get_size(sc->mem_res); 202 err = pmap_change_attr((vm_offset_t) sc->sram, 203 len, PAT_WRITE_COMBINING); 204 if (err != 0) { 205 device_printf(sc->dev, "pmap_change_attr failed, %d\n", 206 err); 207 sc->wc = 0; 208 } 209 #endif 210 #else 211 sc->wc = 0; /* TBD: PAT support */ 212 #endif 213 } 214 215 216 /* callback to get our DMA address */ 217 static void 218 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, 219 int error) 220 { 221 if (error == 0) { 222 *(bus_addr_t *) arg = segs->ds_addr; 223 } 224 } 225 226 static int 227 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, 228 bus_size_t alignment) 229 { 230 int err; 231 device_t dev = sc->dev; 232 bus_size_t boundary, maxsegsize; 233 234 if (bytes > 4096 && alignment == 4096) { 235 boundary = 0; 236 maxsegsize = bytes; 237 } else { 238 boundary = 4096; 239 maxsegsize = 4096; 240 } 241 242 /* allocate DMAable memory tags */ 243 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 244 alignment, /* alignment */ 245 boundary, /* boundary */ 246 BUS_SPACE_MAXADDR, /* low */ 247 BUS_SPACE_MAXADDR, /* high */ 248 NULL, NULL, /* filter */ 249 bytes, /* maxsize */ 250 1, /* num segs */ 251 maxsegsize, /* maxsegsize */ 252 
BUS_DMA_COHERENT, /* flags */ 253 &dma->dmat); /* tag */ 254 if (err != 0) { 255 device_printf(dev, "couldn't alloc tag (err = %d)\n", err); 256 return err; 257 } 258 259 /* allocate DMAable memory & map */ 260 err = bus_dmamem_alloc(dma->dmat, &dma->addr, 261 (BUS_DMA_WAITOK | BUS_DMA_COHERENT 262 | BUS_DMA_ZERO), &dma->map); 263 if (err != 0) { 264 device_printf(dev, "couldn't alloc mem (err = %d)\n", err); 265 goto abort_with_dmat; 266 } 267 268 /* load the memory */ 269 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, 270 mxge_dmamap_callback, 271 (void *)&dma->bus_addr, 0); 272 if (err != 0) { 273 device_printf(dev, "couldn't load map (err = %d)\n", err); 274 goto abort_with_mem; 275 } 276 return 0; 277 278 abort_with_mem: 279 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 280 abort_with_dmat: 281 (void)bus_dma_tag_destroy(dma->dmat); 282 return err; 283 } 284 285 286 static void 287 mxge_dma_free(mxge_dma_t *dma) 288 { 289 bus_dmamap_unload(dma->dmat, dma->map); 290 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 291 (void)bus_dma_tag_destroy(dma->dmat); 292 } 293 294 /* 295 * The eeprom strings on the lanaiX have the format 296 * SN=x\0 297 * MAC=x:x:x:x:x:x\0 298 * PC=text\0 299 */ 300 301 static int 302 mxge_parse_strings(mxge_softc_t *sc) 303 { 304 #define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++) 305 306 char *ptr, *limit; 307 int i, found_mac; 308 309 ptr = sc->eeprom_strings; 310 limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE; 311 found_mac = 0; 312 while (ptr < limit && *ptr != '\0') { 313 if (memcmp(ptr, "MAC=", 4) == 0) { 314 ptr += 1; 315 sc->mac_addr_string = ptr; 316 for (i = 0; i < 6; i++) { 317 ptr += 3; 318 if ((ptr + 2) > limit) 319 goto abort; 320 sc->mac_addr[i] = strtoul(ptr, NULL, 16); 321 found_mac = 1; 322 } 323 } else if (memcmp(ptr, "PC=", 3) == 0) { 324 ptr += 3; 325 strncpy(sc->product_code_string, ptr, 326 sizeof (sc->product_code_string) - 1); 327 } else if (memcmp(ptr, "SN=", 3) == 0) { 328 ptr += 3; 
329 strncpy(sc->serial_number_string, ptr, 330 sizeof (sc->serial_number_string) - 1); 331 } 332 MXGE_NEXT_STRING(ptr); 333 } 334 335 if (found_mac) 336 return 0; 337 338 abort: 339 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 340 341 return ENXIO; 342 } 343 344 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 345 static void 346 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 347 { 348 uint32_t val; 349 unsigned long base, off; 350 char *va, *cfgptr; 351 device_t pdev, mcp55; 352 uint16_t vendor_id, device_id, word; 353 uintptr_t bus, slot, func, ivend, idev; 354 uint32_t *ptr32; 355 356 357 if (!mxge_nvidia_ecrc_enable) 358 return; 359 360 pdev = device_get_parent(device_get_parent(sc->dev)); 361 if (pdev == NULL) { 362 device_printf(sc->dev, "could not find parent?\n"); 363 return; 364 } 365 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 366 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 367 368 if (vendor_id != 0x10de) 369 return; 370 371 base = 0; 372 373 if (device_id == 0x005d) { 374 /* ck804, base address is magic */ 375 base = 0xe0000000UL; 376 } else if (device_id >= 0x0374 && device_id <= 0x378) { 377 /* mcp55, base address stored in chipset */ 378 mcp55 = pci_find_bsf(0, 0, 0); 379 if (mcp55 && 380 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 381 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 382 word = pci_read_config(mcp55, 0x90, 2); 383 base = ((unsigned long)word & 0x7ffeU) << 25; 384 } 385 } 386 if (!base) 387 return; 388 389 /* XXXX 390 Test below is commented because it is believed that doing 391 config read/write beyond 0xff will access the config space 392 for the next larger function. 
Uncomment this and remove 393 the hacky pmap_mapdev() way of accessing config space when 394 FreeBSD grows support for extended pcie config space access 395 */ 396 #if 0 397 /* See if we can, by some miracle, access the extended 398 config space */ 399 val = pci_read_config(pdev, 0x178, 4); 400 if (val != 0xffffffff) { 401 val |= 0x40; 402 pci_write_config(pdev, 0x178, val, 4); 403 return; 404 } 405 #endif 406 /* Rather than using normal pci config space writes, we must 407 * map the Nvidia config space ourselves. This is because on 408 * opteron/nvidia class machine the 0xe000000 mapping is 409 * handled by the nvidia chipset, that means the internal PCI 410 * device (the on-chip northbridge), or the amd-8131 bridge 411 * and things behind them are not visible by this method. 412 */ 413 414 BUS_READ_IVAR(device_get_parent(pdev), pdev, 415 PCI_IVAR_BUS, &bus); 416 BUS_READ_IVAR(device_get_parent(pdev), pdev, 417 PCI_IVAR_SLOT, &slot); 418 BUS_READ_IVAR(device_get_parent(pdev), pdev, 419 PCI_IVAR_FUNCTION, &func); 420 BUS_READ_IVAR(device_get_parent(pdev), pdev, 421 PCI_IVAR_VENDOR, &ivend); 422 BUS_READ_IVAR(device_get_parent(pdev), pdev, 423 PCI_IVAR_DEVICE, &idev); 424 425 off = base 426 + 0x00100000UL * (unsigned long)bus 427 + 0x00001000UL * (unsigned long)(func 428 + 8 * slot); 429 430 /* map it into the kernel */ 431 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 432 433 434 if (va == NULL) { 435 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 436 return; 437 } 438 /* get a pointer to the config space mapped into the kernel */ 439 cfgptr = va + (off & PAGE_MASK); 440 441 /* make sure that we can really access it */ 442 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 443 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 444 if (! 
(vendor_id == ivend && device_id == idev)) { 445 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 446 vendor_id, device_id); 447 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 448 return; 449 } 450 451 ptr32 = (uint32_t*)(cfgptr + 0x178); 452 val = *ptr32; 453 454 if (val == 0xffffffff) { 455 device_printf(sc->dev, "extended mapping failed\n"); 456 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 457 return; 458 } 459 *ptr32 = val | 0x40; 460 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 461 if (mxge_verbose) 462 device_printf(sc->dev, 463 "Enabled ECRC on upstream Nvidia bridge " 464 "at %d:%d:%d\n", 465 (int)bus, (int)slot, (int)func); 466 return; 467 } 468 #else 469 static void 470 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 471 { 472 device_printf(sc->dev, 473 "Nforce 4 chipset on non-x86/x86_64!?!?!\n"); 474 return; 475 } 476 #endif 477 478 479 static int 480 mxge_dma_test(mxge_softc_t *sc, int test_type) 481 { 482 mxge_cmd_t cmd; 483 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr; 484 int status; 485 uint32_t len; 486 char *test = " "; 487 488 489 /* Run a small DMA test. 490 * The magic multipliers to the length tell the firmware 491 * to do DMA read, write, or read+write tests. The 492 * results are returned in cmd.data0. The upper 16 493 * bits of the return is the number of transfers completed. 494 * The lower 16 bits is the time in 0.5us ticks that the 495 * transfers took to complete. 
496 */ 497 498 len = sc->tx_boundary; 499 500 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 501 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 502 cmd.data2 = len * 0x10000; 503 status = mxge_send_cmd(sc, test_type, &cmd); 504 if (status != 0) { 505 test = "read"; 506 goto abort; 507 } 508 sc->read_dma = ((cmd.data0>>16) * len * 2) / 509 (cmd.data0 & 0xffff); 510 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 511 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 512 cmd.data2 = len * 0x1; 513 status = mxge_send_cmd(sc, test_type, &cmd); 514 if (status != 0) { 515 test = "write"; 516 goto abort; 517 } 518 sc->write_dma = ((cmd.data0>>16) * len * 2) / 519 (cmd.data0 & 0xffff); 520 521 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 522 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 523 cmd.data2 = len * 0x10001; 524 status = mxge_send_cmd(sc, test_type, &cmd); 525 if (status != 0) { 526 test = "read/write"; 527 goto abort; 528 } 529 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 530 (cmd.data0 & 0xffff); 531 532 abort: 533 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 534 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 535 test, status); 536 537 return status; 538 } 539 540 /* 541 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 542 * when the PCI-E Completion packets are aligned on an 8-byte 543 * boundary. Some PCI-E chip sets always align Completion packets; on 544 * the ones that do not, the alignment can be enforced by enabling 545 * ECRC generation (if supported). 546 * 547 * When PCI-E Completion packets are not aligned, it is actually more 548 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 
549 * 550 * If the driver can neither enable ECRC nor verify that it has 551 * already been enabled, then it must use a firmware image which works 552 * around unaligned completion packets (ethp_z8e.dat), and it should 553 * also ensure that it never gives the device a Read-DMA which is 554 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 555 * enabled, then the driver should use the aligned (eth_z8e.dat) 556 * firmware image, and set tx_boundary to 4KB. 557 */ 558 559 static int 560 mxge_firmware_probe(mxge_softc_t *sc) 561 { 562 device_t dev = sc->dev; 563 int reg, status; 564 uint16_t pectl; 565 566 sc->tx_boundary = 4096; 567 /* 568 * Verify the max read request size was set to 4KB 569 * before trying the test with 4KB. 570 */ 571 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 572 pectl = pci_read_config(dev, reg + 0x8, 2); 573 if ((pectl & (5 << 12)) != (5 << 12)) { 574 device_printf(dev, "Max Read Req. size != 4k (0x%x\n", 575 pectl); 576 sc->tx_boundary = 2048; 577 } 578 } 579 580 /* 581 * load the optimized firmware (which assumes aligned PCIe 582 * completions) in order to see if it works on this host. 583 */ 584 sc->fw_name = mxge_fw_aligned; 585 status = mxge_load_firmware(sc, 1); 586 if (status != 0) { 587 return status; 588 } 589 590 /* 591 * Enable ECRC if possible 592 */ 593 mxge_enable_nvidia_ecrc(sc); 594 595 /* 596 * Run a DMA test which watches for unaligned completions and 597 * aborts on the first one seen. 598 */ 599 600 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 601 if (status == 0) 602 return 0; /* keep the aligned firmware */ 603 604 if (status != E2BIG) 605 device_printf(dev, "DMA test failed: %d\n", status); 606 if (status == ENOSYS) 607 device_printf(dev, "Falling back to ethp! 
" 608 "Please install up to date fw\n"); 609 return status; 610 } 611 612 static int 613 mxge_select_firmware(mxge_softc_t *sc) 614 { 615 int aligned = 0; 616 617 618 if (mxge_force_firmware != 0) { 619 if (mxge_force_firmware == 1) 620 aligned = 1; 621 else 622 aligned = 0; 623 if (mxge_verbose) 624 device_printf(sc->dev, 625 "Assuming %s completions (forced)\n", 626 aligned ? "aligned" : "unaligned"); 627 goto abort; 628 } 629 630 /* if the PCIe link width is 4 or less, we can use the aligned 631 firmware and skip any checks */ 632 if (sc->link_width != 0 && sc->link_width <= 4) { 633 device_printf(sc->dev, 634 "PCIe x%d Link, expect reduced performance\n", 635 sc->link_width); 636 aligned = 1; 637 goto abort; 638 } 639 640 if (0 == mxge_firmware_probe(sc)) 641 return 0; 642 643 abort: 644 if (aligned) { 645 sc->fw_name = mxge_fw_aligned; 646 sc->tx_boundary = 4096; 647 } else { 648 sc->fw_name = mxge_fw_unaligned; 649 sc->tx_boundary = 2048; 650 } 651 return (mxge_load_firmware(sc, 0)); 652 } 653 654 union qualhack 655 { 656 const char *ro_char; 657 char *rw_char; 658 }; 659 660 static int 661 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 662 { 663 664 665 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 666 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 667 be32toh(hdr->mcp_type)); 668 return EIO; 669 } 670 671 /* save firmware version for sysctl */ 672 strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version)); 673 if (mxge_verbose) 674 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 675 676 ksscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 677 &sc->fw_ver_minor, &sc->fw_ver_tiny); 678 679 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 680 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 681 device_printf(sc->dev, "Found firmware version %s\n", 682 sc->fw_version); 683 device_printf(sc->dev, "Driver needs %d.%d\n", 684 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 685 return EINVAL; 686 } 687 return 0; 688 689 } 690 
691 static void * 692 z_alloc(void *nil, u_int items, u_int size) 693 { 694 void *ptr; 695 696 ptr = kmalloc(items * size, M_TEMP, M_NOWAIT); 697 return ptr; 698 } 699 700 static void 701 z_free(void *nil, void *ptr) 702 { 703 kfree(ptr, M_TEMP); 704 } 705 706 707 static int 708 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 709 { 710 z_stream zs; 711 char *inflate_buffer; 712 const struct firmware *fw; 713 const mcp_gen_header_t *hdr; 714 unsigned hdr_offset; 715 int status; 716 unsigned int i; 717 size_t fw_len; 718 719 fw = firmware_get(sc->fw_name); 720 if (fw == NULL) { 721 device_printf(sc->dev, "Could not find firmware image %s\n", 722 sc->fw_name); 723 return ENOENT; 724 } 725 726 727 728 /* setup zlib and decompress f/w */ 729 bzero(&zs, sizeof (zs)); 730 zs.zalloc = z_alloc; 731 zs.zfree = z_free; 732 status = inflateInit(&zs); 733 if (status != Z_OK) { 734 status = EIO; 735 goto abort_with_fw; 736 } 737 738 /* the uncompressed size is stored as the firmware version, 739 which would otherwise go unused */ 740 fw_len = (size_t) fw->version; 741 inflate_buffer = kmalloc(fw_len, M_TEMP, M_NOWAIT); 742 if (inflate_buffer == NULL) 743 goto abort_with_zs; 744 zs.avail_in = fw->datasize; 745 zs.next_in = __DECONST(char *, fw->data); 746 zs.avail_out = fw_len; 747 zs.next_out = inflate_buffer; 748 status = inflate(&zs, Z_FINISH); 749 if (status != Z_STREAM_END) { 750 device_printf(sc->dev, "zlib %d\n", status); 751 status = EIO; 752 goto abort_with_buffer; 753 } 754 755 /* check id */ 756 hdr_offset = htobe32(*(const uint32_t *) 757 (inflate_buffer + MCP_HEADER_PTR_OFFSET)); 758 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 759 device_printf(sc->dev, "Bad firmware file"); 760 status = EIO; 761 goto abort_with_buffer; 762 } 763 hdr = (const void*)(inflate_buffer + hdr_offset); 764 765 status = mxge_validate_firmware(sc, hdr); 766 if (status != 0) 767 goto abort_with_buffer; 768 769 /* Copy the inflated firmware to NIC SRAM. 
*/ 770 for (i = 0; i < fw_len; i += 256) { 771 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 772 inflate_buffer + i, 773 min(256U, (unsigned)(fw_len - i))); 774 wmb(); 775 wmb(); 776 } 777 778 *limit = fw_len; 779 status = 0; 780 abort_with_buffer: 781 kfree(inflate_buffer, M_TEMP); 782 abort_with_zs: 783 inflateEnd(&zs); 784 abort_with_fw: 785 firmware_put(fw, FIRMWARE_UNLOAD); 786 return status; 787 } 788 789 /* 790 * Enable or disable periodic RDMAs from the host to make certain 791 * chipsets resend dropped PCIe messages 792 */ 793 794 static void 795 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 796 { 797 char buf_bytes[72]; 798 volatile uint32_t *confirm; 799 volatile char *submit; 800 uint32_t *buf, dma_low, dma_high; 801 int i; 802 803 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 804 805 /* clear confirmation addr */ 806 confirm = (volatile uint32_t *)sc->cmd; 807 *confirm = 0; 808 wmb(); 809 810 /* send an rdma command to the PCIe engine, and wait for the 811 response in the confirmation address. The firmware should 812 write a -1 there to indicate it is alive and well 813 */ 814 815 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 816 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 817 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 818 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 819 buf[2] = htobe32(0xffffffff); /* confirm data */ 820 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 821 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 822 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 823 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 824 buf[5] = htobe32(enable); /* enable? 
*/ 825 826 827 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 828 829 mxge_pio_copy(submit, buf, 64); 830 wmb(); 831 DELAY(1000); 832 wmb(); 833 i = 0; 834 while (*confirm != 0xffffffff && i < 20) { 835 DELAY(1000); 836 i++; 837 } 838 if (*confirm != 0xffffffff) { 839 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 840 (enable ? "enable" : "disable"), confirm, 841 *confirm); 842 } 843 return; 844 } 845 846 static int 847 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 848 { 849 mcp_cmd_t *buf; 850 char buf_bytes[sizeof(*buf) + 8]; 851 volatile mcp_cmd_response_t *response = sc->cmd; 852 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 853 uint32_t dma_low, dma_high; 854 int err, sleep_total = 0; 855 856 /* 857 * We may be called during attach, before if_serializer is available. 858 * This is not a fast path, just check for NULL 859 */ 860 861 if (sc->ifp->if_serializer) 862 ASSERT_SERIALIZED(sc->ifp->if_serializer); 863 864 /* ensure buf is aligned to 8 bytes */ 865 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 866 867 buf->data0 = htobe32(data->data0); 868 buf->data1 = htobe32(data->data1); 869 buf->data2 = htobe32(data->data2); 870 buf->cmd = htobe32(cmd); 871 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 872 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 873 874 buf->response_addr.low = htobe32(dma_low); 875 buf->response_addr.high = htobe32(dma_high); 876 877 878 response->result = 0xffffffff; 879 wmb(); 880 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 881 882 /* wait up to 20ms */ 883 err = EAGAIN; 884 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 885 bus_dmamap_sync(sc->cmd_dma.dmat, 886 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 887 wmb(); 888 switch (be32toh(response->result)) { 889 case 0: 890 data->data0 = be32toh(response->data); 891 err = 0; 892 break; 893 case 0xffffffff: 894 DELAY(1000); 895 break; 896 case MXGEFW_CMD_UNKNOWN: 897 err = ENOSYS; 898 
break; 899 case MXGEFW_CMD_ERROR_UNALIGNED: 900 err = E2BIG; 901 break; 902 case MXGEFW_CMD_ERROR_BUSY: 903 err = EBUSY; 904 break; 905 default: 906 device_printf(sc->dev, 907 "mxge: command %d " 908 "failed, result = %d\n", 909 cmd, be32toh(response->result)); 910 err = ENXIO; 911 break; 912 } 913 if (err != EAGAIN) 914 break; 915 } 916 if (err == EAGAIN) 917 device_printf(sc->dev, "mxge: command %d timed out" 918 "result = %d\n", 919 cmd, be32toh(response->result)); 920 return err; 921 } 922 923 static int 924 mxge_adopt_running_firmware(mxge_softc_t *sc) 925 { 926 struct mcp_gen_header *hdr; 927 const size_t bytes = sizeof (struct mcp_gen_header); 928 size_t hdr_offset; 929 int status; 930 931 /* find running firmware header */ 932 hdr_offset = htobe32(*(volatile uint32_t *) 933 (sc->sram + MCP_HEADER_PTR_OFFSET)); 934 935 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 936 device_printf(sc->dev, 937 "Running firmware has bad header offset (%d)\n", 938 (int)hdr_offset); 939 return EIO; 940 } 941 942 /* copy header of running firmware from SRAM to host memory to 943 * validate firmware */ 944 hdr = kmalloc(bytes, M_DEVBUF, M_NOWAIT); 945 if (hdr == NULL) { 946 device_printf(sc->dev, "could not kmalloc firmware hdr\n"); 947 return ENOMEM; 948 } 949 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 950 rman_get_bushandle(sc->mem_res), 951 hdr_offset, (char *)hdr, bytes); 952 status = mxge_validate_firmware(sc, hdr); 953 kfree(hdr, M_DEVBUF); 954 955 /* 956 * check to see if adopted firmware has bug where adopting 957 * it will cause broadcasts to be filtered unless the NIC 958 * is kept in ALLMULTI mode 959 */ 960 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 961 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 962 sc->adopted_rx_filter_bug = 1; 963 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 964 "working around rx filter bug\n", 965 sc->fw_ver_major, sc->fw_ver_minor, 966 sc->fw_ver_tiny); 967 } 968 969 return status; 970 } 971 
972 973 static int 974 mxge_load_firmware(mxge_softc_t *sc, int adopt) 975 { 976 volatile uint32_t *confirm; 977 volatile char *submit; 978 char buf_bytes[72]; 979 uint32_t *buf, size, dma_low, dma_high; 980 int status, i; 981 982 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 983 984 size = sc->sram_size; 985 status = mxge_load_firmware_helper(sc, &size); 986 if (status) { 987 if (!adopt) 988 return status; 989 /* Try to use the currently running firmware, if 990 it is new enough */ 991 status = mxge_adopt_running_firmware(sc); 992 if (status) { 993 device_printf(sc->dev, 994 "failed to adopt running firmware\n"); 995 return status; 996 } 997 device_printf(sc->dev, 998 "Successfully adopted running firmware\n"); 999 if (sc->tx_boundary == 4096) { 1000 device_printf(sc->dev, 1001 "Using firmware currently running on NIC" 1002 ". For optimal\n"); 1003 device_printf(sc->dev, 1004 "performance consider loading optimized " 1005 "firmware\n"); 1006 } 1007 sc->fw_name = mxge_fw_unaligned; 1008 sc->tx_boundary = 2048; 1009 return 0; 1010 } 1011 /* clear confirmation addr */ 1012 confirm = (volatile uint32_t *)sc->cmd; 1013 *confirm = 0; 1014 wmb(); 1015 /* send a reload command to the bootstrap MCP, and wait for the 1016 response in the confirmation address. The firmware should 1017 write a -1 there to indicate it is alive and well 1018 */ 1019 1020 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 1021 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 1022 1023 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 1024 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 1025 buf[2] = htobe32(0xffffffff); /* confirm data */ 1026 1027 /* FIX: All newest firmware should un-protect the bottom of 1028 the sram before handoff. However, the very first interfaces 1029 do not. 
Therefore the handoff copy must skip the first 8 bytes 1030 */ 1031 /* where the code starts*/ 1032 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 1033 buf[4] = htobe32(size - 8); /* length of code */ 1034 buf[5] = htobe32(8); /* where to copy to */ 1035 buf[6] = htobe32(0); /* where to jump to */ 1036 1037 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 1038 mxge_pio_copy(submit, buf, 64); 1039 wmb(); 1040 DELAY(1000); 1041 wmb(); 1042 i = 0; 1043 while (*confirm != 0xffffffff && i < 20) { 1044 DELAY(1000*10); 1045 i++; 1046 bus_dmamap_sync(sc->cmd_dma.dmat, 1047 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 1048 } 1049 if (*confirm != 0xffffffff) { 1050 device_printf(sc->dev,"handoff failed (%p = 0x%x)", 1051 confirm, *confirm); 1052 1053 return ENXIO; 1054 } 1055 return 0; 1056 } 1057 1058 static int 1059 mxge_update_mac_address(mxge_softc_t *sc) 1060 { 1061 mxge_cmd_t cmd; 1062 uint8_t *addr = sc->mac_addr; 1063 int status; 1064 1065 1066 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 1067 | (addr[2] << 8) | addr[3]); 1068 1069 cmd.data1 = ((addr[4] << 8) | (addr[5])); 1070 1071 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 1072 return status; 1073 } 1074 1075 static int 1076 mxge_change_pause(mxge_softc_t *sc, int pause) 1077 { 1078 mxge_cmd_t cmd; 1079 int status; 1080 1081 if (pause) 1082 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, 1083 &cmd); 1084 else 1085 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, 1086 &cmd); 1087 1088 if (status) { 1089 device_printf(sc->dev, "Failed to set flow control mode\n"); 1090 return ENXIO; 1091 } 1092 sc->pause = pause; 1093 return 0; 1094 } 1095 1096 static void 1097 mxge_change_promisc(mxge_softc_t *sc, int promisc) 1098 { 1099 mxge_cmd_t cmd; 1100 int status; 1101 1102 if( sc->ifp->if_serializer) 1103 ASSERT_SERIALIZED(sc->ifp->if_serializer); 1104 if (mxge_always_promisc) 1105 promisc = 1; 1106 1107 if (promisc) 1108 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, 1109 &cmd); 1110 else 1111 
status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, 1112 &cmd); 1113 1114 if (status) { 1115 device_printf(sc->dev, "Failed to set promisc mode\n"); 1116 } 1117 } 1118 1119 static void 1120 mxge_set_multicast_list(mxge_softc_t *sc) 1121 { 1122 mxge_cmd_t cmd; 1123 struct ifmultiaddr *ifma; 1124 struct ifnet *ifp = sc->ifp; 1125 int err; 1126 1127 if (ifp->if_serializer) 1128 ASSERT_SERIALIZED(ifp->if_serializer); 1129 1130 /* This firmware is known to not support multicast */ 1131 if (!sc->fw_multicast_support) 1132 return; 1133 1134 /* Disable multicast filtering while we play with the lists*/ 1135 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1136 if (err != 0) { 1137 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," 1138 " error status: %d\n", err); 1139 return; 1140 } 1141 1142 if (sc->adopted_rx_filter_bug) 1143 return; 1144 1145 if (ifp->if_flags & IFF_ALLMULTI) 1146 /* request to disable multicast filtering, so quit here */ 1147 return; 1148 1149 /* Flush all the filters */ 1150 1151 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1152 if (err != 0) { 1153 device_printf(sc->dev, 1154 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" 1155 ", error status: %d\n", err); 1156 return; 1157 } 1158 1159 /* Walk the multicast list, and add each address */ 1160 1161 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1162 if (ifma->ifma_addr->sa_family != AF_LINK) 1163 continue; 1164 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1165 &cmd.data0, 4); 1166 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, 1167 &cmd.data1, 2); 1168 cmd.data0 = htonl(cmd.data0); 1169 cmd.data1 = htonl(cmd.data1); 1170 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1171 if (err != 0) { 1172 device_printf(sc->dev, "Failed " 1173 "MXGEFW_JOIN_MULTICAST_GROUP, error status:" 1174 "%d\t", err); 1175 /* abort, leaving multicast filtering off */ 1176 return; 1177 } 1178 } 1179 /* Enable multicast filtering */ 1180 err = mxge_send_cmd(sc, 
			    MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}

/*
 * Return the largest MTU the firmware can support: the full
 * MXGEFW_MAX_MTU when page-sized clusters are big enough or when the
 * firmware accepts virtually contiguous jumbos, otherwise limited by
 * MJUMPAGESIZE.  MXGEFW_PAD is reserved out of each buffer.
 */
static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if it we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}

/*
 * Reset the NIC and re-establish all driver/firmware shared state:
 * interrupt queue sizes and DMA addresses (when interrupts_setup is
 * set), RSS slice configuration, interrupt coalescing/ack/deassert
 * register pointers, and the per-slice counters.  Returns 0 on
 * success, ENXIO/firmware status on failure.  NOTE(review): the
 * ordering of the firmware commands below is mandated by the
 * firmware (see the comment about MXGEFW_CMD_GET_MAX_RSS_QUEUES).
 */
static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);


	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}


	if (interrupts_setup) {
		/* Now exchange information about interrupts  */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	/* Resolve the SRAM offsets of the interrupt-control registers;
	   errors are OR'd together and checked once below. */
	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);


	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);


	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}


	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);


	/* run a DMA benchmark */
	(void)
	    mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		/* each slice owns a pair of irq-claim registers */
		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lro_bad_csum = 0;
		ss->lro_queued = 0;
		ss->lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			ss->fw_stats->valid = 0;
			ss->fw_stats->send_done_count = 0;
		}
	}
	sc->rdma_tags_available = 15;
	/* re-apply the host-side settings lost by the reset */
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	return status;
}

/*
 * Sysctl handler: read/write the interrupt coalescing delay (usecs).
 * Valid range is 1..1000000; the new value is written directly to
 * the NIC's coalescing register under the interface serializer.
 */
static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0) {
		return err;
	}
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	lwkt_serialize_enter(sc->ifp->if_serializer);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	lwkt_serialize_exit(sc->ifp->if_serializer);
	return err;
}

/*
 * Sysctl handler: enable/disable link flow control via
 * mxge_change_pause(), under the interface serializer.
 */
static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0) {
		return err;
	}
	if (enabled == sc->pause)
		return 0;

	lwkt_serialize_enter(sc->ifp->if_serializer);
	err = mxge_change_pause(sc, enabled);
	lwkt_serialize_exit(sc->ifp->if_serializer);
	return err;
}

/*
 * Apply a new LRO merge-queue count.  Caller must hold the interface
 * serializer.  If the interface is running, it is closed and
 * reopened so the new setting takes effect.
 */
static int
mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
{
	struct ifnet *ifp;
	int err = 0;

	ifp = sc->ifp;
	if (lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	else
		ifp->if_capenable |= IFCAP_LRO;
	sc->lro_cnt = lro_cnt;
	if (ifp->if_flags & IFF_RUNNING) {
		mxge_close(sc);
		err = mxge_open(sc);
	}
	return err;
}

/*
 * Sysctl handler: read/write the LRO merge-queue count (0..128);
 * 0 disables LRO.
 */
static int
mxge_change_lro(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int lro_cnt;
	int err;

	sc = arg1;
	lro_cnt = sc->lro_cnt;
	err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
	if (err != 0)
		return err;

	if (lro_cnt == sc->lro_cnt)
		return 0;

	if (lro_cnt > 128)
		return EINVAL;

	lwkt_serialize_enter(sc->ifp->if_serializer);
	err = mxge_change_lro_locked(sc, lro_cnt);
	lwkt_serialize_exit(sc->ifp->if_serializer);
	return err;
}

/*
 * Sysctl handler for read-only firmware counters, which are stored
 * big-endian.  The byteswapped value is passed to sysctl_handle_int
 * through arg2 (with arg1 NULL), so the counter itself is never
 * written.
 */
static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}

/*
 * Tear down all sysctl state created by mxge_add_sysctls():
 * per-slice contexts first, then the slice parent node, then the
 * device's own context.  Safe to call when nothing was registered.
 */
static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int slice;

	if (sc->slice_sysctl_tree == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		if (ss == NULL || ss->sysctl_tree == NULL)
			continue;
		sysctl_ctx_free(&ss->sysctl_ctx);
		ss->sysctl_tree = NULL;
	}
	sysctl_ctx_free(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = NULL;
	sysctl_ctx_free(&sc->sysctl_ctx);
	sc->sysctl_tree = NULL;

1473 } 1474 1475 static void 1476 mxge_add_sysctls(mxge_softc_t *sc) 1477 { 1478 struct sysctl_ctx_list *ctx; 1479 struct sysctl_oid_list *children; 1480 mcp_irq_data_t *fw; 1481 struct mxge_slice_state *ss; 1482 int slice; 1483 char slice_num[8]; 1484 1485 ctx = &sc->sysctl_ctx; 1486 sysctl_ctx_init(ctx); 1487 sc->sysctl_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_hw), 1488 OID_AUTO, 1489 device_get_nameunit(sc->dev), 1490 CTLFLAG_RD, 0, ""); 1491 if (sc->sysctl_tree == NULL) { 1492 device_printf(sc->dev, "can't add sysctl node\n"); 1493 return; 1494 } 1495 1496 children = SYSCTL_CHILDREN(sc->sysctl_tree); 1497 fw = sc->ss[0].fw_stats; 1498 1499 /* random information */ 1500 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1501 "firmware_version", 1502 CTLFLAG_RD, &sc->fw_version, 1503 0, "firmware version"); 1504 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1505 "serial_number", 1506 CTLFLAG_RD, &sc->serial_number_string, 1507 0, "serial number"); 1508 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1509 "product_code", 1510 CTLFLAG_RD, &sc->product_code_string, 1511 0, "product_code"); 1512 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1513 "pcie_link_width", 1514 CTLFLAG_RD, &sc->link_width, 1515 0, "tx_boundary"); 1516 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1517 "tx_boundary", 1518 CTLFLAG_RD, &sc->tx_boundary, 1519 0, "tx_boundary"); 1520 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1521 "write_combine", 1522 CTLFLAG_RD, &sc->wc, 1523 0, "write combining PIO?"); 1524 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1525 "read_dma_MBs", 1526 CTLFLAG_RD, &sc->read_dma, 1527 0, "DMA Read speed in MB/s"); 1528 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1529 "write_dma_MBs", 1530 CTLFLAG_RD, &sc->write_dma, 1531 0, "DMA Write speed in MB/s"); 1532 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1533 "read_write_dma_MBs", 1534 CTLFLAG_RD, &sc->read_write_dma, 1535 0, "DMA concurrent Read/Write speed in MB/s"); 1536 1537 1538 /* performance related tunables */ 1539 SYSCTL_ADD_PROC(ctx, children, 
OID_AUTO, 1540 "intr_coal_delay", 1541 CTLTYPE_INT|CTLFLAG_RW, sc, 1542 0, mxge_change_intr_coal, 1543 "I", "interrupt coalescing delay in usecs"); 1544 1545 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1546 "flow_control_enabled", 1547 CTLTYPE_INT|CTLFLAG_RW, sc, 1548 0, mxge_change_flow_control, 1549 "I", "interrupt coalescing delay in usecs"); 1550 1551 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1552 "deassert_wait", 1553 CTLFLAG_RW, &mxge_deassert_wait, 1554 0, "Wait for IRQ line to go low in ihandler"); 1555 1556 /* stats block from firmware is in network byte order. 1557 Need to swap it */ 1558 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1559 "link_up", 1560 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 1561 0, mxge_handle_be32, 1562 "I", "link up"); 1563 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1564 "rdma_tags_available", 1565 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 1566 0, mxge_handle_be32, 1567 "I", "rdma_tags_available"); 1568 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1569 "dropped_bad_crc32", 1570 CTLTYPE_INT|CTLFLAG_RD, 1571 &fw->dropped_bad_crc32, 1572 0, mxge_handle_be32, 1573 "I", "dropped_bad_crc32"); 1574 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1575 "dropped_bad_phy", 1576 CTLTYPE_INT|CTLFLAG_RD, 1577 &fw->dropped_bad_phy, 1578 0, mxge_handle_be32, 1579 "I", "dropped_bad_phy"); 1580 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1581 "dropped_link_error_or_filtered", 1582 CTLTYPE_INT|CTLFLAG_RD, 1583 &fw->dropped_link_error_or_filtered, 1584 0, mxge_handle_be32, 1585 "I", "dropped_link_error_or_filtered"); 1586 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1587 "dropped_link_overflow", 1588 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 1589 0, mxge_handle_be32, 1590 "I", "dropped_link_overflow"); 1591 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1592 "dropped_multicast_filtered", 1593 CTLTYPE_INT|CTLFLAG_RD, 1594 &fw->dropped_multicast_filtered, 1595 0, mxge_handle_be32, 1596 "I", "dropped_multicast_filtered"); 1597 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1598 
"dropped_no_big_buffer", 1599 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 1600 0, mxge_handle_be32, 1601 "I", "dropped_no_big_buffer"); 1602 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1603 "dropped_no_small_buffer", 1604 CTLTYPE_INT|CTLFLAG_RD, 1605 &fw->dropped_no_small_buffer, 1606 0, mxge_handle_be32, 1607 "I", "dropped_no_small_buffer"); 1608 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1609 "dropped_overrun", 1610 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 1611 0, mxge_handle_be32, 1612 "I", "dropped_overrun"); 1613 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1614 "dropped_pause", 1615 CTLTYPE_INT|CTLFLAG_RD, 1616 &fw->dropped_pause, 1617 0, mxge_handle_be32, 1618 "I", "dropped_pause"); 1619 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1620 "dropped_runt", 1621 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 1622 0, mxge_handle_be32, 1623 "I", "dropped_runt"); 1624 1625 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1626 "dropped_unicast_filtered", 1627 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 1628 0, mxge_handle_be32, 1629 "I", "dropped_unicast_filtered"); 1630 1631 /* verbose printing? 
*/ 1632 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1633 "verbose", 1634 CTLFLAG_RW, &mxge_verbose, 1635 0, "verbose printing"); 1636 1637 /* lro */ 1638 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1639 "lro_cnt", 1640 CTLTYPE_INT|CTLFLAG_RW, sc, 1641 0, mxge_change_lro, 1642 "I", "number of lro merge queues"); 1643 1644 1645 /* add counters exported for debugging from all slices */ 1646 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1647 sc->slice_sysctl_tree = 1648 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO, 1649 "slice", CTLFLAG_RD, 0, ""); 1650 1651 for (slice = 0; slice < sc->num_slices; slice++) { 1652 ss = &sc->ss[slice]; 1653 sysctl_ctx_init(&ss->sysctl_ctx); 1654 ctx = &ss->sysctl_ctx; 1655 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1656 ksprintf(slice_num, "%d", slice); 1657 ss->sysctl_tree = 1658 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num, 1659 CTLFLAG_RD, 0, ""); 1660 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1661 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1662 "rx_small_cnt", 1663 CTLFLAG_RD, &ss->rx_small.cnt, 1664 0, "rx_small_cnt"); 1665 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1666 "rx_big_cnt", 1667 CTLFLAG_RD, &ss->rx_big.cnt, 1668 0, "rx_small_cnt"); 1669 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1670 "lro_flushed", CTLFLAG_RD, &ss->lro_flushed, 1671 0, "number of lro merge queues flushed"); 1672 1673 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1674 "lro_queued", CTLFLAG_RD, &ss->lro_queued, 1675 0, "number of frames appended to lro merge" 1676 "queues"); 1677 1678 #ifndef IFNET_BUF_RING 1679 /* only transmit from slice 0 for now */ 1680 if (slice > 0) 1681 continue; 1682 #endif 1683 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1684 "tx_req", 1685 CTLFLAG_RD, &ss->tx.req, 1686 0, "tx_req"); 1687 1688 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1689 "tx_done", 1690 CTLFLAG_RD, &ss->tx.done, 1691 0, "tx_done"); 1692 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1693 "tx_pkt_done", 1694 CTLFLAG_RD, &ss->tx.pkt_done, 1695 0, "tx_done"); 1696 
SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1697 "tx_stall", 1698 CTLFLAG_RD, &ss->tx.stall, 1699 0, "tx_stall"); 1700 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1701 "tx_wake", 1702 CTLFLAG_RD, &ss->tx.wake, 1703 0, "tx_wake"); 1704 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1705 "tx_defrag", 1706 CTLFLAG_RD, &ss->tx.defrag, 1707 0, "tx_defrag"); 1708 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1709 "tx_queue_active", 1710 CTLFLAG_RD, &ss->tx.queue_active, 1711 0, "tx_queue_active"); 1712 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1713 "tx_activate", 1714 CTLFLAG_RD, &ss->tx.activate, 1715 0, "tx_activate"); 1716 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1717 "tx_deactivate", 1718 CTLFLAG_RD, &ss->tx.deactivate, 1719 0, "tx_deactivate"); 1720 } 1721 } 1722 1723 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1724 backwards one at a time and handle ring wraps */ 1725 1726 static inline void 1727 mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1728 mcp_kreq_ether_send_t *src, int cnt) 1729 { 1730 int idx, starting_slot; 1731 starting_slot = tx->req; 1732 while (cnt > 1) { 1733 cnt--; 1734 idx = (starting_slot + cnt) & tx->mask; 1735 mxge_pio_copy(&tx->lanai[idx], 1736 &src[cnt], sizeof(*src)); 1737 wmb(); 1738 } 1739 } 1740 1741 /* 1742 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1743 * at most 32 bytes at a time, so as to avoid involving the software 1744 * pio handler in the nic. 
We re-write the first segment's flags 1745 * to mark them valid only after writing the entire chain 1746 */ 1747 1748 static inline void 1749 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 1750 int cnt) 1751 { 1752 int idx, i; 1753 uint32_t *src_ints; 1754 volatile uint32_t *dst_ints; 1755 mcp_kreq_ether_send_t *srcp; 1756 volatile mcp_kreq_ether_send_t *dstp, *dst; 1757 uint8_t last_flags; 1758 1759 idx = tx->req & tx->mask; 1760 1761 last_flags = src->flags; 1762 src->flags = 0; 1763 wmb(); 1764 dst = dstp = &tx->lanai[idx]; 1765 srcp = src; 1766 1767 if ((idx + cnt) < tx->mask) { 1768 for (i = 0; i < (cnt - 1); i += 2) { 1769 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1770 wmb(); /* force write every 32 bytes */ 1771 srcp += 2; 1772 dstp += 2; 1773 } 1774 } else { 1775 /* submit all but the first request, and ensure 1776 that it is submitted below */ 1777 mxge_submit_req_backwards(tx, src, cnt); 1778 i = 0; 1779 } 1780 if (i < cnt) { 1781 /* submit the first request */ 1782 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1783 wmb(); /* barrier before setting valid flag */ 1784 } 1785 1786 /* re-write the last 32-bits with the valid flags */ 1787 src->flags = last_flags; 1788 src_ints = (uint32_t *)src; 1789 src_ints+=3; 1790 dst_ints = (volatile uint32_t *)dst; 1791 dst_ints+=3; 1792 *dst_ints = *src_ints; 1793 tx->req += cnt; 1794 wmb(); 1795 } 1796 1797 #if IFCAP_TSO4 1798 1799 static void 1800 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m, 1801 int busdma_seg_cnt, int ip_off) 1802 { 1803 mxge_tx_ring_t *tx; 1804 mcp_kreq_ether_send_t *req; 1805 bus_dma_segment_t *seg; 1806 struct ip *ip; 1807 struct tcphdr *tcp; 1808 uint32_t low, high_swapped; 1809 int len, seglen, cum_len, cum_len_next; 1810 int next_is_first, chop, cnt, rdma_count, small; 1811 uint16_t pseudo_hdr_offset, cksum_offset, mss; 1812 uint8_t flags, flags_next; 1813 static int once; 1814 1815 mss = m->m_pkthdr.tso_segsz; 1816 1817 /* negative cum_len signifies to the 1818 
* send loop that we are still in the 1819 * header portion of the TSO packet. 1820 */ 1821 1822 /* ensure we have the ethernet, IP and TCP 1823 header together in the first mbuf, copy 1824 it to a scratch buffer if not */ 1825 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 1826 m_copydata(m, 0, ip_off + sizeof (*ip), 1827 ss->scratch); 1828 ip = (struct ip *)(ss->scratch + ip_off); 1829 } else { 1830 ip = (struct ip *)(mtod(m, char *) + ip_off); 1831 } 1832 if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2) 1833 + sizeof (*tcp))) { 1834 m_copydata(m, 0, ip_off + (ip->ip_hl << 2) 1835 + sizeof (*tcp), ss->scratch); 1836 ip = (struct ip *)(mtod(m, char *) + ip_off); 1837 } 1838 1839 tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2)); 1840 cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2)); 1841 1842 /* TSO implies checksum offload on this hardware */ 1843 cksum_offset = ip_off + (ip->ip_hl << 2); 1844 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1845 1846 1847 /* for TSO, pseudo_hdr_offset holds mss. 1848 * The firmware figures out where to put 1849 * the checksum by parsing the header. */ 1850 pseudo_hdr_offset = htobe16(mss); 1851 1852 tx = &ss->tx; 1853 req = tx->req_list; 1854 seg = tx->seg_list; 1855 cnt = 0; 1856 rdma_count = 0; 1857 /* "rdma_count" is the number of RDMAs belonging to the 1858 * current packet BEFORE the current send request. For 1859 * non-TSO packets, this is equal to "count". 1860 * For TSO packets, rdma_count needs to be reset 1861 * to 0 after a segment cut. 1862 * 1863 * The rdma_count field of the send request is 1864 * the number of RDMAs of the packet starting at 1865 * that request. For TSO send requests with one ore more cuts 1866 * in the middle, this is the number of RDMAs starting 1867 * after the last cut in the request. All previous 1868 * segments before the last cut implicitly have 1 RDMA. 
1869 * 1870 * Since the number of RDMAs is not known beforehand, 1871 * it must be filled-in retroactively - after each 1872 * segmentation cut or at the end of the entire packet. 1873 */ 1874 1875 while (busdma_seg_cnt) { 1876 /* Break the busdma segment up into pieces*/ 1877 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1878 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1879 len = seg->ds_len; 1880 1881 while (len) { 1882 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1883 seglen = len; 1884 cum_len_next = cum_len + seglen; 1885 (req-rdma_count)->rdma_count = rdma_count + 1; 1886 if (__predict_true(cum_len >= 0)) { 1887 /* payload */ 1888 chop = (cum_len_next > mss); 1889 cum_len_next = cum_len_next % mss; 1890 next_is_first = (cum_len_next == 0); 1891 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1892 flags_next |= next_is_first * 1893 MXGEFW_FLAGS_FIRST; 1894 rdma_count |= -(chop | next_is_first); 1895 rdma_count += chop & !next_is_first; 1896 } else if (cum_len_next >= 0) { 1897 /* header ends */ 1898 rdma_count = -1; 1899 cum_len_next = 0; 1900 seglen = -cum_len; 1901 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1902 flags_next = MXGEFW_FLAGS_TSO_PLD | 1903 MXGEFW_FLAGS_FIRST | 1904 (small * MXGEFW_FLAGS_SMALL); 1905 } 1906 1907 req->addr_high = high_swapped; 1908 req->addr_low = htobe32(low); 1909 req->pseudo_hdr_offset = pseudo_hdr_offset; 1910 req->pad = 0; 1911 req->rdma_count = 1; 1912 req->length = htobe16(seglen); 1913 req->cksum_offset = cksum_offset; 1914 req->flags = flags | ((cum_len & 1) * 1915 MXGEFW_FLAGS_ALIGN_ODD); 1916 low += seglen; 1917 len -= seglen; 1918 cum_len = cum_len_next; 1919 flags = flags_next; 1920 req++; 1921 cnt++; 1922 rdma_count++; 1923 if (__predict_false(cksum_offset > seglen)) 1924 cksum_offset -= seglen; 1925 else 1926 cksum_offset = 0; 1927 if (__predict_false(cnt > tx->max_desc)) 1928 goto drop; 1929 } 1930 busdma_seg_cnt--; 1931 seg++; 1932 } 1933 (req-rdma_count)->rdma_count = rdma_count; 1934 1935 do { 1936 req--; 1937 
req->flags |= MXGEFW_FLAGS_TSO_LAST; 1938 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 1939 1940 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1941 mxge_submit_req(tx, tx->req_list, cnt); 1942 #ifdef IFNET_BUF_RING 1943 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 1944 /* tell the NIC to start polling this slice */ 1945 *tx->send_go = 1; 1946 tx->queue_active = 1; 1947 tx->activate++; 1948 wmb(); 1949 } 1950 #endif 1951 return; 1952 1953 drop: 1954 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 1955 m_freem(m); 1956 ss->oerrors++; 1957 if (!once) { 1958 kprintf("tx->max_desc exceeded via TSO!\n"); 1959 kprintf("mss = %d, %ld, %d!\n", mss, 1960 (long)seg - (long)tx->seg_list, tx->max_desc); 1961 once = 1; 1962 } 1963 return; 1964 1965 } 1966 1967 #endif /* IFCAP_TSO4 */ 1968 1969 #ifdef MXGE_NEW_VLAN_API 1970 /* 1971 * We reproduce the software vlan tag insertion from 1972 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" 1973 * vlan tag insertion. We need to advertise this in order to have the 1974 * vlan interface respect our csum offload flags. 1975 */ 1976 static struct mbuf * 1977 mxge_vlan_tag_insert(struct mbuf *m) 1978 { 1979 struct ether_vlan_header *evl; 1980 1981 M_PREPEND(m, EVL_ENCAPLEN, MB_DONTWAIT); 1982 if (__predict_false(m == NULL)) 1983 return NULL; 1984 if (m->m_len < sizeof(*evl)) { 1985 m = m_pullup(m, sizeof(*evl)); 1986 if (__predict_false(m == NULL)) 1987 return NULL; 1988 } 1989 /* 1990 * Transform the Ethernet header into an Ethernet header 1991 * with 802.1Q encapsulation. 
	 */
	evl = mtod(m, struct ether_vlan_header *);
	/* slide the DA/SA up by 4 bytes to open room for the tag */
	bcopy((char *)evl + EVL_ENCAPLEN,
	      (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
	evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
	evl->evl_tag = htons(m->m_pkthdr.ether_vlantag);
	m->m_flags &= ~M_VLANTAG;
	return m;
}
#endif /* MXGE_NEW_VLAN_API */

/*
 * Map one outbound mbuf chain for DMA and convert it into a chain of
 * firmware send requests on slice ss, handling software VLAN tag
 * insertion, checksum offload setup, runt padding (to 60 bytes via
 * the shared zeropad buffer), and delegating TSO frames to
 * mxge_encap_tso().  Consumes the mbuf (frees it on any error).
 */
static void
mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
{
	mxge_softc_t *sc;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct mbuf *m_tmp;
	mxge_tx_ring_t *tx;
	struct ip *ip;
	int cnt, cum_len, err, i, idx, odd_flag, ip_off;
	uint16_t pseudo_hdr_offset;
	uint8_t flags, cksum_offset;


	sc = ss->sc;
	tx = &ss->tx;

	ip_off = sizeof (struct ether_header);
#ifdef MXGE_NEW_VLAN_API
	if (m->m_flags & M_VLANTAG) {
		m = mxge_vlan_tag_insert(m);
		if (__predict_false(m == NULL))
			goto drop;
		ip_off += EVL_ENCAPLEN;
	}
#endif
	/* (try to) map the frame for DMA */
	idx = tx->req & tx->mask;
	err = bus_dmamap_load_mbuf_segment(tx->dmat, tx->info[idx].map,
					   m, tx->seg_list, 1, &cnt,
					   BUS_DMA_NOWAIT);
	if (__predict_false(err == EFBIG)) {
		/* Too many segments in the chain.  Try
		   to defrag */
		m_tmp = m_defrag(m, MB_DONTWAIT);
		if (m_tmp == NULL) {
			goto drop;
		}
		ss->tx.defrag++;
		m = m_tmp;
		err = bus_dmamap_load_mbuf_segment(tx->dmat,
					      tx->info[idx].map,
					      m, tx->seg_list, 1, &cnt,
					      BUS_DMA_NOWAIT);
	}
	if (__predict_false(err != 0)) {
		device_printf(sc->dev, "bus_dmamap_load_mbuf_segment returned %d"
			      " packet len = %d\n", err, m->m_pkthdr.len);
		goto drop;
	}
	bus_dmamap_sync(tx->dmat, tx->info[idx].map,
			BUS_DMASYNC_PREWRITE);
	tx->info[idx].m = m;

#if IFCAP_TSO4
	/* TSO is different enough, we handle it in another routine */
	if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
		mxge_encap_tso(ss, m, cnt, ip_off);
		return;
	}
#endif

	req = tx->req_list;
	cksum_offset = 0;
	pseudo_hdr_offset = 0;
	flags = MXGEFW_FLAGS_NO_TSO;

	/* checksum offloading? */
	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
		/* ensure ip header is in first mbuf, copy
		   it to a scratch buffer if not */
		if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
			m_copydata(m, 0, ip_off + sizeof (*ip),
				   ss->scratch);
			ip = (struct ip *)(ss->scratch + ip_off);
		} else {
			ip = (struct ip *)(mtod(m, char *) + ip_off);
		}
		cksum_offset = ip_off + (ip->ip_hl << 2);
		pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
		pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
		req->cksum_offset = cksum_offset;
		flags |= MXGEFW_FLAGS_CKSUM;
		odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
	} else {
		odd_flag = 0;
	}
	if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
		flags |= MXGEFW_FLAGS_SMALL;

	/* convert segments into a request list */
	cum_len = 0;
	seg = tx->seg_list;
	req->flags = MXGEFW_FLAGS_FIRST;
	for (i = 0; i < cnt; i++) {
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		req->length = htobe16(seg->ds_len);
		req->cksum_offset = cksum_offset;
		/* cksum_offset only applies to the segment containing
		   the start of the checksummed region */
		if (cksum_offset > seg->ds_len)
			cksum_offset -= seg->ds_len;
		else
			cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cum_len += seg->ds_len;
		seg++;
		req++;
		req->flags = 0;
	}
	req--;
	/* pad runts to 60 bytes */
	if (cum_len < 60) {
		req++;
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->length = htobe16(60 - cum_len);
		req->cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cnt++;
	}

	tx->req_list[0].rdma_count = cnt;
#if 0
	/* print what the firmware will see */
	for (i = 0; i < cnt; i++) {
		kprintf("%d: addr: 0x%x 0x%x len:%d pso%d,"
		    "cso:%d, flags:0x%x, rdma:%d\n",
		    i, (int)ntohl(tx->req_list[i].addr_high),
		    (int)ntohl(tx->req_list[i].addr_low),
		    (int)ntohs(tx->req_list[i].length),
		    (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
		    tx->req_list[i].cksum_offset, tx->req_list[i].flags,
		    tx->req_list[i].rdma_count);
	}
	kprintf("--------------\n");
#endif
	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	m_freem(m);
	ss->oerrors++;
	return;
}

/*
 * Drain the interface send queue into slice ss while transmit
 * descriptors remain, then mark the queue OACTIVE if we stopped
 * because the ring filled.  Caller holds the interface serializer.
 */
static inline void
mxge_start_locked(struct mxge_slice_state *ss)
{
	mxge_softc_t *sc;
	struct mbuf *m;
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;

	sc = ss->sc;
	ifp = sc->ifp;
	tx = &ss->tx;
	while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
		m = ifq_dequeue(&ifp->if_snd, NULL);
		if (m == NULL) {
			return;
		}
		/* let BPF see it */
		BPF_MTAP(ifp, m);

		/* give it to the nic */
		mxge_encap(ss, m);
	}
	/* ran out of transmit slots */
	if (!ifq_is_oactive(&ifp->if_snd)) {
		ifq_set_oactive(&ifp->if_snd);
		tx->stall++;
	}
}

/* ifnet if_start entry point; transmit is single-queue for now. */
static void
mxge_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct mxge_slice_state *ss;

	ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq);
	ASSERT_SERIALIZED(sc->ifp->if_serializer);
	/* only use the first slice for now */
	ss = &sc->ss[0];
	mxge_start_locked(ss);
}

/*
 * copy an array of mcp_kreq_ether_recv_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.   We re-write the first segment's low
 * DMA address to mark it valid only after we write the entire chunk
 * in a burst
 */
static inline void
mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
		mcp_kreq_ether_recv_t *src)
{
	uint32_t low;

	/* invalidate the first descriptor until the whole burst lands */
	low = src->addr_low;
	src->addr_low = 0xffffffff;
	mxge_pio_copy(dst, src, 4 * sizeof (*src));
	wmb();
	mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
	wmb();
	src->addr_low = low;
	dst->addr_low = low;
	wmb();
}

/*
 * Refill small-receive slot idx with a fresh mbuf header, load it for
 * DMA, and record the bus address in the shadow ring.  Every 8th slot
 * triggers a burst submit of the previous 8 descriptors (even on
 * allocation failure, so the firmware ring keeps advancing).
 * Returns 0 or an errno.
 */
static int
mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg;
	struct mbuf *m;
	mxge_rx_ring_t *rx = &ss->rx_small;
	int cnt, err;

	m = m_gethdr(MB_DONTWAIT, MT_DATA);
	if (m == NULL) {
		rx->alloc_fail++;
		err = ENOBUFS;
		goto done;
	}
	m->m_len = m->m_pkthdr.len = MHLEN;
	err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m,
				      &seg, 1, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		kprintf("can't dmamap small (%d)\n", err);
		m_free(m);
		goto done;
	}
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));

done:
	if ((idx & 7) == 7)
		mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
	return err;
}


/*
 * Refill big-receive slot idx.  Like mxge_get_buf_small() but a big
 * buffer may span rx->nbufs descriptors (MXGE_VIRT_JUMBOS), so the
 * 8-descriptor burst check is applied to each covered slot.
 */
static int
mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg[3];
	struct mbuf *m;
	mxge_rx_ring_t *rx = &ss->rx_big;
	int cnt, err, i;

	if (rx->cl_size == MCLBYTES)
		m = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR);
	else {
#if 0
		m = m_getjcl(MB_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
#else
		/*
		 * XXX: allocate normal sized buffers for big buffers.
		 * We should be fine as long as we don't get any jumbo frames
		 */
		m = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR);
#endif
	}
	if (m == NULL) {
		rx->alloc_fail++;
		err = ENOBUFS;
		goto done;
	}
	/* NOTE(review): this first assignment is immediately
	   overwritten by the next line and appears redundant */
	m->m_pkthdr.len = 0;
	m->m_len = m->m_pkthdr.len = rx->mlen;
	err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m,
					   seg, 1, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		kprintf("can't dmamap big (%d)\n", err);
		m_free(m);
		goto done;
	}
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));

#if MXGE_VIRT_JUMBOS
	for (i = 1; i < cnt; i++) {
		rx->shadow[idx + i].addr_low =
			htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
		rx->shadow[idx + i].addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
	}
#endif

done:
	for (i = 0; i < rx->nbufs; i++) {
		if ((idx & 7) == 7) {
			mxge_submit_8rx(&rx->lanai[idx - 7],
					&rx->shadow[idx - 7]);
		}
		idx++;
	}
	return err;
}

/*
 * Myri10GE hardware checksums are not valid if the sender
 * padded the frame with non-zero padding.  This is because
 * the firmware just does a simple 16-bit 1s complement
 * checksum across the entire frame, excluding the first 14
 * bytes.  It is best to simply to check the checksum and
 * tell the stack about it only if the checksum is good
 */

/*
 * Validate the firmware's raw frame checksum for an IPv4 TCP/UDP
 * packet by folding in the pseudo-header; returns 0 when the
 * checksum verifies, non-zero otherwise (non-IPv4/TCP/UDP frames
 * always return 1, i.e. "not verified").
 */
static inline uint16_t
mxge_rx_csum(struct mbuf *m, int csum)
{
	struct ether_header *eh;
	struct ip *ip;
	uint16_t c;

	eh = mtod(m, struct ether_header *);

	/* only deal with IPv4 TCP & UDP for now */
	if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
		return 1;
	ip = (struct ip *)(eh + 1);
	if (__predict_false(ip->ip_p != IPPROTO_TCP &&
			    ip->ip_p != IPPROTO_UDP))
		return 1;
#ifdef INET
	c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
		      htonl(ntohs(csum) + ntohs(ip->ip_len) +
			    - (ip->ip_hl << 2) + ip->ip_p));
#else
	c = 1;
#endif
	c ^= 0xffff;
	return (c);
}

/*
 * Strip an 802.1Q tag from a received frame, adjusting the
 * firmware's partial checksum to account for the removed bytes.
 */
static void
mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
{
	struct ether_vlan_header *evl;
	uint32_t partial;

	evl = mtod(m, struct ether_vlan_header *);

	/*
	 * fix checksum by subtracting EVL_ENCAPLEN bytes
	 * after what the firmware thought was the end of the ethernet
	 * header.
2378 */ 2379 2380 /* put checksum into host byte order */ 2381 *csum = ntohs(*csum); 2382 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2383 (*csum) += ~partial; 2384 (*csum) += ((*csum) < ~partial); 2385 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2386 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2387 2388 /* restore checksum to network byte order; 2389 later consumers expect this */ 2390 *csum = htons(*csum); 2391 2392 /* save the tag */ 2393 #ifdef MXGE_NEW_VLAN_API 2394 m->m_pkthdr.ether_vlantag = ntohs(evl->evl_tag); 2395 #else 2396 { 2397 struct m_tag *mtag; 2398 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), 2399 MB_DONTWAIT); 2400 if (mtag == NULL) 2401 return; 2402 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); 2403 m_tag_prepend(m, mtag); 2404 } 2405 2406 #endif 2407 m->m_flags |= M_VLANTAG; 2408 2409 /* 2410 * Remove the 802.1q header by copying the Ethernet 2411 * addresses over it and adjusting the beginning of 2412 * the data in the mbuf. The encapsulated Ethernet 2413 * type field is already in place. 
2414 */ 2415 bcopy((char *)evl, (char *)evl + EVL_ENCAPLEN, 2416 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2417 m_adj(m, EVL_ENCAPLEN); 2418 } 2419 2420 2421 static inline void 2422 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2423 { 2424 mxge_softc_t *sc; 2425 struct ifnet *ifp; 2426 struct mbuf *m; 2427 struct ether_header *eh; 2428 mxge_rx_ring_t *rx; 2429 bus_dmamap_t old_map; 2430 int idx; 2431 uint16_t tcpudp_csum; 2432 2433 sc = ss->sc; 2434 ifp = sc->ifp; 2435 rx = &ss->rx_big; 2436 idx = rx->cnt & rx->mask; 2437 rx->cnt += rx->nbufs; 2438 /* save a pointer to the received mbuf */ 2439 m = rx->info[idx].m; 2440 /* try to replace the received mbuf */ 2441 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2442 /* drop the frame -- the old mbuf is re-cycled */ 2443 IFNET_STAT_INC(ifp, ierrors, 1); 2444 return; 2445 } 2446 2447 /* unmap the received buffer */ 2448 old_map = rx->info[idx].map; 2449 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2450 bus_dmamap_unload(rx->dmat, old_map); 2451 2452 /* swap the bus_dmamap_t's */ 2453 rx->info[idx].map = rx->extra_map; 2454 rx->extra_map = old_map; 2455 2456 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2457 * aligned */ 2458 m->m_data += MXGEFW_PAD; 2459 2460 m->m_pkthdr.rcvif = ifp; 2461 m->m_len = m->m_pkthdr.len = len; 2462 ss->ipackets++; 2463 eh = mtod(m, struct ether_header *); 2464 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2465 mxge_vlan_tag_remove(m, &csum); 2466 } 2467 /* if the checksum is valid, mark it in the mbuf header */ 2468 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2469 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2470 return; 2471 /* otherwise, it was a UDP frame, or a TCP frame which 2472 we could not do LRO on. 
Tell the stack that the 2473 checksum is good */ 2474 m->m_pkthdr.csum_data = 0xffff; 2475 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2476 } 2477 #if 0 2478 /* flowid only valid if RSS hashing is enabled */ 2479 if (sc->num_slices > 1) { 2480 m->m_pkthdr.flowid = (ss - sc->ss); 2481 m->m_flags |= M_FLOWID; 2482 } 2483 #endif 2484 ifp->if_input(ifp, m); 2485 } 2486 2487 static inline void 2488 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2489 { 2490 mxge_softc_t *sc; 2491 struct ifnet *ifp; 2492 struct ether_header *eh; 2493 struct mbuf *m; 2494 mxge_rx_ring_t *rx; 2495 bus_dmamap_t old_map; 2496 int idx; 2497 uint16_t tcpudp_csum; 2498 2499 sc = ss->sc; 2500 ifp = sc->ifp; 2501 rx = &ss->rx_small; 2502 idx = rx->cnt & rx->mask; 2503 rx->cnt++; 2504 /* save a pointer to the received mbuf */ 2505 m = rx->info[idx].m; 2506 /* try to replace the received mbuf */ 2507 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2508 /* drop the frame -- the old mbuf is re-cycled */ 2509 IFNET_STAT_INC(ifp, ierrors, 1); 2510 return; 2511 } 2512 2513 /* unmap the received buffer */ 2514 old_map = rx->info[idx].map; 2515 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2516 bus_dmamap_unload(rx->dmat, old_map); 2517 2518 /* swap the bus_dmamap_t's */ 2519 rx->info[idx].map = rx->extra_map; 2520 rx->extra_map = old_map; 2521 2522 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2523 * aligned */ 2524 m->m_data += MXGEFW_PAD; 2525 2526 m->m_pkthdr.rcvif = ifp; 2527 m->m_len = m->m_pkthdr.len = len; 2528 ss->ipackets++; 2529 eh = mtod(m, struct ether_header *); 2530 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2531 mxge_vlan_tag_remove(m, &csum); 2532 } 2533 /* if the checksum is valid, mark it in the mbuf header */ 2534 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2535 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2536 return; 2537 /* otherwise, it was a UDP frame, or a TCP frame 
which
		   we could not do LRO on.  Tell the stack that the
		   checksum is good */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
	}
#if 0
	/* flowid only valid if RSS hashing is enabled */
	if (sc->num_slices > 1) {
		m->m_pkthdr.flowid = (ss - sc->ss);
		m->m_flags |= M_FLOWID;
	}
#endif
	ifp->if_input(ifp, m);
}

/*
 * XXX
 *
 * Inlining the call to this function causes mxge_intr() to grow too large
 * for GCC's stack size limits (which shouldn't take into account inlining
 * of leaf functions at one call site anyway).  Inlining is definitely a
 * good idea in this case though, so mark the function appropriately.
 */
/*
 * Drain the slice's receive-completion ring, dispatching each entry
 * to the small- or big-buffer handler based on its length, then flush
 * any LRO sessions that accumulated data during the pass.
 */
static inline __always_inline void
mxge_clean_rx_done(struct mxge_slice_state *ss)
{
	mxge_rx_done_t *rx_done = &ss->rx_done;
	int limit = 0;
	uint16_t length;
	uint16_t checksum;

	/* a zero length marks the end of valid completions */
	while (rx_done->entry[rx_done->idx].length != 0) {
		length = ntohs(rx_done->entry[rx_done->idx].length);
		/* clear the entry so the NIC can reuse it */
		rx_done->entry[rx_done->idx].length = 0;
		checksum = rx_done->entry[rx_done->idx].checksum;
		if (length <= (MHLEN - MXGEFW_PAD))
			mxge_rx_done_small(ss, length, checksum);
		else
			mxge_rx_done_big(ss, length, checksum);
		rx_done->cnt++;
		rx_done->idx = rx_done->cnt & rx_done->mask;

		/* limit potential for livelock */
		if (__predict_false(++limit > rx_done->mask / 2))
			break;
	}
#ifdef INET
	/* flush all LRO sessions started during this pass */
	while (!SLIST_EMPTY(&ss->lro_active)) {
		struct lro_entry *lro = SLIST_FIRST(&ss->lro_active);
		SLIST_REMOVE_HEAD(&ss->lro_active, next);
		mxge_lro_flush(ss, lro);
	}
#endif
}


/*
 * Reap transmit completions up to firmware index mcp_idx: unload DMA
 * maps, free mbufs, update per-slice stats, and clear OACTIVE once
 * enough ring space has been reclaimed.
 */
static inline void
mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
{
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;
	struct mbuf *m;
	bus_dmamap_t map;
	int idx;

	tx = &ss->tx;
	ifp = 
ss->sc->ifp; 2605 ASSERT_SERIALIZED(ifp->if_serializer); 2606 while (tx->pkt_done != mcp_idx) { 2607 idx = tx->done & tx->mask; 2608 tx->done++; 2609 m = tx->info[idx].m; 2610 /* mbuf and DMA map only attached to the first 2611 segment per-mbuf */ 2612 if (m != NULL) { 2613 ss->obytes += m->m_pkthdr.len; 2614 if (m->m_flags & M_MCAST) 2615 ss->omcasts++; 2616 ss->opackets++; 2617 tx->info[idx].m = NULL; 2618 map = tx->info[idx].map; 2619 bus_dmamap_unload(tx->dmat, map); 2620 m_freem(m); 2621 } 2622 if (tx->info[idx].flag) { 2623 tx->info[idx].flag = 0; 2624 tx->pkt_done++; 2625 } 2626 } 2627 2628 /* If we have space, clear OACTIVE to tell the stack that 2629 its OK to send packets */ 2630 if (ifq_is_oactive(&ifp->if_snd) && 2631 tx->req - tx->done < (tx->mask + 1)/4) { 2632 ifq_clr_oactive(&ifp->if_snd); 2633 ss->tx.wake++; 2634 mxge_start_locked(ss); 2635 } 2636 #ifdef IFNET_BUF_RING 2637 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { 2638 /* let the NIC stop polling this queue, since there 2639 * are no more transmits pending */ 2640 if (tx->req == tx->done) { 2641 *tx->send_stop = 1; 2642 tx->queue_active = 0; 2643 tx->deactivate++; 2644 wmb(); 2645 } 2646 } 2647 #endif 2648 2649 } 2650 2651 static struct mxge_media_type mxge_xfp_media_types[] = 2652 { 2653 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2654 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2655 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2656 {0, (1 << 5), "10GBASE-ER"}, 2657 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2658 {0, (1 << 3), "10GBASE-SW"}, 2659 {0, (1 << 2), "10GBASE-LW"}, 2660 {0, (1 << 1), "10GBASE-EW"}, 2661 {0, (1 << 0), "Reserved"} 2662 }; 2663 static struct mxge_media_type mxge_sfp_media_types[] = 2664 { 2665 {0, (1 << 7), "Reserved"}, 2666 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2667 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2668 {IFM_10G_SR, (1 << 4), "10GBASE-SR"} 2669 }; 2670 2671 static void 2672 mxge_set_media(mxge_softc_t *sc, int type) 2673 { 2674 sc->media_flags |= type; 2675 
ifmedia_add(&sc->media, sc->media_flags, 0, NULL); 2676 ifmedia_set(&sc->media, sc->media_flags); 2677 } 2678 2679 2680 /* 2681 * Determine the media type for a NIC. Some XFPs will identify 2682 * themselves only when their link is up, so this is initiated via a 2683 * link up interrupt. However, this can potentially take up to 2684 * several milliseconds, so it is run via the watchdog routine, rather 2685 * than in the interrupt handler itself. This need only be done 2686 * once, not each time the link is up. 2687 */ 2688 static void 2689 mxge_media_probe(mxge_softc_t *sc) 2690 { 2691 mxge_cmd_t cmd; 2692 char *cage_type; 2693 char *ptr; 2694 struct mxge_media_type *mxge_media_types = NULL; 2695 int i, err, ms, mxge_media_type_entries; 2696 uint32_t byte; 2697 2698 sc->need_media_probe = 0; 2699 2700 /* if we've already set a media type, we're done */ 2701 if (sc->media_flags != (IFM_ETHER | IFM_AUTO)) 2702 return; 2703 2704 /* 2705 * parse the product code to deterimine the interface type 2706 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2707 * after the 3rd dash in the driver's cached copy of the 2708 * EEPROM's product code string. 
2709 */ 2710 ptr = sc->product_code_string; 2711 if (ptr == NULL) { 2712 device_printf(sc->dev, "Missing product code\n"); 2713 } 2714 2715 for (i = 0; i < 3; i++, ptr++) { 2716 ptr = index(ptr, '-'); 2717 if (ptr == NULL) { 2718 device_printf(sc->dev, 2719 "only %d dashes in PC?!?\n", i); 2720 return; 2721 } 2722 } 2723 if (*ptr == 'C') { 2724 /* -C is CX4 */ 2725 mxge_set_media(sc, IFM_10G_CX4); 2726 return; 2727 } 2728 else if (*ptr == 'Q') { 2729 /* -Q is Quad Ribbon Fiber */ 2730 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2731 /* FreeBSD has no media type for Quad ribbon fiber */ 2732 return; 2733 } 2734 2735 if (*ptr == 'R') { 2736 /* -R is XFP */ 2737 mxge_media_types = mxge_xfp_media_types; 2738 mxge_media_type_entries = NELEM(mxge_xfp_media_types); 2739 byte = MXGE_XFP_COMPLIANCE_BYTE; 2740 cage_type = "XFP"; 2741 } 2742 2743 if (*ptr == 'S' || *(ptr +1) == 'S') { 2744 /* -S or -2S is SFP+ */ 2745 mxge_media_types = mxge_sfp_media_types; 2746 mxge_media_type_entries = NELEM(mxge_sfp_media_types); 2747 cage_type = "SFP+"; 2748 byte = 3; 2749 } 2750 2751 if (mxge_media_types == NULL) { 2752 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2753 return; 2754 } 2755 2756 /* 2757 * At this point we know the NIC has an XFP cage, so now we 2758 * try to determine what is in the cage by using the 2759 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2760 * register. 
We read just one byte, which may take over 2761 * a millisecond 2762 */ 2763 2764 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2765 cmd.data1 = byte; 2766 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2767 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 2768 device_printf(sc->dev, "failed to read XFP\n"); 2769 } 2770 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 2771 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 2772 } 2773 if (err != MXGEFW_CMD_OK) { 2774 return; 2775 } 2776 2777 /* now we wait for the data to be cached */ 2778 cmd.data0 = byte; 2779 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2780 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 2781 DELAY(1000); 2782 cmd.data0 = byte; 2783 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2784 } 2785 if (err != MXGEFW_CMD_OK) { 2786 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 2787 cage_type, err, ms); 2788 return; 2789 } 2790 2791 if (cmd.data0 == mxge_media_types[0].bitmask) { 2792 if (mxge_verbose) 2793 device_printf(sc->dev, "%s:%s\n", cage_type, 2794 mxge_media_types[0].name); 2795 mxge_set_media(sc, IFM_10G_CX4); 2796 return; 2797 } 2798 for (i = 1; i < mxge_media_type_entries; i++) { 2799 if (cmd.data0 & mxge_media_types[i].bitmask) { 2800 if (mxge_verbose) 2801 device_printf(sc->dev, "%s:%s\n", 2802 cage_type, 2803 mxge_media_types[i].name); 2804 2805 mxge_set_media(sc, mxge_media_types[i].flag); 2806 return; 2807 } 2808 } 2809 device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type, 2810 cmd.data0); 2811 2812 return; 2813 } 2814 2815 static void 2816 mxge_intr(void *arg) 2817 { 2818 struct mxge_slice_state *ss = arg; 2819 mxge_softc_t *sc = ss->sc; 2820 mcp_irq_data_t *stats = ss->fw_stats; 2821 mxge_tx_ring_t *tx = &ss->tx; 2822 mxge_rx_done_t *rx_done = &ss->rx_done; 2823 uint32_t send_done_count; 2824 uint8_t valid; 2825 2826 2827 #ifndef IFNET_BUF_RING 2828 /* an interrupt on a non-zero slice is implicitly valid 2829 since MSI-X irqs are not shared */ 2830 if 
(ss != sc->ss) { 2831 mxge_clean_rx_done(ss); 2832 *ss->irq_claim = be32toh(3); 2833 return; 2834 } 2835 #endif 2836 2837 /* make sure the DMA has finished */ 2838 if (!stats->valid) { 2839 return; 2840 } 2841 valid = stats->valid; 2842 2843 if (sc->legacy_irq) { 2844 /* lower legacy IRQ */ 2845 *sc->irq_deassert = 0; 2846 if (!mxge_deassert_wait) 2847 /* don't wait for conf. that irq is low */ 2848 stats->valid = 0; 2849 } else { 2850 stats->valid = 0; 2851 } 2852 2853 /* loop while waiting for legacy irq deassertion */ 2854 do { 2855 /* check for transmit completes and receives */ 2856 send_done_count = be32toh(stats->send_done_count); 2857 while ((send_done_count != tx->pkt_done) || 2858 (rx_done->entry[rx_done->idx].length != 0)) { 2859 if (send_done_count != tx->pkt_done) 2860 mxge_tx_done(ss, (int)send_done_count); 2861 mxge_clean_rx_done(ss); 2862 send_done_count = be32toh(stats->send_done_count); 2863 } 2864 if (sc->legacy_irq && mxge_deassert_wait) 2865 wmb(); 2866 } while (*((volatile uint8_t *) &stats->valid)); 2867 2868 /* fw link & error stats meaningful only on the first slice */ 2869 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 2870 if (sc->link_state != stats->link_up) { 2871 sc->link_state = stats->link_up; 2872 if (sc->link_state) { 2873 sc->ifp->if_link_state = LINK_STATE_UP; 2874 if_link_state_change(sc->ifp); 2875 if (mxge_verbose) 2876 device_printf(sc->dev, "link up\n"); 2877 } else { 2878 sc->ifp->if_link_state = LINK_STATE_DOWN; 2879 if_link_state_change(sc->ifp); 2880 if (mxge_verbose) 2881 device_printf(sc->dev, "link down\n"); 2882 } 2883 sc->need_media_probe = 1; 2884 } 2885 if (sc->rdma_tags_available != 2886 be32toh(stats->rdma_tags_available)) { 2887 sc->rdma_tags_available = 2888 be32toh(stats->rdma_tags_available); 2889 device_printf(sc->dev, "RDMA timed out! 
%d tags " 2890 "left\n", sc->rdma_tags_available); 2891 } 2892 2893 if (stats->link_down) { 2894 sc->down_cnt += stats->link_down; 2895 sc->link_state = 0; 2896 sc->ifp->if_link_state = LINK_STATE_DOWN; 2897 if_link_state_change(sc->ifp); 2898 } 2899 } 2900 2901 /* check to see if we have rx token to pass back */ 2902 if (valid & 0x1) 2903 *ss->irq_claim = be32toh(3); 2904 *(ss->irq_claim + 1) = be32toh(3); 2905 } 2906 2907 static void 2908 mxge_init(void *arg) 2909 { 2910 } 2911 2912 2913 2914 static void 2915 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 2916 { 2917 struct lro_entry *lro_entry; 2918 int i; 2919 2920 while (!SLIST_EMPTY(&ss->lro_free)) { 2921 lro_entry = SLIST_FIRST(&ss->lro_free); 2922 SLIST_REMOVE_HEAD(&ss->lro_free, next); 2923 kfree(lro_entry, M_DEVBUF); 2924 } 2925 2926 for (i = 0; i <= ss->rx_big.mask; i++) { 2927 if (ss->rx_big.info[i].m == NULL) 2928 continue; 2929 bus_dmamap_unload(ss->rx_big.dmat, 2930 ss->rx_big.info[i].map); 2931 m_freem(ss->rx_big.info[i].m); 2932 ss->rx_big.info[i].m = NULL; 2933 } 2934 2935 for (i = 0; i <= ss->rx_small.mask; i++) { 2936 if (ss->rx_small.info[i].m == NULL) 2937 continue; 2938 bus_dmamap_unload(ss->rx_small.dmat, 2939 ss->rx_small.info[i].map); 2940 m_freem(ss->rx_small.info[i].m); 2941 ss->rx_small.info[i].m = NULL; 2942 } 2943 2944 /* transmit ring used only on the first slice */ 2945 if (ss->tx.info == NULL) 2946 return; 2947 2948 for (i = 0; i <= ss->tx.mask; i++) { 2949 ss->tx.info[i].flag = 0; 2950 if (ss->tx.info[i].m == NULL) 2951 continue; 2952 bus_dmamap_unload(ss->tx.dmat, 2953 ss->tx.info[i].map); 2954 m_freem(ss->tx.info[i].m); 2955 ss->tx.info[i].m = NULL; 2956 } 2957 } 2958 2959 static void 2960 mxge_free_mbufs(mxge_softc_t *sc) 2961 { 2962 int slice; 2963 2964 for (slice = 0; slice < sc->num_slices; slice++) 2965 mxge_free_slice_mbufs(&sc->ss[slice]); 2966 } 2967 2968 static void 2969 mxge_free_slice_rings(struct mxge_slice_state *ss) 2970 { 2971 int i; 2972 2973 2974 if 
(ss->rx_done.entry != NULL) 2975 mxge_dma_free(&ss->rx_done.dma); 2976 ss->rx_done.entry = NULL; 2977 2978 if (ss->tx.req_bytes != NULL) 2979 kfree(ss->tx.req_bytes, M_DEVBUF); 2980 ss->tx.req_bytes = NULL; 2981 2982 if (ss->tx.seg_list != NULL) 2983 kfree(ss->tx.seg_list, M_DEVBUF); 2984 ss->tx.seg_list = NULL; 2985 2986 if (ss->rx_small.shadow != NULL) 2987 kfree(ss->rx_small.shadow, M_DEVBUF); 2988 ss->rx_small.shadow = NULL; 2989 2990 if (ss->rx_big.shadow != NULL) 2991 kfree(ss->rx_big.shadow, M_DEVBUF); 2992 ss->rx_big.shadow = NULL; 2993 2994 if (ss->tx.info != NULL) { 2995 if (ss->tx.dmat != NULL) { 2996 for (i = 0; i <= ss->tx.mask; i++) { 2997 bus_dmamap_destroy(ss->tx.dmat, 2998 ss->tx.info[i].map); 2999 } 3000 bus_dma_tag_destroy(ss->tx.dmat); 3001 } 3002 kfree(ss->tx.info, M_DEVBUF); 3003 } 3004 ss->tx.info = NULL; 3005 3006 if (ss->rx_small.info != NULL) { 3007 if (ss->rx_small.dmat != NULL) { 3008 for (i = 0; i <= ss->rx_small.mask; i++) { 3009 bus_dmamap_destroy(ss->rx_small.dmat, 3010 ss->rx_small.info[i].map); 3011 } 3012 bus_dmamap_destroy(ss->rx_small.dmat, 3013 ss->rx_small.extra_map); 3014 bus_dma_tag_destroy(ss->rx_small.dmat); 3015 } 3016 kfree(ss->rx_small.info, M_DEVBUF); 3017 } 3018 ss->rx_small.info = NULL; 3019 3020 if (ss->rx_big.info != NULL) { 3021 if (ss->rx_big.dmat != NULL) { 3022 for (i = 0; i <= ss->rx_big.mask; i++) { 3023 bus_dmamap_destroy(ss->rx_big.dmat, 3024 ss->rx_big.info[i].map); 3025 } 3026 bus_dmamap_destroy(ss->rx_big.dmat, 3027 ss->rx_big.extra_map); 3028 bus_dma_tag_destroy(ss->rx_big.dmat); 3029 } 3030 kfree(ss->rx_big.info, M_DEVBUF); 3031 } 3032 ss->rx_big.info = NULL; 3033 } 3034 3035 static void 3036 mxge_free_rings(mxge_softc_t *sc) 3037 { 3038 int slice; 3039 3040 for (slice = 0; slice < sc->num_slices; slice++) 3041 mxge_free_slice_rings(&sc->ss[slice]); 3042 } 3043 3044 static int 3045 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3046 int tx_ring_entries) 3047 { 3048 
mxge_softc_t *sc = ss->sc; 3049 size_t bytes; 3050 int err, i; 3051 3052 err = ENOMEM; 3053 3054 /* allocate per-slice receive resources */ 3055 3056 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 3057 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 3058 3059 /* allocate the rx shadow rings */ 3060 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 3061 ss->rx_small.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3062 3063 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 3064 ss->rx_big.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3065 3066 /* allocate the rx host info rings */ 3067 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 3068 ss->rx_small.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3069 3070 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 3071 ss->rx_big.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3072 3073 /* allocate the rx busdma resources */ 3074 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3075 1, /* alignment */ 3076 4096, /* boundary */ 3077 BUS_SPACE_MAXADDR, /* low */ 3078 BUS_SPACE_MAXADDR, /* high */ 3079 NULL, NULL, /* filter */ 3080 MHLEN, /* maxsize */ 3081 1, /* num segs */ 3082 MHLEN, /* maxsegsize */ 3083 BUS_DMA_ALLOCNOW, /* flags */ 3084 &ss->rx_small.dmat); /* tag */ 3085 if (err != 0) { 3086 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3087 err); 3088 return err; 3089 } 3090 3091 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3092 1, /* alignment */ 3093 #if MXGE_VIRT_JUMBOS 3094 4096, /* boundary */ 3095 #else 3096 0, /* boundary */ 3097 #endif 3098 BUS_SPACE_MAXADDR, /* low */ 3099 BUS_SPACE_MAXADDR, /* high */ 3100 NULL, NULL, /* filter */ 3101 3*4096, /* maxsize */ 3102 #if MXGE_VIRT_JUMBOS 3103 3, /* num segs */ 3104 4096, /* maxsegsize*/ 3105 #else 3106 1, /* num segs */ 3107 MJUM9BYTES, /* maxsegsize*/ 3108 #endif 3109 BUS_DMA_ALLOCNOW, /* flags */ 3110 &ss->rx_big.dmat); /* tag */ 3111 if (err != 0) { 3112 device_printf(sc->dev, "Err %d 
allocating rx_big dmat\n", 3113 err); 3114 return err; 3115 } 3116 for (i = 0; i <= ss->rx_small.mask; i++) { 3117 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3118 &ss->rx_small.info[i].map); 3119 if (err != 0) { 3120 device_printf(sc->dev, "Err %d rx_small dmamap\n", 3121 err); 3122 return err; 3123 } 3124 } 3125 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3126 &ss->rx_small.extra_map); 3127 if (err != 0) { 3128 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 3129 err); 3130 return err; 3131 } 3132 3133 for (i = 0; i <= ss->rx_big.mask; i++) { 3134 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3135 &ss->rx_big.info[i].map); 3136 if (err != 0) { 3137 device_printf(sc->dev, "Err %d rx_big dmamap\n", 3138 err); 3139 return err; 3140 } 3141 } 3142 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3143 &ss->rx_big.extra_map); 3144 if (err != 0) { 3145 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 3146 err); 3147 return err; 3148 } 3149 3150 /* now allocate TX resouces */ 3151 3152 #ifndef IFNET_BUF_RING 3153 /* only use a single TX ring for now */ 3154 if (ss != ss->sc->ss) 3155 return 0; 3156 #endif 3157 3158 ss->tx.mask = tx_ring_entries - 1; 3159 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3160 3161 3162 /* allocate the tx request copy block */ 3163 bytes = 8 + 3164 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3165 ss->tx.req_bytes = kmalloc(bytes, M_DEVBUF, M_WAITOK); 3166 /* ensure req_list entries are aligned to 8 bytes */ 3167 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3168 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL); 3169 3170 /* allocate the tx busdma segment list */ 3171 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3172 ss->tx.seg_list = (bus_dma_segment_t *) 3173 kmalloc(bytes, M_DEVBUF, M_WAITOK); 3174 if (ss->tx.seg_list == NULL) 3175 return err; 3176 3177 /* allocate the tx host info ring */ 3178 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3179 ss->tx.info = kmalloc(bytes, M_DEVBUF, 
M_ZERO|M_WAITOK); 3180 3181 /* allocate the tx busdma resources */ 3182 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3183 1, /* alignment */ 3184 sc->tx_boundary, /* boundary */ 3185 BUS_SPACE_MAXADDR, /* low */ 3186 BUS_SPACE_MAXADDR, /* high */ 3187 NULL, NULL, /* filter */ 3188 65536 + 256, /* maxsize */ 3189 ss->tx.max_desc - 2, /* num segs */ 3190 sc->tx_boundary, /* maxsegsz */ 3191 BUS_DMA_ALLOCNOW, /* flags */ 3192 &ss->tx.dmat); /* tag */ 3193 3194 if (err != 0) { 3195 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3196 err); 3197 return err; 3198 } 3199 3200 /* now use these tags to setup dmamaps for each slot 3201 in the ring */ 3202 for (i = 0; i <= ss->tx.mask; i++) { 3203 err = bus_dmamap_create(ss->tx.dmat, 0, 3204 &ss->tx.info[i].map); 3205 if (err != 0) { 3206 device_printf(sc->dev, "Err %d tx dmamap\n", 3207 err); 3208 return err; 3209 } 3210 } 3211 return 0; 3212 3213 } 3214 3215 static int 3216 mxge_alloc_rings(mxge_softc_t *sc) 3217 { 3218 mxge_cmd_t cmd; 3219 int tx_ring_size; 3220 int tx_ring_entries, rx_ring_entries; 3221 int err, slice; 3222 3223 /* get ring sizes */ 3224 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3225 tx_ring_size = cmd.data0; 3226 if (err != 0) { 3227 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3228 goto abort; 3229 } 3230 3231 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3232 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3233 ifq_set_maxlen(&sc->ifp->if_snd, tx_ring_entries - 1); 3234 ifq_set_ready(&sc->ifp->if_snd); 3235 3236 for (slice = 0; slice < sc->num_slices; slice++) { 3237 err = mxge_alloc_slice_rings(&sc->ss[slice], 3238 rx_ring_entries, 3239 tx_ring_entries); 3240 if (err != 0) 3241 goto abort; 3242 } 3243 return 0; 3244 3245 abort: 3246 mxge_free_rings(sc); 3247 return err; 3248 3249 } 3250 3251 3252 static void 3253 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3254 { 3255 int bufsize = mtu + 
ETHER_HDR_LEN + EVL_ENCAPLEN + MXGEFW_PAD; 3256 3257 if (bufsize < MCLBYTES) { 3258 /* easy, everything fits in a single buffer */ 3259 *big_buf_size = MCLBYTES; 3260 *cl_size = MCLBYTES; 3261 *nbufs = 1; 3262 return; 3263 } 3264 3265 if (bufsize < MJUMPAGESIZE) { 3266 /* still easy, everything still fits in a single buffer */ 3267 *big_buf_size = MJUMPAGESIZE; 3268 *cl_size = MJUMPAGESIZE; 3269 *nbufs = 1; 3270 return; 3271 } 3272 #if MXGE_VIRT_JUMBOS 3273 /* now we need to use virtually contiguous buffers */ 3274 *cl_size = MJUM9BYTES; 3275 *big_buf_size = 4096; 3276 *nbufs = mtu / 4096 + 1; 3277 /* needs to be a power of two, so round up */ 3278 if (*nbufs == 3) 3279 *nbufs = 4; 3280 #else 3281 *cl_size = MJUM9BYTES; 3282 *big_buf_size = MJUM9BYTES; 3283 *nbufs = 1; 3284 #endif 3285 } 3286 3287 static int 3288 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3289 { 3290 mxge_softc_t *sc; 3291 mxge_cmd_t cmd; 3292 bus_dmamap_t map; 3293 struct lro_entry *lro_entry; 3294 int err, i, slice; 3295 3296 3297 sc = ss->sc; 3298 slice = ss - sc->ss; 3299 3300 SLIST_INIT(&ss->lro_free); 3301 SLIST_INIT(&ss->lro_active); 3302 3303 for (i = 0; i < sc->lro_cnt; i++) { 3304 lro_entry = (struct lro_entry *) 3305 kmalloc(sizeof (*lro_entry), M_DEVBUF, 3306 M_NOWAIT | M_ZERO); 3307 if (lro_entry == NULL) { 3308 sc->lro_cnt = i; 3309 break; 3310 } 3311 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next); 3312 } 3313 /* get the lanai pointers to the send and receive rings */ 3314 3315 err = 0; 3316 #ifndef IFNET_BUF_RING 3317 /* We currently only send from the first slice */ 3318 if (slice == 0) { 3319 #endif 3320 cmd.data0 = slice; 3321 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3322 ss->tx.lanai = 3323 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3324 ss->tx.send_go = (volatile uint32_t *) 3325 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3326 ss->tx.send_stop = (volatile uint32_t *) 3327 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * 
			      slice);
#ifndef IFNET_BUF_RING
	}
#endif
	/* Ask the firmware where each receive ring lives in NIC SRAM. */
	cmd.data0 = slice;
	err |= mxge_send_cmd(sc,
			     MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
	ss->rx_small.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
	cmd.data0 = slice;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
	ss->rx_big.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);

	if (err != 0) {
		device_printf(sc->dev,
			      "failed to get ring sizes or locations\n");
		return EIO;
	}

	/* stock receive rings */
	for (i = 0; i <= ss->rx_small.mask; i++) {
		map = ss->rx_small.info[i].map;
		err = mxge_get_buf_small(ss, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d smalls\n",
				      i, ss->rx_small.mask + 1);
			return ENOMEM;
		}
	}
	/*
	 * Poison every big-ring shadow slot first so un-stocked entries
	 * are never handed to the NIC as valid DMA addresses.
	 */
	for (i = 0; i <= ss->rx_big.mask; i++) {
		ss->rx_big.shadow[i].addr_low = 0xffffffff;
		ss->rx_big.shadow[i].addr_high = 0xffffffff;
	}
	ss->rx_big.nbufs = nbufs;
	ss->rx_big.cl_size = cl_size;
	ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
		EVL_ENCAPLEN + MXGEFW_PAD;
	/* Big buffers are stocked nbufs at a time (one cluster is split). */
	for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
		map = ss->rx_big.info[i].map;
		err = mxge_get_buf_big(ss, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d bigs\n",
				      i, ss->rx_big.mask + 1);
			return ENOMEM;
		}
	}
	return 0;
}

/*
 * Bring the interface up: reset the NIC, program the RSS indirection
 * table when more than one slice is active, tell the firmware the MTU
 * and the small/big receive buffer geometry, hand it the per-slice
 * stats DMA blocks, open every slice, and finally start the firmware
 * (MXGEFW_CMD_ETHERNET_UP).  Returns 0 or an errno; on failure all
 * slice mbufs are freed via the abort path.
 * Caller must hold the ifnet serializer.
 */
static int
mxge_open(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int err, big_bytes, nbufs, slice, cl_size, i;
	bus_addr_t bus;
	volatile uint8_t *itable;
	struct mxge_slice_state *ss;

	ASSERT_SERIALIZED(sc->ifp->if_serializer);
	/* Copy the MAC address in case it was overridden */
	bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);

	err = mxge_reset(sc, 1);
	if (err != 0) {
		device_printf(sc->dev, "failed to reset\n");
		return EIO;
	}

	if (sc->num_slices > 1) {
		/* setup the indirection table */
		cmd.data0 = sc->num_slices;
		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
				    &cmd);

		err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
				     &cmd);
		if (err != 0) {
			device_printf(sc->dev,
				      "failed to setup rss tables\n");
			return err;
		}

		/* just enable an identity mapping */
		itable = sc->sram + cmd.data0;
		for (i = 0; i < sc->num_slices; i++)
			itable[i] = (uint8_t)i;

		cmd.data0 = 1;
		cmd.data1 = mxge_rss_hash_type;
		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "failed to enable slices\n");
			return err;
		}
	}


	mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);

	cmd.data0 = nbufs;
	err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			    &cmd);
	/* error is only meaningful if we're trying to set
	   MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
	if (err && nbufs > 1) {
		device_printf(sc->dev,
			      "Failed to set alway-use-n to %d\n",
			      nbufs);
		return EIO;
	}
	/* Give the firmware the mtu and the big and small buffer
	   sizes.  The firmware wants the big buf size to be a power
	   of two.  Luckily, FreeBSD's clusters are powers of two */
	cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + EVL_ENCAPLEN;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
	cmd.data0 = MHLEN - MXGEFW_PAD;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
			     &cmd);
	cmd.data0 = big_bytes;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	/* Now give him the pointer to the stats block */
	for (slice = 0;
#ifdef IFNET_BUF_RING
	     slice < sc->num_slices;
#else
	     slice < 1;
#endif
	     slice++) {
		ss = &sc->ss[slice];
		cmd.data0 =
			MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
		cmd.data1 =
			MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
		cmd.data2 = sizeof(struct mcp_irq_data);
		cmd.data2 |= (slice << 16);
		err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
	}

	if (err != 0) {
		/* Old firmware: fall back to the obsolete stats DMA
		   command, which only DMAs the send_done_count. */
		bus = sc->ss->fw_stats_dma.bus_addr;
		bus += offsetof(struct mcp_irq_data, send_done_count);
		cmd.data0 = MXGE_LOWPART_TO_U32(bus);
		cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
		err = mxge_send_cmd(sc,
				    MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
				    &cmd);
		/* Firmware cannot support multicast without STATS_DMA_V2 */
		sc->fw_multicast_support = 0;
	} else {
		sc->fw_multicast_support = 1;
	}

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	for (slice = 0; slice < sc->num_slices; slice++) {
		err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
		if (err != 0) {
			device_printf(sc->dev, "couldn't open slice %d\n",
				      slice);
			goto abort;
		}
	}

	/* Finally, start the firmware running */
	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
	if (err) {
		device_printf(sc->dev, "Couldn't bring up link\n");
		goto abort;
	}
	sc->ifp->if_flags |= IFF_RUNNING;
	ifq_clr_oactive(&sc->ifp->if_snd);
	callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);

	return 0;


abort:
	mxge_free_mbufs(sc);

	return err;
}

/*
 * Bring the interface down: stop the watchdog callout, clear
 * IFF_RUNNING, send MXGEFW_CMD_ETHERNET_DOWN and wait (bounded) for
 * the firmware's "down" interrupt before freeing all ring mbufs.
 * Caller must hold the ifnet serializer.  Always returns 0.
 */
static int
mxge_close(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int err, old_down_cnt;
#ifdef IFNET_BUF_RING
	struct mxge_slice_state *ss;
	int slice;
#endif

	ASSERT_SERIALIZED(sc->ifp->if_serializer);
	callout_stop(&sc->co_hdl);
#ifdef IFNET_BUF_RING
	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		ss->if_flags &= ~IFF_RUNNING;
	}
#endif
	sc->ifp->if_flags &= ~IFF_RUNNING;
	old_down_cnt = sc->down_cnt;
	wmb();
	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
	if (err) {
		device_printf(sc->dev, "Couldn't bring down link\n");
	}
	if (old_down_cnt == sc->down_cnt) {
		/* wait for down irq */
		DELAY(10 * sc->intr_coal_delay);
	}
	wmb();
	if (old_down_cnt == sc->down_cnt) {
		device_printf(sc->dev, "never got down irq\n");
	}

	mxge_free_mbufs(sc);

	return 0;
}

/*
 * (Re)program the parts of PCI config space this driver cares about:
 * record the negotiated PCIe link width, raise the max read request
 * size to 4KB, and enable bus mastering and memory-space decoding.
 * Also called from the watchdog path after a NIC reboot wipes config
 * space.
 */
static void
mxge_setup_cfg_space(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg;
	uint16_t cmd, lnk, pectl;

	/* find the PCIe link width and set max read request to 4KB*/
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		/* Link Status register: negotiated width in bits 4..9. */
		lnk = pci_read_config(dev, reg + 0x12, 2);
		sc->link_width = (lnk >> 4) & 0x3f;

		/* Device Control register: MRRS field is bits 12..14;
		   5 encodes a 4096-byte max read request. */
		pectl = pci_read_config(dev, reg + 0x8, 2);
		pectl = (pectl & ~0x7000) | (5 << 12);
		pci_write_config(dev, reg + 0x8, pectl, 2);
	}

	/* Enable DMA and Memory space access */
	pci_enable_busmaster(dev);
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	cmd |= PCIM_CMD_MEMEN;
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
}

static uint32_t
mxge_read_reboot(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	uint32_t vs;

	/* find the vendor specific offset */
	if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
		device_printf(sc->dev,
			      "could not find vendor specific offset\n");
		return (uint32_t)-1;	/* sentinel: status unavailable */
	}
	/* enable read32 mode */
	pci_write_config(dev, vs + 0x10, 0x3, 1);
	/* tell NIC which register to read */
	pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
	return (pci_read_config(dev, vs + 0x14, 4));
}

/*
 * Recover a wedged NIC.  Determines whether the NIC rebooted (config
 * space wiped, busmaster bit clear); if so, restores config space and
 * reopens the interface.  If the NIC did not reboot, only dumps the
 * tx ring state of the offending slice and leaves it alone.
 * Returns 0 on successful reopen, otherwise an errno (ENXIO when no
 * reset was attempted or the device vanished).
 */
static int
mxge_watchdog_reset(mxge_softc_t *sc, int slice)
{
	struct pci_devinfo *dinfo;
	mxge_tx_ring_t *tx;
	int err;
	uint32_t reboot;
	uint16_t cmd;

	err = ENXIO;

	device_printf(sc->dev, "Watchdog reset!\n");

	/*
	 * check to see if the NIC rebooted.  If it did, then all of
	 * PCI config space has been reset, and things like the
	 * busmaster bit will be zero.  If this is the case, then we
	 * must restore PCI config space before the NIC can be used
	 * again
	 */
	cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
	if (cmd == 0xffff) {
		/*
		 * maybe the watchdog caught the NIC rebooting; wait
		 * up to 100ms for it to finish.  If it does not come
		 * back, then give up
		 */
		DELAY(1000*100);
		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
		if (cmd == 0xffff) {
			device_printf(sc->dev, "NIC disappeared!\n");
			return (err);
		}
	}
	if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
		/* print the reboot status */
		reboot = mxge_read_reboot(sc);
		device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
			      reboot);
		/* restore PCI configuration space */
		dinfo = device_get_ivars(sc->dev);
		pci_cfg_restore(sc->dev, dinfo);

		/* and redo any changes we made to our config space */
		mxge_setup_cfg_space(sc);

		if (sc->ifp->if_flags & IFF_RUNNING) {
			mxge_close(sc);
			err = mxge_open(sc);
		}
	} else {
		tx = &sc->ss[slice].tx;
		device_printf(sc->dev,
			      "NIC did not reboot, slice %d ring state:\n",
			      slice);
		device_printf(sc->dev,
			      "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
			      tx->req, tx->done, tx->queue_active);
		device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
			      tx->activate, tx->deactivate);
		device_printf(sc->dev, "pkt_done=%d fw=%d\n",
			      tx->pkt_done,
			      be32toh(sc->ss->fw_stats->send_done_count));
		device_printf(sc->dev, "not resetting\n");
	}
	return (err);
}

/*
 * Periodic transmit watchdog, run from mxge_tick().  A slice is
 * considered stuck when it has outstanding transmits and made no
 * completion progress since the previous tick; a reset is triggered
 * only when the stall is not explained by incoming flow-control
 * pause frames.  Returns the result of mxge_watchdog_reset(), or 0.
 */
static int
mxge_watchdog(mxge_softc_t *sc)
{
	mxge_tx_ring_t *tx;
	uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
	int i, err = 0;

	/* see if we have outstanding transmits, which
	   have been pending for more than mxge_ticks */
	for (i = 0;
#ifdef IFNET_BUF_RING
	     (i < sc->num_slices) && (err == 0);
#else
	     (i < 1) && (err == 0);
#endif
	     i++) {
		tx = &sc->ss[i].tx;
		if (tx->req != tx->done &&
		    tx->watchdog_req != tx->watchdog_done &&
		    tx->done == tx->watchdog_done) {
			/* check for pause blocking before resetting */
			if (tx->watchdog_rx_pause == rx_pause)
				err =
					mxge_watchdog_reset(sc, i);
			else
				device_printf(sc->dev, "Flow control blocking "
					      "xmits, check link partner\n");
		}

		/* remember this tick's counters for the next comparison */
		tx->watchdog_req = tx->req;
		tx->watchdog_done = tx->done;
		tx->watchdog_rx_pause = rx_pause;
	}

	if (sc->need_media_probe)
		mxge_media_probe(sc);
	return (err);
}

/*
 * Aggregate the per-slice packet/error counters into the single
 * ifnet statistics visible to userland.
 */
static void
mxge_update_stats(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	u_long ipackets = 0;
	u_long opackets = 0;
#ifdef IFNET_BUF_RING
	u_long obytes = 0;
	u_long omcasts = 0;
	u_long odrops = 0;
#endif
	u_long oerrors = 0;
	int slice;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		ipackets += ss->ipackets;
		opackets += ss->opackets;
#ifdef IFNET_BUF_RING
		obytes += ss->obytes;
		omcasts += ss->omcasts;
		odrops += ss->tx.br->br_drops;
#endif
		oerrors += ss->oerrors;
	}
	IFNET_STAT_SET(sc->ifp, ipackets, ipackets);
	IFNET_STAT_SET(sc->ifp, opackets, opackets);
#ifdef IFNET_BUF_RING
	sc->ifp->if_obytes = obytes;
	sc->ifp->if_omcasts = omcasts;
	sc->ifp->if_snd.ifq_drops = odrops;
#endif
	IFNET_STAT_SET(sc->ifp, oerrors, oerrors);
}

/*
 * Periodic callout: refresh interface statistics and run the
 * transmit watchdog every 4th tick.  Re-arms itself unless the
 * watchdog reported an unrecoverable error.
 */
static void
mxge_tick(void *arg)
{
	mxge_softc_t *sc = arg;
	int err = 0;

	lwkt_serialize_enter(sc->ifp->if_serializer);
	/* aggregate stats from different slices */
	mxge_update_stats(sc);
	if (!sc->watchdog_countdown) {
		err = mxge_watchdog(sc);
		sc->watchdog_countdown = 4;
	}
	sc->watchdog_countdown--;
	if (err == 0)
		callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	lwkt_serialize_exit(sc->ifp->if_serializer);
}

/* Media is fixed (10G); manual media changes are not supported. */
static int
mxge_media_change(struct ifnet *ifp)
{
	return EINVAL;
}

/*
 * Change the interface MTU.  Validates the resulting frame size
 * against the firmware limit, then bounces the interface if it is
 * running.  On a failed reopen, reverts to the old MTU and reopens
 * best-effort.  Returns 0 or an errno.
 */
static int
mxge_change_mtu(mxge_softc_t *sc, int mtu)
{
	struct ifnet *ifp = sc->ifp;
	int real_mtu, old_mtu;
	int err = 0;

	if (ifp->if_serializer)
		ASSERT_SERIALIZED(ifp->if_serializer);

	real_mtu = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN;
	if ((real_mtu > sc->max_mtu) || real_mtu < 60)
		return EINVAL;
	old_mtu = ifp->if_mtu;
	ifp->if_mtu = mtu;
	if (ifp->if_flags & IFF_RUNNING) {
		mxge_close(sc);
		err = mxge_open(sc);
		if (err != 0) {
			/* reopen with the previous MTU as a fallback */
			ifp->if_mtu = old_mtu;
			mxge_close(sc);
			(void) mxge_open(sc);
		}
	}
	return err;
}

/*
 * Report link state for ifmedia.  The NIC is always full-duplex
 * 10GbE; only the active/inactive bit varies.
 */
static void
mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	mxge_softc_t *sc = ifp->if_softc;


	if (sc == NULL)
		return;
	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
	ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
	ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0;
}

/*
 * Interface ioctl handler.  Runs under the ifnet serializer.
 * Handles address, MTU, flag, multicast, capability and media
 * requests; everything else is ENOTTY.
 */
static int
mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int err, mask;

	(void)cr;
	err = 0;
	ASSERT_SERIALIZED(ifp->if_serializer);
	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		err = ether_ioctl(ifp, command, data);
		break;

	case SIOCSIFMTU:
		err = mxge_change_mtu(sc, ifr->ifr_mtu);
		break;

	case SIOCSIFFLAGS:
		if (sc->dying) {
			return EINVAL;
		}
		if (ifp->if_flags & IFF_UP) {
			if (!(ifp->if_flags & IFF_RUNNING)) {
				err = mxge_open(sc);
			} else {
				/* take care of promiscuous and
				   allmulti flag changes */
				mxge_change_promisc(sc,
						    ifp->if_flags & IFF_PROMISC);
				mxge_set_multicast_list(sc);
			}
		} else {
			if (ifp->if_flags & IFF_RUNNING) {
				mxge_close(sc);
			}
		}
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		mxge_set_multicast_list(sc);
		break;

	case SIOCSIFCAP:
		/* toggle only the capabilities that changed */
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & ifp->if_capenable) {
				/* TSO depends on tx checksumming */
				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
						      | CSUM_TSO);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM;
				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
			}
		} else if (mask & IFCAP_RXCSUM) {
			if (IFCAP_RXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_RXCSUM;
				sc->csum_flag = 0;
			} else {
				ifp->if_capenable |= IFCAP_RXCSUM;
				sc->csum_flag = 1;
			}
		}
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				ifp->if_hwassist &= ~CSUM_TSO;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				kprintf("mxge requires tx checksum offload"
					" be enabled to use TSO\n");
				err = EINVAL;
			}
		}
		if (mask & IFCAP_LRO) {
			if (IFCAP_LRO & ifp->if_capenable)
				err = mxge_change_lro_locked(sc, 0);
			else
				err = mxge_change_lro_locked(sc, mxge_lro_cnt);
		}
		if (mask & IFCAP_VLAN_HWTAGGING)
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
		VLAN_CAPABILITIES(ifp);

		break;

	case SIOCGIFMEDIA:
		err = ifmedia_ioctl(ifp, (struct ifreq *)data,
				    &sc->media, command);
		break;

	default:
		err = ENOTTY;
	}
	return err;
}

/*
 * Pull driver tunables from the kernel environment and clamp them to
 * sane ranges.  Called once per device early in attach.
 */
static void
mxge_fetch_tunables(mxge_softc_t *sc)
{

	TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
	TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
			  &mxge_flow_control);
	TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
			  &mxge_intr_coal_delay);
	TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
			  &mxge_nvidia_ecrc_enable);
	TUNABLE_INT_FETCH("hw.mxge.force_firmware",
			  &mxge_force_firmware);
	TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
			  &mxge_deassert_wait);
	TUNABLE_INT_FETCH("hw.mxge.verbose",
			  &mxge_verbose);
	TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
	TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);
	TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
	TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
	TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
	if (sc->lro_cnt != 0)
		mxge_lro_cnt = sc->lro_cnt;

	if (bootverbose)
		mxge_verbose = 1;
	/* clamp coalescing delay to 0..10ms */
	if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
		mxge_intr_coal_delay = 30;
	if (mxge_ticks == 0)
		mxge_ticks = hz / 2;
	sc->pause = mxge_flow_control;
	if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
	    || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
		mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
	}
	if (mxge_initial_mtu > ETHERMTU_JUMBO ||
	    mxge_initial_mtu < ETHER_MIN_LEN)
		mxge_initial_mtu = ETHERMTU_JUMBO;
}


/*
 * Release all per-slice state allocated by mxge_alloc_slices():
 * stats DMA blocks, optional buf_rings, rx completion queues, and
 * finally the slice array itself.  Safe to call when nothing was
 * allocated (sc->ss == NULL).
 */
static void
mxge_free_slices(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int i;


	if (sc->ss == NULL)
		return;

	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];
		if (ss->fw_stats != NULL) {
			mxge_dma_free(&ss->fw_stats_dma);
			ss->fw_stats = NULL;
#ifdef IFNET_BUF_RING
			if (ss->tx.br != NULL) {
				drbr_free(ss->tx.br, M_DEVBUF);
				ss->tx.br = NULL;
			}
#endif
		}
		if (ss->rx_done.entry != NULL) {
			mxge_dma_free(&ss->rx_done.dma);
			ss->rx_done.entry = NULL;
		}
	}
	kfree(sc->ss, M_DEVBUF);
	sc->ss = NULL;
}

/*
 * Allocate the per-slice state array plus each slice's rx completion
 * queue and firmware stats DMA block.  On any failure everything
 * already allocated is released via mxge_free_slices().
 * Returns 0, or an errno from the firmware query / ENOMEM.
 */
static int
mxge_alloc_slices(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct mxge_slice_state *ss;
	size_t bytes;
	int err, i, max_intr_slots;

	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		return err;
	}
	sc->rx_ring_size = cmd.data0;
	/* worst case: two completion slots per receive descriptor */
	max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));

	bytes = sizeof (*sc->ss) * sc->num_slices;
	sc->ss = kmalloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sc->ss == NULL)
		return (ENOMEM);
	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];

		ss->sc = sc;

		/* allocate per-slice rx interrupt queues */

		bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
		err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
		if (err != 0)
			goto abort;
		ss->rx_done.entry = ss->rx_done.dma.addr;
		bzero(ss->rx_done.entry, bytes);

		/*
		 * allocate the per-slice firmware stats; stats
		 * (including tx) are used used only on the first
		 * slice for now
		 */
#ifndef IFNET_BUF_RING
		if (i > 0)
			continue;
#endif

		bytes = sizeof (*ss->fw_stats);
		err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
				     sizeof (*ss->fw_stats), 64);
		if (err != 0)
			goto abort;
		ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
#ifdef IFNET_BUF_RING
		ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
					   &ss->tx.lock);
#endif
	}

	return (0);

abort:
	mxge_free_slices(sc);
	return (ENOMEM);
}

/*
 * Decide how many RSS slices to use.  Defaults to 1; multiple slices
 * require the tunable to allow it, an SMP system, >= 2 MSI-X vectors,
 * and the RSS-capable firmware to load and answer the probe commands.
 * The result (a power of two, capped by vectors/CPUs/tunable) is left
 * in sc->num_slices.  On probe failure the original firmware is
 * reloaded and num_slices stays 1.
 */
static void
mxge_slice_probe(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	char *old_fw;
	int msix_cnt, status, max_intr_slots;

	sc->num_slices = 1;
	/*
	 * don't enable multiple slices if they are not enabled,
	 * or if this is not an SMP system
	 */

	if (mxge_max_slices == 0 || mxge_max_slices == 1 || ncpus < 2)
		return;

	/* see how many MSI-X interrupts are available */
	msix_cnt = pci_msix_count(sc->dev);
	if (msix_cnt < 2)
		return;

	/* now load the slice aware firmware see what it supports */
	old_fw = sc->fw_name;
	if (old_fw == mxge_fw_aligned)
		sc->fw_name = mxge_fw_rss_aligned;
	else
		sc->fw_name = mxge_fw_rss_unaligned;
	status = mxge_load_firmware(sc, 0);
	if (status != 0) {
		device_printf(sc->dev, "Falling back to a single slice\n");
		return;
	}

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		goto abort_with_fw;
	}

	/* get rx ring size */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		goto abort_with_fw;
	}
	max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));

	/* tell it the size of the interrupt queues */
	cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
		goto abort_with_fw;
	}

	/* ask the maximum number of slices it supports */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
	if (status != 0) {
		device_printf(sc->dev,
			      "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
		goto abort_with_fw;
	}
	sc->num_slices = cmd.data0;
	if (sc->num_slices > msix_cnt)
		sc->num_slices = msix_cnt;

	if (mxge_max_slices == -1) {
		/* cap to number of CPUs in system */
		if (sc->num_slices > ncpus)
			sc->num_slices = ncpus;
	} else {
		if (sc->num_slices > mxge_max_slices)
			sc->num_slices = mxge_max_slices;
	}
	/* make sure it is a power of two */
	while (sc->num_slices & (sc->num_slices - 1))
		sc->num_slices--;

	if (mxge_verbose)
		device_printf(sc->dev, "using %d slices\n",
			      sc->num_slices);

	return;

abort_with_fw:
	sc->fw_name = old_fw;
	(void) mxge_load_firmware(sc, 0);
}

#if 0
/*
 * Allocate one MSI-X vector per slice and hook each up to
 * mxge_intr().  Currently compiled out: the driver uses a single
 * interrupt (see mxge_add_irq()).
 */
static int
mxge_add_msix_irqs(mxge_softc_t *sc)
{
	size_t bytes;
	int count, err, i, rid;

	rid = PCIR_BAR(2);
	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
						    &rid, RF_ACTIVE);

	if (sc->msix_table_res == NULL) {
		device_printf(sc->dev, "couldn't alloc MSIX table res\n");
		return ENXIO;
	}

	count = sc->num_slices;
	err = pci_alloc_msix(sc->dev, &count);
	if (err != 0) {
		device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d"
			      "err = %d \n", sc->num_slices, err);
		goto abort_with_msix_table;
	}
	if (count < sc->num_slices) {
		device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
			      count, sc->num_slices);
		device_printf(sc->dev,
			      "Try setting hw.mxge.max_slices to %d\n",
			      count);
		err = ENOSPC;
		goto abort_with_msix;
	}
	bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
	sc->msix_irq_res = kmalloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_irq_res == NULL) {
		err = ENOMEM;
		goto abort_with_msix;
	}

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
							     SYS_RES_IRQ,
							     &rid, RF_ACTIVE);
		if (sc->msix_irq_res[i] == NULL) {
			device_printf(sc->dev, "couldn't allocate IRQ res"
				      " for message %d\n", i);
			err = ENXIO;
			goto abort_with_res;
		}
	}

	bytes = sizeof (*sc->msix_ih) * sc->num_slices;
	sc->msix_ih = kmalloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);

	for (i = 0; i < sc->num_slices; i++) {
		err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
				     INTR_MPSAFE,
				     mxge_intr, &sc->ss[i], &sc->msix_ih[i],
				     sc->ifp->if_serializer);
		if (err != 0) {
			device_printf(sc->dev, "couldn't setup intr for "
				      "message %d\n", i);
			goto abort_with_intr;
		}
	}

	if (mxge_verbose) {
		device_printf(sc->dev, "using %d msix IRQs:",
			      sc->num_slices);
		for (i = 0; i < sc->num_slices; i++)
			kprintf(" %ld", rman_get_start(sc->msix_irq_res[i]));
		kprintf("\n");
	}
	return (0);

abort_with_intr:
	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	kfree(sc->msix_ih, M_DEVBUF);


abort_with_res:
	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	kfree(sc->msix_irq_res, M_DEVBUF);


abort_with_msix:
	pci_release_msi(sc->dev);

abort_with_msix_table:
	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	return err;
}
#endif

/*
 * Allocate and hook up the single interrupt used by the driver:
 * one MSI message when OLD_MSI is configured and available,
 * otherwise a shared legacy INTx line.  Returns 0 or an errno;
 * resources are released on setup failure.
 */
static int
mxge_add_single_irq(mxge_softc_t *sc)
{
	int err, rid;
#ifdef OLD_MSI
	int count;

	count = pci_msi_count(sc->dev);
	if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
		rid = 1;
	} else {
		rid = 0;
		sc->legacy_irq = 1;
	}
#else
	rid = 0;
	sc->legacy_irq = 1;
#endif
	sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0,
					 1, RF_SHAREABLE | RF_ACTIVE);
	if (sc->irq_res == NULL) {
		device_printf(sc->dev, "could not alloc interrupt\n");
		return ENXIO;
	}
	if (mxge_verbose)
		device_printf(sc->dev, "using %s irq %ld\n",
			      sc->legacy_irq ? "INTx" : "MSI",
			      rman_get_start(sc->irq_res));
	err = bus_setup_intr(sc->dev, sc->irq_res,
			     INTR_MPSAFE,
			     mxge_intr, &sc->ss[0], &sc->ih,
			     sc->ifp->if_serializer);
	if (err != 0) {
		bus_release_resource(sc->dev, SYS_RES_IRQ,
				     sc->legacy_irq ? 0 : 1, sc->irq_res);
		if (!sc->legacy_irq)
			pci_release_msi(sc->dev);
	}
	return err;
}

#if 0
/*
 * Tear down the MSI-X vectors set up by mxge_add_msix_irqs().
 * Compiled out together with it.
 */
static void
mxge_rem_msix_irqs(mxge_softc_t *sc)
{
	int i, rid;

	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	kfree(sc->msix_ih, M_DEVBUF);

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	kfree(sc->msix_irq_res, M_DEVBUF);

	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	pci_release_msi(sc->dev);
	return;
}
#endif

/* Tear down the single interrupt set up by mxge_add_single_irq(). */
static void
mxge_rem_single_irq(mxge_softc_t *sc)
{
	bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
	bus_release_resource(sc->dev, SYS_RES_IRQ,
			     sc->legacy_irq ?
0 : 1, sc->irq_res); 4326 if (!sc->legacy_irq) 4327 pci_release_msi(sc->dev); 4328 } 4329 4330 static void 4331 mxge_rem_irq(mxge_softc_t *sc) 4332 { 4333 #if 0 4334 if (sc->num_slices > 1) 4335 mxge_rem_msix_irqs(sc); 4336 else 4337 #endif 4338 mxge_rem_single_irq(sc); 4339 } 4340 4341 static int 4342 mxge_add_irq(mxge_softc_t *sc) 4343 { 4344 #if 0 4345 int err; 4346 4347 if (sc->num_slices > 1) 4348 err = mxge_add_msix_irqs(sc); 4349 else 4350 err = mxge_add_single_irq(sc); 4351 4352 if (0 && err == 0 && sc->num_slices > 1) { 4353 mxge_rem_msix_irqs(sc); 4354 err = mxge_add_msix_irqs(sc); 4355 } 4356 return err; 4357 #else 4358 return mxge_add_single_irq(sc); 4359 #endif 4360 } 4361 4362 4363 static int 4364 mxge_attach(device_t dev) 4365 { 4366 mxge_softc_t *sc = device_get_softc(dev); 4367 struct ifnet *ifp = &sc->arpcom.ac_if; 4368 int err, rid; 4369 4370 /* 4371 * avoid rewriting half the lines in this file to use 4372 * &sc->arpcom.ac_if instead 4373 */ 4374 sc->ifp = ifp; 4375 sc->dev = dev; 4376 mxge_fetch_tunables(sc); 4377 4378 err = bus_dma_tag_create(NULL, /* parent */ 4379 1, /* alignment */ 4380 0, /* boundary */ 4381 BUS_SPACE_MAXADDR, /* low */ 4382 BUS_SPACE_MAXADDR, /* high */ 4383 NULL, NULL, /* filter */ 4384 65536 + 256, /* maxsize */ 4385 MXGE_MAX_SEND_DESC, /* num segs */ 4386 65536, /* maxsegsize */ 4387 0, /* flags */ 4388 &sc->parent_dmat); /* tag */ 4389 4390 if (err != 0) { 4391 device_printf(sc->dev, "Err %d allocating parent dmat\n", 4392 err); 4393 goto abort_with_nothing; 4394 } 4395 4396 sc->ifp = ifp; 4397 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4398 4399 callout_init_mp(&sc->co_hdl); 4400 4401 mxge_setup_cfg_space(sc); 4402 4403 /* Map the board into the kernel */ 4404 rid = PCIR_BARS; 4405 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, 4406 ~0, 1, RF_ACTIVE); 4407 if (sc->mem_res == NULL) { 4408 device_printf(dev, "could not map memory\n"); 4409 err = ENXIO; 4410 goto abort_with_nothing; 
4411 } 4412 sc->sram = rman_get_virtual(sc->mem_res); 4413 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4414 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4415 device_printf(dev, "impossible memory region size %ld\n", 4416 rman_get_size(sc->mem_res)); 4417 err = ENXIO; 4418 goto abort_with_mem_res; 4419 } 4420 4421 /* make NULL terminated copy of the EEPROM strings section of 4422 lanai SRAM */ 4423 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4424 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4425 rman_get_bushandle(sc->mem_res), 4426 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4427 sc->eeprom_strings, 4428 MXGE_EEPROM_STRINGS_SIZE - 2); 4429 err = mxge_parse_strings(sc); 4430 if (err != 0) 4431 goto abort_with_mem_res; 4432 4433 /* Enable write combining for efficient use of PCIe bus */ 4434 mxge_enable_wc(sc); 4435 4436 /* Allocate the out of band dma memory */ 4437 err = mxge_dma_alloc(sc, &sc->cmd_dma, 4438 sizeof (mxge_cmd_t), 64); 4439 if (err != 0) 4440 goto abort_with_mem_res; 4441 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 4442 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4443 if (err != 0) 4444 goto abort_with_cmd_dma; 4445 4446 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4447 if (err != 0) 4448 goto abort_with_zeropad_dma; 4449 4450 /* select & load the firmware */ 4451 err = mxge_select_firmware(sc); 4452 if (err != 0) 4453 goto abort_with_dmabench; 4454 sc->intr_coal_delay = mxge_intr_coal_delay; 4455 4456 mxge_slice_probe(sc); 4457 err = mxge_alloc_slices(sc); 4458 if (err != 0) 4459 goto abort_with_dmabench; 4460 4461 err = mxge_reset(sc, 0); 4462 if (err != 0) 4463 goto abort_with_slices; 4464 4465 err = mxge_alloc_rings(sc); 4466 if (err != 0) { 4467 device_printf(sc->dev, "failed to allocate rings\n"); 4468 goto abort_with_dmabench; 4469 } 4470 4471 ifp->if_baudrate = IF_Gbps(10UL); 4472 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 4473 IFCAP_VLAN_MTU; 4474 
#ifdef INET 4475 ifp->if_capabilities |= IFCAP_LRO; 4476 #endif 4477 4478 #ifdef MXGE_NEW_VLAN_API 4479 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; 4480 #endif 4481 4482 sc->max_mtu = mxge_max_mtu(sc); 4483 if (sc->max_mtu >= 9000) 4484 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 4485 else 4486 device_printf(dev, "MTU limited to %d. Install " 4487 "latest firmware for 9000 byte jumbo support\n", 4488 sc->max_mtu - ETHER_HDR_LEN); 4489 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4490 ifp->if_capenable = ifp->if_capabilities; 4491 if (sc->lro_cnt == 0) 4492 ifp->if_capenable &= ~IFCAP_LRO; 4493 sc->csum_flag = 1; 4494 ifp->if_init = mxge_init; 4495 ifp->if_softc = sc; 4496 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4497 ifp->if_ioctl = mxge_ioctl; 4498 ifp->if_start = mxge_start; 4499 /* Initialise the ifmedia structure */ 4500 ifmedia_init(&sc->media, 0, mxge_media_change, 4501 mxge_media_status); 4502 mxge_set_media(sc, IFM_ETHER | IFM_AUTO); 4503 mxge_media_probe(sc); 4504 sc->dying = 0; 4505 ether_ifattach(ifp, sc->mac_addr, NULL); 4506 /* ether_ifattach sets mtu to ETHERMTU */ 4507 if (mxge_initial_mtu != ETHERMTU) { 4508 lwkt_serialize_enter(ifp->if_serializer); 4509 mxge_change_mtu(sc, mxge_initial_mtu); 4510 lwkt_serialize_exit(ifp->if_serializer); 4511 } 4512 /* must come after ether_ifattach() */ 4513 err = mxge_add_irq(sc); 4514 if (err != 0) { 4515 device_printf(sc->dev, "failed to add irq\n"); 4516 goto abort_with_rings; 4517 } 4518 4519 mxge_add_sysctls(sc); 4520 #ifdef IFNET_BUF_RING 4521 ifp->if_transmit = mxge_transmit; 4522 ifp->if_qflush = mxge_qflush; 4523 #endif 4524 return 0; 4525 4526 abort_with_rings: 4527 mxge_free_rings(sc); 4528 abort_with_slices: 4529 mxge_free_slices(sc); 4530 abort_with_dmabench: 4531 mxge_dma_free(&sc->dmabench_dma); 4532 abort_with_zeropad_dma: 4533 mxge_dma_free(&sc->zeropad_dma); 4534 abort_with_cmd_dma: 4535 mxge_dma_free(&sc->cmd_dma); 4536 abort_with_mem_res: 4537 
bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4538 pci_disable_busmaster(dev); 4539 bus_dma_tag_destroy(sc->parent_dmat); 4540 abort_with_nothing: 4541 return err; 4542 } 4543 4544 static int 4545 mxge_detach(device_t dev) 4546 { 4547 mxge_softc_t *sc = device_get_softc(dev); 4548 4549 lwkt_serialize_enter(sc->ifp->if_serializer); 4550 sc->dying = 1; 4551 if (sc->ifp->if_flags & IFF_RUNNING) 4552 mxge_close(sc); 4553 /* 4554 * XXX: race: the callout callback could be spinning on 4555 * the serializer and run anyway 4556 */ 4557 callout_stop(&sc->co_hdl); 4558 lwkt_serialize_exit(sc->ifp->if_serializer); 4559 4560 ether_ifdetach(sc->ifp); 4561 ifmedia_removeall(&sc->media); 4562 mxge_dummy_rdma(sc, 0); 4563 mxge_rem_sysctls(sc); 4564 mxge_rem_irq(sc); 4565 mxge_free_rings(sc); 4566 mxge_free_slices(sc); 4567 mxge_dma_free(&sc->dmabench_dma); 4568 mxge_dma_free(&sc->zeropad_dma); 4569 mxge_dma_free(&sc->cmd_dma); 4570 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4571 pci_disable_busmaster(dev); 4572 bus_dma_tag_destroy(sc->parent_dmat); 4573 return 0; 4574 } 4575 4576 static int 4577 mxge_shutdown(device_t dev) 4578 { 4579 return 0; 4580 } 4581