1 /****************************************************************************** 2 3 Copyright (c) 2006-2009, Myricom Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Myricom Inc, nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 27 28 $FreeBSD: src/sys/dev/mxge/if_mxge.c,v 1.63 2009/06/26 11:45:06 rwatson Exp $ 29 30 ***************************************************************************/ 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/linker.h> 35 #include <sys/firmware.h> 36 #include <sys/endian.h> 37 #include <sys/in_cksum.h> 38 #include <sys/sockio.h> 39 #include <sys/mbuf.h> 40 #include <sys/malloc.h> 41 #include <sys/kernel.h> 42 #include <sys/module.h> 43 #include <sys/serialize.h> 44 #include <sys/socket.h> 45 #include <sys/sysctl.h> 46 47 /* count xmits ourselves, rather than via drbr */ 48 #define NO_SLOW_STATS 49 #include <net/if.h> 50 #include <net/if_arp.h> 51 #include <net/ifq_var.h> 52 #include <net/ethernet.h> 53 #include <net/if_dl.h> 54 #include <net/if_media.h> 55 56 #include <net/bpf.h> 57 58 #include <net/if_types.h> 59 #include <net/vlan/if_vlan_var.h> 60 #include <net/zlib.h> 61 62 #include <netinet/in_systm.h> 63 #include <netinet/in.h> 64 #include <netinet/ip.h> 65 #include <netinet/tcp.h> 66 67 #include <sys/bus.h> 68 #include <sys/rman.h> 69 70 #include <bus/pci/pcireg.h> 71 #include <bus/pci/pcivar.h> 72 #include <bus/pci/pci_private.h> /* XXX for pci_cfg_restore */ 73 74 #include <vm/vm.h> /* for pmap_mapdev() */ 75 #include <vm/pmap.h> 76 77 #if defined(__i386) || defined(__x86_64) 78 #include <machine/specialreg.h> 79 #endif 80 81 #include <dev/netif/mxge/mxge_mcp.h> 82 #include <dev/netif/mxge/mcp_gen_header.h> 83 /*#define MXGE_FAKE_IFP*/ 84 #include <dev/netif/mxge/if_mxge_var.h> 85 #ifdef IFNET_BUF_RING 86 #include <sys/buf_ring.h> 87 #endif 88 89 #include "opt_inet.h" 90 91 /* tunable params */ 92 static int mxge_nvidia_ecrc_enable = 1; 93 static int mxge_force_firmware = 0; 94 static int mxge_intr_coal_delay = 30; 95 static int mxge_deassert_wait = 1; 96 static int mxge_flow_control = 1; 97 static int mxge_verbose = 0; 98 static int mxge_lro_cnt = 8; 99 static int mxge_ticks; 100 static int mxge_max_slices = 1; 101 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT; 102 static int mxge_always_promisc = 0; 103 /* XXX: not yet */ 104 /* static int mxge_initial_mtu = ETHERMTU_JUMBO; */ 105 static int mxge_initial_mtu = ETHERMTU; 106 static char *mxge_fw_unaligned = "mxge_ethp_z8e"; 107 static char *mxge_fw_aligned = "mxge_eth_z8e"; 108 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; 109 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; 110 111 static int mxge_probe(device_t dev); 112 static int mxge_attach(device_t dev); 113 static int mxge_detach(device_t dev); 114 static int mxge_shutdown(device_t dev); 115 static void mxge_intr(void *arg); 116 117 static device_method_t mxge_methods[] = 118 { 119 /* Device interface */ 120 DEVMETHOD(device_probe, mxge_probe), 121 DEVMETHOD(device_attach, mxge_attach), 122 DEVMETHOD(device_detach, mxge_detach), 123 DEVMETHOD(device_shutdown, mxge_shutdown), 124 {0, 0} 125 }; 126 127 static driver_t mxge_driver = 128 { 129 "mxge", 130 mxge_methods, 131 sizeof(mxge_softc_t), 132 }; 133 134 static devclass_t mxge_devclass; 135 136 /* Declare ourselves to be a child of the PCI bus.*/ 137 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, NULL, NULL); 138 MODULE_DEPEND(mxge, firmware, 1, 1, 1); 139 MODULE_DEPEND(mxge, zlib, 1, 1, 1); 140 141 static int mxge_load_firmware(mxge_softc_t *sc, int adopt); 142 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 143 static int mxge_close(mxge_softc_t *sc); 144 static int mxge_open(mxge_softc_t *sc); 145 static void mxge_tick(void *arg); 146 147 /* XXX: we don't have Large Receive Offload support yet */ 148 inline int 149 mxge_lro_rx(struct mxge_slice_state *ss, struct mbuf *m_head, uint32_t csum) 150 { 151 (void)ss; 152 (void)m_head; 153 (void)csum; 154 return 1; 155 } 156 157 inline void 158 mxge_lro_flush(struct mxge_slice_state *ss, struct lro_entry *lro) 159 { 160 (void)ss; 161 (void)lro; 162 } 163 164 static int 165 mxge_probe(device_t dev) 166 { 167 int rev; 168 169 170 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && 171 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) || 172 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) { 173 rev = pci_get_revid(dev); 174 switch (rev) { 175 case MXGE_PCI_REV_Z8E: 176 device_set_desc(dev, "Myri10G-PCIE-8A"); 177 break; 178 case MXGE_PCI_REV_Z8ES: 179 device_set_desc(dev, "Myri10G-PCIE-8B"); 180 break; 181 default: 182 device_set_desc(dev, "Myri10G-PCIE-8??"); 183 device_printf(dev, "Unrecognized rev %d NIC\n", 184 rev); 185 break; 186 } 187 return 0; 188 } 189 return ENXIO; 190 } 191 192 static void 193 mxge_enable_wc(mxge_softc_t *sc) 194 { 195 #if 0 196 #if defined(__i386) || defined(__x86_64) 197 vm_offset_t len; 198 int err; 199 200 sc->wc = 1; 201 len = rman_get_size(sc->mem_res); 202 err = pmap_change_attr((vm_offset_t) sc->sram, 203 len, PAT_WRITE_COMBINING); 204 if (err != 0) { 205 device_printf(sc->dev, "pmap_change_attr failed, %d\n", 206 err); 207 sc->wc = 0; 208 } 209 #endif 210 #else 211 sc->wc = 0; /* TBD: PAT support */ 212 #endif 213 } 214 215 216 /* callback to get our DMA address */ 217 static void 218 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, 219 int error) 220 { 221 if (error == 0) { 222 *(bus_addr_t *) arg = segs->ds_addr; 223 } 224 } 225 226 static int 227 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, 228 bus_size_t alignment) 229 { 230 int err; 231 device_t dev = sc->dev; 232 bus_size_t boundary, maxsegsize; 233 234 if (bytes > 4096 && alignment == 4096) { 235 boundary = 0; 236 maxsegsize = bytes; 237 } else { 238 boundary = 4096; 239 maxsegsize = 4096; 240 } 241 242 /* allocate DMAable memory tags */ 243 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 244 alignment, /* alignment */ 245 boundary, /* boundary */ 246 BUS_SPACE_MAXADDR, /* low */ 247 BUS_SPACE_MAXADDR, /* high */ 248 NULL, NULL, /* filter */ 249 bytes, /* maxsize */ 250 1, /* num segs */ 251 maxsegsize, /* maxsegsize */ 252 BUS_DMA_COHERENT, /* flags */ 253 &dma->dmat); /* tag */ 254 if (err != 0) { 255 device_printf(dev, "couldn't alloc tag (err = %d)\n", err); 256 return err; 257 } 258 259 /* allocate DMAable memory & map */ 260 err = bus_dmamem_alloc(dma->dmat, &dma->addr, 261 (BUS_DMA_WAITOK | BUS_DMA_COHERENT 262 | BUS_DMA_ZERO), &dma->map); 263 if (err != 0) { 264 device_printf(dev, "couldn't alloc mem (err = %d)\n", err); 265 goto abort_with_dmat; 266 } 267 268 /* load the memory */ 269 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, 270 mxge_dmamap_callback, 271 (void *)&dma->bus_addr, 0); 272 if (err != 0) { 273 device_printf(dev, "couldn't load map (err = %d)\n", err); 274 goto abort_with_mem; 275 } 276 return 0; 277 278 abort_with_mem: 279 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 280 abort_with_dmat: 281 (void)bus_dma_tag_destroy(dma->dmat); 282 return err; 283 } 284 285 286 static void 287 mxge_dma_free(mxge_dma_t *dma) 288 { 289 bus_dmamap_unload(dma->dmat, dma->map); 290 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 291 (void)bus_dma_tag_destroy(dma->dmat); 292 } 293 294 /* 295 * The eeprom strings on the lanaiX have the format 296 * SN=x\0 297 * MAC=x:x:x:x:x:x\0 298 * PC=text\0 299 */ 300 301 static int 302 mxge_parse_strings(mxge_softc_t *sc) 303 { 304 #define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++) 305 306 char *ptr, *limit; 307 int i, found_mac; 308 309 ptr = sc->eeprom_strings; 310 limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE; 311 found_mac = 0; 312 while (ptr < limit && *ptr != '\0') { 313 if (memcmp(ptr, "MAC=", 4) == 0) { 314 ptr += 1; 315 sc->mac_addr_string = ptr; 316 for (i = 0; i < 6; i++) { 317 ptr += 3; 318 if ((ptr + 2) > limit) 319 goto abort; 320 sc->mac_addr[i] = strtoul(ptr, NULL, 16); 321 found_mac = 1; 322 } 323 } else if (memcmp(ptr, "PC=", 3) == 0) { 324 ptr += 3; 325 strncpy(sc->product_code_string, ptr, 326 sizeof (sc->product_code_string) - 1); 327 } else if (memcmp(ptr, "SN=", 3) == 0) { 328 ptr += 3; 329 strncpy(sc->serial_number_string, ptr, 330 sizeof (sc->serial_number_string) - 1); 331 } 332 MXGE_NEXT_STRING(ptr); 333 } 334 335 if (found_mac) 336 return 0; 337 338 abort: 339 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 340 341 return ENXIO; 342 } 343 344 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 345 static void 346 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 347 { 348 uint32_t val; 349 unsigned long base, off; 350 char *va, *cfgptr; 351 device_t pdev, mcp55; 352 uint16_t vendor_id, device_id, word; 353 uintptr_t bus, slot, func, ivend, idev; 354 uint32_t *ptr32; 355 356 357 if (!mxge_nvidia_ecrc_enable) 358 return; 359 360 pdev = device_get_parent(device_get_parent(sc->dev)); 361 if (pdev == NULL) { 362 device_printf(sc->dev, "could not find parent?\n"); 363 return; 364 } 365 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 366 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 367 368 if (vendor_id != 0x10de) 369 return; 370 371 base = 0; 372 373 if (device_id == 0x005d) { 374 /* ck804, base address is magic */ 375 base = 0xe0000000UL; 376 } else if (device_id >= 0x0374 && device_id <= 0x378) { 377 /* mcp55, base address stored in chipset */ 378 mcp55 = pci_find_bsf(0, 0, 0); 379 if (mcp55 && 380 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 381 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 382 word = pci_read_config(mcp55, 0x90, 2); 383 base = ((unsigned long)word & 0x7ffeU) << 25; 384 } 385 } 386 if (!base) 387 return; 388 389 /* XXXX 390 Test below is commented because it is believed that doing 391 config read/write beyond 0xff will access the config space 392 for the next larger function. Uncomment this and remove 393 the hacky pmap_mapdev() way of accessing config space when 394 FreeBSD grows support for extended pcie config space access 395 */ 396 #if 0 397 /* See if we can, by some miracle, access the extended 398 config space */ 399 val = pci_read_config(pdev, 0x178, 4); 400 if (val != 0xffffffff) { 401 val |= 0x40; 402 pci_write_config(pdev, 0x178, val, 4); 403 return; 404 } 405 #endif 406 /* Rather than using normal pci config space writes, we must 407 * map the Nvidia config space ourselves. This is because on 408 * opteron/nvidia class machine the 0xe000000 mapping is 409 * handled by the nvidia chipset, that means the internal PCI 410 * device (the on-chip northbridge), or the amd-8131 bridge 411 * and things behind them are not visible by this method. 412 */ 413 414 BUS_READ_IVAR(device_get_parent(pdev), pdev, 415 PCI_IVAR_BUS, &bus); 416 BUS_READ_IVAR(device_get_parent(pdev), pdev, 417 PCI_IVAR_SLOT, &slot); 418 BUS_READ_IVAR(device_get_parent(pdev), pdev, 419 PCI_IVAR_FUNCTION, &func); 420 BUS_READ_IVAR(device_get_parent(pdev), pdev, 421 PCI_IVAR_VENDOR, &ivend); 422 BUS_READ_IVAR(device_get_parent(pdev), pdev, 423 PCI_IVAR_DEVICE, &idev); 424 425 off = base 426 + 0x00100000UL * (unsigned long)bus 427 + 0x00001000UL * (unsigned long)(func 428 + 8 * slot); 429 430 /* map it into the kernel */ 431 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 432 433 434 if (va == NULL) { 435 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 436 return; 437 } 438 /* get a pointer to the config space mapped into the kernel */ 439 cfgptr = va + (off & PAGE_MASK); 440 441 /* make sure that we can really access it */ 442 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 443 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 444 if (! (vendor_id == ivend && device_id == idev)) { 445 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 446 vendor_id, device_id); 447 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 448 return; 449 } 450 451 ptr32 = (uint32_t*)(cfgptr + 0x178); 452 val = *ptr32; 453 454 if (val == 0xffffffff) { 455 device_printf(sc->dev, "extended mapping failed\n"); 456 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 457 return; 458 } 459 *ptr32 = val | 0x40; 460 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 461 if (mxge_verbose) 462 device_printf(sc->dev, 463 "Enabled ECRC on upstream Nvidia bridge " 464 "at %d:%d:%d\n", 465 (int)bus, (int)slot, (int)func); 466 return; 467 } 468 #else 469 static void 470 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 471 { 472 device_printf(sc->dev, 473 "Nforce 4 chipset on non-x86/x86_64!?!?!\n"); 474 return; 475 } 476 #endif 477 478 479 static int 480 mxge_dma_test(mxge_softc_t *sc, int test_type) 481 { 482 mxge_cmd_t cmd; 483 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr; 484 int status; 485 uint32_t len; 486 char *test = " "; 487 488 489 /* Run a small DMA test. 490 * The magic multipliers to the length tell the firmware 491 * to do DMA read, write, or read+write tests. The 492 * results are returned in cmd.data0. The upper 16 493 * bits of the return is the number of transfers completed. 494 * The lower 16 bits is the time in 0.5us ticks that the 495 * transfers took to complete. 496 */ 497 498 len = sc->tx_boundary; 499 500 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 501 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 502 cmd.data2 = len * 0x10000; 503 status = mxge_send_cmd(sc, test_type, &cmd); 504 if (status != 0) { 505 test = "read"; 506 goto abort; 507 } 508 sc->read_dma = ((cmd.data0>>16) * len * 2) / 509 (cmd.data0 & 0xffff); 510 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 511 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 512 cmd.data2 = len * 0x1; 513 status = mxge_send_cmd(sc, test_type, &cmd); 514 if (status != 0) { 515 test = "write"; 516 goto abort; 517 } 518 sc->write_dma = ((cmd.data0>>16) * len * 2) / 519 (cmd.data0 & 0xffff); 520 521 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 522 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 523 cmd.data2 = len * 0x10001; 524 status = mxge_send_cmd(sc, test_type, &cmd); 525 if (status != 0) { 526 test = "read/write"; 527 goto abort; 528 } 529 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 530 (cmd.data0 & 0xffff); 531 532 abort: 533 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 534 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 535 test, status); 536 537 return status; 538 } 539 540 /* 541 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 542 * when the PCI-E Completion packets are aligned on an 8-byte 543 * boundary. Some PCI-E chip sets always align Completion packets; on 544 * the ones that do not, the alignment can be enforced by enabling 545 * ECRC generation (if supported). 546 * 547 * When PCI-E Completion packets are not aligned, it is actually more 548 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 549 * 550 * If the driver can neither enable ECRC nor verify that it has 551 * already been enabled, then it must use a firmware image which works 552 * around unaligned completion packets (ethp_z8e.dat), and it should 553 * also ensure that it never gives the device a Read-DMA which is 554 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 555 * enabled, then the driver should use the aligned (eth_z8e.dat) 556 * firmware image, and set tx_boundary to 4KB. 557 */ 558 559 static int 560 mxge_firmware_probe(mxge_softc_t *sc) 561 { 562 device_t dev = sc->dev; 563 int reg, status; 564 uint16_t pectl; 565 566 sc->tx_boundary = 4096; 567 /* 568 * Verify the max read request size was set to 4KB 569 * before trying the test with 4KB. 570 */ 571 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 572 pectl = pci_read_config(dev, reg + 0x8, 2); 573 if ((pectl & (5 << 12)) != (5 << 12)) { 574 device_printf(dev, "Max Read Req. size != 4k (0x%x\n", 575 pectl); 576 sc->tx_boundary = 2048; 577 } 578 } 579 580 /* 581 * load the optimized firmware (which assumes aligned PCIe 582 * completions) in order to see if it works on this host. 583 */ 584 sc->fw_name = mxge_fw_aligned; 585 status = mxge_load_firmware(sc, 1); 586 if (status != 0) { 587 return status; 588 } 589 590 /* 591 * Enable ECRC if possible 592 */ 593 mxge_enable_nvidia_ecrc(sc); 594 595 /* 596 * Run a DMA test which watches for unaligned completions and 597 * aborts on the first one seen. 598 */ 599 600 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 601 if (status == 0) 602 return 0; /* keep the aligned firmware */ 603 604 if (status != E2BIG) 605 device_printf(dev, "DMA test failed: %d\n", status); 606 if (status == ENOSYS) 607 device_printf(dev, "Falling back to ethp! " 608 "Please install up to date fw\n"); 609 return status; 610 } 611 612 static int 613 mxge_select_firmware(mxge_softc_t *sc) 614 { 615 int aligned = 0; 616 617 618 if (mxge_force_firmware != 0) { 619 if (mxge_force_firmware == 1) 620 aligned = 1; 621 else 622 aligned = 0; 623 if (mxge_verbose) 624 device_printf(sc->dev, 625 "Assuming %s completions (forced)\n", 626 aligned ? "aligned" : "unaligned"); 627 goto abort; 628 } 629 630 /* if the PCIe link width is 4 or less, we can use the aligned 631 firmware and skip any checks */ 632 if (sc->link_width != 0 && sc->link_width <= 4) { 633 device_printf(sc->dev, 634 "PCIe x%d Link, expect reduced performance\n", 635 sc->link_width); 636 aligned = 1; 637 goto abort; 638 } 639 640 if (0 == mxge_firmware_probe(sc)) 641 return 0; 642 643 abort: 644 if (aligned) { 645 sc->fw_name = mxge_fw_aligned; 646 sc->tx_boundary = 4096; 647 } else { 648 sc->fw_name = mxge_fw_unaligned; 649 sc->tx_boundary = 2048; 650 } 651 return (mxge_load_firmware(sc, 0)); 652 } 653 654 union qualhack 655 { 656 const char *ro_char; 657 char *rw_char; 658 }; 659 660 static int 661 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 662 { 663 664 665 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 666 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 667 be32toh(hdr->mcp_type)); 668 return EIO; 669 } 670 671 /* save firmware version for sysctl */ 672 strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version)); 673 if (mxge_verbose) 674 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 675 676 ksscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 677 &sc->fw_ver_minor, &sc->fw_ver_tiny); 678 679 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 680 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 681 device_printf(sc->dev, "Found firmware version %s\n", 682 sc->fw_version); 683 device_printf(sc->dev, "Driver needs %d.%d\n", 684 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 685 return EINVAL; 686 } 687 return 0; 688 689 } 690 691 static void * 692 z_alloc(void *nil, u_int items, u_int size) 693 { 694 void *ptr; 695 696 ptr = kmalloc(items * size, M_TEMP, M_NOWAIT); 697 return ptr; 698 } 699 700 static void 701 z_free(void *nil, void *ptr) 702 { 703 kfree(ptr, M_TEMP); 704 } 705 706 707 static int 708 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 709 { 710 z_stream zs; 711 char *inflate_buffer; 712 const struct firmware *fw; 713 const mcp_gen_header_t *hdr; 714 unsigned hdr_offset; 715 int status; 716 unsigned int i; 717 char dummy; 718 size_t fw_len; 719 720 fw = firmware_get(sc->fw_name); 721 if (fw == NULL) { 722 device_printf(sc->dev, "Could not find firmware image %s\n", 723 sc->fw_name); 724 return ENOENT; 725 } 726 727 728 729 /* setup zlib and decompress f/w */ 730 bzero(&zs, sizeof (zs)); 731 zs.zalloc = z_alloc; 732 zs.zfree = z_free; 733 status = inflateInit(&zs); 734 if (status != Z_OK) { 735 status = EIO; 736 goto abort_with_fw; 737 } 738 739 /* the uncompressed size is stored as the firmware version, 740 which would otherwise go unused */ 741 fw_len = (size_t) fw->version; 742 inflate_buffer = kmalloc(fw_len, M_TEMP, M_NOWAIT); 743 if (inflate_buffer == NULL) 744 goto abort_with_zs; 745 zs.avail_in = fw->datasize; 746 zs.next_in = __DECONST(char *, fw->data); 747 zs.avail_out = fw_len; 748 zs.next_out = inflate_buffer; 749 status = inflate(&zs, Z_FINISH); 750 if (status != Z_STREAM_END) { 751 device_printf(sc->dev, "zlib %d\n", status); 752 status = EIO; 753 goto abort_with_buffer; 754 } 755 756 /* check id */ 757 hdr_offset = htobe32(*(const uint32_t *) 758 (inflate_buffer + MCP_HEADER_PTR_OFFSET)); 759 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 760 device_printf(sc->dev, "Bad firmware file"); 761 status = EIO; 762 goto abort_with_buffer; 763 } 764 hdr = (const void*)(inflate_buffer + hdr_offset); 765 766 status = mxge_validate_firmware(sc, hdr); 767 if (status != 0) 768 goto abort_with_buffer; 769 770 /* Copy the inflated firmware to NIC SRAM. */ 771 for (i = 0; i < fw_len; i += 256) { 772 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 773 inflate_buffer + i, 774 min(256U, (unsigned)(fw_len - i))); 775 wmb(); 776 dummy = *sc->sram; 777 wmb(); 778 } 779 780 *limit = fw_len; 781 status = 0; 782 abort_with_buffer: 783 kfree(inflate_buffer, M_TEMP); 784 abort_with_zs: 785 inflateEnd(&zs); 786 abort_with_fw: 787 firmware_put(fw, FIRMWARE_UNLOAD); 788 return status; 789 } 790 791 /* 792 * Enable or disable periodic RDMAs from the host to make certain 793 * chipsets resend dropped PCIe messages 794 */ 795 796 static void 797 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 798 { 799 char buf_bytes[72]; 800 volatile uint32_t *confirm; 801 volatile char *submit; 802 uint32_t *buf, dma_low, dma_high; 803 int i; 804 805 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 806 807 /* clear confirmation addr */ 808 confirm = (volatile uint32_t *)sc->cmd; 809 *confirm = 0; 810 wmb(); 811 812 /* send an rdma command to the PCIe engine, and wait for the 813 response in the confirmation address. The firmware should 814 write a -1 there to indicate it is alive and well 815 */ 816 817 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 818 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 819 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 820 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 821 buf[2] = htobe32(0xffffffff); /* confirm data */ 822 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 823 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 824 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 825 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 826 buf[5] = htobe32(enable); /* enable? */ 827 828 829 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 830 831 mxge_pio_copy(submit, buf, 64); 832 wmb(); 833 DELAY(1000); 834 wmb(); 835 i = 0; 836 while (*confirm != 0xffffffff && i < 20) { 837 DELAY(1000); 838 i++; 839 } 840 if (*confirm != 0xffffffff) { 841 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 842 (enable ? "enable" : "disable"), confirm, 843 *confirm); 844 } 845 return; 846 } 847 848 static int 849 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 850 { 851 mcp_cmd_t *buf; 852 char buf_bytes[sizeof(*buf) + 8]; 853 volatile mcp_cmd_response_t *response = sc->cmd; 854 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 855 uint32_t dma_low, dma_high; 856 int err, sleep_total = 0; 857 858 /* 859 * We may be called during attach, before if_serializer is available. 860 * This is not a fast path, just check for NULL 861 */ 862 863 if (sc->ifp->if_serializer) 864 ASSERT_SERIALIZED(sc->ifp->if_serializer); 865 866 /* ensure buf is aligned to 8 bytes */ 867 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 868 869 buf->data0 = htobe32(data->data0); 870 buf->data1 = htobe32(data->data1); 871 buf->data2 = htobe32(data->data2); 872 buf->cmd = htobe32(cmd); 873 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 874 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 875 876 buf->response_addr.low = htobe32(dma_low); 877 buf->response_addr.high = htobe32(dma_high); 878 879 880 response->result = 0xffffffff; 881 wmb(); 882 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 883 884 /* wait up to 20ms */ 885 err = EAGAIN; 886 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 887 bus_dmamap_sync(sc->cmd_dma.dmat, 888 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 889 wmb(); 890 switch (be32toh(response->result)) { 891 case 0: 892 data->data0 = be32toh(response->data); 893 err = 0; 894 break; 895 case 0xffffffff: 896 DELAY(1000); 897 break; 898 case MXGEFW_CMD_UNKNOWN: 899 err = ENOSYS; 900 break; 901 case MXGEFW_CMD_ERROR_UNALIGNED: 902 err = E2BIG; 903 break; 904 case MXGEFW_CMD_ERROR_BUSY: 905 err = EBUSY; 906 break; 907 default: 908 device_printf(sc->dev, 909 "mxge: command %d " 910 "failed, result = %d\n", 911 cmd, be32toh(response->result)); 912 err = ENXIO; 913 break; 914 } 915 if (err != EAGAIN) 916 break; 917 } 918 if (err == EAGAIN) 919 device_printf(sc->dev, "mxge: command %d timed out" 920 "result = %d\n", 921 cmd, be32toh(response->result)); 922 return err; 923 } 924 925 static int 926 mxge_adopt_running_firmware(mxge_softc_t *sc) 927 { 928 struct mcp_gen_header *hdr; 929 const size_t bytes = sizeof (struct mcp_gen_header); 930 size_t hdr_offset; 931 int status; 932 933 /* find running firmware header */ 934 hdr_offset = htobe32(*(volatile uint32_t *) 935 (sc->sram + MCP_HEADER_PTR_OFFSET)); 936 937 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 938 device_printf(sc->dev, 939 "Running firmware has bad header offset (%d)\n", 940 (int)hdr_offset); 941 return EIO; 942 } 943 944 /* copy header of running firmware from SRAM to host memory to 945 * validate firmware */ 946 hdr = kmalloc(bytes, M_DEVBUF, M_NOWAIT); 947 if (hdr == NULL) { 948 device_printf(sc->dev, "could not kmalloc firmware hdr\n"); 949 return ENOMEM; 950 } 951 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 952 rman_get_bushandle(sc->mem_res), 953 hdr_offset, (char *)hdr, bytes); 954 status = mxge_validate_firmware(sc, hdr); 955 kfree(hdr, M_DEVBUF); 956 957 /* 958 * check to see if adopted firmware has bug where adopting 959 * it will cause broadcasts to be filtered unless the NIC 960 * is kept in ALLMULTI mode 961 */ 962 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 963 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 964 sc->adopted_rx_filter_bug = 1; 965 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 966 "working around rx filter bug\n", 967 sc->fw_ver_major, sc->fw_ver_minor, 968 sc->fw_ver_tiny); 969 } 970 971 return status; 972 } 973 974 975 static int 976 mxge_load_firmware(mxge_softc_t *sc, int adopt) 977 { 978 volatile uint32_t *confirm; 979 volatile char *submit; 980 char buf_bytes[72]; 981 uint32_t *buf, size, dma_low, dma_high; 982 int status, i; 983 984 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 985 986 size = sc->sram_size; 987 status = mxge_load_firmware_helper(sc, &size); 988 if (status) { 989 if (!adopt) 990 return status; 991 /* Try to use the currently running firmware, if 992 it is new enough */ 993 status = mxge_adopt_running_firmware(sc); 994 if (status) { 995 device_printf(sc->dev, 996 "failed to adopt running firmware\n"); 997 return status; 998 } 999 device_printf(sc->dev, 1000 "Successfully adopted running firmware\n"); 1001 if (sc->tx_boundary == 4096) { 1002 device_printf(sc->dev, 1003 "Using firmware currently running on NIC" 1004 ". For optimal\n"); 1005 device_printf(sc->dev, 1006 "performance consider loading optimized " 1007 "firmware\n"); 1008 } 1009 sc->fw_name = mxge_fw_unaligned; 1010 sc->tx_boundary = 2048; 1011 return 0; 1012 } 1013 /* clear confirmation addr */ 1014 confirm = (volatile uint32_t *)sc->cmd; 1015 *confirm = 0; 1016 wmb(); 1017 /* send a reload command to the bootstrap MCP, and wait for the 1018 response in the confirmation address. The firmware should 1019 write a -1 there to indicate it is alive and well 1020 */ 1021 1022 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 1023 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 1024 1025 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 1026 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 1027 buf[2] = htobe32(0xffffffff); /* confirm data */ 1028 1029 /* FIX: All newest firmware should un-protect the bottom of 1030 the sram before handoff. However, the very first interfaces 1031 do not. Therefore the handoff copy must skip the first 8 bytes 1032 */ 1033 /* where the code starts*/ 1034 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 1035 buf[4] = htobe32(size - 8); /* length of code */ 1036 buf[5] = htobe32(8); /* where to copy to */ 1037 buf[6] = htobe32(0); /* where to jump to */ 1038 1039 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 1040 mxge_pio_copy(submit, buf, 64); 1041 wmb(); 1042 DELAY(1000); 1043 wmb(); 1044 i = 0; 1045 while (*confirm != 0xffffffff && i < 20) { 1046 DELAY(1000*10); 1047 i++; 1048 bus_dmamap_sync(sc->cmd_dma.dmat, 1049 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 1050 } 1051 if (*confirm != 0xffffffff) { 1052 device_printf(sc->dev,"handoff failed (%p = 0x%x)", 1053 confirm, *confirm); 1054 1055 return ENXIO; 1056 } 1057 return 0; 1058 } 1059 1060 static int 1061 mxge_update_mac_address(mxge_softc_t *sc) 1062 { 1063 mxge_cmd_t cmd; 1064 uint8_t *addr = sc->mac_addr; 1065 int status; 1066 1067 1068 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 1069 | (addr[2] << 8) | addr[3]); 1070 1071 cmd.data1 = ((addr[4] << 8) | (addr[5])); 1072 1073 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 1074 return status; 1075 } 1076 1077 static int 1078 mxge_change_pause(mxge_softc_t *sc, int pause) 1079 { 1080 mxge_cmd_t cmd; 1081 int status; 1082 1083 if (pause) 1084 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, 1085 &cmd); 1086 else 1087 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, 1088 &cmd); 1089 1090 if (status) { 1091 device_printf(sc->dev, "Failed to set flow control mode\n"); 1092 return ENXIO; 1093 } 1094 sc->pause = pause; 1095 return 0; 1096 } 1097 1098 static void 1099 mxge_change_promisc(mxge_softc_t *sc, int promisc) 1100 { 1101 mxge_cmd_t cmd; 1102 int status; 1103 1104 if( sc->ifp->if_serializer) 1105 ASSERT_SERIALIZED(sc->ifp->if_serializer); 1106 if (mxge_always_promisc) 1107 promisc = 1; 1108 1109 if (promisc) 1110 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, 1111 &cmd); 1112 else 1113 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, 1114 &cmd); 1115 1116 if (status) { 1117 device_printf(sc->dev, "Failed to set promisc mode\n"); 1118 } 1119 } 1120 1121 static void 1122 mxge_set_multicast_list(mxge_softc_t *sc) 1123 { 1124 mxge_cmd_t cmd; 1125 struct ifmultiaddr *ifma; 1126 struct ifnet *ifp = sc->ifp; 1127 int err; 1128 1129 if (ifp->if_serializer) 1130 ASSERT_SERIALIZED(ifp->if_serializer); 1131 1132 /* This firmware is known to not support multicast */ 1133 if (!sc->fw_multicast_support) 1134 return; 1135 1136 /* Disable multicast filtering while we play with the lists*/ 1137 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1138 if (err != 0) { 1139 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," 1140 " error status: %d\n", err); 1141 return; 1142 } 1143 1144 if (sc->adopted_rx_filter_bug) 1145 return; 1146 1147 if (ifp->if_flags & IFF_ALLMULTI) 1148 /* request to disable multicast filtering, so quit here */ 1149 return; 1150 1151 /* Flush all the filters */ 1152 1153 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1154 if (err != 0) { 1155 device_printf(sc->dev, 1156 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" 1157 ", error status: %d\n", err); 1158 return; 1159 } 1160 1161 /* Walk the multicast list, and add each address */ 1162 1163 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1164 if (ifma->ifma_addr->sa_family != AF_LINK) 1165 continue; 1166 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1167 &cmd.data0, 4); 1168 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, 1169 &cmd.data1, 2); 1170 cmd.data0 = htonl(cmd.data0); 1171 cmd.data1 = htonl(cmd.data1); 1172 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1173 if (err != 0) { 1174 device_printf(sc->dev, "Failed " 1175 "MXGEFW_JOIN_MULTICAST_GROUP, error status:" 1176 "%d\t", err); 1177 /* abort, leaving multicast filtering off */ 1178 return; 1179 } 1180 } 1181 /* Enable multicast filtering */ 1182 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1183 if (err != 0) { 1184 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI" 1185 ", error status: %d\n", err); 1186 } 1187 } 1188 1189 static int 1190 mxge_max_mtu(mxge_softc_t *sc) 1191 { 1192 mxge_cmd_t cmd; 1193 int status; 1194 1195 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) 1196 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1197 1198 /* try to set nbufs to see if it we can 1199 use virtually contiguous jumbos */ 1200 cmd.data0 = 0; 1201 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 1202 &cmd); 1203 if (status == 0) 1204 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1205 1206 /* otherwise, we're limited to MJUMPAGESIZE */ 1207 return MJUMPAGESIZE - MXGEFW_PAD; 1208 } 1209 1210 static int 1211 mxge_reset(mxge_softc_t *sc, int interrupts_setup) 1212 { 1213 struct mxge_slice_state *ss; 1214 mxge_rx_done_t *rx_done; 1215 volatile uint32_t *irq_claim; 1216 mxge_cmd_t cmd; 1217 int slice, status; 1218 1219 /* try to send a reset command to the card to see if it 1220 is alive */ 1221 memset(&cmd, 0, sizeof (cmd)); 1222 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1223 if (status != 0) { 1224 device_printf(sc->dev, "failed reset\n"); 1225 return ENXIO; 1226 } 1227 1228 mxge_dummy_rdma(sc, 1); 1229 1230 1231 /* set the intrq size */ 1232 cmd.data0 = sc->rx_ring_size; 1233 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1234 1235 /* 1236 * Even though we already know how many slices are supported 1237 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES 1238 * has magic side effects, and must be called after a reset. 1239 * It must be called prior to calling any RSS related cmds, 1240 * including assigning an interrupt queue for anything but 1241 * slice 0. It must also be called *after* 1242 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 1243 * the firmware to compute offsets. 1244 */ 1245 1246 if (sc->num_slices > 1) { 1247 /* ask the maximum number of slices it supports */ 1248 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 1249 &cmd); 1250 if (status != 0) { 1251 device_printf(sc->dev, 1252 "failed to get number of slices\n"); 1253 return status; 1254 } 1255 /* 1256 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 1257 * to setting up the interrupt queue DMA 1258 */ 1259 cmd.data0 = sc->num_slices; 1260 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 1261 #ifdef IFNET_BUF_RING 1262 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 1263 #endif 1264 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, 1265 &cmd); 1266 if (status != 0) { 1267 device_printf(sc->dev, 1268 "failed to set number of slices\n"); 1269 return status; 1270 } 1271 } 1272 1273 1274 if (interrupts_setup) { 1275 /* Now exchange information about interrupts */ 1276 for (slice = 0; slice < sc->num_slices; slice++) { 1277 rx_done = &sc->ss[slice].rx_done; 1278 memset(rx_done->entry, 0, sc->rx_ring_size); 1279 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr); 1280 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr); 1281 cmd.data2 = slice; 1282 status |= mxge_send_cmd(sc, 1283 MXGEFW_CMD_SET_INTRQ_DMA, 1284 &cmd); 1285 } 1286 } 1287 1288 status |= mxge_send_cmd(sc, 1289 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); 1290 1291 1292 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1293 1294 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1295 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1296 1297 1298 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1299 &cmd); 1300 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1301 if (status != 0) { 1302 device_printf(sc->dev, "failed set interrupt parameters\n"); 1303 return status; 1304 } 1305 1306 1307 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1308 1309 1310 /* run a DMA benchmark */ 1311 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST); 1312 1313 for (slice = 0; slice < sc->num_slices; slice++) { 1314 ss = &sc->ss[slice]; 1315 1316 ss->irq_claim = irq_claim + (2 * slice); 1317 /* reset mcp/driver shared state back to 0 */ 1318 ss->rx_done.idx = 0; 1319 ss->rx_done.cnt = 0; 1320 ss->tx.req = 0; 1321 ss->tx.done = 0; 1322 ss->tx.pkt_done = 0; 1323 ss->tx.queue_active = 0; 1324 ss->tx.activate = 0; 1325 ss->tx.deactivate = 0; 1326 ss->tx.wake = 0; 1327 ss->tx.defrag = 0; 1328 ss->tx.stall = 0; 1329 ss->rx_big.cnt = 0; 1330 ss->rx_small.cnt = 0; 1331 ss->lro_bad_csum = 0; 1332 ss->lro_queued = 0; 1333 ss->lro_flushed = 0; 1334 if (ss->fw_stats != NULL) { 1335 ss->fw_stats->valid = 0; 1336 ss->fw_stats->send_done_count = 0; 1337 } 1338 } 1339 sc->rdma_tags_available = 15; 1340 status = mxge_update_mac_address(sc); 1341 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC); 1342 mxge_change_pause(sc, sc->pause); 1343 mxge_set_multicast_list(sc); 1344 return status; 1345 } 1346 1347 static int 1348 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1349 { 1350 mxge_softc_t *sc; 1351 unsigned int intr_coal_delay; 1352 int err; 1353 1354 sc = arg1; 1355 intr_coal_delay = sc->intr_coal_delay; 1356 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1357 if (err != 0) { 1358 return err; 1359 } 1360 if (intr_coal_delay == sc->intr_coal_delay) 1361 return 0; 1362 1363 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1364 return EINVAL; 1365 1366 lwkt_serialize_enter(sc->ifp->if_serializer); 1367 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1368 sc->intr_coal_delay = intr_coal_delay; 1369 1370 lwkt_serialize_exit(sc->ifp->if_serializer); 1371 return err; 1372 } 1373 1374 static int 1375 mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1376 { 1377 mxge_softc_t *sc; 1378 unsigned int enabled; 1379 int err; 1380 1381 sc = arg1; 1382 enabled = sc->pause; 1383 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1384 if (err != 0) { 1385 return err; 1386 } 1387 if (enabled == sc->pause) 1388 return 0; 1389 1390 lwkt_serialize_enter(sc->ifp->if_serializer); 1391 err = mxge_change_pause(sc, enabled); 1392 lwkt_serialize_exit(sc->ifp->if_serializer); 1393 return err; 1394 } 1395 1396 static int 1397 mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt) 1398 { 1399 struct ifnet *ifp; 1400 int err = 0; 1401 1402 ifp = sc->ifp; 1403 if (lro_cnt == 0) 1404 ifp->if_capenable &= ~IFCAP_LRO; 1405 else 1406 ifp->if_capenable |= IFCAP_LRO; 1407 sc->lro_cnt = lro_cnt; 1408 if (ifp->if_flags & IFF_RUNNING) { 1409 mxge_close(sc); 1410 err = mxge_open(sc); 1411 } 1412 return err; 1413 } 1414 1415 static int 1416 mxge_change_lro(SYSCTL_HANDLER_ARGS) 1417 { 1418 mxge_softc_t *sc; 1419 unsigned int lro_cnt; 1420 int err; 1421 1422 sc = arg1; 1423 lro_cnt = sc->lro_cnt; 1424 err = sysctl_handle_int(oidp, &lro_cnt, arg2, req); 1425 if (err != 0) 1426 return err; 1427 1428 if (lro_cnt == sc->lro_cnt) 1429 return 0; 1430 1431 if (lro_cnt > 128) 1432 return EINVAL; 1433 1434 lwkt_serialize_enter(sc->ifp->if_serializer); 1435 err = mxge_change_lro_locked(sc, lro_cnt); 1436 lwkt_serialize_exit(sc->ifp->if_serializer); 1437 return err; 1438 } 1439 1440 static int 1441 mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1442 { 1443 int err; 1444 1445 if (arg1 == NULL) 1446 return EFAULT; 1447 arg2 = be32toh(*(int *)arg1); 1448 arg1 = NULL; 1449 err = sysctl_handle_int(oidp, arg1, arg2, req); 1450 1451 return err; 1452 } 1453 1454 static void 1455 mxge_rem_sysctls(mxge_softc_t *sc) 1456 { 1457 struct mxge_slice_state *ss; 1458 int slice; 1459 1460 if (sc->slice_sysctl_tree == NULL) 1461 return; 1462 1463 for (slice = 0; slice < sc->num_slices; slice++) { 1464 ss = &sc->ss[slice]; 1465 if (ss == NULL || ss->sysctl_tree == NULL) 1466 continue; 1467 sysctl_ctx_free(&ss->sysctl_ctx); 1468 ss->sysctl_tree = NULL; 1469 } 1470 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1471 sc->slice_sysctl_tree = NULL; 1472 sysctl_ctx_free(&sc->sysctl_ctx); 1473 sc->sysctl_tree = NULL; 1474 1475 } 1476 1477 static void 1478 mxge_add_sysctls(mxge_softc_t *sc) 1479 { 1480 struct sysctl_ctx_list *ctx; 1481 struct sysctl_oid_list *children; 1482 mcp_irq_data_t *fw; 1483 struct mxge_slice_state *ss; 1484 int slice; 1485 char slice_num[8]; 1486 1487 ctx = &sc->sysctl_ctx; 1488 sysctl_ctx_init(ctx); 1489 sc->sysctl_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_hw), 1490 OID_AUTO, 1491 device_get_nameunit(sc->dev), 1492 CTLFLAG_RD, 0, ""); 1493 if (sc->sysctl_tree == NULL) { 1494 device_printf(sc->dev, "can't add sysctl node\n"); 1495 return; 1496 } 1497 1498 children = SYSCTL_CHILDREN(sc->sysctl_tree); 1499 fw = sc->ss[0].fw_stats; 1500 1501 /* random information */ 1502 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1503 "firmware_version", 1504 CTLFLAG_RD, &sc->fw_version, 1505 0, "firmware version"); 1506 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1507 "serial_number", 1508 CTLFLAG_RD, &sc->serial_number_string, 1509 0, "serial number"); 1510 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1511 "product_code", 1512 CTLFLAG_RD, &sc->product_code_string, 1513 0, "product_code"); 1514 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1515 "pcie_link_width", 1516 CTLFLAG_RD, &sc->link_width, 1517 0, "tx_boundary"); 1518 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1519 "tx_boundary", 1520 CTLFLAG_RD, &sc->tx_boundary, 1521 0, "tx_boundary"); 1522 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1523 "write_combine", 1524 CTLFLAG_RD, &sc->wc, 1525 0, "write combining PIO?"); 1526 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1527 "read_dma_MBs", 1528 CTLFLAG_RD, &sc->read_dma, 1529 0, "DMA Read speed in MB/s"); 1530 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1531 "write_dma_MBs", 1532 CTLFLAG_RD, &sc->write_dma, 1533 0, "DMA Write speed in MB/s"); 1534 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1535 "read_write_dma_MBs", 1536 CTLFLAG_RD, &sc->read_write_dma, 1537 0, "DMA concurrent Read/Write speed in MB/s"); 1538 1539 1540 /* performance related tunables */ 1541 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1542 "intr_coal_delay", 1543 CTLTYPE_INT|CTLFLAG_RW, sc, 1544 0, mxge_change_intr_coal, 1545 "I", "interrupt coalescing delay in usecs"); 1546 1547 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1548 "flow_control_enabled", 1549 CTLTYPE_INT|CTLFLAG_RW, sc, 1550 0, mxge_change_flow_control, 1551 "I", "interrupt coalescing delay in usecs"); 1552 1553 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1554 "deassert_wait", 1555 CTLFLAG_RW, &mxge_deassert_wait, 1556 0, "Wait for IRQ line to go low in ihandler"); 1557 1558 /* stats block from firmware is in network byte order. 1559 Need to swap it */ 1560 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1561 "link_up", 1562 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 1563 0, mxge_handle_be32, 1564 "I", "link up"); 1565 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1566 "rdma_tags_available", 1567 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 1568 0, mxge_handle_be32, 1569 "I", "rdma_tags_available"); 1570 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1571 "dropped_bad_crc32", 1572 CTLTYPE_INT|CTLFLAG_RD, 1573 &fw->dropped_bad_crc32, 1574 0, mxge_handle_be32, 1575 "I", "dropped_bad_crc32"); 1576 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1577 "dropped_bad_phy", 1578 CTLTYPE_INT|CTLFLAG_RD, 1579 &fw->dropped_bad_phy, 1580 0, mxge_handle_be32, 1581 "I", "dropped_bad_phy"); 1582 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1583 "dropped_link_error_or_filtered", 1584 CTLTYPE_INT|CTLFLAG_RD, 1585 &fw->dropped_link_error_or_filtered, 1586 0, mxge_handle_be32, 1587 "I", "dropped_link_error_or_filtered"); 1588 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1589 "dropped_link_overflow", 1590 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 1591 0, mxge_handle_be32, 1592 "I", "dropped_link_overflow"); 1593 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1594 "dropped_multicast_filtered", 1595 CTLTYPE_INT|CTLFLAG_RD, 1596 &fw->dropped_multicast_filtered, 1597 0, mxge_handle_be32, 1598 "I", "dropped_multicast_filtered"); 1599 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1600 "dropped_no_big_buffer", 1601 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 1602 0, mxge_handle_be32, 1603 "I", "dropped_no_big_buffer"); 1604 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1605 "dropped_no_small_buffer", 1606 CTLTYPE_INT|CTLFLAG_RD, 1607 &fw->dropped_no_small_buffer, 1608 0, mxge_handle_be32, 1609 "I", "dropped_no_small_buffer"); 1610 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1611 "dropped_overrun", 1612 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 1613 0, mxge_handle_be32, 1614 "I", "dropped_overrun"); 1615 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1616 "dropped_pause", 1617 CTLTYPE_INT|CTLFLAG_RD, 1618 &fw->dropped_pause, 1619 0, mxge_handle_be32, 1620 "I", "dropped_pause"); 1621 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1622 "dropped_runt", 1623 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 1624 0, mxge_handle_be32, 1625 "I", "dropped_runt"); 1626 1627 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1628 "dropped_unicast_filtered", 1629 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 1630 0, mxge_handle_be32, 1631 "I", "dropped_unicast_filtered"); 1632 1633 /* verbose printing? */ 1634 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1635 "verbose", 1636 CTLFLAG_RW, &mxge_verbose, 1637 0, "verbose printing"); 1638 1639 /* lro */ 1640 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1641 "lro_cnt", 1642 CTLTYPE_INT|CTLFLAG_RW, sc, 1643 0, mxge_change_lro, 1644 "I", "number of lro merge queues"); 1645 1646 1647 /* add counters exported for debugging from all slices */ 1648 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1649 sc->slice_sysctl_tree = 1650 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO, 1651 "slice", CTLFLAG_RD, 0, ""); 1652 1653 for (slice = 0; slice < sc->num_slices; slice++) { 1654 ss = &sc->ss[slice]; 1655 sysctl_ctx_init(&ss->sysctl_ctx); 1656 ctx = &ss->sysctl_ctx; 1657 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1658 ksprintf(slice_num, "%d", slice); 1659 ss->sysctl_tree = 1660 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num, 1661 CTLFLAG_RD, 0, ""); 1662 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1663 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1664 "rx_small_cnt", 1665 CTLFLAG_RD, &ss->rx_small.cnt, 1666 0, "rx_small_cnt"); 1667 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1668 "rx_big_cnt", 1669 CTLFLAG_RD, &ss->rx_big.cnt, 1670 0, "rx_small_cnt"); 1671 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1672 "lro_flushed", CTLFLAG_RD, &ss->lro_flushed, 1673 0, "number of lro merge queues flushed"); 1674 1675 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1676 "lro_queued", CTLFLAG_RD, &ss->lro_queued, 1677 0, "number of frames appended to lro merge" 1678 "queues"); 1679 1680 #ifndef IFNET_BUF_RING 1681 /* only transmit from slice 0 for now */ 1682 if (slice > 0) 1683 continue; 1684 #endif 1685 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1686 "tx_req", 1687 CTLFLAG_RD, &ss->tx.req, 1688 0, "tx_req"); 1689 1690 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1691 "tx_done", 1692 CTLFLAG_RD, &ss->tx.done, 1693 0, "tx_done"); 1694 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1695 "tx_pkt_done", 1696 CTLFLAG_RD, &ss->tx.pkt_done, 1697 0, "tx_done"); 1698 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1699 "tx_stall", 1700 CTLFLAG_RD, &ss->tx.stall, 1701 0, "tx_stall"); 1702 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1703 "tx_wake", 1704 CTLFLAG_RD, &ss->tx.wake, 1705 0, "tx_wake"); 1706 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1707 "tx_defrag", 1708 CTLFLAG_RD, &ss->tx.defrag, 1709 0, "tx_defrag"); 1710 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1711 "tx_queue_active", 1712 CTLFLAG_RD, &ss->tx.queue_active, 1713 0, "tx_queue_active"); 1714 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1715 "tx_activate", 1716 CTLFLAG_RD, &ss->tx.activate, 1717 0, "tx_activate"); 1718 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1719 "tx_deactivate", 1720 CTLFLAG_RD, &ss->tx.deactivate, 1721 0, "tx_deactivate"); 1722 } 1723 } 1724 1725 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1726 backwards one at a time and handle ring wraps */ 1727 1728 static inline void 1729 mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1730 mcp_kreq_ether_send_t *src, int cnt) 1731 { 1732 int idx, starting_slot; 1733 starting_slot = tx->req; 1734 while (cnt > 1) { 1735 cnt--; 1736 idx = (starting_slot + cnt) & tx->mask; 1737 mxge_pio_copy(&tx->lanai[idx], 1738 &src[cnt], sizeof(*src)); 1739 wmb(); 1740 } 1741 } 1742 1743 /* 1744 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1745 * at most 32 bytes at a time, so as to avoid involving the software 1746 * pio handler in the nic. We re-write the first segment's flags 1747 * to mark them valid only after writing the entire chain 1748 */ 1749 1750 static inline void 1751 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 1752 int cnt) 1753 { 1754 int idx, i; 1755 uint32_t *src_ints; 1756 volatile uint32_t *dst_ints; 1757 mcp_kreq_ether_send_t *srcp; 1758 volatile mcp_kreq_ether_send_t *dstp, *dst; 1759 uint8_t last_flags; 1760 1761 idx = tx->req & tx->mask; 1762 1763 last_flags = src->flags; 1764 src->flags = 0; 1765 wmb(); 1766 dst = dstp = &tx->lanai[idx]; 1767 srcp = src; 1768 1769 if ((idx + cnt) < tx->mask) { 1770 for (i = 0; i < (cnt - 1); i += 2) { 1771 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1772 wmb(); /* force write every 32 bytes */ 1773 srcp += 2; 1774 dstp += 2; 1775 } 1776 } else { 1777 /* submit all but the first request, and ensure 1778 that it is submitted below */ 1779 mxge_submit_req_backwards(tx, src, cnt); 1780 i = 0; 1781 } 1782 if (i < cnt) { 1783 /* submit the first request */ 1784 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1785 wmb(); /* barrier before setting valid flag */ 1786 } 1787 1788 /* re-write the last 32-bits with the valid flags */ 1789 src->flags = last_flags; 1790 src_ints = (uint32_t *)src; 1791 src_ints+=3; 1792 dst_ints = (volatile uint32_t *)dst; 1793 dst_ints+=3; 1794 *dst_ints = *src_ints; 1795 tx->req += cnt; 1796 wmb(); 1797 } 1798 1799 #if IFCAP_TSO4 1800 1801 static void 1802 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m, 1803 int busdma_seg_cnt, int ip_off) 1804 { 1805 mxge_tx_ring_t *tx; 1806 mcp_kreq_ether_send_t *req; 1807 bus_dma_segment_t *seg; 1808 struct ip *ip; 1809 struct tcphdr *tcp; 1810 uint32_t low, high_swapped; 1811 int len, seglen, cum_len, cum_len_next; 1812 int next_is_first, chop, cnt, rdma_count, small; 1813 uint16_t pseudo_hdr_offset, cksum_offset, mss; 1814 uint8_t flags, flags_next; 1815 static int once; 1816 1817 mss = m->m_pkthdr.tso_segsz; 1818 1819 /* negative cum_len signifies to the 1820 * send loop that we are still in the 1821 * header portion of the TSO packet. 1822 */ 1823 1824 /* ensure we have the ethernet, IP and TCP 1825 header together in the first mbuf, copy 1826 it to a scratch buffer if not */ 1827 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 1828 m_copydata(m, 0, ip_off + sizeof (*ip), 1829 ss->scratch); 1830 ip = (struct ip *)(ss->scratch + ip_off); 1831 } else { 1832 ip = (struct ip *)(mtod(m, char *) + ip_off); 1833 } 1834 if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2) 1835 + sizeof (*tcp))) { 1836 m_copydata(m, 0, ip_off + (ip->ip_hl << 2) 1837 + sizeof (*tcp), ss->scratch); 1838 ip = (struct ip *)(mtod(m, char *) + ip_off); 1839 } 1840 1841 tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2)); 1842 cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2)); 1843 1844 /* TSO implies checksum offload on this hardware */ 1845 cksum_offset = ip_off + (ip->ip_hl << 2); 1846 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1847 1848 1849 /* for TSO, pseudo_hdr_offset holds mss. 1850 * The firmware figures out where to put 1851 * the checksum by parsing the header. */ 1852 pseudo_hdr_offset = htobe16(mss); 1853 1854 tx = &ss->tx; 1855 req = tx->req_list; 1856 seg = tx->seg_list; 1857 cnt = 0; 1858 rdma_count = 0; 1859 /* "rdma_count" is the number of RDMAs belonging to the 1860 * current packet BEFORE the current send request. For 1861 * non-TSO packets, this is equal to "count". 1862 * For TSO packets, rdma_count needs to be reset 1863 * to 0 after a segment cut. 1864 * 1865 * The rdma_count field of the send request is 1866 * the number of RDMAs of the packet starting at 1867 * that request. For TSO send requests with one ore more cuts 1868 * in the middle, this is the number of RDMAs starting 1869 * after the last cut in the request. All previous 1870 * segments before the last cut implicitly have 1 RDMA. 1871 * 1872 * Since the number of RDMAs is not known beforehand, 1873 * it must be filled-in retroactively - after each 1874 * segmentation cut or at the end of the entire packet. 1875 */ 1876 1877 while (busdma_seg_cnt) { 1878 /* Break the busdma segment up into pieces*/ 1879 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1880 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1881 len = seg->ds_len; 1882 1883 while (len) { 1884 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1885 seglen = len; 1886 cum_len_next = cum_len + seglen; 1887 (req-rdma_count)->rdma_count = rdma_count + 1; 1888 if (__predict_true(cum_len >= 0)) { 1889 /* payload */ 1890 chop = (cum_len_next > mss); 1891 cum_len_next = cum_len_next % mss; 1892 next_is_first = (cum_len_next == 0); 1893 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1894 flags_next |= next_is_first * 1895 MXGEFW_FLAGS_FIRST; 1896 rdma_count |= -(chop | next_is_first); 1897 rdma_count += chop & !next_is_first; 1898 } else if (cum_len_next >= 0) { 1899 /* header ends */ 1900 rdma_count = -1; 1901 cum_len_next = 0; 1902 seglen = -cum_len; 1903 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1904 flags_next = MXGEFW_FLAGS_TSO_PLD | 1905 MXGEFW_FLAGS_FIRST | 1906 (small * MXGEFW_FLAGS_SMALL); 1907 } 1908 1909 req->addr_high = high_swapped; 1910 req->addr_low = htobe32(low); 1911 req->pseudo_hdr_offset = pseudo_hdr_offset; 1912 req->pad = 0; 1913 req->rdma_count = 1; 1914 req->length = htobe16(seglen); 1915 req->cksum_offset = cksum_offset; 1916 req->flags = flags | ((cum_len & 1) * 1917 MXGEFW_FLAGS_ALIGN_ODD); 1918 low += seglen; 1919 len -= seglen; 1920 cum_len = cum_len_next; 1921 flags = flags_next; 1922 req++; 1923 cnt++; 1924 rdma_count++; 1925 if (__predict_false(cksum_offset > seglen)) 1926 cksum_offset -= seglen; 1927 else 1928 cksum_offset = 0; 1929 if (__predict_false(cnt > tx->max_desc)) 1930 goto drop; 1931 } 1932 busdma_seg_cnt--; 1933 seg++; 1934 } 1935 (req-rdma_count)->rdma_count = rdma_count; 1936 1937 do { 1938 req--; 1939 req->flags |= MXGEFW_FLAGS_TSO_LAST; 1940 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 1941 1942 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1943 mxge_submit_req(tx, tx->req_list, cnt); 1944 #ifdef IFNET_BUF_RING 1945 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 1946 /* tell the NIC to start polling this slice */ 1947 *tx->send_go = 1; 1948 tx->queue_active = 1; 1949 tx->activate++; 1950 wmb(); 1951 } 1952 #endif 1953 return; 1954 1955 drop: 1956 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 1957 m_freem(m); 1958 ss->oerrors++; 1959 if (!once) { 1960 kprintf("tx->max_desc exceeded via TSO!\n"); 1961 kprintf("mss = %d, %ld, %d!\n", mss, 1962 (long)seg - (long)tx->seg_list, tx->max_desc); 1963 once = 1; 1964 } 1965 return; 1966 1967 } 1968 1969 #endif /* IFCAP_TSO4 */ 1970 1971 #ifdef MXGE_NEW_VLAN_API 1972 /* 1973 * We reproduce the software vlan tag insertion from 1974 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" 1975 * vlan tag insertion. We need to advertise this in order to have the 1976 * vlan interface respect our csum offload flags. 1977 */ 1978 static struct mbuf * 1979 mxge_vlan_tag_insert(struct mbuf *m) 1980 { 1981 struct ether_vlan_header *evl; 1982 1983 M_PREPEND(m, EVL_ENCAPLEN, MB_DONTWAIT); 1984 if (__predict_false(m == NULL)) 1985 return NULL; 1986 if (m->m_len < sizeof(*evl)) { 1987 m = m_pullup(m, sizeof(*evl)); 1988 if (__predict_false(m == NULL)) 1989 return NULL; 1990 } 1991 /* 1992 * Transform the Ethernet header into an Ethernet header 1993 * with 802.1Q encapsulation. 1994 */ 1995 evl = mtod(m, struct ether_vlan_header *); 1996 bcopy((char *)evl + EVL_ENCAPLEN, 1997 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 1998 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 1999 evl->evl_tag = htons(m->m_pkthdr.ether_vlantag); 2000 m->m_flags &= ~M_VLANTAG; 2001 return m; 2002 } 2003 #endif /* MXGE_NEW_VLAN_API */ 2004 2005 static void 2006 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 2007 { 2008 mxge_softc_t *sc; 2009 mcp_kreq_ether_send_t *req; 2010 bus_dma_segment_t *seg; 2011 struct mbuf *m_tmp; 2012 struct ifnet *ifp; 2013 mxge_tx_ring_t *tx; 2014 struct ip *ip; 2015 int cnt, cum_len, err, i, idx, odd_flag, ip_off; 2016 uint16_t pseudo_hdr_offset; 2017 uint8_t flags, cksum_offset; 2018 2019 2020 sc = ss->sc; 2021 ifp = sc->ifp; 2022 tx = &ss->tx; 2023 2024 ip_off = sizeof (struct ether_header); 2025 #ifdef MXGE_NEW_VLAN_API 2026 if (m->m_flags & M_VLANTAG) { 2027 m = mxge_vlan_tag_insert(m); 2028 if (__predict_false(m == NULL)) 2029 goto drop; 2030 ip_off += EVL_ENCAPLEN; 2031 } 2032 #endif 2033 /* (try to) map the frame for DMA */ 2034 idx = tx->req & tx->mask; 2035 err = bus_dmamap_load_mbuf_segment(tx->dmat, tx->info[idx].map, 2036 m, tx->seg_list, 1, &cnt, 2037 BUS_DMA_NOWAIT); 2038 if (__predict_false(err == EFBIG)) { 2039 /* Too many segments in the chain. Try 2040 to defrag */ 2041 m_tmp = m_defrag(m, MB_DONTWAIT); 2042 if (m_tmp == NULL) { 2043 goto drop; 2044 } 2045 ss->tx.defrag++; 2046 m = m_tmp; 2047 err = bus_dmamap_load_mbuf_segment(tx->dmat, 2048 tx->info[idx].map, 2049 m, tx->seg_list, 1, &cnt, 2050 BUS_DMA_NOWAIT); 2051 } 2052 if (__predict_false(err != 0)) { 2053 device_printf(sc->dev, "bus_dmamap_load_mbuf_segment returned %d" 2054 " packet len = %d\n", err, m->m_pkthdr.len); 2055 goto drop; 2056 } 2057 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 2058 BUS_DMASYNC_PREWRITE); 2059 tx->info[idx].m = m; 2060 2061 #if IFCAP_TSO4 2062 /* TSO is different enough, we handle it in another routine */ 2063 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 2064 mxge_encap_tso(ss, m, cnt, ip_off); 2065 return; 2066 } 2067 #endif 2068 2069 req = tx->req_list; 2070 cksum_offset = 0; 2071 pseudo_hdr_offset = 0; 2072 flags = MXGEFW_FLAGS_NO_TSO; 2073 2074 /* checksum offloading? */ 2075 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) { 2076 /* ensure ip header is in first mbuf, copy 2077 it to a scratch buffer if not */ 2078 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 2079 m_copydata(m, 0, ip_off + sizeof (*ip), 2080 ss->scratch); 2081 ip = (struct ip *)(ss->scratch + ip_off); 2082 } else { 2083 ip = (struct ip *)(mtod(m, char *) + ip_off); 2084 } 2085 cksum_offset = ip_off + (ip->ip_hl << 2); 2086 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2087 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2088 req->cksum_offset = cksum_offset; 2089 flags |= MXGEFW_FLAGS_CKSUM; 2090 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2091 } else { 2092 odd_flag = 0; 2093 } 2094 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2095 flags |= MXGEFW_FLAGS_SMALL; 2096 2097 /* convert segments into a request list */ 2098 cum_len = 0; 2099 seg = tx->seg_list; 2100 req->flags = MXGEFW_FLAGS_FIRST; 2101 for (i = 0; i < cnt; i++) { 2102 req->addr_low = 2103 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2104 req->addr_high = 2105 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2106 req->length = htobe16(seg->ds_len); 2107 req->cksum_offset = cksum_offset; 2108 if (cksum_offset > seg->ds_len) 2109 cksum_offset -= seg->ds_len; 2110 else 2111 cksum_offset = 0; 2112 req->pseudo_hdr_offset = pseudo_hdr_offset; 2113 req->pad = 0; /* complete solid 16-byte block */ 2114 req->rdma_count = 1; 2115 req->flags |= flags | ((cum_len & 1) * odd_flag); 2116 cum_len += seg->ds_len; 2117 seg++; 2118 req++; 2119 req->flags = 0; 2120 } 2121 req--; 2122 /* pad runts to 60 bytes */ 2123 if (cum_len < 60) { 2124 req++; 2125 req->addr_low = 2126 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2127 req->addr_high = 2128 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2129 req->length = htobe16(60 - cum_len); 2130 req->cksum_offset = 0; 2131 req->pseudo_hdr_offset = pseudo_hdr_offset; 2132 req->pad = 0; /* complete solid 16-byte block */ 2133 req->rdma_count = 1; 2134 req->flags |= flags | ((cum_len & 1) * odd_flag); 2135 cnt++; 2136 } 2137 2138 tx->req_list[0].rdma_count = cnt; 2139 #if 0 2140 /* print what the firmware will see */ 2141 for (i = 0; i < cnt; i++) { 2142 kprintf("%d: addr: 0x%x 0x%x len:%d pso%d," 2143 "cso:%d, flags:0x%x, rdma:%d\n", 2144 i, (int)ntohl(tx->req_list[i].addr_high), 2145 (int)ntohl(tx->req_list[i].addr_low), 2146 (int)ntohs(tx->req_list[i].length), 2147 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2148 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2149 tx->req_list[i].rdma_count); 2150 } 2151 kprintf("--------------\n"); 2152 #endif 2153 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2154 mxge_submit_req(tx, tx->req_list, cnt); 2155 #ifdef IFNET_BUF_RING 2156 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2157 /* tell the NIC to start polling this slice */ 2158 *tx->send_go = 1; 2159 tx->queue_active = 1; 2160 tx->activate++; 2161 wmb(); 2162 } 2163 #endif 2164 return; 2165 2166 drop: 2167 m_freem(m); 2168 ss->oerrors++; 2169 return; 2170 } 2171 2172 #ifdef IFNET_BUF_RING 2173 static void 2174 mxge_qflush(struct ifnet *ifp) 2175 { 2176 mxge_softc_t *sc = ifp->if_softc; 2177 mxge_tx_ring_t *tx; 2178 struct mbuf *m; 2179 int slice; 2180 2181 for (slice = 0; slice < sc->num_slices; slice++) { 2182 tx = &sc->ss[slice].tx; 2183 lwkt_serialize_enter(sc->ifp->if_serializer); 2184 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2185 m_freem(m); 2186 lwkt_serialize_exit(sc->ifp->if_serializer); 2187 } 2188 if_qflush(ifp); 2189 } 2190 2191 static inline void 2192 mxge_start_locked(struct mxge_slice_state *ss) 2193 { 2194 mxge_softc_t *sc; 2195 struct mbuf *m; 2196 struct ifnet *ifp; 2197 mxge_tx_ring_t *tx; 2198 2199 sc = ss->sc; 2200 ifp = sc->ifp; 2201 tx = &ss->tx; 2202 2203 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2204 m = drbr_dequeue(ifp, tx->br); 2205 if (m == NULL) { 2206 return; 2207 } 2208 /* let BPF see it */ 2209 BPF_MTAP(ifp, m); 2210 2211 /* give it to the nic */ 2212 mxge_encap(ss, m); 2213 } 2214 /* ran out of transmit slots */ 2215 if (((ss->if_flags & IFF_OACTIVE) == 0) 2216 && (!drbr_empty(ifp, tx->br))) { 2217 ss->if_flags |= IFF_OACTIVE; 2218 tx->stall++; 2219 } 2220 } 2221 2222 static int 2223 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2224 { 2225 mxge_softc_t *sc; 2226 struct ifnet *ifp; 2227 mxge_tx_ring_t *tx; 2228 int err; 2229 2230 sc = ss->sc; 2231 ifp = sc->ifp; 2232 tx = &ss->tx; 2233 2234 if ((ss->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != 2235 IFF_RUNNING) { 2236 err = drbr_enqueue(ifp, tx->br, m); 2237 return (err); 2238 } 2239 2240 if (drbr_empty(ifp, tx->br) && 2241 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2242 /* let BPF see it */ 2243 BPF_MTAP(ifp, m); 2244 /* give it to the nic */ 2245 mxge_encap(ss, m); 2246 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2247 return (err); 2248 } 2249 if (!drbr_empty(ifp, tx->br)) 2250 mxge_start_locked(ss); 2251 return (0); 2252 } 2253 2254 static int 2255 mxge_transmit(struct ifnet *ifp, struct mbuf *m) 2256 { 2257 mxge_softc_t *sc = ifp->if_softc; 2258 struct mxge_slice_state *ss; 2259 mxge_tx_ring_t *tx; 2260 int err = 0; 2261 int slice; 2262 2263 #if 0 2264 slice = m->m_pkthdr.flowid; 2265 #endif 2266 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ 2267 2268 ss = &sc->ss[slice]; 2269 tx = &ss->tx; 2270 2271 if(lwkt_serialize_try(ifp->if_serializer)) { 2272 err = mxge_transmit_locked(ss, m); 2273 lwkt_serialize_exit(ifp->if_serializer); 2274 } else { 2275 err = drbr_enqueue(ifp, tx->br, m); 2276 } 2277 2278 return (err); 2279 } 2280 2281 #else 2282 2283 static inline void 2284 mxge_start_locked(struct mxge_slice_state *ss) 2285 { 2286 mxge_softc_t *sc; 2287 struct mbuf *m; 2288 struct ifnet *ifp; 2289 mxge_tx_ring_t *tx; 2290 2291 sc = ss->sc; 2292 ifp = sc->ifp; 2293 tx = &ss->tx; 2294 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2295 m = ifq_dequeue(&ifp->if_snd, NULL); 2296 if (m == NULL) { 2297 return; 2298 } 2299 /* let BPF see it */ 2300 BPF_MTAP(ifp, m); 2301 2302 /* give it to the nic */ 2303 mxge_encap(ss, m); 2304 } 2305 /* ran out of transmit slots */ 2306 if ((sc->ifp->if_flags & IFF_OACTIVE) == 0) { 2307 sc->ifp->if_flags |= IFF_OACTIVE; 2308 tx->stall++; 2309 } 2310 } 2311 #endif 2312 static void 2313 mxge_start(struct ifnet *ifp) 2314 { 2315 mxge_softc_t *sc = ifp->if_softc; 2316 struct mxge_slice_state *ss; 2317 2318 ASSERT_SERIALIZED(sc->ifp->if_serializer); 2319 /* only use the first slice for now */ 2320 ss = &sc->ss[0]; 2321 mxge_start_locked(ss); 2322 } 2323 2324 /* 2325 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2326 * at most 32 bytes at a time, so as to avoid involving the software 2327 * pio handler in the nic. We re-write the first segment's low 2328 * DMA address to mark it valid only after we write the entire chunk 2329 * in a burst 2330 */ 2331 static inline void 2332 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2333 mcp_kreq_ether_recv_t *src) 2334 { 2335 uint32_t low; 2336 2337 low = src->addr_low; 2338 src->addr_low = 0xffffffff; 2339 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2340 wmb(); 2341 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2342 wmb(); 2343 src->addr_low = low; 2344 dst->addr_low = low; 2345 wmb(); 2346 } 2347 2348 static int 2349 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2350 { 2351 bus_dma_segment_t seg; 2352 struct mbuf *m; 2353 mxge_rx_ring_t *rx = &ss->rx_small; 2354 int cnt, err; 2355 2356 m = m_gethdr(MB_DONTWAIT, MT_DATA); 2357 if (m == NULL) { 2358 rx->alloc_fail++; 2359 err = ENOBUFS; 2360 goto done; 2361 } 2362 m->m_len = m->m_pkthdr.len = MHLEN; 2363 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m, 2364 &seg, 1, &cnt, BUS_DMA_NOWAIT); 2365 if (err != 0) { 2366 kprintf("can't dmamap small (%d)\n", err); 2367 m_free(m); 2368 goto done; 2369 } 2370 rx->info[idx].m = m; 2371 rx->shadow[idx].addr_low = 2372 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2373 rx->shadow[idx].addr_high = 2374 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2375 2376 done: 2377 if ((idx & 7) == 7) 2378 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2379 return err; 2380 } 2381 2382 2383 static int 2384 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2385 { 2386 bus_dma_segment_t seg[3]; 2387 struct mbuf *m; 2388 mxge_rx_ring_t *rx = &ss->rx_big; 2389 int cnt, err, i; 2390 2391 if (rx->cl_size == MCLBYTES) 2392 m = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR); 2393 else { 2394 #if 0 2395 m = m_getjcl(MB_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2396 #else 2397 /* 2398 * XXX: allocate normal sized buffers for big buffers. 2399 * We should be fine as long as we don't get any jumbo frames 2400 */ 2401 m = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR); 2402 #endif 2403 } 2404 if (m == NULL) { 2405 rx->alloc_fail++; 2406 err = ENOBUFS; 2407 goto done; 2408 } 2409 m->m_pkthdr.len = 0; 2410 m->m_len = m->m_pkthdr.len = rx->mlen; 2411 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m, 2412 seg, 1, &cnt, BUS_DMA_NOWAIT); 2413 if (err != 0) { 2414 kprintf("can't dmamap big (%d)\n", err); 2415 m_free(m); 2416 goto done; 2417 } 2418 rx->info[idx].m = m; 2419 rx->shadow[idx].addr_low = 2420 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2421 rx->shadow[idx].addr_high = 2422 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2423 2424 #if MXGE_VIRT_JUMBOS 2425 for (i = 1; i < cnt; i++) { 2426 rx->shadow[idx + i].addr_low = 2427 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2428 rx->shadow[idx + i].addr_high = 2429 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2430 } 2431 #endif 2432 2433 done: 2434 for (i = 0; i < rx->nbufs; i++) { 2435 if ((idx & 7) == 7) { 2436 mxge_submit_8rx(&rx->lanai[idx - 7], 2437 &rx->shadow[idx - 7]); 2438 } 2439 idx++; 2440 } 2441 return err; 2442 } 2443 2444 /* 2445 * Myri10GE hardware checksums are not valid if the sender 2446 * padded the frame with non-zero padding. This is because 2447 * the firmware just does a simple 16-bit 1s complement 2448 * checksum across the entire frame, excluding the first 14 2449 * bytes. It is best to simply to check the checksum and 2450 * tell the stack about it only if the checksum is good 2451 */ 2452 2453 static inline uint16_t 2454 mxge_rx_csum(struct mbuf *m, int csum) 2455 { 2456 struct ether_header *eh; 2457 struct ip *ip; 2458 uint16_t c; 2459 2460 eh = mtod(m, struct ether_header *); 2461 2462 /* only deal with IPv4 TCP & UDP for now */ 2463 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP))) 2464 return 1; 2465 ip = (struct ip *)(eh + 1); 2466 if (__predict_false(ip->ip_p != IPPROTO_TCP && 2467 ip->ip_p != IPPROTO_UDP)) 2468 return 1; 2469 #ifdef INET 2470 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2471 htonl(ntohs(csum) + ntohs(ip->ip_len) + 2472 - (ip->ip_hl << 2) + ip->ip_p)); 2473 #else 2474 c = 1; 2475 #endif 2476 c ^= 0xffff; 2477 return (c); 2478 } 2479 2480 static void 2481 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2482 { 2483 struct ether_vlan_header *evl; 2484 struct ether_header *eh; 2485 uint32_t partial; 2486 2487 evl = mtod(m, struct ether_vlan_header *); 2488 eh = mtod(m, struct ether_header *); 2489 2490 /* 2491 * fix checksum by subtracting EVL_ENCAPLEN bytes 2492 * after what the firmware thought was the end of the ethernet 2493 * header. 2494 */ 2495 2496 /* put checksum into host byte order */ 2497 *csum = ntohs(*csum); 2498 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2499 (*csum) += ~partial; 2500 (*csum) += ((*csum) < ~partial); 2501 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2502 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2503 2504 /* restore checksum to network byte order; 2505 later consumers expect this */ 2506 *csum = htons(*csum); 2507 2508 /* save the tag */ 2509 #ifdef MXGE_NEW_VLAN_API 2510 m->m_pkthdr.ether_vlantag = ntohs(evl->evl_tag); 2511 #else 2512 { 2513 struct m_tag *mtag; 2514 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), 2515 MB_DONTWAIT); 2516 if (mtag == NULL) 2517 return; 2518 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); 2519 m_tag_prepend(m, mtag); 2520 } 2521 2522 #endif 2523 m->m_flags |= M_VLANTAG; 2524 2525 /* 2526 * Remove the 802.1q header by copying the Ethernet 2527 * addresses over it and adjusting the beginning of 2528 * the data in the mbuf. The encapsulated Ethernet 2529 * type field is already in place. 2530 */ 2531 bcopy((char *)evl, (char *)evl + EVL_ENCAPLEN, 2532 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2533 m_adj(m, EVL_ENCAPLEN); 2534 } 2535 2536 2537 static inline void 2538 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum, 2539 struct mbuf_chain *chain) 2540 { 2541 mxge_softc_t *sc; 2542 struct ifnet *ifp; 2543 struct mbuf *m; 2544 struct ether_header *eh; 2545 mxge_rx_ring_t *rx; 2546 bus_dmamap_t old_map; 2547 int idx; 2548 uint16_t tcpudp_csum; 2549 2550 sc = ss->sc; 2551 ifp = sc->ifp; 2552 rx = &ss->rx_big; 2553 idx = rx->cnt & rx->mask; 2554 rx->cnt += rx->nbufs; 2555 /* save a pointer to the received mbuf */ 2556 m = rx->info[idx].m; 2557 /* try to replace the received mbuf */ 2558 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2559 /* drop the frame -- the old mbuf is re-cycled */ 2560 ifp->if_ierrors++; 2561 return; 2562 } 2563 2564 /* unmap the received buffer */ 2565 old_map = rx->info[idx].map; 2566 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2567 bus_dmamap_unload(rx->dmat, old_map); 2568 2569 /* swap the bus_dmamap_t's */ 2570 rx->info[idx].map = rx->extra_map; 2571 rx->extra_map = old_map; 2572 2573 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2574 * aligned */ 2575 m->m_data += MXGEFW_PAD; 2576 2577 m->m_pkthdr.rcvif = ifp; 2578 m->m_len = m->m_pkthdr.len = len; 2579 ss->ipackets++; 2580 eh = mtod(m, struct ether_header *); 2581 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2582 mxge_vlan_tag_remove(m, &csum); 2583 } 2584 /* if the checksum is valid, mark it in the mbuf header */ 2585 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2586 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2587 return; 2588 /* otherwise, it was a UDP frame, or a TCP frame which 2589 we could not do LRO on. Tell the stack that the 2590 checksum is good */ 2591 m->m_pkthdr.csum_data = 0xffff; 2592 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2593 } 2594 #if 0 2595 /* flowid only valid if RSS hashing is enabled */ 2596 if (sc->num_slices > 1) { 2597 m->m_pkthdr.flowid = (ss - sc->ss); 2598 m->m_flags |= M_FLOWID; 2599 } 2600 #endif 2601 ether_input_chain(ifp, m, NULL, chain); 2602 } 2603 2604 static inline void 2605 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum, 2606 struct mbuf_chain *chain) 2607 { 2608 mxge_softc_t *sc; 2609 struct ifnet *ifp; 2610 struct ether_header *eh; 2611 struct mbuf *m; 2612 mxge_rx_ring_t *rx; 2613 bus_dmamap_t old_map; 2614 int idx; 2615 uint16_t tcpudp_csum; 2616 2617 sc = ss->sc; 2618 ifp = sc->ifp; 2619 rx = &ss->rx_small; 2620 idx = rx->cnt & rx->mask; 2621 rx->cnt++; 2622 /* save a pointer to the received mbuf */ 2623 m = rx->info[idx].m; 2624 /* try to replace the received mbuf */ 2625 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2626 /* drop the frame -- the old mbuf is re-cycled */ 2627 ifp->if_ierrors++; 2628 return; 2629 } 2630 2631 /* unmap the received buffer */ 2632 old_map = rx->info[idx].map; 2633 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2634 bus_dmamap_unload(rx->dmat, old_map); 2635 2636 /* swap the bus_dmamap_t's */ 2637 rx->info[idx].map = rx->extra_map; 2638 rx->extra_map = old_map; 2639 2640 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2641 * aligned */ 2642 m->m_data += MXGEFW_PAD; 2643 2644 m->m_pkthdr.rcvif = ifp; 2645 m->m_len = m->m_pkthdr.len = len; 2646 ss->ipackets++; 2647 eh = mtod(m, struct ether_header *); 2648 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2649 mxge_vlan_tag_remove(m, &csum); 2650 } 2651 /* if the checksum is valid, mark it in the mbuf header */ 2652 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2653 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2654 return; 2655 /* otherwise, it was a UDP frame, or a TCP frame which 2656 we could not do LRO on. Tell the stack that the 2657 checksum is good */ 2658 m->m_pkthdr.csum_data = 0xffff; 2659 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2660 } 2661 #if 0 2662 /* flowid only valid if RSS hashing is enabled */ 2663 if (sc->num_slices > 1) { 2664 m->m_pkthdr.flowid = (ss - sc->ss); 2665 m->m_flags |= M_FLOWID; 2666 } 2667 #endif 2668 ether_input_chain(ifp, m, NULL, chain); 2669 } 2670 2671 /* 2672 * XXX 2673 * 2674 * Inlining the call to this function causes mxge_intr() to grow too large 2675 * for GCC's stack size limits (which shouldn't take into account inlining 2676 * of leaf functions at one call site anyway). Inlining is definitely a 2677 * good idea in this case though, so mark the function appropriately. 2678 */ 2679 static __always_inline void 2680 mxge_clean_rx_done(struct mxge_slice_state *ss) 2681 { 2682 mxge_rx_done_t *rx_done = &ss->rx_done; 2683 int limit = 0; 2684 uint16_t length; 2685 uint16_t checksum; 2686 struct mbuf_chain chain[MAXCPU]; 2687 2688 ether_input_chain_init(chain); 2689 while (rx_done->entry[rx_done->idx].length != 0) { 2690 length = ntohs(rx_done->entry[rx_done->idx].length); 2691 rx_done->entry[rx_done->idx].length = 0; 2692 checksum = rx_done->entry[rx_done->idx].checksum; 2693 if (length <= (MHLEN - MXGEFW_PAD)) 2694 mxge_rx_done_small(ss, length, checksum, chain); 2695 else 2696 mxge_rx_done_big(ss, length, checksum, chain); 2697 rx_done->cnt++; 2698 rx_done->idx = rx_done->cnt & rx_done->mask; 2699 2700 /* limit potential for livelock */ 2701 if (__predict_false(++limit > rx_done->mask / 2)) 2702 break; 2703 } 2704 ether_input_dispatch(chain); 2705 #ifdef INET 2706 while (!SLIST_EMPTY(&ss->lro_active)) { 2707 struct lro_entry *lro = SLIST_FIRST(&ss->lro_active); 2708 SLIST_REMOVE_HEAD(&ss->lro_active, next); 2709 mxge_lro_flush(ss, lro); 2710 } 2711 #endif 2712 } 2713 2714 2715 static inline void 2716 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2717 { 2718 struct ifnet *ifp; 2719 mxge_tx_ring_t *tx; 2720 struct mbuf *m; 2721 bus_dmamap_t map; 2722 int idx; 2723 int *flags; 2724 2725 tx = &ss->tx; 2726 ifp = ss->sc->ifp; 2727 ASSERT_SERIALIZED(ifp->if_serializer); 2728 while (tx->pkt_done != mcp_idx) { 2729 idx = tx->done & tx->mask; 2730 tx->done++; 2731 m = tx->info[idx].m; 2732 /* mbuf and DMA map only attached to the first 2733 segment per-mbuf */ 2734 if (m != NULL) { 2735 ss->obytes += m->m_pkthdr.len; 2736 if (m->m_flags & M_MCAST) 2737 ss->omcasts++; 2738 ss->opackets++; 2739 tx->info[idx].m = NULL; 2740 map = tx->info[idx].map; 2741 bus_dmamap_unload(tx->dmat, map); 2742 m_freem(m); 2743 } 2744 if (tx->info[idx].flag) { 2745 tx->info[idx].flag = 0; 2746 tx->pkt_done++; 2747 } 2748 } 2749 2750 /* If we have space, clear IFF_OACTIVE to tell the stack that 2751 its OK to send packets */ 2752 #ifdef IFNET_BUF_RING 2753 flags = &ss->if_flags; 2754 #else 2755 flags = &ifp->if_flags; 2756 #endif 2757 if ((*flags) & IFF_OACTIVE && 2758 tx->req - tx->done < (tx->mask + 1)/4) { 2759 *(flags) &= ~IFF_OACTIVE; 2760 ss->tx.wake++; 2761 mxge_start_locked(ss); 2762 } 2763 #ifdef IFNET_BUF_RING 2764 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { 2765 /* let the NIC stop polling this queue, since there 2766 * are no more transmits pending */ 2767 if (tx->req == tx->done) { 2768 *tx->send_stop = 1; 2769 tx->queue_active = 0; 2770 tx->deactivate++; 2771 wmb(); 2772 } 2773 } 2774 #endif 2775 2776 } 2777 2778 static struct mxge_media_type mxge_xfp_media_types[] = 2779 { 2780 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2781 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2782 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2783 {0, (1 << 5), "10GBASE-ER"}, 2784 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2785 {0, (1 << 3), "10GBASE-SW"}, 2786 {0, (1 << 2), "10GBASE-LW"}, 2787 {0, (1 << 1), "10GBASE-EW"}, 2788 {0, (1 << 0), "Reserved"} 2789 }; 2790 static struct mxge_media_type mxge_sfp_media_types[] = 2791 { 2792 {0, (1 << 7), "Reserved"}, 2793 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2794 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2795 {IFM_10G_SR, (1 << 4), "10GBASE-SR"} 2796 }; 2797 2798 static void 2799 mxge_set_media(mxge_softc_t *sc, int type) 2800 { 2801 sc->media_flags |= type; 2802 ifmedia_add(&sc->media, sc->media_flags, 0, NULL); 2803 ifmedia_set(&sc->media, sc->media_flags); 2804 } 2805 2806 2807 /* 2808 * Determine the media type for a NIC. Some XFPs will identify 2809 * themselves only when their link is up, so this is initiated via a 2810 * link up interrupt. However, this can potentially take up to 2811 * several milliseconds, so it is run via the watchdog routine, rather 2812 * than in the interrupt handler itself. This need only be done 2813 * once, not each time the link is up. 2814 */ 2815 static void 2816 mxge_media_probe(mxge_softc_t *sc) 2817 { 2818 mxge_cmd_t cmd; 2819 char *cage_type; 2820 char *ptr; 2821 struct mxge_media_type *mxge_media_types = NULL; 2822 int i, err, ms, mxge_media_type_entries; 2823 uint32_t byte; 2824 2825 sc->need_media_probe = 0; 2826 2827 /* if we've already set a media type, we're done */ 2828 if (sc->media_flags != (IFM_ETHER | IFM_AUTO)) 2829 return; 2830 2831 /* 2832 * parse the product code to deterimine the interface type 2833 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2834 * after the 3rd dash in the driver's cached copy of the 2835 * EEPROM's product code string. 2836 */ 2837 ptr = sc->product_code_string; 2838 if (ptr == NULL) { 2839 device_printf(sc->dev, "Missing product code\n"); 2840 } 2841 2842 for (i = 0; i < 3; i++, ptr++) { 2843 ptr = index(ptr, '-'); 2844 if (ptr == NULL) { 2845 device_printf(sc->dev, 2846 "only %d dashes in PC?!?\n", i); 2847 return; 2848 } 2849 } 2850 if (*ptr == 'C') { 2851 /* -C is CX4 */ 2852 mxge_set_media(sc, IFM_10G_CX4); 2853 return; 2854 } 2855 else if (*ptr == 'Q') { 2856 /* -Q is Quad Ribbon Fiber */ 2857 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2858 /* FreeBSD has no media type for Quad ribbon fiber */ 2859 return; 2860 } 2861 2862 if (*ptr == 'R') { 2863 /* -R is XFP */ 2864 mxge_media_types = mxge_xfp_media_types; 2865 mxge_media_type_entries = NELEM(mxge_xfp_media_types); 2866 byte = MXGE_XFP_COMPLIANCE_BYTE; 2867 cage_type = "XFP"; 2868 } 2869 2870 if (*ptr == 'S' || *(ptr +1) == 'S') { 2871 /* -S or -2S is SFP+ */ 2872 mxge_media_types = mxge_sfp_media_types; 2873 mxge_media_type_entries = NELEM(mxge_sfp_media_types); 2874 cage_type = "SFP+"; 2875 byte = 3; 2876 } 2877 2878 if (mxge_media_types == NULL) { 2879 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2880 return; 2881 } 2882 2883 /* 2884 * At this point we know the NIC has an XFP cage, so now we 2885 * try to determine what is in the cage by using the 2886 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2887 * register. We read just one byte, which may take over 2888 * a millisecond 2889 */ 2890 2891 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2892 cmd.data1 = byte; 2893 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2894 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 2895 device_printf(sc->dev, "failed to read XFP\n"); 2896 } 2897 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 2898 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 2899 } 2900 if (err != MXGEFW_CMD_OK) { 2901 return; 2902 } 2903 2904 /* now we wait for the data to be cached */ 2905 cmd.data0 = byte; 2906 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2907 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 2908 DELAY(1000); 2909 cmd.data0 = byte; 2910 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2911 } 2912 if (err != MXGEFW_CMD_OK) { 2913 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 2914 cage_type, err, ms); 2915 return; 2916 } 2917 2918 if (cmd.data0 == mxge_media_types[0].bitmask) { 2919 if (mxge_verbose) 2920 device_printf(sc->dev, "%s:%s\n", cage_type, 2921 mxge_media_types[0].name); 2922 mxge_set_media(sc, IFM_10G_CX4); 2923 return; 2924 } 2925 for (i = 1; i < mxge_media_type_entries; i++) { 2926 if (cmd.data0 & mxge_media_types[i].bitmask) { 2927 if (mxge_verbose) 2928 device_printf(sc->dev, "%s:%s\n", 2929 cage_type, 2930 mxge_media_types[i].name); 2931 2932 mxge_set_media(sc, mxge_media_types[i].flag); 2933 return; 2934 } 2935 } 2936 device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type, 2937 cmd.data0); 2938 2939 return; 2940 } 2941 2942 static void 2943 mxge_intr(void *arg) 2944 { 2945 struct mxge_slice_state *ss = arg; 2946 mxge_softc_t *sc = ss->sc; 2947 mcp_irq_data_t *stats = ss->fw_stats; 2948 mxge_tx_ring_t *tx = &ss->tx; 2949 mxge_rx_done_t *rx_done = &ss->rx_done; 2950 uint32_t send_done_count; 2951 uint8_t valid; 2952 2953 2954 #ifndef IFNET_BUF_RING 2955 /* an interrupt on a non-zero slice is implicitly valid 2956 since MSI-X irqs are not shared */ 2957 if (ss != sc->ss) { 2958 mxge_clean_rx_done(ss); 2959 *ss->irq_claim = be32toh(3); 2960 return; 2961 } 2962 #endif 2963 2964 /* make sure the DMA has finished */ 2965 if (!stats->valid) { 2966 return; 2967 } 2968 valid = stats->valid; 2969 2970 if (sc->legacy_irq) { 2971 /* lower legacy IRQ */ 2972 *sc->irq_deassert = 0; 2973 if (!mxge_deassert_wait) 2974 /* don't wait for conf. that irq is low */ 2975 stats->valid = 0; 2976 } else { 2977 stats->valid = 0; 2978 } 2979 2980 /* loop while waiting for legacy irq deassertion */ 2981 do { 2982 /* check for transmit completes and receives */ 2983 send_done_count = be32toh(stats->send_done_count); 2984 while ((send_done_count != tx->pkt_done) || 2985 (rx_done->entry[rx_done->idx].length != 0)) { 2986 if (send_done_count != tx->pkt_done) 2987 mxge_tx_done(ss, (int)send_done_count); 2988 mxge_clean_rx_done(ss); 2989 send_done_count = be32toh(stats->send_done_count); 2990 } 2991 if (sc->legacy_irq && mxge_deassert_wait) 2992 wmb(); 2993 } while (*((volatile uint8_t *) &stats->valid)); 2994 2995 /* fw link & error stats meaningful only on the first slice */ 2996 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 2997 if (sc->link_state != stats->link_up) { 2998 sc->link_state = stats->link_up; 2999 if (sc->link_state) { 3000 sc->ifp->if_link_state = LINK_STATE_UP; 3001 if_link_state_change(sc->ifp); 3002 if (mxge_verbose) 3003 device_printf(sc->dev, "link up\n"); 3004 } else { 3005 sc->ifp->if_link_state = LINK_STATE_DOWN; 3006 if_link_state_change(sc->ifp); 3007 if (mxge_verbose) 3008 device_printf(sc->dev, "link down\n"); 3009 } 3010 sc->need_media_probe = 1; 3011 } 3012 if (sc->rdma_tags_available != 3013 be32toh(stats->rdma_tags_available)) { 3014 sc->rdma_tags_available = 3015 be32toh(stats->rdma_tags_available); 3016 device_printf(sc->dev, "RDMA timed out! %d tags " 3017 "left\n", sc->rdma_tags_available); 3018 } 3019 3020 if (stats->link_down) { 3021 sc->down_cnt += stats->link_down; 3022 sc->link_state = 0; 3023 sc->ifp->if_link_state = LINK_STATE_DOWN; 3024 if_link_state_change(sc->ifp); 3025 } 3026 } 3027 3028 /* check to see if we have rx token to pass back */ 3029 if (valid & 0x1) 3030 *ss->irq_claim = be32toh(3); 3031 *(ss->irq_claim + 1) = be32toh(3); 3032 } 3033 3034 static void 3035 mxge_init(void *arg) 3036 { 3037 } 3038 3039 3040 3041 static void 3042 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 3043 { 3044 struct lro_entry *lro_entry; 3045 int i; 3046 3047 while (!SLIST_EMPTY(&ss->lro_free)) { 3048 lro_entry = SLIST_FIRST(&ss->lro_free); 3049 SLIST_REMOVE_HEAD(&ss->lro_free, next); 3050 kfree(lro_entry, M_DEVBUF); 3051 } 3052 3053 for (i = 0; i <= ss->rx_big.mask; i++) { 3054 if (ss->rx_big.info[i].m == NULL) 3055 continue; 3056 bus_dmamap_unload(ss->rx_big.dmat, 3057 ss->rx_big.info[i].map); 3058 m_freem(ss->rx_big.info[i].m); 3059 ss->rx_big.info[i].m = NULL; 3060 } 3061 3062 for (i = 0; i <= ss->rx_small.mask; i++) { 3063 if (ss->rx_small.info[i].m == NULL) 3064 continue; 3065 bus_dmamap_unload(ss->rx_small.dmat, 3066 ss->rx_small.info[i].map); 3067 m_freem(ss->rx_small.info[i].m); 3068 ss->rx_small.info[i].m = NULL; 3069 } 3070 3071 /* transmit ring used only on the first slice */ 3072 if (ss->tx.info == NULL) 3073 return; 3074 3075 for (i = 0; i <= ss->tx.mask; i++) { 3076 ss->tx.info[i].flag = 0; 3077 if (ss->tx.info[i].m == NULL) 3078 continue; 3079 bus_dmamap_unload(ss->tx.dmat, 3080 ss->tx.info[i].map); 3081 m_freem(ss->tx.info[i].m); 3082 ss->tx.info[i].m = NULL; 3083 } 3084 } 3085 3086 static void 3087 mxge_free_mbufs(mxge_softc_t *sc) 3088 { 3089 int slice; 3090 3091 for (slice = 0; slice < sc->num_slices; slice++) 3092 mxge_free_slice_mbufs(&sc->ss[slice]); 3093 } 3094 3095 static void 3096 mxge_free_slice_rings(struct mxge_slice_state *ss) 3097 { 3098 int i; 3099 3100 3101 if (ss->rx_done.entry != NULL) 3102 mxge_dma_free(&ss->rx_done.dma); 3103 ss->rx_done.entry = NULL; 3104 3105 if (ss->tx.req_bytes != NULL) 3106 kfree(ss->tx.req_bytes, M_DEVBUF); 3107 ss->tx.req_bytes = NULL; 3108 3109 if (ss->tx.seg_list != NULL) 3110 kfree(ss->tx.seg_list, M_DEVBUF); 3111 ss->tx.seg_list = NULL; 3112 3113 if (ss->rx_small.shadow != NULL) 3114 kfree(ss->rx_small.shadow, M_DEVBUF); 3115 ss->rx_small.shadow = NULL; 3116 3117 if (ss->rx_big.shadow != NULL) 3118 kfree(ss->rx_big.shadow, M_DEVBUF); 3119 ss->rx_big.shadow = NULL; 3120 3121 if (ss->tx.info != NULL) { 3122 if (ss->tx.dmat != NULL) { 3123 for (i = 0; i <= ss->tx.mask; i++) { 3124 bus_dmamap_destroy(ss->tx.dmat, 3125 ss->tx.info[i].map); 3126 } 3127 bus_dma_tag_destroy(ss->tx.dmat); 3128 } 3129 kfree(ss->tx.info, M_DEVBUF); 3130 } 3131 ss->tx.info = NULL; 3132 3133 if (ss->rx_small.info != NULL) { 3134 if (ss->rx_small.dmat != NULL) { 3135 for (i = 0; i <= ss->rx_small.mask; i++) { 3136 bus_dmamap_destroy(ss->rx_small.dmat, 3137 ss->rx_small.info[i].map); 3138 } 3139 bus_dmamap_destroy(ss->rx_small.dmat, 3140 ss->rx_small.extra_map); 3141 bus_dma_tag_destroy(ss->rx_small.dmat); 3142 } 3143 kfree(ss->rx_small.info, M_DEVBUF); 3144 } 3145 ss->rx_small.info = NULL; 3146 3147 if (ss->rx_big.info != NULL) { 3148 if (ss->rx_big.dmat != NULL) { 3149 for (i = 0; i <= ss->rx_big.mask; i++) { 3150 bus_dmamap_destroy(ss->rx_big.dmat, 3151 ss->rx_big.info[i].map); 3152 } 3153 bus_dmamap_destroy(ss->rx_big.dmat, 3154 ss->rx_big.extra_map); 3155 bus_dma_tag_destroy(ss->rx_big.dmat); 3156 } 3157 kfree(ss->rx_big.info, M_DEVBUF); 3158 } 3159 ss->rx_big.info = NULL; 3160 } 3161 3162 static void 3163 mxge_free_rings(mxge_softc_t *sc) 3164 { 3165 int slice; 3166 3167 for (slice = 0; slice < sc->num_slices; slice++) 3168 mxge_free_slice_rings(&sc->ss[slice]); 3169 } 3170 3171 static int 3172 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3173 int tx_ring_entries) 3174 { 3175 mxge_softc_t *sc = ss->sc; 3176 size_t bytes; 3177 int err, i; 3178 3179 err = ENOMEM; 3180 3181 /* allocate per-slice receive resources */ 3182 3183 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 3184 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 3185 3186 /* allocate the rx shadow rings */ 3187 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 3188 ss->rx_small.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3189 if (ss->rx_small.shadow == NULL) 3190 return err;; 3191 3192 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 3193 ss->rx_big.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3194 if (ss->rx_big.shadow == NULL) 3195 return err;; 3196 3197 /* allocate the rx host info rings */ 3198 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 3199 ss->rx_small.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3200 if (ss->rx_small.info == NULL) 3201 return err;; 3202 3203 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 3204 ss->rx_big.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3205 if (ss->rx_big.info == NULL) 3206 return err;; 3207 3208 /* allocate the rx busdma resources */ 3209 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3210 1, /* alignment */ 3211 4096, /* boundary */ 3212 BUS_SPACE_MAXADDR, /* low */ 3213 BUS_SPACE_MAXADDR, /* high */ 3214 NULL, NULL, /* filter */ 3215 MHLEN, /* maxsize */ 3216 1, /* num segs */ 3217 MHLEN, /* maxsegsize */ 3218 BUS_DMA_ALLOCNOW, /* flags */ 3219 &ss->rx_small.dmat); /* tag */ 3220 if (err != 0) { 3221 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3222 err); 3223 return err;; 3224 } 3225 3226 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3227 1, /* alignment */ 3228 #if MXGE_VIRT_JUMBOS 3229 4096, /* boundary */ 3230 #else 3231 0, /* boundary */ 3232 #endif 3233 BUS_SPACE_MAXADDR, /* low */ 3234 BUS_SPACE_MAXADDR, /* high */ 3235 NULL, NULL, /* filter */ 3236 3*4096, /* maxsize */ 3237 #if MXGE_VIRT_JUMBOS 3238 3, /* num segs */ 3239 4096, /* maxsegsize*/ 3240 #else 3241 1, /* num segs */ 3242 MJUM9BYTES, /* maxsegsize*/ 3243 #endif 3244 BUS_DMA_ALLOCNOW, /* flags */ 3245 &ss->rx_big.dmat); /* tag */ 3246 if (err != 0) { 3247 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3248 err); 3249 return err;; 3250 } 3251 for (i = 0; i <= ss->rx_small.mask; i++) { 3252 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3253 &ss->rx_small.info[i].map); 3254 if (err != 0) { 3255 device_printf(sc->dev, "Err %d rx_small dmamap\n", 3256 err); 3257 return err;; 3258 } 3259 } 3260 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3261 &ss->rx_small.extra_map); 3262 if (err != 0) { 3263 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 3264 err); 3265 return err;; 3266 } 3267 3268 for (i = 0; i <= ss->rx_big.mask; i++) { 3269 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3270 &ss->rx_big.info[i].map); 3271 if (err != 0) { 3272 device_printf(sc->dev, "Err %d rx_big dmamap\n", 3273 err); 3274 return err;; 3275 } 3276 } 3277 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3278 &ss->rx_big.extra_map); 3279 if (err != 0) { 3280 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 3281 err); 3282 return err;; 3283 } 3284 3285 /* now allocate TX resouces */ 3286 3287 #ifndef IFNET_BUF_RING 3288 /* only use a single TX ring for now */ 3289 if (ss != ss->sc->ss) 3290 return 0; 3291 #endif 3292 3293 ss->tx.mask = tx_ring_entries - 1; 3294 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3295 3296 3297 /* allocate the tx request copy block */ 3298 bytes = 8 + 3299 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3300 ss->tx.req_bytes = kmalloc(bytes, M_DEVBUF, M_WAITOK); 3301 if (ss->tx.req_bytes == NULL) 3302 return err;; 3303 /* ensure req_list entries are aligned to 8 bytes */ 3304 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3305 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL); 3306 3307 /* allocate the tx busdma segment list */ 3308 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3309 ss->tx.seg_list = (bus_dma_segment_t *) 3310 kmalloc(bytes, M_DEVBUF, M_WAITOK); 3311 if (ss->tx.seg_list == NULL) 3312 return err;; 3313 3314 /* allocate the tx host info ring */ 3315 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3316 ss->tx.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3317 if (ss->tx.info == NULL) 3318 return err;; 3319 3320 /* allocate the tx busdma resources */ 3321 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3322 1, /* alignment */ 3323 sc->tx_boundary, /* boundary */ 3324 BUS_SPACE_MAXADDR, /* low */ 3325 BUS_SPACE_MAXADDR, /* high */ 3326 NULL, NULL, /* filter */ 3327 65536 + 256, /* maxsize */ 3328 ss->tx.max_desc - 2, /* num segs */ 3329 sc->tx_boundary, /* maxsegsz */ 3330 BUS_DMA_ALLOCNOW, /* flags */ 3331 &ss->tx.dmat); /* tag */ 3332 3333 if (err != 0) { 3334 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3335 err); 3336 return err;; 3337 } 3338 3339 /* now use these tags to setup dmamaps for each slot 3340 in the ring */ 3341 for (i = 0; i <= ss->tx.mask; i++) { 3342 err = bus_dmamap_create(ss->tx.dmat, 0, 3343 &ss->tx.info[i].map); 3344 if (err != 0) { 3345 device_printf(sc->dev, "Err %d tx dmamap\n", 3346 err); 3347 return err;; 3348 } 3349 } 3350 return 0; 3351 3352 } 3353 3354 static int 3355 mxge_alloc_rings(mxge_softc_t *sc) 3356 { 3357 mxge_cmd_t cmd; 3358 int tx_ring_size; 3359 int tx_ring_entries, rx_ring_entries; 3360 int err, slice; 3361 3362 /* get ring sizes */ 3363 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3364 tx_ring_size = cmd.data0; 3365 if (err != 0) { 3366 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3367 goto abort; 3368 } 3369 3370 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3371 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3372 ifq_set_maxlen(&sc->ifp->if_snd, tx_ring_entries - 1); 3373 ifq_set_ready(&sc->ifp->if_snd); 3374 3375 for (slice = 0; slice < sc->num_slices; slice++) { 3376 err = mxge_alloc_slice_rings(&sc->ss[slice], 3377 rx_ring_entries, 3378 tx_ring_entries); 3379 if (err != 0) 3380 goto abort; 3381 } 3382 return 0; 3383 3384 abort: 3385 mxge_free_rings(sc); 3386 return err; 3387 3388 } 3389 3390 3391 static void 3392 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3393 { 3394 int bufsize = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN + MXGEFW_PAD; 3395 3396 if (bufsize < MCLBYTES) { 3397 /* easy, everything fits in a single buffer */ 3398 *big_buf_size = MCLBYTES; 3399 *cl_size = MCLBYTES; 3400 *nbufs = 1; 3401 return; 3402 } 3403 3404 if (bufsize < MJUMPAGESIZE) { 3405 /* still easy, everything still fits in a single buffer */ 3406 *big_buf_size = MJUMPAGESIZE; 3407 *cl_size = MJUMPAGESIZE; 3408 *nbufs = 1; 3409 return; 3410 } 3411 #if MXGE_VIRT_JUMBOS 3412 /* now we need to use virtually contiguous buffers */ 3413 *cl_size = MJUM9BYTES; 3414 *big_buf_size = 4096; 3415 *nbufs = mtu / 4096 + 1; 3416 /* needs to be a power of two, so round up */ 3417 if (*nbufs == 3) 3418 *nbufs = 4; 3419 #else 3420 *cl_size = MJUM9BYTES; 3421 *big_buf_size = MJUM9BYTES; 3422 *nbufs = 1; 3423 #endif 3424 } 3425 3426 static int 3427 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3428 { 3429 mxge_softc_t *sc; 3430 mxge_cmd_t cmd; 3431 bus_dmamap_t map; 3432 struct lro_entry *lro_entry; 3433 int err, i, slice; 3434 3435 3436 sc = ss->sc; 3437 slice = ss - sc->ss; 3438 3439 SLIST_INIT(&ss->lro_free); 3440 SLIST_INIT(&ss->lro_active); 3441 3442 for (i = 0; i < sc->lro_cnt; i++) { 3443 lro_entry = (struct lro_entry *) 3444 kmalloc(sizeof (*lro_entry), M_DEVBUF, 3445 M_NOWAIT | M_ZERO); 3446 if (lro_entry == NULL) { 3447 sc->lro_cnt = i; 3448 break; 3449 } 3450 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next); 3451 } 3452 /* get the lanai pointers to the send and receive rings */ 3453 3454 err = 0; 3455 #ifndef IFNET_BUF_RING 3456 /* We currently only send from the first slice */ 3457 if (slice == 0) { 3458 #endif 3459 cmd.data0 = slice; 3460 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3461 ss->tx.lanai = 3462 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3463 ss->tx.send_go = (volatile uint32_t *) 3464 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3465 ss->tx.send_stop = (volatile uint32_t *) 3466 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3467 #ifndef IFNET_BUF_RING 3468 } 3469 #endif 3470 cmd.data0 = slice; 3471 err |= mxge_send_cmd(sc, 3472 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3473 ss->rx_small.lanai = 3474 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3475 cmd.data0 = slice; 3476 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3477 ss->rx_big.lanai = 3478 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3479 3480 if (err != 0) { 3481 device_printf(sc->dev, 3482 "failed to get ring sizes or locations\n"); 3483 return EIO; 3484 } 3485 3486 /* stock receive rings */ 3487 for (i = 0; i <= ss->rx_small.mask; i++) { 3488 map = ss->rx_small.info[i].map; 3489 err = mxge_get_buf_small(ss, map, i); 3490 if (err) { 3491 device_printf(sc->dev, "alloced %d/%d smalls\n", 3492 i, ss->rx_small.mask + 1); 3493 return ENOMEM; 3494 } 3495 } 3496 for (i = 0; i <= ss->rx_big.mask; i++) { 3497 ss->rx_big.shadow[i].addr_low = 0xffffffff; 3498 ss->rx_big.shadow[i].addr_high = 0xffffffff; 3499 } 3500 ss->rx_big.nbufs = nbufs; 3501 ss->rx_big.cl_size = cl_size; 3502 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN + 3503 EVL_ENCAPLEN + MXGEFW_PAD; 3504 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { 3505 map = ss->rx_big.info[i].map; 3506 err = mxge_get_buf_big(ss, map, i); 3507 if (err) { 3508 device_printf(sc->dev, "alloced %d/%d bigs\n", 3509 i, ss->rx_big.mask + 1); 3510 return ENOMEM; 3511 } 3512 } 3513 return 0; 3514 } 3515 3516 static int 3517 mxge_open(mxge_softc_t *sc) 3518 { 3519 mxge_cmd_t cmd; 3520 int err, big_bytes, nbufs, slice, cl_size, i; 3521 bus_addr_t bus; 3522 volatile uint8_t *itable; 3523 struct mxge_slice_state *ss; 3524 3525 ASSERT_SERIALIZED(sc->ifp->if_serializer); 3526 /* Copy the MAC address in case it was overridden */ 3527 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 3528 3529 err = mxge_reset(sc, 1); 3530 if (err != 0) { 3531 device_printf(sc->dev, "failed to reset\n"); 3532 return EIO; 3533 } 3534 3535 if (sc->num_slices > 1) { 3536 /* setup the indirection table */ 3537 cmd.data0 = sc->num_slices; 3538 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 3539 &cmd); 3540 3541 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 3542 &cmd); 3543 if (err != 0) { 3544 device_printf(sc->dev, 3545 "failed to setup rss tables\n"); 3546 return err; 3547 } 3548 3549 /* just enable an identity mapping */ 3550 itable = sc->sram + cmd.data0; 3551 for (i = 0; i < sc->num_slices; i++) 3552 itable[i] = (uint8_t)i; 3553 3554 cmd.data0 = 1; 3555 cmd.data1 = mxge_rss_hash_type; 3556 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3557 if (err != 0) { 3558 device_printf(sc->dev, "failed to enable slices\n"); 3559 return err; 3560 } 3561 } 3562 3563 3564 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); 3565 3566 cmd.data0 = nbufs; 3567 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 3568 &cmd); 3569 /* error is only meaningful if we're trying to set 3570 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 3571 if (err && nbufs > 1) { 3572 device_printf(sc->dev, 3573 "Failed to set alway-use-n to %d\n", 3574 nbufs); 3575 return EIO; 3576 } 3577 /* Give the firmware the mtu and the big and small buffer 3578 sizes. The firmware wants the big buf size to be a power 3579 of two. Luckily, FreeBSD's clusters are powers of two */ 3580 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + EVL_ENCAPLEN; 3581 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3582 cmd.data0 = MHLEN - MXGEFW_PAD; 3583 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3584 &cmd); 3585 cmd.data0 = big_bytes; 3586 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3587 3588 if (err != 0) { 3589 device_printf(sc->dev, "failed to setup params\n"); 3590 goto abort; 3591 } 3592 3593 /* Now give him the pointer to the stats block */ 3594 for (slice = 0; 3595 #ifdef IFNET_BUF_RING 3596 slice < sc->num_slices; 3597 #else 3598 slice < 1; 3599 #endif 3600 slice++) { 3601 ss = &sc->ss[slice]; 3602 cmd.data0 = 3603 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3604 cmd.data1 = 3605 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3606 cmd.data2 = sizeof(struct mcp_irq_data); 3607 cmd.data2 |= (slice << 16); 3608 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3609 } 3610 3611 if (err != 0) { 3612 bus = sc->ss->fw_stats_dma.bus_addr; 3613 bus += offsetof(struct mcp_irq_data, send_done_count); 3614 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3615 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3616 err = mxge_send_cmd(sc, 3617 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3618 &cmd); 3619 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3620 sc->fw_multicast_support = 0; 3621 } else { 3622 sc->fw_multicast_support = 1; 3623 } 3624 3625 if (err != 0) { 3626 device_printf(sc->dev, "failed to setup params\n"); 3627 goto abort; 3628 } 3629 3630 for (slice = 0; slice < sc->num_slices; slice++) { 3631 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3632 if (err != 0) { 3633 device_printf(sc->dev, "couldn't open slice %d\n", 3634 slice); 3635 goto abort; 3636 } 3637 } 3638 3639 /* Finally, start the firmware running */ 3640 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3641 if (err) { 3642 device_printf(sc->dev, "Couldn't bring up link\n"); 3643 goto abort; 3644 } 3645 #ifdef IFNET_BUF_RING 3646 for (slice = 0; slice < sc->num_slices; slice++) { 3647 ss = &sc->ss[slice]; 3648 ss->if_flags |= IFF_RUNNING; 3649 ss->if_flags &= ~IFF_OACTIVE; 3650 } 3651 #endif 3652 sc->ifp->if_flags |= IFF_RUNNING; 3653 sc->ifp->if_flags &= ~IFF_OACTIVE; 3654 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3655 3656 return 0; 3657 3658 3659 abort: 3660 mxge_free_mbufs(sc); 3661 3662 return err; 3663 } 3664 3665 static int 3666 mxge_close(mxge_softc_t *sc) 3667 { 3668 mxge_cmd_t cmd; 3669 int err, old_down_cnt; 3670 #ifdef IFNET_BUF_RING 3671 struct mxge_slice_state *ss; 3672 int slice; 3673 #endif 3674 3675 ASSERT_SERIALIZED(sc->ifp->if_serializer); 3676 callout_stop(&sc->co_hdl); 3677 #ifdef IFNET_BUF_RING 3678 for (slice = 0; slice < sc->num_slices; slice++) { 3679 ss = &sc->ss[slice]; 3680 ss->if_flags &= ~IFF_RUNNING; 3681 } 3682 #endif 3683 sc->ifp->if_flags &= ~IFF_RUNNING; 3684 old_down_cnt = sc->down_cnt; 3685 wmb(); 3686 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3687 if (err) { 3688 device_printf(sc->dev, "Couldn't bring down link\n"); 3689 } 3690 if (old_down_cnt == sc->down_cnt) { 3691 /* wait for down irq */ 3692 DELAY(10 * sc->intr_coal_delay); 3693 } 3694 wmb(); 3695 if (old_down_cnt == sc->down_cnt) { 3696 device_printf(sc->dev, "never got down irq\n"); 3697 } 3698 3699 mxge_free_mbufs(sc); 3700 3701 return 0; 3702 } 3703 3704 static void 3705 mxge_setup_cfg_space(mxge_softc_t *sc) 3706 { 3707 device_t dev = sc->dev; 3708 int reg; 3709 uint16_t cmd, lnk, pectl; 3710 3711 /* find the PCIe link width and set max read request to 4KB*/ 3712 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 3713 lnk = pci_read_config(dev, reg + 0x12, 2); 3714 sc->link_width = (lnk >> 4) & 0x3f; 3715 3716 pectl = pci_read_config(dev, reg + 0x8, 2); 3717 pectl = (pectl & ~0x7000) | (5 << 12); 3718 pci_write_config(dev, reg + 0x8, pectl, 2); 3719 } 3720 3721 /* Enable DMA and Memory space access */ 3722 pci_enable_busmaster(dev); 3723 cmd = pci_read_config(dev, PCIR_COMMAND, 2); 3724 cmd |= PCIM_CMD_MEMEN; 3725 pci_write_config(dev, PCIR_COMMAND, cmd, 2); 3726 } 3727 3728 static uint32_t 3729 mxge_read_reboot(mxge_softc_t *sc) 3730 { 3731 device_t dev = sc->dev; 3732 uint32_t vs; 3733 3734 /* find the vendor specific offset */ 3735 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) { 3736 device_printf(sc->dev, 3737 "could not find vendor specific offset\n"); 3738 return (uint32_t)-1; 3739 } 3740 /* enable read32 mode */ 3741 pci_write_config(dev, vs + 0x10, 0x3, 1); 3742 /* tell NIC which register to read */ 3743 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3744 return (pci_read_config(dev, vs + 0x14, 4)); 3745 } 3746 3747 static int 3748 mxge_watchdog_reset(mxge_softc_t *sc, int slice) 3749 { 3750 struct pci_devinfo *dinfo; 3751 mxge_tx_ring_t *tx; 3752 int err; 3753 uint32_t reboot; 3754 uint16_t cmd; 3755 3756 err = ENXIO; 3757 3758 device_printf(sc->dev, "Watchdog reset!\n"); 3759 3760 /* 3761 * check to see if the NIC rebooted. If it did, then all of 3762 * PCI config space has been reset, and things like the 3763 * busmaster bit will be zero. If this is the case, then we 3764 * must restore PCI config space before the NIC can be used 3765 * again 3766 */ 3767 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3768 if (cmd == 0xffff) { 3769 /* 3770 * maybe the watchdog caught the NIC rebooting; wait 3771 * up to 100ms for it to finish. If it does not come 3772 * back, then give up 3773 */ 3774 DELAY(1000*100); 3775 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3776 if (cmd == 0xffff) { 3777 device_printf(sc->dev, "NIC disappeared!\n"); 3778 return (err); 3779 } 3780 } 3781 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3782 /* print the reboot status */ 3783 reboot = mxge_read_reboot(sc); 3784 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3785 reboot); 3786 /* restore PCI configuration space */ 3787 dinfo = device_get_ivars(sc->dev); 3788 pci_cfg_restore(sc->dev, dinfo); 3789 3790 /* and redo any changes we made to our config space */ 3791 mxge_setup_cfg_space(sc); 3792 3793 if (sc->ifp->if_flags & IFF_RUNNING) { 3794 mxge_close(sc); 3795 err = mxge_open(sc); 3796 } 3797 } else { 3798 tx = &sc->ss[slice].tx; 3799 device_printf(sc->dev, 3800 "NIC did not reboot, slice %d ring state:\n", 3801 slice); 3802 device_printf(sc->dev, 3803 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3804 tx->req, tx->done, tx->queue_active); 3805 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3806 tx->activate, tx->deactivate); 3807 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3808 tx->pkt_done, 3809 be32toh(sc->ss->fw_stats->send_done_count)); 3810 device_printf(sc->dev, "not resetting\n"); 3811 } 3812 return (err); 3813 } 3814 3815 static int 3816 mxge_watchdog(mxge_softc_t *sc) 3817 { 3818 mxge_tx_ring_t *tx; 3819 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3820 int i, err = 0; 3821 3822 /* see if we have outstanding transmits, which 3823 have been pending for more than mxge_ticks */ 3824 for (i = 0; 3825 #ifdef IFNET_BUF_RING 3826 (i < sc->num_slices) && (err == 0); 3827 #else 3828 (i < 1) && (err == 0); 3829 #endif 3830 i++) { 3831 tx = &sc->ss[i].tx; 3832 if (tx->req != tx->done && 3833 tx->watchdog_req != tx->watchdog_done && 3834 tx->done == tx->watchdog_done) { 3835 /* check for pause blocking before resetting */ 3836 if (tx->watchdog_rx_pause == rx_pause) 3837 err = mxge_watchdog_reset(sc, i); 3838 else 3839 device_printf(sc->dev, "Flow control blocking " 3840 "xmits, check link partner\n"); 3841 } 3842 3843 tx->watchdog_req = tx->req; 3844 tx->watchdog_done = tx->done; 3845 tx->watchdog_rx_pause = rx_pause; 3846 } 3847 3848 if (sc->need_media_probe) 3849 mxge_media_probe(sc); 3850 return (err); 3851 } 3852 3853 static void 3854 mxge_update_stats(mxge_softc_t *sc) 3855 { 3856 struct mxge_slice_state *ss; 3857 u_long ipackets = 0; 3858 u_long opackets = 0; 3859 #ifdef IFNET_BUF_RING 3860 u_long obytes = 0; 3861 u_long omcasts = 0; 3862 u_long odrops = 0; 3863 #endif 3864 u_long oerrors = 0; 3865 int slice; 3866 3867 for (slice = 0; slice < sc->num_slices; slice++) { 3868 ss = &sc->ss[slice]; 3869 ipackets += ss->ipackets; 3870 opackets += ss->opackets; 3871 #ifdef IFNET_BUF_RING 3872 obytes += ss->obytes; 3873 omcasts += ss->omcasts; 3874 odrops += ss->tx.br->br_drops; 3875 #endif 3876 oerrors += ss->oerrors; 3877 } 3878 sc->ifp->if_ipackets = ipackets; 3879 sc->ifp->if_opackets = opackets; 3880 #ifdef IFNET_BUF_RING 3881 sc->ifp->if_obytes = obytes; 3882 sc->ifp->if_omcasts = omcasts; 3883 sc->ifp->if_snd.ifq_drops = odrops; 3884 #endif 3885 sc->ifp->if_oerrors = oerrors; 3886 } 3887 3888 static void 3889 mxge_tick(void *arg) 3890 { 3891 mxge_softc_t *sc = arg; 3892 int err = 0; 3893 3894 lwkt_serialize_enter(sc->ifp->if_serializer); 3895 /* aggregate stats from different slices */ 3896 mxge_update_stats(sc); 3897 if (!sc->watchdog_countdown) { 3898 err = mxge_watchdog(sc); 3899 sc->watchdog_countdown = 4; 3900 } 3901 sc->watchdog_countdown--; 3902 if (err == 0) 3903 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3904 lwkt_serialize_exit(sc->ifp->if_serializer); 3905 } 3906 3907 static int 3908 mxge_media_change(struct ifnet *ifp) 3909 { 3910 return EINVAL; 3911 } 3912 3913 static int 3914 mxge_change_mtu(mxge_softc_t *sc, int mtu) 3915 { 3916 struct ifnet *ifp = sc->ifp; 3917 int real_mtu, old_mtu; 3918 int err = 0; 3919 3920 if (ifp->if_serializer) 3921 ASSERT_SERIALIZED(ifp->if_serializer); 3922 3923 real_mtu = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN; 3924 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 3925 return EINVAL; 3926 old_mtu = ifp->if_mtu; 3927 ifp->if_mtu = mtu; 3928 if (ifp->if_flags & IFF_RUNNING) { 3929 mxge_close(sc); 3930 err = mxge_open(sc); 3931 if (err != 0) { 3932 ifp->if_mtu = old_mtu; 3933 mxge_close(sc); 3934 (void) mxge_open(sc); 3935 } 3936 } 3937 return err; 3938 } 3939 3940 static void 3941 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 3942 { 3943 mxge_softc_t *sc = ifp->if_softc; 3944 3945 3946 if (sc == NULL) 3947 return; 3948 ifmr->ifm_status = IFM_AVALID; 3949 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 3950 ifmr->ifm_active = IFM_AUTO | IFM_ETHER; 3951 ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0; 3952 } 3953 3954 static int 3955 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr) 3956 { 3957 mxge_softc_t *sc = ifp->if_softc; 3958 struct ifreq *ifr = (struct ifreq *)data; 3959 int err, mask; 3960 3961 (void)cr; 3962 err = 0; 3963 ASSERT_SERIALIZED(ifp->if_serializer); 3964 switch (command) { 3965 case SIOCSIFADDR: 3966 case SIOCGIFADDR: 3967 err = ether_ioctl(ifp, command, data); 3968 break; 3969 3970 case SIOCSIFMTU: 3971 err = mxge_change_mtu(sc, ifr->ifr_mtu); 3972 break; 3973 3974 case SIOCSIFFLAGS: 3975 if (sc->dying) { 3976 return EINVAL; 3977 } 3978 if (ifp->if_flags & IFF_UP) { 3979 if (!(ifp->if_flags & IFF_RUNNING)) { 3980 err = mxge_open(sc); 3981 } else { 3982 /* take care of promis can allmulti 3983 flag chages */ 3984 mxge_change_promisc(sc, 3985 ifp->if_flags & IFF_PROMISC); 3986 mxge_set_multicast_list(sc); 3987 } 3988 } else { 3989 if (ifp->if_flags & IFF_RUNNING) { 3990 mxge_close(sc); 3991 } 3992 } 3993 break; 3994 3995 case SIOCADDMULTI: 3996 case SIOCDELMULTI: 3997 mxge_set_multicast_list(sc); 3998 break; 3999 4000 case SIOCSIFCAP: 4001 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 4002 if (mask & IFCAP_TXCSUM) { 4003 if (IFCAP_TXCSUM & ifp->if_capenable) { 4004 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 4005 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP 4006 | CSUM_TSO); 4007 } else { 4008 ifp->if_capenable |= IFCAP_TXCSUM; 4009 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 4010 } 4011 } else if (mask & IFCAP_RXCSUM) { 4012 if (IFCAP_RXCSUM & ifp->if_capenable) { 4013 ifp->if_capenable &= ~IFCAP_RXCSUM; 4014 sc->csum_flag = 0; 4015 } else { 4016 ifp->if_capenable |= IFCAP_RXCSUM; 4017 sc->csum_flag = 1; 4018 } 4019 } 4020 if (mask & IFCAP_TSO4) { 4021 if (IFCAP_TSO4 & ifp->if_capenable) { 4022 ifp->if_capenable &= ~IFCAP_TSO4; 4023 ifp->if_hwassist &= ~CSUM_TSO; 4024 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 4025 ifp->if_capenable |= IFCAP_TSO4; 4026 ifp->if_hwassist |= CSUM_TSO; 4027 } else { 4028 kprintf("mxge requires tx checksum offload" 4029 " be enabled to use TSO\n"); 4030 err = EINVAL; 4031 } 4032 } 4033 if (mask & IFCAP_LRO) { 4034 if (IFCAP_LRO & ifp->if_capenable) 4035 err = mxge_change_lro_locked(sc, 0); 4036 else 4037 err = mxge_change_lro_locked(sc, mxge_lro_cnt); 4038 } 4039 if (mask & IFCAP_VLAN_HWTAGGING) 4040 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 4041 VLAN_CAPABILITIES(ifp); 4042 4043 break; 4044 4045 case SIOCGIFMEDIA: 4046 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 4047 &sc->media, command); 4048 break; 4049 4050 default: 4051 err = ENOTTY; 4052 } 4053 return err; 4054 } 4055 4056 static void 4057 mxge_fetch_tunables(mxge_softc_t *sc) 4058 { 4059 4060 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); 4061 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 4062 &mxge_flow_control); 4063 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 4064 &mxge_intr_coal_delay); 4065 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 4066 &mxge_nvidia_ecrc_enable); 4067 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 4068 &mxge_force_firmware); 4069 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 4070 &mxge_deassert_wait); 4071 TUNABLE_INT_FETCH("hw.mxge.verbose", 4072 &mxge_verbose); 4073 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 4074 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt); 4075 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); 4076 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); 4077 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu); 4078 if (sc->lro_cnt != 0) 4079 mxge_lro_cnt = sc->lro_cnt; 4080 4081 if (bootverbose) 4082 mxge_verbose = 1; 4083 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 4084 mxge_intr_coal_delay = 30; 4085 if (mxge_ticks == 0) 4086 mxge_ticks = hz / 2; 4087 sc->pause = mxge_flow_control; 4088 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 4089 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) { 4090 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT; 4091 } 4092 if (mxge_initial_mtu > ETHERMTU_JUMBO || 4093 mxge_initial_mtu < ETHER_MIN_LEN) 4094 mxge_initial_mtu = ETHERMTU_JUMBO; 4095 } 4096 4097 4098 static void 4099 mxge_free_slices(mxge_softc_t *sc) 4100 { 4101 struct mxge_slice_state *ss; 4102 int i; 4103 4104 4105 if (sc->ss == NULL) 4106 return; 4107 4108 for (i = 0; i < sc->num_slices; i++) { 4109 ss = &sc->ss[i]; 4110 if (ss->fw_stats != NULL) { 4111 mxge_dma_free(&ss->fw_stats_dma); 4112 ss->fw_stats = NULL; 4113 #ifdef IFNET_BUF_RING 4114 if (ss->tx.br != NULL) { 4115 drbr_free(ss->tx.br, M_DEVBUF); 4116 ss->tx.br = NULL; 4117 } 4118 #endif 4119 } 4120 if (ss->rx_done.entry != NULL) { 4121 mxge_dma_free(&ss->rx_done.dma); 4122 ss->rx_done.entry = NULL; 4123 } 4124 } 4125 kfree(sc->ss, M_DEVBUF); 4126 sc->ss = NULL; 4127 } 4128 4129 static int 4130 mxge_alloc_slices(mxge_softc_t *sc) 4131 { 4132 mxge_cmd_t cmd; 4133 struct mxge_slice_state *ss; 4134 size_t bytes; 4135 int err, i, max_intr_slots; 4136 4137 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4138 if (err != 0) { 4139 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4140 return err; 4141 } 4142 sc->rx_ring_size = cmd.data0; 4143 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); 4144 4145 bytes = sizeof (*sc->ss) * sc->num_slices; 4146 sc->ss = kmalloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO); 4147 if (sc->ss == NULL) 4148 return (ENOMEM); 4149 for (i = 0; i < sc->num_slices; i++) { 4150 ss = &sc->ss[i]; 4151 4152 ss->sc = sc; 4153 4154 /* allocate per-slice rx interrupt queues */ 4155 4156 bytes = max_intr_slots * sizeof (*ss->rx_done.entry); 4157 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096); 4158 if (err != 0) 4159 goto abort; 4160 ss->rx_done.entry = ss->rx_done.dma.addr; 4161 bzero(ss->rx_done.entry, bytes); 4162 4163 /* 4164 * allocate the per-slice firmware stats; stats 4165 * (including tx) are used used only on the first 4166 * slice for now 4167 */ 4168 #ifndef IFNET_BUF_RING 4169 if (i > 0) 4170 continue; 4171 #endif 4172 4173 bytes = sizeof (*ss->fw_stats); 4174 err = mxge_dma_alloc(sc, &ss->fw_stats_dma, 4175 sizeof (*ss->fw_stats), 64); 4176 if (err != 0) 4177 goto abort; 4178 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr; 4179 #ifdef IFNET_BUF_RING 4180 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK, 4181 &ss->tx.lock); 4182 #endif 4183 } 4184 4185 return (0); 4186 4187 abort: 4188 mxge_free_slices(sc); 4189 return (ENOMEM); 4190 } 4191 4192 static void 4193 mxge_slice_probe(mxge_softc_t *sc) 4194 { 4195 mxge_cmd_t cmd; 4196 char *old_fw; 4197 int msix_cnt, status, max_intr_slots; 4198 4199 sc->num_slices = 1; 4200 /* 4201 * don't enable multiple slices if they are not enabled, 4202 * or if this is not an SMP system 4203 */ 4204 4205 if (mxge_max_slices == 0 || mxge_max_slices == 1 || ncpus < 2) 4206 return; 4207 4208 /* see how many MSI-X interrupts are available */ 4209 msix_cnt = pci_msix_count(sc->dev); 4210 if (msix_cnt < 2) 4211 return; 4212 4213 /* now load the slice aware firmware see what it supports */ 4214 old_fw = sc->fw_name; 4215 if (old_fw == mxge_fw_aligned) 4216 sc->fw_name = mxge_fw_rss_aligned; 4217 else 4218 sc->fw_name = mxge_fw_rss_unaligned; 4219 status = mxge_load_firmware(sc, 0); 4220 if (status != 0) { 4221 device_printf(sc->dev, "Falling back to a single slice\n"); 4222 return; 4223 } 4224 4225 /* try to send a reset command to the card to see if it 4226 is alive */ 4227 memset(&cmd, 0, sizeof (cmd)); 4228 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 4229 if (status != 0) { 4230 device_printf(sc->dev, "failed reset\n"); 4231 goto abort_with_fw; 4232 } 4233 4234 /* get rx ring size */ 4235 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4236 if (status != 0) { 4237 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4238 goto abort_with_fw; 4239 } 4240 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t)); 4241 4242 /* tell it the size of the interrupt queues */ 4243 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot); 4244 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 4245 if (status != 0) { 4246 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 4247 goto abort_with_fw; 4248 } 4249 4250 /* ask the maximum number of slices it supports */ 4251 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 4252 if (status != 0) { 4253 device_printf(sc->dev, 4254 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); 4255 goto abort_with_fw; 4256 } 4257 sc->num_slices = cmd.data0; 4258 if (sc->num_slices > msix_cnt) 4259 sc->num_slices = msix_cnt; 4260 4261 if (mxge_max_slices == -1) { 4262 /* cap to number of CPUs in system */ 4263 if (sc->num_slices > ncpus) 4264 sc->num_slices = ncpus; 4265 } else { 4266 if (sc->num_slices > mxge_max_slices) 4267 sc->num_slices = mxge_max_slices; 4268 } 4269 /* make sure it is a power of two */ 4270 while (sc->num_slices & (sc->num_slices - 1)) 4271 sc->num_slices--; 4272 4273 if (mxge_verbose) 4274 device_printf(sc->dev, "using %d slices\n", 4275 sc->num_slices); 4276 4277 return; 4278 4279 abort_with_fw: 4280 sc->fw_name = old_fw; 4281 (void) mxge_load_firmware(sc, 0); 4282 } 4283 4284 #if 0 4285 static int 4286 mxge_add_msix_irqs(mxge_softc_t *sc) 4287 { 4288 size_t bytes; 4289 int count, err, i, rid; 4290 4291 rid = PCIR_BAR(2); 4292 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 4293 &rid, RF_ACTIVE); 4294 4295 if (sc->msix_table_res == NULL) { 4296 device_printf(sc->dev, "couldn't alloc MSIX table res\n"); 4297 return ENXIO; 4298 } 4299 4300 count = sc->num_slices; 4301 err = pci_alloc_msix(sc->dev, &count); 4302 if (err != 0) { 4303 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d" 4304 "err = %d \n", sc->num_slices, err); 4305 goto abort_with_msix_table; 4306 } 4307 if (count < sc->num_slices) { 4308 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n", 4309 count, sc->num_slices); 4310 device_printf(sc->dev, 4311 "Try setting hw.mxge.max_slices to %d\n", 4312 count); 4313 err = ENOSPC; 4314 goto abort_with_msix; 4315 } 4316 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices; 4317 sc->msix_irq_res = kmalloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4318 if (sc->msix_irq_res == NULL) { 4319 err = ENOMEM; 4320 goto abort_with_msix; 4321 } 4322 4323 for (i = 0; i < sc->num_slices; i++) { 4324 rid = i + 1; 4325 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev, 4326 SYS_RES_IRQ, 4327 &rid, RF_ACTIVE); 4328 if (sc->msix_irq_res[i] == NULL) { 4329 device_printf(sc->dev, "couldn't allocate IRQ res" 4330 " for message %d\n", i); 4331 err = ENXIO; 4332 goto abort_with_res; 4333 } 4334 } 4335 4336 bytes = sizeof (*sc->msix_ih) * sc->num_slices; 4337 sc->msix_ih = kmalloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4338 4339 for (i = 0; i < sc->num_slices; i++) { 4340 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i], 4341 INTR_MPSAFE, 4342 mxge_intr, &sc->ss[i], &sc->msix_ih[i], 4343 sc->ifp->if_serializer); 4344 if (err != 0) { 4345 device_printf(sc->dev, "couldn't setup intr for " 4346 "message %d\n", i); 4347 goto abort_with_intr; 4348 } 4349 } 4350 4351 if (mxge_verbose) { 4352 device_printf(sc->dev, "using %d msix IRQs:", 4353 sc->num_slices); 4354 for (i = 0; i < sc->num_slices; i++) 4355 kprintf(" %ld", rman_get_start(sc->msix_irq_res[i])); 4356 kprintf("\n"); 4357 } 4358 return (0); 4359 4360 abort_with_intr: 4361 for (i = 0; i < sc->num_slices; i++) { 4362 if (sc->msix_ih[i] != NULL) { 4363 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4364 sc->msix_ih[i]); 4365 sc->msix_ih[i] = NULL; 4366 } 4367 } 4368 kfree(sc->msix_ih, M_DEVBUF); 4369 4370 4371 abort_with_res: 4372 for (i = 0; i < sc->num_slices; i++) { 4373 rid = i + 1; 4374 if (sc->msix_irq_res[i] != NULL) 4375 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4376 sc->msix_irq_res[i]); 4377 sc->msix_irq_res[i] = NULL; 4378 } 4379 kfree(sc->msix_irq_res, M_DEVBUF); 4380 4381 4382 abort_with_msix: 4383 pci_release_msi(sc->dev); 4384 4385 abort_with_msix_table: 4386 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4387 sc->msix_table_res); 4388 4389 return err; 4390 } 4391 #endif 4392 4393 static int 4394 mxge_add_single_irq(mxge_softc_t *sc) 4395 { 4396 int count, err, rid; 4397 4398 #ifdef OLD_MSI 4399 count = pci_msi_count(sc->dev); 4400 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) { 4401 rid = 1; 4402 } else { 4403 rid = 0; 4404 sc->legacy_irq = 1; 4405 } 4406 #else 4407 count = 0; 4408 rid = 0; 4409 sc->legacy_irq = 1; 4410 #endif 4411 sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0, 4412 1, RF_SHAREABLE | RF_ACTIVE); 4413 if (sc->irq_res == NULL) { 4414 device_printf(sc->dev, "could not alloc interrupt\n"); 4415 return ENXIO; 4416 } 4417 if (mxge_verbose) 4418 device_printf(sc->dev, "using %s irq %ld\n", 4419 sc->legacy_irq ? "INTx" : "MSI", 4420 rman_get_start(sc->irq_res)); 4421 err = bus_setup_intr(sc->dev, sc->irq_res, 4422 INTR_MPSAFE, 4423 mxge_intr, &sc->ss[0], &sc->ih, 4424 sc->ifp->if_serializer); 4425 if (err != 0) { 4426 bus_release_resource(sc->dev, SYS_RES_IRQ, 4427 sc->legacy_irq ? 0 : 1, sc->irq_res); 4428 if (!sc->legacy_irq) 4429 pci_release_msi(sc->dev); 4430 } 4431 return err; 4432 } 4433 4434 #if 0 4435 static void 4436 mxge_rem_msix_irqs(mxge_softc_t *sc) 4437 { 4438 int i, rid; 4439 4440 for (i = 0; i < sc->num_slices; i++) { 4441 if (sc->msix_ih[i] != NULL) { 4442 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4443 sc->msix_ih[i]); 4444 sc->msix_ih[i] = NULL; 4445 } 4446 } 4447 kfree(sc->msix_ih, M_DEVBUF); 4448 4449 for (i = 0; i < sc->num_slices; i++) { 4450 rid = i + 1; 4451 if (sc->msix_irq_res[i] != NULL) 4452 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4453 sc->msix_irq_res[i]); 4454 sc->msix_irq_res[i] = NULL; 4455 } 4456 kfree(sc->msix_irq_res, M_DEVBUF); 4457 4458 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4459 sc->msix_table_res); 4460 4461 pci_release_msi(sc->dev); 4462 return; 4463 } 4464 #endif 4465 4466 static void 4467 mxge_rem_single_irq(mxge_softc_t *sc) 4468 { 4469 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 4470 bus_release_resource(sc->dev, SYS_RES_IRQ, 4471 sc->legacy_irq ? 0 : 1, sc->irq_res); 4472 if (!sc->legacy_irq) 4473 pci_release_msi(sc->dev); 4474 } 4475 4476 static void 4477 mxge_rem_irq(mxge_softc_t *sc) 4478 { 4479 #if 0 4480 if (sc->num_slices > 1) 4481 mxge_rem_msix_irqs(sc); 4482 else 4483 #endif 4484 mxge_rem_single_irq(sc); 4485 } 4486 4487 static int 4488 mxge_add_irq(mxge_softc_t *sc) 4489 { 4490 #if 0 4491 int err; 4492 4493 if (sc->num_slices > 1) 4494 err = mxge_add_msix_irqs(sc); 4495 else 4496 err = mxge_add_single_irq(sc); 4497 4498 if (0 && err == 0 && sc->num_slices > 1) { 4499 mxge_rem_msix_irqs(sc); 4500 err = mxge_add_msix_irqs(sc); 4501 } 4502 return err; 4503 #else 4504 return mxge_add_single_irq(sc); 4505 #endif 4506 } 4507 4508 4509 static int 4510 mxge_attach(device_t dev) 4511 { 4512 mxge_softc_t *sc = device_get_softc(dev); 4513 struct ifnet *ifp = &sc->arpcom.ac_if; 4514 int err, rid; 4515 4516 /* 4517 * avoid rewriting half the lines in this file to use 4518 * &sc->arpcom.ac_if instead 4519 */ 4520 sc->ifp = ifp; 4521 sc->dev = dev; 4522 mxge_fetch_tunables(sc); 4523 4524 err = bus_dma_tag_create(NULL, /* parent */ 4525 1, /* alignment */ 4526 0, /* boundary */ 4527 BUS_SPACE_MAXADDR, /* low */ 4528 BUS_SPACE_MAXADDR, /* high */ 4529 NULL, NULL, /* filter */ 4530 65536 + 256, /* maxsize */ 4531 MXGE_MAX_SEND_DESC, /* num segs */ 4532 65536, /* maxsegsize */ 4533 0, /* flags */ 4534 &sc->parent_dmat); /* tag */ 4535 4536 if (err != 0) { 4537 device_printf(sc->dev, "Err %d allocating parent dmat\n", 4538 err); 4539 goto abort_with_nothing; 4540 } 4541 4542 sc->ifp = ifp; 4543 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4544 4545 callout_init_mp(&sc->co_hdl); 4546 4547 mxge_setup_cfg_space(sc); 4548 4549 /* Map the board into the kernel */ 4550 rid = PCIR_BARS; 4551 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, 4552 ~0, 1, RF_ACTIVE); 4553 if (sc->mem_res == NULL) { 4554 device_printf(dev, "could not map memory\n"); 4555 err = ENXIO; 4556 goto abort_with_nothing; 4557 } 4558 sc->sram = rman_get_virtual(sc->mem_res); 4559 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4560 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4561 device_printf(dev, "impossible memory region size %ld\n", 4562 rman_get_size(sc->mem_res)); 4563 err = ENXIO; 4564 goto abort_with_mem_res; 4565 } 4566 4567 /* make NULL terminated copy of the EEPROM strings section of 4568 lanai SRAM */ 4569 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4570 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4571 rman_get_bushandle(sc->mem_res), 4572 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4573 sc->eeprom_strings, 4574 MXGE_EEPROM_STRINGS_SIZE - 2); 4575 err = mxge_parse_strings(sc); 4576 if (err != 0) 4577 goto abort_with_mem_res; 4578 4579 /* Enable write combining for efficient use of PCIe bus */ 4580 mxge_enable_wc(sc); 4581 4582 /* Allocate the out of band dma memory */ 4583 err = mxge_dma_alloc(sc, &sc->cmd_dma, 4584 sizeof (mxge_cmd_t), 64); 4585 if (err != 0) 4586 goto abort_with_mem_res; 4587 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 4588 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4589 if (err != 0) 4590 goto abort_with_cmd_dma; 4591 4592 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4593 if (err != 0) 4594 goto abort_with_zeropad_dma; 4595 4596 /* select & load the firmware */ 4597 err = mxge_select_firmware(sc); 4598 if (err != 0) 4599 goto abort_with_dmabench; 4600 sc->intr_coal_delay = mxge_intr_coal_delay; 4601 4602 mxge_slice_probe(sc); 4603 err = mxge_alloc_slices(sc); 4604 if (err != 0) 4605 goto abort_with_dmabench; 4606 4607 err = mxge_reset(sc, 0); 4608 if (err != 0) 4609 goto abort_with_slices; 4610 4611 err = mxge_alloc_rings(sc); 4612 if (err != 0) { 4613 device_printf(sc->dev, "failed to allocate rings\n"); 4614 goto abort_with_dmabench; 4615 } 4616 4617 ifp->if_baudrate = IF_Gbps(10UL); 4618 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 4619 IFCAP_VLAN_MTU; 4620 #ifdef INET 4621 ifp->if_capabilities |= IFCAP_LRO; 4622 #endif 4623 4624 #ifdef MXGE_NEW_VLAN_API 4625 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; 4626 #endif 4627 4628 sc->max_mtu = mxge_max_mtu(sc); 4629 if (sc->max_mtu >= 9000) 4630 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 4631 else 4632 device_printf(dev, "MTU limited to %d. Install " 4633 "latest firmware for 9000 byte jumbo support\n", 4634 sc->max_mtu - ETHER_HDR_LEN); 4635 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4636 ifp->if_capenable = ifp->if_capabilities; 4637 if (sc->lro_cnt == 0) 4638 ifp->if_capenable &= ~IFCAP_LRO; 4639 sc->csum_flag = 1; 4640 ifp->if_init = mxge_init; 4641 ifp->if_softc = sc; 4642 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4643 ifp->if_ioctl = mxge_ioctl; 4644 ifp->if_start = mxge_start; 4645 /* Initialise the ifmedia structure */ 4646 ifmedia_init(&sc->media, 0, mxge_media_change, 4647 mxge_media_status); 4648 mxge_set_media(sc, IFM_ETHER | IFM_AUTO); 4649 mxge_media_probe(sc); 4650 sc->dying = 0; 4651 ether_ifattach(ifp, sc->mac_addr, NULL); 4652 /* ether_ifattach sets mtu to ETHERMTU */ 4653 if (mxge_initial_mtu != ETHERMTU) { 4654 lwkt_serialize_enter(ifp->if_serializer); 4655 mxge_change_mtu(sc, mxge_initial_mtu); 4656 lwkt_serialize_exit(ifp->if_serializer); 4657 } 4658 /* must come after ether_ifattach() */ 4659 err = mxge_add_irq(sc); 4660 if (err != 0) { 4661 device_printf(sc->dev, "failed to add irq\n"); 4662 goto abort_with_rings; 4663 } 4664 4665 mxge_add_sysctls(sc); 4666 #ifdef IFNET_BUF_RING 4667 ifp->if_transmit = mxge_transmit; 4668 ifp->if_qflush = mxge_qflush; 4669 #endif 4670 return 0; 4671 4672 abort_with_rings: 4673 mxge_free_rings(sc); 4674 abort_with_slices: 4675 mxge_free_slices(sc); 4676 abort_with_dmabench: 4677 mxge_dma_free(&sc->dmabench_dma); 4678 abort_with_zeropad_dma: 4679 mxge_dma_free(&sc->zeropad_dma); 4680 abort_with_cmd_dma: 4681 mxge_dma_free(&sc->cmd_dma); 4682 abort_with_mem_res: 4683 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4684 pci_disable_busmaster(dev); 4685 bus_dma_tag_destroy(sc->parent_dmat); 4686 abort_with_nothing: 4687 return err; 4688 } 4689 4690 static int 4691 mxge_detach(device_t dev) 4692 { 4693 mxge_softc_t *sc = device_get_softc(dev); 4694 4695 lwkt_serialize_enter(sc->ifp->if_serializer); 4696 sc->dying = 1; 4697 if (sc->ifp->if_flags & IFF_RUNNING) 4698 mxge_close(sc); 4699 /* 4700 * XXX: race: the callout callback could be spinning on 4701 * the serializer and run anyway 4702 */ 4703 callout_stop(&sc->co_hdl); 4704 lwkt_serialize_exit(sc->ifp->if_serializer); 4705 4706 ether_ifdetach(sc->ifp); 4707 ifmedia_removeall(&sc->media); 4708 mxge_dummy_rdma(sc, 0); 4709 mxge_rem_sysctls(sc); 4710 mxge_rem_irq(sc); 4711 mxge_free_rings(sc); 4712 mxge_free_slices(sc); 4713 mxge_dma_free(&sc->dmabench_dma); 4714 mxge_dma_free(&sc->zeropad_dma); 4715 mxge_dma_free(&sc->cmd_dma); 4716 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4717 pci_disable_busmaster(dev); 4718 bus_dma_tag_destroy(sc->parent_dmat); 4719 return 0; 4720 } 4721 4722 static int 4723 mxge_shutdown(device_t dev) 4724 { 4725 return 0; 4726 } 4727 4728 /* 4729 This file uses Myri10GE driver indentation. 4730 4731 Local Variables: 4732 c-file-style:"linux" 4733 tab-width:8 4734 End: 4735 */ 4736