/******************************************************************************

Copyright (c) 2006-2009, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
/*__FBSDID("$FreeBSD: src/sys/dev/mxge/if_mxge.c,v 1.63 2009/06/26 11:45:06 rwatson Exp $");*/

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/in_cksum.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/serialize.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

/* count xmits ourselves, rather than via drbr */
#define NO_SLOW_STATS
#include <net/if.h>
#include <net/if_arp.h>
#include <net/ifq_var.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/vlan/if_vlan_var.h>
#include <net/zlib.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <sys/bus.h>
#include <sys/rman.h>

#include <bus/pci/pcireg.h>
#include <bus/pci/pcivar.h>
#include <bus/pci/pci_private.h>	/* XXX for pci_cfg_restore */

#include <vm/vm.h>			/* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif

#include <dev/netif/mxge/mxge_mcp.h>
#include <dev/netif/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/netif/mxge/if_mxge_var.h>
#ifdef IFNET_BUF_RING
#include <sys/buf_ring.h>
#endif

#include "opt_inet.h"

/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_lro_cnt = 8;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
static int mxge_always_promisc = 0;
/* XXX: not yet */
/* static int mxge_initial_mtu = ETHERMTU_JUMBO; */
static int mxge_initial_mtu = ETHERMTU;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";

static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),
	{0, 0}
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus. */
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);

/* XXX: we don't have Large Receive Offload support yet */
inline int
mxge_lro_rx(struct mxge_slice_state *ss, struct mbuf *m_head, uint32_t csum)
{
	(void)ss;
	(void)m_head;
	(void)csum;
	return 1;
}

inline void
mxge_lro_flush(struct mxge_slice_state *ss, struct lro_entry *lro)
{
	(void)ss;
	(void)lro;
}

static int
mxge_probe(device_t dev)
{
	int rev;

	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		rev = pci_get_revid(dev);
		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n", rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}

static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if 0
#if defined(__i386) || defined(__amd64)
	vm_offset_t len;
	int err;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram,
			       len, PAT_WRITE_COMBINING);
	if (err != 0) {
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
			      err);
		sc->wc = 0;
	}
#endif
#else
	sc->wc = 0;	/* TBD: PAT support */
#endif
}

/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	if (error == 0) {
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}

static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

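	/*
	 * Illustrative reading of the boundary choice above: a one-page
	 * allocation (e.g. the command block) gets boundary = maxsegsize =
	 * 4096, so its single segment can never straddle a 4KB boundary,
	 * while a larger page-aligned ring (say bytes = 16384 with
	 * alignment = 4096) must use boundary = 0 and maxsegsize = bytes,
	 * or it could not fit in the one segment requested below.
	 */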
	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}

static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */
static int
mxge_parse_strings(mxge_softc_t *sc)
{
#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)

	char *ptr, *limit;
	int i, found_mac;

	ptr = sc->eeprom_strings;
	limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
	found_mac = 0;
	while (ptr < limit && *ptr != '\0') {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			ptr += 1;
			sc->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				ptr += 3;
				if ((ptr + 2) > limit)
					goto abort;
				sc->mac_addr[i] = strtoul(ptr, NULL, 16);
				found_mac = 1;
			}
		} else if (memcmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strncpy(sc->product_code_string, ptr,
				sizeof (sc->product_code_string) - 1);
		} else if (memcmp(ptr, "SN=", 3) == 0) {
			ptr += 3;
			strncpy(sc->serial_number_string, ptr,
				sizeof (sc->serial_number_string) - 1);
		}
		MXGE_NEXT_STRING(ptr);
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}
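
/*
 * Example walk of the MAC= parse above, assuming the eeprom contains
 * "MAC=00:01:02:03:04:05" (illustrative value): after the match, ptr
 * advances by 1, and each of the six loop iterations advances it by 3
 * more, so strtoul() is handed "00:...", "01:...", ..., "05" in turn
 * and stops at each ':' delimiter on its own.
 */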
#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x0378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function.  Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access.
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves.  This is because on
	 * opteron/nvidia class machines the 0xe0000000 mapping is
	 * handled by the nvidia chipset, which means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	off = base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);

	if (va == NULL) {
		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif
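
/*
 * Worked example of the extended config space address computed above
 * (illustrative numbers): for a bridge at bus 2, slot 3, function 0
 * behind an mcp55 with base = 0xe0000000, off = 0xe0000000 +
 * 2 * 0x00100000 + (0 + 8 * 3) * 0x00001000 = 0xe0218000; i.e. one
 * 4KB config page per function, 8 functions per slot, 1MB per bus.
 */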
static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";

	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */
	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}
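
/*
 * Reading the benchmark arithmetic above (illustrative numbers): the
 * firmware returns transfers completed in the high 16 bits of
 * cmd.data0 and elapsed 0.5us ticks in the low 16 bits, so
 * (transfers * len) / ticks is bytes per half-microsecond and the
 * extra "* 2" converts that to bytes/us, which reads directly as
 * MB/s.  E.g. 8192 transfers of 2048 bytes in 16384 ticks gives
 * 2048 MB/s.  (The read/write test multiplies by 2 again because it
 * moves data in both directions.)
 */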
/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary.  Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB.  If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */
static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
				      pectl);
			sc->tx_boundary = 2048;
		}
	}

	/*
	 * load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0) {
		return status;
	}

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen.
	 */
	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS)
		device_printf(dev, "Falling back to ethp! "
			      "Please install up to date fw\n");
	return status;
}

static int
mxge_select_firmware(mxge_softc_t *sc)
{
	int aligned = 0;

	if (mxge_force_firmware != 0) {
		if (mxge_force_firmware == 1)
			aligned = 1;
		else
			aligned = 0;
		if (mxge_verbose)
			device_printf(sc->dev,
				      "Assuming %s completions (forced)\n",
				      aligned ? "aligned" : "unaligned");
		goto abort;
	}

	/* if the PCIe link width is 4 or less, we can use the aligned
	   firmware and skip any checks */
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev,
			      "PCIe x%d Link, expect reduced performance\n",
			      sc->link_width);
		aligned = 1;
		goto abort;
	}

	if (0 == mxge_firmware_probe(sc))
		return 0;

abort:
	if (aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx_boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
	}
	return (mxge_load_firmware(sc, 0));
}

union qualhack
{
	const char *ro_char;
	char *rw_char;
};

static int
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{
	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
			      be32toh(hdr->mcp_type));
		return EIO;
	}

	/* save firmware version for sysctl */
	strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
	if (mxge_verbose)
		device_printf(sc->dev, "firmware id: %s\n", hdr->version);

	ksscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
		&sc->fw_ver_minor, &sc->fw_ver_tiny);

	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
	      && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		device_printf(sc->dev, "Found firmware version %s\n",
			      sc->fw_version);
		device_printf(sc->dev, "Driver needs %d.%d\n",
			      MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
		return EINVAL;
	}
	return 0;
}
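
/*
 * For example, a firmware id of "1.4.25" (illustrative value) parses
 * above to fw_ver_major = 1, fw_ver_minor = 4, fw_ver_tiny = 25; only
 * the major.minor pair has to match MXGEFW_VERSION_MAJOR/MINOR, the
 * tiny version is allowed to differ.
 */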
#if 0
static void *
z_alloc(void *nil, u_int items, u_int size)
{
	void *ptr;

	ptr = kmalloc(items * size, M_TEMP, M_NOWAIT);
	return ptr;
}

static void
z_free(void *nil, void *ptr)
{
	kfree(ptr, M_TEMP);
}
#endif

static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
	struct fw_image *fw;
	const mcp_gen_header_t *hdr;
	unsigned hdr_offset;
	int status;
	unsigned int i;
	char dummy;
	size_t fw_len;

	fw = firmware_image_load(sc->fw_name, NULL);
	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n",
			      sc->fw_name);
		return ENOENT;
	}
#if 0
	/* setup zlib and decompress f/w */
	bzero(&zs, sizeof (zs));
	zs.zalloc = z_alloc;
	zs.zfree = z_free;
	status = inflateInit(&zs);
	if (status != Z_OK) {
		status = EIO;
		goto abort_with_fw;
	}

	/* the uncompressed size is stored as the firmware version,
	   which would otherwise go unused */
	fw_len = (size_t) fw->version;
	inflate_buffer = kmalloc(fw_len, M_TEMP, M_NOWAIT);
	if (inflate_buffer == NULL)
		goto abort_with_zs;
	zs.avail_in = fw->datasize;
	zs.next_in = __DECONST(char *, fw->data);
	zs.avail_out = fw_len;
	zs.next_out = inflate_buffer;
	status = inflate(&zs, Z_FINISH);
	if (status != Z_STREAM_END) {
		device_printf(sc->dev, "zlib %d\n", status);
		status = EIO;
		goto abort_with_buffer;
	}
#endif
	fw_len = fw->fw_imglen;
	/* check id */
	hdr_offset = htobe32(*(const uint32_t *)
			     (fw->fw_image + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
		device_printf(sc->dev, "Bad firmware file\n");
		status = EIO;
		goto abort_with_fw;
	}
	hdr = (const void*)(fw->fw_image + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_fw;

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < fw_len; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
			      fw->fw_image + i,
			      min(256U, (unsigned)(fw_len - i)));
		wmb();
		dummy = *sc->sram;
		wmb();
	}

	*limit = fw_len;
	status = 0;
#if 0
abort_with_buffer:
	kfree(inflate_buffer, M_TEMP);
abort_with_zs:
	inflateEnd(&zs);
#endif
abort_with_fw:
	firmware_image_unload(fw);
	return status;
}
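
/*
 * Note on the copy loop above: the firmware image is pushed to SRAM in
 * 256-byte PIO bursts, and the dummy read of *sc->sram between bursts
 * (bracketed by wmb()) serves to flush each burst of posted writes to
 * the NIC before the next one starts.
 */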
/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */
static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);		/* confirm addr MSW */
	buf[1] = htobe32(dma_low);		/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);		/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high); 		/* dummy addr MSW */
	buf[4] = htobe32(dma_low); 		/* dummy addr LSW */
	buf[5] = htobe32(enable);		/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)\n",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}

static int
mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof(*buf) + 8];
	volatile mcp_cmd_response_t *response = sc->cmd;
	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
	uint32_t dma_low, dma_high;
	int err, sleep_total = 0;

	/*
	 * We may be called during attach, before if_serializer is available.
	 * This is not a fast path, just check for NULL
	 */
	if (sc->ifp->if_serializer)
		ASSERT_SERIALIZED(sc->ifp->if_serializer);

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	buf->data0 = htobe32(data->data0);
	buf->data1 = htobe32(data->data1);
	buf->data2 = htobe32(data->data2);
	buf->cmd = htobe32(cmd);
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf->response_addr.low = htobe32(dma_low);
	buf->response_addr.high = htobe32(dma_high);

	response->result = 0xffffffff;
	wmb();
	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	err = EAGAIN;
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
		wmb();
		switch (be32toh(response->result)) {
		case 0:
			data->data0 = be32toh(response->data);
			err = 0;
			break;
		case 0xffffffff:
			DELAY(1000);
			break;
		case MXGEFW_CMD_UNKNOWN:
			err = ENOSYS;
			break;
		case MXGEFW_CMD_ERROR_UNALIGNED:
			err = E2BIG;
			break;
		case MXGEFW_CMD_ERROR_BUSY:
			err = EBUSY;
			break;
		default:
			device_printf(sc->dev,
				      "mxge: command %d "
				      "failed, result = %d\n",
				      cmd, be32toh(response->result));
			err = ENXIO;
			break;
		}
		if (err != EAGAIN)
			break;
	}
	if (err == EAGAIN)
		device_printf(sc->dev, "mxge: command %d timed out, "
			      "result = %d\n",
			      cmd, be32toh(response->result));
	return err;
}
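
/*
 * Typical use of the command channel above (sketch only; the command
 * shown is borrowed from the ring setup path, not a new interface):
 * fill in cmd.data0..2, issue the command, and read the 32-bit result
 * back from cmd.data0:
 *
 *	mxge_cmd_t cmd;
 *
 *	cmd.data0 = 0;
 *	if (mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd) == 0)
 *		rx_ring_entries = cmd.data0 / sizeof (mcp_dma_addr_t);
 */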
static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = kmalloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not kmalloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	kfree(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}

	return status;
}

static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
				      "Using firmware currently running on NIC"
				      ".  For optimal\n");
			device_printf(sc->dev,
				      "performance consider loading optimized "
				      "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff.  However, the very first interfaces
	   do not.  Therefore the handoff copy must skip the first 8 bytes
	*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8); /* where the code starts */
	buf[4] = htobe32(size - 8); 	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "handoff failed (%p = 0x%x)\n",
			      confirm, *confirm);
		return ENXIO;
	}
	return 0;
}

static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}
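
/*
 * Packing example for the command above (illustrative address only):
 * for MAC 00:60:dd:43:21:10, cmd.data0 becomes 0x0060dd43 (the first
 * four octets, most significant first) and cmd.data1 becomes
 * 0x00002110 (the last two).
 */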
static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (sc->ifp->if_serializer)
		ASSERT_SERIALIZED(sc->ifp->if_serializer);
	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set promisc mode\n");
	}
}

static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	if (ifp->if_serializer)
		ASSERT_SERIALIZED(ifp->if_serializer);

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists */
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */
	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */
	LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		      &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		      &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "Failed "
				      "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
				      "%d\n", err);
			/* abort, leaving multicast filtering off */
			return;
		}
	}
	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}
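
/*
 * Ordering note for the update above: ALLMULTI is enabled before the
 * filter list is flushed and rebuilt, and only disabled again once
 * every join has succeeded, so wanted multicast frames are never
 * dropped while the list is being rewritten.
 */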
static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}

static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */
	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);

	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lro_bad_csum = 0;
		ss->lro_queued = 0;
		ss->lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			ss->fw_stats->valid = 0;
			ss->fw_stats->send_done_count = 0;
		}
	}
	sc->rdma_tags_available = 15;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	return status;
}
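
/*
 * The sysctl handlers below share one pattern: copy the current value
 * to a local, let sysctl_handle_int() overwrite it on a write, then
 * validate and commit the new value under the interface serializer so
 * the change cannot race the interrupt and transmit paths.
 */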
static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0) {
		return err;
	}
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	lwkt_serialize_enter(sc->ifp->if_serializer);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	lwkt_serialize_exit(sc->ifp->if_serializer);
	return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0) {
		return err;
	}
	if (enabled == sc->pause)
		return 0;

	lwkt_serialize_enter(sc->ifp->if_serializer);
	err = mxge_change_pause(sc, enabled);
	lwkt_serialize_exit(sc->ifp->if_serializer);
	return err;
}

static int
mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
{
	struct ifnet *ifp;
	int err = 0;

	ifp = sc->ifp;
	if (lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	else
		ifp->if_capenable |= IFCAP_LRO;
	sc->lro_cnt = lro_cnt;
	if (ifp->if_flags & IFF_RUNNING) {
		mxge_close(sc);
		err = mxge_open(sc);
	}
	return err;
}

static int
mxge_change_lro(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int lro_cnt;
	int err;

	sc = arg1;
	lro_cnt = sc->lro_cnt;
	err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
	if (err != 0)
		return err;

	if (lro_cnt == sc->lro_cnt)
		return 0;

	if (lro_cnt > 128)
		return EINVAL;

	lwkt_serialize_enter(sc->ifp->if_serializer);
	err = mxge_change_lro_locked(sc, lro_cnt);
	lwkt_serialize_exit(sc->ifp->if_serializer);
	return err;
}

static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}

static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int slice;

	if (sc->slice_sysctl_tree == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		if (ss == NULL || ss->sysctl_tree == NULL)
			continue;
		sysctl_ctx_free(&ss->sysctl_ctx);
		ss->sysctl_tree = NULL;
	}
	sysctl_ctx_free(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = NULL;
	sysctl_ctx_free(&sc->sysctl_ctx);
	sc->sysctl_tree = NULL;
}

static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = &sc->sysctl_ctx;
	sysctl_ctx_init(ctx);
	sc->sysctl_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_hw),
					  OID_AUTO,
					  device_get_nameunit(sc->dev),
					  CTLFLAG_RD, 0, "");
	if (sc->sysctl_tree == NULL) {
		device_printf(sc->dev, "can't add sysctl node\n");
		return;
	}

	children = SYSCTL_CHILDREN(sc->sysctl_tree);
	fw = sc->ss[0].fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "firmware_version",
			  CTLFLAG_RD, &sc->fw_version,
			  0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "serial_number",
			  CTLFLAG_RD, &sc->serial_number_string,
			  0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "product_code",
			  CTLFLAG_RD, &sc->product_code_string,
			  0, "product code");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "pcie_link_width",
		       CTLFLAG_RD, &sc->link_width,
		       0, "PCIe link width");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_boundary",
		       CTLFLAG_RD, &sc->tx_boundary,
		       0, "tx_boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_combine",
		       CTLFLAG_RD, &sc->wc,
		       0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_dma_MBs",
		       CTLFLAG_RD, &sc->read_dma,
		       0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_dma_MBs",
		       CTLFLAG_RD, &sc->write_dma,
		       0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_write_dma_MBs",
		       CTLFLAG_RD, &sc->read_write_dma,
		       0, "DMA concurrent Read/Write speed in MB/s");

	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"intr_coal_delay",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_intr_coal,
			"I", "interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"flow_control_enabled",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_flow_control,
			"I", "enable/disable link flow control");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "deassert_wait",
		       CTLFLAG_RW, &mxge_deassert_wait,
		       0, "Wait for IRQ line to go low in ihandler");

	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"link_up",
			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
			0, mxge_handle_be32,
			"I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"rdma_tags_available",
			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
			0, mxge_handle_be32,
			"I", "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_crc32",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_crc32,
			0, mxge_handle_be32,
			"I", "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_phy",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_phy,
			0, mxge_handle_be32,
			"I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_error_or_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_link_error_or_filtered,
			0, mxge_handle_be32,
			"I", "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_overflow",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
			0, mxge_handle_be32,
			"I", "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_multicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_multicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_big_buffer",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_small_buffer",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_no_small_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_overrun",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
			0, mxge_handle_be32,
			"I", "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_pause",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_pause,
			0, mxge_handle_be32,
			"I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_runt",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
			0, mxge_handle_be32,
			"I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_unicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_unicast_filtered");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "verbose",
		       CTLFLAG_RW, &mxge_verbose,
		       0, "verbose printing");

	/* lro */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"lro_cnt",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_lro,
			"I", "number of lro merge queues");

	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree =
		SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
				"slice", CTLFLAG_RD, 0, "");

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		ksprintf(slice_num, "%d", slice);
		ss->sysctl_tree =
			SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
					CTLFLAG_RD, 0, "");
		children = SYSCTL_CHILDREN(ss->sysctl_tree);
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_small_cnt",
			       CTLFLAG_RD, &ss->rx_small.cnt,
			       0, "rx_small_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_big_cnt",
			       CTLFLAG_RD, &ss->rx_big.cnt,
			       0, "rx_big_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
			       0, "number of lro merge queues flushed");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_queued", CTLFLAG_RD, &ss->lro_queued,
			       0, "number of frames appended to lro merge "
			       "queues");

#ifndef IFNET_BUF_RING
		/* only transmit from slice 0 for now */
		if (slice > 0)
			continue;
#endif
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_req",
			       CTLFLAG_RD, &ss->tx.req,
			       0, "tx_req");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_done",
			       CTLFLAG_RD, &ss->tx.done,
			       0, "tx_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_pkt_done",
			       CTLFLAG_RD, &ss->tx.pkt_done,
			       0, "tx_pkt_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_stall",
			       CTLFLAG_RD, &ss->tx.stall,
			       0, "tx_stall");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_wake",
			       CTLFLAG_RD, &ss->tx.wake,
			       0, "tx_wake");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_defrag",
			       CTLFLAG_RD, &ss->tx.defrag,
			       0, "tx_defrag");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_queue_active",
			       CTLFLAG_RD, &ss->tx.queue_active,
			       0, "tx_queue_active");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_activate",
			       CTLFLAG_RD, &ss->tx.activate,
			       0, "tx_activate");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_deactivate",
			       CTLFLAG_RD, &ss->tx.deactivate,
			       0, "tx_deactivate");
	}
}
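
/*
 * The nodes registered above land under hw.<nameunit>; e.g. on the
 * first NIC the per-slice transmit counter would be readable as
 * hw.mxge0.slice.0.tx_req (assuming unit number 0).
 */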
/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * backwards one at a time and handle ring wraps
 */
static inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
			  mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;

	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
		wmb();
	}
}

/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */
static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
		int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}
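
/*
 * Ring-wrap example for the two submit paths above, assuming a
 * 512-entry ring (mask = 511): with tx->req = 510 and cnt = 4,
 * idx + cnt exceeds the mask, so mxge_submit_req_backwards() writes
 * slots 1, 0 and 511 in that order, and mxge_submit_req() then writes
 * slot 510 last; since slot 510 holds the first request, its valid
 * flags going in last keep the NIC from seeing a partial chain.
 */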
#if IFCAP_TSO4

static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, int ip_off)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct ip *ip;
	struct tcphdr *tcp;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	/* ensure we have the ethernet, IP and TCP
	   header together in the first mbuf, copy
	   it to a scratch buffer if not */
	if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
		m_copydata(m, 0, ip_off + sizeof (*ip),
			   ss->scratch);
		ip = (struct ip *)(ss->scratch + ip_off);
	} else {
		ip = (struct ip *)(mtod(m, char *) + ip_off);
	}
	if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2)
			    + sizeof (*tcp))) {
		m_copydata(m, 0, ip_off + (ip->ip_hl << 2)
			   + sizeof (*tcp), ss->scratch);
		ip = (struct ip *)(ss->scratch + ip_off);
	}

	tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
	cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2));

	/* TSO implies checksum offload on this hardware */
	cksum_offset = ip_off + (ip->ip_hl << 2);
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request.  For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request.  For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request.  All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */
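	/*
	 * Example of the negative cum_len trick (illustrative sizes):
	 * with 14 bytes of Ethernet, 20 of IP and 20 of TCP header,
	 * cum_len starts at -54; descriptors emitted while it is
	 * negative carry header bytes, and the "header ends" branch
	 * below fires on the descriptor where cum_len_next first
	 * reaches 0, clipping seglen to the header remainder and
	 * flagging the next descriptor
	 * MXGEFW_FLAGS_TSO_PLD | MXGEFW_FLAGS_FIRST.
	 */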
	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces */
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (__predict_false(cksum_offset > seglen))
				cksum_offset -= seglen;
			else
				cksum_offset = 0;
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req-rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	ss->oerrors++;
	if (!once) {
		kprintf("tx->max_desc exceeded via TSO!\n");
		kprintf("mss = %d, %ld, %d!\n", mss,
			(long)seg - (long)tx->seg_list, tx->max_desc);
		once = 1;
	}
	return;
}

#endif /* IFCAP_TSO4 */

#ifdef MXGE_NEW_VLAN_API
/*
 * We reproduce the software vlan tag insertion from
 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
 * vlan tag insertion.  We need to advertise this in order to have the
 * vlan interface respect our csum offload flags.
 */
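/*
 * Layout sketch for the insertion below: the 12 bytes of destination
 * and source MAC are slid 4 bytes toward the front of the mbuf, and
 * the gap is filled with the 802.1Q ethertype and tag, i.e.
 * [dst][src][0800]... becomes [dst][src][8100][tag][0800]...
 */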
1995 */ 1996 evl = mtod(m, struct ether_vlan_header *); 1997 bcopy((char *)evl + EVL_ENCAPLEN, 1998 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 1999 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 2000 evl->evl_tag = htons(m->m_pkthdr.ether_vlantag); 2001 m->m_flags &= ~M_VLANTAG; 2002 return m; 2003 } 2004 #endif /* MXGE_NEW_VLAN_API */ 2005 2006 static void 2007 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 2008 { 2009 mxge_softc_t *sc; 2010 mcp_kreq_ether_send_t *req; 2011 bus_dma_segment_t *seg; 2012 struct mbuf *m_tmp; 2013 struct ifnet *ifp; 2014 mxge_tx_ring_t *tx; 2015 struct ip *ip; 2016 int cnt, cum_len, err, i, idx, odd_flag, ip_off; 2017 uint16_t pseudo_hdr_offset; 2018 uint8_t flags, cksum_offset; 2019 2020 2021 sc = ss->sc; 2022 ifp = sc->ifp; 2023 tx = &ss->tx; 2024 2025 ip_off = sizeof (struct ether_header); 2026 #ifdef MXGE_NEW_VLAN_API 2027 if (m->m_flags & M_VLANTAG) { 2028 m = mxge_vlan_tag_insert(m); 2029 if (__predict_false(m == NULL)) 2030 goto drop; 2031 ip_off += EVL_ENCAPLEN; 2032 } 2033 #endif 2034 /* (try to) map the frame for DMA */ 2035 idx = tx->req & tx->mask; 2036 err = bus_dmamap_load_mbuf_segment(tx->dmat, tx->info[idx].map, 2037 m, tx->seg_list, 1, &cnt, 2038 BUS_DMA_NOWAIT); 2039 if (__predict_false(err == EFBIG)) { 2040 /* Too many segments in the chain. Try 2041 to defrag */ 2042 m_tmp = m_defrag(m, M_NOWAIT); 2043 if (m_tmp == NULL) { 2044 goto drop; 2045 } 2046 ss->tx.defrag++; 2047 m = m_tmp; 2048 err = bus_dmamap_load_mbuf_segment(tx->dmat, 2049 tx->info[idx].map, 2050 m, tx->seg_list, 1, &cnt, 2051 BUS_DMA_NOWAIT); 2052 } 2053 if (__predict_false(err != 0)) { 2054 device_printf(sc->dev, "bus_dmamap_load_mbuf_segment returned %d" 2055 " packet len = %d\n", err, m->m_pkthdr.len); 2056 goto drop; 2057 } 2058 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 2059 BUS_DMASYNC_PREWRITE); 2060 tx->info[idx].m = m; 2061 2062 #if IFCAP_TSO4 2063 /* TSO is different enough, we handle it in another routine */ 2064 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 2065 mxge_encap_tso(ss, m, cnt, ip_off); 2066 return; 2067 } 2068 #endif 2069 2070 req = tx->req_list; 2071 cksum_offset = 0; 2072 pseudo_hdr_offset = 0; 2073 flags = MXGEFW_FLAGS_NO_TSO; 2074 2075 /* checksum offloading? 
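	   As this driver uses them, the firmware wants two offsets,
	   both measured from the start of the frame: cksum_offset,
	   where its 16-bit one's complement sum begins (the end of
	   the IP header), and pseudo_hdr_offset, where the result is
	   stored (csum_data bytes further in, i.e. the L4 checksum
	   field, following the stack's CSUM_DELAY_DATA convention).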
*/ 2076 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) { 2077 /* ensure ip header is in first mbuf, copy 2078 it to a scratch buffer if not */ 2079 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 2080 m_copydata(m, 0, ip_off + sizeof (*ip), 2081 ss->scratch); 2082 ip = (struct ip *)(ss->scratch + ip_off); 2083 } else { 2084 ip = (struct ip *)(mtod(m, char *) + ip_off); 2085 } 2086 cksum_offset = ip_off + (ip->ip_hl << 2); 2087 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2088 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2089 req->cksum_offset = cksum_offset; 2090 flags |= MXGEFW_FLAGS_CKSUM; 2091 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2092 } else { 2093 odd_flag = 0; 2094 } 2095 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2096 flags |= MXGEFW_FLAGS_SMALL; 2097 2098 /* convert segments into a request list */ 2099 cum_len = 0; 2100 seg = tx->seg_list; 2101 req->flags = MXGEFW_FLAGS_FIRST; 2102 for (i = 0; i < cnt; i++) { 2103 req->addr_low = 2104 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2105 req->addr_high = 2106 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2107 req->length = htobe16(seg->ds_len); 2108 req->cksum_offset = cksum_offset; 2109 if (cksum_offset > seg->ds_len) 2110 cksum_offset -= seg->ds_len; 2111 else 2112 cksum_offset = 0; 2113 req->pseudo_hdr_offset = pseudo_hdr_offset; 2114 req->pad = 0; /* complete solid 16-byte block */ 2115 req->rdma_count = 1; 2116 req->flags |= flags | ((cum_len & 1) * odd_flag); 2117 cum_len += seg->ds_len; 2118 seg++; 2119 req++; 2120 req->flags = 0; 2121 } 2122 req--; 2123 /* pad runts to 60 bytes */ 2124 if (cum_len < 60) { 2125 req++; 2126 req->addr_low = 2127 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2128 req->addr_high = 2129 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2130 req->length = htobe16(60 - cum_len); 2131 req->cksum_offset = 0; 2132 req->pseudo_hdr_offset = pseudo_hdr_offset; 2133 req->pad = 0; /* complete solid 16-byte block */ 2134 req->rdma_count = 1; 2135 req->flags |= flags | ((cum_len & 1) * odd_flag); 2136 cnt++; 2137 } 2138 2139 tx->req_list[0].rdma_count = cnt; 2140 #if 0 2141 /* print what the firmware will see */ 2142 for (i = 0; i < cnt; i++) { 2143 kprintf("%d: addr: 0x%x 0x%x len:%d pso%d," 2144 "cso:%d, flags:0x%x, rdma:%d\n", 2145 i, (int)ntohl(tx->req_list[i].addr_high), 2146 (int)ntohl(tx->req_list[i].addr_low), 2147 (int)ntohs(tx->req_list[i].length), 2148 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2149 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2150 tx->req_list[i].rdma_count); 2151 } 2152 kprintf("--------------\n"); 2153 #endif 2154 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2155 mxge_submit_req(tx, tx->req_list, cnt); 2156 #ifdef IFNET_BUF_RING 2157 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2158 /* tell the NIC to start polling this slice */ 2159 *tx->send_go = 1; 2160 tx->queue_active = 1; 2161 tx->activate++; 2162 wmb(); 2163 } 2164 #endif 2165 return; 2166 2167 drop: 2168 m_freem(m); 2169 ss->oerrors++; 2170 return; 2171 } 2172 2173 #ifdef IFNET_BUF_RING 2174 static void 2175 mxge_qflush(struct ifnet *ifp) 2176 { 2177 mxge_softc_t *sc = ifp->if_softc; 2178 mxge_tx_ring_t *tx; 2179 struct mbuf *m; 2180 int slice; 2181 2182 for (slice = 0; slice < sc->num_slices; slice++) { 2183 tx = &sc->ss[slice].tx; 2184 lwkt_serialize_enter(sc->ifp->if_serializer); 2185 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2186 m_freem(m); 2187 lwkt_serialize_exit(sc->ifp->if_serializer); 2188 } 2189 if_qflush(ifp); 2190 } 2191 2192 
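/*
 * A note on the ring-occupancy test used by the transmit paths below:
 * tx->req and tx->done are free-running counters, so (tx->req - tx->done)
 * is the number of descriptors currently in flight, and
 *
 *	(tx->mask - (tx->req - tx->done)) > tx->max_desc
 *
 * holds when a worst-case frame (up to tx->max_desc descriptors) is
 * still guaranteed to fit.  Illustrative numbers: with mask = 1023,
 * req = 900 and done = 800, 1023 - 100 = 923 slots remain free.
 */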
static inline void
2193 mxge_start_locked(struct mxge_slice_state *ss)
2194 {
2195 	mxge_softc_t *sc;
2196 	struct mbuf *m;
2197 	struct ifnet *ifp;
2198 	mxge_tx_ring_t *tx;
2199 
2200 	sc = ss->sc;
2201 	ifp = sc->ifp;
2202 	tx = &ss->tx;
2203 
2204 	while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2205 		m = drbr_dequeue(ifp, tx->br);
2206 		if (m == NULL) {
2207 			return;
2208 		}
2209 		/* let BPF see it */
2210 		BPF_MTAP(ifp, m);
2211 
2212 		/* give it to the nic */
2213 		mxge_encap(ss, m);
2214 	}
2215 	/* ran out of transmit slots */
2216 	if (((ss->if_flags & IFF_OACTIVE) == 0)
2217 	    && (!drbr_empty(ifp, tx->br))) {
2218 		ss->if_flags |= IFF_OACTIVE;
2219 		tx->stall++;
2220 	}
2221 }
2222 
2223 static int
2224 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
2225 {
2226 	mxge_softc_t *sc;
2227 	struct ifnet *ifp;
2228 	mxge_tx_ring_t *tx;
2229 	int err;
2230 
2231 	sc = ss->sc;
2232 	ifp = sc->ifp;
2233 	tx = &ss->tx;
2234 
2235 	if ((ss->if_flags & (IFF_RUNNING|IFF_OACTIVE)) !=
2236 	    IFF_RUNNING) {
2237 		err = drbr_enqueue(ifp, tx->br, m);
2238 		return (err);
2239 	}
2240 
2241 	if (drbr_empty(ifp, tx->br) &&
2242 	    ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
2243 		/* let BPF see it */
2244 		BPF_MTAP(ifp, m);
2245 		/* give it to the nic */
2246 		mxge_encap(ss, m);
2247 	} else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
2248 		return (err);
2249 	}
2250 	if (!drbr_empty(ifp, tx->br))
2251 		mxge_start_locked(ss);
2252 	return (0);
2253 }
2254 
2255 static int
2256 mxge_transmit(struct ifnet *ifp, struct mbuf *m)
2257 {
2258 	mxge_softc_t *sc = ifp->if_softc;
2259 	struct mxge_slice_state *ss;
2260 	mxge_tx_ring_t *tx;
2261 	int err = 0;
2262 	int slice;
2263 
2264 #if 0
2265 	slice = m->m_pkthdr.flowid;
#else
	/* no flowid is available here yet; fall back to the first slice */
	slice = 0;
2266 #endif
2267 	slice &= (sc->num_slices - 1);	/* num_slices always power of 2 */
2268 
2269 	ss = &sc->ss[slice];
2270 	tx = &ss->tx;
2271 
2272 	if (lwkt_serialize_try(ifp->if_serializer)) {
2273 		err = mxge_transmit_locked(ss, m);
2274 		lwkt_serialize_exit(ifp->if_serializer);
2275 	} else {
2276 		err = drbr_enqueue(ifp, tx->br, m);
2277 	}
2278 
2279 	return (err);
2280 }
2281 
2282 #else
2283 
2284 static inline void
2285 mxge_start_locked(struct mxge_slice_state *ss)
2286 {
2287 	mxge_softc_t *sc;
2288 	struct mbuf *m;
2289 	struct ifnet *ifp;
2290 	mxge_tx_ring_t *tx;
2291 
2292 	sc = ss->sc;
2293 	ifp = sc->ifp;
2294 	tx = &ss->tx;
2295 	while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2296 		m = ifq_dequeue(&ifp->if_snd, NULL);
2297 		if (m == NULL) {
2298 			return;
2299 		}
2300 		/* let BPF see it */
2301 		BPF_MTAP(ifp, m);
2302 
2303 		/* give it to the nic */
2304 		mxge_encap(ss, m);
2305 	}
2306 	/* ran out of transmit slots */
2307 	if ((sc->ifp->if_flags & IFF_OACTIVE) == 0) {
2308 		sc->ifp->if_flags |= IFF_OACTIVE;
2309 		tx->stall++;
2310 	}
2311 }
2312 #endif
2313 static void
2314 mxge_start(struct ifnet *ifp)
2315 {
2316 	mxge_softc_t *sc = ifp->if_softc;
2317 	struct mxge_slice_state *ss;
2318 
2319 	ASSERT_SERIALIZED(sc->ifp->if_serializer);
2320 	/* only use the first slice for now */
2321 	ss = &sc->ss[0];
2322 	mxge_start_locked(ss);
2323 }
2324 
2325 /*
2326  * copy an array of mcp_kreq_ether_recv_t's to the mcp.  Copy
2327  * at most 32 bytes at a time, so as to avoid involving the software
2328  * pio handler in the nic.
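 * (Each mcp_kreq_ether_recv_t is two 32-bit address words, 8 bytes,
 * so the eight entries below go out as two 32-byte bursts.)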
We re-write the first segment's low 2329 * DMA address to mark it valid only after we write the entire chunk 2330 * in a burst 2331 */ 2332 static inline void 2333 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2334 mcp_kreq_ether_recv_t *src) 2335 { 2336 uint32_t low; 2337 2338 low = src->addr_low; 2339 src->addr_low = 0xffffffff; 2340 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2341 wmb(); 2342 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2343 wmb(); 2344 src->addr_low = low; 2345 dst->addr_low = low; 2346 wmb(); 2347 } 2348 2349 static int 2350 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2351 { 2352 bus_dma_segment_t seg; 2353 struct mbuf *m; 2354 mxge_rx_ring_t *rx = &ss->rx_small; 2355 int cnt, err; 2356 2357 m = m_gethdr(MB_DONTWAIT, MT_DATA); 2358 if (m == NULL) { 2359 rx->alloc_fail++; 2360 err = ENOBUFS; 2361 goto done; 2362 } 2363 m->m_len = m->m_pkthdr.len = MHLEN; 2364 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m, 2365 &seg, 1, &cnt, BUS_DMA_NOWAIT); 2366 if (err != 0) { 2367 kprintf("can't dmamap small (%d)\n", err); 2368 m_free(m); 2369 goto done; 2370 } 2371 rx->info[idx].m = m; 2372 rx->shadow[idx].addr_low = 2373 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2374 rx->shadow[idx].addr_high = 2375 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2376 2377 done: 2378 if ((idx & 7) == 7) 2379 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2380 return err; 2381 } 2382 2383 2384 static int 2385 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2386 { 2387 bus_dma_segment_t seg[3]; 2388 struct mbuf *m; 2389 mxge_rx_ring_t *rx = &ss->rx_big; 2390 int cnt, err, i; 2391 2392 if (rx->cl_size == MCLBYTES) 2393 m = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR); 2394 else { 2395 #if 0 2396 m = m_getjcl(MB_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2397 #else 2398 /* 2399 * XXX: allocate normal sized buffers for big buffers. 2400 * We should be fine as long as we don't get any jumbo frames 2401 */ 2402 m = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR); 2403 #endif 2404 } 2405 if (m == NULL) { 2406 rx->alloc_fail++; 2407 err = ENOBUFS; 2408 goto done; 2409 } 2410 m->m_pkthdr.len = 0; 2411 m->m_len = m->m_pkthdr.len = rx->mlen; 2412 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m, 2413 seg, 1, &cnt, BUS_DMA_NOWAIT); 2414 if (err != 0) { 2415 kprintf("can't dmamap big (%d)\n", err); 2416 m_free(m); 2417 goto done; 2418 } 2419 rx->info[idx].m = m; 2420 rx->shadow[idx].addr_low = 2421 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2422 rx->shadow[idx].addr_high = 2423 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2424 2425 #if MXGE_VIRT_JUMBOS 2426 for (i = 1; i < cnt; i++) { 2427 rx->shadow[idx + i].addr_low = 2428 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2429 rx->shadow[idx + i].addr_high = 2430 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2431 } 2432 #endif 2433 2434 done: 2435 for (i = 0; i < rx->nbufs; i++) { 2436 if ((idx & 7) == 7) { 2437 mxge_submit_8rx(&rx->lanai[idx - 7], 2438 &rx->shadow[idx - 7]); 2439 } 2440 idx++; 2441 } 2442 return err; 2443 } 2444 2445 /* 2446 * Myri10GE hardware checksums are not valid if the sender 2447 * padded the frame with non-zero padding. This is because 2448 * the firmware just does a simple 16-bit 1s complement 2449 * checksum across the entire frame, excluding the first 14 2450 * bytes. 
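 * A sender that pads a short frame with non-zero bytes therefore
 * perturbs the reported sum even though the payload is intact.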
It is best to simply check the checksum and
2451  * tell the stack about it only if the checksum is good
2452  */
2453 
2454 static inline uint16_t
2455 mxge_rx_csum(struct mbuf *m, int csum)
2456 {
2457 	struct ether_header *eh;
2458 	struct ip *ip;
2459 	uint16_t c;
2460 
2461 	eh = mtod(m, struct ether_header *);
2462 
2463 	/* only deal with IPv4 TCP & UDP for now */
2464 	if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
2465 		return 1;
2466 	ip = (struct ip *)(eh + 1);
2467 	if (__predict_false(ip->ip_p != IPPROTO_TCP &&
2468 			    ip->ip_p != IPPROTO_UDP))
2469 		return 1;
2470 #ifdef INET
2471 	c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2472 		      htonl(ntohs(csum) + ntohs(ip->ip_len) -
2473 			    (ip->ip_hl << 2) + ip->ip_p));
2474 #else
2475 	c = 1;
2476 #endif
2477 	c ^= 0xffff;
2478 	return (c);
2479 }
2480 
2481 static void
2482 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2483 {
2484 	struct ether_vlan_header *evl;
2485 	struct ether_header *eh;
2486 	uint32_t partial;
2487 
2488 	evl = mtod(m, struct ether_vlan_header *);
2489 	eh = mtod(m, struct ether_header *);
2490 
2491 	/*
2492 	 * fix the checksum by subtracting the contribution of the
2493 	 * EVL_ENCAPLEN bytes that follow what the firmware took to be
2494 	 * the end of the ethernet header.
2495 	 */
2496 
2497 	/* put checksum into host byte order */
2498 	*csum = ntohs(*csum);
2499 	partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2500 	(*csum) += ~partial;
2501 	(*csum) += ((*csum) < ~partial);
2502 	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2503 	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2504 
2505 	/* restore checksum to network byte order;
2506 	   later consumers expect this */
2507 	*csum = htons(*csum);
2508 
2509 	/* save the tag */
2510 #ifdef MXGE_NEW_VLAN_API
2511 	m->m_pkthdr.ether_vlantag = ntohs(evl->evl_tag);
2512 #else
2513 	{
2514 		struct m_tag *mtag;
2515 		mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2516 				   MB_DONTWAIT);
2517 		if (mtag == NULL)
2518 			return;
2519 		VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2520 		m_tag_prepend(m, mtag);
2521 	}
2522 
2523 #endif
2524 	m->m_flags |= M_VLANTAG;
2525 
2526 	/*
2527 	 * Remove the 802.1q header by copying the Ethernet
2528 	 * addresses over it and adjusting the beginning of
2529 	 * the data in the mbuf. The encapsulated Ethernet
2530 	 * type field is already in place.
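	 *
	 * Sketch (reverse of the insertion transform; widths in bytes;
	 * illustrative only):
	 *
	 *   before:         [dst 6][src 6][0x8100 2][tag 2][type 2] ...
	 *   after m_adj():          [dst 6][src 6][type 2] ...
	 *
	 * The bcopy() below moves the addresses EVL_ENCAPLEN bytes
	 * toward the payload, and m_adj() trims the dead leading bytes.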
2531 */ 2532 bcopy((char *)evl, (char *)evl + EVL_ENCAPLEN, 2533 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2534 m_adj(m, EVL_ENCAPLEN); 2535 } 2536 2537 2538 static inline void 2539 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum, 2540 struct mbuf_chain *chain) 2541 { 2542 mxge_softc_t *sc; 2543 struct ifnet *ifp; 2544 struct mbuf *m; 2545 struct ether_header *eh; 2546 mxge_rx_ring_t *rx; 2547 bus_dmamap_t old_map; 2548 int idx; 2549 uint16_t tcpudp_csum; 2550 2551 sc = ss->sc; 2552 ifp = sc->ifp; 2553 rx = &ss->rx_big; 2554 idx = rx->cnt & rx->mask; 2555 rx->cnt += rx->nbufs; 2556 /* save a pointer to the received mbuf */ 2557 m = rx->info[idx].m; 2558 /* try to replace the received mbuf */ 2559 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2560 /* drop the frame -- the old mbuf is re-cycled */ 2561 ifp->if_ierrors++; 2562 return; 2563 } 2564 2565 /* unmap the received buffer */ 2566 old_map = rx->info[idx].map; 2567 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2568 bus_dmamap_unload(rx->dmat, old_map); 2569 2570 /* swap the bus_dmamap_t's */ 2571 rx->info[idx].map = rx->extra_map; 2572 rx->extra_map = old_map; 2573 2574 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2575 * aligned */ 2576 m->m_data += MXGEFW_PAD; 2577 2578 m->m_pkthdr.rcvif = ifp; 2579 m->m_len = m->m_pkthdr.len = len; 2580 ss->ipackets++; 2581 eh = mtod(m, struct ether_header *); 2582 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2583 mxge_vlan_tag_remove(m, &csum); 2584 } 2585 /* if the checksum is valid, mark it in the mbuf header */ 2586 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2587 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2588 return; 2589 /* otherwise, it was a UDP frame, or a TCP frame which 2590 we could not do LRO on. 
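		   Setting csum_data to 0xffff together with
		   CSUM_PSEUDO_HDR is the usual BSD idiom for a fully
		   verified TCP/UDP sum, so the stack skips its own
		   software check.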
Tell the stack that the 2591 checksum is good */ 2592 m->m_pkthdr.csum_data = 0xffff; 2593 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2594 } 2595 #if 0 2596 /* flowid only valid if RSS hashing is enabled */ 2597 if (sc->num_slices > 1) { 2598 m->m_pkthdr.flowid = (ss - sc->ss); 2599 m->m_flags |= M_FLOWID; 2600 } 2601 #endif 2602 ether_input_chain(ifp, m, NULL, chain); 2603 } 2604 2605 static inline void 2606 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum, 2607 struct mbuf_chain *chain) 2608 { 2609 mxge_softc_t *sc; 2610 struct ifnet *ifp; 2611 struct ether_header *eh; 2612 struct mbuf *m; 2613 mxge_rx_ring_t *rx; 2614 bus_dmamap_t old_map; 2615 int idx; 2616 uint16_t tcpudp_csum; 2617 2618 sc = ss->sc; 2619 ifp = sc->ifp; 2620 rx = &ss->rx_small; 2621 idx = rx->cnt & rx->mask; 2622 rx->cnt++; 2623 /* save a pointer to the received mbuf */ 2624 m = rx->info[idx].m; 2625 /* try to replace the received mbuf */ 2626 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2627 /* drop the frame -- the old mbuf is re-cycled */ 2628 ifp->if_ierrors++; 2629 return; 2630 } 2631 2632 /* unmap the received buffer */ 2633 old_map = rx->info[idx].map; 2634 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2635 bus_dmamap_unload(rx->dmat, old_map); 2636 2637 /* swap the bus_dmamap_t's */ 2638 rx->info[idx].map = rx->extra_map; 2639 rx->extra_map = old_map; 2640 2641 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2642 * aligned */ 2643 m->m_data += MXGEFW_PAD; 2644 2645 m->m_pkthdr.rcvif = ifp; 2646 m->m_len = m->m_pkthdr.len = len; 2647 ss->ipackets++; 2648 eh = mtod(m, struct ether_header *); 2649 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2650 mxge_vlan_tag_remove(m, &csum); 2651 } 2652 /* if the checksum is valid, mark it in the mbuf header */ 2653 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2654 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2655 return; 2656 /* otherwise, it was a UDP frame, or a TCP frame which 2657 we could not do LRO on. 
Tell the stack that the
2658 		   checksum is good */
2659 		m->m_pkthdr.csum_data = 0xffff;
2660 		m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2661 	}
2662 #if 0
2663 	/* flowid only valid if RSS hashing is enabled */
2664 	if (sc->num_slices > 1) {
2665 		m->m_pkthdr.flowid = (ss - sc->ss);
2666 		m->m_flags |= M_FLOWID;
2667 	}
2668 #endif
2669 	ether_input_chain(ifp, m, NULL, chain);
2670 }
2671 
2672 static inline void
2673 mxge_clean_rx_done(struct mxge_slice_state *ss)
2674 {
2675 	mxge_rx_done_t *rx_done = &ss->rx_done;
2676 	int limit = 0;
2677 	uint16_t length;
2678 	uint16_t checksum;
2679 	struct mbuf_chain chain[MAXCPU];
2680 
2681 	ether_input_chain_init(chain);
2682 	while (rx_done->entry[rx_done->idx].length != 0) {
2683 		length = ntohs(rx_done->entry[rx_done->idx].length);
2684 		rx_done->entry[rx_done->idx].length = 0;
2685 		checksum = rx_done->entry[rx_done->idx].checksum;
2686 		if (length <= (MHLEN - MXGEFW_PAD))
2687 			mxge_rx_done_small(ss, length, checksum, chain);
2688 		else
2689 			mxge_rx_done_big(ss, length, checksum, chain);
2690 		rx_done->cnt++;
2691 		rx_done->idx = rx_done->cnt & rx_done->mask;
2692 
2693 		/* limit potential for livelock */
2694 		if (__predict_false(++limit > rx_done->mask / 2))
2695 			break;
2696 	}
2697 	ether_input_dispatch(chain);
2698 #ifdef INET
2699 	while (!SLIST_EMPTY(&ss->lro_active)) {
2700 		struct lro_entry *lro = SLIST_FIRST(&ss->lro_active);
2701 		SLIST_REMOVE_HEAD(&ss->lro_active, next);
2702 		mxge_lro_flush(ss, lro);
2703 	}
2704 #endif
2705 }
2706 
2707 
2708 static inline void
2709 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2710 {
2711 	struct ifnet *ifp;
2712 	mxge_tx_ring_t *tx;
2713 	struct mbuf *m;
2714 	bus_dmamap_t map;
2715 	int idx;
2716 	int *flags;
2717 
2718 	tx = &ss->tx;
2719 	ifp = ss->sc->ifp;
2720 	ASSERT_SERIALIZED(ifp->if_serializer);
2721 	while (tx->pkt_done != mcp_idx) {
2722 		idx = tx->done & tx->mask;
2723 		tx->done++;
2724 		m = tx->info[idx].m;
2725 		/* mbuf and DMA map only attached to the first
2726 		   segment per-mbuf */
2727 		if (m != NULL) {
2728 			ss->obytes += m->m_pkthdr.len;
2729 			if (m->m_flags & M_MCAST)
2730 				ss->omcasts++;
2731 			ss->opackets++;
2732 			tx->info[idx].m = NULL;
2733 			map = tx->info[idx].map;
2734 			bus_dmamap_unload(tx->dmat, map);
2735 			m_freem(m);
2736 		}
2737 		if (tx->info[idx].flag) {
2738 			tx->info[idx].flag = 0;
2739 			tx->pkt_done++;
2740 		}
2741 	}
2742 
2743 	/* If we have space, clear IFF_OACTIVE to tell the stack that
2744 	   it's OK to send packets */
2745 #ifdef IFNET_BUF_RING
2746 	flags = &ss->if_flags;
2747 #else
2748 	flags = &ifp->if_flags;
2749 #endif
2750 	if ((*flags) & IFF_OACTIVE &&
2751 	    tx->req - tx->done < (tx->mask + 1)/4) {
2752 		*(flags) &= ~IFF_OACTIVE;
2753 		ss->tx.wake++;
2754 		mxge_start_locked(ss);
2755 	}
2756 #ifdef IFNET_BUF_RING
2757 	if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2758 		/* let the NIC stop polling this queue, since there
2759 		 * are no more transmits pending */
2761 		*tx->send_stop = 1;
2762 		tx->queue_active = 0;
2763 		tx->deactivate++;
2764 		wmb();
2766 	}
2767 #endif
2768 
2769 }
2770 
2771 static struct mxge_media_type mxge_xfp_media_types[] =
2772 {
2773 	{IFM_10G_CX4,	0x7f,		"10GBASE-CX4 (module)"},
2774 	{IFM_10G_SR,	(1 << 7),	"10GBASE-SR"},
2775 	{IFM_10G_LR,	(1 << 6),	"10GBASE-LR"},
2776 	{0,		(1 << 5),	"10GBASE-ER"},
2777 	{IFM_10G_LRM,	(1 << 4),	"10GBASE-LRM"},
2778 	{0,		(1 << 3),	"10GBASE-SW"},
2779 	{0,		(1 << 2),	"10GBASE-LW"},
2780 	{0,		(1 << 1),	"10GBASE-EW"},
2781 	{0,		(1 << 0),	"Reserved"}
2782 };
2783 static struct mxge_media_type
mxge_sfp_media_types[] =
2784 {
2785 	{0,		(1 << 7),	"Reserved"},
2786 	{IFM_10G_LRM,	(1 << 6),	"10GBASE-LRM"},
2787 	{IFM_10G_LR,	(1 << 5),	"10GBASE-LR"},
2788 	{IFM_10G_SR,	(1 << 4),	"10GBASE-SR"}
2789 };
2790 
2791 static void
2792 mxge_set_media(mxge_softc_t *sc, int type)
2793 {
2794 	sc->media_flags |= type;
2795 	ifmedia_add(&sc->media, sc->media_flags, 0, NULL);
2796 	ifmedia_set(&sc->media, sc->media_flags);
2797 }
2798 
2799 
2800 /*
2801  * Determine the media type for a NIC.  Some XFPs will identify
2802  * themselves only when their link is up, so this is initiated via a
2803  * link up interrupt.  However, this can potentially take up to
2804  * several milliseconds, so it is run via the watchdog routine, rather
2805  * than in the interrupt handler itself.  This need only be done
2806  * once, not each time the link is up.
2807  */
2808 static void
2809 mxge_media_probe(mxge_softc_t *sc)
2810 {
2811 	mxge_cmd_t cmd;
2812 	char *cage_type;
2813 	char *ptr;
2814 	struct mxge_media_type *mxge_media_types = NULL;
2815 	int i, err, ms, mxge_media_type_entries;
2816 	uint32_t byte;
2817 
2818 	sc->need_media_probe = 0;
2819 
2820 	/* if we've already set a media type, we're done */
2821 	if (sc->media_flags != (IFM_ETHER | IFM_AUTO))
2822 		return;
2823 
2824 	/*
2825 	 * parse the product code to determine the interface type
2826 	 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2827 	 * after the 3rd dash in the driver's cached copy of the
2828 	 * EEPROM's product code string.
2829 	 */
2830 	ptr = sc->product_code_string;
2831 	if (ptr == NULL) {
2832 		device_printf(sc->dev, "Missing product code\n");
		return;
2833 	}
2834 
2835 	for (i = 0; i < 3; i++, ptr++) {
2836 		ptr = index(ptr, '-');
2837 		if (ptr == NULL) {
2838 			device_printf(sc->dev,
2839 				      "only %d dashes in PC?!?\n", i);
2840 			return;
2841 		}
2842 	}
2843 	if (*ptr == 'C') {
2844 		/* -C is CX4 */
2845 		mxge_set_media(sc, IFM_10G_CX4);
2846 		return;
2847 	}
2848 	else if (*ptr == 'Q') {
2849 		/* -Q is Quad Ribbon Fiber */
2850 		device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2851 		/* FreeBSD has no media type for Quad ribbon fiber */
2852 		return;
2853 	}
2854 
2855 	if (*ptr == 'R') {
2856 		/* -R is XFP */
2857 		mxge_media_types = mxge_xfp_media_types;
2858 		mxge_media_type_entries =
2859 			sizeof (mxge_xfp_media_types) /
2860 			sizeof (mxge_xfp_media_types[0]);
2861 		byte = MXGE_XFP_COMPLIANCE_BYTE;
2862 		cage_type = "XFP";
2863 	}
2864 
2865 	if (*ptr == 'S' || *(ptr + 1) == 'S') {
2866 		/* -S or -2S is SFP+ */
2867 		mxge_media_types = mxge_sfp_media_types;
2868 		mxge_media_type_entries =
2869 			sizeof (mxge_sfp_media_types) /
2870 			sizeof (mxge_sfp_media_types[0]);
2871 		cage_type = "SFP+";
2872 		byte = 3;
2873 	}
2874 
2875 	if (mxge_media_types == NULL) {
2876 		device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2877 		return;
2878 	}
2879 
2880 	/*
2881 	 * At this point we know the NIC has an XFP cage, so now we
2882 	 * try to determine what is in the cage by using the
2883 	 * firmware's XFP I2C commands to read the XFP 10GbE compliance
We read just one byte, which may take over 2885 * a millisecond 2886 */ 2887 2888 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2889 cmd.data1 = byte; 2890 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2891 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 2892 device_printf(sc->dev, "failed to read XFP\n"); 2893 } 2894 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 2895 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 2896 } 2897 if (err != MXGEFW_CMD_OK) { 2898 return; 2899 } 2900 2901 /* now we wait for the data to be cached */ 2902 cmd.data0 = byte; 2903 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2904 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 2905 DELAY(1000); 2906 cmd.data0 = byte; 2907 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2908 } 2909 if (err != MXGEFW_CMD_OK) { 2910 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 2911 cage_type, err, ms); 2912 return; 2913 } 2914 2915 if (cmd.data0 == mxge_media_types[0].bitmask) { 2916 if (mxge_verbose) 2917 device_printf(sc->dev, "%s:%s\n", cage_type, 2918 mxge_media_types[0].name); 2919 mxge_set_media(sc, IFM_10G_CX4); 2920 return; 2921 } 2922 for (i = 1; i < mxge_media_type_entries; i++) { 2923 if (cmd.data0 & mxge_media_types[i].bitmask) { 2924 if (mxge_verbose) 2925 device_printf(sc->dev, "%s:%s\n", 2926 cage_type, 2927 mxge_media_types[i].name); 2928 2929 mxge_set_media(sc, mxge_media_types[i].flag); 2930 return; 2931 } 2932 } 2933 device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type, 2934 cmd.data0); 2935 2936 return; 2937 } 2938 2939 static void 2940 mxge_intr(void *arg) 2941 { 2942 struct mxge_slice_state *ss = arg; 2943 mxge_softc_t *sc = ss->sc; 2944 mcp_irq_data_t *stats = ss->fw_stats; 2945 mxge_tx_ring_t *tx = &ss->tx; 2946 mxge_rx_done_t *rx_done = &ss->rx_done; 2947 uint32_t send_done_count; 2948 uint8_t valid; 2949 2950 2951 #ifndef IFNET_BUF_RING 2952 /* an interrupt on a non-zero slice is implicitly valid 2953 since MSI-X irqs are not shared */ 2954 if (ss != sc->ss) { 2955 mxge_clean_rx_done(ss); 2956 *ss->irq_claim = be32toh(3); 2957 return; 2958 } 2959 #endif 2960 2961 /* make sure the DMA has finished */ 2962 if (!stats->valid) { 2963 return; 2964 } 2965 valid = stats->valid; 2966 2967 if (sc->legacy_irq) { 2968 /* lower legacy IRQ */ 2969 *sc->irq_deassert = 0; 2970 if (!mxge_deassert_wait) 2971 /* don't wait for conf. 
that irq is low */ 2972 stats->valid = 0; 2973 } else { 2974 stats->valid = 0; 2975 } 2976 2977 /* loop while waiting for legacy irq deassertion */ 2978 do { 2979 /* check for transmit completes and receives */ 2980 send_done_count = be32toh(stats->send_done_count); 2981 while ((send_done_count != tx->pkt_done) || 2982 (rx_done->entry[rx_done->idx].length != 0)) { 2983 if (send_done_count != tx->pkt_done) 2984 mxge_tx_done(ss, (int)send_done_count); 2985 mxge_clean_rx_done(ss); 2986 send_done_count = be32toh(stats->send_done_count); 2987 } 2988 if (sc->legacy_irq && mxge_deassert_wait) 2989 wmb(); 2990 } while (*((volatile uint8_t *) &stats->valid)); 2991 2992 /* fw link & error stats meaningful only on the first slice */ 2993 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 2994 if (sc->link_state != stats->link_up) { 2995 sc->link_state = stats->link_up; 2996 if (sc->link_state) { 2997 sc->ifp->if_link_state = LINK_STATE_UP; 2998 if_link_state_change(sc->ifp); 2999 if (mxge_verbose) 3000 device_printf(sc->dev, "link up\n"); 3001 } else { 3002 sc->ifp->if_link_state = LINK_STATE_DOWN; 3003 if_link_state_change(sc->ifp); 3004 if (mxge_verbose) 3005 device_printf(sc->dev, "link down\n"); 3006 } 3007 sc->need_media_probe = 1; 3008 } 3009 if (sc->rdma_tags_available != 3010 be32toh(stats->rdma_tags_available)) { 3011 sc->rdma_tags_available = 3012 be32toh(stats->rdma_tags_available); 3013 device_printf(sc->dev, "RDMA timed out! %d tags " 3014 "left\n", sc->rdma_tags_available); 3015 } 3016 3017 if (stats->link_down) { 3018 sc->down_cnt += stats->link_down; 3019 sc->link_state = 0; 3020 sc->ifp->if_link_state = LINK_STATE_DOWN; 3021 if_link_state_change(sc->ifp); 3022 } 3023 } 3024 3025 /* check to see if we have rx token to pass back */ 3026 if (valid & 0x1) 3027 *ss->irq_claim = be32toh(3); 3028 *(ss->irq_claim + 1) = be32toh(3); 3029 } 3030 3031 static void 3032 mxge_init(void *arg) 3033 { 3034 } 3035 3036 3037 3038 static void 3039 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 3040 { 3041 struct lro_entry *lro_entry; 3042 int i; 3043 3044 while (!SLIST_EMPTY(&ss->lro_free)) { 3045 lro_entry = SLIST_FIRST(&ss->lro_free); 3046 SLIST_REMOVE_HEAD(&ss->lro_free, next); 3047 kfree(lro_entry, M_DEVBUF); 3048 } 3049 3050 for (i = 0; i <= ss->rx_big.mask; i++) { 3051 if (ss->rx_big.info[i].m == NULL) 3052 continue; 3053 bus_dmamap_unload(ss->rx_big.dmat, 3054 ss->rx_big.info[i].map); 3055 m_freem(ss->rx_big.info[i].m); 3056 ss->rx_big.info[i].m = NULL; 3057 } 3058 3059 for (i = 0; i <= ss->rx_small.mask; i++) { 3060 if (ss->rx_small.info[i].m == NULL) 3061 continue; 3062 bus_dmamap_unload(ss->rx_small.dmat, 3063 ss->rx_small.info[i].map); 3064 m_freem(ss->rx_small.info[i].m); 3065 ss->rx_small.info[i].m = NULL; 3066 } 3067 3068 /* transmit ring used only on the first slice */ 3069 if (ss->tx.info == NULL) 3070 return; 3071 3072 for (i = 0; i <= ss->tx.mask; i++) { 3073 ss->tx.info[i].flag = 0; 3074 if (ss->tx.info[i].m == NULL) 3075 continue; 3076 bus_dmamap_unload(ss->tx.dmat, 3077 ss->tx.info[i].map); 3078 m_freem(ss->tx.info[i].m); 3079 ss->tx.info[i].m = NULL; 3080 } 3081 } 3082 3083 static void 3084 mxge_free_mbufs(mxge_softc_t *sc) 3085 { 3086 int slice; 3087 3088 for (slice = 0; slice < sc->num_slices; slice++) 3089 mxge_free_slice_mbufs(&sc->ss[slice]); 3090 } 3091 3092 static void 3093 mxge_free_slice_rings(struct mxge_slice_state *ss) 3094 { 3095 int i; 3096 3097 3098 if (ss->rx_done.entry != NULL) 3099 mxge_dma_free(&ss->rx_done.dma); 3100 ss->rx_done.entry = NULL; 
3101 
3102 	if (ss->tx.req_bytes != NULL)
3103 		kfree(ss->tx.req_bytes, M_DEVBUF);
3104 	ss->tx.req_bytes = NULL;
3105 
3106 	if (ss->tx.seg_list != NULL)
3107 		kfree(ss->tx.seg_list, M_DEVBUF);
3108 	ss->tx.seg_list = NULL;
3109 
3110 	if (ss->rx_small.shadow != NULL)
3111 		kfree(ss->rx_small.shadow, M_DEVBUF);
3112 	ss->rx_small.shadow = NULL;
3113 
3114 	if (ss->rx_big.shadow != NULL)
3115 		kfree(ss->rx_big.shadow, M_DEVBUF);
3116 	ss->rx_big.shadow = NULL;
3117 
3118 	if (ss->tx.info != NULL) {
3119 		if (ss->tx.dmat != NULL) {
3120 			for (i = 0; i <= ss->tx.mask; i++) {
3121 				bus_dmamap_destroy(ss->tx.dmat,
3122 						   ss->tx.info[i].map);
3123 			}
3124 			bus_dma_tag_destroy(ss->tx.dmat);
3125 		}
3126 		kfree(ss->tx.info, M_DEVBUF);
3127 	}
3128 	ss->tx.info = NULL;
3129 
3130 	if (ss->rx_small.info != NULL) {
3131 		if (ss->rx_small.dmat != NULL) {
3132 			for (i = 0; i <= ss->rx_small.mask; i++) {
3133 				bus_dmamap_destroy(ss->rx_small.dmat,
3134 						   ss->rx_small.info[i].map);
3135 			}
3136 			bus_dmamap_destroy(ss->rx_small.dmat,
3137 					   ss->rx_small.extra_map);
3138 			bus_dma_tag_destroy(ss->rx_small.dmat);
3139 		}
3140 		kfree(ss->rx_small.info, M_DEVBUF);
3141 	}
3142 	ss->rx_small.info = NULL;
3143 
3144 	if (ss->rx_big.info != NULL) {
3145 		if (ss->rx_big.dmat != NULL) {
3146 			for (i = 0; i <= ss->rx_big.mask; i++) {
3147 				bus_dmamap_destroy(ss->rx_big.dmat,
3148 						   ss->rx_big.info[i].map);
3149 			}
3150 			bus_dmamap_destroy(ss->rx_big.dmat,
3151 					   ss->rx_big.extra_map);
3152 			bus_dma_tag_destroy(ss->rx_big.dmat);
3153 		}
3154 		kfree(ss->rx_big.info, M_DEVBUF);
3155 	}
3156 	ss->rx_big.info = NULL;
3157 }
3158 
3159 static void
3160 mxge_free_rings(mxge_softc_t *sc)
3161 {
3162 	int slice;
3163 
3164 	for (slice = 0; slice < sc->num_slices; slice++)
3165 		mxge_free_slice_rings(&sc->ss[slice]);
3166 }
3167 
3168 static int
3169 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
3170 		       int tx_ring_entries)
3171 {
3172 	mxge_softc_t *sc = ss->sc;
3173 	size_t bytes;
3174 	int err, i;
3175 
3176 	err = ENOMEM;
3177 
3178 	/* allocate per-slice receive resources */
3179 
3180 	ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
3181 	ss->rx_done.mask = (2 * rx_ring_entries) - 1;
3182 
3183 	/* allocate the rx shadow rings */
3184 	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
3185 	ss->rx_small.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3186 	if (ss->rx_small.shadow == NULL)
3187 		return err;
3188 
3189 	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
3190 	ss->rx_big.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3191 	if (ss->rx_big.shadow == NULL)
3192 		return err;
3193 
3194 	/* allocate the rx host info rings */
3195 	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
3196 	ss->rx_small.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3197 	if (ss->rx_small.info == NULL)
3198 		return err;
3199 
3200 	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
3201 	ss->rx_big.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3202 	if (ss->rx_big.info == NULL)
3203 		return err;
3204 
3205 	/* allocate the rx busdma resources */
3206 	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
3207 				 1,			/* alignment */
3208 				 4096,			/* boundary */
3209 				 BUS_SPACE_MAXADDR,	/* low */
3210 				 BUS_SPACE_MAXADDR,	/* high */
3211 				 NULL, NULL,		/* filter */
3212 				 MHLEN,			/* maxsize */
3213 				 1,			/* num segs */
3214 				 MHLEN,			/* maxsegsize */
3215 				 BUS_DMA_ALLOCNOW,	/* flags */
3216 				 &ss->rx_small.dmat);	/* tag */
3217 	if (err != 0) {
3218 		device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3219 			      err);
3220 		return err;
3221 	}
3222 
3223 	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
3224 				 1,			/* alignment */
3225 #if MXGE_VIRT_JUMBOS
3226 				 4096,			/* boundary */
3227 #else
3228 				 0,			/* boundary */
3229 #endif
3230 				 BUS_SPACE_MAXADDR,	/* low */
3231 				 BUS_SPACE_MAXADDR,	/* high */
3232 				 NULL, NULL,		/* filter */
3233 				 3*4096,		/* maxsize */
3234 #if MXGE_VIRT_JUMBOS
3235 				 3,			/* num segs */
3236 				 4096,			/* maxsegsize*/
3237 #else
3238 				 1,			/* num segs */
3239 				 MJUM9BYTES,		/* maxsegsize*/
3240 #endif
3241 				 BUS_DMA_ALLOCNOW,	/* flags */
3242 				 &ss->rx_big.dmat);	/* tag */
3243 	if (err != 0) {
3244 		device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3245 			      err);
3246 		return err;
3247 	}
3248 	for (i = 0; i <= ss->rx_small.mask; i++) {
3249 		err = bus_dmamap_create(ss->rx_small.dmat, 0,
3250 					&ss->rx_small.info[i].map);
3251 		if (err != 0) {
3252 			device_printf(sc->dev, "Err %d rx_small dmamap\n",
3253 				      err);
3254 			return err;
3255 		}
3256 	}
3257 	err = bus_dmamap_create(ss->rx_small.dmat, 0,
3258 				&ss->rx_small.extra_map);
3259 	if (err != 0) {
3260 		device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3261 			      err);
3262 		return err;
3263 	}
3264 
3265 	for (i = 0; i <= ss->rx_big.mask; i++) {
3266 		err = bus_dmamap_create(ss->rx_big.dmat, 0,
3267 					&ss->rx_big.info[i].map);
3268 		if (err != 0) {
3269 			device_printf(sc->dev, "Err %d rx_big dmamap\n",
3270 				      err);
3271 			return err;
3272 		}
3273 	}
3274 	err = bus_dmamap_create(ss->rx_big.dmat, 0,
3275 				&ss->rx_big.extra_map);
3276 	if (err != 0) {
3277 		device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3278 			      err);
3279 		return err;
3280 	}
3281 
3282 	/* now allocate TX resources */
3283 
3284 #ifndef IFNET_BUF_RING
3285 	/* only use a single TX ring for now */
3286 	if (ss != ss->sc->ss)
3287 		return 0;
3288 #endif
3289 
3290 	ss->tx.mask = tx_ring_entries - 1;
3291 	ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3292 
3293 
3294 	/* allocate the tx request copy block */
3295 	bytes = 8 +
3296 		sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3297 	ss->tx.req_bytes = kmalloc(bytes, M_DEVBUF, M_WAITOK);
3298 	if (ss->tx.req_bytes == NULL)
3299 		return err;
3300 	/* ensure req_list entries are aligned to 8 bytes */
3301 	ss->tx.req_list = (mcp_kreq_ether_send_t *)
3302 		((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
3303 
3304 	/* allocate the tx busdma segment list */
3305 	bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3306 	ss->tx.seg_list = (bus_dma_segment_t *)
3307 		kmalloc(bytes, M_DEVBUF, M_WAITOK);
3308 	if (ss->tx.seg_list == NULL)
3309 		return err;
3310 
3311 	/* allocate the tx host info ring */
3312 	bytes = tx_ring_entries * sizeof (*ss->tx.info);
3313 	ss->tx.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3314 	if (ss->tx.info == NULL)
3315 		return err;
3316 
3317 	/* allocate the tx busdma resources */
3318 	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
3319 				 1,			/* alignment */
3320 				 sc->tx_boundary,	/* boundary */
3321 				 BUS_SPACE_MAXADDR,	/* low */
3322 				 BUS_SPACE_MAXADDR,	/* high */
3323 				 NULL, NULL,		/* filter */
3324 				 65536 + 256,		/* maxsize */
3325 				 ss->tx.max_desc - 2,	/* num segs */
3326 				 sc->tx_boundary,	/* maxsegsz */
3327 				 BUS_DMA_ALLOCNOW,	/* flags */
3328 				 &ss->tx.dmat);		/* tag */
3329 
3330 	if (err != 0) {
3331 		device_printf(sc->dev, "Err %d allocating tx dmat\n",
3332 			      err);
3333 		return err;
3334 	}
3335 
3336 	/* now use these tags to setup dmamaps for each slot
3337 	   in the ring */
3338 	for (i = 0; i <= ss->tx.mask; i++) {
3339 		err = bus_dmamap_create(ss->tx.dmat, 0,
3340 					&ss->tx.info[i].map);
3341 		if (err != 0) {
3342 			device_printf(sc->dev, "Err %d tx dmamap\n",
3343 				      err);
3344 			return err;
		}
3346 } 3347 return 0; 3348 3349 } 3350 3351 static int 3352 mxge_alloc_rings(mxge_softc_t *sc) 3353 { 3354 mxge_cmd_t cmd; 3355 int tx_ring_size; 3356 int tx_ring_entries, rx_ring_entries; 3357 int err, slice; 3358 3359 /* get ring sizes */ 3360 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3361 tx_ring_size = cmd.data0; 3362 if (err != 0) { 3363 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3364 goto abort; 3365 } 3366 3367 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3368 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3369 ifq_set_maxlen(&sc->ifp->if_snd, tx_ring_entries - 1); 3370 ifq_set_ready(&sc->ifp->if_snd); 3371 3372 for (slice = 0; slice < sc->num_slices; slice++) { 3373 err = mxge_alloc_slice_rings(&sc->ss[slice], 3374 rx_ring_entries, 3375 tx_ring_entries); 3376 if (err != 0) 3377 goto abort; 3378 } 3379 return 0; 3380 3381 abort: 3382 mxge_free_rings(sc); 3383 return err; 3384 3385 } 3386 3387 3388 static void 3389 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3390 { 3391 int bufsize = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN + MXGEFW_PAD; 3392 3393 if (bufsize < MCLBYTES) { 3394 /* easy, everything fits in a single buffer */ 3395 *big_buf_size = MCLBYTES; 3396 *cl_size = MCLBYTES; 3397 *nbufs = 1; 3398 return; 3399 } 3400 3401 if (bufsize < MJUMPAGESIZE) { 3402 /* still easy, everything still fits in a single buffer */ 3403 *big_buf_size = MJUMPAGESIZE; 3404 *cl_size = MJUMPAGESIZE; 3405 *nbufs = 1; 3406 return; 3407 } 3408 #if MXGE_VIRT_JUMBOS 3409 /* now we need to use virtually contiguous buffers */ 3410 *cl_size = MJUM9BYTES; 3411 *big_buf_size = 4096; 3412 *nbufs = mtu / 4096 + 1; 3413 /* needs to be a power of two, so round up */ 3414 if (*nbufs == 3) 3415 *nbufs = 4; 3416 #else 3417 *cl_size = MJUM9BYTES; 3418 *big_buf_size = MJUM9BYTES; 3419 *nbufs = 1; 3420 #endif 3421 } 3422 3423 static int 3424 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3425 { 3426 mxge_softc_t *sc; 3427 mxge_cmd_t cmd; 3428 bus_dmamap_t map; 3429 struct lro_entry *lro_entry; 3430 int err, i, slice; 3431 3432 3433 sc = ss->sc; 3434 slice = ss - sc->ss; 3435 3436 SLIST_INIT(&ss->lro_free); 3437 SLIST_INIT(&ss->lro_active); 3438 3439 for (i = 0; i < sc->lro_cnt; i++) { 3440 lro_entry = (struct lro_entry *) 3441 kmalloc(sizeof (*lro_entry), M_DEVBUF, 3442 M_NOWAIT | M_ZERO); 3443 if (lro_entry == NULL) { 3444 sc->lro_cnt = i; 3445 break; 3446 } 3447 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next); 3448 } 3449 /* get the lanai pointers to the send and receive rings */ 3450 3451 err = 0; 3452 #ifndef IFNET_BUF_RING 3453 /* We currently only send from the first slice */ 3454 if (slice == 0) { 3455 #endif 3456 cmd.data0 = slice; 3457 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3458 ss->tx.lanai = 3459 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3460 ss->tx.send_go = (volatile uint32_t *) 3461 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3462 ss->tx.send_stop = (volatile uint32_t *) 3463 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3464 #ifndef IFNET_BUF_RING 3465 } 3466 #endif 3467 cmd.data0 = slice; 3468 err |= mxge_send_cmd(sc, 3469 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3470 ss->rx_small.lanai = 3471 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3472 cmd.data0 = slice; 3473 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3474 ss->rx_big.lanai = 3475 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3476 3477 if (err != 0) { 
3478 		device_printf(sc->dev,
3479 			      "failed to get ring sizes or locations\n");
3480 		return EIO;
3481 	}
3482 
3483 	/* stock receive rings */
3484 	for (i = 0; i <= ss->rx_small.mask; i++) {
3485 		map = ss->rx_small.info[i].map;
3486 		err = mxge_get_buf_small(ss, map, i);
3487 		if (err) {
3488 			device_printf(sc->dev, "alloced %d/%d smalls\n",
3489 				      i, ss->rx_small.mask + 1);
3490 			return ENOMEM;
3491 		}
3492 	}
3493 	for (i = 0; i <= ss->rx_big.mask; i++) {
3494 		ss->rx_big.shadow[i].addr_low = 0xffffffff;
3495 		ss->rx_big.shadow[i].addr_high = 0xffffffff;
3496 	}
3497 	ss->rx_big.nbufs = nbufs;
3498 	ss->rx_big.cl_size = cl_size;
3499 	ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
3500 		EVL_ENCAPLEN + MXGEFW_PAD;
3501 	for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3502 		map = ss->rx_big.info[i].map;
3503 		err = mxge_get_buf_big(ss, map, i);
3504 		if (err) {
3505 			device_printf(sc->dev, "alloced %d/%d bigs\n",
3506 				      i, ss->rx_big.mask + 1);
3507 			return ENOMEM;
3508 		}
3509 	}
3510 	return 0;
3511 }
3512 
3513 static int
3514 mxge_open(mxge_softc_t *sc)
3515 {
3516 	mxge_cmd_t cmd;
3517 	int err, big_bytes, nbufs, slice, cl_size, i;
3518 	bus_addr_t bus;
3519 	volatile uint8_t *itable;
3520 	struct mxge_slice_state *ss;
3521 
3522 	ASSERT_SERIALIZED(sc->ifp->if_serializer);
3523 	/* Copy the MAC address in case it was overridden */
3524 	bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3525 
3526 	err = mxge_reset(sc, 1);
3527 	if (err != 0) {
3528 		device_printf(sc->dev, "failed to reset\n");
3529 		return EIO;
3530 	}
3531 
3532 	if (sc->num_slices > 1) {
3533 		/* setup the indirection table */
3534 		cmd.data0 = sc->num_slices;
3535 		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3536 				    &cmd);
3537 
3538 		err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3539 				     &cmd);
3540 		if (err != 0) {
3541 			device_printf(sc->dev,
3542 				      "failed to setup rss tables\n");
3543 			return err;
3544 		}
3545 
3546 		/* just enable an identity mapping */
3547 		itable = sc->sram + cmd.data0;
3548 		for (i = 0; i < sc->num_slices; i++)
3549 			itable[i] = (uint8_t)i;
3550 
3551 		cmd.data0 = 1;
3552 		cmd.data1 = mxge_rss_hash_type;
3553 		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3554 		if (err != 0) {
3555 			device_printf(sc->dev, "failed to enable slices\n");
3556 			return err;
3557 		}
3558 	}
3559 
3560 
3561 	mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3562 
3563 	cmd.data0 = nbufs;
3564 	err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3565 			    &cmd);
3566 	/* error is only meaningful if we're trying to set
3567 	   MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3568 	if (err && nbufs > 1) {
3569 		device_printf(sc->dev,
3570 			      "Failed to set always-use-n to %d\n",
3571 			      nbufs);
3572 		return EIO;
3573 	}
3574 	/* Give the firmware the mtu and the big and small buffer
3575 	   sizes. The firmware wants the big buf size to be a power
3576 	   of two.
Luckily, FreeBSD's clusters are powers of two */ 3577 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + EVL_ENCAPLEN; 3578 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3579 cmd.data0 = MHLEN - MXGEFW_PAD; 3580 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3581 &cmd); 3582 cmd.data0 = big_bytes; 3583 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3584 3585 if (err != 0) { 3586 device_printf(sc->dev, "failed to setup params\n"); 3587 goto abort; 3588 } 3589 3590 /* Now give him the pointer to the stats block */ 3591 for (slice = 0; 3592 #ifdef IFNET_BUF_RING 3593 slice < sc->num_slices; 3594 #else 3595 slice < 1; 3596 #endif 3597 slice++) { 3598 ss = &sc->ss[slice]; 3599 cmd.data0 = 3600 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3601 cmd.data1 = 3602 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3603 cmd.data2 = sizeof(struct mcp_irq_data); 3604 cmd.data2 |= (slice << 16); 3605 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3606 } 3607 3608 if (err != 0) { 3609 bus = sc->ss->fw_stats_dma.bus_addr; 3610 bus += offsetof(struct mcp_irq_data, send_done_count); 3611 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3612 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3613 err = mxge_send_cmd(sc, 3614 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3615 &cmd); 3616 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3617 sc->fw_multicast_support = 0; 3618 } else { 3619 sc->fw_multicast_support = 1; 3620 } 3621 3622 if (err != 0) { 3623 device_printf(sc->dev, "failed to setup params\n"); 3624 goto abort; 3625 } 3626 3627 for (slice = 0; slice < sc->num_slices; slice++) { 3628 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3629 if (err != 0) { 3630 device_printf(sc->dev, "couldn't open slice %d\n", 3631 slice); 3632 goto abort; 3633 } 3634 } 3635 3636 /* Finally, start the firmware running */ 3637 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3638 if (err) { 3639 device_printf(sc->dev, "Couldn't bring up link\n"); 3640 goto abort; 3641 } 3642 #ifdef IFNET_BUF_RING 3643 for (slice = 0; slice < sc->num_slices; slice++) { 3644 ss = &sc->ss[slice]; 3645 ss->if_flags |= IFF_RUNNING; 3646 ss->if_flags &= ~IFF_OACTIVE; 3647 } 3648 #endif 3649 sc->ifp->if_flags |= IFF_RUNNING; 3650 sc->ifp->if_flags &= ~IFF_OACTIVE; 3651 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3652 3653 return 0; 3654 3655 3656 abort: 3657 mxge_free_mbufs(sc); 3658 3659 return err; 3660 } 3661 3662 static int 3663 mxge_close(mxge_softc_t *sc) 3664 { 3665 mxge_cmd_t cmd; 3666 int err, old_down_cnt; 3667 #ifdef IFNET_BUF_RING 3668 struct mxge_slice_state *ss; 3669 int slice; 3670 #endif 3671 3672 ASSERT_SERIALIZED(sc->ifp->if_serializer); 3673 callout_stop(&sc->co_hdl); 3674 #ifdef IFNET_BUF_RING 3675 for (slice = 0; slice < sc->num_slices; slice++) { 3676 ss = &sc->ss[slice]; 3677 ss->if_flags &= ~IFF_RUNNING; 3678 } 3679 #endif 3680 sc->ifp->if_flags &= ~IFF_RUNNING; 3681 old_down_cnt = sc->down_cnt; 3682 wmb(); 3683 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3684 if (err) { 3685 device_printf(sc->dev, "Couldn't bring down link\n"); 3686 } 3687 if (old_down_cnt == sc->down_cnt) { 3688 /* wait for down irq */ 3689 DELAY(10 * sc->intr_coal_delay); 3690 } 3691 wmb(); 3692 if (old_down_cnt == sc->down_cnt) { 3693 device_printf(sc->dev, "never got down irq\n"); 3694 } 3695 3696 mxge_free_mbufs(sc); 3697 3698 return 0; 3699 } 3700 3701 static void 3702 mxge_setup_cfg_space(mxge_softc_t *sc) 3703 { 3704 device_t dev = sc->dev; 3705 int reg; 3706 uint16_t cmd, lnk, pectl; 
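	/*
	 * Offsets used below, relative to the PCIe capability base
	 * (standard PCIe config layout): 0x8 is Device Control, whose
	 * bits 14:12 hold Max_Read_Request_Size (5 == 4096 bytes);
	 * 0x12 is Link Status, whose bits 9:4 hold the negotiated
	 * link width.
	 */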
3707 
3708 	/* find the PCIe link width and set max read request to 4KB */
3709 	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
3710 		lnk = pci_read_config(dev, reg + 0x12, 2);
3711 		sc->link_width = (lnk >> 4) & 0x3f;
3712 
3713 		pectl = pci_read_config(dev, reg + 0x8, 2);
3714 		pectl = (pectl & ~0x7000) | (5 << 12);
3715 		pci_write_config(dev, reg + 0x8, pectl, 2);
3716 	}
3717 
3718 	/* Enable DMA and Memory space access */
3719 	pci_enable_busmaster(dev);
3720 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3721 	cmd |= PCIM_CMD_MEMEN;
3722 	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3723 }
3724 
3725 static uint32_t
3726 mxge_read_reboot(mxge_softc_t *sc)
3727 {
3728 	device_t dev = sc->dev;
3729 	uint32_t vs;
3730 
3731 	/* find the vendor specific offset */
3732 	if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
3733 		device_printf(sc->dev,
3734 			      "could not find vendor specific offset\n");
3735 		return (uint32_t)-1;
3736 	}
3737 	/* enable read32 mode */
3738 	pci_write_config(dev, vs + 0x10, 0x3, 1);
3739 	/* tell NIC which register to read */
3740 	pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3741 	return (pci_read_config(dev, vs + 0x14, 4));
3742 }
3743 
3744 static int
3745 mxge_watchdog_reset(mxge_softc_t *sc, int slice)
3746 {
3747 	struct pci_devinfo *dinfo;
3748 	mxge_tx_ring_t *tx;
3749 	int err;
3750 	uint32_t reboot;
3751 	uint16_t cmd;
3752 
3753 	err = ENXIO;
3754 
3755 	device_printf(sc->dev, "Watchdog reset!\n");
3756 
3757 	/*
3758 	 * check to see if the NIC rebooted.  If it did, then all of
3759 	 * PCI config space has been reset, and things like the
3760 	 * busmaster bit will be zero.  If this is the case, then we
3761 	 * must restore PCI config space before the NIC can be used
3762 	 * again
3763 	 */
3764 	cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3765 	if (cmd == 0xffff) {
3766 		/*
3767 		 * maybe the watchdog caught the NIC rebooting; wait
3768 		 * up to 100ms for it to finish.
If it does not come 3769 * back, then give up 3770 */ 3771 DELAY(1000*100); 3772 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3773 if (cmd == 0xffff) { 3774 device_printf(sc->dev, "NIC disappeared!\n"); 3775 return (err); 3776 } 3777 } 3778 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3779 /* print the reboot status */ 3780 reboot = mxge_read_reboot(sc); 3781 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3782 reboot); 3783 /* restore PCI configuration space */ 3784 dinfo = device_get_ivars(sc->dev); 3785 pci_cfg_restore(sc->dev, dinfo); 3786 3787 /* and redo any changes we made to our config space */ 3788 mxge_setup_cfg_space(sc); 3789 3790 if (sc->ifp->if_flags & IFF_RUNNING) { 3791 mxge_close(sc); 3792 err = mxge_open(sc); 3793 } 3794 } else { 3795 tx = &sc->ss[slice].tx; 3796 device_printf(sc->dev, 3797 "NIC did not reboot, slice %d ring state:\n", 3798 slice); 3799 device_printf(sc->dev, 3800 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3801 tx->req, tx->done, tx->queue_active); 3802 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3803 tx->activate, tx->deactivate); 3804 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3805 tx->pkt_done, 3806 be32toh(sc->ss->fw_stats->send_done_count)); 3807 device_printf(sc->dev, "not resetting\n"); 3808 } 3809 return (err); 3810 } 3811 3812 static int 3813 mxge_watchdog(mxge_softc_t *sc) 3814 { 3815 mxge_tx_ring_t *tx; 3816 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3817 int i, err = 0; 3818 3819 /* see if we have outstanding transmits, which 3820 have been pending for more than mxge_ticks */ 3821 for (i = 0; 3822 #ifdef IFNET_BUF_RING 3823 (i < sc->num_slices) && (err == 0); 3824 #else 3825 (i < 1) && (err == 0); 3826 #endif 3827 i++) { 3828 tx = &sc->ss[i].tx; 3829 if (tx->req != tx->done && 3830 tx->watchdog_req != tx->watchdog_done && 3831 tx->done == tx->watchdog_done) { 3832 /* check for pause blocking before resetting */ 3833 if (tx->watchdog_rx_pause == rx_pause) 3834 err = mxge_watchdog_reset(sc, i); 3835 else 3836 device_printf(sc->dev, "Flow control blocking " 3837 "xmits, check link partner\n"); 3838 } 3839 3840 tx->watchdog_req = tx->req; 3841 tx->watchdog_done = tx->done; 3842 tx->watchdog_rx_pause = rx_pause; 3843 } 3844 3845 if (sc->need_media_probe) 3846 mxge_media_probe(sc); 3847 return (err); 3848 } 3849 3850 static void 3851 mxge_update_stats(mxge_softc_t *sc) 3852 { 3853 struct mxge_slice_state *ss; 3854 u_long ipackets = 0; 3855 u_long opackets = 0; 3856 #ifdef IFNET_BUF_RING 3857 u_long obytes = 0; 3858 u_long omcasts = 0; 3859 u_long odrops = 0; 3860 #endif 3861 u_long oerrors = 0; 3862 int slice; 3863 3864 for (slice = 0; slice < sc->num_slices; slice++) { 3865 ss = &sc->ss[slice]; 3866 ipackets += ss->ipackets; 3867 opackets += ss->opackets; 3868 #ifdef IFNET_BUF_RING 3869 obytes += ss->obytes; 3870 omcasts += ss->omcasts; 3871 odrops += ss->tx.br->br_drops; 3872 #endif 3873 oerrors += ss->oerrors; 3874 } 3875 sc->ifp->if_ipackets = ipackets; 3876 sc->ifp->if_opackets = opackets; 3877 #ifdef IFNET_BUF_RING 3878 sc->ifp->if_obytes = obytes; 3879 sc->ifp->if_omcasts = omcasts; 3880 sc->ifp->if_snd.ifq_drops = odrops; 3881 #endif 3882 sc->ifp->if_oerrors = oerrors; 3883 } 3884 3885 static void 3886 mxge_tick(void *arg) 3887 { 3888 mxge_softc_t *sc = arg; 3889 int err = 0; 3890 3891 lwkt_serialize_enter(sc->ifp->if_serializer); 3892 /* aggregate stats from different slices */ 3893 mxge_update_stats(sc); 3894 if (!sc->watchdog_countdown) { 3895 err = mxge_watchdog(sc); 3896 
sc->watchdog_countdown = 4;
3897 	}
3898 	sc->watchdog_countdown--;
3899 	if (err == 0)
3900 		callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3901 	lwkt_serialize_exit(sc->ifp->if_serializer);
3902 }
3903 
3904 static int
3905 mxge_media_change(struct ifnet *ifp)
3906 {
3907 	return EINVAL;
3908 }
3909 
3910 static int
3911 mxge_change_mtu(mxge_softc_t *sc, int mtu)
3912 {
3913 	struct ifnet *ifp = sc->ifp;
3914 	int real_mtu, old_mtu;
3915 	int err = 0;
3916 
3917 	if (ifp->if_serializer)
3918 		ASSERT_SERIALIZED(ifp->if_serializer);
3919 
3920 	real_mtu = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN;
3921 	if ((real_mtu > sc->max_mtu) || real_mtu < 60)
3922 		return EINVAL;
3923 	old_mtu = ifp->if_mtu;
3924 	ifp->if_mtu = mtu;
3925 	if (ifp->if_flags & IFF_RUNNING) {
3926 		mxge_close(sc);
3927 		err = mxge_open(sc);
3928 		if (err != 0) {
3929 			ifp->if_mtu = old_mtu;
3930 			mxge_close(sc);
3931 			(void) mxge_open(sc);
3932 		}
3933 	}
3934 	return err;
3935 }
3936 
3937 static void
3938 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3939 {
3940 	mxge_softc_t *sc = ifp->if_softc;
3941 
3942 
3943 	if (sc == NULL)
3944 		return;
3945 	ifmr->ifm_status = IFM_AVALID;
3946 	ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
3947 	ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
3948 	ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0;
3949 }
3950 
3951 static int
3952 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
3953 {
3954 	mxge_softc_t *sc = ifp->if_softc;
3955 	struct ifreq *ifr = (struct ifreq *)data;
3956 	int err, mask;
3957 
3958 	(void)cr;
3959 	err = 0;
3960 	ASSERT_SERIALIZED(ifp->if_serializer);
3961 	switch (command) {
3962 	case SIOCSIFADDR:
3963 	case SIOCGIFADDR:
3964 		err = ether_ioctl(ifp, command, data);
3965 		break;
3966 
3967 	case SIOCSIFMTU:
3968 		err = mxge_change_mtu(sc, ifr->ifr_mtu);
3969 		break;
3970 
3971 	case SIOCSIFFLAGS:
3972 		if (sc->dying) {
3973 			return EINVAL;
3974 		}
3975 		if (ifp->if_flags & IFF_UP) {
3976 			if (!(ifp->if_flags & IFF_RUNNING)) {
3977 				err = mxge_open(sc);
3978 			} else {
3979 				/* take care of promisc and allmulti
3980 				   flag changes */
3981 				mxge_change_promisc(sc,
3982 						    ifp->if_flags & IFF_PROMISC);
3983 				mxge_set_multicast_list(sc);
3984 			}
3985 		} else {
3986 			if (ifp->if_flags & IFF_RUNNING) {
3987 				mxge_close(sc);
3988 			}
3989 		}
3990 		break;
3991 
3992 	case SIOCADDMULTI:
3993 	case SIOCDELMULTI:
3994 		mxge_set_multicast_list(sc);
3995 		break;
3996 
3997 	case SIOCSIFCAP:
3998 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3999 		if (mask & IFCAP_TXCSUM) {
4000 			if (IFCAP_TXCSUM & ifp->if_capenable) {
4001 				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
4002 				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
4003 						      | CSUM_TSO);
4004 			} else {
4005 				ifp->if_capenable |= IFCAP_TXCSUM;
4006 				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
4007 			}
4008 		} else if (mask & IFCAP_RXCSUM) {
4009 			if (IFCAP_RXCSUM & ifp->if_capenable) {
4010 				ifp->if_capenable &= ~IFCAP_RXCSUM;
4011 				sc->csum_flag = 0;
4012 			} else {
4013 				ifp->if_capenable |= IFCAP_RXCSUM;
4014 				sc->csum_flag = 1;
4015 			}
4016 		}
4017 		if (mask & IFCAP_TSO4) {
4018 			if (IFCAP_TSO4 & ifp->if_capenable) {
4019 				ifp->if_capenable &= ~IFCAP_TSO4;
4020 				ifp->if_hwassist &= ~CSUM_TSO;
4021 			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
4022 				ifp->if_capenable |= IFCAP_TSO4;
4023 				ifp->if_hwassist |= CSUM_TSO;
4024 			} else {
4025 				kprintf("mxge requires tx checksum offload"
4026 					" be enabled to use TSO\n");
4027 				err = EINVAL;
4028 			}
4029 		}
4030 		if (mask & IFCAP_LRO) {
4031 			if (IFCAP_LRO & ifp->if_capenable)
4032 				err = mxge_change_lro_locked(sc, 0);
4033 			else
4034 				err = 
static void
mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	mxge_softc_t *sc = ifp->if_softc;

	if (sc == NULL)
		return;
	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
	ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
	ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0;
}

static int
mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int err, mask;

	(void)cr;
	err = 0;
	ASSERT_SERIALIZED(ifp->if_serializer);
	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		err = ether_ioctl(ifp, command, data);
		break;

	case SIOCSIFMTU:
		err = mxge_change_mtu(sc, ifr->ifr_mtu);
		break;

	case SIOCSIFFLAGS:
		if (sc->dying) {
			return EINVAL;
		}
		if (ifp->if_flags & IFF_UP) {
			if (!(ifp->if_flags & IFF_RUNNING)) {
				err = mxge_open(sc);
			} else {
				/* take care of promisc and allmulti
				   flag changes */
				mxge_change_promisc(sc,
						    ifp->if_flags & IFF_PROMISC);
				mxge_set_multicast_list(sc);
			}
		} else {
			if (ifp->if_flags & IFF_RUNNING) {
				mxge_close(sc);
			}
		}
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		mxge_set_multicast_list(sc);
		break;

	case SIOCSIFCAP:
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
						      | CSUM_TSO);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM;
				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
			}
		} else if (mask & IFCAP_RXCSUM) {
			if (IFCAP_RXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_RXCSUM;
				sc->csum_flag = 0;
			} else {
				ifp->if_capenable |= IFCAP_RXCSUM;
				sc->csum_flag = 1;
			}
		}
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				ifp->if_hwassist &= ~CSUM_TSO;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				kprintf("mxge requires tx checksum offload"
					" to be enabled to use TSO\n");
				err = EINVAL;
			}
		}
		if (mask & IFCAP_LRO) {
			if (IFCAP_LRO & ifp->if_capenable)
				err = mxge_change_lro_locked(sc, 0);
			else
				err = mxge_change_lro_locked(sc, mxge_lro_cnt);
		}
		if (mask & IFCAP_VLAN_HWTAGGING)
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
		VLAN_CAPABILITIES(ifp);

		break;

	case SIOCGIFMEDIA:
		err = ifmedia_ioctl(ifp, (struct ifreq *)data,
				    &sc->media, command);
		break;

	default:
		err = ENOTTY;
	}
	return err;
}

static void
mxge_fetch_tunables(mxge_softc_t *sc)
{
	TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
	TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
			  &mxge_flow_control);
	TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
			  &mxge_intr_coal_delay);
	TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
			  &mxge_nvidia_ecrc_enable);
	TUNABLE_INT_FETCH("hw.mxge.force_firmware",
			  &mxge_force_firmware);
	TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
			  &mxge_deassert_wait);
	TUNABLE_INT_FETCH("hw.mxge.verbose",
			  &mxge_verbose);
	TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
	TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);
	TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
	TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
	TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
	if (sc->lro_cnt != 0)
		mxge_lro_cnt = sc->lro_cnt;

	if (bootverbose)
		mxge_verbose = 1;
	if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
		mxge_intr_coal_delay = 30;
	if (mxge_ticks == 0)
		mxge_ticks = hz / 2;
	sc->pause = mxge_flow_control;
	if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 ||
	    mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
		mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
	}
	if (mxge_initial_mtu > ETHERMTU_JUMBO ||
	    mxge_initial_mtu < ETHER_MIN_LEN)
		mxge_initial_mtu = ETHERMTU_JUMBO;
}
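
/*
 * Editorial example (hypothetical values): the tunables above are read
 * from the kernel environment, so they can be set in /boot/loader.conf
 * before the driver loads, e.g.:
 *
 *	hw.mxge.max_slices="4"
 *	hw.mxge.intr_coal_delay="30"
 *	hw.mxge.flow_control_enabled="1"
 */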
static void
mxge_free_slices(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int i;

	if (sc->ss == NULL)
		return;

	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];
		if (ss->fw_stats != NULL) {
			mxge_dma_free(&ss->fw_stats_dma);
			ss->fw_stats = NULL;
#ifdef IFNET_BUF_RING
			if (ss->tx.br != NULL) {
				drbr_free(ss->tx.br, M_DEVBUF);
				ss->tx.br = NULL;
			}
#endif
		}
		if (ss->rx_done.entry != NULL) {
			mxge_dma_free(&ss->rx_done.dma);
			ss->rx_done.entry = NULL;
		}
	}
	kfree(sc->ss, M_DEVBUF);
	sc->ss = NULL;
}

static int
mxge_alloc_slices(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct mxge_slice_state *ss;
	size_t bytes;
	int err, i, max_intr_slots;

	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		return err;
	}
	sc->rx_ring_size = cmd.data0;
	max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));

	bytes = sizeof (*sc->ss) * sc->num_slices;
	sc->ss = kmalloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sc->ss == NULL)
		return (ENOMEM);
	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];

		ss->sc = sc;

		/* allocate per-slice rx interrupt queues */
		bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
		err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
		if (err != 0)
			goto abort;
		ss->rx_done.entry = ss->rx_done.dma.addr;
		bzero(ss->rx_done.entry, bytes);

		/*
		 * allocate the per-slice firmware stats; stats
		 * (including tx) are used only on the first
		 * slice for now
		 */
#ifndef IFNET_BUF_RING
		if (i > 0)
			continue;
#endif

		bytes = sizeof (*ss->fw_stats);
		err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
				     sizeof (*ss->fw_stats), 64);
		if (err != 0)
			goto abort;
		ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
#ifdef IFNET_BUF_RING
		ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
					   &ss->tx.lock);
#endif
	}

	return (0);

abort:
	mxge_free_slices(sc);
	return (ENOMEM);
}

static void
mxge_slice_probe(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	char *old_fw;
	int msix_cnt, status, max_intr_slots;

	sc->num_slices = 1;
	/*
	 * don't enable multiple slices if they are disabled by the
	 * tunable, or if this is not an SMP system
	 */
	if (mxge_max_slices == 0 || mxge_max_slices == 1 || ncpus < 2)
		return;

	/* see how many MSI-X interrupts are available */
	msix_cnt = pci_msix_count(sc->dev);
	if (msix_cnt < 2)
		return;

	/* now load the slice-aware firmware and see what it supports */
	old_fw = sc->fw_name;
	if (old_fw == mxge_fw_aligned)
		sc->fw_name = mxge_fw_rss_aligned;
	else
		sc->fw_name = mxge_fw_rss_unaligned;
	status = mxge_load_firmware(sc, 0);
	if (status != 0) {
		device_printf(sc->dev, "Falling back to a single slice\n");
		return;
	}

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		goto abort_with_fw;
	}

	/* get rx ring size */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		goto abort_with_fw;
	}
	max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));

	/* tell it the size of the interrupt queues */
	cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
		goto abort_with_fw;
	}

	/* ask the maximum number of slices it supports */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
	if (status != 0) {
		device_printf(sc->dev,
			      "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
		goto abort_with_fw;
	}
	sc->num_slices = cmd.data0;
	if (sc->num_slices > msix_cnt)
		sc->num_slices = msix_cnt;

	if (mxge_max_slices == -1) {
		/* cap to number of CPUs in system */
		if (sc->num_slices > ncpus)
			sc->num_slices = ncpus;
	} else {
		if (sc->num_slices > mxge_max_slices)
			sc->num_slices = mxge_max_slices;
	}
	/* make sure it is a power of two */
	while (sc->num_slices & (sc->num_slices - 1))
		sc->num_slices--;

	if (mxge_verbose)
		device_printf(sc->dev, "using %d slices\n",
			      sc->num_slices);

	return;

abort_with_fw:
	sc->fw_name = old_fw;
	(void) mxge_load_firmware(sc, 0);
}
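
/*
 * Editorial note: the decrement loop at the end of mxge_slice_probe()
 * rounds num_slices down to a power of two, e.g. a limit of 6 slices
 * (from the firmware, the MSI-X count, or the CPU count) becomes 4.
 */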
static int
mxge_add_msix_irqs(mxge_softc_t *sc)
{
	size_t bytes;
	int count, err, i, rid;

	rid = PCIR_BAR(2);
	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
						    &rid, RF_ACTIVE);
	if (sc->msix_table_res == NULL) {
		device_printf(sc->dev, "couldn't alloc MSIX table res\n");
		return ENXIO;
	}

	count = sc->num_slices;
	err = pci_alloc_msix(sc->dev, &count);
	if (err != 0) {
		device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d, "
			      "err = %d\n", sc->num_slices, err);
		goto abort_with_msix_table;
	}
	if (count < sc->num_slices) {
		device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
			      sc->num_slices, count);
		device_printf(sc->dev,
			      "Try setting hw.mxge.max_slices to %d\n",
			      count);
		err = ENOSPC;
		goto abort_with_msix;
	}
	bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
	sc->msix_irq_res = kmalloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_irq_res == NULL) {
		err = ENOMEM;
		goto abort_with_msix;
	}

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
							     SYS_RES_IRQ,
							     &rid, RF_ACTIVE);
		if (sc->msix_irq_res[i] == NULL) {
			device_printf(sc->dev, "couldn't allocate IRQ res"
				      " for message %d\n", i);
			err = ENXIO;
			goto abort_with_res;
		}
	}

	bytes = sizeof (*sc->msix_ih) * sc->num_slices;
	sc->msix_ih = kmalloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);

	for (i = 0; i < sc->num_slices; i++) {
		err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
				     INTR_MPSAFE,
				     mxge_intr, &sc->ss[i], &sc->msix_ih[i],
				     sc->ifp->if_serializer);
		if (err != 0) {
			device_printf(sc->dev, "couldn't setup intr for "
				      "message %d\n", i);
			goto abort_with_intr;
		}
	}

	if (mxge_verbose) {
		device_printf(sc->dev, "using %d msix IRQs:",
			      sc->num_slices);
		for (i = 0; i < sc->num_slices; i++)
			kprintf(" %ld", rman_get_start(sc->msix_irq_res[i]));
		kprintf("\n");
	}
	return (0);

abort_with_intr:
	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	kfree(sc->msix_ih, M_DEVBUF);

abort_with_res:
	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	kfree(sc->msix_irq_res, M_DEVBUF);

abort_with_msix:
	pci_release_msi(sc->dev);

abort_with_msix_table:
	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	return err;
}
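
/*
 * Editorial note: SYS_RES_IRQ rid 0 is the legacy INTx line, and
 * message-signaled vectors start at rid 1; hence rid = i + 1 in the
 * MSI-X loops above, and rid 1 (MSI) versus rid 0 (INTx) in
 * mxge_add_single_irq() below.
 */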
"INTx" : "MSI", 4409 rman_get_start(sc->irq_res)); 4410 err = bus_setup_intr(sc->dev, sc->irq_res, 4411 INTR_MPSAFE, 4412 mxge_intr, &sc->ss[0], &sc->ih, 4413 sc->ifp->if_serializer); 4414 if (err != 0) { 4415 bus_release_resource(sc->dev, SYS_RES_IRQ, 4416 sc->legacy_irq ? 0 : 1, sc->irq_res); 4417 if (!sc->legacy_irq) 4418 pci_release_msi(sc->dev); 4419 } 4420 return err; 4421 } 4422 4423 static void 4424 mxge_rem_msix_irqs(mxge_softc_t *sc) 4425 { 4426 int i, rid; 4427 4428 for (i = 0; i < sc->num_slices; i++) { 4429 if (sc->msix_ih[i] != NULL) { 4430 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4431 sc->msix_ih[i]); 4432 sc->msix_ih[i] = NULL; 4433 } 4434 } 4435 kfree(sc->msix_ih, M_DEVBUF); 4436 4437 for (i = 0; i < sc->num_slices; i++) { 4438 rid = i + 1; 4439 if (sc->msix_irq_res[i] != NULL) 4440 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4441 sc->msix_irq_res[i]); 4442 sc->msix_irq_res[i] = NULL; 4443 } 4444 kfree(sc->msix_irq_res, M_DEVBUF); 4445 4446 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4447 sc->msix_table_res); 4448 4449 pci_release_msi(sc->dev); 4450 return; 4451 } 4452 4453 static void 4454 mxge_rem_single_irq(mxge_softc_t *sc) 4455 { 4456 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 4457 bus_release_resource(sc->dev, SYS_RES_IRQ, 4458 sc->legacy_irq ? 0 : 1, sc->irq_res); 4459 if (!sc->legacy_irq) 4460 pci_release_msi(sc->dev); 4461 } 4462 4463 static void 4464 mxge_rem_irq(mxge_softc_t *sc) 4465 { 4466 if (sc->num_slices > 1) 4467 mxge_rem_msix_irqs(sc); 4468 else 4469 mxge_rem_single_irq(sc); 4470 } 4471 4472 static int 4473 mxge_add_irq(mxge_softc_t *sc) 4474 { 4475 int err; 4476 4477 if (sc->num_slices > 1) 4478 err = mxge_add_msix_irqs(sc); 4479 else 4480 err = mxge_add_single_irq(sc); 4481 4482 if (0 && err == 0 && sc->num_slices > 1) { 4483 mxge_rem_msix_irqs(sc); 4484 err = mxge_add_msix_irqs(sc); 4485 } 4486 return err; 4487 } 4488 4489 4490 static int 4491 mxge_attach(device_t dev) 4492 { 4493 mxge_softc_t *sc = device_get_softc(dev); 4494 struct ifnet *ifp = &sc->arpcom.ac_if; 4495 int err, rid; 4496 4497 /* 4498 * avoid rewriting half the lines in this file to use 4499 * &sc->arpcom.ac_if instead 4500 */ 4501 sc->ifp = ifp; 4502 sc->dev = dev; 4503 mxge_fetch_tunables(sc); 4504 4505 err = bus_dma_tag_create(NULL, /* parent */ 4506 1, /* alignment */ 4507 0, /* boundary */ 4508 BUS_SPACE_MAXADDR, /* low */ 4509 BUS_SPACE_MAXADDR, /* high */ 4510 NULL, NULL, /* filter */ 4511 65536 + 256, /* maxsize */ 4512 MXGE_MAX_SEND_DESC, /* num segs */ 4513 65536, /* maxsegsize */ 4514 0, /* flags */ 4515 &sc->parent_dmat); /* tag */ 4516 4517 if (err != 0) { 4518 device_printf(sc->dev, "Err %d allocating parent dmat\n", 4519 err); 4520 goto abort_with_nothing; 4521 } 4522 4523 sc->ifp = ifp; 4524 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4525 4526 callout_init_mp(&sc->co_hdl); 4527 4528 mxge_setup_cfg_space(sc); 4529 4530 /* Map the board into the kernel */ 4531 rid = PCIR_BARS; 4532 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, 4533 ~0, 1, RF_ACTIVE); 4534 if (sc->mem_res == NULL) { 4535 device_printf(dev, "could not map memory\n"); 4536 err = ENXIO; 4537 goto abort_with_nothing; 4538 } 4539 sc->sram = rman_get_virtual(sc->mem_res); 4540 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4541 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4542 device_printf(dev, "impossible memory region size %ld\n", 4543 rman_get_size(sc->mem_res)); 4544 err = ENXIO; 4545 goto abort_with_mem_res; 4546 } 

	/* make NULL terminated copy of the EEPROM strings section of
	   lanai SRAM */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
				sc->eeprom_strings,
				MXGE_EEPROM_STRINGS_SIZE - 2);
	err = mxge_parse_strings(sc);
	if (err != 0)
		goto abort_with_mem_res;

	/* Enable write combining for efficient use of PCIe bus */
	mxge_enable_wc(sc);

	/* Allocate the out of band dma memory */
	err = mxge_dma_alloc(sc, &sc->cmd_dma,
			     sizeof (mxge_cmd_t), 64);
	if (err != 0)
		goto abort_with_mem_res;
	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0)
		goto abort_with_cmd_dma;

	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0)
		goto abort_with_zeropad_dma;

	/* select & load the firmware */
	err = mxge_select_firmware(sc);
	if (err != 0)
		goto abort_with_dmabench;
	sc->intr_coal_delay = mxge_intr_coal_delay;

	mxge_slice_probe(sc);
	err = mxge_alloc_slices(sc);
	if (err != 0)
		goto abort_with_dmabench;

	err = mxge_reset(sc, 0);
	if (err != 0)
		goto abort_with_slices;

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to allocate rings\n");
		goto abort_with_slices;
	}

	ifp->if_baudrate = IF_Gbps(10UL);
	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
		IFCAP_VLAN_MTU;
#ifdef INET
	ifp->if_capabilities |= IFCAP_LRO;
#endif

#ifdef MXGE_NEW_VLAN_API
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
#endif

	sc->max_mtu = mxge_max_mtu(sc);
	if (sc->max_mtu >= 9000)
		ifp->if_capabilities |= IFCAP_JUMBO_MTU;
	else
		device_printf(dev, "MTU limited to %d.  Install "
			      "latest firmware for 9000 byte jumbo support\n",
			      sc->max_mtu - ETHER_HDR_LEN);
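
	/*
	 * Editorial example (assumes jumbo-capable firmware): setting
	 * hw.mxge.initial_mtu="9000" in /boot/loader.conf makes the
	 * mxge_change_mtu() call near the end of attach bring the
	 * interface up with a jumbo MTU instead of the ETHERMTU default.
	 */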
Install " 4614 "latest firmware for 9000 byte jumbo support\n", 4615 sc->max_mtu - ETHER_HDR_LEN); 4616 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4617 ifp->if_capenable = ifp->if_capabilities; 4618 if (sc->lro_cnt == 0) 4619 ifp->if_capenable &= ~IFCAP_LRO; 4620 sc->csum_flag = 1; 4621 ifp->if_init = mxge_init; 4622 ifp->if_softc = sc; 4623 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4624 ifp->if_ioctl = mxge_ioctl; 4625 ifp->if_start = mxge_start; 4626 /* Initialise the ifmedia structure */ 4627 ifmedia_init(&sc->media, 0, mxge_media_change, 4628 mxge_media_status); 4629 mxge_set_media(sc, IFM_ETHER | IFM_AUTO); 4630 mxge_media_probe(sc); 4631 sc->dying = 0; 4632 ether_ifattach(ifp, sc->mac_addr, NULL); 4633 /* ether_ifattach sets mtu to ETHERMTU */ 4634 if (mxge_initial_mtu != ETHERMTU) { 4635 lwkt_serialize_enter(ifp->if_serializer); 4636 mxge_change_mtu(sc, mxge_initial_mtu); 4637 lwkt_serialize_exit(ifp->if_serializer); 4638 } 4639 /* must come after ether_ifattach() */ 4640 err = mxge_add_irq(sc); 4641 if (err != 0) { 4642 device_printf(sc->dev, "failed to add irq\n"); 4643 goto abort_with_rings; 4644 } 4645 4646 mxge_add_sysctls(sc); 4647 #ifdef IFNET_BUF_RING 4648 ifp->if_transmit = mxge_transmit; 4649 ifp->if_qflush = mxge_qflush; 4650 #endif 4651 return 0; 4652 4653 abort_with_rings: 4654 mxge_free_rings(sc); 4655 abort_with_slices: 4656 mxge_free_slices(sc); 4657 abort_with_dmabench: 4658 mxge_dma_free(&sc->dmabench_dma); 4659 abort_with_zeropad_dma: 4660 mxge_dma_free(&sc->zeropad_dma); 4661 abort_with_cmd_dma: 4662 mxge_dma_free(&sc->cmd_dma); 4663 abort_with_mem_res: 4664 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4665 pci_disable_busmaster(dev); 4666 bus_dma_tag_destroy(sc->parent_dmat); 4667 abort_with_nothing: 4668 return err; 4669 } 4670 4671 static int 4672 mxge_detach(device_t dev) 4673 { 4674 mxge_softc_t *sc = device_get_softc(dev); 4675 4676 lwkt_serialize_enter(sc->ifp->if_serializer); 4677 sc->dying = 1; 4678 if (sc->ifp->if_flags & IFF_RUNNING) 4679 mxge_close(sc); 4680 /* 4681 * XXX: race: the callout callback could be spinning on 4682 * the serializer and run anyway 4683 */ 4684 callout_stop(&sc->co_hdl); 4685 lwkt_serialize_exit(sc->ifp->if_serializer); 4686 4687 ether_ifdetach(sc->ifp); 4688 ifmedia_removeall(&sc->media); 4689 mxge_dummy_rdma(sc, 0); 4690 mxge_rem_sysctls(sc); 4691 mxge_rem_irq(sc); 4692 mxge_free_rings(sc); 4693 mxge_free_slices(sc); 4694 mxge_dma_free(&sc->dmabench_dma); 4695 mxge_dma_free(&sc->zeropad_dma); 4696 mxge_dma_free(&sc->cmd_dma); 4697 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4698 pci_disable_busmaster(dev); 4699 bus_dma_tag_destroy(sc->parent_dmat); 4700 return 0; 4701 } 4702 4703 static int 4704 mxge_shutdown(device_t dev) 4705 { 4706 return 0; 4707 } 4708 4709 /* 4710 This file uses Myri10GE driver indentation. 4711 4712 Local Variables: 4713 c-file-style:"linux" 4714 tab-width:8 4715 End: 4716 */ 4717