/*
 * Copyright (c) 2001-2011, Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_ifpoll.h"
#include "opt_igb.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/rman.h>
#include <sys/serialize.h>
#include <sys/serialize2.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/ifq_var.h>
#include <net/toeplitz.h>
#include <net/toeplitz2.h>
#include <net/vlan/if_vlan_var.h>
#include <net/vlan/if_vlan_ether.h>
#include <net/if_poll.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <bus/pci/pcivar.h>
#include <bus/pci/pcireg.h>

#include <dev/netif/ig_hal/e1000_api.h>
#include <dev/netif/ig_hal/e1000_82575.h>
#include <dev/netif/igb/if_igb.h>

#ifdef IGB_RSS_DEBUG
#define IGB_RSS_DPRINTF(sc, lvl, fmt, ...) \
do { \
	if (sc->rss_debug >= lvl) \
		if_printf(&sc->arpcom.ac_if, fmt, __VA_ARGS__); \
} while (0)
#else	/* !IGB_RSS_DEBUG */
#define IGB_RSS_DPRINTF(sc, lvl, fmt, ...)	((void)0)
#endif	/* IGB_RSS_DEBUG */

#define IGB_NAME	"Intel(R) PRO/1000 "
#define IGB_DEVICE(id)	\
	{ IGB_VENDOR_ID, E1000_DEV_ID_##id, IGB_NAME #id }
#define IGB_DEVICE_NULL	{ 0, 0, NULL }
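
/*
 * Example (sketch): each igb_devices[] entry below is generated by the
 * IGB_DEVICE() macro; IGB_DEVICE(82576), for instance, expands to
 *   { IGB_VENDOR_ID, E1000_DEV_ID_82576, "Intel(R) PRO/1000 82576" }
 * which igb_probe() later matches against the PCI vendor/device id.
 */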

static struct igb_device {
	uint16_t	vid;
	uint16_t	did;
	const char	*desc;
} igb_devices[] = {
	IGB_DEVICE(82575EB_COPPER),
	IGB_DEVICE(82575EB_FIBER_SERDES),
	IGB_DEVICE(82575GB_QUAD_COPPER),
	IGB_DEVICE(82576),
	IGB_DEVICE(82576_NS),
	IGB_DEVICE(82576_NS_SERDES),
	IGB_DEVICE(82576_FIBER),
	IGB_DEVICE(82576_SERDES),
	IGB_DEVICE(82576_SERDES_QUAD),
	IGB_DEVICE(82576_QUAD_COPPER),
	IGB_DEVICE(82576_QUAD_COPPER_ET2),
	IGB_DEVICE(82576_VF),
	IGB_DEVICE(82580_COPPER),
	IGB_DEVICE(82580_FIBER),
	IGB_DEVICE(82580_SERDES),
	IGB_DEVICE(82580_SGMII),
	IGB_DEVICE(82580_COPPER_DUAL),
	IGB_DEVICE(82580_QUAD_FIBER),
	IGB_DEVICE(DH89XXCC_SERDES),
	IGB_DEVICE(DH89XXCC_SGMII),
	IGB_DEVICE(DH89XXCC_SFP),
	IGB_DEVICE(DH89XXCC_BACKPLANE),
	IGB_DEVICE(I350_COPPER),
	IGB_DEVICE(I350_FIBER),
	IGB_DEVICE(I350_SERDES),
	IGB_DEVICE(I350_SGMII),
	IGB_DEVICE(I350_VF),

	/* required last entry */
	IGB_DEVICE_NULL
};

static int	igb_probe(device_t);
static int	igb_attach(device_t);
static int	igb_detach(device_t);
static int	igb_shutdown(device_t);
static int	igb_suspend(device_t);
static int	igb_resume(device_t);

static boolean_t igb_is_valid_ether_addr(const uint8_t *);
static void	igb_setup_ifp(struct igb_softc *);
static boolean_t igb_txcsum_ctx(struct igb_tx_ring *, struct mbuf *);
static int	igb_tso_pullup(struct igb_tx_ring *, struct mbuf **);
static void	igb_tso_ctx(struct igb_tx_ring *, struct mbuf *, uint32_t *);
static void	igb_add_sysctl(struct igb_softc *);
static int	igb_sysctl_intr_rate(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_msix_rate(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS);
static void	igb_set_ring_inuse(struct igb_softc *, boolean_t);
#ifdef IFPOLL_ENABLE
static int	igb_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS);
#endif

static void	igb_vf_init_stats(struct igb_softc *);
static void	igb_reset(struct igb_softc *);
static void	igb_update_stats_counters(struct igb_softc *);
static void	igb_update_vf_stats_counters(struct igb_softc *);
static void	igb_update_link_status(struct igb_softc *);
static void	igb_init_tx_unit(struct igb_softc *);
static void	igb_init_rx_unit(struct igb_softc *);

static void	igb_set_vlan(struct igb_softc *);
static void	igb_set_multi(struct igb_softc *);
static void	igb_set_promisc(struct igb_softc *);
static void	igb_disable_promisc(struct igb_softc *);

static int	igb_alloc_rings(struct igb_softc *);
static void	igb_free_rings(struct igb_softc *);
static int	igb_create_tx_ring(struct igb_tx_ring *);
static int	igb_create_rx_ring(struct igb_rx_ring *);
static void	igb_free_tx_ring(struct igb_tx_ring *);
static void	igb_free_rx_ring(struct igb_rx_ring *);
static void	igb_destroy_tx_ring(struct igb_tx_ring *, int);
static void	igb_destroy_rx_ring(struct igb_rx_ring *, int);
static void	igb_init_tx_ring(struct igb_tx_ring *);
static int	igb_init_rx_ring(struct igb_rx_ring *);
static int	igb_newbuf(struct igb_rx_ring *, int, boolean_t);
static int	igb_encap(struct igb_tx_ring *, struct mbuf **);

static void	igb_stop(struct igb_softc *);
static void	igb_init(void *);
static int	igb_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
static void	igb_media_status(struct ifnet *, struct ifmediareq *);
static int	igb_media_change(struct ifnet *);
static void	igb_timer(void *);
static void	igb_watchdog(struct ifnet *);
static void	igb_start(struct ifnet *);
#ifdef IFPOLL_ENABLE
static void	igb_npoll(struct ifnet *, struct ifpoll_info *);
static void	igb_npoll_rx(struct ifnet *, void *, int);
static void	igb_npoll_tx(struct ifnet *, void *, int);
static void	igb_npoll_status(struct ifnet *);
#endif
static void	igb_serialize(struct ifnet *, enum ifnet_serialize);
static void	igb_deserialize(struct ifnet *, enum ifnet_serialize);
static int	igb_tryserialize(struct ifnet *, enum ifnet_serialize);
#ifdef INVARIANTS
static void	igb_serialize_assert(struct ifnet *, enum ifnet_serialize,
		    boolean_t);
#endif

static void	igb_intr(void *);
static void	igb_intr_shared(void *);
static void	igb_rxeof(struct igb_rx_ring *, int);
static void	igb_txeof(struct igb_tx_ring *);
static void	igb_set_eitr(struct igb_softc *, int, int);
static void	igb_enable_intr(struct igb_softc *);
static void	igb_disable_intr(struct igb_softc *);
static void	igb_init_unshared_intr(struct igb_softc *);
static void	igb_init_intr(struct igb_softc *);
static int	igb_setup_intr(struct igb_softc *);
static void	igb_set_txintr_mask(struct igb_tx_ring *, int *, int);
static void	igb_set_rxintr_mask(struct igb_rx_ring *, int *, int);
static void	igb_set_intr_mask(struct igb_softc *);
static int	igb_alloc_intr(struct igb_softc *);
static void	igb_free_intr(struct igb_softc *);
static void	igb_teardown_intr(struct igb_softc *);
static void	igb_msix_try_alloc(struct igb_softc *);
static void	igb_msix_free(struct igb_softc *, boolean_t);
static int	igb_msix_setup(struct igb_softc *);
static void	igb_msix_teardown(struct igb_softc *, int);
static void	igb_msix_rx(void *);
static void	igb_msix_tx(void *);
static void	igb_msix_status(void *);

/* Management and WOL Support */
static void	igb_get_mgmt(struct igb_softc *);
static void	igb_rel_mgmt(struct igb_softc *);
static void	igb_get_hw_control(struct igb_softc *);
static void	igb_rel_hw_control(struct igb_softc *);
static void	igb_enable_wol(device_t);

static device_method_t igb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		igb_probe),
	DEVMETHOD(device_attach,	igb_attach),
	DEVMETHOD(device_detach,	igb_detach),
	DEVMETHOD(device_shutdown,	igb_shutdown),
	DEVMETHOD(device_suspend,	igb_suspend),
	DEVMETHOD(device_resume,	igb_resume),
	{ 0, 0 }
};

static driver_t igb_driver = {
	"igb",
	igb_methods,
	sizeof(struct igb_softc),
};

static devclass_t igb_devclass;

DECLARE_DUMMY_MODULE(if_igb);
MODULE_DEPEND(igb, ig_hal, 1, 1, 1);
DRIVER_MODULE(if_igb, pci, igb_driver, igb_devclass, NULL, NULL);

static int	igb_rxd = IGB_DEFAULT_RXD;
static int	igb_txd = IGB_DEFAULT_TXD;
static int	igb_rxr = 0;
static int	igb_msi_enable = 1;
static int	igb_msix_enable = 1;
static int	igb_eee_disabled = 1;	/* Energy Efficient Ethernet */
static int	igb_fc_setting = e1000_fc_full;

/*
 * DMA Coalescing, only for i350 - default to off,
 * this feature is for power savings
 */
static int	igb_dma_coalesce = 0;

TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
TUNABLE_INT("hw.igb.rxr", &igb_rxr);
TUNABLE_INT("hw.igb.msi.enable", &igb_msi_enable);
TUNABLE_INT("hw.igb.msix.enable", &igb_msix_enable);
TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);

/* i350 specific */
TUNABLE_INT("hw.igb.eee_disabled", &igb_eee_disabled);
TUNABLE_INT("hw.igb.dma_coalesce", &igb_dma_coalesce);
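
/*
 * Example (sketch): these tunables can be set from the boot loader,
 * e.g. in /boot/loader.conf:
 *   hw.igb.rxd="1024"
 *   hw.igb.rxr="2"
 *   hw.igb.msix.enable="0"
 * Per-device overrides are fetched with device_getenv_int() at attach
 * time and validated against the hardware limits before use.
 */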

static __inline void
igb_rxcsum(uint32_t staterr, struct mbuf *mp)
{
	/* Ignore Checksum bit is set */
	if (staterr & E1000_RXD_STAT_IXSM)
		return;

	if ((staterr & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
	    E1000_RXD_STAT_IPCS)
		mp->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID;

	if (staterr & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
		if ((staterr & E1000_RXDEXT_STATERR_TCPE) == 0) {
			mp->m_pkthdr.csum_flags |= CSUM_DATA_VALID |
			    CSUM_PSEUDO_HDR | CSUM_FRAG_NOT_CHECKED;
			mp->m_pkthdr.csum_data = htons(0xffff);
		}
	}
}

static __inline struct pktinfo *
igb_rssinfo(struct mbuf *m, struct pktinfo *pi,
    uint32_t hash, uint32_t hashtype, uint32_t staterr)
{
	switch (hashtype) {
	case E1000_RXDADV_RSSTYPE_IPV4_TCP:
		pi->pi_netisr = NETISR_IP;
		pi->pi_flags = 0;
		pi->pi_l3proto = IPPROTO_TCP;
		break;

	case E1000_RXDADV_RSSTYPE_IPV4:
		if (staterr & E1000_RXD_STAT_IXSM)
			return NULL;

		if ((staterr &
		     (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
		    E1000_RXD_STAT_TCPCS) {
			pi->pi_netisr = NETISR_IP;
			pi->pi_flags = 0;
			pi->pi_l3proto = IPPROTO_UDP;
			break;
		}
		/* FALL THROUGH */
	default:
		return NULL;
	}

	m->m_flags |= M_HASH;
	m->m_pkthdr.hash = toeplitz_hash(hash);
	return pi;
}

static int
igb_probe(device_t dev)
{
	const struct igb_device *d;
	uint16_t vid, did;

	vid = pci_get_vendor(dev);
	did = pci_get_device(dev);

	for (d = igb_devices; d->desc != NULL; ++d) {
		if (vid == d->vid && did == d->did) {
			device_set_desc(dev, d->desc);
			return 0;
		}
	}
	return ENXIO;
}

static int
igb_attach(device_t dev)
{
	struct igb_softc *sc = device_get_softc(dev);
	uint16_t eeprom_data;
	int error = 0, i, j, ring_max;
#ifdef IFPOLL_ENABLE
	int offset, offset_def;
#endif

#ifdef notyet
	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_nvm_info, "I", "NVM Information");
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
#endif

	callout_init_mp(&sc->timer);
	lwkt_serialize_init(&sc->main_serialize);

	if_initname(&sc->arpcom.ac_if, device_get_name(dev),
	    device_get_unit(dev));
	sc->dev = sc->osdep.dev = dev;

	/*
	 * Determine hardware and mac type
	 */
	sc->hw.vendor_id = pci_get_vendor(dev);
	sc->hw.device_id = pci_get_device(dev);
	sc->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
	sc->hw.subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	sc->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);

	if (e1000_set_mac_type(&sc->hw))
		return ENXIO;

	/* Are we a VF device? */
	if (sc->hw.mac.type == e1000_vfadapt ||
	    sc->hw.mac.type == e1000_vfadapt_i350)
		sc->vf_ifp = 1;
	else
		sc->vf_ifp = 0;

	/*
	 * Configure total supported RX/TX ring count
	 */
	switch (sc->hw.mac.type) {
	case e1000_82575:
		ring_max = IGB_MAX_RING_82575;
		break;
	case e1000_82580:
		ring_max = IGB_MAX_RING_82580;
		break;
	case e1000_i350:
		ring_max = IGB_MAX_RING_I350;
		break;
	case e1000_82576:
		ring_max = IGB_MAX_RING_82576;
		break;
	default:
		ring_max = IGB_MIN_RING;
		break;
	}
	sc->rx_ring_cnt = device_getenv_int(dev, "rxr", igb_rxr);
	sc->rx_ring_cnt = if_ring_count2(sc->rx_ring_cnt, ring_max);
#ifdef IGB_RSS_DEBUG
	sc->rx_ring_cnt = device_getenv_int(dev, "rxr_debug", sc->rx_ring_cnt);
#endif
	sc->rx_ring_inuse = sc->rx_ring_cnt;
	sc->tx_ring_cnt = 1; /* XXX */

	if (sc->hw.mac.type == e1000_82575)
		sc->flags |= IGB_FLAG_TSO_IPLEN0;

	/* Enable bus mastering */
	pci_enable_busmaster(dev);

	/*
	 * Allocate IO memory
	 */
	sc->mem_rid = PCIR_BAR(0);
	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->mem_rid,
	    RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "Unable to allocate bus resource: memory\n");
		error = ENXIO;
		goto failed;
	}
	sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->mem_res);
	sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->mem_res);

	sc->hw.hw_addr = (uint8_t *)&sc->osdep.mem_bus_space_handle;

	/* Save PCI command register for Shared Code */
	sc->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
	sc->hw.back = &sc->osdep;

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&sc->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto failed;
	}

	e1000_get_bus_info(&sc->hw);

	sc->hw.mac.autoneg = DO_AUTO_NEG;
	sc->hw.phy.autoneg_wait_to_complete = FALSE;
	sc->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (sc->hw.phy.media_type == e1000_media_type_copper) {
		sc->hw.phy.mdix = AUTO_ALL_MODES;
		sc->hw.phy.disable_polarity_correction = FALSE;
		sc->hw.phy.ms_type = IGB_MASTER_SLAVE;
	}

	/* Set the frame limits assuming standard ethernet sized frames. */
	sc->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;

	/* Allocate RX/TX rings */
	error = igb_alloc_rings(sc);
	if (error)
		goto failed;
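
	/*
	 * Example (sketch): the NPOLLING offsets below spread the rings
	 * across CPUs.  With rx_ring_cnt == 2, ncpus2 == 4 and device
	 * unit 1, the default RX offset is (2 * 1) % 4 == 2, and any
	 * override must be < ncpus2 and a multiple of the ring count,
	 * i.e. 0 or 2 here.
	 */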
#ifdef IFPOLL_ENABLE
	/*
	 * NPOLLING RX CPU offset
	 */
	if (sc->rx_ring_cnt == ncpus2) {
		offset = 0;
	} else {
		offset_def = (sc->rx_ring_cnt * device_get_unit(dev)) % ncpus2;
		offset = device_getenv_int(dev, "npoll.rxoff", offset_def);
		if (offset >= ncpus2 ||
		    offset % sc->rx_ring_cnt != 0) {
			device_printf(dev, "invalid npoll.rxoff %d, use %d\n",
			    offset, offset_def);
			offset = offset_def;
		}
	}
	sc->rx_npoll_off = offset;

	/*
	 * NPOLLING TX CPU offset
	 */
	offset_def = sc->rx_npoll_off;
	offset = device_getenv_int(dev, "npoll.txoff", offset_def);
	if (offset >= ncpus2) {
		device_printf(dev, "invalid npoll.txoff %d, use %d\n",
		    offset, offset_def);
		offset = offset_def;
	}
	sc->tx_npoll_off = offset;
#endif

	/* Allocate interrupt */
	error = igb_alloc_intr(sc);
	if (error)
		goto failed;

	/*
	 * Setup serializers
	 */
	i = 0;
	sc->serializes[i++] = &sc->main_serialize;

	sc->tx_serialize = i;
	for (j = 0; j < sc->tx_ring_cnt; ++j)
		sc->serializes[i++] = &sc->tx_rings[j].tx_serialize;

	sc->rx_serialize = i;
	for (j = 0; j < sc->rx_ring_cnt; ++j)
		sc->serializes[i++] = &sc->rx_rings[j].rx_serialize;

	sc->serialize_cnt = i;
	KKASSERT(sc->serialize_cnt <= IGB_NSERIALIZE);
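
	/*
	 * Resulting layout (sketch): with tx_ring_cnt == 1 and
	 * rx_ring_cnt == 2 the array built above is
	 *   serializes[0] = &main_serialize
	 *   serializes[1] = &tx_rings[0].tx_serialize  (tx_serialize == 1)
	 *   serializes[2] = &rx_rings[0].rx_serialize  (rx_serialize == 2)
	 *   serializes[3] = &rx_rings[1].rx_serialize
	 * so serialize_cnt == 4; ifnet_serialize_all() acquires them in
	 * this fixed order.
	 */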

	/* Allocate the appropriate stats memory */
	if (sc->vf_ifp) {
		sc->stats = kmalloc(sizeof(struct e1000_vf_stats), M_DEVBUF,
		    M_WAITOK | M_ZERO);
		igb_vf_init_stats(sc);
	} else {
		sc->stats = kmalloc(sizeof(struct e1000_hw_stats), M_DEVBUF,
		    M_WAITOK | M_ZERO);
	}

	/* Allocate multicast array memory. */
	sc->mta = kmalloc(ETHER_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES,
	    M_DEVBUF, M_WAITOK);

	/* Some adapter-specific advanced features */
	if (sc->hw.mac.type >= e1000_i350) {
#ifdef notyet
		igb_set_sysctl_value(adapter, "dma_coalesce",
		    "configure dma coalesce",
		    &adapter->dma_coalesce, igb_dma_coalesce);
		igb_set_sysctl_value(adapter, "eee_disabled",
		    "enable Energy Efficient Ethernet",
		    &adapter->hw.dev_spec._82575.eee_disable,
		    igb_eee_disabled);
#else
		sc->dma_coalesce = igb_dma_coalesce;
		sc->hw.dev_spec._82575.eee_disable = igb_eee_disabled;
#endif
		e1000_set_eee_i350(&sc->hw);
	}

	/*
	 * Start from a known state; this is important for reading the
	 * NVM and MAC address afterwards.
	 */
	e1000_reset_hw(&sc->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&sc->hw) < 0) {
		/*
		 * Some PCI-E parts fail the first check due to
		 * the link being in sleep state; call it again.
		 * If it fails a second time, it's a real issue.
		 */
		if (e1000_validate_nvm_checksum(&sc->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto failed;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&sc->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto failed;
	}
	if (!igb_is_valid_ether_addr(sc->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto failed;
	}

	/* Setup OS specific network interface */
	igb_setup_ifp(sc);

	/* Add sysctl tree; must be done after igb_setup_ifp() */
	igb_add_sysctl(sc);

	/* Now get a good starting state */
	igb_reset(sc);

	/* Initialize statistics */
	igb_update_stats_counters(sc);

	sc->hw.mac.get_link_status = 1;
	igb_update_link_status(sc);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&sc->hw)) {
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");
	}

	/* Determine if we have to control management hardware */
	if (e1000_enable_mng_pass_thru(&sc->hw))
		sc->flags |= IGB_FLAG_HAS_MGMT;

	/*
	 * Setup Wake-on-Lan
	 */
	/* APME bit in EEPROM is mapped to WUC.APME */
	eeprom_data = E1000_READ_REG(&sc->hw, E1000_WUC) & E1000_WUC_APME;
	if (eeprom_data)
		sc->wol = E1000_WUFC_MAG;
	/* XXX disable WOL */
	sc->wol = 0;

#ifdef notyet
	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
#endif

#ifdef notyet
	igb_add_hw_stats(adapter);
#endif

	error = igb_setup_intr(sc);
	if (error) {
		ether_ifdetach(&sc->arpcom.ac_if);
		goto failed;
	}
	return 0;

failed:
	igb_detach(dev);
	return error;
}

static int
igb_detach(device_t dev)
{
	struct igb_softc *sc = device_get_softc(dev);

	if (device_is_attached(dev)) {
		struct ifnet *ifp = &sc->arpcom.ac_if;

		ifnet_serialize_all(ifp);

		igb_stop(sc);

		e1000_phy_hw_reset(&sc->hw);

		/* Give control back to firmware */
		igb_rel_mgmt(sc);
		igb_rel_hw_control(sc);

		if (sc->wol) {
			E1000_WRITE_REG(&sc->hw, E1000_WUC, E1000_WUC_PME_EN);
			E1000_WRITE_REG(&sc->hw, E1000_WUFC, sc->wol);
			igb_enable_wol(dev);
		}

		igb_teardown_intr(sc);

		ifnet_deserialize_all(ifp);

		ether_ifdetach(ifp);
	} else if (sc->mem_res != NULL) {
		igb_rel_hw_control(sc);
	}
	bus_generic_detach(dev);

	if (sc->sysctl_tree != NULL)
		sysctl_ctx_free(&sc->sysctl_ctx);

	igb_free_intr(sc);

	if (sc->msix_mem_res != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_mem_rid,
		    sc->msix_mem_res);
	}
	if (sc->mem_res != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid,
		    sc->mem_res);
	}

	igb_free_rings(sc);

	if (sc->mta != NULL)
		kfree(sc->mta, M_DEVBUF);
	if (sc->stats != NULL)
		kfree(sc->stats, M_DEVBUF);

	return 0;
}

static int
igb_shutdown(device_t dev)
{
	return igb_suspend(dev);
}

static int
igb_suspend(device_t dev)
{
	struct igb_softc *sc = device_get_softc(dev);
	struct ifnet *ifp = &sc->arpcom.ac_if;

	ifnet_serialize_all(ifp);

	igb_stop(sc);

	igb_rel_mgmt(sc);
	igb_rel_hw_control(sc);

	if (sc->wol) {
		E1000_WRITE_REG(&sc->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&sc->hw, E1000_WUFC, sc->wol);
		igb_enable_wol(dev);
	}

	ifnet_deserialize_all(ifp);

	return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
	struct igb_softc *sc = device_get_softc(dev);
	struct ifnet *ifp = &sc->arpcom.ac_if;

	ifnet_serialize_all(ifp);

	igb_init(sc);
	igb_get_mgmt(sc);

	if_devstart(ifp);

	ifnet_deserialize_all(ifp);

	return bus_generic_resume(dev);
}

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
{
	struct igb_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int max_frame_size, mask, reinit;
	int error = 0;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	switch (command) {
	case SIOCSIFMTU:
		max_frame_size = 9234;
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		sc->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN +
		    ETHER_CRC_LEN;

		if (ifp->if_flags & IFF_RUNNING)
			igb_init(sc);
		break;

	case SIOCSIFFLAGS:
		if (ifp->if_flags & IFF_UP) {
			if (ifp->if_flags & IFF_RUNNING) {
				if ((ifp->if_flags ^ sc->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					igb_disable_promisc(sc);
					igb_set_promisc(sc);
				}
			} else {
				igb_init(sc);
			}
		} else if (ifp->if_flags & IFF_RUNNING) {
			igb_stop(sc);
		}
		sc->if_flags = ifp->if_flags;
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		if (ifp->if_flags & IFF_RUNNING) {
			igb_disable_intr(sc);
			igb_set_multi(sc);
#ifdef IFPOLL_ENABLE
			if (!(ifp->if_flags & IFF_NPOLLING))
#endif
				igb_enable_intr(sc);
		}
		break;

	case SIOCSIFMEDIA:
		/*
		 * As the speed/duplex settings are being
		 * changed, we need to reset the PHY.
		 */
		sc->hw.phy.reset_disable = FALSE;

		/* Check SOL/IDER usage */
		if (e1000_check_reset_block(&sc->hw)) {
			if_printf(ifp, "Media change is "
			    "blocked due to SOL/IDER session.\n");
			break;
		}
		/* FALL THROUGH */

	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
		break;

	case SIOCSIFCAP:
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_RXCSUM) {
			ifp->if_capenable ^= IFCAP_RXCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_TXCSUM) {
			ifp->if_capenable ^= IFCAP_TXCSUM;
			if (ifp->if_capenable & IFCAP_TXCSUM)
				ifp->if_hwassist |= IGB_CSUM_FEATURES;
			else
				ifp->if_hwassist &= ~IGB_CSUM_FEATURES;
		}
		if (mask & IFCAP_TSO) {
			ifp->if_capenable ^= IFCAP_TSO;
			if (ifp->if_capenable & IFCAP_TSO)
				ifp->if_hwassist |= CSUM_TSO;
			else
				ifp->if_hwassist &= ~CSUM_TSO;
		}
		if (mask & IFCAP_RSS)
			ifp->if_capenable ^= IFCAP_RSS;
		if (reinit && (ifp->if_flags & IFF_RUNNING))
			igb_init(sc);
		break;

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}
	return error;
}

static void
igb_init(void *xsc)
{
	struct igb_softc *sc = xsc;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	boolean_t polling;
	int i;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	igb_stop(sc);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(ifp), sc->hw.mac.addr, ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&sc->hw, sc->hw.mac.addr, 0);

	igb_reset(sc);
	igb_update_link_status(sc);

	E1000_WRITE_REG(&sc->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Configure for OS presence */
	igb_get_mgmt(sc);

	polling = FALSE;
#ifdef IFPOLL_ENABLE
	if (ifp->if_flags & IFF_NPOLLING)
		polling = TRUE;
#endif

	/* Configure the number of used RX/TX rings */
	igb_set_ring_inuse(sc, polling);

	/* Initialize interrupt */
	igb_init_intr(sc);

	/* Prepare transmit descriptors and buffers */
	for (i = 0; i < sc->tx_ring_cnt; ++i)
		igb_init_tx_ring(&sc->tx_rings[i]);
	igb_init_tx_unit(sc);

	/* Setup Multicast table */
	igb_set_multi(sc);

#if 0
	/*
	 * Figure out the desired mbuf pool
	 * for doing jumbo/packetsplit
	 */
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;
#endif

	/* Prepare receive descriptors and buffers */
	for (i = 0; i < sc->rx_ring_inuse; ++i) {
		int error;

		error = igb_init_rx_ring(&sc->rx_rings[i]);
		if (error) {
			if_printf(ifp, "Could not setup receive structures\n");
			igb_stop(sc);
			return;
		}
	}
	igb_init_rx_unit(sc);

	/* Enable VLAN support */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		igb_set_vlan(sc);

	/* Don't lose promiscuous settings */
	igb_set_promisc(sc);

	ifp->if_flags |= IFF_RUNNING;
	ifp->if_flags &= ~IFF_OACTIVE;

	callout_reset(&sc->timer, hz, igb_timer, sc);
	e1000_clear_hw_cntrs_base_generic(&sc->hw);

	/* This clears any pending interrupts */
	E1000_READ_REG(&sc->hw, E1000_ICR);

	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (polling) {
		igb_disable_intr(sc);
	} else {
		igb_enable_intr(sc);
		E1000_WRITE_REG(&sc->hw, E1000_ICS, E1000_ICS_LSC);
	}

	/* Set Energy Efficient Ethernet */
	e1000_set_eee_i350(&sc->hw);

	/* Don't reset the phy next time init gets called */
	sc->hw.phy.reset_disable = TRUE;
}

static void
igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct igb_softc *sc = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	igb_update_link_status(sc);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!sc->link_active)
		return;

	ifmr->ifm_status |= IFM_ACTIVE;

	if (sc->hw.phy.media_type == e1000_media_type_fiber ||
	    sc->hw.phy.media_type == e1000_media_type_internal_serdes) {
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (sc->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;

		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;

		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (sc->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
}

static int
igb_media_change(struct ifnet *ifp)
{
	struct igb_softc *sc = ifp->if_softc;
	struct ifmedia *ifm = &sc->media;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return EINVAL;

	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		sc->hw.mac.autoneg = DO_AUTO_NEG;
		sc->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;

	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		sc->hw.mac.autoneg = DO_AUTO_NEG;
		sc->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;

	case IFM_100_TX:
		sc->hw.mac.autoneg = FALSE;
		sc->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			sc->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			sc->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;

	case IFM_10_T:
		sc->hw.mac.autoneg = FALSE;
		sc->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			sc->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			sc->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;

	default:
		if_printf(ifp, "Unsupported media type\n");
		break;
	}

	igb_init(sc);

	return 0;
}

static void
igb_set_promisc(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	struct e1000_hw *hw = &sc->hw;
	uint32_t reg;

	if (sc->vf_ifp) {
		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
		return;
	}

	reg = E1000_READ_REG(hw, E1000_RCTL);
	if (ifp->if_flags & IFF_PROMISC) {
		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
		E1000_WRITE_REG(hw, E1000_RCTL, reg);
	} else if (ifp->if_flags & IFF_ALLMULTI) {
		reg |= E1000_RCTL_MPE;
		reg &= ~E1000_RCTL_UPE;
		E1000_WRITE_REG(hw, E1000_RCTL, reg);
	}
}

static void
igb_disable_promisc(struct igb_softc *sc)
{
	struct e1000_hw *hw = &sc->hw;
	uint32_t reg;

	if (sc->vf_ifp) {
		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
		return;
	}
	reg = E1000_READ_REG(hw, E1000_RCTL);
	reg &= ~E1000_RCTL_UPE;
	reg &= ~E1000_RCTL_MPE;
	E1000_WRITE_REG(hw, E1000_RCTL, reg);
}

static void
igb_set_multi(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	struct ifmultiaddr *ifma;
	uint32_t reg_rctl = 0;
	uint8_t *mta;
	int mcnt = 0;

	mta = sc->mta;
	bzero(mta, ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);

	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;

		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
			break;

		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
		mcnt++;
	}

	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
		reg_rctl = E1000_READ_REG(&sc->hw, E1000_RCTL);
		reg_rctl |= E1000_RCTL_MPE;
		E1000_WRITE_REG(&sc->hw, E1000_RCTL, reg_rctl);
	} else {
		e1000_update_mc_addr_list(&sc->hw, mta, mcnt);
	}
}

static void
igb_timer(void *xsc)
{
	struct igb_softc *sc = xsc;

	lwkt_serialize_enter(&sc->main_serialize);

	igb_update_link_status(sc);
	igb_update_stats_counters(sc);

	callout_reset(&sc->timer, hz, igb_timer, sc);

	lwkt_serialize_exit(&sc->main_serialize);
}

static void
igb_update_link_status(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	struct e1000_hw *hw = &sc->hw;
	uint32_t link_check, thstat, ctrl;

	link_check = thstat = ctrl = 0;

	/* Get the cached link value or read for real */
	switch (hw->phy.media_type) {
	case e1000_media_type_copper:
		if (hw->mac.get_link_status) {
			/* Do the work to read phy */
			e1000_check_for_link(hw);
			link_check = !hw->mac.get_link_status;
		} else {
			link_check = TRUE;
		}
		break;

	case e1000_media_type_fiber:
		e1000_check_for_link(hw);
		link_check = E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU;
		break;

	case e1000_media_type_internal_serdes:
		e1000_check_for_link(hw);
		link_check = hw->mac.serdes_has_link;
		break;

	/* VF device is type_unknown */
	case e1000_media_type_unknown:
		e1000_check_for_link(hw);
		link_check = !hw->mac.get_link_status;
		/* Fall thru */
	default:
		break;
	}

	/* Check for thermal downshift or shutdown */
	if (hw->mac.type == e1000_i350) {
		thstat = E1000_READ_REG(hw, E1000_THSTAT);
		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
	}

	/* Now we check if a transition has happened */
	if (link_check && sc->link_active == 0) {
		e1000_get_speed_and_duplex(hw,
		    &sc->link_speed, &sc->link_duplex);
		if (bootverbose) {
			if_printf(ifp, "Link is up %d Mbps %s\n",
			    sc->link_speed,
			    sc->link_duplex == FULL_DUPLEX ?
			    "Full Duplex" : "Half Duplex");
		}
		sc->link_active = 1;

		ifp->if_baudrate = sc->link_speed * 1000000;
		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
		    (thstat & E1000_THSTAT_LINK_THROTTLE))
			if_printf(ifp, "Link: thermal downshift\n");
		/* This can sleep */
		ifp->if_link_state = LINK_STATE_UP;
		if_link_state_change(ifp);
	} else if (!link_check && sc->link_active == 1) {
		ifp->if_baudrate = sc->link_speed = 0;
		sc->link_duplex = 0;
		if (bootverbose)
			if_printf(ifp, "Link is Down\n");
		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
		    (thstat & E1000_THSTAT_PWR_DOWN))
			if_printf(ifp, "Link: thermal shutdown\n");
		sc->link_active = 0;
		/* This can sleep */
		ifp->if_link_state = LINK_STATE_DOWN;
		if_link_state_change(ifp);
	}
}

static void
igb_stop(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int i;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	igb_disable_intr(sc);

	callout_stop(&sc->timer);

	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
	ifp->if_timer = 0;

	e1000_reset_hw(&sc->hw);
	E1000_WRITE_REG(&sc->hw, E1000_WUC, 0);

	e1000_led_off(&sc->hw);
	e1000_cleanup_led(&sc->hw);

	for (i = 0; i < sc->tx_ring_cnt; ++i)
		igb_free_tx_ring(&sc->tx_rings[i]);
	for (i = 0; i < sc->rx_ring_cnt; ++i)
		igb_free_rx_ring(&sc->rx_rings[i]);
}

static void
igb_reset(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	struct e1000_hw *hw = &sc->hw;
	struct e1000_fc_info *fc = &hw->fc;
	uint32_t pba = 0;
	uint16_t hwm;

	/* Let the firmware know the OS is in control */
	igb_get_hw_control(sc);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 */
	switch (hw->mac.type) {
	case e1000_82575:
		pba = E1000_PBA_32K;
		break;

	case e1000_82576:
	case e1000_vfadapt:
		pba = E1000_READ_REG(hw, E1000_RXPBS);
		pba &= E1000_RXPBS_SIZE_MASK_82576;
		break;

	case e1000_82580:
	case e1000_i350:
	case e1000_vfadapt_i350:
		pba = E1000_READ_REG(hw, E1000_RXPBS);
		pba = e1000_rxpbs_adjust_82580(pba);
		break;
		/* XXX pba = E1000_PBA_35K; */

	default:
		break;
	}

	/* Special needs in case of Jumbo frames */
	if (hw->mac.type == e1000_82575 && ifp->if_mtu > ETHERMTU) {
		uint32_t tx_space, min_tx, min_rx;

		pba = E1000_READ_REG(hw, E1000_PBA);
		tx_space = pba >> 16;
		pba &= 0xffff;

		min_tx = (sc->max_frame_size +
		    sizeof(struct e1000_tx_desc) - ETHER_CRC_LEN) * 2;
		min_tx = roundup2(min_tx, 1024);
		min_tx >>= 10;
		min_rx = sc->max_frame_size;
		min_rx = roundup2(min_rx, 1024);
		min_rx >>= 10;
		if (tx_space < min_tx && (min_tx - tx_space) < pba) {
			pba = pba - (min_tx - tx_space);
			/*
			 * if short on rx space, rx wins
			 * and must trump tx adjustment
			 */
			if (pba < min_rx)
				pba = min_rx;
		}
		E1000_WRITE_REG(hw, E1000_PBA, pba);
	}

	/*
	 * These parameters control the automatic generation (Tx) and
	 * response (Rx) to Ethernet PAUSE frames.
	 * - High water mark should allow for at least two frames to be
	 *   received after sending an XOFF.
	 * - Low water mark works best when it is very near the high water mark.
	 *   This allows the receiver to restart by sending XON when it has
	 *   drained a bit.
	 */
	hwm = min(((pba << 10) * 9 / 10),
	    ((pba << 10) - 2 * sc->max_frame_size));

	if (hw->mac.type < e1000_82576) {
		fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */
		fc->low_water = fc->high_water - 8;
	} else {
		fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
		fc->low_water = fc->high_water - 16;
	}
	fc->pause_time = IGB_FC_PAUSE_TIME;
	fc->send_xon = TRUE;
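
	/*
	 * Worked example (sketch): with a 32KB receive packet buffer
	 * (pba == 32, i.e. 32 * 1024 bytes) and max_frame_size == 1518,
	 *   hwm = min(32768 * 9 / 10, 32768 - 2 * 1518)
	 *       = min(29491, 29732) = 29491
	 * so on an 82575 high_water = 29491 & 0xFFF8 = 29488 and
	 * low_water = 29480.
	 */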

	/* Issue a global reset */
	e1000_reset_hw(hw);
	E1000_WRITE_REG(hw, E1000_WUC, 0);

	if (e1000_init_hw(hw) < 0)
		if_printf(ifp, "Hardware Initialization Failed\n");

	/* Setup DMA Coalescing */
	if (hw->mac.type == e1000_i350 && sc->dma_coalesce) {
		uint32_t reg;

		hwm = (pba - 4) << 10;
		reg = ((pba - 6) << E1000_DMACR_DMACTHR_SHIFT)
		    & E1000_DMACR_DMACTHR_MASK;

		/* transition to L0x or L1 if available..*/
		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);

		/* timer = +-1000 usec in 32usec intervals */
		reg |= (1000 >> 5);
		E1000_WRITE_REG(hw, E1000_DMACR, reg);

		/* No lower threshold */
		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);

		/* set hwm to PBA - 2 * max frame size */
		E1000_WRITE_REG(hw, E1000_FCRTC, hwm);

		/* Set the interval before transition */
		reg = E1000_READ_REG(hw, E1000_DMCTLX);
		reg |= 0x800000FF; /* 255 usec */
		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);

		/* free space in tx packet buffer to wake from DMA coal */
		E1000_WRITE_REG(hw, E1000_DMCTXTH,
		    (20480 - (2 * sc->max_frame_size)) >> 6);

		/* make low power state decision controlled by DMA coal */
		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
		E1000_WRITE_REG(hw, E1000_PCIEMISC,
		    reg | E1000_PCIEMISC_LX_DECISION);
		if_printf(ifp, "DMA Coalescing enabled\n");
	}

	E1000_WRITE_REG(&sc->hw, E1000_VET, ETHERTYPE_VLAN);
	e1000_get_phy_info(hw);
	e1000_check_for_link(hw);
}

static void
igb_setup_ifp(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;

	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_init = igb_init;
	ifp->if_ioctl = igb_ioctl;
	ifp->if_start = igb_start;
	ifp->if_serialize = igb_serialize;
	ifp->if_deserialize = igb_deserialize;
	ifp->if_tryserialize = igb_tryserialize;
#ifdef INVARIANTS
	ifp->if_serialize_assert = igb_serialize_assert;
#endif
#ifdef IFPOLL_ENABLE
	ifp->if_npoll = igb_npoll;
#endif
	ifp->if_watchdog = igb_watchdog;

	ifq_set_maxlen(&ifp->if_snd, sc->tx_rings[0].num_tx_desc - 1);
	ifq_set_ready(&ifp->if_snd);

	ether_ifattach(ifp, sc->hw.mac.addr, NULL);

	ifp->if_capabilities =
	    IFCAP_HWCSUM | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_TSO;
	if (IGB_ENABLE_HWRSS(sc))
		ifp->if_capabilities |= IFCAP_RSS;
	ifp->if_capenable = ifp->if_capabilities;
	ifp->if_hwassist = IGB_CSUM_FEATURES | CSUM_TSO;

	/*
	 * Tell the upper layer(s) we support long frames
	 */
	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);

	/*
	 * Specify the media types supported by this adapter and register
	 * callbacks to update media and link information
	 */
	ifmedia_init(&sc->media, IFM_IMASK, igb_media_change, igb_media_status);
	if (sc->hw.phy.media_type == e1000_media_type_fiber ||
	    sc->hw.phy.media_type == e1000_media_type_internal_serdes) {
		ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
		    0, NULL);
		ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
	} else {
		ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T, 0, NULL);
		ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T | IFM_FDX,
		    0, NULL);
		ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX, 0, NULL);
		ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
		    0, NULL);
		if (sc->hw.phy.type != e1000_phy_ife) {
			ifmedia_add(&sc->media,
			    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
			ifmedia_add(&sc->media,
			    IFM_ETHER | IFM_1000_T, 0, NULL);
		}
	}
	ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO);
}

static void
igb_add_sysctl(struct igb_softc *sc)
{
	char node[32];
	int i;

	sysctl_ctx_init(&sc->sysctl_ctx);
	sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx,
	    SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO,
	    device_get_nameunit(sc->dev), CTLFLAG_RD, 0, "");
	if (sc->sysctl_tree == NULL) {
		device_printf(sc->dev, "can't add sysctl node\n");
		return;
	}

	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "rxr", CTLFLAG_RD, &sc->rx_ring_cnt, 0, "# of RX rings");
	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "rxr_inuse", CTLFLAG_RD, &sc->rx_ring_inuse, 0,
	    "# of RX rings used");
	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "rxd", CTLFLAG_RD, &sc->rx_rings[0].num_rx_desc, 0,
	    "# of RX descs");
	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "txd", CTLFLAG_RD, &sc->tx_rings[0].num_tx_desc, 0,
	    "# of TX descs");

	if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
		SYSCTL_ADD_PROC(&sc->sysctl_ctx,
		    SYSCTL_CHILDREN(sc->sysctl_tree),
		    OID_AUTO, "intr_rate", CTLTYPE_INT | CTLFLAG_RW,
		    sc, 0, igb_sysctl_intr_rate, "I", "interrupt rate");
	} else {
		for (i = 0; i < sc->msix_cnt; ++i) {
			struct igb_msix_data *msix = &sc->msix_data[i];

			ksnprintf(node, sizeof(node), "msix%d_rate", i);
			SYSCTL_ADD_PROC(&sc->sysctl_ctx,
			    SYSCTL_CHILDREN(sc->sysctl_tree),
			    OID_AUTO, node, CTLTYPE_INT | CTLFLAG_RW,
			    msix, 0, igb_sysctl_msix_rate, "I",
			    msix->msix_rate_desc);
		}
	}

	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "tx_intr_nsegs", CTLTYPE_INT | CTLFLAG_RW,
	    sc, 0, igb_sysctl_tx_intr_nsegs, "I",
	    "# of segments per TX interrupt");

#ifdef IFPOLL_ENABLE
	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "npoll_rxoff", CTLTYPE_INT|CTLFLAG_RW,
	    sc, 0, igb_sysctl_npoll_rxoff, "I", "NPOLLING RX cpu offset");
	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "npoll_txoff", CTLTYPE_INT|CTLFLAG_RW,
	    sc, 0, igb_sysctl_npoll_txoff, "I", "NPOLLING TX cpu offset");
#endif

#ifdef IGB_RSS_DEBUG
	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "rss_debug", CTLFLAG_RW, &sc->rss_debug, 0,
	    "RSS debug level");
	for (i = 0; i < sc->rx_ring_cnt; ++i) {
		ksnprintf(node, sizeof(node), "rx%d_pkt", i);
		SYSCTL_ADD_ULONG(&sc->sysctl_ctx,
		    SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, node,
		    CTLFLAG_RW, &sc->rx_rings[i].rx_packets, "RXed packets");
	}
#endif
}
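
/*
 * Example (sketch): the nodes created above live under hw.<nameunit>,
 * so on the first adapter one might run
 *   sysctl hw.igb0.rxr_inuse          # RX rings currently used
 *   sysctl hw.igb0.tx_intr_nsegs=64   # segments per TX interrupt
 * (names taken from the SYSCTL_ADD_* calls in igb_add_sysctl()).
 */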

static int
igb_alloc_rings(struct igb_softc *sc)
{
	int error, i;

	/*
	 * Create top level busdma tag
	 */
	error = bus_dma_tag_create(NULL, 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
	    &sc->parent_tag);
	if (error) {
		device_printf(sc->dev, "could not create top level DMA tag\n");
		return error;
	}

	/*
	 * Allocate TX descriptor rings and buffers
	 */
	sc->tx_rings = kmalloc_cachealign(
	    sizeof(struct igb_tx_ring) * sc->tx_ring_cnt,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < sc->tx_ring_cnt; ++i) {
		struct igb_tx_ring *txr = &sc->tx_rings[i];

		/* Set up some basics */
		txr->sc = sc;
		txr->me = i;
		lwkt_serialize_init(&txr->tx_serialize);

		error = igb_create_tx_ring(txr);
		if (error)
			return error;
	}

	/*
	 * Allocate RX descriptor rings and buffers
	 */
	sc->rx_rings = kmalloc_cachealign(
	    sizeof(struct igb_rx_ring) * sc->rx_ring_cnt,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < sc->rx_ring_cnt; ++i) {
		struct igb_rx_ring *rxr = &sc->rx_rings[i];

		/* Set up some basics */
		rxr->sc = sc;
		rxr->me = i;
		lwkt_serialize_init(&rxr->rx_serialize);

		error = igb_create_rx_ring(rxr);
		if (error)
			return error;
	}

	return 0;
}

static void
igb_free_rings(struct igb_softc *sc)
{
	int i;

	if (sc->tx_rings != NULL) {
		for (i = 0; i < sc->tx_ring_cnt; ++i) {
			struct igb_tx_ring *txr = &sc->tx_rings[i];

			igb_destroy_tx_ring(txr, txr->num_tx_desc);
		}
		kfree(sc->tx_rings, M_DEVBUF);
	}

	if (sc->rx_rings != NULL) {
		for (i = 0; i < sc->rx_ring_cnt; ++i) {
			struct igb_rx_ring *rxr = &sc->rx_rings[i];

			igb_destroy_rx_ring(rxr, rxr->num_rx_desc);
		}
		kfree(sc->rx_rings, M_DEVBUF);
	}
}

static int
igb_create_tx_ring(struct igb_tx_ring *txr)
{
	int tsize, error, i, ntxd;

	/*
	 * Validate number of transmit descriptors. It must not exceed
	 * hardware maximum, and must be multiple of IGB_DBA_ALIGN.
	 */
	ntxd = device_getenv_int(txr->sc->dev, "txd", igb_txd);
	if ((ntxd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN != 0 ||
	    ntxd > IGB_MAX_TXD || ntxd < IGB_MIN_TXD) {
		device_printf(txr->sc->dev,
		    "Using %d TX descriptors instead of %d!\n",
		    IGB_DEFAULT_TXD, ntxd);
		txr->num_tx_desc = IGB_DEFAULT_TXD;
	} else {
		txr->num_tx_desc = ntxd;
	}
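
	/*
	 * Example (sketch), assuming sizeof(struct e1000_tx_desc) == 16
	 * and IGB_DBA_ALIGN == 128: any multiple of 8 descriptors passes
	 * the alignment check, so txd=1024 (16384 bytes) is accepted,
	 * while txd=1022 (16352 bytes, not 128-byte aligned) is rejected
	 * and silently replaced with IGB_DEFAULT_TXD.
	 */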

	/*
	 * Allocate TX descriptor ring
	 */
	tsize = roundup2(txr->num_tx_desc * sizeof(union e1000_adv_tx_desc),
	    IGB_DBA_ALIGN);
	txr->txdma.dma_vaddr = bus_dmamem_coherent_any(txr->sc->parent_tag,
	    IGB_DBA_ALIGN, tsize, BUS_DMA_WAITOK,
	    &txr->txdma.dma_tag, &txr->txdma.dma_map, &txr->txdma.dma_paddr);
	if (txr->txdma.dma_vaddr == NULL) {
		device_printf(txr->sc->dev,
		    "Unable to allocate TX Descriptor memory\n");
		return ENOMEM;
	}
	txr->tx_base = txr->txdma.dma_vaddr;
	bzero(txr->tx_base, tsize);

	tsize = __VM_CACHELINE_ALIGN(
	    sizeof(struct igb_tx_buf) * txr->num_tx_desc);
	txr->tx_buf = kmalloc_cachealign(tsize, M_DEVBUF, M_WAITOK | M_ZERO);

	/*
	 * Allocate TX head write-back buffer
	 */
	txr->tx_hdr = bus_dmamem_coherent_any(txr->sc->parent_tag,
	    __VM_CACHELINE_SIZE, __VM_CACHELINE_SIZE, BUS_DMA_WAITOK,
	    &txr->tx_hdr_dtag, &txr->tx_hdr_dmap, &txr->tx_hdr_paddr);
	if (txr->tx_hdr == NULL) {
		device_printf(txr->sc->dev,
		    "Unable to allocate TX head write-back buffer\n");
		return ENOMEM;
	}

	/*
	 * Create DMA tag for TX buffers
	 */
	error = bus_dma_tag_create(txr->sc->parent_tag,
	    1, 0,		/* alignment, bounds */
	    BUS_SPACE_MAXADDR,	/* lowaddr */
	    BUS_SPACE_MAXADDR,	/* highaddr */
	    NULL, NULL,		/* filter, filterarg */
	    IGB_TSO_SIZE,	/* maxsize */
	    IGB_MAX_SCATTER,	/* nsegments */
	    PAGE_SIZE,		/* maxsegsize */
	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
	    BUS_DMA_ONEBPAGE,	/* flags */
	    &txr->tx_tag);
	if (error) {
		device_printf(txr->sc->dev, "Unable to allocate TX DMA tag\n");
		kfree(txr->tx_buf, M_DEVBUF);
		txr->tx_buf = NULL;
		return error;
	}

	/*
	 * Create DMA maps for TX buffers
	 */
	for (i = 0; i < txr->num_tx_desc; ++i) {
		struct igb_tx_buf *txbuf = &txr->tx_buf[i];

		error = bus_dmamap_create(txr->tx_tag,
		    BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &txbuf->map);
		if (error) {
			device_printf(txr->sc->dev,
			    "Unable to create TX DMA map\n");
			igb_destroy_tx_ring(txr, i);
			return error;
		}
	}

	/*
	 * Initialize various watermarks
	 */
	txr->spare_desc = IGB_TX_SPARE;
	txr->intr_nsegs = txr->num_tx_desc / 16;
	txr->oact_hi_desc = txr->num_tx_desc / 2;
	txr->oact_lo_desc = txr->num_tx_desc / 8;
	if (txr->oact_lo_desc > IGB_TX_OACTIVE_MAX)
		txr->oact_lo_desc = IGB_TX_OACTIVE_MAX;
	if (txr->oact_lo_desc < txr->spare_desc + IGB_TX_RESERVED)
		txr->oact_lo_desc = txr->spare_desc + IGB_TX_RESERVED;

	return 0;
}

static void
igb_free_tx_ring(struct igb_tx_ring *txr)
{
	int i;

	for (i = 0; i < txr->num_tx_desc; ++i) {
		struct igb_tx_buf *txbuf = &txr->tx_buf[i];

		if (txbuf->m_head != NULL) {
			bus_dmamap_unload(txr->tx_tag, txbuf->map);
			m_freem(txbuf->m_head);
			txbuf->m_head = NULL;
		}
	}
}

static void
igb_destroy_tx_ring(struct igb_tx_ring *txr, int ndesc)
{
	int i;

	if (txr->txdma.dma_vaddr != NULL) {
		bus_dmamap_unload(txr->txdma.dma_tag, txr->txdma.dma_map);
		bus_dmamem_free(txr->txdma.dma_tag, txr->txdma.dma_vaddr,
		    txr->txdma.dma_map);
		bus_dma_tag_destroy(txr->txdma.dma_tag);
		txr->txdma.dma_vaddr = NULL;
	}

	if (txr->tx_hdr != NULL) {
		bus_dmamap_unload(txr->tx_hdr_dtag, txr->tx_hdr_dmap);
		bus_dmamem_free(txr->tx_hdr_dtag, txr->tx_hdr,
		    txr->tx_hdr_dmap);
		bus_dma_tag_destroy(txr->tx_hdr_dtag);
		txr->tx_hdr = NULL;
	}

	if (txr->tx_buf == NULL)
		return;

	for (i = 0; i < ndesc; ++i) {
		struct igb_tx_buf *txbuf = &txr->tx_buf[i];

		KKASSERT(txbuf->m_head == NULL);
		bus_dmamap_destroy(txr->tx_tag, txbuf->map);
	}
	bus_dma_tag_destroy(txr->tx_tag);

	kfree(txr->tx_buf, M_DEVBUF);
	txr->tx_buf = NULL;
}

static void
igb_init_tx_ring(struct igb_tx_ring *txr)
{
	/* Clear the old descriptor contents */
	bzero(txr->tx_base,
	    sizeof(union e1000_adv_tx_desc) * txr->num_tx_desc);

	/* Clear TX head write-back buffer */
	*(txr->tx_hdr) = 0;

	/* Reset indices */
	txr->next_avail_desc = 0;
	txr->next_to_clean = 0;
	txr->tx_nsegs = 0;

	/* Set number of descriptors available */
	txr->tx_avail = txr->num_tx_desc;
}

static void
igb_init_tx_unit(struct igb_softc *sc)
{
	struct e1000_hw *hw = &sc->hw;
	uint32_t tctl;
	int i;

	/* Setup the Tx Descriptor Rings */
	for (i = 0; i < sc->tx_ring_cnt; ++i) {
		struct igb_tx_ring *txr = &sc->tx_rings[i];
		uint64_t bus_addr = txr->txdma.dma_paddr;
		uint64_t hdr_paddr = txr->tx_hdr_paddr;
		uint32_t txdctl = 0;
		uint32_t dca_txctrl;

		E1000_WRITE_REG(hw, E1000_TDLEN(i),
		    txr->num_tx_desc * sizeof(struct e1000_tx_desc));
		E1000_WRITE_REG(hw, E1000_TDBAH(i),
		    (uint32_t)(bus_addr >> 32));
		E1000_WRITE_REG(hw, E1000_TDBAL(i),
		    (uint32_t)bus_addr);

		/* Setup the HW Tx Head and Tail descriptor pointers */
		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
		E1000_WRITE_REG(hw, E1000_TDH(i), 0);

		/*
		 * WTHRESH is ignored by the hardware, since header
		 * write back mode is used.
		 */
		txdctl |= IGB_TX_PTHRESH;
		txdctl |= IGB_TX_HTHRESH << 8;
		txdctl |= IGB_TX_WTHRESH << 16;
		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);

		dca_txctrl = E1000_READ_REG(hw, E1000_DCA_TXCTRL(i));
		dca_txctrl &= ~E1000_DCA_TXCTRL_TX_WB_RO_EN;
		E1000_WRITE_REG(hw, E1000_DCA_TXCTRL(i), dca_txctrl);

		/*
		 * Don't set WB_on_EITR:
		 * - 82575 does not have it
		 * - It almost has no effect on 82576, see:
		 *   82576 specification update errata #26
		 * - It causes unnecessary bus traffic
		 */
		E1000_WRITE_REG(hw, E1000_TDWBAH(i),
		    (uint32_t)(hdr_paddr >> 32));
		E1000_WRITE_REG(hw, E1000_TDWBAL(i),
		    ((uint32_t)hdr_paddr) | E1000_TX_HEAD_WB_ENABLE);
	}

	if (sc->vf_ifp)
		return;

	e1000_config_collision_dist(hw);

	/* Program the Transmit Control Register */
	tctl = E1000_READ_REG(hw, E1000_TCTL);
	tctl &= ~E1000_TCTL_CT;
	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
	    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));

	/* This write will effectively turn on the transmit unit. */
	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
}
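
/*
 * Example (sketch): for an IPv4/TCP packet with a 14-byte Ethernet header
 * and a 20-byte IP header, igb_txcsum_ctx() below ends up writing
 *   vlan_macip_lens = (14 << E1000_ADVTXD_MACLEN_SHIFT) | 20
 *   type_tucmd_mlhl = DEXT | DTYP_CTXT | TUCMD_IPV4 | TUCMD_L4T_TCP
 * into a single context descriptor, which the hardware then applies to
 * the data descriptors that follow it on the ring.
 */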
static boolean_t
igb_txcsum_ctx(struct igb_tx_ring *txr, struct mbuf *mp)
{
	struct e1000_adv_tx_context_desc *TXD;
	uint32_t vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
	int ehdrlen, ctxd, ip_hlen = 0;
	boolean_t offload = TRUE;

	if ((mp->m_pkthdr.csum_flags & IGB_CSUM_FEATURES) == 0)
		offload = FALSE;

	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;

	ctxd = txr->next_avail_desc;
	TXD = (struct e1000_adv_tx_context_desc *)&txr->tx_base[ctxd];

	/*
	 * In advanced descriptors the vlan tag must
	 * be placed into the context descriptor, thus
	 * we need to be here just for that setup.
	 */
	if (mp->m_flags & M_VLANTAG) {
		uint16_t vlantag;

		vlantag = htole16(mp->m_pkthdr.ether_vlantag);
		vlan_macip_lens |= (vlantag << E1000_ADVTXD_VLAN_SHIFT);
	} else if (!offload) {
		return FALSE;
	}

	ehdrlen = mp->m_pkthdr.csum_lhlen;
	KASSERT(ehdrlen > 0, ("invalid ether hlen"));

	/* Set the ether header length */
	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
		ip_hlen = mp->m_pkthdr.csum_iphlen;
		KASSERT(ip_hlen > 0, ("invalid ip hlen"));
	}
	vlan_macip_lens |= ip_hlen;

	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
	if (mp->m_pkthdr.csum_flags & CSUM_TCP)
		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
	else if (mp->m_pkthdr.csum_flags & CSUM_UDP)
		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;

	/* 82575 needs the queue index added */
	if (txr->sc->hw.mac.type == e1000_82575)
		mss_l4len_idx = txr->me << 4;

	/* Now copy bits into descriptor */
	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
	TXD->seqnum_seed = htole32(0);
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	/* We've consumed the first desc, adjust counters */
	if (++ctxd == txr->num_tx_desc)
		ctxd = 0;
	txr->next_avail_desc = ctxd;
	--txr->tx_avail;

	return offload;
}

static void
igb_txeof(struct igb_tx_ring *txr)
{
	struct ifnet *ifp = &txr->sc->arpcom.ac_if;
	int first, hdr, avail;

	if (txr->tx_avail == txr->num_tx_desc)
		return;

	first = txr->next_to_clean;
	hdr = *(txr->tx_hdr);

	if (first == hdr)
		return;

	avail = txr->tx_avail;
	while (first != hdr) {
		struct igb_tx_buf *txbuf = &txr->tx_buf[first];

		++avail;
		if (txbuf->m_head) {
			bus_dmamap_unload(txr->tx_tag, txbuf->map);
			m_freem(txbuf->m_head);
			txbuf->m_head = NULL;
			++ifp->if_opackets;
		}
		if (++first == txr->num_tx_desc)
			first = 0;
	}
	txr->next_to_clean = first;
	txr->tx_avail = avail;

	/*
	 * If we have a minimum free, clear IFF_OACTIVE
	 * to tell the stack that it is OK to send packets.
	 */
	if (IGB_IS_NOT_OACTIVE(txr)) {
		ifp->if_flags &= ~IFF_OACTIVE;

		/*
		 * We have enough TX descriptors, turn off
		 * the watchdog.  We allow a small number of
		 * packets (roughly intr_nsegs) to remain
		 * pending on the transmit ring.
		 */
		ifp->if_timer = 0;
	}
}
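
/*
 * Note (design): igb_txeof() never reads the E1000_TDH register; the
 * hardware DMAs the consumed-descriptor index into *txr->tx_hdr (armed
 * via TDWBAL/TDWBAH in igb_init_tx_unit()), so TX cleanup is driven by
 * a cheap coherent-memory read instead of an MMIO access.
 */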
1981 */ 1982 ifp->if_timer = 0; 1983 } 1984 } 1985 1986 static int 1987 igb_create_rx_ring(struct igb_rx_ring *rxr) 1988 { 1989 int rsize, i, error, nrxd; 1990 1991 /* 1992 * Validate the number of receive descriptors. It must not exceed 1993 * the hardware maximum and the ring size must be a multiple of IGB_DBA_ALIGN. 1994 */ 1995 nrxd = device_getenv_int(rxr->sc->dev, "rxd", igb_rxd); 1996 if ((nrxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN != 0 || 1997 nrxd > IGB_MAX_RXD || nrxd < IGB_MIN_RXD) { 1998 device_printf(rxr->sc->dev, 1999 "Using %d RX descriptors instead of %d!\n", 2000 IGB_DEFAULT_RXD, nrxd); 2001 rxr->num_rx_desc = IGB_DEFAULT_RXD; 2002 } else { 2003 rxr->num_rx_desc = nrxd; 2004 } 2005 2006 /* 2007 * Allocate the RX descriptor ring 2008 */ 2009 rsize = roundup2(rxr->num_rx_desc * sizeof(union e1000_adv_rx_desc), 2010 IGB_DBA_ALIGN); 2011 rxr->rxdma.dma_vaddr = bus_dmamem_coherent_any(rxr->sc->parent_tag, 2012 IGB_DBA_ALIGN, rsize, BUS_DMA_WAITOK, 2013 &rxr->rxdma.dma_tag, &rxr->rxdma.dma_map, 2014 &rxr->rxdma.dma_paddr); 2015 if (rxr->rxdma.dma_vaddr == NULL) { 2016 device_printf(rxr->sc->dev, 2017 "Unable to allocate RX descriptor memory\n"); 2018 return ENOMEM; 2019 } 2020 rxr->rx_base = rxr->rxdma.dma_vaddr; 2021 bzero(rxr->rx_base, rsize); 2022 2023 rsize = __VM_CACHELINE_ALIGN( 2024 sizeof(struct igb_rx_buf) * rxr->num_rx_desc); 2025 rxr->rx_buf = kmalloc_cachealign(rsize, M_DEVBUF, M_WAITOK | M_ZERO); 2026 2027 /* 2028 * Create DMA tag for RX buffers 2029 */ 2030 error = bus_dma_tag_create(rxr->sc->parent_tag, 2031 1, 0, /* alignment, bounds */ 2032 BUS_SPACE_MAXADDR, /* lowaddr */ 2033 BUS_SPACE_MAXADDR, /* highaddr */ 2034 NULL, NULL, /* filter, filterarg */ 2035 MCLBYTES, /* maxsize */ 2036 1, /* nsegments */ 2037 MCLBYTES, /* maxsegsize */ 2038 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, /* flags */ 2039 &rxr->rx_tag); 2040 if (error) { 2041 device_printf(rxr->sc->dev, 2042 "Unable to create RX payload DMA tag\n"); 2043 kfree(rxr->rx_buf, M_DEVBUF); 2044 rxr->rx_buf = NULL; 2045 return error; 2046 } 2047 2048 /* 2049 * Create the spare DMA map for RX buffers 2050 */ 2051 error = bus_dmamap_create(rxr->rx_tag, BUS_DMA_WAITOK, 2052 &rxr->rx_sparemap); 2053 if (error) { 2054 device_printf(rxr->sc->dev, 2055 "Unable to create spare RX DMA map\n"); 2056 bus_dma_tag_destroy(rxr->rx_tag); 2057 kfree(rxr->rx_buf, M_DEVBUF); 2058 rxr->rx_buf = NULL; 2059 return error; 2060 } 2061 2062 /* 2063 * Create DMA maps for RX buffers 2064 */ 2065 for (i = 0; i < rxr->num_rx_desc; i++) { 2066 struct igb_rx_buf *rxbuf = &rxr->rx_buf[i]; 2067 2068 error = bus_dmamap_create(rxr->rx_tag, 2069 BUS_DMA_WAITOK, &rxbuf->map); 2070 if (error) { 2071 device_printf(rxr->sc->dev, 2072 "Unable to create RX DMA maps\n"); 2073 igb_destroy_rx_ring(rxr, i); 2074 return error; 2075 } 2076 } 2077 return 0; 2078 } 2079 2080 static void 2081 igb_free_rx_ring(struct igb_rx_ring *rxr) 2082 { 2083 int i; 2084 2085 for (i = 0; i < rxr->num_rx_desc; ++i) { 2086 struct igb_rx_buf *rxbuf = &rxr->rx_buf[i]; 2087 2088 if (rxbuf->m_head != NULL) { 2089 bus_dmamap_unload(rxr->rx_tag, rxbuf->map); 2090 m_freem(rxbuf->m_head); 2091 rxbuf->m_head = NULL; 2092 } 2093 } 2094 2095 if (rxr->fmp != NULL) 2096 m_freem(rxr->fmp); 2097 rxr->fmp = NULL; 2098 rxr->lmp = NULL; 2099 } 2100 2101 static void 2102 igb_destroy_rx_ring(struct igb_rx_ring *rxr, int ndesc) 2103 { 2104 int i; 2105 2106 if (rxr->rxdma.dma_vaddr != NULL) { 2107 bus_dmamap_unload(rxr->rxdma.dma_tag, rxr->rxdma.dma_map); 2108 bus_dmamem_free(rxr->rxdma.dma_tag, rxr->rxdma.dma_vaddr, 2109
rxr->rxdma.dma_map); 2110 bus_dma_tag_destroy(rxr->rxdma.dma_tag); 2111 rxr->rxdma.dma_vaddr = NULL; 2112 } 2113 2114 if (rxr->rx_buf == NULL) 2115 return; 2116 2117 for (i = 0; i < ndesc; ++i) { 2118 struct igb_rx_buf *rxbuf = &rxr->rx_buf[i]; 2119 2120 KKASSERT(rxbuf->m_head == NULL); 2121 bus_dmamap_destroy(rxr->rx_tag, rxbuf->map); 2122 } 2123 bus_dmamap_destroy(rxr->rx_tag, rxr->rx_sparemap); 2124 bus_dma_tag_destroy(rxr->rx_tag); 2125 2126 kfree(rxr->rx_buf, M_DEVBUF); 2127 rxr->rx_buf = NULL; 2128 } 2129 2130 static void 2131 igb_setup_rxdesc(union e1000_adv_rx_desc *rxd, const struct igb_rx_buf *rxbuf) 2132 { 2133 rxd->read.pkt_addr = htole64(rxbuf->paddr); 2134 rxd->wb.upper.status_error = 0; 2135 } 2136 2137 static int 2138 igb_newbuf(struct igb_rx_ring *rxr, int i, boolean_t wait) 2139 { 2140 struct mbuf *m; 2141 bus_dma_segment_t seg; 2142 bus_dmamap_t map; 2143 struct igb_rx_buf *rxbuf; 2144 int error, nseg; 2145 2146 m = m_getcl(wait ? MB_WAIT : MB_DONTWAIT, MT_DATA, M_PKTHDR); 2147 if (m == NULL) { 2148 if (wait) { 2149 if_printf(&rxr->sc->arpcom.ac_if, 2150 "Unable to allocate RX mbuf\n"); 2151 } 2152 return ENOBUFS; 2153 } 2154 m->m_len = m->m_pkthdr.len = MCLBYTES; 2155 2156 if (rxr->sc->max_frame_size <= MCLBYTES - ETHER_ALIGN) 2157 m_adj(m, ETHER_ALIGN); 2158 2159 error = bus_dmamap_load_mbuf_segment(rxr->rx_tag, 2160 rxr->rx_sparemap, m, &seg, 1, &nseg, BUS_DMA_NOWAIT); 2161 if (error) { 2162 m_freem(m); 2163 if (wait) { 2164 if_printf(&rxr->sc->arpcom.ac_if, 2165 "Unable to load RX mbuf\n"); 2166 } 2167 return error; 2168 } 2169 2170 rxbuf = &rxr->rx_buf[i]; 2171 if (rxbuf->m_head != NULL) 2172 bus_dmamap_unload(rxr->rx_tag, rxbuf->map); 2173 2174 map = rxbuf->map; 2175 rxbuf->map = rxr->rx_sparemap; 2176 rxr->rx_sparemap = map; 2177 2178 rxbuf->m_head = m; 2179 rxbuf->paddr = seg.ds_addr; 2180 2181 igb_setup_rxdesc(&rxr->rx_base[i], rxbuf); 2182 return 0; 2183 } 2184 2185 static int 2186 igb_init_rx_ring(struct igb_rx_ring *rxr) 2187 { 2188 int i; 2189 2190 /* Clear the ring contents */ 2191 bzero(rxr->rx_base, 2192 rxr->num_rx_desc * sizeof(union e1000_adv_rx_desc)); 2193 2194 /* Now replenish the ring mbufs */ 2195 for (i = 0; i < rxr->num_rx_desc; ++i) { 2196 int error; 2197 2198 error = igb_newbuf(rxr, i, TRUE); 2199 if (error) 2200 return error; 2201 } 2202 2203 /* Setup our descriptor indices */ 2204 rxr->next_to_check = 0; 2205 2206 rxr->fmp = NULL; 2207 rxr->lmp = NULL; 2208 rxr->discard = FALSE; 2209 2210 return 0; 2211 } 2212 2213 static void 2214 igb_init_rx_unit(struct igb_softc *sc) 2215 { 2216 struct ifnet *ifp = &sc->arpcom.ac_if; 2217 struct e1000_hw *hw = &sc->hw; 2218 uint32_t rctl, rxcsum, srrctl = 0; 2219 int i; 2220 2221 /* 2222 * Make sure receives are disabled while setting 2223 * up the descriptor ring 2224 */ 2225 rctl = E1000_READ_REG(hw, E1000_RCTL); 2226 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); 2227 2228 #if 0 2229 /* 2230 ** Set up for header split 2231 */ 2232 if (igb_header_split) { 2233 /* Use a standard mbuf for the header */ 2234 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; 2235 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; 2236 } else 2237 #endif 2238 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; 2239 2240 /* 2241 ** Set up for jumbo frames 2242 */ 2243 if (ifp->if_mtu > ETHERMTU) { 2244 rctl |= E1000_RCTL_LPE; 2245 #if 0 2246 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) { 2247 srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 2248 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; 2249 } else if 
(adapter->rx_mbuf_sz > MJUMPAGESIZE) { 2250 srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 2251 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; 2252 } 2253 /* Set maximum packet len */ 2254 psize = adapter->max_frame_size; 2255 /* are we on a vlan? */ 2256 if (adapter->ifp->if_vlantrunk != NULL) 2257 psize += VLAN_TAG_SIZE; 2258 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize); 2259 #else 2260 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 2261 rctl |= E1000_RCTL_SZ_2048; 2262 #endif 2263 } else { 2264 rctl &= ~E1000_RCTL_LPE; 2265 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 2266 rctl |= E1000_RCTL_SZ_2048; 2267 } 2268 2269 /* Setup the Base and Length of the Rx Descriptor Rings */ 2270 for (i = 0; i < sc->rx_ring_inuse; ++i) { 2271 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 2272 uint64_t bus_addr = rxr->rxdma.dma_paddr; 2273 uint32_t rxdctl; 2274 2275 E1000_WRITE_REG(hw, E1000_RDLEN(i), 2276 rxr->num_rx_desc * sizeof(struct e1000_rx_desc)); 2277 E1000_WRITE_REG(hw, E1000_RDBAH(i), 2278 (uint32_t)(bus_addr >> 32)); 2279 E1000_WRITE_REG(hw, E1000_RDBAL(i), 2280 (uint32_t)bus_addr); 2281 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl); 2282 /* Enable this Queue */ 2283 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); 2284 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; 2285 rxdctl &= 0xFFF00000; 2286 rxdctl |= IGB_RX_PTHRESH; 2287 rxdctl |= IGB_RX_HTHRESH << 8; 2288 /* 2289 * Don't set WTHRESH to a value above 1 on 82576, see: 2290 * 82576 specification update errata #26 2291 */ 2292 rxdctl |= IGB_RX_WTHRESH << 16; 2293 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); 2294 } 2295 2296 rxcsum = E1000_READ_REG(&sc->hw, E1000_RXCSUM); 2297 rxcsum &= ~(E1000_RXCSUM_PCSS_MASK | E1000_RXCSUM_IPPCSE); 2298 2299 /* 2300 * Receive Checksum Offload for TCP and UDP 2301 * 2302 * Checksum offloading is also enabled if multiple receive 2303 * queues are to be supported, since we need it to figure out 2304 * fragments. 2305 */ 2306 if ((ifp->if_capenable & IFCAP_RXCSUM) || IGB_ENABLE_HWRSS(sc)) { 2307 /* 2308 * NOTE: 2309 * PCSD must be enabled to enable multiple 2310 * receive queues. 2311 */ 2312 rxcsum |= E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL | 2313 E1000_RXCSUM_PCSD; 2314 } else { 2315 rxcsum &= ~(E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL | 2316 E1000_RXCSUM_PCSD); 2317 } 2318 E1000_WRITE_REG(&sc->hw, E1000_RXCSUM, rxcsum); 2319 2320 if (IGB_ENABLE_HWRSS(sc)) { 2321 uint8_t key[IGB_NRSSRK * IGB_RSSRK_SIZE]; 2322 uint32_t reta_shift; 2323 int j, r; 2324 2325 /* 2326 * NOTE: 2327 * When we reach here, RSS has already been disabled 2328 * in igb_stop(), so we can safely configure the RSS key 2329 * and redirect table.
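 * Reprogramming the key or the redirect table while receives
 * are flowing could briefly steer packets of a single connection
 * to different rings, so this configure-while-stopped ordering
 * matters.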
2330 */ 2331 2332 /* 2333 * Configure the RSS key 2334 */ 2335 toeplitz_get_key(key, sizeof(key)); 2336 for (i = 0; i < IGB_NRSSRK; ++i) { 2337 uint32_t rssrk; 2338 2339 rssrk = IGB_RSSRK_VAL(key, i); 2340 IGB_RSS_DPRINTF(sc, 1, "rssrk%d 0x%08x\n", i, rssrk); 2341 2342 E1000_WRITE_REG(hw, E1000_RSSRK(i), rssrk); 2343 } 2344 2345 /* 2346 * Configure the RSS redirect table in the following fashion: 2347 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)] 2348 */ 2349 reta_shift = IGB_RETA_SHIFT; 2350 if (hw->mac.type == e1000_82575) 2351 reta_shift = IGB_RETA_SHIFT_82575; 2352 2353 r = 0; 2354 for (j = 0; j < IGB_NRETA; ++j) { 2355 uint32_t reta = 0; 2356 2357 for (i = 0; i < IGB_RETA_SIZE; ++i) { 2358 uint32_t q; 2359 2360 q = (r % sc->rx_ring_inuse) << reta_shift; 2361 reta |= q << (8 * i); 2362 ++r; 2363 } 2364 IGB_RSS_DPRINTF(sc, 1, "reta 0x%08x\n", reta); 2365 E1000_WRITE_REG(hw, E1000_RETA(j), reta); 2366 } 2367 2368 /* 2369 * Enable multiple receive queues. 2370 * Enable IPv4 RSS standard hash functions. 2371 * Disable RSS interrupts on 82575. 2372 */ 2373 E1000_WRITE_REG(&sc->hw, E1000_MRQC, 2374 E1000_MRQC_ENABLE_RSS_4Q | 2375 E1000_MRQC_RSS_FIELD_IPV4_TCP | 2376 E1000_MRQC_RSS_FIELD_IPV4); 2377 } 2378 2379 /* Setup the Receive Control Register */ 2380 rctl &= ~(3 << E1000_RCTL_MO_SHIFT); 2381 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | 2382 E1000_RCTL_RDMTS_HALF | 2383 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); 2384 /* Strip CRC bytes. */ 2385 rctl |= E1000_RCTL_SECRC; 2386 /* Make sure VLAN Filters are off */ 2387 rctl &= ~E1000_RCTL_VFE; 2388 /* Don't store bad packets */ 2389 rctl &= ~E1000_RCTL_SBP; 2390 2391 /* Enable Receives */ 2392 E1000_WRITE_REG(hw, E1000_RCTL, rctl); 2393 2394 /* 2395 * Setup the HW Rx Head and Tail Descriptor Pointers 2396 * - this needs to be done after the receive unit is enabled 2397 */ 2398 for (i = 0; i < sc->rx_ring_inuse; ++i) { 2399 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 2400 2401 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check); 2402 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->num_rx_desc - 1); 2403 } 2404 } 2405 2406 static void 2407 igb_rxeof(struct igb_rx_ring *rxr, int count) 2408 { 2409 struct ifnet *ifp = &rxr->sc->arpcom.ac_if; 2410 union e1000_adv_rx_desc *cur; 2411 uint32_t staterr; 2412 int i; 2413 2414 i = rxr->next_to_check; 2415 cur = &rxr->rx_base[i]; 2416 staterr = le32toh(cur->wb.upper.status_error); 2417 2418 if ((staterr & E1000_RXD_STAT_DD) == 0) 2419 return; 2420 2421 while ((staterr & E1000_RXD_STAT_DD) && count != 0) { 2422 struct pktinfo *pi = NULL, pi0; 2423 struct igb_rx_buf *rxbuf = &rxr->rx_buf[i]; 2424 struct mbuf *m = NULL; 2425 boolean_t eop; 2426 2427 eop = (staterr & E1000_RXD_STAT_EOP) ?
TRUE : FALSE; 2428 if (eop) 2429 --count; 2430 2431 if ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) == 0 && 2432 !rxr->discard) { 2433 struct mbuf *mp = rxbuf->m_head; 2434 uint32_t hash, hashtype; 2435 uint16_t vlan; 2436 int len; 2437 2438 len = le16toh(cur->wb.upper.length); 2439 if (rxr->sc->hw.mac.type == e1000_i350 && 2440 (staterr & E1000_RXDEXT_STATERR_LB)) 2441 vlan = be16toh(cur->wb.upper.vlan); 2442 else 2443 vlan = le16toh(cur->wb.upper.vlan); 2444 2445 hash = le32toh(cur->wb.lower.hi_dword.rss); 2446 hashtype = le32toh(cur->wb.lower.lo_dword.data) & 2447 E1000_RXDADV_RSSTYPE_MASK; 2448 2449 IGB_RSS_DPRINTF(rxr->sc, 10, 2450 "ring%d, hash 0x%08x, hashtype %u\n", 2451 rxr->me, hash, hashtype); 2452 2453 bus_dmamap_sync(rxr->rx_tag, rxbuf->map, 2454 BUS_DMASYNC_POSTREAD); 2455 2456 if (igb_newbuf(rxr, i, FALSE) != 0) { 2457 ifp->if_iqdrops++; 2458 goto discard; 2459 } 2460 2461 mp->m_len = len; 2462 if (rxr->fmp == NULL) { 2463 mp->m_pkthdr.len = len; 2464 rxr->fmp = mp; 2465 rxr->lmp = mp; 2466 } else { 2467 rxr->lmp->m_next = mp; 2468 rxr->lmp = rxr->lmp->m_next; 2469 rxr->fmp->m_pkthdr.len += len; 2470 } 2471 2472 if (eop) { 2473 m = rxr->fmp; 2474 rxr->fmp = NULL; 2475 rxr->lmp = NULL; 2476 2477 m->m_pkthdr.rcvif = ifp; 2478 ifp->if_ipackets++; 2479 2480 if (ifp->if_capenable & IFCAP_RXCSUM) 2481 igb_rxcsum(staterr, m); 2482 2483 if (staterr & E1000_RXD_STAT_VP) { 2484 m->m_pkthdr.ether_vlantag = vlan; 2485 m->m_flags |= M_VLANTAG; 2486 } 2487 2488 if (ifp->if_capenable & IFCAP_RSS) { 2489 pi = igb_rssinfo(m, &pi0, 2490 hash, hashtype, staterr); 2491 } 2492 #ifdef IGB_RSS_DEBUG 2493 rxr->rx_packets++; 2494 #endif 2495 } 2496 } else { 2497 ifp->if_ierrors++; 2498 discard: 2499 igb_setup_rxdesc(cur, rxbuf); 2500 if (!eop) 2501 rxr->discard = TRUE; 2502 else 2503 rxr->discard = FALSE; 2504 if (rxr->fmp != NULL) { 2505 m_freem(rxr->fmp); 2506 rxr->fmp = NULL; 2507 rxr->lmp = NULL; 2508 } 2509 m = NULL; 2510 } 2511 2512 if (m != NULL) 2513 ether_input_pkt(ifp, m, pi); 2514 2515 /* Advance our pointers to the next descriptor. */ 2516 if (++i == rxr->num_rx_desc) 2517 i = 0; 2518 2519 cur = &rxr->rx_base[i]; 2520 staterr = le32toh(cur->wb.upper.status_error); 2521 } 2522 rxr->next_to_check = i; 2523 2524 if (--i < 0) 2525 i = rxr->num_rx_desc - 1; 2526 E1000_WRITE_REG(&rxr->sc->hw, E1000_RDT(rxr->me), i); 2527 } 2528 2529 2530 static void 2531 igb_set_vlan(struct igb_softc *sc) 2532 { 2533 struct e1000_hw *hw = &sc->hw; 2534 uint32_t reg; 2535 #if 0 2536 struct ifnet *ifp = &sc->arpcom.ac_if; 2537 #endif 2538 2539 if (sc->vf_ifp) { 2540 e1000_rlpml_set_vf(hw, sc->max_frame_size + VLAN_TAG_SIZE); 2541 return; 2542 } 2543 2544 reg = E1000_READ_REG(hw, E1000_CTRL); 2545 reg |= E1000_CTRL_VME; 2546 E1000_WRITE_REG(hw, E1000_CTRL, reg); 2547 2548 #if 0 2549 /* Enable the Filter Table */ 2550 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) { 2551 reg = E1000_READ_REG(hw, E1000_RCTL); 2552 reg &= ~E1000_RCTL_CFIEN; 2553 reg |= E1000_RCTL_VFE; 2554 E1000_WRITE_REG(hw, E1000_RCTL, reg); 2555 } 2556 #endif 2557 2558 /* Update the frame size */ 2559 E1000_WRITE_REG(&sc->hw, E1000_RLPML, 2560 sc->max_frame_size + VLAN_TAG_SIZE); 2561 2562 #if 0 2563 /* Don't bother with the table if no vlans are in use */ 2564 if ((adapter->num_vlans == 0) || 2565 ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)) 2566 return; 2567 /* 2568 ** A soft reset zeroes out the VFTA, so 2569 ** we need to repopulate it now.
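 ** (This block is under #if 0 and compiled out; VLAN hardware
 ** filtering is never enabled by this driver, see the RCTL_VFE
 ** clearing in igb_init_rx_unit().)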
2570 */ 2571 for (int i = 0; i < IGB_VFTA_SIZE; i++) 2572 if (adapter->shadow_vfta[i] != 0) { 2573 if (adapter->vf_ifp) 2574 e1000_vfta_set_vf(hw, 2575 adapter->shadow_vfta[i], TRUE); 2576 else 2577 E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, 2578 i, adapter->shadow_vfta[i]); 2579 } 2580 #endif 2581 } 2582 2583 static void 2584 igb_enable_intr(struct igb_softc *sc) 2585 { 2586 if (sc->intr_type != PCI_INTR_TYPE_MSIX) { 2587 lwkt_serialize_handler_enable(&sc->main_serialize); 2588 } else { 2589 int i; 2590 2591 for (i = 0; i < sc->msix_cnt; ++i) { 2592 lwkt_serialize_handler_enable( 2593 sc->msix_data[i].msix_serialize); 2594 } 2595 } 2596 2597 if ((sc->flags & IGB_FLAG_SHARED_INTR) == 0) { 2598 if (sc->intr_type == PCI_INTR_TYPE_MSIX) 2599 E1000_WRITE_REG(&sc->hw, E1000_EIAC, sc->intr_mask); 2600 else 2601 E1000_WRITE_REG(&sc->hw, E1000_EIAC, 0); 2602 E1000_WRITE_REG(&sc->hw, E1000_EIAM, sc->intr_mask); 2603 E1000_WRITE_REG(&sc->hw, E1000_EIMS, sc->intr_mask); 2604 E1000_WRITE_REG(&sc->hw, E1000_IMS, E1000_IMS_LSC); 2605 } else { 2606 E1000_WRITE_REG(&sc->hw, E1000_IMS, IMS_ENABLE_MASK); 2607 } 2608 E1000_WRITE_FLUSH(&sc->hw); 2609 } 2610 2611 static void 2612 igb_disable_intr(struct igb_softc *sc) 2613 { 2614 if ((sc->flags & IGB_FLAG_SHARED_INTR) == 0) { 2615 E1000_WRITE_REG(&sc->hw, E1000_EIMC, 0xffffffff); 2616 E1000_WRITE_REG(&sc->hw, E1000_EIAC, 0); 2617 } 2618 E1000_WRITE_REG(&sc->hw, E1000_IMC, 0xffffffff); 2619 E1000_WRITE_FLUSH(&sc->hw); 2620 2621 if (sc->intr_type != PCI_INTR_TYPE_MSIX) { 2622 lwkt_serialize_handler_disable(&sc->main_serialize); 2623 } else { 2624 int i; 2625 2626 for (i = 0; i < sc->msix_cnt; ++i) { 2627 lwkt_serialize_handler_disable( 2628 sc->msix_data[i].msix_serialize); 2629 } 2630 } 2631 } 2632 2633 /* 2634 * Bit of a misnomer, what this really means is 2635 * to enable OS management of the system... aka 2636 * to disable special hardware management features 2637 */ 2638 static void 2639 igb_get_mgmt(struct igb_softc *sc) 2640 { 2641 if (sc->flags & IGB_FLAG_HAS_MGMT) { 2642 int manc2h = E1000_READ_REG(&sc->hw, E1000_MANC2H); 2643 int manc = E1000_READ_REG(&sc->hw, E1000_MANC); 2644 2645 /* disable hardware interception of ARP */ 2646 manc &= ~E1000_MANC_ARP_EN; 2647 2648 /* enable receiving management packets to the host */ 2649 manc |= E1000_MANC_EN_MNG2HOST; 2650 manc2h |= 1 << 5; /* Mng Port 623 */ 2651 manc2h |= 1 << 6; /* Mng Port 664 */ 2652 E1000_WRITE_REG(&sc->hw, E1000_MANC2H, manc2h); 2653 E1000_WRITE_REG(&sc->hw, E1000_MANC, manc); 2654 } 2655 } 2656 2657 /* 2658 * Give control back to hardware management controller 2659 * if there is one. 2660 */ 2661 static void 2662 igb_rel_mgmt(struct igb_softc *sc) 2663 { 2664 if (sc->flags & IGB_FLAG_HAS_MGMT) { 2665 int manc = E1000_READ_REG(&sc->hw, E1000_MANC); 2666 2667 /* Re-enable hardware interception of ARP */ 2668 manc |= E1000_MANC_ARP_EN; 2669 manc &= ~E1000_MANC_EN_MNG2HOST; 2670 2671 E1000_WRITE_REG(&sc->hw, E1000_MANC, manc); 2672 } 2673 } 2674 2675 /* 2676 * Sets CTRL_EXT:DRV_LOAD bit. 2677 * 2678 * For ASF and Pass Through versions of f/w this means that 2679 * the driver is loaded. 2680 */ 2681 static void 2682 igb_get_hw_control(struct igb_softc *sc) 2683 { 2684 uint32_t ctrl_ext; 2685 2686 if (sc->vf_ifp) 2687 return; 2688 2689 /* Let firmware know the driver has taken over */ 2690 ctrl_ext = E1000_READ_REG(&sc->hw, E1000_CTRL_EXT); 2691 E1000_WRITE_REG(&sc->hw, E1000_CTRL_EXT, 2692 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); 2693 } 2694 2695 /* 2696 * Resets CTRL_EXT:DRV_LOAD bit. 
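 * This undoes igb_get_hw_control() above.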
2697 * 2698 * For ASF and Pass Through versions of f/w this means that the 2699 * driver is no longer loaded. 2700 */ 2701 static void 2702 igb_rel_hw_control(struct igb_softc *sc) 2703 { 2704 uint32_t ctrl_ext; 2705 2706 if (sc->vf_ifp) 2707 return; 2708 2709 /* Let firmware take over control of h/w */ 2710 ctrl_ext = E1000_READ_REG(&sc->hw, E1000_CTRL_EXT); 2711 E1000_WRITE_REG(&sc->hw, E1000_CTRL_EXT, 2712 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); 2713 } 2714 2715 static int 2716 igb_is_valid_ether_addr(const uint8_t *addr) 2717 { 2718 uint8_t zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 }; 2719 2720 if ((addr[0] & 1) || !bcmp(addr, zero_addr, ETHER_ADDR_LEN)) 2721 return FALSE; 2722 return TRUE; 2723 } 2724 2725 /* 2726 * Enable PCI Wake On LAN capability 2727 */ 2728 static void 2729 igb_enable_wol(device_t dev) 2730 { 2731 uint16_t cap, status; 2732 uint8_t id; 2733 2734 /* First find the capabilities pointer */ 2735 cap = pci_read_config(dev, PCIR_CAP_PTR, 2); 2736 2737 /* Read the PM Capabilities */ 2738 id = pci_read_config(dev, cap, 1); 2739 if (id != PCIY_PMG) /* Something wrong */ 2740 return; 2741 2742 /* 2743 * OK, we have the power capabilities, 2744 * so now get the status register 2745 */ 2746 cap += PCIR_POWER_STATUS; 2747 status = pci_read_config(dev, cap, 2); 2748 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; 2749 pci_write_config(dev, cap, status, 2); 2750 } 2751 2752 static void 2753 igb_update_stats_counters(struct igb_softc *sc) 2754 { 2755 struct e1000_hw *hw = &sc->hw; 2756 struct e1000_hw_stats *stats; 2757 struct ifnet *ifp = &sc->arpcom.ac_if; 2758 2759 /* 2760 * The virtual function adapter has only a 2761 * small controlled set of stats, so update only 2762 * those and return. 2763 */ 2764 if (sc->vf_ifp) { 2765 igb_update_vf_stats_counters(sc); 2766 return; 2767 } 2768 stats = sc->stats; 2769 2770 if (sc->hw.phy.media_type == e1000_media_type_copper || 2771 (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) { 2772 stats->symerrs += 2773 E1000_READ_REG(hw, E1000_SYMERRS); 2774 stats->sec += E1000_READ_REG(hw, E1000_SEC); 2775 } 2776 2777 stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS); 2778 stats->mpc += E1000_READ_REG(hw, E1000_MPC); 2779 stats->scc += E1000_READ_REG(hw, E1000_SCC); 2780 stats->ecol += E1000_READ_REG(hw, E1000_ECOL); 2781 2782 stats->mcc += E1000_READ_REG(hw, E1000_MCC); 2783 stats->latecol += E1000_READ_REG(hw, E1000_LATECOL); 2784 stats->colc += E1000_READ_REG(hw, E1000_COLC); 2785 stats->dc += E1000_READ_REG(hw, E1000_DC); 2786 stats->rlec += E1000_READ_REG(hw, E1000_RLEC); 2787 stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC); 2788 stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC); 2789 2790 /* 2791 * For watchdog management we need to know if we have been 2792 * paused during the last interval, so capture that here.
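 * igb_watchdog() then treats a timeout with a non-zero
 * pause_frames count as a flow-control stall and simply re-arms
 * the timer instead of resetting the chip.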
2793 */ 2794 sc->pause_frames = E1000_READ_REG(hw, E1000_XOFFRXC); 2795 stats->xoffrxc += sc->pause_frames; 2796 stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC); 2797 stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC); 2798 stats->prc64 += E1000_READ_REG(hw, E1000_PRC64); 2799 stats->prc127 += E1000_READ_REG(hw, E1000_PRC127); 2800 stats->prc255 += E1000_READ_REG(hw, E1000_PRC255); 2801 stats->prc511 += E1000_READ_REG(hw, E1000_PRC511); 2802 stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023); 2803 stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522); 2804 stats->gprc += E1000_READ_REG(hw, E1000_GPRC); 2805 stats->bprc += E1000_READ_REG(hw, E1000_BPRC); 2806 stats->mprc += E1000_READ_REG(hw, E1000_MPRC); 2807 stats->gptc += E1000_READ_REG(hw, E1000_GPTC); 2808 2809 /* For the 64-bit byte counters the low dword must be read first. */ 2810 /* Both registers clear on the read of the high dword */ 2811 2812 stats->gorc += E1000_READ_REG(hw, E1000_GORCL) + 2813 ((uint64_t)E1000_READ_REG(hw, E1000_GORCH) << 32); 2814 stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) + 2815 ((uint64_t)E1000_READ_REG(hw, E1000_GOTCH) << 32); 2816 2817 stats->rnbc += E1000_READ_REG(hw, E1000_RNBC); 2818 stats->ruc += E1000_READ_REG(hw, E1000_RUC); 2819 stats->rfc += E1000_READ_REG(hw, E1000_RFC); 2820 stats->roc += E1000_READ_REG(hw, E1000_ROC); 2821 stats->rjc += E1000_READ_REG(hw, E1000_RJC); 2822 2823 stats->tor += E1000_READ_REG(hw, E1000_TORH); 2824 stats->tot += E1000_READ_REG(hw, E1000_TOTH); 2825 2826 stats->tpr += E1000_READ_REG(hw, E1000_TPR); 2827 stats->tpt += E1000_READ_REG(hw, E1000_TPT); 2828 stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64); 2829 stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127); 2830 stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255); 2831 stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511); 2832 stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023); 2833 stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522); 2834 stats->mptc += E1000_READ_REG(hw, E1000_MPTC); 2835 stats->bptc += E1000_READ_REG(hw, E1000_BPTC); 2836 2837 /* Interrupt Counts */ 2838 2839 stats->iac += E1000_READ_REG(hw, E1000_IAC); 2840 stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC); 2841 stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC); 2842 stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC); 2843 stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC); 2844 stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC); 2845 stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC); 2846 stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC); 2847 stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC); 2848 2849 /* Host to Card Statistics */ 2850 2851 stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC); 2852 stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC); 2853 stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC); 2854 stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC); 2855 stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC); 2856 stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC); 2857 stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC); 2858 stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) + 2859 ((uint64_t)E1000_READ_REG(hw, E1000_HGORCH) << 32)); 2860 stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) + 2861 ((uint64_t)E1000_READ_REG(hw, E1000_HGOTCH) << 32)); 2862 stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS); 2863 stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC); 2864 stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC); 2865 2866 stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC); 2867 stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC); 2868 
stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS); 2869 stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR); 2870 stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC); 2871 stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC); 2872 2873 ifp->if_collisions = stats->colc; 2874 2875 /* Rx Errors */ 2876 ifp->if_ierrors = stats->rxerrc + stats->crcerrs + stats->algnerrc + 2877 stats->ruc + stats->roc + stats->mpc + stats->cexterr; 2878 2879 /* Tx Errors */ 2880 ifp->if_oerrors = stats->ecol + stats->latecol + sc->watchdog_events; 2881 2882 /* Driver specific counters */ 2883 sc->device_control = E1000_READ_REG(hw, E1000_CTRL); 2884 sc->rx_control = E1000_READ_REG(hw, E1000_RCTL); 2885 sc->int_mask = E1000_READ_REG(hw, E1000_IMS); 2886 sc->eint_mask = E1000_READ_REG(hw, E1000_EIMS); 2887 sc->packet_buf_alloc_tx = 2888 ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16); 2889 sc->packet_buf_alloc_rx = 2890 (E1000_READ_REG(hw, E1000_PBA) & 0xffff); 2891 } 2892 2893 static void 2894 igb_vf_init_stats(struct igb_softc *sc) 2895 { 2896 struct e1000_hw *hw = &sc->hw; 2897 struct e1000_vf_stats *stats; 2898 2899 stats = sc->stats; 2900 stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC); 2901 stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC); 2902 stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC); 2903 stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC); 2904 stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC); 2905 } 2906 2907 static void 2908 igb_update_vf_stats_counters(struct igb_softc *sc) 2909 { 2910 struct e1000_hw *hw = &sc->hw; 2911 struct e1000_vf_stats *stats; 2912 2913 if (sc->link_speed == 0) 2914 return; 2915 2916 stats = sc->stats; 2917 UPDATE_VF_REG(E1000_VFGPRC, stats->last_gprc, stats->gprc); 2918 UPDATE_VF_REG(E1000_VFGORC, stats->last_gorc, stats->gorc); 2919 UPDATE_VF_REG(E1000_VFGPTC, stats->last_gptc, stats->gptc); 2920 UPDATE_VF_REG(E1000_VFGOTC, stats->last_gotc, stats->gotc); 2921 UPDATE_VF_REG(E1000_VFMPRC, stats->last_mprc, stats->mprc); 2922 } 2923 2924 #ifdef IFPOLL_ENABLE 2925 2926 static void 2927 igb_npoll_status(struct ifnet *ifp) 2928 { 2929 struct igb_softc *sc = ifp->if_softc; 2930 uint32_t reg_icr; 2931 2932 ASSERT_SERIALIZED(&sc->main_serialize); 2933 2934 reg_icr = E1000_READ_REG(&sc->hw, E1000_ICR); 2935 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { 2936 sc->hw.mac.get_link_status = 1; 2937 igb_update_link_status(sc); 2938 } 2939 } 2940 2941 static void 2942 igb_npoll_tx(struct ifnet *ifp, void *arg, int cycle __unused) 2943 { 2944 struct igb_tx_ring *txr = arg; 2945 2946 ASSERT_SERIALIZED(&txr->tx_serialize); 2947 2948 igb_txeof(txr); 2949 if (!ifq_is_empty(&ifp->if_snd)) 2950 if_devstart(ifp); 2951 } 2952 2953 static void 2954 igb_npoll_rx(struct ifnet *ifp __unused, void *arg, int cycle) 2955 { 2956 struct igb_rx_ring *rxr = arg; 2957 2958 ASSERT_SERIALIZED(&rxr->rx_serialize); 2959 2960 igb_rxeof(rxr, cycle); 2961 } 2962 2963 static void 2964 igb_npoll(struct ifnet *ifp, struct ifpoll_info *info) 2965 { 2966 struct igb_softc *sc = ifp->if_softc; 2967 2968 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2969 2970 if (info) { 2971 struct igb_tx_ring *txr; 2972 int i, off; 2973 2974 info->ifpi_status.status_func = igb_npoll_status; 2975 info->ifpi_status.serializer = &sc->main_serialize; 2976 2977 off = sc->tx_npoll_off; 2978 KKASSERT(off < ncpus2); 2979 txr = &sc->tx_rings[0]; 2980 info->ifpi_tx[off].poll_func = igb_npoll_tx; 2981 info->ifpi_tx[off].arg = txr; 2982 info->ifpi_tx[off].serializer = &txr->tx_serialize; 2983 2984 off = sc->rx_npoll_off; 2985 for (i = 0; i < 
sc->rx_ring_cnt; ++i) { 2986 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 2987 int idx = i + off; 2988 2989 KKASSERT(idx < ncpus2); 2990 info->ifpi_rx[idx].poll_func = igb_npoll_rx; 2991 info->ifpi_rx[idx].arg = rxr; 2992 info->ifpi_rx[idx].serializer = &rxr->rx_serialize; 2993 } 2994 2995 if (ifp->if_flags & IFF_RUNNING) { 2996 if (sc->rx_ring_inuse == sc->rx_ring_cnt) 2997 igb_disable_intr(sc); 2998 else 2999 igb_init(sc); 3000 } 3001 ifp->if_npoll_cpuid = sc->tx_npoll_off; 3002 } else { 3003 if (ifp->if_flags & IFF_RUNNING) { 3004 if (sc->rx_ring_inuse == sc->rx_ring_cnt) 3005 igb_enable_intr(sc); 3006 else 3007 igb_init(sc); 3008 } 3009 ifp->if_npoll_cpuid = -1; 3010 } 3011 } 3012 3013 #endif /* IFPOLL_ENABLE */ 3014 3015 static void 3016 igb_intr(void *xsc) 3017 { 3018 struct igb_softc *sc = xsc; 3019 struct ifnet *ifp = &sc->arpcom.ac_if; 3020 uint32_t eicr; 3021 3022 ASSERT_SERIALIZED(&sc->main_serialize); 3023 3024 eicr = E1000_READ_REG(&sc->hw, E1000_EICR); 3025 3026 if (eicr == 0) 3027 return; 3028 3029 if (ifp->if_flags & IFF_RUNNING) { 3030 struct igb_tx_ring *txr; 3031 int i; 3032 3033 for (i = 0; i < sc->rx_ring_inuse; ++i) { 3034 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 3035 3036 if (eicr & rxr->rx_intr_mask) { 3037 lwkt_serialize_enter(&rxr->rx_serialize); 3038 igb_rxeof(rxr, -1); 3039 lwkt_serialize_exit(&rxr->rx_serialize); 3040 } 3041 } 3042 3043 txr = &sc->tx_rings[0]; 3044 if (eicr & txr->tx_intr_mask) { 3045 lwkt_serialize_enter(&txr->tx_serialize); 3046 igb_txeof(txr); 3047 if (!ifq_is_empty(&ifp->if_snd)) 3048 if_devstart(ifp); 3049 lwkt_serialize_exit(&txr->tx_serialize); 3050 } 3051 } 3052 3053 if (eicr & E1000_EICR_OTHER) { 3054 uint32_t icr = E1000_READ_REG(&sc->hw, E1000_ICR); 3055 3056 /* Link status change */ 3057 if (icr & E1000_ICR_LSC) { 3058 sc->hw.mac.get_link_status = 1; 3059 igb_update_link_status(sc); 3060 } 3061 } 3062 3063 /* 3064 * Reading EICR has the side effect of clearing the interrupt mask, 3065 * so all interrupts need to be re-enabled here. 3066 */ 3067 E1000_WRITE_REG(&sc->hw, E1000_EIMS, sc->intr_mask); 3068 } 3069 3070 static void 3071 igb_intr_shared(void *xsc) 3072 { 3073 struct igb_softc *sc = xsc; 3074 struct ifnet *ifp = &sc->arpcom.ac_if; 3075 uint32_t reg_icr; 3076 3077 ASSERT_SERIALIZED(&sc->main_serialize); 3078 3079 reg_icr = E1000_READ_REG(&sc->hw, E1000_ICR); 3080 3081 /* Hot eject? */ 3082 if (reg_icr == 0xffffffff) 3083 return; 3084 3085 /* Definitely not our interrupt.
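 * On a shared interrupt line a zero ICR simply means that some
 * other device asserted the interrupt.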
*/ 3086 if (reg_icr == 0x0) 3087 return; 3088 3089 if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0) 3090 return; 3091 3092 if (ifp->if_flags & IFF_RUNNING) { 3093 if (reg_icr & 3094 (E1000_ICR_RXT0 | E1000_ICR_RXDMT0 | E1000_ICR_RXO)) { 3095 int i; 3096 3097 for (i = 0; i < sc->rx_ring_inuse; ++i) { 3098 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 3099 3100 lwkt_serialize_enter(&rxr->rx_serialize); 3101 igb_rxeof(rxr, -1); 3102 lwkt_serialize_exit(&rxr->rx_serialize); 3103 } 3104 } 3105 3106 if (reg_icr & E1000_ICR_TXDW) { 3107 struct igb_tx_ring *txr = &sc->tx_rings[0]; 3108 3109 lwkt_serialize_enter(&txr->tx_serialize); 3110 igb_txeof(txr); 3111 if (!ifq_is_empty(&ifp->if_snd)) 3112 if_devstart(ifp); 3113 lwkt_serialize_exit(&txr->tx_serialize); 3114 } 3115 } 3116 3117 /* Link status change */ 3118 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { 3119 sc->hw.mac.get_link_status = 1; 3120 igb_update_link_status(sc); 3121 } 3122 3123 if (reg_icr & E1000_ICR_RXO) 3124 sc->rx_overruns++; 3125 } 3126 3127 static int 3128 igb_encap(struct igb_tx_ring *txr, struct mbuf **m_headp) 3129 { 3130 bus_dma_segment_t segs[IGB_MAX_SCATTER]; 3131 bus_dmamap_t map; 3132 struct igb_tx_buf *tx_buf, *tx_buf_mapped; 3133 union e1000_adv_tx_desc *txd = NULL; 3134 struct mbuf *m_head = *m_headp; 3135 uint32_t olinfo_status = 0, cmd_type_len = 0, cmd_rs = 0; 3136 int maxsegs, nsegs, i, j, error, last = 0; 3137 uint32_t hdrlen = 0; 3138 3139 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 3140 error = igb_tso_pullup(txr, m_headp); 3141 if (error) 3142 return error; 3143 m_head = *m_headp; 3144 } 3145 3146 /* Set basic descriptor constants */ 3147 cmd_type_len |= E1000_ADVTXD_DTYP_DATA; 3148 cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT; 3149 if (m_head->m_flags & M_VLANTAG) 3150 cmd_type_len |= E1000_ADVTXD_DCMD_VLE; 3151 3152 /* 3153 * Map the packet for DMA. 3154 */ 3155 tx_buf = &txr->tx_buf[txr->next_avail_desc]; 3156 tx_buf_mapped = tx_buf; 3157 map = tx_buf->map; 3158 3159 maxsegs = txr->tx_avail - IGB_TX_RESERVED; 3160 KASSERT(maxsegs >= txr->spare_desc, ("not enough spare TX desc\n")); 3161 if (maxsegs > IGB_MAX_SCATTER) 3162 maxsegs = IGB_MAX_SCATTER; 3163 3164 error = bus_dmamap_load_mbuf_defrag(txr->tx_tag, map, m_headp, 3165 segs, maxsegs, &nsegs, BUS_DMA_NOWAIT); 3166 if (error) { 3167 if (error == ENOBUFS) 3168 txr->sc->mbuf_defrag_failed++; 3169 else 3170 txr->sc->no_tx_dma_setup++; 3171 3172 m_freem(*m_headp); 3173 *m_headp = NULL; 3174 return error; 3175 } 3176 bus_dmamap_sync(txr->tx_tag, map, BUS_DMASYNC_PREWRITE); 3177 3178 m_head = *m_headp; 3179 3180 /* 3181 * Set up the TX context descriptor, if any hardware offloading is 3182 * needed. This includes CSUM, VLAN, and TSO. It will consume one 3183 * TX descriptor. 3184 * 3185 * Unlike these chips' predecessors (em/emx), a TX context descriptor 3186 * will _not_ interfere with TX data fetch pipelining.
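 * The context descriptor therefore only costs one ring entry and
 * is accounted to tx_nsegs below just like the data descriptors.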
3187 */ 3188 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 3189 igb_tso_ctx(txr, m_head, &hdrlen); 3190 cmd_type_len |= E1000_ADVTXD_DCMD_TSE; 3191 olinfo_status |= E1000_TXD_POPTS_IXSM << 8; 3192 olinfo_status |= E1000_TXD_POPTS_TXSM << 8; 3193 txr->tx_nsegs++; 3194 } else if (igb_txcsum_ctx(txr, m_head)) { 3195 if (m_head->m_pkthdr.csum_flags & CSUM_IP) 3196 olinfo_status |= (E1000_TXD_POPTS_IXSM << 8); 3197 if (m_head->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_TCP)) 3198 olinfo_status |= (E1000_TXD_POPTS_TXSM << 8); 3199 txr->tx_nsegs++; 3200 } 3201 3202 txr->tx_nsegs += nsegs; 3203 if (txr->tx_nsegs >= txr->intr_nsegs) { 3204 /* 3205 * Report Status (RS) is turned on every intr_nsegs 3206 * descriptors (roughly). 3207 */ 3208 txr->tx_nsegs = 0; 3209 cmd_rs = E1000_ADVTXD_DCMD_RS; 3210 } 3211 3212 /* Calculate payload length */ 3213 olinfo_status |= ((m_head->m_pkthdr.len - hdrlen) 3214 << E1000_ADVTXD_PAYLEN_SHIFT); 3215 3216 /* 82575 needs the queue index added */ 3217 if (txr->sc->hw.mac.type == e1000_82575) 3218 olinfo_status |= txr->me << 4; 3219 3220 /* Set up our transmit descriptors */ 3221 i = txr->next_avail_desc; 3222 for (j = 0; j < nsegs; j++) { 3223 bus_size_t seg_len; 3224 bus_addr_t seg_addr; 3225 3226 tx_buf = &txr->tx_buf[i]; 3227 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i]; 3228 seg_addr = segs[j].ds_addr; 3229 seg_len = segs[j].ds_len; 3230 3231 txd->read.buffer_addr = htole64(seg_addr); 3232 txd->read.cmd_type_len = htole32(cmd_type_len | seg_len); 3233 txd->read.olinfo_status = htole32(olinfo_status); 3234 last = i; 3235 if (++i == txr->num_tx_desc) 3236 i = 0; 3237 tx_buf->m_head = NULL; 3238 } 3239 3240 KASSERT(txr->tx_avail > nsegs, ("invalid avail TX desc\n")); 3241 txr->next_avail_desc = i; 3242 txr->tx_avail -= nsegs; 3243 3244 tx_buf->m_head = m_head; 3245 tx_buf_mapped->map = tx_buf->map; 3246 tx_buf->map = map; 3247 3248 /* 3249 * Last Descriptor of Packet needs End Of Packet (EOP) 3250 */ 3251 txd->read.cmd_type_len |= htole32(E1000_ADVTXD_DCMD_EOP | cmd_rs); 3252 3253 /* 3254 * Advance the Transmit Descriptor Tail (TDT); this tells the E1000 3255 * that this frame is available to transmit. 3256 */ 3257 E1000_WRITE_REG(&txr->sc->hw, E1000_TDT(txr->me), i); 3258 ++txr->tx_packets; 3259 3260 return 0; 3261 } 3262 3263 static void 3264 igb_start(struct ifnet *ifp) 3265 { 3266 struct igb_softc *sc = ifp->if_softc; 3267 struct igb_tx_ring *txr = &sc->tx_rings[0]; 3268 struct mbuf *m_head; 3269 3270 ASSERT_SERIALIZED(&txr->tx_serialize); 3271 3272 if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING) 3273 return; 3274 3275 if (!sc->link_active) { 3276 ifq_purge(&ifp->if_snd); 3277 return; 3278 } 3279 3280 if (!IGB_IS_NOT_OACTIVE(txr)) 3281 igb_txeof(txr); 3282 3283 while (!ifq_is_empty(&ifp->if_snd)) { 3284 if (IGB_IS_OACTIVE(txr)) { 3285 ifp->if_flags |= IFF_OACTIVE; 3286 /* Set watchdog on */ 3287 ifp->if_timer = 5; 3288 break; 3289 } 3290 3291 m_head = ifq_dequeue(&ifp->if_snd, NULL); 3292 if (m_head == NULL) 3293 break; 3294 3295 if (igb_encap(txr, &m_head)) { 3296 ifp->if_oerrors++; 3297 continue; 3298 } 3299 3300 /* Send a copy of the frame to the BPF listener */ 3301 ETHER_BPF_MTAP(ifp, m_head); 3302 } 3303 } 3304 3305 static void 3306 igb_watchdog(struct ifnet *ifp) 3307 { 3308 struct igb_softc *sc = ifp->if_softc; 3309 struct igb_tx_ring *txr = &sc->tx_rings[0]; 3310 3311 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3312 3313 /* 3314 * If flow control has paused us since the last check, 3315 * the watchdog timing is invalid, so don't run it.
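 * Re-arm the timer and bail; a genuine hang will still trip the
 * watchdog on a later tick.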
3316 */ 3317 if (sc->pause_frames) { 3318 sc->pause_frames = 0; 3319 ifp->if_timer = 5; 3320 return; 3321 } 3322 3323 if_printf(ifp, "Watchdog timeout -- resetting\n"); 3324 if_printf(ifp, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me, 3325 E1000_READ_REG(&sc->hw, E1000_TDH(txr->me)), 3326 E1000_READ_REG(&sc->hw, E1000_TDT(txr->me))); 3327 if_printf(ifp, "TX(%d) desc avail = %d, " 3328 "Next TX to Clean = %d\n", 3329 txr->me, txr->tx_avail, txr->next_to_clean); 3330 3331 ifp->if_oerrors++; 3332 sc->watchdog_events++; 3333 3334 igb_init(sc); 3335 if (!ifq_is_empty(&ifp->if_snd)) 3336 if_devstart(ifp); 3337 } 3338 3339 static void 3340 igb_set_eitr(struct igb_softc *sc, int idx, int rate) 3341 { 3342 uint32_t eitr = 0; 3343 3344 if (rate > 0) { 3345 if (sc->hw.mac.type == e1000_82575) { 3346 eitr = 1000000000 / 256 / rate; 3347 /* 3348 * NOTE: 3349 * Document is wrong on the 2 bits left shift 3350 */ 3351 } else { 3352 eitr = 1000000 / rate; 3353 eitr <<= IGB_EITR_INTVL_SHIFT; 3354 } 3355 3356 if (eitr == 0) { 3357 /* Don't disable it */ 3358 eitr = 1 << IGB_EITR_INTVL_SHIFT; 3359 } else if (eitr > IGB_EITR_INTVL_MASK) { 3360 /* Don't allow it to be too large */ 3361 eitr = IGB_EITR_INTVL_MASK; 3362 } 3363 } 3364 if (sc->hw.mac.type == e1000_82575) 3365 eitr |= eitr << 16; 3366 else 3367 eitr |= E1000_EITR_CNT_IGNR; 3368 E1000_WRITE_REG(&sc->hw, E1000_EITR(idx), eitr); 3369 } 3370 3371 static int 3372 igb_sysctl_intr_rate(SYSCTL_HANDLER_ARGS) 3373 { 3374 struct igb_softc *sc = (void *)arg1; 3375 struct ifnet *ifp = &sc->arpcom.ac_if; 3376 int error, intr_rate; 3377 3378 intr_rate = sc->intr_rate; 3379 error = sysctl_handle_int(oidp, &intr_rate, 0, req); 3380 if (error || req->newptr == NULL) 3381 return error; 3382 if (intr_rate < 0) 3383 return EINVAL; 3384 3385 ifnet_serialize_all(ifp); 3386 3387 sc->intr_rate = intr_rate; 3388 if (ifp->if_flags & IFF_RUNNING) 3389 igb_set_eitr(sc, 0, sc->intr_rate); 3390 3391 if (bootverbose) 3392 if_printf(ifp, "interrupt rate set to %d/sec\n", sc->intr_rate); 3393 3394 ifnet_deserialize_all(ifp); 3395 3396 return 0; 3397 } 3398 3399 static int 3400 igb_sysctl_msix_rate(SYSCTL_HANDLER_ARGS) 3401 { 3402 struct igb_msix_data *msix = (void *)arg1; 3403 struct igb_softc *sc = msix->msix_sc; 3404 struct ifnet *ifp = &sc->arpcom.ac_if; 3405 int error, msix_rate; 3406 3407 msix_rate = msix->msix_rate; 3408 error = sysctl_handle_int(oidp, &msix_rate, 0, req); 3409 if (error || req->newptr == NULL) 3410 return error; 3411 if (msix_rate < 0) 3412 return EINVAL; 3413 3414 lwkt_serialize_enter(msix->msix_serialize); 3415 3416 msix->msix_rate = msix_rate; 3417 if (ifp->if_flags & IFF_RUNNING) 3418 igb_set_eitr(sc, msix->msix_vector, msix->msix_rate); 3419 3420 if (bootverbose) { 3421 if_printf(ifp, "%s set to %d/sec\n", msix->msix_rate_desc, 3422 msix->msix_rate); 3423 } 3424 3425 lwkt_serialize_exit(msix->msix_serialize); 3426 3427 return 0; 3428 } 3429 3430 static int 3431 igb_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS) 3432 { 3433 struct igb_softc *sc = (void *)arg1; 3434 struct ifnet *ifp = &sc->arpcom.ac_if; 3435 struct igb_tx_ring *txr = &sc->tx_rings[0]; 3436 int error, nsegs; 3437 3438 nsegs = txr->intr_nsegs; 3439 error = sysctl_handle_int(oidp, &nsegs, 0, req); 3440 if (error || req->newptr == NULL) 3441 return error; 3442 if (nsegs <= 0) 3443 return EINVAL; 3444 3445 ifnet_serialize_all(ifp); 3446 3447 if (nsegs >= txr->num_tx_desc - txr->oact_lo_desc || 3448 nsegs >= txr->oact_hi_desc - IGB_MAX_SCATTER) { 3449 error = EINVAL; 3450 } else { 3451 error = 0; 
3452 txr->intr_nsegs = nsegs; 3453 } 3454 3455 ifnet_deserialize_all(ifp); 3456 3457 return error; 3458 } 3459 3460 #ifdef IFPOLL_ENABLE 3461 3462 static int 3463 igb_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS) 3464 { 3465 struct igb_softc *sc = (void *)arg1; 3466 struct ifnet *ifp = &sc->arpcom.ac_if; 3467 int error, off; 3468 3469 off = sc->rx_npoll_off; 3470 error = sysctl_handle_int(oidp, &off, 0, req); 3471 if (error || req->newptr == NULL) 3472 return error; 3473 if (off < 0) 3474 return EINVAL; 3475 3476 ifnet_serialize_all(ifp); 3477 if (off >= ncpus2 || off % sc->rx_ring_cnt != 0) { 3478 error = EINVAL; 3479 } else { 3480 error = 0; 3481 sc->rx_npoll_off = off; 3482 } 3483 ifnet_deserialize_all(ifp); 3484 3485 return error; 3486 } 3487 3488 static int 3489 igb_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS) 3490 { 3491 struct igb_softc *sc = (void *)arg1; 3492 struct ifnet *ifp = &sc->arpcom.ac_if; 3493 int error, off; 3494 3495 off = sc->tx_npoll_off; 3496 error = sysctl_handle_int(oidp, &off, 0, req); 3497 if (error || req->newptr == NULL) 3498 return error; 3499 if (off < 0) 3500 return EINVAL; 3501 3502 ifnet_serialize_all(ifp); 3503 if (off >= ncpus2) { 3504 error = EINVAL; 3505 } else { 3506 error = 0; 3507 sc->tx_npoll_off = off; 3508 } 3509 ifnet_deserialize_all(ifp); 3510 3511 return error; 3512 } 3513 3514 #endif /* IFPOLL_ENABLE */ 3515 3516 static void 3517 igb_init_intr(struct igb_softc *sc) 3518 { 3519 igb_set_intr_mask(sc); 3520 3521 if ((sc->flags & IGB_FLAG_SHARED_INTR) == 0) 3522 igb_init_unshared_intr(sc); 3523 3524 if (sc->intr_type != PCI_INTR_TYPE_MSIX) { 3525 igb_set_eitr(sc, 0, sc->intr_rate); 3526 } else { 3527 int i; 3528 3529 for (i = 0; i < sc->msix_cnt; ++i) 3530 igb_set_eitr(sc, i, sc->msix_data[i].msix_rate); 3531 } 3532 } 3533 3534 static void 3535 igb_init_unshared_intr(struct igb_softc *sc) 3536 { 3537 struct e1000_hw *hw = &sc->hw; 3538 const struct igb_rx_ring *rxr; 3539 const struct igb_tx_ring *txr; 3540 uint32_t ivar, index; 3541 int i; 3542 3543 /* 3544 * Enable extended mode 3545 */ 3546 if (sc->hw.mac.type != e1000_82575) { 3547 uint32_t gpie; 3548 int ivar_max; 3549 3550 gpie = E1000_GPIE_NSICR; 3551 if (sc->intr_type == PCI_INTR_TYPE_MSIX) { 3552 gpie |= E1000_GPIE_MSIX_MODE | 3553 E1000_GPIE_EIAME | 3554 E1000_GPIE_PBA; 3555 } 3556 E1000_WRITE_REG(hw, E1000_GPIE, gpie); 3557 3558 /* 3559 * Clear IVARs 3560 */ 3561 switch (sc->hw.mac.type) { 3562 case e1000_82580: 3563 ivar_max = IGB_MAX_IVAR_82580; 3564 break; 3565 3566 case e1000_i350: 3567 ivar_max = IGB_MAX_IVAR_I350; 3568 break; 3569 3570 case e1000_vfadapt: 3571 case e1000_vfadapt_i350: 3572 ivar_max = IGB_MAX_IVAR_VF; 3573 break; 3574 3575 case e1000_82576: 3576 ivar_max = IGB_MAX_IVAR_82576; 3577 break; 3578 3579 default: 3580 panic("unknown mac type %d\n", sc->hw.mac.type); 3581 } 3582 for (i = 0; i < ivar_max; ++i) 3583 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, i, 0); 3584 E1000_WRITE_REG(hw, E1000_IVAR_MISC, 0); 3585 } else { 3586 uint32_t tmp; 3587 3588 KASSERT(sc->intr_type != PCI_INTR_TYPE_MSIX, 3589 ("82575 w/ MSI-X")); 3590 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT); 3591 tmp |= E1000_CTRL_EXT_IRCA; 3592 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp); 3593 } 3594 3595 /* 3596 * Map TX/RX interrupts to EICR 3597 */ 3598 switch (sc->hw.mac.type) { 3599 case e1000_82580: 3600 case e1000_i350: 3601 case e1000_vfadapt: 3602 case e1000_vfadapt_i350: 3603 /* RX entries */ 3604 for (i = 0; i < sc->rx_ring_inuse; ++i) { 3605 rxr = &sc->rx_rings[i]; 3606 3607 index = i >> 1; 3608 ivar = 
E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); 3609 3610 if (i & 1) { 3611 ivar &= 0xff00ffff; 3612 ivar |= 3613 (rxr->rx_intr_bit | E1000_IVAR_VALID) << 16; 3614 } else { 3615 ivar &= 0xffffff00; 3616 ivar |= 3617 (rxr->rx_intr_bit | E1000_IVAR_VALID); 3618 } 3619 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 3620 } 3621 /* TX entries */ 3622 for (i = 0; i < sc->tx_ring_cnt; ++i) { 3623 txr = &sc->tx_rings[i]; 3624 3625 index = i >> 1; 3626 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); 3627 3628 if (i & 1) { 3629 ivar &= 0x00ffffff; 3630 ivar |= 3631 (txr->tx_intr_bit | E1000_IVAR_VALID) << 24; 3632 } else { 3633 ivar &= 0xffff00ff; 3634 ivar |= 3635 (txr->tx_intr_bit | E1000_IVAR_VALID) << 8; 3636 } 3637 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 3638 } 3639 if (sc->intr_type == PCI_INTR_TYPE_MSIX) { 3640 ivar = (sc->sts_intr_bit | E1000_IVAR_VALID) << 8; 3641 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); 3642 } 3643 break; 3644 3645 case e1000_82576: 3646 /* RX entries */ 3647 for (i = 0; i < sc->rx_ring_inuse; ++i) { 3648 rxr = &sc->rx_rings[i]; 3649 3650 index = i & 0x7; /* Each IVAR has two entries */ 3651 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); 3652 3653 if (i < 8) { 3654 ivar &= 0xffffff00; 3655 ivar |= 3656 (rxr->rx_intr_bit | E1000_IVAR_VALID); 3657 } else { 3658 ivar &= 0xff00ffff; 3659 ivar |= 3660 (rxr->rx_intr_bit | E1000_IVAR_VALID) << 16; 3661 } 3662 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 3663 } 3664 /* TX entries */ 3665 for (i = 0; i < sc->tx_ring_cnt; ++i) { 3666 txr = &sc->tx_rings[i]; 3667 3668 index = i & 0x7; /* Each IVAR has two entries */ 3669 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); 3670 3671 if (i < 8) { 3672 ivar &= 0xffff00ff; 3673 ivar |= 3674 (txr->tx_intr_bit | E1000_IVAR_VALID) << 8; 3675 } else { 3676 ivar &= 0x00ffffff; 3677 ivar |= 3678 (txr->tx_intr_bit | E1000_IVAR_VALID) << 24; 3679 } 3680 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 3681 } 3682 if (sc->intr_type == PCI_INTR_TYPE_MSIX) { 3683 ivar = (sc->sts_intr_bit | E1000_IVAR_VALID) << 8; 3684 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); 3685 } 3686 break; 3687 3688 case e1000_82575: 3689 /* 3690 * Enable necessary interrupt bits. 3691 * 3692 * The name of the register is confusing; in addition to 3693 * configuring the first vector of MSI-X, it also configures 3694 * which bits of EICR could be set by the hardware even when 3695 * MSI or line interrupt is used; it thus controls interrupt 3696 * generation. It MUST be configured explicitly; the default 3697 * value mentioned in the datasheet is wrong: RX queue0 and 3698 * TX queue0 are NOT enabled by default. 3699 */ 3700 E1000_WRITE_REG(&sc->hw, E1000_MSIXBM(0), sc->intr_mask); 3701 break; 3702 3703 default: 3704 panic("unknown mac type %d\n", sc->hw.mac.type); 3705 } 3706 } 3707 3708 static int 3709 igb_setup_intr(struct igb_softc *sc) 3710 { 3711 struct ifnet *ifp = &sc->arpcom.ac_if; 3712 int error; 3713 3714 if (sc->intr_type == PCI_INTR_TYPE_MSIX) 3715 return igb_msix_setup(sc); 3716 3717 error = bus_setup_intr(sc->dev, sc->intr_res, INTR_MPSAFE, 3718 (sc->flags & IGB_FLAG_SHARED_INTR) ? 
igb_intr_shared : igb_intr, 3719 sc, &sc->intr_tag, &sc->main_serialize); 3720 if (error) { 3721 device_printf(sc->dev, "Failed to register interrupt handler"); 3722 return error; 3723 } 3724 3725 ifp->if_cpuid = rman_get_cpuid(sc->intr_res); 3726 KKASSERT(ifp->if_cpuid >= 0 && ifp->if_cpuid < ncpus); 3727 3728 return 0; 3729 } 3730 3731 static void 3732 igb_set_txintr_mask(struct igb_tx_ring *txr, int *intr_bit0, int intr_bitmax) 3733 { 3734 if (txr->sc->hw.mac.type == e1000_82575) { 3735 txr->tx_intr_bit = 0; /* unused */ 3736 switch (txr->me) { 3737 case 0: 3738 txr->tx_intr_mask = E1000_EICR_TX_QUEUE0; 3739 break; 3740 case 1: 3741 txr->tx_intr_mask = E1000_EICR_TX_QUEUE1; 3742 break; 3743 case 2: 3744 txr->tx_intr_mask = E1000_EICR_TX_QUEUE2; 3745 break; 3746 case 3: 3747 txr->tx_intr_mask = E1000_EICR_TX_QUEUE3; 3748 break; 3749 default: 3750 panic("unsupported # of TX ring, %d\n", txr->me); 3751 } 3752 } else { 3753 int intr_bit = *intr_bit0; 3754 3755 txr->tx_intr_bit = intr_bit % intr_bitmax; 3756 txr->tx_intr_mask = 1 << txr->tx_intr_bit; 3757 3758 *intr_bit0 = intr_bit + 1; 3759 } 3760 } 3761 3762 static void 3763 igb_set_rxintr_mask(struct igb_rx_ring *rxr, int *intr_bit0, int intr_bitmax) 3764 { 3765 if (rxr->sc->hw.mac.type == e1000_82575) { 3766 rxr->rx_intr_bit = 0; /* unused */ 3767 switch (rxr->me) { 3768 case 0: 3769 rxr->rx_intr_mask = E1000_EICR_RX_QUEUE0; 3770 break; 3771 case 1: 3772 rxr->rx_intr_mask = E1000_EICR_RX_QUEUE1; 3773 break; 3774 case 2: 3775 rxr->rx_intr_mask = E1000_EICR_RX_QUEUE2; 3776 break; 3777 case 3: 3778 rxr->rx_intr_mask = E1000_EICR_RX_QUEUE3; 3779 break; 3780 default: 3781 panic("unsupported # of RX ring, %d\n", rxr->me); 3782 } 3783 } else { 3784 int intr_bit = *intr_bit0; 3785 3786 rxr->rx_intr_bit = intr_bit % intr_bitmax; 3787 rxr->rx_intr_mask = 1 << rxr->rx_intr_bit; 3788 3789 *intr_bit0 = intr_bit + 1; 3790 } 3791 } 3792 3793 static void 3794 igb_serialize(struct ifnet *ifp, enum ifnet_serialize slz) 3795 { 3796 struct igb_softc *sc = ifp->if_softc; 3797 3798 ifnet_serialize_array_enter(sc->serializes, sc->serialize_cnt, 3799 sc->tx_serialize, sc->rx_serialize, slz); 3800 } 3801 3802 static void 3803 igb_deserialize(struct ifnet *ifp, enum ifnet_serialize slz) 3804 { 3805 struct igb_softc *sc = ifp->if_softc; 3806 3807 ifnet_serialize_array_exit(sc->serializes, sc->serialize_cnt, 3808 sc->tx_serialize, sc->rx_serialize, slz); 3809 } 3810 3811 static int 3812 igb_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz) 3813 { 3814 struct igb_softc *sc = ifp->if_softc; 3815 3816 return ifnet_serialize_array_try(sc->serializes, sc->serialize_cnt, 3817 sc->tx_serialize, sc->rx_serialize, slz); 3818 } 3819 3820 #ifdef INVARIANTS 3821 3822 static void 3823 igb_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz, 3824 boolean_t serialized) 3825 { 3826 struct igb_softc *sc = ifp->if_softc; 3827 3828 ifnet_serialize_array_assert(sc->serializes, sc->serialize_cnt, 3829 sc->tx_serialize, sc->rx_serialize, slz, serialized); 3830 } 3831 3832 #endif /* INVARIANTS */ 3833 3834 static void 3835 igb_set_intr_mask(struct igb_softc *sc) 3836 { 3837 int i; 3838 3839 sc->intr_mask = sc->sts_intr_mask; 3840 for (i = 0; i < sc->rx_ring_inuse; ++i) 3841 sc->intr_mask |= sc->rx_rings[i].rx_intr_mask; 3842 for (i = 0; i < sc->tx_ring_cnt; ++i) 3843 sc->intr_mask |= sc->tx_rings[i].tx_intr_mask; 3844 if (bootverbose) { 3845 if_printf(&sc->arpcom.ac_if, "intr mask 0x%08x\n", 3846 sc->intr_mask); 3847 } 3848 } 3849 3850 static int 3851 
igb_alloc_intr(struct igb_softc *sc) 3852 { 3853 int i, intr_bit, intr_bitmax; 3854 u_int intr_flags; 3855 3856 igb_msix_try_alloc(sc); 3857 if (sc->intr_type == PCI_INTR_TYPE_MSIX) 3858 goto done; 3859 3860 /* 3861 * Allocate MSI/legacy interrupt resource 3862 */ 3863 sc->intr_type = pci_alloc_1intr(sc->dev, igb_msi_enable, 3864 &sc->intr_rid, &intr_flags); 3865 3866 if (sc->intr_type == PCI_INTR_TYPE_LEGACY) { 3867 int unshared; 3868 3869 unshared = device_getenv_int(sc->dev, "irq.unshared", 0); 3870 if (!unshared) { 3871 sc->flags |= IGB_FLAG_SHARED_INTR; 3872 if (bootverbose) 3873 device_printf(sc->dev, "IRQ shared\n"); 3874 } else { 3875 intr_flags &= ~RF_SHAREABLE; 3876 if (bootverbose) 3877 device_printf(sc->dev, "IRQ unshared\n"); 3878 } 3879 } 3880 3881 sc->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, 3882 &sc->intr_rid, intr_flags); 3883 if (sc->intr_res == NULL) { 3884 device_printf(sc->dev, "Unable to allocate bus resource: " 3885 "interrupt\n"); 3886 return ENXIO; 3887 } 3888 3889 /* 3890 * Setup MSI/legacy interrupt mask 3891 */ 3892 switch (sc->hw.mac.type) { 3893 case e1000_82575: 3894 intr_bitmax = IGB_MAX_TXRXINT_82575; 3895 break; 3896 case e1000_82580: 3897 intr_bitmax = IGB_MAX_TXRXINT_82580; 3898 break; 3899 case e1000_i350: 3900 intr_bitmax = IGB_MAX_TXRXINT_I350; 3901 break; 3902 case e1000_82576: 3903 intr_bitmax = IGB_MAX_TXRXINT_82576; 3904 break; 3905 default: 3906 intr_bitmax = IGB_MIN_TXRXINT; 3907 break; 3908 } 3909 intr_bit = 0; 3910 for (i = 0; i < sc->tx_ring_cnt; ++i) 3911 igb_set_txintr_mask(&sc->tx_rings[i], &intr_bit, intr_bitmax); 3912 for (i = 0; i < sc->rx_ring_cnt; ++i) 3913 igb_set_rxintr_mask(&sc->rx_rings[i], &intr_bit, intr_bitmax); 3914 sc->sts_intr_bit = 0; 3915 sc->sts_intr_mask = E1000_EICR_OTHER; 3916 3917 /* Initialize interrupt rate */ 3918 sc->intr_rate = IGB_INTR_RATE; 3919 done: 3920 igb_set_ring_inuse(sc, FALSE); 3921 igb_set_intr_mask(sc); 3922 return 0; 3923 } 3924 3925 static void 3926 igb_free_intr(struct igb_softc *sc) 3927 { 3928 if (sc->intr_type != PCI_INTR_TYPE_MSIX) { 3929 if (sc->intr_res != NULL) { 3930 bus_release_resource(sc->dev, SYS_RES_IRQ, sc->intr_rid, 3931 sc->intr_res); 3932 } 3933 if (sc->intr_type == PCI_INTR_TYPE_MSI) 3934 pci_release_msi(sc->dev); 3935 } else { 3936 igb_msix_free(sc, TRUE); 3937 } 3938 } 3939 3940 static void 3941 igb_teardown_intr(struct igb_softc *sc) 3942 { 3943 if (sc->intr_type != PCI_INTR_TYPE_MSIX) 3944 bus_teardown_intr(sc->dev, sc->intr_res, sc->intr_tag); 3945 else 3946 igb_msix_teardown(sc, sc->msix_cnt); 3947 } 3948 3949 static void 3950 igb_msix_try_alloc(struct igb_softc *sc) 3951 { 3952 int msix_enable, msix_cnt, msix_cnt2, alloc_cnt; 3953 int i, x, error; 3954 struct igb_msix_data *msix; 3955 boolean_t aggregate, setup = FALSE; 3956 3957 /* 3958 * Don't enable MSI-X on 82575, see: 3959 * 82575 specification update errata #25 3960 */ 3961 if (sc->hw.mac.type == e1000_82575) 3962 return; 3963 3964 /* Don't enable MSI-X on VF */ 3965 if (sc->vf_ifp) 3966 return; 3967 3968 msix_enable = device_getenv_int(sc->dev, "msix.enable", 3969 igb_msix_enable); 3970 if (!msix_enable) 3971 return; 3972 3973 msix_cnt = pci_msix_count(sc->dev); 3974 #ifdef IGB_MSIX_DEBUG 3975 msix_cnt = device_getenv_int(sc->dev, "msix.count", msix_cnt); 3976 #endif 3977 if (msix_cnt <= 1) { 3978 /* One MSI-X model does not make sense */ 3979 return; 3980 } 3981 3982 i = 0; 3983 while ((1 << (i + 1)) <= msix_cnt) 3984 ++i; 3985 msix_cnt2 = 1 << i; 3986 3987 if (bootverbose) { 3988 
device_printf(sc->dev, "MSI-X count %d/%d\n", 3989 msix_cnt2, msix_cnt); 3990 } 3991 3992 KKASSERT(msix_cnt2 <= msix_cnt); 3993 if (msix_cnt == msix_cnt2) { 3994 /* We need at least one MSI-X for link status */ 3995 msix_cnt2 >>= 1; 3996 if (msix_cnt2 <= 1) { 3997 /* One MSI-X for RX/TX does not make sense */ 3998 device_printf(sc->dev, "not enough MSI-X for TX/RX, " 3999 "MSI-X count %d/%d\n", msix_cnt2, msix_cnt); 4000 return; 4001 } 4002 KKASSERT(msix_cnt > msix_cnt2); 4003 4004 if (bootverbose) { 4005 device_printf(sc->dev, "MSI-X count fixup %d/%d\n", 4006 msix_cnt2, msix_cnt); 4007 } 4008 } 4009 4010 sc->rx_ring_msix = sc->rx_ring_cnt; 4011 if (sc->rx_ring_msix > msix_cnt2) 4012 sc->rx_ring_msix = msix_cnt2; 4013 4014 if (msix_cnt >= sc->tx_ring_cnt + sc->rx_ring_msix + 1) { 4015 /* 4016 * Independent TX/RX MSI-X 4017 */ 4018 aggregate = FALSE; 4019 if (bootverbose) 4020 device_printf(sc->dev, "independent TX/RX MSI-X\n"); 4021 alloc_cnt = sc->tx_ring_cnt + sc->rx_ring_msix; 4022 } else { 4023 /* 4024 * Aggregate TX/RX MSI-X 4025 */ 4026 aggregate = TRUE; 4027 if (bootverbose) 4028 device_printf(sc->dev, "aggregate TX/RX MSI-X\n"); 4029 alloc_cnt = msix_cnt2; 4030 if (alloc_cnt > ncpus2) 4031 alloc_cnt = ncpus2; 4032 if (sc->rx_ring_msix > alloc_cnt) 4033 sc->rx_ring_msix = alloc_cnt; 4034 } 4035 ++alloc_cnt; /* For link status */ 4036 4037 if (bootverbose) { 4038 device_printf(sc->dev, "MSI-X alloc %d, RX ring %d\n", 4039 alloc_cnt, sc->rx_ring_msix); 4040 } 4041 4042 sc->msix_mem_rid = PCIR_BAR(IGB_MSIX_BAR); 4043 sc->msix_mem_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 4044 &sc->msix_mem_rid, RF_ACTIVE); 4045 if (sc->msix_mem_res == NULL) { 4046 device_printf(sc->dev, "Unable to map MSI-X table\n"); 4047 return; 4048 } 4049 4050 sc->msix_cnt = alloc_cnt; 4051 sc->msix_data = kmalloc_cachealign( 4052 sizeof(struct igb_msix_data) * sc->msix_cnt, 4053 M_DEVBUF, M_WAITOK | M_ZERO); 4054 for (x = 0; x < sc->msix_cnt; ++x) { 4055 msix = &sc->msix_data[x]; 4056 4057 lwkt_serialize_init(&msix->msix_serialize0); 4058 msix->msix_sc = sc; 4059 msix->msix_rid = -1; 4060 msix->msix_vector = x; 4061 msix->msix_mask = 1 << msix->msix_vector; 4062 msix->msix_rate = IGB_INTR_RATE; 4063 } 4064 4065 x = 0; 4066 if (!aggregate) { 4067 int offset, offset_def; 4068 4069 if (sc->rx_ring_msix == ncpus2) { 4070 offset = 0; 4071 } else { 4072 offset_def = (sc->rx_ring_msix * 4073 device_get_unit(sc->dev)) % ncpus2; 4074 4075 offset = device_getenv_int(sc->dev, 4076 "msix.rxoff", offset_def); 4077 if (offset >= ncpus2 || 4078 offset % sc->rx_ring_msix != 0) { 4079 device_printf(sc->dev, 4080 "invalid msix.rxoff %d, use %d\n", 4081 offset, offset_def); 4082 offset = offset_def; 4083 } 4084 } 4085 4086 /* RX rings */ 4087 for (i = 0; i < sc->rx_ring_msix; ++i) { 4088 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 4089 4090 KKASSERT(x < sc->msix_cnt); 4091 msix = &sc->msix_data[x++]; 4092 rxr->rx_intr_bit = msix->msix_vector; 4093 rxr->rx_intr_mask = msix->msix_mask; 4094 4095 msix->msix_serialize = &rxr->rx_serialize; 4096 msix->msix_func = igb_msix_rx; 4097 msix->msix_arg = rxr; 4098 msix->msix_cpuid = i + offset; 4099 KKASSERT(msix->msix_cpuid < ncpus2); 4100 ksnprintf(msix->msix_desc, sizeof(msix->msix_desc), 4101 "%s rx%d", device_get_nameunit(sc->dev), i); 4102 msix->msix_rate = IGB_MSIX_RX_RATE; 4103 ksnprintf(msix->msix_rate_desc, 4104 sizeof(msix->msix_rate_desc), 4105 "RX%d interrupt rate", i); 4106 } 4107 4108 offset_def = device_get_unit(sc->dev) % ncpus2; 4109 offset = 
		offset_def = device_get_unit(sc->dev) % ncpus2;
		offset = device_getenv_int(sc->dev, "msix.txoff", offset_def);
		if (offset >= ncpus2) {
			device_printf(sc->dev, "invalid msix.txoff %d, "
			    "use %d\n", offset, offset_def);
			offset = offset_def;
		}

		/* TX rings */
		for (i = 0; i < sc->tx_ring_cnt; ++i) {
			struct igb_tx_ring *txr = &sc->tx_rings[i];

			KKASSERT(x < sc->msix_cnt);
			msix = &sc->msix_data[x++];
			txr->tx_intr_bit = msix->msix_vector;
			txr->tx_intr_mask = msix->msix_mask;

			msix->msix_serialize = &txr->tx_serialize;
			msix->msix_func = igb_msix_tx;
			msix->msix_arg = txr;
			msix->msix_cpuid = i + offset;
			sc->msix_tx_cpuid = msix->msix_cpuid; /* XXX */
			KKASSERT(msix->msix_cpuid < ncpus2);
			ksnprintf(msix->msix_desc, sizeof(msix->msix_desc),
			    "%s tx%d", device_get_nameunit(sc->dev), i);
			msix->msix_rate = IGB_MSIX_TX_RATE;
			ksnprintf(msix->msix_rate_desc,
			    sizeof(msix->msix_rate_desc),
			    "TX%d interrupt rate", i);
		}
	} else {
		/* TODO */
		error = EOPNOTSUPP;
		goto back;
	}

	/*
	 * Link status
	 */
	KKASSERT(x < sc->msix_cnt);
	msix = &sc->msix_data[x++];
	sc->sts_intr_bit = msix->msix_vector;
	sc->sts_intr_mask = msix->msix_mask;

	msix->msix_serialize = &sc->main_serialize;
	msix->msix_func = igb_msix_status;
	msix->msix_arg = sc;
	msix->msix_cpuid = 0; /* TODO tunable */
	ksnprintf(msix->msix_desc, sizeof(msix->msix_desc), "%s sts",
	    device_get_nameunit(sc->dev));
	ksnprintf(msix->msix_rate_desc, sizeof(msix->msix_rate_desc),
	    "status interrupt rate");

	KKASSERT(x == sc->msix_cnt);

	error = pci_setup_msix(sc->dev);
	if (error) {
		device_printf(sc->dev, "Setup MSI-X failed\n");
		goto back;
	}
	setup = TRUE;

	for (i = 0; i < sc->msix_cnt; ++i) {
		msix = &sc->msix_data[i];

		error = pci_alloc_msix_vector(sc->dev, msix->msix_vector,
		    &msix->msix_rid, msix->msix_cpuid);
		if (error) {
			device_printf(sc->dev,
			    "Unable to allocate MSI-X %d on cpu%d\n",
			    msix->msix_vector, msix->msix_cpuid);
			goto back;
		}

		msix->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
		    &msix->msix_rid, RF_ACTIVE);
		if (msix->msix_res == NULL) {
			device_printf(sc->dev,
			    "Unable to allocate MSI-X %d resource\n",
			    msix->msix_vector);
			error = ENOMEM;
			goto back;
		}
	}

	pci_enable_msix(sc->dev);
	sc->intr_type = PCI_INTR_TYPE_MSIX;
back:
	if (error)
		igb_msix_free(sc, setup);
}
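/*
 * Undo igb_msix_try_alloc(): release each vector's IRQ resource and
 * rid, tear down the MSI-X table if pci_setup_msix() had succeeded
 * ('setup' is TRUE), and free the per-vector state array.
 */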
static void
igb_msix_free(struct igb_softc *sc, boolean_t setup)
{
	int i;

	KKASSERT(sc->msix_cnt > 1);

	for (i = 0; i < sc->msix_cnt; ++i) {
		struct igb_msix_data *msix = &sc->msix_data[i];

		if (msix->msix_res != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ,
			    msix->msix_rid, msix->msix_res);
		}
		if (msix->msix_rid >= 0)
			pci_release_msix_vector(sc->dev, msix->msix_rid);
	}
	if (setup)
		pci_teardown_msix(sc->dev);

	sc->msix_cnt = 0;
	kfree(sc->msix_data, M_DEVBUF);
	sc->msix_data = NULL;
}

static int
igb_msix_setup(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int i;

	for (i = 0; i < sc->msix_cnt; ++i) {
		struct igb_msix_data *msix = &sc->msix_data[i];
		int error;

		error = bus_setup_intr_descr(sc->dev, msix->msix_res,
		    INTR_MPSAFE, msix->msix_func, msix->msix_arg,
		    &msix->msix_handle, msix->msix_serialize,
		    msix->msix_desc);
		if (error) {
			device_printf(sc->dev, "could not set up %s "
			    "interrupt handler.\n", msix->msix_desc);
			igb_msix_teardown(sc, i);
			return error;
		}
	}
	ifp->if_cpuid = sc->msix_tx_cpuid;

	return 0;
}

static void
igb_msix_teardown(struct igb_softc *sc, int msix_cnt)
{
	int i;

	for (i = 0; i < msix_cnt; ++i) {
		struct igb_msix_data *msix = &sc->msix_data[i];

		bus_teardown_intr(sc->dev, msix->msix_res, msix->msix_handle);
	}
}

static void
igb_msix_rx(void *arg)
{
	struct igb_rx_ring *rxr = arg;

	ASSERT_SERIALIZED(&rxr->rx_serialize);
	igb_rxeof(rxr, -1);

	E1000_WRITE_REG(&rxr->sc->hw, E1000_EIMS, rxr->rx_intr_mask);
}

static void
igb_msix_tx(void *arg)
{
	struct igb_tx_ring *txr = arg;
	struct ifnet *ifp = &txr->sc->arpcom.ac_if;

	ASSERT_SERIALIZED(&txr->tx_serialize);

	igb_txeof(txr);
	if (!ifq_is_empty(&ifp->if_snd))
		if_devstart(ifp);

	E1000_WRITE_REG(&txr->sc->hw, E1000_EIMS, txr->tx_intr_mask);
}

static void
igb_msix_status(void *arg)
{
	struct igb_softc *sc = arg;
	uint32_t icr;

	ASSERT_SERIALIZED(&sc->main_serialize);

	icr = E1000_READ_REG(&sc->hw, E1000_ICR);
	if (icr & E1000_ICR_LSC) {
		sc->hw.mac.get_link_status = 1;
		igb_update_link_status(sc);
	}

	E1000_WRITE_REG(&sc->hw, E1000_EIMS, sc->sts_intr_mask);
}

static void
igb_set_ring_inuse(struct igb_softc *sc, boolean_t polling)
{
	if (!IGB_ENABLE_HWRSS(sc))
		return;

	if (polling)
		sc->rx_ring_inuse = sc->rx_ring_cnt;
	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
		sc->rx_ring_inuse = IGB_MIN_RING_RSS;
	else
		sc->rx_ring_inuse = sc->rx_ring_msix;
	if (bootverbose) {
		if_printf(&sc->arpcom.ac_if, "RX rings %d/%d\n",
		    sc->rx_ring_inuse, sc->rx_ring_cnt);
	}
}

static int
igb_tso_pullup(struct igb_tx_ring *txr, struct mbuf **mp)
{
	int hoff, iphlen, thoff;
	struct mbuf *m;

	m = *mp;
	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));

	iphlen = m->m_pkthdr.csum_iphlen;
	thoff = m->m_pkthdr.csum_thlen;
	hoff = m->m_pkthdr.csum_lhlen;

	KASSERT(iphlen > 0, ("invalid ip hlen"));
	KASSERT(thoff > 0, ("invalid tcp hlen"));
	KASSERT(hoff > 0, ("invalid ether hlen"));

	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
		m = m_pullup(m, hoff + iphlen + thoff);
		if (m == NULL) {
			*mp = NULL;
			return ENOBUFS;
		}
		*mp = m;
	}
	if (txr->sc->flags & IGB_FLAG_TSO_IPLEN0) {
		struct ip *ip;

		ip = mtodoff(m, struct ip *, hoff);
		ip->ip_len = 0;
	}

	return 0;
}
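/*
 * Program one advanced TX context descriptor describing the TSO
 * frame layout (L2/L3/L4 header lengths and the MSS).  This consumes
 * a descriptor slot and returns the total header length via *hlen.
 */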
static void
igb_tso_ctx(struct igb_tx_ring *txr, struct mbuf *m, uint32_t *hlen)
{
	struct e1000_adv_tx_context_desc *TXD;
	uint32_t vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
	int hoff, ctxd, iphlen, thoff;

	iphlen = m->m_pkthdr.csum_iphlen;
	thoff = m->m_pkthdr.csum_thlen;
	hoff = m->m_pkthdr.csum_lhlen;

	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;

	ctxd = txr->next_avail_desc;
	TXD = (struct e1000_adv_tx_context_desc *)&txr->tx_base[ctxd];

	if (m->m_flags & M_VLANTAG) {
		uint16_t vlantag;

		vlantag = htole16(m->m_pkthdr.ether_vlantag);
		vlan_macip_lens |= (vlantag << E1000_ADVTXD_VLAN_SHIFT);
	}

	vlan_macip_lens |= (hoff << E1000_ADVTXD_MACLEN_SHIFT);
	vlan_macip_lens |= iphlen;

	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;

	mss_l4len_idx |= (m->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
	mss_l4len_idx |= (thoff << E1000_ADVTXD_L4LEN_SHIFT);
	/* 82575 needs the queue index added */
	if (txr->sc->hw.mac.type == e1000_82575)
		mss_l4len_idx |= txr->me << 4;

	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
	TXD->seqnum_seed = htole32(0);
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	/* We've consumed the first desc, adjust counters */
	if (++ctxd == txr->num_tx_desc)
		ctxd = 0;
	txr->next_avail_desc = ctxd;
	--txr->tx_avail;

	*hlen = hoff + iphlen + thoff;
}