/*
 * Copyright (c) 2001-2011, Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_ifpoll.h"
#include "opt_igb.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/rman.h>
#include <sys/serialize.h>
#include <sys/serialize2.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/ifq_var.h>
#include <net/toeplitz.h>
#include <net/toeplitz2.h>
#include <net/vlan/if_vlan_var.h>
#include <net/vlan/if_vlan_ether.h>
#include <net/if_poll.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>

#include <bus/pci/pcivar.h>
#include <bus/pci/pcireg.h>

#include <dev/netif/ig_hal/e1000_api.h>
#include <dev/netif/ig_hal/e1000_82575.h>
#include <dev/netif/igb/if_igb.h>

#ifdef IGB_RSS_DEBUG
#define IGB_RSS_DPRINTF(sc, lvl, fmt, ...) \
do { \
	if (sc->rss_debug >= lvl) \
		if_printf(&sc->arpcom.ac_if, fmt, __VA_ARGS__); \
} while (0)
#else	/* !IGB_RSS_DEBUG */
#define IGB_RSS_DPRINTF(sc, lvl, fmt, ...)	((void)0)
#endif	/* IGB_RSS_DEBUG */

#define IGB_NAME	"Intel(R) PRO/1000 "
#define IGB_DEVICE(id) \
	{ IGB_VENDOR_ID, E1000_DEV_ID_##id, IGB_NAME #id }
#define IGB_DEVICE_NULL	{ 0, 0, NULL }

static struct igb_device {
	uint16_t	vid;
	uint16_t	did;
	const char	*desc;
} igb_devices[] = {
	IGB_DEVICE(82575EB_COPPER),
	IGB_DEVICE(82575EB_FIBER_SERDES),
	IGB_DEVICE(82575GB_QUAD_COPPER),
	IGB_DEVICE(82576),
	IGB_DEVICE(82576_NS),
	IGB_DEVICE(82576_NS_SERDES),
	IGB_DEVICE(82576_FIBER),
	IGB_DEVICE(82576_SERDES),
	IGB_DEVICE(82576_SERDES_QUAD),
	IGB_DEVICE(82576_QUAD_COPPER),
	IGB_DEVICE(82576_QUAD_COPPER_ET2),
	IGB_DEVICE(82576_VF),
	IGB_DEVICE(82580_COPPER),
	IGB_DEVICE(82580_FIBER),
	IGB_DEVICE(82580_SERDES),
	IGB_DEVICE(82580_SGMII),
	IGB_DEVICE(82580_COPPER_DUAL),
	IGB_DEVICE(82580_QUAD_FIBER),
	IGB_DEVICE(DH89XXCC_SERDES),
	IGB_DEVICE(DH89XXCC_SGMII),
	IGB_DEVICE(DH89XXCC_SFP),
	IGB_DEVICE(DH89XXCC_BACKPLANE),
	IGB_DEVICE(I350_COPPER),
	IGB_DEVICE(I350_FIBER),
	IGB_DEVICE(I350_SERDES),
	IGB_DEVICE(I350_SGMII),
	IGB_DEVICE(I350_VF),
	IGB_DEVICE(I210_COPPER),
	IGB_DEVICE(I210_COPPER_IT),
	IGB_DEVICE(I210_COPPER_OEM1),
	IGB_DEVICE(I210_COPPER_FLASHLESS),
	IGB_DEVICE(I210_SERDES_FLASHLESS),
	IGB_DEVICE(I210_FIBER),
	IGB_DEVICE(I210_SERDES),
	IGB_DEVICE(I210_SGMII),
	IGB_DEVICE(I211_COPPER),
	IGB_DEVICE(I354_BACKPLANE_1GBPS),
	IGB_DEVICE(I354_SGMII),

	/* required last entry */
	IGB_DEVICE_NULL
};

static int	igb_probe(device_t);
static int	igb_attach(device_t);
static int	igb_detach(device_t);
static int	igb_shutdown(device_t);
static int	igb_suspend(device_t);
static int	igb_resume(device_t);

static boolean_t igb_is_valid_ether_addr(const uint8_t *);
static void	igb_setup_ifp(struct igb_softc *);
static boolean_t igb_txcsum_ctx(struct igb_tx_ring *, struct mbuf *);
static int	igb_tso_pullup(struct igb_tx_ring *, struct mbuf **);
static void	igb_tso_ctx(struct igb_tx_ring *, struct mbuf *, uint32_t *);
static void	igb_add_sysctl(struct igb_softc *);
static int	igb_sysctl_intr_rate(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_msix_rate(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
static void	igb_set_ring_inuse(struct igb_softc *, boolean_t);
static int	igb_get_rxring_inuse(const struct igb_softc *, boolean_t);
static int	igb_get_txring_inuse(const struct igb_softc *, boolean_t);
static void	igb_set_timer_cpuid(struct igb_softc *, boolean_t);
#ifdef IFPOLL_ENABLE
static int	igb_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS);
#endif

static void	igb_vf_init_stats(struct igb_softc *);
static void	igb_reset(struct igb_softc *);
static void	igb_update_stats_counters(struct igb_softc *);
static void	igb_update_vf_stats_counters(struct igb_softc *);
static void	igb_update_link_status(struct igb_softc *);
static void	igb_init_tx_unit(struct igb_softc *);
static void	igb_init_rx_unit(struct igb_softc *);

static void	igb_set_vlan(struct igb_softc *);
static void	igb_set_multi(struct igb_softc *);
static void	igb_set_promisc(struct igb_softc *);
static void	igb_disable_promisc(struct igb_softc *);

static int	igb_alloc_rings(struct igb_softc *);
static void	igb_free_rings(struct igb_softc *);
static int	igb_create_tx_ring(struct igb_tx_ring *);
static int	igb_create_rx_ring(struct igb_rx_ring *);
static void	igb_free_tx_ring(struct igb_tx_ring *);
static void	igb_free_rx_ring(struct igb_rx_ring *);
static void	igb_destroy_tx_ring(struct igb_tx_ring *, int);
static void	igb_destroy_rx_ring(struct igb_rx_ring *, int);
static void	igb_init_tx_ring(struct igb_tx_ring *);
static int	igb_init_rx_ring(struct igb_rx_ring *);
static int	igb_newbuf(struct igb_rx_ring *, int, boolean_t);
static int	igb_encap(struct igb_tx_ring *, struct mbuf **, int *, int *);
static void	igb_rx_refresh(struct igb_rx_ring *, int);
static void	igb_setup_serializer(struct igb_softc *);

static void	igb_stop(struct igb_softc *);
static void	igb_init(void *);
static int	igb_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
static void	igb_media_status(struct ifnet *, struct ifmediareq *);
static int	igb_media_change(struct ifnet *);
static void	igb_timer(void *);
static void	igb_watchdog(struct ifaltq_subque *);
static void	igb_start(struct ifnet *, struct ifaltq_subque *);
#ifdef IFPOLL_ENABLE
static void	igb_npoll(struct ifnet *, struct ifpoll_info *);
static void	igb_npoll_rx(struct ifnet *, void *, int);
static void	igb_npoll_tx(struct ifnet *, void *, int);
static void	igb_npoll_status(struct ifnet *);
#endif
static void	igb_serialize(struct ifnet *, enum ifnet_serialize);
static void	igb_deserialize(struct ifnet *, enum ifnet_serialize);
static int	igb_tryserialize(struct ifnet *, enum ifnet_serialize);
#ifdef INVARIANTS
static void	igb_serialize_assert(struct ifnet *, enum ifnet_serialize,
		    boolean_t);
#endif

static void	igb_intr(void *);
static void	igb_intr_shared(void *);
static void	igb_rxeof(struct igb_rx_ring *, int);
static void	igb_txeof(struct igb_tx_ring *);
static void	igb_set_eitr(struct igb_softc *, int, int);
static void	igb_enable_intr(struct igb_softc *);
static void	igb_disable_intr(struct igb_softc *);
static void	igb_init_unshared_intr(struct igb_softc *);
static void	igb_init_intr(struct igb_softc *);
static int	igb_setup_intr(struct igb_softc *);
static void	igb_set_txintr_mask(struct igb_tx_ring *, int *, int);
static void	igb_set_rxintr_mask(struct igb_rx_ring *, int *, int);
static void	igb_set_intr_mask(struct igb_softc *);
static int	igb_alloc_intr(struct igb_softc *);
static void	igb_free_intr(struct igb_softc *);
static void	igb_teardown_intr(struct igb_softc *);
static void	igb_msix_try_alloc(struct igb_softc *);
static void	igb_msix_rx_conf(struct igb_softc *, int, int *, int);
static void	igb_msix_tx_conf(struct igb_softc *, int, int *, int);
static void	igb_msix_free(struct igb_softc *, boolean_t);
static int	igb_msix_setup(struct igb_softc *);
static void	igb_msix_teardown(struct igb_softc *, int);
static void	igb_msix_rx(void *);
static void	igb_msix_tx(void *);
static void	igb_msix_status(void *);
static void	igb_msix_rxtx(void *);

/* Management and WOL Support */
static void	igb_get_mgmt(struct igb_softc *);
static void	igb_rel_mgmt(struct igb_softc *);
static void	igb_get_hw_control(struct igb_softc *);
static void	igb_rel_hw_control(struct igb_softc *);
static void	igb_enable_wol(device_t);

static device_method_t igb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		igb_probe),
	DEVMETHOD(device_attach,	igb_attach),
	DEVMETHOD(device_detach,	igb_detach),
	DEVMETHOD(device_shutdown,	igb_shutdown),
	DEVMETHOD(device_suspend,	igb_suspend),
	DEVMETHOD(device_resume,	igb_resume),
	DEVMETHOD_END
};

static driver_t igb_driver = {
	"igb",
	igb_methods,
	sizeof(struct igb_softc),
};

static devclass_t igb_devclass;

DECLARE_DUMMY_MODULE(if_igb);
MODULE_DEPEND(igb, ig_hal, 1, 1, 1);
DRIVER_MODULE(if_igb, pci, igb_driver, igb_devclass, NULL, NULL);

static int	igb_rxd = IGB_DEFAULT_RXD;
static int	igb_txd = IGB_DEFAULT_TXD;
static int	igb_rxr = 0;
static int	igb_txr = 0;
static int	igb_msi_enable = 1;
static int	igb_msix_enable = 1;
static int	igb_eee_disabled = 1;	/* Energy Efficient Ethernet */
static int	igb_fc_setting = e1000_fc_full;

/*
 * DMA Coalescing, only for i350 - default to off,
 * this feature is for power savings
 */
static int	igb_dma_coalesce = 0;

TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
TUNABLE_INT("hw.igb.rxr", &igb_rxr);
TUNABLE_INT("hw.igb.txr", &igb_txr);
TUNABLE_INT("hw.igb.msi.enable", &igb_msi_enable);
TUNABLE_INT("hw.igb.msix.enable", &igb_msix_enable);
TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);

/* i350 specific */
TUNABLE_INT("hw.igb.eee_disabled", &igb_eee_disabled);
TUNABLE_INT("hw.igb.dma_coalesce", &igb_dma_coalesce);

static __inline void
igb_rxcsum(uint32_t staterr, struct mbuf *mp)
{
	/* Ignore Checksum bit is set */
	if (staterr & E1000_RXD_STAT_IXSM)
		return;

	if ((staterr & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
	    E1000_RXD_STAT_IPCS)
		mp->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID;

	if (staterr & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
		if ((staterr & E1000_RXDEXT_STATERR_TCPE) == 0) {
			mp->m_pkthdr.csum_flags |= CSUM_DATA_VALID |
			    CSUM_PSEUDO_HDR | CSUM_FRAG_NOT_CHECKED;
			mp->m_pkthdr.csum_data = htons(0xffff);
		}
	}
}

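/*
 * Map the RSS hash type reported in the advanced RX descriptor to a
 * pktinfo for the network stack.  One subtlety worth noting: for the
 * plain IPV4 hash type the hardware does not report the L4 protocol,
 * so a set TCPCS status bit with a clear TCPE error bit is taken as
 * evidence of a verified UDP checksum (a TCP segment would have been
 * reported as IPV4_TCP instead), hence IPPROTO_UDP below.
 */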
static __inline struct pktinfo *
igb_rssinfo(struct mbuf *m, struct pktinfo *pi,
    uint32_t hash, uint32_t hashtype, uint32_t staterr)
{
	switch (hashtype) {
	case E1000_RXDADV_RSSTYPE_IPV4_TCP:
		pi->pi_netisr = NETISR_IP;
		pi->pi_flags = 0;
		pi->pi_l3proto = IPPROTO_TCP;
		break;

	case E1000_RXDADV_RSSTYPE_IPV4:
		if (staterr & E1000_RXD_STAT_IXSM)
			return NULL;

		if ((staterr &
		     (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
		    E1000_RXD_STAT_TCPCS) {
			pi->pi_netisr = NETISR_IP;
			pi->pi_flags = 0;
			pi->pi_l3proto = IPPROTO_UDP;
			break;
		}
		/* FALL THROUGH */
	default:
		return NULL;
	}

	m->m_flags |= M_HASH;
	m->m_pkthdr.hash = toeplitz_hash(hash);
	return pi;
}

static int
igb_probe(device_t dev)
{
	const struct igb_device *d;
	uint16_t vid, did;

	vid = pci_get_vendor(dev);
	did = pci_get_device(dev);

	for (d = igb_devices; d->desc != NULL; ++d) {
		if (vid == d->vid && did == d->did) {
			device_set_desc(dev, d->desc);
			return 0;
		}
	}
	return ENXIO;
}

static int
igb_attach(device_t dev)
{
	struct igb_softc *sc = device_get_softc(dev);
	uint16_t eeprom_data;
	int error = 0, ring_max;
#ifdef IFPOLL_ENABLE
	int offset, offset_def;
#endif

#ifdef notyet
	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_nvm_info, "I", "NVM Information");
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
#endif

	callout_init_mp(&sc->timer);
	lwkt_serialize_init(&sc->main_serialize);

	if_initname(&sc->arpcom.ac_if, device_get_name(dev),
	    device_get_unit(dev));
	sc->dev = sc->osdep.dev = dev;

	/*
	 * Determine hardware and mac type
	 */
	sc->hw.vendor_id = pci_get_vendor(dev);
	sc->hw.device_id = pci_get_device(dev);
	sc->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
	sc->hw.subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	sc->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);

	if (e1000_set_mac_type(&sc->hw))
		return ENXIO;

	/* Are we a VF device? */
	if (sc->hw.mac.type == e1000_vfadapt ||
	    sc->hw.mac.type == e1000_vfadapt_i350)
		sc->vf_ifp = 1;
	else
		sc->vf_ifp = 0;

	/*
	 * Configure total supported RX/TX ring count
	 */
	switch (sc->hw.mac.type) {
	case e1000_82575:
		ring_max = IGB_MAX_RING_82575;
		break;

	case e1000_82576:
		ring_max = IGB_MAX_RING_82576;
		break;

	case e1000_82580:
		ring_max = IGB_MAX_RING_82580;
		break;

	case e1000_i350:
		ring_max = IGB_MAX_RING_I350;
		break;

	case e1000_i354:
		ring_max = IGB_MAX_RING_I354;
		break;

	case e1000_i210:
		ring_max = IGB_MAX_RING_I210;
		break;

	case e1000_i211:
		ring_max = IGB_MAX_RING_I211;
		break;

	default:
		ring_max = IGB_MIN_RING;
		break;
	}

	sc->rx_ring_cnt = device_getenv_int(dev, "rxr", igb_rxr);
	sc->rx_ring_cnt = if_ring_count2(sc->rx_ring_cnt, ring_max);
#ifdef IGB_RSS_DEBUG
	sc->rx_ring_cnt = device_getenv_int(dev, "rxr_debug", sc->rx_ring_cnt);
#endif
	sc->rx_ring_inuse = sc->rx_ring_cnt;

	sc->tx_ring_cnt = device_getenv_int(dev, "txr", igb_txr);
	sc->tx_ring_cnt = if_ring_count2(sc->tx_ring_cnt, ring_max);
#ifdef IGB_TSS_DEBUG
	sc->tx_ring_cnt = device_getenv_int(dev, "txr_debug", sc->tx_ring_cnt);
#endif
	sc->tx_ring_inuse = sc->tx_ring_cnt;

	/* Enable bus mastering */
	pci_enable_busmaster(dev);

	/*
	 * Allocate IO memory
	 */
	sc->mem_rid = PCIR_BAR(0);
	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->mem_rid,
	    RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "Unable to allocate bus resource: memory\n");
		error = ENXIO;
		goto failed;
	}
	sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->mem_res);
	sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->mem_res);

	sc->hw.hw_addr = (uint8_t *)&sc->osdep.mem_bus_space_handle;

	/* Save PCI command register for Shared Code */
	sc->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
	sc->hw.back = &sc->osdep;

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&sc->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto failed;
	}

	e1000_get_bus_info(&sc->hw);

	sc->hw.mac.autoneg = DO_AUTO_NEG;
	sc->hw.phy.autoneg_wait_to_complete = FALSE;
	sc->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (sc->hw.phy.media_type == e1000_media_type_copper) {
		sc->hw.phy.mdix = AUTO_ALL_MODES;
		sc->hw.phy.disable_polarity_correction = FALSE;
		sc->hw.phy.ms_type = IGB_MASTER_SLAVE;
	}

	/* Set the frame limits assuming standard ethernet sized frames. */
	sc->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;

	/* Allocate RX/TX rings */
	error = igb_alloc_rings(sc);
	if (error)
		goto failed;

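	/*
	 * The npoll offsets below spread the rings of multiple igb(4)
	 * units across the available CPUs.  For example, assuming
	 * ncpus2 == 8 and rx_ring_cnt == 4, unit 1 defaults to offset
	 * (4 * 1) % 8 == 4, i.e. its RX rings are polled on CPUs 4-7
	 * while unit 0 uses CPUs 0-3.  A user supplied offset is
	 * rejected unless it keeps all rings inside the CPU mask and
	 * is a multiple of the ring count.
	 */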
#ifdef IFPOLL_ENABLE
	/*
	 * NPOLLING RX CPU offset
	 */
	if (sc->rx_ring_cnt == ncpus2) {
		offset = 0;
	} else {
		offset_def = (sc->rx_ring_cnt * device_get_unit(dev)) % ncpus2;
		offset = device_getenv_int(dev, "npoll.rxoff", offset_def);
		if (offset >= ncpus2 ||
		    offset % sc->rx_ring_cnt != 0) {
			device_printf(dev, "invalid npoll.rxoff %d, use %d\n",
			    offset, offset_def);
			offset = offset_def;
		}
	}
	sc->rx_npoll_off = offset;

	/*
	 * NPOLLING TX CPU offset
	 */
	if (sc->tx_ring_cnt == ncpus2) {
		offset = 0;
	} else {
		offset_def = (sc->tx_ring_cnt * device_get_unit(dev)) % ncpus2;
		offset = device_getenv_int(dev, "npoll.txoff", offset_def);
		if (offset >= ncpus2 ||
		    offset % sc->tx_ring_cnt != 0) {
			device_printf(dev, "invalid npoll.txoff %d, use %d\n",
			    offset, offset_def);
			offset = offset_def;
		}
	}
	sc->tx_npoll_off = offset;
#endif

	/* Allocate interrupt */
	error = igb_alloc_intr(sc);
	if (error)
		goto failed;

	/* Setup serializers */
	igb_setup_serializer(sc);

	/* Allocate the appropriate stats memory */
	if (sc->vf_ifp) {
		sc->stats = kmalloc(sizeof(struct e1000_vf_stats), M_DEVBUF,
		    M_WAITOK | M_ZERO);
		igb_vf_init_stats(sc);
	} else {
		sc->stats = kmalloc(sizeof(struct e1000_hw_stats), M_DEVBUF,
		    M_WAITOK | M_ZERO);
	}

	/* Allocate multicast array memory. */
	sc->mta = kmalloc(ETHER_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES,
	    M_DEVBUF, M_WAITOK);

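	/*
	 * Both i350-class features below default to their conservative
	 * settings: DMA coalescing off (igb_dma_coalesce == 0) and
	 * Energy Efficient Ethernet disabled (igb_eee_disabled == 1).
	 * They can be flipped at boot through the hw.igb.dma_coalesce
	 * and hw.igb.eee_disabled tunables declared near the top of
	 * this file.
	 */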
	/* Some adapter-specific advanced features */
	if (sc->hw.mac.type >= e1000_i350) {
#ifdef notyet
		igb_set_sysctl_value(adapter, "dma_coalesce",
		    "configure dma coalesce",
		    &adapter->dma_coalesce, igb_dma_coalesce);
		igb_set_sysctl_value(adapter, "eee_disabled",
		    "enable Energy Efficient Ethernet",
		    &adapter->hw.dev_spec._82575.eee_disable,
		    igb_eee_disabled);
#else
		sc->dma_coalesce = igb_dma_coalesce;
		sc->hw.dev_spec._82575.eee_disable = igb_eee_disabled;
#endif
		if (sc->hw.phy.media_type == e1000_media_type_copper) {
			if (sc->hw.mac.type == e1000_i354)
				e1000_set_eee_i354(&sc->hw);
			else
				e1000_set_eee_i350(&sc->hw);
		}
	}

	/*
	 * Start from a known state, this is important in reading the nvm and
	 * mac from that.
	 */
	e1000_reset_hw(&sc->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (sc->hw.mac.type != e1000_i210 && sc->hw.mac.type != e1000_i211 &&
	    e1000_validate_nvm_checksum(&sc->hw) < 0) {
		/*
		 * Some PCI-E parts fail the first check due to
		 * the link being in sleep state; call it again.
		 * If it fails a second time, it's a real issue.
		 */
		if (e1000_validate_nvm_checksum(&sc->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto failed;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&sc->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto failed;
	}
	if (!igb_is_valid_ether_addr(sc->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto failed;
	}

	/* Setup OS specific network interface */
	igb_setup_ifp(sc);

	/* Add sysctl tree, must be after igb_setup_ifp() */
	igb_add_sysctl(sc);

	/* Now get a good starting state */
	igb_reset(sc);

	/* Initialize statistics */
	igb_update_stats_counters(sc);

	sc->hw.mac.get_link_status = 1;
	igb_update_link_status(sc);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&sc->hw)) {
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");
	}

	/* Determine if we have to control management hardware */
	if (e1000_enable_mng_pass_thru(&sc->hw))
		sc->flags |= IGB_FLAG_HAS_MGMT;

	/*
	 * Setup Wake-on-Lan
	 */
	/* APME bit in EEPROM is mapped to WUC.APME */
	eeprom_data = E1000_READ_REG(&sc->hw, E1000_WUC) & E1000_WUC_APME;
	if (eeprom_data)
		sc->wol = E1000_WUFC_MAG;
	/* XXX disable WOL */
	sc->wol = 0;

#ifdef notyet
	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
#endif

#ifdef notyet
	igb_add_hw_stats(adapter);
#endif

	/*
	 * Disable interrupt to prevent spurious interrupts (line based
	 * interrupt, MSI or even MSI-X), which had been observed on
	 * several types of LOMs, from being handled.
	 */
	igb_disable_intr(sc);

	error = igb_setup_intr(sc);
	if (error) {
		ether_ifdetach(&sc->arpcom.ac_if);
		goto failed;
	}
	return 0;

failed:
	igb_detach(dev);
	return error;
}

static int
igb_detach(device_t dev)
{
	struct igb_softc *sc = device_get_softc(dev);

	if (device_is_attached(dev)) {
		struct ifnet *ifp = &sc->arpcom.ac_if;

		ifnet_serialize_all(ifp);

		igb_stop(sc);

		e1000_phy_hw_reset(&sc->hw);

		/* Give control back to firmware */
		igb_rel_mgmt(sc);
		igb_rel_hw_control(sc);

		if (sc->wol) {
			E1000_WRITE_REG(&sc->hw, E1000_WUC, E1000_WUC_PME_EN);
			E1000_WRITE_REG(&sc->hw, E1000_WUFC, sc->wol);
			igb_enable_wol(dev);
		}

		igb_teardown_intr(sc);

		ifnet_deserialize_all(ifp);

		ether_ifdetach(ifp);
	} else if (sc->mem_res != NULL) {
		igb_rel_hw_control(sc);
	}
	bus_generic_detach(dev);

	igb_free_intr(sc);

	if (sc->msix_mem_res != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_mem_rid,
		    sc->msix_mem_res);
	}
	if (sc->mem_res != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid,
		    sc->mem_res);
	}

	igb_free_rings(sc);

	if (sc->mta != NULL)
		kfree(sc->mta, M_DEVBUF);
	if (sc->stats != NULL)
		kfree(sc->stats, M_DEVBUF);
	if (sc->serializes != NULL)
		kfree(sc->serializes, M_DEVBUF);

	return 0;
}

static int
igb_shutdown(device_t dev)
{
	return igb_suspend(dev);
}

static int
igb_suspend(device_t dev)
{
	struct igb_softc *sc = device_get_softc(dev);
	struct ifnet *ifp = &sc->arpcom.ac_if;

	ifnet_serialize_all(ifp);

	igb_stop(sc);

	igb_rel_mgmt(sc);
	igb_rel_hw_control(sc);

	if (sc->wol) {
		E1000_WRITE_REG(&sc->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&sc->hw, E1000_WUFC, sc->wol);
		igb_enable_wol(dev);
	}

	ifnet_deserialize_all(ifp);

	return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
	struct igb_softc *sc = device_get_softc(dev);
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int i;

	ifnet_serialize_all(ifp);

	igb_init(sc);
	igb_get_mgmt(sc);

	for (i = 0; i < sc->tx_ring_inuse; ++i)
		ifsq_devstart_sched(sc->tx_rings[i].ifsq);

	ifnet_deserialize_all(ifp);

	return bus_generic_resume(dev);
}

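/*
 * The ioctl handler runs with all of the interface's serializers held
 * (note the ASSERT_IFNET_SERIALIZED_ALL below), so it may reconfigure
 * rings and reinitialize the chip without racing the interrupt
 * handlers or the polling code.
 */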
static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
{
	struct igb_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int max_frame_size, mask, reinit;
	int error = 0;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	switch (command) {
	case SIOCSIFMTU:
		max_frame_size = 9234;
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		sc->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN +
		    ETHER_CRC_LEN;

		if (ifp->if_flags & IFF_RUNNING)
			igb_init(sc);
		break;

	case SIOCSIFFLAGS:
		if (ifp->if_flags & IFF_UP) {
			if (ifp->if_flags & IFF_RUNNING) {
				if ((ifp->if_flags ^ sc->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					igb_disable_promisc(sc);
					igb_set_promisc(sc);
				}
			} else {
				igb_init(sc);
			}
		} else if (ifp->if_flags & IFF_RUNNING) {
			igb_stop(sc);
		}
		sc->if_flags = ifp->if_flags;
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		if (ifp->if_flags & IFF_RUNNING) {
			igb_disable_intr(sc);
			igb_set_multi(sc);
#ifdef IFPOLL_ENABLE
			if (!(ifp->if_flags & IFF_NPOLLING))
#endif
				igb_enable_intr(sc);
		}
		break;

	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		if (e1000_check_reset_block(&sc->hw)) {
			if_printf(ifp, "Media change is "
			    "blocked due to SOL/IDER session.\n");
			break;
		}
		/* FALL THROUGH */

	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
		break;

	case SIOCSIFCAP:
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_RXCSUM) {
			ifp->if_capenable ^= IFCAP_RXCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_TXCSUM) {
			ifp->if_capenable ^= IFCAP_TXCSUM;
			if (ifp->if_capenable & IFCAP_TXCSUM)
				ifp->if_hwassist |= IGB_CSUM_FEATURES;
			else
				ifp->if_hwassist &= ~IGB_CSUM_FEATURES;
		}
		if (mask & IFCAP_TSO) {
			ifp->if_capenable ^= IFCAP_TSO;
			if (ifp->if_capenable & IFCAP_TSO)
				ifp->if_hwassist |= CSUM_TSO;
			else
				ifp->if_hwassist &= ~CSUM_TSO;
		}
		if (mask & IFCAP_RSS)
			ifp->if_capenable ^= IFCAP_RSS;
		if (reinit && (ifp->if_flags & IFF_RUNNING))
			igb_init(sc);
		break;

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}
	return error;
}

static void
igb_init(void *xsc)
{
	struct igb_softc *sc = xsc;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	boolean_t polling;
	int i;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	igb_stop(sc);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(ifp), sc->hw.mac.addr, ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&sc->hw, sc->hw.mac.addr, 0);

	igb_reset(sc);
	igb_update_link_status(sc);

	E1000_WRITE_REG(&sc->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Configure for OS presence */
	igb_get_mgmt(sc);

	polling = FALSE;
#ifdef IFPOLL_ENABLE
	if (ifp->if_flags & IFF_NPOLLING)
		polling = TRUE;
#endif

	/* Configure the RX/TX rings to be used */
	igb_set_ring_inuse(sc, polling);
	ifq_set_subq_mask(&ifp->if_snd, sc->tx_ring_inuse - 1);

	/* Initialize interrupt */
	igb_init_intr(sc);

	/* Prepare transmit descriptors and buffers */
	for (i = 0; i < sc->tx_ring_inuse; ++i)
		igb_init_tx_ring(&sc->tx_rings[i]);
	igb_init_tx_unit(sc);

	/* Setup Multicast table */
	igb_set_multi(sc);

#if 0
	/*
	 * Figure out the desired mbuf pool
	 * for doing jumbo/packetsplit
	 */
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;
#endif

	/* Prepare receive descriptors and buffers */
	for (i = 0; i < sc->rx_ring_inuse; ++i) {
		int error;

		error = igb_init_rx_ring(&sc->rx_rings[i]);
		if (error) {
			if_printf(ifp, "Could not setup receive structures\n");
			igb_stop(sc);
			return;
		}
	}
	igb_init_rx_unit(sc);

	/* Enable VLAN support */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		igb_set_vlan(sc);

	/* Don't lose promiscuous settings */
	igb_set_promisc(sc);

	ifp->if_flags |= IFF_RUNNING;
	for (i = 0; i < sc->tx_ring_inuse; ++i) {
		ifsq_clr_oactive(sc->tx_rings[i].ifsq);
		ifsq_watchdog_start(&sc->tx_rings[i].tx_watchdog);
	}

	igb_set_timer_cpuid(sc, polling);
	callout_reset_bycpu(&sc->timer, hz, igb_timer, sc, sc->timer_cpuid);
	e1000_clear_hw_cntrs_base_generic(&sc->hw);

	/* This clears any pending interrupts */
	E1000_READ_REG(&sc->hw, E1000_ICR);

	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (polling) {
		igb_disable_intr(sc);
	} else {
		igb_enable_intr(sc);
		E1000_WRITE_REG(&sc->hw, E1000_ICS, E1000_ICS_LSC);
	}

	/* Set Energy Efficient Ethernet */
	if (sc->hw.phy.media_type == e1000_media_type_copper) {
		if (sc->hw.mac.type == e1000_i354)
			e1000_set_eee_i354(&sc->hw);
		else
			e1000_set_eee_i350(&sc->hw);
	}
}

static void
igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct igb_softc *sc = ifp->if_softc;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	if ((ifp->if_flags & IFF_RUNNING) == 0)
		sc->hw.mac.get_link_status = 1;
	igb_update_link_status(sc);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!sc->link_active)
		return;

	ifmr->ifm_status |= IFM_ACTIVE;

	switch (sc->link_speed) {
	case 10:
		ifmr->ifm_active |= IFM_10_T;
		break;

	case 100:
		/*
		 * Support for 100Mb SFP - these are Fiber
		 * but the media type appears as serdes
		 */
		if (sc->hw.phy.media_type == e1000_media_type_internal_serdes)
			ifmr->ifm_active |= IFM_100_FX;
		else
			ifmr->ifm_active |= IFM_100_TX;
		break;

	case 1000:
		ifmr->ifm_active |= IFM_1000_T;
		break;
	}

	if (sc->link_duplex == FULL_DUPLEX)
		ifmr->ifm_active |= IFM_FDX;
	else
		ifmr->ifm_active |= IFM_HDX;
}

static int
igb_media_change(struct ifnet *ifp)
{
	struct igb_softc *sc = ifp->if_softc;
	struct ifmedia *ifm = &sc->media;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return EINVAL;

	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		sc->hw.mac.autoneg = DO_AUTO_NEG;
		sc->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;

	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		sc->hw.mac.autoneg = DO_AUTO_NEG;
		sc->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;

	case IFM_100_TX:
		sc->hw.mac.autoneg = FALSE;
		sc->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			sc->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			sc->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;

	case IFM_10_T:
		sc->hw.mac.autoneg = FALSE;
		sc->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			sc->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			sc->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;

	default:
		if_printf(ifp, "Unsupported media type\n");
		break;
	}

	igb_init(sc);

	return 0;
}

static void
igb_set_promisc(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	struct e1000_hw *hw = &sc->hw;
	uint32_t reg;

	if (sc->vf_ifp) {
		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
		return;
	}

	reg = E1000_READ_REG(hw, E1000_RCTL);
	if (ifp->if_flags & IFF_PROMISC) {
		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
		E1000_WRITE_REG(hw, E1000_RCTL, reg);
	} else if (ifp->if_flags & IFF_ALLMULTI) {
		reg |= E1000_RCTL_MPE;
		reg &= ~E1000_RCTL_UPE;
		E1000_WRITE_REG(hw, E1000_RCTL, reg);
	}
}

static void
igb_disable_promisc(struct igb_softc *sc)
{
	struct e1000_hw *hw = &sc->hw;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	uint32_t reg;
	int mcnt = 0;

	if (sc->vf_ifp) {
		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
		return;
	}
	reg = E1000_READ_REG(hw, E1000_RCTL);
	reg &= ~E1000_RCTL_UPE;
	if (ifp->if_flags & IFF_ALLMULTI) {
		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
	} else {
		struct ifmultiaddr *ifma;

		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			if (ifma->ifma_addr->sa_family != AF_LINK)
				continue;
			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
				break;
			mcnt++;
		}
	}
	/* Don't disable if in MAX groups */
	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
		reg &= ~E1000_RCTL_MPE;
	E1000_WRITE_REG(hw, E1000_RCTL, reg);
}

static void
igb_set_multi(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	struct ifmultiaddr *ifma;
	uint32_t reg_rctl = 0;
	uint8_t *mta;
	int mcnt = 0;

	mta = sc->mta;
	bzero(mta, ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);

	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;

		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
			break;

		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
		mcnt++;
	}

	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
		reg_rctl = E1000_READ_REG(&sc->hw, E1000_RCTL);
		reg_rctl |= E1000_RCTL_MPE;
		E1000_WRITE_REG(&sc->hw, E1000_RCTL, reg_rctl);
	} else {
		e1000_update_mc_addr_list(&sc->hw, mta, mcnt);
	}
}

static void
igb_timer(void *xsc)
{
	struct igb_softc *sc = xsc;

	lwkt_serialize_enter(&sc->main_serialize);

	igb_update_link_status(sc);
	igb_update_stats_counters(sc);

	callout_reset_bycpu(&sc->timer, hz, igb_timer, sc, sc->timer_cpuid);

	lwkt_serialize_exit(&sc->main_serialize);
}

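/*
 * How the link state is obtained depends on the media type: copper
 * PHYs latch link-state changes, so the (slow) PHY read is only done
 * when mac.get_link_status was set by a link status change interrupt,
 * while fiber and serdes links are sampled directly from the STATUS
 * register or mac.serdes_has_link.  VF interfaces report media type
 * "unknown" and go through e1000_check_for_link() as well.
 */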
static void
igb_update_link_status(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	struct e1000_hw *hw = &sc->hw;
	uint32_t link_check, thstat, ctrl;

	link_check = thstat = ctrl = 0;

	/* Get the cached link value or read for real */
	switch (hw->phy.media_type) {
	case e1000_media_type_copper:
		if (hw->mac.get_link_status) {
			/* Do the work to read phy */
			e1000_check_for_link(hw);
			link_check = !hw->mac.get_link_status;
		} else {
			link_check = TRUE;
		}
		break;

	case e1000_media_type_fiber:
		e1000_check_for_link(hw);
		link_check = E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU;
		break;

	case e1000_media_type_internal_serdes:
		e1000_check_for_link(hw);
		link_check = hw->mac.serdes_has_link;
		break;

	/* VF device is type_unknown */
	case e1000_media_type_unknown:
		e1000_check_for_link(hw);
		link_check = !hw->mac.get_link_status;
		/* Fall thru */
	default:
		break;
	}

	/* Check for thermal downshift or shutdown */
	if (hw->mac.type == e1000_i350) {
		thstat = E1000_READ_REG(hw, E1000_THSTAT);
		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
	}

	/* Now we check if a transition has happened */
	if (link_check && sc->link_active == 0) {
		e1000_get_speed_and_duplex(hw,
		    &sc->link_speed, &sc->link_duplex);
		if (bootverbose) {
			const char *flowctl;

			/* Get the flow control for display */
			switch (hw->fc.current_mode) {
			case e1000_fc_rx_pause:
				flowctl = "RX";
				break;

			case e1000_fc_tx_pause:
				flowctl = "TX";
				break;

			case e1000_fc_full:
				flowctl = "Full";
				break;

			default:
				flowctl = "None";
				break;
			}

			if_printf(ifp, "Link is up %d Mbps %s, "
			    "Flow control: %s\n",
			    sc->link_speed,
			    sc->link_duplex == FULL_DUPLEX ?
			    "Full Duplex" : "Half Duplex",
			    flowctl);
		}
		sc->link_active = 1;

		ifp->if_baudrate = sc->link_speed * 1000000;
		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
		    (thstat & E1000_THSTAT_LINK_THROTTLE))
			if_printf(ifp, "Link: thermal downshift\n");
		/* Delay Link Up for Phy update */
		if ((hw->mac.type == e1000_i210 ||
		     hw->mac.type == e1000_i211) &&
		    hw->phy.id == I210_I_PHY_ID)
			msec_delay(IGB_I210_LINK_DELAY);
		/* This can sleep */
		ifp->if_link_state = LINK_STATE_UP;
		if_link_state_change(ifp);
	} else if (!link_check && sc->link_active == 1) {
		ifp->if_baudrate = sc->link_speed = 0;
		sc->link_duplex = 0;
		if (bootverbose)
			if_printf(ifp, "Link is Down\n");
		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
		    (thstat & E1000_THSTAT_PWR_DOWN))
			if_printf(ifp, "Link: thermal shutdown\n");
		sc->link_active = 0;
		/* This can sleep */
		ifp->if_link_state = LINK_STATE_DOWN;
		if_link_state_change(ifp);
	}
}

static void
igb_stop(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int i;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	igb_disable_intr(sc);

	callout_stop(&sc->timer);

	ifp->if_flags &= ~IFF_RUNNING;
	for (i = 0; i < sc->tx_ring_cnt; ++i) {
		ifsq_clr_oactive(sc->tx_rings[i].ifsq);
		ifsq_watchdog_stop(&sc->tx_rings[i].tx_watchdog);
		sc->tx_rings[i].tx_flags &= ~IGB_TXFLAG_ENABLED;
	}

	e1000_reset_hw(&sc->hw);
	E1000_WRITE_REG(&sc->hw, E1000_WUC, 0);

	e1000_led_off(&sc->hw);
	e1000_cleanup_led(&sc->hw);

	for (i = 0; i < sc->tx_ring_cnt; ++i)
		igb_free_tx_ring(&sc->tx_rings[i]);
	for (i = 0; i < sc->rx_ring_cnt; ++i)
		igb_free_rx_ring(&sc->rx_rings[i]);
}

static void
igb_reset(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	struct e1000_hw *hw = &sc->hw;
	struct e1000_fc_info *fc = &hw->fc;
	uint32_t pba = 0;
	uint16_t hwm;

	/* Let the firmware know the OS is in control */
	igb_get_hw_control(sc);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer
	 * the remainder is used for the transmit buffer.
	 */
	switch (hw->mac.type) {
	case e1000_82575:
		pba = E1000_PBA_32K;
		break;

	case e1000_82576:
	case e1000_vfadapt:
		pba = E1000_READ_REG(hw, E1000_RXPBS);
		pba &= E1000_RXPBS_SIZE_MASK_82576;
		break;

	case e1000_82580:
	case e1000_i350:
	case e1000_i354:
	case e1000_vfadapt_i350:
		pba = E1000_READ_REG(hw, E1000_RXPBS);
		pba = e1000_rxpbs_adjust_82580(pba);
		break;

	case e1000_i210:
	case e1000_i211:
		pba = E1000_PBA_34K;
		break;

	default:
		break;
	}

	/* Special needs in case of Jumbo frames */
	if (hw->mac.type == e1000_82575 && ifp->if_mtu > ETHERMTU) {
		uint32_t tx_space, min_tx, min_rx;

		pba = E1000_READ_REG(hw, E1000_PBA);
		tx_space = pba >> 16;
		pba &= 0xffff;

		min_tx = (sc->max_frame_size +
		    sizeof(struct e1000_tx_desc) - ETHER_CRC_LEN) * 2;
		min_tx = roundup2(min_tx, 1024);
		min_tx >>= 10;
		min_rx = sc->max_frame_size;
		min_rx = roundup2(min_rx, 1024);
		min_rx >>= 10;
		if (tx_space < min_tx && (min_tx - tx_space) < pba) {
			pba = pba - (min_tx - tx_space);
			/*
			 * if short on rx space, rx wins
			 * and must trump tx adjustment
			 */
			if (pba < min_rx)
				pba = min_rx;
		}
		E1000_WRITE_REG(hw, E1000_PBA, pba);
	}

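	/*
	 * Worked example of the water mark math below: pba is in KB
	 * units, so on an i210 (E1000_PBA_34K, pba == 34) with a
	 * standard 1518 byte max frame, pba << 10 == 34816 bytes and
	 *
	 *   hwm = min(34816 * 9 / 10, 34816 - 2 * 1518)
	 *       = min(31334, 31780) = 31334
	 *
	 * which the 16-byte granularity of the post-82575 MACs then
	 * rounds down to high_water == 31328 and low_water == 31312.
	 */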
	/*
	 * These parameters control the automatic generation (Tx) and
	 * response (Rx) to Ethernet PAUSE frames.
	 * - High water mark should allow for at least two frames to be
	 *   received after sending an XOFF.
	 * - Low water mark works best when it is very near the high water mark.
	 *   This allows the receiver to restart by sending XON when it has
	 *   drained a bit.
	 */
	hwm = min(((pba << 10) * 9 / 10),
	    ((pba << 10) - 2 * sc->max_frame_size));

	if (hw->mac.type < e1000_82576) {
		fc->high_water = hwm & 0xFFF8;	/* 8-byte granularity */
		fc->low_water = fc->high_water - 8;
	} else {
		fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
		fc->low_water = fc->high_water - 16;
	}
	fc->pause_time = IGB_FC_PAUSE_TIME;
	fc->send_xon = TRUE;
	fc->requested_mode = e1000_fc_default;

	/* Issue a global reset */
	e1000_reset_hw(hw);
	E1000_WRITE_REG(hw, E1000_WUC, 0);

	if (e1000_init_hw(hw) < 0)
		if_printf(ifp, "Hardware Initialization Failed\n");

	/* Setup DMA Coalescing */
	if (hw->mac.type > e1000_82580 && hw->mac.type != e1000_i211) {
		uint32_t dmac;
		uint32_t reg;

		if (sc->dma_coalesce == 0) {
			/*
			 * Disabled
			 */
			reg = E1000_READ_REG(hw, E1000_DMACR);
			reg &= ~E1000_DMACR_DMAC_EN;
			E1000_WRITE_REG(hw, E1000_DMACR, reg);
			goto reset_out;
		}

		/* Set starting thresholds */
		E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);

		hwm = 64 * pba - sc->max_frame_size / 16;
		if (hwm < 64 * (pba - 6))
			hwm = 64 * (pba - 6);
		reg = E1000_READ_REG(hw, E1000_FCRTC);
		reg &= ~E1000_FCRTC_RTH_COAL_MASK;
		reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
		    & E1000_FCRTC_RTH_COAL_MASK);
		E1000_WRITE_REG(hw, E1000_FCRTC, reg);

		dmac = pba - sc->max_frame_size / 512;
		if (dmac < pba - 10)
			dmac = pba - 10;
		reg = E1000_READ_REG(hw, E1000_DMACR);
		reg &= ~E1000_DMACR_DMACTHR_MASK;
		reg = ((dmac << E1000_DMACR_DMACTHR_SHIFT)
		    & E1000_DMACR_DMACTHR_MASK);
		/* Transition to L0x or L1 if available.. */
		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
		/* timer = value in sc->dma_coalesce in 32usec intervals */
		reg |= (sc->dma_coalesce >> 5);
		E1000_WRITE_REG(hw, E1000_DMACR, reg);

		/* Set the interval before transition */
		reg = E1000_READ_REG(hw, E1000_DMCTLX);
		reg |= 0x80000004;
		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);

		/* Free space in tx packet buffer to wake from DMA coal */
		E1000_WRITE_REG(hw, E1000_DMCTXTH,
		    (20480 - (2 * sc->max_frame_size)) >> 6);

		/* Make low power state decision controlled by DMA coal */
		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
		reg &= ~E1000_PCIEMISC_LX_DECISION;
		E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
		if_printf(ifp, "DMA Coalescing enabled\n");
	} else if (hw->mac.type == e1000_82580) {
		uint32_t reg = E1000_READ_REG(hw, E1000_PCIEMISC);

		E1000_WRITE_REG(hw, E1000_DMACR, 0);
		E1000_WRITE_REG(hw, E1000_PCIEMISC,
		    reg & ~E1000_PCIEMISC_LX_DECISION);
	}

reset_out:
	E1000_WRITE_REG(&sc->hw, E1000_VET, ETHERTYPE_VLAN);
	e1000_get_phy_info(hw);
	e1000_check_for_link(hw);
}

static void
igb_setup_ifp(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int i;

	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_init = igb_init;
	ifp->if_ioctl = igb_ioctl;
	ifp->if_start = igb_start;
	ifp->if_serialize = igb_serialize;
	ifp->if_deserialize = igb_deserialize;
	ifp->if_tryserialize = igb_tryserialize;
#ifdef INVARIANTS
	ifp->if_serialize_assert = igb_serialize_assert;
#endif
#ifdef IFPOLL_ENABLE
	ifp->if_npoll = igb_npoll;
#endif

	ifp->if_nmbclusters = sc->rx_ring_cnt * sc->rx_rings[0].num_rx_desc;

	ifq_set_maxlen(&ifp->if_snd, sc->tx_rings[0].num_tx_desc - 1);
	ifq_set_ready(&ifp->if_snd);
	ifq_set_subq_cnt(&ifp->if_snd, sc->tx_ring_cnt);

	ifp->if_mapsubq = ifq_mapsubq_mask;
	ifq_set_subq_mask(&ifp->if_snd, 0);

	ether_ifattach(ifp, sc->hw.mac.addr, NULL);

	ifp->if_capabilities =
	    IFCAP_HWCSUM | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_TSO;
	if (IGB_ENABLE_HWRSS(sc))
		ifp->if_capabilities |= IFCAP_RSS;
	ifp->if_capenable = ifp->if_capabilities;
	ifp->if_hwassist = IGB_CSUM_FEATURES | CSUM_TSO;

	/*
	 * Tell the upper layer(s) we support long frames
	 */
	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);

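	/*
	 * The transmit side is exposed to the stack as one ALTQ
	 * subqueue per TX ring.  Each subqueue is pinned to the CPU
	 * that services the ring's TX interrupt and shares the ring's
	 * serializer, so packets are queued, transmitted and reclaimed
	 * on the same CPU.
	 */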
	/* Setup TX rings and subqueues */
	for (i = 0; i < sc->tx_ring_cnt; ++i) {
		struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
		struct igb_tx_ring *txr = &sc->tx_rings[i];

		ifsq_set_cpuid(ifsq, txr->tx_intr_cpuid);
		ifsq_set_priv(ifsq, txr);
		ifsq_set_hw_serialize(ifsq, &txr->tx_serialize);
		txr->ifsq = ifsq;

		ifsq_watchdog_init(&txr->tx_watchdog, ifsq, igb_watchdog);
	}

	/*
	 * Specify the media types supported by this adapter and register
	 * callbacks to update media and link information
	 */
	ifmedia_init(&sc->media, IFM_IMASK, igb_media_change, igb_media_status);
	if (sc->hw.phy.media_type == e1000_media_type_fiber ||
	    sc->hw.phy.media_type == e1000_media_type_internal_serdes) {
		ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
		    0, NULL);
		ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
	} else {
		ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T, 0, NULL);
		ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T | IFM_FDX,
		    0, NULL);
		ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX, 0, NULL);
		ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
		    0, NULL);
		if (sc->hw.phy.type != e1000_phy_ife) {
			ifmedia_add(&sc->media,
			    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
			ifmedia_add(&sc->media,
			    IFM_ETHER | IFM_1000_T, 0, NULL);
		}
	}
	ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO);
}

static void
igb_add_sysctl(struct igb_softc *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *tree;
	char node[32];
	int i;

	ctx = device_get_sysctl_ctx(sc->dev);
	tree = device_get_sysctl_tree(sc->dev);
	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
	    OID_AUTO, "rxr", CTLFLAG_RD, &sc->rx_ring_cnt, 0, "# of RX rings");
	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
	    OID_AUTO, "rxr_inuse", CTLFLAG_RD, &sc->rx_ring_inuse, 0,
	    "# of RX rings used");
	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
	    OID_AUTO, "txr", CTLFLAG_RD, &sc->tx_ring_cnt, 0, "# of TX rings");
	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
	    OID_AUTO, "txr_inuse", CTLFLAG_RD, &sc->tx_ring_inuse, 0,
	    "# of TX rings used");
	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
	    OID_AUTO, "rxd", CTLFLAG_RD, &sc->rx_rings[0].num_rx_desc, 0,
	    "# of RX descs");
	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
	    OID_AUTO, "txd", CTLFLAG_RD, &sc->tx_rings[0].num_tx_desc, 0,
	    "# of TX descs");

	if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
		    OID_AUTO, "intr_rate", CTLTYPE_INT | CTLFLAG_RW,
		    sc, 0, igb_sysctl_intr_rate, "I", "interrupt rate");
	} else {
		for (i = 0; i < sc->msix_cnt; ++i) {
			struct igb_msix_data *msix = &sc->msix_data[i];

			ksnprintf(node, sizeof(node), "msix%d_rate", i);
			SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
			    OID_AUTO, node, CTLTYPE_INT | CTLFLAG_RW,
			    msix, 0, igb_sysctl_msix_rate, "I",
			    msix->msix_rate_desc);
		}
	}

	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
	    OID_AUTO, "tx_intr_nsegs", CTLTYPE_INT | CTLFLAG_RW,
	    sc, 0, igb_sysctl_tx_intr_nsegs, "I",
	    "# of segments per TX interrupt");

	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
	    OID_AUTO, "tx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
	    sc, 0, igb_sysctl_tx_wreg_nsegs, "I",
	    "# of segments sent before write to hardware register");

	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
	    OID_AUTO, "rx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
	    sc, 0, igb_sysctl_rx_wreg_nsegs, "I",
	    "# of segments received before write to hardware register");

#ifdef IFPOLL_ENABLE
	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
	    OID_AUTO, "npoll_rxoff", CTLTYPE_INT|CTLFLAG_RW,
	    sc, 0, igb_sysctl_npoll_rxoff, "I", "NPOLLING RX cpu offset");
	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
	    OID_AUTO, "npoll_txoff", CTLTYPE_INT|CTLFLAG_RW,
	    sc, 0, igb_sysctl_npoll_txoff, "I", "NPOLLING TX cpu offset");
#endif

#ifdef IGB_RSS_DEBUG
	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
	    OID_AUTO, "rss_debug", CTLFLAG_RW, &sc->rss_debug, 0,
	    "RSS debug level");
	for (i = 0; i < sc->rx_ring_cnt; ++i) {
		ksnprintf(node, sizeof(node), "rx%d_pkt", i);
		SYSCTL_ADD_ULONG(ctx,
		    SYSCTL_CHILDREN(tree), OID_AUTO, node,
		    CTLFLAG_RW, &sc->rx_rings[i].rx_packets, "RXed packets");
	}
#endif
#ifdef IGB_TSS_DEBUG
	for (i = 0; i < sc->tx_ring_cnt; ++i) {
		ksnprintf(node, sizeof(node), "tx%d_pkt", i);
		SYSCTL_ADD_ULONG(ctx,
		    SYSCTL_CHILDREN(tree), OID_AUTO, node,
		    CTLFLAG_RW, &sc->tx_rings[i].tx_packets, "TXed packets");
	}
#endif
}

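/*
 * All descriptor rings and packet buffers are allocated from child
 * tags of the top level parent_tag created below, so the device-wide
 * DMA constraints only have to be expressed once and are inherited by
 * every per-ring tag.
 */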
static int
igb_alloc_rings(struct igb_softc *sc)
{
	int error, i;

	/*
	 * Create top level busdma tag
	 */
	error = bus_dma_tag_create(NULL, 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
	    &sc->parent_tag);
	if (error) {
		device_printf(sc->dev, "could not create top level DMA tag\n");
		return error;
	}

	/*
	 * Allocate TX descriptor rings and buffers
	 */
	sc->tx_rings = kmalloc_cachealign(
	    sizeof(struct igb_tx_ring) * sc->tx_ring_cnt,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < sc->tx_ring_cnt; ++i) {
		struct igb_tx_ring *txr = &sc->tx_rings[i];

		/* Set up some basics */
		txr->sc = sc;
		txr->me = i;
		lwkt_serialize_init(&txr->tx_serialize);

		error = igb_create_tx_ring(txr);
		if (error)
			return error;
	}

	/*
	 * Allocate RX descriptor rings and buffers
	 */
	sc->rx_rings = kmalloc_cachealign(
	    sizeof(struct igb_rx_ring) * sc->rx_ring_cnt,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < sc->rx_ring_cnt; ++i) {
		struct igb_rx_ring *rxr = &sc->rx_rings[i];

		/* Set up some basics */
		rxr->sc = sc;
		rxr->me = i;
		lwkt_serialize_init(&rxr->rx_serialize);

		error = igb_create_rx_ring(rxr);
		if (error)
			return error;
	}

	return 0;
}

static void
igb_free_rings(struct igb_softc *sc)
{
	int i;

	if (sc->tx_rings != NULL) {
		for (i = 0; i < sc->tx_ring_cnt; ++i) {
			struct igb_tx_ring *txr = &sc->tx_rings[i];

			igb_destroy_tx_ring(txr, txr->num_tx_desc);
		}
		kfree(sc->tx_rings, M_DEVBUF);
	}

	if (sc->rx_rings != NULL) {
		for (i = 0; i < sc->rx_ring_cnt; ++i) {
			struct igb_rx_ring *rxr = &sc->rx_rings[i];

			igb_destroy_rx_ring(rxr, rxr->num_rx_desc);
		}
		kfree(sc->rx_rings, M_DEVBUF);
	}
}

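/*
 * Worked example of the descriptor count check below: each legacy TX
 * descriptor is 16 bytes, so ntxd * 16 must be a multiple of
 * IGB_DBA_ALIGN; assuming the 128-byte IGB_DBA_ALIGN from if_igb.h,
 * ntxd itself must be a multiple of 8.  The default of IGB_DEFAULT_TXD
 * descriptors passes trivially, e.g. 1024 * 16 == 16384 bytes.
 */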
static int
igb_create_tx_ring(struct igb_tx_ring *txr)
{
	int tsize, error, i, ntxd;

	/*
	 * Validate number of transmit descriptors. It must not exceed
	 * hardware maximum, and must be multiple of IGB_DBA_ALIGN.
	 */
	ntxd = device_getenv_int(txr->sc->dev, "txd", igb_txd);
	if ((ntxd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN != 0 ||
	    ntxd > IGB_MAX_TXD || ntxd < IGB_MIN_TXD) {
		device_printf(txr->sc->dev,
		    "Using %d TX descriptors instead of %d!\n",
		    IGB_DEFAULT_TXD, ntxd);
		txr->num_tx_desc = IGB_DEFAULT_TXD;
	} else {
		txr->num_tx_desc = ntxd;
	}

	/*
	 * Allocate TX descriptor ring
	 */
	tsize = roundup2(txr->num_tx_desc * sizeof(union e1000_adv_tx_desc),
	    IGB_DBA_ALIGN);
	txr->txdma.dma_vaddr = bus_dmamem_coherent_any(txr->sc->parent_tag,
	    IGB_DBA_ALIGN, tsize, BUS_DMA_WAITOK,
	    &txr->txdma.dma_tag, &txr->txdma.dma_map, &txr->txdma.dma_paddr);
	if (txr->txdma.dma_vaddr == NULL) {
		device_printf(txr->sc->dev,
		    "Unable to allocate TX Descriptor memory\n");
		return ENOMEM;
	}
	txr->tx_base = txr->txdma.dma_vaddr;
	bzero(txr->tx_base, tsize);

	tsize = __VM_CACHELINE_ALIGN(
	    sizeof(struct igb_tx_buf) * txr->num_tx_desc);
	txr->tx_buf = kmalloc_cachealign(tsize, M_DEVBUF, M_WAITOK | M_ZERO);

	/*
	 * Allocate TX head write-back buffer
	 */
	txr->tx_hdr = bus_dmamem_coherent_any(txr->sc->parent_tag,
	    __VM_CACHELINE_SIZE, __VM_CACHELINE_SIZE, BUS_DMA_WAITOK,
	    &txr->tx_hdr_dtag, &txr->tx_hdr_dmap, &txr->tx_hdr_paddr);
	if (txr->tx_hdr == NULL) {
		device_printf(txr->sc->dev,
		    "Unable to allocate TX head write-back buffer\n");
		return ENOMEM;
	}

	/*
	 * Create DMA tag for TX buffers
	 */
	error = bus_dma_tag_create(txr->sc->parent_tag,
	    1, 0,		/* alignment, bounds */
	    BUS_SPACE_MAXADDR,	/* lowaddr */
	    BUS_SPACE_MAXADDR,	/* highaddr */
	    NULL, NULL,		/* filter, filterarg */
	    IGB_TSO_SIZE,	/* maxsize */
	    IGB_MAX_SCATTER,	/* nsegments */
	    PAGE_SIZE,		/* maxsegsize */
	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
	    BUS_DMA_ONEBPAGE,	/* flags */
	    &txr->tx_tag);
	if (error) {
		device_printf(txr->sc->dev, "Unable to allocate TX DMA tag\n");
		kfree(txr->tx_buf, M_DEVBUF);
		txr->tx_buf = NULL;
		return error;
	}

	/*
	 * Create DMA maps for TX buffers
	 */
	for (i = 0; i < txr->num_tx_desc; ++i) {
		struct igb_tx_buf *txbuf = &txr->tx_buf[i];

		error = bus_dmamap_create(txr->tx_tag,
		    BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &txbuf->map);
		if (error) {
			device_printf(txr->sc->dev,
			    "Unable to create TX DMA map\n");
			igb_destroy_tx_ring(txr, i);
			return error;
		}
	}

	if (txr->sc->hw.mac.type == e1000_82575)
		txr->tx_flags |= IGB_TXFLAG_TSO_IPLEN0;

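	/*
	 * The watermarks below implement hysteresis on the "ring full"
	 * state: roughly, the subqueue is marked OACTIVE when the free
	 * descriptor count drops to oact_lo_desc and is cleared again
	 * once it climbs back to oact_hi_desc, while intr_nsegs bounds
	 * how many segments are sent before a TX completion interrupt
	 * is requested.  With 1024 descriptors, for example, that is
	 * intr_nsegs == 64, oact_hi_desc == 512 and oact_lo_desc ==
	 * 128 before the clamping below.
	 */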
	/*
	 * Initialize various watermarks
	 */
	txr->spare_desc = IGB_TX_SPARE;
	txr->intr_nsegs = txr->num_tx_desc / 16;
	txr->wreg_nsegs = IGB_DEF_TXWREG_NSEGS;
	txr->oact_hi_desc = txr->num_tx_desc / 2;
	txr->oact_lo_desc = txr->num_tx_desc / 8;
	if (txr->oact_lo_desc > IGB_TX_OACTIVE_MAX)
		txr->oact_lo_desc = IGB_TX_OACTIVE_MAX;
	if (txr->oact_lo_desc < txr->spare_desc + IGB_TX_RESERVED)
		txr->oact_lo_desc = txr->spare_desc + IGB_TX_RESERVED;

	return 0;
}

static void
igb_free_tx_ring(struct igb_tx_ring *txr)
{
	int i;

	for (i = 0; i < txr->num_tx_desc; ++i) {
		struct igb_tx_buf *txbuf = &txr->tx_buf[i];

		if (txbuf->m_head != NULL) {
			bus_dmamap_unload(txr->tx_tag, txbuf->map);
			m_freem(txbuf->m_head);
			txbuf->m_head = NULL;
		}
	}
}

static void
igb_destroy_tx_ring(struct igb_tx_ring *txr, int ndesc)
{
	int i;

	if (txr->txdma.dma_vaddr != NULL) {
		bus_dmamap_unload(txr->txdma.dma_tag, txr->txdma.dma_map);
		bus_dmamem_free(txr->txdma.dma_tag, txr->txdma.dma_vaddr,
		    txr->txdma.dma_map);
		bus_dma_tag_destroy(txr->txdma.dma_tag);
		txr->txdma.dma_vaddr = NULL;
	}

	if (txr->tx_hdr != NULL) {
		bus_dmamap_unload(txr->tx_hdr_dtag, txr->tx_hdr_dmap);
		bus_dmamem_free(txr->tx_hdr_dtag, txr->tx_hdr,
		    txr->tx_hdr_dmap);
		bus_dma_tag_destroy(txr->tx_hdr_dtag);
		txr->tx_hdr = NULL;
	}

	if (txr->tx_buf == NULL)
		return;

	for (i = 0; i < ndesc; ++i) {
		struct igb_tx_buf *txbuf = &txr->tx_buf[i];

		KKASSERT(txbuf->m_head == NULL);
		bus_dmamap_destroy(txr->tx_tag, txbuf->map);
	}
	bus_dma_tag_destroy(txr->tx_tag);

	kfree(txr->tx_buf, M_DEVBUF);
	txr->tx_buf = NULL;
}

static void
igb_init_tx_ring(struct igb_tx_ring *txr)
{
	/* Clear the old descriptor contents */
	bzero(txr->tx_base,
	    sizeof(union e1000_adv_tx_desc) * txr->num_tx_desc);

	/* Clear TX head write-back buffer */
	*(txr->tx_hdr) = 0;

	/* Reset indices */
	txr->next_avail_desc = 0;
	txr->next_to_clean = 0;
	txr->tx_nsegs = 0;

	/* Set number of descriptors available */
	txr->tx_avail = txr->num_tx_desc;

	/* Enable this TX ring */
	txr->tx_flags |= IGB_TXFLAG_ENABLED;
}

static void
igb_init_tx_unit(struct igb_softc *sc)
{
	struct e1000_hw *hw = &sc->hw;
	uint32_t tctl;
	int i;

	/* Setup the Tx Descriptor Rings */
	for (i = 0; i < sc->tx_ring_inuse; ++i) {
		struct igb_tx_ring *txr = &sc->tx_rings[i];
		uint64_t bus_addr = txr->txdma.dma_paddr;
		uint64_t hdr_paddr = txr->tx_hdr_paddr;
		uint32_t txdctl = 0;
		uint32_t dca_txctrl;

		E1000_WRITE_REG(hw, E1000_TDLEN(i),
		    txr->num_tx_desc * sizeof(struct e1000_tx_desc));
		E1000_WRITE_REG(hw, E1000_TDBAH(i),
		    (uint32_t)(bus_addr >> 32));
		E1000_WRITE_REG(hw, E1000_TDBAL(i),
		    (uint32_t)bus_addr);

		/* Setup the HW Tx Head and Tail descriptor pointers */
		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
		E1000_WRITE_REG(hw, E1000_TDH(i), 0);

		dca_txctrl = E1000_READ_REG(hw, E1000_DCA_TXCTRL(i));
		dca_txctrl &= ~E1000_DCA_TXCTRL_TX_WB_RO_EN;
		E1000_WRITE_REG(hw, E1000_DCA_TXCTRL(i), dca_txctrl);

		/*
		 * Don't set WB_on_EITR:
		 * - 82575 does not have it
		 * - It almost has no effect on 82576, see:
		 *   82576 specification update errata #26
		 * - It causes unnecessary bus traffic
		 */
		E1000_WRITE_REG(hw, E1000_TDWBAH(i),
		    (uint32_t)(hdr_paddr >> 32));
		E1000_WRITE_REG(hw, E1000_TDWBAL(i),
		    ((uint32_t)hdr_paddr) | E1000_TX_HEAD_WB_ENABLE);

		/*
		 * WTHRESH is ignored by the hardware, since header
		 * write back mode is used.
		 */
		txdctl |= IGB_TX_PTHRESH;
		txdctl |= IGB_TX_HTHRESH << 8;
		txdctl |= IGB_TX_WTHRESH << 16;
		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
	}

	if (sc->vf_ifp)
		return;

	e1000_config_collision_dist(hw);

	/* Program the Transmit Control Register */
	tctl = E1000_READ_REG(hw, E1000_TCTL);
	tctl &= ~E1000_TCTL_CT;
	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
	    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));

	/* This write will effectively turn on the transmit unit. */
	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
}

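/*
 * Checksum offload on this family works through context descriptors:
 * igb_txcsum_ctx() consumes one slot of the TX ring to describe the
 * frame layout (MAC/IP header lengths, VLAN tag, L4 protocol) and the
 * hardware applies that context to the data descriptors that follow.
 */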
2021 */ 2022 txdctl |= IGB_TX_PTHRESH; 2023 txdctl |= IGB_TX_HTHRESH << 8; 2024 txdctl |= IGB_TX_WTHRESH << 16; 2025 txdctl |= E1000_TXDCTL_QUEUE_ENABLE; 2026 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); 2027 } 2028 2029 if (sc->vf_ifp) 2030 return; 2031 2032 e1000_config_collision_dist(hw); 2033 2034 /* Program the Transmit Control Register */ 2035 tctl = E1000_READ_REG(hw, E1000_TCTL); 2036 tctl &= ~E1000_TCTL_CT; 2037 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | 2038 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); 2039 2040 /* This write will effectively turn on the transmit unit. */ 2041 E1000_WRITE_REG(hw, E1000_TCTL, tctl); 2042 } 2043 2044 static boolean_t 2045 igb_txcsum_ctx(struct igb_tx_ring *txr, struct mbuf *mp) 2046 { 2047 struct e1000_adv_tx_context_desc *TXD; 2048 uint32_t vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx; 2049 int ehdrlen, ctxd, ip_hlen = 0; 2050 boolean_t offload = TRUE; 2051 2052 if ((mp->m_pkthdr.csum_flags & IGB_CSUM_FEATURES) == 0) 2053 offload = FALSE; 2054 2055 vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0; 2056 2057 ctxd = txr->next_avail_desc; 2058 TXD = (struct e1000_adv_tx_context_desc *)&txr->tx_base[ctxd]; 2059 2060 /* 2061 * With advanced descriptors the VLAN tag must be placed 2062 * into the context descriptor, so one is needed even when 2063 * no checksum offload is requested. 2064 */ 2065 if (mp->m_flags & M_VLANTAG) { 2066 uint16_t vlantag; 2067 2068 vlantag = htole16(mp->m_pkthdr.ether_vlantag); 2069 vlan_macip_lens |= (vlantag << E1000_ADVTXD_VLAN_SHIFT); 2070 } else if (!offload) { 2071 return FALSE; 2072 } 2073 2074 ehdrlen = mp->m_pkthdr.csum_lhlen; 2075 KASSERT(ehdrlen > 0, ("invalid ether hlen")); 2076 2077 /* Set the ether header length */ 2078 vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; 2079 if (mp->m_pkthdr.csum_flags & CSUM_IP) { 2080 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; 2081 ip_hlen = mp->m_pkthdr.csum_iphlen; 2082 KASSERT(ip_hlen > 0, ("invalid ip hlen")); 2083 } 2084 vlan_macip_lens |= ip_hlen; 2085 2086 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; 2087 if (mp->m_pkthdr.csum_flags & CSUM_TCP) 2088 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; 2089 else if (mp->m_pkthdr.csum_flags & CSUM_UDP) 2090 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP; 2091 2092 /* 2093 * 82575 needs the TX context index added; the queue 2094 * index is used as TX context index here.
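 * The IDX field is presumably at bit 4 of mss_l4len_idx, which is
 * why txr->me is shifted by 4 below.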
2095 */ 2096 if (txr->sc->hw.mac.type == e1000_82575) 2097 mss_l4len_idx = txr->me << 4; 2098 2099 /* Now copy bits into descriptor */ 2100 TXD->vlan_macip_lens = htole32(vlan_macip_lens); 2101 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); 2102 TXD->seqnum_seed = htole32(0); 2103 TXD->mss_l4len_idx = htole32(mss_l4len_idx); 2104 2105 /* We've consumed the first desc, adjust counters */ 2106 if (++ctxd == txr->num_tx_desc) 2107 ctxd = 0; 2108 txr->next_avail_desc = ctxd; 2109 --txr->tx_avail; 2110 2111 return offload; 2112 } 2113 2114 static void 2115 igb_txeof(struct igb_tx_ring *txr) 2116 { 2117 int first, hdr, avail; 2118 2119 if (txr->tx_avail == txr->num_tx_desc) 2120 return; 2121 2122 first = txr->next_to_clean; 2123 hdr = *(txr->tx_hdr); 2124 2125 if (first == hdr) 2126 return; 2127 2128 avail = txr->tx_avail; 2129 while (first != hdr) { 2130 struct igb_tx_buf *txbuf = &txr->tx_buf[first]; 2131 2132 ++avail; 2133 if (txbuf->m_head) { 2134 bus_dmamap_unload(txr->tx_tag, txbuf->map); 2135 m_freem(txbuf->m_head); 2136 txbuf->m_head = NULL; 2137 } 2138 if (++first == txr->num_tx_desc) 2139 first = 0; 2140 } 2141 txr->next_to_clean = first; 2142 txr->tx_avail = avail; 2143 2144 /* 2145 * If we have the minimum number of free descriptors, clear 2146 * OACTIVE to tell the stack that it is OK to send packets. 2147 */ 2148 if (IGB_IS_NOT_OACTIVE(txr)) { 2149 ifsq_clr_oactive(txr->ifsq); 2150 2151 /* 2152 * We have enough TX descriptors, so turn off 2153 * the watchdog. A small number of 2154 * packets (roughly intr_nsegs) may still be pending on 2155 * the transmit ring. 2156 */ 2157 txr->tx_watchdog.wd_timer = 0; 2158 } 2159 } 2160 2161 static int 2162 igb_create_rx_ring(struct igb_rx_ring *rxr) 2163 { 2164 int rsize, i, error, nrxd; 2165 2166 /* 2167 * Validate the number of receive descriptors. It must not exceed 2168 * the hardware maximum, and the ring size must be a multiple of IGB_DBA_ALIGN.
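 * For example, assuming IGB_DBA_ALIGN is 128 and 16-byte receive
 * descriptors, the modulo check below only accepts descriptor counts
 * that are a multiple of 8.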
2169 */ 2170 nrxd = device_getenv_int(rxr->sc->dev, "rxd", igb_rxd); 2171 if ((nrxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN != 0 || 2172 nrxd > IGB_MAX_RXD || nrxd < IGB_MIN_RXD) { 2173 device_printf(rxr->sc->dev, 2174 "Using %d RX descriptors instead of %d!\n", 2175 IGB_DEFAULT_RXD, nrxd); 2176 rxr->num_rx_desc = IGB_DEFAULT_RXD; 2177 } else { 2178 rxr->num_rx_desc = nrxd; 2179 } 2180 2181 /* 2182 * Allocate RX descriptor ring 2183 */ 2184 rsize = roundup2(rxr->num_rx_desc * sizeof(union e1000_adv_rx_desc), 2185 IGB_DBA_ALIGN); 2186 rxr->rxdma.dma_vaddr = bus_dmamem_coherent_any(rxr->sc->parent_tag, 2187 IGB_DBA_ALIGN, rsize, BUS_DMA_WAITOK, 2188 &rxr->rxdma.dma_tag, &rxr->rxdma.dma_map, 2189 &rxr->rxdma.dma_paddr); 2190 if (rxr->rxdma.dma_vaddr == NULL) { 2191 device_printf(rxr->sc->dev, 2192 "Unable to allocate RX descriptor memory\n"); 2193 return ENOMEM; 2194 } 2195 rxr->rx_base = rxr->rxdma.dma_vaddr; 2196 bzero(rxr->rx_base, rsize); 2197 2198 rsize = __VM_CACHELINE_ALIGN( 2199 sizeof(struct igb_rx_buf) * rxr->num_rx_desc); 2200 rxr->rx_buf = kmalloc_cachealign(rsize, M_DEVBUF, M_WAITOK | M_ZERO); 2201 2202 /* 2203 * Create DMA tag for RX buffers 2204 */ 2205 error = bus_dma_tag_create(rxr->sc->parent_tag, 2206 1, 0, /* alignment, bounds */ 2207 BUS_SPACE_MAXADDR, /* lowaddr */ 2208 BUS_SPACE_MAXADDR, /* highaddr */ 2209 NULL, NULL, /* filter, filterarg */ 2210 MCLBYTES, /* maxsize */ 2211 1, /* nsegments */ 2212 MCLBYTES, /* maxsegsize */ 2213 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, /* flags */ 2214 &rxr->rx_tag); 2215 if (error) { 2216 device_printf(rxr->sc->dev, 2217 "Unable to create RX payload DMA tag\n"); 2218 kfree(rxr->rx_buf, M_DEVBUF); 2219 rxr->rx_buf = NULL; 2220 return error; 2221 } 2222 2223 /* 2224 * Create spare DMA map for RX buffers 2225 */ 2226 error = bus_dmamap_create(rxr->rx_tag, BUS_DMA_WAITOK, 2227 &rxr->rx_sparemap); 2228 if (error) { 2229 device_printf(rxr->sc->dev, 2230 "Unable to create spare RX DMA map\n"); 2231 bus_dma_tag_destroy(rxr->rx_tag); 2232 kfree(rxr->rx_buf, M_DEVBUF); 2233 rxr->rx_buf = NULL; 2234 return error; 2235 } 2236 2237 /* 2238 * Create DMA maps for RX buffers 2239 */ 2240 for (i = 0; i < rxr->num_rx_desc; i++) { 2241 struct igb_rx_buf *rxbuf = &rxr->rx_buf[i]; 2242 2243 error = bus_dmamap_create(rxr->rx_tag, 2244 BUS_DMA_WAITOK, &rxbuf->map); 2245 if (error) { 2246 device_printf(rxr->sc->dev, 2247 "Unable to create RX DMA map\n"); 2248 igb_destroy_rx_ring(rxr, i); 2249 return error; 2250 } 2251 } 2252 2253 /* 2254 * Initialize various watermarks 2255 */ 2256 rxr->wreg_nsegs = IGB_DEF_RXWREG_NSEGS; 2257 2258 return 0; 2259 } 2260 2261 static void 2262 igb_free_rx_ring(struct igb_rx_ring *rxr) 2263 { 2264 int i; 2265 2266 for (i = 0; i < rxr->num_rx_desc; ++i) { 2267 struct igb_rx_buf *rxbuf = &rxr->rx_buf[i]; 2268 2269 if (rxbuf->m_head != NULL) { 2270 bus_dmamap_unload(rxr->rx_tag, rxbuf->map); 2271 m_freem(rxbuf->m_head); 2272 rxbuf->m_head = NULL; 2273 } 2274 } 2275 2276 if (rxr->fmp != NULL) 2277 m_freem(rxr->fmp); 2278 rxr->fmp = NULL; 2279 rxr->lmp = NULL; 2280 } 2281 2282 static void 2283 igb_destroy_rx_ring(struct igb_rx_ring *rxr, int ndesc) 2284 { 2285 int i; 2286 2287 if (rxr->rxdma.dma_vaddr != NULL) { 2288 bus_dmamap_unload(rxr->rxdma.dma_tag, rxr->rxdma.dma_map); 2289 bus_dmamem_free(rxr->rxdma.dma_tag, rxr->rxdma.dma_vaddr, 2290 rxr->rxdma.dma_map); 2291 bus_dma_tag_destroy(rxr->rxdma.dma_tag); 2292 rxr->rxdma.dma_vaddr = NULL; 2293 } 2294 2295 if (rxr->rx_buf == NULL) 2296 return; 2297 2298 for (i = 0; i < ndesc;
++i) { 2299 struct igb_rx_buf *rxbuf = &rxr->rx_buf[i]; 2300 2301 KKASSERT(rxbuf->m_head == NULL); 2302 bus_dmamap_destroy(rxr->rx_tag, rxbuf->map); 2303 } 2304 bus_dmamap_destroy(rxr->rx_tag, rxr->rx_sparemap); 2305 bus_dma_tag_destroy(rxr->rx_tag); 2306 2307 kfree(rxr->rx_buf, M_DEVBUF); 2308 rxr->rx_buf = NULL; 2309 } 2310 2311 static void 2312 igb_setup_rxdesc(union e1000_adv_rx_desc *rxd, const struct igb_rx_buf *rxbuf) 2313 { 2314 rxd->read.pkt_addr = htole64(rxbuf->paddr); 2315 rxd->wb.upper.status_error = 0; 2316 } 2317 2318 static int 2319 igb_newbuf(struct igb_rx_ring *rxr, int i, boolean_t wait) 2320 { 2321 struct mbuf *m; 2322 bus_dma_segment_t seg; 2323 bus_dmamap_t map; 2324 struct igb_rx_buf *rxbuf; 2325 int error, nseg; 2326 2327 m = m_getcl(wait ? M_WAITOK : M_NOWAIT, MT_DATA, M_PKTHDR); 2328 if (m == NULL) { 2329 if (wait) { 2330 if_printf(&rxr->sc->arpcom.ac_if, 2331 "Unable to allocate RX mbuf\n"); 2332 } 2333 return ENOBUFS; 2334 } 2335 m->m_len = m->m_pkthdr.len = MCLBYTES; 2336 2337 if (rxr->sc->max_frame_size <= MCLBYTES - ETHER_ALIGN) 2338 m_adj(m, ETHER_ALIGN); 2339 2340 error = bus_dmamap_load_mbuf_segment(rxr->rx_tag, 2341 rxr->rx_sparemap, m, &seg, 1, &nseg, BUS_DMA_NOWAIT); 2342 if (error) { 2343 m_freem(m); 2344 if (wait) { 2345 if_printf(&rxr->sc->arpcom.ac_if, 2346 "Unable to load RX mbuf\n"); 2347 } 2348 return error; 2349 } 2350 2351 rxbuf = &rxr->rx_buf[i]; 2352 if (rxbuf->m_head != NULL) 2353 bus_dmamap_unload(rxr->rx_tag, rxbuf->map); 2354 2355 map = rxbuf->map; 2356 rxbuf->map = rxr->rx_sparemap; 2357 rxr->rx_sparemap = map; 2358 2359 rxbuf->m_head = m; 2360 rxbuf->paddr = seg.ds_addr; 2361 2362 igb_setup_rxdesc(&rxr->rx_base[i], rxbuf); 2363 return 0; 2364 } 2365 2366 static int 2367 igb_init_rx_ring(struct igb_rx_ring *rxr) 2368 { 2369 int i; 2370 2371 /* Clear the ring contents */ 2372 bzero(rxr->rx_base, 2373 rxr->num_rx_desc * sizeof(union e1000_adv_rx_desc)); 2374 2375 /* Now replenish the ring mbufs */ 2376 for (i = 0; i < rxr->num_rx_desc; ++i) { 2377 int error; 2378 2379 error = igb_newbuf(rxr, i, TRUE); 2380 if (error) 2381 return error; 2382 } 2383 2384 /* Setup our descriptor indices */ 2385 rxr->next_to_check = 0; 2386 2387 rxr->fmp = NULL; 2388 rxr->lmp = NULL; 2389 rxr->discard = FALSE; 2390 2391 return 0; 2392 } 2393 2394 static void 2395 igb_init_rx_unit(struct igb_softc *sc) 2396 { 2397 struct ifnet *ifp = &sc->arpcom.ac_if; 2398 struct e1000_hw *hw = &sc->hw; 2399 uint32_t rctl, rxcsum, srrctl = 0; 2400 int i; 2401 2402 /* 2403 * Make sure receives are disabled while setting 2404 * up the descriptor ring 2405 */ 2406 rctl = E1000_READ_REG(hw, E1000_RCTL); 2407 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); 2408 2409 #if 0 2410 /* 2411 ** Set up for header split 2412 */ 2413 if (igb_header_split) { 2414 /* Use a standard mbuf for the header */ 2415 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; 2416 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; 2417 } else 2418 #endif 2419 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; 2420 2421 /* 2422 ** Set up for jumbo frames 2423 */ 2424 if (ifp->if_mtu > ETHERMTU) { 2425 rctl |= E1000_RCTL_LPE; 2426 #if 0 2427 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) { 2428 srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 2429 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; 2430 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) { 2431 srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 2432 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; 2433 } 2434 /* Set maximum packet len */ 2435 psize = 
adapter->max_frame_size; 2436 /* are we on a vlan? */ 2437 if (adapter->ifp->if_vlantrunk != NULL) 2438 psize += VLAN_TAG_SIZE; 2439 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize); 2440 #else 2441 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 2442 rctl |= E1000_RCTL_SZ_2048; 2443 #endif 2444 } else { 2445 rctl &= ~E1000_RCTL_LPE; 2446 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 2447 rctl |= E1000_RCTL_SZ_2048; 2448 } 2449 2450 /* Setup the Base and Length of the Rx Descriptor Rings */ 2451 for (i = 0; i < sc->rx_ring_inuse; ++i) { 2452 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 2453 uint64_t bus_addr = rxr->rxdma.dma_paddr; 2454 uint32_t rxdctl; 2455 2456 E1000_WRITE_REG(hw, E1000_RDLEN(i), 2457 rxr->num_rx_desc * sizeof(struct e1000_rx_desc)); 2458 E1000_WRITE_REG(hw, E1000_RDBAH(i), 2459 (uint32_t)(bus_addr >> 32)); 2460 E1000_WRITE_REG(hw, E1000_RDBAL(i), 2461 (uint32_t)bus_addr); 2462 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl); 2463 /* Enable this Queue */ 2464 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); 2465 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; 2466 rxdctl &= 0xFFF00000; 2467 rxdctl |= IGB_RX_PTHRESH; 2468 rxdctl |= IGB_RX_HTHRESH << 8; 2469 /* 2470 * Don't set WTHRESH to a value above 1 on 82576, see: 2471 * 82576 specification update errata #26 2472 */ 2473 rxdctl |= IGB_RX_WTHRESH << 16; 2474 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); 2475 } 2476 2477 rxcsum = E1000_READ_REG(&sc->hw, E1000_RXCSUM); 2478 rxcsum &= ~(E1000_RXCSUM_PCSS_MASK | E1000_RXCSUM_IPPCSE); 2479 2480 /* 2481 * Receive Checksum Offload for TCP and UDP 2482 * 2483 * Checksum offloading is also enabled if multiple receive 2484 * queues are to be supported, since we need it to figure out 2485 * fragments. 2486 */ 2487 if ((ifp->if_capenable & IFCAP_RXCSUM) || IGB_ENABLE_HWRSS(sc)) { 2488 /* 2489 * NOTE: 2490 * PCSD must be enabled to enable multiple 2491 * receive queues. 2492 */ 2493 rxcsum |= E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL | 2494 E1000_RXCSUM_PCSD; 2495 } else { 2496 rxcsum &= ~(E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL | 2497 E1000_RXCSUM_PCSD); 2498 } 2499 E1000_WRITE_REG(&sc->hw, E1000_RXCSUM, rxcsum); 2500 2501 if (IGB_ENABLE_HWRSS(sc)) { 2502 uint8_t key[IGB_NRSSRK * IGB_RSSRK_SIZE]; 2503 uint32_t reta_shift; 2504 int j, r; 2505 2506 /* 2507 * NOTE: 2508 * When we reach here, RSS has already been disabled 2509 * in igb_stop(), so we can safely configure the RSS key 2510 * and redirect table. 2511 */ 2512 2513 /* 2514 * Configure RSS key 2515 */ 2516 toeplitz_get_key(key, sizeof(key)); 2517 for (i = 0; i < IGB_NRSSRK; ++i) { 2518 uint32_t rssrk; 2519 2520 rssrk = IGB_RSSRK_VAL(key, i); 2521 IGB_RSS_DPRINTF(sc, 1, "rssrk%d 0x%08x\n", i, rssrk); 2522 2523 E1000_WRITE_REG(hw, E1000_RSSRK(i), rssrk); 2524 } 2525 2526 /* 2527 * Configure the RSS redirect table in the following fashion: 2528 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)] 2529 */ 2530 reta_shift = IGB_RETA_SHIFT; 2531 if (hw->mac.type == e1000_82575) 2532 reta_shift = IGB_RETA_SHIFT_82575; 2533 2534 r = 0; 2535 for (j = 0; j < IGB_NRETA; ++j) { 2536 uint32_t reta = 0; 2537 2538 for (i = 0; i < IGB_RETA_SIZE; ++i) { 2539 uint32_t q; 2540 2541 q = (r % sc->rx_ring_inuse) << reta_shift; 2542 reta |= q << (8 * i); 2543 ++r; 2544 } 2545 IGB_RSS_DPRINTF(sc, 1, "reta 0x%08x\n", reta); 2546 E1000_WRITE_REG(hw, E1000_RETA(j), reta); 2547 } 2548 2549 /* 2550 * Enable multiple receive queues. 2551 * Enable IPv4 RSS standard hash functions.
2552 * Disable RSS interrupt on 82575 2553 */ 2554 E1000_WRITE_REG(&sc->hw, E1000_MRQC, 2555 E1000_MRQC_ENABLE_RSS_4Q | 2556 E1000_MRQC_RSS_FIELD_IPV4_TCP | 2557 E1000_MRQC_RSS_FIELD_IPV4); 2558 } 2559 2560 /* Setup the Receive Control Register */ 2561 rctl &= ~(3 << E1000_RCTL_MO_SHIFT); 2562 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | 2563 E1000_RCTL_RDMTS_HALF | 2564 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); 2565 /* Strip CRC bytes. */ 2566 rctl |= E1000_RCTL_SECRC; 2567 /* Make sure VLAN Filters are off */ 2568 rctl &= ~E1000_RCTL_VFE; 2569 /* Don't store bad packets */ 2570 rctl &= ~E1000_RCTL_SBP; 2571 2572 /* Enable Receives */ 2573 E1000_WRITE_REG(hw, E1000_RCTL, rctl); 2574 2575 /* 2576 * Setup the HW Rx Head and Tail Descriptor Pointers 2577 * - needs to be after enable 2578 */ 2579 for (i = 0; i < sc->rx_ring_inuse; ++i) { 2580 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 2581 2582 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check); 2583 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->num_rx_desc - 1); 2584 } 2585 } 2586 2587 static void 2588 igb_rx_refresh(struct igb_rx_ring *rxr, int i) 2589 { 2590 if (--i < 0) 2591 i = rxr->num_rx_desc - 1; 2592 E1000_WRITE_REG(&rxr->sc->hw, E1000_RDT(rxr->me), i); 2593 } 2594 2595 static void 2596 igb_rxeof(struct igb_rx_ring *rxr, int count) 2597 { 2598 struct ifnet *ifp = &rxr->sc->arpcom.ac_if; 2599 union e1000_adv_rx_desc *cur; 2600 uint32_t staterr; 2601 int i, ncoll = 0, cpuid = mycpuid; 2602 2603 i = rxr->next_to_check; 2604 cur = &rxr->rx_base[i]; 2605 staterr = le32toh(cur->wb.upper.status_error); 2606 2607 if ((staterr & E1000_RXD_STAT_DD) == 0) 2608 return; 2609 2610 while ((staterr & E1000_RXD_STAT_DD) && count != 0) { 2611 struct pktinfo *pi = NULL, pi0; 2612 struct igb_rx_buf *rxbuf = &rxr->rx_buf[i]; 2613 struct mbuf *m = NULL; 2614 boolean_t eop; 2615 2616 eop = (staterr & E1000_RXD_STAT_EOP) ? 
TRUE : FALSE; 2617 if (eop) 2618 --count; 2619 2620 ++ncoll; 2621 if ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) == 0 && 2622 !rxr->discard) { 2623 struct mbuf *mp = rxbuf->m_head; 2624 uint32_t hash, hashtype; 2625 uint16_t vlan; 2626 int len; 2627 2628 len = le16toh(cur->wb.upper.length); 2629 if ((rxr->sc->hw.mac.type == e1000_i350 || 2630 rxr->sc->hw.mac.type == e1000_i354) && 2631 (staterr & E1000_RXDEXT_STATERR_LB)) 2632 vlan = be16toh(cur->wb.upper.vlan); 2633 else 2634 vlan = le16toh(cur->wb.upper.vlan); 2635 2636 hash = le32toh(cur->wb.lower.hi_dword.rss); 2637 hashtype = le32toh(cur->wb.lower.lo_dword.data) & 2638 E1000_RXDADV_RSSTYPE_MASK; 2639 2640 IGB_RSS_DPRINTF(rxr->sc, 10, 2641 "ring%d, hash 0x%08x, hashtype %u\n", 2642 rxr->me, hash, hashtype); 2643 2644 bus_dmamap_sync(rxr->rx_tag, rxbuf->map, 2645 BUS_DMASYNC_POSTREAD); 2646 2647 if (igb_newbuf(rxr, i, FALSE) != 0) { 2648 IFNET_STAT_INC(ifp, iqdrops, 1); 2649 goto discard; 2650 } 2651 2652 mp->m_len = len; 2653 if (rxr->fmp == NULL) { 2654 mp->m_pkthdr.len = len; 2655 rxr->fmp = mp; 2656 rxr->lmp = mp; 2657 } else { 2658 rxr->lmp->m_next = mp; 2659 rxr->lmp = rxr->lmp->m_next; 2660 rxr->fmp->m_pkthdr.len += len; 2661 } 2662 2663 if (eop) { 2664 m = rxr->fmp; 2665 rxr->fmp = NULL; 2666 rxr->lmp = NULL; 2667 2668 m->m_pkthdr.rcvif = ifp; 2669 IFNET_STAT_INC(ifp, ipackets, 1); 2670 2671 if (ifp->if_capenable & IFCAP_RXCSUM) 2672 igb_rxcsum(staterr, m); 2673 2674 if (staterr & E1000_RXD_STAT_VP) { 2675 m->m_pkthdr.ether_vlantag = vlan; 2676 m->m_flags |= M_VLANTAG; 2677 } 2678 2679 if (ifp->if_capenable & IFCAP_RSS) { 2680 pi = igb_rssinfo(m, &pi0, 2681 hash, hashtype, staterr); 2682 } 2683 #ifdef IGB_RSS_DEBUG 2684 rxr->rx_packets++; 2685 #endif 2686 } 2687 } else { 2688 IFNET_STAT_INC(ifp, ierrors, 1); 2689 discard: 2690 igb_setup_rxdesc(cur, rxbuf); 2691 if (!eop) 2692 rxr->discard = TRUE; 2693 else 2694 rxr->discard = FALSE; 2695 if (rxr->fmp != NULL) { 2696 m_freem(rxr->fmp); 2697 rxr->fmp = NULL; 2698 rxr->lmp = NULL; 2699 } 2700 m = NULL; 2701 } 2702 2703 if (m != NULL) 2704 ifp->if_input(ifp, m, pi, cpuid); 2705 2706 /* Advance our pointers to the next descriptor. 
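 * RDT updates are batched: the tail register is only written once
 * every wreg_nsegs replenished descriptors, plus once more after the
 * loop, which trims MMIO writes on the hot path.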
*/ 2707 if (++i == rxr->num_rx_desc) 2708 i = 0; 2709 2710 if (ncoll >= rxr->wreg_nsegs) { 2711 igb_rx_refresh(rxr, i); 2712 ncoll = 0; 2713 } 2714 2715 cur = &rxr->rx_base[i]; 2716 staterr = le32toh(cur->wb.upper.status_error); 2717 } 2718 rxr->next_to_check = i; 2719 2720 if (ncoll > 0) 2721 igb_rx_refresh(rxr, i); 2722 } 2723 2724 2725 static void 2726 igb_set_vlan(struct igb_softc *sc) 2727 { 2728 struct e1000_hw *hw = &sc->hw; 2729 uint32_t reg; 2730 #if 0 2731 struct ifnet *ifp = &sc->arpcom.ac_if; 2732 #endif 2733 2734 if (sc->vf_ifp) { 2735 e1000_rlpml_set_vf(hw, sc->max_frame_size + VLAN_TAG_SIZE); 2736 return; 2737 } 2738 2739 reg = E1000_READ_REG(hw, E1000_CTRL); 2740 reg |= E1000_CTRL_VME; 2741 E1000_WRITE_REG(hw, E1000_CTRL, reg); 2742 2743 #if 0 2744 /* Enable the Filter Table */ 2745 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) { 2746 reg = E1000_READ_REG(hw, E1000_RCTL); 2747 reg &= ~E1000_RCTL_CFIEN; 2748 reg |= E1000_RCTL_VFE; 2749 E1000_WRITE_REG(hw, E1000_RCTL, reg); 2750 } 2751 #endif 2752 2753 /* Update the frame size */ 2754 E1000_WRITE_REG(&sc->hw, E1000_RLPML, 2755 sc->max_frame_size + VLAN_TAG_SIZE); 2756 2757 #if 0 2758 /* Don't bother with table if no vlans */ 2759 if ((adapter->num_vlans == 0) || 2760 ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)) 2761 return; 2762 /* 2763 ** A soft reset zeroes out the VFTA, so 2764 ** we need to repopulate it now. 2765 */ 2766 for (int i = 0; i < IGB_VFTA_SIZE; i++) 2767 if (adapter->shadow_vfta[i] != 0) { 2768 if (adapter->vf_ifp) 2769 e1000_vfta_set_vf(hw, 2770 adapter->shadow_vfta[i], TRUE); 2771 else 2772 E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, 2773 i, adapter->shadow_vfta[i]); 2774 } 2775 #endif 2776 } 2777 2778 static void 2779 igb_enable_intr(struct igb_softc *sc) 2780 { 2781 if (sc->intr_type != PCI_INTR_TYPE_MSIX) { 2782 lwkt_serialize_handler_enable(&sc->main_serialize); 2783 } else { 2784 int i; 2785 2786 for (i = 0; i < sc->msix_cnt; ++i) { 2787 lwkt_serialize_handler_enable( 2788 sc->msix_data[i].msix_serialize); 2789 } 2790 } 2791 2792 if ((sc->flags & IGB_FLAG_SHARED_INTR) == 0) { 2793 if (sc->intr_type == PCI_INTR_TYPE_MSIX) 2794 E1000_WRITE_REG(&sc->hw, E1000_EIAC, sc->intr_mask); 2795 else 2796 E1000_WRITE_REG(&sc->hw, E1000_EIAC, 0); 2797 E1000_WRITE_REG(&sc->hw, E1000_EIAM, sc->intr_mask); 2798 E1000_WRITE_REG(&sc->hw, E1000_EIMS, sc->intr_mask); 2799 E1000_WRITE_REG(&sc->hw, E1000_IMS, E1000_IMS_LSC); 2800 } else { 2801 E1000_WRITE_REG(&sc->hw, E1000_IMS, IMS_ENABLE_MASK); 2802 } 2803 E1000_WRITE_FLUSH(&sc->hw); 2804 } 2805 2806 static void 2807 igb_disable_intr(struct igb_softc *sc) 2808 { 2809 if ((sc->flags & IGB_FLAG_SHARED_INTR) == 0) { 2810 E1000_WRITE_REG(&sc->hw, E1000_EIMC, 0xffffffff); 2811 E1000_WRITE_REG(&sc->hw, E1000_EIAC, 0); 2812 } 2813 E1000_WRITE_REG(&sc->hw, E1000_IMC, 0xffffffff); 2814 E1000_WRITE_FLUSH(&sc->hw); 2815 2816 if (sc->intr_type != PCI_INTR_TYPE_MSIX) { 2817 lwkt_serialize_handler_disable(&sc->main_serialize); 2818 } else { 2819 int i; 2820 2821 for (i = 0; i < sc->msix_cnt; ++i) { 2822 lwkt_serialize_handler_disable( 2823 sc->msix_data[i].msix_serialize); 2824 } 2825 } 2826 } 2827 2828 /* 2829 * Bit of a misnomer: what this really means is 2830 * to enable OS management of the system,
i.e. 2831 * to disable special hardware management features 2832 */ 2833 static void 2834 igb_get_mgmt(struct igb_softc *sc) 2835 { 2836 if (sc->flags & IGB_FLAG_HAS_MGMT) { 2837 int manc2h = E1000_READ_REG(&sc->hw, E1000_MANC2H); 2838 int manc = E1000_READ_REG(&sc->hw, E1000_MANC); 2839 2840 /* disable hardware interception of ARP */ 2841 manc &= ~E1000_MANC_ARP_EN; 2842 2843 /* enable receiving management packets to the host */ 2844 manc |= E1000_MANC_EN_MNG2HOST; 2845 manc2h |= 1 << 5; /* Mng Port 623 */ 2846 manc2h |= 1 << 6; /* Mng Port 664 */ 2847 E1000_WRITE_REG(&sc->hw, E1000_MANC2H, manc2h); 2848 E1000_WRITE_REG(&sc->hw, E1000_MANC, manc); 2849 } 2850 } 2851 2852 /* 2853 * Give control back to the hardware management controller 2854 * if there is one. 2855 */ 2856 static void 2857 igb_rel_mgmt(struct igb_softc *sc) 2858 { 2859 if (sc->flags & IGB_FLAG_HAS_MGMT) { 2860 int manc = E1000_READ_REG(&sc->hw, E1000_MANC); 2861 2862 /* Re-enable hardware interception of ARP */ 2863 manc |= E1000_MANC_ARP_EN; 2864 manc &= ~E1000_MANC_EN_MNG2HOST; 2865 2866 E1000_WRITE_REG(&sc->hw, E1000_MANC, manc); 2867 } 2868 } 2869 2870 /* 2871 * Sets CTRL_EXT:DRV_LOAD bit. 2872 * 2873 * For ASF and Pass Through versions of f/w this means that 2874 * the driver is loaded. 2875 */ 2876 static void 2877 igb_get_hw_control(struct igb_softc *sc) 2878 { 2879 uint32_t ctrl_ext; 2880 2881 if (sc->vf_ifp) 2882 return; 2883 2884 /* Let firmware know the driver has taken over */ 2885 ctrl_ext = E1000_READ_REG(&sc->hw, E1000_CTRL_EXT); 2886 E1000_WRITE_REG(&sc->hw, E1000_CTRL_EXT, 2887 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); 2888 } 2889 2890 /* 2891 * Resets CTRL_EXT:DRV_LOAD bit. 2892 * 2893 * For ASF and Pass Through versions of f/w this means that the 2894 * driver is no longer loaded. 2895 */ 2896 static void 2897 igb_rel_hw_control(struct igb_softc *sc) 2898 { 2899 uint32_t ctrl_ext; 2900 2901 if (sc->vf_ifp) 2902 return; 2903 2904 /* Let firmware take over control of h/w */ 2905 ctrl_ext = E1000_READ_REG(&sc->hw, E1000_CTRL_EXT); 2906 E1000_WRITE_REG(&sc->hw, E1000_CTRL_EXT, 2907 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); 2908 } 2909 2910 static boolean_t 2911 igb_is_valid_ether_addr(const uint8_t *addr) 2912 { 2913 uint8_t zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 }; 2914 2915 if ((addr[0] & 1) || !bcmp(addr, zero_addr, ETHER_ADDR_LEN)) 2916 return FALSE; 2917 return TRUE; 2918 } 2919 2920 /* 2921 * Enable PCI Wake On LAN capability 2922 */ 2923 static void 2924 igb_enable_wol(device_t dev) 2925 { 2926 uint16_t cap, status; 2927 uint8_t id; 2928 2929 /* First find the capabilities pointer */ 2930 cap = pci_read_config(dev, PCIR_CAP_PTR, 2); 2931 2932 /* Read the PM Capabilities */ 2933 id = pci_read_config(dev, cap, 1); 2934 if (id != PCIY_PMG) /* Something wrong */ 2935 return; 2936 2937 /* 2938 * OK, we have the power capabilities, 2939 * so now get the status register 2940 */ 2941 cap += PCIR_POWER_STATUS; 2942 status = pci_read_config(dev, cap, 2); 2943 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; 2944 pci_write_config(dev, cap, status, 2); 2945 } 2946 2947 static void 2948 igb_update_stats_counters(struct igb_softc *sc) 2949 { 2950 struct e1000_hw *hw = &sc->hw; 2951 struct e1000_hw_stats *stats; 2952 struct ifnet *ifp = &sc->arpcom.ac_if; 2953 2954 /* 2955 * The virtual function adapter has only a 2956 * small controlled set of stats; update only 2957 * those and return.
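 * The VF counters (VFGPRC and friends) are free-running rather than
 * clear-on-read, which is why igb_update_vf_stats_counters() keeps a
 * last_* snapshot and accumulates deltas via UPDATE_VF_REG().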
2958 */ 2959 if (sc->vf_ifp) { 2960 igb_update_vf_stats_counters(sc); 2961 return; 2962 } 2963 stats = sc->stats; 2964 2965 if (sc->hw.phy.media_type == e1000_media_type_copper || 2966 (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) { 2967 stats->symerrs += 2968 E1000_READ_REG(hw, E1000_SYMERRS); 2969 stats->sec += E1000_READ_REG(hw, E1000_SEC); 2970 } 2971 2972 stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS); 2973 stats->mpc += E1000_READ_REG(hw, E1000_MPC); 2974 stats->scc += E1000_READ_REG(hw, E1000_SCC); 2975 stats->ecol += E1000_READ_REG(hw, E1000_ECOL); 2976 2977 stats->mcc += E1000_READ_REG(hw, E1000_MCC); 2978 stats->latecol += E1000_READ_REG(hw, E1000_LATECOL); 2979 stats->colc += E1000_READ_REG(hw, E1000_COLC); 2980 stats->dc += E1000_READ_REG(hw, E1000_DC); 2981 stats->rlec += E1000_READ_REG(hw, E1000_RLEC); 2982 stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC); 2983 stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC); 2984 2985 /* 2986 * For watchdog management we need to know if we have been 2987 * paused during the last interval, so capture that here. 2988 */ 2989 sc->pause_frames = E1000_READ_REG(hw, E1000_XOFFRXC); 2990 stats->xoffrxc += sc->pause_frames; 2991 stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC); 2992 stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC); 2993 stats->prc64 += E1000_READ_REG(hw, E1000_PRC64); 2994 stats->prc127 += E1000_READ_REG(hw, E1000_PRC127); 2995 stats->prc255 += E1000_READ_REG(hw, E1000_PRC255); 2996 stats->prc511 += E1000_READ_REG(hw, E1000_PRC511); 2997 stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023); 2998 stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522); 2999 stats->gprc += E1000_READ_REG(hw, E1000_GPRC); 3000 stats->bprc += E1000_READ_REG(hw, E1000_BPRC); 3001 stats->mprc += E1000_READ_REG(hw, E1000_MPRC); 3002 stats->gptc += E1000_READ_REG(hw, E1000_GPTC); 3003 3004 /* For the 64-bit byte counters the low dword must be read first.
*/ 3005 /* Both registers clear on the read of the high dword */ 3006 3007 stats->gorc += E1000_READ_REG(hw, E1000_GORCL) + 3008 ((uint64_t)E1000_READ_REG(hw, E1000_GORCH) << 32); 3009 stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) + 3010 ((uint64_t)E1000_READ_REG(hw, E1000_GOTCH) << 32); 3011 3012 stats->rnbc += E1000_READ_REG(hw, E1000_RNBC); 3013 stats->ruc += E1000_READ_REG(hw, E1000_RUC); 3014 stats->rfc += E1000_READ_REG(hw, E1000_RFC); 3015 stats->roc += E1000_READ_REG(hw, E1000_ROC); 3016 stats->rjc += E1000_READ_REG(hw, E1000_RJC); 3017 3018 stats->tor += E1000_READ_REG(hw, E1000_TORL) + ((uint64_t)E1000_READ_REG(hw, E1000_TORH) << 32); 3019 stats->tot += E1000_READ_REG(hw, E1000_TOTL) + ((uint64_t)E1000_READ_REG(hw, E1000_TOTH) << 32); 3020 3021 stats->tpr += E1000_READ_REG(hw, E1000_TPR); 3022 stats->tpt += E1000_READ_REG(hw, E1000_TPT); 3023 stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64); 3024 stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127); 3025 stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255); 3026 stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511); 3027 stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023); 3028 stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522); 3029 stats->mptc += E1000_READ_REG(hw, E1000_MPTC); 3030 stats->bptc += E1000_READ_REG(hw, E1000_BPTC); 3031 3032 /* Interrupt Counts */ 3033 3034 stats->iac += E1000_READ_REG(hw, E1000_IAC); 3035 stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC); 3036 stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC); 3037 stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC); 3038 stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC); 3039 stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC); 3040 stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC); 3041 stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC); 3042 stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC); 3043 3044 /* Host to Card Statistics */ 3045 3046 stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC); 3047 stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC); 3048 stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC); 3049 stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC); 3050 stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC); 3051 stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC); 3052 stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC); 3053 stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) + 3054 ((uint64_t)E1000_READ_REG(hw, E1000_HGORCH) << 32)); 3055 stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) + 3056 ((uint64_t)E1000_READ_REG(hw, E1000_HGOTCH) << 32)); 3057 stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS); 3058 stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC); 3059 stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC); 3060 3061 stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC); 3062 stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC); 3063 stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS); 3064 stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR); 3065 stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC); 3066 stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC); 3067 3068 IFNET_STAT_SET(ifp, collisions, stats->colc); 3069 3070 /* Rx Errors */ 3071 IFNET_STAT_SET(ifp, ierrors, 3072 stats->rxerrc + stats->crcerrs + stats->algnerrc + 3073 stats->ruc + stats->roc + stats->mpc + stats->cexterr); 3074 3075 /* Tx Errors */ 3076 IFNET_STAT_SET(ifp, oerrors, 3077 stats->ecol + stats->latecol + sc->watchdog_events); 3078 3079 /* Driver specific counters */ 3080 sc->device_control = E1000_READ_REG(hw, E1000_CTRL); 3081 sc->rx_control = E1000_READ_REG(hw, E1000_RCTL); 3082 sc->int_mask = E1000_READ_REG(hw, E1000_IMS); 3083 sc->eint_mask = E1000_READ_REG(hw, E1000_EIMS); 3084
sc->packet_buf_alloc_tx = 3085 ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16); 3086 sc->packet_buf_alloc_rx = 3087 (E1000_READ_REG(hw, E1000_PBA) & 0xffff); 3088 } 3089 3090 static void 3091 igb_vf_init_stats(struct igb_softc *sc) 3092 { 3093 struct e1000_hw *hw = &sc->hw; 3094 struct e1000_vf_stats *stats; 3095 3096 stats = sc->stats; 3097 stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC); 3098 stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC); 3099 stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC); 3100 stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC); 3101 stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC); 3102 } 3103 3104 static void 3105 igb_update_vf_stats_counters(struct igb_softc *sc) 3106 { 3107 struct e1000_hw *hw = &sc->hw; 3108 struct e1000_vf_stats *stats; 3109 3110 if (sc->link_speed == 0) 3111 return; 3112 3113 stats = sc->stats; 3114 UPDATE_VF_REG(E1000_VFGPRC, stats->last_gprc, stats->gprc); 3115 UPDATE_VF_REG(E1000_VFGORC, stats->last_gorc, stats->gorc); 3116 UPDATE_VF_REG(E1000_VFGPTC, stats->last_gptc, stats->gptc); 3117 UPDATE_VF_REG(E1000_VFGOTC, stats->last_gotc, stats->gotc); 3118 UPDATE_VF_REG(E1000_VFMPRC, stats->last_mprc, stats->mprc); 3119 } 3120 3121 #ifdef IFPOLL_ENABLE 3122 3123 static void 3124 igb_npoll_status(struct ifnet *ifp) 3125 { 3126 struct igb_softc *sc = ifp->if_softc; 3127 uint32_t reg_icr; 3128 3129 ASSERT_SERIALIZED(&sc->main_serialize); 3130 3131 reg_icr = E1000_READ_REG(&sc->hw, E1000_ICR); 3132 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { 3133 sc->hw.mac.get_link_status = 1; 3134 igb_update_link_status(sc); 3135 } 3136 } 3137 3138 static void 3139 igb_npoll_tx(struct ifnet *ifp, void *arg, int cycle __unused) 3140 { 3141 struct igb_tx_ring *txr = arg; 3142 3143 ASSERT_SERIALIZED(&txr->tx_serialize); 3144 3145 igb_txeof(txr); 3146 if (!ifsq_is_empty(txr->ifsq)) 3147 ifsq_devstart(txr->ifsq); 3148 } 3149 3150 static void 3151 igb_npoll_rx(struct ifnet *ifp __unused, void *arg, int cycle) 3152 { 3153 struct igb_rx_ring *rxr = arg; 3154 3155 ASSERT_SERIALIZED(&rxr->rx_serialize); 3156 3157 igb_rxeof(rxr, cycle); 3158 } 3159 3160 static void 3161 igb_npoll(struct ifnet *ifp, struct ifpoll_info *info) 3162 { 3163 struct igb_softc *sc = ifp->if_softc; 3164 int i, txr_cnt, rxr_cnt; 3165 3166 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3167 3168 if (info) { 3169 int off; 3170 3171 info->ifpi_status.status_func = igb_npoll_status; 3172 info->ifpi_status.serializer = &sc->main_serialize; 3173 3174 txr_cnt = igb_get_txring_inuse(sc, TRUE); 3175 off = sc->tx_npoll_off; 3176 for (i = 0; i < txr_cnt; ++i) { 3177 struct igb_tx_ring *txr = &sc->tx_rings[i]; 3178 int idx = i + off; 3179 3180 KKASSERT(idx < ncpus2); 3181 info->ifpi_tx[idx].poll_func = igb_npoll_tx; 3182 info->ifpi_tx[idx].arg = txr; 3183 info->ifpi_tx[idx].serializer = &txr->tx_serialize; 3184 ifsq_set_cpuid(txr->ifsq, idx); 3185 } 3186 3187 rxr_cnt = igb_get_rxring_inuse(sc, TRUE); 3188 off = sc->rx_npoll_off; 3189 for (i = 0; i < rxr_cnt; ++i) { 3190 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 3191 int idx = i + off; 3192 3193 KKASSERT(idx < ncpus2); 3194 info->ifpi_rx[idx].poll_func = igb_npoll_rx; 3195 info->ifpi_rx[idx].arg = rxr; 3196 info->ifpi_rx[idx].serializer = &rxr->rx_serialize; 3197 } 3198 3199 if (ifp->if_flags & IFF_RUNNING) { 3200 if (rxr_cnt == sc->rx_ring_inuse && 3201 txr_cnt == sc->tx_ring_inuse) { 3202 igb_set_timer_cpuid(sc, TRUE); 3203 igb_disable_intr(sc); 3204 } else { 3205 igb_init(sc); 3206 } 3207 } 3208 } else { 3209 for (i = 0; i < sc->tx_ring_cnt; 
++i) { 3210 struct igb_tx_ring *txr = &sc->tx_rings[i]; 3211 3212 ifsq_set_cpuid(txr->ifsq, txr->tx_intr_cpuid); 3213 } 3214 3215 if (ifp->if_flags & IFF_RUNNING) { 3216 txr_cnt = igb_get_txring_inuse(sc, FALSE); 3217 rxr_cnt = igb_get_rxring_inuse(sc, FALSE); 3218 3219 if (rxr_cnt == sc->rx_ring_inuse && 3220 txr_cnt == sc->tx_ring_inuse) { 3221 igb_set_timer_cpuid(sc, FALSE); 3222 igb_enable_intr(sc); 3223 } else { 3224 igb_init(sc); 3225 } 3226 } 3227 } 3228 } 3229 3230 #endif /* IFPOLL_ENABLE */ 3231 3232 static void 3233 igb_intr(void *xsc) 3234 { 3235 struct igb_softc *sc = xsc; 3236 struct ifnet *ifp = &sc->arpcom.ac_if; 3237 uint32_t eicr; 3238 3239 ASSERT_SERIALIZED(&sc->main_serialize); 3240 3241 eicr = E1000_READ_REG(&sc->hw, E1000_EICR); 3242 3243 if (eicr == 0) 3244 return; 3245 3246 if (ifp->if_flags & IFF_RUNNING) { 3247 struct igb_tx_ring *txr = &sc->tx_rings[0]; 3248 int i; 3249 3250 for (i = 0; i < sc->rx_ring_inuse; ++i) { 3251 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 3252 3253 if (eicr & rxr->rx_intr_mask) { 3254 lwkt_serialize_enter(&rxr->rx_serialize); 3255 igb_rxeof(rxr, -1); 3256 lwkt_serialize_exit(&rxr->rx_serialize); 3257 } 3258 } 3259 3260 if (eicr & txr->tx_intr_mask) { 3261 lwkt_serialize_enter(&txr->tx_serialize); 3262 igb_txeof(txr); 3263 if (!ifsq_is_empty(txr->ifsq)) 3264 ifsq_devstart(txr->ifsq); 3265 lwkt_serialize_exit(&txr->tx_serialize); 3266 } 3267 } 3268 3269 if (eicr & E1000_EICR_OTHER) { 3270 uint32_t icr = E1000_READ_REG(&sc->hw, E1000_ICR); 3271 3272 /* Link status change */ 3273 if (icr & E1000_ICR_LSC) { 3274 sc->hw.mac.get_link_status = 1; 3275 igb_update_link_status(sc); 3276 } 3277 } 3278 3279 /* 3280 * Reading EICR has the side effect of clearing the interrupt mask, 3281 * so all interrupts need to be re-enabled here. 3282 */ 3283 E1000_WRITE_REG(&sc->hw, E1000_EIMS, sc->intr_mask); 3284 } 3285 3286 static void 3287 igb_intr_shared(void *xsc) 3288 { 3289 struct igb_softc *sc = xsc; 3290 struct ifnet *ifp = &sc->arpcom.ac_if; 3291 uint32_t reg_icr; 3292 3293 ASSERT_SERIALIZED(&sc->main_serialize); 3294 3295 reg_icr = E1000_READ_REG(&sc->hw, E1000_ICR); 3296 3297 /* Hot eject? */ 3298 if (reg_icr == 0xffffffff) 3299 return; 3300 3301 /* Definitely not our interrupt.
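 * On a shared legacy IRQ line, ICR reads as zero when another device
 * raised the interrupt, so simply bail out.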
*/ 3302 if (reg_icr == 0x0) 3303 return; 3304 3305 if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0) 3306 return; 3307 3308 if (ifp->if_flags & IFF_RUNNING) { 3309 if (reg_icr & 3310 (E1000_ICR_RXT0 | E1000_ICR_RXDMT0 | E1000_ICR_RXO)) { 3311 int i; 3312 3313 for (i = 0; i < sc->rx_ring_inuse; ++i) { 3314 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 3315 3316 lwkt_serialize_enter(&rxr->rx_serialize); 3317 igb_rxeof(rxr, -1); 3318 lwkt_serialize_exit(&rxr->rx_serialize); 3319 } 3320 } 3321 3322 if (reg_icr & E1000_ICR_TXDW) { 3323 struct igb_tx_ring *txr = &sc->tx_rings[0]; 3324 3325 lwkt_serialize_enter(&txr->tx_serialize); 3326 igb_txeof(txr); 3327 if (!ifsq_is_empty(txr->ifsq)) 3328 ifsq_devstart(txr->ifsq); 3329 lwkt_serialize_exit(&txr->tx_serialize); 3330 } 3331 } 3332 3333 /* Link status change */ 3334 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { 3335 sc->hw.mac.get_link_status = 1; 3336 igb_update_link_status(sc); 3337 } 3338 3339 if (reg_icr & E1000_ICR_RXO) 3340 sc->rx_overruns++; 3341 } 3342 3343 static int 3344 igb_encap(struct igb_tx_ring *txr, struct mbuf **m_headp, 3345 int *segs_used, int *idx) 3346 { 3347 bus_dma_segment_t segs[IGB_MAX_SCATTER]; 3348 bus_dmamap_t map; 3349 struct igb_tx_buf *tx_buf, *tx_buf_mapped; 3350 union e1000_adv_tx_desc *txd = NULL; 3351 struct mbuf *m_head = *m_headp; 3352 uint32_t olinfo_status = 0, cmd_type_len = 0, cmd_rs = 0; 3353 int maxsegs, nsegs, i, j, error; 3354 uint32_t hdrlen = 0; 3355 3356 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 3357 error = igb_tso_pullup(txr, m_headp); 3358 if (error) 3359 return error; 3360 m_head = *m_headp; 3361 } 3362 3363 /* Set basic descriptor constants */ 3364 cmd_type_len |= E1000_ADVTXD_DTYP_DATA; 3365 cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT; 3366 if (m_head->m_flags & M_VLANTAG) 3367 cmd_type_len |= E1000_ADVTXD_DCMD_VLE; 3368 3369 /* 3370 * Map the packet for DMA. 3371 */ 3372 tx_buf = &txr->tx_buf[txr->next_avail_desc]; 3373 tx_buf_mapped = tx_buf; 3374 map = tx_buf->map; 3375 3376 maxsegs = txr->tx_avail - IGB_TX_RESERVED; 3377 KASSERT(maxsegs >= txr->spare_desc, ("not enough spare TX desc\n")); 3378 if (maxsegs > IGB_MAX_SCATTER) 3379 maxsegs = IGB_MAX_SCATTER; 3380 3381 error = bus_dmamap_load_mbuf_defrag(txr->tx_tag, map, m_headp, 3382 segs, maxsegs, &nsegs, BUS_DMA_NOWAIT); 3383 if (error) { 3384 if (error == ENOBUFS) 3385 txr->sc->mbuf_defrag_failed++; 3386 else 3387 txr->sc->no_tx_dma_setup++; 3388 3389 m_freem(*m_headp); 3390 *m_headp = NULL; 3391 return error; 3392 } 3393 bus_dmamap_sync(txr->tx_tag, map, BUS_DMASYNC_PREWRITE); 3394 3395 m_head = *m_headp; 3396 3397 /* 3398 * Set up the TX context descriptor, if any hardware offloading is 3399 * needed. This includes CSUM, VLAN, and TSO. It will consume one 3400 * TX descriptor. 3401 * 3402 * Unlike these chips' predecessors (em/emx), the TX context descriptor 3403 * will _not_ interfere with TX data fetch pipelining.
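 * A context descriptor carries only offload metadata (header
 * lengths, checksum offsets, MSS); it occupies a ring slot like a
 * data descriptor but transmits no packet bytes itself.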
3404 */ 3405 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 3406 igb_tso_ctx(txr, m_head, &hdrlen); 3407 cmd_type_len |= E1000_ADVTXD_DCMD_TSE; 3408 olinfo_status |= E1000_TXD_POPTS_IXSM << 8; 3409 olinfo_status |= E1000_TXD_POPTS_TXSM << 8; 3410 txr->tx_nsegs++; 3411 (*segs_used)++; 3412 } else if (igb_txcsum_ctx(txr, m_head)) { 3413 if (m_head->m_pkthdr.csum_flags & CSUM_IP) 3414 olinfo_status |= (E1000_TXD_POPTS_IXSM << 8); 3415 if (m_head->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_TCP)) 3416 olinfo_status |= (E1000_TXD_POPTS_TXSM << 8); 3417 txr->tx_nsegs++; 3418 (*segs_used)++; 3419 } 3420 3421 *segs_used += nsegs; 3422 txr->tx_nsegs += nsegs; 3423 if (txr->tx_nsegs >= txr->intr_nsegs) { 3424 /* 3425 * Report Status (RS) is turned on every intr_nsegs 3426 * descriptors (roughly). 3427 */ 3428 txr->tx_nsegs = 0; 3429 cmd_rs = E1000_ADVTXD_DCMD_RS; 3430 } 3431 3432 /* Calculate payload length */ 3433 olinfo_status |= ((m_head->m_pkthdr.len - hdrlen) 3434 << E1000_ADVTXD_PAYLEN_SHIFT); 3435 3436 /* 3437 * 82575 needs the TX context index added; the queue 3438 * index is used as TX context index here. 3439 */ 3440 if (txr->sc->hw.mac.type == e1000_82575) 3441 olinfo_status |= txr->me << 4; 3442 3443 /* Set up our transmit descriptors */ 3444 i = txr->next_avail_desc; 3445 for (j = 0; j < nsegs; j++) { 3446 bus_size_t seg_len; 3447 bus_addr_t seg_addr; 3448 3449 tx_buf = &txr->tx_buf[i]; 3450 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i]; 3451 seg_addr = segs[j].ds_addr; 3452 seg_len = segs[j].ds_len; 3453 3454 txd->read.buffer_addr = htole64(seg_addr); 3455 txd->read.cmd_type_len = htole32(cmd_type_len | seg_len); 3456 txd->read.olinfo_status = htole32(olinfo_status); 3457 if (++i == txr->num_tx_desc) 3458 i = 0; 3459 tx_buf->m_head = NULL; 3460 } 3461 3462 KASSERT(txr->tx_avail > nsegs, ("invalid avail TX desc\n")); 3463 txr->next_avail_desc = i; 3464 txr->tx_avail -= nsegs; 3465 3466 tx_buf->m_head = m_head; 3467 tx_buf_mapped->map = tx_buf->map; 3468 tx_buf->map = map; 3469 3470 /* 3471 * Last Descriptor of Packet needs End Of Packet (EOP) 3472 */ 3473 txd->read.cmd_type_len |= htole32(E1000_ADVTXD_DCMD_EOP | cmd_rs); 3474 3475 /* 3476 * Defer TDT updating until enough descriptors are set up 3477 */ 3478 *idx = i; 3479 #ifdef IGB_TSS_DEBUG 3480 ++txr->tx_packets; 3481 #endif 3482 3483 return 0; 3484 } 3485 3486 static void 3487 igb_start(struct ifnet *ifp, struct ifaltq_subque *ifsq) 3488 { 3489 struct igb_softc *sc = ifp->if_softc; 3490 struct igb_tx_ring *txr = ifsq_get_priv(ifsq); 3491 struct mbuf *m_head; 3492 int idx = -1, nsegs = 0; 3493 3494 KKASSERT(txr->ifsq == ifsq); 3495 ASSERT_SERIALIZED(&txr->tx_serialize); 3496 3497 if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq)) 3498 return; 3499 3500 if (!sc->link_active || (txr->tx_flags & IGB_TXFLAG_ENABLED) == 0) { 3501 ifsq_purge(ifsq); 3502 return; 3503 } 3504 3505 if (!IGB_IS_NOT_OACTIVE(txr)) 3506 igb_txeof(txr); 3507 3508 while (!ifsq_is_empty(ifsq)) { 3509 if (IGB_IS_OACTIVE(txr)) { 3510 ifsq_set_oactive(ifsq); 3511 /* Set watchdog on */ 3512 txr->tx_watchdog.wd_timer = 5; 3513 break; 3514 } 3515 3516 m_head = ifsq_dequeue(ifsq); 3517 if (m_head == NULL) 3518 break; 3519 3520 if (igb_encap(txr, &m_head, &nsegs, &idx)) { 3521 IFNET_STAT_INC(ifp, oerrors, 1); 3522 continue; 3523 } 3524 3525 /* 3526 * TX interrupts are aggressively aggregated, so increasing 3527 * opackets at TX interrupt time would make the opackets 3528 * statistics vastly inaccurate; we do the opackets increment 3529 * now.
3530 */ 3531 IFNET_STAT_INC(ifp, opackets, 1); 3532 3533 if (nsegs >= txr->wreg_nsegs) { 3534 E1000_WRITE_REG(&txr->sc->hw, E1000_TDT(txr->me), idx); 3535 idx = -1; 3536 nsegs = 0; 3537 } 3538 3539 /* Send a copy of the frame to the BPF listener */ 3540 ETHER_BPF_MTAP(ifp, m_head); 3541 } 3542 if (idx >= 0) 3543 E1000_WRITE_REG(&txr->sc->hw, E1000_TDT(txr->me), idx); 3544 } 3545 3546 static void 3547 igb_watchdog(struct ifaltq_subque *ifsq) 3548 { 3549 struct igb_tx_ring *txr = ifsq_get_priv(ifsq); 3550 struct ifnet *ifp = ifsq_get_ifp(ifsq); 3551 struct igb_softc *sc = ifp->if_softc; 3552 int i; 3553 3554 KKASSERT(txr->ifsq == ifsq); 3555 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3556 3557 /* 3558 * If flow control has paused us since we last checked, 3559 * it invalidates the watchdog timing, so don't run it. 3560 */ 3561 if (sc->pause_frames) { 3562 sc->pause_frames = 0; 3563 txr->tx_watchdog.wd_timer = 5; 3564 return; 3565 } 3566 3567 if_printf(ifp, "Watchdog timeout -- resetting\n"); 3568 if_printf(ifp, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me, 3569 E1000_READ_REG(&sc->hw, E1000_TDH(txr->me)), 3570 E1000_READ_REG(&sc->hw, E1000_TDT(txr->me))); 3571 if_printf(ifp, "TX(%d) desc avail = %d, " 3572 "Next TX to Clean = %d\n", 3573 txr->me, txr->tx_avail, txr->next_to_clean); 3574 3575 IFNET_STAT_INC(ifp, oerrors, 1); 3576 sc->watchdog_events++; 3577 3578 igb_init(sc); 3579 for (i = 0; i < sc->tx_ring_inuse; ++i) 3580 ifsq_devstart_sched(sc->tx_rings[i].ifsq); 3581 } 3582 3583 static void 3584 igb_set_eitr(struct igb_softc *sc, int idx, int rate) 3585 { 3586 uint32_t eitr = 0; 3587 3588 if (rate > 0) { 3589 if (sc->hw.mac.type == e1000_82575) { 3590 eitr = 1000000000 / 256 / rate; 3591 /* 3592 * NOTE: 3593 * The documentation is wrong about the 2-bit left shift 3594 */ 3595 } else { 3596 eitr = 1000000 / rate; 3597 eitr <<= IGB_EITR_INTVL_SHIFT; 3598 } 3599 3600 if (eitr == 0) { 3601 /* Don't disable it */ 3602 eitr = 1 << IGB_EITR_INTVL_SHIFT; 3603 } else if (eitr > IGB_EITR_INTVL_MASK) { 3604 /* Don't allow it to be too large */ 3605 eitr = IGB_EITR_INTVL_MASK; 3606 } 3607 } 3608 if (sc->hw.mac.type == e1000_82575) 3609 eitr |= eitr << 16; 3610 else 3611 eitr |= E1000_EITR_CNT_IGNR; 3612 E1000_WRITE_REG(&sc->hw, E1000_EITR(idx), eitr); 3613 } 3614 3615 static int 3616 igb_sysctl_intr_rate(SYSCTL_HANDLER_ARGS) 3617 { 3618 struct igb_softc *sc = (void *)arg1; 3619 struct ifnet *ifp = &sc->arpcom.ac_if; 3620 int error, intr_rate; 3621 3622 intr_rate = sc->intr_rate; 3623 error = sysctl_handle_int(oidp, &intr_rate, 0, req); 3624 if (error || req->newptr == NULL) 3625 return error; 3626 if (intr_rate < 0) 3627 return EINVAL; 3628 3629 ifnet_serialize_all(ifp); 3630 3631 sc->intr_rate = intr_rate; 3632 if (ifp->if_flags & IFF_RUNNING) 3633 igb_set_eitr(sc, 0, sc->intr_rate); 3634 3635 if (bootverbose) 3636 if_printf(ifp, "interrupt rate set to %d/sec\n", sc->intr_rate); 3637 3638 ifnet_deserialize_all(ifp); 3639 3640 return 0; 3641 } 3642 3643 static int 3644 igb_sysctl_msix_rate(SYSCTL_HANDLER_ARGS) 3645 { 3646 struct igb_msix_data *msix = (void *)arg1; 3647 struct igb_softc *sc = msix->msix_sc; 3648 struct ifnet *ifp = &sc->arpcom.ac_if; 3649 int error, msix_rate; 3650 3651 msix_rate = msix->msix_rate; 3652 error = sysctl_handle_int(oidp, &msix_rate, 0, req); 3653 if (error || req->newptr == NULL) 3654 return error; 3655 if (msix_rate < 0) 3656 return EINVAL; 3657 3658 lwkt_serialize_enter(msix->msix_serialize); 3659 3660 msix->msix_rate = msix_rate; 3661 if (ifp->if_flags & IFF_RUNNING) 3662 igb_set_eitr(sc,
msix->msix_vector, msix->msix_rate); 3663 3664 if (bootverbose) { 3665 if_printf(ifp, "%s set to %d/sec\n", msix->msix_rate_desc, 3666 msix->msix_rate); 3667 } 3668 3669 lwkt_serialize_exit(msix->msix_serialize); 3670 3671 return 0; 3672 } 3673 3674 static int 3675 igb_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS) 3676 { 3677 struct igb_softc *sc = (void *)arg1; 3678 struct ifnet *ifp = &sc->arpcom.ac_if; 3679 struct igb_tx_ring *txr = &sc->tx_rings[0]; 3680 int error, nsegs; 3681 3682 nsegs = txr->intr_nsegs; 3683 error = sysctl_handle_int(oidp, &nsegs, 0, req); 3684 if (error || req->newptr == NULL) 3685 return error; 3686 if (nsegs <= 0) 3687 return EINVAL; 3688 3689 ifnet_serialize_all(ifp); 3690 3691 if (nsegs >= txr->num_tx_desc - txr->oact_lo_desc || 3692 nsegs >= txr->oact_hi_desc - IGB_MAX_SCATTER) { 3693 error = EINVAL; 3694 } else { 3695 int i; 3696 3697 error = 0; 3698 for (i = 0; i < sc->tx_ring_cnt; ++i) 3699 sc->tx_rings[i].intr_nsegs = nsegs; 3700 } 3701 3702 ifnet_deserialize_all(ifp); 3703 3704 return error; 3705 } 3706 3707 static int 3708 igb_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS) 3709 { 3710 struct igb_softc *sc = (void *)arg1; 3711 struct ifnet *ifp = &sc->arpcom.ac_if; 3712 int error, nsegs, i; 3713 3714 nsegs = sc->rx_rings[0].wreg_nsegs; 3715 error = sysctl_handle_int(oidp, &nsegs, 0, req); 3716 if (error || req->newptr == NULL) 3717 return error; 3718 3719 ifnet_serialize_all(ifp); 3720 for (i = 0; i < sc->rx_ring_cnt; ++i) 3721 sc->rx_rings[i].wreg_nsegs = nsegs; 3722 ifnet_deserialize_all(ifp); 3723 3724 return 0; 3725 } 3726 3727 static int 3728 igb_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS) 3729 { 3730 struct igb_softc *sc = (void *)arg1; 3731 struct ifnet *ifp = &sc->arpcom.ac_if; 3732 int error, nsegs, i; 3733 3734 nsegs = sc->tx_rings[0].wreg_nsegs; 3735 error = sysctl_handle_int(oidp, &nsegs, 0, req); 3736 if (error || req->newptr == NULL) 3737 return error; 3738 3739 ifnet_serialize_all(ifp); 3740 for (i = 0; i < sc->tx_ring_cnt; ++i) 3741 sc->tx_rings[i].wreg_nsegs = nsegs; 3742 ifnet_deserialize_all(ifp); 3743 3744 return 0; 3745 } 3746 3747 #ifdef IFPOLL_ENABLE 3748 3749 static int 3750 igb_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS) 3751 { 3752 struct igb_softc *sc = (void *)arg1; 3753 struct ifnet *ifp = &sc->arpcom.ac_if; 3754 int error, off; 3755 3756 off = sc->rx_npoll_off; 3757 error = sysctl_handle_int(oidp, &off, 0, req); 3758 if (error || req->newptr == NULL) 3759 return error; 3760 if (off < 0) 3761 return EINVAL; 3762 3763 ifnet_serialize_all(ifp); 3764 if (off >= ncpus2 || off % sc->rx_ring_cnt != 0) { 3765 error = EINVAL; 3766 } else { 3767 error = 0; 3768 sc->rx_npoll_off = off; 3769 } 3770 ifnet_deserialize_all(ifp); 3771 3772 return error; 3773 } 3774 3775 static int 3776 igb_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS) 3777 { 3778 struct igb_softc *sc = (void *)arg1; 3779 struct ifnet *ifp = &sc->arpcom.ac_if; 3780 int error, off; 3781 3782 off = sc->tx_npoll_off; 3783 error = sysctl_handle_int(oidp, &off, 0, req); 3784 if (error || req->newptr == NULL) 3785 return error; 3786 if (off < 0) 3787 return EINVAL; 3788 3789 ifnet_serialize_all(ifp); 3790 if (off >= ncpus2 || off % sc->tx_ring_cnt != 0) { 3791 error = EINVAL; 3792 } else { 3793 error = 0; 3794 sc->tx_npoll_off = off; 3795 } 3796 ifnet_deserialize_all(ifp); 3797 3798 return error; 3799 } 3800 3801 #endif /* IFPOLL_ENABLE */ 3802 3803 static void 3804 igb_init_intr(struct igb_softc *sc) 3805 { 3806 igb_set_intr_mask(sc); 3807 3808 if ((sc->flags & IGB_FLAG_SHARED_INTR) == 0) 3809
igb_init_unshared_intr(sc); 3810 3811 if (sc->intr_type != PCI_INTR_TYPE_MSIX) { 3812 igb_set_eitr(sc, 0, sc->intr_rate); 3813 } else { 3814 int i; 3815 3816 for (i = 0; i < sc->msix_cnt; ++i) 3817 igb_set_eitr(sc, i, sc->msix_data[i].msix_rate); 3818 } 3819 } 3820 3821 static void 3822 igb_init_unshared_intr(struct igb_softc *sc) 3823 { 3824 struct e1000_hw *hw = &sc->hw; 3825 const struct igb_rx_ring *rxr; 3826 const struct igb_tx_ring *txr; 3827 uint32_t ivar, index; 3828 int i; 3829 3830 /* 3831 * Enable extended mode 3832 */ 3833 if (sc->hw.mac.type != e1000_82575) { 3834 uint32_t gpie; 3835 int ivar_max; 3836 3837 gpie = E1000_GPIE_NSICR; 3838 if (sc->intr_type == PCI_INTR_TYPE_MSIX) { 3839 gpie |= E1000_GPIE_MSIX_MODE | 3840 E1000_GPIE_EIAME | 3841 E1000_GPIE_PBA; 3842 } 3843 E1000_WRITE_REG(hw, E1000_GPIE, gpie); 3844 3845 /* 3846 * Clear IVARs 3847 */ 3848 switch (sc->hw.mac.type) { 3849 case e1000_82576: 3850 ivar_max = IGB_MAX_IVAR_82576; 3851 break; 3852 3853 case e1000_82580: 3854 ivar_max = IGB_MAX_IVAR_82580; 3855 break; 3856 3857 case e1000_i350: 3858 ivar_max = IGB_MAX_IVAR_I350; 3859 break; 3860 3861 case e1000_i354: 3862 ivar_max = IGB_MAX_IVAR_I354; 3863 break; 3864 3865 case e1000_vfadapt: 3866 case e1000_vfadapt_i350: 3867 ivar_max = IGB_MAX_IVAR_VF; 3868 break; 3869 3870 case e1000_i210: 3871 ivar_max = IGB_MAX_IVAR_I210; 3872 break; 3873 3874 case e1000_i211: 3875 ivar_max = IGB_MAX_IVAR_I211; 3876 break; 3877 3878 default: 3879 panic("unknown mac type %d\n", sc->hw.mac.type); 3880 } 3881 for (i = 0; i < ivar_max; ++i) 3882 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, i, 0); 3883 E1000_WRITE_REG(hw, E1000_IVAR_MISC, 0); 3884 } else { 3885 uint32_t tmp; 3886 3887 KASSERT(sc->intr_type != PCI_INTR_TYPE_MSIX, 3888 ("82575 w/ MSI-X")); 3889 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT); 3890 tmp |= E1000_CTRL_EXT_IRCA; 3891 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp); 3892 } 3893 3894 /* 3895 * Map TX/RX interrupts to EICR 3896 */ 3897 switch (sc->hw.mac.type) { 3898 case e1000_82580: 3899 case e1000_i350: 3900 case e1000_i354: 3901 case e1000_vfadapt: 3902 case e1000_vfadapt_i350: 3903 case e1000_i210: 3904 case e1000_i211: 3905 /* RX entries */ 3906 for (i = 0; i < sc->rx_ring_inuse; ++i) { 3907 rxr = &sc->rx_rings[i]; 3908 3909 index = i >> 1; 3910 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); 3911 3912 if (i & 1) { 3913 ivar &= 0xff00ffff; 3914 ivar |= 3915 (rxr->rx_intr_bit | E1000_IVAR_VALID) << 16; 3916 } else { 3917 ivar &= 0xffffff00; 3918 ivar |= 3919 (rxr->rx_intr_bit | E1000_IVAR_VALID); 3920 } 3921 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 3922 } 3923 /* TX entries */ 3924 for (i = 0; i < sc->tx_ring_inuse; ++i) { 3925 txr = &sc->tx_rings[i]; 3926 3927 index = i >> 1; 3928 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); 3929 3930 if (i & 1) { 3931 ivar &= 0x00ffffff; 3932 ivar |= 3933 (txr->tx_intr_bit | E1000_IVAR_VALID) << 24; 3934 } else { 3935 ivar &= 0xffff00ff; 3936 ivar |= 3937 (txr->tx_intr_bit | E1000_IVAR_VALID) << 8; 3938 } 3939 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 3940 } 3941 if (sc->intr_type == PCI_INTR_TYPE_MSIX) { 3942 ivar = (sc->sts_intr_bit | E1000_IVAR_VALID) << 8; 3943 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); 3944 } 3945 break; 3946 3947 case e1000_82576: 3948 /* RX entries */ 3949 for (i = 0; i < sc->rx_ring_inuse; ++i) { 3950 rxr = &sc->rx_rings[i]; 3951 3952 index = i & 0x7; /* Each IVAR has two entries */ 3953 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); 3954 3955 if (i < 8) { 3956 ivar &= 
0xffffff00; 3957 ivar |= 3958 (rxr->rx_intr_bit | E1000_IVAR_VALID); 3959 } else { 3960 ivar &= 0xff00ffff; 3961 ivar |= 3962 (rxr->rx_intr_bit | E1000_IVAR_VALID) << 16; 3963 } 3964 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 3965 } 3966 /* TX entries */ 3967 for (i = 0; i < sc->tx_ring_inuse; ++i) { 3968 txr = &sc->tx_rings[i]; 3969 3970 index = i & 0x7; /* Each IVAR has two entries */ 3971 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); 3972 3973 if (i < 8) { 3974 ivar &= 0xffff00ff; 3975 ivar |= 3976 (txr->tx_intr_bit | E1000_IVAR_VALID) << 8; 3977 } else { 3978 ivar &= 0x00ffffff; 3979 ivar |= 3980 (txr->tx_intr_bit | E1000_IVAR_VALID) << 24; 3981 } 3982 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 3983 } 3984 if (sc->intr_type == PCI_INTR_TYPE_MSIX) { 3985 ivar = (sc->sts_intr_bit | E1000_IVAR_VALID) << 8; 3986 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); 3987 } 3988 break; 3989 3990 case e1000_82575: 3991 /* 3992 * Enable necessary interrupt bits. 3993 * 3994 * The name of the register is confusing; in addition to 3995 * configuring the first vector of MSI-X, it also configures 3996 * which bits of EICR could be set by the hardware even when 3997 * MSI or line interrupt is used; it thus controls interrupt 3998 * generation. It MUST be configured explicitly; the default 3999 * value mentioned in the datasheet is wrong: RX queue0 and 4000 * TX queue0 are NOT enabled by default. 4001 */ 4002 E1000_WRITE_REG(&sc->hw, E1000_MSIXBM(0), sc->intr_mask); 4003 break; 4004 4005 default: 4006 panic("unknown mac type %d\n", sc->hw.mac.type); 4007 } 4008 } 4009 4010 static int 4011 igb_setup_intr(struct igb_softc *sc) 4012 { 4013 int error; 4014 4015 if (sc->intr_type == PCI_INTR_TYPE_MSIX) 4016 return igb_msix_setup(sc); 4017 4018 error = bus_setup_intr(sc->dev, sc->intr_res, INTR_MPSAFE, 4019 (sc->flags & IGB_FLAG_SHARED_INTR) ? 
igb_intr_shared : igb_intr, 4020 sc, &sc->intr_tag, &sc->main_serialize); 4021 if (error) { 4022 device_printf(sc->dev, "Failed to register interrupt handler\n"); 4023 return error; 4024 } 4025 return 0; 4026 } 4027 4028 static void 4029 igb_set_txintr_mask(struct igb_tx_ring *txr, int *intr_bit0, int intr_bitmax) 4030 { 4031 if (txr->sc->hw.mac.type == e1000_82575) { 4032 txr->tx_intr_bit = 0; /* unused */ 4033 switch (txr->me) { 4034 case 0: 4035 txr->tx_intr_mask = E1000_EICR_TX_QUEUE0; 4036 break; 4037 case 1: 4038 txr->tx_intr_mask = E1000_EICR_TX_QUEUE1; 4039 break; 4040 case 2: 4041 txr->tx_intr_mask = E1000_EICR_TX_QUEUE2; 4042 break; 4043 case 3: 4044 txr->tx_intr_mask = E1000_EICR_TX_QUEUE3; 4045 break; 4046 default: 4047 panic("unsupported TX ring index %d\n", txr->me); 4048 } 4049 } else { 4050 int intr_bit = *intr_bit0; 4051 4052 txr->tx_intr_bit = intr_bit % intr_bitmax; 4053 txr->tx_intr_mask = 1 << txr->tx_intr_bit; 4054 4055 *intr_bit0 = intr_bit + 1; 4056 } 4057 } 4058 4059 static void 4060 igb_set_rxintr_mask(struct igb_rx_ring *rxr, int *intr_bit0, int intr_bitmax) 4061 { 4062 if (rxr->sc->hw.mac.type == e1000_82575) { 4063 rxr->rx_intr_bit = 0; /* unused */ 4064 switch (rxr->me) { 4065 case 0: 4066 rxr->rx_intr_mask = E1000_EICR_RX_QUEUE0; 4067 break; 4068 case 1: 4069 rxr->rx_intr_mask = E1000_EICR_RX_QUEUE1; 4070 break; 4071 case 2: 4072 rxr->rx_intr_mask = E1000_EICR_RX_QUEUE2; 4073 break; 4074 case 3: 4075 rxr->rx_intr_mask = E1000_EICR_RX_QUEUE3; 4076 break; 4077 default: 4078 panic("unsupported RX ring index %d\n", rxr->me); 4079 } 4080 } else { 4081 int intr_bit = *intr_bit0; 4082 4083 rxr->rx_intr_bit = intr_bit % intr_bitmax; 4084 rxr->rx_intr_mask = 1 << rxr->rx_intr_bit; 4085 4086 *intr_bit0 = intr_bit + 1; 4087 } 4088 } 4089 4090 static void 4091 igb_serialize(struct ifnet *ifp, enum ifnet_serialize slz) 4092 { 4093 struct igb_softc *sc = ifp->if_softc; 4094 4095 ifnet_serialize_array_enter(sc->serializes, sc->serialize_cnt, slz); 4096 } 4097 4098 static void 4099 igb_deserialize(struct ifnet *ifp, enum ifnet_serialize slz) 4100 { 4101 struct igb_softc *sc = ifp->if_softc; 4102 4103 ifnet_serialize_array_exit(sc->serializes, sc->serialize_cnt, slz); 4104 } 4105 4106 static int 4107 igb_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz) 4108 { 4109 struct igb_softc *sc = ifp->if_softc; 4110 4111 return ifnet_serialize_array_try(sc->serializes, sc->serialize_cnt, 4112 slz); 4113 } 4114 4115 #ifdef INVARIANTS 4116 4117 static void 4118 igb_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz, 4119 boolean_t serialized) 4120 { 4121 struct igb_softc *sc = ifp->if_softc; 4122 4123 ifnet_serialize_array_assert(sc->serializes, sc->serialize_cnt, 4124 slz, serialized); 4125 } 4126 4127 #endif /* INVARIANTS */ 4128 4129 static void 4130 igb_set_intr_mask(struct igb_softc *sc) 4131 { 4132 int i; 4133 4134 sc->intr_mask = sc->sts_intr_mask; 4135 for (i = 0; i < sc->rx_ring_inuse; ++i) 4136 sc->intr_mask |= sc->rx_rings[i].rx_intr_mask; 4137 for (i = 0; i < sc->tx_ring_inuse; ++i) 4138 sc->intr_mask |= sc->tx_rings[i].tx_intr_mask; 4139 if (bootverbose) { 4140 if_printf(&sc->arpcom.ac_if, "intr mask 0x%08x\n", 4141 sc->intr_mask); 4142 } 4143 } 4144 4145 static int 4146 igb_alloc_intr(struct igb_softc *sc) 4147 { 4148 int i, intr_bit, intr_bitmax; 4149 u_int intr_flags; 4150 4151 igb_msix_try_alloc(sc); 4152 if (sc->intr_type == PCI_INTR_TYPE_MSIX) 4153 goto done; 4154 4155 /* 4156 * Allocate MSI/legacy interrupt resource 4157 */ 4158 sc->intr_type =
static int
igb_alloc_intr(struct igb_softc *sc)
{
	int i, intr_bit, intr_bitmax;
	u_int intr_flags;

	igb_msix_try_alloc(sc);
	if (sc->intr_type == PCI_INTR_TYPE_MSIX)
		goto done;

	/*
	 * Allocate MSI/legacy interrupt resource
	 */
	sc->intr_type = pci_alloc_1intr(sc->dev, igb_msi_enable,
	    &sc->intr_rid, &intr_flags);

	if (sc->intr_type == PCI_INTR_TYPE_LEGACY) {
		int unshared;

		unshared = device_getenv_int(sc->dev, "irq.unshared", 0);
		if (!unshared) {
			sc->flags |= IGB_FLAG_SHARED_INTR;
			if (bootverbose)
				device_printf(sc->dev, "IRQ shared\n");
		} else {
			intr_flags &= ~RF_SHAREABLE;
			if (bootverbose)
				device_printf(sc->dev, "IRQ unshared\n");
		}
	}

	sc->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
	    &sc->intr_rid, intr_flags);
	if (sc->intr_res == NULL) {
		device_printf(sc->dev, "Unable to allocate bus resource: "
		    "interrupt\n");
		return ENXIO;
	}

	for (i = 0; i < sc->tx_ring_cnt; ++i)
		sc->tx_rings[i].tx_intr_cpuid = rman_get_cpuid(sc->intr_res);

	/*
	 * Setup MSI/legacy interrupt mask
	 */
	switch (sc->hw.mac.type) {
	case e1000_82575:
		intr_bitmax = IGB_MAX_TXRXINT_82575;
		break;

	case e1000_82576:
		intr_bitmax = IGB_MAX_TXRXINT_82576;
		break;

	case e1000_82580:
		intr_bitmax = IGB_MAX_TXRXINT_82580;
		break;

	case e1000_i350:
		intr_bitmax = IGB_MAX_TXRXINT_I350;
		break;

	case e1000_i354:
		intr_bitmax = IGB_MAX_TXRXINT_I354;
		break;

	case e1000_i210:
		intr_bitmax = IGB_MAX_TXRXINT_I210;
		break;

	case e1000_i211:
		intr_bitmax = IGB_MAX_TXRXINT_I211;
		break;

	default:
		intr_bitmax = IGB_MIN_TXRXINT;
		break;
	}
	intr_bit = 0;
	for (i = 0; i < sc->tx_ring_cnt; ++i)
		igb_set_txintr_mask(&sc->tx_rings[i], &intr_bit, intr_bitmax);
	for (i = 0; i < sc->rx_ring_cnt; ++i)
		igb_set_rxintr_mask(&sc->rx_rings[i], &intr_bit, intr_bitmax);
	sc->sts_intr_bit = 0;
	sc->sts_intr_mask = E1000_EICR_OTHER;

	/* Initialize interrupt rate */
	sc->intr_rate = IGB_INTR_RATE;
done:
	igb_set_ring_inuse(sc, FALSE);
	igb_set_intr_mask(sc);
	return 0;
}

static void
igb_free_intr(struct igb_softc *sc)
{
	if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
		if (sc->intr_res != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ,
			    sc->intr_rid, sc->intr_res);
		}
		if (sc->intr_type == PCI_INTR_TYPE_MSI)
			pci_release_msi(sc->dev);
	} else {
		igb_msix_free(sc, TRUE);
	}
}

static void
igb_teardown_intr(struct igb_softc *sc)
{
	if (sc->intr_type != PCI_INTR_TYPE_MSIX)
		bus_teardown_intr(sc->dev, sc->intr_res, sc->intr_tag);
	else
		igb_msix_teardown(sc, sc->msix_cnt);
}

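/*
 * MSI-X allocation strategy, in brief:
 *
 * The vector count reported by pci_msix_count() is first rounded
 * down to a power of 2 (msix_cnt2), e.g. 10 vectors round down to 8.
 * If nothing would be left over for the link status vector, the
 * power of 2 is halved once more, e.g. exactly 8 vectors give
 * msix_cnt2 = 4.  If enough vectors remain, each RX and TX ring then
 * gets its own vector ("independent"); otherwise RX/TX ring pairs
 * share vectors ("aggregate").  One extra vector is always reserved
 * for link status.
 */
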
static void
igb_msix_try_alloc(struct igb_softc *sc)
{
	int msix_enable, msix_cnt, msix_cnt2, alloc_cnt;
	int i, x, error;
	int offset, offset_def;
	struct igb_msix_data *msix;
	boolean_t aggregate, setup = FALSE;

	/*
	 * Don't enable MSI-X on 82575, see:
	 * 82575 specification update errata #25
	 */
	if (sc->hw.mac.type == e1000_82575)
		return;

	/* Don't enable MSI-X on VF */
	if (sc->vf_ifp)
		return;

	msix_enable = device_getenv_int(sc->dev, "msix.enable",
	    igb_msix_enable);
	if (!msix_enable)
		return;

	msix_cnt = pci_msix_count(sc->dev);
#ifdef IGB_MSIX_DEBUG
	msix_cnt = device_getenv_int(sc->dev, "msix.count", msix_cnt);
#endif
	if (msix_cnt <= 1) {
		/* One MSI-X model does not make sense */
		return;
	}

	i = 0;
	while ((1 << (i + 1)) <= msix_cnt)
		++i;
	msix_cnt2 = 1 << i;

	if (bootverbose) {
		device_printf(sc->dev, "MSI-X count %d/%d\n",
		    msix_cnt2, msix_cnt);
	}

	KKASSERT(msix_cnt2 <= msix_cnt);
	if (msix_cnt == msix_cnt2) {
		/* We need at least one MSI-X for link status */
		msix_cnt2 >>= 1;
		if (msix_cnt2 <= 1) {
			/* One MSI-X for RX/TX does not make sense */
			device_printf(sc->dev, "not enough MSI-X for TX/RX, "
			    "MSI-X count %d/%d\n", msix_cnt2, msix_cnt);
			return;
		}
		KKASSERT(msix_cnt > msix_cnt2);

		if (bootverbose) {
			device_printf(sc->dev, "MSI-X count fixup %d/%d\n",
			    msix_cnt2, msix_cnt);
		}
	}

	sc->rx_ring_msix = sc->rx_ring_cnt;
	if (sc->rx_ring_msix > msix_cnt2)
		sc->rx_ring_msix = msix_cnt2;

	sc->tx_ring_msix = sc->tx_ring_cnt;
	if (sc->tx_ring_msix > msix_cnt2)
		sc->tx_ring_msix = msix_cnt2;

	if (msix_cnt >= sc->tx_ring_msix + sc->rx_ring_msix + 1) {
		/*
		 * Independent TX/RX MSI-X
		 */
		aggregate = FALSE;
		if (bootverbose)
			device_printf(sc->dev, "independent TX/RX MSI-X\n");
		alloc_cnt = sc->tx_ring_msix + sc->rx_ring_msix;
	} else {
		/*
		 * Aggregate TX/RX MSI-X
		 */
		aggregate = TRUE;
		if (bootverbose)
			device_printf(sc->dev, "aggregate TX/RX MSI-X\n");
		alloc_cnt = msix_cnt2;
		if (alloc_cnt > ncpus2)
			alloc_cnt = ncpus2;
		if (sc->rx_ring_msix > alloc_cnt)
			sc->rx_ring_msix = alloc_cnt;
		if (sc->tx_ring_msix > alloc_cnt)
			sc->tx_ring_msix = alloc_cnt;
	}
	++alloc_cnt;	/* For link status */

	if (bootverbose) {
		device_printf(sc->dev, "MSI-X alloc %d, "
		    "RX ring %d, TX ring %d\n", alloc_cnt,
		    sc->rx_ring_msix, sc->tx_ring_msix);
	}

	sc->msix_mem_rid = PCIR_BAR(IGB_MSIX_BAR);
	sc->msix_mem_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
	    &sc->msix_mem_rid, RF_ACTIVE);
	if (sc->msix_mem_res == NULL) {
		sc->msix_mem_rid = PCIR_BAR(IGB_MSIX_BAR_ALT);
		sc->msix_mem_res = bus_alloc_resource_any(sc->dev,
		    SYS_RES_MEMORY, &sc->msix_mem_rid, RF_ACTIVE);
		if (sc->msix_mem_res == NULL) {
			device_printf(sc->dev, "Unable to map MSI-X table\n");
			return;
		}
	}

	sc->msix_cnt = alloc_cnt;
	sc->msix_data = kmalloc_cachealign(
	    sizeof(struct igb_msix_data) * sc->msix_cnt,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (x = 0; x < sc->msix_cnt; ++x) {
		msix = &sc->msix_data[x];

		lwkt_serialize_init(&msix->msix_serialize0);
		msix->msix_sc = sc;
		msix->msix_rid = -1;
		msix->msix_vector = x;
		msix->msix_mask = 1 << msix->msix_vector;
		msix->msix_rate = IGB_INTR_RATE;
	}

	x = 0;
	if (!aggregate) {
		/*
		 * RX rings
		 */
		if (sc->rx_ring_msix == ncpus2) {
			offset = 0;
		} else {
			offset_def = (sc->rx_ring_msix *
			    device_get_unit(sc->dev)) % ncpus2;

			offset = device_getenv_int(sc->dev,
			    "msix.rxoff", offset_def);
			if (offset >= ncpus2 ||
			    offset % sc->rx_ring_msix != 0) {
				device_printf(sc->dev,
				    "invalid msix.rxoff %d, use %d\n",
				    offset, offset_def);
				offset = offset_def;
			}
		}
		igb_msix_rx_conf(sc, 0, &x, offset);

		/*
		 * TX rings
		 */
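		/*
		 * The TX rings below are placed with the same rule as the
		 * RX rings above: ring i runs on cpu (i + offset).  E.g.
		 * with 2 TX rings on unit 1 and ncpus2 == 4, offset_def is
		 * (2 * 1) % 4 = 2, so tx0 -> cpu2 and tx1 -> cpu3; a
		 * msix.txoff tunable that is not a multiple of the TX ring
		 * count is rejected.
		 */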
"msix.txoff", offset_def); 4426 if (offset >= ncpus2 || 4427 offset % sc->tx_ring_msix != 0) { 4428 device_printf(sc->dev, 4429 "invalid msix.txoff %d, use %d\n", 4430 offset, offset_def); 4431 offset = offset_def; 4432 } 4433 } 4434 igb_msix_tx_conf(sc, 0, &x, offset); 4435 } else { 4436 int ring_agg, ring_max; 4437 4438 ring_agg = sc->rx_ring_msix; 4439 if (ring_agg > sc->tx_ring_msix) 4440 ring_agg = sc->tx_ring_msix; 4441 4442 ring_max = sc->rx_ring_msix; 4443 if (ring_max < sc->tx_ring_msix) 4444 ring_max = sc->tx_ring_msix; 4445 4446 if (ring_max == ncpus2) { 4447 offset = 0; 4448 } else { 4449 offset_def = (ring_max * device_get_unit(sc->dev)) % 4450 ncpus2; 4451 4452 offset = device_getenv_int(sc->dev, "msix.off", 4453 offset_def); 4454 if (offset >= ncpus2 || offset % ring_max != 0) { 4455 device_printf(sc->dev, 4456 "invalid msix.off %d, use %d\n", 4457 offset, offset_def); 4458 offset = offset_def; 4459 } 4460 } 4461 4462 for (i = 0; i < ring_agg; ++i) { 4463 struct igb_tx_ring *txr = &sc->tx_rings[i]; 4464 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 4465 4466 KKASSERT(x < sc->msix_cnt); 4467 msix = &sc->msix_data[x++]; 4468 4469 txr->tx_intr_bit = msix->msix_vector; 4470 txr->tx_intr_mask = msix->msix_mask; 4471 rxr->rx_intr_bit = msix->msix_vector; 4472 rxr->rx_intr_mask = msix->msix_mask; 4473 4474 msix->msix_serialize = &msix->msix_serialize0; 4475 msix->msix_func = igb_msix_rxtx; 4476 msix->msix_arg = msix; 4477 msix->msix_rx = rxr; 4478 msix->msix_tx = txr; 4479 4480 msix->msix_cpuid = i + offset; 4481 KKASSERT(msix->msix_cpuid < ncpus2); 4482 txr->tx_intr_cpuid = msix->msix_cpuid; 4483 4484 ksnprintf(msix->msix_desc, sizeof(msix->msix_desc), 4485 "%s rxtx%d", device_get_nameunit(sc->dev), i); 4486 msix->msix_rate = IGB_MSIX_RX_RATE; 4487 ksnprintf(msix->msix_rate_desc, 4488 sizeof(msix->msix_rate_desc), 4489 "RXTX%d interrupt rate", i); 4490 } 4491 4492 if (ring_agg != ring_max) { 4493 if (ring_max == sc->tx_ring_msix) 4494 igb_msix_tx_conf(sc, i, &x, offset); 4495 else 4496 igb_msix_rx_conf(sc, i, &x, offset); 4497 } 4498 } 4499 4500 /* 4501 * Link status 4502 */ 4503 KKASSERT(x < sc->msix_cnt); 4504 msix = &sc->msix_data[x++]; 4505 sc->sts_intr_bit = msix->msix_vector; 4506 sc->sts_intr_mask = msix->msix_mask; 4507 4508 msix->msix_serialize = &sc->main_serialize; 4509 msix->msix_func = igb_msix_status; 4510 msix->msix_arg = sc; 4511 msix->msix_cpuid = 0; 4512 ksnprintf(msix->msix_desc, sizeof(msix->msix_desc), "%s sts", 4513 device_get_nameunit(sc->dev)); 4514 ksnprintf(msix->msix_rate_desc, sizeof(msix->msix_rate_desc), 4515 "status interrupt rate"); 4516 4517 KKASSERT(x == sc->msix_cnt); 4518 4519 error = pci_setup_msix(sc->dev); 4520 if (error) { 4521 device_printf(sc->dev, "Setup MSI-X failed\n"); 4522 goto back; 4523 } 4524 setup = TRUE; 4525 4526 for (i = 0; i < sc->msix_cnt; ++i) { 4527 msix = &sc->msix_data[i]; 4528 4529 error = pci_alloc_msix_vector(sc->dev, msix->msix_vector, 4530 &msix->msix_rid, msix->msix_cpuid); 4531 if (error) { 4532 device_printf(sc->dev, 4533 "Unable to allocate MSI-X %d on cpu%d\n", 4534 msix->msix_vector, msix->msix_cpuid); 4535 goto back; 4536 } 4537 4538 msix->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, 4539 &msix->msix_rid, RF_ACTIVE); 4540 if (msix->msix_res == NULL) { 4541 device_printf(sc->dev, 4542 "Unable to allocate MSI-X %d resource\n", 4543 msix->msix_vector); 4544 error = ENOMEM; 4545 goto back; 4546 } 4547 } 4548 4549 pci_enable_msix(sc->dev); 4550 sc->intr_type = PCI_INTR_TYPE_MSIX; 4551 back: 4552 if (error) 
static void
igb_msix_free(struct igb_softc *sc, boolean_t setup)
{
	int i;

	KKASSERT(sc->msix_cnt > 1);

	for (i = 0; i < sc->msix_cnt; ++i) {
		struct igb_msix_data *msix = &sc->msix_data[i];

		if (msix->msix_res != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ,
			    msix->msix_rid, msix->msix_res);
		}
		if (msix->msix_rid >= 0)
			pci_release_msix_vector(sc->dev, msix->msix_rid);
	}
	if (setup)
		pci_teardown_msix(sc->dev);

	sc->msix_cnt = 0;
	kfree(sc->msix_data, M_DEVBUF);
	sc->msix_data = NULL;
}

static int
igb_msix_setup(struct igb_softc *sc)
{
	int i;

	for (i = 0; i < sc->msix_cnt; ++i) {
		struct igb_msix_data *msix = &sc->msix_data[i];
		int error;

		error = bus_setup_intr_descr(sc->dev, msix->msix_res,
		    INTR_MPSAFE, msix->msix_func, msix->msix_arg,
		    &msix->msix_handle, msix->msix_serialize,
		    msix->msix_desc);
		if (error) {
			device_printf(sc->dev, "could not set up %s "
			    "interrupt handler.\n", msix->msix_desc);
			igb_msix_teardown(sc, i);
			return error;
		}
	}
	return 0;
}

static void
igb_msix_teardown(struct igb_softc *sc, int msix_cnt)
{
	int i;

	for (i = 0; i < msix_cnt; ++i) {
		struct igb_msix_data *msix = &sc->msix_data[i];

		bus_teardown_intr(sc->dev, msix->msix_res, msix->msix_handle);
	}
}

static void
igb_msix_rx(void *arg)
{
	struct igb_rx_ring *rxr = arg;

	ASSERT_SERIALIZED(&rxr->rx_serialize);
	igb_rxeof(rxr, -1);

	E1000_WRITE_REG(&rxr->sc->hw, E1000_EIMS, rxr->rx_intr_mask);
}

static void
igb_msix_tx(void *arg)
{
	struct igb_tx_ring *txr = arg;

	ASSERT_SERIALIZED(&txr->tx_serialize);

	igb_txeof(txr);
	if (!ifsq_is_empty(txr->ifsq))
		ifsq_devstart(txr->ifsq);

	E1000_WRITE_REG(&txr->sc->hw, E1000_EIMS, txr->tx_intr_mask);
}

static void
igb_msix_status(void *arg)
{
	struct igb_softc *sc = arg;
	uint32_t icr;

	ASSERT_SERIALIZED(&sc->main_serialize);

	icr = E1000_READ_REG(&sc->hw, E1000_ICR);
	if (icr & E1000_ICR_LSC) {
		sc->hw.mac.get_link_status = 1;
		igb_update_link_status(sc);
	}

	E1000_WRITE_REG(&sc->hw, E1000_EIMS, sc->sts_intr_mask);
}

static void
igb_set_ring_inuse(struct igb_softc *sc, boolean_t polling)
{
	sc->rx_ring_inuse = igb_get_rxring_inuse(sc, polling);
	sc->tx_ring_inuse = igb_get_txring_inuse(sc, polling);
	if (bootverbose) {
		if_printf(&sc->arpcom.ac_if, "RX rings %d/%d, TX rings %d/%d\n",
		    sc->rx_ring_inuse, sc->rx_ring_cnt,
		    sc->tx_ring_inuse, sc->tx_ring_cnt);
	}
}

static int
igb_get_rxring_inuse(const struct igb_softc *sc, boolean_t polling)
{
	if (!IGB_ENABLE_HWRSS(sc))
		return 1;

	if (polling)
		return sc->rx_ring_cnt;
	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
		return IGB_MIN_RING_RSS;
	else
		return sc->rx_ring_msix;
}

static int
igb_get_txring_inuse(const struct igb_softc *sc, boolean_t polling)
{
	if (!IGB_ENABLE_HWTSS(sc))
		return 1;

	if (polling)
		return sc->tx_ring_cnt;
	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
		return IGB_MIN_RING;
	else
		return sc->tx_ring_msix;
}

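/*
 * TSO needs the complete Ethernet/IP/TCP header in the leading mbuf:
 * igb_tso_pullup() below enforces that with m_pullup(), so the
 * IGB_TXFLAG_TSO_IPLEN0 fixup can zero ip_len through mtodoff()
 * without crossing an mbuf boundary, and igb_tso_ctx() can trust the
 * csum_lhlen/csum_iphlen/csum_thlen header lengths it loads into the
 * context descriptor.
 */
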
static int
igb_tso_pullup(struct igb_tx_ring *txr, struct mbuf **mp)
{
	int hoff, iphlen, thoff;
	struct mbuf *m;

	m = *mp;
	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));

	iphlen = m->m_pkthdr.csum_iphlen;
	thoff = m->m_pkthdr.csum_thlen;
	hoff = m->m_pkthdr.csum_lhlen;

	KASSERT(iphlen > 0, ("invalid ip hlen"));
	KASSERT(thoff > 0, ("invalid tcp hlen"));
	KASSERT(hoff > 0, ("invalid ether hlen"));

	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
		m = m_pullup(m, hoff + iphlen + thoff);
		if (m == NULL) {
			*mp = NULL;
			return ENOBUFS;
		}
		*mp = m;
	}
	if (txr->tx_flags & IGB_TXFLAG_TSO_IPLEN0) {
		struct ip *ip;

		ip = mtodoff(m, struct ip *, hoff);
		ip->ip_len = 0;
	}

	return 0;
}

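/*
 * Sketch of the advanced TX context descriptor that igb_tso_ctx()
 * builds for a common TSO frame (14 byte Ethernet header, 20 byte IP
 * header, 20 byte TCP header, 1448 byte MSS, no VLAN tag):
 *
 *	vlan_macip_lens = (14 << E1000_ADVTXD_MACLEN_SHIFT) | 20;
 *	type_tucmd_mlhl = E1000_ADVTXD_DCMD_DEXT |
 *	    E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_TUCMD_L4T_TCP |
 *	    E1000_ADVTXD_TUCMD_IPV4;
 *	mss_l4len_idx = (1448 << E1000_ADVTXD_MSS_SHIFT) |
 *	    (20 << E1000_ADVTXD_L4LEN_SHIFT);
 */
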
static void
igb_tso_ctx(struct igb_tx_ring *txr, struct mbuf *m, uint32_t *hlen)
{
	struct e1000_adv_tx_context_desc *TXD;
	uint32_t vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
	int hoff, ctxd, iphlen, thoff;

	iphlen = m->m_pkthdr.csum_iphlen;
	thoff = m->m_pkthdr.csum_thlen;
	hoff = m->m_pkthdr.csum_lhlen;

	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;

	ctxd = txr->next_avail_desc;
	TXD = (struct e1000_adv_tx_context_desc *)&txr->tx_base[ctxd];

	if (m->m_flags & M_VLANTAG) {
		uint16_t vlantag;

		vlantag = htole16(m->m_pkthdr.ether_vlantag);
		vlan_macip_lens |= (vlantag << E1000_ADVTXD_VLAN_SHIFT);
	}

	vlan_macip_lens |= (hoff << E1000_ADVTXD_MACLEN_SHIFT);
	vlan_macip_lens |= iphlen;

	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;

	mss_l4len_idx |= (m->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
	mss_l4len_idx |= (thoff << E1000_ADVTXD_L4LEN_SHIFT);

	/*
	 * 82575 needs the TX context index added; the queue
	 * index is used as TX context index here.
	 */
	if (txr->sc->hw.mac.type == e1000_82575)
		mss_l4len_idx |= txr->me << 4;

	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
	TXD->seqnum_seed = htole32(0);
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	/* We've consumed the first desc, adjust counters */
	if (++ctxd == txr->num_tx_desc)
		ctxd = 0;
	txr->next_avail_desc = ctxd;
	--txr->tx_avail;

	*hlen = hoff + iphlen + thoff;
}

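/*
 * The serializer array built below is ordered: the main serializer
 * first, then any aggregate RX/TX MSI-X serializers, then all TX ring
 * serializers, and the RX ring serializers last.
 * igb_serialize()/igb_deserialize() hand this array to
 * ifnet_serialize_array_enter()/_exit(), which presumably rely on the
 * fixed, consistent order to keep the lock ordering deadlock-free.
 */
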
static void
igb_setup_serializer(struct igb_softc *sc)
{
	const struct igb_msix_data *msix;
	int i, j;

	/*
	 * Allocate serializer array
	 */

	/* Main + TX + RX */
	sc->serialize_cnt = 1 + sc->tx_ring_cnt + sc->rx_ring_cnt;

	/* Aggregate TX/RX MSI-X */
	for (i = 0; i < sc->msix_cnt; ++i) {
		msix = &sc->msix_data[i];
		if (msix->msix_serialize == &msix->msix_serialize0)
			sc->serialize_cnt++;
	}

	sc->serializes =
	    kmalloc(sc->serialize_cnt * sizeof(struct lwkt_serialize *),
	    M_DEVBUF, M_WAITOK | M_ZERO);

	/*
	 * Setup serializers
	 *
	 * NOTE: Order is critical
	 */

	i = 0;

	KKASSERT(i < sc->serialize_cnt);
	sc->serializes[i++] = &sc->main_serialize;

	for (j = 0; j < sc->msix_cnt; ++j) {
		msix = &sc->msix_data[j];
		if (msix->msix_serialize == &msix->msix_serialize0) {
			KKASSERT(i < sc->serialize_cnt);
			sc->serializes[i++] = msix->msix_serialize;
		}
	}

	for (j = 0; j < sc->tx_ring_cnt; ++j) {
		KKASSERT(i < sc->serialize_cnt);
		sc->serializes[i++] = &sc->tx_rings[j].tx_serialize;
	}

	for (j = 0; j < sc->rx_ring_cnt; ++j) {
		KKASSERT(i < sc->serialize_cnt);
		sc->serializes[i++] = &sc->rx_rings[j].rx_serialize;
	}

	KKASSERT(i == sc->serialize_cnt);
}

static void
igb_msix_rx_conf(struct igb_softc *sc, int i, int *x0, int offset)
{
	int x = *x0;

	for (; i < sc->rx_ring_msix; ++i) {
		struct igb_rx_ring *rxr = &sc->rx_rings[i];
		struct igb_msix_data *msix;

		KKASSERT(x < sc->msix_cnt);
		msix = &sc->msix_data[x++];

		rxr->rx_intr_bit = msix->msix_vector;
		rxr->rx_intr_mask = msix->msix_mask;

		msix->msix_serialize = &rxr->rx_serialize;
		msix->msix_func = igb_msix_rx;
		msix->msix_arg = rxr;

		msix->msix_cpuid = i + offset;
		KKASSERT(msix->msix_cpuid < ncpus2);

		ksnprintf(msix->msix_desc, sizeof(msix->msix_desc), "%s rx%d",
		    device_get_nameunit(sc->dev), i);

		msix->msix_rate = IGB_MSIX_RX_RATE;
		ksnprintf(msix->msix_rate_desc, sizeof(msix->msix_rate_desc),
		    "RX%d interrupt rate", i);
	}
	*x0 = x;
}

static void
igb_msix_tx_conf(struct igb_softc *sc, int i, int *x0, int offset)
{
	int x = *x0;

	for (; i < sc->tx_ring_msix; ++i) {
		struct igb_tx_ring *txr = &sc->tx_rings[i];
		struct igb_msix_data *msix;

		KKASSERT(x < sc->msix_cnt);
		msix = &sc->msix_data[x++];

		txr->tx_intr_bit = msix->msix_vector;
		txr->tx_intr_mask = msix->msix_mask;

		msix->msix_serialize = &txr->tx_serialize;
		msix->msix_func = igb_msix_tx;
		msix->msix_arg = txr;

		msix->msix_cpuid = i + offset;
		KKASSERT(msix->msix_cpuid < ncpus2);
		txr->tx_intr_cpuid = msix->msix_cpuid;

		ksnprintf(msix->msix_desc, sizeof(msix->msix_desc), "%s tx%d",
		    device_get_nameunit(sc->dev), i);

		msix->msix_rate = IGB_MSIX_TX_RATE;
		ksnprintf(msix->msix_rate_desc, sizeof(msix->msix_rate_desc),
		    "TX%d interrupt rate", i);
	}
	*x0 = x;
}

static void
igb_msix_rxtx(void *arg)
{
	struct igb_msix_data *msix = arg;
	struct igb_rx_ring *rxr = msix->msix_rx;
	struct igb_tx_ring *txr = msix->msix_tx;

	ASSERT_SERIALIZED(&msix->msix_serialize0);

	lwkt_serialize_enter(&rxr->rx_serialize);
	igb_rxeof(rxr, -1);
	lwkt_serialize_exit(&rxr->rx_serialize);

	lwkt_serialize_enter(&txr->tx_serialize);
	igb_txeof(txr);
	if (!ifsq_is_empty(txr->ifsq))
		ifsq_devstart(txr->ifsq);
	lwkt_serialize_exit(&txr->tx_serialize);

	E1000_WRITE_REG(&msix->msix_sc->hw, E1000_EIMS, msix->msix_mask);
}

static void
igb_set_timer_cpuid(struct igb_softc *sc, boolean_t polling)
{
	if (polling || sc->intr_type == PCI_INTR_TYPE_MSIX)
		sc->timer_cpuid = 0; /* XXX fixed */
	else
		sc->timer_cpuid = rman_get_cpuid(sc->intr_res);
}