/*
 * Copyright (c) 2001-2011, Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_ifpoll.h"
#include "opt_igb.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/rman.h>
#include <sys/serialize.h>
#include <sys/serialize2.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/ifq_var.h>
#include <net/toeplitz.h>
#include <net/toeplitz2.h>
#include <net/vlan/if_vlan_var.h>
#include <net/vlan/if_vlan_ether.h>
#include <net/if_poll.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>

#include <bus/pci/pcivar.h>
#include <bus/pci/pcireg.h>

#include <dev/netif/ig_hal/e1000_api.h>
#include <dev/netif/ig_hal/e1000_82575.h>
#include <dev/netif/igb/if_igb.h>

#ifdef IGB_RSS_DEBUG
#define IGB_RSS_DPRINTF(sc, lvl, fmt, ...) \
do { \
	if (sc->rss_debug >= lvl) \
		if_printf(&sc->arpcom.ac_if, fmt, __VA_ARGS__); \
} while (0)
#else	/* !IGB_RSS_DEBUG */
#define IGB_RSS_DPRINTF(sc, lvl, fmt, ...)	((void)0)
#endif	/* IGB_RSS_DEBUG */

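/*
 * Hypothetical use of the debug macro above (the real call sites live
 * in the RX dispatch path, outside this excerpt):
 *
 *	IGB_RSS_DPRINTF(sc, 2, "ring %d, hash 0x%08x\n", ring, hash);
 */
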
#define IGB_NAME	"Intel(R) PRO/1000 "
#define IGB_DEVICE(id)	\
	{ IGB_VENDOR_ID, E1000_DEV_ID_##id, IGB_NAME #id }
#define IGB_DEVICE_NULL	{ 0, 0, NULL }

static struct igb_device {
	uint16_t	vid;
	uint16_t	did;
	const char	*desc;
} igb_devices[] = {
	IGB_DEVICE(82575EB_COPPER),
	IGB_DEVICE(82575EB_FIBER_SERDES),
	IGB_DEVICE(82575GB_QUAD_COPPER),
	IGB_DEVICE(82576),
	IGB_DEVICE(82576_NS),
	IGB_DEVICE(82576_NS_SERDES),
	IGB_DEVICE(82576_FIBER),
	IGB_DEVICE(82576_SERDES),
	IGB_DEVICE(82576_SERDES_QUAD),
	IGB_DEVICE(82576_QUAD_COPPER),
	IGB_DEVICE(82576_QUAD_COPPER_ET2),
	IGB_DEVICE(82576_VF),
	IGB_DEVICE(82580_COPPER),
	IGB_DEVICE(82580_FIBER),
	IGB_DEVICE(82580_SERDES),
	IGB_DEVICE(82580_SGMII),
	IGB_DEVICE(82580_COPPER_DUAL),
	IGB_DEVICE(82580_QUAD_FIBER),
	IGB_DEVICE(DH89XXCC_SERDES),
	IGB_DEVICE(DH89XXCC_SGMII),
	IGB_DEVICE(DH89XXCC_SFP),
	IGB_DEVICE(DH89XXCC_BACKPLANE),
	IGB_DEVICE(I350_COPPER),
	IGB_DEVICE(I350_FIBER),
	IGB_DEVICE(I350_SERDES),
	IGB_DEVICE(I350_SGMII),
	IGB_DEVICE(I350_VF),
	IGB_DEVICE(I210_COPPER),
	IGB_DEVICE(I210_COPPER_IT),
	IGB_DEVICE(I210_COPPER_OEM1),
	IGB_DEVICE(I210_COPPER_FLASHLESS),
	IGB_DEVICE(I210_SERDES_FLASHLESS),
	IGB_DEVICE(I210_FIBER),
	IGB_DEVICE(I210_SERDES),
	IGB_DEVICE(I210_SGMII),
	IGB_DEVICE(I211_COPPER),
	IGB_DEVICE(I354_BACKPLANE_1GBPS),
	IGB_DEVICE(I354_SGMII),

	/* required last entry */
	IGB_DEVICE_NULL
};

static int	igb_probe(device_t);
static int	igb_attach(device_t);
static int	igb_detach(device_t);
static int	igb_shutdown(device_t);
static int	igb_suspend(device_t);
static int	igb_resume(device_t);

static boolean_t igb_is_valid_ether_addr(const uint8_t *);
static void	igb_setup_ifp(struct igb_softc *);
static boolean_t igb_txcsum_ctx(struct igb_tx_ring *, struct mbuf *);
static int	igb_tso_pullup(struct igb_tx_ring *, struct mbuf **);
static void	igb_tso_ctx(struct igb_tx_ring *, struct mbuf *, uint32_t *);
static void	igb_add_sysctl(struct igb_softc *);
static int	igb_sysctl_intr_rate(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_msix_rate(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
static void	igb_set_ring_inuse(struct igb_softc *, boolean_t);
static int	igb_get_rxring_inuse(const struct igb_softc *, boolean_t);
static int	igb_get_txring_inuse(const struct igb_softc *, boolean_t);
static void	igb_set_timer_cpuid(struct igb_softc *, boolean_t);
#ifdef IFPOLL_ENABLE
static int	igb_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS);
#endif

static void	igb_vf_init_stats(struct igb_softc *);
static void	igb_reset(struct igb_softc *);
static void	igb_update_stats_counters(struct igb_softc *);
static void	igb_update_vf_stats_counters(struct igb_softc *);
static void	igb_update_link_status(struct igb_softc *);
static void	igb_init_tx_unit(struct igb_softc *);
static void	igb_init_rx_unit(struct igb_softc *);

static void	igb_set_vlan(struct igb_softc *);
static void	igb_set_multi(struct igb_softc *);
static void	igb_set_promisc(struct igb_softc *);
static void	igb_disable_promisc(struct igb_softc *);

static int	igb_alloc_rings(struct igb_softc *);
static void	igb_free_rings(struct igb_softc *);
static int	igb_create_tx_ring(struct igb_tx_ring *);
static int	igb_create_rx_ring(struct igb_rx_ring *);
static void	igb_free_tx_ring(struct igb_tx_ring *);
static void	igb_free_rx_ring(struct igb_rx_ring *);
static void	igb_destroy_tx_ring(struct igb_tx_ring *, int);
static void	igb_destroy_rx_ring(struct igb_rx_ring *, int);
static void	igb_init_tx_ring(struct igb_tx_ring *);
static int	igb_init_rx_ring(struct igb_rx_ring *);
static int	igb_newbuf(struct igb_rx_ring *, int, boolean_t);
static int	igb_encap(struct igb_tx_ring *, struct mbuf **, int *, int *);
static void	igb_rx_refresh(struct igb_rx_ring *, int);
static void	igb_setup_serializer(struct igb_softc *);

static void	igb_stop(struct igb_softc *);
static void	igb_init(void *);
static int	igb_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
static void	igb_media_status(struct ifnet *, struct ifmediareq *);
static int	igb_media_change(struct ifnet *);
static void	igb_timer(void *);
static void	igb_watchdog(struct ifaltq_subque *);
static void	igb_start(struct ifnet *, struct ifaltq_subque *);
#ifdef IFPOLL_ENABLE
static void	igb_npoll(struct ifnet *, struct ifpoll_info *);
static void	igb_npoll_rx(struct ifnet *, void *, int);
static void	igb_npoll_tx(struct ifnet *, void *, int);
static void	igb_npoll_status(struct ifnet *);
#endif
static void	igb_serialize(struct ifnet *, enum ifnet_serialize);
static void	igb_deserialize(struct ifnet *, enum ifnet_serialize);
static int	igb_tryserialize(struct ifnet *, enum ifnet_serialize);
#ifdef INVARIANTS
static void	igb_serialize_assert(struct ifnet *, enum ifnet_serialize,
		    boolean_t);
#endif

static void	igb_intr(void *);
static void	igb_intr_shared(void *);
static void	igb_rxeof(struct igb_rx_ring *, int);
static void	igb_txeof(struct igb_tx_ring *);
static void	igb_set_eitr(struct igb_softc *, int, int);
static void	igb_enable_intr(struct igb_softc *);
static void	igb_disable_intr(struct igb_softc *);
static void	igb_init_unshared_intr(struct igb_softc *);
static void	igb_init_intr(struct igb_softc *);
static int	igb_setup_intr(struct igb_softc *);
static void	igb_set_txintr_mask(struct igb_tx_ring *, int *, int);
static void	igb_set_rxintr_mask(struct igb_rx_ring *, int *, int);
static void	igb_set_intr_mask(struct igb_softc *);
static int	igb_alloc_intr(struct igb_softc *);
static void	igb_free_intr(struct igb_softc *);
static void	igb_teardown_intr(struct igb_softc *);
static void	igb_msix_try_alloc(struct igb_softc *);
static void	igb_msix_rx_conf(struct igb_softc *, int, int *, int);
static void	igb_msix_tx_conf(struct igb_softc *, int, int *, int);
static void	igb_msix_free(struct igb_softc *, boolean_t);
static int	igb_msix_setup(struct igb_softc *);
static void	igb_msix_teardown(struct igb_softc *, int);
static void	igb_msix_rx(void *);
static void	igb_msix_tx(void *);
static void	igb_msix_status(void *);
static void	igb_msix_rxtx(void *);

/* Management and WOL Support */
static void	igb_get_mgmt(struct igb_softc *);
static void	igb_rel_mgmt(struct igb_softc *);
static void	igb_get_hw_control(struct igb_softc *);
static void	igb_rel_hw_control(struct igb_softc *);
static void	igb_enable_wol(device_t);

static device_method_t igb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		igb_probe),
	DEVMETHOD(device_attach,	igb_attach),
	DEVMETHOD(device_detach,	igb_detach),
	DEVMETHOD(device_shutdown,	igb_shutdown),
	DEVMETHOD(device_suspend,	igb_suspend),
	DEVMETHOD(device_resume,	igb_resume),
	DEVMETHOD_END
};

static driver_t igb_driver = {
	"igb",
	igb_methods,
	sizeof(struct igb_softc),
};

static devclass_t igb_devclass;

DECLARE_DUMMY_MODULE(if_igb);
MODULE_DEPEND(igb, ig_hal, 1, 1, 1);
DRIVER_MODULE(if_igb, pci, igb_driver, igb_devclass, NULL, NULL);

static int	igb_rxd = IGB_DEFAULT_RXD;
static int	igb_txd = IGB_DEFAULT_TXD;
static int	igb_rxr = 0;
static int	igb_txr = 0;
static int	igb_msi_enable = 1;
static int	igb_msix_enable = 1;
static int	igb_eee_disabled = 1;	/* Energy Efficient Ethernet */
static int	igb_fc_setting = e1000_fc_full;

/*
 * DMA Coalescing, only for i350 - default to off;
 * this feature is for power savings.
 */
static int	igb_dma_coalesce = 0;

TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
TUNABLE_INT("hw.igb.rxr", &igb_rxr);
TUNABLE_INT("hw.igb.txr", &igb_txr);
TUNABLE_INT("hw.igb.msi.enable", &igb_msi_enable);
TUNABLE_INT("hw.igb.msix.enable", &igb_msix_enable);
TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);

/* i350 specific */
TUNABLE_INT("hw.igb.eee_disabled", &igb_eee_disabled);
TUNABLE_INT("hw.igb.dma_coalesce", &igb_dma_coalesce);

static __inline void
igb_rxcsum(uint32_t staterr, struct mbuf *mp)
{
	/* Ignore Checksum bit is set */
	if (staterr & E1000_RXD_STAT_IXSM)
		return;

	if ((staterr & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
	    E1000_RXD_STAT_IPCS)
		mp->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID;

	if (staterr & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
		if ((staterr & E1000_RXDEXT_STATERR_TCPE) == 0) {
			mp->m_pkthdr.csum_flags |= CSUM_DATA_VALID |
			    CSUM_PSEUDO_HDR | CSUM_FRAG_NOT_CHECKED;
			mp->m_pkthdr.csum_data = htons(0xffff);
		}
	}
}

static __inline struct pktinfo *
igb_rssinfo(struct mbuf *m, struct pktinfo *pi,
    uint32_t hash, uint32_t hashtype, uint32_t staterr)
{
	switch (hashtype) {
	case E1000_RXDADV_RSSTYPE_IPV4_TCP:
		pi->pi_netisr = NETISR_IP;
		pi->pi_flags = 0;
		pi->pi_l3proto = IPPROTO_TCP;
		break;

	case E1000_RXDADV_RSSTYPE_IPV4:
		if (staterr & E1000_RXD_STAT_IXSM)
			return NULL;

		if ((staterr &
		     (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
		    E1000_RXD_STAT_TCPCS) {
			/* Unfragmented UDP datagram with valid checksum */
			pi->pi_netisr = NETISR_IP;
			pi->pi_flags = 0;
			pi->pi_l3proto = IPPROTO_UDP;
			break;
		}
		/* FALL THROUGH */
	default:
		return NULL;
	}

	m->m_flags |= M_HASH;
	m->m_pkthdr.hash = toeplitz_hash(hash);
	return pi;
}

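/*
 * The 32-bit hardware RSS hash is normalized through toeplitz_hash()
 * above so that the hash carried in the mbuf agrees with the host-side
 * Toeplitz computation; the stack can then keep each flow on the cpu
 * that owns the RX ring the NIC hashed the packet to.
 */
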
static int
igb_probe(device_t dev)
{
	const struct igb_device *d;
	uint16_t vid, did;

	vid = pci_get_vendor(dev);
	did = pci_get_device(dev);

	for (d = igb_devices; d->desc != NULL; ++d) {
		if (vid == d->vid && did == d->did) {
			device_set_desc(dev, d->desc);
			return 0;
		}
	}
	return ENXIO;
}

static int
igb_attach(device_t dev)
{
	struct igb_softc *sc = device_get_softc(dev);
	uint16_t eeprom_data;
	int error = 0, ring_max;
#ifdef IFPOLL_ENABLE
	int offset, offset_def;
#endif

#ifdef notyet
	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_nvm_info, "I", "NVM Information");
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
#endif

	callout_init_mp(&sc->timer);
	lwkt_serialize_init(&sc->main_serialize);

	if_initname(&sc->arpcom.ac_if, device_get_name(dev),
	    device_get_unit(dev));
	sc->dev = sc->osdep.dev = dev;

	/*
	 * Determine hardware and mac type
	 */
	sc->hw.vendor_id = pci_get_vendor(dev);
	sc->hw.device_id = pci_get_device(dev);
	sc->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
	sc->hw.subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	sc->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);

	if (e1000_set_mac_type(&sc->hw))
		return ENXIO;

	/* Are we a VF device? */
	if (sc->hw.mac.type == e1000_vfadapt ||
	    sc->hw.mac.type == e1000_vfadapt_i350)
		sc->vf_ifp = 1;
	else
		sc->vf_ifp = 0;

	/*
	 * Configure total supported RX/TX ring count
	 */
	switch (sc->hw.mac.type) {
	case e1000_82575:
		ring_max = IGB_MAX_RING_82575;
		break;

	case e1000_82576:
		ring_max = IGB_MAX_RING_82576;
		break;

	case e1000_82580:
		ring_max = IGB_MAX_RING_82580;
		break;

	case e1000_i350:
		ring_max = IGB_MAX_RING_I350;
		break;

	case e1000_i354:
		ring_max = IGB_MAX_RING_I354;
		break;

	case e1000_i210:
		ring_max = IGB_MAX_RING_I210;
		break;

	case e1000_i211:
		ring_max = IGB_MAX_RING_I211;
		break;

	default:
		ring_max = IGB_MIN_RING;
		break;
	}

	sc->rx_ring_cnt = device_getenv_int(dev, "rxr", igb_rxr);
	sc->rx_ring_cnt = if_ring_count2(sc->rx_ring_cnt, ring_max);
#ifdef IGB_RSS_DEBUG
	sc->rx_ring_cnt = device_getenv_int(dev, "rxr_debug", sc->rx_ring_cnt);
#endif
	sc->rx_ring_inuse = sc->rx_ring_cnt;

	sc->tx_ring_cnt = device_getenv_int(dev, "txr", igb_txr);
	sc->tx_ring_cnt = if_ring_count2(sc->tx_ring_cnt, ring_max);
#ifdef IGB_TSS_DEBUG
	sc->tx_ring_cnt = device_getenv_int(dev, "txr_debug", sc->tx_ring_cnt);
#endif
	sc->tx_ring_inuse = sc->tx_ring_cnt;

	/* Enable bus mastering */
	pci_enable_busmaster(dev);

	/*
	 * Allocate IO memory
	 */
	sc->mem_rid = PCIR_BAR(0);
	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->mem_rid,
	    RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "Unable to allocate bus resource: memory\n");
		error = ENXIO;
		goto failed;
	}
	sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->mem_res);
	sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->mem_res);

	sc->hw.hw_addr = (uint8_t *)&sc->osdep.mem_bus_space_handle;

	/* Save PCI command register for Shared Code */
	sc->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
	sc->hw.back = &sc->osdep;

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&sc->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto failed;
	}

	e1000_get_bus_info(&sc->hw);

	sc->hw.mac.autoneg = DO_AUTO_NEG;
	sc->hw.phy.autoneg_wait_to_complete = FALSE;
	sc->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (sc->hw.phy.media_type == e1000_media_type_copper) {
		sc->hw.phy.mdix = AUTO_ALL_MODES;
		sc->hw.phy.disable_polarity_correction = FALSE;
		sc->hw.phy.ms_type = IGB_MASTER_SLAVE;
	}

	/* Set the frame limits assuming standard ethernet sized frames. */
	sc->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;

	/* Allocate RX/TX rings */
	error = igb_alloc_rings(sc);
	if (error)
		goto failed;

#ifdef IFPOLL_ENABLE
	/*
	 * NPOLLING RX CPU offset
	 */
	if (sc->rx_ring_cnt == ncpus2) {
		offset = 0;
	} else {
		offset_def = (sc->rx_ring_cnt * device_get_unit(dev)) % ncpus2;
		offset = device_getenv_int(dev, "npoll.rxoff", offset_def);
		if (offset >= ncpus2 ||
		    offset % sc->rx_ring_cnt != 0) {
			device_printf(dev, "invalid npoll.rxoff %d, use %d\n",
			    offset, offset_def);
			offset = offset_def;
		}
	}
	sc->rx_npoll_off = offset;

	/*
	 * NPOLLING TX CPU offset
	 */
	if (sc->tx_ring_cnt == ncpus2) {
		offset = 0;
	} else {
		offset_def = (sc->tx_ring_cnt * device_get_unit(dev)) % ncpus2;
		offset = device_getenv_int(dev, "npoll.txoff", offset_def);
		if (offset >= ncpus2 ||
		    offset % sc->tx_ring_cnt != 0) {
			device_printf(dev, "invalid npoll.txoff %d, use %d\n",
			    offset, offset_def);
			offset = offset_def;
		}
	}
	sc->tx_npoll_off = offset;
#endif

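	/*
	 * Worked example for the offset defaults above (hypothetical
	 * numbers): with ncpus2 == 8, unit 1 of a dual-port adapter
	 * using 4 RX rings gets offset_def = (4 * 1) % 8 = 4, so its
	 * rings are polled starting at cpu4 while unit 0 starts at
	 * cpu0; the two ports do not contend for the same cpus.
	 */
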
	/* Allocate interrupt */
	error = igb_alloc_intr(sc);
	if (error)
		goto failed;

	/* Setup serializers */
	igb_setup_serializer(sc);

	/* Allocate the appropriate stats memory */
	if (sc->vf_ifp) {
		sc->stats = kmalloc(sizeof(struct e1000_vf_stats), M_DEVBUF,
		    M_WAITOK | M_ZERO);
		igb_vf_init_stats(sc);
	} else {
		sc->stats = kmalloc(sizeof(struct e1000_hw_stats), M_DEVBUF,
		    M_WAITOK | M_ZERO);
	}

	/* Allocate multicast array memory. */
	sc->mta = kmalloc(ETHER_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES,
	    M_DEVBUF, M_WAITOK);

	/* Some adapter-specific advanced features */
	if (sc->hw.mac.type >= e1000_i350) {
#ifdef notyet
		igb_set_sysctl_value(adapter, "dma_coalesce",
		    "configure dma coalesce",
		    &adapter->dma_coalesce, igb_dma_coalesce);
		igb_set_sysctl_value(adapter, "eee_disabled",
		    "enable Energy Efficient Ethernet",
		    &adapter->hw.dev_spec._82575.eee_disable,
		    igb_eee_disabled);
#else
		sc->dma_coalesce = igb_dma_coalesce;
		sc->hw.dev_spec._82575.eee_disable = igb_eee_disabled;
#endif
		if (sc->hw.phy.media_type == e1000_media_type_copper) {
			if (sc->hw.mac.type == e1000_i354)
				e1000_set_eee_i354(&sc->hw);
			else
				e1000_set_eee_i350(&sc->hw);
		}
	}

	/*
	 * Start from a known state; this is important for reading the
	 * NVM and MAC address.
	 */
	e1000_reset_hw(&sc->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (sc->hw.mac.type != e1000_i210 && sc->hw.mac.type != e1000_i211 &&
	    e1000_validate_nvm_checksum(&sc->hw) < 0) {
		/*
		 * Some PCI-E parts fail the first check due to
		 * the link being in sleep state; call it again.
		 * If it fails a second time, it's a real issue.
		 */
		if (e1000_validate_nvm_checksum(&sc->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto failed;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&sc->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto failed;
	}
	if (!igb_is_valid_ether_addr(sc->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto failed;
	}

	/* Setup OS specific network interface */
	igb_setup_ifp(sc);

	/* Add sysctl tree; this must come after igb_setup_ifp() */
	igb_add_sysctl(sc);

	/* Now get a good starting state */
	igb_reset(sc);

	/* Initialize statistics */
	igb_update_stats_counters(sc);

	sc->hw.mac.get_link_status = 1;
	igb_update_link_status(sc);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&sc->hw)) {
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");
	}

	/* Determine if we have to control management hardware */
	if (e1000_enable_mng_pass_thru(&sc->hw))
		sc->flags |= IGB_FLAG_HAS_MGMT;

	/*
	 * Setup Wake-on-Lan
	 */
	/* APME bit in EEPROM is mapped to WUC.APME */
	eeprom_data = E1000_READ_REG(&sc->hw, E1000_WUC) & E1000_WUC_APME;
	if (eeprom_data)
		sc->wol = E1000_WUFC_MAG;
	/* XXX disable WOL */
	sc->wol = 0;

#ifdef notyet
	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
#endif

#ifdef notyet
	igb_add_hw_stats(adapter);
#endif

	/*
	 * Disable interrupts so that spurious interrupts (line-based,
	 * MSI or even MSI-X), which have been observed on several
	 * types of LOMs, are not handled.
	 */
	igb_disable_intr(sc);

	error = igb_setup_intr(sc);
	if (error) {
		ether_ifdetach(&sc->arpcom.ac_if);
		goto failed;
	}
	return 0;

failed:
	igb_detach(dev);
	return error;
}

static int
igb_detach(device_t dev)
{
	struct igb_softc *sc = device_get_softc(dev);

	if (device_is_attached(dev)) {
		struct ifnet *ifp = &sc->arpcom.ac_if;

		ifnet_serialize_all(ifp);

		igb_stop(sc);

		e1000_phy_hw_reset(&sc->hw);

		/* Give control back to firmware */
		igb_rel_mgmt(sc);
		igb_rel_hw_control(sc);

		if (sc->wol) {
			E1000_WRITE_REG(&sc->hw, E1000_WUC, E1000_WUC_PME_EN);
			E1000_WRITE_REG(&sc->hw, E1000_WUFC, sc->wol);
			igb_enable_wol(dev);
		}

		igb_teardown_intr(sc);

		ifnet_deserialize_all(ifp);

		ether_ifdetach(ifp);
	} else if (sc->mem_res != NULL) {
		igb_rel_hw_control(sc);
	}
	bus_generic_detach(dev);

	igb_free_intr(sc);

	if (sc->msix_mem_res != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_mem_rid,
		    sc->msix_mem_res);
	}
	if (sc->mem_res != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid,
		    sc->mem_res);
	}

	igb_free_rings(sc);

	if (sc->mta != NULL)
		kfree(sc->mta, M_DEVBUF);
	if (sc->stats != NULL)
		kfree(sc->stats, M_DEVBUF);
	if (sc->serializes != NULL)
		kfree(sc->serializes, M_DEVBUF);

	return 0;
}

static int
igb_shutdown(device_t dev)
{
	return igb_suspend(dev);
}

static int
igb_suspend(device_t dev)
{
	struct igb_softc *sc = device_get_softc(dev);
	struct ifnet *ifp = &sc->arpcom.ac_if;

	ifnet_serialize_all(ifp);

	igb_stop(sc);

	igb_rel_mgmt(sc);
	igb_rel_hw_control(sc);

	if (sc->wol) {
		E1000_WRITE_REG(&sc->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&sc->hw, E1000_WUFC, sc->wol);
		igb_enable_wol(dev);
	}

	ifnet_deserialize_all(ifp);

	return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
	struct igb_softc *sc = device_get_softc(dev);
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int i;

	ifnet_serialize_all(ifp);

	igb_init(sc);
	igb_get_mgmt(sc);

	for (i = 0; i < sc->tx_ring_inuse; ++i)
		ifsq_devstart_sched(sc->tx_rings[i].ifsq);

	ifnet_deserialize_all(ifp);

	return bus_generic_resume(dev);
}

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
{
	struct igb_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int max_frame_size, mask, reinit;
	int error = 0;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	switch (command) {
	case SIOCSIFMTU:
		max_frame_size = 9234;
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		sc->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN +
		    ETHER_CRC_LEN;

		if (ifp->if_flags & IFF_RUNNING)
			igb_init(sc);
		break;

	case SIOCSIFFLAGS:
		if (ifp->if_flags & IFF_UP) {
			if (ifp->if_flags & IFF_RUNNING) {
				if ((ifp->if_flags ^ sc->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					igb_disable_promisc(sc);
					igb_set_promisc(sc);
				}
			} else {
				igb_init(sc);
			}
		} else if (ifp->if_flags & IFF_RUNNING) {
			igb_stop(sc);
		}
		sc->if_flags = ifp->if_flags;
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		if (ifp->if_flags & IFF_RUNNING) {
			igb_disable_intr(sc);
			igb_set_multi(sc);
#ifdef IFPOLL_ENABLE
			if (!(ifp->if_flags & IFF_NPOLLING))
#endif
				igb_enable_intr(sc);
		}
		break;

	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		if (e1000_check_reset_block(&sc->hw)) {
			if_printf(ifp, "Media change is "
			    "blocked due to SOL/IDER session.\n");
			break;
		}
		/* FALL THROUGH */

	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
		break;

	case SIOCSIFCAP:
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_RXCSUM) {
			ifp->if_capenable ^= IFCAP_RXCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_TXCSUM) {
			ifp->if_capenable ^= IFCAP_TXCSUM;
			if (ifp->if_capenable & IFCAP_TXCSUM)
				ifp->if_hwassist |= IGB_CSUM_FEATURES;
			else
				ifp->if_hwassist &= ~IGB_CSUM_FEATURES;
		}
		if (mask & IFCAP_TSO) {
			ifp->if_capenable ^= IFCAP_TSO;
			if (ifp->if_capenable & IFCAP_TSO)
				ifp->if_hwassist |= CSUM_TSO;
			else
				ifp->if_hwassist &= ~CSUM_TSO;
		}
		if (mask & IFCAP_RSS)
			ifp->if_capenable ^= IFCAP_RSS;
		if (reinit && (ifp->if_flags & IFF_RUNNING))
			igb_init(sc);
		break;

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}
	return error;
}

static void
igb_init(void *xsc)
{
	struct igb_softc *sc = xsc;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	boolean_t polling;
	int i;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	igb_stop(sc);

	/* Get the latest mac address; the user can use a LAA */
	bcopy(IF_LLADDR(ifp), sc->hw.mac.addr, ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&sc->hw, sc->hw.mac.addr, 0);

	igb_reset(sc);
	igb_update_link_status(sc);

	E1000_WRITE_REG(&sc->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Configure for OS presence */
	igb_get_mgmt(sc);

	polling = FALSE;
#ifdef IFPOLL_ENABLE
	if (ifp->if_flags & IFF_NPOLLING)
		polling = TRUE;
#endif

	/* Configure the RX/TX rings that will be used */
	igb_set_ring_inuse(sc, polling);
	ifq_set_subq_mask(&ifp->if_snd, sc->tx_ring_inuse - 1);

	/* Initialize interrupt */
	igb_init_intr(sc);

	/* Prepare transmit descriptors and buffers */
	for (i = 0; i < sc->tx_ring_inuse; ++i)
		igb_init_tx_ring(&sc->tx_rings[i]);
	igb_init_tx_unit(sc);

	/* Setup Multicast table */
	igb_set_multi(sc);

#if 0
	/*
	 * Figure out the desired mbuf pool
	 * for doing jumbo/packetsplit
	 */
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;
#endif

	/* Prepare receive descriptors and buffers */
	for (i = 0; i < sc->rx_ring_inuse; ++i) {
		int error;

		error = igb_init_rx_ring(&sc->rx_rings[i]);
		if (error) {
			if_printf(ifp, "Could not setup receive structures\n");
			igb_stop(sc);
			return;
		}
	}
	igb_init_rx_unit(sc);

	/* Enable VLAN support */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		igb_set_vlan(sc);

	/* Don't lose promiscuous settings */
	igb_set_promisc(sc);

	ifp->if_flags |= IFF_RUNNING;
	for (i = 0; i < sc->tx_ring_inuse; ++i) {
		ifsq_clr_oactive(sc->tx_rings[i].ifsq);
		ifsq_watchdog_start(&sc->tx_rings[i].tx_watchdog);
	}

	igb_set_timer_cpuid(sc, polling);
	callout_reset_bycpu(&sc->timer, hz, igb_timer, sc, sc->timer_cpuid);
	e1000_clear_hw_cntrs_base_generic(&sc->hw);

	/* This clears any pending interrupts */
	E1000_READ_REG(&sc->hw, E1000_ICR);

	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (polling) {
		igb_disable_intr(sc);
	} else {
		igb_enable_intr(sc);
		E1000_WRITE_REG(&sc->hw, E1000_ICS, E1000_ICS_LSC);
	}

	/* Set Energy Efficient Ethernet */
	if (sc->hw.phy.media_type == e1000_media_type_copper) {
		if (sc->hw.mac.type == e1000_i354)
			e1000_set_eee_i354(&sc->hw);
		else
			e1000_set_eee_i350(&sc->hw);
	}
}

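/*
 * Note on the polling interplay in igb_init() above: when IFF_NPOLLING
 * is set, igb_set_ring_inuse() recomputes the active ring counts for
 * polled operation, the interrupt stays masked, and the ifpoll
 * framework drives igb_npoll_rx()/igb_npoll_tx() instead of the
 * interrupt handlers.
 */
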
static void
igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct igb_softc *sc = ifp->if_softc;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	if ((ifp->if_flags & IFF_RUNNING) == 0)
		sc->hw.mac.get_link_status = 1;
	igb_update_link_status(sc);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!sc->link_active)
		return;

	ifmr->ifm_status |= IFM_ACTIVE;

	switch (sc->link_speed) {
	case 10:
		ifmr->ifm_active |= IFM_10_T;
		break;

	case 100:
		/*
		 * Support for 100Mb SFP - these are Fiber,
		 * but the media type appears as serdes.
		 */
		if (sc->hw.phy.media_type == e1000_media_type_internal_serdes)
			ifmr->ifm_active |= IFM_100_FX;
		else
			ifmr->ifm_active |= IFM_100_TX;
		break;

	case 1000:
		ifmr->ifm_active |= IFM_1000_T;
		break;
	}

	if (sc->link_duplex == FULL_DUPLEX)
		ifmr->ifm_active |= IFM_FDX;
	else
		ifmr->ifm_active |= IFM_HDX;
}

static int
igb_media_change(struct ifnet *ifp)
{
	struct igb_softc *sc = ifp->if_softc;
	struct ifmedia *ifm = &sc->media;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return EINVAL;

	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		sc->hw.mac.autoneg = DO_AUTO_NEG;
		sc->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;

	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		sc->hw.mac.autoneg = DO_AUTO_NEG;
		sc->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;

	case IFM_100_TX:
		sc->hw.mac.autoneg = FALSE;
		sc->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			sc->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			sc->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;

	case IFM_10_T:
		sc->hw.mac.autoneg = FALSE;
		sc->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			sc->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			sc->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;

	default:
		if_printf(ifp, "Unsupported media type\n");
		break;
	}

	igb_init(sc);

	return 0;
}

static void
igb_set_promisc(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	struct e1000_hw *hw = &sc->hw;
	uint32_t reg;

	if (sc->vf_ifp) {
		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
		return;
	}

	reg = E1000_READ_REG(hw, E1000_RCTL);
	if (ifp->if_flags & IFF_PROMISC) {
		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
		E1000_WRITE_REG(hw, E1000_RCTL, reg);
	} else if (ifp->if_flags & IFF_ALLMULTI) {
		reg |= E1000_RCTL_MPE;
		reg &= ~E1000_RCTL_UPE;
		E1000_WRITE_REG(hw, E1000_RCTL, reg);
	}
}

static void
igb_disable_promisc(struct igb_softc *sc)
{
	struct e1000_hw *hw = &sc->hw;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	uint32_t reg;
	int mcnt = 0;

	if (sc->vf_ifp) {
		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
		return;
	}
	reg = E1000_READ_REG(hw, E1000_RCTL);
	reg &= ~E1000_RCTL_UPE;
	if (ifp->if_flags & IFF_ALLMULTI) {
		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
	} else {
		struct ifmultiaddr *ifma;

		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			if (ifma->ifma_addr->sa_family != AF_LINK)
				continue;
			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
				break;
			mcnt++;
		}
	}
	/* Don't disable if in MAX groups */
	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
		reg &= ~E1000_RCTL_MPE;
	E1000_WRITE_REG(hw, E1000_RCTL, reg);
}

static void
igb_set_multi(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	struct ifmultiaddr *ifma;
	uint32_t reg_rctl = 0;
	uint8_t *mta;
	int mcnt = 0;

	mta = sc->mta;
	bzero(mta, ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);

	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;

		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
			break;

		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
		mcnt++;
	}

	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
		reg_rctl = E1000_READ_REG(&sc->hw, E1000_RCTL);
		reg_rctl |= E1000_RCTL_MPE;
		E1000_WRITE_REG(&sc->hw, E1000_RCTL, reg_rctl);
	} else {
		e1000_update_mc_addr_list(&sc->hw, mta, mcnt);
	}
}

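/*
 * Layout of the sc->mta array consumed above: up to
 * MAX_NUM_MULTICAST_ADDRESSES six-byte link-layer addresses packed
 * back to back.  Once the interface joins more groups than the table
 * holds, the perfect filter is abandoned and E1000_RCTL_MPE accepts
 * all multicast frames instead.
 */
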
static void
igb_timer(void *xsc)
{
	struct igb_softc *sc = xsc;

	lwkt_serialize_enter(&sc->main_serialize);

	igb_update_link_status(sc);
	igb_update_stats_counters(sc);

	callout_reset_bycpu(&sc->timer, hz, igb_timer, sc, sc->timer_cpuid);

	lwkt_serialize_exit(&sc->main_serialize);
}

static void
igb_update_link_status(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	struct e1000_hw *hw = &sc->hw;
	uint32_t link_check, thstat, ctrl;

	link_check = thstat = ctrl = 0;

	/* Get the cached link value or read for real */
	switch (hw->phy.media_type) {
	case e1000_media_type_copper:
		if (hw->mac.get_link_status) {
			/* Do the work to read phy */
			e1000_check_for_link(hw);
			link_check = !hw->mac.get_link_status;
		} else {
			link_check = TRUE;
		}
		break;

	case e1000_media_type_fiber:
		e1000_check_for_link(hw);
		link_check = E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU;
		break;

	case e1000_media_type_internal_serdes:
		e1000_check_for_link(hw);
		link_check = hw->mac.serdes_has_link;
		break;

	/* VF device is type_unknown */
	case e1000_media_type_unknown:
		e1000_check_for_link(hw);
		link_check = !hw->mac.get_link_status;
		/* FALL THROUGH */
	default:
		break;
	}

	/* Check for thermal downshift or shutdown */
	if (hw->mac.type == e1000_i350) {
		thstat = E1000_READ_REG(hw, E1000_THSTAT);
		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
	}

	/* Now we check if a transition has happened */
	if (link_check && sc->link_active == 0) {
		e1000_get_speed_and_duplex(hw,
		    &sc->link_speed, &sc->link_duplex);
		if (bootverbose) {
			const char *flowctl;

			/* Get the flow control for display */
			switch (hw->fc.current_mode) {
			case e1000_fc_rx_pause:
				flowctl = "RX";
				break;

			case e1000_fc_tx_pause:
				flowctl = "TX";
				break;

			case e1000_fc_full:
				flowctl = "Full";
				break;

			default:
				flowctl = "None";
				break;
			}

			if_printf(ifp, "Link is up %d Mbps %s, "
			    "Flow control: %s\n",
			    sc->link_speed,
			    sc->link_duplex == FULL_DUPLEX ?
			    "Full Duplex" : "Half Duplex",
			    flowctl);
		}
		sc->link_active = 1;

		ifp->if_baudrate = sc->link_speed * 1000000;
		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
		    (thstat & E1000_THSTAT_LINK_THROTTLE))
			if_printf(ifp, "Link: thermal downshift\n");
		/* Delay Link Up for Phy update */
		if ((hw->mac.type == e1000_i210 ||
		     hw->mac.type == e1000_i211) &&
		    hw->phy.id == I210_I_PHY_ID)
			msec_delay(IGB_I210_LINK_DELAY);
		/* This can sleep */
		ifp->if_link_state = LINK_STATE_UP;
		if_link_state_change(ifp);
	} else if (!link_check && sc->link_active == 1) {
		ifp->if_baudrate = sc->link_speed = 0;
		sc->link_duplex = 0;
		if (bootverbose)
			if_printf(ifp, "Link is Down\n");
		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
		    (thstat & E1000_THSTAT_PWR_DOWN))
			if_printf(ifp, "Link: thermal shutdown\n");
		sc->link_active = 0;
		/* This can sleep */
		ifp->if_link_state = LINK_STATE_DOWN;
		if_link_state_change(ifp);
	}
}

static void
igb_stop(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int i;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	igb_disable_intr(sc);

	callout_stop(&sc->timer);

	ifp->if_flags &= ~IFF_RUNNING;
	for (i = 0; i < sc->tx_ring_cnt; ++i) {
		ifsq_clr_oactive(sc->tx_rings[i].ifsq);
		ifsq_watchdog_stop(&sc->tx_rings[i].tx_watchdog);
		sc->tx_rings[i].tx_flags &= ~IGB_TXFLAG_ENABLED;
	}

	e1000_reset_hw(&sc->hw);
	E1000_WRITE_REG(&sc->hw, E1000_WUC, 0);

	e1000_led_off(&sc->hw);
	e1000_cleanup_led(&sc->hw);

	for (i = 0; i < sc->tx_ring_cnt; ++i)
		igb_free_tx_ring(&sc->tx_rings[i]);
	for (i = 0; i < sc->rx_ring_cnt; ++i)
		igb_free_rx_ring(&sc->rx_rings[i]);
}

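/*
 * Worked example for the 82575 jumbo-frame PBA adjustment inside
 * igb_reset() below, assuming the 9234 byte maximum frame (16 being
 * sizeof(struct e1000_tx_desc)): min_tx = 2 * (9234 + 16 - 4) = 18492
 * bytes, rounded up to 19KB; min_rx = 9234 bytes, rounded up to 10KB.
 * TX space is stolen from the RX portion of the PBA only as long as
 * RX keeps at least min_rx.
 */
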
static void
igb_reset(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	struct e1000_hw *hw = &sc->hw;
	struct e1000_fc_info *fc = &hw->fc;
	uint32_t pba = 0;
	uint16_t hwm;

	/* Let the firmware know the OS is in control */
	igb_get_hw_control(sc);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 */
	switch (hw->mac.type) {
	case e1000_82575:
		pba = E1000_PBA_32K;
		break;

	case e1000_82576:
	case e1000_vfadapt:
		pba = E1000_READ_REG(hw, E1000_RXPBS);
		pba &= E1000_RXPBS_SIZE_MASK_82576;
		break;

	case e1000_82580:
	case e1000_i350:
	case e1000_i354:
	case e1000_vfadapt_i350:
		pba = E1000_READ_REG(hw, E1000_RXPBS);
		pba = e1000_rxpbs_adjust_82580(pba);
		break;

	case e1000_i210:
	case e1000_i211:
		pba = E1000_PBA_34K;
		break;

	default:
		break;
	}

	/* Special needs in case of Jumbo frames */
	if (hw->mac.type == e1000_82575 && ifp->if_mtu > ETHERMTU) {
		uint32_t tx_space, min_tx, min_rx;

		pba = E1000_READ_REG(hw, E1000_PBA);
		tx_space = pba >> 16;
		pba &= 0xffff;

		min_tx = (sc->max_frame_size +
		    sizeof(struct e1000_tx_desc) - ETHER_CRC_LEN) * 2;
		min_tx = roundup2(min_tx, 1024);
		min_tx >>= 10;
		min_rx = sc->max_frame_size;
		min_rx = roundup2(min_rx, 1024);
		min_rx >>= 10;
		if (tx_space < min_tx && (min_tx - tx_space) < pba) {
			pba = pba - (min_tx - tx_space);
			/*
			 * If short on rx space, rx wins
			 * and must trump the tx adjustment.
			 */
			if (pba < min_rx)
				pba = min_rx;
		}
		E1000_WRITE_REG(hw, E1000_PBA, pba);
	}

	/*
	 * These parameters control the automatic generation (Tx) and
	 * response (Rx) to Ethernet PAUSE frames.
	 * - High water mark should allow for at least two frames to be
	 *   received after sending an XOFF.
	 * - Low water mark works best when it is very near the high water mark.
	 *   This allows the receiver to restart by sending XON when it has
	 *   drained a bit.
	 */
	hwm = min(((pba << 10) * 9 / 10),
	    ((pba << 10) - 2 * sc->max_frame_size));

	if (hw->mac.type < e1000_82576) {
		fc->high_water = hwm & 0xFFF8;	/* 8-byte granularity */
		fc->low_water = fc->high_water - 8;
	} else {
		fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
		fc->low_water = fc->high_water - 16;
	}
	fc->pause_time = IGB_FC_PAUSE_TIME;
	fc->send_xon = TRUE;
	fc->requested_mode = e1000_fc_default;

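	/*
	 * Example for the watermarks above (assuming the RX packet
	 * buffer is 64KB, i.e. pba == 64, with the default 1518 byte
	 * max frame): hwm = min(65536 * 9 / 10, 65536 - 2 * 1518) =
	 * 58982, so on 82576 and later high_water = 58976 and
	 * low_water = 58960.
	 */
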
	/* Issue a global reset */
	e1000_reset_hw(hw);
	E1000_WRITE_REG(hw, E1000_WUC, 0);

	if (e1000_init_hw(hw) < 0)
		if_printf(ifp, "Hardware Initialization Failed\n");

	/* Setup DMA Coalescing */
	if (hw->mac.type > e1000_82580 && hw->mac.type != e1000_i211) {
		uint32_t dmac;
		uint32_t reg;

		if (sc->dma_coalesce == 0) {
			/*
			 * Disabled
			 */
			reg = E1000_READ_REG(hw, E1000_DMACR);
			reg &= ~E1000_DMACR_DMAC_EN;
			E1000_WRITE_REG(hw, E1000_DMACR, reg);
			goto reset_out;
		}

		/* Set starting thresholds */
		E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);

		hwm = 64 * pba - sc->max_frame_size / 16;
		if (hwm < 64 * (pba - 6))
			hwm = 64 * (pba - 6);
		reg = E1000_READ_REG(hw, E1000_FCRTC);
		reg &= ~E1000_FCRTC_RTH_COAL_MASK;
		reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
		    & E1000_FCRTC_RTH_COAL_MASK);
		E1000_WRITE_REG(hw, E1000_FCRTC, reg);

		dmac = pba - sc->max_frame_size / 512;
		if (dmac < pba - 10)
			dmac = pba - 10;
		reg = E1000_READ_REG(hw, E1000_DMACR);
		reg &= ~E1000_DMACR_DMACTHR_MASK;
		reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
		    & E1000_DMACR_DMACTHR_MASK);
		/* Transition to L0s or L1 if available */
		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
		/* timer = value in sc->dma_coalesce in 32usec intervals */
		reg |= (sc->dma_coalesce >> 5);
		E1000_WRITE_REG(hw, E1000_DMACR, reg);

		/* Set the interval before transition */
		reg = E1000_READ_REG(hw, E1000_DMCTLX);
		reg |= 0x80000004;
		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);

		/* Free space in tx packet buffer to wake from DMA coal */
		E1000_WRITE_REG(hw, E1000_DMCTXTH,
		    (20480 - (2 * sc->max_frame_size)) >> 6);

		/* Make low power state decision controlled by DMA coal */
		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
		reg &= ~E1000_PCIEMISC_LX_DECISION;
		E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
		if_printf(ifp, "DMA Coalescing enabled\n");
	} else if (hw->mac.type == e1000_82580) {
		uint32_t reg = E1000_READ_REG(hw, E1000_PCIEMISC);

		E1000_WRITE_REG(hw, E1000_DMACR, 0);
		E1000_WRITE_REG(hw, E1000_PCIEMISC,
		    reg & ~E1000_PCIEMISC_LX_DECISION);
	}

reset_out:
	E1000_WRITE_REG(&sc->hw, E1000_VET, ETHERTYPE_VLAN);
	e1000_get_phy_info(hw);
	e1000_check_for_link(hw);
}

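/*
 * igb_setup_ifp() below wires one ALTQ subqueue to each TX ring
 * (ifq_set_subq_cnt() plus the per-ring ifsq_set_priv() calls); the
 * subqueue mask installed later by igb_init() then restricts dispatch
 * to the rings actually in use.
 */
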
static void
igb_setup_ifp(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int i;

	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_init = igb_init;
	ifp->if_ioctl = igb_ioctl;
	ifp->if_start = igb_start;
	ifp->if_serialize = igb_serialize;
	ifp->if_deserialize = igb_deserialize;
	ifp->if_tryserialize = igb_tryserialize;
#ifdef INVARIANTS
	ifp->if_serialize_assert = igb_serialize_assert;
#endif
#ifdef IFPOLL_ENABLE
	ifp->if_npoll = igb_npoll;
#endif

	ifq_set_maxlen(&ifp->if_snd, sc->tx_rings[0].num_tx_desc - 1);
	ifq_set_ready(&ifp->if_snd);
	ifq_set_subq_cnt(&ifp->if_snd, sc->tx_ring_cnt);

	ifp->if_mapsubq = ifq_mapsubq_mask;
	ifq_set_subq_mask(&ifp->if_snd, 0);

	ether_ifattach(ifp, sc->hw.mac.addr, NULL);

	ifp->if_capabilities =
	    IFCAP_HWCSUM | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_TSO;
	if (IGB_ENABLE_HWRSS(sc))
		ifp->if_capabilities |= IFCAP_RSS;
	ifp->if_capenable = ifp->if_capabilities;
	ifp->if_hwassist = IGB_CSUM_FEATURES | CSUM_TSO;

	/*
	 * Tell the upper layer(s) we support long frames
	 */
	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);

	/* Setup TX rings and subqueues */
	for (i = 0; i < sc->tx_ring_cnt; ++i) {
		struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
		struct igb_tx_ring *txr = &sc->tx_rings[i];

		ifsq_set_cpuid(ifsq, txr->tx_intr_cpuid);
		ifsq_set_priv(ifsq, txr);
		ifsq_set_hw_serialize(ifsq, &txr->tx_serialize);
		txr->ifsq = ifsq;

		ifsq_watchdog_init(&txr->tx_watchdog, ifsq, igb_watchdog);
	}

	/*
	 * Specify the media types supported by this adapter and register
	 * callbacks to update media and link information
	 */
	ifmedia_init(&sc->media, IFM_IMASK, igb_media_change, igb_media_status);
	if (sc->hw.phy.media_type == e1000_media_type_fiber ||
	    sc->hw.phy.media_type == e1000_media_type_internal_serdes) {
		ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
		    0, NULL);
		ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
	} else {
		ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T, 0, NULL);
		ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T | IFM_FDX,
		    0, NULL);
		ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX, 0, NULL);
		ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
		    0, NULL);
		if (sc->hw.phy.type != e1000_phy_ife) {
			ifmedia_add(&sc->media,
			    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
			ifmedia_add(&sc->media,
			    IFM_ETHER | IFM_1000_T, 0, NULL);
		}
	}
	ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO);
}

static void
igb_add_sysctl(struct igb_softc *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *tree;
	char node[32];
	int i;

	ctx = device_get_sysctl_ctx(sc->dev);
	tree = device_get_sysctl_tree(sc->dev);
	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
	    OID_AUTO, "rxr", CTLFLAG_RD, &sc->rx_ring_cnt, 0, "# of RX rings");
	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
	    OID_AUTO, "rxr_inuse", CTLFLAG_RD, &sc->rx_ring_inuse, 0,
	    "# of RX rings used");
	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
	    OID_AUTO, "txr", CTLFLAG_RD, &sc->tx_ring_cnt, 0, "# of TX rings");
	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
	    OID_AUTO, "txr_inuse", CTLFLAG_RD, &sc->tx_ring_inuse, 0,
	    "# of TX rings used");
	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
	    OID_AUTO, "rxd", CTLFLAG_RD, &sc->rx_rings[0].num_rx_desc, 0,
	    "# of RX descs");
	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
	    OID_AUTO, "txd", CTLFLAG_RD, &sc->tx_rings[0].num_tx_desc, 0,
	    "# of TX descs");

	if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
		    OID_AUTO, "intr_rate", CTLTYPE_INT | CTLFLAG_RW,
		    sc, 0, igb_sysctl_intr_rate, "I", "interrupt rate");
	} else {
		for (i = 0; i < sc->msix_cnt; ++i) {
			struct igb_msix_data *msix = &sc->msix_data[i];

			ksnprintf(node, sizeof(node), "msix%d_rate", i);
			SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
			    OID_AUTO, node, CTLTYPE_INT | CTLFLAG_RW,
			    msix, 0, igb_sysctl_msix_rate, "I",
			    msix->msix_rate_desc);
		}
	}

	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
	    OID_AUTO, "tx_intr_nsegs", CTLTYPE_INT | CTLFLAG_RW,
	    sc, 0, igb_sysctl_tx_intr_nsegs, "I",
	    "# of segments per TX interrupt");

	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
	    OID_AUTO, "tx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
	    sc, 0, igb_sysctl_tx_wreg_nsegs, "I",
	    "# of segments sent before write to hardware register");

	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
	    OID_AUTO, "rx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
	    sc, 0, igb_sysctl_rx_wreg_nsegs, "I",
	    "# of segments received before write to hardware register");

#ifdef IFPOLL_ENABLE
	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
	    OID_AUTO, "npoll_rxoff", CTLTYPE_INT|CTLFLAG_RW,
	    sc, 0, igb_sysctl_npoll_rxoff, "I", "NPOLLING RX cpu offset");
	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
	    OID_AUTO, "npoll_txoff", CTLTYPE_INT|CTLFLAG_RW,
	    sc, 0, igb_sysctl_npoll_txoff, "I", "NPOLLING TX cpu offset");
#endif

#ifdef IGB_RSS_DEBUG
	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
	    OID_AUTO, "rss_debug", CTLFLAG_RW, &sc->rss_debug, 0,
	    "RSS debug level");
	for (i = 0; i < sc->rx_ring_cnt; ++i) {
		ksnprintf(node, sizeof(node), "rx%d_pkt", i);
		SYSCTL_ADD_ULONG(ctx,
		    SYSCTL_CHILDREN(tree), OID_AUTO, node,
		    CTLFLAG_RW, &sc->rx_rings[i].rx_packets, "RXed packets");
	}
#endif

#ifdef IGB_TSS_DEBUG
	for (i = 0; i < sc->tx_ring_cnt; ++i) {
		ksnprintf(node, sizeof(node), "tx%d_pkt", i);
		SYSCTL_ADD_ULONG(ctx,
		    SYSCTL_CHILDREN(tree), OID_AUTO, node,
		    CTLFLAG_RW, &sc->tx_rings[i].tx_packets, "TXed packets");
	}
#endif
}

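/*
 * Example (hypothetical unit 0): the knobs registered above surface
 * under the device's sysctl tree, e.g. dev.igb.0.rxr_inuse and
 * dev.igb.0.tx_intr_nsegs; the RW ones can be tuned at runtime with
 * sysctl(8).
 */
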
static int
igb_alloc_rings(struct igb_softc *sc)
{
	int error, i;

	/*
	 * Create top level busdma tag
	 */
	error = bus_dma_tag_create(NULL, 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
	    &sc->parent_tag);
	if (error) {
		device_printf(sc->dev, "could not create top level DMA tag\n");
		return error;
	}

	/*
	 * Allocate TX descriptor rings and buffers
	 */
	sc->tx_rings = kmalloc_cachealign(
	    sizeof(struct igb_tx_ring) * sc->tx_ring_cnt,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < sc->tx_ring_cnt; ++i) {
		struct igb_tx_ring *txr = &sc->tx_rings[i];

		/* Set up some basics */
		txr->sc = sc;
		txr->me = i;
		lwkt_serialize_init(&txr->tx_serialize);

		error = igb_create_tx_ring(txr);
		if (error)
			return error;
	}

	/*
	 * Allocate RX descriptor rings and buffers
	 */
	sc->rx_rings = kmalloc_cachealign(
	    sizeof(struct igb_rx_ring) * sc->rx_ring_cnt,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < sc->rx_ring_cnt; ++i) {
		struct igb_rx_ring *rxr = &sc->rx_rings[i];

		/* Set up some basics */
		rxr->sc = sc;
		rxr->me = i;
		lwkt_serialize_init(&rxr->rx_serialize);

		error = igb_create_rx_ring(rxr);
		if (error)
			return error;
	}

	return 0;
}

static void
igb_free_rings(struct igb_softc *sc)
{
	int i;

	if (sc->tx_rings != NULL) {
		for (i = 0; i < sc->tx_ring_cnt; ++i) {
			struct igb_tx_ring *txr = &sc->tx_rings[i];

			igb_destroy_tx_ring(txr, txr->num_tx_desc);
		}
		kfree(sc->tx_rings, M_DEVBUF);
	}

	if (sc->rx_rings != NULL) {
		for (i = 0; i < sc->rx_ring_cnt; ++i) {
			struct igb_rx_ring *rxr = &sc->rx_rings[i];

			igb_destroy_rx_ring(rxr, rxr->num_rx_desc);
		}
		kfree(sc->rx_rings, M_DEVBUF);
	}
}

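/*
 * Example for the "txd" validation in igb_create_tx_ring() below
 * (assuming IGB_DBA_ALIGN is 128 and 16-byte legacy descriptors):
 * the count must be a multiple of 128 / 16 = 8 and lie within
 * [IGB_MIN_TXD, IGB_MAX_TXD], so e.g. hw.igb.txd=1020 (not a multiple
 * of 8) falls back to IGB_DEFAULT_TXD.
 */
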
static int
igb_create_tx_ring(struct igb_tx_ring *txr)
{
	int tsize, error, i, ntxd;

	/*
	 * Validate the number of transmit descriptors. It must not
	 * exceed the hardware maximum, and the ring size in bytes
	 * must be a multiple of IGB_DBA_ALIGN.
	 */
	ntxd = device_getenv_int(txr->sc->dev, "txd", igb_txd);
	if ((ntxd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN != 0 ||
	    ntxd > IGB_MAX_TXD || ntxd < IGB_MIN_TXD) {
		device_printf(txr->sc->dev,
		    "Using %d TX descriptors instead of %d!\n",
		    IGB_DEFAULT_TXD, ntxd);
		txr->num_tx_desc = IGB_DEFAULT_TXD;
	} else {
		txr->num_tx_desc = ntxd;
	}

	/*
	 * Allocate TX descriptor ring
	 */
	tsize = roundup2(txr->num_tx_desc * sizeof(union e1000_adv_tx_desc),
	    IGB_DBA_ALIGN);
	txr->txdma.dma_vaddr = bus_dmamem_coherent_any(txr->sc->parent_tag,
	    IGB_DBA_ALIGN, tsize, BUS_DMA_WAITOK,
	    &txr->txdma.dma_tag, &txr->txdma.dma_map, &txr->txdma.dma_paddr);
	if (txr->txdma.dma_vaddr == NULL) {
		device_printf(txr->sc->dev,
		    "Unable to allocate TX Descriptor memory\n");
		return ENOMEM;
	}
	txr->tx_base = txr->txdma.dma_vaddr;
	bzero(txr->tx_base, tsize);

	tsize = __VM_CACHELINE_ALIGN(
	    sizeof(struct igb_tx_buf) * txr->num_tx_desc);
	txr->tx_buf = kmalloc_cachealign(tsize, M_DEVBUF, M_WAITOK | M_ZERO);

	/*
	 * Allocate TX head write-back buffer
	 */
	txr->tx_hdr = bus_dmamem_coherent_any(txr->sc->parent_tag,
	    __VM_CACHELINE_SIZE, __VM_CACHELINE_SIZE, BUS_DMA_WAITOK,
	    &txr->tx_hdr_dtag, &txr->tx_hdr_dmap, &txr->tx_hdr_paddr);
	if (txr->tx_hdr == NULL) {
		device_printf(txr->sc->dev,
		    "Unable to allocate TX head write-back buffer\n");
		return ENOMEM;
	}

	/*
	 * Create DMA tag for TX buffers
	 */
	error = bus_dma_tag_create(txr->sc->parent_tag,
	    1, 0,		/* alignment, bounds */
	    BUS_SPACE_MAXADDR,	/* lowaddr */
	    BUS_SPACE_MAXADDR,	/* highaddr */
	    NULL, NULL,		/* filter, filterarg */
	    IGB_TSO_SIZE,	/* maxsize */
	    IGB_MAX_SCATTER,	/* nsegments */
	    PAGE_SIZE,		/* maxsegsize */
	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
	    BUS_DMA_ONEBPAGE,	/* flags */
	    &txr->tx_tag);
	if (error) {
		device_printf(txr->sc->dev, "Unable to allocate TX DMA tag\n");
		kfree(txr->tx_buf, M_DEVBUF);
		txr->tx_buf = NULL;
		return error;
	}

	/*
	 * Create DMA maps for TX buffers
	 */
	for (i = 0; i < txr->num_tx_desc; ++i) {
		struct igb_tx_buf *txbuf = &txr->tx_buf[i];

		error = bus_dmamap_create(txr->tx_tag,
		    BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &txbuf->map);
		if (error) {
			device_printf(txr->sc->dev,
			    "Unable to create TX DMA map\n");
			igb_destroy_tx_ring(txr, i);
			return error;
		}
	}

	if (txr->sc->hw.mac.type == e1000_82575)
		txr->tx_flags |= IGB_TXFLAG_TSO_IPLEN0;

	/*
	 * Initialize various watermarks
	 */
	txr->spare_desc = IGB_TX_SPARE;
	txr->intr_nsegs = txr->num_tx_desc / 16;
	txr->wreg_nsegs = IGB_DEF_TXWREG_NSEGS;
	txr->oact_hi_desc = txr->num_tx_desc / 2;
	txr->oact_lo_desc = txr->num_tx_desc / 8;
	if (txr->oact_lo_desc > IGB_TX_OACTIVE_MAX)
		txr->oact_lo_desc = IGB_TX_OACTIVE_MAX;
	if (txr->oact_lo_desc < txr->spare_desc + IGB_TX_RESERVED)
		txr->oact_lo_desc = txr->spare_desc + IGB_TX_RESERVED;

	return 0;
}

static void
igb_free_tx_ring(struct igb_tx_ring *txr)
{
	int i;

	for (i = 0; i < txr->num_tx_desc; ++i) {
		struct igb_tx_buf *txbuf = &txr->tx_buf[i];

		if (txbuf->m_head != NULL) {
			bus_dmamap_unload(txr->tx_tag, txbuf->map);
			m_freem(txbuf->m_head);
			txbuf->m_head = NULL;
		}
	}
}

static void
igb_destroy_tx_ring(struct igb_tx_ring *txr, int ndesc)
{
	int i;

	if (txr->txdma.dma_vaddr != NULL) {
		bus_dmamap_unload(txr->txdma.dma_tag, txr->txdma.dma_map);
		bus_dmamem_free(txr->txdma.dma_tag, txr->txdma.dma_vaddr,
		    txr->txdma.dma_map);
		bus_dma_tag_destroy(txr->txdma.dma_tag);
		txr->txdma.dma_vaddr = NULL;
	}

	if (txr->tx_hdr != NULL) {
		bus_dmamap_unload(txr->tx_hdr_dtag, txr->tx_hdr_dmap);
		bus_dmamem_free(txr->tx_hdr_dtag, txr->tx_hdr,
		    txr->tx_hdr_dmap);
		bus_dma_tag_destroy(txr->tx_hdr_dtag);
		txr->tx_hdr = NULL;
	}

	if (txr->tx_buf == NULL)
		return;

	for (i = 0; i < ndesc; ++i) {
		struct igb_tx_buf *txbuf = &txr->tx_buf[i];

		KKASSERT(txbuf->m_head == NULL);
		bus_dmamap_destroy(txr->tx_tag, txbuf->map);
	}
	bus_dma_tag_destroy(txr->tx_tag);

	kfree(txr->tx_buf, M_DEVBUF);
	txr->tx_buf = NULL;
}

static void
igb_init_tx_ring(struct igb_tx_ring *txr)
{
	/* Clear the old descriptor contents */
	bzero(txr->tx_base,
	    sizeof(union e1000_adv_tx_desc) * txr->num_tx_desc);

	/* Clear TX head write-back buffer */
	*(txr->tx_hdr) = 0;

	/* Reset indices */
	txr->next_avail_desc = 0;
	txr->next_to_clean = 0;
	txr->tx_nsegs = 0;

	/* Set number of descriptors available */
	txr->tx_avail = txr->num_tx_desc;

	/* Enable this TX ring */
	txr->tx_flags |= IGB_TXFLAG_ENABLED;
}

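/*
 * TX completion model used below: instead of the driver reading the
 * TDH register on every cleanup pass (a costly PCIe round trip),
 * igb_init_tx_unit() points TDWBAL/TDWBAH at txr->tx_hdr and the
 * hardware DMAs the consumed head index into that cache line;
 * igb_txeof() then only compares next_to_clean against *(txr->tx_hdr).
 */
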
2019 */ 2020 txdctl |= IGB_TX_PTHRESH; 2021 txdctl |= IGB_TX_HTHRESH << 8; 2022 txdctl |= IGB_TX_WTHRESH << 16; 2023 txdctl |= E1000_TXDCTL_QUEUE_ENABLE; 2024 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); 2025 } 2026 2027 if (sc->vf_ifp) 2028 return; 2029 2030 e1000_config_collision_dist(hw); 2031 2032 /* Program the Transmit Control Register */ 2033 tctl = E1000_READ_REG(hw, E1000_TCTL); 2034 tctl &= ~E1000_TCTL_CT; 2035 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | 2036 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); 2037 2038 /* This write will effectively turn on the transmit unit. */ 2039 E1000_WRITE_REG(hw, E1000_TCTL, tctl); 2040 } 2041 2042 static boolean_t 2043 igb_txcsum_ctx(struct igb_tx_ring *txr, struct mbuf *mp) 2044 { 2045 struct e1000_adv_tx_context_desc *TXD; 2046 uint32_t vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx; 2047 int ehdrlen, ctxd, ip_hlen = 0; 2048 boolean_t offload = TRUE; 2049 2050 if ((mp->m_pkthdr.csum_flags & IGB_CSUM_FEATURES) == 0) 2051 offload = FALSE; 2052 2053 vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0; 2054 2055 ctxd = txr->next_avail_desc; 2056 TXD = (struct e1000_adv_tx_context_desc *)&txr->tx_base[ctxd]; 2057 2058 /* 2059 * In advanced descriptors the vlan tag must 2060 * be placed into the context descriptor, thus 2061 * we need to be here just for that setup. 2062 */ 2063 if (mp->m_flags & M_VLANTAG) { 2064 uint16_t vlantag; 2065 2066 vlantag = htole16(mp->m_pkthdr.ether_vlantag); 2067 vlan_macip_lens |= (vlantag << E1000_ADVTXD_VLAN_SHIFT); 2068 } else if (!offload) { 2069 return FALSE; 2070 } 2071 2072 ehdrlen = mp->m_pkthdr.csum_lhlen; 2073 KASSERT(ehdrlen > 0, ("invalid ether hlen")); 2074 2075 /* Set the ether header length */ 2076 vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; 2077 if (mp->m_pkthdr.csum_flags & CSUM_IP) { 2078 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; 2079 ip_hlen = mp->m_pkthdr.csum_iphlen; 2080 KASSERT(ip_hlen > 0, ("invalid ip hlen")); 2081 } 2082 vlan_macip_lens |= ip_hlen; 2083 2084 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; 2085 if (mp->m_pkthdr.csum_flags & CSUM_TCP) 2086 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; 2087 else if (mp->m_pkthdr.csum_flags & CSUM_UDP) 2088 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP; 2089 2090 /* 2091 * 82575 needs the TX context index added; the queue 2092 * index is used as TX context index here. 
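 *
 * Illustration, assuming the IDX field starts at bit 4 of
 * mss_l4len_idx (as the shift below implies): TX ring 2 would
 * program mss_l4len_idx = 2 << 4 = 0x20.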
2093 */ 2094 if (txr->sc->hw.mac.type == e1000_82575) 2095 mss_l4len_idx = txr->me << 4; 2096 2097 /* Now copy bits into descriptor */ 2098 TXD->vlan_macip_lens = htole32(vlan_macip_lens); 2099 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); 2100 TXD->seqnum_seed = htole32(0); 2101 TXD->mss_l4len_idx = htole32(mss_l4len_idx); 2102 2103 /* We've consumed the first desc, adjust counters */ 2104 if (++ctxd == txr->num_tx_desc) 2105 ctxd = 0; 2106 txr->next_avail_desc = ctxd; 2107 --txr->tx_avail; 2108 2109 return offload; 2110 } 2111 2112 static void 2113 igb_txeof(struct igb_tx_ring *txr) 2114 { 2115 int first, hdr, avail; 2116 2117 if (txr->tx_avail == txr->num_tx_desc) 2118 return; 2119 2120 first = txr->next_to_clean; 2121 hdr = *(txr->tx_hdr); 2122 2123 if (first == hdr) 2124 return; 2125 2126 avail = txr->tx_avail; 2127 while (first != hdr) { 2128 struct igb_tx_buf *txbuf = &txr->tx_buf[first]; 2129 2130 ++avail; 2131 if (txbuf->m_head) { 2132 bus_dmamap_unload(txr->tx_tag, txbuf->map); 2133 m_freem(txbuf->m_head); 2134 txbuf->m_head = NULL; 2135 } 2136 if (++first == txr->num_tx_desc) 2137 first = 0; 2138 } 2139 txr->next_to_clean = first; 2140 txr->tx_avail = avail; 2141 2142 /* 2143 * If we have a minimum of free descriptors, clear OACTIVE 2144 * to tell the stack that it is OK to send packets. 2145 */ 2146 if (IGB_IS_NOT_OACTIVE(txr)) { 2147 ifsq_clr_oactive(txr->ifsq); 2148 2149 /* 2150 * We have enough TX descriptors, turn off 2151 * the watchdog. We allow a small number of 2152 * packets (roughly intr_nsegs) pending on 2153 * the transmit ring. 2154 */ 2155 txr->tx_watchdog.wd_timer = 0; 2156 } 2157 } 2158 2159 static int 2160 igb_create_rx_ring(struct igb_rx_ring *rxr) 2161 { 2162 int rsize, i, error, nrxd; 2163 2164 /* 2165 * Validate the number of receive descriptors. It must not exceed 2166 * the hardware maximum, and must be a multiple of IGB_DBA_ALIGN.
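 * The arithmetic mirrors the "txd" sketch above: advanced RX
 * descriptors are likewise 16 bytes each, so any multiple of 8
 * descriptors within [IGB_MIN_RXD, IGB_MAX_RXD] passes this check.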
2167 */ 2168 nrxd = device_getenv_int(rxr->sc->dev, "rxd", igb_rxd); 2169 if ((nrxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN != 0 || 2170 nrxd > IGB_MAX_RXD || nrxd < IGB_MIN_RXD) { 2171 device_printf(rxr->sc->dev, 2172 "Using %d RX descriptors instead of %d!\n", 2173 IGB_DEFAULT_RXD, nrxd); 2174 rxr->num_rx_desc = IGB_DEFAULT_RXD; 2175 } else { 2176 rxr->num_rx_desc = nrxd; 2177 } 2178 2179 /* 2180 * Allocate RX descriptor ring 2181 */ 2182 rsize = roundup2(rxr->num_rx_desc * sizeof(union e1000_adv_rx_desc), 2183 IGB_DBA_ALIGN); 2184 rxr->rxdma.dma_vaddr = bus_dmamem_coherent_any(rxr->sc->parent_tag, 2185 IGB_DBA_ALIGN, rsize, BUS_DMA_WAITOK, 2186 &rxr->rxdma.dma_tag, &rxr->rxdma.dma_map, 2187 &rxr->rxdma.dma_paddr); 2188 if (rxr->rxdma.dma_vaddr == NULL) { 2189 device_printf(rxr->sc->dev, 2190 "Unable to allocate RxDescriptor memory\n"); 2191 return ENOMEM; 2192 } 2193 rxr->rx_base = rxr->rxdma.dma_vaddr; 2194 bzero(rxr->rx_base, rsize); 2195 2196 rsize = __VM_CACHELINE_ALIGN( 2197 sizeof(struct igb_rx_buf) * rxr->num_rx_desc); 2198 rxr->rx_buf = kmalloc_cachealign(rsize, M_DEVBUF, M_WAITOK | M_ZERO); 2199 2200 /* 2201 * Create DMA tag for RX buffers 2202 */ 2203 error = bus_dma_tag_create(rxr->sc->parent_tag, 2204 1, 0, /* alignment, bounds */ 2205 BUS_SPACE_MAXADDR, /* lowaddr */ 2206 BUS_SPACE_MAXADDR, /* highaddr */ 2207 NULL, NULL, /* filter, filterarg */ 2208 MCLBYTES, /* maxsize */ 2209 1, /* nsegments */ 2210 MCLBYTES, /* maxsegsize */ 2211 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, /* flags */ 2212 &rxr->rx_tag); 2213 if (error) { 2214 device_printf(rxr->sc->dev, 2215 "Unable to create RX payload DMA tag\n"); 2216 kfree(rxr->rx_buf, M_DEVBUF); 2217 rxr->rx_buf = NULL; 2218 return error; 2219 } 2220 2221 /* 2222 * Create spare DMA map for RX buffers 2223 */ 2224 error = bus_dmamap_create(rxr->rx_tag, BUS_DMA_WAITOK, 2225 &rxr->rx_sparemap); 2226 if (error) { 2227 device_printf(rxr->sc->dev, 2228 "Unable to create spare RX DMA maps\n"); 2229 bus_dma_tag_destroy(rxr->rx_tag); 2230 kfree(rxr->rx_buf, M_DEVBUF); 2231 rxr->rx_buf = NULL; 2232 return error; 2233 } 2234 2235 /* 2236 * Create DMA maps for RX buffers 2237 */ 2238 for (i = 0; i < rxr->num_rx_desc; i++) { 2239 struct igb_rx_buf *rxbuf = &rxr->rx_buf[i]; 2240 2241 error = bus_dmamap_create(rxr->rx_tag, 2242 BUS_DMA_WAITOK, &rxbuf->map); 2243 if (error) { 2244 device_printf(rxr->sc->dev, 2245 "Unable to create RX DMA maps\n"); 2246 igb_destroy_rx_ring(rxr, i); 2247 return error; 2248 } 2249 } 2250 2251 /* 2252 * Initialize various watermark 2253 */ 2254 rxr->wreg_nsegs = IGB_DEF_RXWREG_NSEGS; 2255 2256 return 0; 2257 } 2258 2259 static void 2260 igb_free_rx_ring(struct igb_rx_ring *rxr) 2261 { 2262 int i; 2263 2264 for (i = 0; i < rxr->num_rx_desc; ++i) { 2265 struct igb_rx_buf *rxbuf = &rxr->rx_buf[i]; 2266 2267 if (rxbuf->m_head != NULL) { 2268 bus_dmamap_unload(rxr->rx_tag, rxbuf->map); 2269 m_freem(rxbuf->m_head); 2270 rxbuf->m_head = NULL; 2271 } 2272 } 2273 2274 if (rxr->fmp != NULL) 2275 m_freem(rxr->fmp); 2276 rxr->fmp = NULL; 2277 rxr->lmp = NULL; 2278 } 2279 2280 static void 2281 igb_destroy_rx_ring(struct igb_rx_ring *rxr, int ndesc) 2282 { 2283 int i; 2284 2285 if (rxr->rxdma.dma_vaddr != NULL) { 2286 bus_dmamap_unload(rxr->rxdma.dma_tag, rxr->rxdma.dma_map); 2287 bus_dmamem_free(rxr->rxdma.dma_tag, rxr->rxdma.dma_vaddr, 2288 rxr->rxdma.dma_map); 2289 bus_dma_tag_destroy(rxr->rxdma.dma_tag); 2290 rxr->rxdma.dma_vaddr = NULL; 2291 } 2292 2293 if (rxr->rx_buf == NULL) 2294 return; 2295 2296 for (i = 0; i < ndesc; 
++i) { 2297 struct igb_rx_buf *rxbuf = &rxr->rx_buf[i]; 2298 2299 KKASSERT(rxbuf->m_head == NULL); 2300 bus_dmamap_destroy(rxr->rx_tag, rxbuf->map); 2301 } 2302 bus_dmamap_destroy(rxr->rx_tag, rxr->rx_sparemap); 2303 bus_dma_tag_destroy(rxr->rx_tag); 2304 2305 kfree(rxr->rx_buf, M_DEVBUF); 2306 rxr->rx_buf = NULL; 2307 } 2308 2309 static void 2310 igb_setup_rxdesc(union e1000_adv_rx_desc *rxd, const struct igb_rx_buf *rxbuf) 2311 { 2312 rxd->read.pkt_addr = htole64(rxbuf->paddr); 2313 rxd->wb.upper.status_error = 0; 2314 } 2315 2316 static int 2317 igb_newbuf(struct igb_rx_ring *rxr, int i, boolean_t wait) 2318 { 2319 struct mbuf *m; 2320 bus_dma_segment_t seg; 2321 bus_dmamap_t map; 2322 struct igb_rx_buf *rxbuf; 2323 int error, nseg; 2324 2325 m = m_getcl(wait ? M_WAITOK : M_NOWAIT, MT_DATA, M_PKTHDR); 2326 if (m == NULL) { 2327 if (wait) { 2328 if_printf(&rxr->sc->arpcom.ac_if, 2329 "Unable to allocate RX mbuf\n"); 2330 } 2331 return ENOBUFS; 2332 } 2333 m->m_len = m->m_pkthdr.len = MCLBYTES; 2334 2335 if (rxr->sc->max_frame_size <= MCLBYTES - ETHER_ALIGN) 2336 m_adj(m, ETHER_ALIGN); 2337 2338 error = bus_dmamap_load_mbuf_segment(rxr->rx_tag, 2339 rxr->rx_sparemap, m, &seg, 1, &nseg, BUS_DMA_NOWAIT); 2340 if (error) { 2341 m_freem(m); 2342 if (wait) { 2343 if_printf(&rxr->sc->arpcom.ac_if, 2344 "Unable to load RX mbuf\n"); 2345 } 2346 return error; 2347 } 2348 2349 rxbuf = &rxr->rx_buf[i]; 2350 if (rxbuf->m_head != NULL) 2351 bus_dmamap_unload(rxr->rx_tag, rxbuf->map); 2352 2353 map = rxbuf->map; 2354 rxbuf->map = rxr->rx_sparemap; 2355 rxr->rx_sparemap = map; 2356 2357 rxbuf->m_head = m; 2358 rxbuf->paddr = seg.ds_addr; 2359 2360 igb_setup_rxdesc(&rxr->rx_base[i], rxbuf); 2361 return 0; 2362 } 2363 2364 static int 2365 igb_init_rx_ring(struct igb_rx_ring *rxr) 2366 { 2367 int i; 2368 2369 /* Clear the ring contents */ 2370 bzero(rxr->rx_base, 2371 rxr->num_rx_desc * sizeof(union e1000_adv_rx_desc)); 2372 2373 /* Now replenish the ring mbufs */ 2374 for (i = 0; i < rxr->num_rx_desc; ++i) { 2375 int error; 2376 2377 error = igb_newbuf(rxr, i, TRUE); 2378 if (error) 2379 return error; 2380 } 2381 2382 /* Setup our descriptor indices */ 2383 rxr->next_to_check = 0; 2384 2385 rxr->fmp = NULL; 2386 rxr->lmp = NULL; 2387 rxr->discard = FALSE; 2388 2389 return 0; 2390 } 2391 2392 static void 2393 igb_init_rx_unit(struct igb_softc *sc) 2394 { 2395 struct ifnet *ifp = &sc->arpcom.ac_if; 2396 struct e1000_hw *hw = &sc->hw; 2397 uint32_t rctl, rxcsum, srrctl = 0; 2398 int i; 2399 2400 /* 2401 * Make sure receives are disabled while setting 2402 * up the descriptor ring 2403 */ 2404 rctl = E1000_READ_REG(hw, E1000_RCTL); 2405 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); 2406 2407 #if 0 2408 /* 2409 ** Set up for header split 2410 */ 2411 if (igb_header_split) { 2412 /* Use a standard mbuf for the header */ 2413 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; 2414 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; 2415 } else 2416 #endif 2417 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; 2418 2419 /* 2420 ** Set up for jumbo frames 2421 */ 2422 if (ifp->if_mtu > ETHERMTU) { 2423 rctl |= E1000_RCTL_LPE; 2424 #if 0 2425 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) { 2426 srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 2427 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; 2428 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) { 2429 srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 2430 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; 2431 } 2432 /* Set maximum packet len */ 2433 psize = 
adapter->max_frame_size; 2434 /* are we on a vlan? */ 2435 if (adapter->ifp->if_vlantrunk != NULL) 2436 psize += VLAN_TAG_SIZE; 2437 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize); 2438 #else 2439 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 2440 rctl |= E1000_RCTL_SZ_2048; 2441 #endif 2442 } else { 2443 rctl &= ~E1000_RCTL_LPE; 2444 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 2445 rctl |= E1000_RCTL_SZ_2048; 2446 } 2447 2448 /* Setup the Base and Length of the Rx Descriptor Rings */ 2449 for (i = 0; i < sc->rx_ring_inuse; ++i) { 2450 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 2451 uint64_t bus_addr = rxr->rxdma.dma_paddr; 2452 uint32_t rxdctl; 2453 2454 E1000_WRITE_REG(hw, E1000_RDLEN(i), 2455 rxr->num_rx_desc * sizeof(struct e1000_rx_desc)); 2456 E1000_WRITE_REG(hw, E1000_RDBAH(i), 2457 (uint32_t)(bus_addr >> 32)); 2458 E1000_WRITE_REG(hw, E1000_RDBAL(i), 2459 (uint32_t)bus_addr); 2460 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl); 2461 /* Enable this Queue */ 2462 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); 2463 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; 2464 rxdctl &= 0xFFF00000; 2465 rxdctl |= IGB_RX_PTHRESH; 2466 rxdctl |= IGB_RX_HTHRESH << 8; 2467 /* 2468 * Don't set WTHRESH to a value above 1 on 82576, see: 2469 * 82576 specification update errata #26 2470 */ 2471 rxdctl |= IGB_RX_WTHRESH << 16; 2472 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); 2473 } 2474 2475 rxcsum = E1000_READ_REG(&sc->hw, E1000_RXCSUM); 2476 rxcsum &= ~(E1000_RXCSUM_PCSS_MASK | E1000_RXCSUM_IPPCSE); 2477 2478 /* 2479 * Receive Checksum Offload for TCP and UDP 2480 * 2481 * Checksum offloading is also enabled if multiple receive 2482 * queues are to be supported, since we need it to figure out 2483 * fragments. 2484 */ 2485 if ((ifp->if_capenable & IFCAP_RXCSUM) || IGB_ENABLE_HWRSS(sc)) { 2486 /* 2487 * NOTE: 2488 * PCSD must be enabled to enable multiple 2489 * receive queues. 2490 */ 2491 rxcsum |= E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL | 2492 E1000_RXCSUM_PCSD; 2493 } else { 2494 rxcsum &= ~(E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL | 2495 E1000_RXCSUM_PCSD); 2496 } 2497 E1000_WRITE_REG(&sc->hw, E1000_RXCSUM, rxcsum); 2498 2499 if (IGB_ENABLE_HWRSS(sc)) { 2500 uint8_t key[IGB_NRSSRK * IGB_RSSRK_SIZE]; 2501 uint32_t reta_shift; 2502 int j, r; 2503 2504 /* 2505 * NOTE: 2506 * When we reach here, RSS has already been disabled 2507 * in igb_stop(), so we can safely configure the RSS 2508 * key and redirection table. 2509 */ 2510 2511 /* 2512 * Configure RSS key 2513 */ 2514 toeplitz_get_key(key, sizeof(key)); 2515 for (i = 0; i < IGB_NRSSRK; ++i) { 2516 uint32_t rssrk; 2517 2518 rssrk = IGB_RSSRK_VAL(key, i); 2519 IGB_RSS_DPRINTF(sc, 1, "rssrk%d 0x%08x\n", i, rssrk); 2520 2521 E1000_WRITE_REG(hw, E1000_RSSRK(i), rssrk); 2522 } 2523 2524 /* 2525 * Configure the RSS redirection table in the following fashion: 2526 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)] 2527 */ 2528 reta_shift = IGB_RETA_SHIFT; 2529 if (hw->mac.type == e1000_82575) 2530 reta_shift = IGB_RETA_SHIFT_82575; 2531 2532 r = 0; 2533 for (j = 0; j < IGB_NRETA; ++j) { 2534 uint32_t reta = 0; 2535 2536 for (i = 0; i < IGB_RETA_SIZE; ++i) { 2537 uint32_t q; 2538 2539 q = (r % sc->rx_ring_inuse) << reta_shift; 2540 reta |= q << (8 * i); 2541 ++r; 2542 } 2543 IGB_RSS_DPRINTF(sc, 1, "reta 0x%08x\n", reta); 2544 E1000_WRITE_REG(hw, E1000_RETA(j), reta); 2545 } 2546 2547 /* 2548 * Enable multiple receive queues. 2549 * Enable IPv4 RSS standard hash functions.
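 * (With only the IPV4 and IPV4_TCP hash fields enabled, non-TCP
 *  IPv4 packets, including fragments, hash on the address pair
 *  alone, so all pieces of one flow keep landing on one RX ring;
 *  this pairs with the PCSD/fragment note above.)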
2550 * Disable RSS interrupt on 82575 2551 */ 2552 E1000_WRITE_REG(&sc->hw, E1000_MRQC, 2553 E1000_MRQC_ENABLE_RSS_4Q | 2554 E1000_MRQC_RSS_FIELD_IPV4_TCP | 2555 E1000_MRQC_RSS_FIELD_IPV4); 2556 } 2557 2558 /* Setup the Receive Control Register */ 2559 rctl &= ~(3 << E1000_RCTL_MO_SHIFT); 2560 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | 2561 E1000_RCTL_RDMTS_HALF | 2562 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); 2563 /* Strip CRC bytes. */ 2564 rctl |= E1000_RCTL_SECRC; 2565 /* Make sure VLAN Filters are off */ 2566 rctl &= ~E1000_RCTL_VFE; 2567 /* Don't store bad packets */ 2568 rctl &= ~E1000_RCTL_SBP; 2569 2570 /* Enable Receives */ 2571 E1000_WRITE_REG(hw, E1000_RCTL, rctl); 2572 2573 /* 2574 * Setup the HW Rx Head and Tail Descriptor Pointers 2575 * - needs to be after enable 2576 */ 2577 for (i = 0; i < sc->rx_ring_inuse; ++i) { 2578 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 2579 2580 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check); 2581 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->num_rx_desc - 1); 2582 } 2583 } 2584 2585 static void 2586 igb_rx_refresh(struct igb_rx_ring *rxr, int i) 2587 { 2588 if (--i < 0) 2589 i = rxr->num_rx_desc - 1; 2590 E1000_WRITE_REG(&rxr->sc->hw, E1000_RDT(rxr->me), i); 2591 } 2592 2593 static void 2594 igb_rxeof(struct igb_rx_ring *rxr, int count) 2595 { 2596 struct ifnet *ifp = &rxr->sc->arpcom.ac_if; 2597 union e1000_adv_rx_desc *cur; 2598 uint32_t staterr; 2599 int i, ncoll = 0, cpuid = mycpuid; 2600 2601 i = rxr->next_to_check; 2602 cur = &rxr->rx_base[i]; 2603 staterr = le32toh(cur->wb.upper.status_error); 2604 2605 if ((staterr & E1000_RXD_STAT_DD) == 0) 2606 return; 2607 2608 while ((staterr & E1000_RXD_STAT_DD) && count != 0) { 2609 struct pktinfo *pi = NULL, pi0; 2610 struct igb_rx_buf *rxbuf = &rxr->rx_buf[i]; 2611 struct mbuf *m = NULL; 2612 boolean_t eop; 2613 2614 eop = (staterr & E1000_RXD_STAT_EOP) ? 
TRUE : FALSE; 2615 if (eop) 2616 --count; 2617 2618 ++ncoll; 2619 if ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) == 0 && 2620 !rxr->discard) { 2621 struct mbuf *mp = rxbuf->m_head; 2622 uint32_t hash, hashtype; 2623 uint16_t vlan; 2624 int len; 2625 2626 len = le16toh(cur->wb.upper.length); 2627 if ((rxr->sc->hw.mac.type == e1000_i350 || 2628 rxr->sc->hw.mac.type == e1000_i354) && 2629 (staterr & E1000_RXDEXT_STATERR_LB)) 2630 vlan = be16toh(cur->wb.upper.vlan); 2631 else 2632 vlan = le16toh(cur->wb.upper.vlan); 2633 2634 hash = le32toh(cur->wb.lower.hi_dword.rss); 2635 hashtype = le32toh(cur->wb.lower.lo_dword.data) & 2636 E1000_RXDADV_RSSTYPE_MASK; 2637 2638 IGB_RSS_DPRINTF(rxr->sc, 10, 2639 "ring%d, hash 0x%08x, hashtype %u\n", 2640 rxr->me, hash, hashtype); 2641 2642 bus_dmamap_sync(rxr->rx_tag, rxbuf->map, 2643 BUS_DMASYNC_POSTREAD); 2644 2645 if (igb_newbuf(rxr, i, FALSE) != 0) { 2646 IFNET_STAT_INC(ifp, iqdrops, 1); 2647 goto discard; 2648 } 2649 2650 mp->m_len = len; 2651 if (rxr->fmp == NULL) { 2652 mp->m_pkthdr.len = len; 2653 rxr->fmp = mp; 2654 rxr->lmp = mp; 2655 } else { 2656 rxr->lmp->m_next = mp; 2657 rxr->lmp = rxr->lmp->m_next; 2658 rxr->fmp->m_pkthdr.len += len; 2659 } 2660 2661 if (eop) { 2662 m = rxr->fmp; 2663 rxr->fmp = NULL; 2664 rxr->lmp = NULL; 2665 2666 m->m_pkthdr.rcvif = ifp; 2667 IFNET_STAT_INC(ifp, ipackets, 1); 2668 2669 if (ifp->if_capenable & IFCAP_RXCSUM) 2670 igb_rxcsum(staterr, m); 2671 2672 if (staterr & E1000_RXD_STAT_VP) { 2673 m->m_pkthdr.ether_vlantag = vlan; 2674 m->m_flags |= M_VLANTAG; 2675 } 2676 2677 if (ifp->if_capenable & IFCAP_RSS) { 2678 pi = igb_rssinfo(m, &pi0, 2679 hash, hashtype, staterr); 2680 } 2681 #ifdef IGB_RSS_DEBUG 2682 rxr->rx_packets++; 2683 #endif 2684 } 2685 } else { 2686 IFNET_STAT_INC(ifp, ierrors, 1); 2687 discard: 2688 igb_setup_rxdesc(cur, rxbuf); 2689 if (!eop) 2690 rxr->discard = TRUE; 2691 else 2692 rxr->discard = FALSE; 2693 if (rxr->fmp != NULL) { 2694 m_freem(rxr->fmp); 2695 rxr->fmp = NULL; 2696 rxr->lmp = NULL; 2697 } 2698 m = NULL; 2699 } 2700 2701 if (m != NULL) 2702 ifp->if_input(ifp, m, pi, cpuid); 2703 2704 /* Advance our pointers to the next descriptor. 
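 * RDT is deliberately not bumped per packet: the igb_rx_refresh()
 * calls below batch the doorbell write every wreg_nsegs replenished
 * descriptors, so with, say, wreg_nsegs = 32 one register write
 * covers 32 recycled buffers.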
*/ 2705 if (++i == rxr->num_rx_desc) 2706 i = 0; 2707 2708 if (ncoll >= rxr->wreg_nsegs) { 2709 igb_rx_refresh(rxr, i); 2710 ncoll = 0; 2711 } 2712 2713 cur = &rxr->rx_base[i]; 2714 staterr = le32toh(cur->wb.upper.status_error); 2715 } 2716 rxr->next_to_check = i; 2717 2718 if (ncoll > 0) 2719 igb_rx_refresh(rxr, i); 2720 } 2721 2722 2723 static void 2724 igb_set_vlan(struct igb_softc *sc) 2725 { 2726 struct e1000_hw *hw = &sc->hw; 2727 uint32_t reg; 2728 #if 0 2729 struct ifnet *ifp = sc->arpcom.ac_if; 2730 #endif 2731 2732 if (sc->vf_ifp) { 2733 e1000_rlpml_set_vf(hw, sc->max_frame_size + VLAN_TAG_SIZE); 2734 return; 2735 } 2736 2737 reg = E1000_READ_REG(hw, E1000_CTRL); 2738 reg |= E1000_CTRL_VME; 2739 E1000_WRITE_REG(hw, E1000_CTRL, reg); 2740 2741 #if 0 2742 /* Enable the Filter Table */ 2743 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) { 2744 reg = E1000_READ_REG(hw, E1000_RCTL); 2745 reg &= ~E1000_RCTL_CFIEN; 2746 reg |= E1000_RCTL_VFE; 2747 E1000_WRITE_REG(hw, E1000_RCTL, reg); 2748 } 2749 #endif 2750 2751 /* Update the frame size */ 2752 E1000_WRITE_REG(&sc->hw, E1000_RLPML, 2753 sc->max_frame_size + VLAN_TAG_SIZE); 2754 2755 #if 0 2756 /* Don't bother with table if no vlans */ 2757 if ((adapter->num_vlans == 0) || 2758 ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)) 2759 return; 2760 /* 2761 ** A soft reset zero's out the VFTA, so 2762 ** we need to repopulate it now. 2763 */ 2764 for (int i = 0; i < IGB_VFTA_SIZE; i++) 2765 if (adapter->shadow_vfta[i] != 0) { 2766 if (adapter->vf_ifp) 2767 e1000_vfta_set_vf(hw, 2768 adapter->shadow_vfta[i], TRUE); 2769 else 2770 E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, 2771 i, adapter->shadow_vfta[i]); 2772 } 2773 #endif 2774 } 2775 2776 static void 2777 igb_enable_intr(struct igb_softc *sc) 2778 { 2779 if (sc->intr_type != PCI_INTR_TYPE_MSIX) { 2780 lwkt_serialize_handler_enable(&sc->main_serialize); 2781 } else { 2782 int i; 2783 2784 for (i = 0; i < sc->msix_cnt; ++i) { 2785 lwkt_serialize_handler_enable( 2786 sc->msix_data[i].msix_serialize); 2787 } 2788 } 2789 2790 if ((sc->flags & IGB_FLAG_SHARED_INTR) == 0) { 2791 if (sc->intr_type == PCI_INTR_TYPE_MSIX) 2792 E1000_WRITE_REG(&sc->hw, E1000_EIAC, sc->intr_mask); 2793 else 2794 E1000_WRITE_REG(&sc->hw, E1000_EIAC, 0); 2795 E1000_WRITE_REG(&sc->hw, E1000_EIAM, sc->intr_mask); 2796 E1000_WRITE_REG(&sc->hw, E1000_EIMS, sc->intr_mask); 2797 E1000_WRITE_REG(&sc->hw, E1000_IMS, E1000_IMS_LSC); 2798 } else { 2799 E1000_WRITE_REG(&sc->hw, E1000_IMS, IMS_ENABLE_MASK); 2800 } 2801 E1000_WRITE_FLUSH(&sc->hw); 2802 } 2803 2804 static void 2805 igb_disable_intr(struct igb_softc *sc) 2806 { 2807 if ((sc->flags & IGB_FLAG_SHARED_INTR) == 0) { 2808 E1000_WRITE_REG(&sc->hw, E1000_EIMC, 0xffffffff); 2809 E1000_WRITE_REG(&sc->hw, E1000_EIAC, 0); 2810 } 2811 E1000_WRITE_REG(&sc->hw, E1000_IMC, 0xffffffff); 2812 E1000_WRITE_FLUSH(&sc->hw); 2813 2814 if (sc->intr_type != PCI_INTR_TYPE_MSIX) { 2815 lwkt_serialize_handler_disable(&sc->main_serialize); 2816 } else { 2817 int i; 2818 2819 for (i = 0; i < sc->msix_cnt; ++i) { 2820 lwkt_serialize_handler_disable( 2821 sc->msix_data[i].msix_serialize); 2822 } 2823 } 2824 } 2825 2826 /* 2827 * Bit of a misnomer, what this really means is 2828 * to enable OS management of the system... 
aka 2829 * to disable special hardware management features 2830 */ 2831 static void 2832 igb_get_mgmt(struct igb_softc *sc) 2833 { 2834 if (sc->flags & IGB_FLAG_HAS_MGMT) { 2835 int manc2h = E1000_READ_REG(&sc->hw, E1000_MANC2H); 2836 int manc = E1000_READ_REG(&sc->hw, E1000_MANC); 2837 2838 /* disable hardware interception of ARP */ 2839 manc &= ~E1000_MANC_ARP_EN; 2840 2841 /* enable receiving management packets to the host */ 2842 manc |= E1000_MANC_EN_MNG2HOST; 2843 manc2h |= 1 << 5; /* Mng Port 623 */ 2844 manc2h |= 1 << 6; /* Mng Port 664 */ 2845 E1000_WRITE_REG(&sc->hw, E1000_MANC2H, manc2h); 2846 E1000_WRITE_REG(&sc->hw, E1000_MANC, manc); 2847 } 2848 } 2849 2850 /* 2851 * Give control back to hardware management controller 2852 * if there is one. 2853 */ 2854 static void 2855 igb_rel_mgmt(struct igb_softc *sc) 2856 { 2857 if (sc->flags & IGB_FLAG_HAS_MGMT) { 2858 int manc = E1000_READ_REG(&sc->hw, E1000_MANC); 2859 2860 /* Re-enable hardware interception of ARP */ 2861 manc |= E1000_MANC_ARP_EN; 2862 manc &= ~E1000_MANC_EN_MNG2HOST; 2863 2864 E1000_WRITE_REG(&sc->hw, E1000_MANC, manc); 2865 } 2866 } 2867 2868 /* 2869 * Sets CTRL_EXT:DRV_LOAD bit. 2870 * 2871 * For ASF and Pass Through versions of f/w this means that 2872 * the driver is loaded. 2873 */ 2874 static void 2875 igb_get_hw_control(struct igb_softc *sc) 2876 { 2877 uint32_t ctrl_ext; 2878 2879 if (sc->vf_ifp) 2880 return; 2881 2882 /* Let firmware know the driver has taken over */ 2883 ctrl_ext = E1000_READ_REG(&sc->hw, E1000_CTRL_EXT); 2884 E1000_WRITE_REG(&sc->hw, E1000_CTRL_EXT, 2885 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); 2886 } 2887 2888 /* 2889 * Resets CTRL_EXT:DRV_LOAD bit. 2890 * 2891 * For ASF and Pass Through versions of f/w this means that the 2892 * driver is no longer loaded. 2893 */ 2894 static void 2895 igb_rel_hw_control(struct igb_softc *sc) 2896 { 2897 uint32_t ctrl_ext; 2898 2899 if (sc->vf_ifp) 2900 return; 2901 2902 /* Let firmware take over control of h/w */ 2903 ctrl_ext = E1000_READ_REG(&sc->hw, E1000_CTRL_EXT); 2904 E1000_WRITE_REG(&sc->hw, E1000_CTRL_EXT, 2905 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); 2906 } 2907 2908 static boolean_t 2909 igb_is_valid_ether_addr(const uint8_t *addr) 2910 { 2911 uint8_t zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 }; 2912 2913 if ((addr[0] & 1) || !bcmp(addr, zero_addr, ETHER_ADDR_LEN)) 2914 return FALSE; 2915 return TRUE; 2916 } 2917 2918 /* 2919 * Enable PCI Wake On Lan capability 2920 */ 2921 static void 2922 igb_enable_wol(device_t dev) 2923 { 2924 uint16_t cap, status; 2925 uint8_t id; 2926 2927 /* First find the capabilities pointer */ 2928 cap = pci_read_config(dev, PCIR_CAP_PTR, 2); 2929 2930 /* Read the PM Capabilities */ 2931 id = pci_read_config(dev, cap, 1); 2932 if (id != PCIY_PMG) /* Something wrong */ 2933 return; 2934 2935 /* 2936 * OK, we have the power capabilities, 2937 * so now get the status register 2938 */ 2939 cap += PCIR_POWER_STATUS; 2940 status = pci_read_config(dev, cap, 2); 2941 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; 2942 pci_write_config(dev, cap, status, 2); 2943 } 2944 2945 static void 2946 igb_update_stats_counters(struct igb_softc *sc) 2947 { 2948 struct e1000_hw *hw = &sc->hw; 2949 struct e1000_hw_stats *stats; 2950 struct ifnet *ifp = &sc->arpcom.ac_if; 2951 2952 /* 2953 * The virtual function adapter has only a 2954 * small, controlled set of stats, so do only 2955 * those and return.
2956 */ 2957 if (sc->vf_ifp) { 2958 igb_update_vf_stats_counters(sc); 2959 return; 2960 } 2961 stats = sc->stats; 2962 2963 if (sc->hw.phy.media_type == e1000_media_type_copper || 2964 (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) { 2965 stats->symerrs += 2966 E1000_READ_REG(hw, E1000_SYMERRS); 2967 stats->sec += E1000_READ_REG(hw, E1000_SEC); 2968 } 2969 2970 stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS); 2971 stats->mpc += E1000_READ_REG(hw, E1000_MPC); 2972 stats->scc += E1000_READ_REG(hw, E1000_SCC); 2973 stats->ecol += E1000_READ_REG(hw, E1000_ECOL); 2974 2975 stats->mcc += E1000_READ_REG(hw, E1000_MCC); 2976 stats->latecol += E1000_READ_REG(hw, E1000_LATECOL); 2977 stats->colc += E1000_READ_REG(hw, E1000_COLC); 2978 stats->dc += E1000_READ_REG(hw, E1000_DC); 2979 stats->rlec += E1000_READ_REG(hw, E1000_RLEC); 2980 stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC); 2981 stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC); 2982 2983 /* 2984 * For watchdog management we need to know if we have been 2985 * paused during the last interval, so capture that here. 2986 */ 2987 sc->pause_frames = E1000_READ_REG(hw, E1000_XOFFRXC); 2988 stats->xoffrxc += sc->pause_frames; 2989 stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC); 2990 stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC); 2991 stats->prc64 += E1000_READ_REG(hw, E1000_PRC64); 2992 stats->prc127 += E1000_READ_REG(hw, E1000_PRC127); 2993 stats->prc255 += E1000_READ_REG(hw, E1000_PRC255); 2994 stats->prc511 += E1000_READ_REG(hw, E1000_PRC511); 2995 stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023); 2996 stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522); 2997 stats->gprc += E1000_READ_REG(hw, E1000_GPRC); 2998 stats->bprc += E1000_READ_REG(hw, E1000_BPRC); 2999 stats->mprc += E1000_READ_REG(hw, E1000_MPRC); 3000 stats->gptc += E1000_READ_REG(hw, E1000_GPTC); 3001 3002 /* For the 64-bit byte counters the low dword must be read first.
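 * For example GORC below: the order is GORCL, then GORCH,
 * accumulated as gorc += GORCL + ((uint64_t)GORCH << 32);
 * sampling GORCH first would wipe GORCL before it could be read.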
*/ 3003 /* Both registers clear on the read of the high dword */ 3004 3005 stats->gorc += E1000_READ_REG(hw, E1000_GORCL) + 3006 ((uint64_t)E1000_READ_REG(hw, E1000_GORCH) << 32); 3007 stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) + 3008 ((uint64_t)E1000_READ_REG(hw, E1000_GOTCH) << 32); 3009 3010 stats->rnbc += E1000_READ_REG(hw, E1000_RNBC); 3011 stats->ruc += E1000_READ_REG(hw, E1000_RUC); 3012 stats->rfc += E1000_READ_REG(hw, E1000_RFC); 3013 stats->roc += E1000_READ_REG(hw, E1000_ROC); 3014 stats->rjc += E1000_READ_REG(hw, E1000_RJC); 3015 3016 stats->tor += E1000_READ_REG(hw, E1000_TORH); 3017 stats->tot += E1000_READ_REG(hw, E1000_TOTH); 3018 3019 stats->tpr += E1000_READ_REG(hw, E1000_TPR); 3020 stats->tpt += E1000_READ_REG(hw, E1000_TPT); 3021 stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64); 3022 stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127); 3023 stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255); 3024 stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511); 3025 stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023); 3026 stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522); 3027 stats->mptc += E1000_READ_REG(hw, E1000_MPTC); 3028 stats->bptc += E1000_READ_REG(hw, E1000_BPTC); 3029 3030 /* Interrupt Counts */ 3031 3032 stats->iac += E1000_READ_REG(hw, E1000_IAC); 3033 stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC); 3034 stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC); 3035 stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC); 3036 stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC); 3037 stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC); 3038 stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC); 3039 stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC); 3040 stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC); 3041 3042 /* Host to Card Statistics */ 3043 3044 stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC); 3045 stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC); 3046 stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC); 3047 stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC); 3048 stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC); 3049 stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC); 3050 stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC); 3051 stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) + 3052 ((uint64_t)E1000_READ_REG(hw, E1000_HGORCH) << 32)); 3053 stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) + 3054 ((uint64_t)E1000_READ_REG(hw, E1000_HGOTCH) << 32)); 3055 stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS); 3056 stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC); 3057 stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC); 3058 3059 stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC); 3060 stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC); 3061 stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS); 3062 stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR); 3063 stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC); 3064 stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC); 3065 3066 IFNET_STAT_SET(ifp, collisions, stats->colc); 3067 3068 /* Rx Errors */ 3069 IFNET_STAT_SET(ifp, ierrors, 3070 stats->rxerrc + stats->crcerrs + stats->algnerrc + 3071 stats->ruc + stats->roc + stats->mpc + stats->cexterr); 3072 3073 /* Tx Errors */ 3074 IFNET_STAT_SET(ifp, oerrors, 3075 stats->ecol + stats->latecol + sc->watchdog_events); 3076 3077 /* Driver specific counters */ 3078 sc->device_control = E1000_READ_REG(hw, E1000_CTRL); 3079 sc->rx_control = E1000_READ_REG(hw, E1000_RCTL); 3080 sc->int_mask = E1000_READ_REG(hw, E1000_IMS); 3081 sc->eint_mask = E1000_READ_REG(hw, E1000_EIMS); 3082 
sc->packet_buf_alloc_tx = 3083 ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16); 3084 sc->packet_buf_alloc_rx = 3085 (E1000_READ_REG(hw, E1000_PBA) & 0xffff); 3086 } 3087 3088 static void 3089 igb_vf_init_stats(struct igb_softc *sc) 3090 { 3091 struct e1000_hw *hw = &sc->hw; 3092 struct e1000_vf_stats *stats; 3093 3094 stats = sc->stats; 3095 stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC); 3096 stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC); 3097 stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC); 3098 stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC); 3099 stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC); 3100 } 3101 3102 static void 3103 igb_update_vf_stats_counters(struct igb_softc *sc) 3104 { 3105 struct e1000_hw *hw = &sc->hw; 3106 struct e1000_vf_stats *stats; 3107 3108 if (sc->link_speed == 0) 3109 return; 3110 3111 stats = sc->stats; 3112 UPDATE_VF_REG(E1000_VFGPRC, stats->last_gprc, stats->gprc); 3113 UPDATE_VF_REG(E1000_VFGORC, stats->last_gorc, stats->gorc); 3114 UPDATE_VF_REG(E1000_VFGPTC, stats->last_gptc, stats->gptc); 3115 UPDATE_VF_REG(E1000_VFGOTC, stats->last_gotc, stats->gotc); 3116 UPDATE_VF_REG(E1000_VFMPRC, stats->last_mprc, stats->mprc); 3117 } 3118 3119 #ifdef IFPOLL_ENABLE 3120 3121 static void 3122 igb_npoll_status(struct ifnet *ifp) 3123 { 3124 struct igb_softc *sc = ifp->if_softc; 3125 uint32_t reg_icr; 3126 3127 ASSERT_SERIALIZED(&sc->main_serialize); 3128 3129 reg_icr = E1000_READ_REG(&sc->hw, E1000_ICR); 3130 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { 3131 sc->hw.mac.get_link_status = 1; 3132 igb_update_link_status(sc); 3133 } 3134 } 3135 3136 static void 3137 igb_npoll_tx(struct ifnet *ifp, void *arg, int cycle __unused) 3138 { 3139 struct igb_tx_ring *txr = arg; 3140 3141 ASSERT_SERIALIZED(&txr->tx_serialize); 3142 3143 igb_txeof(txr); 3144 if (!ifsq_is_empty(txr->ifsq)) 3145 ifsq_devstart(txr->ifsq); 3146 } 3147 3148 static void 3149 igb_npoll_rx(struct ifnet *ifp __unused, void *arg, int cycle) 3150 { 3151 struct igb_rx_ring *rxr = arg; 3152 3153 ASSERT_SERIALIZED(&rxr->rx_serialize); 3154 3155 igb_rxeof(rxr, cycle); 3156 } 3157 3158 static void 3159 igb_npoll(struct ifnet *ifp, struct ifpoll_info *info) 3160 { 3161 struct igb_softc *sc = ifp->if_softc; 3162 int i, txr_cnt, rxr_cnt; 3163 3164 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3165 3166 if (info) { 3167 int off; 3168 3169 info->ifpi_status.status_func = igb_npoll_status; 3170 info->ifpi_status.serializer = &sc->main_serialize; 3171 3172 txr_cnt = igb_get_txring_inuse(sc, TRUE); 3173 off = sc->tx_npoll_off; 3174 for (i = 0; i < txr_cnt; ++i) { 3175 struct igb_tx_ring *txr = &sc->tx_rings[i]; 3176 int idx = i + off; 3177 3178 KKASSERT(idx < ncpus2); 3179 info->ifpi_tx[idx].poll_func = igb_npoll_tx; 3180 info->ifpi_tx[idx].arg = txr; 3181 info->ifpi_tx[idx].serializer = &txr->tx_serialize; 3182 ifsq_set_cpuid(txr->ifsq, idx); 3183 } 3184 3185 rxr_cnt = igb_get_rxring_inuse(sc, TRUE); 3186 off = sc->rx_npoll_off; 3187 for (i = 0; i < rxr_cnt; ++i) { 3188 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 3189 int idx = i + off; 3190 3191 KKASSERT(idx < ncpus2); 3192 info->ifpi_rx[idx].poll_func = igb_npoll_rx; 3193 info->ifpi_rx[idx].arg = rxr; 3194 info->ifpi_rx[idx].serializer = &rxr->rx_serialize; 3195 } 3196 3197 if (ifp->if_flags & IFF_RUNNING) { 3198 if (rxr_cnt == sc->rx_ring_inuse && 3199 txr_cnt == sc->tx_ring_inuse) { 3200 igb_set_timer_cpuid(sc, TRUE); 3201 igb_disable_intr(sc); 3202 } else { 3203 igb_init(sc); 3204 } 3205 } 3206 } else { 3207 for (i = 0; i < sc->tx_ring_cnt; 
++i) { 3208 struct igb_tx_ring *txr = &sc->tx_rings[i]; 3209 3210 ifsq_set_cpuid(txr->ifsq, txr->tx_intr_cpuid); 3211 } 3212 3213 if (ifp->if_flags & IFF_RUNNING) { 3214 txr_cnt = igb_get_txring_inuse(sc, FALSE); 3215 rxr_cnt = igb_get_rxring_inuse(sc, FALSE); 3216 3217 if (rxr_cnt == sc->rx_ring_inuse && 3218 txr_cnt == sc->tx_ring_inuse) { 3219 igb_set_timer_cpuid(sc, FALSE); 3220 igb_enable_intr(sc); 3221 } else { 3222 igb_init(sc); 3223 } 3224 } 3225 } 3226 } 3227 3228 #endif /* IFPOLL_ENABLE */ 3229 3230 static void 3231 igb_intr(void *xsc) 3232 { 3233 struct igb_softc *sc = xsc; 3234 struct ifnet *ifp = &sc->arpcom.ac_if; 3235 uint32_t eicr; 3236 3237 ASSERT_SERIALIZED(&sc->main_serialize); 3238 3239 eicr = E1000_READ_REG(&sc->hw, E1000_EICR); 3240 3241 if (eicr == 0) 3242 return; 3243 3244 if (ifp->if_flags & IFF_RUNNING) { 3245 struct igb_tx_ring *txr = &sc->tx_rings[0]; 3246 int i; 3247 3248 for (i = 0; i < sc->rx_ring_inuse; ++i) { 3249 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 3250 3251 if (eicr & rxr->rx_intr_mask) { 3252 lwkt_serialize_enter(&rxr->rx_serialize); 3253 igb_rxeof(rxr, -1); 3254 lwkt_serialize_exit(&rxr->rx_serialize); 3255 } 3256 } 3257 3258 if (eicr & txr->tx_intr_mask) { 3259 lwkt_serialize_enter(&txr->tx_serialize); 3260 igb_txeof(txr); 3261 if (!ifsq_is_empty(txr->ifsq)) 3262 ifsq_devstart(txr->ifsq); 3263 lwkt_serialize_exit(&txr->tx_serialize); 3264 } 3265 } 3266 3267 if (eicr & E1000_EICR_OTHER) { 3268 uint32_t icr = E1000_READ_REG(&sc->hw, E1000_ICR); 3269 3270 /* Link status change */ 3271 if (icr & E1000_ICR_LSC) { 3272 sc->hw.mac.get_link_status = 1; 3273 igb_update_link_status(sc); 3274 } 3275 } 3276 3277 /* 3278 * Reading EICR has the side effect of clearing the interrupt mask, 3279 * so all interrupts need to be enabled here. 3280 */ 3281 E1000_WRITE_REG(&sc->hw, E1000_EIMS, sc->intr_mask); 3282 } 3283 3284 static void 3285 igb_intr_shared(void *xsc) 3286 { 3287 struct igb_softc *sc = xsc; 3288 struct ifnet *ifp = &sc->arpcom.ac_if; 3289 uint32_t reg_icr; 3290 3291 ASSERT_SERIALIZED(&sc->main_serialize); 3292 3293 reg_icr = E1000_READ_REG(&sc->hw, E1000_ICR); 3294 3295 /* Hot eject? */ 3296 if (reg_icr == 0xffffffff) 3297 return; 3298 3299 /* Definitely not our interrupt.
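 * On a shared line this handler runs for every device on the IRQ;
 * ICR reads as zero when some other device asserted it.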
*/ 3300 if (reg_icr == 0x0) 3301 return; 3302 3303 if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0) 3304 return; 3305 3306 if (ifp->if_flags & IFF_RUNNING) { 3307 if (reg_icr & 3308 (E1000_ICR_RXT0 | E1000_ICR_RXDMT0 | E1000_ICR_RXO)) { 3309 int i; 3310 3311 for (i = 0; i < sc->rx_ring_inuse; ++i) { 3312 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 3313 3314 lwkt_serialize_enter(&rxr->rx_serialize); 3315 igb_rxeof(rxr, -1); 3316 lwkt_serialize_exit(&rxr->rx_serialize); 3317 } 3318 } 3319 3320 if (reg_icr & E1000_ICR_TXDW) { 3321 struct igb_tx_ring *txr = &sc->tx_rings[0]; 3322 3323 lwkt_serialize_enter(&txr->tx_serialize); 3324 igb_txeof(txr); 3325 if (!ifsq_is_empty(txr->ifsq)) 3326 ifsq_devstart(txr->ifsq); 3327 lwkt_serialize_exit(&txr->tx_serialize); 3328 } 3329 } 3330 3331 /* Link status change */ 3332 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { 3333 sc->hw.mac.get_link_status = 1; 3334 igb_update_link_status(sc); 3335 } 3336 3337 if (reg_icr & E1000_ICR_RXO) 3338 sc->rx_overruns++; 3339 } 3340 3341 static int 3342 igb_encap(struct igb_tx_ring *txr, struct mbuf **m_headp, 3343 int *segs_used, int *idx) 3344 { 3345 bus_dma_segment_t segs[IGB_MAX_SCATTER]; 3346 bus_dmamap_t map; 3347 struct igb_tx_buf *tx_buf, *tx_buf_mapped; 3348 union e1000_adv_tx_desc *txd = NULL; 3349 struct mbuf *m_head = *m_headp; 3350 uint32_t olinfo_status = 0, cmd_type_len = 0, cmd_rs = 0; 3351 int maxsegs, nsegs, i, j, error; 3352 uint32_t hdrlen = 0; 3353 3354 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 3355 error = igb_tso_pullup(txr, m_headp); 3356 if (error) 3357 return error; 3358 m_head = *m_headp; 3359 } 3360 3361 /* Set basic descriptor constants */ 3362 cmd_type_len |= E1000_ADVTXD_DTYP_DATA; 3363 cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT; 3364 if (m_head->m_flags & M_VLANTAG) 3365 cmd_type_len |= E1000_ADVTXD_DCMD_VLE; 3366 3367 /* 3368 * Map the packet for DMA. 3369 */ 3370 tx_buf = &txr->tx_buf[txr->next_avail_desc]; 3371 tx_buf_mapped = tx_buf; 3372 map = tx_buf->map; 3373 3374 maxsegs = txr->tx_avail - IGB_TX_RESERVED; 3375 KASSERT(maxsegs >= txr->spare_desc, ("not enough spare TX desc\n")); 3376 if (maxsegs > IGB_MAX_SCATTER) 3377 maxsegs = IGB_MAX_SCATTER; 3378 3379 error = bus_dmamap_load_mbuf_defrag(txr->tx_tag, map, m_headp, 3380 segs, maxsegs, &nsegs, BUS_DMA_NOWAIT); 3381 if (error) { 3382 if (error == ENOBUFS) 3383 txr->sc->mbuf_defrag_failed++; 3384 else 3385 txr->sc->no_tx_dma_setup++; 3386 3387 m_freem(*m_headp); 3388 *m_headp = NULL; 3389 return error; 3390 } 3391 bus_dmamap_sync(txr->tx_tag, map, BUS_DMASYNC_PREWRITE); 3392 3393 m_head = *m_headp; 3394 3395 /* 3396 * Set up the TX context descriptor, if any hardware offloading is 3397 * needed. This includes CSUM, VLAN, and TSO. It will consume one 3398 * TX descriptor. 3399 * 3400 * Unlike these chips' predecessors (em/emx), TX context descriptor 3401 * will _not_ interfere TX data fetching pipelining. 
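 *
 * Cost sketch: each offloaded packet consumes one extra descriptor
 * for the context, so a TSO packet mapped into N data segments
 * charges N + 1 descriptors against tx_avail, which is why the
 * TSO/csum branches below bump tx_nsegs and *segs_used by one
 * before the data segments are added.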
3402 */ 3403 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 3404 igb_tso_ctx(txr, m_head, &hdrlen); 3405 cmd_type_len |= E1000_ADVTXD_DCMD_TSE; 3406 olinfo_status |= E1000_TXD_POPTS_IXSM << 8; 3407 olinfo_status |= E1000_TXD_POPTS_TXSM << 8; 3408 txr->tx_nsegs++; 3409 (*segs_used)++; 3410 } else if (igb_txcsum_ctx(txr, m_head)) { 3411 if (m_head->m_pkthdr.csum_flags & CSUM_IP) 3412 olinfo_status |= (E1000_TXD_POPTS_IXSM << 8); 3413 if (m_head->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_TCP)) 3414 olinfo_status |= (E1000_TXD_POPTS_TXSM << 8); 3415 txr->tx_nsegs++; 3416 (*segs_used)++; 3417 } 3418 3419 *segs_used += nsegs; 3420 txr->tx_nsegs += nsegs; 3421 if (txr->tx_nsegs >= txr->intr_nsegs) { 3422 /* 3423 * Report Status (RS) is turned on every intr_nsegs 3424 * descriptors (roughly). 3425 */ 3426 txr->tx_nsegs = 0; 3427 cmd_rs = E1000_ADVTXD_DCMD_RS; 3428 } 3429 3430 /* Calculate payload length */ 3431 olinfo_status |= ((m_head->m_pkthdr.len - hdrlen) 3432 << E1000_ADVTXD_PAYLEN_SHIFT); 3433 3434 /* 3435 * 82575 needs the TX context index added; the queue 3436 * index is used as TX context index here. 3437 */ 3438 if (txr->sc->hw.mac.type == e1000_82575) 3439 olinfo_status |= txr->me << 4; 3440 3441 /* Set up our transmit descriptors */ 3442 i = txr->next_avail_desc; 3443 for (j = 0; j < nsegs; j++) { 3444 bus_size_t seg_len; 3445 bus_addr_t seg_addr; 3446 3447 tx_buf = &txr->tx_buf[i]; 3448 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i]; 3449 seg_addr = segs[j].ds_addr; 3450 seg_len = segs[j].ds_len; 3451 3452 txd->read.buffer_addr = htole64(seg_addr); 3453 txd->read.cmd_type_len = htole32(cmd_type_len | seg_len); 3454 txd->read.olinfo_status = htole32(olinfo_status); 3455 if (++i == txr->num_tx_desc) 3456 i = 0; 3457 tx_buf->m_head = NULL; 3458 } 3459 3460 KASSERT(txr->tx_avail > nsegs, ("invalid avail TX desc\n")); 3461 txr->next_avail_desc = i; 3462 txr->tx_avail -= nsegs; 3463 3464 tx_buf->m_head = m_head; 3465 tx_buf_mapped->map = tx_buf->map; 3466 tx_buf->map = map; 3467 3468 /* 3469 * Last Descriptor of Packet needs End Of Packet (EOP) 3470 */ 3471 txd->read.cmd_type_len |= htole32(E1000_ADVTXD_DCMD_EOP | cmd_rs); 3472 3473 /* 3474 * Defer TDT updating until enough descriptors are set up 3475 */ 3476 *idx = i; 3477 #ifdef IGB_TSS_DEBUG 3478 ++txr->tx_packets; 3479 #endif 3480 3481 return 0; 3482 } 3483 3484 static void 3485 igb_start(struct ifnet *ifp, struct ifaltq_subque *ifsq) 3486 { 3487 struct igb_softc *sc = ifp->if_softc; 3488 struct igb_tx_ring *txr = ifsq_get_priv(ifsq); 3489 struct mbuf *m_head; 3490 int idx = -1, nsegs = 0; 3491 3492 KKASSERT(txr->ifsq == ifsq); 3493 ASSERT_SERIALIZED(&txr->tx_serialize); 3494 3495 if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq)) 3496 return; 3497 3498 if (!sc->link_active || (txr->tx_flags & IGB_TXFLAG_ENABLED) == 0) { 3499 ifsq_purge(ifsq); 3500 return; 3501 } 3502 3503 if (!IGB_IS_NOT_OACTIVE(txr)) 3504 igb_txeof(txr); 3505 3506 while (!ifsq_is_empty(ifsq)) { 3507 if (IGB_IS_OACTIVE(txr)) { 3508 ifsq_set_oactive(ifsq); 3509 /* Set watchdog on */ 3510 txr->tx_watchdog.wd_timer = 5; 3511 break; 3512 } 3513 3514 m_head = ifsq_dequeue(ifsq); 3515 if (m_head == NULL) 3516 break; 3517 3518 if (igb_encap(txr, &m_head, &nsegs, &idx)) { 3519 IFNET_STAT_INC(ifp, oerrors, 1); 3520 continue; 3521 } 3522 3523 /* 3524 * TX interrupts are aggressively aggregated, so increasing 3525 * opackets at TX interrupt time will make the opackets 3526 * statistics vastly inaccurate; we do the opackets increment 3527 * now.
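 * (TX interrupts fire roughly once per intr_nsegs descriptors,
 *  num_tx_desc / 16 by default, so a single interrupt can cover
 *  dozens of packets.)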
3528 */ 3529 IFNET_STAT_INC(ifp, opackets, 1); 3530 3531 if (nsegs >= txr->wreg_nsegs) { 3532 E1000_WRITE_REG(&txr->sc->hw, E1000_TDT(txr->me), idx); 3533 idx = -1; 3534 nsegs = 0; 3535 } 3536 3537 /* Send a copy of the frame to the BPF listener */ 3538 ETHER_BPF_MTAP(ifp, m_head); 3539 } 3540 if (idx >= 0) 3541 E1000_WRITE_REG(&txr->sc->hw, E1000_TDT(txr->me), idx); 3542 } 3543 3544 static void 3545 igb_watchdog(struct ifaltq_subque *ifsq) 3546 { 3547 struct igb_tx_ring *txr = ifsq_get_priv(ifsq); 3548 struct ifnet *ifp = ifsq_get_ifp(ifsq); 3549 struct igb_softc *sc = ifp->if_softc; 3550 int i; 3551 3552 KKASSERT(txr->ifsq == ifsq); 3553 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3554 3555 /* 3556 * If flow control has paused us since we last checked, 3557 * it invalidates the watchdog timing, so don't run it. 3558 */ 3559 if (sc->pause_frames) { 3560 sc->pause_frames = 0; 3561 txr->tx_watchdog.wd_timer = 5; 3562 return; 3563 } 3564 3565 if_printf(ifp, "Watchdog timeout -- resetting\n"); 3566 if_printf(ifp, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me, 3567 E1000_READ_REG(&sc->hw, E1000_TDH(txr->me)), 3568 E1000_READ_REG(&sc->hw, E1000_TDT(txr->me))); 3569 if_printf(ifp, "TX(%d) desc avail = %d, " 3570 "Next TX to Clean = %d\n", 3571 txr->me, txr->tx_avail, txr->next_to_clean); 3572 3573 IFNET_STAT_INC(ifp, oerrors, 1); 3574 sc->watchdog_events++; 3575 3576 igb_init(sc); 3577 for (i = 0; i < sc->tx_ring_inuse; ++i) 3578 ifsq_devstart_sched(sc->tx_rings[i].ifsq); 3579 } 3580 3581 static void 3582 igb_set_eitr(struct igb_softc *sc, int idx, int rate) 3583 { 3584 uint32_t eitr = 0; 3585 3586 if (rate > 0) { 3587 if (sc->hw.mac.type == e1000_82575) { 3588 eitr = 1000000000 / 256 / rate; 3589 /* 3590 * NOTE: 3591 * The documentation is wrong about the 2-bit left shift 3592 */ 3593 } else { 3594 eitr = 1000000 / rate; 3595 eitr <<= IGB_EITR_INTVL_SHIFT; 3596 } 3597 3598 if (eitr == 0) { 3599 /* Don't disable it */ 3600 eitr = 1 << IGB_EITR_INTVL_SHIFT; 3601 } else if (eitr > IGB_EITR_INTVL_MASK) { 3602 /* Don't allow it to be too large */ 3603 eitr = IGB_EITR_INTVL_MASK; 3604 } 3605 } 3606 if (sc->hw.mac.type == e1000_82575) 3607 eitr |= eitr << 16; 3608 else 3609 eitr |= E1000_EITR_CNT_IGNR; 3610 E1000_WRITE_REG(&sc->hw, E1000_EITR(idx), eitr); 3611 } 3612 3613 static int 3614 igb_sysctl_intr_rate(SYSCTL_HANDLER_ARGS) 3615 { 3616 struct igb_softc *sc = (void *)arg1; 3617 struct ifnet *ifp = &sc->arpcom.ac_if; 3618 int error, intr_rate; 3619 3620 intr_rate = sc->intr_rate; 3621 error = sysctl_handle_int(oidp, &intr_rate, 0, req); 3622 if (error || req->newptr == NULL) 3623 return error; 3624 if (intr_rate < 0) 3625 return EINVAL; 3626 3627 ifnet_serialize_all(ifp); 3628 3629 sc->intr_rate = intr_rate; 3630 if (ifp->if_flags & IFF_RUNNING) 3631 igb_set_eitr(sc, 0, sc->intr_rate); 3632 3633 if (bootverbose) 3634 if_printf(ifp, "interrupt rate set to %d/sec\n", sc->intr_rate); 3635 3636 ifnet_deserialize_all(ifp); 3637 3638 return 0; 3639 } 3640 3641 static int 3642 igb_sysctl_msix_rate(SYSCTL_HANDLER_ARGS) 3643 { 3644 struct igb_msix_data *msix = (void *)arg1; 3645 struct igb_softc *sc = msix->msix_sc; 3646 struct ifnet *ifp = &sc->arpcom.ac_if; 3647 int error, msix_rate; 3648 3649 msix_rate = msix->msix_rate; 3650 error = sysctl_handle_int(oidp, &msix_rate, 0, req); 3651 if (error || req->newptr == NULL) 3652 return error; 3653 if (msix_rate < 0) 3654 return EINVAL; 3655 3656 lwkt_serialize_enter(msix->msix_serialize); 3657 3658 msix->msix_rate = msix_rate; 3659 if (ifp->if_flags & IFF_RUNNING) 3660 igb_set_eitr(sc,
msix->msix_vector, msix->msix_rate); 3661 3662 if (bootverbose) { 3663 if_printf(ifp, "%s set to %d/sec\n", msix->msix_rate_desc, 3664 msix->msix_rate); 3665 } 3666 3667 lwkt_serialize_exit(msix->msix_serialize); 3668 3669 return 0; 3670 } 3671 3672 static int 3673 igb_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS) 3674 { 3675 struct igb_softc *sc = (void *)arg1; 3676 struct ifnet *ifp = &sc->arpcom.ac_if; 3677 struct igb_tx_ring *txr = &sc->tx_rings[0]; 3678 int error, nsegs; 3679 3680 nsegs = txr->intr_nsegs; 3681 error = sysctl_handle_int(oidp, &nsegs, 0, req); 3682 if (error || req->newptr == NULL) 3683 return error; 3684 if (nsegs <= 0) 3685 return EINVAL; 3686 3687 ifnet_serialize_all(ifp); 3688 3689 if (nsegs >= txr->num_tx_desc - txr->oact_lo_desc || 3690 nsegs >= txr->oact_hi_desc - IGB_MAX_SCATTER) { 3691 error = EINVAL; 3692 } else { 3693 int i; 3694 3695 error = 0; 3696 for (i = 0; i < sc->tx_ring_cnt; ++i) 3697 sc->tx_rings[i].intr_nsegs = nsegs; 3698 } 3699 3700 ifnet_deserialize_all(ifp); 3701 3702 return error; 3703 } 3704 3705 static int 3706 igb_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS) 3707 { 3708 struct igb_softc *sc = (void *)arg1; 3709 struct ifnet *ifp = &sc->arpcom.ac_if; 3710 int error, nsegs, i; 3711 3712 nsegs = sc->rx_rings[0].wreg_nsegs; 3713 error = sysctl_handle_int(oidp, &nsegs, 0, req); 3714 if (error || req->newptr == NULL) 3715 return error; 3716 3717 ifnet_serialize_all(ifp); 3718 for (i = 0; i < sc->rx_ring_cnt; ++i) 3719 sc->rx_rings[i].wreg_nsegs =nsegs; 3720 ifnet_deserialize_all(ifp); 3721 3722 return 0; 3723 } 3724 3725 static int 3726 igb_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS) 3727 { 3728 struct igb_softc *sc = (void *)arg1; 3729 struct ifnet *ifp = &sc->arpcom.ac_if; 3730 int error, nsegs, i; 3731 3732 nsegs = sc->tx_rings[0].wreg_nsegs; 3733 error = sysctl_handle_int(oidp, &nsegs, 0, req); 3734 if (error || req->newptr == NULL) 3735 return error; 3736 3737 ifnet_serialize_all(ifp); 3738 for (i = 0; i < sc->tx_ring_cnt; ++i) 3739 sc->tx_rings[i].wreg_nsegs =nsegs; 3740 ifnet_deserialize_all(ifp); 3741 3742 return 0; 3743 } 3744 3745 #ifdef IFPOLL_ENABLE 3746 3747 static int 3748 igb_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS) 3749 { 3750 struct igb_softc *sc = (void *)arg1; 3751 struct ifnet *ifp = &sc->arpcom.ac_if; 3752 int error, off; 3753 3754 off = sc->rx_npoll_off; 3755 error = sysctl_handle_int(oidp, &off, 0, req); 3756 if (error || req->newptr == NULL) 3757 return error; 3758 if (off < 0) 3759 return EINVAL; 3760 3761 ifnet_serialize_all(ifp); 3762 if (off >= ncpus2 || off % sc->rx_ring_cnt != 0) { 3763 error = EINVAL; 3764 } else { 3765 error = 0; 3766 sc->rx_npoll_off = off; 3767 } 3768 ifnet_deserialize_all(ifp); 3769 3770 return error; 3771 } 3772 3773 static int 3774 igb_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS) 3775 { 3776 struct igb_softc *sc = (void *)arg1; 3777 struct ifnet *ifp = &sc->arpcom.ac_if; 3778 int error, off; 3779 3780 off = sc->tx_npoll_off; 3781 error = sysctl_handle_int(oidp, &off, 0, req); 3782 if (error || req->newptr == NULL) 3783 return error; 3784 if (off < 0) 3785 return EINVAL; 3786 3787 ifnet_serialize_all(ifp); 3788 if (off >= ncpus2 || off % sc->tx_ring_cnt != 0) { 3789 error = EINVAL; 3790 } else { 3791 error = 0; 3792 sc->tx_npoll_off = off; 3793 } 3794 ifnet_deserialize_all(ifp); 3795 3796 return error; 3797 } 3798 3799 #endif /* IFPOLL_ENABLE */ 3800 3801 static void 3802 igb_init_intr(struct igb_softc *sc) 3803 { 3804 igb_set_intr_mask(sc); 3805 3806 if ((sc->flags & IGB_FLAG_SHARED_INTR) == 0) 3807 
igb_init_unshared_intr(sc); 3808 3809 if (sc->intr_type != PCI_INTR_TYPE_MSIX) { 3810 igb_set_eitr(sc, 0, sc->intr_rate); 3811 } else { 3812 int i; 3813 3814 for (i = 0; i < sc->msix_cnt; ++i) 3815 igb_set_eitr(sc, i, sc->msix_data[i].msix_rate); 3816 } 3817 } 3818 3819 static void 3820 igb_init_unshared_intr(struct igb_softc *sc) 3821 { 3822 struct e1000_hw *hw = &sc->hw; 3823 const struct igb_rx_ring *rxr; 3824 const struct igb_tx_ring *txr; 3825 uint32_t ivar, index; 3826 int i; 3827 3828 /* 3829 * Enable extended mode 3830 */ 3831 if (sc->hw.mac.type != e1000_82575) { 3832 uint32_t gpie; 3833 int ivar_max; 3834 3835 gpie = E1000_GPIE_NSICR; 3836 if (sc->intr_type == PCI_INTR_TYPE_MSIX) { 3837 gpie |= E1000_GPIE_MSIX_MODE | 3838 E1000_GPIE_EIAME | 3839 E1000_GPIE_PBA; 3840 } 3841 E1000_WRITE_REG(hw, E1000_GPIE, gpie); 3842 3843 /* 3844 * Clear IVARs 3845 */ 3846 switch (sc->hw.mac.type) { 3847 case e1000_82576: 3848 ivar_max = IGB_MAX_IVAR_82576; 3849 break; 3850 3851 case e1000_82580: 3852 ivar_max = IGB_MAX_IVAR_82580; 3853 break; 3854 3855 case e1000_i350: 3856 ivar_max = IGB_MAX_IVAR_I350; 3857 break; 3858 3859 case e1000_i354: 3860 ivar_max = IGB_MAX_IVAR_I354; 3861 break; 3862 3863 case e1000_vfadapt: 3864 case e1000_vfadapt_i350: 3865 ivar_max = IGB_MAX_IVAR_VF; 3866 break; 3867 3868 case e1000_i210: 3869 ivar_max = IGB_MAX_IVAR_I210; 3870 break; 3871 3872 case e1000_i211: 3873 ivar_max = IGB_MAX_IVAR_I211; 3874 break; 3875 3876 default: 3877 panic("unknown mac type %d\n", sc->hw.mac.type); 3878 } 3879 for (i = 0; i < ivar_max; ++i) 3880 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, i, 0); 3881 E1000_WRITE_REG(hw, E1000_IVAR_MISC, 0); 3882 } else { 3883 uint32_t tmp; 3884 3885 KASSERT(sc->intr_type != PCI_INTR_TYPE_MSIX, 3886 ("82575 w/ MSI-X")); 3887 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT); 3888 tmp |= E1000_CTRL_EXT_IRCA; 3889 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp); 3890 } 3891 3892 /* 3893 * Map TX/RX interrupts to EICR 3894 */ 3895 switch (sc->hw.mac.type) { 3896 case e1000_82580: 3897 case e1000_i350: 3898 case e1000_i354: 3899 case e1000_vfadapt: 3900 case e1000_vfadapt_i350: 3901 case e1000_i210: 3902 case e1000_i211: 3903 /* RX entries */ 3904 for (i = 0; i < sc->rx_ring_inuse; ++i) { 3905 rxr = &sc->rx_rings[i]; 3906 3907 index = i >> 1; 3908 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); 3909 3910 if (i & 1) { 3911 ivar &= 0xff00ffff; 3912 ivar |= 3913 (rxr->rx_intr_bit | E1000_IVAR_VALID) << 16; 3914 } else { 3915 ivar &= 0xffffff00; 3916 ivar |= 3917 (rxr->rx_intr_bit | E1000_IVAR_VALID); 3918 } 3919 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 3920 } 3921 /* TX entries */ 3922 for (i = 0; i < sc->tx_ring_inuse; ++i) { 3923 txr = &sc->tx_rings[i]; 3924 3925 index = i >> 1; 3926 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); 3927 3928 if (i & 1) { 3929 ivar &= 0x00ffffff; 3930 ivar |= 3931 (txr->tx_intr_bit | E1000_IVAR_VALID) << 24; 3932 } else { 3933 ivar &= 0xffff00ff; 3934 ivar |= 3935 (txr->tx_intr_bit | E1000_IVAR_VALID) << 8; 3936 } 3937 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 3938 } 3939 if (sc->intr_type == PCI_INTR_TYPE_MSIX) { 3940 ivar = (sc->sts_intr_bit | E1000_IVAR_VALID) << 8; 3941 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); 3942 } 3943 break; 3944 3945 case e1000_82576: 3946 /* RX entries */ 3947 for (i = 0; i < sc->rx_ring_inuse; ++i) { 3948 rxr = &sc->rx_rings[i]; 3949 3950 index = i & 0x7; /* Each IVAR has two entries */ 3951 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); 3952 3953 if (i < 8) { 3954 ivar &= 
0xffffff00; 3955 ivar |= 3956 (rxr->rx_intr_bit | E1000_IVAR_VALID); 3957 } else { 3958 ivar &= 0xff00ffff; 3959 ivar |= 3960 (rxr->rx_intr_bit | E1000_IVAR_VALID) << 16; 3961 } 3962 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 3963 } 3964 /* TX entries */ 3965 for (i = 0; i < sc->tx_ring_inuse; ++i) { 3966 txr = &sc->tx_rings[i]; 3967 3968 index = i & 0x7; /* Each IVAR has two entries */ 3969 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); 3970 3971 if (i < 8) { 3972 ivar &= 0xffff00ff; 3973 ivar |= 3974 (txr->tx_intr_bit | E1000_IVAR_VALID) << 8; 3975 } else { 3976 ivar &= 0x00ffffff; 3977 ivar |= 3978 (txr->tx_intr_bit | E1000_IVAR_VALID) << 24; 3979 } 3980 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 3981 } 3982 if (sc->intr_type == PCI_INTR_TYPE_MSIX) { 3983 ivar = (sc->sts_intr_bit | E1000_IVAR_VALID) << 8; 3984 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); 3985 } 3986 break; 3987 3988 case e1000_82575: 3989 /* 3990 * Enable necessary interrupt bits. 3991 * 3992 * The name of the register is confusing; in addition to 3993 * configuring the first vector of MSI-X, it also configures 3994 * which bits of EICR could be set by the hardware even when 3995 * MSI or line interrupt is used; it thus controls interrupt 3996 * generation. It MUST be configured explicitly; the default 3997 * value mentioned in the datasheet is wrong: RX queue0 and 3998 * TX queue0 are NOT enabled by default. 3999 */ 4000 E1000_WRITE_REG(&sc->hw, E1000_MSIXBM(0), sc->intr_mask); 4001 break; 4002 4003 default: 4004 panic("unknown mac type %d\n", sc->hw.mac.type); 4005 } 4006 } 4007 4008 static int 4009 igb_setup_intr(struct igb_softc *sc) 4010 { 4011 int error; 4012 4013 if (sc->intr_type == PCI_INTR_TYPE_MSIX) 4014 return igb_msix_setup(sc); 4015 4016 error = bus_setup_intr(sc->dev, sc->intr_res, INTR_MPSAFE, 4017 (sc->flags & IGB_FLAG_SHARED_INTR) ? 
igb_intr_shared : igb_intr, 4018 sc, &sc->intr_tag, &sc->main_serialize); 4019 if (error) { 4020 device_printf(sc->dev, "Failed to register interrupt handler\n"); 4021 return error; 4022 } 4023 return 0; 4024 } 4025 4026 static void 4027 igb_set_txintr_mask(struct igb_tx_ring *txr, int *intr_bit0, int intr_bitmax) 4028 { 4029 if (txr->sc->hw.mac.type == e1000_82575) { 4030 txr->tx_intr_bit = 0; /* unused */ 4031 switch (txr->me) { 4032 case 0: 4033 txr->tx_intr_mask = E1000_EICR_TX_QUEUE0; 4034 break; 4035 case 1: 4036 txr->tx_intr_mask = E1000_EICR_TX_QUEUE1; 4037 break; 4038 case 2: 4039 txr->tx_intr_mask = E1000_EICR_TX_QUEUE2; 4040 break; 4041 case 3: 4042 txr->tx_intr_mask = E1000_EICR_TX_QUEUE3; 4043 break; 4044 default: 4045 panic("unsupported # of TX ring, %d\n", txr->me); 4046 } 4047 } else { 4048 int intr_bit = *intr_bit0; 4049 4050 txr->tx_intr_bit = intr_bit % intr_bitmax; 4051 txr->tx_intr_mask = 1 << txr->tx_intr_bit; 4052 4053 *intr_bit0 = intr_bit + 1; 4054 } 4055 } 4056 4057 static void 4058 igb_set_rxintr_mask(struct igb_rx_ring *rxr, int *intr_bit0, int intr_bitmax) 4059 { 4060 if (rxr->sc->hw.mac.type == e1000_82575) { 4061 rxr->rx_intr_bit = 0; /* unused */ 4062 switch (rxr->me) { 4063 case 0: 4064 rxr->rx_intr_mask = E1000_EICR_RX_QUEUE0; 4065 break; 4066 case 1: 4067 rxr->rx_intr_mask = E1000_EICR_RX_QUEUE1; 4068 break; 4069 case 2: 4070 rxr->rx_intr_mask = E1000_EICR_RX_QUEUE2; 4071 break; 4072 case 3: 4073 rxr->rx_intr_mask = E1000_EICR_RX_QUEUE3; 4074 break; 4075 default: 4076 panic("unsupported # of RX ring, %d\n", rxr->me); 4077 } 4078 } else { 4079 int intr_bit = *intr_bit0; 4080 4081 rxr->rx_intr_bit = intr_bit % intr_bitmax; 4082 rxr->rx_intr_mask = 1 << rxr->rx_intr_bit; 4083 4084 *intr_bit0 = intr_bit + 1; 4085 } 4086 } 4087 4088 static void 4089 igb_serialize(struct ifnet *ifp, enum ifnet_serialize slz) 4090 { 4091 struct igb_softc *sc = ifp->if_softc; 4092 4093 ifnet_serialize_array_enter(sc->serializes, sc->serialize_cnt, slz); 4094 } 4095 4096 static void 4097 igb_deserialize(struct ifnet *ifp, enum ifnet_serialize slz) 4098 { 4099 struct igb_softc *sc = ifp->if_softc; 4100 4101 ifnet_serialize_array_exit(sc->serializes, sc->serialize_cnt, slz); 4102 } 4103 4104 static int 4105 igb_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz) 4106 { 4107 struct igb_softc *sc = ifp->if_softc; 4108 4109 return ifnet_serialize_array_try(sc->serializes, sc->serialize_cnt, 4110 slz); 4111 } 4112 4113 #ifdef INVARIANTS 4114 4115 static void 4116 igb_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz, 4117 boolean_t serialized) 4118 { 4119 struct igb_softc *sc = ifp->if_softc; 4120 4121 ifnet_serialize_array_assert(sc->serializes, sc->serialize_cnt, 4122 slz, serialized); 4123 } 4124 4125 #endif /* INVARIANTS */ 4126 4127 static void 4128 igb_set_intr_mask(struct igb_softc *sc) 4129 { 4130 int i; 4131 4132 sc->intr_mask = sc->sts_intr_mask; 4133 for (i = 0; i < sc->rx_ring_inuse; ++i) 4134 sc->intr_mask |= sc->rx_rings[i].rx_intr_mask; 4135 for (i = 0; i < sc->tx_ring_inuse; ++i) 4136 sc->intr_mask |= sc->tx_rings[i].tx_intr_mask; 4137 if (bootverbose) { 4138 if_printf(&sc->arpcom.ac_if, "intr mask 0x%08x\n", 4139 sc->intr_mask); 4140 } 4141 } 4142 4143 static int 4144 igb_alloc_intr(struct igb_softc *sc) 4145 { 4146 int i, intr_bit, intr_bitmax; 4147 u_int intr_flags; 4148 4149 igb_msix_try_alloc(sc); 4150 if (sc->intr_type == PCI_INTR_TYPE_MSIX) 4151 goto done; 4152 4153 /* 4154 * Allocate MSI/legacy interrupt resource 4155 */ 4156 sc->intr_type =

static int
igb_alloc_intr(struct igb_softc *sc)
{
	int i, intr_bit, intr_bitmax;
	u_int intr_flags;

	igb_msix_try_alloc(sc);
	if (sc->intr_type == PCI_INTR_TYPE_MSIX)
		goto done;

	/*
	 * Allocate MSI/legacy interrupt resource
	 */
	sc->intr_type = pci_alloc_1intr(sc->dev, igb_msi_enable,
	    &sc->intr_rid, &intr_flags);

	if (sc->intr_type == PCI_INTR_TYPE_LEGACY) {
		int unshared;

		unshared = device_getenv_int(sc->dev, "irq.unshared", 0);
		if (!unshared) {
			sc->flags |= IGB_FLAG_SHARED_INTR;
			if (bootverbose)
				device_printf(sc->dev, "IRQ shared\n");
		} else {
			intr_flags &= ~RF_SHAREABLE;
			if (bootverbose)
				device_printf(sc->dev, "IRQ unshared\n");
		}
	}

	sc->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
	    &sc->intr_rid, intr_flags);
	if (sc->intr_res == NULL) {
		device_printf(sc->dev, "Unable to allocate bus resource: "
		    "interrupt\n");
		return ENXIO;
	}

	for (i = 0; i < sc->tx_ring_cnt; ++i)
		sc->tx_rings[i].tx_intr_cpuid = rman_get_cpuid(sc->intr_res);

	/*
	 * Setup MSI/legacy interrupt mask
	 */
	switch (sc->hw.mac.type) {
	case e1000_82575:
		intr_bitmax = IGB_MAX_TXRXINT_82575;
		break;

	case e1000_82576:
		intr_bitmax = IGB_MAX_TXRXINT_82576;
		break;

	case e1000_82580:
		intr_bitmax = IGB_MAX_TXRXINT_82580;
		break;

	case e1000_i350:
		intr_bitmax = IGB_MAX_TXRXINT_I350;
		break;

	case e1000_i354:
		intr_bitmax = IGB_MAX_TXRXINT_I354;
		break;

	case e1000_i210:
		intr_bitmax = IGB_MAX_TXRXINT_I210;
		break;

	case e1000_i211:
		intr_bitmax = IGB_MAX_TXRXINT_I211;
		break;

	default:
		intr_bitmax = IGB_MIN_TXRXINT;
		break;
	}
	intr_bit = 0;
	for (i = 0; i < sc->tx_ring_cnt; ++i)
		igb_set_txintr_mask(&sc->tx_rings[i], &intr_bit, intr_bitmax);
	for (i = 0; i < sc->rx_ring_cnt; ++i)
		igb_set_rxintr_mask(&sc->rx_rings[i], &intr_bit, intr_bitmax);
	sc->sts_intr_bit = 0;
	sc->sts_intr_mask = E1000_EICR_OTHER;

	/* Initialize interrupt rate */
	sc->intr_rate = IGB_INTR_RATE;
done:
	igb_set_ring_inuse(sc, FALSE);
	igb_set_intr_mask(sc);
	return 0;
}

static void
igb_free_intr(struct igb_softc *sc)
{
	if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
		if (sc->intr_res != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ,
			    sc->intr_rid, sc->intr_res);
		}
		if (sc->intr_type == PCI_INTR_TYPE_MSI)
			pci_release_msi(sc->dev);
	} else {
		igb_msix_free(sc, TRUE);
	}
}

static void
igb_teardown_intr(struct igb_softc *sc)
{
	if (sc->intr_type != PCI_INTR_TYPE_MSIX)
		bus_teardown_intr(sc->dev, sc->intr_res, sc->intr_tag);
	else
		igb_msix_teardown(sc, sc->msix_cnt);
}
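
/*
 * Per-device tunables consulted by the MSI-X allocation code below,
 * all read via device_getenv_int() (per-unit kenv settings, like the
 * "irq.unshared" knob used above):
 *
 *   msix.enable	enable/disable MSI-X (default: igb_msix_enable)
 *   msix.count		override the hardware vector count
 *			(IGB_MSIX_DEBUG builds only)
 *   msix.rxoff		CPU offset of RX ring vectors (independent mode)
 *   msix.txoff		CPU offset of TX ring vectors (independent mode)
 *   msix.off		CPU offset of RX/TX ring vectors (aggregate mode)
 */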

static void
igb_msix_try_alloc(struct igb_softc *sc)
{
	int msix_enable, msix_cnt, msix_cnt2, alloc_cnt;
	int i, x, error;
	int offset, offset_def;
	struct igb_msix_data *msix;
	boolean_t aggregate, setup = FALSE;

	/*
	 * Don't enable MSI-X on 82575, see:
	 * 82575 specification update errata #25
	 */
	if (sc->hw.mac.type == e1000_82575)
		return;

	/* Don't enable MSI-X on VF */
	if (sc->vf_ifp)
		return;

	msix_enable = device_getenv_int(sc->dev, "msix.enable",
	    igb_msix_enable);
	if (!msix_enable)
		return;

	msix_cnt = pci_msix_count(sc->dev);
#ifdef IGB_MSIX_DEBUG
	msix_cnt = device_getenv_int(sc->dev, "msix.count", msix_cnt);
#endif
	if (msix_cnt <= 1) {
		/* One MSI-X model does not make sense */
		return;
	}

	i = 0;
	while ((1 << (i + 1)) <= msix_cnt)
		++i;
	msix_cnt2 = 1 << i;

	if (bootverbose) {
		device_printf(sc->dev, "MSI-X count %d/%d\n",
		    msix_cnt2, msix_cnt);
	}

	KKASSERT(msix_cnt2 <= msix_cnt);
	if (msix_cnt == msix_cnt2) {
		/* We need at least one MSI-X for link status */
		msix_cnt2 >>= 1;
		if (msix_cnt2 <= 1) {
			/* One MSI-X for RX/TX does not make sense */
			device_printf(sc->dev, "not enough MSI-X for TX/RX, "
			    "MSI-X count %d/%d\n", msix_cnt2, msix_cnt);
			return;
		}
		KKASSERT(msix_cnt > msix_cnt2);

		if (bootverbose) {
			device_printf(sc->dev, "MSI-X count fixup %d/%d\n",
			    msix_cnt2, msix_cnt);
		}
	}

	sc->rx_ring_msix = sc->rx_ring_cnt;
	if (sc->rx_ring_msix > msix_cnt2)
		sc->rx_ring_msix = msix_cnt2;

	sc->tx_ring_msix = sc->tx_ring_cnt;
	if (sc->tx_ring_msix > msix_cnt2)
		sc->tx_ring_msix = msix_cnt2;

	if (msix_cnt >= sc->tx_ring_msix + sc->rx_ring_msix + 1) {
		/*
		 * Independent TX/RX MSI-X
		 */
		aggregate = FALSE;
		if (bootverbose)
			device_printf(sc->dev, "independent TX/RX MSI-X\n");
		alloc_cnt = sc->tx_ring_msix + sc->rx_ring_msix;
	} else {
		/*
		 * Aggregate TX/RX MSI-X
		 */
		aggregate = TRUE;
		if (bootverbose)
			device_printf(sc->dev, "aggregate TX/RX MSI-X\n");
		alloc_cnt = msix_cnt2;
		if (alloc_cnt > ncpus2)
			alloc_cnt = ncpus2;
		if (sc->rx_ring_msix > alloc_cnt)
			sc->rx_ring_msix = alloc_cnt;
		if (sc->tx_ring_msix > alloc_cnt)
			sc->tx_ring_msix = alloc_cnt;
	}
	++alloc_cnt;	/* For link status */

	if (bootverbose) {
		device_printf(sc->dev, "MSI-X alloc %d, "
		    "RX ring %d, TX ring %d\n", alloc_cnt,
		    sc->rx_ring_msix, sc->tx_ring_msix);
	}

	sc->msix_mem_rid = PCIR_BAR(IGB_MSIX_BAR);
	sc->msix_mem_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
	    &sc->msix_mem_rid, RF_ACTIVE);
	if (sc->msix_mem_res == NULL) {
		sc->msix_mem_rid = PCIR_BAR(IGB_MSIX_BAR_ALT);
		sc->msix_mem_res = bus_alloc_resource_any(sc->dev,
		    SYS_RES_MEMORY, &sc->msix_mem_rid, RF_ACTIVE);
		if (sc->msix_mem_res == NULL) {
			device_printf(sc->dev, "Unable to map MSI-X table\n");
			return;
		}
	}

	sc->msix_cnt = alloc_cnt;
	sc->msix_data = kmalloc_cachealign(
	    sizeof(struct igb_msix_data) * sc->msix_cnt,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (x = 0; x < sc->msix_cnt; ++x) {
		msix = &sc->msix_data[x];

		lwkt_serialize_init(&msix->msix_serialize0);
		msix->msix_sc = sc;
		msix->msix_rid = -1;
		msix->msix_vector = x;
		msix->msix_mask = 1 << msix->msix_vector;
		msix->msix_rate = IGB_INTR_RATE;
	}

	x = 0;
	if (!aggregate) {
		/*
		 * RX rings
		 */
		if (sc->rx_ring_msix == ncpus2) {
			offset = 0;
		} else {
			offset_def = (sc->rx_ring_msix *
			    device_get_unit(sc->dev)) % ncpus2;

			offset = device_getenv_int(sc->dev,
			    "msix.rxoff", offset_def);
			if (offset >= ncpus2 ||
			    offset % sc->rx_ring_msix != 0) {
				device_printf(sc->dev,
				    "invalid msix.rxoff %d, use %d\n",
				    offset, offset_def);
				offset = offset_def;
			}
		}
		igb_msix_rx_conf(sc, 0, &x, offset);
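
		/*
		 * Worked example of the offset logic above (hypothetical
		 * values): 2 RX rings in MSI-X mode on unit 1 with
		 * ncpus2 == 4 gives offset_def = (2 * 1) % 4 = 2, so the
		 * RX vectors land on cpu2 and cpu3.  A msix.rxoff of 1
		 * would be rejected, since it is not a multiple of the
		 * RX ring count.
		 */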
"msix.txoff", offset_def); 4424 if (offset >= ncpus2 || 4425 offset % sc->tx_ring_msix != 0) { 4426 device_printf(sc->dev, 4427 "invalid msix.txoff %d, use %d\n", 4428 offset, offset_def); 4429 offset = offset_def; 4430 } 4431 } 4432 igb_msix_tx_conf(sc, 0, &x, offset); 4433 } else { 4434 int ring_agg, ring_max; 4435 4436 ring_agg = sc->rx_ring_msix; 4437 if (ring_agg > sc->tx_ring_msix) 4438 ring_agg = sc->tx_ring_msix; 4439 4440 ring_max = sc->rx_ring_msix; 4441 if (ring_max < sc->tx_ring_msix) 4442 ring_max = sc->tx_ring_msix; 4443 4444 if (ring_max == ncpus2) { 4445 offset = 0; 4446 } else { 4447 offset_def = (ring_max * device_get_unit(sc->dev)) % 4448 ncpus2; 4449 4450 offset = device_getenv_int(sc->dev, "msix.off", 4451 offset_def); 4452 if (offset >= ncpus2 || offset % ring_max != 0) { 4453 device_printf(sc->dev, 4454 "invalid msix.off %d, use %d\n", 4455 offset, offset_def); 4456 offset = offset_def; 4457 } 4458 } 4459 4460 for (i = 0; i < ring_agg; ++i) { 4461 struct igb_tx_ring *txr = &sc->tx_rings[i]; 4462 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 4463 4464 KKASSERT(x < sc->msix_cnt); 4465 msix = &sc->msix_data[x++]; 4466 4467 txr->tx_intr_bit = msix->msix_vector; 4468 txr->tx_intr_mask = msix->msix_mask; 4469 rxr->rx_intr_bit = msix->msix_vector; 4470 rxr->rx_intr_mask = msix->msix_mask; 4471 4472 msix->msix_serialize = &msix->msix_serialize0; 4473 msix->msix_func = igb_msix_rxtx; 4474 msix->msix_arg = msix; 4475 msix->msix_rx = rxr; 4476 msix->msix_tx = txr; 4477 4478 msix->msix_cpuid = i + offset; 4479 KKASSERT(msix->msix_cpuid < ncpus2); 4480 txr->tx_intr_cpuid = msix->msix_cpuid; 4481 4482 ksnprintf(msix->msix_desc, sizeof(msix->msix_desc), 4483 "%s rxtx%d", device_get_nameunit(sc->dev), i); 4484 msix->msix_rate = IGB_MSIX_RX_RATE; 4485 ksnprintf(msix->msix_rate_desc, 4486 sizeof(msix->msix_rate_desc), 4487 "RXTX%d interrupt rate", i); 4488 } 4489 4490 if (ring_agg != ring_max) { 4491 if (ring_max == sc->tx_ring_msix) 4492 igb_msix_tx_conf(sc, i, &x, offset); 4493 else 4494 igb_msix_rx_conf(sc, i, &x, offset); 4495 } 4496 } 4497 4498 /* 4499 * Link status 4500 */ 4501 KKASSERT(x < sc->msix_cnt); 4502 msix = &sc->msix_data[x++]; 4503 sc->sts_intr_bit = msix->msix_vector; 4504 sc->sts_intr_mask = msix->msix_mask; 4505 4506 msix->msix_serialize = &sc->main_serialize; 4507 msix->msix_func = igb_msix_status; 4508 msix->msix_arg = sc; 4509 msix->msix_cpuid = 0; 4510 ksnprintf(msix->msix_desc, sizeof(msix->msix_desc), "%s sts", 4511 device_get_nameunit(sc->dev)); 4512 ksnprintf(msix->msix_rate_desc, sizeof(msix->msix_rate_desc), 4513 "status interrupt rate"); 4514 4515 KKASSERT(x == sc->msix_cnt); 4516 4517 error = pci_setup_msix(sc->dev); 4518 if (error) { 4519 device_printf(sc->dev, "Setup MSI-X failed\n"); 4520 goto back; 4521 } 4522 setup = TRUE; 4523 4524 for (i = 0; i < sc->msix_cnt; ++i) { 4525 msix = &sc->msix_data[i]; 4526 4527 error = pci_alloc_msix_vector(sc->dev, msix->msix_vector, 4528 &msix->msix_rid, msix->msix_cpuid); 4529 if (error) { 4530 device_printf(sc->dev, 4531 "Unable to allocate MSI-X %d on cpu%d\n", 4532 msix->msix_vector, msix->msix_cpuid); 4533 goto back; 4534 } 4535 4536 msix->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, 4537 &msix->msix_rid, RF_ACTIVE); 4538 if (msix->msix_res == NULL) { 4539 device_printf(sc->dev, 4540 "Unable to allocate MSI-X %d resource\n", 4541 msix->msix_vector); 4542 error = ENOMEM; 4543 goto back; 4544 } 4545 } 4546 4547 pci_enable_msix(sc->dev); 4548 sc->intr_type = PCI_INTR_TYPE_MSIX; 4549 back: 4550 if (error) 

static void
igb_msix_free(struct igb_softc *sc, boolean_t setup)
{
	int i;

	KKASSERT(sc->msix_cnt > 1);

	for (i = 0; i < sc->msix_cnt; ++i) {
		struct igb_msix_data *msix = &sc->msix_data[i];

		if (msix->msix_res != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ,
			    msix->msix_rid, msix->msix_res);
		}
		if (msix->msix_rid >= 0)
			pci_release_msix_vector(sc->dev, msix->msix_rid);
	}
	if (setup)
		pci_teardown_msix(sc->dev);

	sc->msix_cnt = 0;
	kfree(sc->msix_data, M_DEVBUF);
	sc->msix_data = NULL;
}

static int
igb_msix_setup(struct igb_softc *sc)
{
	int i;

	for (i = 0; i < sc->msix_cnt; ++i) {
		struct igb_msix_data *msix = &sc->msix_data[i];
		int error;

		error = bus_setup_intr_descr(sc->dev, msix->msix_res,
		    INTR_MPSAFE, msix->msix_func, msix->msix_arg,
		    &msix->msix_handle, msix->msix_serialize,
		    msix->msix_desc);
		if (error) {
			device_printf(sc->dev, "could not set up %s "
			    "interrupt handler.\n", msix->msix_desc);
			igb_msix_teardown(sc, i);
			return error;
		}
	}
	return 0;
}

static void
igb_msix_teardown(struct igb_softc *sc, int msix_cnt)
{
	int i;

	for (i = 0; i < msix_cnt; ++i) {
		struct igb_msix_data *msix = &sc->msix_data[i];

		bus_teardown_intr(sc->dev, msix->msix_res, msix->msix_handle);
	}
}

static void
igb_msix_rx(void *arg)
{
	struct igb_rx_ring *rxr = arg;

	ASSERT_SERIALIZED(&rxr->rx_serialize);
	igb_rxeof(rxr, -1);

	E1000_WRITE_REG(&rxr->sc->hw, E1000_EIMS, rxr->rx_intr_mask);
}

static void
igb_msix_tx(void *arg)
{
	struct igb_tx_ring *txr = arg;

	ASSERT_SERIALIZED(&txr->tx_serialize);

	igb_txeof(txr);
	if (!ifsq_is_empty(txr->ifsq))
		ifsq_devstart(txr->ifsq);

	E1000_WRITE_REG(&txr->sc->hw, E1000_EIMS, txr->tx_intr_mask);
}

static void
igb_msix_status(void *arg)
{
	struct igb_softc *sc = arg;
	uint32_t icr;

	ASSERT_SERIALIZED(&sc->main_serialize);

	icr = E1000_READ_REG(&sc->hw, E1000_ICR);
	if (icr & E1000_ICR_LSC) {
		sc->hw.mac.get_link_status = 1;
		igb_update_link_status(sc);
	}

	E1000_WRITE_REG(&sc->hw, E1000_EIMS, sc->sts_intr_mask);
}

static void
igb_set_ring_inuse(struct igb_softc *sc, boolean_t polling)
{
	sc->rx_ring_inuse = igb_get_rxring_inuse(sc, polling);
	sc->tx_ring_inuse = igb_get_txring_inuse(sc, polling);
	if (bootverbose) {
		if_printf(&sc->arpcom.ac_if,
		    "RX rings %d/%d, TX rings %d/%d\n",
		    sc->rx_ring_inuse, sc->rx_ring_cnt,
		    sc->tx_ring_inuse, sc->tx_ring_cnt);
	}
}

static int
igb_get_rxring_inuse(const struct igb_softc *sc, boolean_t polling)
{
	if (!IGB_ENABLE_HWRSS(sc))
		return 1;

	if (polling)
		return sc->rx_ring_cnt;
	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
		return IGB_MIN_RING_RSS;
	else
		return sc->rx_ring_msix;
}

static int
igb_get_txring_inuse(const struct igb_softc *sc, boolean_t polling)
{
	if (!IGB_ENABLE_HWTSS(sc))
		return 1;

	if (polling)
		return sc->tx_ring_cnt;
	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
		return IGB_MIN_RING;
	else
		return sc->tx_ring_msix;
}
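
/*
 * Summary of the ring-count selection above: with hardware RSS/TSS
 * disabled only one ring is used.  Otherwise polling mode uses every
 * ring, MSI-X uses the rx_ring_msix/tx_ring_msix counts computed at
 * allocation time, and MSI/legacy falls back to IGB_MIN_RING_RSS RX
 * rings and IGB_MIN_RING TX rings.
 */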

static int
igb_tso_pullup(struct igb_tx_ring *txr, struct mbuf **mp)
{
	int hoff, iphlen, thoff;
	struct mbuf *m;

	m = *mp;
	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));

	iphlen = m->m_pkthdr.csum_iphlen;
	thoff = m->m_pkthdr.csum_thlen;
	hoff = m->m_pkthdr.csum_lhlen;

	KASSERT(iphlen > 0, ("invalid ip hlen"));
	KASSERT(thoff > 0, ("invalid tcp hlen"));
	KASSERT(hoff > 0, ("invalid ether hlen"));

	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
		m = m_pullup(m, hoff + iphlen + thoff);
		if (m == NULL) {
			*mp = NULL;
			return ENOBUFS;
		}
		*mp = m;
	}
	if (txr->tx_flags & IGB_TXFLAG_TSO_IPLEN0) {
		struct ip *ip;

		ip = mtodoff(m, struct ip *, hoff);
		ip->ip_len = 0;
	}

	return 0;
}
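
/*
 * Example (typical values): for TCP over IPv4 on Ethernet without
 * options, hoff + iphlen + thoff is 14 + 20 + 20 = 54 bytes, so
 * igb_tso_pullup() only calls m_pullup() when the first mbuf holds
 * less than the full 54-byte header chain.
 */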

static void
igb_tso_ctx(struct igb_tx_ring *txr, struct mbuf *m, uint32_t *hlen)
{
	struct e1000_adv_tx_context_desc *TXD;
	uint32_t vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
	int hoff, ctxd, iphlen, thoff;

	iphlen = m->m_pkthdr.csum_iphlen;
	thoff = m->m_pkthdr.csum_thlen;
	hoff = m->m_pkthdr.csum_lhlen;

	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;

	ctxd = txr->next_avail_desc;
	TXD = (struct e1000_adv_tx_context_desc *)&txr->tx_base[ctxd];

	if (m->m_flags & M_VLANTAG) {
		uint16_t vlantag;

		vlantag = htole16(m->m_pkthdr.ether_vlantag);
		vlan_macip_lens |= (vlantag << E1000_ADVTXD_VLAN_SHIFT);
	}

	vlan_macip_lens |= (hoff << E1000_ADVTXD_MACLEN_SHIFT);
	vlan_macip_lens |= iphlen;

	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;

	mss_l4len_idx |= (m->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
	mss_l4len_idx |= (thoff << E1000_ADVTXD_L4LEN_SHIFT);

	/*
	 * 82575 needs the TX context index added; the queue
	 * index is used as TX context index here.
	 */
	if (txr->sc->hw.mac.type == e1000_82575)
		mss_l4len_idx |= txr->me << 4;

	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
	TXD->seqnum_seed = htole32(0);
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	/* We've consumed the first desc, adjust counters */
	if (++ctxd == txr->num_tx_desc)
		ctxd = 0;
	txr->next_avail_desc = ctxd;
	--txr->tx_avail;

	*hlen = hoff + iphlen + thoff;
}

static void
igb_setup_serializer(struct igb_softc *sc)
{
	const struct igb_msix_data *msix;
	int i, j;

	/*
	 * Allocate serializer array
	 */

	/* Main + TX + RX */
	sc->serialize_cnt = 1 + sc->tx_ring_cnt + sc->rx_ring_cnt;

	/* Aggregate TX/RX MSI-X */
	for (i = 0; i < sc->msix_cnt; ++i) {
		msix = &sc->msix_data[i];
		if (msix->msix_serialize == &msix->msix_serialize0)
			sc->serialize_cnt++;
	}

	sc->serializes =
	    kmalloc(sc->serialize_cnt * sizeof(struct lwkt_serialize *),
	    M_DEVBUF, M_WAITOK | M_ZERO);

	/*
	 * Setup serializers
	 *
	 * NOTE: Order is critical
	 */

	i = 0;

	KKASSERT(i < sc->serialize_cnt);
	sc->serializes[i++] = &sc->main_serialize;

	for (j = 0; j < sc->msix_cnt; ++j) {
		msix = &sc->msix_data[j];
		if (msix->msix_serialize == &msix->msix_serialize0) {
			KKASSERT(i < sc->serialize_cnt);
			sc->serializes[i++] = msix->msix_serialize;
		}
	}

	for (j = 0; j < sc->tx_ring_cnt; ++j) {
		KKASSERT(i < sc->serialize_cnt);
		sc->serializes[i++] = &sc->tx_rings[j].tx_serialize;
	}

	for (j = 0; j < sc->rx_ring_cnt; ++j) {
		KKASSERT(i < sc->serialize_cnt);
		sc->serializes[i++] = &sc->rx_rings[j].rx_serialize;
	}

	KKASSERT(i == sc->serialize_cnt);
}
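
/*
 * Resulting array layout (illustrative: 2 TX rings, 2 RX rings, no
 * aggregate TX/RX MSI-X serializers, so serialize_cnt == 5):
 *
 *   serializes[0]	&sc->main_serialize
 *   serializes[1-2]	tx_rings[0-1].tx_serialize
 *   serializes[3-4]	rx_rings[0-1].rx_serialize
 *
 * The ifnet_serialize_array_*() helpers walk this array in order,
 * which is why the NOTE above marks the order as critical.
 */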

static void
igb_msix_rx_conf(struct igb_softc *sc, int i, int *x0, int offset)
{
	int x = *x0;

	for (; i < sc->rx_ring_msix; ++i) {
		struct igb_rx_ring *rxr = &sc->rx_rings[i];
		struct igb_msix_data *msix;

		KKASSERT(x < sc->msix_cnt);
		msix = &sc->msix_data[x++];

		rxr->rx_intr_bit = msix->msix_vector;
		rxr->rx_intr_mask = msix->msix_mask;

		msix->msix_serialize = &rxr->rx_serialize;
		msix->msix_func = igb_msix_rx;
		msix->msix_arg = rxr;

		msix->msix_cpuid = i + offset;
		KKASSERT(msix->msix_cpuid < ncpus2);

		ksnprintf(msix->msix_desc, sizeof(msix->msix_desc), "%s rx%d",
		    device_get_nameunit(sc->dev), i);

		msix->msix_rate = IGB_MSIX_RX_RATE;
		ksnprintf(msix->msix_rate_desc, sizeof(msix->msix_rate_desc),
		    "RX%d interrupt rate", i);
	}
	*x0 = x;
}

static void
igb_msix_tx_conf(struct igb_softc *sc, int i, int *x0, int offset)
{
	int x = *x0;

	for (; i < sc->tx_ring_msix; ++i) {
		struct igb_tx_ring *txr = &sc->tx_rings[i];
		struct igb_msix_data *msix;

		KKASSERT(x < sc->msix_cnt);
		msix = &sc->msix_data[x++];

		txr->tx_intr_bit = msix->msix_vector;
		txr->tx_intr_mask = msix->msix_mask;

		msix->msix_serialize = &txr->tx_serialize;
		msix->msix_func = igb_msix_tx;
		msix->msix_arg = txr;

		msix->msix_cpuid = i + offset;
		KKASSERT(msix->msix_cpuid < ncpus2);
		txr->tx_intr_cpuid = msix->msix_cpuid;

		ksnprintf(msix->msix_desc, sizeof(msix->msix_desc), "%s tx%d",
		    device_get_nameunit(sc->dev), i);

		msix->msix_rate = IGB_MSIX_TX_RATE;
		ksnprintf(msix->msix_rate_desc, sizeof(msix->msix_rate_desc),
		    "TX%d interrupt rate", i);
	}
	*x0 = x;
}

static void
igb_msix_rxtx(void *arg)
{
	struct igb_msix_data *msix = arg;
	struct igb_rx_ring *rxr = msix->msix_rx;
	struct igb_tx_ring *txr = msix->msix_tx;

	ASSERT_SERIALIZED(&msix->msix_serialize0);

	lwkt_serialize_enter(&rxr->rx_serialize);
	igb_rxeof(rxr, -1);
	lwkt_serialize_exit(&rxr->rx_serialize);

	lwkt_serialize_enter(&txr->tx_serialize);
	igb_txeof(txr);
	if (!ifsq_is_empty(txr->ifsq))
		ifsq_devstart(txr->ifsq);
	lwkt_serialize_exit(&txr->tx_serialize);

	E1000_WRITE_REG(&msix->msix_sc->hw, E1000_EIMS, msix->msix_mask);
}

static void
igb_set_timer_cpuid(struct igb_softc *sc, boolean_t polling)
{
	if (polling || sc->intr_type == PCI_INTR_TYPE_MSIX)
		sc->timer_cpuid = 0;	/* XXX fixed */
	else
		sc->timer_cpuid = rman_get_cpuid(sc->intr_res);
}
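
/*
 * Note on the MSI-X handlers above (igb_msix_rx/tx/status and
 * igb_msix_rxtx): each one finishes by writing only its own vector's
 * bit to EIMS.  Assuming the usual auto-mask configuration, the
 * vector stays masked while its service routine runs and is re-armed
 * by that final write, so a handler never races a second instance of
 * itself.
 */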