/*
 * Copyright (c) 2001-2011, Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_ifpoll.h"
#include "opt_igb.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/rman.h>
#include <sys/serialize.h>
#include <sys/serialize2.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/ifq_var.h>
#include <net/toeplitz.h>
#include <net/toeplitz2.h>
#include <net/vlan/if_vlan_var.h>
#include <net/vlan/if_vlan_ether.h>
#include <net/if_poll.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>

#include <bus/pci/pcivar.h>
#include <bus/pci/pcireg.h>

#include <dev/netif/ig_hal/e1000_api.h>
#include <dev/netif/ig_hal/e1000_82575.h>
#include <dev/netif/igb/if_igb.h>

#ifdef IGB_RSS_DEBUG
#define IGB_RSS_DPRINTF(sc, lvl, fmt, ...) \
do { \
	if (sc->rss_debug >= lvl) \
		if_printf(&sc->arpcom.ac_if, fmt, __VA_ARGS__); \
} while (0)
#else	/* !IGB_RSS_DEBUG */
#define IGB_RSS_DPRINTF(sc, lvl, fmt, ...)	((void)0)
#endif	/* IGB_RSS_DEBUG */

#define IGB_NAME	"Intel(R) PRO/1000 "
#define IGB_DEVICE(id)	\
	{ IGB_VENDOR_ID, E1000_DEV_ID_##id, IGB_NAME #id }
#define IGB_DEVICE_NULL	{ 0, 0, NULL }
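
/*
 * For example, IGB_DEVICE(82576) expands (via token pasting and string
 * concatenation) to:
 *	{ IGB_VENDOR_ID, E1000_DEV_ID_82576, "Intel(R) PRO/1000 82576" }
 */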
static struct igb_device {
	uint16_t	vid;
	uint16_t	did;
	const char	*desc;
} igb_devices[] = {
	IGB_DEVICE(82575EB_COPPER),
	IGB_DEVICE(82575EB_FIBER_SERDES),
	IGB_DEVICE(82575GB_QUAD_COPPER),
	IGB_DEVICE(82576),
	IGB_DEVICE(82576_NS),
	IGB_DEVICE(82576_NS_SERDES),
	IGB_DEVICE(82576_FIBER),
	IGB_DEVICE(82576_SERDES),
	IGB_DEVICE(82576_SERDES_QUAD),
	IGB_DEVICE(82576_QUAD_COPPER),
	IGB_DEVICE(82576_QUAD_COPPER_ET2),
	IGB_DEVICE(82576_VF),
	IGB_DEVICE(82580_COPPER),
	IGB_DEVICE(82580_FIBER),
	IGB_DEVICE(82580_SERDES),
	IGB_DEVICE(82580_SGMII),
	IGB_DEVICE(82580_COPPER_DUAL),
	IGB_DEVICE(82580_QUAD_FIBER),
	IGB_DEVICE(DH89XXCC_SERDES),
	IGB_DEVICE(DH89XXCC_SGMII),
	IGB_DEVICE(DH89XXCC_SFP),
	IGB_DEVICE(DH89XXCC_BACKPLANE),
	IGB_DEVICE(I350_COPPER),
	IGB_DEVICE(I350_FIBER),
	IGB_DEVICE(I350_SERDES),
	IGB_DEVICE(I350_SGMII),
	IGB_DEVICE(I350_VF),
	IGB_DEVICE(I210_COPPER),
	IGB_DEVICE(I210_COPPER_IT),
	IGB_DEVICE(I210_COPPER_OEM1),
	IGB_DEVICE(I210_COPPER_FLASHLESS),
	IGB_DEVICE(I210_SERDES_FLASHLESS),
	IGB_DEVICE(I210_FIBER),
	IGB_DEVICE(I210_SERDES),
	IGB_DEVICE(I210_SGMII),
	IGB_DEVICE(I211_COPPER),
	IGB_DEVICE(I354_BACKPLANE_1GBPS),
	IGB_DEVICE(I354_SGMII),

	/* required last entry */
	IGB_DEVICE_NULL
};

static int	igb_probe(device_t);
static int	igb_attach(device_t);
static int	igb_detach(device_t);
static int	igb_shutdown(device_t);
static int	igb_suspend(device_t);
static int	igb_resume(device_t);

static boolean_t igb_is_valid_ether_addr(const uint8_t *);
static void	igb_setup_ifp(struct igb_softc *);
static boolean_t igb_txcsum_ctx(struct igb_tx_ring *, struct mbuf *);
static int	igb_tso_pullup(struct igb_tx_ring *, struct mbuf **);
static void	igb_tso_ctx(struct igb_tx_ring *, struct mbuf *, uint32_t *);
static void	igb_add_sysctl(struct igb_softc *);
static int	igb_sysctl_intr_rate(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_msix_rate(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
static void	igb_set_ring_inuse(struct igb_softc *, boolean_t);
static int	igb_get_rxring_inuse(const struct igb_softc *, boolean_t);
static int	igb_get_txring_inuse(const struct igb_softc *, boolean_t);
static void	igb_set_timer_cpuid(struct igb_softc *, boolean_t);
#ifdef IFPOLL_ENABLE
static int	igb_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS);
#endif

static void	igb_vf_init_stats(struct igb_softc *);
static void	igb_reset(struct igb_softc *);
static void	igb_update_stats_counters(struct igb_softc *);
static void	igb_update_vf_stats_counters(struct igb_softc *);
static void	igb_update_link_status(struct igb_softc *);
static void	igb_init_tx_unit(struct igb_softc *);
static void	igb_init_rx_unit(struct igb_softc *);

static void	igb_set_vlan(struct igb_softc *);
static void	igb_set_multi(struct igb_softc *);
static void	igb_set_promisc(struct igb_softc *);
static void	igb_disable_promisc(struct igb_softc *);
static int	igb_alloc_rings(struct igb_softc *);
static void	igb_free_rings(struct igb_softc *);
static int	igb_create_tx_ring(struct igb_tx_ring *);
static int	igb_create_rx_ring(struct igb_rx_ring *);
static void	igb_free_tx_ring(struct igb_tx_ring *);
static void	igb_free_rx_ring(struct igb_rx_ring *);
static void	igb_destroy_tx_ring(struct igb_tx_ring *, int);
static void	igb_destroy_rx_ring(struct igb_rx_ring *, int);
static void	igb_init_tx_ring(struct igb_tx_ring *);
static int	igb_init_rx_ring(struct igb_rx_ring *);
static int	igb_newbuf(struct igb_rx_ring *, int, boolean_t);
static int	igb_encap(struct igb_tx_ring *, struct mbuf **, int *, int *);
static void	igb_rx_refresh(struct igb_rx_ring *, int);
static void	igb_setup_serializer(struct igb_softc *);

static void	igb_stop(struct igb_softc *);
static void	igb_init(void *);
static int	igb_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
static void	igb_media_status(struct ifnet *, struct ifmediareq *);
static int	igb_media_change(struct ifnet *);
static void	igb_timer(void *);
static void	igb_watchdog(struct ifaltq_subque *);
static void	igb_start(struct ifnet *, struct ifaltq_subque *);
#ifdef IFPOLL_ENABLE
static void	igb_npoll(struct ifnet *, struct ifpoll_info *);
static void	igb_npoll_rx(struct ifnet *, void *, int);
static void	igb_npoll_tx(struct ifnet *, void *, int);
static void	igb_npoll_status(struct ifnet *);
#endif
static void	igb_serialize(struct ifnet *, enum ifnet_serialize);
static void	igb_deserialize(struct ifnet *, enum ifnet_serialize);
static int	igb_tryserialize(struct ifnet *, enum ifnet_serialize);
#ifdef INVARIANTS
static void	igb_serialize_assert(struct ifnet *, enum ifnet_serialize,
		    boolean_t);
#endif

static void	igb_intr(void *);
static void	igb_intr_shared(void *);
static void	igb_rxeof(struct igb_rx_ring *, int);
static void	igb_txeof(struct igb_tx_ring *);
static void	igb_set_eitr(struct igb_softc *, int, int);
static void	igb_enable_intr(struct igb_softc *);
static void	igb_disable_intr(struct igb_softc *);
static void	igb_init_unshared_intr(struct igb_softc *);
static void	igb_init_intr(struct igb_softc *);
static int	igb_setup_intr(struct igb_softc *);
static void	igb_set_txintr_mask(struct igb_tx_ring *, int *, int);
static void	igb_set_rxintr_mask(struct igb_rx_ring *, int *, int);
static void	igb_set_intr_mask(struct igb_softc *);
static int	igb_alloc_intr(struct igb_softc *);
static void	igb_free_intr(struct igb_softc *);
static void	igb_teardown_intr(struct igb_softc *);
static void	igb_msix_try_alloc(struct igb_softc *);
static void	igb_msix_rx_conf(struct igb_softc *, int, int *, int);
static void	igb_msix_tx_conf(struct igb_softc *, int, int *, int);
static void	igb_msix_free(struct igb_softc *, boolean_t);
static int	igb_msix_setup(struct igb_softc *);
static void	igb_msix_teardown(struct igb_softc *, int);
static void	igb_msix_rx(void *);
static void	igb_msix_tx(void *);
static void	igb_msix_status(void *);
static void	igb_msix_rxtx(void *);

/* Management and WOL Support */
static void	igb_get_mgmt(struct igb_softc *);
static void	igb_rel_mgmt(struct igb_softc *);
static void	igb_get_hw_control(struct igb_softc *);
static void	igb_rel_hw_control(struct igb_softc *);
static void	igb_enable_wol(device_t);
static device_method_t igb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		igb_probe),
	DEVMETHOD(device_attach,	igb_attach),
	DEVMETHOD(device_detach,	igb_detach),
	DEVMETHOD(device_shutdown,	igb_shutdown),
	DEVMETHOD(device_suspend,	igb_suspend),
	DEVMETHOD(device_resume,	igb_resume),
	DEVMETHOD_END
};

static driver_t igb_driver = {
	"igb",
	igb_methods,
	sizeof(struct igb_softc),
};

static devclass_t igb_devclass;

DECLARE_DUMMY_MODULE(if_igb);
MODULE_DEPEND(igb, ig_hal, 1, 1, 1);
DRIVER_MODULE(if_igb, pci, igb_driver, igb_devclass, NULL, NULL);

static int	igb_rxd = IGB_DEFAULT_RXD;
static int	igb_txd = IGB_DEFAULT_TXD;
static int	igb_rxr = 0;
static int	igb_txr = 0;
static int	igb_msi_enable = 1;
static int	igb_msix_enable = 1;
static int	igb_eee_disabled = 1;	/* Energy Efficient Ethernet */
static int	igb_fc_setting = e1000_fc_full;

/*
 * DMA Coalescing, only for i350 - default to off,
 * this feature is for power savings
 */
static int	igb_dma_coalesce = 0;

TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
TUNABLE_INT("hw.igb.rxr", &igb_rxr);
TUNABLE_INT("hw.igb.txr", &igb_txr);
TUNABLE_INT("hw.igb.msi.enable", &igb_msi_enable);
TUNABLE_INT("hw.igb.msix.enable", &igb_msix_enable);
TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);

/* i350 specific */
TUNABLE_INT("hw.igb.eee_disabled", &igb_eee_disabled);
TUNABLE_INT("hw.igb.dma_coalesce", &igb_dma_coalesce);

static __inline void
igb_rxcsum(uint32_t staterr, struct mbuf *mp)
{
	/* Ignore Checksum bit is set */
	if (staterr & E1000_RXD_STAT_IXSM)
		return;

	if ((staterr & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
	    E1000_RXD_STAT_IPCS)
		mp->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID;

	if (staterr & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
		if ((staterr & E1000_RXDEXT_STATERR_TCPE) == 0) {
			mp->m_pkthdr.csum_flags |= CSUM_DATA_VALID |
			    CSUM_PSEUDO_HDR | CSUM_FRAG_NOT_CHECKED;
			mp->m_pkthdr.csum_data = htons(0xffff);
		}
	}
}

static __inline struct pktinfo *
igb_rssinfo(struct mbuf *m, struct pktinfo *pi,
    uint32_t hash, uint32_t hashtype, uint32_t staterr)
{
	switch (hashtype) {
	case E1000_RXDADV_RSSTYPE_IPV4_TCP:
		pi->pi_netisr = NETISR_IP;
		pi->pi_flags = 0;
		pi->pi_l3proto = IPPROTO_TCP;
		break;

	case E1000_RXDADV_RSSTYPE_IPV4:
		if (staterr & E1000_RXD_STAT_IXSM)
			return NULL;

		if ((staterr &
		     (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
		    E1000_RXD_STAT_TCPCS) {
			pi->pi_netisr = NETISR_IP;
			pi->pi_flags = 0;
			pi->pi_l3proto = IPPROTO_UDP;
			break;
		}
		/* FALL THROUGH */
	default:
		return NULL;
	}

	m->m_flags |= M_HASH;
	m->m_pkthdr.hash = toeplitz_hash(hash);
	return pi;
}
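
/*
 * Note on the RSSTYPE_IPV4 case above: with UDP 4-tuple hashing left
 * disabled, the hardware reports UDP datagrams as plain IPv4 hash type;
 * a packet whose L4 checksum was verified (TCPCS set, TCPE clear) in
 * that case is therefore assumed to carry UDP.
 */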
static int
igb_probe(device_t dev)
{
	const struct igb_device *d;
	uint16_t vid, did;

	vid = pci_get_vendor(dev);
	did = pci_get_device(dev);

	for (d = igb_devices; d->desc != NULL; ++d) {
		if (vid == d->vid && did == d->did) {
			device_set_desc(dev, d->desc);
			return 0;
		}
	}
	return ENXIO;
}

static int
igb_attach(device_t dev)
{
	struct igb_softc *sc = device_get_softc(dev);
	uint16_t eeprom_data;
	int error = 0, ring_max;
#ifdef IFPOLL_ENABLE
	int offset, offset_def;
#endif

#ifdef notyet
	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_nvm_info, "I", "NVM Information");
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
#endif

	callout_init_mp(&sc->timer);
	lwkt_serialize_init(&sc->main_serialize);

	if_initname(&sc->arpcom.ac_if, device_get_name(dev),
	    device_get_unit(dev));
	sc->dev = sc->osdep.dev = dev;

	/*
	 * Determine hardware and mac type
	 */
	sc->hw.vendor_id = pci_get_vendor(dev);
	sc->hw.device_id = pci_get_device(dev);
	sc->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
	sc->hw.subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	sc->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);

	if (e1000_set_mac_type(&sc->hw))
		return ENXIO;

	/* Are we a VF device? */
	if (sc->hw.mac.type == e1000_vfadapt ||
	    sc->hw.mac.type == e1000_vfadapt_i350)
		sc->vf_ifp = 1;
	else
		sc->vf_ifp = 0;

	/*
	 * Configure total supported RX/TX ring count
	 */
	switch (sc->hw.mac.type) {
	case e1000_82575:
		ring_max = IGB_MAX_RING_82575;
		break;

	case e1000_82576:
		ring_max = IGB_MAX_RING_82576;
		break;

	case e1000_82580:
		ring_max = IGB_MAX_RING_82580;
		break;

	case e1000_i350:
		ring_max = IGB_MAX_RING_I350;
		break;

	case e1000_i354:
		ring_max = IGB_MAX_RING_I354;
		break;

	case e1000_i210:
		ring_max = IGB_MAX_RING_I210;
		break;

	case e1000_i211:
		ring_max = IGB_MAX_RING_I211;
		break;

	default:
		ring_max = IGB_MIN_RING;
		break;
	}

	sc->rx_ring_cnt = device_getenv_int(dev, "rxr", igb_rxr);
	sc->rx_ring_cnt = if_ring_count2(sc->rx_ring_cnt, ring_max);
#ifdef IGB_RSS_DEBUG
	sc->rx_ring_cnt = device_getenv_int(dev, "rxr_debug", sc->rx_ring_cnt);
#endif
	sc->rx_ring_inuse = sc->rx_ring_cnt;

	sc->tx_ring_cnt = device_getenv_int(dev, "txr", igb_txr);
	sc->tx_ring_cnt = if_ring_count2(sc->tx_ring_cnt, ring_max);
#ifdef IGB_TSS_DEBUG
	sc->tx_ring_cnt = device_getenv_int(dev, "txr_debug", sc->tx_ring_cnt);
#endif
	sc->tx_ring_inuse = sc->tx_ring_cnt;

	/* Enable bus mastering */
	pci_enable_busmaster(dev);

	/*
	 * Allocate IO memory
	 */
	sc->mem_rid = PCIR_BAR(0);
	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->mem_rid,
	    RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "Unable to allocate bus resource: memory\n");
		error = ENXIO;
		goto failed;
	}
	sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->mem_res);
	sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->mem_res);

	sc->hw.hw_addr = (uint8_t *)&sc->osdep.mem_bus_space_handle;

	/* Save PCI command register for Shared Code */
	sc->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
	sc->hw.back = &sc->osdep;

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&sc->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto failed;
	}

	e1000_get_bus_info(&sc->hw);

	sc->hw.mac.autoneg = DO_AUTO_NEG;
	sc->hw.phy.autoneg_wait_to_complete = FALSE;
	sc->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (sc->hw.phy.media_type == e1000_media_type_copper) {
		sc->hw.phy.mdix = AUTO_ALL_MODES;
		sc->hw.phy.disable_polarity_correction = FALSE;
		sc->hw.phy.ms_type = IGB_MASTER_SLAVE;
	}

	/* Set the frame limits assuming standard ethernet sized frames. */
	sc->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;

	/* Allocate RX/TX rings */
	error = igb_alloc_rings(sc);
	if (error)
		goto failed;
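
	/*
	 * Illustration of the default offset computed below, with
	 * hypothetical values: 2 RX rings on a 4-CPU system (ncpus2 == 4)
	 * gives unit 1 a default offset of (2 * 1) % 4 == 2, i.e. its
	 * rings poll on CPUs 2 and 3 while unit 0 uses CPUs 0 and 1.
	 */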
#ifdef IFPOLL_ENABLE
	/*
	 * NPOLLING RX CPU offset
	 */
	if (sc->rx_ring_cnt == ncpus2) {
		offset = 0;
	} else {
		offset_def = (sc->rx_ring_cnt * device_get_unit(dev)) % ncpus2;
		offset = device_getenv_int(dev, "npoll.rxoff", offset_def);
		if (offset >= ncpus2 ||
		    offset % sc->rx_ring_cnt != 0) {
			device_printf(dev, "invalid npoll.rxoff %d, use %d\n",
			    offset, offset_def);
			offset = offset_def;
		}
	}
	sc->rx_npoll_off = offset;

	/*
	 * NPOLLING TX CPU offset
	 */
	if (sc->tx_ring_cnt == ncpus2) {
		offset = 0;
	} else {
		offset_def = (sc->tx_ring_cnt * device_get_unit(dev)) % ncpus2;
		offset = device_getenv_int(dev, "npoll.txoff", offset_def);
		if (offset >= ncpus2 ||
		    offset % sc->tx_ring_cnt != 0) {
			device_printf(dev, "invalid npoll.txoff %d, use %d\n",
			    offset, offset_def);
			offset = offset_def;
		}
	}
	sc->tx_npoll_off = offset;
#endif

	/* Allocate interrupt */
	error = igb_alloc_intr(sc);
	if (error)
		goto failed;

	/* Setup serializers */
	igb_setup_serializer(sc);

	/* Allocate the appropriate stats memory */
	if (sc->vf_ifp) {
		sc->stats = kmalloc(sizeof(struct e1000_vf_stats), M_DEVBUF,
		    M_WAITOK | M_ZERO);
		igb_vf_init_stats(sc);
	} else {
		sc->stats = kmalloc(sizeof(struct e1000_hw_stats), M_DEVBUF,
		    M_WAITOK | M_ZERO);
	}

	/* Allocate multicast array memory. */
	sc->mta = kmalloc(ETHER_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES,
	    M_DEVBUF, M_WAITOK);

	/* Some adapter-specific advanced features */
	if (sc->hw.mac.type >= e1000_i350) {
#ifdef notyet
		igb_set_sysctl_value(adapter, "dma_coalesce",
		    "configure dma coalesce",
		    &adapter->dma_coalesce, igb_dma_coalesce);
		igb_set_sysctl_value(adapter, "eee_disabled",
		    "enable Energy Efficient Ethernet",
		    &adapter->hw.dev_spec._82575.eee_disable,
		    igb_eee_disabled);
#else
		sc->dma_coalesce = igb_dma_coalesce;
		sc->hw.dev_spec._82575.eee_disable = igb_eee_disabled;
#endif
		if (sc->hw.phy.media_type == e1000_media_type_copper) {
			if (sc->hw.mac.type == e1000_i354)
				e1000_set_eee_i354(&sc->hw);
			else
				e1000_set_eee_i350(&sc->hw);
		}
	}

	/*
	 * Start from a known state; this is important for reading the
	 * NVM and MAC address from it.
	 */
	e1000_reset_hw(&sc->hw);

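	/*
	 * i210/i211 are deliberately skipped by the validation below;
	 * they keep their configuration in iNVM/Flash rather than a
	 * conventional EEPROM image carrying this checksum.
	 */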
	/* Make sure we have a good EEPROM before we read from it */
	if (sc->hw.mac.type != e1000_i210 && sc->hw.mac.type != e1000_i211 &&
	    e1000_validate_nvm_checksum(&sc->hw) < 0) {
		/*
		 * Some PCI-E parts fail the first check due to
		 * the link being in sleep state; call it again.
		 * If it fails a second time it's a real issue.
		 */
		if (e1000_validate_nvm_checksum(&sc->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto failed;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&sc->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto failed;
	}
	if (!igb_is_valid_ether_addr(sc->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto failed;
	}

	/* Setup OS specific network interface */
	igb_setup_ifp(sc);

	/* Add sysctl tree; must come after igb_setup_ifp() */
	igb_add_sysctl(sc);

	/* Now get a good starting state */
	igb_reset(sc);

	/* Initialize statistics */
	igb_update_stats_counters(sc);

	sc->hw.mac.get_link_status = 1;
	igb_update_link_status(sc);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&sc->hw)) {
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");
	}

	/*
	 * Setup Wake-on-Lan
	 */
	/* APME bit in EEPROM is mapped to WUC.APME */
	eeprom_data = E1000_READ_REG(&sc->hw, E1000_WUC) & E1000_WUC_APME;
	if (eeprom_data)
		sc->wol = E1000_WUFC_MAG;
	/* XXX disable WOL */
	sc->wol = 0;

#ifdef notyet
	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
#endif

#ifdef notyet
	igb_add_hw_stats(adapter);
#endif

	/*
	 * Disable interrupt to prevent spurious interrupts (line based
	 * interrupt, MSI or even MSI-X), which had been observed on
	 * several types of LOMs, from being handled.
	 */
	igb_disable_intr(sc);

	error = igb_setup_intr(sc);
	if (error) {
		ether_ifdetach(&sc->arpcom.ac_if);
		goto failed;
	}
	return 0;

failed:
	igb_detach(dev);
	return error;
}
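
/*
 * Teardown order below matters: the chip is stopped and the interrupt
 * torn down while the ifnet is still serialized, and only then is
 * ether_ifdetach() called and the DMA/ring memory released.
 */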
static int
igb_detach(device_t dev)
{
	struct igb_softc *sc = device_get_softc(dev);

	if (device_is_attached(dev)) {
		struct ifnet *ifp = &sc->arpcom.ac_if;

		ifnet_serialize_all(ifp);

		igb_stop(sc);

		e1000_phy_hw_reset(&sc->hw);

		/* Give control back to firmware */
		igb_rel_mgmt(sc);
		igb_rel_hw_control(sc);

		if (sc->wol) {
			E1000_WRITE_REG(&sc->hw, E1000_WUC, E1000_WUC_PME_EN);
			E1000_WRITE_REG(&sc->hw, E1000_WUFC, sc->wol);
			igb_enable_wol(dev);
		}

		igb_teardown_intr(sc);

		ifnet_deserialize_all(ifp);

		ether_ifdetach(ifp);
	} else if (sc->mem_res != NULL) {
		igb_rel_hw_control(sc);
	}
	bus_generic_detach(dev);

	if (sc->sysctl_tree != NULL)
		sysctl_ctx_free(&sc->sysctl_ctx);

	igb_free_intr(sc);

	if (sc->msix_mem_res != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_mem_rid,
		    sc->msix_mem_res);
	}
	if (sc->mem_res != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid,
		    sc->mem_res);
	}

	igb_free_rings(sc);

	if (sc->mta != NULL)
		kfree(sc->mta, M_DEVBUF);
	if (sc->stats != NULL)
		kfree(sc->stats, M_DEVBUF);
	if (sc->serializes != NULL)
		kfree(sc->serializes, M_DEVBUF);

	return 0;
}

static int
igb_shutdown(device_t dev)
{
	return igb_suspend(dev);
}

static int
igb_suspend(device_t dev)
{
	struct igb_softc *sc = device_get_softc(dev);
	struct ifnet *ifp = &sc->arpcom.ac_if;

	ifnet_serialize_all(ifp);

	igb_stop(sc);

	igb_rel_mgmt(sc);
	igb_rel_hw_control(sc);

	if (sc->wol) {
		E1000_WRITE_REG(&sc->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&sc->hw, E1000_WUFC, sc->wol);
		igb_enable_wol(dev);
	}

	ifnet_deserialize_all(ifp);

	return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
	struct igb_softc *sc = device_get_softc(dev);
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int i;

	ifnet_serialize_all(ifp);

	igb_init(sc);
	igb_get_mgmt(sc);

	for (i = 0; i < sc->tx_ring_inuse; ++i)
		ifsq_devstart_sched(sc->tx_rings[i].ifsq);

	ifnet_deserialize_all(ifp);

	return bus_generic_resume(dev);
}
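
/*
 * All ioctl paths below run with the ifnet fully serialized (asserted
 * on entry), so no extra locking is needed when they reprogram ring or
 * filter state.
 */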
static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
{
	struct igb_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int max_frame_size, mask, reinit;
	int error = 0;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	switch (command) {
	case SIOCSIFMTU:
		max_frame_size = 9234;
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		sc->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN +
		    ETHER_CRC_LEN;

		if (ifp->if_flags & IFF_RUNNING)
			igb_init(sc);
		break;

	case SIOCSIFFLAGS:
		if (ifp->if_flags & IFF_UP) {
			if (ifp->if_flags & IFF_RUNNING) {
				if ((ifp->if_flags ^ sc->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					igb_disable_promisc(sc);
					igb_set_promisc(sc);
				}
			} else {
				igb_init(sc);
			}
		} else if (ifp->if_flags & IFF_RUNNING) {
			igb_stop(sc);
		}
		sc->if_flags = ifp->if_flags;
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		if (ifp->if_flags & IFF_RUNNING) {
			igb_disable_intr(sc);
			igb_set_multi(sc);
#ifdef IFPOLL_ENABLE
			if (!(ifp->if_flags & IFF_NPOLLING))
#endif
				igb_enable_intr(sc);
		}
		break;

	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		if (e1000_check_reset_block(&sc->hw)) {
			if_printf(ifp, "Media change is "
			    "blocked due to SOL/IDER session.\n");
			break;
		}
		/* FALL THROUGH */

	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
		break;

	case SIOCSIFCAP:
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_RXCSUM) {
			ifp->if_capenable ^= IFCAP_RXCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_TXCSUM) {
			ifp->if_capenable ^= IFCAP_TXCSUM;
			if (ifp->if_capenable & IFCAP_TXCSUM)
				ifp->if_hwassist |= IGB_CSUM_FEATURES;
			else
				ifp->if_hwassist &= ~IGB_CSUM_FEATURES;
		}
		if (mask & IFCAP_TSO) {
			ifp->if_capenable ^= IFCAP_TSO;
			if (ifp->if_capenable & IFCAP_TSO)
				ifp->if_hwassist |= CSUM_TSO;
			else
				ifp->if_hwassist &= ~CSUM_TSO;
		}
		if (mask & IFCAP_RSS)
			ifp->if_capenable ^= IFCAP_RSS;
		if (reinit && (ifp->if_flags & IFF_RUNNING))
			igb_init(sc);
		break;

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}
	return error;
}

static void
igb_init(void *xsc)
{
	struct igb_softc *sc = xsc;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	boolean_t polling;
	int i;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	igb_stop(sc);

	/* Get the latest mac address, user can use a LAA */
	bcopy(IF_LLADDR(ifp), sc->hw.mac.addr, ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&sc->hw, sc->hw.mac.addr, 0);

	igb_reset(sc);
	igb_update_link_status(sc);

	E1000_WRITE_REG(&sc->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Configure for OS presence */
	igb_get_mgmt(sc);

	polling = FALSE;
#ifdef IFPOLL_ENABLE
	if (ifp->if_flags & IFF_NPOLLING)
		polling = TRUE;
#endif

	/* Configure the used RX/TX rings */
	igb_set_ring_inuse(sc, polling);
	ifq_set_subq_mask(&ifp->if_snd, sc->tx_ring_inuse - 1);
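
	/*
	 * The mask-based subqueue mapping set up above works because the
	 * ring counts are normalized to powers of 2 at attach time (via
	 * if_ring_count2()), so (tx_ring_inuse - 1) is a valid AND-mask.
	 */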
	/* Initialize interrupt */
	igb_init_intr(sc);

	/* Prepare transmit descriptors and buffers */
	for (i = 0; i < sc->tx_ring_inuse; ++i)
		igb_init_tx_ring(&sc->tx_rings[i]);
	igb_init_tx_unit(sc);

	/* Setup Multicast table */
	igb_set_multi(sc);

#if 0
	/*
	 * Figure out the desired mbuf pool
	 * for doing jumbo/packetsplit
	 */
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;
#endif

	/* Prepare receive descriptors and buffers */
	for (i = 0; i < sc->rx_ring_inuse; ++i) {
		int error;

		error = igb_init_rx_ring(&sc->rx_rings[i]);
		if (error) {
			if_printf(ifp, "Could not setup receive structures\n");
			igb_stop(sc);
			return;
		}
	}
	igb_init_rx_unit(sc);

	/* Enable VLAN support */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		igb_set_vlan(sc);

	/* Don't lose promiscuous settings */
	igb_set_promisc(sc);

	ifp->if_flags |= IFF_RUNNING;
	for (i = 0; i < sc->tx_ring_inuse; ++i) {
		ifsq_clr_oactive(sc->tx_rings[i].ifsq);
		ifsq_watchdog_start(&sc->tx_rings[i].tx_watchdog);
	}

	igb_set_timer_cpuid(sc, polling);
	callout_reset_bycpu(&sc->timer, hz, igb_timer, sc, sc->timer_cpuid);
	e1000_clear_hw_cntrs_base_generic(&sc->hw);

	/* This clears any pending interrupts */
	E1000_READ_REG(&sc->hw, E1000_ICR);

	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (polling) {
		igb_disable_intr(sc);
	} else {
		igb_enable_intr(sc);
		E1000_WRITE_REG(&sc->hw, E1000_ICS, E1000_ICS_LSC);
	}

	/* Set Energy Efficient Ethernet */
	if (sc->hw.phy.media_type == e1000_media_type_copper) {
		if (sc->hw.mac.type == e1000_i354)
			e1000_set_eee_i354(&sc->hw);
		else
			e1000_set_eee_i350(&sc->hw);
	}
}

static void
igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct igb_softc *sc = ifp->if_softc;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	if ((ifp->if_flags & IFF_RUNNING) == 0)
		sc->hw.mac.get_link_status = 1;
	igb_update_link_status(sc);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!sc->link_active)
		return;

	ifmr->ifm_status |= IFM_ACTIVE;

	switch (sc->link_speed) {
	case 10:
		ifmr->ifm_active |= IFM_10_T;
		break;

	case 100:
		/*
		 * Support for 100Mb SFP - these are Fiber
		 * but the media type appears as serdes
		 */
		if (sc->hw.phy.media_type == e1000_media_type_internal_serdes)
			ifmr->ifm_active |= IFM_100_FX;
		else
			ifmr->ifm_active |= IFM_100_TX;
		break;

	case 1000:
		ifmr->ifm_active |= IFM_1000_T;
		break;
	}

	if (sc->link_duplex == FULL_DUPLEX)
		ifmr->ifm_active |= IFM_FDX;
	else
		ifmr->ifm_active |= IFM_HDX;
}

static int
igb_media_change(struct ifnet *ifp)
{
	struct igb_softc *sc = ifp->if_softc;
	struct ifmedia *ifm = &sc->media;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return EINVAL;

	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		sc->hw.mac.autoneg = DO_AUTO_NEG;
		sc->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;

	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		sc->hw.mac.autoneg = DO_AUTO_NEG;
		sc->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;

	case IFM_100_TX:
		sc->hw.mac.autoneg = FALSE;
		sc->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			sc->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			sc->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;

	case IFM_10_T:
		sc->hw.mac.autoneg = FALSE;
		sc->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			sc->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			sc->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;

	default:
		if_printf(ifp, "Unsupported media type\n");
		break;
	}

	igb_init(sc);

	return 0;
}
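
/*
 * VF interfaces cannot touch RCTL directly; promiscuous mode changes
 * are requested from the PF through the mailbox instead, which is why
 * both routines below special-case sc->vf_ifp.
 */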
static void
igb_set_promisc(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	struct e1000_hw *hw = &sc->hw;
	uint32_t reg;

	if (sc->vf_ifp) {
		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
		return;
	}

	reg = E1000_READ_REG(hw, E1000_RCTL);
	if (ifp->if_flags & IFF_PROMISC) {
		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
		E1000_WRITE_REG(hw, E1000_RCTL, reg);
	} else if (ifp->if_flags & IFF_ALLMULTI) {
		reg |= E1000_RCTL_MPE;
		reg &= ~E1000_RCTL_UPE;
		E1000_WRITE_REG(hw, E1000_RCTL, reg);
	}
}

static void
igb_disable_promisc(struct igb_softc *sc)
{
	struct e1000_hw *hw = &sc->hw;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	uint32_t reg;
	int mcnt = 0;

	if (sc->vf_ifp) {
		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
		return;
	}
	reg = E1000_READ_REG(hw, E1000_RCTL);
	reg &= ~E1000_RCTL_UPE;
	if (ifp->if_flags & IFF_ALLMULTI) {
		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
	} else {
		struct ifmultiaddr *ifma;

		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			if (ifma->ifma_addr->sa_family != AF_LINK)
				continue;
			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
				break;
			mcnt++;
		}
	}
	/* Don't disable if in MAX groups */
	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
		reg &= ~E1000_RCTL_MPE;
	E1000_WRITE_REG(hw, E1000_RCTL, reg);
}

static void
igb_set_multi(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	struct ifmultiaddr *ifma;
	uint32_t reg_rctl = 0;
	uint8_t *mta;
	int mcnt = 0;

	mta = sc->mta;
	bzero(mta, ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);

	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;

		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
			break;

		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
		mcnt++;
	}

	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
		reg_rctl = E1000_READ_REG(&sc->hw, E1000_RCTL);
		reg_rctl |= E1000_RCTL_MPE;
		E1000_WRITE_REG(&sc->hw, E1000_RCTL, reg_rctl);
	} else {
		e1000_update_mc_addr_list(&sc->hw, mta, mcnt);
	}
}

static void
igb_timer(void *xsc)
{
	struct igb_softc *sc = xsc;

	lwkt_serialize_enter(&sc->main_serialize);

	igb_update_link_status(sc);
	igb_update_stats_counters(sc);

	callout_reset_bycpu(&sc->timer, hz, igb_timer, sc, sc->timer_cpuid);

	lwkt_serialize_exit(&sc->main_serialize);
}
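
/*
 * For copper media, hw->mac.get_link_status (set at attach time and
 * again on link-state-change interrupts) forces a real PHY poll below;
 * otherwise the cached link state is trusted.
 */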
static void
igb_update_link_status(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	struct e1000_hw *hw = &sc->hw;
	uint32_t link_check, thstat, ctrl;

	link_check = thstat = ctrl = 0;

	/* Get the cached link value or read for real */
	switch (hw->phy.media_type) {
	case e1000_media_type_copper:
		if (hw->mac.get_link_status) {
			/* Do the work to read phy */
			e1000_check_for_link(hw);
			link_check = !hw->mac.get_link_status;
		} else {
			link_check = TRUE;
		}
		break;

	case e1000_media_type_fiber:
		e1000_check_for_link(hw);
		link_check = E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU;
		break;

	case e1000_media_type_internal_serdes:
		e1000_check_for_link(hw);
		link_check = hw->mac.serdes_has_link;
		break;

	/* VF device is type_unknown */
	case e1000_media_type_unknown:
		e1000_check_for_link(hw);
		link_check = !hw->mac.get_link_status;
		/* Fall thru */
	default:
		break;
	}

	/* Check for thermal downshift or shutdown */
	if (hw->mac.type == e1000_i350) {
		thstat = E1000_READ_REG(hw, E1000_THSTAT);
		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
	}

	/* Now we check if a transition has happened */
	if (link_check && sc->link_active == 0) {
		e1000_get_speed_and_duplex(hw,
		    &sc->link_speed, &sc->link_duplex);
		if (bootverbose) {
			const char *flowctl;

			/* Get the flow control for display */
			switch (hw->fc.current_mode) {
			case e1000_fc_rx_pause:
				flowctl = "RX";
				break;

			case e1000_fc_tx_pause:
				flowctl = "TX";
				break;

			case e1000_fc_full:
				flowctl = "Full";
				break;

			default:
				flowctl = "None";
				break;
			}

			if_printf(ifp, "Link is up %d Mbps %s, "
			    "Flow control: %s\n",
			    sc->link_speed,
			    sc->link_duplex == FULL_DUPLEX ?
			    "Full Duplex" : "Half Duplex",
			    flowctl);
		}
		sc->link_active = 1;

		ifp->if_baudrate = sc->link_speed * 1000000;
		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
		    (thstat & E1000_THSTAT_LINK_THROTTLE))
			if_printf(ifp, "Link: thermal downshift\n");
		/* Delay Link Up for Phy update */
		if ((hw->mac.type == e1000_i210 ||
		     hw->mac.type == e1000_i211) &&
		    hw->phy.id == I210_I_PHY_ID)
			msec_delay(IGB_I210_LINK_DELAY);
		/* This can sleep */
		ifp->if_link_state = LINK_STATE_UP;
		if_link_state_change(ifp);
	} else if (!link_check && sc->link_active == 1) {
		ifp->if_baudrate = sc->link_speed = 0;
		sc->link_duplex = 0;
		if (bootverbose)
			if_printf(ifp, "Link is Down\n");
		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
		    (thstat & E1000_THSTAT_PWR_DOWN))
			if_printf(ifp, "Link: thermal shutdown\n");
		sc->link_active = 0;
		/* This can sleep */
		ifp->if_link_state = LINK_STATE_DOWN;
		if_link_state_change(ifp);
	}
}

static void
igb_stop(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int i;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	igb_disable_intr(sc);

	callout_stop(&sc->timer);

	ifp->if_flags &= ~IFF_RUNNING;
	for (i = 0; i < sc->tx_ring_cnt; ++i) {
		ifsq_clr_oactive(sc->tx_rings[i].ifsq);
		ifsq_watchdog_stop(&sc->tx_rings[i].tx_watchdog);
		sc->tx_rings[i].tx_flags &= ~IGB_TXFLAG_ENABLED;
	}

	e1000_reset_hw(&sc->hw);
	E1000_WRITE_REG(&sc->hw, E1000_WUC, 0);

	e1000_led_off(&sc->hw);
	e1000_cleanup_led(&sc->hw);

	for (i = 0; i < sc->tx_ring_cnt; ++i)
		igb_free_tx_ring(&sc->tx_rings[i]);
	for (i = 0; i < sc->rx_ring_cnt; ++i)
		igb_free_rx_ring(&sc->rx_rings[i]);
}
static void
igb_reset(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	struct e1000_hw *hw = &sc->hw;
	struct e1000_fc_info *fc = &hw->fc;
	uint32_t pba = 0;
	uint16_t hwm;

	/* Let the firmware know the OS is in control */
	igb_get_hw_control(sc);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 */
	switch (hw->mac.type) {
	case e1000_82575:
		pba = E1000_PBA_32K;
		break;

	case e1000_82576:
	case e1000_vfadapt:
		pba = E1000_READ_REG(hw, E1000_RXPBS);
		pba &= E1000_RXPBS_SIZE_MASK_82576;
		break;

	case e1000_82580:
	case e1000_i350:
	case e1000_i354:
	case e1000_vfadapt_i350:
		pba = E1000_READ_REG(hw, E1000_RXPBS);
		pba = e1000_rxpbs_adjust_82580(pba);
		break;

	case e1000_i210:
	case e1000_i211:
		pba = E1000_PBA_34K;
		break;

	default:
		break;
	}

	/* Special needs in case of Jumbo frames */
	if (hw->mac.type == e1000_82575 && ifp->if_mtu > ETHERMTU) {
		uint32_t tx_space, min_tx, min_rx;

		pba = E1000_READ_REG(hw, E1000_PBA);
		tx_space = pba >> 16;
		pba &= 0xffff;

		min_tx = (sc->max_frame_size +
		    sizeof(struct e1000_tx_desc) - ETHER_CRC_LEN) * 2;
		min_tx = roundup2(min_tx, 1024);
		min_tx >>= 10;
		min_rx = sc->max_frame_size;
		min_rx = roundup2(min_rx, 1024);
		min_rx >>= 10;
		if (tx_space < min_tx && (min_tx - tx_space) < pba) {
			pba = pba - (min_tx - tx_space);
			/*
			 * if short on rx space, rx wins
			 * and must trump tx adjustment
			 */
			if (pba < min_rx)
				pba = min_rx;
		}
		E1000_WRITE_REG(hw, E1000_PBA, pba);
	}
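
	/*
	 * Worked example for the watermark math below (illustrative
	 * values): an i210 has pba = 34, so pba << 10 = 34816 bytes.
	 * With a 1518-byte max frame, min(34816 * 9 / 10,
	 * 34816 - 2 * 1518) = min(31334, 31780) = 31334, and the
	 * 16-byte-granular high water mark becomes 31334 & 0xFFF0 =
	 * 31328 bytes.
	 */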
	/*
	 * These parameters control the automatic generation (Tx) and
	 * response (Rx) to Ethernet PAUSE frames.
	 * - High water mark should allow for at least two frames to be
	 *   received after sending an XOFF.
	 * - Low water mark works best when it is very near the high water mark.
	 *   This allows the receiver to restart by sending XON when it has
	 *   drained a bit.
	 */
	hwm = min(((pba << 10) * 9 / 10),
	    ((pba << 10) - 2 * sc->max_frame_size));

	if (hw->mac.type < e1000_82576) {
		fc->high_water = hwm & 0xFFF8;	/* 8-byte granularity */
		fc->low_water = fc->high_water - 8;
	} else {
		fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
		fc->low_water = fc->high_water - 16;
	}
	fc->pause_time = IGB_FC_PAUSE_TIME;
	fc->send_xon = TRUE;
	fc->requested_mode = e1000_fc_default;

	/* Issue a global reset */
	e1000_reset_hw(hw);
	E1000_WRITE_REG(hw, E1000_WUC, 0);

	if (e1000_init_hw(hw) < 0)
		if_printf(ifp, "Hardware Initialization Failed\n");

	/* Setup DMA Coalescing */
	if (hw->mac.type > e1000_82580 && hw->mac.type != e1000_i211) {
		uint32_t dmac;
		uint32_t reg;

		if (sc->dma_coalesce == 0) {
			/*
			 * Disabled
			 */
			reg = E1000_READ_REG(hw, E1000_DMACR);
			reg &= ~E1000_DMACR_DMAC_EN;
			E1000_WRITE_REG(hw, E1000_DMACR, reg);
			goto reset_out;
		}

		/* Set starting thresholds */
		E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);

		hwm = 64 * pba - sc->max_frame_size / 16;
		if (hwm < 64 * (pba - 6))
			hwm = 64 * (pba - 6);
		reg = E1000_READ_REG(hw, E1000_FCRTC);
		reg &= ~E1000_FCRTC_RTH_COAL_MASK;
		reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
		    & E1000_FCRTC_RTH_COAL_MASK);
		E1000_WRITE_REG(hw, E1000_FCRTC, reg);

		dmac = pba - sc->max_frame_size / 512;
		if (dmac < pba - 10)
			dmac = pba - 10;
		reg = E1000_READ_REG(hw, E1000_DMACR);
		reg &= ~E1000_DMACR_DMACTHR_MASK;
		reg = ((dmac << E1000_DMACR_DMACTHR_SHIFT)
		    & E1000_DMACR_DMACTHR_MASK);
		/* Transition to L0s or L1 if available. */
		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
		/* timer = value in sc->dma_coalesce in 32usec intervals */
		reg |= (sc->dma_coalesce >> 5);
		E1000_WRITE_REG(hw, E1000_DMACR, reg);

		/* Set the interval before transition */
		reg = E1000_READ_REG(hw, E1000_DMCTLX);
		reg |= 0x80000004;
		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);

		/* Free space in tx packet buffer to wake from DMA coal */
		E1000_WRITE_REG(hw, E1000_DMCTXTH,
		    (20480 - (2 * sc->max_frame_size)) >> 6);

		/* Make low power state decision controlled by DMA coal */
		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
		reg &= ~E1000_PCIEMISC_LX_DECISION;
		E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
		if_printf(ifp, "DMA Coalescing enabled\n");
	} else if (hw->mac.type == e1000_82580) {
		uint32_t reg = E1000_READ_REG(hw, E1000_PCIEMISC);

		E1000_WRITE_REG(hw, E1000_DMACR, 0);
		E1000_WRITE_REG(hw, E1000_PCIEMISC,
		    reg & ~E1000_PCIEMISC_LX_DECISION);
	}

reset_out:
	E1000_WRITE_REG(&sc->hw, E1000_VET, ETHERTYPE_VLAN);
	e1000_get_phy_info(hw);
	e1000_check_for_link(hw);
}

static void
igb_setup_ifp(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int i;

	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_init = igb_init;
	ifp->if_ioctl = igb_ioctl;
	ifp->if_start = igb_start;
	ifp->if_serialize = igb_serialize;
	ifp->if_deserialize = igb_deserialize;
	ifp->if_tryserialize = igb_tryserialize;
#ifdef INVARIANTS
	ifp->if_serialize_assert = igb_serialize_assert;
#endif
#ifdef IFPOLL_ENABLE
	ifp->if_npoll = igb_npoll;
#endif

	ifq_set_maxlen(&ifp->if_snd, sc->tx_rings[0].num_tx_desc - 1);
	ifq_set_ready(&ifp->if_snd);
	ifq_set_subq_cnt(&ifp->if_snd, sc->tx_ring_cnt);

	ifp->if_mapsubq = ifq_mapsubq_mask;
	ifq_set_subq_mask(&ifp->if_snd, 0);

	ether_ifattach(ifp, sc->hw.mac.addr, NULL);

	ifp->if_capabilities =
	    IFCAP_HWCSUM | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_TSO;
	if (IGB_ENABLE_HWRSS(sc))
		ifp->if_capabilities |= IFCAP_RSS;
	ifp->if_capenable = ifp->if_capabilities;
	ifp->if_hwassist = IGB_CSUM_FEATURES | CSUM_TSO;

	/*
	 * Tell the upper layer(s) we support long frames
	 */
	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);

	/* Setup TX rings and subqueues */
	for (i = 0; i < sc->tx_ring_cnt; ++i) {
		struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
		struct igb_tx_ring *txr = &sc->tx_rings[i];

		ifsq_set_cpuid(ifsq, txr->tx_intr_cpuid);
		ifsq_set_priv(ifsq, txr);
		ifsq_set_hw_serialize(ifsq, &txr->tx_serialize);
		txr->ifsq = ifsq;

		ifsq_watchdog_init(&txr->tx_watchdog, ifsq, igb_watchdog);
	}

	/*
	 * Specify the media types supported by this adapter and register
	 * callbacks to update media and link information
	 */
	ifmedia_init(&sc->media, IFM_IMASK, igb_media_change, igb_media_status);
	if (sc->hw.phy.media_type == e1000_media_type_fiber ||
	    sc->hw.phy.media_type == e1000_media_type_internal_serdes) {
		ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
		    0, NULL);
		ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
	} else {
		ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T, 0, NULL);
		ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T | IFM_FDX,
		    0, NULL);
		ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX, 0, NULL);
		ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
		    0, NULL);
		if (sc->hw.phy.type != e1000_phy_ife) {
			ifmedia_add(&sc->media,
			    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
			ifmedia_add(&sc->media,
			    IFM_ETHER | IFM_1000_T, 0, NULL);
		}
	}
	ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO);
}
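
/*
 * The nodes created below land under hw.<nameunit>, e.g. for unit 0:
 *	sysctl hw.igb0.rxr_inuse
 *	sysctl hw.igb0.tx_intr_nsegs=64
 * (runtime-writable ones are registered with CTLFLAG_RW handlers).
 */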
static void
igb_add_sysctl(struct igb_softc *sc)
{
	char node[32];
	int i;

	sysctl_ctx_init(&sc->sysctl_ctx);
	sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx,
	    SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO,
	    device_get_nameunit(sc->dev), CTLFLAG_RD, 0, "");
	if (sc->sysctl_tree == NULL) {
		device_printf(sc->dev, "can't add sysctl node\n");
		return;
	}

	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "rxr", CTLFLAG_RD, &sc->rx_ring_cnt, 0, "# of RX rings");
	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "rxr_inuse", CTLFLAG_RD, &sc->rx_ring_inuse, 0,
	    "# of RX rings used");
	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "txr", CTLFLAG_RD, &sc->tx_ring_cnt, 0, "# of TX rings");
	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "txr_inuse", CTLFLAG_RD, &sc->tx_ring_inuse, 0,
	    "# of TX rings used");
	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "rxd", CTLFLAG_RD, &sc->rx_rings[0].num_rx_desc, 0,
	    "# of RX descs");
	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "txd", CTLFLAG_RD, &sc->tx_rings[0].num_tx_desc, 0,
	    "# of TX descs");

	if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
		SYSCTL_ADD_PROC(&sc->sysctl_ctx,
		    SYSCTL_CHILDREN(sc->sysctl_tree),
		    OID_AUTO, "intr_rate", CTLTYPE_INT | CTLFLAG_RW,
		    sc, 0, igb_sysctl_intr_rate, "I", "interrupt rate");
	} else {
		for (i = 0; i < sc->msix_cnt; ++i) {
			struct igb_msix_data *msix = &sc->msix_data[i];

			ksnprintf(node, sizeof(node), "msix%d_rate", i);
			SYSCTL_ADD_PROC(&sc->sysctl_ctx,
			    SYSCTL_CHILDREN(sc->sysctl_tree),
			    OID_AUTO, node, CTLTYPE_INT | CTLFLAG_RW,
			    msix, 0, igb_sysctl_msix_rate, "I",
			    msix->msix_rate_desc);
		}
	}

	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "tx_intr_nsegs", CTLTYPE_INT | CTLFLAG_RW,
	    sc, 0, igb_sysctl_tx_intr_nsegs, "I",
	    "# of segments per TX interrupt");

	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "tx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
	    sc, 0, igb_sysctl_tx_wreg_nsegs, "I",
	    "# of segments sent before write to hardware register");

	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "rx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
	    sc, 0, igb_sysctl_rx_wreg_nsegs, "I",
	    "# of segments received before write to hardware register");

#ifdef IFPOLL_ENABLE
	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "npoll_rxoff", CTLTYPE_INT|CTLFLAG_RW,
	    sc, 0, igb_sysctl_npoll_rxoff, "I", "NPOLLING RX cpu offset");
	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "npoll_txoff", CTLTYPE_INT|CTLFLAG_RW,
	    sc, 0, igb_sysctl_npoll_txoff, "I", "NPOLLING TX cpu offset");
#endif

#ifdef IGB_RSS_DEBUG
	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "rss_debug", CTLFLAG_RW, &sc->rss_debug, 0,
	    "RSS debug level");
	for (i = 0; i < sc->rx_ring_cnt; ++i) {
		ksnprintf(node, sizeof(node), "rx%d_pkt", i);
		SYSCTL_ADD_ULONG(&sc->sysctl_ctx,
		    SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, node,
		    CTLFLAG_RW, &sc->rx_rings[i].rx_packets, "RXed packets");
	}
#endif
#ifdef IGB_TSS_DEBUG
	for (i = 0; i < sc->tx_ring_cnt; ++i) {
		ksnprintf(node, sizeof(node), "tx%d_pkt", i);
		SYSCTL_ADD_ULONG(&sc->sysctl_ctx,
		    SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, node,
		    CTLFLAG_RW, &sc->tx_rings[i].tx_packets, "TXed packets");
	}
#endif
}
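
/*
 * All per-ring DMA tags are derived from the parent tag created below,
 * so bus address restrictions are expressed once and inherited.
 */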
static int
igb_alloc_rings(struct igb_softc *sc)
{
	int error, i;

	/*
	 * Create top level busdma tag
	 */
	error = bus_dma_tag_create(NULL, 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
	    &sc->parent_tag);
	if (error) {
		device_printf(sc->dev, "could not create top level DMA tag\n");
		return error;
	}

	/*
	 * Allocate TX descriptor rings and buffers
	 */
	sc->tx_rings = kmalloc_cachealign(
	    sizeof(struct igb_tx_ring) * sc->tx_ring_cnt,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < sc->tx_ring_cnt; ++i) {
		struct igb_tx_ring *txr = &sc->tx_rings[i];

		/* Set up some basics */
		txr->sc = sc;
		txr->me = i;
		lwkt_serialize_init(&txr->tx_serialize);

		error = igb_create_tx_ring(txr);
		if (error)
			return error;
	}

	/*
	 * Allocate RX descriptor rings and buffers
	 */
	sc->rx_rings = kmalloc_cachealign(
	    sizeof(struct igb_rx_ring) * sc->rx_ring_cnt,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < sc->rx_ring_cnt; ++i) {
		struct igb_rx_ring *rxr = &sc->rx_rings[i];

		/* Set up some basics */
		rxr->sc = sc;
		rxr->me = i;
		lwkt_serialize_init(&rxr->rx_serialize);

		error = igb_create_rx_ring(rxr);
		if (error)
			return error;
	}

	return 0;
}

static void
igb_free_rings(struct igb_softc *sc)
{
	int i;

	if (sc->tx_rings != NULL) {
		for (i = 0; i < sc->tx_ring_cnt; ++i) {
			struct igb_tx_ring *txr = &sc->tx_rings[i];

			igb_destroy_tx_ring(txr, txr->num_tx_desc);
		}
		kfree(sc->tx_rings, M_DEVBUF);
	}

	if (sc->rx_rings != NULL) {
		for (i = 0; i < sc->rx_ring_cnt; ++i) {
			struct igb_rx_ring *rxr = &sc->rx_rings[i];

			igb_destroy_rx_ring(rxr, rxr->num_rx_desc);
		}
		kfree(sc->rx_rings, M_DEVBUF);
	}
}
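
/*
 * Sizing example (assuming the stock IGB_DEFAULT_TXD of 1024): each
 * legacy TX descriptor is 16 bytes, so the ring occupies 16384 bytes,
 * which satisfies the IGB_DBA_ALIGN multiple check performed in
 * igb_create_tx_ring() below.
 */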
static int
igb_create_tx_ring(struct igb_tx_ring *txr)
{
	int tsize, error, i, ntxd;

	/*
	 * Validate the number of transmit descriptors: it must lie
	 * between IGB_MIN_TXD and IGB_MAX_TXD, and the resulting ring
	 * size in bytes must be a multiple of IGB_DBA_ALIGN.
	 */
	ntxd = device_getenv_int(txr->sc->dev, "txd", igb_txd);
	if ((ntxd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN != 0 ||
	    ntxd > IGB_MAX_TXD || ntxd < IGB_MIN_TXD) {
		device_printf(txr->sc->dev,
		    "Using %d TX descriptors instead of %d!\n",
		    IGB_DEFAULT_TXD, ntxd);
		txr->num_tx_desc = IGB_DEFAULT_TXD;
	} else {
		txr->num_tx_desc = ntxd;
	}

	/*
	 * Allocate TX descriptor ring
	 */
	tsize = roundup2(txr->num_tx_desc * sizeof(union e1000_adv_tx_desc),
	    IGB_DBA_ALIGN);
	txr->txdma.dma_vaddr = bus_dmamem_coherent_any(txr->sc->parent_tag,
	    IGB_DBA_ALIGN, tsize, BUS_DMA_WAITOK,
	    &txr->txdma.dma_tag, &txr->txdma.dma_map, &txr->txdma.dma_paddr);
	if (txr->txdma.dma_vaddr == NULL) {
		device_printf(txr->sc->dev,
		    "Unable to allocate TX Descriptor memory\n");
		return ENOMEM;
	}
	txr->tx_base = txr->txdma.dma_vaddr;
	bzero(txr->tx_base, tsize);

	tsize = __VM_CACHELINE_ALIGN(
	    sizeof(struct igb_tx_buf) * txr->num_tx_desc);
	txr->tx_buf = kmalloc_cachealign(tsize, M_DEVBUF, M_WAITOK | M_ZERO);

	/*
	 * Allocate TX head write-back buffer
	 */
	txr->tx_hdr = bus_dmamem_coherent_any(txr->sc->parent_tag,
	    __VM_CACHELINE_SIZE, __VM_CACHELINE_SIZE, BUS_DMA_WAITOK,
	    &txr->tx_hdr_dtag, &txr->tx_hdr_dmap, &txr->tx_hdr_paddr);
	if (txr->tx_hdr == NULL) {
		device_printf(txr->sc->dev,
		    "Unable to allocate TX head write-back buffer\n");
		return ENOMEM;
	}

	/*
	 * Create DMA tag for TX buffers
	 */
	error = bus_dma_tag_create(txr->sc->parent_tag,
	    1, 0,		/* alignment, bounds */
	    BUS_SPACE_MAXADDR,	/* lowaddr */
	    BUS_SPACE_MAXADDR,	/* highaddr */
	    NULL, NULL,		/* filter, filterarg */
	    IGB_TSO_SIZE,	/* maxsize */
	    IGB_MAX_SCATTER,	/* nsegments */
	    PAGE_SIZE,		/* maxsegsize */
	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
	    BUS_DMA_ONEBPAGE,	/* flags */
	    &txr->tx_tag);
	if (error) {
		device_printf(txr->sc->dev, "Unable to allocate TX DMA tag\n");
		kfree(txr->tx_buf, M_DEVBUF);
		txr->tx_buf = NULL;
		return error;
	}

	/*
	 * Create DMA maps for TX buffers
	 */
	for (i = 0; i < txr->num_tx_desc; ++i) {
		struct igb_tx_buf *txbuf = &txr->tx_buf[i];

		error = bus_dmamap_create(txr->tx_tag,
		    BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &txbuf->map);
		if (error) {
			device_printf(txr->sc->dev,
			    "Unable to create TX DMA map\n");
			igb_destroy_tx_ring(txr, i);
			return error;
		}
	}

	if (txr->sc->hw.mac.type == e1000_82575)
		txr->tx_flags |= IGB_TXFLAG_TSO_IPLEN0;

	/*
	 * Initialize various watermarks
	 */
	txr->spare_desc = IGB_TX_SPARE;
	txr->intr_nsegs = txr->num_tx_desc / 16;
	txr->wreg_nsegs = IGB_DEF_TXWREG_NSEGS;
	txr->oact_hi_desc = txr->num_tx_desc / 2;
	txr->oact_lo_desc = txr->num_tx_desc / 8;
	if (txr->oact_lo_desc > IGB_TX_OACTIVE_MAX)
		txr->oact_lo_desc = IGB_TX_OACTIVE_MAX;
	if (txr->oact_lo_desc < txr->spare_desc + IGB_TX_RESERVED)
		txr->oact_lo_desc = txr->spare_desc + IGB_TX_RESERVED;

	return 0;
}

static void
igb_free_tx_ring(struct igb_tx_ring *txr)
{
	int i;

	for (i = 0; i < txr->num_tx_desc; ++i) {
		struct igb_tx_buf *txbuf = &txr->tx_buf[i];

		if (txbuf->m_head != NULL) {
			bus_dmamap_unload(txr->tx_tag, txbuf->map);
			m_freem(txbuf->m_head);
			txbuf->m_head = NULL;
		}
	}
}
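
/*
 * igb_free_tx_ring() above only drops queued mbufs (stop/reinit path);
 * igb_destroy_tx_ring() below additionally destroys the DMA maps, tags
 * and descriptor memory (detach or attach-failure path).
 */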
1926 static void 1927 igb_destroy_tx_ring(struct igb_tx_ring *txr, int ndesc) 1928 { 1929 int i; 1930 1931 if (txr->txdma.dma_vaddr != NULL) { 1932 bus_dmamap_unload(txr->txdma.dma_tag, txr->txdma.dma_map); 1933 bus_dmamem_free(txr->txdma.dma_tag, txr->txdma.dma_vaddr, 1934 txr->txdma.dma_map); 1935 bus_dma_tag_destroy(txr->txdma.dma_tag); 1936 txr->txdma.dma_vaddr = NULL; 1937 } 1938 1939 if (txr->tx_hdr != NULL) { 1940 bus_dmamap_unload(txr->tx_hdr_dtag, txr->tx_hdr_dmap); 1941 bus_dmamem_free(txr->tx_hdr_dtag, txr->tx_hdr, 1942 txr->tx_hdr_dmap); 1943 bus_dma_tag_destroy(txr->tx_hdr_dtag); 1944 txr->tx_hdr = NULL; 1945 } 1946 1947 if (txr->tx_buf == NULL) 1948 return; 1949 1950 for (i = 0; i < ndesc; ++i) { 1951 struct igb_tx_buf *txbuf = &txr->tx_buf[i]; 1952 1953 KKASSERT(txbuf->m_head == NULL); 1954 bus_dmamap_destroy(txr->tx_tag, txbuf->map); 1955 } 1956 bus_dma_tag_destroy(txr->tx_tag); 1957 1958 kfree(txr->tx_buf, M_DEVBUF); 1959 txr->tx_buf = NULL; 1960 } 1961 1962 static void 1963 igb_init_tx_ring(struct igb_tx_ring *txr) 1964 { 1965 /* Clear the old descriptor contents */ 1966 bzero(txr->tx_base, 1967 sizeof(union e1000_adv_tx_desc) * txr->num_tx_desc); 1968 1969 /* Clear TX head write-back buffer */ 1970 *(txr->tx_hdr) = 0; 1971 1972 /* Reset indices */ 1973 txr->next_avail_desc = 0; 1974 txr->next_to_clean = 0; 1975 txr->tx_nsegs = 0; 1976 1977 /* Set number of descriptors available */ 1978 txr->tx_avail = txr->num_tx_desc; 1979 1980 /* Enable this TX ring */ 1981 txr->tx_flags |= IGB_TXFLAG_ENABLED; 1982 } 1983 1984 static void 1985 igb_init_tx_unit(struct igb_softc *sc) 1986 { 1987 struct e1000_hw *hw = &sc->hw; 1988 uint32_t tctl; 1989 int i; 1990 1991 /* Setup the Tx Descriptor Rings */ 1992 for (i = 0; i < sc->tx_ring_inuse; ++i) { 1993 struct igb_tx_ring *txr = &sc->tx_rings[i]; 1994 uint64_t bus_addr = txr->txdma.dma_paddr; 1995 uint64_t hdr_paddr = txr->tx_hdr_paddr; 1996 uint32_t txdctl = 0; 1997 uint32_t dca_txctrl; 1998 1999 E1000_WRITE_REG(hw, E1000_TDLEN(i), 2000 txr->num_tx_desc * sizeof(struct e1000_tx_desc)); 2001 E1000_WRITE_REG(hw, E1000_TDBAH(i), 2002 (uint32_t)(bus_addr >> 32)); 2003 E1000_WRITE_REG(hw, E1000_TDBAL(i), 2004 (uint32_t)bus_addr); 2005 2006 /* Setup the HW Tx Head and Tail descriptor pointers */ 2007 E1000_WRITE_REG(hw, E1000_TDT(i), 0); 2008 E1000_WRITE_REG(hw, E1000_TDH(i), 0); 2009 2010 dca_txctrl = E1000_READ_REG(hw, E1000_DCA_TXCTRL(i)); 2011 dca_txctrl &= ~E1000_DCA_TXCTRL_TX_WB_RO_EN; 2012 E1000_WRITE_REG(hw, E1000_DCA_TXCTRL(i), dca_txctrl); 2013 2014 /* 2015 * Don't set WB_on_EITR: 2016 * - 82575 does not have it 2017 * - It almost has no effect on 82576, see: 2018 * 82576 specification update errata #26 2019 * - It causes unnecessary bus traffic 2020 */ 2021 E1000_WRITE_REG(hw, E1000_TDWBAH(i), 2022 (uint32_t)(hdr_paddr >> 32)); 2023 E1000_WRITE_REG(hw, E1000_TDWBAL(i), 2024 ((uint32_t)hdr_paddr) | E1000_TX_HEAD_WB_ENABLE); 2025 2026 /* 2027 * WTHRESH is ignored by the hardware, since header 2028 * write back mode is used. 
2029 */ 2030 txdctl |= IGB_TX_PTHRESH; 2031 txdctl |= IGB_TX_HTHRESH << 8; 2032 txdctl |= IGB_TX_WTHRESH << 16; 2033 txdctl |= E1000_TXDCTL_QUEUE_ENABLE; 2034 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); 2035 } 2036 2037 if (sc->vf_ifp) 2038 return; 2039 2040 e1000_config_collision_dist(hw); 2041 2042 /* Program the Transmit Control Register */ 2043 tctl = E1000_READ_REG(hw, E1000_TCTL); 2044 tctl &= ~E1000_TCTL_CT; 2045 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | 2046 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); 2047 2048 /* This write will effectively turn on the transmit unit. */ 2049 E1000_WRITE_REG(hw, E1000_TCTL, tctl); 2050 } 2051 2052 static boolean_t 2053 igb_txcsum_ctx(struct igb_tx_ring *txr, struct mbuf *mp) 2054 { 2055 struct e1000_adv_tx_context_desc *TXD; 2056 uint32_t vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx; 2057 int ehdrlen, ctxd, ip_hlen = 0; 2058 boolean_t offload = TRUE; 2059 2060 if ((mp->m_pkthdr.csum_flags & IGB_CSUM_FEATURES) == 0) 2061 offload = FALSE; 2062 2063 vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0; 2064 2065 ctxd = txr->next_avail_desc; 2066 TXD = (struct e1000_adv_tx_context_desc *)&txr->tx_base[ctxd]; 2067 2068 /* 2069 * In advanced descriptors the vlan tag must 2070 * be placed into the context descriptor, thus 2071 * we need to be here just for that setup. 2072 */ 2073 if (mp->m_flags & M_VLANTAG) { 2074 uint16_t vlantag; 2075 2076 vlantag = htole16(mp->m_pkthdr.ether_vlantag); 2077 vlan_macip_lens |= (vlantag << E1000_ADVTXD_VLAN_SHIFT); 2078 } else if (!offload) { 2079 return FALSE; 2080 } 2081 2082 ehdrlen = mp->m_pkthdr.csum_lhlen; 2083 KASSERT(ehdrlen > 0, ("invalid ether hlen")); 2084 2085 /* Set the ether header length */ 2086 vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; 2087 if (mp->m_pkthdr.csum_flags & CSUM_IP) { 2088 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; 2089 ip_hlen = mp->m_pkthdr.csum_iphlen; 2090 KASSERT(ip_hlen > 0, ("invalid ip hlen")); 2091 } 2092 vlan_macip_lens |= ip_hlen; 2093 2094 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; 2095 if (mp->m_pkthdr.csum_flags & CSUM_TCP) 2096 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; 2097 else if (mp->m_pkthdr.csum_flags & CSUM_UDP) 2098 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP; 2099 2100 /* 2101 * 82575 needs the TX context index added; the queue 2102 * index is used as TX context index here. 
2103 */ 2104 if (txr->sc->hw.mac.type == e1000_82575) 2105 mss_l4len_idx = txr->me << 4; 2106 2107 /* Now copy bits into descriptor */ 2108 TXD->vlan_macip_lens = htole32(vlan_macip_lens); 2109 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); 2110 TXD->seqnum_seed = htole32(0); 2111 TXD->mss_l4len_idx = htole32(mss_l4len_idx); 2112 2113 /* We've consumed the first desc, adjust counters */ 2114 if (++ctxd == txr->num_tx_desc) 2115 ctxd = 0; 2116 txr->next_avail_desc = ctxd; 2117 --txr->tx_avail; 2118 2119 return offload; 2120 } 2121 2122 static void 2123 igb_txeof(struct igb_tx_ring *txr) 2124 { 2125 struct ifnet *ifp = &txr->sc->arpcom.ac_if; 2126 int first, hdr, avail; 2127 2128 if (txr->tx_avail == txr->num_tx_desc) 2129 return; 2130 2131 first = txr->next_to_clean; 2132 hdr = *(txr->tx_hdr); 2133 2134 if (first == hdr) 2135 return; 2136 2137 avail = txr->tx_avail; 2138 while (first != hdr) { 2139 struct igb_tx_buf *txbuf = &txr->tx_buf[first]; 2140 2141 ++avail; 2142 if (txbuf->m_head) { 2143 bus_dmamap_unload(txr->tx_tag, txbuf->map); 2144 m_freem(txbuf->m_head); 2145 txbuf->m_head = NULL; 2146 IFNET_STAT_INC(ifp, opackets, 1); 2147 } 2148 if (++first == txr->num_tx_desc) 2149 first = 0; 2150 } 2151 txr->next_to_clean = first; 2152 txr->tx_avail = avail; 2153 2154 /* 2155 * If we have a minimum free, clear OACTIVE 2156 * to tell the stack that it is OK to send packets. 2157 */ 2158 if (IGB_IS_NOT_OACTIVE(txr)) { 2159 ifsq_clr_oactive(txr->ifsq); 2160 2161 /* 2162 * We have enough TX descriptors, turn off 2163 * the watchdog. We allow small amount of 2164 * packets (roughly intr_nsegs) pending on 2165 * the transmit ring. 2166 */ 2167 txr->tx_watchdog.wd_timer = 0; 2168 } 2169 } 2170 2171 static int 2172 igb_create_rx_ring(struct igb_rx_ring *rxr) 2173 { 2174 int rsize, i, error, nrxd; 2175 2176 /* 2177 * Validate number of receive descriptors. It must not exceed 2178 * hardware maximum, and must be multiple of IGB_DBA_ALIGN. 
	 */
	nrxd = device_getenv_int(rxr->sc->dev, "rxd", igb_rxd);
	if ((nrxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN != 0 ||
	    nrxd > IGB_MAX_RXD || nrxd < IGB_MIN_RXD) {
		device_printf(rxr->sc->dev,
		    "Using %d RX descriptors instead of %d!\n",
		    IGB_DEFAULT_RXD, nrxd);
		rxr->num_rx_desc = IGB_DEFAULT_RXD;
	} else {
		rxr->num_rx_desc = nrxd;
	}

	/*
	 * Allocate RX descriptor ring
	 */
	rsize = roundup2(rxr->num_rx_desc * sizeof(union e1000_adv_rx_desc),
	    IGB_DBA_ALIGN);
	rxr->rxdma.dma_vaddr = bus_dmamem_coherent_any(rxr->sc->parent_tag,
	    IGB_DBA_ALIGN, rsize, BUS_DMA_WAITOK,
	    &rxr->rxdma.dma_tag, &rxr->rxdma.dma_map,
	    &rxr->rxdma.dma_paddr);
	if (rxr->rxdma.dma_vaddr == NULL) {
		device_printf(rxr->sc->dev,
		    "Unable to allocate RX descriptor memory\n");
		return ENOMEM;
	}
	rxr->rx_base = rxr->rxdma.dma_vaddr;
	bzero(rxr->rx_base, rsize);

	rsize = __VM_CACHELINE_ALIGN(
	    sizeof(struct igb_rx_buf) * rxr->num_rx_desc);
	rxr->rx_buf = kmalloc_cachealign(rsize, M_DEVBUF, M_WAITOK | M_ZERO);

	/*
	 * Create DMA tag for RX buffers
	 */
	error = bus_dma_tag_create(rxr->sc->parent_tag,
	    1, 0,		/* alignment, bounds */
	    BUS_SPACE_MAXADDR,	/* lowaddr */
	    BUS_SPACE_MAXADDR,	/* highaddr */
	    NULL, NULL,		/* filter, filterarg */
	    MCLBYTES,		/* maxsize */
	    1,			/* nsegments */
	    MCLBYTES,		/* maxsegsize */
	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,	/* flags */
	    &rxr->rx_tag);
	if (error) {
		device_printf(rxr->sc->dev,
		    "Unable to create RX payload DMA tag\n");
		kfree(rxr->rx_buf, M_DEVBUF);
		rxr->rx_buf = NULL;
		return error;
	}

	/*
	 * Create spare DMA map for RX buffers
	 */
	error = bus_dmamap_create(rxr->rx_tag, BUS_DMA_WAITOK,
	    &rxr->rx_sparemap);
	if (error) {
		device_printf(rxr->sc->dev,
		    "Unable to create spare RX DMA map\n");
		bus_dma_tag_destroy(rxr->rx_tag);
		kfree(rxr->rx_buf, M_DEVBUF);
		rxr->rx_buf = NULL;
		return error;
	}

	/*
	 * Create DMA maps for RX buffers
	 */
	for (i = 0; i < rxr->num_rx_desc; i++) {
		struct igb_rx_buf *rxbuf = &rxr->rx_buf[i];

		error = bus_dmamap_create(rxr->rx_tag,
		    BUS_DMA_WAITOK, &rxbuf->map);
		if (error) {
			device_printf(rxr->sc->dev,
			    "Unable to create RX DMA map\n");
			igb_destroy_rx_ring(rxr, i);
			return error;
		}
	}

	/*
	 * Initialize various watermarks
	 */
	rxr->wreg_nsegs = IGB_DEF_RXWREG_NSEGS;

	return 0;
}

static void
igb_free_rx_ring(struct igb_rx_ring *rxr)
{
	int i;

	for (i = 0; i < rxr->num_rx_desc; ++i) {
		struct igb_rx_buf *rxbuf = &rxr->rx_buf[i];

		if (rxbuf->m_head != NULL) {
			bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
			m_freem(rxbuf->m_head);
			rxbuf->m_head = NULL;
		}
	}

	if (rxr->fmp != NULL)
		m_freem(rxr->fmp);
	rxr->fmp = NULL;
	rxr->lmp = NULL;
}

static void
igb_destroy_rx_ring(struct igb_rx_ring *rxr, int ndesc)
{
	int i;

	if (rxr->rxdma.dma_vaddr != NULL) {
		bus_dmamap_unload(rxr->rxdma.dma_tag, rxr->rxdma.dma_map);
		bus_dmamem_free(rxr->rxdma.dma_tag, rxr->rxdma.dma_vaddr,
		    rxr->rxdma.dma_map);
		bus_dma_tag_destroy(rxr->rxdma.dma_tag);
		rxr->rxdma.dma_vaddr = NULL;
	}

	if (rxr->rx_buf == NULL)
		return;

	for (i = 0; i < ndesc;
++i) { 2309 struct igb_rx_buf *rxbuf = &rxr->rx_buf[i]; 2310 2311 KKASSERT(rxbuf->m_head == NULL); 2312 bus_dmamap_destroy(rxr->rx_tag, rxbuf->map); 2313 } 2314 bus_dmamap_destroy(rxr->rx_tag, rxr->rx_sparemap); 2315 bus_dma_tag_destroy(rxr->rx_tag); 2316 2317 kfree(rxr->rx_buf, M_DEVBUF); 2318 rxr->rx_buf = NULL; 2319 } 2320 2321 static void 2322 igb_setup_rxdesc(union e1000_adv_rx_desc *rxd, const struct igb_rx_buf *rxbuf) 2323 { 2324 rxd->read.pkt_addr = htole64(rxbuf->paddr); 2325 rxd->wb.upper.status_error = 0; 2326 } 2327 2328 static int 2329 igb_newbuf(struct igb_rx_ring *rxr, int i, boolean_t wait) 2330 { 2331 struct mbuf *m; 2332 bus_dma_segment_t seg; 2333 bus_dmamap_t map; 2334 struct igb_rx_buf *rxbuf; 2335 int error, nseg; 2336 2337 m = m_getcl(wait ? MB_WAIT : MB_DONTWAIT, MT_DATA, M_PKTHDR); 2338 if (m == NULL) { 2339 if (wait) { 2340 if_printf(&rxr->sc->arpcom.ac_if, 2341 "Unable to allocate RX mbuf\n"); 2342 } 2343 return ENOBUFS; 2344 } 2345 m->m_len = m->m_pkthdr.len = MCLBYTES; 2346 2347 if (rxr->sc->max_frame_size <= MCLBYTES - ETHER_ALIGN) 2348 m_adj(m, ETHER_ALIGN); 2349 2350 error = bus_dmamap_load_mbuf_segment(rxr->rx_tag, 2351 rxr->rx_sparemap, m, &seg, 1, &nseg, BUS_DMA_NOWAIT); 2352 if (error) { 2353 m_freem(m); 2354 if (wait) { 2355 if_printf(&rxr->sc->arpcom.ac_if, 2356 "Unable to load RX mbuf\n"); 2357 } 2358 return error; 2359 } 2360 2361 rxbuf = &rxr->rx_buf[i]; 2362 if (rxbuf->m_head != NULL) 2363 bus_dmamap_unload(rxr->rx_tag, rxbuf->map); 2364 2365 map = rxbuf->map; 2366 rxbuf->map = rxr->rx_sparemap; 2367 rxr->rx_sparemap = map; 2368 2369 rxbuf->m_head = m; 2370 rxbuf->paddr = seg.ds_addr; 2371 2372 igb_setup_rxdesc(&rxr->rx_base[i], rxbuf); 2373 return 0; 2374 } 2375 2376 static int 2377 igb_init_rx_ring(struct igb_rx_ring *rxr) 2378 { 2379 int i; 2380 2381 /* Clear the ring contents */ 2382 bzero(rxr->rx_base, 2383 rxr->num_rx_desc * sizeof(union e1000_adv_rx_desc)); 2384 2385 /* Now replenish the ring mbufs */ 2386 for (i = 0; i < rxr->num_rx_desc; ++i) { 2387 int error; 2388 2389 error = igb_newbuf(rxr, i, TRUE); 2390 if (error) 2391 return error; 2392 } 2393 2394 /* Setup our descriptor indices */ 2395 rxr->next_to_check = 0; 2396 2397 rxr->fmp = NULL; 2398 rxr->lmp = NULL; 2399 rxr->discard = FALSE; 2400 2401 return 0; 2402 } 2403 2404 static void 2405 igb_init_rx_unit(struct igb_softc *sc) 2406 { 2407 struct ifnet *ifp = &sc->arpcom.ac_if; 2408 struct e1000_hw *hw = &sc->hw; 2409 uint32_t rctl, rxcsum, srrctl = 0; 2410 int i; 2411 2412 /* 2413 * Make sure receives are disabled while setting 2414 * up the descriptor ring 2415 */ 2416 rctl = E1000_READ_REG(hw, E1000_RCTL); 2417 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); 2418 2419 #if 0 2420 /* 2421 ** Set up for header split 2422 */ 2423 if (igb_header_split) { 2424 /* Use a standard mbuf for the header */ 2425 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; 2426 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; 2427 } else 2428 #endif 2429 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; 2430 2431 /* 2432 ** Set up for jumbo frames 2433 */ 2434 if (ifp->if_mtu > ETHERMTU) { 2435 rctl |= E1000_RCTL_LPE; 2436 #if 0 2437 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) { 2438 srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 2439 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; 2440 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) { 2441 srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 2442 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; 2443 } 2444 /* Set maximum packet len */ 2445 psize = 
adapter->max_frame_size; 2446 /* are we on a vlan? */ 2447 if (adapter->ifp->if_vlantrunk != NULL) 2448 psize += VLAN_TAG_SIZE; 2449 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize); 2450 #else 2451 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 2452 rctl |= E1000_RCTL_SZ_2048; 2453 #endif 2454 } else { 2455 rctl &= ~E1000_RCTL_LPE; 2456 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 2457 rctl |= E1000_RCTL_SZ_2048; 2458 } 2459 2460 /* Setup the Base and Length of the Rx Descriptor Rings */ 2461 for (i = 0; i < sc->rx_ring_inuse; ++i) { 2462 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 2463 uint64_t bus_addr = rxr->rxdma.dma_paddr; 2464 uint32_t rxdctl; 2465 2466 E1000_WRITE_REG(hw, E1000_RDLEN(i), 2467 rxr->num_rx_desc * sizeof(struct e1000_rx_desc)); 2468 E1000_WRITE_REG(hw, E1000_RDBAH(i), 2469 (uint32_t)(bus_addr >> 32)); 2470 E1000_WRITE_REG(hw, E1000_RDBAL(i), 2471 (uint32_t)bus_addr); 2472 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl); 2473 /* Enable this Queue */ 2474 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); 2475 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; 2476 rxdctl &= 0xFFF00000; 2477 rxdctl |= IGB_RX_PTHRESH; 2478 rxdctl |= IGB_RX_HTHRESH << 8; 2479 /* 2480 * Don't set WTHRESH to a value above 1 on 82576, see: 2481 * 82576 specification update errata #26 2482 */ 2483 rxdctl |= IGB_RX_WTHRESH << 16; 2484 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); 2485 } 2486 2487 rxcsum = E1000_READ_REG(&sc->hw, E1000_RXCSUM); 2488 rxcsum &= ~(E1000_RXCSUM_PCSS_MASK | E1000_RXCSUM_IPPCSE); 2489 2490 /* 2491 * Receive Checksum Offload for TCP and UDP 2492 * 2493 * Checksum offloading is also enabled if multiple receive 2494 * queue is to be supported, since we need it to figure out 2495 * fragments. 2496 */ 2497 if ((ifp->if_capenable & IFCAP_RXCSUM) || IGB_ENABLE_HWRSS(sc)) { 2498 /* 2499 * NOTE: 2500 * PCSD must be enabled to enable multiple 2501 * receive queues. 2502 */ 2503 rxcsum |= E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL | 2504 E1000_RXCSUM_PCSD; 2505 } else { 2506 rxcsum &= ~(E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL | 2507 E1000_RXCSUM_PCSD); 2508 } 2509 E1000_WRITE_REG(&sc->hw, E1000_RXCSUM, rxcsum); 2510 2511 if (IGB_ENABLE_HWRSS(sc)) { 2512 uint8_t key[IGB_NRSSRK * IGB_RSSRK_SIZE]; 2513 uint32_t reta_shift; 2514 int j, r; 2515 2516 /* 2517 * NOTE: 2518 * When we reach here, RSS has already been disabled 2519 * in igb_stop(), so we could safely configure RSS key 2520 * and redirect table. 2521 */ 2522 2523 /* 2524 * Configure RSS key 2525 */ 2526 toeplitz_get_key(key, sizeof(key)); 2527 for (i = 0; i < IGB_NRSSRK; ++i) { 2528 uint32_t rssrk; 2529 2530 rssrk = IGB_RSSRK_VAL(key, i); 2531 IGB_RSS_DPRINTF(sc, 1, "rssrk%d 0x%08x\n", i, rssrk); 2532 2533 E1000_WRITE_REG(hw, E1000_RSSRK(i), rssrk); 2534 } 2535 2536 /* 2537 * Configure RSS redirect table in following fashion: 2538 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)] 2539 */ 2540 reta_shift = IGB_RETA_SHIFT; 2541 if (hw->mac.type == e1000_82575) 2542 reta_shift = IGB_RETA_SHIFT_82575; 2543 2544 r = 0; 2545 for (j = 0; j < IGB_NRETA; ++j) { 2546 uint32_t reta = 0; 2547 2548 for (i = 0; i < IGB_RETA_SIZE; ++i) { 2549 uint32_t q; 2550 2551 q = (r % sc->rx_ring_inuse) << reta_shift; 2552 reta |= q << (8 * i); 2553 ++r; 2554 } 2555 IGB_RSS_DPRINTF(sc, 1, "reta 0x%08x\n", reta); 2556 E1000_WRITE_REG(hw, E1000_RETA(j), reta); 2557 } 2558 2559 /* 2560 * Enable multiple receive queues. 2561 * Enable IPv4 RSS standard hash functions. 
2562 * Disable RSS interrupt on 82575 2563 */ 2564 E1000_WRITE_REG(&sc->hw, E1000_MRQC, 2565 E1000_MRQC_ENABLE_RSS_4Q | 2566 E1000_MRQC_RSS_FIELD_IPV4_TCP | 2567 E1000_MRQC_RSS_FIELD_IPV4); 2568 } 2569 2570 /* Setup the Receive Control Register */ 2571 rctl &= ~(3 << E1000_RCTL_MO_SHIFT); 2572 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | 2573 E1000_RCTL_RDMTS_HALF | 2574 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); 2575 /* Strip CRC bytes. */ 2576 rctl |= E1000_RCTL_SECRC; 2577 /* Make sure VLAN Filters are off */ 2578 rctl &= ~E1000_RCTL_VFE; 2579 /* Don't store bad packets */ 2580 rctl &= ~E1000_RCTL_SBP; 2581 2582 /* Enable Receives */ 2583 E1000_WRITE_REG(hw, E1000_RCTL, rctl); 2584 2585 /* 2586 * Setup the HW Rx Head and Tail Descriptor Pointers 2587 * - needs to be after enable 2588 */ 2589 for (i = 0; i < sc->rx_ring_inuse; ++i) { 2590 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 2591 2592 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check); 2593 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->num_rx_desc - 1); 2594 } 2595 } 2596 2597 static void 2598 igb_rx_refresh(struct igb_rx_ring *rxr, int i) 2599 { 2600 if (--i < 0) 2601 i = rxr->num_rx_desc - 1; 2602 E1000_WRITE_REG(&rxr->sc->hw, E1000_RDT(rxr->me), i); 2603 } 2604 2605 static void 2606 igb_rxeof(struct igb_rx_ring *rxr, int count) 2607 { 2608 struct ifnet *ifp = &rxr->sc->arpcom.ac_if; 2609 union e1000_adv_rx_desc *cur; 2610 uint32_t staterr; 2611 int i, ncoll = 0, cpuid = mycpuid; 2612 2613 i = rxr->next_to_check; 2614 cur = &rxr->rx_base[i]; 2615 staterr = le32toh(cur->wb.upper.status_error); 2616 2617 if ((staterr & E1000_RXD_STAT_DD) == 0) 2618 return; 2619 2620 while ((staterr & E1000_RXD_STAT_DD) && count != 0) { 2621 struct pktinfo *pi = NULL, pi0; 2622 struct igb_rx_buf *rxbuf = &rxr->rx_buf[i]; 2623 struct mbuf *m = NULL; 2624 boolean_t eop; 2625 2626 eop = (staterr & E1000_RXD_STAT_EOP) ? 
TRUE : FALSE; 2627 if (eop) 2628 --count; 2629 2630 ++ncoll; 2631 if ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) == 0 && 2632 !rxr->discard) { 2633 struct mbuf *mp = rxbuf->m_head; 2634 uint32_t hash, hashtype; 2635 uint16_t vlan; 2636 int len; 2637 2638 len = le16toh(cur->wb.upper.length); 2639 if ((rxr->sc->hw.mac.type == e1000_i350 || 2640 rxr->sc->hw.mac.type == e1000_i354) && 2641 (staterr & E1000_RXDEXT_STATERR_LB)) 2642 vlan = be16toh(cur->wb.upper.vlan); 2643 else 2644 vlan = le16toh(cur->wb.upper.vlan); 2645 2646 hash = le32toh(cur->wb.lower.hi_dword.rss); 2647 hashtype = le32toh(cur->wb.lower.lo_dword.data) & 2648 E1000_RXDADV_RSSTYPE_MASK; 2649 2650 IGB_RSS_DPRINTF(rxr->sc, 10, 2651 "ring%d, hash 0x%08x, hashtype %u\n", 2652 rxr->me, hash, hashtype); 2653 2654 bus_dmamap_sync(rxr->rx_tag, rxbuf->map, 2655 BUS_DMASYNC_POSTREAD); 2656 2657 if (igb_newbuf(rxr, i, FALSE) != 0) { 2658 IFNET_STAT_INC(ifp, iqdrops, 1); 2659 goto discard; 2660 } 2661 2662 mp->m_len = len; 2663 if (rxr->fmp == NULL) { 2664 mp->m_pkthdr.len = len; 2665 rxr->fmp = mp; 2666 rxr->lmp = mp; 2667 } else { 2668 rxr->lmp->m_next = mp; 2669 rxr->lmp = rxr->lmp->m_next; 2670 rxr->fmp->m_pkthdr.len += len; 2671 } 2672 2673 if (eop) { 2674 m = rxr->fmp; 2675 rxr->fmp = NULL; 2676 rxr->lmp = NULL; 2677 2678 m->m_pkthdr.rcvif = ifp; 2679 IFNET_STAT_INC(ifp, ipackets, 1); 2680 2681 if (ifp->if_capenable & IFCAP_RXCSUM) 2682 igb_rxcsum(staterr, m); 2683 2684 if (staterr & E1000_RXD_STAT_VP) { 2685 m->m_pkthdr.ether_vlantag = vlan; 2686 m->m_flags |= M_VLANTAG; 2687 } 2688 2689 if (ifp->if_capenable & IFCAP_RSS) { 2690 pi = igb_rssinfo(m, &pi0, 2691 hash, hashtype, staterr); 2692 } 2693 #ifdef IGB_RSS_DEBUG 2694 rxr->rx_packets++; 2695 #endif 2696 } 2697 } else { 2698 IFNET_STAT_INC(ifp, ierrors, 1); 2699 discard: 2700 igb_setup_rxdesc(cur, rxbuf); 2701 if (!eop) 2702 rxr->discard = TRUE; 2703 else 2704 rxr->discard = FALSE; 2705 if (rxr->fmp != NULL) { 2706 m_freem(rxr->fmp); 2707 rxr->fmp = NULL; 2708 rxr->lmp = NULL; 2709 } 2710 m = NULL; 2711 } 2712 2713 if (m != NULL) 2714 ifp->if_input(ifp, m, pi, cpuid); 2715 2716 /* Advance our pointers to the next descriptor. 
*/ 2717 if (++i == rxr->num_rx_desc) 2718 i = 0; 2719 2720 if (ncoll >= rxr->wreg_nsegs) { 2721 igb_rx_refresh(rxr, i); 2722 ncoll = 0; 2723 } 2724 2725 cur = &rxr->rx_base[i]; 2726 staterr = le32toh(cur->wb.upper.status_error); 2727 } 2728 rxr->next_to_check = i; 2729 2730 if (ncoll > 0) 2731 igb_rx_refresh(rxr, i); 2732 } 2733 2734 2735 static void 2736 igb_set_vlan(struct igb_softc *sc) 2737 { 2738 struct e1000_hw *hw = &sc->hw; 2739 uint32_t reg; 2740 #if 0 2741 struct ifnet *ifp = sc->arpcom.ac_if; 2742 #endif 2743 2744 if (sc->vf_ifp) { 2745 e1000_rlpml_set_vf(hw, sc->max_frame_size + VLAN_TAG_SIZE); 2746 return; 2747 } 2748 2749 reg = E1000_READ_REG(hw, E1000_CTRL); 2750 reg |= E1000_CTRL_VME; 2751 E1000_WRITE_REG(hw, E1000_CTRL, reg); 2752 2753 #if 0 2754 /* Enable the Filter Table */ 2755 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) { 2756 reg = E1000_READ_REG(hw, E1000_RCTL); 2757 reg &= ~E1000_RCTL_CFIEN; 2758 reg |= E1000_RCTL_VFE; 2759 E1000_WRITE_REG(hw, E1000_RCTL, reg); 2760 } 2761 #endif 2762 2763 /* Update the frame size */ 2764 E1000_WRITE_REG(&sc->hw, E1000_RLPML, 2765 sc->max_frame_size + VLAN_TAG_SIZE); 2766 2767 #if 0 2768 /* Don't bother with table if no vlans */ 2769 if ((adapter->num_vlans == 0) || 2770 ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)) 2771 return; 2772 /* 2773 ** A soft reset zero's out the VFTA, so 2774 ** we need to repopulate it now. 2775 */ 2776 for (int i = 0; i < IGB_VFTA_SIZE; i++) 2777 if (adapter->shadow_vfta[i] != 0) { 2778 if (adapter->vf_ifp) 2779 e1000_vfta_set_vf(hw, 2780 adapter->shadow_vfta[i], TRUE); 2781 else 2782 E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, 2783 i, adapter->shadow_vfta[i]); 2784 } 2785 #endif 2786 } 2787 2788 static void 2789 igb_enable_intr(struct igb_softc *sc) 2790 { 2791 if (sc->intr_type != PCI_INTR_TYPE_MSIX) { 2792 lwkt_serialize_handler_enable(&sc->main_serialize); 2793 } else { 2794 int i; 2795 2796 for (i = 0; i < sc->msix_cnt; ++i) { 2797 lwkt_serialize_handler_enable( 2798 sc->msix_data[i].msix_serialize); 2799 } 2800 } 2801 2802 if ((sc->flags & IGB_FLAG_SHARED_INTR) == 0) { 2803 if (sc->intr_type == PCI_INTR_TYPE_MSIX) 2804 E1000_WRITE_REG(&sc->hw, E1000_EIAC, sc->intr_mask); 2805 else 2806 E1000_WRITE_REG(&sc->hw, E1000_EIAC, 0); 2807 E1000_WRITE_REG(&sc->hw, E1000_EIAM, sc->intr_mask); 2808 E1000_WRITE_REG(&sc->hw, E1000_EIMS, sc->intr_mask); 2809 E1000_WRITE_REG(&sc->hw, E1000_IMS, E1000_IMS_LSC); 2810 } else { 2811 E1000_WRITE_REG(&sc->hw, E1000_IMS, IMS_ENABLE_MASK); 2812 } 2813 E1000_WRITE_FLUSH(&sc->hw); 2814 } 2815 2816 static void 2817 igb_disable_intr(struct igb_softc *sc) 2818 { 2819 if ((sc->flags & IGB_FLAG_SHARED_INTR) == 0) { 2820 E1000_WRITE_REG(&sc->hw, E1000_EIMC, 0xffffffff); 2821 E1000_WRITE_REG(&sc->hw, E1000_EIAC, 0); 2822 } 2823 E1000_WRITE_REG(&sc->hw, E1000_IMC, 0xffffffff); 2824 E1000_WRITE_FLUSH(&sc->hw); 2825 2826 if (sc->intr_type != PCI_INTR_TYPE_MSIX) { 2827 lwkt_serialize_handler_disable(&sc->main_serialize); 2828 } else { 2829 int i; 2830 2831 for (i = 0; i < sc->msix_cnt; ++i) { 2832 lwkt_serialize_handler_disable( 2833 sc->msix_data[i].msix_serialize); 2834 } 2835 } 2836 } 2837 2838 /* 2839 * Bit of a misnomer, what this really means is 2840 * to enable OS management of the system... 
aka
 * to disable special hardware management features
 */
static void
igb_get_mgmt(struct igb_softc *sc)
{
	if (sc->flags & IGB_FLAG_HAS_MGMT) {
		int manc2h = E1000_READ_REG(&sc->hw, E1000_MANC2H);
		int manc = E1000_READ_REG(&sc->hw, E1000_MANC);

		/* disable hardware interception of ARP */
		manc &= ~E1000_MANC_ARP_EN;

		/* enable receiving management packets to the host */
		manc |= E1000_MANC_EN_MNG2HOST;
		manc2h |= 1 << 5; /* Mng Port 623 */
		manc2h |= 1 << 6; /* Mng Port 664 */
		E1000_WRITE_REG(&sc->hw, E1000_MANC2H, manc2h);
		E1000_WRITE_REG(&sc->hw, E1000_MANC, manc);
	}
}

/*
 * Give control back to the hardware management controller
 * if there is one.
 */
static void
igb_rel_mgmt(struct igb_softc *sc)
{
	if (sc->flags & IGB_FLAG_HAS_MGMT) {
		int manc = E1000_READ_REG(&sc->hw, E1000_MANC);

		/* Re-enable hardware interception of ARP */
		manc |= E1000_MANC_ARP_EN;
		manc &= ~E1000_MANC_EN_MNG2HOST;

		E1000_WRITE_REG(&sc->hw, E1000_MANC, manc);
	}
}

/*
 * Sets CTRL_EXT:DRV_LOAD bit.
 *
 * For ASF and Pass Through versions of f/w this means that
 * the driver is loaded.
 */
static void
igb_get_hw_control(struct igb_softc *sc)
{
	uint32_t ctrl_ext;

	if (sc->vf_ifp)
		return;

	/* Let firmware know the driver has taken over */
	ctrl_ext = E1000_READ_REG(&sc->hw, E1000_CTRL_EXT);
	E1000_WRITE_REG(&sc->hw, E1000_CTRL_EXT,
	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
}

/*
 * Resets CTRL_EXT:DRV_LOAD bit.
 *
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded.
 */
static void
igb_rel_hw_control(struct igb_softc *sc)
{
	uint32_t ctrl_ext;

	if (sc->vf_ifp)
		return;

	/* Let the firmware take over control of the h/w */
	ctrl_ext = E1000_READ_REG(&sc->hw, E1000_CTRL_EXT);
	E1000_WRITE_REG(&sc->hw, E1000_CTRL_EXT,
	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
}

static boolean_t
igb_is_valid_ether_addr(const uint8_t *addr)
{
	uint8_t zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };

	if ((addr[0] & 1) || !bcmp(addr, zero_addr, ETHER_ADDR_LEN))
		return FALSE;
	return TRUE;
}

/*
 * Enable PCI Wake On Lan capability
 */
static void
igb_enable_wol(device_t dev)
{
	uint16_t cap, status;
	uint8_t id;

	/* First find the capabilities pointer */
	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);

	/* Read the PM Capabilities */
	id = pci_read_config(dev, cap, 1);
	if (id != PCIY_PMG)	/* Something wrong */
		return;

	/*
	 * OK, we have the power capabilities,
	 * so now get the status register
	 */
	cap += PCIR_POWER_STATUS;
	status = pci_read_config(dev, cap, 2);
	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
	pci_write_config(dev, cap, status, 2);
}

static void
igb_update_stats_counters(struct igb_softc *sc)
{
	struct e1000_hw *hw = &sc->hw;
	struct e1000_hw_stats *stats;
	struct ifnet *ifp = &sc->arpcom.ac_if;

	/*
	 * The virtual function adapter has only a
	 * small controlled set of stats, do only
	 * those and return.
	 */
	if (sc->vf_ifp) {
		igb_update_vf_stats_counters(sc);
		return;
	}
	stats = sc->stats;

	if (sc->hw.phy.media_type == e1000_media_type_copper ||
	    (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
		stats->symerrs +=
		    E1000_READ_REG(hw, E1000_SYMERRS);
		stats->sec += E1000_READ_REG(hw, E1000_SEC);
	}

	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
	stats->scc += E1000_READ_REG(hw, E1000_SCC);
	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);

	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
	stats->colc += E1000_READ_REG(hw, E1000_COLC);
	stats->dc += E1000_READ_REG(hw, E1000_DC);
	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);

	/*
	 * For watchdog management we need to know if we have been
	 * paused during the last interval, so capture that here.
	 */
	sc->pause_frames = E1000_READ_REG(hw, E1000_XOFFRXC);
	stats->xoffrxc += sc->pause_frames;
	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);

	/* For the 64-bit byte counters the low dword must be read first.
*/ 3015 /* Both registers clear on the read of the high dword */ 3016 3017 stats->gorc += E1000_READ_REG(hw, E1000_GORCL) + 3018 ((uint64_t)E1000_READ_REG(hw, E1000_GORCH) << 32); 3019 stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) + 3020 ((uint64_t)E1000_READ_REG(hw, E1000_GOTCH) << 32); 3021 3022 stats->rnbc += E1000_READ_REG(hw, E1000_RNBC); 3023 stats->ruc += E1000_READ_REG(hw, E1000_RUC); 3024 stats->rfc += E1000_READ_REG(hw, E1000_RFC); 3025 stats->roc += E1000_READ_REG(hw, E1000_ROC); 3026 stats->rjc += E1000_READ_REG(hw, E1000_RJC); 3027 3028 stats->tor += E1000_READ_REG(hw, E1000_TORH); 3029 stats->tot += E1000_READ_REG(hw, E1000_TOTH); 3030 3031 stats->tpr += E1000_READ_REG(hw, E1000_TPR); 3032 stats->tpt += E1000_READ_REG(hw, E1000_TPT); 3033 stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64); 3034 stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127); 3035 stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255); 3036 stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511); 3037 stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023); 3038 stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522); 3039 stats->mptc += E1000_READ_REG(hw, E1000_MPTC); 3040 stats->bptc += E1000_READ_REG(hw, E1000_BPTC); 3041 3042 /* Interrupt Counts */ 3043 3044 stats->iac += E1000_READ_REG(hw, E1000_IAC); 3045 stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC); 3046 stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC); 3047 stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC); 3048 stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC); 3049 stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC); 3050 stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC); 3051 stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC); 3052 stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC); 3053 3054 /* Host to Card Statistics */ 3055 3056 stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC); 3057 stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC); 3058 stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC); 3059 stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC); 3060 stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC); 3061 stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC); 3062 stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC); 3063 stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) + 3064 ((uint64_t)E1000_READ_REG(hw, E1000_HGORCH) << 32)); 3065 stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) + 3066 ((uint64_t)E1000_READ_REG(hw, E1000_HGOTCH) << 32)); 3067 stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS); 3068 stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC); 3069 stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC); 3070 3071 stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC); 3072 stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC); 3073 stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS); 3074 stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR); 3075 stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC); 3076 stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC); 3077 3078 IFNET_STAT_SET(ifp, collisions, stats->colc); 3079 3080 /* Rx Errors */ 3081 IFNET_STAT_SET(ifp, ierrors, 3082 stats->rxerrc + stats->crcerrs + stats->algnerrc + 3083 stats->ruc + stats->roc + stats->mpc + stats->cexterr); 3084 3085 /* Tx Errors */ 3086 IFNET_STAT_SET(ifp, oerrors, 3087 stats->ecol + stats->latecol + sc->watchdog_events); 3088 3089 /* Driver specific counters */ 3090 sc->device_control = E1000_READ_REG(hw, E1000_CTRL); 3091 sc->rx_control = E1000_READ_REG(hw, E1000_RCTL); 3092 sc->int_mask = E1000_READ_REG(hw, E1000_IMS); 3093 sc->eint_mask = E1000_READ_REG(hw, E1000_EIMS); 3094 
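	/*
	 * Illustrative note: PBA packs the packet buffer carve-up as two
	 * 16-bit fields, the TX allocation in the high word and the RX
	 * allocation in the low word (in KB), so e.g. a raw PBA of
	 * 0x00100028 would decode as a 16KB TX / 40KB RX split; the
	 * actual split is chip- and configuration-dependent.
	 */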
sc->packet_buf_alloc_tx = 3095 ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16); 3096 sc->packet_buf_alloc_rx = 3097 (E1000_READ_REG(hw, E1000_PBA) & 0xffff); 3098 } 3099 3100 static void 3101 igb_vf_init_stats(struct igb_softc *sc) 3102 { 3103 struct e1000_hw *hw = &sc->hw; 3104 struct e1000_vf_stats *stats; 3105 3106 stats = sc->stats; 3107 stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC); 3108 stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC); 3109 stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC); 3110 stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC); 3111 stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC); 3112 } 3113 3114 static void 3115 igb_update_vf_stats_counters(struct igb_softc *sc) 3116 { 3117 struct e1000_hw *hw = &sc->hw; 3118 struct e1000_vf_stats *stats; 3119 3120 if (sc->link_speed == 0) 3121 return; 3122 3123 stats = sc->stats; 3124 UPDATE_VF_REG(E1000_VFGPRC, stats->last_gprc, stats->gprc); 3125 UPDATE_VF_REG(E1000_VFGORC, stats->last_gorc, stats->gorc); 3126 UPDATE_VF_REG(E1000_VFGPTC, stats->last_gptc, stats->gptc); 3127 UPDATE_VF_REG(E1000_VFGOTC, stats->last_gotc, stats->gotc); 3128 UPDATE_VF_REG(E1000_VFMPRC, stats->last_mprc, stats->mprc); 3129 } 3130 3131 #ifdef IFPOLL_ENABLE 3132 3133 static void 3134 igb_npoll_status(struct ifnet *ifp) 3135 { 3136 struct igb_softc *sc = ifp->if_softc; 3137 uint32_t reg_icr; 3138 3139 ASSERT_SERIALIZED(&sc->main_serialize); 3140 3141 reg_icr = E1000_READ_REG(&sc->hw, E1000_ICR); 3142 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { 3143 sc->hw.mac.get_link_status = 1; 3144 igb_update_link_status(sc); 3145 } 3146 } 3147 3148 static void 3149 igb_npoll_tx(struct ifnet *ifp, void *arg, int cycle __unused) 3150 { 3151 struct igb_tx_ring *txr = arg; 3152 3153 ASSERT_SERIALIZED(&txr->tx_serialize); 3154 3155 igb_txeof(txr); 3156 if (!ifsq_is_empty(txr->ifsq)) 3157 ifsq_devstart(txr->ifsq); 3158 } 3159 3160 static void 3161 igb_npoll_rx(struct ifnet *ifp __unused, void *arg, int cycle) 3162 { 3163 struct igb_rx_ring *rxr = arg; 3164 3165 ASSERT_SERIALIZED(&rxr->rx_serialize); 3166 3167 igb_rxeof(rxr, cycle); 3168 } 3169 3170 static void 3171 igb_npoll(struct ifnet *ifp, struct ifpoll_info *info) 3172 { 3173 struct igb_softc *sc = ifp->if_softc; 3174 int i, txr_cnt, rxr_cnt; 3175 3176 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3177 3178 if (info) { 3179 int off; 3180 3181 info->ifpi_status.status_func = igb_npoll_status; 3182 info->ifpi_status.serializer = &sc->main_serialize; 3183 3184 txr_cnt = igb_get_txring_inuse(sc, TRUE); 3185 off = sc->tx_npoll_off; 3186 for (i = 0; i < txr_cnt; ++i) { 3187 struct igb_tx_ring *txr = &sc->tx_rings[i]; 3188 int idx = i + off; 3189 3190 KKASSERT(idx < ncpus2); 3191 info->ifpi_tx[idx].poll_func = igb_npoll_tx; 3192 info->ifpi_tx[idx].arg = txr; 3193 info->ifpi_tx[idx].serializer = &txr->tx_serialize; 3194 ifsq_set_cpuid(txr->ifsq, idx); 3195 } 3196 3197 rxr_cnt = igb_get_rxring_inuse(sc, TRUE); 3198 off = sc->rx_npoll_off; 3199 for (i = 0; i < rxr_cnt; ++i) { 3200 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 3201 int idx = i + off; 3202 3203 KKASSERT(idx < ncpus2); 3204 info->ifpi_rx[idx].poll_func = igb_npoll_rx; 3205 info->ifpi_rx[idx].arg = rxr; 3206 info->ifpi_rx[idx].serializer = &rxr->rx_serialize; 3207 } 3208 3209 if (ifp->if_flags & IFF_RUNNING) { 3210 if (rxr_cnt == sc->rx_ring_inuse && 3211 txr_cnt == sc->tx_ring_inuse) { 3212 igb_set_timer_cpuid(sc, TRUE); 3213 igb_disable_intr(sc); 3214 } else { 3215 igb_init(sc); 3216 } 3217 } 3218 } else { 3219 for (i = 0; i < sc->tx_ring_cnt; 
++i) { 3220 struct igb_tx_ring *txr = &sc->tx_rings[i]; 3221 3222 ifsq_set_cpuid(txr->ifsq, txr->tx_intr_cpuid); 3223 } 3224 3225 if (ifp->if_flags & IFF_RUNNING) { 3226 txr_cnt = igb_get_txring_inuse(sc, FALSE); 3227 rxr_cnt = igb_get_rxring_inuse(sc, FALSE); 3228 3229 if (rxr_cnt == sc->rx_ring_inuse && 3230 txr_cnt == sc->tx_ring_inuse) { 3231 igb_set_timer_cpuid(sc, FALSE); 3232 igb_enable_intr(sc); 3233 } else { 3234 igb_init(sc); 3235 } 3236 } 3237 } 3238 } 3239 3240 #endif /* IFPOLL_ENABLE */ 3241 3242 static void 3243 igb_intr(void *xsc) 3244 { 3245 struct igb_softc *sc = xsc; 3246 struct ifnet *ifp = &sc->arpcom.ac_if; 3247 uint32_t eicr; 3248 3249 ASSERT_SERIALIZED(&sc->main_serialize); 3250 3251 eicr = E1000_READ_REG(&sc->hw, E1000_EICR); 3252 3253 if (eicr == 0) 3254 return; 3255 3256 if (ifp->if_flags & IFF_RUNNING) { 3257 struct igb_tx_ring *txr = &sc->tx_rings[0]; 3258 int i; 3259 3260 for (i = 0; i < sc->rx_ring_inuse; ++i) { 3261 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 3262 3263 if (eicr & rxr->rx_intr_mask) { 3264 lwkt_serialize_enter(&rxr->rx_serialize); 3265 igb_rxeof(rxr, -1); 3266 lwkt_serialize_exit(&rxr->rx_serialize); 3267 } 3268 } 3269 3270 if (eicr & txr->tx_intr_mask) { 3271 lwkt_serialize_enter(&txr->tx_serialize); 3272 igb_txeof(txr); 3273 if (!ifsq_is_empty(txr->ifsq)) 3274 ifsq_devstart(txr->ifsq); 3275 lwkt_serialize_exit(&txr->tx_serialize); 3276 } 3277 } 3278 3279 if (eicr & E1000_EICR_OTHER) { 3280 uint32_t icr = E1000_READ_REG(&sc->hw, E1000_ICR); 3281 3282 /* Link status change */ 3283 if (icr & E1000_ICR_LSC) { 3284 sc->hw.mac.get_link_status = 1; 3285 igb_update_link_status(sc); 3286 } 3287 } 3288 3289 /* 3290 * Reading EICR has the side effect to clear interrupt mask, 3291 * so all interrupts need to be enabled here. 3292 */ 3293 E1000_WRITE_REG(&sc->hw, E1000_EIMS, sc->intr_mask); 3294 } 3295 3296 static void 3297 igb_intr_shared(void *xsc) 3298 { 3299 struct igb_softc *sc = xsc; 3300 struct ifnet *ifp = &sc->arpcom.ac_if; 3301 uint32_t reg_icr; 3302 3303 ASSERT_SERIALIZED(&sc->main_serialize); 3304 3305 reg_icr = E1000_READ_REG(&sc->hw, E1000_ICR); 3306 3307 /* Hot eject? */ 3308 if (reg_icr == 0xffffffff) 3309 return; 3310 3311 /* Definitely not our interrupt. 
*/
	if (reg_icr == 0x0)
		return;

	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return;

	if (ifp->if_flags & IFF_RUNNING) {
		if (reg_icr &
		    (E1000_ICR_RXT0 | E1000_ICR_RXDMT0 | E1000_ICR_RXO)) {
			int i;

			for (i = 0; i < sc->rx_ring_inuse; ++i) {
				struct igb_rx_ring *rxr = &sc->rx_rings[i];

				lwkt_serialize_enter(&rxr->rx_serialize);
				igb_rxeof(rxr, -1);
				lwkt_serialize_exit(&rxr->rx_serialize);
			}
		}

		if (reg_icr & E1000_ICR_TXDW) {
			struct igb_tx_ring *txr = &sc->tx_rings[0];

			lwkt_serialize_enter(&txr->tx_serialize);
			igb_txeof(txr);
			if (!ifsq_is_empty(txr->ifsq))
				ifsq_devstart(txr->ifsq);
			lwkt_serialize_exit(&txr->tx_serialize);
		}
	}

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		sc->hw.mac.get_link_status = 1;
		igb_update_link_status(sc);
	}

	if (reg_icr & E1000_ICR_RXO)
		sc->rx_overruns++;
}

static int
igb_encap(struct igb_tx_ring *txr, struct mbuf **m_headp,
    int *segs_used, int *idx)
{
	bus_dma_segment_t segs[IGB_MAX_SCATTER];
	bus_dmamap_t map;
	struct igb_tx_buf *tx_buf, *tx_buf_mapped;
	union e1000_adv_tx_desc *txd = NULL;
	struct mbuf *m_head = *m_headp;
	uint32_t olinfo_status = 0, cmd_type_len = 0, cmd_rs = 0;
	int maxsegs, nsegs, i, j, error;
	uint32_t hdrlen = 0;

	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
		error = igb_tso_pullup(txr, m_headp);
		if (error)
			return error;
		m_head = *m_headp;
	}

	/* Set basic descriptor constants */
	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
	if (m_head->m_flags & M_VLANTAG)
		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;

	/*
	 * Map the packet for DMA.
	 */
	tx_buf = &txr->tx_buf[txr->next_avail_desc];
	tx_buf_mapped = tx_buf;
	map = tx_buf->map;

	maxsegs = txr->tx_avail - IGB_TX_RESERVED;
	KASSERT(maxsegs >= txr->spare_desc, ("not enough spare TX desc\n"));
	if (maxsegs > IGB_MAX_SCATTER)
		maxsegs = IGB_MAX_SCATTER;

	error = bus_dmamap_load_mbuf_defrag(txr->tx_tag, map, m_headp,
	    segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
	if (error) {
		if (error == ENOBUFS)
			txr->sc->mbuf_defrag_failed++;
		else
			txr->sc->no_tx_dma_setup++;

		m_freem(*m_headp);
		*m_headp = NULL;
		return error;
	}
	bus_dmamap_sync(txr->tx_tag, map, BUS_DMASYNC_PREWRITE);

	m_head = *m_headp;

	/*
	 * Set up the TX context descriptor, if any hardware offloading is
	 * needed.  This includes CSUM, VLAN, and TSO.  It will consume one
	 * TX descriptor.
	 *
	 * Unlike these chips' predecessors (em/emx), the TX context
	 * descriptor will _not_ interfere with TX data fetching pipelining.
	 */
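	/*
	 * For example, a TCP/IPv4 packet sent with CSUM_TCP set consumes
	 * one context descriptor (set up just below) plus one data
	 * descriptor per DMA segment, so a 3-segment mbuf chain costs 4
	 * descriptors in total; this is why *segs_used and tx_nsegs are
	 * bumped once here in addition to the nsegs accounting that
	 * follows.
	 */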
	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
		igb_tso_ctx(txr, m_head, &hdrlen);
		cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
		olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
		txr->tx_nsegs++;
		(*segs_used)++;
	} else if (igb_txcsum_ctx(txr, m_head)) {
		if (m_head->m_pkthdr.csum_flags & CSUM_IP)
			olinfo_status |= (E1000_TXD_POPTS_IXSM << 8);
		if (m_head->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_TCP))
			olinfo_status |= (E1000_TXD_POPTS_TXSM << 8);
		txr->tx_nsegs++;
		(*segs_used)++;
	}

	*segs_used += nsegs;
	txr->tx_nsegs += nsegs;
	if (txr->tx_nsegs >= txr->intr_nsegs) {
		/*
		 * Report Status (RS) is turned on every intr_nsegs
		 * descriptors (roughly).
		 */
		txr->tx_nsegs = 0;
		cmd_rs = E1000_ADVTXD_DCMD_RS;
	}

	/* Calculate payload length */
	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
	    << E1000_ADVTXD_PAYLEN_SHIFT);

	/*
	 * 82575 needs the TX context index added; the queue
	 * index is used as TX context index here.
	 */
	if (txr->sc->hw.mac.type == e1000_82575)
		olinfo_status |= txr->me << 4;

	/* Set up our transmit descriptors */
	i = txr->next_avail_desc;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;

		tx_buf = &txr->tx_buf[i];
		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
		seg_addr = segs[j].ds_addr;
		seg_len = segs[j].ds_len;

		txd->read.buffer_addr = htole64(seg_addr);
		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
		txd->read.olinfo_status = htole32(olinfo_status);
		if (++i == txr->num_tx_desc)
			i = 0;
		tx_buf->m_head = NULL;
	}

	KASSERT(txr->tx_avail > nsegs, ("invalid avail TX desc\n"));
	txr->next_avail_desc = i;
	txr->tx_avail -= nsegs;

	tx_buf->m_head = m_head;
	tx_buf_mapped->map = tx_buf->map;
	tx_buf->map = map;

	/*
	 * Last Descriptor of Packet needs End Of Packet (EOP)
	 */
	txd->read.cmd_type_len |= htole32(E1000_ADVTXD_DCMD_EOP | cmd_rs);

	/*
	 * Defer TDT updating until enough descriptors are set up
	 */
	*idx = i;
#ifdef IGB_TSS_DEBUG
	++txr->tx_packets;
#endif

	return 0;
}

static void
igb_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
{
	struct igb_softc *sc = ifp->if_softc;
	struct igb_tx_ring *txr = ifsq_get_priv(ifsq);
	struct mbuf *m_head;
	int idx = -1, nsegs = 0;

	KKASSERT(txr->ifsq == ifsq);
	ASSERT_SERIALIZED(&txr->tx_serialize);

	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
		return;

	if (!sc->link_active || (txr->tx_flags & IGB_TXFLAG_ENABLED) == 0) {
		ifsq_purge(ifsq);
		return;
	}

	if (!IGB_IS_NOT_OACTIVE(txr))
		igb_txeof(txr);

	while (!ifsq_is_empty(ifsq)) {
		if (IGB_IS_OACTIVE(txr)) {
			ifsq_set_oactive(ifsq);
			/* Set watchdog on */
			txr->tx_watchdog.wd_timer = 5;
			break;
		}

		m_head = ifsq_dequeue(ifsq);
		if (m_head == NULL)
			break;

		if (igb_encap(txr, &m_head, &nsegs, &idx)) {
			IFNET_STAT_INC(ifp, oerrors, 1);
			continue;
		}

		if (nsegs >= txr->wreg_nsegs) {
			E1000_WRITE_REG(&txr->sc->hw, E1000_TDT(txr->me), idx);
			idx = -1;
			nsegs = 0;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);
	}
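	/*
	 * Any descriptors queued above but not yet pushed by the
	 * wreg_nsegs batching are flushed with one final TDT write
	 * below; batching the doorbell writes saves MMIO traffic when
	 * many small packets are queued back-to-back.
	 */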
	if (idx >= 0)
		E1000_WRITE_REG(&txr->sc->hw, E1000_TDT(txr->me), idx);
}

static void
igb_watchdog(struct ifaltq_subque *ifsq)
{
	struct igb_tx_ring *txr = ifsq_get_priv(ifsq);
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	struct igb_softc *sc = ifp->if_softc;
	int i;

	KKASSERT(txr->ifsq == ifsq);
	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	/*
	 * If flow control has paused us since last checking
	 * it invalidates the watchdog timing, so don't run it.
	 */
	if (sc->pause_frames) {
		sc->pause_frames = 0;
		txr->tx_watchdog.wd_timer = 5;
		return;
	}

	if_printf(ifp, "Watchdog timeout -- resetting\n");
	if_printf(ifp, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
	    E1000_READ_REG(&sc->hw, E1000_TDH(txr->me)),
	    E1000_READ_REG(&sc->hw, E1000_TDT(txr->me)));
	if_printf(ifp, "TX(%d) desc avail = %d, "
	    "Next TX to Clean = %d\n",
	    txr->me, txr->tx_avail, txr->next_to_clean);

	IFNET_STAT_INC(ifp, oerrors, 1);
	sc->watchdog_events++;

	igb_init(sc);
	for (i = 0; i < sc->tx_ring_inuse; ++i)
		ifsq_devstart_sched(sc->tx_rings[i].ifsq);
}

static void
igb_set_eitr(struct igb_softc *sc, int idx, int rate)
{
	uint32_t eitr = 0;

	if (rate > 0) {
		if (sc->hw.mac.type == e1000_82575) {
			eitr = 1000000000 / 256 / rate;
			/*
			 * NOTE:
			 * The datasheet is wrong about the 2-bit
			 * left shift here.
			 */
		} else {
			eitr = 1000000 / rate;
			eitr <<= IGB_EITR_INTVL_SHIFT;
		}

		if (eitr == 0) {
			/* Don't disable it */
			eitr = 1 << IGB_EITR_INTVL_SHIFT;
		} else if (eitr > IGB_EITR_INTVL_MASK) {
			/* Don't allow it to be too large */
			eitr = IGB_EITR_INTVL_MASK;
		}
	}
	if (sc->hw.mac.type == e1000_82575)
		eitr |= eitr << 16;
	else
		eitr |= E1000_EITR_CNT_IGNR;
	E1000_WRITE_REG(&sc->hw, E1000_EITR(idx), eitr);
}

static int
igb_sysctl_intr_rate(SYSCTL_HANDLER_ARGS)
{
	struct igb_softc *sc = (void *)arg1;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int error, intr_rate;

	intr_rate = sc->intr_rate;
	error = sysctl_handle_int(oidp, &intr_rate, 0, req);
	if (error || req->newptr == NULL)
		return error;
	if (intr_rate < 0)
		return EINVAL;

	ifnet_serialize_all(ifp);

	sc->intr_rate = intr_rate;
	if (ifp->if_flags & IFF_RUNNING)
		igb_set_eitr(sc, 0, sc->intr_rate);

	if (bootverbose)
		if_printf(ifp, "interrupt rate set to %d/sec\n", sc->intr_rate);

	ifnet_deserialize_all(ifp);

	return 0;
}

static int
igb_sysctl_msix_rate(SYSCTL_HANDLER_ARGS)
{
	struct igb_msix_data *msix = (void *)arg1;
	struct igb_softc *sc = msix->msix_sc;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int error, msix_rate;

	msix_rate = msix->msix_rate;
	error = sysctl_handle_int(oidp, &msix_rate, 0, req);
	if (error || req->newptr == NULL)
		return error;
	if (msix_rate < 0)
		return EINVAL;

	lwkt_serialize_enter(msix->msix_serialize);

	msix->msix_rate = msix_rate;
	if (ifp->if_flags & IFF_RUNNING)
		igb_set_eitr(sc, msix->msix_vector, msix->msix_rate);

	if (bootverbose) {
		if_printf(ifp, "%s set to %d/sec\n", msix->msix_rate_desc,
		    msix->msix_rate);
	}

	lwkt_serialize_exit(msix->msix_serialize);

	return 0;
}

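/*
 * Tuning example (illustrative): with, say, 1024 TX descriptors,
 * intr_nsegs defaults to 1024 / 16 = 64, i.e. igb_encap() requests a
 * TX completion interrupt (RS bit) roughly once every 64 descriptors.
 * The sysctl handler below adjusts that value, rejecting settings
 * that would not stay usefully below the OACTIVE watermarks.
 */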
static int
igb_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS)
{
	struct igb_softc *sc = (void *)arg1;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	struct igb_tx_ring *txr = &sc->tx_rings[0];
	int error, nsegs;

	nsegs = txr->intr_nsegs;
	error = sysctl_handle_int(oidp, &nsegs, 0, req);
	if (error || req->newptr == NULL)
		return error;
	if (nsegs <= 0)
		return EINVAL;

	ifnet_serialize_all(ifp);

	if (nsegs >= txr->num_tx_desc - txr->oact_lo_desc ||
	    nsegs >= txr->oact_hi_desc - IGB_MAX_SCATTER) {
		error = EINVAL;
	} else {
		int i;

		error = 0;
		for (i = 0; i < sc->tx_ring_cnt; ++i)
			sc->tx_rings[i].intr_nsegs = nsegs;
	}

	ifnet_deserialize_all(ifp);

	return error;
}

static int
igb_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
{
	struct igb_softc *sc = (void *)arg1;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int error, nsegs, i;

	nsegs = sc->rx_rings[0].wreg_nsegs;
	error = sysctl_handle_int(oidp, &nsegs, 0, req);
	if (error || req->newptr == NULL)
		return error;

	ifnet_serialize_all(ifp);
	for (i = 0; i < sc->rx_ring_cnt; ++i)
		sc->rx_rings[i].wreg_nsegs = nsegs;
	ifnet_deserialize_all(ifp);

	return 0;
}

static int
igb_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
{
	struct igb_softc *sc = (void *)arg1;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int error, nsegs, i;

	nsegs = sc->tx_rings[0].wreg_nsegs;
	error = sysctl_handle_int(oidp, &nsegs, 0, req);
	if (error || req->newptr == NULL)
		return error;

	ifnet_serialize_all(ifp);
	for (i = 0; i < sc->tx_ring_cnt; ++i)
		sc->tx_rings[i].wreg_nsegs = nsegs;
	ifnet_deserialize_all(ifp);

	return 0;
}

#ifdef IFPOLL_ENABLE

static int
igb_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS)
{
	struct igb_softc *sc = (void *)arg1;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int error, off;

	off = sc->rx_npoll_off;
	error = sysctl_handle_int(oidp, &off, 0, req);
	if (error || req->newptr == NULL)
		return error;
	if (off < 0)
		return EINVAL;

	ifnet_serialize_all(ifp);
	if (off >= ncpus2 || off % sc->rx_ring_cnt != 0) {
		error = EINVAL;
	} else {
		error = 0;
		sc->rx_npoll_off = off;
	}
	ifnet_deserialize_all(ifp);

	return error;
}

static int
igb_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS)
{
	struct igb_softc *sc = (void *)arg1;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int error, off;

	off = sc->tx_npoll_off;
	error = sysctl_handle_int(oidp, &off, 0, req);
	if (error || req->newptr == NULL)
		return error;
	if (off < 0)
		return EINVAL;

	ifnet_serialize_all(ifp);
	if (off >= ncpus2 || off % sc->tx_ring_cnt != 0) {
		error = EINVAL;
	} else {
		error = 0;
		sc->tx_npoll_off = off;
	}
	ifnet_deserialize_all(ifp);

	return error;
}

#endif	/* IFPOLL_ENABLE */

static void
igb_init_intr(struct igb_softc *sc)
{
	igb_set_intr_mask(sc);

	if ((sc->flags & IGB_FLAG_SHARED_INTR) == 0)
		igb_init_unshared_intr(sc);

	if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
		igb_set_eitr(sc, 0, sc->intr_rate);
	} else {
		int i;

		for (i = 0; i < sc->msix_cnt; ++i)
			igb_set_eitr(sc, i, sc->msix_data[i].msix_rate);
	}
}

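/*
 * A worked example for the IVAR programming that follows, assuming
 * the 82580-and-later layout: each 32-bit IVAR packs four 8-bit
 * vector entries, [7:0] RX of the even ring, [15:8] TX of the even
 * ring, [23:16] RX of the odd ring and [31:24] TX of the odd ring,
 * each OR'd with E1000_IVAR_VALID (0x80).  So RX ring 1 with
 * rx_intr_bit 1 lands in IVAR0 index 0 as (1 | 0x80) << 16.
 */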
static void 3824 igb_init_unshared_intr(struct igb_softc *sc) 3825 { 3826 struct e1000_hw *hw = &sc->hw; 3827 const struct igb_rx_ring *rxr; 3828 const struct igb_tx_ring *txr; 3829 uint32_t ivar, index; 3830 int i; 3831 3832 /* 3833 * Enable extended mode 3834 */ 3835 if (sc->hw.mac.type != e1000_82575) { 3836 uint32_t gpie; 3837 int ivar_max; 3838 3839 gpie = E1000_GPIE_NSICR; 3840 if (sc->intr_type == PCI_INTR_TYPE_MSIX) { 3841 gpie |= E1000_GPIE_MSIX_MODE | 3842 E1000_GPIE_EIAME | 3843 E1000_GPIE_PBA; 3844 } 3845 E1000_WRITE_REG(hw, E1000_GPIE, gpie); 3846 3847 /* 3848 * Clear IVARs 3849 */ 3850 switch (sc->hw.mac.type) { 3851 case e1000_82576: 3852 ivar_max = IGB_MAX_IVAR_82576; 3853 break; 3854 3855 case e1000_82580: 3856 ivar_max = IGB_MAX_IVAR_82580; 3857 break; 3858 3859 case e1000_i350: 3860 ivar_max = IGB_MAX_IVAR_I350; 3861 break; 3862 3863 case e1000_i354: 3864 ivar_max = IGB_MAX_IVAR_I354; 3865 break; 3866 3867 case e1000_vfadapt: 3868 case e1000_vfadapt_i350: 3869 ivar_max = IGB_MAX_IVAR_VF; 3870 break; 3871 3872 case e1000_i210: 3873 ivar_max = IGB_MAX_IVAR_I210; 3874 break; 3875 3876 case e1000_i211: 3877 ivar_max = IGB_MAX_IVAR_I211; 3878 break; 3879 3880 default: 3881 panic("unknown mac type %d\n", sc->hw.mac.type); 3882 } 3883 for (i = 0; i < ivar_max; ++i) 3884 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, i, 0); 3885 E1000_WRITE_REG(hw, E1000_IVAR_MISC, 0); 3886 } else { 3887 uint32_t tmp; 3888 3889 KASSERT(sc->intr_type != PCI_INTR_TYPE_MSIX, 3890 ("82575 w/ MSI-X")); 3891 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT); 3892 tmp |= E1000_CTRL_EXT_IRCA; 3893 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp); 3894 } 3895 3896 /* 3897 * Map TX/RX interrupts to EICR 3898 */ 3899 switch (sc->hw.mac.type) { 3900 case e1000_82580: 3901 case e1000_i350: 3902 case e1000_i354: 3903 case e1000_vfadapt: 3904 case e1000_vfadapt_i350: 3905 case e1000_i210: 3906 case e1000_i211: 3907 /* RX entries */ 3908 for (i = 0; i < sc->rx_ring_inuse; ++i) { 3909 rxr = &sc->rx_rings[i]; 3910 3911 index = i >> 1; 3912 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); 3913 3914 if (i & 1) { 3915 ivar &= 0xff00ffff; 3916 ivar |= 3917 (rxr->rx_intr_bit | E1000_IVAR_VALID) << 16; 3918 } else { 3919 ivar &= 0xffffff00; 3920 ivar |= 3921 (rxr->rx_intr_bit | E1000_IVAR_VALID); 3922 } 3923 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 3924 } 3925 /* TX entries */ 3926 for (i = 0; i < sc->tx_ring_inuse; ++i) { 3927 txr = &sc->tx_rings[i]; 3928 3929 index = i >> 1; 3930 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); 3931 3932 if (i & 1) { 3933 ivar &= 0x00ffffff; 3934 ivar |= 3935 (txr->tx_intr_bit | E1000_IVAR_VALID) << 24; 3936 } else { 3937 ivar &= 0xffff00ff; 3938 ivar |= 3939 (txr->tx_intr_bit | E1000_IVAR_VALID) << 8; 3940 } 3941 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 3942 } 3943 if (sc->intr_type == PCI_INTR_TYPE_MSIX) { 3944 ivar = (sc->sts_intr_bit | E1000_IVAR_VALID) << 8; 3945 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); 3946 } 3947 break; 3948 3949 case e1000_82576: 3950 /* RX entries */ 3951 for (i = 0; i < sc->rx_ring_inuse; ++i) { 3952 rxr = &sc->rx_rings[i]; 3953 3954 index = i & 0x7; /* Each IVAR has two entries */ 3955 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); 3956 3957 if (i < 8) { 3958 ivar &= 0xffffff00; 3959 ivar |= 3960 (rxr->rx_intr_bit | E1000_IVAR_VALID); 3961 } else { 3962 ivar &= 0xff00ffff; 3963 ivar |= 3964 (rxr->rx_intr_bit | E1000_IVAR_VALID) << 16; 3965 } 3966 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 3967 } 3968 /* TX entries */ 3969 for (i = 

static int
igb_setup_intr(struct igb_softc *sc)
{
	int error;

	if (sc->intr_type == PCI_INTR_TYPE_MSIX)
		return igb_msix_setup(sc);

	error = bus_setup_intr(sc->dev, sc->intr_res, INTR_MPSAFE,
	    (sc->flags & IGB_FLAG_SHARED_INTR) ? igb_intr_shared : igb_intr,
	    sc, &sc->intr_tag, &sc->main_serialize);
	if (error) {
		device_printf(sc->dev,
		    "Failed to register interrupt handler\n");
		return error;
	}
	return 0;
}

static void
igb_set_txintr_mask(struct igb_tx_ring *txr, int *intr_bit0, int intr_bitmax)
{
	if (txr->sc->hw.mac.type == e1000_82575) {
		txr->tx_intr_bit = 0;	/* unused */
		switch (txr->me) {
		case 0:
			txr->tx_intr_mask = E1000_EICR_TX_QUEUE0;
			break;
		case 1:
			txr->tx_intr_mask = E1000_EICR_TX_QUEUE1;
			break;
		case 2:
			txr->tx_intr_mask = E1000_EICR_TX_QUEUE2;
			break;
		case 3:
			txr->tx_intr_mask = E1000_EICR_TX_QUEUE3;
			break;
		default:
			panic("unsupported TX ring %d", txr->me);
		}
	} else {
		int intr_bit = *intr_bit0;

		txr->tx_intr_bit = intr_bit % intr_bitmax;
		txr->tx_intr_mask = 1 << txr->tx_intr_bit;

		*intr_bit0 = intr_bit + 1;
	}
}

static void
igb_set_rxintr_mask(struct igb_rx_ring *rxr, int *intr_bit0, int intr_bitmax)
{
	if (rxr->sc->hw.mac.type == e1000_82575) {
		rxr->rx_intr_bit = 0;	/* unused */
		switch (rxr->me) {
		case 0:
			rxr->rx_intr_mask = E1000_EICR_RX_QUEUE0;
			break;
		case 1:
			rxr->rx_intr_mask = E1000_EICR_RX_QUEUE1;
			break;
		case 2:
			rxr->rx_intr_mask = E1000_EICR_RX_QUEUE2;
			break;
		case 3:
			rxr->rx_intr_mask = E1000_EICR_RX_QUEUE3;
			break;
		default:
			panic("unsupported RX ring %d", rxr->me);
		}
	} else {
		int intr_bit = *intr_bit0;

		rxr->rx_intr_bit = intr_bit % intr_bitmax;
		rxr->rx_intr_mask = 1 << rxr->rx_intr_bit;

		*intr_bit0 = intr_bit + 1;
	}
}
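
/*
 * Illustrative sketch, not built: on non-82575 parts the two helpers
 * above hand out EICR bits round-robin from a shared counter and wrap
 * at the per-chip maximum, so rings share a bit once the bits run
 * out.  Hypothetical standalone rendering of that policy; e.g. 6
 * rings with intr_bitmax 4 receive bits 0, 1, 2, 3, 0, 1.
 */
#if 0	/* example only */
static void
igb_example_round_robin(int ring_cnt, int intr_bitmax)
{
	int i, intr_bit = 0;

	for (i = 0; i < ring_cnt; ++i) {
		int bit = intr_bit % intr_bitmax;	/* wraps around */
		uint32_t mask = 1 << bit;

		(void)mask;	/* ring i would signal via this EICR mask */
		++intr_bit;
	}
}
#endif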

static void
igb_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
{
	struct igb_softc *sc = ifp->if_softc;

	ifnet_serialize_array_enter(sc->serializes, sc->serialize_cnt, slz);
}

static void
igb_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
{
	struct igb_softc *sc = ifp->if_softc;

	ifnet_serialize_array_exit(sc->serializes, sc->serialize_cnt, slz);
}

static int
igb_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
{
	struct igb_softc *sc = ifp->if_softc;

	return ifnet_serialize_array_try(sc->serializes, sc->serialize_cnt,
	    slz);
}

#ifdef INVARIANTS

static void
igb_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
    boolean_t serialized)
{
	struct igb_softc *sc = ifp->if_softc;

	ifnet_serialize_array_assert(sc->serializes, sc->serialize_cnt,
	    slz, serialized);
}

#endif	/* INVARIANTS */

static void
igb_set_intr_mask(struct igb_softc *sc)
{
	int i;

	sc->intr_mask = sc->sts_intr_mask;
	for (i = 0; i < sc->rx_ring_inuse; ++i)
		sc->intr_mask |= sc->rx_rings[i].rx_intr_mask;
	for (i = 0; i < sc->tx_ring_inuse; ++i)
		sc->intr_mask |= sc->tx_rings[i].tx_intr_mask;
	if (bootverbose) {
		if_printf(&sc->arpcom.ac_if, "intr mask 0x%08x\n",
		    sc->intr_mask);
	}
}
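
/*
 * Worked example, hypothetical values, not built: with the status
 * interrupt on E1000_EICR_OTHER and two TX plus two RX rings that
 * igb_alloc_intr() below assigned EICR bits 0..3 (TX rings first),
 * igb_set_intr_mask() composes the following EIMS mask:
 */
#if 0	/* example only */
static uint32_t
igb_example_intr_mask(void)
{
	uint32_t intr_mask;

	intr_mask = E1000_EICR_OTHER;		/* link/status events */
	intr_mask |= (1 << 0) | (1 << 1);	/* TX rings 0 and 1 */
	intr_mask |= (1 << 2) | (1 << 3);	/* RX rings 0 and 1 */
	return intr_mask;
}
#endif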

static int
igb_alloc_intr(struct igb_softc *sc)
{
	int i, intr_bit, intr_bitmax;
	u_int intr_flags;

	igb_msix_try_alloc(sc);
	if (sc->intr_type == PCI_INTR_TYPE_MSIX)
		goto done;

	/*
	 * Allocate MSI/legacy interrupt resource
	 */
	sc->intr_type = pci_alloc_1intr(sc->dev, igb_msi_enable,
	    &sc->intr_rid, &intr_flags);

	if (sc->intr_type == PCI_INTR_TYPE_LEGACY) {
		int unshared;

		unshared = device_getenv_int(sc->dev, "irq.unshared", 0);
		if (!unshared) {
			sc->flags |= IGB_FLAG_SHARED_INTR;
			if (bootverbose)
				device_printf(sc->dev, "IRQ shared\n");
		} else {
			intr_flags &= ~RF_SHAREABLE;
			if (bootverbose)
				device_printf(sc->dev, "IRQ unshared\n");
		}
	}

	sc->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
	    &sc->intr_rid, intr_flags);
	if (sc->intr_res == NULL) {
		device_printf(sc->dev, "Unable to allocate bus resource: "
		    "interrupt\n");
		return ENXIO;
	}

	for (i = 0; i < sc->tx_ring_cnt; ++i)
		sc->tx_rings[i].tx_intr_cpuid = rman_get_cpuid(sc->intr_res);

	/*
	 * Setup MSI/legacy interrupt mask
	 */
	switch (sc->hw.mac.type) {
	case e1000_82575:
		intr_bitmax = IGB_MAX_TXRXINT_82575;
		break;

	case e1000_82576:
		intr_bitmax = IGB_MAX_TXRXINT_82576;
		break;

	case e1000_82580:
		intr_bitmax = IGB_MAX_TXRXINT_82580;
		break;

	case e1000_i350:
		intr_bitmax = IGB_MAX_TXRXINT_I350;
		break;

	case e1000_i354:
		intr_bitmax = IGB_MAX_TXRXINT_I354;
		break;

	case e1000_i210:
		intr_bitmax = IGB_MAX_TXRXINT_I210;
		break;

	case e1000_i211:
		intr_bitmax = IGB_MAX_TXRXINT_I211;
		break;

	default:
		intr_bitmax = IGB_MIN_TXRXINT;
		break;
	}
	intr_bit = 0;
	for (i = 0; i < sc->tx_ring_cnt; ++i)
		igb_set_txintr_mask(&sc->tx_rings[i], &intr_bit, intr_bitmax);
	for (i = 0; i < sc->rx_ring_cnt; ++i)
		igb_set_rxintr_mask(&sc->rx_rings[i], &intr_bit, intr_bitmax);
	sc->sts_intr_bit = 0;
	sc->sts_intr_mask = E1000_EICR_OTHER;

	/* Initialize interrupt rate */
	sc->intr_rate = IGB_INTR_RATE;
done:
	igb_set_ring_inuse(sc, FALSE);
	igb_set_intr_mask(sc);
	return 0;
}

static void
igb_free_intr(struct igb_softc *sc)
{
	if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
		if (sc->intr_res != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ,
			    sc->intr_rid, sc->intr_res);
		}
		if (sc->intr_type == PCI_INTR_TYPE_MSI)
			pci_release_msi(sc->dev);
	} else {
		igb_msix_free(sc, TRUE);
	}
}

static void
igb_teardown_intr(struct igb_softc *sc)
{
	if (sc->intr_type != PCI_INTR_TYPE_MSIX)
		bus_teardown_intr(sc->dev, sc->intr_res, sc->intr_tag);
	else
		igb_msix_teardown(sc, sc->msix_cnt);
}

static void
igb_msix_try_alloc(struct igb_softc *sc)
{
	int msix_enable, msix_cnt, msix_cnt2, alloc_cnt;
	int i, x, error;
	int offset, offset_def;
	struct igb_msix_data *msix;
	boolean_t aggregate, setup = FALSE;

	/*
	 * Don't enable MSI-X on 82575, see:
	 * 82575 specification update errata #25
	 */
	if (sc->hw.mac.type == e1000_82575)
		return;

	/* Don't enable MSI-X on VF */
	if (sc->vf_ifp)
		return;

	msix_enable = device_getenv_int(sc->dev, "msix.enable",
	    igb_msix_enable);
	if (!msix_enable)
		return;

	msix_cnt = pci_msix_count(sc->dev);
#ifdef IGB_MSIX_DEBUG
	msix_cnt = device_getenv_int(sc->dev, "msix.count", msix_cnt);
#endif
	if (msix_cnt <= 1) {
		/* One MSI-X model does not make sense */
		return;
	}

	i = 0;
	while ((1 << (i + 1)) <= msix_cnt)
		++i;
	msix_cnt2 = 1 << i;
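
	/*
	 * Worked example (illustrative values): the loop above rounds
	 * msix_cnt down to the largest power of two no greater than it,
	 * e.g. msix_cnt 10 gives msix_cnt2 8, 16 gives 16 and 3 gives 2.
	 * When msix_cnt is already a power of two, the fixup below halves
	 * msix_cnt2 again so that one vector is left over for link status.
	 */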

	if (bootverbose) {
		device_printf(sc->dev, "MSI-X count %d/%d\n",
		    msix_cnt2, msix_cnt);
	}

	KKASSERT(msix_cnt2 <= msix_cnt);
	if (msix_cnt == msix_cnt2) {
		/* We need at least one MSI-X for link status */
		msix_cnt2 >>= 1;
		if (msix_cnt2 <= 1) {
			/* One MSI-X for RX/TX does not make sense */
			device_printf(sc->dev, "not enough MSI-X for TX/RX, "
			    "MSI-X count %d/%d\n", msix_cnt2, msix_cnt);
			return;
		}
		KKASSERT(msix_cnt > msix_cnt2);

		if (bootverbose) {
			device_printf(sc->dev, "MSI-X count fixup %d/%d\n",
			    msix_cnt2, msix_cnt);
		}
	}

	sc->rx_ring_msix = sc->rx_ring_cnt;
	if (sc->rx_ring_msix > msix_cnt2)
		sc->rx_ring_msix = msix_cnt2;

	sc->tx_ring_msix = sc->tx_ring_cnt;
	if (sc->tx_ring_msix > msix_cnt2)
		sc->tx_ring_msix = msix_cnt2;

	if (msix_cnt >= sc->tx_ring_msix + sc->rx_ring_msix + 1) {
		/*
		 * Independent TX/RX MSI-X
		 */
		aggregate = FALSE;
		if (bootverbose)
			device_printf(sc->dev, "independent TX/RX MSI-X\n");
		alloc_cnt = sc->tx_ring_msix + sc->rx_ring_msix;
	} else {
		/*
		 * Aggregate TX/RX MSI-X
		 */
		aggregate = TRUE;
		if (bootverbose)
			device_printf(sc->dev, "aggregate TX/RX MSI-X\n");
		alloc_cnt = msix_cnt2;
		if (alloc_cnt > ncpus2)
			alloc_cnt = ncpus2;
		if (sc->rx_ring_msix > alloc_cnt)
			sc->rx_ring_msix = alloc_cnt;
		if (sc->tx_ring_msix > alloc_cnt)
			sc->tx_ring_msix = alloc_cnt;
	}
	++alloc_cnt;	/* For link status */

	if (bootverbose) {
		device_printf(sc->dev, "MSI-X alloc %d, "
		    "RX ring %d, TX ring %d\n", alloc_cnt,
		    sc->rx_ring_msix, sc->tx_ring_msix);
	}

	sc->msix_mem_rid = PCIR_BAR(IGB_MSIX_BAR);
	sc->msix_mem_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
	    &sc->msix_mem_rid, RF_ACTIVE);
	if (sc->msix_mem_res == NULL) {
		sc->msix_mem_rid = PCIR_BAR(IGB_MSIX_BAR_ALT);
		sc->msix_mem_res = bus_alloc_resource_any(sc->dev,
		    SYS_RES_MEMORY, &sc->msix_mem_rid, RF_ACTIVE);
		if (sc->msix_mem_res == NULL) {
			device_printf(sc->dev, "Unable to map MSI-X table\n");
			return;
		}
	}

	sc->msix_cnt = alloc_cnt;
	sc->msix_data = kmalloc_cachealign(
	    sizeof(struct igb_msix_data) * sc->msix_cnt,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (x = 0; x < sc->msix_cnt; ++x) {
		msix = &sc->msix_data[x];

		lwkt_serialize_init(&msix->msix_serialize0);
		msix->msix_sc = sc;
		msix->msix_rid = -1;
		msix->msix_vector = x;
		msix->msix_mask = 1 << msix->msix_vector;
		msix->msix_rate = IGB_INTR_RATE;
	}

	x = 0;
	if (!aggregate) {
		/*
		 * RX rings
		 */
		if (sc->rx_ring_msix == ncpus2) {
			offset = 0;
		} else {
			offset_def = (sc->rx_ring_msix *
			    device_get_unit(sc->dev)) % ncpus2;

			offset = device_getenv_int(sc->dev,
			    "msix.rxoff", offset_def);
			if (offset >= ncpus2 ||
			    offset % sc->rx_ring_msix != 0) {
				device_printf(sc->dev,
				    "invalid msix.rxoff %d, use %d\n",
				    offset, offset_def);
				offset = offset_def;
			}
		}
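
		/*
		 * Worked example (illustrative values): with 4 RX rings
		 * on an 8-cpu (ncpus2) box, unit igb0 defaults to offset
		 * 0 (rings on cpus 0-3) and igb1 to offset 4 (cpus 4-7),
		 * spreading adapters across the machine.  A manually
		 * tuned msix.rxoff must stay below ncpus2 and be a
		 * multiple of the ring count so rings land on distinct
		 * cpus; anything else falls back to the default above.
		 */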
		igb_msix_rx_conf(sc, 0, &x, offset);

		/*
		 * TX rings
		 */
		if (sc->tx_ring_msix == ncpus2) {
			offset = 0;
		} else {
			offset_def = (sc->tx_ring_msix *
			    device_get_unit(sc->dev)) % ncpus2;

			offset = device_getenv_int(sc->dev,
			    "msix.txoff", offset_def);
			if (offset >= ncpus2 ||
			    offset % sc->tx_ring_msix != 0) {
				device_printf(sc->dev,
				    "invalid msix.txoff %d, use %d\n",
				    offset, offset_def);
				offset = offset_def;
			}
		}
		igb_msix_tx_conf(sc, 0, &x, offset);
	} else {
		int ring_agg, ring_max;

		ring_agg = sc->rx_ring_msix;
		if (ring_agg > sc->tx_ring_msix)
			ring_agg = sc->tx_ring_msix;

		ring_max = sc->rx_ring_msix;
		if (ring_max < sc->tx_ring_msix)
			ring_max = sc->tx_ring_msix;

		if (ring_max == ncpus2) {
			offset = 0;
		} else {
			offset_def = (ring_max * device_get_unit(sc->dev)) %
			    ncpus2;

			offset = device_getenv_int(sc->dev, "msix.off",
			    offset_def);
			if (offset >= ncpus2 || offset % ring_max != 0) {
				device_printf(sc->dev,
				    "invalid msix.off %d, use %d\n",
				    offset, offset_def);
				offset = offset_def;
			}
		}

		for (i = 0; i < ring_agg; ++i) {
			struct igb_tx_ring *txr = &sc->tx_rings[i];
			struct igb_rx_ring *rxr = &sc->rx_rings[i];

			KKASSERT(x < sc->msix_cnt);
			msix = &sc->msix_data[x++];

			txr->tx_intr_bit = msix->msix_vector;
			txr->tx_intr_mask = msix->msix_mask;
			rxr->rx_intr_bit = msix->msix_vector;
			rxr->rx_intr_mask = msix->msix_mask;

			msix->msix_serialize = &msix->msix_serialize0;
			msix->msix_func = igb_msix_rxtx;
			msix->msix_arg = msix;
			msix->msix_rx = rxr;
			msix->msix_tx = txr;

			msix->msix_cpuid = i + offset;
			KKASSERT(msix->msix_cpuid < ncpus2);
			txr->tx_intr_cpuid = msix->msix_cpuid;

			ksnprintf(msix->msix_desc, sizeof(msix->msix_desc),
			    "%s rxtx%d", device_get_nameunit(sc->dev), i);
			msix->msix_rate = IGB_MSIX_RX_RATE;
			ksnprintf(msix->msix_rate_desc,
			    sizeof(msix->msix_rate_desc),
			    "RXTX%d interrupt rate", i);
		}

		if (ring_agg != ring_max) {
			if (ring_max == sc->tx_ring_msix)
				igb_msix_tx_conf(sc, i, &x, offset);
			else
				igb_msix_rx_conf(sc, i, &x, offset);
		}
	}

	/*
	 * Link status
	 */
	KKASSERT(x < sc->msix_cnt);
	msix = &sc->msix_data[x++];
	sc->sts_intr_bit = msix->msix_vector;
	sc->sts_intr_mask = msix->msix_mask;

	msix->msix_serialize = &sc->main_serialize;
	msix->msix_func = igb_msix_status;
	msix->msix_arg = sc;
	msix->msix_cpuid = 0;
	ksnprintf(msix->msix_desc, sizeof(msix->msix_desc), "%s sts",
	    device_get_nameunit(sc->dev));
	ksnprintf(msix->msix_rate_desc, sizeof(msix->msix_rate_desc),
	    "status interrupt rate");

	KKASSERT(x == sc->msix_cnt);

	error = pci_setup_msix(sc->dev);
	if (error) {
		device_printf(sc->dev, "Setup MSI-X failed\n");
		goto back;
	}
	setup = TRUE;

	for (i = 0; i < sc->msix_cnt; ++i) {
		msix = &sc->msix_data[i];

		error = pci_alloc_msix_vector(sc->dev, msix->msix_vector,
		    &msix->msix_rid, msix->msix_cpuid);
		if (error) {
			device_printf(sc->dev,
			    "Unable to allocate MSI-X %d on cpu%d\n",
			    msix->msix_vector, msix->msix_cpuid);
			goto back;
		}

		msix->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
		    &msix->msix_rid, RF_ACTIVE);
		if (msix->msix_res == NULL) {
			device_printf(sc->dev,
			    "Unable to allocate MSI-X %d resource\n",
			    msix->msix_vector);
			error = ENOMEM;
			goto back;
		}
	}

	pci_enable_msix(sc->dev);
	sc->intr_type = PCI_INTR_TYPE_MSIX;
back:
	if (error)
		igb_msix_free(sc, setup);
}
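
/*
 * Worked example (hypothetical hardware): an 82576 advertising 8 MSI-X
 * vectors with 4 RX and 4 TX rings on a 4-cpu box would need
 * 4 + 4 + 1 = 9 independent vectors, which does not fit, so
 * igb_msix_try_alloc() falls back to aggregate mode: vectors 0-3
 * become rxtx0-rxtx3, one combined TX/RX handler per cpu, and
 * vector 4 handles link status.
 */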

static void
igb_msix_free(struct igb_softc *sc, boolean_t setup)
{
	int i;

	KKASSERT(sc->msix_cnt > 1);

	for (i = 0; i < sc->msix_cnt; ++i) {
		struct igb_msix_data *msix = &sc->msix_data[i];

		if (msix->msix_res != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ,
			    msix->msix_rid, msix->msix_res);
		}
		if (msix->msix_rid >= 0)
			pci_release_msix_vector(sc->dev, msix->msix_rid);
	}
	if (setup)
		pci_teardown_msix(sc->dev);

	sc->msix_cnt = 0;
	kfree(sc->msix_data, M_DEVBUF);
	sc->msix_data = NULL;
}

static int
igb_msix_setup(struct igb_softc *sc)
{
	int i;

	for (i = 0; i < sc->msix_cnt; ++i) {
		struct igb_msix_data *msix = &sc->msix_data[i];
		int error;

		error = bus_setup_intr_descr(sc->dev, msix->msix_res,
		    INTR_MPSAFE, msix->msix_func, msix->msix_arg,
		    &msix->msix_handle, msix->msix_serialize,
		    msix->msix_desc);
		if (error) {
			device_printf(sc->dev, "could not set up %s "
			    "interrupt handler.\n", msix->msix_desc);
			igb_msix_teardown(sc, i);
			return error;
		}
	}
	return 0;
}

static void
igb_msix_teardown(struct igb_softc *sc, int msix_cnt)
{
	int i;

	for (i = 0; i < msix_cnt; ++i) {
		struct igb_msix_data *msix = &sc->msix_data[i];

		bus_teardown_intr(sc->dev, msix->msix_res, msix->msix_handle);
	}
}

static void
igb_msix_rx(void *arg)
{
	struct igb_rx_ring *rxr = arg;

	ASSERT_SERIALIZED(&rxr->rx_serialize);

	igb_rxeof(rxr, -1);

	E1000_WRITE_REG(&rxr->sc->hw, E1000_EIMS, rxr->rx_intr_mask);
}

static void
igb_msix_tx(void *arg)
{
	struct igb_tx_ring *txr = arg;

	ASSERT_SERIALIZED(&txr->tx_serialize);

	igb_txeof(txr);
	if (!ifsq_is_empty(txr->ifsq))
		ifsq_devstart(txr->ifsq);

	E1000_WRITE_REG(&txr->sc->hw, E1000_EIMS, txr->tx_intr_mask);
}

static void
igb_msix_status(void *arg)
{
	struct igb_softc *sc = arg;
	uint32_t icr;

	ASSERT_SERIALIZED(&sc->main_serialize);

	icr = E1000_READ_REG(&sc->hw, E1000_ICR);
	if (icr & E1000_ICR_LSC) {
		sc->hw.mac.get_link_status = 1;
		igb_update_link_status(sc);
	}

	E1000_WRITE_REG(&sc->hw, E1000_EIMS, sc->sts_intr_mask);
}

static void
igb_set_ring_inuse(struct igb_softc *sc, boolean_t polling)
{
	sc->rx_ring_inuse = igb_get_rxring_inuse(sc, polling);
	sc->tx_ring_inuse = igb_get_txring_inuse(sc, polling);
	if (bootverbose) {
		if_printf(&sc->arpcom.ac_if, "RX rings %d/%d, TX rings %d/%d\n",
		    sc->rx_ring_inuse, sc->rx_ring_cnt,
		    sc->tx_ring_inuse, sc->tx_ring_cnt);
	}
}

static int
igb_get_rxring_inuse(const struct igb_softc *sc, boolean_t polling)
{
	if (!IGB_ENABLE_HWRSS(sc))
		return 1;

	if (polling)
		return sc->rx_ring_cnt;
	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
		return IGB_MIN_RING_RSS;
	else
		return sc->rx_ring_msix;
}

static int
igb_get_txring_inuse(const struct igb_softc *sc, boolean_t polling)
{
	if (!IGB_ENABLE_HWTSS(sc))
		return 1;

	if (polling)
		return sc->tx_ring_cnt;
	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
		return IGB_MIN_RING;
	else
		return sc->tx_ring_msix;
}

static int
igb_tso_pullup(struct igb_tx_ring *txr, struct mbuf **mp)
{
	int hoff, iphlen, thoff;
	struct mbuf *m;

	m = *mp;
	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));

	iphlen = m->m_pkthdr.csum_iphlen;
	thoff = m->m_pkthdr.csum_thlen;
	hoff = m->m_pkthdr.csum_lhlen;

	KASSERT(iphlen > 0, ("invalid ip hlen"));
	KASSERT(thoff > 0, ("invalid tcp hlen"));
	KASSERT(hoff > 0, ("invalid ether hlen"));

	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
		m = m_pullup(m, hoff + iphlen + thoff);
		if (m == NULL) {
			*mp = NULL;
			return ENOBUFS;
		}
		*mp = m;
	}
	if (txr->tx_flags & IGB_TXFLAG_TSO_IPLEN0) {
		struct ip *ip;

		ip = mtodoff(m, struct ip *, hoff);
		ip->ip_len = 0;
	}

	return 0;
}
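
/*
 * Worked example (typical values): for a TSO frame with a plain
 * Ethernet header (hoff 14), a standard IPv4 header (iphlen 20) and
 * a TCP header without options (thoff 20), igb_tso_pullup() insists
 * that the first 14 + 20 + 20 = 54 bytes sit in the leading mbuf,
 * pulling them up if necessary, so igb_tso_ctx() below can describe
 * the header layout to the hardware from contiguous memory.
 */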

static void
igb_tso_ctx(struct igb_tx_ring *txr, struct mbuf *m, uint32_t *hlen)
{
	struct e1000_adv_tx_context_desc *TXD;
	uint32_t vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
	int hoff, ctxd, iphlen, thoff;

	iphlen = m->m_pkthdr.csum_iphlen;
	thoff = m->m_pkthdr.csum_thlen;
	hoff = m->m_pkthdr.csum_lhlen;

	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;

	ctxd = txr->next_avail_desc;
	TXD = (struct e1000_adv_tx_context_desc *)&txr->tx_base[ctxd];

	if (m->m_flags & M_VLANTAG) {
		uint16_t vlantag;

		vlantag = htole16(m->m_pkthdr.ether_vlantag);
		vlan_macip_lens |= (vlantag << E1000_ADVTXD_VLAN_SHIFT);
	}

	vlan_macip_lens |= (hoff << E1000_ADVTXD_MACLEN_SHIFT);
	vlan_macip_lens |= iphlen;

	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;

	mss_l4len_idx |= (m->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
	mss_l4len_idx |= (thoff << E1000_ADVTXD_L4LEN_SHIFT);

	/*
	 * 82575 needs the TX context index added; the queue
	 * index is used as TX context index here.
	 */
	if (txr->sc->hw.mac.type == e1000_82575)
		mss_l4len_idx |= txr->me << 4;

	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
	TXD->seqnum_seed = htole32(0);
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	/* We've consumed the first desc, adjust counters */
	if (++ctxd == txr->num_tx_desc)
		ctxd = 0;
	txr->next_avail_desc = ctxd;
	--txr->tx_avail;

	*hlen = hoff + iphlen + thoff;
}

static void
igb_setup_serializer(struct igb_softc *sc)
{
	const struct igb_msix_data *msix;
	int i, j;

	/*
	 * Allocate serializer array
	 */

	/* Main + TX + RX */
	sc->serialize_cnt = 1 + sc->tx_ring_cnt + sc->rx_ring_cnt;

	/* Aggregate TX/RX MSI-X */
	for (i = 0; i < sc->msix_cnt; ++i) {
		msix = &sc->msix_data[i];
		if (msix->msix_serialize == &msix->msix_serialize0)
			sc->serialize_cnt++;
	}

	sc->serializes =
	    kmalloc(sc->serialize_cnt * sizeof(struct lwkt_serialize *),
	        M_DEVBUF, M_WAITOK | M_ZERO);

	/*
	 * Setup serializers
	 *
	 * NOTE: Order is critical
	 */

	i = 0;

	KKASSERT(i < sc->serialize_cnt);
	sc->serializes[i++] = &sc->main_serialize;

	for (j = 0; j < sc->msix_cnt; ++j) {
		msix = &sc->msix_data[j];
		if (msix->msix_serialize == &msix->msix_serialize0) {
			KKASSERT(i < sc->serialize_cnt);
			sc->serializes[i++] = msix->msix_serialize;
		}
	}

	for (j = 0; j < sc->tx_ring_cnt; ++j) {
		KKASSERT(i < sc->serialize_cnt);
		sc->serializes[i++] = &sc->tx_rings[j].tx_serialize;
	}

	for (j = 0; j < sc->rx_ring_cnt; ++j) {
		KKASSERT(i < sc->serialize_cnt);
		sc->serializes[i++] = &sc->rx_rings[j].rx_serialize;
	}

	KKASSERT(i == sc->serialize_cnt);
}
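
/*
 * Worked example (hypothetical configuration): for an adapter in
 * aggregate MSI-X mode with 2 rxtx vectors, 2 TX rings and 2 RX
 * rings, the array built above is:
 *
 *	serializes[0] = main_serialize
 *	serializes[1] = msix_data[0].msix_serialize0	(rxtx0)
 *	serializes[2] = msix_data[1].msix_serialize0	(rxtx1)
 *	serializes[3] = tx_rings[0].tx_serialize
 *	serializes[4] = tx_rings[1].tx_serialize
 *	serializes[5] = rx_rings[0].rx_serialize
 *	serializes[6] = rx_rings[1].rx_serialize
 *
 * ifnet_serialize_array_enter() always acquires the entries in this
 * fixed order, which is what makes the "Order is critical" note above
 * a deadlock-avoidance rule rather than a preference.
 */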

static void
igb_msix_rx_conf(struct igb_softc *sc, int i, int *x0, int offset)
{
	int x = *x0;

	for (; i < sc->rx_ring_msix; ++i) {
		struct igb_rx_ring *rxr = &sc->rx_rings[i];
		struct igb_msix_data *msix;

		KKASSERT(x < sc->msix_cnt);
		msix = &sc->msix_data[x++];

		rxr->rx_intr_bit = msix->msix_vector;
		rxr->rx_intr_mask = msix->msix_mask;

		msix->msix_serialize = &rxr->rx_serialize;
		msix->msix_func = igb_msix_rx;
		msix->msix_arg = rxr;

		msix->msix_cpuid = i + offset;
		KKASSERT(msix->msix_cpuid < ncpus2);

		ksnprintf(msix->msix_desc, sizeof(msix->msix_desc), "%s rx%d",
		    device_get_nameunit(sc->dev), i);

		msix->msix_rate = IGB_MSIX_RX_RATE;
		ksnprintf(msix->msix_rate_desc, sizeof(msix->msix_rate_desc),
		    "RX%d interrupt rate", i);
	}
	*x0 = x;
}

static void
igb_msix_tx_conf(struct igb_softc *sc, int i, int *x0, int offset)
{
	int x = *x0;

	for (; i < sc->tx_ring_msix; ++i) {
		struct igb_tx_ring *txr = &sc->tx_rings[i];
		struct igb_msix_data *msix;

		KKASSERT(x < sc->msix_cnt);
		msix = &sc->msix_data[x++];

		txr->tx_intr_bit = msix->msix_vector;
		txr->tx_intr_mask = msix->msix_mask;

		msix->msix_serialize = &txr->tx_serialize;
		msix->msix_func = igb_msix_tx;
		msix->msix_arg = txr;

		msix->msix_cpuid = i + offset;
		KKASSERT(msix->msix_cpuid < ncpus2);
		txr->tx_intr_cpuid = msix->msix_cpuid;

		ksnprintf(msix->msix_desc, sizeof(msix->msix_desc), "%s tx%d",
		    device_get_nameunit(sc->dev), i);

		msix->msix_rate = IGB_MSIX_TX_RATE;
		ksnprintf(msix->msix_rate_desc, sizeof(msix->msix_rate_desc),
		    "TX%d interrupt rate", i);
	}
	*x0 = x;
}

static void
igb_msix_rxtx(void *arg)
{
	struct igb_msix_data *msix = arg;
	struct igb_rx_ring *rxr = msix->msix_rx;
	struct igb_tx_ring *txr = msix->msix_tx;

	ASSERT_SERIALIZED(&msix->msix_serialize0);

	lwkt_serialize_enter(&rxr->rx_serialize);
	igb_rxeof(rxr, -1);
	lwkt_serialize_exit(&rxr->rx_serialize);

	lwkt_serialize_enter(&txr->tx_serialize);
	igb_txeof(txr);
	if (!ifsq_is_empty(txr->ifsq))
		ifsq_devstart(txr->ifsq);
	lwkt_serialize_exit(&txr->tx_serialize);

	E1000_WRITE_REG(&msix->msix_sc->hw, E1000_EIMS, msix->msix_mask);
}

static void
igb_set_timer_cpuid(struct igb_softc *sc, boolean_t polling)
{
	if (polling || sc->intr_type == PCI_INTR_TYPE_MSIX)
		sc->timer_cpuid = 0;	/* XXX fixed */
	else
		sc->timer_cpuid = rman_get_cpuid(sc->intr_res);
}