/*
 * Copyright (c) 2001-2011, Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_ifpoll.h"
#include "opt_igb.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/rman.h>
#include <sys/serialize.h>
#include <sys/serialize2.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/ifq_var.h>
#include <net/toeplitz.h>
#include <net/toeplitz2.h>
#include <net/vlan/if_vlan_var.h>
#include <net/vlan/if_vlan_ether.h>
#include <net/if_poll.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <bus/pci/pcivar.h>
#include <bus/pci/pcireg.h>

#include <dev/netif/ig_hal/e1000_api.h>
#include <dev/netif/ig_hal/e1000_82575.h>
#include <dev/netif/igb/if_igb.h>

#ifdef IGB_RSS_DEBUG
#define IGB_RSS_DPRINTF(sc, lvl, fmt, ...) \
do { \
	if (sc->rss_debug >= lvl) \
		if_printf(&sc->arpcom.ac_if, fmt, __VA_ARGS__); \
} while (0)
#else	/* !IGB_RSS_DEBUG */
#define IGB_RSS_DPRINTF(sc, lvl, fmt, ...)	((void)0)
#endif	/* IGB_RSS_DEBUG */

#define IGB_NAME	"Intel(R) PRO/1000 "
#define IGB_DEVICE(id)	\
	{ IGB_VENDOR_ID, E1000_DEV_ID_##id, IGB_NAME #id }
#define IGB_DEVICE_NULL	{ 0, 0, NULL }

static struct igb_device {
	uint16_t	vid;
	uint16_t	did;
	const char	*desc;
} igb_devices[] = {
	IGB_DEVICE(82575EB_COPPER),
	IGB_DEVICE(82575EB_FIBER_SERDES),
	IGB_DEVICE(82575GB_QUAD_COPPER),
	IGB_DEVICE(82576),
	IGB_DEVICE(82576_NS),
	IGB_DEVICE(82576_NS_SERDES),
	IGB_DEVICE(82576_FIBER),
	IGB_DEVICE(82576_SERDES),
	IGB_DEVICE(82576_SERDES_QUAD),
	IGB_DEVICE(82576_QUAD_COPPER),
	IGB_DEVICE(82576_QUAD_COPPER_ET2),
	IGB_DEVICE(82576_VF),
	IGB_DEVICE(82580_COPPER),
	IGB_DEVICE(82580_FIBER),
	IGB_DEVICE(82580_SERDES),
	IGB_DEVICE(82580_SGMII),
	IGB_DEVICE(82580_COPPER_DUAL),
	IGB_DEVICE(82580_QUAD_FIBER),
	IGB_DEVICE(DH89XXCC_SERDES),
	IGB_DEVICE(DH89XXCC_SGMII),
	IGB_DEVICE(DH89XXCC_SFP),
	IGB_DEVICE(DH89XXCC_BACKPLANE),
	IGB_DEVICE(I350_COPPER),
	IGB_DEVICE(I350_FIBER),
	IGB_DEVICE(I350_SERDES),
	IGB_DEVICE(I350_SGMII),
	IGB_DEVICE(I350_VF),
	IGB_DEVICE(I210_COPPER),
	IGB_DEVICE(I210_COPPER_IT),
	IGB_DEVICE(I210_COPPER_OEM1),
	IGB_DEVICE(I210_COPPER_FLASHLESS),
	IGB_DEVICE(I210_SERDES_FLASHLESS),
	IGB_DEVICE(I210_FIBER),
	IGB_DEVICE(I210_SERDES),
	IGB_DEVICE(I210_SGMII),
	IGB_DEVICE(I211_COPPER),
	IGB_DEVICE(I354_BACKPLANE_1GBPS),
	IGB_DEVICE(I354_SGMII),

	/* required last entry */
	IGB_DEVICE_NULL
};

static int	igb_probe(device_t);
static int	igb_attach(device_t);
static int	igb_detach(device_t);
static int	igb_shutdown(device_t);
static int	igb_suspend(device_t);
static int	igb_resume(device_t);

static boolean_t igb_is_valid_ether_addr(const uint8_t *);
static void	igb_setup_ifp(struct igb_softc *);
static boolean_t igb_txcsum_ctx(struct igb_tx_ring *, struct mbuf *);
static int	igb_tso_pullup(struct igb_tx_ring *, struct mbuf **);
static void	igb_tso_ctx(struct igb_tx_ring *, struct mbuf *, uint32_t *);
static void	igb_add_sysctl(struct igb_softc *);
static int	igb_sysctl_intr_rate(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_msix_rate(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
static void	igb_set_ring_inuse(struct igb_softc *, boolean_t);
static int	igb_get_rxring_inuse(const struct igb_softc *, boolean_t);
static int	igb_get_txring_inuse(const struct igb_softc *, boolean_t);
static void	igb_set_timer_cpuid(struct igb_softc *, boolean_t);
#ifdef IFPOLL_ENABLE
static int	igb_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS);
#endif

static void	igb_vf_init_stats(struct igb_softc *);
static void	igb_reset(struct igb_softc *);
static void	igb_update_stats_counters(struct igb_softc *);
static void	igb_update_vf_stats_counters(struct igb_softc *);
static void	igb_update_link_status(struct igb_softc *);
static void	igb_init_tx_unit(struct igb_softc *);
static void	igb_init_rx_unit(struct igb_softc *);

static void	igb_set_vlan(struct igb_softc *);
static void	igb_set_multi(struct igb_softc *);
static void	igb_set_promisc(struct igb_softc *);
static void	igb_disable_promisc(struct igb_softc *);

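/* RX/TX ring management */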
static int	igb_alloc_rings(struct igb_softc *);
static void	igb_free_rings(struct igb_softc *);
static int	igb_create_tx_ring(struct igb_tx_ring *);
static int	igb_create_rx_ring(struct igb_rx_ring *);
static void	igb_free_tx_ring(struct igb_tx_ring *);
static void	igb_free_rx_ring(struct igb_rx_ring *);
static void	igb_destroy_tx_ring(struct igb_tx_ring *, int);
static void	igb_destroy_rx_ring(struct igb_rx_ring *, int);
static void	igb_init_tx_ring(struct igb_tx_ring *);
static int	igb_init_rx_ring(struct igb_rx_ring *);
static int	igb_newbuf(struct igb_rx_ring *, int, boolean_t);
static int	igb_encap(struct igb_tx_ring *, struct mbuf **, int *, int *);
static void	igb_rx_refresh(struct igb_rx_ring *, int);
static void	igb_setup_serializer(struct igb_softc *);

static void	igb_stop(struct igb_softc *);
static void	igb_init(void *);
static int	igb_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
static void	igb_media_status(struct ifnet *, struct ifmediareq *);
static int	igb_media_change(struct ifnet *);
static void	igb_timer(void *);
static void	igb_watchdog(struct ifaltq_subque *);
static void	igb_start(struct ifnet *, struct ifaltq_subque *);
#ifdef IFPOLL_ENABLE
static void	igb_npoll(struct ifnet *, struct ifpoll_info *);
static void	igb_npoll_rx(struct ifnet *, void *, int);
static void	igb_npoll_tx(struct ifnet *, void *, int);
static void	igb_npoll_status(struct ifnet *);
#endif
static void	igb_serialize(struct ifnet *, enum ifnet_serialize);
static void	igb_deserialize(struct ifnet *, enum ifnet_serialize);
static int	igb_tryserialize(struct ifnet *, enum ifnet_serialize);
#ifdef INVARIANTS
static void	igb_serialize_assert(struct ifnet *, enum ifnet_serialize,
		    boolean_t);
#endif

static void	igb_intr(void *);
static void	igb_intr_shared(void *);
static void	igb_rxeof(struct igb_rx_ring *, int);
static void	igb_txeof(struct igb_tx_ring *);
static void	igb_set_eitr(struct igb_softc *, int, int);
static void	igb_enable_intr(struct igb_softc *);
static void	igb_disable_intr(struct igb_softc *);
static void	igb_init_unshared_intr(struct igb_softc *);
static void	igb_init_intr(struct igb_softc *);
static int	igb_setup_intr(struct igb_softc *);
static void	igb_set_txintr_mask(struct igb_tx_ring *, int *, int);
static void	igb_set_rxintr_mask(struct igb_rx_ring *, int *, int);
static void	igb_set_intr_mask(struct igb_softc *);
static int	igb_alloc_intr(struct igb_softc *);
static void	igb_free_intr(struct igb_softc *);
static void	igb_teardown_intr(struct igb_softc *);
static void	igb_msix_try_alloc(struct igb_softc *);
static void	igb_msix_rx_conf(struct igb_softc *, int, int *, int);
static void	igb_msix_tx_conf(struct igb_softc *, int, int *, int);
static void	igb_msix_free(struct igb_softc *, boolean_t);
static int	igb_msix_setup(struct igb_softc *);
static void	igb_msix_teardown(struct igb_softc *, int);
static void	igb_msix_rx(void *);
static void	igb_msix_tx(void *);
static void	igb_msix_status(void *);
static void	igb_msix_rxtx(void *);

/* Management and WOL Support */
static void	igb_get_mgmt(struct igb_softc *);
static void	igb_rel_mgmt(struct igb_softc *);
static void	igb_get_hw_control(struct igb_softc *);
static void	igb_rel_hw_control(struct igb_softc *);
static void	igb_enable_wol(device_t);
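
/*
 * Newbus glue: the method table below maps the generic device
 * interface (probe/attach/detach/shutdown/suspend/resume) onto the
 * igb_* handlers declared above; DRIVER_MODULE() further down
 * registers the driver with the PCI bus.
 */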
static device_method_t igb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		igb_probe),
	DEVMETHOD(device_attach,	igb_attach),
	DEVMETHOD(device_detach,	igb_detach),
	DEVMETHOD(device_shutdown,	igb_shutdown),
	DEVMETHOD(device_suspend,	igb_suspend),
	DEVMETHOD(device_resume,	igb_resume),
	DEVMETHOD_END
};

static driver_t igb_driver = {
	"igb",
	igb_methods,
	sizeof(struct igb_softc),
};

static devclass_t igb_devclass;

DECLARE_DUMMY_MODULE(if_igb);
MODULE_DEPEND(igb, ig_hal, 1, 1, 1);
DRIVER_MODULE(if_igb, pci, igb_driver, igb_devclass, NULL, NULL);

static int	igb_rxd = IGB_DEFAULT_RXD;
static int	igb_txd = IGB_DEFAULT_TXD;
static int	igb_rxr = 0;
static int	igb_txr = 0;
static int	igb_msi_enable = 1;
static int	igb_msix_enable = 1;
static int	igb_eee_disabled = 1;	/* Energy Efficient Ethernet */
static int	igb_fc_setting = e1000_fc_full;

/*
 * DMA Coalescing, only for i350 - default to off,
 * this feature is for power savings
 */
static int	igb_dma_coalesce = 0;

TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
TUNABLE_INT("hw.igb.rxr", &igb_rxr);
TUNABLE_INT("hw.igb.txr", &igb_txr);
TUNABLE_INT("hw.igb.msi.enable", &igb_msi_enable);
TUNABLE_INT("hw.igb.msix.enable", &igb_msix_enable);
TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);

/* i350 specific */
TUNABLE_INT("hw.igb.eee_disabled", &igb_eee_disabled);
TUNABLE_INT("hw.igb.dma_coalesce", &igb_dma_coalesce);

static __inline void
igb_rxcsum(uint32_t staterr, struct mbuf *mp)
{
	/* Ignore Checksum bit is set */
	if (staterr & E1000_RXD_STAT_IXSM)
		return;

	if ((staterr & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
	    E1000_RXD_STAT_IPCS)
		mp->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID;

	if (staterr & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
		if ((staterr & E1000_RXDEXT_STATERR_TCPE) == 0) {
			mp->m_pkthdr.csum_flags |= CSUM_DATA_VALID |
			    CSUM_PSEUDO_HDR | CSUM_FRAG_NOT_CHECKED;
			mp->m_pkthdr.csum_data = htons(0xffff);
		}
	}
}

static __inline struct pktinfo *
igb_rssinfo(struct mbuf *m, struct pktinfo *pi,
    uint32_t hash, uint32_t hashtype, uint32_t staterr)
{
	switch (hashtype) {
	case E1000_RXDADV_RSSTYPE_IPV4_TCP:
		pi->pi_netisr = NETISR_IP;
		pi->pi_flags = 0;
		pi->pi_l3proto = IPPROTO_TCP;
		break;

	case E1000_RXDADV_RSSTYPE_IPV4:
		if (staterr & E1000_RXD_STAT_IXSM)
			return NULL;

		if ((staterr &
		     (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
		    E1000_RXD_STAT_TCPCS) {
			pi->pi_netisr = NETISR_IP;
			pi->pi_flags = 0;
			pi->pi_l3proto = IPPROTO_UDP;
			break;
		}
		/* FALL THROUGH */
	default:
		return NULL;
	}

	m->m_flags |= M_HASH;
	m->m_pkthdr.hash = toeplitz_hash(hash);
	return pi;
}

static int
igb_probe(device_t dev)
{
	const struct igb_device *d;
	uint16_t vid, did;

	vid = pci_get_vendor(dev);
	did = pci_get_device(dev);

	for (d = igb_devices; d->desc != NULL; ++d) {
		if (vid == d->vid && did == d->did) {
			device_set_desc(dev, d->desc);
			return 0;
		}
	}
	return ENXIO;
}

static int
igb_attach(device_t dev)
{
	struct igb_softc *sc = device_get_softc(dev);
	uint16_t eeprom_data;
	int error = 0, ring_max;
#ifdef IFPOLL_ENABLE
	int offset, offset_def;
#endif

#ifdef notyet
	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_nvm_info, "I", "NVM Information");
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
#endif

	callout_init_mp(&sc->timer);
	lwkt_serialize_init(&sc->main_serialize);

	if_initname(&sc->arpcom.ac_if, device_get_name(dev),
	    device_get_unit(dev));
	sc->dev = sc->osdep.dev = dev;

	/*
	 * Determine hardware and mac type
	 */
	sc->hw.vendor_id = pci_get_vendor(dev);
	sc->hw.device_id = pci_get_device(dev);
	sc->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
	sc->hw.subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	sc->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);

	if (e1000_set_mac_type(&sc->hw))
		return ENXIO;

	/* Are we a VF device? */
	if (sc->hw.mac.type == e1000_vfadapt ||
	    sc->hw.mac.type == e1000_vfadapt_i350)
		sc->vf_ifp = 1;
	else
		sc->vf_ifp = 0;

	/*
	 * Configure total supported RX/TX ring count
	 */
	switch (sc->hw.mac.type) {
	case e1000_82575:
		ring_max = IGB_MAX_RING_82575;
		break;

	case e1000_82576:
		ring_max = IGB_MAX_RING_82576;
		break;

	case e1000_82580:
		ring_max = IGB_MAX_RING_82580;
		break;

	case e1000_i350:
		ring_max = IGB_MAX_RING_I350;
		break;

	case e1000_i354:
		ring_max = IGB_MAX_RING_I354;
		break;

	case e1000_i210:
		ring_max = IGB_MAX_RING_I210;
		break;

	case e1000_i211:
		ring_max = IGB_MAX_RING_I211;
		break;

	default:
		ring_max = IGB_MIN_RING;
		break;
	}

	sc->rx_ring_cnt = device_getenv_int(dev, "rxr", igb_rxr);
	sc->rx_ring_cnt = if_ring_count2(sc->rx_ring_cnt, ring_max);
#ifdef IGB_RSS_DEBUG
	sc->rx_ring_cnt = device_getenv_int(dev, "rxr_debug", sc->rx_ring_cnt);
#endif
	sc->rx_ring_inuse = sc->rx_ring_cnt;

	sc->tx_ring_cnt = device_getenv_int(dev, "txr", igb_txr);
	sc->tx_ring_cnt = if_ring_count2(sc->tx_ring_cnt, ring_max);
#ifdef IGB_TSS_DEBUG
	sc->tx_ring_cnt = device_getenv_int(dev, "txr_debug", sc->tx_ring_cnt);
#endif
	sc->tx_ring_inuse = sc->tx_ring_cnt;

	/* Enable bus mastering */
	pci_enable_busmaster(dev);

	/*
	 * Allocate IO memory
	 */
	sc->mem_rid = PCIR_BAR(0);
	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->mem_rid,
	    RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "Unable to allocate bus resource: memory\n");
		error = ENXIO;
		goto failed;
	}
	sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->mem_res);
	sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->mem_res);

	sc->hw.hw_addr = (uint8_t *)&sc->osdep.mem_bus_space_handle;

	/* Save PCI command register for Shared Code */
	sc->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
	sc->hw.back = &sc->osdep;

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&sc->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto failed;
	}

	e1000_get_bus_info(&sc->hw);

	sc->hw.mac.autoneg = DO_AUTO_NEG;
	sc->hw.phy.autoneg_wait_to_complete = FALSE;
	sc->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (sc->hw.phy.media_type == e1000_media_type_copper) {
		sc->hw.phy.mdix = AUTO_ALL_MODES;
		sc->hw.phy.disable_polarity_correction = FALSE;
		sc->hw.phy.ms_type = IGB_MASTER_SLAVE;
	}

	/* Set the frame limits assuming standard ethernet sized frames. */
	sc->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;

	/* Allocate RX/TX rings */
	error = igb_alloc_rings(sc);
	if (error)
		goto failed;

#ifdef IFPOLL_ENABLE
	/*
	 * NPOLLING RX CPU offset
	 */
	if (sc->rx_ring_cnt == ncpus2) {
		offset = 0;
	} else {
		offset_def = (sc->rx_ring_cnt * device_get_unit(dev)) % ncpus2;
		offset = device_getenv_int(dev, "npoll.rxoff", offset_def);
		if (offset >= ncpus2 ||
		    offset % sc->rx_ring_cnt != 0) {
			device_printf(dev, "invalid npoll.rxoff %d, use %d\n",
			    offset, offset_def);
			offset = offset_def;
		}
	}
	sc->rx_npoll_off = offset;

	/*
	 * NPOLLING TX CPU offset
	 */
	if (sc->tx_ring_cnt == ncpus2) {
		offset = 0;
	} else {
		offset_def = (sc->tx_ring_cnt * device_get_unit(dev)) % ncpus2;
		offset = device_getenv_int(dev, "npoll.txoff", offset_def);
		if (offset >= ncpus2 ||
		    offset % sc->tx_ring_cnt != 0) {
			device_printf(dev, "invalid npoll.txoff %d, use %d\n",
			    offset, offset_def);
			offset = offset_def;
		}
	}
	sc->tx_npoll_off = offset;
#endif

	/* Allocate interrupt */
	error = igb_alloc_intr(sc);
	if (error)
		goto failed;

	/* Setup serializers */
	igb_setup_serializer(sc);

	/* Allocate the appropriate stats memory */
	if (sc->vf_ifp) {
		sc->stats = kmalloc(sizeof(struct e1000_vf_stats), M_DEVBUF,
		    M_WAITOK | M_ZERO);
		igb_vf_init_stats(sc);
	} else {
		sc->stats = kmalloc(sizeof(struct e1000_hw_stats), M_DEVBUF,
		    M_WAITOK | M_ZERO);
	}

	/* Allocate multicast array memory. */
	sc->mta = kmalloc(ETHER_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES,
	    M_DEVBUF, M_WAITOK);

	/* Some adapter-specific advanced features */
	if (sc->hw.mac.type >= e1000_i350) {
#ifdef notyet
		igb_set_sysctl_value(adapter, "dma_coalesce",
		    "configure dma coalesce",
		    &adapter->dma_coalesce, igb_dma_coalesce);
		igb_set_sysctl_value(adapter, "eee_disabled",
		    "enable Energy Efficient Ethernet",
		    &adapter->hw.dev_spec._82575.eee_disable,
		    igb_eee_disabled);
#else
		sc->dma_coalesce = igb_dma_coalesce;
		sc->hw.dev_spec._82575.eee_disable = igb_eee_disabled;
#endif
		if (sc->hw.phy.media_type == e1000_media_type_copper) {
			if (sc->hw.mac.type == e1000_i354)
				e1000_set_eee_i354(&sc->hw);
			else
				e1000_set_eee_i350(&sc->hw);
		}
	}

	/*
	 * Start from a known state; this is important for reading the
	 * NVM and MAC address.
	 */
	e1000_reset_hw(&sc->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (sc->hw.mac.type != e1000_i210 && sc->hw.mac.type != e1000_i211 &&
	    e1000_validate_nvm_checksum(&sc->hw) < 0) {
		/*
		 * Some PCI-E parts fail the first check due to
		 * the link being in a sleep state; call it again,
		 * and if it fails a second time it's a real issue.
		 */
		if (e1000_validate_nvm_checksum(&sc->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto failed;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&sc->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto failed;
	}
	if (!igb_is_valid_ether_addr(sc->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto failed;
	}

	/* Setup OS specific network interface */
	igb_setup_ifp(sc);

	/* Add sysctl tree; this must come after igb_setup_ifp() */
	igb_add_sysctl(sc);

	/* Now get a good starting state */
	igb_reset(sc);

	/* Initialize statistics */
	igb_update_stats_counters(sc);

	sc->hw.mac.get_link_status = 1;
	igb_update_link_status(sc);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&sc->hw)) {
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");
	}

	/* Determine if we have to control management hardware */
	if (e1000_enable_mng_pass_thru(&sc->hw))
		sc->flags |= IGB_FLAG_HAS_MGMT;

	/*
	 * Setup Wake-on-Lan
	 */
	/* APME bit in EEPROM is mapped to WUC.APME */
	eeprom_data = E1000_READ_REG(&sc->hw, E1000_WUC) & E1000_WUC_APME;
	if (eeprom_data)
		sc->wol = E1000_WUFC_MAG;
	/* XXX disable WOL */
	sc->wol = 0;

#ifdef notyet
	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
#endif

#ifdef notyet
	igb_add_hw_stats(adapter);
#endif

	/*
	 * Disable interrupts to keep spurious interrupts (line based
	 * interrupt, MSI or even MSI-X), which have been observed on
	 * several types of LOMs, from being handled.
	 */
	igb_disable_intr(sc);

	error = igb_setup_intr(sc);
	if (error) {
		ether_ifdetach(&sc->arpcom.ac_if);
		goto failed;
	}
	return 0;

failed:
	igb_detach(dev);
	return error;
}

static int
igb_detach(device_t dev)
{
	struct igb_softc *sc = device_get_softc(dev);

	if (device_is_attached(dev)) {
		struct ifnet *ifp = &sc->arpcom.ac_if;

		ifnet_serialize_all(ifp);

		igb_stop(sc);

		e1000_phy_hw_reset(&sc->hw);

		/* Give control back to firmware */
		igb_rel_mgmt(sc);
		igb_rel_hw_control(sc);

		if (sc->wol) {
			E1000_WRITE_REG(&sc->hw, E1000_WUC, E1000_WUC_PME_EN);
			E1000_WRITE_REG(&sc->hw, E1000_WUFC, sc->wol);
			igb_enable_wol(dev);
		}

		igb_teardown_intr(sc);

		ifnet_deserialize_all(ifp);

		ether_ifdetach(ifp);
	} else if (sc->mem_res != NULL) {
		igb_rel_hw_control(sc);
	}
	bus_generic_detach(dev);

	if (sc->sysctl_tree != NULL)
		sysctl_ctx_free(&sc->sysctl_ctx);

	igb_free_intr(sc);

	if (sc->msix_mem_res != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_mem_rid,
		    sc->msix_mem_res);
	}
	if (sc->mem_res != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid,
		    sc->mem_res);
	}

	igb_free_rings(sc);

	if (sc->mta != NULL)
		kfree(sc->mta, M_DEVBUF);
	if (sc->stats != NULL)
		kfree(sc->stats, M_DEVBUF);
	if (sc->serializes != NULL)
		kfree(sc->serializes, M_DEVBUF);

	return 0;
}

static int
igb_shutdown(device_t dev)
{
	return igb_suspend(dev);
}

static int
igb_suspend(device_t dev)
{
	struct igb_softc *sc = device_get_softc(dev);
	struct ifnet *ifp = &sc->arpcom.ac_if;

	ifnet_serialize_all(ifp);

	igb_stop(sc);

	igb_rel_mgmt(sc);
	igb_rel_hw_control(sc);

	if (sc->wol) {
		E1000_WRITE_REG(&sc->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&sc->hw, E1000_WUFC, sc->wol);
		igb_enable_wol(dev);
	}

	ifnet_deserialize_all(ifp);

	return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
	struct igb_softc *sc = device_get_softc(dev);
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int i;

	ifnet_serialize_all(ifp);

	igb_init(sc);
	igb_get_mgmt(sc);

	for (i = 0; i < sc->tx_ring_inuse; ++i)
		ifsq_devstart_sched(sc->tx_rings[i].ifsq);

	ifnet_deserialize_all(ifp);

	return bus_generic_resume(dev);
}

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
{
	struct igb_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int max_frame_size, mask, reinit;
	int error = 0;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	switch (command) {
	case SIOCSIFMTU:
		max_frame_size = 9234;
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		sc->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN +
		    ETHER_CRC_LEN;

		if (ifp->if_flags & IFF_RUNNING)
			igb_init(sc);
		break;

	case SIOCSIFFLAGS:
		if (ifp->if_flags & IFF_UP) {
			if (ifp->if_flags & IFF_RUNNING) {
				if ((ifp->if_flags ^ sc->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					igb_disable_promisc(sc);
					igb_set_promisc(sc);
				}
			} else {
				igb_init(sc);
			}
		} else if (ifp->if_flags & IFF_RUNNING) {
			igb_stop(sc);
		}
		sc->if_flags = ifp->if_flags;
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		if (ifp->if_flags & IFF_RUNNING) {
			igb_disable_intr(sc);
			igb_set_multi(sc);
#ifdef IFPOLL_ENABLE
			if (!(ifp->if_flags & IFF_NPOLLING))
#endif
				igb_enable_intr(sc);
		}
		break;

	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		if (e1000_check_reset_block(&sc->hw)) {
			if_printf(ifp, "Media change is "
			    "blocked due to SOL/IDER session.\n");
			break;
		}
		/* FALL THROUGH */

	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
		break;

	case SIOCSIFCAP:
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_RXCSUM) {
			ifp->if_capenable ^= IFCAP_RXCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_TXCSUM) {
			ifp->if_capenable ^= IFCAP_TXCSUM;
			if (ifp->if_capenable & IFCAP_TXCSUM)
				ifp->if_hwassist |= IGB_CSUM_FEATURES;
			else
				ifp->if_hwassist &= ~IGB_CSUM_FEATURES;
		}
		if (mask & IFCAP_TSO) {
			ifp->if_capenable ^= IFCAP_TSO;
			if (ifp->if_capenable & IFCAP_TSO)
				ifp->if_hwassist |= CSUM_TSO;
			else
				ifp->if_hwassist &= ~CSUM_TSO;
		}
		if (mask & IFCAP_RSS)
			ifp->if_capenable ^= IFCAP_RSS;
		if (reinit && (ifp->if_flags & IFF_RUNNING))
			igb_init(sc);
		break;

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}
	return error;
}

static void
igb_init(void *xsc)
{
	struct igb_softc *sc = xsc;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	boolean_t polling;
	int i;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	igb_stop(sc);

	/* Get the latest mac address; the user may have set a LAA */
	bcopy(IF_LLADDR(ifp), sc->hw.mac.addr, ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&sc->hw, sc->hw.mac.addr, 0);

	igb_reset(sc);
	igb_update_link_status(sc);

	E1000_WRITE_REG(&sc->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Configure for OS presence */
	igb_get_mgmt(sc);

	polling = FALSE;
#ifdef IFPOLL_ENABLE
	if (ifp->if_flags & IFF_NPOLLING)
		polling = TRUE;
#endif

	/* Configure the number of RX/TX rings actually used */
	igb_set_ring_inuse(sc, polling);
	ifq_set_subq_mask(&ifp->if_snd, sc->tx_ring_inuse - 1);

	/* Initialize interrupt */
	igb_init_intr(sc);

	/* Prepare transmit descriptors and buffers */
	for (i = 0; i < sc->tx_ring_inuse; ++i)
		igb_init_tx_ring(&sc->tx_rings[i]);
	igb_init_tx_unit(sc);

	/* Setup Multicast table */
	igb_set_multi(sc);

#if 0
	/*
	 * Figure out the desired mbuf pool
	 * for doing jumbo/packetsplit
	 */
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;
#endif

	/* Prepare receive descriptors and buffers */
	for (i = 0; i < sc->rx_ring_inuse; ++i) {
		int error;

		error = igb_init_rx_ring(&sc->rx_rings[i]);
		if (error) {
			if_printf(ifp, "Could not setup receive structures\n");
			igb_stop(sc);
			return;
		}
	}
	igb_init_rx_unit(sc);

	/* Enable VLAN support */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		igb_set_vlan(sc);

	/* Don't lose promiscuous settings */
	igb_set_promisc(sc);

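	/*
	 * All rings are configured at this point; mark the interface
	 * running.  Clearing OACTIVE on each subqueue lets the stack
	 * hand packets to igb_start() again, and the per-ring watchdogs
	 * arm transmit timeout detection.
	 */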
	ifp->if_flags |= IFF_RUNNING;
	for (i = 0; i < sc->tx_ring_inuse; ++i) {
		ifsq_clr_oactive(sc->tx_rings[i].ifsq);
		ifsq_watchdog_start(&sc->tx_rings[i].tx_watchdog);
	}

	igb_set_timer_cpuid(sc, polling);
	callout_reset_bycpu(&sc->timer, hz, igb_timer, sc, sc->timer_cpuid);
	e1000_clear_hw_cntrs_base_generic(&sc->hw);

	/* This clears any pending interrupts */
	E1000_READ_REG(&sc->hw, E1000_ICR);

	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (polling) {
		igb_disable_intr(sc);
	} else {
		igb_enable_intr(sc);
		E1000_WRITE_REG(&sc->hw, E1000_ICS, E1000_ICS_LSC);
	}

	/* Set Energy Efficient Ethernet */
	if (sc->hw.phy.media_type == e1000_media_type_copper) {
		if (sc->hw.mac.type == e1000_i354)
			e1000_set_eee_i354(&sc->hw);
		else
			e1000_set_eee_i350(&sc->hw);
	}
}

static void
igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct igb_softc *sc = ifp->if_softc;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	if ((ifp->if_flags & IFF_RUNNING) == 0)
		sc->hw.mac.get_link_status = 1;
	igb_update_link_status(sc);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!sc->link_active)
		return;

	ifmr->ifm_status |= IFM_ACTIVE;

	switch (sc->link_speed) {
	case 10:
		ifmr->ifm_active |= IFM_10_T;
		break;

	case 100:
		/*
		 * Support for 100Mb SFP - these are Fiber,
		 * but the media type appears as serdes
		 */
		if (sc->hw.phy.media_type == e1000_media_type_internal_serdes)
			ifmr->ifm_active |= IFM_100_FX;
		else
			ifmr->ifm_active |= IFM_100_TX;
		break;

	case 1000:
		ifmr->ifm_active |= IFM_1000_T;
		break;
	}

	if (sc->link_duplex == FULL_DUPLEX)
		ifmr->ifm_active |= IFM_FDX;
	else
		ifmr->ifm_active |= IFM_HDX;
}

static int
igb_media_change(struct ifnet *ifp)
{
	struct igb_softc *sc = ifp->if_softc;
	struct ifmedia *ifm = &sc->media;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return EINVAL;

	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		sc->hw.mac.autoneg = DO_AUTO_NEG;
		sc->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;

	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		sc->hw.mac.autoneg = DO_AUTO_NEG;
		sc->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;

	case IFM_100_TX:
		sc->hw.mac.autoneg = FALSE;
		sc->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			sc->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			sc->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;

	case IFM_10_T:
		sc->hw.mac.autoneg = FALSE;
		sc->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			sc->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			sc->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;

	default:
		if_printf(ifp, "Unsupported media type\n");
		break;
	}

	igb_init(sc);

	return 0;
}

static void
igb_set_promisc(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	struct e1000_hw *hw = &sc->hw;
	uint32_t reg;

	if (sc->vf_ifp) {
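		/*
		 * A VF has no direct access to RCTL; the request is
		 * forwarded to the PF (presumably via the mailbox),
		 * which applies the filtering change on our behalf.
		 */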
		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
		return;
	}

	reg = E1000_READ_REG(hw, E1000_RCTL);
	if (ifp->if_flags & IFF_PROMISC) {
		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
		E1000_WRITE_REG(hw, E1000_RCTL, reg);
	} else if (ifp->if_flags & IFF_ALLMULTI) {
		reg |= E1000_RCTL_MPE;
		reg &= ~E1000_RCTL_UPE;
		E1000_WRITE_REG(hw, E1000_RCTL, reg);
	}
}

static void
igb_disable_promisc(struct igb_softc *sc)
{
	struct e1000_hw *hw = &sc->hw;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	uint32_t reg;
	int mcnt = 0;

	if (sc->vf_ifp) {
		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
		return;
	}
	reg = E1000_READ_REG(hw, E1000_RCTL);
	reg &= ~E1000_RCTL_UPE;
	if (ifp->if_flags & IFF_ALLMULTI) {
		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
	} else {
		struct ifmultiaddr *ifma;

		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			if (ifma->ifma_addr->sa_family != AF_LINK)
				continue;
			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
				break;
			mcnt++;
		}
	}
	/* Don't disable if in MAX groups */
	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
		reg &= ~E1000_RCTL_MPE;
	E1000_WRITE_REG(hw, E1000_RCTL, reg);
}

static void
igb_set_multi(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	struct ifmultiaddr *ifma;
	uint32_t reg_rctl = 0;
	uint8_t *mta;
	int mcnt = 0;

	mta = sc->mta;
	bzero(mta, ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);

	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;

		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
			break;

		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
		mcnt++;
	}

	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
		reg_rctl = E1000_READ_REG(&sc->hw, E1000_RCTL);
		reg_rctl |= E1000_RCTL_MPE;
		E1000_WRITE_REG(&sc->hw, E1000_RCTL, reg_rctl);
	} else {
		e1000_update_mc_addr_list(&sc->hw, mta, mcnt);
	}
}

static void
igb_timer(void *xsc)
{
	struct igb_softc *sc = xsc;

	lwkt_serialize_enter(&sc->main_serialize);

	igb_update_link_status(sc);
	igb_update_stats_counters(sc);

	callout_reset_bycpu(&sc->timer, hz, igb_timer, sc, sc->timer_cpuid);

	lwkt_serialize_exit(&sc->main_serialize);
}

static void
igb_update_link_status(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	struct e1000_hw *hw = &sc->hw;
	uint32_t link_check, thstat, ctrl;

	link_check = thstat = ctrl = 0;

	/* Get the cached link value or read for real */
	switch (hw->phy.media_type) {
	case e1000_media_type_copper:
		if (hw->mac.get_link_status) {
			/* Do the work to read phy */
			e1000_check_for_link(hw);
			link_check = !hw->mac.get_link_status;
		} else {
			link_check = TRUE;
		}
		break;

	case e1000_media_type_fiber:
		e1000_check_for_link(hw);
		link_check = E1000_READ_REG(hw, E1000_STATUS) &
		    E1000_STATUS_LU;
		break;

	case e1000_media_type_internal_serdes:
		e1000_check_for_link(hw);
		link_check = hw->mac.serdes_has_link;
		break;

	/* VF device is type_unknown */
	case e1000_media_type_unknown:
		e1000_check_for_link(hw);
		link_check = !hw->mac.get_link_status;
		/* FALL THROUGH */
	default:
		break;
	}

	/* Check for thermal downshift or shutdown */
	if (hw->mac.type == e1000_i350) {
		thstat = E1000_READ_REG(hw, E1000_THSTAT);
		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
	}

	/* Now we check if a transition has happened */
	if (link_check && sc->link_active == 0) {
		e1000_get_speed_and_duplex(hw,
		    &sc->link_speed, &sc->link_duplex);
		if (bootverbose) {
			const char *flowctl;

			/* Get the flow control for display */
			switch (hw->fc.current_mode) {
			case e1000_fc_rx_pause:
				flowctl = "RX";
				break;

			case e1000_fc_tx_pause:
				flowctl = "TX";
				break;

			case e1000_fc_full:
				flowctl = "Full";
				break;

			default:
				flowctl = "None";
				break;
			}

			if_printf(ifp, "Link is up %d Mbps %s, "
			    "Flow control: %s\n",
			    sc->link_speed,
			    sc->link_duplex == FULL_DUPLEX ?
			    "Full Duplex" : "Half Duplex",
			    flowctl);
		}
		sc->link_active = 1;

		ifp->if_baudrate = sc->link_speed * 1000000;
		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
		    (thstat & E1000_THSTAT_LINK_THROTTLE))
			if_printf(ifp, "Link: thermal downshift\n");
		/* Delay Link Up for Phy update */
		if ((hw->mac.type == e1000_i210 ||
		     hw->mac.type == e1000_i211) &&
		    hw->phy.id == I210_I_PHY_ID)
			msec_delay(IGB_I210_LINK_DELAY);
		/* This can sleep */
		ifp->if_link_state = LINK_STATE_UP;
		if_link_state_change(ifp);
	} else if (!link_check && sc->link_active == 1) {
		ifp->if_baudrate = sc->link_speed = 0;
		sc->link_duplex = 0;
		if (bootverbose)
			if_printf(ifp, "Link is Down\n");
		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
		    (thstat & E1000_THSTAT_PWR_DOWN))
			if_printf(ifp, "Link: thermal shutdown\n");
		sc->link_active = 0;
		/* This can sleep */
		ifp->if_link_state = LINK_STATE_DOWN;
		if_link_state_change(ifp);
	}
}

static void
igb_stop(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int i;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	igb_disable_intr(sc);

	callout_stop(&sc->timer);

	ifp->if_flags &= ~IFF_RUNNING;
	for (i = 0; i < sc->tx_ring_cnt; ++i) {
		ifsq_clr_oactive(sc->tx_rings[i].ifsq);
		ifsq_watchdog_stop(&sc->tx_rings[i].tx_watchdog);
		sc->tx_rings[i].tx_flags &= ~IGB_TXFLAG_ENABLED;
	}

	e1000_reset_hw(&sc->hw);
	E1000_WRITE_REG(&sc->hw, E1000_WUC, 0);

	e1000_led_off(&sc->hw);
	e1000_cleanup_led(&sc->hw);

	for (i = 0; i < sc->tx_ring_cnt; ++i)
		igb_free_tx_ring(&sc->tx_rings[i]);
	for (i = 0; i < sc->rx_ring_cnt; ++i)
		igb_free_rx_ring(&sc->rx_rings[i]);
}

static void
igb_reset(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	struct e1000_hw *hw = &sc->hw;
	struct e1000_fc_info *fc = &hw->fc;
	uint32_t pba = 0;
	uint16_t hwm;

	/* Let the firmware know the OS is in control */
	igb_get_hw_control(sc);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 */
	switch (hw->mac.type) {
	case e1000_82575:
		pba = E1000_PBA_32K;
		break;

	case e1000_82576:
	case e1000_vfadapt:
		pba = E1000_READ_REG(hw, E1000_RXPBS);
		pba &= E1000_RXPBS_SIZE_MASK_82576;
		break;

	case e1000_82580:
	case e1000_i350:
	case e1000_i354:
	case e1000_vfadapt_i350:
		pba = E1000_READ_REG(hw, E1000_RXPBS);
		pba = e1000_rxpbs_adjust_82580(pba);
		break;

	case e1000_i210:
	case e1000_i211:
		pba = E1000_PBA_34K;
		break;

	default:
		break;
	}

	/* Special needs in case of Jumbo frames */
	if (hw->mac.type == e1000_82575 && ifp->if_mtu > ETHERMTU) {
		uint32_t tx_space, min_tx, min_rx;

		pba = E1000_READ_REG(hw, E1000_PBA);
		tx_space = pba >> 16;
		pba &= 0xffff;

		min_tx = (sc->max_frame_size +
		    sizeof(struct e1000_tx_desc) - ETHER_CRC_LEN) * 2;
		min_tx = roundup2(min_tx, 1024);
		min_tx >>= 10;
		min_rx = sc->max_frame_size;
		min_rx = roundup2(min_rx, 1024);
		min_rx >>= 10;
		if (tx_space < min_tx && (min_tx - tx_space) < pba) {
			pba = pba - (min_tx - tx_space);
			/*
			 * if short on rx space, rx wins
			 * and must trump tx adjustment
			 */
			if (pba < min_rx)
				pba = min_rx;
		}
		E1000_WRITE_REG(hw, E1000_PBA, pba);
	}

	/*
	 * These parameters control the automatic generation (Tx) and
	 * response (Rx) to Ethernet PAUSE frames.
	 * - High water mark should allow for at least two frames to be
	 *   received after sending an XOFF.
	 * - Low water mark works best when it is very near the high
	 *   water mark.  This allows the receiver to restart by sending
	 *   XON when it has drained a bit.
	 */
	hwm = min(((pba << 10) * 9 / 10),
	    ((pba << 10) - 2 * sc->max_frame_size));

	if (hw->mac.type < e1000_82576) {
		fc->high_water = hwm & 0xFFF8;	/* 8-byte granularity */
		fc->low_water = fc->high_water - 8;
	} else {
		fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
		fc->low_water = fc->high_water - 16;
	}
	fc->pause_time = IGB_FC_PAUSE_TIME;
	fc->send_xon = TRUE;
	fc->requested_mode = e1000_fc_default;

	/* Issue a global reset */
	e1000_reset_hw(hw);
	E1000_WRITE_REG(hw, E1000_WUC, 0);

	if (e1000_init_hw(hw) < 0)
		if_printf(ifp, "Hardware Initialization Failed\n");

	/* Setup DMA Coalescing */
	if (hw->mac.type > e1000_82580 && hw->mac.type != e1000_i211) {
		uint32_t dmac;
		uint32_t reg;

		if (sc->dma_coalesce == 0) {
			/*
			 * Disabled
			 */
			reg = E1000_READ_REG(hw, E1000_DMACR);
			reg &= ~E1000_DMACR_DMAC_EN;
			E1000_WRITE_REG(hw, E1000_DMACR, reg);
			goto reset_out;
		}

		/* Set starting thresholds */
		E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);

		hwm = 64 * pba - sc->max_frame_size / 16;
		if (hwm < 64 * (pba - 6))
			hwm = 64 * (pba - 6);
		reg = E1000_READ_REG(hw, E1000_FCRTC);
		reg &= ~E1000_FCRTC_RTH_COAL_MASK;
		reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
		    & E1000_FCRTC_RTH_COAL_MASK);
		E1000_WRITE_REG(hw, E1000_FCRTC, reg);

		dmac = pba - sc->max_frame_size / 512;
		if (dmac < pba - 10)
			dmac = pba - 10;
		reg = E1000_READ_REG(hw, E1000_DMACR);
		reg &= ~E1000_DMACR_DMACTHR_MASK;
		reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
		    & E1000_DMACR_DMACTHR_MASK);
		/* Transition to L0s or L1 if available.. */
		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
		/* timer = value in sc->dma_coalesce in 32usec intervals */
		reg |= (sc->dma_coalesce >> 5);
		E1000_WRITE_REG(hw, E1000_DMACR, reg);

		/* Set the interval before transition */
		reg = E1000_READ_REG(hw, E1000_DMCTLX);
		reg |= 0x80000004;
		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);

		/* Free space in tx packet buffer to wake from DMA coal */
		E1000_WRITE_REG(hw, E1000_DMCTXTH,
		    (20480 - (2 * sc->max_frame_size)) >> 6);

		/* Make low power state decision controlled by DMA coal */
		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
		reg &= ~E1000_PCIEMISC_LX_DECISION;
		E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
		if_printf(ifp, "DMA Coalescing enabled\n");
	} else if (hw->mac.type == e1000_82580) {
		uint32_t reg = E1000_READ_REG(hw, E1000_PCIEMISC);

		E1000_WRITE_REG(hw, E1000_DMACR, 0);
		E1000_WRITE_REG(hw, E1000_PCIEMISC,
		    reg & ~E1000_PCIEMISC_LX_DECISION);
	}

reset_out:
	E1000_WRITE_REG(&sc->hw, E1000_VET, ETHERTYPE_VLAN);
	e1000_get_phy_info(hw);
	e1000_check_for_link(hw);
}

static void
igb_setup_ifp(struct igb_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int i;

	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_init = igb_init;
	ifp->if_ioctl = igb_ioctl;
	ifp->if_start = igb_start;
	ifp->if_serialize = igb_serialize;
	ifp->if_deserialize = igb_deserialize;
	ifp->if_tryserialize = igb_tryserialize;
#ifdef INVARIANTS
	ifp->if_serialize_assert = igb_serialize_assert;
#endif
#ifdef IFPOLL_ENABLE
	ifp->if_npoll = igb_npoll;
#endif

	ifq_set_maxlen(&ifp->if_snd, sc->tx_rings[0].num_tx_desc - 1);
	ifq_set_ready(&ifp->if_snd);
	ifq_set_subq_cnt(&ifp->if_snd, sc->tx_ring_cnt);

	ifp->if_mapsubq = ifq_mapsubq_mask;
	ifq_set_subq_mask(&ifp->if_snd, 0);

	ether_ifattach(ifp, sc->hw.mac.addr, NULL);

	ifp->if_capabilities =
	    IFCAP_HWCSUM | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_TSO;
	if (IGB_ENABLE_HWRSS(sc))
		ifp->if_capabilities |= IFCAP_RSS;
	ifp->if_capenable = ifp->if_capabilities;
	ifp->if_hwassist = IGB_CSUM_FEATURES | CSUM_TSO;

	/*
	 * Tell the upper layer(s) we support long frames
	 */
	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);

	/* Setup TX rings and subqueues */
	for (i = 0; i < sc->tx_ring_cnt; ++i) {
		struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
		struct igb_tx_ring *txr = &sc->tx_rings[i];

		ifsq_set_cpuid(ifsq, txr->tx_intr_cpuid);
		ifsq_set_priv(ifsq, txr);
		ifsq_set_hw_serialize(ifsq, &txr->tx_serialize);
		txr->ifsq = ifsq;

		ifsq_watchdog_init(&txr->tx_watchdog, ifsq, igb_watchdog);
	}

	/*
	 * Specify the media types supported by this adapter and register
	 * callbacks to update media and link information
	 */
	ifmedia_init(&sc->media, IFM_IMASK,
	    igb_media_change, igb_media_status);
	if (sc->hw.phy.media_type == e1000_media_type_fiber ||
	    sc->hw.phy.media_type == e1000_media_type_internal_serdes) {
		ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
		    0, NULL);
		ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
	} else {
		ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T, 0, NULL);
		ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T | IFM_FDX,
		    0, NULL);
		ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX, 0, NULL);
		ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
		    0, NULL);
		if (sc->hw.phy.type != e1000_phy_ife) {
			ifmedia_add(&sc->media,
			    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
			ifmedia_add(&sc->media,
			    IFM_ETHER | IFM_1000_T, 0, NULL);
		}
	}
	ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO);
}

static void
igb_add_sysctl(struct igb_softc *sc)
{
	char node[32];
	int i;

	sysctl_ctx_init(&sc->sysctl_ctx);
	sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx,
	    SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO,
	    device_get_nameunit(sc->dev), CTLFLAG_RD, 0, "");
	if (sc->sysctl_tree == NULL) {
		device_printf(sc->dev, "can't add sysctl node\n");
		return;
	}

	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "rxr", CTLFLAG_RD, &sc->rx_ring_cnt, 0, "# of RX rings");
	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "rxr_inuse", CTLFLAG_RD, &sc->rx_ring_inuse, 0,
	    "# of RX rings used");
	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "txr", CTLFLAG_RD, &sc->tx_ring_cnt, 0, "# of TX rings");
	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "txr_inuse", CTLFLAG_RD, &sc->tx_ring_inuse, 0,
	    "# of TX rings used");
	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "rxd", CTLFLAG_RD, &sc->rx_rings[0].num_rx_desc, 0,
	    "# of RX descs");
	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "txd", CTLFLAG_RD, &sc->tx_rings[0].num_tx_desc, 0,
	    "# of TX descs");

	if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
		SYSCTL_ADD_PROC(&sc->sysctl_ctx,
		    SYSCTL_CHILDREN(sc->sysctl_tree),
		    OID_AUTO, "intr_rate", CTLTYPE_INT | CTLFLAG_RW,
		    sc, 0, igb_sysctl_intr_rate, "I", "interrupt rate");
	} else {
		for (i = 0; i < sc->msix_cnt; ++i) {
			struct igb_msix_data *msix = &sc->msix_data[i];

			ksnprintf(node, sizeof(node), "msix%d_rate", i);
			SYSCTL_ADD_PROC(&sc->sysctl_ctx,
			    SYSCTL_CHILDREN(sc->sysctl_tree),
			    OID_AUTO, node, CTLTYPE_INT | CTLFLAG_RW,
			    msix, 0, igb_sysctl_msix_rate, "I",
			    msix->msix_rate_desc);
		}
	}

	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "tx_intr_nsegs", CTLTYPE_INT | CTLFLAG_RW,
	    sc, 0, igb_sysctl_tx_intr_nsegs, "I",
	    "# of segments per TX interrupt");

	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "tx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
	    sc, 0, igb_sysctl_tx_wreg_nsegs, "I",
	    "# of segments sent before write to hardware register");

	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "rx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
	    sc, 0, igb_sysctl_rx_wreg_nsegs, "I",
	    "# of segments received before write to hardware register");

#ifdef IFPOLL_ENABLE
	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "npoll_rxoff", CTLTYPE_INT|CTLFLAG_RW,
	    sc, 0, igb_sysctl_npoll_rxoff, "I", "NPOLLING RX cpu offset");
	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "npoll_txoff", CTLTYPE_INT|CTLFLAG_RW,
	    sc, 0, igb_sysctl_npoll_txoff, "I", "NPOLLING TX cpu offset");
#endif

#ifdef IGB_RSS_DEBUG
	SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "rss_debug", CTLFLAG_RW, &sc->rss_debug, 0,
	    "RSS debug level");
	for (i = 0; i < sc->rx_ring_cnt; ++i) {
		ksnprintf(node, sizeof(node), "rx%d_pkt", i);
		SYSCTL_ADD_ULONG(&sc->sysctl_ctx,
		    SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, node,
		    CTLFLAG_RW, &sc->rx_rings[i].rx_packets, "RXed packets");
	}
#endif
#ifdef IGB_TSS_DEBUG
	for (i = 0; i < sc->tx_ring_cnt; ++i) {
		ksnprintf(node, sizeof(node), "tx%d_pkt", i);
		SYSCTL_ADD_ULONG(&sc->sysctl_ctx,
		    SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, node,
		    CTLFLAG_RW, &sc->tx_rings[i].tx_packets, "TXed packets");
	}
#endif
}

static int
igb_alloc_rings(struct igb_softc *sc)
{
	int error, i;

	/*
	 * Create top level busdma tag
	 */
	error = bus_dma_tag_create(NULL, 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
	    &sc->parent_tag);
	if (error) {
		device_printf(sc->dev, "could not create top level DMA tag\n");
		return error;
	}

	/*
	 * Allocate TX descriptor rings and buffers
	 */
	sc->tx_rings = kmalloc_cachealign(
	    sizeof(struct igb_tx_ring) * sc->tx_ring_cnt,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < sc->tx_ring_cnt; ++i) {
		struct igb_tx_ring *txr = &sc->tx_rings[i];

		/* Set up some basics */
		txr->sc = sc;
		txr->me = i;
		lwkt_serialize_init(&txr->tx_serialize);

		error = igb_create_tx_ring(txr);
		if (error)
			return error;
	}

	/*
	 * Allocate RX descriptor rings and buffers
	 */
	sc->rx_rings = kmalloc_cachealign(
	    sizeof(struct igb_rx_ring) * sc->rx_ring_cnt,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < sc->rx_ring_cnt; ++i) {
		struct igb_rx_ring *rxr = &sc->rx_rings[i];

		/* Set up some basics */
		rxr->sc = sc;
		rxr->me = i;
		lwkt_serialize_init(&rxr->rx_serialize);

		error = igb_create_rx_ring(rxr);
		if (error)
			return error;
	}

	return 0;
}

static void
igb_free_rings(struct igb_softc *sc)
{
	int i;

	if (sc->tx_rings != NULL) {
		for (i = 0; i < sc->tx_ring_cnt; ++i) {
			struct igb_tx_ring *txr = &sc->tx_rings[i];

			igb_destroy_tx_ring(txr, txr->num_tx_desc);
		}
		kfree(sc->tx_rings, M_DEVBUF);
	}

	if (sc->rx_rings != NULL) {
		for (i = 0; i < sc->rx_ring_cnt; ++i) {
			struct igb_rx_ring *rxr = &sc->rx_rings[i];

			igb_destroy_rx_ring(rxr, rxr->num_rx_desc);
		}
		kfree(sc->rx_rings, M_DEVBUF);
	}
}

static int
igb_create_tx_ring(struct igb_tx_ring *txr)
{
	int tsize, error, i, ntxd;

	/*
	 * Validate the number of transmit descriptors.  It must not
	 * exceed the hardware maximum, and must be a multiple of
	 * IGB_DBA_ALIGN.
	 */
	ntxd = device_getenv_int(txr->sc->dev, "txd", igb_txd);
	if ((ntxd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN != 0 ||
	    ntxd > IGB_MAX_TXD || ntxd < IGB_MIN_TXD) {
		device_printf(txr->sc->dev,
		    "Using %d TX descriptors instead of %d!\n",
		    IGB_DEFAULT_TXD, ntxd);
		txr->num_tx_desc = IGB_DEFAULT_TXD;
	} else {
		txr->num_tx_desc = ntxd;
	}

	/*
	 * Allocate TX descriptor ring
	 */
	tsize = roundup2(txr->num_tx_desc * sizeof(union e1000_adv_tx_desc),
	    IGB_DBA_ALIGN);
	txr->txdma.dma_vaddr = bus_dmamem_coherent_any(txr->sc->parent_tag,
	    IGB_DBA_ALIGN, tsize, BUS_DMA_WAITOK,
	    &txr->txdma.dma_tag, &txr->txdma.dma_map, &txr->txdma.dma_paddr);
	if (txr->txdma.dma_vaddr == NULL) {
		device_printf(txr->sc->dev,
		    "Unable to allocate TX Descriptor memory\n");
		return ENOMEM;
	}
	txr->tx_base = txr->txdma.dma_vaddr;
	bzero(txr->tx_base, tsize);

	tsize = __VM_CACHELINE_ALIGN(
	    sizeof(struct igb_tx_buf) * txr->num_tx_desc);
	txr->tx_buf = kmalloc_cachealign(tsize, M_DEVBUF, M_WAITOK | M_ZERO);

	/*
	 * Allocate TX head write-back buffer
	 */
	txr->tx_hdr = bus_dmamem_coherent_any(txr->sc->parent_tag,
	    __VM_CACHELINE_SIZE, __VM_CACHELINE_SIZE, BUS_DMA_WAITOK,
	    &txr->tx_hdr_dtag, &txr->tx_hdr_dmap, &txr->tx_hdr_paddr);
	if (txr->tx_hdr == NULL) {
		device_printf(txr->sc->dev,
		    "Unable to allocate TX head write-back buffer\n");
		return ENOMEM;
	}

	/*
	 * Create DMA tag for TX buffers
	 */
	error = bus_dma_tag_create(txr->sc->parent_tag,
	    1, 0,		/* alignment, bounds */
	    BUS_SPACE_MAXADDR,	/* lowaddr */
	    BUS_SPACE_MAXADDR,	/* highaddr */
	    NULL, NULL,		/* filter, filterarg */
	    IGB_TSO_SIZE,	/* maxsize */
	    IGB_MAX_SCATTER,	/* nsegments */
	    PAGE_SIZE,		/* maxsegsize */
	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
	    BUS_DMA_ONEBPAGE,	/* flags */
	    &txr->tx_tag);
	if (error) {
		device_printf(txr->sc->dev, "Unable to allocate TX DMA tag\n");
		kfree(txr->tx_buf, M_DEVBUF);
		txr->tx_buf = NULL;
		return error;
	}

	/*
	 * Create DMA maps for TX buffers
	 */
	for (i = 0; i < txr->num_tx_desc; ++i) {
		struct igb_tx_buf *txbuf = &txr->tx_buf[i];

		error = bus_dmamap_create(txr->tx_tag,
		    BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &txbuf->map);
		if (error) {
			device_printf(txr->sc->dev,
			    "Unable to create TX DMA map\n");
			igb_destroy_tx_ring(txr, i);
			return error;
		}
	}

	if (txr->sc->hw.mac.type == e1000_82575)
		txr->tx_flags |= IGB_TXFLAG_TSO_IPLEN0;

	/*
	 * Initialize various watermarks
	 */
	txr->spare_desc = IGB_TX_SPARE;
	txr->intr_nsegs = txr->num_tx_desc / 16;
	txr->wreg_nsegs = IGB_DEF_TXWREG_NSEGS;
	txr->oact_hi_desc = txr->num_tx_desc / 2;
	txr->oact_lo_desc = txr->num_tx_desc / 8;
	if (txr->oact_lo_desc > IGB_TX_OACTIVE_MAX)
		txr->oact_lo_desc = IGB_TX_OACTIVE_MAX;
	if (txr->oact_lo_desc < txr->spare_desc + IGB_TX_RESERVED)
		txr->oact_lo_desc = txr->spare_desc + IGB_TX_RESERVED;

	return 0;
}

static void
igb_free_tx_ring(struct igb_tx_ring *txr)
{
	int i;

	for (i = 0; i < txr->num_tx_desc; ++i) {
		struct igb_tx_buf *txbuf = &txr->tx_buf[i];

		if (txbuf->m_head != NULL) {
			bus_dmamap_unload(txr->tx_tag, txbuf->map);
			m_freem(txbuf->m_head);
			txbuf->m_head = NULL;
		}
	}
}
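
/*
 * Note: 'ndesc' is the number of per-packet DMA maps actually created
 * for this ring; igb_create_tx_ring() passes its loop index here when
 * it has to unwind a partial allocation, while igb_free_rings() passes
 * the full descriptor count.
 */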
1928 static void 1929 igb_destroy_tx_ring(struct igb_tx_ring *txr, int ndesc) 1930 { 1931 int i; 1932 1933 if (txr->txdma.dma_vaddr != NULL) { 1934 bus_dmamap_unload(txr->txdma.dma_tag, txr->txdma.dma_map); 1935 bus_dmamem_free(txr->txdma.dma_tag, txr->txdma.dma_vaddr, 1936 txr->txdma.dma_map); 1937 bus_dma_tag_destroy(txr->txdma.dma_tag); 1938 txr->txdma.dma_vaddr = NULL; 1939 } 1940 1941 if (txr->tx_hdr != NULL) { 1942 bus_dmamap_unload(txr->tx_hdr_dtag, txr->tx_hdr_dmap); 1943 bus_dmamem_free(txr->tx_hdr_dtag, txr->tx_hdr, 1944 txr->tx_hdr_dmap); 1945 bus_dma_tag_destroy(txr->tx_hdr_dtag); 1946 txr->tx_hdr = NULL; 1947 } 1948 1949 if (txr->tx_buf == NULL) 1950 return; 1951 1952 for (i = 0; i < ndesc; ++i) { 1953 struct igb_tx_buf *txbuf = &txr->tx_buf[i]; 1954 1955 KKASSERT(txbuf->m_head == NULL); 1956 bus_dmamap_destroy(txr->tx_tag, txbuf->map); 1957 } 1958 bus_dma_tag_destroy(txr->tx_tag); 1959 1960 kfree(txr->tx_buf, M_DEVBUF); 1961 txr->tx_buf = NULL; 1962 } 1963 1964 static void 1965 igb_init_tx_ring(struct igb_tx_ring *txr) 1966 { 1967 /* Clear the old descriptor contents */ 1968 bzero(txr->tx_base, 1969 sizeof(union e1000_adv_tx_desc) * txr->num_tx_desc); 1970 1971 /* Clear TX head write-back buffer */ 1972 *(txr->tx_hdr) = 0; 1973 1974 /* Reset indices */ 1975 txr->next_avail_desc = 0; 1976 txr->next_to_clean = 0; 1977 txr->tx_nsegs = 0; 1978 1979 /* Set number of descriptors available */ 1980 txr->tx_avail = txr->num_tx_desc; 1981 1982 /* Enable this TX ring */ 1983 txr->tx_flags |= IGB_TXFLAG_ENABLED; 1984 } 1985 1986 static void 1987 igb_init_tx_unit(struct igb_softc *sc) 1988 { 1989 struct e1000_hw *hw = &sc->hw; 1990 uint32_t tctl; 1991 int i; 1992 1993 /* Setup the Tx Descriptor Rings */ 1994 for (i = 0; i < sc->tx_ring_inuse; ++i) { 1995 struct igb_tx_ring *txr = &sc->tx_rings[i]; 1996 uint64_t bus_addr = txr->txdma.dma_paddr; 1997 uint64_t hdr_paddr = txr->tx_hdr_paddr; 1998 uint32_t txdctl = 0; 1999 uint32_t dca_txctrl; 2000 2001 E1000_WRITE_REG(hw, E1000_TDLEN(i), 2002 txr->num_tx_desc * sizeof(struct e1000_tx_desc)); 2003 E1000_WRITE_REG(hw, E1000_TDBAH(i), 2004 (uint32_t)(bus_addr >> 32)); 2005 E1000_WRITE_REG(hw, E1000_TDBAL(i), 2006 (uint32_t)bus_addr); 2007 2008 /* Setup the HW Tx Head and Tail descriptor pointers */ 2009 E1000_WRITE_REG(hw, E1000_TDT(i), 0); 2010 E1000_WRITE_REG(hw, E1000_TDH(i), 0); 2011 2012 dca_txctrl = E1000_READ_REG(hw, E1000_DCA_TXCTRL(i)); 2013 dca_txctrl &= ~E1000_DCA_TXCTRL_TX_WB_RO_EN; 2014 E1000_WRITE_REG(hw, E1000_DCA_TXCTRL(i), dca_txctrl); 2015 2016 /* 2017 * Don't set WB_on_EITR: 2018 * - 82575 does not have it 2019 * - It almost has no effect on 82576, see: 2020 * 82576 specification update errata #26 2021 * - It causes unnecessary bus traffic 2022 */ 2023 E1000_WRITE_REG(hw, E1000_TDWBAH(i), 2024 (uint32_t)(hdr_paddr >> 32)); 2025 E1000_WRITE_REG(hw, E1000_TDWBAL(i), 2026 ((uint32_t)hdr_paddr) | E1000_TX_HEAD_WB_ENABLE); 2027 2028 /* 2029 * WTHRESH is ignored by the hardware, since header 2030 * write back mode is used. 
2031 */ 2032 txdctl |= IGB_TX_PTHRESH; 2033 txdctl |= IGB_TX_HTHRESH << 8; 2034 txdctl |= IGB_TX_WTHRESH << 16; 2035 txdctl |= E1000_TXDCTL_QUEUE_ENABLE; 2036 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); 2037 } 2038 2039 if (sc->vf_ifp) 2040 return; 2041 2042 e1000_config_collision_dist(hw); 2043 2044 /* Program the Transmit Control Register */ 2045 tctl = E1000_READ_REG(hw, E1000_TCTL); 2046 tctl &= ~E1000_TCTL_CT; 2047 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | 2048 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); 2049 2050 /* This write will effectively turn on the transmit unit. */ 2051 E1000_WRITE_REG(hw, E1000_TCTL, tctl); 2052 } 2053 2054 static boolean_t 2055 igb_txcsum_ctx(struct igb_tx_ring *txr, struct mbuf *mp) 2056 { 2057 struct e1000_adv_tx_context_desc *TXD; 2058 uint32_t vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx; 2059 int ehdrlen, ctxd, ip_hlen = 0; 2060 boolean_t offload = TRUE; 2061 2062 if ((mp->m_pkthdr.csum_flags & IGB_CSUM_FEATURES) == 0) 2063 offload = FALSE; 2064 2065 vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0; 2066 2067 ctxd = txr->next_avail_desc; 2068 TXD = (struct e1000_adv_tx_context_desc *)&txr->tx_base[ctxd]; 2069 2070 /* 2071 * In advanced descriptors the vlan tag must 2072 * be placed into the context descriptor, thus 2073 * we need to be here just for that setup. 2074 */ 2075 if (mp->m_flags & M_VLANTAG) { 2076 uint16_t vlantag; 2077 2078 vlantag = htole16(mp->m_pkthdr.ether_vlantag); 2079 vlan_macip_lens |= (vlantag << E1000_ADVTXD_VLAN_SHIFT); 2080 } else if (!offload) { 2081 return FALSE; 2082 } 2083 2084 ehdrlen = mp->m_pkthdr.csum_lhlen; 2085 KASSERT(ehdrlen > 0, ("invalid ether hlen")); 2086 2087 /* Set the ether header length */ 2088 vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; 2089 if (mp->m_pkthdr.csum_flags & CSUM_IP) { 2090 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; 2091 ip_hlen = mp->m_pkthdr.csum_iphlen; 2092 KASSERT(ip_hlen > 0, ("invalid ip hlen")); 2093 } 2094 vlan_macip_lens |= ip_hlen; 2095 2096 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; 2097 if (mp->m_pkthdr.csum_flags & CSUM_TCP) 2098 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; 2099 else if (mp->m_pkthdr.csum_flags & CSUM_UDP) 2100 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP; 2101 2102 /* 2103 * 82575 needs the TX context index added; the queue 2104 * index is used as TX context index here. 
2105 */
2106 if (txr->sc->hw.mac.type == e1000_82575)
2107 mss_l4len_idx = txr->me << 4;
2108
2109 /* Now copy bits into descriptor */
2110 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
2111 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
2112 TXD->seqnum_seed = htole32(0);
2113 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
2114
2115 /* We've consumed the first desc, adjust counters */
2116 if (++ctxd == txr->num_tx_desc)
2117 ctxd = 0;
2118 txr->next_avail_desc = ctxd;
2119 --txr->tx_avail;
2120
2121 return offload;
2122 }
2123
2124 static void
2125 igb_txeof(struct igb_tx_ring *txr)
2126 {
2127 struct ifnet *ifp = &txr->sc->arpcom.ac_if;
2128 int first, hdr, avail;
2129
2130 if (txr->tx_avail == txr->num_tx_desc)
2131 return;
2132
2133 first = txr->next_to_clean;
2134 hdr = *(txr->tx_hdr);
2135
2136 if (first == hdr)
2137 return;
2138
2139 avail = txr->tx_avail;
2140 while (first != hdr) {
2141 struct igb_tx_buf *txbuf = &txr->tx_buf[first];
2142
2143 ++avail;
2144 if (txbuf->m_head) {
2145 bus_dmamap_unload(txr->tx_tag, txbuf->map);
2146 m_freem(txbuf->m_head);
2147 txbuf->m_head = NULL;
2148 IFNET_STAT_INC(ifp, opackets, 1);
2149 }
2150 if (++first == txr->num_tx_desc)
2151 first = 0;
2152 }
2153 txr->next_to_clean = first;
2154 txr->tx_avail = avail;
2155
2156 /*
2157 * If we have a minimum free, clear OACTIVE
2158 * to tell the stack that it is OK to send packets.
2159 */
2160 if (IGB_IS_NOT_OACTIVE(txr)) {
2161 ifsq_clr_oactive(txr->ifsq);
2162
2163 /*
2164 * We have enough TX descriptors, turn off
2165 * the watchdog. We allow a small number of
2166 * packets (roughly intr_nsegs) pending on
2167 * the transmit ring.
2168 */
2169 txr->tx_watchdog.wd_timer = 0;
2170 }
2171 }
2172
2173 static int
2174 igb_create_rx_ring(struct igb_rx_ring *rxr)
2175 {
2176 int rsize, i, error, nrxd;
2177
2178 /*
2179 * Validate number of receive descriptors. It must not exceed
2180 * hardware maximum, and must be a multiple of IGB_DBA_ALIGN.
2181 */
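/*
 * NOTE:
 * Illustrative arithmetic for the check below: an RX descriptor is
 * 16 bytes, so assuming the usual IGB_DBA_ALIGN of 128 the modulo
 * test effectively requires nrxd to be a multiple of 8 (128 / 16),
 * in addition to the IGB_MIN_RXD/IGB_MAX_RXD bounds.
 */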
2182 nrxd = device_getenv_int(rxr->sc->dev, "rxd", igb_rxd);
2183 if ((nrxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN != 0 ||
2184 nrxd > IGB_MAX_RXD || nrxd < IGB_MIN_RXD) {
2185 device_printf(rxr->sc->dev,
2186 "Using %d RX descriptors instead of %d!\n",
2187 IGB_DEFAULT_RXD, nrxd);
2188 rxr->num_rx_desc = IGB_DEFAULT_RXD;
2189 } else {
2190 rxr->num_rx_desc = nrxd;
2191 }
2192
2193 /*
2194 * Allocate RX descriptor ring
2195 */
2196 rsize = roundup2(rxr->num_rx_desc * sizeof(union e1000_adv_rx_desc),
2197 IGB_DBA_ALIGN);
2198 rxr->rxdma.dma_vaddr = bus_dmamem_coherent_any(rxr->sc->parent_tag,
2199 IGB_DBA_ALIGN, rsize, BUS_DMA_WAITOK,
2200 &rxr->rxdma.dma_tag, &rxr->rxdma.dma_map,
2201 &rxr->rxdma.dma_paddr);
2202 if (rxr->rxdma.dma_vaddr == NULL) {
2203 device_printf(rxr->sc->dev,
2204 "Unable to allocate RX Descriptor memory\n");
2205 return ENOMEM;
2206 }
2207 rxr->rx_base = rxr->rxdma.dma_vaddr;
2208 bzero(rxr->rx_base, rsize);
2209
2210 rsize = __VM_CACHELINE_ALIGN(
2211 sizeof(struct igb_rx_buf) * rxr->num_rx_desc);
2212 rxr->rx_buf = kmalloc_cachealign(rsize, M_DEVBUF, M_WAITOK | M_ZERO);
2213
2214 /*
2215 * Create DMA tag for RX buffers
2216 */
2217 error = bus_dma_tag_create(rxr->sc->parent_tag,
2218 1, 0, /* alignment, bounds */
2219 BUS_SPACE_MAXADDR, /* lowaddr */
2220 BUS_SPACE_MAXADDR, /* highaddr */
2221 NULL, NULL, /* filter, filterarg */
2222 MCLBYTES, /* maxsize */
2223 1, /* nsegments */
2224 MCLBYTES, /* maxsegsize */
2225 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, /* flags */
2226 &rxr->rx_tag);
2227 if (error) {
2228 device_printf(rxr->sc->dev,
2229 "Unable to create RX payload DMA tag\n");
2230 kfree(rxr->rx_buf, M_DEVBUF);
2231 rxr->rx_buf = NULL;
2232 return error;
2233 }
2234
2235 /*
2236 * Create spare DMA map for RX buffers
2237 */
2238 error = bus_dmamap_create(rxr->rx_tag, BUS_DMA_WAITOK,
2239 &rxr->rx_sparemap);
2240 if (error) {
2241 device_printf(rxr->sc->dev,
2242 "Unable to create spare RX DMA map\n");
2243 bus_dma_tag_destroy(rxr->rx_tag);
2244 kfree(rxr->rx_buf, M_DEVBUF);
2245 rxr->rx_buf = NULL;
2246 return error;
2247 }
2248
2249 /*
2250 * Create DMA maps for RX buffers
2251 */
2252 for (i = 0; i < rxr->num_rx_desc; i++) {
2253 struct igb_rx_buf *rxbuf = &rxr->rx_buf[i];
2254
2255 error = bus_dmamap_create(rxr->rx_tag,
2256 BUS_DMA_WAITOK, &rxbuf->map);
2257 if (error) {
2258 device_printf(rxr->sc->dev,
2259 "Unable to create RX DMA maps\n");
2260 igb_destroy_rx_ring(rxr, i);
2261 return error;
2262 }
2263 }
2264
2265 /*
2266 * Initialize various watermarks
2267 */
2268 rxr->wreg_nsegs = IGB_DEF_RXWREG_NSEGS;
2269
2270 return 0;
2271 }
2272
2273 static void
2274 igb_free_rx_ring(struct igb_rx_ring *rxr)
2275 {
2276 int i;
2277
2278 for (i = 0; i < rxr->num_rx_desc; ++i) {
2279 struct igb_rx_buf *rxbuf = &rxr->rx_buf[i];
2280
2281 if (rxbuf->m_head != NULL) {
2282 bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
2283 m_freem(rxbuf->m_head);
2284 rxbuf->m_head = NULL;
2285 }
2286 }
2287
2288 if (rxr->fmp != NULL)
2289 m_freem(rxr->fmp);
2290 rxr->fmp = NULL;
2291 rxr->lmp = NULL;
2292 }
2293
2294 static void
2295 igb_destroy_rx_ring(struct igb_rx_ring *rxr, int ndesc)
2296 {
2297 int i;
2298
2299 if (rxr->rxdma.dma_vaddr != NULL) {
2300 bus_dmamap_unload(rxr->rxdma.dma_tag, rxr->rxdma.dma_map);
2301 bus_dmamem_free(rxr->rxdma.dma_tag, rxr->rxdma.dma_vaddr,
2302 rxr->rxdma.dma_map);
2303 bus_dma_tag_destroy(rxr->rxdma.dma_tag);
2304 rxr->rxdma.dma_vaddr = NULL;
2305 }
2306
2307 if (rxr->rx_buf == NULL)
2308 return;
2309
2310 for (i = 0; i < ndesc;
++i) { 2311 struct igb_rx_buf *rxbuf = &rxr->rx_buf[i]; 2312 2313 KKASSERT(rxbuf->m_head == NULL); 2314 bus_dmamap_destroy(rxr->rx_tag, rxbuf->map); 2315 } 2316 bus_dmamap_destroy(rxr->rx_tag, rxr->rx_sparemap); 2317 bus_dma_tag_destroy(rxr->rx_tag); 2318 2319 kfree(rxr->rx_buf, M_DEVBUF); 2320 rxr->rx_buf = NULL; 2321 } 2322 2323 static void 2324 igb_setup_rxdesc(union e1000_adv_rx_desc *rxd, const struct igb_rx_buf *rxbuf) 2325 { 2326 rxd->read.pkt_addr = htole64(rxbuf->paddr); 2327 rxd->wb.upper.status_error = 0; 2328 } 2329 2330 static int 2331 igb_newbuf(struct igb_rx_ring *rxr, int i, boolean_t wait) 2332 { 2333 struct mbuf *m; 2334 bus_dma_segment_t seg; 2335 bus_dmamap_t map; 2336 struct igb_rx_buf *rxbuf; 2337 int error, nseg; 2338 2339 m = m_getcl(wait ? MB_WAIT : MB_DONTWAIT, MT_DATA, M_PKTHDR); 2340 if (m == NULL) { 2341 if (wait) { 2342 if_printf(&rxr->sc->arpcom.ac_if, 2343 "Unable to allocate RX mbuf\n"); 2344 } 2345 return ENOBUFS; 2346 } 2347 m->m_len = m->m_pkthdr.len = MCLBYTES; 2348 2349 if (rxr->sc->max_frame_size <= MCLBYTES - ETHER_ALIGN) 2350 m_adj(m, ETHER_ALIGN); 2351 2352 error = bus_dmamap_load_mbuf_segment(rxr->rx_tag, 2353 rxr->rx_sparemap, m, &seg, 1, &nseg, BUS_DMA_NOWAIT); 2354 if (error) { 2355 m_freem(m); 2356 if (wait) { 2357 if_printf(&rxr->sc->arpcom.ac_if, 2358 "Unable to load RX mbuf\n"); 2359 } 2360 return error; 2361 } 2362 2363 rxbuf = &rxr->rx_buf[i]; 2364 if (rxbuf->m_head != NULL) 2365 bus_dmamap_unload(rxr->rx_tag, rxbuf->map); 2366 2367 map = rxbuf->map; 2368 rxbuf->map = rxr->rx_sparemap; 2369 rxr->rx_sparemap = map; 2370 2371 rxbuf->m_head = m; 2372 rxbuf->paddr = seg.ds_addr; 2373 2374 igb_setup_rxdesc(&rxr->rx_base[i], rxbuf); 2375 return 0; 2376 } 2377 2378 static int 2379 igb_init_rx_ring(struct igb_rx_ring *rxr) 2380 { 2381 int i; 2382 2383 /* Clear the ring contents */ 2384 bzero(rxr->rx_base, 2385 rxr->num_rx_desc * sizeof(union e1000_adv_rx_desc)); 2386 2387 /* Now replenish the ring mbufs */ 2388 for (i = 0; i < rxr->num_rx_desc; ++i) { 2389 int error; 2390 2391 error = igb_newbuf(rxr, i, TRUE); 2392 if (error) 2393 return error; 2394 } 2395 2396 /* Setup our descriptor indices */ 2397 rxr->next_to_check = 0; 2398 2399 rxr->fmp = NULL; 2400 rxr->lmp = NULL; 2401 rxr->discard = FALSE; 2402 2403 return 0; 2404 } 2405 2406 static void 2407 igb_init_rx_unit(struct igb_softc *sc) 2408 { 2409 struct ifnet *ifp = &sc->arpcom.ac_if; 2410 struct e1000_hw *hw = &sc->hw; 2411 uint32_t rctl, rxcsum, srrctl = 0; 2412 int i; 2413 2414 /* 2415 * Make sure receives are disabled while setting 2416 * up the descriptor ring 2417 */ 2418 rctl = E1000_READ_REG(hw, E1000_RCTL); 2419 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); 2420 2421 #if 0 2422 /* 2423 ** Set up for header split 2424 */ 2425 if (igb_header_split) { 2426 /* Use a standard mbuf for the header */ 2427 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; 2428 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; 2429 } else 2430 #endif 2431 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; 2432 2433 /* 2434 ** Set up for jumbo frames 2435 */ 2436 if (ifp->if_mtu > ETHERMTU) { 2437 rctl |= E1000_RCTL_LPE; 2438 #if 0 2439 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) { 2440 srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 2441 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; 2442 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) { 2443 srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 2444 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; 2445 } 2446 /* Set maximum packet len */ 2447 psize = 
adapter->max_frame_size;
2448 /* are we on a vlan? */
2449 if (adapter->ifp->if_vlantrunk != NULL)
2450 psize += VLAN_TAG_SIZE;
2451 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
2452 #else
2453 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
2454 rctl |= E1000_RCTL_SZ_2048;
2455 #endif
2456 } else {
2457 rctl &= ~E1000_RCTL_LPE;
2458 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
2459 rctl |= E1000_RCTL_SZ_2048;
2460 }
2461
2462 /* Setup the Base and Length of the Rx Descriptor Rings */
2463 for (i = 0; i < sc->rx_ring_inuse; ++i) {
2464 struct igb_rx_ring *rxr = &sc->rx_rings[i];
2465 uint64_t bus_addr = rxr->rxdma.dma_paddr;
2466 uint32_t rxdctl;
2467
2468 E1000_WRITE_REG(hw, E1000_RDLEN(i),
2469 rxr->num_rx_desc * sizeof(struct e1000_rx_desc));
2470 E1000_WRITE_REG(hw, E1000_RDBAH(i),
2471 (uint32_t)(bus_addr >> 32));
2472 E1000_WRITE_REG(hw, E1000_RDBAL(i),
2473 (uint32_t)bus_addr);
2474 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
2475 /* Enable this Queue */
2476 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
2477 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2478 rxdctl &= 0xFFF00000;
2479 rxdctl |= IGB_RX_PTHRESH;
2480 rxdctl |= IGB_RX_HTHRESH << 8;
2481 /*
2482 * Don't set WTHRESH to a value above 1 on 82576, see:
2483 * 82576 specification update errata #26
2484 */
2485 rxdctl |= IGB_RX_WTHRESH << 16;
2486 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
2487 }
2488
2489 rxcsum = E1000_READ_REG(&sc->hw, E1000_RXCSUM);
2490 rxcsum &= ~(E1000_RXCSUM_PCSS_MASK | E1000_RXCSUM_IPPCSE);
2491
2492 /*
2493 * Receive Checksum Offload for TCP and UDP
2494 *
2495 * Checksum offloading is also enabled when multiple receive
2496 * queues are to be supported, since we need it to figure out
2497 * fragments.
2498 */
2499 if ((ifp->if_capenable & IFCAP_RXCSUM) || IGB_ENABLE_HWRSS(sc)) {
2500 /*
2501 * NOTE:
2502 * PCSD must be enabled to enable multiple
2503 * receive queues.
2504 */
2505 rxcsum |= E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL |
2506 E1000_RXCSUM_PCSD;
2507 } else {
2508 rxcsum &= ~(E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL |
2509 E1000_RXCSUM_PCSD);
2510 }
2511 E1000_WRITE_REG(&sc->hw, E1000_RXCSUM, rxcsum);
2512
2513 if (IGB_ENABLE_HWRSS(sc)) {
2514 uint8_t key[IGB_NRSSRK * IGB_RSSRK_SIZE];
2515 uint32_t reta_shift;
2516 int j, r;
2517
2518 /*
2519 * NOTE:
2520 * When we reach here, RSS has already been disabled
2521 * in igb_stop(), so we can safely configure the RSS key
2522 * and redirect table.
2523 */
2524
2525 /*
2526 * Configure RSS key
2527 */
2528 toeplitz_get_key(key, sizeof(key));
2529 for (i = 0; i < IGB_NRSSRK; ++i) {
2530 uint32_t rssrk;
2531
2532 rssrk = IGB_RSSRK_VAL(key, i);
2533 IGB_RSS_DPRINTF(sc, 1, "rssrk%d 0x%08x\n", i, rssrk);
2534
2535 E1000_WRITE_REG(hw, E1000_RSSRK(i), rssrk);
2536 }
2537
2538 /*
2539 * Configure RSS redirect table in the following fashion:
2540 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
2541 */
2542 reta_shift = IGB_RETA_SHIFT;
2543 if (hw->mac.type == e1000_82575)
2544 reta_shift = IGB_RETA_SHIFT_82575;
2545
2546 r = 0;
2547 for (j = 0; j < IGB_NRETA; ++j) {
2548 uint32_t reta = 0;
2549
2550 for (i = 0; i < IGB_RETA_SIZE; ++i) {
2551 uint32_t q;
2552
2553 q = (r % sc->rx_ring_inuse) << reta_shift;
2554 reta |= q << (8 * i);
2555 ++r;
2556 }
2557 IGB_RSS_DPRINTF(sc, 1, "reta 0x%08x\n", reta);
2558 E1000_WRITE_REG(hw, E1000_RETA(j), reta);
2559 }
2560
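/*
 * NOTE:
 * Illustrative walk of the redirect table loop above, assuming
 * rx_ring_inuse = 2: the IGB_NRETA * IGB_RETA_SIZE byte entries are
 * filled with the repeating pattern 0, 1, 0, 1, ..., so
 * rdr_table[hash & rdr_table_mask] == (hash & 1) and packets are
 * spread across both rings purely by the low bit of the Toeplitz
 * hash, matching the property stated in the comment before the loop.
 */
2561 /*
2562 * Enable multiple receive queues.
2563 * Enable IPv4 RSS standard hash functions.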
2564 * Disable RSS interrupt on 82575 2565 */ 2566 E1000_WRITE_REG(&sc->hw, E1000_MRQC, 2567 E1000_MRQC_ENABLE_RSS_4Q | 2568 E1000_MRQC_RSS_FIELD_IPV4_TCP | 2569 E1000_MRQC_RSS_FIELD_IPV4); 2570 } 2571 2572 /* Setup the Receive Control Register */ 2573 rctl &= ~(3 << E1000_RCTL_MO_SHIFT); 2574 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | 2575 E1000_RCTL_RDMTS_HALF | 2576 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); 2577 /* Strip CRC bytes. */ 2578 rctl |= E1000_RCTL_SECRC; 2579 /* Make sure VLAN Filters are off */ 2580 rctl &= ~E1000_RCTL_VFE; 2581 /* Don't store bad packets */ 2582 rctl &= ~E1000_RCTL_SBP; 2583 2584 /* Enable Receives */ 2585 E1000_WRITE_REG(hw, E1000_RCTL, rctl); 2586 2587 /* 2588 * Setup the HW Rx Head and Tail Descriptor Pointers 2589 * - needs to be after enable 2590 */ 2591 for (i = 0; i < sc->rx_ring_inuse; ++i) { 2592 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 2593 2594 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check); 2595 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->num_rx_desc - 1); 2596 } 2597 } 2598 2599 static void 2600 igb_rx_refresh(struct igb_rx_ring *rxr, int i) 2601 { 2602 if (--i < 0) 2603 i = rxr->num_rx_desc - 1; 2604 E1000_WRITE_REG(&rxr->sc->hw, E1000_RDT(rxr->me), i); 2605 } 2606 2607 static void 2608 igb_rxeof(struct igb_rx_ring *rxr, int count) 2609 { 2610 struct ifnet *ifp = &rxr->sc->arpcom.ac_if; 2611 union e1000_adv_rx_desc *cur; 2612 uint32_t staterr; 2613 int i, ncoll = 0; 2614 2615 i = rxr->next_to_check; 2616 cur = &rxr->rx_base[i]; 2617 staterr = le32toh(cur->wb.upper.status_error); 2618 2619 if ((staterr & E1000_RXD_STAT_DD) == 0) 2620 return; 2621 2622 while ((staterr & E1000_RXD_STAT_DD) && count != 0) { 2623 struct pktinfo *pi = NULL, pi0; 2624 struct igb_rx_buf *rxbuf = &rxr->rx_buf[i]; 2625 struct mbuf *m = NULL; 2626 boolean_t eop; 2627 2628 eop = (staterr & E1000_RXD_STAT_EOP) ? 
TRUE : FALSE; 2629 if (eop) 2630 --count; 2631 2632 ++ncoll; 2633 if ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) == 0 && 2634 !rxr->discard) { 2635 struct mbuf *mp = rxbuf->m_head; 2636 uint32_t hash, hashtype; 2637 uint16_t vlan; 2638 int len; 2639 2640 len = le16toh(cur->wb.upper.length); 2641 if ((rxr->sc->hw.mac.type == e1000_i350 || 2642 rxr->sc->hw.mac.type == e1000_i354) && 2643 (staterr & E1000_RXDEXT_STATERR_LB)) 2644 vlan = be16toh(cur->wb.upper.vlan); 2645 else 2646 vlan = le16toh(cur->wb.upper.vlan); 2647 2648 hash = le32toh(cur->wb.lower.hi_dword.rss); 2649 hashtype = le32toh(cur->wb.lower.lo_dword.data) & 2650 E1000_RXDADV_RSSTYPE_MASK; 2651 2652 IGB_RSS_DPRINTF(rxr->sc, 10, 2653 "ring%d, hash 0x%08x, hashtype %u\n", 2654 rxr->me, hash, hashtype); 2655 2656 bus_dmamap_sync(rxr->rx_tag, rxbuf->map, 2657 BUS_DMASYNC_POSTREAD); 2658 2659 if (igb_newbuf(rxr, i, FALSE) != 0) { 2660 IFNET_STAT_INC(ifp, iqdrops, 1); 2661 goto discard; 2662 } 2663 2664 mp->m_len = len; 2665 if (rxr->fmp == NULL) { 2666 mp->m_pkthdr.len = len; 2667 rxr->fmp = mp; 2668 rxr->lmp = mp; 2669 } else { 2670 rxr->lmp->m_next = mp; 2671 rxr->lmp = rxr->lmp->m_next; 2672 rxr->fmp->m_pkthdr.len += len; 2673 } 2674 2675 if (eop) { 2676 m = rxr->fmp; 2677 rxr->fmp = NULL; 2678 rxr->lmp = NULL; 2679 2680 m->m_pkthdr.rcvif = ifp; 2681 IFNET_STAT_INC(ifp, ipackets, 1); 2682 2683 if (ifp->if_capenable & IFCAP_RXCSUM) 2684 igb_rxcsum(staterr, m); 2685 2686 if (staterr & E1000_RXD_STAT_VP) { 2687 m->m_pkthdr.ether_vlantag = vlan; 2688 m->m_flags |= M_VLANTAG; 2689 } 2690 2691 if (ifp->if_capenable & IFCAP_RSS) { 2692 pi = igb_rssinfo(m, &pi0, 2693 hash, hashtype, staterr); 2694 } 2695 #ifdef IGB_RSS_DEBUG 2696 rxr->rx_packets++; 2697 #endif 2698 } 2699 } else { 2700 IFNET_STAT_INC(ifp, ierrors, 1); 2701 discard: 2702 igb_setup_rxdesc(cur, rxbuf); 2703 if (!eop) 2704 rxr->discard = TRUE; 2705 else 2706 rxr->discard = FALSE; 2707 if (rxr->fmp != NULL) { 2708 m_freem(rxr->fmp); 2709 rxr->fmp = NULL; 2710 rxr->lmp = NULL; 2711 } 2712 m = NULL; 2713 } 2714 2715 if (m != NULL) 2716 ether_input_pkt(ifp, m, pi); 2717 2718 /* Advance our pointers to the next descriptor. 
*/ 2719 if (++i == rxr->num_rx_desc) 2720 i = 0; 2721 2722 if (ncoll >= rxr->wreg_nsegs) { 2723 igb_rx_refresh(rxr, i); 2724 ncoll = 0; 2725 } 2726 2727 cur = &rxr->rx_base[i]; 2728 staterr = le32toh(cur->wb.upper.status_error); 2729 } 2730 rxr->next_to_check = i; 2731 2732 if (ncoll > 0) 2733 igb_rx_refresh(rxr, i); 2734 } 2735 2736 2737 static void 2738 igb_set_vlan(struct igb_softc *sc) 2739 { 2740 struct e1000_hw *hw = &sc->hw; 2741 uint32_t reg; 2742 #if 0 2743 struct ifnet *ifp = sc->arpcom.ac_if; 2744 #endif 2745 2746 if (sc->vf_ifp) { 2747 e1000_rlpml_set_vf(hw, sc->max_frame_size + VLAN_TAG_SIZE); 2748 return; 2749 } 2750 2751 reg = E1000_READ_REG(hw, E1000_CTRL); 2752 reg |= E1000_CTRL_VME; 2753 E1000_WRITE_REG(hw, E1000_CTRL, reg); 2754 2755 #if 0 2756 /* Enable the Filter Table */ 2757 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) { 2758 reg = E1000_READ_REG(hw, E1000_RCTL); 2759 reg &= ~E1000_RCTL_CFIEN; 2760 reg |= E1000_RCTL_VFE; 2761 E1000_WRITE_REG(hw, E1000_RCTL, reg); 2762 } 2763 #endif 2764 2765 /* Update the frame size */ 2766 E1000_WRITE_REG(&sc->hw, E1000_RLPML, 2767 sc->max_frame_size + VLAN_TAG_SIZE); 2768 2769 #if 0 2770 /* Don't bother with table if no vlans */ 2771 if ((adapter->num_vlans == 0) || 2772 ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)) 2773 return; 2774 /* 2775 ** A soft reset zero's out the VFTA, so 2776 ** we need to repopulate it now. 2777 */ 2778 for (int i = 0; i < IGB_VFTA_SIZE; i++) 2779 if (adapter->shadow_vfta[i] != 0) { 2780 if (adapter->vf_ifp) 2781 e1000_vfta_set_vf(hw, 2782 adapter->shadow_vfta[i], TRUE); 2783 else 2784 E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, 2785 i, adapter->shadow_vfta[i]); 2786 } 2787 #endif 2788 } 2789 2790 static void 2791 igb_enable_intr(struct igb_softc *sc) 2792 { 2793 if (sc->intr_type != PCI_INTR_TYPE_MSIX) { 2794 lwkt_serialize_handler_enable(&sc->main_serialize); 2795 } else { 2796 int i; 2797 2798 for (i = 0; i < sc->msix_cnt; ++i) { 2799 lwkt_serialize_handler_enable( 2800 sc->msix_data[i].msix_serialize); 2801 } 2802 } 2803 2804 if ((sc->flags & IGB_FLAG_SHARED_INTR) == 0) { 2805 if (sc->intr_type == PCI_INTR_TYPE_MSIX) 2806 E1000_WRITE_REG(&sc->hw, E1000_EIAC, sc->intr_mask); 2807 else 2808 E1000_WRITE_REG(&sc->hw, E1000_EIAC, 0); 2809 E1000_WRITE_REG(&sc->hw, E1000_EIAM, sc->intr_mask); 2810 E1000_WRITE_REG(&sc->hw, E1000_EIMS, sc->intr_mask); 2811 E1000_WRITE_REG(&sc->hw, E1000_IMS, E1000_IMS_LSC); 2812 } else { 2813 E1000_WRITE_REG(&sc->hw, E1000_IMS, IMS_ENABLE_MASK); 2814 } 2815 E1000_WRITE_FLUSH(&sc->hw); 2816 } 2817 2818 static void 2819 igb_disable_intr(struct igb_softc *sc) 2820 { 2821 if ((sc->flags & IGB_FLAG_SHARED_INTR) == 0) { 2822 E1000_WRITE_REG(&sc->hw, E1000_EIMC, 0xffffffff); 2823 E1000_WRITE_REG(&sc->hw, E1000_EIAC, 0); 2824 } 2825 E1000_WRITE_REG(&sc->hw, E1000_IMC, 0xffffffff); 2826 E1000_WRITE_FLUSH(&sc->hw); 2827 2828 if (sc->intr_type != PCI_INTR_TYPE_MSIX) { 2829 lwkt_serialize_handler_disable(&sc->main_serialize); 2830 } else { 2831 int i; 2832 2833 for (i = 0; i < sc->msix_cnt; ++i) { 2834 lwkt_serialize_handler_disable( 2835 sc->msix_data[i].msix_serialize); 2836 } 2837 } 2838 } 2839 2840 /* 2841 * Bit of a misnomer, what this really means is 2842 * to enable OS management of the system... 
aka
2843 * to disable special hardware management features
2844 */
2845 static void
2846 igb_get_mgmt(struct igb_softc *sc)
2847 {
2848 if (sc->flags & IGB_FLAG_HAS_MGMT) {
2849 int manc2h = E1000_READ_REG(&sc->hw, E1000_MANC2H);
2850 int manc = E1000_READ_REG(&sc->hw, E1000_MANC);
2851
2852 /* disable hardware interception of ARP */
2853 manc &= ~E1000_MANC_ARP_EN;
2854
2855 /* enable receiving management packets to the host */
2856 manc |= E1000_MANC_EN_MNG2HOST;
2857 manc2h |= 1 << 5; /* Mng Port 623 */
2858 manc2h |= 1 << 6; /* Mng Port 664 */
2859 E1000_WRITE_REG(&sc->hw, E1000_MANC2H, manc2h);
2860 E1000_WRITE_REG(&sc->hw, E1000_MANC, manc);
2861 }
2862 }
2863
2864 /*
2865 * Give control back to hardware management controller
2866 * if there is one.
2867 */
2868 static void
2869 igb_rel_mgmt(struct igb_softc *sc)
2870 {
2871 if (sc->flags & IGB_FLAG_HAS_MGMT) {
2872 int manc = E1000_READ_REG(&sc->hw, E1000_MANC);
2873
2874 /* Re-enable hardware interception of ARP */
2875 manc |= E1000_MANC_ARP_EN;
2876 manc &= ~E1000_MANC_EN_MNG2HOST;
2877
2878 E1000_WRITE_REG(&sc->hw, E1000_MANC, manc);
2879 }
2880 }
2881
2882 /*
2883 * Sets CTRL_EXT:DRV_LOAD bit.
2884 *
2885 * For ASF and Pass Through versions of f/w this means that
2886 * the driver is loaded.
2887 */
2888 static void
2889 igb_get_hw_control(struct igb_softc *sc)
2890 {
2891 uint32_t ctrl_ext;
2892
2893 if (sc->vf_ifp)
2894 return;
2895
2896 /* Let firmware know the driver has taken over */
2897 ctrl_ext = E1000_READ_REG(&sc->hw, E1000_CTRL_EXT);
2898 E1000_WRITE_REG(&sc->hw, E1000_CTRL_EXT,
2899 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
2900 }
2901
2902 /*
2903 * Resets CTRL_EXT:DRV_LOAD bit.
2904 *
2905 * For ASF and Pass Through versions of f/w this means that the
2906 * driver is no longer loaded.
2907 */
2908 static void
2909 igb_rel_hw_control(struct igb_softc *sc)
2910 {
2911 uint32_t ctrl_ext;
2912
2913 if (sc->vf_ifp)
2914 return;
2915
2916 /* Let firmware take over control of h/w */
2917 ctrl_ext = E1000_READ_REG(&sc->hw, E1000_CTRL_EXT);
2918 E1000_WRITE_REG(&sc->hw, E1000_CTRL_EXT,
2919 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
2920 }
2921
2922 static boolean_t
2923 igb_is_valid_ether_addr(const uint8_t *addr)
2924 {
2925 uint8_t zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
2926
2927 if ((addr[0] & 1) || !bcmp(addr, zero_addr, ETHER_ADDR_LEN))
2928 return FALSE;
2929 return TRUE;
2930 }
2931
2932 /*
2933 * Enable PCI Wake On LAN capability
2934 */
2935 static void
2936 igb_enable_wol(device_t dev)
2937 {
2938 uint16_t cap, status;
2939 uint8_t id;
2940
2941 /* First find the capabilities pointer */
2942 cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
2943
2944 /* Read the PM Capabilities */
2945 id = pci_read_config(dev, cap, 1);
2946 if (id != PCIY_PMG) /* Something wrong */
2947 return;
2948
2949 /*
2950 * OK, we have the power capabilities,
2951 * so now get the status register
2952 */
2953 cap += PCIR_POWER_STATUS;
2954 status = pci_read_config(dev, cap, 2);
2955 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
2956 pci_write_config(dev, cap, status, 2);
2957 }
2958
2959 static void
2960 igb_update_stats_counters(struct igb_softc *sc)
2961 {
2962 struct e1000_hw *hw = &sc->hw;
2963 struct e1000_hw_stats *stats;
2964 struct ifnet *ifp = &sc->arpcom.ac_if;
2965
2966 /*
2967 * The virtual function adapter has only a
2968 * small controlled set of stats, so update only
2969 * those and return.
2970 */ 2971 if (sc->vf_ifp) { 2972 igb_update_vf_stats_counters(sc); 2973 return; 2974 } 2975 stats = sc->stats; 2976 2977 if (sc->hw.phy.media_type == e1000_media_type_copper || 2978 (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) { 2979 stats->symerrs += 2980 E1000_READ_REG(hw,E1000_SYMERRS); 2981 stats->sec += E1000_READ_REG(hw, E1000_SEC); 2982 } 2983 2984 stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS); 2985 stats->mpc += E1000_READ_REG(hw, E1000_MPC); 2986 stats->scc += E1000_READ_REG(hw, E1000_SCC); 2987 stats->ecol += E1000_READ_REG(hw, E1000_ECOL); 2988 2989 stats->mcc += E1000_READ_REG(hw, E1000_MCC); 2990 stats->latecol += E1000_READ_REG(hw, E1000_LATECOL); 2991 stats->colc += E1000_READ_REG(hw, E1000_COLC); 2992 stats->dc += E1000_READ_REG(hw, E1000_DC); 2993 stats->rlec += E1000_READ_REG(hw, E1000_RLEC); 2994 stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC); 2995 stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC); 2996 2997 /* 2998 * For watchdog management we need to know if we have been 2999 * paused during the last interval, so capture that here. 3000 */ 3001 sc->pause_frames = E1000_READ_REG(hw, E1000_XOFFRXC); 3002 stats->xoffrxc += sc->pause_frames; 3003 stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC); 3004 stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC); 3005 stats->prc64 += E1000_READ_REG(hw, E1000_PRC64); 3006 stats->prc127 += E1000_READ_REG(hw, E1000_PRC127); 3007 stats->prc255 += E1000_READ_REG(hw, E1000_PRC255); 3008 stats->prc511 += E1000_READ_REG(hw, E1000_PRC511); 3009 stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023); 3010 stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522); 3011 stats->gprc += E1000_READ_REG(hw, E1000_GPRC); 3012 stats->bprc += E1000_READ_REG(hw, E1000_BPRC); 3013 stats->mprc += E1000_READ_REG(hw, E1000_MPRC); 3014 stats->gptc += E1000_READ_REG(hw, E1000_GPTC); 3015 3016 /* For the 64-bit byte counters the low dword must be read first. 
*/ 3017 /* Both registers clear on the read of the high dword */ 3018 3019 stats->gorc += E1000_READ_REG(hw, E1000_GORCL) + 3020 ((uint64_t)E1000_READ_REG(hw, E1000_GORCH) << 32); 3021 stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) + 3022 ((uint64_t)E1000_READ_REG(hw, E1000_GOTCH) << 32); 3023 3024 stats->rnbc += E1000_READ_REG(hw, E1000_RNBC); 3025 stats->ruc += E1000_READ_REG(hw, E1000_RUC); 3026 stats->rfc += E1000_READ_REG(hw, E1000_RFC); 3027 stats->roc += E1000_READ_REG(hw, E1000_ROC); 3028 stats->rjc += E1000_READ_REG(hw, E1000_RJC); 3029 3030 stats->tor += E1000_READ_REG(hw, E1000_TORH); 3031 stats->tot += E1000_READ_REG(hw, E1000_TOTH); 3032 3033 stats->tpr += E1000_READ_REG(hw, E1000_TPR); 3034 stats->tpt += E1000_READ_REG(hw, E1000_TPT); 3035 stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64); 3036 stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127); 3037 stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255); 3038 stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511); 3039 stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023); 3040 stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522); 3041 stats->mptc += E1000_READ_REG(hw, E1000_MPTC); 3042 stats->bptc += E1000_READ_REG(hw, E1000_BPTC); 3043 3044 /* Interrupt Counts */ 3045 3046 stats->iac += E1000_READ_REG(hw, E1000_IAC); 3047 stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC); 3048 stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC); 3049 stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC); 3050 stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC); 3051 stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC); 3052 stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC); 3053 stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC); 3054 stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC); 3055 3056 /* Host to Card Statistics */ 3057 3058 stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC); 3059 stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC); 3060 stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC); 3061 stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC); 3062 stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC); 3063 stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC); 3064 stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC); 3065 stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) + 3066 ((uint64_t)E1000_READ_REG(hw, E1000_HGORCH) << 32)); 3067 stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) + 3068 ((uint64_t)E1000_READ_REG(hw, E1000_HGOTCH) << 32)); 3069 stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS); 3070 stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC); 3071 stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC); 3072 3073 stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC); 3074 stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC); 3075 stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS); 3076 stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR); 3077 stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC); 3078 stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC); 3079 3080 IFNET_STAT_SET(ifp, collisions, stats->colc); 3081 3082 /* Rx Errors */ 3083 IFNET_STAT_SET(ifp, ierrors, 3084 stats->rxerrc + stats->crcerrs + stats->algnerrc + 3085 stats->ruc + stats->roc + stats->mpc + stats->cexterr); 3086 3087 /* Tx Errors */ 3088 IFNET_STAT_SET(ifp, oerrors, 3089 stats->ecol + stats->latecol + sc->watchdog_events); 3090 3091 /* Driver specific counters */ 3092 sc->device_control = E1000_READ_REG(hw, E1000_CTRL); 3093 sc->rx_control = E1000_READ_REG(hw, E1000_RCTL); 3094 sc->int_mask = E1000_READ_REG(hw, E1000_IMS); 3095 sc->eint_mask = E1000_READ_REG(hw, E1000_EIMS); 3096 
sc->packet_buf_alloc_tx = 3097 ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16); 3098 sc->packet_buf_alloc_rx = 3099 (E1000_READ_REG(hw, E1000_PBA) & 0xffff); 3100 } 3101 3102 static void 3103 igb_vf_init_stats(struct igb_softc *sc) 3104 { 3105 struct e1000_hw *hw = &sc->hw; 3106 struct e1000_vf_stats *stats; 3107 3108 stats = sc->stats; 3109 stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC); 3110 stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC); 3111 stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC); 3112 stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC); 3113 stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC); 3114 } 3115 3116 static void 3117 igb_update_vf_stats_counters(struct igb_softc *sc) 3118 { 3119 struct e1000_hw *hw = &sc->hw; 3120 struct e1000_vf_stats *stats; 3121 3122 if (sc->link_speed == 0) 3123 return; 3124 3125 stats = sc->stats; 3126 UPDATE_VF_REG(E1000_VFGPRC, stats->last_gprc, stats->gprc); 3127 UPDATE_VF_REG(E1000_VFGORC, stats->last_gorc, stats->gorc); 3128 UPDATE_VF_REG(E1000_VFGPTC, stats->last_gptc, stats->gptc); 3129 UPDATE_VF_REG(E1000_VFGOTC, stats->last_gotc, stats->gotc); 3130 UPDATE_VF_REG(E1000_VFMPRC, stats->last_mprc, stats->mprc); 3131 } 3132 3133 #ifdef IFPOLL_ENABLE 3134 3135 static void 3136 igb_npoll_status(struct ifnet *ifp) 3137 { 3138 struct igb_softc *sc = ifp->if_softc; 3139 uint32_t reg_icr; 3140 3141 ASSERT_SERIALIZED(&sc->main_serialize); 3142 3143 reg_icr = E1000_READ_REG(&sc->hw, E1000_ICR); 3144 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { 3145 sc->hw.mac.get_link_status = 1; 3146 igb_update_link_status(sc); 3147 } 3148 } 3149 3150 static void 3151 igb_npoll_tx(struct ifnet *ifp, void *arg, int cycle __unused) 3152 { 3153 struct igb_tx_ring *txr = arg; 3154 3155 ASSERT_SERIALIZED(&txr->tx_serialize); 3156 3157 igb_txeof(txr); 3158 if (!ifsq_is_empty(txr->ifsq)) 3159 ifsq_devstart(txr->ifsq); 3160 } 3161 3162 static void 3163 igb_npoll_rx(struct ifnet *ifp __unused, void *arg, int cycle) 3164 { 3165 struct igb_rx_ring *rxr = arg; 3166 3167 ASSERT_SERIALIZED(&rxr->rx_serialize); 3168 3169 igb_rxeof(rxr, cycle); 3170 } 3171 3172 static void 3173 igb_npoll(struct ifnet *ifp, struct ifpoll_info *info) 3174 { 3175 struct igb_softc *sc = ifp->if_softc; 3176 int i, txr_cnt, rxr_cnt; 3177 3178 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3179 3180 if (info) { 3181 int off; 3182 3183 info->ifpi_status.status_func = igb_npoll_status; 3184 info->ifpi_status.serializer = &sc->main_serialize; 3185 3186 txr_cnt = igb_get_txring_inuse(sc, TRUE); 3187 off = sc->tx_npoll_off; 3188 for (i = 0; i < txr_cnt; ++i) { 3189 struct igb_tx_ring *txr = &sc->tx_rings[i]; 3190 int idx = i + off; 3191 3192 KKASSERT(idx < ncpus2); 3193 info->ifpi_tx[idx].poll_func = igb_npoll_tx; 3194 info->ifpi_tx[idx].arg = txr; 3195 info->ifpi_tx[idx].serializer = &txr->tx_serialize; 3196 ifsq_set_cpuid(txr->ifsq, idx); 3197 } 3198 3199 rxr_cnt = igb_get_rxring_inuse(sc, TRUE); 3200 off = sc->rx_npoll_off; 3201 for (i = 0; i < rxr_cnt; ++i) { 3202 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 3203 int idx = i + off; 3204 3205 KKASSERT(idx < ncpus2); 3206 info->ifpi_rx[idx].poll_func = igb_npoll_rx; 3207 info->ifpi_rx[idx].arg = rxr; 3208 info->ifpi_rx[idx].serializer = &rxr->rx_serialize; 3209 } 3210 3211 if (ifp->if_flags & IFF_RUNNING) { 3212 if (rxr_cnt == sc->rx_ring_inuse && 3213 txr_cnt == sc->tx_ring_inuse) { 3214 igb_set_timer_cpuid(sc, TRUE); 3215 igb_disable_intr(sc); 3216 } else { 3217 igb_init(sc); 3218 } 3219 } 3220 } else { 3221 for (i = 0; i < sc->tx_ring_cnt; 
++i) {
3222 struct igb_tx_ring *txr = &sc->tx_rings[i];
3223
3224 ifsq_set_cpuid(txr->ifsq, txr->tx_intr_cpuid);
3225 }
3226
3227 if (ifp->if_flags & IFF_RUNNING) {
3228 txr_cnt = igb_get_txring_inuse(sc, FALSE);
3229 rxr_cnt = igb_get_rxring_inuse(sc, FALSE);
3230
3231 if (rxr_cnt == sc->rx_ring_inuse &&
3232 txr_cnt == sc->tx_ring_inuse) {
3233 igb_set_timer_cpuid(sc, FALSE);
3234 igb_enable_intr(sc);
3235 } else {
3236 igb_init(sc);
3237 }
3238 }
3239 }
3240 }
3241
3242 #endif /* IFPOLL_ENABLE */
3243
3244 static void
3245 igb_intr(void *xsc)
3246 {
3247 struct igb_softc *sc = xsc;
3248 struct ifnet *ifp = &sc->arpcom.ac_if;
3249 uint32_t eicr;
3250
3251 ASSERT_SERIALIZED(&sc->main_serialize);
3252
3253 eicr = E1000_READ_REG(&sc->hw, E1000_EICR);
3254
3255 if (eicr == 0)
3256 return;
3257
3258 if (ifp->if_flags & IFF_RUNNING) {
3259 struct igb_tx_ring *txr = &sc->tx_rings[0];
3260 int i;
3261
3262 for (i = 0; i < sc->rx_ring_inuse; ++i) {
3263 struct igb_rx_ring *rxr = &sc->rx_rings[i];
3264
3265 if (eicr & rxr->rx_intr_mask) {
3266 lwkt_serialize_enter(&rxr->rx_serialize);
3267 igb_rxeof(rxr, -1);
3268 lwkt_serialize_exit(&rxr->rx_serialize);
3269 }
3270 }
3271
3272 if (eicr & txr->tx_intr_mask) {
3273 lwkt_serialize_enter(&txr->tx_serialize);
3274 igb_txeof(txr);
3275 if (!ifsq_is_empty(txr->ifsq))
3276 ifsq_devstart(txr->ifsq);
3277 lwkt_serialize_exit(&txr->tx_serialize);
3278 }
3279 }
3280
3281 if (eicr & E1000_EICR_OTHER) {
3282 uint32_t icr = E1000_READ_REG(&sc->hw, E1000_ICR);
3283
3284 /* Link status change */
3285 if (icr & E1000_ICR_LSC) {
3286 sc->hw.mac.get_link_status = 1;
3287 igb_update_link_status(sc);
3288 }
3289 }
3290
3291 /*
3292 * Reading EICR has the side effect of clearing the interrupt mask,
3293 * so all interrupts need to be enabled here.
3294 */
3295 E1000_WRITE_REG(&sc->hw, E1000_EIMS, sc->intr_mask);
3296 }
3297
3298 static void
3299 igb_intr_shared(void *xsc)
3300 {
3301 struct igb_softc *sc = xsc;
3302 struct ifnet *ifp = &sc->arpcom.ac_if;
3303 uint32_t reg_icr;
3304
3305 ASSERT_SERIALIZED(&sc->main_serialize);
3306
3307 reg_icr = E1000_READ_REG(&sc->hw, E1000_ICR);
3308
3309 /* Hot eject? */
3310 if (reg_icr == 0xffffffff)
3311 return;
3312
3313 /* Definitely not our interrupt.
*/
3314 if (reg_icr == 0x0)
3315 return;
3316
3317 if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
3318 return;
3319
3320 if (ifp->if_flags & IFF_RUNNING) {
3321 if (reg_icr &
3322 (E1000_ICR_RXT0 | E1000_ICR_RXDMT0 | E1000_ICR_RXO)) {
3323 int i;
3324
3325 for (i = 0; i < sc->rx_ring_inuse; ++i) {
3326 struct igb_rx_ring *rxr = &sc->rx_rings[i];
3327
3328 lwkt_serialize_enter(&rxr->rx_serialize);
3329 igb_rxeof(rxr, -1);
3330 lwkt_serialize_exit(&rxr->rx_serialize);
3331 }
3332 }
3333
3334 if (reg_icr & E1000_ICR_TXDW) {
3335 struct igb_tx_ring *txr = &sc->tx_rings[0];
3336
3337 lwkt_serialize_enter(&txr->tx_serialize);
3338 igb_txeof(txr);
3339 if (!ifsq_is_empty(txr->ifsq))
3340 ifsq_devstart(txr->ifsq);
3341 lwkt_serialize_exit(&txr->tx_serialize);
3342 }
3343 }
3344
3345 /* Link status change */
3346 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
3347 sc->hw.mac.get_link_status = 1;
3348 igb_update_link_status(sc);
3349 }
3350
3351 if (reg_icr & E1000_ICR_RXO)
3352 sc->rx_overruns++;
3353 }
3354
3355 static int
3356 igb_encap(struct igb_tx_ring *txr, struct mbuf **m_headp,
3357 int *segs_used, int *idx)
3358 {
3359 bus_dma_segment_t segs[IGB_MAX_SCATTER];
3360 bus_dmamap_t map;
3361 struct igb_tx_buf *tx_buf, *tx_buf_mapped;
3362 union e1000_adv_tx_desc *txd = NULL;
3363 struct mbuf *m_head = *m_headp;
3364 uint32_t olinfo_status = 0, cmd_type_len = 0, cmd_rs = 0;
3365 int maxsegs, nsegs, i, j, error;
3366 uint32_t hdrlen = 0;
3367
3368 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
3369 error = igb_tso_pullup(txr, m_headp);
3370 if (error)
3371 return error;
3372 m_head = *m_headp;
3373 }
3374
3375 /* Set basic descriptor constants */
3376 cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
3377 cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
3378 if (m_head->m_flags & M_VLANTAG)
3379 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3380
3381 /*
3382 * Map the packet for DMA.
3383 */
3384 tx_buf = &txr->tx_buf[txr->next_avail_desc];
3385 tx_buf_mapped = tx_buf;
3386 map = tx_buf->map;
3387
3388 maxsegs = txr->tx_avail - IGB_TX_RESERVED;
3389 KASSERT(maxsegs >= txr->spare_desc, ("not enough spare TX desc\n"));
3390 if (maxsegs > IGB_MAX_SCATTER)
3391 maxsegs = IGB_MAX_SCATTER;
3392
3393 error = bus_dmamap_load_mbuf_defrag(txr->tx_tag, map, m_headp,
3394 segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
3395 if (error) {
3396 if (error == ENOBUFS)
3397 txr->sc->mbuf_defrag_failed++;
3398 else
3399 txr->sc->no_tx_dma_setup++;
3400
3401 m_freem(*m_headp);
3402 *m_headp = NULL;
3403 return error;
3404 }
3405 bus_dmamap_sync(txr->tx_tag, map, BUS_DMASYNC_PREWRITE);
3406
3407 m_head = *m_headp;
3408
3409 /*
3410 * Set up the TX context descriptor, if any hardware offloading is
3411 * needed. This includes CSUM, VLAN, and TSO. It will consume one
3412 * TX descriptor.
3413 *
3414 * Unlike these chips' predecessors (em/emx), a TX context descriptor
3415 * will _not_ interfere with TX data fetch pipelining.
3416 */
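/*
 * NOTE:
 * Illustrative descriptor accounting for the branch below: a packet
 * that needs TSO or checksum offload consumes one context descriptor
 * plus nsegs data descriptors, which is why tx_nsegs and *segs_used
 * are each bumped by one here before nsegs more are added after the
 * branch.
 */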
3417 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
3418 igb_tso_ctx(txr, m_head, &hdrlen);
3419 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3420 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3421 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3422 txr->tx_nsegs++;
3423 (*segs_used)++;
3424 } else if (igb_txcsum_ctx(txr, m_head)) {
3425 if (m_head->m_pkthdr.csum_flags & CSUM_IP)
3426 olinfo_status |= (E1000_TXD_POPTS_IXSM << 8);
3427 if (m_head->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_TCP))
3428 olinfo_status |= (E1000_TXD_POPTS_TXSM << 8);
3429 txr->tx_nsegs++;
3430 (*segs_used)++;
3431 }
3432
3433 *segs_used += nsegs;
3434 txr->tx_nsegs += nsegs;
3435 if (txr->tx_nsegs >= txr->intr_nsegs) {
3436 /*
3437 * Report Status (RS) is turned on every intr_nsegs
3438 * descriptors (roughly).
3439 */
3440 txr->tx_nsegs = 0;
3441 cmd_rs = E1000_ADVTXD_DCMD_RS;
3442 }
3443
3444 /* Calculate payload length */
3445 olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
3446 << E1000_ADVTXD_PAYLEN_SHIFT);
3447
3448 /*
3449 * 82575 needs the TX context index added; the queue
3450 * index is used as TX context index here.
3451 */
3452 if (txr->sc->hw.mac.type == e1000_82575)
3453 olinfo_status |= txr->me << 4;
3454
3455 /* Set up our transmit descriptors */
3456 i = txr->next_avail_desc;
3457 for (j = 0; j < nsegs; j++) {
3458 bus_size_t seg_len;
3459 bus_addr_t seg_addr;
3460
3461 tx_buf = &txr->tx_buf[i];
3462 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
3463 seg_addr = segs[j].ds_addr;
3464 seg_len = segs[j].ds_len;
3465
3466 txd->read.buffer_addr = htole64(seg_addr);
3467 txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
3468 txd->read.olinfo_status = htole32(olinfo_status);
3469 if (++i == txr->num_tx_desc)
3470 i = 0;
3471 tx_buf->m_head = NULL;
3472 }
3473
3474 KASSERT(txr->tx_avail > nsegs, ("invalid avail TX desc\n"));
3475 txr->next_avail_desc = i;
3476 txr->tx_avail -= nsegs;
3477
3478 tx_buf->m_head = m_head;
3479 tx_buf_mapped->map = tx_buf->map;
3480 tx_buf->map = map;
3481
3482 /*
3483 * Last Descriptor of Packet needs End Of Packet (EOP)
3484 */
3485 txd->read.cmd_type_len |= htole32(E1000_ADVTXD_DCMD_EOP | cmd_rs);
3486
3487 /*
3488 * Defer the TDT update until enough descriptors are set up
3489 */
3490 *idx = i;
3491 #ifdef IGB_TSS_DEBUG
3492 ++txr->tx_packets;
3493 #endif
3494
3495 return 0;
3496 }
3497
3498 static void
3499 igb_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
3500 {
3501 struct igb_softc *sc = ifp->if_softc;
3502 struct igb_tx_ring *txr = ifsq_get_priv(ifsq);
3503 struct mbuf *m_head;
3504 int idx = -1, nsegs = 0;
3505
3506 KKASSERT(txr->ifsq == ifsq);
3507 ASSERT_SERIALIZED(&txr->tx_serialize);
3508
3509 if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
3510 return;
3511
3512 if (!sc->link_active || (txr->tx_flags & IGB_TXFLAG_ENABLED) == 0) {
3513 ifsq_purge(ifsq);
3514 return;
3515 }
3516
3517 if (!IGB_IS_NOT_OACTIVE(txr))
3518 igb_txeof(txr);
3519
3520 while (!ifsq_is_empty(ifsq)) {
3521 if (IGB_IS_OACTIVE(txr)) {
3522 ifsq_set_oactive(ifsq);
3523 /* Set watchdog on */
3524 txr->tx_watchdog.wd_timer = 5;
3525 break;
3526 }
3527
3528 m_head = ifsq_dequeue(ifsq);
3529 if (m_head == NULL)
3530 break;
3531
3532 if (igb_encap(txr, &m_head, &nsegs, &idx)) {
3533 IFNET_STAT_INC(ifp, oerrors, 1);
3534 continue;
3535 }
3536
3537 if (nsegs >= txr->wreg_nsegs) {
3538 E1000_WRITE_REG(&txr->sc->hw, E1000_TDT(txr->me), idx);
3539 idx = -1;
3540 nsegs = 0;
3541 }
3542
3543 /* Send a copy of the frame to the BPF listener */
3544 ETHER_BPF_MTAP(ifp, m_head);
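/*
 * NOTE:
 * The TDT doorbell is deliberately not rung for every frame;
 * igb_encap() only records the ring index in idx, and the register
 * write above fires once per wreg_nsegs accumulated segments, with a
 * final flush after the loop when idx >= 0. This batching cuts down
 * on MMIO writes.
 */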
3545 }
3546 if (idx >= 0)
3547 E1000_WRITE_REG(&txr->sc->hw, E1000_TDT(txr->me), idx);
3548 }
3549
3550 static void
3551 igb_watchdog(struct ifaltq_subque *ifsq)
3552 {
3553 struct igb_tx_ring *txr = ifsq_get_priv(ifsq);
3554 struct ifnet *ifp = ifsq_get_ifp(ifsq);
3555 struct igb_softc *sc = ifp->if_softc;
3556 int i;
3557
3558 KKASSERT(txr->ifsq == ifsq);
3559 ASSERT_IFNET_SERIALIZED_ALL(ifp);
3560
3561 /*
3562 * If flow control has paused us since we last checked,
3563 * it invalidates the watchdog timing, so don't run it.
3564 */
3565 if (sc->pause_frames) {
3566 sc->pause_frames = 0;
3567 txr->tx_watchdog.wd_timer = 5;
3568 return;
3569 }
3570
3571 if_printf(ifp, "Watchdog timeout -- resetting\n");
3572 if_printf(ifp, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
3573 E1000_READ_REG(&sc->hw, E1000_TDH(txr->me)),
3574 E1000_READ_REG(&sc->hw, E1000_TDT(txr->me)));
3575 if_printf(ifp, "TX(%d) desc avail = %d, "
3576 "Next TX to Clean = %d\n",
3577 txr->me, txr->tx_avail, txr->next_to_clean);
3578
3579 IFNET_STAT_INC(ifp, oerrors, 1);
3580 sc->watchdog_events++;
3581
3582 igb_init(sc);
3583 for (i = 0; i < sc->tx_ring_inuse; ++i)
3584 ifsq_devstart_sched(sc->tx_rings[i].ifsq);
3585 }
3586
3587 static void
3588 igb_set_eitr(struct igb_softc *sc, int idx, int rate)
3589 {
3590 uint32_t eitr = 0;
3591
3592 if (rate > 0) {
3593 if (sc->hw.mac.type == e1000_82575) {
3594 eitr = 1000000000 / 256 / rate;
3595 /*
3596 * NOTE:
3597 * The documentation is wrong on the 2-bit left shift
3598 */
3599 } else {
3600 eitr = 1000000 / rate;
3601 eitr <<= IGB_EITR_INTVL_SHIFT;
3602 }
3603
3604 if (eitr == 0) {
3605 /* Don't disable it */
3606 eitr = 1 << IGB_EITR_INTVL_SHIFT;
3607 } else if (eitr > IGB_EITR_INTVL_MASK) {
3608 /* Don't allow it to be too large */
3609 eitr = IGB_EITR_INTVL_MASK;
3610 }
3611 }
3612 if (sc->hw.mac.type == e1000_82575)
3613 eitr |= eitr << 16;
3614 else
3615 eitr |= E1000_EITR_CNT_IGNR;
3616 E1000_WRITE_REG(&sc->hw, E1000_EITR(idx), eitr);
3617 }
3618
3619 static int
3620 igb_sysctl_intr_rate(SYSCTL_HANDLER_ARGS)
3621 {
3622 struct igb_softc *sc = (void *)arg1;
3623 struct ifnet *ifp = &sc->arpcom.ac_if;
3624 int error, intr_rate;
3625
3626 intr_rate = sc->intr_rate;
3627 error = sysctl_handle_int(oidp, &intr_rate, 0, req);
3628 if (error || req->newptr == NULL)
3629 return error;
3630 if (intr_rate < 0)
3631 return EINVAL;
3632
3633 ifnet_serialize_all(ifp);
3634
3635 sc->intr_rate = intr_rate;
3636 if (ifp->if_flags & IFF_RUNNING)
3637 igb_set_eitr(sc, 0, sc->intr_rate);
3638
3639 if (bootverbose)
3640 if_printf(ifp, "interrupt rate set to %d/sec\n", sc->intr_rate);
3641
3642 ifnet_deserialize_all(ifp);
3643
3644 return 0;
3645 }
3646
3647 static int
3648 igb_sysctl_msix_rate(SYSCTL_HANDLER_ARGS)
3649 {
3650 struct igb_msix_data *msix = (void *)arg1;
3651 struct igb_softc *sc = msix->msix_sc;
3652 struct ifnet *ifp = &sc->arpcom.ac_if;
3653 int error, msix_rate;
3654
3655 msix_rate = msix->msix_rate;
3656 error = sysctl_handle_int(oidp, &msix_rate, 0, req);
3657 if (error || req->newptr == NULL)
3658 return error;
3659 if (msix_rate < 0)
3660 return EINVAL;
3661
3662 lwkt_serialize_enter(msix->msix_serialize);
3663
3664 msix->msix_rate = msix_rate;
3665 if (ifp->if_flags & IFF_RUNNING)
3666 igb_set_eitr(sc, msix->msix_vector, msix->msix_rate);
3667
3668 if (bootverbose) {
3669 if_printf(ifp, "%s set to %d/sec\n", msix->msix_rate_desc,
3670 msix->msix_rate);
3671 }
3672
3673 lwkt_serialize_exit(msix->msix_serialize);
3674
3675 return 0;
3676 }
3677
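/*
 * NOTE:
 * Illustrative EITR arithmetic for the handlers above, which funnel
 * into igb_set_eitr(): a rate of 8000 ints/s programs
 * 1000000000 / 256 / 8000 ~= 488 on 82575, while later macs get an
 * interval of 1000000 / 8000 = 125 microseconds shifted into place by
 * IGB_EITR_INTVL_SHIFT. A rate of 0 leaves the interval at 0, i.e.
 * interrupt throttling is disabled.
 */
3678 static int 3679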
igb_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS)
3680 {
3681 struct igb_softc *sc = (void *)arg1;
3682 struct ifnet *ifp = &sc->arpcom.ac_if;
3683 struct igb_tx_ring *txr = &sc->tx_rings[0];
3684 int error, nsegs;
3685
3686 nsegs = txr->intr_nsegs;
3687 error = sysctl_handle_int(oidp, &nsegs, 0, req);
3688 if (error || req->newptr == NULL)
3689 return error;
3690 if (nsegs <= 0)
3691 return EINVAL;
3692
3693 ifnet_serialize_all(ifp);
3694
3695 if (nsegs >= txr->num_tx_desc - txr->oact_lo_desc ||
3696 nsegs >= txr->oact_hi_desc - IGB_MAX_SCATTER) {
3697 error = EINVAL;
3698 } else {
3699 int i;
3700
3701 error = 0;
3702 for (i = 0; i < sc->tx_ring_cnt; ++i)
3703 sc->tx_rings[i].intr_nsegs = nsegs;
3704 }
3705
3706 ifnet_deserialize_all(ifp);
3707
3708 return error;
3709 }
3710
3711 static int
3712 igb_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
3713 {
3714 struct igb_softc *sc = (void *)arg1;
3715 struct ifnet *ifp = &sc->arpcom.ac_if;
3716 int error, nsegs, i;
3717
3718 nsegs = sc->rx_rings[0].wreg_nsegs;
3719 error = sysctl_handle_int(oidp, &nsegs, 0, req);
3720 if (error || req->newptr == NULL)
3721 return error;
3722
3723 ifnet_serialize_all(ifp);
3724 for (i = 0; i < sc->rx_ring_cnt; ++i)
3725 sc->rx_rings[i].wreg_nsegs = nsegs;
3726 ifnet_deserialize_all(ifp);
3727
3728 return 0;
3729 }
3730
3731 static int
3732 igb_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
3733 {
3734 struct igb_softc *sc = (void *)arg1;
3735 struct ifnet *ifp = &sc->arpcom.ac_if;
3736 int error, nsegs, i;
3737
3738 nsegs = sc->tx_rings[0].wreg_nsegs;
3739 error = sysctl_handle_int(oidp, &nsegs, 0, req);
3740 if (error || req->newptr == NULL)
3741 return error;
3742
3743 ifnet_serialize_all(ifp);
3744 for (i = 0; i < sc->tx_ring_cnt; ++i)
3745 sc->tx_rings[i].wreg_nsegs = nsegs;
3746 ifnet_deserialize_all(ifp);
3747
3748 return 0;
3749 }
3750
3751 #ifdef IFPOLL_ENABLE
3752
3753 static int
3754 igb_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS)
3755 {
3756 struct igb_softc *sc = (void *)arg1;
3757 struct ifnet *ifp = &sc->arpcom.ac_if;
3758 int error, off;
3759
3760 off = sc->rx_npoll_off;
3761 error = sysctl_handle_int(oidp, &off, 0, req);
3762 if (error || req->newptr == NULL)
3763 return error;
3764 if (off < 0)
3765 return EINVAL;
3766
3767 ifnet_serialize_all(ifp);
3768 if (off >= ncpus2 || off % sc->rx_ring_cnt != 0) {
3769 error = EINVAL;
3770 } else {
3771 error = 0;
3772 sc->rx_npoll_off = off;
3773 }
3774 ifnet_deserialize_all(ifp);
3775
3776 return error;
3777 }
3778
3779 static int
3780 igb_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS)
3781 {
3782 struct igb_softc *sc = (void *)arg1;
3783 struct ifnet *ifp = &sc->arpcom.ac_if;
3784 int error, off;
3785
3786 off = sc->tx_npoll_off;
3787 error = sysctl_handle_int(oidp, &off, 0, req);
3788 if (error || req->newptr == NULL)
3789 return error;
3790 if (off < 0)
3791 return EINVAL;
3792
3793 ifnet_serialize_all(ifp);
3794 if (off >= ncpus2 || off % sc->tx_ring_cnt != 0) {
3795 error = EINVAL;
3796 } else {
3797 error = 0;
3798 sc->tx_npoll_off = off;
3799 }
3800 ifnet_deserialize_all(ifp);
3801
3802 return error;
3803 }
3804
3805 #endif /* IFPOLL_ENABLE */
3806
3807 static void
3808 igb_init_intr(struct igb_softc *sc)
3809 {
3810 igb_set_intr_mask(sc);
3811
3812 if ((sc->flags & IGB_FLAG_SHARED_INTR) == 0)
3813 igb_init_unshared_intr(sc);
3814
3815 if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
3816 igb_set_eitr(sc, 0, sc->intr_rate);
3817 } else {
3818 int i;
3819
3820 for (i = 0; i < sc->msix_cnt; ++i)
3821 igb_set_eitr(sc, i, sc->msix_data[i].msix_rate);
3822 }
3823 }
3824
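/*
 * NOTE:
 * Illustrative IVAR layout for igb_init_unshared_intr() below, on the
 * 82580-and-later macs handled by the first case of the mapping
 * switch: each 32-bit IVAR register carries the entries for two
 * queues, with the RX vector of the even queue in byte 0 and its TX
 * vector in byte 1, the odd queue's RX vector in byte 2 and its TX
 * vector in byte 3; bit 7 of each byte (E1000_IVAR_VALID) arms the
 * entry.
 */
3825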
static void 3826 igb_init_unshared_intr(struct igb_softc *sc) 3827 { 3828 struct e1000_hw *hw = &sc->hw; 3829 const struct igb_rx_ring *rxr; 3830 const struct igb_tx_ring *txr; 3831 uint32_t ivar, index; 3832 int i; 3833 3834 /* 3835 * Enable extended mode 3836 */ 3837 if (sc->hw.mac.type != e1000_82575) { 3838 uint32_t gpie; 3839 int ivar_max; 3840 3841 gpie = E1000_GPIE_NSICR; 3842 if (sc->intr_type == PCI_INTR_TYPE_MSIX) { 3843 gpie |= E1000_GPIE_MSIX_MODE | 3844 E1000_GPIE_EIAME | 3845 E1000_GPIE_PBA; 3846 } 3847 E1000_WRITE_REG(hw, E1000_GPIE, gpie); 3848 3849 /* 3850 * Clear IVARs 3851 */ 3852 switch (sc->hw.mac.type) { 3853 case e1000_82576: 3854 ivar_max = IGB_MAX_IVAR_82576; 3855 break; 3856 3857 case e1000_82580: 3858 ivar_max = IGB_MAX_IVAR_82580; 3859 break; 3860 3861 case e1000_i350: 3862 ivar_max = IGB_MAX_IVAR_I350; 3863 break; 3864 3865 case e1000_i354: 3866 ivar_max = IGB_MAX_IVAR_I354; 3867 break; 3868 3869 case e1000_vfadapt: 3870 case e1000_vfadapt_i350: 3871 ivar_max = IGB_MAX_IVAR_VF; 3872 break; 3873 3874 case e1000_i210: 3875 ivar_max = IGB_MAX_IVAR_I210; 3876 break; 3877 3878 case e1000_i211: 3879 ivar_max = IGB_MAX_IVAR_I211; 3880 break; 3881 3882 default: 3883 panic("unknown mac type %d\n", sc->hw.mac.type); 3884 } 3885 for (i = 0; i < ivar_max; ++i) 3886 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, i, 0); 3887 E1000_WRITE_REG(hw, E1000_IVAR_MISC, 0); 3888 } else { 3889 uint32_t tmp; 3890 3891 KASSERT(sc->intr_type != PCI_INTR_TYPE_MSIX, 3892 ("82575 w/ MSI-X")); 3893 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT); 3894 tmp |= E1000_CTRL_EXT_IRCA; 3895 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp); 3896 } 3897 3898 /* 3899 * Map TX/RX interrupts to EICR 3900 */ 3901 switch (sc->hw.mac.type) { 3902 case e1000_82580: 3903 case e1000_i350: 3904 case e1000_i354: 3905 case e1000_vfadapt: 3906 case e1000_vfadapt_i350: 3907 case e1000_i210: 3908 case e1000_i211: 3909 /* RX entries */ 3910 for (i = 0; i < sc->rx_ring_inuse; ++i) { 3911 rxr = &sc->rx_rings[i]; 3912 3913 index = i >> 1; 3914 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); 3915 3916 if (i & 1) { 3917 ivar &= 0xff00ffff; 3918 ivar |= 3919 (rxr->rx_intr_bit | E1000_IVAR_VALID) << 16; 3920 } else { 3921 ivar &= 0xffffff00; 3922 ivar |= 3923 (rxr->rx_intr_bit | E1000_IVAR_VALID); 3924 } 3925 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 3926 } 3927 /* TX entries */ 3928 for (i = 0; i < sc->tx_ring_inuse; ++i) { 3929 txr = &sc->tx_rings[i]; 3930 3931 index = i >> 1; 3932 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); 3933 3934 if (i & 1) { 3935 ivar &= 0x00ffffff; 3936 ivar |= 3937 (txr->tx_intr_bit | E1000_IVAR_VALID) << 24; 3938 } else { 3939 ivar &= 0xffff00ff; 3940 ivar |= 3941 (txr->tx_intr_bit | E1000_IVAR_VALID) << 8; 3942 } 3943 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 3944 } 3945 if (sc->intr_type == PCI_INTR_TYPE_MSIX) { 3946 ivar = (sc->sts_intr_bit | E1000_IVAR_VALID) << 8; 3947 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); 3948 } 3949 break; 3950 3951 case e1000_82576: 3952 /* RX entries */ 3953 for (i = 0; i < sc->rx_ring_inuse; ++i) { 3954 rxr = &sc->rx_rings[i]; 3955 3956 index = i & 0x7; /* Each IVAR has two entries */ 3957 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); 3958 3959 if (i < 8) { 3960 ivar &= 0xffffff00; 3961 ivar |= 3962 (rxr->rx_intr_bit | E1000_IVAR_VALID); 3963 } else { 3964 ivar &= 0xff00ffff; 3965 ivar |= 3966 (rxr->rx_intr_bit | E1000_IVAR_VALID) << 16; 3967 } 3968 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 3969 } 3970 /* TX entries */ 3971 for (i = 
	case e1000_82576:
		/* RX entries */
		for (i = 0; i < sc->rx_ring_inuse; ++i) {
			rxr = &sc->rx_rings[i];

			index = i & 0x7; /* Each IVAR has two entries */
			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);

			if (i < 8) {
				ivar &= 0xffffff00;
				ivar |=
				    (rxr->rx_intr_bit | E1000_IVAR_VALID);
			} else {
				ivar &= 0xff00ffff;
				ivar |=
				    (rxr->rx_intr_bit | E1000_IVAR_VALID) << 16;
			}
			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
		}
		/* TX entries */
		for (i = 0; i < sc->tx_ring_inuse; ++i) {
			txr = &sc->tx_rings[i];

			index = i & 0x7; /* Each IVAR has two entries */
			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);

			if (i < 8) {
				ivar &= 0xffff00ff;
				ivar |=
				    (txr->tx_intr_bit | E1000_IVAR_VALID) << 8;
			} else {
				ivar &= 0x00ffffff;
				ivar |=
				    (txr->tx_intr_bit | E1000_IVAR_VALID) << 24;
			}
			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
		}
		if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
			ivar = (sc->sts_intr_bit | E1000_IVAR_VALID) << 8;
			E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
		}
		break;

	case e1000_82575:
		/*
		 * Enable necessary interrupt bits.
		 *
		 * The name of the register is confusing; in addition to
		 * configuring the first vector of MSI-X, it also configures
		 * which bits of EICR could be set by the hardware even when
		 * MSI or line interrupt is used; it thus controls interrupt
		 * generation.  It MUST be configured explicitly; the default
		 * value mentioned in the datasheet is wrong: RX queue0 and
		 * TX queue0 are NOT enabled by default.
		 */
		E1000_WRITE_REG(&sc->hw, E1000_MSIXBM(0), sc->intr_mask);
		break;

	default:
		panic("unknown mac type %d", sc->hw.mac.type);
	}
}

static int
igb_setup_intr(struct igb_softc *sc)
{
	int error;

	if (sc->intr_type == PCI_INTR_TYPE_MSIX)
		return igb_msix_setup(sc);

	error = bus_setup_intr(sc->dev, sc->intr_res, INTR_MPSAFE,
	    (sc->flags & IGB_FLAG_SHARED_INTR) ? igb_intr_shared : igb_intr,
	    sc, &sc->intr_tag, &sc->main_serialize);
	if (error) {
		device_printf(sc->dev,
		    "Failed to register interrupt handler\n");
		return error;
	}
	return 0;
}
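
/*
 * Assign EICR bits to the TX/RX rings for MSI/legacy interrupts.
 *
 * On the 82575 the EICR bits are fixed per queue, so only the mask is
 * recorded.  On later MACs the bits are handed out round-robin, wrapping
 * at intr_bitmax; e.g. with intr_bitmax 4, two TX rings and two RX rings
 * end up on bits 0, 1, 2 and 3 respectively, in the order the loops in
 * igb_alloc_intr() call these helpers (TX rings first, then RX rings).
 */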
static void
igb_set_txintr_mask(struct igb_tx_ring *txr, int *intr_bit0, int intr_bitmax)
{
	if (txr->sc->hw.mac.type == e1000_82575) {
		txr->tx_intr_bit = 0;	/* unused */
		switch (txr->me) {
		case 0:
			txr->tx_intr_mask = E1000_EICR_TX_QUEUE0;
			break;
		case 1:
			txr->tx_intr_mask = E1000_EICR_TX_QUEUE1;
			break;
		case 2:
			txr->tx_intr_mask = E1000_EICR_TX_QUEUE2;
			break;
		case 3:
			txr->tx_intr_mask = E1000_EICR_TX_QUEUE3;
			break;
		default:
			panic("unsupported TX ring %d", txr->me);
		}
	} else {
		int intr_bit = *intr_bit0;

		txr->tx_intr_bit = intr_bit % intr_bitmax;
		txr->tx_intr_mask = 1 << txr->tx_intr_bit;

		*intr_bit0 = intr_bit + 1;
	}
}

static void
igb_set_rxintr_mask(struct igb_rx_ring *rxr, int *intr_bit0, int intr_bitmax)
{
	if (rxr->sc->hw.mac.type == e1000_82575) {
		rxr->rx_intr_bit = 0;	/* unused */
		switch (rxr->me) {
		case 0:
			rxr->rx_intr_mask = E1000_EICR_RX_QUEUE0;
			break;
		case 1:
			rxr->rx_intr_mask = E1000_EICR_RX_QUEUE1;
			break;
		case 2:
			rxr->rx_intr_mask = E1000_EICR_RX_QUEUE2;
			break;
		case 3:
			rxr->rx_intr_mask = E1000_EICR_RX_QUEUE3;
			break;
		default:
			panic("unsupported RX ring %d", rxr->me);
		}
	} else {
		int intr_bit = *intr_bit0;

		rxr->rx_intr_bit = intr_bit % intr_bitmax;
		rxr->rx_intr_mask = 1 << rxr->rx_intr_bit;

		*intr_bit0 = intr_bit + 1;
	}
}

static void
igb_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
{
	struct igb_softc *sc = ifp->if_softc;

	ifnet_serialize_array_enter(sc->serializes, sc->serialize_cnt, slz);
}

static void
igb_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
{
	struct igb_softc *sc = ifp->if_softc;

	ifnet_serialize_array_exit(sc->serializes, sc->serialize_cnt, slz);
}

static int
igb_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
{
	struct igb_softc *sc = ifp->if_softc;

	return ifnet_serialize_array_try(sc->serializes, sc->serialize_cnt,
	    slz);
}

#ifdef INVARIANTS

static void
igb_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
    boolean_t serialized)
{
	struct igb_softc *sc = ifp->if_softc;

	ifnet_serialize_array_assert(sc->serializes, sc->serialize_cnt,
	    slz, serialized);
}

#endif	/* INVARIANTS */

static void
igb_set_intr_mask(struct igb_softc *sc)
{
	int i;

	sc->intr_mask = sc->sts_intr_mask;
	for (i = 0; i < sc->rx_ring_inuse; ++i)
		sc->intr_mask |= sc->rx_rings[i].rx_intr_mask;
	for (i = 0; i < sc->tx_ring_inuse; ++i)
		sc->intr_mask |= sc->tx_rings[i].tx_intr_mask;
	if (bootverbose) {
		if_printf(&sc->arpcom.ac_if, "intr mask 0x%08x\n",
		    sc->intr_mask);
	}
}
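
/*
 * Interrupt allocation: try MSI-X first; if that succeeds the rings
 * have already been bound to vectors by igb_msix_try_alloc() and only
 * the "inuse" ring counts and the combined interrupt mask remain to
 * be set.  Otherwise fall back to a single MSI or legacy interrupt
 * shared by all rings.
 */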
static int
igb_alloc_intr(struct igb_softc *sc)
{
	int i, intr_bit, intr_bitmax;
	u_int intr_flags;

	igb_msix_try_alloc(sc);
	if (sc->intr_type == PCI_INTR_TYPE_MSIX)
		goto done;

	/*
	 * Allocate MSI/legacy interrupt resource
	 */
	sc->intr_type = pci_alloc_1intr(sc->dev, igb_msi_enable,
	    &sc->intr_rid, &intr_flags);

	if (sc->intr_type == PCI_INTR_TYPE_LEGACY) {
		int unshared;

		unshared = device_getenv_int(sc->dev, "irq.unshared", 0);
		if (!unshared) {
			sc->flags |= IGB_FLAG_SHARED_INTR;
			if (bootverbose)
				device_printf(sc->dev, "IRQ shared\n");
		} else {
			intr_flags &= ~RF_SHAREABLE;
			if (bootverbose)
				device_printf(sc->dev, "IRQ unshared\n");
		}
	}

	sc->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
	    &sc->intr_rid, intr_flags);
	if (sc->intr_res == NULL) {
		device_printf(sc->dev, "Unable to allocate bus resource: "
		    "interrupt\n");
		return ENXIO;
	}

	for (i = 0; i < sc->tx_ring_cnt; ++i)
		sc->tx_rings[i].tx_intr_cpuid = rman_get_cpuid(sc->intr_res);

	/*
	 * Setup MSI/legacy interrupt mask
	 */
	switch (sc->hw.mac.type) {
	case e1000_82575:
		intr_bitmax = IGB_MAX_TXRXINT_82575;
		break;

	case e1000_82576:
		intr_bitmax = IGB_MAX_TXRXINT_82576;
		break;

	case e1000_82580:
		intr_bitmax = IGB_MAX_TXRXINT_82580;
		break;

	case e1000_i350:
		intr_bitmax = IGB_MAX_TXRXINT_I350;
		break;

	case e1000_i354:
		intr_bitmax = IGB_MAX_TXRXINT_I354;
		break;

	case e1000_i210:
		intr_bitmax = IGB_MAX_TXRXINT_I210;
		break;

	case e1000_i211:
		intr_bitmax = IGB_MAX_TXRXINT_I211;
		break;

	default:
		intr_bitmax = IGB_MIN_TXRXINT;
		break;
	}
	intr_bit = 0;
	for (i = 0; i < sc->tx_ring_cnt; ++i)
		igb_set_txintr_mask(&sc->tx_rings[i], &intr_bit, intr_bitmax);
	for (i = 0; i < sc->rx_ring_cnt; ++i)
		igb_set_rxintr_mask(&sc->rx_rings[i], &intr_bit, intr_bitmax);
	sc->sts_intr_bit = 0;
	sc->sts_intr_mask = E1000_EICR_OTHER;

	/* Initialize interrupt rate */
	sc->intr_rate = IGB_INTR_RATE;
done:
	igb_set_ring_inuse(sc, FALSE);
	igb_set_intr_mask(sc);
	return 0;
}

static void
igb_free_intr(struct igb_softc *sc)
{
	if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
		if (sc->intr_res != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->intr_rid,
			    sc->intr_res);
		}
		if (sc->intr_type == PCI_INTR_TYPE_MSI)
			pci_release_msi(sc->dev);
	} else {
		igb_msix_free(sc, TRUE);
	}
}

static void
igb_teardown_intr(struct igb_softc *sc)
{
	if (sc->intr_type != PCI_INTR_TYPE_MSIX)
		bus_teardown_intr(sc->dev, sc->intr_res, sc->intr_tag);
	else
		igb_msix_teardown(sc, sc->msix_cnt);
}
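
/*
 * MSI-X allocation policy: the usable vector count is first rounded
 * down to a power of 2 (and halved again if nothing would be left over
 * for the link status vector).  If enough vectors remain, every TX and
 * every RX ring gets its own vector ("independent"); otherwise TX/RX
 * ring pairs share one vector each ("aggregate").  One extra vector is
 * always reserved for link status changes.
 */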
static void
igb_msix_try_alloc(struct igb_softc *sc)
{
	int msix_enable, msix_cnt, msix_cnt2, alloc_cnt;
	int i, x, error;
	int offset, offset_def;
	struct igb_msix_data *msix;
	boolean_t aggregate, setup = FALSE;

	/*
	 * Don't enable MSI-X on 82575, see:
	 * 82575 specification update errata #25
	 */
	if (sc->hw.mac.type == e1000_82575)
		return;

	/* Don't enable MSI-X on VF */
	if (sc->vf_ifp)
		return;

	msix_enable = device_getenv_int(sc->dev, "msix.enable",
	    igb_msix_enable);
	if (!msix_enable)
		return;

	msix_cnt = pci_msix_count(sc->dev);
#ifdef IGB_MSIX_DEBUG
	msix_cnt = device_getenv_int(sc->dev, "msix.count", msix_cnt);
#endif
	if (msix_cnt <= 1) {
		/* One MSI-X model does not make sense */
		return;
	}

	i = 0;
	while ((1 << (i + 1)) <= msix_cnt)
		++i;
	msix_cnt2 = 1 << i;

	if (bootverbose) {
		device_printf(sc->dev, "MSI-X count %d/%d\n",
		    msix_cnt2, msix_cnt);
	}

	KKASSERT(msix_cnt2 <= msix_cnt);
	if (msix_cnt == msix_cnt2) {
		/* We need at least one MSI-X for link status */
		msix_cnt2 >>= 1;
		if (msix_cnt2 <= 1) {
			/* One MSI-X for RX/TX does not make sense */
			device_printf(sc->dev, "not enough MSI-X for TX/RX, "
			    "MSI-X count %d/%d\n", msix_cnt2, msix_cnt);
			return;
		}
		KKASSERT(msix_cnt > msix_cnt2);

		if (bootverbose) {
			device_printf(sc->dev, "MSI-X count fixup %d/%d\n",
			    msix_cnt2, msix_cnt);
		}
	}

	sc->rx_ring_msix = sc->rx_ring_cnt;
	if (sc->rx_ring_msix > msix_cnt2)
		sc->rx_ring_msix = msix_cnt2;

	sc->tx_ring_msix = sc->tx_ring_cnt;
	if (sc->tx_ring_msix > msix_cnt2)
		sc->tx_ring_msix = msix_cnt2;

	if (msix_cnt >= sc->tx_ring_msix + sc->rx_ring_msix + 1) {
		/*
		 * Independent TX/RX MSI-X
		 */
		aggregate = FALSE;
		if (bootverbose)
			device_printf(sc->dev, "independent TX/RX MSI-X\n");
		alloc_cnt = sc->tx_ring_msix + sc->rx_ring_msix;
	} else {
		/*
		 * Aggregate TX/RX MSI-X
		 */
		aggregate = TRUE;
		if (bootverbose)
			device_printf(sc->dev, "aggregate TX/RX MSI-X\n");
		alloc_cnt = msix_cnt2;
		if (alloc_cnt > ncpus2)
			alloc_cnt = ncpus2;
		if (sc->rx_ring_msix > alloc_cnt)
			sc->rx_ring_msix = alloc_cnt;
		if (sc->tx_ring_msix > alloc_cnt)
			sc->tx_ring_msix = alloc_cnt;
	}
	++alloc_cnt;	/* For link status */

	if (bootverbose) {
		device_printf(sc->dev, "MSI-X alloc %d, "
		    "RX ring %d, TX ring %d\n", alloc_cnt,
		    sc->rx_ring_msix, sc->tx_ring_msix);
	}

	sc->msix_mem_rid = PCIR_BAR(IGB_MSIX_BAR);
	sc->msix_mem_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
	    &sc->msix_mem_rid, RF_ACTIVE);
	if (sc->msix_mem_res == NULL) {
		device_printf(sc->dev, "Unable to map MSI-X table\n");
		return;
	}

	sc->msix_cnt = alloc_cnt;
	sc->msix_data = kmalloc_cachealign(
	    sizeof(struct igb_msix_data) * sc->msix_cnt,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (x = 0; x < sc->msix_cnt; ++x) {
		msix = &sc->msix_data[x];

		lwkt_serialize_init(&msix->msix_serialize0);
		msix->msix_sc = sc;
		msix->msix_rid = -1;
		msix->msix_vector = x;
		msix->msix_mask = 1 << msix->msix_vector;
		msix->msix_rate = IGB_INTR_RATE;
	}
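
	/*
	 * Bind the vectors to rings and CPUs.  Ring N is targeted at
	 * CPU (N + offset); the "msix.rxoff"/"msix.txoff"/"msix.off"
	 * tunables below override the default offset, but must be a
	 * multiple of the ring count and stay below ncpus2.  E.g. with
	 * 4 RX rings on an 8-CPU system, offset 4 moves rx0-rx3 from
	 * cpu0-cpu3 to cpu4-cpu7.
	 */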
	x = 0;
	if (!aggregate) {
		/*
		 * RX rings
		 */
		if (sc->rx_ring_msix == ncpus2) {
			offset = 0;
		} else {
			offset_def = (sc->rx_ring_msix *
			    device_get_unit(sc->dev)) % ncpus2;

			offset = device_getenv_int(sc->dev,
			    "msix.rxoff", offset_def);
			if (offset >= ncpus2 ||
			    offset % sc->rx_ring_msix != 0) {
				device_printf(sc->dev,
				    "invalid msix.rxoff %d, use %d\n",
				    offset, offset_def);
				offset = offset_def;
			}
		}
		igb_msix_rx_conf(sc, 0, &x, offset);

		/*
		 * TX rings
		 */
		if (sc->tx_ring_msix == ncpus2) {
			offset = 0;
		} else {
			offset_def = (sc->tx_ring_msix *
			    device_get_unit(sc->dev)) % ncpus2;

			offset = device_getenv_int(sc->dev,
			    "msix.txoff", offset_def);
			if (offset >= ncpus2 ||
			    offset % sc->tx_ring_msix != 0) {
				device_printf(sc->dev,
				    "invalid msix.txoff %d, use %d\n",
				    offset, offset_def);
				offset = offset_def;
			}
		}
		igb_msix_tx_conf(sc, 0, &x, offset);
	} else {
		int ring_agg, ring_max;

		ring_agg = sc->rx_ring_msix;
		if (ring_agg > sc->tx_ring_msix)
			ring_agg = sc->tx_ring_msix;

		ring_max = sc->rx_ring_msix;
		if (ring_max < sc->tx_ring_msix)
			ring_max = sc->tx_ring_msix;

		if (ring_max == ncpus2) {
			offset = 0;
		} else {
			offset_def = (ring_max * device_get_unit(sc->dev)) %
			    ncpus2;

			offset = device_getenv_int(sc->dev, "msix.off",
			    offset_def);
			if (offset >= ncpus2 || offset % ring_max != 0) {
				device_printf(sc->dev,
				    "invalid msix.off %d, use %d\n",
				    offset, offset_def);
				offset = offset_def;
			}
		}

		for (i = 0; i < ring_agg; ++i) {
			struct igb_tx_ring *txr = &sc->tx_rings[i];
			struct igb_rx_ring *rxr = &sc->rx_rings[i];

			KKASSERT(x < sc->msix_cnt);
			msix = &sc->msix_data[x++];

			txr->tx_intr_bit = msix->msix_vector;
			txr->tx_intr_mask = msix->msix_mask;
			rxr->rx_intr_bit = msix->msix_vector;
			rxr->rx_intr_mask = msix->msix_mask;

			msix->msix_serialize = &msix->msix_serialize0;
			msix->msix_func = igb_msix_rxtx;
			msix->msix_arg = msix;
			msix->msix_rx = rxr;
			msix->msix_tx = txr;

			msix->msix_cpuid = i + offset;
			KKASSERT(msix->msix_cpuid < ncpus2);
			txr->tx_intr_cpuid = msix->msix_cpuid;

			ksnprintf(msix->msix_desc, sizeof(msix->msix_desc),
			    "%s rxtx%d", device_get_nameunit(sc->dev), i);
			msix->msix_rate = IGB_MSIX_RX_RATE;
			ksnprintf(msix->msix_rate_desc,
			    sizeof(msix->msix_rate_desc),
			    "RXTX%d interrupt rate", i);
		}

		if (ring_agg != ring_max) {
			if (ring_max == sc->tx_ring_msix)
				igb_msix_tx_conf(sc, i, &x, offset);
			else
				igb_msix_rx_conf(sc, i, &x, offset);
		}
	}

	/*
	 * Link status
	 */
	KKASSERT(x < sc->msix_cnt);
	msix = &sc->msix_data[x++];
	sc->sts_intr_bit = msix->msix_vector;
	sc->sts_intr_mask = msix->msix_mask;

	msix->msix_serialize = &sc->main_serialize;
	msix->msix_func = igb_msix_status;
	msix->msix_arg = sc;
	msix->msix_cpuid = 0;
	ksnprintf(msix->msix_desc, sizeof(msix->msix_desc), "%s sts",
	    device_get_nameunit(sc->dev));
	ksnprintf(msix->msix_rate_desc, sizeof(msix->msix_rate_desc),
	    "status interrupt rate");

	KKASSERT(x == sc->msix_cnt);

	error = pci_setup_msix(sc->dev);
	if (error) {
		device_printf(sc->dev, "Setup MSI-X failed\n");
		goto back;
	}
	setup = TRUE;

	for (i = 0; i < sc->msix_cnt; ++i) {
		msix = &sc->msix_data[i];

		error = pci_alloc_msix_vector(sc->dev, msix->msix_vector,
		    &msix->msix_rid, msix->msix_cpuid);
		if (error) {
			device_printf(sc->dev,
			    "Unable to allocate MSI-X %d on cpu%d\n",
			    msix->msix_vector, msix->msix_cpuid);
			goto back;
		}

		msix->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
		    &msix->msix_rid, RF_ACTIVE);
		if (msix->msix_res == NULL) {
			device_printf(sc->dev,
			    "Unable to allocate MSI-X %d resource\n",
			    msix->msix_vector);
			error = ENOMEM;
			goto back;
		}
	}

	pci_enable_msix(sc->dev);
	sc->intr_type = PCI_INTR_TYPE_MSIX;
back:
	if (error)
		igb_msix_free(sc, setup);
}

static void
igb_msix_free(struct igb_softc *sc, boolean_t setup)
{
	int i;

	KKASSERT(sc->msix_cnt > 1);

	for (i = 0; i < sc->msix_cnt; ++i) {
		struct igb_msix_data *msix = &sc->msix_data[i];

		if (msix->msix_res != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ,
			    msix->msix_rid, msix->msix_res);
		}
		if (msix->msix_rid >= 0)
			pci_release_msix_vector(sc->dev, msix->msix_rid);
	}
	if (setup)
		pci_teardown_msix(sc->dev);

	sc->msix_cnt = 0;
	kfree(sc->msix_data, M_DEVBUF);
	sc->msix_data = NULL;
}

static int
igb_msix_setup(struct igb_softc *sc)
{
	int i;

	for (i = 0; i < sc->msix_cnt; ++i) {
		struct igb_msix_data *msix = &sc->msix_data[i];
		int error;

		error = bus_setup_intr_descr(sc->dev, msix->msix_res,
		    INTR_MPSAFE, msix->msix_func, msix->msix_arg,
		    &msix->msix_handle, msix->msix_serialize, msix->msix_desc);
		if (error) {
			device_printf(sc->dev, "could not set up %s "
			    "interrupt handler.\n", msix->msix_desc);
			igb_msix_teardown(sc, i);
			return error;
		}
	}
	return 0;
}

static void
igb_msix_teardown(struct igb_softc *sc, int msix_cnt)
{
	int i;

	for (i = 0; i < msix_cnt; ++i) {
		struct igb_msix_data *msix = &sc->msix_data[i];

		bus_teardown_intr(sc->dev, msix->msix_res, msix->msix_handle);
	}
}

static void
igb_msix_rx(void *arg)
{
	struct igb_rx_ring *rxr = arg;

	ASSERT_SERIALIZED(&rxr->rx_serialize);
	igb_rxeof(rxr, -1);

	E1000_WRITE_REG(&rxr->sc->hw, E1000_EIMS, rxr->rx_intr_mask);
}

static void
igb_msix_tx(void *arg)
{
	struct igb_tx_ring *txr = arg;

	ASSERT_SERIALIZED(&txr->tx_serialize);

	igb_txeof(txr);
	if (!ifsq_is_empty(txr->ifsq))
		ifsq_devstart(txr->ifsq);

	E1000_WRITE_REG(&txr->sc->hw, E1000_EIMS, txr->tx_intr_mask);
}
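
/*
 * NOTE: Each MSI-X handler (including the status handler below)
 * re-enables its own vector on the way out by writing its bit back
 * into EIMS; the vector is masked while the handler runs (auto-mask
 * via GPIE, and presumably EIAM, configured during interrupt
 * initialization).
 */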
static void
igb_msix_status(void *arg)
{
	struct igb_softc *sc = arg;
	uint32_t icr;

	ASSERT_SERIALIZED(&sc->main_serialize);

	icr = E1000_READ_REG(&sc->hw, E1000_ICR);
	if (icr & E1000_ICR_LSC) {
		sc->hw.mac.get_link_status = 1;
		igb_update_link_status(sc);
	}

	E1000_WRITE_REG(&sc->hw, E1000_EIMS, sc->sts_intr_mask);
}

static void
igb_set_ring_inuse(struct igb_softc *sc, boolean_t polling)
{
	sc->rx_ring_inuse = igb_get_rxring_inuse(sc, polling);
	sc->tx_ring_inuse = igb_get_txring_inuse(sc, polling);
	if (bootverbose) {
		if_printf(&sc->arpcom.ac_if, "RX rings %d/%d, TX rings %d/%d\n",
		    sc->rx_ring_inuse, sc->rx_ring_cnt,
		    sc->tx_ring_inuse, sc->tx_ring_cnt);
	}
}

static int
igb_get_rxring_inuse(const struct igb_softc *sc, boolean_t polling)
{
	if (!IGB_ENABLE_HWRSS(sc))
		return 1;

	if (polling)
		return sc->rx_ring_cnt;
	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
		return IGB_MIN_RING_RSS;
	else
		return sc->rx_ring_msix;
}

static int
igb_get_txring_inuse(const struct igb_softc *sc, boolean_t polling)
{
	if (!IGB_ENABLE_HWTSS(sc))
		return 1;

	if (polling)
		return sc->tx_ring_cnt;
	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
		return IGB_MIN_RING;
	else
		return sc->tx_ring_msix;
}

static int
igb_tso_pullup(struct igb_tx_ring *txr, struct mbuf **mp)
{
	int hoff, iphlen, thoff;
	struct mbuf *m;

	m = *mp;
	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));

	iphlen = m->m_pkthdr.csum_iphlen;
	thoff = m->m_pkthdr.csum_thlen;
	hoff = m->m_pkthdr.csum_lhlen;

	KASSERT(iphlen > 0, ("invalid ip hlen"));
	KASSERT(thoff > 0, ("invalid tcp hlen"));
	KASSERT(hoff > 0, ("invalid ether hlen"));

	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
		m = m_pullup(m, hoff + iphlen + thoff);
		if (m == NULL) {
			*mp = NULL;
			return ENOBUFS;
		}
		*mp = m;
	}
	if (txr->tx_flags & IGB_TXFLAG_TSO_IPLEN0) {
		struct ip *ip;

		ip = mtodoff(m, struct ip *, hoff);
		ip->ip_len = 0;
	}

	return 0;
}
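
/*
 * Build the advanced TX context descriptor for a TSO frame.  The
 * descriptor packs the VLAN tag, MAC header length and IP header
 * length into vlan_macip_lens, and the MSS and TCP header length
 * into mss_l4len_idx, so the hardware can rebuild the headers for
 * each segment it emits.
 */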
static void
igb_tso_ctx(struct igb_tx_ring *txr, struct mbuf *m, uint32_t *hlen)
{
	struct e1000_adv_tx_context_desc *TXD;
	uint32_t vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
	int hoff, ctxd, iphlen, thoff;

	iphlen = m->m_pkthdr.csum_iphlen;
	thoff = m->m_pkthdr.csum_thlen;
	hoff = m->m_pkthdr.csum_lhlen;

	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;

	ctxd = txr->next_avail_desc;
	TXD = (struct e1000_adv_tx_context_desc *)&txr->tx_base[ctxd];

	if (m->m_flags & M_VLANTAG) {
		uint16_t vlantag;

		vlantag = htole16(m->m_pkthdr.ether_vlantag);
		vlan_macip_lens |= (vlantag << E1000_ADVTXD_VLAN_SHIFT);
	}

	vlan_macip_lens |= (hoff << E1000_ADVTXD_MACLEN_SHIFT);
	vlan_macip_lens |= iphlen;

	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;

	mss_l4len_idx |= (m->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
	mss_l4len_idx |= (thoff << E1000_ADVTXD_L4LEN_SHIFT);

	/*
	 * 82575 needs the TX context index added; the queue
	 * index is used as TX context index here.
	 */
	if (txr->sc->hw.mac.type == e1000_82575)
		mss_l4len_idx |= txr->me << 4;

	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
	TXD->seqnum_seed = htole32(0);
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	/* We've consumed the first desc, adjust counters */
	if (++ctxd == txr->num_tx_desc)
		ctxd = 0;
	txr->next_avail_desc = ctxd;
	--txr->tx_avail;

	*hlen = hoff + iphlen + thoff;
}
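
/*
 * Collect every serializer (main, per-vector for aggregated RXTX
 * MSI-X, per-TX-ring, per-RX-ring) into one flat array for the
 * ifnet serialize methods defined earlier; those helpers operate on
 * sc->serializes/sc->serialize_cnt as a whole.
 */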
static void
igb_setup_serializer(struct igb_softc *sc)
{
	const struct igb_msix_data *msix;
	int i, j;

	/*
	 * Allocate serializer array
	 */

	/* Main + TX + RX */
	sc->serialize_cnt = 1 + sc->tx_ring_cnt + sc->rx_ring_cnt;

	/* Aggregate TX/RX MSI-X */
	for (i = 0; i < sc->msix_cnt; ++i) {
		msix = &sc->msix_data[i];
		if (msix->msix_serialize == &msix->msix_serialize0)
			sc->serialize_cnt++;
	}

	sc->serializes =
	    kmalloc(sc->serialize_cnt * sizeof(struct lwkt_serialize *),
	    M_DEVBUF, M_WAITOK | M_ZERO);

	/*
	 * Setup serializers
	 *
	 * NOTE: Order is critical
	 */

	i = 0;

	KKASSERT(i < sc->serialize_cnt);
	sc->serializes[i++] = &sc->main_serialize;

	for (j = 0; j < sc->msix_cnt; ++j) {
		msix = &sc->msix_data[j];
		if (msix->msix_serialize == &msix->msix_serialize0) {
			KKASSERT(i < sc->serialize_cnt);
			sc->serializes[i++] = msix->msix_serialize;
		}
	}

	for (j = 0; j < sc->tx_ring_cnt; ++j) {
		KKASSERT(i < sc->serialize_cnt);
		sc->serializes[i++] = &sc->tx_rings[j].tx_serialize;
	}

	for (j = 0; j < sc->rx_ring_cnt; ++j) {
		KKASSERT(i < sc->serialize_cnt);
		sc->serializes[i++] = &sc->rx_rings[j].rx_serialize;
	}

	KKASSERT(i == sc->serialize_cnt);
}

static void
igb_msix_rx_conf(struct igb_softc *sc, int i, int *x0, int offset)
{
	int x = *x0;

	for (; i < sc->rx_ring_msix; ++i) {
		struct igb_rx_ring *rxr = &sc->rx_rings[i];
		struct igb_msix_data *msix;

		KKASSERT(x < sc->msix_cnt);
		msix = &sc->msix_data[x++];

		rxr->rx_intr_bit = msix->msix_vector;
		rxr->rx_intr_mask = msix->msix_mask;

		msix->msix_serialize = &rxr->rx_serialize;
		msix->msix_func = igb_msix_rx;
		msix->msix_arg = rxr;

		msix->msix_cpuid = i + offset;
		KKASSERT(msix->msix_cpuid < ncpus2);

		ksnprintf(msix->msix_desc, sizeof(msix->msix_desc), "%s rx%d",
		    device_get_nameunit(sc->dev), i);

		msix->msix_rate = IGB_MSIX_RX_RATE;
		ksnprintf(msix->msix_rate_desc, sizeof(msix->msix_rate_desc),
		    "RX%d interrupt rate", i);
	}
	*x0 = x;
}

static void
igb_msix_tx_conf(struct igb_softc *sc, int i, int *x0, int offset)
{
	int x = *x0;

	for (; i < sc->tx_ring_msix; ++i) {
		struct igb_tx_ring *txr = &sc->tx_rings[i];
		struct igb_msix_data *msix;

		KKASSERT(x < sc->msix_cnt);
		msix = &sc->msix_data[x++];

		txr->tx_intr_bit = msix->msix_vector;
		txr->tx_intr_mask = msix->msix_mask;

		msix->msix_serialize = &txr->tx_serialize;
		msix->msix_func = igb_msix_tx;
		msix->msix_arg = txr;

		msix->msix_cpuid = i + offset;
		KKASSERT(msix->msix_cpuid < ncpus2);
		txr->tx_intr_cpuid = msix->msix_cpuid;

		ksnprintf(msix->msix_desc, sizeof(msix->msix_desc), "%s tx%d",
		    device_get_nameunit(sc->dev), i);

		msix->msix_rate = IGB_MSIX_TX_RATE;
		ksnprintf(msix->msix_rate_desc, sizeof(msix->msix_rate_desc),
		    "TX%d interrupt rate", i);
	}
	*x0 = x;
}

static void
igb_msix_rxtx(void *arg)
{
	struct igb_msix_data *msix = arg;
	struct igb_rx_ring *rxr = msix->msix_rx;
	struct igb_tx_ring *txr = msix->msix_tx;

	ASSERT_SERIALIZED(&msix->msix_serialize0);

	lwkt_serialize_enter(&rxr->rx_serialize);
	igb_rxeof(rxr, -1);
	lwkt_serialize_exit(&rxr->rx_serialize);

	lwkt_serialize_enter(&txr->tx_serialize);
	igb_txeof(txr);
	if (!ifsq_is_empty(txr->ifsq))
		ifsq_devstart(txr->ifsq);
	lwkt_serialize_exit(&txr->tx_serialize);

	E1000_WRITE_REG(&msix->msix_sc->hw, E1000_EIMS, msix->msix_mask);
}

static void
igb_set_timer_cpuid(struct igb_softc *sc, boolean_t polling)
{
	if (polling || sc->intr_type == PCI_INTR_TYPE_MSIX)
		sc->timer_cpuid = 0;	/* XXX fixed */
	else
		sc->timer_cpuid = rman_get_cpuid(sc->intr_res);
}