/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2007-2009 Myricom, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2014, Joyent, Inc.
 * Copyright (c) 2016 by Delphix. All rights reserved.
 */

#define	MXGEFW_NDIS
#include "myri10ge_var.h"
#include "rss_eth_z8e.h"
#include "rss_ethp_z8e.h"
#include "mcp_gen_header.h"

#define	MYRI10GE_MAX_ETHER_MTU 9014
#define	MYRI10GE_MAX_GLD_MTU	9000
#define	MYRI10GE_MIN_GLD_MTU	1500

#define	MYRI10GE_ETH_STOPPED 0
#define	MYRI10GE_ETH_STOPPING 1
#define	MYRI10GE_ETH_STARTING 2
#define	MYRI10GE_ETH_RUNNING 3
#define	MYRI10GE_ETH_OPEN_FAILED 4
#define	MYRI10GE_ETH_SUSPENDED_RUNNING 5

static int myri10ge_small_bytes = 510;
static int myri10ge_intr_coal_delay = 125;
static int myri10ge_flow_control = 1;
#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static int myri10ge_nvidia_ecrc_enable = 1;
#endif
static int myri10ge_mtu_override = 0;
static int myri10ge_tx_copylen = 512;
static int myri10ge_deassert_wait = 1;
static int myri10ge_verbose = 0;
static int myri10ge_watchdog_reset = 0;
static int myri10ge_use_msix = 1;
static int myri10ge_max_slices = -1;
static int myri10ge_use_msi = 1;
int myri10ge_force_firmware = 0;
static boolean_t myri10ge_use_lso = B_TRUE;
static int myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
static int myri10ge_tx_hash = 1;
static int myri10ge_lro = 0;
static int myri10ge_lro_cnt = 8;
int myri10ge_lro_max_aggr = 2;
static int myri10ge_lso_copy = 0;
static mblk_t *myri10ge_send_wrapper(void *arg, mblk_t *mp);
int myri10ge_tx_handles_initial = 128;

static kmutex_t myri10ge_param_lock;
static void* myri10ge_db_lastfree;

static int myri10ge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
static int myri10ge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
static int myri10ge_quiesce(dev_info_t *dip);

DDI_DEFINE_STREAM_OPS(myri10ge_ops, nulldev, nulldev, myri10ge_attach,
    myri10ge_detach, nodev, NULL, D_MP, NULL, myri10ge_quiesce);


static struct modldrv modldrv = {
	&mod_driverops,
	"Myricom 10G driver (10GbE)",
	&myri10ge_ops,
};


static struct modlinkage modlinkage = {
	MODREV_1,
	{&modldrv, NULL},
};

unsigned char myri10ge_broadcastaddr[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };

static ddi_dma_attr_t myri10ge_misc_dma_attr = {
	DMA_ATTR_V0,			/* version number */
	(uint64_t)0,			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
	(uint64_t)4096,			/* alignment */
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
	(uint64_t)0x7fffffff,		/* maximum segment size */
	1,				/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};

/*
 * The Myri10GE NIC has the following constraints on receive buffers:
 * 1) Buffers which cross a 4KB boundary must be aligned to 4KB
 * 2) Buffers which are not aligned to 4KB must not cross a 4KB boundary
 */

static ddi_dma_attr_t myri10ge_rx_jumbo_dma_attr = {
	DMA_ATTR_V0,			/* version number */
	(uint64_t)0,			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
	(uint64_t)4096,			/* alignment */
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
	UINT64_MAX,			/* maximum segment size */
	1,				/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};

static ddi_dma_attr_t myri10ge_rx_std_dma_attr = {
	DMA_ATTR_V0,			/* version number */
	(uint64_t)0,			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
#if defined sparc64 || defined __sparcv9
	(uint64_t)4096,			/* alignment */
#else
	(uint64_t)0x80,			/* alignment */
#endif
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
#if defined sparc64 || defined __sparcv9
	UINT64_MAX,			/* maximum segment size */
#else
	(uint64_t)0xfff,		/* maximum segment size */
#endif
	1,				/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};
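
/*
 * Illustration of the constraints above, using hypothetical numbers:
 * a 2KB buffer placed at physical offset 0xf00 would span 0xf00-0x16ff
 * and cross the 4KB boundary at 0x1000, violating rule 2 unless it
 * were moved to a 4KB-aligned address.  The jumbo attr encodes rule 1
 * with dma_attr_align = 4096; the non-SPARC std attr encodes rule 2 by
 * capping dma_attr_seg at 0xfff so no allocation may cross a 4KB line.
 */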

static ddi_dma_attr_t myri10ge_tx_dma_attr = {
	DMA_ATTR_V0,			/* version number */
	(uint64_t)0,			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
	(uint64_t)1,			/* alignment */
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
	UINT64_MAX,			/* maximum segment size */
	INT32_MAX,			/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};

#if defined sparc64 || defined __sparcv9
#define	WC 0
#else
#define	WC 1
#endif

struct ddi_device_acc_attr myri10ge_dev_access_attr = {
	DDI_DEVICE_ATTR_V0,		/* version */
	DDI_NEVERSWAP_ACC,		/* endian flags */
#if WC
	DDI_MERGING_OK_ACC		/* data order */
#else
	DDI_STRICTORDER_ACC
#endif
};

static void myri10ge_watchdog(void *arg);

#ifdef MYRICOM_PRIV
int myri10ge_mtu = MYRI10GE_MAX_ETHER_MTU + MXGEFW_PAD + VLAN_TAGSZ;
#define	MYRI10GE_DEFAULT_GLD_MTU	MYRI10GE_MAX_GLD_MTU
#else
int myri10ge_mtu = ETHERMAX + MXGEFW_PAD + VLAN_TAGSZ;
#define	MYRI10GE_DEFAULT_GLD_MTU	MYRI10GE_MIN_GLD_MTU
#endif
int myri10ge_bigbufs_initial = 1024;
int myri10ge_bigbufs_max = 4096;


caddr_t
myri10ge_dma_alloc(dev_info_t *dip, size_t len,
    ddi_dma_attr_t *attr, ddi_device_acc_attr_t *accattr,
    uint_t alloc_flags, int bind_flags, struct myri10ge_dma_stuff *dma,
    int warn, int (*wait)(caddr_t))
{
	caddr_t kaddr;
	size_t real_length;
	ddi_dma_cookie_t cookie;
	uint_t count;
	int err;

	err = ddi_dma_alloc_handle(dip, attr, wait,
	    NULL, &dma->handle);
	if (err != DDI_SUCCESS) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: ddi_dma_alloc_handle failed\n");
		goto abort_with_nothing;
	}

	err = ddi_dma_mem_alloc(dma->handle, len, accattr, alloc_flags,
	    wait, NULL, &kaddr, &real_length,
	    &dma->acc_handle);
	if (err != DDI_SUCCESS) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: ddi_dma_mem_alloc failed\n");
		goto abort_with_handle;
	}

	err = ddi_dma_addr_bind_handle(dma->handle, NULL, kaddr, len,
	    bind_flags, wait, NULL, &cookie, &count);

	if (err != DDI_SUCCESS) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: ddi_dma_addr_bind_handle failed\n");
		goto abort_with_mem;
	}

	if (count != 1) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: got too many dma segments ");
		goto abort_with_bind;
	}
	dma->low = htonl(MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress));
	dma->high = htonl(MYRI10GE_HIGHPART_TO_U32(cookie.dmac_laddress));
	return (kaddr);

abort_with_bind:
	(void) ddi_dma_unbind_handle(dma->handle);

abort_with_mem:
	ddi_dma_mem_free(&dma->acc_handle);

abort_with_handle:
	ddi_dma_free_handle(&dma->handle);
abort_with_nothing:
	if (warn) {
		cmn_err(CE_WARN, "myri10ge: myri10ge_dma_alloc failed.\n");
		cmn_err(CE_WARN, "args: dip=%p len=0x%lx ddi_dma_attr=%p\n",
		    (void*) dip, len, (void*) attr);
		cmn_err(CE_WARN,
		    "args: ddi_device_acc_attr=%p alloc_flags=0x%x\n",
		    (void*) accattr, alloc_flags);
		cmn_err(CE_WARN, "args: bind_flags=0x%x dmastuff=%p",
		    bind_flags, (void*) dma);
	}
	return (NULL);

}

void
myri10ge_dma_free(struct myri10ge_dma_stuff *dma)
{
	(void) ddi_dma_unbind_handle(dma->handle);
	ddi_dma_mem_free(&dma->acc_handle);
	ddi_dma_free_handle(&dma->handle);
}
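
/*
 * Usage sketch (illustrative only; the values are hypothetical): a
 * caller allocating a 4KB, DMA-visible scratch buffer and releasing
 * it.  The kernel virtual address is returned directly; the
 * device-visible address is left in dma->low/high in network byte
 * order, ready to be handed to the firmware.
 *
 *	struct myri10ge_dma_stuff dma;
 *	caddr_t va = myri10ge_dma_alloc(dip, 4096,
 *	    &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
 *	    DDI_DMA_CONSISTENT, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
 *	    &dma, 1, DDI_DMA_DONTWAIT);
 *	if (va != NULL)
 *		myri10ge_dma_free(&dma);
 */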

static inline void
myri10ge_pio_copy32(void *to, uint32_t *from32, size_t size)
{
	register volatile uint32_t *to32;
	size_t i;

	to32 = (volatile uint32_t *) to;
	for (i = (size / 4); i; i--) {
		*to32 = *from32;
		to32++;
		from32++;
	}
}

#if defined(_LP64)
static inline void
myri10ge_pio_copy64(void *to, uint64_t *from64, size_t size)
{
	register volatile uint64_t *to64;
	size_t i;

	to64 = (volatile uint64_t *) to;
	for (i = (size / 8); i; i--) {
		*to64 = *from64;
		to64++;
		from64++;
	}
}
#endif

/*
 * This routine copies memory from the host to the NIC.
 * The "size" argument must always be a multiple of
 * the size of long (4 or 8 bytes), and to/from must also
 * be naturally aligned.
 */
static inline void
myri10ge_pio_copy(void *to, void *from, size_t size)
{
#if !defined(_LP64)
	ASSERT((size % 4) == 0);
	myri10ge_pio_copy32(to, (uint32_t *)from, size);
#else
	ASSERT((size % 8) == 0);
	myri10ge_pio_copy64(to, (uint64_t *)from, size);
#endif
}
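
/*
 * Illustrative example: the command path later in this file pushes an
 * 8-byte-aligned mcp_cmd_t to the NIC in a single call, which
 * satisfies the alignment and size contract described above:
 *
 *	myri10ge_pio_copy((void *)cmd_addr, buf, sizeof (*buf));
 */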
We re-write the first segment's low 375 * DMA address to mark it valid only after we write the entire chunk 376 * in a burst 377 */ 378 static inline void 379 myri10ge_submit_8rx(mcp_kreq_ether_recv_t *dst, mcp_kreq_ether_recv_t *src) 380 { 381 src->addr_low |= BE_32(1); 382 myri10ge_pio_copy(dst, src, 4 * sizeof (*src)); 383 mb(); 384 myri10ge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 385 mb(); 386 src->addr_low &= ~(BE_32(1)); 387 dst->addr_low = src->addr_low; 388 mb(); 389 } 390 391 static void 392 myri10ge_pull_jpool(struct myri10ge_slice_state *ss) 393 { 394 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 395 struct myri10ge_jpool_entry *jtail, *j, *jfree; 396 volatile void *putp; 397 int i; 398 399 /* find tail */ 400 jtail = NULL; 401 if (jpool->head != NULL) { 402 j = jpool->head; 403 while (j->next != NULL) 404 j = j->next; 405 jtail = j; 406 } 407 408 /* 409 * iterate over all per-CPU caches, and add contents into 410 * jpool 411 */ 412 for (i = 0; i < MYRI10GE_MAX_CPUS; i++) { 413 /* take per-CPU free list */ 414 putp = &jpool->cpu[i & MYRI10GE_MAX_CPU_MASK].head; 415 jfree = atomic_swap_ptr(putp, NULL); 416 if (jfree == NULL) 417 continue; 418 419 /* append to pool */ 420 if (jtail == NULL) { 421 jpool->head = jfree; 422 } else { 423 jtail->next = jfree; 424 } 425 j = jfree; 426 while (j->next != NULL) 427 j = j->next; 428 jtail = j; 429 } 430 } 431 432 /* 433 * Transfers buffers from the free pool to the nic 434 * Must be called holding the jpool mutex. 435 */ 436 437 static inline void 438 myri10ge_restock_jumbos(struct myri10ge_slice_state *ss) 439 { 440 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 441 struct myri10ge_jpool_entry *j; 442 myri10ge_rx_ring_t *rx; 443 int i, idx, limit; 444 445 rx = &ss->rx_big; 446 limit = ss->j_rx_cnt + (rx->mask + 1); 447 448 for (i = rx->cnt; i != limit; i++) { 449 idx = i & (rx->mask); 450 j = jpool->head; 451 if (j == NULL) { 452 myri10ge_pull_jpool(ss); 453 j = jpool->head; 454 if (j == NULL) { 455 break; 456 } 457 } 458 jpool->head = j->next; 459 rx->info[idx].j = j; 460 rx->shadow[idx].addr_low = j->dma.low; 461 rx->shadow[idx].addr_high = j->dma.high; 462 /* copy 4 descriptors (32-bytes) to the mcp at a time */ 463 if ((idx & 7) == 7) { 464 myri10ge_submit_8rx(&rx->lanai[idx - 7], 465 &rx->shadow[idx - 7]); 466 } 467 } 468 rx->cnt = i; 469 } 470 471 /* 472 * Transfer buffers from the nic to the free pool. 473 * Should be called holding the jpool mutex 474 */ 475 476 static inline void 477 myri10ge_unstock_jumbos(struct myri10ge_slice_state *ss) 478 { 479 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 480 struct myri10ge_jpool_entry *j; 481 myri10ge_rx_ring_t *rx; 482 int i; 483 484 mutex_enter(&jpool->mtx); 485 rx = &ss->rx_big; 486 487 for (i = 0; i < rx->mask + 1; i++) { 488 j = rx->info[i].j; 489 rx->info[i].j = NULL; 490 if (j == NULL) 491 continue; 492 j->next = jpool->head; 493 jpool->head = j; 494 } 495 mutex_exit(&jpool->mtx); 496 497 } 498 499 500 /* 501 * Free routine which is called when the mblk allocated via 502 * esballoc() is freed. 

/*
 * Free routine which is called when the mblk allocated via
 * esballoc() is freed.  Here we return the jumbo buffer
 * to the free pool, and possibly pass some jumbo buffers
 * to the nic
 */

static void
myri10ge_jfree_rtn(void *arg)
{
	struct myri10ge_jpool_entry *j = (struct myri10ge_jpool_entry *)arg;
	struct myri10ge_jpool_stuff *jpool;
	volatile uintptr_t *putp;
	uintptr_t old, new;

	jpool = &j->ss->jpool;

	/* prepend buffer locklessly to per-CPU freelist */
	putp = (void *)&jpool->cpu[CPU->cpu_seqid & MYRI10GE_MAX_CPU_MASK].head;
	new = (uintptr_t)j;
	do {
		old = *putp;
		j->next = (void *)old;
	} while (atomic_cas_ulong(putp, old, new) != old);
}

static void
myri10ge_remove_jbuf(struct myri10ge_jpool_entry *j)
{
	(void) ddi_dma_unbind_handle(j->dma_handle);
	ddi_dma_mem_free(&j->acc_handle);
	ddi_dma_free_handle(&j->dma_handle);
	kmem_free(j, sizeof (*j));
}


/*
 * Allocates one physically contiguous descriptor
 * and adds it to the jumbo buffer pool.
 */

static int
myri10ge_add_jbuf(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_entry *j;
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	ddi_dma_attr_t *rx_dma_attr;
	size_t real_length;
	ddi_dma_cookie_t cookie;
	uint_t count;
	int err;

	if (myri10ge_mtu < 2048)
		rx_dma_attr = &myri10ge_rx_std_dma_attr;
	else
		rx_dma_attr = &myri10ge_rx_jumbo_dma_attr;

again:
	j = (struct myri10ge_jpool_entry *)
	    kmem_alloc(sizeof (*j), KM_SLEEP);
	err = ddi_dma_alloc_handle(ss->mgp->dip, rx_dma_attr,
	    DDI_DMA_DONTWAIT, NULL, &j->dma_handle);
	if (err != DDI_SUCCESS)
		goto abort_with_j;

	err = ddi_dma_mem_alloc(j->dma_handle, myri10ge_mtu,
	    &myri10ge_dev_access_attr, DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
	    NULL, &j->buf, &real_length, &j->acc_handle);
	if (err != DDI_SUCCESS)
		goto abort_with_handle;

	err = ddi_dma_addr_bind_handle(j->dma_handle, NULL, j->buf,
	    real_length, DDI_DMA_READ|DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
	    NULL, &cookie, &count);
	if (err != DDI_SUCCESS)
		goto abort_with_mem;

	/*
	 * Make certain std MTU buffers do not cross a 4KB boundary:
	 *
	 * Setting dma_attr_align=4096 will do this, but the system
	 * will only allocate 1 RX buffer per 4KB page, rather than 2.
	 * Setting dma_attr_granular=4096 *seems* to work around this,
	 * but I'm paranoid about future systems no longer honoring
	 * this, so fall back to the safe, but memory-wasting way if a
	 * buffer crosses a 4KB boundary.
	 */
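
	/*
	 * Worked example of the check below, with hypothetical values:
	 * with start = 0x1f80 and myri10ge_mtu = 0x600, end = 0x2580;
	 * (end >> 12) != (start >> 12) and start is not 4KB aligned, so
	 * the buffer would straddle 0x2000 and we retry with
	 * dma_attr_align = 4096.
	 */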
586 */ 587 588 if (rx_dma_attr == &myri10ge_rx_std_dma_attr && 589 rx_dma_attr->dma_attr_align != 4096) { 590 uint32_t start, end; 591 592 start = MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress); 593 end = start + myri10ge_mtu; 594 if (((end >> 12) != (start >> 12)) && (start & 4095U)) { 595 printf("std buffer crossed a 4KB boundary!\n"); 596 myri10ge_remove_jbuf(j); 597 rx_dma_attr->dma_attr_align = 4096; 598 rx_dma_attr->dma_attr_seg = UINT64_MAX; 599 goto again; 600 } 601 } 602 603 j->dma.low = 604 htonl(MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress)); 605 j->dma.high = 606 htonl(MYRI10GE_HIGHPART_TO_U32(cookie.dmac_laddress)); 607 j->ss = ss; 608 609 610 j->free_func.free_func = myri10ge_jfree_rtn; 611 j->free_func.free_arg = (char *)j; 612 mutex_enter(&jpool->mtx); 613 j->next = jpool->head; 614 jpool->head = j; 615 jpool->num_alloc++; 616 mutex_exit(&jpool->mtx); 617 return (0); 618 619 abort_with_mem: 620 ddi_dma_mem_free(&j->acc_handle); 621 622 abort_with_handle: 623 ddi_dma_free_handle(&j->dma_handle); 624 625 abort_with_j: 626 kmem_free(j, sizeof (*j)); 627 628 /* 629 * If an allocation failed, perhaps it failed because it could 630 * not satisfy granularity requirement. Disable that, and 631 * try agin. 632 */ 633 if (rx_dma_attr == &myri10ge_rx_std_dma_attr && 634 rx_dma_attr->dma_attr_align != 4096) { 635 cmn_err(CE_NOTE, 636 "!alloc failed, reverting to gran=1\n"); 637 rx_dma_attr->dma_attr_align = 4096; 638 rx_dma_attr->dma_attr_seg = UINT64_MAX; 639 goto again; 640 } 641 return (err); 642 } 643 644 static int 645 myri10ge_jfree_cnt(struct myri10ge_jpool_stuff *jpool) 646 { 647 int i; 648 struct myri10ge_jpool_entry *j; 649 650 mutex_enter(&jpool->mtx); 651 j = jpool->head; 652 i = 0; 653 while (j != NULL) { 654 i++; 655 j = j->next; 656 } 657 mutex_exit(&jpool->mtx); 658 return (i); 659 } 660 661 static int 662 myri10ge_add_jbufs(struct myri10ge_slice_state *ss, int num, int total) 663 { 664 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 665 int allocated = 0; 666 int err; 667 int needed; 668 669 /* 670 * if total is set, user wants "num" jbufs in the pool, 671 * otherwise the user wants to "num" additional jbufs 672 * added to the pool 673 */ 674 if (total && jpool->num_alloc) { 675 allocated = myri10ge_jfree_cnt(jpool); 676 needed = num - allocated; 677 } else { 678 needed = num; 679 } 680 681 while (needed > 0) { 682 needed--; 683 err = myri10ge_add_jbuf(ss); 684 if (err == 0) { 685 allocated++; 686 } 687 } 688 return (allocated); 689 } 690 691 static void 692 myri10ge_remove_jbufs(struct myri10ge_slice_state *ss) 693 { 694 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 695 struct myri10ge_jpool_entry *j; 696 697 mutex_enter(&jpool->mtx); 698 myri10ge_pull_jpool(ss); 699 while (jpool->head != NULL) { 700 jpool->num_alloc--; 701 j = jpool->head; 702 jpool->head = j->next; 703 myri10ge_remove_jbuf(j); 704 } 705 mutex_exit(&jpool->mtx); 706 } 707 708 static void 709 myri10ge_carve_up_jbufs_into_small_ring(struct myri10ge_slice_state *ss) 710 { 711 struct myri10ge_jpool_stuff *jpool = &ss->jpool; 712 struct myri10ge_jpool_entry *j = NULL; 713 caddr_t ptr; 714 uint32_t dma_low, dma_high; 715 int idx, len; 716 unsigned int alloc_size; 717 718 dma_low = dma_high = len = 0; 719 alloc_size = myri10ge_small_bytes + MXGEFW_PAD; 720 ptr = NULL; 721 for (idx = 0; idx < ss->rx_small.mask + 1; idx++) { 722 /* Allocate a jumbo frame and carve it into small frames */ 723 if (len < alloc_size) { 724 mutex_enter(&jpool->mtx); 725 /* remove jumbo from freelist */ 726 j = jpool->head; 727 
			jpool->head = j->next;
			/* place it onto small list */
			j->next = ss->small_jpool;
			ss->small_jpool = j;
			mutex_exit(&jpool->mtx);
			len = myri10ge_mtu;
			dma_low = ntohl(j->dma.low);
			dma_high = ntohl(j->dma.high);
			ptr = j->buf;
		}
		ss->rx_small.info[idx].ptr = ptr;
		ss->rx_small.shadow[idx].addr_low = htonl(dma_low);
		ss->rx_small.shadow[idx].addr_high = htonl(dma_high);
		len -= alloc_size;
		ptr += alloc_size;
		dma_low += alloc_size;
	}
}

/*
 * Return the jumbo bufs we carved up for small to the jumbo pool
 */

static void
myri10ge_release_small_jbufs(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j = NULL;

	mutex_enter(&jpool->mtx);
	while (ss->small_jpool != NULL) {
		j = ss->small_jpool;
		ss->small_jpool = j->next;
		j->next = jpool->head;
		jpool->head = j;
	}
	mutex_exit(&jpool->mtx);
	ss->jbufs_for_smalls = 0;
}

static int
myri10ge_add_tx_handle(struct myri10ge_slice_state *ss)
{
	myri10ge_tx_ring_t *tx = &ss->tx;
	struct myri10ge_priv *mgp = ss->mgp;
	struct myri10ge_tx_dma_handle *handle;
	int err;

	handle = kmem_zalloc(sizeof (*handle), KM_SLEEP);
	err = ddi_dma_alloc_handle(mgp->dip,
	    &myri10ge_tx_dma_attr,
	    DDI_DMA_SLEEP, NULL,
	    &handle->h);
	if (err) {
		static int limit = 0;
		if (limit == 0)
			cmn_err(CE_WARN, "%s: Failed to alloc tx dma handle\n",
			    mgp->name);
		limit++;
		kmem_free(handle, sizeof (*handle));
		return (err);
	}
	mutex_enter(&tx->handle_lock);
	MYRI10GE_SLICE_STAT_INC(tx_handles_alloced);
	handle->next = tx->free_tx_handles;
	tx->free_tx_handles = handle;
	mutex_exit(&tx->handle_lock);
	return (DDI_SUCCESS);
}

static void
myri10ge_remove_tx_handles(struct myri10ge_slice_state *ss)
{
	myri10ge_tx_ring_t *tx = &ss->tx;
	struct myri10ge_tx_dma_handle *handle;
	mutex_enter(&tx->handle_lock);

	handle = tx->free_tx_handles;
	while (handle != NULL) {
		tx->free_tx_handles = handle->next;
		ddi_dma_free_handle(&handle->h);
		kmem_free(handle, sizeof (*handle));
		handle = tx->free_tx_handles;
		MYRI10GE_SLICE_STAT_DEC(tx_handles_alloced);
	}
	mutex_exit(&tx->handle_lock);
	if (MYRI10GE_SLICE_STAT(tx_handles_alloced) != 0) {
		cmn_err(CE_WARN, "%s: %d tx dma handles allocated at close\n",
		    ss->mgp->name,
		    (int)MYRI10GE_SLICE_STAT(tx_handles_alloced));
	}
}

static void
myri10ge_free_tx_handles(myri10ge_tx_ring_t *tx,
    struct myri10ge_tx_dma_handle_head *list)
{
	mutex_enter(&tx->handle_lock);
	list->tail->next = tx->free_tx_handles;
	tx->free_tx_handles = list->head;
	mutex_exit(&tx->handle_lock);
}

static void
myri10ge_free_tx_handle_slist(myri10ge_tx_ring_t *tx,
    struct myri10ge_tx_dma_handle *handle)
{
	struct myri10ge_tx_dma_handle_head list;

	if (handle == NULL)
		return;
	list.head = handle;
	list.tail = handle;
	while (handle != NULL) {
		list.tail = handle;
		handle = handle->next;
	}
	myri10ge_free_tx_handles(tx, &list);
}

static int
myri10ge_alloc_tx_handles(struct myri10ge_slice_state *ss, int count,
    struct myri10ge_tx_dma_handle **ret)
{
	myri10ge_tx_ring_t *tx = &ss->tx;
	struct myri10ge_tx_dma_handle *handle;
	int err, i;

	mutex_enter(&tx->handle_lock);
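	/*
	 * Note on the loop below: handle_lock is dropped whenever the
	 * free list runs dry so that myri10ge_add_tx_handle() can
	 * allocate with KM_SLEEP / DDI_DMA_SLEEP without holding the
	 * mutex across a potentially blocking call.
	 */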
	for (i = 0; i < count; i++) {
		handle = tx->free_tx_handles;
		while (handle == NULL) {
			mutex_exit(&tx->handle_lock);
			err = myri10ge_add_tx_handle(ss);
			if (err != DDI_SUCCESS) {
				goto abort_with_handles;
			}
			mutex_enter(&tx->handle_lock);
			handle = tx->free_tx_handles;
		}
		tx->free_tx_handles = handle->next;
		handle->next = *ret;
		*ret = handle;
	}
	mutex_exit(&tx->handle_lock);
	return (DDI_SUCCESS);

abort_with_handles:
	myri10ge_free_tx_handle_slist(tx, *ret);
	return (err);
}


/*
 * Frees DMA resources associated with the send ring
 */
static void
myri10ge_unprepare_tx_ring(struct myri10ge_slice_state *ss)
{
	myri10ge_tx_ring_t *tx;
	struct myri10ge_tx_dma_handle_head handles;
	size_t bytes;
	int idx;

	tx = &ss->tx;
	handles.head = NULL;
	handles.tail = NULL;
	for (idx = 0; idx < ss->tx.mask + 1; idx++) {
		if (tx->info[idx].m) {
			(void) ddi_dma_unbind_handle(tx->info[idx].handle->h);
			handles.head = tx->info[idx].handle;
			if (handles.tail == NULL)
				handles.tail = tx->info[idx].handle;
			freeb(tx->info[idx].m);
			tx->info[idx].m = 0;
			tx->info[idx].handle = 0;
		}
		tx->cp[idx].va = NULL;
		myri10ge_dma_free(&tx->cp[idx].dma);
	}
	bytes = sizeof (*tx->cp) * (tx->mask + 1);
	kmem_free(tx->cp, bytes);
	tx->cp = NULL;
	if (handles.head != NULL)
		myri10ge_free_tx_handles(tx, &handles);
	myri10ge_remove_tx_handles(ss);
}

/*
 * Allocates DMA handles associated with the send ring
 */
static inline int
myri10ge_prepare_tx_ring(struct myri10ge_slice_state *ss)
{
	struct myri10ge_tx_dma_handle *handles;
	int h;
	size_t bytes;

	bytes = sizeof (*ss->tx.cp) * (ss->tx.mask + 1);
	ss->tx.cp = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->tx.cp == NULL) {
		cmn_err(CE_WARN,
		    "%s: Failed to allocate tx copyblock storage\n",
		    ss->mgp->name);
		return (DDI_FAILURE);
	}


	/* allocate the TX copyblocks */
	for (h = 0; h < ss->tx.mask + 1; h++) {
		ss->tx.cp[h].va = myri10ge_dma_alloc(ss->mgp->dip,
		    4096, &myri10ge_rx_jumbo_dma_attr,
		    &myri10ge_dev_access_attr, DDI_DMA_STREAMING,
		    DDI_DMA_WRITE|DDI_DMA_STREAMING, &ss->tx.cp[h].dma, 1,
		    DDI_DMA_DONTWAIT);
		if (ss->tx.cp[h].va == NULL) {
			cmn_err(CE_WARN, "%s: Failed to allocate tx "
			    "copyblock %d\n", ss->mgp->name, h);
			goto abort_with_copyblocks;
		}
	}
	/* pre-allocate transmit handles */
	handles = NULL;
	(void) myri10ge_alloc_tx_handles(ss, myri10ge_tx_handles_initial,
	    &handles);
	if (handles != NULL)
		myri10ge_free_tx_handle_slist(&ss->tx, handles);

	return (DDI_SUCCESS);

abort_with_copyblocks:
	while (h > 0) {
		h--;
		myri10ge_dma_free(&ss->tx.cp[h].dma);
	}

	bytes = sizeof (*ss->tx.cp) * (ss->tx.mask + 1);
	kmem_free(ss->tx.cp, bytes);
	ss->tx.cp = NULL;
	return (DDI_FAILURE);
}

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PT:ddd mmm xx xx:xx:xx xx\0
 * PV:ddd mmm xx xx:xx:xx xx\0
 */
static int
myri10ge_read_mac_addr(struct myri10ge_priv *mgp)
{
#define	MYRI10GE_NEXT_STRING(p) while (ptr < limit && *ptr++)
#define	myri10ge_digit(c) (((c) >= '0' && (c) <= '9') ? ((c) - '0') : \
	(((c) >= 'A' && (c) <= 'F') ? (10 + (c) - 'A') : \
	(((c) >= 'a' && (c) <= 'f') ? (10 + (c) - 'a') : -1)))
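
	/*
	 * Note: MAC digit fields may be one or two hex characters
	 * (e.g. "MAC=0:1b:2c:3:4d:5e"), which is why the parser below
	 * peeks at ptr+1 for a ':' before deciding how many digits to
	 * consume.
	 */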
	char *ptr, *limit;
	int i, hv, lv;

	ptr = mgp->eeprom_strings;
	limit = mgp->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE;

	while (*ptr != '\0' && ptr < limit) {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			ptr += 4;
			if (myri10ge_verbose)
				printf("%s: mac address = %s\n", mgp->name,
				    ptr);
			mgp->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				if ((ptr + 2) > limit)
					goto abort;

				if (*(ptr+1) == ':') {
					hv = 0;
					lv = myri10ge_digit(*ptr); ptr++;
				} else {
					hv = myri10ge_digit(*ptr); ptr++;
					lv = myri10ge_digit(*ptr); ptr++;
				}
				mgp->mac_addr[i] = (hv << 4) | lv;
				ptr++;
			}
		}
		if (memcmp((const void *)ptr, "SN=", 3) == 0) {
			ptr += 3;
			mgp->sn_str = (char *)ptr;
		}
		if (memcmp((const void *)ptr, "PC=", 3) == 0) {
			ptr += 3;
			mgp->pc_str = (char *)ptr;
		}
		MYRI10GE_NEXT_STRING(ptr);
	}

	return (0);

abort:
	cmn_err(CE_WARN, "%s: failed to parse eeprom_strings", mgp->name);
	return (ENXIO);
}


/*
 * Determine the register set containing the PCI resource we
 * want to map: the memory-mappable part of the interface. We do
 * this by scanning the DDI "reg" property of the interface,
 * which is an array of mx_ddi_reg_set structures.
 */
static int
myri10ge_reg_set(dev_info_t *dip, int *reg_set, int *span,
    unsigned long *busno, unsigned long *devno,
    unsigned long *funcno)
{

#define	REGISTER_NUMBER(ip)	(ip[0] >> 0 & 0xff)
#define	FUNCTION_NUMBER(ip)	(ip[0] >> 8 & 0x07)
#define	DEVICE_NUMBER(ip)	(ip[0] >> 11 & 0x1f)
#define	BUS_NUMBER(ip)		(ip[0] >> 16 & 0xff)
#define	ADDRESS_SPACE(ip)	(ip[0] >> 24 & 0x03)
#define	PCI_ADDR_HIGH(ip)	(ip[1])
#define	PCI_ADDR_LOW(ip)	(ip[2])
#define	PCI_SPAN_HIGH(ip)	(ip[3])
#define	PCI_SPAN_LOW(ip)	(ip[4])

#define	MX_DDI_REG_SET_32_BIT_MEMORY_SPACE 2
#define	MX_DDI_REG_SET_64_BIT_MEMORY_SPACE 3

	int *data, i, *rs;
	uint32_t nelementsp;

#ifdef MYRI10GE_REGSET_VERBOSE
	char *address_space_name[] = { "Configuration Space",
					"I/O Space",
					"32-bit Memory Space",
					"64-bit Memory Space"
	};
#endif

	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "reg", &data, &nelementsp) != DDI_SUCCESS) {
		printf("Could not determine register set.\n");
		return (ENXIO);
	}

#ifdef MYRI10GE_REGSET_VERBOSE
	printf("There are %d register sets.\n", nelementsp / 5);
#endif
	if (!nelementsp) {
		printf("Didn't find any \"reg\" properties.\n");
		ddi_prop_free(data);
		return (ENODEV);
	}

	/* Scan for the register number. */
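	/*
	 * Note: each "reg" entry is five 32-bit cells (hence the
	 * nelementsp / 5 arithmetic below): a config word holding the
	 * bus/device/function and address-space type, then 64 bits of
	 * PCI address and 64 bits of span, matching the accessor macros
	 * defined above.
	 */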
	rs = &data[0];
	*busno = BUS_NUMBER(rs);
	*devno = DEVICE_NUMBER(rs);
	*funcno = FUNCTION_NUMBER(rs);

#ifdef MYRI10GE_REGSET_VERBOSE
	printf("*** Scanning for register number.\n");
#endif
	for (i = 0; i < nelementsp / 5; i++) {
		rs = &data[5 * i];
#ifdef MYRI10GE_REGSET_VERBOSE
		printf("Examining register set %d:\n", i);
		printf("  Register number = %d.\n", REGISTER_NUMBER(rs));
		printf("  Function number = %d.\n", FUNCTION_NUMBER(rs));
		printf("  Device number = %d.\n", DEVICE_NUMBER(rs));
		printf("  Bus number = %d.\n", BUS_NUMBER(rs));
		printf("  Address space = %d (%s ).\n", ADDRESS_SPACE(rs),
		    address_space_name[ADDRESS_SPACE(rs)]);
		printf("  pci address 0x%08x %08x\n", PCI_ADDR_HIGH(rs),
		    PCI_ADDR_LOW(rs));
		printf("  pci span 0x%08x %08x\n", PCI_SPAN_HIGH(rs),
		    PCI_SPAN_LOW(rs));
#endif
		/* We are looking for a memory property. */

		if (ADDRESS_SPACE(rs) == MX_DDI_REG_SET_64_BIT_MEMORY_SPACE ||
		    ADDRESS_SPACE(rs) == MX_DDI_REG_SET_32_BIT_MEMORY_SPACE) {
			*reg_set = i;

#ifdef MYRI10GE_REGSET_VERBOSE
			printf("%s uses register set %d.\n",
			    address_space_name[ADDRESS_SPACE(rs)], *reg_set);
#endif

			*span = (PCI_SPAN_LOW(rs));
#ifdef MYRI10GE_REGSET_VERBOSE
			printf("Board span is 0x%x\n", *span);
#endif
			break;
		}
	}

	ddi_prop_free(data);

	/* If no match, fail. */
	if (i >= nelementsp / 5) {
		return (EIO);
	}

	return (0);
}


static int
myri10ge_load_firmware_from_zlib(struct myri10ge_priv *mgp, uint32_t *limit)
{
	void *inflate_buffer;
	int rv, status;
	size_t sram_size = mgp->sram_size - MYRI10GE_EEPROM_STRINGS_SIZE;
	size_t destlen;
	mcp_gen_header_t *hdr;
	unsigned hdr_offset, i;


	*limit = 0; /* -Wuninitialized */
	status = 0;

	inflate_buffer = kmem_zalloc(sram_size, KM_NOSLEEP);
	if (!inflate_buffer) {
		cmn_err(CE_WARN,
		    "%s: Could not allocate buffer to inflate mcp\n",
		    mgp->name);
		return (ENOMEM);
	}

	destlen = sram_size;
	rv = z_uncompress(inflate_buffer, &destlen, mgp->eth_z8e,
	    mgp->eth_z8e_length);

	if (rv != Z_OK) {
		cmn_err(CE_WARN, "%s: Could not inflate mcp: %s\n",
		    mgp->name, z_strerror(rv));
		status = ENXIO;
		goto abort;
	}

	*limit = (uint32_t)destlen;

	hdr_offset = htonl(*(uint32_t *)(void *)((char *)inflate_buffer +
	    MCP_HEADER_PTR_OFFSET));
	hdr = (void *)((char *)inflate_buffer + hdr_offset);
	if (ntohl(hdr->mcp_type) != MCP_TYPE_ETH) {
		cmn_err(CE_WARN, "%s: Bad firmware type: 0x%x\n", mgp->name,
		    ntohl(hdr->mcp_type));
		status = EIO;
		goto abort;
	}

	/* save firmware version for kstat */
	(void) strncpy(mgp->fw_version, hdr->version, sizeof (mgp->fw_version));
	if (myri10ge_verbose)
		printf("%s: firmware id: %s\n", mgp->name, hdr->version);

	/* Copy the inflated firmware to NIC SRAM. */
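	/*
	 * Note (inference, not from the original comments): the
	 * 256-byte chunking below keeps each PIO burst small, and the
	 * dummy read of SRAM between chunks, bracketed by memory
	 * barriers, forces posted writes to be flushed before the next
	 * burst is issued.
	 */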
	for (i = 0; i < *limit; i += 256) {
		myri10ge_pio_copy((char *)mgp->sram + MYRI10GE_FW_OFFSET + i,
		    (char *)inflate_buffer + i,
		    min(256U, (unsigned)(*limit - i)));
		mb();
		(void) *(int *)(void *)mgp->sram;
		mb();
	}

abort:
	kmem_free(inflate_buffer, sram_size);

	return (status);

}


int
myri10ge_send_cmd(struct myri10ge_priv *mgp, uint32_t cmd,
    myri10ge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof (*buf) + 8];
	volatile mcp_cmd_response_t *response = mgp->cmd;
	volatile char *cmd_addr =
	    (volatile char *)mgp->sram + MXGEFW_ETH_CMD;
	int sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	buf->data0 = htonl(data->data0);
	buf->data1 = htonl(data->data1);
	buf->data2 = htonl(data->data2);
	buf->cmd = htonl(cmd);
	buf->response_addr.low = mgp->cmd_dma.low;
	buf->response_addr.high = mgp->cmd_dma.high;
	mutex_enter(&mgp->cmd_lock);
	response->result = 0xffffffff;
	mb();

	myri10ge_pio_copy((void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		mb();
		if (response->result != 0xffffffff) {
			if (response->result == 0) {
				data->data0 = ntohl(response->data);
				mutex_exit(&mgp->cmd_lock);
				return (0);
			} else if (ntohl(response->result)
			    == MXGEFW_CMD_UNKNOWN) {
				mutex_exit(&mgp->cmd_lock);
				return (ENOSYS);
			} else if (ntohl(response->result)
			    == MXGEFW_CMD_ERROR_UNALIGNED) {
				mutex_exit(&mgp->cmd_lock);
				return (E2BIG);
			} else {
				cmn_err(CE_WARN,
				    "%s: command %d failed, result = %d\n",
				    mgp->name, cmd, ntohl(response->result));
				mutex_exit(&mgp->cmd_lock);
				return (ENXIO);
			}
		}
		drv_usecwait(1000);
	}
	mutex_exit(&mgp->cmd_lock);
	cmn_err(CE_WARN, "%s: command %d timed out, result = %d\n",
	    mgp->name, cmd, ntohl(response->result));
	return (EAGAIN);
}

/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
myri10ge_dummy_rdma(struct myri10ge_priv *mgp, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf;
	int i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)mgp->cmd;
	*confirm = 0;
	mb();

	/*
	 * send an rdma command to the PCIe engine, and wait for the
	 * response in the confirmation address.  The firmware should
	 * write a -1 there to indicate it is alive and well
	 */

	buf[0] = mgp->cmd_dma.high;		/* confirm addr MSW */
	buf[1] = mgp->cmd_dma.low;		/* confirm addr LSW */
	buf[2] = htonl(0xffffffff);		/* confirm data */
	buf[3] = htonl(mgp->cmd_dma.high);	/* dummy addr MSW */
	buf[4] = htonl(mgp->cmd_dma.low);	/* dummy addr LSW */
	buf[5] = htonl(enable);			/* enable? */


	submit = (volatile char *)(mgp->sram + MXGEFW_BOOT_DUMMY_RDMA);

	myri10ge_pio_copy((char *)submit, buf, 64);
	mb();
	drv_usecwait(1000);
	mb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		drv_usecwait(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		cmn_err(CE_WARN, "%s: dummy rdma %s failed (%p = 0x%x)",
		    mgp->name,
		    (enable ? "enable" : "disable"), (void*) confirm, *confirm);
	}
}

static int
myri10ge_load_firmware(struct myri10ge_priv *mgp)
{
	myri10ge_cmd_t cmd;
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	status = myri10ge_load_firmware_from_zlib(mgp, &size);
	if (status) {
		cmn_err(CE_WARN, "%s: firmware loading failed\n", mgp->name);
		return (status);
	}

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)mgp->cmd;
	*confirm = 0;
	mb();

	/*
	 * send a reload command to the bootstrap MCP, and wait for the
	 * response in the confirmation address.  The firmware should
	 * write a -1 there to indicate it is alive and well
	 */

	buf[0] = mgp->cmd_dma.high;	/* confirm addr MSW */
	buf[1] = mgp->cmd_dma.low;	/* confirm addr LSW */
	buf[2] = htonl(0xffffffff);	/* confirm data */

	/*
	 * FIX: All newest firmware should un-protect the bottom of
	 * the sram before handoff. However, the very first interfaces
	 * do not. Therefore the handoff copy must skip the first 8 bytes
	 */
	buf[3] = htonl(MYRI10GE_FW_OFFSET + 8); /* where the code starts */
	buf[4] = htonl(size - 8);	/* length of code */
	buf[5] = htonl(8);		/* where to copy to */
	buf[6] = htonl(0);		/* where to jump to */

	submit = (volatile char *)(mgp->sram + MXGEFW_BOOT_HANDOFF);

	myri10ge_pio_copy((char *)submit, buf, 64);
	mb();
	drv_usecwait(1000);
	mb();
	i = 0;
	while (*confirm != 0xffffffff && i < 1000) {
		drv_usecwait(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		cmn_err(CE_WARN, "%s: handoff failed (%p = 0x%x)",
		    mgp->name, (void *) confirm, *confirm);

		return (ENXIO);
	}
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed MXGEFW_CMD_GET_RX_RING_SIZE\n",
		    mgp->name);
		return (ENXIO);
	}

	mgp->max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
	myri10ge_dummy_rdma(mgp, 1);
	return (0);
}

static int
myri10ge_m_unicst(void *arg, const uint8_t *addr)
{
	struct myri10ge_priv *mgp = arg;
	myri10ge_cmd_t cmd;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
	    | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = myri10ge_send_cmd(mgp, MXGEFW_SET_MAC_ADDRESS, &cmd);
	if (status == 0 && (addr != mgp->mac_addr))
		(void) memcpy(mgp->mac_addr, addr, sizeof (mgp->mac_addr));

	return (status);
}

static int
myri10ge_change_pause(struct myri10ge_priv *mgp, int pause)
{
	myri10ge_cmd_t cmd;
	int status;

	if (pause)
		status = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_FLOW_CONTROL,
		    &cmd);
	else
		status = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_FLOW_CONTROL,
		    &cmd);

	if (status) {
		cmn_err(CE_WARN, "%s: Failed to set flow control mode\n",
		    mgp->name);
		return (ENXIO);
	}
	mgp->pause = pause;
	return (0);
}

static void
myri10ge_change_promisc(struct myri10ge_priv *mgp, int promisc)
{
	myri10ge_cmd_t cmd;
	int status;

	if (promisc)
		status = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_PROMISC, &cmd);
	else
		status = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_PROMISC, &cmd);

	if (status) {
		cmn_err(CE_WARN, "%s: Failed to set promisc mode\n",
		    mgp->name);
	}
}

static int
myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type)
{
	myri10ge_cmd_t cmd;
	int status;
	uint32_t len;
	void *dmabench;
	struct myri10ge_dma_stuff dmabench_dma;
	char *test = " ";

	/*
	 * Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete
	 */

	len = mgp->tx_boundary;

	dmabench = myri10ge_dma_alloc(mgp->dip, len,
	    &myri10ge_rx_jumbo_dma_attr, &myri10ge_dev_access_attr,
	    DDI_DMA_STREAMING, DDI_DMA_RDWR|DDI_DMA_STREAMING,
	    &dmabench_dma, 1, DDI_DMA_DONTWAIT);
	mgp->read_dma = mgp->write_dma = mgp->read_write_dma = 0;
	if (dmabench == NULL) {
		cmn_err(CE_WARN, "%s dma benchmark aborted\n", mgp->name);
		return (ENOMEM);
	}

	cmd.data0 = ntohl(dmabench_dma.low);
	cmd.data1 = ntohl(dmabench_dma.high);
	cmd.data2 = len * 0x10000;
	status = myri10ge_send_cmd(mgp, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	mgp->read_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);

	cmd.data0 = ntohl(dmabench_dma.low);
	cmd.data1 = ntohl(dmabench_dma.high);
	cmd.data2 = len * 0x1;
	status = myri10ge_send_cmd(mgp, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	mgp->write_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);

	cmd.data0 = ntohl(dmabench_dma.low);
	cmd.data1 = ntohl(dmabench_dma.high);
	cmd.data2 = len * 0x10001;
	status = myri10ge_send_cmd(mgp, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	mgp->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
	    (cmd.data0 & 0xffff);


abort:
	myri10ge_dma_free(&dmabench_dma);
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		cmn_err(CE_WARN, "%s %s dma benchmark failed\n", mgp->name,
		    test);
	return (status);
}

static int
myri10ge_reset(struct myri10ge_priv *mgp)
{
	myri10ge_cmd_t cmd;
	struct myri10ge_nic_stat *ethstat;
	struct myri10ge_slice_state *ss;
	int i, status;
	size_t bytes;

	/* send a reset command to the card to see if it is alive */
	(void) memset(&cmd, 0, sizeof (cmd));
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed reset\n", mgp->name);
		return (ENXIO);
	}

	/* Now exchange information about interrupts */

	bytes = mgp->max_intr_slots * sizeof (*mgp->ss[0].rx_done.entry);
	cmd.data0 = (uint32_t)bytes;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via myri10ge_probe_slices() MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (mgp->num_slices > 1) {

		/* ask the maximum number of slices it supports */
		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
		    &cmd);
		if (status != 0) {
			cmn_err(CE_WARN,
			    "%s: failed to get number of slices\n",
			    mgp->name);
			return (status);
		}

		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */

		cmd.data0 = mgp->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE |
		    MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
		    &cmd);
		if (status != 0) {
			cmn_err(CE_WARN,
			    "%s: failed to set number of slices\n",
			    mgp->name);
			return (status);
		}
	}
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		cmd.data0 = ntohl(ss->rx_done.dma.low);
		cmd.data1 = ntohl(ss->rx_done.dma.high);
		cmd.data2 = i;
		status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_DMA,
		    &cmd);
	}

	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		ss->irq_claim = (volatile unsigned int *)
		    (void *)(mgp->sram + cmd.data0 + 8 * i);
	}

	if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
		status |= myri10ge_send_cmd(mgp,
		    MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd);
		mgp->irq_deassert = (uint32_t *)(void *)(mgp->sram + cmd.data0);
	}

	status |= myri10ge_send_cmd(mgp,
	    MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
	mgp->intr_coal_delay_ptr = (uint32_t *)(void *)(mgp->sram + cmd.data0);

	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed to set interrupt parameters\n",
		    mgp->name);
		return (status);
	}

	*mgp->intr_coal_delay_ptr = htonl(mgp->intr_coal_delay);
	(void) myri10ge_dma_test(mgp, MXGEFW_DMA_TEST);

	/* reset mcp/driver shared state back to 0 */

	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		bytes = mgp->max_intr_slots *
		    sizeof (*mgp->ss[0].rx_done.entry);
		(void) memset(ss->rx_done.entry, 0, bytes);
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->rx_token = 0;
		ss->tx.watchdog_done = 0;
		ss->tx.watchdog_req = 0;
		ss->tx.active = 0;
		ss->tx.activate = 0;
	}
	mgp->watchdog_rx_pause = 0;
	if (mgp->ksp_stat != NULL) {
		ethstat = (struct myri10ge_nic_stat *)mgp->ksp_stat->ks_data;
		ethstat->link_changes.value.ul = 0;
	}
	status = myri10ge_m_unicst(mgp, mgp->mac_addr);
	myri10ge_change_promisc(mgp, 0);
	(void) myri10ge_change_pause(mgp, mgp->pause);
	return (status);
}
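
/*
 * Commentary on the construction below: the Toeplitz hash of an input
 * XORs together, for every input bit that is set, the 32-bit window of
 * the key beginning at that bit.  For each of the 12 hashed input
 * bytes b, k[s] holds the key window starting at bit b*8+s; XORing the
 * windows selected by the bits of a byte value i gives
 * toeplitz_hash_table[b*256 + i].  The per-packet hash in
 * myri10ge_toeplitz_send_hash() then reduces to one table lookup and
 * XOR per input byte.
 */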

static int
myri10ge_init_toeplitz(struct myri10ge_priv *mgp)
{
	myri10ge_cmd_t cmd;
	int i, b, s, t, j;
	int status;
	uint32_t k[8];
	uint32_t tmp;
	uint8_t *key;

	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RSS_KEY_OFFSET,
	    &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed to get rss key\n",
		    mgp->name);
		return (EIO);
	}
	myri10ge_pio_copy32(mgp->rss_key,
	    (uint32_t *)(void*)((char *)mgp->sram + cmd.data0),
	    sizeof (mgp->rss_key));

	mgp->toeplitz_hash_table = kmem_alloc(sizeof (uint32_t) * 12 * 256,
	    KM_SLEEP);
	key = (uint8_t *)mgp->rss_key;
	t = 0;
	for (b = 0; b < 12; b++) {
		for (s = 0; s < 8; s++) {
			/* Bits: b*8+s, ..., b*8+s+31 */
			k[s] = 0;
			for (j = 0; j < 32; j++) {
				int bit = b*8+s+j;
				bit = 0x1 & (key[bit / 8] >> (7 -(bit & 0x7)));
				k[s] |= bit << (31 - j);
			}
		}

		for (i = 0; i <= 0xff; i++) {
			tmp = 0;
			if (i & (1 << 7)) { tmp ^= k[0]; }
			if (i & (1 << 6)) { tmp ^= k[1]; }
			if (i & (1 << 5)) { tmp ^= k[2]; }
			if (i & (1 << 4)) { tmp ^= k[3]; }
			if (i & (1 << 3)) { tmp ^= k[4]; }
			if (i & (1 << 2)) { tmp ^= k[5]; }
			if (i & (1 << 1)) { tmp ^= k[6]; }
			if (i & (1 << 0)) { tmp ^= k[7]; }
			mgp->toeplitz_hash_table[t++] = tmp;
		}
	}
	return (0);
}

static inline struct myri10ge_slice_state *
myri10ge_toeplitz_send_hash(struct myri10ge_priv *mgp, struct ip *ip)
{
	struct tcphdr *hdr;
	uint32_t saddr, daddr;
	uint32_t hash, slice;
	uint32_t *table = mgp->toeplitz_hash_table;
	uint16_t src, dst;

	/*
	 * Note hashing order is reversed from how it is done
	 * in the NIC, so as to generate the same hash value
	 * for the connection to try to keep connections CPU local
	 */

	/* hash on IPv4 src/dst address */
	saddr = ntohl(ip->ip_src.s_addr);
	daddr = ntohl(ip->ip_dst.s_addr);
	hash = table[(256 * 0) + ((daddr >> 24) & 0xff)];
	hash ^= table[(256 * 1) + ((daddr >> 16) & 0xff)];
	hash ^= table[(256 * 2) + ((daddr >> 8) & 0xff)];
	hash ^= table[(256 * 3) + ((daddr) & 0xff)];
	hash ^= table[(256 * 4) + ((saddr >> 24) & 0xff)];
	hash ^= table[(256 * 5) + ((saddr >> 16) & 0xff)];
	hash ^= table[(256 * 6) + ((saddr >> 8) & 0xff)];
	hash ^= table[(256 * 7) + ((saddr) & 0xff)];
	/* hash on TCP port, if required */
	if ((myri10ge_rss_hash & MXGEFW_RSS_HASH_TYPE_TCP_IPV4) &&
	    ip->ip_p == IPPROTO_TCP) {
		hdr = (struct tcphdr *)(void *)
		    (((uint8_t *)ip) + (ip->ip_hl << 2));
		src = ntohs(hdr->th_sport);
		dst = ntohs(hdr->th_dport);

		hash ^= table[(256 * 8) + ((dst >> 8) & 0xff)];
		hash ^= table[(256 * 9) + ((dst) & 0xff)];
		hash ^= table[(256 * 10) + ((src >> 8) & 0xff)];
		hash ^= table[(256 * 11) + ((src) & 0xff)];
	}
	slice = (mgp->num_slices - 1) & hash;
	return (&mgp->ss[slice]);

}

static inline struct myri10ge_slice_state *
myri10ge_simple_send_hash(struct myri10ge_priv *mgp, struct ip *ip)
{
	struct tcphdr *hdr;
	uint32_t slice, hash_val;


	if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP) {
		return (&mgp->ss[0]);
	}
	hdr = (struct tcphdr *)(void *)(((uint8_t *)ip) + (ip->ip_hl << 2));

	/*
	 * Use the second byte of the *destination* address for
	 * MXGEFW_RSS_HASH_TYPE_SRC_PORT, so as to match NIC's hashing
	 */
	hash_val = ntohs(hdr->th_dport) & 0xff;
	if (myri10ge_rss_hash == MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT)
		hash_val += ntohs(hdr->th_sport) & 0xff;

	slice = (mgp->num_slices - 1) & hash_val;
	return (&mgp->ss[slice]);
}

static inline struct myri10ge_slice_state *
myri10ge_send_hash(struct myri10ge_priv *mgp, mblk_t *mp)
{
	unsigned int slice = 0;
	struct ether_header *eh;
	struct ether_vlan_header *vh;
	struct ip *ip;
	int ehl, ihl;

	if (mgp->num_slices == 1)
		return (&mgp->ss[0]);

	if (myri10ge_tx_hash == 0) {
		slice = CPU->cpu_id & (mgp->num_slices - 1);
		return (&mgp->ss[slice]);
	}

	/*
	 * ensure it is a TCP or UDP over IPv4 packet, and that the
	 * headers are in the 1st mblk.  Otherwise, punt
	 */
	ehl = sizeof (*eh);
	ihl = sizeof (*ip);
	if ((MBLKL(mp)) < (ehl + ihl + 8))
		return (&mgp->ss[0]);
	eh = (struct ether_header *)(void *)mp->b_rptr;
	ip = (struct ip *)(void *)(eh + 1);
	if (eh->ether_type != BE_16(ETHERTYPE_IP)) {
		if (eh->ether_type != BE_16(ETHERTYPE_VLAN))
			return (&mgp->ss[0]);
		vh = (struct ether_vlan_header *)(void *)mp->b_rptr;
		if (vh->ether_type != BE_16(ETHERTYPE_IP))
			return (&mgp->ss[0]);
		ehl += 4;
		ip = (struct ip *)(void *)(vh + 1);
	}
	ihl = ip->ip_hl << 2;
	if (MBLKL(mp) < (ehl + ihl + 8))
		return (&mgp->ss[0]);
	switch (myri10ge_rss_hash) {
	case MXGEFW_RSS_HASH_TYPE_IPV4:
		/* fallthru */
	case MXGEFW_RSS_HASH_TYPE_TCP_IPV4:
		/* fallthru */
	case (MXGEFW_RSS_HASH_TYPE_IPV4|MXGEFW_RSS_HASH_TYPE_TCP_IPV4):
		return (myri10ge_toeplitz_send_hash(mgp, ip));
	case MXGEFW_RSS_HASH_TYPE_SRC_PORT:
		/* fallthru */
	case MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT:
		return (myri10ge_simple_send_hash(mgp, ip));
	default:
		break;
	}
	return (&mgp->ss[0]);
}

static int
myri10ge_setup_slice(struct myri10ge_slice_state *ss)
{
	struct myri10ge_priv *mgp = ss->mgp;
	myri10ge_cmd_t cmd;
	int tx_ring_size, rx_ring_size;
	int tx_ring_entries, rx_ring_entries;
	int slice, status;
	int allocated, idx;
	size_t bytes;

	slice = ss - mgp->ss;
	cmd.data0 = slice;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
	tx_ring_size = cmd.data0;
	cmd.data0 = slice;
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0)
		return (status);
	rx_ring_size = cmd.data0;

	tx_ring_entries = tx_ring_size / sizeof (struct mcp_kreq_ether_send);
	rx_ring_entries = rx_ring_size / sizeof (struct mcp_dma_addr);
	ss->tx.mask = tx_ring_entries - 1;
	ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;

	/* get the lanai pointers to the send and receive rings */

	cmd.data0 = slice;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
	ss->tx.lanai = (mcp_kreq_ether_send_t *)(void *)(mgp->sram + cmd.data0);
	if (mgp->num_slices > 1) {
		ss->tx.go = (char *)mgp->sram + MXGEFW_ETH_SEND_GO + 64 * slice;
		ss->tx.stop = (char *)mgp->sram + MXGEFW_ETH_SEND_STOP +
		    64 * slice;
	} else {
		ss->tx.go = NULL;
		ss->tx.stop = NULL;
	}

	cmd.data0 = slice;
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
	ss->rx_small.lanai = (mcp_kreq_ether_recv_t *)
	    (void *)(mgp->sram + cmd.data0);

	cmd.data0 = slice;
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
	ss->rx_big.lanai = (mcp_kreq_ether_recv_t *)(void *)
	    (mgp->sram + cmd.data0);

	if (status != 0) {
		cmn_err(CE_WARN,
		    "%s: failed to get ring sizes or locations\n", mgp->name);
		return (status);
	}

	status = ENOMEM;
	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
	ss->rx_small.shadow = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_small.shadow == NULL)
		goto abort;
	(void) memset(ss->rx_small.shadow, 0, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
	ss->rx_big.shadow = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_big.shadow == NULL)
		goto abort_with_rx_small_shadow;
	(void) memset(ss->rx_big.shadow, 0, bytes);

	/* allocate the host info rings */

	bytes = tx_ring_entries * sizeof (*ss->tx.info);
	ss->tx.info = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->tx.info == NULL)
		goto abort_with_rx_big_shadow;
	(void) memset(ss->tx.info, 0, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
	ss->rx_small.info = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_small.info == NULL)
		goto abort_with_tx_info;
	(void) memset(ss->rx_small.info, 0, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
	ss->rx_big.info = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_big.info == NULL)
		goto abort_with_rx_small_info;
	(void) memset(ss->rx_big.info, 0, bytes);

	ss->tx.stall = ss->tx.sched = 0;
	ss->tx.stall_early = ss->tx.stall_late = 0;

	ss->jbufs_for_smalls = 1 + (1 + ss->rx_small.mask) /
	    (myri10ge_mtu / (myri10ge_small_bytes + MXGEFW_PAD));

	allocated = myri10ge_add_jbufs(ss,
	    myri10ge_bigbufs_initial + ss->jbufs_for_smalls, 1);
	if (allocated < ss->jbufs_for_smalls + myri10ge_bigbufs_initial) {
		cmn_err(CE_WARN,
		    "%s: Could not allocate enough receive buffers (%d/%d)\n",
		    mgp->name, allocated,
		    myri10ge_bigbufs_initial + ss->jbufs_for_smalls);
		goto abort_with_jumbos;
	}

	myri10ge_carve_up_jbufs_into_small_ring(ss);
	ss->j_rx_cnt = 0;

	mutex_enter(&ss->jpool.mtx);
	if (allocated < rx_ring_entries)
		ss->jpool.low_water = allocated / 4;
	else
		ss->jpool.low_water = rx_ring_entries / 2;

	/*
	 * invalidate the big receive ring in case we do not
	 * allocate sufficient jumbos to fill it
	 */
	(void) memset(ss->rx_big.shadow, 1,
	    (ss->rx_big.mask + 1) * sizeof (ss->rx_big.shadow[0]));
	for (idx = 7; idx <= ss->rx_big.mask; idx += 8) {
		myri10ge_submit_8rx(&ss->rx_big.lanai[idx - 7],
		    &ss->rx_big.shadow[idx - 7]);
		mb();
	}


	myri10ge_restock_jumbos(ss);

	for (idx = 7; idx <= ss->rx_small.mask; idx += 8) {
		myri10ge_submit_8rx(&ss->rx_small.lanai[idx - 7],
		    &ss->rx_small.shadow[idx - 7]);
		mb();
	}
	ss->rx_small.cnt = ss->rx_small.mask + 1;

	mutex_exit(&ss->jpool.mtx);

	status = myri10ge_prepare_tx_ring(ss);

	if (status != 0)
		goto abort_with_small_jbufs;

	cmd.data0 = ntohl(ss->fw_stats_dma.low);
	cmd.data1 = ntohl(ss->fw_stats_dma.high);
	cmd.data2 = sizeof (mcp_irq_data_t);
	cmd.data2 |= (slice << 16);
	bzero(ss->fw_stats, sizeof (*ss->fw_stats));
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
	if (status == ENOSYS) {
		cmd.data0 = ntohl(ss->fw_stats_dma.low) +
		    offsetof(mcp_irq_data_t, send_done_count);
		cmd.data1 = ntohl(ss->fw_stats_dma.high);
status = myri10ge_send_cmd(mgp, 1976 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, &cmd); 1977 } 1978 if (status) { 1979 cmn_err(CE_WARN, "%s: Couldn't set stats DMA\n", mgp->name); 1980 goto abort_with_tx; 1981 } 1982 1983 return (0); 1984 1985 abort_with_tx: 1986 myri10ge_unprepare_tx_ring(ss); 1987 1988 abort_with_small_jbufs: 1989 myri10ge_release_small_jbufs(ss); 1990 1991 abort_with_jumbos: 1992 if (allocated != 0) { 1993 mutex_enter(&ss->jpool.mtx); 1994 ss->jpool.low_water = 0; 1995 mutex_exit(&ss->jpool.mtx); 1996 myri10ge_unstock_jumbos(ss); 1997 myri10ge_remove_jbufs(ss); 1998 } 1999 2000 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 2001 kmem_free(ss->rx_big.info, bytes); 2002 2003 abort_with_rx_small_info: 2004 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 2005 kmem_free(ss->rx_small.info, bytes); 2006 2007 abort_with_tx_info: 2008 bytes = tx_ring_entries * sizeof (*ss->tx.info); 2009 kmem_free(ss->tx.info, bytes); 2010 2011 abort_with_rx_big_shadow: 2012 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 2013 kmem_free(ss->rx_big.shadow, bytes); 2014 2015 abort_with_rx_small_shadow: 2016 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 2017 kmem_free(ss->rx_small.shadow, bytes); 2018 abort: 2019 return (status); 2020 2021 } 2022 2023 static void 2024 myri10ge_teardown_slice(struct myri10ge_slice_state *ss) 2025 { 2026 int tx_ring_entries, rx_ring_entries; 2027 size_t bytes; 2028 2029 /* ignore slices that have not been fully setup */ 2030 if (ss->tx.cp == NULL) 2031 return; 2032 /* Free the TX copy buffers */ 2033 myri10ge_unprepare_tx_ring(ss); 2034 2035 /* stop passing returned buffers to firmware */ 2036 2037 mutex_enter(&ss->jpool.mtx); 2038 ss->jpool.low_water = 0; 2039 mutex_exit(&ss->jpool.mtx); 2040 myri10ge_release_small_jbufs(ss); 2041 2042 /* Release the free jumbo frame pool */ 2043 myri10ge_unstock_jumbos(ss); 2044 myri10ge_remove_jbufs(ss); 2045 2046 rx_ring_entries = ss->rx_big.mask + 1; 2047 tx_ring_entries = ss->tx.mask + 1; 2048 2049 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 2050 kmem_free(ss->rx_big.info, bytes); 2051 2052 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 2053 kmem_free(ss->rx_small.info, bytes); 2054 2055 bytes = tx_ring_entries * sizeof (*ss->tx.info); 2056 kmem_free(ss->tx.info, bytes); 2057 2058 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 2059 kmem_free(ss->rx_big.shadow, bytes); 2060 2061 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 2062 kmem_free(ss->rx_small.shadow, bytes); 2063 2064 } 2065 static int 2066 myri10ge_start_locked(struct myri10ge_priv *mgp) 2067 { 2068 myri10ge_cmd_t cmd; 2069 int status, big_pow2, i; 2070 volatile uint8_t *itable; 2071 2072 status = DDI_SUCCESS; 2073 /* Allocate DMA resources and receive buffers */ 2074 2075 status = myri10ge_reset(mgp); 2076 if (status != 0) { 2077 cmn_err(CE_WARN, "%s: failed reset\n", mgp->name); 2078 return (DDI_FAILURE); 2079 } 2080 2081 if (mgp->num_slices > 1) { 2082 cmd.data0 = mgp->num_slices; 2083 cmd.data1 = 1; /* use MSI-X */ 2084 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES, 2085 &cmd); 2086 if (status != 0) { 2087 cmn_err(CE_WARN, 2088 "%s: failed to set number of slices\n", 2089 mgp->name); 2090 goto abort_with_nothing; 2091 } 2092 /* setup the indirection table */ 2093 cmd.data0 = mgp->num_slices; 2094 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 2095 &cmd); 2096 2097 status |= myri10ge_send_cmd(mgp, 2098 MXGEFW_CMD_GET_RSS_TABLE_OFFSET, &cmd); 2099 if (status != 0) { 2100 
cmn_err(CE_WARN, 2101 "%s: failed to set up rss tables\n", mgp->name); 2102 } 2103 2104 /* just enable an identity mapping */ 2105 itable = mgp->sram + cmd.data0; 2106 for (i = 0; i < mgp->num_slices; i++) 2107 itable[i] = (uint8_t)i; 2108 2109 if (myri10ge_rss_hash & MYRI10GE_TOEPLITZ_HASH) { 2110 status = myri10ge_init_toeplitz(mgp); 2111 if (status != 0) { 2112 cmn_err(CE_WARN, "%s: failed to set up " 2113 "toeplitz tx hash table", mgp->name); 2114 goto abort_with_nothing; 2115 } 2116 } 2117 cmd.data0 = 1; 2118 cmd.data1 = myri10ge_rss_hash; 2119 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_ENABLE, 2120 &cmd); 2121 if (status != 0) { 2122 cmn_err(CE_WARN, 2123 "%s: failed to enable slices\n", mgp->name); 2124 goto abort_with_toeplitz; 2125 } 2126 } 2127 2128 for (i = 0; i < mgp->num_slices; i++) { 2129 status = myri10ge_setup_slice(&mgp->ss[i]); 2130 if (status != 0) 2131 goto abort_with_slices; 2132 } 2133 2134 /* 2135 * Tell the MCP how many buffers it has, and to 2136 * bring the ethernet interface up 2137 * 2138 * Firmware needs the big buff size as a power of 2. Lie and 2139 * tell it the buffer is larger, because we only use 1 2140 * buffer/pkt, and the mtu will prevent overruns 2141 */ 2142 big_pow2 = myri10ge_mtu + MXGEFW_PAD; 2143 while (!ISP2(big_pow2)) 2144 big_pow2++; 2145 2146 /* now give the firmware the buffer sizes and the MTU */ 2147 cmd.data0 = myri10ge_mtu; 2148 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_MTU, &cmd); 2149 cmd.data0 = myri10ge_small_bytes; 2150 status |= 2151 myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd); 2152 cmd.data0 = big_pow2; 2153 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 2154 if (status) { 2155 cmn_err(CE_WARN, "%s: Couldn't set buffer sizes\n", mgp->name); 2156 goto abort_with_slices; 2157 } 2158 2159 2160 cmd.data0 = 1; 2161 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_TSO_MODE, &cmd); 2162 if (status) { 2163 cmn_err(CE_WARN, "%s: unable to set up TSO (%d)\n", 2164 mgp->name, status); 2165 } else { 2166 mgp->features |= MYRI10GE_TSO; 2167 } 2168 2169 mgp->link_state = -1; 2170 mgp->rdma_tags_available = 15; 2171 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_UP, &cmd); 2172 if (status) { 2173 cmn_err(CE_WARN, "%s: unable to start ethernet\n", mgp->name); 2174 goto abort_with_slices; 2175 } 2176 mgp->running = MYRI10GE_ETH_RUNNING; 2177 return (DDI_SUCCESS); 2178 2179 abort_with_slices: 2180 for (i = 0; i < mgp->num_slices; i++) 2181 myri10ge_teardown_slice(&mgp->ss[i]); 2182 2183 mgp->running = MYRI10GE_ETH_STOPPED; 2184 2185 abort_with_toeplitz: 2186 if (mgp->toeplitz_hash_table != NULL) { 2187 kmem_free(mgp->toeplitz_hash_table, 2188 sizeof (uint32_t) * 12 * 256); 2189 mgp->toeplitz_hash_table = NULL; 2190 } 2191 2192 abort_with_nothing: 2193 return (DDI_FAILURE); 2194 } 2195 2196 static void 2197 myri10ge_stop_locked(struct myri10ge_priv *mgp) 2198 { 2199 int status, old_down_cnt; 2200 myri10ge_cmd_t cmd; 2201 int wait_time = 10; 2202 int i, polling; 2203 2204 old_down_cnt = mgp->down_cnt; 2205 mb(); 2206 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 2207 if (status) { 2208 cmn_err(CE_WARN, "%s: Couldn't bring down link\n", mgp->name); 2209 } 2210 2211 while (old_down_cnt == *((volatile int *)&mgp->down_cnt)) { 2212 delay(1 * drv_usectohz(1000000)); 2213 wait_time--; 2214 if (wait_time == 0) 2215 break; 2216 } 2217 again: 2218 if (old_down_cnt == *((volatile int *)&mgp->down_cnt)) { 2219 cmn_err(CE_WARN, "%s: didn't get down irq\n", mgp->name); 2220 for (i = 0; i <
mgp->num_slices; i++) { 2221 /* 2222 * take and release the rx lock to ensure 2223 * that no interrupt thread is blocked 2224 * elsewhere in the stack, preventing 2225 * completion 2226 */ 2227 2228 mutex_enter(&mgp->ss[i].rx_lock); 2229 printf("%s: slice %d rx irq idle\n", 2230 mgp->name, i); 2231 mutex_exit(&mgp->ss[i].rx_lock); 2232 2233 /* verify that this slice's poll handler is inactive */ 2234 mutex_enter(&mgp->ss[i].poll_lock); 2235 polling = mgp->ss[i].rx_polling; 2236 mutex_exit(&mgp->ss[i].poll_lock); 2237 if (polling) { 2238 printf("%s: slice %d is polling\n", 2239 mgp->name, i); 2240 delay(1 * drv_usectohz(1000000)); 2241 goto again; 2242 } 2243 } 2244 delay(1 * drv_usectohz(1000000)); 2245 if (old_down_cnt == *((volatile int *)&mgp->down_cnt)) { 2246 cmn_err(CE_WARN, "%s: Never got down irq\n", mgp->name); 2247 } 2248 } 2249 2250 for (i = 0; i < mgp->num_slices; i++) 2251 myri10ge_teardown_slice(&mgp->ss[i]); 2252 2253 if (mgp->toeplitz_hash_table != NULL) { 2254 kmem_free(mgp->toeplitz_hash_table, 2255 sizeof (uint32_t) * 12 * 256); 2256 mgp->toeplitz_hash_table = NULL; 2257 } 2258 mgp->running = MYRI10GE_ETH_STOPPED; 2259 } 2260 2261 static int 2262 myri10ge_m_start(void *arg) 2263 { 2264 struct myri10ge_priv *mgp = arg; 2265 int status; 2266 2267 mutex_enter(&mgp->intrlock); 2268 2269 if (mgp->running != MYRI10GE_ETH_STOPPED) { 2270 mutex_exit(&mgp->intrlock); 2271 return (DDI_FAILURE); 2272 } 2273 status = myri10ge_start_locked(mgp); 2274 mutex_exit(&mgp->intrlock); 2275 2276 if (status != DDI_SUCCESS) 2277 return (status); 2278 2279 /* start the watchdog timer */ 2280 mgp->timer_id = timeout(myri10ge_watchdog, mgp, 2281 mgp->timer_ticks); 2282 return (DDI_SUCCESS); 2283 2284 } 2285 2286 static void 2287 myri10ge_m_stop(void *arg) 2288 { 2289 struct myri10ge_priv *mgp = arg; 2290 2291 mutex_enter(&mgp->intrlock); 2292 /* if the device is not running, give up */ 2293 if (mgp->running != MYRI10GE_ETH_RUNNING) { 2294 mutex_exit(&mgp->intrlock); 2295 return; 2296 } 2297 2298 mgp->running = MYRI10GE_ETH_STOPPING; 2299 mutex_exit(&mgp->intrlock); 2300 (void) untimeout(mgp->timer_id); 2301 mutex_enter(&mgp->intrlock); 2302 myri10ge_stop_locked(mgp); 2303 mutex_exit(&mgp->intrlock); 2304 2305 } 2306 2307 static inline void 2308 myri10ge_rx_csum(mblk_t *mp, struct myri10ge_rx_ring_stats *s, uint32_t csum) 2309 { 2310 struct ether_header *eh; 2311 struct ip *ip; 2312 struct ip6_hdr *ip6; 2313 uint32_t start, stuff, end, partial, hdrlen; 2314 2315 2316 csum = ntohs((uint16_t)csum); 2317 eh = (struct ether_header *)(void *)mp->b_rptr; 2318 hdrlen = sizeof (*eh); 2319 if (eh->ether_dhost.ether_addr_octet[0] & 1) { 2320 if (0 == (bcmp(eh->ether_dhost.ether_addr_octet, 2321 myri10ge_broadcastaddr, sizeof (eh->ether_dhost)))) 2322 s->brdcstrcv++; 2323 else 2324 s->multircv++; 2325 } 2326 2327 if (eh->ether_type == BE_16(ETHERTYPE_VLAN)) { 2328 /* 2329 * fix the checksum by subtracting out the 4 VLAN tag bytes that 2330 * follow what the firmware thought was the end of the ether hdr: a one's complement subtract (add the complement of the tag word, propagate the end-around carry, then fold back down to 16 bits) 2331 */ 2332 partial = *(uint32_t *) 2333 (void *)(mp->b_rptr + ETHERNET_HEADER_SIZE); 2334 csum += ~partial; 2335 csum += (csum < ~partial); 2336 csum = (csum >> 16) + (csum & 0xFFFF); 2337 csum = (csum >> 16) + (csum & 0xFFFF); 2338 hdrlen += VLAN_TAGSZ; 2339 } 2340 2341 if (eh->ether_type == BE_16(ETHERTYPE_IP)) { 2342 ip = (struct ip *)(void *)(mp->b_rptr + hdrlen); 2343 start = ip->ip_hl << 2; 2344 2345 if (ip->ip_p == IPPROTO_TCP) 2346 stuff = start + offsetof(struct tcphdr, th_sum); 2347 else if (ip->ip_p == IPPROTO_UDP) 2348 stuff = start +
offsetof(struct udphdr, uh_sum); 2349 else 2350 return; 2351 end = ntohs(ip->ip_len); 2352 } else if (eh->ether_type == BE_16(ETHERTYPE_IPV6)) { 2353 ip6 = (struct ip6_hdr *)(void *)(mp->b_rptr + hdrlen); 2354 start = sizeof (*ip6); 2355 if (ip6->ip6_nxt == IPPROTO_TCP) { 2356 stuff = start + offsetof(struct tcphdr, th_sum); 2357 } else if (ip6->ip6_nxt == IPPROTO_UDP) 2358 stuff = start + offsetof(struct udphdr, uh_sum); 2359 else 2360 return; 2361 end = start + ntohs(ip6->ip6_plen); 2362 /* 2363 * IPv6 headers do not contain a checksum, and hence 2364 * do not checksum to zero, so they don't "fall out" 2365 * of the partial checksum calculation like IPv4 2366 * headers do. We need to fix the partial checksum by 2367 * subtracting the checksum of the IPv6 header. 2368 */ 2369 2370 partial = myri10ge_csum_generic((uint16_t *)ip6, sizeof (*ip6)); 2371 csum += ~partial; 2372 csum += (csum < ~partial); 2373 csum = (csum >> 16) + (csum & 0xFFFF); 2374 csum = (csum >> 16) + (csum & 0xFFFF); 2375 } else { 2376 return; 2377 } 2378 2379 if (MBLKL(mp) > hdrlen + end) { 2380 /* padded frame, so hw csum may be invalid */ 2381 return; 2382 } 2383 2384 mac_hcksum_set(mp, start, stuff, end, csum, HCK_PARTIALCKSUM); 2385 } 2386 2387 static mblk_t * 2388 myri10ge_rx_done_small(struct myri10ge_slice_state *ss, uint32_t len, 2389 uint32_t csum) 2390 { 2391 mblk_t *mp; 2392 myri10ge_rx_ring_t *rx; 2393 int idx; 2394 2395 rx = &ss->rx_small; 2396 idx = rx->cnt & rx->mask; 2397 ss->rx_small.cnt++; 2398 2399 /* allocate a new buffer to pass up the stack */ 2400 mp = allocb(len + MXGEFW_PAD, 0); 2401 if (mp == NULL) { 2402 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_small_nobuf); 2403 goto abort; 2404 } 2405 bcopy(ss->rx_small.info[idx].ptr, 2406 (caddr_t)mp->b_wptr, len + MXGEFW_PAD); 2407 mp->b_wptr += len + MXGEFW_PAD; 2408 mp->b_rptr += MXGEFW_PAD; 2409 2410 ss->rx_stats.ibytes += len; 2411 ss->rx_stats.ipackets += 1; 2412 myri10ge_rx_csum(mp, &ss->rx_stats, csum); 2413 2414 abort: 2415 if ((idx & 7) == 7) { 2416 myri10ge_submit_8rx(&rx->lanai[idx - 7], 2417 &rx->shadow[idx - 7]); 2418 } 2419 2420 return (mp); 2421 } 2422 2423 2424 static mblk_t * 2425 myri10ge_rx_done_big(struct myri10ge_slice_state *ss, uint32_t len, 2426 uint32_t csum) 2427 { 2428 struct myri10ge_jpool_stuff *jpool; 2429 struct myri10ge_jpool_entry *j; 2430 mblk_t *mp; 2431 int idx, num_owned_by_mcp; 2432 2433 jpool = &ss->jpool; 2434 idx = ss->j_rx_cnt & ss->rx_big.mask; 2435 j = ss->rx_big.info[idx].j; 2436 2437 if (j == NULL) { 2438 printf("%s: null j at idx=%d, rx_big.cnt = %d, j_rx_cnt=%d\n", 2439 ss->mgp->name, idx, ss->rx_big.cnt, ss->j_rx_cnt); 2440 return (NULL); 2441 } 2442 2443 2444 ss->rx_big.info[idx].j = NULL; 2445 ss->j_rx_cnt++; 2446 2447 2448 /* 2449 * Check to see if we are low on rx buffers. 2450 * Note that we must leave at least 8 free so there are 2451 * enough to free in a single 64-byte write. 
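 * (receive buffers are handed back to the NIC eight at a time: myri10ge_submit_8rx() pushes a block of eight receive descriptors in a single write)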
2452 */ 2453 num_owned_by_mcp = ss->rx_big.cnt - ss->j_rx_cnt; 2454 if (num_owned_by_mcp < jpool->low_water) { 2455 mutex_enter(&jpool->mtx); 2456 myri10ge_restock_jumbos(ss); 2457 mutex_exit(&jpool->mtx); 2458 num_owned_by_mcp = ss->rx_big.cnt - ss->j_rx_cnt; 2459 /* if we are still low, then we have to copy */ 2460 if (num_owned_by_mcp < 16) { 2461 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_copy); 2462 /* allocate a new buffer to pass up the stack */ 2463 mp = allocb(len + MXGEFW_PAD, 0); 2464 if (mp == NULL) { 2465 goto abort; 2466 } 2467 bcopy(j->buf, 2468 (caddr_t)mp->b_wptr, len + MXGEFW_PAD); 2469 myri10ge_jfree_rtn(j); 2470 /* push buffer back to NIC */ 2471 mutex_enter(&jpool->mtx); 2472 myri10ge_restock_jumbos(ss); 2473 mutex_exit(&jpool->mtx); 2474 goto set_len; 2475 } 2476 } 2477 2478 /* loan our buffer to the stack */ 2479 mp = desballoc((unsigned char *)j->buf, myri10ge_mtu, 0, &j->free_func); 2480 if (mp == NULL) { 2481 goto abort; 2482 } 2483 2484 set_len: 2485 mp->b_rptr += MXGEFW_PAD; 2486 mp->b_wptr = ((unsigned char *) mp->b_rptr + len); 2487 2488 ss->rx_stats.ibytes += len; 2489 ss->rx_stats.ipackets += 1; 2490 myri10ge_rx_csum(mp, &ss->rx_stats, csum); 2491 2492 return (mp); 2493 2494 abort: 2495 myri10ge_jfree_rtn(j); 2496 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_big_nobuf); 2497 return (NULL); 2498 } 2499 2500 /* 2501 * Free all transmit buffers up until the specified index 2502 */ 2503 static inline void 2504 myri10ge_tx_done(struct myri10ge_slice_state *ss, uint32_t mcp_index) 2505 { 2506 myri10ge_tx_ring_t *tx; 2507 struct myri10ge_tx_dma_handle_head handles; 2508 int idx; 2509 int limit = 0; 2510 2511 tx = &ss->tx; 2512 handles.head = NULL; 2513 handles.tail = NULL; 2514 while (tx->pkt_done != (int)mcp_index) { 2515 idx = tx->done & tx->mask; 2516 2517 /* 2518 * the mblk & DMA handle are attached only to the first 2519 * slot used by each buffer in the packet 2520 */ 2521 2522 if (tx->info[idx].m) { 2523 (void) ddi_dma_unbind_handle(tx->info[idx].handle->h); 2524 tx->info[idx].handle->next = handles.head; 2525 handles.head = tx->info[idx].handle; 2526 if (handles.tail == NULL) 2527 handles.tail = tx->info[idx].handle; 2528 freeb(tx->info[idx].m); 2529 tx->info[idx].m = 0; 2530 tx->info[idx].handle = 0; 2531 } 2532 if (tx->info[idx].ostat.opackets != 0) { 2533 tx->stats.multixmt += tx->info[idx].ostat.multixmt; 2534 tx->stats.brdcstxmt += tx->info[idx].ostat.brdcstxmt; 2535 tx->stats.obytes += tx->info[idx].ostat.obytes; 2536 tx->stats.opackets += tx->info[idx].ostat.opackets; 2537 tx->info[idx].stat.un.all = 0; 2538 tx->pkt_done++; 2539 } 2540 2541 tx->done++; 2542 /* 2543 * if we stalled the queue, wake it, but wait until 2544 * we have at least 1/2 of our slots free.
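 * (this hysteresis keeps us from waking the ring only to have it stall again almost immediately)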
2545 */ 2546 if ((tx->req - tx->done) < (tx->mask >> 1) && 2547 tx->stall != tx->sched) { 2548 mutex_enter(&ss->tx.lock); 2549 tx->sched = tx->stall; 2550 mutex_exit(&ss->tx.lock); 2551 mac_tx_ring_update(ss->mgp->mh, tx->rh); 2552 } 2553 2554 /* limit potential for livelock */ 2555 if (unlikely(++limit > 2 * tx->mask)) 2556 break; 2557 } 2558 if (tx->req == tx->done && tx->stop != NULL) { 2559 /* 2560 * The NIC has sent all pending requests; allow it 2561 * to stop polling this queue 2562 */ 2563 mutex_enter(&tx->lock); 2564 if (tx->req == tx->done && tx->active) { 2565 *(int *)(void *)tx->stop = 1; 2566 tx->active = 0; 2567 mb(); 2568 } 2569 mutex_exit(&tx->lock); 2570 } 2571 if (handles.head != NULL) 2572 myri10ge_free_tx_handles(tx, &handles); 2573 } 2574 2575 static void 2576 myri10ge_mbl_init(struct myri10ge_mblk_list *mbl) 2577 { 2578 mbl->head = NULL; 2579 mbl->tail = &mbl->head; 2580 mbl->cnt = 0; 2581 } 2582 2583 /*ARGSUSED*/ 2584 void 2585 myri10ge_mbl_append(struct myri10ge_slice_state *ss, 2586 struct myri10ge_mblk_list *mbl, mblk_t *mp) 2587 { 2588 *(mbl->tail) = mp; 2589 mbl->tail = &mp->b_next; 2590 mp->b_next = NULL; 2591 mbl->cnt++; 2592 } 2593 2594 2595 static inline void 2596 myri10ge_clean_rx_done(struct myri10ge_slice_state *ss, 2597 struct myri10ge_mblk_list *mbl, int limit, boolean_t *stop) 2598 { 2599 myri10ge_rx_done_t *rx_done = &ss->rx_done; 2600 struct myri10ge_priv *mgp = ss->mgp; 2601 mblk_t *mp; 2602 struct lro_entry *lro; 2603 uint16_t length; 2604 uint16_t checksum; 2605 2606 2607 while (rx_done->entry[rx_done->idx].length != 0) { 2608 if (unlikely (*stop)) { 2609 break; 2610 } 2611 length = ntohs(rx_done->entry[rx_done->idx].length); 2612 length &= (~MXGEFW_RSS_HASH_MASK); 2613 2614 /* limit potential for livelock */ 2615 limit -= length; 2616 if (unlikely(limit < 0)) 2617 break; 2618 2619 rx_done->entry[rx_done->idx].length = 0; 2620 checksum = ntohs(rx_done->entry[rx_done->idx].checksum); 2621 if (length <= myri10ge_small_bytes) 2622 mp = myri10ge_rx_done_small(ss, length, checksum); 2623 else 2624 mp = myri10ge_rx_done_big(ss, length, checksum); 2625 if (mp != NULL) { 2626 if (!myri10ge_lro || 2627 0 != myri10ge_lro_rx(ss, mp, checksum, mbl)) 2628 myri10ge_mbl_append(ss, mbl, mp); 2629 } 2630 rx_done->cnt++; 2631 rx_done->idx = rx_done->cnt & (mgp->max_intr_slots - 1); 2632 } 2633 while (ss->lro_active != NULL) { 2634 lro = ss->lro_active; 2635 ss->lro_active = lro->next; 2636 myri10ge_lro_flush(ss, lro, mbl); 2637 } 2638 } 2639 2640 static void 2641 myri10ge_intr_rx(struct myri10ge_slice_state *ss) 2642 { 2643 uint64_t gen; 2644 struct myri10ge_mblk_list mbl; 2645 2646 myri10ge_mbl_init(&mbl); 2647 if (mutex_tryenter(&ss->rx_lock) == 0) 2648 return; 2649 gen = ss->rx_gen_num; 2650 myri10ge_clean_rx_done(ss, &mbl, MYRI10GE_POLL_NULL, 2651 &ss->rx_polling); 2652 if (mbl.head != NULL) 2653 mac_rx_ring(ss->mgp->mh, ss->rx_rh, mbl.head, gen); 2654 mutex_exit(&ss->rx_lock); 2655 2656 } 2657 2658 static mblk_t * 2659 myri10ge_poll_rx(void *arg, int bytes) 2660 { 2661 struct myri10ge_slice_state *ss = arg; 2662 struct myri10ge_mblk_list mbl; 2663 boolean_t dummy = B_FALSE; 2664 2665 if (bytes == 0) 2666 return (NULL); 2667 2668 myri10ge_mbl_init(&mbl); 2669 mutex_enter(&ss->rx_lock); 2670 if (ss->rx_polling) 2671 myri10ge_clean_rx_done(ss, &mbl, bytes, &dummy); 2672 else 2673 printf("%d: poll_rx: token=%d, polling=%d\n", (int)(ss - 2674 ss->mgp->ss), ss->rx_token, ss->rx_polling); 2675 mutex_exit(&ss->rx_lock); 2676 return (mbl.head); 2677 } 2678 2679
/*ARGSUSED*/ 2680 static uint_t 2681 myri10ge_intr(caddr_t arg0, caddr_t arg1) 2682 { 2683 struct myri10ge_slice_state *ss = 2684 (struct myri10ge_slice_state *)(void *)arg0; 2685 struct myri10ge_priv *mgp = ss->mgp; 2686 mcp_irq_data_t *stats = ss->fw_stats; 2687 myri10ge_tx_ring_t *tx = &ss->tx; 2688 uint32_t send_done_count; 2689 uint8_t valid; 2690 2691 2692 /* make sure the DMA has finished */ 2693 if (!stats->valid) { 2694 return (DDI_INTR_UNCLAIMED); 2695 } 2696 valid = stats->valid; 2697 2698 /* low bit indicates receives are present */ 2699 if (valid & 1) 2700 myri10ge_intr_rx(ss); 2701 2702 if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) { 2703 /* lower legacy IRQ */ 2704 *mgp->irq_deassert = 0; 2705 if (!myri10ge_deassert_wait) 2706 /* don't wait for confirmation that the irq is low */ 2707 stats->valid = 0; 2708 mb(); 2709 } else { 2710 /* no need to wait for confirmation that the irq is low */ 2711 stats->valid = 0; 2712 } 2713 2714 do { 2715 /* check for transmit completes and receives */ 2716 send_done_count = ntohl(stats->send_done_count); 2717 if (send_done_count != tx->pkt_done) 2718 myri10ge_tx_done(ss, (int)send_done_count); 2719 } while (*((volatile uint8_t *) &stats->valid)); 2720 2721 if (stats->stats_updated) { 2722 if (mgp->link_state != stats->link_up || stats->link_down) { 2723 mgp->link_state = stats->link_up; 2724 if (stats->link_down) { 2725 mgp->down_cnt += stats->link_down; 2726 mgp->link_state = 0; 2727 } 2728 if (mgp->link_state) { 2729 if (myri10ge_verbose) 2730 printf("%s: link up\n", mgp->name); 2731 mac_link_update(mgp->mh, LINK_STATE_UP); 2732 } else { 2733 if (myri10ge_verbose) 2734 printf("%s: link down\n", mgp->name); 2735 mac_link_update(mgp->mh, LINK_STATE_DOWN); 2736 } 2737 MYRI10GE_NIC_STAT_INC(link_changes); 2738 } 2739 if (mgp->rdma_tags_available != 2740 ntohl(ss->fw_stats->rdma_tags_available)) { 2741 mgp->rdma_tags_available = 2742 ntohl(ss->fw_stats->rdma_tags_available); 2743 cmn_err(CE_NOTE, "%s: RDMA timed out! " 2744 "%d tags left\n", mgp->name, 2745 mgp->rdma_tags_available); 2746 } 2747 } 2748 2749 mb(); 2750 /* check to see if we have an rx token to pass back */ 2751 if (valid & 0x1) { 2752 mutex_enter(&ss->poll_lock); 2753 if (ss->rx_polling) { 2754 ss->rx_token = 1; 2755 } else { 2756 *ss->irq_claim = BE_32(3); 2757 ss->rx_token = 0; 2758 } 2759 mutex_exit(&ss->poll_lock); 2760 } 2761 *(ss->irq_claim + 1) = BE_32(3); 2762 return (DDI_INTR_CLAIMED); 2763 } 2764 2765 /* 2766 * Add or remove a multicast address. This is called with our 2767 * macinfo's lock held by GLD, so we do not need to worry about 2768 * our own locking here.
2769 */ 2770 static int 2771 myri10ge_m_multicst(void *arg, boolean_t add, const uint8_t *multicastaddr) 2772 { 2773 myri10ge_cmd_t cmd; 2774 struct myri10ge_priv *mgp = arg; 2775 int status, join_leave; 2776 2777 if (add) 2778 join_leave = MXGEFW_JOIN_MULTICAST_GROUP; 2779 else 2780 join_leave = MXGEFW_LEAVE_MULTICAST_GROUP; 2781 (void) memcpy(&cmd.data0, multicastaddr, 4); 2782 (void) memcpy(&cmd.data1, multicastaddr + 4, 2); 2783 cmd.data0 = htonl(cmd.data0); 2784 cmd.data1 = htonl(cmd.data1); 2785 status = myri10ge_send_cmd(mgp, join_leave, &cmd); 2786 if (status == 0) 2787 return (0); 2788 2789 cmn_err(CE_WARN, "%s: failed to set multicast address\n", 2790 mgp->name); 2791 return (status); 2792 } 2793 2794 2795 static int 2796 myri10ge_m_promisc(void *arg, boolean_t on) 2797 { 2798 struct myri10ge_priv *mgp = arg; 2799 2800 myri10ge_change_promisc(mgp, on); 2801 return (0); 2802 } 2803 2804 /* 2805 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 2806 * backwards one at a time and handle ring wraps 2807 */ 2808 2809 static inline void 2810 myri10ge_submit_req_backwards(myri10ge_tx_ring_t *tx, 2811 mcp_kreq_ether_send_t *src, int cnt) 2812 { 2813 int idx, starting_slot; 2814 starting_slot = tx->req; 2815 while (cnt > 1) { 2816 cnt--; 2817 idx = (starting_slot + cnt) & tx->mask; 2818 myri10ge_pio_copy(&tx->lanai[idx], 2819 &src[cnt], sizeof (*src)); 2820 mb(); 2821 } 2822 } 2823 2824 /* 2825 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 2826 * at most 32 bytes at a time, so as to avoid involving the software 2827 * pio handler in the nic. We re-write the first segment's flags 2828 * to mark them valid only after writing the entire chain 2829 */ 2830 2831 static inline void 2832 myri10ge_submit_req(myri10ge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 2833 int cnt) 2834 { 2835 int idx, i; 2836 uint32_t *src_ints, *dst_ints; 2837 mcp_kreq_ether_send_t *srcp, *dstp, *dst; 2838 uint8_t last_flags; 2839 2840 idx = tx->req & tx->mask; 2841 2842 last_flags = src->flags; 2843 src->flags = 0; 2844 mb(); 2845 dst = dstp = &tx->lanai[idx]; 2846 srcp = src; 2847 2848 if ((idx + cnt) < tx->mask) { 2849 for (i = 0; i < (cnt - 1); i += 2) { 2850 myri10ge_pio_copy(dstp, srcp, 2 * sizeof (*src)); 2851 mb(); /* force write every 32 bytes */ 2852 srcp += 2; 2853 dstp += 2; 2854 } 2855 } else { 2856 /* 2857 * submit all but the first request, and ensure 2858 * that it is submitted below 2859 */ 2860 myri10ge_submit_req_backwards(tx, src, cnt); 2861 i = 0; 2862 } 2863 if (i < cnt) { 2864 /* submit the first request */ 2865 myri10ge_pio_copy(dstp, srcp, sizeof (*src)); 2866 mb(); /* barrier before setting valid flag */ 2867 } 2868 2869 /* re-write the last 32-bits with the valid flags */ 2870 src->flags |= last_flags; 2871 src_ints = (uint32_t *)src; 2872 src_ints += 3; 2873 dst_ints = (uint32_t *)dst; 2874 dst_ints += 3; 2875 *dst_ints = *src_ints; 2876 tx->req += cnt; 2877 mb(); 2878 /* notify NIC to poll this tx ring */ 2879 if (!tx->active && tx->go != NULL) { 2880 *(int *)(void *)tx->go = 1; 2881 tx->active = 1; 2882 tx->activate++; 2883 mb(); 2884 } 2885 } 2886 2887 /* ARGSUSED */ 2888 static inline void 2889 myri10ge_lso_info_get(mblk_t *mp, uint32_t *mss, uint32_t *flags) 2890 { 2891 uint32_t lso_flag; 2892 mac_lso_get(mp, mss, &lso_flag); 2893 (*flags) |= lso_flag; 2894 } 2895 2896 2897 /* like pullupmsg, except preserve hcksum/LSO attributes */ 2898 static int 2899 myri10ge_pullup(struct myri10ge_slice_state *ss, mblk_t *mp) 2900 { 2901 uint32_t start, stuff, 
tx_offload_flags, mss; 2902 int ok; 2903 2904 mss = 0; 2905 mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags); 2906 myri10ge_lso_info_get(mp, &mss, &tx_offload_flags); 2907 2908 ok = pullupmsg(mp, -1); 2909 if (!ok) { 2910 printf("pullupmsg failed"); 2911 return (DDI_FAILURE); 2912 } 2913 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_pullup); 2914 mac_hcksum_set(mp, start, stuff, 0, 0, tx_offload_flags); 2915 if (tx_offload_flags & HW_LSO) 2916 DB_LSOMSS(mp) = (uint16_t)mss; 2917 lso_info_set(mp, mss, tx_offload_flags); 2918 return (DDI_SUCCESS); 2919 } 2920 2921 static inline void 2922 myri10ge_tx_stat(struct myri10ge_tx_pkt_stats *s, struct ether_header *eh, 2923 int opackets, int obytes) 2924 { 2925 s->un.all = 0; 2926 if (eh->ether_dhost.ether_addr_octet[0] & 1) { 2927 if (0 == (bcmp(eh->ether_dhost.ether_addr_octet, 2928 myri10ge_broadcastaddr, sizeof (eh->ether_dhost)))) 2929 s->un.s.brdcstxmt = 1; 2930 else 2931 s->un.s.multixmt = 1; 2932 } 2933 s->un.s.opackets = (uint16_t)opackets; 2934 s->un.s.obytes = obytes; 2935 } 2936 2937 static int 2938 myri10ge_tx_copy(struct myri10ge_slice_state *ss, mblk_t *mp, 2939 mcp_kreq_ether_send_t *req) 2940 { 2941 myri10ge_tx_ring_t *tx = &ss->tx; 2942 caddr_t ptr; 2943 struct myri10ge_tx_copybuf *cp; 2944 mblk_t *bp; 2945 int idx, mblen, avail; 2946 uint16_t len; 2947 2948 mutex_enter(&tx->lock); 2949 avail = tx->mask - (tx->req - tx->done); 2950 if (avail <= 1) { 2951 mutex_exit(&tx->lock); 2952 return (EBUSY); 2953 } 2954 idx = tx->req & tx->mask; 2955 cp = &tx->cp[idx]; 2956 ptr = cp->va; 2957 for (len = 0, bp = mp; bp != NULL; bp = bp->b_cont) { 2958 mblen = MBLKL(bp); 2959 bcopy(bp->b_rptr, ptr, mblen); 2960 ptr += mblen; 2961 len += mblen; 2962 } 2963 /* ensure runts are padded to 60 bytes */ 2964 if (len < 60) { 2965 bzero(ptr, 64 - len); 2966 len = 60; 2967 } 2968 req->addr_low = cp->dma.low; 2969 req->addr_high = cp->dma.high; 2970 req->length = htons(len); 2971 req->pad = 0; 2972 req->rdma_count = 1; 2973 myri10ge_tx_stat(&tx->info[idx].stat, 2974 (struct ether_header *)(void *)cp->va, 1, len); 2975 (void) ddi_dma_sync(cp->dma.handle, 0, len, DDI_DMA_SYNC_FORDEV); 2976 myri10ge_submit_req(&ss->tx, req, 1); 2977 mutex_exit(&tx->lock); 2978 freemsg(mp); 2979 return (DDI_SUCCESS); 2980 } 2981 2982 2983 static void 2984 myri10ge_send_locked(myri10ge_tx_ring_t *tx, mcp_kreq_ether_send_t *req_list, 2985 struct myri10ge_tx_buffer_state *tx_info, 2986 int count) 2987 { 2988 int i, idx; 2989 2990 idx = 0; /* gcc -Wuninitialized */ 2991 /* store unmapping and bp info for tx irq handler */ 2992 for (i = 0; i < count; i++) { 2993 idx = (tx->req + i) & tx->mask; 2994 tx->info[idx].m = tx_info[i].m; 2995 tx->info[idx].handle = tx_info[i].handle; 2996 } 2997 tx->info[idx].stat.un.all = tx_info[0].stat.un.all; 2998 2999 /* submit the frame to the nic */ 3000 myri10ge_submit_req(tx, req_list, count); 3001 3002 3003 } 3004 3005 3006 3007 static void 3008 myri10ge_copydata(mblk_t *mp, int off, int len, caddr_t buf) 3009 { 3010 mblk_t *bp; 3011 int seglen; 3012 uint_t count; 3013 3014 bp = mp; 3015 3016 while (off > 0) { 3017 seglen = MBLKL(bp); 3018 if (off < seglen) 3019 break; 3020 off -= seglen; 3021 bp = bp->b_cont; 3022 } 3023 while (len > 0) { 3024 seglen = MBLKL(bp); 3025 count = min(seglen - off, len); 3026 bcopy(bp->b_rptr + off, buf, count); 3027 len -= count; 3028 buf += count; 3029 off = 0; 3030 bp = bp->b_cont; 3031 } 3032 } 3033 3034 static int 3035 myri10ge_ether_parse_header(mblk_t *mp) 3036 { 3037 struct ether_header eh_copy; 3038 
struct ether_header *eh; 3039 int eth_hdr_len, seglen; 3040 3041 seglen = MBLKL(mp); 3042 eth_hdr_len = sizeof (*eh); 3043 if (seglen < eth_hdr_len) { 3044 myri10ge_copydata(mp, 0, eth_hdr_len, (caddr_t)&eh_copy); 3045 eh = &eh_copy; 3046 } else { 3047 eh = (struct ether_header *)(void *)mp->b_rptr; 3048 } 3049 if (eh->ether_type == BE_16(ETHERTYPE_VLAN)) { 3050 eth_hdr_len += 4; 3051 } 3052 3053 return (eth_hdr_len); 3054 } 3055 3056 static int 3057 myri10ge_lso_parse_header(mblk_t *mp, int off) 3058 { 3059 char buf[128]; 3060 int seglen, sum_off; 3061 struct ip *ip; 3062 struct tcphdr *tcp; 3063 3064 seglen = MBLKL(mp); 3065 if (seglen < off + sizeof (*ip)) { 3066 myri10ge_copydata(mp, off, sizeof (*ip), buf); 3067 ip = (struct ip *)(void *)buf; 3068 } else { 3069 ip = (struct ip *)(void *)(mp->b_rptr + off); 3070 } 3071 if (seglen < off + (ip->ip_hl << 2) + sizeof (*tcp)) { 3072 myri10ge_copydata(mp, off, 3073 (ip->ip_hl << 2) + sizeof (*tcp), buf); 3074 ip = (struct ip *)(void *)buf; 3075 } 3076 tcp = (struct tcphdr *)(void *)((char *)ip + (ip->ip_hl << 2)); 3077 3078 /* 3079 * NIC expects ip_sum to be zero. Recent changes to 3080 * OpenSolaris leave the correct ip checksum there, rather 3081 * than the required zero, so we need to zero it. Otherwise, 3082 * the NIC will produce bad checksums when sending LSO packets. 3083 */ 3084 if (ip->ip_sum != 0) { 3085 if (((char *)ip) != buf) { 3086 /* ip points into mblk, so just zero it */ 3087 ip->ip_sum = 0; 3088 } else { 3089 /* 3090 * ip points into a copy, so walk the chain 3091 * to find the ip_csum, then zero it 3092 */ 3093 sum_off = off + _PTRDIFF(&ip->ip_sum, buf); 3094 while (sum_off > (int)(MBLKL(mp) - 1)) { 3095 sum_off -= MBLKL(mp); 3096 mp = mp->b_cont; 3097 } 3098 mp->b_rptr[sum_off] = 0; 3099 sum_off++; 3100 while (sum_off > MBLKL(mp) - 1) { 3101 sum_off -= MBLKL(mp); 3102 mp = mp->b_cont; 3103 } 3104 mp->b_rptr[sum_off] = 0; 3105 } 3106 } 3107 return (off + ((ip->ip_hl + tcp->th_off) << 2)); 3108 } 3109 3110 static int 3111 myri10ge_tx_tso_copy(struct myri10ge_slice_state *ss, mblk_t *mp, 3112 mcp_kreq_ether_send_t *req_list, int hdr_size, int pkt_size, 3113 uint16_t mss, uint8_t cksum_offset) 3114 { 3115 myri10ge_tx_ring_t *tx = &ss->tx; 3116 struct myri10ge_priv *mgp = ss->mgp; 3117 mblk_t *bp; 3118 mcp_kreq_ether_send_t *req; 3119 struct myri10ge_tx_copybuf *cp; 3120 caddr_t rptr, ptr; 3121 int mblen, count, cum_len, mss_resid, tx_req, pkt_size_tmp; 3122 int resid, avail, idx, hdr_size_tmp, tx_boundary; 3123 int rdma_count; 3124 uint32_t seglen, len, boundary, low, high_swapped; 3125 uint16_t pseudo_hdr_offset = htons(mss); 3126 uint8_t flags; 3127 3128 tx_boundary = mgp->tx_boundary; 3129 hdr_size_tmp = hdr_size; 3130 resid = tx_boundary; 3131 count = 1; 3132 mutex_enter(&tx->lock); 3133 3134 /* check to see if the slots are really there */ 3135 avail = tx->mask - (tx->req - tx->done); 3136 if (unlikely(avail <= MYRI10GE_MAX_SEND_DESC_TSO)) { 3137 atomic_inc_32(&tx->stall); 3138 mutex_exit(&tx->lock); 3139 return (EBUSY); 3140 } 3141 3142 /* copy */ 3143 cum_len = -hdr_size; 3144 count = 0; 3145 req = req_list; 3146 idx = tx->mask & tx->req; 3147 cp = &tx->cp[idx]; 3148 low = ntohl(cp->dma.low); 3149 ptr = cp->va; 3150 cp->len = 0; 3151 if (mss) { 3152 int payload = pkt_size - hdr_size; 3153 uint16_t opackets = (payload / mss) + ((payload % mss) != 0); 3154 tx->info[idx].ostat.opackets = opackets; 3155 tx->info[idx].ostat.obytes = (opackets - 1) * hdr_size 3156 + pkt_size; 3157 } 3158 hdr_size_tmp = hdr_size; 3159 
mss_resid = mss; 3160 flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST); 3161 tx_req = tx->req; 3162 for (bp = mp; bp != NULL; bp = bp->b_cont) { 3163 mblen = MBLKL(bp); 3164 rptr = (caddr_t)bp->b_rptr; 3165 len = min(hdr_size_tmp, mblen); 3166 if (len) { 3167 bcopy(rptr, ptr, len); 3168 rptr += len; 3169 ptr += len; 3170 resid -= len; 3171 mblen -= len; 3172 hdr_size_tmp -= len; 3173 cp->len += len; 3174 if (hdr_size_tmp) 3175 continue; 3176 if (resid < mss) { 3177 tx_req++; 3178 idx = tx->mask & tx_req; 3179 cp = &tx->cp[idx]; 3180 low = ntohl(cp->dma.low); 3181 ptr = cp->va; 3182 resid = tx_boundary; 3183 } 3184 } 3185 while (mblen) { 3186 len = min(mss_resid, mblen); 3187 bcopy(rptr, ptr, len); 3188 mss_resid -= len; 3189 resid -= len; 3190 mblen -= len; 3191 rptr += len; 3192 ptr += len; 3193 cp->len += len; 3194 if (mss_resid == 0) { 3195 mss_resid = mss; 3196 if (resid < mss) { 3197 tx_req++; 3198 idx = tx->mask & tx_req; 3199 cp = &tx->cp[idx]; 3200 cp->len = 0; 3201 low = ntohl(cp->dma.low); 3202 ptr = cp->va; 3203 resid = tx_boundary; 3204 } 3205 } 3206 } 3207 } 3208 3209 req = req_list; 3210 pkt_size_tmp = pkt_size; 3211 count = 0; 3212 rdma_count = 0; 3213 tx_req = tx->req; 3214 while (pkt_size_tmp) { 3215 idx = tx->mask & tx_req; 3216 cp = &tx->cp[idx]; 3217 high_swapped = cp->dma.high; 3218 low = ntohl(cp->dma.low); 3219 len = cp->len; 3220 if (len == 0) { 3221 printf("len=0! pkt_size_tmp=%d, pkt_size=%d\n", 3222 pkt_size_tmp, pkt_size); 3223 for (bp = mp; bp != NULL; bp = bp->b_cont) { 3224 mblen = MBLKL(bp); 3225 printf("mblen:%d\n", mblen); 3226 } 3227 pkt_size_tmp = pkt_size; 3228 tx_req = tx->req; 3229 while (pkt_size_tmp > 0) { 3230 idx = tx->mask & tx_req; 3231 cp = &tx->cp[idx]; 3232 printf("cp->len = %d\n", cp->len); 3233 pkt_size_tmp -= cp->len; 3234 tx_req++; 3235 } 3236 printf("dropped\n"); 3237 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err); 3238 goto done; 3239 } 3240 pkt_size_tmp -= len; 3241 while (len) { 3242 while (len) { 3243 uint8_t flags_next; 3244 int cum_len_next; 3245 3246 boundary = (low + mgp->tx_boundary) & 3247 ~(mgp->tx_boundary - 1); 3248 seglen = boundary - low; 3249 if (seglen > len) 3250 seglen = len; 3251 3252 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 3253 cum_len_next = cum_len + seglen; 3254 (req-rdma_count)->rdma_count = rdma_count + 1; 3255 if (likely(cum_len >= 0)) { 3256 /* payload */ 3257 int next_is_first, chop; 3258 3259 chop = (cum_len_next > mss); 3260 cum_len_next = cum_len_next % mss; 3261 next_is_first = (cum_len_next == 0); 3262 flags |= chop * 3263 MXGEFW_FLAGS_TSO_CHOP; 3264 flags_next |= next_is_first * 3265 MXGEFW_FLAGS_FIRST; 3266 rdma_count |= -(chop | next_is_first); 3267 rdma_count += chop & !next_is_first; 3268 } else if (likely(cum_len_next >= 0)) { 3269 /* header ends */ 3270 int small; 3271 3272 rdma_count = -1; 3273 cum_len_next = 0; 3274 seglen = -cum_len; 3275 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 3276 flags_next = MXGEFW_FLAGS_TSO_PLD | 3277 MXGEFW_FLAGS_FIRST | 3278 (small * MXGEFW_FLAGS_SMALL); 3279 } 3280 req->addr_high = high_swapped; 3281 req->addr_low = htonl(low); 3282 req->pseudo_hdr_offset = pseudo_hdr_offset; 3283 req->pad = 0; /* complete solid 16-byte block */ 3284 req->rdma_count = 1; 3285 req->cksum_offset = cksum_offset; 3286 req->length = htons(seglen); 3287 req->flags = flags | ((cum_len & 1) * 3288 MXGEFW_FLAGS_ALIGN_ODD); 3289 if (cksum_offset > seglen) 3290 cksum_offset -= seglen; 3291 else 3292 cksum_offset = 0; 3293 low += seglen; 3294 len -= seglen; 3295 cum_len = cum_len_next; 3296 req++; 
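/* clear the next scratch descriptor's flags before it is filled in */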
3297 req->flags = 0; 3298 flags = flags_next; 3299 count++; 3300 rdma_count++; 3301 } 3302 } 3303 tx_req++; 3304 } 3305 (req-rdma_count)->rdma_count = (uint8_t)rdma_count; 3306 do { 3307 req--; 3308 req->flags |= MXGEFW_FLAGS_TSO_LAST; 3309 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | 3310 MXGEFW_FLAGS_FIRST))); 3311 3312 myri10ge_submit_req(tx, req_list, count); 3313 done: 3314 mutex_exit(&tx->lock); 3315 freemsg(mp); 3316 return (DDI_SUCCESS); 3317 } 3318 3319 /* 3320 * Try to send the chain of buffers described by the mp. We must not 3321 * use more descriptors than are available (tx->mask - (tx->req - 3322 * tx->done)), or than MXGEFW_MAX_SEND_DESC, whichever is less. 3323 */ 3324 3325 static int 3326 myri10ge_send(struct myri10ge_slice_state *ss, mblk_t *mp, 3327 mcp_kreq_ether_send_t *req_list, struct myri10ge_tx_buffer_state *tx_info) 3328 { 3329 struct myri10ge_priv *mgp = ss->mgp; 3330 myri10ge_tx_ring_t *tx = &ss->tx; 3331 mcp_kreq_ether_send_t *req; 3332 struct myri10ge_tx_dma_handle *handles, *dma_handle = NULL; 3333 mblk_t *bp; 3334 ddi_dma_cookie_t cookie; 3335 int err, rv, count, avail, mblen, try_pullup, i, max_segs, maclen, 3336 rdma_count, cum_len, lso_hdr_size; 3337 uint32_t start, stuff, tx_offload_flags; 3338 uint32_t seglen, len, mss, boundary, low, high_swapped; 3339 uint_t ncookies; 3340 uint16_t pseudo_hdr_offset; 3341 uint8_t flags, cksum_offset, odd_flag; 3342 int pkt_size; 3343 int lso_copy = myri10ge_lso_copy; 3344 try_pullup = 1; 3345 3346 again: 3347 /* Set up checksum offloading, if needed */ 3348 mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags); 3349 myri10ge_lso_info_get(mp, &mss, &tx_offload_flags); 3350 if (tx_offload_flags & HW_LSO) { 3351 max_segs = MYRI10GE_MAX_SEND_DESC_TSO; 3352 if ((tx_offload_flags & HCK_PARTIALCKSUM) == 0) { 3353 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_lsobadflags); 3354 freemsg(mp); 3355 return (DDI_SUCCESS); 3356 } 3357 } else { 3358 max_segs = MXGEFW_MAX_SEND_DESC; 3359 mss = 0; 3360 } 3361 req = req_list; 3362 cksum_offset = 0; 3363 pseudo_hdr_offset = 0; 3364 3365 /* leave an extra slot to keep the ring from wrapping */ 3366 avail = tx->mask - (tx->req - tx->done); 3367 3368 /* 3369 * If we have more than MXGEFW_MAX_SEND_DESC slots available, 3370 * then any over-length message will need to be pulled up in 3371 * order to fit. Otherwise, we are low on transmit descriptors 3372 * and it is probably better to stall and try again rather than 3373 * pull up a message to fit. 3374 */ 3375 3376 if (avail < max_segs) { 3377 err = EBUSY; 3378 atomic_inc_32(&tx->stall_early); 3379 goto stall; 3380 } 3381 3382 /* find out how long the frame is and how many segments it is */ 3383 count = 0; 3384 odd_flag = 0; 3385 pkt_size = 0; 3386 flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST); 3387 for (bp = mp; bp != NULL; bp = bp->b_cont) { 3388 dblk_t *dbp; 3389 mblen = MBLKL(bp); 3390 if (mblen == 0) { 3391 /* 3392 * we can't simply skip over 0-length mblks 3393 * because the hardware can't deal with them, 3394 * and we could leak them.
3395 */ 3396 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_zero_len); 3397 err = EIO; 3398 goto pullup; 3399 } 3400 /* 3401 * There's no advantage to copying most gesballoc 3402 * attached blocks, so disable lso copy in that case 3403 */ 3404 if (mss && lso_copy == 1 && ((dbp = bp->b_datap) != NULL)) { 3405 if ((void *)dbp->db_lastfree != myri10ge_db_lastfree) { 3406 lso_copy = 0; 3407 } 3408 } 3409 pkt_size += mblen; 3410 count++; 3411 } 3412 3413 /* Try to pull up excessivly long chains */ 3414 if (count >= max_segs) { 3415 err = myri10ge_pullup(ss, mp); 3416 if (likely(err == DDI_SUCCESS)) { 3417 count = 1; 3418 } else { 3419 if (count < MYRI10GE_MAX_SEND_DESC_TSO) { 3420 /* 3421 * just let the h/w send it, it will be 3422 * inefficient, but us better than dropping 3423 */ 3424 max_segs = MYRI10GE_MAX_SEND_DESC_TSO; 3425 } else { 3426 /* drop it */ 3427 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err); 3428 freemsg(mp); 3429 return (0); 3430 } 3431 } 3432 } 3433 3434 cum_len = 0; 3435 maclen = myri10ge_ether_parse_header(mp); 3436 3437 if (tx_offload_flags & HCK_PARTIALCKSUM) { 3438 3439 cksum_offset = start + maclen; 3440 pseudo_hdr_offset = htons(stuff + maclen); 3441 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 3442 flags |= MXGEFW_FLAGS_CKSUM; 3443 } 3444 3445 lso_hdr_size = 0; /* -Wunitinialized */ 3446 if (mss) { /* LSO */ 3447 /* this removes any CKSUM flag from before */ 3448 flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST); 3449 /* 3450 * parse the headers and set cum_len to a negative 3451 * value to reflect the offset of the TCP payload 3452 */ 3453 lso_hdr_size = myri10ge_lso_parse_header(mp, maclen); 3454 cum_len = -lso_hdr_size; 3455 if ((mss < mgp->tx_boundary) && lso_copy) { 3456 err = myri10ge_tx_tso_copy(ss, mp, req_list, 3457 lso_hdr_size, pkt_size, mss, cksum_offset); 3458 return (err); 3459 } 3460 3461 /* 3462 * for TSO, pseudo_hdr_offset holds mss. The firmware 3463 * figures out where to put the checksum by parsing 3464 * the header. 
3465 */ 3466 3467 pseudo_hdr_offset = htons(mss); 3468 } else if (pkt_size <= MXGEFW_SEND_SMALL_SIZE) { 3469 flags |= MXGEFW_FLAGS_SMALL; 3470 if (pkt_size < myri10ge_tx_copylen) { 3471 req->cksum_offset = cksum_offset; 3472 req->pseudo_hdr_offset = pseudo_hdr_offset; 3473 req->flags = flags; 3474 err = myri10ge_tx_copy(ss, mp, req); 3475 return (err); 3476 } 3477 cum_len = 0; 3478 } 3479 3480 /* pull one DMA handle for each bp from our freelist */ 3481 handles = NULL; 3482 err = myri10ge_alloc_tx_handles(ss, count, &handles); 3483 if (err != DDI_SUCCESS) { 3484 err = DDI_FAILURE; 3485 goto stall; 3486 } 3487 count = 0; 3488 rdma_count = 0; 3489 for (bp = mp; bp != NULL; bp = bp->b_cont) { 3490 mblen = MBLKL(bp); 3491 dma_handle = handles; 3492 handles = handles->next; 3493 3494 rv = ddi_dma_addr_bind_handle(dma_handle->h, NULL, 3495 (caddr_t)bp->b_rptr, mblen, 3496 DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_SLEEP, NULL, 3497 &cookie, &ncookies); 3498 if (unlikely(rv != DDI_DMA_MAPPED)) { 3499 err = EIO; 3500 try_pullup = 0; 3501 dma_handle->next = handles; 3502 handles = dma_handle; 3503 goto abort_with_handles; 3504 } 3505 3506 /* reserve the slot */ 3507 tx_info[count].m = bp; 3508 tx_info[count].handle = dma_handle; 3509 3510 for (; ; ) { 3511 low = MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress); 3512 high_swapped = 3513 htonl(MYRI10GE_HIGHPART_TO_U32( 3514 cookie.dmac_laddress)); 3515 len = (uint32_t)cookie.dmac_size; 3516 while (len) { 3517 uint8_t flags_next; 3518 int cum_len_next; 3519 3520 boundary = (low + mgp->tx_boundary) & 3521 ~(mgp->tx_boundary - 1); 3522 seglen = boundary - low; 3523 if (seglen > len) 3524 seglen = len; 3525 3526 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 3527 cum_len_next = cum_len + seglen; 3528 if (mss) { 3529 (req-rdma_count)->rdma_count = 3530 rdma_count + 1; 3531 if (likely(cum_len >= 0)) { 3532 /* payload */ 3533 int next_is_first, chop; 3534 3535 chop = (cum_len_next > mss); 3536 cum_len_next = 3537 cum_len_next % mss; 3538 next_is_first = 3539 (cum_len_next == 0); 3540 flags |= chop * 3541 MXGEFW_FLAGS_TSO_CHOP; 3542 flags_next |= next_is_first * 3543 MXGEFW_FLAGS_FIRST; 3544 rdma_count |= 3545 -(chop | next_is_first); 3546 rdma_count += 3547 chop & !next_is_first; 3548 } else if (likely(cum_len_next >= 0)) { 3549 /* header ends */ 3550 int small; 3551 3552 rdma_count = -1; 3553 cum_len_next = 0; 3554 seglen = -cum_len; 3555 small = (mss <= 3556 MXGEFW_SEND_SMALL_SIZE); 3557 flags_next = 3558 MXGEFW_FLAGS_TSO_PLD 3559 | MXGEFW_FLAGS_FIRST 3560 | (small * 3561 MXGEFW_FLAGS_SMALL); 3562 } 3563 } 3564 req->addr_high = high_swapped; 3565 req->addr_low = htonl(low); 3566 req->pseudo_hdr_offset = pseudo_hdr_offset; 3567 req->pad = 0; /* complete solid 16-byte block */ 3568 req->rdma_count = 1; 3569 req->cksum_offset = cksum_offset; 3570 req->length = htons(seglen); 3571 req->flags = flags | ((cum_len & 1) * odd_flag); 3572 if (cksum_offset > seglen) 3573 cksum_offset -= seglen; 3574 else 3575 cksum_offset = 0; 3576 low += seglen; 3577 len -= seglen; 3578 cum_len = cum_len_next; 3579 count++; 3580 rdma_count++; 3581 /* make sure all the segments will fit */ 3582 if (unlikely(count >= max_segs)) { 3583 MYRI10GE_ATOMIC_SLICE_STAT_INC( 3584 xmit_lowbuf); 3585 /* may try a pullup */ 3586 err = EBUSY; 3587 if (try_pullup) 3588 try_pullup = 2; 3589 goto abort_with_handles; 3590 } 3591 req++; 3592 req->flags = 0; 3593 flags = flags_next; 3594 tx_info[count].m = 0; 3595 } 3596 ncookies--; 3597 if (ncookies == 0) 3598 break; 3599 ddi_dma_nextcookie(dma_handle->h, 
&cookie); 3600 } 3601 } 3602 (req-rdma_count)->rdma_count = (uint8_t)rdma_count; 3603 3604 if (mss) { 3605 do { 3606 req--; 3607 req->flags |= MXGEFW_FLAGS_TSO_LAST; 3608 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | 3609 MXGEFW_FLAGS_FIRST))); 3610 } 3611 3612 /* calculate tx stats */ 3613 if (mss) { 3614 uint16_t opackets; 3615 int payload; 3616 3617 payload = pkt_size - lso_hdr_size; 3618 opackets = (payload / mss) + ((payload % mss) != 0); 3619 tx_info[0].stat.un.all = 0; 3620 tx_info[0].ostat.opackets = opackets; 3621 tx_info[0].ostat.obytes = (opackets - 1) * lso_hdr_size 3622 + pkt_size; 3623 } else { 3624 myri10ge_tx_stat(&tx_info[0].stat, 3625 (struct ether_header *)(void *)mp->b_rptr, 1, pkt_size); 3626 } 3627 mutex_enter(&tx->lock); 3628 3629 /* check to see if the slots are really there */ 3630 avail = tx->mask - (tx->req - tx->done); 3631 if (unlikely(avail <= count)) { 3632 mutex_exit(&tx->lock); 3633 err = 0; 3634 goto late_stall; 3635 } 3636 3637 myri10ge_send_locked(tx, req_list, tx_info, count); 3638 mutex_exit(&tx->lock); 3639 return (DDI_SUCCESS); 3640 3641 late_stall: 3642 try_pullup = 0; 3643 atomic_inc_32(&tx->stall_late); 3644 3645 abort_with_handles: 3646 /* unbind and free handles from previous mblks */ 3647 for (i = 0; i < count; i++) { 3648 bp = tx_info[i].m; 3649 tx_info[i].m = 0; 3650 if (bp) { 3651 dma_handle = tx_info[i].handle; 3652 (void) ddi_dma_unbind_handle(dma_handle->h); 3653 dma_handle->next = handles; 3654 handles = dma_handle; 3655 tx_info[i].handle = NULL; 3656 tx_info[i].m = NULL; 3657 } 3658 } 3659 myri10ge_free_tx_handle_slist(tx, handles); 3660 pullup: 3661 if (try_pullup) { 3662 err = myri10ge_pullup(ss, mp); 3663 if (err != DDI_SUCCESS && try_pullup == 2) { 3664 /* drop */ 3665 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err); 3666 freemsg(mp); 3667 return (0); 3668 } 3669 try_pullup = 0; 3670 goto again; 3671 } 3672 3673 stall: 3674 if (err != 0) { 3675 if (err == EBUSY) { 3676 atomic_inc_32(&tx->stall); 3677 } else { 3678 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err); 3679 } 3680 } 3681 return (err); 3682 } 3683 3684 static mblk_t * 3685 myri10ge_send_wrapper(void *arg, mblk_t *mp) 3686 { 3687 struct myri10ge_slice_state *ss = arg; 3688 int err = 0; 3689 mcp_kreq_ether_send_t *req_list; 3690 #if defined(__i386) 3691 /* 3692 * We need about 2.5KB of scratch space to handle transmits. 3693 * i86pc has only 8KB of kernel stack space, so we malloc the 3694 * scratch space there rather than keeping it on the stack. 
3695 */ 3696 size_t req_size, tx_info_size; 3697 struct myri10ge_tx_buffer_state *tx_info; 3698 caddr_t req_bytes; 3699 3700 req_size = sizeof (*req_list) * (MYRI10GE_MAX_SEND_DESC_TSO + 4) 3701 + 8; 3702 req_bytes = kmem_alloc(req_size, KM_SLEEP); 3703 tx_info_size = sizeof (*tx_info) * (MYRI10GE_MAX_SEND_DESC_TSO + 1); 3704 tx_info = kmem_alloc(tx_info_size, KM_SLEEP); 3705 #else 3706 char req_bytes[sizeof (*req_list) * (MYRI10GE_MAX_SEND_DESC_TSO + 4) 3707 + 8]; 3708 struct myri10ge_tx_buffer_state tx_info[MYRI10GE_MAX_SEND_DESC_TSO + 1]; 3709 #endif 3710 3711 /* ensure req_list entries are aligned to 8 bytes */ 3712 req_list = (struct mcp_kreq_ether_send *) 3713 (((unsigned long)req_bytes + 7UL) & ~7UL); 3714 3715 err = myri10ge_send(ss, mp, req_list, tx_info); 3716 3717 #if defined(__i386) 3718 kmem_free(tx_info, tx_info_size); 3719 kmem_free(req_bytes, req_size); 3720 #endif 3721 if (err) 3722 return (mp); 3723 else 3724 return (NULL); 3725 } 3726 3727 static int 3728 myri10ge_addmac(void *arg, const uint8_t *mac_addr) 3729 { 3730 struct myri10ge_priv *mgp = arg; 3731 int err; 3732 3733 if (mac_addr == NULL) 3734 return (EINVAL); 3735 3736 mutex_enter(&mgp->intrlock); 3737 if (mgp->macaddr_cnt) { 3738 mutex_exit(&mgp->intrlock); 3739 return (ENOSPC); 3740 } 3741 err = myri10ge_m_unicst(mgp, mac_addr); 3742 if (!err) 3743 mgp->macaddr_cnt++; 3744 3745 mutex_exit(&mgp->intrlock); 3746 if (err) 3747 return (err); 3748 3749 bcopy(mac_addr, mgp->mac_addr, sizeof (mgp->mac_addr)); 3750 return (0); 3751 } 3752 3753 /*ARGSUSED*/ 3754 static int 3755 myri10ge_remmac(void *arg, const uint8_t *mac_addr) 3756 { 3757 struct myri10ge_priv *mgp = arg; 3758 3759 mutex_enter(&mgp->intrlock); 3760 mgp->macaddr_cnt--; 3761 mutex_exit(&mgp->intrlock); 3762 3763 return (0); 3764 } 3765 3766 /*ARGSUSED*/ 3767 static void 3768 myri10ge_fill_group(void *arg, mac_ring_type_t rtype, const int index, 3769 mac_group_info_t *infop, mac_group_handle_t gh) 3770 { 3771 struct myri10ge_priv *mgp = arg; 3772 3773 if (rtype != MAC_RING_TYPE_RX) 3774 return; 3775 3776 infop->mgi_driver = (mac_group_driver_t)mgp; 3777 infop->mgi_start = NULL; 3778 infop->mgi_stop = NULL; 3779 infop->mgi_addmac = myri10ge_addmac; 3780 infop->mgi_remmac = myri10ge_remmac; 3781 infop->mgi_count = mgp->num_slices; 3782 } 3783 3784 static int 3785 myri10ge_ring_start(mac_ring_driver_t rh, uint64_t mr_gen_num) 3786 { 3787 struct myri10ge_slice_state *ss; 3788 3789 ss = (struct myri10ge_slice_state *)rh; 3790 mutex_enter(&ss->rx_lock); 3791 ss->rx_gen_num = mr_gen_num; 3792 mutex_exit(&ss->rx_lock); 3793 return (0); 3794 } 3795 3796 /* 3797 * Retrieve a value for one of the statistics for a particular rx ring 3798 */ 3799 int 3800 myri10ge_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) 3801 { 3802 struct myri10ge_slice_state *ss; 3803 3804 ss = (struct myri10ge_slice_state *)rh; 3805 switch (stat) { 3806 case MAC_STAT_RBYTES: 3807 *val = ss->rx_stats.ibytes; 3808 break; 3809 3810 case MAC_STAT_IPACKETS: 3811 *val = ss->rx_stats.ipackets; 3812 break; 3813 3814 default: 3815 *val = 0; 3816 return (ENOTSUP); 3817 } 3818 3819 return (0); 3820 } 3821 3822 /* 3823 * Retrieve a value for one of the statistics for a particular tx ring 3824 */ 3825 int 3826 myri10ge_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) 3827 { 3828 struct myri10ge_slice_state *ss; 3829 3830 ss = (struct myri10ge_slice_state *)rh; 3831 switch (stat) { 3832 case MAC_STAT_OBYTES: 3833 *val = ss->tx.stats.obytes; 3834 break; 3835 3836 case 
MAC_STAT_OPACKETS: 3837 *val = ss->tx.stats.opackets; 3838 break; 3839 3840 default: 3841 *val = 0; 3842 return (ENOTSUP); 3843 } 3844 3845 return (0); 3846 } 3847 3848 static int 3849 myri10ge_rx_ring_intr_disable(mac_intr_handle_t intrh) 3850 { 3851 struct myri10ge_slice_state *ss; 3852 3853 ss = (struct myri10ge_slice_state *)intrh; 3854 mutex_enter(&ss->poll_lock); 3855 ss->rx_polling = B_TRUE; 3856 mutex_exit(&ss->poll_lock); 3857 return (0); 3858 } 3859 3860 static int 3861 myri10ge_rx_ring_intr_enable(mac_intr_handle_t intrh) 3862 { 3863 struct myri10ge_slice_state *ss; 3864 3865 ss = (struct myri10ge_slice_state *)intrh; 3866 mutex_enter(&ss->poll_lock); 3867 ss->rx_polling = B_FALSE; 3868 if (ss->rx_token) { 3869 *ss->irq_claim = BE_32(3); 3870 ss->rx_token = 0; 3871 } 3872 mutex_exit(&ss->poll_lock); 3873 return (0); 3874 } 3875 3876 /*ARGSUSED*/ 3877 static void 3878 myri10ge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, 3879 const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh) 3880 { 3881 struct myri10ge_priv *mgp = arg; 3882 struct myri10ge_slice_state *ss; 3883 mac_intr_t *mintr = &infop->mri_intr; 3884 3885 ASSERT((unsigned int)ring_index < mgp->num_slices); 3886 3887 ss = &mgp->ss[ring_index]; 3888 switch (rtype) { 3889 case MAC_RING_TYPE_RX: 3890 ss->rx_rh = rh; 3891 infop->mri_driver = (mac_ring_driver_t)ss; 3892 infop->mri_start = myri10ge_ring_start; 3893 infop->mri_stop = NULL; 3894 infop->mri_poll = myri10ge_poll_rx; 3895 infop->mri_stat = myri10ge_rx_ring_stat; 3896 mintr->mi_handle = (mac_intr_handle_t)ss; 3897 mintr->mi_enable = myri10ge_rx_ring_intr_enable; 3898 mintr->mi_disable = myri10ge_rx_ring_intr_disable; 3899 break; 3900 case MAC_RING_TYPE_TX: 3901 ss->tx.rh = rh; 3902 infop->mri_driver = (mac_ring_driver_t)ss; 3903 infop->mri_start = NULL; 3904 infop->mri_stop = NULL; 3905 infop->mri_tx = myri10ge_send_wrapper; 3906 infop->mri_stat = myri10ge_tx_ring_stat; 3907 break; 3908 default: 3909 break; 3910 } 3911 } 3912 3913 static void 3914 myri10ge_nic_stat_destroy(struct myri10ge_priv *mgp) 3915 { 3916 if (mgp->ksp_stat == NULL) 3917 return; 3918 3919 kstat_delete(mgp->ksp_stat); 3920 mgp->ksp_stat = NULL; 3921 } 3922 3923 static void 3924 myri10ge_slice_stat_destroy(struct myri10ge_slice_state *ss) 3925 { 3926 if (ss->ksp_stat == NULL) 3927 return; 3928 3929 kstat_delete(ss->ksp_stat); 3930 ss->ksp_stat = NULL; 3931 } 3932 3933 static void 3934 myri10ge_info_destroy(struct myri10ge_priv *mgp) 3935 { 3936 if (mgp->ksp_info == NULL) 3937 return; 3938 3939 kstat_delete(mgp->ksp_info); 3940 mgp->ksp_info = NULL; 3941 } 3942 3943 static int 3944 myri10ge_nic_stat_kstat_update(kstat_t *ksp, int rw) 3945 { 3946 struct myri10ge_nic_stat *ethstat; 3947 struct myri10ge_priv *mgp; 3948 mcp_irq_data_t *fw_stats; 3949 3950 3951 if (rw == KSTAT_WRITE) 3952 return (EACCES); 3953 3954 ethstat = (struct myri10ge_nic_stat *)ksp->ks_data; 3955 mgp = (struct myri10ge_priv *)ksp->ks_private; 3956 fw_stats = mgp->ss[0].fw_stats; 3957 3958 ethstat->dma_read_bw_MBs.value.ul = mgp->read_dma; 3959 ethstat->dma_write_bw_MBs.value.ul = mgp->write_dma; 3960 ethstat->dma_read_write_bw_MBs.value.ul = mgp->read_write_dma; 3961 if (myri10ge_tx_dma_attr.dma_attr_flags & DDI_DMA_FORCE_PHYSICAL) 3962 ethstat->dma_force_physical.value.ul = 1; 3963 else 3964 ethstat->dma_force_physical.value.ul = 0; 3965 ethstat->lanes.value.ul = mgp->pcie_link_width; 3966 ethstat->dropped_bad_crc32.value.ul = 3967 ntohl(fw_stats->dropped_bad_crc32); 3968 
ethstat->dropped_bad_phy.value.ul = 3969 ntohl(fw_stats->dropped_bad_phy); 3970 ethstat->dropped_link_error_or_filtered.value.ul = 3971 ntohl(fw_stats->dropped_link_error_or_filtered); 3972 ethstat->dropped_link_overflow.value.ul = 3973 ntohl(fw_stats->dropped_link_overflow); 3974 ethstat->dropped_multicast_filtered.value.ul = 3975 ntohl(fw_stats->dropped_multicast_filtered); 3976 ethstat->dropped_no_big_buffer.value.ul = 3977 ntohl(fw_stats->dropped_no_big_buffer); 3978 ethstat->dropped_no_small_buffer.value.ul = 3979 ntohl(fw_stats->dropped_no_small_buffer); 3980 ethstat->dropped_overrun.value.ul = 3981 ntohl(fw_stats->dropped_overrun); 3982 ethstat->dropped_pause.value.ul = 3983 ntohl(fw_stats->dropped_pause); 3984 ethstat->dropped_runt.value.ul = 3985 ntohl(fw_stats->dropped_runt); 3986 ethstat->link_up.value.ul = 3987 ntohl(fw_stats->link_up); 3988 ethstat->dropped_unicast_filtered.value.ul = 3989 ntohl(fw_stats->dropped_unicast_filtered); 3990 return (0); 3991 } 3992 3993 static int 3994 myri10ge_slice_stat_kstat_update(kstat_t *ksp, int rw) 3995 { 3996 struct myri10ge_slice_stat *ethstat; 3997 struct myri10ge_slice_state *ss; 3998 3999 if (rw == KSTAT_WRITE) 4000 return (EACCES); 4001 4002 ethstat = (struct myri10ge_slice_stat *)ksp->ks_data; 4003 ss = (struct myri10ge_slice_state *)ksp->ks_private; 4004 4005 ethstat->rx_big.value.ul = ss->j_rx_cnt; 4006 ethstat->rx_bigbuf_firmware.value.ul = ss->rx_big.cnt - ss->j_rx_cnt; 4007 ethstat->rx_bigbuf_pool.value.ul = 4008 ss->jpool.num_alloc - ss->jbufs_for_smalls; 4009 ethstat->rx_bigbuf_smalls.value.ul = ss->jbufs_for_smalls; 4010 ethstat->rx_small.value.ul = ss->rx_small.cnt - 4011 (ss->rx_small.mask + 1); 4012 ethstat->tx_done.value.ul = ss->tx.done; 4013 ethstat->tx_req.value.ul = ss->tx.req; 4014 ethstat->tx_activate.value.ul = ss->tx.activate; 4015 ethstat->xmit_sched.value.ul = ss->tx.sched; 4016 ethstat->xmit_stall.value.ul = ss->tx.stall; 4017 ethstat->xmit_stall_early.value.ul = ss->tx.stall_early; 4018 ethstat->xmit_stall_late.value.ul = ss->tx.stall_late; 4019 ethstat->xmit_err.value.ul = MYRI10GE_SLICE_STAT(xmit_err); 4020 return (0); 4021 } 4022 4023 static int 4024 myri10ge_info_kstat_update(kstat_t *ksp, int rw) 4025 { 4026 struct myri10ge_info *info; 4027 struct myri10ge_priv *mgp; 4028 4029 4030 if (rw == KSTAT_WRITE) 4031 return (EACCES); 4032 4033 info = (struct myri10ge_info *)ksp->ks_data; 4034 mgp = (struct myri10ge_priv *)ksp->ks_private; 4035 kstat_named_setstr(&info->driver_version, MYRI10GE_VERSION_STR); 4036 kstat_named_setstr(&info->firmware_version, mgp->fw_version); 4037 kstat_named_setstr(&info->firmware_name, mgp->fw_name); 4038 kstat_named_setstr(&info->interrupt_type, mgp->intr_type); 4039 kstat_named_setstr(&info->product_code, mgp->pc_str); 4040 kstat_named_setstr(&info->serial_number, mgp->sn_str); 4041 return (0); 4042 } 4043 4044 static struct myri10ge_info myri10ge_info_template = { 4045 { "driver_version", KSTAT_DATA_STRING }, 4046 { "firmware_version", KSTAT_DATA_STRING }, 4047 { "firmware_name", KSTAT_DATA_STRING }, 4048 { "interrupt_type", KSTAT_DATA_STRING }, 4049 { "product_code", KSTAT_DATA_STRING }, 4050 { "serial_number", KSTAT_DATA_STRING }, 4051 }; 4052 static kmutex_t myri10ge_info_template_lock; 4053 4054 4055 static int 4056 myri10ge_info_init(struct myri10ge_priv *mgp) 4057 { 4058 struct kstat *ksp; 4059 4060 ksp = kstat_create("myri10ge", ddi_get_instance(mgp->dip), 4061 "myri10ge_info", "net", KSTAT_TYPE_NAMED, 4062 sizeof (myri10ge_info_template) / 4063 sizeof (kstat_named_t), 
static int
myri10ge_info_init(struct myri10ge_priv *mgp)
{
	struct kstat *ksp;

	ksp = kstat_create("myri10ge", ddi_get_instance(mgp->dip),
	    "myri10ge_info", "net", KSTAT_TYPE_NAMED,
	    sizeof (myri10ge_info_template) /
	    sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL);
	if (ksp == NULL) {
		cmn_err(CE_WARN,
		    "%s: myri10ge_info_init: kstat_create failed", mgp->name);
		return (DDI_FAILURE);
	}
	mgp->ksp_info = ksp;
	ksp->ks_update = myri10ge_info_kstat_update;
	ksp->ks_private = (void *) mgp;
	ksp->ks_data = &myri10ge_info_template;
	ksp->ks_lock = &myri10ge_info_template_lock;
	if (MYRI10GE_VERSION_STR != NULL)
		ksp->ks_data_size += strlen(MYRI10GE_VERSION_STR) + 1;
	if (mgp->fw_version != NULL)
		ksp->ks_data_size += strlen(mgp->fw_version) + 1;
	ksp->ks_data_size += strlen(mgp->fw_name) + 1;
	ksp->ks_data_size += strlen(mgp->intr_type) + 1;
	if (mgp->pc_str != NULL)
		ksp->ks_data_size += strlen(mgp->pc_str) + 1;
	if (mgp->sn_str != NULL)
		ksp->ks_data_size += strlen(mgp->sn_str) + 1;

	kstat_install(ksp);
	return (DDI_SUCCESS);
}

static int
myri10ge_nic_stat_init(struct myri10ge_priv *mgp)
{
	struct kstat *ksp;
	struct myri10ge_nic_stat *ethstat;

	ksp = kstat_create("myri10ge", ddi_get_instance(mgp->dip),
	    "myri10ge_nic_stats", "net", KSTAT_TYPE_NAMED,
	    sizeof (*ethstat) / sizeof (kstat_named_t), 0);
	if (ksp == NULL) {
		cmn_err(CE_WARN,
		    "%s: myri10ge_nic_stat_init: kstat_create failed",
		    mgp->name);
		return (DDI_FAILURE);
	}
	mgp->ksp_stat = ksp;
	ethstat = (struct myri10ge_nic_stat *)(ksp->ks_data);

	kstat_named_init(&ethstat->dma_read_bw_MBs,
	    "dma_read_bw_MBs", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->dma_write_bw_MBs,
	    "dma_write_bw_MBs", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->dma_read_write_bw_MBs,
	    "dma_read_write_bw_MBs", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->dma_force_physical,
	    "dma_force_physical", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->lanes,
	    "lanes", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->dropped_bad_crc32,
	    "dropped_bad_crc32", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->dropped_bad_phy,
	    "dropped_bad_phy", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->dropped_link_error_or_filtered,
	    "dropped_link_error_or_filtered", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->dropped_link_overflow,
	    "dropped_link_overflow", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->dropped_multicast_filtered,
	    "dropped_multicast_filtered", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->dropped_no_big_buffer,
	    "dropped_no_big_buffer", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->dropped_no_small_buffer,
	    "dropped_no_small_buffer", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->dropped_overrun,
	    "dropped_overrun", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->dropped_pause,
	    "dropped_pause", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->dropped_runt,
	    "dropped_runt", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->dropped_unicast_filtered,
	    "dropped_unicast_filtered", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->link_up, "link_up", KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->link_changes, "link_changes",
	    KSTAT_DATA_ULONG);
	ksp->ks_update = myri10ge_nic_stat_kstat_update;
	ksp->ks_private = (void *) mgp;
	kstat_install(ksp);
	return (DDI_SUCCESS);
}
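/*
 * A note on the kstat pattern used throughout this file: the
 * framework invokes ks_update before every snapshot, so the values
 * are recomputed on demand rather than maintained continuously, and
 * KSTAT_WRITE attempts are refused with EACCES.
 */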
static int
myri10ge_slice_stat_init(struct myri10ge_slice_state *ss)
{
	struct myri10ge_priv *mgp = ss->mgp;
	struct kstat *ksp;
	struct myri10ge_slice_stat *ethstat;
	int instance;

	/*
	 * fake an instance so that the same slice numbers from
	 * different instances do not collide; e.g. slice 3 of
	 * myri10ge2 becomes kstat instance 2003
	 */
	instance = (ddi_get_instance(mgp->dip) * 1000) + (int)(ss - mgp->ss);
	ksp = kstat_create("myri10ge", instance,
	    "myri10ge_slice_stats", "net", KSTAT_TYPE_NAMED,
	    sizeof (*ethstat) / sizeof (kstat_named_t), 0);
	if (ksp == NULL) {
		cmn_err(CE_WARN,
		    "%s: myri10ge_slice_stat_init: kstat_create failed",
		    mgp->name);
		return (DDI_FAILURE);
	}
	ss->ksp_stat = ksp;
	ethstat = (struct myri10ge_slice_stat *)(ksp->ks_data);
	kstat_named_init(&ethstat->lro_bad_csum, "lro_bad_csum",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->lro_flushed, "lro_flushed",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->lro_queued, "lro_queued",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->rx_bigbuf_firmware, "rx_bigbuf_firmware",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->rx_bigbuf_pool, "rx_bigbuf_pool",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->rx_bigbuf_smalls, "rx_bigbuf_smalls",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->rx_copy, "rx_copy",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->rx_big_nobuf, "rx_big_nobuf",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->rx_small_nobuf, "rx_small_nobuf",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_zero_len, "xmit_zero_len",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_pullup, "xmit_pullup",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_pullup_first, "xmit_pullup_first",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_lowbuf, "xmit_lowbuf",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_lsobadflags, "xmit_lsobadflags",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_sched, "xmit_sched",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_stall, "xmit_stall",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_stall_early, "xmit_stall_early",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_stall_late, "xmit_stall_late",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->xmit_err, "xmit_err",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->tx_req, "tx_req",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->tx_activate, "tx_activate",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->tx_done, "tx_done",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->tx_handles_alloced, "tx_handles_alloced",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->rx_big, "rx_big",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ethstat->rx_small, "rx_small",
	    KSTAT_DATA_ULONG);
	ksp->ks_update = myri10ge_slice_stat_kstat_update;
	ksp->ks_private = (void *) ss;
	kstat_install(ksp);
	return (DDI_SUCCESS);
}
#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__

#include <vm/hat.h>
#include <sys/ddi_isa.h>
void *device_arena_alloc(size_t size, int vm_flag);
void device_arena_free(void *vaddr, size_t size);

static void
myri10ge_enable_nvidia_ecrc(struct myri10ge_priv *mgp)
{
	dev_info_t *parent_dip;
	ddi_acc_handle_t handle;
	unsigned long bus_number, dev_number, func_number;
	unsigned long cfg_pa, paddr, base, pgoffset;
	char *cvaddr, *ptr;
	uint32_t *ptr32;
	int retval = DDI_FAILURE;
	int dontcare;
	uint16_t read_vid, read_did, vendor_id, device_id;

	if (!myri10ge_nvidia_ecrc_enable)
		return;

	parent_dip = ddi_get_parent(mgp->dip);
	if (parent_dip == NULL) {
		cmn_err(CE_WARN, "%s: I'm an orphan?", mgp->name);
		return;
	}

	if (pci_config_setup(parent_dip, &handle) != DDI_SUCCESS) {
		cmn_err(CE_WARN,
		    "%s: Could not access my parent's registers", mgp->name);
		return;
	}

	vendor_id = pci_config_get16(handle, PCI_CONF_VENID);
	device_id = pci_config_get16(handle, PCI_CONF_DEVID);
	pci_config_teardown(&handle);

	if (myri10ge_verbose) {
		unsigned long bus_number, dev_number, func_number;
		int reg_set, span;

		(void) myri10ge_reg_set(parent_dip, &reg_set, &span,
		    &bus_number, &dev_number, &func_number);
		printf("%s: parent at %ld:%ld:%ld\n", mgp->name,
		    bus_number, dev_number, func_number);
	}

	if (vendor_id != 0x10de)
		return;

	if (device_id != 0x005d /* CK804 */ &&
	    (device_id < 0x374 || device_id > 0x378) /* MCP55 */) {
		return;
	}
	(void) myri10ge_reg_set(parent_dip, &dontcare, &dontcare,
	    &bus_number, &dev_number, &func_number);

	for (cfg_pa = 0xf0000000UL;
	    retval != DDI_SUCCESS && cfg_pa >= 0xe0000000UL;
	    cfg_pa -= 0x10000000UL) {
		/* find the config space address for the nvidia bridge */
		paddr = (cfg_pa + bus_number * 0x00100000UL +
		    (dev_number * 8 + func_number) * 0x00001000UL);

		base = paddr & (~MMU_PAGEOFFSET);
		pgoffset = paddr & MMU_PAGEOFFSET;

		/* map it into the kernel */
		cvaddr = device_arena_alloc(ptob(1), VM_NOSLEEP);
		if (cvaddr == NULL) {
			cmn_err(CE_WARN, "%s: failed to map nf4: cvaddr\n",
			    mgp->name);
			return;
		}

		hat_devload(kas.a_hat, cvaddr, mmu_ptob(1),
		    i_ddi_paddr_to_pfn(base),
		    PROT_WRITE|HAT_STRICTORDER, HAT_LOAD_LOCK);

		ptr = cvaddr + pgoffset;
		read_vid = *(uint16_t *)(void *)(ptr + PCI_CONF_VENID);
		read_did = *(uint16_t *)(void *)(ptr + PCI_CONF_DEVID);
		if (vendor_id == read_vid && device_id == read_did) {
			ptr32 = (uint32_t *)(void *)(ptr + 0x178);
			if (myri10ge_verbose)
				printf("%s: Enabling ECRC on upstream "
				    "Nvidia bridge (0x%x:0x%x) "
				    "at %ld:%ld:%ld\n", mgp->name,
				    read_vid, read_did, bus_number,
				    dev_number, func_number);
			*ptr32 |= 0x40;
			retval = DDI_SUCCESS;
		}
		hat_unload(kas.a_hat, cvaddr, ptob(1), HAT_UNLOAD_UNLOCK);
		device_arena_free(cvaddr, ptob(1));
	}
}

#else
/*ARGSUSED*/
static void
myri10ge_enable_nvidia_ecrc(struct myri10ge_priv *mgp)
{
}
#endif /* i386 */

/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary.  Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx.boundary to 2KB.
 * If ECRC is enabled, then the driver should use the aligned
 * (eth_z8e.dat) firmware image, and set tx.boundary to 4KB.
 */

static int
myri10ge_firmware_probe(struct myri10ge_priv *mgp)
{
	int status;

	mgp->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (mgp->max_read_request_4k == 0)
		mgp->tx_boundary = 2048;
	/*
	 * load the optimized firmware which assumes aligned PCIe
	 * completions in order to see if it works on this host.
	 */
	mgp->fw_name = "rss_eth_z8e";
	mgp->eth_z8e = (unsigned char *)rss_eth_z8e;
	mgp->eth_z8e_length = rss_eth_z8e_length;

	status = myri10ge_load_firmware(mgp);
	if (status != 0) {
		return (status);
	}

	/*
	 * Enable ECRC if possible
	 */
	myri10ge_enable_nvidia_ecrc(mgp);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen.
	 */
	status = myri10ge_dma_test(mgp, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return (0); /* keep the aligned firmware */

	if (status != E2BIG)
		cmn_err(CE_WARN, "%s: DMA test failed: %d\n",
		    mgp->name, status);
	if (status == ENOSYS)
		cmn_err(CE_WARN, "%s: Falling back to ethp! "
		    "Please install up to date fw\n", mgp->name);
	return (status);
}

static int
myri10ge_select_firmware(struct myri10ge_priv *mgp)
{
	int aligned;

	aligned = 0;

	if (myri10ge_force_firmware == 1) {
		if (myri10ge_verbose)
			printf("%s: Assuming aligned completions (forced)\n",
			    mgp->name);
		aligned = 1;
		goto done;
	}

	if (myri10ge_force_firmware == 2) {
		if (myri10ge_verbose)
			printf("%s: Assuming unaligned completions (forced)\n",
			    mgp->name);
		aligned = 0;
		goto done;
	}

	/* if the link width is less than 8, we may use the aligned firmware */
	if (mgp->pcie_link_width != 0 && mgp->pcie_link_width < 8) {
		cmn_err(CE_WARN, "!%s: PCIe link running at x%d\n",
		    mgp->name, mgp->pcie_link_width);
		aligned = 1;
		goto done;
	}

	if (0 == myri10ge_firmware_probe(mgp))
		return (0);	/* keep optimized firmware */

done:
	if (aligned) {
		mgp->fw_name = "rss_eth_z8e";
		mgp->eth_z8e = (unsigned char *)rss_eth_z8e;
		mgp->eth_z8e_length = rss_eth_z8e_length;
		mgp->tx_boundary = 4096;
	} else {
		mgp->fw_name = "rss_ethp_z8e";
		mgp->eth_z8e = (unsigned char *)rss_ethp_z8e;
		mgp->eth_z8e_length = rss_ethp_z8e_length;
		mgp->tx_boundary = 2048;
	}

	return (myri10ge_load_firmware(mgp));
}
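/*
 * Illustrative sketch of what the tx.boundary chosen above means for
 * the transmit path: no single Read-DMA may cross that boundary.  A
 * hypothetical helper (not part of this driver) that clamps a segment
 * length would look like:
 *
 *	static inline uint32_t
 *	myri10ge_clamp_seg(uint64_t busaddr, uint32_t len, uint32_t bndry)
 *	{
 *		uint32_t room = bndry - (busaddr & (bndry - 1));
 *		return (len < room ? len : room);
 *	}
 *
 * With bndry == 2048, a 4KB buffer at bus address 0x1f00 would be
 * carved into segments of 256, 2048 and 1792 bytes.
 */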
static int
myri10ge_add_intrs(struct myri10ge_priv *mgp, int add_handler)
{
	dev_info_t *devinfo = mgp->dip;
	int count, avail, actual, intr_types;
	int x, y, rc, inum = 0;

	rc = ddi_intr_get_supported_types(devinfo, &intr_types);
	if (rc != DDI_SUCCESS) {
		cmn_err(CE_WARN,
		    "!%s: ddi_intr_get_supported_types() failure, rc = %d\n",
		    mgp->name, rc);
		return (DDI_FAILURE);
	}

	if (!myri10ge_use_msi)
		intr_types &= ~DDI_INTR_TYPE_MSI;
	if (!myri10ge_use_msix)
		intr_types &= ~DDI_INTR_TYPE_MSIX;

	if (intr_types & DDI_INTR_TYPE_MSIX) {
		mgp->ddi_intr_type = DDI_INTR_TYPE_MSIX;
		mgp->intr_type = "MSI-X";
	} else if (intr_types & DDI_INTR_TYPE_MSI) {
		mgp->ddi_intr_type = DDI_INTR_TYPE_MSI;
		mgp->intr_type = "MSI";
	} else {
		mgp->ddi_intr_type = DDI_INTR_TYPE_FIXED;
		mgp->intr_type = "Legacy";
	}
	/* Get number of interrupts */
	rc = ddi_intr_get_nintrs(devinfo, mgp->ddi_intr_type, &count);
	if ((rc != DDI_SUCCESS) || (count == 0)) {
		cmn_err(CE_WARN, "%s: ddi_intr_get_nintrs() failure, rc: %d, "
		    "count: %d", mgp->name, rc, count);
		return (DDI_FAILURE);
	}

	/* Get number of available interrupts */
	rc = ddi_intr_get_navail(devinfo, mgp->ddi_intr_type, &avail);
	if ((rc != DDI_SUCCESS) || (avail == 0)) {
		cmn_err(CE_WARN, "%s: ddi_intr_get_navail() failure, "
		    "rc: %d, avail: %d\n", mgp->name, rc, avail);
		return (DDI_FAILURE);
	}
	if (avail < count) {
		cmn_err(CE_NOTE,
		    "!%s: nintrs() returned %d, navail returned %d",
		    mgp->name, count, avail);
		count = avail;
	}

	if (count < mgp->num_slices)
		return (DDI_FAILURE);

	if (count > mgp->num_slices)
		count = mgp->num_slices;

	/* Allocate memory for the interrupt handles */
	mgp->intr_size = count * sizeof (ddi_intr_handle_t);
	mgp->htable = kmem_alloc(mgp->intr_size, KM_SLEEP);

	rc = ddi_intr_alloc(devinfo, mgp->htable, mgp->ddi_intr_type, inum,
	    count, &actual, DDI_INTR_ALLOC_NORMAL);

	if ((rc != DDI_SUCCESS) || (actual == 0)) {
		cmn_err(CE_WARN, "%s: ddi_intr_alloc() failed: %d",
		    mgp->name, rc);
		kmem_free(mgp->htable, mgp->intr_size);
		mgp->htable = NULL;
		return (DDI_FAILURE);
	}

	if ((actual < count) && myri10ge_verbose) {
		cmn_err(CE_NOTE, "%s: got %d/%d slices",
		    mgp->name, actual, count);
	}

	mgp->intr_cnt = actual;

	/*
	 * Get priority for first irq, assume remaining are all the same
	 */
	if (ddi_intr_get_pri(mgp->htable[0], &mgp->intr_pri)
	    != DDI_SUCCESS) {
		cmn_err(CE_WARN, "%s: ddi_intr_get_pri() failed", mgp->name);

		/* Free already allocated intr */
		for (y = 0; y < actual; y++) {
			(void) ddi_intr_free(mgp->htable[y]);
		}

		kmem_free(mgp->htable, mgp->intr_size);
		mgp->htable = NULL;
		return (DDI_FAILURE);
	}

	mgp->icookie = (void *)(uintptr_t)mgp->intr_pri;

	if (!add_handler)
		return (DDI_SUCCESS);

	/* Call ddi_intr_add_handler() */
	for (x = 0; x < actual; x++) {
		if (ddi_intr_add_handler(mgp->htable[x], myri10ge_intr,
		    (caddr_t)&mgp->ss[x], NULL) != DDI_SUCCESS) {
			cmn_err(CE_WARN, "%s: ddi_intr_add_handler() failed",
			    mgp->name);

			/* Free already allocated intr */
			for (y = 0; y < actual; y++) {
				(void) ddi_intr_free(mgp->htable[y]);
			}

			kmem_free(mgp->htable, mgp->intr_size);
			mgp->htable = NULL;
			return (DDI_FAILURE);
		}
	}

	(void) ddi_intr_get_cap(mgp->htable[0], &mgp->intr_cap);
	if (mgp->intr_cap & DDI_INTR_FLAG_BLOCK) {
		/* Call ddi_intr_block_enable() for MSI */
		(void) ddi_intr_block_enable(mgp->htable, mgp->intr_cnt);
	} else {
		/* Call ddi_intr_enable() for MSI non block enable */
		for (x = 0; x < mgp->intr_cnt; x++) {
			(void) ddi_intr_enable(mgp->htable[x]);
		}
	}

	return (DDI_SUCCESS);
}
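/*
 * Sketch of the per-slice dispatch set up above (the real
 * myri10ge_intr lives elsewhere in this file): vector x is registered
 * with &mgp->ss[x] as its first argument, so, assuming the usual
 * ddi_intr_add_handler(9F) prototype, the handler recovers its slice
 * directly:
 *
 *	static uint_t
 *	myri10ge_intr(caddr_t arg0, caddr_t arg1)
 *	{
 *		struct myri10ge_slice_state *ss =
 *		    (struct myri10ge_slice_state *)(void *)arg0;
 *		...
 *	}
 */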
static void
myri10ge_rem_intrs(struct myri10ge_priv *mgp, int handler_installed)
{
	int x, err;

	/* Disable all interrupts */
	if (handler_installed) {
		if (mgp->intr_cap & DDI_INTR_FLAG_BLOCK) {
			/* Call ddi_intr_block_disable() */
			(void) ddi_intr_block_disable(mgp->htable,
			    mgp->intr_cnt);
		} else {
			for (x = 0; x < mgp->intr_cnt; x++) {
				(void) ddi_intr_disable(mgp->htable[x]);
			}
		}
	}

	for (x = 0; x < mgp->intr_cnt; x++) {
		if (handler_installed) {
			/* Call ddi_intr_remove_handler() */
			err = ddi_intr_remove_handler(mgp->htable[x]);
			if (err != DDI_SUCCESS) {
				cmn_err(CE_WARN,
				    "%s: ddi_intr_remove_handler for "
				    "vec %d returned %d\n", mgp->name,
				    x, err);
			}
		}
		err = ddi_intr_free(mgp->htable[x]);
		if (err != DDI_SUCCESS) {
			cmn_err(CE_WARN,
			    "%s: ddi_intr_free for vec %d returned %d\n",
			    mgp->name, x, err);
		}
	}
	kmem_free(mgp->htable, mgp->intr_size);
	mgp->htable = NULL;
}

static void
myri10ge_test_physical(dev_info_t *dip)
{
	ddi_dma_handle_t handle;
	struct myri10ge_dma_stuff dma;
	void *addr;
	int err;

	/* test #1, sufficient for older sparc systems */
	myri10ge_tx_dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
	err = ddi_dma_alloc_handle(dip, &myri10ge_tx_dma_attr,
	    DDI_DMA_DONTWAIT, NULL, &handle);
	if (err == DDI_DMA_BADATTR)
		goto fail;
	ddi_dma_free_handle(&handle);

	/* test #2, required on Olympus where the bind is what fails */
	addr = myri10ge_dma_alloc(dip, 128, &myri10ge_tx_dma_attr,
	    &myri10ge_dev_access_attr, DDI_DMA_STREAMING,
	    DDI_DMA_WRITE|DDI_DMA_STREAMING, &dma, 0, DDI_DMA_DONTWAIT);
	if (addr == NULL)
		goto fail;
	myri10ge_dma_free(&dma);
	return;

fail:
	if (myri10ge_verbose)
		printf("myri10ge%d: DDI_DMA_FORCE_PHYSICAL failed, "
		    "using IOMMU\n", ddi_get_instance(dip));

	myri10ge_tx_dma_attr.dma_attr_flags &= ~DDI_DMA_FORCE_PHYSICAL;
}

static void
myri10ge_get_props(dev_info_t *dip)
{
	myri10ge_flow_control = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_flow_control", myri10ge_flow_control);

	myri10ge_intr_coal_delay = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_intr_coal_delay", myri10ge_intr_coal_delay);

#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
	myri10ge_nvidia_ecrc_enable = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_nvidia_ecrc_enable", 1);
#endif

	myri10ge_use_msi = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_use_msi", myri10ge_use_msi);

	myri10ge_deassert_wait = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_deassert_wait", myri10ge_deassert_wait);

	myri10ge_verbose = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_verbose", myri10ge_verbose);

	myri10ge_tx_copylen = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_tx_copylen", myri10ge_tx_copylen);

	if (myri10ge_tx_copylen < 60) {
		cmn_err(CE_WARN,
		    "myri10ge_tx_copylen must be >= 60 bytes\n");
		myri10ge_tx_copylen = 60;
	}

	myri10ge_mtu_override = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_mtu_override", myri10ge_mtu_override);

	if (myri10ge_mtu_override >= MYRI10GE_MIN_GLD_MTU &&
	    myri10ge_mtu_override <= MYRI10GE_MAX_GLD_MTU)
		myri10ge_mtu = myri10ge_mtu_override +
		    sizeof (struct ether_header) + MXGEFW_PAD + VLAN_TAGSZ;
	else if (myri10ge_mtu_override != 0) {
		cmn_err(CE_WARN,
		    "myri10ge_mtu_override must be between 1500 and "
		    "9000 bytes\n");
	}
	myri10ge_bigbufs_initial = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_bigbufs_initial", myri10ge_bigbufs_initial);
	myri10ge_bigbufs_max = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_bigbufs_max", myri10ge_bigbufs_max);

	myri10ge_watchdog_reset = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_watchdog_reset", myri10ge_watchdog_reset);

	if (myri10ge_bigbufs_initial < 128) {
		cmn_err(CE_WARN,
		    "myri10ge_bigbufs_initial must be at least 128\n");
		myri10ge_bigbufs_initial = 128;
	}
	if (myri10ge_bigbufs_max < 128) {
		cmn_err(CE_WARN,
		    "myri10ge_bigbufs_max must be at least 128\n");
		myri10ge_bigbufs_max = 128;
	}

	if (myri10ge_bigbufs_max < myri10ge_bigbufs_initial) {
		cmn_err(CE_WARN,
		    "myri10ge_bigbufs_max must be >= "
		    "myri10ge_bigbufs_initial\n");
		myri10ge_bigbufs_max = myri10ge_bigbufs_initial;
	}

	myri10ge_force_firmware = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_force_firmware", myri10ge_force_firmware);

	myri10ge_max_slices = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_max_slices", myri10ge_max_slices);

	myri10ge_use_msix = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_use_msix", myri10ge_use_msix);

	myri10ge_rss_hash = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_rss_hash", myri10ge_rss_hash);

	if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_MAX ||
	    myri10ge_rss_hash < MXGEFW_RSS_HASH_TYPE_IPV4) {
		cmn_err(CE_WARN, "myri10ge: Illegal rss hash type %d\n",
		    myri10ge_rss_hash);
		myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
	}
	myri10ge_lro = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_lro", myri10ge_lro);
	myri10ge_lro_cnt = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_lro_cnt", myri10ge_lro_cnt);
	myri10ge_lro_max_aggr = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_lro_max_aggr", myri10ge_lro_max_aggr);
	myri10ge_tx_hash = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_tx_hash", myri10ge_tx_hash);
	myri10ge_use_lso = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_use_lso", myri10ge_use_lso);
	myri10ge_lso_copy = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_lso_copy", myri10ge_lso_copy);
	myri10ge_tx_handles_initial = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_tx_handles_initial", myri10ge_tx_handles_initial);
	myri10ge_small_bytes = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "myri10ge_small_bytes", myri10ge_small_bytes);
	if ((myri10ge_small_bytes + MXGEFW_PAD) & (128 - 1)) {
		cmn_err(CE_WARN, "myri10ge: myri10ge_small_bytes (%d) plus "
		    "MXGEFW_PAD must be a multiple of 128\n",
		    myri10ge_small_bytes);
		myri10ge_small_bytes += 128;
		myri10ge_small_bytes &= ~(128 - 1);
		myri10ge_small_bytes -= MXGEFW_PAD;
		cmn_err(CE_WARN, "rounded up to %d\n",
		    myri10ge_small_bytes);

		myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
	}
}
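/*
 * All of the tunables read above are ordinary DDI properties.  As a
 * usage sketch (assuming the stock driver.conf location), a line such
 * as
 *	myri10ge_intr_coal_delay=60;
 * in /kernel/drv/myri10ge.conf would override the built-in default
 * the next time myri10ge_get_props() runs at attach.
 */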
#ifndef PCI_EXP_LNKSTA
#define	PCI_EXP_LNKSTA	18
#endif

static int
myri10ge_find_cap(ddi_acc_handle_t handle, uint8_t *capptr, uint8_t capid)
{
	uint16_t status;
	uint8_t ptr;

	/* check to see if we have capabilities */
	status = pci_config_get16(handle, PCI_CONF_STAT);
	if (!(status & PCI_STAT_CAP)) {
		cmn_err(CE_WARN, "PCI_STAT_CAP not found\n");
		return (ENXIO);
	}

	ptr = pci_config_get8(handle, PCI_CONF_CAP_PTR);

	/* Walk the capabilities list, looking for the requested cap */
	while (ptr != PCI_CAP_NEXT_PTR_NULL) {
		if (pci_config_get8(handle, ptr + PCI_CAP_ID) == capid)
			break;
		ptr = pci_config_get8(handle, ptr + PCI_CAP_NEXT_PTR);
	}
	if (ptr < 64) {
		cmn_err(CE_WARN, "Bad capability offset %d\n", ptr);
		return (ENXIO);
	}
	*capptr = ptr;
	return (0);
}

static int
myri10ge_set_max_readreq(ddi_acc_handle_t handle)
{
	int err;
	uint16_t val;
	uint8_t ptr;

	err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_PCI_E);
	if (err != 0) {
		cmn_err(CE_WARN, "could not find PCIe cap\n");
		return (ENXIO);
	}

	/* set max read req to 4096 */
	val = pci_config_get16(handle, ptr + PCIE_DEVCTL);
	val = (val & ~PCIE_DEVCTL_MAX_READ_REQ_MASK) |
	    PCIE_DEVCTL_MAX_READ_REQ_4096;
	pci_config_put16(handle, ptr + PCIE_DEVCTL, val);
	val = pci_config_get16(handle, ptr + PCIE_DEVCTL);
	if ((val & (PCIE_DEVCTL_MAX_READ_REQ_4096)) !=
	    PCIE_DEVCTL_MAX_READ_REQ_4096) {
		cmn_err(CE_WARN, "could not set max read req (%x)\n", val);
		return (EINVAL);
	}
	return (0);
}

static int
myri10ge_read_pcie_link_width(ddi_acc_handle_t handle, int *link)
{
	int err;
	uint16_t val;
	uint8_t ptr;

	err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_PCI_E);
	if (err != 0) {
		cmn_err(CE_WARN, "could not find PCIe cap\n");
		return (ENXIO);
	}

	/* read link width */
	val = pci_config_get16(handle, ptr + PCIE_LINKSTS);
	val &= PCIE_LINKSTS_NEG_WIDTH_MASK;
	*link = (val >> 4);
	return (0);
}
static int
myri10ge_reset_nic(struct myri10ge_priv *mgp)
{
	ddi_acc_handle_t handle = mgp->cfg_hdl;
	uint32_t reboot;
	uint16_t cmd;
	int err;

	cmd = pci_config_get16(handle, PCI_CONF_COMM);
	if ((cmd & PCI_COMM_ME) == 0) {
		/*
		 * Bus master DMA disabled?  Check to see if the card
		 * rebooted due to a parity error.  For now, just report
		 * it.
		 */

		/* enter read32 mode */
		pci_config_put8(handle, mgp->vso + 0x10, 0x3);
		/* read REBOOT_STATUS (0xfffffff0) */
		pci_config_put32(handle, mgp->vso + 0x18, 0xfffffff0);
		reboot = pci_config_get16(handle, mgp->vso + 0x14);
		cmn_err(CE_WARN, "%s NIC rebooted 0x%x\n", mgp->name, reboot);
		return (0);
	}
	if (!myri10ge_watchdog_reset) {
		cmn_err(CE_WARN, "%s: not resetting\n", mgp->name);
		return (1);
	}

	myri10ge_stop_locked(mgp);
	err = myri10ge_start_locked(mgp);
	if (err == DDI_FAILURE) {
		return (0);
	}
	mac_tx_update(mgp->mh);
	return (1);
}

static inline int
myri10ge_ring_stalled(myri10ge_tx_ring_t *tx)
{
	if (tx->sched != tx->stall &&
	    tx->done == tx->watchdog_done &&
	    tx->watchdog_req != tx->watchdog_done)
		return (1);
	return (0);
}

static void
myri10ge_watchdog(void *arg)
{
	struct myri10ge_priv *mgp;
	struct myri10ge_slice_state *ss;
	myri10ge_tx_ring_t *tx;
	int nic_ok = 1;
	int slices_stalled, rx_pause, i;
	int add_rx;

	mgp = arg;
	mutex_enter(&mgp->intrlock);
	if (mgp->running != MYRI10GE_ETH_RUNNING) {
		cmn_err(CE_WARN,
		    "%s not running, not rearming watchdog (%d)\n",
		    mgp->name, mgp->running);
		mutex_exit(&mgp->intrlock);
		return;
	}

	rx_pause = ntohl(mgp->ss[0].fw_stats->dropped_pause);

	/*
	 * make sure nic is stalled before we reset the nic, so as to
	 * ensure we don't rip the transmit data structures out from
	 * under a pending transmit
	 */
	for (slices_stalled = 0, i = 0; i < mgp->num_slices; i++) {
		tx = &mgp->ss[i].tx;
		slices_stalled = myri10ge_ring_stalled(tx);
		if (slices_stalled)
			break;
	}

	if (slices_stalled) {
		if (mgp->watchdog_rx_pause == rx_pause) {
			cmn_err(CE_WARN,
			    "%s slice %d stalled: (%d, %d, %d, %d, %d, %d, "
			    "%d)\n", mgp->name, i, tx->sched, tx->stall,
			    tx->done, tx->watchdog_done, tx->req, tx->pkt_done,
			    (int)ntohl(mgp->ss[i].fw_stats->send_done_count));
			nic_ok = myri10ge_reset_nic(mgp);
		} else {
			cmn_err(CE_WARN,
			    "%s Flow controlled, check link partner\n",
			    mgp->name);
		}
	}

	if (!nic_ok) {
		cmn_err(CE_WARN,
		    "%s NIC dead, not rearming watchdog\n", mgp->name);
		mutex_exit(&mgp->intrlock);
		return;
	}
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		tx = &ss->tx;
		tx->watchdog_done = tx->done;
		tx->watchdog_req = tx->req;
		if (ss->watchdog_rx_copy != MYRI10GE_SLICE_STAT(rx_copy)) {
			ss->watchdog_rx_copy = MYRI10GE_SLICE_STAT(rx_copy);
			add_rx =
			    min(ss->jpool.num_alloc,
			    myri10ge_bigbufs_max -
			    (ss->jpool.num_alloc -
			    ss->jbufs_for_smalls));
			if (add_rx != 0) {
				(void) myri10ge_add_jbufs(ss, add_rx, 0);
				/* now feed them to the firmware */
				mutex_enter(&ss->jpool.mtx);
				myri10ge_restock_jumbos(ss);
				mutex_exit(&ss->jpool.mtx);
			}
		}
	}
	mgp->watchdog_rx_pause = rx_pause;

	mgp->timer_id = timeout(myri10ge_watchdog, mgp,
	    mgp->timer_ticks);
	mutex_exit(&mgp->intrlock);
}
/*ARGSUSED*/
static int
myri10ge_get_coalesce(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp)
{
	struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;

	(void) mi_mpprintf(mp, "%d", mgp->intr_coal_delay);
	return (0);
}

/*ARGSUSED*/
static int
myri10ge_set_coalesce(queue_t *q, mblk_t *mp, char *value,
    caddr_t cp, cred_t *credp)
{
	struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
	char *end;
	size_t new_value;

	new_value = mi_strtol(value, &end, 10);
	if (end == value)
		return (EINVAL);

	mutex_enter(&myri10ge_param_lock);
	mgp->intr_coal_delay = (int)new_value;
	*mgp->intr_coal_delay_ptr = htonl(mgp->intr_coal_delay);
	mutex_exit(&myri10ge_param_lock);
	return (0);
}

/*ARGSUSED*/
static int
myri10ge_get_pauseparam(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp)
{
	struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;

	(void) mi_mpprintf(mp, "%d", mgp->pause);
	return (0);
}

/*ARGSUSED*/
static int
myri10ge_set_pauseparam(queue_t *q, mblk_t *mp, char *value,
    caddr_t cp, cred_t *credp)
{
	struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
	char *end;
	size_t new_value;
	int err = 0;

	new_value = mi_strtol(value, &end, 10);
	if (end == value)
		return (EINVAL);
	if (new_value != 0)
		new_value = 1;

	mutex_enter(&myri10ge_param_lock);
	if (new_value != mgp->pause)
		err = myri10ge_change_pause(mgp, new_value);
	mutex_exit(&myri10ge_param_lock);
	return (err);
}

/*ARGSUSED*/
static int
myri10ge_get_int(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp)
{
	(void) mi_mpprintf(mp, "%d", *(int *)(void *)cp);
	return (0);
}

/*ARGSUSED*/
static int
myri10ge_set_int(queue_t *q, mblk_t *mp, char *value,
    caddr_t cp, cred_t *credp)
{
	char *end;
	size_t new_value;

	new_value = mi_strtol(value, &end, 10);
	if (end == value)
		return (EINVAL);
	*(int *)(void *)cp = new_value;

	return (0);
}

static void
myri10ge_ndd_init(struct myri10ge_priv *mgp)
{
	mgp->nd_head = NULL;

	(void) nd_load(&mgp->nd_head, "myri10ge_intr_coal_delay",
	    myri10ge_get_coalesce, myri10ge_set_coalesce, (caddr_t)mgp);
	(void) nd_load(&mgp->nd_head, "myri10ge_flow_control",
	    myri10ge_get_pauseparam, myri10ge_set_pauseparam, (caddr_t)mgp);
	(void) nd_load(&mgp->nd_head, "myri10ge_verbose",
	    myri10ge_get_int, myri10ge_set_int, (caddr_t)&myri10ge_verbose);
	(void) nd_load(&mgp->nd_head, "myri10ge_deassert_wait",
	    myri10ge_get_int, myri10ge_set_int,
	    (caddr_t)&myri10ge_deassert_wait);
	(void) nd_load(&mgp->nd_head, "myri10ge_bigbufs_max",
	    myri10ge_get_int, myri10ge_set_int,
	    (caddr_t)&myri10ge_bigbufs_max);
	(void) nd_load(&mgp->nd_head, "myri10ge_lro",
	    myri10ge_get_int, myri10ge_set_int,
	    (caddr_t)&myri10ge_lro);
	(void) nd_load(&mgp->nd_head, "myri10ge_lro_max_aggr",
	    myri10ge_get_int, myri10ge_set_int,
	    (caddr_t)&myri10ge_lro_max_aggr);
	(void) nd_load(&mgp->nd_head, "myri10ge_tx_hash",
	    myri10ge_get_int, myri10ge_set_int,
	    (caddr_t)&myri10ge_tx_hash);
	(void) nd_load(&mgp->nd_head, "myri10ge_lso_copy",
	    myri10ge_get_int, myri10ge_set_int,
	    (caddr_t)&myri10ge_lso_copy);
}

static void
myri10ge_ndd_fini(struct myri10ge_priv *mgp)
{
	nd_free(&mgp->nd_head);
}
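/*
 * The ND parameters registered above are reached through the
 * ND_GET/ND_SET ioctls handled below.  As a usage sketch (the device
 * path is an assumption and depends on how the node is plumbed),
 * something like
 *	ndd -set /dev/myri10ge0 myri10ge_intr_coal_delay 60
 * would be routed through myri10ge_set_coalesce().
 */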
static void
myri10ge_m_ioctl(void *arg, queue_t *wq, mblk_t *mp)
{
	struct iocblk *iocp;
	struct myri10ge_priv *mgp = arg;
	int cmd, ok, err;

	iocp = (struct iocblk *)(void *)mp->b_rptr;
	cmd = iocp->ioc_cmd;

	ok = 0;
	err = 0;

	switch (cmd) {
	case ND_GET:
	case ND_SET:
		ok = nd_getset(wq, mgp->nd_head, mp);
		break;
	default:
		break;
	}
	if (!ok)
		err = EINVAL;
	else
		err = iocp->ioc_error;

	if (!err)
		miocack(wq, mp, iocp->ioc_count, err);
	else
		miocnak(wq, mp, 0, err);
}

static struct myri10ge_priv *mgp_list;

struct myri10ge_priv *
myri10ge_get_instance(uint_t unit)
{
	struct myri10ge_priv *mgp;

	mutex_enter(&myri10ge_param_lock);
	for (mgp = mgp_list; mgp != NULL; mgp = mgp->next) {
		if (unit == ddi_get_instance(mgp->dip)) {
			mgp->refcnt++;
			break;
		}
	}
	mutex_exit(&myri10ge_param_lock);
	return (mgp);
}

void
myri10ge_put_instance(struct myri10ge_priv *mgp)
{
	mutex_enter(&myri10ge_param_lock);
	mgp->refcnt--;
	mutex_exit(&myri10ge_param_lock);
}

static boolean_t
myri10ge_m_getcapab(void *arg, mac_capab_t cap, void *cap_data)
{
	struct myri10ge_priv *mgp = arg;
	uint32_t *cap_hcksum;
	mac_capab_lso_t *cap_lso;
	mac_capab_rings_t *cap_rings;

	switch (cap) {
	case MAC_CAPAB_HCKSUM:
		cap_hcksum = cap_data;
		*cap_hcksum = HCKSUM_INET_PARTIAL;
		break;
	case MAC_CAPAB_RINGS:
		cap_rings = cap_data;
		switch (cap_rings->mr_type) {
		case MAC_RING_TYPE_RX:
			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
			cap_rings->mr_rnum = mgp->num_slices;
			cap_rings->mr_gnum = 1;
			cap_rings->mr_rget = myri10ge_fill_ring;
			cap_rings->mr_gget = myri10ge_fill_group;
			break;
		case MAC_RING_TYPE_TX:
			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
			cap_rings->mr_rnum = mgp->num_slices;
			cap_rings->mr_gnum = 0;
			cap_rings->mr_rget = myri10ge_fill_ring;
			cap_rings->mr_gget = NULL;
			break;
		default:
			return (B_FALSE);
		}
		break;
	case MAC_CAPAB_LSO:
		cap_lso = cap_data;
		if (!myri10ge_use_lso)
			return (B_FALSE);
		if (!(mgp->features & MYRI10GE_TSO))
			return (B_FALSE);
		cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4;
		cap_lso->lso_basic_tcp_ipv4.lso_max = (uint16_t)-1;
		break;
	default:
		return (B_FALSE);
	}
	return (B_TRUE);
}

static int
myri10ge_m_stat(void *arg, uint_t stat, uint64_t *val)
{
	struct myri10ge_priv *mgp = arg;
	struct myri10ge_rx_ring_stats *rstat;
	struct myri10ge_tx_ring_stats *tstat;
	mcp_irq_data_t *fw_stats = mgp->ss[0].fw_stats;
	struct myri10ge_slice_state *ss;
	uint64_t tmp = 0;
	int i;

	switch (stat) {
	case MAC_STAT_IFSPEED:
		*val = 10ull * 1000ull * 1000000ull;
		break;

	case MAC_STAT_MULTIRCV:
		for (i = 0; i < mgp->num_slices; i++) {
			rstat = &mgp->ss[i].rx_stats;
			tmp += rstat->multircv;
		}
		*val = tmp;
		break;

	case MAC_STAT_BRDCSTRCV:
		for (i = 0; i < mgp->num_slices; i++) {
			rstat = &mgp->ss[i].rx_stats;
			tmp += rstat->brdcstrcv;
		}
		*val = tmp;
		break;

	case MAC_STAT_MULTIXMT:
		for (i = 0; i < mgp->num_slices; i++) {
			tstat = &mgp->ss[i].tx.stats;
			tmp += tstat->multixmt;
		}
		*val = tmp;
		break;

	case MAC_STAT_BRDCSTXMT:
		for (i = 0; i < mgp->num_slices; i++) {
			tstat = &mgp->ss[i].tx.stats;
			tmp += tstat->brdcstxmt;
		}
		*val = tmp;
		break;

	case MAC_STAT_NORCVBUF:
		tmp = ntohl(fw_stats->dropped_no_big_buffer);
		tmp += ntohl(fw_stats->dropped_no_small_buffer);
		tmp += ntohl(fw_stats->dropped_link_overflow);
		for (i = 0; i < mgp->num_slices; i++) {
			ss = &mgp->ss[i];
			tmp += MYRI10GE_SLICE_STAT(rx_big_nobuf);
			tmp += MYRI10GE_SLICE_STAT(rx_small_nobuf);
		}
		*val = tmp;
		break;

	case MAC_STAT_IERRORS:
		tmp += ntohl(fw_stats->dropped_bad_crc32);
		tmp += ntohl(fw_stats->dropped_bad_phy);
		tmp += ntohl(fw_stats->dropped_runt);
		tmp += ntohl(fw_stats->dropped_overrun);
		*val = tmp;
		break;

	case MAC_STAT_OERRORS:
		for (i = 0; i < mgp->num_slices; i++) {
			ss = &mgp->ss[i];
			tmp += MYRI10GE_SLICE_STAT(xmit_lsobadflags);
			tmp += MYRI10GE_SLICE_STAT(xmit_err);
		}
		*val = tmp;
		break;

	case MAC_STAT_RBYTES:
		for (i = 0; i < mgp->num_slices; i++) {
			rstat = &mgp->ss[i].rx_stats;
			tmp += rstat->ibytes;
		}
		*val = tmp;
		break;

	case MAC_STAT_IPACKETS:
		for (i = 0; i < mgp->num_slices; i++) {
			rstat = &mgp->ss[i].rx_stats;
			tmp += rstat->ipackets;
		}
		*val = tmp;
		break;

	case MAC_STAT_OBYTES:
		for (i = 0; i < mgp->num_slices; i++) {
			tstat = &mgp->ss[i].tx.stats;
			tmp += tstat->obytes;
		}
		*val = tmp;
		break;

	case MAC_STAT_OPACKETS:
		for (i = 0; i < mgp->num_slices; i++) {
			tstat = &mgp->ss[i].tx.stats;
			tmp += tstat->opackets;
		}
		*val = tmp;
		break;

	case ETHER_STAT_TOOLONG_ERRORS:
		*val = ntohl(fw_stats->dropped_overrun);
		break;

#ifdef SOLARIS_S11
	case ETHER_STAT_TOOSHORT_ERRORS:
		*val = ntohl(fw_stats->dropped_runt);
		break;
#endif

	case ETHER_STAT_LINK_PAUSE:
		*val = mgp->pause;
		break;

	case ETHER_STAT_LINK_AUTONEG:
		*val = 1;
		break;

	case ETHER_STAT_LINK_DUPLEX:
		*val = LINK_DUPLEX_FULL;
		break;

	default:
		return (ENOTSUP);
	}

	return (0);
}

/* ARGSUSED */
static void
myri10ge_m_propinfo(void *arg, const char *pr_name,
    mac_prop_id_t pr_num, mac_prop_info_handle_t prh)
{
	switch (pr_num) {
	case MAC_PROP_MTU:
		mac_prop_info_set_default_uint32(prh,
		    MYRI10GE_DEFAULT_GLD_MTU);
		mac_prop_info_set_range_uint32(prh, MYRI10GE_MIN_GLD_MTU,
		    MYRI10GE_MAX_GLD_MTU);
		break;
	default:
		break;
	}
}

/*ARGSUSED*/
static int
myri10ge_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
    uint_t pr_valsize, const void *pr_val)
{
	int err = 0;
	struct myri10ge_priv *mgp = arg;

	switch (pr_num) {
	case MAC_PROP_MTU: {
		uint32_t mtu;

		if (pr_valsize < sizeof (mtu)) {
			err = EINVAL;
			break;
		}
		bcopy(pr_val, &mtu, sizeof (mtu));
		if (mtu > MYRI10GE_MAX_GLD_MTU ||
		    mtu < MYRI10GE_MIN_GLD_MTU) {
			err = EINVAL;
			break;
		}

		mutex_enter(&mgp->intrlock);
		if (mgp->running != MYRI10GE_ETH_STOPPED) {
			err = EBUSY;
			mutex_exit(&mgp->intrlock);
			break;
		}

		myri10ge_mtu = mtu + sizeof (struct ether_header) +
		    MXGEFW_PAD + VLAN_TAGSZ;
		mutex_exit(&mgp->intrlock);
		break;
	}
	default:
		err = ENOTSUP;
		break;
	}

	return (err);
}
static mac_callbacks_t myri10ge_m_callbacks = {
	(MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO),
	myri10ge_m_stat,
	myri10ge_m_start,
	myri10ge_m_stop,
	myri10ge_m_promisc,
	myri10ge_m_multicst,
	NULL,
	NULL,
	NULL,
	myri10ge_m_ioctl,
	myri10ge_m_getcapab,
	NULL,
	NULL,
	myri10ge_m_setprop,
	NULL,
	myri10ge_m_propinfo
};

static int
myri10ge_probe_slices(struct myri10ge_priv *mgp)
{
	myri10ge_cmd_t cmd;
	int status;

	mgp->num_slices = 1;

	/* hit the board with a reset to ensure it is alive */
	(void) memset(&cmd, 0, sizeof (cmd));
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed reset\n", mgp->name);
		return (ENXIO);
	}

	if (myri10ge_use_msix == 0)
		return (0);

	/* tell it the size of the interrupt queues */
	cmd.data0 = mgp->max_intr_slots * sizeof (struct mcp_slot);
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed MXGEFW_CMD_SET_INTRQ_SIZE\n",
		    mgp->name);
		return (ENXIO);
	}

	/* ask for the maximum number of slices it supports */
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
	    &cmd);
	if (status != 0)
		return (0);

	mgp->num_slices = cmd.data0;

	/*
	 * if the admin did not specify a limit to how many
	 * slices we should use, cap it automatically to the
	 * number of CPUs currently online
	 */
	if (myri10ge_max_slices == -1)
		myri10ge_max_slices = ncpus;

	if (mgp->num_slices > myri10ge_max_slices)
		mgp->num_slices = myri10ge_max_slices;
	/*
	 * Now try to allocate as many MSI-X vectors as we have
	 * slices.  We give up on MSI-X if we can only get a single
	 * vector.
	 */
	while (mgp->num_slices > 1) {
		/* make sure it is a power of two */
		while (!ISP2(mgp->num_slices))
			mgp->num_slices--;
		if (mgp->num_slices == 1)
			return (0);

		status = myri10ge_add_intrs(mgp, 0);
		if (status == 0) {
			myri10ge_rem_intrs(mgp, 0);
			if (mgp->intr_cnt == mgp->num_slices) {
				if (myri10ge_verbose)
					printf("Got %d slices!\n",
					    mgp->num_slices);
				return (0);
			}
			mgp->num_slices = mgp->intr_cnt;
		} else {
			mgp->num_slices = mgp->num_slices / 2;
		}
	}

	if (myri10ge_verbose)
		printf("Got %d slices\n", mgp->num_slices);
	return (0);
}
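/*
 * Walk-through of the loop above with assumed counts: if the firmware
 * offers 16 slices but only 5 MSI-X vectors can be allocated, the
 * first pass shrinks num_slices to 5, the power-of-two fixup rounds
 * that down to 4, and the second pass gets exactly 4 vectors, so the
 * driver runs with 4 slices.
 */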
static void
myri10ge_lro_free(struct myri10ge_slice_state *ss)
{
	struct lro_entry *lro;

	while (ss->lro_free != NULL) {
		lro = ss->lro_free;
		ss->lro_free = lro->next;
		kmem_free(lro, sizeof (*lro));
	}
}

static void
myri10ge_lro_alloc(struct myri10ge_slice_state *ss)
{
	struct lro_entry *lro;
	int idx;

	ss->lro_free = NULL;
	ss->lro_active = NULL;

	for (idx = 0; idx < myri10ge_lro_cnt; idx++) {
		lro = kmem_zalloc(sizeof (*lro), KM_SLEEP);
		if (lro == NULL)
			continue;
		lro->next = ss->lro_free;
		ss->lro_free = lro;
	}
}

static void
myri10ge_free_slices(struct myri10ge_priv *mgp)
{
	struct myri10ge_slice_state *ss;
	size_t bytes;
	int i;

	if (mgp->ss == NULL)
		return;

	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		if (ss->rx_done.entry == NULL)
			continue;
		myri10ge_dma_free(&ss->rx_done.dma);
		ss->rx_done.entry = NULL;
		if (ss->fw_stats == NULL)
			continue;
		myri10ge_dma_free(&ss->fw_stats_dma);
		ss->fw_stats = NULL;
		mutex_destroy(&ss->rx_lock);
		mutex_destroy(&ss->tx.lock);
		mutex_destroy(&ss->tx.handle_lock);
		mutex_destroy(&ss->poll_lock);
		myri10ge_jpool_fini(ss);
		myri10ge_slice_stat_destroy(ss);
		myri10ge_lro_free(ss);
	}
	bytes = sizeof (*mgp->ss) * mgp->num_slices;
	kmem_free(mgp->ss, bytes);
	mgp->ss = NULL;
}

static int
myri10ge_alloc_slices(struct myri10ge_priv *mgp)
{
	struct myri10ge_slice_state *ss;
	size_t bytes;
	int i;

	bytes = sizeof (*mgp->ss) * mgp->num_slices;
	mgp->ss = kmem_zalloc(bytes, KM_SLEEP);
	if (mgp->ss == NULL)
		return (ENOMEM);
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];

		ss->mgp = mgp;

		/* allocate the per-slice firmware stats */
		bytes = sizeof (*ss->fw_stats);
		ss->fw_stats = (mcp_irq_data_t *)(void *)
		    myri10ge_dma_alloc(mgp->dip, bytes,
		    &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
		    DDI_DMA_CONSISTENT, DDI_DMA_READ|DDI_DMA_CONSISTENT,
		    &ss->fw_stats_dma, 1, DDI_DMA_DONTWAIT);
		if (ss->fw_stats == NULL)
			goto abort;
		(void) memset(ss->fw_stats, 0, bytes);

		/* allocate rx done ring */
		bytes = mgp->max_intr_slots *
		    sizeof (*ss->rx_done.entry);
		ss->rx_done.entry = (mcp_slot_t *)(void *)
		    myri10ge_dma_alloc(mgp->dip, bytes,
		    &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
		    DDI_DMA_CONSISTENT, DDI_DMA_READ|DDI_DMA_CONSISTENT,
		    &ss->rx_done.dma, 1, DDI_DMA_DONTWAIT);
		if (ss->rx_done.entry == NULL) {
			goto abort;
		}
		(void) memset(ss->rx_done.entry, 0, bytes);
		mutex_init(&ss->rx_lock, NULL, MUTEX_DEFAULT, mgp->icookie);
		mutex_init(&ss->tx.lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ss->tx.handle_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ss->poll_lock, NULL, MUTEX_DEFAULT, NULL);
		myri10ge_jpool_init(ss);
		(void) myri10ge_slice_stat_init(ss);
		myri10ge_lro_alloc(ss);
	}

	return (0);

abort:
	myri10ge_free_slices(mgp);
	return (ENOMEM);
}

static int
myri10ge_save_msi_state(struct myri10ge_priv *mgp,
    ddi_acc_handle_t handle)
{
	uint8_t ptr;
	int err;

	err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_MSI);
	if (err != 0) {
		cmn_err(CE_WARN, "%s: could not find MSI cap\n",
		    mgp->name);
		return (DDI_FAILURE);
	}
	mgp->pci_saved_state.msi_ctrl =
	    pci_config_get16(handle, ptr + PCI_MSI_CTRL);
	mgp->pci_saved_state.msi_addr_low =
	    pci_config_get32(handle, ptr + PCI_MSI_ADDR_OFFSET);
	mgp->pci_saved_state.msi_addr_high =
	    pci_config_get32(handle, ptr + PCI_MSI_ADDR_OFFSET + 4);
	mgp->pci_saved_state.msi_data_32 =
	    pci_config_get16(handle, ptr + PCI_MSI_32BIT_DATA);
	mgp->pci_saved_state.msi_data_64 =
	    pci_config_get16(handle, ptr + PCI_MSI_64BIT_DATA);
	return (DDI_SUCCESS);
}

static int
myri10ge_restore_msi_state(struct myri10ge_priv *mgp,
    ddi_acc_handle_t handle)
{
	uint8_t ptr;
	int err;

	err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_MSI);
	if (err != 0) {
		cmn_err(CE_WARN, "%s: could not find MSI cap\n",
		    mgp->name);
		return (DDI_FAILURE);
	}

	pci_config_put16(handle, ptr + PCI_MSI_CTRL,
	    mgp->pci_saved_state.msi_ctrl);
	pci_config_put32(handle, ptr + PCI_MSI_ADDR_OFFSET,
	    mgp->pci_saved_state.msi_addr_low);
	pci_config_put32(handle, ptr + PCI_MSI_ADDR_OFFSET + 4,
	    mgp->pci_saved_state.msi_addr_high);
	pci_config_put16(handle, ptr + PCI_MSI_32BIT_DATA,
	    mgp->pci_saved_state.msi_data_32);
	pci_config_put16(handle, ptr + PCI_MSI_64BIT_DATA,
	    mgp->pci_saved_state.msi_data_64);

	return (DDI_SUCCESS);
}

static int
myri10ge_save_pci_state(struct myri10ge_priv *mgp)
{
	ddi_acc_handle_t handle = mgp->cfg_hdl;
	int i;
	int err = DDI_SUCCESS;

	/* Save the non-extended PCI config space 32-bits at a time */
	for (i = 0; i < 16; i++)
		mgp->pci_saved_state.base[i] =
		    pci_config_get32(handle, i*4);

	/* now save MSI interrupt state, if needed */
	if (mgp->ddi_intr_type == DDI_INTR_TYPE_MSI)
		err = myri10ge_save_msi_state(mgp, handle);

	return (err);
}

static int
myri10ge_restore_pci_state(struct myri10ge_priv *mgp)
{
	ddi_acc_handle_t handle = mgp->cfg_hdl;
	int i;
	int err = DDI_SUCCESS;

	/* Restore the non-extended PCI config space 32-bits at a time */
	for (i = 15; i >= 0; i--)
		pci_config_put32(handle, i*4, mgp->pci_saved_state.base[i]);

	/* now restore MSI interrupt state, if needed */
	if (mgp->ddi_intr_type == DDI_INTR_TYPE_MSI)
		err = myri10ge_restore_msi_state(mgp, handle);

	if (mgp->max_read_request_4k)
		(void) myri10ge_set_max_readreq(handle);
	return (err);
}
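/*
 * Note on the restore order above: walking the header words backwards
 * (offset 0x3c down to 0x00) puts the BARs back in place before the
 * command register at offset 4 re-enables memory decoding and bus
 * mastering, so the device never masters with stale addresses
 * programmed.
 */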
static int
myri10ge_suspend(dev_info_t *dip)
{
	struct myri10ge_priv *mgp = ddi_get_driver_private(dip);
	int status;

	if (mgp == NULL) {
		cmn_err(CE_WARN, "null dip in myri10ge_suspend\n");
		return (DDI_FAILURE);
	}
	if (mgp->dip != dip) {
		cmn_err(CE_WARN, "bad dip in myri10ge_suspend\n");
		return (DDI_FAILURE);
	}
	mutex_enter(&mgp->intrlock);
	if (mgp->running == MYRI10GE_ETH_RUNNING) {
		mgp->running = MYRI10GE_ETH_STOPPING;
		mutex_exit(&mgp->intrlock);
		(void) untimeout(mgp->timer_id);
		mutex_enter(&mgp->intrlock);
		myri10ge_stop_locked(mgp);
		mgp->running = MYRI10GE_ETH_SUSPENDED_RUNNING;
	}
	status = myri10ge_save_pci_state(mgp);
	mutex_exit(&mgp->intrlock);
	return (status);
}

static int
myri10ge_resume(dev_info_t *dip)
{
	struct myri10ge_priv *mgp = ddi_get_driver_private(dip);
	int status = DDI_SUCCESS;

	if (mgp == NULL) {
		cmn_err(CE_WARN, "null dip in myri10ge_resume\n");
		return (DDI_FAILURE);
	}
	if (mgp->dip != dip) {
		cmn_err(CE_WARN, "bad dip in myri10ge_resume\n");
		return (DDI_FAILURE);
	}

	mutex_enter(&mgp->intrlock);
	status = myri10ge_restore_pci_state(mgp);
	if (status == DDI_SUCCESS &&
	    mgp->running == MYRI10GE_ETH_SUSPENDED_RUNNING) {
		status = myri10ge_start_locked(mgp);
	}
	mutex_exit(&mgp->intrlock);
	if (status != DDI_SUCCESS)
		return (status);

	/* start the watchdog timer */
	mgp->timer_id = timeout(myri10ge_watchdog, mgp,
	    mgp->timer_ticks);
	return (DDI_SUCCESS);
}

static int
myri10ge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	struct myri10ge_priv *mgp;
	mac_register_t *macp, *omacp;
	ddi_acc_handle_t handle;
	uint32_t csr, hdr_offset;
	int status, span, link_width, max_read_request_4k;
	unsigned long bus_number, dev_number, func_number;
	size_t bytes;
	offset_t ss_offset;
	uint8_t vso;

	if (cmd == DDI_RESUME) {
		return (myri10ge_resume(dip));
	}

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);
	if (pci_config_setup(dip, &handle) != DDI_SUCCESS)
		return (DDI_FAILURE);

	/* enable bus mastering and memory space access */
	csr = pci_config_get32(handle, PCI_CONF_COMM);
	pci_config_put32(handle, PCI_CONF_COMM,
	    (csr | PCI_COMM_ME | PCI_COMM_MAE));
	status = myri10ge_read_pcie_link_width(handle, &link_width);
	if (status != 0) {
		cmn_err(CE_WARN, "could not read link width!\n");
		link_width = 0;
	}
	max_read_request_4k = !myri10ge_set_max_readreq(handle);
	status = myri10ge_find_cap(handle, &vso, PCI_CAP_ID_VS);
	if (status != 0)
		goto abort_with_cfg_hdl;
	if ((omacp = mac_alloc(MAC_VERSION)) == NULL)
		goto abort_with_cfg_hdl;
	/*
	 * XXXX Hack: mac_register_t grows in newer kernels.  To be
	 * able to write newer fields, such as m_margin, without
	 * writing outside allocated memory, we allocate our own macp
	 * and pass that to mac_register()
	 */
	macp = kmem_zalloc(sizeof (*macp) * 8, KM_SLEEP);
	macp->m_version = omacp->m_version;

	if ((mgp = (struct myri10ge_priv *)
	    kmem_zalloc(sizeof (*mgp), KM_SLEEP)) == NULL) {
		goto abort_with_macinfo;
	}
	ddi_set_driver_private(dip, mgp);

	/* setup device name for log messages */
	(void) sprintf(mgp->name, "myri10ge%d", ddi_get_instance(dip));

	mutex_enter(&myri10ge_param_lock);
	myri10ge_get_props(dip);
	mgp->intr_coal_delay = myri10ge_intr_coal_delay;
	mgp->pause = myri10ge_flow_control;
	mutex_exit(&myri10ge_param_lock);

	mgp->max_read_request_4k = max_read_request_4k;
	mgp->pcie_link_width = link_width;
	mgp->running = MYRI10GE_ETH_STOPPED;
	mgp->vso = vso;
	mgp->dip = dip;
	mgp->cfg_hdl = handle;

	mgp->timer_ticks = 5 * drv_usectohz(1000000); /* 5 seconds */
	myri10ge_test_physical(dip);

	/* allocate command page */
	bytes = sizeof (*mgp->cmd);
	mgp->cmd = (mcp_cmd_response_t *)
	    (void *)myri10ge_dma_alloc(dip, bytes,
	    &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
	    DDI_DMA_CONSISTENT, DDI_DMA_RDWR|DDI_DMA_CONSISTENT,
	    &mgp->cmd_dma, 1, DDI_DMA_DONTWAIT);
	if (mgp->cmd == NULL)
		goto abort_with_mgp;

	(void) myri10ge_reg_set(dip, &mgp->reg_set, &span, &bus_number,
	    &dev_number, &func_number);
	if (myri10ge_verbose)
		printf("%s at %ld:%ld:%ld attaching\n", mgp->name,
		    bus_number, dev_number, func_number);
	status = ddi_regs_map_setup(dip, mgp->reg_set, (caddr_t *)&mgp->sram,
	    (offset_t)0, (offset_t)span, &myri10ge_dev_access_attr,
	    &mgp->io_handle);
	if (status != DDI_SUCCESS) {
		cmn_err(CE_WARN, "%s: couldn't map memory space", mgp->name);
		printf("%s: reg_set = %d, span = %d, status = %d",
		    mgp->name, mgp->reg_set, span, status);
		goto abort_with_mgp;
	}

	hdr_offset = *(uint32_t *)(void*)(mgp->sram + MCP_HEADER_PTR_OFFSET);
	hdr_offset = ntohl(hdr_offset) & 0xffffc;
	ss_offset = hdr_offset +
	    offsetof(struct mcp_gen_header, string_specs);
	mgp->sram_size = ntohl(*(uint32_t *)(void*)(mgp->sram + ss_offset));
	myri10ge_pio_copy32(mgp->eeprom_strings,
	    (uint32_t *)(void*)((char *)mgp->sram + mgp->sram_size),
	    MYRI10GE_EEPROM_STRINGS_SIZE);
	(void) memset(mgp->eeprom_strings +
	    MYRI10GE_EEPROM_STRINGS_SIZE - 2, 0, 2);

	status = myri10ge_read_mac_addr(mgp);
	if (status) {
		goto abort_with_mapped;
	}

	status = myri10ge_select_firmware(mgp);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed to load firmware\n", mgp->name);
		goto abort_with_mapped;
	}

	status = myri10ge_probe_slices(mgp);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed to probe slices\n", mgp->name);
		goto abort_with_dummy_rdma;
	}

	status = myri10ge_alloc_slices(mgp);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed to alloc slices\n", mgp->name);
		goto abort_with_dummy_rdma;
	}

	/* add the interrupt handler */
	status = myri10ge_add_intrs(mgp, 1);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: Failed to add interrupt\n",
		    mgp->name);
		goto abort_with_slices;
	}

	/* now that we have an iblock_cookie, init the mutexes */
	mutex_init(&mgp->cmd_lock, NULL, MUTEX_DRIVER, mgp->icookie);
	mutex_init(&mgp->intrlock, NULL, MUTEX_DRIVER, mgp->icookie);

	status = myri10ge_nic_stat_init(mgp);
	if (status != DDI_SUCCESS)
		goto abort_with_interrupts;
	status = myri10ge_info_init(mgp);
	if (status != DDI_SUCCESS)
		goto abort_with_stats;

	/*
	 * Initialize GLD state
	 */
	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	macp->m_driver = mgp;
	macp->m_dip = dip;
	macp->m_src_addr = mgp->mac_addr;
	macp->m_callbacks = &myri10ge_m_callbacks;
	macp->m_min_sdu = 0;
	macp->m_max_sdu = myri10ge_mtu -
	    (sizeof (struct ether_header) + MXGEFW_PAD + VLAN_TAGSZ);
#ifdef SOLARIS_S11
	macp->m_margin = VLAN_TAGSZ;
#endif
	macp->m_v12n = MAC_VIRT_LEVEL1;
	status = mac_register(macp, &mgp->mh);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: mac_register failed with %d\n",
		    mgp->name, status);
		goto abort_with_info;
	}
	myri10ge_ndd_init(mgp);
	if (myri10ge_verbose)
		printf("%s: %s, tx bndry %d, fw %s\n", mgp->name,
		    mgp->intr_type, mgp->tx_boundary, mgp->fw_name);
	mutex_enter(&myri10ge_param_lock);
	mgp->next = mgp_list;
	mgp_list = mgp;
	mutex_exit(&myri10ge_param_lock);
	kmem_free(macp, sizeof (*macp) * 8);
	mac_free(omacp);
	return (DDI_SUCCESS);

abort_with_info:
	myri10ge_info_destroy(mgp);

abort_with_stats:
	myri10ge_nic_stat_destroy(mgp);

abort_with_interrupts:
	mutex_destroy(&mgp->cmd_lock);
	mutex_destroy(&mgp->intrlock);
	myri10ge_rem_intrs(mgp, 1);

abort_with_slices:
	myri10ge_free_slices(mgp);

abort_with_dummy_rdma:
	myri10ge_dummy_rdma(mgp, 0);

abort_with_mapped:
	ddi_regs_map_free(&mgp->io_handle);

	myri10ge_dma_free(&mgp->cmd_dma);

abort_with_mgp:
	kmem_free(mgp, sizeof (*mgp));

abort_with_macinfo:
	kmem_free(macp, sizeof (*macp) * 8);
	mac_free(omacp);

abort_with_cfg_hdl:
	pci_config_teardown(&handle);
	return (DDI_FAILURE);
}

static int
myri10ge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	struct myri10ge_priv *mgp, *tmp;
	int status, i, jbufs_alloced;

	if (cmd == DDI_SUSPEND) {
		status = myri10ge_suspend(dip);
		return (status);
	}

	if (cmd != DDI_DETACH) {
		return (DDI_FAILURE);
	}
	/* Get the driver private (struct myri10ge_priv) structure */
	mgp = ddi_get_driver_private(dip);

	mutex_enter(&mgp->intrlock);
	jbufs_alloced = 0;
	for (i = 0; i < mgp->num_slices; i++) {
		myri10ge_remove_jbufs(&mgp->ss[i]);
		jbufs_alloced += mgp->ss[i].jpool.num_alloc;
	}
	mutex_exit(&mgp->intrlock);
	if (jbufs_alloced != 0) {
		cmn_err(CE_NOTE, "%s: %d loaned rx buffers remain\n",
		    mgp->name, jbufs_alloced);
		return (DDI_FAILURE);
	}

	mutex_enter(&myri10ge_param_lock);
	if (mgp->refcnt != 0) {
		mutex_exit(&myri10ge_param_lock);
		cmn_err(CE_NOTE, "%s: %d external refs remain\n",
		    mgp->name, mgp->refcnt);
		return (DDI_FAILURE);
	}
	mutex_exit(&myri10ge_param_lock);

	status = mac_unregister(mgp->mh);
	if (status != DDI_SUCCESS)
		return (status);

	myri10ge_ndd_fini(mgp);
	myri10ge_dummy_rdma(mgp, 0);
	myri10ge_nic_stat_destroy(mgp);
	myri10ge_info_destroy(mgp);

	mutex_destroy(&mgp->cmd_lock);
	mutex_destroy(&mgp->intrlock);

static int
myri10ge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	struct myri10ge_priv *mgp, *tmp;
	int status, i, jbufs_alloced;

	if (cmd == DDI_SUSPEND) {
		status = myri10ge_suspend(dip);
		return (status);
	}

	if (cmd != DDI_DETACH) {
		return (DDI_FAILURE);
	}
	/* Get the driver private (gld_mac_info_t) structure */
	mgp = ddi_get_driver_private(dip);

	mutex_enter(&mgp->intrlock);
	jbufs_alloced = 0;
	for (i = 0; i < mgp->num_slices; i++) {
		myri10ge_remove_jbufs(&mgp->ss[i]);
		jbufs_alloced += mgp->ss[i].jpool.num_alloc;
	}
	mutex_exit(&mgp->intrlock);
	if (jbufs_alloced != 0) {
		cmn_err(CE_NOTE, "%s: %d loaned rx buffers remain\n",
		    mgp->name, jbufs_alloced);
		return (DDI_FAILURE);
	}

	mutex_enter(&myri10ge_param_lock);
	if (mgp->refcnt != 0) {
		mutex_exit(&myri10ge_param_lock);
		cmn_err(CE_NOTE, "%s: %d external refs remain\n",
		    mgp->name, mgp->refcnt);
		return (DDI_FAILURE);
	}
	mutex_exit(&myri10ge_param_lock);

	status = mac_unregister(mgp->mh);
	if (status != DDI_SUCCESS)
		return (status);

	myri10ge_ndd_fini(mgp);
	myri10ge_dummy_rdma(mgp, 0);
	myri10ge_nic_stat_destroy(mgp);
	myri10ge_info_destroy(mgp);

	mutex_destroy(&mgp->cmd_lock);
	mutex_destroy(&mgp->intrlock);

	myri10ge_rem_intrs(mgp, 1);

	myri10ge_free_slices(mgp);
	ddi_regs_map_free(&mgp->io_handle);
	myri10ge_dma_free(&mgp->cmd_dma);
	pci_config_teardown(&mgp->cfg_hdl);

	mutex_enter(&myri10ge_param_lock);
	if (mgp_list == mgp) {
		mgp_list = mgp->next;
	} else {
		tmp = mgp_list;
		while (tmp->next != mgp && tmp->next != NULL)
			tmp = tmp->next;
		if (tmp->next != NULL)
			tmp->next = tmp->next->next;
	}
	kmem_free(mgp, sizeof (*mgp));
	mutex_exit(&myri10ge_param_lock);
	return (DDI_SUCCESS);
}

/*
 * Helper for the quiesce entry point: interrupt threads are not being
 * scheduled, so we must poll for the confirmation DMA to arrive in
 * the firmware stats block for slice 0.  We're essentially running
 * the guts of the interrupt handler, cherry-picking just the
 * confirmation that the NIC is quiesced (stats->link_down).
 */

static int
myri10ge_poll_down(struct myri10ge_priv *mgp)
{
	struct myri10ge_slice_state *ss = mgp->ss;
	mcp_irq_data_t *stats = ss->fw_stats;
	int valid;
	int found_down = 0;

	/* check for a pending IRQ */
	if (! *((volatile uint8_t *)& stats->valid))
		return (0);
	valid = stats->valid;

	/*
	 * Make sure to tell the NIC to lower a legacy IRQ, else
	 * it may have corrupt state after restarting
	 */
	if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
		/* lower legacy IRQ */
		*mgp->irq_deassert = 0;
		mb();
		/* wait for irq conf DMA */
		while (*((volatile uint8_t *)& stats->valid))
			;
	}
	if (stats->stats_updated && stats->link_down)
		found_down = 1;

	if (valid & 0x1)
		*ss->irq_claim = BE_32(3);
	*(ss->irq_claim + 1) = BE_32(3);

	return (found_down);
}

static int
myri10ge_quiesce(dev_info_t *dip)
{
	struct myri10ge_priv *mgp;
	myri10ge_cmd_t cmd;
	int status, down, i;

	mgp = ddi_get_driver_private(dip);
	if (mgp == NULL)
		return (DDI_FAILURE);

	/* if the device was unplumbed, it is guaranteed to be quiescent */
	if (mgp->running == MYRI10GE_ETH_STOPPED)
		return (DDI_SUCCESS);

	/* send a down CMD to quiesce the NIC */
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
	if (status) {
		cmn_err(CE_WARN, "%s: Couldn't bring down link\n", mgp->name);
		return (DDI_FAILURE);
	}

	for (i = 0; i < 20; i++) {
		down = myri10ge_poll_down(mgp);
		if (down)
			break;
		delay(drv_usectohz(100000));
		mb();
	}
	if (down)
		return (DDI_SUCCESS);
	return (DDI_FAILURE);
}
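
/*
 * Illustrative aside (not from the original driver): the loop in
 * myri10ge_quiesce() above retries myri10ge_poll_down() up to 20
 * times, sleeping drv_usectohz(100000) ticks (100ms) per attempt, so
 * the firmware has roughly 20 * 100ms = 2 seconds to confirm
 * link-down before quiesce(9E) reports DDI_FAILURE.
 */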

/*
 * Distinguish between allocb'ed blocks, and esballoc'ed attached
 * storage: cache the db_lastfree pointer of a freshly allocb'ed
 * block, so that a dblk whose db_lastfree differs can later be
 * recognized as carrying esballoc'ed (attached) storage.
 */
static void
myri10ge_find_lastfree(void)
{
	mblk_t *mp = allocb(1024, 0);
	dblk_t *dbp;

	if (mp == NULL) {
		cmn_err(CE_WARN, "myri10ge_find_lastfree failed\n");
		return;
	}
	dbp = mp->b_datap;
	myri10ge_db_lastfree = (void *)dbp->db_lastfree;
}

int
_init(void)
{
	int i;

	if (myri10ge_verbose)
		cmn_err(CE_NOTE,
		    "Myricom 10G driver (10GbE) version %s loading\n",
		    MYRI10GE_VERSION_STR);
	myri10ge_find_lastfree();
	mac_init_ops(&myri10ge_ops, "myri10ge");
	mutex_init(&myri10ge_param_lock, NULL, MUTEX_DEFAULT, NULL);
	if ((i = mod_install(&modlinkage)) != 0) {
		cmn_err(CE_WARN, "mod_install returned %d\n", i);
		mac_fini_ops(&myri10ge_ops);
		mutex_destroy(&myri10ge_param_lock);
	}
	return (i);
}

int
_fini(void)
{
	int i;

	i = mod_remove(&modlinkage);
	if (i != 0) {
		return (i);
	}
	mac_fini_ops(&myri10ge_ops);
	mutex_destroy(&myri10ge_param_lock);
	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

/*
 * This file uses MyriGE driver indentation.
 *
 * Local Variables:
 * c-file-style:"sun"
 * tab-width:8
 * End:
 */