/*
 * Copyright (c) 2016 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Most low-level chip related functions (other than attachment) reside in
 * this module.  Most functions assume that the caller is already holding
 * appropriate locks to prevent SMP collisions.
 */

#include "nvme.h"

MALLOC_DEFINE(M_NVME, "NVMe Driver", "NVME");

/*
 * DMA mapping callbacks.
 */
static
void
nvme_dmamem_saveseg(void *info, bus_dma_segment_t *segs, int nsegs, int error)
{
	KKASSERT(error == 0);
	KKASSERT(nsegs == 1);
	*(bus_addr_t *)info = segs->ds_addr;
}

/*
 * Low-level chip enable/disable.
 */
int
nvme_enable(nvme_softc_t *sc, int enable)
{
	uint32_t reg;
	int error = 0;
	int base_ticks;

	reg = nvme_read(sc, NVME_REG_CONFIG);
	if (enable == 0 && (reg & NVME_CONFIG_EN)) {
		/*
		 * Disable the chip so we can program it.
		 */
		reg &= ~NVME_CONFIG_EN;
		nvme_write(sc, NVME_REG_CONFIG, reg);
	} else if (enable && (reg & NVME_CONFIG_EN) == 0) {
		/*
		 * Enable the chip once programmed.
		 */
		reg |= NVME_CONFIG_EN;
		nvme_write(sc, NVME_REG_CONFIG, reg);
	}
	error = ENXIO;
	base_ticks = ticks;
	while ((int)(ticks - base_ticks) < sc->entimo) {
		reg = nvme_read(sc, NVME_REG_STATUS);
		if (enable == 0 && (reg & NVME_STATUS_RDY) == 0) {
			error = 0;
			break;
		}
		if (enable && (reg & NVME_STATUS_RDY)) {
			error = 0;
			break;
		}
		nvme_os_sleep(50);	/* 50ms poll */
	}

	/*
	 * Interrupt masking (only applicable when MSI-X not used, 3.1.3 and
	 * 3.1.4 state that these registers should not be accessed with MSI-X)
	 */
	if (error == 0 && sc->nirqs == 1) {
		if (enable) {
			nvme_write(sc, NVME_REG_INTSET, ~1);
			nvme_write(sc, NVME_REG_INTCLR, 1);
		} else {
			nvme_write(sc, NVME_REG_INTSET, ~1);
		}
	}

	if (error) {
		device_printf(sc->dev, "Cannot %s device\n",
			      (enable ? "enable" : "disable"));
	} else {
#if 0
		kprintf("gratuitous 15 second sleep\n");
		nvme_os_sleep(15000);
		kprintf("gratuitous 15 second sleep done\n");
#endif
	}
	return error;
}

/*
 * Allocate submission and completion queues.  If qid is 0 we are allocating
 * the ADMIN queues, otherwise we are allocating I/O queues.
 */
int
nvme_alloc_subqueue(nvme_softc_t *sc, uint16_t qid)
{
	nvme_subqueue_t *queue = &sc->subqueues[qid];
	int error = 0;

	/*
	 * For now implement the maximum queue size negotiated in the
	 * attach.
	 */
	lockinit(&queue->lk, "nvqlk", 0, 0);
	queue->sc = sc;
	queue->nqe = sc->maxqe;
	queue->qid = qid;
	queue->subq_doorbell_reg = NVME_REG_SUBQ_BELL(qid, sc->dstrd4);

	/*
	 * dma memory for the submission queue
	 */
	if (error == 0) {
		error = bus_dmamem_alloc(sc->sque_tag, (void **)&queue->ksubq,
					 BUS_DMA_ZERO, &queue->sque_map);
	}
	if (error == 0) {
		error = bus_dmamap_load(sc->sque_tag, queue->sque_map,
					queue->ksubq,
					bus_dma_tag_getmaxsize(sc->sque_tag),
					nvme_dmamem_saveseg, &queue->psubq,
					0);
	}

	/*
	 * dma memory for enough PRPs to map MAXPHYS bytes of memory per
	 * request.  A MAXPHYS buffer which begins partially straddling
	 * a page boundary can still be accommodated because we have an
	 * additional PRP entry in cmd.head (see the sizing note after
	 * this function).
	 */
	if (error == 0) {
		error = bus_dmamem_alloc(sc->prps_tag, (void **)&queue->kprps,
					 BUS_DMA_ZERO, &queue->prps_map);
	}
	if (error == 0) {
		error = bus_dmamap_load(sc->prps_tag, queue->prps_map,
					queue->kprps,
					bus_dma_tag_getmaxsize(sc->prps_tag),
					nvme_dmamem_saveseg, &queue->pprps,
					0);
	}

	/*
	 * dma memory for admin data
	 */
	if (qid == 0 && error == 0) {
		error = bus_dmamem_alloc(sc->adm_tag,
					 (void **)&queue->kdatapgs,
					 BUS_DMA_ZERO, &queue->adm_map);
	}
	if (qid == 0 && error == 0) {
		error = bus_dmamap_load(sc->adm_tag, queue->adm_map,
					queue->kdatapgs,
					bus_dma_tag_getmaxsize(sc->adm_tag),
					nvme_dmamem_saveseg, &queue->pdatapgs,
					0);
	}

	/*
	 * Driver request structures
	 */
	if (error == 0) {
		nvme_request_t *req;
		uint32_t i;

		queue->reqary = kmalloc(sizeof(nvme_request_t) * queue->nqe,
					M_NVME, M_WAITOK | M_ZERO);
		for (i = 0; i < queue->nqe; ++i) {
			req = &queue->reqary[i];
			req->next_avail = queue->first_avail;
			queue->first_avail = req;
			req->subq = queue;
			req->comq = &sc->comqueues[queue->comqid];
			req->cmd_id = i;
			if (qid == 0) {
				req->info = &queue->kdatapgs[i];
				req->pinfo = queue->pdatapgs +
					     i * sizeof(nvme_admin_data_t);
			}
		}
	}

	/*
	 * Error handling
	 */
	if (error)
		nvme_free_subqueue(sc, qid);
	return error;
}

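/*
 * Sizing note for the PRP reservation above (illustrative numbers only;
 * MAXPHYS and PAGE_SIZE are platform-dependent):  each request reserves
 * (MAXPHYS / PAGE_SIZE) 8-byte PRP entries in kprps, indexed by cmd_id.
 * With a MAXPHYS of 128KB and 4KB pages that is 32 entries (256 bytes)
 * per request.  Because cmd.head.prp1 covers the (possibly partial) first
 * page and cmd.head.prp2 can hold a second page directly, the reserved
 * list is only consumed by transfers spanning three or more pages, and a
 * maximally misaligned MAXPHYS transfer fits exactly.
 */
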
int
nvme_alloc_comqueue(nvme_softc_t *sc, uint16_t qid)
{
	nvme_comqueue_t *queue = &sc->comqueues[qid];
	int error = 0;

	/*
	 * For now implement the maximum queue size negotiated in the
	 * attach.
	 */
	lockinit(&queue->lk, "nvqlk", 0, 0);
	queue->sc = sc;
	queue->qid = qid;
	queue->phase = NVME_COMQ_STATUS_PHASE;
	queue->comq_doorbell_reg = NVME_REG_COMQ_BELL(qid, sc->dstrd4);

	if (error == 0) {
		error = bus_dmamem_alloc(sc->cque_tag, (void **)&queue->kcomq,
					 BUS_DMA_ZERO, &queue->cque_map);
	}
	if (error == 0) {
		error = bus_dmamap_load(sc->cque_tag, queue->cque_map,
					queue->kcomq,
					bus_dma_tag_getmaxsize(sc->cque_tag),
					nvme_dmamem_saveseg, &queue->pcomq,
					0);
	}

	/*
	 * Set nqe last.  The comq polling loop tests this field and we
	 * do not want it to spuriously assume that the comq is initialized
	 * until it actually is.
	 */
	if (error == 0)
		queue->nqe = sc->maxqe;

	if (error)
		nvme_free_comqueue(sc, qid);
	return error;
}

void
nvme_free_subqueue(nvme_softc_t *sc, uint16_t qid)
{
	nvme_subqueue_t *queue = &sc->subqueues[qid];

	queue->first_avail = NULL;
	if (queue->reqary) {
		kfree(queue->reqary, M_NVME);
		queue->reqary = NULL;
	}
	if (queue->ksubq) {
		bus_dmamem_free(sc->sque_tag, queue->ksubq, queue->sque_map);
		bus_dmamap_unload(sc->sque_tag, queue->sque_map);
		bus_dmamap_destroy(sc->sque_tag, queue->sque_map);
	}
	if (queue->kprps) {
		bus_dmamem_free(sc->prps_tag, queue->kprps, queue->prps_map);
		bus_dmamap_unload(sc->prps_tag, queue->prps_map);
		bus_dmamap_destroy(sc->prps_tag, queue->prps_map);
	}
	if (queue->kdatapgs) {
		bus_dmamem_free(sc->adm_tag, queue->kdatapgs, queue->adm_map);
		bus_dmamap_unload(sc->adm_tag, queue->adm_map);
		bus_dmamap_destroy(sc->adm_tag, queue->adm_map);
	}
	bzero(queue, sizeof(*queue));
}

void
nvme_free_comqueue(nvme_softc_t *sc, uint16_t qid)
{
	nvme_comqueue_t *queue = &sc->comqueues[qid];

	/*
	 * Clear this field first so poll loops ignore the comq.
	 */
	queue->nqe = 0;

	if (queue->kcomq) {
		bus_dmamem_free(sc->cque_tag, queue->kcomq, queue->cque_map);
		bus_dmamap_unload(sc->cque_tag, queue->cque_map);
		bus_dmamap_destroy(sc->cque_tag, queue->cque_map);
	}
	bzero(queue, sizeof(*queue));
}

/*
 * ADMIN AND I/O REQUEST HANDLING
 */

/*
 * Obtain a request and handle DMA mapping the supplied kernel buffer.
 * Fields in cmd.head will be initialized and remaining fields will be zeroed.
 * Caller is responsible for filling in remaining fields as appropriate.
 *
 * Caller must hold the queue lock.
 */
nvme_request_t *
nvme_get_admin_request(nvme_softc_t *sc, uint8_t opcode)
{
	nvme_request_t *req;

	req = nvme_get_request(&sc->subqueues[0], opcode, NULL, 0);
	req->cmd.head.prp1 = req->pinfo;
	req->callback = NULL;

	return req;
}

/*
 * Obtain a request and handle DMA mapping the supplied kernel buffer.
 * Fields in cmd.head will be initialized and remaining fields will be zeroed.
 * Caller is responsible for filling in remaining fields as appropriate.
 *
 * May return NULL if no requests are available or if there is no room in
 * the submission queue to handle it (should only be possible on an I/O queue,
 * admin queue operations are managed).
 *
 * Caller should NOT hold the queue lock.
 */
nvme_request_t *
nvme_get_request(nvme_subqueue_t *queue, uint8_t opcode,
		 char *kva, size_t bytes)
{
	nvme_request_t *req;
	nvme_request_t *next;

	/*
	 * No easy lockless way to pull a new request off.  We have to check
	 * for a number of conditions and there may be multiple threads
	 * making this call simultaneously, which complicates matters even
	 * more.
	 */
	lockmgr(&queue->lk, LK_EXCLUSIVE);

	/*
	 * Make sure the submission queue has room to accommodate the
	 * request.  Requests can be completed out of order so the
	 * submission ring could still be full even though we have
	 * requests available.
	 */
	if ((queue->subq_tail + queue->unsubmitted + 1) % queue->nqe ==
	    queue->subq_head) {
		lockmgr(&queue->lk, LK_RELEASE);
		KKASSERT(queue->qid != 0);
		atomic_swap_int(&queue->signal_requeue, 1);

		return NULL;
	}

	/*
	 * Pop the next available request off of the first_avail linked
	 * list.  An atomic op must be used here because nvme_put_request()
	 * returns requests to the list without holding queue->lk.
	 */
	for (;;) {
		req = queue->first_avail;
		cpu_ccfence();
		if (req == NULL) {
			lockmgr(&queue->lk, LK_RELEASE);
			KKASSERT(queue->qid != 0);
			atomic_swap_int(&queue->signal_requeue, 1);

			return NULL;
		}
		next = req->next_avail;
		if (atomic_cmpset_ptr(&queue->first_avail, req, next))
			break;
	}

	/*
	 * We have to keep track of unsubmitted requests in order to be
	 * able to properly check whether the ring is full or not (check
	 * is done at the top of this procedure, above).
	 */
	++queue->unsubmitted;
	lockmgr(&queue->lk, LK_RELEASE);

	/*
	 * Fill-in basic fields and do the DMA mapping.
	 */
	req->next_avail = NULL;
	KKASSERT(req->state == NVME_REQ_AVAIL);
	req->state = NVME_REQ_ALLOCATED;
	req->callback = NULL;
	req->waiting = 0;

	req->cmd.head.opcode = opcode;
	req->cmd.head.flags = NVME_SUBQFLG_PRP | NVME_SUBQFLG_NORM;
	req->cmd.head.cid = req->cmd_id;
	req->cmd.head.nsid = 0;
	req->cmd.head.mptr = 0;
	req->cmd.head.prp1 = 0;
	req->cmd.head.prp2 = 0;
	req->cmd.dw10 = 0;
	req->cmd.dw11 = 0;
	req->cmd.dw12 = 0;
	req->cmd.dw13 = 0;
	req->cmd.dw14 = 0;
	req->cmd.dw15 = 0;

	if (kva) {
		size_t count = 0;
		size_t idx = 0;
		vm_paddr_t paddr;
		vm_paddr_t pprptab;
		uint64_t *kprptab;

		KKASSERT(bytes >= 0 && bytes <= MAXPHYS);

		kprptab = queue->kprps +
			  (MAXPHYS / PAGE_SIZE) * req->cmd_id;
		pprptab = queue->pprps +
			  (MAXPHYS / PAGE_SIZE) * req->cmd_id *
			  sizeof(uint64_t);

		while (count < bytes) {
			paddr = vtophys(kva + count);
			if (idx == 0) {
				KKASSERT((paddr & 3) == 0);
				req->cmd.head.prp1 = paddr;
				count += (((intptr_t)kva + PAGE_SIZE) &
					  ~(intptr_t)PAGE_MASK) -
					 (intptr_t)kva;
			} else if (idx == 1 && count + PAGE_SIZE >= bytes) {
				KKASSERT((paddr & PAGE_MASK) == 0);
				req->cmd.head.prp2 = paddr;
				count += PAGE_SIZE;
			} else {
				KKASSERT((paddr & PAGE_MASK) == 0);
				/* if (idx == 1) -- not needed, just repeat */
				req->cmd.head.prp2 = pprptab; /* repeat */
				kprptab[idx - 1] = paddr;
				count += PAGE_SIZE;
			}
			++idx;
		}
	}
	return req;
}

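/*
 * Illustration of the PRP setup in nvme_get_request() above (illustrative
 * restatement of the loop, assuming 4KB pages):
 *
 *	- One-page transfer: prp1 points at the (possibly unaligned) start
 *	  of the buffer, prp2 stays 0.
 *	- Two-page transfer: prp1 covers the first (partial) page and prp2
 *	  holds the physical address of the second page directly.
 *	- Three or more pages: prp1 covers the first page, prp2 points at
 *	  this request's slot in the per-queue PRP list (pprps), and the
 *	  remaining page addresses are written into kprptab[].
 *
 * The submission-ring-full test at the top counts both submitted and
 * not-yet-submitted requests.  For example, with nqe = 16, subq_tail = 2
 * and subq_head = 3, even unsubmitted = 0 gives (2 + 0 + 1) % 16 == 3,
 * so the ring is treated as full and the request is deferred.
 */
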
/*
 * Submit request for execution.  This will doorbell the subq.
 *
 * Caller must hold the queue lock.
 */
void
nvme_submit_request(nvme_request_t *req)
{
	nvme_subqueue_t *queue = req->subq;
	nvme_allcmd_t *cmd;

	cmd = &queue->ksubq[queue->subq_tail];
	--queue->unsubmitted;
	if (++queue->subq_tail == queue->nqe)
		queue->subq_tail = 0;
	KKASSERT(queue->subq_tail != queue->subq_head);
	*cmd = req->cmd;
	cpu_sfence();	/* needed? */
	req->state = NVME_REQ_SUBMITTED;
	nvme_write(queue->sc, queue->subq_doorbell_reg, queue->subq_tail);
}

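/*
 * Sketch of how a caller is expected to string the request primitives
 * together, per the locking contracts noted on each of these functions
 * (the opcode, buffer, and field setup are placeholders, not a real
 * consumer):
 *
 *	req = nvme_get_request(subq, opcode, kva, bytes);
 *	if (req == NULL)
 *		(defer the bio; the admin thread requeues it later)
 *	(fill in nsid, dw10-dw15, etc.)
 *	lockmgr(&subq->lk, LK_EXCLUSIVE);
 *	nvme_submit_request(req);
 *	lockmgr(&subq->lk, LK_RELEASE);
 *	status = nvme_wait_request(req, hz);
 *		(or arrange a req->callback instead of waiting)
 *	nvme_put_request(req);
 */
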
/*
 * Wait for a request to complete.
 *
 * Caller does not need to hold the queue lock; the completion queue lock
 * is acquired internally while polling and sleeping.
 */
int
nvme_wait_request(nvme_request_t *req, int ticks)
{
	struct lock *lk;
	int code;

	req->waiting = 1;
	if (req->state != NVME_REQ_COMPLETED) {
		lk = &req->comq->lk;
		cpu_lfence();
		lockmgr(lk, LK_EXCLUSIVE);
		while (req->state == NVME_REQ_SUBMITTED) {
			nvme_poll_completions(req->comq, lk);
			if (req->state != NVME_REQ_SUBMITTED)
				break;
			lksleep(req, lk, 0, "nvwait", hz);
		}
		lockmgr(lk, LK_RELEASE);
		KKASSERT(req->state == NVME_REQ_COMPLETED);
	}
	cpu_lfence();
	code = NVME_COMQ_STATUS_CODE_GET(req->res.tail.status);

	return code;
}

/*
 * Put request away, making it available for reuse.  If this is an admin
 * request its auxiliary data page is also released for reuse.
 *
 * Caller does NOT have to hold the queue lock.
 */
void
nvme_put_request(nvme_request_t *req)
{
	nvme_subqueue_t *queue = req->subq;
	nvme_request_t *next;

	/*
	 * Insert on head for best cache reuse.
	 */
	KKASSERT(req->state == NVME_REQ_COMPLETED);
	req->state = NVME_REQ_AVAIL;
	for (;;) {
		next = queue->first_avail;
		cpu_ccfence();
		req->next_avail = next;
		if (atomic_cmpset_ptr(&queue->first_avail, next, req))
			break;
	}

	/*
	 * If BIOs were deferred due to lack of request space signal the
	 * admin thread to requeue them.  This is a bit messy and normally
	 * should not happen due to the large number of queue entries nvme
	 * usually has.  Let it race for now (admin has a 1hz tick).
	 */
	if (atomic_swap_int(&queue->signal_requeue, 0)) {
		atomic_set_int(&queue->sc->admin_signal, ADMIN_SIG_REQUEUE);
		wakeup(&queue->sc->admin_signal);
	}
}

/*
 * Poll for completions on queue, copy the 16-byte hw result entry
 * into the request and poke the doorbell to update the controller's
 * understanding of comq_head.
 *
 * If lk is non-NULL it will be passed to the callback which typically
 * releases it temporarily when calling biodone() or doing other complex
 * work on the result.
 *
 * Caller must usually hold comq->lk.
 */
void
nvme_poll_completions(nvme_comqueue_t *comq, struct lock *lk)
{
	nvme_softc_t *sc = comq->sc;
	nvme_request_t *req;
	nvme_subqueue_t *subq;
	nvme_allres_t *res;
#if 0
	int didwork = 0;
#endif

	KKASSERT(comq->comq_tail < comq->nqe);
	cpu_lfence();		/* needed prior to first phase test */
	for (;;) {
		/*
		 * WARNING! LOCK MAY HAVE BEEN TEMPORARILY LOST DURING LOOP.
		 */
		res = &comq->kcomq[comq->comq_tail];
		if ((res->tail.status ^ comq->phase) & NVME_COMQ_STATUS_PHASE)
			break;

		/*
		 * Process result on completion queue.
		 *
		 * Bump comq_tail, flip the phase detect when we roll-over.
		 * doorbell every 1/4 queue and at the end of the loop.
		 */
		if (++comq->comq_tail == comq->nqe) {
			comq->comq_tail = 0;
			comq->phase ^= NVME_COMQ_STATUS_PHASE;
		}

		/*
		 * WARNING! I imploded the chip by reusing a command id
		 *	    before it was discarded in the completion queue
		 *	    via the doorbell, so for now we always write
		 *	    the doorbell before marking the request as
		 *	    COMPLETED (it can be reused instantly upon
		 *	    being marked).
		 */
#if 0
		if (++didwork == (comq->nqe >> 2)) {
			didwork = 0;
			nvme_write(comq->sc, comq->comq_doorbell_reg,
				   comq->comq_tail);
		}
#endif
		cpu_lfence();	/* needed prior to content check */

		/*
		 * Locate the request and related submission queue.  The
		 * request could be on a different queue.  A submission
		 * queue can have only one completion queue, so we can
		 * update subq_head without locking the submission queue.
		 */
		subq = &sc->subqueues[res->tail.subq_id];
		subq->subq_head = res->tail.subq_head_ptr;
		req = &subq->reqary[res->tail.cmd_id];

		/*
		 * Copy the fields and wakeup anyone waiting on req.
		 * The response field in the completion queue can be reused
		 * once we doorbell which is why we make a copy.
		 */
		KKASSERT(req->state == NVME_REQ_SUBMITTED &&
			 req->comq == comq);
		req->res = *res;
		nvme_write(comq->sc, comq->comq_doorbell_reg, comq->comq_tail);
		cpu_sfence();
		req->state = NVME_REQ_COMPLETED;
		if (req->callback) {
			req->callback(req, lk);
		} else if (req->waiting) {
			wakeup(req);
		}
	}
#if 0
	if (didwork)
		nvme_write(comq->sc, comq->comq_doorbell_reg, comq->comq_tail);
#endif
}

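/*
 * Phase bit illustration (a restatement of the test in the loop above):
 * comq->phase starts out equal to NVME_COMQ_STATUS_PHASE (see
 * nvme_alloc_comqueue()) and the controller writes completion entries
 * with the phase bit set on its first pass through the ring.  An entry
 * is fresh only when its phase bit matches comq->phase, i.e. when
 * ((status ^ phase) & NVME_COMQ_STATUS_PHASE) == 0.  When comq_tail
 * wraps we flip comq->phase, so stale entries left over from the
 * previous pass carry the old phase bit, fail the test, and terminate
 * the polling loop.
 */
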
/*
 * Core interrupt handler (called from dedicated interrupt thread, possibly
 * preempts other threads).
 *
 * NOTE: For pin-based level interrupts, the chipset interrupt is cleared
 *	 automatically once all the head doorbells are updated.  However,
 *	 most chipsets assume MSI-X will be used and MAY NOT IMPLEMENT
 *	 pin-based interrupts properly.  I found the BPX card, for example,
 *	 is unable to clear a pin-based interrupt.
 */
void
nvme_intr(void *arg)
{
	nvme_comqueue_t *comq = arg;
	nvme_softc_t *sc;
	int i;
	int skip;

	/*
	 * Process all completion queues associated with this vector.  The
	 * interrupt is masked in the APIC.  Do NOT mess with the NVMe
	 * masking registers because (1) We don't need to and it wastes time,
	 * and (2) We aren't supposed to touch them if using MSI-X anyway.
	 */
	sc = comq->sc;
	if (sc->nirqs == 1)
		skip = 1;
	else
		skip = sc->nirqs - 1;

	for (i = comq->qid; i <= sc->niocomqs; i += skip) {
		if (comq->nqe) {
			lockmgr(&comq->lk, LK_EXCLUSIVE);
			nvme_poll_completions(comq, &comq->lk);
			lockmgr(&comq->lk, LK_RELEASE);
		}
		comq += skip;
	}
}

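/*
 * Example of the vector-to-queue fan-out above (numbers are illustrative):
 * with nirqs = 4 (one admin vector plus three I/O vectors), skip is 3, so
 * the vector attached to completion queue 2 also polls queues 5, 8, 11,
 * and so on up to niocomqs.  nvme_create_comqueue() below assigns vectors
 * with the matching formula, ivect = 1 + (qid - 1) % (nirqs - 1).
 */
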
/*
 * ADMIN HELPER COMMAND ROLLUP FUNCTIONS
 */
/*
 * Issue command to create a submission queue.
 */
int
nvme_create_subqueue(nvme_softc_t *sc, uint16_t qid)
{
	nvme_request_t *req;
	nvme_subqueue_t *subq = &sc->subqueues[qid];
	int status;

	req = nvme_get_admin_request(sc, NVME_OP_CREATE_SUBQ);
	req->cmd.head.prp1 = subq->psubq;
	req->cmd.crsub.subq_id = qid;
	req->cmd.crsub.subq_size = subq->nqe - 1;	/* 0's based value */
	req->cmd.crsub.flags = NVME_CREATESUB_PC | NVME_CREATESUB_PRI_URG;
	req->cmd.crsub.comq_id = subq->comqid;

	nvme_submit_request(req);
	status = nvme_wait_request(req, hz);
	nvme_put_request(req);

	return status;
}

/*
 * Issue command to create a completion queue.
 */
int
nvme_create_comqueue(nvme_softc_t *sc, uint16_t qid)
{
	nvme_request_t *req;
	nvme_comqueue_t *comq = &sc->comqueues[qid];
	int status;
	int error;
	uint16_t ivect;

	error = 0;
	if (sc->nirqs > 1) {
		ivect = 1 + (qid - 1) % (sc->nirqs - 1);
		if (qid && ivect == qid) {
			error = bus_setup_intr(sc->dev, sc->irq[ivect],
					       INTR_MPSAFE | INTR_HIFREQ,
					       nvme_intr,
					       &sc->comqueues[ivect],
					       &sc->irq_handle[ivect],
					       NULL);
		}
	} else {
		ivect = 0;
	}
	if (error)
		return error;

	req = nvme_get_admin_request(sc, NVME_OP_CREATE_COMQ);
	req->cmd.head.prp1 = comq->pcomq;
	req->cmd.crcom.comq_id = qid;
	req->cmd.crcom.comq_size = comq->nqe - 1;	/* 0's based value */
	req->cmd.crcom.ivect = ivect;
	req->cmd.crcom.flags = NVME_CREATECOM_PC | NVME_CREATECOM_IEN;

	nvme_submit_request(req);
	status = nvme_wait_request(req, hz);
	nvme_put_request(req);

	return status;
}

/*
 * Issue command to delete a submission queue.
 */
int
nvme_delete_subqueue(nvme_softc_t *sc, uint16_t qid)
{
	nvme_request_t *req;
	/*nvme_subqueue_t *subq = &sc->subqueues[qid];*/
	int status;

	req = nvme_get_admin_request(sc, NVME_OP_DELETE_SUBQ);
	req->cmd.head.prp1 = 0;
	req->cmd.delete.qid = qid;

	nvme_submit_request(req);
	status = nvme_wait_request(req, hz);
	nvme_put_request(req);

	return status;
}

/*
 * Issue command to delete a completion queue.
 */
int
nvme_delete_comqueue(nvme_softc_t *sc, uint16_t qid)
{
	nvme_request_t *req;
	/*nvme_comqueue_t *comq = &sc->comqueues[qid];*/
	int status;
	uint16_t ivect;

	req = nvme_get_admin_request(sc, NVME_OP_DELETE_COMQ);
	req->cmd.head.prp1 = 0;
	req->cmd.delete.qid = qid;

	nvme_submit_request(req);
	status = nvme_wait_request(req, hz);
	nvme_put_request(req);

	if (qid && sc->nirqs > 1) {
		ivect = 1 + (qid - 1) % (sc->nirqs - 1);
		if (ivect == qid) {
			bus_teardown_intr(sc->dev,
					  sc->irq[ivect],
					  sc->irq_handle[ivect]);
		}
	}

	return status;
}

/*
 * Issue friendly shutdown to controller.
 */
int
nvme_issue_shutdown(nvme_softc_t *sc)
{
	uint32_t reg;
	int base_ticks;
	int error;

	/*
	 * Put us in shutdown
	 */
	reg = nvme_read(sc, NVME_REG_CONFIG);
	reg &= ~NVME_CONFIG_SHUT_MASK;
	reg |= NVME_CONFIG_SHUT_NORM;
	nvme_write(sc, NVME_REG_CONFIG, reg);

	/*
	 * Wait up to 10 seconds for acknowledgement
	 */
	error = ENXIO;
	base_ticks = ticks;
	while ((int)(ticks - base_ticks) < 10 * 20) {
		reg = nvme_read(sc, NVME_REG_STATUS);
		if ((reg & NVME_STATUS_SHUT_MASK) & NVME_STATUS_SHUT_DONE) {
			error = 0;
			break;
		}
		nvme_os_sleep(50);	/* 50ms poll */
	}
	if (error)
		device_printf(sc->dev, "Unable to shutdown chip nicely\n");
	else
		device_printf(sc->dev, "Normal chip shutdown succeeded\n");

	return error;
}

/*
 * Make space-padded string serial and model numbers more readable.
 */
size_t
string_cleanup(char *str, int domiddle)
{
	size_t i;
	size_t j;
	int atbeg = 1;

	for (i = j = 0; str[i]; ++i) {
		if ((str[i] == ' ' || str[i] == '\r') &&
		    (atbeg || domiddle)) {
			continue;
		} else {
			atbeg = 0;
		}
		str[j] = str[i];
		++j;
	}
	while (domiddle == 0 && j > 0 && (str[j-1] == ' ' || str[j-1] == '\r'))
		--j;
	str[j] = 0;
	if (domiddle == 0) {
		for (j = 0; str[j]; ++j) {
			if (str[j] == ' ')
				str[j] = '_';
		}
	}

	return j;
}

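/*
 * Example (hypothetical input, not from a real device): with domiddle == 0
 * the string "  Acme NVMe 512GB   " becomes "Acme_NVMe_512GB" -- leading
 * and trailing padding is stripped and interior spaces become '_' -- and
 * the new length is returned.  With domiddle != 0 all spaces and CRs are
 * removed outright, e.g. "S3X4 NB0K 12345 " becomes "S3X4NB0K12345".
 */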