/* xref: /dragonfly/sys/dev/disk/nvme/nvme.c (revision 38b720cd) */
/*
 * Copyright (c) 2016 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Most low-level chip-related functions (other than attachment) reside in
 * this module.  Most functions assume that the caller is already holding
 * appropriate locks to prevent SMP collisions.
 */

#include "nvme.h"

MALLOC_DEFINE(M_NVME, "NVMe Storage Device", "NVME");

/*
 * DMA mapping callbacks.
 */
static
void
nvme_dmamem_saveseg(void *info, bus_dma_segment_t *segs, int nsegs, int error)
{
	KKASSERT(error == 0);
	KKASSERT(nsegs == 1);
	*(bus_addr_t *)info = segs->ds_addr;
}

/*
 * Low-level chip enable/disable.
 */
int
nvme_enable(nvme_softc_t *sc, int enable)
{
	uint32_t reg;
	int error = 0;
	int base_ticks;

	reg = nvme_read(sc, NVME_REG_CONFIG);
	if (enable == 0 && (reg & NVME_CONFIG_EN)) {
		/*
		 * Disable the chip so we can program it.
		 */
		reg &= ~NVME_CONFIG_EN;
		nvme_write(sc, NVME_REG_CONFIG, reg);
	} else if (enable && (reg & NVME_CONFIG_EN) == 0) {
		/*
		 * Enable the chip once programmed.
		 */
		reg |= NVME_CONFIG_EN;
		nvme_write(sc, NVME_REG_CONFIG, reg);
	}
	error = ENXIO;
	base_ticks = ticks;
	while ((int)(ticks - base_ticks) < sc->entimo) {
		reg = nvme_read(sc, NVME_REG_STATUS);
		if (enable == 0 && (reg & NVME_STATUS_RDY) == 0) {
			error = 0;
			break;
		}
		if (enable && (reg & NVME_STATUS_RDY)) {
			error = 0;
			break;
		}
		nvme_os_sleep(50);	/* 50ms poll */
	}
	if (error) {
		device_printf(sc->dev, "Cannot %s device\n",
			      (enable ? "enable" : "disable"));
	} else {
#if 0
		kprintf("gratuitous 15 second sleep\n");
		nvme_os_sleep(15000);
		kprintf("gratuitous 15 second sleep done\n");
#endif
	}
	return error;
}
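
/*
 * Illustrative sketch (not compiled): the disable -> program -> enable
 * pattern implied by the comments in nvme_enable().  The helper name is
 * hypothetical and the admin-queue register writes are intentionally
 * omitted; they are performed by the attach code.
 */
#if 0
static int
nvme_example_reprogram(nvme_softc_t *sc)
{
	int error;

	/* take the controller offline so it can be reprogrammed */
	error = nvme_enable(sc, 0);
	if (error)
		return error;

	/* ... program admin queue registers here (see attach code) ... */

	/* bring the controller back up and wait for RDY */
	return nvme_enable(sc, 1);
}
#endif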

/*
 * Allocate submission and completion queues.  If qid is 0 we are allocating
 * the ADMIN queues; otherwise we are allocating I/O queues.
 */
int
nvme_alloc_subqueue(nvme_softc_t *sc, uint16_t qid)
{
	nvme_subqueue_t *queue = &sc->subqueues[qid];
	int error = 0;

	/*
	 * For now, use the maximum queue size negotiated during the
	 * attach.
	 */
	lockinit(&queue->lk, "nvqlk", 0, 0);
	queue->sc = sc;
	queue->nqe = sc->maxqe;
	queue->qid = qid;
	queue->subq_doorbell_reg = NVME_REG_SUBQ_BELL(qid, sc->dstrd4);

	/*
	 * dma memory for the submission queue
	 */
	if (error == 0) {
		error = bus_dmamem_alloc(sc->sque_tag, (void **)&queue->ksubq,
					 BUS_DMA_ZERO, &queue->sque_map);
	}
	if (error == 0) {
		error = bus_dmamap_load(sc->sque_tag, queue->sque_map,
					queue->ksubq,
					bus_dma_tag_getmaxsize(sc->sque_tag),
					nvme_dmamem_saveseg, &queue->psubq,
					0);
	}

	/*
	 * dma memory for enough PRPs to map MAXPHYS bytes of memory per
	 * request.  A MAXPHYS buffer which begins partially straddling
	 * a page boundary can still be accommodated because we have an
	 * additional PRP entry in cmd.head.
	 */
	if (error == 0) {
		error = bus_dmamem_alloc(sc->prps_tag, (void **)&queue->kprps,
					 BUS_DMA_ZERO, &queue->prps_map);
	}
	if (error == 0) {
		error = bus_dmamap_load(sc->prps_tag, queue->prps_map,
					queue->kprps,
					bus_dma_tag_getmaxsize(sc->prps_tag),
					nvme_dmamem_saveseg, &queue->pprps,
					0);
	}

	/*
	 * dma memory for admin data
	 */
	if (qid == 0 && error == 0) {
		error = bus_dmamem_alloc(sc->adm_tag,
					 (void **)&queue->kdatapgs,
					 BUS_DMA_ZERO, &queue->adm_map);
	}
	if (qid == 0 && error == 0) {
		error = bus_dmamap_load(sc->adm_tag, queue->adm_map,
					queue->kdatapgs,
					bus_dma_tag_getmaxsize(sc->adm_tag),
					nvme_dmamem_saveseg, &queue->pdatapgs,
					0);
	}

	/*
	 * Driver request structures
	 */
	if (error == 0) {
		nvme_request_t *req;
		uint32_t i;

		queue->reqary = kmalloc(sizeof(nvme_request_t) * queue->nqe,
					M_NVME, M_WAITOK | M_ZERO);
		for (i = 0; i < queue->nqe; ++i) {
			req = &queue->reqary[i];
			req->next_avail = queue->first_avail;
			queue->first_avail = req;
			req->subq = queue;
			req->comq = &sc->comqueues[queue->comqid];
			req->cmd_id = i;
			if (qid == 0) {
				req->info = &queue->kdatapgs[i];
				req->pinfo = queue->pdatapgs +
					     i * sizeof(nvme_admin_data_t);
			}
		}
	}

	/*
	 * Error handling
	 */
	if (error)
		nvme_free_subqueue(sc, qid);
	return error;
}
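
/*
 * Illustrative arithmetic for the per-request PRP allocation above (not
 * compiled).  The page and MAXPHYS sizes are assumptions for the example;
 * the first, possibly partial, page of a transfer is described directly
 * by prp1 in cmd.head, so the PRP list itself only needs
 * MAXPHYS / PAGE_SIZE entries per request.
 */
#if 0
	/* e.g. MAXPHYS = 128KB, PAGE_SIZE = 4KB */
	size_t prps_per_req  = MAXPHYS / PAGE_SIZE;		 /* 32 entries */
	size_t bytes_per_req = prps_per_req * sizeof(uint64_t); /* 256 bytes */
	size_t bytes_total   = bytes_per_req * queue->nqe;	 /* whole queue */
#endif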

int
nvme_alloc_comqueue(nvme_softc_t *sc, uint16_t qid)
{
	nvme_comqueue_t *queue = &sc->comqueues[qid];
	int error = 0;

	/*
	 * For now, use the maximum queue size negotiated during the
	 * attach.
	 */
	lockinit(&queue->lk, "nvqlk", 0, 0);
	queue->sc = sc;
	queue->nqe = sc->maxqe;
	queue->qid = qid;
	queue->phase = NVME_COMQ_STATUS_PHASE;
	queue->comq_doorbell_reg = NVME_REG_COMQ_BELL(qid, sc->dstrd4);

	if (error == 0) {
		error = bus_dmamem_alloc(sc->cque_tag, (void **)&queue->kcomq,
					 BUS_DMA_ZERO, &queue->cque_map);
	}
	if (error == 0) {
		error = bus_dmamap_load(sc->cque_tag, queue->cque_map,
					queue->kcomq,
					bus_dma_tag_getmaxsize(sc->cque_tag),
					nvme_dmamem_saveseg, &queue->pcomq,
					0);
	}

	/*
	 * Error handling
	 */
	if (error)
		nvme_free_comqueue(sc, qid);
	return error;
}
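
/*
 * Note: nvme_alloc_subqueue()/nvme_alloc_comqueue() only set up host
 * memory and doorbell offsets.  The controller does not know about an
 * I/O queue until nvme_create_subqueue()/nvme_create_comqueue() below
 * issue the corresponding admin commands.
 */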

void
nvme_free_subqueue(nvme_softc_t *sc, uint16_t qid)
{
	nvme_subqueue_t *queue = &sc->subqueues[qid];

	queue->first_avail = NULL;
	if (queue->reqary) {
		kfree(queue->reqary, M_NVME);
		queue->reqary = NULL;
	}
	if (queue->ksubq) {
		bus_dmamem_free(sc->sque_tag, queue->ksubq, queue->sque_map);
		bus_dmamap_unload(sc->sque_tag, queue->sque_map);
		bus_dmamap_destroy(sc->sque_tag, queue->sque_map);
	}
	if (queue->kprps) {
		bus_dmamem_free(sc->prps_tag, queue->kprps, queue->prps_map);
		bus_dmamap_unload(sc->prps_tag, queue->prps_map);
		bus_dmamap_destroy(sc->prps_tag, queue->prps_map);
	}
	if (queue->kdatapgs) {
		bus_dmamem_free(sc->adm_tag, queue->kdatapgs, queue->adm_map);
		bus_dmamap_unload(sc->adm_tag, queue->adm_map);
		bus_dmamap_destroy(sc->adm_tag, queue->adm_map);
	}
	bzero(queue, sizeof(*queue));
}

void
nvme_free_comqueue(nvme_softc_t *sc, uint16_t qid)
{
	nvme_comqueue_t *queue = &sc->comqueues[qid];

	if (queue->kcomq) {
		bus_dmamem_free(sc->cque_tag, queue->kcomq, queue->cque_map);
		bus_dmamap_unload(sc->cque_tag, queue->cque_map);
		bus_dmamap_destroy(sc->cque_tag, queue->cque_map);
	}
	bzero(queue, sizeof(*queue));
}

/*
 * ADMIN AND I/O REQUEST HANDLING
 */

/*
 * Obtain a request and handle DMA mapping of the supplied kernel buffer.
 * Fields in cmd.head will be initialized; remaining fields will be zeroed.
 * The caller is responsible for filling in the remaining fields as
 * appropriate.
 *
 * Caller must hold the queue lock.
 */
nvme_request_t *
nvme_get_admin_request(nvme_softc_t *sc, uint8_t opcode)
{
	nvme_request_t *req;

	req = nvme_get_request(&sc->subqueues[0], opcode, NULL, 0);
	req->cmd.head.prp1 = req->pinfo;
	req->callback = NULL;

	return req;
}
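
/*
 * Illustrative usage (not compiled): the admin helper commands later in
 * this file all follow this pattern.  The opcode and dw10 value below are
 * placeholders rather than a real command.
 */
#if 0
	nvme_request_t *req;
	int status;

	req = nvme_get_admin_request(sc, opcode);	/* placeholder opcode */
	req->cmd.dw10 = 0;				/* command-specific */
	nvme_submit_request(req);
	status = nvme_wait_request(req, hz);
	nvme_put_request(req);
#endif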

/*
 * Obtain a request and handle DMA mapping of the supplied kernel buffer.
 * Fields in cmd.head will be initialized; remaining fields will be zeroed.
 * The caller is responsible for filling in the remaining fields as
 * appropriate.
 *
 * May return NULL if no requests are available or if there is no room in
 * the submission queue to handle it (this should only be possible on an
 * I/O queue; admin queue operations are managed).
 *
 * Caller should NOT hold the queue lock.
 */
nvme_request_t *
nvme_get_request(nvme_subqueue_t *queue, uint8_t opcode,
		 char *kva, size_t bytes)
{
	nvme_request_t *req;
	nvme_request_t *next;

	/*
	 * No easy lockless way to pull a new request off.  We have to check
	 * for a number of conditions and there may be multiple threads
	 * making this call simultaneously, which complicates matters even
	 * more.
	 */
	lockmgr(&queue->lk, LK_EXCLUSIVE);

	/*
	 * Make sure the submission queue has room to accommodate the
	 * request.  Requests can be completed out of order so the
	 * submission ring could still be full even though we have
	 * requests available.
	 */
	if ((queue->subq_tail + queue->unsubmitted + 1) % queue->nqe ==
	    queue->subq_head) {
		lockmgr(&queue->lk, LK_RELEASE);
		KKASSERT(queue->qid != 0);
		atomic_swap_int(&queue->signal_requeue, 1);

		return NULL;
	}

	/*
	 * Pop the next available request off of the first_avail linked
	 * list.  An atomic op must be used here because nvme_put_request()
	 * returns requests to the list without holding queue->lk.
	 */
	for (;;) {
		req = queue->first_avail;
		cpu_ccfence();
		if (req == NULL) {
			lockmgr(&queue->lk, LK_RELEASE);
			KKASSERT(queue->qid != 0);
			atomic_swap_int(&queue->signal_requeue, 1);

			return NULL;
		}
		next = req->next_avail;
		if (atomic_cmpset_ptr(&queue->first_avail, req, next))
			break;
	}

	/*
	 * We have to keep track of unsubmitted requests in order to be
	 * able to properly check whether the ring is full or not (the
	 * check is done at the top of this function, above).
	 */
	++queue->unsubmitted;
	lockmgr(&queue->lk, LK_RELEASE);

	/*
	 * Fill in the basic fields and do the DMA mapping.
	 */
	req->next_avail = NULL;
	KKASSERT(req->state == NVME_REQ_AVAIL);
	req->state = NVME_REQ_ALLOCATED;
	req->callback = NULL;
	req->waiting = 0;

	req->cmd.head.opcode = opcode;
	req->cmd.head.flags = NVME_SUBQFLG_PRP | NVME_SUBQFLG_NORM;
	req->cmd.head.cid = req->cmd_id;
	req->cmd.head.nsid = 0;
	req->cmd.head.mptr = 0;
	req->cmd.head.prp1 = 0;
	req->cmd.head.prp2 = 0;
	req->cmd.dw10 = 0;
	req->cmd.dw11 = 0;
	req->cmd.dw12 = 0;
	req->cmd.dw13 = 0;
	req->cmd.dw14 = 0;
	req->cmd.dw15 = 0;

	if (kva) {
		size_t count = 0;
		size_t idx = 0;
		vm_paddr_t paddr;
		vm_paddr_t pprptab;
		uint64_t *kprptab;
		KKASSERT(bytes >= 0 && bytes <= MAXPHYS);

		kprptab = queue->kprps +
			  (MAXPHYS / PAGE_SIZE) * req->cmd_id;
		pprptab = queue->pprps +
			  (MAXPHYS / PAGE_SIZE) * req->cmd_id *
			  sizeof(uint64_t);

		while (count < bytes) {
			paddr = vtophys(kva + count);
			if (idx == 0) {
				KKASSERT((paddr & 3) == 0);
				req->cmd.head.prp1 = paddr;
				count += (((intptr_t)kva + PAGE_SIZE) &
					  ~(intptr_t)PAGE_MASK) -
					 (intptr_t)kva;
			} else if (idx == 1 && count + PAGE_SIZE >= bytes) {
				KKASSERT((paddr & PAGE_MASK) == 0);
				req->cmd.head.prp2 = paddr;
				count += PAGE_SIZE;
			} else {
				KKASSERT((paddr & PAGE_MASK) == 0);
				/* if (idx == 1) -- not needed, just repeat */
				req->cmd.head.prp2 = pprptab; /* repeat */
				kprptab[idx - 1] = paddr;
				count += PAGE_SIZE;
			}
			++idx;
		}
	}
	return req;
}
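
/*
 * Worked example for the PRP construction above (illustrative numbers,
 * assuming 4KB pages): a 12KB transfer whose kva begins 512 bytes into a
 * page touches four pages.  prp1 receives the physical address of the
 * partial first page, prp2 is pointed at this request's PRP list, and
 * kprptab[0..2] receive the page-aligned physical addresses of the
 * remaining three pages.  A transfer that ends within the second page
 * instead places that page's address directly in prp2 and no list is
 * used.
 */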

/*
 * Submit request for execution.  This will doorbell the subq.
 *
 * Caller must hold the queue lock.
 */
void
nvme_submit_request(nvme_request_t *req)
{
	nvme_subqueue_t *queue = req->subq;
	nvme_allcmd_t *cmd;

	cmd = &queue->ksubq[queue->subq_tail];
	--queue->unsubmitted;
	if (++queue->subq_tail == queue->nqe)
		queue->subq_tail = 0;
	KKASSERT(queue->subq_tail != queue->subq_head);
	*cmd = req->cmd;
	cpu_sfence();	/* needed? */
	req->state = NVME_REQ_SUBMITTED;
	nvme_write(queue->sc, queue->subq_doorbell_reg, queue->subq_tail);
}

/*
 * Wait for a request to complete.
 *
 * The caller does not need to hold the queue lock; the completion queue
 * lock is acquired internally while polling for and sleeping on the
 * completion.
 */
int
nvme_wait_request(nvme_request_t *req, int ticks)
{
	struct lock *lk;
	int code;

	req->waiting = 1;
	if (req->state != NVME_REQ_COMPLETED) {
		lk = &req->comq->lk;
		cpu_lfence();
		lockmgr(lk, LK_EXCLUSIVE);
		while (req->state == NVME_REQ_SUBMITTED) {
			nvme_poll_completions(req->comq, lk);
			if (req->state != NVME_REQ_SUBMITTED)
				break;
			lksleep(req, lk, 0, "nvwait", hz);
		}
		lockmgr(lk, LK_RELEASE);
		KKASSERT(req->state == NVME_REQ_COMPLETED);
	}
	cpu_lfence();
	code = NVME_COMQ_STATUS_CODE_GET(req->res.tail.status);

	return code;
}

/*
 * Put the request away, making it available for reuse.  If this is an
 * admin request its auxiliary data page is also released for reuse.
 *
 * The caller does NOT have to hold the queue lock.
 */
void
nvme_put_request(nvme_request_t *req)
{
	nvme_subqueue_t *queue = req->subq;
	nvme_request_t *next;

	/*
	 * Insert on head for best cache reuse.
	 */
	KKASSERT(req->state == NVME_REQ_COMPLETED);
	req->state = NVME_REQ_AVAIL;
	for (;;) {
		next = queue->first_avail;
		cpu_ccfence();
		req->next_avail = next;
		if (atomic_cmpset_ptr(&queue->first_avail, next, req))
			break;
	}

	/*
	 * If BIOs were deferred due to lack of request space, signal the
	 * admin thread to requeue them.  This is a bit messy and normally
	 * should not happen due to the large number of queue entries NVMe
	 * usually has.  Let it race for now (the admin thread has a 1Hz
	 * tick).
	 */
	if (atomic_swap_int(&queue->signal_requeue, 0)) {
		atomic_set_int(&queue->sc->admin_signal, ADMIN_SIG_REQUEUE);
		wakeup(&queue->sc->admin_signal);
	}
}
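
/*
 * Illustrative request life cycle for an I/O command (not compiled).
 * Per the comments above, nvme_get_request() is called without the
 * submission queue lock and nvme_submit_request() with it held.  The
 * subq, opcode, buffer and nsid values are placeholders.
 */
#if 0
	nvme_request_t *req;
	int status;

	req = nvme_get_request(subq, opcode, kva, bytes);
	if (req == NULL)
		return;		/* caller typically defers the BIO for requeue */
	req->cmd.head.nsid = nsid;
	/* ... fill in dw10/dw11/dw12 for the command ... */

	lockmgr(&subq->lk, LK_EXCLUSIVE);
	nvme_submit_request(req);
	lockmgr(&subq->lk, LK_RELEASE);

	status = nvme_wait_request(req, hz);
	nvme_put_request(req);
#endif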

/*
 * Poll for completions on the queue, copy the 16-byte hw result entry
 * into the request and poke the doorbell to update the controller's
 * understanding of comq_head.
 *
 * If lk is non-NULL it will be passed to the callback which typically
 * releases it temporarily when calling biodone() or doing other complex
 * work on the result.
 *
 * Caller must usually hold comq->lk.
 */
void
nvme_poll_completions(nvme_comqueue_t *comq, struct lock *lk)
{
	nvme_softc_t *sc = comq->sc;
	nvme_request_t *req;
	nvme_subqueue_t *subq;
	nvme_allres_t *res;
#if 0
	int didwork = 0;
#endif

	KKASSERT(comq->comq_tail < comq->nqe);
	cpu_lfence();		/* needed prior to first phase test */
	for (;;) {
		/*
		 * WARNING! LOCK MAY HAVE BEEN TEMPORARILY LOST DURING LOOP.
		 */
		res = &comq->kcomq[comq->comq_tail];
		if ((res->tail.status ^ comq->phase) & NVME_COMQ_STATUS_PHASE)
			break;

		/*
		 * Process result on completion queue.
		 *
		 * Bump comq_tail, flip the phase detect when we roll-over.
		 * Doorbell every 1/4 queue and at the end of the loop.
		 */
		if (++comq->comq_tail == comq->nqe) {
			comq->comq_tail = 0;
			comq->phase ^= NVME_COMQ_STATUS_PHASE;
		}

		/*
		 * WARNING! I imploded the chip by reusing a command id
		 *	    before it was discarded in the completion queue
		 *	    via the doorbell, so for now we always write
		 *	    the doorbell before marking the request as
		 *	    COMPLETED (it can be reused instantly upon
		 *	    being marked).
		 */
#if 0
		if (++didwork == (comq->nqe >> 2)) {
			didwork = 0;
			nvme_write(comq->sc, comq->comq_doorbell_reg,
				   comq->comq_tail);
		}
#endif
		cpu_lfence();	/* needed prior to content check */

		/*
		 * Locate the request and related submission queue.  The
		 * request could be on a different queue.  A submission
		 * queue can have only one completion queue, so we can
		 * update subq_head without locking the submission queue.
		 */
		subq = &sc->subqueues[res->tail.subq_id];
		subq->subq_head = res->tail.subq_head_ptr;
		req = &subq->reqary[res->tail.cmd_id];

		/*
		 * Copy the fields and wakeup anyone waiting on req.
		 * The response field in the completion queue can be reused
		 * once we doorbell which is why we make a copy.
		 */
		KKASSERT(req->state == NVME_REQ_SUBMITTED &&
			 req->comq == comq);
		req->res = *res;
		nvme_write(comq->sc, comq->comq_doorbell_reg, comq->comq_tail);
		cpu_sfence();
		req->state = NVME_REQ_COMPLETED;
		if (req->callback) {
			req->callback(req, lk);
		} else if (req->waiting) {
			wakeup(req);
		}
	}
#if 0
	if (didwork)
		nvme_write(comq->sc, comq->comq_doorbell_reg, comq->comq_tail);
#endif
}
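
/*
 * Illustrative callback shape (not compiled).  The callback is invoked as
 * req->callback(req, lk) above; per the function comment it may drop the
 * passed lock temporarily around biodone().  The function name and the
 * bio back-pointer are hypothetical; the real completion callbacks live
 * in the disk layer, and the request is eventually returned with
 * nvme_put_request().
 */
#if 0
static void
nvme_example_io_done(nvme_request_t *req, struct lock *lk)
{
	struct bio *bio = req->bio;	/* hypothetical back-pointer */

	if (lk)
		lockmgr(lk, LK_RELEASE);
	biodone(bio);
	if (lk)
		lockmgr(lk, LK_EXCLUSIVE);
}
#endif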

/*
 * Interrupt handler.  Each vector starts at its own completion queue and
 * steps through the comqueues array, so all I/O completion queues are
 * covered even when there are fewer vectors than queues.
 */
void
nvme_intr(void *arg)
{
	nvme_comqueue_t *comq = arg;
	nvme_softc_t *sc;
	int i;
	int skip;

	sc = comq->sc;
	if (sc->nirqs == 1)
		skip = 1;
	else
		skip = sc->nirqs - 1;

	for (i = comq->qid; i <= sc->niocomqs; i += skip) {
		if (comq->nqe) {
			lockmgr(&comq->lk, LK_EXCLUSIVE);
			nvme_poll_completions(comq, &comq->lk);
			lockmgr(&comq->lk, LK_RELEASE);
		}
		comq += skip;
	}
}

/*
 * ADMIN HELPER COMMAND ROLLUP FUNCTIONS
 */
/*
 * Issue command to create a submission queue.
 */
int
nvme_create_subqueue(nvme_softc_t *sc, uint16_t qid)
{
	nvme_request_t *req;
	nvme_subqueue_t *subq = &sc->subqueues[qid];
	int status;

	req = nvme_get_admin_request(sc, NVME_OP_CREATE_SUBQ);
	req->cmd.head.prp1 = subq->psubq;
	req->cmd.crsub.subq_id = qid;
	req->cmd.crsub.subq_size = subq->nqe - 1;	/* zero-based value */
	req->cmd.crsub.flags = NVME_CREATESUB_PC | NVME_CREATESUB_PRI_URG;
	req->cmd.crsub.comq_id = subq->comqid;

	nvme_submit_request(req);
	status = nvme_wait_request(req, hz);
	nvme_put_request(req);

	return status;
}

/*
 * Issue command to create a completion queue.
 */
int
nvme_create_comqueue(nvme_softc_t *sc, uint16_t qid)
{
	nvme_request_t *req;
	nvme_comqueue_t *comq = &sc->comqueues[qid];
	int status;
	int error;
	uint16_t ivect;

	error = 0;
	if (sc->nirqs > 1) {
		ivect = 1 + (qid - 1) % (sc->nirqs - 1);
		if (qid && ivect == qid) {
			error = bus_setup_intr(sc->dev, sc->irq[ivect],
						INTR_MPSAFE | INTR_HIFREQ,
						nvme_intr,
						&sc->comqueues[ivect],
						&sc->irq_handle[ivect],
						NULL);
		}
	} else {
		ivect = 0;
	}
	if (error)
		return error;

	req = nvme_get_admin_request(sc, NVME_OP_CREATE_COMQ);
	req->cmd.head.prp1 = comq->pcomq;
	req->cmd.crcom.comq_id = qid;
	req->cmd.crcom.comq_size = comq->nqe - 1;	/* zero-based value */
	req->cmd.crcom.ivect = ivect;
	req->cmd.crcom.flags = NVME_CREATECOM_PC | NVME_CREATECOM_IEN;

	nvme_submit_request(req);
	status = nvme_wait_request(req, hz);
	nvme_put_request(req);

	return status;
}
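
/*
 * Illustrative attach-time ordering (not compiled): a completion queue
 * must exist before the submission queue that posts to it is created,
 * so a queue pair is typically brought up completion-queue first.
 */
#if 0
	status = nvme_create_comqueue(sc, qid);
	if (status == 0)
		status = nvme_create_subqueue(sc, qid);
#endif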

/*
 * Issue command to delete a submission queue.
 */
int
nvme_delete_subqueue(nvme_softc_t *sc, uint16_t qid)
{
	nvme_request_t *req;
	/*nvme_subqueue_t *subq = &sc->subqueues[qid];*/
	int status;

	req = nvme_get_admin_request(sc, NVME_OP_DELETE_SUBQ);
	req->cmd.head.prp1 = 0;
	req->cmd.delete.qid = qid;

	nvme_submit_request(req);
	status = nvme_wait_request(req, hz);
	nvme_put_request(req);

	return status;
}

/*
 * Issue command to delete a completion queue.
 */
int
nvme_delete_comqueue(nvme_softc_t *sc, uint16_t qid)
{
	nvme_request_t *req;
	/*nvme_comqueue_t *comq = &sc->comqueues[qid];*/
	int status;
	uint16_t ivect;

	req = nvme_get_admin_request(sc, NVME_OP_DELETE_COMQ);
	req->cmd.head.prp1 = 0;
	req->cmd.delete.qid = qid;

	nvme_submit_request(req);
	status = nvme_wait_request(req, hz);
	nvme_put_request(req);

	if (qid && sc->nirqs > 1) {
		ivect = 1 + (qid - 1) % (sc->nirqs - 1);
		if (ivect == qid) {
			bus_teardown_intr(sc->dev,
					  sc->irq[ivect],
					  sc->irq_handle[ivect]);
		}
	}

	return status;
}
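
/*
 * Illustrative teardown ordering (not compiled): submission queues are
 * deleted before the completion queue they post to, mirroring the
 * creation order above.
 */
#if 0
	status = nvme_delete_subqueue(sc, qid);
	if (status == 0)
		status = nvme_delete_comqueue(sc, qid);
#endif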

/*
 * Issue a friendly shutdown to the controller.
 */
int
nvme_issue_shutdown(nvme_softc_t *sc)
{
	uint32_t reg;
	int base_ticks;
	int error;

	/*
	 * Put us in shutdown
	 */
	reg = nvme_read(sc, NVME_REG_CONFIG);
	reg &= ~NVME_CONFIG_SHUT_MASK;
	reg |= NVME_CONFIG_SHUT_NORM;
	nvme_write(sc, NVME_REG_CONFIG, reg);

	/*
	 * Wait up to 10 seconds for acknowledgement
	 */
	error = ENXIO;
	base_ticks = ticks;
	while ((int)(ticks - base_ticks) < 10 * 20) {
		reg = nvme_read(sc, NVME_REG_STATUS);
		if ((reg & NVME_STATUS_SHUT_MASK) & NVME_STATUS_SHUT_DONE) {
			error = 0;
			break;
		}
		nvme_os_sleep(50);	/* 50ms poll */
	}
	if (error)
		device_printf(sc->dev, "Unable to shutdown chip nicely\n");
	else
		device_printf(sc->dev, "Normal chip shutdown succeeded\n");

	return error;
}

/*
 * Make space-padded serial and model number strings more readable.
 */
size_t
string_cleanup(char *str, int domiddle)
{
	size_t i;
	size_t j;
	int atbeg = 1;

	for (i = j = 0; str[i]; ++i) {
		if ((str[i] == ' ' || str[i] == '\r') &&
		    (atbeg || domiddle)) {
			continue;
		} else {
			atbeg = 0;
		}
		str[j] = str[i];
		++j;
	}
	while (domiddle == 0 && j > 0 && (str[j-1] == ' ' || str[j-1] == '\r'))
		--j;
	str[j] = 0;
	if (domiddle == 0) {
		for (j = 0; str[j]; ++j) {
			if (str[j] == ' ')
				str[j] = '_';
		}
	}

	return j;
}
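
/*
 * Example (illustrative): applied to a buffer containing
 * "  Samsung SSD 970 EVO   ", string_cleanup(str, 0) yields
 * "Samsung_SSD_970_EVO"; with domiddle != 0 all spaces and CRs are
 * stripped instead, yielding "SamsungSSD970EVO".
 */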
846