/*
 * Copyright (c) 2016 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Most low-level chip related functions (other than attachment) reside in
 * this module.  Most functions assume that the caller is already holding
 * appropriate locks to prevent SMP collisions.
 */

#include "nvme.h"

MALLOC_DEFINE(M_NVME, "NVMe Storage Device", "NVME");

/*
 * DMA mapping callbacks.
 */
static
void
nvme_dmamem_saveseg(void *info, bus_dma_segment_t *segs, int nsegs, int error)
{
	KKASSERT(error == 0);
	KKASSERT(nsegs == 1);
	*(bus_addr_t *)info = segs->ds_addr;
}
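
/*
 * Note: nvme_dmamem_saveseg() is only useful when the DMA tag guarantees a
 * single physically contiguous segment (nsegs == 1); the attach code is
 * assumed to create the sque/cque/prps/adm tags that way.  The callback
 * simply records the bus address of that one segment for later use as a
 * queue base or PRP pointer.
 */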

/*
 * Low-level chip enable/disable.
 */
int
nvme_enable(nvme_softc_t *sc, int enable)
{
	uint32_t reg;
	int error = 0;
	int base_ticks;

	reg = nvme_read(sc, NVME_REG_CONFIG);
	if (enable == 0 && (reg & NVME_CONFIG_EN)) {
		/*
		 * Disable the chip so we can program it.
		 */
		reg &= ~NVME_CONFIG_EN;
		nvme_write(sc, NVME_REG_CONFIG, reg);
	} else if (enable && (reg & NVME_CONFIG_EN) == 0) {
		/*
		 * Enable the chip once programmed.
		 */
		reg |= NVME_CONFIG_EN;
		nvme_write(sc, NVME_REG_CONFIG, reg);
	}
	error = ENXIO;
	base_ticks = ticks;
	while ((int)(ticks - base_ticks) < sc->entimo) {
		reg = nvme_read(sc, NVME_REG_STATUS);
		if (enable == 0 && (reg & NVME_STATUS_RDY) == 0) {
			error = 0;
			break;
		}
		if (enable && (reg & NVME_STATUS_RDY)) {
			error = 0;
			break;
		}
		nvme_os_sleep(50);	/* 50ms poll */
	}
	if (error) {
		device_printf(sc->dev, "Cannot %s device\n",
			      (enable ? "enable" : "disable"));
	} else {
#if 0
		kprintf("gratuitous 15 second sleep\n");
		nvme_os_sleep(15000);
		kprintf("gratuitous 15 second sleep done\n");
#endif
	}
	return error;
}
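
/*
 * A rough sketch of how the attach path is expected to use this helper
 * (the exact sequence lives in the attach code, not here):
 *
 *	nvme_enable(sc, 0);	// clear EN, wait for RDY to drop
 *	... program admin queue registers and CONFIG fields ...
 *	nvme_enable(sc, 1);	// set EN, wait for RDY
 *
 * Waiting on NVME_STATUS_RDY in both directions is required before the
 * controller registers may be reprogrammed.
 */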

/*
 * Allocate submission and completion queues.  If qid is 0 we are allocating
 * the ADMIN queues, otherwise we are allocating I/O queues.
 */
int
nvme_alloc_subqueue(nvme_softc_t *sc, uint16_t qid)
{
	nvme_subqueue_t *queue = &sc->subqueues[qid];
	int error = 0;

	/*
	 * For now implement the maximum queue size negotiated in the
	 * attach.
	 */
	lockinit(&queue->lk, "nvqlk", 0, 0);
	queue->sc = sc;
	queue->nqe = sc->maxqe;
	queue->qid = qid;
	queue->subq_doorbell_reg = NVME_REG_SUBQ_BELL(qid, sc->dstrd4);

	/*
	 * dma memory for the submission queue
	 */
	if (error == 0) {
		error = bus_dmamem_alloc(sc->sque_tag, (void **)&queue->ksubq,
					 BUS_DMA_ZERO, &queue->sque_map);
	}
	if (error == 0) {
		error = bus_dmamap_load(sc->sque_tag, queue->sque_map,
					queue->ksubq,
					bus_dma_tag_getmaxsize(sc->sque_tag),
					nvme_dmamem_saveseg, &queue->psubq,
					0);
	}

	/*
	 * dma memory for enough PRPs to map MAXPHYS bytes of memory per
	 * request.  A MAXPHYS buffer which begins partially straddling
	 * a page boundary can still be accommodated because we have an
	 * additional PRP entry in cmd.head.
	 */
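	/*
	 * Sizing note (an illustration, not authoritative): each command can
	 * address one partial leading page via prp1 and either one more page
	 * via prp2 or a PRP list pointed to by prp2.  With, say, a 128KB
	 * MAXPHYS and 4KB pages that is up to 32 additional page pointers
	 * per request, which is why the prps area reserves
	 * (MAXPHYS / PAGE_SIZE) 64-bit entries for every cmd_id.
	 */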
	if (error == 0) {
		error = bus_dmamem_alloc(sc->prps_tag, (void **)&queue->kprps,
					 BUS_DMA_ZERO, &queue->prps_map);
	}
	if (error == 0) {
		error = bus_dmamap_load(sc->prps_tag, queue->prps_map,
					queue->kprps,
					bus_dma_tag_getmaxsize(sc->prps_tag),
					nvme_dmamem_saveseg, &queue->pprps,
					0);
	}

	/*
	 * dma memory for admin data
	 */
	if (qid == 0 && error == 0) {
		error = bus_dmamem_alloc(sc->adm_tag,
					 (void **)&queue->kdatapgs,
					 BUS_DMA_ZERO, &queue->adm_map);
	}
	if (qid == 0 && error == 0) {
		error = bus_dmamap_load(sc->adm_tag, queue->adm_map,
					queue->kdatapgs,
					bus_dma_tag_getmaxsize(sc->adm_tag),
					nvme_dmamem_saveseg, &queue->pdatapgs,
					0);
	}

	/*
	 * Driver request structures
	 */
	if (error == 0) {
		nvme_request_t *req;
		uint32_t i;

		queue->reqary = kmalloc(sizeof(nvme_request_t) * queue->nqe,
					M_NVME, M_WAITOK | M_ZERO);
		for (i = 0; i < queue->nqe; ++i) {
			req = &queue->reqary[i];
			req->next_avail = queue->first_avail;
			queue->first_avail = req;
			req->subq = queue;
			req->comq = &sc->comqueues[queue->comqid];
			req->cmd_id = i;
			if (qid == 0) {
				req->info = &queue->kdatapgs[i];
				req->pinfo = queue->pdatapgs +
					     i * sizeof(nvme_admin_data_t);
			}
		}
	}
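
	/*
	 * The per-queue request array doubles as a free list: requests are
	 * chained LIFO through next_avail with first_avail as the head, and
	 * nvme_get_request()/nvme_put_request() pop and push entries with
	 * atomic_cmpset_ptr().  cmd_id is simply the array index, which is
	 * what lets the completion side map a returned cmd_id back to its
	 * request in O(1).
	 */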

	/*
	 * Error handling
	 */
	if (error)
		nvme_free_subqueue(sc, qid);
	return error;
}

int
nvme_alloc_comqueue(nvme_softc_t *sc, uint16_t qid)
{
	nvme_comqueue_t *queue = &sc->comqueues[qid];
	int error = 0;

	/*
	 * For now implement the maximum queue size negotiated in the
	 * attach.
	 */
	lockinit(&queue->lk, "nvqlk", 0, 0);
	queue->sc = sc;
	queue->nqe = sc->maxqe;
	queue->qid = qid;
	queue->phase = NVME_COMQ_STATUS_PHASE;
	queue->comq_doorbell_reg = NVME_REG_COMQ_BELL(qid, sc->dstrd4);

	if (error == 0) {
		error = bus_dmamem_alloc(sc->cque_tag, (void **)&queue->kcomq,
					 BUS_DMA_ZERO, &queue->cque_map);
	}
	if (error == 0) {
		error = bus_dmamap_load(sc->cque_tag, queue->cque_map,
					queue->kcomq,
					bus_dma_tag_getmaxsize(sc->cque_tag),
					nvme_dmamem_saveseg, &queue->pcomq,
					0);
	}

	/*
	 * Error handling
	 */
	if (error)
		nvme_free_comqueue(sc, qid);
	return error;
}

void
nvme_free_subqueue(nvme_softc_t *sc, uint16_t qid)
{
	nvme_subqueue_t *queue = &sc->subqueues[qid];

	queue->first_avail = NULL;
	if (queue->reqary) {
		kfree(queue->reqary, M_NVME);
		queue->reqary = NULL;
	}
	if (queue->ksubq) {
		bus_dmamem_free(sc->sque_tag, queue->ksubq, queue->sque_map);
		bus_dmamap_unload(sc->sque_tag, queue->sque_map);
		bus_dmamap_destroy(sc->sque_tag, queue->sque_map);
	}
	if (queue->kprps) {
		bus_dmamem_free(sc->prps_tag, queue->kprps, queue->prps_map);
		bus_dmamap_unload(sc->prps_tag, queue->prps_map);
		bus_dmamap_destroy(sc->prps_tag, queue->prps_map);
	}
	if (queue->kdatapgs) {
		bus_dmamem_free(sc->adm_tag, queue->kdatapgs, queue->adm_map);
		bus_dmamap_unload(sc->adm_tag, queue->adm_map);
		bus_dmamap_destroy(sc->adm_tag, queue->adm_map);
	}
	bzero(queue, sizeof(*queue));
}

void
nvme_free_comqueue(nvme_softc_t *sc, uint16_t qid)
{
	nvme_comqueue_t *queue = &sc->comqueues[qid];

	if (queue->kcomq) {
		bus_dmamem_free(sc->cque_tag, queue->kcomq, queue->cque_map);
		bus_dmamap_unload(sc->cque_tag, queue->cque_map);
		bus_dmamap_destroy(sc->cque_tag, queue->cque_map);
	}
	bzero(queue, sizeof(*queue));
}

/*
 * ADMIN AND I/O REQUEST HANDLING
 */

/*
 * Obtain a request and handle DMA mapping the supplied kernel buffer.
 * Fields in cmd.head will be initialized and remaining fields will be zero'd.
 * Caller is responsible for filling in remaining fields as appropriate.
 *
 * Caller must hold the queue lock.
 */
nvme_request_t *
nvme_get_admin_request(nvme_softc_t *sc, uint8_t opcode)
{
	nvme_request_t *req;

	req = nvme_get_request(&sc->subqueues[0], opcode, NULL, 0);
	req->cmd.head.prp1 = req->pinfo;
	req->callback = NULL;

	return req;
}
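
/*
 * Each admin request owns a dedicated nvme_admin_data_t page (req->info /
 * req->pinfo, set up in nvme_alloc_subqueue()), and prp1 is pointed at it
 * here, so callers of commands such as IDENTIFY can read the returned data
 * straight out of req->info without doing any mapping of their own.
 */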
/*
 * Obtain a request and handle DMA mapping the supplied kernel buffer.
 * Fields in cmd.head will be initialized and remaining fields will be zero'd.
 * Caller is responsible for filling in remaining fields as appropriate.
 *
 * May return NULL if no requests are available (should only be possible
 * on an I/O queue; admin queue operations are managed).
 *
 * Caller should NOT hold the queue lock.
 */
nvme_request_t *
nvme_get_request(nvme_subqueue_t *queue, uint8_t opcode,
		 char *kva, size_t bytes)
{
	nvme_request_t *req;
	nvme_request_t *next;

	/*
	 * The lock is currently needed because another cpu could pull
	 * a request off, use it, finish, and put it back (and the next
	 * pointer might then be different) all in between our req = and
	 * our atomic op.  This would assign the wrong 'next' field.
	 *
	 * XXX optimize this.
	 */
	lockmgr(&queue->lk, LK_EXCLUSIVE);
	for (;;) {
		req = queue->first_avail;
		cpu_ccfence();
		if (req == NULL) {
			queue->signal_requeue = 1;
			lockmgr(&queue->lk, LK_RELEASE);
			KKASSERT(queue->qid != 0);

			return NULL;
		}
		next = req->next_avail;
		if (atomic_cmpset_ptr(&queue->first_avail, req, next))
			break;
	}
	lockmgr(&queue->lk, LK_RELEASE);
	req->next_avail = NULL;
	KKASSERT(req->state == NVME_REQ_AVAIL);
	req->state = NVME_REQ_ALLOCATED;
	req->callback = NULL;
	req->waiting = 0;

	req->cmd.head.opcode = opcode;
	req->cmd.head.flags = NVME_SUBQFLG_PRP | NVME_SUBQFLG_NORM;
	req->cmd.head.cid = req->cmd_id;
	req->cmd.head.nsid = 0;
	req->cmd.head.mptr = 0;
	req->cmd.head.prp1 = 0;
	req->cmd.head.prp2 = 0;
	req->cmd.dw10 = 0;
	req->cmd.dw11 = 0;
	req->cmd.dw12 = 0;
	req->cmd.dw13 = 0;
	req->cmd.dw14 = 0;
	req->cmd.dw15 = 0;

	if (kva) {
		size_t count = 0;
		size_t idx = 0;
		vm_paddr_t paddr;
		vm_paddr_t pprptab;
		uint64_t *kprptab;
		KKASSERT(bytes >= 0 && bytes <= MAXPHYS);

		kprptab = queue->kprps +
			  (MAXPHYS / PAGE_SIZE) * req->cmd_id;
		pprptab = queue->pprps +
			  (MAXPHYS / PAGE_SIZE) * req->cmd_id *
			  sizeof(uint64_t);

		while (count < bytes) {
			paddr = vtophys(kva + count);
			if (idx == 0) {
				KKASSERT((paddr & 3) == 0);
				req->cmd.head.prp1 = paddr;
				count += (((intptr_t)kva + PAGE_SIZE) &
					  ~(intptr_t)PAGE_MASK) -
					 (intptr_t)kva;
			} else if (idx == 1 && count + PAGE_SIZE >= bytes) {
				KKASSERT((paddr & PAGE_MASK) == 0);
				req->cmd.head.prp2 = paddr;
				count += PAGE_SIZE;
			} else {
				KKASSERT((paddr & PAGE_MASK) == 0);
				/* if (idx == 1) -- not needed, just repeat */
				req->cmd.head.prp2 = pprptab; /* repeat */
				kprptab[idx - 1] = paddr;
				count += PAGE_SIZE;
			}
			++idx;
		}
	}
	return req;
}
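
/*
 * Worked example of the PRP setup above (illustrative numbers only): a 12KB
 * buffer starting 1KB into a 4KB page maps as
 *
 *	prp1 = physical address of the first (partial) 3KB piece
 *	prp2 = pprptab (PRP list), kprptab[0..2] = the next three pages
 *
 * whereas a transfer that fits in two pages puts the second page's physical
 * address directly in prp2 and never touches the PRP list.
 */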

/*
 * Submit request for execution.  This will doorbell the subq.
 *
 * Caller must hold the queue lock.
 */
void
nvme_submit_request(nvme_request_t *req)
{
	nvme_subqueue_t *queue = req->subq;
	nvme_allcmd_t *cmd;

	cmd = &queue->ksubq[queue->subq_tail];
	if (++queue->subq_tail == queue->nqe)
		queue->subq_tail = 0;
	*cmd = req->cmd;
	cpu_sfence();	/* needed? */
	req->state = NVME_REQ_SUBMITTED;
	nvme_write(queue->sc, queue->subq_doorbell_reg, queue->subq_tail);
}
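
/*
 * The doorbell write hands the new submission queue tail index to the
 * controller, which then fetches every entry between its previous head and
 * the new tail.  Holding the queue lock is what keeps subq_tail updates and
 * the doorbell writes ordered when multiple cpus submit on one queue.
 */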

/*
 * Wait for a request to complete.
 *
 * Caller does not need to hold the queue lock.  If it does, or if it
 * holds some other lock, it should pass it in so it can be released across
 * sleeps, else pass NULL.
 */
int
nvme_wait_request(nvme_request_t *req, int ticks)
{
	struct lock *lk;
	int code;

	req->waiting = 1;
	if (req->state != NVME_REQ_COMPLETED) {
		lk = &req->comq->lk;
		cpu_lfence();
		lockmgr(lk, LK_EXCLUSIVE);
		while (req->state == NVME_REQ_SUBMITTED) {
			nvme_poll_completions(req->comq, lk);
			if (req->state != NVME_REQ_SUBMITTED)
				break;
			lksleep(req, lk, 0, "nvwait", hz);
		}
		lockmgr(lk, LK_RELEASE);
		KKASSERT(req->state == NVME_REQ_COMPLETED);
	}
	cpu_lfence();
	code = NVME_COMQ_STATUS_CODE_GET(req->res.tail.status);

	return code;
}

/*
 * Put request away, making it available for reuse.  If this is an admin
 * request its auxiliary data page is also released for reuse.
 *
 * Caller does NOT have to hold the queue lock.
 */
void
nvme_put_request(nvme_request_t *req)
{
	nvme_subqueue_t *queue = req->subq;
	nvme_request_t *next;

	/*
	 * Insert on head for best cache reuse.
	 */
	KKASSERT(req->state == NVME_REQ_COMPLETED);
	req->state = NVME_REQ_AVAIL;
	for (;;) {
		next = queue->first_avail;
		cpu_ccfence();
		req->next_avail = next;
		if (atomic_cmpset_ptr(&queue->first_avail, next, req))
			break;
	}

	/*
	 * If BIOs were deferred due to lack of request space signal the
	 * admin thread to requeue them.  This is a bit messy and normally
	 * should not happen due to the large number of queue entries nvme
	 * usually has.  Let it race for now (admin has a 1hz tick).
	 */
	if (queue->signal_requeue) {
		queue->signal_requeue = 0;
		atomic_set_int(&queue->sc->admin_signal, ADMIN_SIG_REQUEUE);
		wakeup(&queue->sc->admin_signal);
	}
}

/*
 * Poll for completions on queue, copy the 16-byte hw result entry
 * into the request and poke the doorbell to update the controller's
 * understanding of comq_head.
 *
 * If lk is non-NULL it will be passed to the callback which typically
 * releases it temporarily when calling biodone() or doing other complex
 * work on the result.
 *
 * Caller must usually hold comq->lk.
 */
void
nvme_poll_completions(nvme_comqueue_t *comq, struct lock *lk)
{
	nvme_softc_t *sc = comq->sc;
	nvme_request_t *req;
	nvme_subqueue_t *subq;
	nvme_allres_t *res;
#if 0
	int didwork = 0;
#endif

	KKASSERT(comq->comq_tail < comq->nqe);
	cpu_lfence();		/* needed prior to first phase test */
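
	/*
	 * Phase tag refresher: the controller writes each completion entry
	 * with the current phase bit, which inverts every time it wraps the
	 * queue.  comq->phase starts at NVME_COMQ_STATUS_PHASE and is
	 * toggled below each time comq_tail wraps, so an entry whose phase
	 * bit does not match comq->phase has not been written yet and the
	 * scan stops there.
	 */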
	for (;;) {
		/*
		 * WARNING! LOCK MAY HAVE BEEN TEMPORARILY LOST DURING LOOP.
		 */
		res = &comq->kcomq[comq->comq_tail];
		if ((res->tail.status ^ comq->phase) & NVME_COMQ_STATUS_PHASE)
			break;

		/*
		 * Process result on completion queue.
		 *
		 * Bump comq_tail, flip the phase detect when we roll over.
		 * Doorbell every 1/4 queue and at the end of the loop.
		 */
		if (++comq->comq_tail == comq->nqe) {
			comq->comq_tail = 0;
			comq->phase ^= NVME_COMQ_STATUS_PHASE;
		}

		/*
		 * WARNING! I imploded the chip by reusing a command id
		 *	    before it was discarded in the completion queue
		 *	    via the doorbell, so for now we always write
		 *	    the doorbell before marking the request as
		 *	    COMPLETED (it can be reused instantly upon
		 *	    being marked).
		 */
#if 0
		if (++didwork == (comq->nqe >> 2)) {
			didwork = 0;
			nvme_write(comq->sc, comq->comq_doorbell_reg,
				   comq->comq_tail);
		}
#endif
		cpu_lfence();	/* needed prior to content check */

		/*
		 * Locate the request.  The request could be on a different
		 * queue.  Copy the fields and wakeup anyone waiting on req.
		 * The response field in the completion queue can be reused
		 * once we doorbell which is why we make a copy.
		 */
		subq = &sc->subqueues[res->tail.subq_id];
		req = &subq->reqary[res->tail.cmd_id];
		KKASSERT(req->state == NVME_REQ_SUBMITTED &&
			 req->comq == comq);
		req->res = *res;
		nvme_write(comq->sc, comq->comq_doorbell_reg, comq->comq_tail);
		cpu_sfence();
		req->state = NVME_REQ_COMPLETED;
		if (req->callback) {
			req->callback(req, lk);
		} else if (req->waiting) {
			wakeup(req);
		}
	}
#if 0
	if (didwork)
		nvme_write(comq->sc, comq->comq_doorbell_reg, comq->comq_tail);
#endif
}

void
nvme_intr(void *arg)
{
	nvme_comqueue_t *comq = arg;
	nvme_softc_t *sc;
	int i;
	int skip;

	sc = comq->sc;
	if (sc->nirqs == 1)
		skip = 1;
	else
		skip = sc->nirqs - 1;

	for (i = comq->qid; i <= sc->niocomqs; i += skip) {
		if (comq->nqe) {
			lockmgr(&comq->lk, LK_EXCLUSIVE);
			nvme_poll_completions(comq, &comq->lk);
			lockmgr(&comq->lk, LK_RELEASE);
		}
		comq += skip;
	}
}
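
/*
 * Interrupt-to-queue mapping note: with multiple MSI-X vectors, vector 0 is
 * expected to service the admin completion queue and each remaining vector
 * services every (nirqs - 1)'th I/O completion queue, which is why the loop
 * above strides by skip.  With a single (shared or legacy) vector the
 * handler simply walks every completion queue.
 */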

/*
 * ADMIN HELPER COMMAND ROLLUP FUNCTIONS
 */
/*
 * Issue command to create a submission queue.
 */
int
nvme_create_subqueue(nvme_softc_t *sc, uint16_t qid)
{
	nvme_request_t *req;
	nvme_subqueue_t *subq = &sc->subqueues[qid];
	int status;

	req = nvme_get_admin_request(sc, NVME_OP_CREATE_SUBQ);
	req->cmd.head.prp1 = subq->psubq;
	req->cmd.crsub.subq_id = qid;
	req->cmd.crsub.subq_size = subq->nqe - 1;	/* 0's based value */
	req->cmd.crsub.flags = NVME_CREATESUB_PC | NVME_CREATESUB_PRI_URG;
	req->cmd.crsub.comq_id = subq->comqid;

	nvme_submit_request(req);
	status = nvme_wait_request(req, hz);
	nvme_put_request(req);

	return status;
}

/*
 * Issue command to create a completion queue.
 */
int
nvme_create_comqueue(nvme_softc_t *sc, uint16_t qid)
{
	nvme_request_t *req;
	nvme_comqueue_t *comq = &sc->comqueues[qid];
	int status;
	int error;
	uint16_t ivect;

	error = 0;
	if (sc->nirqs > 1) {
		ivect = 1 + (qid - 1) % (sc->nirqs - 1);
		if (qid && ivect == qid) {
			error = bus_setup_intr(sc->dev, sc->irq[ivect],
						INTR_MPSAFE,
						nvme_intr,
						&sc->comqueues[ivect],
						&sc->irq_handle[ivect],
						NULL);
		}
	} else {
		ivect = 0;
	}
	if (error)
		return error;

	req = nvme_get_admin_request(sc, NVME_OP_CREATE_COMQ);
	req->cmd.head.prp1 = comq->pcomq;
	req->cmd.crcom.comq_id = qid;
	req->cmd.crcom.comq_size = comq->nqe - 1;	/* 0's based value */
	req->cmd.crcom.ivect = ivect;
	req->cmd.crcom.flags = NVME_CREATECOM_PC | NVME_CREATECOM_IEN;

	nvme_submit_request(req);
	status = nvme_wait_request(req, hz);
	nvme_put_request(req);

	return status;
}
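
/*
 * Ordering note: the NVMe spec requires a completion queue to exist before
 * any submission queue that references it is created (crsub.comq_id above),
 * and conversely submission queues should be deleted before their completion
 * queue.  The attach/detach code is assumed to call these helpers in that
 * order.
 */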

/*
 * Issue command to delete a submission queue.
 */
int
nvme_delete_subqueue(nvme_softc_t *sc, uint16_t qid)
{
	nvme_request_t *req;
	/*nvme_subqueue_t *subq = &sc->subqueues[qid];*/
	int status;

	req = nvme_get_admin_request(sc, NVME_OP_DELETE_SUBQ);
	req->cmd.head.prp1 = 0;
	req->cmd.delete.qid = qid;

	nvme_submit_request(req);
	status = nvme_wait_request(req, hz);
	nvme_put_request(req);

	return status;
}

/*
 * Issue command to delete a completion queue.
 */
int
nvme_delete_comqueue(nvme_softc_t *sc, uint16_t qid)
{
	nvme_request_t *req;
	/*nvme_comqueue_t *comq = &sc->comqueues[qid];*/
	int status;
	uint16_t ivect;

	req = nvme_get_admin_request(sc, NVME_OP_DELETE_COMQ);
	req->cmd.head.prp1 = 0;
	req->cmd.delete.qid = qid;

	nvme_submit_request(req);
	status = nvme_wait_request(req, hz);
	nvme_put_request(req);

	if (qid && sc->nirqs > 1) {
		ivect = 1 + (qid - 1) % (sc->nirqs - 1);
		if (ivect == qid) {
			bus_teardown_intr(sc->dev,
					  sc->irq[ivect],
					  sc->irq_handle[ivect]);
		}
	}

	return status;
}

/*
 * Issue friendly shutdown to controller.
 */
int
nvme_issue_shutdown(nvme_softc_t *sc)
{
	uint32_t reg;
	int base_ticks;
	int error;

	/*
	 * Put us in shutdown
	 */
	reg = nvme_read(sc, NVME_REG_CONFIG);
	reg &= ~NVME_CONFIG_SHUT_MASK;
	reg |= NVME_CONFIG_SHUT_NORM;
	nvme_write(sc, NVME_REG_CONFIG, reg);

	/*
	 * Wait up to 10 seconds for acknowledgement
	 */
	error = ENXIO;
	base_ticks = ticks;
	while ((int)(ticks - base_ticks) < 10 * 20) {
		reg = nvme_read(sc, NVME_REG_STATUS);
		if ((reg & NVME_STATUS_SHUT_MASK) == NVME_STATUS_SHUT_DONE) {
			error = 0;
			break;
		}
		nvme_os_sleep(50);	/* 50ms poll */
	}
	if (error)
		device_printf(sc->dev, "Unable to shutdown chip nicely\n");
	else
		device_printf(sc->dev, "Normal chip shutdown succeeded\n");

	return error;
}

/*
 * Make space-padded serial and model number strings more readable.
 */
size_t
string_cleanup(char *str, int domiddle)
{
	size_t i;
	size_t j;
	int atbeg = 1;

	for (i = j = 0; str[i]; ++i) {
		if ((str[i] == ' ' || str[i] == '\r') &&
		    (atbeg || domiddle)) {
			continue;
		} else {
			atbeg = 0;
		}
		str[j] = str[i];
		++j;
	}
	while (domiddle == 0 && j > 0 && (str[j-1] == ' ' || str[j-1] == '\r'))
		--j;
	str[j] = 0;
	if (domiddle == 0) {
		for (j = 0; str[j]; ++j) {
			if (str[j] == ' ')
				str[j] = '_';
		}
	}

	return j;
}
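
/*
 * Example (illustrative input only): with domiddle == 0 an identify string
 * such as "  ACME NVMe 512GB   " becomes "ACME_NVMe_512GB": leading blanks
 * are skipped, trailing blanks and CRs are trimmed, and any spaces left in
 * the middle are converted to underscores.  With domiddle != 0 all spaces
 * and CRs are removed outright.
 */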