1 /*	$OpenBSD: nvme.c,v 1.83 2020/07/20 14:41:13 krw Exp $ */
2 
3 /*
4  * Copyright (c) 2014 David Gwynne <dlg@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/buf.h>
22 #include <sys/kernel.h>
23 #include <sys/malloc.h>
24 #include <sys/device.h>
25 #include <sys/queue.h>
26 #include <sys/mutex.h>
27 #include <sys/pool.h>
28 
29 #include <sys/atomic.h>
30 
31 #include <machine/bus.h>
32 
33 #include <scsi/scsi_all.h>
34 #include <scsi/scsi_disk.h>
35 #include <scsi/scsiconf.h>
36 
37 #include <dev/ic/nvmereg.h>
38 #include <dev/ic/nvmevar.h>
39 
40 struct cfdriver nvme_cd = {
41 	NULL,
42 	"nvme",
43 	DV_DULL
44 };
45 
46 int	nvme_ready(struct nvme_softc *, u_int32_t);
47 int	nvme_enable(struct nvme_softc *);
48 int	nvme_disable(struct nvme_softc *);
49 int	nvme_shutdown(struct nvme_softc *);
50 int	nvme_resume(struct nvme_softc *);
51 
52 void	nvme_dumpregs(struct nvme_softc *);
53 int	nvme_identify(struct nvme_softc *, u_int);
54 void	nvme_fill_identify(struct nvme_softc *, struct nvme_ccb *, void *);
55 
56 int	nvme_ccbs_alloc(struct nvme_softc *, u_int);
57 void	nvme_ccbs_free(struct nvme_softc *, u_int);
58 
59 void *	nvme_ccb_get(void *);
60 void	nvme_ccb_put(void *, void *);
61 
62 int	nvme_poll(struct nvme_softc *, struct nvme_queue *, struct nvme_ccb *,
63 	    void (*)(struct nvme_softc *, struct nvme_ccb *, void *));
64 void	nvme_poll_fill(struct nvme_softc *, struct nvme_ccb *, void *);
65 void	nvme_poll_done(struct nvme_softc *, struct nvme_ccb *,
66 	    struct nvme_cqe *);
67 void	nvme_sqe_fill(struct nvme_softc *, struct nvme_ccb *, void *);
68 void	nvme_empty_done(struct nvme_softc *, struct nvme_ccb *,
69 	    struct nvme_cqe *);
70 
71 struct nvme_queue *
72 	nvme_q_alloc(struct nvme_softc *, u_int16_t, u_int, u_int);
73 int	nvme_q_create(struct nvme_softc *, struct nvme_queue *);
74 int	nvme_q_reset(struct nvme_softc *, struct nvme_queue *);
75 int	nvme_q_delete(struct nvme_softc *, struct nvme_queue *);
76 void	nvme_q_submit(struct nvme_softc *,
77 	    struct nvme_queue *, struct nvme_ccb *,
78 	    void (*)(struct nvme_softc *, struct nvme_ccb *, void *));
79 int	nvme_q_complete(struct nvme_softc *, struct nvme_queue *);
80 void	nvme_q_free(struct nvme_softc *, struct nvme_queue *);
81 
82 struct nvme_dmamem *
83 	nvme_dmamem_alloc(struct nvme_softc *, size_t);
84 void	nvme_dmamem_free(struct nvme_softc *, struct nvme_dmamem *);
85 void	nvme_dmamem_sync(struct nvme_softc *, struct nvme_dmamem *, int);
86 
87 void	nvme_scsi_cmd(struct scsi_xfer *);
88 void	nvme_minphys(struct buf *, struct scsi_link *);
89 int	nvme_scsi_probe(struct scsi_link *);
90 void	nvme_scsi_free(struct scsi_link *);
91 
92 #ifdef HIBERNATE
93 #include <uvm/uvm_extern.h>
94 #include <sys/hibernate.h>
95 #include <sys/disk.h>
96 #include <sys/disklabel.h>
97 
98 int	nvme_hibernate_io(dev_t, daddr_t, vaddr_t, size_t, int, void *);
99 #endif
100 
101 struct scsi_adapter nvme_switch = {
102 	nvme_scsi_cmd, nvme_minphys, nvme_scsi_probe, nvme_scsi_free, NULL
103 };
104 
105 void	nvme_scsi_io(struct scsi_xfer *, int);
106 void	nvme_scsi_io_fill(struct nvme_softc *, struct nvme_ccb *, void *);
107 void	nvme_scsi_io_done(struct nvme_softc *, struct nvme_ccb *,
108 	    struct nvme_cqe *);
109 
110 void	nvme_scsi_sync(struct scsi_xfer *);
111 void	nvme_scsi_sync_fill(struct nvme_softc *, struct nvme_ccb *, void *);
112 void	nvme_scsi_sync_done(struct nvme_softc *, struct nvme_ccb *,
113 	    struct nvme_cqe *);
114 
115 void	nvme_scsi_inq(struct scsi_xfer *);
116 void	nvme_scsi_inquiry(struct scsi_xfer *);
117 void	nvme_scsi_capacity16(struct scsi_xfer *);
118 void	nvme_scsi_capacity(struct scsi_xfer *);
119 
120 #define nvme_read4(_s, _r) \
121 	bus_space_read_4((_s)->sc_iot, (_s)->sc_ioh, (_r))
122 #define nvme_write4(_s, _r, _v) \
123 	bus_space_write_4((_s)->sc_iot, (_s)->sc_ioh, (_r), (_v))
124 /*
125  * Some controllers, at least Apple NVMe, always require split
126  * transfers, so don't use bus_space_{read,write}_8() on LP64.
127  */
128 static inline u_int64_t
129 nvme_read8(struct nvme_softc *sc, bus_size_t r)
130 {
131 	u_int64_t v;
132 	u_int32_t *a = (u_int32_t *)&v;
133 
134 #if _BYTE_ORDER == _LITTLE_ENDIAN
135 	a[0] = nvme_read4(sc, r);
136 	a[1] = nvme_read4(sc, r + 4);
137 #else /* _BYTE_ORDER == _LITTLE_ENDIAN */
138 	a[1] = nvme_read4(sc, r);
139 	a[0] = nvme_read4(sc, r + 4);
140 #endif
141 
142 	return (v);
143 }
144 
145 static inline void
146 nvme_write8(struct nvme_softc *sc, bus_size_t r, u_int64_t v)
147 {
148 	u_int32_t *a = (u_int32_t *)&v;
149 
150 #if _BYTE_ORDER == _LITTLE_ENDIAN
151 	nvme_write4(sc, r, a[0]);
152 	nvme_write4(sc, r + 4, a[1]);
153 #else /* _BYTE_ORDER == _LITTLE_ENDIAN */
154 	nvme_write4(sc, r, a[1]);
155 	nvme_write4(sc, r + 4, a[0]);
156 #endif
157 }
158 #define nvme_barrier(_s, _r, _l, _f) \
159 	bus_space_barrier((_s)->sc_iot, (_s)->sc_ioh, (_r), (_l), (_f))
160 
161 void
162 nvme_dumpregs(struct nvme_softc *sc)
163 {
164 	u_int64_t r8;
165 	u_int32_t r4;
166 
167 	r8 = nvme_read8(sc, NVME_CAP);
168 	printf("%s: cap  0x%016llx\n", DEVNAME(sc), r8);
169 	printf("%s:  mpsmax %u (%u)\n", DEVNAME(sc),
170 	    (u_int)NVME_CAP_MPSMAX(r8), (1 << NVME_CAP_MPSMAX(r8)));
171 	printf("%s:  mpsmin %u (%u)\n", DEVNAME(sc),
172 	    (u_int)NVME_CAP_MPSMIN(r8), (1 << NVME_CAP_MPSMIN(r8)));
173 	printf("%s:  css %llu\n", DEVNAME(sc), NVME_CAP_CSS(r8));
174 	printf("%s:  nssrs %llu\n", DEVNAME(sc), NVME_CAP_NSSRS(r8));
175 	printf("%s:  dstrd %u\n", DEVNAME(sc), NVME_CAP_DSTRD(r8));
176 	printf("%s:  to %llu msec\n", DEVNAME(sc), NVME_CAP_TO(r8));
177 	printf("%s:  ams %llu\n", DEVNAME(sc), NVME_CAP_AMS(r8));
178 	printf("%s:  cqr %llu\n", DEVNAME(sc), NVME_CAP_CQR(r8));
179 	printf("%s:  mqes %llu\n", DEVNAME(sc), NVME_CAP_MQES(r8));
180 
181 	printf("%s: vs   0x%04x\n", DEVNAME(sc), nvme_read4(sc, NVME_VS));
182 
183 	r4 = nvme_read4(sc, NVME_CC);
184 	printf("%s: cc   0x%04x\n", DEVNAME(sc), r4);
185 	printf("%s:  iocqes %u\n", DEVNAME(sc), NVME_CC_IOCQES_R(r4));
186 	printf("%s:  iosqes %u\n", DEVNAME(sc), NVME_CC_IOSQES_R(r4));
187 	printf("%s:  shn %u\n", DEVNAME(sc), NVME_CC_SHN_R(r4));
188 	printf("%s:  ams %u\n", DEVNAME(sc), NVME_CC_AMS_R(r4));
189 	printf("%s:  mps %u\n", DEVNAME(sc), NVME_CC_MPS_R(r4));
190 	printf("%s:  css %u\n", DEVNAME(sc), NVME_CC_CSS_R(r4));
191 	printf("%s:  en %u\n", DEVNAME(sc), ISSET(r4, NVME_CC_EN));
192 
193 	printf("%s: csts 0x%08x\n", DEVNAME(sc), nvme_read4(sc, NVME_CSTS));
194 	printf("%s: aqa  0x%08x\n", DEVNAME(sc), nvme_read4(sc, NVME_AQA));
195 	printf("%s: asq  0x%016llx\n", DEVNAME(sc), nvme_read8(sc, NVME_ASQ));
196 	printf("%s: acq  0x%016llx\n", DEVNAME(sc), nvme_read8(sc, NVME_ACQ));
197 }
198 
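/*
 * Spin until CSTS.RDY matches the requested state, giving up and
 * returning non-zero once roughly sc_rdy_to milliseconds (derived from
 * CAP.TO at attach time) have passed.
 */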
199 int
200 nvme_ready(struct nvme_softc *sc, u_int32_t rdy)
201 {
202 	u_int i = 0;
203 
204 	while ((nvme_read4(sc, NVME_CSTS) & NVME_CSTS_RDY) != rdy) {
205 		if (i++ > sc->sc_rdy_to)
206 			return (1);
207 
208 		delay(1000);
209 		nvme_barrier(sc, NVME_CSTS, 4, BUS_SPACE_BARRIER_READ);
210 	}
211 
212 	return (0);
213 }
214 
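/*
 * Bring the controller up: if it is already enabled just wait for RDY,
 * otherwise program the admin queue registers (AQA/ASQ/ACQ), set the
 * queue entry sizes, arbitration, memory page size and CC.EN, then
 * wait for CSTS.RDY to assert.
 */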
215 int
216 nvme_enable(struct nvme_softc *sc)
217 {
218 	u_int32_t cc;
219 
220 	cc = nvme_read4(sc, NVME_CC);
221 	if (ISSET(cc, NVME_CC_EN))
222 		return (nvme_ready(sc, NVME_CSTS_RDY));
223 
224 	nvme_write4(sc, NVME_AQA, NVME_AQA_ACQS(sc->sc_admin_q->q_entries) |
225 	    NVME_AQA_ASQS(sc->sc_admin_q->q_entries));
226 	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);
227 
228 	nvme_write8(sc, NVME_ASQ, NVME_DMA_DVA(sc->sc_admin_q->q_sq_dmamem));
229 	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);
230 	nvme_write8(sc, NVME_ACQ, NVME_DMA_DVA(sc->sc_admin_q->q_cq_dmamem));
231 	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);
232 
233 	CLR(cc, NVME_CC_IOCQES_MASK | NVME_CC_IOSQES_MASK | NVME_CC_SHN_MASK |
234 	    NVME_CC_AMS_MASK | NVME_CC_MPS_MASK | NVME_CC_CSS_MASK);
235 	SET(cc, NVME_CC_IOSQES(6));	/* Submission queue size == 2**6 (64) */
236 	SET(cc, NVME_CC_IOCQES(4));	/* Completion queue size == 2**4 (16) */
237 	SET(cc, NVME_CC_SHN(NVME_CC_SHN_NONE));
238 	SET(cc, NVME_CC_CSS(NVME_CC_CSS_NVM));
239 	SET(cc, NVME_CC_AMS(NVME_CC_AMS_RR));
240 	SET(cc, NVME_CC_MPS(ffs(sc->sc_mps) - 1));
241 	SET(cc, NVME_CC_EN);
242 
243 	nvme_write4(sc, NVME_CC, cc);
244 	nvme_barrier(sc, 0, sc->sc_ios,
245 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
246 
247 	return (nvme_ready(sc, NVME_CSTS_RDY));
248 }
249 
250 int
251 nvme_disable(struct nvme_softc *sc)
252 {
253 	u_int32_t cc, csts;
254 
255 	cc = nvme_read4(sc, NVME_CC);
256 	if (ISSET(cc, NVME_CC_EN)) {
257 		csts = nvme_read4(sc, NVME_CSTS);
258 		if (!ISSET(csts, NVME_CSTS_CFS) &&
259 		    nvme_ready(sc, NVME_CSTS_RDY) != 0)
260 			return (1);
261 	}
262 
263 	CLR(cc, NVME_CC_EN);
264 
265 	nvme_write4(sc, NVME_CC, cc);
266 	nvme_barrier(sc, 0, sc->sc_ios,
267 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
268 
269 	return (nvme_ready(sc, 0));
270 }
271 
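/*
 * Controller attachment: validate the register mapping and
 * capabilities, reset the controller, set up the admin queue and an
 * initial ccb pool, identify the controller, create the I/O queue,
 * allocate the hibernate queue, and finally attach scsibus on top with
 * one target per namespace.
 */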
272 int
273 nvme_attach(struct nvme_softc *sc)
274 {
275 	struct scsibus_attach_args saa;
276 	u_int64_t cap;
277 	u_int32_t reg;
278 	u_int nccbs = 0;
279 
280 	mtx_init(&sc->sc_ccb_mtx, IPL_BIO);
281 	SIMPLEQ_INIT(&sc->sc_ccb_list);
282 	scsi_iopool_init(&sc->sc_iopool, sc, nvme_ccb_get, nvme_ccb_put);
283 
284 	reg = nvme_read4(sc, NVME_VS);
285 	if (reg == 0xffffffff) {
286 		printf(", invalid mapping\n");
287 		return (1);
288 	}
289 
290 	printf(", NVMe %d.%d\n", NVME_VS_MJR(reg), NVME_VS_MNR(reg));
291 
292 	cap = nvme_read8(sc, NVME_CAP);
293 	sc->sc_dstrd = NVME_CAP_DSTRD(cap);
294 	if (NVME_CAP_MPSMIN(cap) > PAGE_SHIFT) {
295 		printf("%s: NVMe minimum page size %u "
296 		    "is greater than CPU page size %u\n", DEVNAME(sc),
297 		    1 << NVME_CAP_MPSMIN(cap), 1 << PAGE_SHIFT);
298 		return (1);
299 	}
300 	if (NVME_CAP_MPSMAX(cap) < PAGE_SHIFT)
301 		sc->sc_mps = 1 << NVME_CAP_MPSMAX(cap);
302 	else
303 		sc->sc_mps = 1 << PAGE_SHIFT;
304 
305 	sc->sc_rdy_to = NVME_CAP_TO(cap);
306 	sc->sc_mdts = MAXPHYS;
307 	sc->sc_max_prpl = sc->sc_mdts / sc->sc_mps;
308 
309 	if (nvme_disable(sc) != 0) {
310 		printf("%s: unable to disable controller\n", DEVNAME(sc));
311 		return (1);
312 	}
313 
314 	sc->sc_admin_q = nvme_q_alloc(sc, NVME_ADMIN_Q, 128, sc->sc_dstrd);
315 	if (sc->sc_admin_q == NULL) {
316 		printf("%s: unable to allocate admin queue\n", DEVNAME(sc));
317 		return (1);
318 	}
319 
320 	if (nvme_ccbs_alloc(sc, 16) != 0) {
321 		printf("%s: unable to allocate initial ccbs\n", DEVNAME(sc));
322 		goto free_admin_q;
323 	}
324 	nccbs = 16;
325 
326 	if (nvme_enable(sc) != 0) {
327 		printf("%s: unable to enable controller\n", DEVNAME(sc));
328 		goto free_ccbs;
329 	}
330 
331 	if (nvme_identify(sc, NVME_CAP_MPSMIN(cap)) != 0) {
332 		printf("%s: unable to identify controller\n", DEVNAME(sc));
333 		goto disable;
334 	}
335 
336 	/* We now know the real values of sc_mdts and sc_max_prpl. */
337 	nvme_ccbs_free(sc, nccbs);
338 	if (nvme_ccbs_alloc(sc, 64) != 0) {
339 		printf("%s: unable to allocate ccbs\n", DEVNAME(sc));
340 		goto free_admin_q;
341 	}
342 	nccbs = 64;
343 
344 	sc->sc_q = nvme_q_alloc(sc, NVME_IO_Q, 128, sc->sc_dstrd);
345 	if (sc->sc_q == NULL) {
346 		printf("%s: unable to allocate io q\n", DEVNAME(sc));
347 		goto disable;
348 	}
349 
350 	if (nvme_q_create(sc, sc->sc_q) != 0) {
351 		printf("%s: unable to create io q\n", DEVNAME(sc));
352 		goto free_q;
353 	}
354 
355 	sc->sc_hib_q = nvme_q_alloc(sc, NVME_HIB_Q, 4, sc->sc_dstrd);
356 	if (sc->sc_hib_q == NULL) {
357 		printf("%s: unable to allocate hibernate io queue\n", DEVNAME(sc));
358 		goto free_q;
359 	}
360 
361 	nvme_write4(sc, NVME_INTMC, 1);
362 
363 	sc->sc_namespaces = mallocarray(sc->sc_nn + 1,
364 	    sizeof(*sc->sc_namespaces), M_DEVBUF, M_WAITOK|M_ZERO);
365 
366 	saa.saa_adapter = &nvme_switch;
367 	saa.saa_adapter_softc = sc;
368 	saa.saa_adapter_buswidth = sc->sc_nn + 1;
369 	saa.saa_luns = 1;
370 	saa.saa_adapter_target = 0;
371 	saa.saa_openings = 64;
372 	saa.saa_pool = &sc->sc_iopool;
373 	saa.saa_quirks = saa.saa_flags = 0;
374 	saa.saa_wwpn = saa.saa_wwnn = 0;
375 
376 	config_found(&sc->sc_dev, &saa, scsiprint);
377 
378 	return (0);
379 
380 free_q:
381 	nvme_q_free(sc, sc->sc_q);
382 disable:
383 	nvme_disable(sc);
384 free_ccbs:
385 	nvme_ccbs_free(sc, nccbs);
386 free_admin_q:
387 	nvme_q_free(sc, sc->sc_admin_q);
388 
389 	return (1);
390 }
391 
392 int
393 nvme_resume(struct nvme_softc *sc)
394 {
395 	if (nvme_disable(sc) != 0) {
396 		printf("%s: unable to disable controller\n", DEVNAME(sc));
397 		return (1);
398 	}
399 
400 	if (nvme_q_reset(sc, sc->sc_admin_q) != 0) {
401 		printf("%s: unable to reset admin queue\n", DEVNAME(sc));
402 		return (1);
403 	}
404 
405 	if (nvme_enable(sc) != 0) {
406 		printf("%s: unable to enable controller\n", DEVNAME(sc));
407 		return (1);
408 	}
409 
410 	sc->sc_q = nvme_q_alloc(sc, NVME_IO_Q, 128, sc->sc_dstrd);
411 	if (sc->sc_q == NULL) {
412 		printf("%s: unable to allocate io q\n", DEVNAME(sc));
413 		goto disable;
414 	}
415 
416 	if (nvme_q_create(sc, sc->sc_q) != 0) {
417 		printf("%s: unable to create io q\n", DEVNAME(sc));
418 		goto free_q;
419 	}
420 
421 	nvme_write4(sc, NVME_INTMC, 1);
422 
423 	return (0);
424 
425 free_q:
426 	nvme_q_free(sc, sc->sc_q);
427 disable:
428 	nvme_disable(sc);
429 
430 	return (1);
431 }
432 
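/*
 * Probe a target by issuing IDENTIFY NAMESPACE (CNS 0) for it on the
 * admin queue. A copy of the identify data is kept only for namespaces
 * whose size (nsze) is non-zero.
 */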
433 int
434 nvme_scsi_probe(struct scsi_link *link)
435 {
436 	struct nvme_softc *sc = link->bus->sb_adapter_softc;
437 	struct nvme_sqe sqe;
438 	struct nvm_identify_namespace *identify;
439 	struct nvme_dmamem *mem;
440 	struct nvme_ccb *ccb;
441 	int rv;
442 
443 	ccb = scsi_io_get(&sc->sc_iopool, 0);
444 	KASSERT(ccb != NULL);
445 
446 	mem = nvme_dmamem_alloc(sc, sizeof(*identify));
447 	if (mem == NULL) {
448 		scsi_io_put(&sc->sc_iopool, ccb);
		return (ENOMEM);
	}
449 
450 	memset(&sqe, 0, sizeof(sqe));
451 	sqe.opcode = NVM_ADMIN_IDENTIFY;
452 	htolem32(&sqe.nsid, link->target);
453 	htolem64(&sqe.entry.prp[0], NVME_DMA_DVA(mem));
454 	htolem32(&sqe.cdw10, 0);
455 
456 	ccb->ccb_done = nvme_empty_done;
457 	ccb->ccb_cookie = &sqe;
458 
459 	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD);
460 	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill);
461 	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD);
462 
463 	scsi_io_put(&sc->sc_iopool, ccb);
464 
465 	identify = NVME_DMA_KVA(mem);
466 	if (rv == 0 && lemtoh64(&identify->nsze) > 0) {
467 		/* Commit namespace if it has a size greater than zero. */
468 		identify = malloc(sizeof(*identify), M_DEVBUF, M_WAITOK);
469 		memcpy(identify, NVME_DMA_KVA(mem), sizeof(*identify));
470 		sc->sc_namespaces[link->target].ident = identify;
471 	}
472 
473 	nvme_dmamem_free(sc, mem);
474 
475 	return (rv);
476 }
477 
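/*
 * Orderly shutdown: mask controller interrupts, delete the I/O queue,
 * request a normal shutdown through CC.SHN and poll CSTS.SHST for up
 * to roughly four seconds before falling back to simply disabling the
 * controller.
 */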
478 int
479 nvme_shutdown(struct nvme_softc *sc)
480 {
481 	u_int32_t cc, csts;
482 	int i;
483 
484 	nvme_write4(sc, NVME_INTMS, 1);
485 
486 	if (nvme_q_delete(sc, sc->sc_q) != 0) {
487 		printf("%s: unable to delete q, disabling\n", DEVNAME(sc));
488 		goto disable;
489 	}
490 
491 	cc = nvme_read4(sc, NVME_CC);
492 	CLR(cc, NVME_CC_SHN_MASK);
493 	SET(cc, NVME_CC_SHN(NVME_CC_SHN_NORMAL));
494 	nvme_write4(sc, NVME_CC, cc);
495 
496 	for (i = 0; i < 4000; i++) {
497 		nvme_barrier(sc, 0, sc->sc_ios,
498 		    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
499 		csts = nvme_read4(sc, NVME_CSTS);
500 		if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_DONE)
501 			return (0);
502 
503 		delay(1000);
504 	}
505 
506 	printf("%s: unable to shutdown, disabling\n", DEVNAME(sc));
507 
508 disable:
509 	nvme_disable(sc);
510 	return (0);
511 }
512 
513 int
514 nvme_activate(struct nvme_softc *sc, int act)
515 {
516 	int rv;
517 
518 	switch (act) {
519 	case DVACT_POWERDOWN:
520 		rv = config_activate_children(&sc->sc_dev, act);
521 		nvme_shutdown(sc);
522 		break;
523 	case DVACT_RESUME:
524 		rv = nvme_resume(sc);
525 		if (rv == 0)
526 			rv = config_activate_children(&sc->sc_dev, act);
527 		break;
528 	default:
529 		rv = config_activate_children(&sc->sc_dev, act);
530 		break;
531 	}
532 
533 	return (rv);
534 }
535 
536 void
537 nvme_scsi_cmd(struct scsi_xfer *xs)
538 {
539 	switch (xs->cmd->opcode) {
540 	case READ_COMMAND:
541 	case READ_BIG:
542 	case READ_12:
543 	case READ_16:
544 		nvme_scsi_io(xs, SCSI_DATA_IN);
545 		return;
546 	case WRITE_COMMAND:
547 	case WRITE_BIG:
548 	case WRITE_12:
549 	case WRITE_16:
550 		nvme_scsi_io(xs, SCSI_DATA_OUT);
551 		return;
552 
553 	case SYNCHRONIZE_CACHE:
554 		nvme_scsi_sync(xs);
555 		return;
556 
557 	case INQUIRY:
558 		nvme_scsi_inq(xs);
559 		return;
560 	case READ_CAPACITY_16:
561 		nvme_scsi_capacity16(xs);
562 		return;
563 	case READ_CAPACITY:
564 		nvme_scsi_capacity(xs);
565 		return;
566 
567 	case TEST_UNIT_READY:
568 	case PREVENT_ALLOW:
569 	case START_STOP:
570 		xs->error = XS_NOERROR;
571 		scsi_done(xs);
572 		return;
573 
574 	default:
575 		break;
576 	}
577 
578 	xs->error = XS_DRIVER_STUFFUP;
579 	scsi_done(xs);
580 }
581 
582 void
583 nvme_minphys(struct buf *bp, struct scsi_link *link)
584 {
585 	struct nvme_softc *sc = link->bus->sb_adapter_softc;
586 
587 	if (bp->b_bcount > sc->sc_mdts)
588 		bp->b_bcount = sc->sc_mdts;
589 }
590 
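/*
 * Issue a READ/WRITE xfer: load the data into the ccb dmamap, set up
 * the per-ccb PRP list when more than two segments are needed, then
 * either poll (SCSI_POLL) or queue the command on the I/O queue.
 */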
591 void
592 nvme_scsi_io(struct scsi_xfer *xs, int dir)
593 {
594 	struct scsi_link *link = xs->sc_link;
595 	struct nvme_softc *sc = link->bus->sb_adapter_softc;
596 	struct nvme_ccb *ccb = xs->io;
597 	bus_dmamap_t dmap = ccb->ccb_dmamap;
598 	int i;
599 
600 	if ((xs->flags & (SCSI_DATA_IN|SCSI_DATA_OUT)) != dir)
601 		goto stuffup;
602 
603 	ccb->ccb_done = nvme_scsi_io_done;
604 	ccb->ccb_cookie = xs;
605 
606 	if (bus_dmamap_load(sc->sc_dmat, dmap,
607 	    xs->data, xs->datalen, NULL, ISSET(xs->flags, SCSI_NOSLEEP) ?
608 	    BUS_DMA_NOWAIT : BUS_DMA_WAITOK) != 0)
609 		goto stuffup;
610 
611 	bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize,
612 	    ISSET(xs->flags, SCSI_DATA_IN) ?
613 	    BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE);
614 
615 	if (dmap->dm_nsegs > 2) {
616 		for (i = 1; i < dmap->dm_nsegs; i++) {
617 			htolem64(&ccb->ccb_prpl[i - 1],
618 			    dmap->dm_segs[i].ds_addr);
619 		}
620 		bus_dmamap_sync(sc->sc_dmat,
621 		    NVME_DMA_MAP(sc->sc_ccb_prpls),
622 		    ccb->ccb_prpl_off,
623 		    sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1),
624 		    BUS_DMASYNC_PREWRITE);
625 	}
626 
627 	if (ISSET(xs->flags, SCSI_POLL)) {
628 		nvme_poll(sc, sc->sc_q, ccb, nvme_scsi_io_fill);
629 		return;
630 	}
631 
632 	nvme_q_submit(sc, sc->sc_q, ccb, nvme_scsi_io_fill);
633 	return;
634 
635 stuffup:
636 	xs->error = XS_DRIVER_STUFFUP;
637 	scsi_done(xs);
638 }
639 
640 void
641 nvme_scsi_io_fill(struct nvme_softc *sc, struct nvme_ccb *ccb, void *slot)
642 {
643 	struct nvme_sqe_io *sqe = slot;
644 	struct scsi_xfer *xs = ccb->ccb_cookie;
645 	struct scsi_link *link = xs->sc_link;
646 	bus_dmamap_t dmap = ccb->ccb_dmamap;
647 	u_int64_t lba;
648 	u_int32_t blocks;
649 
650 	scsi_cmd_rw_decode(xs->cmd, &lba, &blocks);
651 
652 	sqe->opcode = ISSET(xs->flags, SCSI_DATA_IN) ?
653 	    NVM_CMD_READ : NVM_CMD_WRITE;
654 	htolem32(&sqe->nsid, link->target);
655 
656 	htolem64(&sqe->entry.prp[0], dmap->dm_segs[0].ds_addr);
657 	switch (dmap->dm_nsegs) {
658 	case 1:
659 		break;
660 	case 2:
661 		htolem64(&sqe->entry.prp[1], dmap->dm_segs[1].ds_addr);
662 		break;
663 	default:
664 		/* the prp list is already set up and synced */
665 		htolem64(&sqe->entry.prp[1], ccb->ccb_prpl_dva);
666 		break;
667 	}
668 
669 	htolem64(&sqe->slba, lba);
670 	htolem16(&sqe->nlb, blocks - 1);
671 }
672 
673 void
674 nvme_scsi_io_done(struct nvme_softc *sc, struct nvme_ccb *ccb,
675     struct nvme_cqe *cqe)
676 {
677 	struct scsi_xfer *xs = ccb->ccb_cookie;
678 	bus_dmamap_t dmap = ccb->ccb_dmamap;
679 	u_int16_t flags;
680 
681 	if (dmap->dm_nsegs > 2) {
682 		bus_dmamap_sync(sc->sc_dmat,
683 		    NVME_DMA_MAP(sc->sc_ccb_prpls),
684 		    ccb->ccb_prpl_off,
685 		    sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1),
686 		    BUS_DMASYNC_POSTWRITE);
687 	}
688 
689 	bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize,
690 	    ISSET(xs->flags, SCSI_DATA_IN) ?
691 	    BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE);
692 
693 	bus_dmamap_unload(sc->sc_dmat, dmap);
694 
695 	flags = lemtoh16(&cqe->flags);
696 
697 	xs->error = (NVME_CQE_SC(flags) == NVME_CQE_SC_SUCCESS) ?
698 	    XS_NOERROR : XS_DRIVER_STUFFUP;
699 	xs->status = SCSI_OK;
700 	xs->resid = 0;
701 	scsi_done(xs);
702 }
703 
704 void
705 nvme_scsi_sync(struct scsi_xfer *xs)
706 {
707 	struct scsi_link *link = xs->sc_link;
708 	struct nvme_softc *sc = link->bus->sb_adapter_softc;
709 	struct nvme_ccb *ccb = xs->io;
710 
711 	ccb->ccb_done = nvme_scsi_sync_done;
712 	ccb->ccb_cookie = xs;
713 
714 	if (ISSET(xs->flags, SCSI_POLL)) {
715 		nvme_poll(sc, sc->sc_q, ccb, nvme_scsi_sync_fill);
716 		return;
717 	}
718 
719 	nvme_q_submit(sc, sc->sc_q, ccb, nvme_scsi_sync_fill);
720 }
721 
722 void
723 nvme_scsi_sync_fill(struct nvme_softc *sc, struct nvme_ccb *ccb, void *slot)
724 {
725 	struct nvme_sqe *sqe = slot;
726 	struct scsi_xfer *xs = ccb->ccb_cookie;
727 	struct scsi_link *link = xs->sc_link;
728 
729 	sqe->opcode = NVM_CMD_FLUSH;
730 	htolem32(&sqe->nsid, link->target);
731 }
732 
733 void
734 nvme_scsi_sync_done(struct nvme_softc *sc, struct nvme_ccb *ccb,
735     struct nvme_cqe *cqe)
736 {
737 	struct scsi_xfer *xs = ccb->ccb_cookie;
738 	u_int16_t flags;
739 
740 	flags = lemtoh16(&cqe->flags);
741 
742 	xs->error = (NVME_CQE_SC(flags) == NVME_CQE_SC_SUCCESS) ?
743 	    XS_NOERROR : XS_DRIVER_STUFFUP;
744 	xs->status = SCSI_OK;
745 	xs->resid = 0;
746 	scsi_done(xs);
747 }
748 
749 void
750 nvme_scsi_inq(struct scsi_xfer *xs)
751 {
752 	struct scsi_inquiry *inq = (struct scsi_inquiry *)xs->cmd;
753 
754 	if (!ISSET(inq->flags, SI_EVPD)) {
755 		nvme_scsi_inquiry(xs);
756 		return;
757 	}
758 
759 	switch (inq->pagecode) {
760 	default:
761 		/* printf("%s: %d\n", __func__, inq->pagecode); */
762 		break;
763 	}
764 
765 	xs->error = XS_DRIVER_STUFFUP;
766 	scsi_done(xs);
767 }
768 
769 void
770 nvme_scsi_inquiry(struct scsi_xfer *xs)
771 {
772 	struct scsi_inquiry_data inq;
773 	struct scsi_link *link = xs->sc_link;
774 	struct nvme_softc *sc = link->bus->sb_adapter_softc;
775 	struct nvm_identify_namespace *ns;
776 
777 	ns = sc->sc_namespaces[link->target].ident;
778 
779 	memset(&inq, 0, sizeof(inq));
780 
781 	inq.device = T_DIRECT;
782 	inq.version = 0x06; /* SPC-4 */
783 	inq.response_format = 2;
784 	inq.additional_length = 32;
785 	inq.flags |= SID_CmdQue;
786 	memcpy(inq.vendor, "NVMe    ", sizeof(inq.vendor));
787 	memcpy(inq.product, sc->sc_identify.mn, sizeof(inq.product));
788 	memcpy(inq.revision, sc->sc_identify.fr, sizeof(inq.revision));
789 
790 	memcpy(xs->data, &inq, MIN(sizeof(inq), xs->datalen));
791 
792 	xs->error = XS_NOERROR;
793 	scsi_done(xs);
794 }
795 
796 void
797 nvme_scsi_capacity16(struct scsi_xfer *xs)
798 {
799 	struct scsi_read_cap_data_16 rcd;
800 	struct scsi_link *link = xs->sc_link;
801 	struct nvme_softc *sc = link->bus->sb_adapter_softc;
802 	struct nvm_identify_namespace *ns;
803 	struct nvm_namespace_format *f;
804 	u_int64_t nsze;
805 	u_int16_t tpe = READ_CAP_16_TPE;
806 
807 	ns = sc->sc_namespaces[link->target].ident;
808 
809 	if (xs->cmdlen != sizeof(struct scsi_read_capacity_16)) {
810 		xs->error = XS_DRIVER_STUFFUP;
811 		scsi_done(xs);
812 		return;
813 	}
814 
815 	/* sd_read_cap_16() will add one */
816 	nsze = lemtoh64(&ns->nsze) - 1;
817 	f = &ns->lbaf[NVME_ID_NS_FLBAS(ns->flbas)];
818 
819 	memset(&rcd, 0, sizeof(rcd));
820 	_lto8b(nsze, rcd.addr);
821 	_lto4b(1 << f->lbads, rcd.length);
822 	_lto2b(tpe, rcd.lowest_aligned);
823 
824 	memcpy(xs->data, &rcd, MIN(sizeof(rcd), xs->datalen));
825 
826 	xs->error = XS_NOERROR;
827 	scsi_done(xs);
828 }
829 
830 void
831 nvme_scsi_capacity(struct scsi_xfer *xs)
832 {
833 	struct scsi_read_cap_data rcd;
834 	struct scsi_link *link = xs->sc_link;
835 	struct nvme_softc *sc = link->bus->sb_adapter_softc;
836 	struct nvm_identify_namespace *ns;
837 	struct nvm_namespace_format *f;
838 	u_int64_t nsze;
839 
840 	ns = sc->sc_namespaces[link->target].ident;
841 
842 	if (xs->cmdlen != sizeof(struct scsi_read_capacity)) {
843 		xs->error = XS_DRIVER_STUFFUP;
844 		scsi_done(xs);
845 		return;
846 	}
847 
848 	/* sd_read_cap_10() will add one */
849 	nsze = lemtoh64(&ns->nsze) - 1;
850 	if (nsze > 0xffffffff)
851 		nsze = 0xffffffff;
852 
853 	f = &ns->lbaf[NVME_ID_NS_FLBAS(ns->flbas)];
854 
855 	memset(&rcd, 0, sizeof(rcd));
856 	_lto4b(nsze, rcd.addr);
857 	_lto4b(1 << f->lbads, rcd.length);
858 
859 	memcpy(xs->data, &rcd, MIN(sizeof(rcd), xs->datalen));
860 
861 	xs->error = XS_NOERROR;
862 	scsi_done(xs);
863 }
864 
865 void
866 nvme_scsi_free(struct scsi_link *link)
867 {
868 	struct nvme_softc *sc = link->bus->sb_adapter_softc;
869 	struct nvm_identify_namespace *identify;
870 
871 	identify = sc->sc_namespaces[link->target].ident;
872 	sc->sc_namespaces[link->target].ident = NULL;
873 
874 	free(identify, M_DEVBUF, sizeof(*identify));
875 }
876 
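/*
 * Claim the next submission queue slot, let the caller's fill routine
 * construct the SQE in place, stamp it with the ccb id and ring the
 * submission queue tail doorbell.
 */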
877 void
878 nvme_q_submit(struct nvme_softc *sc, struct nvme_queue *q, struct nvme_ccb *ccb,
879     void (*fill)(struct nvme_softc *, struct nvme_ccb *, void *))
880 {
881 	struct nvme_sqe *sqe = NVME_DMA_KVA(q->q_sq_dmamem);
882 	u_int32_t tail;
883 
884 	mtx_enter(&q->q_sq_mtx);
885 	tail = q->q_sq_tail;
886 	if (++q->q_sq_tail >= q->q_entries)
887 		q->q_sq_tail = 0;
888 
889 	sqe += tail;
890 
891 	bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
892 	    sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_POSTWRITE);
893 	memset(sqe, 0, sizeof(*sqe));
894 	(*fill)(sc, ccb, sqe);
895 	sqe->cid = ccb->ccb_id;
896 	bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
897 	    sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_PREWRITE);
898 
899 	nvme_write4(sc, q->q_sqtdbl, q->q_sq_tail);
900 	mtx_leave(&q->q_sq_mtx);
901 }
902 
903 struct nvme_poll_state {
904 	struct nvme_sqe s;
905 	struct nvme_cqe c;
906 };
907 
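/*
 * Run a command synchronously: the SQE is built into a local
 * nvme_poll_state, copied into the queue by nvme_poll_fill, and the
 * completion queue is polled until nvme_poll_done copies the matching
 * CQE back. Returns the CQE status field, which is zero on success.
 */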
908 int
909 nvme_poll(struct nvme_softc *sc, struct nvme_queue *q, struct nvme_ccb *ccb,
910     void (*fill)(struct nvme_softc *, struct nvme_ccb *, void *))
911 {
912 	struct nvme_poll_state state;
913 	void (*done)(struct nvme_softc *, struct nvme_ccb *, struct nvme_cqe *);
914 	void *cookie;
915 	u_int16_t flags;
916 
917 	memset(&state, 0, sizeof(state));
918 	(*fill)(sc, ccb, &state.s);
919 
920 	done = ccb->ccb_done;
921 	cookie = ccb->ccb_cookie;
922 
923 	ccb->ccb_done = nvme_poll_done;
924 	ccb->ccb_cookie = &state;
925 
926 	nvme_q_submit(sc, q, ccb, nvme_poll_fill);
927 	while (!ISSET(state.c.flags, htole16(NVME_CQE_PHASE))) {
928 		if (nvme_q_complete(sc, q) == 0)
929 			delay(10);
930 
931 		/* XXX no timeout? */
932 	}
933 
934 	ccb->ccb_cookie = cookie;
935 	done(sc, ccb, &state.c);
936 
937 	flags = lemtoh16(&state.c.flags);
938 
939 	return (flags & ~NVME_CQE_PHASE);
940 }
941 
942 void
943 nvme_poll_fill(struct nvme_softc *sc, struct nvme_ccb *ccb, void *slot)
944 {
945 	struct nvme_sqe *sqe = slot;
946 	struct nvme_poll_state *state = ccb->ccb_cookie;
947 
948 	*sqe = state->s;
949 }
950 
951 void
952 nvme_poll_done(struct nvme_softc *sc, struct nvme_ccb *ccb,
953     struct nvme_cqe *cqe)
954 {
955 	struct nvme_poll_state *state = ccb->ccb_cookie;
956 
957 	state->c = *cqe;
958 	SET(state->c.flags, htole16(NVME_CQE_PHASE));
959 }
960 
961 void
962 nvme_sqe_fill(struct nvme_softc *sc, struct nvme_ccb *ccb, void *slot)
963 {
964 	struct nvme_sqe *src = ccb->ccb_cookie;
965 	struct nvme_sqe *dst = slot;
966 
967 	*dst = *src;
968 }
969 
970 void
971 nvme_empty_done(struct nvme_softc *sc, struct nvme_ccb *ccb,
972     struct nvme_cqe *cqe)
973 {
974 }
975 
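/*
 * Reap completions: walk the completion queue while the phase tag
 * matches, call each ccb's done routine, and advance the head doorbell
 * if anything was consumed. Returns non-zero if any CQE was handled,
 * or -1 if the completion mutex could not be taken.
 */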
976 int
977 nvme_q_complete(struct nvme_softc *sc, struct nvme_queue *q)
978 {
979 	struct nvme_ccb *ccb;
980 	struct nvme_cqe *ring = NVME_DMA_KVA(q->q_cq_dmamem), *cqe;
981 	u_int32_t head;
982 	u_int16_t flags;
983 	int rv = 0;
984 
985 	if (!mtx_enter_try(&q->q_cq_mtx))
986 		return (-1);
987 
988 	head = q->q_cq_head;
989 
990 	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
991 	for (;;) {
992 		cqe = &ring[head];
993 		flags = lemtoh16(&cqe->flags);
994 		if ((flags & NVME_CQE_PHASE) != q->q_cq_phase)
995 			break;
996 
997 		ccb = &sc->sc_ccbs[cqe->cid];
998 		ccb->ccb_done(sc, ccb, cqe);
999 
1000 		if (++head >= q->q_entries) {
1001 			head = 0;
1002 			q->q_cq_phase ^= NVME_CQE_PHASE;
1003 		}
1004 
1005 		rv = 1;
1006 	}
1007 	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);
1008 
1009 	if (rv)
1010 		nvme_write4(sc, q->q_cqhdbl, q->q_cq_head = head);
1011 	mtx_leave(&q->q_cq_mtx);
1012 
1013 	return (rv);
1014 }
1015 
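/*
 * IDENTIFY CONTROLLER: fetch the controller data structure, print the
 * model, firmware and serial strings, derive the real sc_mdts and
 * sc_max_prpl from MDTS, and record the namespace count (nn).
 */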
1016 int
1017 nvme_identify(struct nvme_softc *sc, u_int mpsmin)
1018 {
1019 	char sn[41], mn[81], fr[17];
1020 	struct nvm_identify_controller *identify;
1021 	struct nvme_dmamem *mem;
1022 	struct nvme_ccb *ccb;
1023 	int rv = 1;
1024 
1025 	ccb = nvme_ccb_get(sc);
1026 	if (ccb == NULL)
1027 		panic("nvme_identify: nvme_ccb_get returned NULL");
1028 
1029 	mem = nvme_dmamem_alloc(sc, sizeof(*identify));
1030 	if (mem == NULL) {
1031 		nvme_ccb_put(sc, ccb);
		return (1);
	}
1032 
1033 	ccb->ccb_done = nvme_empty_done;
1034 	ccb->ccb_cookie = mem;
1035 
1036 	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD);
1037 	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_fill_identify);
1038 	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD);
1039 
1040 	nvme_ccb_put(sc, ccb);
1041 
1042 	if (rv != 0)
1043 		goto done;
1044 
1045 	identify = NVME_DMA_KVA(mem);
1046 
1047 	scsi_strvis(sn, identify->sn, sizeof(identify->sn));
1048 	scsi_strvis(mn, identify->mn, sizeof(identify->mn));
1049 	scsi_strvis(fr, identify->fr, sizeof(identify->fr));
1050 
1051 	printf("%s: %s, firmware %s, serial %s\n", DEVNAME(sc), mn, fr, sn);
1052 
1053 	if (identify->mdts > 0) {
1054 		sc->sc_mdts = (1 << identify->mdts) * (1 << mpsmin);
1055 		if (sc->sc_mdts > NVME_MAXPHYS)
1056 			sc->sc_mdts = NVME_MAXPHYS;
1057 		sc->sc_max_prpl = sc->sc_mdts / sc->sc_mps;
1058 	}
1059 
1060 	sc->sc_nn = lemtoh32(&identify->nn);
1061 
1062 	/*
1063 	 * At least one Apple NVMe device presents a second, bogus disk that is
1064 	 * inaccessible, so cap targets at 1.
1065 	 *
1066 	 * sd1 at scsibus1 targ 2 lun 0: <NVMe, APPLE SSD AP0512, 16.1> [..]
1067 	 * sd1: 0MB, 4096 bytes/sector, 2 sectors
1068 	 */
1069 	if (sc->sc_nn > 1 &&
1070 	    mn[0] == 'A' && mn[1] == 'P' && mn[2] == 'P' && mn[3] == 'L' &&
1071 	    mn[4] == 'E')
1072 		sc->sc_nn = 1;
1073 
1074 	memcpy(&sc->sc_identify, identify, sizeof(sc->sc_identify));
1075 
1076 done:
1077 	nvme_dmamem_free(sc, mem);
1078 
1079 	return (rv);
1080 }
1081 
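/*
 * Create an I/O queue pair on the controller: a CREATE I/O CQ admin
 * command followed by a CREATE I/O SQ bound to the same queue id.
 */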
1082 int
1083 nvme_q_create(struct nvme_softc *sc, struct nvme_queue *q)
1084 {
1085 	struct nvme_sqe_q sqe;
1086 	struct nvme_ccb *ccb;
1087 	int rv;
1088 
1089 	ccb = scsi_io_get(&sc->sc_iopool, 0);
1090 	KASSERT(ccb != NULL);
1091 
1092 	ccb->ccb_done = nvme_empty_done;
1093 	ccb->ccb_cookie = &sqe;
1094 
1095 	memset(&sqe, 0, sizeof(sqe));
1096 	sqe.opcode = NVM_ADMIN_ADD_IOCQ;
1097 	htolem64(&sqe.prp1, NVME_DMA_DVA(q->q_cq_dmamem));
1098 	htolem16(&sqe.qsize, q->q_entries - 1);
1099 	htolem16(&sqe.qid, q->q_id);
1100 	sqe.qflags = NVM_SQE_CQ_IEN | NVM_SQE_Q_PC;
1101 
1102 	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill);
1103 	if (rv != 0)
1104 		goto fail;
1105 
1106 	ccb->ccb_done = nvme_empty_done;
1107 	ccb->ccb_cookie = &sqe;
1108 
1109 	memset(&sqe, 0, sizeof(sqe));
1110 	sqe.opcode = NVM_ADMIN_ADD_IOSQ;
1111 	htolem64(&sqe.prp1, NVME_DMA_DVA(q->q_sq_dmamem));
1112 	htolem16(&sqe.qsize, q->q_entries - 1);
1113 	htolem16(&sqe.qid, q->q_id);
1114 	htolem16(&sqe.cqid, q->q_id);
1115 	sqe.qflags = NVM_SQE_Q_PC;
1116 
1117 	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill);
1118 	if (rv != 0)
1119 		goto fail;
1120 
1121 fail:
1122 	scsi_io_put(&sc->sc_iopool, ccb);
1123 	return (rv);
1124 }
1125 
1126 int
1127 nvme_q_delete(struct nvme_softc *sc, struct nvme_queue *q)
1128 {
1129 	struct nvme_sqe_q sqe;
1130 	struct nvme_ccb *ccb;
1131 	int rv;
1132 
1133 	ccb = scsi_io_get(&sc->sc_iopool, 0);
1134 	KASSERT(ccb != NULL);
1135 
1136 	ccb->ccb_done = nvme_empty_done;
1137 	ccb->ccb_cookie = &sqe;
1138 
1139 	memset(&sqe, 0, sizeof(sqe));
1140 	sqe.opcode = NVM_ADMIN_DEL_IOSQ;
1141 	htolem16(&sqe.qid, q->q_id);
1142 
1143 	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill);
1144 	if (rv != 0)
1145 		goto fail;
1146 
1147 	ccb->ccb_done = nvme_empty_done;
1148 	ccb->ccb_cookie = &sqe;
1149 
1150 	memset(&sqe, 0, sizeof(sqe));
1151 	sqe.opcode = NVM_ADMIN_DEL_IOCQ;
1152 	htolem16(&sqe.qid, q->q_id);
1153 
1154 	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill);
1155 	if (rv != 0)
1156 		goto fail;
1157 
1158 	nvme_q_free(sc, q);
1159 
1160 fail:
1161 	scsi_io_put(&sc->sc_iopool, ccb);
1162 	return (rv);
1163 
1164 }
1165 
1166 void
1167 nvme_fill_identify(struct nvme_softc *sc, struct nvme_ccb *ccb, void *slot)
1168 {
1169 	struct nvme_sqe *sqe = slot;
1170 	struct nvme_dmamem *mem = ccb->ccb_cookie;
1171 
1172 	sqe->opcode = NVM_ADMIN_IDENTIFY;
1173 	htolem64(&sqe->entry.prp[0], NVME_DMA_DVA(mem));
1174 	htolem32(&sqe->cdw10, 1);
1175 }
1176 
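/*
 * Allocate the ccb array plus one contiguous DMA area that holds a PRP
 * list slice of sc_max_prpl entries per ccb; for example, an sc_mdts
 * of 128KB with 4KB pages gives 32 64-bit PRP entries (256 bytes) per
 * ccb. Each ccb also gets its own dmamap sized for sc_mdts.
 */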
1177 int
1178 nvme_ccbs_alloc(struct nvme_softc *sc, u_int nccbs)
1179 {
1180 	struct nvme_ccb *ccb;
1181 	bus_addr_t off;
1182 	u_int64_t *prpl;
1183 	u_int i;
1184 
1185 	sc->sc_ccbs = mallocarray(nccbs, sizeof(*ccb), M_DEVBUF,
1186 	    M_WAITOK | M_CANFAIL);
1187 	if (sc->sc_ccbs == NULL)
1188 		return (1);
1189 
1190 	sc->sc_ccb_prpls = nvme_dmamem_alloc(sc,
1191 	    sizeof(*prpl) * sc->sc_max_prpl * nccbs);
1192 
1193 	prpl = NVME_DMA_KVA(sc->sc_ccb_prpls);
1194 	off = 0;
1195 
1196 	for (i = 0; i < nccbs; i++) {
1197 		ccb = &sc->sc_ccbs[i];
1198 
1199 		if (bus_dmamap_create(sc->sc_dmat, sc->sc_mdts,
1200 		    sc->sc_max_prpl + 1, /* we get a free prp in the sqe */
1201 		    sc->sc_mps, sc->sc_mps, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
1202 		    &ccb->ccb_dmamap) != 0)
1203 			goto free_maps;
1204 
1205 		ccb->ccb_id = i;
1206 		ccb->ccb_prpl = prpl;
1207 		ccb->ccb_prpl_off = off;
1208 		ccb->ccb_prpl_dva = NVME_DMA_DVA(sc->sc_ccb_prpls) + off;
1209 
1210 		SIMPLEQ_INSERT_TAIL(&sc->sc_ccb_list, ccb, ccb_entry);
1211 
1212 		prpl += sc->sc_max_prpl;
1213 		off += sizeof(*prpl) * sc->sc_max_prpl;
1214 	}
1215 
1216 	return (0);
1217 
1218 free_maps:
1219 	nvme_ccbs_free(sc, nccbs);
1220 	return (1);
1221 }
1222 
1223 void *
1224 nvme_ccb_get(void *cookie)
1225 {
1226 	struct nvme_softc *sc = cookie;
1227 	struct nvme_ccb *ccb;
1228 
1229 	mtx_enter(&sc->sc_ccb_mtx);
1230 	ccb = SIMPLEQ_FIRST(&sc->sc_ccb_list);
1231 	if (ccb != NULL)
1232 		SIMPLEQ_REMOVE_HEAD(&sc->sc_ccb_list, ccb_entry);
1233 	mtx_leave(&sc->sc_ccb_mtx);
1234 
1235 	return (ccb);
1236 }
1237 
1238 void
1239 nvme_ccb_put(void *cookie, void *io)
1240 {
1241 	struct nvme_softc *sc = cookie;
1242 	struct nvme_ccb *ccb = io;
1243 
1244 	mtx_enter(&sc->sc_ccb_mtx);
1245 	SIMPLEQ_INSERT_HEAD(&sc->sc_ccb_list, ccb, ccb_entry);
1246 	mtx_leave(&sc->sc_ccb_mtx);
1247 }
1248 
1249 void
1250 nvme_ccbs_free(struct nvme_softc *sc, unsigned int nccbs)
1251 {
1252 	struct nvme_ccb *ccb;
1253 
1254 	while ((ccb = SIMPLEQ_FIRST(&sc->sc_ccb_list)) != NULL) {
1255 		SIMPLEQ_REMOVE_HEAD(&sc->sc_ccb_list, ccb_entry);
1256 		bus_dmamap_destroy(sc->sc_dmat, ccb->ccb_dmamap);
1257 	}
1258 
1259 	nvme_dmamem_free(sc, sc->sc_ccb_prpls);
1260 	free(sc->sc_ccbs, M_DEVBUF, nccbs * sizeof(*ccb));
1261 }
1262 
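/*
 * Allocate and initialise the DMA memory and bookkeeping for a
 * submission/completion queue pair; the doorbell offsets are computed
 * from the queue id and the controller's doorbell stride.
 */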
1263 struct nvme_queue *
1264 nvme_q_alloc(struct nvme_softc *sc, u_int16_t id, u_int entries, u_int dstrd)
1265 {
1266 	struct nvme_queue *q;
1267 
1268 	q = malloc(sizeof(*q), M_DEVBUF, M_WAITOK | M_CANFAIL);
1269 	if (q == NULL)
1270 		return (NULL);
1271 
1272 	q->q_sq_dmamem = nvme_dmamem_alloc(sc,
1273 	    sizeof(struct nvme_sqe) * entries);
1274 	if (q->q_sq_dmamem == NULL)
1275 		goto free;
1276 
1277 	q->q_cq_dmamem = nvme_dmamem_alloc(sc,
1278 	    sizeof(struct nvme_cqe) * entries);
1279 	if (q->q_cq_dmamem == NULL)
1280 		goto free_sq;
1281 
1282 	memset(NVME_DMA_KVA(q->q_sq_dmamem), 0, NVME_DMA_LEN(q->q_sq_dmamem));
1283 	memset(NVME_DMA_KVA(q->q_cq_dmamem), 0, NVME_DMA_LEN(q->q_cq_dmamem));
1284 
1285 	mtx_init(&q->q_sq_mtx, IPL_BIO);
1286 	mtx_init(&q->q_cq_mtx, IPL_BIO);
1287 	q->q_sqtdbl = NVME_SQTDBL(id, dstrd);
1288 	q->q_cqhdbl = NVME_CQHDBL(id, dstrd);
1289 
1290 	q->q_id = id;
1291 	q->q_entries = entries;
1292 	q->q_sq_tail = 0;
1293 	q->q_cq_head = 0;
1294 	q->q_cq_phase = NVME_CQE_PHASE;
1295 
1296 	nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_PREWRITE);
1297 	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);
1298 
1299 	return (q);
1300 
1301 free_sq:
1302 	nvme_dmamem_free(sc, q->q_sq_dmamem);
1303 free:
1304 	free(q, M_DEVBUF, sizeof *q);
1305 
1306 	return (NULL);
1307 }
1308 
1309 int
1310 nvme_q_reset(struct nvme_softc *sc, struct nvme_queue *q)
1311 {
1312 	memset(NVME_DMA_KVA(q->q_sq_dmamem), 0, NVME_DMA_LEN(q->q_sq_dmamem));
1313 	memset(NVME_DMA_KVA(q->q_cq_dmamem), 0, NVME_DMA_LEN(q->q_cq_dmamem));
1314 
1315 	q->q_sqtdbl = NVME_SQTDBL(q->q_id, sc->sc_dstrd);
1316 	q->q_cqhdbl = NVME_CQHDBL(q->q_id, sc->sc_dstrd);
1317 
1318 	q->q_sq_tail = 0;
1319 	q->q_cq_head = 0;
1320 	q->q_cq_phase = NVME_CQE_PHASE;
1321 
1322 	nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_PREWRITE);
1323 	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);
1324 
1325 	return (0);
1326 }
1327 
1328 void
1329 nvme_q_free(struct nvme_softc *sc, struct nvme_queue *q)
1330 {
1331 	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
1332 	nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_POSTWRITE);
1333 	nvme_dmamem_free(sc, q->q_cq_dmamem);
1334 	nvme_dmamem_free(sc, q->q_sq_dmamem);
1335 	free(q, M_DEVBUF, sizeof *q);
1336 }
1337 
1338 int
1339 nvme_intr(void *xsc)
1340 {
1341 	struct nvme_softc *sc = xsc;
1342 	int rv = 0;
1343 
1344 	if (nvme_q_complete(sc, sc->sc_q))
1345 		rv = 1;
1346 	if (nvme_q_complete(sc, sc->sc_admin_q))
1347 		rv = 1;
1348 
1349 	return (rv);
1350 }
1351 
1352 int
1353 nvme_intr_intx(void *xsc)
1354 {
1355 	struct nvme_softc *sc = xsc;
1356 	int rv;
1357 
1358 	nvme_write4(sc, NVME_INTMS, 1);
1359 	rv = nvme_intr(sc);
1360 	nvme_write4(sc, NVME_INTMC, 1);
1361 
1362 	return (rv);
1363 }
1364 
1365 struct nvme_dmamem *
1366 nvme_dmamem_alloc(struct nvme_softc *sc, size_t size)
1367 {
1368 	struct nvme_dmamem *ndm;
1369 	int nsegs;
1370 
1371 	ndm = malloc(sizeof(*ndm), M_DEVBUF, M_WAITOK | M_ZERO);
1372 	if (ndm == NULL)
1373 		return (NULL);
1374 
1375 	ndm->ndm_size = size;
1376 
1377 	if (bus_dmamap_create(sc->sc_dmat, size, 1, size, 0,
1378 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &ndm->ndm_map) != 0)
1379 		goto ndmfree;
1380 
1381 	if (bus_dmamem_alloc(sc->sc_dmat, size, sc->sc_mps, 0, &ndm->ndm_seg,
1382 	    1, &nsegs, BUS_DMA_WAITOK | BUS_DMA_ZERO) != 0)
1383 		goto destroy;
1384 
1385 	if (bus_dmamem_map(sc->sc_dmat, &ndm->ndm_seg, nsegs, size,
1386 	    &ndm->ndm_kva, BUS_DMA_WAITOK) != 0)
1387 		goto free;
1388 
1389 	if (bus_dmamap_load(sc->sc_dmat, ndm->ndm_map, ndm->ndm_kva, size,
1390 	    NULL, BUS_DMA_WAITOK) != 0)
1391 		goto unmap;
1392 
1393 	return (ndm);
1394 
1395 unmap:
1396 	bus_dmamem_unmap(sc->sc_dmat, ndm->ndm_kva, size);
1397 free:
1398 	bus_dmamem_free(sc->sc_dmat, &ndm->ndm_seg, 1);
1399 destroy:
1400 	bus_dmamap_destroy(sc->sc_dmat, ndm->ndm_map);
1401 ndmfree:
1402 	free(ndm, M_DEVBUF, sizeof *ndm);
1403 
1404 	return (NULL);
1405 }
1406 
1407 void
1408 nvme_dmamem_sync(struct nvme_softc *sc, struct nvme_dmamem *mem, int ops)
1409 {
1410 	bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(mem),
1411 	    0, NVME_DMA_LEN(mem), ops);
1412 }
1413 
1414 void
1415 nvme_dmamem_free(struct nvme_softc *sc, struct nvme_dmamem *ndm)
1416 {
1417 	bus_dmamap_unload(sc->sc_dmat, ndm->ndm_map);
1418 	bus_dmamem_unmap(sc->sc_dmat, ndm->ndm_kva, ndm->ndm_size);
1419 	bus_dmamem_free(sc->sc_dmat, &ndm->ndm_seg, 1);
1420 	bus_dmamap_destroy(sc->sc_dmat, ndm->ndm_map);
1421 	free(ndm, M_DEVBUF, sizeof *ndm);
1422 }
1423 
1424 #ifdef HIBERNATE
1425 
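/*
 * Minimal polled admin command submission used during hibernate, when
 * the normal ccb and interrupt machinery cannot be used.
 */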
1426 int
1427 nvme_hibernate_admin_cmd(struct nvme_softc *sc, struct nvme_sqe *sqe,
1428     struct nvme_cqe *cqe, int cid)
1429 {
1430 	struct nvme_sqe *asqe = NVME_DMA_KVA(sc->sc_admin_q->q_sq_dmamem);
1431 	struct nvme_cqe *acqe = NVME_DMA_KVA(sc->sc_admin_q->q_cq_dmamem);
1432 	struct nvme_queue *q = sc->sc_admin_q;
1433 	int tail;
1434 	u_int16_t flags;
1435 
1436 	/* submit command */
1437 	tail = q->q_sq_tail;
1438 	if (++q->q_sq_tail >= q->q_entries)
1439 		q->q_sq_tail = 0;
1440 
1441 	asqe += tail;
1442 	bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
1443 	    sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_POSTWRITE);
1444 	*asqe = *sqe;
1445 	asqe->cid = cid;
1446 	bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
1447 	    sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_PREWRITE);
1448 
1449 	nvme_write4(sc, q->q_sqtdbl, q->q_sq_tail);
1450 
1451 	/* wait for completion */
1452 	acqe += q->q_cq_head;
1453 	for (;;) {
1454 		nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
1455 		flags = lemtoh16(&acqe->flags);
1456 		if ((flags & NVME_CQE_PHASE) == q->q_cq_phase)
1457 			break;
1458 
1459 		delay(10);
1460 	}
1461 
1462 	if (++q->q_cq_head >= q->q_entries) {
1463 		q->q_cq_head = 0;
1464 		q->q_cq_phase ^= NVME_CQE_PHASE;
1465 	}
1466 	nvme_write4(sc, q->q_cqhdbl, q->q_cq_head);
1467 	if ((NVME_CQE_SC(flags) != NVME_CQE_SC_SUCCESS) || (acqe->cid != cid))
1468 		return (EIO);
1469 
1470 	return (0);
1471 }
1472 
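/*
 * Polled block I/O for hibernate. HIB_INIT locates the nvme softc and
 * namespace behind the sd device and recreates the dedicated hibernate
 * queue pair with admin commands; HIB_W then writes each chunk with a
 * single polled NVM WRITE, building a PRP list in the scratch page
 * when the transfer spans more than two pages.
 */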
1473 int
1474 nvme_hibernate_io(dev_t dev, daddr_t blkno, vaddr_t addr, size_t size,
1475     int op, void *page)
1476 {
1477 	struct nvme_hibernate_page {
1478 		u_int64_t		prpl[MAXPHYS / PAGE_SIZE];
1479 
1480 		struct nvme_softc	*sc;
1481 		int			nsid;
1482 		int			sq_tail;
1483 		int			cq_head;
1484 		int			cqe_phase;
1485 
1486 		daddr_t			poffset;
1487 		size_t			psize;
1488 	} *my = page;
1489 	struct nvme_sqe_io *isqe;
1490 	struct nvme_cqe *icqe;
1491 	paddr_t data_phys, page_phys;
1492 	u_int64_t data_bus_phys, page_bus_phys;
1493 	u_int16_t flags;
1494 	int i;
1495 
1496 	if (op == HIB_INIT) {
1497 		struct device *disk;
1498 		struct device *scsibus;
1499 		extern struct cfdriver sd_cd;
1500 		struct scsi_link *link;
1501 		struct scsibus_softc *bus_sc;
1502 		struct nvme_sqe_q qsqe;
1503 		struct nvme_cqe qcqe;
1504 
1505 		/* find nvme softc */
1506 		disk = disk_lookup(&sd_cd, DISKUNIT(dev));
1507 		scsibus = disk->dv_parent;
1508 		my->sc = (struct nvme_softc *)disk->dv_parent->dv_parent;
1509 
1510 		/* find scsi_link, which tells us the target */
1511 		my->nsid = 0;
1512 		bus_sc = (struct scsibus_softc *)scsibus;
1513 		SLIST_FOREACH(link, &bus_sc->sc_link_list, bus_list) {
1514 			if (link->device_softc == disk) {
1515 				my->nsid = link->target;
1516 				break;
1517 			}
1518 		}
1519 		if (my->nsid == 0)
1520 			return (EIO);
1521 
1522 		my->poffset = blkno;
1523 		my->psize = size;
1524 
1525 		memset(NVME_DMA_KVA(my->sc->sc_hib_q->q_cq_dmamem), 0,
1526 		    my->sc->sc_hib_q->q_entries * sizeof(struct nvme_cqe));
1527 		memset(NVME_DMA_KVA(my->sc->sc_hib_q->q_sq_dmamem), 0,
1528 		    my->sc->sc_hib_q->q_entries * sizeof(struct nvme_sqe));
1529 
1530 		my->sq_tail = 0;
1531 		my->cq_head = 0;
1532 		my->cqe_phase = NVME_CQE_PHASE;
1533 
1534 		pmap_extract(pmap_kernel(), (vaddr_t)page, &page_phys);
1535 
1536 		memset(&qsqe, 0, sizeof(qsqe));
1537 		qsqe.opcode = NVM_ADMIN_ADD_IOCQ;
1538 		htolem64(&qsqe.prp1,
1539 		    NVME_DMA_DVA(my->sc->sc_hib_q->q_cq_dmamem));
1540 		htolem16(&qsqe.qsize, my->sc->sc_hib_q->q_entries - 1);
1541 		htolem16(&qsqe.qid, my->sc->sc_hib_q->q_id);
1542 		qsqe.qflags = NVM_SQE_CQ_IEN | NVM_SQE_Q_PC;
1543 		if (nvme_hibernate_admin_cmd(my->sc, (struct nvme_sqe *)&qsqe,
1544 		    &qcqe, 1) != 0)
1545 			return (EIO);
1546 
1547 		memset(&qsqe, 0, sizeof(qsqe));
1548 		qsqe.opcode = NVM_ADMIN_ADD_IOSQ;
1549 		htolem64(&qsqe.prp1,
1550 		    NVME_DMA_DVA(my->sc->sc_hib_q->q_sq_dmamem));
1551 		htolem16(&qsqe.qsize, my->sc->sc_hib_q->q_entries - 1);
1552 		htolem16(&qsqe.qid, my->sc->sc_hib_q->q_id);
1553 		htolem16(&qsqe.cqid, my->sc->sc_hib_q->q_id);
1554 		qsqe.qflags = NVM_SQE_Q_PC;
1555 		if (nvme_hibernate_admin_cmd(my->sc, (struct nvme_sqe *)&qsqe,
1556 		    &qcqe, 2) != 0)
1557 			return (EIO);
1558 
1559 		return (0);
1560 	}
1561 
1562 	if (op != HIB_W)
1563 		return (0);
1564 
1565 	isqe = NVME_DMA_KVA(my->sc->sc_hib_q->q_sq_dmamem);
1566 	isqe += my->sq_tail;
1567 	if (++my->sq_tail == my->sc->sc_hib_q->q_entries)
1568 		my->sq_tail = 0;
1569 
1570 	memset(isqe, 0, sizeof(*isqe));
1571 	isqe->opcode = NVM_CMD_WRITE;
1572 	htolem32(&isqe->nsid, my->nsid);
1573 
1574 	pmap_extract(pmap_kernel(), addr, &data_phys);
1575 	data_bus_phys = data_phys;
1576 	htolem64(&isqe->entry.prp[0], data_bus_phys);
1577 	if ((size > my->sc->sc_mps) && (size <= my->sc->sc_mps * 2)) {
1578 		htolem64(&isqe->entry.prp[1], data_bus_phys + my->sc->sc_mps);
1579 	} else if (size > my->sc->sc_mps * 2) {
1580 		pmap_extract(pmap_kernel(), (vaddr_t)page, &page_phys);
1581 		page_bus_phys = page_phys;
1582 		htolem64(&isqe->entry.prp[1], page_bus_phys +
1583 		    offsetof(struct nvme_hibernate_page, prpl));
1584 		for (i = 1; i < (size / my->sc->sc_mps); i++) {
1585 			htolem64(&my->prpl[i - 1], data_bus_phys +
1586 			    (i * my->sc->sc_mps));
1587 		}
1588 	}
1589 
1590 	isqe->slba = blkno + my->poffset;
1591 	isqe->nlb = (size / DEV_BSIZE) - 1;
1592 	isqe->cid = blkno % 0xffff;
1593 
1594 	nvme_write4(my->sc, NVME_SQTDBL(NVME_HIB_Q, my->sc->sc_dstrd),
1595 	    my->sq_tail);
1596 
1597 	icqe = NVME_DMA_KVA(my->sc->sc_hib_q->q_cq_dmamem);
1598 	icqe += my->cq_head;
1599 	for (;;) {
1600 		flags = lemtoh16(&icqe->flags);
1601 		if ((flags & NVME_CQE_PHASE) == my->cqe_phase)
1602 			break;
1603 
1604 		delay(10);
1605 	}
1606 
1607 	if (++my->cq_head == my->sc->sc_hib_q->q_entries) {
1608 		my->cq_head = 0;
1609 		my->cqe_phase ^= NVME_CQE_PHASE;
1610 	}
1611 	nvme_write4(my->sc, NVME_CQHDBL(NVME_HIB_Q, my->sc->sc_dstrd),
1612 	    my->cq_head);
1613 	if ((NVME_CQE_SC(flags) != NVME_CQE_SC_SUCCESS) ||
1614 	    (icqe->cid != blkno % 0xffff))
1615 		return (EIO);
1616 
1617 	return (0);
1618 }
1619 
1620 #endif
1621