1 /* $NetBSD: nvme.c,v 1.54 2020/12/27 16:52:01 jmcneill Exp $ */ 2 /* $OpenBSD: nvme.c,v 1.49 2016/04/18 05:59:50 dlg Exp $ */ 3 4 /* 5 * Copyright (c) 2014 David Gwynne <dlg@openbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 20 #include <sys/cdefs.h> 21 __KERNEL_RCSID(0, "$NetBSD: nvme.c,v 1.54 2020/12/27 16:52:01 jmcneill Exp $"); 22 23 #include <sys/param.h> 24 #include <sys/systm.h> 25 #include <sys/kernel.h> 26 #include <sys/atomic.h> 27 #include <sys/bus.h> 28 #include <sys/buf.h> 29 #include <sys/conf.h> 30 #include <sys/device.h> 31 #include <sys/kmem.h> 32 #include <sys/once.h> 33 #include <sys/proc.h> 34 #include <sys/queue.h> 35 #include <sys/mutex.h> 36 37 #include <uvm/uvm_extern.h> 38 39 #include <dev/ic/nvmereg.h> 40 #include <dev/ic/nvmevar.h> 41 #include <dev/ic/nvmeio.h> 42 43 #include "ioconf.h" 44 45 #define B4_CHK_RDY_DELAY_MS 2300 /* workaround controller bug */ 46 47 int nvme_adminq_size = 32; 48 int nvme_ioq_size = 1024; 49 50 static int nvme_print(void *, const char *); 51 52 static int nvme_ready(struct nvme_softc *, uint32_t); 53 static int nvme_enable(struct nvme_softc *, u_int); 54 static int nvme_disable(struct nvme_softc *); 55 static int nvme_shutdown(struct nvme_softc *); 56 57 #ifdef NVME_DEBUG 58 static void nvme_dumpregs(struct nvme_softc *); 59 #endif 60 static int nvme_identify(struct nvme_softc *, u_int); 61 static void nvme_fill_identify(struct nvme_queue *, struct nvme_ccb *, 62 void *); 63 64 static int nvme_ccbs_alloc(struct nvme_queue *, uint16_t); 65 static void nvme_ccbs_free(struct nvme_queue *); 66 67 static struct nvme_ccb * 68 nvme_ccb_get(struct nvme_queue *, bool); 69 static void nvme_ccb_put(struct nvme_queue *, struct nvme_ccb *); 70 71 static int nvme_poll(struct nvme_softc *, struct nvme_queue *, 72 struct nvme_ccb *, void (*)(struct nvme_queue *, 73 struct nvme_ccb *, void *), int); 74 static void nvme_poll_fill(struct nvme_queue *, struct nvme_ccb *, void *); 75 static void nvme_poll_done(struct nvme_queue *, struct nvme_ccb *, 76 struct nvme_cqe *); 77 static void nvme_sqe_fill(struct nvme_queue *, struct nvme_ccb *, void *); 78 static void nvme_empty_done(struct nvme_queue *, struct nvme_ccb *, 79 struct nvme_cqe *); 80 81 static struct nvme_queue * 82 nvme_q_alloc(struct nvme_softc *, uint16_t, u_int, u_int); 83 static int nvme_q_create(struct nvme_softc *, struct nvme_queue *); 84 static int nvme_q_delete(struct nvme_softc *, struct nvme_queue *); 85 static void nvme_q_submit(struct nvme_softc *, struct nvme_queue *, 86 struct nvme_ccb *, void (*)(struct nvme_queue *, 87 struct nvme_ccb *, void *)); 88 static int nvme_q_complete(struct nvme_softc *, struct nvme_queue *q); 89 static void nvme_q_free(struct nvme_softc *, struct nvme_queue *); 90 static void nvme_q_wait_complete(struct nvme_softc *, struct nvme_queue *, 91 
bool (*)(void *), void *); 92 93 static struct nvme_dmamem * 94 nvme_dmamem_alloc(struct nvme_softc *, size_t); 95 static void nvme_dmamem_free(struct nvme_softc *, struct nvme_dmamem *); 96 static void nvme_dmamem_sync(struct nvme_softc *, struct nvme_dmamem *, 97 int); 98 99 static void nvme_ns_io_fill(struct nvme_queue *, struct nvme_ccb *, 100 void *); 101 static void nvme_ns_io_done(struct nvme_queue *, struct nvme_ccb *, 102 struct nvme_cqe *); 103 static void nvme_ns_sync_fill(struct nvme_queue *, struct nvme_ccb *, 104 void *); 105 static void nvme_ns_sync_done(struct nvme_queue *, struct nvme_ccb *, 106 struct nvme_cqe *); 107 static void nvme_getcache_fill(struct nvme_queue *, struct nvme_ccb *, 108 void *); 109 static void nvme_getcache_done(struct nvme_queue *, struct nvme_ccb *, 110 struct nvme_cqe *); 111 112 static void nvme_pt_fill(struct nvme_queue *, struct nvme_ccb *, 113 void *); 114 static void nvme_pt_done(struct nvme_queue *, struct nvme_ccb *, 115 struct nvme_cqe *); 116 static int nvme_command_passthrough(struct nvme_softc *, 117 struct nvme_pt_command *, uint16_t, struct lwp *, bool); 118 119 static int nvme_set_number_of_queues(struct nvme_softc *, u_int, u_int *, 120 u_int *); 121 122 #define NVME_TIMO_QOP 5 /* queue create and delete timeout */ 123 #define NVME_TIMO_IDENT 10 /* probe identify timeout */ 124 #define NVME_TIMO_PT -1 /* passthrough cmd timeout */ 125 #define NVME_TIMO_SY 60 /* sync cache timeout */ 126 127 #define nvme_read4(_s, _r) \ 128 bus_space_read_4((_s)->sc_iot, (_s)->sc_ioh, (_r)) 129 #define nvme_write4(_s, _r, _v) \ 130 bus_space_write_4((_s)->sc_iot, (_s)->sc_ioh, (_r), (_v)) 131 /* 132 * Some controllers, at least Apple NVMe, always require split 133 * transfers, so don't use bus_space_{read,write}_8() on LP64. 
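 * The 64-bit registers (CAP, ASQ, ACQ) are therefore always accessed as
 * two 32-bit transfers, with the low-order dword at the lower register
 * offset, as the helpers below do on both byte orders.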
134 */ 135 static inline uint64_t 136 nvme_read8(struct nvme_softc *sc, bus_size_t r) 137 { 138 uint64_t v; 139 uint32_t *a = (uint32_t *)&v; 140 141 #if _BYTE_ORDER == _LITTLE_ENDIAN 142 a[0] = nvme_read4(sc, r); 143 a[1] = nvme_read4(sc, r + 4); 144 #else /* _BYTE_ORDER == _LITTLE_ENDIAN */ 145 a[1] = nvme_read4(sc, r); 146 a[0] = nvme_read4(sc, r + 4); 147 #endif 148 149 return v; 150 } 151 152 static inline void 153 nvme_write8(struct nvme_softc *sc, bus_size_t r, uint64_t v) 154 { 155 uint32_t *a = (uint32_t *)&v; 156 157 #if _BYTE_ORDER == _LITTLE_ENDIAN 158 nvme_write4(sc, r, a[0]); 159 nvme_write4(sc, r + 4, a[1]); 160 #else /* _BYTE_ORDER == _LITTLE_ENDIAN */ 161 nvme_write4(sc, r, a[1]); 162 nvme_write4(sc, r + 4, a[0]); 163 #endif 164 } 165 #define nvme_barrier(_s, _r, _l, _f) \ 166 bus_space_barrier((_s)->sc_iot, (_s)->sc_ioh, (_r), (_l), (_f)) 167 168 #ifdef NVME_DEBUG 169 static __used void 170 nvme_dumpregs(struct nvme_softc *sc) 171 { 172 uint64_t r8; 173 uint32_t r4; 174 175 #define DEVNAME(_sc) device_xname((_sc)->sc_dev) 176 r8 = nvme_read8(sc, NVME_CAP); 177 printf("%s: cap 0x%016"PRIx64"\n", DEVNAME(sc), nvme_read8(sc, NVME_CAP)); 178 printf("%s: mpsmax %u (%u)\n", DEVNAME(sc), 179 (u_int)NVME_CAP_MPSMAX(r8), (1 << NVME_CAP_MPSMAX(r8))); 180 printf("%s: mpsmin %u (%u)\n", DEVNAME(sc), 181 (u_int)NVME_CAP_MPSMIN(r8), (1 << NVME_CAP_MPSMIN(r8))); 182 printf("%s: css %"PRIu64"\n", DEVNAME(sc), NVME_CAP_CSS(r8)); 183 printf("%s: nssrs %"PRIu64"\n", DEVNAME(sc), NVME_CAP_NSSRS(r8)); 184 printf("%s: dstrd %"PRIu64"\n", DEVNAME(sc), NVME_CAP_DSTRD(r8)); 185 printf("%s: to %"PRIu64" msec\n", DEVNAME(sc), NVME_CAP_TO(r8)); 186 printf("%s: ams %"PRIu64"\n", DEVNAME(sc), NVME_CAP_AMS(r8)); 187 printf("%s: cqr %"PRIu64"\n", DEVNAME(sc), NVME_CAP_CQR(r8)); 188 printf("%s: mqes %"PRIu64"\n", DEVNAME(sc), NVME_CAP_MQES(r8)); 189 190 printf("%s: vs 0x%04x\n", DEVNAME(sc), nvme_read4(sc, NVME_VS)); 191 192 r4 = nvme_read4(sc, NVME_CC); 193 printf("%s: cc 0x%04x\n", DEVNAME(sc), r4); 194 printf("%s: iocqes %u (%u)\n", DEVNAME(sc), NVME_CC_IOCQES_R(r4), 195 (1 << NVME_CC_IOCQES_R(r4))); 196 printf("%s: iosqes %u (%u)\n", DEVNAME(sc), NVME_CC_IOSQES_R(r4), 197 (1 << NVME_CC_IOSQES_R(r4))); 198 printf("%s: shn %u\n", DEVNAME(sc), NVME_CC_SHN_R(r4)); 199 printf("%s: ams %u\n", DEVNAME(sc), NVME_CC_AMS_R(r4)); 200 printf("%s: mps %u (%u)\n", DEVNAME(sc), NVME_CC_MPS_R(r4), 201 (1 << NVME_CC_MPS_R(r4))); 202 printf("%s: css %u\n", DEVNAME(sc), NVME_CC_CSS_R(r4)); 203 printf("%s: en %u\n", DEVNAME(sc), ISSET(r4, NVME_CC_EN) ? 
1 : 0); 204 205 r4 = nvme_read4(sc, NVME_CSTS); 206 printf("%s: csts 0x%08x\n", DEVNAME(sc), r4); 207 printf("%s: rdy %u\n", DEVNAME(sc), r4 & NVME_CSTS_RDY); 208 printf("%s: cfs %u\n", DEVNAME(sc), r4 & NVME_CSTS_CFS); 209 printf("%s: shst %x\n", DEVNAME(sc), r4 & NVME_CSTS_SHST_MASK); 210 211 r4 = nvme_read4(sc, NVME_AQA); 212 printf("%s: aqa 0x%08x\n", DEVNAME(sc), r4); 213 printf("%s: acqs %u\n", DEVNAME(sc), NVME_AQA_ACQS_R(r4)); 214 printf("%s: asqs %u\n", DEVNAME(sc), NVME_AQA_ASQS_R(r4)); 215 216 printf("%s: asq 0x%016"PRIx64"\n", DEVNAME(sc), nvme_read8(sc, NVME_ASQ)); 217 printf("%s: acq 0x%016"PRIx64"\n", DEVNAME(sc), nvme_read8(sc, NVME_ACQ)); 218 #undef DEVNAME 219 } 220 #endif /* NVME_DEBUG */ 221 222 static int 223 nvme_ready(struct nvme_softc *sc, uint32_t rdy) 224 { 225 u_int i = 0; 226 227 while ((nvme_read4(sc, NVME_CSTS) & NVME_CSTS_RDY) != rdy) { 228 if (i++ > sc->sc_rdy_to) 229 return ENXIO; 230 231 delay(1000); 232 nvme_barrier(sc, NVME_CSTS, 4, BUS_SPACE_BARRIER_READ); 233 } 234 235 return 0; 236 } 237 238 static int 239 nvme_enable(struct nvme_softc *sc, u_int mps) 240 { 241 uint32_t cc, csts; 242 int error; 243 244 cc = nvme_read4(sc, NVME_CC); 245 csts = nvme_read4(sc, NVME_CSTS); 246 247 /* 248 * See note in nvme_disable. Short circuit if we're already enabled. 249 */ 250 if (ISSET(cc, NVME_CC_EN)) { 251 if (ISSET(csts, NVME_CSTS_RDY)) 252 return 0; 253 254 goto waitready; 255 } else { 256 /* EN == 0 already wait for RDY == 0 or fail */ 257 error = nvme_ready(sc, 0); 258 if (error) 259 return error; 260 } 261 262 nvme_write8(sc, NVME_ASQ, NVME_DMA_DVA(sc->sc_admin_q->q_sq_dmamem)); 263 nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE); 264 delay(5000); 265 nvme_write8(sc, NVME_ACQ, NVME_DMA_DVA(sc->sc_admin_q->q_cq_dmamem)); 266 nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE); 267 delay(5000); 268 269 nvme_write4(sc, NVME_AQA, NVME_AQA_ACQS(sc->sc_admin_q->q_entries) | 270 NVME_AQA_ASQS(sc->sc_admin_q->q_entries)); 271 nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE); 272 delay(5000); 273 274 CLR(cc, NVME_CC_IOCQES_MASK | NVME_CC_IOSQES_MASK | NVME_CC_SHN_MASK | 275 NVME_CC_AMS_MASK | NVME_CC_MPS_MASK | NVME_CC_CSS_MASK); 276 SET(cc, NVME_CC_IOSQES(ffs(64) - 1) | NVME_CC_IOCQES(ffs(16) - 1)); 277 SET(cc, NVME_CC_SHN(NVME_CC_SHN_NONE)); 278 SET(cc, NVME_CC_CSS(NVME_CC_CSS_NVM)); 279 SET(cc, NVME_CC_AMS(NVME_CC_AMS_RR)); 280 SET(cc, NVME_CC_MPS(mps)); 281 SET(cc, NVME_CC_EN); 282 283 nvme_write4(sc, NVME_CC, cc); 284 nvme_barrier(sc, 0, sc->sc_ios, 285 BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); 286 287 waitready: 288 return nvme_ready(sc, NVME_CSTS_RDY); 289 } 290 291 static int 292 nvme_disable(struct nvme_softc *sc) 293 { 294 uint32_t cc, csts; 295 int error; 296 297 cc = nvme_read4(sc, NVME_CC); 298 csts = nvme_read4(sc, NVME_CSTS); 299 300 /* 301 * Per 3.1.5 in NVME 1.3 spec, transitioning CC.EN from 0 to 1 302 * when CSTS.RDY is 1 or transitioning CC.EN from 1 to 0 when 303 * CSTS.RDY is 0 "has undefined results" So make sure that CSTS.RDY 304 * isn't the desired value. Short circuit if we're already disabled. 
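	 * The logic below therefore waits for CSTS.RDY to catch up with the
	 * current CC.EN value before toggling EN, rather than clearing EN
	 * blindly.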
305 */ 306 if (ISSET(cc, NVME_CC_EN)) { 307 if (!ISSET(csts, NVME_CSTS_RDY)) { 308 /* EN == 1, wait for RDY == 1 or fail */ 309 error = nvme_ready(sc, NVME_CSTS_RDY); 310 if (error) 311 return error; 312 } 313 } else { 314 /* EN == 0 already wait for RDY == 0 */ 315 if (!ISSET(csts, NVME_CSTS_RDY)) 316 return 0; 317 318 goto waitready; 319 } 320 321 CLR(cc, NVME_CC_EN); 322 nvme_write4(sc, NVME_CC, cc); 323 nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_READ); 324 325 /* 326 * Some drives have issues with accessing the mmio after we disable, 327 * so delay for a bit after we write the bit to cope with these issues. 328 */ 329 if (ISSET(sc->sc_quirks, NVME_QUIRK_DELAY_B4_CHK_RDY)) 330 delay(B4_CHK_RDY_DELAY_MS); 331 332 waitready: 333 return nvme_ready(sc, 0); 334 } 335 336 int 337 nvme_attach(struct nvme_softc *sc) 338 { 339 uint64_t cap; 340 uint32_t reg; 341 u_int dstrd; 342 u_int mps = PAGE_SHIFT; 343 u_int ncq, nsq; 344 uint16_t adminq_entries = nvme_adminq_size; 345 uint16_t ioq_entries = nvme_ioq_size; 346 int i; 347 348 reg = nvme_read4(sc, NVME_VS); 349 if (reg == 0xffffffff) { 350 aprint_error_dev(sc->sc_dev, "invalid mapping\n"); 351 return 1; 352 } 353 354 if (NVME_VS_TER(reg) == 0) 355 aprint_normal_dev(sc->sc_dev, "NVMe %d.%d\n", NVME_VS_MJR(reg), 356 NVME_VS_MNR(reg)); 357 else 358 aprint_normal_dev(sc->sc_dev, "NVMe %d.%d.%d\n", NVME_VS_MJR(reg), 359 NVME_VS_MNR(reg), NVME_VS_TER(reg)); 360 361 cap = nvme_read8(sc, NVME_CAP); 362 dstrd = NVME_CAP_DSTRD(cap); 363 if (NVME_CAP_MPSMIN(cap) > PAGE_SHIFT) { 364 aprint_error_dev(sc->sc_dev, "NVMe minimum page size %u " 365 "is greater than CPU page size %u\n", 366 1 << NVME_CAP_MPSMIN(cap), 1 << PAGE_SHIFT); 367 return 1; 368 } 369 if (NVME_CAP_MPSMAX(cap) < mps) 370 mps = NVME_CAP_MPSMAX(cap); 371 if (ioq_entries > NVME_CAP_MQES(cap)) 372 ioq_entries = NVME_CAP_MQES(cap); 373 374 /* set initial values to be used for admin queue during probe */ 375 sc->sc_rdy_to = NVME_CAP_TO(cap); 376 sc->sc_mps = 1 << mps; 377 sc->sc_mdts = MAXPHYS; 378 sc->sc_max_sgl = btoc(round_page(sc->sc_mdts)); 379 380 if (nvme_disable(sc) != 0) { 381 aprint_error_dev(sc->sc_dev, "unable to disable controller\n"); 382 return 1; 383 } 384 385 sc->sc_admin_q = nvme_q_alloc(sc, NVME_ADMIN_Q, adminq_entries, dstrd); 386 if (sc->sc_admin_q == NULL) { 387 aprint_error_dev(sc->sc_dev, 388 "unable to allocate admin queue\n"); 389 return 1; 390 } 391 if (sc->sc_intr_establish(sc, NVME_ADMIN_Q, sc->sc_admin_q)) 392 goto free_admin_q; 393 394 if (nvme_enable(sc, mps) != 0) { 395 aprint_error_dev(sc->sc_dev, "unable to enable controller\n"); 396 goto disestablish_admin_q; 397 } 398 399 if (nvme_identify(sc, NVME_CAP_MPSMIN(cap)) != 0) { 400 aprint_error_dev(sc->sc_dev, "unable to identify controller\n"); 401 goto disable; 402 } 403 if (sc->sc_nn == 0) { 404 aprint_error_dev(sc->sc_dev, "namespace not found\n"); 405 goto disable; 406 } 407 408 /* we know how big things are now */ 409 sc->sc_max_sgl = sc->sc_mdts / sc->sc_mps; 410 411 /* reallocate ccbs of admin queue with new max sgl. 
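	 * The ccbs created before identify used the conservative
	 * MAXPHYS-based sc_max_sgl; now that MDTS is known, the per-ccb
	 * PRP lists can be sized to match.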
*/ 412 nvme_ccbs_free(sc->sc_admin_q); 413 nvme_ccbs_alloc(sc->sc_admin_q, sc->sc_admin_q->q_entries); 414 415 if (sc->sc_use_mq) { 416 /* Limit the number of queues to the number allocated in HW */ 417 if (nvme_set_number_of_queues(sc, sc->sc_nq, &ncq, &nsq) != 0) { 418 aprint_error_dev(sc->sc_dev, 419 "unable to get number of queues\n"); 420 goto disable; 421 } 422 if (sc->sc_nq > ncq) 423 sc->sc_nq = ncq; 424 if (sc->sc_nq > nsq) 425 sc->sc_nq = nsq; 426 } 427 428 sc->sc_q = kmem_zalloc(sizeof(*sc->sc_q) * sc->sc_nq, KM_SLEEP); 429 for (i = 0; i < sc->sc_nq; i++) { 430 sc->sc_q[i] = nvme_q_alloc(sc, i + 1, ioq_entries, dstrd); 431 if (sc->sc_q[i] == NULL) { 432 aprint_error_dev(sc->sc_dev, 433 "unable to allocate io queue\n"); 434 goto free_q; 435 } 436 if (nvme_q_create(sc, sc->sc_q[i]) != 0) { 437 aprint_error_dev(sc->sc_dev, 438 "unable to create io queue\n"); 439 nvme_q_free(sc, sc->sc_q[i]); 440 goto free_q; 441 } 442 } 443 444 if (!sc->sc_use_mq) 445 nvme_write4(sc, NVME_INTMC, 1); 446 447 /* probe subdevices */ 448 sc->sc_namespaces = kmem_zalloc(sizeof(*sc->sc_namespaces) * sc->sc_nn, 449 KM_SLEEP); 450 nvme_rescan(sc->sc_dev, "nvme", &i); 451 452 return 0; 453 454 free_q: 455 while (--i >= 0) { 456 nvme_q_delete(sc, sc->sc_q[i]); 457 nvme_q_free(sc, sc->sc_q[i]); 458 } 459 disable: 460 nvme_disable(sc); 461 disestablish_admin_q: 462 sc->sc_intr_disestablish(sc, NVME_ADMIN_Q); 463 free_admin_q: 464 nvme_q_free(sc, sc->sc_admin_q); 465 466 return 1; 467 } 468 469 int 470 nvme_rescan(device_t self, const char *attr, const int *flags) 471 { 472 struct nvme_softc *sc = device_private(self); 473 struct nvme_attach_args naa; 474 struct nvm_namespace_format *f; 475 struct nvme_namespace *ns; 476 uint64_t cap; 477 int ioq_entries = nvme_ioq_size; 478 int i; 479 int error; 480 481 cap = nvme_read8(sc, NVME_CAP); 482 if (ioq_entries > NVME_CAP_MQES(cap)) 483 ioq_entries = NVME_CAP_MQES(cap); 484 485 for (i = 1; i <= sc->sc_nn; i++) { 486 if (sc->sc_namespaces[i - 1].dev) 487 continue; 488 489 /* identify to check for availability */ 490 error = nvme_ns_identify(sc, i); 491 if (error) { 492 aprint_error_dev(self, "couldn't identify namespace #%d\n", i); 493 continue; 494 } 495 496 ns = nvme_ns_get(sc, i); 497 KASSERT(ns); 498 499 f = &ns->ident->lbaf[NVME_ID_NS_FLBAS(ns->ident->flbas)]; 500 501 /* 502 * NVME1.0e 6.11 Identify command 503 * 504 * LBADS values smaller than 9 are not supported, a value 505 * of zero means that the format is not used. 
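		 * (LBADS is the log2 of the LBA data size, so the smallest
		 * supported sector size is 1 << 9 = 512 bytes.)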
		 */
		if (f->lbads < 9) {
			if (f->lbads > 0)
				aprint_error_dev(self,
				    "unsupported logical data size %u\n",
				    f->lbads);
			continue;
		}

		memset(&naa, 0, sizeof(naa));
		naa.naa_nsid = i;
		naa.naa_qentries = (ioq_entries - 1) * sc->sc_nq;
		naa.naa_maxphys = sc->sc_mdts;
		naa.naa_typename = sc->sc_modelname;
		sc->sc_namespaces[i - 1].dev = config_found(sc->sc_dev, &naa,
		    nvme_print);
	}
	return 0;
}

static int
nvme_print(void *aux, const char *pnp)
{
	struct nvme_attach_args *naa = aux;

	if (pnp)
		aprint_normal("ld at %s", pnp);

	if (naa->naa_nsid > 0)
		aprint_normal(" nsid %d", naa->naa_nsid);

	return UNCONF;
}

int
nvme_detach(struct nvme_softc *sc, int flags)
{
	int i, error;

	error = config_detach_children(sc->sc_dev, flags);
	if (error)
		return error;

	error = nvme_shutdown(sc);
	if (error)
		return error;

	/* from now on we are committed to detach; the following never fails */
	for (i = 0; i < sc->sc_nq; i++)
		nvme_q_free(sc, sc->sc_q[i]);
	kmem_free(sc->sc_q, sizeof(*sc->sc_q) * sc->sc_nq);
	nvme_q_free(sc, sc->sc_admin_q);

	return 0;
}

static int
nvme_shutdown(struct nvme_softc *sc)
{
	uint32_t cc, csts;
	bool disabled = false;
	int i;

	if (!sc->sc_use_mq)
		nvme_write4(sc, NVME_INTMS, 1);

	for (i = 0; i < sc->sc_nq; i++) {
		if (nvme_q_delete(sc, sc->sc_q[i]) != 0) {
			aprint_error_dev(sc->sc_dev,
			    "unable to delete io queue %d, disabling\n", i + 1);
			disabled = true;
		}
	}
	sc->sc_intr_disestablish(sc, NVME_ADMIN_Q);
	if (disabled)
		goto disable;

	cc = nvme_read4(sc, NVME_CC);
	CLR(cc, NVME_CC_SHN_MASK);
	SET(cc, NVME_CC_SHN(NVME_CC_SHN_NORMAL));
	nvme_write4(sc, NVME_CC, cc);

	for (i = 0; i < 4000; i++) {
		nvme_barrier(sc, 0, sc->sc_ios,
		    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
		csts = nvme_read4(sc, NVME_CSTS);
		if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_DONE)
			return 0;

		delay(1000);
	}

	aprint_error_dev(sc->sc_dev, "unable to shut down, disabling\n");

disable:
	nvme_disable(sc);
	return 0;
}

void
nvme_childdet(device_t self, device_t child)
{
	struct nvme_softc *sc = device_private(self);
	int i;

	for (i = 0; i < sc->sc_nn; i++) {
		if (sc->sc_namespaces[i].dev == child) {
			/* Already freed ns->ident.
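			 * The identify data is released via nvme_ns_free()
			 * in the child's detach path, so only the device
			 * pointer needs to be cleared here.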
*/ 613 sc->sc_namespaces[i].dev = NULL; 614 break; 615 } 616 } 617 } 618 619 int 620 nvme_ns_identify(struct nvme_softc *sc, uint16_t nsid) 621 { 622 struct nvme_sqe sqe; 623 struct nvm_identify_namespace *identify; 624 struct nvme_dmamem *mem; 625 struct nvme_ccb *ccb; 626 struct nvme_namespace *ns; 627 int rv; 628 629 KASSERT(nsid > 0); 630 631 ns = nvme_ns_get(sc, nsid); 632 KASSERT(ns); 633 634 if (ns->ident != NULL) 635 return 0; 636 637 ccb = nvme_ccb_get(sc->sc_admin_q, false); 638 KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */ 639 640 mem = nvme_dmamem_alloc(sc, sizeof(*identify)); 641 if (mem == NULL) { 642 nvme_ccb_put(sc->sc_admin_q, ccb); 643 return ENOMEM; 644 } 645 646 memset(&sqe, 0, sizeof(sqe)); 647 sqe.opcode = NVM_ADMIN_IDENTIFY; 648 htolem32(&sqe.nsid, nsid); 649 htolem64(&sqe.entry.prp[0], NVME_DMA_DVA(mem)); 650 htolem32(&sqe.cdw10, 0); 651 652 ccb->ccb_done = nvme_empty_done; 653 ccb->ccb_cookie = &sqe; 654 655 nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD); 656 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_IDENT); 657 nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD); 658 659 nvme_ccb_put(sc->sc_admin_q, ccb); 660 661 if (rv != 0) { 662 rv = EIO; 663 goto done; 664 } 665 666 /* commit */ 667 668 identify = kmem_zalloc(sizeof(*identify), KM_SLEEP); 669 *identify = *((volatile struct nvm_identify_namespace *)NVME_DMA_KVA(mem)); 670 671 /* Convert data to host endian */ 672 nvme_identify_namespace_swapbytes(identify); 673 674 ns->ident = identify; 675 676 done: 677 nvme_dmamem_free(sc, mem); 678 679 return rv; 680 } 681 682 int 683 nvme_ns_dobio(struct nvme_softc *sc, uint16_t nsid, void *cookie, 684 struct buf *bp, void *data, size_t datasize, 685 int secsize, daddr_t blkno, int flags, nvme_nnc_done nnc_done) 686 { 687 struct nvme_queue *q = nvme_get_q(sc, bp, false); 688 struct nvme_ccb *ccb; 689 bus_dmamap_t dmap; 690 int i, error; 691 692 ccb = nvme_ccb_get(q, false); 693 if (ccb == NULL) 694 return EAGAIN; 695 696 ccb->ccb_done = nvme_ns_io_done; 697 ccb->ccb_cookie = cookie; 698 699 /* namespace context */ 700 ccb->nnc_nsid = nsid; 701 ccb->nnc_flags = flags; 702 ccb->nnc_buf = bp; 703 ccb->nnc_datasize = datasize; 704 ccb->nnc_secsize = secsize; 705 ccb->nnc_blkno = blkno; 706 ccb->nnc_done = nnc_done; 707 708 dmap = ccb->ccb_dmamap; 709 error = bus_dmamap_load(sc->sc_dmat, dmap, data, 710 datasize, NULL, 711 (ISSET(flags, NVME_NS_CTX_F_POLL) ? 712 BUS_DMA_NOWAIT : BUS_DMA_WAITOK) | 713 (ISSET(flags, NVME_NS_CTX_F_READ) ? 714 BUS_DMA_READ : BUS_DMA_WRITE)); 715 if (error) { 716 nvme_ccb_put(q, ccb); 717 return error; 718 } 719 720 bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize, 721 ISSET(flags, NVME_NS_CTX_F_READ) ? 
722 BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE); 723 724 if (dmap->dm_nsegs > 2) { 725 for (i = 1; i < dmap->dm_nsegs; i++) { 726 htolem64(&ccb->ccb_prpl[i - 1], 727 dmap->dm_segs[i].ds_addr); 728 } 729 bus_dmamap_sync(sc->sc_dmat, 730 NVME_DMA_MAP(q->q_ccb_prpls), 731 ccb->ccb_prpl_off, 732 sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1), 733 BUS_DMASYNC_PREWRITE); 734 } 735 736 if (ISSET(flags, NVME_NS_CTX_F_POLL)) { 737 if (nvme_poll(sc, q, ccb, nvme_ns_io_fill, NVME_TIMO_PT) != 0) 738 return EIO; 739 return 0; 740 } 741 742 nvme_q_submit(sc, q, ccb, nvme_ns_io_fill); 743 return 0; 744 } 745 746 static void 747 nvme_ns_io_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 748 { 749 struct nvme_sqe_io *sqe = slot; 750 bus_dmamap_t dmap = ccb->ccb_dmamap; 751 752 sqe->opcode = ISSET(ccb->nnc_flags, NVME_NS_CTX_F_READ) ? 753 NVM_CMD_READ : NVM_CMD_WRITE; 754 htolem32(&sqe->nsid, ccb->nnc_nsid); 755 756 htolem64(&sqe->entry.prp[0], dmap->dm_segs[0].ds_addr); 757 switch (dmap->dm_nsegs) { 758 case 1: 759 break; 760 case 2: 761 htolem64(&sqe->entry.prp[1], dmap->dm_segs[1].ds_addr); 762 break; 763 default: 764 /* the prp list is already set up and synced */ 765 htolem64(&sqe->entry.prp[1], ccb->ccb_prpl_dva); 766 break; 767 } 768 769 htolem64(&sqe->slba, ccb->nnc_blkno); 770 771 if (ISSET(ccb->nnc_flags, NVME_NS_CTX_F_FUA)) 772 htolem16(&sqe->ioflags, NVM_SQE_IO_FUA); 773 774 /* guaranteed by upper layers, but check just in case */ 775 KASSERT((ccb->nnc_datasize % ccb->nnc_secsize) == 0); 776 htolem16(&sqe->nlb, (ccb->nnc_datasize / ccb->nnc_secsize) - 1); 777 } 778 779 static void 780 nvme_ns_io_done(struct nvme_queue *q, struct nvme_ccb *ccb, 781 struct nvme_cqe *cqe) 782 { 783 struct nvme_softc *sc = q->q_sc; 784 bus_dmamap_t dmap = ccb->ccb_dmamap; 785 void *nnc_cookie = ccb->ccb_cookie; 786 nvme_nnc_done nnc_done = ccb->nnc_done; 787 struct buf *bp = ccb->nnc_buf; 788 789 if (dmap->dm_nsegs > 2) { 790 bus_dmamap_sync(sc->sc_dmat, 791 NVME_DMA_MAP(q->q_ccb_prpls), 792 ccb->ccb_prpl_off, 793 sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1), 794 BUS_DMASYNC_POSTWRITE); 795 } 796 797 bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize, 798 ISSET(ccb->nnc_flags, NVME_NS_CTX_F_READ) ? 799 BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE); 800 801 bus_dmamap_unload(sc->sc_dmat, dmap); 802 nvme_ccb_put(q, ccb); 803 804 nnc_done(nnc_cookie, bp, lemtoh16(&cqe->flags), lemtoh32(&cqe->cdw0)); 805 } 806 807 /* 808 * If there is no volatile write cache, it makes no sense to issue 809 * flush commands or query for the status. 
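 * Whether such a cache exists is reported by the VWC field of the
 * Identify Controller data, checked below.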
810 */ 811 static bool 812 nvme_has_volatile_write_cache(struct nvme_softc *sc) 813 { 814 /* sc_identify is filled during attachment */ 815 return ((sc->sc_identify.vwc & NVME_ID_CTRLR_VWC_PRESENT) != 0); 816 } 817 818 static bool 819 nvme_ns_sync_finished(void *cookie) 820 { 821 int *result = cookie; 822 823 return (*result != 0); 824 } 825 826 int 827 nvme_ns_sync(struct nvme_softc *sc, uint16_t nsid, int flags) 828 { 829 struct nvme_queue *q = nvme_get_q(sc, NULL, true); 830 struct nvme_ccb *ccb; 831 int result = 0; 832 833 if (!nvme_has_volatile_write_cache(sc)) { 834 /* cache not present, no value in trying to flush it */ 835 return 0; 836 } 837 838 ccb = nvme_ccb_get(q, true); 839 KASSERT(ccb != NULL); 840 841 ccb->ccb_done = nvme_ns_sync_done; 842 ccb->ccb_cookie = &result; 843 844 /* namespace context */ 845 ccb->nnc_nsid = nsid; 846 ccb->nnc_flags = flags; 847 ccb->nnc_done = NULL; 848 849 if (ISSET(flags, NVME_NS_CTX_F_POLL)) { 850 if (nvme_poll(sc, q, ccb, nvme_ns_sync_fill, NVME_TIMO_SY) != 0) 851 return EIO; 852 return 0; 853 } 854 855 nvme_q_submit(sc, q, ccb, nvme_ns_sync_fill); 856 857 /* wait for completion */ 858 nvme_q_wait_complete(sc, q, nvme_ns_sync_finished, &result); 859 KASSERT(result != 0); 860 861 return (result > 0) ? 0 : EIO; 862 } 863 864 static void 865 nvme_ns_sync_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 866 { 867 struct nvme_sqe *sqe = slot; 868 869 sqe->opcode = NVM_CMD_FLUSH; 870 htolem32(&sqe->nsid, ccb->nnc_nsid); 871 } 872 873 static void 874 nvme_ns_sync_done(struct nvme_queue *q, struct nvme_ccb *ccb, 875 struct nvme_cqe *cqe) 876 { 877 int *result = ccb->ccb_cookie; 878 uint16_t status = NVME_CQE_SC(lemtoh16(&cqe->flags)); 879 880 if (status == NVME_CQE_SC_SUCCESS) 881 *result = 1; 882 else 883 *result = -1; 884 885 nvme_ccb_put(q, ccb); 886 } 887 888 static bool 889 nvme_getcache_finished(void *xc) 890 { 891 int *addr = xc; 892 893 return (*addr != 0); 894 } 895 896 /* 897 * Get status of volatile write cache. Always asynchronous. 
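 * Issues Get Features (Volatile Write Cache) on the admin queue and
 * translates the completion into DKCACHE_* flags.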
898 */ 899 int 900 nvme_admin_getcache(struct nvme_softc *sc, int *addr) 901 { 902 struct nvme_ccb *ccb; 903 struct nvme_queue *q = sc->sc_admin_q; 904 int result = 0, error; 905 906 if (!nvme_has_volatile_write_cache(sc)) { 907 /* cache simply not present */ 908 *addr = 0; 909 return 0; 910 } 911 912 ccb = nvme_ccb_get(q, true); 913 KASSERT(ccb != NULL); 914 915 ccb->ccb_done = nvme_getcache_done; 916 ccb->ccb_cookie = &result; 917 918 /* namespace context */ 919 ccb->nnc_flags = 0; 920 ccb->nnc_done = NULL; 921 922 nvme_q_submit(sc, q, ccb, nvme_getcache_fill); 923 924 /* wait for completion */ 925 nvme_q_wait_complete(sc, q, nvme_getcache_finished, &result); 926 KASSERT(result != 0); 927 928 if (result > 0) { 929 *addr = result; 930 error = 0; 931 } else 932 error = EINVAL; 933 934 return error; 935 } 936 937 static void 938 nvme_getcache_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 939 { 940 struct nvme_sqe *sqe = slot; 941 942 sqe->opcode = NVM_ADMIN_GET_FEATURES; 943 htolem32(&sqe->cdw10, NVM_FEATURE_VOLATILE_WRITE_CACHE); 944 htolem32(&sqe->cdw11, NVM_VOLATILE_WRITE_CACHE_WCE); 945 } 946 947 static void 948 nvme_getcache_done(struct nvme_queue *q, struct nvme_ccb *ccb, 949 struct nvme_cqe *cqe) 950 { 951 int *addr = ccb->ccb_cookie; 952 uint16_t status = NVME_CQE_SC(lemtoh16(&cqe->flags)); 953 uint32_t cdw0 = lemtoh32(&cqe->cdw0); 954 int result; 955 956 if (status == NVME_CQE_SC_SUCCESS) { 957 result = 0; 958 959 /* 960 * DPO not supported, Dataset Management (DSM) field doesn't 961 * specify the same semantics. FUA is always supported. 962 */ 963 result = DKCACHE_FUA; 964 965 if (cdw0 & NVM_VOLATILE_WRITE_CACHE_WCE) 966 result |= DKCACHE_WRITE; 967 968 /* 969 * If volatile write cache is present, the flag shall also be 970 * settable. 971 */ 972 result |= DKCACHE_WCHANGE; 973 974 /* 975 * ONCS field indicates whether the optional SAVE is also 976 * supported for Set Features. According to spec v1.3, 977 * Volatile Write Cache however doesn't support persistency 978 * across power cycle/reset. 979 */ 980 981 } else { 982 result = -1; 983 } 984 985 *addr = result; 986 987 nvme_ccb_put(q, ccb); 988 } 989 990 struct nvme_setcache_state { 991 int dkcache; 992 int result; 993 }; 994 995 static bool 996 nvme_setcache_finished(void *xc) 997 { 998 struct nvme_setcache_state *st = xc; 999 1000 return (st->result != 0); 1001 } 1002 1003 static void 1004 nvme_setcache_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 1005 { 1006 struct nvme_sqe *sqe = slot; 1007 struct nvme_setcache_state *st = ccb->ccb_cookie; 1008 1009 sqe->opcode = NVM_ADMIN_SET_FEATURES; 1010 htolem32(&sqe->cdw10, NVM_FEATURE_VOLATILE_WRITE_CACHE); 1011 if (st->dkcache & DKCACHE_WRITE) 1012 htolem32(&sqe->cdw11, NVM_VOLATILE_WRITE_CACHE_WCE); 1013 } 1014 1015 static void 1016 nvme_setcache_done(struct nvme_queue *q, struct nvme_ccb *ccb, 1017 struct nvme_cqe *cqe) 1018 { 1019 struct nvme_setcache_state *st = ccb->ccb_cookie; 1020 uint16_t status = NVME_CQE_SC(lemtoh16(&cqe->flags)); 1021 1022 if (status == NVME_CQE_SC_SUCCESS) { 1023 st->result = 1; 1024 } else { 1025 st->result = -1; 1026 } 1027 1028 nvme_ccb_put(q, ccb); 1029 } 1030 1031 /* 1032 * Set status of volatile write cache. Always asynchronous. 
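 * Issues Set Features (Volatile Write Cache); only the DKCACHE_WRITE
 * bit may be changed.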
1033 */ 1034 int 1035 nvme_admin_setcache(struct nvme_softc *sc, int dkcache) 1036 { 1037 struct nvme_ccb *ccb; 1038 struct nvme_queue *q = sc->sc_admin_q; 1039 int error; 1040 struct nvme_setcache_state st; 1041 1042 if (!nvme_has_volatile_write_cache(sc)) { 1043 /* cache simply not present */ 1044 return EOPNOTSUPP; 1045 } 1046 1047 if (dkcache & ~(DKCACHE_WRITE)) { 1048 /* unsupported parameters */ 1049 return EOPNOTSUPP; 1050 } 1051 1052 ccb = nvme_ccb_get(q, true); 1053 KASSERT(ccb != NULL); 1054 1055 memset(&st, 0, sizeof(st)); 1056 st.dkcache = dkcache; 1057 1058 ccb->ccb_done = nvme_setcache_done; 1059 ccb->ccb_cookie = &st; 1060 1061 /* namespace context */ 1062 ccb->nnc_flags = 0; 1063 ccb->nnc_done = NULL; 1064 1065 nvme_q_submit(sc, q, ccb, nvme_setcache_fill); 1066 1067 /* wait for completion */ 1068 nvme_q_wait_complete(sc, q, nvme_setcache_finished, &st); 1069 KASSERT(st.result != 0); 1070 1071 if (st.result > 0) 1072 error = 0; 1073 else 1074 error = EINVAL; 1075 1076 return error; 1077 } 1078 1079 void 1080 nvme_ns_free(struct nvme_softc *sc, uint16_t nsid) 1081 { 1082 struct nvme_namespace *ns; 1083 struct nvm_identify_namespace *identify; 1084 1085 ns = nvme_ns_get(sc, nsid); 1086 KASSERT(ns); 1087 1088 identify = ns->ident; 1089 ns->ident = NULL; 1090 if (identify != NULL) 1091 kmem_free(identify, sizeof(*identify)); 1092 } 1093 1094 struct nvme_pt_state { 1095 struct nvme_pt_command *pt; 1096 bool finished; 1097 }; 1098 1099 static void 1100 nvme_pt_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 1101 { 1102 struct nvme_softc *sc = q->q_sc; 1103 struct nvme_sqe *sqe = slot; 1104 struct nvme_pt_state *state = ccb->ccb_cookie; 1105 struct nvme_pt_command *pt = state->pt; 1106 bus_dmamap_t dmap = ccb->ccb_dmamap; 1107 int i; 1108 1109 sqe->opcode = pt->cmd.opcode; 1110 htolem32(&sqe->nsid, pt->cmd.nsid); 1111 1112 if (pt->buf != NULL && pt->len > 0) { 1113 htolem64(&sqe->entry.prp[0], dmap->dm_segs[0].ds_addr); 1114 switch (dmap->dm_nsegs) { 1115 case 1: 1116 break; 1117 case 2: 1118 htolem64(&sqe->entry.prp[1], dmap->dm_segs[1].ds_addr); 1119 break; 1120 default: 1121 for (i = 1; i < dmap->dm_nsegs; i++) { 1122 htolem64(&ccb->ccb_prpl[i - 1], 1123 dmap->dm_segs[i].ds_addr); 1124 } 1125 bus_dmamap_sync(sc->sc_dmat, 1126 NVME_DMA_MAP(q->q_ccb_prpls), 1127 ccb->ccb_prpl_off, 1128 sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1), 1129 BUS_DMASYNC_PREWRITE); 1130 htolem64(&sqe->entry.prp[1], ccb->ccb_prpl_dva); 1131 break; 1132 } 1133 } 1134 1135 htolem32(&sqe->cdw10, pt->cmd.cdw10); 1136 htolem32(&sqe->cdw11, pt->cmd.cdw11); 1137 htolem32(&sqe->cdw12, pt->cmd.cdw12); 1138 htolem32(&sqe->cdw13, pt->cmd.cdw13); 1139 htolem32(&sqe->cdw14, pt->cmd.cdw14); 1140 htolem32(&sqe->cdw15, pt->cmd.cdw15); 1141 } 1142 1143 static void 1144 nvme_pt_done(struct nvme_queue *q, struct nvme_ccb *ccb, struct nvme_cqe *cqe) 1145 { 1146 struct nvme_softc *sc = q->q_sc; 1147 struct nvme_pt_state *state = ccb->ccb_cookie; 1148 struct nvme_pt_command *pt = state->pt; 1149 bus_dmamap_t dmap = ccb->ccb_dmamap; 1150 1151 if (pt->buf != NULL && pt->len > 0) { 1152 if (dmap->dm_nsegs > 2) { 1153 bus_dmamap_sync(sc->sc_dmat, 1154 NVME_DMA_MAP(q->q_ccb_prpls), 1155 ccb->ccb_prpl_off, 1156 sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1), 1157 BUS_DMASYNC_POSTWRITE); 1158 } 1159 1160 bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize, 1161 pt->is_read ? 
BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE); 1162 bus_dmamap_unload(sc->sc_dmat, dmap); 1163 } 1164 1165 pt->cpl.cdw0 = lemtoh32(&cqe->cdw0); 1166 pt->cpl.flags = lemtoh16(&cqe->flags) & ~NVME_CQE_PHASE; 1167 1168 state->finished = true; 1169 1170 nvme_ccb_put(q, ccb); 1171 } 1172 1173 static bool 1174 nvme_pt_finished(void *cookie) 1175 { 1176 struct nvme_pt_state *state = cookie; 1177 1178 return state->finished; 1179 } 1180 1181 static int 1182 nvme_command_passthrough(struct nvme_softc *sc, struct nvme_pt_command *pt, 1183 uint16_t nsid, struct lwp *l, bool is_adminq) 1184 { 1185 struct nvme_queue *q; 1186 struct nvme_ccb *ccb; 1187 void *buf = NULL; 1188 struct nvme_pt_state state; 1189 int error; 1190 1191 /* limit command size to maximum data transfer size */ 1192 if ((pt->buf == NULL && pt->len > 0) || 1193 (pt->buf != NULL && (pt->len == 0 || pt->len > sc->sc_mdts))) 1194 return EINVAL; 1195 1196 q = is_adminq ? sc->sc_admin_q : nvme_get_q(sc, NULL, true); 1197 ccb = nvme_ccb_get(q, true); 1198 KASSERT(ccb != NULL); 1199 1200 if (pt->buf != NULL) { 1201 KASSERT(pt->len > 0); 1202 buf = kmem_alloc(pt->len, KM_SLEEP); 1203 if (!pt->is_read) { 1204 error = copyin(pt->buf, buf, pt->len); 1205 if (error) 1206 goto kmem_free; 1207 } 1208 error = bus_dmamap_load(sc->sc_dmat, ccb->ccb_dmamap, buf, 1209 pt->len, NULL, 1210 BUS_DMA_WAITOK | 1211 (pt->is_read ? BUS_DMA_READ : BUS_DMA_WRITE)); 1212 if (error) 1213 goto kmem_free; 1214 bus_dmamap_sync(sc->sc_dmat, ccb->ccb_dmamap, 1215 0, ccb->ccb_dmamap->dm_mapsize, 1216 pt->is_read ? BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE); 1217 } 1218 1219 memset(&state, 0, sizeof(state)); 1220 state.pt = pt; 1221 state.finished = false; 1222 1223 ccb->ccb_done = nvme_pt_done; 1224 ccb->ccb_cookie = &state; 1225 1226 pt->cmd.nsid = nsid; 1227 1228 nvme_q_submit(sc, q, ccb, nvme_pt_fill); 1229 1230 /* wait for completion */ 1231 nvme_q_wait_complete(sc, q, nvme_pt_finished, &state); 1232 KASSERT(state.finished); 1233 1234 error = 0; 1235 1236 if (buf != NULL) { 1237 if (error == 0 && pt->is_read) 1238 error = copyout(buf, pt->buf, pt->len); 1239 kmem_free: 1240 kmem_free(buf, pt->len); 1241 } 1242 1243 return error; 1244 } 1245 1246 static void 1247 nvme_q_submit(struct nvme_softc *sc, struct nvme_queue *q, struct nvme_ccb *ccb, 1248 void (*fill)(struct nvme_queue *, struct nvme_ccb *, void *)) 1249 { 1250 struct nvme_sqe *sqe = NVME_DMA_KVA(q->q_sq_dmamem); 1251 uint32_t tail; 1252 1253 mutex_enter(&q->q_sq_mtx); 1254 tail = q->q_sq_tail; 1255 if (++q->q_sq_tail >= q->q_entries) 1256 q->q_sq_tail = 0; 1257 1258 sqe += tail; 1259 1260 bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem), 1261 sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_POSTWRITE); 1262 memset(sqe, 0, sizeof(*sqe)); 1263 (*fill)(q, ccb, sqe); 1264 htolem16(&sqe->cid, ccb->ccb_id); 1265 bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem), 1266 sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_PREWRITE); 1267 1268 nvme_write4(sc, q->q_sqtdbl, q->q_sq_tail); 1269 mutex_exit(&q->q_sq_mtx); 1270 } 1271 1272 struct nvme_poll_state { 1273 struct nvme_sqe s; 1274 struct nvme_cqe c; 1275 void *cookie; 1276 void (*done)(struct nvme_queue *, struct nvme_ccb *, struct nvme_cqe *); 1277 }; 1278 1279 static int 1280 nvme_poll(struct nvme_softc *sc, struct nvme_queue *q, struct nvme_ccb *ccb, 1281 void (*fill)(struct nvme_queue *, struct nvme_ccb *, void *), int timo_sec) 1282 { 1283 struct nvme_poll_state state; 1284 uint16_t flags; 1285 int step = 10; 1286 int maxloop = timo_sec * 1000000 
/ step;
	int error = 0;

	memset(&state, 0, sizeof(state));
	(*fill)(q, ccb, &state.s);

	state.done = ccb->ccb_done;
	state.cookie = ccb->ccb_cookie;

	ccb->ccb_done = nvme_poll_done;
	ccb->ccb_cookie = &state;

	nvme_q_submit(sc, q, ccb, nvme_poll_fill);
	while (!ISSET(state.c.flags, htole16(NVME_CQE_PHASE))) {
		if (nvme_q_complete(sc, q) == 0)
			delay(step);

		if (timo_sec >= 0 && --maxloop <= 0) {
			error = ETIMEDOUT;
			break;
		}
	}

	if (error == 0) {
		flags = lemtoh16(&state.c.flags);
		return flags & ~NVME_CQE_PHASE;
	} else {
		/*
		 * If it succeeds later, it would hit a ccb which will have
		 * already been reused for something else. Not good. Cross
		 * fingers and hope for the best. XXX do controller reset?
		 */
		aprint_error_dev(sc->sc_dev, "polled command timed out\n");

		/* Invoke the callback to clean up state anyway */
		struct nvme_cqe cqe;
		memset(&cqe, 0, sizeof(cqe));
		ccb->ccb_done(q, ccb, &cqe);

		return 1;
	}
}

static void
nvme_poll_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
{
	struct nvme_sqe *sqe = slot;
	struct nvme_poll_state *state = ccb->ccb_cookie;

	*sqe = state->s;
}

static void
nvme_poll_done(struct nvme_queue *q, struct nvme_ccb *ccb,
    struct nvme_cqe *cqe)
{
	struct nvme_poll_state *state = ccb->ccb_cookie;

	state->c = *cqe;
	SET(state->c.flags, htole16(NVME_CQE_PHASE));

	ccb->ccb_cookie = state->cookie;
	state->done(q, ccb, &state->c);
}

static void
nvme_sqe_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
{
	struct nvme_sqe *src = ccb->ccb_cookie;
	struct nvme_sqe *dst = slot;

	*dst = *src;
}

static void
nvme_empty_done(struct nvme_queue *q, struct nvme_ccb *ccb,
    struct nvme_cqe *cqe)
{
}

static int
nvme_q_complete(struct nvme_softc *sc, struct nvme_queue *q)
{
	struct nvme_ccb *ccb;
	struct nvme_cqe *ring = NVME_DMA_KVA(q->q_cq_dmamem), *cqe;
	uint16_t flags;
	int rv = 0;

	mutex_enter(&q->q_cq_mtx);

	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
	for (;;) {
		cqe = &ring[q->q_cq_head];
		flags = lemtoh16(&cqe->flags);
		if ((flags & NVME_CQE_PHASE) != q->q_cq_phase)
			break;

		ccb = &q->q_ccbs[lemtoh16(&cqe->cid)];

		if (++q->q_cq_head >= q->q_entries) {
			q->q_cq_head = 0;
			q->q_cq_phase ^= NVME_CQE_PHASE;
		}

#ifdef DEBUG
		/*
		 * If we get a spurious completion notification, something
		 * is seriously hosed up. Very likely DMA to some random
		 * memory place happened, so just bail out.
		 */
		if ((intptr_t)ccb->ccb_cookie == NVME_CCB_FREE) {
			panic("%s: invalid ccb detected",
			    device_xname(sc->sc_dev));
			/* NOTREACHED */
		}
#endif

		rv++;

		/*
		 * Unlock the mutex before calling the ccb_done callback
		 * and re-lock afterwards. The callback triggers lddone()
		 * which schedules another i/o, and also calls nvme_ccb_put().
		 * Unlock/relock avoids the possibility of deadlock.
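		 * This is safe because q_cq_head and q_cq_phase were already
		 * advanced above, so the same entry cannot be picked up a
		 * second time once the mutex is retaken.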
1410 */ 1411 mutex_exit(&q->q_cq_mtx); 1412 ccb->ccb_done(q, ccb, cqe); 1413 mutex_enter(&q->q_cq_mtx); 1414 } 1415 nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD); 1416 1417 if (rv) 1418 nvme_write4(sc, q->q_cqhdbl, q->q_cq_head); 1419 1420 mutex_exit(&q->q_cq_mtx); 1421 1422 return rv; 1423 } 1424 1425 static void 1426 nvme_q_wait_complete(struct nvme_softc *sc, 1427 struct nvme_queue *q, bool (*finished)(void *), void *cookie) 1428 { 1429 mutex_enter(&q->q_ccb_mtx); 1430 if (finished(cookie)) 1431 goto out; 1432 1433 for(;;) { 1434 q->q_ccb_waiting = true; 1435 cv_wait(&q->q_ccb_wait, &q->q_ccb_mtx); 1436 1437 if (finished(cookie)) 1438 break; 1439 } 1440 1441 out: 1442 mutex_exit(&q->q_ccb_mtx); 1443 } 1444 1445 static int 1446 nvme_identify(struct nvme_softc *sc, u_int mps) 1447 { 1448 char sn[41], mn[81], fr[17]; 1449 struct nvm_identify_controller *identify; 1450 struct nvme_dmamem *mem; 1451 struct nvme_ccb *ccb; 1452 u_int mdts; 1453 int rv = 1; 1454 1455 ccb = nvme_ccb_get(sc->sc_admin_q, false); 1456 KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */ 1457 1458 mem = nvme_dmamem_alloc(sc, sizeof(*identify)); 1459 if (mem == NULL) 1460 return 1; 1461 1462 ccb->ccb_done = nvme_empty_done; 1463 ccb->ccb_cookie = mem; 1464 1465 nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD); 1466 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_fill_identify, 1467 NVME_TIMO_IDENT); 1468 nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD); 1469 1470 nvme_ccb_put(sc->sc_admin_q, ccb); 1471 1472 if (rv != 0) 1473 goto done; 1474 1475 identify = NVME_DMA_KVA(mem); 1476 sc->sc_identify = *identify; 1477 identify = NULL; 1478 1479 /* Convert data to host endian */ 1480 nvme_identify_controller_swapbytes(&sc->sc_identify); 1481 1482 strnvisx(sn, sizeof(sn), (const char *)sc->sc_identify.sn, 1483 sizeof(sc->sc_identify.sn), VIS_TRIM|VIS_SAFE|VIS_OCTAL); 1484 strnvisx(mn, sizeof(mn), (const char *)sc->sc_identify.mn, 1485 sizeof(sc->sc_identify.mn), VIS_TRIM|VIS_SAFE|VIS_OCTAL); 1486 strnvisx(fr, sizeof(fr), (const char *)sc->sc_identify.fr, 1487 sizeof(sc->sc_identify.fr), VIS_TRIM|VIS_SAFE|VIS_OCTAL); 1488 aprint_normal_dev(sc->sc_dev, "%s, firmware %s, serial %s\n", mn, fr, 1489 sn); 1490 1491 strlcpy(sc->sc_modelname, mn, sizeof(sc->sc_modelname)); 1492 1493 if (sc->sc_identify.mdts > 0) { 1494 mdts = (1 << sc->sc_identify.mdts) * (1 << mps); 1495 if (mdts < sc->sc_mdts) 1496 sc->sc_mdts = mdts; 1497 } 1498 1499 sc->sc_nn = sc->sc_identify.nn; 1500 1501 done: 1502 nvme_dmamem_free(sc, mem); 1503 1504 return rv; 1505 } 1506 1507 static int 1508 nvme_q_create(struct nvme_softc *sc, struct nvme_queue *q) 1509 { 1510 struct nvme_sqe_q sqe; 1511 struct nvme_ccb *ccb; 1512 int rv; 1513 1514 if (sc->sc_use_mq && sc->sc_intr_establish(sc, q->q_id, q) != 0) 1515 return 1; 1516 1517 ccb = nvme_ccb_get(sc->sc_admin_q, false); 1518 KASSERT(ccb != NULL); 1519 1520 ccb->ccb_done = nvme_empty_done; 1521 ccb->ccb_cookie = &sqe; 1522 1523 memset(&sqe, 0, sizeof(sqe)); 1524 sqe.opcode = NVM_ADMIN_ADD_IOCQ; 1525 htolem64(&sqe.prp1, NVME_DMA_DVA(q->q_cq_dmamem)); 1526 htolem16(&sqe.qsize, q->q_entries - 1); 1527 htolem16(&sqe.qid, q->q_id); 1528 sqe.qflags = NVM_SQE_CQ_IEN | NVM_SQE_Q_PC; 1529 if (sc->sc_use_mq) 1530 htolem16(&sqe.cqid, q->q_id); /* qid == vector */ 1531 1532 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP); 1533 if (rv != 0) 1534 goto fail; 1535 1536 ccb->ccb_done = nvme_empty_done; 1537 ccb->ccb_cookie = &sqe; 1538 1539 memset(&sqe, 0, sizeof(sqe)); 1540 
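	/*
	 * The completion queue now exists; create the submission queue and
	 * bind it to that CQ via the cqid field below.
	 */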
sqe.opcode = NVM_ADMIN_ADD_IOSQ; 1541 htolem64(&sqe.prp1, NVME_DMA_DVA(q->q_sq_dmamem)); 1542 htolem16(&sqe.qsize, q->q_entries - 1); 1543 htolem16(&sqe.qid, q->q_id); 1544 htolem16(&sqe.cqid, q->q_id); 1545 sqe.qflags = NVM_SQE_Q_PC; 1546 1547 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP); 1548 if (rv != 0) 1549 goto fail; 1550 1551 nvme_ccb_put(sc->sc_admin_q, ccb); 1552 return 0; 1553 1554 fail: 1555 if (sc->sc_use_mq) 1556 sc->sc_intr_disestablish(sc, q->q_id); 1557 1558 nvme_ccb_put(sc->sc_admin_q, ccb); 1559 return rv; 1560 } 1561 1562 static int 1563 nvme_q_delete(struct nvme_softc *sc, struct nvme_queue *q) 1564 { 1565 struct nvme_sqe_q sqe; 1566 struct nvme_ccb *ccb; 1567 int rv; 1568 1569 ccb = nvme_ccb_get(sc->sc_admin_q, false); 1570 KASSERT(ccb != NULL); 1571 1572 ccb->ccb_done = nvme_empty_done; 1573 ccb->ccb_cookie = &sqe; 1574 1575 memset(&sqe, 0, sizeof(sqe)); 1576 sqe.opcode = NVM_ADMIN_DEL_IOSQ; 1577 htolem16(&sqe.qid, q->q_id); 1578 1579 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP); 1580 if (rv != 0) 1581 goto fail; 1582 1583 ccb->ccb_done = nvme_empty_done; 1584 ccb->ccb_cookie = &sqe; 1585 1586 memset(&sqe, 0, sizeof(sqe)); 1587 sqe.opcode = NVM_ADMIN_DEL_IOCQ; 1588 htolem16(&sqe.qid, q->q_id); 1589 1590 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP); 1591 if (rv != 0) 1592 goto fail; 1593 1594 fail: 1595 nvme_ccb_put(sc->sc_admin_q, ccb); 1596 1597 if (rv == 0 && sc->sc_use_mq) { 1598 if (sc->sc_intr_disestablish(sc, q->q_id)) 1599 rv = 1; 1600 } 1601 1602 return rv; 1603 } 1604 1605 static void 1606 nvme_fill_identify(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 1607 { 1608 struct nvme_sqe *sqe = slot; 1609 struct nvme_dmamem *mem = ccb->ccb_cookie; 1610 1611 sqe->opcode = NVM_ADMIN_IDENTIFY; 1612 htolem64(&sqe->entry.prp[0], NVME_DMA_DVA(mem)); 1613 htolem32(&sqe->cdw10, 1); 1614 } 1615 1616 static int 1617 nvme_set_number_of_queues(struct nvme_softc *sc, u_int nq, u_int *ncqa, 1618 u_int *nsqa) 1619 { 1620 struct nvme_pt_state state; 1621 struct nvme_pt_command pt; 1622 struct nvme_ccb *ccb; 1623 int rv; 1624 1625 ccb = nvme_ccb_get(sc->sc_admin_q, false); 1626 KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */ 1627 1628 memset(&pt, 0, sizeof(pt)); 1629 pt.cmd.opcode = NVM_ADMIN_SET_FEATURES; 1630 pt.cmd.cdw10 = NVM_FEATURE_NUMBER_OF_QUEUES; 1631 pt.cmd.cdw11 = ((nq - 1) << 16) | (nq - 1); 1632 1633 memset(&state, 0, sizeof(state)); 1634 state.pt = &pt; 1635 state.finished = false; 1636 1637 ccb->ccb_done = nvme_pt_done; 1638 ccb->ccb_cookie = &state; 1639 1640 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_pt_fill, NVME_TIMO_QOP); 1641 1642 if (rv != 0) { 1643 *ncqa = *nsqa = 0; 1644 return EIO; 1645 } 1646 1647 *ncqa = (pt.cpl.cdw0 >> 16) + 1; 1648 *nsqa = (pt.cpl.cdw0 & 0xffff) + 1; 1649 1650 return 0; 1651 } 1652 1653 static int 1654 nvme_ccbs_alloc(struct nvme_queue *q, uint16_t nccbs) 1655 { 1656 struct nvme_softc *sc = q->q_sc; 1657 struct nvme_ccb *ccb; 1658 bus_addr_t off; 1659 uint64_t *prpl; 1660 u_int i; 1661 1662 mutex_init(&q->q_ccb_mtx, MUTEX_DEFAULT, IPL_BIO); 1663 cv_init(&q->q_ccb_wait, "nvmeqw"); 1664 q->q_ccb_waiting = false; 1665 SIMPLEQ_INIT(&q->q_ccb_list); 1666 1667 q->q_ccbs = kmem_alloc(sizeof(*ccb) * nccbs, KM_SLEEP); 1668 1669 q->q_nccbs = nccbs; 1670 q->q_ccb_prpls = nvme_dmamem_alloc(sc, 1671 sizeof(*prpl) * sc->sc_max_sgl * nccbs); 1672 1673 prpl = NVME_DMA_KVA(q->q_ccb_prpls); 1674 off = 0; 1675 1676 for (i = 0; i < nccbs; i++) { 
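		/*
		 * Give each ccb a DMA map sized for the maximum transfer and
		 * its own slice of the shared PRP list.
		 */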
1677 ccb = &q->q_ccbs[i]; 1678 1679 if (bus_dmamap_create(sc->sc_dmat, sc->sc_mdts, 1680 sc->sc_max_sgl + 1 /* we get a free prp in the sqe */, 1681 sc->sc_mps, sc->sc_mps, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, 1682 &ccb->ccb_dmamap) != 0) 1683 goto free_maps; 1684 1685 ccb->ccb_id = i; 1686 ccb->ccb_prpl = prpl; 1687 ccb->ccb_prpl_off = off; 1688 ccb->ccb_prpl_dva = NVME_DMA_DVA(q->q_ccb_prpls) + off; 1689 1690 SIMPLEQ_INSERT_TAIL(&q->q_ccb_list, ccb, ccb_entry); 1691 1692 prpl += sc->sc_max_sgl; 1693 off += sizeof(*prpl) * sc->sc_max_sgl; 1694 } 1695 1696 return 0; 1697 1698 free_maps: 1699 nvme_ccbs_free(q); 1700 return 1; 1701 } 1702 1703 static struct nvme_ccb * 1704 nvme_ccb_get(struct nvme_queue *q, bool wait) 1705 { 1706 struct nvme_ccb *ccb = NULL; 1707 1708 mutex_enter(&q->q_ccb_mtx); 1709 again: 1710 ccb = SIMPLEQ_FIRST(&q->q_ccb_list); 1711 if (ccb != NULL) { 1712 SIMPLEQ_REMOVE_HEAD(&q->q_ccb_list, ccb_entry); 1713 #ifdef DEBUG 1714 ccb->ccb_cookie = NULL; 1715 #endif 1716 } else { 1717 if (__predict_false(wait)) { 1718 q->q_ccb_waiting = true; 1719 cv_wait(&q->q_ccb_wait, &q->q_ccb_mtx); 1720 goto again; 1721 } 1722 } 1723 mutex_exit(&q->q_ccb_mtx); 1724 1725 return ccb; 1726 } 1727 1728 static void 1729 nvme_ccb_put(struct nvme_queue *q, struct nvme_ccb *ccb) 1730 { 1731 1732 mutex_enter(&q->q_ccb_mtx); 1733 #ifdef DEBUG 1734 ccb->ccb_cookie = (void *)NVME_CCB_FREE; 1735 #endif 1736 SIMPLEQ_INSERT_HEAD(&q->q_ccb_list, ccb, ccb_entry); 1737 1738 /* It's unlikely there are any waiters, it's not used for regular I/O */ 1739 if (__predict_false(q->q_ccb_waiting)) { 1740 q->q_ccb_waiting = false; 1741 cv_broadcast(&q->q_ccb_wait); 1742 } 1743 1744 mutex_exit(&q->q_ccb_mtx); 1745 } 1746 1747 static void 1748 nvme_ccbs_free(struct nvme_queue *q) 1749 { 1750 struct nvme_softc *sc = q->q_sc; 1751 struct nvme_ccb *ccb; 1752 1753 mutex_enter(&q->q_ccb_mtx); 1754 while ((ccb = SIMPLEQ_FIRST(&q->q_ccb_list)) != NULL) { 1755 SIMPLEQ_REMOVE_HEAD(&q->q_ccb_list, ccb_entry); 1756 /* 1757 * bus_dmamap_destroy() may call vm_map_lock() and rw_enter() 1758 * internally. 
Do not hold the spin mutex across the call.
		 */
		mutex_exit(&q->q_ccb_mtx);
		bus_dmamap_destroy(sc->sc_dmat, ccb->ccb_dmamap);
		mutex_enter(&q->q_ccb_mtx);
	}
	mutex_exit(&q->q_ccb_mtx);

	nvme_dmamem_free(sc, q->q_ccb_prpls);
	kmem_free(q->q_ccbs, sizeof(*ccb) * q->q_nccbs);
	q->q_ccbs = NULL;
	cv_destroy(&q->q_ccb_wait);
	mutex_destroy(&q->q_ccb_mtx);
}

static struct nvme_queue *
nvme_q_alloc(struct nvme_softc *sc, uint16_t id, u_int entries, u_int dstrd)
{
	struct nvme_queue *q;

	q = kmem_alloc(sizeof(*q), KM_SLEEP);
	q->q_sc = sc;
	q->q_sq_dmamem = nvme_dmamem_alloc(sc,
	    sizeof(struct nvme_sqe) * entries);
	if (q->q_sq_dmamem == NULL)
		goto free;

	q->q_cq_dmamem = nvme_dmamem_alloc(sc,
	    sizeof(struct nvme_cqe) * entries);
	if (q->q_cq_dmamem == NULL)
		goto free_sq;

	memset(NVME_DMA_KVA(q->q_sq_dmamem), 0, NVME_DMA_LEN(q->q_sq_dmamem));
	memset(NVME_DMA_KVA(q->q_cq_dmamem), 0, NVME_DMA_LEN(q->q_cq_dmamem));

	mutex_init(&q->q_sq_mtx, MUTEX_DEFAULT, IPL_BIO);
	mutex_init(&q->q_cq_mtx, MUTEX_DEFAULT, IPL_BIO);
	q->q_sqtdbl = NVME_SQTDBL(id, dstrd);
	q->q_cqhdbl = NVME_CQHDBL(id, dstrd);
	q->q_id = id;
	q->q_entries = entries;
	q->q_sq_tail = 0;
	q->q_cq_head = 0;
	q->q_cq_phase = NVME_CQE_PHASE;

	nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_PREWRITE);
	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);

	/*
	 * Due to the definition of full and empty queues (a queue is empty
	 * when head == tail, and full when tail is one less than head),
	 * we can actually only have (entries - 1) commands in flight.
	 */
	if (nvme_ccbs_alloc(q, entries - 1) != 0) {
		aprint_error_dev(sc->sc_dev, "unable to allocate ccbs\n");
		goto free_cq;
	}

	return q;

free_cq:
	nvme_dmamem_free(sc, q->q_cq_dmamem);
free_sq:
	nvme_dmamem_free(sc, q->q_sq_dmamem);
free:
	kmem_free(q, sizeof(*q));

	return NULL;
}

static void
nvme_q_free(struct nvme_softc *sc, struct nvme_queue *q)
{
	nvme_ccbs_free(q);
	mutex_destroy(&q->q_sq_mtx);
	mutex_destroy(&q->q_cq_mtx);
	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
	nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_POSTWRITE);
	nvme_dmamem_free(sc, q->q_cq_dmamem);
	nvme_dmamem_free(sc, q->q_sq_dmamem);
	kmem_free(q, sizeof(*q));
}

int
nvme_intr(void *xsc)
{
	struct nvme_softc *sc = xsc;

	/*
	 * INTx is level triggered; the controller deasserts the interrupt
	 * only when we advance the completion queue head via a doorbell
	 * write. Tell the controller to block the interrupts while we
	 * process the queue(s).
	 */
	nvme_write4(sc, NVME_INTMS, 1);

	softint_schedule(sc->sc_softih[0]);

	/* don't know, might not have been for us */
	return 1;
}

void
nvme_softintr_intx(void *xq)
{
	struct nvme_queue *q = xq;
	struct nvme_softc *sc = q->q_sc;

	nvme_q_complete(sc, sc->sc_admin_q);
	if (sc->sc_q != NULL)
		nvme_q_complete(sc, sc->sc_q[0]);

	/*
	 * Processing done, tell the controller to issue interrupts again.
	 * There is no race, as the NVMe spec requires the controller to
	 * maintain state and assert the interrupt whenever there are
	 * unacknowledged completion queue entries.
1875 */ 1876 nvme_write4(sc, NVME_INTMC, 1); 1877 } 1878 1879 int 1880 nvme_intr_msi(void *xq) 1881 { 1882 struct nvme_queue *q = xq; 1883 1884 KASSERT(q && q->q_sc && q->q_sc->sc_softih 1885 && q->q_sc->sc_softih[q->q_id]); 1886 1887 /* 1888 * MSI/MSI-X are edge triggered, so can handover processing to softint 1889 * without masking the interrupt. 1890 */ 1891 softint_schedule(q->q_sc->sc_softih[q->q_id]); 1892 1893 return 1; 1894 } 1895 1896 void 1897 nvme_softintr_msi(void *xq) 1898 { 1899 struct nvme_queue *q = xq; 1900 struct nvme_softc *sc = q->q_sc; 1901 1902 nvme_q_complete(sc, q); 1903 } 1904 1905 static struct nvme_dmamem * 1906 nvme_dmamem_alloc(struct nvme_softc *sc, size_t size) 1907 { 1908 struct nvme_dmamem *ndm; 1909 int nsegs; 1910 1911 ndm = kmem_zalloc(sizeof(*ndm), KM_SLEEP); 1912 if (ndm == NULL) 1913 return NULL; 1914 1915 ndm->ndm_size = size; 1916 1917 if (bus_dmamap_create(sc->sc_dmat, size, btoc(round_page(size)), size, 0, 1918 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &ndm->ndm_map) != 0) 1919 goto ndmfree; 1920 1921 if (bus_dmamem_alloc(sc->sc_dmat, size, sc->sc_mps, 0, &ndm->ndm_seg, 1922 1, &nsegs, BUS_DMA_WAITOK) != 0) 1923 goto destroy; 1924 1925 if (bus_dmamem_map(sc->sc_dmat, &ndm->ndm_seg, nsegs, size, 1926 &ndm->ndm_kva, BUS_DMA_WAITOK) != 0) 1927 goto free; 1928 1929 if (bus_dmamap_load(sc->sc_dmat, ndm->ndm_map, ndm->ndm_kva, size, 1930 NULL, BUS_DMA_WAITOK) != 0) 1931 goto unmap; 1932 1933 memset(ndm->ndm_kva, 0, size); 1934 bus_dmamap_sync(sc->sc_dmat, ndm->ndm_map, 0, size, BUS_DMASYNC_PREREAD); 1935 1936 return ndm; 1937 1938 unmap: 1939 bus_dmamem_unmap(sc->sc_dmat, ndm->ndm_kva, size); 1940 free: 1941 bus_dmamem_free(sc->sc_dmat, &ndm->ndm_seg, 1); 1942 destroy: 1943 bus_dmamap_destroy(sc->sc_dmat, ndm->ndm_map); 1944 ndmfree: 1945 kmem_free(ndm, sizeof(*ndm)); 1946 return NULL; 1947 } 1948 1949 static void 1950 nvme_dmamem_sync(struct nvme_softc *sc, struct nvme_dmamem *mem, int ops) 1951 { 1952 bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(mem), 1953 0, NVME_DMA_LEN(mem), ops); 1954 } 1955 1956 void 1957 nvme_dmamem_free(struct nvme_softc *sc, struct nvme_dmamem *ndm) 1958 { 1959 bus_dmamap_unload(sc->sc_dmat, ndm->ndm_map); 1960 bus_dmamem_unmap(sc->sc_dmat, ndm->ndm_kva, ndm->ndm_size); 1961 bus_dmamem_free(sc->sc_dmat, &ndm->ndm_seg, 1); 1962 bus_dmamap_destroy(sc->sc_dmat, ndm->ndm_map); 1963 kmem_free(ndm, sizeof(*ndm)); 1964 } 1965 1966 /* 1967 * ioctl 1968 */ 1969 1970 dev_type_open(nvmeopen); 1971 dev_type_close(nvmeclose); 1972 dev_type_ioctl(nvmeioctl); 1973 1974 const struct cdevsw nvme_cdevsw = { 1975 .d_open = nvmeopen, 1976 .d_close = nvmeclose, 1977 .d_read = noread, 1978 .d_write = nowrite, 1979 .d_ioctl = nvmeioctl, 1980 .d_stop = nostop, 1981 .d_tty = notty, 1982 .d_poll = nopoll, 1983 .d_mmap = nommap, 1984 .d_kqfilter = nokqfilter, 1985 .d_discard = nodiscard, 1986 .d_flag = D_OTHER, 1987 }; 1988 1989 /* 1990 * Accept an open operation on the control device. 
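 * The minor number encodes the unit in the upper bits and the namespace ID
 * in the low 16 bits; nsid 0 selects the controller node itself
 * (e.g. minor 0x10002 is unit 1, namespace 2).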
1991 */ 1992 int 1993 nvmeopen(dev_t dev, int flag, int mode, struct lwp *l) 1994 { 1995 struct nvme_softc *sc; 1996 int unit = minor(dev) / 0x10000; 1997 int nsid = minor(dev) & 0xffff; 1998 int nsidx; 1999 2000 if ((sc = device_lookup_private(&nvme_cd, unit)) == NULL) 2001 return ENXIO; 2002 if ((sc->sc_flags & NVME_F_ATTACHED) == 0) 2003 return ENXIO; 2004 2005 if (nsid == 0) { 2006 /* controller */ 2007 if (ISSET(sc->sc_flags, NVME_F_OPEN)) 2008 return EBUSY; 2009 SET(sc->sc_flags, NVME_F_OPEN); 2010 } else { 2011 /* namespace */ 2012 nsidx = nsid - 1; 2013 if (nsidx >= sc->sc_nn || sc->sc_namespaces[nsidx].dev == NULL) 2014 return ENXIO; 2015 if (ISSET(sc->sc_namespaces[nsidx].flags, NVME_NS_F_OPEN)) 2016 return EBUSY; 2017 SET(sc->sc_namespaces[nsidx].flags, NVME_NS_F_OPEN); 2018 } 2019 return 0; 2020 } 2021 2022 /* 2023 * Accept the last close on the control device. 2024 */ 2025 int 2026 nvmeclose(dev_t dev, int flag, int mode, struct lwp *l) 2027 { 2028 struct nvme_softc *sc; 2029 int unit = minor(dev) / 0x10000; 2030 int nsid = minor(dev) & 0xffff; 2031 int nsidx; 2032 2033 sc = device_lookup_private(&nvme_cd, unit); 2034 if (sc == NULL) 2035 return ENXIO; 2036 2037 if (nsid == 0) { 2038 /* controller */ 2039 CLR(sc->sc_flags, NVME_F_OPEN); 2040 } else { 2041 /* namespace */ 2042 nsidx = nsid - 1; 2043 if (nsidx >= sc->sc_nn) 2044 return ENXIO; 2045 CLR(sc->sc_namespaces[nsidx].flags, NVME_NS_F_OPEN); 2046 } 2047 2048 return 0; 2049 } 2050 2051 /* 2052 * Handle control operations. 2053 */ 2054 int 2055 nvmeioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 2056 { 2057 struct nvme_softc *sc; 2058 int unit = minor(dev) / 0x10000; 2059 int nsid = minor(dev) & 0xffff; 2060 struct nvme_pt_command *pt; 2061 2062 sc = device_lookup_private(&nvme_cd, unit); 2063 if (sc == NULL) 2064 return ENXIO; 2065 2066 switch (cmd) { 2067 case NVME_PASSTHROUGH_CMD: 2068 pt = data; 2069 return nvme_command_passthrough(sc, data, 2070 nsid == 0 ? pt->cmd.nsid : nsid, l, nsid == 0); 2071 } 2072 2073 return ENOTTY; 2074 } 2075