1 /* $NetBSD: nvme.c,v 1.55 2021/04/24 23:36:55 thorpej Exp $ */ 2 /* $OpenBSD: nvme.c,v 1.49 2016/04/18 05:59:50 dlg Exp $ */ 3 4 /* 5 * Copyright (c) 2014 David Gwynne <dlg@openbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 20 #include <sys/cdefs.h> 21 __KERNEL_RCSID(0, "$NetBSD: nvme.c,v 1.55 2021/04/24 23:36:55 thorpej Exp $"); 22 23 #include <sys/param.h> 24 #include <sys/systm.h> 25 #include <sys/kernel.h> 26 #include <sys/atomic.h> 27 #include <sys/bus.h> 28 #include <sys/buf.h> 29 #include <sys/conf.h> 30 #include <sys/device.h> 31 #include <sys/kmem.h> 32 #include <sys/once.h> 33 #include <sys/proc.h> 34 #include <sys/queue.h> 35 #include <sys/mutex.h> 36 37 #include <uvm/uvm_extern.h> 38 39 #include <dev/ic/nvmereg.h> 40 #include <dev/ic/nvmevar.h> 41 #include <dev/ic/nvmeio.h> 42 43 #include "ioconf.h" 44 #include "locators.h" 45 46 #define B4_CHK_RDY_DELAY_MS 2300 /* workaround controller bug */ 47 48 int nvme_adminq_size = 32; 49 int nvme_ioq_size = 1024; 50 51 static int nvme_print(void *, const char *); 52 53 static int nvme_ready(struct nvme_softc *, uint32_t); 54 static int nvme_enable(struct nvme_softc *, u_int); 55 static int nvme_disable(struct nvme_softc *); 56 static int nvme_shutdown(struct nvme_softc *); 57 58 #ifdef NVME_DEBUG 59 static void nvme_dumpregs(struct nvme_softc *); 60 #endif 61 static int nvme_identify(struct nvme_softc *, u_int); 62 static void nvme_fill_identify(struct nvme_queue *, struct nvme_ccb *, 63 void *); 64 65 static int nvme_ccbs_alloc(struct nvme_queue *, uint16_t); 66 static void nvme_ccbs_free(struct nvme_queue *); 67 68 static struct nvme_ccb * 69 nvme_ccb_get(struct nvme_queue *, bool); 70 static void nvme_ccb_put(struct nvme_queue *, struct nvme_ccb *); 71 72 static int nvme_poll(struct nvme_softc *, struct nvme_queue *, 73 struct nvme_ccb *, void (*)(struct nvme_queue *, 74 struct nvme_ccb *, void *), int); 75 static void nvme_poll_fill(struct nvme_queue *, struct nvme_ccb *, void *); 76 static void nvme_poll_done(struct nvme_queue *, struct nvme_ccb *, 77 struct nvme_cqe *); 78 static void nvme_sqe_fill(struct nvme_queue *, struct nvme_ccb *, void *); 79 static void nvme_empty_done(struct nvme_queue *, struct nvme_ccb *, 80 struct nvme_cqe *); 81 82 static struct nvme_queue * 83 nvme_q_alloc(struct nvme_softc *, uint16_t, u_int, u_int); 84 static int nvme_q_create(struct nvme_softc *, struct nvme_queue *); 85 static int nvme_q_delete(struct nvme_softc *, struct nvme_queue *); 86 static void nvme_q_submit(struct nvme_softc *, struct nvme_queue *, 87 struct nvme_ccb *, void (*)(struct nvme_queue *, 88 struct nvme_ccb *, void *)); 89 static int nvme_q_complete(struct nvme_softc *, struct nvme_queue *q); 90 static void nvme_q_free(struct nvme_softc *, struct nvme_queue *); 91 static void nvme_q_wait_complete(struct nvme_softc *, struct nvme_queue *, 92 bool (*)(void *), void *); 93 94 static struct nvme_dmamem * 95 nvme_dmamem_alloc(struct nvme_softc *, size_t); 96 static void nvme_dmamem_free(struct nvme_softc *, struct nvme_dmamem *); 97 static void nvme_dmamem_sync(struct nvme_softc *, struct nvme_dmamem *, 98 int); 99 100 static void nvme_ns_io_fill(struct nvme_queue *, struct nvme_ccb *, 101 void *); 102 static void nvme_ns_io_done(struct nvme_queue *, struct nvme_ccb *, 103 struct nvme_cqe *); 104 static void nvme_ns_sync_fill(struct nvme_queue *, struct nvme_ccb *, 105 void *); 106 static void nvme_ns_sync_done(struct nvme_queue *, struct nvme_ccb *, 107 struct nvme_cqe *); 108 static void nvme_getcache_fill(struct nvme_queue *, struct nvme_ccb *, 109 void *); 110 static void nvme_getcache_done(struct nvme_queue *, struct nvme_ccb *, 111 struct nvme_cqe *); 112 113 static void nvme_pt_fill(struct nvme_queue *, struct nvme_ccb *, 114 void *); 115 static void nvme_pt_done(struct nvme_queue *, struct nvme_ccb *, 116 struct nvme_cqe *); 117 static int nvme_command_passthrough(struct nvme_softc *, 118 struct nvme_pt_command *, uint16_t, struct lwp *, bool); 119 120 static int nvme_set_number_of_queues(struct nvme_softc *, u_int, u_int *, 121 u_int *); 122 123 #define NVME_TIMO_QOP 5 /* queue create and delete timeout */ 124 #define NVME_TIMO_IDENT 10 /* probe identify timeout */ 125 #define NVME_TIMO_PT -1 /* passthrough cmd timeout */ 126 #define NVME_TIMO_SY 60 /* sync cache timeout */ 127 128 #define nvme_read4(_s, _r) \ 129 bus_space_read_4((_s)->sc_iot, (_s)->sc_ioh, (_r)) 130 #define nvme_write4(_s, _r, _v) \ 131 bus_space_write_4((_s)->sc_iot, (_s)->sc_ioh, (_r), (_v)) 132 /* 133 * Some controllers, at least Apple NVMe, always require split 134 * transfers, so don't use bus_space_{read,write}_8() on LP64. 135 */ 136 static inline uint64_t 137 nvme_read8(struct nvme_softc *sc, bus_size_t r) 138 { 139 uint64_t v; 140 uint32_t *a = (uint32_t *)&v; 141 142 #if _BYTE_ORDER == _LITTLE_ENDIAN 143 a[0] = nvme_read4(sc, r); 144 a[1] = nvme_read4(sc, r + 4); 145 #else /* _BYTE_ORDER == _LITTLE_ENDIAN */ 146 a[1] = nvme_read4(sc, r); 147 a[0] = nvme_read4(sc, r + 4); 148 #endif 149 150 return v; 151 } 152 153 static inline void 154 nvme_write8(struct nvme_softc *sc, bus_size_t r, uint64_t v) 155 { 156 uint32_t *a = (uint32_t *)&v; 157 158 #if _BYTE_ORDER == _LITTLE_ENDIAN 159 nvme_write4(sc, r, a[0]); 160 nvme_write4(sc, r + 4, a[1]); 161 #else /* _BYTE_ORDER == _LITTLE_ENDIAN */ 162 nvme_write4(sc, r, a[1]); 163 nvme_write4(sc, r + 4, a[0]); 164 #endif 165 } 166 #define nvme_barrier(_s, _r, _l, _f) \ 167 bus_space_barrier((_s)->sc_iot, (_s)->sc_ioh, (_r), (_l), (_f)) 168 169 #ifdef NVME_DEBUG 170 static __used void 171 nvme_dumpregs(struct nvme_softc *sc) 172 { 173 uint64_t r8; 174 uint32_t r4; 175 176 #define DEVNAME(_sc) device_xname((_sc)->sc_dev) 177 r8 = nvme_read8(sc, NVME_CAP); 178 printf("%s: cap 0x%016"PRIx64"\n", DEVNAME(sc), nvme_read8(sc, NVME_CAP)); 179 printf("%s: mpsmax %u (%u)\n", DEVNAME(sc), 180 (u_int)NVME_CAP_MPSMAX(r8), (1 << NVME_CAP_MPSMAX(r8))); 181 printf("%s: mpsmin %u (%u)\n", DEVNAME(sc), 182 (u_int)NVME_CAP_MPSMIN(r8), (1 << NVME_CAP_MPSMIN(r8))); 183 printf("%s: css %"PRIu64"\n", DEVNAME(sc), NVME_CAP_CSS(r8)); 184 printf("%s: nssrs %"PRIu64"\n", DEVNAME(sc), NVME_CAP_NSSRS(r8)); 185 printf("%s: dstrd %"PRIu64"\n", DEVNAME(sc), NVME_CAP_DSTRD(r8)); 186 printf("%s: to %"PRIu64" msec\n", DEVNAME(sc), NVME_CAP_TO(r8)); 187 printf("%s: ams %"PRIu64"\n", DEVNAME(sc), NVME_CAP_AMS(r8)); 188 printf("%s: cqr %"PRIu64"\n", DEVNAME(sc), NVME_CAP_CQR(r8)); 189 printf("%s: mqes %"PRIu64"\n", DEVNAME(sc), NVME_CAP_MQES(r8)); 190 191 printf("%s: vs 0x%04x\n", DEVNAME(sc), nvme_read4(sc, NVME_VS)); 192 193 r4 = nvme_read4(sc, NVME_CC); 194 printf("%s: cc 0x%04x\n", DEVNAME(sc), r4); 195 printf("%s: iocqes %u (%u)\n", DEVNAME(sc), NVME_CC_IOCQES_R(r4), 196 (1 << NVME_CC_IOCQES_R(r4))); 197 printf("%s: iosqes %u (%u)\n", DEVNAME(sc), NVME_CC_IOSQES_R(r4), 198 (1 << NVME_CC_IOSQES_R(r4))); 199 printf("%s: shn %u\n", DEVNAME(sc), NVME_CC_SHN_R(r4)); 200 printf("%s: ams %u\n", DEVNAME(sc), NVME_CC_AMS_R(r4)); 201 printf("%s: mps %u (%u)\n", DEVNAME(sc), NVME_CC_MPS_R(r4), 202 (1 << NVME_CC_MPS_R(r4))); 203 printf("%s: css %u\n", DEVNAME(sc), NVME_CC_CSS_R(r4)); 204 printf("%s: en %u\n", DEVNAME(sc), ISSET(r4, NVME_CC_EN) ? 1 : 0); 205 206 r4 = nvme_read4(sc, NVME_CSTS); 207 printf("%s: csts 0x%08x\n", DEVNAME(sc), r4); 208 printf("%s: rdy %u\n", DEVNAME(sc), r4 & NVME_CSTS_RDY); 209 printf("%s: cfs %u\n", DEVNAME(sc), r4 & NVME_CSTS_CFS); 210 printf("%s: shst %x\n", DEVNAME(sc), r4 & NVME_CSTS_SHST_MASK); 211 212 r4 = nvme_read4(sc, NVME_AQA); 213 printf("%s: aqa 0x%08x\n", DEVNAME(sc), r4); 214 printf("%s: acqs %u\n", DEVNAME(sc), NVME_AQA_ACQS_R(r4)); 215 printf("%s: asqs %u\n", DEVNAME(sc), NVME_AQA_ASQS_R(r4)); 216 217 printf("%s: asq 0x%016"PRIx64"\n", DEVNAME(sc), nvme_read8(sc, NVME_ASQ)); 218 printf("%s: acq 0x%016"PRIx64"\n", DEVNAME(sc), nvme_read8(sc, NVME_ACQ)); 219 #undef DEVNAME 220 } 221 #endif /* NVME_DEBUG */ 222 223 static int 224 nvme_ready(struct nvme_softc *sc, uint32_t rdy) 225 { 226 u_int i = 0; 227 228 while ((nvme_read4(sc, NVME_CSTS) & NVME_CSTS_RDY) != rdy) { 229 if (i++ > sc->sc_rdy_to) 230 return ENXIO; 231 232 delay(1000); 233 nvme_barrier(sc, NVME_CSTS, 4, BUS_SPACE_BARRIER_READ); 234 } 235 236 return 0; 237 } 238 239 static int 240 nvme_enable(struct nvme_softc *sc, u_int mps) 241 { 242 uint32_t cc, csts; 243 int error; 244 245 cc = nvme_read4(sc, NVME_CC); 246 csts = nvme_read4(sc, NVME_CSTS); 247 248 /* 249 * See note in nvme_disable. Short circuit if we're already enabled. 250 */ 251 if (ISSET(cc, NVME_CC_EN)) { 252 if (ISSET(csts, NVME_CSTS_RDY)) 253 return 0; 254 255 goto waitready; 256 } else { 257 /* EN == 0 already wait for RDY == 0 or fail */ 258 error = nvme_ready(sc, 0); 259 if (error) 260 return error; 261 } 262 263 nvme_write8(sc, NVME_ASQ, NVME_DMA_DVA(sc->sc_admin_q->q_sq_dmamem)); 264 nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE); 265 delay(5000); 266 nvme_write8(sc, NVME_ACQ, NVME_DMA_DVA(sc->sc_admin_q->q_cq_dmamem)); 267 nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE); 268 delay(5000); 269 270 nvme_write4(sc, NVME_AQA, NVME_AQA_ACQS(sc->sc_admin_q->q_entries) | 271 NVME_AQA_ASQS(sc->sc_admin_q->q_entries)); 272 nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE); 273 delay(5000); 274 275 CLR(cc, NVME_CC_IOCQES_MASK | NVME_CC_IOSQES_MASK | NVME_CC_SHN_MASK | 276 NVME_CC_AMS_MASK | NVME_CC_MPS_MASK | NVME_CC_CSS_MASK); 277 SET(cc, NVME_CC_IOSQES(ffs(64) - 1) | NVME_CC_IOCQES(ffs(16) - 1)); 278 SET(cc, NVME_CC_SHN(NVME_CC_SHN_NONE)); 279 SET(cc, NVME_CC_CSS(NVME_CC_CSS_NVM)); 280 SET(cc, NVME_CC_AMS(NVME_CC_AMS_RR)); 281 SET(cc, NVME_CC_MPS(mps)); 282 SET(cc, NVME_CC_EN); 283 284 nvme_write4(sc, NVME_CC, cc); 285 nvme_barrier(sc, 0, sc->sc_ios, 286 BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); 287 288 waitready: 289 return nvme_ready(sc, NVME_CSTS_RDY); 290 } 291 292 static int 293 nvme_disable(struct nvme_softc *sc) 294 { 295 uint32_t cc, csts; 296 int error; 297 298 cc = nvme_read4(sc, NVME_CC); 299 csts = nvme_read4(sc, NVME_CSTS); 300 301 /* 302 * Per 3.1.5 in NVME 1.3 spec, transitioning CC.EN from 0 to 1 303 * when CSTS.RDY is 1 or transitioning CC.EN from 1 to 0 when 304 * CSTS.RDY is 0 "has undefined results" So make sure that CSTS.RDY 305 * isn't the desired value. Short circuit if we're already disabled. 306 */ 307 if (ISSET(cc, NVME_CC_EN)) { 308 if (!ISSET(csts, NVME_CSTS_RDY)) { 309 /* EN == 1, wait for RDY == 1 or fail */ 310 error = nvme_ready(sc, NVME_CSTS_RDY); 311 if (error) 312 return error; 313 } 314 } else { 315 /* EN == 0 already wait for RDY == 0 */ 316 if (!ISSET(csts, NVME_CSTS_RDY)) 317 return 0; 318 319 goto waitready; 320 } 321 322 CLR(cc, NVME_CC_EN); 323 nvme_write4(sc, NVME_CC, cc); 324 nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_READ); 325 326 /* 327 * Some drives have issues with accessing the mmio after we disable, 328 * so delay for a bit after we write the bit to cope with these issues. 329 */ 330 if (ISSET(sc->sc_quirks, NVME_QUIRK_DELAY_B4_CHK_RDY)) 331 delay(B4_CHK_RDY_DELAY_MS); 332 333 waitready: 334 return nvme_ready(sc, 0); 335 } 336 337 int 338 nvme_attach(struct nvme_softc *sc) 339 { 340 uint64_t cap; 341 uint32_t reg; 342 u_int dstrd; 343 u_int mps = PAGE_SHIFT; 344 u_int ncq, nsq; 345 uint16_t adminq_entries = nvme_adminq_size; 346 uint16_t ioq_entries = nvme_ioq_size; 347 int i; 348 349 reg = nvme_read4(sc, NVME_VS); 350 if (reg == 0xffffffff) { 351 aprint_error_dev(sc->sc_dev, "invalid mapping\n"); 352 return 1; 353 } 354 355 if (NVME_VS_TER(reg) == 0) 356 aprint_normal_dev(sc->sc_dev, "NVMe %d.%d\n", NVME_VS_MJR(reg), 357 NVME_VS_MNR(reg)); 358 else 359 aprint_normal_dev(sc->sc_dev, "NVMe %d.%d.%d\n", NVME_VS_MJR(reg), 360 NVME_VS_MNR(reg), NVME_VS_TER(reg)); 361 362 cap = nvme_read8(sc, NVME_CAP); 363 dstrd = NVME_CAP_DSTRD(cap); 364 if (NVME_CAP_MPSMIN(cap) > PAGE_SHIFT) { 365 aprint_error_dev(sc->sc_dev, "NVMe minimum page size %u " 366 "is greater than CPU page size %u\n", 367 1 << NVME_CAP_MPSMIN(cap), 1 << PAGE_SHIFT); 368 return 1; 369 } 370 if (NVME_CAP_MPSMAX(cap) < mps) 371 mps = NVME_CAP_MPSMAX(cap); 372 if (ioq_entries > NVME_CAP_MQES(cap)) 373 ioq_entries = NVME_CAP_MQES(cap); 374 375 /* set initial values to be used for admin queue during probe */ 376 sc->sc_rdy_to = NVME_CAP_TO(cap); 377 sc->sc_mps = 1 << mps; 378 sc->sc_mdts = MAXPHYS; 379 sc->sc_max_sgl = btoc(round_page(sc->sc_mdts)); 380 381 if (nvme_disable(sc) != 0) { 382 aprint_error_dev(sc->sc_dev, "unable to disable controller\n"); 383 return 1; 384 } 385 386 sc->sc_admin_q = nvme_q_alloc(sc, NVME_ADMIN_Q, adminq_entries, dstrd); 387 if (sc->sc_admin_q == NULL) { 388 aprint_error_dev(sc->sc_dev, 389 "unable to allocate admin queue\n"); 390 return 1; 391 } 392 if (sc->sc_intr_establish(sc, NVME_ADMIN_Q, sc->sc_admin_q)) 393 goto free_admin_q; 394 395 if (nvme_enable(sc, mps) != 0) { 396 aprint_error_dev(sc->sc_dev, "unable to enable controller\n"); 397 goto disestablish_admin_q; 398 } 399 400 if (nvme_identify(sc, NVME_CAP_MPSMIN(cap)) != 0) { 401 aprint_error_dev(sc->sc_dev, "unable to identify controller\n"); 402 goto disable; 403 } 404 if (sc->sc_nn == 0) { 405 aprint_error_dev(sc->sc_dev, "namespace not found\n"); 406 goto disable; 407 } 408 409 /* we know how big things are now */ 410 sc->sc_max_sgl = sc->sc_mdts / sc->sc_mps; 411 412 /* reallocate ccbs of admin queue with new max sgl. */ 413 nvme_ccbs_free(sc->sc_admin_q); 414 nvme_ccbs_alloc(sc->sc_admin_q, sc->sc_admin_q->q_entries); 415 416 if (sc->sc_use_mq) { 417 /* Limit the number of queues to the number allocated in HW */ 418 if (nvme_set_number_of_queues(sc, sc->sc_nq, &ncq, &nsq) != 0) { 419 aprint_error_dev(sc->sc_dev, 420 "unable to get number of queues\n"); 421 goto disable; 422 } 423 if (sc->sc_nq > ncq) 424 sc->sc_nq = ncq; 425 if (sc->sc_nq > nsq) 426 sc->sc_nq = nsq; 427 } 428 429 sc->sc_q = kmem_zalloc(sizeof(*sc->sc_q) * sc->sc_nq, KM_SLEEP); 430 for (i = 0; i < sc->sc_nq; i++) { 431 sc->sc_q[i] = nvme_q_alloc(sc, i + 1, ioq_entries, dstrd); 432 if (sc->sc_q[i] == NULL) { 433 aprint_error_dev(sc->sc_dev, 434 "unable to allocate io queue\n"); 435 goto free_q; 436 } 437 if (nvme_q_create(sc, sc->sc_q[i]) != 0) { 438 aprint_error_dev(sc->sc_dev, 439 "unable to create io queue\n"); 440 nvme_q_free(sc, sc->sc_q[i]); 441 goto free_q; 442 } 443 } 444 445 if (!sc->sc_use_mq) 446 nvme_write4(sc, NVME_INTMC, 1); 447 448 /* probe subdevices */ 449 sc->sc_namespaces = kmem_zalloc(sizeof(*sc->sc_namespaces) * sc->sc_nn, 450 KM_SLEEP); 451 nvme_rescan(sc->sc_dev, NULL, NULL); 452 453 return 0; 454 455 free_q: 456 while (--i >= 0) { 457 nvme_q_delete(sc, sc->sc_q[i]); 458 nvme_q_free(sc, sc->sc_q[i]); 459 } 460 disable: 461 nvme_disable(sc); 462 disestablish_admin_q: 463 sc->sc_intr_disestablish(sc, NVME_ADMIN_Q); 464 free_admin_q: 465 nvme_q_free(sc, sc->sc_admin_q); 466 467 return 1; 468 } 469 470 int 471 nvme_rescan(device_t self, const char *ifattr, const int *locs) 472 { 473 struct nvme_softc *sc = device_private(self); 474 struct nvme_attach_args naa; 475 struct nvm_namespace_format *f; 476 struct nvme_namespace *ns; 477 uint64_t cap; 478 int ioq_entries = nvme_ioq_size; 479 int i, mlocs[NVMECF_NLOCS]; 480 int error; 481 482 cap = nvme_read8(sc, NVME_CAP); 483 if (ioq_entries > NVME_CAP_MQES(cap)) 484 ioq_entries = NVME_CAP_MQES(cap); 485 486 for (i = 1; i <= sc->sc_nn; i++) { 487 if (sc->sc_namespaces[i - 1].dev) 488 continue; 489 490 /* identify to check for availability */ 491 error = nvme_ns_identify(sc, i); 492 if (error) { 493 aprint_error_dev(self, "couldn't identify namespace #%d\n", i); 494 continue; 495 } 496 497 ns = nvme_ns_get(sc, i); 498 KASSERT(ns); 499 500 f = &ns->ident->lbaf[NVME_ID_NS_FLBAS(ns->ident->flbas)]; 501 502 /* 503 * NVME1.0e 6.11 Identify command 504 * 505 * LBADS values smaller than 9 are not supported, a value 506 * of zero means that the format is not used. 507 */ 508 if (f->lbads < 9) { 509 if (f->lbads > 0) 510 aprint_error_dev(self, 511 "unsupported logical data size %u\n", f->lbads); 512 continue; 513 } 514 515 mlocs[NVMECF_NSID] = i; 516 517 memset(&naa, 0, sizeof(naa)); 518 naa.naa_nsid = i; 519 naa.naa_qentries = (ioq_entries - 1) * sc->sc_nq; 520 naa.naa_maxphys = sc->sc_mdts; 521 naa.naa_typename = sc->sc_modelname; 522 sc->sc_namespaces[i - 1].dev = 523 config_found(sc->sc_dev, &naa, nvme_print, 524 CFARG_SUBMATCH, config_stdsubmatch, 525 CFARG_LOCATORS, mlocs, 526 CFARG_EOL); 527 } 528 return 0; 529 } 530 531 static int 532 nvme_print(void *aux, const char *pnp) 533 { 534 struct nvme_attach_args *naa = aux; 535 536 if (pnp) 537 aprint_normal("ld at %s", pnp); 538 539 if (naa->naa_nsid > 0) 540 aprint_normal(" nsid %d", naa->naa_nsid); 541 542 return UNCONF; 543 } 544 545 int 546 nvme_detach(struct nvme_softc *sc, int flags) 547 { 548 int i, error; 549 550 error = config_detach_children(sc->sc_dev, flags); 551 if (error) 552 return error; 553 554 error = nvme_shutdown(sc); 555 if (error) 556 return error; 557 558 /* from now on we are committed to detach, following will never fail */ 559 for (i = 0; i < sc->sc_nq; i++) 560 nvme_q_free(sc, sc->sc_q[i]); 561 kmem_free(sc->sc_q, sizeof(*sc->sc_q) * sc->sc_nq); 562 nvme_q_free(sc, sc->sc_admin_q); 563 564 return 0; 565 } 566 567 static int 568 nvme_shutdown(struct nvme_softc *sc) 569 { 570 uint32_t cc, csts; 571 bool disabled = false; 572 int i; 573 574 if (!sc->sc_use_mq) 575 nvme_write4(sc, NVME_INTMS, 1); 576 577 for (i = 0; i < sc->sc_nq; i++) { 578 if (nvme_q_delete(sc, sc->sc_q[i]) != 0) { 579 aprint_error_dev(sc->sc_dev, 580 "unable to delete io queue %d, disabling\n", i + 1); 581 disabled = true; 582 } 583 } 584 sc->sc_intr_disestablish(sc, NVME_ADMIN_Q); 585 if (disabled) 586 goto disable; 587 588 cc = nvme_read4(sc, NVME_CC); 589 CLR(cc, NVME_CC_SHN_MASK); 590 SET(cc, NVME_CC_SHN(NVME_CC_SHN_NORMAL)); 591 nvme_write4(sc, NVME_CC, cc); 592 593 for (i = 0; i < 4000; i++) { 594 nvme_barrier(sc, 0, sc->sc_ios, 595 BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); 596 csts = nvme_read4(sc, NVME_CSTS); 597 if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_DONE) 598 return 0; 599 600 delay(1000); 601 } 602 603 aprint_error_dev(sc->sc_dev, "unable to shudown, disabling\n"); 604 605 disable: 606 nvme_disable(sc); 607 return 0; 608 } 609 610 void 611 nvme_childdet(device_t self, device_t child) 612 { 613 struct nvme_softc *sc = device_private(self); 614 int i; 615 616 for (i = 0; i < sc->sc_nn; i++) { 617 if (sc->sc_namespaces[i].dev == child) { 618 /* Already freed ns->ident. */ 619 sc->sc_namespaces[i].dev = NULL; 620 break; 621 } 622 } 623 } 624 625 int 626 nvme_ns_identify(struct nvme_softc *sc, uint16_t nsid) 627 { 628 struct nvme_sqe sqe; 629 struct nvm_identify_namespace *identify; 630 struct nvme_dmamem *mem; 631 struct nvme_ccb *ccb; 632 struct nvme_namespace *ns; 633 int rv; 634 635 KASSERT(nsid > 0); 636 637 ns = nvme_ns_get(sc, nsid); 638 KASSERT(ns); 639 640 if (ns->ident != NULL) 641 return 0; 642 643 ccb = nvme_ccb_get(sc->sc_admin_q, false); 644 KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */ 645 646 mem = nvme_dmamem_alloc(sc, sizeof(*identify)); 647 if (mem == NULL) { 648 nvme_ccb_put(sc->sc_admin_q, ccb); 649 return ENOMEM; 650 } 651 652 memset(&sqe, 0, sizeof(sqe)); 653 sqe.opcode = NVM_ADMIN_IDENTIFY; 654 htolem32(&sqe.nsid, nsid); 655 htolem64(&sqe.entry.prp[0], NVME_DMA_DVA(mem)); 656 htolem32(&sqe.cdw10, 0); 657 658 ccb->ccb_done = nvme_empty_done; 659 ccb->ccb_cookie = &sqe; 660 661 nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD); 662 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_IDENT); 663 nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD); 664 665 nvme_ccb_put(sc->sc_admin_q, ccb); 666 667 if (rv != 0) { 668 rv = EIO; 669 goto done; 670 } 671 672 /* commit */ 673 674 identify = kmem_zalloc(sizeof(*identify), KM_SLEEP); 675 *identify = *((volatile struct nvm_identify_namespace *)NVME_DMA_KVA(mem)); 676 677 /* Convert data to host endian */ 678 nvme_identify_namespace_swapbytes(identify); 679 680 ns->ident = identify; 681 682 done: 683 nvme_dmamem_free(sc, mem); 684 685 return rv; 686 } 687 688 int 689 nvme_ns_dobio(struct nvme_softc *sc, uint16_t nsid, void *cookie, 690 struct buf *bp, void *data, size_t datasize, 691 int secsize, daddr_t blkno, int flags, nvme_nnc_done nnc_done) 692 { 693 struct nvme_queue *q = nvme_get_q(sc, bp, false); 694 struct nvme_ccb *ccb; 695 bus_dmamap_t dmap; 696 int i, error; 697 698 ccb = nvme_ccb_get(q, false); 699 if (ccb == NULL) 700 return EAGAIN; 701 702 ccb->ccb_done = nvme_ns_io_done; 703 ccb->ccb_cookie = cookie; 704 705 /* namespace context */ 706 ccb->nnc_nsid = nsid; 707 ccb->nnc_flags = flags; 708 ccb->nnc_buf = bp; 709 ccb->nnc_datasize = datasize; 710 ccb->nnc_secsize = secsize; 711 ccb->nnc_blkno = blkno; 712 ccb->nnc_done = nnc_done; 713 714 dmap = ccb->ccb_dmamap; 715 error = bus_dmamap_load(sc->sc_dmat, dmap, data, 716 datasize, NULL, 717 (ISSET(flags, NVME_NS_CTX_F_POLL) ? 718 BUS_DMA_NOWAIT : BUS_DMA_WAITOK) | 719 (ISSET(flags, NVME_NS_CTX_F_READ) ? 720 BUS_DMA_READ : BUS_DMA_WRITE)); 721 if (error) { 722 nvme_ccb_put(q, ccb); 723 return error; 724 } 725 726 bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize, 727 ISSET(flags, NVME_NS_CTX_F_READ) ? 728 BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE); 729 730 if (dmap->dm_nsegs > 2) { 731 for (i = 1; i < dmap->dm_nsegs; i++) { 732 htolem64(&ccb->ccb_prpl[i - 1], 733 dmap->dm_segs[i].ds_addr); 734 } 735 bus_dmamap_sync(sc->sc_dmat, 736 NVME_DMA_MAP(q->q_ccb_prpls), 737 ccb->ccb_prpl_off, 738 sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1), 739 BUS_DMASYNC_PREWRITE); 740 } 741 742 if (ISSET(flags, NVME_NS_CTX_F_POLL)) { 743 if (nvme_poll(sc, q, ccb, nvme_ns_io_fill, NVME_TIMO_PT) != 0) 744 return EIO; 745 return 0; 746 } 747 748 nvme_q_submit(sc, q, ccb, nvme_ns_io_fill); 749 return 0; 750 } 751 752 static void 753 nvme_ns_io_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 754 { 755 struct nvme_sqe_io *sqe = slot; 756 bus_dmamap_t dmap = ccb->ccb_dmamap; 757 758 sqe->opcode = ISSET(ccb->nnc_flags, NVME_NS_CTX_F_READ) ? 759 NVM_CMD_READ : NVM_CMD_WRITE; 760 htolem32(&sqe->nsid, ccb->nnc_nsid); 761 762 htolem64(&sqe->entry.prp[0], dmap->dm_segs[0].ds_addr); 763 switch (dmap->dm_nsegs) { 764 case 1: 765 break; 766 case 2: 767 htolem64(&sqe->entry.prp[1], dmap->dm_segs[1].ds_addr); 768 break; 769 default: 770 /* the prp list is already set up and synced */ 771 htolem64(&sqe->entry.prp[1], ccb->ccb_prpl_dva); 772 break; 773 } 774 775 htolem64(&sqe->slba, ccb->nnc_blkno); 776 777 if (ISSET(ccb->nnc_flags, NVME_NS_CTX_F_FUA)) 778 htolem16(&sqe->ioflags, NVM_SQE_IO_FUA); 779 780 /* guaranteed by upper layers, but check just in case */ 781 KASSERT((ccb->nnc_datasize % ccb->nnc_secsize) == 0); 782 htolem16(&sqe->nlb, (ccb->nnc_datasize / ccb->nnc_secsize) - 1); 783 } 784 785 static void 786 nvme_ns_io_done(struct nvme_queue *q, struct nvme_ccb *ccb, 787 struct nvme_cqe *cqe) 788 { 789 struct nvme_softc *sc = q->q_sc; 790 bus_dmamap_t dmap = ccb->ccb_dmamap; 791 void *nnc_cookie = ccb->ccb_cookie; 792 nvme_nnc_done nnc_done = ccb->nnc_done; 793 struct buf *bp = ccb->nnc_buf; 794 795 if (dmap->dm_nsegs > 2) { 796 bus_dmamap_sync(sc->sc_dmat, 797 NVME_DMA_MAP(q->q_ccb_prpls), 798 ccb->ccb_prpl_off, 799 sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1), 800 BUS_DMASYNC_POSTWRITE); 801 } 802 803 bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize, 804 ISSET(ccb->nnc_flags, NVME_NS_CTX_F_READ) ? 805 BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE); 806 807 bus_dmamap_unload(sc->sc_dmat, dmap); 808 nvme_ccb_put(q, ccb); 809 810 nnc_done(nnc_cookie, bp, lemtoh16(&cqe->flags), lemtoh32(&cqe->cdw0)); 811 } 812 813 /* 814 * If there is no volatile write cache, it makes no sense to issue 815 * flush commands or query for the status. 816 */ 817 static bool 818 nvme_has_volatile_write_cache(struct nvme_softc *sc) 819 { 820 /* sc_identify is filled during attachment */ 821 return ((sc->sc_identify.vwc & NVME_ID_CTRLR_VWC_PRESENT) != 0); 822 } 823 824 static bool 825 nvme_ns_sync_finished(void *cookie) 826 { 827 int *result = cookie; 828 829 return (*result != 0); 830 } 831 832 int 833 nvme_ns_sync(struct nvme_softc *sc, uint16_t nsid, int flags) 834 { 835 struct nvme_queue *q = nvme_get_q(sc, NULL, true); 836 struct nvme_ccb *ccb; 837 int result = 0; 838 839 if (!nvme_has_volatile_write_cache(sc)) { 840 /* cache not present, no value in trying to flush it */ 841 return 0; 842 } 843 844 ccb = nvme_ccb_get(q, true); 845 KASSERT(ccb != NULL); 846 847 ccb->ccb_done = nvme_ns_sync_done; 848 ccb->ccb_cookie = &result; 849 850 /* namespace context */ 851 ccb->nnc_nsid = nsid; 852 ccb->nnc_flags = flags; 853 ccb->nnc_done = NULL; 854 855 if (ISSET(flags, NVME_NS_CTX_F_POLL)) { 856 if (nvme_poll(sc, q, ccb, nvme_ns_sync_fill, NVME_TIMO_SY) != 0) 857 return EIO; 858 return 0; 859 } 860 861 nvme_q_submit(sc, q, ccb, nvme_ns_sync_fill); 862 863 /* wait for completion */ 864 nvme_q_wait_complete(sc, q, nvme_ns_sync_finished, &result); 865 KASSERT(result != 0); 866 867 return (result > 0) ? 0 : EIO; 868 } 869 870 static void 871 nvme_ns_sync_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 872 { 873 struct nvme_sqe *sqe = slot; 874 875 sqe->opcode = NVM_CMD_FLUSH; 876 htolem32(&sqe->nsid, ccb->nnc_nsid); 877 } 878 879 static void 880 nvme_ns_sync_done(struct nvme_queue *q, struct nvme_ccb *ccb, 881 struct nvme_cqe *cqe) 882 { 883 int *result = ccb->ccb_cookie; 884 uint16_t status = NVME_CQE_SC(lemtoh16(&cqe->flags)); 885 886 if (status == NVME_CQE_SC_SUCCESS) 887 *result = 1; 888 else 889 *result = -1; 890 891 nvme_ccb_put(q, ccb); 892 } 893 894 static bool 895 nvme_getcache_finished(void *xc) 896 { 897 int *addr = xc; 898 899 return (*addr != 0); 900 } 901 902 /* 903 * Get status of volatile write cache. Always asynchronous. 904 */ 905 int 906 nvme_admin_getcache(struct nvme_softc *sc, int *addr) 907 { 908 struct nvme_ccb *ccb; 909 struct nvme_queue *q = sc->sc_admin_q; 910 int result = 0, error; 911 912 if (!nvme_has_volatile_write_cache(sc)) { 913 /* cache simply not present */ 914 *addr = 0; 915 return 0; 916 } 917 918 ccb = nvme_ccb_get(q, true); 919 KASSERT(ccb != NULL); 920 921 ccb->ccb_done = nvme_getcache_done; 922 ccb->ccb_cookie = &result; 923 924 /* namespace context */ 925 ccb->nnc_flags = 0; 926 ccb->nnc_done = NULL; 927 928 nvme_q_submit(sc, q, ccb, nvme_getcache_fill); 929 930 /* wait for completion */ 931 nvme_q_wait_complete(sc, q, nvme_getcache_finished, &result); 932 KASSERT(result != 0); 933 934 if (result > 0) { 935 *addr = result; 936 error = 0; 937 } else 938 error = EINVAL; 939 940 return error; 941 } 942 943 static void 944 nvme_getcache_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 945 { 946 struct nvme_sqe *sqe = slot; 947 948 sqe->opcode = NVM_ADMIN_GET_FEATURES; 949 htolem32(&sqe->cdw10, NVM_FEATURE_VOLATILE_WRITE_CACHE); 950 htolem32(&sqe->cdw11, NVM_VOLATILE_WRITE_CACHE_WCE); 951 } 952 953 static void 954 nvme_getcache_done(struct nvme_queue *q, struct nvme_ccb *ccb, 955 struct nvme_cqe *cqe) 956 { 957 int *addr = ccb->ccb_cookie; 958 uint16_t status = NVME_CQE_SC(lemtoh16(&cqe->flags)); 959 uint32_t cdw0 = lemtoh32(&cqe->cdw0); 960 int result; 961 962 if (status == NVME_CQE_SC_SUCCESS) { 963 result = 0; 964 965 /* 966 * DPO not supported, Dataset Management (DSM) field doesn't 967 * specify the same semantics. FUA is always supported. 968 */ 969 result = DKCACHE_FUA; 970 971 if (cdw0 & NVM_VOLATILE_WRITE_CACHE_WCE) 972 result |= DKCACHE_WRITE; 973 974 /* 975 * If volatile write cache is present, the flag shall also be 976 * settable. 977 */ 978 result |= DKCACHE_WCHANGE; 979 980 /* 981 * ONCS field indicates whether the optional SAVE is also 982 * supported for Set Features. According to spec v1.3, 983 * Volatile Write Cache however doesn't support persistency 984 * across power cycle/reset. 985 */ 986 987 } else { 988 result = -1; 989 } 990 991 *addr = result; 992 993 nvme_ccb_put(q, ccb); 994 } 995 996 struct nvme_setcache_state { 997 int dkcache; 998 int result; 999 }; 1000 1001 static bool 1002 nvme_setcache_finished(void *xc) 1003 { 1004 struct nvme_setcache_state *st = xc; 1005 1006 return (st->result != 0); 1007 } 1008 1009 static void 1010 nvme_setcache_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 1011 { 1012 struct nvme_sqe *sqe = slot; 1013 struct nvme_setcache_state *st = ccb->ccb_cookie; 1014 1015 sqe->opcode = NVM_ADMIN_SET_FEATURES; 1016 htolem32(&sqe->cdw10, NVM_FEATURE_VOLATILE_WRITE_CACHE); 1017 if (st->dkcache & DKCACHE_WRITE) 1018 htolem32(&sqe->cdw11, NVM_VOLATILE_WRITE_CACHE_WCE); 1019 } 1020 1021 static void 1022 nvme_setcache_done(struct nvme_queue *q, struct nvme_ccb *ccb, 1023 struct nvme_cqe *cqe) 1024 { 1025 struct nvme_setcache_state *st = ccb->ccb_cookie; 1026 uint16_t status = NVME_CQE_SC(lemtoh16(&cqe->flags)); 1027 1028 if (status == NVME_CQE_SC_SUCCESS) { 1029 st->result = 1; 1030 } else { 1031 st->result = -1; 1032 } 1033 1034 nvme_ccb_put(q, ccb); 1035 } 1036 1037 /* 1038 * Set status of volatile write cache. Always asynchronous. 1039 */ 1040 int 1041 nvme_admin_setcache(struct nvme_softc *sc, int dkcache) 1042 { 1043 struct nvme_ccb *ccb; 1044 struct nvme_queue *q = sc->sc_admin_q; 1045 int error; 1046 struct nvme_setcache_state st; 1047 1048 if (!nvme_has_volatile_write_cache(sc)) { 1049 /* cache simply not present */ 1050 return EOPNOTSUPP; 1051 } 1052 1053 if (dkcache & ~(DKCACHE_WRITE)) { 1054 /* unsupported parameters */ 1055 return EOPNOTSUPP; 1056 } 1057 1058 ccb = nvme_ccb_get(q, true); 1059 KASSERT(ccb != NULL); 1060 1061 memset(&st, 0, sizeof(st)); 1062 st.dkcache = dkcache; 1063 1064 ccb->ccb_done = nvme_setcache_done; 1065 ccb->ccb_cookie = &st; 1066 1067 /* namespace context */ 1068 ccb->nnc_flags = 0; 1069 ccb->nnc_done = NULL; 1070 1071 nvme_q_submit(sc, q, ccb, nvme_setcache_fill); 1072 1073 /* wait for completion */ 1074 nvme_q_wait_complete(sc, q, nvme_setcache_finished, &st); 1075 KASSERT(st.result != 0); 1076 1077 if (st.result > 0) 1078 error = 0; 1079 else 1080 error = EINVAL; 1081 1082 return error; 1083 } 1084 1085 void 1086 nvme_ns_free(struct nvme_softc *sc, uint16_t nsid) 1087 { 1088 struct nvme_namespace *ns; 1089 struct nvm_identify_namespace *identify; 1090 1091 ns = nvme_ns_get(sc, nsid); 1092 KASSERT(ns); 1093 1094 identify = ns->ident; 1095 ns->ident = NULL; 1096 if (identify != NULL) 1097 kmem_free(identify, sizeof(*identify)); 1098 } 1099 1100 struct nvme_pt_state { 1101 struct nvme_pt_command *pt; 1102 bool finished; 1103 }; 1104 1105 static void 1106 nvme_pt_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 1107 { 1108 struct nvme_softc *sc = q->q_sc; 1109 struct nvme_sqe *sqe = slot; 1110 struct nvme_pt_state *state = ccb->ccb_cookie; 1111 struct nvme_pt_command *pt = state->pt; 1112 bus_dmamap_t dmap = ccb->ccb_dmamap; 1113 int i; 1114 1115 sqe->opcode = pt->cmd.opcode; 1116 htolem32(&sqe->nsid, pt->cmd.nsid); 1117 1118 if (pt->buf != NULL && pt->len > 0) { 1119 htolem64(&sqe->entry.prp[0], dmap->dm_segs[0].ds_addr); 1120 switch (dmap->dm_nsegs) { 1121 case 1: 1122 break; 1123 case 2: 1124 htolem64(&sqe->entry.prp[1], dmap->dm_segs[1].ds_addr); 1125 break; 1126 default: 1127 for (i = 1; i < dmap->dm_nsegs; i++) { 1128 htolem64(&ccb->ccb_prpl[i - 1], 1129 dmap->dm_segs[i].ds_addr); 1130 } 1131 bus_dmamap_sync(sc->sc_dmat, 1132 NVME_DMA_MAP(q->q_ccb_prpls), 1133 ccb->ccb_prpl_off, 1134 sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1), 1135 BUS_DMASYNC_PREWRITE); 1136 htolem64(&sqe->entry.prp[1], ccb->ccb_prpl_dva); 1137 break; 1138 } 1139 } 1140 1141 htolem32(&sqe->cdw10, pt->cmd.cdw10); 1142 htolem32(&sqe->cdw11, pt->cmd.cdw11); 1143 htolem32(&sqe->cdw12, pt->cmd.cdw12); 1144 htolem32(&sqe->cdw13, pt->cmd.cdw13); 1145 htolem32(&sqe->cdw14, pt->cmd.cdw14); 1146 htolem32(&sqe->cdw15, pt->cmd.cdw15); 1147 } 1148 1149 static void 1150 nvme_pt_done(struct nvme_queue *q, struct nvme_ccb *ccb, struct nvme_cqe *cqe) 1151 { 1152 struct nvme_softc *sc = q->q_sc; 1153 struct nvme_pt_state *state = ccb->ccb_cookie; 1154 struct nvme_pt_command *pt = state->pt; 1155 bus_dmamap_t dmap = ccb->ccb_dmamap; 1156 1157 if (pt->buf != NULL && pt->len > 0) { 1158 if (dmap->dm_nsegs > 2) { 1159 bus_dmamap_sync(sc->sc_dmat, 1160 NVME_DMA_MAP(q->q_ccb_prpls), 1161 ccb->ccb_prpl_off, 1162 sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1), 1163 BUS_DMASYNC_POSTWRITE); 1164 } 1165 1166 bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize, 1167 pt->is_read ? BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE); 1168 bus_dmamap_unload(sc->sc_dmat, dmap); 1169 } 1170 1171 pt->cpl.cdw0 = lemtoh32(&cqe->cdw0); 1172 pt->cpl.flags = lemtoh16(&cqe->flags) & ~NVME_CQE_PHASE; 1173 1174 state->finished = true; 1175 1176 nvme_ccb_put(q, ccb); 1177 } 1178 1179 static bool 1180 nvme_pt_finished(void *cookie) 1181 { 1182 struct nvme_pt_state *state = cookie; 1183 1184 return state->finished; 1185 } 1186 1187 static int 1188 nvme_command_passthrough(struct nvme_softc *sc, struct nvme_pt_command *pt, 1189 uint16_t nsid, struct lwp *l, bool is_adminq) 1190 { 1191 struct nvme_queue *q; 1192 struct nvme_ccb *ccb; 1193 void *buf = NULL; 1194 struct nvme_pt_state state; 1195 int error; 1196 1197 /* limit command size to maximum data transfer size */ 1198 if ((pt->buf == NULL && pt->len > 0) || 1199 (pt->buf != NULL && (pt->len == 0 || pt->len > sc->sc_mdts))) 1200 return EINVAL; 1201 1202 q = is_adminq ? sc->sc_admin_q : nvme_get_q(sc, NULL, true); 1203 ccb = nvme_ccb_get(q, true); 1204 KASSERT(ccb != NULL); 1205 1206 if (pt->buf != NULL) { 1207 KASSERT(pt->len > 0); 1208 buf = kmem_alloc(pt->len, KM_SLEEP); 1209 if (!pt->is_read) { 1210 error = copyin(pt->buf, buf, pt->len); 1211 if (error) 1212 goto kmem_free; 1213 } 1214 error = bus_dmamap_load(sc->sc_dmat, ccb->ccb_dmamap, buf, 1215 pt->len, NULL, 1216 BUS_DMA_WAITOK | 1217 (pt->is_read ? BUS_DMA_READ : BUS_DMA_WRITE)); 1218 if (error) 1219 goto kmem_free; 1220 bus_dmamap_sync(sc->sc_dmat, ccb->ccb_dmamap, 1221 0, ccb->ccb_dmamap->dm_mapsize, 1222 pt->is_read ? BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE); 1223 } 1224 1225 memset(&state, 0, sizeof(state)); 1226 state.pt = pt; 1227 state.finished = false; 1228 1229 ccb->ccb_done = nvme_pt_done; 1230 ccb->ccb_cookie = &state; 1231 1232 pt->cmd.nsid = nsid; 1233 1234 nvme_q_submit(sc, q, ccb, nvme_pt_fill); 1235 1236 /* wait for completion */ 1237 nvme_q_wait_complete(sc, q, nvme_pt_finished, &state); 1238 KASSERT(state.finished); 1239 1240 error = 0; 1241 1242 if (buf != NULL) { 1243 if (error == 0 && pt->is_read) 1244 error = copyout(buf, pt->buf, pt->len); 1245 kmem_free: 1246 kmem_free(buf, pt->len); 1247 } 1248 1249 return error; 1250 } 1251 1252 static void 1253 nvme_q_submit(struct nvme_softc *sc, struct nvme_queue *q, struct nvme_ccb *ccb, 1254 void (*fill)(struct nvme_queue *, struct nvme_ccb *, void *)) 1255 { 1256 struct nvme_sqe *sqe = NVME_DMA_KVA(q->q_sq_dmamem); 1257 uint32_t tail; 1258 1259 mutex_enter(&q->q_sq_mtx); 1260 tail = q->q_sq_tail; 1261 if (++q->q_sq_tail >= q->q_entries) 1262 q->q_sq_tail = 0; 1263 1264 sqe += tail; 1265 1266 bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem), 1267 sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_POSTWRITE); 1268 memset(sqe, 0, sizeof(*sqe)); 1269 (*fill)(q, ccb, sqe); 1270 htolem16(&sqe->cid, ccb->ccb_id); 1271 bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem), 1272 sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_PREWRITE); 1273 1274 nvme_write4(sc, q->q_sqtdbl, q->q_sq_tail); 1275 mutex_exit(&q->q_sq_mtx); 1276 } 1277 1278 struct nvme_poll_state { 1279 struct nvme_sqe s; 1280 struct nvme_cqe c; 1281 void *cookie; 1282 void (*done)(struct nvme_queue *, struct nvme_ccb *, struct nvme_cqe *); 1283 }; 1284 1285 static int 1286 nvme_poll(struct nvme_softc *sc, struct nvme_queue *q, struct nvme_ccb *ccb, 1287 void (*fill)(struct nvme_queue *, struct nvme_ccb *, void *), int timo_sec) 1288 { 1289 struct nvme_poll_state state; 1290 uint16_t flags; 1291 int step = 10; 1292 int maxloop = timo_sec * 1000000 / step; 1293 int error = 0; 1294 1295 memset(&state, 0, sizeof(state)); 1296 (*fill)(q, ccb, &state.s); 1297 1298 state.done = ccb->ccb_done; 1299 state.cookie = ccb->ccb_cookie; 1300 1301 ccb->ccb_done = nvme_poll_done; 1302 ccb->ccb_cookie = &state; 1303 1304 nvme_q_submit(sc, q, ccb, nvme_poll_fill); 1305 while (!ISSET(state.c.flags, htole16(NVME_CQE_PHASE))) { 1306 if (nvme_q_complete(sc, q) == 0) 1307 delay(step); 1308 1309 if (timo_sec >= 0 && --maxloop <= 0) { 1310 error = ETIMEDOUT; 1311 break; 1312 } 1313 } 1314 1315 if (error == 0) { 1316 flags = lemtoh16(&state.c.flags); 1317 return flags & ~NVME_CQE_PHASE; 1318 } else { 1319 /* 1320 * If it succeds later, it would hit ccb which will have been 1321 * already reused for something else. Not good. Cross 1322 * fingers and hope for best. XXX do controller reset? 1323 */ 1324 aprint_error_dev(sc->sc_dev, "polled command timed out\n"); 1325 1326 /* Invoke the callback to clean state anyway */ 1327 struct nvme_cqe cqe; 1328 memset(&cqe, 0, sizeof(cqe)); 1329 ccb->ccb_done(q, ccb, &cqe); 1330 1331 return 1; 1332 } 1333 } 1334 1335 static void 1336 nvme_poll_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 1337 { 1338 struct nvme_sqe *sqe = slot; 1339 struct nvme_poll_state *state = ccb->ccb_cookie; 1340 1341 *sqe = state->s; 1342 } 1343 1344 static void 1345 nvme_poll_done(struct nvme_queue *q, struct nvme_ccb *ccb, 1346 struct nvme_cqe *cqe) 1347 { 1348 struct nvme_poll_state *state = ccb->ccb_cookie; 1349 1350 state->c = *cqe; 1351 SET(state->c.flags, htole16(NVME_CQE_PHASE)); 1352 1353 ccb->ccb_cookie = state->cookie; 1354 state->done(q, ccb, &state->c); 1355 } 1356 1357 static void 1358 nvme_sqe_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 1359 { 1360 struct nvme_sqe *src = ccb->ccb_cookie; 1361 struct nvme_sqe *dst = slot; 1362 1363 *dst = *src; 1364 } 1365 1366 static void 1367 nvme_empty_done(struct nvme_queue *q, struct nvme_ccb *ccb, 1368 struct nvme_cqe *cqe) 1369 { 1370 } 1371 1372 static int 1373 nvme_q_complete(struct nvme_softc *sc, struct nvme_queue *q) 1374 { 1375 struct nvme_ccb *ccb; 1376 struct nvme_cqe *ring = NVME_DMA_KVA(q->q_cq_dmamem), *cqe; 1377 uint16_t flags; 1378 int rv = 0; 1379 1380 mutex_enter(&q->q_cq_mtx); 1381 1382 nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD); 1383 for (;;) { 1384 cqe = &ring[q->q_cq_head]; 1385 flags = lemtoh16(&cqe->flags); 1386 if ((flags & NVME_CQE_PHASE) != q->q_cq_phase) 1387 break; 1388 1389 ccb = &q->q_ccbs[lemtoh16(&cqe->cid)]; 1390 1391 if (++q->q_cq_head >= q->q_entries) { 1392 q->q_cq_head = 0; 1393 q->q_cq_phase ^= NVME_CQE_PHASE; 1394 } 1395 1396 #ifdef DEBUG 1397 /* 1398 * If we get spurious completion notification, something 1399 * is seriously hosed up. Very likely DMA to some random 1400 * memory place happened, so just bail out. 1401 */ 1402 if ((intptr_t)ccb->ccb_cookie == NVME_CCB_FREE) { 1403 panic("%s: invalid ccb detected", 1404 device_xname(sc->sc_dev)); 1405 /* NOTREACHED */ 1406 } 1407 #endif 1408 1409 rv++; 1410 1411 /* 1412 * Unlock the mutex before calling the ccb_done callback 1413 * and re-lock afterwards. The callback triggers lddone() 1414 * which schedules another i/o, and also calls nvme_ccb_put(). 1415 * Unlock/relock avoids possibility of deadlock. 1416 */ 1417 mutex_exit(&q->q_cq_mtx); 1418 ccb->ccb_done(q, ccb, cqe); 1419 mutex_enter(&q->q_cq_mtx); 1420 } 1421 nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD); 1422 1423 if (rv) 1424 nvme_write4(sc, q->q_cqhdbl, q->q_cq_head); 1425 1426 mutex_exit(&q->q_cq_mtx); 1427 1428 return rv; 1429 } 1430 1431 static void 1432 nvme_q_wait_complete(struct nvme_softc *sc, 1433 struct nvme_queue *q, bool (*finished)(void *), void *cookie) 1434 { 1435 mutex_enter(&q->q_ccb_mtx); 1436 if (finished(cookie)) 1437 goto out; 1438 1439 for(;;) { 1440 q->q_ccb_waiting = true; 1441 cv_wait(&q->q_ccb_wait, &q->q_ccb_mtx); 1442 1443 if (finished(cookie)) 1444 break; 1445 } 1446 1447 out: 1448 mutex_exit(&q->q_ccb_mtx); 1449 } 1450 1451 static int 1452 nvme_identify(struct nvme_softc *sc, u_int mps) 1453 { 1454 char sn[41], mn[81], fr[17]; 1455 struct nvm_identify_controller *identify; 1456 struct nvme_dmamem *mem; 1457 struct nvme_ccb *ccb; 1458 u_int mdts; 1459 int rv = 1; 1460 1461 ccb = nvme_ccb_get(sc->sc_admin_q, false); 1462 KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */ 1463 1464 mem = nvme_dmamem_alloc(sc, sizeof(*identify)); 1465 if (mem == NULL) 1466 return 1; 1467 1468 ccb->ccb_done = nvme_empty_done; 1469 ccb->ccb_cookie = mem; 1470 1471 nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD); 1472 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_fill_identify, 1473 NVME_TIMO_IDENT); 1474 nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD); 1475 1476 nvme_ccb_put(sc->sc_admin_q, ccb); 1477 1478 if (rv != 0) 1479 goto done; 1480 1481 identify = NVME_DMA_KVA(mem); 1482 sc->sc_identify = *identify; 1483 identify = NULL; 1484 1485 /* Convert data to host endian */ 1486 nvme_identify_controller_swapbytes(&sc->sc_identify); 1487 1488 strnvisx(sn, sizeof(sn), (const char *)sc->sc_identify.sn, 1489 sizeof(sc->sc_identify.sn), VIS_TRIM|VIS_SAFE|VIS_OCTAL); 1490 strnvisx(mn, sizeof(mn), (const char *)sc->sc_identify.mn, 1491 sizeof(sc->sc_identify.mn), VIS_TRIM|VIS_SAFE|VIS_OCTAL); 1492 strnvisx(fr, sizeof(fr), (const char *)sc->sc_identify.fr, 1493 sizeof(sc->sc_identify.fr), VIS_TRIM|VIS_SAFE|VIS_OCTAL); 1494 aprint_normal_dev(sc->sc_dev, "%s, firmware %s, serial %s\n", mn, fr, 1495 sn); 1496 1497 strlcpy(sc->sc_modelname, mn, sizeof(sc->sc_modelname)); 1498 1499 if (sc->sc_identify.mdts > 0) { 1500 mdts = (1 << sc->sc_identify.mdts) * (1 << mps); 1501 if (mdts < sc->sc_mdts) 1502 sc->sc_mdts = mdts; 1503 } 1504 1505 sc->sc_nn = sc->sc_identify.nn; 1506 1507 done: 1508 nvme_dmamem_free(sc, mem); 1509 1510 return rv; 1511 } 1512 1513 static int 1514 nvme_q_create(struct nvme_softc *sc, struct nvme_queue *q) 1515 { 1516 struct nvme_sqe_q sqe; 1517 struct nvme_ccb *ccb; 1518 int rv; 1519 1520 if (sc->sc_use_mq && sc->sc_intr_establish(sc, q->q_id, q) != 0) 1521 return 1; 1522 1523 ccb = nvme_ccb_get(sc->sc_admin_q, false); 1524 KASSERT(ccb != NULL); 1525 1526 ccb->ccb_done = nvme_empty_done; 1527 ccb->ccb_cookie = &sqe; 1528 1529 memset(&sqe, 0, sizeof(sqe)); 1530 sqe.opcode = NVM_ADMIN_ADD_IOCQ; 1531 htolem64(&sqe.prp1, NVME_DMA_DVA(q->q_cq_dmamem)); 1532 htolem16(&sqe.qsize, q->q_entries - 1); 1533 htolem16(&sqe.qid, q->q_id); 1534 sqe.qflags = NVM_SQE_CQ_IEN | NVM_SQE_Q_PC; 1535 if (sc->sc_use_mq) 1536 htolem16(&sqe.cqid, q->q_id); /* qid == vector */ 1537 1538 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP); 1539 if (rv != 0) 1540 goto fail; 1541 1542 ccb->ccb_done = nvme_empty_done; 1543 ccb->ccb_cookie = &sqe; 1544 1545 memset(&sqe, 0, sizeof(sqe)); 1546 sqe.opcode = NVM_ADMIN_ADD_IOSQ; 1547 htolem64(&sqe.prp1, NVME_DMA_DVA(q->q_sq_dmamem)); 1548 htolem16(&sqe.qsize, q->q_entries - 1); 1549 htolem16(&sqe.qid, q->q_id); 1550 htolem16(&sqe.cqid, q->q_id); 1551 sqe.qflags = NVM_SQE_Q_PC; 1552 1553 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP); 1554 if (rv != 0) 1555 goto fail; 1556 1557 nvme_ccb_put(sc->sc_admin_q, ccb); 1558 return 0; 1559 1560 fail: 1561 if (sc->sc_use_mq) 1562 sc->sc_intr_disestablish(sc, q->q_id); 1563 1564 nvme_ccb_put(sc->sc_admin_q, ccb); 1565 return rv; 1566 } 1567 1568 static int 1569 nvme_q_delete(struct nvme_softc *sc, struct nvme_queue *q) 1570 { 1571 struct nvme_sqe_q sqe; 1572 struct nvme_ccb *ccb; 1573 int rv; 1574 1575 ccb = nvme_ccb_get(sc->sc_admin_q, false); 1576 KASSERT(ccb != NULL); 1577 1578 ccb->ccb_done = nvme_empty_done; 1579 ccb->ccb_cookie = &sqe; 1580 1581 memset(&sqe, 0, sizeof(sqe)); 1582 sqe.opcode = NVM_ADMIN_DEL_IOSQ; 1583 htolem16(&sqe.qid, q->q_id); 1584 1585 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP); 1586 if (rv != 0) 1587 goto fail; 1588 1589 ccb->ccb_done = nvme_empty_done; 1590 ccb->ccb_cookie = &sqe; 1591 1592 memset(&sqe, 0, sizeof(sqe)); 1593 sqe.opcode = NVM_ADMIN_DEL_IOCQ; 1594 htolem16(&sqe.qid, q->q_id); 1595 1596 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP); 1597 if (rv != 0) 1598 goto fail; 1599 1600 fail: 1601 nvme_ccb_put(sc->sc_admin_q, ccb); 1602 1603 if (rv == 0 && sc->sc_use_mq) { 1604 if (sc->sc_intr_disestablish(sc, q->q_id)) 1605 rv = 1; 1606 } 1607 1608 return rv; 1609 } 1610 1611 static void 1612 nvme_fill_identify(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 1613 { 1614 struct nvme_sqe *sqe = slot; 1615 struct nvme_dmamem *mem = ccb->ccb_cookie; 1616 1617 sqe->opcode = NVM_ADMIN_IDENTIFY; 1618 htolem64(&sqe->entry.prp[0], NVME_DMA_DVA(mem)); 1619 htolem32(&sqe->cdw10, 1); 1620 } 1621 1622 static int 1623 nvme_set_number_of_queues(struct nvme_softc *sc, u_int nq, u_int *ncqa, 1624 u_int *nsqa) 1625 { 1626 struct nvme_pt_state state; 1627 struct nvme_pt_command pt; 1628 struct nvme_ccb *ccb; 1629 int rv; 1630 1631 ccb = nvme_ccb_get(sc->sc_admin_q, false); 1632 KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */ 1633 1634 memset(&pt, 0, sizeof(pt)); 1635 pt.cmd.opcode = NVM_ADMIN_SET_FEATURES; 1636 pt.cmd.cdw10 = NVM_FEATURE_NUMBER_OF_QUEUES; 1637 pt.cmd.cdw11 = ((nq - 1) << 16) | (nq - 1); 1638 1639 memset(&state, 0, sizeof(state)); 1640 state.pt = &pt; 1641 state.finished = false; 1642 1643 ccb->ccb_done = nvme_pt_done; 1644 ccb->ccb_cookie = &state; 1645 1646 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_pt_fill, NVME_TIMO_QOP); 1647 1648 if (rv != 0) { 1649 *ncqa = *nsqa = 0; 1650 return EIO; 1651 } 1652 1653 *ncqa = (pt.cpl.cdw0 >> 16) + 1; 1654 *nsqa = (pt.cpl.cdw0 & 0xffff) + 1; 1655 1656 return 0; 1657 } 1658 1659 static int 1660 nvme_ccbs_alloc(struct nvme_queue *q, uint16_t nccbs) 1661 { 1662 struct nvme_softc *sc = q->q_sc; 1663 struct nvme_ccb *ccb; 1664 bus_addr_t off; 1665 uint64_t *prpl; 1666 u_int i; 1667 1668 mutex_init(&q->q_ccb_mtx, MUTEX_DEFAULT, IPL_BIO); 1669 cv_init(&q->q_ccb_wait, "nvmeqw"); 1670 q->q_ccb_waiting = false; 1671 SIMPLEQ_INIT(&q->q_ccb_list); 1672 1673 q->q_ccbs = kmem_alloc(sizeof(*ccb) * nccbs, KM_SLEEP); 1674 1675 q->q_nccbs = nccbs; 1676 q->q_ccb_prpls = nvme_dmamem_alloc(sc, 1677 sizeof(*prpl) * sc->sc_max_sgl * nccbs); 1678 1679 prpl = NVME_DMA_KVA(q->q_ccb_prpls); 1680 off = 0; 1681 1682 for (i = 0; i < nccbs; i++) { 1683 ccb = &q->q_ccbs[i]; 1684 1685 if (bus_dmamap_create(sc->sc_dmat, sc->sc_mdts, 1686 sc->sc_max_sgl + 1 /* we get a free prp in the sqe */, 1687 sc->sc_mps, sc->sc_mps, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, 1688 &ccb->ccb_dmamap) != 0) 1689 goto free_maps; 1690 1691 ccb->ccb_id = i; 1692 ccb->ccb_prpl = prpl; 1693 ccb->ccb_prpl_off = off; 1694 ccb->ccb_prpl_dva = NVME_DMA_DVA(q->q_ccb_prpls) + off; 1695 1696 SIMPLEQ_INSERT_TAIL(&q->q_ccb_list, ccb, ccb_entry); 1697 1698 prpl += sc->sc_max_sgl; 1699 off += sizeof(*prpl) * sc->sc_max_sgl; 1700 } 1701 1702 return 0; 1703 1704 free_maps: 1705 nvme_ccbs_free(q); 1706 return 1; 1707 } 1708 1709 static struct nvme_ccb * 1710 nvme_ccb_get(struct nvme_queue *q, bool wait) 1711 { 1712 struct nvme_ccb *ccb = NULL; 1713 1714 mutex_enter(&q->q_ccb_mtx); 1715 again: 1716 ccb = SIMPLEQ_FIRST(&q->q_ccb_list); 1717 if (ccb != NULL) { 1718 SIMPLEQ_REMOVE_HEAD(&q->q_ccb_list, ccb_entry); 1719 #ifdef DEBUG 1720 ccb->ccb_cookie = NULL; 1721 #endif 1722 } else { 1723 if (__predict_false(wait)) { 1724 q->q_ccb_waiting = true; 1725 cv_wait(&q->q_ccb_wait, &q->q_ccb_mtx); 1726 goto again; 1727 } 1728 } 1729 mutex_exit(&q->q_ccb_mtx); 1730 1731 return ccb; 1732 } 1733 1734 static void 1735 nvme_ccb_put(struct nvme_queue *q, struct nvme_ccb *ccb) 1736 { 1737 1738 mutex_enter(&q->q_ccb_mtx); 1739 #ifdef DEBUG 1740 ccb->ccb_cookie = (void *)NVME_CCB_FREE; 1741 #endif 1742 SIMPLEQ_INSERT_HEAD(&q->q_ccb_list, ccb, ccb_entry); 1743 1744 /* It's unlikely there are any waiters, it's not used for regular I/O */ 1745 if (__predict_false(q->q_ccb_waiting)) { 1746 q->q_ccb_waiting = false; 1747 cv_broadcast(&q->q_ccb_wait); 1748 } 1749 1750 mutex_exit(&q->q_ccb_mtx); 1751 } 1752 1753 static void 1754 nvme_ccbs_free(struct nvme_queue *q) 1755 { 1756 struct nvme_softc *sc = q->q_sc; 1757 struct nvme_ccb *ccb; 1758 1759 mutex_enter(&q->q_ccb_mtx); 1760 while ((ccb = SIMPLEQ_FIRST(&q->q_ccb_list)) != NULL) { 1761 SIMPLEQ_REMOVE_HEAD(&q->q_ccb_list, ccb_entry); 1762 /* 1763 * bus_dmamap_destroy() may call vm_map_lock() and rw_enter() 1764 * internally. don't hold spin mutex 1765 */ 1766 mutex_exit(&q->q_ccb_mtx); 1767 bus_dmamap_destroy(sc->sc_dmat, ccb->ccb_dmamap); 1768 mutex_enter(&q->q_ccb_mtx); 1769 } 1770 mutex_exit(&q->q_ccb_mtx); 1771 1772 nvme_dmamem_free(sc, q->q_ccb_prpls); 1773 kmem_free(q->q_ccbs, sizeof(*ccb) * q->q_nccbs); 1774 q->q_ccbs = NULL; 1775 cv_destroy(&q->q_ccb_wait); 1776 mutex_destroy(&q->q_ccb_mtx); 1777 } 1778 1779 static struct nvme_queue * 1780 nvme_q_alloc(struct nvme_softc *sc, uint16_t id, u_int entries, u_int dstrd) 1781 { 1782 struct nvme_queue *q; 1783 1784 q = kmem_alloc(sizeof(*q), KM_SLEEP); 1785 q->q_sc = sc; 1786 q->q_sq_dmamem = nvme_dmamem_alloc(sc, 1787 sizeof(struct nvme_sqe) * entries); 1788 if (q->q_sq_dmamem == NULL) 1789 goto free; 1790 1791 q->q_cq_dmamem = nvme_dmamem_alloc(sc, 1792 sizeof(struct nvme_cqe) * entries); 1793 if (q->q_cq_dmamem == NULL) 1794 goto free_sq; 1795 1796 memset(NVME_DMA_KVA(q->q_sq_dmamem), 0, NVME_DMA_LEN(q->q_sq_dmamem)); 1797 memset(NVME_DMA_KVA(q->q_cq_dmamem), 0, NVME_DMA_LEN(q->q_cq_dmamem)); 1798 1799 mutex_init(&q->q_sq_mtx, MUTEX_DEFAULT, IPL_BIO); 1800 mutex_init(&q->q_cq_mtx, MUTEX_DEFAULT, IPL_BIO); 1801 q->q_sqtdbl = NVME_SQTDBL(id, dstrd); 1802 q->q_cqhdbl = NVME_CQHDBL(id, dstrd); 1803 q->q_id = id; 1804 q->q_entries = entries; 1805 q->q_sq_tail = 0; 1806 q->q_cq_head = 0; 1807 q->q_cq_phase = NVME_CQE_PHASE; 1808 1809 nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_PREWRITE); 1810 nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD); 1811 1812 /* 1813 * Due to definition of full and empty queue (queue is empty 1814 * when head == tail, full when tail is one less then head), 1815 * we can actually only have (entries - 1) in-flight commands. 1816 */ 1817 if (nvme_ccbs_alloc(q, entries - 1) != 0) { 1818 aprint_error_dev(sc->sc_dev, "unable to allocate ccbs\n"); 1819 goto free_cq; 1820 } 1821 1822 return q; 1823 1824 free_cq: 1825 nvme_dmamem_free(sc, q->q_cq_dmamem); 1826 free_sq: 1827 nvme_dmamem_free(sc, q->q_sq_dmamem); 1828 free: 1829 kmem_free(q, sizeof(*q)); 1830 1831 return NULL; 1832 } 1833 1834 static void 1835 nvme_q_free(struct nvme_softc *sc, struct nvme_queue *q) 1836 { 1837 nvme_ccbs_free(q); 1838 mutex_destroy(&q->q_sq_mtx); 1839 mutex_destroy(&q->q_cq_mtx); 1840 nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD); 1841 nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_POSTWRITE); 1842 nvme_dmamem_free(sc, q->q_cq_dmamem); 1843 nvme_dmamem_free(sc, q->q_sq_dmamem); 1844 kmem_free(q, sizeof(*q)); 1845 } 1846 1847 int 1848 nvme_intr(void *xsc) 1849 { 1850 struct nvme_softc *sc = xsc; 1851 1852 /* 1853 * INTx is level triggered, controller deasserts the interrupt only 1854 * when we advance command queue head via write to the doorbell. 1855 * Tell the controller to block the interrupts while we process 1856 * the queue(s). 1857 */ 1858 nvme_write4(sc, NVME_INTMS, 1); 1859 1860 softint_schedule(sc->sc_softih[0]); 1861 1862 /* don't know, might not have been for us */ 1863 return 1; 1864 } 1865 1866 void 1867 nvme_softintr_intx(void *xq) 1868 { 1869 struct nvme_queue *q = xq; 1870 struct nvme_softc *sc = q->q_sc; 1871 1872 nvme_q_complete(sc, sc->sc_admin_q); 1873 if (sc->sc_q != NULL) 1874 nvme_q_complete(sc, sc->sc_q[0]); 1875 1876 /* 1877 * Processing done, tell controller to issue interrupts again. There 1878 * is no race, as NVMe spec requires the controller to maintain state, 1879 * and assert the interrupt whenever there are unacknowledged 1880 * completion queue entries. 1881 */ 1882 nvme_write4(sc, NVME_INTMC, 1); 1883 } 1884 1885 int 1886 nvme_intr_msi(void *xq) 1887 { 1888 struct nvme_queue *q = xq; 1889 1890 KASSERT(q && q->q_sc && q->q_sc->sc_softih 1891 && q->q_sc->sc_softih[q->q_id]); 1892 1893 /* 1894 * MSI/MSI-X are edge triggered, so can handover processing to softint 1895 * without masking the interrupt. 1896 */ 1897 softint_schedule(q->q_sc->sc_softih[q->q_id]); 1898 1899 return 1; 1900 } 1901 1902 void 1903 nvme_softintr_msi(void *xq) 1904 { 1905 struct nvme_queue *q = xq; 1906 struct nvme_softc *sc = q->q_sc; 1907 1908 nvme_q_complete(sc, q); 1909 } 1910 1911 static struct nvme_dmamem * 1912 nvme_dmamem_alloc(struct nvme_softc *sc, size_t size) 1913 { 1914 struct nvme_dmamem *ndm; 1915 int nsegs; 1916 1917 ndm = kmem_zalloc(sizeof(*ndm), KM_SLEEP); 1918 if (ndm == NULL) 1919 return NULL; 1920 1921 ndm->ndm_size = size; 1922 1923 if (bus_dmamap_create(sc->sc_dmat, size, btoc(round_page(size)), size, 0, 1924 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &ndm->ndm_map) != 0) 1925 goto ndmfree; 1926 1927 if (bus_dmamem_alloc(sc->sc_dmat, size, sc->sc_mps, 0, &ndm->ndm_seg, 1928 1, &nsegs, BUS_DMA_WAITOK) != 0) 1929 goto destroy; 1930 1931 if (bus_dmamem_map(sc->sc_dmat, &ndm->ndm_seg, nsegs, size, 1932 &ndm->ndm_kva, BUS_DMA_WAITOK) != 0) 1933 goto free; 1934 1935 if (bus_dmamap_load(sc->sc_dmat, ndm->ndm_map, ndm->ndm_kva, size, 1936 NULL, BUS_DMA_WAITOK) != 0) 1937 goto unmap; 1938 1939 memset(ndm->ndm_kva, 0, size); 1940 bus_dmamap_sync(sc->sc_dmat, ndm->ndm_map, 0, size, BUS_DMASYNC_PREREAD); 1941 1942 return ndm; 1943 1944 unmap: 1945 bus_dmamem_unmap(sc->sc_dmat, ndm->ndm_kva, size); 1946 free: 1947 bus_dmamem_free(sc->sc_dmat, &ndm->ndm_seg, 1); 1948 destroy: 1949 bus_dmamap_destroy(sc->sc_dmat, ndm->ndm_map); 1950 ndmfree: 1951 kmem_free(ndm, sizeof(*ndm)); 1952 return NULL; 1953 } 1954 1955 static void 1956 nvme_dmamem_sync(struct nvme_softc *sc, struct nvme_dmamem *mem, int ops) 1957 { 1958 bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(mem), 1959 0, NVME_DMA_LEN(mem), ops); 1960 } 1961 1962 void 1963 nvme_dmamem_free(struct nvme_softc *sc, struct nvme_dmamem *ndm) 1964 { 1965 bus_dmamap_unload(sc->sc_dmat, ndm->ndm_map); 1966 bus_dmamem_unmap(sc->sc_dmat, ndm->ndm_kva, ndm->ndm_size); 1967 bus_dmamem_free(sc->sc_dmat, &ndm->ndm_seg, 1); 1968 bus_dmamap_destroy(sc->sc_dmat, ndm->ndm_map); 1969 kmem_free(ndm, sizeof(*ndm)); 1970 } 1971 1972 /* 1973 * ioctl 1974 */ 1975 1976 dev_type_open(nvmeopen); 1977 dev_type_close(nvmeclose); 1978 dev_type_ioctl(nvmeioctl); 1979 1980 const struct cdevsw nvme_cdevsw = { 1981 .d_open = nvmeopen, 1982 .d_close = nvmeclose, 1983 .d_read = noread, 1984 .d_write = nowrite, 1985 .d_ioctl = nvmeioctl, 1986 .d_stop = nostop, 1987 .d_tty = notty, 1988 .d_poll = nopoll, 1989 .d_mmap = nommap, 1990 .d_kqfilter = nokqfilter, 1991 .d_discard = nodiscard, 1992 .d_flag = D_OTHER, 1993 }; 1994 1995 /* 1996 * Accept an open operation on the control device. 1997 */ 1998 int 1999 nvmeopen(dev_t dev, int flag, int mode, struct lwp *l) 2000 { 2001 struct nvme_softc *sc; 2002 int unit = minor(dev) / 0x10000; 2003 int nsid = minor(dev) & 0xffff; 2004 int nsidx; 2005 2006 if ((sc = device_lookup_private(&nvme_cd, unit)) == NULL) 2007 return ENXIO; 2008 if ((sc->sc_flags & NVME_F_ATTACHED) == 0) 2009 return ENXIO; 2010 2011 if (nsid == 0) { 2012 /* controller */ 2013 if (ISSET(sc->sc_flags, NVME_F_OPEN)) 2014 return EBUSY; 2015 SET(sc->sc_flags, NVME_F_OPEN); 2016 } else { 2017 /* namespace */ 2018 nsidx = nsid - 1; 2019 if (nsidx >= sc->sc_nn || sc->sc_namespaces[nsidx].dev == NULL) 2020 return ENXIO; 2021 if (ISSET(sc->sc_namespaces[nsidx].flags, NVME_NS_F_OPEN)) 2022 return EBUSY; 2023 SET(sc->sc_namespaces[nsidx].flags, NVME_NS_F_OPEN); 2024 } 2025 return 0; 2026 } 2027 2028 /* 2029 * Accept the last close on the control device. 2030 */ 2031 int 2032 nvmeclose(dev_t dev, int flag, int mode, struct lwp *l) 2033 { 2034 struct nvme_softc *sc; 2035 int unit = minor(dev) / 0x10000; 2036 int nsid = minor(dev) & 0xffff; 2037 int nsidx; 2038 2039 sc = device_lookup_private(&nvme_cd, unit); 2040 if (sc == NULL) 2041 return ENXIO; 2042 2043 if (nsid == 0) { 2044 /* controller */ 2045 CLR(sc->sc_flags, NVME_F_OPEN); 2046 } else { 2047 /* namespace */ 2048 nsidx = nsid - 1; 2049 if (nsidx >= sc->sc_nn) 2050 return ENXIO; 2051 CLR(sc->sc_namespaces[nsidx].flags, NVME_NS_F_OPEN); 2052 } 2053 2054 return 0; 2055 } 2056 2057 /* 2058 * Handle control operations. 2059 */ 2060 int 2061 nvmeioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 2062 { 2063 struct nvme_softc *sc; 2064 int unit = minor(dev) / 0x10000; 2065 int nsid = minor(dev) & 0xffff; 2066 struct nvme_pt_command *pt; 2067 2068 sc = device_lookup_private(&nvme_cd, unit); 2069 if (sc == NULL) 2070 return ENXIO; 2071 2072 switch (cmd) { 2073 case NVME_PASSTHROUGH_CMD: 2074 pt = data; 2075 return nvme_command_passthrough(sc, data, 2076 nsid == 0 ? pt->cmd.nsid : nsid, l, nsid == 0); 2077 } 2078 2079 return ENOTTY; 2080 } 2081