1 /* $NetBSD: nvme.c,v 1.65 2022/08/30 01:13:10 riastradh Exp $ */ 2 /* $OpenBSD: nvme.c,v 1.49 2016/04/18 05:59:50 dlg Exp $ */ 3 4 /* 5 * Copyright (c) 2014 David Gwynne <dlg@openbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 20 #include <sys/cdefs.h> 21 __KERNEL_RCSID(0, "$NetBSD: nvme.c,v 1.65 2022/08/30 01:13:10 riastradh Exp $"); 22 23 #include <sys/param.h> 24 #include <sys/systm.h> 25 #include <sys/kernel.h> 26 #include <sys/atomic.h> 27 #include <sys/bus.h> 28 #include <sys/buf.h> 29 #include <sys/conf.h> 30 #include <sys/device.h> 31 #include <sys/kmem.h> 32 #include <sys/once.h> 33 #include <sys/proc.h> 34 #include <sys/queue.h> 35 #include <sys/mutex.h> 36 37 #include <uvm/uvm_extern.h> 38 39 #include <dev/ic/nvmereg.h> 40 #include <dev/ic/nvmevar.h> 41 #include <dev/ic/nvmeio.h> 42 43 #include "ioconf.h" 44 #include "locators.h" 45 46 #define B4_CHK_RDY_DELAY_MS 2300 /* workaround controller bug */ 47 48 int nvme_adminq_size = 32; 49 int nvme_ioq_size = 1024; 50 51 static int nvme_print(void *, const char *); 52 53 static int nvme_ready(struct nvme_softc *, uint32_t); 54 static int nvme_enable(struct nvme_softc *, u_int); 55 static int nvme_disable(struct nvme_softc *); 56 static int nvme_shutdown(struct nvme_softc *); 57 58 uint32_t nvme_op_sq_enter(struct nvme_softc *, 59 struct nvme_queue *, struct nvme_ccb *); 60 void nvme_op_sq_leave(struct nvme_softc *, 61 struct nvme_queue *, struct nvme_ccb *); 62 uint32_t nvme_op_sq_enter_locked(struct nvme_softc *, 63 struct nvme_queue *, struct nvme_ccb *); 64 void nvme_op_sq_leave_locked(struct nvme_softc *, 65 struct nvme_queue *, struct nvme_ccb *); 66 67 void nvme_op_cq_done(struct nvme_softc *, 68 struct nvme_queue *, struct nvme_ccb *); 69 70 static const struct nvme_ops nvme_ops = { 71 .op_sq_enter = nvme_op_sq_enter, 72 .op_sq_leave = nvme_op_sq_leave, 73 .op_sq_enter_locked = nvme_op_sq_enter_locked, 74 .op_sq_leave_locked = nvme_op_sq_leave_locked, 75 76 .op_cq_done = nvme_op_cq_done, 77 }; 78 79 #ifdef NVME_DEBUG 80 static void nvme_dumpregs(struct nvme_softc *); 81 #endif 82 static int nvme_identify(struct nvme_softc *, u_int); 83 static void nvme_fill_identify(struct nvme_queue *, struct nvme_ccb *, 84 void *); 85 86 static int nvme_ccbs_alloc(struct nvme_queue *, uint16_t); 87 static void nvme_ccbs_free(struct nvme_queue *); 88 89 static struct nvme_ccb * 90 nvme_ccb_get(struct nvme_queue *, bool); 91 static struct nvme_ccb * 92 nvme_ccb_get_bio(struct nvme_softc *, struct buf *, 93 struct nvme_queue **); 94 static void nvme_ccb_put(struct nvme_queue *, struct nvme_ccb *); 95 96 static int nvme_poll(struct nvme_softc *, struct nvme_queue *, 97 struct nvme_ccb *, void (*)(struct nvme_queue *, 98 struct nvme_ccb *, void *), int); 99 static void nvme_poll_fill(struct nvme_queue *, struct nvme_ccb *, void *); 100 
static void nvme_poll_done(struct nvme_queue *, struct nvme_ccb *, 101 struct nvme_cqe *); 102 static void nvme_sqe_fill(struct nvme_queue *, struct nvme_ccb *, void *); 103 static void nvme_empty_done(struct nvme_queue *, struct nvme_ccb *, 104 struct nvme_cqe *); 105 106 static struct nvme_queue * 107 nvme_q_alloc(struct nvme_softc *, uint16_t, u_int, u_int); 108 static int nvme_q_create(struct nvme_softc *, struct nvme_queue *); 109 static void nvme_q_reset(struct nvme_softc *, struct nvme_queue *); 110 static int nvme_q_delete(struct nvme_softc *, struct nvme_queue *); 111 static void nvme_q_submit(struct nvme_softc *, struct nvme_queue *, 112 struct nvme_ccb *, void (*)(struct nvme_queue *, 113 struct nvme_ccb *, void *)); 114 static int nvme_q_complete(struct nvme_softc *, struct nvme_queue *q); 115 static void nvme_q_free(struct nvme_softc *, struct nvme_queue *); 116 static void nvme_q_wait_complete(struct nvme_softc *, struct nvme_queue *, 117 bool (*)(void *), void *); 118 119 static void nvme_ns_io_fill(struct nvme_queue *, struct nvme_ccb *, 120 void *); 121 static void nvme_ns_io_done(struct nvme_queue *, struct nvme_ccb *, 122 struct nvme_cqe *); 123 static void nvme_ns_sync_fill(struct nvme_queue *, struct nvme_ccb *, 124 void *); 125 static void nvme_ns_sync_done(struct nvme_queue *, struct nvme_ccb *, 126 struct nvme_cqe *); 127 static void nvme_getcache_fill(struct nvme_queue *, struct nvme_ccb *, 128 void *); 129 static void nvme_getcache_done(struct nvme_queue *, struct nvme_ccb *, 130 struct nvme_cqe *); 131 132 static void nvme_pt_fill(struct nvme_queue *, struct nvme_ccb *, 133 void *); 134 static void nvme_pt_done(struct nvme_queue *, struct nvme_ccb *, 135 struct nvme_cqe *); 136 static int nvme_command_passthrough(struct nvme_softc *, 137 struct nvme_pt_command *, uint32_t, struct lwp *, bool); 138 139 static int nvme_set_number_of_queues(struct nvme_softc *, u_int, u_int *, 140 u_int *); 141 142 #define NVME_TIMO_QOP 5 /* queue create and delete timeout */ 143 #define NVME_TIMO_IDENT 10 /* probe identify timeout */ 144 #define NVME_TIMO_PT -1 /* passthrough cmd timeout */ 145 #define NVME_TIMO_SY 60 /* sync cache timeout */ 146 147 /* 148 * Some controllers, at least Apple NVMe, always require split 149 * transfers, so don't use bus_space_{read,write}_8() on LP64. 
150 */ 151 uint64_t 152 nvme_read8(struct nvme_softc *sc, bus_size_t r) 153 { 154 uint64_t v; 155 uint32_t *a = (uint32_t *)&v; 156 157 #if _BYTE_ORDER == _LITTLE_ENDIAN 158 a[0] = nvme_read4(sc, r); 159 a[1] = nvme_read4(sc, r + 4); 160 #else /* _BYTE_ORDER == _LITTLE_ENDIAN */ 161 a[1] = nvme_read4(sc, r); 162 a[0] = nvme_read4(sc, r + 4); 163 #endif 164 165 return v; 166 } 167 168 void 169 nvme_write8(struct nvme_softc *sc, bus_size_t r, uint64_t v) 170 { 171 uint32_t *a = (uint32_t *)&v; 172 173 #if _BYTE_ORDER == _LITTLE_ENDIAN 174 nvme_write4(sc, r, a[0]); 175 nvme_write4(sc, r + 4, a[1]); 176 #else /* _BYTE_ORDER == _LITTLE_ENDIAN */ 177 nvme_write4(sc, r, a[1]); 178 nvme_write4(sc, r + 4, a[0]); 179 #endif 180 } 181 182 #ifdef NVME_DEBUG 183 static __used void 184 nvme_dumpregs(struct nvme_softc *sc) 185 { 186 uint64_t r8; 187 uint32_t r4; 188 189 #define DEVNAME(_sc) device_xname((_sc)->sc_dev) 190 r8 = nvme_read8(sc, NVME_CAP); 191 printf("%s: cap 0x%016"PRIx64"\n", DEVNAME(sc), nvme_read8(sc, NVME_CAP)); 192 printf("%s: mpsmax %u (%u)\n", DEVNAME(sc), 193 (u_int)NVME_CAP_MPSMAX(r8), (1 << NVME_CAP_MPSMAX(r8))); 194 printf("%s: mpsmin %u (%u)\n", DEVNAME(sc), 195 (u_int)NVME_CAP_MPSMIN(r8), (1 << NVME_CAP_MPSMIN(r8))); 196 printf("%s: css %"PRIu64"\n", DEVNAME(sc), NVME_CAP_CSS(r8)); 197 printf("%s: nssrs %"PRIu64"\n", DEVNAME(sc), NVME_CAP_NSSRS(r8)); 198 printf("%s: dstrd %"PRIu64"\n", DEVNAME(sc), NVME_CAP_DSTRD(r8)); 199 printf("%s: to %"PRIu64" msec\n", DEVNAME(sc), NVME_CAP_TO(r8)); 200 printf("%s: ams %"PRIu64"\n", DEVNAME(sc), NVME_CAP_AMS(r8)); 201 printf("%s: cqr %"PRIu64"\n", DEVNAME(sc), NVME_CAP_CQR(r8)); 202 printf("%s: mqes %"PRIu64"\n", DEVNAME(sc), NVME_CAP_MQES(r8)); 203 204 printf("%s: vs 0x%04x\n", DEVNAME(sc), nvme_read4(sc, NVME_VS)); 205 206 r4 = nvme_read4(sc, NVME_CC); 207 printf("%s: cc 0x%04x\n", DEVNAME(sc), r4); 208 printf("%s: iocqes %u (%u)\n", DEVNAME(sc), NVME_CC_IOCQES_R(r4), 209 (1 << NVME_CC_IOCQES_R(r4))); 210 printf("%s: iosqes %u (%u)\n", DEVNAME(sc), NVME_CC_IOSQES_R(r4), 211 (1 << NVME_CC_IOSQES_R(r4))); 212 printf("%s: shn %u\n", DEVNAME(sc), NVME_CC_SHN_R(r4)); 213 printf("%s: ams %u\n", DEVNAME(sc), NVME_CC_AMS_R(r4)); 214 printf("%s: mps %u (%u)\n", DEVNAME(sc), NVME_CC_MPS_R(r4), 215 (1 << NVME_CC_MPS_R(r4))); 216 printf("%s: css %u\n", DEVNAME(sc), NVME_CC_CSS_R(r4)); 217 printf("%s: en %u\n", DEVNAME(sc), ISSET(r4, NVME_CC_EN) ? 
	    1 : 0);

	r4 = nvme_read4(sc, NVME_CSTS);
	printf("%s: csts 0x%08x\n", DEVNAME(sc), r4);
	printf("%s: rdy %u\n", DEVNAME(sc), r4 & NVME_CSTS_RDY);
	printf("%s: cfs %u\n", DEVNAME(sc), r4 & NVME_CSTS_CFS);
	printf("%s: shst %x\n", DEVNAME(sc), r4 & NVME_CSTS_SHST_MASK);

	r4 = nvme_read4(sc, NVME_AQA);
	printf("%s: aqa 0x%08x\n", DEVNAME(sc), r4);
	printf("%s: acqs %u\n", DEVNAME(sc), NVME_AQA_ACQS_R(r4));
	printf("%s: asqs %u\n", DEVNAME(sc), NVME_AQA_ASQS_R(r4));

	printf("%s: asq 0x%016"PRIx64"\n", DEVNAME(sc), nvme_read8(sc, NVME_ASQ));
	printf("%s: acq 0x%016"PRIx64"\n", DEVNAME(sc), nvme_read8(sc, NVME_ACQ));
#undef DEVNAME
}
#endif /* NVME_DEBUG */

static int
nvme_ready(struct nvme_softc *sc, uint32_t rdy)
{
	u_int i = 0;

	while ((nvme_read4(sc, NVME_CSTS) & NVME_CSTS_RDY) != rdy) {
		if (i++ > sc->sc_rdy_to)
			return ENXIO;

		delay(1000);
		nvme_barrier(sc, NVME_CSTS, 4, BUS_SPACE_BARRIER_READ);
	}

	return 0;
}

static int
nvme_enable(struct nvme_softc *sc, u_int mps)
{
	uint32_t cc, csts;
	int error;

	cc = nvme_read4(sc, NVME_CC);
	csts = nvme_read4(sc, NVME_CSTS);

	/*
	 * See note in nvme_disable.  Short-circuit if we're already enabled.
	 */
	if (ISSET(cc, NVME_CC_EN)) {
		if (ISSET(csts, NVME_CSTS_RDY))
			return 0;

		goto waitready;
	} else {
		/* EN == 0 already; wait for RDY == 0 or fail */
		error = nvme_ready(sc, 0);
		if (error)
			return error;
	}

	if (sc->sc_ops->op_enable != NULL)
		sc->sc_ops->op_enable(sc);

	nvme_write8(sc, NVME_ASQ, NVME_DMA_DVA(sc->sc_admin_q->q_sq_dmamem));
	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);
	delay(5000);
	nvme_write8(sc, NVME_ACQ, NVME_DMA_DVA(sc->sc_admin_q->q_cq_dmamem));
	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);
	delay(5000);

	nvme_write4(sc, NVME_AQA, NVME_AQA_ACQS(sc->sc_admin_q->q_entries) |
	    NVME_AQA_ASQS(sc->sc_admin_q->q_entries));
	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);
	delay(5000);

	CLR(cc, NVME_CC_IOCQES_MASK | NVME_CC_IOSQES_MASK | NVME_CC_SHN_MASK |
	    NVME_CC_AMS_MASK | NVME_CC_MPS_MASK | NVME_CC_CSS_MASK);
	SET(cc, NVME_CC_IOSQES(ffs(64) - 1) | NVME_CC_IOCQES(ffs(16) - 1));
	SET(cc, NVME_CC_SHN(NVME_CC_SHN_NONE));
	SET(cc, NVME_CC_CSS(NVME_CC_CSS_NVM));
	SET(cc, NVME_CC_AMS(NVME_CC_AMS_RR));
	SET(cc, NVME_CC_MPS(mps));
	SET(cc, NVME_CC_EN);

	nvme_write4(sc, NVME_CC, cc);
	nvme_barrier(sc, 0, sc->sc_ios,
	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);

waitready:
	return nvme_ready(sc, NVME_CSTS_RDY);
}

static int
nvme_disable(struct nvme_softc *sc)
{
	uint32_t cc, csts;
	int error;

	cc = nvme_read4(sc, NVME_CC);
	csts = nvme_read4(sc, NVME_CSTS);

	/*
	 * Per 3.1.5 in the NVMe 1.3 spec, transitioning CC.EN from 0 to 1
	 * when CSTS.RDY is 1, or transitioning CC.EN from 1 to 0 when
	 * CSTS.RDY is 0, "has undefined results".  So make sure that CSTS.RDY
	 * isn't the desired value.  Short-circuit if we're already disabled.
322 */ 323 if (ISSET(cc, NVME_CC_EN)) { 324 if (!ISSET(csts, NVME_CSTS_RDY)) { 325 /* EN == 1, wait for RDY == 1 or fail */ 326 error = nvme_ready(sc, NVME_CSTS_RDY); 327 if (error) 328 return error; 329 } 330 } else { 331 /* EN == 0 already wait for RDY == 0 */ 332 if (!ISSET(csts, NVME_CSTS_RDY)) 333 return 0; 334 335 goto waitready; 336 } 337 338 CLR(cc, NVME_CC_EN); 339 nvme_write4(sc, NVME_CC, cc); 340 nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_READ); 341 342 /* 343 * Some drives have issues with accessing the mmio after we disable, 344 * so delay for a bit after we write the bit to cope with these issues. 345 */ 346 if (ISSET(sc->sc_quirks, NVME_QUIRK_DELAY_B4_CHK_RDY)) 347 delay(B4_CHK_RDY_DELAY_MS); 348 349 waitready: 350 return nvme_ready(sc, 0); 351 } 352 353 int 354 nvme_attach(struct nvme_softc *sc) 355 { 356 uint64_t cap; 357 uint32_t reg; 358 u_int mps = PAGE_SHIFT; 359 u_int ncq, nsq; 360 uint16_t adminq_entries = nvme_adminq_size; 361 uint16_t ioq_entries = nvme_ioq_size; 362 int i; 363 364 if (sc->sc_ops == NULL) 365 sc->sc_ops = &nvme_ops; 366 367 reg = nvme_read4(sc, NVME_VS); 368 if (reg == 0xffffffff) { 369 aprint_error_dev(sc->sc_dev, "invalid mapping\n"); 370 return 1; 371 } 372 373 if (NVME_VS_TER(reg) == 0) 374 aprint_normal_dev(sc->sc_dev, "NVMe %d.%d\n", NVME_VS_MJR(reg), 375 NVME_VS_MNR(reg)); 376 else 377 aprint_normal_dev(sc->sc_dev, "NVMe %d.%d.%d\n", NVME_VS_MJR(reg), 378 NVME_VS_MNR(reg), NVME_VS_TER(reg)); 379 380 cap = nvme_read8(sc, NVME_CAP); 381 sc->sc_dstrd = NVME_CAP_DSTRD(cap); 382 if (NVME_CAP_MPSMIN(cap) > PAGE_SHIFT) { 383 aprint_error_dev(sc->sc_dev, "NVMe minimum page size %u " 384 "is greater than CPU page size %u\n", 385 1 << NVME_CAP_MPSMIN(cap), 1 << PAGE_SHIFT); 386 return 1; 387 } 388 if (NVME_CAP_MPSMAX(cap) < mps) 389 mps = NVME_CAP_MPSMAX(cap); 390 if (ioq_entries > NVME_CAP_MQES(cap)) 391 ioq_entries = NVME_CAP_MQES(cap); 392 393 /* set initial values to be used for admin queue during probe */ 394 sc->sc_rdy_to = NVME_CAP_TO(cap); 395 sc->sc_mps = 1 << mps; 396 sc->sc_mdts = MAXPHYS; 397 sc->sc_max_sgl = btoc(round_page(sc->sc_mdts)); 398 399 if (nvme_disable(sc) != 0) { 400 aprint_error_dev(sc->sc_dev, "unable to disable controller\n"); 401 return 1; 402 } 403 404 sc->sc_admin_q = nvme_q_alloc(sc, NVME_ADMIN_Q, adminq_entries, 405 sc->sc_dstrd); 406 if (sc->sc_admin_q == NULL) { 407 aprint_error_dev(sc->sc_dev, 408 "unable to allocate admin queue\n"); 409 return 1; 410 } 411 if (sc->sc_intr_establish(sc, NVME_ADMIN_Q, sc->sc_admin_q)) 412 goto free_admin_q; 413 414 if (nvme_enable(sc, mps) != 0) { 415 aprint_error_dev(sc->sc_dev, "unable to enable controller\n"); 416 goto disestablish_admin_q; 417 } 418 419 if (nvme_identify(sc, NVME_CAP_MPSMIN(cap)) != 0) { 420 aprint_error_dev(sc->sc_dev, "unable to identify controller\n"); 421 goto disable; 422 } 423 if (sc->sc_nn == 0) { 424 aprint_error_dev(sc->sc_dev, "namespace not found\n"); 425 goto disable; 426 } 427 428 /* we know how big things are now */ 429 sc->sc_max_sgl = sc->sc_mdts / sc->sc_mps; 430 431 /* reallocate ccbs of admin queue with new max sgl. 
*/ 432 nvme_ccbs_free(sc->sc_admin_q); 433 nvme_ccbs_alloc(sc->sc_admin_q, sc->sc_admin_q->q_entries); 434 435 if (sc->sc_use_mq) { 436 /* Limit the number of queues to the number allocated in HW */ 437 if (nvme_set_number_of_queues(sc, sc->sc_nq, &ncq, &nsq) != 0) { 438 aprint_error_dev(sc->sc_dev, 439 "unable to get number of queues\n"); 440 goto disable; 441 } 442 if (sc->sc_nq > ncq) 443 sc->sc_nq = ncq; 444 if (sc->sc_nq > nsq) 445 sc->sc_nq = nsq; 446 } 447 448 sc->sc_q = kmem_zalloc(sizeof(*sc->sc_q) * sc->sc_nq, KM_SLEEP); 449 for (i = 0; i < sc->sc_nq; i++) { 450 sc->sc_q[i] = nvme_q_alloc(sc, i + 1, ioq_entries, 451 sc->sc_dstrd); 452 if (sc->sc_q[i] == NULL) { 453 aprint_error_dev(sc->sc_dev, 454 "unable to allocate io queue\n"); 455 goto free_q; 456 } 457 if (nvme_q_create(sc, sc->sc_q[i]) != 0) { 458 aprint_error_dev(sc->sc_dev, 459 "unable to create io queue\n"); 460 nvme_q_free(sc, sc->sc_q[i]); 461 goto free_q; 462 } 463 } 464 465 if (!sc->sc_use_mq) 466 nvme_write4(sc, NVME_INTMC, 1); 467 468 /* probe subdevices */ 469 sc->sc_namespaces = kmem_zalloc(sizeof(*sc->sc_namespaces) * sc->sc_nn, 470 KM_SLEEP); 471 nvme_rescan(sc->sc_dev, NULL, NULL); 472 473 return 0; 474 475 free_q: 476 while (--i >= 0) { 477 nvme_q_delete(sc, sc->sc_q[i]); 478 nvme_q_free(sc, sc->sc_q[i]); 479 } 480 disable: 481 nvme_disable(sc); 482 disestablish_admin_q: 483 sc->sc_intr_disestablish(sc, NVME_ADMIN_Q); 484 free_admin_q: 485 nvme_q_free(sc, sc->sc_admin_q); 486 487 return 1; 488 } 489 490 int 491 nvme_rescan(device_t self, const char *ifattr, const int *locs) 492 { 493 struct nvme_softc *sc = device_private(self); 494 struct nvme_attach_args naa; 495 struct nvm_namespace_format *f; 496 struct nvme_namespace *ns; 497 uint64_t cap; 498 int ioq_entries = nvme_ioq_size; 499 int i, mlocs[NVMECF_NLOCS]; 500 int error; 501 502 cap = nvme_read8(sc, NVME_CAP); 503 if (ioq_entries > NVME_CAP_MQES(cap)) 504 ioq_entries = NVME_CAP_MQES(cap); 505 506 for (i = 1; i <= sc->sc_nn; i++) { 507 if (sc->sc_namespaces[i - 1].dev) 508 continue; 509 510 /* identify to check for availability */ 511 error = nvme_ns_identify(sc, i); 512 if (error) { 513 aprint_error_dev(self, "couldn't identify namespace #%d\n", i); 514 continue; 515 } 516 517 ns = nvme_ns_get(sc, i); 518 KASSERT(ns); 519 520 f = &ns->ident->lbaf[NVME_ID_NS_FLBAS(ns->ident->flbas)]; 521 522 /* 523 * NVME1.0e 6.11 Identify command 524 * 525 * LBADS values smaller than 9 are not supported, a value 526 * of zero means that the format is not used. 
527 */ 528 if (f->lbads < 9) { 529 if (f->lbads > 0) 530 aprint_error_dev(self, 531 "unsupported logical data size %u\n", f->lbads); 532 continue; 533 } 534 535 mlocs[NVMECF_NSID] = i; 536 537 memset(&naa, 0, sizeof(naa)); 538 naa.naa_nsid = i; 539 naa.naa_qentries = (ioq_entries - 1) * sc->sc_nq; 540 naa.naa_maxphys = sc->sc_mdts; 541 naa.naa_typename = sc->sc_modelname; 542 sc->sc_namespaces[i - 1].dev = 543 config_found(sc->sc_dev, &naa, nvme_print, 544 CFARGS(.submatch = config_stdsubmatch, 545 .locators = mlocs)); 546 } 547 return 0; 548 } 549 550 static int 551 nvme_print(void *aux, const char *pnp) 552 { 553 struct nvme_attach_args *naa = aux; 554 555 if (pnp) 556 aprint_normal("ld at %s", pnp); 557 558 if (naa->naa_nsid > 0) 559 aprint_normal(" nsid %d", naa->naa_nsid); 560 561 return UNCONF; 562 } 563 564 int 565 nvme_detach(struct nvme_softc *sc, int flags) 566 { 567 int i, error; 568 569 error = config_detach_children(sc->sc_dev, flags); 570 if (error) 571 return error; 572 573 error = nvme_shutdown(sc); 574 if (error) 575 return error; 576 577 /* from now on we are committed to detach, following will never fail */ 578 sc->sc_intr_disestablish(sc, NVME_ADMIN_Q); 579 for (i = 0; i < sc->sc_nq; i++) 580 nvme_q_free(sc, sc->sc_q[i]); 581 kmem_free(sc->sc_q, sizeof(*sc->sc_q) * sc->sc_nq); 582 nvme_q_free(sc, sc->sc_admin_q); 583 584 return 0; 585 } 586 587 int 588 nvme_suspend(struct nvme_softc *sc) 589 { 590 591 return nvme_shutdown(sc); 592 } 593 594 int 595 nvme_resume(struct nvme_softc *sc) 596 { 597 int ioq_entries = nvme_ioq_size; 598 uint64_t cap; 599 int i, error; 600 601 error = nvme_disable(sc); 602 if (error) { 603 device_printf(sc->sc_dev, "unable to disable controller\n"); 604 return error; 605 } 606 607 nvme_q_reset(sc, sc->sc_admin_q); 608 609 error = nvme_enable(sc, ffs(sc->sc_mps) - 1); 610 if (error) { 611 device_printf(sc->sc_dev, "unable to enable controller\n"); 612 return error; 613 } 614 615 for (i = 0; i < sc->sc_nq; i++) { 616 cap = nvme_read8(sc, NVME_CAP); 617 if (ioq_entries > NVME_CAP_MQES(cap)) 618 ioq_entries = NVME_CAP_MQES(cap); 619 sc->sc_q[i] = nvme_q_alloc(sc, i + 1, ioq_entries, 620 sc->sc_dstrd); 621 if (sc->sc_q[i] == NULL) { 622 error = ENOMEM; 623 device_printf(sc->sc_dev, "unable to allocate io q %d" 624 "\n", i); 625 goto disable; 626 } 627 if (nvme_q_create(sc, sc->sc_q[i]) != 0) { 628 error = EIO; 629 device_printf(sc->sc_dev, "unable to create io q %d" 630 "\n", i); 631 nvme_q_free(sc, sc->sc_q[i]); 632 goto free_q; 633 } 634 } 635 636 nvme_write4(sc, NVME_INTMC, 1); 637 638 return 0; 639 640 free_q: 641 while (i --> 0) 642 nvme_q_free(sc, sc->sc_q[i]); 643 disable: 644 (void)nvme_disable(sc); 645 646 return error; 647 } 648 649 static int 650 nvme_shutdown(struct nvme_softc *sc) 651 { 652 uint32_t cc, csts; 653 bool disabled = false; 654 int i; 655 656 if (!sc->sc_use_mq) 657 nvme_write4(sc, NVME_INTMS, 1); 658 659 for (i = 0; i < sc->sc_nq; i++) { 660 if (nvme_q_delete(sc, sc->sc_q[i]) != 0) { 661 aprint_error_dev(sc->sc_dev, 662 "unable to delete io queue %d, disabling\n", i + 1); 663 disabled = true; 664 } 665 } 666 if (disabled) 667 goto disable; 668 669 cc = nvme_read4(sc, NVME_CC); 670 CLR(cc, NVME_CC_SHN_MASK); 671 SET(cc, NVME_CC_SHN(NVME_CC_SHN_NORMAL)); 672 nvme_write4(sc, NVME_CC, cc); 673 674 for (i = 0; i < 4000; i++) { 675 nvme_barrier(sc, 0, sc->sc_ios, 676 BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); 677 csts = nvme_read4(sc, NVME_CSTS); 678 if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_DONE) 679 return 0; 

		delay(1000);
	}

	aprint_error_dev(sc->sc_dev, "unable to shutdown, disabling\n");

disable:
	nvme_disable(sc);
	return 0;
}

void
nvme_childdet(device_t self, device_t child)
{
	struct nvme_softc *sc = device_private(self);
	int i;

	for (i = 0; i < sc->sc_nn; i++) {
		if (sc->sc_namespaces[i].dev == child) {
			/* Already freed ns->ident. */
			sc->sc_namespaces[i].dev = NULL;
			break;
		}
	}
}

int
nvme_ns_identify(struct nvme_softc *sc, uint16_t nsid)
{
	struct nvme_sqe sqe;
	struct nvm_identify_namespace *identify;
	struct nvme_dmamem *mem;
	struct nvme_ccb *ccb;
	struct nvme_namespace *ns;
	int rv;

	KASSERT(nsid > 0);

	ns = nvme_ns_get(sc, nsid);
	KASSERT(ns);

	if (ns->ident != NULL)
		return 0;

	ccb = nvme_ccb_get(sc->sc_admin_q, false);
	KASSERT(ccb != NULL); /* it's a bug if we don't have a spare ccb here */

	mem = nvme_dmamem_alloc(sc, sizeof(*identify));
	if (mem == NULL) {
		nvme_ccb_put(sc->sc_admin_q, ccb);
		return ENOMEM;
	}

	memset(&sqe, 0, sizeof(sqe));
	sqe.opcode = NVM_ADMIN_IDENTIFY;
	htolem32(&sqe.nsid, nsid);
	htolem64(&sqe.entry.prp[0], NVME_DMA_DVA(mem));
	htolem32(&sqe.cdw10, 0);

	ccb->ccb_done = nvme_empty_done;
	ccb->ccb_cookie = &sqe;

	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD);
	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_IDENT);
	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD);

	nvme_ccb_put(sc->sc_admin_q, ccb);

	if (rv != 0) {
		rv = EIO;
		goto done;
	}

	/* commit */

	identify = kmem_zalloc(sizeof(*identify), KM_SLEEP);
	*identify = *((volatile struct nvm_identify_namespace *)NVME_DMA_KVA(mem));

	/* Convert data to host endian */
	nvme_identify_namespace_swapbytes(identify);

	ns->ident = identify;

done:
	nvme_dmamem_free(sc, mem);

	return rv;
}

int
nvme_ns_dobio(struct nvme_softc *sc, uint16_t nsid, void *cookie,
    struct buf *bp, void *data, size_t datasize,
    int secsize, daddr_t blkno, int flags, nvme_nnc_done nnc_done)
{
	struct nvme_queue *q;
	struct nvme_ccb *ccb;
	bus_dmamap_t dmap;
	int i, error;

	ccb = nvme_ccb_get_bio(sc, bp, &q);
	if (ccb == NULL)
		return EAGAIN;

	ccb->ccb_done = nvme_ns_io_done;
	ccb->ccb_cookie = cookie;

	/* namespace context */
	ccb->nnc_nsid = nsid;
	ccb->nnc_flags = flags;
	ccb->nnc_buf = bp;
	ccb->nnc_datasize = datasize;
	ccb->nnc_secsize = secsize;
	ccb->nnc_blkno = blkno;
	ccb->nnc_done = nnc_done;

	dmap = ccb->ccb_dmamap;
	error = bus_dmamap_load(sc->sc_dmat, dmap, data,
	    datasize, NULL,
	    (ISSET(flags, NVME_NS_CTX_F_POLL) ?
	      BUS_DMA_NOWAIT : BUS_DMA_WAITOK) |
	    (ISSET(flags, NVME_NS_CTX_F_READ) ?
	      BUS_DMA_READ : BUS_DMA_WRITE));
	if (error) {
		nvme_ccb_put(q, ccb);
		return error;
	}

	bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize,
	    ISSET(flags, NVME_NS_CTX_F_READ) ?
809 BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE); 810 811 if (dmap->dm_nsegs > 2) { 812 for (i = 1; i < dmap->dm_nsegs; i++) { 813 htolem64(&ccb->ccb_prpl[i - 1], 814 dmap->dm_segs[i].ds_addr); 815 } 816 bus_dmamap_sync(sc->sc_dmat, 817 NVME_DMA_MAP(q->q_ccb_prpls), 818 ccb->ccb_prpl_off, 819 sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1), 820 BUS_DMASYNC_PREWRITE); 821 } 822 823 if (ISSET(flags, NVME_NS_CTX_F_POLL)) { 824 if (nvme_poll(sc, q, ccb, nvme_ns_io_fill, NVME_TIMO_PT) != 0) 825 return EIO; 826 return 0; 827 } 828 829 nvme_q_submit(sc, q, ccb, nvme_ns_io_fill); 830 return 0; 831 } 832 833 static void 834 nvme_ns_io_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 835 { 836 struct nvme_sqe_io *sqe = slot; 837 bus_dmamap_t dmap = ccb->ccb_dmamap; 838 839 sqe->opcode = ISSET(ccb->nnc_flags, NVME_NS_CTX_F_READ) ? 840 NVM_CMD_READ : NVM_CMD_WRITE; 841 htolem32(&sqe->nsid, ccb->nnc_nsid); 842 843 htolem64(&sqe->entry.prp[0], dmap->dm_segs[0].ds_addr); 844 switch (dmap->dm_nsegs) { 845 case 1: 846 break; 847 case 2: 848 htolem64(&sqe->entry.prp[1], dmap->dm_segs[1].ds_addr); 849 break; 850 default: 851 /* the prp list is already set up and synced */ 852 htolem64(&sqe->entry.prp[1], ccb->ccb_prpl_dva); 853 break; 854 } 855 856 htolem64(&sqe->slba, ccb->nnc_blkno); 857 858 if (ISSET(ccb->nnc_flags, NVME_NS_CTX_F_FUA)) 859 htolem16(&sqe->ioflags, NVM_SQE_IO_FUA); 860 861 /* guaranteed by upper layers, but check just in case */ 862 KASSERT((ccb->nnc_datasize % ccb->nnc_secsize) == 0); 863 htolem16(&sqe->nlb, (ccb->nnc_datasize / ccb->nnc_secsize) - 1); 864 } 865 866 static void 867 nvme_ns_io_done(struct nvme_queue *q, struct nvme_ccb *ccb, 868 struct nvme_cqe *cqe) 869 { 870 struct nvme_softc *sc = q->q_sc; 871 bus_dmamap_t dmap = ccb->ccb_dmamap; 872 void *nnc_cookie = ccb->ccb_cookie; 873 nvme_nnc_done nnc_done = ccb->nnc_done; 874 struct buf *bp = ccb->nnc_buf; 875 876 if (dmap->dm_nsegs > 2) { 877 bus_dmamap_sync(sc->sc_dmat, 878 NVME_DMA_MAP(q->q_ccb_prpls), 879 ccb->ccb_prpl_off, 880 sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1), 881 BUS_DMASYNC_POSTWRITE); 882 } 883 884 bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize, 885 ISSET(ccb->nnc_flags, NVME_NS_CTX_F_READ) ? 886 BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE); 887 888 bus_dmamap_unload(sc->sc_dmat, dmap); 889 nvme_ccb_put(q, ccb); 890 891 nnc_done(nnc_cookie, bp, lemtoh16(&cqe->flags), lemtoh32(&cqe->cdw0)); 892 } 893 894 /* 895 * If there is no volatile write cache, it makes no sense to issue 896 * flush commands or query for the status. 
897 */ 898 static bool 899 nvme_has_volatile_write_cache(struct nvme_softc *sc) 900 { 901 /* sc_identify is filled during attachment */ 902 return ((sc->sc_identify.vwc & NVME_ID_CTRLR_VWC_PRESENT) != 0); 903 } 904 905 static bool 906 nvme_ns_sync_finished(void *cookie) 907 { 908 int *result = cookie; 909 910 return (*result != 0); 911 } 912 913 int 914 nvme_ns_sync(struct nvme_softc *sc, uint16_t nsid, int flags) 915 { 916 struct nvme_queue *q = nvme_get_q(sc); 917 struct nvme_ccb *ccb; 918 int result = 0; 919 920 if (!nvme_has_volatile_write_cache(sc)) { 921 /* cache not present, no value in trying to flush it */ 922 return 0; 923 } 924 925 ccb = nvme_ccb_get(q, true); 926 KASSERT(ccb != NULL); 927 928 ccb->ccb_done = nvme_ns_sync_done; 929 ccb->ccb_cookie = &result; 930 931 /* namespace context */ 932 ccb->nnc_nsid = nsid; 933 ccb->nnc_flags = flags; 934 ccb->nnc_done = NULL; 935 936 if (ISSET(flags, NVME_NS_CTX_F_POLL)) { 937 if (nvme_poll(sc, q, ccb, nvme_ns_sync_fill, NVME_TIMO_SY) != 0) 938 return EIO; 939 return 0; 940 } 941 942 nvme_q_submit(sc, q, ccb, nvme_ns_sync_fill); 943 944 /* wait for completion */ 945 nvme_q_wait_complete(sc, q, nvme_ns_sync_finished, &result); 946 KASSERT(result != 0); 947 948 return (result > 0) ? 0 : EIO; 949 } 950 951 static void 952 nvme_ns_sync_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 953 { 954 struct nvme_sqe *sqe = slot; 955 956 sqe->opcode = NVM_CMD_FLUSH; 957 htolem32(&sqe->nsid, ccb->nnc_nsid); 958 } 959 960 static void 961 nvme_ns_sync_done(struct nvme_queue *q, struct nvme_ccb *ccb, 962 struct nvme_cqe *cqe) 963 { 964 int *result = ccb->ccb_cookie; 965 uint16_t status = NVME_CQE_SC(lemtoh16(&cqe->flags)); 966 967 if (status == NVME_CQE_SC_SUCCESS) 968 *result = 1; 969 else 970 *result = -1; 971 972 nvme_ccb_put(q, ccb); 973 } 974 975 static bool 976 nvme_getcache_finished(void *xc) 977 { 978 int *addr = xc; 979 980 return (*addr != 0); 981 } 982 983 /* 984 * Get status of volatile write cache. Always asynchronous. 
985 */ 986 int 987 nvme_admin_getcache(struct nvme_softc *sc, int *addr) 988 { 989 struct nvme_ccb *ccb; 990 struct nvme_queue *q = sc->sc_admin_q; 991 int result = 0, error; 992 993 if (!nvme_has_volatile_write_cache(sc)) { 994 /* cache simply not present */ 995 *addr = 0; 996 return 0; 997 } 998 999 ccb = nvme_ccb_get(q, true); 1000 KASSERT(ccb != NULL); 1001 1002 ccb->ccb_done = nvme_getcache_done; 1003 ccb->ccb_cookie = &result; 1004 1005 /* namespace context */ 1006 ccb->nnc_flags = 0; 1007 ccb->nnc_done = NULL; 1008 1009 nvme_q_submit(sc, q, ccb, nvme_getcache_fill); 1010 1011 /* wait for completion */ 1012 nvme_q_wait_complete(sc, q, nvme_getcache_finished, &result); 1013 KASSERT(result != 0); 1014 1015 if (result > 0) { 1016 *addr = result; 1017 error = 0; 1018 } else 1019 error = EINVAL; 1020 1021 return error; 1022 } 1023 1024 static void 1025 nvme_getcache_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 1026 { 1027 struct nvme_sqe *sqe = slot; 1028 1029 sqe->opcode = NVM_ADMIN_GET_FEATURES; 1030 htolem32(&sqe->cdw10, NVM_FEATURE_VOLATILE_WRITE_CACHE); 1031 htolem32(&sqe->cdw11, NVM_VOLATILE_WRITE_CACHE_WCE); 1032 } 1033 1034 static void 1035 nvme_getcache_done(struct nvme_queue *q, struct nvme_ccb *ccb, 1036 struct nvme_cqe *cqe) 1037 { 1038 int *addr = ccb->ccb_cookie; 1039 uint16_t status = NVME_CQE_SC(lemtoh16(&cqe->flags)); 1040 uint32_t cdw0 = lemtoh32(&cqe->cdw0); 1041 int result; 1042 1043 if (status == NVME_CQE_SC_SUCCESS) { 1044 result = 0; 1045 1046 /* 1047 * DPO not supported, Dataset Management (DSM) field doesn't 1048 * specify the same semantics. FUA is always supported. 1049 */ 1050 result = DKCACHE_FUA; 1051 1052 if (cdw0 & NVM_VOLATILE_WRITE_CACHE_WCE) 1053 result |= DKCACHE_WRITE; 1054 1055 /* 1056 * If volatile write cache is present, the flag shall also be 1057 * settable. 1058 */ 1059 result |= DKCACHE_WCHANGE; 1060 1061 /* 1062 * ONCS field indicates whether the optional SAVE is also 1063 * supported for Set Features. According to spec v1.3, 1064 * Volatile Write Cache however doesn't support persistency 1065 * across power cycle/reset. 1066 */ 1067 1068 } else { 1069 result = -1; 1070 } 1071 1072 *addr = result; 1073 1074 nvme_ccb_put(q, ccb); 1075 } 1076 1077 struct nvme_setcache_state { 1078 int dkcache; 1079 int result; 1080 }; 1081 1082 static bool 1083 nvme_setcache_finished(void *xc) 1084 { 1085 struct nvme_setcache_state *st = xc; 1086 1087 return (st->result != 0); 1088 } 1089 1090 static void 1091 nvme_setcache_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 1092 { 1093 struct nvme_sqe *sqe = slot; 1094 struct nvme_setcache_state *st = ccb->ccb_cookie; 1095 1096 sqe->opcode = NVM_ADMIN_SET_FEATURES; 1097 htolem32(&sqe->cdw10, NVM_FEATURE_VOLATILE_WRITE_CACHE); 1098 if (st->dkcache & DKCACHE_WRITE) 1099 htolem32(&sqe->cdw11, NVM_VOLATILE_WRITE_CACHE_WCE); 1100 } 1101 1102 static void 1103 nvme_setcache_done(struct nvme_queue *q, struct nvme_ccb *ccb, 1104 struct nvme_cqe *cqe) 1105 { 1106 struct nvme_setcache_state *st = ccb->ccb_cookie; 1107 uint16_t status = NVME_CQE_SC(lemtoh16(&cqe->flags)); 1108 1109 if (status == NVME_CQE_SC_SUCCESS) { 1110 st->result = 1; 1111 } else { 1112 st->result = -1; 1113 } 1114 1115 nvme_ccb_put(q, ccb); 1116 } 1117 1118 /* 1119 * Set status of volatile write cache. Always asynchronous. 
1120 */ 1121 int 1122 nvme_admin_setcache(struct nvme_softc *sc, int dkcache) 1123 { 1124 struct nvme_ccb *ccb; 1125 struct nvme_queue *q = sc->sc_admin_q; 1126 int error; 1127 struct nvme_setcache_state st; 1128 1129 if (!nvme_has_volatile_write_cache(sc)) { 1130 /* cache simply not present */ 1131 return EOPNOTSUPP; 1132 } 1133 1134 if (dkcache & ~(DKCACHE_WRITE)) { 1135 /* unsupported parameters */ 1136 return EOPNOTSUPP; 1137 } 1138 1139 ccb = nvme_ccb_get(q, true); 1140 KASSERT(ccb != NULL); 1141 1142 memset(&st, 0, sizeof(st)); 1143 st.dkcache = dkcache; 1144 1145 ccb->ccb_done = nvme_setcache_done; 1146 ccb->ccb_cookie = &st; 1147 1148 /* namespace context */ 1149 ccb->nnc_flags = 0; 1150 ccb->nnc_done = NULL; 1151 1152 nvme_q_submit(sc, q, ccb, nvme_setcache_fill); 1153 1154 /* wait for completion */ 1155 nvme_q_wait_complete(sc, q, nvme_setcache_finished, &st); 1156 KASSERT(st.result != 0); 1157 1158 if (st.result > 0) 1159 error = 0; 1160 else 1161 error = EINVAL; 1162 1163 return error; 1164 } 1165 1166 void 1167 nvme_ns_free(struct nvme_softc *sc, uint16_t nsid) 1168 { 1169 struct nvme_namespace *ns; 1170 struct nvm_identify_namespace *identify; 1171 1172 ns = nvme_ns_get(sc, nsid); 1173 KASSERT(ns); 1174 1175 identify = ns->ident; 1176 ns->ident = NULL; 1177 if (identify != NULL) 1178 kmem_free(identify, sizeof(*identify)); 1179 } 1180 1181 struct nvme_pt_state { 1182 struct nvme_pt_command *pt; 1183 bool finished; 1184 }; 1185 1186 static void 1187 nvme_pt_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 1188 { 1189 struct nvme_softc *sc = q->q_sc; 1190 struct nvme_sqe *sqe = slot; 1191 struct nvme_pt_state *state = ccb->ccb_cookie; 1192 struct nvme_pt_command *pt = state->pt; 1193 bus_dmamap_t dmap = ccb->ccb_dmamap; 1194 int i; 1195 1196 sqe->opcode = pt->cmd.opcode; 1197 htolem32(&sqe->nsid, pt->cmd.nsid); 1198 1199 if (pt->buf != NULL && pt->len > 0) { 1200 htolem64(&sqe->entry.prp[0], dmap->dm_segs[0].ds_addr); 1201 switch (dmap->dm_nsegs) { 1202 case 1: 1203 break; 1204 case 2: 1205 htolem64(&sqe->entry.prp[1], dmap->dm_segs[1].ds_addr); 1206 break; 1207 default: 1208 for (i = 1; i < dmap->dm_nsegs; i++) { 1209 htolem64(&ccb->ccb_prpl[i - 1], 1210 dmap->dm_segs[i].ds_addr); 1211 } 1212 bus_dmamap_sync(sc->sc_dmat, 1213 NVME_DMA_MAP(q->q_ccb_prpls), 1214 ccb->ccb_prpl_off, 1215 sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1), 1216 BUS_DMASYNC_PREWRITE); 1217 htolem64(&sqe->entry.prp[1], ccb->ccb_prpl_dva); 1218 break; 1219 } 1220 } 1221 1222 htolem32(&sqe->cdw10, pt->cmd.cdw10); 1223 htolem32(&sqe->cdw11, pt->cmd.cdw11); 1224 htolem32(&sqe->cdw12, pt->cmd.cdw12); 1225 htolem32(&sqe->cdw13, pt->cmd.cdw13); 1226 htolem32(&sqe->cdw14, pt->cmd.cdw14); 1227 htolem32(&sqe->cdw15, pt->cmd.cdw15); 1228 } 1229 1230 static void 1231 nvme_pt_done(struct nvme_queue *q, struct nvme_ccb *ccb, struct nvme_cqe *cqe) 1232 { 1233 struct nvme_softc *sc = q->q_sc; 1234 struct nvme_pt_state *state = ccb->ccb_cookie; 1235 struct nvme_pt_command *pt = state->pt; 1236 bus_dmamap_t dmap = ccb->ccb_dmamap; 1237 1238 if (pt->buf != NULL && pt->len > 0) { 1239 if (dmap->dm_nsegs > 2) { 1240 bus_dmamap_sync(sc->sc_dmat, 1241 NVME_DMA_MAP(q->q_ccb_prpls), 1242 ccb->ccb_prpl_off, 1243 sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1), 1244 BUS_DMASYNC_POSTWRITE); 1245 } 1246 1247 bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize, 1248 pt->is_read ? 
BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE); 1249 bus_dmamap_unload(sc->sc_dmat, dmap); 1250 } 1251 1252 pt->cpl.cdw0 = lemtoh32(&cqe->cdw0); 1253 pt->cpl.flags = lemtoh16(&cqe->flags) & ~NVME_CQE_PHASE; 1254 1255 state->finished = true; 1256 1257 nvme_ccb_put(q, ccb); 1258 } 1259 1260 static bool 1261 nvme_pt_finished(void *cookie) 1262 { 1263 struct nvme_pt_state *state = cookie; 1264 1265 return state->finished; 1266 } 1267 1268 static int 1269 nvme_command_passthrough(struct nvme_softc *sc, struct nvme_pt_command *pt, 1270 uint32_t nsid, struct lwp *l, bool is_adminq) 1271 { 1272 struct nvme_queue *q; 1273 struct nvme_ccb *ccb; 1274 void *buf = NULL; 1275 struct nvme_pt_state state; 1276 int error; 1277 1278 /* limit command size to maximum data transfer size */ 1279 if ((pt->buf == NULL && pt->len > 0) || 1280 (pt->buf != NULL && (pt->len == 0 || pt->len > sc->sc_mdts))) 1281 return EINVAL; 1282 1283 q = is_adminq ? sc->sc_admin_q : nvme_get_q(sc); 1284 ccb = nvme_ccb_get(q, true); 1285 KASSERT(ccb != NULL); 1286 1287 if (pt->buf != NULL) { 1288 KASSERT(pt->len > 0); 1289 buf = kmem_alloc(pt->len, KM_SLEEP); 1290 if (!pt->is_read) { 1291 error = copyin(pt->buf, buf, pt->len); 1292 if (error) 1293 goto kmem_free; 1294 } 1295 error = bus_dmamap_load(sc->sc_dmat, ccb->ccb_dmamap, buf, 1296 pt->len, NULL, 1297 BUS_DMA_WAITOK | 1298 (pt->is_read ? BUS_DMA_READ : BUS_DMA_WRITE)); 1299 if (error) 1300 goto kmem_free; 1301 bus_dmamap_sync(sc->sc_dmat, ccb->ccb_dmamap, 1302 0, ccb->ccb_dmamap->dm_mapsize, 1303 pt->is_read ? BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE); 1304 } 1305 1306 memset(&state, 0, sizeof(state)); 1307 state.pt = pt; 1308 state.finished = false; 1309 1310 ccb->ccb_done = nvme_pt_done; 1311 ccb->ccb_cookie = &state; 1312 1313 pt->cmd.nsid = nsid; 1314 1315 nvme_q_submit(sc, q, ccb, nvme_pt_fill); 1316 1317 /* wait for completion */ 1318 nvme_q_wait_complete(sc, q, nvme_pt_finished, &state); 1319 KASSERT(state.finished); 1320 1321 error = 0; 1322 1323 if (buf != NULL) { 1324 if (error == 0 && pt->is_read) 1325 error = copyout(buf, pt->buf, pt->len); 1326 kmem_free: 1327 kmem_free(buf, pt->len); 1328 } 1329 1330 return error; 1331 } 1332 1333 uint32_t 1334 nvme_op_sq_enter(struct nvme_softc *sc, 1335 struct nvme_queue *q, struct nvme_ccb *ccb) 1336 { 1337 mutex_enter(&q->q_sq_mtx); 1338 1339 return nvme_op_sq_enter_locked(sc, q, ccb); 1340 } 1341 1342 uint32_t 1343 nvme_op_sq_enter_locked(struct nvme_softc *sc, 1344 struct nvme_queue *q, struct nvme_ccb *ccb) 1345 { 1346 return q->q_sq_tail; 1347 } 1348 1349 void 1350 nvme_op_sq_leave_locked(struct nvme_softc *sc, 1351 struct nvme_queue *q, struct nvme_ccb *ccb) 1352 { 1353 uint32_t tail; 1354 1355 tail = ++q->q_sq_tail; 1356 if (tail >= q->q_entries) 1357 tail = 0; 1358 q->q_sq_tail = tail; 1359 nvme_write4(sc, q->q_sqtdbl, tail); 1360 } 1361 1362 void 1363 nvme_op_sq_leave(struct nvme_softc *sc, 1364 struct nvme_queue *q, struct nvme_ccb *ccb) 1365 { 1366 nvme_op_sq_leave_locked(sc, q, ccb); 1367 1368 mutex_exit(&q->q_sq_mtx); 1369 } 1370 1371 static void 1372 nvme_q_submit(struct nvme_softc *sc, struct nvme_queue *q, struct nvme_ccb *ccb, 1373 void (*fill)(struct nvme_queue *, struct nvme_ccb *, void *)) 1374 { 1375 struct nvme_sqe *sqe = NVME_DMA_KVA(q->q_sq_dmamem); 1376 uint32_t tail; 1377 1378 tail = sc->sc_ops->op_sq_enter(sc, q, ccb); 1379 1380 sqe += tail; 1381 1382 bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem), 1383 sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_POSTWRITE); 1384 memset(sqe, 0, 
sizeof(*sqe));
	(*fill)(q, ccb, sqe);
	htolem16(&sqe->cid, ccb->ccb_id);
	bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
	    sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_PREWRITE);

	sc->sc_ops->op_sq_leave(sc, q, ccb);
}

struct nvme_poll_state {
	struct nvme_sqe s;
	struct nvme_cqe c;
	void *cookie;
	void (*done)(struct nvme_queue *, struct nvme_ccb *, struct nvme_cqe *);
};

static int
nvme_poll(struct nvme_softc *sc, struct nvme_queue *q, struct nvme_ccb *ccb,
    void (*fill)(struct nvme_queue *, struct nvme_ccb *, void *), int timo_sec)
{
	struct nvme_poll_state state;
	uint16_t flags;
	int step = 10;
	int maxloop = timo_sec * 1000000 / step;
	int error = 0;

	memset(&state, 0, sizeof(state));
	(*fill)(q, ccb, &state.s);

	state.done = ccb->ccb_done;
	state.cookie = ccb->ccb_cookie;

	ccb->ccb_done = nvme_poll_done;
	ccb->ccb_cookie = &state;

	nvme_q_submit(sc, q, ccb, nvme_poll_fill);
	while (!ISSET(state.c.flags, htole16(NVME_CQE_PHASE))) {
		if (nvme_q_complete(sc, q) == 0)
			delay(step);

		if (timo_sec >= 0 && --maxloop <= 0) {
			error = ETIMEDOUT;
			break;
		}
	}

	if (error == 0) {
		flags = lemtoh16(&state.c.flags);
		return flags & ~NVME_CQE_PHASE;
	} else {
		/*
		 * If it succeeds later, it would hit a ccb which will have
		 * already been reused for something else.  Not good.  Cross
		 * fingers and hope for the best.  XXX do controller reset?
		 */
		aprint_error_dev(sc->sc_dev, "polled command timed out\n");

		/* Invoke the callback to clean state anyway */
		struct nvme_cqe cqe;
		memset(&cqe, 0, sizeof(cqe));
		ccb->ccb_done(q, ccb, &cqe);

		return 1;
	}
}

static void
nvme_poll_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
{
	struct nvme_sqe *sqe = slot;
	struct nvme_poll_state *state = ccb->ccb_cookie;

	*sqe = state->s;
}

static void
nvme_poll_done(struct nvme_queue *q, struct nvme_ccb *ccb,
    struct nvme_cqe *cqe)
{
	struct nvme_poll_state *state = ccb->ccb_cookie;

	state->c = *cqe;
	SET(state->c.flags, htole16(NVME_CQE_PHASE));

	ccb->ccb_cookie = state->cookie;
	state->done(q, ccb, &state->c);
}

static void
nvme_sqe_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
{
	struct nvme_sqe *src = ccb->ccb_cookie;
	struct nvme_sqe *dst = slot;

	*dst = *src;
}

static void
nvme_empty_done(struct nvme_queue *q, struct nvme_ccb *ccb,
    struct nvme_cqe *cqe)
{
}

void
nvme_op_cq_done(struct nvme_softc *sc,
    struct nvme_queue *q, struct nvme_ccb *ccb)
{
	/* nop */
}

static int
nvme_q_complete(struct nvme_softc *sc, struct nvme_queue *q)
{
	struct nvme_ccb *ccb;
	struct nvme_cqe *ring = NVME_DMA_KVA(q->q_cq_dmamem), *cqe;
	uint16_t flags;
	int rv = 0;

	mutex_enter(&q->q_cq_mtx);

	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
	for (;;) {
		cqe = &ring[q->q_cq_head];
		flags = lemtoh16(&cqe->flags);
		if ((flags & NVME_CQE_PHASE) != q->q_cq_phase)
			break;

		/*
		 * Make sure we have read the flags _before_ we read
		 * the cid.
Otherwise the CPU might speculatively read 1514 * the cid before the entry has been assigned to our 1515 * phase. 1516 */ 1517 nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD); 1518 1519 ccb = &q->q_ccbs[lemtoh16(&cqe->cid)]; 1520 1521 if (++q->q_cq_head >= q->q_entries) { 1522 q->q_cq_head = 0; 1523 q->q_cq_phase ^= NVME_CQE_PHASE; 1524 } 1525 1526 #ifdef DEBUG 1527 /* 1528 * If we get spurious completion notification, something 1529 * is seriously hosed up. Very likely DMA to some random 1530 * memory place happened, so just bail out. 1531 */ 1532 if ((intptr_t)ccb->ccb_cookie == NVME_CCB_FREE) { 1533 panic("%s: invalid ccb detected", 1534 device_xname(sc->sc_dev)); 1535 /* NOTREACHED */ 1536 } 1537 #endif 1538 1539 rv++; 1540 1541 sc->sc_ops->op_cq_done(sc, q, ccb); 1542 1543 /* 1544 * Unlock the mutex before calling the ccb_done callback 1545 * and re-lock afterwards. The callback triggers lddone() 1546 * which schedules another i/o, and also calls nvme_ccb_put(). 1547 * Unlock/relock avoids possibility of deadlock. 1548 */ 1549 mutex_exit(&q->q_cq_mtx); 1550 ccb->ccb_done(q, ccb, cqe); 1551 mutex_enter(&q->q_cq_mtx); 1552 } 1553 nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD); 1554 1555 if (rv) 1556 nvme_write4(sc, q->q_cqhdbl, q->q_cq_head); 1557 1558 mutex_exit(&q->q_cq_mtx); 1559 1560 return rv; 1561 } 1562 1563 static void 1564 nvme_q_wait_complete(struct nvme_softc *sc, 1565 struct nvme_queue *q, bool (*finished)(void *), void *cookie) 1566 { 1567 mutex_enter(&q->q_ccb_mtx); 1568 if (finished(cookie)) 1569 goto out; 1570 1571 for(;;) { 1572 q->q_ccb_waiting = true; 1573 cv_wait(&q->q_ccb_wait, &q->q_ccb_mtx); 1574 1575 if (finished(cookie)) 1576 break; 1577 } 1578 1579 out: 1580 mutex_exit(&q->q_ccb_mtx); 1581 } 1582 1583 static int 1584 nvme_identify(struct nvme_softc *sc, u_int mps) 1585 { 1586 char sn[41], mn[81], fr[17]; 1587 struct nvm_identify_controller *identify; 1588 struct nvme_dmamem *mem; 1589 struct nvme_ccb *ccb; 1590 u_int mdts; 1591 int rv = 1; 1592 1593 ccb = nvme_ccb_get(sc->sc_admin_q, false); 1594 KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */ 1595 1596 mem = nvme_dmamem_alloc(sc, sizeof(*identify)); 1597 if (mem == NULL) 1598 return 1; 1599 1600 ccb->ccb_done = nvme_empty_done; 1601 ccb->ccb_cookie = mem; 1602 1603 nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD); 1604 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_fill_identify, 1605 NVME_TIMO_IDENT); 1606 nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD); 1607 1608 nvme_ccb_put(sc->sc_admin_q, ccb); 1609 1610 if (rv != 0) 1611 goto done; 1612 1613 identify = NVME_DMA_KVA(mem); 1614 sc->sc_identify = *identify; 1615 identify = NULL; 1616 1617 /* Convert data to host endian */ 1618 nvme_identify_controller_swapbytes(&sc->sc_identify); 1619 1620 strnvisx(sn, sizeof(sn), (const char *)sc->sc_identify.sn, 1621 sizeof(sc->sc_identify.sn), VIS_TRIM|VIS_SAFE|VIS_OCTAL); 1622 strnvisx(mn, sizeof(mn), (const char *)sc->sc_identify.mn, 1623 sizeof(sc->sc_identify.mn), VIS_TRIM|VIS_SAFE|VIS_OCTAL); 1624 strnvisx(fr, sizeof(fr), (const char *)sc->sc_identify.fr, 1625 sizeof(sc->sc_identify.fr), VIS_TRIM|VIS_SAFE|VIS_OCTAL); 1626 aprint_normal_dev(sc->sc_dev, "%s, firmware %s, serial %s\n", mn, fr, 1627 sn); 1628 1629 strlcpy(sc->sc_modelname, mn, sizeof(sc->sc_modelname)); 1630 1631 if (sc->sc_identify.mdts > 0) { 1632 mdts = (1 << sc->sc_identify.mdts) * (1 << mps); 1633 if (mdts < sc->sc_mdts) 1634 sc->sc_mdts = mdts; 1635 } 1636 1637 sc->sc_nn = sc->sc_identify.nn; 1638 1639 
done: 1640 nvme_dmamem_free(sc, mem); 1641 1642 return rv; 1643 } 1644 1645 static int 1646 nvme_q_create(struct nvme_softc *sc, struct nvme_queue *q) 1647 { 1648 struct nvme_sqe_q sqe; 1649 struct nvme_ccb *ccb; 1650 int rv; 1651 1652 if (sc->sc_use_mq && sc->sc_intr_establish(sc, q->q_id, q) != 0) 1653 return 1; 1654 1655 ccb = nvme_ccb_get(sc->sc_admin_q, false); 1656 KASSERT(ccb != NULL); 1657 1658 ccb->ccb_done = nvme_empty_done; 1659 ccb->ccb_cookie = &sqe; 1660 1661 memset(&sqe, 0, sizeof(sqe)); 1662 sqe.opcode = NVM_ADMIN_ADD_IOCQ; 1663 htolem64(&sqe.prp1, NVME_DMA_DVA(q->q_cq_dmamem)); 1664 htolem16(&sqe.qsize, q->q_entries - 1); 1665 htolem16(&sqe.qid, q->q_id); 1666 sqe.qflags = NVM_SQE_CQ_IEN | NVM_SQE_Q_PC; 1667 if (sc->sc_use_mq) 1668 htolem16(&sqe.cqid, q->q_id); /* qid == vector */ 1669 1670 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP); 1671 if (rv != 0) 1672 goto fail; 1673 1674 ccb->ccb_done = nvme_empty_done; 1675 ccb->ccb_cookie = &sqe; 1676 1677 memset(&sqe, 0, sizeof(sqe)); 1678 sqe.opcode = NVM_ADMIN_ADD_IOSQ; 1679 htolem64(&sqe.prp1, NVME_DMA_DVA(q->q_sq_dmamem)); 1680 htolem16(&sqe.qsize, q->q_entries - 1); 1681 htolem16(&sqe.qid, q->q_id); 1682 htolem16(&sqe.cqid, q->q_id); 1683 sqe.qflags = NVM_SQE_Q_PC; 1684 1685 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP); 1686 if (rv != 0) 1687 goto fail; 1688 1689 nvme_ccb_put(sc->sc_admin_q, ccb); 1690 return 0; 1691 1692 fail: 1693 if (sc->sc_use_mq) 1694 sc->sc_intr_disestablish(sc, q->q_id); 1695 1696 nvme_ccb_put(sc->sc_admin_q, ccb); 1697 return rv; 1698 } 1699 1700 static int 1701 nvme_q_delete(struct nvme_softc *sc, struct nvme_queue *q) 1702 { 1703 struct nvme_sqe_q sqe; 1704 struct nvme_ccb *ccb; 1705 int rv; 1706 1707 ccb = nvme_ccb_get(sc->sc_admin_q, false); 1708 KASSERT(ccb != NULL); 1709 1710 ccb->ccb_done = nvme_empty_done; 1711 ccb->ccb_cookie = &sqe; 1712 1713 memset(&sqe, 0, sizeof(sqe)); 1714 sqe.opcode = NVM_ADMIN_DEL_IOSQ; 1715 htolem16(&sqe.qid, q->q_id); 1716 1717 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP); 1718 if (rv != 0) 1719 goto fail; 1720 1721 ccb->ccb_done = nvme_empty_done; 1722 ccb->ccb_cookie = &sqe; 1723 1724 memset(&sqe, 0, sizeof(sqe)); 1725 sqe.opcode = NVM_ADMIN_DEL_IOCQ; 1726 htolem16(&sqe.qid, q->q_id); 1727 1728 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP); 1729 if (rv != 0) 1730 goto fail; 1731 1732 fail: 1733 nvme_ccb_put(sc->sc_admin_q, ccb); 1734 1735 if (rv == 0 && sc->sc_use_mq) { 1736 if (sc->sc_intr_disestablish(sc, q->q_id)) 1737 rv = 1; 1738 } 1739 1740 return rv; 1741 } 1742 1743 static void 1744 nvme_fill_identify(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 1745 { 1746 struct nvme_sqe *sqe = slot; 1747 struct nvme_dmamem *mem = ccb->ccb_cookie; 1748 1749 sqe->opcode = NVM_ADMIN_IDENTIFY; 1750 htolem64(&sqe->entry.prp[0], NVME_DMA_DVA(mem)); 1751 htolem32(&sqe->cdw10, 1); 1752 } 1753 1754 static int 1755 nvme_set_number_of_queues(struct nvme_softc *sc, u_int nq, u_int *ncqa, 1756 u_int *nsqa) 1757 { 1758 struct nvme_pt_state state; 1759 struct nvme_pt_command pt; 1760 struct nvme_ccb *ccb; 1761 int rv; 1762 1763 ccb = nvme_ccb_get(sc->sc_admin_q, false); 1764 KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */ 1765 1766 memset(&pt, 0, sizeof(pt)); 1767 pt.cmd.opcode = NVM_ADMIN_SET_FEATURES; 1768 pt.cmd.cdw10 = NVM_FEATURE_NUMBER_OF_QUEUES; 1769 pt.cmd.cdw11 = ((nq - 1) << 16) | (nq - 1); 1770 1771 memset(&state, 0, sizeof(state)); 1772 
state.pt = &pt; 1773 state.finished = false; 1774 1775 ccb->ccb_done = nvme_pt_done; 1776 ccb->ccb_cookie = &state; 1777 1778 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_pt_fill, NVME_TIMO_QOP); 1779 1780 if (rv != 0) { 1781 *ncqa = *nsqa = 0; 1782 return EIO; 1783 } 1784 1785 *ncqa = (pt.cpl.cdw0 >> 16) + 1; 1786 *nsqa = (pt.cpl.cdw0 & 0xffff) + 1; 1787 1788 return 0; 1789 } 1790 1791 static int 1792 nvme_ccbs_alloc(struct nvme_queue *q, uint16_t nccbs) 1793 { 1794 struct nvme_softc *sc = q->q_sc; 1795 struct nvme_ccb *ccb; 1796 bus_addr_t off; 1797 uint64_t *prpl; 1798 u_int i; 1799 1800 mutex_init(&q->q_ccb_mtx, MUTEX_DEFAULT, IPL_BIO); 1801 cv_init(&q->q_ccb_wait, "nvmeqw"); 1802 q->q_ccb_waiting = false; 1803 SIMPLEQ_INIT(&q->q_ccb_list); 1804 1805 q->q_ccbs = kmem_alloc(sizeof(*ccb) * nccbs, KM_SLEEP); 1806 1807 q->q_nccbs = nccbs; 1808 q->q_ccb_prpls = nvme_dmamem_alloc(sc, 1809 sizeof(*prpl) * sc->sc_max_sgl * nccbs); 1810 1811 prpl = NVME_DMA_KVA(q->q_ccb_prpls); 1812 off = 0; 1813 1814 for (i = 0; i < nccbs; i++) { 1815 ccb = &q->q_ccbs[i]; 1816 1817 if (bus_dmamap_create(sc->sc_dmat, sc->sc_mdts, 1818 sc->sc_max_sgl + 1 /* we get a free prp in the sqe */, 1819 sc->sc_mps, sc->sc_mps, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, 1820 &ccb->ccb_dmamap) != 0) 1821 goto free_maps; 1822 1823 ccb->ccb_id = i; 1824 ccb->ccb_prpl = prpl; 1825 ccb->ccb_prpl_off = off; 1826 ccb->ccb_prpl_dva = NVME_DMA_DVA(q->q_ccb_prpls) + off; 1827 1828 SIMPLEQ_INSERT_TAIL(&q->q_ccb_list, ccb, ccb_entry); 1829 1830 prpl += sc->sc_max_sgl; 1831 off += sizeof(*prpl) * sc->sc_max_sgl; 1832 } 1833 1834 return 0; 1835 1836 free_maps: 1837 nvme_ccbs_free(q); 1838 return 1; 1839 } 1840 1841 static struct nvme_ccb * 1842 nvme_ccb_get(struct nvme_queue *q, bool wait) 1843 { 1844 struct nvme_ccb *ccb = NULL; 1845 1846 mutex_enter(&q->q_ccb_mtx); 1847 again: 1848 ccb = SIMPLEQ_FIRST(&q->q_ccb_list); 1849 if (ccb != NULL) { 1850 SIMPLEQ_REMOVE_HEAD(&q->q_ccb_list, ccb_entry); 1851 #ifdef DEBUG 1852 ccb->ccb_cookie = NULL; 1853 #endif 1854 } else { 1855 if (__predict_false(wait)) { 1856 q->q_ccb_waiting = true; 1857 cv_wait(&q->q_ccb_wait, &q->q_ccb_mtx); 1858 goto again; 1859 } 1860 } 1861 mutex_exit(&q->q_ccb_mtx); 1862 1863 return ccb; 1864 } 1865 1866 static struct nvme_ccb * 1867 nvme_ccb_get_bio(struct nvme_softc *sc, struct buf *bp, 1868 struct nvme_queue **selq) 1869 { 1870 u_int cpuindex = cpu_index(bp->b_ci ? bp->b_ci : curcpu()); 1871 1872 /* 1873 * Find a queue with available ccbs, preferring the originating 1874 * CPU's queue. 
 */

	for (u_int qoff = 0; qoff < sc->sc_nq; qoff++) {
		struct nvme_queue *q = sc->sc_q[(cpuindex + qoff) % sc->sc_nq];
		struct nvme_ccb *ccb;

		mutex_enter(&q->q_ccb_mtx);
		ccb = SIMPLEQ_FIRST(&q->q_ccb_list);
		if (ccb != NULL) {
			SIMPLEQ_REMOVE_HEAD(&q->q_ccb_list, ccb_entry);
#ifdef DEBUG
			ccb->ccb_cookie = NULL;
#endif
		}
		mutex_exit(&q->q_ccb_mtx);

		if (ccb != NULL) {
			*selq = q;
			return ccb;
		}
	}

	return NULL;
}

static void
nvme_ccb_put(struct nvme_queue *q, struct nvme_ccb *ccb)
{

	mutex_enter(&q->q_ccb_mtx);
#ifdef DEBUG
	ccb->ccb_cookie = (void *)NVME_CCB_FREE;
#endif
	SIMPLEQ_INSERT_HEAD(&q->q_ccb_list, ccb, ccb_entry);

	/* It's unlikely there are any waiters, it's not used for regular I/O */
	if (__predict_false(q->q_ccb_waiting)) {
		q->q_ccb_waiting = false;
		cv_broadcast(&q->q_ccb_wait);
	}

	mutex_exit(&q->q_ccb_mtx);
}

static void
nvme_ccbs_free(struct nvme_queue *q)
{
	struct nvme_softc *sc = q->q_sc;
	struct nvme_ccb *ccb;

	mutex_enter(&q->q_ccb_mtx);
	while ((ccb = SIMPLEQ_FIRST(&q->q_ccb_list)) != NULL) {
		SIMPLEQ_REMOVE_HEAD(&q->q_ccb_list, ccb_entry);
		/*
		 * bus_dmamap_destroy() may call vm_map_lock() and rw_enter()
		 * internally, so don't hold the spin mutex across it.
		 */
		mutex_exit(&q->q_ccb_mtx);
		bus_dmamap_destroy(sc->sc_dmat, ccb->ccb_dmamap);
		mutex_enter(&q->q_ccb_mtx);
	}
	mutex_exit(&q->q_ccb_mtx);

	nvme_dmamem_free(sc, q->q_ccb_prpls);
	kmem_free(q->q_ccbs, sizeof(*ccb) * q->q_nccbs);
	q->q_ccbs = NULL;
	cv_destroy(&q->q_ccb_wait);
	mutex_destroy(&q->q_ccb_mtx);
}

static struct nvme_queue *
nvme_q_alloc(struct nvme_softc *sc, uint16_t id, u_int entries, u_int dstrd)
{
	struct nvme_queue *q;

	q = kmem_alloc(sizeof(*q), KM_SLEEP);
	q->q_sc = sc;
	q->q_sq_dmamem = nvme_dmamem_alloc(sc,
	    sizeof(struct nvme_sqe) * entries);
	if (q->q_sq_dmamem == NULL)
		goto free;

	q->q_cq_dmamem = nvme_dmamem_alloc(sc,
	    sizeof(struct nvme_cqe) * entries);
	if (q->q_cq_dmamem == NULL)
		goto free_sq;

	memset(NVME_DMA_KVA(q->q_sq_dmamem), 0, NVME_DMA_LEN(q->q_sq_dmamem));
	memset(NVME_DMA_KVA(q->q_cq_dmamem), 0, NVME_DMA_LEN(q->q_cq_dmamem));

	mutex_init(&q->q_sq_mtx, MUTEX_DEFAULT, IPL_BIO);
	mutex_init(&q->q_cq_mtx, MUTEX_DEFAULT, IPL_BIO);
	q->q_sqtdbl = NVME_SQTDBL(id, dstrd);
	q->q_cqhdbl = NVME_CQHDBL(id, dstrd);
	q->q_id = id;
	q->q_entries = entries;
	q->q_sq_tail = 0;
	q->q_cq_head = 0;
	q->q_cq_phase = NVME_CQE_PHASE;

	if (sc->sc_ops->op_q_alloc != NULL) {
		if (sc->sc_ops->op_q_alloc(sc, q) != 0)
			goto free_cq;
	}

	nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_PREWRITE);
	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);

	/*
	 * Due to the definition of full and empty queues (a queue is empty
	 * when head == tail, and full when tail is one less than head),
	 * we can actually only have (entries - 1) in-flight commands.
 */
	if (nvme_ccbs_alloc(q, entries - 1) != 0) {
		aprint_error_dev(sc->sc_dev, "unable to allocate ccbs\n");
		goto free_cq;
	}

	return q;

free_cq:
	nvme_dmamem_free(sc, q->q_cq_dmamem);
free_sq:
	nvme_dmamem_free(sc, q->q_sq_dmamem);
free:
	kmem_free(q, sizeof(*q));

	return NULL;
}

static void
nvme_q_reset(struct nvme_softc *sc, struct nvme_queue *q)
{

	memset(NVME_DMA_KVA(q->q_sq_dmamem), 0, NVME_DMA_LEN(q->q_sq_dmamem));
	memset(NVME_DMA_KVA(q->q_cq_dmamem), 0, NVME_DMA_LEN(q->q_cq_dmamem));

	q->q_sq_tail = 0;
	q->q_cq_head = 0;
	q->q_cq_phase = NVME_CQE_PHASE;

	nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_PREWRITE);
	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);
}

static void
nvme_q_free(struct nvme_softc *sc, struct nvme_queue *q)
{
	nvme_ccbs_free(q);
	mutex_destroy(&q->q_sq_mtx);
	mutex_destroy(&q->q_cq_mtx);
	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
	nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_POSTWRITE);

	if (sc->sc_ops->op_q_alloc != NULL)
		sc->sc_ops->op_q_free(sc, q);

	nvme_dmamem_free(sc, q->q_cq_dmamem);
	nvme_dmamem_free(sc, q->q_sq_dmamem);
	kmem_free(q, sizeof(*q));
}

int
nvme_intr(void *xsc)
{
	struct nvme_softc *sc = xsc;

	/*
	 * INTx is level triggered: the controller deasserts the interrupt
	 * only when we advance the completion queue head via a write to the
	 * doorbell.  Tell the controller to block the interrupts while we
	 * process the queue(s).
	 */
	nvme_write4(sc, NVME_INTMS, 1);

	softint_schedule(sc->sc_softih[0]);

	/* don't know, might not have been for us */
	return 1;
}

void
nvme_softintr_intx(void *xq)
{
	struct nvme_queue *q = xq;
	struct nvme_softc *sc = q->q_sc;

	nvme_q_complete(sc, sc->sc_admin_q);
	if (sc->sc_q != NULL)
		nvme_q_complete(sc, sc->sc_q[0]);

	/*
	 * Processing done; tell the controller to issue interrupts again.
	 * There is no race, as the NVMe spec requires the controller to
	 * maintain state and assert the interrupt whenever there are
	 * unacknowledged completion queue entries.
	 */
	nvme_write4(sc, NVME_INTMC, 1);
}

int
nvme_intr_msi(void *xq)
{
	struct nvme_queue *q = xq;

	KASSERT(q);
	KASSERT(q->q_sc);
	KASSERT(q->q_sc->sc_softih);
	KASSERT(q->q_sc->sc_softih[q->q_id]);

	/*
	 * MSI/MSI-X are edge triggered, so we can hand processing over to
	 * the softint without masking the interrupt.
2088 */ 2089 softint_schedule(q->q_sc->sc_softih[q->q_id]); 2090 2091 return 1; 2092 } 2093 2094 void 2095 nvme_softintr_msi(void *xq) 2096 { 2097 struct nvme_queue *q = xq; 2098 struct nvme_softc *sc = q->q_sc; 2099 2100 nvme_q_complete(sc, q); 2101 } 2102 2103 struct nvme_dmamem * 2104 nvme_dmamem_alloc(struct nvme_softc *sc, size_t size) 2105 { 2106 struct nvme_dmamem *ndm; 2107 int nsegs; 2108 2109 ndm = kmem_zalloc(sizeof(*ndm), KM_SLEEP); 2110 if (ndm == NULL) 2111 return NULL; 2112 2113 ndm->ndm_size = size; 2114 2115 if (bus_dmamap_create(sc->sc_dmat, size, btoc(round_page(size)), size, 0, 2116 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &ndm->ndm_map) != 0) 2117 goto ndmfree; 2118 2119 if (bus_dmamem_alloc(sc->sc_dmat, size, sc->sc_mps, 0, &ndm->ndm_seg, 2120 1, &nsegs, BUS_DMA_WAITOK) != 0) 2121 goto destroy; 2122 2123 if (bus_dmamem_map(sc->sc_dmat, &ndm->ndm_seg, nsegs, size, 2124 &ndm->ndm_kva, BUS_DMA_WAITOK) != 0) 2125 goto free; 2126 2127 if (bus_dmamap_load(sc->sc_dmat, ndm->ndm_map, ndm->ndm_kva, size, 2128 NULL, BUS_DMA_WAITOK) != 0) 2129 goto unmap; 2130 2131 memset(ndm->ndm_kva, 0, size); 2132 bus_dmamap_sync(sc->sc_dmat, ndm->ndm_map, 0, size, BUS_DMASYNC_PREREAD); 2133 2134 return ndm; 2135 2136 unmap: 2137 bus_dmamem_unmap(sc->sc_dmat, ndm->ndm_kva, size); 2138 free: 2139 bus_dmamem_free(sc->sc_dmat, &ndm->ndm_seg, 1); 2140 destroy: 2141 bus_dmamap_destroy(sc->sc_dmat, ndm->ndm_map); 2142 ndmfree: 2143 kmem_free(ndm, sizeof(*ndm)); 2144 return NULL; 2145 } 2146 2147 void 2148 nvme_dmamem_sync(struct nvme_softc *sc, struct nvme_dmamem *mem, int ops) 2149 { 2150 bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(mem), 2151 0, NVME_DMA_LEN(mem), ops); 2152 } 2153 2154 void 2155 nvme_dmamem_free(struct nvme_softc *sc, struct nvme_dmamem *ndm) 2156 { 2157 bus_dmamap_unload(sc->sc_dmat, ndm->ndm_map); 2158 bus_dmamem_unmap(sc->sc_dmat, ndm->ndm_kva, ndm->ndm_size); 2159 bus_dmamem_free(sc->sc_dmat, &ndm->ndm_seg, 1); 2160 bus_dmamap_destroy(sc->sc_dmat, ndm->ndm_map); 2161 kmem_free(ndm, sizeof(*ndm)); 2162 } 2163 2164 /* 2165 * ioctl 2166 */ 2167 2168 dev_type_open(nvmeopen); 2169 dev_type_close(nvmeclose); 2170 dev_type_ioctl(nvmeioctl); 2171 2172 const struct cdevsw nvme_cdevsw = { 2173 .d_open = nvmeopen, 2174 .d_close = nvmeclose, 2175 .d_read = noread, 2176 .d_write = nowrite, 2177 .d_ioctl = nvmeioctl, 2178 .d_stop = nostop, 2179 .d_tty = notty, 2180 .d_poll = nopoll, 2181 .d_mmap = nommap, 2182 .d_kqfilter = nokqfilter, 2183 .d_discard = nodiscard, 2184 .d_flag = D_OTHER, 2185 }; 2186 2187 /* 2188 * Accept an open operation on the control device. 2189 */ 2190 int 2191 nvmeopen(dev_t dev, int flag, int mode, struct lwp *l) 2192 { 2193 struct nvme_softc *sc; 2194 int unit = minor(dev) / 0x10000; 2195 int nsid = minor(dev) & 0xffff; 2196 int nsidx; 2197 2198 if ((sc = device_lookup_private(&nvme_cd, unit)) == NULL) 2199 return ENXIO; 2200 if ((sc->sc_flags & NVME_F_ATTACHED) == 0) 2201 return ENXIO; 2202 2203 if (nsid == 0) { 2204 /* controller */ 2205 if (ISSET(sc->sc_flags, NVME_F_OPEN)) 2206 return EBUSY; 2207 SET(sc->sc_flags, NVME_F_OPEN); 2208 } else { 2209 /* namespace */ 2210 nsidx = nsid - 1; 2211 if (nsidx >= sc->sc_nn || sc->sc_namespaces[nsidx].dev == NULL) 2212 return ENXIO; 2213 if (ISSET(sc->sc_namespaces[nsidx].flags, NVME_NS_F_OPEN)) 2214 return EBUSY; 2215 SET(sc->sc_namespaces[nsidx].flags, NVME_NS_F_OPEN); 2216 } 2217 return 0; 2218 } 2219 2220 /* 2221 * Accept the last close on the control device. 
2222 */ 2223 int 2224 nvmeclose(dev_t dev, int flag, int mode, struct lwp *l) 2225 { 2226 struct nvme_softc *sc; 2227 int unit = minor(dev) / 0x10000; 2228 int nsid = minor(dev) & 0xffff; 2229 int nsidx; 2230 2231 sc = device_lookup_private(&nvme_cd, unit); 2232 if (sc == NULL) 2233 return ENXIO; 2234 2235 if (nsid == 0) { 2236 /* controller */ 2237 CLR(sc->sc_flags, NVME_F_OPEN); 2238 } else { 2239 /* namespace */ 2240 nsidx = nsid - 1; 2241 if (nsidx >= sc->sc_nn) 2242 return ENXIO; 2243 CLR(sc->sc_namespaces[nsidx].flags, NVME_NS_F_OPEN); 2244 } 2245 2246 return 0; 2247 } 2248 2249 /* 2250 * Handle control operations. 2251 */ 2252 int 2253 nvmeioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 2254 { 2255 struct nvme_softc *sc; 2256 int unit = minor(dev) / 0x10000; 2257 int nsid = minor(dev) & 0xffff; 2258 struct nvme_pt_command *pt; 2259 2260 sc = device_lookup_private(&nvme_cd, unit); 2261 if (sc == NULL) 2262 return ENXIO; 2263 2264 switch (cmd) { 2265 case NVME_PASSTHROUGH_CMD: 2266 pt = data; 2267 return nvme_command_passthrough(sc, data, 2268 nsid == 0 ? pt->cmd.nsid : (uint32_t)nsid, l, nsid == 0); 2269 } 2270 2271 return ENOTTY; 2272 } 2273
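
/*
 * Example (illustrative sketch only, not part of the driver): how a
 * userland program might issue an admin Identify Controller command
 * through the NVME_PASSTHROUGH_CMD ioctl handled by nvmeioctl() above.
 * The "/dev/nvme0" path, the 4096-byte identify buffer size and the raw
 * opcode value are assumptions made for the example; struct
 * nvme_pt_command and NVME_PASSTHROUGH_CMD come from <dev/ic/nvmeio.h>,
 * and cdw10 = 1 matches what nvme_fill_identify() uses for a controller
 * identify.  Kept under #if 0 so it is never compiled into the kernel.
 */
#if 0
#include <sys/ioctl.h>
#include <dev/ic/nvmeio.h>

#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct nvme_pt_command pt;
	void *buf;
	int fd;

	/* controller node: the nsid part of the minor number is 0 */
	fd = open("/dev/nvme0", O_RDWR);
	if (fd == -1)
		err(1, "open");

	buf = malloc(4096);
	if (buf == NULL)
		err(1, "malloc");

	memset(&pt, 0, sizeof(pt));
	pt.cmd.opcode = 0x06;	/* assumed NVM_ADMIN_IDENTIFY opcode */
	pt.cmd.cdw10 = 1;	/* CNS = 1: identify controller */
	pt.buf = buf;
	pt.len = 4096;
	pt.is_read = 1;		/* data flows controller -> host */

	/* nsid == 0 on the control node routes this to the admin queue */
	if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) == -1)
		err(1, "NVME_PASSTHROUGH_CMD");

	printf("completion flags 0x%04x, cdw0 0x%08x\n",
	    pt.cpl.flags, pt.cpl.cdw0);

	free(buf);
	close(fd);
	return 0;
}
#endif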