/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
 * Copyright 2016 Tegile Systems, Inc. All rights reserved.
 * Copyright (c) 2016 The MathWorks, Inc. All rights reserved.
 */

/*
 * blkdev driver for NVMe compliant storage devices
 *
 * This driver was written to conform to version 1.0e of the NVMe
 * specification. It may work with newer versions, but that is completely
 * untested and disabled by default.
 *
 * The driver has only been tested on x86 systems and will not work on
 * big-endian systems without changes to the code accessing registers and data
 * structures used by the hardware.
 *
 *
 * Interrupt Usage:
 *
 * The driver will use a FIXED interrupt while configuring the device as the
 * specification requires. Later in the attach process it will switch to MSI-X
 * or MSI if supported. The driver wants to have one interrupt vector per CPU,
 * but it will work correctly if fewer are available. Interrupts can be shared
 * by queues; the interrupt handler will iterate through the I/O queue array
 * in steps of n_intr_cnt. Usually only the admin queue will share an
 * interrupt with one I/O queue. The interrupt handler will retrieve completed
 * commands from all queues sharing an interrupt vector and will post them to
 * a taskq for completion processing.
 *
 *
 * Command Processing:
 *
 * NVMe devices can have up to 65536 I/O queue pairs, with each queue holding
 * up to 65536 I/O commands. The driver will configure one I/O queue pair per
 * available interrupt vector, with the queue length usually much smaller than
 * the maximum of 65536. If the hardware doesn't provide enough queues, fewer
 * interrupt vectors will be used.
 *
 * Additionally the hardware provides a single special admin queue pair that
 * can hold up to 4096 admin commands.
 *
 * From the hardware perspective both queues of a queue pair are independent,
 * but they share some driver state: the command array (holding pointers to
 * commands currently being processed by the hardware) and the active command
 * counter. Access to the submission side of a queue pair and the shared state
 * is protected by nq_mutex. The completion side of a queue pair does not need
 * that protection apart from its access to the shared state; it is called
 * only in the interrupt handler which does not run concurrently for the same
 * interrupt vector.
 *
 * When a command is submitted to a queue pair the active command counter is
 * incremented and a pointer to the command is stored in the command array.
 * The array index is used as command identifier (CID) in the submission queue
 * entry. Some commands may take a very long time to complete, and if the
 * queue wraps around in that time a submission may find the next array slot
 * to still be used by a long-running command. In this case the array is
 * sequentially searched for the next free slot. The length of the command
 * array is the same as the configured queue length.
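 *
 * As a simplified sketch (taken from nvme_submit_cmd() below, which holds
 * nq_mutex while doing this), the slot search and CID assignment look like:
 *
 *	while (qp->nq_cmd[qp->nq_next_cmd] != NULL)
 *		qp->nq_next_cmd = (qp->nq_next_cmd + 1) % qp->nq_nentry;
 *	qp->nq_cmd[qp->nq_next_cmd] = cmd;
 *	cmd->nc_sqe.sqe_cid = qp->nq_next_cmd;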
 *
 *
 * Namespace Support:
 *
 * NVMe devices can have multiple namespaces, each being an independent data
 * store. The driver supports multiple namespaces and creates a blkdev
 * interface for each namespace found. Namespaces can have various attributes
 * to support thin provisioning and protection information. This driver does
 * not support any of these attributes and ignores namespaces that have them.
 *
 *
 * Blkdev Interface:
 *
 * This driver uses blkdev to do all the heavy lifting involved with
 * presenting a disk device to the system. As a result, the processing of I/O
 * requests is relatively simple as blkdev takes care of partitioning,
 * boundary checks, DMA setup, and splitting of transfers into manageable
 * chunks.
 *
 * I/O requests coming in from blkdev are turned into NVM commands and posted
 * to an I/O queue. The queue is selected by taking the CPU id modulo the
 * number of queues. There is currently no timeout handling of I/O commands.
 *
 * Blkdev also supports querying device/media information and generating a
 * devid. The driver reports the best block size as determined by the
 * namespace format back to blkdev as physical block size to support partition
 * and block alignment. The devid is composed using the device vendor ID,
 * model number, serial number, and the namespace ID.
 *
 *
 * Error Handling:
 *
 * Error handling is currently limited to detecting fatal hardware errors,
 * either by asynchronous events, or synchronously through command status or
 * admin command timeouts. In case of severe errors the device is fenced off
 * and all further requests will return EIO. FMA is then called to fault the
 * device.
 *
 * The hardware has a limit for outstanding asynchronous event requests.
 * Before this limit is known the driver assumes it is at least 1 and posts a
 * single asynchronous request. Later, when the limit is known, more
 * asynchronous event requests are posted to allow quicker reception of error
 * information. When an asynchronous event is posted by the hardware the
 * driver will parse the error status fields and log information or fault the
 * device, depending on the severity of the asynchronous event. The
 * asynchronous event request is then reused and posted to the admin queue
 * again.
 *
 * On command completion the command status is checked for errors. In case of
 * errors indicating a driver bug the driver panics. Almost all other error
 * status values just cause EIO to be returned.
 *
 * Command timeouts are currently detected for all admin commands except
 * asynchronous event requests. If a command times out and the hardware
 * appears to be healthy the driver attempts to abort the command. If this
 * fails the driver assumes the device to be dead, fences it off, and calls
 * FMA to retire it. In general admin commands are issued at attach time only.
 * No timeout handling of normal I/O commands is presently done.
 *
 * The ABORT command itself may time out, too. In that case the device is
 * also declared dead and fenced off.
 *
 *
 * Quiesce / Fast Reboot:
 *
 * The driver currently does not support fast reboot. A quiesce(9E) entry
 * point is still provided which is used to send a shutdown notification to
 * the device.
 *
 *
 * Driver Configuration:
 *
 * The following driver properties can be changed to control some aspects of
 * the driver's operation (an illustrative configuration fragment follows the
 * list):
 * - strict-version: can be set to 0 to allow devices conforming to newer
 *   versions to be used
 * - ignore-unknown-vendor-status: can be set to 1 to not handle any vendor
 *   specific command status as a fatal error leading to device faulting
 * - admin-queue-len: the maximum length of the admin queue (16-4096)
 * - io-queue-len: the maximum length of the I/O queues (16-65536)
 * - async-event-limit: the maximum number of asynchronous event requests to
 *   be posted by the driver
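 *
 * As an illustrative example (values invented for demonstration), these
 * properties could be set in a driver configuration file such as
 * /kernel/drv/nvme.conf:
 *
 *	strict-version=0;
 *	io-queue-len=1024;
 *	async-event-limit=4;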
 *
 *
 * TODO:
 * - figure out sane default for I/O queue depth reported to blkdev
 * - polled I/O support to support kernel core dumping
 * - FMA handling of media errors
 * - support for the Volatile Write Cache
 * - support for devices supporting very large I/O requests using chained PRPs
 * - support for querying log pages from user space
 * - support for configuring hardware parameters like interrupt coalescing
 * - support for media formatting and hard partitioning into namespaces
 * - support for big-endian systems
 * - support for fast reboot
 */

#include <sys/byteorder.h>
#ifdef _BIG_ENDIAN
#error nvme driver needs porting for big-endian platforms
#endif

#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/bitmap.h>
#include <sys/sysmacros.h>
#include <sys/param.h>
#include <sys/varargs.h>
#include <sys/cpuvar.h>
#include <sys/disp.h>
#include <sys/blkdev.h>
#include <sys/atomic.h>
#include <sys/archsystm.h>
#include <sys/sata/sata_hba.h>

#include "nvme_reg.h"
#include "nvme_var.h"


/* NVMe spec version supported */
static const int nvme_version_major = 1;
static const int nvme_version_minor = 0;

/* tunable for admin command timeout in seconds, default is 1s */
static volatile int nvme_admin_cmd_timeout = 1;
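
/*
 * Being volatile, the timeout above can also be adjusted as a tunable, e.g.
 * in /etc/system (illustrative value):
 *
 *	set nvme:nvme_admin_cmd_timeout = 10
 */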

static int nvme_attach(dev_info_t *, ddi_attach_cmd_t);
static int nvme_detach(dev_info_t *, ddi_detach_cmd_t);
static int nvme_quiesce(dev_info_t *);
static int nvme_fm_errcb(dev_info_t *, ddi_fm_error_t *, const void *);
static int nvme_setup_interrupts(nvme_t *, int, int);
static void nvme_release_interrupts(nvme_t *);
static uint_t nvme_intr(caddr_t, caddr_t);

static void nvme_shutdown(nvme_t *, int, boolean_t);
static boolean_t nvme_reset(nvme_t *, boolean_t);
static int nvme_init(nvme_t *);
static nvme_cmd_t *nvme_alloc_cmd(nvme_t *, int);
static void nvme_free_cmd(nvme_cmd_t *);
static nvme_cmd_t *nvme_create_nvm_cmd(nvme_namespace_t *, uint8_t,
    bd_xfer_t *);
static int nvme_admin_cmd(nvme_cmd_t *, int);
static int nvme_submit_cmd(nvme_qpair_t *, nvme_cmd_t *);
static nvme_cmd_t *nvme_retrieve_cmd(nvme_t *, nvme_qpair_t *);
static boolean_t nvme_wait_cmd(nvme_cmd_t *, uint_t);
static void nvme_wakeup_cmd(void *);
static void nvme_async_event_task(void *);

static int nvme_check_unknown_cmd_status(nvme_cmd_t *);
static int nvme_check_vendor_cmd_status(nvme_cmd_t *);
static int nvme_check_integrity_cmd_status(nvme_cmd_t *);
static int nvme_check_specific_cmd_status(nvme_cmd_t *);
static int nvme_check_generic_cmd_status(nvme_cmd_t *);
static inline int nvme_check_cmd_status(nvme_cmd_t *);

static void nvme_abort_cmd(nvme_cmd_t *);
static int nvme_async_event(nvme_t *);
static void *nvme_get_logpage(nvme_t *, uint8_t, ...);
static void *nvme_identify(nvme_t *, uint32_t);
static int nvme_set_nqueues(nvme_t *, uint16_t);

static void nvme_free_dma(nvme_dma_t *);
static int nvme_zalloc_dma(nvme_t *, size_t, uint_t, ddi_dma_attr_t *,
    nvme_dma_t **);
static int nvme_zalloc_queue_dma(nvme_t *, uint32_t, uint16_t, uint_t,
    nvme_dma_t **);
static void nvme_free_qpair(nvme_qpair_t *);
static int nvme_alloc_qpair(nvme_t *, uint32_t, nvme_qpair_t **, int);
static int nvme_create_io_qpair(nvme_t *, nvme_qpair_t *, uint16_t);

static inline void nvme_put64(nvme_t *, uintptr_t, uint64_t);
static inline void nvme_put32(nvme_t *, uintptr_t, uint32_t);
static inline uint64_t nvme_get64(nvme_t *, uintptr_t);
static inline uint32_t nvme_get32(nvme_t *, uintptr_t);

static boolean_t nvme_check_regs_hdl(nvme_t *);
static boolean_t nvme_check_dma_hdl(nvme_dma_t *);

static int nvme_fill_prp(nvme_cmd_t *, bd_xfer_t *);

static void nvme_bd_xfer_done(void *);
static void nvme_bd_driveinfo(void *, bd_drive_t *);
static int nvme_bd_mediainfo(void *, bd_media_t *);
static int nvme_bd_cmd(nvme_namespace_t *, bd_xfer_t *, uint8_t);
static int nvme_bd_read(void *, bd_xfer_t *);
static int nvme_bd_write(void *, bd_xfer_t *);
static int nvme_bd_sync(void *, bd_xfer_t *);
static int nvme_bd_devid(void *, dev_info_t *, ddi_devid_t *);

static int nvme_prp_dma_constructor(void *, void *, int);
static void nvme_prp_dma_destructor(void *, void *);

static void nvme_prepare_devid(nvme_t *, uint32_t);

static void *nvme_state;
static kmem_cache_t *nvme_cmd_cache;

/*
 * DMA attributes for queue DMA memory
 *
 * Queue DMA memory must be page aligned. The maximum length of a queue is
 * 65536 entries, and an entry can be 64 bytes long.
 */
static ddi_dma_attr_t nvme_queue_dma_attr = {
	.dma_attr_version	= DMA_ATTR_V0,
	.dma_attr_addr_lo	= 0,
	.dma_attr_addr_hi	= 0xffffffffffffffffULL,
	.dma_attr_count_max	= (UINT16_MAX + 1) * sizeof (nvme_sqe_t) - 1,
	.dma_attr_align		= 0x1000,
	.dma_attr_burstsizes	= 0x7ff,
	.dma_attr_minxfer	= 0x1000,
	.dma_attr_maxxfer	= (UINT16_MAX + 1) * sizeof (nvme_sqe_t),
	.dma_attr_seg		= 0xffffffffffffffffULL,
	.dma_attr_sgllen	= 1,
	.dma_attr_granular	= 1,
	.dma_attr_flags		= 0,
};

/*
 * DMA attributes for transfers using Physical Region Page (PRP) entries
 *
 * A PRP entry describes one page of DMA memory using the page size specified
 * in the controller configuration's memory page size register (CC.MPS). It
 * uses a 64bit base address aligned to this page size. There is no limitation
 * on chaining PRPs together for arbitrarily large DMA transfers.
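 *
 * For example, with the minimum page size of 4k a single page used as a PRP
 * list holds 4096 / 8 = 512 PRP entries and can thus map up to 512 * 4k =
 * 2MB of data; this is where the single-PRP-list transfer limit computed in
 * nvme_init() below comes from.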
 */
static ddi_dma_attr_t nvme_prp_dma_attr = {
	.dma_attr_version	= DMA_ATTR_V0,
	.dma_attr_addr_lo	= 0,
	.dma_attr_addr_hi	= 0xffffffffffffffffULL,
	.dma_attr_count_max	= 0xfff,
	.dma_attr_align		= 0x1000,
	.dma_attr_burstsizes	= 0x7ff,
	.dma_attr_minxfer	= 0x1000,
	.dma_attr_maxxfer	= 0x1000,
	.dma_attr_seg		= 0xfff,
	.dma_attr_sgllen	= -1,
	.dma_attr_granular	= 1,
	.dma_attr_flags		= 0,
};

/*
 * DMA attributes for transfers using scatter/gather lists
 *
 * A SGL entry describes a chunk of DMA memory using a 64bit base address and
 * a 32bit length field. SGL Segment and SGL Last Segment entries require the
 * length to be a multiple of 16 bytes.
 */
static ddi_dma_attr_t nvme_sgl_dma_attr = {
	.dma_attr_version	= DMA_ATTR_V0,
	.dma_attr_addr_lo	= 0,
	.dma_attr_addr_hi	= 0xffffffffffffffffULL,
	.dma_attr_count_max	= 0xffffffffUL,
	.dma_attr_align		= 1,
	.dma_attr_burstsizes	= 0x7ff,
	.dma_attr_minxfer	= 0x10,
	.dma_attr_maxxfer	= 0xfffffffffULL,
	.dma_attr_seg		= 0xffffffffffffffffULL,
	.dma_attr_sgllen	= -1,
	.dma_attr_granular	= 0x10,
	.dma_attr_flags		= 0
};

static ddi_device_acc_attr_t nvme_reg_acc_attr = {
	.devacc_attr_version	= DDI_DEVICE_ATTR_V0,
	.devacc_attr_endian_flags = DDI_STRUCTURE_LE_ACC,
	.devacc_attr_dataorder	= DDI_STRICTORDER_ACC
};

static struct dev_ops nvme_dev_ops = {
	.devo_rev	= DEVO_REV,
	.devo_refcnt	= 0,
	.devo_getinfo	= ddi_no_info,
	.devo_identify	= nulldev,
	.devo_probe	= nulldev,
	.devo_attach	= nvme_attach,
	.devo_detach	= nvme_detach,
	.devo_reset	= nodev,
	.devo_cb_ops	= NULL,
	.devo_bus_ops	= NULL,
	.devo_power	= NULL,
	.devo_quiesce	= nvme_quiesce,
};

static struct modldrv nvme_modldrv = {
	.drv_modops	= &mod_driverops,
	.drv_linkinfo	= "NVMe v1.0e",
	.drv_dev_ops	= &nvme_dev_ops
};

static struct modlinkage nvme_modlinkage = {
	.ml_rev		= MODREV_1,
	.ml_linkage	= { &nvme_modldrv, NULL }
};

static bd_ops_t nvme_bd_ops = {
	.o_version	= BD_OPS_VERSION_0,
	.o_drive_info	= nvme_bd_driveinfo,
	.o_media_info	= nvme_bd_mediainfo,
	.o_devid_init	= nvme_bd_devid,
	.o_sync_cache	= nvme_bd_sync,
	.o_read		= nvme_bd_read,
	.o_write	= nvme_bd_write,
};

int
_init(void)
{
	int error;

	error = ddi_soft_state_init(&nvme_state, sizeof (nvme_t), 1);
	if (error != DDI_SUCCESS)
		return (error);

	nvme_cmd_cache = kmem_cache_create("nvme_cmd_cache",
	    sizeof (nvme_cmd_t), 64, NULL, NULL, NULL, NULL, NULL, 0);

	bd_mod_init(&nvme_dev_ops);

	error = mod_install(&nvme_modlinkage);
	if (error != DDI_SUCCESS) {
		ddi_soft_state_fini(&nvme_state);
		kmem_cache_destroy(nvme_cmd_cache);
		bd_mod_fini(&nvme_dev_ops);
	}

	return (error);
}

int
_fini(void)
{
	int error;

	error = mod_remove(&nvme_modlinkage);
	if (error == DDI_SUCCESS) {
		ddi_soft_state_fini(&nvme_state);
		kmem_cache_destroy(nvme_cmd_cache);
		bd_mod_fini(&nvme_dev_ops);
	}

	return (error);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&nvme_modlinkage, modinfop));
}
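
/*
 * Register access helpers. All register accesses go through the n_regh
 * access handle so that failed reads and writes can be detected by FMA
 * (see nvme_check_regs_hdl() below); the ASSERTs verify the natural
 * alignment required by the 32-bit and 64-bit controller registers.
 */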
static inline void
nvme_put64(nvme_t *nvme, uintptr_t reg, uint64_t val)
{
	ASSERT(((uintptr_t)(nvme->n_regs + reg) & 0x7) == 0);

	/*LINTED: E_BAD_PTR_CAST_ALIGN*/
	ddi_put64(nvme->n_regh, (uint64_t *)(nvme->n_regs + reg), val);
}

static inline void
nvme_put32(nvme_t *nvme, uintptr_t reg, uint32_t val)
{
	ASSERT(((uintptr_t)(nvme->n_regs + reg) & 0x3) == 0);

	/*LINTED: E_BAD_PTR_CAST_ALIGN*/
	ddi_put32(nvme->n_regh, (uint32_t *)(nvme->n_regs + reg), val);
}

static inline uint64_t
nvme_get64(nvme_t *nvme, uintptr_t reg)
{
	uint64_t val;

	ASSERT(((uintptr_t)(nvme->n_regs + reg) & 0x7) == 0);

	/*LINTED: E_BAD_PTR_CAST_ALIGN*/
	val = ddi_get64(nvme->n_regh, (uint64_t *)(nvme->n_regs + reg));

	return (val);
}

static inline uint32_t
nvme_get32(nvme_t *nvme, uintptr_t reg)
{
	uint32_t val;

	ASSERT(((uintptr_t)(nvme->n_regs + reg) & 0x3) == 0);

	/*LINTED: E_BAD_PTR_CAST_ALIGN*/
	val = ddi_get32(nvme->n_regh, (uint32_t *)(nvme->n_regs + reg));

	return (val);
}

static boolean_t
nvme_check_regs_hdl(nvme_t *nvme)
{
	ddi_fm_error_t error;

	ddi_fm_acc_err_get(nvme->n_regh, &error, DDI_FME_VERSION);

	if (error.fme_status != DDI_FM_OK)
		return (B_TRUE);

	return (B_FALSE);
}

static boolean_t
nvme_check_dma_hdl(nvme_dma_t *dma)
{
	ddi_fm_error_t error;

	if (dma == NULL)
		return (B_FALSE);

	ddi_fm_dma_err_get(dma->nd_dmah, &error, DDI_FME_VERSION);

	if (error.fme_status != DDI_FM_OK)
		return (B_TRUE);

	return (B_FALSE);
}

static void
nvme_free_dma_common(nvme_dma_t *dma)
{
	if (dma->nd_dmah != NULL)
		(void) ddi_dma_unbind_handle(dma->nd_dmah);
	if (dma->nd_acch != NULL)
		ddi_dma_mem_free(&dma->nd_acch);
	if (dma->nd_dmah != NULL)
		ddi_dma_free_handle(&dma->nd_dmah);
}

static void
nvme_free_dma(nvme_dma_t *dma)
{
	nvme_free_dma_common(dma);
	kmem_free(dma, sizeof (*dma));
}

/* ARGSUSED */
static void
nvme_prp_dma_destructor(void *buf, void *private)
{
	nvme_dma_t *dma = (nvme_dma_t *)buf;

	nvme_free_dma_common(dma);
}
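
/*
 * nvme_alloc_dma_common -- allocate, map, and bind DMA memory for an
 * existing nvme_dma_t. All allocations here are made with DDI_DMA_SLEEP,
 * so the only failure mode left is a failed handle bind, which is counted
 * in n_dma_bind_err and reported to the caller.
 */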
static int
nvme_alloc_dma_common(nvme_t *nvme, nvme_dma_t *dma,
    size_t len, uint_t flags, ddi_dma_attr_t *dma_attr)
{
	if (ddi_dma_alloc_handle(nvme->n_dip, dma_attr, DDI_DMA_SLEEP, NULL,
	    &dma->nd_dmah) != DDI_SUCCESS) {
		/*
		 * Due to DDI_DMA_SLEEP this can't be DDI_DMA_NORESOURCES, and
		 * the only other possible error is DDI_DMA_BADATTR which
		 * indicates a driver bug which should cause a panic.
		 */
		dev_err(nvme->n_dip, CE_PANIC,
		    "!failed to get DMA handle, check DMA attributes");
		return (DDI_FAILURE);
	}

	/*
	 * ddi_dma_mem_alloc() can only fail when DDI_DMA_NOSLEEP is specified
	 * or the flags are conflicting, which isn't the case here.
	 */
	(void) ddi_dma_mem_alloc(dma->nd_dmah, len, &nvme->n_reg_acc_attr,
	    DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &dma->nd_memp,
	    &dma->nd_len, &dma->nd_acch);

	if (ddi_dma_addr_bind_handle(dma->nd_dmah, NULL, dma->nd_memp,
	    dma->nd_len, flags | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
	    &dma->nd_cookie, &dma->nd_ncookie) != DDI_DMA_MAPPED) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!failed to bind DMA memory");
		atomic_inc_32(&nvme->n_dma_bind_err);
		nvme_free_dma_common(dma);
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}

static int
nvme_zalloc_dma(nvme_t *nvme, size_t len, uint_t flags,
    ddi_dma_attr_t *dma_attr, nvme_dma_t **ret)
{
	nvme_dma_t *dma = kmem_zalloc(sizeof (nvme_dma_t), KM_SLEEP);

	if (nvme_alloc_dma_common(nvme, dma, len, flags, dma_attr) !=
	    DDI_SUCCESS) {
		*ret = NULL;
		kmem_free(dma, sizeof (nvme_dma_t));
		return (DDI_FAILURE);
	}

	bzero(dma->nd_memp, dma->nd_len);

	*ret = dma;
	return (DDI_SUCCESS);
}

/* ARGSUSED */
static int
nvme_prp_dma_constructor(void *buf, void *private, int flags)
{
	nvme_dma_t *dma = (nvme_dma_t *)buf;
	nvme_t *nvme = (nvme_t *)private;

	dma->nd_dmah = NULL;
	dma->nd_acch = NULL;

	if (nvme_alloc_dma_common(nvme, dma, nvme->n_pagesize,
	    DDI_DMA_READ, &nvme->n_prp_dma_attr) != DDI_SUCCESS) {
		return (-1);
	}

	ASSERT(dma->nd_ncookie == 1);

	dma->nd_cached = B_TRUE;

	return (0);
}

static int
nvme_zalloc_queue_dma(nvme_t *nvme, uint32_t nentry, uint16_t qe_len,
    uint_t flags, nvme_dma_t **dma)
{
	uint32_t len = nentry * qe_len;
	ddi_dma_attr_t q_dma_attr = nvme->n_queue_dma_attr;

	len = roundup(len, nvme->n_pagesize);

	q_dma_attr.dma_attr_minxfer = len;

	if (nvme_zalloc_dma(nvme, len, flags, &q_dma_attr, dma)
	    != DDI_SUCCESS) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!failed to get DMA memory for queue");
		goto fail;
	}

	if ((*dma)->nd_ncookie != 1) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!got too many cookies for queue DMA");
		goto fail;
	}

	return (DDI_SUCCESS);

fail:
	if (*dma) {
		nvme_free_dma(*dma);
		*dma = NULL;
	}

	return (DDI_FAILURE);
}

static void
nvme_free_qpair(nvme_qpair_t *qp)
{
	int i;

	mutex_destroy(&qp->nq_mutex);

	if (qp->nq_sqdma != NULL)
		nvme_free_dma(qp->nq_sqdma);
	if (qp->nq_cqdma != NULL)
		nvme_free_dma(qp->nq_cqdma);

	if (qp->nq_active_cmds > 0)
		for (i = 0; i != qp->nq_nentry; i++)
			if (qp->nq_cmd[i] != NULL)
				nvme_free_cmd(qp->nq_cmd[i]);

	if (qp->nq_cmd != NULL)
		kmem_free(qp->nq_cmd, sizeof (nvme_cmd_t *) * qp->nq_nentry);

	kmem_free(qp, sizeof (nvme_qpair_t));
}

static int
nvme_alloc_qpair(nvme_t *nvme, uint32_t nentry, nvme_qpair_t **nqp,
    int idx)
{
	nvme_qpair_t *qp = kmem_zalloc(sizeof (*qp), KM_SLEEP);

	mutex_init(&qp->nq_mutex, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(nvme->n_intr_pri));

	if (nvme_zalloc_queue_dma(nvme, nentry, sizeof (nvme_sqe_t),
	    DDI_DMA_WRITE, &qp->nq_sqdma) != DDI_SUCCESS)
		goto fail;

	if (nvme_zalloc_queue_dma(nvme, nentry, sizeof (nvme_cqe_t),
	    DDI_DMA_READ, &qp->nq_cqdma) != DDI_SUCCESS)
		goto fail;

	qp->nq_sq = (nvme_sqe_t *)qp->nq_sqdma->nd_memp;
	qp->nq_cq = (nvme_cqe_t *)qp->nq_cqdma->nd_memp;
	qp->nq_nentry = nentry;

	qp->nq_sqtdbl = NVME_REG_SQTDBL(nvme, idx);
	qp->nq_cqhdbl = NVME_REG_CQHDBL(nvme, idx);

	qp->nq_cmd = kmem_zalloc(sizeof (nvme_cmd_t *) * nentry, KM_SLEEP);
	qp->nq_next_cmd = 0;

	*nqp = qp;
	return (DDI_SUCCESS);

fail:
	nvme_free_qpair(qp);
	*nqp = NULL;

	return (DDI_FAILURE);
}

static nvme_cmd_t *
nvme_alloc_cmd(nvme_t *nvme, int kmflag)
{
	nvme_cmd_t *cmd = kmem_cache_alloc(nvme_cmd_cache, kmflag);

	if (cmd == NULL)
		return (cmd);

	bzero(cmd, sizeof (nvme_cmd_t));

	cmd->nc_nvme = nvme;

	mutex_init(&cmd->nc_mutex, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(nvme->n_intr_pri));
	cv_init(&cmd->nc_cv, NULL, CV_DRIVER, NULL);

	return (cmd);
}

static void
nvme_free_cmd(nvme_cmd_t *cmd)
{
	if (cmd->nc_dma) {
		if (cmd->nc_dma->nd_cached)
			kmem_cache_free(cmd->nc_nvme->n_prp_cache,
			    cmd->nc_dma);
		else
			nvme_free_dma(cmd->nc_dma);
		cmd->nc_dma = NULL;
	}

	cv_destroy(&cmd->nc_cv);
	mutex_destroy(&cmd->nc_mutex);

	kmem_cache_free(nvme_cmd_cache, cmd);
}

static int
nvme_submit_cmd(nvme_qpair_t *qp, nvme_cmd_t *cmd)
{
	nvme_reg_sqtdbl_t tail = { 0 };

	mutex_enter(&qp->nq_mutex);

	if (qp->nq_active_cmds == qp->nq_nentry) {
		mutex_exit(&qp->nq_mutex);
		return (DDI_FAILURE);
	}

	cmd->nc_completed = B_FALSE;

	/*
	 * Try to insert the cmd into the active cmd array at the nq_next_cmd
	 * slot. If the slot is already occupied advance to the next slot and
	 * try again. This can happen for long running commands like async
	 * event requests.
	 */
	while (qp->nq_cmd[qp->nq_next_cmd] != NULL)
		qp->nq_next_cmd = (qp->nq_next_cmd + 1) % qp->nq_nentry;
	qp->nq_cmd[qp->nq_next_cmd] = cmd;

	qp->nq_active_cmds++;

	cmd->nc_sqe.sqe_cid = qp->nq_next_cmd;
	bcopy(&cmd->nc_sqe, &qp->nq_sq[qp->nq_sqtail], sizeof (nvme_sqe_t));
	(void) ddi_dma_sync(qp->nq_sqdma->nd_dmah,
	    sizeof (nvme_sqe_t) * qp->nq_sqtail,
	    sizeof (nvme_sqe_t), DDI_DMA_SYNC_FORDEV);
	qp->nq_next_cmd = (qp->nq_next_cmd + 1) % qp->nq_nentry;

	tail.b.sqtdbl_sqt = qp->nq_sqtail = (qp->nq_sqtail + 1) % qp->nq_nentry;
	nvme_put32(cmd->nc_nvme, qp->nq_sqtdbl, tail.r);

	mutex_exit(&qp->nq_mutex);
	return (DDI_SUCCESS);
}
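
/*
 * nvme_retrieve_cmd -- reap one completed command from a completion queue
 *
 * Returns NULL if no new completion is pending. This is only called from
 * interrupt context for a given queue pair, which is what makes the
 * lock-free check of the phase tag safe; only the shared command array and
 * active command counter are accessed under nq_mutex.
 */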
static nvme_cmd_t *
nvme_retrieve_cmd(nvme_t *nvme, nvme_qpair_t *qp)
{
	nvme_reg_cqhdbl_t head = { 0 };

	nvme_cqe_t *cqe;
	nvme_cmd_t *cmd;

	(void) ddi_dma_sync(qp->nq_cqdma->nd_dmah, 0,
	    sizeof (nvme_cqe_t) * qp->nq_nentry, DDI_DMA_SYNC_FORKERNEL);

	cqe = &qp->nq_cq[qp->nq_cqhead];

	/* Check phase tag of CQE. Hardware inverts it for new entries. */
	if (cqe->cqe_sf.sf_p == qp->nq_phase)
		return (NULL);

	ASSERT(nvme->n_ioq[cqe->cqe_sqid] == qp);
	ASSERT(cqe->cqe_cid < qp->nq_nentry);

	mutex_enter(&qp->nq_mutex);
	cmd = qp->nq_cmd[cqe->cqe_cid];
	qp->nq_cmd[cqe->cqe_cid] = NULL;
	qp->nq_active_cmds--;
	mutex_exit(&qp->nq_mutex);

	ASSERT(cmd != NULL);
	ASSERT(cmd->nc_nvme == nvme);
	ASSERT(cmd->nc_sqid == cqe->cqe_sqid);
	ASSERT(cmd->nc_sqe.sqe_cid == cqe->cqe_cid);
	bcopy(cqe, &cmd->nc_cqe, sizeof (nvme_cqe_t));

	qp->nq_sqhead = cqe->cqe_sqhd;

	head.b.cqhdbl_cqh = qp->nq_cqhead = (qp->nq_cqhead + 1) % qp->nq_nentry;

	/* Toggle phase on wrap-around. */
	if (qp->nq_cqhead == 0)
		qp->nq_phase = qp->nq_phase ? 0 : 1;

	nvme_put32(cmd->nc_nvme, qp->nq_cqhdbl, head.r);

	return (cmd);
}
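
/*
 * Command status checking. The nvme_check_*_cmd_status() functions below
 * map NVMe command status codes, grouped by status code type, to errnos.
 * Status values that indicate a driver bug panic the system, fatal device
 * errors mark the device dead and notify FMA, and everything else maps to
 * an errno, most commonly EIO.
 */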
static int
nvme_check_unknown_cmd_status(nvme_cmd_t *cmd)
{
	nvme_cqe_t *cqe = &cmd->nc_cqe;

	dev_err(cmd->nc_nvme->n_dip, CE_WARN,
	    "!unknown command status received: opc = %x, sqid = %d, cid = %d, "
	    "sc = %x, sct = %x, dnr = %d, m = %d", cmd->nc_sqe.sqe_opc,
	    cqe->cqe_sqid, cqe->cqe_cid, cqe->cqe_sf.sf_sc, cqe->cqe_sf.sf_sct,
	    cqe->cqe_sf.sf_dnr, cqe->cqe_sf.sf_m);

	bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);

	if (cmd->nc_nvme->n_strict_version) {
		cmd->nc_nvme->n_dead = B_TRUE;
		ddi_fm_service_impact(cmd->nc_nvme->n_dip, DDI_SERVICE_LOST);
	}

	return (EIO);
}

static int
nvme_check_vendor_cmd_status(nvme_cmd_t *cmd)
{
	nvme_cqe_t *cqe = &cmd->nc_cqe;

	dev_err(cmd->nc_nvme->n_dip, CE_WARN,
	    "!vendor specific command status received: opc = %x, sqid = %d, "
	    "cid = %d, sc = %x, sct = %x, dnr = %d, m = %d",
	    cmd->nc_sqe.sqe_opc, cqe->cqe_sqid, cqe->cqe_cid,
	    cqe->cqe_sf.sf_sc, cqe->cqe_sf.sf_sct,
	    cqe->cqe_sf.sf_dnr, cqe->cqe_sf.sf_m);

	if (!cmd->nc_nvme->n_ignore_unknown_vendor_status) {
		cmd->nc_nvme->n_dead = B_TRUE;
		ddi_fm_service_impact(cmd->nc_nvme->n_dip, DDI_SERVICE_LOST);
	}

	return (EIO);
}

static int
nvme_check_integrity_cmd_status(nvme_cmd_t *cmd)
{
	nvme_cqe_t *cqe = &cmd->nc_cqe;

	switch (cqe->cqe_sf.sf_sc) {
	case NVME_CQE_SC_INT_NVM_WRITE:
		/* write fail */
		/* TODO: post ereport */
		bd_error(cmd->nc_xfer, BD_ERR_MEDIA);
		return (EIO);

	case NVME_CQE_SC_INT_NVM_READ:
		/* read fail */
		/* TODO: post ereport */
		bd_error(cmd->nc_xfer, BD_ERR_MEDIA);
		return (EIO);

	default:
		return (nvme_check_unknown_cmd_status(cmd));
	}
}

static int
nvme_check_generic_cmd_status(nvme_cmd_t *cmd)
{
	nvme_cqe_t *cqe = &cmd->nc_cqe;

	switch (cqe->cqe_sf.sf_sc) {
	case NVME_CQE_SC_GEN_SUCCESS:
		return (0);

	/*
	 * Errors indicating a bug in the driver should cause a panic.
	 */
	case NVME_CQE_SC_GEN_INV_OPC:
		/* Invalid Command Opcode */
		dev_err(cmd->nc_nvme->n_dip, CE_PANIC, "programming error: "
		    "invalid opcode in cmd %p", (void *)cmd);
		return (0);

	case NVME_CQE_SC_GEN_INV_FLD:
		/* Invalid Field in Command */
		dev_err(cmd->nc_nvme->n_dip, CE_PANIC, "programming error: "
		    "invalid field in cmd %p", (void *)cmd);
		return (0);

	case NVME_CQE_SC_GEN_ID_CNFL:
		/* Command ID Conflict */
		dev_err(cmd->nc_nvme->n_dip, CE_PANIC, "programming error: "
		    "cmd ID conflict in cmd %p", (void *)cmd);
		return (0);

	case NVME_CQE_SC_GEN_INV_NS:
		/* Invalid Namespace or Format */
		dev_err(cmd->nc_nvme->n_dip, CE_PANIC, "programming error: "
		    "invalid NS/format in cmd %p", (void *)cmd);
		return (0);

	case NVME_CQE_SC_GEN_NVM_LBA_RANGE:
		/* LBA Out Of Range */
		dev_err(cmd->nc_nvme->n_dip, CE_PANIC, "programming error: "
		    "LBA out of range in cmd %p", (void *)cmd);
		return (0);

	/*
	 * Non-fatal errors, handle gracefully.
	 */
	case NVME_CQE_SC_GEN_DATA_XFR_ERR:
		/* Data Transfer Error (DMA) */
		/* TODO: post ereport */
		atomic_inc_32(&cmd->nc_nvme->n_data_xfr_err);
		bd_error(cmd->nc_xfer, BD_ERR_NTRDY);
		return (EIO);

	case NVME_CQE_SC_GEN_INTERNAL_ERR:
		/*
		 * Internal Error. The spec (v1.0, section 4.5.1.2) says
		 * detailed error information is returned as async event,
		 * so we pretty much ignore the error here and handle it
		 * in the async event handler.
		 */
		atomic_inc_32(&cmd->nc_nvme->n_internal_err);
		bd_error(cmd->nc_xfer, BD_ERR_NTRDY);
		return (EIO);

	case NVME_CQE_SC_GEN_ABORT_REQUEST:
		/*
		 * Command Abort Requested. This normally happens only when a
		 * command times out.
		 */
		/* TODO: post ereport or change blkdev to handle this? */
		atomic_inc_32(&cmd->nc_nvme->n_abort_rq_err);
		return (ECANCELED);

	case NVME_CQE_SC_GEN_ABORT_PWRLOSS:
		/* Command Aborted due to Power Loss Notification */
		ddi_fm_service_impact(cmd->nc_nvme->n_dip, DDI_SERVICE_LOST);
		cmd->nc_nvme->n_dead = B_TRUE;
		return (EIO);

	case NVME_CQE_SC_GEN_ABORT_SQ_DEL:
		/* Command Aborted due to SQ Deletion */
		atomic_inc_32(&cmd->nc_nvme->n_abort_sq_del);
		return (EIO);

	case NVME_CQE_SC_GEN_NVM_CAP_EXC:
		/* Capacity Exceeded */
		atomic_inc_32(&cmd->nc_nvme->n_nvm_cap_exc);
		bd_error(cmd->nc_xfer, BD_ERR_MEDIA);
		return (EIO);

	case NVME_CQE_SC_GEN_NVM_NS_NOTRDY:
		/* Namespace Not Ready */
		atomic_inc_32(&cmd->nc_nvme->n_nvm_ns_notrdy);
		bd_error(cmd->nc_xfer, BD_ERR_NTRDY);
		return (EIO);

	default:
		return (nvme_check_unknown_cmd_status(cmd));
	}
}

static int
nvme_check_specific_cmd_status(nvme_cmd_t *cmd)
{
	nvme_cqe_t *cqe = &cmd->nc_cqe;

	switch (cqe->cqe_sf.sf_sc) {
	case NVME_CQE_SC_SPC_INV_CQ:
		/* Completion Queue Invalid */
		ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_CREATE_SQUEUE);
		atomic_inc_32(&cmd->nc_nvme->n_inv_cq_err);
		return (EINVAL);

	case NVME_CQE_SC_SPC_INV_QID:
		/* Invalid Queue Identifier */
		ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_CREATE_SQUEUE ||
		    cmd->nc_sqe.sqe_opc == NVME_OPC_DELETE_SQUEUE ||
		    cmd->nc_sqe.sqe_opc == NVME_OPC_CREATE_CQUEUE ||
		    cmd->nc_sqe.sqe_opc == NVME_OPC_DELETE_CQUEUE);
		atomic_inc_32(&cmd->nc_nvme->n_inv_qid_err);
		return (EINVAL);

	case NVME_CQE_SC_SPC_MAX_QSZ_EXC:
		/* Max Queue Size Exceeded */
		ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_CREATE_SQUEUE ||
		    cmd->nc_sqe.sqe_opc == NVME_OPC_CREATE_CQUEUE);
		atomic_inc_32(&cmd->nc_nvme->n_max_qsz_exc);
		return (EINVAL);

	case NVME_CQE_SC_SPC_ABRT_CMD_EXC:
		/* Abort Command Limit Exceeded */
		ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_ABORT);
		dev_err(cmd->nc_nvme->n_dip, CE_PANIC, "programming error: "
		    "abort command limit exceeded in cmd %p", (void *)cmd);
		return (0);

	case NVME_CQE_SC_SPC_ASYNC_EVREQ_EXC:
		/* Async Event Request Limit Exceeded */
		ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_ASYNC_EVENT);
		dev_err(cmd->nc_nvme->n_dip, CE_PANIC, "programming error: "
		    "async event request limit exceeded in cmd %p",
		    (void *)cmd);
		return (0);

	case NVME_CQE_SC_SPC_INV_INT_VECT:
		/* Invalid Interrupt Vector */
		ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_CREATE_CQUEUE);
		atomic_inc_32(&cmd->nc_nvme->n_inv_int_vect);
		return (EINVAL);

	case NVME_CQE_SC_SPC_INV_LOG_PAGE:
		/* Invalid Log Page */
		ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_GET_LOG_PAGE);
		atomic_inc_32(&cmd->nc_nvme->n_inv_log_page);
		bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
		return (EINVAL);

	case NVME_CQE_SC_SPC_INV_FORMAT:
		/* Invalid Format */
		ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_FORMAT);
		atomic_inc_32(&cmd->nc_nvme->n_inv_format);
		bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
		return (EINVAL);

	case NVME_CQE_SC_SPC_INV_Q_DEL:
		/* Invalid Queue Deletion */
		ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_DELETE_CQUEUE);
		atomic_inc_32(&cmd->nc_nvme->n_inv_q_del);
		return (EINVAL);

	case NVME_CQE_SC_SPC_NVM_CNFL_ATTR:
		/* Conflicting Attributes */
		ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_DSET_MGMT ||
		    cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_READ ||
		    cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_WRITE);
		atomic_inc_32(&cmd->nc_nvme->n_cnfl_attr);
		bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
		return (EINVAL);

	case NVME_CQE_SC_SPC_NVM_INV_PROT:
		/* Invalid Protection Information */
		ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_COMPARE ||
		    cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_READ ||
		    cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_WRITE);
		atomic_inc_32(&cmd->nc_nvme->n_inv_prot);
		bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
		return (EINVAL);

	case NVME_CQE_SC_SPC_NVM_READONLY:
		/* Write to Read Only Range */
		ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_WRITE);
		atomic_inc_32(&cmd->nc_nvme->n_readonly);
		bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
		return (EROFS);

	default:
		return (nvme_check_unknown_cmd_status(cmd));
	}
}

static inline int
nvme_check_cmd_status(nvme_cmd_t *cmd)
{
	nvme_cqe_t *cqe = &cmd->nc_cqe;

	/* take a shortcut if everything is alright */
	if (cqe->cqe_sf.sf_sct == NVME_CQE_SCT_GENERIC &&
	    cqe->cqe_sf.sf_sc == NVME_CQE_SC_GEN_SUCCESS)
		return (0);

	if (cqe->cqe_sf.sf_sct == NVME_CQE_SCT_GENERIC)
		return (nvme_check_generic_cmd_status(cmd));
	else if (cqe->cqe_sf.sf_sct == NVME_CQE_SCT_SPECIFIC)
		return (nvme_check_specific_cmd_status(cmd));
	else if (cqe->cqe_sf.sf_sct == NVME_CQE_SCT_INTEGRITY)
		return (nvme_check_integrity_cmd_status(cmd));
	else if (cqe->cqe_sf.sf_sct == NVME_CQE_SCT_VENDOR)
		return (nvme_check_vendor_cmd_status(cmd));

	return (nvme_check_unknown_cmd_status(cmd));
}

/*
 * nvme_abort_cmd_cb -- replaces nc_callback of aborted commands
 *
 * This function takes care of cleaning up aborted commands. The command
 * status is checked to catch any fatal errors.
 */
static void
nvme_abort_cmd_cb(void *arg)
{
	nvme_cmd_t *cmd = arg;

	/*
	 * Grab the command mutex. Once we have it we hold the last reference
	 * to the command and can safely free it.
	 */
	mutex_enter(&cmd->nc_mutex);
	(void) nvme_check_cmd_status(cmd);
	mutex_exit(&cmd->nc_mutex);

	nvme_free_cmd(cmd);
}
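
/*
 * nvme_abort_cmd -- post an ABORT admin command for a timed-out command
 *
 * Called with the mutex of the command to be aborted held; the mutex is
 * dropped here and the aborted command must afterwards be considered freed
 * by its replacement callback, nvme_abort_cmd_cb(). The hardware's abort
 * command limit is enforced with n_abort_sema.
 */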
static void
nvme_abort_cmd(nvme_cmd_t *abort_cmd)
{
	nvme_t *nvme = abort_cmd->nc_nvme;
	nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
	nvme_abort_cmd_t ac = { 0 };

	sema_p(&nvme->n_abort_sema);

	ac.b.ac_cid = abort_cmd->nc_sqe.sqe_cid;
	ac.b.ac_sqid = abort_cmd->nc_sqid;

	/*
	 * Drop the mutex of the aborted command. From this point on
	 * we must assume that the abort callback has freed the command.
	 */
	mutex_exit(&abort_cmd->nc_mutex);

	cmd->nc_sqid = 0;
	cmd->nc_sqe.sqe_opc = NVME_OPC_ABORT;
	cmd->nc_callback = nvme_wakeup_cmd;
	cmd->nc_sqe.sqe_cdw10 = ac.r;

	/*
	 * Send the ABORT to the hardware. The ABORT command will return
	 * _after_ the aborted command has completed (aborted or otherwise).
	 */
	if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) {
		sema_v(&nvme->n_abort_sema);
		dev_err(nvme->n_dip, CE_WARN,
		    "!nvme_admin_cmd failed for ABORT");
		atomic_inc_32(&nvme->n_abort_failed);
		return;
	}
	sema_v(&nvme->n_abort_sema);

	if (nvme_check_cmd_status(cmd)) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!ABORT failed with sct = %x, sc = %x",
		    cmd->nc_cqe.cqe_sf.sf_sct, cmd->nc_cqe.cqe_sf.sf_sc);
		atomic_inc_32(&nvme->n_abort_failed);
	} else {
		atomic_inc_32(&nvme->n_cmd_aborted);
	}

	nvme_free_cmd(cmd);
}

/*
 * nvme_wait_cmd -- wait for command completion or timeout
 *
 * Returns B_TRUE if the command completed normally.
 *
 * Returns B_FALSE if the command timed out and an abort was attempted. The
 * command mutex will be dropped and the command must be considered freed. The
 * freeing of the command is normally done by the abort command callback.
 *
 * In case of a serious error or a timeout of the abort command the hardware
 * will be declared dead and FMA will be notified.
 */
static boolean_t
nvme_wait_cmd(nvme_cmd_t *cmd, uint_t sec)
{
	clock_t timeout = ddi_get_lbolt() + drv_usectohz(sec * MICROSEC);
	nvme_t *nvme = cmd->nc_nvme;
	nvme_reg_csts_t csts;

	ASSERT(mutex_owned(&cmd->nc_mutex));

	while (!cmd->nc_completed) {
		if (cv_timedwait(&cmd->nc_cv, &cmd->nc_mutex, timeout) == -1)
			break;
	}

	if (cmd->nc_completed)
		return (B_TRUE);

	/*
	 * The command timed out. Change the callback to the cleanup function.
	 */
	cmd->nc_callback = nvme_abort_cmd_cb;

	/*
	 * Check controller for fatal status, any errors associated with the
	 * register or DMA handle, or for a double timeout (abort command timed
	 * out). If necessary log a warning and call FMA.
	 */
	csts.r = nvme_get32(nvme, NVME_REG_CSTS);
	dev_err(nvme->n_dip, CE_WARN, "!command timeout, "
	    "OPC = %x, CFS = %d", cmd->nc_sqe.sqe_opc, csts.b.csts_cfs);
	atomic_inc_32(&nvme->n_cmd_timeout);

	if (csts.b.csts_cfs ||
	    nvme_check_regs_hdl(nvme) ||
	    nvme_check_dma_hdl(cmd->nc_dma) ||
	    cmd->nc_sqe.sqe_opc == NVME_OPC_ABORT) {
		ddi_fm_service_impact(nvme->n_dip, DDI_SERVICE_LOST);
		nvme->n_dead = B_TRUE;
		mutex_exit(&cmd->nc_mutex);
	} else {
		/*
		 * Try to abort the command. The command mutex is released by
		 * nvme_abort_cmd().
		 * If the abort succeeds it will have freed the aborted
		 * command. If the abort fails for other reasons we must
		 * assume that the command may complete at any time, and the
		 * callback will free it for us.
		 */
		nvme_abort_cmd(cmd);
	}

	return (B_FALSE);
}

static void
nvme_wakeup_cmd(void *arg)
{
	nvme_cmd_t *cmd = arg;

	mutex_enter(&cmd->nc_mutex);
	/*
	 * There is a slight chance that this command completed shortly after
	 * the timeout was hit in nvme_wait_cmd() but before the callback was
	 * changed. Catch that case here and clean up accordingly.
	 */
	if (cmd->nc_callback == nvme_abort_cmd_cb) {
		mutex_exit(&cmd->nc_mutex);
		nvme_abort_cmd_cb(cmd);
		return;
	}

	cmd->nc_completed = B_TRUE;
	cv_signal(&cmd->nc_cv);
	mutex_exit(&cmd->nc_mutex);
}
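
/*
 * nvme_async_event_task -- taskq callback for completed async event requests
 *
 * Decodes the event reported in cdw0, fetches the associated log page where
 * one is indicated, logs the event and/or faults the device depending on
 * its severity, and re-posts the request to the admin queue so it can be
 * reused.
 */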
static void
nvme_async_event_task(void *arg)
{
	nvme_cmd_t *cmd = arg;
	nvme_t *nvme = cmd->nc_nvme;
	nvme_error_log_entry_t *error_log = NULL;
	nvme_health_log_t *health_log = NULL;
	nvme_async_event_t event;
	int ret;

	/*
	 * Check for errors associated with the async request itself. The only
	 * command-specific error is "async event limit exceeded", which
	 * indicates a programming error in the driver and causes a panic in
	 * nvme_check_cmd_status().
	 *
	 * Other possible errors are various scenarios where the async request
	 * was aborted, or internal errors in the device. Internal errors are
	 * reported to FMA, the command aborts need no special handling here.
	 */
	if (nvme_check_cmd_status(cmd)) {
		dev_err(cmd->nc_nvme->n_dip, CE_WARN,
		    "!async event request returned failure, sct = %x, "
		    "sc = %x, dnr = %d, m = %d", cmd->nc_cqe.cqe_sf.sf_sct,
		    cmd->nc_cqe.cqe_sf.sf_sc, cmd->nc_cqe.cqe_sf.sf_dnr,
		    cmd->nc_cqe.cqe_sf.sf_m);

		if (cmd->nc_cqe.cqe_sf.sf_sct == NVME_CQE_SCT_GENERIC &&
		    cmd->nc_cqe.cqe_sf.sf_sc == NVME_CQE_SC_GEN_INTERNAL_ERR) {
			cmd->nc_nvme->n_dead = B_TRUE;
			ddi_fm_service_impact(cmd->nc_nvme->n_dip,
			    DDI_SERVICE_LOST);
		}
		nvme_free_cmd(cmd);
		return;
	}


	event.r = cmd->nc_cqe.cqe_dw0;

	/* Clear CQE and re-submit the async request. */
	bzero(&cmd->nc_cqe, sizeof (nvme_cqe_t));
	ret = nvme_submit_cmd(nvme->n_adminq, cmd);

	if (ret != DDI_SUCCESS) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!failed to resubmit async event request");
		atomic_inc_32(&nvme->n_async_resubmit_failed);
		nvme_free_cmd(cmd);
	}

	switch (event.b.ae_type) {
	case NVME_ASYNC_TYPE_ERROR:
		if (event.b.ae_logpage == NVME_LOGPAGE_ERROR) {
			error_log = (nvme_error_log_entry_t *)
			    nvme_get_logpage(nvme, event.b.ae_logpage);
		} else {
			dev_err(nvme->n_dip, CE_WARN, "!wrong logpage in "
			    "async event reply: %d", event.b.ae_logpage);
			atomic_inc_32(&nvme->n_wrong_logpage);
		}

		switch (event.b.ae_info) {
		case NVME_ASYNC_ERROR_INV_SQ:
			dev_err(nvme->n_dip, CE_PANIC, "programming error: "
			    "invalid submission queue");
			return;

		case NVME_ASYNC_ERROR_INV_DBL:
			dev_err(nvme->n_dip, CE_PANIC, "programming error: "
			    "invalid doorbell write value");
			return;

		case NVME_ASYNC_ERROR_DIAGFAIL:
			dev_err(nvme->n_dip, CE_WARN, "!diagnostic failure");
			ddi_fm_service_impact(nvme->n_dip, DDI_SERVICE_LOST);
			nvme->n_dead = B_TRUE;
			atomic_inc_32(&nvme->n_diagfail_event);
			break;

		case NVME_ASYNC_ERROR_PERSISTENT:
			dev_err(nvme->n_dip, CE_WARN, "!persistent internal "
			    "device error");
			ddi_fm_service_impact(nvme->n_dip, DDI_SERVICE_LOST);
			nvme->n_dead = B_TRUE;
			atomic_inc_32(&nvme->n_persistent_event);
			break;

		case NVME_ASYNC_ERROR_TRANSIENT:
			dev_err(nvme->n_dip, CE_WARN, "!transient internal "
			    "device error");
			/* TODO: send ereport */
			atomic_inc_32(&nvme->n_transient_event);
			break;

		case NVME_ASYNC_ERROR_FW_LOAD:
			dev_err(nvme->n_dip, CE_WARN,
			    "!firmware image load error");
			atomic_inc_32(&nvme->n_fw_load_event);
			break;
		}
		break;

	case NVME_ASYNC_TYPE_HEALTH:
		if (event.b.ae_logpage == NVME_LOGPAGE_HEALTH) {
			health_log = (nvme_health_log_t *)
			    nvme_get_logpage(nvme, event.b.ae_logpage, -1);
		} else {
			dev_err(nvme->n_dip, CE_WARN, "!wrong logpage in "
			    "async event reply: %d", event.b.ae_logpage);
			atomic_inc_32(&nvme->n_wrong_logpage);
		}

		switch (event.b.ae_info) {
		case NVME_ASYNC_HEALTH_RELIABILITY:
			dev_err(nvme->n_dip, CE_WARN,
			    "!device reliability compromised");
			/* TODO: send ereport */
			atomic_inc_32(&nvme->n_reliability_event);
			break;

		case NVME_ASYNC_HEALTH_TEMPERATURE:
			dev_err(nvme->n_dip, CE_WARN,
			    "!temperature above threshold");
			/* TODO: send ereport */
			atomic_inc_32(&nvme->n_temperature_event);
			break;

		case NVME_ASYNC_HEALTH_SPARE:
			dev_err(nvme->n_dip, CE_WARN,
			    "!spare space below threshold");
			/* TODO: send ereport */
			atomic_inc_32(&nvme->n_spare_event);
			break;
		}
		break;

	case NVME_ASYNC_TYPE_VENDOR:
		dev_err(nvme->n_dip, CE_WARN, "!vendor specific async event "
		    "received, info = %x, logpage = %x", event.b.ae_info,
		    event.b.ae_logpage);
		atomic_inc_32(&nvme->n_vendor_event);
		break;

	default:
		dev_err(nvme->n_dip, CE_WARN, "!unknown async event received, "
		    "type = %x, info = %x, logpage = %x", event.b.ae_type,
		    event.b.ae_info, event.b.ae_logpage);
		atomic_inc_32(&nvme->n_unknown_event);
		break;
	}

	if (error_log)
		kmem_free(error_log, sizeof (nvme_error_log_entry_t) *
		    nvme->n_error_log_len);

	if (health_log)
		kmem_free(health_log, sizeof (nvme_health_log_t));
}

static int
nvme_admin_cmd(nvme_cmd_t *cmd, int sec)
{
	int ret;

	mutex_enter(&cmd->nc_mutex);
	ret = nvme_submit_cmd(cmd->nc_nvme->n_adminq, cmd);

	if (ret != DDI_SUCCESS) {
		mutex_exit(&cmd->nc_mutex);
		dev_err(cmd->nc_nvme->n_dip, CE_WARN,
		    "!nvme_submit_cmd failed");
		atomic_inc_32(&cmd->nc_nvme->n_admin_queue_full);
		nvme_free_cmd(cmd);
		return (DDI_FAILURE);
	}

	if (nvme_wait_cmd(cmd, sec) == B_FALSE) {
		/*
		 * The command timed out. An abort command was posted that
		 * will take care of the cleanup.
		 */
		return (DDI_FAILURE);
	}
	mutex_exit(&cmd->nc_mutex);

	return (DDI_SUCCESS);
}

static int
nvme_async_event(nvme_t *nvme)
{
	nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
	int ret;

	cmd->nc_sqid = 0;
	cmd->nc_sqe.sqe_opc = NVME_OPC_ASYNC_EVENT;
	cmd->nc_callback = nvme_async_event_task;

	ret = nvme_submit_cmd(nvme->n_adminq, cmd);

	if (ret != DDI_SUCCESS) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!nvme_submit_cmd failed for ASYNCHRONOUS EVENT");
		nvme_free_cmd(cmd);
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}
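
/*
 * nvme_get_logpage -- synchronously fetch a log page into allocated memory
 *
 * The variable argument is log page specific; currently only the health log
 * page uses it, to pass the namespace ID (-1 for the controller-wide log).
 * Returns a kmem_alloc()ed buffer that the caller must free, or NULL on
 * failure.
 */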
static void *
nvme_get_logpage(nvme_t *nvme, uint8_t logpage, ...)
{
	nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
	void *buf = NULL;
	nvme_getlogpage_t getlogpage = { 0 };
	size_t bufsize;
	va_list ap;

	va_start(ap, logpage);

	cmd->nc_sqid = 0;
	cmd->nc_callback = nvme_wakeup_cmd;
	cmd->nc_sqe.sqe_opc = NVME_OPC_GET_LOG_PAGE;

	getlogpage.b.lp_lid = logpage;

	switch (logpage) {
	case NVME_LOGPAGE_ERROR:
		cmd->nc_sqe.sqe_nsid = (uint32_t)-1;
		bufsize = nvme->n_error_log_len *
		    sizeof (nvme_error_log_entry_t);
		break;

	case NVME_LOGPAGE_HEALTH:
		cmd->nc_sqe.sqe_nsid = va_arg(ap, uint32_t);
		bufsize = sizeof (nvme_health_log_t);
		break;

	case NVME_LOGPAGE_FWSLOT:
		cmd->nc_sqe.sqe_nsid = (uint32_t)-1;
		bufsize = sizeof (nvme_fwslot_log_t);
		break;

	default:
		dev_err(nvme->n_dip, CE_WARN, "!unknown log page requested: %d",
		    logpage);
		atomic_inc_32(&nvme->n_unknown_logpage);
		goto fail;
	}

	va_end(ap);

	/* lp_numd is a zero based count of dwords to transfer */
	getlogpage.b.lp_numd = bufsize / sizeof (uint32_t) - 1;

	cmd->nc_sqe.sqe_cdw10 = getlogpage.r;

	if (nvme_zalloc_dma(nvme, bufsize,
	    DDI_DMA_READ, &nvme->n_prp_dma_attr, &cmd->nc_dma) != DDI_SUCCESS) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!nvme_zalloc_dma failed for GET LOG PAGE");
		goto fail;
	}

	if (cmd->nc_dma->nd_ncookie > 2) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!too many DMA cookies for GET LOG PAGE");
		atomic_inc_32(&nvme->n_too_many_cookies);
		goto fail;
	}

	cmd->nc_sqe.sqe_dptr.d_prp[0] = cmd->nc_dma->nd_cookie.dmac_laddress;
	if (cmd->nc_dma->nd_ncookie > 1) {
		ddi_dma_nextcookie(cmd->nc_dma->nd_dmah,
		    &cmd->nc_dma->nd_cookie);
		cmd->nc_sqe.sqe_dptr.d_prp[1] =
		    cmd->nc_dma->nd_cookie.dmac_laddress;
	}

	if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!nvme_admin_cmd failed for GET LOG PAGE");
		return (NULL);
	}

	if (nvme_check_cmd_status(cmd)) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!GET LOG PAGE failed with sct = %x, sc = %x",
		    cmd->nc_cqe.cqe_sf.sf_sct, cmd->nc_cqe.cqe_sf.sf_sc);
		goto fail;
	}

	buf = kmem_alloc(bufsize, KM_SLEEP);
	bcopy(cmd->nc_dma->nd_memp, buf, bufsize);

fail:
	nvme_free_cmd(cmd);

	return (buf);
}
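
/*
 * nvme_identify -- fetch the IDENTIFY CONTROLLER (nsid == 0) or IDENTIFY
 * NAMESPACE data structure. Returns a kmem_alloc()ed buffer of
 * NVME_IDENTIFY_BUFSIZE bytes that the caller must free, or NULL on failure.
 */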
static void *
nvme_identify(nvme_t *nvme, uint32_t nsid)
{
	nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
	void *buf = NULL;

	cmd->nc_sqid = 0;
	cmd->nc_callback = nvme_wakeup_cmd;
	cmd->nc_sqe.sqe_opc = NVME_OPC_IDENTIFY;
	cmd->nc_sqe.sqe_nsid = nsid;
	cmd->nc_sqe.sqe_cdw10 = nsid ? NVME_IDENTIFY_NSID : NVME_IDENTIFY_CTRL;

	if (nvme_zalloc_dma(nvme, NVME_IDENTIFY_BUFSIZE, DDI_DMA_READ,
	    &nvme->n_prp_dma_attr, &cmd->nc_dma) != DDI_SUCCESS) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!nvme_zalloc_dma failed for IDENTIFY");
		goto fail;
	}

	if (cmd->nc_dma->nd_ncookie > 2) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!too many DMA cookies for IDENTIFY");
		atomic_inc_32(&nvme->n_too_many_cookies);
		goto fail;
	}

	cmd->nc_sqe.sqe_dptr.d_prp[0] = cmd->nc_dma->nd_cookie.dmac_laddress;
	if (cmd->nc_dma->nd_ncookie > 1) {
		ddi_dma_nextcookie(cmd->nc_dma->nd_dmah,
		    &cmd->nc_dma->nd_cookie);
		cmd->nc_sqe.sqe_dptr.d_prp[1] =
		    cmd->nc_dma->nd_cookie.dmac_laddress;
	}

	if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!nvme_admin_cmd failed for IDENTIFY");
		return (NULL);
	}

	if (nvme_check_cmd_status(cmd)) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!IDENTIFY failed with sct = %x, sc = %x",
		    cmd->nc_cqe.cqe_sf.sf_sct, cmd->nc_cqe.cqe_sf.sf_sc);
		goto fail;
	}

	buf = kmem_alloc(NVME_IDENTIFY_BUFSIZE, KM_SLEEP);
	bcopy(cmd->nc_dma->nd_memp, buf, NVME_IDENTIFY_BUFSIZE);

fail:
	nvme_free_cmd(cmd);

	return (buf);
}

static int
nvme_set_nqueues(nvme_t *nvme, uint16_t nqueues)
{
	nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
	nvme_nqueue_t nq = { 0 };

	/* The hardware expects a zero based queue count. */
	nq.b.nq_nsq = nq.b.nq_ncq = nqueues - 1;

	cmd->nc_sqid = 0;
	cmd->nc_callback = nvme_wakeup_cmd;
	cmd->nc_sqe.sqe_opc = NVME_OPC_SET_FEATURES;
	cmd->nc_sqe.sqe_cdw10 = NVME_FEAT_NQUEUES;
	cmd->nc_sqe.sqe_cdw11 = nq.r;

	if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!nvme_admin_cmd failed for SET FEATURES (NQUEUES)");
		return (0);
	}

	if (nvme_check_cmd_status(cmd)) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!SET FEATURES (NQUEUES) failed with sct = %x, sc = %x",
		    cmd->nc_cqe.cqe_sf.sf_sct, cmd->nc_cqe.cqe_sf.sf_sc);
		nvme_free_cmd(cmd);
		return (0);
	}

	nq.r = cmd->nc_cqe.cqe_dw0;
	nvme_free_cmd(cmd);

	/*
	 * Always use the same number of submission and completion queues, and
	 * never use more than the requested number of queues.
	 */
	return (MIN(nqueues, MIN(nq.b.nq_nsq, nq.b.nq_ncq) + 1));
}
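
/*
 * For example, if 8 queue pairs are requested but the controller grants 4
 * submission and 6 completion queues (reported zero based as 3 and 5 in
 * dw0), nvme_set_nqueues() above returns MIN(8, MIN(3, 5) + 1) = 4.
 */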

static int
nvme_create_io_qpair(nvme_t *nvme, nvme_qpair_t *qp, uint16_t idx)
{
	nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
	nvme_create_queue_dw10_t dw10 = { 0 };
	nvme_create_cq_dw11_t c_dw11 = { 0 };
	nvme_create_sq_dw11_t s_dw11 = { 0 };

	dw10.b.q_qid = idx;
	dw10.b.q_qsize = qp->nq_nentry - 1;

	c_dw11.b.cq_pc = 1;
	c_dw11.b.cq_ien = 1;
	c_dw11.b.cq_iv = idx % nvme->n_intr_cnt;

	cmd->nc_sqid = 0;
	cmd->nc_callback = nvme_wakeup_cmd;
	cmd->nc_sqe.sqe_opc = NVME_OPC_CREATE_CQUEUE;
	cmd->nc_sqe.sqe_cdw10 = dw10.r;
	cmd->nc_sqe.sqe_cdw11 = c_dw11.r;
	cmd->nc_sqe.sqe_dptr.d_prp[0] = qp->nq_cqdma->nd_cookie.dmac_laddress;

	if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!nvme_admin_cmd failed for CREATE CQUEUE");
		return (DDI_FAILURE);
	}

	if (nvme_check_cmd_status(cmd)) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!CREATE CQUEUE failed with sct = %x, sc = %x",
		    cmd->nc_cqe.cqe_sf.sf_sct, cmd->nc_cqe.cqe_sf.sf_sc);
		nvme_free_cmd(cmd);
		return (DDI_FAILURE);
	}

	nvme_free_cmd(cmd);

	s_dw11.b.sq_pc = 1;
	s_dw11.b.sq_cqid = idx;

	cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
	cmd->nc_sqid = 0;
	cmd->nc_callback = nvme_wakeup_cmd;
	cmd->nc_sqe.sqe_opc = NVME_OPC_CREATE_SQUEUE;
	cmd->nc_sqe.sqe_cdw10 = dw10.r;
	cmd->nc_sqe.sqe_cdw11 = s_dw11.r;
	cmd->nc_sqe.sqe_dptr.d_prp[0] = qp->nq_sqdma->nd_cookie.dmac_laddress;

	if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!nvme_admin_cmd failed for CREATE SQUEUE");
		return (DDI_FAILURE);
	}

	if (nvme_check_cmd_status(cmd)) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!CREATE SQUEUE failed with sct = %x, sc = %x",
		    cmd->nc_cqe.cqe_sf.sf_sct, cmd->nc_cqe.cqe_sf.sf_sc);
		nvme_free_cmd(cmd);
		return (DDI_FAILURE);
	}

	nvme_free_cmd(cmd);

	return (DDI_SUCCESS);
}
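
/*
 * nvme_reset -- disable the controller and wait for it to become not ready
 *
 * Clears CC.EN and polls CSTS.RDY in 50ms steps for up to the CAP.TO
 * timeout the controller advertised (n_timeout, in 500ms units). With
 * quiesce set the wait is done by busy-waiting, as required in quiesce(9E)
 * context. Returns B_TRUE if the controller left the ready state.
 */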
static boolean_t
nvme_reset(nvme_t *nvme, boolean_t quiesce)
{
	nvme_reg_csts_t csts;
	int i;

	nvme_put32(nvme, NVME_REG_CC, 0);

	csts.r = nvme_get32(nvme, NVME_REG_CSTS);
	if (csts.b.csts_rdy == 1) {
		nvme_put32(nvme, NVME_REG_CC, 0);
		for (i = 0; i != nvme->n_timeout * 10; i++) {
			csts.r = nvme_get32(nvme, NVME_REG_CSTS);
			if (csts.b.csts_rdy == 0)
				break;

			if (quiesce)
				drv_usecwait(50000);
			else
				delay(drv_usectohz(50000));
		}
	}

	/* ASQ and ACQ are 64-bit registers, clear them completely. */
	nvme_put32(nvme, NVME_REG_AQA, 0);
	nvme_put64(nvme, NVME_REG_ASQ, 0);
	nvme_put64(nvme, NVME_REG_ACQ, 0);

	csts.r = nvme_get32(nvme, NVME_REG_CSTS);
	return (csts.b.csts_rdy == 0 ? B_TRUE : B_FALSE);
}

static void
nvme_shutdown(nvme_t *nvme, int mode, boolean_t quiesce)
{
	nvme_reg_cc_t cc;
	nvme_reg_csts_t csts;
	int i;

	ASSERT(mode == NVME_CC_SHN_NORMAL || mode == NVME_CC_SHN_ABRUPT);

	cc.r = nvme_get32(nvme, NVME_REG_CC);
	cc.b.cc_shn = mode & 0x3;
	nvme_put32(nvme, NVME_REG_CC, cc.r);

	for (i = 0; i != 10; i++) {
		csts.r = nvme_get32(nvme, NVME_REG_CSTS);
		if (csts.b.csts_shst == NVME_CSTS_SHN_COMPLETE)
			break;

		if (quiesce)
			drv_usecwait(100000);
		else
			delay(drv_usectohz(100000));
	}
}


static void
nvme_prepare_devid(nvme_t *nvme, uint32_t nsid)
{
	char model[sizeof (nvme->n_idctl->id_model) + 1];
	char serial[sizeof (nvme->n_idctl->id_serial) + 1];

	bcopy(nvme->n_idctl->id_model, model,
	    sizeof (nvme->n_idctl->id_model));
	bcopy(nvme->n_idctl->id_serial, serial,
	    sizeof (nvme->n_idctl->id_serial));

	model[sizeof (nvme->n_idctl->id_model)] = '\0';
	serial[sizeof (nvme->n_idctl->id_serial)] = '\0';

	(void) snprintf(nvme->n_ns[nsid - 1].ns_devid,
	    sizeof (nvme->n_ns[0].ns_devid), "%4X-%s-%s-%X",
	    nvme->n_idctl->id_vid, model, serial, nsid);
}
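
/*
 * For example, the format above turns PCI vendor ID 0x8086 together with
 * the (purely illustrative) model string "EXAMPLE-SSD", serial "S1234" and
 * namespace 1 into the devid "8086-EXAMPLE-SSD-S1234-1".
 */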

static int
nvme_init(nvme_t *nvme)
{
	nvme_reg_cc_t cc = { 0 };
	nvme_reg_aqa_t aqa = { 0 };
	nvme_reg_asq_t asq = { 0 };
	nvme_reg_acq_t acq = { 0 };
	nvme_reg_cap_t cap;
	nvme_reg_vs_t vs;
	nvme_reg_csts_t csts;
	int i = 0;
	int nqueues;
	char model[sizeof (nvme->n_idctl->id_model) + 1];
	char *vendor, *product;

	/* Check controller version */
	vs.r = nvme_get32(nvme, NVME_REG_VS);
	dev_err(nvme->n_dip, CE_CONT, "?NVMe spec version %d.%d",
	    vs.b.vs_mjr, vs.b.vs_mnr);

	if (nvme_version_major < vs.b.vs_mjr ||
	    (nvme_version_major == vs.b.vs_mjr &&
	    nvme_version_minor < vs.b.vs_mnr)) {
		dev_err(nvme->n_dip, CE_WARN, "!no support for version > %d.%d",
		    nvme_version_major, nvme_version_minor);
		if (nvme->n_strict_version)
			goto fail;
	}

	/* retrieve controller configuration */
	cap.r = nvme_get64(nvme, NVME_REG_CAP);

	if ((cap.b.cap_css & NVME_CAP_CSS_NVM) == 0) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!NVM command set not supported by hardware");
		goto fail;
	}

	nvme->n_nssr_supported = cap.b.cap_nssrs;
	nvme->n_doorbell_stride = 4 << cap.b.cap_dstrd;
	nvme->n_timeout = cap.b.cap_to;
	nvme->n_arbitration_mechanisms = cap.b.cap_ams;
	nvme->n_cont_queues_reqd = cap.b.cap_cqr;
	nvme->n_max_queue_entries = cap.b.cap_mqes + 1;

	/*
	 * The MPSMIN and MPSMAX fields in the CAP register use 0 to specify
	 * the base page size of 4k (1<<12), so add 12 here to get the real
	 * page size value.
	 */
	nvme->n_pageshift = MIN(MAX(cap.b.cap_mpsmin + 12, PAGESHIFT),
	    cap.b.cap_mpsmax + 12);
	nvme->n_pagesize = 1UL << (nvme->n_pageshift);

	/*
	 * Set up Queue DMA to transfer at least 1 page-aligned page at a time.
	 */
	nvme->n_queue_dma_attr.dma_attr_align = nvme->n_pagesize;
	nvme->n_queue_dma_attr.dma_attr_minxfer = nvme->n_pagesize;

	/*
	 * Set up PRP DMA to transfer 1 page-aligned page at a time.
	 * Maxxfer may be increased once we have identified the controller
	 * limits.
	 */
	nvme->n_prp_dma_attr.dma_attr_maxxfer = nvme->n_pagesize;
	nvme->n_prp_dma_attr.dma_attr_minxfer = nvme->n_pagesize;
	nvme->n_prp_dma_attr.dma_attr_align = nvme->n_pagesize;
	nvme->n_prp_dma_attr.dma_attr_seg = nvme->n_pagesize - 1;

	/*
	 * Reset controller if it's still in ready state.
	 */
	if (nvme_reset(nvme, B_FALSE) == B_FALSE) {
		dev_err(nvme->n_dip, CE_WARN, "!unable to reset controller");
		ddi_fm_service_impact(nvme->n_dip, DDI_SERVICE_LOST);
		nvme->n_dead = B_TRUE;
		goto fail;
	}

	/*
	 * Create the admin queue pair.
	 */
	if (nvme_alloc_qpair(nvme, nvme->n_admin_queue_len, &nvme->n_adminq, 0)
	    != DDI_SUCCESS) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!unable to allocate admin qpair");
		goto fail;
	}
	nvme->n_ioq = kmem_alloc(sizeof (nvme_qpair_t *), KM_SLEEP);
	nvme->n_ioq[0] = nvme->n_adminq;

	nvme->n_progress |= NVME_ADMIN_QUEUE;

	(void) ddi_prop_update_int(DDI_DEV_T_NONE, nvme->n_dip,
	    "admin-queue-len", nvme->n_admin_queue_len);

	aqa.b.aqa_asqs = aqa.b.aqa_acqs = nvme->n_admin_queue_len - 1;
	asq = nvme->n_adminq->nq_sqdma->nd_cookie.dmac_laddress;
	acq = nvme->n_adminq->nq_cqdma->nd_cookie.dmac_laddress;

	ASSERT((asq & (nvme->n_pagesize - 1)) == 0);
	ASSERT((acq & (nvme->n_pagesize - 1)) == 0);

	nvme_put32(nvme, NVME_REG_AQA, aqa.r);
	nvme_put64(nvme, NVME_REG_ASQ, asq);
	nvme_put64(nvme, NVME_REG_ACQ, acq);

	cc.b.cc_ams = 0;	/* use Round-Robin arbitration */
	cc.b.cc_css = 0;	/* use NVM command set */
	cc.b.cc_mps = nvme->n_pageshift - 12;
	cc.b.cc_shn = 0;	/* no shutdown in progress */
	cc.b.cc_en = 1;		/* enable controller */
	cc.b.cc_iosqes = 6;	/* submission queue entry is 2^6 bytes long */
	cc.b.cc_iocqes = 4;	/* completion queue entry is 2^4 bytes long */

	nvme_put32(nvme, NVME_REG_CC, cc.r);
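
	/*
	 * For reference, with the typical n_pageshift of 12 the fields
	 * written above encode: MPS 0 -> 2^(12 + 0) == 4k memory pages,
	 * IOSQES 6 -> 64 byte submission queue entries, and IOCQES 4 ->
	 * 16 byte completion queue entries, matching sizeof (nvme_sqe_t)
	 * and sizeof (nvme_cqe_t) as verified further down.
	 */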

	/*
	 * Wait for the controller to become ready.
	 */
	csts.r = nvme_get32(nvme, NVME_REG_CSTS);
	if (csts.b.csts_rdy == 0) {
		for (i = 0; i != nvme->n_timeout * 10; i++) {
			delay(drv_usectohz(50000));
			csts.r = nvme_get32(nvme, NVME_REG_CSTS);

			if (csts.b.csts_cfs == 1) {
				dev_err(nvme->n_dip, CE_WARN,
				    "!controller fatal status at init");
				ddi_fm_service_impact(nvme->n_dip,
				    DDI_SERVICE_LOST);
				nvme->n_dead = B_TRUE;
				goto fail;
			}

			if (csts.b.csts_rdy == 1)
				break;
		}
	}

	if (csts.b.csts_rdy == 0) {
		dev_err(nvme->n_dip, CE_WARN, "!controller not ready");
		ddi_fm_service_impact(nvme->n_dip, DDI_SERVICE_LOST);
		nvme->n_dead = B_TRUE;
		goto fail;
	}

	/*
	 * Assume an abort command limit of 1. We'll destroy and re-init
	 * that later when we know the true abort command limit.
	 */
	sema_init(&nvme->n_abort_sema, 1, NULL, SEMA_DRIVER, NULL);

	/*
	 * Setup initial interrupt for admin queue.
	 */
	if ((nvme_setup_interrupts(nvme, DDI_INTR_TYPE_MSIX, 1)
	    != DDI_SUCCESS) &&
	    (nvme_setup_interrupts(nvme, DDI_INTR_TYPE_MSI, 1)
	    != DDI_SUCCESS) &&
	    (nvme_setup_interrupts(nvme, DDI_INTR_TYPE_FIXED, 1)
	    != DDI_SUCCESS)) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!failed to setup initial interrupt");
		goto fail;
	}

	/*
	 * Post an asynchronous event command to catch errors.
	 */
	if (nvme_async_event(nvme) != DDI_SUCCESS) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!failed to post async event");
		goto fail;
	}

	/*
	 * Identify Controller
	 */
	nvme->n_idctl = nvme_identify(nvme, 0);
	if (nvme->n_idctl == NULL) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!failed to identify controller");
		goto fail;
	}

	/*
	 * Get Vendor & Product ID
	 */
	bcopy(nvme->n_idctl->id_model, model,
	    sizeof (nvme->n_idctl->id_model));
	model[sizeof (nvme->n_idctl->id_model)] = '\0';
	sata_split_model(model, &vendor, &product);

	if (vendor == NULL)
		nvme->n_vendor = strdup("NVMe");
	else
		nvme->n_vendor = strdup(vendor);

	nvme->n_product = strdup(product);

	/*
	 * Get controller limits.
	 */
	nvme->n_async_event_limit = MAX(NVME_MIN_ASYNC_EVENT_LIMIT,
	    MIN(nvme->n_admin_queue_len / 10,
	    MIN(nvme->n_idctl->id_aerl + 1, nvme->n_async_event_limit)));

	(void) ddi_prop_update_int(DDI_DEV_T_NONE, nvme->n_dip,
	    "async-event-limit", nvme->n_async_event_limit);

	nvme->n_abort_command_limit = nvme->n_idctl->id_acl + 1;

	/*
	 * Reinitialize the semaphore with the true abort command limit
	 * supported by the hardware. It's not necessary to disable
	 * interrupts as only command aborts use the semaphore, and no
	 * commands are executed or aborted while we're here.
	 */
	sema_destroy(&nvme->n_abort_sema);
	sema_init(&nvme->n_abort_sema, nvme->n_abort_command_limit - 1, NULL,
	    SEMA_DRIVER, NULL);

	nvme->n_progress |= NVME_CTRL_LIMITS;

	if (nvme->n_idctl->id_mdts == 0)
		nvme->n_max_data_transfer_size = nvme->n_pagesize * 65536;
	else
		nvme->n_max_data_transfer_size =
		    1ull << (nvme->n_pageshift + nvme->n_idctl->id_mdts);

	nvme->n_error_log_len = nvme->n_idctl->id_elpe + 1;

	/*
	 * Limit n_max_data_transfer_size to what we can handle in one PRP.
	 * Chained PRPs are currently unsupported.
	 *
	 * This is a no-op on hardware which doesn't support a transfer size
	 * big enough to require chained PRPs.
	 */
	nvme->n_max_data_transfer_size = MIN(nvme->n_max_data_transfer_size,
	    (nvme->n_pagesize / sizeof (uint64_t) * nvme->n_pagesize));

	nvme->n_prp_dma_attr.dma_attr_maxxfer = nvme->n_max_data_transfer_size;
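
	/*
	 * Worked example with hypothetical values: given 4k pages and an
	 * id_mdts of 5, the transfer limit is 2^(12 + 5) == 128k. The
	 * single-PRP-list clamp above allows n_pagesize / 8 == 512
	 * entries of 4k each, i.e. 2MB, so it only takes effect on
	 * controllers advertising an MDTS larger than that.
	 */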

	/*
	 * Make sure the minimum/maximum queue entry sizes are not
	 * larger/smaller than the default.
	 */
	if (((1 << nvme->n_idctl->id_sqes.qes_min) > sizeof (nvme_sqe_t)) ||
	    ((1 << nvme->n_idctl->id_sqes.qes_max) < sizeof (nvme_sqe_t)) ||
	    ((1 << nvme->n_idctl->id_cqes.qes_min) > sizeof (nvme_cqe_t)) ||
	    ((1 << nvme->n_idctl->id_cqes.qes_max) < sizeof (nvme_cqe_t)))
		goto fail;

	/*
	 * Check for the presence of a Volatile Write Cache. If present,
	 * enable it by default.
	 */
	if (nvme->n_idctl->id_vwc.vwc_present == 0) {
		nvme->n_volatile_write_cache_enabled = B_FALSE;
		nvme_bd_ops.o_sync_cache = NULL;
	} else {
		/*
		 * TODO: send SET FEATURES to enable VWC
		 * (have no hardware to test this)
		 */
		nvme->n_volatile_write_cache_enabled = B_FALSE;
		nvme_bd_ops.o_sync_cache = NULL;
	}

	/*
	 * Grab a copy of all mandatory log pages.
	 *
	 * TODO: should go away once user space tool exists to print logs
	 */
	nvme->n_error_log = (nvme_error_log_entry_t *)
	    nvme_get_logpage(nvme, NVME_LOGPAGE_ERROR);
	nvme->n_health_log = (nvme_health_log_t *)
	    nvme_get_logpage(nvme, NVME_LOGPAGE_HEALTH, -1);
	nvme->n_fwslot_log = (nvme_fwslot_log_t *)
	    nvme_get_logpage(nvme, NVME_LOGPAGE_FWSLOT);

	/*
	 * Identify Namespaces
	 */
	nvme->n_namespace_count = nvme->n_idctl->id_nn;
	nvme->n_ns = kmem_zalloc(sizeof (nvme_namespace_t) *
	    nvme->n_namespace_count, KM_SLEEP);

	for (i = 0; i != nvme->n_namespace_count; i++) {
		nvme_identify_nsid_t *idns;
		int last_rp;

		nvme->n_ns[i].ns_nvme = nvme;
		nvme->n_ns[i].ns_idns = idns = nvme_identify(nvme, i + 1);

		if (idns == NULL) {
			dev_err(nvme->n_dip, CE_WARN,
			    "!failed to identify namespace %d", i + 1);
			goto fail;
		}

		nvme->n_ns[i].ns_id = i + 1;
		nvme->n_ns[i].ns_block_count = idns->id_nsize;
		nvme->n_ns[i].ns_block_size =
		    1 << idns->id_lbaf[idns->id_flbas.lba_format].lbaf_lbads;
		nvme->n_ns[i].ns_best_block_size = nvme->n_ns[i].ns_block_size;

		nvme_prepare_devid(nvme, nvme->n_ns[i].ns_id);

		/*
		 * Find the LBA format with no metadata and the best relative
		 * performance. A value of 3 means "degraded", 0 is best.
		 */
		last_rp = 3;
		for (int j = 0; j <= idns->id_nlbaf; j++) {
			if (idns->id_lbaf[j].lbaf_lbads == 0)
				break;
			if (idns->id_lbaf[j].lbaf_ms != 0)
				continue;
			if (idns->id_lbaf[j].lbaf_rp >= last_rp)
				continue;
			last_rp = idns->id_lbaf[j].lbaf_rp;
			nvme->n_ns[i].ns_best_block_size =
			    1 << idns->id_lbaf[j].lbaf_lbads;
		}

		/*
		 * We currently don't support namespaces that use either:
		 * - thin provisioning
		 * - protection information
		 */
		if (idns->id_nsfeat.f_thin ||
		    idns->id_dps.dp_pinfo) {
			dev_err(nvme->n_dip, CE_WARN,
			    "!ignoring namespace %d, unsupported features: "
			    "thin = %d, pinfo = %d", i + 1,
			    idns->id_nsfeat.f_thin, idns->id_dps.dp_pinfo);
			nvme->n_ns[i].ns_ignore = B_TRUE;
		}
	}
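
	/*
	 * Illustrative example of the format selection above, with
	 * made-up namespace data: a namespace formatted with lbaf[0] =
	 * { lbads = 9, ms = 0, rp = 2 } while lbaf[1] = { lbads = 12,
	 * ms = 0, rp = 0 } also exists gets ns_block_size == 512 but
	 * reports ns_best_block_size == 4096 (2^12) to blkdev.
	 */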

	/*
	 * Try to set up MSI/MSI-X interrupts.
	 */
	if ((nvme->n_intr_types & (DDI_INTR_TYPE_MSI | DDI_INTR_TYPE_MSIX))
	    != 0) {
		nvme_release_interrupts(nvme);

		nqueues = MIN(UINT16_MAX, ncpus);

		if ((nvme_setup_interrupts(nvme, DDI_INTR_TYPE_MSIX,
		    nqueues) != DDI_SUCCESS) &&
		    (nvme_setup_interrupts(nvme, DDI_INTR_TYPE_MSI,
		    nqueues) != DDI_SUCCESS)) {
			dev_err(nvme->n_dip, CE_WARN,
			    "!failed to setup MSI/MSI-X interrupts");
			goto fail;
		}
	}

	nqueues = nvme->n_intr_cnt;

	/*
	 * Create I/O queue pairs.
	 */
	nvme->n_ioq_count = nvme_set_nqueues(nvme, nqueues);
	if (nvme->n_ioq_count == 0) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!failed to set number of I/O queues to %d", nqueues);
		goto fail;
	}

	/*
	 * Reallocate I/O queue array
	 */
	kmem_free(nvme->n_ioq, sizeof (nvme_qpair_t *));
	nvme->n_ioq = kmem_zalloc(sizeof (nvme_qpair_t *) *
	    (nvme->n_ioq_count + 1), KM_SLEEP);
	nvme->n_ioq[0] = nvme->n_adminq;

	/*
	 * If we got fewer queues than we asked for we might as well give
	 * some of the interrupt vectors back to the system.
	 */
	if (nvme->n_ioq_count < nqueues) {
		nvme_release_interrupts(nvme);

		if (nvme_setup_interrupts(nvme, nvme->n_intr_type,
		    nvme->n_ioq_count) != DDI_SUCCESS) {
			dev_err(nvme->n_dip, CE_WARN,
			    "!failed to reduce number of interrupts");
			goto fail;
		}
	}

	/*
	 * Alloc & register I/O queue pairs
	 */
	nvme->n_io_queue_len =
	    MIN(nvme->n_io_queue_len, nvme->n_max_queue_entries);
	(void) ddi_prop_update_int(DDI_DEV_T_NONE, nvme->n_dip, "io-queue-len",
	    nvme->n_io_queue_len);

	for (i = 1; i != nvme->n_ioq_count + 1; i++) {
		if (nvme_alloc_qpair(nvme, nvme->n_io_queue_len,
		    &nvme->n_ioq[i], i) != DDI_SUCCESS) {
			dev_err(nvme->n_dip, CE_WARN,
			    "!unable to allocate I/O qpair %d", i);
			goto fail;
		}

		if (nvme_create_io_qpair(nvme, nvme->n_ioq[i], i)
		    != DDI_SUCCESS) {
			dev_err(nvme->n_dip, CE_WARN,
			    "!unable to create I/O qpair %d", i);
			goto fail;
		}
	}

	/*
	 * Post more asynchronous event commands to reduce event reporting
	 * latency as suggested by the spec.
	 */
	for (i = 1; i != nvme->n_async_event_limit; i++) {
		if (nvme_async_event(nvme) != DDI_SUCCESS) {
			dev_err(nvme->n_dip, CE_WARN,
			    "!failed to post async event %d", i);
			goto fail;
		}
	}

	return (DDI_SUCCESS);

fail:
	(void) nvme_reset(nvme, B_FALSE);
	return (DDI_FAILURE);
}

static uint_t
nvme_intr(caddr_t arg1, caddr_t arg2)
{
	/*LINTED: E_PTR_BAD_CAST_ALIGN*/
	nvme_t *nvme = (nvme_t *)arg1;
	int inum = (int)(uintptr_t)arg2;
	int ccnt = 0;
	int qnum;
	nvme_cmd_t *cmd;

	if (inum >= nvme->n_intr_cnt)
		return (DDI_INTR_UNCLAIMED);

	/*
	 * The interrupt vector a queue uses is calculated as queue_idx %
	 * intr_cnt in nvme_create_io_qpair(). Iterate through the queue array
	 * in steps of n_intr_cnt to process all queues using this vector.
	 */
	for (qnum = inum;
	    qnum < nvme->n_ioq_count + 1 && nvme->n_ioq[qnum] != NULL;
	    qnum += nvme->n_intr_cnt) {
		while ((cmd = nvme_retrieve_cmd(nvme, nvme->n_ioq[qnum]))) {
			taskq_dispatch_ent((taskq_t *)cmd->nc_nvme->n_cmd_taskq,
			    cmd->nc_callback, cmd, TQ_NOSLEEP, &cmd->nc_tqent);
			ccnt++;
		}
	}

	return (ccnt > 0 ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
}
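
/*
 * Example of the queue-to-vector mapping handled above: with
 * n_intr_cnt == 4 and four I/O queue pairs, nvme_create_io_qpair()
 * assigns vectors 1, 2 and 3 to queues 1, 2 and 3, and vector 0 to
 * queue 4. The handler for vector 0 thus walks queue indices 0 (the
 * admin queue) and 4, matching the interrupt usage notes at the top
 * of this file.
 */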

static void
nvme_release_interrupts(nvme_t *nvme)
{
	int i;

	for (i = 0; i < nvme->n_intr_cnt; i++) {
		if (nvme->n_inth[i] == NULL)
			break;

		if (nvme->n_intr_cap & DDI_INTR_FLAG_BLOCK)
			(void) ddi_intr_block_disable(&nvme->n_inth[i], 1);
		else
			(void) ddi_intr_disable(nvme->n_inth[i]);

		(void) ddi_intr_remove_handler(nvme->n_inth[i]);
		(void) ddi_intr_free(nvme->n_inth[i]);
	}

	kmem_free(nvme->n_inth, nvme->n_inth_sz);
	nvme->n_inth = NULL;
	nvme->n_inth_sz = 0;

	nvme->n_progress &= ~NVME_INTERRUPTS;
}

static int
nvme_setup_interrupts(nvme_t *nvme, int intr_type, int nqpairs)
{
	int nintrs, navail, count;
	int ret;
	int i;

	if (nvme->n_intr_types == 0) {
		ret = ddi_intr_get_supported_types(nvme->n_dip,
		    &nvme->n_intr_types);
		if (ret != DDI_SUCCESS) {
			dev_err(nvme->n_dip, CE_WARN,
			    "!%s: ddi_intr_get_supported_types failed",
			    __func__);
			return (ret);
		}
	}

	if ((nvme->n_intr_types & intr_type) == 0)
		return (DDI_FAILURE);

	ret = ddi_intr_get_nintrs(nvme->n_dip, intr_type, &nintrs);
	if (ret != DDI_SUCCESS) {
		dev_err(nvme->n_dip, CE_WARN, "!%s: ddi_intr_get_nintrs failed",
		    __func__);
		return (ret);
	}

	ret = ddi_intr_get_navail(nvme->n_dip, intr_type, &navail);
	if (ret != DDI_SUCCESS) {
		dev_err(nvme->n_dip, CE_WARN, "!%s: ddi_intr_get_navail failed",
		    __func__);
		return (ret);
	}

	/* We want at most one interrupt per queue pair. */
	if (navail > nqpairs)
		navail = nqpairs;

	nvme->n_inth_sz = sizeof (ddi_intr_handle_t) * navail;
	nvme->n_inth = kmem_zalloc(nvme->n_inth_sz, KM_SLEEP);

	ret = ddi_intr_alloc(nvme->n_dip, nvme->n_inth, intr_type, 0, navail,
	    &count, 0);
	if (ret != DDI_SUCCESS) {
		dev_err(nvme->n_dip, CE_WARN, "!%s: ddi_intr_alloc failed",
		    __func__);
		goto fail;
	}

	nvme->n_intr_cnt = count;

	ret = ddi_intr_get_pri(nvme->n_inth[0], &nvme->n_intr_pri);
	if (ret != DDI_SUCCESS) {
		dev_err(nvme->n_dip, CE_WARN, "!%s: ddi_intr_get_pri failed",
		    __func__);
		goto fail;
	}

	for (i = 0; i < count; i++) {
		ret = ddi_intr_add_handler(nvme->n_inth[i], nvme_intr,
		    (void *)nvme, (void *)(uintptr_t)i);
		if (ret != DDI_SUCCESS) {
			dev_err(nvme->n_dip, CE_WARN,
			    "!%s: ddi_intr_add_handler failed", __func__);
			goto fail;
		}
	}

	(void) ddi_intr_get_cap(nvme->n_inth[0], &nvme->n_intr_cap);

	for (i = 0; i < count; i++) {
		if (nvme->n_intr_cap & DDI_INTR_FLAG_BLOCK)
			ret = ddi_intr_block_enable(&nvme->n_inth[i], 1);
		else
			ret = ddi_intr_enable(nvme->n_inth[i]);

		if (ret != DDI_SUCCESS) {
			dev_err(nvme->n_dip, CE_WARN,
			    "!%s: enabling interrupt %d failed", __func__, i);
			goto fail;
		}
	}

	nvme->n_intr_type = intr_type;

	nvme->n_progress |= NVME_INTERRUPTS;

	return (DDI_SUCCESS);

fail:
	nvme_release_interrupts(nvme);

	return (ret);
}
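
/*
 * Usage sketch, mirroring the calls made from nvme_init(): the driver
 * first requests a single vector to run the admin queue, e.g.
 *
 *	(void) nvme_setup_interrupts(nvme, DDI_INTR_TYPE_MSIX, 1);
 *
 * and later releases it again and re-requests up to one vector per CPU
 * before the I/O queue pairs are created.
 */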

static int
nvme_fm_errcb(dev_info_t *dip, ddi_fm_error_t *fm_error, const void *arg)
{
	_NOTE(ARGUNUSED(arg));

	pci_ereport_post(dip, fm_error, NULL);
	return (fm_error->fme_status);
}

static int
nvme_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	nvme_t *nvme;
	int instance;
	int nregs;
	off_t regsize;
	int i;
	char name[32];

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	instance = ddi_get_instance(dip);

	if (ddi_soft_state_zalloc(nvme_state, instance) != DDI_SUCCESS)
		return (DDI_FAILURE);

	nvme = ddi_get_soft_state(nvme_state, instance);
	ddi_set_driver_private(dip, nvme);
	nvme->n_dip = dip;

	nvme->n_strict_version = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "strict-version", 1) == 1 ? B_TRUE : B_FALSE;
	nvme->n_ignore_unknown_vendor_status = ddi_prop_get_int(DDI_DEV_T_ANY,
	    dip, DDI_PROP_DONTPASS, "ignore-unknown-vendor-status", 0) == 1 ?
	    B_TRUE : B_FALSE;
	nvme->n_admin_queue_len = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "admin-queue-len", NVME_DEFAULT_ADMIN_QUEUE_LEN);
	nvme->n_io_queue_len = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "io-queue-len", NVME_DEFAULT_IO_QUEUE_LEN);
	nvme->n_async_event_limit = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "async-event-limit",
	    NVME_DEFAULT_ASYNC_EVENT_LIMIT);

	if (nvme->n_admin_queue_len < NVME_MIN_ADMIN_QUEUE_LEN)
		nvme->n_admin_queue_len = NVME_MIN_ADMIN_QUEUE_LEN;
	else if (nvme->n_admin_queue_len > NVME_MAX_ADMIN_QUEUE_LEN)
		nvme->n_admin_queue_len = NVME_MAX_ADMIN_QUEUE_LEN;

	if (nvme->n_io_queue_len < NVME_MIN_IO_QUEUE_LEN)
		nvme->n_io_queue_len = NVME_MIN_IO_QUEUE_LEN;

	if (nvme->n_async_event_limit < 1)
		nvme->n_async_event_limit = NVME_DEFAULT_ASYNC_EVENT_LIMIT;
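
	/*
	 * The properties read above can be overridden from the driver's
	 * .conf file; a hypothetical example:
	 *
	 *	strict-version=0;
	 *	io-queue-len=256;
	 */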

	nvme->n_reg_acc_attr = nvme_reg_acc_attr;
	nvme->n_queue_dma_attr = nvme_queue_dma_attr;
	nvme->n_prp_dma_attr = nvme_prp_dma_attr;
	nvme->n_sgl_dma_attr = nvme_sgl_dma_attr;

	/*
	 * Setup FMA support.
	 */
	nvme->n_fm_cap = ddi_getprop(DDI_DEV_T_ANY, dip,
	    DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "fm-capable",
	    DDI_FM_EREPORT_CAPABLE | DDI_FM_ACCCHK_CAPABLE |
	    DDI_FM_DMACHK_CAPABLE | DDI_FM_ERRCB_CAPABLE);

	ddi_fm_init(dip, &nvme->n_fm_cap, &nvme->n_fm_ibc);

	if (nvme->n_fm_cap) {
		if (nvme->n_fm_cap & DDI_FM_ACCCHK_CAPABLE)
			nvme->n_reg_acc_attr.devacc_attr_access =
			    DDI_FLAGERR_ACC;

		if (nvme->n_fm_cap & DDI_FM_DMACHK_CAPABLE) {
			nvme->n_prp_dma_attr.dma_attr_flags |= DDI_DMA_FLAGERR;
			nvme->n_sgl_dma_attr.dma_attr_flags |= DDI_DMA_FLAGERR;
		}

		if (DDI_FM_EREPORT_CAP(nvme->n_fm_cap) ||
		    DDI_FM_ERRCB_CAP(nvme->n_fm_cap))
			pci_ereport_setup(dip);

		if (DDI_FM_ERRCB_CAP(nvme->n_fm_cap))
			ddi_fm_handler_register(dip, nvme_fm_errcb,
			    (void *)nvme);
	}

	nvme->n_progress |= NVME_FMA_INIT;

	/*
	 * The spec defines several register sets. Only the controller
	 * registers (set 1) are currently used.
	 */
	if (ddi_dev_nregs(dip, &nregs) == DDI_FAILURE ||
	    nregs < 2 ||
	    ddi_dev_regsize(dip, 1, &regsize) == DDI_FAILURE)
		goto fail;

	if (ddi_regs_map_setup(dip, 1, &nvme->n_regs, 0, regsize,
	    &nvme->n_reg_acc_attr, &nvme->n_regh) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "!failed to map regset 1");
		goto fail;
	}

	nvme->n_progress |= NVME_REGS_MAPPED;

	/*
	 * Create taskq for command completion.
	 */
	(void) snprintf(name, sizeof (name), "%s%d_cmd_taskq",
	    ddi_driver_name(dip), ddi_get_instance(dip));
	nvme->n_cmd_taskq = ddi_taskq_create(dip, name, MIN(UINT16_MAX, ncpus),
	    TASKQ_DEFAULTPRI, 0);
	if (nvme->n_cmd_taskq == NULL) {
		dev_err(dip, CE_WARN, "!failed to create cmd taskq");
		goto fail;
	}

	/*
	 * Create PRP DMA cache
	 */
	(void) snprintf(name, sizeof (name), "%s%d_prp_cache",
	    ddi_driver_name(dip), ddi_get_instance(dip));
	nvme->n_prp_cache = kmem_cache_create(name, sizeof (nvme_dma_t),
	    0, nvme_prp_dma_constructor, nvme_prp_dma_destructor,
	    NULL, (void *)nvme, NULL, 0);

	if (nvme_init(nvme) != DDI_SUCCESS)
		goto fail;

	/*
	 * Attach the blkdev driver for each namespace.
	 */
	for (i = 0; i != nvme->n_namespace_count; i++) {
		if (nvme->n_ns[i].ns_ignore)
			continue;

		nvme->n_ns[i].ns_bd_hdl = bd_alloc_handle(&nvme->n_ns[i],
		    &nvme_bd_ops, &nvme->n_prp_dma_attr, KM_SLEEP);

		if (nvme->n_ns[i].ns_bd_hdl == NULL) {
			dev_err(dip, CE_WARN,
			    "!failed to get blkdev handle for namespace %d",
			    i + 1);
			goto fail;
		}

		if (bd_attach_handle(dip, nvme->n_ns[i].ns_bd_hdl)
		    != DDI_SUCCESS) {
			dev_err(dip, CE_WARN,
			    "!failed to attach blkdev handle for namespace %d",
			    i + 1);
			goto fail;
		}
	}

	return (DDI_SUCCESS);

fail:
	/* attach successful anyway so that FMA can retire the device */
	if (nvme->n_dead)
		return (DDI_SUCCESS);

	(void) nvme_detach(dip, DDI_DETACH);

	return (DDI_FAILURE);
}
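
/*
 * Teardown in nvme_detach() below is driven by the n_progress bits
 * recorded during attach and nvme_init(); each step only undoes what
 * was actually set up, which is what allows the attach failure path
 * above to reuse nvme_detach() for cleanup of a partial attach.
 */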

static int
nvme_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int instance, i;
	nvme_t *nvme;

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	instance = ddi_get_instance(dip);

	nvme = ddi_get_soft_state(nvme_state, instance);

	if (nvme == NULL)
		return (DDI_FAILURE);

	if (nvme->n_ns) {
		for (i = 0; i != nvme->n_namespace_count; i++) {
			if (nvme->n_ns[i].ns_bd_hdl) {
				(void) bd_detach_handle(
				    nvme->n_ns[i].ns_bd_hdl);
				bd_free_handle(nvme->n_ns[i].ns_bd_hdl);
			}

			if (nvme->n_ns[i].ns_idns)
				kmem_free(nvme->n_ns[i].ns_idns,
				    sizeof (nvme_identify_nsid_t));
		}

		kmem_free(nvme->n_ns, sizeof (nvme_namespace_t) *
		    nvme->n_namespace_count);
	}

	if (nvme->n_progress & NVME_INTERRUPTS)
		nvme_release_interrupts(nvme);

	if (nvme->n_cmd_taskq)
		ddi_taskq_wait(nvme->n_cmd_taskq);

	if (nvme->n_ioq_count > 0) {
		for (i = 1; i != nvme->n_ioq_count + 1; i++) {
			if (nvme->n_ioq[i] != NULL) {
				/* TODO: send destroy queue commands */
				nvme_free_qpair(nvme->n_ioq[i]);
			}
		}

		kmem_free(nvme->n_ioq, sizeof (nvme_qpair_t *) *
		    (nvme->n_ioq_count + 1));
	}

	if (nvme->n_prp_cache != NULL) {
		kmem_cache_destroy(nvme->n_prp_cache);
	}

	if (nvme->n_progress & NVME_REGS_MAPPED) {
		nvme_shutdown(nvme, NVME_CC_SHN_NORMAL, B_FALSE);
		(void) nvme_reset(nvme, B_FALSE);
	}

	if (nvme->n_cmd_taskq)
		ddi_taskq_destroy(nvme->n_cmd_taskq);

	if (nvme->n_progress & NVME_CTRL_LIMITS)
		sema_destroy(&nvme->n_abort_sema);

	if (nvme->n_progress & NVME_ADMIN_QUEUE)
		nvme_free_qpair(nvme->n_adminq);

	if (nvme->n_idctl)
		kmem_free(nvme->n_idctl, sizeof (nvme_identify_ctrl_t));

	if (nvme->n_progress & NVME_REGS_MAPPED)
		ddi_regs_map_free(&nvme->n_regh);

	if (nvme->n_progress & NVME_FMA_INIT) {
		if (DDI_FM_ERRCB_CAP(nvme->n_fm_cap))
			ddi_fm_handler_unregister(nvme->n_dip);

		if (DDI_FM_EREPORT_CAP(nvme->n_fm_cap) ||
		    DDI_FM_ERRCB_CAP(nvme->n_fm_cap))
			pci_ereport_teardown(nvme->n_dip);

		ddi_fm_fini(nvme->n_dip);
	}

	if (nvme->n_vendor != NULL)
		strfree(nvme->n_vendor);

	if (nvme->n_product != NULL)
		strfree(nvme->n_product);

	ddi_soft_state_free(nvme_state, instance);

	return (DDI_SUCCESS);
}

static int
nvme_quiesce(dev_info_t *dip)
{
	int instance;
	nvme_t *nvme;

	instance = ddi_get_instance(dip);

	nvme = ddi_get_soft_state(nvme_state, instance);

	if (nvme == NULL)
		return (DDI_FAILURE);

	nvme_shutdown(nvme, NVME_CC_SHN_ABRUPT, B_TRUE);

	(void) nvme_reset(nvme, B_TRUE);

	return (DDI_FAILURE);
}

static int
nvme_fill_prp(nvme_cmd_t *cmd, bd_xfer_t *xfer)
{
	nvme_t *nvme = cmd->nc_nvme;
	int nprp_page, nprp;
	uint64_t *prp;

	if (xfer->x_ndmac == 0)
		return (DDI_FAILURE);

	cmd->nc_sqe.sqe_dptr.d_prp[0] = xfer->x_dmac.dmac_laddress;
	ddi_dma_nextcookie(xfer->x_dmah, &xfer->x_dmac);

	if (xfer->x_ndmac == 1) {
		cmd->nc_sqe.sqe_dptr.d_prp[1] = 0;
		return (DDI_SUCCESS);
	} else if (xfer->x_ndmac == 2) {
		cmd->nc_sqe.sqe_dptr.d_prp[1] = xfer->x_dmac.dmac_laddress;
		return (DDI_SUCCESS);
	}

	xfer->x_ndmac--;

	nprp_page = nvme->n_pagesize / sizeof (uint64_t) - 1;
	ASSERT(nprp_page > 0);
	nprp = (xfer->x_ndmac + nprp_page - 1) / nprp_page;

	/*
	 * We currently don't support chained PRPs and set up our DMA
	 * attributes to reflect that. If we still get an I/O request
	 * that needs a chained PRP something is very wrong.
	 */
	VERIFY(nprp == 1);

	cmd->nc_dma = kmem_cache_alloc(nvme->n_prp_cache, KM_SLEEP);
	bzero(cmd->nc_dma->nd_memp, cmd->nc_dma->nd_len);

	cmd->nc_sqe.sqe_dptr.d_prp[1] = cmd->nc_dma->nd_cookie.dmac_laddress;

	/*LINTED: E_PTR_BAD_CAST_ALIGN*/
	for (prp = (uint64_t *)cmd->nc_dma->nd_memp;
	    xfer->x_ndmac > 0;
	    prp++, xfer->x_ndmac--) {
		*prp = xfer->x_dmac.dmac_laddress;
		ddi_dma_nextcookie(xfer->x_dmah, &xfer->x_dmac);
	}

	(void) ddi_dma_sync(cmd->nc_dma->nd_dmah, 0, cmd->nc_dma->nd_len,
	    DDI_DMA_SYNC_FORDEV);
	return (DDI_SUCCESS);
}
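
/*
 * PRP layout example for nvme_fill_prp() above, assuming 4k pages: an
 * aligned 64k transfer arrives as 16 DMA cookies. The first cookie
 * goes into PRP1; as more than one cookie remains, PRP2 is pointed at
 * a page from the PRP cache holding the other 15 entries. Only a
 * two-cookie transfer uses PRP2 directly as a data pointer.
 */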

static nvme_cmd_t *
nvme_create_nvm_cmd(nvme_namespace_t *ns, uint8_t opc, bd_xfer_t *xfer)
{
	nvme_t *nvme = ns->ns_nvme;
	nvme_cmd_t *cmd;

	/*
	 * Blkdev only sets BD_XFER_POLL when dumping, so don't sleep.
	 */
	cmd = nvme_alloc_cmd(nvme, (xfer->x_flags & BD_XFER_POLL) ?
	    KM_NOSLEEP : KM_SLEEP);

	if (cmd == NULL)
		return (NULL);

	cmd->nc_sqe.sqe_opc = opc;
	cmd->nc_callback = nvme_bd_xfer_done;
	cmd->nc_xfer = xfer;

	switch (opc) {
	case NVME_OPC_NVM_WRITE:
	case NVME_OPC_NVM_READ:
		VERIFY(xfer->x_nblks <= 0x10000);

		cmd->nc_sqe.sqe_nsid = ns->ns_id;

		cmd->nc_sqe.sqe_cdw10 = xfer->x_blkno & 0xffffffffu;
		cmd->nc_sqe.sqe_cdw11 = (xfer->x_blkno >> 32);
		cmd->nc_sqe.sqe_cdw12 = (uint16_t)(xfer->x_nblks - 1);

		if (nvme_fill_prp(cmd, xfer) != DDI_SUCCESS)
			goto fail;
		break;

	case NVME_OPC_NVM_FLUSH:
		cmd->nc_sqe.sqe_nsid = ns->ns_id;
		break;

	default:
		goto fail;
	}

	return (cmd);

fail:
	nvme_free_cmd(cmd);
	return (NULL);
}

static void
nvme_bd_xfer_done(void *arg)
{
	nvme_cmd_t *cmd = arg;
	bd_xfer_t *xfer = cmd->nc_xfer;
	int error = 0;

	error = nvme_check_cmd_status(cmd);
	nvme_free_cmd(cmd);

	bd_xfer_done(xfer, error);
}

static void
nvme_bd_driveinfo(void *arg, bd_drive_t *drive)
{
	nvme_namespace_t *ns = arg;
	nvme_t *nvme = ns->ns_nvme;

	/*
	 * blkdev maintains one queue size per instance (namespace),
	 * but all namespaces share the I/O queues.
	 * TODO: need to figure out a sane default, or use per-NS I/O queues,
	 * or change blkdev to handle EAGAIN
	 */
	drive->d_qsize = nvme->n_ioq_count * nvme->n_io_queue_len
	    / nvme->n_namespace_count;

	/*
	 * d_maxxfer is not set, which means the value is taken from the DMA
	 * attributes specified to bd_alloc_handle.
	 */

	drive->d_removable = B_FALSE;
	drive->d_hotpluggable = B_FALSE;

	drive->d_target = ns->ns_id;
	drive->d_lun = 0;

	drive->d_model = nvme->n_idctl->id_model;
	drive->d_model_len = sizeof (nvme->n_idctl->id_model);
	drive->d_vendor = nvme->n_vendor;
	drive->d_vendor_len = strlen(nvme->n_vendor);
	drive->d_product = nvme->n_product;
	drive->d_product_len = strlen(nvme->n_product);
	drive->d_serial = nvme->n_idctl->id_serial;
	drive->d_serial_len = sizeof (nvme->n_idctl->id_serial);
	drive->d_revision = nvme->n_idctl->id_fwrev;
	drive->d_revision_len = sizeof (nvme->n_idctl->id_fwrev);
}

static int
nvme_bd_mediainfo(void *arg, bd_media_t *media)
{
	nvme_namespace_t *ns = arg;

	media->m_nblks = ns->ns_block_count;
	media->m_blksize = ns->ns_block_size;
	media->m_readonly = B_FALSE;
	media->m_solidstate = B_TRUE;

	media->m_pblksize = ns->ns_best_block_size;

	return (0);
}

static int
nvme_bd_cmd(nvme_namespace_t *ns, bd_xfer_t *xfer, uint8_t opc)
{
	nvme_t *nvme = ns->ns_nvme;
	nvme_cmd_t *cmd;

	if (nvme->n_dead)
		return (EIO);

	/* No polling for now */
	if (xfer->x_flags & BD_XFER_POLL)
		return (EIO);

	cmd = nvme_create_nvm_cmd(ns, opc, xfer);
	if (cmd == NULL)
		return (ENOMEM);

	cmd->nc_sqid = (CPU->cpu_id % nvme->n_ioq_count) + 1;
	ASSERT(cmd->nc_sqid <= nvme->n_ioq_count);

	if (nvme_submit_cmd(nvme->n_ioq[cmd->nc_sqid], cmd)
	    != DDI_SUCCESS)
		return (EAGAIN);

	return (0);
}
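
/*
 * Example of the submission queue selection in nvme_bd_cmd() above: on
 * a hypothetical 16 CPU system with n_ioq_count == 8, a request issued
 * on CPU 10 is posted to qpair 10 % 8 + 1 == 3. Index 0, the admin
 * queue, is never used for I/O.
 */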

static int
nvme_bd_read(void *arg, bd_xfer_t *xfer)
{
	nvme_namespace_t *ns = arg;

	return (nvme_bd_cmd(ns, xfer, NVME_OPC_NVM_READ));
}

static int
nvme_bd_write(void *arg, bd_xfer_t *xfer)
{
	nvme_namespace_t *ns = arg;

	return (nvme_bd_cmd(ns, xfer, NVME_OPC_NVM_WRITE));
}

static int
nvme_bd_sync(void *arg, bd_xfer_t *xfer)
{
	nvme_namespace_t *ns = arg;

	if (ns->ns_nvme->n_dead)
		return (EIO);

	/*
	 * If the volatile write cache isn't enabled the FLUSH command is a
	 * no-op, so we can take a shortcut here.
	 */
	if (ns->ns_nvme->n_volatile_write_cache_enabled == B_FALSE) {
		bd_xfer_done(xfer, ENOTSUP);
		return (0);
	}

	return (nvme_bd_cmd(ns, xfer, NVME_OPC_NVM_FLUSH));
}

static int
nvme_bd_devid(void *arg, dev_info_t *devinfo, ddi_devid_t *devid)
{
	nvme_namespace_t *ns = arg;

	return (ddi_devid_init(devinfo, DEVID_ENCAP, strlen(ns->ns_devid),
	    ns->ns_devid, devid));
}