// Low level NVMe disk access
//
// Copyright 2017 Amazon.com, Inc. or its affiliates.
//
// This file may be distributed under the terms of the GNU LGPLv3 license.

#include "blockcmd.h"
#include "malloc.h" // malloc_high
#include "output.h" // dprintf
#include "pci.h"
#include "pci_ids.h" // PCI_CLASS_STORAGE_NVME
#include "pci_regs.h" // PCI_BASE_ADDRESS_0
#include "pcidevice.h" // foreachpci
#include "stacks.h" // yield
#include "std/disk.h" // DISK_RET_
#include "string.h" // memset
#include "util.h" // boot_add_hd
#include "x86.h" // readl

#include "nvme.h"
#include "nvme-int.h"

static void *
zalloc_page_aligned(struct zone_s *zone, u32 size)
{
    void *res = _malloc(zone, size, NVME_PAGE_SIZE);
    if (res) memset(res, 0, size);
    return res;
}

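/* Doorbell registers start at offset 0x1000 and come in pairs: the
   submission queue tail doorbell for queue ID n sits at doorbell index 2*n
   and the matching completion queue head doorbell at 2*n+1, spaced
   CAP.DSTRD bytes apart. q_idx below is this flat doorbell index, not the
   queue ID. */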
static void
nvme_init_queue_common(struct nvme_ctrl *ctrl, struct nvme_queue *q, u16 q_idx,
                       u16 length)
{
    memset(q, 0, sizeof(*q));
    q->dbl = (u32 *)((char *)ctrl->reg + 0x1000 + q_idx * ctrl->doorbell_stride);
    dprintf(3, " q %p q_idx %u dbl %p\n", q, q_idx, q->dbl);
    q->mask = length - 1;
}

static int
nvme_init_sq(struct nvme_ctrl *ctrl, struct nvme_sq *sq, u16 q_idx, u16 length,
             struct nvme_cq *cq)
{
    nvme_init_queue_common(ctrl, &sq->common, q_idx, length);
    sq->sqe = zalloc_page_aligned(&ZoneHigh, sizeof(*sq->sqe) * length);

    if (!sq->sqe) {
        warn_noalloc();
        return -1;
    }

    dprintf(3, "sq %p q_idx %u sqe %p\n", sq, q_idx, sq->sqe);
    sq->cq = cq;
    sq->head = 0;
    sq->tail = 0;

    return 0;
}

static int
nvme_init_cq(struct nvme_ctrl *ctrl, struct nvme_cq *cq, u16 q_idx, u16 length)
{
    nvme_init_queue_common(ctrl, &cq->common, q_idx, length);
    cq->cqe = zalloc_page_aligned(&ZoneHigh, sizeof(*cq->cqe) * length);
    if (!cq->cqe) {
        warn_noalloc();
        return -1;
    }

    cq->head = 0;

    /* All CQE phase bits are initialized to zero. This means we initially
       wait for the controller to set them to 1. */
    cq->phase = 1;

    return 0;
}

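/* Returns true when the CQE at the current head has been posted by the
   controller. The phase tag (bit 16 of CQE dword 3) toggles on every pass
   through the queue; an entry is new when its phase matches cq->phase. */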
static int
nvme_poll_cq(struct nvme_cq *cq)
{
    u32 dw3 = readl(&cq->cqe[cq->head].dword[3]);
    return (!!(dw3 & NVME_CQE_DW3_P) == cq->phase);
}

static int
nvme_is_cqe_success(struct nvme_cqe const *cqe)
{
    return ((cqe->status >> 1) & 0xFF) == 0;
}

static struct nvme_cqe
nvme_error_cqe(void)
{
    struct nvme_cqe r;

    /* 0xFF is a vendor specific status code != success. Should be okay for
       indicating failure. */
    memset(&r, 0xFF, sizeof(r));
    return r;
}

static struct nvme_cqe
nvme_consume_cqe(struct nvme_sq *sq)
{
    struct nvme_cq *cq = sq->cq;

    if (!nvme_poll_cq(cq)) {
        /* Cannot consume a completion queue entry if there is none ready. */
        return nvme_error_cqe();
    }

    struct nvme_cqe *cqe = &cq->cqe[cq->head];
    u16 cq_next_head = (cq->head + 1) & cq->common.mask;
    dprintf(4, "cq %p head %u -> %u\n", cq, cq->head, cq_next_head);
    if (cq_next_head < cq->head) {
        dprintf(3, "cq %p wrap\n", cq);
        cq->phase = ~cq->phase;
    }
    cq->head = cq_next_head;

    /* Update the submission queue head. */
    if (cqe->sq_head != sq->head) {
        sq->head = cqe->sq_head;
        dprintf(4, "sq %p advanced to %u\n", sq, cqe->sq_head);
    }

    /* Tell the controller that we consumed the completion. */
    writel(cq->common.dbl, cq->head);

    return *cqe;
}

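/* Poll for the next completion on sq's completion queue, yielding to other
   threads in the meantime. Gives up after a fixed timeout and returns a
   synthetic error CQE in that case. */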
static struct nvme_cqe
nvme_wait(struct nvme_sq *sq)
{
    static const unsigned nvme_timeout = 5000 /* ms */;
    u32 to = timer_calc(nvme_timeout);
    while (!nvme_poll_cq(sq->cq)) {
        yield();

        if (timer_check(to)) {
            warn_timeout();
            return nvme_error_cqe();
        }
    }

    return nvme_consume_cqe(sq);
}

/* Returns the next submission queue entry (or NULL if the queue is full). It
   also fills out Command Dword 0 and clears the rest. */
static struct nvme_sqe *
nvme_get_next_sqe(struct nvme_sq *sq, u8 opc, void *metadata, void *data)
{
    if (((sq->head + 1) & sq->common.mask) == sq->tail) {
        dprintf(3, "submission queue is full\n");
        return NULL;
    }

    struct nvme_sqe *sqe = &sq->sqe[sq->tail];
    dprintf(4, "sq %p next_sqe %u\n", sq, sq->tail);

    memset(sqe, 0, sizeof(*sqe));
    sqe->cdw0 = opc | (sq->tail << 16 /* CID */);
    sqe->mptr = (u32)metadata;
    sqe->dptr_prp1 = (u32)data;

    if (sqe->dptr_prp1 & (NVME_PAGE_SIZE - 1)) {
        /* Data buffer not page aligned. */
        warn_internalerror();
    }

    return sqe;
}

/* Call this after filling out an sqe obtained from nvme_get_next_sqe. */
static void
nvme_commit_sqe(struct nvme_sq *sq)
{
    dprintf(4, "sq %p commit_sqe %u\n", sq, sq->tail);
    sq->tail = (sq->tail + 1) & sq->common.mask;
    writel(sq->common.dbl, sq->tail);
}

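/* The submit/complete pattern used throughout this file (see
   nvme_admin_identify below for a complete example):

     struct nvme_sqe *sqe = nvme_get_next_sqe(sq, opc, NULL, buf);
     if (!sqe)
         ...;                    // queue full
     sqe->nsid = ...;            // fill in command specific fields
     nvme_commit_sqe(sq);        // ring the SQ tail doorbell
     struct nvme_cqe cqe = nvme_wait(sq);  // poll for the completion
     if (!nvme_is_cqe_success(&cqe))
         ...;
*/
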
/* Perform an identify command on the admin queue and return the resulting
   buffer. Returns NULL if something failed. This function cannot be used
   after initialization, because it allocates from the temporary zone. */
static union nvme_identify *
nvme_admin_identify(struct nvme_ctrl *ctrl, u8 cns, u32 nsid)
{
    union nvme_identify *identify_buf = zalloc_page_aligned(&ZoneTmpHigh, 4096);
    if (!identify_buf) {
        /* Could not allocate identify buffer. */
        warn_internalerror();
        return NULL;
    }

    struct nvme_sqe *cmd_identify;
    cmd_identify = nvme_get_next_sqe(&ctrl->admin_sq,
                                     NVME_SQE_OPC_ADMIN_IDENTIFY, NULL,
                                     identify_buf);

    if (!cmd_identify) {
        warn_internalerror();
        goto error;
    }

    cmd_identify->nsid = nsid;
    cmd_identify->dword[10] = cns;

    nvme_commit_sqe(&ctrl->admin_sq);

    struct nvme_cqe cqe = nvme_wait(&ctrl->admin_sq);

    if (!nvme_is_cqe_success(&cqe)) {
        goto error;
    }

    return identify_buf;
error:
    free(identify_buf);
    return NULL;
}

static struct nvme_identify_ctrl *
nvme_admin_identify_ctrl(struct nvme_ctrl *ctrl)
{
    return &nvme_admin_identify(ctrl, NVME_ADMIN_IDENTIFY_CNS_ID_CTRL, 0)->ctrl;
}

static struct nvme_identify_ns *
nvme_admin_identify_ns(struct nvme_ctrl *ctrl, u32 ns_id)
{
    return &nvme_admin_identify(ctrl, NVME_ADMIN_IDENTIFY_CNS_ID_NS,
                                ns_id)->ns;
}

static void
nvme_probe_ns(struct nvme_ctrl *ctrl, struct nvme_namespace *ns, u32 ns_id)
{
    ns->ctrl = ctrl;
    ns->ns_id = ns_id;

    struct nvme_identify_ns *id = nvme_admin_identify_ns(ctrl, ns_id);
    if (!id) {
        dprintf(2, "NVMe couldn't identify namespace %u.\n", ns_id);
        goto free_buffer;
    }

    u8 current_lba_format = id->flbas & 0xF;
    if (current_lba_format > id->nlbaf) {
        dprintf(2, "NVMe NS %u: current LBA format %u is beyond what the "
                "namespace supports (%u)?\n",
                ns_id, current_lba_format, id->nlbaf + 1);
        goto free_buffer;
    }

    ns->lba_count = id->nsze;
    if (!ns->lba_count) {
        dprintf(2, "NVMe NS %u is inactive.\n", ns_id);
        goto free_buffer;
    }

    struct nvme_lba_format *fmt = &id->lbaf[current_lba_format];

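    /* LBADS gives the data block size as a power of two; MS is the number
       of metadata bytes carried with each block. */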
    ns->block_size = 1U << fmt->lbads;
    ns->metadata_size = fmt->ms;

    if (ns->block_size > NVME_PAGE_SIZE) {
        /* If we see devices that trigger this path, we need to increase our
           buffer size. */
        warn_internalerror();
        goto free_buffer;
    }

    ns->drive.cntl_id = ns - ctrl->ns;
    ns->drive.removable = 0;
    ns->drive.type = DTYPE_NVME;
    ns->drive.blksize = ns->block_size;
    ns->drive.sectors = ns->lba_count;

    ns->dma_buffer = zalloc_page_aligned(&ZoneHigh, NVME_PAGE_SIZE);
    if (!ns->dma_buffer) {
        warn_noalloc();
        goto free_buffer;
    }

    char *desc = znprintf(MAXDESCSIZE, "NVMe NS %u: %llu MiB (%llu %u-byte "
                          "blocks + %u-byte metadata)",
                          ns_id, (ns->lba_count * ns->block_size) >> 20,
                          ns->lba_count, ns->block_size, ns->metadata_size);

    dprintf(3, "%s\n", desc);
    boot_add_hd(&ns->drive, desc, bootprio_find_pci_device(ctrl->pci));

free_buffer:
    free(id);
}

/* Release memory allocated for a completion queue */
static void
nvme_destroy_cq(struct nvme_cq *cq)
{
    free(cq->cqe);
    cq->cqe = NULL;
}

/* Release memory allocated for a submission queue */
static void
nvme_destroy_sq(struct nvme_sq *sq)
{
    free(sq->sqe);
    sq->sqe = NULL;
}

/* Returns 0 on success. */
static int
nvme_create_io_cq(struct nvme_ctrl *ctrl, struct nvme_cq *cq, u16 q_idx)
{
    int rc;
    struct nvme_sqe *cmd_create_cq;
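    /* CAP.MQES (bits 15:0) is the maximum queue size minus one; clamp it to
       the number of entries that fit in the single page we allocate. */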
    u32 length = 1 + (ctrl->reg->cap & 0xffff);
    if (length > NVME_PAGE_SIZE / sizeof(struct nvme_cqe))
        length = NVME_PAGE_SIZE / sizeof(struct nvme_cqe);

    rc = nvme_init_cq(ctrl, cq, q_idx, length);
    if (rc) {
        goto err;
    }

    cmd_create_cq = nvme_get_next_sqe(&ctrl->admin_sq,
                                      NVME_SQE_OPC_ADMIN_CREATE_IO_CQ, NULL,
                                      cq->cqe);
    if (!cmd_create_cq) {
        goto err_destroy_cq;
    }

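    /* Dword 10: queue size minus one in bits 31:16, queue ID in bits 15:0
       (q_idx is a doorbell index, so q_idx >> 1 is the queue ID). Dword 11
       bit 0 marks the queue as physically contiguous. */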
    cmd_create_cq->dword[10] = (cq->common.mask << 16) | (q_idx >> 1);
    cmd_create_cq->dword[11] = 1 /* physically contiguous */;

    nvme_commit_sqe(&ctrl->admin_sq);

    struct nvme_cqe cqe = nvme_wait(&ctrl->admin_sq);

    if (!nvme_is_cqe_success(&cqe)) {
        dprintf(2, "create io cq failed: %08x %08x %08x %08x\n",
                cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]);

        goto err_destroy_cq;
    }

    return 0;

err_destroy_cq:
    nvme_destroy_cq(cq);
err:
    return -1;
}

/* Returns 0 on success. */
static int
nvme_create_io_sq(struct nvme_ctrl *ctrl, struct nvme_sq *sq, u16 q_idx,
                  struct nvme_cq *cq)
{
    int rc;
    struct nvme_sqe *cmd_create_sq;
    u32 length = 1 + (ctrl->reg->cap & 0xffff);
    if (length > NVME_PAGE_SIZE / sizeof(struct nvme_sqe))
        length = NVME_PAGE_SIZE / sizeof(struct nvme_sqe);

    rc = nvme_init_sq(ctrl, sq, q_idx, length, cq);
    if (rc) {
        goto err;
    }

    cmd_create_sq = nvme_get_next_sqe(&ctrl->admin_sq,
                                      NVME_SQE_OPC_ADMIN_CREATE_IO_SQ, NULL,
                                      sq->sqe);
    if (!cmd_create_sq) {
        goto err_destroy_sq;
    }

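    /* Dword 10 mirrors the CQ case. Dword 11 carries the ID of the
       completion queue to attach in bits 31:16 and the physically
       contiguous flag in bit 0. */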
    cmd_create_sq->dword[10] = (sq->common.mask << 16) | (q_idx >> 1);
    cmd_create_sq->dword[11] = ((q_idx >> 1) << 16) | 1 /* physically contiguous */;
    dprintf(3, "sq %p create dword10 %08x dword11 %08x\n", sq,
            cmd_create_sq->dword[10], cmd_create_sq->dword[11]);

    nvme_commit_sqe(&ctrl->admin_sq);

    struct nvme_cqe cqe = nvme_wait(&ctrl->admin_sq);

    if (!nvme_is_cqe_success(&cqe)) {
        dprintf(2, "create io sq failed: %08x %08x %08x %08x\n",
                cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]);
        goto err_destroy_sq;
    }

    return 0;

err_destroy_sq:
    nvme_destroy_sq(sq);
err:
    return -1;
}

/* Reads or writes count sectors from/to buf. Returns DISK_RET_*. The buffer
   cannot cross page boundaries. */
static int
nvme_io_readwrite(struct nvme_namespace *ns, u64 lba, char *buf, u16 count,
                  int write)
{
    u32 buf_addr = (u32)buf;

    if ((buf_addr & 0x3) ||
        ((buf_addr & ~(NVME_PAGE_SIZE - 1)) !=
         ((buf_addr + ns->block_size * count - 1) & ~(NVME_PAGE_SIZE - 1)))) {
        /* Buffer is misaligned or crosses page boundary */
        warn_internalerror();
        return DISK_RET_EBADTRACK;
    }

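    /* Only PRP1 is filled in below (no PRP2 or PRP list), so the controller
       can DMA within a single page at most; that is why the transfer must
       not cross a page boundary. */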
    struct nvme_sqe *io_read = nvme_get_next_sqe(&ns->ctrl->io_sq,
                                                 write ? NVME_SQE_OPC_IO_WRITE
                                                       : NVME_SQE_OPC_IO_READ,
                                                 NULL, buf);
    if (!io_read) {
        /* The I/O submission queue is full. */
        warn_internalerror();
        return DISK_RET_EBADTRACK;
    }

    io_read->nsid = ns->ns_id;
    io_read->dword[10] = (u32)lba;
    io_read->dword[11] = (u32)(lba >> 32);
    io_read->dword[12] = (1U << 31 /* limited retry */) | (count - 1 /* 0-based count */);

    nvme_commit_sqe(&ns->ctrl->io_sq);

    struct nvme_cqe cqe = nvme_wait(&ns->ctrl->io_sq);

    if (!nvme_is_cqe_success(&cqe)) {
        dprintf(2, "%s io: %08x %08x %08x %08x\n",
                write ? "write" : "read",
                cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]);

        return DISK_RET_EBADTRACK;
    }

    return DISK_RET_SUCCESS;
}

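/* Create I/O queue pair 1: doorbell indices 2 (SQ tail) and 3 (CQ head)
   both map to queue ID 1. The completion queue must exist before the
   submission queue that posts to it. */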
static int
nvme_create_io_queues(struct nvme_ctrl *ctrl)
{
    if (nvme_create_io_cq(ctrl, &ctrl->io_cq, 3))
        goto err;

    if (nvme_create_io_sq(ctrl, &ctrl->io_sq, 2, &ctrl->io_cq))
        goto err_free_cq;

    return 0;

err_free_cq:
    nvme_destroy_cq(&ctrl->io_cq);
err:
    return -1;
}

static void
nvme_destroy_io_queues(struct nvme_ctrl *ctrl)
{
    nvme_destroy_sq(&ctrl->io_sq);
    nvme_destroy_cq(&ctrl->io_cq);
}

/* Waits for CSTS.RDY to match rdy. Returns 0 on success. */
static int
nvme_wait_csts_rdy(struct nvme_ctrl *ctrl, unsigned rdy)
{
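    /* CAP.TO (bits 31:24) is the worst-case ready transition time in units
       of 500 ms. */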
    u32 const max_to = 500 /* ms */ * ((ctrl->reg->cap >> 24) & 0xFFU);
    u32 to = timer_calc(max_to);
    u32 csts;

    while (rdy != ((csts = ctrl->reg->csts) & NVME_CSTS_RDY)) {
        yield();

        if (csts & NVME_CSTS_FATAL) {
            dprintf(3, "NVMe fatal error while waiting for controller ready\n");
            return -1;
        }

        if (timer_check(to)) {
            warn_timeout();
            return -1;
        }
    }

    return 0;
}

/* Returns 0 on success. */
static int
nvme_controller_enable(struct nvme_ctrl *ctrl)
{
    int rc;

    pci_enable_busmaster(ctrl->pci);

    /* Turn the controller off. */
    ctrl->reg->cc = 0;
    if (nvme_wait_csts_rdy(ctrl, 0)) {
        dprintf(2, "NVMe fatal error during controller shutdown\n");
        return -1;
    }

    ctrl->doorbell_stride = 4U << ((ctrl->reg->cap >> 32) & 0xF);

    rc = nvme_init_cq(ctrl, &ctrl->admin_cq, 1,
                      NVME_PAGE_SIZE / sizeof(struct nvme_cqe));
    if (rc) {
        return -1;
    }

    rc = nvme_init_sq(ctrl, &ctrl->admin_sq, 0,
                      NVME_PAGE_SIZE / sizeof(struct nvme_sqe), &ctrl->admin_cq);
    if (rc) {
        goto err_destroy_admin_cq;
    }

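    /* AQA holds the admin queue sizes (minus one): CQ size in bits 27:16,
       SQ size in bits 11:0. ASQ/ACQ take the queues' physical base
       addresses; SeaBIOS runs identity mapped, so the virtual pointers can
       be written directly. */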
    ctrl->reg->aqa = ctrl->admin_cq.common.mask << 16
                     | ctrl->admin_sq.common.mask;

    ctrl->reg->asq = (u32)ctrl->admin_sq.sqe;
    ctrl->reg->acq = (u32)ctrl->admin_cq.cqe;

    dprintf(3, " admin submission queue: %p\n", ctrl->admin_sq.sqe);
    dprintf(3, " admin completion queue: %p\n", ctrl->admin_cq.cqe);

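    /* Enable the controller. IOCQES (bits 23:20) and IOSQES (bits 19:16)
       are the log2 of the I/O completion and submission entry sizes. */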
    ctrl->reg->cc = NVME_CC_EN | (NVME_CQE_SIZE_LOG << 20)
        | (NVME_SQE_SIZE_LOG << 16 /* IOSQES */);

    if (nvme_wait_csts_rdy(ctrl, 1)) {
        dprintf(2, "NVMe fatal error while enabling controller\n");
        goto err_destroy_admin_sq;
    }

    /* The admin queue is set up and the controller is ready. Let's figure out
       what namespaces we have. */

    struct nvme_identify_ctrl *identify = nvme_admin_identify_ctrl(ctrl);

    if (!identify) {
        dprintf(2, "NVMe couldn't identify controller.\n");
        goto err_destroy_admin_sq;
    }

    dprintf(3, "NVMe has %u namespace%s.\n",
            identify->nn, (identify->nn == 1) ? "" : "s");

    ctrl->ns_count = identify->nn;
    free(identify);

    if ((ctrl->ns_count == 0) || nvme_create_io_queues(ctrl)) {
        /* No point in continuing if the controller says it has no namespaces
           or we couldn't create I/O queues. */
        goto err_destroy_admin_sq;
    }

    /* Drive structs must stay addressable from 16bit code, so allocate them
       in the f-segment. */
    ctrl->ns = malloc_fseg(sizeof(*ctrl->ns) * ctrl->ns_count);
    if (!ctrl->ns) {
        warn_noalloc();
        goto err_destroy_ioq;
    }
    memset(ctrl->ns, 0, sizeof(*ctrl->ns) * ctrl->ns_count);

    /* Probe each namespace; namespace IDs start at 1. */
    int ns_idx;
    for (ns_idx = 0; ns_idx < ctrl->ns_count; ns_idx++) {
        nvme_probe_ns(ctrl, &ctrl->ns[ns_idx], ns_idx + 1);
    }

    dprintf(3, "NVMe initialization complete!\n");
    return 0;

err_destroy_ioq:
    nvme_destroy_io_queues(ctrl);
err_destroy_admin_sq:
    nvme_destroy_sq(&ctrl->admin_sq);
err_destroy_admin_cq:
    nvme_destroy_cq(&ctrl->admin_cq);
    return -1;
}

/* Initialize an NVMe controller and detect its drives. */
static void
nvme_controller_setup(void *opaque)
{
    struct pci_device *pci = opaque;

    struct nvme_reg volatile *reg = pci_enable_membar(pci, PCI_BASE_ADDRESS_0);
    if (!reg)
        return;

    u32 version = reg->vs;
    dprintf(3, "Found NVMe controller with version %u.%u.%u.\n",
            version >> 16, (version >> 8) & 0xFF, version & 0xFF);
    dprintf(3, " Capabilities %016llx\n", reg->cap);

    if (~reg->cap & NVME_CAP_CSS_NVME) {
        dprintf(3, "Controller doesn't speak NVMe command set. Skipping.\n");
        goto err;
    }

    struct nvme_ctrl *ctrl = malloc_high(sizeof(*ctrl));
    if (!ctrl) {
        warn_noalloc();
        goto err;
    }

    memset(ctrl, 0, sizeof(*ctrl));

    ctrl->reg = reg;
    ctrl->pci = pci;

    if (nvme_controller_enable(ctrl)) {
        goto err_free_ctrl;
    }

    return;

err_free_ctrl:
    free(ctrl);
err:
    dprintf(2, "Failed to enable NVMe controller.\n");
}

// Locate and init NVMe controllers
static void
nvme_scan(void)
{
    // Scan PCI bus for NVMe adapters
    struct pci_device *pci;

    foreachpci(pci) {
        if (pci->class != PCI_CLASS_STORAGE_NVME)
            continue;
        if (pci->prog_if != 2 /* as of NVM 1.0e */) {
            dprintf(3, "Found incompatible NVMe: prog-if=%02x\n", pci->prog_if);
            continue;
        }

        run_thread(nvme_controller_setup, pci);
    }
}

static int
nvme_cmd_readwrite(struct nvme_namespace *ns, struct disk_op_s *op, int write)
{
    int res = DISK_RET_SUCCESS;
    u16 const max_blocks = NVME_PAGE_SIZE / ns->block_size;
    u16 i;

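    /* op->buf_fl may be unaligned or cross a page boundary, so bounce each
       chunk (at most one page) through the namespace's page-aligned DMA
       buffer. */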
    for (i = 0; i < op->count && res == DISK_RET_SUCCESS;) {
        u16 blocks_remaining = op->count - i;
        u16 blocks = blocks_remaining < max_blocks ? blocks_remaining
                                                   : max_blocks;
        char *op_buf = op->buf_fl + i * ns->block_size;

        if (write) {
            memcpy(ns->dma_buffer, op_buf, blocks * ns->block_size);
        }

        res = nvme_io_readwrite(ns, op->lba + i, ns->dma_buffer, blocks, write);
        dprintf(3, "ns %u %s lba %llu+%u: %d\n", ns->ns_id,
                write ? "write" : "read", op->lba + i, blocks, res);

        if (!write && res == DISK_RET_SUCCESS) {
            memcpy(op_buf, ns->dma_buffer, blocks * ns->block_size);
        }

        i += blocks;
    }

    return res;
}

int
nvme_process_op(struct disk_op_s *op)
{
    if (!CONFIG_NVME)
        return DISK_RET_SUCCESS;

    struct nvme_namespace *ns = container_of(op->drive_fl,
                                             struct nvme_namespace, drive);

    switch (op->command) {
    case CMD_READ:
    case CMD_WRITE:
        return nvme_cmd_readwrite(ns, op, op->command == CMD_WRITE);
    default:
        return default_process_op(op);
    }
}

void
nvme_setup(void)
{
    ASSERT32FLAT();
    if (!CONFIG_NVME)
        return;

    dprintf(3, "init nvme\n");
    nvme_scan();
}

/* EOF */