xref: /qemu/hw/nvme/nvme.h (revision ca61e750)
1 /*
2  * QEMU NVM Express
3  *
4  * Copyright (c) 2012 Intel Corporation
5  * Copyright (c) 2021 Minwoo Im
6  * Copyright (c) 2021 Samsung Electronics Co., Ltd.
7  *
8  * Authors:
9  *   Keith Busch            <kbusch@kernel.org>
10  *   Klaus Jensen           <k.jensen@samsung.com>
11  *   Gollu Appalanaidu      <anaidu.gollu@samsung.com>
12  *   Dmitry Fomichev        <dmitry.fomichev@wdc.com>
13  *   Minwoo Im              <minwoo.im.dev@gmail.com>
14  *
15  * This code is licensed under the GNU GPL v2 or later.
16  */
17 
18 #ifndef HW_NVME_NVME_H
19 #define HW_NVME_NVME_H
20 
21 #include "qemu/uuid.h"
22 #include "hw/pci/pci.h"
23 #include "hw/block/block.h"
24 
25 #include "block/nvme.h"
26 
27 #define NVME_MAX_CONTROLLERS 32
28 #define NVME_MAX_NAMESPACES  256
29 #define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000)
30 
31 QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);
32 
33 typedef struct NvmeCtrl NvmeCtrl;
34 typedef struct NvmeNamespace NvmeNamespace;
35 
#define TYPE_NVME_BUS "nvme-bus"
OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)

/* Bus on which nvme namespace devices are attached. */
typedef struct NvmeBus {
    BusState parent_bus;
} NvmeBus;

#define TYPE_NVME_SUBSYS "nvme-subsys"
#define NVME_SUBSYS(obj) \
    OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)

/*
 * An NVM subsystem: groups up to NVME_MAX_CONTROLLERS controllers and the
 * namespaces they may share.
 */
typedef struct NvmeSubsystem {
    DeviceState parent_obj;
    NvmeBus     bus;
    uint8_t     subnqn[256];    /* NVM Subsystem NVMe Qualified Name */
    char        *serial;

    NvmeCtrl      *ctrls[NVME_MAX_CONTROLLERS];         /* indexed by cntlid */
    NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1]; /* indexed by nsid (1-based) */

    struct {
        char *nqn;  /* "nqn" parameter; presumably overrides the default subnqn */
    } params;
} NvmeSubsystem;
60 
61 int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
62 void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n);
63 
64 static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
65                                          uint32_t cntlid)
66 {
67     if (!subsys || cntlid >= NVME_MAX_CONTROLLERS) {
68         return NULL;
69     }
70 
71     return subsys->ctrls[cntlid];
72 }
73 
74 static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
75                                             uint32_t nsid)
76 {
77     if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) {
78         return NULL;
79     }
80 
81     return subsys->namespaces[nsid];
82 }
83 
#define TYPE_NVME_NS "nvme-ns"
#define NVME_NS(obj) \
    OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)

/* Runtime state for one zone of a zoned namespace. */
typedef struct NvmeZone {
    NvmeZoneDescr   d;      /* zone descriptor as reported to the host */
    uint64_t        w_ptr;  /* write pointer shadow — see zone mgmt code */
    QTAILQ_ENTRY(NvmeZone) entry;
} NvmeZone;

/* User-configurable namespace properties. */
typedef struct NvmeNamespaceParams {
    bool     detached;      /* start detached from all controllers */
    bool     shared;        /* namespace may be attached to multiple controllers */
    uint32_t nsid;
    QemuUUID uuid;
    uint64_t eui64;
    bool     eui64_default; /* eui64 was defaulted (NVME_EUI64_DEFAULT based) */

    /* end-to-end protection / metadata format parameters */
    uint16_t ms;            /* metadata size in bytes */
    uint8_t  mset;
    uint8_t  pi;
    uint8_t  pil;
    uint8_t  pif;

    /* copy command limits */
    uint16_t mssrl;
    uint32_t mcl;
    uint8_t  msrc;

    /* zoned namespace parameters */
    bool     zoned;
    bool     cross_zone_read;
    uint64_t zone_size_bs;      /* zone size in bytes */
    uint64_t zone_cap_bs;       /* zone capacity in bytes */
    uint32_t max_active_zones;  /* 0 means no limit (see nvme_aor_* helpers) */
    uint32_t max_open_zones;    /* 0 means no limit (see nvme_aor_* helpers) */
    uint32_t zd_extension_size; /* per-zone descriptor extension size in bytes */

    /* zone random write area parameters */
    uint32_t numzrwa;
    uint64_t zrwas;
    uint64_t zrwafg;
} NvmeNamespaceParams;
124 
/* Device state for one nvme-ns namespace device. */
typedef struct NvmeNamespace {
    DeviceState  parent_obj;
    BlockConf    blkconf;   /* backing block device configuration */
    int32_t      bootindex;
    int64_t      size;      /* data size in bytes */
    int64_t      moff;      /* byte offset of the metadata area (see nvme_moff) */
    NvmeIdNs     id_ns;     /* Identify Namespace data structure */
    NvmeIdNsNvm  id_ns_nvm;
    NvmeLBAF     lbaf;      /* currently selected LBA format */
    unsigned int nlbaf;     /* number of supported LBA formats */
    size_t       lbasz;     /* logical block size in bytes */
    const uint32_t *iocs;   /* supported I/O command set opcodes table */
    uint8_t      csi;       /* command set identifier */
    uint16_t     status;
    int          attached;  /* number of controllers this namespace is attached to */
    uint8_t      pif;

    /* effective zone random write area configuration */
    struct {
        uint16_t zrwas;
        uint16_t zrwafg;
        uint32_t numzrwa;
    } zns;

    QTAILQ_ENTRY(NvmeNamespace) entry;

    /* zoned command set state */
    NvmeIdNsZoned   *id_ns_zoned;
    NvmeZone        *zone_array;    /* num_zones entries */
    QTAILQ_HEAD(, NvmeZone) exp_open_zones;
    QTAILQ_HEAD(, NvmeZone) imp_open_zones;
    QTAILQ_HEAD(, NvmeZone) closed_zones;
    QTAILQ_HEAD(, NvmeZone) full_zones;
    uint32_t        num_zones;
    uint64_t        zone_size;      /* zone size in logical blocks */
    uint64_t        zone_capacity;  /* zone capacity in logical blocks */
    uint32_t        zone_size_log2; /* log2(zone_size) when it is a power of two */
    uint8_t         *zd_extensions; /* num_zones * zd_extension_size bytes */
    int32_t         nr_open_zones;   /* see nvme_aor_{inc,dec}_open */
    int32_t         nr_active_zones; /* see nvme_aor_{inc,dec}_active */

    NvmeNamespaceParams params;

    struct {
        uint32_t err_rec;   /* Error Recovery feature value */
    } features;
} NvmeNamespace;
170 
171 static inline uint32_t nvme_nsid(NvmeNamespace *ns)
172 {
173     if (ns) {
174         return ns->params.nsid;
175     }
176 
177     return 0;
178 }
179 
180 static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba)
181 {
182     return lba << ns->lbaf.ds;
183 }
184 
185 static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba)
186 {
187     return ns->lbaf.ms * lba;
188 }
189 
190 static inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba)
191 {
192     return ns->moff + nvme_m2b(ns, lba);
193 }
194 
195 static inline bool nvme_ns_ext(NvmeNamespace *ns)
196 {
197     return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas);
198 }
199 
200 static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone)
201 {
202     return zone->d.zs >> 4;
203 }
204 
205 static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state)
206 {
207     zone->d.zs = state << 4;
208 }
209 
210 static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone)
211 {
212     return zone->d.zslba + ns->zone_size;
213 }
214 
215 static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone)
216 {
217     return zone->d.zslba + zone->d.zcap;
218 }
219 
220 static inline bool nvme_wp_is_valid(NvmeZone *zone)
221 {
222     uint8_t st = nvme_get_zone_state(zone);
223 
224     return st != NVME_ZONE_STATE_FULL &&
225            st != NVME_ZONE_STATE_READ_ONLY &&
226            st != NVME_ZONE_STATE_OFFLINE;
227 }
228 
229 static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns,
230                                              uint32_t zone_idx)
231 {
232     return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size];
233 }
234 
235 static inline void nvme_aor_inc_open(NvmeNamespace *ns)
236 {
237     assert(ns->nr_open_zones >= 0);
238     if (ns->params.max_open_zones) {
239         ns->nr_open_zones++;
240         assert(ns->nr_open_zones <= ns->params.max_open_zones);
241     }
242 }
243 
244 static inline void nvme_aor_dec_open(NvmeNamespace *ns)
245 {
246     if (ns->params.max_open_zones) {
247         assert(ns->nr_open_zones > 0);
248         ns->nr_open_zones--;
249     }
250     assert(ns->nr_open_zones >= 0);
251 }
252 
253 static inline void nvme_aor_inc_active(NvmeNamespace *ns)
254 {
255     assert(ns->nr_active_zones >= 0);
256     if (ns->params.max_active_zones) {
257         ns->nr_active_zones++;
258         assert(ns->nr_active_zones <= ns->params.max_active_zones);
259     }
260 }
261 
262 static inline void nvme_aor_dec_active(NvmeNamespace *ns)
263 {
264     if (ns->params.max_active_zones) {
265         assert(ns->nr_active_zones > 0);
266         ns->nr_active_zones--;
267         assert(ns->nr_active_zones >= ns->nr_open_zones);
268     }
269     assert(ns->nr_active_zones >= 0);
270 }
271 
272 void nvme_ns_init_format(NvmeNamespace *ns);
273 int nvme_ns_setup(NvmeNamespace *ns, Error **errp);
274 void nvme_ns_drain(NvmeNamespace *ns);
275 void nvme_ns_shutdown(NvmeNamespace *ns);
276 void nvme_ns_cleanup(NvmeNamespace *ns);
277 
/* A queued Asynchronous Event Request result awaiting delivery. */
typedef struct NvmeAsyncEvent {
    QTAILQ_ENTRY(NvmeAsyncEvent) entry;
    NvmeAerResult result;
} NvmeAsyncEvent;

/* Flags for NvmeSg.flags. */
enum {
    NVME_SG_ALLOC = 1 << 0, /* the union member below is initialized */
    NVME_SG_DMA   = 1 << 1, /* qsg (DMA addresses) is active, else iov */
};

/* Scatter/gather state for a command's data or metadata transfer. */
typedef struct NvmeSg {
    int flags;

    union {
        QEMUSGList   qsg;   /* guest-physical addresses (when NVME_SG_DMA) */
        QEMUIOVector iov;   /* host virtual addresses otherwise */
    };
} NvmeSg;

typedef enum NvmeTxDirection {
    NVME_TX_DIRECTION_TO_DEVICE   = 0,
    NVME_TX_DIRECTION_FROM_DEVICE = 1,
} NvmeTxDirection;

/* Per-command state, allocated per submission queue entry. */
typedef struct NvmeRequest {
    struct NvmeSQueue       *sq;     /* submission queue this came from */
    struct NvmeNamespace    *ns;
    BlockAIOCB              *aiocb;
    uint16_t                status;  /* NVMe status code for the completion */
    void                    *opaque;
    NvmeCqe                 cqe;     /* completion queue entry being built */
    NvmeCmd                 cmd;     /* copy of the submitted command */
    BlockAcctCookie         acct;
    NvmeSg                  sg;
    QTAILQ_ENTRY(NvmeRequest)entry;
} NvmeRequest;

/* Bounce buffers used when data must be staged (e.g. compare/verify). */
typedef struct NvmeBounceContext {
    NvmeRequest *req;

    struct {
        QEMUIOVector iov;
        uint8_t *bounce;
    } data, mdata;
} NvmeBounceContext;
323 
/* Human-readable name of an admin command opcode, for tracing/logging. */
static inline const char *nvme_adm_opc_str(uint8_t opc)
{
    switch (opc) {
    case NVME_ADM_CMD_DELETE_SQ:        return "NVME_ADM_CMD_DELETE_SQ";
    case NVME_ADM_CMD_CREATE_SQ:        return "NVME_ADM_CMD_CREATE_SQ";
    case NVME_ADM_CMD_GET_LOG_PAGE:     return "NVME_ADM_CMD_GET_LOG_PAGE";
    case NVME_ADM_CMD_DELETE_CQ:        return "NVME_ADM_CMD_DELETE_CQ";
    case NVME_ADM_CMD_CREATE_CQ:        return "NVME_ADM_CMD_CREATE_CQ";
    case NVME_ADM_CMD_IDENTIFY:         return "NVME_ADM_CMD_IDENTIFY";
    case NVME_ADM_CMD_ABORT:            return "NVME_ADM_CMD_ABORT";
    case NVME_ADM_CMD_SET_FEATURES:     return "NVME_ADM_CMD_SET_FEATURES";
    case NVME_ADM_CMD_GET_FEATURES:     return "NVME_ADM_CMD_GET_FEATURES";
    case NVME_ADM_CMD_ASYNC_EV_REQ:     return "NVME_ADM_CMD_ASYNC_EV_REQ";
    case NVME_ADM_CMD_NS_ATTACHMENT:    return "NVME_ADM_CMD_NS_ATTACHMENT";
    case NVME_ADM_CMD_FORMAT_NVM:       return "NVME_ADM_CMD_FORMAT_NVM";
    default:                            return "NVME_ADM_CMD_UNKNOWN";
    }
}

/* Human-readable name of an I/O command opcode, for tracing/logging. */
static inline const char *nvme_io_opc_str(uint8_t opc)
{
    switch (opc) {
    case NVME_CMD_FLUSH:            return "NVME_NVM_CMD_FLUSH";
    case NVME_CMD_WRITE:            return "NVME_NVM_CMD_WRITE";
    case NVME_CMD_READ:             return "NVME_NVM_CMD_READ";
    case NVME_CMD_COMPARE:          return "NVME_NVM_CMD_COMPARE";
    case NVME_CMD_WRITE_ZEROES:     return "NVME_NVM_CMD_WRITE_ZEROES";
    case NVME_CMD_DSM:              return "NVME_NVM_CMD_DSM";
    case NVME_CMD_VERIFY:           return "NVME_NVM_CMD_VERIFY";
    case NVME_CMD_COPY:             return "NVME_NVM_CMD_COPY";
    case NVME_CMD_ZONE_MGMT_SEND:   return "NVME_ZONED_CMD_MGMT_SEND";
    case NVME_CMD_ZONE_MGMT_RECV:   return "NVME_ZONED_CMD_MGMT_RECV";
    case NVME_CMD_ZONE_APPEND:      return "NVME_ZONED_CMD_ZONE_APPEND";
    default:                        return "NVME_NVM_CMD_UNKNOWN";
    }
}
360 
/* Submission queue state. */
typedef struct NvmeSQueue {
    struct NvmeCtrl *ctrl;
    uint16_t    sqid;
    uint16_t    cqid;       /* completion queue this SQ posts to */
    uint32_t    head;
    uint32_t    tail;
    uint32_t    size;       /* number of entries */
    uint64_t    dma_addr;   /* guest-physical base address of the queue */
    QEMUTimer   *timer;
    NvmeRequest *io_req;    /* preallocated request pool for this queue */
    QTAILQ_HEAD(, NvmeRequest) req_list;     /* free requests */
    QTAILQ_HEAD(, NvmeRequest) out_req_list; /* requests in flight */
    QTAILQ_ENTRY(NvmeSQueue) entry;
} NvmeSQueue;

/* Completion queue state. */
typedef struct NvmeCQueue {
    struct NvmeCtrl *ctrl;
    uint8_t     phase;      /* current phase tag */
    uint16_t    cqid;
    uint16_t    irq_enabled;
    uint32_t    head;
    uint32_t    tail;
    uint32_t    vector;     /* interrupt vector */
    uint32_t    size;       /* number of entries */
    uint64_t    dma_addr;   /* guest-physical base address of the queue */
    QEMUTimer   *timer;
    QTAILQ_HEAD(, NvmeSQueue) sq_list;  /* submission queues posting here */
    QTAILQ_HEAD(, NvmeRequest) req_list;
} NvmeCQueue;

#define TYPE_NVME "nvme"
#define NVME(obj) \
        OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)

/* User-configurable controller properties. */
typedef struct NvmeParams {
    char     *serial;
    uint32_t num_queues; /* deprecated since 5.1 */
    uint32_t max_ioqpairs;
    uint16_t msix_qsize;
    uint32_t cmb_size_mb;   /* Controller Memory Buffer size, in MiB */
    uint8_t  aerl;          /* Asynchronous Event Request Limit */
    uint32_t aer_max_queued;
    uint8_t  mdts;          /* Maximum Data Transfer Size */
    uint8_t  vsl;
    bool     use_intel_id;  /* use the legacy Intel PCI vendor/device id */
    uint8_t  zasl;          /* Zone Append Size Limit */
    bool     auto_transition_zones;
    bool     legacy_cmb;
} NvmeParams;
410 
/* The nvme PCI controller device state. */
typedef struct NvmeCtrl {
    PCIDevice    parent_obj;
    MemoryRegion bar0;
    MemoryRegion iomem;
    NvmeBar      bar;       /* controller register state */
    NvmeParams   params;
    NvmeBus      bus;

    uint16_t    cntlid;     /* controller id within the subsystem */
    bool        qs_created; /* I/O queues have been created */
    uint32_t    page_size;
    uint16_t    page_bits;  /* log2(page_size) */
    uint16_t    max_prp_ents;
    uint16_t    cqe_size;
    uint16_t    sqe_size;
    uint32_t    reg_size;
    uint32_t    max_q_ents;
    uint8_t     outstanding_aers;
    uint32_t    irq_status;
    int         cq_pending;
    uint64_t    host_timestamp;                 /* Timestamp sent by the host */
    uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
    uint64_t    starttime_ms;
    uint16_t    temperature;
    uint8_t     smart_critical_warning;

    /* Controller Memory Buffer */
    struct {
        MemoryRegion mem;
        uint8_t      *buf;
        bool         cmse;  /* CMB is enabled in the controller address space */
        hwaddr       cba;   /* CMB base address */
    } cmb;

    /* Persistent Memory Region */
    struct {
        HostMemoryBackend *dev;
        bool              cmse;
        hwaddr            cba;
    } pmr;

    uint8_t     aer_mask;
    NvmeRequest **aer_reqs; /* outstanding AER requests awaiting an event */
    QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;
    int         aer_queued;

    uint32_t    dmrsl;      /* DSM Maximum Ranges Supported Limit (Identify NVM) */

    /* Namespace ID is started with 1 so bitmap should be 1-based */
#define NVME_CHANGED_NSID_SIZE  (NVME_MAX_NAMESPACES + 1)
    DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE);

    NvmeSubsystem   *subsys;

    NvmeNamespace   namespace;  /* inline namespace for the legacy drive=... setup */
    NvmeNamespace   *namespaces[NVME_MAX_NAMESPACES + 1]; /* indexed by nsid */
    NvmeSQueue      **sq;
    NvmeCQueue      **cq;
    NvmeSQueue      admin_sq;
    NvmeCQueue      admin_cq;
    NvmeIdCtrl      id_ctrl;

    /* feature (Set/Get Features) state */
    struct {
        struct {
            uint16_t temp_thresh_hi;
            uint16_t temp_thresh_low;
        };

        uint32_t                async_config;
        NvmeHostBehaviorSupport hbs;
    } features;
} NvmeCtrl;
481 
482 static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
483 {
484     if (!nsid || nsid > NVME_MAX_NAMESPACES) {
485         return NULL;
486     }
487 
488     return n->namespaces[nsid];
489 }
490 
491 static inline NvmeCQueue *nvme_cq(NvmeRequest *req)
492 {
493     NvmeSQueue *sq = req->sq;
494     NvmeCtrl *n = sq->ctrl;
495 
496     return n->cq[sq->cqid];
497 }
498 
499 static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
500 {
501     NvmeSQueue *sq = req->sq;
502     return sq->ctrl;
503 }
504 
505 static inline uint16_t nvme_cid(NvmeRequest *req)
506 {
507     if (!req) {
508         return 0xffff;
509     }
510 
511     return le16_to_cpu(req->cqe.cid);
512 }
513 
514 void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
515 uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len,
516                           NvmeTxDirection dir, NvmeRequest *req);
517 uint16_t nvme_bounce_mdata(NvmeCtrl *n, void *ptr, uint32_t len,
518                            NvmeTxDirection dir, NvmeRequest *req);
519 void nvme_rw_complete_cb(void *opaque, int ret);
520 uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
521                        NvmeCmd *cmd);
522 
523 #endif /* HW_NVME_NVME_H */
524