xref: /qemu/hw/nvme/nvme.h (revision 336d354b)
1 /*
2  * QEMU NVM Express
3  *
4  * Copyright (c) 2012 Intel Corporation
5  * Copyright (c) 2021 Minwoo Im
6  * Copyright (c) 2021 Samsung Electronics Co., Ltd.
7  *
8  * Authors:
9  *   Keith Busch            <kbusch@kernel.org>
10  *   Klaus Jensen           <k.jensen@samsung.com>
11  *   Gollu Appalanaidu      <anaidu.gollu@samsung.com>
12  *   Dmitry Fomichev        <dmitry.fomichev@wdc.com>
13  *   Minwoo Im              <minwoo.im.dev@gmail.com>
14  *
15  * This code is licensed under the GNU GPL v2 or later.
16  */
17 
18 #ifndef HW_NVME_INTERNAL_H
19 #define HW_NVME_INTERNAL_H
20 
21 #include "qemu/uuid.h"
22 #include "hw/pci/pci.h"
23 #include "hw/block/block.h"
24 
25 #include "block/nvme.h"
26 
/* Upper bounds used to size the controller and namespace tables declared below. */
27 #define NVME_MAX_CONTROLLERS 32
28 #define NVME_MAX_NAMESPACES  256
/* NOTE(review): presumably the base value for an auto-generated EUI-64 (see eui64_default) - confirm. */
29 #define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000)
30 
/* Build fails if the namespace limit exceeds NVME_NSID_BROADCAST - 1. */
31 QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);
32 
/* Forward declarations so the structures below can hold pointers to each other. */
33 typedef struct NvmeCtrl NvmeCtrl;
34 typedef struct NvmeNamespace NvmeNamespace;
35 
/* Type name and type declaration for the NVMe bus object. */
36 #define TYPE_NVME_BUS "nvme-bus"
37 OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)
38 
/* The bus carries no state of its own beyond the generic BusState it embeds. */
39 typedef struct NvmeBus {
40     BusState parent_bus;
41 } NvmeBus;
42 
/* Type name and checked-cast helper for the NVMe subsystem device. */
43 #define TYPE_NVME_SUBSYS "nvme-subsys"
44 #define NVME_SUBSYS(obj) \
45     OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
46 
/*
 * NVMe subsystem device: embeds a bus and holds lookup tables of the
 * controllers and namespaces that belong to it.
 */
47 typedef struct NvmeSubsystem {
48     DeviceState parent_obj;
49     NvmeBus     bus;
50     uint8_t     subnqn[256]; /* NOTE(review): presumably the subsystem NQN string - confirm */
51 
52     NvmeCtrl      *ctrls[NVME_MAX_CONTROLLERS]; /* indexed by controller ID, see nvme_subsys_ctrl() */
53     NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1]; /* indexed by NSID; nvme_subsys_ns() never returns slot 0 */
54 
55     struct {
56         char *nqn; /* NOTE(review): looks like a user-configurable NQN - confirm against the property list */
57     } params;
58 } NvmeSubsystem;
59 
/*
 * Controller registration with a subsystem; both functions are defined
 * outside this header.
 * NOTE(review): the int return with an Error ** out-parameter suggests the
 * usual "negative on failure, *errp set" convention - confirm in the
 * implementation.
 */
60 int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
61 void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n);
62 
63 static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
64                                          uint32_t cntlid)
65 {
66     if (!subsys || cntlid >= NVME_MAX_CONTROLLERS) {
67         return NULL;
68     }
69 
70     return subsys->ctrls[cntlid];
71 }
72 
73 static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
74                                             uint32_t nsid)
75 {
76     if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) {
77         return NULL;
78     }
79 
80     return subsys->namespaces[nsid];
81 }
82 
/* Type name and checked-cast helper for the NVMe namespace device. */
83 #define TYPE_NVME_NS "nvme-ns"
84 #define NVME_NS(obj) \
85     OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)
86 
/* Per-zone bookkeeping for a zoned namespace. */
87 typedef struct NvmeZone {
88     NvmeZoneDescr   d;      /* zone descriptor; the zone state is kept in the upper nibble of d.zs */
89     uint64_t        w_ptr;  /* NOTE(review): presumably the device-side write pointer - confirm */
90     QTAILQ_ENTRY(NvmeZone) entry; /* linkage for the per-state zone lists in NvmeNamespace */
91 } NvmeZone;
92 
/*
 * Configuration values of a namespace, stored in NvmeNamespace.params.
 * NOTE(review): the grouping comments below are inferred from field names
 * only; confirm the exact meaning against the property definitions.
 */
93 typedef struct NvmeNamespaceParams {
    /* Attachment and identity. */
94     bool     detached;
95     bool     shared;
96     uint32_t nsid;          /* namespace ID; returned by nvme_nsid() */
97     QemuUUID uuid;
98     uint64_t eui64;
99     bool     eui64_default;
100 
    /* NOTE(review): presumably metadata size and protection information settings. */
101     uint16_t ms;
102     uint8_t  mset;
103     uint8_t  pi;
104     uint8_t  pil;
105     uint8_t  pif;
106 
    /* NOTE(review): presumably limits for the Copy command. */
107     uint16_t mssrl;
108     uint32_t mcl;
109     uint8_t  msrc;
110 
    /* Zoned namespace settings. */
111     bool     zoned;
112     bool     cross_zone_read;
113     uint64_t zone_size_bs;
114     uint64_t zone_cap_bs;
115     uint32_t max_active_zones;  /* 0 disables active-zone accounting, see nvme_aor_inc_active() */
116     uint32_t max_open_zones;    /* 0 disables open-zone accounting, see nvme_aor_inc_open() */
117     uint32_t zd_extension_size; /* stride between entries of NvmeNamespace.zd_extensions */
118 
    /* NOTE(review): presumably Zone Random Write Area settings. */
119     uint32_t numzrwa;
120     uint64_t zrwas;
121     uint64_t zrwafg;
122 } NvmeNamespaceParams;
123 
/*
 * Runtime state of one NVMe namespace device.
 * NOTE(review): field meanings not referenced by the helpers in this header
 * are left undocumented on purpose; consult the namespace implementation.
 */
124 typedef struct NvmeNamespace {
125     DeviceState  parent_obj;
126     BlockConf    blkconf;
127     int32_t      bootindex;
128     int64_t      size;
129     int64_t      moff;       /* base offset that nvme_moff() adds the per-LBA metadata offset to */
130     NvmeIdNs     id_ns;      /* id_ns.flbas is tested by nvme_ns_ext() */
131     NvmeIdNsNvm  id_ns_nvm;
132     NvmeLBAF     lbaf;       /* lbaf.ds is the shift used by nvme_l2b(); lbaf.ms the factor used by nvme_m2b() */
133     unsigned int nlbaf;
134     size_t       lbasz;
135     const uint32_t *iocs;
136     uint8_t      csi;
137     uint16_t     status;
138     int          attached;
139     uint8_t      pif;
140 
    /* NOTE(review): narrower types than the same-named fields in params - presumably derived values; confirm. */
141     struct {
142         uint16_t zrwas;
143         uint16_t zrwafg;
144         uint32_t numzrwa;
145     } zns;
146 
147     QTAILQ_ENTRY(NvmeNamespace) entry;
148 
    /* Zoned namespace state. */
149     NvmeIdNsZoned   *id_ns_zoned;
150     NvmeZone        *zone_array;
151     QTAILQ_HEAD(, NvmeZone) exp_open_zones;
152     QTAILQ_HEAD(, NvmeZone) imp_open_zones;
153     QTAILQ_HEAD(, NvmeZone) closed_zones;
154     QTAILQ_HEAD(, NvmeZone) full_zones;
155     uint32_t        num_zones;
156     uint64_t        zone_size;       /* added to a zone's start LBA by nvme_zone_rd_boundary() */
157     uint64_t        zone_capacity;
158     uint32_t        zone_size_log2;
159     uint8_t         *zd_extensions;  /* indexed in params.zd_extension_size strides by nvme_get_zd_extension() */
160     int32_t         nr_open_zones;   /* maintained by nvme_aor_inc_open()/nvme_aor_dec_open() */
161     int32_t         nr_active_zones; /* maintained by nvme_aor_inc_active()/nvme_aor_dec_active() */
162 
163     NvmeNamespaceParams params;
164 
165     struct {
166         uint32_t err_rec;
167     } features;
168 } NvmeNamespace;
169 
170 static inline uint32_t nvme_nsid(NvmeNamespace *ns)
171 {
172     if (ns) {
173         return ns->params.nsid;
174     }
175 
176     return 0;
177 }
178 
179 static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba)
180 {
181     return lba << ns->lbaf.ds;
182 }
183 
184 static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba)
185 {
186     return ns->lbaf.ms * lba;
187 }
188 
189 static inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba)
190 {
191     return ns->moff + nvme_m2b(ns, lba);
192 }
193 
194 static inline bool nvme_ns_ext(NvmeNamespace *ns)
195 {
196     return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas);
197 }
198 
199 static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone)
200 {
201     return zone->d.zs >> 4;
202 }
203 
204 static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state)
205 {
206     zone->d.zs = state << 4;
207 }
208 
209 static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone)
210 {
211     return zone->d.zslba + ns->zone_size;
212 }
213 
214 static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone)
215 {
216     return zone->d.zslba + zone->d.zcap;
217 }
218 
219 static inline bool nvme_wp_is_valid(NvmeZone *zone)
220 {
221     uint8_t st = nvme_get_zone_state(zone);
222 
223     return st != NVME_ZONE_STATE_FULL &&
224            st != NVME_ZONE_STATE_READ_ONLY &&
225            st != NVME_ZONE_STATE_OFFLINE;
226 }
227 
228 static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns,
229                                              uint32_t zone_idx)
230 {
231     return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size];
232 }
233 
234 static inline void nvme_aor_inc_open(NvmeNamespace *ns)
235 {
236     assert(ns->nr_open_zones >= 0);
237     if (ns->params.max_open_zones) {
238         ns->nr_open_zones++;
239         assert(ns->nr_open_zones <= ns->params.max_open_zones);
240     }
241 }
242 
243 static inline void nvme_aor_dec_open(NvmeNamespace *ns)
244 {
245     if (ns->params.max_open_zones) {
246         assert(ns->nr_open_zones > 0);
247         ns->nr_open_zones--;
248     }
249     assert(ns->nr_open_zones >= 0);
250 }
251 
252 static inline void nvme_aor_inc_active(NvmeNamespace *ns)
253 {
254     assert(ns->nr_active_zones >= 0);
255     if (ns->params.max_active_zones) {
256         ns->nr_active_zones++;
257         assert(ns->nr_active_zones <= ns->params.max_active_zones);
258     }
259 }
260 
261 static inline void nvme_aor_dec_active(NvmeNamespace *ns)
262 {
263     if (ns->params.max_active_zones) {
264         assert(ns->nr_active_zones > 0);
265         ns->nr_active_zones--;
266         assert(ns->nr_active_zones >= ns->nr_open_zones);
267     }
268     assert(ns->nr_active_zones >= 0);
269 }
270 
/*
 * Namespace lifecycle entry points; all are defined outside this header.
 * NOTE(review): nvme_ns_setup() returns int and takes an Error ** - it
 * presumably reports failure through both; confirm in the implementation.
 */
271 void nvme_ns_init_format(NvmeNamespace *ns);
272 int nvme_ns_setup(NvmeNamespace *ns, Error **errp);
273 void nvme_ns_drain(NvmeNamespace *ns);
274 void nvme_ns_shutdown(NvmeNamespace *ns);
275 void nvme_ns_cleanup(NvmeNamespace *ns);
276 
/* One queued asynchronous event; instances are linked into NvmeCtrl.aer_queue. */
277 typedef struct NvmeAsyncEvent {
278     QTAILQ_ENTRY(NvmeAsyncEvent) entry;
279     NvmeAerResult result;
280 } NvmeAsyncEvent;
281 
/* Bit flags stored in NvmeSg.flags. */
282 enum {
283     NVME_SG_ALLOC = 1 << 0,
284     NVME_SG_DMA   = 1 << 1,
285 };
286 
/*
 * Scatter/gather state for a request, held either as a QEMUSGList or as a
 * QEMUIOVector (the two share storage).
 * NOTE(review): presumably NVME_SG_DMA in flags selects the qsg member and
 * its absence selects iov - confirm in the mapping code.
 */
287 typedef struct NvmeSg {
288     int flags;
289 
290     union {
291         QEMUSGList   qsg;
292         QEMUIOVector iov;
293     };
294 } NvmeSg;
295 
/* Transfer direction argument taken by nvme_bounce_data() and nvme_bounce_mdata(). */
296 typedef enum NvmeTxDirection {
297     NVME_TX_DIRECTION_TO_DEVICE   = 0,
298     NVME_TX_DIRECTION_FROM_DEVICE = 1,
299 } NvmeTxDirection;
300 
/* State of a single command being processed by the controller. */
301 typedef struct NvmeRequest {
302     struct NvmeSQueue       *sq;     /* owning submission queue; nvme_ctrl()/nvme_cq() navigate through it */
303     struct NvmeNamespace    *ns;
304     BlockAIOCB              *aiocb;
305     uint16_t                status;
306     void                    *opaque;
307     NvmeCqe                 cqe;     /* cqe.cid is what nvme_cid() reports */
308     NvmeCmd                 cmd;
309     BlockAcctCookie         acct;
310     NvmeSg                  sg;
311     QTAILQ_ENTRY(NvmeRequest)entry;  /* linkage for the request lists in NvmeSQueue/NvmeCQueue */
312 } NvmeRequest;
313 
/*
 * Pairs a request with two identically shaped buffer descriptors, one named
 * data and one named mdata, each made of an I/O vector and a byte buffer.
 * NOTE(review): presumably used to stage data and metadata through bounce
 * buffers - confirm at the use sites.
 */
314 typedef struct NvmeBounceContext {
315     NvmeRequest *req;
316 
317     struct {
318         QEMUIOVector iov;
319         uint8_t *bounce;
320     } data, mdata;
321 } NvmeBounceContext;
322 
323 static inline const char *nvme_adm_opc_str(uint8_t opc)
324 {
325     switch (opc) {
326     case NVME_ADM_CMD_DELETE_SQ:        return "NVME_ADM_CMD_DELETE_SQ";
327     case NVME_ADM_CMD_CREATE_SQ:        return "NVME_ADM_CMD_CREATE_SQ";
328     case NVME_ADM_CMD_GET_LOG_PAGE:     return "NVME_ADM_CMD_GET_LOG_PAGE";
329     case NVME_ADM_CMD_DELETE_CQ:        return "NVME_ADM_CMD_DELETE_CQ";
330     case NVME_ADM_CMD_CREATE_CQ:        return "NVME_ADM_CMD_CREATE_CQ";
331     case NVME_ADM_CMD_IDENTIFY:         return "NVME_ADM_CMD_IDENTIFY";
332     case NVME_ADM_CMD_ABORT:            return "NVME_ADM_CMD_ABORT";
333     case NVME_ADM_CMD_SET_FEATURES:     return "NVME_ADM_CMD_SET_FEATURES";
334     case NVME_ADM_CMD_GET_FEATURES:     return "NVME_ADM_CMD_GET_FEATURES";
335     case NVME_ADM_CMD_ASYNC_EV_REQ:     return "NVME_ADM_CMD_ASYNC_EV_REQ";
336     case NVME_ADM_CMD_NS_ATTACHMENT:    return "NVME_ADM_CMD_NS_ATTACHMENT";
337     case NVME_ADM_CMD_FORMAT_NVM:       return "NVME_ADM_CMD_FORMAT_NVM";
338     default:                            return "NVME_ADM_CMD_UNKNOWN";
339     }
340 }
341 
342 static inline const char *nvme_io_opc_str(uint8_t opc)
343 {
344     switch (opc) {
345     case NVME_CMD_FLUSH:            return "NVME_NVM_CMD_FLUSH";
346     case NVME_CMD_WRITE:            return "NVME_NVM_CMD_WRITE";
347     case NVME_CMD_READ:             return "NVME_NVM_CMD_READ";
348     case NVME_CMD_COMPARE:          return "NVME_NVM_CMD_COMPARE";
349     case NVME_CMD_WRITE_ZEROES:     return "NVME_NVM_CMD_WRITE_ZEROES";
350     case NVME_CMD_DSM:              return "NVME_NVM_CMD_DSM";
351     case NVME_CMD_VERIFY:           return "NVME_NVM_CMD_VERIFY";
352     case NVME_CMD_COPY:             return "NVME_NVM_CMD_COPY";
353     case NVME_CMD_ZONE_MGMT_SEND:   return "NVME_ZONED_CMD_MGMT_SEND";
354     case NVME_CMD_ZONE_MGMT_RECV:   return "NVME_ZONED_CMD_MGMT_RECV";
355     case NVME_CMD_ZONE_APPEND:      return "NVME_ZONED_CMD_ZONE_APPEND";
356     default:                        return "NVME_NVM_CMD_UNKNOWN";
357     }
358 }
359 
/*
 * Submission queue state.
 * NOTE(review): head/tail/size/dma_addr presumably describe the guest-side
 * ring - confirm in the controller implementation.
 */
360 typedef struct NvmeSQueue {
361     struct NvmeCtrl *ctrl;   /* owning controller; returned by nvme_ctrl() */
362     uint16_t    sqid;
363     uint16_t    cqid;        /* index into NvmeCtrl.cq used by nvme_cq() */
364     uint32_t    head;
365     uint32_t    tail;
366     uint32_t    size;
367     uint64_t    dma_addr;
368     QEMUTimer   *timer;
369     NvmeRequest *io_req;
370     QTAILQ_HEAD(, NvmeRequest) req_list;
371     QTAILQ_HEAD(, NvmeRequest) out_req_list;
372     QTAILQ_ENTRY(NvmeSQueue) entry; /* linkage for NvmeCQueue.sq_list */
373 } NvmeSQueue;
374 
/*
 * Completion queue state.
 * NOTE(review): head/tail/size/dma_addr presumably describe the guest-side
 * ring and phase the current phase tag - confirm in the controller
 * implementation.
 */
375 typedef struct NvmeCQueue {
376     struct NvmeCtrl *ctrl;
377     uint8_t     phase;
378     uint16_t    cqid;
379     uint16_t    irq_enabled;
380     uint32_t    head;
381     uint32_t    tail;
382     uint32_t    vector;
383     uint32_t    size;
384     uint64_t    dma_addr;
385     QEMUTimer   *timer;
386     QTAILQ_HEAD(, NvmeSQueue) sq_list;   /* list of submission queues (linked via NvmeSQueue.entry) */
387     QTAILQ_HEAD(, NvmeRequest) req_list; /* list of requests (linked via NvmeRequest.entry) */
388 } NvmeCQueue;
389 
/* Type name and checked-cast helper for the NVMe controller device. */
390 #define TYPE_NVME "nvme"
391 #define NVME(obj) \
392         OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)
393 
/*
 * Configuration values of a controller, stored in NvmeCtrl.params.
 * NOTE(review): individual field meanings are not visible in this header;
 * see the controller's property definitions.
 */
394 typedef struct NvmeParams {
395     char     *serial;
396     uint32_t num_queues; /* deprecated since 5.1 */
397     uint32_t max_ioqpairs;
398     uint16_t msix_qsize;
399     uint32_t cmb_size_mb;
400     uint8_t  aerl;
401     uint32_t aer_max_queued;
402     uint8_t  mdts;
403     uint8_t  vsl;
404     bool     use_intel_id;
405     uint8_t  zasl;
406     bool     auto_transition_zones;
407     bool     legacy_cmb;
408 } NvmeParams;
409 
/*
 * Runtime state of one NVMe controller (a PCI device).
 * NOTE(review): only fields whose role is visible in this header are
 * annotated; consult the controller implementation for the rest.
 */
410 typedef struct NvmeCtrl {
411     PCIDevice    parent_obj;
412     MemoryRegion bar0;
413     MemoryRegion iomem;
414     NvmeBar      bar;
415     NvmeParams   params;
416     NvmeBus      bus;
417 
418     uint16_t    cntlid;
419     bool        qs_created;
420     uint32_t    page_size;
421     uint16_t    page_bits;
422     uint16_t    max_prp_ents;
423     uint16_t    cqe_size;
424     uint16_t    sqe_size;
425     uint32_t    reg_size;
426     uint32_t    max_q_ents;
427     uint8_t     outstanding_aers;
428     uint32_t    irq_status;
429     int         cq_pending;
430     uint64_t    host_timestamp;                 /* Timestamp sent by the host */
431     uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
432     uint64_t    starttime_ms;
433     uint16_t    temperature;
434     uint8_t     smart_critical_warning;
435 
    /* NOTE(review): presumably Controller Memory Buffer state - confirm. */
436     struct {
437         MemoryRegion mem;
438         uint8_t      *buf;
439         bool         cmse;
440         hwaddr       cba;
441     } cmb;
442 
    /* NOTE(review): presumably Persistent Memory Region state - confirm. */
443     struct {
444         HostMemoryBackend *dev;
445         bool              cmse;
446         hwaddr            cba;
447     } pmr;
448 
449     uint8_t     aer_mask;
450     NvmeRequest **aer_reqs;
451     QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;
452     int         aer_queued;
453 
454     uint32_t    dmrsl;
455 
456     /* Namespace IDs start at 1, so the bitmap is 1-based (bit 0 is unused) */
457 #define NVME_CHANGED_NSID_SIZE  (NVME_MAX_NAMESPACES + 1)
458     DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE);
459 
460     NvmeSubsystem   *subsys;
461 
462     NvmeNamespace   namespace;
463     NvmeNamespace   *namespaces[NVME_MAX_NAMESPACES + 1]; /* indexed by NSID, see nvme_ns() */
464     NvmeSQueue      **sq;
465     NvmeCQueue      **cq;      /* indexed by completion queue ID, see nvme_cq() */
466     NvmeSQueue      admin_sq;
467     NvmeCQueue      admin_cq;
468     NvmeIdCtrl      id_ctrl;
469 
470     struct {
471         struct {
472             uint16_t temp_thresh_hi;
473             uint16_t temp_thresh_low;
474         };
475 
476         uint32_t                async_config;
477         NvmeHostBehaviorSupport hbs;
478     } features;
479 } NvmeCtrl;
480 
481 static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
482 {
483     if (!nsid || nsid > NVME_MAX_NAMESPACES) {
484         return NULL;
485     }
486 
487     return n->namespaces[nsid];
488 }
489 
490 static inline NvmeCQueue *nvme_cq(NvmeRequest *req)
491 {
492     NvmeSQueue *sq = req->sq;
493     NvmeCtrl *n = sq->ctrl;
494 
495     return n->cq[sq->cqid];
496 }
497 
498 static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
499 {
500     NvmeSQueue *sq = req->sq;
501     return sq->ctrl;
502 }
503 
504 static inline uint16_t nvme_cid(NvmeRequest *req)
505 {
506     if (!req) {
507         return 0xffff;
508     }
509 
510     return le16_to_cpu(req->cqe.cid);
511 }
512 
/*
 * Controller helpers defined outside this header.
 * NOTE(review): the uint16_t return values are presumably NVMe status codes
 * - confirm in the implementation.
 */
513 void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
514 uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len,
515                           NvmeTxDirection dir, NvmeRequest *req);
516 uint16_t nvme_bounce_mdata(NvmeCtrl *n, void *ptr, uint32_t len,
517                            NvmeTxDirection dir, NvmeRequest *req);
/* Signature matches a block-layer completion callback (opaque pointer, int result). */
518 void nvme_rw_complete_cb(void *opaque, int ret);
519 uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
520                        NvmeCmd *cmd);
521 
522 #endif /* HW_NVME_INTERNAL_H */
523