xref: /qemu/hw/nvme/nvme.h (revision 146b5fa505fc21baa129c7538936fbdd9875b6ed)
1 /*
2  * QEMU NVM Express
3  *
4  * Copyright (c) 2012 Intel Corporation
5  * Copyright (c) 2021 Minwoo Im
6  * Copyright (c) 2021 Samsung Electronics Co., Ltd.
7  *
8  * Authors:
9  *   Keith Busch            <kbusch@kernel.org>
10  *   Klaus Jensen           <k.jensen@samsung.com>
11  *   Gollu Appalanaidu      <anaidu.gollu@samsung.com>
12  *   Dmitry Fomichev        <dmitry.fomichev@wdc.com>
13  *   Minwoo Im              <minwoo.im.dev@gmail.com>
14  *
15  * This code is licensed under the GNU GPL v2 or later.
16  */
17 
18 #ifndef HW_NVME_NVME_H
19 #define HW_NVME_NVME_H
20 
21 #include "qemu/uuid.h"
22 #include "hw/pci/pci.h"
23 #include "hw/block/block.h"
24 
25 #include "block/nvme.h"
26 
27 #define NVME_MAX_CONTROLLERS 256
28 #define NVME_MAX_NAMESPACES  256
29 #define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000)
30 
31 QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);
32 
33 typedef struct NvmeCtrl NvmeCtrl;
34 typedef struct NvmeNamespace NvmeNamespace;
35 
36 #define TYPE_NVME_BUS "nvme-bus"
37 OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)
38 
39 typedef struct NvmeBus {
40     BusState parent_bus;
41 } NvmeBus;
42 
/* QOM type name for the NVMe subsystem device and its checked cast. */
#define TYPE_NVME_SUBSYS "nvme-subsys"
#define NVME_SUBSYS(obj) \
    OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)

/*
 * Placeholder pointer value that may occupy a slot of NvmeSubsystem.ctrls[];
 * nvme_subsys_ctrl() reports such a slot as NULL.
 *
 * The replacement list is fully parenthesized so the macro always expands to
 * a single primary expression, whatever operator it is placed next to
 * (e.g. "sizeof SUBSYS_SLOT_RSVD").
 */
#define SUBSYS_SLOT_RSVD ((void *)0xFFFF)
47 
48 typedef struct NvmeSubsystem {
49     DeviceState parent_obj;
50     NvmeBus     bus;
51     uint8_t     subnqn[256];
52     char        *serial;
53 
54     NvmeCtrl      *ctrls[NVME_MAX_CONTROLLERS];
55     NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1];
56 
57     struct {
58         char *nqn;
59     } params;
60 } NvmeSubsystem;
61 
62 int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
63 void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n);
64 
65 static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
66                                          uint32_t cntlid)
67 {
68     if (!subsys || cntlid >= NVME_MAX_CONTROLLERS) {
69         return NULL;
70     }
71 
72     if (subsys->ctrls[cntlid] == SUBSYS_SLOT_RSVD) {
73         return NULL;
74     }
75 
76     return subsys->ctrls[cntlid];
77 }
78 
79 static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
80                                             uint32_t nsid)
81 {
82     if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) {
83         return NULL;
84     }
85 
86     return subsys->namespaces[nsid];
87 }
88 
/* QOM type name for namespace devices and the matching checked cast. */
#define TYPE_NVME_NS "nvme-ns"
#define NVME_NS(obj) OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)
92 
93 typedef struct NvmeZone {
94     NvmeZoneDescr   d;
95     uint64_t        w_ptr;
96     QTAILQ_ENTRY(NvmeZone) entry;
97 } NvmeZone;
98 
99 typedef struct NvmeNamespaceParams {
100     bool     detached;
101     bool     shared;
102     uint32_t nsid;
103     QemuUUID uuid;
104     uint64_t eui64;
105     bool     eui64_default;
106 
107     uint16_t ms;
108     uint8_t  mset;
109     uint8_t  pi;
110     uint8_t  pil;
111     uint8_t  pif;
112 
113     uint16_t mssrl;
114     uint32_t mcl;
115     uint8_t  msrc;
116 
117     bool     zoned;
118     bool     cross_zone_read;
119     uint64_t zone_size_bs;
120     uint64_t zone_cap_bs;
121     uint32_t max_active_zones;
122     uint32_t max_open_zones;
123     uint32_t zd_extension_size;
124 
125     uint32_t numzrwa;
126     uint64_t zrwas;
127     uint64_t zrwafg;
128 } NvmeNamespaceParams;
129 
130 typedef struct NvmeNamespace {
131     DeviceState  parent_obj;
132     BlockConf    blkconf;
133     int32_t      bootindex;
134     int64_t      size;
135     int64_t      moff;
136     NvmeIdNs     id_ns;
137     NvmeIdNsNvm  id_ns_nvm;
138     NvmeLBAF     lbaf;
139     unsigned int nlbaf;
140     size_t       lbasz;
141     const uint32_t *iocs;
142     uint8_t      csi;
143     uint16_t     status;
144     int          attached;
145     uint8_t      pif;
146 
147     struct {
148         uint16_t zrwas;
149         uint16_t zrwafg;
150         uint32_t numzrwa;
151     } zns;
152 
153     QTAILQ_ENTRY(NvmeNamespace) entry;
154 
155     NvmeIdNsZoned   *id_ns_zoned;
156     NvmeZone        *zone_array;
157     QTAILQ_HEAD(, NvmeZone) exp_open_zones;
158     QTAILQ_HEAD(, NvmeZone) imp_open_zones;
159     QTAILQ_HEAD(, NvmeZone) closed_zones;
160     QTAILQ_HEAD(, NvmeZone) full_zones;
161     uint32_t        num_zones;
162     uint64_t        zone_size;
163     uint64_t        zone_capacity;
164     uint32_t        zone_size_log2;
165     uint8_t         *zd_extensions;
166     int32_t         nr_open_zones;
167     int32_t         nr_active_zones;
168 
169     NvmeNamespaceParams params;
170 
171     struct {
172         uint32_t err_rec;
173     } features;
174 } NvmeNamespace;
175 
176 static inline uint32_t nvme_nsid(NvmeNamespace *ns)
177 {
178     if (ns) {
179         return ns->params.nsid;
180     }
181 
182     return 0;
183 }
184 
185 static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba)
186 {
187     return lba << ns->lbaf.ds;
188 }
189 
190 static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba)
191 {
192     return ns->lbaf.ms * lba;
193 }
194 
195 static inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba)
196 {
197     return ns->moff + nvme_m2b(ns, lba);
198 }
199 
200 static inline bool nvme_ns_ext(NvmeNamespace *ns)
201 {
202     return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas);
203 }
204 
205 static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone)
206 {
207     return zone->d.zs >> 4;
208 }
209 
210 static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state)
211 {
212     zone->d.zs = state << 4;
213 }
214 
215 static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone)
216 {
217     return zone->d.zslba + ns->zone_size;
218 }
219 
220 static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone)
221 {
222     return zone->d.zslba + zone->d.zcap;
223 }
224 
225 static inline bool nvme_wp_is_valid(NvmeZone *zone)
226 {
227     uint8_t st = nvme_get_zone_state(zone);
228 
229     return st != NVME_ZONE_STATE_FULL &&
230            st != NVME_ZONE_STATE_READ_ONLY &&
231            st != NVME_ZONE_STATE_OFFLINE;
232 }
233 
234 static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns,
235                                              uint32_t zone_idx)
236 {
237     return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size];
238 }
239 
240 static inline void nvme_aor_inc_open(NvmeNamespace *ns)
241 {
242     assert(ns->nr_open_zones >= 0);
243     if (ns->params.max_open_zones) {
244         ns->nr_open_zones++;
245         assert(ns->nr_open_zones <= ns->params.max_open_zones);
246     }
247 }
248 
249 static inline void nvme_aor_dec_open(NvmeNamespace *ns)
250 {
251     if (ns->params.max_open_zones) {
252         assert(ns->nr_open_zones > 0);
253         ns->nr_open_zones--;
254     }
255     assert(ns->nr_open_zones >= 0);
256 }
257 
258 static inline void nvme_aor_inc_active(NvmeNamespace *ns)
259 {
260     assert(ns->nr_active_zones >= 0);
261     if (ns->params.max_active_zones) {
262         ns->nr_active_zones++;
263         assert(ns->nr_active_zones <= ns->params.max_active_zones);
264     }
265 }
266 
267 static inline void nvme_aor_dec_active(NvmeNamespace *ns)
268 {
269     if (ns->params.max_active_zones) {
270         assert(ns->nr_active_zones > 0);
271         ns->nr_active_zones--;
272         assert(ns->nr_active_zones >= ns->nr_open_zones);
273     }
274     assert(ns->nr_active_zones >= 0);
275 }
276 
277 void nvme_ns_init_format(NvmeNamespace *ns);
278 int nvme_ns_setup(NvmeNamespace *ns, Error **errp);
279 void nvme_ns_drain(NvmeNamespace *ns);
280 void nvme_ns_shutdown(NvmeNamespace *ns);
281 void nvme_ns_cleanup(NvmeNamespace *ns);
282 
283 typedef struct NvmeAsyncEvent {
284     QTAILQ_ENTRY(NvmeAsyncEvent) entry;
285     NvmeAerResult result;
286 } NvmeAsyncEvent;
287 
/* Bit flags; NvmeSg has an int 'flags' member (presumably for these). */
enum {
    NVME_SG_ALLOC = (1 << 0),
    NVME_SG_DMA   = (1 << 1),
};
292 
293 typedef struct NvmeSg {
294     int flags;
295 
296     union {
297         QEMUSGList   qsg;
298         QEMUIOVector iov;
299     };
300 } NvmeSg;
301 
/* Transfer direction selector taken by the nvme_bounce_*() prototypes. */
typedef enum NvmeTxDirection {
    NVME_TX_DIRECTION_TO_DEVICE = 0,
    NVME_TX_DIRECTION_FROM_DEVICE = 1,
} NvmeTxDirection;
306 
307 typedef struct NvmeRequest {
308     struct NvmeSQueue       *sq;
309     struct NvmeNamespace    *ns;
310     BlockAIOCB              *aiocb;
311     uint16_t                status;
312     void                    *opaque;
313     NvmeCqe                 cqe;
314     NvmeCmd                 cmd;
315     BlockAcctCookie         acct;
316     NvmeSg                  sg;
317     QTAILQ_ENTRY(NvmeRequest)entry;
318 } NvmeRequest;
319 
320 typedef struct NvmeBounceContext {
321     NvmeRequest *req;
322 
323     struct {
324         QEMUIOVector iov;
325         uint8_t *bounce;
326     } data, mdata;
327 } NvmeBounceContext;
328 
329 static inline const char *nvme_adm_opc_str(uint8_t opc)
330 {
331     switch (opc) {
332     case NVME_ADM_CMD_DELETE_SQ:        return "NVME_ADM_CMD_DELETE_SQ";
333     case NVME_ADM_CMD_CREATE_SQ:        return "NVME_ADM_CMD_CREATE_SQ";
334     case NVME_ADM_CMD_GET_LOG_PAGE:     return "NVME_ADM_CMD_GET_LOG_PAGE";
335     case NVME_ADM_CMD_DELETE_CQ:        return "NVME_ADM_CMD_DELETE_CQ";
336     case NVME_ADM_CMD_CREATE_CQ:        return "NVME_ADM_CMD_CREATE_CQ";
337     case NVME_ADM_CMD_IDENTIFY:         return "NVME_ADM_CMD_IDENTIFY";
338     case NVME_ADM_CMD_ABORT:            return "NVME_ADM_CMD_ABORT";
339     case NVME_ADM_CMD_SET_FEATURES:     return "NVME_ADM_CMD_SET_FEATURES";
340     case NVME_ADM_CMD_GET_FEATURES:     return "NVME_ADM_CMD_GET_FEATURES";
341     case NVME_ADM_CMD_ASYNC_EV_REQ:     return "NVME_ADM_CMD_ASYNC_EV_REQ";
342     case NVME_ADM_CMD_NS_ATTACHMENT:    return "NVME_ADM_CMD_NS_ATTACHMENT";
343     case NVME_ADM_CMD_VIRT_MNGMT:       return "NVME_ADM_CMD_VIRT_MNGMT";
344     case NVME_ADM_CMD_DBBUF_CONFIG:     return "NVME_ADM_CMD_DBBUF_CONFIG";
345     case NVME_ADM_CMD_FORMAT_NVM:       return "NVME_ADM_CMD_FORMAT_NVM";
346     default:                            return "NVME_ADM_CMD_UNKNOWN";
347     }
348 }
349 
350 static inline const char *nvme_io_opc_str(uint8_t opc)
351 {
352     switch (opc) {
353     case NVME_CMD_FLUSH:            return "NVME_NVM_CMD_FLUSH";
354     case NVME_CMD_WRITE:            return "NVME_NVM_CMD_WRITE";
355     case NVME_CMD_READ:             return "NVME_NVM_CMD_READ";
356     case NVME_CMD_COMPARE:          return "NVME_NVM_CMD_COMPARE";
357     case NVME_CMD_WRITE_ZEROES:     return "NVME_NVM_CMD_WRITE_ZEROES";
358     case NVME_CMD_DSM:              return "NVME_NVM_CMD_DSM";
359     case NVME_CMD_VERIFY:           return "NVME_NVM_CMD_VERIFY";
360     case NVME_CMD_COPY:             return "NVME_NVM_CMD_COPY";
361     case NVME_CMD_ZONE_MGMT_SEND:   return "NVME_ZONED_CMD_MGMT_SEND";
362     case NVME_CMD_ZONE_MGMT_RECV:   return "NVME_ZONED_CMD_MGMT_RECV";
363     case NVME_CMD_ZONE_APPEND:      return "NVME_ZONED_CMD_ZONE_APPEND";
364     default:                        return "NVME_NVM_CMD_UNKNOWN";
365     }
366 }
367 
368 typedef struct NvmeSQueue {
369     struct NvmeCtrl *ctrl;
370     uint16_t    sqid;
371     uint16_t    cqid;
372     uint32_t    head;
373     uint32_t    tail;
374     uint32_t    size;
375     uint64_t    dma_addr;
376     uint64_t    db_addr;
377     uint64_t    ei_addr;
378     QEMUTimer   *timer;
379     NvmeRequest *io_req;
380     QTAILQ_HEAD(, NvmeRequest) req_list;
381     QTAILQ_HEAD(, NvmeRequest) out_req_list;
382     QTAILQ_ENTRY(NvmeSQueue) entry;
383 } NvmeSQueue;
384 
385 typedef struct NvmeCQueue {
386     struct NvmeCtrl *ctrl;
387     uint8_t     phase;
388     uint16_t    cqid;
389     uint16_t    irq_enabled;
390     uint32_t    head;
391     uint32_t    tail;
392     uint32_t    vector;
393     uint32_t    size;
394     uint64_t    dma_addr;
395     uint64_t    db_addr;
396     uint64_t    ei_addr;
397     QEMUTimer   *timer;
398     QTAILQ_HEAD(, NvmeSQueue) sq_list;
399     QTAILQ_HEAD(, NvmeRequest) req_list;
400 } NvmeCQueue;
401 
/* QOM type name for the NVMe controller and the matching checked cast. */
#define TYPE_NVME "nvme"
#define NVME(obj) OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)
405 
/* Controller configuration; NvmeCtrl embeds it as its 'params' member. */
typedef struct NvmeParams {
    char *serial;
    uint32_t num_queues;        /* deprecated since 5.1 */
    uint32_t max_ioqpairs;
    uint16_t msix_qsize;
    uint32_t cmb_size_mb;
    uint8_t aerl;
    uint32_t aer_max_queued;
    uint8_t mdts;
    uint8_t vsl;
    bool use_intel_id;
    uint8_t zasl;
    bool auto_transition_zones;
    bool legacy_cmb;
    uint8_t sriov_max_vfs;
    uint16_t sriov_vq_flexible;
    uint16_t sriov_vi_flexible;
    uint8_t sriov_max_vq_per_vf;
    uint8_t sriov_max_vi_per_vf;
} NvmeParams;
426 
427 typedef struct NvmeCtrl {
428     PCIDevice    parent_obj;
429     MemoryRegion bar0;
430     MemoryRegion iomem;
431     NvmeBar      bar;
432     NvmeParams   params;
433     NvmeBus      bus;
434 
435     uint16_t    cntlid;
436     bool        qs_created;
437     uint32_t    page_size;
438     uint16_t    page_bits;
439     uint16_t    max_prp_ents;
440     uint16_t    cqe_size;
441     uint16_t    sqe_size;
442     uint32_t    max_q_ents;
443     uint8_t     outstanding_aers;
444     uint32_t    irq_status;
445     int         cq_pending;
446     uint64_t    host_timestamp;                 /* Timestamp sent by the host */
447     uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
448     uint64_t    starttime_ms;
449     uint16_t    temperature;
450     uint8_t     smart_critical_warning;
451     uint32_t    conf_msix_qsize;
452     uint32_t    conf_ioqpairs;
453     uint64_t    dbbuf_dbs;
454     uint64_t    dbbuf_eis;
455     bool        dbbuf_enabled;
456 
457     struct {
458         MemoryRegion mem;
459         uint8_t      *buf;
460         bool         cmse;
461         hwaddr       cba;
462     } cmb;
463 
464     struct {
465         HostMemoryBackend *dev;
466         bool              cmse;
467         hwaddr            cba;
468     } pmr;
469 
470     uint8_t     aer_mask;
471     NvmeRequest **aer_reqs;
472     QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;
473     int         aer_queued;
474 
475     uint32_t    dmrsl;
476 
477     /* Namespace ID is started with 1 so bitmap should be 1-based */
478 #define NVME_CHANGED_NSID_SIZE  (NVME_MAX_NAMESPACES + 1)
479     DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE);
480 
481     NvmeSubsystem   *subsys;
482 
483     NvmeNamespace   namespace;
484     NvmeNamespace   *namespaces[NVME_MAX_NAMESPACES + 1];
485     NvmeSQueue      **sq;
486     NvmeCQueue      **cq;
487     NvmeSQueue      admin_sq;
488     NvmeCQueue      admin_cq;
489     NvmeIdCtrl      id_ctrl;
490 
491     struct {
492         struct {
493             uint16_t temp_thresh_hi;
494             uint16_t temp_thresh_low;
495         };
496 
497         uint32_t                async_config;
498         NvmeHostBehaviorSupport hbs;
499     } features;
500 
501     NvmePriCtrlCap  pri_ctrl_cap;
502     NvmeSecCtrlList sec_ctrl_list;
503     struct {
504         uint16_t    vqrfap;
505         uint16_t    virfap;
506     } next_pri_ctrl_cap;    /* These override pri_ctrl_cap after reset */
507 } NvmeCtrl;
508 
/* Reset kinds; the values are fixed at 0 and 1. */
typedef enum NvmeResetType {
    NVME_RESET_FUNCTION = 0,
    NVME_RESET_CONTROLLER = 1,
} NvmeResetType;
513 
514 static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
515 {
516     if (!nsid || nsid > NVME_MAX_NAMESPACES) {
517         return NULL;
518     }
519 
520     return n->namespaces[nsid];
521 }
522 
523 static inline NvmeCQueue *nvme_cq(NvmeRequest *req)
524 {
525     NvmeSQueue *sq = req->sq;
526     NvmeCtrl *n = sq->ctrl;
527 
528     return n->cq[sq->cqid];
529 }
530 
531 static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
532 {
533     NvmeSQueue *sq = req->sq;
534     return sq->ctrl;
535 }
536 
537 static inline uint16_t nvme_cid(NvmeRequest *req)
538 {
539     if (!req) {
540         return 0xffff;
541     }
542 
543     return le16_to_cpu(req->cqe.cid);
544 }
545 
546 static inline NvmeSecCtrlEntry *nvme_sctrl(NvmeCtrl *n)
547 {
548     PCIDevice *pci_dev = &n->parent_obj;
549     NvmeCtrl *pf = NVME(pcie_sriov_get_pf(pci_dev));
550 
551     if (pci_is_vf(pci_dev)) {
552         return &pf->sec_ctrl_list.sec[pcie_sriov_vf_number(pci_dev)];
553     }
554 
555     return NULL;
556 }
557 
558 static inline NvmeSecCtrlEntry *nvme_sctrl_for_cntlid(NvmeCtrl *n,
559                                                       uint16_t cntlid)
560 {
561     NvmeSecCtrlList *list = &n->sec_ctrl_list;
562     uint8_t i;
563 
564     for (i = 0; i < list->numcntl; i++) {
565         if (le16_to_cpu(list->sec[i].scid) == cntlid) {
566             return &list->sec[i];
567         }
568     }
569 
570     return NULL;
571 }
572 
573 void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
574 uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len,
575                           NvmeTxDirection dir, NvmeRequest *req);
576 uint16_t nvme_bounce_mdata(NvmeCtrl *n, void *ptr, uint32_t len,
577                            NvmeTxDirection dir, NvmeRequest *req);
578 void nvme_rw_complete_cb(void *opaque, int ret);
579 uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
580                        NvmeCmd *cmd);
581 
582 #endif /* HW_NVME_NVME_H */
583