xref: /qemu/hw/nvme/nvme.h (revision e2c1c34f139f49ef909bb4322607fb8b39002312)
1  /*
2   * QEMU NVM Express
3   *
4   * Copyright (c) 2012 Intel Corporation
5   * Copyright (c) 2021 Minwoo Im
6   * Copyright (c) 2021 Samsung Electronics Co., Ltd.
7   *
8   * Authors:
9   *   Keith Busch            <kbusch@kernel.org>
10   *   Klaus Jensen           <k.jensen@samsung.com>
11   *   Gollu Appalanaidu      <anaidu.gollu@samsung.com>
12   *   Dmitry Fomichev        <dmitry.fomichev@wdc.com>
13   *   Minwoo Im              <minwoo.im.dev@gmail.com>
14   *
15   * This code is licensed under the GNU GPL v2 or later.
16   */
17  
18  #ifndef HW_NVME_NVME_H
19  #define HW_NVME_NVME_H
20  
21  #include "qemu/uuid.h"
22  #include "hw/pci/pci_device.h"
23  #include "hw/block/block.h"
24  
25  #include "block/nvme.h"
26  
27  #define NVME_MAX_CONTROLLERS 256
28  #define NVME_MAX_NAMESPACES  256
29  #define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000)
30  
31  QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);
32  
/* Forward declarations; full definitions appear later in this header. */
typedef struct NvmeCtrl NvmeCtrl;
typedef struct NvmeNamespace NvmeNamespace;

#define TYPE_NVME_BUS "nvme-bus"
OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)

/* Bus that nvme-ns devices sit on (embedded in both controller and subsystem). */
typedef struct NvmeBus {
    BusState parent_bus;
} NvmeBus;

#define TYPE_NVME_SUBSYS "nvme-subsys"
#define NVME_SUBSYS(obj) \
    OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
/*
 * Sentinel stored in NvmeSubsystem.ctrls[] for a slot that is reserved but
 * does not hold a live controller; nvme_subsys_ctrl() filters it out.
 */
#define SUBSYS_SLOT_RSVD (void *)0xFFFF

/* An NVM subsystem: the controllers registered with it and its namespaces. */
typedef struct NvmeSubsystem {
    DeviceState parent_obj;
    NvmeBus     bus;
    uint8_t     subnqn[256];    /* subsystem NVMe Qualified Name */
    char        *serial;

    /* indexed by cntlid; an entry may also be SUBSYS_SLOT_RSVD */
    NvmeCtrl      *ctrls[NVME_MAX_CONTROLLERS];
    /* indexed by NSID, which is 1-based -- hence the +1 */
    NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1];

    struct {
        char *nqn;  /* configured NQN; NOTE(review): subnqn presumably derives from this */
    } params;
} NvmeSubsystem;
61  
62  int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
63  void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n);
64  
65  static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
66                                           uint32_t cntlid)
67  {
68      if (!subsys || cntlid >= NVME_MAX_CONTROLLERS) {
69          return NULL;
70      }
71  
72      if (subsys->ctrls[cntlid] == SUBSYS_SLOT_RSVD) {
73          return NULL;
74      }
75  
76      return subsys->ctrls[cntlid];
77  }
78  
79  static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
80                                              uint32_t nsid)
81  {
82      if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) {
83          return NULL;
84      }
85  
86      return subsys->namespaces[nsid];
87  }
88  
#define TYPE_NVME_NS "nvme-ns"
#define NVME_NS(obj) \
    OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)

/* Runtime state of a single zone (Zoned Namespace command set). */
typedef struct NvmeZone {
    NvmeZoneDescr   d;      /* zone descriptor (state, zslba, zcap, wp, ...) */
    uint64_t        w_ptr;  /* internal write pointer */
    QTAILQ_ENTRY(NvmeZone) entry;   /* linkage on the per-state zone lists */
} NvmeZone;

/*
 * Configuration parameters of an nvme-ns device.  Most field names follow
 * the corresponding NVMe Identify Namespace fields.
 */
typedef struct NvmeNamespaceParams {
    bool     detached;
    bool     shared;
    uint32_t nsid;          /* 0 means "pick one"; NOTE(review): verify in nvme-ns.c */
    QemuUUID uuid;
    uint64_t eui64;
    bool     eui64_default; /* eui64 was defaulted (NVME_EUI64_DEFAULT base) */

    /* metadata and end-to-end protection information settings */
    uint16_t ms;
    uint8_t  mset;
    uint8_t  pi;
    uint8_t  pil;
    uint8_t  pif;

    uint16_t mssrl;
    uint32_t mcl;
    uint8_t  msrc;

    /* zoned namespace settings */
    bool     zoned;
    bool     cross_zone_read;
    uint64_t zone_size_bs;  /* zone size in bytes */
    uint64_t zone_cap_bs;   /* zone capacity in bytes */
    uint32_t max_active_zones;
    uint32_t max_open_zones;
    uint32_t zd_extension_size;

    /* ZRWA (Zone Random Write Area) settings */
    uint32_t numzrwa;
    uint64_t zrwas;
    uint64_t zrwafg;
} NvmeNamespaceParams;
129  
/*
 * Per-namespace device state.  The identify structures (id_ns, id_ns_nvm,
 * id_ns_zoned) hold data reported to the host; most other fields cache
 * values derived from them or track runtime state.
 */
typedef struct NvmeNamespace {
    DeviceState  parent_obj;
    BlockConf    blkconf;       /* backing block device configuration */
    int32_t      bootindex;
    int64_t      size;          /* data area size in bytes */
    int64_t      moff;          /* start of the metadata area; see nvme_moff() */
    NvmeIdNs     id_ns;
    NvmeIdNsNvm  id_ns_nvm;
    NvmeLBAF     lbaf;          /* currently formatted LBA format */
    unsigned int nlbaf;         /* number of LBA formats in use */
    size_t       lbasz;         /* cached LBA size; presumably 1 << lbaf.ds */
    const uint32_t *iocs;       /* supported-commands table for this csi */
    uint8_t      csi;           /* command set identifier */
    uint16_t     status;
    int          attached;      /* attachment count */
    uint8_t      pif;           /* protection information format */

    /* negotiated ZRWA limits (see NvmeNamespaceParams.numzrwa etc.) */
    struct {
        uint16_t zrwas;
        uint16_t zrwafg;
        uint32_t numzrwa;
    } zns;

    QTAILQ_ENTRY(NvmeNamespace) entry;

    /* zoned namespace state */
    NvmeIdNsZoned   *id_ns_zoned;
    NvmeZone        *zone_array;    /* all zones, indexed by zone number */
    QTAILQ_HEAD(, NvmeZone) exp_open_zones;     /* explicitly opened */
    QTAILQ_HEAD(, NvmeZone) imp_open_zones;     /* implicitly opened */
    QTAILQ_HEAD(, NvmeZone) closed_zones;
    QTAILQ_HEAD(, NvmeZone) full_zones;
    uint32_t        num_zones;
    uint64_t        zone_size;      /* in LBAs; see nvme_zone_rd_boundary() */
    uint64_t        zone_capacity;  /* in LBAs; see nvme_zone_wr_boundary() */
    uint32_t        zone_size_log2;
    uint8_t         *zd_extensions; /* flat array; see nvme_get_zd_extension() */
    int32_t         nr_open_zones;      /* bounded by params.max_open_zones */
    int32_t         nr_active_zones;    /* bounded by params.max_active_zones */

    NvmeNamespaceParams params;

    struct {
        uint32_t err_rec;   /* Error Recovery feature value */
    } features;
} NvmeNamespace;
175  
176  static inline uint32_t nvme_nsid(NvmeNamespace *ns)
177  {
178      if (ns) {
179          return ns->params.nsid;
180      }
181  
182      return 0;
183  }
184  
185  static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba)
186  {
187      return lba << ns->lbaf.ds;
188  }
189  
190  static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba)
191  {
192      return ns->lbaf.ms * lba;
193  }
194  
195  static inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba)
196  {
197      return ns->moff + nvme_m2b(ns, lba);
198  }
199  
200  static inline bool nvme_ns_ext(NvmeNamespace *ns)
201  {
202      return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas);
203  }
204  
205  static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone)
206  {
207      return zone->d.zs >> 4;
208  }
209  
210  static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state)
211  {
212      zone->d.zs = state << 4;
213  }
214  
215  static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone)
216  {
217      return zone->d.zslba + ns->zone_size;
218  }
219  
220  static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone)
221  {
222      return zone->d.zslba + zone->d.zcap;
223  }
224  
225  static inline bool nvme_wp_is_valid(NvmeZone *zone)
226  {
227      uint8_t st = nvme_get_zone_state(zone);
228  
229      return st != NVME_ZONE_STATE_FULL &&
230             st != NVME_ZONE_STATE_READ_ONLY &&
231             st != NVME_ZONE_STATE_OFFLINE;
232  }
233  
234  static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns,
235                                               uint32_t zone_idx)
236  {
237      return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size];
238  }
239  
240  static inline void nvme_aor_inc_open(NvmeNamespace *ns)
241  {
242      assert(ns->nr_open_zones >= 0);
243      if (ns->params.max_open_zones) {
244          ns->nr_open_zones++;
245          assert(ns->nr_open_zones <= ns->params.max_open_zones);
246      }
247  }
248  
249  static inline void nvme_aor_dec_open(NvmeNamespace *ns)
250  {
251      if (ns->params.max_open_zones) {
252          assert(ns->nr_open_zones > 0);
253          ns->nr_open_zones--;
254      }
255      assert(ns->nr_open_zones >= 0);
256  }
257  
258  static inline void nvme_aor_inc_active(NvmeNamespace *ns)
259  {
260      assert(ns->nr_active_zones >= 0);
261      if (ns->params.max_active_zones) {
262          ns->nr_active_zones++;
263          assert(ns->nr_active_zones <= ns->params.max_active_zones);
264      }
265  }
266  
267  static inline void nvme_aor_dec_active(NvmeNamespace *ns)
268  {
269      if (ns->params.max_active_zones) {
270          assert(ns->nr_active_zones > 0);
271          ns->nr_active_zones--;
272          assert(ns->nr_active_zones >= ns->nr_open_zones);
273      }
274      assert(ns->nr_active_zones >= 0);
275  }
276  
277  void nvme_ns_init_format(NvmeNamespace *ns);
278  int nvme_ns_setup(NvmeNamespace *ns, Error **errp);
279  void nvme_ns_drain(NvmeNamespace *ns);
280  void nvme_ns_shutdown(NvmeNamespace *ns);
281  void nvme_ns_cleanup(NvmeNamespace *ns);
282  
/* A queued Asynchronous Event Request result. */
typedef struct NvmeAsyncEvent {
    QTAILQ_ENTRY(NvmeAsyncEvent) entry;
    NvmeAerResult result;
} NvmeAsyncEvent;

/* Flags for NvmeSg.flags. */
enum {
    NVME_SG_ALLOC = 1 << 0,     /* NOTE(review): presumably "union member initialized" */
    NVME_SG_DMA   = 1 << 1,     /* union holds qsg; otherwise iov */
};

/*
 * Scatter/gather description of a transfer; NVME_SG_DMA in flags selects
 * which union member is active.
 */
typedef struct NvmeSg {
    int flags;

    union {
        QEMUSGList   qsg;
        QEMUIOVector iov;
    };
} NvmeSg;

/* Direction of a host <-> device data transfer. */
typedef enum NvmeTxDirection {
    NVME_TX_DIRECTION_TO_DEVICE   = 0,
    NVME_TX_DIRECTION_FROM_DEVICE = 1,
} NvmeTxDirection;

/* State of one in-flight NVMe command. */
typedef struct NvmeRequest {
    struct NvmeSQueue       *sq;        /* submission queue the command arrived on */
    struct NvmeNamespace    *ns;
    BlockAIOCB              *aiocb;
    uint16_t                status;     /* NVMe status code for the completion */
    void                    *opaque;
    NvmeCqe                 cqe;        /* completion queue entry to post */
    NvmeCmd                 cmd;        /* copy of the submitted command */
    BlockAcctCookie         acct;
    NvmeSg                  sg;
    QTAILQ_ENTRY(NvmeRequest)entry;
} NvmeRequest;

/* Per-request data/metadata bounce buffers and their iovecs. */
typedef struct NvmeBounceContext {
    NvmeRequest *req;

    struct {
        QEMUIOVector iov;
        uint8_t *bounce;
    } data, mdata;  /* one half for data, one for metadata */
} NvmeBounceContext;
328  
329  static inline const char *nvme_adm_opc_str(uint8_t opc)
330  {
331      switch (opc) {
332      case NVME_ADM_CMD_DELETE_SQ:        return "NVME_ADM_CMD_DELETE_SQ";
333      case NVME_ADM_CMD_CREATE_SQ:        return "NVME_ADM_CMD_CREATE_SQ";
334      case NVME_ADM_CMD_GET_LOG_PAGE:     return "NVME_ADM_CMD_GET_LOG_PAGE";
335      case NVME_ADM_CMD_DELETE_CQ:        return "NVME_ADM_CMD_DELETE_CQ";
336      case NVME_ADM_CMD_CREATE_CQ:        return "NVME_ADM_CMD_CREATE_CQ";
337      case NVME_ADM_CMD_IDENTIFY:         return "NVME_ADM_CMD_IDENTIFY";
338      case NVME_ADM_CMD_ABORT:            return "NVME_ADM_CMD_ABORT";
339      case NVME_ADM_CMD_SET_FEATURES:     return "NVME_ADM_CMD_SET_FEATURES";
340      case NVME_ADM_CMD_GET_FEATURES:     return "NVME_ADM_CMD_GET_FEATURES";
341      case NVME_ADM_CMD_ASYNC_EV_REQ:     return "NVME_ADM_CMD_ASYNC_EV_REQ";
342      case NVME_ADM_CMD_NS_ATTACHMENT:    return "NVME_ADM_CMD_NS_ATTACHMENT";
343      case NVME_ADM_CMD_VIRT_MNGMT:       return "NVME_ADM_CMD_VIRT_MNGMT";
344      case NVME_ADM_CMD_DBBUF_CONFIG:     return "NVME_ADM_CMD_DBBUF_CONFIG";
345      case NVME_ADM_CMD_FORMAT_NVM:       return "NVME_ADM_CMD_FORMAT_NVM";
346      default:                            return "NVME_ADM_CMD_UNKNOWN";
347      }
348  }
349  
350  static inline const char *nvme_io_opc_str(uint8_t opc)
351  {
352      switch (opc) {
353      case NVME_CMD_FLUSH:            return "NVME_NVM_CMD_FLUSH";
354      case NVME_CMD_WRITE:            return "NVME_NVM_CMD_WRITE";
355      case NVME_CMD_READ:             return "NVME_NVM_CMD_READ";
356      case NVME_CMD_COMPARE:          return "NVME_NVM_CMD_COMPARE";
357      case NVME_CMD_WRITE_ZEROES:     return "NVME_NVM_CMD_WRITE_ZEROES";
358      case NVME_CMD_DSM:              return "NVME_NVM_CMD_DSM";
359      case NVME_CMD_VERIFY:           return "NVME_NVM_CMD_VERIFY";
360      case NVME_CMD_COPY:             return "NVME_NVM_CMD_COPY";
361      case NVME_CMD_ZONE_MGMT_SEND:   return "NVME_ZONED_CMD_MGMT_SEND";
362      case NVME_CMD_ZONE_MGMT_RECV:   return "NVME_ZONED_CMD_MGMT_RECV";
363      case NVME_CMD_ZONE_APPEND:      return "NVME_ZONED_CMD_ZONE_APPEND";
364      default:                        return "NVME_NVM_CMD_UNKNOWN";
365      }
366  }
367  
/* A submission queue and its in-flight requests. */
typedef struct NvmeSQueue {
    struct NvmeCtrl *ctrl;
    uint16_t    sqid;
    uint16_t    cqid;       /* completion queue this SQ posts to */
    uint32_t    head;
    uint32_t    tail;
    uint32_t    size;       /* number of entries */
    uint64_t    dma_addr;   /* guest address of the queue itself */
    uint64_t    db_addr;    /* shadow doorbell address (DBBUF config) */
    uint64_t    ei_addr;    /* event index address (DBBUF config) */
    QEMUBH      *bh;        /* bottom half that processes the queue */
    EventNotifier notifier;
    bool        ioeventfd_enabled;
    NvmeRequest *io_req;    /* backing storage for the request lists */
    QTAILQ_HEAD(, NvmeRequest) req_list;        /* available requests */
    QTAILQ_HEAD(, NvmeRequest) out_req_list;    /* outstanding requests */
    QTAILQ_ENTRY(NvmeSQueue) entry;             /* linkage on the CQ's sq_list */
} NvmeSQueue;

/* A completion queue and the submission queues feeding it. */
typedef struct NvmeCQueue {
    struct NvmeCtrl *ctrl;
    uint8_t     phase;      /* current phase tag expected by the host */
    uint16_t    cqid;
    uint16_t    irq_enabled;
    uint32_t    head;
    uint32_t    tail;
    uint32_t    vector;     /* interrupt vector */
    uint32_t    size;       /* number of entries */
    uint64_t    dma_addr;   /* guest address of the queue itself */
    uint64_t    db_addr;    /* shadow doorbell address (DBBUF config) */
    uint64_t    ei_addr;    /* event index address (DBBUF config) */
    QEMUBH      *bh;
    EventNotifier notifier;
    bool        ioeventfd_enabled;
    QTAILQ_HEAD(, NvmeSQueue) sq_list;      /* SQs mapped to this CQ */
    QTAILQ_HEAD(, NvmeRequest) req_list;    /* completions awaiting posting */
} NvmeCQueue;
405  
#define TYPE_NVME "nvme"
#define NVME(obj) \
        OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)

/*
 * Controller configuration ("-device nvme,...").  Several fields mirror
 * NVMe Identify Controller fields of the same name (aerl, mdts, vsl, zasl).
 */
typedef struct NvmeParams {
    char     *serial;
    uint32_t num_queues; /* deprecated since 5.1 */
    uint32_t max_ioqpairs;
    uint16_t msix_qsize;
    uint32_t cmb_size_mb;       /* Controller Memory Buffer size, megabytes */
    uint8_t  aerl;
    uint32_t aer_max_queued;    /* cap on queued async events; see aer_queued */
    uint8_t  mdts;
    uint8_t  vsl;
    bool     use_intel_id;      /* use the legacy Intel PCI vendor/device id */
    uint8_t  zasl;
    bool     auto_transition_zones;
    bool     legacy_cmb;
    bool     ioeventfd;
    /* SR-IOV virtualization management parameters */
    uint8_t  sriov_max_vfs;
    uint16_t sriov_vq_flexible;
    uint16_t sriov_vi_flexible;
    uint8_t  sriov_max_vq_per_vf;
    uint8_t  sriov_max_vi_per_vf;
} NvmeParams;
431  
/* State of one NVMe controller (PCI function). */
typedef struct NvmeCtrl {
    PCIDevice    parent_obj;
    MemoryRegion bar0;
    MemoryRegion iomem;
    NvmeBar      bar;       /* register file backing BAR0 accesses */
    NvmeParams   params;
    NvmeBus      bus;       /* bus that nvme-ns devices attach to */

    uint16_t    cntlid;     /* controller id within the subsystem */
    bool        qs_created;
    uint32_t    page_size;
    uint16_t    page_bits;
    uint16_t    max_prp_ents;
    uint16_t    cqe_size;
    uint16_t    sqe_size;
    uint32_t    max_q_ents;
    uint8_t     outstanding_aers;
    uint32_t    irq_status;
    int         cq_pending;
    uint64_t    host_timestamp;                 /* Timestamp sent by the host */
    uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
    uint64_t    starttime_ms;
    uint16_t    temperature;
    uint8_t     smart_critical_warning;
    uint32_t    conf_msix_qsize;
    uint32_t    conf_ioqpairs;
    /* shadow doorbell buffers configured via NVME_ADM_CMD_DBBUF_CONFIG */
    uint64_t    dbbuf_dbs;
    uint64_t    dbbuf_eis;
    bool        dbbuf_enabled;

    /* Controller Memory Buffer */
    struct {
        MemoryRegion mem;
        uint8_t      *buf;
        bool         cmse;
        hwaddr       cba;
    } cmb;

    /* Persistent Memory Region */
    struct {
        HostMemoryBackend *dev;
        bool              cmse;
        hwaddr            cba;
    } pmr;

    /* Asynchronous Event Request bookkeeping */
    uint8_t     aer_mask;
    NvmeRequest **aer_reqs;
    QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;
    int         aer_queued;     /* bounded by params.aer_max_queued */

    uint32_t    dmrsl;

    /* Namespace ID is started with 1 so bitmap should be 1-based */
#define NVME_CHANGED_NSID_SIZE  (NVME_MAX_NAMESPACES + 1)
    DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE);

    NvmeSubsystem   *subsys;    /* NULL when not part of a subsystem */

    NvmeNamespace   namespace;  /* implicit namespace for drive=... shorthand */
    /* indexed by NSID (1-based, hence the +1); see nvme_ns() */
    NvmeNamespace   *namespaces[NVME_MAX_NAMESPACES + 1];
    NvmeSQueue      **sq;       /* indexed by sqid */
    NvmeCQueue      **cq;       /* indexed by cqid */
    NvmeSQueue      admin_sq;
    NvmeCQueue      admin_cq;
    NvmeIdCtrl      id_ctrl;

    /* current values of Set Features-controlled settings */
    struct {
        struct {
            uint16_t temp_thresh_hi;
            uint16_t temp_thresh_low;
        };

        uint32_t                async_config;
        NvmeHostBehaviorSupport hbs;
    } features;

    /* SR-IOV primary/secondary controller capabilities */
    NvmePriCtrlCap  pri_ctrl_cap;
    NvmeSecCtrlList sec_ctrl_list;
    struct {
        uint16_t    vqrfap;
        uint16_t    virfap;
    } next_pri_ctrl_cap;    /* These override pri_ctrl_cap after reset */
} NvmeCtrl;
513  
/* Scope of a controller reset. */
typedef enum NvmeResetType {
    NVME_RESET_FUNCTION   = 0,  /* whole PCI function */
    NVME_RESET_CONTROLLER = 1,  /* controller level only */
} NvmeResetType;
518  
519  static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
520  {
521      if (!nsid || nsid > NVME_MAX_NAMESPACES) {
522          return NULL;
523      }
524  
525      return n->namespaces[nsid];
526  }
527  
528  static inline NvmeCQueue *nvme_cq(NvmeRequest *req)
529  {
530      NvmeSQueue *sq = req->sq;
531      NvmeCtrl *n = sq->ctrl;
532  
533      return n->cq[sq->cqid];
534  }
535  
536  static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
537  {
538      NvmeSQueue *sq = req->sq;
539      return sq->ctrl;
540  }
541  
542  static inline uint16_t nvme_cid(NvmeRequest *req)
543  {
544      if (!req) {
545          return 0xffff;
546      }
547  
548      return le16_to_cpu(req->cqe.cid);
549  }
550  
551  static inline NvmeSecCtrlEntry *nvme_sctrl(NvmeCtrl *n)
552  {
553      PCIDevice *pci_dev = &n->parent_obj;
554      NvmeCtrl *pf = NVME(pcie_sriov_get_pf(pci_dev));
555  
556      if (pci_is_vf(pci_dev)) {
557          return &pf->sec_ctrl_list.sec[pcie_sriov_vf_number(pci_dev)];
558      }
559  
560      return NULL;
561  }
562  
563  static inline NvmeSecCtrlEntry *nvme_sctrl_for_cntlid(NvmeCtrl *n,
564                                                        uint16_t cntlid)
565  {
566      NvmeSecCtrlList *list = &n->sec_ctrl_list;
567      uint8_t i;
568  
569      for (i = 0; i < list->numcntl; i++) {
570          if (le16_to_cpu(list->sec[i].scid) == cntlid) {
571              return &list->sec[i];
572          }
573      }
574  
575      return NULL;
576  }
577  
578  void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
579  uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len,
580                            NvmeTxDirection dir, NvmeRequest *req);
581  uint16_t nvme_bounce_mdata(NvmeCtrl *n, void *ptr, uint32_t len,
582                             NvmeTxDirection dir, NvmeRequest *req);
583  void nvme_rw_complete_cb(void *opaque, int ret);
584  uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
585                         NvmeCmd *cmd);
586  
587  #endif /* HW_NVME_NVME_H */
588