/*
 * QEMU NVM Express Subsystem: nvme-subsys
 *
 * Copyright (c) 2021 Minwoo Im <minwoo.im.dev@gmail.com>
 *
 * This code is licensed under the GNU GPL v2. Refer COPYING.
 */
8eb2e8974SMinwoo Im
9eb2e8974SMinwoo Im #include "qemu/osdep.h"
1073064edfSJesper Devantier #include "qemu/units.h"
11eb2e8974SMinwoo Im #include "qapi/error.h"
127ef37c1cSKlaus Jensen
13eb2e8974SMinwoo Im #include "nvme.h"
14eb2e8974SMinwoo Im
/* Default value of the "fdp.runs" property (reclaim unit nominal size). */
#define NVME_DEFAULT_RU_SIZE (96 * MiB)
1673064edfSJesper Devantier
/*
 * Reserve up to @num free controller ids in the subsystem of @n, scanning
 * the subsystem's controller table upwards from index @start.
 *
 * Each free slot found is marked SUBSYS_SLOT_RSVD and its index is stored
 * (little-endian) in the scid field of the next unused entry of
 * n->sec_ctrl_list.
 *
 * Returns the number of ids actually reserved; this is smaller than @num
 * when the table has too few free slots at or above @start.
 */
static int nvme_subsys_reserve_cntlids(NvmeCtrl *n, int start, int num)
{
    NvmeSubsystem *subsys = n->subsys;
    NvmeSecCtrlEntry *entries = n->sec_ctrl_list;
    int reserved = 0;
    int slot;

    for (slot = start; slot < ARRAY_SIZE(subsys->ctrls); slot++) {
        if (reserved >= num) {
            break;
        }

        if (subsys->ctrls[slot]) {
            /* slot already taken or reserved */
            continue;
        }

        entries[reserved].scid = cpu_to_le16(slot);
        subsys->ctrls[slot] = SUBSYS_SLOT_RSVD;
        reserved++;
    }

    return reserved;
}
3599f48ae7SLukasz Maniak
/*
 * Release every controller id recorded in n->sec_ctrl_list.
 *
 * The first params.sriov_max_vfs entries of the list are walked; an entry
 * whose scid is zero is treated as holding no reservation. For every other
 * entry the matching subsystem slot, which must currently be
 * SUBSYS_SLOT_RSVD, is cleared and the entry's scid is reset to zero.
 */
static void nvme_subsys_unreserve_cntlids(NvmeCtrl *n)
{
    NvmeSubsystem *subsys = n->subsys;

    for (int idx = 0; idx < n->params.sriov_max_vfs; idx++) {
        NvmeSecCtrlEntry *entry = &n->sec_ctrl_list[idx];
        int id = le16_to_cpu(entry->scid);

        if (!id) {
            continue;
        }

        assert(subsys->ctrls[id] == SUBSYS_SLOT_RSVD);
        subsys->ctrls[id] = NULL;
        entry->scid = 0;
    }
}
5499f48ae7SLukasz Maniak
nvme_subsys_register_ctrl(NvmeCtrl * n,Error ** errp)55e36a261dSMinwoo Im int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
56e36a261dSMinwoo Im {
57e36a261dSMinwoo Im NvmeSubsystem *subsys = n->subsys;
5899f48ae7SLukasz Maniak NvmeSecCtrlEntry *sctrl = nvme_sctrl(n);
598c996e32SKlaus Jensen int cntlid, num_rsvd, num_vfs = n->params.sriov_max_vfs;
60e36a261dSMinwoo Im
6199f48ae7SLukasz Maniak if (pci_is_vf(&n->parent_obj)) {
6299f48ae7SLukasz Maniak cntlid = le16_to_cpu(sctrl->scid);
6399f48ae7SLukasz Maniak } else {
64c6159d0eSMinwoo Im n->sec_ctrl_list = g_new0(NvmeSecCtrlEntry, num_vfs);
65c6159d0eSMinwoo Im
66e36a261dSMinwoo Im for (cntlid = 0; cntlid < ARRAY_SIZE(subsys->ctrls); cntlid++) {
67e36a261dSMinwoo Im if (!subsys->ctrls[cntlid]) {
68e36a261dSMinwoo Im break;
69e36a261dSMinwoo Im }
70e36a261dSMinwoo Im }
71e36a261dSMinwoo Im
72e36a261dSMinwoo Im if (cntlid == ARRAY_SIZE(subsys->ctrls)) {
73e36a261dSMinwoo Im error_setg(errp, "no more free controller id");
74e36a261dSMinwoo Im return -1;
75e36a261dSMinwoo Im }
76e36a261dSMinwoo Im
7799f48ae7SLukasz Maniak num_rsvd = nvme_subsys_reserve_cntlids(n, cntlid + 1, num_vfs);
7899f48ae7SLukasz Maniak if (num_rsvd != num_vfs) {
7999f48ae7SLukasz Maniak nvme_subsys_unreserve_cntlids(n);
8099f48ae7SLukasz Maniak error_setg(errp,
8199f48ae7SLukasz Maniak "no more free controller ids for secondary controllers");
8299f48ae7SLukasz Maniak return -1;
8399f48ae7SLukasz Maniak }
8499f48ae7SLukasz Maniak }
8599f48ae7SLukasz Maniak
86a859eb9fSKlaus Jensen if (!subsys->serial) {
87a859eb9fSKlaus Jensen subsys->serial = g_strdup(n->params.serial);
88a859eb9fSKlaus Jensen } else if (strcmp(subsys->serial, n->params.serial)) {
89a859eb9fSKlaus Jensen error_setg(errp, "invalid controller serial");
90a859eb9fSKlaus Jensen return -1;
91a859eb9fSKlaus Jensen }
92a859eb9fSKlaus Jensen
93e36a261dSMinwoo Im subsys->ctrls[cntlid] = n;
94e36a261dSMinwoo Im
95e36a261dSMinwoo Im return cntlid;
96e36a261dSMinwoo Im }
97e36a261dSMinwoo Im
/*
 * Remove controller @n from @subsys.
 *
 * The slot of a PCI virtual function goes back to SUBSYS_SLOT_RSVD. For any
 * other controller the slot is cleared and the ids recorded in its
 * secondary controller list are released as well. In both cases n->cntlid
 * is set to -1 afterwards.
 */
void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n)
{
    if (!pci_is_vf(&n->parent_obj)) {
        subsys->ctrls[n->cntlid] = NULL;
        nvme_subsys_unreserve_cntlids(n);
    } else {
        subsys->ctrls[n->cntlid] = SUBSYS_SLOT_RSVD;
    }

    n->cntlid = -1;
}
109b0fde9e8SKlaus Jensen
/*
 * Derive the reclaim group identifier format (the number of placement
 * identifier bits used for the reclaim group) from the number of reclaim
 * groups @nrg, and check that the remaining bits of a 16-bit placement
 * identifier can still hold @nruh.
 *
 * With a single reclaim group no bits are used (*rgif is 0). Otherwise the
 * width is the bit length of @nrg itself, so e.g. nrg == 2 yields 2 and
 * nrg == 4 yields 3.
 *
 * Returns true and stores the width in *rgif on success. Returns false and
 * stores 0 in *rgif when (UINT16_MAX >> width) is smaller than @nruh.
 */
static bool nvme_calc_rgif(uint16_t nruh, uint16_t nrg, uint8_t *rgif)
{
    unsigned int bits = 0;

    if (nrg == 1) {
        /* PIDRG_NORGI scenario, all of pid is used for PHID */
        *rgif = 0;
        return true;
    }

    for (uint16_t val = nrg; val; val >>= 1) {
        bits++;
    }

    /* ensure remaining bits suffice to represent number of phids in a RG */
    if ((UINT16_MAX >> bits) < nruh) {
        *rgif = 0;
        return false;
    }

    *rgif = bits;

    return true;
}
13773064edfSJesper Devantier
nvme_subsys_setup_fdp(NvmeSubsystem * subsys,Error ** errp)13873064edfSJesper Devantier static bool nvme_subsys_setup_fdp(NvmeSubsystem *subsys, Error **errp)
13973064edfSJesper Devantier {
14073064edfSJesper Devantier NvmeEnduranceGroup *endgrp = &subsys->endgrp;
14173064edfSJesper Devantier
14273064edfSJesper Devantier if (!subsys->params.fdp.runs) {
14373064edfSJesper Devantier error_setg(errp, "fdp.runs must be non-zero");
14473064edfSJesper Devantier return false;
14573064edfSJesper Devantier }
14673064edfSJesper Devantier
14773064edfSJesper Devantier endgrp->fdp.runs = subsys->params.fdp.runs;
14873064edfSJesper Devantier
14973064edfSJesper Devantier if (!subsys->params.fdp.nrg) {
15073064edfSJesper Devantier error_setg(errp, "fdp.nrg must be non-zero");
15173064edfSJesper Devantier return false;
15273064edfSJesper Devantier }
15373064edfSJesper Devantier
15473064edfSJesper Devantier endgrp->fdp.nrg = subsys->params.fdp.nrg;
15573064edfSJesper Devantier
1563ae8a54aSKlaus Jensen if (!subsys->params.fdp.nruh ||
1573ae8a54aSKlaus Jensen subsys->params.fdp.nruh > NVME_FDP_MAXPIDS) {
1583ae8a54aSKlaus Jensen error_setg(errp, "fdp.nruh must be non-zero and less than %u",
1593ae8a54aSKlaus Jensen NVME_FDP_MAXPIDS);
16073064edfSJesper Devantier return false;
16173064edfSJesper Devantier }
16273064edfSJesper Devantier
16373064edfSJesper Devantier endgrp->fdp.nruh = subsys->params.fdp.nruh;
16473064edfSJesper Devantier
16573064edfSJesper Devantier if (!nvme_calc_rgif(endgrp->fdp.nruh, endgrp->fdp.nrg, &endgrp->fdp.rgif)) {
16673064edfSJesper Devantier error_setg(errp,
16773064edfSJesper Devantier "cannot derive a valid rgif (nruh %"PRIu16" nrg %"PRIu32")",
16873064edfSJesper Devantier endgrp->fdp.nruh, endgrp->fdp.nrg);
16973064edfSJesper Devantier return false;
17073064edfSJesper Devantier }
17173064edfSJesper Devantier
17273064edfSJesper Devantier endgrp->fdp.ruhs = g_new(NvmeRuHandle, endgrp->fdp.nruh);
17373064edfSJesper Devantier
17473064edfSJesper Devantier for (uint16_t ruhid = 0; ruhid < endgrp->fdp.nruh; ruhid++) {
17573064edfSJesper Devantier endgrp->fdp.ruhs[ruhid] = (NvmeRuHandle) {
17673064edfSJesper Devantier .ruht = NVME_RUHT_INITIALLY_ISOLATED,
17773064edfSJesper Devantier .ruha = NVME_RUHA_UNUSED,
17873064edfSJesper Devantier };
17973064edfSJesper Devantier
18073064edfSJesper Devantier endgrp->fdp.ruhs[ruhid].rus = g_new(NvmeReclaimUnit, endgrp->fdp.nrg);
18173064edfSJesper Devantier }
18273064edfSJesper Devantier
18373064edfSJesper Devantier endgrp->fdp.enabled = true;
18473064edfSJesper Devantier
18573064edfSJesper Devantier return true;
18673064edfSJesper Devantier }
18773064edfSJesper Devantier
nvme_subsys_setup(NvmeSubsystem * subsys,Error ** errp)18873064edfSJesper Devantier static bool nvme_subsys_setup(NvmeSubsystem *subsys, Error **errp)
189eb2e8974SMinwoo Im {
190eb2e8974SMinwoo Im const char *nqn = subsys->params.nqn ?
191eb2e8974SMinwoo Im subsys->params.nqn : subsys->parent_obj.id;
192eb2e8974SMinwoo Im
193eb2e8974SMinwoo Im snprintf((char *)subsys->subnqn, sizeof(subsys->subnqn),
194eb2e8974SMinwoo Im "nqn.2019-08.org.qemu:%s", nqn);
19573064edfSJesper Devantier
19673064edfSJesper Devantier if (subsys->params.fdp.enabled && !nvme_subsys_setup_fdp(subsys, errp)) {
19773064edfSJesper Devantier return false;
19873064edfSJesper Devantier }
19973064edfSJesper Devantier
20073064edfSJesper Devantier return true;
201eb2e8974SMinwoo Im }
202eb2e8974SMinwoo Im
nvme_subsys_realize(DeviceState * dev,Error ** errp)203eb2e8974SMinwoo Im static void nvme_subsys_realize(DeviceState *dev, Error **errp)
204eb2e8974SMinwoo Im {
205eb2e8974SMinwoo Im NvmeSubsystem *subsys = NVME_SUBSYS(dev);
206eb2e8974SMinwoo Im
207d637e1dcSPeter Maydell qbus_init(&subsys->bus, sizeof(NvmeBus), TYPE_NVME_BUS, dev, dev->id);
2085ffbaeedSKlaus Jensen
20973064edfSJesper Devantier nvme_subsys_setup(subsys, errp);
210eb2e8974SMinwoo Im }
211eb2e8974SMinwoo Im
212b32c22bfSRichard Henderson static const Property nvme_subsystem_props[] = {
213eb2e8974SMinwoo Im DEFINE_PROP_STRING("nqn", NvmeSubsystem, params.nqn),
21473064edfSJesper Devantier DEFINE_PROP_BOOL("fdp", NvmeSubsystem, params.fdp.enabled, false),
21573064edfSJesper Devantier DEFINE_PROP_SIZE("fdp.runs", NvmeSubsystem, params.fdp.runs,
21673064edfSJesper Devantier NVME_DEFAULT_RU_SIZE),
21773064edfSJesper Devantier DEFINE_PROP_UINT32("fdp.nrg", NvmeSubsystem, params.fdp.nrg, 1),
21873064edfSJesper Devantier DEFINE_PROP_UINT16("fdp.nruh", NvmeSubsystem, params.fdp.nruh, 0),
219eb2e8974SMinwoo Im };
220eb2e8974SMinwoo Im
/*
 * Class initializer: sets the device description, realize handler and
 * property table, and puts the device in the storage category. @data is
 * unused.
 */
static void nvme_subsys_class_init(ObjectClass *oc, const void *data)
{
    DeviceClass *dc = DEVICE_CLASS(oc);

    dc->desc = "Virtual NVMe subsystem";
    dc->realize = nvme_subsys_realize;
    device_class_set_props(dc, nvme_subsystem_props);

    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
}
232eb2e8974SMinwoo Im
233eb2e8974SMinwoo Im static const TypeInfo nvme_subsys_info = {
234eb2e8974SMinwoo Im .name = TYPE_NVME_SUBSYS,
235eb2e8974SMinwoo Im .parent = TYPE_DEVICE,
236eb2e8974SMinwoo Im .class_init = nvme_subsys_class_init,
237eb2e8974SMinwoo Im .instance_size = sizeof(NvmeSubsystem),
238eb2e8974SMinwoo Im };
239eb2e8974SMinwoo Im
nvme_subsys_register_types(void)240eb2e8974SMinwoo Im static void nvme_subsys_register_types(void)
241eb2e8974SMinwoo Im {
242eb2e8974SMinwoo Im type_register_static(&nvme_subsys_info);
243eb2e8974SMinwoo Im }
244eb2e8974SMinwoo Im
245eb2e8974SMinwoo Im type_init(nvme_subsys_register_types)
246