1eb2e8974SMinwoo Im /* 2eb2e8974SMinwoo Im * QEMU NVM Express Subsystem: nvme-subsys 3eb2e8974SMinwoo Im * 4eb2e8974SMinwoo Im * Copyright (c) 2021 Minwoo Im <minwoo.im.dev@gmail.com> 5eb2e8974SMinwoo Im * 6eb2e8974SMinwoo Im * This code is licensed under the GNU GPL v2. Refer COPYING. 7eb2e8974SMinwoo Im */ 8eb2e8974SMinwoo Im 9eb2e8974SMinwoo Im #include "qemu/osdep.h" 1073064edfSJesper Devantier #include "qemu/units.h" 11eb2e8974SMinwoo Im #include "qapi/error.h" 127ef37c1cSKlaus Jensen 13eb2e8974SMinwoo Im #include "nvme.h" 14eb2e8974SMinwoo Im 1573064edfSJesper Devantier #define NVME_DEFAULT_RU_SIZE (96 * MiB) 1673064edfSJesper Devantier 1799f48ae7SLukasz Maniak static int nvme_subsys_reserve_cntlids(NvmeCtrl *n, int start, int num) 1899f48ae7SLukasz Maniak { 1999f48ae7SLukasz Maniak NvmeSubsystem *subsys = n->subsys; 2099f48ae7SLukasz Maniak NvmeSecCtrlList *list = &n->sec_ctrl_list; 2199f48ae7SLukasz Maniak NvmeSecCtrlEntry *sctrl; 2299f48ae7SLukasz Maniak int i, cnt = 0; 2399f48ae7SLukasz Maniak 2499f48ae7SLukasz Maniak for (i = start; i < ARRAY_SIZE(subsys->ctrls) && cnt < num; i++) { 2599f48ae7SLukasz Maniak if (!subsys->ctrls[i]) { 2699f48ae7SLukasz Maniak sctrl = &list->sec[cnt]; 2799f48ae7SLukasz Maniak sctrl->scid = cpu_to_le16(i); 2899f48ae7SLukasz Maniak subsys->ctrls[i] = SUBSYS_SLOT_RSVD; 2999f48ae7SLukasz Maniak cnt++; 3099f48ae7SLukasz Maniak } 3199f48ae7SLukasz Maniak } 3299f48ae7SLukasz Maniak 3399f48ae7SLukasz Maniak return cnt; 3499f48ae7SLukasz Maniak } 3599f48ae7SLukasz Maniak 3699f48ae7SLukasz Maniak static void nvme_subsys_unreserve_cntlids(NvmeCtrl *n) 3799f48ae7SLukasz Maniak { 3899f48ae7SLukasz Maniak NvmeSubsystem *subsys = n->subsys; 3999f48ae7SLukasz Maniak NvmeSecCtrlList *list = &n->sec_ctrl_list; 4099f48ae7SLukasz Maniak NvmeSecCtrlEntry *sctrl; 4199f48ae7SLukasz Maniak int i, cntlid; 4299f48ae7SLukasz Maniak 4399f48ae7SLukasz Maniak for (i = 0; i < n->params.sriov_max_vfs; i++) { 4499f48ae7SLukasz Maniak sctrl = &list->sec[i]; 4599f48ae7SLukasz Maniak cntlid = le16_to_cpu(sctrl->scid); 4699f48ae7SLukasz Maniak 4799f48ae7SLukasz Maniak if (cntlid) { 4899f48ae7SLukasz Maniak assert(subsys->ctrls[cntlid] == SUBSYS_SLOT_RSVD); 4999f48ae7SLukasz Maniak subsys->ctrls[cntlid] = NULL; 5099f48ae7SLukasz Maniak sctrl->scid = 0; 5199f48ae7SLukasz Maniak } 5299f48ae7SLukasz Maniak } 5399f48ae7SLukasz Maniak } 5499f48ae7SLukasz Maniak 55e36a261dSMinwoo Im int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp) 56e36a261dSMinwoo Im { 57e36a261dSMinwoo Im NvmeSubsystem *subsys = n->subsys; 5899f48ae7SLukasz Maniak NvmeSecCtrlEntry *sctrl = nvme_sctrl(n); 5999f48ae7SLukasz Maniak int cntlid, nsid, num_rsvd, num_vfs = n->params.sriov_max_vfs; 60e36a261dSMinwoo Im 6199f48ae7SLukasz Maniak if (pci_is_vf(&n->parent_obj)) { 6299f48ae7SLukasz Maniak cntlid = le16_to_cpu(sctrl->scid); 6399f48ae7SLukasz Maniak } else { 64e36a261dSMinwoo Im for (cntlid = 0; cntlid < ARRAY_SIZE(subsys->ctrls); cntlid++) { 65e36a261dSMinwoo Im if (!subsys->ctrls[cntlid]) { 66e36a261dSMinwoo Im break; 67e36a261dSMinwoo Im } 68e36a261dSMinwoo Im } 69e36a261dSMinwoo Im 70e36a261dSMinwoo Im if (cntlid == ARRAY_SIZE(subsys->ctrls)) { 71e36a261dSMinwoo Im error_setg(errp, "no more free controller id"); 72e36a261dSMinwoo Im return -1; 73e36a261dSMinwoo Im } 74e36a261dSMinwoo Im 7599f48ae7SLukasz Maniak num_rsvd = nvme_subsys_reserve_cntlids(n, cntlid + 1, num_vfs); 7699f48ae7SLukasz Maniak if (num_rsvd != num_vfs) { 7799f48ae7SLukasz Maniak nvme_subsys_unreserve_cntlids(n); 7899f48ae7SLukasz Maniak error_setg(errp, 7999f48ae7SLukasz Maniak "no more free controller ids for secondary controllers"); 8099f48ae7SLukasz Maniak return -1; 8199f48ae7SLukasz Maniak } 8299f48ae7SLukasz Maniak } 8399f48ae7SLukasz Maniak 84a859eb9fSKlaus Jensen if (!subsys->serial) { 85a859eb9fSKlaus Jensen subsys->serial = g_strdup(n->params.serial); 86a859eb9fSKlaus Jensen } else if (strcmp(subsys->serial, n->params.serial)) { 87a859eb9fSKlaus Jensen error_setg(errp, "invalid controller serial"); 88a859eb9fSKlaus Jensen return -1; 89a859eb9fSKlaus Jensen } 90a859eb9fSKlaus Jensen 91e36a261dSMinwoo Im subsys->ctrls[cntlid] = n; 92e36a261dSMinwoo Im 939fc6e86eSHannes Reinecke for (nsid = 1; nsid < ARRAY_SIZE(subsys->namespaces); nsid++) { 949fc6e86eSHannes Reinecke NvmeNamespace *ns = subsys->namespaces[nsid]; 959fc6e86eSHannes Reinecke if (ns && ns->params.shared && !ns->params.detached) { 969fc6e86eSHannes Reinecke nvme_attach_ns(n, ns); 979fc6e86eSHannes Reinecke } 989fc6e86eSHannes Reinecke } 999fc6e86eSHannes Reinecke 100e36a261dSMinwoo Im return cntlid; 101e36a261dSMinwoo Im } 102e36a261dSMinwoo Im 103b0fde9e8SKlaus Jensen void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n) 104b0fde9e8SKlaus Jensen { 10599f48ae7SLukasz Maniak if (pci_is_vf(&n->parent_obj)) { 10699f48ae7SLukasz Maniak subsys->ctrls[n->cntlid] = SUBSYS_SLOT_RSVD; 10799f48ae7SLukasz Maniak } else { 108b0fde9e8SKlaus Jensen subsys->ctrls[n->cntlid] = NULL; 10999f48ae7SLukasz Maniak nvme_subsys_unreserve_cntlids(n); 11099f48ae7SLukasz Maniak } 11199f48ae7SLukasz Maniak 1129fc6e86eSHannes Reinecke n->cntlid = -1; 113b0fde9e8SKlaus Jensen } 114b0fde9e8SKlaus Jensen 11573064edfSJesper Devantier static bool nvme_calc_rgif(uint16_t nruh, uint16_t nrg, uint8_t *rgif) 11673064edfSJesper Devantier { 11773064edfSJesper Devantier uint16_t val; 11873064edfSJesper Devantier unsigned int i; 11973064edfSJesper Devantier 12073064edfSJesper Devantier if (unlikely(nrg == 1)) { 12173064edfSJesper Devantier /* PIDRG_NORGI scenario, all of pid is used for PHID */ 12273064edfSJesper Devantier *rgif = 0; 12373064edfSJesper Devantier return true; 12473064edfSJesper Devantier } 12573064edfSJesper Devantier 12673064edfSJesper Devantier val = nrg; 12773064edfSJesper Devantier i = 0; 12873064edfSJesper Devantier while (val) { 12973064edfSJesper Devantier val >>= 1; 13073064edfSJesper Devantier i++; 13173064edfSJesper Devantier } 13273064edfSJesper Devantier *rgif = i; 13373064edfSJesper Devantier 13473064edfSJesper Devantier /* ensure remaining bits suffice to represent number of phids in a RG */ 13573064edfSJesper Devantier if (unlikely((UINT16_MAX >> i) < nruh)) { 13673064edfSJesper Devantier *rgif = 0; 13773064edfSJesper Devantier return false; 13873064edfSJesper Devantier } 13973064edfSJesper Devantier 14073064edfSJesper Devantier return true; 14173064edfSJesper Devantier } 14273064edfSJesper Devantier 14373064edfSJesper Devantier static bool nvme_subsys_setup_fdp(NvmeSubsystem *subsys, Error **errp) 14473064edfSJesper Devantier { 14573064edfSJesper Devantier NvmeEnduranceGroup *endgrp = &subsys->endgrp; 14673064edfSJesper Devantier 14773064edfSJesper Devantier if (!subsys->params.fdp.runs) { 14873064edfSJesper Devantier error_setg(errp, "fdp.runs must be non-zero"); 14973064edfSJesper Devantier return false; 15073064edfSJesper Devantier } 15173064edfSJesper Devantier 15273064edfSJesper Devantier endgrp->fdp.runs = subsys->params.fdp.runs; 15373064edfSJesper Devantier 15473064edfSJesper Devantier if (!subsys->params.fdp.nrg) { 15573064edfSJesper Devantier error_setg(errp, "fdp.nrg must be non-zero"); 15673064edfSJesper Devantier return false; 15773064edfSJesper Devantier } 15873064edfSJesper Devantier 15973064edfSJesper Devantier endgrp->fdp.nrg = subsys->params.fdp.nrg; 16073064edfSJesper Devantier 161*3ae8a54aSKlaus Jensen if (!subsys->params.fdp.nruh || 162*3ae8a54aSKlaus Jensen subsys->params.fdp.nruh > NVME_FDP_MAXPIDS) { 163*3ae8a54aSKlaus Jensen error_setg(errp, "fdp.nruh must be non-zero and less than %u", 164*3ae8a54aSKlaus Jensen NVME_FDP_MAXPIDS); 16573064edfSJesper Devantier return false; 16673064edfSJesper Devantier } 16773064edfSJesper Devantier 16873064edfSJesper Devantier endgrp->fdp.nruh = subsys->params.fdp.nruh; 16973064edfSJesper Devantier 17073064edfSJesper Devantier if (!nvme_calc_rgif(endgrp->fdp.nruh, endgrp->fdp.nrg, &endgrp->fdp.rgif)) { 17173064edfSJesper Devantier error_setg(errp, 17273064edfSJesper Devantier "cannot derive a valid rgif (nruh %"PRIu16" nrg %"PRIu32")", 17373064edfSJesper Devantier endgrp->fdp.nruh, endgrp->fdp.nrg); 17473064edfSJesper Devantier return false; 17573064edfSJesper Devantier } 17673064edfSJesper Devantier 17773064edfSJesper Devantier endgrp->fdp.ruhs = g_new(NvmeRuHandle, endgrp->fdp.nruh); 17873064edfSJesper Devantier 17973064edfSJesper Devantier for (uint16_t ruhid = 0; ruhid < endgrp->fdp.nruh; ruhid++) { 18073064edfSJesper Devantier endgrp->fdp.ruhs[ruhid] = (NvmeRuHandle) { 18173064edfSJesper Devantier .ruht = NVME_RUHT_INITIALLY_ISOLATED, 18273064edfSJesper Devantier .ruha = NVME_RUHA_UNUSED, 18373064edfSJesper Devantier }; 18473064edfSJesper Devantier 18573064edfSJesper Devantier endgrp->fdp.ruhs[ruhid].rus = g_new(NvmeReclaimUnit, endgrp->fdp.nrg); 18673064edfSJesper Devantier } 18773064edfSJesper Devantier 18873064edfSJesper Devantier endgrp->fdp.enabled = true; 18973064edfSJesper Devantier 19073064edfSJesper Devantier return true; 19173064edfSJesper Devantier } 19273064edfSJesper Devantier 19373064edfSJesper Devantier static bool nvme_subsys_setup(NvmeSubsystem *subsys, Error **errp) 194eb2e8974SMinwoo Im { 195eb2e8974SMinwoo Im const char *nqn = subsys->params.nqn ? 196eb2e8974SMinwoo Im subsys->params.nqn : subsys->parent_obj.id; 197eb2e8974SMinwoo Im 198eb2e8974SMinwoo Im snprintf((char *)subsys->subnqn, sizeof(subsys->subnqn), 199eb2e8974SMinwoo Im "nqn.2019-08.org.qemu:%s", nqn); 20073064edfSJesper Devantier 20173064edfSJesper Devantier if (subsys->params.fdp.enabled && !nvme_subsys_setup_fdp(subsys, errp)) { 20273064edfSJesper Devantier return false; 20373064edfSJesper Devantier } 20473064edfSJesper Devantier 20573064edfSJesper Devantier return true; 206eb2e8974SMinwoo Im } 207eb2e8974SMinwoo Im 208eb2e8974SMinwoo Im static void nvme_subsys_realize(DeviceState *dev, Error **errp) 209eb2e8974SMinwoo Im { 210eb2e8974SMinwoo Im NvmeSubsystem *subsys = NVME_SUBSYS(dev); 211eb2e8974SMinwoo Im 212d637e1dcSPeter Maydell qbus_init(&subsys->bus, sizeof(NvmeBus), TYPE_NVME_BUS, dev, dev->id); 2135ffbaeedSKlaus Jensen 21473064edfSJesper Devantier nvme_subsys_setup(subsys, errp); 215eb2e8974SMinwoo Im } 216eb2e8974SMinwoo Im 217eb2e8974SMinwoo Im static Property nvme_subsystem_props[] = { 218eb2e8974SMinwoo Im DEFINE_PROP_STRING("nqn", NvmeSubsystem, params.nqn), 21973064edfSJesper Devantier DEFINE_PROP_BOOL("fdp", NvmeSubsystem, params.fdp.enabled, false), 22073064edfSJesper Devantier DEFINE_PROP_SIZE("fdp.runs", NvmeSubsystem, params.fdp.runs, 22173064edfSJesper Devantier NVME_DEFAULT_RU_SIZE), 22273064edfSJesper Devantier DEFINE_PROP_UINT32("fdp.nrg", NvmeSubsystem, params.fdp.nrg, 1), 22373064edfSJesper Devantier DEFINE_PROP_UINT16("fdp.nruh", NvmeSubsystem, params.fdp.nruh, 0), 224eb2e8974SMinwoo Im DEFINE_PROP_END_OF_LIST(), 225eb2e8974SMinwoo Im }; 226eb2e8974SMinwoo Im 227eb2e8974SMinwoo Im static void nvme_subsys_class_init(ObjectClass *oc, void *data) 228eb2e8974SMinwoo Im { 229eb2e8974SMinwoo Im DeviceClass *dc = DEVICE_CLASS(oc); 230eb2e8974SMinwoo Im 231eb2e8974SMinwoo Im set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); 232eb2e8974SMinwoo Im 233eb2e8974SMinwoo Im dc->realize = nvme_subsys_realize; 234eb2e8974SMinwoo Im dc->desc = "Virtual NVMe subsystem"; 235cc6fb6bcSKlaus Jensen dc->hotpluggable = false; 236eb2e8974SMinwoo Im 237eb2e8974SMinwoo Im device_class_set_props(dc, nvme_subsystem_props); 238eb2e8974SMinwoo Im } 239eb2e8974SMinwoo Im 240eb2e8974SMinwoo Im static const TypeInfo nvme_subsys_info = { 241eb2e8974SMinwoo Im .name = TYPE_NVME_SUBSYS, 242eb2e8974SMinwoo Im .parent = TYPE_DEVICE, 243eb2e8974SMinwoo Im .class_init = nvme_subsys_class_init, 244eb2e8974SMinwoo Im .instance_size = sizeof(NvmeSubsystem), 245eb2e8974SMinwoo Im }; 246eb2e8974SMinwoo Im 247eb2e8974SMinwoo Im static void nvme_subsys_register_types(void) 248eb2e8974SMinwoo Im { 249eb2e8974SMinwoo Im type_register_static(&nvme_subsys_info); 250eb2e8974SMinwoo Im } 251eb2e8974SMinwoo Im 252eb2e8974SMinwoo Im type_init(nvme_subsys_register_types) 253