1eb2e8974SMinwoo Im /* 2eb2e8974SMinwoo Im * QEMU NVM Express Subsystem: nvme-subsys 3eb2e8974SMinwoo Im * 4eb2e8974SMinwoo Im * Copyright (c) 2021 Minwoo Im <minwoo.im.dev@gmail.com> 5eb2e8974SMinwoo Im * 6eb2e8974SMinwoo Im * This code is licensed under the GNU GPL v2. Refer COPYING. 7eb2e8974SMinwoo Im */ 8eb2e8974SMinwoo Im 9eb2e8974SMinwoo Im #include "qemu/osdep.h" 1073064edfSJesper Devantier #include "qemu/units.h" 11eb2e8974SMinwoo Im #include "qapi/error.h" 127ef37c1cSKlaus Jensen 13eb2e8974SMinwoo Im #include "nvme.h" 14eb2e8974SMinwoo Im 1573064edfSJesper Devantier #define NVME_DEFAULT_RU_SIZE (96 * MiB) 1673064edfSJesper Devantier 1799f48ae7SLukasz Maniak static int nvme_subsys_reserve_cntlids(NvmeCtrl *n, int start, int num) 1899f48ae7SLukasz Maniak { 1999f48ae7SLukasz Maniak NvmeSubsystem *subsys = n->subsys; 201a494d11SMinwoo Im NvmeSecCtrlEntry *list = n->sec_ctrl_list; 2199f48ae7SLukasz Maniak NvmeSecCtrlEntry *sctrl; 2299f48ae7SLukasz Maniak int i, cnt = 0; 2399f48ae7SLukasz Maniak 2499f48ae7SLukasz Maniak for (i = start; i < ARRAY_SIZE(subsys->ctrls) && cnt < num; i++) { 2599f48ae7SLukasz Maniak if (!subsys->ctrls[i]) { 261a494d11SMinwoo Im sctrl = &list[cnt]; 2799f48ae7SLukasz Maniak sctrl->scid = cpu_to_le16(i); 2899f48ae7SLukasz Maniak subsys->ctrls[i] = SUBSYS_SLOT_RSVD; 2999f48ae7SLukasz Maniak cnt++; 3099f48ae7SLukasz Maniak } 3199f48ae7SLukasz Maniak } 3299f48ae7SLukasz Maniak 3399f48ae7SLukasz Maniak return cnt; 3499f48ae7SLukasz Maniak } 3599f48ae7SLukasz Maniak 3699f48ae7SLukasz Maniak static void nvme_subsys_unreserve_cntlids(NvmeCtrl *n) 3799f48ae7SLukasz Maniak { 3899f48ae7SLukasz Maniak NvmeSubsystem *subsys = n->subsys; 391a494d11SMinwoo Im NvmeSecCtrlEntry *list = n->sec_ctrl_list; 4099f48ae7SLukasz Maniak NvmeSecCtrlEntry *sctrl; 4199f48ae7SLukasz Maniak int i, cntlid; 4299f48ae7SLukasz Maniak 4399f48ae7SLukasz Maniak for (i = 0; i < n->params.sriov_max_vfs; i++) { 441a494d11SMinwoo Im sctrl = &list[i]; 4599f48ae7SLukasz Maniak cntlid = le16_to_cpu(sctrl->scid); 4699f48ae7SLukasz Maniak 4799f48ae7SLukasz Maniak if (cntlid) { 4899f48ae7SLukasz Maniak assert(subsys->ctrls[cntlid] == SUBSYS_SLOT_RSVD); 4999f48ae7SLukasz Maniak subsys->ctrls[cntlid] = NULL; 5099f48ae7SLukasz Maniak sctrl->scid = 0; 5199f48ae7SLukasz Maniak } 5299f48ae7SLukasz Maniak } 5399f48ae7SLukasz Maniak } 5499f48ae7SLukasz Maniak 55e36a261dSMinwoo Im int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp) 56e36a261dSMinwoo Im { 57e36a261dSMinwoo Im NvmeSubsystem *subsys = n->subsys; 5899f48ae7SLukasz Maniak NvmeSecCtrlEntry *sctrl = nvme_sctrl(n); 5999f48ae7SLukasz Maniak int cntlid, nsid, num_rsvd, num_vfs = n->params.sriov_max_vfs; 60e36a261dSMinwoo Im 6199f48ae7SLukasz Maniak if (pci_is_vf(&n->parent_obj)) { 6299f48ae7SLukasz Maniak cntlid = le16_to_cpu(sctrl->scid); 6399f48ae7SLukasz Maniak } else { 64c6159d0eSMinwoo Im n->sec_ctrl_list = g_new0(NvmeSecCtrlEntry, num_vfs); 65c6159d0eSMinwoo Im 66e36a261dSMinwoo Im for (cntlid = 0; cntlid < ARRAY_SIZE(subsys->ctrls); cntlid++) { 67e36a261dSMinwoo Im if (!subsys->ctrls[cntlid]) { 68e36a261dSMinwoo Im break; 69e36a261dSMinwoo Im } 70e36a261dSMinwoo Im } 71e36a261dSMinwoo Im 72e36a261dSMinwoo Im if (cntlid == ARRAY_SIZE(subsys->ctrls)) { 73e36a261dSMinwoo Im error_setg(errp, "no more free controller id"); 74e36a261dSMinwoo Im return -1; 75e36a261dSMinwoo Im } 76e36a261dSMinwoo Im 7799f48ae7SLukasz Maniak num_rsvd = nvme_subsys_reserve_cntlids(n, cntlid + 1, num_vfs); 7899f48ae7SLukasz Maniak if (num_rsvd != num_vfs) { 7999f48ae7SLukasz Maniak nvme_subsys_unreserve_cntlids(n); 8099f48ae7SLukasz Maniak error_setg(errp, 8199f48ae7SLukasz Maniak "no more free controller ids for secondary controllers"); 8299f48ae7SLukasz Maniak return -1; 8399f48ae7SLukasz Maniak } 8499f48ae7SLukasz Maniak } 8599f48ae7SLukasz Maniak 86a859eb9fSKlaus Jensen if (!subsys->serial) { 87a859eb9fSKlaus Jensen subsys->serial = g_strdup(n->params.serial); 88a859eb9fSKlaus Jensen } else if (strcmp(subsys->serial, n->params.serial)) { 89a859eb9fSKlaus Jensen error_setg(errp, "invalid controller serial"); 90a859eb9fSKlaus Jensen return -1; 91a859eb9fSKlaus Jensen } 92a859eb9fSKlaus Jensen 93e36a261dSMinwoo Im subsys->ctrls[cntlid] = n; 94e36a261dSMinwoo Im 959fc6e86eSHannes Reinecke for (nsid = 1; nsid < ARRAY_SIZE(subsys->namespaces); nsid++) { 969fc6e86eSHannes Reinecke NvmeNamespace *ns = subsys->namespaces[nsid]; 979fc6e86eSHannes Reinecke if (ns && ns->params.shared && !ns->params.detached) { 989fc6e86eSHannes Reinecke nvme_attach_ns(n, ns); 999fc6e86eSHannes Reinecke } 1009fc6e86eSHannes Reinecke } 1019fc6e86eSHannes Reinecke 102e36a261dSMinwoo Im return cntlid; 103e36a261dSMinwoo Im } 104e36a261dSMinwoo Im 105b0fde9e8SKlaus Jensen void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n) 106b0fde9e8SKlaus Jensen { 10799f48ae7SLukasz Maniak if (pci_is_vf(&n->parent_obj)) { 10899f48ae7SLukasz Maniak subsys->ctrls[n->cntlid] = SUBSYS_SLOT_RSVD; 10999f48ae7SLukasz Maniak } else { 110b0fde9e8SKlaus Jensen subsys->ctrls[n->cntlid] = NULL; 11199f48ae7SLukasz Maniak nvme_subsys_unreserve_cntlids(n); 11299f48ae7SLukasz Maniak } 11399f48ae7SLukasz Maniak 1149fc6e86eSHannes Reinecke n->cntlid = -1; 115b0fde9e8SKlaus Jensen } 116b0fde9e8SKlaus Jensen 11773064edfSJesper Devantier static bool nvme_calc_rgif(uint16_t nruh, uint16_t nrg, uint8_t *rgif) 11873064edfSJesper Devantier { 11973064edfSJesper Devantier uint16_t val; 12073064edfSJesper Devantier unsigned int i; 12173064edfSJesper Devantier 12273064edfSJesper Devantier if (unlikely(nrg == 1)) { 12373064edfSJesper Devantier /* PIDRG_NORGI scenario, all of pid is used for PHID */ 12473064edfSJesper Devantier *rgif = 0; 12573064edfSJesper Devantier return true; 12673064edfSJesper Devantier } 12773064edfSJesper Devantier 12873064edfSJesper Devantier val = nrg; 12973064edfSJesper Devantier i = 0; 13073064edfSJesper Devantier while (val) { 13173064edfSJesper Devantier val >>= 1; 13273064edfSJesper Devantier i++; 13373064edfSJesper Devantier } 13473064edfSJesper Devantier *rgif = i; 13573064edfSJesper Devantier 13673064edfSJesper Devantier /* ensure remaining bits suffice to represent number of phids in a RG */ 13773064edfSJesper Devantier if (unlikely((UINT16_MAX >> i) < nruh)) { 13873064edfSJesper Devantier *rgif = 0; 13973064edfSJesper Devantier return false; 14073064edfSJesper Devantier } 14173064edfSJesper Devantier 14273064edfSJesper Devantier return true; 14373064edfSJesper Devantier } 14473064edfSJesper Devantier 14573064edfSJesper Devantier static bool nvme_subsys_setup_fdp(NvmeSubsystem *subsys, Error **errp) 14673064edfSJesper Devantier { 14773064edfSJesper Devantier NvmeEnduranceGroup *endgrp = &subsys->endgrp; 14873064edfSJesper Devantier 14973064edfSJesper Devantier if (!subsys->params.fdp.runs) { 15073064edfSJesper Devantier error_setg(errp, "fdp.runs must be non-zero"); 15173064edfSJesper Devantier return false; 15273064edfSJesper Devantier } 15373064edfSJesper Devantier 15473064edfSJesper Devantier endgrp->fdp.runs = subsys->params.fdp.runs; 15573064edfSJesper Devantier 15673064edfSJesper Devantier if (!subsys->params.fdp.nrg) { 15773064edfSJesper Devantier error_setg(errp, "fdp.nrg must be non-zero"); 15873064edfSJesper Devantier return false; 15973064edfSJesper Devantier } 16073064edfSJesper Devantier 16173064edfSJesper Devantier endgrp->fdp.nrg = subsys->params.fdp.nrg; 16273064edfSJesper Devantier 1633ae8a54aSKlaus Jensen if (!subsys->params.fdp.nruh || 1643ae8a54aSKlaus Jensen subsys->params.fdp.nruh > NVME_FDP_MAXPIDS) { 1653ae8a54aSKlaus Jensen error_setg(errp, "fdp.nruh must be non-zero and less than %u", 1663ae8a54aSKlaus Jensen NVME_FDP_MAXPIDS); 16773064edfSJesper Devantier return false; 16873064edfSJesper Devantier } 16973064edfSJesper Devantier 17073064edfSJesper Devantier endgrp->fdp.nruh = subsys->params.fdp.nruh; 17173064edfSJesper Devantier 17273064edfSJesper Devantier if (!nvme_calc_rgif(endgrp->fdp.nruh, endgrp->fdp.nrg, &endgrp->fdp.rgif)) { 17373064edfSJesper Devantier error_setg(errp, 17473064edfSJesper Devantier "cannot derive a valid rgif (nruh %"PRIu16" nrg %"PRIu32")", 17573064edfSJesper Devantier endgrp->fdp.nruh, endgrp->fdp.nrg); 17673064edfSJesper Devantier return false; 17773064edfSJesper Devantier } 17873064edfSJesper Devantier 17973064edfSJesper Devantier endgrp->fdp.ruhs = g_new(NvmeRuHandle, endgrp->fdp.nruh); 18073064edfSJesper Devantier 18173064edfSJesper Devantier for (uint16_t ruhid = 0; ruhid < endgrp->fdp.nruh; ruhid++) { 18273064edfSJesper Devantier endgrp->fdp.ruhs[ruhid] = (NvmeRuHandle) { 18373064edfSJesper Devantier .ruht = NVME_RUHT_INITIALLY_ISOLATED, 18473064edfSJesper Devantier .ruha = NVME_RUHA_UNUSED, 18573064edfSJesper Devantier }; 18673064edfSJesper Devantier 18773064edfSJesper Devantier endgrp->fdp.ruhs[ruhid].rus = g_new(NvmeReclaimUnit, endgrp->fdp.nrg); 18873064edfSJesper Devantier } 18973064edfSJesper Devantier 19073064edfSJesper Devantier endgrp->fdp.enabled = true; 19173064edfSJesper Devantier 19273064edfSJesper Devantier return true; 19373064edfSJesper Devantier } 19473064edfSJesper Devantier 19573064edfSJesper Devantier static bool nvme_subsys_setup(NvmeSubsystem *subsys, Error **errp) 196eb2e8974SMinwoo Im { 197eb2e8974SMinwoo Im const char *nqn = subsys->params.nqn ? 198eb2e8974SMinwoo Im subsys->params.nqn : subsys->parent_obj.id; 199eb2e8974SMinwoo Im 200eb2e8974SMinwoo Im snprintf((char *)subsys->subnqn, sizeof(subsys->subnqn), 201eb2e8974SMinwoo Im "nqn.2019-08.org.qemu:%s", nqn); 20273064edfSJesper Devantier 20373064edfSJesper Devantier if (subsys->params.fdp.enabled && !nvme_subsys_setup_fdp(subsys, errp)) { 20473064edfSJesper Devantier return false; 20573064edfSJesper Devantier } 20673064edfSJesper Devantier 20773064edfSJesper Devantier return true; 208eb2e8974SMinwoo Im } 209eb2e8974SMinwoo Im 210eb2e8974SMinwoo Im static void nvme_subsys_realize(DeviceState *dev, Error **errp) 211eb2e8974SMinwoo Im { 212eb2e8974SMinwoo Im NvmeSubsystem *subsys = NVME_SUBSYS(dev); 213eb2e8974SMinwoo Im 214d637e1dcSPeter Maydell qbus_init(&subsys->bus, sizeof(NvmeBus), TYPE_NVME_BUS, dev, dev->id); 2155ffbaeedSKlaus Jensen 21673064edfSJesper Devantier nvme_subsys_setup(subsys, errp); 217eb2e8974SMinwoo Im } 218eb2e8974SMinwoo Im 219*b32c22bfSRichard Henderson static const Property nvme_subsystem_props[] = { 220eb2e8974SMinwoo Im DEFINE_PROP_STRING("nqn", NvmeSubsystem, params.nqn), 22173064edfSJesper Devantier DEFINE_PROP_BOOL("fdp", NvmeSubsystem, params.fdp.enabled, false), 22273064edfSJesper Devantier DEFINE_PROP_SIZE("fdp.runs", NvmeSubsystem, params.fdp.runs, 22373064edfSJesper Devantier NVME_DEFAULT_RU_SIZE), 22473064edfSJesper Devantier DEFINE_PROP_UINT32("fdp.nrg", NvmeSubsystem, params.fdp.nrg, 1), 22573064edfSJesper Devantier DEFINE_PROP_UINT16("fdp.nruh", NvmeSubsystem, params.fdp.nruh, 0), 226eb2e8974SMinwoo Im DEFINE_PROP_END_OF_LIST(), 227eb2e8974SMinwoo Im }; 228eb2e8974SMinwoo Im 229eb2e8974SMinwoo Im static void nvme_subsys_class_init(ObjectClass *oc, void *data) 230eb2e8974SMinwoo Im { 231eb2e8974SMinwoo Im DeviceClass *dc = DEVICE_CLASS(oc); 232eb2e8974SMinwoo Im 233eb2e8974SMinwoo Im set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); 234eb2e8974SMinwoo Im 235eb2e8974SMinwoo Im dc->realize = nvme_subsys_realize; 236eb2e8974SMinwoo Im dc->desc = "Virtual NVMe subsystem"; 237cc6fb6bcSKlaus Jensen dc->hotpluggable = false; 238eb2e8974SMinwoo Im 239eb2e8974SMinwoo Im device_class_set_props(dc, nvme_subsystem_props); 240eb2e8974SMinwoo Im } 241eb2e8974SMinwoo Im 242eb2e8974SMinwoo Im static const TypeInfo nvme_subsys_info = { 243eb2e8974SMinwoo Im .name = TYPE_NVME_SUBSYS, 244eb2e8974SMinwoo Im .parent = TYPE_DEVICE, 245eb2e8974SMinwoo Im .class_init = nvme_subsys_class_init, 246eb2e8974SMinwoo Im .instance_size = sizeof(NvmeSubsystem), 247eb2e8974SMinwoo Im }; 248eb2e8974SMinwoo Im 249eb2e8974SMinwoo Im static void nvme_subsys_register_types(void) 250eb2e8974SMinwoo Im { 251eb2e8974SMinwoo Im type_register_static(&nvme_subsys_info); 252eb2e8974SMinwoo Im } 253eb2e8974SMinwoo Im 254eb2e8974SMinwoo Im type_init(nvme_subsys_register_types) 255