17f0f1aceSKlaus Jensen /* 27f0f1aceSKlaus Jensen * QEMU NVM Express Virtual Namespace 37f0f1aceSKlaus Jensen * 47f0f1aceSKlaus Jensen * Copyright (c) 2019 CNEX Labs 57f0f1aceSKlaus Jensen * Copyright (c) 2020 Samsung Electronics 67f0f1aceSKlaus Jensen * 77f0f1aceSKlaus Jensen * Authors: 87f0f1aceSKlaus Jensen * Klaus Jensen <k.jensen@samsung.com> 97f0f1aceSKlaus Jensen * 107f0f1aceSKlaus Jensen * This work is licensed under the terms of the GNU GPL, version 2. See the 117f0f1aceSKlaus Jensen * COPYING file in the top-level directory. 127f0f1aceSKlaus Jensen * 137f0f1aceSKlaus Jensen */ 147f0f1aceSKlaus Jensen 157f0f1aceSKlaus Jensen #include "qemu/osdep.h" 167f0f1aceSKlaus Jensen #include "qemu/units.h" 171b5804a8SKlaus Jensen #include "qemu/error-report.h" 187ef37c1cSKlaus Jensen #include "qapi/error.h" 197f0f1aceSKlaus Jensen #include "sysemu/sysemu.h" 207f0f1aceSKlaus Jensen #include "sysemu/block-backend.h" 217f0f1aceSKlaus Jensen 227f0f1aceSKlaus Jensen #include "nvme.h" 237ef37c1cSKlaus Jensen #include "trace.h" 247f0f1aceSKlaus Jensen 252605257aSKlaus Jensen #define MIN_DISCARD_GRANULARITY (4 * KiB) 26de482d1fSKlaus Jensen #define NVME_DEFAULT_ZONE_SIZE (128 * MiB) 272605257aSKlaus Jensen 28dc04d25eSMinwoo Im void nvme_ns_init_format(NvmeNamespace *ns) 29516990f4SKlaus Jensen { 30516990f4SKlaus Jensen NvmeIdNs *id_ns = &ns->id_ns; 31516990f4SKlaus Jensen BlockDriverInfo bdi; 321e64faccSDmitry Tikhov int npdg, ret; 331e64faccSDmitry Tikhov int64_t nlbas; 34516990f4SKlaus Jensen 356146f3ddSKlaus Jensen ns->lbaf = id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)]; 366146f3ddSKlaus Jensen ns->lbasz = 1 << ns->lbaf.ds; 376146f3ddSKlaus Jensen 386146f3ddSKlaus Jensen nlbas = ns->size / (ns->lbasz + ns->lbaf.ms); 39516990f4SKlaus Jensen 40516990f4SKlaus Jensen id_ns->nsze = cpu_to_le64(nlbas); 41516990f4SKlaus Jensen 42516990f4SKlaus Jensen /* no thin provisioning */ 43516990f4SKlaus Jensen id_ns->ncap = id_ns->nsze; 44516990f4SKlaus Jensen id_ns->nuse = id_ns->ncap; 45516990f4SKlaus Jensen 461e64faccSDmitry Tikhov ns->moff = nlbas << ns->lbaf.ds; 47516990f4SKlaus Jensen 486146f3ddSKlaus Jensen npdg = ns->blkconf.discard_granularity / ns->lbasz; 49516990f4SKlaus Jensen 50516990f4SKlaus Jensen ret = bdrv_get_info(blk_bs(ns->blkconf.blk), &bdi); 51516990f4SKlaus Jensen if (ret >= 0 && bdi.cluster_size > ns->blkconf.discard_granularity) { 526146f3ddSKlaus Jensen npdg = bdi.cluster_size / ns->lbasz; 53516990f4SKlaus Jensen } 54516990f4SKlaus Jensen 55516990f4SKlaus Jensen id_ns->npda = id_ns->npdg = npdg - 1; 56516990f4SKlaus Jensen } 57516990f4SKlaus Jensen 582605257aSKlaus Jensen static int nvme_ns_init(NvmeNamespace *ns, Error **errp) 597f0f1aceSKlaus Jensen { 603276dde4SHeinrich Schuchardt static uint64_t ns_count; 617f0f1aceSKlaus Jensen NvmeIdNs *id_ns = &ns->id_ns; 6244219b60SNaveen Nagar NvmeIdNsNvm *id_ns_nvm = &ns->id_ns_nvm; 636a674bc2SMinwoo Im uint8_t ds; 646a674bc2SMinwoo Im uint16_t ms; 656a674bc2SMinwoo Im int i; 667f0f1aceSKlaus Jensen 67516990f4SKlaus Jensen ns->csi = NVME_CSI_NVM; 68dc04d25eSMinwoo Im ns->status = 0x0; 69516990f4SKlaus Jensen 70146f720cSKlaus Jensen ns->id_ns.dlfeat = 0x1; 717f0f1aceSKlaus Jensen 72516990f4SKlaus Jensen /* support DULBE and I/O optimization fields */ 73516990f4SKlaus Jensen id_ns->nsfeat |= (0x4 | 0x10); 74516990f4SKlaus Jensen 75e5489356SKlaus Jensen if (ns->params.shared) { 76516990f4SKlaus Jensen id_ns->nmic |= NVME_NMIC_NS_SHARED; 77516990f4SKlaus Jensen } 78516990f4SKlaus Jensen 793276dde4SHeinrich Schuchardt /* Substitute a missing EUI-64 by an autogenerated one */ 803276dde4SHeinrich Schuchardt ++ns_count; 813276dde4SHeinrich Schuchardt if (!ns->params.eui64 && ns->params.eui64_default) { 823276dde4SHeinrich Schuchardt ns->params.eui64 = ns_count + NVME_EUI64_DEFAULT; 833276dde4SHeinrich Schuchardt } 843276dde4SHeinrich Schuchardt 85516990f4SKlaus Jensen /* simple copy */ 86516990f4SKlaus Jensen id_ns->mssrl = cpu_to_le16(ns->params.mssrl); 87516990f4SKlaus Jensen id_ns->mcl = cpu_to_le32(ns->params.mcl); 88516990f4SKlaus Jensen id_ns->msrc = ns->params.msrc; 896870cfb8SHeinrich Schuchardt id_ns->eui64 = cpu_to_be64(ns->params.eui64); 90516990f4SKlaus Jensen 916a674bc2SMinwoo Im ds = 31 - clz32(ns->blkconf.logical_block_size); 926a674bc2SMinwoo Im ms = ns->params.ms; 937f0f1aceSKlaus Jensen 9418de1526SGollu Appalanaidu id_ns->mc = NVME_ID_NS_MC_EXTENDED | NVME_ID_NS_MC_SEPARATE; 95bc3a65e9SKlaus Jensen 96421a3092SGollu Appalanaidu if (ms && ns->params.mset) { 9718de1526SGollu Appalanaidu id_ns->flbas |= NVME_ID_NS_FLBAS_EXTENDED; 98bc3a65e9SKlaus Jensen } 99146f720cSKlaus Jensen 100146f720cSKlaus Jensen id_ns->dpc = 0x1f; 101421a3092SGollu Appalanaidu id_ns->dps = ns->params.pi; 102421a3092SGollu Appalanaidu if (ns->params.pi && ns->params.pil) { 103421a3092SGollu Appalanaidu id_ns->dps |= NVME_ID_NS_DPS_FIRST_EIGHT; 104421a3092SGollu Appalanaidu } 1056a674bc2SMinwoo Im 10644219b60SNaveen Nagar ns->pif = ns->params.pif; 10744219b60SNaveen Nagar 108421a3092SGollu Appalanaidu static const NvmeLBAF lbaf[16] = { 1096a674bc2SMinwoo Im [0] = { .ds = 9 }, 1106a674bc2SMinwoo Im [1] = { .ds = 9, .ms = 8 }, 1116a674bc2SMinwoo Im [2] = { .ds = 9, .ms = 16 }, 1126a674bc2SMinwoo Im [3] = { .ds = 9, .ms = 64 }, 1136a674bc2SMinwoo Im [4] = { .ds = 12 }, 1146a674bc2SMinwoo Im [5] = { .ds = 12, .ms = 8 }, 1156a674bc2SMinwoo Im [6] = { .ds = 12, .ms = 16 }, 1166a674bc2SMinwoo Im [7] = { .ds = 12, .ms = 64 }, 1176a674bc2SMinwoo Im }; 1186a674bc2SMinwoo Im 119763c05dfSNaveen Nagar ns->nlbaf = 8; 120bc3a65e9SKlaus Jensen 121763c05dfSNaveen Nagar memcpy(&id_ns->lbaf, &lbaf, sizeof(lbaf)); 122763c05dfSNaveen Nagar 123763c05dfSNaveen Nagar for (i = 0; i < ns->nlbaf; i++) { 1246a674bc2SMinwoo Im NvmeLBAF *lbaf = &id_ns->lbaf[i]; 1256a674bc2SMinwoo Im if (lbaf->ds == ds) { 1266a674bc2SMinwoo Im if (lbaf->ms == ms) { 1276a674bc2SMinwoo Im id_ns->flbas |= i; 1286a674bc2SMinwoo Im goto lbaf_found; 1296a674bc2SMinwoo Im } 1306a674bc2SMinwoo Im } 1316a674bc2SMinwoo Im } 1326a674bc2SMinwoo Im 1336a674bc2SMinwoo Im /* add non-standard lba format */ 134763c05dfSNaveen Nagar id_ns->lbaf[ns->nlbaf].ds = ds; 135763c05dfSNaveen Nagar id_ns->lbaf[ns->nlbaf].ms = ms; 136763c05dfSNaveen Nagar ns->nlbaf++; 137763c05dfSNaveen Nagar 138763c05dfSNaveen Nagar id_ns->flbas |= i; 1396a674bc2SMinwoo Im 14044219b60SNaveen Nagar 1416a674bc2SMinwoo Im lbaf_found: 14244219b60SNaveen Nagar id_ns_nvm->elbaf[i] = (ns->pif & 0x3) << 7; 143763c05dfSNaveen Nagar id_ns->nlbaf = ns->nlbaf - 1; 144516990f4SKlaus Jensen nvme_ns_init_format(ns); 145e4e430b3SKlaus Jensen 1462605257aSKlaus Jensen return 0; 1477f0f1aceSKlaus Jensen } 1487f0f1aceSKlaus Jensen 149337ccd76SMinwoo Im static int nvme_ns_init_blk(NvmeNamespace *ns, Error **errp) 1507f0f1aceSKlaus Jensen { 15186b1cf32SKevin Wolf bool read_only; 15286b1cf32SKevin Wolf 1537f0f1aceSKlaus Jensen if (!blkconf_blocksizes(&ns->blkconf, errp)) { 1547f0f1aceSKlaus Jensen return -1; 1557f0f1aceSKlaus Jensen } 1567f0f1aceSKlaus Jensen 15786b1cf32SKevin Wolf read_only = !blk_supports_write_perm(ns->blkconf.blk); 15886b1cf32SKevin Wolf if (!blkconf_apply_backend_options(&ns->blkconf, read_only, false, errp)) { 1597f0f1aceSKlaus Jensen return -1; 1607f0f1aceSKlaus Jensen } 1617f0f1aceSKlaus Jensen 1622605257aSKlaus Jensen if (ns->blkconf.discard_granularity == -1) { 1632605257aSKlaus Jensen ns->blkconf.discard_granularity = 1642605257aSKlaus Jensen MAX(ns->blkconf.logical_block_size, MIN_DISCARD_GRANULARITY); 1652605257aSKlaus Jensen } 1662605257aSKlaus Jensen 1677f0f1aceSKlaus Jensen ns->size = blk_getlength(ns->blkconf.blk); 1687f0f1aceSKlaus Jensen if (ns->size < 0) { 1697f0f1aceSKlaus Jensen error_setg_errno(errp, -ns->size, "could not get blockdev size"); 1707f0f1aceSKlaus Jensen return -1; 1717f0f1aceSKlaus Jensen } 1727f0f1aceSKlaus Jensen 1737f0f1aceSKlaus Jensen return 0; 1747f0f1aceSKlaus Jensen } 1757f0f1aceSKlaus Jensen 176a479335bSDmitry Fomichev static int nvme_ns_zoned_check_calc_geometry(NvmeNamespace *ns, Error **errp) 177a479335bSDmitry Fomichev { 178a479335bSDmitry Fomichev uint64_t zone_size, zone_cap; 179a479335bSDmitry Fomichev 180a479335bSDmitry Fomichev /* Make sure that the values of ZNS properties are sane */ 181a479335bSDmitry Fomichev if (ns->params.zone_size_bs) { 182a479335bSDmitry Fomichev zone_size = ns->params.zone_size_bs; 183a479335bSDmitry Fomichev } else { 184a479335bSDmitry Fomichev zone_size = NVME_DEFAULT_ZONE_SIZE; 185a479335bSDmitry Fomichev } 186a479335bSDmitry Fomichev if (ns->params.zone_cap_bs) { 187a479335bSDmitry Fomichev zone_cap = ns->params.zone_cap_bs; 188a479335bSDmitry Fomichev } else { 189a479335bSDmitry Fomichev zone_cap = zone_size; 190a479335bSDmitry Fomichev } 191a479335bSDmitry Fomichev if (zone_cap > zone_size) { 192a479335bSDmitry Fomichev error_setg(errp, "zone capacity %"PRIu64"B exceeds " 193a479335bSDmitry Fomichev "zone size %"PRIu64"B", zone_cap, zone_size); 194a479335bSDmitry Fomichev return -1; 195a479335bSDmitry Fomichev } 1966146f3ddSKlaus Jensen if (zone_size < ns->lbasz) { 197a479335bSDmitry Fomichev error_setg(errp, "zone size %"PRIu64"B too small, " 1986146f3ddSKlaus Jensen "must be at least %zuB", zone_size, ns->lbasz); 199a479335bSDmitry Fomichev return -1; 200a479335bSDmitry Fomichev } 2016146f3ddSKlaus Jensen if (zone_cap < ns->lbasz) { 202a479335bSDmitry Fomichev error_setg(errp, "zone capacity %"PRIu64"B too small, " 2036146f3ddSKlaus Jensen "must be at least %zuB", zone_cap, ns->lbasz); 204a479335bSDmitry Fomichev return -1; 205a479335bSDmitry Fomichev } 206a479335bSDmitry Fomichev 207a479335bSDmitry Fomichev /* 208a479335bSDmitry Fomichev * Save the main zone geometry values to avoid 209a479335bSDmitry Fomichev * calculating them later again. 210a479335bSDmitry Fomichev */ 2116146f3ddSKlaus Jensen ns->zone_size = zone_size / ns->lbasz; 2126146f3ddSKlaus Jensen ns->zone_capacity = zone_cap / ns->lbasz; 2136146f3ddSKlaus Jensen ns->num_zones = le64_to_cpu(ns->id_ns.nsze) / ns->zone_size; 2148d18ddcdSDmitry Fomichev 2158d18ddcdSDmitry Fomichev /* Do a few more sanity checks of ZNS properties */ 216044f1876SMinwoo Im if (!ns->num_zones) { 217044f1876SMinwoo Im error_setg(errp, 218044f1876SMinwoo Im "insufficient drive capacity, must be at least the size " 219044f1876SMinwoo Im "of one zone (%"PRIu64"B)", zone_size); 220044f1876SMinwoo Im return -1; 221044f1876SMinwoo Im } 222044f1876SMinwoo Im 223a479335bSDmitry Fomichev return 0; 224a479335bSDmitry Fomichev } 225a479335bSDmitry Fomichev 226a479335bSDmitry Fomichev static void nvme_ns_zoned_init_state(NvmeNamespace *ns) 227a479335bSDmitry Fomichev { 228a479335bSDmitry Fomichev uint64_t start = 0, zone_size = ns->zone_size; 229a479335bSDmitry Fomichev uint64_t capacity = ns->num_zones * zone_size; 230a479335bSDmitry Fomichev NvmeZone *zone; 231a479335bSDmitry Fomichev int i; 232a479335bSDmitry Fomichev 233a479335bSDmitry Fomichev ns->zone_array = g_new0(NvmeZone, ns->num_zones); 2341a9290adSDmitry Fomichev if (ns->params.zd_extension_size) { 2351a9290adSDmitry Fomichev ns->zd_extensions = g_malloc0(ns->params.zd_extension_size * 2361a9290adSDmitry Fomichev ns->num_zones); 2371a9290adSDmitry Fomichev } 238a479335bSDmitry Fomichev 239a479335bSDmitry Fomichev QTAILQ_INIT(&ns->exp_open_zones); 240a479335bSDmitry Fomichev QTAILQ_INIT(&ns->imp_open_zones); 241a479335bSDmitry Fomichev QTAILQ_INIT(&ns->closed_zones); 242a479335bSDmitry Fomichev QTAILQ_INIT(&ns->full_zones); 243a479335bSDmitry Fomichev 244a479335bSDmitry Fomichev zone = ns->zone_array; 245a479335bSDmitry Fomichev for (i = 0; i < ns->num_zones; i++, zone++) { 246a479335bSDmitry Fomichev if (start + zone_size > capacity) { 247a479335bSDmitry Fomichev zone_size = capacity - start; 248a479335bSDmitry Fomichev } 249a479335bSDmitry Fomichev zone->d.zt = NVME_ZONE_TYPE_SEQ_WRITE; 250a479335bSDmitry Fomichev nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY); 251a479335bSDmitry Fomichev zone->d.za = 0; 252a479335bSDmitry Fomichev zone->d.zcap = ns->zone_capacity; 253a479335bSDmitry Fomichev zone->d.zslba = start; 254a479335bSDmitry Fomichev zone->d.wp = start; 255a479335bSDmitry Fomichev zone->w_ptr = start; 256a479335bSDmitry Fomichev start += zone_size; 257a479335bSDmitry Fomichev } 258a479335bSDmitry Fomichev 259a479335bSDmitry Fomichev ns->zone_size_log2 = 0; 260a479335bSDmitry Fomichev if (is_power_of_2(ns->zone_size)) { 261a479335bSDmitry Fomichev ns->zone_size_log2 = 63 - clz64(ns->zone_size); 262a479335bSDmitry Fomichev } 263a479335bSDmitry Fomichev } 264a479335bSDmitry Fomichev 2656a674bc2SMinwoo Im static void nvme_ns_init_zoned(NvmeNamespace *ns) 266a479335bSDmitry Fomichev { 267a479335bSDmitry Fomichev NvmeIdNsZoned *id_ns_z; 2686a674bc2SMinwoo Im int i; 269a479335bSDmitry Fomichev 270a479335bSDmitry Fomichev nvme_ns_zoned_init_state(ns); 271a479335bSDmitry Fomichev 272b21e2380SMarkus Armbruster id_ns_z = g_new0(NvmeIdNsZoned, 1); 273a479335bSDmitry Fomichev 274312c3531SGollu Appalanaidu /* MAR/MOR are zeroes-based, FFFFFFFFFh means no limit */ 2758d18ddcdSDmitry Fomichev id_ns_z->mar = cpu_to_le32(ns->params.max_active_zones - 1); 2768d18ddcdSDmitry Fomichev id_ns_z->mor = cpu_to_le32(ns->params.max_open_zones - 1); 277a479335bSDmitry Fomichev id_ns_z->zoc = 0; 27825872031SKlaus Jensen id_ns_z->ozcs = ns->params.cross_zone_read ? 27925872031SKlaus Jensen NVME_ID_NS_ZONED_OZCS_RAZB : 0x00; 280a479335bSDmitry Fomichev 2816a674bc2SMinwoo Im for (i = 0; i <= ns->id_ns.nlbaf; i++) { 2826a674bc2SMinwoo Im id_ns_z->lbafe[i].zsze = cpu_to_le64(ns->zone_size); 2836a674bc2SMinwoo Im id_ns_z->lbafe[i].zdes = 2841a9290adSDmitry Fomichev ns->params.zd_extension_size >> 6; /* Units of 64B */ 2856a674bc2SMinwoo Im } 286a479335bSDmitry Fomichev 287e321b4cdSKlaus Jensen if (ns->params.zrwas) { 288e321b4cdSKlaus Jensen ns->zns.numzrwa = ns->params.numzrwa ? 289e321b4cdSKlaus Jensen ns->params.numzrwa : ns->num_zones; 290e321b4cdSKlaus Jensen 291e321b4cdSKlaus Jensen ns->zns.zrwas = ns->params.zrwas >> ns->lbaf.ds; 292e321b4cdSKlaus Jensen ns->zns.zrwafg = ns->params.zrwafg >> ns->lbaf.ds; 293e321b4cdSKlaus Jensen 294e321b4cdSKlaus Jensen id_ns_z->ozcs |= NVME_ID_NS_ZONED_OZCS_ZRWASUP; 295e321b4cdSKlaus Jensen id_ns_z->zrwacap = NVME_ID_NS_ZONED_ZRWACAP_EXPFLUSHSUP; 296e321b4cdSKlaus Jensen 297e321b4cdSKlaus Jensen id_ns_z->numzrwa = cpu_to_le32(ns->params.numzrwa); 298e321b4cdSKlaus Jensen id_ns_z->zrwas = cpu_to_le16(ns->zns.zrwas); 299e321b4cdSKlaus Jensen id_ns_z->zrwafg = cpu_to_le16(ns->zns.zrwafg); 300e321b4cdSKlaus Jensen } 301e321b4cdSKlaus Jensen 302e321b4cdSKlaus Jensen id_ns_z->ozcs = cpu_to_le16(id_ns_z->ozcs); 303e321b4cdSKlaus Jensen 304a479335bSDmitry Fomichev ns->csi = NVME_CSI_ZONED; 305a479335bSDmitry Fomichev ns->id_ns.nsze = cpu_to_le64(ns->num_zones * ns->zone_size); 306a479335bSDmitry Fomichev ns->id_ns.ncap = ns->id_ns.nsze; 307a479335bSDmitry Fomichev ns->id_ns.nuse = ns->id_ns.ncap; 308a479335bSDmitry Fomichev 3091b5804a8SKlaus Jensen /* 3101b5804a8SKlaus Jensen * The device uses the BDRV_BLOCK_ZERO flag to determine the "deallocated" 3111b5804a8SKlaus Jensen * status of logical blocks. Since the spec defines that logical blocks 3121b5804a8SKlaus Jensen * SHALL be deallocated when then zone is in the Empty or Offline states, 3131b5804a8SKlaus Jensen * we can only support DULBE if the zone size is a multiple of the 3141b5804a8SKlaus Jensen * calculated NPDG. 3151b5804a8SKlaus Jensen */ 3161b5804a8SKlaus Jensen if (ns->zone_size % (ns->id_ns.npdg + 1)) { 3171b5804a8SKlaus Jensen warn_report("the zone size (%"PRIu64" blocks) is not a multiple of " 3181b5804a8SKlaus Jensen "the calculated deallocation granularity (%d blocks); " 3191b5804a8SKlaus Jensen "DULBE support disabled", 3201b5804a8SKlaus Jensen ns->zone_size, ns->id_ns.npdg + 1); 3211b5804a8SKlaus Jensen 3221b5804a8SKlaus Jensen ns->id_ns.nsfeat &= ~0x4; 3231b5804a8SKlaus Jensen } 3241b5804a8SKlaus Jensen 325a479335bSDmitry Fomichev ns->id_ns_zoned = id_ns_z; 326a479335bSDmitry Fomichev } 327a479335bSDmitry Fomichev 328a479335bSDmitry Fomichev static void nvme_clear_zone(NvmeNamespace *ns, NvmeZone *zone) 329a479335bSDmitry Fomichev { 330a479335bSDmitry Fomichev uint8_t state; 331a479335bSDmitry Fomichev 332a479335bSDmitry Fomichev zone->w_ptr = zone->d.wp; 333a479335bSDmitry Fomichev state = nvme_get_zone_state(zone); 3341a9290adSDmitry Fomichev if (zone->d.wp != zone->d.zslba || 3351a9290adSDmitry Fomichev (zone->d.za & NVME_ZA_ZD_EXT_VALID)) { 336a479335bSDmitry Fomichev if (state != NVME_ZONE_STATE_CLOSED) { 337a479335bSDmitry Fomichev trace_pci_nvme_clear_ns_close(state, zone->d.zslba); 338a479335bSDmitry Fomichev nvme_set_zone_state(zone, NVME_ZONE_STATE_CLOSED); 339a479335bSDmitry Fomichev } 3408d18ddcdSDmitry Fomichev nvme_aor_inc_active(ns); 341a479335bSDmitry Fomichev QTAILQ_INSERT_HEAD(&ns->closed_zones, zone, entry); 342a479335bSDmitry Fomichev } else { 343a479335bSDmitry Fomichev trace_pci_nvme_clear_ns_reset(state, zone->d.zslba); 344e321b4cdSKlaus Jensen if (zone->d.za & NVME_ZA_ZRWA_VALID) { 345e321b4cdSKlaus Jensen zone->d.za &= ~NVME_ZA_ZRWA_VALID; 346e321b4cdSKlaus Jensen ns->zns.numzrwa++; 347e321b4cdSKlaus Jensen } 348a479335bSDmitry Fomichev nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY); 349a479335bSDmitry Fomichev } 350a479335bSDmitry Fomichev } 351a479335bSDmitry Fomichev 352a479335bSDmitry Fomichev /* 353a479335bSDmitry Fomichev * Close all the zones that are currently open. 354a479335bSDmitry Fomichev */ 355a479335bSDmitry Fomichev static void nvme_zoned_ns_shutdown(NvmeNamespace *ns) 356a479335bSDmitry Fomichev { 357a479335bSDmitry Fomichev NvmeZone *zone, *next; 358a479335bSDmitry Fomichev 359a479335bSDmitry Fomichev QTAILQ_FOREACH_SAFE(zone, &ns->closed_zones, entry, next) { 360a479335bSDmitry Fomichev QTAILQ_REMOVE(&ns->closed_zones, zone, entry); 3618d18ddcdSDmitry Fomichev nvme_aor_dec_active(ns); 362a479335bSDmitry Fomichev nvme_clear_zone(ns, zone); 363a479335bSDmitry Fomichev } 364a479335bSDmitry Fomichev QTAILQ_FOREACH_SAFE(zone, &ns->imp_open_zones, entry, next) { 365a479335bSDmitry Fomichev QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry); 3668d18ddcdSDmitry Fomichev nvme_aor_dec_open(ns); 3678d18ddcdSDmitry Fomichev nvme_aor_dec_active(ns); 368a479335bSDmitry Fomichev nvme_clear_zone(ns, zone); 369a479335bSDmitry Fomichev } 370a479335bSDmitry Fomichev QTAILQ_FOREACH_SAFE(zone, &ns->exp_open_zones, entry, next) { 371a479335bSDmitry Fomichev QTAILQ_REMOVE(&ns->exp_open_zones, zone, entry); 3728d18ddcdSDmitry Fomichev nvme_aor_dec_open(ns); 3738d18ddcdSDmitry Fomichev nvme_aor_dec_active(ns); 374a479335bSDmitry Fomichev nvme_clear_zone(ns, zone); 375a479335bSDmitry Fomichev } 3768d18ddcdSDmitry Fomichev 3778d18ddcdSDmitry Fomichev assert(ns->nr_open_zones == 0); 378a479335bSDmitry Fomichev } 379a479335bSDmitry Fomichev 3805e4f6bccSKlaus Jensen static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp) 3817f0f1aceSKlaus Jensen { 38244219b60SNaveen Nagar unsigned int pi_size; 38344219b60SNaveen Nagar 3847f0f1aceSKlaus Jensen if (!ns->blkconf.blk) { 3857f0f1aceSKlaus Jensen error_setg(errp, "block backend not configured"); 3867f0f1aceSKlaus Jensen return -1; 3877f0f1aceSKlaus Jensen } 3887f0f1aceSKlaus Jensen 38944219b60SNaveen Nagar if (ns->params.pi) { 39044219b60SNaveen Nagar if (ns->params.pi > NVME_ID_NS_DPS_TYPE_3) { 39144219b60SNaveen Nagar error_setg(errp, "invalid 'pi' value"); 392146f720cSKlaus Jensen return -1; 393146f720cSKlaus Jensen } 394146f720cSKlaus Jensen 39544219b60SNaveen Nagar switch (ns->params.pif) { 39644219b60SNaveen Nagar case NVME_PI_GUARD_16: 39744219b60SNaveen Nagar pi_size = 8; 39844219b60SNaveen Nagar break; 39944219b60SNaveen Nagar case NVME_PI_GUARD_64: 40044219b60SNaveen Nagar pi_size = 16; 40144219b60SNaveen Nagar break; 40244219b60SNaveen Nagar default: 40344219b60SNaveen Nagar error_setg(errp, "invalid 'pif'"); 40444219b60SNaveen Nagar return -1; 40544219b60SNaveen Nagar } 40644219b60SNaveen Nagar 40744219b60SNaveen Nagar if (ns->params.ms < pi_size) { 40844219b60SNaveen Nagar error_setg(errp, "at least %u bytes of metadata required to " 40944219b60SNaveen Nagar "enable protection information", pi_size); 41044219b60SNaveen Nagar return -1; 41144219b60SNaveen Nagar } 41244219b60SNaveen Nagar } 41344219b60SNaveen Nagar 414e5489356SKlaus Jensen if (ns->params.nsid > NVME_MAX_NAMESPACES) { 415e5489356SKlaus Jensen error_setg(errp, "invalid namespace id (must be between 0 and %d)", 416e5489356SKlaus Jensen NVME_MAX_NAMESPACES); 417e5489356SKlaus Jensen return -1; 418e5489356SKlaus Jensen } 419e5489356SKlaus Jensen 42049ad39c5SKlaus Jensen if (ns->params.zoned) { 42149ad39c5SKlaus Jensen if (ns->params.max_active_zones) { 42249ad39c5SKlaus Jensen if (ns->params.max_open_zones > ns->params.max_active_zones) { 42349ad39c5SKlaus Jensen error_setg(errp, "max_open_zones (%u) exceeds " 42449ad39c5SKlaus Jensen "max_active_zones (%u)", ns->params.max_open_zones, 42549ad39c5SKlaus Jensen ns->params.max_active_zones); 42649ad39c5SKlaus Jensen return -1; 42749ad39c5SKlaus Jensen } 42849ad39c5SKlaus Jensen 42949ad39c5SKlaus Jensen if (!ns->params.max_open_zones) { 43049ad39c5SKlaus Jensen ns->params.max_open_zones = ns->params.max_active_zones; 43149ad39c5SKlaus Jensen } 43249ad39c5SKlaus Jensen } 43349ad39c5SKlaus Jensen 43449ad39c5SKlaus Jensen if (ns->params.zd_extension_size) { 43549ad39c5SKlaus Jensen if (ns->params.zd_extension_size & 0x3f) { 43649ad39c5SKlaus Jensen error_setg(errp, "zone descriptor extension size must be a " 43749ad39c5SKlaus Jensen "multiple of 64B"); 43849ad39c5SKlaus Jensen return -1; 43949ad39c5SKlaus Jensen } 44049ad39c5SKlaus Jensen if ((ns->params.zd_extension_size >> 6) > 0xff) { 44149ad39c5SKlaus Jensen error_setg(errp, 44249ad39c5SKlaus Jensen "zone descriptor extension size is too large"); 44349ad39c5SKlaus Jensen return -1; 44449ad39c5SKlaus Jensen } 44549ad39c5SKlaus Jensen } 446e321b4cdSKlaus Jensen 447e321b4cdSKlaus Jensen if (ns->params.zrwas) { 448e321b4cdSKlaus Jensen if (ns->params.zrwas % ns->blkconf.logical_block_size) { 449e321b4cdSKlaus Jensen error_setg(errp, "zone random write area size (zoned.zrwas " 450e321b4cdSKlaus Jensen "%"PRIu64") must be a multiple of the logical " 451e321b4cdSKlaus Jensen "block size (logical_block_size %"PRIu32")", 452e321b4cdSKlaus Jensen ns->params.zrwas, ns->blkconf.logical_block_size); 453e321b4cdSKlaus Jensen return -1; 454e321b4cdSKlaus Jensen } 455e321b4cdSKlaus Jensen 456e321b4cdSKlaus Jensen if (ns->params.zrwafg == -1) { 457e321b4cdSKlaus Jensen ns->params.zrwafg = ns->blkconf.logical_block_size; 458e321b4cdSKlaus Jensen } 459e321b4cdSKlaus Jensen 460e321b4cdSKlaus Jensen if (ns->params.zrwas % ns->params.zrwafg) { 461e321b4cdSKlaus Jensen error_setg(errp, "zone random write area size (zoned.zrwas " 462e321b4cdSKlaus Jensen "%"PRIu64") must be a multiple of the zone random " 463e321b4cdSKlaus Jensen "write area flush granularity (zoned.zrwafg, " 464e321b4cdSKlaus Jensen "%"PRIu64")", ns->params.zrwas, ns->params.zrwafg); 465e321b4cdSKlaus Jensen return -1; 466e321b4cdSKlaus Jensen } 467e321b4cdSKlaus Jensen 468e321b4cdSKlaus Jensen if (ns->params.max_active_zones) { 469e321b4cdSKlaus Jensen if (ns->params.numzrwa > ns->params.max_active_zones) { 470e321b4cdSKlaus Jensen error_setg(errp, "number of zone random write area " 471e321b4cdSKlaus Jensen "resources (zoned.numzrwa, %d) must be less " 472e321b4cdSKlaus Jensen "than or equal to maximum active resources " 473e321b4cdSKlaus Jensen "(zoned.max_active_zones, %d)", 474e321b4cdSKlaus Jensen ns->params.numzrwa, 475e321b4cdSKlaus Jensen ns->params.max_active_zones); 476e321b4cdSKlaus Jensen return -1; 477e321b4cdSKlaus Jensen } 478e321b4cdSKlaus Jensen } 479e321b4cdSKlaus Jensen } 48049ad39c5SKlaus Jensen } 48149ad39c5SKlaus Jensen 4827f0f1aceSKlaus Jensen return 0; 4837f0f1aceSKlaus Jensen } 4847f0f1aceSKlaus Jensen 4855e4f6bccSKlaus Jensen int nvme_ns_setup(NvmeNamespace *ns, Error **errp) 4867f0f1aceSKlaus Jensen { 4875e4f6bccSKlaus Jensen if (nvme_ns_check_constraints(ns, errp)) { 4887f0f1aceSKlaus Jensen return -1; 4897f0f1aceSKlaus Jensen } 4907f0f1aceSKlaus Jensen 491337ccd76SMinwoo Im if (nvme_ns_init_blk(ns, errp)) { 4927f0f1aceSKlaus Jensen return -1; 4937f0f1aceSKlaus Jensen } 4947f0f1aceSKlaus Jensen 4952605257aSKlaus Jensen if (nvme_ns_init(ns, errp)) { 4962605257aSKlaus Jensen return -1; 4972605257aSKlaus Jensen } 498a479335bSDmitry Fomichev if (ns->params.zoned) { 499a479335bSDmitry Fomichev if (nvme_ns_zoned_check_calc_geometry(ns, errp) != 0) { 500a479335bSDmitry Fomichev return -1; 501a479335bSDmitry Fomichev } 5026a674bc2SMinwoo Im nvme_ns_init_zoned(ns); 503a479335bSDmitry Fomichev } 50454064e51SKlaus Jensen 5057f0f1aceSKlaus Jensen return 0; 5067f0f1aceSKlaus Jensen } 5077f0f1aceSKlaus Jensen 5087f0f1aceSKlaus Jensen void nvme_ns_drain(NvmeNamespace *ns) 5097f0f1aceSKlaus Jensen { 5107f0f1aceSKlaus Jensen blk_drain(ns->blkconf.blk); 5117f0f1aceSKlaus Jensen } 5127f0f1aceSKlaus Jensen 513ba69f224SDmitry Fomichev void nvme_ns_shutdown(NvmeNamespace *ns) 5147f0f1aceSKlaus Jensen { 5157f0f1aceSKlaus Jensen blk_flush(ns->blkconf.blk); 516a479335bSDmitry Fomichev if (ns->params.zoned) { 517a479335bSDmitry Fomichev nvme_zoned_ns_shutdown(ns); 518a479335bSDmitry Fomichev } 519a479335bSDmitry Fomichev } 520a479335bSDmitry Fomichev 521a479335bSDmitry Fomichev void nvme_ns_cleanup(NvmeNamespace *ns) 522a479335bSDmitry Fomichev { 523a479335bSDmitry Fomichev if (ns->params.zoned) { 524a479335bSDmitry Fomichev g_free(ns->id_ns_zoned); 525a479335bSDmitry Fomichev g_free(ns->zone_array); 5261a9290adSDmitry Fomichev g_free(ns->zd_extensions); 527a479335bSDmitry Fomichev } 5287f0f1aceSKlaus Jensen } 5297f0f1aceSKlaus Jensen 5305ffbaeedSKlaus Jensen static void nvme_ns_unrealize(DeviceState *dev) 5315ffbaeedSKlaus Jensen { 5325ffbaeedSKlaus Jensen NvmeNamespace *ns = NVME_NS(dev); 5335ffbaeedSKlaus Jensen 5345ffbaeedSKlaus Jensen nvme_ns_drain(ns); 5355ffbaeedSKlaus Jensen nvme_ns_shutdown(ns); 5365ffbaeedSKlaus Jensen nvme_ns_cleanup(ns); 5375ffbaeedSKlaus Jensen } 5385ffbaeedSKlaus Jensen 5397f0f1aceSKlaus Jensen static void nvme_ns_realize(DeviceState *dev, Error **errp) 5407f0f1aceSKlaus Jensen { 5417f0f1aceSKlaus Jensen NvmeNamespace *ns = NVME_NS(dev); 5427f0f1aceSKlaus Jensen BusState *s = qdev_get_parent_bus(dev); 5437f0f1aceSKlaus Jensen NvmeCtrl *n = NVME(s->parent); 544e5489356SKlaus Jensen NvmeSubsystem *subsys = n->subsys; 545e5489356SKlaus Jensen uint32_t nsid = ns->params.nsid; 546e5489356SKlaus Jensen int i; 5477f0f1aceSKlaus Jensen 5485e4f6bccSKlaus Jensen if (!n->subsys) { 549dfa82ac2SNiklas Cassel /* If no subsys, the ns cannot be attached to more than one ctrl. */ 550dfa82ac2SNiklas Cassel ns->params.shared = false; 5515e4f6bccSKlaus Jensen if (ns->params.detached) { 5525e4f6bccSKlaus Jensen error_setg(errp, "detached requires that the nvme device is " 5535e4f6bccSKlaus Jensen "linked to an nvme-subsys device"); 5545e4f6bccSKlaus Jensen return; 5555e4f6bccSKlaus Jensen } 5565ffbaeedSKlaus Jensen } else { 5575ffbaeedSKlaus Jensen /* 5585ffbaeedSKlaus Jensen * If this namespace belongs to a subsystem (through a link on the 5595ffbaeedSKlaus Jensen * controller device), reparent the device. 5605ffbaeedSKlaus Jensen */ 5615ffbaeedSKlaus Jensen if (!qdev_set_parent_bus(dev, &subsys->bus.parent_bus, errp)) { 5625ffbaeedSKlaus Jensen return; 5635ffbaeedSKlaus Jensen } 564*534a93d3SNiklas Cassel ns->subsys = subsys; 5655e4f6bccSKlaus Jensen } 5665e4f6bccSKlaus Jensen 5675e4f6bccSKlaus Jensen if (nvme_ns_setup(ns, errp)) { 5687f0f1aceSKlaus Jensen return; 5697f0f1aceSKlaus Jensen } 57015d024d4SMinwoo Im 571e5489356SKlaus Jensen if (!nsid) { 572e5489356SKlaus Jensen for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { 573e5489356SKlaus Jensen if (nvme_ns(n, i) || nvme_subsys_ns(subsys, i)) { 574e5489356SKlaus Jensen continue; 575e5489356SKlaus Jensen } 576e5489356SKlaus Jensen 577e5489356SKlaus Jensen nsid = ns->params.nsid = i; 578e5489356SKlaus Jensen break; 579e5489356SKlaus Jensen } 580e5489356SKlaus Jensen 581e5489356SKlaus Jensen if (!nsid) { 582e5489356SKlaus Jensen error_setg(errp, "no free namespace id"); 583e5707685SMinwoo Im return; 584e5707685SMinwoo Im } 585e5707685SMinwoo Im } else { 586e5489356SKlaus Jensen if (nvme_ns(n, nsid) || nvme_subsys_ns(subsys, nsid)) { 587e5489356SKlaus Jensen error_setg(errp, "namespace id '%d' already allocated", nsid); 58815d024d4SMinwoo Im return; 58915d024d4SMinwoo Im } 590e5707685SMinwoo Im } 591e5489356SKlaus Jensen 592e5489356SKlaus Jensen if (subsys) { 593e5489356SKlaus Jensen subsys->namespaces[nsid] = ns; 594e5489356SKlaus Jensen 595e5489356SKlaus Jensen if (ns->params.detached) { 596e5489356SKlaus Jensen return; 597e5489356SKlaus Jensen } 598e5489356SKlaus Jensen 599e5489356SKlaus Jensen if (ns->params.shared) { 600e5489356SKlaus Jensen for (i = 0; i < ARRAY_SIZE(subsys->ctrls); i++) { 601e5489356SKlaus Jensen NvmeCtrl *ctrl = subsys->ctrls[i]; 602e5489356SKlaus Jensen 60399f48ae7SLukasz Maniak if (ctrl && ctrl != SUBSYS_SLOT_RSVD) { 604e5489356SKlaus Jensen nvme_attach_ns(ctrl, ns); 605e5489356SKlaus Jensen } 606e5489356SKlaus Jensen } 607e5489356SKlaus Jensen 608e5489356SKlaus Jensen return; 609e5489356SKlaus Jensen } 610e5489356SKlaus Jensen } 611e5489356SKlaus Jensen 612e5489356SKlaus Jensen nvme_attach_ns(n, ns); 6137f0f1aceSKlaus Jensen } 6147f0f1aceSKlaus Jensen 6157f0f1aceSKlaus Jensen static Property nvme_ns_props[] = { 6167f0f1aceSKlaus Jensen DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf), 617037953b5SMinwoo Im DEFINE_PROP_BOOL("detached", NvmeNamespace, params.detached, false), 618916b0f0bSKlaus Jensen DEFINE_PROP_BOOL("shared", NvmeNamespace, params.shared, true), 6197f0f1aceSKlaus Jensen DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0), 620bd9f371cSKlaus Jensen DEFINE_PROP_UUID_NODEFAULT("uuid", NvmeNamespace, params.uuid), 6216870cfb8SHeinrich Schuchardt DEFINE_PROP_UINT64("eui64", NvmeNamespace, params.eui64, 0), 622bc3a65e9SKlaus Jensen DEFINE_PROP_UINT16("ms", NvmeNamespace, params.ms, 0), 623bc3a65e9SKlaus Jensen DEFINE_PROP_UINT8("mset", NvmeNamespace, params.mset, 0), 624146f720cSKlaus Jensen DEFINE_PROP_UINT8("pi", NvmeNamespace, params.pi, 0), 625146f720cSKlaus Jensen DEFINE_PROP_UINT8("pil", NvmeNamespace, params.pil, 0), 62644219b60SNaveen Nagar DEFINE_PROP_UINT8("pif", NvmeNamespace, params.pif, 0), 627e4e430b3SKlaus Jensen DEFINE_PROP_UINT16("mssrl", NvmeNamespace, params.mssrl, 128), 628e4e430b3SKlaus Jensen DEFINE_PROP_UINT32("mcl", NvmeNamespace, params.mcl, 128), 629e4e430b3SKlaus Jensen DEFINE_PROP_UINT8("msrc", NvmeNamespace, params.msrc, 127), 630a479335bSDmitry Fomichev DEFINE_PROP_BOOL("zoned", NvmeNamespace, params.zoned, false), 631a479335bSDmitry Fomichev DEFINE_PROP_SIZE("zoned.zone_size", NvmeNamespace, params.zone_size_bs, 632a479335bSDmitry Fomichev NVME_DEFAULT_ZONE_SIZE), 633a479335bSDmitry Fomichev DEFINE_PROP_SIZE("zoned.zone_capacity", NvmeNamespace, params.zone_cap_bs, 634a479335bSDmitry Fomichev 0), 635a479335bSDmitry Fomichev DEFINE_PROP_BOOL("zoned.cross_read", NvmeNamespace, 636a479335bSDmitry Fomichev params.cross_zone_read, false), 6378d18ddcdSDmitry Fomichev DEFINE_PROP_UINT32("zoned.max_active", NvmeNamespace, 6388d18ddcdSDmitry Fomichev params.max_active_zones, 0), 6398d18ddcdSDmitry Fomichev DEFINE_PROP_UINT32("zoned.max_open", NvmeNamespace, 6408d18ddcdSDmitry Fomichev params.max_open_zones, 0), 6411a9290adSDmitry Fomichev DEFINE_PROP_UINT32("zoned.descr_ext_size", NvmeNamespace, 6421a9290adSDmitry Fomichev params.zd_extension_size, 0), 643e321b4cdSKlaus Jensen DEFINE_PROP_UINT32("zoned.numzrwa", NvmeNamespace, params.numzrwa, 0), 644e321b4cdSKlaus Jensen DEFINE_PROP_SIZE("zoned.zrwas", NvmeNamespace, params.zrwas, 0), 645e321b4cdSKlaus Jensen DEFINE_PROP_SIZE("zoned.zrwafg", NvmeNamespace, params.zrwafg, -1), 6463276dde4SHeinrich Schuchardt DEFINE_PROP_BOOL("eui64-default", NvmeNamespace, params.eui64_default, 64736d83272SKlaus Jensen false), 6487f0f1aceSKlaus Jensen DEFINE_PROP_END_OF_LIST(), 6497f0f1aceSKlaus Jensen }; 6507f0f1aceSKlaus Jensen 6517f0f1aceSKlaus Jensen static void nvme_ns_class_init(ObjectClass *oc, void *data) 6527f0f1aceSKlaus Jensen { 6537f0f1aceSKlaus Jensen DeviceClass *dc = DEVICE_CLASS(oc); 6547f0f1aceSKlaus Jensen 6557f0f1aceSKlaus Jensen set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); 6567f0f1aceSKlaus Jensen 6577f0f1aceSKlaus Jensen dc->bus_type = TYPE_NVME_BUS; 6587f0f1aceSKlaus Jensen dc->realize = nvme_ns_realize; 6595ffbaeedSKlaus Jensen dc->unrealize = nvme_ns_unrealize; 6607f0f1aceSKlaus Jensen device_class_set_props(dc, nvme_ns_props); 6617f0f1aceSKlaus Jensen dc->desc = "Virtual NVMe namespace"; 6627f0f1aceSKlaus Jensen } 6637f0f1aceSKlaus Jensen 6647f0f1aceSKlaus Jensen static void nvme_ns_instance_init(Object *obj) 6657f0f1aceSKlaus Jensen { 6667f0f1aceSKlaus Jensen NvmeNamespace *ns = NVME_NS(obj); 6677f0f1aceSKlaus Jensen char *bootindex = g_strdup_printf("/namespace@%d,0", ns->params.nsid); 6687f0f1aceSKlaus Jensen 6697f0f1aceSKlaus Jensen device_add_bootindex_property(obj, &ns->bootindex, "bootindex", 6707f0f1aceSKlaus Jensen bootindex, DEVICE(obj)); 6717f0f1aceSKlaus Jensen 6727f0f1aceSKlaus Jensen g_free(bootindex); 6737f0f1aceSKlaus Jensen } 6747f0f1aceSKlaus Jensen 6757f0f1aceSKlaus Jensen static const TypeInfo nvme_ns_info = { 6767f0f1aceSKlaus Jensen .name = TYPE_NVME_NS, 6777f0f1aceSKlaus Jensen .parent = TYPE_DEVICE, 6787f0f1aceSKlaus Jensen .class_init = nvme_ns_class_init, 6797f0f1aceSKlaus Jensen .instance_size = sizeof(NvmeNamespace), 6807f0f1aceSKlaus Jensen .instance_init = nvme_ns_instance_init, 6817f0f1aceSKlaus Jensen }; 6827f0f1aceSKlaus Jensen 6837f0f1aceSKlaus Jensen static void nvme_ns_register_types(void) 6847f0f1aceSKlaus Jensen { 6857f0f1aceSKlaus Jensen type_register_static(&nvme_ns_info); 6867f0f1aceSKlaus Jensen } 6877f0f1aceSKlaus Jensen 6887f0f1aceSKlaus Jensen type_init(nvme_ns_register_types) 689