17f0f1aceSKlaus Jensen /* 27f0f1aceSKlaus Jensen * QEMU NVM Express Virtual Namespace 37f0f1aceSKlaus Jensen * 47f0f1aceSKlaus Jensen * Copyright (c) 2019 CNEX Labs 57f0f1aceSKlaus Jensen * Copyright (c) 2020 Samsung Electronics 67f0f1aceSKlaus Jensen * 77f0f1aceSKlaus Jensen * Authors: 87f0f1aceSKlaus Jensen * Klaus Jensen <k.jensen@samsung.com> 97f0f1aceSKlaus Jensen * 107f0f1aceSKlaus Jensen * This work is licensed under the terms of the GNU GPL, version 2. See the 117f0f1aceSKlaus Jensen * COPYING file in the top-level directory. 127f0f1aceSKlaus Jensen * 137f0f1aceSKlaus Jensen */ 147f0f1aceSKlaus Jensen 157f0f1aceSKlaus Jensen #include "qemu/osdep.h" 167f0f1aceSKlaus Jensen #include "qemu/units.h" 171b5804a8SKlaus Jensen #include "qemu/error-report.h" 187ef37c1cSKlaus Jensen #include "qapi/error.h" 197f0f1aceSKlaus Jensen #include "sysemu/sysemu.h" 207f0f1aceSKlaus Jensen #include "sysemu/block-backend.h" 217f0f1aceSKlaus Jensen 227f0f1aceSKlaus Jensen #include "nvme.h" 237ef37c1cSKlaus Jensen #include "trace.h" 247f0f1aceSKlaus Jensen 252605257aSKlaus Jensen #define MIN_DISCARD_GRANULARITY (4 * KiB) 26de482d1fSKlaus Jensen #define NVME_DEFAULT_ZONE_SIZE (128 * MiB) 272605257aSKlaus Jensen 28dc04d25eSMinwoo Im void nvme_ns_init_format(NvmeNamespace *ns) 29516990f4SKlaus Jensen { 30516990f4SKlaus Jensen NvmeIdNs *id_ns = &ns->id_ns; 31516990f4SKlaus Jensen BlockDriverInfo bdi; 32516990f4SKlaus Jensen int npdg, nlbas, ret; 33516990f4SKlaus Jensen 346146f3ddSKlaus Jensen ns->lbaf = id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)]; 356146f3ddSKlaus Jensen ns->lbasz = 1 << ns->lbaf.ds; 366146f3ddSKlaus Jensen 376146f3ddSKlaus Jensen nlbas = ns->size / (ns->lbasz + ns->lbaf.ms); 38516990f4SKlaus Jensen 39516990f4SKlaus Jensen id_ns->nsze = cpu_to_le64(nlbas); 40516990f4SKlaus Jensen 41516990f4SKlaus Jensen /* no thin provisioning */ 42516990f4SKlaus Jensen id_ns->ncap = id_ns->nsze; 43516990f4SKlaus Jensen id_ns->nuse = id_ns->ncap; 44516990f4SKlaus Jensen 45*3ef73f94SKlaus Jensen ns->moff = (int64_t)nlbas << ns->lbaf.ds; 46516990f4SKlaus Jensen 476146f3ddSKlaus Jensen npdg = ns->blkconf.discard_granularity / ns->lbasz; 48516990f4SKlaus Jensen 49516990f4SKlaus Jensen ret = bdrv_get_info(blk_bs(ns->blkconf.blk), &bdi); 50516990f4SKlaus Jensen if (ret >= 0 && bdi.cluster_size > ns->blkconf.discard_granularity) { 516146f3ddSKlaus Jensen npdg = bdi.cluster_size / ns->lbasz; 52516990f4SKlaus Jensen } 53516990f4SKlaus Jensen 54516990f4SKlaus Jensen id_ns->npda = id_ns->npdg = npdg - 1; 55516990f4SKlaus Jensen } 56516990f4SKlaus Jensen 572605257aSKlaus Jensen static int nvme_ns_init(NvmeNamespace *ns, Error **errp) 587f0f1aceSKlaus Jensen { 597f0f1aceSKlaus Jensen NvmeIdNs *id_ns = &ns->id_ns; 606a674bc2SMinwoo Im uint8_t ds; 616a674bc2SMinwoo Im uint16_t ms; 626a674bc2SMinwoo Im int i; 637f0f1aceSKlaus Jensen 64516990f4SKlaus Jensen ns->csi = NVME_CSI_NVM; 65dc04d25eSMinwoo Im ns->status = 0x0; 66516990f4SKlaus Jensen 67146f720cSKlaus Jensen ns->id_ns.dlfeat = 0x1; 687f0f1aceSKlaus Jensen 69516990f4SKlaus Jensen /* support DULBE and I/O optimization fields */ 70516990f4SKlaus Jensen id_ns->nsfeat |= (0x4 | 0x10); 71516990f4SKlaus Jensen 72e5489356SKlaus Jensen if (ns->params.shared) { 73516990f4SKlaus Jensen id_ns->nmic |= NVME_NMIC_NS_SHARED; 74516990f4SKlaus Jensen } 75516990f4SKlaus Jensen 76516990f4SKlaus Jensen /* simple copy */ 77516990f4SKlaus Jensen id_ns->mssrl = cpu_to_le16(ns->params.mssrl); 78516990f4SKlaus Jensen id_ns->mcl = cpu_to_le32(ns->params.mcl); 79516990f4SKlaus Jensen id_ns->msrc = ns->params.msrc; 80516990f4SKlaus Jensen 816a674bc2SMinwoo Im ds = 31 - clz32(ns->blkconf.logical_block_size); 826a674bc2SMinwoo Im ms = ns->params.ms; 837f0f1aceSKlaus Jensen 84bc3a65e9SKlaus Jensen if (ns->params.ms) { 85bc3a65e9SKlaus Jensen id_ns->mc = 0x3; 86bc3a65e9SKlaus Jensen 87bc3a65e9SKlaus Jensen if (ns->params.mset) { 88bc3a65e9SKlaus Jensen id_ns->flbas |= 0x10; 89bc3a65e9SKlaus Jensen } 90146f720cSKlaus Jensen 91146f720cSKlaus Jensen id_ns->dpc = 0x1f; 92146f720cSKlaus Jensen id_ns->dps = ((ns->params.pil & 0x1) << 3) | ns->params.pi; 936a674bc2SMinwoo Im 946a674bc2SMinwoo Im NvmeLBAF lbaf[16] = { 956a674bc2SMinwoo Im [0] = { .ds = 9 }, 966a674bc2SMinwoo Im [1] = { .ds = 9, .ms = 8 }, 976a674bc2SMinwoo Im [2] = { .ds = 9, .ms = 16 }, 986a674bc2SMinwoo Im [3] = { .ds = 9, .ms = 64 }, 996a674bc2SMinwoo Im [4] = { .ds = 12 }, 1006a674bc2SMinwoo Im [5] = { .ds = 12, .ms = 8 }, 1016a674bc2SMinwoo Im [6] = { .ds = 12, .ms = 16 }, 1026a674bc2SMinwoo Im [7] = { .ds = 12, .ms = 64 }, 1036a674bc2SMinwoo Im }; 1046a674bc2SMinwoo Im 1056a674bc2SMinwoo Im memcpy(&id_ns->lbaf, &lbaf, sizeof(lbaf)); 1066a674bc2SMinwoo Im id_ns->nlbaf = 7; 1076a674bc2SMinwoo Im } else { 1086a674bc2SMinwoo Im NvmeLBAF lbaf[16] = { 1096a674bc2SMinwoo Im [0] = { .ds = 9 }, 1106a674bc2SMinwoo Im [1] = { .ds = 12 }, 1116a674bc2SMinwoo Im }; 1126a674bc2SMinwoo Im 1136a674bc2SMinwoo Im memcpy(&id_ns->lbaf, &lbaf, sizeof(lbaf)); 1146a674bc2SMinwoo Im id_ns->nlbaf = 1; 115bc3a65e9SKlaus Jensen } 116bc3a65e9SKlaus Jensen 1176a674bc2SMinwoo Im for (i = 0; i <= id_ns->nlbaf; i++) { 1186a674bc2SMinwoo Im NvmeLBAF *lbaf = &id_ns->lbaf[i]; 1196a674bc2SMinwoo Im if (lbaf->ds == ds) { 1206a674bc2SMinwoo Im if (lbaf->ms == ms) { 1216a674bc2SMinwoo Im id_ns->flbas |= i; 1226a674bc2SMinwoo Im goto lbaf_found; 1236a674bc2SMinwoo Im } 1246a674bc2SMinwoo Im } 1256a674bc2SMinwoo Im } 1266a674bc2SMinwoo Im 1276a674bc2SMinwoo Im /* add non-standard lba format */ 1286a674bc2SMinwoo Im id_ns->nlbaf++; 1296a674bc2SMinwoo Im id_ns->lbaf[id_ns->nlbaf].ds = ds; 1306a674bc2SMinwoo Im id_ns->lbaf[id_ns->nlbaf].ms = ms; 1316a674bc2SMinwoo Im id_ns->flbas |= id_ns->nlbaf; 1326a674bc2SMinwoo Im 1336a674bc2SMinwoo Im lbaf_found: 134516990f4SKlaus Jensen nvme_ns_init_format(ns); 135e4e430b3SKlaus Jensen 1362605257aSKlaus Jensen return 0; 1377f0f1aceSKlaus Jensen } 1387f0f1aceSKlaus Jensen 139337ccd76SMinwoo Im static int nvme_ns_init_blk(NvmeNamespace *ns, Error **errp) 1407f0f1aceSKlaus Jensen { 14186b1cf32SKevin Wolf bool read_only; 14286b1cf32SKevin Wolf 1437f0f1aceSKlaus Jensen if (!blkconf_blocksizes(&ns->blkconf, errp)) { 1447f0f1aceSKlaus Jensen return -1; 1457f0f1aceSKlaus Jensen } 1467f0f1aceSKlaus Jensen 14786b1cf32SKevin Wolf read_only = !blk_supports_write_perm(ns->blkconf.blk); 14886b1cf32SKevin Wolf if (!blkconf_apply_backend_options(&ns->blkconf, read_only, false, errp)) { 1497f0f1aceSKlaus Jensen return -1; 1507f0f1aceSKlaus Jensen } 1517f0f1aceSKlaus Jensen 1522605257aSKlaus Jensen if (ns->blkconf.discard_granularity == -1) { 1532605257aSKlaus Jensen ns->blkconf.discard_granularity = 1542605257aSKlaus Jensen MAX(ns->blkconf.logical_block_size, MIN_DISCARD_GRANULARITY); 1552605257aSKlaus Jensen } 1562605257aSKlaus Jensen 1577f0f1aceSKlaus Jensen ns->size = blk_getlength(ns->blkconf.blk); 1587f0f1aceSKlaus Jensen if (ns->size < 0) { 1597f0f1aceSKlaus Jensen error_setg_errno(errp, -ns->size, "could not get blockdev size"); 1607f0f1aceSKlaus Jensen return -1; 1617f0f1aceSKlaus Jensen } 1627f0f1aceSKlaus Jensen 1637f0f1aceSKlaus Jensen return 0; 1647f0f1aceSKlaus Jensen } 1657f0f1aceSKlaus Jensen 166a479335bSDmitry Fomichev static int nvme_ns_zoned_check_calc_geometry(NvmeNamespace *ns, Error **errp) 167a479335bSDmitry Fomichev { 168a479335bSDmitry Fomichev uint64_t zone_size, zone_cap; 169a479335bSDmitry Fomichev 170a479335bSDmitry Fomichev /* Make sure that the values of ZNS properties are sane */ 171a479335bSDmitry Fomichev if (ns->params.zone_size_bs) { 172a479335bSDmitry Fomichev zone_size = ns->params.zone_size_bs; 173a479335bSDmitry Fomichev } else { 174a479335bSDmitry Fomichev zone_size = NVME_DEFAULT_ZONE_SIZE; 175a479335bSDmitry Fomichev } 176a479335bSDmitry Fomichev if (ns->params.zone_cap_bs) { 177a479335bSDmitry Fomichev zone_cap = ns->params.zone_cap_bs; 178a479335bSDmitry Fomichev } else { 179a479335bSDmitry Fomichev zone_cap = zone_size; 180a479335bSDmitry Fomichev } 181a479335bSDmitry Fomichev if (zone_cap > zone_size) { 182a479335bSDmitry Fomichev error_setg(errp, "zone capacity %"PRIu64"B exceeds " 183a479335bSDmitry Fomichev "zone size %"PRIu64"B", zone_cap, zone_size); 184a479335bSDmitry Fomichev return -1; 185a479335bSDmitry Fomichev } 1866146f3ddSKlaus Jensen if (zone_size < ns->lbasz) { 187a479335bSDmitry Fomichev error_setg(errp, "zone size %"PRIu64"B too small, " 1886146f3ddSKlaus Jensen "must be at least %zuB", zone_size, ns->lbasz); 189a479335bSDmitry Fomichev return -1; 190a479335bSDmitry Fomichev } 1916146f3ddSKlaus Jensen if (zone_cap < ns->lbasz) { 192a479335bSDmitry Fomichev error_setg(errp, "zone capacity %"PRIu64"B too small, " 1936146f3ddSKlaus Jensen "must be at least %zuB", zone_cap, ns->lbasz); 194a479335bSDmitry Fomichev return -1; 195a479335bSDmitry Fomichev } 196a479335bSDmitry Fomichev 197a479335bSDmitry Fomichev /* 198a479335bSDmitry Fomichev * Save the main zone geometry values to avoid 199a479335bSDmitry Fomichev * calculating them later again. 200a479335bSDmitry Fomichev */ 2016146f3ddSKlaus Jensen ns->zone_size = zone_size / ns->lbasz; 2026146f3ddSKlaus Jensen ns->zone_capacity = zone_cap / ns->lbasz; 2036146f3ddSKlaus Jensen ns->num_zones = le64_to_cpu(ns->id_ns.nsze) / ns->zone_size; 2048d18ddcdSDmitry Fomichev 2058d18ddcdSDmitry Fomichev /* Do a few more sanity checks of ZNS properties */ 206044f1876SMinwoo Im if (!ns->num_zones) { 207044f1876SMinwoo Im error_setg(errp, 208044f1876SMinwoo Im "insufficient drive capacity, must be at least the size " 209044f1876SMinwoo Im "of one zone (%"PRIu64"B)", zone_size); 210044f1876SMinwoo Im return -1; 211044f1876SMinwoo Im } 212044f1876SMinwoo Im 2138d18ddcdSDmitry Fomichev if (ns->params.max_open_zones > ns->num_zones) { 2148d18ddcdSDmitry Fomichev error_setg(errp, 2158d18ddcdSDmitry Fomichev "max_open_zones value %u exceeds the number of zones %u", 2168d18ddcdSDmitry Fomichev ns->params.max_open_zones, ns->num_zones); 2178d18ddcdSDmitry Fomichev return -1; 2188d18ddcdSDmitry Fomichev } 2198d18ddcdSDmitry Fomichev if (ns->params.max_active_zones > ns->num_zones) { 2208d18ddcdSDmitry Fomichev error_setg(errp, 2218d18ddcdSDmitry Fomichev "max_active_zones value %u exceeds the number of zones %u", 2228d18ddcdSDmitry Fomichev ns->params.max_active_zones, ns->num_zones); 2238d18ddcdSDmitry Fomichev return -1; 2248d18ddcdSDmitry Fomichev } 2258d18ddcdSDmitry Fomichev 2269ae39004SKlaus Jensen if (ns->params.max_active_zones) { 2279ae39004SKlaus Jensen if (ns->params.max_open_zones > ns->params.max_active_zones) { 2289ae39004SKlaus Jensen error_setg(errp, "max_open_zones (%u) exceeds max_active_zones (%u)", 2299ae39004SKlaus Jensen ns->params.max_open_zones, ns->params.max_active_zones); 2309ae39004SKlaus Jensen return -1; 2319ae39004SKlaus Jensen } 2329ae39004SKlaus Jensen 2339ae39004SKlaus Jensen if (!ns->params.max_open_zones) { 2349ae39004SKlaus Jensen ns->params.max_open_zones = ns->params.max_active_zones; 2359ae39004SKlaus Jensen } 2369ae39004SKlaus Jensen } 2379ae39004SKlaus Jensen 2381a9290adSDmitry Fomichev if (ns->params.zd_extension_size) { 2391a9290adSDmitry Fomichev if (ns->params.zd_extension_size & 0x3f) { 2401a9290adSDmitry Fomichev error_setg(errp, 2411a9290adSDmitry Fomichev "zone descriptor extension size must be a multiple of 64B"); 2421a9290adSDmitry Fomichev return -1; 2431a9290adSDmitry Fomichev } 2441a9290adSDmitry Fomichev if ((ns->params.zd_extension_size >> 6) > 0xff) { 2451a9290adSDmitry Fomichev error_setg(errp, "zone descriptor extension size is too large"); 2461a9290adSDmitry Fomichev return -1; 2471a9290adSDmitry Fomichev } 2481a9290adSDmitry Fomichev } 2491a9290adSDmitry Fomichev 250a479335bSDmitry Fomichev return 0; 251a479335bSDmitry Fomichev } 252a479335bSDmitry Fomichev 253a479335bSDmitry Fomichev static void nvme_ns_zoned_init_state(NvmeNamespace *ns) 254a479335bSDmitry Fomichev { 255a479335bSDmitry Fomichev uint64_t start = 0, zone_size = ns->zone_size; 256a479335bSDmitry Fomichev uint64_t capacity = ns->num_zones * zone_size; 257a479335bSDmitry Fomichev NvmeZone *zone; 258a479335bSDmitry Fomichev int i; 259a479335bSDmitry Fomichev 260a479335bSDmitry Fomichev ns->zone_array = g_new0(NvmeZone, ns->num_zones); 2611a9290adSDmitry Fomichev if (ns->params.zd_extension_size) { 2621a9290adSDmitry Fomichev ns->zd_extensions = g_malloc0(ns->params.zd_extension_size * 2631a9290adSDmitry Fomichev ns->num_zones); 2641a9290adSDmitry Fomichev } 265a479335bSDmitry Fomichev 266a479335bSDmitry Fomichev QTAILQ_INIT(&ns->exp_open_zones); 267a479335bSDmitry Fomichev QTAILQ_INIT(&ns->imp_open_zones); 268a479335bSDmitry Fomichev QTAILQ_INIT(&ns->closed_zones); 269a479335bSDmitry Fomichev QTAILQ_INIT(&ns->full_zones); 270a479335bSDmitry Fomichev 271a479335bSDmitry Fomichev zone = ns->zone_array; 272a479335bSDmitry Fomichev for (i = 0; i < ns->num_zones; i++, zone++) { 273a479335bSDmitry Fomichev if (start + zone_size > capacity) { 274a479335bSDmitry Fomichev zone_size = capacity - start; 275a479335bSDmitry Fomichev } 276a479335bSDmitry Fomichev zone->d.zt = NVME_ZONE_TYPE_SEQ_WRITE; 277a479335bSDmitry Fomichev nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY); 278a479335bSDmitry Fomichev zone->d.za = 0; 279a479335bSDmitry Fomichev zone->d.zcap = ns->zone_capacity; 280a479335bSDmitry Fomichev zone->d.zslba = start; 281a479335bSDmitry Fomichev zone->d.wp = start; 282a479335bSDmitry Fomichev zone->w_ptr = start; 283a479335bSDmitry Fomichev start += zone_size; 284a479335bSDmitry Fomichev } 285a479335bSDmitry Fomichev 286a479335bSDmitry Fomichev ns->zone_size_log2 = 0; 287a479335bSDmitry Fomichev if (is_power_of_2(ns->zone_size)) { 288a479335bSDmitry Fomichev ns->zone_size_log2 = 63 - clz64(ns->zone_size); 289a479335bSDmitry Fomichev } 290a479335bSDmitry Fomichev } 291a479335bSDmitry Fomichev 2926a674bc2SMinwoo Im static void nvme_ns_init_zoned(NvmeNamespace *ns) 293a479335bSDmitry Fomichev { 294a479335bSDmitry Fomichev NvmeIdNsZoned *id_ns_z; 2956a674bc2SMinwoo Im int i; 296a479335bSDmitry Fomichev 297a479335bSDmitry Fomichev nvme_ns_zoned_init_state(ns); 298a479335bSDmitry Fomichev 299a479335bSDmitry Fomichev id_ns_z = g_malloc0(sizeof(NvmeIdNsZoned)); 300a479335bSDmitry Fomichev 301312c3531SGollu Appalanaidu /* MAR/MOR are zeroes-based, FFFFFFFFFh means no limit */ 3028d18ddcdSDmitry Fomichev id_ns_z->mar = cpu_to_le32(ns->params.max_active_zones - 1); 3038d18ddcdSDmitry Fomichev id_ns_z->mor = cpu_to_le32(ns->params.max_open_zones - 1); 304a479335bSDmitry Fomichev id_ns_z->zoc = 0; 305a479335bSDmitry Fomichev id_ns_z->ozcs = ns->params.cross_zone_read ? 0x01 : 0x00; 306a479335bSDmitry Fomichev 3076a674bc2SMinwoo Im for (i = 0; i <= ns->id_ns.nlbaf; i++) { 3086a674bc2SMinwoo Im id_ns_z->lbafe[i].zsze = cpu_to_le64(ns->zone_size); 3096a674bc2SMinwoo Im id_ns_z->lbafe[i].zdes = 3101a9290adSDmitry Fomichev ns->params.zd_extension_size >> 6; /* Units of 64B */ 3116a674bc2SMinwoo Im } 312a479335bSDmitry Fomichev 313a479335bSDmitry Fomichev ns->csi = NVME_CSI_ZONED; 314a479335bSDmitry Fomichev ns->id_ns.nsze = cpu_to_le64(ns->num_zones * ns->zone_size); 315a479335bSDmitry Fomichev ns->id_ns.ncap = ns->id_ns.nsze; 316a479335bSDmitry Fomichev ns->id_ns.nuse = ns->id_ns.ncap; 317a479335bSDmitry Fomichev 3181b5804a8SKlaus Jensen /* 3191b5804a8SKlaus Jensen * The device uses the BDRV_BLOCK_ZERO flag to determine the "deallocated" 3201b5804a8SKlaus Jensen * status of logical blocks. Since the spec defines that logical blocks 3211b5804a8SKlaus Jensen * SHALL be deallocated when then zone is in the Empty or Offline states, 3221b5804a8SKlaus Jensen * we can only support DULBE if the zone size is a multiple of the 3231b5804a8SKlaus Jensen * calculated NPDG. 3241b5804a8SKlaus Jensen */ 3251b5804a8SKlaus Jensen if (ns->zone_size % (ns->id_ns.npdg + 1)) { 3261b5804a8SKlaus Jensen warn_report("the zone size (%"PRIu64" blocks) is not a multiple of " 3271b5804a8SKlaus Jensen "the calculated deallocation granularity (%d blocks); " 3281b5804a8SKlaus Jensen "DULBE support disabled", 3291b5804a8SKlaus Jensen ns->zone_size, ns->id_ns.npdg + 1); 3301b5804a8SKlaus Jensen 3311b5804a8SKlaus Jensen ns->id_ns.nsfeat &= ~0x4; 3321b5804a8SKlaus Jensen } 3331b5804a8SKlaus Jensen 334a479335bSDmitry Fomichev ns->id_ns_zoned = id_ns_z; 335a479335bSDmitry Fomichev } 336a479335bSDmitry Fomichev 337a479335bSDmitry Fomichev static void nvme_clear_zone(NvmeNamespace *ns, NvmeZone *zone) 338a479335bSDmitry Fomichev { 339a479335bSDmitry Fomichev uint8_t state; 340a479335bSDmitry Fomichev 341a479335bSDmitry Fomichev zone->w_ptr = zone->d.wp; 342a479335bSDmitry Fomichev state = nvme_get_zone_state(zone); 3431a9290adSDmitry Fomichev if (zone->d.wp != zone->d.zslba || 3441a9290adSDmitry Fomichev (zone->d.za & NVME_ZA_ZD_EXT_VALID)) { 345a479335bSDmitry Fomichev if (state != NVME_ZONE_STATE_CLOSED) { 346a479335bSDmitry Fomichev trace_pci_nvme_clear_ns_close(state, zone->d.zslba); 347a479335bSDmitry Fomichev nvme_set_zone_state(zone, NVME_ZONE_STATE_CLOSED); 348a479335bSDmitry Fomichev } 3498d18ddcdSDmitry Fomichev nvme_aor_inc_active(ns); 350a479335bSDmitry Fomichev QTAILQ_INSERT_HEAD(&ns->closed_zones, zone, entry); 351a479335bSDmitry Fomichev } else { 352a479335bSDmitry Fomichev trace_pci_nvme_clear_ns_reset(state, zone->d.zslba); 353a479335bSDmitry Fomichev nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY); 354a479335bSDmitry Fomichev } 355a479335bSDmitry Fomichev } 356a479335bSDmitry Fomichev 357a479335bSDmitry Fomichev /* 358a479335bSDmitry Fomichev * Close all the zones that are currently open. 359a479335bSDmitry Fomichev */ 360a479335bSDmitry Fomichev static void nvme_zoned_ns_shutdown(NvmeNamespace *ns) 361a479335bSDmitry Fomichev { 362a479335bSDmitry Fomichev NvmeZone *zone, *next; 363a479335bSDmitry Fomichev 364a479335bSDmitry Fomichev QTAILQ_FOREACH_SAFE(zone, &ns->closed_zones, entry, next) { 365a479335bSDmitry Fomichev QTAILQ_REMOVE(&ns->closed_zones, zone, entry); 3668d18ddcdSDmitry Fomichev nvme_aor_dec_active(ns); 367a479335bSDmitry Fomichev nvme_clear_zone(ns, zone); 368a479335bSDmitry Fomichev } 369a479335bSDmitry Fomichev QTAILQ_FOREACH_SAFE(zone, &ns->imp_open_zones, entry, next) { 370a479335bSDmitry Fomichev QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry); 3718d18ddcdSDmitry Fomichev nvme_aor_dec_open(ns); 3728d18ddcdSDmitry Fomichev nvme_aor_dec_active(ns); 373a479335bSDmitry Fomichev nvme_clear_zone(ns, zone); 374a479335bSDmitry Fomichev } 375a479335bSDmitry Fomichev QTAILQ_FOREACH_SAFE(zone, &ns->exp_open_zones, entry, next) { 376a479335bSDmitry Fomichev QTAILQ_REMOVE(&ns->exp_open_zones, zone, entry); 3778d18ddcdSDmitry Fomichev nvme_aor_dec_open(ns); 3788d18ddcdSDmitry Fomichev nvme_aor_dec_active(ns); 379a479335bSDmitry Fomichev nvme_clear_zone(ns, zone); 380a479335bSDmitry Fomichev } 3818d18ddcdSDmitry Fomichev 3828d18ddcdSDmitry Fomichev assert(ns->nr_open_zones == 0); 383a479335bSDmitry Fomichev } 384a479335bSDmitry Fomichev 385e5489356SKlaus Jensen static int nvme_ns_check_constraints(NvmeCtrl *n, NvmeNamespace *ns, 386e5489356SKlaus Jensen Error **errp) 3877f0f1aceSKlaus Jensen { 3887f0f1aceSKlaus Jensen if (!ns->blkconf.blk) { 3897f0f1aceSKlaus Jensen error_setg(errp, "block backend not configured"); 3907f0f1aceSKlaus Jensen return -1; 3917f0f1aceSKlaus Jensen } 3927f0f1aceSKlaus Jensen 3935ad7d017SKlaus Jensen if (ns->params.pi && ns->params.ms < 8) { 394146f720cSKlaus Jensen error_setg(errp, "at least 8 bytes of metadata required to enable " 395146f720cSKlaus Jensen "protection information"); 396146f720cSKlaus Jensen return -1; 397146f720cSKlaus Jensen } 398146f720cSKlaus Jensen 399e5489356SKlaus Jensen if (ns->params.nsid > NVME_MAX_NAMESPACES) { 400e5489356SKlaus Jensen error_setg(errp, "invalid namespace id (must be between 0 and %d)", 401e5489356SKlaus Jensen NVME_MAX_NAMESPACES); 402e5489356SKlaus Jensen return -1; 403e5489356SKlaus Jensen } 404e5489356SKlaus Jensen 405e5489356SKlaus Jensen if (!n->subsys) { 406e5489356SKlaus Jensen if (ns->params.detached) { 407e5489356SKlaus Jensen error_setg(errp, "detached requires that the nvme device is " 408e5489356SKlaus Jensen "linked to an nvme-subsys device"); 409e5489356SKlaus Jensen return -1; 410e5489356SKlaus Jensen } 411e5489356SKlaus Jensen 412e5489356SKlaus Jensen if (ns->params.shared) { 413e5489356SKlaus Jensen error_setg(errp, "shared requires that the nvme device is " 414e5489356SKlaus Jensen "linked to an nvme-subsys device"); 415e5489356SKlaus Jensen return -1; 416e5489356SKlaus Jensen } 417e5489356SKlaus Jensen } 418e5489356SKlaus Jensen 4197f0f1aceSKlaus Jensen return 0; 4207f0f1aceSKlaus Jensen } 4217f0f1aceSKlaus Jensen 422e5489356SKlaus Jensen int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) 4237f0f1aceSKlaus Jensen { 424e5489356SKlaus Jensen if (nvme_ns_check_constraints(n, ns, errp)) { 4257f0f1aceSKlaus Jensen return -1; 4267f0f1aceSKlaus Jensen } 4277f0f1aceSKlaus Jensen 428337ccd76SMinwoo Im if (nvme_ns_init_blk(ns, errp)) { 4297f0f1aceSKlaus Jensen return -1; 4307f0f1aceSKlaus Jensen } 4317f0f1aceSKlaus Jensen 4322605257aSKlaus Jensen if (nvme_ns_init(ns, errp)) { 4332605257aSKlaus Jensen return -1; 4342605257aSKlaus Jensen } 435a479335bSDmitry Fomichev if (ns->params.zoned) { 436a479335bSDmitry Fomichev if (nvme_ns_zoned_check_calc_geometry(ns, errp) != 0) { 437a479335bSDmitry Fomichev return -1; 438a479335bSDmitry Fomichev } 4396a674bc2SMinwoo Im nvme_ns_init_zoned(ns); 440a479335bSDmitry Fomichev } 44154064e51SKlaus Jensen 4427f0f1aceSKlaus Jensen return 0; 4437f0f1aceSKlaus Jensen } 4447f0f1aceSKlaus Jensen 4457f0f1aceSKlaus Jensen void nvme_ns_drain(NvmeNamespace *ns) 4467f0f1aceSKlaus Jensen { 4477f0f1aceSKlaus Jensen blk_drain(ns->blkconf.blk); 4487f0f1aceSKlaus Jensen } 4497f0f1aceSKlaus Jensen 450ba69f224SDmitry Fomichev void nvme_ns_shutdown(NvmeNamespace *ns) 4517f0f1aceSKlaus Jensen { 4527f0f1aceSKlaus Jensen blk_flush(ns->blkconf.blk); 453a479335bSDmitry Fomichev if (ns->params.zoned) { 454a479335bSDmitry Fomichev nvme_zoned_ns_shutdown(ns); 455a479335bSDmitry Fomichev } 456a479335bSDmitry Fomichev } 457a479335bSDmitry Fomichev 458a479335bSDmitry Fomichev void nvme_ns_cleanup(NvmeNamespace *ns) 459a479335bSDmitry Fomichev { 460a479335bSDmitry Fomichev if (ns->params.zoned) { 461a479335bSDmitry Fomichev g_free(ns->id_ns_zoned); 462a479335bSDmitry Fomichev g_free(ns->zone_array); 4631a9290adSDmitry Fomichev g_free(ns->zd_extensions); 464a479335bSDmitry Fomichev } 4657f0f1aceSKlaus Jensen } 4667f0f1aceSKlaus Jensen 4677f0f1aceSKlaus Jensen static void nvme_ns_realize(DeviceState *dev, Error **errp) 4687f0f1aceSKlaus Jensen { 4697f0f1aceSKlaus Jensen NvmeNamespace *ns = NVME_NS(dev); 4707f0f1aceSKlaus Jensen BusState *s = qdev_get_parent_bus(dev); 4717f0f1aceSKlaus Jensen NvmeCtrl *n = NVME(s->parent); 472e5489356SKlaus Jensen NvmeSubsystem *subsys = n->subsys; 473e5489356SKlaus Jensen uint32_t nsid = ns->params.nsid; 474e5489356SKlaus Jensen int i; 4757f0f1aceSKlaus Jensen 476e5489356SKlaus Jensen if (nvme_ns_setup(n, ns, errp)) { 4777f0f1aceSKlaus Jensen return; 4787f0f1aceSKlaus Jensen } 47915d024d4SMinwoo Im 480e5489356SKlaus Jensen if (!nsid) { 481e5489356SKlaus Jensen for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { 482e5489356SKlaus Jensen if (nvme_ns(n, i) || nvme_subsys_ns(subsys, i)) { 483e5489356SKlaus Jensen continue; 484e5489356SKlaus Jensen } 485e5489356SKlaus Jensen 486e5489356SKlaus Jensen nsid = ns->params.nsid = i; 487e5489356SKlaus Jensen break; 488e5489356SKlaus Jensen } 489e5489356SKlaus Jensen 490e5489356SKlaus Jensen if (!nsid) { 491e5489356SKlaus Jensen error_setg(errp, "no free namespace id"); 492e5707685SMinwoo Im return; 493e5707685SMinwoo Im } 494e5707685SMinwoo Im } else { 495e5489356SKlaus Jensen if (nvme_ns(n, nsid) || nvme_subsys_ns(subsys, nsid)) { 496e5489356SKlaus Jensen error_setg(errp, "namespace id '%d' already allocated", nsid); 49715d024d4SMinwoo Im return; 49815d024d4SMinwoo Im } 499e5707685SMinwoo Im } 500e5489356SKlaus Jensen 501e5489356SKlaus Jensen if (subsys) { 502e5489356SKlaus Jensen subsys->namespaces[nsid] = ns; 503e5489356SKlaus Jensen 504e5489356SKlaus Jensen if (ns->params.detached) { 505e5489356SKlaus Jensen return; 506e5489356SKlaus Jensen } 507e5489356SKlaus Jensen 508e5489356SKlaus Jensen if (ns->params.shared) { 509e5489356SKlaus Jensen for (i = 0; i < ARRAY_SIZE(subsys->ctrls); i++) { 510e5489356SKlaus Jensen NvmeCtrl *ctrl = subsys->ctrls[i]; 511e5489356SKlaus Jensen 512e5489356SKlaus Jensen if (ctrl) { 513e5489356SKlaus Jensen nvme_attach_ns(ctrl, ns); 514e5489356SKlaus Jensen } 515e5489356SKlaus Jensen } 516e5489356SKlaus Jensen 517e5489356SKlaus Jensen return; 518e5489356SKlaus Jensen } 519e5489356SKlaus Jensen } 520e5489356SKlaus Jensen 521e5489356SKlaus Jensen nvme_attach_ns(n, ns); 5227f0f1aceSKlaus Jensen } 5237f0f1aceSKlaus Jensen 5247f0f1aceSKlaus Jensen static Property nvme_ns_props[] = { 5257f0f1aceSKlaus Jensen DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf), 526037953b5SMinwoo Im DEFINE_PROP_BOOL("detached", NvmeNamespace, params.detached, false), 527e5489356SKlaus Jensen DEFINE_PROP_BOOL("shared", NvmeNamespace, params.shared, false), 5287f0f1aceSKlaus Jensen DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0), 529b52f26cdSDmitry Fomichev DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid), 530bc3a65e9SKlaus Jensen DEFINE_PROP_UINT16("ms", NvmeNamespace, params.ms, 0), 531bc3a65e9SKlaus Jensen DEFINE_PROP_UINT8("mset", NvmeNamespace, params.mset, 0), 532146f720cSKlaus Jensen DEFINE_PROP_UINT8("pi", NvmeNamespace, params.pi, 0), 533146f720cSKlaus Jensen DEFINE_PROP_UINT8("pil", NvmeNamespace, params.pil, 0), 534e4e430b3SKlaus Jensen DEFINE_PROP_UINT16("mssrl", NvmeNamespace, params.mssrl, 128), 535e4e430b3SKlaus Jensen DEFINE_PROP_UINT32("mcl", NvmeNamespace, params.mcl, 128), 536e4e430b3SKlaus Jensen DEFINE_PROP_UINT8("msrc", NvmeNamespace, params.msrc, 127), 537a479335bSDmitry Fomichev DEFINE_PROP_BOOL("zoned", NvmeNamespace, params.zoned, false), 538a479335bSDmitry Fomichev DEFINE_PROP_SIZE("zoned.zone_size", NvmeNamespace, params.zone_size_bs, 539a479335bSDmitry Fomichev NVME_DEFAULT_ZONE_SIZE), 540a479335bSDmitry Fomichev DEFINE_PROP_SIZE("zoned.zone_capacity", NvmeNamespace, params.zone_cap_bs, 541a479335bSDmitry Fomichev 0), 542a479335bSDmitry Fomichev DEFINE_PROP_BOOL("zoned.cross_read", NvmeNamespace, 543a479335bSDmitry Fomichev params.cross_zone_read, false), 5448d18ddcdSDmitry Fomichev DEFINE_PROP_UINT32("zoned.max_active", NvmeNamespace, 5458d18ddcdSDmitry Fomichev params.max_active_zones, 0), 5468d18ddcdSDmitry Fomichev DEFINE_PROP_UINT32("zoned.max_open", NvmeNamespace, 5478d18ddcdSDmitry Fomichev params.max_open_zones, 0), 5481a9290adSDmitry Fomichev DEFINE_PROP_UINT32("zoned.descr_ext_size", NvmeNamespace, 5491a9290adSDmitry Fomichev params.zd_extension_size, 0), 5507f0f1aceSKlaus Jensen DEFINE_PROP_END_OF_LIST(), 5517f0f1aceSKlaus Jensen }; 5527f0f1aceSKlaus Jensen 5537f0f1aceSKlaus Jensen static void nvme_ns_class_init(ObjectClass *oc, void *data) 5547f0f1aceSKlaus Jensen { 5557f0f1aceSKlaus Jensen DeviceClass *dc = DEVICE_CLASS(oc); 5567f0f1aceSKlaus Jensen 5577f0f1aceSKlaus Jensen set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); 5587f0f1aceSKlaus Jensen 5597f0f1aceSKlaus Jensen dc->bus_type = TYPE_NVME_BUS; 5607f0f1aceSKlaus Jensen dc->realize = nvme_ns_realize; 5617f0f1aceSKlaus Jensen device_class_set_props(dc, nvme_ns_props); 5627f0f1aceSKlaus Jensen dc->desc = "Virtual NVMe namespace"; 5637f0f1aceSKlaus Jensen } 5647f0f1aceSKlaus Jensen 5657f0f1aceSKlaus Jensen static void nvme_ns_instance_init(Object *obj) 5667f0f1aceSKlaus Jensen { 5677f0f1aceSKlaus Jensen NvmeNamespace *ns = NVME_NS(obj); 5687f0f1aceSKlaus Jensen char *bootindex = g_strdup_printf("/namespace@%d,0", ns->params.nsid); 5697f0f1aceSKlaus Jensen 5707f0f1aceSKlaus Jensen device_add_bootindex_property(obj, &ns->bootindex, "bootindex", 5717f0f1aceSKlaus Jensen bootindex, DEVICE(obj)); 5727f0f1aceSKlaus Jensen 5737f0f1aceSKlaus Jensen g_free(bootindex); 5747f0f1aceSKlaus Jensen } 5757f0f1aceSKlaus Jensen 5767f0f1aceSKlaus Jensen static const TypeInfo nvme_ns_info = { 5777f0f1aceSKlaus Jensen .name = TYPE_NVME_NS, 5787f0f1aceSKlaus Jensen .parent = TYPE_DEVICE, 5797f0f1aceSKlaus Jensen .class_init = nvme_ns_class_init, 5807f0f1aceSKlaus Jensen .instance_size = sizeof(NvmeNamespace), 5817f0f1aceSKlaus Jensen .instance_init = nvme_ns_instance_init, 5827f0f1aceSKlaus Jensen }; 5837f0f1aceSKlaus Jensen 5847f0f1aceSKlaus Jensen static void nvme_ns_register_types(void) 5857f0f1aceSKlaus Jensen { 5867f0f1aceSKlaus Jensen type_register_static(&nvme_ns_info); 5877f0f1aceSKlaus Jensen } 5887f0f1aceSKlaus Jensen 5897f0f1aceSKlaus Jensen type_init(nvme_ns_register_types) 590