1585f8587Sbellard /* 2585f8587Sbellard * Block driver for the QCOW version 2 format 3585f8587Sbellard * 4585f8587Sbellard * Copyright (c) 2004-2006 Fabrice Bellard 5585f8587Sbellard * 6585f8587Sbellard * Permission is hereby granted, free of charge, to any person obtaining a copy 7585f8587Sbellard * of this software and associated documentation files (the "Software"), to deal 8585f8587Sbellard * in the Software without restriction, including without limitation the rights 9585f8587Sbellard * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10585f8587Sbellard * copies of the Software, and to permit persons to whom the Software is 11585f8587Sbellard * furnished to do so, subject to the following conditions: 12585f8587Sbellard * 13585f8587Sbellard * The above copyright notice and this permission notice shall be included in 14585f8587Sbellard * all copies or substantial portions of the Software. 15585f8587Sbellard * 16585f8587Sbellard * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17585f8587Sbellard * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18585f8587Sbellard * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19585f8587Sbellard * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20585f8587Sbellard * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21585f8587Sbellard * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22585f8587Sbellard * THE SOFTWARE. 23585f8587Sbellard */ 24e688df6bSMarkus Armbruster 2580c71a24SPeter Maydell #include "qemu/osdep.h" 262714f13dSVladimir Sementsov-Ogievskiy 27609f45eaSMax Reitz #include "block/qdict.h" 2823588797SKevin Wolf #include "sysemu/block-backend.h" 29db725815SMarkus Armbruster #include "qemu/main-loop.h" 301de7afc9SPaolo Bonzini #include "qemu/module.h" 310d8c41daSMichael S. 
Tsirkin #include "qcow2.h" 321de7afc9SPaolo Bonzini #include "qemu/error-report.h" 33e688df6bSMarkus Armbruster #include "qapi/error.h" 349af23989SMarkus Armbruster #include "qapi/qapi-events-block-core.h" 356b673957SMarkus Armbruster #include "qapi/qmp/qdict.h" 366b673957SMarkus Armbruster #include "qapi/qmp/qstring.h" 373cce16f4SKevin Wolf #include "trace.h" 381bd0e2d1SChunyan Liu #include "qemu/option_int.h" 39f348b6d1SVeronia Bahaa #include "qemu/cutils.h" 4058369e22SPaolo Bonzini #include "qemu/bswap.h" 415df022cfSPeter Maydell #include "qemu/memalign.h" 42b76b4f60SKevin Wolf #include "qapi/qobject-input-visitor.h" 43b76b4f60SKevin Wolf #include "qapi/qapi-visit-block-core.h" 440d8c41daSMichael S. Tsirkin #include "crypto.h" 45d710cf57SVladimir Sementsov-Ogievskiy #include "block/aio_task.h" 46585f8587Sbellard 47585f8587Sbellard /* 48585f8587Sbellard Differences with QCOW: 49585f8587Sbellard 50585f8587Sbellard - Support for multiple incremental snapshots. 51585f8587Sbellard - Memory management by reference counts. 52585f8587Sbellard - Clusters which have a reference count of one have the bit 53585f8587Sbellard QCOW_OFLAG_COPIED to optimize write performance. 54585f8587Sbellard - Size of compressed clusters is stored in sectors to reduce bit usage 55585f8587Sbellard in the cluster offsets. 56585f8587Sbellard - Support for storing additional data (such as the VM state) in the 57585f8587Sbellard snapshots. 58585f8587Sbellard - If a backing store is used, the cluster size is not constrained 59585f8587Sbellard (could be backported to QCOW). 60585f8587Sbellard - L2 tables have always a size of one cluster. 
61585f8587Sbellard */ 62585f8587Sbellard 639b80ddf3Saliguori 649b80ddf3Saliguori typedef struct { 659b80ddf3Saliguori uint32_t magic; 669b80ddf3Saliguori uint32_t len; 67c4217f64SJeff Cody } QEMU_PACKED QCowExtension; 6821d82ac9SJeff Cody 697c80ab3fSJes Sorensen #define QCOW2_EXT_MAGIC_END 0 708098969cSAndrey Shinkevich #define QCOW2_EXT_MAGIC_BACKING_FORMAT 0xe2792aca 71cfcc4c62SKevin Wolf #define QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857 724652b8f3SDaniel P. Berrange #define QCOW2_EXT_MAGIC_CRYPTO_HEADER 0x0537be77 7388ddffaeSVladimir Sementsov-Ogievskiy #define QCOW2_EXT_MAGIC_BITMAPS 0x23852875 7493c24936SKevin Wolf #define QCOW2_EXT_MAGIC_DATA_FILE 0x44415441 759b80ddf3Saliguori 76c3c10f72SVladimir Sementsov-Ogievskiy static int coroutine_fn 77c3c10f72SVladimir Sementsov-Ogievskiy qcow2_co_preadv_compressed(BlockDriverState *bs, 789a3978a4SVladimir Sementsov-Ogievskiy uint64_t l2_entry, 79c3c10f72SVladimir Sementsov-Ogievskiy uint64_t offset, 80c3c10f72SVladimir Sementsov-Ogievskiy uint64_t bytes, 81df893d25SVladimir Sementsov-Ogievskiy QEMUIOVector *qiov, 82df893d25SVladimir Sementsov-Ogievskiy size_t qiov_offset); 83c3c10f72SVladimir Sementsov-Ogievskiy 847c80ab3fSJes Sorensen static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename) 85585f8587Sbellard { 86585f8587Sbellard const QCowHeader *cow_header = (const void *)buf; 87585f8587Sbellard 88585f8587Sbellard if (buf_size >= sizeof(QCowHeader) && 89585f8587Sbellard be32_to_cpu(cow_header->magic) == QCOW_MAGIC && 906744cbabSKevin Wolf be32_to_cpu(cow_header->version) >= 2) 91585f8587Sbellard return 100; 92585f8587Sbellard else 93585f8587Sbellard return 0; 94585f8587Sbellard } 95585f8587Sbellard 969b80ddf3Saliguori 97*757dda54SAlberto Faria static int qcow2_crypto_hdr_read_func(QCryptoBlock *block, size_t offset, 984652b8f3SDaniel P. Berrange uint8_t *buf, size_t buflen, 994652b8f3SDaniel P. Berrange void *opaque, Error **errp) 1004652b8f3SDaniel P. Berrange { 1014652b8f3SDaniel P. 
Berrange BlockDriverState *bs = opaque; 1024652b8f3SDaniel P. Berrange BDRVQcow2State *s = bs->opaque; 1034652b8f3SDaniel P. Berrange ssize_t ret; 1044652b8f3SDaniel P. Berrange 1054652b8f3SDaniel P. Berrange if ((offset + buflen) > s->crypto_header.length) { 1064652b8f3SDaniel P. Berrange error_setg(errp, "Request for data outside of extension header"); 1074652b8f3SDaniel P. Berrange return -1; 1084652b8f3SDaniel P. Berrange } 1094652b8f3SDaniel P. Berrange 11032cc71deSAlberto Faria ret = bdrv_pread(bs->file, s->crypto_header.offset + offset, buflen, buf, 11153fb7844SAlberto Faria 0); 1124652b8f3SDaniel P. Berrange if (ret < 0) { 1134652b8f3SDaniel P. Berrange error_setg_errno(errp, -ret, "Could not read encryption header"); 1144652b8f3SDaniel P. Berrange return -1; 1154652b8f3SDaniel P. Berrange } 116*757dda54SAlberto Faria return 0; 1174652b8f3SDaniel P. Berrange } 1184652b8f3SDaniel P. Berrange 1194652b8f3SDaniel P. Berrange 120*757dda54SAlberto Faria static int qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen, 1214652b8f3SDaniel P. Berrange void *opaque, Error **errp) 1224652b8f3SDaniel P. Berrange { 1234652b8f3SDaniel P. Berrange BlockDriverState *bs = opaque; 1244652b8f3SDaniel P. Berrange BDRVQcow2State *s = bs->opaque; 1254652b8f3SDaniel P. Berrange int64_t ret; 1264652b8f3SDaniel P. Berrange int64_t clusterlen; 1274652b8f3SDaniel P. Berrange 1284652b8f3SDaniel P. Berrange ret = qcow2_alloc_clusters(bs, headerlen); 1294652b8f3SDaniel P. Berrange if (ret < 0) { 1304652b8f3SDaniel P. Berrange error_setg_errno(errp, -ret, 1314652b8f3SDaniel P. Berrange "Cannot allocate cluster for LUKS header size %zu", 1324652b8f3SDaniel P. Berrange headerlen); 1334652b8f3SDaniel P. Berrange return -1; 1344652b8f3SDaniel P. Berrange } 1354652b8f3SDaniel P. Berrange 1364652b8f3SDaniel P. Berrange s->crypto_header.length = headerlen; 1374652b8f3SDaniel P. Berrange s->crypto_header.offset = ret; 1384652b8f3SDaniel P. Berrange 139087ab8e7SDaniel P. 
Berrangé /* 140087ab8e7SDaniel P. Berrangé * Zero fill all space in cluster so it has predictable 141087ab8e7SDaniel P. Berrangé * content, as we may not initialize some regions of the 142087ab8e7SDaniel P. Berrangé * header (eg only 1 out of 8 key slots will be initialized) 143087ab8e7SDaniel P. Berrangé */ 1444652b8f3SDaniel P. Berrange clusterlen = size_to_clusters(s, headerlen) * s->cluster_size; 145966b000fSKevin Wolf assert(qcow2_pre_write_overlap_check(bs, 0, ret, clusterlen, false) == 0); 1464652b8f3SDaniel P. Berrange ret = bdrv_pwrite_zeroes(bs->file, 147087ab8e7SDaniel P. Berrangé ret, 148087ab8e7SDaniel P. Berrangé clusterlen, 0); 1494652b8f3SDaniel P. Berrange if (ret < 0) { 1504652b8f3SDaniel P. Berrange error_setg_errno(errp, -ret, "Could not zero fill encryption header"); 1514652b8f3SDaniel P. Berrange return -1; 1524652b8f3SDaniel P. Berrange } 1534652b8f3SDaniel P. Berrange 154*757dda54SAlberto Faria return 0; 1554652b8f3SDaniel P. Berrange } 1564652b8f3SDaniel P. Berrange 1574652b8f3SDaniel P. Berrange 158*757dda54SAlberto Faria static int qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset, 1594652b8f3SDaniel P. Berrange const uint8_t *buf, size_t buflen, 1604652b8f3SDaniel P. Berrange void *opaque, Error **errp) 1614652b8f3SDaniel P. Berrange { 1624652b8f3SDaniel P. Berrange BlockDriverState *bs = opaque; 1634652b8f3SDaniel P. Berrange BDRVQcow2State *s = bs->opaque; 1644652b8f3SDaniel P. Berrange ssize_t ret; 1654652b8f3SDaniel P. Berrange 1664652b8f3SDaniel P. Berrange if ((offset + buflen) > s->crypto_header.length) { 1674652b8f3SDaniel P. Berrange error_setg(errp, "Request for data outside of extension header"); 1684652b8f3SDaniel P. Berrange return -1; 1694652b8f3SDaniel P. Berrange } 1704652b8f3SDaniel P. Berrange 17132cc71deSAlberto Faria ret = bdrv_pwrite(bs->file, s->crypto_header.offset + offset, buflen, buf, 17253fb7844SAlberto Faria 0); 1734652b8f3SDaniel P. Berrange if (ret < 0) { 1744652b8f3SDaniel P. 
Berrange error_setg_errno(errp, -ret, "Could not read encryption header"); 1754652b8f3SDaniel P. Berrange return -1; 1764652b8f3SDaniel P. Berrange } 177*757dda54SAlberto Faria return 0; 1784652b8f3SDaniel P. Berrange } 1794652b8f3SDaniel P. Berrange 18090766d9dSMaxim Levitsky static QDict* 18190766d9dSMaxim Levitsky qcow2_extract_crypto_opts(QemuOpts *opts, const char *fmt, Error **errp) 18290766d9dSMaxim Levitsky { 18390766d9dSMaxim Levitsky QDict *cryptoopts_qdict; 18490766d9dSMaxim Levitsky QDict *opts_qdict; 18590766d9dSMaxim Levitsky 18690766d9dSMaxim Levitsky /* Extract "encrypt." options into a qdict */ 18790766d9dSMaxim Levitsky opts_qdict = qemu_opts_to_qdict(opts, NULL); 18890766d9dSMaxim Levitsky qdict_extract_subqdict(opts_qdict, &cryptoopts_qdict, "encrypt."); 18990766d9dSMaxim Levitsky qobject_unref(opts_qdict); 19090766d9dSMaxim Levitsky qdict_put_str(cryptoopts_qdict, "format", fmt); 19190766d9dSMaxim Levitsky return cryptoopts_qdict; 19290766d9dSMaxim Levitsky } 1934652b8f3SDaniel P. 
Berrange 1949b80ddf3Saliguori /* 1959b80ddf3Saliguori * read qcow2 extension and fill bs 1969b80ddf3Saliguori * start reading from start_offset 1979b80ddf3Saliguori * finish reading upon magic of value 0 or when end_offset reached 1989b80ddf3Saliguori * unknown magic is skipped (future extension this version knows nothing about) 1999b80ddf3Saliguori * return 0 upon success, non-0 otherwise 2009b80ddf3Saliguori */ 2017c80ab3fSJes Sorensen static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, 2023ef6c40aSMax Reitz uint64_t end_offset, void **p_feature_table, 20388ddffaeSVladimir Sementsov-Ogievskiy int flags, bool *need_update_header, 20488ddffaeSVladimir Sementsov-Ogievskiy Error **errp) 2059b80ddf3Saliguori { 206ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 2079b80ddf3Saliguori QCowExtension ext; 2089b80ddf3Saliguori uint64_t offset; 20975bab85cSKevin Wolf int ret; 21088ddffaeSVladimir Sementsov-Ogievskiy Qcow2BitmapHeaderExt bitmaps_ext; 21188ddffaeSVladimir Sementsov-Ogievskiy 21288ddffaeSVladimir Sementsov-Ogievskiy if (need_update_header != NULL) { 21388ddffaeSVladimir Sementsov-Ogievskiy *need_update_header = false; 21488ddffaeSVladimir Sementsov-Ogievskiy } 2159b80ddf3Saliguori 2169b80ddf3Saliguori #ifdef DEBUG_EXT 2177c80ab3fSJes Sorensen printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset); 2189b80ddf3Saliguori #endif 2199b80ddf3Saliguori offset = start_offset; 2209b80ddf3Saliguori while (offset < end_offset) { 2219b80ddf3Saliguori 2229b80ddf3Saliguori #ifdef DEBUG_EXT 2239b80ddf3Saliguori /* Sanity check */ 2249b80ddf3Saliguori if (offset > s->cluster_size) 2257c80ab3fSJes Sorensen printf("qcow2_read_extension: suspicious offset %lu\n", offset); 2269b80ddf3Saliguori 2279b2260cbSDong Xu Wang printf("attempting to read extended header in offset %lu\n", offset); 2289b80ddf3Saliguori #endif 2299b80ddf3Saliguori 23032cc71deSAlberto Faria ret = bdrv_pread(bs->file, offset, sizeof(ext), &ext, 0); 
2313ef6c40aSMax Reitz if (ret < 0) { 2323ef6c40aSMax Reitz error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: " 2333ef6c40aSMax Reitz "pread fail from offset %" PRIu64, offset); 2349b80ddf3Saliguori return 1; 2359b80ddf3Saliguori } 2363b698f52SPeter Maydell ext.magic = be32_to_cpu(ext.magic); 2373b698f52SPeter Maydell ext.len = be32_to_cpu(ext.len); 2389b80ddf3Saliguori offset += sizeof(ext); 2399b80ddf3Saliguori #ifdef DEBUG_EXT 2409b80ddf3Saliguori printf("ext.magic = 0x%x\n", ext.magic); 2419b80ddf3Saliguori #endif 2422ebafc85SKevin Wolf if (offset > end_offset || ext.len > end_offset - offset) { 2433ef6c40aSMax Reitz error_setg(errp, "Header extension too large"); 24464ca6aeeSKevin Wolf return -EINVAL; 24564ca6aeeSKevin Wolf } 24664ca6aeeSKevin Wolf 2479b80ddf3Saliguori switch (ext.magic) { 2487c80ab3fSJes Sorensen case QCOW2_EXT_MAGIC_END: 2499b80ddf3Saliguori return 0; 250f965509cSaliguori 2517c80ab3fSJes Sorensen case QCOW2_EXT_MAGIC_BACKING_FORMAT: 252f965509cSaliguori if (ext.len >= sizeof(bs->backing_format)) { 253521b2b5dSMax Reitz error_setg(errp, "ERROR: ext_backing_format: len=%" PRIu32 254521b2b5dSMax Reitz " too large (>=%zu)", ext.len, 255521b2b5dSMax Reitz sizeof(bs->backing_format)); 256f965509cSaliguori return 2; 257f965509cSaliguori } 25832cc71deSAlberto Faria ret = bdrv_pread(bs->file, offset, ext.len, bs->backing_format, 0); 2593ef6c40aSMax Reitz if (ret < 0) { 2603ef6c40aSMax Reitz error_setg_errno(errp, -ret, "ERROR: ext_backing_format: " 2613ef6c40aSMax Reitz "Could not read format name"); 262f965509cSaliguori return 3; 2633ef6c40aSMax Reitz } 264f965509cSaliguori bs->backing_format[ext.len] = '\0'; 265e4603fe1SKevin Wolf s->image_backing_format = g_strdup(bs->backing_format); 266f965509cSaliguori #ifdef DEBUG_EXT 267f965509cSaliguori printf("Qcow2: Got format extension %s\n", bs->backing_format); 268f965509cSaliguori #endif 269f965509cSaliguori break; 270f965509cSaliguori 271cfcc4c62SKevin Wolf case QCOW2_EXT_MAGIC_FEATURE_TABLE: 
272cfcc4c62SKevin Wolf if (p_feature_table != NULL) { 273cfcc4c62SKevin Wolf void *feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature)); 27432cc71deSAlberto Faria ret = bdrv_pread(bs->file, offset, ext.len, feature_table, 0); 275cfcc4c62SKevin Wolf if (ret < 0) { 2763ef6c40aSMax Reitz error_setg_errno(errp, -ret, "ERROR: ext_feature_table: " 2773ef6c40aSMax Reitz "Could not read table"); 278cfcc4c62SKevin Wolf return ret; 279cfcc4c62SKevin Wolf } 280cfcc4c62SKevin Wolf 281cfcc4c62SKevin Wolf *p_feature_table = feature_table; 282cfcc4c62SKevin Wolf } 283cfcc4c62SKevin Wolf break; 284cfcc4c62SKevin Wolf 2854652b8f3SDaniel P. Berrange case QCOW2_EXT_MAGIC_CRYPTO_HEADER: { 2864652b8f3SDaniel P. Berrange unsigned int cflags = 0; 2874652b8f3SDaniel P. Berrange if (s->crypt_method_header != QCOW_CRYPT_LUKS) { 2884652b8f3SDaniel P. Berrange error_setg(errp, "CRYPTO header extension only " 2894652b8f3SDaniel P. Berrange "expected with LUKS encryption method"); 2904652b8f3SDaniel P. Berrange return -EINVAL; 2914652b8f3SDaniel P. Berrange } 2924652b8f3SDaniel P. Berrange if (ext.len != sizeof(Qcow2CryptoHeaderExtension)) { 2934652b8f3SDaniel P. Berrange error_setg(errp, "CRYPTO header extension size %u, " 2944652b8f3SDaniel P. Berrange "but expected size %zu", ext.len, 2954652b8f3SDaniel P. Berrange sizeof(Qcow2CryptoHeaderExtension)); 2964652b8f3SDaniel P. Berrange return -EINVAL; 2974652b8f3SDaniel P. Berrange } 2984652b8f3SDaniel P. Berrange 29932cc71deSAlberto Faria ret = bdrv_pread(bs->file, offset, ext.len, &s->crypto_header, 0); 3004652b8f3SDaniel P. Berrange if (ret < 0) { 3014652b8f3SDaniel P. Berrange error_setg_errno(errp, -ret, 3024652b8f3SDaniel P. Berrange "Unable to read CRYPTO header extension"); 3034652b8f3SDaniel P. Berrange return ret; 3044652b8f3SDaniel P. 
Berrange } 3053b698f52SPeter Maydell s->crypto_header.offset = be64_to_cpu(s->crypto_header.offset); 3063b698f52SPeter Maydell s->crypto_header.length = be64_to_cpu(s->crypto_header.length); 3074652b8f3SDaniel P. Berrange 3084652b8f3SDaniel P. Berrange if ((s->crypto_header.offset % s->cluster_size) != 0) { 3094652b8f3SDaniel P. Berrange error_setg(errp, "Encryption header offset '%" PRIu64 "' is " 3104652b8f3SDaniel P. Berrange "not a multiple of cluster size '%u'", 3114652b8f3SDaniel P. Berrange s->crypto_header.offset, s->cluster_size); 3124652b8f3SDaniel P. Berrange return -EINVAL; 3134652b8f3SDaniel P. Berrange } 3144652b8f3SDaniel P. Berrange 3154652b8f3SDaniel P. Berrange if (flags & BDRV_O_NO_IO) { 3164652b8f3SDaniel P. Berrange cflags |= QCRYPTO_BLOCK_OPEN_NO_IO; 3174652b8f3SDaniel P. Berrange } 3181cd9a787SDaniel P. Berrange s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.", 3194652b8f3SDaniel P. Berrange qcow2_crypto_hdr_read_func, 3208ac0f15fSVladimir Sementsov-Ogievskiy bs, cflags, QCOW2_MAX_THREADS, errp); 3214652b8f3SDaniel P. Berrange if (!s->crypto) { 3224652b8f3SDaniel P. Berrange return -EINVAL; 3234652b8f3SDaniel P. Berrange } 3244652b8f3SDaniel P. Berrange } break; 3254652b8f3SDaniel P. 
Berrange 32688ddffaeSVladimir Sementsov-Ogievskiy case QCOW2_EXT_MAGIC_BITMAPS: 32788ddffaeSVladimir Sementsov-Ogievskiy if (ext.len != sizeof(bitmaps_ext)) { 32888ddffaeSVladimir Sementsov-Ogievskiy error_setg_errno(errp, -ret, "bitmaps_ext: " 32988ddffaeSVladimir Sementsov-Ogievskiy "Invalid extension length"); 33088ddffaeSVladimir Sementsov-Ogievskiy return -EINVAL; 33188ddffaeSVladimir Sementsov-Ogievskiy } 33288ddffaeSVladimir Sementsov-Ogievskiy 33388ddffaeSVladimir Sementsov-Ogievskiy if (!(s->autoclear_features & QCOW2_AUTOCLEAR_BITMAPS)) { 334c9ceb3ecSMax Reitz if (s->qcow_version < 3) { 335c9ceb3ecSMax Reitz /* Let's be a bit more specific */ 336c9ceb3ecSMax Reitz warn_report("This qcow2 v2 image contains bitmaps, but " 337c9ceb3ecSMax Reitz "they may have been modified by a program " 338c9ceb3ecSMax Reitz "without persistent bitmap support; so now " 339c9ceb3ecSMax Reitz "they must all be considered inconsistent"); 340c9ceb3ecSMax Reitz } else { 34155d527a9SAlistair Francis warn_report("a program lacking bitmap support " 34288ddffaeSVladimir Sementsov-Ogievskiy "modified this file, so all bitmaps are now " 34355d527a9SAlistair Francis "considered inconsistent"); 344c9ceb3ecSMax Reitz } 34555d527a9SAlistair Francis error_printf("Some clusters may be leaked, " 34655d527a9SAlistair Francis "run 'qemu-img check -r' on the image " 34788ddffaeSVladimir Sementsov-Ogievskiy "file to fix."); 34888ddffaeSVladimir Sementsov-Ogievskiy if (need_update_header != NULL) { 34988ddffaeSVladimir Sementsov-Ogievskiy /* Updating is needed to drop invalid bitmap extension. 
*/ 35088ddffaeSVladimir Sementsov-Ogievskiy *need_update_header = true; 35188ddffaeSVladimir Sementsov-Ogievskiy } 35288ddffaeSVladimir Sementsov-Ogievskiy break; 35388ddffaeSVladimir Sementsov-Ogievskiy } 35488ddffaeSVladimir Sementsov-Ogievskiy 35532cc71deSAlberto Faria ret = bdrv_pread(bs->file, offset, ext.len, &bitmaps_ext, 0); 35688ddffaeSVladimir Sementsov-Ogievskiy if (ret < 0) { 35788ddffaeSVladimir Sementsov-Ogievskiy error_setg_errno(errp, -ret, "bitmaps_ext: " 35888ddffaeSVladimir Sementsov-Ogievskiy "Could not read ext header"); 35988ddffaeSVladimir Sementsov-Ogievskiy return ret; 36088ddffaeSVladimir Sementsov-Ogievskiy } 36188ddffaeSVladimir Sementsov-Ogievskiy 36288ddffaeSVladimir Sementsov-Ogievskiy if (bitmaps_ext.reserved32 != 0) { 36388ddffaeSVladimir Sementsov-Ogievskiy error_setg_errno(errp, -ret, "bitmaps_ext: " 36488ddffaeSVladimir Sementsov-Ogievskiy "Reserved field is not zero"); 36588ddffaeSVladimir Sementsov-Ogievskiy return -EINVAL; 36688ddffaeSVladimir Sementsov-Ogievskiy } 36788ddffaeSVladimir Sementsov-Ogievskiy 3683b698f52SPeter Maydell bitmaps_ext.nb_bitmaps = be32_to_cpu(bitmaps_ext.nb_bitmaps); 3693b698f52SPeter Maydell bitmaps_ext.bitmap_directory_size = 3703b698f52SPeter Maydell be64_to_cpu(bitmaps_ext.bitmap_directory_size); 3713b698f52SPeter Maydell bitmaps_ext.bitmap_directory_offset = 3723b698f52SPeter Maydell be64_to_cpu(bitmaps_ext.bitmap_directory_offset); 37388ddffaeSVladimir Sementsov-Ogievskiy 37488ddffaeSVladimir Sementsov-Ogievskiy if (bitmaps_ext.nb_bitmaps > QCOW2_MAX_BITMAPS) { 37588ddffaeSVladimir Sementsov-Ogievskiy error_setg(errp, 37688ddffaeSVladimir Sementsov-Ogievskiy "bitmaps_ext: Image has %" PRIu32 " bitmaps, " 37788ddffaeSVladimir Sementsov-Ogievskiy "exceeding the QEMU supported maximum of %d", 37888ddffaeSVladimir Sementsov-Ogievskiy bitmaps_ext.nb_bitmaps, QCOW2_MAX_BITMAPS); 37988ddffaeSVladimir Sementsov-Ogievskiy return -EINVAL; 38088ddffaeSVladimir Sementsov-Ogievskiy } 38188ddffaeSVladimir 
Sementsov-Ogievskiy 38288ddffaeSVladimir Sementsov-Ogievskiy if (bitmaps_ext.nb_bitmaps == 0) { 38388ddffaeSVladimir Sementsov-Ogievskiy error_setg(errp, "found bitmaps extension with zero bitmaps"); 38488ddffaeSVladimir Sementsov-Ogievskiy return -EINVAL; 38588ddffaeSVladimir Sementsov-Ogievskiy } 38688ddffaeSVladimir Sementsov-Ogievskiy 38774e60fb5SAlberto Garcia if (offset_into_cluster(s, bitmaps_ext.bitmap_directory_offset)) { 38888ddffaeSVladimir Sementsov-Ogievskiy error_setg(errp, "bitmaps_ext: " 38988ddffaeSVladimir Sementsov-Ogievskiy "invalid bitmap directory offset"); 39088ddffaeSVladimir Sementsov-Ogievskiy return -EINVAL; 39188ddffaeSVladimir Sementsov-Ogievskiy } 39288ddffaeSVladimir Sementsov-Ogievskiy 39388ddffaeSVladimir Sementsov-Ogievskiy if (bitmaps_ext.bitmap_directory_size > 39488ddffaeSVladimir Sementsov-Ogievskiy QCOW2_MAX_BITMAP_DIRECTORY_SIZE) { 39588ddffaeSVladimir Sementsov-Ogievskiy error_setg(errp, "bitmaps_ext: " 39688ddffaeSVladimir Sementsov-Ogievskiy "bitmap directory size (%" PRIu64 ") exceeds " 39788ddffaeSVladimir Sementsov-Ogievskiy "the maximum supported size (%d)", 39888ddffaeSVladimir Sementsov-Ogievskiy bitmaps_ext.bitmap_directory_size, 39988ddffaeSVladimir Sementsov-Ogievskiy QCOW2_MAX_BITMAP_DIRECTORY_SIZE); 40088ddffaeSVladimir Sementsov-Ogievskiy return -EINVAL; 40188ddffaeSVladimir Sementsov-Ogievskiy } 40288ddffaeSVladimir Sementsov-Ogievskiy 40388ddffaeSVladimir Sementsov-Ogievskiy s->nb_bitmaps = bitmaps_ext.nb_bitmaps; 40488ddffaeSVladimir Sementsov-Ogievskiy s->bitmap_directory_offset = 40588ddffaeSVladimir Sementsov-Ogievskiy bitmaps_ext.bitmap_directory_offset; 40688ddffaeSVladimir Sementsov-Ogievskiy s->bitmap_directory_size = 40788ddffaeSVladimir Sementsov-Ogievskiy bitmaps_ext.bitmap_directory_size; 40888ddffaeSVladimir Sementsov-Ogievskiy 40988ddffaeSVladimir Sementsov-Ogievskiy #ifdef DEBUG_EXT 41088ddffaeSVladimir Sementsov-Ogievskiy printf("Qcow2: Got bitmaps extension: " 41188ddffaeSVladimir 
Sementsov-Ogievskiy "offset=%" PRIu64 " nb_bitmaps=%" PRIu32 "\n", 41288ddffaeSVladimir Sementsov-Ogievskiy s->bitmap_directory_offset, s->nb_bitmaps); 41388ddffaeSVladimir Sementsov-Ogievskiy #endif 41488ddffaeSVladimir Sementsov-Ogievskiy break; 41588ddffaeSVladimir Sementsov-Ogievskiy 4169b890bdcSKevin Wolf case QCOW2_EXT_MAGIC_DATA_FILE: 4179b890bdcSKevin Wolf { 4189b890bdcSKevin Wolf s->image_data_file = g_malloc0(ext.len + 1); 41932cc71deSAlberto Faria ret = bdrv_pread(bs->file, offset, ext.len, s->image_data_file, 0); 4209b890bdcSKevin Wolf if (ret < 0) { 4219b890bdcSKevin Wolf error_setg_errno(errp, -ret, 4229b890bdcSKevin Wolf "ERROR: Could not read data file name"); 4239b890bdcSKevin Wolf return ret; 4249b890bdcSKevin Wolf } 4259b890bdcSKevin Wolf #ifdef DEBUG_EXT 4269b890bdcSKevin Wolf printf("Qcow2: Got external data file %s\n", s->image_data_file); 4279b890bdcSKevin Wolf #endif 4289b890bdcSKevin Wolf break; 4299b890bdcSKevin Wolf } 4309b890bdcSKevin Wolf 4319b80ddf3Saliguori default: 43275bab85cSKevin Wolf /* unknown magic - save it in case we need to rewrite the header */ 4334096974eSEric Blake /* If you add a new feature, make sure to also update the fast 4344096974eSEric Blake * path of qcow2_make_empty() to deal with it. 
*/ 43575bab85cSKevin Wolf { 43675bab85cSKevin Wolf Qcow2UnknownHeaderExtension *uext; 43775bab85cSKevin Wolf 43875bab85cSKevin Wolf uext = g_malloc0(sizeof(*uext) + ext.len); 43975bab85cSKevin Wolf uext->magic = ext.magic; 44075bab85cSKevin Wolf uext->len = ext.len; 44175bab85cSKevin Wolf QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next); 44275bab85cSKevin Wolf 44332cc71deSAlberto Faria ret = bdrv_pread(bs->file, offset, uext->len, uext->data, 0); 44475bab85cSKevin Wolf if (ret < 0) { 4453ef6c40aSMax Reitz error_setg_errno(errp, -ret, "ERROR: unknown extension: " 4463ef6c40aSMax Reitz "Could not read data"); 44775bab85cSKevin Wolf return ret; 44875bab85cSKevin Wolf } 44975bab85cSKevin Wolf } 4509b80ddf3Saliguori break; 4519b80ddf3Saliguori } 452fd29b4bbSKevin Wolf 453fd29b4bbSKevin Wolf offset += ((ext.len + 7) & ~7); 4549b80ddf3Saliguori } 4559b80ddf3Saliguori 4569b80ddf3Saliguori return 0; 4579b80ddf3Saliguori } 4589b80ddf3Saliguori 45975bab85cSKevin Wolf static void cleanup_unknown_header_ext(BlockDriverState *bs) 46075bab85cSKevin Wolf { 461ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 46275bab85cSKevin Wolf Qcow2UnknownHeaderExtension *uext, *next; 46375bab85cSKevin Wolf 46475bab85cSKevin Wolf QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) { 46575bab85cSKevin Wolf QLIST_REMOVE(uext, next); 46675bab85cSKevin Wolf g_free(uext); 46775bab85cSKevin Wolf } 46875bab85cSKevin Wolf } 4699b80ddf3Saliguori 470a55448b3SMax Reitz static void report_unsupported_feature(Error **errp, Qcow2Feature *table, 471a55448b3SMax Reitz uint64_t mask) 472cfcc4c62SKevin Wolf { 4737cdca2e2SAlberto Garcia g_autoptr(GString) features = g_string_sized_new(60); 47412ac6d3dSKevin Wolf 475cfcc4c62SKevin Wolf while (table && table->name[0] != '\0') { 476cfcc4c62SKevin Wolf if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) { 47712ac6d3dSKevin Wolf if (mask & (1ULL << table->bit)) { 4787cdca2e2SAlberto Garcia if (features->len > 0) { 4797cdca2e2SAlberto Garcia 
g_string_append(features, ", "); 4807cdca2e2SAlberto Garcia } 4817cdca2e2SAlberto Garcia g_string_append_printf(features, "%.46s", table->name); 48212ac6d3dSKevin Wolf mask &= ~(1ULL << table->bit); 483cfcc4c62SKevin Wolf } 484cfcc4c62SKevin Wolf } 485cfcc4c62SKevin Wolf table++; 486cfcc4c62SKevin Wolf } 487cfcc4c62SKevin Wolf 488cfcc4c62SKevin Wolf if (mask) { 4897cdca2e2SAlberto Garcia if (features->len > 0) { 4907cdca2e2SAlberto Garcia g_string_append(features, ", "); 4917cdca2e2SAlberto Garcia } 4927cdca2e2SAlberto Garcia g_string_append_printf(features, 4937cdca2e2SAlberto Garcia "Unknown incompatible feature: %" PRIx64, mask); 494cfcc4c62SKevin Wolf } 49512ac6d3dSKevin Wolf 4967cdca2e2SAlberto Garcia error_setg(errp, "Unsupported qcow2 feature(s): %s", features->str); 497cfcc4c62SKevin Wolf } 498cfcc4c62SKevin Wolf 499c61d0004SStefan Hajnoczi /* 500bfe8043eSStefan Hajnoczi * Sets the dirty bit and flushes afterwards if necessary. 501bfe8043eSStefan Hajnoczi * 502bfe8043eSStefan Hajnoczi * The incompatible_features bit is only set if the image file header was 503bfe8043eSStefan Hajnoczi * updated successfully. Therefore it is not required to check the return 504bfe8043eSStefan Hajnoczi * value of this function. 
505bfe8043eSStefan Hajnoczi */ 506280d3735SKevin Wolf int qcow2_mark_dirty(BlockDriverState *bs) 507bfe8043eSStefan Hajnoczi { 508ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 509bfe8043eSStefan Hajnoczi uint64_t val; 510bfe8043eSStefan Hajnoczi int ret; 511bfe8043eSStefan Hajnoczi 512bfe8043eSStefan Hajnoczi assert(s->qcow_version >= 3); 513bfe8043eSStefan Hajnoczi 514bfe8043eSStefan Hajnoczi if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { 515bfe8043eSStefan Hajnoczi return 0; /* already dirty */ 516bfe8043eSStefan Hajnoczi } 517bfe8043eSStefan Hajnoczi 518bfe8043eSStefan Hajnoczi val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY); 519d9ca2ea2SKevin Wolf ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features), 52032cc71deSAlberto Faria sizeof(val), &val, 0); 521bfe8043eSStefan Hajnoczi if (ret < 0) { 522bfe8043eSStefan Hajnoczi return ret; 523bfe8043eSStefan Hajnoczi } 5249a4f4c31SKevin Wolf ret = bdrv_flush(bs->file->bs); 525bfe8043eSStefan Hajnoczi if (ret < 0) { 526bfe8043eSStefan Hajnoczi return ret; 527bfe8043eSStefan Hajnoczi } 528bfe8043eSStefan Hajnoczi 529bfe8043eSStefan Hajnoczi /* Only treat image as dirty if the header was updated successfully */ 530bfe8043eSStefan Hajnoczi s->incompatible_features |= QCOW2_INCOMPAT_DIRTY; 531bfe8043eSStefan Hajnoczi return 0; 532bfe8043eSStefan Hajnoczi } 533bfe8043eSStefan Hajnoczi 534bfe8043eSStefan Hajnoczi /* 535c61d0004SStefan Hajnoczi * Clears the dirty bit and flushes before if necessary. Only call this 536c61d0004SStefan Hajnoczi * function when there are no pending requests, it does not guard against 537c61d0004SStefan Hajnoczi * concurrent requests dirtying the image. 
538c61d0004SStefan Hajnoczi */ 539c61d0004SStefan Hajnoczi static int qcow2_mark_clean(BlockDriverState *bs) 540c61d0004SStefan Hajnoczi { 541ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 542c61d0004SStefan Hajnoczi 543c61d0004SStefan Hajnoczi if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { 5444c2e5f8fSKevin Wolf int ret; 5454c2e5f8fSKevin Wolf 5464c2e5f8fSKevin Wolf s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY; 5474c2e5f8fSKevin Wolf 5488b220eb7SPaolo Bonzini ret = qcow2_flush_caches(bs); 549c61d0004SStefan Hajnoczi if (ret < 0) { 550c61d0004SStefan Hajnoczi return ret; 551c61d0004SStefan Hajnoczi } 552c61d0004SStefan Hajnoczi 553c61d0004SStefan Hajnoczi return qcow2_update_header(bs); 554c61d0004SStefan Hajnoczi } 555c61d0004SStefan Hajnoczi return 0; 556c61d0004SStefan Hajnoczi } 557c61d0004SStefan Hajnoczi 55869c98726SMax Reitz /* 55969c98726SMax Reitz * Marks the image as corrupt. 56069c98726SMax Reitz */ 56169c98726SMax Reitz int qcow2_mark_corrupt(BlockDriverState *bs) 56269c98726SMax Reitz { 563ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 56469c98726SMax Reitz 56569c98726SMax Reitz s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT; 56669c98726SMax Reitz return qcow2_update_header(bs); 56769c98726SMax Reitz } 56869c98726SMax Reitz 56969c98726SMax Reitz /* 57069c98726SMax Reitz * Marks the image as consistent, i.e., unsets the corrupt bit, and flushes 57169c98726SMax Reitz * before if necessary. 
 */
int qcow2_mark_consistent(BlockDriverState *bs)
{
    BDRVQcow2State *s = bs->opaque;

    if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
        /* Flush first; the bit is only cleared once that has succeeded */
        int ret = qcow2_flush_caches(bs);
        if (ret < 0) {
            return ret;
        }

        s->incompatible_features &= ~QCOW2_INCOMPAT_CORRUPT;
        return qcow2_update_header(bs);
    }
    return 0;
}

/*
 * Adds the counters of @src (corruptions, leaks, check errors and the two
 * "fixed" counters) to @out.
 *
 * If @set_allocation_info is true, image_end_offset and bfi are additionally
 * copied from @src, overwriting the values in @out.
 */
static void qcow2_add_check_result(BdrvCheckResult *out,
                                   const BdrvCheckResult *src,
                                   bool set_allocation_info)
{
    out->corruptions += src->corruptions;
    out->leaks += src->leaks;
    out->check_errors += src->check_errors;
    out->corruptions_fixed += src->corruptions_fixed;
    out->leaks_fixed += src->leaks_fixed;

    if (set_allocation_info) {
        out->image_end_offset = src->image_end_offset;
        out->bfi = src->bfi;
    }
}

/*
 * Runs the image check in three steps (read the snapshot table, check the
 * refcounts, fix the snapshot table) and accumulates their results in
 * @result, which is zeroed first.
 *
 * If @fix is set and neither check errors nor corruptions were counted, the
 * image is afterwards marked clean and consistent.
 *
 * Returns a negative value as soon as one of the steps fails; otherwise the
 * result of the last step or of the final mark operations.
 *
 * NOTE(review): the name and the qcow2_co_check() wrapper suggest that
 * s->lock must be held by the caller - confirm.
 */
static int coroutine_fn qcow2_co_check_locked(BlockDriverState *bs,
                                              BdrvCheckResult *result,
                                              BdrvCheckMode fix)
{
    BdrvCheckResult snapshot_res = {};
    BdrvCheckResult refcount_res = {};
    int ret;

    memset(result, 0, sizeof(*result));

    ret = qcow2_check_read_snapshot_table(bs, &snapshot_res, fix);
    if (ret < 0) {
        qcow2_add_check_result(result, &snapshot_res, false);
        return ret;
    }

    ret = qcow2_check_refcounts(bs, &refcount_res, fix);
    /* Only the refcount result supplies image_end_offset and bfi */
    qcow2_add_check_result(result, &refcount_res, true);
    if (ret < 0) {
        /* Still report what the snapshot table read found so far */
        qcow2_add_check_result(result, &snapshot_res, false);
        return ret;
    }

    /* snapshot_res is added exactly once on every path through here */
    ret = qcow2_check_fix_snapshot_table(bs, &snapshot_res, fix);
    qcow2_add_check_result(result, &snapshot_res, false);
    if (ret < 0) {
        return ret;
    }

    if (fix && result->check_errors == 0 && result->corruptions == 0) {
        ret = qcow2_mark_clean(bs);
        if (ret < 0) {
            return ret;
        }
        return qcow2_mark_consistent(bs);
    }
    return ret;
}

/*
 * Calls qcow2_co_check_locked() with s->lock held and returns its result.
 */
static int coroutine_fn qcow2_co_check(BlockDriverState *bs,
                                       BdrvCheckResult *result,
                                       BdrvCheckMode fix)
{
    BDRVQcow2State *s = bs->opaque;
    int ret;

    qemu_co_mutex_lock(&s->lock);
    ret = qcow2_co_check_locked(bs, result, fix);
    qemu_co_mutex_unlock(&s->lock);
    return ret;
}

/*
 * Validates the position and size of a table described by @offset,
 * @entries and @entry_len.
 *
 * Returns -EFBIG if @entries exceeds @max_size_bytes / @entry_len, and
 * -EINVAL if the end of the table would lie beyond INT64_MAX or @offset is
 * not aligned to a cluster boundary.  In both cases @errp is set, using
 * @table_name in the message.  Returns 0 if the table passes all checks.
 */
int qcow2_validate_table(BlockDriverState *bs, uint64_t offset,
                         uint64_t entries, size_t entry_len,
                         int64_t max_size_bytes, const char *table_name,
                         Error **errp)
{
    BDRVQcow2State *s = bs->opaque;

    if (entries > max_size_bytes / entry_len) {
        error_setg(errp, "%s too large", table_name);
        return -EFBIG;
    }

    /* Use signed INT64_MAX as the maximum even for uint64_t header fields,
     * because values will be passed to qemu functions taking int64_t. */
    if ((INT64_MAX - entries * entry_len < offset) ||
        (offset_into_cluster(s, offset) != 0)) {
        error_setg(errp, "%s offset invalid", table_name);
        return -EINVAL;
    }

    return 0;
}

static const char *const mutable_opts[] = {
    QCOW2_OPT_LAZY_REFCOUNTS,
    QCOW2_OPT_DISCARD_REQUEST,
    QCOW2_OPT_DISCARD_SNAPSHOT,
    QCOW2_OPT_DISCARD_OTHER,
    QCOW2_OPT_OVERLAP,
    QCOW2_OPT_OVERLAP_TEMPLATE,
    QCOW2_OPT_OVERLAP_MAIN_HEADER,
    QCOW2_OPT_OVERLAP_ACTIVE_L1,
    QCOW2_OPT_OVERLAP_ACTIVE_L2,
    QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
    QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
    QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
    QCOW2_OPT_OVERLAP_INACTIVE_L1,
    QCOW2_OPT_OVERLAP_INACTIVE_L2,
    QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
6968a2ce0bcSAlberto Garcia QCOW2_OPT_CACHE_SIZE, 6978a2ce0bcSAlberto Garcia QCOW2_OPT_L2_CACHE_SIZE, 6988a2ce0bcSAlberto Garcia QCOW2_OPT_L2_CACHE_ENTRY_SIZE, 6998a2ce0bcSAlberto Garcia QCOW2_OPT_REFCOUNT_CACHE_SIZE, 7008a2ce0bcSAlberto Garcia QCOW2_OPT_CACHE_CLEAN_INTERVAL, 7018a2ce0bcSAlberto Garcia NULL 7028a2ce0bcSAlberto Garcia }; 7038a2ce0bcSAlberto Garcia 70474c4510aSKevin Wolf static QemuOptsList qcow2_runtime_opts = { 70574c4510aSKevin Wolf .name = "qcow2", 70674c4510aSKevin Wolf .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head), 70774c4510aSKevin Wolf .desc = { 70874c4510aSKevin Wolf { 70964aa99d3SKevin Wolf .name = QCOW2_OPT_LAZY_REFCOUNTS, 71074c4510aSKevin Wolf .type = QEMU_OPT_BOOL, 71174c4510aSKevin Wolf .help = "Postpone refcount updates", 71274c4510aSKevin Wolf }, 71367af674eSKevin Wolf { 71467af674eSKevin Wolf .name = QCOW2_OPT_DISCARD_REQUEST, 71567af674eSKevin Wolf .type = QEMU_OPT_BOOL, 71667af674eSKevin Wolf .help = "Pass guest discard requests to the layer below", 71767af674eSKevin Wolf }, 71867af674eSKevin Wolf { 71967af674eSKevin Wolf .name = QCOW2_OPT_DISCARD_SNAPSHOT, 72067af674eSKevin Wolf .type = QEMU_OPT_BOOL, 72167af674eSKevin Wolf .help = "Generate discard requests when snapshot related space " 72267af674eSKevin Wolf "is freed", 72367af674eSKevin Wolf }, 72467af674eSKevin Wolf { 72567af674eSKevin Wolf .name = QCOW2_OPT_DISCARD_OTHER, 72667af674eSKevin Wolf .type = QEMU_OPT_BOOL, 72767af674eSKevin Wolf .help = "Generate discard requests when other clusters are freed", 72867af674eSKevin Wolf }, 72905de7e86SMax Reitz { 73005de7e86SMax Reitz .name = QCOW2_OPT_OVERLAP, 73105de7e86SMax Reitz .type = QEMU_OPT_STRING, 73205de7e86SMax Reitz .help = "Selects which overlap checks to perform from a range of " 73305de7e86SMax Reitz "templates (none, constant, cached, all)", 73405de7e86SMax Reitz }, 73505de7e86SMax Reitz { 736ee42b5ceSMax Reitz .name = QCOW2_OPT_OVERLAP_TEMPLATE, 737ee42b5ceSMax Reitz .type = QEMU_OPT_STRING, 
738ee42b5ceSMax Reitz .help = "Selects which overlap checks to perform from a range of " 739ee42b5ceSMax Reitz "templates (none, constant, cached, all)", 740ee42b5ceSMax Reitz }, 741ee42b5ceSMax Reitz { 74205de7e86SMax Reitz .name = QCOW2_OPT_OVERLAP_MAIN_HEADER, 74305de7e86SMax Reitz .type = QEMU_OPT_BOOL, 74405de7e86SMax Reitz .help = "Check for unintended writes into the main qcow2 header", 74505de7e86SMax Reitz }, 74605de7e86SMax Reitz { 74705de7e86SMax Reitz .name = QCOW2_OPT_OVERLAP_ACTIVE_L1, 74805de7e86SMax Reitz .type = QEMU_OPT_BOOL, 74905de7e86SMax Reitz .help = "Check for unintended writes into the active L1 table", 75005de7e86SMax Reitz }, 75105de7e86SMax Reitz { 75205de7e86SMax Reitz .name = QCOW2_OPT_OVERLAP_ACTIVE_L2, 75305de7e86SMax Reitz .type = QEMU_OPT_BOOL, 75405de7e86SMax Reitz .help = "Check for unintended writes into an active L2 table", 75505de7e86SMax Reitz }, 75605de7e86SMax Reitz { 75705de7e86SMax Reitz .name = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE, 75805de7e86SMax Reitz .type = QEMU_OPT_BOOL, 75905de7e86SMax Reitz .help = "Check for unintended writes into the refcount table", 76005de7e86SMax Reitz }, 76105de7e86SMax Reitz { 76205de7e86SMax Reitz .name = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK, 76305de7e86SMax Reitz .type = QEMU_OPT_BOOL, 76405de7e86SMax Reitz .help = "Check for unintended writes into a refcount block", 76505de7e86SMax Reitz }, 76605de7e86SMax Reitz { 76705de7e86SMax Reitz .name = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE, 76805de7e86SMax Reitz .type = QEMU_OPT_BOOL, 76905de7e86SMax Reitz .help = "Check for unintended writes into the snapshot table", 77005de7e86SMax Reitz }, 77105de7e86SMax Reitz { 77205de7e86SMax Reitz .name = QCOW2_OPT_OVERLAP_INACTIVE_L1, 77305de7e86SMax Reitz .type = QEMU_OPT_BOOL, 77405de7e86SMax Reitz .help = "Check for unintended writes into an inactive L1 table", 77505de7e86SMax Reitz }, 77605de7e86SMax Reitz { 77705de7e86SMax Reitz .name = QCOW2_OPT_OVERLAP_INACTIVE_L2, 77805de7e86SMax Reitz .type = QEMU_OPT_BOOL, 
77905de7e86SMax Reitz .help = "Check for unintended writes into an inactive L2 table", 78005de7e86SMax Reitz }, 7816c1c8d5dSMax Reitz { 7820e4e4318SVladimir Sementsov-Ogievskiy .name = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY, 7830e4e4318SVladimir Sementsov-Ogievskiy .type = QEMU_OPT_BOOL, 7840e4e4318SVladimir Sementsov-Ogievskiy .help = "Check for unintended writes into the bitmap directory", 7850e4e4318SVladimir Sementsov-Ogievskiy }, 7860e4e4318SVladimir Sementsov-Ogievskiy { 7876c1c8d5dSMax Reitz .name = QCOW2_OPT_CACHE_SIZE, 7886c1c8d5dSMax Reitz .type = QEMU_OPT_SIZE, 7896c1c8d5dSMax Reitz .help = "Maximum combined metadata (L2 tables and refcount blocks) " 7906c1c8d5dSMax Reitz "cache size", 7916c1c8d5dSMax Reitz }, 7926c1c8d5dSMax Reitz { 7936c1c8d5dSMax Reitz .name = QCOW2_OPT_L2_CACHE_SIZE, 7946c1c8d5dSMax Reitz .type = QEMU_OPT_SIZE, 7956c1c8d5dSMax Reitz .help = "Maximum L2 table cache size", 7966c1c8d5dSMax Reitz }, 7976c1c8d5dSMax Reitz { 7981221fe6fSAlberto Garcia .name = QCOW2_OPT_L2_CACHE_ENTRY_SIZE, 7991221fe6fSAlberto Garcia .type = QEMU_OPT_SIZE, 8001221fe6fSAlberto Garcia .help = "Size of each entry in the L2 cache", 8011221fe6fSAlberto Garcia }, 8021221fe6fSAlberto Garcia { 8036c1c8d5dSMax Reitz .name = QCOW2_OPT_REFCOUNT_CACHE_SIZE, 8046c1c8d5dSMax Reitz .type = QEMU_OPT_SIZE, 8056c1c8d5dSMax Reitz .help = "Maximum refcount block cache size", 8066c1c8d5dSMax Reitz }, 807279621c0SAlberto Garcia { 808279621c0SAlberto Garcia .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL, 809279621c0SAlberto Garcia .type = QEMU_OPT_NUMBER, 810279621c0SAlberto Garcia .help = "Clean unused cache entries after this time (in seconds)", 811279621c0SAlberto Garcia }, 8124652b8f3SDaniel P. Berrange BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.", 8134652b8f3SDaniel P. 
Berrange "ID of secret providing qcow2 AES key or LUKS passphrase"), 81474c4510aSKevin Wolf { /* end of list */ } 81574c4510aSKevin Wolf }, 81674c4510aSKevin Wolf }; 81774c4510aSKevin Wolf 8184092e99dSMax Reitz static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = { 8194092e99dSMax Reitz [QCOW2_OL_MAIN_HEADER_BITNR] = QCOW2_OPT_OVERLAP_MAIN_HEADER, 8204092e99dSMax Reitz [QCOW2_OL_ACTIVE_L1_BITNR] = QCOW2_OPT_OVERLAP_ACTIVE_L1, 8214092e99dSMax Reitz [QCOW2_OL_ACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_ACTIVE_L2, 8224092e99dSMax Reitz [QCOW2_OL_REFCOUNT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE, 8234092e99dSMax Reitz [QCOW2_OL_REFCOUNT_BLOCK_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK, 8244092e99dSMax Reitz [QCOW2_OL_SNAPSHOT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE, 8254092e99dSMax Reitz [QCOW2_OL_INACTIVE_L1_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L1, 8264092e99dSMax Reitz [QCOW2_OL_INACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L2, 8270e4e4318SVladimir Sementsov-Ogievskiy [QCOW2_OL_BITMAP_DIRECTORY_BITNR] = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY, 8284092e99dSMax Reitz }; 8294092e99dSMax Reitz 830279621c0SAlberto Garcia static void cache_clean_timer_cb(void *opaque) 831279621c0SAlberto Garcia { 832279621c0SAlberto Garcia BlockDriverState *bs = opaque; 833ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 834b2f68bffSAlberto Garcia qcow2_cache_clean_unused(s->l2_table_cache); 835b2f68bffSAlberto Garcia qcow2_cache_clean_unused(s->refcount_block_cache); 836279621c0SAlberto Garcia timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 837279621c0SAlberto Garcia (int64_t) s->cache_clean_interval * 1000); 838279621c0SAlberto Garcia } 839279621c0SAlberto Garcia 840279621c0SAlberto Garcia static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context) 841279621c0SAlberto Garcia { 842ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 843279621c0SAlberto Garcia if (s->cache_clean_interval > 0) { 844ad0ce642SPavel Dovgalyuk 
s->cache_clean_timer = 845ad0ce642SPavel Dovgalyuk aio_timer_new_with_attrs(context, QEMU_CLOCK_VIRTUAL, 846ad0ce642SPavel Dovgalyuk SCALE_MS, QEMU_TIMER_ATTR_EXTERNAL, 847ad0ce642SPavel Dovgalyuk cache_clean_timer_cb, bs); 848279621c0SAlberto Garcia timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 849279621c0SAlberto Garcia (int64_t) s->cache_clean_interval * 1000); 850279621c0SAlberto Garcia } 851279621c0SAlberto Garcia } 852279621c0SAlberto Garcia 853279621c0SAlberto Garcia static void cache_clean_timer_del(BlockDriverState *bs) 854279621c0SAlberto Garcia { 855ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 856279621c0SAlberto Garcia if (s->cache_clean_timer) { 857279621c0SAlberto Garcia timer_free(s->cache_clean_timer); 858279621c0SAlberto Garcia s->cache_clean_timer = NULL; 859279621c0SAlberto Garcia } 860279621c0SAlberto Garcia } 861279621c0SAlberto Garcia 862279621c0SAlberto Garcia static void qcow2_detach_aio_context(BlockDriverState *bs) 863279621c0SAlberto Garcia { 864279621c0SAlberto Garcia cache_clean_timer_del(bs); 865279621c0SAlberto Garcia } 866279621c0SAlberto Garcia 867279621c0SAlberto Garcia static void qcow2_attach_aio_context(BlockDriverState *bs, 868279621c0SAlberto Garcia AioContext *new_context) 869279621c0SAlberto Garcia { 870279621c0SAlberto Garcia cache_clean_timer_init(bs, new_context); 871279621c0SAlberto Garcia } 872279621c0SAlberto Garcia 873772c4cadSVladimir Sementsov-Ogievskiy static bool read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, 874bc85ef26SMax Reitz uint64_t *l2_cache_size, 8751221fe6fSAlberto Garcia uint64_t *l2_cache_entry_size, 8766c1c8d5dSMax Reitz uint64_t *refcount_cache_size, Error **errp) 8776c1c8d5dSMax Reitz { 878ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 879b749562dSLeonid Bloch uint64_t combined_cache_size, l2_cache_max_setting; 8806c1c8d5dSMax Reitz bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set; 881af39bd0dSAlberto Garcia bool 
l2_cache_entry_size_set; 8827af5eea9SAlberto Garcia int min_refcount_cache = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size; 883b749562dSLeonid Bloch uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE; 884b70d0820SAlberto Garcia uint64_t max_l2_entries = DIV_ROUND_UP(virtual_disk_size, s->cluster_size); 885b70d0820SAlberto Garcia /* An L2 table is always one cluster in size so the max cache size 886b70d0820SAlberto Garcia * should be a multiple of the cluster size. */ 887c8fd8554SAlberto Garcia uint64_t max_l2_cache = ROUND_UP(max_l2_entries * l2_entry_size(s), 888b70d0820SAlberto Garcia s->cluster_size); 8896c1c8d5dSMax Reitz 8906c1c8d5dSMax Reitz combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE); 8916c1c8d5dSMax Reitz l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE); 8926c1c8d5dSMax Reitz refcount_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_REFCOUNT_CACHE_SIZE); 893af39bd0dSAlberto Garcia l2_cache_entry_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE); 8946c1c8d5dSMax Reitz 8956c1c8d5dSMax Reitz combined_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_CACHE_SIZE, 0); 896b749562dSLeonid Bloch l2_cache_max_setting = qemu_opt_get_size(opts, QCOW2_OPT_L2_CACHE_SIZE, 897b749562dSLeonid Bloch DEFAULT_L2_CACHE_MAX_SIZE); 8986c1c8d5dSMax Reitz *refcount_cache_size = qemu_opt_get_size(opts, 8996c1c8d5dSMax Reitz QCOW2_OPT_REFCOUNT_CACHE_SIZE, 0); 9006c1c8d5dSMax Reitz 9011221fe6fSAlberto Garcia *l2_cache_entry_size = qemu_opt_get_size( 9021221fe6fSAlberto Garcia opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE, s->cluster_size); 9031221fe6fSAlberto Garcia 904b749562dSLeonid Bloch *l2_cache_size = MIN(max_l2_cache, l2_cache_max_setting); 905b749562dSLeonid Bloch 9066c1c8d5dSMax Reitz if (combined_cache_size_set) { 9076c1c8d5dSMax Reitz if (l2_cache_size_set && refcount_cache_size_set) { 9086c1c8d5dSMax Reitz error_setg(errp, QCOW2_OPT_CACHE_SIZE ", " QCOW2_OPT_L2_CACHE_SIZE 9096c1c8d5dSMax Reitz " and " 
QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not be set " 910308999e9SLeonid Bloch "at the same time"); 911772c4cadSVladimir Sementsov-Ogievskiy return false; 912b749562dSLeonid Bloch } else if (l2_cache_size_set && 913b749562dSLeonid Bloch (l2_cache_max_setting > combined_cache_size)) { 9146c1c8d5dSMax Reitz error_setg(errp, QCOW2_OPT_L2_CACHE_SIZE " may not exceed " 9156c1c8d5dSMax Reitz QCOW2_OPT_CACHE_SIZE); 916772c4cadSVladimir Sementsov-Ogievskiy return false; 9176c1c8d5dSMax Reitz } else if (*refcount_cache_size > combined_cache_size) { 9186c1c8d5dSMax Reitz error_setg(errp, QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not exceed " 9196c1c8d5dSMax Reitz QCOW2_OPT_CACHE_SIZE); 920772c4cadSVladimir Sementsov-Ogievskiy return false; 9216c1c8d5dSMax Reitz } 9226c1c8d5dSMax Reitz 9236c1c8d5dSMax Reitz if (l2_cache_size_set) { 9246c1c8d5dSMax Reitz *refcount_cache_size = combined_cache_size - *l2_cache_size; 9256c1c8d5dSMax Reitz } else if (refcount_cache_size_set) { 9266c1c8d5dSMax Reitz *l2_cache_size = combined_cache_size - *refcount_cache_size; 9276c1c8d5dSMax Reitz } else { 92852253998SAlberto Garcia /* Assign as much memory as possible to the L2 cache, and 92952253998SAlberto Garcia * use the remainder for the refcount cache */ 93052253998SAlberto Garcia if (combined_cache_size >= max_l2_cache + min_refcount_cache) { 93152253998SAlberto Garcia *l2_cache_size = max_l2_cache; 93252253998SAlberto Garcia *refcount_cache_size = combined_cache_size - *l2_cache_size; 93352253998SAlberto Garcia } else { 93452253998SAlberto Garcia *refcount_cache_size = 93552253998SAlberto Garcia MIN(combined_cache_size, min_refcount_cache); 9366c1c8d5dSMax Reitz *l2_cache_size = combined_cache_size - *refcount_cache_size; 9376c1c8d5dSMax Reitz } 93852253998SAlberto Garcia } 9396c1c8d5dSMax Reitz } 940af39bd0dSAlberto Garcia 941af39bd0dSAlberto Garcia /* 942af39bd0dSAlberto Garcia * If the L2 cache is not enough to cover the whole disk then 943af39bd0dSAlberto Garcia * default to 4KB entries. 
Smaller entries reduce the cost of 944af39bd0dSAlberto Garcia * loads and evictions and increase I/O performance. 945af39bd0dSAlberto Garcia */ 946af39bd0dSAlberto Garcia if (*l2_cache_size < max_l2_cache && !l2_cache_entry_size_set) { 947af39bd0dSAlberto Garcia *l2_cache_entry_size = MIN(s->cluster_size, 4096); 948af39bd0dSAlberto Garcia } 949af39bd0dSAlberto Garcia 950657ada52SLeonid Bloch /* l2_cache_size and refcount_cache_size are ensured to have at least 951657ada52SLeonid Bloch * their minimum values in qcow2_update_options_prepare() */ 9521221fe6fSAlberto Garcia 9531221fe6fSAlberto Garcia if (*l2_cache_entry_size < (1 << MIN_CLUSTER_BITS) || 9541221fe6fSAlberto Garcia *l2_cache_entry_size > s->cluster_size || 9551221fe6fSAlberto Garcia !is_power_of_2(*l2_cache_entry_size)) { 9561221fe6fSAlberto Garcia error_setg(errp, "L2 cache entry size must be a power of two " 9571221fe6fSAlberto Garcia "between %d and the cluster size (%d)", 9581221fe6fSAlberto Garcia 1 << MIN_CLUSTER_BITS, s->cluster_size); 959772c4cadSVladimir Sementsov-Ogievskiy return false; 9601221fe6fSAlberto Garcia } 961772c4cadSVladimir Sementsov-Ogievskiy 962772c4cadSVladimir Sementsov-Ogievskiy return true; 9636c1c8d5dSMax Reitz } 9646c1c8d5dSMax Reitz 965ee55b173SKevin Wolf typedef struct Qcow2ReopenState { 966ee55b173SKevin Wolf Qcow2Cache *l2_table_cache; 967ee55b173SKevin Wolf Qcow2Cache *refcount_block_cache; 9683c2e511aSAlberto Garcia int l2_slice_size; /* Number of entries in a slice of the L2 table */ 969ee55b173SKevin Wolf bool use_lazy_refcounts; 970ee55b173SKevin Wolf int overlap_check; 971ee55b173SKevin Wolf bool discard_passthrough[QCOW2_DISCARD_MAX]; 972ee55b173SKevin Wolf uint64_t cache_clean_interval; 973b25b387fSDaniel P. 
Berrange QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */ 974ee55b173SKevin Wolf } Qcow2ReopenState; 975ee55b173SKevin Wolf 976ee55b173SKevin Wolf static int qcow2_update_options_prepare(BlockDriverState *bs, 977ee55b173SKevin Wolf Qcow2ReopenState *r, 978ee55b173SKevin Wolf QDict *options, int flags, 979ee55b173SKevin Wolf Error **errp) 9804c75d1a1SKevin Wolf { 9814c75d1a1SKevin Wolf BDRVQcow2State *s = bs->opaque; 98294edf3fbSKevin Wolf QemuOpts *opts = NULL; 9834c75d1a1SKevin Wolf const char *opt_overlap_check, *opt_overlap_check_template; 9844c75d1a1SKevin Wolf int overlap_check_template = 0; 9851221fe6fSAlberto Garcia uint64_t l2_cache_size, l2_cache_entry_size, refcount_cache_size; 9864c75d1a1SKevin Wolf int i; 987b25b387fSDaniel P. Berrange const char *encryptfmt; 988b25b387fSDaniel P. Berrange QDict *encryptopts = NULL; 9894c75d1a1SKevin Wolf int ret; 9904c75d1a1SKevin Wolf 991b25b387fSDaniel P. Berrange qdict_extract_subqdict(options, &encryptopts, "encrypt."); 992b25b387fSDaniel P. Berrange encryptfmt = qdict_get_try_str(encryptopts, "format"); 993b25b387fSDaniel P. 
Berrange 99494edf3fbSKevin Wolf opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort); 995af175e85SMarkus Armbruster if (!qemu_opts_absorb_qdict(opts, options, errp)) { 99694edf3fbSKevin Wolf ret = -EINVAL; 99794edf3fbSKevin Wolf goto fail; 99894edf3fbSKevin Wolf } 99994edf3fbSKevin Wolf 100094edf3fbSKevin Wolf /* get L2 table/refcount block cache size from command line options */ 1001772c4cadSVladimir Sementsov-Ogievskiy if (!read_cache_sizes(bs, opts, &l2_cache_size, &l2_cache_entry_size, 1002772c4cadSVladimir Sementsov-Ogievskiy &refcount_cache_size, errp)) { 100394edf3fbSKevin Wolf ret = -EINVAL; 100494edf3fbSKevin Wolf goto fail; 100594edf3fbSKevin Wolf } 100694edf3fbSKevin Wolf 10071221fe6fSAlberto Garcia l2_cache_size /= l2_cache_entry_size; 100894edf3fbSKevin Wolf if (l2_cache_size < MIN_L2_CACHE_SIZE) { 100994edf3fbSKevin Wolf l2_cache_size = MIN_L2_CACHE_SIZE; 101094edf3fbSKevin Wolf } 101194edf3fbSKevin Wolf if (l2_cache_size > INT_MAX) { 101294edf3fbSKevin Wolf error_setg(errp, "L2 cache size too big"); 101394edf3fbSKevin Wolf ret = -EINVAL; 101494edf3fbSKevin Wolf goto fail; 101594edf3fbSKevin Wolf } 101694edf3fbSKevin Wolf 101794edf3fbSKevin Wolf refcount_cache_size /= s->cluster_size; 101894edf3fbSKevin Wolf if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) { 101994edf3fbSKevin Wolf refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE; 102094edf3fbSKevin Wolf } 102194edf3fbSKevin Wolf if (refcount_cache_size > INT_MAX) { 102294edf3fbSKevin Wolf error_setg(errp, "Refcount cache size too big"); 102394edf3fbSKevin Wolf ret = -EINVAL; 102494edf3fbSKevin Wolf goto fail; 102594edf3fbSKevin Wolf } 102694edf3fbSKevin Wolf 10275b0959a7SKevin Wolf /* alloc new L2 table/refcount block cache, flush old one */ 10285b0959a7SKevin Wolf if (s->l2_table_cache) { 10295b0959a7SKevin Wolf ret = qcow2_cache_flush(bs, s->l2_table_cache); 10305b0959a7SKevin Wolf if (ret) { 10315b0959a7SKevin Wolf error_setg_errno(errp, -ret, "Failed to flush the L2 table cache"); 
10325b0959a7SKevin Wolf goto fail; 10335b0959a7SKevin Wolf } 10345b0959a7SKevin Wolf } 10355b0959a7SKevin Wolf 10365b0959a7SKevin Wolf if (s->refcount_block_cache) { 10375b0959a7SKevin Wolf ret = qcow2_cache_flush(bs, s->refcount_block_cache); 10385b0959a7SKevin Wolf if (ret) { 10395b0959a7SKevin Wolf error_setg_errno(errp, -ret, 10405b0959a7SKevin Wolf "Failed to flush the refcount block cache"); 10415b0959a7SKevin Wolf goto fail; 10425b0959a7SKevin Wolf } 10435b0959a7SKevin Wolf } 10445b0959a7SKevin Wolf 1045c8fd8554SAlberto Garcia r->l2_slice_size = l2_cache_entry_size / l2_entry_size(s); 10461221fe6fSAlberto Garcia r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size, 10471221fe6fSAlberto Garcia l2_cache_entry_size); 10481221fe6fSAlberto Garcia r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size, 10491221fe6fSAlberto Garcia s->cluster_size); 1050ee55b173SKevin Wolf if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) { 105194edf3fbSKevin Wolf error_setg(errp, "Could not allocate metadata caches"); 105294edf3fbSKevin Wolf ret = -ENOMEM; 105394edf3fbSKevin Wolf goto fail; 105494edf3fbSKevin Wolf } 105594edf3fbSKevin Wolf 105694edf3fbSKevin Wolf /* New interval for cache cleanup timer */ 1057ee55b173SKevin Wolf r->cache_clean_interval = 10585b0959a7SKevin Wolf qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL, 1059e957b50bSLeonid Bloch DEFAULT_CACHE_CLEAN_INTERVAL); 106091203f08SAlberto Garcia #ifndef CONFIG_LINUX 106191203f08SAlberto Garcia if (r->cache_clean_interval != 0) { 106291203f08SAlberto Garcia error_setg(errp, QCOW2_OPT_CACHE_CLEAN_INTERVAL 106391203f08SAlberto Garcia " not supported on this host"); 106491203f08SAlberto Garcia ret = -EINVAL; 106591203f08SAlberto Garcia goto fail; 106691203f08SAlberto Garcia } 106791203f08SAlberto Garcia #endif 1068ee55b173SKevin Wolf if (r->cache_clean_interval > UINT_MAX) { 106994edf3fbSKevin Wolf error_setg(errp, "Cache clean interval too big"); 107094edf3fbSKevin Wolf ret = 
-EINVAL; 107194edf3fbSKevin Wolf goto fail; 107294edf3fbSKevin Wolf } 107394edf3fbSKevin Wolf 10745b0959a7SKevin Wolf /* lazy-refcounts; flush if going from enabled to disabled */ 1075ee55b173SKevin Wolf r->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS, 10764c75d1a1SKevin Wolf (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS)); 1077ee55b173SKevin Wolf if (r->use_lazy_refcounts && s->qcow_version < 3) { 1078007dbc39SKevin Wolf error_setg(errp, "Lazy refcounts require a qcow2 image with at least " 1079007dbc39SKevin Wolf "qemu 1.1 compatibility level"); 1080007dbc39SKevin Wolf ret = -EINVAL; 1081007dbc39SKevin Wolf goto fail; 1082007dbc39SKevin Wolf } 10834c75d1a1SKevin Wolf 10845b0959a7SKevin Wolf if (s->use_lazy_refcounts && !r->use_lazy_refcounts) { 10855b0959a7SKevin Wolf ret = qcow2_mark_clean(bs); 10865b0959a7SKevin Wolf if (ret < 0) { 10875b0959a7SKevin Wolf error_setg_errno(errp, -ret, "Failed to disable lazy refcounts"); 10885b0959a7SKevin Wolf goto fail; 10895b0959a7SKevin Wolf } 10905b0959a7SKevin Wolf } 10915b0959a7SKevin Wolf 1092007dbc39SKevin Wolf /* Overlap check options */ 10934c75d1a1SKevin Wolf opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP); 10944c75d1a1SKevin Wolf opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE); 10954c75d1a1SKevin Wolf if (opt_overlap_check_template && opt_overlap_check && 10964c75d1a1SKevin Wolf strcmp(opt_overlap_check_template, opt_overlap_check)) 10974c75d1a1SKevin Wolf { 10984c75d1a1SKevin Wolf error_setg(errp, "Conflicting values for qcow2 options '" 10994c75d1a1SKevin Wolf QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE 11004c75d1a1SKevin Wolf "' ('%s')", opt_overlap_check, opt_overlap_check_template); 11014c75d1a1SKevin Wolf ret = -EINVAL; 11024c75d1a1SKevin Wolf goto fail; 11034c75d1a1SKevin Wolf } 11044c75d1a1SKevin Wolf if (!opt_overlap_check) { 11054c75d1a1SKevin Wolf opt_overlap_check = opt_overlap_check_template ?: "cached"; 
11064c75d1a1SKevin Wolf } 11074c75d1a1SKevin Wolf 11084c75d1a1SKevin Wolf if (!strcmp(opt_overlap_check, "none")) { 11094c75d1a1SKevin Wolf overlap_check_template = 0; 11104c75d1a1SKevin Wolf } else if (!strcmp(opt_overlap_check, "constant")) { 11114c75d1a1SKevin Wolf overlap_check_template = QCOW2_OL_CONSTANT; 11124c75d1a1SKevin Wolf } else if (!strcmp(opt_overlap_check, "cached")) { 11134c75d1a1SKevin Wolf overlap_check_template = QCOW2_OL_CACHED; 11144c75d1a1SKevin Wolf } else if (!strcmp(opt_overlap_check, "all")) { 11154c75d1a1SKevin Wolf overlap_check_template = QCOW2_OL_ALL; 11164c75d1a1SKevin Wolf } else { 11174c75d1a1SKevin Wolf error_setg(errp, "Unsupported value '%s' for qcow2 option " 11184c75d1a1SKevin Wolf "'overlap-check'. Allowed are any of the following: " 11194c75d1a1SKevin Wolf "none, constant, cached, all", opt_overlap_check); 11204c75d1a1SKevin Wolf ret = -EINVAL; 11214c75d1a1SKevin Wolf goto fail; 11224c75d1a1SKevin Wolf } 11234c75d1a1SKevin Wolf 1124ee55b173SKevin Wolf r->overlap_check = 0; 11254c75d1a1SKevin Wolf for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) { 11264c75d1a1SKevin Wolf /* overlap-check defines a template bitmask, but every flag may be 11274c75d1a1SKevin Wolf * overwritten through the associated boolean option */ 1128ee55b173SKevin Wolf r->overlap_check |= 11294c75d1a1SKevin Wolf qemu_opt_get_bool(opts, overlap_bool_option_names[i], 11304c75d1a1SKevin Wolf overlap_check_template & (1 << i)) << i; 11314c75d1a1SKevin Wolf } 11324c75d1a1SKevin Wolf 1133ee55b173SKevin Wolf r->discard_passthrough[QCOW2_DISCARD_NEVER] = false; 1134ee55b173SKevin Wolf r->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true; 1135ee55b173SKevin Wolf r->discard_passthrough[QCOW2_DISCARD_REQUEST] = 1136007dbc39SKevin Wolf qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST, 1137007dbc39SKevin Wolf flags & BDRV_O_UNMAP); 1138ee55b173SKevin Wolf r->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] = 1139007dbc39SKevin Wolf qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, 
true); 1140ee55b173SKevin Wolf r->discard_passthrough[QCOW2_DISCARD_OTHER] = 1141007dbc39SKevin Wolf qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false); 1142007dbc39SKevin Wolf 1143b25b387fSDaniel P. Berrange switch (s->crypt_method_header) { 1144b25b387fSDaniel P. Berrange case QCOW_CRYPT_NONE: 1145b25b387fSDaniel P. Berrange if (encryptfmt) { 1146b25b387fSDaniel P. Berrange error_setg(errp, "No encryption in image header, but options " 1147b25b387fSDaniel P. Berrange "specified format '%s'", encryptfmt); 1148b25b387fSDaniel P. Berrange ret = -EINVAL; 1149b25b387fSDaniel P. Berrange goto fail; 1150b25b387fSDaniel P. Berrange } 1151b25b387fSDaniel P. Berrange break; 1152b25b387fSDaniel P. Berrange 1153b25b387fSDaniel P. Berrange case QCOW_CRYPT_AES: 1154b25b387fSDaniel P. Berrange if (encryptfmt && !g_str_equal(encryptfmt, "aes")) { 1155b25b387fSDaniel P. Berrange error_setg(errp, 1156b25b387fSDaniel P. Berrange "Header reported 'aes' encryption format but " 1157b25b387fSDaniel P. Berrange "options specify '%s'", encryptfmt); 1158b25b387fSDaniel P. Berrange ret = -EINVAL; 1159b25b387fSDaniel P. Berrange goto fail; 1160b25b387fSDaniel P. Berrange } 1161796d3239SMarkus Armbruster qdict_put_str(encryptopts, "format", "qcow"); 1162796d3239SMarkus Armbruster r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp); 11631184b411SVladimir Sementsov-Ogievskiy if (!r->crypto_opts) { 11641184b411SVladimir Sementsov-Ogievskiy ret = -EINVAL; 11651184b411SVladimir Sementsov-Ogievskiy goto fail; 11661184b411SVladimir Sementsov-Ogievskiy } 1167b25b387fSDaniel P. Berrange break; 1168b25b387fSDaniel P. Berrange 11694652b8f3SDaniel P. Berrange case QCOW_CRYPT_LUKS: 11704652b8f3SDaniel P. Berrange if (encryptfmt && !g_str_equal(encryptfmt, "luks")) { 11714652b8f3SDaniel P. Berrange error_setg(errp, 11724652b8f3SDaniel P. Berrange "Header reported 'luks' encryption format but " 11734652b8f3SDaniel P. Berrange "options specify '%s'", encryptfmt); 11744652b8f3SDaniel P. 
Berrange ret = -EINVAL; 11754652b8f3SDaniel P. Berrange goto fail; 11764652b8f3SDaniel P. Berrange } 1177796d3239SMarkus Armbruster qdict_put_str(encryptopts, "format", "luks"); 1178796d3239SMarkus Armbruster r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp); 11791184b411SVladimir Sementsov-Ogievskiy if (!r->crypto_opts) { 11801184b411SVladimir Sementsov-Ogievskiy ret = -EINVAL; 11811184b411SVladimir Sementsov-Ogievskiy goto fail; 11821184b411SVladimir Sementsov-Ogievskiy } 11834652b8f3SDaniel P. Berrange break; 11844652b8f3SDaniel P. Berrange 1185b25b387fSDaniel P. Berrange default: 1186b25b387fSDaniel P. Berrange error_setg(errp, "Unsupported encryption method %d", 1187b25b387fSDaniel P. Berrange s->crypt_method_header); 1188b25b387fSDaniel P. Berrange ret = -EINVAL; 1189b25b387fSDaniel P. Berrange goto fail; 1190b25b387fSDaniel P. Berrange } 1191b25b387fSDaniel P. Berrange 11924c75d1a1SKevin Wolf ret = 0; 11934c75d1a1SKevin Wolf fail: 1194cb3e7f08SMarc-André Lureau qobject_unref(encryptopts); 119594edf3fbSKevin Wolf qemu_opts_del(opts); 119694edf3fbSKevin Wolf opts = NULL; 1197ee55b173SKevin Wolf return ret; 1198ee55b173SKevin Wolf } 1199ee55b173SKevin Wolf 1200ee55b173SKevin Wolf static void qcow2_update_options_commit(BlockDriverState *bs, 1201ee55b173SKevin Wolf Qcow2ReopenState *r) 1202ee55b173SKevin Wolf { 1203ee55b173SKevin Wolf BDRVQcow2State *s = bs->opaque; 1204ee55b173SKevin Wolf int i; 1205ee55b173SKevin Wolf 12065b0959a7SKevin Wolf if (s->l2_table_cache) { 1207e64d4072SAlberto Garcia qcow2_cache_destroy(s->l2_table_cache); 12085b0959a7SKevin Wolf } 12095b0959a7SKevin Wolf if (s->refcount_block_cache) { 1210e64d4072SAlberto Garcia qcow2_cache_destroy(s->refcount_block_cache); 12115b0959a7SKevin Wolf } 1212ee55b173SKevin Wolf s->l2_table_cache = r->l2_table_cache; 1213ee55b173SKevin Wolf s->refcount_block_cache = r->refcount_block_cache; 12143c2e511aSAlberto Garcia s->l2_slice_size = r->l2_slice_size; 1215ee55b173SKevin Wolf 
1216ee55b173SKevin Wolf s->overlap_check = r->overlap_check; 1217ee55b173SKevin Wolf s->use_lazy_refcounts = r->use_lazy_refcounts; 1218ee55b173SKevin Wolf 1219ee55b173SKevin Wolf for (i = 0; i < QCOW2_DISCARD_MAX; i++) { 1220ee55b173SKevin Wolf s->discard_passthrough[i] = r->discard_passthrough[i]; 1221ee55b173SKevin Wolf } 1222ee55b173SKevin Wolf 12235b0959a7SKevin Wolf if (s->cache_clean_interval != r->cache_clean_interval) { 12245b0959a7SKevin Wolf cache_clean_timer_del(bs); 1225ee55b173SKevin Wolf s->cache_clean_interval = r->cache_clean_interval; 1226ee55b173SKevin Wolf cache_clean_timer_init(bs, bdrv_get_aio_context(bs)); 1227ee55b173SKevin Wolf } 1228b25b387fSDaniel P. Berrange 1229b25b387fSDaniel P. Berrange qapi_free_QCryptoBlockOpenOptions(s->crypto_opts); 1230b25b387fSDaniel P. Berrange s->crypto_opts = r->crypto_opts; 12315b0959a7SKevin Wolf } 1232ee55b173SKevin Wolf 1233ee55b173SKevin Wolf static void qcow2_update_options_abort(BlockDriverState *bs, 1234ee55b173SKevin Wolf Qcow2ReopenState *r) 1235ee55b173SKevin Wolf { 1236ee55b173SKevin Wolf if (r->l2_table_cache) { 1237e64d4072SAlberto Garcia qcow2_cache_destroy(r->l2_table_cache); 1238ee55b173SKevin Wolf } 1239ee55b173SKevin Wolf if (r->refcount_block_cache) { 1240e64d4072SAlberto Garcia qcow2_cache_destroy(r->refcount_block_cache); 1241ee55b173SKevin Wolf } 1242b25b387fSDaniel P. 
Berrange qapi_free_QCryptoBlockOpenOptions(r->crypto_opts); 1243ee55b173SKevin Wolf } 1244ee55b173SKevin Wolf 1245ee55b173SKevin Wolf static int qcow2_update_options(BlockDriverState *bs, QDict *options, 1246ee55b173SKevin Wolf int flags, Error **errp) 1247ee55b173SKevin Wolf { 1248ee55b173SKevin Wolf Qcow2ReopenState r = {}; 1249ee55b173SKevin Wolf int ret; 1250ee55b173SKevin Wolf 1251ee55b173SKevin Wolf ret = qcow2_update_options_prepare(bs, &r, options, flags, errp); 1252ee55b173SKevin Wolf if (ret >= 0) { 1253ee55b173SKevin Wolf qcow2_update_options_commit(bs, &r); 1254ee55b173SKevin Wolf } else { 1255ee55b173SKevin Wolf qcow2_update_options_abort(bs, &r); 1256ee55b173SKevin Wolf } 125794edf3fbSKevin Wolf 12584c75d1a1SKevin Wolf return ret; 12594c75d1a1SKevin Wolf } 12604c75d1a1SKevin Wolf 1261572ad978SDenis Plotnikov static int validate_compression_type(BDRVQcow2State *s, Error **errp) 1262572ad978SDenis Plotnikov { 1263572ad978SDenis Plotnikov switch (s->compression_type) { 1264572ad978SDenis Plotnikov case QCOW2_COMPRESSION_TYPE_ZLIB: 1265d298ac10SDenis Plotnikov #ifdef CONFIG_ZSTD 1266d298ac10SDenis Plotnikov case QCOW2_COMPRESSION_TYPE_ZSTD: 1267d298ac10SDenis Plotnikov #endif 1268572ad978SDenis Plotnikov break; 1269572ad978SDenis Plotnikov 1270572ad978SDenis Plotnikov default: 1271572ad978SDenis Plotnikov error_setg(errp, "qcow2: unknown compression type: %u", 1272572ad978SDenis Plotnikov s->compression_type); 1273572ad978SDenis Plotnikov return -ENOTSUP; 1274572ad978SDenis Plotnikov } 1275572ad978SDenis Plotnikov 1276572ad978SDenis Plotnikov /* 1277572ad978SDenis Plotnikov * if the compression type differs from QCOW2_COMPRESSION_TYPE_ZLIB 1278572ad978SDenis Plotnikov * the incompatible feature flag must be set 1279572ad978SDenis Plotnikov */ 1280572ad978SDenis Plotnikov if (s->compression_type == QCOW2_COMPRESSION_TYPE_ZLIB) { 1281572ad978SDenis Plotnikov if (s->incompatible_features & QCOW2_INCOMPAT_COMPRESSION) { 1282572ad978SDenis Plotnikov 
error_setg(errp, "qcow2: Compression type incompatible feature " 1283572ad978SDenis Plotnikov "bit must not be set"); 1284572ad978SDenis Plotnikov return -EINVAL; 1285572ad978SDenis Plotnikov } 1286572ad978SDenis Plotnikov } else { 1287572ad978SDenis Plotnikov if (!(s->incompatible_features & QCOW2_INCOMPAT_COMPRESSION)) { 1288572ad978SDenis Plotnikov error_setg(errp, "qcow2: Compression type incompatible feature " 1289572ad978SDenis Plotnikov "bit must be set"); 1290572ad978SDenis Plotnikov return -EINVAL; 1291572ad978SDenis Plotnikov } 1292572ad978SDenis Plotnikov } 1293572ad978SDenis Plotnikov 1294572ad978SDenis Plotnikov return 0; 1295572ad978SDenis Plotnikov } 1296572ad978SDenis Plotnikov 12971fafcd93SPaolo Bonzini /* Called with s->lock held. */ 12981fafcd93SPaolo Bonzini static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, 129906e9cd19SHanna Reitz int flags, bool open_data_file, 130006e9cd19SHanna Reitz Error **errp) 1301585f8587Sbellard { 1302bc520249SVladimir Sementsov-Ogievskiy ERRP_GUARD(); 1303ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 13046d33e8e7SKevin Wolf unsigned int len, i; 13056d33e8e7SKevin Wolf int ret = 0; 1306585f8587Sbellard QCowHeader header; 13079b80ddf3Saliguori uint64_t ext_end; 13082cf7cfa1SKevin Wolf uint64_t l1_vm_state_index; 130988ddffaeSVladimir Sementsov-Ogievskiy bool update_header = false; 1310585f8587Sbellard 131132cc71deSAlberto Faria ret = bdrv_pread(bs->file, 0, sizeof(header), &header, 0); 13126d85a57eSJes Sorensen if (ret < 0) { 13133ef6c40aSMax Reitz error_setg_errno(errp, -ret, "Could not read qcow2 header"); 1314585f8587Sbellard goto fail; 13156d85a57eSJes Sorensen } 13163b698f52SPeter Maydell header.magic = be32_to_cpu(header.magic); 13173b698f52SPeter Maydell header.version = be32_to_cpu(header.version); 13183b698f52SPeter Maydell header.backing_file_offset = be64_to_cpu(header.backing_file_offset); 13193b698f52SPeter Maydell header.backing_file_size = 
be32_to_cpu(header.backing_file_size); 13203b698f52SPeter Maydell header.size = be64_to_cpu(header.size); 13213b698f52SPeter Maydell header.cluster_bits = be32_to_cpu(header.cluster_bits); 13223b698f52SPeter Maydell header.crypt_method = be32_to_cpu(header.crypt_method); 13233b698f52SPeter Maydell header.l1_table_offset = be64_to_cpu(header.l1_table_offset); 13243b698f52SPeter Maydell header.l1_size = be32_to_cpu(header.l1_size); 13253b698f52SPeter Maydell header.refcount_table_offset = be64_to_cpu(header.refcount_table_offset); 13263b698f52SPeter Maydell header.refcount_table_clusters = 13273b698f52SPeter Maydell be32_to_cpu(header.refcount_table_clusters); 13283b698f52SPeter Maydell header.snapshots_offset = be64_to_cpu(header.snapshots_offset); 13293b698f52SPeter Maydell header.nb_snapshots = be32_to_cpu(header.nb_snapshots); 1330585f8587Sbellard 1331e8cdcec1SKevin Wolf if (header.magic != QCOW_MAGIC) { 13323ef6c40aSMax Reitz error_setg(errp, "Image is not in qcow2 format"); 133376abe407SPaolo Bonzini ret = -EINVAL; 1334585f8587Sbellard goto fail; 13356d85a57eSJes Sorensen } 13366744cbabSKevin Wolf if (header.version < 2 || header.version > 3) { 1337a55448b3SMax Reitz error_setg(errp, "Unsupported qcow2 version %" PRIu32, header.version); 1338e8cdcec1SKevin Wolf ret = -ENOTSUP; 1339e8cdcec1SKevin Wolf goto fail; 1340e8cdcec1SKevin Wolf } 13416744cbabSKevin Wolf 13426744cbabSKevin Wolf s->qcow_version = header.version; 13436744cbabSKevin Wolf 134424342f2cSKevin Wolf /* Initialise cluster size */ 134524342f2cSKevin Wolf if (header.cluster_bits < MIN_CLUSTER_BITS || 134624342f2cSKevin Wolf header.cluster_bits > MAX_CLUSTER_BITS) { 1347521b2b5dSMax Reitz error_setg(errp, "Unsupported cluster size: 2^%" PRIu32, 1348521b2b5dSMax Reitz header.cluster_bits); 134924342f2cSKevin Wolf ret = -EINVAL; 135024342f2cSKevin Wolf goto fail; 135124342f2cSKevin Wolf } 135224342f2cSKevin Wolf 135324342f2cSKevin Wolf s->cluster_bits = header.cluster_bits; 135424342f2cSKevin Wolf 
s->cluster_size = 1 << s->cluster_bits; 135524342f2cSKevin Wolf 13566744cbabSKevin Wolf /* Initialise version 3 header fields */ 13576744cbabSKevin Wolf if (header.version == 2) { 13586744cbabSKevin Wolf header.incompatible_features = 0; 13596744cbabSKevin Wolf header.compatible_features = 0; 13606744cbabSKevin Wolf header.autoclear_features = 0; 13616744cbabSKevin Wolf header.refcount_order = 4; 13626744cbabSKevin Wolf header.header_length = 72; 13636744cbabSKevin Wolf } else { 13643b698f52SPeter Maydell header.incompatible_features = 13653b698f52SPeter Maydell be64_to_cpu(header.incompatible_features); 13663b698f52SPeter Maydell header.compatible_features = be64_to_cpu(header.compatible_features); 13673b698f52SPeter Maydell header.autoclear_features = be64_to_cpu(header.autoclear_features); 13683b698f52SPeter Maydell header.refcount_order = be32_to_cpu(header.refcount_order); 13693b698f52SPeter Maydell header.header_length = be32_to_cpu(header.header_length); 137024342f2cSKevin Wolf 137124342f2cSKevin Wolf if (header.header_length < 104) { 137224342f2cSKevin Wolf error_setg(errp, "qcow2 header too short"); 137324342f2cSKevin Wolf ret = -EINVAL; 137424342f2cSKevin Wolf goto fail; 137524342f2cSKevin Wolf } 137624342f2cSKevin Wolf } 137724342f2cSKevin Wolf 137824342f2cSKevin Wolf if (header.header_length > s->cluster_size) { 137924342f2cSKevin Wolf error_setg(errp, "qcow2 header exceeds cluster size"); 138024342f2cSKevin Wolf ret = -EINVAL; 138124342f2cSKevin Wolf goto fail; 13826744cbabSKevin Wolf } 13836744cbabSKevin Wolf 13846744cbabSKevin Wolf if (header.header_length > sizeof(header)) { 13856744cbabSKevin Wolf s->unknown_header_fields_size = header.header_length - sizeof(header); 13866744cbabSKevin Wolf s->unknown_header_fields = g_malloc(s->unknown_header_fields_size); 138732cc71deSAlberto Faria ret = bdrv_pread(bs->file, sizeof(header), 138832cc71deSAlberto Faria s->unknown_header_fields_size, 138932cc71deSAlberto Faria s->unknown_header_fields, 0); 
13906744cbabSKevin Wolf if (ret < 0) { 13913ef6c40aSMax Reitz error_setg_errno(errp, -ret, "Could not read unknown qcow2 header " 13923ef6c40aSMax Reitz "fields"); 13936744cbabSKevin Wolf goto fail; 13946744cbabSKevin Wolf } 13956744cbabSKevin Wolf } 13966744cbabSKevin Wolf 1397a1b3955cSKevin Wolf if (header.backing_file_offset > s->cluster_size) { 1398a1b3955cSKevin Wolf error_setg(errp, "Invalid backing file offset"); 1399a1b3955cSKevin Wolf ret = -EINVAL; 1400a1b3955cSKevin Wolf goto fail; 1401a1b3955cSKevin Wolf } 1402a1b3955cSKevin Wolf 1403cfcc4c62SKevin Wolf if (header.backing_file_offset) { 1404cfcc4c62SKevin Wolf ext_end = header.backing_file_offset; 1405cfcc4c62SKevin Wolf } else { 1406cfcc4c62SKevin Wolf ext_end = 1 << header.cluster_bits; 1407cfcc4c62SKevin Wolf } 1408cfcc4c62SKevin Wolf 14096744cbabSKevin Wolf /* Handle feature bits */ 14106744cbabSKevin Wolf s->incompatible_features = header.incompatible_features; 14116744cbabSKevin Wolf s->compatible_features = header.compatible_features; 14126744cbabSKevin Wolf s->autoclear_features = header.autoclear_features; 14136744cbabSKevin Wolf 1414572ad978SDenis Plotnikov /* 1415572ad978SDenis Plotnikov * Handle compression type 1416572ad978SDenis Plotnikov * Older qcow2 images don't contain the compression type header. 
1417572ad978SDenis Plotnikov * Distinguish them by the header length and use 1418572ad978SDenis Plotnikov * the only valid (default) compression type in that case 1419572ad978SDenis Plotnikov */ 1420572ad978SDenis Plotnikov if (header.header_length > offsetof(QCowHeader, compression_type)) { 1421572ad978SDenis Plotnikov s->compression_type = header.compression_type; 1422572ad978SDenis Plotnikov } else { 1423572ad978SDenis Plotnikov s->compression_type = QCOW2_COMPRESSION_TYPE_ZLIB; 1424572ad978SDenis Plotnikov } 1425572ad978SDenis Plotnikov 1426572ad978SDenis Plotnikov ret = validate_compression_type(s, errp); 1427572ad978SDenis Plotnikov if (ret) { 1428572ad978SDenis Plotnikov goto fail; 1429572ad978SDenis Plotnikov } 1430572ad978SDenis Plotnikov 1431c61d0004SStefan Hajnoczi if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) { 1432cfcc4c62SKevin Wolf void *feature_table = NULL; 1433cfcc4c62SKevin Wolf qcow2_read_extensions(bs, header.header_length, ext_end, 143488ddffaeSVladimir Sementsov-Ogievskiy &feature_table, flags, NULL, NULL); 1435a55448b3SMax Reitz report_unsupported_feature(errp, feature_table, 1436c61d0004SStefan Hajnoczi s->incompatible_features & 1437c61d0004SStefan Hajnoczi ~QCOW2_INCOMPAT_MASK); 14386744cbabSKevin Wolf ret = -ENOTSUP; 1439c5a33ee9SPrasad Joshi g_free(feature_table); 14406744cbabSKevin Wolf goto fail; 14416744cbabSKevin Wolf } 14426744cbabSKevin Wolf 144369c98726SMax Reitz if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) { 144469c98726SMax Reitz /* Corrupt images may not be written to unless they are being repaired 144569c98726SMax Reitz */ 144669c98726SMax Reitz if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) { 14473ef6c40aSMax Reitz error_setg(errp, "qcow2: Image is corrupt; cannot be opened " 14483ef6c40aSMax Reitz "read/write"); 144969c98726SMax Reitz ret = -EACCES; 145069c98726SMax Reitz goto fail; 145169c98726SMax Reitz } 145269c98726SMax Reitz } 145369c98726SMax Reitz 1454d0346b55SAlberto Garcia 
s->subclusters_per_cluster = 1455d0346b55SAlberto Garcia has_subclusters(s) ? QCOW_EXTL2_SUBCLUSTERS_PER_CLUSTER : 1; 1456d0346b55SAlberto Garcia s->subcluster_size = s->cluster_size / s->subclusters_per_cluster; 1457d0346b55SAlberto Garcia s->subcluster_bits = ctz32(s->subcluster_size); 1458d0346b55SAlberto Garcia 14597be20252SAlberto Garcia if (s->subcluster_size < (1 << MIN_CLUSTER_BITS)) { 14607be20252SAlberto Garcia error_setg(errp, "Unsupported subcluster size: %d", s->subcluster_size); 14617be20252SAlberto Garcia ret = -EINVAL; 14627be20252SAlberto Garcia goto fail; 14637be20252SAlberto Garcia } 14647be20252SAlberto Garcia 14656744cbabSKevin Wolf /* Check support for various header values */ 1466b72faf9fSMax Reitz if (header.refcount_order > 6) { 1467b72faf9fSMax Reitz error_setg(errp, "Reference count entry width too large; may not " 1468b72faf9fSMax Reitz "exceed 64 bits"); 1469b72faf9fSMax Reitz ret = -EINVAL; 14706744cbabSKevin Wolf goto fail; 14716744cbabSKevin Wolf } 1472b6481f37SMax Reitz s->refcount_order = header.refcount_order; 1473346a53dfSMax Reitz s->refcount_bits = 1 << s->refcount_order; 1474346a53dfSMax Reitz s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1); 1475346a53dfSMax Reitz s->refcount_max += s->refcount_max - 1; 14766744cbabSKevin Wolf 1477585f8587Sbellard s->crypt_method_header = header.crypt_method; 14786d85a57eSJes Sorensen if (s->crypt_method_header) { 1479e6ff69bfSDaniel P. Berrange if (bdrv_uses_whitelist() && 1480e6ff69bfSDaniel P. Berrange s->crypt_method_header == QCOW_CRYPT_AES) { 14818c0dcbc4SDaniel P. Berrange error_setg(errp, 14828c0dcbc4SDaniel P. Berrange "Use of AES-CBC encrypted qcow2 images is no longer " 14838c0dcbc4SDaniel P. Berrange "supported in system emulators"); 14848c0dcbc4SDaniel P. Berrange error_append_hint(errp, 14858c0dcbc4SDaniel P. Berrange "You can use 'qemu-img convert' to convert your " 14868c0dcbc4SDaniel P. Berrange "image to an alternative supported format, such " 14878c0dcbc4SDaniel P. 
Berrange "as unencrypted qcow2, or raw with the LUKS " 14888c0dcbc4SDaniel P. Berrange "format instead.\n"); 14898c0dcbc4SDaniel P. Berrange ret = -ENOSYS; 14908c0dcbc4SDaniel P. Berrange goto fail; 1491e6ff69bfSDaniel P. Berrange } 1492e6ff69bfSDaniel P. Berrange 14934652b8f3SDaniel P. Berrange if (s->crypt_method_header == QCOW_CRYPT_AES) { 14944652b8f3SDaniel P. Berrange s->crypt_physical_offset = false; 14954652b8f3SDaniel P. Berrange } else { 14964652b8f3SDaniel P. Berrange /* Assuming LUKS and any future crypt methods we 14974652b8f3SDaniel P. Berrange * add will all use physical offsets, due to the 14984652b8f3SDaniel P. Berrange * fact that the alternative is insecure... */ 14994652b8f3SDaniel P. Berrange s->crypt_physical_offset = true; 15004652b8f3SDaniel P. Berrange } 15014652b8f3SDaniel P. Berrange 150254115412SEric Blake bs->encrypted = true; 15036d85a57eSJes Sorensen } 150424342f2cSKevin Wolf 1505c8fd8554SAlberto Garcia s->l2_bits = s->cluster_bits - ctz32(l2_entry_size(s)); 1506585f8587Sbellard s->l2_size = 1 << s->l2_bits; 15071d13d654SMax Reitz /* 2^(s->refcount_order - 3) is the refcount width in bytes */ 15081d13d654SMax Reitz s->refcount_block_bits = s->cluster_bits - (s->refcount_order - 3); 15091d13d654SMax Reitz s->refcount_block_size = 1 << s->refcount_block_bits; 1510bd016b91SLeonid Bloch bs->total_sectors = header.size / BDRV_SECTOR_SIZE; 1511585f8587Sbellard s->csize_shift = (62 - (s->cluster_bits - 8)); 1512585f8587Sbellard s->csize_mask = (1 << (s->cluster_bits - 8)) - 1; 1513585f8587Sbellard s->cluster_offset_mask = (1LL << s->csize_shift) - 1; 15145dab2fadSKevin Wolf 1515585f8587Sbellard s->refcount_table_offset = header.refcount_table_offset; 1516585f8587Sbellard s->refcount_table_size = 1517585f8587Sbellard header.refcount_table_clusters << (s->cluster_bits - 3); 1518585f8587Sbellard 1519951053a9SAlberto Garcia if (header.refcount_table_clusters == 0 && !(flags & BDRV_O_CHECK)) { 1520951053a9SAlberto Garcia error_setg(errp, "Image 
does not contain a reference count table"); 1521951053a9SAlberto Garcia ret = -EINVAL; 1522951053a9SAlberto Garcia goto fail; 1523951053a9SAlberto Garcia } 1524951053a9SAlberto Garcia 15250cf0e598SAlberto Garcia ret = qcow2_validate_table(bs, s->refcount_table_offset, 15260cf0e598SAlberto Garcia header.refcount_table_clusters, 15270cf0e598SAlberto Garcia s->cluster_size, QCOW_MAX_REFTABLE_SIZE, 15280cf0e598SAlberto Garcia "Reference count table", errp); 15298c7de283SKevin Wolf if (ret < 0) { 15308c7de283SKevin Wolf goto fail; 15318c7de283SKevin Wolf } 15328c7de283SKevin Wolf 15338bc584feSMax Reitz if (!(flags & BDRV_O_CHECK)) { 15348bc584feSMax Reitz /* 15358bc584feSMax Reitz * The total size in bytes of the snapshot table is checked in 15360cf0e598SAlberto Garcia * qcow2_read_snapshots() because the size of each snapshot is 15370cf0e598SAlberto Garcia * variable and we don't know it yet. 15388bc584feSMax Reitz * Here we only check the offset and number of snapshots. 15398bc584feSMax Reitz */ 15400cf0e598SAlberto Garcia ret = qcow2_validate_table(bs, header.snapshots_offset, 1541ce48f2f4SKevin Wolf header.nb_snapshots, 15420cf0e598SAlberto Garcia sizeof(QCowSnapshotHeader), 15438bc584feSMax Reitz sizeof(QCowSnapshotHeader) * 15448bc584feSMax Reitz QCOW_MAX_SNAPSHOTS, 15450cf0e598SAlberto Garcia "Snapshot table", errp); 1546ce48f2f4SKevin Wolf if (ret < 0) { 1547ce48f2f4SKevin Wolf goto fail; 1548ce48f2f4SKevin Wolf } 15498bc584feSMax Reitz } 1550ce48f2f4SKevin Wolf 1551585f8587Sbellard /* read the level 1 table */ 15520cf0e598SAlberto Garcia ret = qcow2_validate_table(bs, header.l1_table_offset, 155302b1ecfaSAlberto Garcia header.l1_size, L1E_SIZE, 15540cf0e598SAlberto Garcia QCOW_MAX_L1_SIZE, "Active L1 table", errp); 15550cf0e598SAlberto Garcia if (ret < 0) { 15562d51c32cSKevin Wolf goto fail; 15572d51c32cSKevin Wolf } 1558585f8587Sbellard s->l1_size = header.l1_size; 15590cf0e598SAlberto Garcia s->l1_table_offset = header.l1_table_offset; 15602cf7cfa1SKevin Wolf 
15612cf7cfa1SKevin Wolf l1_vm_state_index = size_to_l1(s, header.size); 15622cf7cfa1SKevin Wolf if (l1_vm_state_index > INT_MAX) { 15633ef6c40aSMax Reitz error_setg(errp, "Image is too big"); 15642cf7cfa1SKevin Wolf ret = -EFBIG; 15652cf7cfa1SKevin Wolf goto fail; 15662cf7cfa1SKevin Wolf } 15672cf7cfa1SKevin Wolf s->l1_vm_state_index = l1_vm_state_index; 15682cf7cfa1SKevin Wolf 1569585f8587Sbellard /* the L1 table must contain at least enough entries to put 1570585f8587Sbellard header.size bytes */ 15716d85a57eSJes Sorensen if (s->l1_size < s->l1_vm_state_index) { 15723ef6c40aSMax Reitz error_setg(errp, "L1 table is too small"); 15736d85a57eSJes Sorensen ret = -EINVAL; 1574585f8587Sbellard goto fail; 15756d85a57eSJes Sorensen } 15762d51c32cSKevin Wolf 1577d191d12dSStefan Weil if (s->l1_size > 0) { 157802b1ecfaSAlberto Garcia s->l1_table = qemu_try_blockalign(bs->file->bs, s->l1_size * L1E_SIZE); 1579de82815dSKevin Wolf if (s->l1_table == NULL) { 1580de82815dSKevin Wolf error_setg(errp, "Could not allocate L1 table"); 1581de82815dSKevin Wolf ret = -ENOMEM; 1582de82815dSKevin Wolf goto fail; 1583de82815dSKevin Wolf } 158432cc71deSAlberto Faria ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_size * L1E_SIZE, 158532cc71deSAlberto Faria s->l1_table, 0); 15866d85a57eSJes Sorensen if (ret < 0) { 15873ef6c40aSMax Reitz error_setg_errno(errp, -ret, "Could not read L1 table"); 1588585f8587Sbellard goto fail; 15896d85a57eSJes Sorensen } 1590585f8587Sbellard for(i = 0;i < s->l1_size; i++) { 15913b698f52SPeter Maydell s->l1_table[i] = be64_to_cpu(s->l1_table[i]); 1592585f8587Sbellard } 1593d191d12dSStefan Weil } 159429c1a730SKevin Wolf 159594edf3fbSKevin Wolf /* Parse driver-specific options */ 159694edf3fbSKevin Wolf ret = qcow2_update_options(bs, options, flags, errp); 159790efa0eaSKevin Wolf if (ret < 0) { 159890efa0eaSKevin Wolf goto fail; 159990efa0eaSKevin Wolf } 160090efa0eaSKevin Wolf 160106d9260fSAnthony Liguori s->flags = flags; 1602585f8587Sbellard 
16036d85a57eSJes Sorensen ret = qcow2_refcount_init(bs); 16046d85a57eSJes Sorensen if (ret != 0) { 16053ef6c40aSMax Reitz error_setg_errno(errp, -ret, "Could not initialize refcount handling"); 1606585f8587Sbellard goto fail; 16076d85a57eSJes Sorensen } 1608585f8587Sbellard 160972cf2d4fSBlue Swirl QLIST_INIT(&s->cluster_allocs); 16100b919faeSKevin Wolf QTAILQ_INIT(&s->discards); 1611f214978aSKevin Wolf 16129b80ddf3Saliguori /* read qcow2 extensions */ 16133ef6c40aSMax Reitz if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL, 1614af175e85SMarkus Armbruster flags, &update_header, errp)) { 16156d85a57eSJes Sorensen ret = -EINVAL; 16169b80ddf3Saliguori goto fail; 16176d85a57eSJes Sorensen } 16189b80ddf3Saliguori 161906e9cd19SHanna Reitz if (open_data_file) { 16200e8c08beSKevin Wolf /* Open external data file */ 16218b1869daSMax Reitz s->data_file = bdrv_open_child(NULL, options, "data-file", bs, 16228b1869daSMax Reitz &child_of_bds, BDRV_CHILD_DATA, 1623bc520249SVladimir Sementsov-Ogievskiy true, errp); 1624bc520249SVladimir Sementsov-Ogievskiy if (*errp) { 16250e8c08beSKevin Wolf ret = -EINVAL; 16260e8c08beSKevin Wolf goto fail; 16270e8c08beSKevin Wolf } 16280e8c08beSKevin Wolf 16290e8c08beSKevin Wolf if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) { 16309b890bdcSKevin Wolf if (!s->data_file && s->image_data_file) { 16319b890bdcSKevin Wolf s->data_file = bdrv_open_child(s->image_data_file, options, 16328b1869daSMax Reitz "data-file", bs, &child_of_bds, 16338b1869daSMax Reitz BDRV_CHILD_DATA, false, errp); 16349b890bdcSKevin Wolf if (!s->data_file) { 16359b890bdcSKevin Wolf ret = -EINVAL; 16369b890bdcSKevin Wolf goto fail; 16379b890bdcSKevin Wolf } 16389b890bdcSKevin Wolf } 16390e8c08beSKevin Wolf if (!s->data_file) { 16400e8c08beSKevin Wolf error_setg(errp, "'data-file' is required for this image"); 16410e8c08beSKevin Wolf ret = -EINVAL; 16420e8c08beSKevin Wolf goto fail; 16430e8c08beSKevin Wolf } 16448b1869daSMax Reitz 16458b1869daSMax Reitz 
/* No data here */ 16468b1869daSMax Reitz bs->file->role &= ~BDRV_CHILD_DATA; 16478b1869daSMax Reitz 16488b1869daSMax Reitz /* Must succeed because we have given up permissions if anything */ 16498b1869daSMax Reitz bdrv_child_refresh_perms(bs, bs->file, &error_abort); 16500e8c08beSKevin Wolf } else { 16510e8c08beSKevin Wolf if (s->data_file) { 165206e9cd19SHanna Reitz error_setg(errp, "'data-file' can only be set for images with " 165306e9cd19SHanna Reitz "an external data file"); 16540e8c08beSKevin Wolf ret = -EINVAL; 16550e8c08beSKevin Wolf goto fail; 16566c3944dcSKevin Wolf } 16576c3944dcSKevin Wolf 165893c24936SKevin Wolf s->data_file = bs->file; 16596c3944dcSKevin Wolf 16606c3944dcSKevin Wolf if (data_file_is_raw(bs)) { 16616c3944dcSKevin Wolf error_setg(errp, "data-file-raw requires a data file"); 16626c3944dcSKevin Wolf ret = -EINVAL; 16636c3944dcSKevin Wolf goto fail; 16640e8c08beSKevin Wolf } 16650e8c08beSKevin Wolf } 166606e9cd19SHanna Reitz } 166793c24936SKevin Wolf 16684652b8f3SDaniel P. Berrange /* qcow2_read_extension may have set up the crypto context 16694652b8f3SDaniel P. Berrange * if the crypt method needs a header region, some methods 16704652b8f3SDaniel P. Berrange * don't need header extensions, so must check here 16714652b8f3SDaniel P. Berrange */ 16724652b8f3SDaniel P. Berrange if (s->crypt_method_header && !s->crypto) { 1673b25b387fSDaniel P. Berrange if (s->crypt_method_header == QCOW_CRYPT_AES) { 1674b25b387fSDaniel P. Berrange unsigned int cflags = 0; 1675b25b387fSDaniel P. Berrange if (flags & BDRV_O_NO_IO) { 1676b25b387fSDaniel P. Berrange cflags |= QCRYPTO_BLOCK_OPEN_NO_IO; 1677b25b387fSDaniel P. Berrange } 16781cd9a787SDaniel P. Berrange s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.", 16798ac0f15fSVladimir Sementsov-Ogievskiy NULL, NULL, cflags, 16808ac0f15fSVladimir Sementsov-Ogievskiy QCOW2_MAX_THREADS, errp); 1681b25b387fSDaniel P. Berrange if (!s->crypto) { 1682b25b387fSDaniel P. 
Berrange ret = -EINVAL; 1683b25b387fSDaniel P. Berrange goto fail; 1684b25b387fSDaniel P. Berrange } 16854652b8f3SDaniel P. Berrange } else if (!(flags & BDRV_O_NO_IO)) { 16864652b8f3SDaniel P. Berrange error_setg(errp, "Missing CRYPTO header for crypt method %d", 16874652b8f3SDaniel P. Berrange s->crypt_method_header); 16884652b8f3SDaniel P. Berrange ret = -EINVAL; 16894652b8f3SDaniel P. Berrange goto fail; 16904652b8f3SDaniel P. Berrange } 1691b25b387fSDaniel P. Berrange } 1692b25b387fSDaniel P. Berrange 1693585f8587Sbellard /* read the backing file name */ 1694585f8587Sbellard if (header.backing_file_offset != 0) { 1695585f8587Sbellard len = header.backing_file_size; 16969a29e18fSJeff Cody if (len > MIN(1023, s->cluster_size - header.backing_file_offset) || 1697e729fa6aSJeff Cody len >= sizeof(bs->backing_file)) { 16986d33e8e7SKevin Wolf error_setg(errp, "Backing file name too long"); 16996d33e8e7SKevin Wolf ret = -EINVAL; 17006d33e8e7SKevin Wolf goto fail; 17016d85a57eSJes Sorensen } 170232cc71deSAlberto Faria ret = bdrv_pread(bs->file, header.backing_file_offset, len, 170332cc71deSAlberto Faria bs->auto_backing_file, 0); 17046d85a57eSJes Sorensen if (ret < 0) { 17053ef6c40aSMax Reitz error_setg_errno(errp, -ret, "Could not read backing file name"); 1706585f8587Sbellard goto fail; 17076d85a57eSJes Sorensen } 1708998c2019SMax Reitz bs->auto_backing_file[len] = '\0'; 1709998c2019SMax Reitz pstrcpy(bs->backing_file, sizeof(bs->backing_file), 1710998c2019SMax Reitz bs->auto_backing_file); 1711998c2019SMax Reitz s->image_backing_file = g_strdup(bs->auto_backing_file); 1712585f8587Sbellard } 171342deb29fSKevin Wolf 17148bc584feSMax Reitz /* 17158bc584feSMax Reitz * Internal snapshots; skip reading them in check mode, because 17168bc584feSMax Reitz * we do not need them then, and we do not want to abort because 17178bc584feSMax Reitz * of a broken table. 
17188bc584feSMax Reitz */ 17198bc584feSMax Reitz if (!(flags & BDRV_O_CHECK)) { 172011b128f4SKevin Wolf s->snapshots_offset = header.snapshots_offset; 172111b128f4SKevin Wolf s->nb_snapshots = header.nb_snapshots; 172211b128f4SKevin Wolf 1723ecf6c7c0SMax Reitz ret = qcow2_read_snapshots(bs, errp); 172442deb29fSKevin Wolf if (ret < 0) { 1725585f8587Sbellard goto fail; 17266d85a57eSJes Sorensen } 17278bc584feSMax Reitz } 1728585f8587Sbellard 1729af7b708dSStefan Hajnoczi /* Clear unknown autoclear feature bits */ 173088ddffaeSVladimir Sementsov-Ogievskiy update_header |= s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK; 1731307261b2SVladimir Sementsov-Ogievskiy update_header = update_header && bdrv_is_writable(bs); 1732d1258dd0SVladimir Sementsov-Ogievskiy if (update_header) { 173388ddffaeSVladimir Sementsov-Ogievskiy s->autoclear_features &= QCOW2_AUTOCLEAR_MASK; 1734d1258dd0SVladimir Sementsov-Ogievskiy } 1735d1258dd0SVladimir Sementsov-Ogievskiy 17369c98f145SVladimir Sementsov-Ogievskiy /* == Handle persistent dirty bitmaps == 17379c98f145SVladimir Sementsov-Ogievskiy * 17389c98f145SVladimir Sementsov-Ogievskiy * We want load dirty bitmaps in three cases: 17399c98f145SVladimir Sementsov-Ogievskiy * 17409c98f145SVladimir Sementsov-Ogievskiy * 1. Normal open of the disk in active mode, not related to invalidation 17419c98f145SVladimir Sementsov-Ogievskiy * after migration. 17429c98f145SVladimir Sementsov-Ogievskiy * 17439c98f145SVladimir Sementsov-Ogievskiy * 2. Invalidation of the target vm after pre-copy phase of migration, if 17449c98f145SVladimir Sementsov-Ogievskiy * bitmaps are _not_ migrating through migration channel, i.e. 17459c98f145SVladimir Sementsov-Ogievskiy * 'dirty-bitmaps' capability is disabled. 17469c98f145SVladimir Sementsov-Ogievskiy * 17479c98f145SVladimir Sementsov-Ogievskiy * 3. Invalidation of source vm after failed or canceled migration. 17489c98f145SVladimir Sementsov-Ogievskiy * This is a very interesting case. 
There are two possible types of 17499c98f145SVladimir Sementsov-Ogievskiy * bitmaps: 17509c98f145SVladimir Sementsov-Ogievskiy * 17519c98f145SVladimir Sementsov-Ogievskiy * A. Stored on inactivation and removed. They should be loaded from the 17529c98f145SVladimir Sementsov-Ogievskiy * image. 17539c98f145SVladimir Sementsov-Ogievskiy * 17549c98f145SVladimir Sementsov-Ogievskiy * B. Not stored: not-persistent bitmaps and bitmaps, migrated through 17559c98f145SVladimir Sementsov-Ogievskiy * the migration channel (with dirty-bitmaps capability). 17569c98f145SVladimir Sementsov-Ogievskiy * 17579c98f145SVladimir Sementsov-Ogievskiy * On the other hand, there are two possible sub-cases: 17589c98f145SVladimir Sementsov-Ogievskiy * 17599c98f145SVladimir Sementsov-Ogievskiy * 3.1 disk was changed by somebody else while were inactive. In this 17609c98f145SVladimir Sementsov-Ogievskiy * case all in-RAM dirty bitmaps (both persistent and not) are 17619c98f145SVladimir Sementsov-Ogievskiy * definitely invalid. And we don't have any method to determine 17629c98f145SVladimir Sementsov-Ogievskiy * this. 17639c98f145SVladimir Sementsov-Ogievskiy * 17649c98f145SVladimir Sementsov-Ogievskiy * Simple and safe thing is to just drop all the bitmaps of type B on 17659c98f145SVladimir Sementsov-Ogievskiy * inactivation. But in this case we lose bitmaps in valid 4.2 case. 17669c98f145SVladimir Sementsov-Ogievskiy * 17679c98f145SVladimir Sementsov-Ogievskiy * On the other hand, resuming source vm, if disk was already changed 17689c98f145SVladimir Sementsov-Ogievskiy * is a bad thing anyway: not only bitmaps, the whole vm state is 17699c98f145SVladimir Sementsov-Ogievskiy * out of sync with disk. 
17709c98f145SVladimir Sementsov-Ogievskiy * 17719c98f145SVladimir Sementsov-Ogievskiy * This means, that user or management tool, who for some reason 17729c98f145SVladimir Sementsov-Ogievskiy * decided to resume source vm, after disk was already changed by 17739c98f145SVladimir Sementsov-Ogievskiy * target vm, should at least drop all dirty bitmaps by hand. 17749c98f145SVladimir Sementsov-Ogievskiy * 17759c98f145SVladimir Sementsov-Ogievskiy * So, we can ignore this case for now, but TODO: "generation" 17769c98f145SVladimir Sementsov-Ogievskiy * extension for qcow2, to determine, that image was changed after 17779c98f145SVladimir Sementsov-Ogievskiy * last inactivation. And if it is changed, we will drop (or at least 17789c98f145SVladimir Sementsov-Ogievskiy * mark as 'invalid' all the bitmaps of type B, both persistent 17799c98f145SVladimir Sementsov-Ogievskiy * and not). 17809c98f145SVladimir Sementsov-Ogievskiy * 17819c98f145SVladimir Sementsov-Ogievskiy * 3.2 disk was _not_ changed while were inactive. Bitmaps may be saved 17829c98f145SVladimir Sementsov-Ogievskiy * to disk ('dirty-bitmaps' capability disabled), or not saved 17839c98f145SVladimir Sementsov-Ogievskiy * ('dirty-bitmaps' capability enabled), but we don't need to care 17849c98f145SVladimir Sementsov-Ogievskiy * of: let's load bitmaps as always: stored bitmaps will be loaded, 17859c98f145SVladimir Sementsov-Ogievskiy * and not stored has flag IN_USE=1 in the image and will be skipped 17869c98f145SVladimir Sementsov-Ogievskiy * on loading. 17879c98f145SVladimir Sementsov-Ogievskiy * 17889c98f145SVladimir Sementsov-Ogievskiy * One remaining possible case when we don't want load bitmaps: 17899c98f145SVladimir Sementsov-Ogievskiy * 17909c98f145SVladimir Sementsov-Ogievskiy * 4. 
Open disk in inactive mode in target vm (bitmaps are migrating or 17919c98f145SVladimir Sementsov-Ogievskiy * will be loaded on invalidation, no needs try loading them before) 17929c98f145SVladimir Sementsov-Ogievskiy */ 17939c98f145SVladimir Sementsov-Ogievskiy 17949c98f145SVladimir Sementsov-Ogievskiy if (!(bdrv_get_flags(bs) & BDRV_O_INACTIVE)) { 17959c98f145SVladimir Sementsov-Ogievskiy /* It's case 1, 2 or 3.2. Or 3.1 which is BUG in management layer. */ 17960c1e9d2aSVladimir Sementsov-Ogievskiy bool header_updated; 17970c1e9d2aSVladimir Sementsov-Ogievskiy if (!qcow2_load_dirty_bitmaps(bs, &header_updated, errp)) { 1798d1258dd0SVladimir Sementsov-Ogievskiy ret = -EINVAL; 1799d1258dd0SVladimir Sementsov-Ogievskiy goto fail; 1800d1258dd0SVladimir Sementsov-Ogievskiy } 1801d1258dd0SVladimir Sementsov-Ogievskiy 180266be5c3eSTuguoyi update_header = update_header && !header_updated; 180366be5c3eSTuguoyi } 180466be5c3eSTuguoyi 1805d1258dd0SVladimir Sementsov-Ogievskiy if (update_header) { 1806af7b708dSStefan Hajnoczi ret = qcow2_update_header(bs); 1807af7b708dSStefan Hajnoczi if (ret < 0) { 18083ef6c40aSMax Reitz error_setg_errno(errp, -ret, "Could not update qcow2 header"); 1809af7b708dSStefan Hajnoczi goto fail; 1810af7b708dSStefan Hajnoczi } 1811af7b708dSStefan Hajnoczi } 1812af7b708dSStefan Hajnoczi 18133b650816SKevin Wolf bs->supported_zero_flags = header.version >= 3 ? 
18143b650816SKevin Wolf BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK : 0; 1815f01643fbSKevin Wolf bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; 181668d100e9SKevin Wolf 1817c61d0004SStefan Hajnoczi /* Repair image if dirty */ 1818307261b2SVladimir Sementsov-Ogievskiy if (!(flags & BDRV_O_CHECK) && bdrv_is_writable(bs) && 1819058f8f16SStefan Hajnoczi (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) { 1820c61d0004SStefan Hajnoczi BdrvCheckResult result = {0}; 1821c61d0004SStefan Hajnoczi 18222fd61638SPaolo Bonzini ret = qcow2_co_check_locked(bs, &result, 18232fd61638SPaolo Bonzini BDRV_FIX_ERRORS | BDRV_FIX_LEAKS); 1824791fff50SMax Reitz if (ret < 0 || result.check_errors) { 1825791fff50SMax Reitz if (ret >= 0) { 1826791fff50SMax Reitz ret = -EIO; 1827791fff50SMax Reitz } 18283ef6c40aSMax Reitz error_setg_errno(errp, -ret, "Could not repair dirty image"); 1829c61d0004SStefan Hajnoczi goto fail; 1830c61d0004SStefan Hajnoczi } 1831c61d0004SStefan Hajnoczi } 1832c61d0004SStefan Hajnoczi 1833585f8587Sbellard #ifdef DEBUG_ALLOC 18346cbc3031SPhilipp Hahn { 18356cbc3031SPhilipp Hahn BdrvCheckResult result = {0}; 1836b35278f7SStefan Hajnoczi qcow2_check_refcounts(bs, &result, 0); 18376cbc3031SPhilipp Hahn } 1838585f8587Sbellard #endif 1839ceb029cdSVladimir Sementsov-Ogievskiy 18406f13a316SVladimir Sementsov-Ogievskiy qemu_co_queue_init(&s->thread_task_queue); 1841ceb029cdSVladimir Sementsov-Ogievskiy 18426d85a57eSJes Sorensen return ret; 1843585f8587Sbellard 1844585f8587Sbellard fail: 18459b890bdcSKevin Wolf g_free(s->image_data_file); 184606e9cd19SHanna Reitz if (open_data_file && has_data_file(bs)) { 18470e8c08beSKevin Wolf bdrv_unref_child(bs, s->data_file); 1848808cf3cbSVladimir Sementsov-Ogievskiy s->data_file = NULL; 18490e8c08beSKevin Wolf } 18506744cbabSKevin Wolf g_free(s->unknown_header_fields); 185175bab85cSKevin Wolf cleanup_unknown_header_ext(bs); 1852ed6ccf0fSKevin Wolf qcow2_free_snapshots(bs); 1853ed6ccf0fSKevin Wolf qcow2_refcount_close(bs); 
1854de82815dSKevin Wolf qemu_vfree(s->l1_table); 1855cf93980eSMax Reitz /* else pre-write overlap checks in cache_destroy may crash */ 1856cf93980eSMax Reitz s->l1_table = NULL; 1857279621c0SAlberto Garcia cache_clean_timer_del(bs); 185829c1a730SKevin Wolf if (s->l2_table_cache) { 1859e64d4072SAlberto Garcia qcow2_cache_destroy(s->l2_table_cache); 186029c1a730SKevin Wolf } 1861c5a33ee9SPrasad Joshi if (s->refcount_block_cache) { 1862e64d4072SAlberto Garcia qcow2_cache_destroy(s->refcount_block_cache); 1863c5a33ee9SPrasad Joshi } 1864b25b387fSDaniel P. Berrange qcrypto_block_free(s->crypto); 1865b25b387fSDaniel P. Berrange qapi_free_QCryptoBlockOpenOptions(s->crypto_opts); 18666d85a57eSJes Sorensen return ret; 1867585f8587Sbellard } 1868585f8587Sbellard 18691fafcd93SPaolo Bonzini typedef struct QCow2OpenCo { 18701fafcd93SPaolo Bonzini BlockDriverState *bs; 18711fafcd93SPaolo Bonzini QDict *options; 18721fafcd93SPaolo Bonzini int flags; 18731fafcd93SPaolo Bonzini Error **errp; 18741fafcd93SPaolo Bonzini int ret; 18751fafcd93SPaolo Bonzini } QCow2OpenCo; 18761fafcd93SPaolo Bonzini 18771fafcd93SPaolo Bonzini static void coroutine_fn qcow2_open_entry(void *opaque) 18781fafcd93SPaolo Bonzini { 18791fafcd93SPaolo Bonzini QCow2OpenCo *qoc = opaque; 18801fafcd93SPaolo Bonzini BDRVQcow2State *s = qoc->bs->opaque; 18811fafcd93SPaolo Bonzini 18821fafcd93SPaolo Bonzini qemu_co_mutex_lock(&s->lock); 188306e9cd19SHanna Reitz qoc->ret = qcow2_do_open(qoc->bs, qoc->options, qoc->flags, true, 188406e9cd19SHanna Reitz qoc->errp); 18851fafcd93SPaolo Bonzini qemu_co_mutex_unlock(&s->lock); 18861fafcd93SPaolo Bonzini } 18871fafcd93SPaolo Bonzini 18884e4bf5c4SKevin Wolf static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, 18894e4bf5c4SKevin Wolf Error **errp) 18904e4bf5c4SKevin Wolf { 18911fafcd93SPaolo Bonzini BDRVQcow2State *s = bs->opaque; 18921fafcd93SPaolo Bonzini QCow2OpenCo qoc = { 18931fafcd93SPaolo Bonzini .bs = bs, 18941fafcd93SPaolo Bonzini .options = 
options, 18951fafcd93SPaolo Bonzini .flags = flags, 18961fafcd93SPaolo Bonzini .errp = errp, 18971fafcd93SPaolo Bonzini .ret = -EINPROGRESS 18981fafcd93SPaolo Bonzini }; 18991fafcd93SPaolo Bonzini 19008b1869daSMax Reitz bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, 19018b1869daSMax Reitz BDRV_CHILD_IMAGE, false, errp); 19024e4bf5c4SKevin Wolf if (!bs->file) { 19034e4bf5c4SKevin Wolf return -EINVAL; 19044e4bf5c4SKevin Wolf } 19054e4bf5c4SKevin Wolf 19061fafcd93SPaolo Bonzini /* Initialise locks */ 19071fafcd93SPaolo Bonzini qemu_co_mutex_init(&s->lock); 19081fafcd93SPaolo Bonzini 19091fafcd93SPaolo Bonzini if (qemu_in_coroutine()) { 19101fafcd93SPaolo Bonzini /* From bdrv_co_create. */ 19111fafcd93SPaolo Bonzini qcow2_open_entry(&qoc); 19121fafcd93SPaolo Bonzini } else { 19134720cbeeSKevin Wolf assert(qemu_get_current_aio_context() == qemu_get_aio_context()); 19141fafcd93SPaolo Bonzini qemu_coroutine_enter(qemu_coroutine_create(qcow2_open_entry, &qoc)); 19151fafcd93SPaolo Bonzini BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS); 19161fafcd93SPaolo Bonzini } 19171fafcd93SPaolo Bonzini return qoc.ret; 19184e4bf5c4SKevin Wolf } 19194e4bf5c4SKevin Wolf 19203baca891SKevin Wolf static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp) 1921d34682cdSKevin Wolf { 1922ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 1923d34682cdSKevin Wolf 1924a84178ccSEric Blake if (bs->encrypted) { 1925a84178ccSEric Blake /* Encryption works on a sector granularity */ 19266f8f015cSAlberto Garcia bs->bl.request_alignment = qcrypto_block_get_sector_size(s->crypto); 1927a84178ccSEric Blake } 1928a6841a2dSAlberto Garcia bs->bl.pwrite_zeroes_alignment = s->subcluster_size; 1929ecdbead6SEric Blake bs->bl.pdiscard_alignment = s->cluster_size; 1930d34682cdSKevin Wolf } 1931d34682cdSKevin Wolf 193221d82ac9SJeff Cody static int qcow2_reopen_prepare(BDRVReopenState *state, 193321d82ac9SJeff Cody BlockReopenQueue *queue, Error **errp) 193421d82ac9SJeff Cody { 
1935bcfd86d6SKevin Wolf BDRVQcow2State *s = state->bs->opaque; 19365b0959a7SKevin Wolf Qcow2ReopenState *r; 19374c2e5f8fSKevin Wolf int ret; 19384c2e5f8fSKevin Wolf 19395b0959a7SKevin Wolf r = g_new0(Qcow2ReopenState, 1); 19405b0959a7SKevin Wolf state->opaque = r; 19415b0959a7SKevin Wolf 19425b0959a7SKevin Wolf ret = qcow2_update_options_prepare(state->bs, r, state->options, 19435b0959a7SKevin Wolf state->flags, errp); 19445b0959a7SKevin Wolf if (ret < 0) { 19455b0959a7SKevin Wolf goto fail; 19465b0959a7SKevin Wolf } 19475b0959a7SKevin Wolf 19485b0959a7SKevin Wolf /* We need to write out any unwritten data if we reopen read-only. */ 19494c2e5f8fSKevin Wolf if ((state->flags & BDRV_O_RDWR) == 0) { 1950169b8793SVladimir Sementsov-Ogievskiy ret = qcow2_reopen_bitmaps_ro(state->bs, errp); 1951169b8793SVladimir Sementsov-Ogievskiy if (ret < 0) { 1952169b8793SVladimir Sementsov-Ogievskiy goto fail; 1953169b8793SVladimir Sementsov-Ogievskiy } 1954169b8793SVladimir Sementsov-Ogievskiy 19554c2e5f8fSKevin Wolf ret = bdrv_flush(state->bs); 19564c2e5f8fSKevin Wolf if (ret < 0) { 19575b0959a7SKevin Wolf goto fail; 19584c2e5f8fSKevin Wolf } 19594c2e5f8fSKevin Wolf 19604c2e5f8fSKevin Wolf ret = qcow2_mark_clean(state->bs); 19614c2e5f8fSKevin Wolf if (ret < 0) { 19625b0959a7SKevin Wolf goto fail; 19634c2e5f8fSKevin Wolf } 19644c2e5f8fSKevin Wolf } 19654c2e5f8fSKevin Wolf 1966bcfd86d6SKevin Wolf /* 1967bcfd86d6SKevin Wolf * Without an external data file, s->data_file points to the same BdrvChild 1968bcfd86d6SKevin Wolf * as bs->file. It needs to be resynced after reopen because bs->file may 1969bcfd86d6SKevin Wolf * be changed. We can't use it in the meantime. 
1970bcfd86d6SKevin Wolf */ 1971bcfd86d6SKevin Wolf if (!has_data_file(state->bs)) { 1972bcfd86d6SKevin Wolf assert(s->data_file == state->bs->file); 1973bcfd86d6SKevin Wolf s->data_file = NULL; 1974bcfd86d6SKevin Wolf } 1975bcfd86d6SKevin Wolf 197621d82ac9SJeff Cody return 0; 19775b0959a7SKevin Wolf 19785b0959a7SKevin Wolf fail: 19795b0959a7SKevin Wolf qcow2_update_options_abort(state->bs, r); 19805b0959a7SKevin Wolf g_free(r); 19815b0959a7SKevin Wolf return ret; 19825b0959a7SKevin Wolf } 19835b0959a7SKevin Wolf 19845b0959a7SKevin Wolf static void qcow2_reopen_commit(BDRVReopenState *state) 19855b0959a7SKevin Wolf { 1986bcfd86d6SKevin Wolf BDRVQcow2State *s = state->bs->opaque; 1987bcfd86d6SKevin Wolf 19885b0959a7SKevin Wolf qcow2_update_options_commit(state->bs, state->opaque); 1989bcfd86d6SKevin Wolf if (!s->data_file) { 1990bcfd86d6SKevin Wolf /* 1991bcfd86d6SKevin Wolf * If we don't have an external data file, s->data_file was cleared by 1992bcfd86d6SKevin Wolf * qcow2_reopen_prepare() and needs to be updated. 1993bcfd86d6SKevin Wolf */ 1994bcfd86d6SKevin Wolf s->data_file = state->bs->file; 1995bcfd86d6SKevin Wolf } 199665eb7c85SPeter Krempa g_free(state->opaque); 199765eb7c85SPeter Krempa } 199865eb7c85SPeter Krempa 199965eb7c85SPeter Krempa static void qcow2_reopen_commit_post(BDRVReopenState *state) 200065eb7c85SPeter Krempa { 20014dd09f62SVladimir Sementsov-Ogievskiy if (state->flags & BDRV_O_RDWR) { 20024dd09f62SVladimir Sementsov-Ogievskiy Error *local_err = NULL; 20034dd09f62SVladimir Sementsov-Ogievskiy 20044dd09f62SVladimir Sementsov-Ogievskiy if (qcow2_reopen_bitmaps_rw(state->bs, &local_err) < 0) { 20054dd09f62SVladimir Sementsov-Ogievskiy /* 20064dd09f62SVladimir Sementsov-Ogievskiy * This is not fatal, bitmaps just left read-only, so all following 20074dd09f62SVladimir Sementsov-Ogievskiy * writes will fail. User can remove read-only bitmaps to unblock 20084dd09f62SVladimir Sementsov-Ogievskiy * writes or retry reopen. 
20094dd09f62SVladimir Sementsov-Ogievskiy */ 20104dd09f62SVladimir Sementsov-Ogievskiy error_reportf_err(local_err, 20114dd09f62SVladimir Sementsov-Ogievskiy "%s: Failed to make dirty bitmaps writable: ", 20124dd09f62SVladimir Sementsov-Ogievskiy bdrv_get_node_name(state->bs)); 20134dd09f62SVladimir Sementsov-Ogievskiy } 20144dd09f62SVladimir Sementsov-Ogievskiy } 20155b0959a7SKevin Wolf } 20165b0959a7SKevin Wolf 20175b0959a7SKevin Wolf static void qcow2_reopen_abort(BDRVReopenState *state) 20185b0959a7SKevin Wolf { 2019bcfd86d6SKevin Wolf BDRVQcow2State *s = state->bs->opaque; 2020bcfd86d6SKevin Wolf 2021bcfd86d6SKevin Wolf if (!s->data_file) { 2022bcfd86d6SKevin Wolf /* 2023bcfd86d6SKevin Wolf * If we don't have an external data file, s->data_file was cleared by 2024bcfd86d6SKevin Wolf * qcow2_reopen_prepare() and needs to be restored. 2025bcfd86d6SKevin Wolf */ 2026bcfd86d6SKevin Wolf s->data_file = state->bs->file; 2027bcfd86d6SKevin Wolf } 20285b0959a7SKevin Wolf qcow2_update_options_abort(state->bs, state->opaque); 20295b0959a7SKevin Wolf g_free(state->opaque); 203021d82ac9SJeff Cody } 203121d82ac9SJeff Cody 20325365f44dSKevin Wolf static void qcow2_join_options(QDict *options, QDict *old_options) 20335365f44dSKevin Wolf { 20345365f44dSKevin Wolf bool has_new_overlap_template = 20355365f44dSKevin Wolf qdict_haskey(options, QCOW2_OPT_OVERLAP) || 20365365f44dSKevin Wolf qdict_haskey(options, QCOW2_OPT_OVERLAP_TEMPLATE); 20375365f44dSKevin Wolf bool has_new_total_cache_size = 20385365f44dSKevin Wolf qdict_haskey(options, QCOW2_OPT_CACHE_SIZE); 20395365f44dSKevin Wolf bool has_all_cache_options; 20405365f44dSKevin Wolf 20415365f44dSKevin Wolf /* New overlap template overrides all old overlap options */ 20425365f44dSKevin Wolf if (has_new_overlap_template) { 20435365f44dSKevin Wolf qdict_del(old_options, QCOW2_OPT_OVERLAP); 20445365f44dSKevin Wolf qdict_del(old_options, QCOW2_OPT_OVERLAP_TEMPLATE); 20455365f44dSKevin Wolf qdict_del(old_options, 
QCOW2_OPT_OVERLAP_MAIN_HEADER); 20465365f44dSKevin Wolf qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L1); 20475365f44dSKevin Wolf qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L2); 20485365f44dSKevin Wolf qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_TABLE); 20495365f44dSKevin Wolf qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK); 20505365f44dSKevin Wolf qdict_del(old_options, QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE); 20515365f44dSKevin Wolf qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L1); 20525365f44dSKevin Wolf qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L2); 20535365f44dSKevin Wolf } 20545365f44dSKevin Wolf 20555365f44dSKevin Wolf /* New total cache size overrides all old options */ 20565365f44dSKevin Wolf if (qdict_haskey(options, QCOW2_OPT_CACHE_SIZE)) { 20575365f44dSKevin Wolf qdict_del(old_options, QCOW2_OPT_L2_CACHE_SIZE); 20585365f44dSKevin Wolf qdict_del(old_options, QCOW2_OPT_REFCOUNT_CACHE_SIZE); 20595365f44dSKevin Wolf } 20605365f44dSKevin Wolf 20615365f44dSKevin Wolf qdict_join(options, old_options, false); 20625365f44dSKevin Wolf 20635365f44dSKevin Wolf /* 20645365f44dSKevin Wolf * If after merging all cache size options are set, an old total size is 20655365f44dSKevin Wolf * overwritten. Do keep all options, however, if all three are new. The 20665365f44dSKevin Wolf * resulting error message is what we want to happen. 
20675365f44dSKevin Wolf */ 20685365f44dSKevin Wolf has_all_cache_options = 20695365f44dSKevin Wolf qdict_haskey(options, QCOW2_OPT_CACHE_SIZE) || 20705365f44dSKevin Wolf qdict_haskey(options, QCOW2_OPT_L2_CACHE_SIZE) || 20715365f44dSKevin Wolf qdict_haskey(options, QCOW2_OPT_REFCOUNT_CACHE_SIZE); 20725365f44dSKevin Wolf 20735365f44dSKevin Wolf if (has_all_cache_options && !has_new_total_cache_size) { 20745365f44dSKevin Wolf qdict_del(options, QCOW2_OPT_CACHE_SIZE); 20755365f44dSKevin Wolf } 20765365f44dSKevin Wolf } 20775365f44dSKevin Wolf 2078a320fb04SEric Blake static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs, 2079a320fb04SEric Blake bool want_zero, 2080a320fb04SEric Blake int64_t offset, int64_t count, 2081a320fb04SEric Blake int64_t *pnum, int64_t *map, 2082a320fb04SEric Blake BlockDriverState **file) 2083585f8587Sbellard { 2084ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 2085388e5816SAlberto Garcia uint64_t host_offset; 2086ecfe1863SKevin Wolf unsigned int bytes; 208710dabdc5SAlberto Garcia QCow2SubclusterType type; 208874e60fb5SAlberto Garcia int ret, status = 0; 2089585f8587Sbellard 20905e978550SKevin Wolf qemu_co_mutex_lock(&s->lock); 20915e978550SKevin Wolf 209269f47505SVladimir Sementsov-Ogievskiy if (!s->metadata_preallocation_checked) { 209369f47505SVladimir Sementsov-Ogievskiy ret = qcow2_detect_metadata_preallocation(bs); 209469f47505SVladimir Sementsov-Ogievskiy s->metadata_preallocation = (ret == 1); 209569f47505SVladimir Sementsov-Ogievskiy s->metadata_preallocation_checked = true; 209669f47505SVladimir Sementsov-Ogievskiy } 209769f47505SVladimir Sementsov-Ogievskiy 2098a320fb04SEric Blake bytes = MIN(INT_MAX, count); 2099ca4a0bb8SAlberto Garcia ret = qcow2_get_host_offset(bs, offset, &bytes, &host_offset, &type); 2100f8a2e5e3SStefan Hajnoczi qemu_co_mutex_unlock(&s->lock); 21011c46efaaSKevin Wolf if (ret < 0) { 2102d663640cSPaolo Bonzini return ret; 21031c46efaaSKevin Wolf } 2104095a9c58Saliguori 2105a320fb04SEric Blake 
*pnum = bytes; 2106ecfe1863SKevin Wolf 210710dabdc5SAlberto Garcia if ((type == QCOW2_SUBCLUSTER_NORMAL || 210897490a14SAlberto Garcia type == QCOW2_SUBCLUSTER_ZERO_ALLOC || 210997490a14SAlberto Garcia type == QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC) && !s->crypto) { 2110388e5816SAlberto Garcia *map = host_offset; 211137be1403SKevin Wolf *file = s->data_file->bs; 2112a320fb04SEric Blake status |= BDRV_BLOCK_OFFSET_VALID; 21134bc74be9SPaolo Bonzini } 211410dabdc5SAlberto Garcia if (type == QCOW2_SUBCLUSTER_ZERO_PLAIN || 211510dabdc5SAlberto Garcia type == QCOW2_SUBCLUSTER_ZERO_ALLOC) { 21164bc74be9SPaolo Bonzini status |= BDRV_BLOCK_ZERO; 211797490a14SAlberto Garcia } else if (type != QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN && 211897490a14SAlberto Garcia type != QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC) { 21194bc74be9SPaolo Bonzini status |= BDRV_BLOCK_DATA; 21204bc74be9SPaolo Bonzini } 212169f47505SVladimir Sementsov-Ogievskiy if (s->metadata_preallocation && (status & BDRV_BLOCK_DATA) && 212269f47505SVladimir Sementsov-Ogievskiy (status & BDRV_BLOCK_OFFSET_VALID)) 212369f47505SVladimir Sementsov-Ogievskiy { 212469f47505SVladimir Sementsov-Ogievskiy status |= BDRV_BLOCK_RECURSE; 212569f47505SVladimir Sementsov-Ogievskiy } 21264bc74be9SPaolo Bonzini return status; 2127585f8587Sbellard } 2128585f8587Sbellard 2129fd9fcd37SFam Zheng static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs, 2130fd9fcd37SFam Zheng QCowL2Meta **pl2meta, 2131fd9fcd37SFam Zheng bool link_l2) 2132fd9fcd37SFam Zheng { 2133fd9fcd37SFam Zheng int ret = 0; 2134fd9fcd37SFam Zheng QCowL2Meta *l2meta = *pl2meta; 2135fd9fcd37SFam Zheng 2136fd9fcd37SFam Zheng while (l2meta != NULL) { 2137fd9fcd37SFam Zheng QCowL2Meta *next; 2138fd9fcd37SFam Zheng 2139354d930dSFam Zheng if (link_l2) { 2140fd9fcd37SFam Zheng ret = qcow2_alloc_cluster_link_l2(bs, l2meta); 2141fd9fcd37SFam Zheng if (ret) { 2142fd9fcd37SFam Zheng goto out; 2143fd9fcd37SFam Zheng } 21448b24cd14SKevin Wolf } else { 21458b24cd14SKevin Wolf 
qcow2_alloc_cluster_abort(bs, l2meta); 2146fd9fcd37SFam Zheng } 2147fd9fcd37SFam Zheng 2148fd9fcd37SFam Zheng /* Take the request off the list of running requests */ 2149fd9fcd37SFam Zheng QLIST_REMOVE(l2meta, next_in_flight); 2150fd9fcd37SFam Zheng 2151fd9fcd37SFam Zheng qemu_co_queue_restart_all(&l2meta->dependent_requests); 2152fd9fcd37SFam Zheng 2153fd9fcd37SFam Zheng next = l2meta->next; 2154fd9fcd37SFam Zheng g_free(l2meta); 2155fd9fcd37SFam Zheng l2meta = next; 2156fd9fcd37SFam Zheng } 2157fd9fcd37SFam Zheng out: 2158fd9fcd37SFam Zheng *pl2meta = l2meta; 2159fd9fcd37SFam Zheng return ret; 2160fd9fcd37SFam Zheng } 2161fd9fcd37SFam Zheng 216288f468e5SVladimir Sementsov-Ogievskiy static coroutine_fn int 216388f468e5SVladimir Sementsov-Ogievskiy qcow2_co_preadv_encrypted(BlockDriverState *bs, 21649c4269d5SAlberto Garcia uint64_t host_offset, 216588f468e5SVladimir Sementsov-Ogievskiy uint64_t offset, 216688f468e5SVladimir Sementsov-Ogievskiy uint64_t bytes, 216788f468e5SVladimir Sementsov-Ogievskiy QEMUIOVector *qiov, 216888f468e5SVladimir Sementsov-Ogievskiy uint64_t qiov_offset) 216988f468e5SVladimir Sementsov-Ogievskiy { 217088f468e5SVladimir Sementsov-Ogievskiy int ret; 217188f468e5SVladimir Sementsov-Ogievskiy BDRVQcow2State *s = bs->opaque; 217288f468e5SVladimir Sementsov-Ogievskiy uint8_t *buf; 217388f468e5SVladimir Sementsov-Ogievskiy 217488f468e5SVladimir Sementsov-Ogievskiy assert(bs->encrypted && s->crypto); 217588f468e5SVladimir Sementsov-Ogievskiy assert(bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); 217688f468e5SVladimir Sementsov-Ogievskiy 217788f468e5SVladimir Sementsov-Ogievskiy /* 217888f468e5SVladimir Sementsov-Ogievskiy * For encrypted images, read everything into a temporary 217988f468e5SVladimir Sementsov-Ogievskiy * contiguous buffer on which the AES functions can work. 
218088f468e5SVladimir Sementsov-Ogievskiy * Also, decryption in a separate buffer is better as it 218188f468e5SVladimir Sementsov-Ogievskiy * prevents the guest from learning information about the 218288f468e5SVladimir Sementsov-Ogievskiy * encrypted nature of the virtual disk. 218388f468e5SVladimir Sementsov-Ogievskiy */ 218488f468e5SVladimir Sementsov-Ogievskiy 218588f468e5SVladimir Sementsov-Ogievskiy buf = qemu_try_blockalign(s->data_file->bs, bytes); 218688f468e5SVladimir Sementsov-Ogievskiy if (buf == NULL) { 218788f468e5SVladimir Sementsov-Ogievskiy return -ENOMEM; 218888f468e5SVladimir Sementsov-Ogievskiy } 218988f468e5SVladimir Sementsov-Ogievskiy 219088f468e5SVladimir Sementsov-Ogievskiy BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); 21919c4269d5SAlberto Garcia ret = bdrv_co_pread(s->data_file, host_offset, bytes, buf, 0); 219288f468e5SVladimir Sementsov-Ogievskiy if (ret < 0) { 219388f468e5SVladimir Sementsov-Ogievskiy goto fail; 219488f468e5SVladimir Sementsov-Ogievskiy } 219588f468e5SVladimir Sementsov-Ogievskiy 21969c4269d5SAlberto Garcia if (qcow2_co_decrypt(bs, host_offset, offset, buf, bytes) < 0) 219788f468e5SVladimir Sementsov-Ogievskiy { 219888f468e5SVladimir Sementsov-Ogievskiy ret = -EIO; 219988f468e5SVladimir Sementsov-Ogievskiy goto fail; 220088f468e5SVladimir Sementsov-Ogievskiy } 220188f468e5SVladimir Sementsov-Ogievskiy qemu_iovec_from_buf(qiov, qiov_offset, buf, bytes); 220288f468e5SVladimir Sementsov-Ogievskiy 220388f468e5SVladimir Sementsov-Ogievskiy fail: 220488f468e5SVladimir Sementsov-Ogievskiy qemu_vfree(buf); 220588f468e5SVladimir Sementsov-Ogievskiy 220688f468e5SVladimir Sementsov-Ogievskiy return ret; 220788f468e5SVladimir Sementsov-Ogievskiy } 220888f468e5SVladimir Sementsov-Ogievskiy 2209d710cf57SVladimir Sementsov-Ogievskiy typedef struct Qcow2AioTask { 2210d710cf57SVladimir Sementsov-Ogievskiy AioTask task; 2211d710cf57SVladimir Sementsov-Ogievskiy 2212d710cf57SVladimir Sementsov-Ogievskiy BlockDriverState *bs; 
221310dabdc5SAlberto Garcia QCow2SubclusterType subcluster_type; /* only for read */ 22149a3978a4SVladimir Sementsov-Ogievskiy uint64_t host_offset; /* or l2_entry for compressed read */ 2215d710cf57SVladimir Sementsov-Ogievskiy uint64_t offset; 2216d710cf57SVladimir Sementsov-Ogievskiy uint64_t bytes; 2217d710cf57SVladimir Sementsov-Ogievskiy QEMUIOVector *qiov; 2218d710cf57SVladimir Sementsov-Ogievskiy uint64_t qiov_offset; 2219d710cf57SVladimir Sementsov-Ogievskiy QCowL2Meta *l2meta; /* only for write */ 2220d710cf57SVladimir Sementsov-Ogievskiy } Qcow2AioTask; 2221d710cf57SVladimir Sementsov-Ogievskiy 2222d710cf57SVladimir Sementsov-Ogievskiy static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task); 2223d710cf57SVladimir Sementsov-Ogievskiy static coroutine_fn int qcow2_add_task(BlockDriverState *bs, 2224d710cf57SVladimir Sementsov-Ogievskiy AioTaskPool *pool, 2225d710cf57SVladimir Sementsov-Ogievskiy AioTaskFunc func, 222610dabdc5SAlberto Garcia QCow2SubclusterType subcluster_type, 22279c4269d5SAlberto Garcia uint64_t host_offset, 2228d710cf57SVladimir Sementsov-Ogievskiy uint64_t offset, 2229d710cf57SVladimir Sementsov-Ogievskiy uint64_t bytes, 2230d710cf57SVladimir Sementsov-Ogievskiy QEMUIOVector *qiov, 2231d710cf57SVladimir Sementsov-Ogievskiy size_t qiov_offset, 2232d710cf57SVladimir Sementsov-Ogievskiy QCowL2Meta *l2meta) 2233d710cf57SVladimir Sementsov-Ogievskiy { 2234d710cf57SVladimir Sementsov-Ogievskiy Qcow2AioTask local_task; 2235d710cf57SVladimir Sementsov-Ogievskiy Qcow2AioTask *task = pool ? 
g_new(Qcow2AioTask, 1) : &local_task; 2236d710cf57SVladimir Sementsov-Ogievskiy 2237d710cf57SVladimir Sementsov-Ogievskiy *task = (Qcow2AioTask) { 2238d710cf57SVladimir Sementsov-Ogievskiy .task.func = func, 2239d710cf57SVladimir Sementsov-Ogievskiy .bs = bs, 224010dabdc5SAlberto Garcia .subcluster_type = subcluster_type, 2241d710cf57SVladimir Sementsov-Ogievskiy .qiov = qiov, 22429c4269d5SAlberto Garcia .host_offset = host_offset, 2243d710cf57SVladimir Sementsov-Ogievskiy .offset = offset, 2244d710cf57SVladimir Sementsov-Ogievskiy .bytes = bytes, 2245d710cf57SVladimir Sementsov-Ogievskiy .qiov_offset = qiov_offset, 2246d710cf57SVladimir Sementsov-Ogievskiy .l2meta = l2meta, 2247d710cf57SVladimir Sementsov-Ogievskiy }; 2248d710cf57SVladimir Sementsov-Ogievskiy 2249d710cf57SVladimir Sementsov-Ogievskiy trace_qcow2_add_task(qemu_coroutine_self(), bs, pool, 2250d710cf57SVladimir Sementsov-Ogievskiy func == qcow2_co_preadv_task_entry ? "read" : "write", 225110dabdc5SAlberto Garcia subcluster_type, host_offset, offset, bytes, 2252d710cf57SVladimir Sementsov-Ogievskiy qiov, qiov_offset); 2253d710cf57SVladimir Sementsov-Ogievskiy 2254d710cf57SVladimir Sementsov-Ogievskiy if (!pool) { 2255d710cf57SVladimir Sementsov-Ogievskiy return func(&task->task); 2256d710cf57SVladimir Sementsov-Ogievskiy } 2257d710cf57SVladimir Sementsov-Ogievskiy 2258d710cf57SVladimir Sementsov-Ogievskiy aio_task_pool_start_task(pool, &task->task); 2259d710cf57SVladimir Sementsov-Ogievskiy 2260d710cf57SVladimir Sementsov-Ogievskiy return 0; 2261d710cf57SVladimir Sementsov-Ogievskiy } 2262d710cf57SVladimir Sementsov-Ogievskiy 226388f468e5SVladimir Sementsov-Ogievskiy static coroutine_fn int qcow2_co_preadv_task(BlockDriverState *bs, 226410dabdc5SAlberto Garcia QCow2SubclusterType subc_type, 22659c4269d5SAlberto Garcia uint64_t host_offset, 226688f468e5SVladimir Sementsov-Ogievskiy uint64_t offset, uint64_t bytes, 226788f468e5SVladimir Sementsov-Ogievskiy QEMUIOVector *qiov, 226888f468e5SVladimir 
Sementsov-Ogievskiy size_t qiov_offset) 226988f468e5SVladimir Sementsov-Ogievskiy { 227088f468e5SVladimir Sementsov-Ogievskiy BDRVQcow2State *s = bs->opaque; 227188f468e5SVladimir Sementsov-Ogievskiy 227210dabdc5SAlberto Garcia switch (subc_type) { 227310dabdc5SAlberto Garcia case QCOW2_SUBCLUSTER_ZERO_PLAIN: 227410dabdc5SAlberto Garcia case QCOW2_SUBCLUSTER_ZERO_ALLOC: 227588f468e5SVladimir Sementsov-Ogievskiy /* Both zero types are handled in qcow2_co_preadv_part */ 227688f468e5SVladimir Sementsov-Ogievskiy g_assert_not_reached(); 227788f468e5SVladimir Sementsov-Ogievskiy 227810dabdc5SAlberto Garcia case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN: 227997490a14SAlberto Garcia case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: 228088f468e5SVladimir Sementsov-Ogievskiy assert(bs->backing); /* otherwise handled in qcow2_co_preadv_part */ 228188f468e5SVladimir Sementsov-Ogievskiy 228288f468e5SVladimir Sementsov-Ogievskiy BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); 228388f468e5SVladimir Sementsov-Ogievskiy return bdrv_co_preadv_part(bs->backing, offset, bytes, 228488f468e5SVladimir Sementsov-Ogievskiy qiov, qiov_offset, 0); 228588f468e5SVladimir Sementsov-Ogievskiy 228610dabdc5SAlberto Garcia case QCOW2_SUBCLUSTER_COMPRESSED: 22879c4269d5SAlberto Garcia return qcow2_co_preadv_compressed(bs, host_offset, 228888f468e5SVladimir Sementsov-Ogievskiy offset, bytes, qiov, qiov_offset); 228988f468e5SVladimir Sementsov-Ogievskiy 229010dabdc5SAlberto Garcia case QCOW2_SUBCLUSTER_NORMAL: 229188f468e5SVladimir Sementsov-Ogievskiy if (bs->encrypted) { 22929c4269d5SAlberto Garcia return qcow2_co_preadv_encrypted(bs, host_offset, 229388f468e5SVladimir Sementsov-Ogievskiy offset, bytes, qiov, qiov_offset); 229488f468e5SVladimir Sementsov-Ogievskiy } 229588f468e5SVladimir Sementsov-Ogievskiy 229688f468e5SVladimir Sementsov-Ogievskiy BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); 22979c4269d5SAlberto Garcia return bdrv_co_preadv_part(s->data_file, host_offset, 229888f468e5SVladimir Sementsov-Ogievskiy 
bytes, qiov, qiov_offset, 0); 229988f468e5SVladimir Sementsov-Ogievskiy 230088f468e5SVladimir Sementsov-Ogievskiy default: 230188f468e5SVladimir Sementsov-Ogievskiy g_assert_not_reached(); 230288f468e5SVladimir Sementsov-Ogievskiy } 230388f468e5SVladimir Sementsov-Ogievskiy 230488f468e5SVladimir Sementsov-Ogievskiy g_assert_not_reached(); 230588f468e5SVladimir Sementsov-Ogievskiy } 230688f468e5SVladimir Sementsov-Ogievskiy 2307d710cf57SVladimir Sementsov-Ogievskiy static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task) 2308d710cf57SVladimir Sementsov-Ogievskiy { 2309d710cf57SVladimir Sementsov-Ogievskiy Qcow2AioTask *t = container_of(task, Qcow2AioTask, task); 2310d710cf57SVladimir Sementsov-Ogievskiy 2311d710cf57SVladimir Sementsov-Ogievskiy assert(!t->l2meta); 2312d710cf57SVladimir Sementsov-Ogievskiy 231310dabdc5SAlberto Garcia return qcow2_co_preadv_task(t->bs, t->subcluster_type, 231410dabdc5SAlberto Garcia t->host_offset, t->offset, t->bytes, 231510dabdc5SAlberto Garcia t->qiov, t->qiov_offset); 2316d710cf57SVladimir Sementsov-Ogievskiy } 2317d710cf57SVladimir Sementsov-Ogievskiy 2318df893d25SVladimir Sementsov-Ogievskiy static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs, 2319f7ef38ddSVladimir Sementsov-Ogievskiy int64_t offset, int64_t bytes, 2320df893d25SVladimir Sementsov-Ogievskiy QEMUIOVector *qiov, 2321f7ef38ddSVladimir Sementsov-Ogievskiy size_t qiov_offset, 2322f7ef38ddSVladimir Sementsov-Ogievskiy BdrvRequestFlags flags) 23231490791fSaliguori { 2324ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 2325d710cf57SVladimir Sementsov-Ogievskiy int ret = 0; 2326ecfe1863SKevin Wolf unsigned int cur_bytes; /* number of bytes in current iteration */ 2327388e5816SAlberto Garcia uint64_t host_offset = 0; 232810dabdc5SAlberto Garcia QCow2SubclusterType type; 2329d710cf57SVladimir Sementsov-Ogievskiy AioTaskPool *aio = NULL; 2330585f8587Sbellard 2331d710cf57SVladimir Sementsov-Ogievskiy while (bytes != 0 && aio_task_pool_status(aio) == 
0) { 2332faf575c1SFrediano Ziglio /* prepare next request */ 2333ecfe1863SKevin Wolf cur_bytes = MIN(bytes, INT_MAX); 2334b25b387fSDaniel P. Berrange if (s->crypto) { 2335ecfe1863SKevin Wolf cur_bytes = MIN(cur_bytes, 2336ecfe1863SKevin Wolf QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); 2337bd28f835SKevin Wolf } 2338bd28f835SKevin Wolf 2339f24196d3SVladimir Sementsov-Ogievskiy qemu_co_mutex_lock(&s->lock); 2340ca4a0bb8SAlberto Garcia ret = qcow2_get_host_offset(bs, offset, &cur_bytes, 2341ca4a0bb8SAlberto Garcia &host_offset, &type); 2342f24196d3SVladimir Sementsov-Ogievskiy qemu_co_mutex_unlock(&s->lock); 23431c46efaaSKevin Wolf if (ret < 0) { 2344d710cf57SVladimir Sementsov-Ogievskiy goto out; 23451c46efaaSKevin Wolf } 23461c46efaaSKevin Wolf 234710dabdc5SAlberto Garcia if (type == QCOW2_SUBCLUSTER_ZERO_PLAIN || 234810dabdc5SAlberto Garcia type == QCOW2_SUBCLUSTER_ZERO_ALLOC || 234997490a14SAlberto Garcia (type == QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN && !bs->backing) || 235097490a14SAlberto Garcia (type == QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC && !bs->backing)) 235188f468e5SVladimir Sementsov-Ogievskiy { 235288f468e5SVladimir Sementsov-Ogievskiy qemu_iovec_memset(qiov, qiov_offset, 0, cur_bytes); 2353a9465922Sbellard } else { 2354d710cf57SVladimir Sementsov-Ogievskiy if (!aio && cur_bytes != bytes) { 2355d710cf57SVladimir Sementsov-Ogievskiy aio = aio_task_pool_new(QCOW2_MAX_WORKERS); 2356d710cf57SVladimir Sementsov-Ogievskiy } 2357ca4a0bb8SAlberto Garcia ret = qcow2_add_task(bs, aio, qcow2_co_preadv_task_entry, type, 23589c4269d5SAlberto Garcia host_offset, offset, cur_bytes, 2359d710cf57SVladimir Sementsov-Ogievskiy qiov, qiov_offset, NULL); 23608af36488SKevin Wolf if (ret < 0) { 2361d710cf57SVladimir Sementsov-Ogievskiy goto out; 23628af36488SKevin Wolf } 2363faf575c1SFrediano Ziglio } 2364faf575c1SFrediano Ziglio 2365ecfe1863SKevin Wolf bytes -= cur_bytes; 2366ecfe1863SKevin Wolf offset += cur_bytes; 2367df893d25SVladimir Sementsov-Ogievskiy qiov_offset += 
cur_bytes; 23685ebaa27eSFrediano Ziglio } 2369f141eafeSaliguori 2370d710cf57SVladimir Sementsov-Ogievskiy out: 2371d710cf57SVladimir Sementsov-Ogievskiy if (aio) { 2372d710cf57SVladimir Sementsov-Ogievskiy aio_task_pool_wait_all(aio); 2373d710cf57SVladimir Sementsov-Ogievskiy if (ret == 0) { 2374d710cf57SVladimir Sementsov-Ogievskiy ret = aio_task_pool_status(aio); 2375d710cf57SVladimir Sementsov-Ogievskiy } 2376d710cf57SVladimir Sementsov-Ogievskiy g_free(aio); 2377d710cf57SVladimir Sementsov-Ogievskiy } 2378d710cf57SVladimir Sementsov-Ogievskiy 2379d710cf57SVladimir Sementsov-Ogievskiy return ret; 2380585f8587Sbellard } 2381585f8587Sbellard 2382ee22a9d8SAlberto Garcia /* Check if it's possible to merge a write request with the writing of 2383ee22a9d8SAlberto Garcia * the data from the COW regions */ 2384ee22a9d8SAlberto Garcia static bool merge_cow(uint64_t offset, unsigned bytes, 23855396234bSVladimir Sementsov-Ogievskiy QEMUIOVector *qiov, size_t qiov_offset, 23865396234bSVladimir Sementsov-Ogievskiy QCowL2Meta *l2meta) 2387ee22a9d8SAlberto Garcia { 2388ee22a9d8SAlberto Garcia QCowL2Meta *m; 2389ee22a9d8SAlberto Garcia 2390ee22a9d8SAlberto Garcia for (m = l2meta; m != NULL; m = m->next) { 2391ee22a9d8SAlberto Garcia /* If both COW regions are empty then there's nothing to merge */ 2392ee22a9d8SAlberto Garcia if (m->cow_start.nb_bytes == 0 && m->cow_end.nb_bytes == 0) { 2393ee22a9d8SAlberto Garcia continue; 2394ee22a9d8SAlberto Garcia } 2395ee22a9d8SAlberto Garcia 2396c8bb23cbSAnton Nefedov /* If COW regions are handled already, skip this too */ 2397c8bb23cbSAnton Nefedov if (m->skip_cow) { 2398c8bb23cbSAnton Nefedov continue; 2399c8bb23cbSAnton Nefedov } 2400c8bb23cbSAnton Nefedov 24013441ad4bSAlberto Garcia /* 24023441ad4bSAlberto Garcia * The write request should start immediately after the first 24033441ad4bSAlberto Garcia * COW region. 
This does not always happen because the area 24043441ad4bSAlberto Garcia * touched by the request can be larger than the one defined 24053441ad4bSAlberto Garcia * by @m (a single request can span an area consisting of a 24063441ad4bSAlberto Garcia * mix of previously unallocated and allocated clusters, that 24073441ad4bSAlberto Garcia * is why @l2meta is a list). 24083441ad4bSAlberto Garcia */ 2409ee22a9d8SAlberto Garcia if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) { 24103441ad4bSAlberto Garcia /* In this case the request starts before this region */ 24113441ad4bSAlberto Garcia assert(offset < l2meta_cow_start(m)); 24123441ad4bSAlberto Garcia assert(m->cow_start.nb_bytes == 0); 2413ee22a9d8SAlberto Garcia continue; 2414ee22a9d8SAlberto Garcia } 2415ee22a9d8SAlberto Garcia 24163441ad4bSAlberto Garcia /* The write request should end immediately before the second 24173441ad4bSAlberto Garcia * COW region (see above for why it does not always happen) */ 2418ee22a9d8SAlberto Garcia if (m->offset + m->cow_end.offset != offset + bytes) { 24193441ad4bSAlberto Garcia assert(offset + bytes > m->offset + m->cow_end.offset); 24203441ad4bSAlberto Garcia assert(m->cow_end.nb_bytes == 0); 2421ee22a9d8SAlberto Garcia continue; 2422ee22a9d8SAlberto Garcia } 2423ee22a9d8SAlberto Garcia 2424ee22a9d8SAlberto Garcia /* Make sure that adding both COW regions to the QEMUIOVector 2425ee22a9d8SAlberto Garcia * does not exceed IOV_MAX */ 24265396234bSVladimir Sementsov-Ogievskiy if (qemu_iovec_subvec_niov(qiov, qiov_offset, bytes) > IOV_MAX - 2) { 2427ee22a9d8SAlberto Garcia continue; 2428ee22a9d8SAlberto Garcia } 2429ee22a9d8SAlberto Garcia 24305396234bSVladimir Sementsov-Ogievskiy m->data_qiov = qiov; 24315396234bSVladimir Sementsov-Ogievskiy m->data_qiov_offset = qiov_offset; 2432ee22a9d8SAlberto Garcia return true; 2433ee22a9d8SAlberto Garcia } 2434ee22a9d8SAlberto Garcia 2435ee22a9d8SAlberto Garcia return false; 2436ee22a9d8SAlberto Garcia } 2437ee22a9d8SAlberto Garcia 
243846cd1e8aSAlberto Garcia /* 243946cd1e8aSAlberto Garcia * Return 1 if the COW regions read as zeroes, 0 if not, < 0 on error. 244046cd1e8aSAlberto Garcia * Note that returning 0 does not guarantee non-zero data. 244146cd1e8aSAlberto Garcia */ 244246cd1e8aSAlberto Garcia static int is_zero_cow(BlockDriverState *bs, QCowL2Meta *m) 2443c8bb23cbSAnton Nefedov { 2444c8bb23cbSAnton Nefedov /* 2445c8bb23cbSAnton Nefedov * This check is designed for optimization shortcut so it must be 2446c8bb23cbSAnton Nefedov * efficient. 244746cd1e8aSAlberto Garcia * Instead of is_zero(), use bdrv_co_is_zero_fast() as it is 244846cd1e8aSAlberto Garcia * faster (but not as accurate and can result in false negatives). 2449c8bb23cbSAnton Nefedov */ 245046cd1e8aSAlberto Garcia int ret = bdrv_co_is_zero_fast(bs, m->offset + m->cow_start.offset, 245146cd1e8aSAlberto Garcia m->cow_start.nb_bytes); 245246cd1e8aSAlberto Garcia if (ret <= 0) { 245346cd1e8aSAlberto Garcia return ret; 245446cd1e8aSAlberto Garcia } 245546cd1e8aSAlberto Garcia 245646cd1e8aSAlberto Garcia return bdrv_co_is_zero_fast(bs, m->offset + m->cow_end.offset, 2457c8bb23cbSAnton Nefedov m->cow_end.nb_bytes); 2458c8bb23cbSAnton Nefedov } 2459c8bb23cbSAnton Nefedov 2460c8bb23cbSAnton Nefedov static int handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta) 2461c8bb23cbSAnton Nefedov { 2462c8bb23cbSAnton Nefedov BDRVQcow2State *s = bs->opaque; 2463c8bb23cbSAnton Nefedov QCowL2Meta *m; 2464c8bb23cbSAnton Nefedov 2465c8bb23cbSAnton Nefedov if (!(s->data_file->bs->supported_zero_flags & BDRV_REQ_NO_FALLBACK)) { 2466c8bb23cbSAnton Nefedov return 0; 2467c8bb23cbSAnton Nefedov } 2468c8bb23cbSAnton Nefedov 2469c8bb23cbSAnton Nefedov if (bs->encrypted) { 2470c8bb23cbSAnton Nefedov return 0; 2471c8bb23cbSAnton Nefedov } 2472c8bb23cbSAnton Nefedov 2473c8bb23cbSAnton Nefedov for (m = l2meta; m != NULL; m = m->next) { 2474c8bb23cbSAnton Nefedov int ret; 2475bf4a66eeSAlberto Garcia uint64_t start_offset = m->alloc_offset + 
m->cow_start.offset; 2476bf4a66eeSAlberto Garcia unsigned nb_bytes = m->cow_end.offset + m->cow_end.nb_bytes - 2477bf4a66eeSAlberto Garcia m->cow_start.offset; 2478c8bb23cbSAnton Nefedov 2479c8bb23cbSAnton Nefedov if (!m->cow_start.nb_bytes && !m->cow_end.nb_bytes) { 2480c8bb23cbSAnton Nefedov continue; 2481c8bb23cbSAnton Nefedov } 2482c8bb23cbSAnton Nefedov 248346cd1e8aSAlberto Garcia ret = is_zero_cow(bs, m); 248446cd1e8aSAlberto Garcia if (ret < 0) { 248546cd1e8aSAlberto Garcia return ret; 248646cd1e8aSAlberto Garcia } else if (ret == 0) { 2487c8bb23cbSAnton Nefedov continue; 2488c8bb23cbSAnton Nefedov } 2489c8bb23cbSAnton Nefedov 2490c8bb23cbSAnton Nefedov /* 2491c8bb23cbSAnton Nefedov * instead of writing zero COW buffers, 2492c8bb23cbSAnton Nefedov * efficiently zero out the whole clusters 2493c8bb23cbSAnton Nefedov */ 2494c8bb23cbSAnton Nefedov 2495bf4a66eeSAlberto Garcia ret = qcow2_pre_write_overlap_check(bs, 0, start_offset, nb_bytes, 2496c8bb23cbSAnton Nefedov true); 2497c8bb23cbSAnton Nefedov if (ret < 0) { 2498c8bb23cbSAnton Nefedov return ret; 2499c8bb23cbSAnton Nefedov } 2500c8bb23cbSAnton Nefedov 2501c8bb23cbSAnton Nefedov BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_SPACE); 2502bf4a66eeSAlberto Garcia ret = bdrv_co_pwrite_zeroes(s->data_file, start_offset, nb_bytes, 2503c8bb23cbSAnton Nefedov BDRV_REQ_NO_FALLBACK); 2504c8bb23cbSAnton Nefedov if (ret < 0) { 2505c8bb23cbSAnton Nefedov if (ret != -ENOTSUP && ret != -EAGAIN) { 2506c8bb23cbSAnton Nefedov return ret; 2507c8bb23cbSAnton Nefedov } 2508c8bb23cbSAnton Nefedov continue; 2509c8bb23cbSAnton Nefedov } 2510c8bb23cbSAnton Nefedov 2511c8bb23cbSAnton Nefedov trace_qcow2_skip_cow(qemu_coroutine_self(), m->offset, m->nb_clusters); 2512c8bb23cbSAnton Nefedov m->skip_cow = true; 2513c8bb23cbSAnton Nefedov } 2514c8bb23cbSAnton Nefedov return 0; 2515c8bb23cbSAnton Nefedov } 2516c8bb23cbSAnton Nefedov 25176aa7a263SVladimir Sementsov-Ogievskiy /* 25186aa7a263SVladimir Sementsov-Ogievskiy * 
qcow2_co_pwritev_task 25196aa7a263SVladimir Sementsov-Ogievskiy * Called with s->lock unlocked 25206aa7a263SVladimir Sementsov-Ogievskiy * l2meta - if not NULL, qcow2_co_pwritev_task() will consume it. Caller must 25216aa7a263SVladimir Sementsov-Ogievskiy * not use it somehow after qcow2_co_pwritev_task() call 25226aa7a263SVladimir Sementsov-Ogievskiy */ 25236aa7a263SVladimir Sementsov-Ogievskiy static coroutine_fn int qcow2_co_pwritev_task(BlockDriverState *bs, 25249c4269d5SAlberto Garcia uint64_t host_offset, 25256aa7a263SVladimir Sementsov-Ogievskiy uint64_t offset, uint64_t bytes, 25266aa7a263SVladimir Sementsov-Ogievskiy QEMUIOVector *qiov, 25276aa7a263SVladimir Sementsov-Ogievskiy uint64_t qiov_offset, 25286aa7a263SVladimir Sementsov-Ogievskiy QCowL2Meta *l2meta) 25296aa7a263SVladimir Sementsov-Ogievskiy { 25306aa7a263SVladimir Sementsov-Ogievskiy int ret; 25316aa7a263SVladimir Sementsov-Ogievskiy BDRVQcow2State *s = bs->opaque; 25326aa7a263SVladimir Sementsov-Ogievskiy void *crypt_buf = NULL; 25336aa7a263SVladimir Sementsov-Ogievskiy QEMUIOVector encrypted_qiov; 25346aa7a263SVladimir Sementsov-Ogievskiy 25356aa7a263SVladimir Sementsov-Ogievskiy if (bs->encrypted) { 25366aa7a263SVladimir Sementsov-Ogievskiy assert(s->crypto); 25376aa7a263SVladimir Sementsov-Ogievskiy assert(bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); 25386aa7a263SVladimir Sementsov-Ogievskiy crypt_buf = qemu_try_blockalign(bs->file->bs, bytes); 25396aa7a263SVladimir Sementsov-Ogievskiy if (crypt_buf == NULL) { 25406aa7a263SVladimir Sementsov-Ogievskiy ret = -ENOMEM; 25416aa7a263SVladimir Sementsov-Ogievskiy goto out_unlocked; 25426aa7a263SVladimir Sementsov-Ogievskiy } 25436aa7a263SVladimir Sementsov-Ogievskiy qemu_iovec_to_buf(qiov, qiov_offset, crypt_buf, bytes); 25446aa7a263SVladimir Sementsov-Ogievskiy 25459c4269d5SAlberto Garcia if (qcow2_co_encrypt(bs, host_offset, offset, crypt_buf, bytes) < 0) { 25466aa7a263SVladimir Sementsov-Ogievskiy ret = -EIO; 25476aa7a263SVladimir 
Sementsov-Ogievskiy goto out_unlocked; 25486aa7a263SVladimir Sementsov-Ogievskiy } 25496aa7a263SVladimir Sementsov-Ogievskiy 25506aa7a263SVladimir Sementsov-Ogievskiy qemu_iovec_init_buf(&encrypted_qiov, crypt_buf, bytes); 25516aa7a263SVladimir Sementsov-Ogievskiy qiov = &encrypted_qiov; 25526aa7a263SVladimir Sementsov-Ogievskiy qiov_offset = 0; 25536aa7a263SVladimir Sementsov-Ogievskiy } 25546aa7a263SVladimir Sementsov-Ogievskiy 25556aa7a263SVladimir Sementsov-Ogievskiy /* Try to efficiently initialize the physical space with zeroes */ 25566aa7a263SVladimir Sementsov-Ogievskiy ret = handle_alloc_space(bs, l2meta); 25576aa7a263SVladimir Sementsov-Ogievskiy if (ret < 0) { 25586aa7a263SVladimir Sementsov-Ogievskiy goto out_unlocked; 25596aa7a263SVladimir Sementsov-Ogievskiy } 25606aa7a263SVladimir Sementsov-Ogievskiy 25616aa7a263SVladimir Sementsov-Ogievskiy /* 25626aa7a263SVladimir Sementsov-Ogievskiy * If we need to do COW, check if it's possible to merge the 25636aa7a263SVladimir Sementsov-Ogievskiy * writing of the guest data together with that of the COW regions. 25646aa7a263SVladimir Sementsov-Ogievskiy * If it's not possible (or not necessary) then write the 25656aa7a263SVladimir Sementsov-Ogievskiy * guest data now. 
25666aa7a263SVladimir Sementsov-Ogievskiy */ 25676aa7a263SVladimir Sementsov-Ogievskiy if (!merge_cow(offset, bytes, qiov, qiov_offset, l2meta)) { 25686aa7a263SVladimir Sementsov-Ogievskiy BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); 25699c4269d5SAlberto Garcia trace_qcow2_writev_data(qemu_coroutine_self(), host_offset); 25709c4269d5SAlberto Garcia ret = bdrv_co_pwritev_part(s->data_file, host_offset, 25716aa7a263SVladimir Sementsov-Ogievskiy bytes, qiov, qiov_offset, 0); 25726aa7a263SVladimir Sementsov-Ogievskiy if (ret < 0) { 25736aa7a263SVladimir Sementsov-Ogievskiy goto out_unlocked; 25746aa7a263SVladimir Sementsov-Ogievskiy } 25756aa7a263SVladimir Sementsov-Ogievskiy } 25766aa7a263SVladimir Sementsov-Ogievskiy 25776aa7a263SVladimir Sementsov-Ogievskiy qemu_co_mutex_lock(&s->lock); 25786aa7a263SVladimir Sementsov-Ogievskiy 25796aa7a263SVladimir Sementsov-Ogievskiy ret = qcow2_handle_l2meta(bs, &l2meta, true); 25806aa7a263SVladimir Sementsov-Ogievskiy goto out_locked; 25816aa7a263SVladimir Sementsov-Ogievskiy 25826aa7a263SVladimir Sementsov-Ogievskiy out_unlocked: 25836aa7a263SVladimir Sementsov-Ogievskiy qemu_co_mutex_lock(&s->lock); 25846aa7a263SVladimir Sementsov-Ogievskiy 25856aa7a263SVladimir Sementsov-Ogievskiy out_locked: 25866aa7a263SVladimir Sementsov-Ogievskiy qcow2_handle_l2meta(bs, &l2meta, false); 25876aa7a263SVladimir Sementsov-Ogievskiy qemu_co_mutex_unlock(&s->lock); 25886aa7a263SVladimir Sementsov-Ogievskiy 25896aa7a263SVladimir Sementsov-Ogievskiy qemu_vfree(crypt_buf); 25906aa7a263SVladimir Sementsov-Ogievskiy 25916aa7a263SVladimir Sementsov-Ogievskiy return ret; 25926aa7a263SVladimir Sementsov-Ogievskiy } 25936aa7a263SVladimir Sementsov-Ogievskiy 2594d710cf57SVladimir Sementsov-Ogievskiy static coroutine_fn int qcow2_co_pwritev_task_entry(AioTask *task) 2595d710cf57SVladimir Sementsov-Ogievskiy { 2596d710cf57SVladimir Sementsov-Ogievskiy Qcow2AioTask *t = container_of(task, Qcow2AioTask, task); 2597d710cf57SVladimir Sementsov-Ogievskiy 
259810dabdc5SAlberto Garcia assert(!t->subcluster_type); 2599d710cf57SVladimir Sementsov-Ogievskiy 26009c4269d5SAlberto Garcia return qcow2_co_pwritev_task(t->bs, t->host_offset, 2601d710cf57SVladimir Sementsov-Ogievskiy t->offset, t->bytes, t->qiov, t->qiov_offset, 2602d710cf57SVladimir Sementsov-Ogievskiy t->l2meta); 2603d710cf57SVladimir Sementsov-Ogievskiy } 2604d710cf57SVladimir Sementsov-Ogievskiy 26055396234bSVladimir Sementsov-Ogievskiy static coroutine_fn int qcow2_co_pwritev_part( 2606e75abedaSVladimir Sementsov-Ogievskiy BlockDriverState *bs, int64_t offset, int64_t bytes, 2607e75abedaSVladimir Sementsov-Ogievskiy QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags) 2608585f8587Sbellard { 2609ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 2610d46a0bb2SKevin Wolf int offset_in_cluster; 261168d100e9SKevin Wolf int ret; 2612d46a0bb2SKevin Wolf unsigned int cur_bytes; /* number of sectors in current iteration */ 2613bfd0989aSAlberto Garcia uint64_t host_offset; 26148d2497c3SKevin Wolf QCowL2Meta *l2meta = NULL; 2615d710cf57SVladimir Sementsov-Ogievskiy AioTaskPool *aio = NULL; 2616c2271403SFrediano Ziglio 2617d46a0bb2SKevin Wolf trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes); 26183cce16f4SKevin Wolf 2619d710cf57SVladimir Sementsov-Ogievskiy while (bytes != 0 && aio_task_pool_status(aio) == 0) { 26203fc48d09SFrediano Ziglio 2621f50f88b9SKevin Wolf l2meta = NULL; 2622cf5c1a23SKevin Wolf 26233cce16f4SKevin Wolf trace_qcow2_writev_start_part(qemu_coroutine_self()); 2624d46a0bb2SKevin Wolf offset_in_cluster = offset_into_cluster(s, offset); 2625d46a0bb2SKevin Wolf cur_bytes = MIN(bytes, INT_MAX); 2626d46a0bb2SKevin Wolf if (bs->encrypted) { 2627d46a0bb2SKevin Wolf cur_bytes = MIN(cur_bytes, 2628d46a0bb2SKevin Wolf QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size 2629d46a0bb2SKevin Wolf - offset_in_cluster); 26305ebaa27eSFrediano Ziglio } 2631095a9c58Saliguori 26326aa7a263SVladimir Sementsov-Ogievskiy qemu_co_mutex_lock(&s->lock); 
26336aa7a263SVladimir Sementsov-Ogievskiy 2634bfd0989aSAlberto Garcia ret = qcow2_alloc_host_offset(bs, offset, &cur_bytes, 2635bfd0989aSAlberto Garcia &host_offset, &l2meta); 2636148da7eaSKevin Wolf if (ret < 0) { 26375447c3a0SVladimir Sementsov-Ogievskiy goto out_locked; 2638148da7eaSKevin Wolf } 2639148da7eaSKevin Wolf 2640bfd0989aSAlberto Garcia ret = qcow2_pre_write_overlap_check(bs, 0, host_offset, 26415447c3a0SVladimir Sementsov-Ogievskiy cur_bytes, true); 26425447c3a0SVladimir Sementsov-Ogievskiy if (ret < 0) { 26435447c3a0SVladimir Sementsov-Ogievskiy goto out_locked; 26445447c3a0SVladimir Sementsov-Ogievskiy } 26455447c3a0SVladimir Sementsov-Ogievskiy 26465447c3a0SVladimir Sementsov-Ogievskiy qemu_co_mutex_unlock(&s->lock); 26475447c3a0SVladimir Sementsov-Ogievskiy 2648d710cf57SVladimir Sementsov-Ogievskiy if (!aio && cur_bytes != bytes) { 2649d710cf57SVladimir Sementsov-Ogievskiy aio = aio_task_pool_new(QCOW2_MAX_WORKERS); 2650d710cf57SVladimir Sementsov-Ogievskiy } 2651d710cf57SVladimir Sementsov-Ogievskiy ret = qcow2_add_task(bs, aio, qcow2_co_pwritev_task_entry, 0, 2652bfd0989aSAlberto Garcia host_offset, offset, 26539c4269d5SAlberto Garcia cur_bytes, qiov, qiov_offset, l2meta); 26546aa7a263SVladimir Sementsov-Ogievskiy l2meta = NULL; /* l2meta is consumed by qcow2_co_pwritev_task() */ 2655c8bb23cbSAnton Nefedov if (ret < 0) { 26566aa7a263SVladimir Sementsov-Ogievskiy goto fail_nometa; 2657faf575c1SFrediano Ziglio } 2658faf575c1SFrediano Ziglio 2659d46a0bb2SKevin Wolf bytes -= cur_bytes; 2660d46a0bb2SKevin Wolf offset += cur_bytes; 26616aa7a263SVladimir Sementsov-Ogievskiy qiov_offset += cur_bytes; 2662d46a0bb2SKevin Wolf trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes); 26635ebaa27eSFrediano Ziglio } 26643fc48d09SFrediano Ziglio ret = 0; 2665faf575c1SFrediano Ziglio 26665447c3a0SVladimir Sementsov-Ogievskiy qemu_co_mutex_lock(&s->lock); 26675447c3a0SVladimir Sementsov-Ogievskiy 26685447c3a0SVladimir Sementsov-Ogievskiy out_locked: 
2669fd9fcd37SFam Zheng qcow2_handle_l2meta(bs, &l2meta, false); 26700fa9131aSKevin Wolf 2671a8c57408SPaolo Bonzini qemu_co_mutex_unlock(&s->lock); 2672a8c57408SPaolo Bonzini 26736aa7a263SVladimir Sementsov-Ogievskiy fail_nometa: 2674d710cf57SVladimir Sementsov-Ogievskiy if (aio) { 2675d710cf57SVladimir Sementsov-Ogievskiy aio_task_pool_wait_all(aio); 2676d710cf57SVladimir Sementsov-Ogievskiy if (ret == 0) { 2677d710cf57SVladimir Sementsov-Ogievskiy ret = aio_task_pool_status(aio); 2678d710cf57SVladimir Sementsov-Ogievskiy } 2679d710cf57SVladimir Sementsov-Ogievskiy g_free(aio); 2680d710cf57SVladimir Sementsov-Ogievskiy } 2681d710cf57SVladimir Sementsov-Ogievskiy 26823cce16f4SKevin Wolf trace_qcow2_writev_done_req(qemu_coroutine_self(), ret); 268342496d62SKevin Wolf 268468d100e9SKevin Wolf return ret; 2685585f8587Sbellard } 2686585f8587Sbellard 2687ec6d8912SKevin Wolf static int qcow2_inactivate(BlockDriverState *bs) 2688ec6d8912SKevin Wolf { 2689ec6d8912SKevin Wolf BDRVQcow2State *s = bs->opaque; 2690ec6d8912SKevin Wolf int ret, result = 0; 26915f72826eSVladimir Sementsov-Ogievskiy Error *local_err = NULL; 2692ec6d8912SKevin Wolf 2693644ddbb7SVladimir Sementsov-Ogievskiy qcow2_store_persistent_dirty_bitmaps(bs, true, &local_err); 269483a8c775SPavel Butsykin if (local_err != NULL) { 269583a8c775SPavel Butsykin result = -EINVAL; 2696132adb68SVladimir Sementsov-Ogievskiy error_reportf_err(local_err, "Lost persistent bitmaps during " 2697132adb68SVladimir Sementsov-Ogievskiy "inactivation of node '%s': ", 269883a8c775SPavel Butsykin bdrv_get_device_or_node_name(bs)); 269983a8c775SPavel Butsykin } 270083a8c775SPavel Butsykin 2701ec6d8912SKevin Wolf ret = qcow2_cache_flush(bs, s->l2_table_cache); 2702ec6d8912SKevin Wolf if (ret) { 2703ec6d8912SKevin Wolf result = ret; 2704ec6d8912SKevin Wolf error_report("Failed to flush the L2 table cache: %s", 2705ec6d8912SKevin Wolf strerror(-ret)); 2706ec6d8912SKevin Wolf } 2707ec6d8912SKevin Wolf 2708ec6d8912SKevin Wolf ret = 
qcow2_cache_flush(bs, s->refcount_block_cache); 2709ec6d8912SKevin Wolf if (ret) { 2710ec6d8912SKevin Wolf result = ret; 2711ec6d8912SKevin Wolf error_report("Failed to flush the refcount block cache: %s", 2712ec6d8912SKevin Wolf strerror(-ret)); 2713ec6d8912SKevin Wolf } 2714ec6d8912SKevin Wolf 2715ec6d8912SKevin Wolf if (result == 0) { 2716ec6d8912SKevin Wolf qcow2_mark_clean(bs); 2717ec6d8912SKevin Wolf } 2718ec6d8912SKevin Wolf 2719ec6d8912SKevin Wolf return result; 2720ec6d8912SKevin Wolf } 2721ec6d8912SKevin Wolf 272206e9cd19SHanna Reitz static void qcow2_do_close(BlockDriverState *bs, bool close_data_file) 2723585f8587Sbellard { 2724ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 2725de82815dSKevin Wolf qemu_vfree(s->l1_table); 2726cf93980eSMax Reitz /* else pre-write overlap checks in cache_destroy may crash */ 2727cf93980eSMax Reitz s->l1_table = NULL; 272829c1a730SKevin Wolf 2729140fd5a6SKevin Wolf if (!(s->flags & BDRV_O_INACTIVE)) { 2730ec6d8912SKevin Wolf qcow2_inactivate(bs); 27313b5e14c7SMax Reitz } 2732c61d0004SStefan Hajnoczi 2733279621c0SAlberto Garcia cache_clean_timer_del(bs); 2734e64d4072SAlberto Garcia qcow2_cache_destroy(s->l2_table_cache); 2735e64d4072SAlberto Garcia qcow2_cache_destroy(s->refcount_block_cache); 273629c1a730SKevin Wolf 2737b25b387fSDaniel P. Berrange qcrypto_block_free(s->crypto); 2738b25b387fSDaniel P. Berrange s->crypto = NULL; 27394aebf0f0SPan Nengyuan qapi_free_QCryptoBlockOpenOptions(s->crypto_opts); 2740f6fa64f6SDaniel P. 
Berrange 27416744cbabSKevin Wolf g_free(s->unknown_header_fields); 274275bab85cSKevin Wolf cleanup_unknown_header_ext(bs); 27436744cbabSKevin Wolf 27449b890bdcSKevin Wolf g_free(s->image_data_file); 2745e4603fe1SKevin Wolf g_free(s->image_backing_file); 2746e4603fe1SKevin Wolf g_free(s->image_backing_format); 2747e4603fe1SKevin Wolf 274806e9cd19SHanna Reitz if (close_data_file && has_data_file(bs)) { 27490e8c08beSKevin Wolf bdrv_unref_child(bs, s->data_file); 2750808cf3cbSVladimir Sementsov-Ogievskiy s->data_file = NULL; 27510e8c08beSKevin Wolf } 27520e8c08beSKevin Wolf 2753ed6ccf0fSKevin Wolf qcow2_refcount_close(bs); 275428c1202bSLi Zhi Hui qcow2_free_snapshots(bs); 2755585f8587Sbellard } 2756585f8587Sbellard 275706e9cd19SHanna Reitz static void qcow2_close(BlockDriverState *bs) 275806e9cd19SHanna Reitz { 275906e9cd19SHanna Reitz qcow2_do_close(bs, true); 276006e9cd19SHanna Reitz } 276106e9cd19SHanna Reitz 27622b148f39SPaolo Bonzini static void coroutine_fn qcow2_co_invalidate_cache(BlockDriverState *bs, 27632b148f39SPaolo Bonzini Error **errp) 276406d9260fSAnthony Liguori { 2765e6247c9cSVladimir Sementsov-Ogievskiy ERRP_GUARD(); 2766ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 276706e9cd19SHanna Reitz BdrvChild *data_file; 276806d9260fSAnthony Liguori int flags = s->flags; 2769b25b387fSDaniel P. Berrange QCryptoBlock *crypto = NULL; 2770acdfb480SKevin Wolf QDict *options; 27715a8a30dbSKevin Wolf int ret; 277206d9260fSAnthony Liguori 277306d9260fSAnthony Liguori /* 277406d9260fSAnthony Liguori * Backing files are read-only which makes all of their metadata immutable, 277506d9260fSAnthony Liguori * that means we don't have to worry about reopening them here. 277606d9260fSAnthony Liguori */ 277706d9260fSAnthony Liguori 2778b25b387fSDaniel P. Berrange crypto = s->crypto; 2779b25b387fSDaniel P. 
Berrange s->crypto = NULL; 278006d9260fSAnthony Liguori 278106e9cd19SHanna Reitz /* 278206e9cd19SHanna Reitz * Do not reopen s->data_file (i.e., have qcow2_do_close() not close it, 278306e9cd19SHanna Reitz * and then prevent qcow2_do_open() from opening it), because this function 278406e9cd19SHanna Reitz * runs in the I/O path and as such we must not invoke global-state 278506e9cd19SHanna Reitz * functions like bdrv_unref_child() and bdrv_open_child(). 278606e9cd19SHanna Reitz */ 278706d9260fSAnthony Liguori 278806e9cd19SHanna Reitz qcow2_do_close(bs, false); 278906e9cd19SHanna Reitz 279006e9cd19SHanna Reitz data_file = s->data_file; 2791ff99129aSKevin Wolf memset(s, 0, sizeof(BDRVQcow2State)); 279206e9cd19SHanna Reitz s->data_file = data_file; 279306e9cd19SHanna Reitz 2794d475e5acSKevin Wolf options = qdict_clone_shallow(bs->options); 27955a8a30dbSKevin Wolf 2796140fd5a6SKevin Wolf flags &= ~BDRV_O_INACTIVE; 27972b148f39SPaolo Bonzini qemu_co_mutex_lock(&s->lock); 279806e9cd19SHanna Reitz ret = qcow2_do_open(bs, options, flags, false, errp); 27992b148f39SPaolo Bonzini qemu_co_mutex_unlock(&s->lock); 2800cb3e7f08SMarc-André Lureau qobject_unref(options); 2801e6247c9cSVladimir Sementsov-Ogievskiy if (ret < 0) { 2802e6247c9cSVladimir Sementsov-Ogievskiy error_prepend(errp, "Could not reopen qcow2 layer: "); 2803191fb11bSKevin Wolf bs->drv = NULL; 28045a8a30dbSKevin Wolf return; 28055a8a30dbSKevin Wolf } 2806acdfb480SKevin Wolf 2807b25b387fSDaniel P. 
Berrange s->crypto = crypto; 280806d9260fSAnthony Liguori } 280906d9260fSAnthony Liguori 2810e24e49e6SKevin Wolf static size_t header_ext_add(char *buf, uint32_t magic, const void *s, 2811e24e49e6SKevin Wolf size_t len, size_t buflen) 2812756e6736SKevin Wolf { 2813e24e49e6SKevin Wolf QCowExtension *ext_backing_fmt = (QCowExtension*) buf; 2814e24e49e6SKevin Wolf size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7); 2815756e6736SKevin Wolf 2816e24e49e6SKevin Wolf if (buflen < ext_len) { 2817756e6736SKevin Wolf return -ENOSPC; 2818756e6736SKevin Wolf } 2819756e6736SKevin Wolf 2820e24e49e6SKevin Wolf *ext_backing_fmt = (QCowExtension) { 2821e24e49e6SKevin Wolf .magic = cpu_to_be32(magic), 2822e24e49e6SKevin Wolf .len = cpu_to_be32(len), 2823e24e49e6SKevin Wolf }; 28240647d47cSStefan Hajnoczi 28250647d47cSStefan Hajnoczi if (len) { 2826e24e49e6SKevin Wolf memcpy(buf + sizeof(QCowExtension), s, len); 28270647d47cSStefan Hajnoczi } 2828756e6736SKevin Wolf 2829e24e49e6SKevin Wolf return ext_len; 2830756e6736SKevin Wolf } 2831756e6736SKevin Wolf 2832e24e49e6SKevin Wolf /* 2833e24e49e6SKevin Wolf * Updates the qcow2 header, including the variable length parts of it, i.e. 2834e24e49e6SKevin Wolf * the backing file name and all extensions. qcow2 was not designed to allow 2835e24e49e6SKevin Wolf * such changes, so if we run out of space (we can only use the first cluster) 2836e24e49e6SKevin Wolf * this function may fail. 2837e24e49e6SKevin Wolf * 2838e24e49e6SKevin Wolf * Returns 0 on success, -errno in error cases. 
2839e24e49e6SKevin Wolf */ 2840e24e49e6SKevin Wolf int qcow2_update_header(BlockDriverState *bs) 2841e24e49e6SKevin Wolf { 2842ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 2843e24e49e6SKevin Wolf QCowHeader *header; 2844e24e49e6SKevin Wolf char *buf; 2845e24e49e6SKevin Wolf size_t buflen = s->cluster_size; 2846e24e49e6SKevin Wolf int ret; 2847e24e49e6SKevin Wolf uint64_t total_size; 2848e24e49e6SKevin Wolf uint32_t refcount_table_clusters; 28496744cbabSKevin Wolf size_t header_length; 285075bab85cSKevin Wolf Qcow2UnknownHeaderExtension *uext; 2851e24e49e6SKevin Wolf 2852e24e49e6SKevin Wolf buf = qemu_blockalign(bs, buflen); 2853e24e49e6SKevin Wolf 2854e24e49e6SKevin Wolf /* Header structure */ 2855e24e49e6SKevin Wolf header = (QCowHeader*) buf; 2856e24e49e6SKevin Wolf 2857e24e49e6SKevin Wolf if (buflen < sizeof(*header)) { 2858e24e49e6SKevin Wolf ret = -ENOSPC; 2859e24e49e6SKevin Wolf goto fail; 2860756e6736SKevin Wolf } 2861756e6736SKevin Wolf 28626744cbabSKevin Wolf header_length = sizeof(*header) + s->unknown_header_fields_size; 2863e24e49e6SKevin Wolf total_size = bs->total_sectors * BDRV_SECTOR_SIZE; 2864e24e49e6SKevin Wolf refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3); 2865e24e49e6SKevin Wolf 2866572ad978SDenis Plotnikov ret = validate_compression_type(s, NULL); 2867572ad978SDenis Plotnikov if (ret) { 2868572ad978SDenis Plotnikov goto fail; 2869572ad978SDenis Plotnikov } 2870572ad978SDenis Plotnikov 2871e24e49e6SKevin Wolf *header = (QCowHeader) { 28726744cbabSKevin Wolf /* Version 2 fields */ 2873e24e49e6SKevin Wolf .magic = cpu_to_be32(QCOW_MAGIC), 28746744cbabSKevin Wolf .version = cpu_to_be32(s->qcow_version), 2875e24e49e6SKevin Wolf .backing_file_offset = 0, 2876e24e49e6SKevin Wolf .backing_file_size = 0, 2877e24e49e6SKevin Wolf .cluster_bits = cpu_to_be32(s->cluster_bits), 2878e24e49e6SKevin Wolf .size = cpu_to_be64(total_size), 2879e24e49e6SKevin Wolf .crypt_method = cpu_to_be32(s->crypt_method_header), 
2880e24e49e6SKevin Wolf .l1_size = cpu_to_be32(s->l1_size), 2881e24e49e6SKevin Wolf .l1_table_offset = cpu_to_be64(s->l1_table_offset), 2882e24e49e6SKevin Wolf .refcount_table_offset = cpu_to_be64(s->refcount_table_offset), 2883e24e49e6SKevin Wolf .refcount_table_clusters = cpu_to_be32(refcount_table_clusters), 2884e24e49e6SKevin Wolf .nb_snapshots = cpu_to_be32(s->nb_snapshots), 2885e24e49e6SKevin Wolf .snapshots_offset = cpu_to_be64(s->snapshots_offset), 28866744cbabSKevin Wolf 28876744cbabSKevin Wolf /* Version 3 fields */ 28886744cbabSKevin Wolf .incompatible_features = cpu_to_be64(s->incompatible_features), 28896744cbabSKevin Wolf .compatible_features = cpu_to_be64(s->compatible_features), 28906744cbabSKevin Wolf .autoclear_features = cpu_to_be64(s->autoclear_features), 2891b6481f37SMax Reitz .refcount_order = cpu_to_be32(s->refcount_order), 28926744cbabSKevin Wolf .header_length = cpu_to_be32(header_length), 2893572ad978SDenis Plotnikov .compression_type = s->compression_type, 2894e24e49e6SKevin Wolf }; 2895e24e49e6SKevin Wolf 28966744cbabSKevin Wolf /* For older versions, write a shorter header */ 28976744cbabSKevin Wolf switch (s->qcow_version) { 28986744cbabSKevin Wolf case 2: 28996744cbabSKevin Wolf ret = offsetof(QCowHeader, incompatible_features); 29006744cbabSKevin Wolf break; 29016744cbabSKevin Wolf case 3: 29026744cbabSKevin Wolf ret = sizeof(*header); 29036744cbabSKevin Wolf break; 29046744cbabSKevin Wolf default: 2905b6c14762SJim Meyering ret = -EINVAL; 2906b6c14762SJim Meyering goto fail; 29076744cbabSKevin Wolf } 29086744cbabSKevin Wolf 29096744cbabSKevin Wolf buf += ret; 29106744cbabSKevin Wolf buflen -= ret; 29116744cbabSKevin Wolf memset(buf, 0, buflen); 29126744cbabSKevin Wolf 29136744cbabSKevin Wolf /* Preserve any unknown field in the header */ 29146744cbabSKevin Wolf if (s->unknown_header_fields_size) { 29156744cbabSKevin Wolf if (buflen < s->unknown_header_fields_size) { 29166744cbabSKevin Wolf ret = -ENOSPC; 29176744cbabSKevin Wolf goto 
fail; 29186744cbabSKevin Wolf } 29196744cbabSKevin Wolf 29206744cbabSKevin Wolf memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size); 29216744cbabSKevin Wolf buf += s->unknown_header_fields_size; 29226744cbabSKevin Wolf buflen -= s->unknown_header_fields_size; 29236744cbabSKevin Wolf } 2924e24e49e6SKevin Wolf 2925e24e49e6SKevin Wolf /* Backing file format header extension */ 2926e4603fe1SKevin Wolf if (s->image_backing_format) { 2927e24e49e6SKevin Wolf ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT, 2928e4603fe1SKevin Wolf s->image_backing_format, 2929e4603fe1SKevin Wolf strlen(s->image_backing_format), 2930e24e49e6SKevin Wolf buflen); 2931756e6736SKevin Wolf if (ret < 0) { 2932756e6736SKevin Wolf goto fail; 2933756e6736SKevin Wolf } 2934756e6736SKevin Wolf 2935e24e49e6SKevin Wolf buf += ret; 2936e24e49e6SKevin Wolf buflen -= ret; 2937e24e49e6SKevin Wolf } 2938756e6736SKevin Wolf 29399b890bdcSKevin Wolf /* External data file header extension */ 29409b890bdcSKevin Wolf if (has_data_file(bs) && s->image_data_file) { 29419b890bdcSKevin Wolf ret = header_ext_add(buf, QCOW2_EXT_MAGIC_DATA_FILE, 29429b890bdcSKevin Wolf s->image_data_file, strlen(s->image_data_file), 29439b890bdcSKevin Wolf buflen); 29449b890bdcSKevin Wolf if (ret < 0) { 29459b890bdcSKevin Wolf goto fail; 29469b890bdcSKevin Wolf } 29479b890bdcSKevin Wolf 29489b890bdcSKevin Wolf buf += ret; 29499b890bdcSKevin Wolf buflen -= ret; 29509b890bdcSKevin Wolf } 29519b890bdcSKevin Wolf 29524652b8f3SDaniel P. Berrange /* Full disk encryption header pointer extension */ 29534652b8f3SDaniel P. Berrange if (s->crypto_header.offset != 0) { 29543b698f52SPeter Maydell s->crypto_header.offset = cpu_to_be64(s->crypto_header.offset); 29553b698f52SPeter Maydell s->crypto_header.length = cpu_to_be64(s->crypto_header.length); 29564652b8f3SDaniel P. Berrange ret = header_ext_add(buf, QCOW2_EXT_MAGIC_CRYPTO_HEADER, 29574652b8f3SDaniel P. 
Berrange &s->crypto_header, sizeof(s->crypto_header), 29584652b8f3SDaniel P. Berrange buflen); 29593b698f52SPeter Maydell s->crypto_header.offset = be64_to_cpu(s->crypto_header.offset); 29603b698f52SPeter Maydell s->crypto_header.length = be64_to_cpu(s->crypto_header.length); 29614652b8f3SDaniel P. Berrange if (ret < 0) { 29624652b8f3SDaniel P. Berrange goto fail; 29634652b8f3SDaniel P. Berrange } 29644652b8f3SDaniel P. Berrange buf += ret; 29654652b8f3SDaniel P. Berrange buflen -= ret; 29664652b8f3SDaniel P. Berrange } 29674652b8f3SDaniel P. Berrange 2968e7be13adSEric Blake /* 2969e7be13adSEric Blake * Feature table. A mere 8 feature names occupies 392 bytes, and 2970e7be13adSEric Blake * when coupled with the v3 minimum header of 104 bytes plus the 2971e7be13adSEric Blake * 8-byte end-of-extension marker, that would leave only 8 bytes 2972e7be13adSEric Blake * for a backing file name in an image with 512-byte clusters. 2973e7be13adSEric Blake * Thus, we choose to omit this header for cluster sizes 4k and 2974e7be13adSEric Blake * smaller. 
2975e7be13adSEric Blake */ 2976e7be13adSEric Blake if (s->qcow_version >= 3 && s->cluster_size > 4096) { 2977bb40ebceSEric Blake static const Qcow2Feature features[] = { 2978c61d0004SStefan Hajnoczi { 2979c61d0004SStefan Hajnoczi .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, 2980c61d0004SStefan Hajnoczi .bit = QCOW2_INCOMPAT_DIRTY_BITNR, 2981c61d0004SStefan Hajnoczi .name = "dirty bit", 2982c61d0004SStefan Hajnoczi }, 2983bfe8043eSStefan Hajnoczi { 298469c98726SMax Reitz .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, 298569c98726SMax Reitz .bit = QCOW2_INCOMPAT_CORRUPT_BITNR, 298669c98726SMax Reitz .name = "corrupt bit", 298769c98726SMax Reitz }, 298869c98726SMax Reitz { 298993c24936SKevin Wolf .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, 299093c24936SKevin Wolf .bit = QCOW2_INCOMPAT_DATA_FILE_BITNR, 299193c24936SKevin Wolf .name = "external data file", 299293c24936SKevin Wolf }, 299393c24936SKevin Wolf { 2994572ad978SDenis Plotnikov .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, 2995572ad978SDenis Plotnikov .bit = QCOW2_INCOMPAT_COMPRESSION_BITNR, 2996572ad978SDenis Plotnikov .name = "compression type", 2997572ad978SDenis Plotnikov }, 2998572ad978SDenis Plotnikov { 29997be20252SAlberto Garcia .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, 30007be20252SAlberto Garcia .bit = QCOW2_INCOMPAT_EXTL2_BITNR, 30017be20252SAlberto Garcia .name = "extended L2 entries", 30027be20252SAlberto Garcia }, 30037be20252SAlberto Garcia { 3004bfe8043eSStefan Hajnoczi .type = QCOW2_FEAT_TYPE_COMPATIBLE, 3005bfe8043eSStefan Hajnoczi .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR, 3006bfe8043eSStefan Hajnoczi .name = "lazy refcounts", 3007bfe8043eSStefan Hajnoczi }, 3008bb40ebceSEric Blake { 3009bb40ebceSEric Blake .type = QCOW2_FEAT_TYPE_AUTOCLEAR, 3010bb40ebceSEric Blake .bit = QCOW2_AUTOCLEAR_BITMAPS_BITNR, 3011bb40ebceSEric Blake .name = "bitmaps", 3012bb40ebceSEric Blake }, 3013bb40ebceSEric Blake { 3014bb40ebceSEric Blake .type = QCOW2_FEAT_TYPE_AUTOCLEAR, 3015bb40ebceSEric Blake .bit = QCOW2_AUTOCLEAR_DATA_FILE_RAW_BITNR, 
3016bb40ebceSEric Blake .name = "raw external data", 3017bb40ebceSEric Blake }, 3018cfcc4c62SKevin Wolf }; 3019cfcc4c62SKevin Wolf 3020cfcc4c62SKevin Wolf ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE, 3021cfcc4c62SKevin Wolf features, sizeof(features), buflen); 3022cfcc4c62SKevin Wolf if (ret < 0) { 3023cfcc4c62SKevin Wolf goto fail; 3024cfcc4c62SKevin Wolf } 3025cfcc4c62SKevin Wolf buf += ret; 3026cfcc4c62SKevin Wolf buflen -= ret; 30271a4828c7SKevin Wolf } 3028cfcc4c62SKevin Wolf 302988ddffaeSVladimir Sementsov-Ogievskiy /* Bitmap extension */ 303088ddffaeSVladimir Sementsov-Ogievskiy if (s->nb_bitmaps > 0) { 303188ddffaeSVladimir Sementsov-Ogievskiy Qcow2BitmapHeaderExt bitmaps_header = { 303288ddffaeSVladimir Sementsov-Ogievskiy .nb_bitmaps = cpu_to_be32(s->nb_bitmaps), 303388ddffaeSVladimir Sementsov-Ogievskiy .bitmap_directory_size = 303488ddffaeSVladimir Sementsov-Ogievskiy cpu_to_be64(s->bitmap_directory_size), 303588ddffaeSVladimir Sementsov-Ogievskiy .bitmap_directory_offset = 303688ddffaeSVladimir Sementsov-Ogievskiy cpu_to_be64(s->bitmap_directory_offset) 303788ddffaeSVladimir Sementsov-Ogievskiy }; 303888ddffaeSVladimir Sementsov-Ogievskiy ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BITMAPS, 303988ddffaeSVladimir Sementsov-Ogievskiy &bitmaps_header, sizeof(bitmaps_header), 304088ddffaeSVladimir Sementsov-Ogievskiy buflen); 304188ddffaeSVladimir Sementsov-Ogievskiy if (ret < 0) { 304288ddffaeSVladimir Sementsov-Ogievskiy goto fail; 304388ddffaeSVladimir Sementsov-Ogievskiy } 304488ddffaeSVladimir Sementsov-Ogievskiy buf += ret; 304588ddffaeSVladimir Sementsov-Ogievskiy buflen -= ret; 304688ddffaeSVladimir Sementsov-Ogievskiy } 304788ddffaeSVladimir Sementsov-Ogievskiy 304875bab85cSKevin Wolf /* Keep unknown header extensions */ 304975bab85cSKevin Wolf QLIST_FOREACH(uext, &s->unknown_header_ext, next) { 305075bab85cSKevin Wolf ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen); 305175bab85cSKevin Wolf if (ret < 0) { 
305275bab85cSKevin Wolf goto fail; 305375bab85cSKevin Wolf } 305475bab85cSKevin Wolf 305575bab85cSKevin Wolf buf += ret; 305675bab85cSKevin Wolf buflen -= ret; 305775bab85cSKevin Wolf } 305875bab85cSKevin Wolf 3059e24e49e6SKevin Wolf /* End of header extensions */ 3060e24e49e6SKevin Wolf ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen); 3061756e6736SKevin Wolf if (ret < 0) { 3062756e6736SKevin Wolf goto fail; 3063756e6736SKevin Wolf } 3064756e6736SKevin Wolf 3065e24e49e6SKevin Wolf buf += ret; 3066e24e49e6SKevin Wolf buflen -= ret; 3067e24e49e6SKevin Wolf 3068e24e49e6SKevin Wolf /* Backing file name */ 3069e4603fe1SKevin Wolf if (s->image_backing_file) { 3070e4603fe1SKevin Wolf size_t backing_file_len = strlen(s->image_backing_file); 3071e24e49e6SKevin Wolf 3072e24e49e6SKevin Wolf if (buflen < backing_file_len) { 3073e24e49e6SKevin Wolf ret = -ENOSPC; 3074e24e49e6SKevin Wolf goto fail; 3075e24e49e6SKevin Wolf } 3076e24e49e6SKevin Wolf 307700ea1881SJim Meyering /* Using strncpy is ok here, since buf is not NUL-terminated. 
*/ 3078e4603fe1SKevin Wolf strncpy(buf, s->image_backing_file, buflen); 3079e24e49e6SKevin Wolf 3080e24e49e6SKevin Wolf header->backing_file_offset = cpu_to_be64(buf - ((char*) header)); 3081e24e49e6SKevin Wolf header->backing_file_size = cpu_to_be32(backing_file_len); 3082e24e49e6SKevin Wolf } 3083e24e49e6SKevin Wolf 3084e24e49e6SKevin Wolf /* Write the new header */ 308532cc71deSAlberto Faria ret = bdrv_pwrite(bs->file, 0, s->cluster_size, header, 0); 3086756e6736SKevin Wolf if (ret < 0) { 3087756e6736SKevin Wolf goto fail; 3088756e6736SKevin Wolf } 3089756e6736SKevin Wolf 3090756e6736SKevin Wolf ret = 0; 3091756e6736SKevin Wolf fail: 3092e24e49e6SKevin Wolf qemu_vfree(header); 3093756e6736SKevin Wolf return ret; 3094756e6736SKevin Wolf } 3095756e6736SKevin Wolf 3096756e6736SKevin Wolf static int qcow2_change_backing_file(BlockDriverState *bs, 3097756e6736SKevin Wolf const char *backing_file, const char *backing_fmt) 3098756e6736SKevin Wolf { 3099ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 3100e4603fe1SKevin Wolf 31016c3944dcSKevin Wolf /* Adding a backing file means that the external data file alone won't be 31026c3944dcSKevin Wolf * enough to make sense of the content */ 31036c3944dcSKevin Wolf if (backing_file && data_file_is_raw(bs)) { 31046c3944dcSKevin Wolf return -EINVAL; 31056c3944dcSKevin Wolf } 31066c3944dcSKevin Wolf 31074e876bcfSMax Reitz if (backing_file && strlen(backing_file) > 1023) { 31084e876bcfSMax Reitz return -EINVAL; 31094e876bcfSMax Reitz } 31104e876bcfSMax Reitz 3111998c2019SMax Reitz pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file), 3112998c2019SMax Reitz backing_file ?: ""); 3113e24e49e6SKevin Wolf pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); 3114e24e49e6SKevin Wolf pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); 3115e24e49e6SKevin Wolf 3116e4603fe1SKevin Wolf g_free(s->image_backing_file); 3117e4603fe1SKevin Wolf g_free(s->image_backing_format); 
3118e4603fe1SKevin Wolf 3119e4603fe1SKevin Wolf s->image_backing_file = backing_file ? g_strdup(bs->backing_file) : NULL; 3120e4603fe1SKevin Wolf s->image_backing_format = backing_fmt ? g_strdup(bs->backing_format) : NULL; 3121e4603fe1SKevin Wolf 3122e24e49e6SKevin Wolf return qcow2_update_header(bs); 3123756e6736SKevin Wolf } 3124756e6736SKevin Wolf 312560900b7bSKevin Wolf static int qcow2_set_up_encryption(BlockDriverState *bs, 312660900b7bSKevin Wolf QCryptoBlockCreateOptions *cryptoopts, 312760900b7bSKevin Wolf Error **errp) 312860900b7bSKevin Wolf { 312960900b7bSKevin Wolf BDRVQcow2State *s = bs->opaque; 313060900b7bSKevin Wolf QCryptoBlock *crypto = NULL; 313160900b7bSKevin Wolf int fmt, ret; 313260900b7bSKevin Wolf 313360900b7bSKevin Wolf switch (cryptoopts->format) { 313460900b7bSKevin Wolf case Q_CRYPTO_BLOCK_FORMAT_LUKS: 313560900b7bSKevin Wolf fmt = QCOW_CRYPT_LUKS; 313660900b7bSKevin Wolf break; 313760900b7bSKevin Wolf case Q_CRYPTO_BLOCK_FORMAT_QCOW: 313860900b7bSKevin Wolf fmt = QCOW_CRYPT_AES; 313960900b7bSKevin Wolf break; 314060900b7bSKevin Wolf default: 314160900b7bSKevin Wolf error_setg(errp, "Crypto format not supported in qcow2"); 314260900b7bSKevin Wolf return -EINVAL; 314360900b7bSKevin Wolf } 314460900b7bSKevin Wolf 31454652b8f3SDaniel P. Berrange s->crypt_method_header = fmt; 3146b25b387fSDaniel P. Berrange 31471cd9a787SDaniel P. Berrange crypto = qcrypto_block_create(cryptoopts, "encrypt.", 31484652b8f3SDaniel P. Berrange qcow2_crypto_hdr_init_func, 31494652b8f3SDaniel P. Berrange qcow2_crypto_hdr_write_func, 3150b25b387fSDaniel P. Berrange bs, errp); 3151b25b387fSDaniel P. Berrange if (!crypto) { 315260900b7bSKevin Wolf return -EINVAL; 3153b25b387fSDaniel P. Berrange } 3154b25b387fSDaniel P. Berrange 3155b25b387fSDaniel P. Berrange ret = qcow2_update_header(bs); 3156b25b387fSDaniel P. Berrange if (ret < 0) { 3157b25b387fSDaniel P. Berrange error_setg_errno(errp, -ret, "Could not write encryption header"); 3158b25b387fSDaniel P. 
Berrange goto out; 3159b25b387fSDaniel P. Berrange } 3160b25b387fSDaniel P. Berrange 316160900b7bSKevin Wolf ret = 0; 3162b25b387fSDaniel P. Berrange out: 3163b25b387fSDaniel P. Berrange qcrypto_block_free(crypto); 3164b25b387fSDaniel P. Berrange return ret; 3165b25b387fSDaniel P. Berrange } 3166b25b387fSDaniel P. Berrange 31677bc45dc1SMax Reitz /** 31687bc45dc1SMax Reitz * Preallocates metadata structures for data clusters between @offset (in the 31697bc45dc1SMax Reitz * guest disk) and @new_length (which is thus generally the new guest disk 31707bc45dc1SMax Reitz * size). 31717bc45dc1SMax Reitz * 31727bc45dc1SMax Reitz * Returns: 0 on success, -errno on failure. 31737bc45dc1SMax Reitz */ 317447e86b86SKevin Wolf static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset, 3175718c0fceSKevin Wolf uint64_t new_length, PreallocMode mode, 3176718c0fceSKevin Wolf Error **errp) 3177a35e1c17SKevin Wolf { 317893e32b3eSKevin Wolf BDRVQcow2State *s = bs->opaque; 3179d46a0bb2SKevin Wolf uint64_t bytes; 3180060bee89SKevin Wolf uint64_t host_offset = 0; 3181718c0fceSKevin Wolf int64_t file_length; 3182d46a0bb2SKevin Wolf unsigned int cur_bytes; 3183148da7eaSKevin Wolf int ret; 31841a52b73dSAlberto Garcia QCowL2Meta *meta = NULL, *m; 3185a35e1c17SKevin Wolf 31867bc45dc1SMax Reitz assert(offset <= new_length); 31877bc45dc1SMax Reitz bytes = new_length - offset; 3188a35e1c17SKevin Wolf 3189d46a0bb2SKevin Wolf while (bytes) { 3190f29fbf7cSKevin Wolf cur_bytes = MIN(bytes, QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size)); 3191bfd0989aSAlberto Garcia ret = qcow2_alloc_host_offset(bs, offset, &cur_bytes, 3192060bee89SKevin Wolf &host_offset, &meta); 3193148da7eaSKevin Wolf if (ret < 0) { 3194360bd074SKevin Wolf error_setg_errno(errp, -ret, "Allocating clusters failed"); 31951a52b73dSAlberto Garcia goto out; 3196a35e1c17SKevin Wolf } 3197a35e1c17SKevin Wolf 31981a52b73dSAlberto Garcia for (m = meta; m != NULL; m = m->next) { 31991a52b73dSAlberto Garcia m->prealloc = true; 
32001a52b73dSAlberto Garcia } 3201c792707fSStefan Hajnoczi 32021a52b73dSAlberto Garcia ret = qcow2_handle_l2meta(bs, &meta, true); 320319dbcbf7SKevin Wolf if (ret < 0) { 3204360bd074SKevin Wolf error_setg_errno(errp, -ret, "Mapping clusters failed"); 32051a52b73dSAlberto Garcia goto out; 3206f50f88b9SKevin Wolf } 3207f214978aSKevin Wolf 3208a35e1c17SKevin Wolf /* TODO Preallocate data if requested */ 3209a35e1c17SKevin Wolf 3210d46a0bb2SKevin Wolf bytes -= cur_bytes; 3211d46a0bb2SKevin Wolf offset += cur_bytes; 3212a35e1c17SKevin Wolf } 3213a35e1c17SKevin Wolf 3214a35e1c17SKevin Wolf /* 3215a35e1c17SKevin Wolf * It is expected that the image file is large enough to actually contain 3216a35e1c17SKevin Wolf * all of the allocated clusters (otherwise we get failing reads after 3217a35e1c17SKevin Wolf * EOF). Extend the image to the last allocated sector. 3218a35e1c17SKevin Wolf */ 3219718c0fceSKevin Wolf file_length = bdrv_getlength(s->data_file->bs); 3220718c0fceSKevin Wolf if (file_length < 0) { 3221718c0fceSKevin Wolf error_setg_errno(errp, -file_length, "Could not get file size"); 32221a52b73dSAlberto Garcia ret = file_length; 32231a52b73dSAlberto Garcia goto out; 3224718c0fceSKevin Wolf } 3225718c0fceSKevin Wolf 3226718c0fceSKevin Wolf if (host_offset + cur_bytes > file_length) { 3227718c0fceSKevin Wolf if (mode == PREALLOC_MODE_METADATA) { 3228718c0fceSKevin Wolf mode = PREALLOC_MODE_OFF; 3229718c0fceSKevin Wolf } 3230c80d8b06SMax Reitz ret = bdrv_co_truncate(s->data_file, host_offset + cur_bytes, false, 32317b8e4857SKevin Wolf mode, 0, errp); 323219dbcbf7SKevin Wolf if (ret < 0) { 32331a52b73dSAlberto Garcia goto out; 323419dbcbf7SKevin Wolf } 3235a35e1c17SKevin Wolf } 3236a35e1c17SKevin Wolf 32371a52b73dSAlberto Garcia ret = 0; 32381a52b73dSAlberto Garcia 32391a52b73dSAlberto Garcia out: 32401a52b73dSAlberto Garcia qcow2_handle_l2meta(bs, &meta, false); 32411a52b73dSAlberto Garcia return ret; 3242a35e1c17SKevin Wolf } 3243a35e1c17SKevin Wolf 
32447c5bcc42SStefan Hajnoczi /* qcow2_refcount_metadata_size: 32457c5bcc42SStefan Hajnoczi * @clusters: number of clusters to refcount (including data and L1/L2 tables) 32467c5bcc42SStefan Hajnoczi * @cluster_size: size of a cluster, in bytes 32477c5bcc42SStefan Hajnoczi * @refcount_order: refcount bits power-of-2 exponent 324812cc30a8SMax Reitz * @generous_increase: allow for the refcount table to be 1.5x as large as it 324912cc30a8SMax Reitz * needs to be 32507c5bcc42SStefan Hajnoczi * 32517c5bcc42SStefan Hajnoczi * Returns: Number of bytes required for refcount blocks and table metadata. 32527c5bcc42SStefan Hajnoczi */ 325312cc30a8SMax Reitz int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size, 325412cc30a8SMax Reitz int refcount_order, bool generous_increase, 325512cc30a8SMax Reitz uint64_t *refblock_count) 32567c5bcc42SStefan Hajnoczi { 32577c5bcc42SStefan Hajnoczi /* 32587c5bcc42SStefan Hajnoczi * Every host cluster is reference-counted, including metadata (even 32597c5bcc42SStefan Hajnoczi * refcount metadata is recursively included). 32607c5bcc42SStefan Hajnoczi * 32617c5bcc42SStefan Hajnoczi * An accurate formula for the size of refcount metadata size is difficult 32627c5bcc42SStefan Hajnoczi * to derive. An easier method of calculation is finding the fixed point 32637c5bcc42SStefan Hajnoczi * where no further refcount blocks or table clusters are required to 32647c5bcc42SStefan Hajnoczi * reference count every cluster. 
32657c5bcc42SStefan Hajnoczi */ 326602b1ecfaSAlberto Garcia int64_t blocks_per_table_cluster = cluster_size / REFTABLE_ENTRY_SIZE; 32677c5bcc42SStefan Hajnoczi int64_t refcounts_per_block = cluster_size * 8 / (1 << refcount_order); 32687c5bcc42SStefan Hajnoczi int64_t table = 0; /* number of refcount table clusters */ 32697c5bcc42SStefan Hajnoczi int64_t blocks = 0; /* number of refcount block clusters */ 32707c5bcc42SStefan Hajnoczi int64_t last; 32717c5bcc42SStefan Hajnoczi int64_t n = 0; 32727c5bcc42SStefan Hajnoczi 32737c5bcc42SStefan Hajnoczi do { 32747c5bcc42SStefan Hajnoczi last = n; 32757c5bcc42SStefan Hajnoczi blocks = DIV_ROUND_UP(clusters + table + blocks, refcounts_per_block); 32767c5bcc42SStefan Hajnoczi table = DIV_ROUND_UP(blocks, blocks_per_table_cluster); 32777c5bcc42SStefan Hajnoczi n = clusters + blocks + table; 327812cc30a8SMax Reitz 327912cc30a8SMax Reitz if (n == last && generous_increase) { 328012cc30a8SMax Reitz clusters += DIV_ROUND_UP(table, 2); 328112cc30a8SMax Reitz n = 0; /* force another loop */ 328212cc30a8SMax Reitz generous_increase = false; 328312cc30a8SMax Reitz } 32847c5bcc42SStefan Hajnoczi } while (n != last); 32857c5bcc42SStefan Hajnoczi 328612cc30a8SMax Reitz if (refblock_count) { 328712cc30a8SMax Reitz *refblock_count = blocks; 328812cc30a8SMax Reitz } 328912cc30a8SMax Reitz 32907c5bcc42SStefan Hajnoczi return (blocks + table) * cluster_size; 32917c5bcc42SStefan Hajnoczi } 32927c5bcc42SStefan Hajnoczi 329395c67e3bSStefan Hajnoczi /** 329495c67e3bSStefan Hajnoczi * qcow2_calc_prealloc_size: 329595c67e3bSStefan Hajnoczi * @total_size: virtual disk size in bytes 329695c67e3bSStefan Hajnoczi * @cluster_size: cluster size in bytes 329795c67e3bSStefan Hajnoczi * @refcount_order: refcount bits power-of-2 exponent 32980dd07b29SAlberto Garcia * @extended_l2: true if the image has extended L2 entries 3299a9420734SKevin Wolf * 330095c67e3bSStefan Hajnoczi * Returns: Total number of bytes required for the fully allocated image 
330195c67e3bSStefan Hajnoczi * (including metadata). 3302a9420734SKevin Wolf */ 330395c67e3bSStefan Hajnoczi static int64_t qcow2_calc_prealloc_size(int64_t total_size, 330495c67e3bSStefan Hajnoczi size_t cluster_size, 33050dd07b29SAlberto Garcia int refcount_order, 33060dd07b29SAlberto Garcia bool extended_l2) 330795c67e3bSStefan Hajnoczi { 33080e4271b7SHu Tao int64_t meta_size = 0; 33097c5bcc42SStefan Hajnoczi uint64_t nl1e, nl2e; 33109e029689SAlberto Garcia int64_t aligned_total_size = ROUND_UP(total_size, cluster_size); 33110dd07b29SAlberto Garcia size_t l2e_size = extended_l2 ? L2E_SIZE_EXTENDED : L2E_SIZE_NORMAL; 33120e4271b7SHu Tao 33130e4271b7SHu Tao /* header: 1 cluster */ 33140e4271b7SHu Tao meta_size += cluster_size; 33150e4271b7SHu Tao 33160e4271b7SHu Tao /* total size of L2 tables */ 33170e4271b7SHu Tao nl2e = aligned_total_size / cluster_size; 33180dd07b29SAlberto Garcia nl2e = ROUND_UP(nl2e, cluster_size / l2e_size); 33190dd07b29SAlberto Garcia meta_size += nl2e * l2e_size; 33200e4271b7SHu Tao 33210e4271b7SHu Tao /* total size of L1 tables */ 33220dd07b29SAlberto Garcia nl1e = nl2e * l2e_size / cluster_size; 332302b1ecfaSAlberto Garcia nl1e = ROUND_UP(nl1e, cluster_size / L1E_SIZE); 332402b1ecfaSAlberto Garcia meta_size += nl1e * L1E_SIZE; 33250e4271b7SHu Tao 33267c5bcc42SStefan Hajnoczi /* total size of refcount table and blocks */ 33277c5bcc42SStefan Hajnoczi meta_size += qcow2_refcount_metadata_size( 33287c5bcc42SStefan Hajnoczi (meta_size + aligned_total_size) / cluster_size, 332912cc30a8SMax Reitz cluster_size, refcount_order, false, NULL); 33300e4271b7SHu Tao 333195c67e3bSStefan Hajnoczi return meta_size + aligned_total_size; 333295c67e3bSStefan Hajnoczi } 333395c67e3bSStefan Hajnoczi 33347be20252SAlberto Garcia static bool validate_cluster_size(size_t cluster_size, bool extended_l2, 33357be20252SAlberto Garcia Error **errp) 333695c67e3bSStefan Hajnoczi { 333729ca9e45SKevin Wolf int cluster_bits = ctz32(cluster_size); 333895c67e3bSStefan 
Hajnoczi if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS || 333995c67e3bSStefan Hajnoczi (1 << cluster_bits) != cluster_size) 334095c67e3bSStefan Hajnoczi { 334195c67e3bSStefan Hajnoczi error_setg(errp, "Cluster size must be a power of two between %d and " 334295c67e3bSStefan Hajnoczi "%dk", 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10)); 334329ca9e45SKevin Wolf return false; 334429ca9e45SKevin Wolf } 33457be20252SAlberto Garcia 33467be20252SAlberto Garcia if (extended_l2) { 33477be20252SAlberto Garcia unsigned min_cluster_size = 33487be20252SAlberto Garcia (1 << MIN_CLUSTER_BITS) * QCOW_EXTL2_SUBCLUSTERS_PER_CLUSTER; 33497be20252SAlberto Garcia if (cluster_size < min_cluster_size) { 33507be20252SAlberto Garcia error_setg(errp, "Extended L2 entries are only supported with " 33517be20252SAlberto Garcia "cluster sizes of at least %u bytes", min_cluster_size); 33527be20252SAlberto Garcia return false; 33537be20252SAlberto Garcia } 33547be20252SAlberto Garcia } 33557be20252SAlberto Garcia 335629ca9e45SKevin Wolf return true; 335729ca9e45SKevin Wolf } 335829ca9e45SKevin Wolf 33597be20252SAlberto Garcia static size_t qcow2_opt_get_cluster_size_del(QemuOpts *opts, bool extended_l2, 33607be20252SAlberto Garcia Error **errp) 336129ca9e45SKevin Wolf { 336229ca9e45SKevin Wolf size_t cluster_size; 336329ca9e45SKevin Wolf 336429ca9e45SKevin Wolf cluster_size = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE, 336529ca9e45SKevin Wolf DEFAULT_CLUSTER_SIZE); 33667be20252SAlberto Garcia if (!validate_cluster_size(cluster_size, extended_l2, errp)) { 33670eb4a8c1SStefan Hajnoczi return 0; 336895c67e3bSStefan Hajnoczi } 33690eb4a8c1SStefan Hajnoczi return cluster_size; 33700eb4a8c1SStefan Hajnoczi } 33710eb4a8c1SStefan Hajnoczi 33720eb4a8c1SStefan Hajnoczi static int qcow2_opt_get_version_del(QemuOpts *opts, Error **errp) 33730eb4a8c1SStefan Hajnoczi { 33740eb4a8c1SStefan Hajnoczi char *buf; 33750eb4a8c1SStefan Hajnoczi int ret; 33760eb4a8c1SStefan 
Hajnoczi 33770eb4a8c1SStefan Hajnoczi buf = qemu_opt_get_del(opts, BLOCK_OPT_COMPAT_LEVEL); 33780eb4a8c1SStefan Hajnoczi if (!buf) { 33790eb4a8c1SStefan Hajnoczi ret = 3; /* default */ 33800eb4a8c1SStefan Hajnoczi } else if (!strcmp(buf, "0.10")) { 33810eb4a8c1SStefan Hajnoczi ret = 2; 33820eb4a8c1SStefan Hajnoczi } else if (!strcmp(buf, "1.1")) { 33830eb4a8c1SStefan Hajnoczi ret = 3; 33840eb4a8c1SStefan Hajnoczi } else { 33850eb4a8c1SStefan Hajnoczi error_setg(errp, "Invalid compatibility level: '%s'", buf); 33860eb4a8c1SStefan Hajnoczi ret = -EINVAL; 33870eb4a8c1SStefan Hajnoczi } 33880eb4a8c1SStefan Hajnoczi g_free(buf); 33890eb4a8c1SStefan Hajnoczi return ret; 33900eb4a8c1SStefan Hajnoczi } 33910eb4a8c1SStefan Hajnoczi 33920eb4a8c1SStefan Hajnoczi static uint64_t qcow2_opt_get_refcount_bits_del(QemuOpts *opts, int version, 33930eb4a8c1SStefan Hajnoczi Error **errp) 33940eb4a8c1SStefan Hajnoczi { 33950eb4a8c1SStefan Hajnoczi uint64_t refcount_bits; 33960eb4a8c1SStefan Hajnoczi 33970eb4a8c1SStefan Hajnoczi refcount_bits = qemu_opt_get_number_del(opts, BLOCK_OPT_REFCOUNT_BITS, 16); 33980eb4a8c1SStefan Hajnoczi if (refcount_bits > 64 || !is_power_of_2(refcount_bits)) { 33990eb4a8c1SStefan Hajnoczi error_setg(errp, "Refcount width must be a power of two and may not " 34000eb4a8c1SStefan Hajnoczi "exceed 64 bits"); 34010eb4a8c1SStefan Hajnoczi return 0; 34020eb4a8c1SStefan Hajnoczi } 34030eb4a8c1SStefan Hajnoczi 34040eb4a8c1SStefan Hajnoczi if (version < 3 && refcount_bits != 16) { 34050eb4a8c1SStefan Hajnoczi error_setg(errp, "Different refcount widths than 16 bits require " 34060eb4a8c1SStefan Hajnoczi "compatibility level 1.1 or above (use compat=1.1 or " 34070eb4a8c1SStefan Hajnoczi "greater)"); 34080eb4a8c1SStefan Hajnoczi return 0; 34090eb4a8c1SStefan Hajnoczi } 34100eb4a8c1SStefan Hajnoczi 34110eb4a8c1SStefan Hajnoczi return refcount_bits; 34120eb4a8c1SStefan Hajnoczi } 34130eb4a8c1SStefan Hajnoczi 3414c274393aSStefan Hajnoczi static int coroutine_fn 
341560900b7bSKevin Wolf qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) 34160eb4a8c1SStefan Hajnoczi { 341729ca9e45SKevin Wolf BlockdevCreateOptionsQcow2 *qcow2_opts; 34180eb4a8c1SStefan Hajnoczi QDict *options; 341995c67e3bSStefan Hajnoczi 342095c67e3bSStefan Hajnoczi /* 342195c67e3bSStefan Hajnoczi * Open the image file and write a minimal qcow2 header. 342295c67e3bSStefan Hajnoczi * 342395c67e3bSStefan Hajnoczi * We keep things simple and start with a zero-sized image. We also 342495c67e3bSStefan Hajnoczi * do without refcount blocks or a L1 table for now. We'll fix the 342595c67e3bSStefan Hajnoczi * inconsistency later. 342695c67e3bSStefan Hajnoczi * 342795c67e3bSStefan Hajnoczi * We do need a refcount table because growing the refcount table means 3428a951a631SEric Blake * allocating two new refcount blocks - the second of which would be at 342995c67e3bSStefan Hajnoczi * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file 343095c67e3bSStefan Hajnoczi * size for any qcow2 image. 
343195c67e3bSStefan Hajnoczi */ 3432e1d74bc6SKevin Wolf BlockBackend *blk = NULL; 3433e1d74bc6SKevin Wolf BlockDriverState *bs = NULL; 3434dcc98687SKevin Wolf BlockDriverState *data_bs = NULL; 343595c67e3bSStefan Hajnoczi QCowHeader *header; 343629ca9e45SKevin Wolf size_t cluster_size; 343729ca9e45SKevin Wolf int version; 343829ca9e45SKevin Wolf int refcount_order; 343995c67e3bSStefan Hajnoczi uint64_t *refcount_table; 344095c67e3bSStefan Hajnoczi int ret; 3441572ad978SDenis Plotnikov uint8_t compression_type = QCOW2_COMPRESSION_TYPE_ZLIB; 344295c67e3bSStefan Hajnoczi 344329ca9e45SKevin Wolf assert(create_options->driver == BLOCKDEV_DRIVER_QCOW2); 344429ca9e45SKevin Wolf qcow2_opts = &create_options->u.qcow2; 344529ca9e45SKevin Wolf 3446e1d74bc6SKevin Wolf bs = bdrv_open_blockdev_ref(qcow2_opts->file, errp); 3447e1d74bc6SKevin Wolf if (bs == NULL) { 3448e1d74bc6SKevin Wolf return -EIO; 3449e1d74bc6SKevin Wolf } 3450e1d74bc6SKevin Wolf 3451e1d74bc6SKevin Wolf /* Validate options and set default values */ 345229ca9e45SKevin Wolf if (!QEMU_IS_ALIGNED(qcow2_opts->size, BDRV_SECTOR_SIZE)) { 34533afea402SAlberto Garcia error_setg(errp, "Image size must be a multiple of %u bytes", 34543afea402SAlberto Garcia (unsigned) BDRV_SECTOR_SIZE); 345529ca9e45SKevin Wolf ret = -EINVAL; 345629ca9e45SKevin Wolf goto out; 345729ca9e45SKevin Wolf } 345829ca9e45SKevin Wolf 345929ca9e45SKevin Wolf if (qcow2_opts->has_version) { 346029ca9e45SKevin Wolf switch (qcow2_opts->version) { 346129ca9e45SKevin Wolf case BLOCKDEV_QCOW2_VERSION_V2: 346229ca9e45SKevin Wolf version = 2; 346329ca9e45SKevin Wolf break; 346429ca9e45SKevin Wolf case BLOCKDEV_QCOW2_VERSION_V3: 346529ca9e45SKevin Wolf version = 3; 346629ca9e45SKevin Wolf break; 346729ca9e45SKevin Wolf default: 346829ca9e45SKevin Wolf g_assert_not_reached(); 346929ca9e45SKevin Wolf } 347029ca9e45SKevin Wolf } else { 347129ca9e45SKevin Wolf version = 3; 347229ca9e45SKevin Wolf } 347329ca9e45SKevin Wolf 347429ca9e45SKevin Wolf if 
(qcow2_opts->has_cluster_size) { 347529ca9e45SKevin Wolf cluster_size = qcow2_opts->cluster_size; 347629ca9e45SKevin Wolf } else { 347729ca9e45SKevin Wolf cluster_size = DEFAULT_CLUSTER_SIZE; 347829ca9e45SKevin Wolf } 347929ca9e45SKevin Wolf 34807be20252SAlberto Garcia if (!qcow2_opts->has_extended_l2) { 34817be20252SAlberto Garcia qcow2_opts->extended_l2 = false; 34827be20252SAlberto Garcia } 34837be20252SAlberto Garcia if (qcow2_opts->extended_l2) { 34847be20252SAlberto Garcia if (version < 3) { 34857be20252SAlberto Garcia error_setg(errp, "Extended L2 entries are only supported with " 34867be20252SAlberto Garcia "compatibility level 1.1 and above (use version=v3 or " 34877be20252SAlberto Garcia "greater)"); 34887be20252SAlberto Garcia ret = -EINVAL; 34897be20252SAlberto Garcia goto out; 34907be20252SAlberto Garcia } 34917be20252SAlberto Garcia } 34927be20252SAlberto Garcia 34937be20252SAlberto Garcia if (!validate_cluster_size(cluster_size, qcow2_opts->extended_l2, errp)) { 3494e1d74bc6SKevin Wolf ret = -EINVAL; 3495e1d74bc6SKevin Wolf goto out; 349629ca9e45SKevin Wolf } 349729ca9e45SKevin Wolf 349829ca9e45SKevin Wolf if (!qcow2_opts->has_preallocation) { 349929ca9e45SKevin Wolf qcow2_opts->preallocation = PREALLOC_MODE_OFF; 350029ca9e45SKevin Wolf } 350129ca9e45SKevin Wolf if (qcow2_opts->has_backing_file && 35022118771dSAlberto Garcia qcow2_opts->preallocation != PREALLOC_MODE_OFF && 35032118771dSAlberto Garcia !qcow2_opts->extended_l2) 350429ca9e45SKevin Wolf { 35052118771dSAlberto Garcia error_setg(errp, "Backing file and preallocation can only be used at " 35062118771dSAlberto Garcia "the same time if extended_l2 is on"); 3507e1d74bc6SKevin Wolf ret = -EINVAL; 3508e1d74bc6SKevin Wolf goto out; 350929ca9e45SKevin Wolf } 351029ca9e45SKevin Wolf if (qcow2_opts->has_backing_fmt && !qcow2_opts->has_backing_file) { 351129ca9e45SKevin Wolf error_setg(errp, "Backing format cannot be used without backing file"); 3512e1d74bc6SKevin Wolf ret = -EINVAL; 
3513e1d74bc6SKevin Wolf goto out; 351429ca9e45SKevin Wolf } 351529ca9e45SKevin Wolf 351629ca9e45SKevin Wolf if (!qcow2_opts->has_lazy_refcounts) { 351729ca9e45SKevin Wolf qcow2_opts->lazy_refcounts = false; 351829ca9e45SKevin Wolf } 351929ca9e45SKevin Wolf if (version < 3 && qcow2_opts->lazy_refcounts) { 352029ca9e45SKevin Wolf error_setg(errp, "Lazy refcounts only supported with compatibility " 3521b76b4f60SKevin Wolf "level 1.1 and above (use version=v3 or greater)"); 3522e1d74bc6SKevin Wolf ret = -EINVAL; 3523e1d74bc6SKevin Wolf goto out; 352429ca9e45SKevin Wolf } 352529ca9e45SKevin Wolf 352629ca9e45SKevin Wolf if (!qcow2_opts->has_refcount_bits) { 352729ca9e45SKevin Wolf qcow2_opts->refcount_bits = 16; 352829ca9e45SKevin Wolf } 352929ca9e45SKevin Wolf if (qcow2_opts->refcount_bits > 64 || 353029ca9e45SKevin Wolf !is_power_of_2(qcow2_opts->refcount_bits)) 353129ca9e45SKevin Wolf { 353229ca9e45SKevin Wolf error_setg(errp, "Refcount width must be a power of two and may not " 353329ca9e45SKevin Wolf "exceed 64 bits"); 3534e1d74bc6SKevin Wolf ret = -EINVAL; 3535e1d74bc6SKevin Wolf goto out; 353629ca9e45SKevin Wolf } 353729ca9e45SKevin Wolf if (version < 3 && qcow2_opts->refcount_bits != 16) { 353829ca9e45SKevin Wolf error_setg(errp, "Different refcount widths than 16 bits require " 3539b76b4f60SKevin Wolf "compatibility level 1.1 or above (use version=v3 or " 354029ca9e45SKevin Wolf "greater)"); 3541e1d74bc6SKevin Wolf ret = -EINVAL; 3542e1d74bc6SKevin Wolf goto out; 354329ca9e45SKevin Wolf } 354429ca9e45SKevin Wolf refcount_order = ctz32(qcow2_opts->refcount_bits); 354529ca9e45SKevin Wolf 35466c3944dcSKevin Wolf if (qcow2_opts->data_file_raw && !qcow2_opts->data_file) { 35476c3944dcSKevin Wolf error_setg(errp, "data-file-raw requires data-file"); 35486c3944dcSKevin Wolf ret = -EINVAL; 35496c3944dcSKevin Wolf goto out; 35506c3944dcSKevin Wolf } 35516c3944dcSKevin Wolf if (qcow2_opts->data_file_raw && qcow2_opts->has_backing_file) { 35526c3944dcSKevin Wolf 
error_setg(errp, "Backing file and data-file-raw cannot be used at " 35536c3944dcSKevin Wolf "the same time"); 35546c3944dcSKevin Wolf ret = -EINVAL; 35556c3944dcSKevin Wolf goto out; 35566c3944dcSKevin Wolf } 355748410829SMax Reitz if (qcow2_opts->data_file_raw && 355848410829SMax Reitz qcow2_opts->preallocation == PREALLOC_MODE_OFF) 355948410829SMax Reitz { 356048410829SMax Reitz /* 356148410829SMax Reitz * data-file-raw means that "the external data file can be 356248410829SMax Reitz * read as a consistent standalone raw image without looking 356348410829SMax Reitz * at the qcow2 metadata." It does not say that the metadata 356448410829SMax Reitz * must be ignored, though (and the qcow2 driver in fact does 356548410829SMax Reitz * not ignore it), so the L1/L2 tables must be present and 356648410829SMax Reitz * give a 1:1 mapping, so you get the same result regardless 356748410829SMax Reitz * of whether you look at the metadata or whether you ignore 356848410829SMax Reitz * it. 356948410829SMax Reitz */ 357048410829SMax Reitz qcow2_opts->preallocation = PREALLOC_MODE_METADATA; 357148410829SMax Reitz 357248410829SMax Reitz /* 357348410829SMax Reitz * Cannot use preallocation with backing files, but giving a 357448410829SMax Reitz * backing file when specifying data_file_raw is an error 357548410829SMax Reitz * anyway. 
357648410829SMax Reitz */ 357748410829SMax Reitz assert(!qcow2_opts->has_backing_file); 357848410829SMax Reitz } 35796c3944dcSKevin Wolf 3580dcc98687SKevin Wolf if (qcow2_opts->data_file) { 3581dcc98687SKevin Wolf if (version < 3) { 3582dcc98687SKevin Wolf error_setg(errp, "External data files are only supported with " 3583dcc98687SKevin Wolf "compatibility level 1.1 and above (use version=v3 or " 3584dcc98687SKevin Wolf "greater)"); 3585dcc98687SKevin Wolf ret = -EINVAL; 3586dcc98687SKevin Wolf goto out; 3587dcc98687SKevin Wolf } 3588dcc98687SKevin Wolf data_bs = bdrv_open_blockdev_ref(qcow2_opts->data_file, errp); 3589a0cf8363SKevin Wolf if (data_bs == NULL) { 3590dcc98687SKevin Wolf ret = -EIO; 3591dcc98687SKevin Wolf goto out; 3592dcc98687SKevin Wolf } 3593dcc98687SKevin Wolf } 359429ca9e45SKevin Wolf 3595572ad978SDenis Plotnikov if (qcow2_opts->has_compression_type && 3596572ad978SDenis Plotnikov qcow2_opts->compression_type != QCOW2_COMPRESSION_TYPE_ZLIB) { 3597572ad978SDenis Plotnikov 3598572ad978SDenis Plotnikov ret = -EINVAL; 3599572ad978SDenis Plotnikov 3600572ad978SDenis Plotnikov if (version < 3) { 3601572ad978SDenis Plotnikov error_setg(errp, "Non-zlib compression type is only supported with " 3602572ad978SDenis Plotnikov "compatibility level 1.1 and above (use version=v3 or " 3603572ad978SDenis Plotnikov "greater)"); 3604572ad978SDenis Plotnikov goto out; 3605572ad978SDenis Plotnikov } 3606572ad978SDenis Plotnikov 3607572ad978SDenis Plotnikov switch (qcow2_opts->compression_type) { 3608d298ac10SDenis Plotnikov #ifdef CONFIG_ZSTD 3609d298ac10SDenis Plotnikov case QCOW2_COMPRESSION_TYPE_ZSTD: 3610d298ac10SDenis Plotnikov break; 3611d298ac10SDenis Plotnikov #endif 3612572ad978SDenis Plotnikov default: 3613572ad978SDenis Plotnikov error_setg(errp, "Unknown compression type"); 3614572ad978SDenis Plotnikov goto out; 3615572ad978SDenis Plotnikov } 3616572ad978SDenis Plotnikov 3617572ad978SDenis Plotnikov compression_type = qcow2_opts->compression_type; 
3618572ad978SDenis Plotnikov } 3619572ad978SDenis Plotnikov 362029ca9e45SKevin Wolf /* Create BlockBackend to write to the image */ 3621a3aeeab5SEric Blake blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL, 3622a3aeeab5SEric Blake errp); 3623a3aeeab5SEric Blake if (!blk) { 3624a3aeeab5SEric Blake ret = -EPERM; 3625cbf2b7c4SKevin Wolf goto out; 3626a9420734SKevin Wolf } 362723588797SKevin Wolf blk_set_allow_write_beyond_eof(blk, true); 362823588797SKevin Wolf 3629a9420734SKevin Wolf /* Write the header */ 3630f8413b3cSKevin Wolf QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header)); 3631f8413b3cSKevin Wolf header = g_malloc0(cluster_size); 3632f8413b3cSKevin Wolf *header = (QCowHeader) { 3633f8413b3cSKevin Wolf .magic = cpu_to_be32(QCOW_MAGIC), 3634f8413b3cSKevin Wolf .version = cpu_to_be32(version), 36350eb4a8c1SStefan Hajnoczi .cluster_bits = cpu_to_be32(ctz32(cluster_size)), 3636f8413b3cSKevin Wolf .size = cpu_to_be64(0), 3637f8413b3cSKevin Wolf .l1_table_offset = cpu_to_be64(0), 3638f8413b3cSKevin Wolf .l1_size = cpu_to_be32(0), 3639f8413b3cSKevin Wolf .refcount_table_offset = cpu_to_be64(cluster_size), 3640f8413b3cSKevin Wolf .refcount_table_clusters = cpu_to_be32(1), 3641bd4b167fSMax Reitz .refcount_order = cpu_to_be32(refcount_order), 3642572ad978SDenis Plotnikov /* don't deal with endianness since compression_type is 1 byte long */ 3643572ad978SDenis Plotnikov .compression_type = compression_type, 3644f8413b3cSKevin Wolf .header_length = cpu_to_be32(sizeof(*header)), 3645f8413b3cSKevin Wolf }; 3646a9420734SKevin Wolf 3647b25b387fSDaniel P. 
Berrange /* We'll update this to correct value later */ 3648f8413b3cSKevin Wolf header->crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); 3649a9420734SKevin Wolf 365029ca9e45SKevin Wolf if (qcow2_opts->lazy_refcounts) { 3651f8413b3cSKevin Wolf header->compatible_features |= 3652bfe8043eSStefan Hajnoczi cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS); 3653bfe8043eSStefan Hajnoczi } 3654dcc98687SKevin Wolf if (data_bs) { 3655dcc98687SKevin Wolf header->incompatible_features |= 3656dcc98687SKevin Wolf cpu_to_be64(QCOW2_INCOMPAT_DATA_FILE); 3657dcc98687SKevin Wolf } 36586c3944dcSKevin Wolf if (qcow2_opts->data_file_raw) { 36596c3944dcSKevin Wolf header->autoclear_features |= 36606c3944dcSKevin Wolf cpu_to_be64(QCOW2_AUTOCLEAR_DATA_FILE_RAW); 36616c3944dcSKevin Wolf } 3662572ad978SDenis Plotnikov if (compression_type != QCOW2_COMPRESSION_TYPE_ZLIB) { 3663572ad978SDenis Plotnikov header->incompatible_features |= 3664572ad978SDenis Plotnikov cpu_to_be64(QCOW2_INCOMPAT_COMPRESSION); 3665572ad978SDenis Plotnikov } 3666bfe8043eSStefan Hajnoczi 36677be20252SAlberto Garcia if (qcow2_opts->extended_l2) { 36687be20252SAlberto Garcia header->incompatible_features |= 36697be20252SAlberto Garcia cpu_to_be64(QCOW2_INCOMPAT_EXTL2); 36707be20252SAlberto Garcia } 36717be20252SAlberto Garcia 36728341f00dSEric Blake ret = blk_pwrite(blk, 0, header, cluster_size, 0); 3673f8413b3cSKevin Wolf g_free(header); 3674a9420734SKevin Wolf if (ret < 0) { 36753ef6c40aSMax Reitz error_setg_errno(errp, -ret, "Could not write qcow2 header"); 3676a9420734SKevin Wolf goto out; 3677a9420734SKevin Wolf } 3678a9420734SKevin Wolf 3679b106ad91SKevin Wolf /* Write a refcount table with one refcount block */ 3680b106ad91SKevin Wolf refcount_table = g_malloc0(2 * cluster_size); 3681b106ad91SKevin Wolf refcount_table[0] = cpu_to_be64(2 * cluster_size); 36828341f00dSEric Blake ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size, 0); 36837267c094SAnthony Liguori g_free(refcount_table); 3684a9420734SKevin Wolf 
3685a9420734SKevin Wolf if (ret < 0) { 36863ef6c40aSMax Reitz error_setg_errno(errp, -ret, "Could not write refcount table"); 3687a9420734SKevin Wolf goto out; 3688a9420734SKevin Wolf } 3689a9420734SKevin Wolf 369023588797SKevin Wolf blk_unref(blk); 369123588797SKevin Wolf blk = NULL; 3692a9420734SKevin Wolf 3693a9420734SKevin Wolf /* 3694a9420734SKevin Wolf * And now open the image and make it consistent first (i.e. increase the 3695a9420734SKevin Wolf * refcount of the cluster that is occupied by the header and the refcount 3696a9420734SKevin Wolf * table) 3697a9420734SKevin Wolf */ 3698e6641719SMax Reitz options = qdict_new(); 369946f5ac20SEric Blake qdict_put_str(options, "driver", "qcow2"); 3700cbf2b7c4SKevin Wolf qdict_put_str(options, "file", bs->node_name); 3701dcc98687SKevin Wolf if (data_bs) { 3702dcc98687SKevin Wolf qdict_put_str(options, "data-file", data_bs->node_name); 3703dcc98687SKevin Wolf } 3704cbf2b7c4SKevin Wolf blk = blk_new_open(NULL, NULL, options, 370555880601SKevin Wolf BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH, 3706af175e85SMarkus Armbruster errp); 370723588797SKevin Wolf if (blk == NULL) { 370823588797SKevin Wolf ret = -EIO; 3709a9420734SKevin Wolf goto out; 3710a9420734SKevin Wolf } 3711a9420734SKevin Wolf 371223588797SKevin Wolf ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size); 3713a9420734SKevin Wolf if (ret < 0) { 37143ef6c40aSMax Reitz error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 " 37153ef6c40aSMax Reitz "header and refcount table"); 3716a9420734SKevin Wolf goto out; 3717a9420734SKevin Wolf 3718a9420734SKevin Wolf } else if (ret != 0) { 3719a9420734SKevin Wolf error_report("Huh, first cluster in empty image is already in use?"); 3720a9420734SKevin Wolf abort(); 3721a9420734SKevin Wolf } 3722a9420734SKevin Wolf 37239b890bdcSKevin Wolf /* Set the external data file if necessary */ 37249b890bdcSKevin Wolf if (data_bs) { 37259b890bdcSKevin Wolf BDRVQcow2State *s = blk_bs(blk)->opaque; 
37269b890bdcSKevin Wolf s->image_data_file = g_strdup(data_bs->filename); 37279b890bdcSKevin Wolf } 37289b890bdcSKevin Wolf 3729b527c9b3SKevin Wolf /* Create a full header (including things like feature table) */ 373023588797SKevin Wolf ret = qcow2_update_header(blk_bs(blk)); 3731b527c9b3SKevin Wolf if (ret < 0) { 3732b527c9b3SKevin Wolf error_setg_errno(errp, -ret, "Could not update qcow2 header"); 3733b527c9b3SKevin Wolf goto out; 3734b527c9b3SKevin Wolf } 3735b527c9b3SKevin Wolf 3736a9420734SKevin Wolf /* Okay, now that we have a valid image, let's give it the right size */ 3737c80d8b06SMax Reitz ret = blk_truncate(blk, qcow2_opts->size, false, qcow2_opts->preallocation, 37388c6242b6SKevin Wolf 0, errp); 3739a9420734SKevin Wolf if (ret < 0) { 3740ed3d2ec9SMax Reitz error_prepend(errp, "Could not resize image: "); 3741a9420734SKevin Wolf goto out; 3742a9420734SKevin Wolf } 3743a9420734SKevin Wolf 3744a9420734SKevin Wolf /* Want a backing file? There you go. */ 374529ca9e45SKevin Wolf if (qcow2_opts->has_backing_file) { 374629ca9e45SKevin Wolf const char *backing_format = NULL; 374729ca9e45SKevin Wolf 374829ca9e45SKevin Wolf if (qcow2_opts->has_backing_fmt) { 374929ca9e45SKevin Wolf backing_format = BlockdevDriver_str(qcow2_opts->backing_fmt); 375029ca9e45SKevin Wolf } 375129ca9e45SKevin Wolf 375229ca9e45SKevin Wolf ret = bdrv_change_backing_file(blk_bs(blk), qcow2_opts->backing_file, 3753e54ee1b3SEric Blake backing_format, false); 3754a9420734SKevin Wolf if (ret < 0) { 37553ef6c40aSMax Reitz error_setg_errno(errp, -ret, "Could not assign backing file '%s' " 375629ca9e45SKevin Wolf "with format '%s'", qcow2_opts->backing_file, 375729ca9e45SKevin Wolf backing_format); 3758a9420734SKevin Wolf goto out; 3759a9420734SKevin Wolf } 3760a9420734SKevin Wolf } 3761a9420734SKevin Wolf 3762b25b387fSDaniel P. Berrange /* Want encryption? There you go. 
*/ 376360900b7bSKevin Wolf if (qcow2_opts->has_encrypt) { 376460900b7bSKevin Wolf ret = qcow2_set_up_encryption(blk_bs(blk), qcow2_opts->encrypt, errp); 3765b25b387fSDaniel P. Berrange if (ret < 0) { 3766b25b387fSDaniel P. Berrange goto out; 3767b25b387fSDaniel P. Berrange } 3768b25b387fSDaniel P. Berrange } 3769b25b387fSDaniel P. Berrange 377023588797SKevin Wolf blk_unref(blk); 377123588797SKevin Wolf blk = NULL; 3772ba2ab2f2SMax Reitz 3773b25b387fSDaniel P. Berrange /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning. 3774b25b387fSDaniel P. Berrange * Using BDRV_O_NO_IO, since encryption is now setup we don't want to 3775b25b387fSDaniel P. Berrange * have to setup decryption context. We're not doing any I/O on the top 3776b25b387fSDaniel P. Berrange * level BlockDriverState, only lower layers, where BDRV_O_NO_IO does 3777b25b387fSDaniel P. Berrange * not have effect. 3778b25b387fSDaniel P. Berrange */ 3779e6641719SMax Reitz options = qdict_new(); 378046f5ac20SEric Blake qdict_put_str(options, "driver", "qcow2"); 3781cbf2b7c4SKevin Wolf qdict_put_str(options, "file", bs->node_name); 3782dcc98687SKevin Wolf if (data_bs) { 3783dcc98687SKevin Wolf qdict_put_str(options, "data-file", data_bs->node_name); 3784dcc98687SKevin Wolf } 3785cbf2b7c4SKevin Wolf blk = blk_new_open(NULL, NULL, options, 3786b25b387fSDaniel P. 
Berrange BDRV_O_RDWR | BDRV_O_NO_BACKING | BDRV_O_NO_IO, 3787af175e85SMarkus Armbruster errp); 378823588797SKevin Wolf if (blk == NULL) { 378923588797SKevin Wolf ret = -EIO; 3790ba2ab2f2SMax Reitz goto out; 3791ba2ab2f2SMax Reitz } 3792ba2ab2f2SMax Reitz 3793a9420734SKevin Wolf ret = 0; 3794a9420734SKevin Wolf out: 379523588797SKevin Wolf blk_unref(blk); 3796e1d74bc6SKevin Wolf bdrv_unref(bs); 3797dcc98687SKevin Wolf bdrv_unref(data_bs); 3798a9420734SKevin Wolf return ret; 3799a9420734SKevin Wolf } 3800de5f3f40SKevin Wolf 3801b92902dfSMaxim Levitsky static int coroutine_fn qcow2_co_create_opts(BlockDriver *drv, 3802b92902dfSMaxim Levitsky const char *filename, 3803b92902dfSMaxim Levitsky QemuOpts *opts, 3804efc75e2aSStefan Hajnoczi Error **errp) 3805de5f3f40SKevin Wolf { 3806b76b4f60SKevin Wolf BlockdevCreateOptions *create_options = NULL; 380792adf9dbSMarkus Armbruster QDict *qdict; 3808b76b4f60SKevin Wolf Visitor *v; 3809cbf2b7c4SKevin Wolf BlockDriverState *bs = NULL; 38109b890bdcSKevin Wolf BlockDriverState *data_bs = NULL; 3811b76b4f60SKevin Wolf const char *val; 38123ef6c40aSMax Reitz int ret; 3813de5f3f40SKevin Wolf 3814b76b4f60SKevin Wolf /* Only the keyval visitor supports the dotted syntax needed for 3815b76b4f60SKevin Wolf * encryption, so go through a QDict before getting a QAPI type. Ignore 3816b76b4f60SKevin Wolf * options meant for the protocol layer so that the visitor doesn't 3817b76b4f60SKevin Wolf * complain. 
*/ 3818b76b4f60SKevin Wolf qdict = qemu_opts_to_qdict_filtered(opts, NULL, bdrv_qcow2.create_opts, 3819b76b4f60SKevin Wolf true); 3820b76b4f60SKevin Wolf 3821b76b4f60SKevin Wolf /* Handle encryption options */ 3822b76b4f60SKevin Wolf val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT); 3823b76b4f60SKevin Wolf if (val && !strcmp(val, "on")) { 3824b76b4f60SKevin Wolf qdict_put_str(qdict, BLOCK_OPT_ENCRYPT, "qcow"); 3825b76b4f60SKevin Wolf } else if (val && !strcmp(val, "off")) { 3826b76b4f60SKevin Wolf qdict_del(qdict, BLOCK_OPT_ENCRYPT); 382729ca9e45SKevin Wolf } 382860900b7bSKevin Wolf 3829b76b4f60SKevin Wolf val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT); 3830b76b4f60SKevin Wolf if (val && !strcmp(val, "aes")) { 3831b76b4f60SKevin Wolf qdict_put_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT, "qcow"); 383260900b7bSKevin Wolf } 383360900b7bSKevin Wolf 3834b76b4f60SKevin Wolf /* Convert compat=0.10/1.1 into compat=v2/v3, to be renamed into 3835b76b4f60SKevin Wolf * version=v2/v3 below. */ 3836b76b4f60SKevin Wolf val = qdict_get_try_str(qdict, BLOCK_OPT_COMPAT_LEVEL); 3837b76b4f60SKevin Wolf if (val && !strcmp(val, "0.10")) { 3838b76b4f60SKevin Wolf qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v2"); 3839b76b4f60SKevin Wolf } else if (val && !strcmp(val, "1.1")) { 3840b76b4f60SKevin Wolf qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v3"); 3841b76b4f60SKevin Wolf } 3842b76b4f60SKevin Wolf 3843b76b4f60SKevin Wolf /* Change legacy command line options into QMP ones */ 3844b76b4f60SKevin Wolf static const QDictRenames opt_renames[] = { 3845b76b4f60SKevin Wolf { BLOCK_OPT_BACKING_FILE, "backing-file" }, 3846b76b4f60SKevin Wolf { BLOCK_OPT_BACKING_FMT, "backing-fmt" }, 3847b76b4f60SKevin Wolf { BLOCK_OPT_CLUSTER_SIZE, "cluster-size" }, 3848b76b4f60SKevin Wolf { BLOCK_OPT_LAZY_REFCOUNTS, "lazy-refcounts" }, 38497be20252SAlberto Garcia { BLOCK_OPT_EXTL2, "extended-l2" }, 3850b76b4f60SKevin Wolf { BLOCK_OPT_REFCOUNT_BITS, "refcount-bits" }, 3851b76b4f60SKevin Wolf { 
BLOCK_OPT_ENCRYPT, BLOCK_OPT_ENCRYPT_FORMAT }, 3852b76b4f60SKevin Wolf { BLOCK_OPT_COMPAT_LEVEL, "version" }, 38536c3944dcSKevin Wolf { BLOCK_OPT_DATA_FILE_RAW, "data-file-raw" }, 3854572ad978SDenis Plotnikov { BLOCK_OPT_COMPRESSION_TYPE, "compression-type" }, 3855b76b4f60SKevin Wolf { NULL, NULL }, 3856b76b4f60SKevin Wolf }; 3857b76b4f60SKevin Wolf 3858b76b4f60SKevin Wolf if (!qdict_rename_keys(qdict, opt_renames, errp)) { 38590eb4a8c1SStefan Hajnoczi ret = -EINVAL; 38600eb4a8c1SStefan Hajnoczi goto finish; 38610eb4a8c1SStefan Hajnoczi } 3862bd4b167fSMax Reitz 3863cbf2b7c4SKevin Wolf /* Create and open the file (protocol layer) */ 3864cbf2b7c4SKevin Wolf ret = bdrv_create_file(filename, opts, errp); 3865cbf2b7c4SKevin Wolf if (ret < 0) { 3866cbf2b7c4SKevin Wolf goto finish; 3867cbf2b7c4SKevin Wolf } 3868cbf2b7c4SKevin Wolf 3869cbf2b7c4SKevin Wolf bs = bdrv_open(filename, NULL, NULL, 3870cbf2b7c4SKevin Wolf BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp); 3871cbf2b7c4SKevin Wolf if (bs == NULL) { 3872cbf2b7c4SKevin Wolf ret = -EIO; 3873cbf2b7c4SKevin Wolf goto finish; 3874cbf2b7c4SKevin Wolf } 3875cbf2b7c4SKevin Wolf 38769b890bdcSKevin Wolf /* Create and open an external data file (protocol layer) */ 38779b890bdcSKevin Wolf val = qdict_get_try_str(qdict, BLOCK_OPT_DATA_FILE); 38789b890bdcSKevin Wolf if (val) { 38799b890bdcSKevin Wolf ret = bdrv_create_file(val, opts, errp); 38809b890bdcSKevin Wolf if (ret < 0) { 38819b890bdcSKevin Wolf goto finish; 38829b890bdcSKevin Wolf } 38839b890bdcSKevin Wolf 38849b890bdcSKevin Wolf data_bs = bdrv_open(val, NULL, NULL, 38859b890bdcSKevin Wolf BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, 38869b890bdcSKevin Wolf errp); 38879b890bdcSKevin Wolf if (data_bs == NULL) { 38889b890bdcSKevin Wolf ret = -EIO; 38899b890bdcSKevin Wolf goto finish; 38909b890bdcSKevin Wolf } 38919b890bdcSKevin Wolf 38929b890bdcSKevin Wolf qdict_del(qdict, BLOCK_OPT_DATA_FILE); 38939b890bdcSKevin Wolf qdict_put_str(qdict, "data-file", 
data_bs->node_name); 38949b890bdcSKevin Wolf } 38959b890bdcSKevin Wolf 3896b76b4f60SKevin Wolf /* Set 'driver' and 'node' options */ 3897b76b4f60SKevin Wolf qdict_put_str(qdict, "driver", "qcow2"); 3898b76b4f60SKevin Wolf qdict_put_str(qdict, "file", bs->node_name); 3899b76b4f60SKevin Wolf 3900b76b4f60SKevin Wolf /* Now get the QAPI type BlockdevCreateOptions */ 3901af91062eSMarkus Armbruster v = qobject_input_visitor_new_flat_confused(qdict, errp); 3902af91062eSMarkus Armbruster if (!v) { 3903b76b4f60SKevin Wolf ret = -EINVAL; 3904b76b4f60SKevin Wolf goto finish; 3905b76b4f60SKevin Wolf } 3906b76b4f60SKevin Wolf 3907b11a093cSMarkus Armbruster visit_type_BlockdevCreateOptions(v, NULL, &create_options, errp); 3908b76b4f60SKevin Wolf visit_free(v); 3909b11a093cSMarkus Armbruster if (!create_options) { 3910b76b4f60SKevin Wolf ret = -EINVAL; 3911b76b4f60SKevin Wolf goto finish; 3912b76b4f60SKevin Wolf } 3913b76b4f60SKevin Wolf 3914b76b4f60SKevin Wolf /* Silently round up size */ 3915b76b4f60SKevin Wolf create_options->u.qcow2.size = ROUND_UP(create_options->u.qcow2.size, 3916b76b4f60SKevin Wolf BDRV_SECTOR_SIZE); 3917b76b4f60SKevin Wolf 3918cbf2b7c4SKevin Wolf /* Create the qcow2 image (format layer) */ 3919b76b4f60SKevin Wolf ret = qcow2_co_create(create_options, errp); 39206094cbebSMaxim Levitsky finish: 3921cbf2b7c4SKevin Wolf if (ret < 0) { 39226094cbebSMaxim Levitsky bdrv_co_delete_file_noerr(bs); 39236094cbebSMaxim Levitsky bdrv_co_delete_file_noerr(data_bs); 39246094cbebSMaxim Levitsky } else { 39256094cbebSMaxim Levitsky ret = 0; 3926cbf2b7c4SKevin Wolf } 39271bd0e2d1SChunyan Liu 3928cb3e7f08SMarc-André Lureau qobject_unref(qdict); 3929cbf2b7c4SKevin Wolf bdrv_unref(bs); 39309b890bdcSKevin Wolf bdrv_unref(data_bs); 3931b76b4f60SKevin Wolf qapi_free_BlockdevCreateOptions(create_options); 39323ef6c40aSMax Reitz return ret; 3933de5f3f40SKevin Wolf } 3934de5f3f40SKevin Wolf 39352928abceSDenis V. 
Lunev 3936f06f6b66SEric Blake static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes) 39372928abceSDenis V. Lunev { 393831826642SEric Blake int64_t nr; 393931826642SEric Blake int res; 3940f06f6b66SEric Blake 3941f06f6b66SEric Blake /* Clamp to image length, before checking status of underlying sectors */ 39428cbf74b2SEric Blake if (offset + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) { 39438cbf74b2SEric Blake bytes = bs->total_sectors * BDRV_SECTOR_SIZE - offset; 3944fbaa6bb3SEric Blake } 3945fbaa6bb3SEric Blake 3946f06f6b66SEric Blake if (!bytes) { 3947ebb718a5SEric Blake return true; 39482928abceSDenis V. Lunev } 394967c095c8SVladimir Sementsov-Ogievskiy 395067c095c8SVladimir Sementsov-Ogievskiy /* 395167c095c8SVladimir Sementsov-Ogievskiy * bdrv_block_status_above doesn't merge different types of zeros, for 395267c095c8SVladimir Sementsov-Ogievskiy * example, zeros which come from the region which is unallocated in 395367c095c8SVladimir Sementsov-Ogievskiy * the whole backing chain, and zeros which come because of a short 395467c095c8SVladimir Sementsov-Ogievskiy * backing file. So, we need a loop. 395567c095c8SVladimir Sementsov-Ogievskiy */ 395667c095c8SVladimir Sementsov-Ogievskiy do { 39578cbf74b2SEric Blake res = bdrv_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL); 395867c095c8SVladimir Sementsov-Ogievskiy offset += nr; 395967c095c8SVladimir Sementsov-Ogievskiy bytes -= nr; 396067c095c8SVladimir Sementsov-Ogievskiy } while (res >= 0 && (res & BDRV_BLOCK_ZERO) && nr && bytes); 396167c095c8SVladimir Sementsov-Ogievskiy 396267c095c8SVladimir Sementsov-Ogievskiy return res >= 0 && (res & BDRV_BLOCK_ZERO) && bytes == 0; 39632928abceSDenis V. Lunev } 39642928abceSDenis V. 
Lunev 39655544b59fSEric Blake static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, 3966f34b2bcfSVladimir Sementsov-Ogievskiy int64_t offset, int64_t bytes, BdrvRequestFlags flags) 3967621f0589SKevin Wolf { 3968621f0589SKevin Wolf int ret; 3969ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 3970621f0589SKevin Wolf 3971a6841a2dSAlberto Garcia uint32_t head = offset_into_subcluster(s, offset); 3972a6841a2dSAlberto Garcia uint32_t tail = ROUND_UP(offset + bytes, s->subcluster_size) - 3973a6841a2dSAlberto Garcia (offset + bytes); 39742928abceSDenis V. Lunev 3975f5a5ca79SManos Pitsidianakis trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, bytes); 3976f5a5ca79SManos Pitsidianakis if (offset + bytes == bs->total_sectors * BDRV_SECTOR_SIZE) { 3977fbaa6bb3SEric Blake tail = 0; 3978fbaa6bb3SEric Blake } 39795a64e942SDenis V. Lunev 3980ebb718a5SEric Blake if (head || tail) { 3981ebb718a5SEric Blake uint64_t off; 3982ecfe1863SKevin Wolf unsigned int nr; 398310dabdc5SAlberto Garcia QCow2SubclusterType type; 39842928abceSDenis V. Lunev 3985a6841a2dSAlberto Garcia assert(head + bytes + tail <= s->subcluster_size); 39862928abceSDenis V. Lunev 3987ebb718a5SEric Blake /* check whether remainder of cluster already reads as zero */ 3988f06f6b66SEric Blake if (!(is_zero(bs, offset - head, head) && 3989a6841a2dSAlberto Garcia is_zero(bs, offset + bytes, tail))) { 3990621f0589SKevin Wolf return -ENOTSUP; 3991621f0589SKevin Wolf } 3992621f0589SKevin Wolf 3993621f0589SKevin Wolf qemu_co_mutex_lock(&s->lock); 39942928abceSDenis V. 
Lunev /* We can have new write after previous check */ 3995a6841a2dSAlberto Garcia offset -= head; 3996a6841a2dSAlberto Garcia bytes = s->subcluster_size; 3997a6841a2dSAlberto Garcia nr = s->subcluster_size; 3998ca4a0bb8SAlberto Garcia ret = qcow2_get_host_offset(bs, offset, &nr, &off, &type); 3999ca4a0bb8SAlberto Garcia if (ret < 0 || 400010dabdc5SAlberto Garcia (type != QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN && 400197490a14SAlberto Garcia type != QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC && 400210dabdc5SAlberto Garcia type != QCOW2_SUBCLUSTER_ZERO_PLAIN && 400310dabdc5SAlberto Garcia type != QCOW2_SUBCLUSTER_ZERO_ALLOC)) { 40042928abceSDenis V. Lunev qemu_co_mutex_unlock(&s->lock); 4005580384d6SAlberto Garcia return ret < 0 ? ret : -ENOTSUP; 40062928abceSDenis V. Lunev } 40072928abceSDenis V. Lunev } else { 40082928abceSDenis V. Lunev qemu_co_mutex_lock(&s->lock); 40092928abceSDenis V. Lunev } 40102928abceSDenis V. Lunev 4011f5a5ca79SManos Pitsidianakis trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, bytes); 40125a64e942SDenis V. Lunev 4013a6841a2dSAlberto Garcia /* Whatever is left can use real zero subclusters */ 4014a6841a2dSAlberto Garcia ret = qcow2_subcluster_zeroize(bs, offset, bytes, flags); 4015621f0589SKevin Wolf qemu_co_mutex_unlock(&s->lock); 4016621f0589SKevin Wolf 4017621f0589SKevin Wolf return ret; 4018621f0589SKevin Wolf } 4019621f0589SKevin Wolf 402082e8a788SEric Blake static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs, 40210c802287SVladimir Sementsov-Ogievskiy int64_t offset, int64_t bytes) 40225ea929e3SKevin Wolf { 40236db39ae2SPaolo Bonzini int ret; 4024ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 40256db39ae2SPaolo Bonzini 402680f5c011SAlberto Garcia /* If the image does not support QCOW_OFLAG_ZERO then discarding 402780f5c011SAlberto Garcia * clusters could expose stale data from the backing file. 
*/ 402880f5c011SAlberto Garcia if (s->qcow_version < 3 && bs->backing) { 402980f5c011SAlberto Garcia return -ENOTSUP; 403080f5c011SAlberto Garcia } 403180f5c011SAlberto Garcia 4032f5a5ca79SManos Pitsidianakis if (!QEMU_IS_ALIGNED(offset | bytes, s->cluster_size)) { 4033f5a5ca79SManos Pitsidianakis assert(bytes < s->cluster_size); 4034048c5fd1SEric Blake /* Ignore partial clusters, except for the special case of the 4035048c5fd1SEric Blake * complete partial cluster at the end of an unaligned file */ 4036048c5fd1SEric Blake if (!QEMU_IS_ALIGNED(offset, s->cluster_size) || 4037f5a5ca79SManos Pitsidianakis offset + bytes != bs->total_sectors * BDRV_SECTOR_SIZE) { 403849228d1eSEric Blake return -ENOTSUP; 403949228d1eSEric Blake } 4040048c5fd1SEric Blake } 404149228d1eSEric Blake 40426db39ae2SPaolo Bonzini qemu_co_mutex_lock(&s->lock); 4043f5a5ca79SManos Pitsidianakis ret = qcow2_cluster_discard(bs, offset, bytes, QCOW2_DISCARD_REQUEST, 4044d2cb36afSEric Blake false); 40456db39ae2SPaolo Bonzini qemu_co_mutex_unlock(&s->lock); 40466db39ae2SPaolo Bonzini return ret; 40475ea929e3SKevin Wolf } 40485ea929e3SKevin Wolf 4049fd9fcd37SFam Zheng static int coroutine_fn 4050fd9fcd37SFam Zheng qcow2_co_copy_range_from(BlockDriverState *bs, 405148535049SVladimir Sementsov-Ogievskiy BdrvChild *src, int64_t src_offset, 405248535049SVladimir Sementsov-Ogievskiy BdrvChild *dst, int64_t dst_offset, 405348535049SVladimir Sementsov-Ogievskiy int64_t bytes, BdrvRequestFlags read_flags, 405467b51fb9SVladimir Sementsov-Ogievskiy BdrvRequestFlags write_flags) 4055fd9fcd37SFam Zheng { 4056fd9fcd37SFam Zheng BDRVQcow2State *s = bs->opaque; 4057fd9fcd37SFam Zheng int ret; 4058fd9fcd37SFam Zheng unsigned int cur_bytes; /* number of bytes in current iteration */ 4059fd9fcd37SFam Zheng BdrvChild *child = NULL; 406067b51fb9SVladimir Sementsov-Ogievskiy BdrvRequestFlags cur_write_flags; 4061fd9fcd37SFam Zheng 4062fd9fcd37SFam Zheng assert(!bs->encrypted); 4063fd9fcd37SFam Zheng 
qemu_co_mutex_lock(&s->lock); 4064fd9fcd37SFam Zheng 4065fd9fcd37SFam Zheng while (bytes != 0) { 4066fd9fcd37SFam Zheng uint64_t copy_offset = 0; 406710dabdc5SAlberto Garcia QCow2SubclusterType type; 4068fd9fcd37SFam Zheng /* prepare next request */ 4069fd9fcd37SFam Zheng cur_bytes = MIN(bytes, INT_MAX); 407067b51fb9SVladimir Sementsov-Ogievskiy cur_write_flags = write_flags; 4071fd9fcd37SFam Zheng 4072ca4a0bb8SAlberto Garcia ret = qcow2_get_host_offset(bs, src_offset, &cur_bytes, 4073ca4a0bb8SAlberto Garcia ©_offset, &type); 4074fd9fcd37SFam Zheng if (ret < 0) { 4075fd9fcd37SFam Zheng goto out; 4076fd9fcd37SFam Zheng } 4077fd9fcd37SFam Zheng 4078ca4a0bb8SAlberto Garcia switch (type) { 407910dabdc5SAlberto Garcia case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN: 408097490a14SAlberto Garcia case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: 4081fd9fcd37SFam Zheng if (bs->backing && bs->backing->bs) { 4082fd9fcd37SFam Zheng int64_t backing_length = bdrv_getlength(bs->backing->bs); 4083fd9fcd37SFam Zheng if (src_offset >= backing_length) { 408467b51fb9SVladimir Sementsov-Ogievskiy cur_write_flags |= BDRV_REQ_ZERO_WRITE; 4085fd9fcd37SFam Zheng } else { 4086fd9fcd37SFam Zheng child = bs->backing; 4087fd9fcd37SFam Zheng cur_bytes = MIN(cur_bytes, backing_length - src_offset); 4088fd9fcd37SFam Zheng copy_offset = src_offset; 4089fd9fcd37SFam Zheng } 4090fd9fcd37SFam Zheng } else { 409167b51fb9SVladimir Sementsov-Ogievskiy cur_write_flags |= BDRV_REQ_ZERO_WRITE; 4092fd9fcd37SFam Zheng } 4093fd9fcd37SFam Zheng break; 4094fd9fcd37SFam Zheng 409510dabdc5SAlberto Garcia case QCOW2_SUBCLUSTER_ZERO_PLAIN: 409610dabdc5SAlberto Garcia case QCOW2_SUBCLUSTER_ZERO_ALLOC: 409767b51fb9SVladimir Sementsov-Ogievskiy cur_write_flags |= BDRV_REQ_ZERO_WRITE; 4098fd9fcd37SFam Zheng break; 4099fd9fcd37SFam Zheng 410010dabdc5SAlberto Garcia case QCOW2_SUBCLUSTER_COMPRESSED: 4101fd9fcd37SFam Zheng ret = -ENOTSUP; 4102fd9fcd37SFam Zheng goto out; 4103fd9fcd37SFam Zheng 410410dabdc5SAlberto Garcia case 
QCOW2_SUBCLUSTER_NORMAL: 4105966b000fSKevin Wolf child = s->data_file; 4106fd9fcd37SFam Zheng break; 4107fd9fcd37SFam Zheng 4108fd9fcd37SFam Zheng default: 4109fd9fcd37SFam Zheng abort(); 4110fd9fcd37SFam Zheng } 4111fd9fcd37SFam Zheng qemu_co_mutex_unlock(&s->lock); 4112fd9fcd37SFam Zheng ret = bdrv_co_copy_range_from(child, 4113fd9fcd37SFam Zheng copy_offset, 4114fd9fcd37SFam Zheng dst, dst_offset, 411567b51fb9SVladimir Sementsov-Ogievskiy cur_bytes, read_flags, cur_write_flags); 4116fd9fcd37SFam Zheng qemu_co_mutex_lock(&s->lock); 4117fd9fcd37SFam Zheng if (ret < 0) { 4118fd9fcd37SFam Zheng goto out; 4119fd9fcd37SFam Zheng } 4120fd9fcd37SFam Zheng 4121fd9fcd37SFam Zheng bytes -= cur_bytes; 4122fd9fcd37SFam Zheng src_offset += cur_bytes; 4123fd9fcd37SFam Zheng dst_offset += cur_bytes; 4124fd9fcd37SFam Zheng } 4125fd9fcd37SFam Zheng ret = 0; 4126fd9fcd37SFam Zheng 4127fd9fcd37SFam Zheng out: 4128fd9fcd37SFam Zheng qemu_co_mutex_unlock(&s->lock); 4129fd9fcd37SFam Zheng return ret; 4130fd9fcd37SFam Zheng } 4131fd9fcd37SFam Zheng 4132fd9fcd37SFam Zheng static int coroutine_fn 4133fd9fcd37SFam Zheng qcow2_co_copy_range_to(BlockDriverState *bs, 413448535049SVladimir Sementsov-Ogievskiy BdrvChild *src, int64_t src_offset, 413548535049SVladimir Sementsov-Ogievskiy BdrvChild *dst, int64_t dst_offset, 413648535049SVladimir Sementsov-Ogievskiy int64_t bytes, BdrvRequestFlags read_flags, 413767b51fb9SVladimir Sementsov-Ogievskiy BdrvRequestFlags write_flags) 4138fd9fcd37SFam Zheng { 4139fd9fcd37SFam Zheng BDRVQcow2State *s = bs->opaque; 4140fd9fcd37SFam Zheng int ret; 4141fd9fcd37SFam Zheng unsigned int cur_bytes; /* number of sectors in current iteration */ 4142bfd0989aSAlberto Garcia uint64_t host_offset; 4143fd9fcd37SFam Zheng QCowL2Meta *l2meta = NULL; 4144fd9fcd37SFam Zheng 4145fd9fcd37SFam Zheng assert(!bs->encrypted); 4146fd9fcd37SFam Zheng 4147fd9fcd37SFam Zheng qemu_co_mutex_lock(&s->lock); 4148fd9fcd37SFam Zheng 4149fd9fcd37SFam Zheng while (bytes != 0) { 
4150fd9fcd37SFam Zheng 4151fd9fcd37SFam Zheng l2meta = NULL; 4152fd9fcd37SFam Zheng 4153fd9fcd37SFam Zheng cur_bytes = MIN(bytes, INT_MAX); 4154fd9fcd37SFam Zheng 4155fd9fcd37SFam Zheng /* TODO: 4156fd9fcd37SFam Zheng * If src->bs == dst->bs, we could simply copy by incrementing 4157fd9fcd37SFam Zheng * the refcnt, without copying user data. 4158fd9fcd37SFam Zheng * Or if src->bs == dst->bs->backing->bs, we could copy by discarding. */ 4159bfd0989aSAlberto Garcia ret = qcow2_alloc_host_offset(bs, dst_offset, &cur_bytes, 4160bfd0989aSAlberto Garcia &host_offset, &l2meta); 4161fd9fcd37SFam Zheng if (ret < 0) { 4162fd9fcd37SFam Zheng goto fail; 4163fd9fcd37SFam Zheng } 4164fd9fcd37SFam Zheng 4165bfd0989aSAlberto Garcia ret = qcow2_pre_write_overlap_check(bs, 0, host_offset, cur_bytes, 4166bfd0989aSAlberto Garcia true); 4167fd9fcd37SFam Zheng if (ret < 0) { 4168fd9fcd37SFam Zheng goto fail; 4169fd9fcd37SFam Zheng } 4170fd9fcd37SFam Zheng 4171fd9fcd37SFam Zheng qemu_co_mutex_unlock(&s->lock); 4172bfd0989aSAlberto Garcia ret = bdrv_co_copy_range_to(src, src_offset, s->data_file, host_offset, 417367b51fb9SVladimir Sementsov-Ogievskiy cur_bytes, read_flags, write_flags); 4174fd9fcd37SFam Zheng qemu_co_mutex_lock(&s->lock); 4175fd9fcd37SFam Zheng if (ret < 0) { 4176fd9fcd37SFam Zheng goto fail; 4177fd9fcd37SFam Zheng } 4178fd9fcd37SFam Zheng 4179fd9fcd37SFam Zheng ret = qcow2_handle_l2meta(bs, &l2meta, true); 4180fd9fcd37SFam Zheng if (ret) { 4181fd9fcd37SFam Zheng goto fail; 4182fd9fcd37SFam Zheng } 4183fd9fcd37SFam Zheng 4184fd9fcd37SFam Zheng bytes -= cur_bytes; 4185e06f4639SFam Zheng src_offset += cur_bytes; 4186fd9fcd37SFam Zheng dst_offset += cur_bytes; 4187fd9fcd37SFam Zheng } 4188fd9fcd37SFam Zheng ret = 0; 4189fd9fcd37SFam Zheng 4190fd9fcd37SFam Zheng fail: 4191fd9fcd37SFam Zheng qcow2_handle_l2meta(bs, &l2meta, false); 4192fd9fcd37SFam Zheng 4193fd9fcd37SFam Zheng qemu_co_mutex_unlock(&s->lock); 4194fd9fcd37SFam Zheng 4195fd9fcd37SFam Zheng 
trace_qcow2_writev_done_req(qemu_coroutine_self(), ret); 4196fd9fcd37SFam Zheng 4197fd9fcd37SFam Zheng return ret; 4198fd9fcd37SFam Zheng } 4199fd9fcd37SFam Zheng 4200061ca8a3SKevin Wolf static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, 4201c80d8b06SMax Reitz bool exact, PreallocMode prealloc, 420292b92799SKevin Wolf BdrvRequestFlags flags, Error **errp) 4203419b19d9SStefan Hajnoczi { 4204ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 420595b98f34SMax Reitz uint64_t old_length; 42062cf7cfa1SKevin Wolf int64_t new_l1_size; 42072cf7cfa1SKevin Wolf int ret; 420845b4949cSLeonid Bloch QDict *options; 4209419b19d9SStefan Hajnoczi 4210772d1f97SMax Reitz if (prealloc != PREALLOC_MODE_OFF && prealloc != PREALLOC_MODE_METADATA && 4211772d1f97SMax Reitz prealloc != PREALLOC_MODE_FALLOC && prealloc != PREALLOC_MODE_FULL) 4212772d1f97SMax Reitz { 42138243ccb7SMax Reitz error_setg(errp, "Unsupported preallocation mode '%s'", 4214977c736fSMarkus Armbruster PreallocMode_str(prealloc)); 42158243ccb7SMax Reitz return -ENOTSUP; 42168243ccb7SMax Reitz } 42178243ccb7SMax Reitz 42183afea402SAlberto Garcia if (!QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)) { 42193afea402SAlberto Garcia error_setg(errp, "The new size must be a multiple of %u", 42203afea402SAlberto Garcia (unsigned) BDRV_SECTOR_SIZE); 4221419b19d9SStefan Hajnoczi return -EINVAL; 4222419b19d9SStefan Hajnoczi } 4223419b19d9SStefan Hajnoczi 4224061ca8a3SKevin Wolf qemu_co_mutex_lock(&s->lock); 4225061ca8a3SKevin Wolf 42267fa140abSEric Blake /* 42277fa140abSEric Blake * Even though we store snapshot size for all images, it was not 42287fa140abSEric Blake * required until v3, so it is not safe to proceed for v2. 
42297fa140abSEric Blake */ 42307fa140abSEric Blake if (s->nb_snapshots && s->qcow_version < 3) { 42317fa140abSEric Blake error_setg(errp, "Can't resize a v2 image which has snapshots"); 4232061ca8a3SKevin Wolf ret = -ENOTSUP; 4233061ca8a3SKevin Wolf goto fail; 4234419b19d9SStefan Hajnoczi } 4235419b19d9SStefan Hajnoczi 4236ee1244a2SEric Blake /* See qcow2-bitmap.c for which bitmap scenarios prevent a resize. */ 4237d19c6b36SJohn Snow if (qcow2_truncate_bitmaps_check(bs, errp)) { 4238061ca8a3SKevin Wolf ret = -ENOTSUP; 4239061ca8a3SKevin Wolf goto fail; 424088ddffaeSVladimir Sementsov-Ogievskiy } 424188ddffaeSVladimir Sementsov-Ogievskiy 4242bd016b91SLeonid Bloch old_length = bs->total_sectors * BDRV_SECTOR_SIZE; 424346b732cdSPavel Butsykin new_l1_size = size_to_l1(s, offset); 424495b98f34SMax Reitz 424595b98f34SMax Reitz if (offset < old_length) { 4246163bc39dSPavel Butsykin int64_t last_cluster, old_file_size; 424746b732cdSPavel Butsykin if (prealloc != PREALLOC_MODE_OFF) { 424846b732cdSPavel Butsykin error_setg(errp, 424946b732cdSPavel Butsykin "Preallocation can't be used for shrinking an image"); 4250061ca8a3SKevin Wolf ret = -EINVAL; 4251061ca8a3SKevin Wolf goto fail; 4252419b19d9SStefan Hajnoczi } 4253419b19d9SStefan Hajnoczi 425446b732cdSPavel Butsykin ret = qcow2_cluster_discard(bs, ROUND_UP(offset, s->cluster_size), 425546b732cdSPavel Butsykin old_length - ROUND_UP(offset, 425646b732cdSPavel Butsykin s->cluster_size), 425746b732cdSPavel Butsykin QCOW2_DISCARD_ALWAYS, true); 425846b732cdSPavel Butsykin if (ret < 0) { 425946b732cdSPavel Butsykin error_setg_errno(errp, -ret, "Failed to discard cropped clusters"); 4260061ca8a3SKevin Wolf goto fail; 426146b732cdSPavel Butsykin } 426246b732cdSPavel Butsykin 426346b732cdSPavel Butsykin ret = qcow2_shrink_l1_table(bs, new_l1_size); 426446b732cdSPavel Butsykin if (ret < 0) { 426546b732cdSPavel Butsykin error_setg_errno(errp, -ret, 426646b732cdSPavel Butsykin "Failed to reduce the number of L2 tables"); 
4267061ca8a3SKevin Wolf goto fail; 426846b732cdSPavel Butsykin } 426946b732cdSPavel Butsykin 427046b732cdSPavel Butsykin ret = qcow2_shrink_reftable(bs); 427146b732cdSPavel Butsykin if (ret < 0) { 427246b732cdSPavel Butsykin error_setg_errno(errp, -ret, 427346b732cdSPavel Butsykin "Failed to discard unused refblocks"); 4274061ca8a3SKevin Wolf goto fail; 427546b732cdSPavel Butsykin } 4276163bc39dSPavel Butsykin 4277163bc39dSPavel Butsykin old_file_size = bdrv_getlength(bs->file->bs); 4278163bc39dSPavel Butsykin if (old_file_size < 0) { 4279163bc39dSPavel Butsykin error_setg_errno(errp, -old_file_size, 4280163bc39dSPavel Butsykin "Failed to inquire current file length"); 4281061ca8a3SKevin Wolf ret = old_file_size; 4282061ca8a3SKevin Wolf goto fail; 4283163bc39dSPavel Butsykin } 4284163bc39dSPavel Butsykin last_cluster = qcow2_get_last_cluster(bs, old_file_size); 4285163bc39dSPavel Butsykin if (last_cluster < 0) { 4286163bc39dSPavel Butsykin error_setg_errno(errp, -last_cluster, 4287163bc39dSPavel Butsykin "Failed to find the last cluster"); 4288061ca8a3SKevin Wolf ret = last_cluster; 4289061ca8a3SKevin Wolf goto fail; 4290163bc39dSPavel Butsykin } 4291163bc39dSPavel Butsykin if ((last_cluster + 1) * s->cluster_size < old_file_size) { 4292233521b1SMax Reitz Error *local_err = NULL; 4293233521b1SMax Reitz 4294e61a28a9SMax Reitz /* 4295e61a28a9SMax Reitz * Do not pass @exact here: It will not help the user if 4296e61a28a9SMax Reitz * we get an error here just because they wanted to shrink 4297e61a28a9SMax Reitz * their qcow2 image (on a block device) with qemu-img. 4298e61a28a9SMax Reitz * (And on the qcow2 layer, the @exact requirement is 4299e61a28a9SMax Reitz * always fulfilled, so there is no need to pass it on.) 
4300e61a28a9SMax Reitz */ 4301061ca8a3SKevin Wolf bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size, 43027b8e4857SKevin Wolf false, PREALLOC_MODE_OFF, 0, &local_err); 4303233521b1SMax Reitz if (local_err) { 4304233521b1SMax Reitz warn_reportf_err(local_err, 4305233521b1SMax Reitz "Failed to truncate the tail of the image: "); 4306163bc39dSPavel Butsykin } 4307163bc39dSPavel Butsykin } 430846b732cdSPavel Butsykin } else { 430972893756SStefan Hajnoczi ret = qcow2_grow_l1_table(bs, new_l1_size, true); 4310419b19d9SStefan Hajnoczi if (ret < 0) { 4311f59adb32SMax Reitz error_setg_errno(errp, -ret, "Failed to grow the L1 table"); 4312061ca8a3SKevin Wolf goto fail; 4313419b19d9SStefan Hajnoczi } 431448410829SMax Reitz 431548410829SMax Reitz if (data_file_is_raw(bs) && prealloc == PREALLOC_MODE_OFF) { 431648410829SMax Reitz /* 431748410829SMax Reitz * When creating a qcow2 image with data-file-raw, we enforce 431848410829SMax Reitz * at least prealloc=metadata, so that the L1/L2 tables are 431948410829SMax Reitz * fully allocated and reading from the data file will return 432048410829SMax Reitz * the same data as reading from the qcow2 image. When the 432148410829SMax Reitz * image is grown, we must consequently preallocate the 432248410829SMax Reitz * metadata structures to cover the added area. 432348410829SMax Reitz */ 432448410829SMax Reitz prealloc = PREALLOC_MODE_METADATA; 432548410829SMax Reitz } 432646b732cdSPavel Butsykin } 4327419b19d9SStefan Hajnoczi 432895b98f34SMax Reitz switch (prealloc) { 432995b98f34SMax Reitz case PREALLOC_MODE_OFF: 4330718c0fceSKevin Wolf if (has_data_file(bs)) { 4331e61a28a9SMax Reitz /* 4332e61a28a9SMax Reitz * If the caller wants an exact resize, the external data 4333e61a28a9SMax Reitz * file should be resized to the exact target size, too, 4334e61a28a9SMax Reitz * so we pass @exact here. 
4335e61a28a9SMax Reitz */ 43367b8e4857SKevin Wolf ret = bdrv_co_truncate(s->data_file, offset, exact, prealloc, 0, 43377b8e4857SKevin Wolf errp); 4338718c0fceSKevin Wolf if (ret < 0) { 4339718c0fceSKevin Wolf goto fail; 4340718c0fceSKevin Wolf } 4341718c0fceSKevin Wolf } 434295b98f34SMax Reitz break; 434395b98f34SMax Reitz 434495b98f34SMax Reitz case PREALLOC_MODE_METADATA: 4345718c0fceSKevin Wolf ret = preallocate_co(bs, old_length, offset, prealloc, errp); 434695b98f34SMax Reitz if (ret < 0) { 4347061ca8a3SKevin Wolf goto fail; 434895b98f34SMax Reitz } 434995b98f34SMax Reitz break; 435095b98f34SMax Reitz 4351772d1f97SMax Reitz case PREALLOC_MODE_FALLOC: 4352772d1f97SMax Reitz case PREALLOC_MODE_FULL: 4353772d1f97SMax Reitz { 4354772d1f97SMax Reitz int64_t allocation_start, host_offset, guest_offset; 4355772d1f97SMax Reitz int64_t clusters_allocated; 43564b96fa38SMax Reitz int64_t old_file_size, last_cluster, new_file_size; 4357772d1f97SMax Reitz uint64_t nb_new_data_clusters, nb_new_l2_tables; 435840dee943SAlberto Garcia bool subclusters_need_allocation = false; 4359772d1f97SMax Reitz 4360966b000fSKevin Wolf /* With a data file, preallocation means just allocating the metadata 4361966b000fSKevin Wolf * and forwarding the truncate request to the data file */ 4362966b000fSKevin Wolf if (has_data_file(bs)) { 4363718c0fceSKevin Wolf ret = preallocate_co(bs, old_length, offset, prealloc, errp); 4364966b000fSKevin Wolf if (ret < 0) { 4365966b000fSKevin Wolf goto fail; 4366966b000fSKevin Wolf } 4367966b000fSKevin Wolf break; 4368966b000fSKevin Wolf } 4369966b000fSKevin Wolf 4370772d1f97SMax Reitz old_file_size = bdrv_getlength(bs->file->bs); 4371772d1f97SMax Reitz if (old_file_size < 0) { 4372772d1f97SMax Reitz error_setg_errno(errp, -old_file_size, 4373772d1f97SMax Reitz "Failed to inquire current file length"); 4374061ca8a3SKevin Wolf ret = old_file_size; 4375061ca8a3SKevin Wolf goto fail; 4376772d1f97SMax Reitz } 43774b96fa38SMax Reitz 43784b96fa38SMax Reitz 
last_cluster = qcow2_get_last_cluster(bs, old_file_size); 43794b96fa38SMax Reitz if (last_cluster >= 0) { 43804b96fa38SMax Reitz old_file_size = (last_cluster + 1) * s->cluster_size; 43814b96fa38SMax Reitz } else { 4382e400ad1eSMax Reitz old_file_size = ROUND_UP(old_file_size, s->cluster_size); 43834b96fa38SMax Reitz } 4384772d1f97SMax Reitz 4385a5675f39SAlberto Garcia nb_new_data_clusters = (ROUND_UP(offset, s->cluster_size) - 4386a5675f39SAlberto Garcia start_of_cluster(s, old_length)) >> s->cluster_bits; 4387772d1f97SMax Reitz 4388772d1f97SMax Reitz /* This is an overestimation; we will not actually allocate space for 4389772d1f97SMax Reitz * these in the file but just make sure the new refcount structures are 4390772d1f97SMax Reitz * able to cover them so we will not have to allocate new refblocks 4391772d1f97SMax Reitz * while entering the data blocks in the potentially new L2 tables. 4392772d1f97SMax Reitz * (We do not actually care where the L2 tables are placed. Maybe they 4393772d1f97SMax Reitz * are already allocated or they can be placed somewhere before 4394772d1f97SMax Reitz * @old_file_size. It does not matter because they will be fully 4395772d1f97SMax Reitz * allocated automatically, so they do not need to be covered by the 4396772d1f97SMax Reitz * preallocation. All that matters is that we will not have to allocate 4397772d1f97SMax Reitz * new refcount structures for them.) 
*/ 4398772d1f97SMax Reitz nb_new_l2_tables = DIV_ROUND_UP(nb_new_data_clusters, 4399c8fd8554SAlberto Garcia s->cluster_size / l2_entry_size(s)); 4400772d1f97SMax Reitz /* The cluster range may not be aligned to L2 boundaries, so add one L2 4401772d1f97SMax Reitz * table for a potential head/tail */ 4402772d1f97SMax Reitz nb_new_l2_tables++; 4403772d1f97SMax Reitz 4404772d1f97SMax Reitz allocation_start = qcow2_refcount_area(bs, old_file_size, 4405772d1f97SMax Reitz nb_new_data_clusters + 4406772d1f97SMax Reitz nb_new_l2_tables, 4407772d1f97SMax Reitz true, 0, 0); 4408772d1f97SMax Reitz if (allocation_start < 0) { 4409772d1f97SMax Reitz error_setg_errno(errp, -allocation_start, 4410772d1f97SMax Reitz "Failed to resize refcount structures"); 4411061ca8a3SKevin Wolf ret = allocation_start; 4412061ca8a3SKevin Wolf goto fail; 4413772d1f97SMax Reitz } 4414772d1f97SMax Reitz 4415772d1f97SMax Reitz clusters_allocated = qcow2_alloc_clusters_at(bs, allocation_start, 4416772d1f97SMax Reitz nb_new_data_clusters); 4417772d1f97SMax Reitz if (clusters_allocated < 0) { 4418772d1f97SMax Reitz error_setg_errno(errp, -clusters_allocated, 4419772d1f97SMax Reitz "Failed to allocate data clusters"); 4420061ca8a3SKevin Wolf ret = clusters_allocated; 4421061ca8a3SKevin Wolf goto fail; 4422772d1f97SMax Reitz } 4423772d1f97SMax Reitz 4424772d1f97SMax Reitz assert(clusters_allocated == nb_new_data_clusters); 4425772d1f97SMax Reitz 4426772d1f97SMax Reitz /* Allocate the data area */ 4427772d1f97SMax Reitz new_file_size = allocation_start + 4428772d1f97SMax Reitz nb_new_data_clusters * s->cluster_size; 4429eb8a0cf3SKevin Wolf /* 4430eb8a0cf3SKevin Wolf * Image file grows, so @exact does not matter. 4431eb8a0cf3SKevin Wolf * 4432eb8a0cf3SKevin Wolf * If we need to zero out the new area, try first whether the protocol 4433eb8a0cf3SKevin Wolf * driver can already take care of this. 
4434eb8a0cf3SKevin Wolf */ 4435eb8a0cf3SKevin Wolf if (flags & BDRV_REQ_ZERO_WRITE) { 4436eb8a0cf3SKevin Wolf ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 4437eb8a0cf3SKevin Wolf BDRV_REQ_ZERO_WRITE, NULL); 4438eb8a0cf3SKevin Wolf if (ret >= 0) { 4439eb8a0cf3SKevin Wolf flags &= ~BDRV_REQ_ZERO_WRITE; 444040dee943SAlberto Garcia /* Ensure that we read zeroes and not backing file data */ 444140dee943SAlberto Garcia subclusters_need_allocation = true; 4442eb8a0cf3SKevin Wolf } 4443eb8a0cf3SKevin Wolf } else { 4444eb8a0cf3SKevin Wolf ret = -1; 4445eb8a0cf3SKevin Wolf } 4446eb8a0cf3SKevin Wolf if (ret < 0) { 44477b8e4857SKevin Wolf ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0, 44487b8e4857SKevin Wolf errp); 4449eb8a0cf3SKevin Wolf } 4450772d1f97SMax Reitz if (ret < 0) { 4451772d1f97SMax Reitz error_prepend(errp, "Failed to resize underlying file: "); 4452772d1f97SMax Reitz qcow2_free_clusters(bs, allocation_start, 4453772d1f97SMax Reitz nb_new_data_clusters * s->cluster_size, 4454772d1f97SMax Reitz QCOW2_DISCARD_OTHER); 4455061ca8a3SKevin Wolf goto fail; 4456772d1f97SMax Reitz } 4457772d1f97SMax Reitz 4458772d1f97SMax Reitz /* Create the necessary L2 entries */ 4459772d1f97SMax Reitz host_offset = allocation_start; 4460772d1f97SMax Reitz guest_offset = old_length; 4461772d1f97SMax Reitz while (nb_new_data_clusters) { 446213bec229SAlberto Garcia int64_t nb_clusters = MIN( 446313bec229SAlberto Garcia nb_new_data_clusters, 446413bec229SAlberto Garcia s->l2_slice_size - offset_to_l2_slice_index(s, guest_offset)); 4465a5675f39SAlberto Garcia unsigned cow_start_length = offset_into_cluster(s, guest_offset); 4466a5675f39SAlberto Garcia QCowL2Meta allocation; 4467a5675f39SAlberto Garcia guest_offset = start_of_cluster(s, guest_offset); 4468a5675f39SAlberto Garcia allocation = (QCowL2Meta) { 4469772d1f97SMax Reitz .offset = guest_offset, 4470772d1f97SMax Reitz .alloc_offset = host_offset, 4471772d1f97SMax Reitz .nb_clusters = 
nb_clusters, 4472a5675f39SAlberto Garcia .cow_start = { 4473a5675f39SAlberto Garcia .offset = 0, 4474a5675f39SAlberto Garcia .nb_bytes = cow_start_length, 4475a5675f39SAlberto Garcia }, 4476a5675f39SAlberto Garcia .cow_end = { 4477a5675f39SAlberto Garcia .offset = nb_clusters << s->cluster_bits, 4478a5675f39SAlberto Garcia .nb_bytes = 0, 4479a5675f39SAlberto Garcia }, 448040dee943SAlberto Garcia .prealloc = !subclusters_need_allocation, 4481772d1f97SMax Reitz }; 4482772d1f97SMax Reitz qemu_co_queue_init(&allocation.dependent_requests); 4483772d1f97SMax Reitz 4484772d1f97SMax Reitz ret = qcow2_alloc_cluster_link_l2(bs, &allocation); 4485772d1f97SMax Reitz if (ret < 0) { 4486772d1f97SMax Reitz error_setg_errno(errp, -ret, "Failed to update L2 tables"); 4487772d1f97SMax Reitz qcow2_free_clusters(bs, host_offset, 4488772d1f97SMax Reitz nb_new_data_clusters * s->cluster_size, 4489772d1f97SMax Reitz QCOW2_DISCARD_OTHER); 4490061ca8a3SKevin Wolf goto fail; 4491772d1f97SMax Reitz } 4492772d1f97SMax Reitz 4493772d1f97SMax Reitz guest_offset += nb_clusters * s->cluster_size; 4494772d1f97SMax Reitz host_offset += nb_clusters * s->cluster_size; 4495772d1f97SMax Reitz nb_new_data_clusters -= nb_clusters; 4496772d1f97SMax Reitz } 4497772d1f97SMax Reitz break; 4498772d1f97SMax Reitz } 4499772d1f97SMax Reitz 450095b98f34SMax Reitz default: 450195b98f34SMax Reitz g_assert_not_reached(); 450295b98f34SMax Reitz } 450395b98f34SMax Reitz 4504f01643fbSKevin Wolf if ((flags & BDRV_REQ_ZERO_WRITE) && offset > old_length) { 4505a6841a2dSAlberto Garcia uint64_t zero_start = QEMU_ALIGN_UP(old_length, s->subcluster_size); 4506f01643fbSKevin Wolf 4507f01643fbSKevin Wolf /* 4508a6841a2dSAlberto Garcia * Use zero clusters as much as we can. qcow2_subcluster_zeroize() 4509a6841a2dSAlberto Garcia * requires a subcluster-aligned start. The end may be unaligned if 4510a6841a2dSAlberto Garcia * it is at the end of the image (which it is here). 
4511f01643fbSKevin Wolf */ 4512e4d7019eSAlberto Garcia if (offset > zero_start) { 4513a6841a2dSAlberto Garcia ret = qcow2_subcluster_zeroize(bs, zero_start, offset - zero_start, 4514a6841a2dSAlberto Garcia 0); 4515f01643fbSKevin Wolf if (ret < 0) { 4516f01643fbSKevin Wolf error_setg_errno(errp, -ret, "Failed to zero out new clusters"); 4517f01643fbSKevin Wolf goto fail; 4518f01643fbSKevin Wolf } 4519e4d7019eSAlberto Garcia } 4520f01643fbSKevin Wolf 4521f01643fbSKevin Wolf /* Write explicit zeros for the unaligned head */ 4522f01643fbSKevin Wolf if (zero_start > old_length) { 4523e4d7019eSAlberto Garcia uint64_t len = MIN(zero_start, offset) - old_length; 4524f01643fbSKevin Wolf uint8_t *buf = qemu_blockalign0(bs, len); 4525f01643fbSKevin Wolf QEMUIOVector qiov; 4526f01643fbSKevin Wolf qemu_iovec_init_buf(&qiov, buf, len); 4527f01643fbSKevin Wolf 4528f01643fbSKevin Wolf qemu_co_mutex_unlock(&s->lock); 4529f01643fbSKevin Wolf ret = qcow2_co_pwritev_part(bs, old_length, len, &qiov, 0, 0); 4530f01643fbSKevin Wolf qemu_co_mutex_lock(&s->lock); 4531f01643fbSKevin Wolf 4532f01643fbSKevin Wolf qemu_vfree(buf); 4533f01643fbSKevin Wolf if (ret < 0) { 4534f01643fbSKevin Wolf error_setg_errno(errp, -ret, "Failed to zero out the new area"); 4535f01643fbSKevin Wolf goto fail; 4536f01643fbSKevin Wolf } 4537f01643fbSKevin Wolf } 4538f01643fbSKevin Wolf } 4539f01643fbSKevin Wolf 454095b98f34SMax Reitz if (prealloc != PREALLOC_MODE_OFF) { 454195b98f34SMax Reitz /* Flush metadata before actually changing the image size */ 4542061ca8a3SKevin Wolf ret = qcow2_write_caches(bs); 454395b98f34SMax Reitz if (ret < 0) { 454495b98f34SMax Reitz error_setg_errno(errp, -ret, 454595b98f34SMax Reitz "Failed to flush the preallocated area to disk"); 4546061ca8a3SKevin Wolf goto fail; 454795b98f34SMax Reitz } 454895b98f34SMax Reitz } 454995b98f34SMax Reitz 455045b4949cSLeonid Bloch bs->total_sectors = offset / BDRV_SECTOR_SIZE; 455145b4949cSLeonid Bloch 4552419b19d9SStefan Hajnoczi /* write updated 
header.size */ 4553419b19d9SStefan Hajnoczi offset = cpu_to_be64(offset); 455432cc71deSAlberto Faria ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size), 455532cc71deSAlberto Faria sizeof(offset), &offset, 0); 4556419b19d9SStefan Hajnoczi if (ret < 0) { 4557f59adb32SMax Reitz error_setg_errno(errp, -ret, "Failed to update the image size"); 4558061ca8a3SKevin Wolf goto fail; 4559419b19d9SStefan Hajnoczi } 4560419b19d9SStefan Hajnoczi 4561419b19d9SStefan Hajnoczi s->l1_vm_state_index = new_l1_size; 456245b4949cSLeonid Bloch 456345b4949cSLeonid Bloch /* Update cache sizes */ 456445b4949cSLeonid Bloch options = qdict_clone_shallow(bs->options); 456545b4949cSLeonid Bloch ret = qcow2_update_options(bs, options, s->flags, errp); 456645b4949cSLeonid Bloch qobject_unref(options); 456745b4949cSLeonid Bloch if (ret < 0) { 456845b4949cSLeonid Bloch goto fail; 456945b4949cSLeonid Bloch } 4570061ca8a3SKevin Wolf ret = 0; 4571061ca8a3SKevin Wolf fail: 4572061ca8a3SKevin Wolf qemu_co_mutex_unlock(&s->lock); 4573061ca8a3SKevin Wolf return ret; 4574419b19d9SStefan Hajnoczi } 4575419b19d9SStefan Hajnoczi 4576fcccefc5SPavel Butsykin static coroutine_fn int 45770d483dceSAndrey Shinkevich qcow2_co_pwritev_compressed_task(BlockDriverState *bs, 45785396234bSVladimir Sementsov-Ogievskiy uint64_t offset, uint64_t bytes, 45795396234bSVladimir Sementsov-Ogievskiy QEMUIOVector *qiov, size_t qiov_offset) 458020d97356SBlue Swirl { 4581ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 45822714f13dSVladimir Sementsov-Ogievskiy int ret; 4583e1f4a37aSAlberto Garcia ssize_t out_len; 4584fcccefc5SPavel Butsykin uint8_t *buf, *out_buf; 458577e023ffSKevin Wolf uint64_t cluster_offset; 458620d97356SBlue Swirl 45870d483dceSAndrey Shinkevich assert(bytes == s->cluster_size || (bytes < s->cluster_size && 45880d483dceSAndrey Shinkevich (offset + bytes == bs->total_sectors << BDRV_SECTOR_BITS))); 45893e3b838fSAnton Nefedov 4590fcccefc5SPavel Butsykin buf = qemu_blockalign(bs, s->cluster_size); 
45910d483dceSAndrey Shinkevich if (bytes < s->cluster_size) { 4592a2c0ca6fSPavel Butsykin /* Zero-pad last write if image size is not cluster aligned */ 4593a2c0ca6fSPavel Butsykin memset(buf + bytes, 0, s->cluster_size - bytes); 4594a2c0ca6fSPavel Butsykin } 45955396234bSVladimir Sementsov-Ogievskiy qemu_iovec_to_buf(qiov, qiov_offset, buf, bytes); 459620d97356SBlue Swirl 4597ebf7bba0SVladimir Sementsov-Ogievskiy out_buf = g_malloc(s->cluster_size); 459820d97356SBlue Swirl 45996994fd78SVladimir Sementsov-Ogievskiy out_len = qcow2_co_compress(bs, out_buf, s->cluster_size - 1, 46006994fd78SVladimir Sementsov-Ogievskiy buf, s->cluster_size); 4601e1f4a37aSAlberto Garcia if (out_len == -ENOMEM) { 460220d97356SBlue Swirl /* could not compress: write normal cluster */ 46035396234bSVladimir Sementsov-Ogievskiy ret = qcow2_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset, 0); 46048f1efd00SKevin Wolf if (ret < 0) { 46058f1efd00SKevin Wolf goto fail; 46068f1efd00SKevin Wolf } 4607fcccefc5SPavel Butsykin goto success; 4608e1f4a37aSAlberto Garcia } else if (out_len < 0) { 4609e1f4a37aSAlberto Garcia ret = -EINVAL; 4610e1f4a37aSAlberto Garcia goto fail; 4611fcccefc5SPavel Butsykin } 4612fcccefc5SPavel Butsykin 4613fcccefc5SPavel Butsykin qemu_co_mutex_lock(&s->lock); 461477e023ffSKevin Wolf ret = qcow2_alloc_compressed_cluster_offset(bs, offset, out_len, 461577e023ffSKevin Wolf &cluster_offset); 461677e023ffSKevin Wolf if (ret < 0) { 4617fcccefc5SPavel Butsykin qemu_co_mutex_unlock(&s->lock); 46188f1efd00SKevin Wolf goto fail; 46198f1efd00SKevin Wolf } 4620cf93980eSMax Reitz 4621966b000fSKevin Wolf ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len, true); 4622fcccefc5SPavel Butsykin qemu_co_mutex_unlock(&s->lock); 4623cf93980eSMax Reitz if (ret < 0) { 4624cf93980eSMax Reitz goto fail; 4625cf93980eSMax Reitz } 4626cf93980eSMax Reitz 4627966b000fSKevin Wolf BLKDBG_EVENT(s->data_file, BLKDBG_WRITE_COMPRESSED); 4628b00cb15bSVladimir Sementsov-Ogievskiy ret = 
bdrv_co_pwrite(s->data_file, cluster_offset, out_len, out_buf, 0); 46298f1efd00SKevin Wolf if (ret < 0) { 46308f1efd00SKevin Wolf goto fail; 463120d97356SBlue Swirl } 4632fcccefc5SPavel Butsykin success: 46338f1efd00SKevin Wolf ret = 0; 46348f1efd00SKevin Wolf fail: 4635fcccefc5SPavel Butsykin qemu_vfree(buf); 46367267c094SAnthony Liguori g_free(out_buf); 46378f1efd00SKevin Wolf return ret; 463820d97356SBlue Swirl } 463920d97356SBlue Swirl 46400d483dceSAndrey Shinkevich static coroutine_fn int qcow2_co_pwritev_compressed_task_entry(AioTask *task) 46410d483dceSAndrey Shinkevich { 46420d483dceSAndrey Shinkevich Qcow2AioTask *t = container_of(task, Qcow2AioTask, task); 46430d483dceSAndrey Shinkevich 464410dabdc5SAlberto Garcia assert(!t->subcluster_type && !t->l2meta); 46450d483dceSAndrey Shinkevich 46460d483dceSAndrey Shinkevich return qcow2_co_pwritev_compressed_task(t->bs, t->offset, t->bytes, t->qiov, 46470d483dceSAndrey Shinkevich t->qiov_offset); 46480d483dceSAndrey Shinkevich } 46490d483dceSAndrey Shinkevich 46500d483dceSAndrey Shinkevich /* 46510d483dceSAndrey Shinkevich * XXX: put compressed sectors first, then all the cluster aligned 46520d483dceSAndrey Shinkevich * tables to avoid losing bytes in alignment 46530d483dceSAndrey Shinkevich */ 46540d483dceSAndrey Shinkevich static coroutine_fn int 46550d483dceSAndrey Shinkevich qcow2_co_pwritev_compressed_part(BlockDriverState *bs, 4656e75abedaSVladimir Sementsov-Ogievskiy int64_t offset, int64_t bytes, 46570d483dceSAndrey Shinkevich QEMUIOVector *qiov, size_t qiov_offset) 46580d483dceSAndrey Shinkevich { 46590d483dceSAndrey Shinkevich BDRVQcow2State *s = bs->opaque; 46600d483dceSAndrey Shinkevich AioTaskPool *aio = NULL; 46610d483dceSAndrey Shinkevich int ret = 0; 46620d483dceSAndrey Shinkevich 46630d483dceSAndrey Shinkevich if (has_data_file(bs)) { 46640d483dceSAndrey Shinkevich return -ENOTSUP; 46650d483dceSAndrey Shinkevich } 46660d483dceSAndrey Shinkevich 46670d483dceSAndrey Shinkevich if (bytes == 0) { 
46680d483dceSAndrey Shinkevich /* 46690d483dceSAndrey Shinkevich * align end of file to a sector boundary to ease reading with 46700d483dceSAndrey Shinkevich * sector based I/Os 46710d483dceSAndrey Shinkevich */ 46720d483dceSAndrey Shinkevich int64_t len = bdrv_getlength(bs->file->bs); 46730d483dceSAndrey Shinkevich if (len < 0) { 46740d483dceSAndrey Shinkevich return len; 46750d483dceSAndrey Shinkevich } 46767b8e4857SKevin Wolf return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, 0, 46777b8e4857SKevin Wolf NULL); 46780d483dceSAndrey Shinkevich } 46790d483dceSAndrey Shinkevich 46800d483dceSAndrey Shinkevich if (offset_into_cluster(s, offset)) { 46810d483dceSAndrey Shinkevich return -EINVAL; 46820d483dceSAndrey Shinkevich } 46830d483dceSAndrey Shinkevich 4684fb43d2d4SAlberto Garcia if (offset_into_cluster(s, bytes) && 4685fb43d2d4SAlberto Garcia (offset + bytes) != (bs->total_sectors << BDRV_SECTOR_BITS)) { 4686fb43d2d4SAlberto Garcia return -EINVAL; 4687fb43d2d4SAlberto Garcia } 4688fb43d2d4SAlberto Garcia 46890d483dceSAndrey Shinkevich while (bytes && aio_task_pool_status(aio) == 0) { 46900d483dceSAndrey Shinkevich uint64_t chunk_size = MIN(bytes, s->cluster_size); 46910d483dceSAndrey Shinkevich 46920d483dceSAndrey Shinkevich if (!aio && chunk_size != bytes) { 46930d483dceSAndrey Shinkevich aio = aio_task_pool_new(QCOW2_MAX_WORKERS); 46940d483dceSAndrey Shinkevich } 46950d483dceSAndrey Shinkevich 46960d483dceSAndrey Shinkevich ret = qcow2_add_task(bs, aio, qcow2_co_pwritev_compressed_task_entry, 46970d483dceSAndrey Shinkevich 0, 0, offset, chunk_size, qiov, qiov_offset, NULL); 46980d483dceSAndrey Shinkevich if (ret < 0) { 46990d483dceSAndrey Shinkevich break; 47000d483dceSAndrey Shinkevich } 47010d483dceSAndrey Shinkevich qiov_offset += chunk_size; 47020d483dceSAndrey Shinkevich offset += chunk_size; 47030d483dceSAndrey Shinkevich bytes -= chunk_size; 47040d483dceSAndrey Shinkevich } 47050d483dceSAndrey Shinkevich 47060d483dceSAndrey Shinkevich if 
(aio) { 47070d483dceSAndrey Shinkevich aio_task_pool_wait_all(aio); 47080d483dceSAndrey Shinkevich if (ret == 0) { 47090d483dceSAndrey Shinkevich ret = aio_task_pool_status(aio); 47100d483dceSAndrey Shinkevich } 47110d483dceSAndrey Shinkevich g_free(aio); 47120d483dceSAndrey Shinkevich } 47130d483dceSAndrey Shinkevich 47140d483dceSAndrey Shinkevich return ret; 47150d483dceSAndrey Shinkevich } 47160d483dceSAndrey Shinkevich 4717c3c10f72SVladimir Sementsov-Ogievskiy static int coroutine_fn 4718c3c10f72SVladimir Sementsov-Ogievskiy qcow2_co_preadv_compressed(BlockDriverState *bs, 47199a3978a4SVladimir Sementsov-Ogievskiy uint64_t l2_entry, 4720c3c10f72SVladimir Sementsov-Ogievskiy uint64_t offset, 4721c3c10f72SVladimir Sementsov-Ogievskiy uint64_t bytes, 4722df893d25SVladimir Sementsov-Ogievskiy QEMUIOVector *qiov, 4723df893d25SVladimir Sementsov-Ogievskiy size_t qiov_offset) 4724f4b3e2a9SVladimir Sementsov-Ogievskiy { 4725f4b3e2a9SVladimir Sementsov-Ogievskiy BDRVQcow2State *s = bs->opaque; 4726a6e09846SVladimir Sementsov-Ogievskiy int ret = 0, csize; 4727f4b3e2a9SVladimir Sementsov-Ogievskiy uint64_t coffset; 4728c3c10f72SVladimir Sementsov-Ogievskiy uint8_t *buf, *out_buf; 4729c3c10f72SVladimir Sementsov-Ogievskiy int offset_in_cluster = offset_into_cluster(s, offset); 4730f4b3e2a9SVladimir Sementsov-Ogievskiy 4731a6e09846SVladimir Sementsov-Ogievskiy qcow2_parse_compressed_l2_entry(bs, l2_entry, &coffset, &csize); 4732f4b3e2a9SVladimir Sementsov-Ogievskiy 4733c3c10f72SVladimir Sementsov-Ogievskiy buf = g_try_malloc(csize); 4734c3c10f72SVladimir Sementsov-Ogievskiy if (!buf) { 4735f4b3e2a9SVladimir Sementsov-Ogievskiy return -ENOMEM; 4736f4b3e2a9SVladimir Sementsov-Ogievskiy } 4737c068a1cdSVladimir Sementsov-Ogievskiy 4738c3c10f72SVladimir Sementsov-Ogievskiy out_buf = qemu_blockalign(bs, s->cluster_size); 4739c3c10f72SVladimir Sementsov-Ogievskiy 4740f4b3e2a9SVladimir Sementsov-Ogievskiy BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED); 4741b00cb15bSVladimir 
Sementsov-Ogievskiy ret = bdrv_co_pread(bs->file, coffset, csize, buf, 0); 4742f4b3e2a9SVladimir Sementsov-Ogievskiy if (ret < 0) { 4743c3c10f72SVladimir Sementsov-Ogievskiy goto fail; 4744c3c10f72SVladimir Sementsov-Ogievskiy } 4745c3c10f72SVladimir Sementsov-Ogievskiy 4746e23c9d7aSVladimir Sementsov-Ogievskiy if (qcow2_co_decompress(bs, out_buf, s->cluster_size, buf, csize) < 0) { 4747c3c10f72SVladimir Sementsov-Ogievskiy ret = -EIO; 4748c3c10f72SVladimir Sementsov-Ogievskiy goto fail; 4749c3c10f72SVladimir Sementsov-Ogievskiy } 4750c3c10f72SVladimir Sementsov-Ogievskiy 4751df893d25SVladimir Sementsov-Ogievskiy qemu_iovec_from_buf(qiov, qiov_offset, out_buf + offset_in_cluster, bytes); 4752c3c10f72SVladimir Sementsov-Ogievskiy 4753c3c10f72SVladimir Sementsov-Ogievskiy fail: 4754c3c10f72SVladimir Sementsov-Ogievskiy qemu_vfree(out_buf); 4755c3c10f72SVladimir Sementsov-Ogievskiy g_free(buf); 4756c3c10f72SVladimir Sementsov-Ogievskiy 4757f4b3e2a9SVladimir Sementsov-Ogievskiy return ret; 4758f4b3e2a9SVladimir Sementsov-Ogievskiy } 4759f4b3e2a9SVladimir Sementsov-Ogievskiy 476094054183SMax Reitz static int make_completely_empty(BlockDriverState *bs) 476194054183SMax Reitz { 4762ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 4763ed3d2ec9SMax Reitz Error *local_err = NULL; 476494054183SMax Reitz int ret, l1_clusters; 476594054183SMax Reitz int64_t offset; 476694054183SMax Reitz uint64_t *new_reftable = NULL; 476794054183SMax Reitz uint64_t rt_entry, l1_size2; 476894054183SMax Reitz struct { 476994054183SMax Reitz uint64_t l1_offset; 477094054183SMax Reitz uint64_t reftable_offset; 477194054183SMax Reitz uint32_t reftable_clusters; 477294054183SMax Reitz } QEMU_PACKED l1_ofs_rt_ofs_cls; 477394054183SMax Reitz 477494054183SMax Reitz ret = qcow2_cache_empty(bs, s->l2_table_cache); 477594054183SMax Reitz if (ret < 0) { 477694054183SMax Reitz goto fail; 477794054183SMax Reitz } 477894054183SMax Reitz 477994054183SMax Reitz ret = qcow2_cache_empty(bs, 
s->refcount_block_cache); 478094054183SMax Reitz if (ret < 0) { 478194054183SMax Reitz goto fail; 478294054183SMax Reitz } 478394054183SMax Reitz 478494054183SMax Reitz /* Refcounts will be broken utterly */ 478594054183SMax Reitz ret = qcow2_mark_dirty(bs); 478694054183SMax Reitz if (ret < 0) { 478794054183SMax Reitz goto fail; 478894054183SMax Reitz } 478994054183SMax Reitz 479094054183SMax Reitz BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); 479194054183SMax Reitz 479202b1ecfaSAlberto Garcia l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / L1E_SIZE); 479302b1ecfaSAlberto Garcia l1_size2 = (uint64_t)s->l1_size * L1E_SIZE; 479494054183SMax Reitz 479594054183SMax Reitz /* After this call, neither the in-memory nor the on-disk refcount 479694054183SMax Reitz * information accurately describe the actual references */ 479794054183SMax Reitz 4798720ff280SKevin Wolf ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset, 479974021bc4SEric Blake l1_clusters * s->cluster_size, 0); 480094054183SMax Reitz if (ret < 0) { 480194054183SMax Reitz goto fail_broken_refcounts; 480294054183SMax Reitz } 480394054183SMax Reitz memset(s->l1_table, 0, l1_size2); 480494054183SMax Reitz 480594054183SMax Reitz BLKDBG_EVENT(bs->file, BLKDBG_EMPTY_IMAGE_PREPARE); 480694054183SMax Reitz 480794054183SMax Reitz /* Overwrite enough clusters at the beginning of the sectors to place 480894054183SMax Reitz * the refcount table, a refcount block and the L1 table in; this may 480994054183SMax Reitz * overwrite parts of the existing refcount and L1 table, which is not 481094054183SMax Reitz * an issue because the dirty flag is set, complete data loss is in fact 481194054183SMax Reitz * desired and partial data loss is consequently fine as well */ 4812720ff280SKevin Wolf ret = bdrv_pwrite_zeroes(bs->file, s->cluster_size, 481374021bc4SEric Blake (2 + l1_clusters) * s->cluster_size, 0); 481494054183SMax Reitz /* This call (even if it failed overall) may have overwritten on-disk 481594054183SMax Reitz 
* refcount structures; in that case, the in-memory refcount information 481694054183SMax Reitz * will probably differ from the on-disk information which makes the BDS 481794054183SMax Reitz * unusable */ 481894054183SMax Reitz if (ret < 0) { 481994054183SMax Reitz goto fail_broken_refcounts; 482094054183SMax Reitz } 482194054183SMax Reitz 482294054183SMax Reitz BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); 482394054183SMax Reitz BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE); 482494054183SMax Reitz 482594054183SMax Reitz /* "Create" an empty reftable (one cluster) directly after the image 482694054183SMax Reitz * header and an empty L1 table three clusters after the image header; 482794054183SMax Reitz * the cluster between those two will be used as the first refblock */ 4828f1f7a1ddSPeter Maydell l1_ofs_rt_ofs_cls.l1_offset = cpu_to_be64(3 * s->cluster_size); 4829f1f7a1ddSPeter Maydell l1_ofs_rt_ofs_cls.reftable_offset = cpu_to_be64(s->cluster_size); 4830f1f7a1ddSPeter Maydell l1_ofs_rt_ofs_cls.reftable_clusters = cpu_to_be32(1); 4831d9ca2ea2SKevin Wolf ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset), 483232cc71deSAlberto Faria sizeof(l1_ofs_rt_ofs_cls), &l1_ofs_rt_ofs_cls, 0); 483394054183SMax Reitz if (ret < 0) { 483494054183SMax Reitz goto fail_broken_refcounts; 483594054183SMax Reitz } 483694054183SMax Reitz 483794054183SMax Reitz s->l1_table_offset = 3 * s->cluster_size; 483894054183SMax Reitz 483902b1ecfaSAlberto Garcia new_reftable = g_try_new0(uint64_t, s->cluster_size / REFTABLE_ENTRY_SIZE); 484094054183SMax Reitz if (!new_reftable) { 484194054183SMax Reitz ret = -ENOMEM; 484294054183SMax Reitz goto fail_broken_refcounts; 484394054183SMax Reitz } 484494054183SMax Reitz 484594054183SMax Reitz s->refcount_table_offset = s->cluster_size; 484602b1ecfaSAlberto Garcia s->refcount_table_size = s->cluster_size / REFTABLE_ENTRY_SIZE; 48477061a078SAlberto Garcia s->max_refcount_table_index = 0; 484894054183SMax Reitz 484994054183SMax Reitz 
g_free(s->refcount_table); 485094054183SMax Reitz s->refcount_table = new_reftable; 485194054183SMax Reitz new_reftable = NULL; 485294054183SMax Reitz 485394054183SMax Reitz /* Now the in-memory refcount information again corresponds to the on-disk 485494054183SMax Reitz * information (reftable is empty and no refblocks (the refblock cache is 485594054183SMax Reitz * empty)); however, this means some clusters (e.g. the image header) are 485694054183SMax Reitz * referenced, but not refcounted, but the normal qcow2 code assumes that 485794054183SMax Reitz * the in-memory information is always correct */ 485894054183SMax Reitz 485994054183SMax Reitz BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC); 486094054183SMax Reitz 486194054183SMax Reitz /* Enter the first refblock into the reftable */ 486294054183SMax Reitz rt_entry = cpu_to_be64(2 * s->cluster_size); 486332cc71deSAlberto Faria ret = bdrv_pwrite_sync(bs->file, s->cluster_size, sizeof(rt_entry), 486432cc71deSAlberto Faria &rt_entry, 0); 486594054183SMax Reitz if (ret < 0) { 486694054183SMax Reitz goto fail_broken_refcounts; 486794054183SMax Reitz } 486894054183SMax Reitz s->refcount_table[0] = 2 * s->cluster_size; 486994054183SMax Reitz 487094054183SMax Reitz s->free_cluster_index = 0; 487194054183SMax Reitz assert(3 + l1_clusters <= s->refcount_block_size); 487294054183SMax Reitz offset = qcow2_alloc_clusters(bs, 3 * s->cluster_size + l1_size2); 487394054183SMax Reitz if (offset < 0) { 487494054183SMax Reitz ret = offset; 487594054183SMax Reitz goto fail_broken_refcounts; 487694054183SMax Reitz } else if (offset > 0) { 487794054183SMax Reitz error_report("First cluster in emptied image is in use"); 487894054183SMax Reitz abort(); 487994054183SMax Reitz } 488094054183SMax Reitz 488194054183SMax Reitz /* Now finally the in-memory information corresponds to the on-disk 488294054183SMax Reitz * structures and is correct */ 488394054183SMax Reitz ret = qcow2_mark_clean(bs); 488494054183SMax Reitz if (ret < 0) { 
488594054183SMax Reitz goto fail; 488694054183SMax Reitz } 488794054183SMax Reitz 4888c80d8b06SMax Reitz ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size, false, 48897b8e4857SKevin Wolf PREALLOC_MODE_OFF, 0, &local_err); 489094054183SMax Reitz if (ret < 0) { 4891ed3d2ec9SMax Reitz error_report_err(local_err); 489294054183SMax Reitz goto fail; 489394054183SMax Reitz } 489494054183SMax Reitz 489594054183SMax Reitz return 0; 489694054183SMax Reitz 489794054183SMax Reitz fail_broken_refcounts: 489894054183SMax Reitz /* The BDS is unusable at this point. If we wanted to make it usable, we 489994054183SMax Reitz * would have to call qcow2_refcount_close(), qcow2_refcount_init(), 490094054183SMax Reitz * qcow2_check_refcounts(), qcow2_refcount_close() and qcow2_refcount_init() 490194054183SMax Reitz * again. However, because the functions which could have caused this error 490294054183SMax Reitz * path to be taken are used by those functions as well, it's very likely 490394054183SMax Reitz * that that sequence will fail as well. Therefore, just eject the BDS. */ 490494054183SMax Reitz bs->drv = NULL; 490594054183SMax Reitz 490694054183SMax Reitz fail: 490794054183SMax Reitz g_free(new_reftable); 490894054183SMax Reitz return ret; 490994054183SMax Reitz } 491094054183SMax Reitz 4911491d27e2SMax Reitz static int qcow2_make_empty(BlockDriverState *bs) 4912491d27e2SMax Reitz { 4913ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 4914d2cb36afSEric Blake uint64_t offset, end_offset; 4915d2cb36afSEric Blake int step = QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size); 491694054183SMax Reitz int l1_clusters, ret = 0; 4917491d27e2SMax Reitz 491802b1ecfaSAlberto Garcia l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / L1E_SIZE); 491994054183SMax Reitz 49204096974eSEric Blake if (s->qcow_version >= 3 && !s->snapshots && !s->nb_bitmaps && 4921f0603329SDaniel P. 
Berrange 3 + l1_clusters <= s->refcount_block_size && 4922db04524fSKevin Wolf s->crypt_method_header != QCOW_CRYPT_LUKS && 4923db04524fSKevin Wolf !has_data_file(bs)) { 49244096974eSEric Blake /* The following function only works for qcow2 v3 images (it 49254096974eSEric Blake * requires the dirty flag) and only as long as there are no 49264096974eSEric Blake * features that reserve extra clusters (such as snapshots, 49274096974eSEric Blake * LUKS header, or persistent bitmaps), because it completely 49284096974eSEric Blake * empties the image. Furthermore, the L1 table and three 49294096974eSEric Blake * additional clusters (image header, refcount table, one 4930db04524fSKevin Wolf * refcount block) have to fit inside one refcount block. It 4931db04524fSKevin Wolf * only resets the image file, i.e. does not work with an 4932db04524fSKevin Wolf * external data file. */ 493394054183SMax Reitz return make_completely_empty(bs); 493494054183SMax Reitz } 493594054183SMax Reitz 493694054183SMax Reitz /* This fallback code simply discards every active cluster; this is slow, 493794054183SMax Reitz * but works in all cases */ 4938d2cb36afSEric Blake end_offset = bs->total_sectors * BDRV_SECTOR_SIZE; 4939d2cb36afSEric Blake for (offset = 0; offset < end_offset; offset += step) { 4940491d27e2SMax Reitz /* As this function is generally used after committing an external 4941491d27e2SMax Reitz * snapshot, QCOW2_DISCARD_SNAPSHOT seems appropriate. Also, the 4942491d27e2SMax Reitz * default action for this kind of discard is to pass the discard, 4943491d27e2SMax Reitz * which will ideally result in an actually smaller image file, as 4944491d27e2SMax Reitz * is probably desired. 
*/ 4945d2cb36afSEric Blake ret = qcow2_cluster_discard(bs, offset, MIN(step, end_offset - offset), 4946491d27e2SMax Reitz QCOW2_DISCARD_SNAPSHOT, true); 4947491d27e2SMax Reitz if (ret < 0) { 4948491d27e2SMax Reitz break; 4949491d27e2SMax Reitz } 4950491d27e2SMax Reitz } 4951491d27e2SMax Reitz 4952491d27e2SMax Reitz return ret; 4953491d27e2SMax Reitz } 4954491d27e2SMax Reitz 4955a968168cSDong Xu Wang static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs) 495620d97356SBlue Swirl { 4957ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 495829c1a730SKevin Wolf int ret; 495929c1a730SKevin Wolf 49608b94ff85SPaolo Bonzini qemu_co_mutex_lock(&s->lock); 49618b220eb7SPaolo Bonzini ret = qcow2_write_caches(bs); 49628b94ff85SPaolo Bonzini qemu_co_mutex_unlock(&s->lock); 496329c1a730SKevin Wolf 49648b220eb7SPaolo Bonzini return ret; 4965eb489bb1SKevin Wolf } 4966eb489bb1SKevin Wolf 4967c501c352SStefan Hajnoczi static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, 4968c501c352SStefan Hajnoczi Error **errp) 4969c501c352SStefan Hajnoczi { 4970c501c352SStefan Hajnoczi Error *local_err = NULL; 4971c501c352SStefan Hajnoczi BlockMeasureInfo *info; 4972c501c352SStefan Hajnoczi uint64_t required = 0; /* bytes that contribute to required size */ 4973c501c352SStefan Hajnoczi uint64_t virtual_size; /* disk size as seen by guest */ 4974c501c352SStefan Hajnoczi uint64_t refcount_bits; 4975c501c352SStefan Hajnoczi uint64_t l2_tables; 497661914f89SStefan Hajnoczi uint64_t luks_payload_size = 0; 4977c501c352SStefan Hajnoczi size_t cluster_size; 4978c501c352SStefan Hajnoczi int version; 4979c501c352SStefan Hajnoczi char *optstr; 4980c501c352SStefan Hajnoczi PreallocMode prealloc; 4981c501c352SStefan Hajnoczi bool has_backing_file; 498261914f89SStefan Hajnoczi bool has_luks; 49837be20252SAlberto Garcia bool extended_l2; 49840dd07b29SAlberto Garcia size_t l2e_size; 4985c501c352SStefan Hajnoczi 4986c501c352SStefan Hajnoczi /* Parse image creation options */ 
49877be20252SAlberto Garcia extended_l2 = qemu_opt_get_bool_del(opts, BLOCK_OPT_EXTL2, false); 49887be20252SAlberto Garcia 49897be20252SAlberto Garcia cluster_size = qcow2_opt_get_cluster_size_del(opts, extended_l2, 49907be20252SAlberto Garcia &local_err); 4991c501c352SStefan Hajnoczi if (local_err) { 4992c501c352SStefan Hajnoczi goto err; 4993c501c352SStefan Hajnoczi } 4994c501c352SStefan Hajnoczi 4995c501c352SStefan Hajnoczi version = qcow2_opt_get_version_del(opts, &local_err); 4996c501c352SStefan Hajnoczi if (local_err) { 4997c501c352SStefan Hajnoczi goto err; 4998c501c352SStefan Hajnoczi } 4999c501c352SStefan Hajnoczi 5000c501c352SStefan Hajnoczi refcount_bits = qcow2_opt_get_refcount_bits_del(opts, version, &local_err); 5001c501c352SStefan Hajnoczi if (local_err) { 5002c501c352SStefan Hajnoczi goto err; 5003c501c352SStefan Hajnoczi } 5004c501c352SStefan Hajnoczi 5005c501c352SStefan Hajnoczi optstr = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); 5006f7abe0ecSMarc-André Lureau prealloc = qapi_enum_parse(&PreallocMode_lookup, optstr, 500706c60b6cSMarkus Armbruster PREALLOC_MODE_OFF, &local_err); 5008c501c352SStefan Hajnoczi g_free(optstr); 5009c501c352SStefan Hajnoczi if (local_err) { 5010c501c352SStefan Hajnoczi goto err; 5011c501c352SStefan Hajnoczi } 5012c501c352SStefan Hajnoczi 5013c501c352SStefan Hajnoczi optstr = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE); 5014c501c352SStefan Hajnoczi has_backing_file = !!optstr; 5015c501c352SStefan Hajnoczi g_free(optstr); 5016c501c352SStefan Hajnoczi 501761914f89SStefan Hajnoczi optstr = qemu_opt_get_del(opts, BLOCK_OPT_ENCRYPT_FORMAT); 501861914f89SStefan Hajnoczi has_luks = optstr && strcmp(optstr, "luks") == 0; 501961914f89SStefan Hajnoczi g_free(optstr); 502061914f89SStefan Hajnoczi 502161914f89SStefan Hajnoczi if (has_luks) { 50226d49d3a8SStefan Hajnoczi g_autoptr(QCryptoBlockCreateOptions) create_opts = NULL; 502390766d9dSMaxim Levitsky QDict *cryptoopts = qcow2_extract_crypto_opts(opts, "luks", errp); 
502461914f89SStefan Hajnoczi size_t headerlen; 502561914f89SStefan Hajnoczi 50266d49d3a8SStefan Hajnoczi create_opts = block_crypto_create_opts_init(cryptoopts, errp); 50276d49d3a8SStefan Hajnoczi qobject_unref(cryptoopts); 50286d49d3a8SStefan Hajnoczi if (!create_opts) { 50296d49d3a8SStefan Hajnoczi goto err; 50306d49d3a8SStefan Hajnoczi } 50316d49d3a8SStefan Hajnoczi 50326d49d3a8SStefan Hajnoczi if (!qcrypto_block_calculate_payload_offset(create_opts, 50336d49d3a8SStefan Hajnoczi "encrypt.", 50346d49d3a8SStefan Hajnoczi &headerlen, 50356d49d3a8SStefan Hajnoczi &local_err)) { 503661914f89SStefan Hajnoczi goto err; 503761914f89SStefan Hajnoczi } 503861914f89SStefan Hajnoczi 503961914f89SStefan Hajnoczi luks_payload_size = ROUND_UP(headerlen, cluster_size); 504061914f89SStefan Hajnoczi } 504161914f89SStefan Hajnoczi 50429e029689SAlberto Garcia virtual_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); 50439e029689SAlberto Garcia virtual_size = ROUND_UP(virtual_size, cluster_size); 5044c501c352SStefan Hajnoczi 5045c501c352SStefan Hajnoczi /* Check that virtual disk size is valid */ 50460dd07b29SAlberto Garcia l2e_size = extended_l2 ? 
L2E_SIZE_EXTENDED : L2E_SIZE_NORMAL; 5047c501c352SStefan Hajnoczi l2_tables = DIV_ROUND_UP(virtual_size / cluster_size, 50480dd07b29SAlberto Garcia cluster_size / l2e_size); 504902b1ecfaSAlberto Garcia if (l2_tables * L1E_SIZE > QCOW_MAX_L1_SIZE) { 5050c501c352SStefan Hajnoczi error_setg(&local_err, "The image size is too large " 5051c501c352SStefan Hajnoczi "(try using a larger cluster size)"); 5052c501c352SStefan Hajnoczi goto err; 5053c501c352SStefan Hajnoczi } 5054c501c352SStefan Hajnoczi 5055c501c352SStefan Hajnoczi /* Account for input image */ 5056c501c352SStefan Hajnoczi if (in_bs) { 5057c501c352SStefan Hajnoczi int64_t ssize = bdrv_getlength(in_bs); 5058c501c352SStefan Hajnoczi if (ssize < 0) { 5059c501c352SStefan Hajnoczi error_setg_errno(&local_err, -ssize, 5060c501c352SStefan Hajnoczi "Unable to get image virtual_size"); 5061c501c352SStefan Hajnoczi goto err; 5062c501c352SStefan Hajnoczi } 5063c501c352SStefan Hajnoczi 50649e029689SAlberto Garcia virtual_size = ROUND_UP(ssize, cluster_size); 5065c501c352SStefan Hajnoczi 5066c501c352SStefan Hajnoczi if (has_backing_file) { 5067c501c352SStefan Hajnoczi /* We don't how much of the backing chain is shared by the input 5068c501c352SStefan Hajnoczi * image and the new image file. In the worst case the new image's 5069c501c352SStefan Hajnoczi * backing file has nothing in common with the input image. Be 5070c501c352SStefan Hajnoczi * conservative and assume all clusters need to be written. 
5071c501c352SStefan Hajnoczi */ 5072c501c352SStefan Hajnoczi required = virtual_size; 5073c501c352SStefan Hajnoczi } else { 5074b85ee453SEric Blake int64_t offset; 507531826642SEric Blake int64_t pnum = 0; 5076c501c352SStefan Hajnoczi 507731826642SEric Blake for (offset = 0; offset < ssize; offset += pnum) { 507831826642SEric Blake int ret; 5079c501c352SStefan Hajnoczi 508031826642SEric Blake ret = bdrv_block_status_above(in_bs, NULL, offset, 508131826642SEric Blake ssize - offset, &pnum, NULL, 508231826642SEric Blake NULL); 5083c501c352SStefan Hajnoczi if (ret < 0) { 5084c501c352SStefan Hajnoczi error_setg_errno(&local_err, -ret, 5085c501c352SStefan Hajnoczi "Unable to get block status"); 5086c501c352SStefan Hajnoczi goto err; 5087c501c352SStefan Hajnoczi } 5088c501c352SStefan Hajnoczi 5089c501c352SStefan Hajnoczi if (ret & BDRV_BLOCK_ZERO) { 5090c501c352SStefan Hajnoczi /* Skip zero regions (safe with no backing file) */ 5091c501c352SStefan Hajnoczi } else if ((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) == 5092c501c352SStefan Hajnoczi (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) { 5093c501c352SStefan Hajnoczi /* Extend pnum to end of cluster for next iteration */ 509431826642SEric Blake pnum = ROUND_UP(offset + pnum, cluster_size) - offset; 5095c501c352SStefan Hajnoczi 5096c501c352SStefan Hajnoczi /* Count clusters we've seen */ 509731826642SEric Blake required += offset % cluster_size + pnum; 5098c501c352SStefan Hajnoczi } 5099c501c352SStefan Hajnoczi } 5100c501c352SStefan Hajnoczi } 5101c501c352SStefan Hajnoczi } 5102c501c352SStefan Hajnoczi 5103c501c352SStefan Hajnoczi /* Take into account preallocation. Nothing special is needed for 5104c501c352SStefan Hajnoczi * PREALLOC_MODE_METADATA since metadata is always counted. 
5105c501c352SStefan Hajnoczi */ 5106c501c352SStefan Hajnoczi if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) { 5107c501c352SStefan Hajnoczi required = virtual_size; 5108c501c352SStefan Hajnoczi } 5109c501c352SStefan Hajnoczi 51105d72c68bSEric Blake info = g_new0(BlockMeasureInfo, 1); 51110dd07b29SAlberto Garcia info->fully_allocated = luks_payload_size + 5112c501c352SStefan Hajnoczi qcow2_calc_prealloc_size(virtual_size, cluster_size, 51130dd07b29SAlberto Garcia ctz32(refcount_bits), extended_l2); 5114c501c352SStefan Hajnoczi 51155d72c68bSEric Blake /* 51165d72c68bSEric Blake * Remove data clusters that are not required. This overestimates the 5117c501c352SStefan Hajnoczi * required size because metadata needed for the fully allocated file is 51185d72c68bSEric Blake * still counted. Show bitmaps only if both source and destination 51195d72c68bSEric Blake * would support them. 5120c501c352SStefan Hajnoczi */ 5121c501c352SStefan Hajnoczi info->required = info->fully_allocated - virtual_size + required; 51225d72c68bSEric Blake info->has_bitmaps = version >= 3 && in_bs && 51235d72c68bSEric Blake bdrv_supports_persistent_dirty_bitmap(in_bs); 51245d72c68bSEric Blake if (info->has_bitmaps) { 51255d72c68bSEric Blake info->bitmaps = qcow2_get_persistent_dirty_bitmap_size(in_bs, 51265d72c68bSEric Blake cluster_size); 51275d72c68bSEric Blake } 5128c501c352SStefan Hajnoczi return info; 5129c501c352SStefan Hajnoczi 5130c501c352SStefan Hajnoczi err: 5131c501c352SStefan Hajnoczi error_propagate(errp, local_err); 5132c501c352SStefan Hajnoczi return NULL; 5133c501c352SStefan Hajnoczi } 5134c501c352SStefan Hajnoczi 51357c80ab3fSJes Sorensen static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 513620d97356SBlue Swirl { 5137ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 513820d97356SBlue Swirl bdi->cluster_size = s->cluster_size; 51397c80ab3fSJes Sorensen bdi->vm_state_offset = qcow2_vm_state_offset(s); 514038b44096SVladimir 
Sementsov-Ogievskiy bdi->is_dirty = s->incompatible_features & QCOW2_INCOMPAT_DIRTY; 514120d97356SBlue Swirl return 0; 514220d97356SBlue Swirl } 514320d97356SBlue Swirl 51441bf6e9caSAndrey Shinkevich static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs, 51451bf6e9caSAndrey Shinkevich Error **errp) 514637764dfbSMax Reitz { 5147ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 51480a12f6f8SDaniel P. Berrange ImageInfoSpecific *spec_info; 51490a12f6f8SDaniel P. Berrange QCryptoBlockInfo *encrypt_info = NULL; 515037764dfbSMax Reitz 51510a12f6f8SDaniel P. Berrange if (s->crypto != NULL) { 515283bad8cbSVladimir Sementsov-Ogievskiy encrypt_info = qcrypto_block_get_info(s->crypto, errp); 515383bad8cbSVladimir Sementsov-Ogievskiy if (!encrypt_info) { 51541bf6e9caSAndrey Shinkevich return NULL; 51551bf6e9caSAndrey Shinkevich } 51560a12f6f8SDaniel P. Berrange } 51570a12f6f8SDaniel P. Berrange 51580a12f6f8SDaniel P. Berrange spec_info = g_new(ImageInfoSpecific, 1); 515937764dfbSMax Reitz *spec_info = (ImageInfoSpecific){ 51606a8f9661SEric Blake .type = IMAGE_INFO_SPECIFIC_KIND_QCOW2, 5161b8968c87SAndrey Shinkevich .u.qcow2.data = g_new0(ImageInfoSpecificQCow2, 1), 516237764dfbSMax Reitz }; 516337764dfbSMax Reitz if (s->qcow_version == 2) { 516432bafa8fSEric Blake *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){ 516537764dfbSMax Reitz .compat = g_strdup("0.10"), 51660709c5a1SMax Reitz .refcount_bits = s->refcount_bits, 516737764dfbSMax Reitz }; 516837764dfbSMax Reitz } else if (s->qcow_version == 3) { 5169b8968c87SAndrey Shinkevich Qcow2BitmapInfoList *bitmaps; 517083bad8cbSVladimir Sementsov-Ogievskiy if (!qcow2_get_bitmap_info_list(bs, &bitmaps, errp)) { 5171b8968c87SAndrey Shinkevich qapi_free_ImageInfoSpecific(spec_info); 517271eaec2eSEric Blake qapi_free_QCryptoBlockInfo(encrypt_info); 5173b8968c87SAndrey Shinkevich return NULL; 5174b8968c87SAndrey Shinkevich } 517532bafa8fSEric Blake *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){ 
517637764dfbSMax Reitz .compat = g_strdup("1.1"), 517737764dfbSMax Reitz .lazy_refcounts = s->compatible_features & 517837764dfbSMax Reitz QCOW2_COMPAT_LAZY_REFCOUNTS, 517937764dfbSMax Reitz .has_lazy_refcounts = true, 51809009b196SMax Reitz .corrupt = s->incompatible_features & 51819009b196SMax Reitz QCOW2_INCOMPAT_CORRUPT, 51829009b196SMax Reitz .has_corrupt = true, 51837be20252SAlberto Garcia .has_extended_l2 = true, 51847be20252SAlberto Garcia .extended_l2 = has_subclusters(s), 51850709c5a1SMax Reitz .refcount_bits = s->refcount_bits, 5186b8968c87SAndrey Shinkevich .has_bitmaps = !!bitmaps, 5187b8968c87SAndrey Shinkevich .bitmaps = bitmaps, 51889b890bdcSKevin Wolf .has_data_file = !!s->image_data_file, 51899b890bdcSKevin Wolf .data_file = g_strdup(s->image_data_file), 51906c3944dcSKevin Wolf .has_data_file_raw = has_data_file(bs), 51916c3944dcSKevin Wolf .data_file_raw = data_file_is_raw(bs), 5192572ad978SDenis Plotnikov .compression_type = s->compression_type, 519337764dfbSMax Reitz }; 5194b1fc8f93SDenis V. Lunev } else { 5195b1fc8f93SDenis V. Lunev /* if this assertion fails, this probably means a new version was 5196b1fc8f93SDenis V. Lunev * added without having it covered here */ 5197b1fc8f93SDenis V. Lunev assert(false); 519837764dfbSMax Reitz } 519937764dfbSMax Reitz 52000a12f6f8SDaniel P. Berrange if (encrypt_info) { 52010a12f6f8SDaniel P. Berrange ImageInfoSpecificQCow2Encryption *qencrypt = 52020a12f6f8SDaniel P. Berrange g_new(ImageInfoSpecificQCow2Encryption, 1); 52030a12f6f8SDaniel P. Berrange switch (encrypt_info->format) { 52040a12f6f8SDaniel P. Berrange case Q_CRYPTO_BLOCK_FORMAT_QCOW: 52050a12f6f8SDaniel P. Berrange qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_AES; 52060a12f6f8SDaniel P. Berrange break; 52070a12f6f8SDaniel P. Berrange case Q_CRYPTO_BLOCK_FORMAT_LUKS: 52080a12f6f8SDaniel P. Berrange qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_LUKS; 52090a12f6f8SDaniel P. 
Berrange qencrypt->u.luks = encrypt_info->u.luks; 52100a12f6f8SDaniel P. Berrange break; 52110a12f6f8SDaniel P. Berrange default: 52120a12f6f8SDaniel P. Berrange abort(); 52130a12f6f8SDaniel P. Berrange } 52140a12f6f8SDaniel P. Berrange /* Since we did shallow copy above, erase any pointers 52150a12f6f8SDaniel P. Berrange * in the original info */ 52160a12f6f8SDaniel P. Berrange memset(&encrypt_info->u, 0, sizeof(encrypt_info->u)); 52170a12f6f8SDaniel P. Berrange qapi_free_QCryptoBlockInfo(encrypt_info); 52180a12f6f8SDaniel P. Berrange 52190a12f6f8SDaniel P. Berrange spec_info->u.qcow2.data->has_encrypt = true; 52200a12f6f8SDaniel P. Berrange spec_info->u.qcow2.data->encrypt = qencrypt; 52210a12f6f8SDaniel P. Berrange } 52220a12f6f8SDaniel P. Berrange 522337764dfbSMax Reitz return spec_info; 522437764dfbSMax Reitz } 522537764dfbSMax Reitz 522638841dcdSMax Reitz static int qcow2_has_zero_init(BlockDriverState *bs) 522738841dcdSMax Reitz { 522838841dcdSMax Reitz BDRVQcow2State *s = bs->opaque; 522938841dcdSMax Reitz bool preallocated; 523038841dcdSMax Reitz 523138841dcdSMax Reitz if (qemu_in_coroutine()) { 523238841dcdSMax Reitz qemu_co_mutex_lock(&s->lock); 523338841dcdSMax Reitz } 523438841dcdSMax Reitz /* 523538841dcdSMax Reitz * Check preallocation status: Preallocated images have all L2 523638841dcdSMax Reitz * tables allocated, nonpreallocated images have none. It is 523738841dcdSMax Reitz * therefore enough to check the first one. 
523838841dcdSMax Reitz */ 523938841dcdSMax Reitz preallocated = s->l1_size > 0 && s->l1_table[0] != 0; 524038841dcdSMax Reitz if (qemu_in_coroutine()) { 524138841dcdSMax Reitz qemu_co_mutex_unlock(&s->lock); 524238841dcdSMax Reitz } 524338841dcdSMax Reitz 524438841dcdSMax Reitz if (!preallocated) { 524538841dcdSMax Reitz return 1; 524638841dcdSMax Reitz } else if (bs->encrypted) { 524738841dcdSMax Reitz return 0; 524838841dcdSMax Reitz } else { 524938841dcdSMax Reitz return bdrv_has_zero_init(s->data_file->bs); 525038841dcdSMax Reitz } 525138841dcdSMax Reitz } 525238841dcdSMax Reitz 5253558902ccSVladimir Sementsov-Ogievskiy /* 5254558902ccSVladimir Sementsov-Ogievskiy * Check the request to vmstate. On success return 5255558902ccSVladimir Sementsov-Ogievskiy * qcow2_vm_state_offset(bs) + @pos 5256558902ccSVladimir Sementsov-Ogievskiy */ 5257558902ccSVladimir Sementsov-Ogievskiy static int64_t qcow2_check_vmstate_request(BlockDriverState *bs, 5258558902ccSVladimir Sementsov-Ogievskiy QEMUIOVector *qiov, int64_t pos) 5259558902ccSVladimir Sementsov-Ogievskiy { 5260558902ccSVladimir Sementsov-Ogievskiy BDRVQcow2State *s = bs->opaque; 5261558902ccSVladimir Sementsov-Ogievskiy int64_t vmstate_offset = qcow2_vm_state_offset(s); 5262558902ccSVladimir Sementsov-Ogievskiy int ret; 5263558902ccSVladimir Sementsov-Ogievskiy 5264558902ccSVladimir Sementsov-Ogievskiy /* Incoming requests must be OK */ 5265558902ccSVladimir Sementsov-Ogievskiy bdrv_check_qiov_request(pos, qiov->size, qiov, 0, &error_abort); 5266558902ccSVladimir Sementsov-Ogievskiy 5267558902ccSVladimir Sementsov-Ogievskiy if (INT64_MAX - pos < vmstate_offset) { 5268558902ccSVladimir Sementsov-Ogievskiy return -EIO; 5269558902ccSVladimir Sementsov-Ogievskiy } 5270558902ccSVladimir Sementsov-Ogievskiy 5271558902ccSVladimir Sementsov-Ogievskiy pos += vmstate_offset; 5272558902ccSVladimir Sementsov-Ogievskiy ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL); 5273558902ccSVladimir Sementsov-Ogievskiy if 
(ret < 0) { 5274558902ccSVladimir Sementsov-Ogievskiy return ret; 5275558902ccSVladimir Sementsov-Ogievskiy } 5276558902ccSVladimir Sementsov-Ogievskiy 5277558902ccSVladimir Sementsov-Ogievskiy return pos; 5278558902ccSVladimir Sementsov-Ogievskiy } 5279558902ccSVladimir Sementsov-Ogievskiy 5280cf8074b3SKevin Wolf static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, 5281cf8074b3SKevin Wolf int64_t pos) 528220d97356SBlue Swirl { 5283558902ccSVladimir Sementsov-Ogievskiy int64_t offset = qcow2_check_vmstate_request(bs, qiov, pos); 5284558902ccSVladimir Sementsov-Ogievskiy if (offset < 0) { 5285558902ccSVladimir Sementsov-Ogievskiy return offset; 5286558902ccSVladimir Sementsov-Ogievskiy } 528720d97356SBlue Swirl 528866f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE); 5289558902ccSVladimir Sementsov-Ogievskiy return bs->drv->bdrv_co_pwritev_part(bs, offset, qiov->size, qiov, 0, 0); 529020d97356SBlue Swirl } 529120d97356SBlue Swirl 52925ddda0b8SKevin Wolf static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, 52935ddda0b8SKevin Wolf int64_t pos) 529420d97356SBlue Swirl { 5295558902ccSVladimir Sementsov-Ogievskiy int64_t offset = qcow2_check_vmstate_request(bs, qiov, pos); 5296558902ccSVladimir Sementsov-Ogievskiy if (offset < 0) { 5297558902ccSVladimir Sementsov-Ogievskiy return offset; 5298558902ccSVladimir Sementsov-Ogievskiy } 529920d97356SBlue Swirl 530066f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD); 5301558902ccSVladimir Sementsov-Ogievskiy return bs->drv->bdrv_co_preadv_part(bs, offset, qiov->size, qiov, 0, 0); 530220d97356SBlue Swirl } 530320d97356SBlue Swirl 5304083c2456SVladimir Sementsov-Ogievskiy static int qcow2_has_compressed_clusters(BlockDriverState *bs) 5305083c2456SVladimir Sementsov-Ogievskiy { 5306083c2456SVladimir Sementsov-Ogievskiy int64_t offset = 0; 5307083c2456SVladimir Sementsov-Ogievskiy int64_t bytes = bdrv_getlength(bs); 5308083c2456SVladimir Sementsov-Ogievskiy 
5309083c2456SVladimir Sementsov-Ogievskiy if (bytes < 0) { 5310083c2456SVladimir Sementsov-Ogievskiy return bytes; 5311083c2456SVladimir Sementsov-Ogievskiy } 5312083c2456SVladimir Sementsov-Ogievskiy 5313083c2456SVladimir Sementsov-Ogievskiy while (bytes != 0) { 5314083c2456SVladimir Sementsov-Ogievskiy int ret; 5315083c2456SVladimir Sementsov-Ogievskiy QCow2SubclusterType type; 5316083c2456SVladimir Sementsov-Ogievskiy unsigned int cur_bytes = MIN(INT_MAX, bytes); 5317083c2456SVladimir Sementsov-Ogievskiy uint64_t host_offset; 5318083c2456SVladimir Sementsov-Ogievskiy 5319083c2456SVladimir Sementsov-Ogievskiy ret = qcow2_get_host_offset(bs, offset, &cur_bytes, &host_offset, 5320083c2456SVladimir Sementsov-Ogievskiy &type); 5321083c2456SVladimir Sementsov-Ogievskiy if (ret < 0) { 5322083c2456SVladimir Sementsov-Ogievskiy return ret; 5323083c2456SVladimir Sementsov-Ogievskiy } 5324083c2456SVladimir Sementsov-Ogievskiy 5325083c2456SVladimir Sementsov-Ogievskiy if (type == QCOW2_SUBCLUSTER_COMPRESSED) { 5326083c2456SVladimir Sementsov-Ogievskiy return 1; 5327083c2456SVladimir Sementsov-Ogievskiy } 5328083c2456SVladimir Sementsov-Ogievskiy 5329083c2456SVladimir Sementsov-Ogievskiy offset += cur_bytes; 5330083c2456SVladimir Sementsov-Ogievskiy bytes -= cur_bytes; 5331083c2456SVladimir Sementsov-Ogievskiy } 5332083c2456SVladimir Sementsov-Ogievskiy 5333083c2456SVladimir Sementsov-Ogievskiy return 0; 5334083c2456SVladimir Sementsov-Ogievskiy } 5335083c2456SVladimir Sementsov-Ogievskiy 53369296b3edSMax Reitz /* 53379296b3edSMax Reitz * Downgrades an image's version. To achieve this, any incompatible features 53389296b3edSMax Reitz * have to be removed. 
53399296b3edSMax Reitz */ 53404057a2b2SMax Reitz static int qcow2_downgrade(BlockDriverState *bs, int target_version, 5341d1402b50SMax Reitz BlockDriverAmendStatusCB *status_cb, void *cb_opaque, 5342d1402b50SMax Reitz Error **errp) 53439296b3edSMax Reitz { 5344ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 53459296b3edSMax Reitz int current_version = s->qcow_version; 53469296b3edSMax Reitz int ret; 53477fa140abSEric Blake int i; 53489296b3edSMax Reitz 5349d1402b50SMax Reitz /* This is qcow2_downgrade(), not qcow2_upgrade() */ 5350d1402b50SMax Reitz assert(target_version < current_version); 5351d1402b50SMax Reitz 5352d1402b50SMax Reitz /* There are no other versions (now) that you can downgrade to */ 5353d1402b50SMax Reitz assert(target_version == 2); 53549296b3edSMax Reitz 53559296b3edSMax Reitz if (s->refcount_order != 4) { 5356d1402b50SMax Reitz error_setg(errp, "compat=0.10 requires refcount_bits=16"); 53579296b3edSMax Reitz return -ENOTSUP; 53589296b3edSMax Reitz } 53599296b3edSMax Reitz 5360966b000fSKevin Wolf if (has_data_file(bs)) { 5361966b000fSKevin Wolf error_setg(errp, "Cannot downgrade an image with a data file"); 5362966b000fSKevin Wolf return -ENOTSUP; 5363966b000fSKevin Wolf } 5364966b000fSKevin Wolf 53657fa140abSEric Blake /* 53667fa140abSEric Blake * If any internal snapshot has a different size than the current 53677fa140abSEric Blake * image size, or VM state size that exceeds 32 bits, downgrading 53687fa140abSEric Blake * is unsafe. Even though we would still use v3-compliant output 53697fa140abSEric Blake * to preserve that data, other v2 programs might not realize 53707fa140abSEric Blake * those optional fields are important. 
53717fa140abSEric Blake */ 53727fa140abSEric Blake for (i = 0; i < s->nb_snapshots; i++) { 53737fa140abSEric Blake if (s->snapshots[i].vm_state_size > UINT32_MAX || 53747fa140abSEric Blake s->snapshots[i].disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) { 53757fa140abSEric Blake error_setg(errp, "Internal snapshots prevent downgrade of image"); 53767fa140abSEric Blake return -ENOTSUP; 53777fa140abSEric Blake } 53787fa140abSEric Blake } 53797fa140abSEric Blake 53809296b3edSMax Reitz /* clear incompatible features */ 53819296b3edSMax Reitz if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { 53829296b3edSMax Reitz ret = qcow2_mark_clean(bs); 53839296b3edSMax Reitz if (ret < 0) { 5384d1402b50SMax Reitz error_setg_errno(errp, -ret, "Failed to make the image clean"); 53859296b3edSMax Reitz return ret; 53869296b3edSMax Reitz } 53879296b3edSMax Reitz } 53889296b3edSMax Reitz 53899296b3edSMax Reitz /* with QCOW2_INCOMPAT_CORRUPT, it is pretty much impossible to get here in 53909296b3edSMax Reitz * the first place; if that happens nonetheless, returning -ENOTSUP is the 53919296b3edSMax Reitz * best thing to do anyway */ 53929296b3edSMax Reitz 5393083c2456SVladimir Sementsov-Ogievskiy if (s->incompatible_features & ~QCOW2_INCOMPAT_COMPRESSION) { 5394d1402b50SMax Reitz error_setg(errp, "Cannot downgrade an image with incompatible features " 5395083c2456SVladimir Sementsov-Ogievskiy "0x%" PRIx64 " set", 5396083c2456SVladimir Sementsov-Ogievskiy s->incompatible_features & ~QCOW2_INCOMPAT_COMPRESSION); 53979296b3edSMax Reitz return -ENOTSUP; 53989296b3edSMax Reitz } 53999296b3edSMax Reitz 54009296b3edSMax Reitz /* since we can ignore compatible features, we can set them to 0 as well */ 54019296b3edSMax Reitz s->compatible_features = 0; 54029296b3edSMax Reitz /* if lazy refcounts have been used, they have already been fixed through 54039296b3edSMax Reitz * clearing the dirty flag */ 54049296b3edSMax Reitz 54059296b3edSMax Reitz /* clearing autoclear features is trivial */ 
54069296b3edSMax Reitz s->autoclear_features = 0; 54079296b3edSMax Reitz 54088b13976dSMax Reitz ret = qcow2_expand_zero_clusters(bs, status_cb, cb_opaque); 54099296b3edSMax Reitz if (ret < 0) { 5410d1402b50SMax Reitz error_setg_errno(errp, -ret, "Failed to turn zero into data clusters"); 54119296b3edSMax Reitz return ret; 54129296b3edSMax Reitz } 54139296b3edSMax Reitz 5414083c2456SVladimir Sementsov-Ogievskiy if (s->incompatible_features & QCOW2_INCOMPAT_COMPRESSION) { 5415083c2456SVladimir Sementsov-Ogievskiy ret = qcow2_has_compressed_clusters(bs); 5416083c2456SVladimir Sementsov-Ogievskiy if (ret < 0) { 5417083c2456SVladimir Sementsov-Ogievskiy error_setg(errp, "Failed to check block status"); 5418083c2456SVladimir Sementsov-Ogievskiy return -EINVAL; 5419083c2456SVladimir Sementsov-Ogievskiy } 5420083c2456SVladimir Sementsov-Ogievskiy if (ret) { 5421083c2456SVladimir Sementsov-Ogievskiy error_setg(errp, "Cannot downgrade an image with zstd compression " 5422083c2456SVladimir Sementsov-Ogievskiy "type and existing compressed clusters"); 5423083c2456SVladimir Sementsov-Ogievskiy return -ENOTSUP; 5424083c2456SVladimir Sementsov-Ogievskiy } 5425083c2456SVladimir Sementsov-Ogievskiy /* 5426083c2456SVladimir Sementsov-Ogievskiy * No compressed clusters for now, so just chose default zlib 5427083c2456SVladimir Sementsov-Ogievskiy * compression. 
5428083c2456SVladimir Sementsov-Ogievskiy */ 5429083c2456SVladimir Sementsov-Ogievskiy s->incompatible_features &= ~QCOW2_INCOMPAT_COMPRESSION; 5430083c2456SVladimir Sementsov-Ogievskiy s->compression_type = QCOW2_COMPRESSION_TYPE_ZLIB; 5431083c2456SVladimir Sementsov-Ogievskiy } 5432083c2456SVladimir Sementsov-Ogievskiy 5433083c2456SVladimir Sementsov-Ogievskiy assert(s->incompatible_features == 0); 5434083c2456SVladimir Sementsov-Ogievskiy 54359296b3edSMax Reitz s->qcow_version = target_version; 54369296b3edSMax Reitz ret = qcow2_update_header(bs); 54379296b3edSMax Reitz if (ret < 0) { 54389296b3edSMax Reitz s->qcow_version = current_version; 5439d1402b50SMax Reitz error_setg_errno(errp, -ret, "Failed to update the image header"); 54409296b3edSMax Reitz return ret; 54419296b3edSMax Reitz } 54429296b3edSMax Reitz return 0; 54439296b3edSMax Reitz } 54449296b3edSMax Reitz 5445722efb0cSMax Reitz /* 5446722efb0cSMax Reitz * Upgrades an image's version. While newer versions encompass all 5447722efb0cSMax Reitz * features of older versions, some things may have to be presented 5448722efb0cSMax Reitz * differently. 
5449722efb0cSMax Reitz */ 5450722efb0cSMax Reitz static int qcow2_upgrade(BlockDriverState *bs, int target_version, 5451722efb0cSMax Reitz BlockDriverAmendStatusCB *status_cb, void *cb_opaque, 5452722efb0cSMax Reitz Error **errp) 5453722efb0cSMax Reitz { 5454722efb0cSMax Reitz BDRVQcow2State *s = bs->opaque; 54550a85af35SMax Reitz bool need_snapshot_update; 5456722efb0cSMax Reitz int current_version = s->qcow_version; 54570a85af35SMax Reitz int i; 5458722efb0cSMax Reitz int ret; 5459722efb0cSMax Reitz 5460722efb0cSMax Reitz /* This is qcow2_upgrade(), not qcow2_downgrade() */ 5461722efb0cSMax Reitz assert(target_version > current_version); 5462722efb0cSMax Reitz 5463722efb0cSMax Reitz /* There are no other versions (yet) that you can upgrade to */ 5464722efb0cSMax Reitz assert(target_version == 3); 5465722efb0cSMax Reitz 54660a85af35SMax Reitz status_cb(bs, 0, 2, cb_opaque); 54670a85af35SMax Reitz 54680a85af35SMax Reitz /* 54690a85af35SMax Reitz * In v2, snapshots do not need to have extra data. v3 requires 54700a85af35SMax Reitz * the 64-bit VM state size and the virtual disk size to be 54710a85af35SMax Reitz * present. 54720a85af35SMax Reitz * qcow2_write_snapshots() will always write the list in the 54730a85af35SMax Reitz * v3-compliant format. 
54740a85af35SMax Reitz */ 54750a85af35SMax Reitz need_snapshot_update = false; 54760a85af35SMax Reitz for (i = 0; i < s->nb_snapshots; i++) { 54770a85af35SMax Reitz if (s->snapshots[i].extra_data_size < 54780a85af35SMax Reitz sizeof_field(QCowSnapshotExtraData, vm_state_size_large) + 54790a85af35SMax Reitz sizeof_field(QCowSnapshotExtraData, disk_size)) 54800a85af35SMax Reitz { 54810a85af35SMax Reitz need_snapshot_update = true; 54820a85af35SMax Reitz break; 54830a85af35SMax Reitz } 54840a85af35SMax Reitz } 54850a85af35SMax Reitz if (need_snapshot_update) { 54860a85af35SMax Reitz ret = qcow2_write_snapshots(bs); 54870a85af35SMax Reitz if (ret < 0) { 54880a85af35SMax Reitz error_setg_errno(errp, -ret, "Failed to update the snapshot table"); 54890a85af35SMax Reitz return ret; 54900a85af35SMax Reitz } 54910a85af35SMax Reitz } 54920a85af35SMax Reitz status_cb(bs, 1, 2, cb_opaque); 5493722efb0cSMax Reitz 5494722efb0cSMax Reitz s->qcow_version = target_version; 5495722efb0cSMax Reitz ret = qcow2_update_header(bs); 5496722efb0cSMax Reitz if (ret < 0) { 5497722efb0cSMax Reitz s->qcow_version = current_version; 5498722efb0cSMax Reitz error_setg_errno(errp, -ret, "Failed to update the image header"); 5499722efb0cSMax Reitz return ret; 5500722efb0cSMax Reitz } 55010a85af35SMax Reitz status_cb(bs, 2, 2, cb_opaque); 5502722efb0cSMax Reitz 5503722efb0cSMax Reitz return 0; 5504722efb0cSMax Reitz } 5505722efb0cSMax Reitz 5506c293a809SMax Reitz typedef enum Qcow2AmendOperation { 5507c293a809SMax Reitz /* This is the value Qcow2AmendHelperCBInfo::last_operation will be 5508c293a809SMax Reitz * statically initialized to so that the helper CB can discern the first 5509c293a809SMax Reitz * invocation from an operation change */ 5510c293a809SMax Reitz QCOW2_NO_OPERATION = 0, 5511c293a809SMax Reitz 5512722efb0cSMax Reitz QCOW2_UPGRADING, 551390766d9dSMaxim Levitsky QCOW2_UPDATING_ENCRYPTION, 551461ce55fcSMax Reitz QCOW2_CHANGING_REFCOUNT_ORDER, 5515c293a809SMax Reitz QCOW2_DOWNGRADING, 
5516c293a809SMax Reitz } Qcow2AmendOperation; 5517c293a809SMax Reitz 5518c293a809SMax Reitz typedef struct Qcow2AmendHelperCBInfo { 5519c293a809SMax Reitz /* The code coordinating the amend operations should only modify 5520c293a809SMax Reitz * these four fields; the rest will be managed by the CB */ 5521c293a809SMax Reitz BlockDriverAmendStatusCB *original_status_cb; 5522c293a809SMax Reitz void *original_cb_opaque; 5523c293a809SMax Reitz 5524c293a809SMax Reitz Qcow2AmendOperation current_operation; 5525c293a809SMax Reitz 5526c293a809SMax Reitz /* Total number of operations to perform (only set once) */ 5527c293a809SMax Reitz int total_operations; 5528c293a809SMax Reitz 5529c293a809SMax Reitz /* The following fields are managed by the CB */ 5530c293a809SMax Reitz 5531c293a809SMax Reitz /* Number of operations completed */ 5532c293a809SMax Reitz int operations_completed; 5533c293a809SMax Reitz 5534c293a809SMax Reitz /* Cumulative offset of all completed operations */ 5535c293a809SMax Reitz int64_t offset_completed; 5536c293a809SMax Reitz 5537c293a809SMax Reitz Qcow2AmendOperation last_operation; 5538c293a809SMax Reitz int64_t last_work_size; 5539c293a809SMax Reitz } Qcow2AmendHelperCBInfo; 5540c293a809SMax Reitz 5541c293a809SMax Reitz static void qcow2_amend_helper_cb(BlockDriverState *bs, 5542c293a809SMax Reitz int64_t operation_offset, 5543c293a809SMax Reitz int64_t operation_work_size, void *opaque) 5544c293a809SMax Reitz { 5545c293a809SMax Reitz Qcow2AmendHelperCBInfo *info = opaque; 5546c293a809SMax Reitz int64_t current_work_size; 5547c293a809SMax Reitz int64_t projected_work_size; 5548c293a809SMax Reitz 5549c293a809SMax Reitz if (info->current_operation != info->last_operation) { 5550c293a809SMax Reitz if (info->last_operation != QCOW2_NO_OPERATION) { 5551c293a809SMax Reitz info->offset_completed += info->last_work_size; 5552c293a809SMax Reitz info->operations_completed++; 5553c293a809SMax Reitz } 5554c293a809SMax Reitz 5555c293a809SMax Reitz 
info->last_operation = info->current_operation; 5556c293a809SMax Reitz } 5557c293a809SMax Reitz 5558c293a809SMax Reitz assert(info->total_operations > 0); 5559c293a809SMax Reitz assert(info->operations_completed < info->total_operations); 5560c293a809SMax Reitz 5561c293a809SMax Reitz info->last_work_size = operation_work_size; 5562c293a809SMax Reitz 5563c293a809SMax Reitz current_work_size = info->offset_completed + operation_work_size; 5564c293a809SMax Reitz 5565c293a809SMax Reitz /* current_work_size is the total work size for (operations_completed + 1) 5566c293a809SMax Reitz * operations (which includes this one), so multiply it by the number of 5567c293a809SMax Reitz * operations not covered and divide it by the number of operations 5568c293a809SMax Reitz * covered to get a projection for the operations not covered */ 5569c293a809SMax Reitz projected_work_size = current_work_size * (info->total_operations - 5570c293a809SMax Reitz info->operations_completed - 1) 5571c293a809SMax Reitz / (info->operations_completed + 1); 5572c293a809SMax Reitz 5573c293a809SMax Reitz info->original_status_cb(bs, info->offset_completed + operation_offset, 5574c293a809SMax Reitz current_work_size + projected_work_size, 5575c293a809SMax Reitz info->original_cb_opaque); 5576c293a809SMax Reitz } 5577c293a809SMax Reitz 557877485434SMax Reitz static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, 55798b13976dSMax Reitz BlockDriverAmendStatusCB *status_cb, 5580d1402b50SMax Reitz void *cb_opaque, 5581a3579bfaSMaxim Levitsky bool force, 5582d1402b50SMax Reitz Error **errp) 55839296b3edSMax Reitz { 5584ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 55859296b3edSMax Reitz int old_version = s->qcow_version, new_version = old_version; 55869296b3edSMax Reitz uint64_t new_size = 0; 55879b890bdcSKevin Wolf const char *backing_file = NULL, *backing_format = NULL, *data_file = NULL; 55889296b3edSMax Reitz bool lazy_refcounts = s->use_lazy_refcounts; 55896c3944dcSKevin Wolf bool 
data_file_raw = data_file_is_raw(bs); 55901bd0e2d1SChunyan Liu const char *compat = NULL; 559161ce55fcSMax Reitz int refcount_bits = s->refcount_bits; 55929296b3edSMax Reitz int ret; 55931bd0e2d1SChunyan Liu QemuOptDesc *desc = opts->list->desc; 5594c293a809SMax Reitz Qcow2AmendHelperCBInfo helper_cb_info; 559590766d9dSMaxim Levitsky bool encryption_update = false; 55969296b3edSMax Reitz 55971bd0e2d1SChunyan Liu while (desc && desc->name) { 55981bd0e2d1SChunyan Liu if (!qemu_opt_find(opts, desc->name)) { 55999296b3edSMax Reitz /* only change explicitly defined options */ 56001bd0e2d1SChunyan Liu desc++; 56019296b3edSMax Reitz continue; 56029296b3edSMax Reitz } 56039296b3edSMax Reitz 56048a17b83cSMax Reitz if (!strcmp(desc->name, BLOCK_OPT_COMPAT_LEVEL)) { 56058a17b83cSMax Reitz compat = qemu_opt_get(opts, BLOCK_OPT_COMPAT_LEVEL); 56061bd0e2d1SChunyan Liu if (!compat) { 56079296b3edSMax Reitz /* preserve default */ 5608f7077c98SEric Blake } else if (!strcmp(compat, "0.10") || !strcmp(compat, "v2")) { 56099296b3edSMax Reitz new_version = 2; 5610f7077c98SEric Blake } else if (!strcmp(compat, "1.1") || !strcmp(compat, "v3")) { 56119296b3edSMax Reitz new_version = 3; 56129296b3edSMax Reitz } else { 5613d1402b50SMax Reitz error_setg(errp, "Unknown compatibility level %s", compat); 56149296b3edSMax Reitz return -EINVAL; 56159296b3edSMax Reitz } 56168a17b83cSMax Reitz } else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) { 56178a17b83cSMax Reitz new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0); 56188a17b83cSMax Reitz } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FILE)) { 56198a17b83cSMax Reitz backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE); 56208a17b83cSMax Reitz } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FMT)) { 56218a17b83cSMax Reitz backing_format = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); 562290766d9dSMaxim Levitsky } else if (g_str_has_prefix(desc->name, "encrypt.")) { 562390766d9dSMaxim Levitsky if (!s->crypto) { 562490766d9dSMaxim Levitsky 
error_setg(errp, 562590766d9dSMaxim Levitsky "Can't amend encryption options - encryption not present"); 562690766d9dSMaxim Levitsky return -EINVAL; 562790766d9dSMaxim Levitsky } 562890766d9dSMaxim Levitsky if (s->crypt_method_header != QCOW_CRYPT_LUKS) { 562990766d9dSMaxim Levitsky error_setg(errp, 563090766d9dSMaxim Levitsky "Only LUKS encryption options can be amended"); 563190766d9dSMaxim Levitsky return -ENOTSUP; 563290766d9dSMaxim Levitsky } 563390766d9dSMaxim Levitsky encryption_update = true; 56348a17b83cSMax Reitz } else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) { 56358a17b83cSMax Reitz lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS, 56361bd0e2d1SChunyan Liu lazy_refcounts); 563706d05fa7SMax Reitz } else if (!strcmp(desc->name, BLOCK_OPT_REFCOUNT_BITS)) { 563861ce55fcSMax Reitz refcount_bits = qemu_opt_get_number(opts, BLOCK_OPT_REFCOUNT_BITS, 563961ce55fcSMax Reitz refcount_bits); 564061ce55fcSMax Reitz 564161ce55fcSMax Reitz if (refcount_bits <= 0 || refcount_bits > 64 || 564261ce55fcSMax Reitz !is_power_of_2(refcount_bits)) 564361ce55fcSMax Reitz { 5644d1402b50SMax Reitz error_setg(errp, "Refcount width must be a power of two and " 5645d1402b50SMax Reitz "may not exceed 64 bits"); 564661ce55fcSMax Reitz return -EINVAL; 564761ce55fcSMax Reitz } 56489b890bdcSKevin Wolf } else if (!strcmp(desc->name, BLOCK_OPT_DATA_FILE)) { 56499b890bdcSKevin Wolf data_file = qemu_opt_get(opts, BLOCK_OPT_DATA_FILE); 56509b890bdcSKevin Wolf if (data_file && !has_data_file(bs)) { 56519b890bdcSKevin Wolf error_setg(errp, "data-file can only be set for images that " 56529b890bdcSKevin Wolf "use an external data file"); 56539b890bdcSKevin Wolf return -EINVAL; 56549b890bdcSKevin Wolf } 56556c3944dcSKevin Wolf } else if (!strcmp(desc->name, BLOCK_OPT_DATA_FILE_RAW)) { 56566c3944dcSKevin Wolf data_file_raw = qemu_opt_get_bool(opts, BLOCK_OPT_DATA_FILE_RAW, 56576c3944dcSKevin Wolf data_file_raw); 56586c3944dcSKevin Wolf if (data_file_raw && 
!data_file_is_raw(bs)) { 56596c3944dcSKevin Wolf error_setg(errp, "data-file-raw cannot be set on existing " 56606c3944dcSKevin Wolf "images"); 56616c3944dcSKevin Wolf return -EINVAL; 56626c3944dcSKevin Wolf } 56639296b3edSMax Reitz } else { 5664164e0f89SMax Reitz /* if this point is reached, this probably means a new option was 56659296b3edSMax Reitz * added without having it covered here */ 5666164e0f89SMax Reitz abort(); 56679296b3edSMax Reitz } 56681bd0e2d1SChunyan Liu 56691bd0e2d1SChunyan Liu desc++; 56709296b3edSMax Reitz } 56719296b3edSMax Reitz 5672c293a809SMax Reitz helper_cb_info = (Qcow2AmendHelperCBInfo){ 5673c293a809SMax Reitz .original_status_cb = status_cb, 5674c293a809SMax Reitz .original_cb_opaque = cb_opaque, 5675722efb0cSMax Reitz .total_operations = (new_version != old_version) 567690766d9dSMaxim Levitsky + (s->refcount_bits != refcount_bits) + 567790766d9dSMaxim Levitsky (encryption_update == true) 5678c293a809SMax Reitz }; 5679c293a809SMax Reitz 56801038bbb8SMax Reitz /* Upgrade first (some features may require compat=1.1) */ 56819296b3edSMax Reitz if (new_version > old_version) { 5682722efb0cSMax Reitz helper_cb_info.current_operation = QCOW2_UPGRADING; 5683722efb0cSMax Reitz ret = qcow2_upgrade(bs, new_version, &qcow2_amend_helper_cb, 5684722efb0cSMax Reitz &helper_cb_info, errp); 56859296b3edSMax Reitz if (ret < 0) { 56869296b3edSMax Reitz return ret; 56879296b3edSMax Reitz } 56889296b3edSMax Reitz } 56899296b3edSMax Reitz 569090766d9dSMaxim Levitsky if (encryption_update) { 569190766d9dSMaxim Levitsky QDict *amend_opts_dict; 569290766d9dSMaxim Levitsky QCryptoBlockAmendOptions *amend_opts; 569390766d9dSMaxim Levitsky 569490766d9dSMaxim Levitsky helper_cb_info.current_operation = QCOW2_UPDATING_ENCRYPTION; 569590766d9dSMaxim Levitsky amend_opts_dict = qcow2_extract_crypto_opts(opts, "luks", errp); 569690766d9dSMaxim Levitsky if (!amend_opts_dict) { 569790766d9dSMaxim Levitsky return -EINVAL; 569890766d9dSMaxim Levitsky } 569990766d9dSMaxim 
Levitsky amend_opts = block_crypto_amend_opts_init(amend_opts_dict, errp); 570090766d9dSMaxim Levitsky qobject_unref(amend_opts_dict); 570190766d9dSMaxim Levitsky if (!amend_opts) { 570290766d9dSMaxim Levitsky return -EINVAL; 570390766d9dSMaxim Levitsky } 570490766d9dSMaxim Levitsky ret = qcrypto_block_amend_options(s->crypto, 570590766d9dSMaxim Levitsky qcow2_crypto_hdr_read_func, 570690766d9dSMaxim Levitsky qcow2_crypto_hdr_write_func, 570790766d9dSMaxim Levitsky bs, 570890766d9dSMaxim Levitsky amend_opts, 570990766d9dSMaxim Levitsky force, 571090766d9dSMaxim Levitsky errp); 571190766d9dSMaxim Levitsky qapi_free_QCryptoBlockAmendOptions(amend_opts); 571290766d9dSMaxim Levitsky if (ret < 0) { 571390766d9dSMaxim Levitsky return ret; 571490766d9dSMaxim Levitsky } 571590766d9dSMaxim Levitsky } 571690766d9dSMaxim Levitsky 571761ce55fcSMax Reitz if (s->refcount_bits != refcount_bits) { 571861ce55fcSMax Reitz int refcount_order = ctz32(refcount_bits); 571961ce55fcSMax Reitz 572061ce55fcSMax Reitz if (new_version < 3 && refcount_bits != 16) { 5721d1402b50SMax Reitz error_setg(errp, "Refcount widths other than 16 bits require " 572261ce55fcSMax Reitz "compatibility level 1.1 or above (use compat=1.1 or " 572361ce55fcSMax Reitz "greater)"); 572461ce55fcSMax Reitz return -EINVAL; 572561ce55fcSMax Reitz } 572661ce55fcSMax Reitz 572761ce55fcSMax Reitz helper_cb_info.current_operation = QCOW2_CHANGING_REFCOUNT_ORDER; 572861ce55fcSMax Reitz ret = qcow2_change_refcount_order(bs, refcount_order, 572961ce55fcSMax Reitz &qcow2_amend_helper_cb, 5730d1402b50SMax Reitz &helper_cb_info, errp); 573161ce55fcSMax Reitz if (ret < 0) { 573261ce55fcSMax Reitz return ret; 573361ce55fcSMax Reitz } 573461ce55fcSMax Reitz } 573561ce55fcSMax Reitz 57366c3944dcSKevin Wolf /* data-file-raw blocks backing files, so clear it first if requested */ 57376c3944dcSKevin Wolf if (data_file_raw) { 57386c3944dcSKevin Wolf s->autoclear_features |= QCOW2_AUTOCLEAR_DATA_FILE_RAW; 57396c3944dcSKevin Wolf } else 
{ 57406c3944dcSKevin Wolf s->autoclear_features &= ~QCOW2_AUTOCLEAR_DATA_FILE_RAW; 57416c3944dcSKevin Wolf } 57426c3944dcSKevin Wolf 57439b890bdcSKevin Wolf if (data_file) { 57449b890bdcSKevin Wolf g_free(s->image_data_file); 57459b890bdcSKevin Wolf s->image_data_file = *data_file ? g_strdup(data_file) : NULL; 57469b890bdcSKevin Wolf } 57479b890bdcSKevin Wolf 57489b890bdcSKevin Wolf ret = qcow2_update_header(bs); 57499b890bdcSKevin Wolf if (ret < 0) { 57509b890bdcSKevin Wolf error_setg_errno(errp, -ret, "Failed to update the image header"); 57519b890bdcSKevin Wolf return ret; 57529b890bdcSKevin Wolf } 57539b890bdcSKevin Wolf 57549296b3edSMax Reitz if (backing_file || backing_format) { 5755bc5ee6daSEric Blake if (g_strcmp0(backing_file, s->image_backing_file) || 5756bc5ee6daSEric Blake g_strcmp0(backing_format, s->image_backing_format)) { 57575a385bf5SEric Blake error_setg(errp, "Cannot amend the backing file"); 57585a385bf5SEric Blake error_append_hint(errp, 57595a385bf5SEric Blake "You can use 'qemu-img rebase' instead.\n"); 57605a385bf5SEric Blake return -EINVAL; 57619296b3edSMax Reitz } 57629296b3edSMax Reitz } 57639296b3edSMax Reitz 57649296b3edSMax Reitz if (s->use_lazy_refcounts != lazy_refcounts) { 57659296b3edSMax Reitz if (lazy_refcounts) { 57661038bbb8SMax Reitz if (new_version < 3) { 5767d1402b50SMax Reitz error_setg(errp, "Lazy refcounts only supported with " 5768d1402b50SMax Reitz "compatibility level 1.1 and above (use compat=1.1 " 5769d1402b50SMax Reitz "or greater)"); 57709296b3edSMax Reitz return -EINVAL; 57719296b3edSMax Reitz } 57729296b3edSMax Reitz s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS; 57739296b3edSMax Reitz ret = qcow2_update_header(bs); 57749296b3edSMax Reitz if (ret < 0) { 57759296b3edSMax Reitz s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS; 5776d1402b50SMax Reitz error_setg_errno(errp, -ret, "Failed to update the image header"); 57779296b3edSMax Reitz return ret; 57789296b3edSMax Reitz } 57799296b3edSMax Reitz 
s->use_lazy_refcounts = true; 57809296b3edSMax Reitz } else { 57819296b3edSMax Reitz /* make image clean first */ 57829296b3edSMax Reitz ret = qcow2_mark_clean(bs); 57839296b3edSMax Reitz if (ret < 0) { 5784d1402b50SMax Reitz error_setg_errno(errp, -ret, "Failed to make the image clean"); 57859296b3edSMax Reitz return ret; 57869296b3edSMax Reitz } 57879296b3edSMax Reitz /* now disallow lazy refcounts */ 57889296b3edSMax Reitz s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS; 57899296b3edSMax Reitz ret = qcow2_update_header(bs); 57909296b3edSMax Reitz if (ret < 0) { 57919296b3edSMax Reitz s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS; 5792d1402b50SMax Reitz error_setg_errno(errp, -ret, "Failed to update the image header"); 57939296b3edSMax Reitz return ret; 57949296b3edSMax Reitz } 57959296b3edSMax Reitz s->use_lazy_refcounts = false; 57969296b3edSMax Reitz } 57979296b3edSMax Reitz } 57989296b3edSMax Reitz 57999296b3edSMax Reitz if (new_size) { 5800a3aeeab5SEric Blake BlockBackend *blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, 5801a3aeeab5SEric Blake errp); 5802a3aeeab5SEric Blake if (!blk) { 5803a3aeeab5SEric Blake return -EPERM; 5804d7086422SKevin Wolf } 5805d7086422SKevin Wolf 5806e8d04f92SMax Reitz /* 5807e8d04f92SMax Reitz * Amending image options should ensure that the image has 5808e8d04f92SMax Reitz * exactly the given new values, so pass exact=true here. 
5809e8d04f92SMax Reitz */ 58108c6242b6SKevin Wolf ret = blk_truncate(blk, new_size, true, PREALLOC_MODE_OFF, 0, errp); 581170b27f36SKevin Wolf blk_unref(blk); 58129296b3edSMax Reitz if (ret < 0) { 58139296b3edSMax Reitz return ret; 58149296b3edSMax Reitz } 58159296b3edSMax Reitz } 58169296b3edSMax Reitz 58171038bbb8SMax Reitz /* Downgrade last (so unsupported features can be removed before) */ 58181038bbb8SMax Reitz if (new_version < old_version) { 5819c293a809SMax Reitz helper_cb_info.current_operation = QCOW2_DOWNGRADING; 5820c293a809SMax Reitz ret = qcow2_downgrade(bs, new_version, &qcow2_amend_helper_cb, 5821d1402b50SMax Reitz &helper_cb_info, errp); 58221038bbb8SMax Reitz if (ret < 0) { 58231038bbb8SMax Reitz return ret; 58241038bbb8SMax Reitz } 58251038bbb8SMax Reitz } 58261038bbb8SMax Reitz 58279296b3edSMax Reitz return 0; 58289296b3edSMax Reitz } 58299296b3edSMax Reitz 58308ea1613dSMaxim Levitsky static int coroutine_fn qcow2_co_amend(BlockDriverState *bs, 58318ea1613dSMaxim Levitsky BlockdevAmendOptions *opts, 58328ea1613dSMaxim Levitsky bool force, 58338ea1613dSMaxim Levitsky Error **errp) 58348ea1613dSMaxim Levitsky { 58358ea1613dSMaxim Levitsky BlockdevAmendOptionsQcow2 *qopts = &opts->u.qcow2; 58368ea1613dSMaxim Levitsky BDRVQcow2State *s = bs->opaque; 58378ea1613dSMaxim Levitsky int ret = 0; 58388ea1613dSMaxim Levitsky 58398ea1613dSMaxim Levitsky if (qopts->has_encrypt) { 58408ea1613dSMaxim Levitsky if (!s->crypto) { 58418ea1613dSMaxim Levitsky error_setg(errp, "image is not encrypted, can't amend"); 58428ea1613dSMaxim Levitsky return -EOPNOTSUPP; 58438ea1613dSMaxim Levitsky } 58448ea1613dSMaxim Levitsky 58458ea1613dSMaxim Levitsky if (qopts->encrypt->format != Q_CRYPTO_BLOCK_FORMAT_LUKS) { 58468ea1613dSMaxim Levitsky error_setg(errp, 58478ea1613dSMaxim Levitsky "Amend can't be used to change the qcow2 encryption format"); 58488ea1613dSMaxim Levitsky return -EOPNOTSUPP; 58498ea1613dSMaxim Levitsky } 58508ea1613dSMaxim Levitsky 58518ea1613dSMaxim 
Levitsky if (s->crypt_method_header != QCOW_CRYPT_LUKS) { 58528ea1613dSMaxim Levitsky error_setg(errp, 58538ea1613dSMaxim Levitsky "Only LUKS encryption options can be amended for qcow2 with blockdev-amend"); 58548ea1613dSMaxim Levitsky return -EOPNOTSUPP; 58558ea1613dSMaxim Levitsky } 58568ea1613dSMaxim Levitsky 58578ea1613dSMaxim Levitsky ret = qcrypto_block_amend_options(s->crypto, 58588ea1613dSMaxim Levitsky qcow2_crypto_hdr_read_func, 58598ea1613dSMaxim Levitsky qcow2_crypto_hdr_write_func, 58608ea1613dSMaxim Levitsky bs, 58618ea1613dSMaxim Levitsky qopts->encrypt, 58628ea1613dSMaxim Levitsky force, 58638ea1613dSMaxim Levitsky errp); 58648ea1613dSMaxim Levitsky } 58658ea1613dSMaxim Levitsky return ret; 58668ea1613dSMaxim Levitsky } 58678ea1613dSMaxim Levitsky 586885186ebdSMax Reitz /* 586985186ebdSMax Reitz * If offset or size are negative, respectively, they will not be included in 587085186ebdSMax Reitz * the BLOCK_IMAGE_CORRUPTED event emitted. 587185186ebdSMax Reitz * fatal will be ignored for read-only BDS; corruptions found there will always 587285186ebdSMax Reitz * be considered non-fatal. 587385186ebdSMax Reitz */ 587485186ebdSMax Reitz void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset, 587585186ebdSMax Reitz int64_t size, const char *message_format, ...) 
587685186ebdSMax Reitz { 5877ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 5878dc881b44SAlberto Garcia const char *node_name; 587985186ebdSMax Reitz char *message; 588085186ebdSMax Reitz va_list ap; 588185186ebdSMax Reitz 5882ddf3b47eSMax Reitz fatal = fatal && bdrv_is_writable(bs); 588385186ebdSMax Reitz 588485186ebdSMax Reitz if (s->signaled_corruption && 588585186ebdSMax Reitz (!fatal || (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT))) 588685186ebdSMax Reitz { 588785186ebdSMax Reitz return; 588885186ebdSMax Reitz } 588985186ebdSMax Reitz 589085186ebdSMax Reitz va_start(ap, message_format); 589185186ebdSMax Reitz message = g_strdup_vprintf(message_format, ap); 589285186ebdSMax Reitz va_end(ap); 589385186ebdSMax Reitz 589485186ebdSMax Reitz if (fatal) { 589585186ebdSMax Reitz fprintf(stderr, "qcow2: Marking image as corrupt: %s; further " 589685186ebdSMax Reitz "corruption events will be suppressed\n", message); 589785186ebdSMax Reitz } else { 589885186ebdSMax Reitz fprintf(stderr, "qcow2: Image is corrupt: %s; further non-fatal " 589985186ebdSMax Reitz "corruption events will be suppressed\n", message); 590085186ebdSMax Reitz } 590185186ebdSMax Reitz 5902dc881b44SAlberto Garcia node_name = bdrv_get_node_name(bs); 5903dc881b44SAlberto Garcia qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs), 5904dc881b44SAlberto Garcia *node_name != '\0', node_name, 5905dc881b44SAlberto Garcia message, offset >= 0, offset, 5906dc881b44SAlberto Garcia size >= 0, size, 59073ab72385SPeter Xu fatal); 590885186ebdSMax Reitz g_free(message); 590985186ebdSMax Reitz 591085186ebdSMax Reitz if (fatal) { 591185186ebdSMax Reitz qcow2_mark_corrupt(bs); 591285186ebdSMax Reitz bs->drv = NULL; /* make BDS unusable */ 591385186ebdSMax Reitz } 591485186ebdSMax Reitz 591585186ebdSMax Reitz s->signaled_corruption = true; 591685186ebdSMax Reitz } 591785186ebdSMax Reitz 5918df373fb0SMaxim Levitsky #define QCOW_COMMON_OPTIONS \ 5919df373fb0SMaxim Levitsky { \ 5920df373fb0SMaxim 
Levitsky .name = BLOCK_OPT_SIZE, \ 5921df373fb0SMaxim Levitsky .type = QEMU_OPT_SIZE, \ 5922df373fb0SMaxim Levitsky .help = "Virtual disk size" \ 5923df373fb0SMaxim Levitsky }, \ 5924df373fb0SMaxim Levitsky { \ 5925df373fb0SMaxim Levitsky .name = BLOCK_OPT_COMPAT_LEVEL, \ 5926df373fb0SMaxim Levitsky .type = QEMU_OPT_STRING, \ 5927df373fb0SMaxim Levitsky .help = "Compatibility level (v2 [0.10] or v3 [1.1])" \ 5928df373fb0SMaxim Levitsky }, \ 5929df373fb0SMaxim Levitsky { \ 5930df373fb0SMaxim Levitsky .name = BLOCK_OPT_BACKING_FILE, \ 5931df373fb0SMaxim Levitsky .type = QEMU_OPT_STRING, \ 5932df373fb0SMaxim Levitsky .help = "File name of a base image" \ 5933df373fb0SMaxim Levitsky }, \ 5934df373fb0SMaxim Levitsky { \ 5935df373fb0SMaxim Levitsky .name = BLOCK_OPT_BACKING_FMT, \ 5936df373fb0SMaxim Levitsky .type = QEMU_OPT_STRING, \ 5937df373fb0SMaxim Levitsky .help = "Image format of the base image" \ 5938df373fb0SMaxim Levitsky }, \ 5939df373fb0SMaxim Levitsky { \ 5940df373fb0SMaxim Levitsky .name = BLOCK_OPT_DATA_FILE, \ 5941df373fb0SMaxim Levitsky .type = QEMU_OPT_STRING, \ 5942df373fb0SMaxim Levitsky .help = "File name of an external data file" \ 5943df373fb0SMaxim Levitsky }, \ 5944df373fb0SMaxim Levitsky { \ 5945df373fb0SMaxim Levitsky .name = BLOCK_OPT_DATA_FILE_RAW, \ 5946df373fb0SMaxim Levitsky .type = QEMU_OPT_BOOL, \ 5947df373fb0SMaxim Levitsky .help = "The external data file must stay valid " \ 5948df373fb0SMaxim Levitsky "as a raw image" \ 5949df373fb0SMaxim Levitsky }, \ 5950df373fb0SMaxim Levitsky { \ 59510b6786a9SMaxim Levitsky .name = BLOCK_OPT_LAZY_REFCOUNTS, \ 59520b6786a9SMaxim Levitsky .type = QEMU_OPT_BOOL, \ 59530b6786a9SMaxim Levitsky .help = "Postpone refcount updates", \ 59540b6786a9SMaxim Levitsky .def_value_str = "off" \ 59550b6786a9SMaxim Levitsky }, \ 59560b6786a9SMaxim Levitsky { \ 59570b6786a9SMaxim Levitsky .name = BLOCK_OPT_REFCOUNT_BITS, \ 59580b6786a9SMaxim Levitsky .type = QEMU_OPT_NUMBER, \ 59590b6786a9SMaxim Levitsky .help = 
"Width of a reference count entry in bits", \ 59600b6786a9SMaxim Levitsky .def_value_str = "16" \ 59610b6786a9SMaxim Levitsky } 59620b6786a9SMaxim Levitsky 59630b6786a9SMaxim Levitsky static QemuOptsList qcow2_create_opts = { 59640b6786a9SMaxim Levitsky .name = "qcow2-create-opts", 59650b6786a9SMaxim Levitsky .head = QTAILQ_HEAD_INITIALIZER(qcow2_create_opts.head), 59660b6786a9SMaxim Levitsky .desc = { 59670b6786a9SMaxim Levitsky { \ 5968df373fb0SMaxim Levitsky .name = BLOCK_OPT_ENCRYPT, \ 5969df373fb0SMaxim Levitsky .type = QEMU_OPT_BOOL, \ 5970df373fb0SMaxim Levitsky .help = "Encrypt the image with format 'aes'. (Deprecated " \ 5971df373fb0SMaxim Levitsky "in favor of " BLOCK_OPT_ENCRYPT_FORMAT "=aes)", \ 5972df373fb0SMaxim Levitsky }, \ 5973df373fb0SMaxim Levitsky { \ 5974df373fb0SMaxim Levitsky .name = BLOCK_OPT_ENCRYPT_FORMAT, \ 5975df373fb0SMaxim Levitsky .type = QEMU_OPT_STRING, \ 5976df373fb0SMaxim Levitsky .help = "Encrypt the image, format choices: 'aes', 'luks'", \ 5977df373fb0SMaxim Levitsky }, \ 5978df373fb0SMaxim Levitsky BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.", \ 5979df373fb0SMaxim Levitsky "ID of secret providing qcow AES key or LUKS passphrase"), \ 5980df373fb0SMaxim Levitsky BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG("encrypt."), \ 5981df373fb0SMaxim Levitsky BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE("encrypt."), \ 5982df373fb0SMaxim Levitsky BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG("encrypt."), \ 5983df373fb0SMaxim Levitsky BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG("encrypt."), \ 5984df373fb0SMaxim Levitsky BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG("encrypt."), \ 5985df373fb0SMaxim Levitsky BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME("encrypt."), \ 5986df373fb0SMaxim Levitsky { \ 5987df373fb0SMaxim Levitsky .name = BLOCK_OPT_CLUSTER_SIZE, \ 5988df373fb0SMaxim Levitsky .type = QEMU_OPT_SIZE, \ 5989df373fb0SMaxim Levitsky .help = "qcow2 cluster size", \ 5990df373fb0SMaxim Levitsky .def_value_str = stringify(DEFAULT_CLUSTER_SIZE) \ 5991df373fb0SMaxim Levitsky }, \ 
5992df373fb0SMaxim Levitsky { \ 59937be20252SAlberto Garcia .name = BLOCK_OPT_EXTL2, \ 59947be20252SAlberto Garcia .type = QEMU_OPT_BOOL, \ 59957be20252SAlberto Garcia .help = "Extended L2 tables", \ 59967be20252SAlberto Garcia .def_value_str = "off" \ 59977be20252SAlberto Garcia }, \ 59987be20252SAlberto Garcia { \ 5999df373fb0SMaxim Levitsky .name = BLOCK_OPT_PREALLOC, \ 6000df373fb0SMaxim Levitsky .type = QEMU_OPT_STRING, \ 6001df373fb0SMaxim Levitsky .help = "Preallocation mode (allowed values: off, " \ 6002df373fb0SMaxim Levitsky "metadata, falloc, full)" \ 6003df373fb0SMaxim Levitsky }, \ 6004df373fb0SMaxim Levitsky { \ 6005df373fb0SMaxim Levitsky .name = BLOCK_OPT_COMPRESSION_TYPE, \ 6006df373fb0SMaxim Levitsky .type = QEMU_OPT_STRING, \ 6007df373fb0SMaxim Levitsky .help = "Compression method used for image cluster " \ 6008df373fb0SMaxim Levitsky "compression", \ 6009df373fb0SMaxim Levitsky .def_value_str = "zlib" \ 60100b6786a9SMaxim Levitsky }, 6011df373fb0SMaxim Levitsky QCOW_COMMON_OPTIONS, 6012df373fb0SMaxim Levitsky { /* end of list */ } 6013df373fb0SMaxim Levitsky } 6014df373fb0SMaxim Levitsky }; 6015df373fb0SMaxim Levitsky 6016df373fb0SMaxim Levitsky static QemuOptsList qcow2_amend_opts = { 6017df373fb0SMaxim Levitsky .name = "qcow2-amend-opts", 6018df373fb0SMaxim Levitsky .head = QTAILQ_HEAD_INITIALIZER(qcow2_amend_opts.head), 6019df373fb0SMaxim Levitsky .desc = { 602090766d9dSMaxim Levitsky BLOCK_CRYPTO_OPT_DEF_LUKS_STATE("encrypt."), 602190766d9dSMaxim Levitsky BLOCK_CRYPTO_OPT_DEF_LUKS_KEYSLOT("encrypt."), 602290766d9dSMaxim Levitsky BLOCK_CRYPTO_OPT_DEF_LUKS_OLD_SECRET("encrypt."), 602390766d9dSMaxim Levitsky BLOCK_CRYPTO_OPT_DEF_LUKS_NEW_SECRET("encrypt."), 602490766d9dSMaxim Levitsky BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME("encrypt."), 6025df373fb0SMaxim Levitsky QCOW_COMMON_OPTIONS, 60261bd0e2d1SChunyan Liu { /* end of list */ } 60271bd0e2d1SChunyan Liu } 602820d97356SBlue Swirl }; 602920d97356SBlue Swirl 60302654267cSMax Reitz static const char 
*const qcow2_strong_runtime_opts[] = { 60312654267cSMax Reitz "encrypt." BLOCK_CRYPTO_OPT_QCOW_KEY_SECRET, 60322654267cSMax Reitz 60332654267cSMax Reitz NULL 60342654267cSMax Reitz }; 60352654267cSMax Reitz 60365f535a94SMax Reitz BlockDriver bdrv_qcow2 = { 603720d97356SBlue Swirl .format_name = "qcow2", 6038ff99129aSKevin Wolf .instance_size = sizeof(BDRVQcow2State), 60397c80ab3fSJes Sorensen .bdrv_probe = qcow2_probe, 60407c80ab3fSJes Sorensen .bdrv_open = qcow2_open, 60417c80ab3fSJes Sorensen .bdrv_close = qcow2_close, 604221d82ac9SJeff Cody .bdrv_reopen_prepare = qcow2_reopen_prepare, 60435b0959a7SKevin Wolf .bdrv_reopen_commit = qcow2_reopen_commit, 604465eb7c85SPeter Krempa .bdrv_reopen_commit_post = qcow2_reopen_commit_post, 60455b0959a7SKevin Wolf .bdrv_reopen_abort = qcow2_reopen_abort, 60465365f44dSKevin Wolf .bdrv_join_options = qcow2_join_options, 604769dca43dSMax Reitz .bdrv_child_perm = bdrv_default_perms, 6048efc75e2aSStefan Hajnoczi .bdrv_co_create_opts = qcow2_co_create_opts, 6049b0292b85SKevin Wolf .bdrv_co_create = qcow2_co_create, 605038841dcdSMax Reitz .bdrv_has_zero_init = qcow2_has_zero_init, 6051a320fb04SEric Blake .bdrv_co_block_status = qcow2_co_block_status, 605220d97356SBlue Swirl 6053df893d25SVladimir Sementsov-Ogievskiy .bdrv_co_preadv_part = qcow2_co_preadv_part, 60545396234bSVladimir Sementsov-Ogievskiy .bdrv_co_pwritev_part = qcow2_co_pwritev_part, 6055eb489bb1SKevin Wolf .bdrv_co_flush_to_os = qcow2_co_flush_to_os, 6056419b19d9SStefan Hajnoczi 60575544b59fSEric Blake .bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes, 605882e8a788SEric Blake .bdrv_co_pdiscard = qcow2_co_pdiscard, 6059fd9fcd37SFam Zheng .bdrv_co_copy_range_from = qcow2_co_copy_range_from, 6060fd9fcd37SFam Zheng .bdrv_co_copy_range_to = qcow2_co_copy_range_to, 6061061ca8a3SKevin Wolf .bdrv_co_truncate = qcow2_co_truncate, 60625396234bSVladimir Sementsov-Ogievskiy .bdrv_co_pwritev_compressed_part = qcow2_co_pwritev_compressed_part, 6063491d27e2SMax Reitz .bdrv_make_empty 
= qcow2_make_empty, 606420d97356SBlue Swirl 606520d97356SBlue Swirl .bdrv_snapshot_create = qcow2_snapshot_create, 606620d97356SBlue Swirl .bdrv_snapshot_goto = qcow2_snapshot_goto, 606720d97356SBlue Swirl .bdrv_snapshot_delete = qcow2_snapshot_delete, 606820d97356SBlue Swirl .bdrv_snapshot_list = qcow2_snapshot_list, 606951ef6727Sedison .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp, 6070c501c352SStefan Hajnoczi .bdrv_measure = qcow2_measure, 60717c80ab3fSJes Sorensen .bdrv_get_info = qcow2_get_info, 607237764dfbSMax Reitz .bdrv_get_specific_info = qcow2_get_specific_info, 607320d97356SBlue Swirl 60747c80ab3fSJes Sorensen .bdrv_save_vmstate = qcow2_save_vmstate, 60757c80ab3fSJes Sorensen .bdrv_load_vmstate = qcow2_load_vmstate, 607620d97356SBlue Swirl 6077d67066d8SMax Reitz .is_format = true, 60788ee79e70SKevin Wolf .supports_backing = true, 607920d97356SBlue Swirl .bdrv_change_backing_file = qcow2_change_backing_file, 608020d97356SBlue Swirl 6081d34682cdSKevin Wolf .bdrv_refresh_limits = qcow2_refresh_limits, 60822b148f39SPaolo Bonzini .bdrv_co_invalidate_cache = qcow2_co_invalidate_cache, 6083ec6d8912SKevin Wolf .bdrv_inactivate = qcow2_inactivate, 608406d9260fSAnthony Liguori 60851bd0e2d1SChunyan Liu .create_opts = &qcow2_create_opts, 6086df373fb0SMaxim Levitsky .amend_opts = &qcow2_amend_opts, 60872654267cSMax Reitz .strong_runtime_opts = qcow2_strong_runtime_opts, 60888a2ce0bcSAlberto Garcia .mutable_opts = mutable_opts, 60892fd61638SPaolo Bonzini .bdrv_co_check = qcow2_co_check, 6090c282e1fdSChunyan Liu .bdrv_amend_options = qcow2_amend_options, 60918ea1613dSMaxim Levitsky .bdrv_co_amend = qcow2_co_amend, 6092279621c0SAlberto Garcia 6093279621c0SAlberto Garcia .bdrv_detach_aio_context = qcow2_detach_aio_context, 6094279621c0SAlberto Garcia .bdrv_attach_aio_context = qcow2_attach_aio_context, 60951b6b0562SVladimir Sementsov-Ogievskiy 6096ef893b5cSEric Blake .bdrv_supports_persistent_dirty_bitmap = 6097ef893b5cSEric Blake 
qcow2_supports_persistent_dirty_bitmap, 6098d2c3080eSVladimir Sementsov-Ogievskiy .bdrv_co_can_store_new_dirty_bitmap = qcow2_co_can_store_new_dirty_bitmap, 6099d2c3080eSVladimir Sementsov-Ogievskiy .bdrv_co_remove_persistent_dirty_bitmap = 6100d2c3080eSVladimir Sementsov-Ogievskiy qcow2_co_remove_persistent_dirty_bitmap, 610120d97356SBlue Swirl }; 610220d97356SBlue Swirl 61035efa9d5aSAnthony Liguori static void bdrv_qcow2_init(void) 61045efa9d5aSAnthony Liguori { 61055efa9d5aSAnthony Liguori bdrv_register(&bdrv_qcow2); 61065efa9d5aSAnthony Liguori } 61075efa9d5aSAnthony Liguori 61085efa9d5aSAnthony Liguori block_init(bdrv_qcow2_init); 6109