1585f8587Sbellard /* 2585f8587Sbellard * Block driver for the QCOW version 2 format 3585f8587Sbellard * 4585f8587Sbellard * Copyright (c) 2004-2006 Fabrice Bellard 5585f8587Sbellard * 6585f8587Sbellard * Permission is hereby granted, free of charge, to any person obtaining a copy 7585f8587Sbellard * of this software and associated documentation files (the "Software"), to deal 8585f8587Sbellard * in the Software without restriction, including without limitation the rights 9585f8587Sbellard * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10585f8587Sbellard * copies of the Software, and to permit persons to whom the Software is 11585f8587Sbellard * furnished to do so, subject to the following conditions: 12585f8587Sbellard * 13585f8587Sbellard * The above copyright notice and this permission notice shall be included in 14585f8587Sbellard * all copies or substantial portions of the Software. 15585f8587Sbellard * 16585f8587Sbellard * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17585f8587Sbellard * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18585f8587Sbellard * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19585f8587Sbellard * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20585f8587Sbellard * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21585f8587Sbellard * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22585f8587Sbellard * THE SOFTWARE. 
23585f8587Sbellard */ 24e688df6bSMarkus Armbruster 2580c71a24SPeter Maydell #include "qemu/osdep.h" 262714f13dSVladimir Sementsov-Ogievskiy 272714f13dSVladimir Sementsov-Ogievskiy #define ZLIB_CONST 282714f13dSVladimir Sementsov-Ogievskiy #include <zlib.h> 292714f13dSVladimir Sementsov-Ogievskiy 30737e150eSPaolo Bonzini #include "block/block_int.h" 31609f45eaSMax Reitz #include "block/qdict.h" 3223588797SKevin Wolf #include "sysemu/block-backend.h" 331de7afc9SPaolo Bonzini #include "qemu/module.h" 340d8c41daSMichael S. Tsirkin #include "qcow2.h" 351de7afc9SPaolo Bonzini #include "qemu/error-report.h" 36e688df6bSMarkus Armbruster #include "qapi/error.h" 379af23989SMarkus Armbruster #include "qapi/qapi-events-block-core.h" 386b673957SMarkus Armbruster #include "qapi/qmp/qdict.h" 396b673957SMarkus Armbruster #include "qapi/qmp/qstring.h" 403cce16f4SKevin Wolf #include "trace.h" 411bd0e2d1SChunyan Liu #include "qemu/option_int.h" 42f348b6d1SVeronia Bahaa #include "qemu/cutils.h" 4358369e22SPaolo Bonzini #include "qemu/bswap.h" 44b76b4f60SKevin Wolf #include "qapi/qobject-input-visitor.h" 45b76b4f60SKevin Wolf #include "qapi/qapi-visit-block-core.h" 460d8c41daSMichael S. Tsirkin #include "crypto.h" 47ceb029cdSVladimir Sementsov-Ogievskiy #include "block/thread-pool.h" 48585f8587Sbellard 49585f8587Sbellard /* 50585f8587Sbellard Differences with QCOW: 51585f8587Sbellard 52585f8587Sbellard - Support for multiple incremental snapshots. 53585f8587Sbellard - Memory management by reference counts. 54585f8587Sbellard - Clusters which have a reference count of one have the bit 55585f8587Sbellard QCOW_OFLAG_COPIED to optimize write performance. 56585f8587Sbellard - Size of compressed clusters is stored in sectors to reduce bit usage 57585f8587Sbellard in the cluster offsets. 58585f8587Sbellard - Support for storing additional data (such as the VM state) in the 59585f8587Sbellard snapshots. 
60585f8587Sbellard - If a backing store is used, the cluster size is not constrained 61585f8587Sbellard (could be backported to QCOW). 62585f8587Sbellard - L2 tables have always a size of one cluster. 63585f8587Sbellard */ 64585f8587Sbellard 659b80ddf3Saliguori 669b80ddf3Saliguori typedef struct { 679b80ddf3Saliguori uint32_t magic; 689b80ddf3Saliguori uint32_t len; 69c4217f64SJeff Cody } QEMU_PACKED QCowExtension; 7021d82ac9SJeff Cody 717c80ab3fSJes Sorensen #define QCOW2_EXT_MAGIC_END 0 727c80ab3fSJes Sorensen #define QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA 73cfcc4c62SKevin Wolf #define QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857 744652b8f3SDaniel P. Berrange #define QCOW2_EXT_MAGIC_CRYPTO_HEADER 0x0537be77 7588ddffaeSVladimir Sementsov-Ogievskiy #define QCOW2_EXT_MAGIC_BITMAPS 0x23852875 7693c24936SKevin Wolf #define QCOW2_EXT_MAGIC_DATA_FILE 0x44415441 779b80ddf3Saliguori 78c3c10f72SVladimir Sementsov-Ogievskiy static int coroutine_fn 79c3c10f72SVladimir Sementsov-Ogievskiy qcow2_co_preadv_compressed(BlockDriverState *bs, 80c3c10f72SVladimir Sementsov-Ogievskiy uint64_t file_cluster_offset, 81c3c10f72SVladimir Sementsov-Ogievskiy uint64_t offset, 82c3c10f72SVladimir Sementsov-Ogievskiy uint64_t bytes, 83c3c10f72SVladimir Sementsov-Ogievskiy QEMUIOVector *qiov); 84c3c10f72SVladimir Sementsov-Ogievskiy 857c80ab3fSJes Sorensen static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename) 86585f8587Sbellard { 87585f8587Sbellard const QCowHeader *cow_header = (const void *)buf; 88585f8587Sbellard 89585f8587Sbellard if (buf_size >= sizeof(QCowHeader) && 90585f8587Sbellard be32_to_cpu(cow_header->magic) == QCOW_MAGIC && 916744cbabSKevin Wolf be32_to_cpu(cow_header->version) >= 2) 92585f8587Sbellard return 100; 93585f8587Sbellard else 94585f8587Sbellard return 0; 95585f8587Sbellard } 96585f8587Sbellard 979b80ddf3Saliguori 984652b8f3SDaniel P. Berrange static ssize_t qcow2_crypto_hdr_read_func(QCryptoBlock *block, size_t offset, 994652b8f3SDaniel P. 
Berrange uint8_t *buf, size_t buflen, 1004652b8f3SDaniel P. Berrange void *opaque, Error **errp) 1014652b8f3SDaniel P. Berrange { 1024652b8f3SDaniel P. Berrange BlockDriverState *bs = opaque; 1034652b8f3SDaniel P. Berrange BDRVQcow2State *s = bs->opaque; 1044652b8f3SDaniel P. Berrange ssize_t ret; 1054652b8f3SDaniel P. Berrange 1064652b8f3SDaniel P. Berrange if ((offset + buflen) > s->crypto_header.length) { 1074652b8f3SDaniel P. Berrange error_setg(errp, "Request for data outside of extension header"); 1084652b8f3SDaniel P. Berrange return -1; 1094652b8f3SDaniel P. Berrange } 1104652b8f3SDaniel P. Berrange 1114652b8f3SDaniel P. Berrange ret = bdrv_pread(bs->file, 1124652b8f3SDaniel P. Berrange s->crypto_header.offset + offset, buf, buflen); 1134652b8f3SDaniel P. Berrange if (ret < 0) { 1144652b8f3SDaniel P. Berrange error_setg_errno(errp, -ret, "Could not read encryption header"); 1154652b8f3SDaniel P. Berrange return -1; 1164652b8f3SDaniel P. Berrange } 1174652b8f3SDaniel P. Berrange return ret; 1184652b8f3SDaniel P. Berrange } 1194652b8f3SDaniel P. Berrange 1204652b8f3SDaniel P. Berrange 1214652b8f3SDaniel P. Berrange static ssize_t qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen, 1224652b8f3SDaniel P. Berrange void *opaque, Error **errp) 1234652b8f3SDaniel P. Berrange { 1244652b8f3SDaniel P. Berrange BlockDriverState *bs = opaque; 1254652b8f3SDaniel P. Berrange BDRVQcow2State *s = bs->opaque; 1264652b8f3SDaniel P. Berrange int64_t ret; 1274652b8f3SDaniel P. Berrange int64_t clusterlen; 1284652b8f3SDaniel P. Berrange 1294652b8f3SDaniel P. Berrange ret = qcow2_alloc_clusters(bs, headerlen); 1304652b8f3SDaniel P. Berrange if (ret < 0) { 1314652b8f3SDaniel P. Berrange error_setg_errno(errp, -ret, 1324652b8f3SDaniel P. Berrange "Cannot allocate cluster for LUKS header size %zu", 1334652b8f3SDaniel P. Berrange headerlen); 1344652b8f3SDaniel P. Berrange return -1; 1354652b8f3SDaniel P. Berrange } 1364652b8f3SDaniel P. Berrange 1374652b8f3SDaniel P. 
Berrange s->crypto_header.length = headerlen; 1384652b8f3SDaniel P. Berrange s->crypto_header.offset = ret; 1394652b8f3SDaniel P. Berrange 1404652b8f3SDaniel P. Berrange /* Zero fill remaining space in cluster so it has predictable 1414652b8f3SDaniel P. Berrange * content in case of future spec changes */ 1424652b8f3SDaniel P. Berrange clusterlen = size_to_clusters(s, headerlen) * s->cluster_size; 143966b000fSKevin Wolf assert(qcow2_pre_write_overlap_check(bs, 0, ret, clusterlen, false) == 0); 1444652b8f3SDaniel P. Berrange ret = bdrv_pwrite_zeroes(bs->file, 1454652b8f3SDaniel P. Berrange ret + headerlen, 1464652b8f3SDaniel P. Berrange clusterlen - headerlen, 0); 1474652b8f3SDaniel P. Berrange if (ret < 0) { 1484652b8f3SDaniel P. Berrange error_setg_errno(errp, -ret, "Could not zero fill encryption header"); 1494652b8f3SDaniel P. Berrange return -1; 1504652b8f3SDaniel P. Berrange } 1514652b8f3SDaniel P. Berrange 1524652b8f3SDaniel P. Berrange return ret; 1534652b8f3SDaniel P. Berrange } 1544652b8f3SDaniel P. Berrange 1554652b8f3SDaniel P. Berrange 1564652b8f3SDaniel P. Berrange static ssize_t qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset, 1574652b8f3SDaniel P. Berrange const uint8_t *buf, size_t buflen, 1584652b8f3SDaniel P. Berrange void *opaque, Error **errp) 1594652b8f3SDaniel P. Berrange { 1604652b8f3SDaniel P. Berrange BlockDriverState *bs = opaque; 1614652b8f3SDaniel P. Berrange BDRVQcow2State *s = bs->opaque; 1624652b8f3SDaniel P. Berrange ssize_t ret; 1634652b8f3SDaniel P. Berrange 1644652b8f3SDaniel P. Berrange if ((offset + buflen) > s->crypto_header.length) { 1654652b8f3SDaniel P. Berrange error_setg(errp, "Request for data outside of extension header"); 1664652b8f3SDaniel P. Berrange return -1; 1674652b8f3SDaniel P. Berrange } 1684652b8f3SDaniel P. Berrange 1694652b8f3SDaniel P. Berrange ret = bdrv_pwrite(bs->file, 1704652b8f3SDaniel P. Berrange s->crypto_header.offset + offset, buf, buflen); 1714652b8f3SDaniel P. 
Berrange if (ret < 0) { 1724652b8f3SDaniel P. Berrange error_setg_errno(errp, -ret, "Could not read encryption header"); 1734652b8f3SDaniel P. Berrange return -1; 1744652b8f3SDaniel P. Berrange } 1754652b8f3SDaniel P. Berrange return ret; 1764652b8f3SDaniel P. Berrange } 1774652b8f3SDaniel P. Berrange 1784652b8f3SDaniel P. Berrange 1799b80ddf3Saliguori /* 1809b80ddf3Saliguori * read qcow2 extension and fill bs 1819b80ddf3Saliguori * start reading from start_offset 1829b80ddf3Saliguori * finish reading upon magic of value 0 or when end_offset reached 1839b80ddf3Saliguori * unknown magic is skipped (future extension this version knows nothing about) 1849b80ddf3Saliguori * return 0 upon success, non-0 otherwise 1859b80ddf3Saliguori */ 1867c80ab3fSJes Sorensen static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, 1873ef6c40aSMax Reitz uint64_t end_offset, void **p_feature_table, 18888ddffaeSVladimir Sementsov-Ogievskiy int flags, bool *need_update_header, 18988ddffaeSVladimir Sementsov-Ogievskiy Error **errp) 1909b80ddf3Saliguori { 191ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 1929b80ddf3Saliguori QCowExtension ext; 1939b80ddf3Saliguori uint64_t offset; 19475bab85cSKevin Wolf int ret; 19588ddffaeSVladimir Sementsov-Ogievskiy Qcow2BitmapHeaderExt bitmaps_ext; 19688ddffaeSVladimir Sementsov-Ogievskiy 19788ddffaeSVladimir Sementsov-Ogievskiy if (need_update_header != NULL) { 19888ddffaeSVladimir Sementsov-Ogievskiy *need_update_header = false; 19988ddffaeSVladimir Sementsov-Ogievskiy } 2009b80ddf3Saliguori 2019b80ddf3Saliguori #ifdef DEBUG_EXT 2027c80ab3fSJes Sorensen printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset); 2039b80ddf3Saliguori #endif 2049b80ddf3Saliguori offset = start_offset; 2059b80ddf3Saliguori while (offset < end_offset) { 2069b80ddf3Saliguori 2079b80ddf3Saliguori #ifdef DEBUG_EXT 2089b80ddf3Saliguori /* Sanity check */ 2099b80ddf3Saliguori if (offset > s->cluster_size) 2107c80ab3fSJes Sorensen 
printf("qcow2_read_extension: suspicious offset %lu\n", offset); 2119b80ddf3Saliguori 2129b2260cbSDong Xu Wang printf("attempting to read extended header in offset %lu\n", offset); 2139b80ddf3Saliguori #endif 2149b80ddf3Saliguori 215cf2ab8fcSKevin Wolf ret = bdrv_pread(bs->file, offset, &ext, sizeof(ext)); 2163ef6c40aSMax Reitz if (ret < 0) { 2173ef6c40aSMax Reitz error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: " 2183ef6c40aSMax Reitz "pread fail from offset %" PRIu64, offset); 2199b80ddf3Saliguori return 1; 2209b80ddf3Saliguori } 2213b698f52SPeter Maydell ext.magic = be32_to_cpu(ext.magic); 2223b698f52SPeter Maydell ext.len = be32_to_cpu(ext.len); 2239b80ddf3Saliguori offset += sizeof(ext); 2249b80ddf3Saliguori #ifdef DEBUG_EXT 2259b80ddf3Saliguori printf("ext.magic = 0x%x\n", ext.magic); 2269b80ddf3Saliguori #endif 2272ebafc85SKevin Wolf if (offset > end_offset || ext.len > end_offset - offset) { 2283ef6c40aSMax Reitz error_setg(errp, "Header extension too large"); 22964ca6aeeSKevin Wolf return -EINVAL; 23064ca6aeeSKevin Wolf } 23164ca6aeeSKevin Wolf 2329b80ddf3Saliguori switch (ext.magic) { 2337c80ab3fSJes Sorensen case QCOW2_EXT_MAGIC_END: 2349b80ddf3Saliguori return 0; 235f965509cSaliguori 2367c80ab3fSJes Sorensen case QCOW2_EXT_MAGIC_BACKING_FORMAT: 237f965509cSaliguori if (ext.len >= sizeof(bs->backing_format)) { 238521b2b5dSMax Reitz error_setg(errp, "ERROR: ext_backing_format: len=%" PRIu32 239521b2b5dSMax Reitz " too large (>=%zu)", ext.len, 240521b2b5dSMax Reitz sizeof(bs->backing_format)); 241f965509cSaliguori return 2; 242f965509cSaliguori } 243cf2ab8fcSKevin Wolf ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len); 2443ef6c40aSMax Reitz if (ret < 0) { 2453ef6c40aSMax Reitz error_setg_errno(errp, -ret, "ERROR: ext_backing_format: " 2463ef6c40aSMax Reitz "Could not read format name"); 247f965509cSaliguori return 3; 2483ef6c40aSMax Reitz } 249f965509cSaliguori bs->backing_format[ext.len] = '\0'; 250e4603fe1SKevin Wolf 
s->image_backing_format = g_strdup(bs->backing_format); 251f965509cSaliguori #ifdef DEBUG_EXT 252f965509cSaliguori printf("Qcow2: Got format extension %s\n", bs->backing_format); 253f965509cSaliguori #endif 254f965509cSaliguori break; 255f965509cSaliguori 256cfcc4c62SKevin Wolf case QCOW2_EXT_MAGIC_FEATURE_TABLE: 257cfcc4c62SKevin Wolf if (p_feature_table != NULL) { 258cfcc4c62SKevin Wolf void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature)); 259cf2ab8fcSKevin Wolf ret = bdrv_pread(bs->file, offset , feature_table, ext.len); 260cfcc4c62SKevin Wolf if (ret < 0) { 2613ef6c40aSMax Reitz error_setg_errno(errp, -ret, "ERROR: ext_feature_table: " 2623ef6c40aSMax Reitz "Could not read table"); 263cfcc4c62SKevin Wolf return ret; 264cfcc4c62SKevin Wolf } 265cfcc4c62SKevin Wolf 266cfcc4c62SKevin Wolf *p_feature_table = feature_table; 267cfcc4c62SKevin Wolf } 268cfcc4c62SKevin Wolf break; 269cfcc4c62SKevin Wolf 2704652b8f3SDaniel P. Berrange case QCOW2_EXT_MAGIC_CRYPTO_HEADER: { 2714652b8f3SDaniel P. Berrange unsigned int cflags = 0; 2724652b8f3SDaniel P. Berrange if (s->crypt_method_header != QCOW_CRYPT_LUKS) { 2734652b8f3SDaniel P. Berrange error_setg(errp, "CRYPTO header extension only " 2744652b8f3SDaniel P. Berrange "expected with LUKS encryption method"); 2754652b8f3SDaniel P. Berrange return -EINVAL; 2764652b8f3SDaniel P. Berrange } 2774652b8f3SDaniel P. Berrange if (ext.len != sizeof(Qcow2CryptoHeaderExtension)) { 2784652b8f3SDaniel P. Berrange error_setg(errp, "CRYPTO header extension size %u, " 2794652b8f3SDaniel P. Berrange "but expected size %zu", ext.len, 2804652b8f3SDaniel P. Berrange sizeof(Qcow2CryptoHeaderExtension)); 2814652b8f3SDaniel P. Berrange return -EINVAL; 2824652b8f3SDaniel P. Berrange } 2834652b8f3SDaniel P. Berrange 2844652b8f3SDaniel P. Berrange ret = bdrv_pread(bs->file, offset, &s->crypto_header, ext.len); 2854652b8f3SDaniel P. Berrange if (ret < 0) { 2864652b8f3SDaniel P. Berrange error_setg_errno(errp, -ret, 2874652b8f3SDaniel P. 
Berrange "Unable to read CRYPTO header extension"); 2884652b8f3SDaniel P. Berrange return ret; 2894652b8f3SDaniel P. Berrange } 2903b698f52SPeter Maydell s->crypto_header.offset = be64_to_cpu(s->crypto_header.offset); 2913b698f52SPeter Maydell s->crypto_header.length = be64_to_cpu(s->crypto_header.length); 2924652b8f3SDaniel P. Berrange 2934652b8f3SDaniel P. Berrange if ((s->crypto_header.offset % s->cluster_size) != 0) { 2944652b8f3SDaniel P. Berrange error_setg(errp, "Encryption header offset '%" PRIu64 "' is " 2954652b8f3SDaniel P. Berrange "not a multiple of cluster size '%u'", 2964652b8f3SDaniel P. Berrange s->crypto_header.offset, s->cluster_size); 2974652b8f3SDaniel P. Berrange return -EINVAL; 2984652b8f3SDaniel P. Berrange } 2994652b8f3SDaniel P. Berrange 3004652b8f3SDaniel P. Berrange if (flags & BDRV_O_NO_IO) { 3014652b8f3SDaniel P. Berrange cflags |= QCRYPTO_BLOCK_OPEN_NO_IO; 3024652b8f3SDaniel P. Berrange } 3031cd9a787SDaniel P. Berrange s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.", 3044652b8f3SDaniel P. Berrange qcow2_crypto_hdr_read_func, 305c972fa12SVladimir Sementsov-Ogievskiy bs, cflags, 1, errp); 3064652b8f3SDaniel P. Berrange if (!s->crypto) { 3074652b8f3SDaniel P. Berrange return -EINVAL; 3084652b8f3SDaniel P. Berrange } 3094652b8f3SDaniel P. Berrange } break; 3104652b8f3SDaniel P. 
Berrange 31188ddffaeSVladimir Sementsov-Ogievskiy case QCOW2_EXT_MAGIC_BITMAPS: 31288ddffaeSVladimir Sementsov-Ogievskiy if (ext.len != sizeof(bitmaps_ext)) { 31388ddffaeSVladimir Sementsov-Ogievskiy error_setg_errno(errp, -ret, "bitmaps_ext: " 31488ddffaeSVladimir Sementsov-Ogievskiy "Invalid extension length"); 31588ddffaeSVladimir Sementsov-Ogievskiy return -EINVAL; 31688ddffaeSVladimir Sementsov-Ogievskiy } 31788ddffaeSVladimir Sementsov-Ogievskiy 31888ddffaeSVladimir Sementsov-Ogievskiy if (!(s->autoclear_features & QCOW2_AUTOCLEAR_BITMAPS)) { 319c9ceb3ecSMax Reitz if (s->qcow_version < 3) { 320c9ceb3ecSMax Reitz /* Let's be a bit more specific */ 321c9ceb3ecSMax Reitz warn_report("This qcow2 v2 image contains bitmaps, but " 322c9ceb3ecSMax Reitz "they may have been modified by a program " 323c9ceb3ecSMax Reitz "without persistent bitmap support; so now " 324c9ceb3ecSMax Reitz "they must all be considered inconsistent"); 325c9ceb3ecSMax Reitz } else { 32655d527a9SAlistair Francis warn_report("a program lacking bitmap support " 32788ddffaeSVladimir Sementsov-Ogievskiy "modified this file, so all bitmaps are now " 32855d527a9SAlistair Francis "considered inconsistent"); 329c9ceb3ecSMax Reitz } 33055d527a9SAlistair Francis error_printf("Some clusters may be leaked, " 33155d527a9SAlistair Francis "run 'qemu-img check -r' on the image " 33288ddffaeSVladimir Sementsov-Ogievskiy "file to fix."); 33388ddffaeSVladimir Sementsov-Ogievskiy if (need_update_header != NULL) { 33488ddffaeSVladimir Sementsov-Ogievskiy /* Updating is needed to drop invalid bitmap extension. 
*/ 33588ddffaeSVladimir Sementsov-Ogievskiy *need_update_header = true; 33688ddffaeSVladimir Sementsov-Ogievskiy } 33788ddffaeSVladimir Sementsov-Ogievskiy break; 33888ddffaeSVladimir Sementsov-Ogievskiy } 33988ddffaeSVladimir Sementsov-Ogievskiy 34088ddffaeSVladimir Sementsov-Ogievskiy ret = bdrv_pread(bs->file, offset, &bitmaps_ext, ext.len); 34188ddffaeSVladimir Sementsov-Ogievskiy if (ret < 0) { 34288ddffaeSVladimir Sementsov-Ogievskiy error_setg_errno(errp, -ret, "bitmaps_ext: " 34388ddffaeSVladimir Sementsov-Ogievskiy "Could not read ext header"); 34488ddffaeSVladimir Sementsov-Ogievskiy return ret; 34588ddffaeSVladimir Sementsov-Ogievskiy } 34688ddffaeSVladimir Sementsov-Ogievskiy 34788ddffaeSVladimir Sementsov-Ogievskiy if (bitmaps_ext.reserved32 != 0) { 34888ddffaeSVladimir Sementsov-Ogievskiy error_setg_errno(errp, -ret, "bitmaps_ext: " 34988ddffaeSVladimir Sementsov-Ogievskiy "Reserved field is not zero"); 35088ddffaeSVladimir Sementsov-Ogievskiy return -EINVAL; 35188ddffaeSVladimir Sementsov-Ogievskiy } 35288ddffaeSVladimir Sementsov-Ogievskiy 3533b698f52SPeter Maydell bitmaps_ext.nb_bitmaps = be32_to_cpu(bitmaps_ext.nb_bitmaps); 3543b698f52SPeter Maydell bitmaps_ext.bitmap_directory_size = 3553b698f52SPeter Maydell be64_to_cpu(bitmaps_ext.bitmap_directory_size); 3563b698f52SPeter Maydell bitmaps_ext.bitmap_directory_offset = 3573b698f52SPeter Maydell be64_to_cpu(bitmaps_ext.bitmap_directory_offset); 35888ddffaeSVladimir Sementsov-Ogievskiy 35988ddffaeSVladimir Sementsov-Ogievskiy if (bitmaps_ext.nb_bitmaps > QCOW2_MAX_BITMAPS) { 36088ddffaeSVladimir Sementsov-Ogievskiy error_setg(errp, 36188ddffaeSVladimir Sementsov-Ogievskiy "bitmaps_ext: Image has %" PRIu32 " bitmaps, " 36288ddffaeSVladimir Sementsov-Ogievskiy "exceeding the QEMU supported maximum of %d", 36388ddffaeSVladimir Sementsov-Ogievskiy bitmaps_ext.nb_bitmaps, QCOW2_MAX_BITMAPS); 36488ddffaeSVladimir Sementsov-Ogievskiy return -EINVAL; 36588ddffaeSVladimir Sementsov-Ogievskiy } 
36688ddffaeSVladimir Sementsov-Ogievskiy 36788ddffaeSVladimir Sementsov-Ogievskiy if (bitmaps_ext.nb_bitmaps == 0) { 36888ddffaeSVladimir Sementsov-Ogievskiy error_setg(errp, "found bitmaps extension with zero bitmaps"); 36988ddffaeSVladimir Sementsov-Ogievskiy return -EINVAL; 37088ddffaeSVladimir Sementsov-Ogievskiy } 37188ddffaeSVladimir Sementsov-Ogievskiy 37288ddffaeSVladimir Sementsov-Ogievskiy if (bitmaps_ext.bitmap_directory_offset & (s->cluster_size - 1)) { 37388ddffaeSVladimir Sementsov-Ogievskiy error_setg(errp, "bitmaps_ext: " 37488ddffaeSVladimir Sementsov-Ogievskiy "invalid bitmap directory offset"); 37588ddffaeSVladimir Sementsov-Ogievskiy return -EINVAL; 37688ddffaeSVladimir Sementsov-Ogievskiy } 37788ddffaeSVladimir Sementsov-Ogievskiy 37888ddffaeSVladimir Sementsov-Ogievskiy if (bitmaps_ext.bitmap_directory_size > 37988ddffaeSVladimir Sementsov-Ogievskiy QCOW2_MAX_BITMAP_DIRECTORY_SIZE) { 38088ddffaeSVladimir Sementsov-Ogievskiy error_setg(errp, "bitmaps_ext: " 38188ddffaeSVladimir Sementsov-Ogievskiy "bitmap directory size (%" PRIu64 ") exceeds " 38288ddffaeSVladimir Sementsov-Ogievskiy "the maximum supported size (%d)", 38388ddffaeSVladimir Sementsov-Ogievskiy bitmaps_ext.bitmap_directory_size, 38488ddffaeSVladimir Sementsov-Ogievskiy QCOW2_MAX_BITMAP_DIRECTORY_SIZE); 38588ddffaeSVladimir Sementsov-Ogievskiy return -EINVAL; 38688ddffaeSVladimir Sementsov-Ogievskiy } 38788ddffaeSVladimir Sementsov-Ogievskiy 38888ddffaeSVladimir Sementsov-Ogievskiy s->nb_bitmaps = bitmaps_ext.nb_bitmaps; 38988ddffaeSVladimir Sementsov-Ogievskiy s->bitmap_directory_offset = 39088ddffaeSVladimir Sementsov-Ogievskiy bitmaps_ext.bitmap_directory_offset; 39188ddffaeSVladimir Sementsov-Ogievskiy s->bitmap_directory_size = 39288ddffaeSVladimir Sementsov-Ogievskiy bitmaps_ext.bitmap_directory_size; 39388ddffaeSVladimir Sementsov-Ogievskiy 39488ddffaeSVladimir Sementsov-Ogievskiy #ifdef DEBUG_EXT 39588ddffaeSVladimir Sementsov-Ogievskiy printf("Qcow2: Got bitmaps extension: 
" 39688ddffaeSVladimir Sementsov-Ogievskiy "offset=%" PRIu64 " nb_bitmaps=%" PRIu32 "\n", 39788ddffaeSVladimir Sementsov-Ogievskiy s->bitmap_directory_offset, s->nb_bitmaps); 39888ddffaeSVladimir Sementsov-Ogievskiy #endif 39988ddffaeSVladimir Sementsov-Ogievskiy break; 40088ddffaeSVladimir Sementsov-Ogievskiy 4019b890bdcSKevin Wolf case QCOW2_EXT_MAGIC_DATA_FILE: 4029b890bdcSKevin Wolf { 4039b890bdcSKevin Wolf s->image_data_file = g_malloc0(ext.len + 1); 4049b890bdcSKevin Wolf ret = bdrv_pread(bs->file, offset, s->image_data_file, ext.len); 4059b890bdcSKevin Wolf if (ret < 0) { 4069b890bdcSKevin Wolf error_setg_errno(errp, -ret, 4079b890bdcSKevin Wolf "ERROR: Could not read data file name"); 4089b890bdcSKevin Wolf return ret; 4099b890bdcSKevin Wolf } 4109b890bdcSKevin Wolf #ifdef DEBUG_EXT 4119b890bdcSKevin Wolf printf("Qcow2: Got external data file %s\n", s->image_data_file); 4129b890bdcSKevin Wolf #endif 4139b890bdcSKevin Wolf break; 4149b890bdcSKevin Wolf } 4159b890bdcSKevin Wolf 4169b80ddf3Saliguori default: 41775bab85cSKevin Wolf /* unknown magic - save it in case we need to rewrite the header */ 4184096974eSEric Blake /* If you add a new feature, make sure to also update the fast 4194096974eSEric Blake * path of qcow2_make_empty() to deal with it. 
*/ 42075bab85cSKevin Wolf { 42175bab85cSKevin Wolf Qcow2UnknownHeaderExtension *uext; 42275bab85cSKevin Wolf 42375bab85cSKevin Wolf uext = g_malloc0(sizeof(*uext) + ext.len); 42475bab85cSKevin Wolf uext->magic = ext.magic; 42575bab85cSKevin Wolf uext->len = ext.len; 42675bab85cSKevin Wolf QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next); 42775bab85cSKevin Wolf 428cf2ab8fcSKevin Wolf ret = bdrv_pread(bs->file, offset , uext->data, uext->len); 42975bab85cSKevin Wolf if (ret < 0) { 4303ef6c40aSMax Reitz error_setg_errno(errp, -ret, "ERROR: unknown extension: " 4313ef6c40aSMax Reitz "Could not read data"); 43275bab85cSKevin Wolf return ret; 43375bab85cSKevin Wolf } 43475bab85cSKevin Wolf } 4359b80ddf3Saliguori break; 4369b80ddf3Saliguori } 437fd29b4bbSKevin Wolf 438fd29b4bbSKevin Wolf offset += ((ext.len + 7) & ~7); 4399b80ddf3Saliguori } 4409b80ddf3Saliguori 4419b80ddf3Saliguori return 0; 4429b80ddf3Saliguori } 4439b80ddf3Saliguori 44475bab85cSKevin Wolf static void cleanup_unknown_header_ext(BlockDriverState *bs) 44575bab85cSKevin Wolf { 446ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 44775bab85cSKevin Wolf Qcow2UnknownHeaderExtension *uext, *next; 44875bab85cSKevin Wolf 44975bab85cSKevin Wolf QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) { 45075bab85cSKevin Wolf QLIST_REMOVE(uext, next); 45175bab85cSKevin Wolf g_free(uext); 45275bab85cSKevin Wolf } 45375bab85cSKevin Wolf } 4549b80ddf3Saliguori 455a55448b3SMax Reitz static void report_unsupported_feature(Error **errp, Qcow2Feature *table, 456a55448b3SMax Reitz uint64_t mask) 457cfcc4c62SKevin Wolf { 45812ac6d3dSKevin Wolf char *features = g_strdup(""); 45912ac6d3dSKevin Wolf char *old; 46012ac6d3dSKevin Wolf 461cfcc4c62SKevin Wolf while (table && table->name[0] != '\0') { 462cfcc4c62SKevin Wolf if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) { 46312ac6d3dSKevin Wolf if (mask & (1ULL << table->bit)) { 46412ac6d3dSKevin Wolf old = features; 46512ac6d3dSKevin Wolf features = 
g_strdup_printf("%s%s%.46s", old, *old ? ", " : "", 46612ac6d3dSKevin Wolf table->name); 46712ac6d3dSKevin Wolf g_free(old); 46812ac6d3dSKevin Wolf mask &= ~(1ULL << table->bit); 469cfcc4c62SKevin Wolf } 470cfcc4c62SKevin Wolf } 471cfcc4c62SKevin Wolf table++; 472cfcc4c62SKevin Wolf } 473cfcc4c62SKevin Wolf 474cfcc4c62SKevin Wolf if (mask) { 47512ac6d3dSKevin Wolf old = features; 47612ac6d3dSKevin Wolf features = g_strdup_printf("%s%sUnknown incompatible feature: %" PRIx64, 47712ac6d3dSKevin Wolf old, *old ? ", " : "", mask); 47812ac6d3dSKevin Wolf g_free(old); 479cfcc4c62SKevin Wolf } 48012ac6d3dSKevin Wolf 481a55448b3SMax Reitz error_setg(errp, "Unsupported qcow2 feature(s): %s", features); 48212ac6d3dSKevin Wolf g_free(features); 483cfcc4c62SKevin Wolf } 484cfcc4c62SKevin Wolf 485c61d0004SStefan Hajnoczi /* 486bfe8043eSStefan Hajnoczi * Sets the dirty bit and flushes afterwards if necessary. 487bfe8043eSStefan Hajnoczi * 488bfe8043eSStefan Hajnoczi * The incompatible_features bit is only set if the image file header was 489bfe8043eSStefan Hajnoczi * updated successfully. Therefore it is not required to check the return 490bfe8043eSStefan Hajnoczi * value of this function. 
491bfe8043eSStefan Hajnoczi */ 492280d3735SKevin Wolf int qcow2_mark_dirty(BlockDriverState *bs) 493bfe8043eSStefan Hajnoczi { 494ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 495bfe8043eSStefan Hajnoczi uint64_t val; 496bfe8043eSStefan Hajnoczi int ret; 497bfe8043eSStefan Hajnoczi 498bfe8043eSStefan Hajnoczi assert(s->qcow_version >= 3); 499bfe8043eSStefan Hajnoczi 500bfe8043eSStefan Hajnoczi if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { 501bfe8043eSStefan Hajnoczi return 0; /* already dirty */ 502bfe8043eSStefan Hajnoczi } 503bfe8043eSStefan Hajnoczi 504bfe8043eSStefan Hajnoczi val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY); 505d9ca2ea2SKevin Wolf ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features), 506bfe8043eSStefan Hajnoczi &val, sizeof(val)); 507bfe8043eSStefan Hajnoczi if (ret < 0) { 508bfe8043eSStefan Hajnoczi return ret; 509bfe8043eSStefan Hajnoczi } 5109a4f4c31SKevin Wolf ret = bdrv_flush(bs->file->bs); 511bfe8043eSStefan Hajnoczi if (ret < 0) { 512bfe8043eSStefan Hajnoczi return ret; 513bfe8043eSStefan Hajnoczi } 514bfe8043eSStefan Hajnoczi 515bfe8043eSStefan Hajnoczi /* Only treat image as dirty if the header was updated successfully */ 516bfe8043eSStefan Hajnoczi s->incompatible_features |= QCOW2_INCOMPAT_DIRTY; 517bfe8043eSStefan Hajnoczi return 0; 518bfe8043eSStefan Hajnoczi } 519bfe8043eSStefan Hajnoczi 520bfe8043eSStefan Hajnoczi /* 521c61d0004SStefan Hajnoczi * Clears the dirty bit and flushes before if necessary. Only call this 522c61d0004SStefan Hajnoczi * function when there are no pending requests, it does not guard against 523c61d0004SStefan Hajnoczi * concurrent requests dirtying the image. 
524c61d0004SStefan Hajnoczi */ 525c61d0004SStefan Hajnoczi static int qcow2_mark_clean(BlockDriverState *bs) 526c61d0004SStefan Hajnoczi { 527ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 528c61d0004SStefan Hajnoczi 529c61d0004SStefan Hajnoczi if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { 5304c2e5f8fSKevin Wolf int ret; 5314c2e5f8fSKevin Wolf 5324c2e5f8fSKevin Wolf s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY; 5334c2e5f8fSKevin Wolf 5348b220eb7SPaolo Bonzini ret = qcow2_flush_caches(bs); 535c61d0004SStefan Hajnoczi if (ret < 0) { 536c61d0004SStefan Hajnoczi return ret; 537c61d0004SStefan Hajnoczi } 538c61d0004SStefan Hajnoczi 539c61d0004SStefan Hajnoczi return qcow2_update_header(bs); 540c61d0004SStefan Hajnoczi } 541c61d0004SStefan Hajnoczi return 0; 542c61d0004SStefan Hajnoczi } 543c61d0004SStefan Hajnoczi 54469c98726SMax Reitz /* 54569c98726SMax Reitz * Marks the image as corrupt. 54669c98726SMax Reitz */ 54769c98726SMax Reitz int qcow2_mark_corrupt(BlockDriverState *bs) 54869c98726SMax Reitz { 549ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 55069c98726SMax Reitz 55169c98726SMax Reitz s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT; 55269c98726SMax Reitz return qcow2_update_header(bs); 55369c98726SMax Reitz } 55469c98726SMax Reitz 55569c98726SMax Reitz /* 55669c98726SMax Reitz * Marks the image as consistent, i.e., unsets the corrupt bit, and flushes 55769c98726SMax Reitz * before if necessary. 
55869c98726SMax Reitz */ 55969c98726SMax Reitz int qcow2_mark_consistent(BlockDriverState *bs) 56069c98726SMax Reitz { 561ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 56269c98726SMax Reitz 56369c98726SMax Reitz if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) { 5648b220eb7SPaolo Bonzini int ret = qcow2_flush_caches(bs); 56569c98726SMax Reitz if (ret < 0) { 56669c98726SMax Reitz return ret; 56769c98726SMax Reitz } 56869c98726SMax Reitz 56969c98726SMax Reitz s->incompatible_features &= ~QCOW2_INCOMPAT_CORRUPT; 57069c98726SMax Reitz return qcow2_update_header(bs); 57169c98726SMax Reitz } 57269c98726SMax Reitz return 0; 57369c98726SMax Reitz } 57469c98726SMax Reitz 5752fd61638SPaolo Bonzini static int coroutine_fn qcow2_co_check_locked(BlockDriverState *bs, 5762fd61638SPaolo Bonzini BdrvCheckResult *result, 577acbe5982SStefan Hajnoczi BdrvCheckMode fix) 578acbe5982SStefan Hajnoczi { 579acbe5982SStefan Hajnoczi int ret = qcow2_check_refcounts(bs, result, fix); 580acbe5982SStefan Hajnoczi if (ret < 0) { 581acbe5982SStefan Hajnoczi return ret; 582acbe5982SStefan Hajnoczi } 583acbe5982SStefan Hajnoczi 584acbe5982SStefan Hajnoczi if (fix && result->check_errors == 0 && result->corruptions == 0) { 58524530f3eSMax Reitz ret = qcow2_mark_clean(bs); 58624530f3eSMax Reitz if (ret < 0) { 58724530f3eSMax Reitz return ret; 58824530f3eSMax Reitz } 58924530f3eSMax Reitz return qcow2_mark_consistent(bs); 590acbe5982SStefan Hajnoczi } 591acbe5982SStefan Hajnoczi return ret; 592acbe5982SStefan Hajnoczi } 593acbe5982SStefan Hajnoczi 5942fd61638SPaolo Bonzini static int coroutine_fn qcow2_co_check(BlockDriverState *bs, 5952fd61638SPaolo Bonzini BdrvCheckResult *result, 5962fd61638SPaolo Bonzini BdrvCheckMode fix) 5972fd61638SPaolo Bonzini { 5982fd61638SPaolo Bonzini BDRVQcow2State *s = bs->opaque; 5992fd61638SPaolo Bonzini int ret; 6002fd61638SPaolo Bonzini 6012fd61638SPaolo Bonzini qemu_co_mutex_lock(&s->lock); 6022fd61638SPaolo Bonzini ret = qcow2_co_check_locked(bs, 
result, fix); 6032fd61638SPaolo Bonzini qemu_co_mutex_unlock(&s->lock); 6042fd61638SPaolo Bonzini return ret; 6052fd61638SPaolo Bonzini } 6062fd61638SPaolo Bonzini 6070cf0e598SAlberto Garcia int qcow2_validate_table(BlockDriverState *bs, uint64_t offset, 6080cf0e598SAlberto Garcia uint64_t entries, size_t entry_len, 6090cf0e598SAlberto Garcia int64_t max_size_bytes, const char *table_name, 6100cf0e598SAlberto Garcia Error **errp) 6118c7de283SKevin Wolf { 612ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 6130cf0e598SAlberto Garcia 6140cf0e598SAlberto Garcia if (entries > max_size_bytes / entry_len) { 6150cf0e598SAlberto Garcia error_setg(errp, "%s too large", table_name); 6160cf0e598SAlberto Garcia return -EFBIG; 6170cf0e598SAlberto Garcia } 6188c7de283SKevin Wolf 6198c7de283SKevin Wolf /* Use signed INT64_MAX as the maximum even for uint64_t header fields, 6208c7de283SKevin Wolf * because values will be passed to qemu functions taking int64_t. */ 6210cf0e598SAlberto Garcia if ((INT64_MAX - entries * entry_len < offset) || 6220cf0e598SAlberto Garcia (offset_into_cluster(s, offset) != 0)) { 6230cf0e598SAlberto Garcia error_setg(errp, "%s offset invalid", table_name); 6248c7de283SKevin Wolf return -EINVAL; 6258c7de283SKevin Wolf } 6268c7de283SKevin Wolf 6278c7de283SKevin Wolf return 0; 6288c7de283SKevin Wolf } 6298c7de283SKevin Wolf 6308a2ce0bcSAlberto Garcia static const char *const mutable_opts[] = { 6318a2ce0bcSAlberto Garcia QCOW2_OPT_LAZY_REFCOUNTS, 6328a2ce0bcSAlberto Garcia QCOW2_OPT_DISCARD_REQUEST, 6338a2ce0bcSAlberto Garcia QCOW2_OPT_DISCARD_SNAPSHOT, 6348a2ce0bcSAlberto Garcia QCOW2_OPT_DISCARD_OTHER, 6358a2ce0bcSAlberto Garcia QCOW2_OPT_OVERLAP, 6368a2ce0bcSAlberto Garcia QCOW2_OPT_OVERLAP_TEMPLATE, 6378a2ce0bcSAlberto Garcia QCOW2_OPT_OVERLAP_MAIN_HEADER, 6388a2ce0bcSAlberto Garcia QCOW2_OPT_OVERLAP_ACTIVE_L1, 6398a2ce0bcSAlberto Garcia QCOW2_OPT_OVERLAP_ACTIVE_L2, 6408a2ce0bcSAlberto Garcia QCOW2_OPT_OVERLAP_REFCOUNT_TABLE, 6418a2ce0bcSAlberto 
/*
 * NULL-terminated list of runtime option names. It contains the name of
 * every entry of qcow2_runtime_opts below that is declared with a QCOW2_OPT_*
 * constant (the "encrypt." key-secret entry is not listed).
 * NOTE(review): presumably these are the options that may be changed on
 * reopen -- confirm against the place where mutable_opts is referenced.
 */
static const char *const mutable_opts[] = {
    QCOW2_OPT_LAZY_REFCOUNTS,
    QCOW2_OPT_DISCARD_REQUEST,
    QCOW2_OPT_DISCARD_SNAPSHOT,
    QCOW2_OPT_DISCARD_OTHER,
    QCOW2_OPT_OVERLAP,
    QCOW2_OPT_OVERLAP_TEMPLATE,
    QCOW2_OPT_OVERLAP_MAIN_HEADER,
    QCOW2_OPT_OVERLAP_ACTIVE_L1,
    QCOW2_OPT_OVERLAP_ACTIVE_L2,
    QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
    QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
    QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
    QCOW2_OPT_OVERLAP_INACTIVE_L1,
    QCOW2_OPT_OVERLAP_INACTIVE_L2,
    QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
    QCOW2_OPT_CACHE_SIZE,
    QCOW2_OPT_L2_CACHE_SIZE,
    QCOW2_OPT_L2_CACHE_ENTRY_SIZE,
    QCOW2_OPT_REFCOUNT_CACHE_SIZE,
    QCOW2_OPT_CACHE_CLEAN_INTERVAL,
    NULL
};

/*
 * Descriptors (name, type, help text) of the qcow2 runtime options.
 * qcow2_update_options_prepare() creates a QemuOpts from this list and
 * absorbs the options QDict into it.
 */
static QemuOptsList qcow2_runtime_opts = {
    .name = "qcow2",
    .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head),
    .desc = {
        /* Refcount handling */
        {
            .name = QCOW2_OPT_LAZY_REFCOUNTS,
            .type = QEMU_OPT_BOOL,
            .help = "Postpone refcount updates",
        },
        /* Discard pass-through, one switch per discard type */
        {
            .name = QCOW2_OPT_DISCARD_REQUEST,
            .type = QEMU_OPT_BOOL,
            .help = "Pass guest discard requests to the layer below",
        },
        {
            .name = QCOW2_OPT_DISCARD_SNAPSHOT,
            .type = QEMU_OPT_BOOL,
            .help = "Generate discard requests when snapshot related space "
                    "is freed",
        },
        {
            .name = QCOW2_OPT_DISCARD_OTHER,
            .type = QEMU_OPT_BOOL,
            .help = "Generate discard requests when other clusters are freed",
        },
        /*
         * Overlap check template. Both names are accepted;
         * qcow2_update_options_prepare() rejects conflicting values.
         */
        {
            .name = QCOW2_OPT_OVERLAP,
            .type = QEMU_OPT_STRING,
            .help = "Selects which overlap checks to perform from a range of "
                    "templates (none, constant, cached, all)",
        },
        {
            .name = QCOW2_OPT_OVERLAP_TEMPLATE,
            .type = QEMU_OPT_STRING,
            .help = "Selects which overlap checks to perform from a range of "
                    "templates (none, constant, cached, all)",
        },
        /* Per-structure overlap check switches, overriding the template */
        {
            .name = QCOW2_OPT_OVERLAP_MAIN_HEADER,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the main qcow2 header",
        },
        {
            .name = QCOW2_OPT_OVERLAP_ACTIVE_L1,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the active L1 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_ACTIVE_L2,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into an active L2 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the refcount table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into a refcount block",
        },
        {
            .name = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the snapshot table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_INACTIVE_L1,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into an inactive L1 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_INACTIVE_L2,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into an inactive L2 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the bitmap directory",
        },
        /* Metadata cache sizing, evaluated by read_cache_sizes() */
        {
            .name = QCOW2_OPT_CACHE_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Maximum combined metadata (L2 tables and refcount blocks) "
                    "cache size",
        },
        {
            .name = QCOW2_OPT_L2_CACHE_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Maximum L2 table cache size",
        },
        {
            .name = QCOW2_OPT_L2_CACHE_ENTRY_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Size of each entry in the L2 cache",
        },
        {
            .name = QCOW2_OPT_REFCOUNT_CACHE_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Maximum refcount block cache size",
        },
        {
            .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL,
            .type = QEMU_OPT_NUMBER,
            .help = "Clean unused cache entries after this time (in seconds)",
        },
        /* Encryption key secret, declared under the "encrypt." prefix */
        BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.",
            "ID of secret providing qcow2 AES key or LUKS passphrase"),
        { /* end of list */ }
    },
};

/*
 * Name of the boolean runtime option belonging to each overlap check,
 * indexed by the check's QCOW2_OL_*_BITNR bit number.
 * qcow2_update_options_prepare() walks this array to build the
 * overlap_check bitmask.
 */
static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = {
    [QCOW2_OL_MAIN_HEADER_BITNR]      = QCOW2_OPT_OVERLAP_MAIN_HEADER,
    [QCOW2_OL_ACTIVE_L1_BITNR]        = QCOW2_OPT_OVERLAP_ACTIVE_L1,
    [QCOW2_OL_ACTIVE_L2_BITNR]        = QCOW2_OPT_OVERLAP_ACTIVE_L2,
    [QCOW2_OL_REFCOUNT_TABLE_BITNR]   = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
    [QCOW2_OL_REFCOUNT_BLOCK_BITNR]   = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
    [QCOW2_OL_SNAPSHOT_TABLE_BITNR]   = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
    [QCOW2_OL_INACTIVE_L1_BITNR]      = QCOW2_OPT_OVERLAP_INACTIVE_L1,
    [QCOW2_OL_INACTIVE_L2_BITNR]      = QCOW2_OPT_OVERLAP_INACTIVE_L2,
    [QCOW2_OL_BITMAP_DIRECTORY_BITNR] = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
};

/*
 * Timer callback of the cache cleanup timer; @opaque is the
 * BlockDriverState that cache_clean_timer_init() registered.
 *
 * Calls qcow2_cache_clean_unused() on the L2 table cache and on the
 * refcount block cache, then re-arms the timer to fire again
 * s->cache_clean_interval * 1000 ms from the current QEMU_CLOCK_VIRTUAL
 * time.
 */
static void cache_clean_timer_cb(void *opaque)
{
    BlockDriverState *bs = opaque;
    BDRVQcow2State *s = bs->opaque;
    qcow2_cache_clean_unused(s->l2_table_cache);
    qcow2_cache_clean_unused(s->refcount_block_cache);
    /* The cast widens the interval before it is scaled to milliseconds */
    timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
              (int64_t) s->cache_clean_interval * 1000);
}
Garcia if (s->cache_clean_interval > 0) { 794279621c0SAlberto Garcia s->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL, 795279621c0SAlberto Garcia SCALE_MS, cache_clean_timer_cb, 796279621c0SAlberto Garcia bs); 797279621c0SAlberto Garcia timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 798279621c0SAlberto Garcia (int64_t) s->cache_clean_interval * 1000); 799279621c0SAlberto Garcia } 800279621c0SAlberto Garcia } 801279621c0SAlberto Garcia 802279621c0SAlberto Garcia static void cache_clean_timer_del(BlockDriverState *bs) 803279621c0SAlberto Garcia { 804ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 805279621c0SAlberto Garcia if (s->cache_clean_timer) { 806279621c0SAlberto Garcia timer_del(s->cache_clean_timer); 807279621c0SAlberto Garcia timer_free(s->cache_clean_timer); 808279621c0SAlberto Garcia s->cache_clean_timer = NULL; 809279621c0SAlberto Garcia } 810279621c0SAlberto Garcia } 811279621c0SAlberto Garcia 812279621c0SAlberto Garcia static void qcow2_detach_aio_context(BlockDriverState *bs) 813279621c0SAlberto Garcia { 814279621c0SAlberto Garcia cache_clean_timer_del(bs); 815279621c0SAlberto Garcia } 816279621c0SAlberto Garcia 817279621c0SAlberto Garcia static void qcow2_attach_aio_context(BlockDriverState *bs, 818279621c0SAlberto Garcia AioContext *new_context) 819279621c0SAlberto Garcia { 820279621c0SAlberto Garcia cache_clean_timer_init(bs, new_context); 821279621c0SAlberto Garcia } 822279621c0SAlberto Garcia 823bc85ef26SMax Reitz static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, 824bc85ef26SMax Reitz uint64_t *l2_cache_size, 8251221fe6fSAlberto Garcia uint64_t *l2_cache_entry_size, 8266c1c8d5dSMax Reitz uint64_t *refcount_cache_size, Error **errp) 8276c1c8d5dSMax Reitz { 828ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 829b749562dSLeonid Bloch uint64_t combined_cache_size, l2_cache_max_setting; 8306c1c8d5dSMax Reitz bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set; 
831af39bd0dSAlberto Garcia bool l2_cache_entry_size_set; 8327af5eea9SAlberto Garcia int min_refcount_cache = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size; 833b749562dSLeonid Bloch uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE; 834b749562dSLeonid Bloch uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 8); 8356c1c8d5dSMax Reitz 8366c1c8d5dSMax Reitz combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE); 8376c1c8d5dSMax Reitz l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE); 8386c1c8d5dSMax Reitz refcount_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_REFCOUNT_CACHE_SIZE); 839af39bd0dSAlberto Garcia l2_cache_entry_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE); 8406c1c8d5dSMax Reitz 8416c1c8d5dSMax Reitz combined_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_CACHE_SIZE, 0); 842b749562dSLeonid Bloch l2_cache_max_setting = qemu_opt_get_size(opts, QCOW2_OPT_L2_CACHE_SIZE, 843b749562dSLeonid Bloch DEFAULT_L2_CACHE_MAX_SIZE); 8446c1c8d5dSMax Reitz *refcount_cache_size = qemu_opt_get_size(opts, 8456c1c8d5dSMax Reitz QCOW2_OPT_REFCOUNT_CACHE_SIZE, 0); 8466c1c8d5dSMax Reitz 8471221fe6fSAlberto Garcia *l2_cache_entry_size = qemu_opt_get_size( 8481221fe6fSAlberto Garcia opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE, s->cluster_size); 8491221fe6fSAlberto Garcia 850b749562dSLeonid Bloch *l2_cache_size = MIN(max_l2_cache, l2_cache_max_setting); 851b749562dSLeonid Bloch 8526c1c8d5dSMax Reitz if (combined_cache_size_set) { 8536c1c8d5dSMax Reitz if (l2_cache_size_set && refcount_cache_size_set) { 8546c1c8d5dSMax Reitz error_setg(errp, QCOW2_OPT_CACHE_SIZE ", " QCOW2_OPT_L2_CACHE_SIZE 8556c1c8d5dSMax Reitz " and " QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not be set " 856308999e9SLeonid Bloch "at the same time"); 8576c1c8d5dSMax Reitz return; 858b749562dSLeonid Bloch } else if (l2_cache_size_set && 859b749562dSLeonid Bloch (l2_cache_max_setting > combined_cache_size)) { 8606c1c8d5dSMax Reitz error_setg(errp, 
QCOW2_OPT_L2_CACHE_SIZE " may not exceed " 8616c1c8d5dSMax Reitz QCOW2_OPT_CACHE_SIZE); 8626c1c8d5dSMax Reitz return; 8636c1c8d5dSMax Reitz } else if (*refcount_cache_size > combined_cache_size) { 8646c1c8d5dSMax Reitz error_setg(errp, QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not exceed " 8656c1c8d5dSMax Reitz QCOW2_OPT_CACHE_SIZE); 8666c1c8d5dSMax Reitz return; 8676c1c8d5dSMax Reitz } 8686c1c8d5dSMax Reitz 8696c1c8d5dSMax Reitz if (l2_cache_size_set) { 8706c1c8d5dSMax Reitz *refcount_cache_size = combined_cache_size - *l2_cache_size; 8716c1c8d5dSMax Reitz } else if (refcount_cache_size_set) { 8726c1c8d5dSMax Reitz *l2_cache_size = combined_cache_size - *refcount_cache_size; 8736c1c8d5dSMax Reitz } else { 87452253998SAlberto Garcia /* Assign as much memory as possible to the L2 cache, and 87552253998SAlberto Garcia * use the remainder for the refcount cache */ 87652253998SAlberto Garcia if (combined_cache_size >= max_l2_cache + min_refcount_cache) { 87752253998SAlberto Garcia *l2_cache_size = max_l2_cache; 87852253998SAlberto Garcia *refcount_cache_size = combined_cache_size - *l2_cache_size; 87952253998SAlberto Garcia } else { 88052253998SAlberto Garcia *refcount_cache_size = 88152253998SAlberto Garcia MIN(combined_cache_size, min_refcount_cache); 8826c1c8d5dSMax Reitz *l2_cache_size = combined_cache_size - *refcount_cache_size; 8836c1c8d5dSMax Reitz } 88452253998SAlberto Garcia } 8856c1c8d5dSMax Reitz } 886af39bd0dSAlberto Garcia 887af39bd0dSAlberto Garcia /* 888af39bd0dSAlberto Garcia * If the L2 cache is not enough to cover the whole disk then 889af39bd0dSAlberto Garcia * default to 4KB entries. Smaller entries reduce the cost of 890af39bd0dSAlberto Garcia * loads and evictions and increase I/O performance. 
891af39bd0dSAlberto Garcia */ 892af39bd0dSAlberto Garcia if (*l2_cache_size < max_l2_cache && !l2_cache_entry_size_set) { 893af39bd0dSAlberto Garcia *l2_cache_entry_size = MIN(s->cluster_size, 4096); 894af39bd0dSAlberto Garcia } 895af39bd0dSAlberto Garcia 896657ada52SLeonid Bloch /* l2_cache_size and refcount_cache_size are ensured to have at least 897657ada52SLeonid Bloch * their minimum values in qcow2_update_options_prepare() */ 8981221fe6fSAlberto Garcia 8991221fe6fSAlberto Garcia if (*l2_cache_entry_size < (1 << MIN_CLUSTER_BITS) || 9001221fe6fSAlberto Garcia *l2_cache_entry_size > s->cluster_size || 9011221fe6fSAlberto Garcia !is_power_of_2(*l2_cache_entry_size)) { 9021221fe6fSAlberto Garcia error_setg(errp, "L2 cache entry size must be a power of two " 9031221fe6fSAlberto Garcia "between %d and the cluster size (%d)", 9041221fe6fSAlberto Garcia 1 << MIN_CLUSTER_BITS, s->cluster_size); 9051221fe6fSAlberto Garcia return; 9061221fe6fSAlberto Garcia } 9076c1c8d5dSMax Reitz } 9086c1c8d5dSMax Reitz 909ee55b173SKevin Wolf typedef struct Qcow2ReopenState { 910ee55b173SKevin Wolf Qcow2Cache *l2_table_cache; 911ee55b173SKevin Wolf Qcow2Cache *refcount_block_cache; 9123c2e511aSAlberto Garcia int l2_slice_size; /* Number of entries in a slice of the L2 table */ 913ee55b173SKevin Wolf bool use_lazy_refcounts; 914ee55b173SKevin Wolf int overlap_check; 915ee55b173SKevin Wolf bool discard_passthrough[QCOW2_DISCARD_MAX]; 916ee55b173SKevin Wolf uint64_t cache_clean_interval; 917b25b387fSDaniel P. 
/*
 * New runtime state built by qcow2_update_options_prepare(). It is either
 * moved into BDRVQcow2State by qcow2_update_options_commit() or released by
 * qcow2_update_options_abort().
 */
typedef struct Qcow2ReopenState {
    Qcow2Cache *l2_table_cache;       /* Newly created L2 table cache */
    Qcow2Cache *refcount_block_cache; /* Newly created refcount block cache */
    int l2_slice_size; /* Number of entries in a slice of the L2 table */
    bool use_lazy_refcounts;
    int overlap_check; /* Bitmask with one bit per QCOW2_OL_*_BITNR */
    bool discard_passthrough[QCOW2_DISCARD_MAX];
    uint64_t cache_clean_interval;
    QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */
} Qcow2ReopenState;

/*
 * Parse the runtime options in @options and build the new state in @r.
 *
 * The fields of BDRVQcow2State that qcow2_update_options_commit() replaces
 * are not assigned here. The function still has side effects on the image:
 * the existing L2 table and refcount block caches are flushed, and
 * qcow2_mark_clean() is called when lazy refcounts are being switched off.
 *
 * Keys starting with "encrypt." are extracted from @options into a separate
 * QDict first; the options known to qcow2_runtime_opts are then absorbed
 * from @options.
 *
 * Returns 0 on success. On failure a negative errno value is returned and
 * @errp is set; @r may then be partially filled in (for example with newly
 * created caches), which qcow2_update_options() cleans up by calling
 * qcow2_update_options_abort().
 */
static int qcow2_update_options_prepare(BlockDriverState *bs,
                                        Qcow2ReopenState *r,
                                        QDict *options, int flags,
                                        Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    QemuOpts *opts = NULL;
    const char *opt_overlap_check, *opt_overlap_check_template;
    int overlap_check_template = 0;
    uint64_t l2_cache_size, l2_cache_entry_size, refcount_cache_size;
    int i;
    const char *encryptfmt;
    QDict *encryptopts = NULL;
    Error *local_err = NULL;
    int ret;

    /* Split the "encrypt.*" keys off into their own QDict */
    qdict_extract_subqdict(options, &encryptopts, "encrypt.");
    encryptfmt = qdict_get_try_str(encryptopts, "format");

    opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
    }

    /* get L2 table/refcount block cache size from command line options */
    read_cache_sizes(bs, opts, &l2_cache_size, &l2_cache_entry_size,
                     &refcount_cache_size, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
    }

    /* From here on l2_cache_size counts cache entries, not bytes */
    l2_cache_size /= l2_cache_entry_size;
    if (l2_cache_size < MIN_L2_CACHE_SIZE) {
        l2_cache_size = MIN_L2_CACHE_SIZE;
    }
    if (l2_cache_size > INT_MAX) {
        error_setg(errp, "L2 cache size too big");
        ret = -EINVAL;
        goto fail;
    }

    /* Likewise, refcount_cache_size now counts clusters */
    refcount_cache_size /= s->cluster_size;
    if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) {
        refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE;
    }
    if (refcount_cache_size > INT_MAX) {
        error_setg(errp, "Refcount cache size too big");
        ret = -EINVAL;
        goto fail;
    }

    /* alloc new L2 table/refcount block cache, flush old one */
    if (s->l2_table_cache) {
        ret = qcow2_cache_flush(bs, s->l2_table_cache);
        if (ret) {
            error_setg_errno(errp, -ret, "Failed to flush the L2 table cache");
            goto fail;
        }
    }

    if (s->refcount_block_cache) {
        ret = qcow2_cache_flush(bs, s->refcount_block_cache);
        if (ret) {
            error_setg_errno(errp, -ret,
                             "Failed to flush the refcount block cache");
            goto fail;
        }
    }

    /* Each L2 cache entry holds one slice of 64-bit L2 entries */
    r->l2_slice_size = l2_cache_entry_size / sizeof(uint64_t);
    r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size,
                                           l2_cache_entry_size);
    r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size,
                                                 s->cluster_size);
    if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) {
        error_setg(errp, "Could not allocate metadata caches");
        ret = -ENOMEM;
        goto fail;
    }

    /* New interval for cache cleanup timer */
    r->cache_clean_interval =
        qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL,
                            DEFAULT_CACHE_CLEAN_INTERVAL);
#ifndef CONFIG_LINUX
    /* Without CONFIG_LINUX only an interval of 0 (no timer) is accepted */
    if (r->cache_clean_interval != 0) {
        error_setg(errp, QCOW2_OPT_CACHE_CLEAN_INTERVAL
                   " not supported on this host");
        ret = -EINVAL;
        goto fail;
    }
#endif
    if (r->cache_clean_interval > UINT_MAX) {
        error_setg(errp, "Cache clean interval too big");
        ret = -EINVAL;
        goto fail;
    }

    /* lazy-refcounts; flush if going from enabled to disabled */
    r->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
        (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));
    if (r->use_lazy_refcounts && s->qcow_version < 3) {
        error_setg(errp, "Lazy refcounts require a qcow2 image with at least "
                   "qemu 1.1 compatibility level");
        ret = -EINVAL;
        goto fail;
    }

    if (s->use_lazy_refcounts && !r->use_lazy_refcounts) {
        ret = qcow2_mark_clean(bs);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to disable lazy refcounts");
            goto fail;
        }
    }

    /* Overlap check options */
    opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP);
    opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE);
    /* Both spellings may be given, but only with the same value */
    if (opt_overlap_check_template && opt_overlap_check &&
        strcmp(opt_overlap_check_template, opt_overlap_check))
    {
        error_setg(errp, "Conflicting values for qcow2 options '"
                   QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE
                   "' ('%s')", opt_overlap_check, opt_overlap_check_template);
        ret = -EINVAL;
        goto fail;
    }
    if (!opt_overlap_check) {
        /* Fall back to the other spelling, then to the "cached" template */
        opt_overlap_check = opt_overlap_check_template ?: "cached";
    }

    if (!strcmp(opt_overlap_check, "none")) {
        overlap_check_template = 0;
    } else if (!strcmp(opt_overlap_check, "constant")) {
        overlap_check_template = QCOW2_OL_CONSTANT;
    } else if (!strcmp(opt_overlap_check, "cached")) {
        overlap_check_template = QCOW2_OL_CACHED;
    } else if (!strcmp(opt_overlap_check, "all")) {
        overlap_check_template = QCOW2_OL_ALL;
    } else {
        error_setg(errp, "Unsupported value '%s' for qcow2 option "
                   "'overlap-check'. Allowed are any of the following: "
                   "none, constant, cached, all", opt_overlap_check);
        ret = -EINVAL;
        goto fail;
    }

    r->overlap_check = 0;
    for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) {
        /* overlap-check defines a template bitmask, but every flag may be
         * overwritten through the associated boolean option */
        r->overlap_check |=
            qemu_opt_get_bool(opts, overlap_bool_option_names[i],
                              overlap_check_template & (1 << i)) << i;
    }

    /* NEVER and ALWAYS are fixed; the other discard types are configurable */
    r->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
    r->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
    r->discard_passthrough[QCOW2_DISCARD_REQUEST] =
        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
                          flags & BDRV_O_UNMAP);
    r->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
    r->discard_passthrough[QCOW2_DISCARD_OTHER] =
        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);

    /*
     * The encryption format requested in the options, if any, has to agree
     * with the method recorded in the image header.
     */
    switch (s->crypt_method_header) {
    case QCOW_CRYPT_NONE:
        if (encryptfmt) {
            error_setg(errp, "No encryption in image header, but options "
                       "specified format '%s'", encryptfmt);
            ret = -EINVAL;
            goto fail;
        }
        break;

    case QCOW_CRYPT_AES:
        if (encryptfmt && !g_str_equal(encryptfmt, "aes")) {
            error_setg(errp,
                       "Header reported 'aes' encryption format but "
                       "options specify '%s'", encryptfmt);
            ret = -EINVAL;
            goto fail;
        }
        /* The options are handed on with the format name "qcow" */
        qdict_put_str(encryptopts, "format", "qcow");
        r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp);
        break;

    case QCOW_CRYPT_LUKS:
        if (encryptfmt && !g_str_equal(encryptfmt, "luks")) {
            error_setg(errp,
                       "Header reported 'luks' encryption format but "
                       "options specify '%s'", encryptfmt);
            ret = -EINVAL;
            goto fail;
        }
        qdict_put_str(encryptopts, "format", "luks");
        r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp);
        break;

    default:
        /* r->crypto_opts stays NULL, so the check below fails the call */
        error_setg(errp, "Unsupported encryption method %d",
                   s->crypt_method_header);
        break;
    }
    /* Common failure exit for all cases that need crypto options */
    if (s->crypt_method_header != QCOW_CRYPT_NONE && !r->crypto_opts) {
        ret = -EINVAL;
        goto fail;
    }

    ret = 0;
fail:
    /* Reached on success as well: the temporaries are always released */
    qobject_unref(encryptopts);
    qemu_opts_del(opts);
    opts = NULL;
    return ret;
}
1162ee55b173SKevin Wolf 1163ee55b173SKevin Wolf for (i = 0; i < QCOW2_DISCARD_MAX; i++) { 1164ee55b173SKevin Wolf s->discard_passthrough[i] = r->discard_passthrough[i]; 1165ee55b173SKevin Wolf } 1166ee55b173SKevin Wolf 11675b0959a7SKevin Wolf if (s->cache_clean_interval != r->cache_clean_interval) { 11685b0959a7SKevin Wolf cache_clean_timer_del(bs); 1169ee55b173SKevin Wolf s->cache_clean_interval = r->cache_clean_interval; 1170ee55b173SKevin Wolf cache_clean_timer_init(bs, bdrv_get_aio_context(bs)); 1171ee55b173SKevin Wolf } 1172b25b387fSDaniel P. Berrange 1173b25b387fSDaniel P. Berrange qapi_free_QCryptoBlockOpenOptions(s->crypto_opts); 1174b25b387fSDaniel P. Berrange s->crypto_opts = r->crypto_opts; 11755b0959a7SKevin Wolf } 1176ee55b173SKevin Wolf 1177ee55b173SKevin Wolf static void qcow2_update_options_abort(BlockDriverState *bs, 1178ee55b173SKevin Wolf Qcow2ReopenState *r) 1179ee55b173SKevin Wolf { 1180ee55b173SKevin Wolf if (r->l2_table_cache) { 1181e64d4072SAlberto Garcia qcow2_cache_destroy(r->l2_table_cache); 1182ee55b173SKevin Wolf } 1183ee55b173SKevin Wolf if (r->refcount_block_cache) { 1184e64d4072SAlberto Garcia qcow2_cache_destroy(r->refcount_block_cache); 1185ee55b173SKevin Wolf } 1186b25b387fSDaniel P. 
Berrange qapi_free_QCryptoBlockOpenOptions(r->crypto_opts); 1187ee55b173SKevin Wolf } 1188ee55b173SKevin Wolf 1189ee55b173SKevin Wolf static int qcow2_update_options(BlockDriverState *bs, QDict *options, 1190ee55b173SKevin Wolf int flags, Error **errp) 1191ee55b173SKevin Wolf { 1192ee55b173SKevin Wolf Qcow2ReopenState r = {}; 1193ee55b173SKevin Wolf int ret; 1194ee55b173SKevin Wolf 1195ee55b173SKevin Wolf ret = qcow2_update_options_prepare(bs, &r, options, flags, errp); 1196ee55b173SKevin Wolf if (ret >= 0) { 1197ee55b173SKevin Wolf qcow2_update_options_commit(bs, &r); 1198ee55b173SKevin Wolf } else { 1199ee55b173SKevin Wolf qcow2_update_options_abort(bs, &r); 1200ee55b173SKevin Wolf } 120194edf3fbSKevin Wolf 12024c75d1a1SKevin Wolf return ret; 12034c75d1a1SKevin Wolf } 12044c75d1a1SKevin Wolf 12051fafcd93SPaolo Bonzini /* Called with s->lock held. */ 12061fafcd93SPaolo Bonzini static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, 12071fafcd93SPaolo Bonzini int flags, Error **errp) 1208585f8587Sbellard { 1209ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 12106d33e8e7SKevin Wolf unsigned int len, i; 12116d33e8e7SKevin Wolf int ret = 0; 1212585f8587Sbellard QCowHeader header; 121374c4510aSKevin Wolf Error *local_err = NULL; 12149b80ddf3Saliguori uint64_t ext_end; 12152cf7cfa1SKevin Wolf uint64_t l1_vm_state_index; 121688ddffaeSVladimir Sementsov-Ogievskiy bool update_header = false; 1217585f8587Sbellard 1218cf2ab8fcSKevin Wolf ret = bdrv_pread(bs->file, 0, &header, sizeof(header)); 12196d85a57eSJes Sorensen if (ret < 0) { 12203ef6c40aSMax Reitz error_setg_errno(errp, -ret, "Could not read qcow2 header"); 1221585f8587Sbellard goto fail; 12226d85a57eSJes Sorensen } 12233b698f52SPeter Maydell header.magic = be32_to_cpu(header.magic); 12243b698f52SPeter Maydell header.version = be32_to_cpu(header.version); 12253b698f52SPeter Maydell header.backing_file_offset = be64_to_cpu(header.backing_file_offset); 12263b698f52SPeter Maydell 
header.backing_file_size = be32_to_cpu(header.backing_file_size); 12273b698f52SPeter Maydell header.size = be64_to_cpu(header.size); 12283b698f52SPeter Maydell header.cluster_bits = be32_to_cpu(header.cluster_bits); 12293b698f52SPeter Maydell header.crypt_method = be32_to_cpu(header.crypt_method); 12303b698f52SPeter Maydell header.l1_table_offset = be64_to_cpu(header.l1_table_offset); 12313b698f52SPeter Maydell header.l1_size = be32_to_cpu(header.l1_size); 12323b698f52SPeter Maydell header.refcount_table_offset = be64_to_cpu(header.refcount_table_offset); 12333b698f52SPeter Maydell header.refcount_table_clusters = 12343b698f52SPeter Maydell be32_to_cpu(header.refcount_table_clusters); 12353b698f52SPeter Maydell header.snapshots_offset = be64_to_cpu(header.snapshots_offset); 12363b698f52SPeter Maydell header.nb_snapshots = be32_to_cpu(header.nb_snapshots); 1237585f8587Sbellard 1238e8cdcec1SKevin Wolf if (header.magic != QCOW_MAGIC) { 12393ef6c40aSMax Reitz error_setg(errp, "Image is not in qcow2 format"); 124076abe407SPaolo Bonzini ret = -EINVAL; 1241585f8587Sbellard goto fail; 12426d85a57eSJes Sorensen } 12436744cbabSKevin Wolf if (header.version < 2 || header.version > 3) { 1244a55448b3SMax Reitz error_setg(errp, "Unsupported qcow2 version %" PRIu32, header.version); 1245e8cdcec1SKevin Wolf ret = -ENOTSUP; 1246e8cdcec1SKevin Wolf goto fail; 1247e8cdcec1SKevin Wolf } 12486744cbabSKevin Wolf 12496744cbabSKevin Wolf s->qcow_version = header.version; 12506744cbabSKevin Wolf 125124342f2cSKevin Wolf /* Initialise cluster size */ 125224342f2cSKevin Wolf if (header.cluster_bits < MIN_CLUSTER_BITS || 125324342f2cSKevin Wolf header.cluster_bits > MAX_CLUSTER_BITS) { 1254521b2b5dSMax Reitz error_setg(errp, "Unsupported cluster size: 2^%" PRIu32, 1255521b2b5dSMax Reitz header.cluster_bits); 125624342f2cSKevin Wolf ret = -EINVAL; 125724342f2cSKevin Wolf goto fail; 125824342f2cSKevin Wolf } 125924342f2cSKevin Wolf 126024342f2cSKevin Wolf s->cluster_bits = header.cluster_bits; 
126124342f2cSKevin Wolf s->cluster_size = 1 << s->cluster_bits; 1262a35f87f5SAlberto Garcia s->cluster_sectors = 1 << (s->cluster_bits - BDRV_SECTOR_BITS); 126324342f2cSKevin Wolf 12646744cbabSKevin Wolf /* Initialise version 3 header fields */ 12656744cbabSKevin Wolf if (header.version == 2) { 12666744cbabSKevin Wolf header.incompatible_features = 0; 12676744cbabSKevin Wolf header.compatible_features = 0; 12686744cbabSKevin Wolf header.autoclear_features = 0; 12696744cbabSKevin Wolf header.refcount_order = 4; 12706744cbabSKevin Wolf header.header_length = 72; 12716744cbabSKevin Wolf } else { 12723b698f52SPeter Maydell header.incompatible_features = 12733b698f52SPeter Maydell be64_to_cpu(header.incompatible_features); 12743b698f52SPeter Maydell header.compatible_features = be64_to_cpu(header.compatible_features); 12753b698f52SPeter Maydell header.autoclear_features = be64_to_cpu(header.autoclear_features); 12763b698f52SPeter Maydell header.refcount_order = be32_to_cpu(header.refcount_order); 12773b698f52SPeter Maydell header.header_length = be32_to_cpu(header.header_length); 127824342f2cSKevin Wolf 127924342f2cSKevin Wolf if (header.header_length < 104) { 128024342f2cSKevin Wolf error_setg(errp, "qcow2 header too short"); 128124342f2cSKevin Wolf ret = -EINVAL; 128224342f2cSKevin Wolf goto fail; 128324342f2cSKevin Wolf } 128424342f2cSKevin Wolf } 128524342f2cSKevin Wolf 128624342f2cSKevin Wolf if (header.header_length > s->cluster_size) { 128724342f2cSKevin Wolf error_setg(errp, "qcow2 header exceeds cluster size"); 128824342f2cSKevin Wolf ret = -EINVAL; 128924342f2cSKevin Wolf goto fail; 12906744cbabSKevin Wolf } 12916744cbabSKevin Wolf 12926744cbabSKevin Wolf if (header.header_length > sizeof(header)) { 12936744cbabSKevin Wolf s->unknown_header_fields_size = header.header_length - sizeof(header); 12946744cbabSKevin Wolf s->unknown_header_fields = g_malloc(s->unknown_header_fields_size); 1295cf2ab8fcSKevin Wolf ret = bdrv_pread(bs->file, sizeof(header), 
s->unknown_header_fields, 12966744cbabSKevin Wolf s->unknown_header_fields_size); 12976744cbabSKevin Wolf if (ret < 0) { 12983ef6c40aSMax Reitz error_setg_errno(errp, -ret, "Could not read unknown qcow2 header " 12993ef6c40aSMax Reitz "fields"); 13006744cbabSKevin Wolf goto fail; 13016744cbabSKevin Wolf } 13026744cbabSKevin Wolf } 13036744cbabSKevin Wolf 1304a1b3955cSKevin Wolf if (header.backing_file_offset > s->cluster_size) { 1305a1b3955cSKevin Wolf error_setg(errp, "Invalid backing file offset"); 1306a1b3955cSKevin Wolf ret = -EINVAL; 1307a1b3955cSKevin Wolf goto fail; 1308a1b3955cSKevin Wolf } 1309a1b3955cSKevin Wolf 1310cfcc4c62SKevin Wolf if (header.backing_file_offset) { 1311cfcc4c62SKevin Wolf ext_end = header.backing_file_offset; 1312cfcc4c62SKevin Wolf } else { 1313cfcc4c62SKevin Wolf ext_end = 1 << header.cluster_bits; 1314cfcc4c62SKevin Wolf } 1315cfcc4c62SKevin Wolf 13166744cbabSKevin Wolf /* Handle feature bits */ 13176744cbabSKevin Wolf s->incompatible_features = header.incompatible_features; 13186744cbabSKevin Wolf s->compatible_features = header.compatible_features; 13196744cbabSKevin Wolf s->autoclear_features = header.autoclear_features; 13206744cbabSKevin Wolf 1321c61d0004SStefan Hajnoczi if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) { 1322cfcc4c62SKevin Wolf void *feature_table = NULL; 1323cfcc4c62SKevin Wolf qcow2_read_extensions(bs, header.header_length, ext_end, 132488ddffaeSVladimir Sementsov-Ogievskiy &feature_table, flags, NULL, NULL); 1325a55448b3SMax Reitz report_unsupported_feature(errp, feature_table, 1326c61d0004SStefan Hajnoczi s->incompatible_features & 1327c61d0004SStefan Hajnoczi ~QCOW2_INCOMPAT_MASK); 13286744cbabSKevin Wolf ret = -ENOTSUP; 1329c5a33ee9SPrasad Joshi g_free(feature_table); 13306744cbabSKevin Wolf goto fail; 13316744cbabSKevin Wolf } 13326744cbabSKevin Wolf 133369c98726SMax Reitz if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) { 133469c98726SMax Reitz /* Corrupt images may not be written to unless 
they are being repaired 133569c98726SMax Reitz */ 133669c98726SMax Reitz if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) { 13373ef6c40aSMax Reitz error_setg(errp, "qcow2: Image is corrupt; cannot be opened " 13383ef6c40aSMax Reitz "read/write"); 133969c98726SMax Reitz ret = -EACCES; 134069c98726SMax Reitz goto fail; 134169c98726SMax Reitz } 134269c98726SMax Reitz } 134369c98726SMax Reitz 13446744cbabSKevin Wolf /* Check support for various header values */ 1345b72faf9fSMax Reitz if (header.refcount_order > 6) { 1346b72faf9fSMax Reitz error_setg(errp, "Reference count entry width too large; may not " 1347b72faf9fSMax Reitz "exceed 64 bits"); 1348b72faf9fSMax Reitz ret = -EINVAL; 13496744cbabSKevin Wolf goto fail; 13506744cbabSKevin Wolf } 1351b6481f37SMax Reitz s->refcount_order = header.refcount_order; 1352346a53dfSMax Reitz s->refcount_bits = 1 << s->refcount_order; 1353346a53dfSMax Reitz s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1); 1354346a53dfSMax Reitz s->refcount_max += s->refcount_max - 1; 13556744cbabSKevin Wolf 1356585f8587Sbellard s->crypt_method_header = header.crypt_method; 13576d85a57eSJes Sorensen if (s->crypt_method_header) { 1358e6ff69bfSDaniel P. Berrange if (bdrv_uses_whitelist() && 1359e6ff69bfSDaniel P. Berrange s->crypt_method_header == QCOW_CRYPT_AES) { 13608c0dcbc4SDaniel P. Berrange error_setg(errp, 13618c0dcbc4SDaniel P. Berrange "Use of AES-CBC encrypted qcow2 images is no longer " 13628c0dcbc4SDaniel P. Berrange "supported in system emulators"); 13638c0dcbc4SDaniel P. Berrange error_append_hint(errp, 13648c0dcbc4SDaniel P. Berrange "You can use 'qemu-img convert' to convert your " 13658c0dcbc4SDaniel P. Berrange "image to an alternative supported format, such " 13668c0dcbc4SDaniel P. Berrange "as unencrypted qcow2, or raw with the LUKS " 13678c0dcbc4SDaniel P. Berrange "format instead.\n"); 13688c0dcbc4SDaniel P. Berrange ret = -ENOSYS; 13698c0dcbc4SDaniel P. Berrange goto fail; 1370e6ff69bfSDaniel P. 
Berrange } 1371e6ff69bfSDaniel P. Berrange 13724652b8f3SDaniel P. Berrange if (s->crypt_method_header == QCOW_CRYPT_AES) { 13734652b8f3SDaniel P. Berrange s->crypt_physical_offset = false; 13744652b8f3SDaniel P. Berrange } else { 13754652b8f3SDaniel P. Berrange /* Assuming LUKS and any future crypt methods we 13764652b8f3SDaniel P. Berrange * add will all use physical offsets, due to the 13774652b8f3SDaniel P. Berrange * fact that the alternative is insecure... */ 13784652b8f3SDaniel P. Berrange s->crypt_physical_offset = true; 13794652b8f3SDaniel P. Berrange } 13804652b8f3SDaniel P. Berrange 138154115412SEric Blake bs->encrypted = true; 13826d85a57eSJes Sorensen } 138324342f2cSKevin Wolf 1384585f8587Sbellard s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */ 1385585f8587Sbellard s->l2_size = 1 << s->l2_bits; 13861d13d654SMax Reitz /* 2^(s->refcount_order - 3) is the refcount width in bytes */ 13871d13d654SMax Reitz s->refcount_block_bits = s->cluster_bits - (s->refcount_order - 3); 13881d13d654SMax Reitz s->refcount_block_size = 1 << s->refcount_block_bits; 1389bd016b91SLeonid Bloch bs->total_sectors = header.size / BDRV_SECTOR_SIZE; 1390585f8587Sbellard s->csize_shift = (62 - (s->cluster_bits - 8)); 1391585f8587Sbellard s->csize_mask = (1 << (s->cluster_bits - 8)) - 1; 1392585f8587Sbellard s->cluster_offset_mask = (1LL << s->csize_shift) - 1; 13935dab2fadSKevin Wolf 1394585f8587Sbellard s->refcount_table_offset = header.refcount_table_offset; 1395585f8587Sbellard s->refcount_table_size = 1396585f8587Sbellard header.refcount_table_clusters << (s->cluster_bits - 3); 1397585f8587Sbellard 1398951053a9SAlberto Garcia if (header.refcount_table_clusters == 0 && !(flags & BDRV_O_CHECK)) { 1399951053a9SAlberto Garcia error_setg(errp, "Image does not contain a reference count table"); 1400951053a9SAlberto Garcia ret = -EINVAL; 1401951053a9SAlberto Garcia goto fail; 1402951053a9SAlberto Garcia } 1403951053a9SAlberto Garcia 14040cf0e598SAlberto Garcia ret = 
qcow2_validate_table(bs, s->refcount_table_offset, 14050cf0e598SAlberto Garcia header.refcount_table_clusters, 14060cf0e598SAlberto Garcia s->cluster_size, QCOW_MAX_REFTABLE_SIZE, 14070cf0e598SAlberto Garcia "Reference count table", errp); 14088c7de283SKevin Wolf if (ret < 0) { 14098c7de283SKevin Wolf goto fail; 14108c7de283SKevin Wolf } 14118c7de283SKevin Wolf 14120cf0e598SAlberto Garcia /* The total size in bytes of the snapshot table is checked in 14130cf0e598SAlberto Garcia * qcow2_read_snapshots() because the size of each snapshot is 14140cf0e598SAlberto Garcia * variable and we don't know it yet. 14150cf0e598SAlberto Garcia * Here we only check the offset and number of snapshots. */ 14160cf0e598SAlberto Garcia ret = qcow2_validate_table(bs, header.snapshots_offset, 1417ce48f2f4SKevin Wolf header.nb_snapshots, 14180cf0e598SAlberto Garcia sizeof(QCowSnapshotHeader), 14190cf0e598SAlberto Garcia sizeof(QCowSnapshotHeader) * QCOW_MAX_SNAPSHOTS, 14200cf0e598SAlberto Garcia "Snapshot table", errp); 1421ce48f2f4SKevin Wolf if (ret < 0) { 1422ce48f2f4SKevin Wolf goto fail; 1423ce48f2f4SKevin Wolf } 1424ce48f2f4SKevin Wolf 1425585f8587Sbellard /* read the level 1 table */ 14260cf0e598SAlberto Garcia ret = qcow2_validate_table(bs, header.l1_table_offset, 14270cf0e598SAlberto Garcia header.l1_size, sizeof(uint64_t), 14280cf0e598SAlberto Garcia QCOW_MAX_L1_SIZE, "Active L1 table", errp); 14290cf0e598SAlberto Garcia if (ret < 0) { 14302d51c32cSKevin Wolf goto fail; 14312d51c32cSKevin Wolf } 1432585f8587Sbellard s->l1_size = header.l1_size; 14330cf0e598SAlberto Garcia s->l1_table_offset = header.l1_table_offset; 14342cf7cfa1SKevin Wolf 14352cf7cfa1SKevin Wolf l1_vm_state_index = size_to_l1(s, header.size); 14362cf7cfa1SKevin Wolf if (l1_vm_state_index > INT_MAX) { 14373ef6c40aSMax Reitz error_setg(errp, "Image is too big"); 14382cf7cfa1SKevin Wolf ret = -EFBIG; 14392cf7cfa1SKevin Wolf goto fail; 14402cf7cfa1SKevin Wolf } 14412cf7cfa1SKevin Wolf s->l1_vm_state_index = 
l1_vm_state_index; 14422cf7cfa1SKevin Wolf 1443585f8587Sbellard /* the L1 table must contain at least enough entries to put 1444585f8587Sbellard header.size bytes */ 14456d85a57eSJes Sorensen if (s->l1_size < s->l1_vm_state_index) { 14463ef6c40aSMax Reitz error_setg(errp, "L1 table is too small"); 14476d85a57eSJes Sorensen ret = -EINVAL; 1448585f8587Sbellard goto fail; 14496d85a57eSJes Sorensen } 14502d51c32cSKevin Wolf 1451d191d12dSStefan Weil if (s->l1_size > 0) { 14529a4f4c31SKevin Wolf s->l1_table = qemu_try_blockalign(bs->file->bs, 14539e029689SAlberto Garcia ROUND_UP(s->l1_size * sizeof(uint64_t), 512)); 1454de82815dSKevin Wolf if (s->l1_table == NULL) { 1455de82815dSKevin Wolf error_setg(errp, "Could not allocate L1 table"); 1456de82815dSKevin Wolf ret = -ENOMEM; 1457de82815dSKevin Wolf goto fail; 1458de82815dSKevin Wolf } 1459cf2ab8fcSKevin Wolf ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, 14606d85a57eSJes Sorensen s->l1_size * sizeof(uint64_t)); 14616d85a57eSJes Sorensen if (ret < 0) { 14623ef6c40aSMax Reitz error_setg_errno(errp, -ret, "Could not read L1 table"); 1463585f8587Sbellard goto fail; 14646d85a57eSJes Sorensen } 1465585f8587Sbellard for(i = 0;i < s->l1_size; i++) { 14663b698f52SPeter Maydell s->l1_table[i] = be64_to_cpu(s->l1_table[i]); 1467585f8587Sbellard } 1468d191d12dSStefan Weil } 146929c1a730SKevin Wolf 147094edf3fbSKevin Wolf /* Parse driver-specific options */ 147194edf3fbSKevin Wolf ret = qcow2_update_options(bs, options, flags, errp); 147290efa0eaSKevin Wolf if (ret < 0) { 147390efa0eaSKevin Wolf goto fail; 147490efa0eaSKevin Wolf } 147590efa0eaSKevin Wolf 147606d9260fSAnthony Liguori s->flags = flags; 1477585f8587Sbellard 14786d85a57eSJes Sorensen ret = qcow2_refcount_init(bs); 14796d85a57eSJes Sorensen if (ret != 0) { 14803ef6c40aSMax Reitz error_setg_errno(errp, -ret, "Could not initialize refcount handling"); 1481585f8587Sbellard goto fail; 14826d85a57eSJes Sorensen } 1483585f8587Sbellard 148472cf2d4fSBlue Swirl 
QLIST_INIT(&s->cluster_allocs); 14850b919faeSKevin Wolf QTAILQ_INIT(&s->discards); 1486f214978aSKevin Wolf 14879b80ddf3Saliguori /* read qcow2 extensions */ 14883ef6c40aSMax Reitz if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL, 148988ddffaeSVladimir Sementsov-Ogievskiy flags, &update_header, &local_err)) { 14903ef6c40aSMax Reitz error_propagate(errp, local_err); 14916d85a57eSJes Sorensen ret = -EINVAL; 14929b80ddf3Saliguori goto fail; 14936d85a57eSJes Sorensen } 14949b80ddf3Saliguori 14950e8c08beSKevin Wolf /* Open external data file */ 14960e8c08beSKevin Wolf s->data_file = bdrv_open_child(NULL, options, "data-file", bs, &child_file, 14970e8c08beSKevin Wolf true, &local_err); 14980e8c08beSKevin Wolf if (local_err) { 14990e8c08beSKevin Wolf error_propagate(errp, local_err); 15000e8c08beSKevin Wolf ret = -EINVAL; 15010e8c08beSKevin Wolf goto fail; 15020e8c08beSKevin Wolf } 15030e8c08beSKevin Wolf 15040e8c08beSKevin Wolf if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) { 15059b890bdcSKevin Wolf if (!s->data_file && s->image_data_file) { 15069b890bdcSKevin Wolf s->data_file = bdrv_open_child(s->image_data_file, options, 15079b890bdcSKevin Wolf "data-file", bs, &child_file, 15089b890bdcSKevin Wolf false, errp); 15099b890bdcSKevin Wolf if (!s->data_file) { 15109b890bdcSKevin Wolf ret = -EINVAL; 15119b890bdcSKevin Wolf goto fail; 15129b890bdcSKevin Wolf } 15139b890bdcSKevin Wolf } 15140e8c08beSKevin Wolf if (!s->data_file) { 15150e8c08beSKevin Wolf error_setg(errp, "'data-file' is required for this image"); 15160e8c08beSKevin Wolf ret = -EINVAL; 15170e8c08beSKevin Wolf goto fail; 15180e8c08beSKevin Wolf } 15190e8c08beSKevin Wolf } else { 15200e8c08beSKevin Wolf if (s->data_file) { 15210e8c08beSKevin Wolf error_setg(errp, "'data-file' can only be set for images with an " 15220e8c08beSKevin Wolf "external data file"); 15230e8c08beSKevin Wolf ret = -EINVAL; 15240e8c08beSKevin Wolf goto fail; 15256c3944dcSKevin Wolf } 15266c3944dcSKevin Wolf 
152793c24936SKevin Wolf s->data_file = bs->file; 15286c3944dcSKevin Wolf 15296c3944dcSKevin Wolf if (data_file_is_raw(bs)) { 15306c3944dcSKevin Wolf error_setg(errp, "data-file-raw requires a data file"); 15316c3944dcSKevin Wolf ret = -EINVAL; 15326c3944dcSKevin Wolf goto fail; 15330e8c08beSKevin Wolf } 15340e8c08beSKevin Wolf } 153593c24936SKevin Wolf 15364652b8f3SDaniel P. Berrange /* qcow2_read_extension may have set up the crypto context 15374652b8f3SDaniel P. Berrange * if the crypt method needs a header region, some methods 15384652b8f3SDaniel P. Berrange * don't need header extensions, so must check here 15394652b8f3SDaniel P. Berrange */ 15404652b8f3SDaniel P. Berrange if (s->crypt_method_header && !s->crypto) { 1541b25b387fSDaniel P. Berrange if (s->crypt_method_header == QCOW_CRYPT_AES) { 1542b25b387fSDaniel P. Berrange unsigned int cflags = 0; 1543b25b387fSDaniel P. Berrange if (flags & BDRV_O_NO_IO) { 1544b25b387fSDaniel P. Berrange cflags |= QCRYPTO_BLOCK_OPEN_NO_IO; 1545b25b387fSDaniel P. Berrange } 15461cd9a787SDaniel P. Berrange s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.", 1547c972fa12SVladimir Sementsov-Ogievskiy NULL, NULL, cflags, 1, errp); 1548b25b387fSDaniel P. Berrange if (!s->crypto) { 1549b25b387fSDaniel P. Berrange ret = -EINVAL; 1550b25b387fSDaniel P. Berrange goto fail; 1551b25b387fSDaniel P. Berrange } 15524652b8f3SDaniel P. Berrange } else if (!(flags & BDRV_O_NO_IO)) { 15534652b8f3SDaniel P. Berrange error_setg(errp, "Missing CRYPTO header for crypt method %d", 15544652b8f3SDaniel P. Berrange s->crypt_method_header); 15554652b8f3SDaniel P. Berrange ret = -EINVAL; 15564652b8f3SDaniel P. Berrange goto fail; 15574652b8f3SDaniel P. Berrange } 1558b25b387fSDaniel P. Berrange } 1559b25b387fSDaniel P. 
Berrange 1560585f8587Sbellard /* read the backing file name */ 1561585f8587Sbellard if (header.backing_file_offset != 0) { 1562585f8587Sbellard len = header.backing_file_size; 15639a29e18fSJeff Cody if (len > MIN(1023, s->cluster_size - header.backing_file_offset) || 1564e729fa6aSJeff Cody len >= sizeof(bs->backing_file)) { 15656d33e8e7SKevin Wolf error_setg(errp, "Backing file name too long"); 15666d33e8e7SKevin Wolf ret = -EINVAL; 15676d33e8e7SKevin Wolf goto fail; 15686d85a57eSJes Sorensen } 1569cf2ab8fcSKevin Wolf ret = bdrv_pread(bs->file, header.backing_file_offset, 1570998c2019SMax Reitz bs->auto_backing_file, len); 15716d85a57eSJes Sorensen if (ret < 0) { 15723ef6c40aSMax Reitz error_setg_errno(errp, -ret, "Could not read backing file name"); 1573585f8587Sbellard goto fail; 15746d85a57eSJes Sorensen } 1575998c2019SMax Reitz bs->auto_backing_file[len] = '\0'; 1576998c2019SMax Reitz pstrcpy(bs->backing_file, sizeof(bs->backing_file), 1577998c2019SMax Reitz bs->auto_backing_file); 1578998c2019SMax Reitz s->image_backing_file = g_strdup(bs->auto_backing_file); 1579585f8587Sbellard } 158042deb29fSKevin Wolf 158111b128f4SKevin Wolf /* Internal snapshots */ 158211b128f4SKevin Wolf s->snapshots_offset = header.snapshots_offset; 158311b128f4SKevin Wolf s->nb_snapshots = header.nb_snapshots; 158411b128f4SKevin Wolf 158542deb29fSKevin Wolf ret = qcow2_read_snapshots(bs); 158642deb29fSKevin Wolf if (ret < 0) { 15873ef6c40aSMax Reitz error_setg_errno(errp, -ret, "Could not read snapshots"); 1588585f8587Sbellard goto fail; 15896d85a57eSJes Sorensen } 1590585f8587Sbellard 1591af7b708dSStefan Hajnoczi /* Clear unknown autoclear feature bits */ 159288ddffaeSVladimir Sementsov-Ogievskiy update_header |= s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK; 1593d1258dd0SVladimir Sementsov-Ogievskiy update_header = 1594d1258dd0SVladimir Sementsov-Ogievskiy update_header && !bs->read_only && !(flags & BDRV_O_INACTIVE); 1595d1258dd0SVladimir Sementsov-Ogievskiy if (update_header) { 
159688ddffaeSVladimir Sementsov-Ogievskiy s->autoclear_features &= QCOW2_AUTOCLEAR_MASK; 1597d1258dd0SVladimir Sementsov-Ogievskiy } 1598d1258dd0SVladimir Sementsov-Ogievskiy 15999c98f145SVladimir Sementsov-Ogievskiy /* == Handle persistent dirty bitmaps == 16009c98f145SVladimir Sementsov-Ogievskiy * 16019c98f145SVladimir Sementsov-Ogievskiy * We want load dirty bitmaps in three cases: 16029c98f145SVladimir Sementsov-Ogievskiy * 16039c98f145SVladimir Sementsov-Ogievskiy * 1. Normal open of the disk in active mode, not related to invalidation 16049c98f145SVladimir Sementsov-Ogievskiy * after migration. 16059c98f145SVladimir Sementsov-Ogievskiy * 16069c98f145SVladimir Sementsov-Ogievskiy * 2. Invalidation of the target vm after pre-copy phase of migration, if 16079c98f145SVladimir Sementsov-Ogievskiy * bitmaps are _not_ migrating through migration channel, i.e. 16089c98f145SVladimir Sementsov-Ogievskiy * 'dirty-bitmaps' capability is disabled. 16099c98f145SVladimir Sementsov-Ogievskiy * 16109c98f145SVladimir Sementsov-Ogievskiy * 3. Invalidation of source vm after failed or canceled migration. 16119c98f145SVladimir Sementsov-Ogievskiy * This is a very interesting case. There are two possible types of 16129c98f145SVladimir Sementsov-Ogievskiy * bitmaps: 16139c98f145SVladimir Sementsov-Ogievskiy * 16149c98f145SVladimir Sementsov-Ogievskiy * A. Stored on inactivation and removed. They should be loaded from the 16159c98f145SVladimir Sementsov-Ogievskiy * image. 16169c98f145SVladimir Sementsov-Ogievskiy * 16179c98f145SVladimir Sementsov-Ogievskiy * B. Not stored: not-persistent bitmaps and bitmaps, migrated through 16189c98f145SVladimir Sementsov-Ogievskiy * the migration channel (with dirty-bitmaps capability). 
16199c98f145SVladimir Sementsov-Ogievskiy * 16209c98f145SVladimir Sementsov-Ogievskiy * On the other hand, there are two possible sub-cases: 16219c98f145SVladimir Sementsov-Ogievskiy * 16229c98f145SVladimir Sementsov-Ogievskiy * 3.1 disk was changed by somebody else while we were inactive. In this 16239c98f145SVladimir Sementsov-Ogievskiy * case all in-RAM dirty bitmaps (both persistent and not) are 16249c98f145SVladimir Sementsov-Ogievskiy * definitely invalid. And we don't have any method to determine 16259c98f145SVladimir Sementsov-Ogievskiy * this. 16269c98f145SVladimir Sementsov-Ogievskiy * 16279c98f145SVladimir Sementsov-Ogievskiy * The simple and safe thing is to just drop all the bitmaps of type B on 16289c98f145SVladimir Sementsov-Ogievskiy * inactivation. But in this case we lose bitmaps in the valid 3.2 case. 16299c98f145SVladimir Sementsov-Ogievskiy * 16309c98f145SVladimir Sementsov-Ogievskiy * On the other hand, resuming the source vm if the disk was already changed 16319c98f145SVladimir Sementsov-Ogievskiy * is a bad thing anyway: not only bitmaps, the whole vm state is 16329c98f145SVladimir Sementsov-Ogievskiy * out of sync with disk. 16339c98f145SVladimir Sementsov-Ogievskiy * 16349c98f145SVladimir Sementsov-Ogievskiy * This means that a user or management tool that for some reason 16359c98f145SVladimir Sementsov-Ogievskiy * decided to resume the source vm after the disk was already changed by 16369c98f145SVladimir Sementsov-Ogievskiy * the target vm should at least drop all dirty bitmaps by hand. 16379c98f145SVladimir Sementsov-Ogievskiy * 16389c98f145SVladimir Sementsov-Ogievskiy * So, we can ignore this case for now, but TODO: "generation" 16399c98f145SVladimir Sementsov-Ogievskiy * extension for qcow2, to determine that the image was changed after 16409c98f145SVladimir Sementsov-Ogievskiy * last inactivation.
And if it is changed, we will drop (or at least 16419c98f145SVladimir Sementsov-Ogievskiy * mark as 'invalid' all the bitmaps of type B, both persistent 16429c98f145SVladimir Sementsov-Ogievskiy * and not). 16439c98f145SVladimir Sementsov-Ogievskiy * 16449c98f145SVladimir Sementsov-Ogievskiy * 3.2 disk was _not_ changed while were inactive. Bitmaps may be saved 16459c98f145SVladimir Sementsov-Ogievskiy * to disk ('dirty-bitmaps' capability disabled), or not saved 16469c98f145SVladimir Sementsov-Ogievskiy * ('dirty-bitmaps' capability enabled), but we don't need to care 16479c98f145SVladimir Sementsov-Ogievskiy * of: let's load bitmaps as always: stored bitmaps will be loaded, 16489c98f145SVladimir Sementsov-Ogievskiy * and not stored has flag IN_USE=1 in the image and will be skipped 16499c98f145SVladimir Sementsov-Ogievskiy * on loading. 16509c98f145SVladimir Sementsov-Ogievskiy * 16519c98f145SVladimir Sementsov-Ogievskiy * One remaining possible case when we don't want load bitmaps: 16529c98f145SVladimir Sementsov-Ogievskiy * 16539c98f145SVladimir Sementsov-Ogievskiy * 4. Open disk in inactive mode in target vm (bitmaps are migrating or 16549c98f145SVladimir Sementsov-Ogievskiy * will be loaded on invalidation, no needs try loading them before) 16559c98f145SVladimir Sementsov-Ogievskiy */ 16569c98f145SVladimir Sementsov-Ogievskiy 16579c98f145SVladimir Sementsov-Ogievskiy if (!(bdrv_get_flags(bs) & BDRV_O_INACTIVE)) { 16589c98f145SVladimir Sementsov-Ogievskiy /* It's case 1, 2 or 3.2. Or 3.1 which is BUG in management layer. 
*/ 16599c98f145SVladimir Sementsov-Ogievskiy bool header_updated = qcow2_load_dirty_bitmaps(bs, &local_err); 16609c98f145SVladimir Sementsov-Ogievskiy 16619c98f145SVladimir Sementsov-Ogievskiy update_header = update_header && !header_updated; 1662605bc8beSVladimir Sementsov-Ogievskiy } 1663d1258dd0SVladimir Sementsov-Ogievskiy if (local_err != NULL) { 1664d1258dd0SVladimir Sementsov-Ogievskiy error_propagate(errp, local_err); 1665d1258dd0SVladimir Sementsov-Ogievskiy ret = -EINVAL; 1666d1258dd0SVladimir Sementsov-Ogievskiy goto fail; 1667d1258dd0SVladimir Sementsov-Ogievskiy } 1668d1258dd0SVladimir Sementsov-Ogievskiy 1669d1258dd0SVladimir Sementsov-Ogievskiy if (update_header) { 1670af7b708dSStefan Hajnoczi ret = qcow2_update_header(bs); 1671af7b708dSStefan Hajnoczi if (ret < 0) { 16723ef6c40aSMax Reitz error_setg_errno(errp, -ret, "Could not update qcow2 header"); 1673af7b708dSStefan Hajnoczi goto fail; 1674af7b708dSStefan Hajnoczi } 1675af7b708dSStefan Hajnoczi } 1676af7b708dSStefan Hajnoczi 1677e24d813bSEric Blake bs->supported_zero_flags = header.version >= 3 ? 
BDRV_REQ_MAY_UNMAP : 0; 167868d100e9SKevin Wolf 1679c61d0004SStefan Hajnoczi /* Repair image if dirty */ 168004c01a5cSKevin Wolf if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only && 1681058f8f16SStefan Hajnoczi (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) { 1682c61d0004SStefan Hajnoczi BdrvCheckResult result = {0}; 1683c61d0004SStefan Hajnoczi 16842fd61638SPaolo Bonzini ret = qcow2_co_check_locked(bs, &result, 16852fd61638SPaolo Bonzini BDRV_FIX_ERRORS | BDRV_FIX_LEAKS); 1686791fff50SMax Reitz if (ret < 0 || result.check_errors) { 1687791fff50SMax Reitz if (ret >= 0) { 1688791fff50SMax Reitz ret = -EIO; 1689791fff50SMax Reitz } 16903ef6c40aSMax Reitz error_setg_errno(errp, -ret, "Could not repair dirty image"); 1691c61d0004SStefan Hajnoczi goto fail; 1692c61d0004SStefan Hajnoczi } 1693c61d0004SStefan Hajnoczi } 1694c61d0004SStefan Hajnoczi 1695585f8587Sbellard #ifdef DEBUG_ALLOC 16966cbc3031SPhilipp Hahn { 16976cbc3031SPhilipp Hahn BdrvCheckResult result = {0}; 1698b35278f7SStefan Hajnoczi qcow2_check_refcounts(bs, &result, 0); 16996cbc3031SPhilipp Hahn } 1700585f8587Sbellard #endif 1701ceb029cdSVladimir Sementsov-Ogievskiy 1702ceb029cdSVladimir Sementsov-Ogievskiy qemu_co_queue_init(&s->compress_wait_queue); 1703ceb029cdSVladimir Sementsov-Ogievskiy 17046d85a57eSJes Sorensen return ret; 1705585f8587Sbellard 1706585f8587Sbellard fail: 17079b890bdcSKevin Wolf g_free(s->image_data_file); 17080e8c08beSKevin Wolf if (has_data_file(bs)) { 17090e8c08beSKevin Wolf bdrv_unref_child(bs, s->data_file); 17100e8c08beSKevin Wolf } 17116744cbabSKevin Wolf g_free(s->unknown_header_fields); 171275bab85cSKevin Wolf cleanup_unknown_header_ext(bs); 1713ed6ccf0fSKevin Wolf qcow2_free_snapshots(bs); 1714ed6ccf0fSKevin Wolf qcow2_refcount_close(bs); 1715de82815dSKevin Wolf qemu_vfree(s->l1_table); 1716cf93980eSMax Reitz /* else pre-write overlap checks in cache_destroy may crash */ 1717cf93980eSMax Reitz s->l1_table = NULL; 1718279621c0SAlberto Garcia 
cache_clean_timer_del(bs); 171929c1a730SKevin Wolf if (s->l2_table_cache) { 1720e64d4072SAlberto Garcia qcow2_cache_destroy(s->l2_table_cache); 172129c1a730SKevin Wolf } 1722c5a33ee9SPrasad Joshi if (s->refcount_block_cache) { 1723e64d4072SAlberto Garcia qcow2_cache_destroy(s->refcount_block_cache); 1724c5a33ee9SPrasad Joshi } 1725b25b387fSDaniel P. Berrange qcrypto_block_free(s->crypto); 1726b25b387fSDaniel P. Berrange qapi_free_QCryptoBlockOpenOptions(s->crypto_opts); 17276d85a57eSJes Sorensen return ret; 1728585f8587Sbellard } 1729585f8587Sbellard 17301fafcd93SPaolo Bonzini typedef struct QCow2OpenCo { 17311fafcd93SPaolo Bonzini BlockDriverState *bs; 17321fafcd93SPaolo Bonzini QDict *options; 17331fafcd93SPaolo Bonzini int flags; 17341fafcd93SPaolo Bonzini Error **errp; 17351fafcd93SPaolo Bonzini int ret; 17361fafcd93SPaolo Bonzini } QCow2OpenCo; 17371fafcd93SPaolo Bonzini 17381fafcd93SPaolo Bonzini static void coroutine_fn qcow2_open_entry(void *opaque) 17391fafcd93SPaolo Bonzini { 17401fafcd93SPaolo Bonzini QCow2OpenCo *qoc = opaque; 17411fafcd93SPaolo Bonzini BDRVQcow2State *s = qoc->bs->opaque; 17421fafcd93SPaolo Bonzini 17431fafcd93SPaolo Bonzini qemu_co_mutex_lock(&s->lock); 17441fafcd93SPaolo Bonzini qoc->ret = qcow2_do_open(qoc->bs, qoc->options, qoc->flags, qoc->errp); 17451fafcd93SPaolo Bonzini qemu_co_mutex_unlock(&s->lock); 17461fafcd93SPaolo Bonzini } 17471fafcd93SPaolo Bonzini 17484e4bf5c4SKevin Wolf static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, 17494e4bf5c4SKevin Wolf Error **errp) 17504e4bf5c4SKevin Wolf { 17511fafcd93SPaolo Bonzini BDRVQcow2State *s = bs->opaque; 17521fafcd93SPaolo Bonzini QCow2OpenCo qoc = { 17531fafcd93SPaolo Bonzini .bs = bs, 17541fafcd93SPaolo Bonzini .options = options, 17551fafcd93SPaolo Bonzini .flags = flags, 17561fafcd93SPaolo Bonzini .errp = errp, 17571fafcd93SPaolo Bonzini .ret = -EINPROGRESS 17581fafcd93SPaolo Bonzini }; 17591fafcd93SPaolo Bonzini 17604e4bf5c4SKevin Wolf bs->file = 
bdrv_open_child(NULL, options, "file", bs, &child_file, 17614e4bf5c4SKevin Wolf false, errp); 17624e4bf5c4SKevin Wolf if (!bs->file) { 17634e4bf5c4SKevin Wolf return -EINVAL; 17644e4bf5c4SKevin Wolf } 17654e4bf5c4SKevin Wolf 17661fafcd93SPaolo Bonzini /* Initialise locks */ 17671fafcd93SPaolo Bonzini qemu_co_mutex_init(&s->lock); 17681fafcd93SPaolo Bonzini 17691fafcd93SPaolo Bonzini if (qemu_in_coroutine()) { 17701fafcd93SPaolo Bonzini /* From bdrv_co_create. */ 17711fafcd93SPaolo Bonzini qcow2_open_entry(&qoc); 17721fafcd93SPaolo Bonzini } else { 17734720cbeeSKevin Wolf assert(qemu_get_current_aio_context() == qemu_get_aio_context()); 17741fafcd93SPaolo Bonzini qemu_coroutine_enter(qemu_coroutine_create(qcow2_open_entry, &qoc)); 17751fafcd93SPaolo Bonzini BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS); 17761fafcd93SPaolo Bonzini } 17771fafcd93SPaolo Bonzini return qoc.ret; 17784e4bf5c4SKevin Wolf } 17794e4bf5c4SKevin Wolf 17803baca891SKevin Wolf static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp) 1781d34682cdSKevin Wolf { 1782ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 1783d34682cdSKevin Wolf 1784a84178ccSEric Blake if (bs->encrypted) { 1785a84178ccSEric Blake /* Encryption works on a sector granularity */ 17866f8f015cSAlberto Garcia bs->bl.request_alignment = qcrypto_block_get_sector_size(s->crypto); 1787a84178ccSEric Blake } 1788cf081fcaSEric Blake bs->bl.pwrite_zeroes_alignment = s->cluster_size; 1789ecdbead6SEric Blake bs->bl.pdiscard_alignment = s->cluster_size; 1790d34682cdSKevin Wolf } 1791d34682cdSKevin Wolf 179221d82ac9SJeff Cody static int qcow2_reopen_prepare(BDRVReopenState *state, 179321d82ac9SJeff Cody BlockReopenQueue *queue, Error **errp) 179421d82ac9SJeff Cody { 17955b0959a7SKevin Wolf Qcow2ReopenState *r; 17964c2e5f8fSKevin Wolf int ret; 17974c2e5f8fSKevin Wolf 17985b0959a7SKevin Wolf r = g_new0(Qcow2ReopenState, 1); 17995b0959a7SKevin Wolf state->opaque = r; 18005b0959a7SKevin Wolf 18015b0959a7SKevin Wolf ret = 
qcow2_update_options_prepare(state->bs, r, state->options, 18025b0959a7SKevin Wolf state->flags, errp); 18035b0959a7SKevin Wolf if (ret < 0) { 18045b0959a7SKevin Wolf goto fail; 18055b0959a7SKevin Wolf } 18065b0959a7SKevin Wolf 18075b0959a7SKevin Wolf /* We need to write out any unwritten data if we reopen read-only. */ 18084c2e5f8fSKevin Wolf if ((state->flags & BDRV_O_RDWR) == 0) { 1809169b8793SVladimir Sementsov-Ogievskiy ret = qcow2_reopen_bitmaps_ro(state->bs, errp); 1810169b8793SVladimir Sementsov-Ogievskiy if (ret < 0) { 1811169b8793SVladimir Sementsov-Ogievskiy goto fail; 1812169b8793SVladimir Sementsov-Ogievskiy } 1813169b8793SVladimir Sementsov-Ogievskiy 18144c2e5f8fSKevin Wolf ret = bdrv_flush(state->bs); 18154c2e5f8fSKevin Wolf if (ret < 0) { 18165b0959a7SKevin Wolf goto fail; 18174c2e5f8fSKevin Wolf } 18184c2e5f8fSKevin Wolf 18194c2e5f8fSKevin Wolf ret = qcow2_mark_clean(state->bs); 18204c2e5f8fSKevin Wolf if (ret < 0) { 18215b0959a7SKevin Wolf goto fail; 18224c2e5f8fSKevin Wolf } 18234c2e5f8fSKevin Wolf } 18244c2e5f8fSKevin Wolf 182521d82ac9SJeff Cody return 0; 18265b0959a7SKevin Wolf 18275b0959a7SKevin Wolf fail: 18285b0959a7SKevin Wolf qcow2_update_options_abort(state->bs, r); 18295b0959a7SKevin Wolf g_free(r); 18305b0959a7SKevin Wolf return ret; 18315b0959a7SKevin Wolf } 18325b0959a7SKevin Wolf 18335b0959a7SKevin Wolf static void qcow2_reopen_commit(BDRVReopenState *state) 18345b0959a7SKevin Wolf { 18355b0959a7SKevin Wolf qcow2_update_options_commit(state->bs, state->opaque); 18365b0959a7SKevin Wolf g_free(state->opaque); 18375b0959a7SKevin Wolf } 18385b0959a7SKevin Wolf 18395b0959a7SKevin Wolf static void qcow2_reopen_abort(BDRVReopenState *state) 18405b0959a7SKevin Wolf { 18415b0959a7SKevin Wolf qcow2_update_options_abort(state->bs, state->opaque); 18425b0959a7SKevin Wolf g_free(state->opaque); 184321d82ac9SJeff Cody } 184421d82ac9SJeff Cody 18455365f44dSKevin Wolf static void qcow2_join_options(QDict *options, QDict *old_options) 
18465365f44dSKevin Wolf { 18475365f44dSKevin Wolf bool has_new_overlap_template = 18485365f44dSKevin Wolf qdict_haskey(options, QCOW2_OPT_OVERLAP) || 18495365f44dSKevin Wolf qdict_haskey(options, QCOW2_OPT_OVERLAP_TEMPLATE); 18505365f44dSKevin Wolf bool has_new_total_cache_size = 18515365f44dSKevin Wolf qdict_haskey(options, QCOW2_OPT_CACHE_SIZE); 18525365f44dSKevin Wolf bool has_all_cache_options; 18535365f44dSKevin Wolf 18545365f44dSKevin Wolf /* New overlap template overrides all old overlap options */ 18555365f44dSKevin Wolf if (has_new_overlap_template) { 18565365f44dSKevin Wolf qdict_del(old_options, QCOW2_OPT_OVERLAP); 18575365f44dSKevin Wolf qdict_del(old_options, QCOW2_OPT_OVERLAP_TEMPLATE); 18585365f44dSKevin Wolf qdict_del(old_options, QCOW2_OPT_OVERLAP_MAIN_HEADER); 18595365f44dSKevin Wolf qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L1); 18605365f44dSKevin Wolf qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L2); 18615365f44dSKevin Wolf qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_TABLE); 18625365f44dSKevin Wolf qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK); 18635365f44dSKevin Wolf qdict_del(old_options, QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE); 18645365f44dSKevin Wolf qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L1); 18655365f44dSKevin Wolf qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L2); 18665365f44dSKevin Wolf } 18675365f44dSKevin Wolf 18685365f44dSKevin Wolf /* New total cache size overrides all old options */ 18695365f44dSKevin Wolf if (qdict_haskey(options, QCOW2_OPT_CACHE_SIZE)) { 18705365f44dSKevin Wolf qdict_del(old_options, QCOW2_OPT_L2_CACHE_SIZE); 18715365f44dSKevin Wolf qdict_del(old_options, QCOW2_OPT_REFCOUNT_CACHE_SIZE); 18725365f44dSKevin Wolf } 18735365f44dSKevin Wolf 18745365f44dSKevin Wolf qdict_join(options, old_options, false); 18755365f44dSKevin Wolf 18765365f44dSKevin Wolf /* 18775365f44dSKevin Wolf * If after merging all cache size options are set, an old total size is 18785365f44dSKevin Wolf * 
overwritten. Do keep all options, however, if all three are new. The 18795365f44dSKevin Wolf * resulting error message is what we want to happen. 18805365f44dSKevin Wolf */ 18815365f44dSKevin Wolf has_all_cache_options = 18825365f44dSKevin Wolf qdict_haskey(options, QCOW2_OPT_CACHE_SIZE) || 18835365f44dSKevin Wolf qdict_haskey(options, QCOW2_OPT_L2_CACHE_SIZE) || 18845365f44dSKevin Wolf qdict_haskey(options, QCOW2_OPT_REFCOUNT_CACHE_SIZE); 18855365f44dSKevin Wolf 18865365f44dSKevin Wolf if (has_all_cache_options && !has_new_total_cache_size) { 18875365f44dSKevin Wolf qdict_del(options, QCOW2_OPT_CACHE_SIZE); 18885365f44dSKevin Wolf } 18895365f44dSKevin Wolf } 18905365f44dSKevin Wolf 1891a320fb04SEric Blake static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs, 1892a320fb04SEric Blake bool want_zero, 1893a320fb04SEric Blake int64_t offset, int64_t count, 1894a320fb04SEric Blake int64_t *pnum, int64_t *map, 1895a320fb04SEric Blake BlockDriverState **file) 1896585f8587Sbellard { 1897ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 1898585f8587Sbellard uint64_t cluster_offset; 18994bc74be9SPaolo Bonzini int index_in_cluster, ret; 1900ecfe1863SKevin Wolf unsigned int bytes; 1901a320fb04SEric Blake int status = 0; 1902585f8587Sbellard 1903a320fb04SEric Blake bytes = MIN(INT_MAX, count); 1904f8a2e5e3SStefan Hajnoczi qemu_co_mutex_lock(&s->lock); 1905a320fb04SEric Blake ret = qcow2_get_cluster_offset(bs, offset, &bytes, &cluster_offset); 1906f8a2e5e3SStefan Hajnoczi qemu_co_mutex_unlock(&s->lock); 19071c46efaaSKevin Wolf if (ret < 0) { 1908d663640cSPaolo Bonzini return ret; 19091c46efaaSKevin Wolf } 1910095a9c58Saliguori 1911a320fb04SEric Blake *pnum = bytes; 1912ecfe1863SKevin Wolf 191337be1403SKevin Wolf if ((ret == QCOW2_CLUSTER_NORMAL || ret == QCOW2_CLUSTER_ZERO_ALLOC) && 1914b25b387fSDaniel P. 
Berrange !s->crypto) { 1915a320fb04SEric Blake index_in_cluster = offset & (s->cluster_size - 1); 1916a320fb04SEric Blake *map = cluster_offset | index_in_cluster; 191737be1403SKevin Wolf *file = s->data_file->bs; 1918a320fb04SEric Blake status |= BDRV_BLOCK_OFFSET_VALID; 19194bc74be9SPaolo Bonzini } 1920fdfab37dSEric Blake if (ret == QCOW2_CLUSTER_ZERO_PLAIN || ret == QCOW2_CLUSTER_ZERO_ALLOC) { 19214bc74be9SPaolo Bonzini status |= BDRV_BLOCK_ZERO; 19224bc74be9SPaolo Bonzini } else if (ret != QCOW2_CLUSTER_UNALLOCATED) { 19234bc74be9SPaolo Bonzini status |= BDRV_BLOCK_DATA; 19244bc74be9SPaolo Bonzini } 19254bc74be9SPaolo Bonzini return status; 1926585f8587Sbellard } 1927585f8587Sbellard 1928fd9fcd37SFam Zheng static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs, 1929fd9fcd37SFam Zheng QCowL2Meta **pl2meta, 1930fd9fcd37SFam Zheng bool link_l2) 1931fd9fcd37SFam Zheng { 1932fd9fcd37SFam Zheng int ret = 0; 1933fd9fcd37SFam Zheng QCowL2Meta *l2meta = *pl2meta; 1934fd9fcd37SFam Zheng 1935fd9fcd37SFam Zheng while (l2meta != NULL) { 1936fd9fcd37SFam Zheng QCowL2Meta *next; 1937fd9fcd37SFam Zheng 1938354d930dSFam Zheng if (link_l2) { 1939fd9fcd37SFam Zheng ret = qcow2_alloc_cluster_link_l2(bs, l2meta); 1940fd9fcd37SFam Zheng if (ret) { 1941fd9fcd37SFam Zheng goto out; 1942fd9fcd37SFam Zheng } 19438b24cd14SKevin Wolf } else { 19448b24cd14SKevin Wolf qcow2_alloc_cluster_abort(bs, l2meta); 1945fd9fcd37SFam Zheng } 1946fd9fcd37SFam Zheng 1947fd9fcd37SFam Zheng /* Take the request off the list of running requests */ 1948fd9fcd37SFam Zheng if (l2meta->nb_clusters != 0) { 1949fd9fcd37SFam Zheng QLIST_REMOVE(l2meta, next_in_flight); 1950fd9fcd37SFam Zheng } 1951fd9fcd37SFam Zheng 1952fd9fcd37SFam Zheng qemu_co_queue_restart_all(&l2meta->dependent_requests); 1953fd9fcd37SFam Zheng 1954fd9fcd37SFam Zheng next = l2meta->next; 1955fd9fcd37SFam Zheng g_free(l2meta); 1956fd9fcd37SFam Zheng l2meta = next; 1957fd9fcd37SFam Zheng } 1958fd9fcd37SFam Zheng out: 1959fd9fcd37SFam 
Zheng *pl2meta = l2meta; 1960fd9fcd37SFam Zheng return ret; 1961fd9fcd37SFam Zheng } 1962fd9fcd37SFam Zheng 1963ecfe1863SKevin Wolf static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, 1964ecfe1863SKevin Wolf uint64_t bytes, QEMUIOVector *qiov, 1965ecfe1863SKevin Wolf int flags) 19661490791fSaliguori { 1967ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 1968546a7dc4SEdgar Kaziakhmedov int offset_in_cluster; 196968d100e9SKevin Wolf int ret; 1970ecfe1863SKevin Wolf unsigned int cur_bytes; /* number of bytes in current iteration */ 1971c2bdd990SFrediano Ziglio uint64_t cluster_offset = 0; 19723fc48d09SFrediano Ziglio uint64_t bytes_done = 0; 19733fc48d09SFrediano Ziglio QEMUIOVector hd_qiov; 19743fc48d09SFrediano Ziglio uint8_t *cluster_data = NULL; 1975585f8587Sbellard 19763fc48d09SFrediano Ziglio qemu_iovec_init(&hd_qiov, qiov->niov); 19773fc48d09SFrediano Ziglio 19783fc48d09SFrediano Ziglio qemu_co_mutex_lock(&s->lock); 19793fc48d09SFrediano Ziglio 1980ecfe1863SKevin Wolf while (bytes != 0) { 1981585f8587Sbellard 1982faf575c1SFrediano Ziglio /* prepare next request */ 1983ecfe1863SKevin Wolf cur_bytes = MIN(bytes, INT_MAX); 1984b25b387fSDaniel P. 
/*
 * Reads @bytes bytes starting at guest offset @offset into @qiov.
 *
 * The request is processed in pieces. For each piece the cluster type is
 * looked up with qcow2_get_cluster_offset() and the data is produced
 * according to that type (backing file, zeroes, compressed cluster, or a
 * plain read from s->data_file, decrypted if bs->encrypted is set).
 *
 * s->lock is held for the metadata lookup and dropped around every call that
 * performs I/O; it is always re-taken before continuing, so the "fail" label
 * is reached with the lock held.
 *
 * @flags is not used by this function.
 *
 * Returns 0 on success, a negative errno value otherwise.
 */
static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
                                        uint64_t bytes, QEMUIOVector *qiov,
                                        int flags)
{
    BDRVQcow2State *s = bs->opaque;
    int offset_in_cluster;
    int ret;
    unsigned int cur_bytes; /* number of bytes in current iteration */
    uint64_t cluster_offset = 0;
    uint64_t bytes_done = 0;
    QEMUIOVector hd_qiov;
    uint8_t *cluster_data = NULL; /* bounce buffer, encrypted reads only */

    qemu_iovec_init(&hd_qiov, qiov->niov);

    qemu_co_mutex_lock(&s->lock);

    while (bytes != 0) {

        /* prepare next request */
        cur_bytes = MIN(bytes, INT_MAX);
        if (s->crypto) {
            /* Must fit into the bounce buffer allocated further down */
            cur_bytes = MIN(cur_bytes,
                            QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
        }

        /*
         * cur_bytes is passed by pointer and may be adjusted by the callee;
         * a non-negative return value is the cluster type switched on below.
         */
        ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset);
        if (ret < 0) {
            goto fail;
        }

        offset_in_cluster = offset_into_cluster(s, offset);

        /* hd_qiov describes the part of @qiov this iteration fills */
        qemu_iovec_reset(&hd_qiov);
        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);

        switch (ret) {
        case QCOW2_CLUSTER_UNALLOCATED:

            if (bs->backing) {
                BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
                qemu_co_mutex_unlock(&s->lock);
                ret = bdrv_co_preadv(bs->backing, offset, cur_bytes,
                                     &hd_qiov, 0);
                qemu_co_mutex_lock(&s->lock);
                if (ret < 0) {
                    goto fail;
                }
            } else {
                /* Note: in this case, no need to wait */
                qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
            }
            break;

        case QCOW2_CLUSTER_ZERO_PLAIN:
        case QCOW2_CLUSTER_ZERO_ALLOC:
            qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
            break;

        case QCOW2_CLUSTER_COMPRESSED:
            qemu_co_mutex_unlock(&s->lock);
            ret = qcow2_co_preadv_compressed(bs, cluster_offset,
                                             offset, cur_bytes,
                                             &hd_qiov);
            qemu_co_mutex_lock(&s->lock);
            if (ret < 0) {
                goto fail;
            }

            break;

        case QCOW2_CLUSTER_NORMAL:
            /* Reject host offsets that are not 512-byte aligned */
            if ((cluster_offset & 511) != 0) {
                ret = -EIO;
                goto fail;
            }

            if (bs->encrypted) {
                assert(s->crypto);

                /*
                 * For encrypted images, read everything into a temporary
                 * contiguous buffer on which the decryption can work. The
                 * buffer is allocated once and reused by later iterations.
                 */
                if (!cluster_data) {
                    cluster_data =
                        qemu_try_blockalign(s->data_file->bs,
                                            QCOW_MAX_CRYPT_CLUSTERS
                                            * s->cluster_size);
                    if (cluster_data == NULL) {
                        ret = -ENOMEM;
                        goto fail;
                    }
                }

                assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
                /* Redirect the read into the bounce buffer */
                qemu_iovec_reset(&hd_qiov);
                qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
            }

            BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
            qemu_co_mutex_unlock(&s->lock);
            ret = bdrv_co_preadv(s->data_file,
                                 cluster_offset + offset_in_cluster,
                                 cur_bytes, &hd_qiov, 0);
            qemu_co_mutex_lock(&s->lock);
            if (ret < 0) {
                goto fail;
            }
            if (bs->encrypted) {
                assert(s->crypto);
                assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
                assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
                /*
                 * The offset handed to the decryption is the host offset or
                 * the guest offset, depending on s->crypt_physical_offset.
                 */
                if (qcrypto_block_decrypt(s->crypto,
                                          (s->crypt_physical_offset ?
                                           cluster_offset + offset_in_cluster :
                                           offset),
                                          cluster_data,
                                          cur_bytes,
                                          NULL) < 0) {
                    ret = -EIO;
                    goto fail;
                }
                /* Copy the decrypted data into the caller's vector */
                qemu_iovec_from_buf(qiov, bytes_done, cluster_data, cur_bytes);
            }
            break;

        default:
            g_assert_not_reached();
            ret = -EIO;
            goto fail;
        }

        bytes -= cur_bytes;
        offset += cur_bytes;
        bytes_done += cur_bytes;
    }
    ret = 0;

fail:
    /* Reached with s->lock held, on success as well as on error */
    qemu_co_mutex_unlock(&s->lock);

    qemu_iovec_destroy(&hd_qiov);
    qemu_vfree(cluster_data);

    return ret;
}
Garcia static bool merge_cow(uint64_t offset, unsigned bytes, 2114ee22a9d8SAlberto Garcia QEMUIOVector *hd_qiov, QCowL2Meta *l2meta) 2115ee22a9d8SAlberto Garcia { 2116ee22a9d8SAlberto Garcia QCowL2Meta *m; 2117ee22a9d8SAlberto Garcia 2118ee22a9d8SAlberto Garcia for (m = l2meta; m != NULL; m = m->next) { 2119ee22a9d8SAlberto Garcia /* If both COW regions are empty then there's nothing to merge */ 2120ee22a9d8SAlberto Garcia if (m->cow_start.nb_bytes == 0 && m->cow_end.nb_bytes == 0) { 2121ee22a9d8SAlberto Garcia continue; 2122ee22a9d8SAlberto Garcia } 2123ee22a9d8SAlberto Garcia 2124ee22a9d8SAlberto Garcia /* The data (middle) region must be immediately after the 2125ee22a9d8SAlberto Garcia * start region */ 2126ee22a9d8SAlberto Garcia if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) { 2127ee22a9d8SAlberto Garcia continue; 2128ee22a9d8SAlberto Garcia } 2129ee22a9d8SAlberto Garcia 2130ee22a9d8SAlberto Garcia /* The end region must be immediately after the data (middle) 2131ee22a9d8SAlberto Garcia * region */ 2132ee22a9d8SAlberto Garcia if (m->offset + m->cow_end.offset != offset + bytes) { 2133ee22a9d8SAlberto Garcia continue; 2134ee22a9d8SAlberto Garcia } 2135ee22a9d8SAlberto Garcia 2136ee22a9d8SAlberto Garcia /* Make sure that adding both COW regions to the QEMUIOVector 2137ee22a9d8SAlberto Garcia * does not exceed IOV_MAX */ 2138ee22a9d8SAlberto Garcia if (hd_qiov->niov > IOV_MAX - 2) { 2139ee22a9d8SAlberto Garcia continue; 2140ee22a9d8SAlberto Garcia } 2141ee22a9d8SAlberto Garcia 2142ee22a9d8SAlberto Garcia m->data_qiov = hd_qiov; 2143ee22a9d8SAlberto Garcia return true; 2144ee22a9d8SAlberto Garcia } 2145ee22a9d8SAlberto Garcia 2146ee22a9d8SAlberto Garcia return false; 2147ee22a9d8SAlberto Garcia } 2148ee22a9d8SAlberto Garcia 2149d46a0bb2SKevin Wolf static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, 2150d46a0bb2SKevin Wolf uint64_t bytes, QEMUIOVector *qiov, 2151d46a0bb2SKevin Wolf int flags) 2152585f8587Sbellard { 
/*
 * Writes @bytes bytes from @qiov to guest offset @offset.
 *
 * The request is processed in pieces. For each piece, clusters are allocated
 * with qcow2_alloc_cluster_offset(), the data is encrypted into a bounce
 * buffer if bs->encrypted is set, the target range is checked with
 * qcow2_pre_write_overlap_check(), and the data is written to s->data_file
 * unless merge_cow() attached it to a pending COW request. Finally the
 * allocation metadata is completed through qcow2_handle_l2meta().
 *
 * s->lock is dropped only around bdrv_co_pwritev() and re-taken afterwards,
 * so the "fail" label is always reached with the lock held.
 *
 * @flags is not used by this function.
 *
 * Returns 0 on success, a negative errno value otherwise.
 */
static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
                                         uint64_t bytes, QEMUIOVector *qiov,
                                         int flags)
{
    BDRVQcow2State *s = bs->opaque;
    int offset_in_cluster;
    int ret;
    unsigned int cur_bytes; /* number of bytes in current iteration */
    uint64_t cluster_offset;
    QEMUIOVector hd_qiov;
    uint64_t bytes_done = 0;
    uint8_t *cluster_data = NULL; /* bounce buffer, encrypted writes only */
    QCowL2Meta *l2meta = NULL;

    trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes);

    qemu_iovec_init(&hd_qiov, qiov->niov);

    qemu_co_mutex_lock(&s->lock);

    while (bytes != 0) {

        l2meta = NULL;

        trace_qcow2_writev_start_part(qemu_coroutine_self());
        offset_in_cluster = offset_into_cluster(s, offset);
        cur_bytes = MIN(bytes, INT_MAX);
        if (bs->encrypted) {
            /* Must fit into the bounce buffer allocated further down */
            cur_bytes = MIN(cur_bytes,
                            QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
                            - offset_in_cluster);
        }

        /*
         * cur_bytes is passed by pointer and may be adjusted by the callee;
         * l2meta receives the list handled by qcow2_handle_l2meta() below.
         */
        ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
                                         &cluster_offset, &l2meta);
        if (ret < 0) {
            goto fail;
        }

        assert((cluster_offset & 511) == 0);

        /* hd_qiov describes the part of @qiov this iteration writes */
        qemu_iovec_reset(&hd_qiov);
        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);

        if (bs->encrypted) {
            assert(s->crypto);
            /* Allocated once and reused by later iterations */
            if (!cluster_data) {
                cluster_data = qemu_try_blockalign(bs->file->bs,
                                                   QCOW_MAX_CRYPT_CLUSTERS
                                                   * s->cluster_size);
                if (cluster_data == NULL) {
                    ret = -ENOMEM;
                    goto fail;
                }
            }

            assert(hd_qiov.size <=
                   QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
            /* Encrypt a private copy, not the caller's buffers */
            qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);

            /*
             * The offset handed to the encryption is the host offset or the
             * guest offset, depending on s->crypt_physical_offset.
             */
            if (qcrypto_block_encrypt(s->crypto,
                                      (s->crypt_physical_offset ?
                                       cluster_offset + offset_in_cluster :
                                       offset),
                                      cluster_data,
                                      cur_bytes, NULL) < 0) {
                ret = -EIO;
                goto fail;
            }

            /* From here on the bounce buffer is what gets written */
            qemu_iovec_reset(&hd_qiov);
            qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
        }

        ret = qcow2_pre_write_overlap_check(bs, 0,
                cluster_offset + offset_in_cluster, cur_bytes, true);
        if (ret < 0) {
            goto fail;
        }

        /* If we need to do COW, check if it's possible to merge the
         * writing of the guest data together with that of the COW regions.
         * If it's not possible (or not necessary) then write the
         * guest data now. */
        if (!merge_cow(offset, cur_bytes, &hd_qiov, l2meta)) {
            qemu_co_mutex_unlock(&s->lock);
            BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
            trace_qcow2_writev_data(qemu_coroutine_self(),
                                    cluster_offset + offset_in_cluster);
            ret = bdrv_co_pwritev(s->data_file,
                                  cluster_offset + offset_in_cluster,
                                  cur_bytes, &hd_qiov, 0);
            qemu_co_mutex_lock(&s->lock);
            if (ret < 0) {
                goto fail;
            }
        }

        /* Complete the allocation; on success l2meta is NULL afterwards */
        ret = qcow2_handle_l2meta(bs, &l2meta, true);
        if (ret) {
            goto fail;
        }

        bytes -= cur_bytes;
        offset += cur_bytes;
        bytes_done += cur_bytes;
        trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes);
    }
    ret = 0;

fail:
    /*
     * Dispose of whatever is still on the l2meta list without linking it
     * (no-op when the list is empty, as on the success path).
     */
    qcow2_handle_l2meta(bs, &l2meta, false);

    qemu_co_mutex_unlock(&s->lock);

    qemu_iovec_destroy(&hd_qiov);
    qemu_vfree(cluster_data);
    trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);

    return ret;
}
Error *local_err = NULL; 2276ec6d8912SKevin Wolf 227783a8c775SPavel Butsykin qcow2_store_persistent_dirty_bitmaps(bs, &local_err); 227883a8c775SPavel Butsykin if (local_err != NULL) { 227983a8c775SPavel Butsykin result = -EINVAL; 2280132adb68SVladimir Sementsov-Ogievskiy error_reportf_err(local_err, "Lost persistent bitmaps during " 2281132adb68SVladimir Sementsov-Ogievskiy "inactivation of node '%s': ", 228283a8c775SPavel Butsykin bdrv_get_device_or_node_name(bs)); 228383a8c775SPavel Butsykin } 228483a8c775SPavel Butsykin 2285ec6d8912SKevin Wolf ret = qcow2_cache_flush(bs, s->l2_table_cache); 2286ec6d8912SKevin Wolf if (ret) { 2287ec6d8912SKevin Wolf result = ret; 2288ec6d8912SKevin Wolf error_report("Failed to flush the L2 table cache: %s", 2289ec6d8912SKevin Wolf strerror(-ret)); 2290ec6d8912SKevin Wolf } 2291ec6d8912SKevin Wolf 2292ec6d8912SKevin Wolf ret = qcow2_cache_flush(bs, s->refcount_block_cache); 2293ec6d8912SKevin Wolf if (ret) { 2294ec6d8912SKevin Wolf result = ret; 2295ec6d8912SKevin Wolf error_report("Failed to flush the refcount block cache: %s", 2296ec6d8912SKevin Wolf strerror(-ret)); 2297ec6d8912SKevin Wolf } 2298ec6d8912SKevin Wolf 2299ec6d8912SKevin Wolf if (result == 0) { 2300ec6d8912SKevin Wolf qcow2_mark_clean(bs); 2301ec6d8912SKevin Wolf } 2302ec6d8912SKevin Wolf 2303ec6d8912SKevin Wolf return result; 2304ec6d8912SKevin Wolf } 2305ec6d8912SKevin Wolf 23067c80ab3fSJes Sorensen static void qcow2_close(BlockDriverState *bs) 2307585f8587Sbellard { 2308ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 2309de82815dSKevin Wolf qemu_vfree(s->l1_table); 2310cf93980eSMax Reitz /* else pre-write overlap checks in cache_destroy may crash */ 2311cf93980eSMax Reitz s->l1_table = NULL; 231229c1a730SKevin Wolf 2313140fd5a6SKevin Wolf if (!(s->flags & BDRV_O_INACTIVE)) { 2314ec6d8912SKevin Wolf qcow2_inactivate(bs); 23153b5e14c7SMax Reitz } 2316c61d0004SStefan Hajnoczi 2317279621c0SAlberto Garcia cache_clean_timer_del(bs); 2318e64d4072SAlberto Garcia 
/*
 * Releases the resources held in the BDRVQcow2State of @bs.
 *
 * Unless BDRV_O_INACTIVE is set in s->flags, qcow2_inactivate() is called
 * first (its return value is ignored). The metadata caches, the crypto
 * block, the saved header fields and extensions, the image file name strings
 * and, if has_data_file() reports one, the data file child are then released,
 * followed by the refcount and snapshot state.
 */
static void qcow2_close(BlockDriverState *bs)
{
    BDRVQcow2State *s = bs->opaque;
    qemu_vfree(s->l1_table);
    /* else pre-write overlap checks in cache_destroy may crash */
    s->l1_table = NULL;

    if (!(s->flags & BDRV_O_INACTIVE)) {
        qcow2_inactivate(bs);
    }

    cache_clean_timer_del(bs);
    qcow2_cache_destroy(s->l2_table_cache);
    qcow2_cache_destroy(s->refcount_block_cache);

    /* Clear the pointer so that no stale reference is left behind */
    qcrypto_block_free(s->crypto);
    s->crypto = NULL;

    g_free(s->unknown_header_fields);
    cleanup_unknown_header_ext(bs);

    g_free(s->image_data_file);
    g_free(s->image_backing_file);
    g_free(s->image_backing_format);

    if (has_data_file(bs)) {
        bdrv_unref_child(bs, s->data_file);
    }

    qcow2_refcount_close(bs);
    qcow2_free_snapshots(bs);
}
Berrange s->crypto = NULL; 235606d9260fSAnthony Liguori 235706d9260fSAnthony Liguori qcow2_close(bs); 235806d9260fSAnthony Liguori 2359ff99129aSKevin Wolf memset(s, 0, sizeof(BDRVQcow2State)); 2360d475e5acSKevin Wolf options = qdict_clone_shallow(bs->options); 23615a8a30dbSKevin Wolf 2362140fd5a6SKevin Wolf flags &= ~BDRV_O_INACTIVE; 23632b148f39SPaolo Bonzini qemu_co_mutex_lock(&s->lock); 23644e4bf5c4SKevin Wolf ret = qcow2_do_open(bs, options, flags, &local_err); 23652b148f39SPaolo Bonzini qemu_co_mutex_unlock(&s->lock); 2366cb3e7f08SMarc-André Lureau qobject_unref(options); 23675a8a30dbSKevin Wolf if (local_err) { 23684b576648SMarkus Armbruster error_propagate_prepend(errp, local_err, 23694b576648SMarkus Armbruster "Could not reopen qcow2 layer: "); 2370191fb11bSKevin Wolf bs->drv = NULL; 23715a8a30dbSKevin Wolf return; 23725a8a30dbSKevin Wolf } else if (ret < 0) { 23735a8a30dbSKevin Wolf error_setg_errno(errp, -ret, "Could not reopen qcow2 layer"); 2374191fb11bSKevin Wolf bs->drv = NULL; 23755a8a30dbSKevin Wolf return; 23765a8a30dbSKevin Wolf } 2377acdfb480SKevin Wolf 2378b25b387fSDaniel P. 
Berrange s->crypto = crypto; 237906d9260fSAnthony Liguori } 238006d9260fSAnthony Liguori 2381e24e49e6SKevin Wolf static size_t header_ext_add(char *buf, uint32_t magic, const void *s, 2382e24e49e6SKevin Wolf size_t len, size_t buflen) 2383756e6736SKevin Wolf { 2384e24e49e6SKevin Wolf QCowExtension *ext_backing_fmt = (QCowExtension*) buf; 2385e24e49e6SKevin Wolf size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7); 2386756e6736SKevin Wolf 2387e24e49e6SKevin Wolf if (buflen < ext_len) { 2388756e6736SKevin Wolf return -ENOSPC; 2389756e6736SKevin Wolf } 2390756e6736SKevin Wolf 2391e24e49e6SKevin Wolf *ext_backing_fmt = (QCowExtension) { 2392e24e49e6SKevin Wolf .magic = cpu_to_be32(magic), 2393e24e49e6SKevin Wolf .len = cpu_to_be32(len), 2394e24e49e6SKevin Wolf }; 23950647d47cSStefan Hajnoczi 23960647d47cSStefan Hajnoczi if (len) { 2397e24e49e6SKevin Wolf memcpy(buf + sizeof(QCowExtension), s, len); 23980647d47cSStefan Hajnoczi } 2399756e6736SKevin Wolf 2400e24e49e6SKevin Wolf return ext_len; 2401756e6736SKevin Wolf } 2402756e6736SKevin Wolf 2403e24e49e6SKevin Wolf /* 2404e24e49e6SKevin Wolf * Updates the qcow2 header, including the variable length parts of it, i.e. 2405e24e49e6SKevin Wolf * the backing file name and all extensions. qcow2 was not designed to allow 2406e24e49e6SKevin Wolf * such changes, so if we run out of space (we can only use the first cluster) 2407e24e49e6SKevin Wolf * this function may fail. 2408e24e49e6SKevin Wolf * 2409e24e49e6SKevin Wolf * Returns 0 on success, -errno in error cases. 
/*
 * Updates the qcow2 header, including the variable length parts of it, i.e.
 * the backing file name and all extensions. qcow2 was not designed to allow
 * such changes, so if we run out of space (we can only use the first cluster)
 * this function may fail.
 *
 * The header is assembled in a temporary buffer of one cluster in this
 * order: fixed header, unknown header fields, header extensions (backing
 * format, external data file, crypto header pointer, feature table, bitmaps,
 * unknown extensions, end marker), backing file name. The whole cluster is
 * then written at offset 0 of bs->file.
 *
 * Returns 0 on success, -errno in error cases.
 */
int qcow2_update_header(BlockDriverState *bs)
{
    BDRVQcow2State *s = bs->opaque;
    QCowHeader *header;
    char *buf;                       /* write cursor into the buffer */
    size_t buflen = s->cluster_size; /* bytes left after the cursor */
    int ret;
    uint64_t total_size;
    uint32_t refcount_table_clusters;
    size_t header_length;
    Qcow2UnknownHeaderExtension *uext;

    buf = qemu_blockalign(bs, buflen);

    /* Header structure */
    /* header keeps the start of the buffer; buf is advanced below */
    header = (QCowHeader*) buf;

    if (buflen < sizeof(*header)) {
        ret = -ENOSPC;
        goto fail;
    }

    header_length = sizeof(*header) + s->unknown_header_fields_size;
    total_size = bs->total_sectors * BDRV_SECTOR_SIZE;
    refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);

    *header = (QCowHeader) {
        /* Version 2 fields */
        .magic = cpu_to_be32(QCOW_MAGIC),
        .version = cpu_to_be32(s->qcow_version),
        /* Both backing file fields are filled in at the end, if needed */
        .backing_file_offset = 0,
        .backing_file_size = 0,
        .cluster_bits = cpu_to_be32(s->cluster_bits),
        .size = cpu_to_be64(total_size),
        .crypt_method = cpu_to_be32(s->crypt_method_header),
        .l1_size = cpu_to_be32(s->l1_size),
        .l1_table_offset = cpu_to_be64(s->l1_table_offset),
        .refcount_table_offset = cpu_to_be64(s->refcount_table_offset),
        .refcount_table_clusters = cpu_to_be32(refcount_table_clusters),
        .nb_snapshots = cpu_to_be32(s->nb_snapshots),
        .snapshots_offset = cpu_to_be64(s->snapshots_offset),

        /* Version 3 fields */
        .incompatible_features = cpu_to_be64(s->incompatible_features),
        .compatible_features = cpu_to_be64(s->compatible_features),
        .autoclear_features = cpu_to_be64(s->autoclear_features),
        .refcount_order = cpu_to_be32(s->refcount_order),
        .header_length = cpu_to_be32(header_length),
    };

    /* For older versions, write a shorter header */
    /* ret temporarily holds the size of the fixed header being kept */
    switch (s->qcow_version) {
    case 2:
        ret = offsetof(QCowHeader, incompatible_features);
        break;
    case 3:
        ret = sizeof(*header);
        break;
    default:
        ret = -EINVAL;
        goto fail;
    }

    buf += ret;
    buflen -= ret;
    /*
     * Zero everything after the fixed header. For version 2 this also wipes
     * the version 3 fields set above.
     */
    memset(buf, 0, buflen);

    /* Preserve any unknown field in the header */
    if (s->unknown_header_fields_size) {
        if (buflen < s->unknown_header_fields_size) {
            ret = -ENOSPC;
            goto fail;
        }

        memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size);
        buf += s->unknown_header_fields_size;
        buflen -= s->unknown_header_fields_size;
    }

    /*
     * Each header_ext_add() call below returns the number of bytes it used,
     * by which the cursor is advanced. It returns size_t; its -ENOSPC result
     * is detected here as a negative value once stored in the int ret.
     */

    /* Backing file format header extension */
    if (s->image_backing_format) {
        ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT,
                             s->image_backing_format,
                             strlen(s->image_backing_format),
                             buflen);
        if (ret < 0) {
            goto fail;
        }

        buf += ret;
        buflen -= ret;
    }

    /* External data file header extension */
    if (has_data_file(bs) && s->image_data_file) {
        ret = header_ext_add(buf, QCOW2_EXT_MAGIC_DATA_FILE,
                             s->image_data_file, strlen(s->image_data_file),
                             buflen);
        if (ret < 0) {
            goto fail;
        }

        buf += ret;
        buflen -= ret;
    }

    /* Full disk encryption header pointer extension */
    if (s->crypto_header.offset != 0) {
        /*
         * s->crypto_header is converted to big endian in place for the copy
         * and converted back right after, before ret is even checked.
         */
        s->crypto_header.offset = cpu_to_be64(s->crypto_header.offset);
        s->crypto_header.length = cpu_to_be64(s->crypto_header.length);
        ret = header_ext_add(buf, QCOW2_EXT_MAGIC_CRYPTO_HEADER,
                             &s->crypto_header, sizeof(s->crypto_header),
                             buflen);
        s->crypto_header.offset = be64_to_cpu(s->crypto_header.offset);
        s->crypto_header.length = be64_to_cpu(s->crypto_header.length);
        if (ret < 0) {
            goto fail;
        }
        buf += ret;
        buflen -= ret;
    }

    /* Feature table */
    if (s->qcow_version >= 3) {
        Qcow2Feature features[] = {
            {
                .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
                .bit = QCOW2_INCOMPAT_DIRTY_BITNR,
                .name = "dirty bit",
            },
            {
                .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
                .bit = QCOW2_INCOMPAT_CORRUPT_BITNR,
                .name = "corrupt bit",
            },
            {
                .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
                .bit = QCOW2_INCOMPAT_DATA_FILE_BITNR,
                .name = "external data file",
            },
            {
                .type = QCOW2_FEAT_TYPE_COMPATIBLE,
                .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
                .name = "lazy refcounts",
            },
        };

        ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
                             features, sizeof(features), buflen);
        if (ret < 0) {
            goto fail;
        }
        buf += ret;
        buflen -= ret;
    }

    /* Bitmap extension */
    if (s->nb_bitmaps > 0) {
        Qcow2BitmapHeaderExt bitmaps_header = {
            .nb_bitmaps = cpu_to_be32(s->nb_bitmaps),
            .bitmap_directory_size =
                    cpu_to_be64(s->bitmap_directory_size),
            .bitmap_directory_offset =
                    cpu_to_be64(s->bitmap_directory_offset)
        };
        ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BITMAPS,
                             &bitmaps_header, sizeof(bitmaps_header),
                             buflen);
        if (ret < 0) {
            goto fail;
        }
        buf += ret;
        buflen -= ret;
    }

    /* Keep unknown header extensions */
    QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
        ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
        if (ret < 0) {
            goto fail;
        }

        buf += ret;
        buflen -= ret;
    }

    /* End of header extensions */
    ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen);
    if (ret < 0) {
        goto fail;
    }

    buf += ret;
    buflen -= ret;

    /* Backing file name */
    if (s->image_backing_file) {
        size_t backing_file_len = strlen(s->image_backing_file);

        if (buflen < backing_file_len) {
            ret = -ENOSPC;
            goto fail;
        }

        /* Using strncpy is ok here, since buf is not NUL-terminated. */
        strncpy(buf, s->image_backing_file, buflen);

        /* The name's offset is its distance from the start of the header */
        header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
        header->backing_file_size = cpu_to_be32(backing_file_len);
    }

    /* Write the new header */
    ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size);
    if (ret < 0) {
        goto fail;
    }

    ret = 0;
fail:
    /* buf has been advanced; header still points at the allocation start */
    qemu_vfree(header);
    return ret;
}
2651e24e49e6SKevin Wolf pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); 2652e24e49e6SKevin Wolf pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); 2653e24e49e6SKevin Wolf 2654e4603fe1SKevin Wolf g_free(s->image_backing_file); 2655e4603fe1SKevin Wolf g_free(s->image_backing_format); 2656e4603fe1SKevin Wolf 2657e4603fe1SKevin Wolf s->image_backing_file = backing_file ? g_strdup(bs->backing_file) : NULL; 2658e4603fe1SKevin Wolf s->image_backing_format = backing_fmt ? g_strdup(bs->backing_format) : NULL; 2659e4603fe1SKevin Wolf 2660e24e49e6SKevin Wolf return qcow2_update_header(bs); 2661756e6736SKevin Wolf } 2662756e6736SKevin Wolf 26634652b8f3SDaniel P. Berrange static int qcow2_crypt_method_from_format(const char *encryptfmt) 26644652b8f3SDaniel P. Berrange { 26654652b8f3SDaniel P. Berrange if (g_str_equal(encryptfmt, "luks")) { 26664652b8f3SDaniel P. Berrange return QCOW_CRYPT_LUKS; 26674652b8f3SDaniel P. Berrange } else if (g_str_equal(encryptfmt, "aes")) { 26684652b8f3SDaniel P. Berrange return QCOW_CRYPT_AES; 26694652b8f3SDaniel P. Berrange } else { 26704652b8f3SDaniel P. Berrange return -EINVAL; 26714652b8f3SDaniel P. Berrange } 26724652b8f3SDaniel P. Berrange } 2673b25b387fSDaniel P. 
Berrange 267460900b7bSKevin Wolf static int qcow2_set_up_encryption(BlockDriverState *bs, 267560900b7bSKevin Wolf QCryptoBlockCreateOptions *cryptoopts, 267660900b7bSKevin Wolf Error **errp) 267760900b7bSKevin Wolf { 267860900b7bSKevin Wolf BDRVQcow2State *s = bs->opaque; 267960900b7bSKevin Wolf QCryptoBlock *crypto = NULL; 268060900b7bSKevin Wolf int fmt, ret; 268160900b7bSKevin Wolf 268260900b7bSKevin Wolf switch (cryptoopts->format) { 268360900b7bSKevin Wolf case Q_CRYPTO_BLOCK_FORMAT_LUKS: 268460900b7bSKevin Wolf fmt = QCOW_CRYPT_LUKS; 268560900b7bSKevin Wolf break; 268660900b7bSKevin Wolf case Q_CRYPTO_BLOCK_FORMAT_QCOW: 268760900b7bSKevin Wolf fmt = QCOW_CRYPT_AES; 268860900b7bSKevin Wolf break; 268960900b7bSKevin Wolf default: 269060900b7bSKevin Wolf error_setg(errp, "Crypto format not supported in qcow2"); 269160900b7bSKevin Wolf return -EINVAL; 269260900b7bSKevin Wolf } 269360900b7bSKevin Wolf 26944652b8f3SDaniel P. Berrange s->crypt_method_header = fmt; 2695b25b387fSDaniel P. Berrange 26961cd9a787SDaniel P. Berrange crypto = qcrypto_block_create(cryptoopts, "encrypt.", 26974652b8f3SDaniel P. Berrange qcow2_crypto_hdr_init_func, 26984652b8f3SDaniel P. Berrange qcow2_crypto_hdr_write_func, 2699b25b387fSDaniel P. Berrange bs, errp); 2700b25b387fSDaniel P. Berrange if (!crypto) { 270160900b7bSKevin Wolf return -EINVAL; 2702b25b387fSDaniel P. Berrange } 2703b25b387fSDaniel P. Berrange 2704b25b387fSDaniel P. Berrange ret = qcow2_update_header(bs); 2705b25b387fSDaniel P. Berrange if (ret < 0) { 2706b25b387fSDaniel P. Berrange error_setg_errno(errp, -ret, "Could not write encryption header"); 2707b25b387fSDaniel P. Berrange goto out; 2708b25b387fSDaniel P. Berrange } 2709b25b387fSDaniel P. Berrange 271060900b7bSKevin Wolf ret = 0; 2711b25b387fSDaniel P. Berrange out: 2712b25b387fSDaniel P. Berrange qcrypto_block_free(crypto); 2713b25b387fSDaniel P. Berrange return ret; 2714b25b387fSDaniel P. Berrange } 2715b25b387fSDaniel P. 
Berrange 27167bc45dc1SMax Reitz /** 27177bc45dc1SMax Reitz * Preallocates metadata structures for data clusters between @offset (in the 27187bc45dc1SMax Reitz * guest disk) and @new_length (which is thus generally the new guest disk 27197bc45dc1SMax Reitz * size). 27207bc45dc1SMax Reitz * 27217bc45dc1SMax Reitz * Returns: 0 on success, -errno on failure. 27227bc45dc1SMax Reitz */ 272347e86b86SKevin Wolf static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset, 2724718c0fceSKevin Wolf uint64_t new_length, PreallocMode mode, 2725718c0fceSKevin Wolf Error **errp) 2726a35e1c17SKevin Wolf { 272793e32b3eSKevin Wolf BDRVQcow2State *s = bs->opaque; 2728d46a0bb2SKevin Wolf uint64_t bytes; 2729060bee89SKevin Wolf uint64_t host_offset = 0; 2730718c0fceSKevin Wolf int64_t file_length; 2731d46a0bb2SKevin Wolf unsigned int cur_bytes; 2732148da7eaSKevin Wolf int ret; 2733f50f88b9SKevin Wolf QCowL2Meta *meta; 2734a35e1c17SKevin Wolf 27357bc45dc1SMax Reitz assert(offset <= new_length); 27367bc45dc1SMax Reitz bytes = new_length - offset; 2737a35e1c17SKevin Wolf 2738d46a0bb2SKevin Wolf while (bytes) { 2739f29fbf7cSKevin Wolf cur_bytes = MIN(bytes, QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size)); 2740d46a0bb2SKevin Wolf ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes, 2741060bee89SKevin Wolf &host_offset, &meta); 2742148da7eaSKevin Wolf if (ret < 0) { 2743360bd074SKevin Wolf error_setg_errno(errp, -ret, "Allocating clusters failed"); 274447e86b86SKevin Wolf return ret; 2745a35e1c17SKevin Wolf } 2746a35e1c17SKevin Wolf 2747c792707fSStefan Hajnoczi while (meta) { 2748c792707fSStefan Hajnoczi QCowL2Meta *next = meta->next; 2749c792707fSStefan Hajnoczi 2750f50f88b9SKevin Wolf ret = qcow2_alloc_cluster_link_l2(bs, meta); 275119dbcbf7SKevin Wolf if (ret < 0) { 2752360bd074SKevin Wolf error_setg_errno(errp, -ret, "Mapping clusters failed"); 27537c2bbf4aSHu Tao qcow2_free_any_clusters(bs, meta->alloc_offset, 27547c2bbf4aSHu Tao meta->nb_clusters, QCOW2_DISCARD_NEVER); 
275547e86b86SKevin Wolf return ret; 2756a35e1c17SKevin Wolf } 2757a35e1c17SKevin Wolf 27587c2bbf4aSHu Tao /* There are no dependent requests, but we need to remove our 27597c2bbf4aSHu Tao * request from the list of in-flight requests */ 27604e95314eSKevin Wolf QLIST_REMOVE(meta, next_in_flight); 2761c792707fSStefan Hajnoczi 2762c792707fSStefan Hajnoczi g_free(meta); 2763c792707fSStefan Hajnoczi meta = next; 2764f50f88b9SKevin Wolf } 2765f214978aSKevin Wolf 2766a35e1c17SKevin Wolf /* TODO Preallocate data if requested */ 2767a35e1c17SKevin Wolf 2768d46a0bb2SKevin Wolf bytes -= cur_bytes; 2769d46a0bb2SKevin Wolf offset += cur_bytes; 2770a35e1c17SKevin Wolf } 2771a35e1c17SKevin Wolf 2772a35e1c17SKevin Wolf /* 2773a35e1c17SKevin Wolf * It is expected that the image file is large enough to actually contain 2774a35e1c17SKevin Wolf * all of the allocated clusters (otherwise we get failing reads after 2775a35e1c17SKevin Wolf * EOF). Extend the image to the last allocated sector. 2776a35e1c17SKevin Wolf */ 2777718c0fceSKevin Wolf file_length = bdrv_getlength(s->data_file->bs); 2778718c0fceSKevin Wolf if (file_length < 0) { 2779718c0fceSKevin Wolf error_setg_errno(errp, -file_length, "Could not get file size"); 2780718c0fceSKevin Wolf return file_length; 2781718c0fceSKevin Wolf } 2782718c0fceSKevin Wolf 2783718c0fceSKevin Wolf if (host_offset + cur_bytes > file_length) { 2784718c0fceSKevin Wolf if (mode == PREALLOC_MODE_METADATA) { 2785718c0fceSKevin Wolf mode = PREALLOC_MODE_OFF; 2786718c0fceSKevin Wolf } 2787718c0fceSKevin Wolf ret = bdrv_co_truncate(s->data_file, host_offset + cur_bytes, mode, 2788718c0fceSKevin Wolf errp); 278919dbcbf7SKevin Wolf if (ret < 0) { 279047e86b86SKevin Wolf return ret; 279119dbcbf7SKevin Wolf } 2792a35e1c17SKevin Wolf } 2793a35e1c17SKevin Wolf 279447e86b86SKevin Wolf return 0; 2795a35e1c17SKevin Wolf } 2796a35e1c17SKevin Wolf 27977c5bcc42SStefan Hajnoczi /* qcow2_refcount_metadata_size: 27987c5bcc42SStefan Hajnoczi * @clusters: number of 
clusters to refcount (including data and L1/L2 tables) 27997c5bcc42SStefan Hajnoczi * @cluster_size: size of a cluster, in bytes 28007c5bcc42SStefan Hajnoczi * @refcount_order: refcount bits power-of-2 exponent 280112cc30a8SMax Reitz * @generous_increase: allow for the refcount table to be 1.5x as large as it 280212cc30a8SMax Reitz * needs to be 28037c5bcc42SStefan Hajnoczi * 28047c5bcc42SStefan Hajnoczi * Returns: Number of bytes required for refcount blocks and table metadata. 28057c5bcc42SStefan Hajnoczi */ 280612cc30a8SMax Reitz int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size, 280712cc30a8SMax Reitz int refcount_order, bool generous_increase, 280812cc30a8SMax Reitz uint64_t *refblock_count) 28097c5bcc42SStefan Hajnoczi { 28107c5bcc42SStefan Hajnoczi /* 28117c5bcc42SStefan Hajnoczi * Every host cluster is reference-counted, including metadata (even 28127c5bcc42SStefan Hajnoczi * refcount metadata is recursively included). 28137c5bcc42SStefan Hajnoczi * 28147c5bcc42SStefan Hajnoczi * An accurate formula for the size of refcount metadata size is difficult 28157c5bcc42SStefan Hajnoczi * to derive. An easier method of calculation is finding the fixed point 28167c5bcc42SStefan Hajnoczi * where no further refcount blocks or table clusters are required to 28177c5bcc42SStefan Hajnoczi * reference count every cluster. 
28187c5bcc42SStefan Hajnoczi */ 28197c5bcc42SStefan Hajnoczi int64_t blocks_per_table_cluster = cluster_size / sizeof(uint64_t); 28207c5bcc42SStefan Hajnoczi int64_t refcounts_per_block = cluster_size * 8 / (1 << refcount_order); 28217c5bcc42SStefan Hajnoczi int64_t table = 0; /* number of refcount table clusters */ 28227c5bcc42SStefan Hajnoczi int64_t blocks = 0; /* number of refcount block clusters */ 28237c5bcc42SStefan Hajnoczi int64_t last; 28247c5bcc42SStefan Hajnoczi int64_t n = 0; 28257c5bcc42SStefan Hajnoczi 28267c5bcc42SStefan Hajnoczi do { 28277c5bcc42SStefan Hajnoczi last = n; 28287c5bcc42SStefan Hajnoczi blocks = DIV_ROUND_UP(clusters + table + blocks, refcounts_per_block); 28297c5bcc42SStefan Hajnoczi table = DIV_ROUND_UP(blocks, blocks_per_table_cluster); 28307c5bcc42SStefan Hajnoczi n = clusters + blocks + table; 283112cc30a8SMax Reitz 283212cc30a8SMax Reitz if (n == last && generous_increase) { 283312cc30a8SMax Reitz clusters += DIV_ROUND_UP(table, 2); 283412cc30a8SMax Reitz n = 0; /* force another loop */ 283512cc30a8SMax Reitz generous_increase = false; 283612cc30a8SMax Reitz } 28377c5bcc42SStefan Hajnoczi } while (n != last); 28387c5bcc42SStefan Hajnoczi 283912cc30a8SMax Reitz if (refblock_count) { 284012cc30a8SMax Reitz *refblock_count = blocks; 284112cc30a8SMax Reitz } 284212cc30a8SMax Reitz 28437c5bcc42SStefan Hajnoczi return (blocks + table) * cluster_size; 28447c5bcc42SStefan Hajnoczi } 28457c5bcc42SStefan Hajnoczi 284695c67e3bSStefan Hajnoczi /** 284795c67e3bSStefan Hajnoczi * qcow2_calc_prealloc_size: 284895c67e3bSStefan Hajnoczi * @total_size: virtual disk size in bytes 284995c67e3bSStefan Hajnoczi * @cluster_size: cluster size in bytes 285095c67e3bSStefan Hajnoczi * @refcount_order: refcount bits power-of-2 exponent 2851a9420734SKevin Wolf * 285295c67e3bSStefan Hajnoczi * Returns: Total number of bytes required for the fully allocated image 285395c67e3bSStefan Hajnoczi * (including metadata). 
2854a9420734SKevin Wolf */ 285595c67e3bSStefan Hajnoczi static int64_t qcow2_calc_prealloc_size(int64_t total_size, 285695c67e3bSStefan Hajnoczi size_t cluster_size, 285795c67e3bSStefan Hajnoczi int refcount_order) 285895c67e3bSStefan Hajnoczi { 28590e4271b7SHu Tao int64_t meta_size = 0; 28607c5bcc42SStefan Hajnoczi uint64_t nl1e, nl2e; 28619e029689SAlberto Garcia int64_t aligned_total_size = ROUND_UP(total_size, cluster_size); 28620e4271b7SHu Tao 28630e4271b7SHu Tao /* header: 1 cluster */ 28640e4271b7SHu Tao meta_size += cluster_size; 28650e4271b7SHu Tao 28660e4271b7SHu Tao /* total size of L2 tables */ 28670e4271b7SHu Tao nl2e = aligned_total_size / cluster_size; 28689e029689SAlberto Garcia nl2e = ROUND_UP(nl2e, cluster_size / sizeof(uint64_t)); 28690e4271b7SHu Tao meta_size += nl2e * sizeof(uint64_t); 28700e4271b7SHu Tao 28710e4271b7SHu Tao /* total size of L1 tables */ 28720e4271b7SHu Tao nl1e = nl2e * sizeof(uint64_t) / cluster_size; 28739e029689SAlberto Garcia nl1e = ROUND_UP(nl1e, cluster_size / sizeof(uint64_t)); 28740e4271b7SHu Tao meta_size += nl1e * sizeof(uint64_t); 28750e4271b7SHu Tao 28767c5bcc42SStefan Hajnoczi /* total size of refcount table and blocks */ 28777c5bcc42SStefan Hajnoczi meta_size += qcow2_refcount_metadata_size( 28787c5bcc42SStefan Hajnoczi (meta_size + aligned_total_size) / cluster_size, 287912cc30a8SMax Reitz cluster_size, refcount_order, false, NULL); 28800e4271b7SHu Tao 288195c67e3bSStefan Hajnoczi return meta_size + aligned_total_size; 288295c67e3bSStefan Hajnoczi } 288395c67e3bSStefan Hajnoczi 288429ca9e45SKevin Wolf static bool validate_cluster_size(size_t cluster_size, Error **errp) 288595c67e3bSStefan Hajnoczi { 288629ca9e45SKevin Wolf int cluster_bits = ctz32(cluster_size); 288795c67e3bSStefan Hajnoczi if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS || 288895c67e3bSStefan Hajnoczi (1 << cluster_bits) != cluster_size) 288995c67e3bSStefan Hajnoczi { 289095c67e3bSStefan Hajnoczi error_setg(errp, "Cluster 
size must be a power of two between %d and " 289195c67e3bSStefan Hajnoczi "%dk", 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10)); 289229ca9e45SKevin Wolf return false; 289329ca9e45SKevin Wolf } 289429ca9e45SKevin Wolf return true; 289529ca9e45SKevin Wolf } 289629ca9e45SKevin Wolf 289729ca9e45SKevin Wolf static size_t qcow2_opt_get_cluster_size_del(QemuOpts *opts, Error **errp) 289829ca9e45SKevin Wolf { 289929ca9e45SKevin Wolf size_t cluster_size; 290029ca9e45SKevin Wolf 290129ca9e45SKevin Wolf cluster_size = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE, 290229ca9e45SKevin Wolf DEFAULT_CLUSTER_SIZE); 290329ca9e45SKevin Wolf if (!validate_cluster_size(cluster_size, errp)) { 29040eb4a8c1SStefan Hajnoczi return 0; 290595c67e3bSStefan Hajnoczi } 29060eb4a8c1SStefan Hajnoczi return cluster_size; 29070eb4a8c1SStefan Hajnoczi } 29080eb4a8c1SStefan Hajnoczi 29090eb4a8c1SStefan Hajnoczi static int qcow2_opt_get_version_del(QemuOpts *opts, Error **errp) 29100eb4a8c1SStefan Hajnoczi { 29110eb4a8c1SStefan Hajnoczi char *buf; 29120eb4a8c1SStefan Hajnoczi int ret; 29130eb4a8c1SStefan Hajnoczi 29140eb4a8c1SStefan Hajnoczi buf = qemu_opt_get_del(opts, BLOCK_OPT_COMPAT_LEVEL); 29150eb4a8c1SStefan Hajnoczi if (!buf) { 29160eb4a8c1SStefan Hajnoczi ret = 3; /* default */ 29170eb4a8c1SStefan Hajnoczi } else if (!strcmp(buf, "0.10")) { 29180eb4a8c1SStefan Hajnoczi ret = 2; 29190eb4a8c1SStefan Hajnoczi } else if (!strcmp(buf, "1.1")) { 29200eb4a8c1SStefan Hajnoczi ret = 3; 29210eb4a8c1SStefan Hajnoczi } else { 29220eb4a8c1SStefan Hajnoczi error_setg(errp, "Invalid compatibility level: '%s'", buf); 29230eb4a8c1SStefan Hajnoczi ret = -EINVAL; 29240eb4a8c1SStefan Hajnoczi } 29250eb4a8c1SStefan Hajnoczi g_free(buf); 29260eb4a8c1SStefan Hajnoczi return ret; 29270eb4a8c1SStefan Hajnoczi } 29280eb4a8c1SStefan Hajnoczi 29290eb4a8c1SStefan Hajnoczi static uint64_t qcow2_opt_get_refcount_bits_del(QemuOpts *opts, int version, 29300eb4a8c1SStefan Hajnoczi Error **errp) 29310eb4a8c1SStefan 
Hajnoczi { 29320eb4a8c1SStefan Hajnoczi uint64_t refcount_bits; 29330eb4a8c1SStefan Hajnoczi 29340eb4a8c1SStefan Hajnoczi refcount_bits = qemu_opt_get_number_del(opts, BLOCK_OPT_REFCOUNT_BITS, 16); 29350eb4a8c1SStefan Hajnoczi if (refcount_bits > 64 || !is_power_of_2(refcount_bits)) { 29360eb4a8c1SStefan Hajnoczi error_setg(errp, "Refcount width must be a power of two and may not " 29370eb4a8c1SStefan Hajnoczi "exceed 64 bits"); 29380eb4a8c1SStefan Hajnoczi return 0; 29390eb4a8c1SStefan Hajnoczi } 29400eb4a8c1SStefan Hajnoczi 29410eb4a8c1SStefan Hajnoczi if (version < 3 && refcount_bits != 16) { 29420eb4a8c1SStefan Hajnoczi error_setg(errp, "Different refcount widths than 16 bits require " 29430eb4a8c1SStefan Hajnoczi "compatibility level 1.1 or above (use compat=1.1 or " 29440eb4a8c1SStefan Hajnoczi "greater)"); 29450eb4a8c1SStefan Hajnoczi return 0; 29460eb4a8c1SStefan Hajnoczi } 29470eb4a8c1SStefan Hajnoczi 29480eb4a8c1SStefan Hajnoczi return refcount_bits; 29490eb4a8c1SStefan Hajnoczi } 29500eb4a8c1SStefan Hajnoczi 2951c274393aSStefan Hajnoczi static int coroutine_fn 295260900b7bSKevin Wolf qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) 29530eb4a8c1SStefan Hajnoczi { 295429ca9e45SKevin Wolf BlockdevCreateOptionsQcow2 *qcow2_opts; 29550eb4a8c1SStefan Hajnoczi QDict *options; 295695c67e3bSStefan Hajnoczi 295795c67e3bSStefan Hajnoczi /* 295895c67e3bSStefan Hajnoczi * Open the image file and write a minimal qcow2 header. 295995c67e3bSStefan Hajnoczi * 296095c67e3bSStefan Hajnoczi * We keep things simple and start with a zero-sized image. We also 296195c67e3bSStefan Hajnoczi * do without refcount blocks or a L1 table for now. We'll fix the 296295c67e3bSStefan Hajnoczi * inconsistency later. 
296395c67e3bSStefan Hajnoczi * 296495c67e3bSStefan Hajnoczi * We do need a refcount table because growing the refcount table means 296595c67e3bSStefan Hajnoczi * allocating two new refcount blocks - the seconds of which would be at 296695c67e3bSStefan Hajnoczi * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file 296795c67e3bSStefan Hajnoczi * size for any qcow2 image. 296895c67e3bSStefan Hajnoczi */ 2969e1d74bc6SKevin Wolf BlockBackend *blk = NULL; 2970e1d74bc6SKevin Wolf BlockDriverState *bs = NULL; 2971dcc98687SKevin Wolf BlockDriverState *data_bs = NULL; 297295c67e3bSStefan Hajnoczi QCowHeader *header; 297329ca9e45SKevin Wolf size_t cluster_size; 297429ca9e45SKevin Wolf int version; 297529ca9e45SKevin Wolf int refcount_order; 297695c67e3bSStefan Hajnoczi uint64_t* refcount_table; 297795c67e3bSStefan Hajnoczi Error *local_err = NULL; 297895c67e3bSStefan Hajnoczi int ret; 297995c67e3bSStefan Hajnoczi 298029ca9e45SKevin Wolf assert(create_options->driver == BLOCKDEV_DRIVER_QCOW2); 298129ca9e45SKevin Wolf qcow2_opts = &create_options->u.qcow2; 298229ca9e45SKevin Wolf 2983e1d74bc6SKevin Wolf bs = bdrv_open_blockdev_ref(qcow2_opts->file, errp); 2984e1d74bc6SKevin Wolf if (bs == NULL) { 2985e1d74bc6SKevin Wolf return -EIO; 2986e1d74bc6SKevin Wolf } 2987e1d74bc6SKevin Wolf 2988e1d74bc6SKevin Wolf /* Validate options and set default values */ 298929ca9e45SKevin Wolf if (!QEMU_IS_ALIGNED(qcow2_opts->size, BDRV_SECTOR_SIZE)) { 299029ca9e45SKevin Wolf error_setg(errp, "Image size must be a multiple of 512 bytes"); 299129ca9e45SKevin Wolf ret = -EINVAL; 299229ca9e45SKevin Wolf goto out; 299329ca9e45SKevin Wolf } 299429ca9e45SKevin Wolf 299529ca9e45SKevin Wolf if (qcow2_opts->has_version) { 299629ca9e45SKevin Wolf switch (qcow2_opts->version) { 299729ca9e45SKevin Wolf case BLOCKDEV_QCOW2_VERSION_V2: 299829ca9e45SKevin Wolf version = 2; 299929ca9e45SKevin Wolf break; 300029ca9e45SKevin Wolf case BLOCKDEV_QCOW2_VERSION_V3: 300129ca9e45SKevin Wolf version = 3; 
300229ca9e45SKevin Wolf break; 300329ca9e45SKevin Wolf default: 300429ca9e45SKevin Wolf g_assert_not_reached(); 300529ca9e45SKevin Wolf } 300629ca9e45SKevin Wolf } else { 300729ca9e45SKevin Wolf version = 3; 300829ca9e45SKevin Wolf } 300929ca9e45SKevin Wolf 301029ca9e45SKevin Wolf if (qcow2_opts->has_cluster_size) { 301129ca9e45SKevin Wolf cluster_size = qcow2_opts->cluster_size; 301229ca9e45SKevin Wolf } else { 301329ca9e45SKevin Wolf cluster_size = DEFAULT_CLUSTER_SIZE; 301429ca9e45SKevin Wolf } 301529ca9e45SKevin Wolf 301629ca9e45SKevin Wolf if (!validate_cluster_size(cluster_size, errp)) { 3017e1d74bc6SKevin Wolf ret = -EINVAL; 3018e1d74bc6SKevin Wolf goto out; 301929ca9e45SKevin Wolf } 302029ca9e45SKevin Wolf 302129ca9e45SKevin Wolf if (!qcow2_opts->has_preallocation) { 302229ca9e45SKevin Wolf qcow2_opts->preallocation = PREALLOC_MODE_OFF; 302329ca9e45SKevin Wolf } 302429ca9e45SKevin Wolf if (qcow2_opts->has_backing_file && 302529ca9e45SKevin Wolf qcow2_opts->preallocation != PREALLOC_MODE_OFF) 302629ca9e45SKevin Wolf { 302729ca9e45SKevin Wolf error_setg(errp, "Backing file and preallocation cannot be used at " 302829ca9e45SKevin Wolf "the same time"); 3029e1d74bc6SKevin Wolf ret = -EINVAL; 3030e1d74bc6SKevin Wolf goto out; 303129ca9e45SKevin Wolf } 303229ca9e45SKevin Wolf if (qcow2_opts->has_backing_fmt && !qcow2_opts->has_backing_file) { 303329ca9e45SKevin Wolf error_setg(errp, "Backing format cannot be used without backing file"); 3034e1d74bc6SKevin Wolf ret = -EINVAL; 3035e1d74bc6SKevin Wolf goto out; 303629ca9e45SKevin Wolf } 303729ca9e45SKevin Wolf 303829ca9e45SKevin Wolf if (!qcow2_opts->has_lazy_refcounts) { 303929ca9e45SKevin Wolf qcow2_opts->lazy_refcounts = false; 304029ca9e45SKevin Wolf } 304129ca9e45SKevin Wolf if (version < 3 && qcow2_opts->lazy_refcounts) { 304229ca9e45SKevin Wolf error_setg(errp, "Lazy refcounts only supported with compatibility " 3043b76b4f60SKevin Wolf "level 1.1 and above (use version=v3 or greater)"); 3044e1d74bc6SKevin 
Wolf ret = -EINVAL; 3045e1d74bc6SKevin Wolf goto out; 304629ca9e45SKevin Wolf } 304729ca9e45SKevin Wolf 304829ca9e45SKevin Wolf if (!qcow2_opts->has_refcount_bits) { 304929ca9e45SKevin Wolf qcow2_opts->refcount_bits = 16; 305029ca9e45SKevin Wolf } 305129ca9e45SKevin Wolf if (qcow2_opts->refcount_bits > 64 || 305229ca9e45SKevin Wolf !is_power_of_2(qcow2_opts->refcount_bits)) 305329ca9e45SKevin Wolf { 305429ca9e45SKevin Wolf error_setg(errp, "Refcount width must be a power of two and may not " 305529ca9e45SKevin Wolf "exceed 64 bits"); 3056e1d74bc6SKevin Wolf ret = -EINVAL; 3057e1d74bc6SKevin Wolf goto out; 305829ca9e45SKevin Wolf } 305929ca9e45SKevin Wolf if (version < 3 && qcow2_opts->refcount_bits != 16) { 306029ca9e45SKevin Wolf error_setg(errp, "Different refcount widths than 16 bits require " 3061b76b4f60SKevin Wolf "compatibility level 1.1 or above (use version=v3 or " 306229ca9e45SKevin Wolf "greater)"); 3063e1d74bc6SKevin Wolf ret = -EINVAL; 3064e1d74bc6SKevin Wolf goto out; 306529ca9e45SKevin Wolf } 306629ca9e45SKevin Wolf refcount_order = ctz32(qcow2_opts->refcount_bits); 306729ca9e45SKevin Wolf 30686c3944dcSKevin Wolf if (qcow2_opts->data_file_raw && !qcow2_opts->data_file) { 30696c3944dcSKevin Wolf error_setg(errp, "data-file-raw requires data-file"); 30706c3944dcSKevin Wolf ret = -EINVAL; 30716c3944dcSKevin Wolf goto out; 30726c3944dcSKevin Wolf } 30736c3944dcSKevin Wolf if (qcow2_opts->data_file_raw && qcow2_opts->has_backing_file) { 30746c3944dcSKevin Wolf error_setg(errp, "Backing file and data-file-raw cannot be used at " 30756c3944dcSKevin Wolf "the same time"); 30766c3944dcSKevin Wolf ret = -EINVAL; 30776c3944dcSKevin Wolf goto out; 30786c3944dcSKevin Wolf } 30796c3944dcSKevin Wolf 3080dcc98687SKevin Wolf if (qcow2_opts->data_file) { 3081dcc98687SKevin Wolf if (version < 3) { 3082dcc98687SKevin Wolf error_setg(errp, "External data files are only supported with " 3083dcc98687SKevin Wolf "compatibility level 1.1 and above (use version=v3 or " 
3084dcc98687SKevin Wolf "greater)"); 3085dcc98687SKevin Wolf ret = -EINVAL; 3086dcc98687SKevin Wolf goto out; 3087dcc98687SKevin Wolf } 3088dcc98687SKevin Wolf data_bs = bdrv_open_blockdev_ref(qcow2_opts->data_file, errp); 3089a0cf8363SKevin Wolf if (data_bs == NULL) { 3090dcc98687SKevin Wolf ret = -EIO; 3091dcc98687SKevin Wolf goto out; 3092dcc98687SKevin Wolf } 3093dcc98687SKevin Wolf } 309429ca9e45SKevin Wolf 309529ca9e45SKevin Wolf /* Create BlockBackend to write to the image */ 3096cbf2b7c4SKevin Wolf blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); 3097cbf2b7c4SKevin Wolf ret = blk_insert_bs(blk, bs, errp); 3098a9420734SKevin Wolf if (ret < 0) { 3099cbf2b7c4SKevin Wolf goto out; 3100a9420734SKevin Wolf } 310123588797SKevin Wolf blk_set_allow_write_beyond_eof(blk, true); 310223588797SKevin Wolf 3103e4b5dad8SKevin Wolf /* Clear the protocol layer and preallocate it if necessary */ 3104e4b5dad8SKevin Wolf ret = blk_truncate(blk, 0, PREALLOC_MODE_OFF, errp); 3105e4b5dad8SKevin Wolf if (ret < 0) { 3106e4b5dad8SKevin Wolf goto out; 3107e4b5dad8SKevin Wolf } 3108e4b5dad8SKevin Wolf 3109a9420734SKevin Wolf /* Write the header */ 3110f8413b3cSKevin Wolf QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header)); 3111f8413b3cSKevin Wolf header = g_malloc0(cluster_size); 3112f8413b3cSKevin Wolf *header = (QCowHeader) { 3113f8413b3cSKevin Wolf .magic = cpu_to_be32(QCOW_MAGIC), 3114f8413b3cSKevin Wolf .version = cpu_to_be32(version), 31150eb4a8c1SStefan Hajnoczi .cluster_bits = cpu_to_be32(ctz32(cluster_size)), 3116f8413b3cSKevin Wolf .size = cpu_to_be64(0), 3117f8413b3cSKevin Wolf .l1_table_offset = cpu_to_be64(0), 3118f8413b3cSKevin Wolf .l1_size = cpu_to_be32(0), 3119f8413b3cSKevin Wolf .refcount_table_offset = cpu_to_be64(cluster_size), 3120f8413b3cSKevin Wolf .refcount_table_clusters = cpu_to_be32(1), 3121bd4b167fSMax Reitz .refcount_order = cpu_to_be32(refcount_order), 3122f8413b3cSKevin Wolf .header_length = cpu_to_be32(sizeof(*header)), 
3123f8413b3cSKevin Wolf }; 3124a9420734SKevin Wolf 3125b25b387fSDaniel P. Berrange /* We'll update this to correct value later */ 3126f8413b3cSKevin Wolf header->crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); 3127a9420734SKevin Wolf 312829ca9e45SKevin Wolf if (qcow2_opts->lazy_refcounts) { 3129f8413b3cSKevin Wolf header->compatible_features |= 3130bfe8043eSStefan Hajnoczi cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS); 3131bfe8043eSStefan Hajnoczi } 3132dcc98687SKevin Wolf if (data_bs) { 3133dcc98687SKevin Wolf header->incompatible_features |= 3134dcc98687SKevin Wolf cpu_to_be64(QCOW2_INCOMPAT_DATA_FILE); 3135dcc98687SKevin Wolf } 31366c3944dcSKevin Wolf if (qcow2_opts->data_file_raw) { 31376c3944dcSKevin Wolf header->autoclear_features |= 31386c3944dcSKevin Wolf cpu_to_be64(QCOW2_AUTOCLEAR_DATA_FILE_RAW); 31396c3944dcSKevin Wolf } 3140bfe8043eSStefan Hajnoczi 31418341f00dSEric Blake ret = blk_pwrite(blk, 0, header, cluster_size, 0); 3142f8413b3cSKevin Wolf g_free(header); 3143a9420734SKevin Wolf if (ret < 0) { 31443ef6c40aSMax Reitz error_setg_errno(errp, -ret, "Could not write qcow2 header"); 3145a9420734SKevin Wolf goto out; 3146a9420734SKevin Wolf } 3147a9420734SKevin Wolf 3148b106ad91SKevin Wolf /* Write a refcount table with one refcount block */ 3149b106ad91SKevin Wolf refcount_table = g_malloc0(2 * cluster_size); 3150b106ad91SKevin Wolf refcount_table[0] = cpu_to_be64(2 * cluster_size); 31518341f00dSEric Blake ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size, 0); 31527267c094SAnthony Liguori g_free(refcount_table); 3153a9420734SKevin Wolf 3154a9420734SKevin Wolf if (ret < 0) { 31553ef6c40aSMax Reitz error_setg_errno(errp, -ret, "Could not write refcount table"); 3156a9420734SKevin Wolf goto out; 3157a9420734SKevin Wolf } 3158a9420734SKevin Wolf 315923588797SKevin Wolf blk_unref(blk); 316023588797SKevin Wolf blk = NULL; 3161a9420734SKevin Wolf 3162a9420734SKevin Wolf /* 3163a9420734SKevin Wolf * And now open the image and make it consistent first 
(i.e. increase the 3164a9420734SKevin Wolf * refcount of the cluster that is occupied by the header and the refcount 3165a9420734SKevin Wolf * table) 3166a9420734SKevin Wolf */ 3167e6641719SMax Reitz options = qdict_new(); 316846f5ac20SEric Blake qdict_put_str(options, "driver", "qcow2"); 3169cbf2b7c4SKevin Wolf qdict_put_str(options, "file", bs->node_name); 3170dcc98687SKevin Wolf if (data_bs) { 3171dcc98687SKevin Wolf qdict_put_str(options, "data-file", data_bs->node_name); 3172dcc98687SKevin Wolf } 3173cbf2b7c4SKevin Wolf blk = blk_new_open(NULL, NULL, options, 317455880601SKevin Wolf BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH, 317555880601SKevin Wolf &local_err); 317623588797SKevin Wolf if (blk == NULL) { 31773ef6c40aSMax Reitz error_propagate(errp, local_err); 317823588797SKevin Wolf ret = -EIO; 3179a9420734SKevin Wolf goto out; 3180a9420734SKevin Wolf } 3181a9420734SKevin Wolf 318223588797SKevin Wolf ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size); 3183a9420734SKevin Wolf if (ret < 0) { 31843ef6c40aSMax Reitz error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 " 31853ef6c40aSMax Reitz "header and refcount table"); 3186a9420734SKevin Wolf goto out; 3187a9420734SKevin Wolf 3188a9420734SKevin Wolf } else if (ret != 0) { 3189a9420734SKevin Wolf error_report("Huh, first cluster in empty image is already in use?"); 3190a9420734SKevin Wolf abort(); 3191a9420734SKevin Wolf } 3192a9420734SKevin Wolf 31939b890bdcSKevin Wolf /* Set the external data file if necessary */ 31949b890bdcSKevin Wolf if (data_bs) { 31959b890bdcSKevin Wolf BDRVQcow2State *s = blk_bs(blk)->opaque; 31969b890bdcSKevin Wolf s->image_data_file = g_strdup(data_bs->filename); 31979b890bdcSKevin Wolf } 31989b890bdcSKevin Wolf 3199b527c9b3SKevin Wolf /* Create a full header (including things like feature table) */ 320023588797SKevin Wolf ret = qcow2_update_header(blk_bs(blk)); 3201b527c9b3SKevin Wolf if (ret < 0) { 3202b527c9b3SKevin Wolf error_setg_errno(errp, -ret, "Could 
not update qcow2 header"); 3203b527c9b3SKevin Wolf goto out; 3204b527c9b3SKevin Wolf } 3205b527c9b3SKevin Wolf 3206a9420734SKevin Wolf /* Okay, now that we have a valid image, let's give it the right size */ 3207c5e86ebcSKevin Wolf ret = blk_truncate(blk, qcow2_opts->size, qcow2_opts->preallocation, errp); 3208a9420734SKevin Wolf if (ret < 0) { 3209ed3d2ec9SMax Reitz error_prepend(errp, "Could not resize image: "); 3210a9420734SKevin Wolf goto out; 3211a9420734SKevin Wolf } 3212a9420734SKevin Wolf 3213a9420734SKevin Wolf /* Want a backing file? There you go.*/ 321429ca9e45SKevin Wolf if (qcow2_opts->has_backing_file) { 321529ca9e45SKevin Wolf const char *backing_format = NULL; 321629ca9e45SKevin Wolf 321729ca9e45SKevin Wolf if (qcow2_opts->has_backing_fmt) { 321829ca9e45SKevin Wolf backing_format = BlockdevDriver_str(qcow2_opts->backing_fmt); 321929ca9e45SKevin Wolf } 322029ca9e45SKevin Wolf 322129ca9e45SKevin Wolf ret = bdrv_change_backing_file(blk_bs(blk), qcow2_opts->backing_file, 322229ca9e45SKevin Wolf backing_format); 3223a9420734SKevin Wolf if (ret < 0) { 32243ef6c40aSMax Reitz error_setg_errno(errp, -ret, "Could not assign backing file '%s' " 322529ca9e45SKevin Wolf "with format '%s'", qcow2_opts->backing_file, 322629ca9e45SKevin Wolf backing_format); 3227a9420734SKevin Wolf goto out; 3228a9420734SKevin Wolf } 3229a9420734SKevin Wolf } 3230a9420734SKevin Wolf 3231b25b387fSDaniel P. Berrange /* Want encryption? There you go. */ 323260900b7bSKevin Wolf if (qcow2_opts->has_encrypt) { 323360900b7bSKevin Wolf ret = qcow2_set_up_encryption(blk_bs(blk), qcow2_opts->encrypt, errp); 3234b25b387fSDaniel P. Berrange if (ret < 0) { 3235b25b387fSDaniel P. Berrange goto out; 3236b25b387fSDaniel P. Berrange } 3237b25b387fSDaniel P. Berrange } 3238b25b387fSDaniel P. Berrange 323923588797SKevin Wolf blk_unref(blk); 324023588797SKevin Wolf blk = NULL; 3241ba2ab2f2SMax Reitz 3242b25b387fSDaniel P. 
/* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning.
     * Using BDRV_O_NO_IO, since encryption is now setup we don't want to
     * have to setup decryption context. We're not doing any I/O on the top
     * level BlockDriverState, only lower layers, where BDRV_O_NO_IO does
     * not have effect.
     */
    options = qdict_new();
    qdict_put_str(options, "driver", "qcow2");
    qdict_put_str(options, "file", bs->node_name);
    if (data_bs) {
        qdict_put_str(options, "data-file", data_bs->node_name);
    }
    blk = blk_new_open(NULL, NULL, options,
                       BDRV_O_RDWR | BDRV_O_NO_BACKING | BDRV_O_NO_IO,
                       &local_err);
    if (blk == NULL) {
        error_propagate(errp, local_err);
        ret = -EIO;
        goto out;
    }

    ret = 0;
out:
    /* Common exit: drop the references taken above on every path */
    blk_unref(blk);
    bdrv_unref(bs);
    bdrv_unref(data_bs);
    return ret;
}

/*
 * qcow2_co_create_opts()
 *
 * QemuOpts-based image creation.  Translates @opts into a
 * BlockdevCreateOptions object, creates and opens the protocol-level file
 * @filename (plus an external data file if one was requested) and then
 * hands over to qcow2_co_create() for the format layer.
 *
 * Returns 0 on success or a negative errno value; failures are reported
 * through @errp by the helper that detected them.
 */
static int coroutine_fn qcow2_co_create_opts(const char *filename, QemuOpts *opts,
                                             Error **errp)
{
    BlockdevCreateOptions *create_options = NULL;
    QDict *qdict;
    Visitor *v;
    BlockDriverState *bs = NULL;
    BlockDriverState *data_bs = NULL;
    Error *local_err = NULL;
    const char *val;
    int ret;

    /* Only the keyval visitor supports the dotted syntax needed for
     * encryption, so go through a QDict before getting a QAPI type. Ignore
     * options meant for the protocol layer so that the visitor doesn't
     * complain. */
    qdict = qemu_opts_to_qdict_filtered(opts, NULL, bdrv_qcow2.create_opts,
                                        true);

    /* Handle encryption options */
    /* Legacy boolean form: "on" is mapped to format "qcow", "off" is dropped */
    val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT);
    if (val && !strcmp(val, "on")) {
        qdict_put_str(qdict, BLOCK_OPT_ENCRYPT, "qcow");
    } else if (val && !strcmp(val, "off")) {
        qdict_del(qdict, BLOCK_OPT_ENCRYPT);
    }

    /* "aes" is accepted as another spelling of the "qcow" format */
    val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT);
    if (val && !strcmp(val, "aes")) {
        qdict_put_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT, "qcow");
    }

    /* Convert compat=0.10/1.1 into compat=v2/v3, to be renamed into
     * version=v2/v3 below. */
    val = qdict_get_try_str(qdict, BLOCK_OPT_COMPAT_LEVEL);
    if (val && !strcmp(val, "0.10")) {
        qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v2");
    } else if (val && !strcmp(val, "1.1")) {
        qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v3");
    }

    /* Change legacy command line options into QMP ones */
    static const QDictRenames opt_renames[] = {
        { BLOCK_OPT_BACKING_FILE,       "backing-file" },
        { BLOCK_OPT_BACKING_FMT,        "backing-fmt" },
        { BLOCK_OPT_CLUSTER_SIZE,       "cluster-size" },
        { BLOCK_OPT_LAZY_REFCOUNTS,     "lazy-refcounts" },
        { BLOCK_OPT_REFCOUNT_BITS,      "refcount-bits" },
        { BLOCK_OPT_ENCRYPT,            BLOCK_OPT_ENCRYPT_FORMAT },
        { BLOCK_OPT_COMPAT_LEVEL,       "version" },
        { BLOCK_OPT_DATA_FILE_RAW,      "data-file-raw" },
        { NULL, NULL },
    };

    if (!qdict_rename_keys(qdict, opt_renames, errp)) {
        ret = -EINVAL;
        goto finish;
    }

    /* Create and open the file (protocol layer) */
    ret = bdrv_create_file(filename, opts, errp);
    if (ret < 0) {
        goto finish;
    }

    bs = bdrv_open(filename, NULL, NULL,
                   BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
    if (bs == NULL) {
        ret = -EIO;
        goto finish;
    }

    /* Create and open an external data file (protocol layer) */
    val = qdict_get_try_str(qdict, BLOCK_OPT_DATA_FILE);
    if (val) {
        ret = bdrv_create_file(val, opts, errp);
        if (ret < 0) {
            goto finish;
        }

        data_bs = bdrv_open(val, NULL, NULL,
                            BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
                            errp);
        if (data_bs == NULL) {
            ret = -EIO;
            goto finish;
        }

        /* Replace the file name option by a reference to the opened node */
        qdict_del(qdict, BLOCK_OPT_DATA_FILE);
        qdict_put_str(qdict, "data-file", data_bs->node_name);
    }

    /* Set 'driver' and 'node' options */
    qdict_put_str(qdict, "driver", "qcow2");
    qdict_put_str(qdict, "file", bs->node_name);

    /* Now get the QAPI type BlockdevCreateOptions */
    v = qobject_input_visitor_new_flat_confused(qdict, errp);
    if (!v) {
        ret = -EINVAL;
        goto finish;
    }

    visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err);
    visit_free(v);

    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto finish;
    }

    /* Silently round up size */
    create_options->u.qcow2.size = ROUND_UP(create_options->u.qcow2.size,
                                            BDRV_SECTOR_SIZE);

    /* Create the qcow2 image (format layer) */
    ret = qcow2_co_create(create_options, errp);
    if (ret < 0) {
        goto finish;
    }

    ret = 0;
finish:
    /* bs and data_bs may still be NULL here when an early step failed */
    qobject_unref(qdict);
    bdrv_unref(bs);
    bdrv_unref(data_bs);
    qapi_free_BlockdevCreateOptions(create_options);
    return ret;
}


/*
 * is_zero()
 *
 * Returns true if the byte range [@offset, @offset + @bytes), clamped to the
 * image length, is empty or is reported as BDRV_BLOCK_ZERO by a single
 * bdrv_block_status_above() query that covers the whole range.  A status
 * error, a non-zero status or a shorter extent all yield false.
 */
static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes)
{
    int64_t nr;
    int res;

    /* Clamp to image length, before checking status of underlying sectors */
    if (offset + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) {
        bytes = bs->total_sectors * BDRV_SECTOR_SIZE - offset;
    }

    if (!bytes) {
        return true;
    }
    res = bdrv_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL);
    /* nr == bytes: the reported extent has to span the complete request */
    return res >= 0 && (res & BDRV_BLOCK_ZERO) && nr == bytes;
}
Lunev 34205544b59fSEric Blake static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, 3421f5a5ca79SManos Pitsidianakis int64_t offset, int bytes, BdrvRequestFlags flags) 3422621f0589SKevin Wolf { 3423621f0589SKevin Wolf int ret; 3424ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 3425621f0589SKevin Wolf 34265544b59fSEric Blake uint32_t head = offset % s->cluster_size; 3427f5a5ca79SManos Pitsidianakis uint32_t tail = (offset + bytes) % s->cluster_size; 34282928abceSDenis V. Lunev 3429f5a5ca79SManos Pitsidianakis trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, bytes); 3430f5a5ca79SManos Pitsidianakis if (offset + bytes == bs->total_sectors * BDRV_SECTOR_SIZE) { 3431fbaa6bb3SEric Blake tail = 0; 3432fbaa6bb3SEric Blake } 34335a64e942SDenis V. Lunev 3434ebb718a5SEric Blake if (head || tail) { 3435ebb718a5SEric Blake uint64_t off; 3436ecfe1863SKevin Wolf unsigned int nr; 34372928abceSDenis V. Lunev 3438f5a5ca79SManos Pitsidianakis assert(head + bytes <= s->cluster_size); 34392928abceSDenis V. Lunev 3440ebb718a5SEric Blake /* check whether remainder of cluster already reads as zero */ 3441f06f6b66SEric Blake if (!(is_zero(bs, offset - head, head) && 3442f06f6b66SEric Blake is_zero(bs, offset + bytes, 3443f06f6b66SEric Blake tail ? s->cluster_size - tail : 0))) { 3444621f0589SKevin Wolf return -ENOTSUP; 3445621f0589SKevin Wolf } 3446621f0589SKevin Wolf 3447621f0589SKevin Wolf qemu_co_mutex_lock(&s->lock); 34482928abceSDenis V. Lunev /* We can have new write after previous check */ 3449f06f6b66SEric Blake offset = QEMU_ALIGN_DOWN(offset, s->cluster_size); 3450f5a5ca79SManos Pitsidianakis bytes = s->cluster_size; 3451ecfe1863SKevin Wolf nr = s->cluster_size; 34525544b59fSEric Blake ret = qcow2_get_cluster_offset(bs, offset, &nr, &off); 3453fdfab37dSEric Blake if (ret != QCOW2_CLUSTER_UNALLOCATED && 3454fdfab37dSEric Blake ret != QCOW2_CLUSTER_ZERO_PLAIN && 3455fdfab37dSEric Blake ret != QCOW2_CLUSTER_ZERO_ALLOC) { 34562928abceSDenis V. 
Lunev qemu_co_mutex_unlock(&s->lock); 34572928abceSDenis V. Lunev return -ENOTSUP; 34582928abceSDenis V. Lunev } 34592928abceSDenis V. Lunev } else { 34602928abceSDenis V. Lunev qemu_co_mutex_lock(&s->lock); 34612928abceSDenis V. Lunev } 34622928abceSDenis V. Lunev 3463f5a5ca79SManos Pitsidianakis trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, bytes); 34645a64e942SDenis V. Lunev 34652928abceSDenis V. Lunev /* Whatever is left can use real zero clusters */ 3466f5a5ca79SManos Pitsidianakis ret = qcow2_cluster_zeroize(bs, offset, bytes, flags); 3467621f0589SKevin Wolf qemu_co_mutex_unlock(&s->lock); 3468621f0589SKevin Wolf 3469621f0589SKevin Wolf return ret; 3470621f0589SKevin Wolf } 3471621f0589SKevin Wolf 347282e8a788SEric Blake static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs, 3473f5a5ca79SManos Pitsidianakis int64_t offset, int bytes) 34745ea929e3SKevin Wolf { 34756db39ae2SPaolo Bonzini int ret; 3476ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 34776db39ae2SPaolo Bonzini 3478f5a5ca79SManos Pitsidianakis if (!QEMU_IS_ALIGNED(offset | bytes, s->cluster_size)) { 3479f5a5ca79SManos Pitsidianakis assert(bytes < s->cluster_size); 3480048c5fd1SEric Blake /* Ignore partial clusters, except for the special case of the 3481048c5fd1SEric Blake * complete partial cluster at the end of an unaligned file */ 3482048c5fd1SEric Blake if (!QEMU_IS_ALIGNED(offset, s->cluster_size) || 3483f5a5ca79SManos Pitsidianakis offset + bytes != bs->total_sectors * BDRV_SECTOR_SIZE) { 348449228d1eSEric Blake return -ENOTSUP; 348549228d1eSEric Blake } 3486048c5fd1SEric Blake } 348749228d1eSEric Blake 34886db39ae2SPaolo Bonzini qemu_co_mutex_lock(&s->lock); 3489f5a5ca79SManos Pitsidianakis ret = qcow2_cluster_discard(bs, offset, bytes, QCOW2_DISCARD_REQUEST, 3490d2cb36afSEric Blake false); 34916db39ae2SPaolo Bonzini qemu_co_mutex_unlock(&s->lock); 34926db39ae2SPaolo Bonzini return ret; 34935ea929e3SKevin Wolf } 34945ea929e3SKevin Wolf 3495fd9fcd37SFam Zheng static 
int coroutine_fn 3496fd9fcd37SFam Zheng qcow2_co_copy_range_from(BlockDriverState *bs, 3497fd9fcd37SFam Zheng BdrvChild *src, uint64_t src_offset, 3498fd9fcd37SFam Zheng BdrvChild *dst, uint64_t dst_offset, 349967b51fb9SVladimir Sementsov-Ogievskiy uint64_t bytes, BdrvRequestFlags read_flags, 350067b51fb9SVladimir Sementsov-Ogievskiy BdrvRequestFlags write_flags) 3501fd9fcd37SFam Zheng { 3502fd9fcd37SFam Zheng BDRVQcow2State *s = bs->opaque; 3503fd9fcd37SFam Zheng int ret; 3504fd9fcd37SFam Zheng unsigned int cur_bytes; /* number of bytes in current iteration */ 3505fd9fcd37SFam Zheng BdrvChild *child = NULL; 350667b51fb9SVladimir Sementsov-Ogievskiy BdrvRequestFlags cur_write_flags; 3507fd9fcd37SFam Zheng 3508fd9fcd37SFam Zheng assert(!bs->encrypted); 3509fd9fcd37SFam Zheng qemu_co_mutex_lock(&s->lock); 3510fd9fcd37SFam Zheng 3511fd9fcd37SFam Zheng while (bytes != 0) { 3512fd9fcd37SFam Zheng uint64_t copy_offset = 0; 3513fd9fcd37SFam Zheng /* prepare next request */ 3514fd9fcd37SFam Zheng cur_bytes = MIN(bytes, INT_MAX); 351567b51fb9SVladimir Sementsov-Ogievskiy cur_write_flags = write_flags; 3516fd9fcd37SFam Zheng 3517fd9fcd37SFam Zheng ret = qcow2_get_cluster_offset(bs, src_offset, &cur_bytes, ©_offset); 3518fd9fcd37SFam Zheng if (ret < 0) { 3519fd9fcd37SFam Zheng goto out; 3520fd9fcd37SFam Zheng } 3521fd9fcd37SFam Zheng 3522fd9fcd37SFam Zheng switch (ret) { 3523fd9fcd37SFam Zheng case QCOW2_CLUSTER_UNALLOCATED: 3524fd9fcd37SFam Zheng if (bs->backing && bs->backing->bs) { 3525fd9fcd37SFam Zheng int64_t backing_length = bdrv_getlength(bs->backing->bs); 3526fd9fcd37SFam Zheng if (src_offset >= backing_length) { 352767b51fb9SVladimir Sementsov-Ogievskiy cur_write_flags |= BDRV_REQ_ZERO_WRITE; 3528fd9fcd37SFam Zheng } else { 3529fd9fcd37SFam Zheng child = bs->backing; 3530fd9fcd37SFam Zheng cur_bytes = MIN(cur_bytes, backing_length - src_offset); 3531fd9fcd37SFam Zheng copy_offset = src_offset; 3532fd9fcd37SFam Zheng } 3533fd9fcd37SFam Zheng } else { 
353467b51fb9SVladimir Sementsov-Ogievskiy cur_write_flags |= BDRV_REQ_ZERO_WRITE; 3535fd9fcd37SFam Zheng } 3536fd9fcd37SFam Zheng break; 3537fd9fcd37SFam Zheng 3538fd9fcd37SFam Zheng case QCOW2_CLUSTER_ZERO_PLAIN: 3539fd9fcd37SFam Zheng case QCOW2_CLUSTER_ZERO_ALLOC: 354067b51fb9SVladimir Sementsov-Ogievskiy cur_write_flags |= BDRV_REQ_ZERO_WRITE; 3541fd9fcd37SFam Zheng break; 3542fd9fcd37SFam Zheng 3543fd9fcd37SFam Zheng case QCOW2_CLUSTER_COMPRESSED: 3544fd9fcd37SFam Zheng ret = -ENOTSUP; 3545fd9fcd37SFam Zheng goto out; 3546fd9fcd37SFam Zheng 3547fd9fcd37SFam Zheng case QCOW2_CLUSTER_NORMAL: 3548966b000fSKevin Wolf child = s->data_file; 3549fd9fcd37SFam Zheng copy_offset += offset_into_cluster(s, src_offset); 3550fd9fcd37SFam Zheng if ((copy_offset & 511) != 0) { 3551fd9fcd37SFam Zheng ret = -EIO; 3552fd9fcd37SFam Zheng goto out; 3553fd9fcd37SFam Zheng } 3554fd9fcd37SFam Zheng break; 3555fd9fcd37SFam Zheng 3556fd9fcd37SFam Zheng default: 3557fd9fcd37SFam Zheng abort(); 3558fd9fcd37SFam Zheng } 3559fd9fcd37SFam Zheng qemu_co_mutex_unlock(&s->lock); 3560fd9fcd37SFam Zheng ret = bdrv_co_copy_range_from(child, 3561fd9fcd37SFam Zheng copy_offset, 3562fd9fcd37SFam Zheng dst, dst_offset, 356367b51fb9SVladimir Sementsov-Ogievskiy cur_bytes, read_flags, cur_write_flags); 3564fd9fcd37SFam Zheng qemu_co_mutex_lock(&s->lock); 3565fd9fcd37SFam Zheng if (ret < 0) { 3566fd9fcd37SFam Zheng goto out; 3567fd9fcd37SFam Zheng } 3568fd9fcd37SFam Zheng 3569fd9fcd37SFam Zheng bytes -= cur_bytes; 3570fd9fcd37SFam Zheng src_offset += cur_bytes; 3571fd9fcd37SFam Zheng dst_offset += cur_bytes; 3572fd9fcd37SFam Zheng } 3573fd9fcd37SFam Zheng ret = 0; 3574fd9fcd37SFam Zheng 3575fd9fcd37SFam Zheng out: 3576fd9fcd37SFam Zheng qemu_co_mutex_unlock(&s->lock); 3577fd9fcd37SFam Zheng return ret; 3578fd9fcd37SFam Zheng } 3579fd9fcd37SFam Zheng 3580fd9fcd37SFam Zheng static int coroutine_fn 3581fd9fcd37SFam Zheng qcow2_co_copy_range_to(BlockDriverState *bs, 3582fd9fcd37SFam Zheng BdrvChild *src, 
uint64_t src_offset,
                       BdrvChild *dst, uint64_t dst_offset,
                       uint64_t bytes, BdrvRequestFlags read_flags,
                       BdrvRequestFlags write_flags)
{
    /*
     * Destination side of a copy-range request.  For each piece of the
     * guest range starting at @dst_offset, host clusters are allocated,
     * checked against metadata overlap, filled through
     * bdrv_co_copy_range_to() into the data file and finally linked via
     * qcow2_handle_l2meta().
     *
     * s->lock is held except while the forwarded request is in flight.
     * Returns 0 on success, negative errno on failure.
     */
    BDRVQcow2State *s = bs->opaque;
    int offset_in_cluster;
    int ret;
    unsigned int cur_bytes; /* number of bytes in current iteration */
    uint64_t cluster_offset;
    QCowL2Meta *l2meta = NULL;

    assert(!bs->encrypted);

    qemu_co_mutex_lock(&s->lock);

    while (bytes != 0) {

        l2meta = NULL;

        offset_in_cluster = offset_into_cluster(s, dst_offset);
        cur_bytes = MIN(bytes, INT_MAX);

        /* TODO:
         * If src->bs == dst->bs, we could simply copy by incrementing
         * the refcnt, without copying user data.
         * Or if src->bs == dst->bs->backing->bs, we could copy by discarding.
         */
        ret = qcow2_alloc_cluster_offset(bs, dst_offset, &cur_bytes,
                                         &cluster_offset, &l2meta);
        if (ret < 0) {
            goto fail;
        }

        assert((cluster_offset & 511) == 0);

        ret = qcow2_pre_write_overlap_check(bs, 0,
                cluster_offset + offset_in_cluster, cur_bytes, true);
        if (ret < 0) {
            goto fail;
        }

        /* Drop the lock while the lower layers perform the actual copy */
        qemu_co_mutex_unlock(&s->lock);
        ret = bdrv_co_copy_range_to(src, src_offset,
                                    s->data_file,
                                    cluster_offset + offset_in_cluster,
                                    cur_bytes, read_flags, write_flags);
        qemu_co_mutex_lock(&s->lock);
        if (ret < 0) {
            goto fail;
        }

        ret = qcow2_handle_l2meta(bs, &l2meta, true);
        if (ret) {
            goto fail;
        }

        bytes -= cur_bytes;
        src_offset += cur_bytes;
        dst_offset += cur_bytes;
    }
    ret = 0;

fail:
    /* Reached on success as well; on error this is called with
     * link_l2 == false for whatever l2meta is still pending.
     * NOTE(review): presumably a no-op once l2meta has been handled in the
     * loop - confirm against qcow2_handle_l2meta(). */
    qcow2_handle_l2meta(bs, &l2meta, false);

    qemu_co_mutex_unlock(&s->lock);

    trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);

    return ret;
}

static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
                                          PreallocMode
prealloc, Error **errp)
{
    /*
     * Resizes the image to @offset bytes, which must be a multiple of 512.
     * Images with snapshots, or for which qcow2_truncate_bitmaps_check()
     * fails, are refused.
     *
     * Shrinking discards the cut-off clusters, shrinks the L1 and refcount
     * tables and tries to truncate the image file; it cannot be combined
     * with preallocation.  Growing enlarges the L1 table and then
     * preallocates according to @prealloc.
     *
     * Finally bs->total_sectors, the size field in the on-disk header,
     * s->l1_vm_state_index and the driver options are updated.
     *
     * Runs with s->lock held from the first check that needs driver state
     * until return.  Returns 0 on success, negative errno with @errp set
     * on failure.
     */
    BDRVQcow2State *s = bs->opaque;
    uint64_t old_length;
    int64_t new_l1_size;
    int ret;
    QDict *options;

    if (prealloc != PREALLOC_MODE_OFF && prealloc != PREALLOC_MODE_METADATA &&
        prealloc != PREALLOC_MODE_FALLOC && prealloc != PREALLOC_MODE_FULL)
    {
        error_setg(errp, "Unsupported preallocation mode '%s'",
                   PreallocMode_str(prealloc));
        return -ENOTSUP;
    }

    if (offset & 511) {
        error_setg(errp, "The new size must be a multiple of 512");
        return -EINVAL;
    }

    qemu_co_mutex_lock(&s->lock);

    /* cannot proceed if image has snapshots */
    if (s->nb_snapshots) {
        error_setg(errp, "Can't resize an image which has snapshots");
        ret = -ENOTSUP;
        goto fail;
    }

    /* cannot proceed if image has bitmaps */
    if (qcow2_truncate_bitmaps_check(bs, errp)) {
        ret = -ENOTSUP;
        goto fail;
    }

    old_length = bs->total_sectors * BDRV_SECTOR_SIZE;
    new_l1_size = size_to_l1(s, offset);

    if (offset < old_length) {
        /* Shrinking */
        int64_t last_cluster, old_file_size;
        if (prealloc != PREALLOC_MODE_OFF) {
            error_setg(errp,
                       "Preallocation can't be used for shrinking an image");
            ret = -EINVAL;
            goto fail;
        }

        /* Discard everything from the first cluster boundary at or after
         * the new end up to the old end */
        ret = qcow2_cluster_discard(bs, ROUND_UP(offset, s->cluster_size),
                                    old_length - ROUND_UP(offset,
                                                          s->cluster_size),
                                    QCOW2_DISCARD_ALWAYS, true);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to discard cropped clusters");
            goto fail;
        }

        ret = qcow2_shrink_l1_table(bs, new_l1_size);
        if (ret < 0) {
            error_setg_errno(errp, -ret,
                             "Failed to reduce the number of L2 tables");
            goto fail;
        }

        ret = qcow2_shrink_reftable(bs);
        if (ret < 0) {
            error_setg_errno(errp, -ret,
                             "Failed to discard unused refblocks");
            goto fail;
        }

        old_file_size = bdrv_getlength(bs->file->bs);
        if (old_file_size < 0) {
            error_setg_errno(errp, -old_file_size,
                             "Failed to inquire current file length");
            ret = old_file_size;
            goto fail;
        }
        last_cluster = qcow2_get_last_cluster(bs, old_file_size);
        if (last_cluster < 0) {
            error_setg_errno(errp, -last_cluster,
                             "Failed to find the last cluster");
            ret = last_cluster;
            goto fail;
        }
        if ((last_cluster + 1) * s->cluster_size < old_file_size) {
            Error *local_err = NULL;

            /* Failing to cut the file tail only produces a warning: the
             * return value is ignored and ret is left untouched */
            bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size,
                             PREALLOC_MODE_OFF, &local_err);
            if (local_err) {
                warn_reportf_err(local_err,
                                 "Failed to truncate the tail of the image: ");
            }
        }
    } else {
        /* Growing (or unchanged size) */
        ret = qcow2_grow_l1_table(bs, new_l1_size, true);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to grow the L1 table");
            goto fail;
        }
    }

    switch (prealloc) {
    case PREALLOC_MODE_OFF:
        /* Only an external data file needs to be resized here */
        if (has_data_file(bs)) {
            ret = bdrv_co_truncate(s->data_file, offset, prealloc, errp);
            if (ret < 0) {
                goto fail;
            }
        }
        break;

    case PREALLOC_MODE_METADATA:
        ret = preallocate_co(bs, old_length, offset, prealloc, errp);
        if (ret < 0) {
            goto fail;
        }
        break;

    case PREALLOC_MODE_FALLOC:
    case PREALLOC_MODE_FULL:
    {
        int64_t allocation_start, host_offset, guest_offset;
        int64_t clusters_allocated;
        int64_t old_file_size, new_file_size;
        uint64_t nb_new_data_clusters, nb_new_l2_tables;

        /* With a data file, preallocation means just allocating the metadata
         * and forwarding the truncate request to the data file */
        if (has_data_file(bs)) {
            ret = preallocate_co(bs, old_length, offset, prealloc, errp);
            if (ret < 0) {
                goto fail;
            }
            /* leaves the switch */
            break;
        }

        old_file_size = bdrv_getlength(bs->file->bs);
        if (old_file_size < 0) {
            error_setg_errno(errp, -old_file_size,
                             "Failed to inquire current file length");
            ret = old_file_size;
            goto fail;
        }
        old_file_size = ROUND_UP(old_file_size, s->cluster_size);

        nb_new_data_clusters = DIV_ROUND_UP(offset - old_length,
                                            s->cluster_size);

        /* This is an overestimation; we will not actually allocate space for
         * these in the file but just make sure the new refcount structures are
         * able to cover them so we will not have to allocate new refblocks
         * while entering the data blocks in the potentially new L2 tables.
         * (We do not actually care where the L2 tables are placed. Maybe they
         *  are already allocated or they can be placed somewhere before
         *  @old_file_size. It does not matter because they will be fully
         *  allocated automatically, so they do not need to be covered by the
         *  preallocation. All that matters is that we will not have to allocate
         *  new refcount structures for them.) */
        nb_new_l2_tables = DIV_ROUND_UP(nb_new_data_clusters,
                                        s->cluster_size / sizeof(uint64_t));
        /* The cluster range may not be aligned to L2 boundaries, so add one L2
         * table for a potential head/tail */
        nb_new_l2_tables++;

        allocation_start = qcow2_refcount_area(bs, old_file_size,
                                               nb_new_data_clusters +
                                               nb_new_l2_tables,
                                               true, 0, 0);
        if (allocation_start < 0) {
            error_setg_errno(errp, -allocation_start,
                             "Failed to resize refcount structures");
            ret = allocation_start;
            goto fail;
        }

        clusters_allocated = qcow2_alloc_clusters_at(bs, allocation_start,
                                                     nb_new_data_clusters);
        if (clusters_allocated < 0) {
            error_setg_errno(errp, -clusters_allocated,
                             "Failed to allocate data clusters");
            ret = clusters_allocated;
            goto fail;
        }

        assert(clusters_allocated == nb_new_data_clusters);

        /* Allocate the data area */
        new_file_size = allocation_start +
                        nb_new_data_clusters * s->cluster_size;
        ret = bdrv_co_truncate(bs->file, new_file_size, prealloc, errp);
        if (ret < 0) {
            error_prepend(errp, "Failed to resize underlying file: ");
            /* Give back the clusters reserved just above */
            qcow2_free_clusters(bs, allocation_start,
                                nb_new_data_clusters * s->cluster_size,
                                QCOW2_DISCARD_OTHER);
            goto fail;
        }

        /* Create the necessary L2 entries */
        host_offset = allocation_start;
        guest_offset = old_length;
        while (nb_new_data_clusters) {
            /* Link at most up to the end of the current L2 slice per pass */
            int64_t nb_clusters = MIN(
                nb_new_data_clusters,
                s->l2_slice_size - offset_to_l2_slice_index(s, guest_offset));
            QCowL2Meta allocation = {
                .offset       = guest_offset,
                .alloc_offset = host_offset,
                .nb_clusters  = nb_clusters,
            };
            qemu_co_queue_init(&allocation.dependent_requests);

            ret = qcow2_alloc_cluster_link_l2(bs, &allocation);
            if (ret < 0) {
                error_setg_errno(errp, -ret, "Failed to update L2 tables");
                /* Free only the part that has not been linked yet */
                qcow2_free_clusters(bs, host_offset,
                                    nb_new_data_clusters * s->cluster_size,
                                    QCOW2_DISCARD_OTHER);
                goto fail;
            }

            guest_offset += nb_clusters * s->cluster_size;
            host_offset += nb_clusters * s->cluster_size;
            nb_new_data_clusters -= nb_clusters;
        }
        break;
    }

    default:
        g_assert_not_reached();
    }

    if (prealloc != PREALLOC_MODE_OFF) {
        /* Flush metadata before actually changing the image size */
        ret = qcow2_write_caches(bs);
        if (ret < 0) {
            error_setg_errno(errp, -ret,
                             "Failed to flush the preallocated area to disk");
            goto fail;
        }
    }

    bs->total_sectors = offset / BDRV_SECTOR_SIZE;

    /* write updated header.size */
    /* From here on @offset holds the big-endian on-disk representation */
    offset = cpu_to_be64(offset);
    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
                           &offset, sizeof(uint64_t));
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Failed to update the image size");
        goto fail;
    }

    s->l1_vm_state_index = new_l1_size;

    /* Update cache sizes */
    options = qdict_clone_shallow(bs->options);
    ret = qcow2_update_options(bs, options, s->flags, errp);
    qobject_unref(options);
    if (ret < 0) {
        goto fail;
    }
    ret = 0;
fail:
    /* Common exit for success and failure */
    qemu_co_mutex_unlock(&s->lock);
    return ret;
}

/*
 * qcow2_compress()
 *
 * @dest - destination buffer, @dest_size bytes
 * @src - source
buffer, @src_size bytes 39302714f13dSVladimir Sementsov-Ogievskiy * 39312714f13dSVladimir Sementsov-Ogievskiy * Returns: compressed size on success 3932e1f4a37aSAlberto Garcia * -ENOMEM destination buffer is not enough to store compressed data 3933e1f4a37aSAlberto Garcia * -EIO on any other error 39342714f13dSVladimir Sementsov-Ogievskiy */ 39356994fd78SVladimir Sementsov-Ogievskiy static ssize_t qcow2_compress(void *dest, size_t dest_size, 39366994fd78SVladimir Sementsov-Ogievskiy const void *src, size_t src_size) 39372714f13dSVladimir Sementsov-Ogievskiy { 39382714f13dSVladimir Sementsov-Ogievskiy ssize_t ret; 39392714f13dSVladimir Sementsov-Ogievskiy z_stream strm; 39402714f13dSVladimir Sementsov-Ogievskiy 39412714f13dSVladimir Sementsov-Ogievskiy /* best compression, small window, no zlib header */ 39422714f13dSVladimir Sementsov-Ogievskiy memset(&strm, 0, sizeof(strm)); 39432714f13dSVladimir Sementsov-Ogievskiy ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 39442714f13dSVladimir Sementsov-Ogievskiy -12, 9, Z_DEFAULT_STRATEGY); 394519a44488SVladimir Sementsov-Ogievskiy if (ret != Z_OK) { 3946e1f4a37aSAlberto Garcia return -EIO; 39472714f13dSVladimir Sementsov-Ogievskiy } 39482714f13dSVladimir Sementsov-Ogievskiy 39492714f13dSVladimir Sementsov-Ogievskiy /* strm.next_in is not const in old zlib versions, such as those used on 39502714f13dSVladimir Sementsov-Ogievskiy * OpenBSD/NetBSD, so cast the const away */ 39516994fd78SVladimir Sementsov-Ogievskiy strm.avail_in = src_size; 39522714f13dSVladimir Sementsov-Ogievskiy strm.next_in = (void *) src; 39536994fd78SVladimir Sementsov-Ogievskiy strm.avail_out = dest_size; 39542714f13dSVladimir Sementsov-Ogievskiy strm.next_out = dest; 39552714f13dSVladimir Sementsov-Ogievskiy 39562714f13dSVladimir Sementsov-Ogievskiy ret = deflate(&strm, Z_FINISH); 39572714f13dSVladimir Sementsov-Ogievskiy if (ret == Z_STREAM_END) { 39586994fd78SVladimir Sementsov-Ogievskiy ret = dest_size - strm.avail_out; 
39592714f13dSVladimir Sementsov-Ogievskiy } else { 3960e1f4a37aSAlberto Garcia ret = (ret == Z_OK ? -ENOMEM : -EIO); 39612714f13dSVladimir Sementsov-Ogievskiy } 39622714f13dSVladimir Sementsov-Ogievskiy 39632714f13dSVladimir Sementsov-Ogievskiy deflateEnd(&strm); 39642714f13dSVladimir Sementsov-Ogievskiy 39652714f13dSVladimir Sementsov-Ogievskiy return ret; 39662714f13dSVladimir Sementsov-Ogievskiy } 39672714f13dSVladimir Sementsov-Ogievskiy 3968341926abSVladimir Sementsov-Ogievskiy /* 3969341926abSVladimir Sementsov-Ogievskiy * qcow2_decompress() 3970341926abSVladimir Sementsov-Ogievskiy * 3971341926abSVladimir Sementsov-Ogievskiy * Decompress some data (not more than @src_size bytes) to produce exactly 3972341926abSVladimir Sementsov-Ogievskiy * @dest_size bytes. 3973341926abSVladimir Sementsov-Ogievskiy * 3974341926abSVladimir Sementsov-Ogievskiy * @dest - destination buffer, @dest_size bytes 3975341926abSVladimir Sementsov-Ogievskiy * @src - source buffer, @src_size bytes 3976341926abSVladimir Sementsov-Ogievskiy * 3977341926abSVladimir Sementsov-Ogievskiy * Returns: 0 on success 3978341926abSVladimir Sementsov-Ogievskiy * -1 on fail 3979341926abSVladimir Sementsov-Ogievskiy */ 3980341926abSVladimir Sementsov-Ogievskiy static ssize_t qcow2_decompress(void *dest, size_t dest_size, 3981341926abSVladimir Sementsov-Ogievskiy const void *src, size_t src_size) 3982f4b3e2a9SVladimir Sementsov-Ogievskiy { 3983341926abSVladimir Sementsov-Ogievskiy int ret = 0; 3984341926abSVladimir Sementsov-Ogievskiy z_stream strm; 3985f4b3e2a9SVladimir Sementsov-Ogievskiy 3986341926abSVladimir Sementsov-Ogievskiy memset(&strm, 0, sizeof(strm)); 3987341926abSVladimir Sementsov-Ogievskiy strm.avail_in = src_size; 3988341926abSVladimir Sementsov-Ogievskiy strm.next_in = (void *) src; 3989341926abSVladimir Sementsov-Ogievskiy strm.avail_out = dest_size; 3990341926abSVladimir Sementsov-Ogievskiy strm.next_out = dest; 3991f4b3e2a9SVladimir Sementsov-Ogievskiy 3992341926abSVladimir 
Sementsov-Ogievskiy ret = inflateInit2(&strm, -12); 3993f4b3e2a9SVladimir Sementsov-Ogievskiy if (ret != Z_OK) { 3994f4b3e2a9SVladimir Sementsov-Ogievskiy return -1; 3995f4b3e2a9SVladimir Sementsov-Ogievskiy } 3996341926abSVladimir Sementsov-Ogievskiy 3997341926abSVladimir Sementsov-Ogievskiy ret = inflate(&strm, Z_FINISH); 3998341926abSVladimir Sementsov-Ogievskiy if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || strm.avail_out != 0) { 3999341926abSVladimir Sementsov-Ogievskiy /* We approve Z_BUF_ERROR because we need @dest buffer to be filled, but 4000341926abSVladimir Sementsov-Ogievskiy * @src buffer may be processed partly (because in qcow2 we know size of 4001341926abSVladimir Sementsov-Ogievskiy * compressed data with precision of one sector) */ 4002341926abSVladimir Sementsov-Ogievskiy ret = -1; 4003f4b3e2a9SVladimir Sementsov-Ogievskiy } 4004341926abSVladimir Sementsov-Ogievskiy 4005341926abSVladimir Sementsov-Ogievskiy inflateEnd(&strm); 4006341926abSVladimir Sementsov-Ogievskiy 4007341926abSVladimir Sementsov-Ogievskiy return ret; 4008f4b3e2a9SVladimir Sementsov-Ogievskiy } 4009f4b3e2a9SVladimir Sementsov-Ogievskiy 4010ceb029cdSVladimir Sementsov-Ogievskiy #define MAX_COMPRESS_THREADS 4 4011ceb029cdSVladimir Sementsov-Ogievskiy 4012e23c9d7aSVladimir Sementsov-Ogievskiy typedef ssize_t (*Qcow2CompressFunc)(void *dest, size_t dest_size, 4013e23c9d7aSVladimir Sementsov-Ogievskiy const void *src, size_t src_size); 4014ceb029cdSVladimir Sementsov-Ogievskiy typedef struct Qcow2CompressData { 4015ceb029cdSVladimir Sementsov-Ogievskiy void *dest; 40166994fd78SVladimir Sementsov-Ogievskiy size_t dest_size; 4017ceb029cdSVladimir Sementsov-Ogievskiy const void *src; 40186994fd78SVladimir Sementsov-Ogievskiy size_t src_size; 4019ceb029cdSVladimir Sementsov-Ogievskiy ssize_t ret; 4020e23c9d7aSVladimir Sementsov-Ogievskiy 4021e23c9d7aSVladimir Sementsov-Ogievskiy Qcow2CompressFunc func; 4022ceb029cdSVladimir Sementsov-Ogievskiy } Qcow2CompressData; 
4023ceb029cdSVladimir Sementsov-Ogievskiy 4024ceb029cdSVladimir Sementsov-Ogievskiy static int qcow2_compress_pool_func(void *opaque) 4025ceb029cdSVladimir Sementsov-Ogievskiy { 4026ceb029cdSVladimir Sementsov-Ogievskiy Qcow2CompressData *data = opaque; 4027ceb029cdSVladimir Sementsov-Ogievskiy 4028e23c9d7aSVladimir Sementsov-Ogievskiy data->ret = data->func(data->dest, data->dest_size, 40296994fd78SVladimir Sementsov-Ogievskiy data->src, data->src_size); 4030ceb029cdSVladimir Sementsov-Ogievskiy 4031ceb029cdSVladimir Sementsov-Ogievskiy return 0; 4032ceb029cdSVladimir Sementsov-Ogievskiy } 4033ceb029cdSVladimir Sementsov-Ogievskiy 4034ceb029cdSVladimir Sementsov-Ogievskiy static void qcow2_compress_complete(void *opaque, int ret) 4035ceb029cdSVladimir Sementsov-Ogievskiy { 4036ceb029cdSVladimir Sementsov-Ogievskiy qemu_coroutine_enter(opaque); 4037ceb029cdSVladimir Sementsov-Ogievskiy } 4038ceb029cdSVladimir Sementsov-Ogievskiy 4039e23c9d7aSVladimir Sementsov-Ogievskiy static ssize_t coroutine_fn 4040e23c9d7aSVladimir Sementsov-Ogievskiy qcow2_co_do_compress(BlockDriverState *bs, void *dest, size_t dest_size, 4041e23c9d7aSVladimir Sementsov-Ogievskiy const void *src, size_t src_size, Qcow2CompressFunc func) 4042ceb029cdSVladimir Sementsov-Ogievskiy { 4043ceb029cdSVladimir Sementsov-Ogievskiy BDRVQcow2State *s = bs->opaque; 4044ceb029cdSVladimir Sementsov-Ogievskiy BlockAIOCB *acb; 4045ceb029cdSVladimir Sementsov-Ogievskiy ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); 4046ceb029cdSVladimir Sementsov-Ogievskiy Qcow2CompressData arg = { 4047ceb029cdSVladimir Sementsov-Ogievskiy .dest = dest, 40486994fd78SVladimir Sementsov-Ogievskiy .dest_size = dest_size, 4049ceb029cdSVladimir Sementsov-Ogievskiy .src = src, 40506994fd78SVladimir Sementsov-Ogievskiy .src_size = src_size, 4051e23c9d7aSVladimir Sementsov-Ogievskiy .func = func, 4052ceb029cdSVladimir Sementsov-Ogievskiy }; 4053ceb029cdSVladimir Sementsov-Ogievskiy 4054ceb029cdSVladimir 
Sementsov-Ogievskiy while (s->nb_compress_threads >= MAX_COMPRESS_THREADS) { 4055ceb029cdSVladimir Sementsov-Ogievskiy qemu_co_queue_wait(&s->compress_wait_queue, NULL); 4056ceb029cdSVladimir Sementsov-Ogievskiy } 4057ceb029cdSVladimir Sementsov-Ogievskiy 4058ceb029cdSVladimir Sementsov-Ogievskiy s->nb_compress_threads++; 4059ceb029cdSVladimir Sementsov-Ogievskiy acb = thread_pool_submit_aio(pool, qcow2_compress_pool_func, &arg, 4060ceb029cdSVladimir Sementsov-Ogievskiy qcow2_compress_complete, 4061ceb029cdSVladimir Sementsov-Ogievskiy qemu_coroutine_self()); 4062ceb029cdSVladimir Sementsov-Ogievskiy 4063ceb029cdSVladimir Sementsov-Ogievskiy if (!acb) { 4064ceb029cdSVladimir Sementsov-Ogievskiy s->nb_compress_threads--; 4065ceb029cdSVladimir Sementsov-Ogievskiy return -EINVAL; 4066ceb029cdSVladimir Sementsov-Ogievskiy } 4067ceb029cdSVladimir Sementsov-Ogievskiy qemu_coroutine_yield(); 4068ceb029cdSVladimir Sementsov-Ogievskiy s->nb_compress_threads--; 4069ceb029cdSVladimir Sementsov-Ogievskiy qemu_co_queue_next(&s->compress_wait_queue); 4070ceb029cdSVladimir Sementsov-Ogievskiy 4071ceb029cdSVladimir Sementsov-Ogievskiy return arg.ret; 4072ceb029cdSVladimir Sementsov-Ogievskiy } 4073ceb029cdSVladimir Sementsov-Ogievskiy 4074e23c9d7aSVladimir Sementsov-Ogievskiy static ssize_t coroutine_fn 4075e23c9d7aSVladimir Sementsov-Ogievskiy qcow2_co_compress(BlockDriverState *bs, void *dest, size_t dest_size, 4076e23c9d7aSVladimir Sementsov-Ogievskiy const void *src, size_t src_size) 4077e23c9d7aSVladimir Sementsov-Ogievskiy { 4078e23c9d7aSVladimir Sementsov-Ogievskiy return qcow2_co_do_compress(bs, dest, dest_size, src, src_size, 4079e23c9d7aSVladimir Sementsov-Ogievskiy qcow2_compress); 4080e23c9d7aSVladimir Sementsov-Ogievskiy } 4081e23c9d7aSVladimir Sementsov-Ogievskiy 4082e23c9d7aSVladimir Sementsov-Ogievskiy static ssize_t coroutine_fn 4083e23c9d7aSVladimir Sementsov-Ogievskiy qcow2_co_decompress(BlockDriverState *bs, void *dest, size_t dest_size, 4084e23c9d7aSVladimir 
Sementsov-Ogievskiy const void *src, size_t src_size) 4085e23c9d7aSVladimir Sementsov-Ogievskiy { 4086e23c9d7aSVladimir Sementsov-Ogievskiy return qcow2_co_do_compress(bs, dest, dest_size, src, src_size, 4087e23c9d7aSVladimir Sementsov-Ogievskiy qcow2_decompress); 4088e23c9d7aSVladimir Sementsov-Ogievskiy } 4089e23c9d7aSVladimir Sementsov-Ogievskiy 409020d97356SBlue Swirl /* XXX: put compressed sectors first, then all the cluster aligned 409120d97356SBlue Swirl tables to avoid losing bytes in alignment */ 4092fcccefc5SPavel Butsykin static coroutine_fn int 4093fcccefc5SPavel Butsykin qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, 4094fcccefc5SPavel Butsykin uint64_t bytes, QEMUIOVector *qiov) 409520d97356SBlue Swirl { 4096ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 40972714f13dSVladimir Sementsov-Ogievskiy int ret; 4098e1f4a37aSAlberto Garcia ssize_t out_len; 4099fcccefc5SPavel Butsykin uint8_t *buf, *out_buf; 410077e023ffSKevin Wolf uint64_t cluster_offset; 410120d97356SBlue Swirl 4102966b000fSKevin Wolf if (has_data_file(bs)) { 4103966b000fSKevin Wolf return -ENOTSUP; 4104966b000fSKevin Wolf } 4105966b000fSKevin Wolf 4106fcccefc5SPavel Butsykin if (bytes == 0) { 410720d97356SBlue Swirl /* align end of file to a sector boundary to ease reading with 410820d97356SBlue Swirl sector based I/Os */ 410977e023ffSKevin Wolf int64_t len = bdrv_getlength(bs->file->bs); 411077e023ffSKevin Wolf if (len < 0) { 411177e023ffSKevin Wolf return len; 4112d0d5d0e3SEric Blake } 411377e023ffSKevin Wolf return bdrv_co_truncate(bs->file, len, PREALLOC_MODE_OFF, NULL); 411420d97356SBlue Swirl } 411520d97356SBlue Swirl 41163e3b838fSAnton Nefedov if (offset_into_cluster(s, offset)) { 41173e3b838fSAnton Nefedov return -EINVAL; 41183e3b838fSAnton Nefedov } 41193e3b838fSAnton Nefedov 4120fcccefc5SPavel Butsykin buf = qemu_blockalign(bs, s->cluster_size); 4121a2c0ca6fSPavel Butsykin if (bytes != s->cluster_size) { 4122a2c0ca6fSPavel Butsykin if (bytes > 
s->cluster_size || 4123a2c0ca6fSPavel Butsykin offset + bytes != bs->total_sectors << BDRV_SECTOR_BITS) 4124a2c0ca6fSPavel Butsykin { 4125a2c0ca6fSPavel Butsykin qemu_vfree(buf); 4126a2c0ca6fSPavel Butsykin return -EINVAL; 4127a2c0ca6fSPavel Butsykin } 4128a2c0ca6fSPavel Butsykin /* Zero-pad last write if image size is not cluster aligned */ 4129a2c0ca6fSPavel Butsykin memset(buf + bytes, 0, s->cluster_size - bytes); 4130a2c0ca6fSPavel Butsykin } 41318b2bd093SPavel Butsykin qemu_iovec_to_buf(qiov, 0, buf, bytes); 413220d97356SBlue Swirl 4133ebf7bba0SVladimir Sementsov-Ogievskiy out_buf = g_malloc(s->cluster_size); 413420d97356SBlue Swirl 41356994fd78SVladimir Sementsov-Ogievskiy out_len = qcow2_co_compress(bs, out_buf, s->cluster_size - 1, 41366994fd78SVladimir Sementsov-Ogievskiy buf, s->cluster_size); 4137e1f4a37aSAlberto Garcia if (out_len == -ENOMEM) { 413820d97356SBlue Swirl /* could not compress: write normal cluster */ 4139fcccefc5SPavel Butsykin ret = qcow2_co_pwritev(bs, offset, bytes, qiov, 0); 41408f1efd00SKevin Wolf if (ret < 0) { 41418f1efd00SKevin Wolf goto fail; 41428f1efd00SKevin Wolf } 4143fcccefc5SPavel Butsykin goto success; 4144e1f4a37aSAlberto Garcia } else if (out_len < 0) { 4145e1f4a37aSAlberto Garcia ret = -EINVAL; 4146e1f4a37aSAlberto Garcia goto fail; 4147fcccefc5SPavel Butsykin } 4148fcccefc5SPavel Butsykin 4149fcccefc5SPavel Butsykin qemu_co_mutex_lock(&s->lock); 415077e023ffSKevin Wolf ret = qcow2_alloc_compressed_cluster_offset(bs, offset, out_len, 415177e023ffSKevin Wolf &cluster_offset); 415277e023ffSKevin Wolf if (ret < 0) { 4153fcccefc5SPavel Butsykin qemu_co_mutex_unlock(&s->lock); 41548f1efd00SKevin Wolf goto fail; 41558f1efd00SKevin Wolf } 4156cf93980eSMax Reitz 4157966b000fSKevin Wolf ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len, true); 4158fcccefc5SPavel Butsykin qemu_co_mutex_unlock(&s->lock); 4159cf93980eSMax Reitz if (ret < 0) { 4160cf93980eSMax Reitz goto fail; 4161cf93980eSMax Reitz } 
4162cf93980eSMax Reitz 4163966b000fSKevin Wolf BLKDBG_EVENT(s->data_file, BLKDBG_WRITE_COMPRESSED); 4164*b00cb15bSVladimir Sementsov-Ogievskiy ret = bdrv_co_pwrite(s->data_file, cluster_offset, out_len, out_buf, 0); 41658f1efd00SKevin Wolf if (ret < 0) { 41668f1efd00SKevin Wolf goto fail; 416720d97356SBlue Swirl } 4168fcccefc5SPavel Butsykin success: 41698f1efd00SKevin Wolf ret = 0; 41708f1efd00SKevin Wolf fail: 4171fcccefc5SPavel Butsykin qemu_vfree(buf); 41727267c094SAnthony Liguori g_free(out_buf); 41738f1efd00SKevin Wolf return ret; 417420d97356SBlue Swirl } 417520d97356SBlue Swirl 4176c3c10f72SVladimir Sementsov-Ogievskiy static int coroutine_fn 4177c3c10f72SVladimir Sementsov-Ogievskiy qcow2_co_preadv_compressed(BlockDriverState *bs, 4178c3c10f72SVladimir Sementsov-Ogievskiy uint64_t file_cluster_offset, 4179c3c10f72SVladimir Sementsov-Ogievskiy uint64_t offset, 4180c3c10f72SVladimir Sementsov-Ogievskiy uint64_t bytes, 4181c3c10f72SVladimir Sementsov-Ogievskiy QEMUIOVector *qiov) 4182f4b3e2a9SVladimir Sementsov-Ogievskiy { 4183f4b3e2a9SVladimir Sementsov-Ogievskiy BDRVQcow2State *s = bs->opaque; 4184c3c10f72SVladimir Sementsov-Ogievskiy int ret = 0, csize, nb_csectors; 4185f4b3e2a9SVladimir Sementsov-Ogievskiy uint64_t coffset; 4186c3c10f72SVladimir Sementsov-Ogievskiy uint8_t *buf, *out_buf; 4187c3c10f72SVladimir Sementsov-Ogievskiy int offset_in_cluster = offset_into_cluster(s, offset); 4188f4b3e2a9SVladimir Sementsov-Ogievskiy 4189c3c10f72SVladimir Sementsov-Ogievskiy coffset = file_cluster_offset & s->cluster_offset_mask; 4190c3c10f72SVladimir Sementsov-Ogievskiy nb_csectors = ((file_cluster_offset >> s->csize_shift) & s->csize_mask) + 1; 4191c068a1cdSVladimir Sementsov-Ogievskiy csize = nb_csectors * 512 - (coffset & 511); 4192f4b3e2a9SVladimir Sementsov-Ogievskiy 4193c3c10f72SVladimir Sementsov-Ogievskiy buf = g_try_malloc(csize); 4194c3c10f72SVladimir Sementsov-Ogievskiy if (!buf) { 4195f4b3e2a9SVladimir Sementsov-Ogievskiy return -ENOMEM; 
4196f4b3e2a9SVladimir Sementsov-Ogievskiy } 4197c068a1cdSVladimir Sementsov-Ogievskiy 4198c3c10f72SVladimir Sementsov-Ogievskiy out_buf = qemu_blockalign(bs, s->cluster_size); 4199c3c10f72SVladimir Sementsov-Ogievskiy 4200f4b3e2a9SVladimir Sementsov-Ogievskiy BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED); 4201*b00cb15bSVladimir Sementsov-Ogievskiy ret = bdrv_co_pread(bs->file, coffset, csize, buf, 0); 4202f4b3e2a9SVladimir Sementsov-Ogievskiy if (ret < 0) { 4203c3c10f72SVladimir Sementsov-Ogievskiy goto fail; 4204c3c10f72SVladimir Sementsov-Ogievskiy } 4205c3c10f72SVladimir Sementsov-Ogievskiy 4206e23c9d7aSVladimir Sementsov-Ogievskiy if (qcow2_co_decompress(bs, out_buf, s->cluster_size, buf, csize) < 0) { 4207c3c10f72SVladimir Sementsov-Ogievskiy ret = -EIO; 4208c3c10f72SVladimir Sementsov-Ogievskiy goto fail; 4209c3c10f72SVladimir Sementsov-Ogievskiy } 4210c3c10f72SVladimir Sementsov-Ogievskiy 4211c3c10f72SVladimir Sementsov-Ogievskiy qemu_iovec_from_buf(qiov, 0, out_buf + offset_in_cluster, bytes); 4212c3c10f72SVladimir Sementsov-Ogievskiy 4213c3c10f72SVladimir Sementsov-Ogievskiy fail: 4214c3c10f72SVladimir Sementsov-Ogievskiy qemu_vfree(out_buf); 4215c3c10f72SVladimir Sementsov-Ogievskiy g_free(buf); 4216c3c10f72SVladimir Sementsov-Ogievskiy 4217f4b3e2a9SVladimir Sementsov-Ogievskiy return ret; 4218f4b3e2a9SVladimir Sementsov-Ogievskiy } 4219f4b3e2a9SVladimir Sementsov-Ogievskiy 422094054183SMax Reitz static int make_completely_empty(BlockDriverState *bs) 422194054183SMax Reitz { 4222ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 4223ed3d2ec9SMax Reitz Error *local_err = NULL; 422494054183SMax Reitz int ret, l1_clusters; 422594054183SMax Reitz int64_t offset; 422694054183SMax Reitz uint64_t *new_reftable = NULL; 422794054183SMax Reitz uint64_t rt_entry, l1_size2; 422894054183SMax Reitz struct { 422994054183SMax Reitz uint64_t l1_offset; 423094054183SMax Reitz uint64_t reftable_offset; 423194054183SMax Reitz uint32_t reftable_clusters; 423294054183SMax 
Reitz } QEMU_PACKED l1_ofs_rt_ofs_cls; 423394054183SMax Reitz 423494054183SMax Reitz ret = qcow2_cache_empty(bs, s->l2_table_cache); 423594054183SMax Reitz if (ret < 0) { 423694054183SMax Reitz goto fail; 423794054183SMax Reitz } 423894054183SMax Reitz 423994054183SMax Reitz ret = qcow2_cache_empty(bs, s->refcount_block_cache); 424094054183SMax Reitz if (ret < 0) { 424194054183SMax Reitz goto fail; 424294054183SMax Reitz } 424394054183SMax Reitz 424494054183SMax Reitz /* Refcounts will be broken utterly */ 424594054183SMax Reitz ret = qcow2_mark_dirty(bs); 424694054183SMax Reitz if (ret < 0) { 424794054183SMax Reitz goto fail; 424894054183SMax Reitz } 424994054183SMax Reitz 425094054183SMax Reitz BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); 425194054183SMax Reitz 425294054183SMax Reitz l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t)); 425394054183SMax Reitz l1_size2 = (uint64_t)s->l1_size * sizeof(uint64_t); 425494054183SMax Reitz 425594054183SMax Reitz /* After this call, neither the in-memory nor the on-disk refcount 425694054183SMax Reitz * information accurately describe the actual references */ 425794054183SMax Reitz 4258720ff280SKevin Wolf ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset, 425974021bc4SEric Blake l1_clusters * s->cluster_size, 0); 426094054183SMax Reitz if (ret < 0) { 426194054183SMax Reitz goto fail_broken_refcounts; 426294054183SMax Reitz } 426394054183SMax Reitz memset(s->l1_table, 0, l1_size2); 426494054183SMax Reitz 426594054183SMax Reitz BLKDBG_EVENT(bs->file, BLKDBG_EMPTY_IMAGE_PREPARE); 426694054183SMax Reitz 426794054183SMax Reitz /* Overwrite enough clusters at the beginning of the sectors to place 426894054183SMax Reitz * the refcount table, a refcount block and the L1 table in; this may 426994054183SMax Reitz * overwrite parts of the existing refcount and L1 table, which is not 427094054183SMax Reitz * an issue because the dirty flag is set, complete data loss is in fact 427194054183SMax Reitz * desired 
and partial data loss is consequently fine as well */ 4272720ff280SKevin Wolf ret = bdrv_pwrite_zeroes(bs->file, s->cluster_size, 427374021bc4SEric Blake (2 + l1_clusters) * s->cluster_size, 0); 427494054183SMax Reitz /* This call (even if it failed overall) may have overwritten on-disk 427594054183SMax Reitz * refcount structures; in that case, the in-memory refcount information 427694054183SMax Reitz * will probably differ from the on-disk information which makes the BDS 427794054183SMax Reitz * unusable */ 427894054183SMax Reitz if (ret < 0) { 427994054183SMax Reitz goto fail_broken_refcounts; 428094054183SMax Reitz } 428194054183SMax Reitz 428294054183SMax Reitz BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); 428394054183SMax Reitz BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE); 428494054183SMax Reitz 428594054183SMax Reitz /* "Create" an empty reftable (one cluster) directly after the image 428694054183SMax Reitz * header and an empty L1 table three clusters after the image header; 428794054183SMax Reitz * the cluster between those two will be used as the first refblock */ 4288f1f7a1ddSPeter Maydell l1_ofs_rt_ofs_cls.l1_offset = cpu_to_be64(3 * s->cluster_size); 4289f1f7a1ddSPeter Maydell l1_ofs_rt_ofs_cls.reftable_offset = cpu_to_be64(s->cluster_size); 4290f1f7a1ddSPeter Maydell l1_ofs_rt_ofs_cls.reftable_clusters = cpu_to_be32(1); 4291d9ca2ea2SKevin Wolf ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset), 429294054183SMax Reitz &l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls)); 429394054183SMax Reitz if (ret < 0) { 429494054183SMax Reitz goto fail_broken_refcounts; 429594054183SMax Reitz } 429694054183SMax Reitz 429794054183SMax Reitz s->l1_table_offset = 3 * s->cluster_size; 429894054183SMax Reitz 429994054183SMax Reitz new_reftable = g_try_new0(uint64_t, s->cluster_size / sizeof(uint64_t)); 430094054183SMax Reitz if (!new_reftable) { 430194054183SMax Reitz ret = -ENOMEM; 430294054183SMax Reitz goto fail_broken_refcounts; 430394054183SMax Reitz } 
430494054183SMax Reitz 430594054183SMax Reitz s->refcount_table_offset = s->cluster_size; 430694054183SMax Reitz s->refcount_table_size = s->cluster_size / sizeof(uint64_t); 43077061a078SAlberto Garcia s->max_refcount_table_index = 0; 430894054183SMax Reitz 430994054183SMax Reitz g_free(s->refcount_table); 431094054183SMax Reitz s->refcount_table = new_reftable; 431194054183SMax Reitz new_reftable = NULL; 431294054183SMax Reitz 431394054183SMax Reitz /* Now the in-memory refcount information again corresponds to the on-disk 431494054183SMax Reitz * information (reftable is empty and no refblocks (the refblock cache is 431594054183SMax Reitz * empty)); however, this means some clusters (e.g. the image header) are 431694054183SMax Reitz * referenced, but not refcounted, but the normal qcow2 code assumes that 431794054183SMax Reitz * the in-memory information is always correct */ 431894054183SMax Reitz 431994054183SMax Reitz BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC); 432094054183SMax Reitz 432194054183SMax Reitz /* Enter the first refblock into the reftable */ 432294054183SMax Reitz rt_entry = cpu_to_be64(2 * s->cluster_size); 4323d9ca2ea2SKevin Wolf ret = bdrv_pwrite_sync(bs->file, s->cluster_size, 432494054183SMax Reitz &rt_entry, sizeof(rt_entry)); 432594054183SMax Reitz if (ret < 0) { 432694054183SMax Reitz goto fail_broken_refcounts; 432794054183SMax Reitz } 432894054183SMax Reitz s->refcount_table[0] = 2 * s->cluster_size; 432994054183SMax Reitz 433094054183SMax Reitz s->free_cluster_index = 0; 433194054183SMax Reitz assert(3 + l1_clusters <= s->refcount_block_size); 433294054183SMax Reitz offset = qcow2_alloc_clusters(bs, 3 * s->cluster_size + l1_size2); 433394054183SMax Reitz if (offset < 0) { 433494054183SMax Reitz ret = offset; 433594054183SMax Reitz goto fail_broken_refcounts; 433694054183SMax Reitz } else if (offset > 0) { 433794054183SMax Reitz error_report("First cluster in emptied image is in use"); 433894054183SMax Reitz abort(); 433994054183SMax 
Reitz } 434094054183SMax Reitz 434194054183SMax Reitz /* Now finally the in-memory information corresponds to the on-disk 434294054183SMax Reitz * structures and is correct */ 434394054183SMax Reitz ret = qcow2_mark_clean(bs); 434494054183SMax Reitz if (ret < 0) { 434594054183SMax Reitz goto fail; 434694054183SMax Reitz } 434794054183SMax Reitz 4348ed3d2ec9SMax Reitz ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size, 43497ea37c30SMax Reitz PREALLOC_MODE_OFF, &local_err); 435094054183SMax Reitz if (ret < 0) { 4351ed3d2ec9SMax Reitz error_report_err(local_err); 435294054183SMax Reitz goto fail; 435394054183SMax Reitz } 435494054183SMax Reitz 435594054183SMax Reitz return 0; 435694054183SMax Reitz 435794054183SMax Reitz fail_broken_refcounts: 435894054183SMax Reitz /* The BDS is unusable at this point. If we wanted to make it usable, we 435994054183SMax Reitz * would have to call qcow2_refcount_close(), qcow2_refcount_init(), 436094054183SMax Reitz * qcow2_check_refcounts(), qcow2_refcount_close() and qcow2_refcount_init() 436194054183SMax Reitz * again. However, because the functions which could have caused this error 436294054183SMax Reitz * path to be taken are used by those functions as well, it's very likely 436394054183SMax Reitz * that that sequence will fail as well. Therefore, just eject the BDS. 
*/ 436494054183SMax Reitz bs->drv = NULL; 436594054183SMax Reitz 436694054183SMax Reitz fail: 436794054183SMax Reitz g_free(new_reftable); 436894054183SMax Reitz return ret; 436994054183SMax Reitz } 437094054183SMax Reitz 4371491d27e2SMax Reitz static int qcow2_make_empty(BlockDriverState *bs) 4372491d27e2SMax Reitz { 4373ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 4374d2cb36afSEric Blake uint64_t offset, end_offset; 4375d2cb36afSEric Blake int step = QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size); 437694054183SMax Reitz int l1_clusters, ret = 0; 4377491d27e2SMax Reitz 437894054183SMax Reitz l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t)); 437994054183SMax Reitz 43804096974eSEric Blake if (s->qcow_version >= 3 && !s->snapshots && !s->nb_bitmaps && 4381f0603329SDaniel P. Berrange 3 + l1_clusters <= s->refcount_block_size && 4382db04524fSKevin Wolf s->crypt_method_header != QCOW_CRYPT_LUKS && 4383db04524fSKevin Wolf !has_data_file(bs)) { 43844096974eSEric Blake /* The following function only works for qcow2 v3 images (it 43854096974eSEric Blake * requires the dirty flag) and only as long as there are no 43864096974eSEric Blake * features that reserve extra clusters (such as snapshots, 43874096974eSEric Blake * LUKS header, or persistent bitmaps), because it completely 43884096974eSEric Blake * empties the image. Furthermore, the L1 table and three 43894096974eSEric Blake * additional clusters (image header, refcount table, one 4390db04524fSKevin Wolf * refcount block) have to fit inside one refcount block. It 4391db04524fSKevin Wolf * only resets the image file, i.e. does not work with an 4392db04524fSKevin Wolf * external data file. 
*/ 439394054183SMax Reitz return make_completely_empty(bs); 439494054183SMax Reitz } 439594054183SMax Reitz 439694054183SMax Reitz /* This fallback code simply discards every active cluster; this is slow, 439794054183SMax Reitz * but works in all cases */ 4398d2cb36afSEric Blake end_offset = bs->total_sectors * BDRV_SECTOR_SIZE; 4399d2cb36afSEric Blake for (offset = 0; offset < end_offset; offset += step) { 4400491d27e2SMax Reitz /* As this function is generally used after committing an external 4401491d27e2SMax Reitz * snapshot, QCOW2_DISCARD_SNAPSHOT seems appropriate. Also, the 4402491d27e2SMax Reitz * default action for this kind of discard is to pass the discard, 4403491d27e2SMax Reitz * which will ideally result in an actually smaller image file, as 4404491d27e2SMax Reitz * is probably desired. */ 4405d2cb36afSEric Blake ret = qcow2_cluster_discard(bs, offset, MIN(step, end_offset - offset), 4406491d27e2SMax Reitz QCOW2_DISCARD_SNAPSHOT, true); 4407491d27e2SMax Reitz if (ret < 0) { 4408491d27e2SMax Reitz break; 4409491d27e2SMax Reitz } 4410491d27e2SMax Reitz } 4411491d27e2SMax Reitz 4412491d27e2SMax Reitz return ret; 4413491d27e2SMax Reitz } 4414491d27e2SMax Reitz 4415a968168cSDong Xu Wang static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs) 441620d97356SBlue Swirl { 4417ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 441829c1a730SKevin Wolf int ret; 441929c1a730SKevin Wolf 44208b94ff85SPaolo Bonzini qemu_co_mutex_lock(&s->lock); 44218b220eb7SPaolo Bonzini ret = qcow2_write_caches(bs); 44228b94ff85SPaolo Bonzini qemu_co_mutex_unlock(&s->lock); 442329c1a730SKevin Wolf 44248b220eb7SPaolo Bonzini return ret; 4425eb489bb1SKevin Wolf } 4426eb489bb1SKevin Wolf 442761914f89SStefan Hajnoczi static ssize_t qcow2_measure_crypto_hdr_init_func(QCryptoBlock *block, 442861914f89SStefan Hajnoczi size_t headerlen, void *opaque, Error **errp) 442961914f89SStefan Hajnoczi { 443061914f89SStefan Hajnoczi size_t *headerlenp = opaque; 443161914f89SStefan Hajnoczi 
443261914f89SStefan Hajnoczi /* Stash away the payload size */ 443361914f89SStefan Hajnoczi *headerlenp = headerlen; 443461914f89SStefan Hajnoczi return 0; 443561914f89SStefan Hajnoczi } 443661914f89SStefan Hajnoczi 443761914f89SStefan Hajnoczi static ssize_t qcow2_measure_crypto_hdr_write_func(QCryptoBlock *block, 443861914f89SStefan Hajnoczi size_t offset, const uint8_t *buf, size_t buflen, 443961914f89SStefan Hajnoczi void *opaque, Error **errp) 444061914f89SStefan Hajnoczi { 444161914f89SStefan Hajnoczi /* Discard the bytes, we're not actually writing to an image */ 444261914f89SStefan Hajnoczi return buflen; 444361914f89SStefan Hajnoczi } 444461914f89SStefan Hajnoczi 444561914f89SStefan Hajnoczi /* Determine the number of bytes for the LUKS payload */ 444661914f89SStefan Hajnoczi static bool qcow2_measure_luks_headerlen(QemuOpts *opts, size_t *len, 444761914f89SStefan Hajnoczi Error **errp) 444861914f89SStefan Hajnoczi { 444961914f89SStefan Hajnoczi QDict *opts_qdict; 445061914f89SStefan Hajnoczi QDict *cryptoopts_qdict; 445161914f89SStefan Hajnoczi QCryptoBlockCreateOptions *cryptoopts; 445261914f89SStefan Hajnoczi QCryptoBlock *crypto; 445361914f89SStefan Hajnoczi 445461914f89SStefan Hajnoczi /* Extract "encrypt." 
options into a qdict */ 445561914f89SStefan Hajnoczi opts_qdict = qemu_opts_to_qdict(opts, NULL); 445661914f89SStefan Hajnoczi qdict_extract_subqdict(opts_qdict, &cryptoopts_qdict, "encrypt."); 445761914f89SStefan Hajnoczi qobject_unref(opts_qdict); 445861914f89SStefan Hajnoczi 445961914f89SStefan Hajnoczi /* Build QCryptoBlockCreateOptions object from qdict */ 446061914f89SStefan Hajnoczi qdict_put_str(cryptoopts_qdict, "format", "luks"); 446161914f89SStefan Hajnoczi cryptoopts = block_crypto_create_opts_init(cryptoopts_qdict, errp); 446261914f89SStefan Hajnoczi qobject_unref(cryptoopts_qdict); 446361914f89SStefan Hajnoczi if (!cryptoopts) { 446461914f89SStefan Hajnoczi return false; 446561914f89SStefan Hajnoczi } 446661914f89SStefan Hajnoczi 446761914f89SStefan Hajnoczi /* Fake LUKS creation in order to determine the payload size */ 446861914f89SStefan Hajnoczi crypto = qcrypto_block_create(cryptoopts, "encrypt.", 446961914f89SStefan Hajnoczi qcow2_measure_crypto_hdr_init_func, 447061914f89SStefan Hajnoczi qcow2_measure_crypto_hdr_write_func, 447161914f89SStefan Hajnoczi len, errp); 447261914f89SStefan Hajnoczi qapi_free_QCryptoBlockCreateOptions(cryptoopts); 447361914f89SStefan Hajnoczi if (!crypto) { 447461914f89SStefan Hajnoczi return false; 447561914f89SStefan Hajnoczi } 447661914f89SStefan Hajnoczi 447761914f89SStefan Hajnoczi qcrypto_block_free(crypto); 447861914f89SStefan Hajnoczi return true; 447961914f89SStefan Hajnoczi } 448061914f89SStefan Hajnoczi 4481c501c352SStefan Hajnoczi static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, 4482c501c352SStefan Hajnoczi Error **errp) 4483c501c352SStefan Hajnoczi { 4484c501c352SStefan Hajnoczi Error *local_err = NULL; 4485c501c352SStefan Hajnoczi BlockMeasureInfo *info; 4486c501c352SStefan Hajnoczi uint64_t required = 0; /* bytes that contribute to required size */ 4487c501c352SStefan Hajnoczi uint64_t virtual_size; /* disk size as seen by guest */ 4488c501c352SStefan Hajnoczi uint64_t 
refcount_bits; 4489c501c352SStefan Hajnoczi uint64_t l2_tables; 449061914f89SStefan Hajnoczi uint64_t luks_payload_size = 0; 4491c501c352SStefan Hajnoczi size_t cluster_size; 4492c501c352SStefan Hajnoczi int version; 4493c501c352SStefan Hajnoczi char *optstr; 4494c501c352SStefan Hajnoczi PreallocMode prealloc; 4495c501c352SStefan Hajnoczi bool has_backing_file; 449661914f89SStefan Hajnoczi bool has_luks; 4497c501c352SStefan Hajnoczi 4498c501c352SStefan Hajnoczi /* Parse image creation options */ 4499c501c352SStefan Hajnoczi cluster_size = qcow2_opt_get_cluster_size_del(opts, &local_err); 4500c501c352SStefan Hajnoczi if (local_err) { 4501c501c352SStefan Hajnoczi goto err; 4502c501c352SStefan Hajnoczi } 4503c501c352SStefan Hajnoczi 4504c501c352SStefan Hajnoczi version = qcow2_opt_get_version_del(opts, &local_err); 4505c501c352SStefan Hajnoczi if (local_err) { 4506c501c352SStefan Hajnoczi goto err; 4507c501c352SStefan Hajnoczi } 4508c501c352SStefan Hajnoczi 4509c501c352SStefan Hajnoczi refcount_bits = qcow2_opt_get_refcount_bits_del(opts, version, &local_err); 4510c501c352SStefan Hajnoczi if (local_err) { 4511c501c352SStefan Hajnoczi goto err; 4512c501c352SStefan Hajnoczi } 4513c501c352SStefan Hajnoczi 4514c501c352SStefan Hajnoczi optstr = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); 4515f7abe0ecSMarc-André Lureau prealloc = qapi_enum_parse(&PreallocMode_lookup, optstr, 451606c60b6cSMarkus Armbruster PREALLOC_MODE_OFF, &local_err); 4517c501c352SStefan Hajnoczi g_free(optstr); 4518c501c352SStefan Hajnoczi if (local_err) { 4519c501c352SStefan Hajnoczi goto err; 4520c501c352SStefan Hajnoczi } 4521c501c352SStefan Hajnoczi 4522c501c352SStefan Hajnoczi optstr = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE); 4523c501c352SStefan Hajnoczi has_backing_file = !!optstr; 4524c501c352SStefan Hajnoczi g_free(optstr); 4525c501c352SStefan Hajnoczi 452661914f89SStefan Hajnoczi optstr = qemu_opt_get_del(opts, BLOCK_OPT_ENCRYPT_FORMAT); 452761914f89SStefan Hajnoczi has_luks = optstr && 
strcmp(optstr, "luks") == 0; 452861914f89SStefan Hajnoczi g_free(optstr); 452961914f89SStefan Hajnoczi 453061914f89SStefan Hajnoczi if (has_luks) { 453161914f89SStefan Hajnoczi size_t headerlen; 453261914f89SStefan Hajnoczi 453361914f89SStefan Hajnoczi if (!qcow2_measure_luks_headerlen(opts, &headerlen, &local_err)) { 453461914f89SStefan Hajnoczi goto err; 453561914f89SStefan Hajnoczi } 453661914f89SStefan Hajnoczi 453761914f89SStefan Hajnoczi luks_payload_size = ROUND_UP(headerlen, cluster_size); 453861914f89SStefan Hajnoczi } 453961914f89SStefan Hajnoczi 45409e029689SAlberto Garcia virtual_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); 45419e029689SAlberto Garcia virtual_size = ROUND_UP(virtual_size, cluster_size); 4542c501c352SStefan Hajnoczi 4543c501c352SStefan Hajnoczi /* Check that virtual disk size is valid */ 4544c501c352SStefan Hajnoczi l2_tables = DIV_ROUND_UP(virtual_size / cluster_size, 4545c501c352SStefan Hajnoczi cluster_size / sizeof(uint64_t)); 4546c501c352SStefan Hajnoczi if (l2_tables * sizeof(uint64_t) > QCOW_MAX_L1_SIZE) { 4547c501c352SStefan Hajnoczi error_setg(&local_err, "The image size is too large " 4548c501c352SStefan Hajnoczi "(try using a larger cluster size)"); 4549c501c352SStefan Hajnoczi goto err; 4550c501c352SStefan Hajnoczi } 4551c501c352SStefan Hajnoczi 4552c501c352SStefan Hajnoczi /* Account for input image */ 4553c501c352SStefan Hajnoczi if (in_bs) { 4554c501c352SStefan Hajnoczi int64_t ssize = bdrv_getlength(in_bs); 4555c501c352SStefan Hajnoczi if (ssize < 0) { 4556c501c352SStefan Hajnoczi error_setg_errno(&local_err, -ssize, 4557c501c352SStefan Hajnoczi "Unable to get image virtual_size"); 4558c501c352SStefan Hajnoczi goto err; 4559c501c352SStefan Hajnoczi } 4560c501c352SStefan Hajnoczi 45619e029689SAlberto Garcia virtual_size = ROUND_UP(ssize, cluster_size); 4562c501c352SStefan Hajnoczi 4563c501c352SStefan Hajnoczi if (has_backing_file) { 4564c501c352SStefan Hajnoczi /* We don't how much of the backing chain is shared 
by the input 4565c501c352SStefan Hajnoczi * image and the new image file. In the worst case the new image's 4566c501c352SStefan Hajnoczi * backing file has nothing in common with the input image. Be 4567c501c352SStefan Hajnoczi * conservative and assume all clusters need to be written. 4568c501c352SStefan Hajnoczi */ 4569c501c352SStefan Hajnoczi required = virtual_size; 4570c501c352SStefan Hajnoczi } else { 4571b85ee453SEric Blake int64_t offset; 457231826642SEric Blake int64_t pnum = 0; 4573c501c352SStefan Hajnoczi 457431826642SEric Blake for (offset = 0; offset < ssize; offset += pnum) { 457531826642SEric Blake int ret; 4576c501c352SStefan Hajnoczi 457731826642SEric Blake ret = bdrv_block_status_above(in_bs, NULL, offset, 457831826642SEric Blake ssize - offset, &pnum, NULL, 457931826642SEric Blake NULL); 4580c501c352SStefan Hajnoczi if (ret < 0) { 4581c501c352SStefan Hajnoczi error_setg_errno(&local_err, -ret, 4582c501c352SStefan Hajnoczi "Unable to get block status"); 4583c501c352SStefan Hajnoczi goto err; 4584c501c352SStefan Hajnoczi } 4585c501c352SStefan Hajnoczi 4586c501c352SStefan Hajnoczi if (ret & BDRV_BLOCK_ZERO) { 4587c501c352SStefan Hajnoczi /* Skip zero regions (safe with no backing file) */ 4588c501c352SStefan Hajnoczi } else if ((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) == 4589c501c352SStefan Hajnoczi (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) { 4590c501c352SStefan Hajnoczi /* Extend pnum to end of cluster for next iteration */ 459131826642SEric Blake pnum = ROUND_UP(offset + pnum, cluster_size) - offset; 4592c501c352SStefan Hajnoczi 4593c501c352SStefan Hajnoczi /* Count clusters we've seen */ 459431826642SEric Blake required += offset % cluster_size + pnum; 4595c501c352SStefan Hajnoczi } 4596c501c352SStefan Hajnoczi } 4597c501c352SStefan Hajnoczi } 4598c501c352SStefan Hajnoczi } 4599c501c352SStefan Hajnoczi 4600c501c352SStefan Hajnoczi /* Take into account preallocation. 
Nothing special is needed for 4601c501c352SStefan Hajnoczi * PREALLOC_MODE_METADATA since metadata is always counted. 4602c501c352SStefan Hajnoczi */ 4603c501c352SStefan Hajnoczi if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) { 4604c501c352SStefan Hajnoczi required = virtual_size; 4605c501c352SStefan Hajnoczi } 4606c501c352SStefan Hajnoczi 4607c501c352SStefan Hajnoczi info = g_new(BlockMeasureInfo, 1); 4608c501c352SStefan Hajnoczi info->fully_allocated = 4609c501c352SStefan Hajnoczi qcow2_calc_prealloc_size(virtual_size, cluster_size, 461061914f89SStefan Hajnoczi ctz32(refcount_bits)) + luks_payload_size; 4611c501c352SStefan Hajnoczi 4612c501c352SStefan Hajnoczi /* Remove data clusters that are not required. This overestimates the 4613c501c352SStefan Hajnoczi * required size because metadata needed for the fully allocated file is 4614c501c352SStefan Hajnoczi * still counted. 4615c501c352SStefan Hajnoczi */ 4616c501c352SStefan Hajnoczi info->required = info->fully_allocated - virtual_size + required; 4617c501c352SStefan Hajnoczi return info; 4618c501c352SStefan Hajnoczi 4619c501c352SStefan Hajnoczi err: 4620c501c352SStefan Hajnoczi error_propagate(errp, local_err); 4621c501c352SStefan Hajnoczi return NULL; 4622c501c352SStefan Hajnoczi } 4623c501c352SStefan Hajnoczi 46247c80ab3fSJes Sorensen static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 462520d97356SBlue Swirl { 4626ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 462795de6d70SPaolo Bonzini bdi->unallocated_blocks_are_zero = true; 462820d97356SBlue Swirl bdi->cluster_size = s->cluster_size; 46297c80ab3fSJes Sorensen bdi->vm_state_offset = qcow2_vm_state_offset(s); 463020d97356SBlue Swirl return 0; 463120d97356SBlue Swirl } 463220d97356SBlue Swirl 46331bf6e9caSAndrey Shinkevich static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs, 46341bf6e9caSAndrey Shinkevich Error **errp) 463537764dfbSMax Reitz { 4636ff99129aSKevin Wolf BDRVQcow2State *s = 
bs->opaque; 46370a12f6f8SDaniel P. Berrange ImageInfoSpecific *spec_info; 46380a12f6f8SDaniel P. Berrange QCryptoBlockInfo *encrypt_info = NULL; 46391bf6e9caSAndrey Shinkevich Error *local_err = NULL; 464037764dfbSMax Reitz 46410a12f6f8SDaniel P. Berrange if (s->crypto != NULL) { 46421bf6e9caSAndrey Shinkevich encrypt_info = qcrypto_block_get_info(s->crypto, &local_err); 46431bf6e9caSAndrey Shinkevich if (local_err) { 46441bf6e9caSAndrey Shinkevich error_propagate(errp, local_err); 46451bf6e9caSAndrey Shinkevich return NULL; 46461bf6e9caSAndrey Shinkevich } 46470a12f6f8SDaniel P. Berrange } 46480a12f6f8SDaniel P. Berrange 46490a12f6f8SDaniel P. Berrange spec_info = g_new(ImageInfoSpecific, 1); 465037764dfbSMax Reitz *spec_info = (ImageInfoSpecific){ 46516a8f9661SEric Blake .type = IMAGE_INFO_SPECIFIC_KIND_QCOW2, 4652b8968c87SAndrey Shinkevich .u.qcow2.data = g_new0(ImageInfoSpecificQCow2, 1), 465337764dfbSMax Reitz }; 465437764dfbSMax Reitz if (s->qcow_version == 2) { 465532bafa8fSEric Blake *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){ 465637764dfbSMax Reitz .compat = g_strdup("0.10"), 46570709c5a1SMax Reitz .refcount_bits = s->refcount_bits, 465837764dfbSMax Reitz }; 465937764dfbSMax Reitz } else if (s->qcow_version == 3) { 4660b8968c87SAndrey Shinkevich Qcow2BitmapInfoList *bitmaps; 4661b8968c87SAndrey Shinkevich bitmaps = qcow2_get_bitmap_info_list(bs, &local_err); 4662b8968c87SAndrey Shinkevich if (local_err) { 4663b8968c87SAndrey Shinkevich error_propagate(errp, local_err); 4664b8968c87SAndrey Shinkevich qapi_free_ImageInfoSpecific(spec_info); 4665b8968c87SAndrey Shinkevich return NULL; 4666b8968c87SAndrey Shinkevich } 466732bafa8fSEric Blake *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){ 466837764dfbSMax Reitz .compat = g_strdup("1.1"), 466937764dfbSMax Reitz .lazy_refcounts = s->compatible_features & 467037764dfbSMax Reitz QCOW2_COMPAT_LAZY_REFCOUNTS, 467137764dfbSMax Reitz .has_lazy_refcounts = true, 46729009b196SMax Reitz .corrupt = 
s->incompatible_features & 46739009b196SMax Reitz QCOW2_INCOMPAT_CORRUPT, 46749009b196SMax Reitz .has_corrupt = true, 46750709c5a1SMax Reitz .refcount_bits = s->refcount_bits, 4676b8968c87SAndrey Shinkevich .has_bitmaps = !!bitmaps, 4677b8968c87SAndrey Shinkevich .bitmaps = bitmaps, 46789b890bdcSKevin Wolf .has_data_file = !!s->image_data_file, 46799b890bdcSKevin Wolf .data_file = g_strdup(s->image_data_file), 46806c3944dcSKevin Wolf .has_data_file_raw = has_data_file(bs), 46816c3944dcSKevin Wolf .data_file_raw = data_file_is_raw(bs), 468237764dfbSMax Reitz }; 4683b1fc8f93SDenis V. Lunev } else { 4684b1fc8f93SDenis V. Lunev /* if this assertion fails, this probably means a new version was 4685b1fc8f93SDenis V. Lunev * added without having it covered here */ 4686b1fc8f93SDenis V. Lunev assert(false); 468737764dfbSMax Reitz } 468837764dfbSMax Reitz 46890a12f6f8SDaniel P. Berrange if (encrypt_info) { 46900a12f6f8SDaniel P. Berrange ImageInfoSpecificQCow2Encryption *qencrypt = 46910a12f6f8SDaniel P. Berrange g_new(ImageInfoSpecificQCow2Encryption, 1); 46920a12f6f8SDaniel P. Berrange switch (encrypt_info->format) { 46930a12f6f8SDaniel P. Berrange case Q_CRYPTO_BLOCK_FORMAT_QCOW: 46940a12f6f8SDaniel P. Berrange qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_AES; 46950a12f6f8SDaniel P. Berrange break; 46960a12f6f8SDaniel P. Berrange case Q_CRYPTO_BLOCK_FORMAT_LUKS: 46970a12f6f8SDaniel P. Berrange qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_LUKS; 46980a12f6f8SDaniel P. Berrange qencrypt->u.luks = encrypt_info->u.luks; 46990a12f6f8SDaniel P. Berrange break; 47000a12f6f8SDaniel P. Berrange default: 47010a12f6f8SDaniel P. Berrange abort(); 47020a12f6f8SDaniel P. Berrange } 47030a12f6f8SDaniel P. Berrange /* Since we did shallow copy above, erase any pointers 47040a12f6f8SDaniel P. Berrange * in the original info */ 47050a12f6f8SDaniel P. Berrange memset(&encrypt_info->u, 0, sizeof(encrypt_info->u)); 47060a12f6f8SDaniel P. 
Berrange qapi_free_QCryptoBlockInfo(encrypt_info); 47070a12f6f8SDaniel P. Berrange 47080a12f6f8SDaniel P. Berrange spec_info->u.qcow2.data->has_encrypt = true; 47090a12f6f8SDaniel P. Berrange spec_info->u.qcow2.data->encrypt = qencrypt; 47100a12f6f8SDaniel P. Berrange } 47110a12f6f8SDaniel P. Berrange 471237764dfbSMax Reitz return spec_info; 471337764dfbSMax Reitz } 471437764dfbSMax Reitz 4715cf8074b3SKevin Wolf static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, 4716cf8074b3SKevin Wolf int64_t pos) 471720d97356SBlue Swirl { 4718ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 471920d97356SBlue Swirl 472066f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE); 4721734a7758SKevin Wolf return bs->drv->bdrv_co_pwritev(bs, qcow2_vm_state_offset(s) + pos, 4722734a7758SKevin Wolf qiov->size, qiov, 0); 472320d97356SBlue Swirl } 472420d97356SBlue Swirl 47255ddda0b8SKevin Wolf static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, 47265ddda0b8SKevin Wolf int64_t pos) 472720d97356SBlue Swirl { 4728ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 472920d97356SBlue Swirl 473066f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD); 4731734a7758SKevin Wolf return bs->drv->bdrv_co_preadv(bs, qcow2_vm_state_offset(s) + pos, 4732734a7758SKevin Wolf qiov->size, qiov, 0); 473320d97356SBlue Swirl } 473420d97356SBlue Swirl 47359296b3edSMax Reitz /* 47369296b3edSMax Reitz * Downgrades an image's version. To achieve this, any incompatible features 47379296b3edSMax Reitz * have to be removed. 
47389296b3edSMax Reitz */ 47394057a2b2SMax Reitz static int qcow2_downgrade(BlockDriverState *bs, int target_version, 4740d1402b50SMax Reitz BlockDriverAmendStatusCB *status_cb, void *cb_opaque, 4741d1402b50SMax Reitz Error **errp) 47429296b3edSMax Reitz { 4743ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 47449296b3edSMax Reitz int current_version = s->qcow_version; 47459296b3edSMax Reitz int ret; 47469296b3edSMax Reitz 4747d1402b50SMax Reitz /* This is qcow2_downgrade(), not qcow2_upgrade() */ 4748d1402b50SMax Reitz assert(target_version < current_version); 4749d1402b50SMax Reitz 4750d1402b50SMax Reitz /* There are no other versions (now) that you can downgrade to */ 4751d1402b50SMax Reitz assert(target_version == 2); 47529296b3edSMax Reitz 47539296b3edSMax Reitz if (s->refcount_order != 4) { 4754d1402b50SMax Reitz error_setg(errp, "compat=0.10 requires refcount_bits=16"); 47559296b3edSMax Reitz return -ENOTSUP; 47569296b3edSMax Reitz } 47579296b3edSMax Reitz 4758966b000fSKevin Wolf if (has_data_file(bs)) { 4759966b000fSKevin Wolf error_setg(errp, "Cannot downgrade an image with a data file"); 4760966b000fSKevin Wolf return -ENOTSUP; 4761966b000fSKevin Wolf } 4762966b000fSKevin Wolf 47639296b3edSMax Reitz /* clear incompatible features */ 47649296b3edSMax Reitz if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { 47659296b3edSMax Reitz ret = qcow2_mark_clean(bs); 47669296b3edSMax Reitz if (ret < 0) { 4767d1402b50SMax Reitz error_setg_errno(errp, -ret, "Failed to make the image clean"); 47689296b3edSMax Reitz return ret; 47699296b3edSMax Reitz } 47709296b3edSMax Reitz } 47719296b3edSMax Reitz 47729296b3edSMax Reitz /* with QCOW2_INCOMPAT_CORRUPT, it is pretty much impossible to get here in 47739296b3edSMax Reitz * the first place; if that happens nonetheless, returning -ENOTSUP is the 47749296b3edSMax Reitz * best thing to do anyway */ 47759296b3edSMax Reitz 47769296b3edSMax Reitz if (s->incompatible_features) { 4777d1402b50SMax Reitz error_setg(errp, 
"Cannot downgrade an image with incompatible features " 4778d1402b50SMax Reitz "%#" PRIx64 " set", s->incompatible_features); 47799296b3edSMax Reitz return -ENOTSUP; 47809296b3edSMax Reitz } 47819296b3edSMax Reitz 47829296b3edSMax Reitz /* since we can ignore compatible features, we can set them to 0 as well */ 47839296b3edSMax Reitz s->compatible_features = 0; 47849296b3edSMax Reitz /* if lazy refcounts have been used, they have already been fixed through 47859296b3edSMax Reitz * clearing the dirty flag */ 47869296b3edSMax Reitz 47879296b3edSMax Reitz /* clearing autoclear features is trivial */ 47889296b3edSMax Reitz s->autoclear_features = 0; 47899296b3edSMax Reitz 47908b13976dSMax Reitz ret = qcow2_expand_zero_clusters(bs, status_cb, cb_opaque); 47919296b3edSMax Reitz if (ret < 0) { 4792d1402b50SMax Reitz error_setg_errno(errp, -ret, "Failed to turn zero into data clusters"); 47939296b3edSMax Reitz return ret; 47949296b3edSMax Reitz } 47959296b3edSMax Reitz 47969296b3edSMax Reitz s->qcow_version = target_version; 47979296b3edSMax Reitz ret = qcow2_update_header(bs); 47989296b3edSMax Reitz if (ret < 0) { 47999296b3edSMax Reitz s->qcow_version = current_version; 4800d1402b50SMax Reitz error_setg_errno(errp, -ret, "Failed to update the image header"); 48019296b3edSMax Reitz return ret; 48029296b3edSMax Reitz } 48039296b3edSMax Reitz return 0; 48049296b3edSMax Reitz } 48059296b3edSMax Reitz 4806c293a809SMax Reitz typedef enum Qcow2AmendOperation { 4807c293a809SMax Reitz /* This is the value Qcow2AmendHelperCBInfo::last_operation will be 4808c293a809SMax Reitz * statically initialized to so that the helper CB can discern the first 4809c293a809SMax Reitz * invocation from an operation change */ 4810c293a809SMax Reitz QCOW2_NO_OPERATION = 0, 4811c293a809SMax Reitz 481261ce55fcSMax Reitz QCOW2_CHANGING_REFCOUNT_ORDER, 4813c293a809SMax Reitz QCOW2_DOWNGRADING, 4814c293a809SMax Reitz } Qcow2AmendOperation; 4815c293a809SMax Reitz 4816c293a809SMax Reitz typedef struct 
Qcow2AmendHelperCBInfo { 4817c293a809SMax Reitz /* The code coordinating the amend operations should only modify 4818c293a809SMax Reitz * these four fields; the rest will be managed by the CB */ 4819c293a809SMax Reitz BlockDriverAmendStatusCB *original_status_cb; 4820c293a809SMax Reitz void *original_cb_opaque; 4821c293a809SMax Reitz 4822c293a809SMax Reitz Qcow2AmendOperation current_operation; 4823c293a809SMax Reitz 4824c293a809SMax Reitz /* Total number of operations to perform (only set once) */ 4825c293a809SMax Reitz int total_operations; 4826c293a809SMax Reitz 4827c293a809SMax Reitz /* The following fields are managed by the CB */ 4828c293a809SMax Reitz 4829c293a809SMax Reitz /* Number of operations completed */ 4830c293a809SMax Reitz int operations_completed; 4831c293a809SMax Reitz 4832c293a809SMax Reitz /* Cumulative offset of all completed operations */ 4833c293a809SMax Reitz int64_t offset_completed; 4834c293a809SMax Reitz 4835c293a809SMax Reitz Qcow2AmendOperation last_operation; 4836c293a809SMax Reitz int64_t last_work_size; 4837c293a809SMax Reitz } Qcow2AmendHelperCBInfo; 4838c293a809SMax Reitz 4839c293a809SMax Reitz static void qcow2_amend_helper_cb(BlockDriverState *bs, 4840c293a809SMax Reitz int64_t operation_offset, 4841c293a809SMax Reitz int64_t operation_work_size, void *opaque) 4842c293a809SMax Reitz { 4843c293a809SMax Reitz Qcow2AmendHelperCBInfo *info = opaque; 4844c293a809SMax Reitz int64_t current_work_size; 4845c293a809SMax Reitz int64_t projected_work_size; 4846c293a809SMax Reitz 4847c293a809SMax Reitz if (info->current_operation != info->last_operation) { 4848c293a809SMax Reitz if (info->last_operation != QCOW2_NO_OPERATION) { 4849c293a809SMax Reitz info->offset_completed += info->last_work_size; 4850c293a809SMax Reitz info->operations_completed++; 4851c293a809SMax Reitz } 4852c293a809SMax Reitz 4853c293a809SMax Reitz info->last_operation = info->current_operation; 4854c293a809SMax Reitz } 4855c293a809SMax Reitz 4856c293a809SMax Reitz 
assert(info->total_operations > 0); 4857c293a809SMax Reitz assert(info->operations_completed < info->total_operations); 4858c293a809SMax Reitz 4859c293a809SMax Reitz info->last_work_size = operation_work_size; 4860c293a809SMax Reitz 4861c293a809SMax Reitz current_work_size = info->offset_completed + operation_work_size; 4862c293a809SMax Reitz 4863c293a809SMax Reitz /* current_work_size is the total work size for (operations_completed + 1) 4864c293a809SMax Reitz * operations (which includes this one), so multiply it by the number of 4865c293a809SMax Reitz * operations not covered and divide it by the number of operations 4866c293a809SMax Reitz * covered to get a projection for the operations not covered */ 4867c293a809SMax Reitz projected_work_size = current_work_size * (info->total_operations - 4868c293a809SMax Reitz info->operations_completed - 1) 4869c293a809SMax Reitz / (info->operations_completed + 1); 4870c293a809SMax Reitz 4871c293a809SMax Reitz info->original_status_cb(bs, info->offset_completed + operation_offset, 4872c293a809SMax Reitz current_work_size + projected_work_size, 4873c293a809SMax Reitz info->original_cb_opaque); 4874c293a809SMax Reitz } 4875c293a809SMax Reitz 487677485434SMax Reitz static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, 48778b13976dSMax Reitz BlockDriverAmendStatusCB *status_cb, 4878d1402b50SMax Reitz void *cb_opaque, 4879d1402b50SMax Reitz Error **errp) 48809296b3edSMax Reitz { 4881ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 48829296b3edSMax Reitz int old_version = s->qcow_version, new_version = old_version; 48839296b3edSMax Reitz uint64_t new_size = 0; 48849b890bdcSKevin Wolf const char *backing_file = NULL, *backing_format = NULL, *data_file = NULL; 48859296b3edSMax Reitz bool lazy_refcounts = s->use_lazy_refcounts; 48866c3944dcSKevin Wolf bool data_file_raw = data_file_is_raw(bs); 48871bd0e2d1SChunyan Liu const char *compat = NULL; 48881bd0e2d1SChunyan Liu uint64_t cluster_size = s->cluster_size; 
48891bd0e2d1SChunyan Liu bool encrypt; 48904652b8f3SDaniel P. Berrange int encformat; 489161ce55fcSMax Reitz int refcount_bits = s->refcount_bits; 48929296b3edSMax Reitz int ret; 48931bd0e2d1SChunyan Liu QemuOptDesc *desc = opts->list->desc; 4894c293a809SMax Reitz Qcow2AmendHelperCBInfo helper_cb_info; 48959296b3edSMax Reitz 48961bd0e2d1SChunyan Liu while (desc && desc->name) { 48971bd0e2d1SChunyan Liu if (!qemu_opt_find(opts, desc->name)) { 48989296b3edSMax Reitz /* only change explicitly defined options */ 48991bd0e2d1SChunyan Liu desc++; 49009296b3edSMax Reitz continue; 49019296b3edSMax Reitz } 49029296b3edSMax Reitz 49038a17b83cSMax Reitz if (!strcmp(desc->name, BLOCK_OPT_COMPAT_LEVEL)) { 49048a17b83cSMax Reitz compat = qemu_opt_get(opts, BLOCK_OPT_COMPAT_LEVEL); 49051bd0e2d1SChunyan Liu if (!compat) { 49069296b3edSMax Reitz /* preserve default */ 49071bd0e2d1SChunyan Liu } else if (!strcmp(compat, "0.10")) { 49089296b3edSMax Reitz new_version = 2; 49091bd0e2d1SChunyan Liu } else if (!strcmp(compat, "1.1")) { 49109296b3edSMax Reitz new_version = 3; 49119296b3edSMax Reitz } else { 4912d1402b50SMax Reitz error_setg(errp, "Unknown compatibility level %s", compat); 49139296b3edSMax Reitz return -EINVAL; 49149296b3edSMax Reitz } 49158a17b83cSMax Reitz } else if (!strcmp(desc->name, BLOCK_OPT_PREALLOC)) { 4916d1402b50SMax Reitz error_setg(errp, "Cannot change preallocation mode"); 49179296b3edSMax Reitz return -ENOTSUP; 49188a17b83cSMax Reitz } else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) { 49198a17b83cSMax Reitz new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0); 49208a17b83cSMax Reitz } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FILE)) { 49218a17b83cSMax Reitz backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE); 49228a17b83cSMax Reitz } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FMT)) { 49238a17b83cSMax Reitz backing_format = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); 49248a17b83cSMax Reitz } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT)) { 
49258a17b83cSMax Reitz encrypt = qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, 4926b25b387fSDaniel P. Berrange !!s->crypto); 4927f6fa64f6SDaniel P. Berrange 4928b25b387fSDaniel P. Berrange if (encrypt != !!s->crypto) { 4929d1402b50SMax Reitz error_setg(errp, 4930d1402b50SMax Reitz "Changing the encryption flag is not supported"); 49319296b3edSMax Reitz return -ENOTSUP; 49329296b3edSMax Reitz } 49334652b8f3SDaniel P. Berrange } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT_FORMAT)) { 49344652b8f3SDaniel P. Berrange encformat = qcow2_crypt_method_from_format( 49354652b8f3SDaniel P. Berrange qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT)); 49364652b8f3SDaniel P. Berrange 49374652b8f3SDaniel P. Berrange if (encformat != s->crypt_method_header) { 4938d1402b50SMax Reitz error_setg(errp, 4939d1402b50SMax Reitz "Changing the encryption format is not supported"); 49404652b8f3SDaniel P. Berrange return -ENOTSUP; 49414652b8f3SDaniel P. Berrange } 4942f66afbe2SDaniel P. Berrange } else if (g_str_has_prefix(desc->name, "encrypt.")) { 4943d1402b50SMax Reitz error_setg(errp, 4944d1402b50SMax Reitz "Changing the encryption parameters is not supported"); 4945f66afbe2SDaniel P. 
Berrange return -ENOTSUP; 49468a17b83cSMax Reitz } else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) { 49478a17b83cSMax Reitz cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 49481bd0e2d1SChunyan Liu cluster_size); 49491bd0e2d1SChunyan Liu if (cluster_size != s->cluster_size) { 4950d1402b50SMax Reitz error_setg(errp, "Changing the cluster size is not supported"); 49519296b3edSMax Reitz return -ENOTSUP; 49529296b3edSMax Reitz } 49538a17b83cSMax Reitz } else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) { 49548a17b83cSMax Reitz lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS, 49551bd0e2d1SChunyan Liu lazy_refcounts); 495606d05fa7SMax Reitz } else if (!strcmp(desc->name, BLOCK_OPT_REFCOUNT_BITS)) { 495761ce55fcSMax Reitz refcount_bits = qemu_opt_get_number(opts, BLOCK_OPT_REFCOUNT_BITS, 495861ce55fcSMax Reitz refcount_bits); 495961ce55fcSMax Reitz 496061ce55fcSMax Reitz if (refcount_bits <= 0 || refcount_bits > 64 || 496161ce55fcSMax Reitz !is_power_of_2(refcount_bits)) 496261ce55fcSMax Reitz { 4963d1402b50SMax Reitz error_setg(errp, "Refcount width must be a power of two and " 4964d1402b50SMax Reitz "may not exceed 64 bits"); 496561ce55fcSMax Reitz return -EINVAL; 496661ce55fcSMax Reitz } 49679b890bdcSKevin Wolf } else if (!strcmp(desc->name, BLOCK_OPT_DATA_FILE)) { 49689b890bdcSKevin Wolf data_file = qemu_opt_get(opts, BLOCK_OPT_DATA_FILE); 49699b890bdcSKevin Wolf if (data_file && !has_data_file(bs)) { 49709b890bdcSKevin Wolf error_setg(errp, "data-file can only be set for images that " 49719b890bdcSKevin Wolf "use an external data file"); 49729b890bdcSKevin Wolf return -EINVAL; 49739b890bdcSKevin Wolf } 49746c3944dcSKevin Wolf } else if (!strcmp(desc->name, BLOCK_OPT_DATA_FILE_RAW)) { 49756c3944dcSKevin Wolf data_file_raw = qemu_opt_get_bool(opts, BLOCK_OPT_DATA_FILE_RAW, 49766c3944dcSKevin Wolf data_file_raw); 49776c3944dcSKevin Wolf if (data_file_raw && !data_file_is_raw(bs)) { 49786c3944dcSKevin Wolf error_setg(errp, 
"data-file-raw cannot be set on existing " 49796c3944dcSKevin Wolf "images"); 49806c3944dcSKevin Wolf return -EINVAL; 49816c3944dcSKevin Wolf } 49829296b3edSMax Reitz } else { 4983164e0f89SMax Reitz /* if this point is reached, this probably means a new option was 49849296b3edSMax Reitz * added without having it covered here */ 4985164e0f89SMax Reitz abort(); 49869296b3edSMax Reitz } 49871bd0e2d1SChunyan Liu 49881bd0e2d1SChunyan Liu desc++; 49899296b3edSMax Reitz } 49909296b3edSMax Reitz 4991c293a809SMax Reitz helper_cb_info = (Qcow2AmendHelperCBInfo){ 4992c293a809SMax Reitz .original_status_cb = status_cb, 4993c293a809SMax Reitz .original_cb_opaque = cb_opaque, 4994c293a809SMax Reitz .total_operations = (new_version < old_version) 499561ce55fcSMax Reitz + (s->refcount_bits != refcount_bits) 4996c293a809SMax Reitz }; 4997c293a809SMax Reitz 49981038bbb8SMax Reitz /* Upgrade first (some features may require compat=1.1) */ 49999296b3edSMax Reitz if (new_version > old_version) { 50009296b3edSMax Reitz s->qcow_version = new_version; 50019296b3edSMax Reitz ret = qcow2_update_header(bs); 50029296b3edSMax Reitz if (ret < 0) { 50039296b3edSMax Reitz s->qcow_version = old_version; 5004d1402b50SMax Reitz error_setg_errno(errp, -ret, "Failed to update the image header"); 50059296b3edSMax Reitz return ret; 50069296b3edSMax Reitz } 50079296b3edSMax Reitz } 50089296b3edSMax Reitz 500961ce55fcSMax Reitz if (s->refcount_bits != refcount_bits) { 501061ce55fcSMax Reitz int refcount_order = ctz32(refcount_bits); 501161ce55fcSMax Reitz 501261ce55fcSMax Reitz if (new_version < 3 && refcount_bits != 16) { 5013d1402b50SMax Reitz error_setg(errp, "Refcount widths other than 16 bits require " 501461ce55fcSMax Reitz "compatibility level 1.1 or above (use compat=1.1 or " 501561ce55fcSMax Reitz "greater)"); 501661ce55fcSMax Reitz return -EINVAL; 501761ce55fcSMax Reitz } 501861ce55fcSMax Reitz 501961ce55fcSMax Reitz helper_cb_info.current_operation = QCOW2_CHANGING_REFCOUNT_ORDER; 
502061ce55fcSMax Reitz ret = qcow2_change_refcount_order(bs, refcount_order, 502161ce55fcSMax Reitz &qcow2_amend_helper_cb, 5022d1402b50SMax Reitz &helper_cb_info, errp); 502361ce55fcSMax Reitz if (ret < 0) { 502461ce55fcSMax Reitz return ret; 502561ce55fcSMax Reitz } 502661ce55fcSMax Reitz } 502761ce55fcSMax Reitz 50286c3944dcSKevin Wolf /* data-file-raw blocks backing files, so clear it first if requested */ 50296c3944dcSKevin Wolf if (data_file_raw) { 50306c3944dcSKevin Wolf s->autoclear_features |= QCOW2_AUTOCLEAR_DATA_FILE_RAW; 50316c3944dcSKevin Wolf } else { 50326c3944dcSKevin Wolf s->autoclear_features &= ~QCOW2_AUTOCLEAR_DATA_FILE_RAW; 50336c3944dcSKevin Wolf } 50346c3944dcSKevin Wolf 50359b890bdcSKevin Wolf if (data_file) { 50369b890bdcSKevin Wolf g_free(s->image_data_file); 50379b890bdcSKevin Wolf s->image_data_file = *data_file ? g_strdup(data_file) : NULL; 50389b890bdcSKevin Wolf } 50399b890bdcSKevin Wolf 50409b890bdcSKevin Wolf ret = qcow2_update_header(bs); 50419b890bdcSKevin Wolf if (ret < 0) { 50429b890bdcSKevin Wolf error_setg_errno(errp, -ret, "Failed to update the image header"); 50439b890bdcSKevin Wolf return ret; 50449b890bdcSKevin Wolf } 50459b890bdcSKevin Wolf 50469296b3edSMax Reitz if (backing_file || backing_format) { 5047e4603fe1SKevin Wolf ret = qcow2_change_backing_file(bs, 5048e4603fe1SKevin Wolf backing_file ?: s->image_backing_file, 5049e4603fe1SKevin Wolf backing_format ?: s->image_backing_format); 50509296b3edSMax Reitz if (ret < 0) { 5051d1402b50SMax Reitz error_setg_errno(errp, -ret, "Failed to change the backing file"); 50529296b3edSMax Reitz return ret; 50539296b3edSMax Reitz } 50549296b3edSMax Reitz } 50559296b3edSMax Reitz 50569296b3edSMax Reitz if (s->use_lazy_refcounts != lazy_refcounts) { 50579296b3edSMax Reitz if (lazy_refcounts) { 50581038bbb8SMax Reitz if (new_version < 3) { 5059d1402b50SMax Reitz error_setg(errp, "Lazy refcounts only supported with " 5060d1402b50SMax Reitz "compatibility level 1.1 and above (use 
compat=1.1 " 5061d1402b50SMax Reitz "or greater)"); 50629296b3edSMax Reitz return -EINVAL; 50639296b3edSMax Reitz } 50649296b3edSMax Reitz s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS; 50659296b3edSMax Reitz ret = qcow2_update_header(bs); 50669296b3edSMax Reitz if (ret < 0) { 50679296b3edSMax Reitz s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS; 5068d1402b50SMax Reitz error_setg_errno(errp, -ret, "Failed to update the image header"); 50699296b3edSMax Reitz return ret; 50709296b3edSMax Reitz } 50719296b3edSMax Reitz s->use_lazy_refcounts = true; 50729296b3edSMax Reitz } else { 50739296b3edSMax Reitz /* make image clean first */ 50749296b3edSMax Reitz ret = qcow2_mark_clean(bs); 50759296b3edSMax Reitz if (ret < 0) { 5076d1402b50SMax Reitz error_setg_errno(errp, -ret, "Failed to make the image clean"); 50779296b3edSMax Reitz return ret; 50789296b3edSMax Reitz } 50799296b3edSMax Reitz /* now disallow lazy refcounts */ 50809296b3edSMax Reitz s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS; 50819296b3edSMax Reitz ret = qcow2_update_header(bs); 50829296b3edSMax Reitz if (ret < 0) { 50839296b3edSMax Reitz s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS; 5084d1402b50SMax Reitz error_setg_errno(errp, -ret, "Failed to update the image header"); 50859296b3edSMax Reitz return ret; 50869296b3edSMax Reitz } 50879296b3edSMax Reitz s->use_lazy_refcounts = false; 50889296b3edSMax Reitz } 50899296b3edSMax Reitz } 50909296b3edSMax Reitz 50919296b3edSMax Reitz if (new_size) { 50926d0eb64dSKevin Wolf BlockBackend *blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL); 5093d1402b50SMax Reitz ret = blk_insert_bs(blk, bs, errp); 5094d7086422SKevin Wolf if (ret < 0) { 5095d7086422SKevin Wolf blk_unref(blk); 5096d7086422SKevin Wolf return ret; 5097d7086422SKevin Wolf } 5098d7086422SKevin Wolf 5099d1402b50SMax Reitz ret = blk_truncate(blk, new_size, PREALLOC_MODE_OFF, errp); 510070b27f36SKevin Wolf blk_unref(blk); 51019296b3edSMax Reitz if (ret < 0) { 51029296b3edSMax 
Reitz return ret; 51039296b3edSMax Reitz } 51049296b3edSMax Reitz } 51059296b3edSMax Reitz 51061038bbb8SMax Reitz /* Downgrade last (so unsupported features can be removed before) */ 51071038bbb8SMax Reitz if (new_version < old_version) { 5108c293a809SMax Reitz helper_cb_info.current_operation = QCOW2_DOWNGRADING; 5109c293a809SMax Reitz ret = qcow2_downgrade(bs, new_version, &qcow2_amend_helper_cb, 5110d1402b50SMax Reitz &helper_cb_info, errp); 51111038bbb8SMax Reitz if (ret < 0) { 51121038bbb8SMax Reitz return ret; 51131038bbb8SMax Reitz } 51141038bbb8SMax Reitz } 51151038bbb8SMax Reitz 51169296b3edSMax Reitz return 0; 51179296b3edSMax Reitz } 51189296b3edSMax Reitz 511985186ebdSMax Reitz /* 512085186ebdSMax Reitz * If offset or size are negative, respectively, they will not be included in 512185186ebdSMax Reitz * the BLOCK_IMAGE_CORRUPTED event emitted. 512285186ebdSMax Reitz * fatal will be ignored for read-only BDS; corruptions found there will always 512385186ebdSMax Reitz * be considered non-fatal. 512485186ebdSMax Reitz */ 512585186ebdSMax Reitz void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset, 512685186ebdSMax Reitz int64_t size, const char *message_format, ...) 
512785186ebdSMax Reitz { 5128ff99129aSKevin Wolf BDRVQcow2State *s = bs->opaque; 5129dc881b44SAlberto Garcia const char *node_name; 513085186ebdSMax Reitz char *message; 513185186ebdSMax Reitz va_list ap; 513285186ebdSMax Reitz 5133ddf3b47eSMax Reitz fatal = fatal && bdrv_is_writable(bs); 513485186ebdSMax Reitz 513585186ebdSMax Reitz if (s->signaled_corruption && 513685186ebdSMax Reitz (!fatal || (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT))) 513785186ebdSMax Reitz { 513885186ebdSMax Reitz return; 513985186ebdSMax Reitz } 514085186ebdSMax Reitz 514185186ebdSMax Reitz va_start(ap, message_format); 514285186ebdSMax Reitz message = g_strdup_vprintf(message_format, ap); 514385186ebdSMax Reitz va_end(ap); 514485186ebdSMax Reitz 514585186ebdSMax Reitz if (fatal) { 514685186ebdSMax Reitz fprintf(stderr, "qcow2: Marking image as corrupt: %s; further " 514785186ebdSMax Reitz "corruption events will be suppressed\n", message); 514885186ebdSMax Reitz } else { 514985186ebdSMax Reitz fprintf(stderr, "qcow2: Image is corrupt: %s; further non-fatal " 515085186ebdSMax Reitz "corruption events will be suppressed\n", message); 515185186ebdSMax Reitz } 515285186ebdSMax Reitz 5153dc881b44SAlberto Garcia node_name = bdrv_get_node_name(bs); 5154dc881b44SAlberto Garcia qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs), 5155dc881b44SAlberto Garcia *node_name != '\0', node_name, 5156dc881b44SAlberto Garcia message, offset >= 0, offset, 5157dc881b44SAlberto Garcia size >= 0, size, 51583ab72385SPeter Xu fatal); 515985186ebdSMax Reitz g_free(message); 516085186ebdSMax Reitz 516185186ebdSMax Reitz if (fatal) { 516285186ebdSMax Reitz qcow2_mark_corrupt(bs); 516385186ebdSMax Reitz bs->drv = NULL; /* make BDS unusable */ 516485186ebdSMax Reitz } 516585186ebdSMax Reitz 516685186ebdSMax Reitz s->signaled_corruption = true; 516785186ebdSMax Reitz } 516885186ebdSMax Reitz 51691bd0e2d1SChunyan Liu static QemuOptsList qcow2_create_opts = { 51701bd0e2d1SChunyan Liu .name = 
"qcow2-create-opts", 51711bd0e2d1SChunyan Liu .head = QTAILQ_HEAD_INITIALIZER(qcow2_create_opts.head), 51721bd0e2d1SChunyan Liu .desc = { 517320d97356SBlue Swirl { 517420d97356SBlue Swirl .name = BLOCK_OPT_SIZE, 51751bd0e2d1SChunyan Liu .type = QEMU_OPT_SIZE, 517620d97356SBlue Swirl .help = "Virtual disk size" 517720d97356SBlue Swirl }, 517820d97356SBlue Swirl { 51796744cbabSKevin Wolf .name = BLOCK_OPT_COMPAT_LEVEL, 51801bd0e2d1SChunyan Liu .type = QEMU_OPT_STRING, 51816744cbabSKevin Wolf .help = "Compatibility level (0.10 or 1.1)" 51826744cbabSKevin Wolf }, 51836744cbabSKevin Wolf { 518420d97356SBlue Swirl .name = BLOCK_OPT_BACKING_FILE, 51851bd0e2d1SChunyan Liu .type = QEMU_OPT_STRING, 518620d97356SBlue Swirl .help = "File name of a base image" 518720d97356SBlue Swirl }, 518820d97356SBlue Swirl { 518920d97356SBlue Swirl .name = BLOCK_OPT_BACKING_FMT, 51901bd0e2d1SChunyan Liu .type = QEMU_OPT_STRING, 519120d97356SBlue Swirl .help = "Image format of the base image" 519220d97356SBlue Swirl }, 519320d97356SBlue Swirl { 51949b890bdcSKevin Wolf .name = BLOCK_OPT_DATA_FILE, 51959b890bdcSKevin Wolf .type = QEMU_OPT_STRING, 51969b890bdcSKevin Wolf .help = "File name of an external data file" 51979b890bdcSKevin Wolf }, 51989b890bdcSKevin Wolf { 51996c3944dcSKevin Wolf .name = BLOCK_OPT_DATA_FILE_RAW, 52006c3944dcSKevin Wolf .type = QEMU_OPT_BOOL, 52016c3944dcSKevin Wolf .help = "The external data file must stay valid as a raw image" 52026c3944dcSKevin Wolf }, 52036c3944dcSKevin Wolf { 520420d97356SBlue Swirl .name = BLOCK_OPT_ENCRYPT, 52051bd0e2d1SChunyan Liu .type = QEMU_OPT_BOOL, 52060cb8d47bSDaniel P. Berrange .help = "Encrypt the image with format 'aes'. (Deprecated " 52070cb8d47bSDaniel P. Berrange "in favor of " BLOCK_OPT_ENCRYPT_FORMAT "=aes)", 52080cb8d47bSDaniel P. Berrange }, 52090cb8d47bSDaniel P. Berrange { 52100cb8d47bSDaniel P. Berrange .name = BLOCK_OPT_ENCRYPT_FORMAT, 52110cb8d47bSDaniel P. Berrange .type = QEMU_OPT_STRING, 52124652b8f3SDaniel P. 
Berrange .help = "Encrypt the image, format choices: 'aes', 'luks'", 521320d97356SBlue Swirl }, 52144652b8f3SDaniel P. Berrange BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.", 52154652b8f3SDaniel P. Berrange "ID of secret providing qcow AES key or LUKS passphrase"), 52164652b8f3SDaniel P. Berrange BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG("encrypt."), 52174652b8f3SDaniel P. Berrange BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE("encrypt."), 52184652b8f3SDaniel P. Berrange BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG("encrypt."), 52194652b8f3SDaniel P. Berrange BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG("encrypt."), 52204652b8f3SDaniel P. Berrange BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG("encrypt."), 52214652b8f3SDaniel P. Berrange BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME("encrypt."), 522220d97356SBlue Swirl { 522320d97356SBlue Swirl .name = BLOCK_OPT_CLUSTER_SIZE, 52241bd0e2d1SChunyan Liu .type = QEMU_OPT_SIZE, 522599cce9faSKevin Wolf .help = "qcow2 cluster size", 52261bd0e2d1SChunyan Liu .def_value_str = stringify(DEFAULT_CLUSTER_SIZE) 522720d97356SBlue Swirl }, 522820d97356SBlue Swirl { 522920d97356SBlue Swirl .name = BLOCK_OPT_PREALLOC, 52301bd0e2d1SChunyan Liu .type = QEMU_OPT_STRING, 52310e4271b7SHu Tao .help = "Preallocation mode (allowed values: off, metadata, " 52320e4271b7SHu Tao "falloc, full)" 523320d97356SBlue Swirl }, 5234bfe8043eSStefan Hajnoczi { 5235bfe8043eSStefan Hajnoczi .name = BLOCK_OPT_LAZY_REFCOUNTS, 52361bd0e2d1SChunyan Liu .type = QEMU_OPT_BOOL, 5237bfe8043eSStefan Hajnoczi .help = "Postpone refcount updates", 52381bd0e2d1SChunyan Liu .def_value_str = "off" 5239bfe8043eSStefan Hajnoczi }, 524006d05fa7SMax Reitz { 524106d05fa7SMax Reitz .name = BLOCK_OPT_REFCOUNT_BITS, 524206d05fa7SMax Reitz .type = QEMU_OPT_NUMBER, 524306d05fa7SMax Reitz .help = "Width of a reference count entry in bits", 524406d05fa7SMax Reitz .def_value_str = "16" 524506d05fa7SMax Reitz }, 52461bd0e2d1SChunyan Liu { /* end of list */ } 52471bd0e2d1SChunyan Liu } 524820d97356SBlue Swirl }; 524920d97356SBlue 
Swirl 52502654267cSMax Reitz static const char *const qcow2_strong_runtime_opts[] = { 52512654267cSMax Reitz "encrypt." BLOCK_CRYPTO_OPT_QCOW_KEY_SECRET, 52522654267cSMax Reitz 52532654267cSMax Reitz NULL 52542654267cSMax Reitz }; 52552654267cSMax Reitz 52565f535a94SMax Reitz BlockDriver bdrv_qcow2 = { 525720d97356SBlue Swirl .format_name = "qcow2", 5258ff99129aSKevin Wolf .instance_size = sizeof(BDRVQcow2State), 52597c80ab3fSJes Sorensen .bdrv_probe = qcow2_probe, 52607c80ab3fSJes Sorensen .bdrv_open = qcow2_open, 52617c80ab3fSJes Sorensen .bdrv_close = qcow2_close, 526221d82ac9SJeff Cody .bdrv_reopen_prepare = qcow2_reopen_prepare, 52635b0959a7SKevin Wolf .bdrv_reopen_commit = qcow2_reopen_commit, 52645b0959a7SKevin Wolf .bdrv_reopen_abort = qcow2_reopen_abort, 52655365f44dSKevin Wolf .bdrv_join_options = qcow2_join_options, 5266862f215fSKevin Wolf .bdrv_child_perm = bdrv_format_default_perms, 5267efc75e2aSStefan Hajnoczi .bdrv_co_create_opts = qcow2_co_create_opts, 5268b0292b85SKevin Wolf .bdrv_co_create = qcow2_co_create, 52693ac21627SPeter Lieven .bdrv_has_zero_init = bdrv_has_zero_init_1, 5270a320fb04SEric Blake .bdrv_co_block_status = qcow2_co_block_status, 527120d97356SBlue Swirl 5272ecfe1863SKevin Wolf .bdrv_co_preadv = qcow2_co_preadv, 5273d46a0bb2SKevin Wolf .bdrv_co_pwritev = qcow2_co_pwritev, 5274eb489bb1SKevin Wolf .bdrv_co_flush_to_os = qcow2_co_flush_to_os, 5275419b19d9SStefan Hajnoczi 52765544b59fSEric Blake .bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes, 527782e8a788SEric Blake .bdrv_co_pdiscard = qcow2_co_pdiscard, 5278fd9fcd37SFam Zheng .bdrv_co_copy_range_from = qcow2_co_copy_range_from, 5279fd9fcd37SFam Zheng .bdrv_co_copy_range_to = qcow2_co_copy_range_to, 5280061ca8a3SKevin Wolf .bdrv_co_truncate = qcow2_co_truncate, 5281fcccefc5SPavel Butsykin .bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed, 5282491d27e2SMax Reitz .bdrv_make_empty = qcow2_make_empty, 528320d97356SBlue Swirl 528420d97356SBlue Swirl .bdrv_snapshot_create = 
qcow2_snapshot_create, 528520d97356SBlue Swirl .bdrv_snapshot_goto = qcow2_snapshot_goto, 528620d97356SBlue Swirl .bdrv_snapshot_delete = qcow2_snapshot_delete, 528720d97356SBlue Swirl .bdrv_snapshot_list = qcow2_snapshot_list, 528851ef6727Sedison .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp, 5289c501c352SStefan Hajnoczi .bdrv_measure = qcow2_measure, 52907c80ab3fSJes Sorensen .bdrv_get_info = qcow2_get_info, 529137764dfbSMax Reitz .bdrv_get_specific_info = qcow2_get_specific_info, 529220d97356SBlue Swirl 52937c80ab3fSJes Sorensen .bdrv_save_vmstate = qcow2_save_vmstate, 52947c80ab3fSJes Sorensen .bdrv_load_vmstate = qcow2_load_vmstate, 529520d97356SBlue Swirl 52968ee79e70SKevin Wolf .supports_backing = true, 529720d97356SBlue Swirl .bdrv_change_backing_file = qcow2_change_backing_file, 529820d97356SBlue Swirl 5299d34682cdSKevin Wolf .bdrv_refresh_limits = qcow2_refresh_limits, 53002b148f39SPaolo Bonzini .bdrv_co_invalidate_cache = qcow2_co_invalidate_cache, 5301ec6d8912SKevin Wolf .bdrv_inactivate = qcow2_inactivate, 530206d9260fSAnthony Liguori 53031bd0e2d1SChunyan Liu .create_opts = &qcow2_create_opts, 53042654267cSMax Reitz .strong_runtime_opts = qcow2_strong_runtime_opts, 53058a2ce0bcSAlberto Garcia .mutable_opts = mutable_opts, 53062fd61638SPaolo Bonzini .bdrv_co_check = qcow2_co_check, 5307c282e1fdSChunyan Liu .bdrv_amend_options = qcow2_amend_options, 5308279621c0SAlberto Garcia 5309279621c0SAlberto Garcia .bdrv_detach_aio_context = qcow2_detach_aio_context, 5310279621c0SAlberto Garcia .bdrv_attach_aio_context = qcow2_attach_aio_context, 53111b6b0562SVladimir Sementsov-Ogievskiy 53121b6b0562SVladimir Sementsov-Ogievskiy .bdrv_reopen_bitmaps_rw = qcow2_reopen_bitmaps_rw, 5313da0eb242SVladimir Sementsov-Ogievskiy .bdrv_can_store_new_dirty_bitmap = qcow2_can_store_new_dirty_bitmap, 5314469c71edSVladimir Sementsov-Ogievskiy .bdrv_remove_persistent_dirty_bitmap = qcow2_remove_persistent_dirty_bitmap, 531520d97356SBlue Swirl }; 531620d97356SBlue Swirl 
53175efa9d5aSAnthony Liguori static void bdrv_qcow2_init(void) 53185efa9d5aSAnthony Liguori { 53195efa9d5aSAnthony Liguori bdrv_register(&bdrv_qcow2); 53205efa9d5aSAnthony Liguori } 53215efa9d5aSAnthony Liguori 53225efa9d5aSAnthony Liguori block_init(bdrv_qcow2_init); 5323