1585f8587Sbellard /* 2585f8587Sbellard * Block driver for the QCOW version 2 format 3585f8587Sbellard * 4585f8587Sbellard * Copyright (c) 2004-2006 Fabrice Bellard 5585f8587Sbellard * 6585f8587Sbellard * Permission is hereby granted, free of charge, to any person obtaining a copy 7585f8587Sbellard * of this software and associated documentation files (the "Software"), to deal 8585f8587Sbellard * in the Software without restriction, including without limitation the rights 9585f8587Sbellard * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10585f8587Sbellard * copies of the Software, and to permit persons to whom the Software is 11585f8587Sbellard * furnished to do so, subject to the following conditions: 12585f8587Sbellard * 13585f8587Sbellard * The above copyright notice and this permission notice shall be included in 14585f8587Sbellard * all copies or substantial portions of the Software. 15585f8587Sbellard * 16585f8587Sbellard * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17585f8587Sbellard * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18585f8587Sbellard * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19585f8587Sbellard * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20585f8587Sbellard * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21585f8587Sbellard * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22585f8587Sbellard * THE SOFTWARE. 23585f8587Sbellard */ 24faf07963Spbrook #include "qemu-common.h" 25585f8587Sbellard #include "block_int.h" 265efa9d5aSAnthony Liguori #include "module.h" 27585f8587Sbellard #include <zlib.h> 28585f8587Sbellard #include "aes.h" 29f7d0fe02SKevin Wolf #include "block/qcow2.h" 30a9420734SKevin Wolf #include "qemu-error.h" 31585f8587Sbellard 32585f8587Sbellard /* 33585f8587Sbellard Differences with QCOW: 34585f8587Sbellard 35585f8587Sbellard - Support for multiple incremental snapshots. 36585f8587Sbellard - Memory management by reference counts. 37585f8587Sbellard - Clusters which have a reference count of one have the bit 38585f8587Sbellard QCOW_OFLAG_COPIED to optimize write performance. 39585f8587Sbellard - Size of compressed clusters is stored in sectors to reduce bit usage 40585f8587Sbellard in the cluster offsets. 41585f8587Sbellard - Support for storing additional data (such as the VM state) in the 42585f8587Sbellard snapshots. 43585f8587Sbellard - If a backing store is used, the cluster size is not constrained 44585f8587Sbellard (could be backported to QCOW). 45585f8587Sbellard - L2 tables have always a size of one cluster. 46585f8587Sbellard */ 47585f8587Sbellard 489b80ddf3Saliguori 499b80ddf3Saliguori typedef struct { 509b80ddf3Saliguori uint32_t magic; 519b80ddf3Saliguori uint32_t len; 529b80ddf3Saliguori } QCowExtension; 537c80ab3fSJes Sorensen #define QCOW2_EXT_MAGIC_END 0 547c80ab3fSJes Sorensen #define QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA 559b80ddf3Saliguori 567c80ab3fSJes Sorensen static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename) 57585f8587Sbellard { 58585f8587Sbellard const QCowHeader *cow_header = (const void *)buf; 59585f8587Sbellard 60585f8587Sbellard if (buf_size >= sizeof(QCowHeader) && 61585f8587Sbellard be32_to_cpu(cow_header->magic) == QCOW_MAGIC && 62585f8587Sbellard be32_to_cpu(cow_header->version) == QCOW_VERSION) 63585f8587Sbellard return 100; 64585f8587Sbellard else 65585f8587Sbellard return 0; 66585f8587Sbellard } 67585f8587Sbellard 689b80ddf3Saliguori 699b80ddf3Saliguori /* 709b80ddf3Saliguori * read qcow2 extension and fill bs 719b80ddf3Saliguori * start reading from start_offset 729b80ddf3Saliguori * finish reading upon magic of value 0 or when end_offset reached 739b80ddf3Saliguori * unknown magic is skipped (future extension this version knows nothing about) 749b80ddf3Saliguori * return 0 upon success, non-0 otherwise 759b80ddf3Saliguori */ 767c80ab3fSJes Sorensen static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, 779b80ddf3Saliguori uint64_t end_offset) 789b80ddf3Saliguori { 799b80ddf3Saliguori QCowExtension ext; 809b80ddf3Saliguori uint64_t offset; 819b80ddf3Saliguori 829b80ddf3Saliguori #ifdef DEBUG_EXT 837c80ab3fSJes Sorensen printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset); 849b80ddf3Saliguori #endif 859b80ddf3Saliguori offset = start_offset; 869b80ddf3Saliguori while (offset < end_offset) { 879b80ddf3Saliguori 889b80ddf3Saliguori #ifdef DEBUG_EXT 899b80ddf3Saliguori /* Sanity check */ 909b80ddf3Saliguori if (offset > s->cluster_size) 917c80ab3fSJes Sorensen printf("qcow2_read_extension: suspicious offset %lu\n", offset); 929b80ddf3Saliguori 939b80ddf3Saliguori printf("attemting to read extended header in offset %lu\n", offset); 949b80ddf3Saliguori #endif 959b80ddf3Saliguori 9666f82ceeSKevin Wolf if (bdrv_pread(bs->file, offset, &ext, sizeof(ext)) != sizeof(ext)) { 977c80ab3fSJes Sorensen fprintf(stderr, "qcow2_read_extension: ERROR: " 980bfcd599SBlue Swirl "pread fail from offset %" PRIu64 "\n", 990bfcd599SBlue Swirl offset); 1009b80ddf3Saliguori return 1; 1019b80ddf3Saliguori } 1029b80ddf3Saliguori be32_to_cpus(&ext.magic); 1039b80ddf3Saliguori be32_to_cpus(&ext.len); 1049b80ddf3Saliguori offset += sizeof(ext); 1059b80ddf3Saliguori #ifdef DEBUG_EXT 1069b80ddf3Saliguori printf("ext.magic = 0x%x\n", ext.magic); 1079b80ddf3Saliguori #endif 1089b80ddf3Saliguori switch (ext.magic) { 1097c80ab3fSJes Sorensen case QCOW2_EXT_MAGIC_END: 1109b80ddf3Saliguori return 0; 111f965509cSaliguori 1127c80ab3fSJes Sorensen case QCOW2_EXT_MAGIC_BACKING_FORMAT: 113f965509cSaliguori if (ext.len >= sizeof(bs->backing_format)) { 114f965509cSaliguori fprintf(stderr, "ERROR: ext_backing_format: len=%u too large" 1154c978075Saliguori " (>=%zu)\n", 116f965509cSaliguori ext.len, sizeof(bs->backing_format)); 117f965509cSaliguori return 2; 118f965509cSaliguori } 11966f82ceeSKevin Wolf if (bdrv_pread(bs->file, offset , bs->backing_format, 120f965509cSaliguori ext.len) != ext.len) 121f965509cSaliguori return 3; 122f965509cSaliguori bs->backing_format[ext.len] = '\0'; 123f965509cSaliguori #ifdef DEBUG_EXT 124f965509cSaliguori printf("Qcow2: Got format extension %s\n", bs->backing_format); 125f965509cSaliguori #endif 126e1c7f0e3SKevin Wolf offset = ((offset + ext.len + 7) & ~7); 127f965509cSaliguori break; 128f965509cSaliguori 1299b80ddf3Saliguori default: 1309b80ddf3Saliguori /* unknown magic -- just skip it */ 131e1c7f0e3SKevin Wolf offset = ((offset + ext.len + 7) & ~7); 1329b80ddf3Saliguori break; 1339b80ddf3Saliguori } 1349b80ddf3Saliguori } 1359b80ddf3Saliguori 1369b80ddf3Saliguori return 0; 1379b80ddf3Saliguori } 1389b80ddf3Saliguori 1399b80ddf3Saliguori 1407c80ab3fSJes Sorensen static int qcow2_open(BlockDriverState *bs, int flags) 141585f8587Sbellard { 142585f8587Sbellard BDRVQcowState *s = bs->opaque; 1436d85a57eSJes Sorensen int len, i, ret = 0; 144585f8587Sbellard QCowHeader header; 1459b80ddf3Saliguori uint64_t ext_end; 14629c1a730SKevin Wolf bool writethrough; 147585f8587Sbellard 1486d85a57eSJes Sorensen ret = bdrv_pread(bs->file, 0, &header, sizeof(header)); 1496d85a57eSJes Sorensen if (ret < 0) { 150585f8587Sbellard goto fail; 1516d85a57eSJes Sorensen } 152585f8587Sbellard be32_to_cpus(&header.magic); 153585f8587Sbellard be32_to_cpus(&header.version); 154585f8587Sbellard be64_to_cpus(&header.backing_file_offset); 155585f8587Sbellard be32_to_cpus(&header.backing_file_size); 156585f8587Sbellard be64_to_cpus(&header.size); 157585f8587Sbellard be32_to_cpus(&header.cluster_bits); 158585f8587Sbellard be32_to_cpus(&header.crypt_method); 159585f8587Sbellard be64_to_cpus(&header.l1_table_offset); 160585f8587Sbellard be32_to_cpus(&header.l1_size); 161585f8587Sbellard be64_to_cpus(&header.refcount_table_offset); 162585f8587Sbellard be32_to_cpus(&header.refcount_table_clusters); 163585f8587Sbellard be64_to_cpus(&header.snapshots_offset); 164585f8587Sbellard be32_to_cpus(&header.nb_snapshots); 165585f8587Sbellard 1666d85a57eSJes Sorensen if (header.magic != QCOW_MAGIC || header.version != QCOW_VERSION) { 1676d85a57eSJes Sorensen ret = -EINVAL; 168585f8587Sbellard goto fail; 1696d85a57eSJes Sorensen } 170d191d12dSStefan Weil if (header.cluster_bits < MIN_CLUSTER_BITS || 1716d85a57eSJes Sorensen header.cluster_bits > MAX_CLUSTER_BITS) { 1726d85a57eSJes Sorensen ret = -EINVAL; 173585f8587Sbellard goto fail; 1746d85a57eSJes Sorensen } 1756d85a57eSJes Sorensen if (header.crypt_method > QCOW_CRYPT_AES) { 1766d85a57eSJes Sorensen ret = -EINVAL; 177585f8587Sbellard goto fail; 1786d85a57eSJes Sorensen } 179585f8587Sbellard s->crypt_method_header = header.crypt_method; 1806d85a57eSJes Sorensen if (s->crypt_method_header) { 181585f8587Sbellard bs->encrypted = 1; 1826d85a57eSJes Sorensen } 183585f8587Sbellard s->cluster_bits = header.cluster_bits; 184585f8587Sbellard s->cluster_size = 1 << s->cluster_bits; 185585f8587Sbellard s->cluster_sectors = 1 << (s->cluster_bits - 9); 186585f8587Sbellard s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */ 187585f8587Sbellard s->l2_size = 1 << s->l2_bits; 188585f8587Sbellard bs->total_sectors = header.size / 512; 189585f8587Sbellard s->csize_shift = (62 - (s->cluster_bits - 8)); 190585f8587Sbellard s->csize_mask = (1 << (s->cluster_bits - 8)) - 1; 191585f8587Sbellard s->cluster_offset_mask = (1LL << s->csize_shift) - 1; 192585f8587Sbellard s->refcount_table_offset = header.refcount_table_offset; 193585f8587Sbellard s->refcount_table_size = 194585f8587Sbellard header.refcount_table_clusters << (s->cluster_bits - 3); 195585f8587Sbellard 196585f8587Sbellard s->snapshots_offset = header.snapshots_offset; 197585f8587Sbellard s->nb_snapshots = header.nb_snapshots; 198585f8587Sbellard 199585f8587Sbellard /* read the level 1 table */ 200585f8587Sbellard s->l1_size = header.l1_size; 201419b19d9SStefan Hajnoczi s->l1_vm_state_index = size_to_l1(s, header.size); 202585f8587Sbellard /* the L1 table must contain at least enough entries to put 203585f8587Sbellard header.size bytes */ 2046d85a57eSJes Sorensen if (s->l1_size < s->l1_vm_state_index) { 2056d85a57eSJes Sorensen ret = -EINVAL; 206585f8587Sbellard goto fail; 2076d85a57eSJes Sorensen } 208585f8587Sbellard s->l1_table_offset = header.l1_table_offset; 209d191d12dSStefan Weil if (s->l1_size > 0) { 2103f6a3ee5SKevin Wolf s->l1_table = qemu_mallocz( 2113f6a3ee5SKevin Wolf align_offset(s->l1_size * sizeof(uint64_t), 512)); 2126d85a57eSJes Sorensen ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, 2136d85a57eSJes Sorensen s->l1_size * sizeof(uint64_t)); 2146d85a57eSJes Sorensen if (ret < 0) { 215585f8587Sbellard goto fail; 2166d85a57eSJes Sorensen } 217585f8587Sbellard for(i = 0;i < s->l1_size; i++) { 218585f8587Sbellard be64_to_cpus(&s->l1_table[i]); 219585f8587Sbellard } 220d191d12dSStefan Weil } 22129c1a730SKevin Wolf 22229c1a730SKevin Wolf /* alloc L2 table/refcount block cache */ 22329c1a730SKevin Wolf writethrough = ((flags & BDRV_O_CACHE_MASK) == 0); 22429c1a730SKevin Wolf s->l2_table_cache = qcow2_cache_create(bs, L2_CACHE_SIZE, writethrough); 22529c1a730SKevin Wolf s->refcount_block_cache = qcow2_cache_create(bs, REFCOUNT_CACHE_SIZE, 22629c1a730SKevin Wolf writethrough); 22729c1a730SKevin Wolf 228585f8587Sbellard s->cluster_cache = qemu_malloc(s->cluster_size); 229585f8587Sbellard /* one more sector for decompressed data alignment */ 230095a9c58Saliguori s->cluster_data = qemu_malloc(QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size 231095a9c58Saliguori + 512); 232585f8587Sbellard s->cluster_cache_offset = -1; 233585f8587Sbellard 2346d85a57eSJes Sorensen ret = qcow2_refcount_init(bs); 2356d85a57eSJes Sorensen if (ret != 0) { 236585f8587Sbellard goto fail; 2376d85a57eSJes Sorensen } 238585f8587Sbellard 23972cf2d4fSBlue Swirl QLIST_INIT(&s->cluster_allocs); 240f214978aSKevin Wolf 2419b80ddf3Saliguori /* read qcow2 extensions */ 2426d85a57eSJes Sorensen if (header.backing_file_offset) { 2439b80ddf3Saliguori ext_end = header.backing_file_offset; 2446d85a57eSJes Sorensen } else { 2459b80ddf3Saliguori ext_end = s->cluster_size; 2466d85a57eSJes Sorensen } 2476d85a57eSJes Sorensen if (qcow2_read_extensions(bs, sizeof(header), ext_end)) { 2486d85a57eSJes Sorensen ret = -EINVAL; 2499b80ddf3Saliguori goto fail; 2506d85a57eSJes Sorensen } 2519b80ddf3Saliguori 252585f8587Sbellard /* read the backing file name */ 253585f8587Sbellard if (header.backing_file_offset != 0) { 254585f8587Sbellard len = header.backing_file_size; 2556d85a57eSJes Sorensen if (len > 1023) { 256585f8587Sbellard len = 1023; 2576d85a57eSJes Sorensen } 2586d85a57eSJes Sorensen ret = bdrv_pread(bs->file, header.backing_file_offset, 2596d85a57eSJes Sorensen bs->backing_file, len); 2606d85a57eSJes Sorensen if (ret < 0) { 261585f8587Sbellard goto fail; 2626d85a57eSJes Sorensen } 263585f8587Sbellard bs->backing_file[len] = '\0'; 264585f8587Sbellard } 2656d85a57eSJes Sorensen if (qcow2_read_snapshots(bs) < 0) { 2666d85a57eSJes Sorensen ret = -EINVAL; 267585f8587Sbellard goto fail; 2686d85a57eSJes Sorensen } 269585f8587Sbellard 270585f8587Sbellard #ifdef DEBUG_ALLOC 27114899cdfSFilip Navara qcow2_check_refcounts(bs); 272585f8587Sbellard #endif 2736d85a57eSJes Sorensen return ret; 274585f8587Sbellard 275585f8587Sbellard fail: 276ed6ccf0fSKevin Wolf qcow2_free_snapshots(bs); 277ed6ccf0fSKevin Wolf qcow2_refcount_close(bs); 278585f8587Sbellard qemu_free(s->l1_table); 27929c1a730SKevin Wolf if (s->l2_table_cache) { 28029c1a730SKevin Wolf qcow2_cache_destroy(bs, s->l2_table_cache); 28129c1a730SKevin Wolf } 282585f8587Sbellard qemu_free(s->cluster_cache); 283585f8587Sbellard qemu_free(s->cluster_data); 2846d85a57eSJes Sorensen return ret; 285585f8587Sbellard } 286585f8587Sbellard 2877c80ab3fSJes Sorensen static int qcow2_set_key(BlockDriverState *bs, const char *key) 288585f8587Sbellard { 289585f8587Sbellard BDRVQcowState *s = bs->opaque; 290585f8587Sbellard uint8_t keybuf[16]; 291585f8587Sbellard int len, i; 292585f8587Sbellard 293585f8587Sbellard memset(keybuf, 0, 16); 294585f8587Sbellard len = strlen(key); 295585f8587Sbellard if (len > 16) 296585f8587Sbellard len = 16; 297585f8587Sbellard /* XXX: we could compress the chars to 7 bits to increase 298585f8587Sbellard entropy */ 299585f8587Sbellard for(i = 0;i < len;i++) { 300585f8587Sbellard keybuf[i] = key[i]; 301585f8587Sbellard } 302585f8587Sbellard s->crypt_method = s->crypt_method_header; 303585f8587Sbellard 304585f8587Sbellard if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0) 305585f8587Sbellard return -1; 306585f8587Sbellard if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0) 307585f8587Sbellard return -1; 308585f8587Sbellard #if 0 309585f8587Sbellard /* test */ 310585f8587Sbellard { 311585f8587Sbellard uint8_t in[16]; 312585f8587Sbellard uint8_t out[16]; 313585f8587Sbellard uint8_t tmp[16]; 314585f8587Sbellard for(i=0;i<16;i++) 315585f8587Sbellard in[i] = i; 316585f8587Sbellard AES_encrypt(in, tmp, &s->aes_encrypt_key); 317585f8587Sbellard AES_decrypt(tmp, out, &s->aes_decrypt_key); 318585f8587Sbellard for(i = 0; i < 16; i++) 319585f8587Sbellard printf(" %02x", tmp[i]); 320585f8587Sbellard printf("\n"); 321585f8587Sbellard for(i = 0; i < 16; i++) 322585f8587Sbellard printf(" %02x", out[i]); 323585f8587Sbellard printf("\n"); 324585f8587Sbellard } 325585f8587Sbellard #endif 326585f8587Sbellard return 0; 327585f8587Sbellard } 328585f8587Sbellard 3297c80ab3fSJes Sorensen static int qcow2_is_allocated(BlockDriverState *bs, int64_t sector_num, 330585f8587Sbellard int nb_sectors, int *pnum) 331585f8587Sbellard { 332585f8587Sbellard uint64_t cluster_offset; 3331c46efaaSKevin Wolf int ret; 334585f8587Sbellard 335095a9c58Saliguori *pnum = nb_sectors; 3361c46efaaSKevin Wolf /* FIXME We can get errors here, but the bdrv_is_allocated interface can't 3371c46efaaSKevin Wolf * pass them on today */ 3381c46efaaSKevin Wolf ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset); 3391c46efaaSKevin Wolf if (ret < 0) { 3401c46efaaSKevin Wolf *pnum = 0; 3411c46efaaSKevin Wolf } 342095a9c58Saliguori 343585f8587Sbellard return (cluster_offset != 0); 344585f8587Sbellard } 345585f8587Sbellard 346a9465922Sbellard /* handle reading after the end of the backing file */ 347bd28f835SKevin Wolf int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, 348bd28f835SKevin Wolf int64_t sector_num, int nb_sectors) 349a9465922Sbellard { 350a9465922Sbellard int n1; 351a9465922Sbellard if ((sector_num + nb_sectors) <= bs->total_sectors) 352a9465922Sbellard return nb_sectors; 353a9465922Sbellard if (sector_num >= bs->total_sectors) 354a9465922Sbellard n1 = 0; 355a9465922Sbellard else 356a9465922Sbellard n1 = bs->total_sectors - sector_num; 357bd28f835SKevin Wolf 358bd28f835SKevin Wolf qemu_iovec_memset(qiov, 0, 512 * (nb_sectors - n1)); 359bd28f835SKevin Wolf 360a9465922Sbellard return n1; 361a9465922Sbellard } 362a9465922Sbellard 363ce1a14dcSpbrook typedef struct QCowAIOCB { 364ce1a14dcSpbrook BlockDriverAIOCB common; 365585f8587Sbellard int64_t sector_num; 366f141eafeSaliguori QEMUIOVector *qiov; 3677b88e48bSChristoph Hellwig int remaining_sectors; 3687b88e48bSChristoph Hellwig int cur_nr_sectors; /* number of sectors in current iteration */ 369bd28f835SKevin Wolf uint64_t bytes_done; 370585f8587Sbellard uint64_t cluster_offset; 371585f8587Sbellard uint8_t *cluster_data; 372585f8587Sbellard BlockDriverAIOCB *hd_aiocb; 373c87c0672Saliguori QEMUIOVector hd_qiov; 3741490791fSaliguori QEMUBH *bh; 375e976c6a1Saliguori QCowL2Meta l2meta; 37672cf2d4fSBlue Swirl QLIST_ENTRY(QCowAIOCB) next_depend; 377585f8587Sbellard } QCowAIOCB; 378585f8587Sbellard 3797c80ab3fSJes Sorensen static void qcow2_aio_cancel(BlockDriverAIOCB *blockacb) 380c16b5a2cSChristoph Hellwig { 381b666d239SKevin Wolf QCowAIOCB *acb = container_of(blockacb, QCowAIOCB, common); 382c16b5a2cSChristoph Hellwig if (acb->hd_aiocb) 383c16b5a2cSChristoph Hellwig bdrv_aio_cancel(acb->hd_aiocb); 384c16b5a2cSChristoph Hellwig qemu_aio_release(acb); 385c16b5a2cSChristoph Hellwig } 386c16b5a2cSChristoph Hellwig 3877c80ab3fSJes Sorensen static AIOPool qcow2_aio_pool = { 388c16b5a2cSChristoph Hellwig .aiocb_size = sizeof(QCowAIOCB), 3897c80ab3fSJes Sorensen .cancel = qcow2_aio_cancel, 390c16b5a2cSChristoph Hellwig }; 391c16b5a2cSChristoph Hellwig 3927c80ab3fSJes Sorensen static void qcow2_aio_read_cb(void *opaque, int ret); 3937c80ab3fSJes Sorensen static void qcow2_aio_read_bh(void *opaque) 3941490791fSaliguori { 3951490791fSaliguori QCowAIOCB *acb = opaque; 3961490791fSaliguori qemu_bh_delete(acb->bh); 3971490791fSaliguori acb->bh = NULL; 3987c80ab3fSJes Sorensen qcow2_aio_read_cb(opaque, 0); 3991490791fSaliguori } 4001490791fSaliguori 4017c80ab3fSJes Sorensen static int qcow2_schedule_bh(QEMUBHFunc *cb, QCowAIOCB *acb) 402a32ef786Saliguori { 403a32ef786Saliguori if (acb->bh) 404a32ef786Saliguori return -EIO; 405a32ef786Saliguori 406a32ef786Saliguori acb->bh = qemu_bh_new(cb, acb); 407a32ef786Saliguori if (!acb->bh) 408a32ef786Saliguori return -EIO; 409a32ef786Saliguori 410a32ef786Saliguori qemu_bh_schedule(acb->bh); 411a32ef786Saliguori 412a32ef786Saliguori return 0; 413a32ef786Saliguori } 414a32ef786Saliguori 4157c80ab3fSJes Sorensen static void qcow2_aio_read_cb(void *opaque, int ret) 416585f8587Sbellard { 417ce1a14dcSpbrook QCowAIOCB *acb = opaque; 418ce1a14dcSpbrook BlockDriverState *bs = acb->common.bs; 419585f8587Sbellard BDRVQcowState *s = bs->opaque; 420a9465922Sbellard int index_in_cluster, n1; 421585f8587Sbellard 422ce1a14dcSpbrook acb->hd_aiocb = NULL; 423f141eafeSaliguori if (ret < 0) 424f141eafeSaliguori goto done; 425585f8587Sbellard 426585f8587Sbellard /* post process the read buffer */ 427ce1a14dcSpbrook if (!acb->cluster_offset) { 428585f8587Sbellard /* nothing to do */ 429ce1a14dcSpbrook } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) { 430585f8587Sbellard /* nothing to do */ 431585f8587Sbellard } else { 432585f8587Sbellard if (s->crypt_method) { 433bd28f835SKevin Wolf qcow2_encrypt_sectors(s, acb->sector_num, acb->cluster_data, 434bd28f835SKevin Wolf acb->cluster_data, acb->cur_nr_sectors, 0, &s->aes_decrypt_key); 435bd28f835SKevin Wolf qemu_iovec_reset(&acb->hd_qiov); 436bd28f835SKevin Wolf qemu_iovec_copy(&acb->hd_qiov, acb->qiov, acb->bytes_done, 437bd28f835SKevin Wolf acb->cur_nr_sectors * 512); 438bd28f835SKevin Wolf qemu_iovec_from_buffer(&acb->hd_qiov, acb->cluster_data, 439bd28f835SKevin Wolf 512 * acb->cur_nr_sectors); 440585f8587Sbellard } 441585f8587Sbellard } 442585f8587Sbellard 4437b88e48bSChristoph Hellwig acb->remaining_sectors -= acb->cur_nr_sectors; 4447b88e48bSChristoph Hellwig acb->sector_num += acb->cur_nr_sectors; 445bd28f835SKevin Wolf acb->bytes_done += acb->cur_nr_sectors * 512; 446585f8587Sbellard 4477b88e48bSChristoph Hellwig if (acb->remaining_sectors == 0) { 448585f8587Sbellard /* request completed */ 449f141eafeSaliguori ret = 0; 450f141eafeSaliguori goto done; 451585f8587Sbellard } 452585f8587Sbellard 453585f8587Sbellard /* prepare next AIO request */ 4547b88e48bSChristoph Hellwig acb->cur_nr_sectors = acb->remaining_sectors; 455bd28f835SKevin Wolf if (s->crypt_method) { 456bd28f835SKevin Wolf acb->cur_nr_sectors = MIN(acb->cur_nr_sectors, 457bd28f835SKevin Wolf QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors); 458bd28f835SKevin Wolf } 459bd28f835SKevin Wolf 4601c46efaaSKevin Wolf ret = qcow2_get_cluster_offset(bs, acb->sector_num << 9, 4611c46efaaSKevin Wolf &acb->cur_nr_sectors, &acb->cluster_offset); 4621c46efaaSKevin Wolf if (ret < 0) { 4631c46efaaSKevin Wolf goto done; 4641c46efaaSKevin Wolf } 4651c46efaaSKevin Wolf 466095a9c58Saliguori index_in_cluster = acb->sector_num & (s->cluster_sectors - 1); 467585f8587Sbellard 468bd28f835SKevin Wolf qemu_iovec_reset(&acb->hd_qiov); 469bd28f835SKevin Wolf qemu_iovec_copy(&acb->hd_qiov, acb->qiov, acb->bytes_done, 470bd28f835SKevin Wolf acb->cur_nr_sectors * 512); 471bd28f835SKevin Wolf 472ce1a14dcSpbrook if (!acb->cluster_offset) { 473bd28f835SKevin Wolf 474585f8587Sbellard if (bs->backing_hd) { 475585f8587Sbellard /* read from the base image */ 476bd28f835SKevin Wolf n1 = qcow2_backing_read1(bs->backing_hd, &acb->hd_qiov, 477bd28f835SKevin Wolf acb->sector_num, acb->cur_nr_sectors); 478a9465922Sbellard if (n1 > 0) { 47966f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); 480c87c0672Saliguori acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num, 4817b88e48bSChristoph Hellwig &acb->hd_qiov, acb->cur_nr_sectors, 4827c80ab3fSJes Sorensen qcow2_aio_read_cb, acb); 483ce1a14dcSpbrook if (acb->hd_aiocb == NULL) 484f141eafeSaliguori goto done; 485585f8587Sbellard } else { 4867c80ab3fSJes Sorensen ret = qcow2_schedule_bh(qcow2_aio_read_bh, acb); 487a32ef786Saliguori if (ret < 0) 488f141eafeSaliguori goto done; 4891490791fSaliguori } 490a9465922Sbellard } else { 491585f8587Sbellard /* Note: in this case, no need to wait */ 492bd28f835SKevin Wolf qemu_iovec_memset(&acb->hd_qiov, 0, 512 * acb->cur_nr_sectors); 4937c80ab3fSJes Sorensen ret = qcow2_schedule_bh(qcow2_aio_read_bh, acb); 494a32ef786Saliguori if (ret < 0) 495f141eafeSaliguori goto done; 4961490791fSaliguori } 497ce1a14dcSpbrook } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) { 498585f8587Sbellard /* add AIO support for compressed blocks ? */ 49966f82ceeSKevin Wolf if (qcow2_decompress_cluster(bs, acb->cluster_offset) < 0) 500f141eafeSaliguori goto done; 501bd28f835SKevin Wolf 502bd28f835SKevin Wolf qemu_iovec_from_buffer(&acb->hd_qiov, 503bd28f835SKevin Wolf s->cluster_cache + index_in_cluster * 512, 5047b88e48bSChristoph Hellwig 512 * acb->cur_nr_sectors); 505bd28f835SKevin Wolf 5067c80ab3fSJes Sorensen ret = qcow2_schedule_bh(qcow2_aio_read_bh, acb); 507a32ef786Saliguori if (ret < 0) 508f141eafeSaliguori goto done; 509585f8587Sbellard } else { 510ce1a14dcSpbrook if ((acb->cluster_offset & 511) != 0) { 511585f8587Sbellard ret = -EIO; 512f141eafeSaliguori goto done; 513585f8587Sbellard } 514c87c0672Saliguori 515bd28f835SKevin Wolf if (s->crypt_method) { 516bd28f835SKevin Wolf /* 517bd28f835SKevin Wolf * For encrypted images, read everything into a temporary 518bd28f835SKevin Wolf * contiguous buffer on which the AES functions can work. 519bd28f835SKevin Wolf */ 520bd28f835SKevin Wolf if (!acb->cluster_data) { 521bd28f835SKevin Wolf acb->cluster_data = 522bd28f835SKevin Wolf qemu_mallocz(QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); 523bd28f835SKevin Wolf } 524bd28f835SKevin Wolf 525bd28f835SKevin Wolf assert(acb->cur_nr_sectors <= 526bd28f835SKevin Wolf QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors); 527bd28f835SKevin Wolf qemu_iovec_reset(&acb->hd_qiov); 528bd28f835SKevin Wolf qemu_iovec_add(&acb->hd_qiov, acb->cluster_data, 529bd28f835SKevin Wolf 512 * acb->cur_nr_sectors); 530bd28f835SKevin Wolf } 531bd28f835SKevin Wolf 53266f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); 53366f82ceeSKevin Wolf acb->hd_aiocb = bdrv_aio_readv(bs->file, 534ce1a14dcSpbrook (acb->cluster_offset >> 9) + index_in_cluster, 5357b88e48bSChristoph Hellwig &acb->hd_qiov, acb->cur_nr_sectors, 5367c80ab3fSJes Sorensen qcow2_aio_read_cb, acb); 537171e3d6bSKevin Wolf if (acb->hd_aiocb == NULL) { 538171e3d6bSKevin Wolf ret = -EIO; 539f141eafeSaliguori goto done; 540585f8587Sbellard } 541171e3d6bSKevin Wolf } 542f141eafeSaliguori 543f141eafeSaliguori return; 544f141eafeSaliguori done: 545f141eafeSaliguori acb->common.cb(acb->common.opaque, ret); 546bd28f835SKevin Wolf qemu_iovec_destroy(&acb->hd_qiov); 547f141eafeSaliguori qemu_aio_release(acb); 548585f8587Sbellard } 549585f8587Sbellard 5507c80ab3fSJes Sorensen static QCowAIOCB *qcow2_aio_setup(BlockDriverState *bs, int64_t sector_num, 5517c80ab3fSJes Sorensen QEMUIOVector *qiov, int nb_sectors, 5527c80ab3fSJes Sorensen BlockDriverCompletionFunc *cb, 5537c80ab3fSJes Sorensen void *opaque, int is_write) 554585f8587Sbellard { 555ce1a14dcSpbrook QCowAIOCB *acb; 556585f8587Sbellard 5577c80ab3fSJes Sorensen acb = qemu_aio_get(&qcow2_aio_pool, bs, cb, opaque); 558ce1a14dcSpbrook if (!acb) 559ce1a14dcSpbrook return NULL; 560ce1a14dcSpbrook acb->hd_aiocb = NULL; 561ce1a14dcSpbrook acb->sector_num = sector_num; 562f141eafeSaliguori acb->qiov = qiov; 563bd28f835SKevin Wolf 564bd28f835SKevin Wolf qemu_iovec_init(&acb->hd_qiov, qiov->niov); 565bd28f835SKevin Wolf 566bd28f835SKevin Wolf acb->bytes_done = 0; 5677b88e48bSChristoph Hellwig acb->remaining_sectors = nb_sectors; 5687b88e48bSChristoph Hellwig acb->cur_nr_sectors = 0; 569ce1a14dcSpbrook acb->cluster_offset = 0; 570e976c6a1Saliguori acb->l2meta.nb_clusters = 0; 57172cf2d4fSBlue Swirl QLIST_INIT(&acb->l2meta.dependent_requests); 572ce1a14dcSpbrook return acb; 573ce1a14dcSpbrook } 574ce1a14dcSpbrook 5757c80ab3fSJes Sorensen static BlockDriverAIOCB *qcow2_aio_readv(BlockDriverState *bs, 5767c80ab3fSJes Sorensen int64_t sector_num, 5777c80ab3fSJes Sorensen QEMUIOVector *qiov, int nb_sectors, 5787c80ab3fSJes Sorensen BlockDriverCompletionFunc *cb, 5797c80ab3fSJes Sorensen void *opaque) 580ce1a14dcSpbrook { 581ce1a14dcSpbrook QCowAIOCB *acb; 582ce1a14dcSpbrook 5837c80ab3fSJes Sorensen acb = qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); 584ce1a14dcSpbrook if (!acb) 585ce1a14dcSpbrook return NULL; 586585f8587Sbellard 5877c80ab3fSJes Sorensen qcow2_aio_read_cb(acb, 0); 588ce1a14dcSpbrook return &acb->common; 589585f8587Sbellard } 590585f8587Sbellard 5917c80ab3fSJes Sorensen static void qcow2_aio_write_cb(void *opaque, int ret); 592f214978aSKevin Wolf 593f214978aSKevin Wolf static void run_dependent_requests(QCowL2Meta *m) 594f214978aSKevin Wolf { 595f214978aSKevin Wolf QCowAIOCB *req; 596f214978aSKevin Wolf QCowAIOCB *next; 597f214978aSKevin Wolf 598f214978aSKevin Wolf /* Take the request off the list of running requests */ 599f214978aSKevin Wolf if (m->nb_clusters != 0) { 60072cf2d4fSBlue Swirl QLIST_REMOVE(m, next_in_flight); 601f214978aSKevin Wolf } 602f214978aSKevin Wolf 603d4c146f0SStefan Hajnoczi /* Restart all dependent requests */ 604d4c146f0SStefan Hajnoczi QLIST_FOREACH_SAFE(req, &m->dependent_requests, next_depend, next) { 6057c80ab3fSJes Sorensen qcow2_aio_write_cb(req, 0); 606f214978aSKevin Wolf } 607f214978aSKevin Wolf 608f214978aSKevin Wolf /* Empty the list for the next part of the request */ 60972cf2d4fSBlue Swirl QLIST_INIT(&m->dependent_requests); 610f214978aSKevin Wolf } 611f214978aSKevin Wolf 6127c80ab3fSJes Sorensen static void qcow2_aio_write_cb(void *opaque, int ret) 613585f8587Sbellard { 614ce1a14dcSpbrook QCowAIOCB *acb = opaque; 615ce1a14dcSpbrook BlockDriverState *bs = acb->common.bs; 616585f8587Sbellard BDRVQcowState *s = bs->opaque; 617585f8587Sbellard int index_in_cluster; 618095a9c58Saliguori int n_end; 619585f8587Sbellard 620ce1a14dcSpbrook acb->hd_aiocb = NULL; 621ce1a14dcSpbrook 622f214978aSKevin Wolf if (ret >= 0) { 623148da7eaSKevin Wolf ret = qcow2_alloc_cluster_link_l2(bs, &acb->l2meta); 624f214978aSKevin Wolf } 625f214978aSKevin Wolf 626f214978aSKevin Wolf run_dependent_requests(&acb->l2meta); 627f214978aSKevin Wolf 628f141eafeSaliguori if (ret < 0) 629f141eafeSaliguori goto done; 630585f8587Sbellard 6317b88e48bSChristoph Hellwig acb->remaining_sectors -= acb->cur_nr_sectors; 6327b88e48bSChristoph Hellwig acb->sector_num += acb->cur_nr_sectors; 6336f5f060bSKevin Wolf acb->bytes_done += acb->cur_nr_sectors * 512; 634585f8587Sbellard 6357b88e48bSChristoph Hellwig if (acb->remaining_sectors == 0) { 636585f8587Sbellard /* request completed */ 637f141eafeSaliguori ret = 0; 638f141eafeSaliguori goto done; 639585f8587Sbellard } 640585f8587Sbellard 641ce1a14dcSpbrook index_in_cluster = acb->sector_num & (s->cluster_sectors - 1); 6427b88e48bSChristoph Hellwig n_end = index_in_cluster + acb->remaining_sectors; 643095a9c58Saliguori if (s->crypt_method && 644095a9c58Saliguori n_end > QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors) 645095a9c58Saliguori n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors; 646095a9c58Saliguori 647148da7eaSKevin Wolf ret = qcow2_alloc_cluster_offset(bs, acb->sector_num << 9, 6487b88e48bSChristoph Hellwig index_in_cluster, n_end, &acb->cur_nr_sectors, &acb->l2meta); 649148da7eaSKevin Wolf if (ret < 0) { 650148da7eaSKevin Wolf goto done; 651148da7eaSKevin Wolf } 652148da7eaSKevin Wolf 653148da7eaSKevin Wolf acb->cluster_offset = acb->l2meta.cluster_offset; 654f214978aSKevin Wolf 655f214978aSKevin Wolf /* Need to wait for another request? If so, we are done for now. */ 656148da7eaSKevin Wolf if (acb->l2meta.nb_clusters == 0 && acb->l2meta.depends_on != NULL) { 65772cf2d4fSBlue Swirl QLIST_INSERT_HEAD(&acb->l2meta.depends_on->dependent_requests, 658f214978aSKevin Wolf acb, next_depend); 659f214978aSKevin Wolf return; 660f214978aSKevin Wolf } 661f214978aSKevin Wolf 662148da7eaSKevin Wolf assert((acb->cluster_offset & 511) == 0); 663148da7eaSKevin Wolf 6646f5f060bSKevin Wolf qemu_iovec_reset(&acb->hd_qiov); 6656f5f060bSKevin Wolf qemu_iovec_copy(&acb->hd_qiov, acb->qiov, acb->bytes_done, 6666f5f060bSKevin Wolf acb->cur_nr_sectors * 512); 6676f5f060bSKevin Wolf 668585f8587Sbellard if (s->crypt_method) { 669ce1a14dcSpbrook if (!acb->cluster_data) { 670095a9c58Saliguori acb->cluster_data = qemu_mallocz(QCOW_MAX_CRYPT_CLUSTERS * 671095a9c58Saliguori s->cluster_size); 672585f8587Sbellard } 6736f5f060bSKevin Wolf 6746f5f060bSKevin Wolf assert(acb->hd_qiov.size <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); 6756f5f060bSKevin Wolf qemu_iovec_to_buffer(&acb->hd_qiov, acb->cluster_data); 6766f5f060bSKevin Wolf 6776f5f060bSKevin Wolf qcow2_encrypt_sectors(s, acb->sector_num, acb->cluster_data, 6786f5f060bSKevin Wolf acb->cluster_data, acb->cur_nr_sectors, 1, &s->aes_encrypt_key); 6796f5f060bSKevin Wolf 6806f5f060bSKevin Wolf qemu_iovec_reset(&acb->hd_qiov); 6816f5f060bSKevin Wolf qemu_iovec_add(&acb->hd_qiov, acb->cluster_data, 6826f5f060bSKevin Wolf acb->cur_nr_sectors * 512); 683585f8587Sbellard } 6846f5f060bSKevin Wolf 68566f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); 68666f82ceeSKevin Wolf acb->hd_aiocb = bdrv_aio_writev(bs->file, 687e976c6a1Saliguori (acb->cluster_offset >> 9) + index_in_cluster, 6887b88e48bSChristoph Hellwig &acb->hd_qiov, acb->cur_nr_sectors, 6897c80ab3fSJes Sorensen qcow2_aio_write_cb, acb); 690171e3d6bSKevin Wolf if (acb->hd_aiocb == NULL) { 691171e3d6bSKevin Wolf ret = -EIO; 692c644db3dSKevin Wolf goto fail; 693171e3d6bSKevin Wolf } 694f141eafeSaliguori 695f141eafeSaliguori return; 696f141eafeSaliguori 697c644db3dSKevin Wolf fail: 698c644db3dSKevin Wolf if (acb->l2meta.nb_clusters != 0) { 699c644db3dSKevin Wolf QLIST_REMOVE(&acb->l2meta, next_in_flight); 700c644db3dSKevin Wolf } 701f141eafeSaliguori done: 702f141eafeSaliguori acb->common.cb(acb->common.opaque, ret); 7036f5f060bSKevin Wolf qemu_iovec_destroy(&acb->hd_qiov); 704f141eafeSaliguori qemu_aio_release(acb); 705585f8587Sbellard } 706585f8587Sbellard 7077c80ab3fSJes Sorensen static BlockDriverAIOCB *qcow2_aio_writev(BlockDriverState *bs, 7087c80ab3fSJes Sorensen int64_t sector_num, 7097c80ab3fSJes Sorensen QEMUIOVector *qiov, int nb_sectors, 7107c80ab3fSJes Sorensen BlockDriverCompletionFunc *cb, 7117c80ab3fSJes Sorensen void *opaque) 712585f8587Sbellard { 713585f8587Sbellard BDRVQcowState *s = bs->opaque; 714ce1a14dcSpbrook QCowAIOCB *acb; 715585f8587Sbellard 716585f8587Sbellard s->cluster_cache_offset = -1; /* disable compressed cache */ 717585f8587Sbellard 7187c80ab3fSJes Sorensen acb = qcow2_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); 719ce1a14dcSpbrook if (!acb) 720ce1a14dcSpbrook return NULL; 721585f8587Sbellard 7227c80ab3fSJes Sorensen qcow2_aio_write_cb(acb, 0); 723ce1a14dcSpbrook return &acb->common; 724585f8587Sbellard } 725585f8587Sbellard 7267c80ab3fSJes Sorensen static void qcow2_close(BlockDriverState *bs) 727585f8587Sbellard { 728585f8587Sbellard BDRVQcowState *s = bs->opaque; 729585f8587Sbellard qemu_free(s->l1_table); 73029c1a730SKevin Wolf 73129c1a730SKevin Wolf qcow2_cache_flush(bs, s->l2_table_cache); 73229c1a730SKevin Wolf qcow2_cache_flush(bs, s->refcount_block_cache); 73329c1a730SKevin Wolf 73429c1a730SKevin Wolf qcow2_cache_destroy(bs, s->l2_table_cache); 73529c1a730SKevin Wolf qcow2_cache_destroy(bs, s->refcount_block_cache); 73629c1a730SKevin Wolf 737585f8587Sbellard qemu_free(s->cluster_cache); 738585f8587Sbellard qemu_free(s->cluster_data); 739ed6ccf0fSKevin Wolf qcow2_refcount_close(bs); 740585f8587Sbellard } 741585f8587Sbellard 742756e6736SKevin Wolf /* 743756e6736SKevin Wolf * Updates the variable length parts of the qcow2 header, i.e. the backing file 744756e6736SKevin Wolf * name and all extensions. qcow2 was not designed to allow such changes, so if 745756e6736SKevin Wolf * we run out of space (we can only use the first cluster) this function may 746756e6736SKevin Wolf * fail. 747756e6736SKevin Wolf * 748756e6736SKevin Wolf * Returns 0 on success, -errno in error cases. 749756e6736SKevin Wolf */ 750756e6736SKevin Wolf static int qcow2_update_ext_header(BlockDriverState *bs, 751756e6736SKevin Wolf const char *backing_file, const char *backing_fmt) 752756e6736SKevin Wolf { 753756e6736SKevin Wolf size_t backing_file_len = 0; 754756e6736SKevin Wolf size_t backing_fmt_len = 0; 755756e6736SKevin Wolf BDRVQcowState *s = bs->opaque; 756756e6736SKevin Wolf QCowExtension ext_backing_fmt = {0, 0}; 757756e6736SKevin Wolf int ret; 758756e6736SKevin Wolf 759756e6736SKevin Wolf /* Backing file format doesn't make sense without a backing file */ 760756e6736SKevin Wolf if (backing_fmt && !backing_file) { 761756e6736SKevin Wolf return -EINVAL; 762756e6736SKevin Wolf } 763756e6736SKevin Wolf 764756e6736SKevin Wolf /* Prepare the backing file format extension if needed */ 765756e6736SKevin Wolf if (backing_fmt) { 766756e6736SKevin Wolf ext_backing_fmt.len = cpu_to_be32(strlen(backing_fmt)); 7677c80ab3fSJes Sorensen ext_backing_fmt.magic = cpu_to_be32(QCOW2_EXT_MAGIC_BACKING_FORMAT); 768756e6736SKevin Wolf backing_fmt_len = ((sizeof(ext_backing_fmt) 769756e6736SKevin Wolf + strlen(backing_fmt) + 7) & ~7); 770756e6736SKevin Wolf } 771756e6736SKevin Wolf 772756e6736SKevin Wolf /* Check if we can fit the new header into the first cluster */ 773756e6736SKevin Wolf if (backing_file) { 774756e6736SKevin Wolf backing_file_len = strlen(backing_file); 775756e6736SKevin Wolf } 776756e6736SKevin Wolf 777756e6736SKevin Wolf size_t header_size = sizeof(QCowHeader) + backing_file_len 778756e6736SKevin Wolf + backing_fmt_len; 779756e6736SKevin Wolf 780756e6736SKevin Wolf if (header_size > s->cluster_size) { 781756e6736SKevin Wolf return -ENOSPC; 782756e6736SKevin Wolf } 783756e6736SKevin Wolf 784756e6736SKevin Wolf /* Rewrite backing file name and qcow2 extensions */ 785756e6736SKevin Wolf size_t ext_size = header_size - sizeof(QCowHeader); 786756e6736SKevin Wolf uint8_t buf[ext_size]; 787756e6736SKevin Wolf size_t offset = 0; 788756e6736SKevin Wolf size_t backing_file_offset = 0; 789756e6736SKevin Wolf 790756e6736SKevin Wolf if (backing_file) { 791756e6736SKevin Wolf if (backing_fmt) { 792756e6736SKevin Wolf int padding = backing_fmt_len - 793756e6736SKevin Wolf (sizeof(ext_backing_fmt) + strlen(backing_fmt)); 794756e6736SKevin Wolf 795756e6736SKevin Wolf memcpy(buf + offset, &ext_backing_fmt, sizeof(ext_backing_fmt)); 796756e6736SKevin Wolf offset += sizeof(ext_backing_fmt); 797756e6736SKevin Wolf 798756e6736SKevin Wolf memcpy(buf + offset, backing_fmt, strlen(backing_fmt)); 799756e6736SKevin Wolf offset += strlen(backing_fmt); 800756e6736SKevin Wolf 801756e6736SKevin Wolf memset(buf + offset, 0, padding); 802756e6736SKevin Wolf offset += padding; 803756e6736SKevin Wolf } 804756e6736SKevin Wolf 805756e6736SKevin Wolf memcpy(buf + offset, backing_file, backing_file_len); 806756e6736SKevin Wolf backing_file_offset = sizeof(QCowHeader) + offset; 807756e6736SKevin Wolf } 808756e6736SKevin Wolf 8098b3b7206SKevin Wolf ret = bdrv_pwrite_sync(bs->file, sizeof(QCowHeader), buf, ext_size); 810756e6736SKevin Wolf if (ret < 0) { 811756e6736SKevin Wolf goto fail; 812756e6736SKevin Wolf } 813756e6736SKevin Wolf 814756e6736SKevin Wolf /* Update header fields */ 815756e6736SKevin Wolf uint64_t be_backing_file_offset = cpu_to_be64(backing_file_offset); 816756e6736SKevin Wolf uint32_t be_backing_file_size = cpu_to_be32(backing_file_len); 817756e6736SKevin Wolf 8188b3b7206SKevin Wolf ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, backing_file_offset), 819756e6736SKevin Wolf &be_backing_file_offset, sizeof(uint64_t)); 820756e6736SKevin Wolf if (ret < 0) { 821756e6736SKevin Wolf goto fail; 822756e6736SKevin Wolf } 823756e6736SKevin Wolf 8248b3b7206SKevin Wolf ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, backing_file_size), 825756e6736SKevin Wolf &be_backing_file_size, sizeof(uint32_t)); 826756e6736SKevin Wolf if (ret < 0) { 827756e6736SKevin Wolf goto fail; 828756e6736SKevin Wolf } 829756e6736SKevin Wolf 830756e6736SKevin Wolf ret = 0; 831756e6736SKevin Wolf fail: 832756e6736SKevin Wolf return ret; 833756e6736SKevin Wolf } 834756e6736SKevin Wolf 835756e6736SKevin Wolf static int qcow2_change_backing_file(BlockDriverState *bs, 836756e6736SKevin Wolf const char *backing_file, const char *backing_fmt) 837756e6736SKevin Wolf { 838756e6736SKevin Wolf return qcow2_update_ext_header(bs, backing_file, backing_fmt); 839756e6736SKevin Wolf } 840756e6736SKevin Wolf 841a35e1c17SKevin Wolf static int preallocate(BlockDriverState *bs) 842a35e1c17SKevin Wolf { 843a35e1c17SKevin Wolf uint64_t nb_sectors; 844a35e1c17SKevin Wolf uint64_t offset; 845a35e1c17SKevin Wolf int num; 846148da7eaSKevin Wolf int ret; 847a35e1c17SKevin Wolf QCowL2Meta meta; 848a35e1c17SKevin Wolf 849a35e1c17SKevin Wolf nb_sectors = bdrv_getlength(bs) >> 9; 850a35e1c17SKevin Wolf offset = 0; 85172cf2d4fSBlue Swirl QLIST_INIT(&meta.dependent_requests); 852148da7eaSKevin Wolf meta.cluster_offset = 0; 853a35e1c17SKevin Wolf 854a35e1c17SKevin Wolf while (nb_sectors) { 855a35e1c17SKevin Wolf num = MIN(nb_sectors, INT_MAX >> 9); 856148da7eaSKevin Wolf ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num, &meta); 857148da7eaSKevin Wolf if (ret < 0) { 85819dbcbf7SKevin Wolf return ret; 859a35e1c17SKevin Wolf } 860a35e1c17SKevin Wolf 86119dbcbf7SKevin Wolf ret = qcow2_alloc_cluster_link_l2(bs, &meta); 86219dbcbf7SKevin Wolf if (ret < 0) { 863148da7eaSKevin Wolf qcow2_free_any_clusters(bs, meta.cluster_offset, meta.nb_clusters); 86419dbcbf7SKevin Wolf return ret; 865a35e1c17SKevin Wolf } 866a35e1c17SKevin Wolf 867f214978aSKevin Wolf /* There are no dependent requests, but we need to remove our request 868f214978aSKevin Wolf * from the list of in-flight requests */ 869f214978aSKevin Wolf run_dependent_requests(&meta); 870f214978aSKevin Wolf 871a35e1c17SKevin Wolf /* TODO Preallocate data if requested */ 872a35e1c17SKevin Wolf 873a35e1c17SKevin Wolf nb_sectors -= num; 874a35e1c17SKevin Wolf offset += num << 9; 875a35e1c17SKevin Wolf } 876a35e1c17SKevin Wolf 877a35e1c17SKevin Wolf /* 878a35e1c17SKevin Wolf * It is expected that the image file is large enough to actually contain 879a35e1c17SKevin Wolf * all of the allocated clusters (otherwise we get failing reads after 880a35e1c17SKevin Wolf * EOF). Extend the image to the last allocated sector. 881a35e1c17SKevin Wolf */ 882148da7eaSKevin Wolf if (meta.cluster_offset != 0) { 883ea80b906SKevin Wolf uint8_t buf[512]; 884ea80b906SKevin Wolf memset(buf, 0, 512); 88519dbcbf7SKevin Wolf ret = bdrv_write(bs->file, (meta.cluster_offset >> 9) + num - 1, buf, 1); 88619dbcbf7SKevin Wolf if (ret < 0) { 88719dbcbf7SKevin Wolf return ret; 88819dbcbf7SKevin Wolf } 889a35e1c17SKevin Wolf } 890a35e1c17SKevin Wolf 891a35e1c17SKevin Wolf return 0; 892a35e1c17SKevin Wolf } 893a35e1c17SKevin Wolf 8947c80ab3fSJes Sorensen static int qcow2_create2(const char *filename, int64_t total_size, 895a9420734SKevin Wolf const char *backing_file, const char *backing_format, 896a9420734SKevin Wolf int flags, size_t cluster_size, int prealloc, 897a9420734SKevin Wolf QEMUOptionParameter *options) 898a9420734SKevin Wolf { 899a9420734SKevin Wolf /* Calulate cluster_bits */ 900a9420734SKevin Wolf int cluster_bits; 901a9420734SKevin Wolf cluster_bits = ffs(cluster_size) - 1; 902a9420734SKevin Wolf if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS || 903a9420734SKevin Wolf (1 << cluster_bits) != cluster_size) 904a9420734SKevin Wolf { 905a9420734SKevin Wolf error_report( 906a9420734SKevin Wolf "Cluster size must be a power of two between %d and %dk\n", 907a9420734SKevin Wolf 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10)); 908a9420734SKevin Wolf return -EINVAL; 909a9420734SKevin Wolf } 910a9420734SKevin Wolf 911a9420734SKevin Wolf /* 912a9420734SKevin Wolf * Open the image file and write a minimal qcow2 header. 913a9420734SKevin Wolf * 914a9420734SKevin Wolf * We keep things simple and start with a zero-sized image. We also 915a9420734SKevin Wolf * do without refcount blocks or a L1 table for now. We'll fix the 916a9420734SKevin Wolf * inconsistency later. 917a9420734SKevin Wolf * 918a9420734SKevin Wolf * We do need a refcount table because growing the refcount table means 919a9420734SKevin Wolf * allocating two new refcount blocks - the seconds of which would be at 920a9420734SKevin Wolf * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file 921a9420734SKevin Wolf * size for any qcow2 image. 922a9420734SKevin Wolf */ 923a9420734SKevin Wolf BlockDriverState* bs; 924a9420734SKevin Wolf QCowHeader header; 925a9420734SKevin Wolf uint8_t* refcount_table; 926a9420734SKevin Wolf int ret; 927a9420734SKevin Wolf 928a9420734SKevin Wolf ret = bdrv_create_file(filename, options); 929a9420734SKevin Wolf if (ret < 0) { 930a9420734SKevin Wolf return ret; 931a9420734SKevin Wolf } 932a9420734SKevin Wolf 933a9420734SKevin Wolf ret = bdrv_file_open(&bs, filename, BDRV_O_RDWR); 934a9420734SKevin Wolf if (ret < 0) { 935a9420734SKevin Wolf return ret; 936a9420734SKevin Wolf } 937a9420734SKevin Wolf 938a9420734SKevin Wolf /* Write the header */ 939a9420734SKevin Wolf memset(&header, 0, sizeof(header)); 940a9420734SKevin Wolf header.magic = cpu_to_be32(QCOW_MAGIC); 941a9420734SKevin Wolf header.version = cpu_to_be32(QCOW_VERSION); 942a9420734SKevin Wolf header.cluster_bits = cpu_to_be32(cluster_bits); 943a9420734SKevin Wolf header.size = cpu_to_be64(0); 944a9420734SKevin Wolf header.l1_table_offset = cpu_to_be64(0); 945a9420734SKevin Wolf header.l1_size = cpu_to_be32(0); 946a9420734SKevin Wolf header.refcount_table_offset = cpu_to_be64(cluster_size); 947a9420734SKevin Wolf header.refcount_table_clusters = cpu_to_be32(1); 948a9420734SKevin Wolf 949a9420734SKevin Wolf if (flags & BLOCK_FLAG_ENCRYPT) { 950a9420734SKevin Wolf header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES); 951a9420734SKevin Wolf } else { 952a9420734SKevin Wolf header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); 953a9420734SKevin Wolf } 954a9420734SKevin Wolf 955a9420734SKevin Wolf ret = bdrv_pwrite(bs, 0, &header, sizeof(header)); 956a9420734SKevin Wolf if (ret < 0) { 957a9420734SKevin Wolf goto out; 958a9420734SKevin Wolf } 959a9420734SKevin Wolf 960a9420734SKevin Wolf /* Write an empty refcount table */ 961a9420734SKevin Wolf refcount_table = qemu_mallocz(cluster_size); 962a9420734SKevin Wolf ret = bdrv_pwrite(bs, cluster_size, refcount_table, cluster_size); 963a9420734SKevin Wolf qemu_free(refcount_table); 964a9420734SKevin Wolf 965a9420734SKevin Wolf if (ret < 0) { 966a9420734SKevin Wolf goto out; 967a9420734SKevin Wolf } 968a9420734SKevin Wolf 969a9420734SKevin Wolf bdrv_close(bs); 970a9420734SKevin Wolf 971a9420734SKevin Wolf /* 972a9420734SKevin Wolf * And now open the image and make it consistent first (i.e. increase the 973a9420734SKevin Wolf * refcount of the cluster that is occupied by the header and the refcount 974a9420734SKevin Wolf * table) 975a9420734SKevin Wolf */ 976a9420734SKevin Wolf BlockDriver* drv = bdrv_find_format("qcow2"); 977a9420734SKevin Wolf assert(drv != NULL); 978a9420734SKevin Wolf ret = bdrv_open(bs, filename, BDRV_O_RDWR | BDRV_O_NO_FLUSH, drv); 979a9420734SKevin Wolf if (ret < 0) { 980a9420734SKevin Wolf goto out; 981a9420734SKevin Wolf } 982a9420734SKevin Wolf 983a9420734SKevin Wolf ret = qcow2_alloc_clusters(bs, 2 * cluster_size); 984a9420734SKevin Wolf if (ret < 0) { 985a9420734SKevin Wolf goto out; 986a9420734SKevin Wolf 987a9420734SKevin Wolf } else if (ret != 0) { 988a9420734SKevin Wolf error_report("Huh, first cluster in empty image is already in use?"); 989a9420734SKevin Wolf abort(); 990a9420734SKevin Wolf } 991a9420734SKevin Wolf 992a9420734SKevin Wolf /* Okay, now that we have a valid image, let's give it the right size */ 993a9420734SKevin Wolf ret = bdrv_truncate(bs, total_size * BDRV_SECTOR_SIZE); 994a9420734SKevin Wolf if (ret < 0) { 995a9420734SKevin Wolf goto out; 996a9420734SKevin Wolf } 997a9420734SKevin Wolf 998a9420734SKevin Wolf /* Want a backing file? There you go.*/ 999a9420734SKevin Wolf if (backing_file) { 1000a9420734SKevin Wolf ret = bdrv_change_backing_file(bs, backing_file, backing_format); 1001a9420734SKevin Wolf if (ret < 0) { 1002a9420734SKevin Wolf goto out; 1003a9420734SKevin Wolf } 1004a9420734SKevin Wolf } 1005a9420734SKevin Wolf 1006a9420734SKevin Wolf /* And if we're supposed to preallocate metadata, do that now */ 1007a9420734SKevin Wolf if (prealloc) { 1008a9420734SKevin Wolf ret = preallocate(bs); 1009a9420734SKevin Wolf if (ret < 0) { 1010a9420734SKevin Wolf goto out; 1011a9420734SKevin Wolf } 1012a9420734SKevin Wolf } 1013a9420734SKevin Wolf 1014a9420734SKevin Wolf ret = 0; 1015a9420734SKevin Wolf out: 1016a9420734SKevin Wolf bdrv_delete(bs); 1017a9420734SKevin Wolf return ret; 1018a9420734SKevin Wolf } 1019de5f3f40SKevin Wolf 10207c80ab3fSJes Sorensen static int qcow2_create(const char *filename, QEMUOptionParameter *options) 1021de5f3f40SKevin Wolf { 1022de5f3f40SKevin Wolf const char *backing_file = NULL; 1023de5f3f40SKevin Wolf const char *backing_fmt = NULL; 1024de5f3f40SKevin Wolf uint64_t sectors = 0; 1025de5f3f40SKevin Wolf int flags = 0; 1026de5f3f40SKevin Wolf size_t cluster_size = 65536; 1027de5f3f40SKevin Wolf int prealloc = 0; 1028de5f3f40SKevin Wolf 1029de5f3f40SKevin Wolf /* Read out options */ 1030de5f3f40SKevin Wolf while (options && options->name) { 1031de5f3f40SKevin Wolf if (!strcmp(options->name, BLOCK_OPT_SIZE)) { 1032de5f3f40SKevin Wolf sectors = options->value.n / 512; 1033de5f3f40SKevin Wolf } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) { 1034de5f3f40SKevin Wolf backing_file = options->value.s; 1035de5f3f40SKevin Wolf } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) { 1036de5f3f40SKevin Wolf backing_fmt = options->value.s; 1037de5f3f40SKevin Wolf } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) { 1038de5f3f40SKevin Wolf flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0; 1039de5f3f40SKevin Wolf } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) { 1040de5f3f40SKevin Wolf if (options->value.n) { 1041de5f3f40SKevin Wolf cluster_size = options->value.n; 1042de5f3f40SKevin Wolf } 1043de5f3f40SKevin Wolf } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) { 1044de5f3f40SKevin Wolf if (!options->value.s || !strcmp(options->value.s, "off")) { 1045de5f3f40SKevin Wolf prealloc = 0; 1046de5f3f40SKevin Wolf } else if (!strcmp(options->value.s, "metadata")) { 1047de5f3f40SKevin Wolf prealloc = 1; 1048de5f3f40SKevin Wolf } else { 1049de5f3f40SKevin Wolf fprintf(stderr, "Invalid preallocation mode: '%s'\n", 1050de5f3f40SKevin Wolf options->value.s); 1051de5f3f40SKevin Wolf return -EINVAL; 1052de5f3f40SKevin Wolf } 1053de5f3f40SKevin Wolf } 1054de5f3f40SKevin Wolf options++; 1055de5f3f40SKevin Wolf } 1056de5f3f40SKevin Wolf 1057de5f3f40SKevin Wolf if (backing_file && prealloc) { 1058de5f3f40SKevin Wolf fprintf(stderr, "Backing file and preallocation cannot be used at " 1059de5f3f40SKevin Wolf "the same time\n"); 1060de5f3f40SKevin Wolf return -EINVAL; 1061de5f3f40SKevin Wolf } 1062de5f3f40SKevin Wolf 10637c80ab3fSJes Sorensen return qcow2_create2(filename, sectors, backing_file, backing_fmt, flags, 1064a9420734SKevin Wolf cluster_size, prealloc, options); 1065de5f3f40SKevin Wolf } 1066de5f3f40SKevin Wolf 10677c80ab3fSJes Sorensen static int qcow2_make_empty(BlockDriverState *bs) 106820d97356SBlue Swirl { 106920d97356SBlue Swirl #if 0 107020d97356SBlue Swirl /* XXX: not correct */ 107120d97356SBlue Swirl BDRVQcowState *s = bs->opaque; 107220d97356SBlue Swirl uint32_t l1_length = s->l1_size * sizeof(uint64_t); 107320d97356SBlue Swirl int ret; 107420d97356SBlue Swirl 107520d97356SBlue Swirl memset(s->l1_table, 0, l1_length); 107666f82ceeSKevin Wolf if (bdrv_pwrite(bs->file, s->l1_table_offset, s->l1_table, l1_length) < 0) 107720d97356SBlue Swirl return -1; 107866f82ceeSKevin Wolf ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length); 107920d97356SBlue Swirl if (ret < 0) 108020d97356SBlue Swirl return ret; 108120d97356SBlue Swirl 108220d97356SBlue Swirl l2_cache_reset(bs); 108320d97356SBlue Swirl #endif 108420d97356SBlue Swirl return 0; 108520d97356SBlue Swirl } 108620d97356SBlue Swirl 1087*5ea929e3SKevin Wolf static int qcow2_discard(BlockDriverState *bs, int64_t sector_num, 1088*5ea929e3SKevin Wolf int nb_sectors) 1089*5ea929e3SKevin Wolf { 1090*5ea929e3SKevin Wolf return qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS, 1091*5ea929e3SKevin Wolf nb_sectors); 1092*5ea929e3SKevin Wolf } 1093*5ea929e3SKevin Wolf 1094419b19d9SStefan Hajnoczi static int qcow2_truncate(BlockDriverState *bs, int64_t offset) 1095419b19d9SStefan Hajnoczi { 1096419b19d9SStefan Hajnoczi BDRVQcowState *s = bs->opaque; 1097419b19d9SStefan Hajnoczi int ret, new_l1_size; 1098419b19d9SStefan Hajnoczi 1099419b19d9SStefan Hajnoczi if (offset & 511) { 1100419b19d9SStefan Hajnoczi return -EINVAL; 1101419b19d9SStefan Hajnoczi } 1102419b19d9SStefan Hajnoczi 1103419b19d9SStefan Hajnoczi /* cannot proceed if image has snapshots */ 1104419b19d9SStefan Hajnoczi if (s->nb_snapshots) { 1105419b19d9SStefan Hajnoczi return -ENOTSUP; 1106419b19d9SStefan Hajnoczi } 1107419b19d9SStefan Hajnoczi 1108419b19d9SStefan Hajnoczi /* shrinking is currently not supported */ 1109419b19d9SStefan Hajnoczi if (offset < bs->total_sectors * 512) { 1110419b19d9SStefan Hajnoczi return -ENOTSUP; 1111419b19d9SStefan Hajnoczi } 1112419b19d9SStefan Hajnoczi 1113419b19d9SStefan Hajnoczi new_l1_size = size_to_l1(s, offset); 111472893756SStefan Hajnoczi ret = qcow2_grow_l1_table(bs, new_l1_size, true); 1115419b19d9SStefan Hajnoczi if (ret < 0) { 1116419b19d9SStefan Hajnoczi return ret; 1117419b19d9SStefan Hajnoczi } 1118419b19d9SStefan Hajnoczi 1119419b19d9SStefan Hajnoczi /* write updated header.size */ 1120419b19d9SStefan Hajnoczi offset = cpu_to_be64(offset); 11218b3b7206SKevin Wolf ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size), 1122419b19d9SStefan Hajnoczi &offset, sizeof(uint64_t)); 1123419b19d9SStefan Hajnoczi if (ret < 0) { 1124419b19d9SStefan Hajnoczi return ret; 1125419b19d9SStefan Hajnoczi } 1126419b19d9SStefan Hajnoczi 1127419b19d9SStefan Hajnoczi s->l1_vm_state_index = new_l1_size; 1128419b19d9SStefan Hajnoczi return 0; 1129419b19d9SStefan Hajnoczi } 1130419b19d9SStefan Hajnoczi 113120d97356SBlue Swirl /* XXX: put compressed sectors first, then all the cluster aligned 113220d97356SBlue Swirl tables to avoid losing bytes in alignment */ 11337c80ab3fSJes Sorensen static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num, 113420d97356SBlue Swirl const uint8_t *buf, int nb_sectors) 113520d97356SBlue Swirl { 113620d97356SBlue Swirl BDRVQcowState *s = bs->opaque; 113720d97356SBlue Swirl z_stream strm; 113820d97356SBlue Swirl int ret, out_len; 113920d97356SBlue Swirl uint8_t *out_buf; 114020d97356SBlue Swirl uint64_t cluster_offset; 114120d97356SBlue Swirl 114220d97356SBlue Swirl if (nb_sectors == 0) { 114320d97356SBlue Swirl /* align end of file to a sector boundary to ease reading with 114420d97356SBlue Swirl sector based I/Os */ 114566f82ceeSKevin Wolf cluster_offset = bdrv_getlength(bs->file); 114620d97356SBlue Swirl cluster_offset = (cluster_offset + 511) & ~511; 114766f82ceeSKevin Wolf bdrv_truncate(bs->file, cluster_offset); 114820d97356SBlue Swirl return 0; 114920d97356SBlue Swirl } 115020d97356SBlue Swirl 115120d97356SBlue Swirl if (nb_sectors != s->cluster_sectors) 115220d97356SBlue Swirl return -EINVAL; 115320d97356SBlue Swirl 115420d97356SBlue Swirl out_buf = qemu_malloc(s->cluster_size + (s->cluster_size / 1000) + 128); 115520d97356SBlue Swirl 115620d97356SBlue Swirl /* best compression, small window, no zlib header */ 115720d97356SBlue Swirl memset(&strm, 0, sizeof(strm)); 115820d97356SBlue Swirl ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, 115920d97356SBlue Swirl Z_DEFLATED, -12, 116020d97356SBlue Swirl 9, Z_DEFAULT_STRATEGY); 116120d97356SBlue Swirl if (ret != 0) { 116220d97356SBlue Swirl qemu_free(out_buf); 116320d97356SBlue Swirl return -1; 116420d97356SBlue Swirl } 116520d97356SBlue Swirl 116620d97356SBlue Swirl strm.avail_in = s->cluster_size; 116720d97356SBlue Swirl strm.next_in = (uint8_t *)buf; 116820d97356SBlue Swirl strm.avail_out = s->cluster_size; 116920d97356SBlue Swirl strm.next_out = out_buf; 117020d97356SBlue Swirl 117120d97356SBlue Swirl ret = deflate(&strm, Z_FINISH); 117220d97356SBlue Swirl if (ret != Z_STREAM_END && ret != Z_OK) { 117320d97356SBlue Swirl qemu_free(out_buf); 117420d97356SBlue Swirl deflateEnd(&strm); 117520d97356SBlue Swirl return -1; 117620d97356SBlue Swirl } 117720d97356SBlue Swirl out_len = strm.next_out - out_buf; 117820d97356SBlue Swirl 117920d97356SBlue Swirl deflateEnd(&strm); 118020d97356SBlue Swirl 118120d97356SBlue Swirl if (ret != Z_STREAM_END || out_len >= s->cluster_size) { 118220d97356SBlue Swirl /* could not compress: write normal cluster */ 118320d97356SBlue Swirl bdrv_write(bs, sector_num, buf, s->cluster_sectors); 118420d97356SBlue Swirl } else { 118520d97356SBlue Swirl cluster_offset = qcow2_alloc_compressed_cluster_offset(bs, 118620d97356SBlue Swirl sector_num << 9, out_len); 118720d97356SBlue Swirl if (!cluster_offset) 118820d97356SBlue Swirl return -1; 118920d97356SBlue Swirl cluster_offset &= s->cluster_offset_mask; 119066f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED); 119166f82ceeSKevin Wolf if (bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len) != out_len) { 119220d97356SBlue Swirl qemu_free(out_buf); 119320d97356SBlue Swirl return -1; 119420d97356SBlue Swirl } 119520d97356SBlue Swirl } 119620d97356SBlue Swirl 119720d97356SBlue Swirl qemu_free(out_buf); 119820d97356SBlue Swirl return 0; 119920d97356SBlue Swirl } 120020d97356SBlue Swirl 12017c80ab3fSJes Sorensen static int qcow2_flush(BlockDriverState *bs) 120220d97356SBlue Swirl { 120329c1a730SKevin Wolf BDRVQcowState *s = bs->opaque; 120429c1a730SKevin Wolf int ret; 120529c1a730SKevin Wolf 120629c1a730SKevin Wolf ret = qcow2_cache_flush(bs, s->l2_table_cache); 120729c1a730SKevin Wolf if (ret < 0) { 120829c1a730SKevin Wolf return ret; 120929c1a730SKevin Wolf } 121029c1a730SKevin Wolf 121129c1a730SKevin Wolf ret = qcow2_cache_flush(bs, s->refcount_block_cache); 121229c1a730SKevin Wolf if (ret < 0) { 121329c1a730SKevin Wolf return ret; 121429c1a730SKevin Wolf } 121529c1a730SKevin Wolf 1216205ef796SKevin Wolf return bdrv_flush(bs->file); 121720d97356SBlue Swirl } 121820d97356SBlue Swirl 12197c80ab3fSJes Sorensen static BlockDriverAIOCB *qcow2_aio_flush(BlockDriverState *bs, 12207c80ab3fSJes Sorensen BlockDriverCompletionFunc *cb, 12217c80ab3fSJes Sorensen void *opaque) 122220d97356SBlue Swirl { 122329c1a730SKevin Wolf BDRVQcowState *s = bs->opaque; 122429c1a730SKevin Wolf int ret; 122529c1a730SKevin Wolf 122629c1a730SKevin Wolf ret = qcow2_cache_flush(bs, s->l2_table_cache); 122729c1a730SKevin Wolf if (ret < 0) { 122829c1a730SKevin Wolf return NULL; 122929c1a730SKevin Wolf } 123029c1a730SKevin Wolf 123129c1a730SKevin Wolf ret = qcow2_cache_flush(bs, s->refcount_block_cache); 123229c1a730SKevin Wolf if (ret < 0) { 123329c1a730SKevin Wolf return NULL; 123429c1a730SKevin Wolf } 123529c1a730SKevin Wolf 123666f82ceeSKevin Wolf return bdrv_aio_flush(bs->file, cb, opaque); 123720d97356SBlue Swirl } 123820d97356SBlue Swirl 12397c80ab3fSJes Sorensen static int64_t qcow2_vm_state_offset(BDRVQcowState *s) 124020d97356SBlue Swirl { 124120d97356SBlue Swirl return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits); 124220d97356SBlue Swirl } 124320d97356SBlue Swirl 12447c80ab3fSJes Sorensen static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 124520d97356SBlue Swirl { 124620d97356SBlue Swirl BDRVQcowState *s = bs->opaque; 124720d97356SBlue Swirl bdi->cluster_size = s->cluster_size; 12487c80ab3fSJes Sorensen bdi->vm_state_offset = qcow2_vm_state_offset(s); 124920d97356SBlue Swirl return 0; 125020d97356SBlue Swirl } 125120d97356SBlue Swirl 125220d97356SBlue Swirl 12537c80ab3fSJes Sorensen static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result) 125420d97356SBlue Swirl { 12559ac228e0SKevin Wolf return qcow2_check_refcounts(bs, result); 125620d97356SBlue Swirl } 125720d97356SBlue Swirl 125820d97356SBlue Swirl #if 0 125920d97356SBlue Swirl static void dump_refcounts(BlockDriverState *bs) 126020d97356SBlue Swirl { 126120d97356SBlue Swirl BDRVQcowState *s = bs->opaque; 126220d97356SBlue Swirl int64_t nb_clusters, k, k1, size; 126320d97356SBlue Swirl int refcount; 126420d97356SBlue Swirl 126566f82ceeSKevin Wolf size = bdrv_getlength(bs->file); 126620d97356SBlue Swirl nb_clusters = size_to_clusters(s, size); 126720d97356SBlue Swirl for(k = 0; k < nb_clusters;) { 126820d97356SBlue Swirl k1 = k; 126920d97356SBlue Swirl refcount = get_refcount(bs, k); 127020d97356SBlue Swirl k++; 127120d97356SBlue Swirl while (k < nb_clusters && get_refcount(bs, k) == refcount) 127220d97356SBlue Swirl k++; 12730bfcd599SBlue Swirl printf("%" PRId64 ": refcount=%d nb=%" PRId64 "\n", k, refcount, 12740bfcd599SBlue Swirl k - k1); 127520d97356SBlue Swirl } 127620d97356SBlue Swirl } 127720d97356SBlue Swirl #endif 127820d97356SBlue Swirl 12797c80ab3fSJes Sorensen static int qcow2_save_vmstate(BlockDriverState *bs, const uint8_t *buf, 128020d97356SBlue Swirl int64_t pos, int size) 128120d97356SBlue Swirl { 128220d97356SBlue Swirl BDRVQcowState *s = bs->opaque; 128320d97356SBlue Swirl int growable = bs->growable; 128420d97356SBlue Swirl int ret; 128520d97356SBlue Swirl 128666f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE); 128720d97356SBlue Swirl bs->growable = 1; 12887c80ab3fSJes Sorensen ret = bdrv_pwrite(bs, qcow2_vm_state_offset(s) + pos, buf, size); 128920d97356SBlue Swirl bs->growable = growable; 129020d97356SBlue Swirl 129120d97356SBlue Swirl return ret; 129220d97356SBlue Swirl } 129320d97356SBlue Swirl 12947c80ab3fSJes Sorensen static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf, 129520d97356SBlue Swirl int64_t pos, int size) 129620d97356SBlue Swirl { 129720d97356SBlue Swirl BDRVQcowState *s = bs->opaque; 129820d97356SBlue Swirl int growable = bs->growable; 129920d97356SBlue Swirl int ret; 130020d97356SBlue Swirl 130166f82ceeSKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD); 130220d97356SBlue Swirl bs->growable = 1; 13037c80ab3fSJes Sorensen ret = bdrv_pread(bs, qcow2_vm_state_offset(s) + pos, buf, size); 130420d97356SBlue Swirl bs->growable = growable; 130520d97356SBlue Swirl 130620d97356SBlue Swirl return ret; 130720d97356SBlue Swirl } 130820d97356SBlue Swirl 13097c80ab3fSJes Sorensen static QEMUOptionParameter qcow2_create_options[] = { 131020d97356SBlue Swirl { 131120d97356SBlue Swirl .name = BLOCK_OPT_SIZE, 131220d97356SBlue Swirl .type = OPT_SIZE, 131320d97356SBlue Swirl .help = "Virtual disk size" 131420d97356SBlue Swirl }, 131520d97356SBlue Swirl { 131620d97356SBlue Swirl .name = BLOCK_OPT_BACKING_FILE, 131720d97356SBlue Swirl .type = OPT_STRING, 131820d97356SBlue Swirl .help = "File name of a base image" 131920d97356SBlue Swirl }, 132020d97356SBlue Swirl { 132120d97356SBlue Swirl .name = BLOCK_OPT_BACKING_FMT, 132220d97356SBlue Swirl .type = OPT_STRING, 132320d97356SBlue Swirl .help = "Image format of the base image" 132420d97356SBlue Swirl }, 132520d97356SBlue Swirl { 132620d97356SBlue Swirl .name = BLOCK_OPT_ENCRYPT, 132720d97356SBlue Swirl .type = OPT_FLAG, 132820d97356SBlue Swirl .help = "Encrypt the image" 132920d97356SBlue Swirl }, 133020d97356SBlue Swirl { 133120d97356SBlue Swirl .name = BLOCK_OPT_CLUSTER_SIZE, 133220d97356SBlue Swirl .type = OPT_SIZE, 133320d97356SBlue Swirl .help = "qcow2 cluster size" 133420d97356SBlue Swirl }, 133520d97356SBlue Swirl { 133620d97356SBlue Swirl .name = BLOCK_OPT_PREALLOC, 133720d97356SBlue Swirl .type = OPT_STRING, 133820d97356SBlue Swirl .help = "Preallocation mode (allowed values: off, metadata)" 133920d97356SBlue Swirl }, 134020d97356SBlue Swirl { NULL } 134120d97356SBlue Swirl }; 134220d97356SBlue Swirl 134320d97356SBlue Swirl static BlockDriver bdrv_qcow2 = { 134420d97356SBlue Swirl .format_name = "qcow2", 134520d97356SBlue Swirl .instance_size = sizeof(BDRVQcowState), 13467c80ab3fSJes Sorensen .bdrv_probe = qcow2_probe, 13477c80ab3fSJes Sorensen .bdrv_open = qcow2_open, 13487c80ab3fSJes Sorensen .bdrv_close = qcow2_close, 13497c80ab3fSJes Sorensen .bdrv_create = qcow2_create, 13507c80ab3fSJes Sorensen .bdrv_flush = qcow2_flush, 13517c80ab3fSJes Sorensen .bdrv_is_allocated = qcow2_is_allocated, 13527c80ab3fSJes Sorensen .bdrv_set_key = qcow2_set_key, 13537c80ab3fSJes Sorensen .bdrv_make_empty = qcow2_make_empty, 135420d97356SBlue Swirl 13557c80ab3fSJes Sorensen .bdrv_aio_readv = qcow2_aio_readv, 13567c80ab3fSJes Sorensen .bdrv_aio_writev = qcow2_aio_writev, 13577c80ab3fSJes Sorensen .bdrv_aio_flush = qcow2_aio_flush, 1358419b19d9SStefan Hajnoczi 1359*5ea929e3SKevin Wolf .bdrv_discard = qcow2_discard, 1360419b19d9SStefan Hajnoczi .bdrv_truncate = qcow2_truncate, 13617c80ab3fSJes Sorensen .bdrv_write_compressed = qcow2_write_compressed, 136220d97356SBlue Swirl 136320d97356SBlue Swirl .bdrv_snapshot_create = qcow2_snapshot_create, 136420d97356SBlue Swirl .bdrv_snapshot_goto = qcow2_snapshot_goto, 136520d97356SBlue Swirl .bdrv_snapshot_delete = qcow2_snapshot_delete, 136620d97356SBlue Swirl .bdrv_snapshot_list = qcow2_snapshot_list, 136751ef6727Sedison .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp, 13687c80ab3fSJes Sorensen .bdrv_get_info = qcow2_get_info, 136920d97356SBlue Swirl 13707c80ab3fSJes Sorensen .bdrv_save_vmstate = qcow2_save_vmstate, 13717c80ab3fSJes Sorensen .bdrv_load_vmstate = qcow2_load_vmstate, 137220d97356SBlue Swirl 137320d97356SBlue Swirl .bdrv_change_backing_file = qcow2_change_backing_file, 137420d97356SBlue Swirl 13757c80ab3fSJes Sorensen .create_options = qcow2_create_options, 13767c80ab3fSJes Sorensen .bdrv_check = qcow2_check, 137720d97356SBlue Swirl }; 137820d97356SBlue Swirl 13795efa9d5aSAnthony Liguori static void bdrv_qcow2_init(void) 13805efa9d5aSAnthony Liguori { 13815efa9d5aSAnthony Liguori bdrv_register(&bdrv_qcow2); 13825efa9d5aSAnthony Liguori } 13835efa9d5aSAnthony Liguori 13845efa9d5aSAnthony Liguori block_init(bdrv_qcow2_init); 1385