xref: /qemu/block/qcow2.c (revision 8d2497c3552e19a60e7a75d20976471ecb2a8e2b) !
1585f8587Sbellard /*
2585f8587Sbellard  * Block driver for the QCOW version 2 format
3585f8587Sbellard  *
4585f8587Sbellard  * Copyright (c) 2004-2006 Fabrice Bellard
5585f8587Sbellard  *
6585f8587Sbellard  * Permission is hereby granted, free of charge, to any person obtaining a copy
7585f8587Sbellard  * of this software and associated documentation files (the "Software"), to deal
8585f8587Sbellard  * in the Software without restriction, including without limitation the rights
9585f8587Sbellard  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10585f8587Sbellard  * copies of the Software, and to permit persons to whom the Software is
11585f8587Sbellard  * furnished to do so, subject to the following conditions:
12585f8587Sbellard  *
13585f8587Sbellard  * The above copyright notice and this permission notice shall be included in
14585f8587Sbellard  * all copies or substantial portions of the Software.
15585f8587Sbellard  *
16585f8587Sbellard  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17585f8587Sbellard  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18585f8587Sbellard  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19585f8587Sbellard  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20585f8587Sbellard  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21585f8587Sbellard  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22585f8587Sbellard  * THE SOFTWARE.
23585f8587Sbellard  */
24faf07963Spbrook #include "qemu-common.h"
25737e150eSPaolo Bonzini #include "block/block_int.h"
261de7afc9SPaolo Bonzini #include "qemu/module.h"
27585f8587Sbellard #include <zlib.h>
28737e150eSPaolo Bonzini #include "block/aes.h"
29f7d0fe02SKevin Wolf #include "block/qcow2.h"
301de7afc9SPaolo Bonzini #include "qemu/error-report.h"
317b1b5d19SPaolo Bonzini #include "qapi/qmp/qerror.h"
323cce16f4SKevin Wolf #include "trace.h"
33585f8587Sbellard 
34585f8587Sbellard /*
35585f8587Sbellard   Differences with QCOW:
36585f8587Sbellard 
37585f8587Sbellard   - Support for multiple incremental snapshots.
38585f8587Sbellard   - Memory management by reference counts.
39585f8587Sbellard   - Clusters which have a reference count of one have the bit
40585f8587Sbellard     QCOW_OFLAG_COPIED to optimize write performance.
41585f8587Sbellard   - Size of compressed clusters is stored in sectors to reduce bit usage
42585f8587Sbellard     in the cluster offsets.
43585f8587Sbellard   - Support for storing additional data (such as the VM state) in the
44585f8587Sbellard     snapshots.
45585f8587Sbellard   - If a backing store is used, the cluster size is not constrained
46585f8587Sbellard     (could be backported to QCOW).
  - L2 tables always have a size of one cluster.
48585f8587Sbellard */
49585f8587Sbellard 
509b80ddf3Saliguori 
/*
 * Fixed-size header that precedes every qcow2 header extension.  Both fields
 * are byte-swapped from big-endian after being read from the image file.
 */
typedef struct {
    uint32_t magic;     /* extension type, compared against QCOW2_EXT_MAGIC_* */
    uint32_t len;       /* size in bytes of the payload following this header */
} QCowExtension;
5521d82ac9SJeff Cody 
/* Header extension type identifiers (values of QCowExtension.magic) */
#define QCOW2_EXT_MAGIC_END             0           /* end of extension list */
#define QCOW2_EXT_MAGIC_BACKING_FORMAT  0xE2792ACA  /* backing format name */
#define QCOW2_EXT_MAGIC_FEATURE_TABLE   0x6803f857  /* feature name table */
599b80ddf3Saliguori 
607c80ab3fSJes Sorensen static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
61585f8587Sbellard {
62585f8587Sbellard     const QCowHeader *cow_header = (const void *)buf;
63585f8587Sbellard 
64585f8587Sbellard     if (buf_size >= sizeof(QCowHeader) &&
65585f8587Sbellard         be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
666744cbabSKevin Wolf         be32_to_cpu(cow_header->version) >= 2)
67585f8587Sbellard         return 100;
68585f8587Sbellard     else
69585f8587Sbellard         return 0;
70585f8587Sbellard }
71585f8587Sbellard 
729b80ddf3Saliguori 
739b80ddf3Saliguori /*
749b80ddf3Saliguori  * read qcow2 extension and fill bs
759b80ddf3Saliguori  * start reading from start_offset
769b80ddf3Saliguori  * finish reading upon magic of value 0 or when end_offset reached
779b80ddf3Saliguori  * unknown magic is skipped (future extension this version knows nothing about)
789b80ddf3Saliguori  * return 0 upon success, non-0 otherwise
799b80ddf3Saliguori  */
807c80ab3fSJes Sorensen static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
81cfcc4c62SKevin Wolf                                  uint64_t end_offset, void **p_feature_table)
829b80ddf3Saliguori {
8375bab85cSKevin Wolf     BDRVQcowState *s = bs->opaque;
849b80ddf3Saliguori     QCowExtension ext;
859b80ddf3Saliguori     uint64_t offset;
8675bab85cSKevin Wolf     int ret;
879b80ddf3Saliguori 
889b80ddf3Saliguori #ifdef DEBUG_EXT
897c80ab3fSJes Sorensen     printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
909b80ddf3Saliguori #endif
919b80ddf3Saliguori     offset = start_offset;
929b80ddf3Saliguori     while (offset < end_offset) {
939b80ddf3Saliguori 
949b80ddf3Saliguori #ifdef DEBUG_EXT
959b80ddf3Saliguori         /* Sanity check */
969b80ddf3Saliguori         if (offset > s->cluster_size)
977c80ab3fSJes Sorensen             printf("qcow2_read_extension: suspicious offset %lu\n", offset);
989b80ddf3Saliguori 
999b2260cbSDong Xu Wang         printf("attempting to read extended header in offset %lu\n", offset);
1009b80ddf3Saliguori #endif
1019b80ddf3Saliguori 
10266f82ceeSKevin Wolf         if (bdrv_pread(bs->file, offset, &ext, sizeof(ext)) != sizeof(ext)) {
1037c80ab3fSJes Sorensen             fprintf(stderr, "qcow2_read_extension: ERROR: "
1040bfcd599SBlue Swirl                     "pread fail from offset %" PRIu64 "\n",
1050bfcd599SBlue Swirl                     offset);
1069b80ddf3Saliguori             return 1;
1079b80ddf3Saliguori         }
1089b80ddf3Saliguori         be32_to_cpus(&ext.magic);
1099b80ddf3Saliguori         be32_to_cpus(&ext.len);
1109b80ddf3Saliguori         offset += sizeof(ext);
1119b80ddf3Saliguori #ifdef DEBUG_EXT
1129b80ddf3Saliguori         printf("ext.magic = 0x%x\n", ext.magic);
1139b80ddf3Saliguori #endif
11464ca6aeeSKevin Wolf         if (ext.len > end_offset - offset) {
11564ca6aeeSKevin Wolf             error_report("Header extension too large");
11664ca6aeeSKevin Wolf             return -EINVAL;
11764ca6aeeSKevin Wolf         }
11864ca6aeeSKevin Wolf 
1199b80ddf3Saliguori         switch (ext.magic) {
1207c80ab3fSJes Sorensen         case QCOW2_EXT_MAGIC_END:
1219b80ddf3Saliguori             return 0;
122f965509cSaliguori 
1237c80ab3fSJes Sorensen         case QCOW2_EXT_MAGIC_BACKING_FORMAT:
124f965509cSaliguori             if (ext.len >= sizeof(bs->backing_format)) {
125f965509cSaliguori                 fprintf(stderr, "ERROR: ext_backing_format: len=%u too large"
1264c978075Saliguori                         " (>=%zu)\n",
127f965509cSaliguori                         ext.len, sizeof(bs->backing_format));
128f965509cSaliguori                 return 2;
129f965509cSaliguori             }
13066f82ceeSKevin Wolf             if (bdrv_pread(bs->file, offset , bs->backing_format,
131f965509cSaliguori                            ext.len) != ext.len)
132f965509cSaliguori                 return 3;
133f965509cSaliguori             bs->backing_format[ext.len] = '\0';
134f965509cSaliguori #ifdef DEBUG_EXT
135f965509cSaliguori             printf("Qcow2: Got format extension %s\n", bs->backing_format);
136f965509cSaliguori #endif
137f965509cSaliguori             break;
138f965509cSaliguori 
139cfcc4c62SKevin Wolf         case QCOW2_EXT_MAGIC_FEATURE_TABLE:
140cfcc4c62SKevin Wolf             if (p_feature_table != NULL) {
141cfcc4c62SKevin Wolf                 void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
142cfcc4c62SKevin Wolf                 ret = bdrv_pread(bs->file, offset , feature_table, ext.len);
143cfcc4c62SKevin Wolf                 if (ret < 0) {
144cfcc4c62SKevin Wolf                     return ret;
145cfcc4c62SKevin Wolf                 }
146cfcc4c62SKevin Wolf 
147cfcc4c62SKevin Wolf                 *p_feature_table = feature_table;
148cfcc4c62SKevin Wolf             }
149cfcc4c62SKevin Wolf             break;
150cfcc4c62SKevin Wolf 
1519b80ddf3Saliguori         default:
15275bab85cSKevin Wolf             /* unknown magic - save it in case we need to rewrite the header */
15375bab85cSKevin Wolf             {
15475bab85cSKevin Wolf                 Qcow2UnknownHeaderExtension *uext;
15575bab85cSKevin Wolf 
15675bab85cSKevin Wolf                 uext = g_malloc0(sizeof(*uext)  + ext.len);
15775bab85cSKevin Wolf                 uext->magic = ext.magic;
15875bab85cSKevin Wolf                 uext->len = ext.len;
15975bab85cSKevin Wolf                 QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);
16075bab85cSKevin Wolf 
16175bab85cSKevin Wolf                 ret = bdrv_pread(bs->file, offset , uext->data, uext->len);
16275bab85cSKevin Wolf                 if (ret < 0) {
16375bab85cSKevin Wolf                     return ret;
16475bab85cSKevin Wolf                 }
16575bab85cSKevin Wolf             }
1669b80ddf3Saliguori             break;
1679b80ddf3Saliguori         }
168fd29b4bbSKevin Wolf 
169fd29b4bbSKevin Wolf         offset += ((ext.len + 7) & ~7);
1709b80ddf3Saliguori     }
1719b80ddf3Saliguori 
1729b80ddf3Saliguori     return 0;
1739b80ddf3Saliguori }
1749b80ddf3Saliguori 
17575bab85cSKevin Wolf static void cleanup_unknown_header_ext(BlockDriverState *bs)
17675bab85cSKevin Wolf {
17775bab85cSKevin Wolf     BDRVQcowState *s = bs->opaque;
17875bab85cSKevin Wolf     Qcow2UnknownHeaderExtension *uext, *next;
17975bab85cSKevin Wolf 
18075bab85cSKevin Wolf     QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) {
18175bab85cSKevin Wolf         QLIST_REMOVE(uext, next);
18275bab85cSKevin Wolf         g_free(uext);
18375bab85cSKevin Wolf     }
18475bab85cSKevin Wolf }
1859b80ddf3Saliguori 
186b9531b6eSStefan Weil static void GCC_FMT_ATTR(2, 3) report_unsupported(BlockDriverState *bs,
187b9531b6eSStefan Weil     const char *fmt, ...)
1886744cbabSKevin Wolf {
1896744cbabSKevin Wolf     char msg[64];
1906744cbabSKevin Wolf     va_list ap;
1916744cbabSKevin Wolf 
1926744cbabSKevin Wolf     va_start(ap, fmt);
1936744cbabSKevin Wolf     vsnprintf(msg, sizeof(msg), fmt, ap);
1946744cbabSKevin Wolf     va_end(ap);
1956744cbabSKevin Wolf 
1966744cbabSKevin Wolf     qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
1976744cbabSKevin Wolf         bs->device_name, "qcow2", msg);
1986744cbabSKevin Wolf }
1996744cbabSKevin Wolf 
200cfcc4c62SKevin Wolf static void report_unsupported_feature(BlockDriverState *bs,
201cfcc4c62SKevin Wolf     Qcow2Feature *table, uint64_t mask)
202cfcc4c62SKevin Wolf {
203cfcc4c62SKevin Wolf     while (table && table->name[0] != '\0') {
204cfcc4c62SKevin Wolf         if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) {
205cfcc4c62SKevin Wolf             if (mask & (1 << table->bit)) {
206cfcc4c62SKevin Wolf                 report_unsupported(bs, "%.46s",table->name);
207cfcc4c62SKevin Wolf                 mask &= ~(1 << table->bit);
208cfcc4c62SKevin Wolf             }
209cfcc4c62SKevin Wolf         }
210cfcc4c62SKevin Wolf         table++;
211cfcc4c62SKevin Wolf     }
212cfcc4c62SKevin Wolf 
213cfcc4c62SKevin Wolf     if (mask) {
214cfcc4c62SKevin Wolf         report_unsupported(bs, "Unknown incompatible feature: %" PRIx64, mask);
215cfcc4c62SKevin Wolf     }
216cfcc4c62SKevin Wolf }
217cfcc4c62SKevin Wolf 
218c61d0004SStefan Hajnoczi /*
219bfe8043eSStefan Hajnoczi  * Sets the dirty bit and flushes afterwards if necessary.
220bfe8043eSStefan Hajnoczi  *
221bfe8043eSStefan Hajnoczi  * The incompatible_features bit is only set if the image file header was
222bfe8043eSStefan Hajnoczi  * updated successfully.  Therefore it is not required to check the return
223bfe8043eSStefan Hajnoczi  * value of this function.
224bfe8043eSStefan Hajnoczi  */
225280d3735SKevin Wolf int qcow2_mark_dirty(BlockDriverState *bs)
226bfe8043eSStefan Hajnoczi {
227bfe8043eSStefan Hajnoczi     BDRVQcowState *s = bs->opaque;
228bfe8043eSStefan Hajnoczi     uint64_t val;
229bfe8043eSStefan Hajnoczi     int ret;
230bfe8043eSStefan Hajnoczi 
231bfe8043eSStefan Hajnoczi     assert(s->qcow_version >= 3);
232bfe8043eSStefan Hajnoczi 
233bfe8043eSStefan Hajnoczi     if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
234bfe8043eSStefan Hajnoczi         return 0; /* already dirty */
235bfe8043eSStefan Hajnoczi     }
236bfe8043eSStefan Hajnoczi 
237bfe8043eSStefan Hajnoczi     val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
238bfe8043eSStefan Hajnoczi     ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
239bfe8043eSStefan Hajnoczi                       &val, sizeof(val));
240bfe8043eSStefan Hajnoczi     if (ret < 0) {
241bfe8043eSStefan Hajnoczi         return ret;
242bfe8043eSStefan Hajnoczi     }
243bfe8043eSStefan Hajnoczi     ret = bdrv_flush(bs->file);
244bfe8043eSStefan Hajnoczi     if (ret < 0) {
245bfe8043eSStefan Hajnoczi         return ret;
246bfe8043eSStefan Hajnoczi     }
247bfe8043eSStefan Hajnoczi 
248bfe8043eSStefan Hajnoczi     /* Only treat image as dirty if the header was updated successfully */
249bfe8043eSStefan Hajnoczi     s->incompatible_features |= QCOW2_INCOMPAT_DIRTY;
250bfe8043eSStefan Hajnoczi     return 0;
251bfe8043eSStefan Hajnoczi }
252bfe8043eSStefan Hajnoczi 
253bfe8043eSStefan Hajnoczi /*
254c61d0004SStefan Hajnoczi  * Clears the dirty bit and flushes before if necessary.  Only call this
255c61d0004SStefan Hajnoczi  * function when there are no pending requests, it does not guard against
256c61d0004SStefan Hajnoczi  * concurrent requests dirtying the image.
257c61d0004SStefan Hajnoczi  */
258c61d0004SStefan Hajnoczi static int qcow2_mark_clean(BlockDriverState *bs)
259c61d0004SStefan Hajnoczi {
260c61d0004SStefan Hajnoczi     BDRVQcowState *s = bs->opaque;
261c61d0004SStefan Hajnoczi 
262c61d0004SStefan Hajnoczi     if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
263c61d0004SStefan Hajnoczi         int ret = bdrv_flush(bs);
264c61d0004SStefan Hajnoczi         if (ret < 0) {
265c61d0004SStefan Hajnoczi             return ret;
266c61d0004SStefan Hajnoczi         }
267c61d0004SStefan Hajnoczi 
268c61d0004SStefan Hajnoczi         s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY;
269c61d0004SStefan Hajnoczi         return qcow2_update_header(bs);
270c61d0004SStefan Hajnoczi     }
271c61d0004SStefan Hajnoczi     return 0;
272c61d0004SStefan Hajnoczi }
273c61d0004SStefan Hajnoczi 
274acbe5982SStefan Hajnoczi static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result,
275acbe5982SStefan Hajnoczi                        BdrvCheckMode fix)
276acbe5982SStefan Hajnoczi {
277acbe5982SStefan Hajnoczi     int ret = qcow2_check_refcounts(bs, result, fix);
278acbe5982SStefan Hajnoczi     if (ret < 0) {
279acbe5982SStefan Hajnoczi         return ret;
280acbe5982SStefan Hajnoczi     }
281acbe5982SStefan Hajnoczi 
282acbe5982SStefan Hajnoczi     if (fix && result->check_errors == 0 && result->corruptions == 0) {
283acbe5982SStefan Hajnoczi         return qcow2_mark_clean(bs);
284acbe5982SStefan Hajnoczi     }
285acbe5982SStefan Hajnoczi     return ret;
286acbe5982SStefan Hajnoczi }
287acbe5982SStefan Hajnoczi 
2887c80ab3fSJes Sorensen static int qcow2_open(BlockDriverState *bs, int flags)
289585f8587Sbellard {
290585f8587Sbellard     BDRVQcowState *s = bs->opaque;
2916d85a57eSJes Sorensen     int len, i, ret = 0;
292585f8587Sbellard     QCowHeader header;
2939b80ddf3Saliguori     uint64_t ext_end;
294585f8587Sbellard 
2956d85a57eSJes Sorensen     ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
2966d85a57eSJes Sorensen     if (ret < 0) {
297585f8587Sbellard         goto fail;
2986d85a57eSJes Sorensen     }
299585f8587Sbellard     be32_to_cpus(&header.magic);
300585f8587Sbellard     be32_to_cpus(&header.version);
301585f8587Sbellard     be64_to_cpus(&header.backing_file_offset);
302585f8587Sbellard     be32_to_cpus(&header.backing_file_size);
303585f8587Sbellard     be64_to_cpus(&header.size);
304585f8587Sbellard     be32_to_cpus(&header.cluster_bits);
305585f8587Sbellard     be32_to_cpus(&header.crypt_method);
306585f8587Sbellard     be64_to_cpus(&header.l1_table_offset);
307585f8587Sbellard     be32_to_cpus(&header.l1_size);
308585f8587Sbellard     be64_to_cpus(&header.refcount_table_offset);
309585f8587Sbellard     be32_to_cpus(&header.refcount_table_clusters);
310585f8587Sbellard     be64_to_cpus(&header.snapshots_offset);
311585f8587Sbellard     be32_to_cpus(&header.nb_snapshots);
312585f8587Sbellard 
313e8cdcec1SKevin Wolf     if (header.magic != QCOW_MAGIC) {
3146d85a57eSJes Sorensen         ret = -EINVAL;
315585f8587Sbellard         goto fail;
3166d85a57eSJes Sorensen     }
3176744cbabSKevin Wolf     if (header.version < 2 || header.version > 3) {
3186744cbabSKevin Wolf         report_unsupported(bs, "QCOW version %d", header.version);
319e8cdcec1SKevin Wolf         ret = -ENOTSUP;
320e8cdcec1SKevin Wolf         goto fail;
321e8cdcec1SKevin Wolf     }
3226744cbabSKevin Wolf 
3236744cbabSKevin Wolf     s->qcow_version = header.version;
3246744cbabSKevin Wolf 
3256744cbabSKevin Wolf     /* Initialise version 3 header fields */
3266744cbabSKevin Wolf     if (header.version == 2) {
3276744cbabSKevin Wolf         header.incompatible_features    = 0;
3286744cbabSKevin Wolf         header.compatible_features      = 0;
3296744cbabSKevin Wolf         header.autoclear_features       = 0;
3306744cbabSKevin Wolf         header.refcount_order           = 4;
3316744cbabSKevin Wolf         header.header_length            = 72;
3326744cbabSKevin Wolf     } else {
3336744cbabSKevin Wolf         be64_to_cpus(&header.incompatible_features);
3346744cbabSKevin Wolf         be64_to_cpus(&header.compatible_features);
3356744cbabSKevin Wolf         be64_to_cpus(&header.autoclear_features);
3366744cbabSKevin Wolf         be32_to_cpus(&header.refcount_order);
3376744cbabSKevin Wolf         be32_to_cpus(&header.header_length);
3386744cbabSKevin Wolf     }
3396744cbabSKevin Wolf 
3406744cbabSKevin Wolf     if (header.header_length > sizeof(header)) {
3416744cbabSKevin Wolf         s->unknown_header_fields_size = header.header_length - sizeof(header);
3426744cbabSKevin Wolf         s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
3436744cbabSKevin Wolf         ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
3446744cbabSKevin Wolf                          s->unknown_header_fields_size);
3456744cbabSKevin Wolf         if (ret < 0) {
3466744cbabSKevin Wolf             goto fail;
3476744cbabSKevin Wolf         }
3486744cbabSKevin Wolf     }
3496744cbabSKevin Wolf 
350cfcc4c62SKevin Wolf     if (header.backing_file_offset) {
351cfcc4c62SKevin Wolf         ext_end = header.backing_file_offset;
352cfcc4c62SKevin Wolf     } else {
353cfcc4c62SKevin Wolf         ext_end = 1 << header.cluster_bits;
354cfcc4c62SKevin Wolf     }
355cfcc4c62SKevin Wolf 
3566744cbabSKevin Wolf     /* Handle feature bits */
3576744cbabSKevin Wolf     s->incompatible_features    = header.incompatible_features;
3586744cbabSKevin Wolf     s->compatible_features      = header.compatible_features;
3596744cbabSKevin Wolf     s->autoclear_features       = header.autoclear_features;
3606744cbabSKevin Wolf 
361c61d0004SStefan Hajnoczi     if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) {
362cfcc4c62SKevin Wolf         void *feature_table = NULL;
363cfcc4c62SKevin Wolf         qcow2_read_extensions(bs, header.header_length, ext_end,
364cfcc4c62SKevin Wolf                               &feature_table);
365cfcc4c62SKevin Wolf         report_unsupported_feature(bs, feature_table,
366c61d0004SStefan Hajnoczi                                    s->incompatible_features &
367c61d0004SStefan Hajnoczi                                    ~QCOW2_INCOMPAT_MASK);
3686744cbabSKevin Wolf         ret = -ENOTSUP;
3696744cbabSKevin Wolf         goto fail;
3706744cbabSKevin Wolf     }
3716744cbabSKevin Wolf 
3726744cbabSKevin Wolf     /* Check support for various header values */
3736744cbabSKevin Wolf     if (header.refcount_order != 4) {
3746744cbabSKevin Wolf         report_unsupported(bs, "%d bit reference counts",
3756744cbabSKevin Wolf                            1 << header.refcount_order);
3766744cbabSKevin Wolf         ret = -ENOTSUP;
3776744cbabSKevin Wolf         goto fail;
3786744cbabSKevin Wolf     }
3796744cbabSKevin Wolf 
380d191d12dSStefan Weil     if (header.cluster_bits < MIN_CLUSTER_BITS ||
3816d85a57eSJes Sorensen         header.cluster_bits > MAX_CLUSTER_BITS) {
3826d85a57eSJes Sorensen         ret = -EINVAL;
383585f8587Sbellard         goto fail;
3846d85a57eSJes Sorensen     }
3856d85a57eSJes Sorensen     if (header.crypt_method > QCOW_CRYPT_AES) {
3866d85a57eSJes Sorensen         ret = -EINVAL;
387585f8587Sbellard         goto fail;
3886d85a57eSJes Sorensen     }
389585f8587Sbellard     s->crypt_method_header = header.crypt_method;
3906d85a57eSJes Sorensen     if (s->crypt_method_header) {
391585f8587Sbellard         bs->encrypted = 1;
3926d85a57eSJes Sorensen     }
393585f8587Sbellard     s->cluster_bits = header.cluster_bits;
394585f8587Sbellard     s->cluster_size = 1 << s->cluster_bits;
395585f8587Sbellard     s->cluster_sectors = 1 << (s->cluster_bits - 9);
396585f8587Sbellard     s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
397585f8587Sbellard     s->l2_size = 1 << s->l2_bits;
398585f8587Sbellard     bs->total_sectors = header.size / 512;
399585f8587Sbellard     s->csize_shift = (62 - (s->cluster_bits - 8));
400585f8587Sbellard     s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
401585f8587Sbellard     s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
402585f8587Sbellard     s->refcount_table_offset = header.refcount_table_offset;
403585f8587Sbellard     s->refcount_table_size =
404585f8587Sbellard         header.refcount_table_clusters << (s->cluster_bits - 3);
405585f8587Sbellard 
406585f8587Sbellard     s->snapshots_offset = header.snapshots_offset;
407585f8587Sbellard     s->nb_snapshots = header.nb_snapshots;
408585f8587Sbellard 
409585f8587Sbellard     /* read the level 1 table */
410585f8587Sbellard     s->l1_size = header.l1_size;
411419b19d9SStefan Hajnoczi     s->l1_vm_state_index = size_to_l1(s, header.size);
412585f8587Sbellard     /* the L1 table must contain at least enough entries to put
413585f8587Sbellard        header.size bytes */
4146d85a57eSJes Sorensen     if (s->l1_size < s->l1_vm_state_index) {
4156d85a57eSJes Sorensen         ret = -EINVAL;
416585f8587Sbellard         goto fail;
4176d85a57eSJes Sorensen     }
418585f8587Sbellard     s->l1_table_offset = header.l1_table_offset;
419d191d12dSStefan Weil     if (s->l1_size > 0) {
4207267c094SAnthony Liguori         s->l1_table = g_malloc0(
4213f6a3ee5SKevin Wolf             align_offset(s->l1_size * sizeof(uint64_t), 512));
4226d85a57eSJes Sorensen         ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
4236d85a57eSJes Sorensen                          s->l1_size * sizeof(uint64_t));
4246d85a57eSJes Sorensen         if (ret < 0) {
425585f8587Sbellard             goto fail;
4266d85a57eSJes Sorensen         }
427585f8587Sbellard         for(i = 0;i < s->l1_size; i++) {
428585f8587Sbellard             be64_to_cpus(&s->l1_table[i]);
429585f8587Sbellard         }
430d191d12dSStefan Weil     }
43129c1a730SKevin Wolf 
43229c1a730SKevin Wolf     /* alloc L2 table/refcount block cache */
4336af4e9eaSPaolo Bonzini     s->l2_table_cache = qcow2_cache_create(bs, L2_CACHE_SIZE);
4346af4e9eaSPaolo Bonzini     s->refcount_block_cache = qcow2_cache_create(bs, REFCOUNT_CACHE_SIZE);
43529c1a730SKevin Wolf 
4367267c094SAnthony Liguori     s->cluster_cache = g_malloc(s->cluster_size);
437585f8587Sbellard     /* one more sector for decompressed data alignment */
438dea43a65SFrediano Ziglio     s->cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
439095a9c58Saliguori                                   + 512);
440585f8587Sbellard     s->cluster_cache_offset = -1;
44106d9260fSAnthony Liguori     s->flags = flags;
442585f8587Sbellard 
4436d85a57eSJes Sorensen     ret = qcow2_refcount_init(bs);
4446d85a57eSJes Sorensen     if (ret != 0) {
445585f8587Sbellard         goto fail;
4466d85a57eSJes Sorensen     }
447585f8587Sbellard 
44872cf2d4fSBlue Swirl     QLIST_INIT(&s->cluster_allocs);
449f214978aSKevin Wolf 
4509b80ddf3Saliguori     /* read qcow2 extensions */
451cfcc4c62SKevin Wolf     if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL)) {
4526d85a57eSJes Sorensen         ret = -EINVAL;
4539b80ddf3Saliguori         goto fail;
4546d85a57eSJes Sorensen     }
4559b80ddf3Saliguori 
456585f8587Sbellard     /* read the backing file name */
457585f8587Sbellard     if (header.backing_file_offset != 0) {
458585f8587Sbellard         len = header.backing_file_size;
4596d85a57eSJes Sorensen         if (len > 1023) {
460585f8587Sbellard             len = 1023;
4616d85a57eSJes Sorensen         }
4626d85a57eSJes Sorensen         ret = bdrv_pread(bs->file, header.backing_file_offset,
4636d85a57eSJes Sorensen                          bs->backing_file, len);
4646d85a57eSJes Sorensen         if (ret < 0) {
465585f8587Sbellard             goto fail;
4666d85a57eSJes Sorensen         }
467585f8587Sbellard         bs->backing_file[len] = '\0';
468585f8587Sbellard     }
46942deb29fSKevin Wolf 
47042deb29fSKevin Wolf     ret = qcow2_read_snapshots(bs);
47142deb29fSKevin Wolf     if (ret < 0) {
472585f8587Sbellard         goto fail;
4736d85a57eSJes Sorensen     }
474585f8587Sbellard 
475af7b708dSStefan Hajnoczi     /* Clear unknown autoclear feature bits */
476af7b708dSStefan Hajnoczi     if (!bs->read_only && s->autoclear_features != 0) {
477af7b708dSStefan Hajnoczi         s->autoclear_features = 0;
478af7b708dSStefan Hajnoczi         ret = qcow2_update_header(bs);
479af7b708dSStefan Hajnoczi         if (ret < 0) {
480af7b708dSStefan Hajnoczi             goto fail;
481af7b708dSStefan Hajnoczi         }
482af7b708dSStefan Hajnoczi     }
483af7b708dSStefan Hajnoczi 
48468d100e9SKevin Wolf     /* Initialise locks */
48568d100e9SKevin Wolf     qemu_co_mutex_init(&s->lock);
48668d100e9SKevin Wolf 
487c61d0004SStefan Hajnoczi     /* Repair image if dirty */
488058f8f16SStefan Hajnoczi     if (!(flags & BDRV_O_CHECK) && !bs->read_only &&
489058f8f16SStefan Hajnoczi         (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
490c61d0004SStefan Hajnoczi         BdrvCheckResult result = {0};
491c61d0004SStefan Hajnoczi 
492acbe5982SStefan Hajnoczi         ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS);
493c61d0004SStefan Hajnoczi         if (ret < 0) {
494c61d0004SStefan Hajnoczi             goto fail;
495c61d0004SStefan Hajnoczi         }
496c61d0004SStefan Hajnoczi     }
497c61d0004SStefan Hajnoczi 
498585f8587Sbellard #ifdef DEBUG_ALLOC
4996cbc3031SPhilipp Hahn     {
5006cbc3031SPhilipp Hahn         BdrvCheckResult result = {0};
501b35278f7SStefan Hajnoczi         qcow2_check_refcounts(bs, &result, 0);
5026cbc3031SPhilipp Hahn     }
503585f8587Sbellard #endif
5046d85a57eSJes Sorensen     return ret;
505585f8587Sbellard 
506585f8587Sbellard  fail:
5076744cbabSKevin Wolf     g_free(s->unknown_header_fields);
50875bab85cSKevin Wolf     cleanup_unknown_header_ext(bs);
509ed6ccf0fSKevin Wolf     qcow2_free_snapshots(bs);
510ed6ccf0fSKevin Wolf     qcow2_refcount_close(bs);
5117267c094SAnthony Liguori     g_free(s->l1_table);
51229c1a730SKevin Wolf     if (s->l2_table_cache) {
51329c1a730SKevin Wolf         qcow2_cache_destroy(bs, s->l2_table_cache);
51429c1a730SKevin Wolf     }
5157267c094SAnthony Liguori     g_free(s->cluster_cache);
516dea43a65SFrediano Ziglio     qemu_vfree(s->cluster_data);
5176d85a57eSJes Sorensen     return ret;
518585f8587Sbellard }
519585f8587Sbellard 
5207c80ab3fSJes Sorensen static int qcow2_set_key(BlockDriverState *bs, const char *key)
521585f8587Sbellard {
522585f8587Sbellard     BDRVQcowState *s = bs->opaque;
523585f8587Sbellard     uint8_t keybuf[16];
524585f8587Sbellard     int len, i;
525585f8587Sbellard 
526585f8587Sbellard     memset(keybuf, 0, 16);
527585f8587Sbellard     len = strlen(key);
528585f8587Sbellard     if (len > 16)
529585f8587Sbellard         len = 16;
530585f8587Sbellard     /* XXX: we could compress the chars to 7 bits to increase
531585f8587Sbellard        entropy */
532585f8587Sbellard     for(i = 0;i < len;i++) {
533585f8587Sbellard         keybuf[i] = key[i];
534585f8587Sbellard     }
535585f8587Sbellard     s->crypt_method = s->crypt_method_header;
536585f8587Sbellard 
537585f8587Sbellard     if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
538585f8587Sbellard         return -1;
539585f8587Sbellard     if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
540585f8587Sbellard         return -1;
541585f8587Sbellard #if 0
542585f8587Sbellard     /* test */
543585f8587Sbellard     {
544585f8587Sbellard         uint8_t in[16];
545585f8587Sbellard         uint8_t out[16];
546585f8587Sbellard         uint8_t tmp[16];
547585f8587Sbellard         for(i=0;i<16;i++)
548585f8587Sbellard             in[i] = i;
549585f8587Sbellard         AES_encrypt(in, tmp, &s->aes_encrypt_key);
550585f8587Sbellard         AES_decrypt(tmp, out, &s->aes_decrypt_key);
551585f8587Sbellard         for(i = 0; i < 16; i++)
552585f8587Sbellard             printf(" %02x", tmp[i]);
553585f8587Sbellard         printf("\n");
554585f8587Sbellard         for(i = 0; i < 16; i++)
555585f8587Sbellard             printf(" %02x", out[i]);
556585f8587Sbellard         printf("\n");
557585f8587Sbellard     }
558585f8587Sbellard #endif
559585f8587Sbellard     return 0;
560585f8587Sbellard }
561585f8587Sbellard 
56221d82ac9SJeff Cody /* We have nothing to do for QCOW2 reopen, stubs just return
56321d82ac9SJeff Cody  * success */
56421d82ac9SJeff Cody static int qcow2_reopen_prepare(BDRVReopenState *state,
56521d82ac9SJeff Cody                                 BlockReopenQueue *queue, Error **errp)
56621d82ac9SJeff Cody {
56721d82ac9SJeff Cody     return 0;
56821d82ac9SJeff Cody }
56921d82ac9SJeff Cody 
570f8a2e5e3SStefan Hajnoczi static int coroutine_fn qcow2_co_is_allocated(BlockDriverState *bs,
571f8a2e5e3SStefan Hajnoczi         int64_t sector_num, int nb_sectors, int *pnum)
572585f8587Sbellard {
573f8a2e5e3SStefan Hajnoczi     BDRVQcowState *s = bs->opaque;
574585f8587Sbellard     uint64_t cluster_offset;
5751c46efaaSKevin Wolf     int ret;
576585f8587Sbellard 
577095a9c58Saliguori     *pnum = nb_sectors;
578f8a2e5e3SStefan Hajnoczi     /* FIXME We can get errors here, but the bdrv_co_is_allocated interface
579f8a2e5e3SStefan Hajnoczi      * can't pass them on today */
580f8a2e5e3SStefan Hajnoczi     qemu_co_mutex_lock(&s->lock);
5811c46efaaSKevin Wolf     ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset);
582f8a2e5e3SStefan Hajnoczi     qemu_co_mutex_unlock(&s->lock);
5831c46efaaSKevin Wolf     if (ret < 0) {
5841c46efaaSKevin Wolf         *pnum = 0;
5851c46efaaSKevin Wolf     }
586095a9c58Saliguori 
587585f8587Sbellard     return (cluster_offset != 0);
588585f8587Sbellard }
589585f8587Sbellard 
590a9465922Sbellard /* handle reading after the end of the backing file */
591bd28f835SKevin Wolf int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
592bd28f835SKevin Wolf                   int64_t sector_num, int nb_sectors)
593a9465922Sbellard {
594a9465922Sbellard     int n1;
595a9465922Sbellard     if ((sector_num + nb_sectors) <= bs->total_sectors)
596a9465922Sbellard         return nb_sectors;
597a9465922Sbellard     if (sector_num >= bs->total_sectors)
598a9465922Sbellard         n1 = 0;
599a9465922Sbellard     else
600a9465922Sbellard         n1 = bs->total_sectors - sector_num;
601bd28f835SKevin Wolf 
6023d9b4925SMichael Tokarev     qemu_iovec_memset(qiov, 512 * n1, 0, 512 * (nb_sectors - n1));
603bd28f835SKevin Wolf 
604a9465922Sbellard     return n1;
605a9465922Sbellard }
606a9465922Sbellard 
607a968168cSDong Xu Wang static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
6083fc48d09SFrediano Ziglio                           int remaining_sectors, QEMUIOVector *qiov)
6091490791fSaliguori {
610585f8587Sbellard     BDRVQcowState *s = bs->opaque;
611a9465922Sbellard     int index_in_cluster, n1;
61268d100e9SKevin Wolf     int ret;
613faf575c1SFrediano Ziglio     int cur_nr_sectors; /* number of sectors in current iteration */
614c2bdd990SFrediano Ziglio     uint64_t cluster_offset = 0;
6153fc48d09SFrediano Ziglio     uint64_t bytes_done = 0;
6163fc48d09SFrediano Ziglio     QEMUIOVector hd_qiov;
6173fc48d09SFrediano Ziglio     uint8_t *cluster_data = NULL;
618585f8587Sbellard 
6193fc48d09SFrediano Ziglio     qemu_iovec_init(&hd_qiov, qiov->niov);
6203fc48d09SFrediano Ziglio 
6213fc48d09SFrediano Ziglio     qemu_co_mutex_lock(&s->lock);
6223fc48d09SFrediano Ziglio 
6233fc48d09SFrediano Ziglio     while (remaining_sectors != 0) {
624585f8587Sbellard 
625faf575c1SFrediano Ziglio         /* prepare next request */
6263fc48d09SFrediano Ziglio         cur_nr_sectors = remaining_sectors;
627bd28f835SKevin Wolf         if (s->crypt_method) {
628faf575c1SFrediano Ziglio             cur_nr_sectors = MIN(cur_nr_sectors,
629bd28f835SKevin Wolf                 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
630bd28f835SKevin Wolf         }
631bd28f835SKevin Wolf 
6323fc48d09SFrediano Ziglio         ret = qcow2_get_cluster_offset(bs, sector_num << 9,
633c2bdd990SFrediano Ziglio             &cur_nr_sectors, &cluster_offset);
6341c46efaaSKevin Wolf         if (ret < 0) {
6353fc48d09SFrediano Ziglio             goto fail;
6361c46efaaSKevin Wolf         }
6371c46efaaSKevin Wolf 
6383fc48d09SFrediano Ziglio         index_in_cluster = sector_num & (s->cluster_sectors - 1);
639585f8587Sbellard 
6403fc48d09SFrediano Ziglio         qemu_iovec_reset(&hd_qiov);
6411b093c48SMichael Tokarev         qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
642faf575c1SFrediano Ziglio             cur_nr_sectors * 512);
643bd28f835SKevin Wolf 
64468d000a3SKevin Wolf         switch (ret) {
64568d000a3SKevin Wolf         case QCOW2_CLUSTER_UNALLOCATED:
646bd28f835SKevin Wolf 
647585f8587Sbellard             if (bs->backing_hd) {
648585f8587Sbellard                 /* read from the base image */
6493fc48d09SFrediano Ziglio                 n1 = qcow2_backing_read1(bs->backing_hd, &hd_qiov,
6503fc48d09SFrediano Ziglio                     sector_num, cur_nr_sectors);
651a9465922Sbellard                 if (n1 > 0) {
65266f82ceeSKevin Wolf                     BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
65368d100e9SKevin Wolf                     qemu_co_mutex_unlock(&s->lock);
6543fc48d09SFrediano Ziglio                     ret = bdrv_co_readv(bs->backing_hd, sector_num,
6553fc48d09SFrediano Ziglio                                         n1, &hd_qiov);
65668d100e9SKevin Wolf                     qemu_co_mutex_lock(&s->lock);
65768d100e9SKevin Wolf                     if (ret < 0) {
6583fc48d09SFrediano Ziglio                         goto fail;
6593ab4c7e9SKevin Wolf                     }
6601490791fSaliguori                 }
661a9465922Sbellard             } else {
662585f8587Sbellard                 /* Note: in this case, no need to wait */
6633d9b4925SMichael Tokarev                 qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
6641490791fSaliguori             }
66568d000a3SKevin Wolf             break;
66668d000a3SKevin Wolf 
6676377af48SKevin Wolf         case QCOW2_CLUSTER_ZERO:
6686377af48SKevin Wolf             if (s->qcow_version < 3) {
6696377af48SKevin Wolf                 ret = -EIO;
6706377af48SKevin Wolf                 goto fail;
6716377af48SKevin Wolf             }
6723d9b4925SMichael Tokarev             qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
6736377af48SKevin Wolf             break;
6746377af48SKevin Wolf 
67568d000a3SKevin Wolf         case QCOW2_CLUSTER_COMPRESSED:
676585f8587Sbellard             /* add AIO support for compressed blocks ? */
677c2bdd990SFrediano Ziglio             ret = qcow2_decompress_cluster(bs, cluster_offset);
6788af36488SKevin Wolf             if (ret < 0) {
6793fc48d09SFrediano Ziglio                 goto fail;
6808af36488SKevin Wolf             }
681bd28f835SKevin Wolf 
68203396148SMichael Tokarev             qemu_iovec_from_buf(&hd_qiov, 0,
683bd28f835SKevin Wolf                 s->cluster_cache + index_in_cluster * 512,
684faf575c1SFrediano Ziglio                 512 * cur_nr_sectors);
68568d000a3SKevin Wolf             break;
68668d000a3SKevin Wolf 
68768d000a3SKevin Wolf         case QCOW2_CLUSTER_NORMAL:
688c2bdd990SFrediano Ziglio             if ((cluster_offset & 511) != 0) {
6893fc48d09SFrediano Ziglio                 ret = -EIO;
6903fc48d09SFrediano Ziglio                 goto fail;
691585f8587Sbellard             }
692c87c0672Saliguori 
693bd28f835SKevin Wolf             if (s->crypt_method) {
694bd28f835SKevin Wolf                 /*
695bd28f835SKevin Wolf                  * For encrypted images, read everything into a temporary
696bd28f835SKevin Wolf                  * contiguous buffer on which the AES functions can work.
697bd28f835SKevin Wolf                  */
6983fc48d09SFrediano Ziglio                 if (!cluster_data) {
6993fc48d09SFrediano Ziglio                     cluster_data =
700dea43a65SFrediano Ziglio                         qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
701bd28f835SKevin Wolf                 }
702bd28f835SKevin Wolf 
703faf575c1SFrediano Ziglio                 assert(cur_nr_sectors <=
704bd28f835SKevin Wolf                     QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
7053fc48d09SFrediano Ziglio                 qemu_iovec_reset(&hd_qiov);
7063fc48d09SFrediano Ziglio                 qemu_iovec_add(&hd_qiov, cluster_data,
707faf575c1SFrediano Ziglio                     512 * cur_nr_sectors);
708bd28f835SKevin Wolf             }
709bd28f835SKevin Wolf 
71066f82ceeSKevin Wolf             BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
71168d100e9SKevin Wolf             qemu_co_mutex_unlock(&s->lock);
71268d100e9SKevin Wolf             ret = bdrv_co_readv(bs->file,
713c2bdd990SFrediano Ziglio                                 (cluster_offset >> 9) + index_in_cluster,
7143fc48d09SFrediano Ziglio                                 cur_nr_sectors, &hd_qiov);
71568d100e9SKevin Wolf             qemu_co_mutex_lock(&s->lock);
71668d100e9SKevin Wolf             if (ret < 0) {
7173fc48d09SFrediano Ziglio                 goto fail;
718585f8587Sbellard             }
719faf575c1SFrediano Ziglio             if (s->crypt_method) {
7203fc48d09SFrediano Ziglio                 qcow2_encrypt_sectors(s, sector_num,  cluster_data,
7213fc48d09SFrediano Ziglio                     cluster_data, cur_nr_sectors, 0, &s->aes_decrypt_key);
72203396148SMichael Tokarev                 qemu_iovec_from_buf(qiov, bytes_done,
72303396148SMichael Tokarev                     cluster_data, 512 * cur_nr_sectors);
724171e3d6bSKevin Wolf             }
72568d000a3SKevin Wolf             break;
72668d000a3SKevin Wolf 
72768d000a3SKevin Wolf         default:
72868d000a3SKevin Wolf             g_assert_not_reached();
72968d000a3SKevin Wolf             ret = -EIO;
73068d000a3SKevin Wolf             goto fail;
731faf575c1SFrediano Ziglio         }
732faf575c1SFrediano Ziglio 
7333fc48d09SFrediano Ziglio         remaining_sectors -= cur_nr_sectors;
7343fc48d09SFrediano Ziglio         sector_num += cur_nr_sectors;
7353fc48d09SFrediano Ziglio         bytes_done += cur_nr_sectors * 512;
7365ebaa27eSFrediano Ziglio     }
7373fc48d09SFrediano Ziglio     ret = 0;
738f141eafeSaliguori 
7393fc48d09SFrediano Ziglio fail:
74068d100e9SKevin Wolf     qemu_co_mutex_unlock(&s->lock);
74168d100e9SKevin Wolf 
7423fc48d09SFrediano Ziglio     qemu_iovec_destroy(&hd_qiov);
743dea43a65SFrediano Ziglio     qemu_vfree(cluster_data);
74468d100e9SKevin Wolf 
74568d100e9SKevin Wolf     return ret;
746585f8587Sbellard }
747585f8587Sbellard 
748a968168cSDong Xu Wang static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
7493fc48d09SFrediano Ziglio                            int64_t sector_num,
7503fc48d09SFrediano Ziglio                            int remaining_sectors,
7513fc48d09SFrediano Ziglio                            QEMUIOVector *qiov)
752585f8587Sbellard {
753585f8587Sbellard     BDRVQcowState *s = bs->opaque;
754585f8587Sbellard     int index_in_cluster;
755095a9c58Saliguori     int n_end;
75668d100e9SKevin Wolf     int ret;
757faf575c1SFrediano Ziglio     int cur_nr_sectors; /* number of sectors in current iteration */
758c2bdd990SFrediano Ziglio     uint64_t cluster_offset;
7593fc48d09SFrediano Ziglio     QEMUIOVector hd_qiov;
7603fc48d09SFrediano Ziglio     uint64_t bytes_done = 0;
7613fc48d09SFrediano Ziglio     uint8_t *cluster_data = NULL;
762*8d2497c3SKevin Wolf     QCowL2Meta *l2meta = NULL;
763c2271403SFrediano Ziglio 
7643cce16f4SKevin Wolf     trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num,
7653cce16f4SKevin Wolf                                  remaining_sectors);
7663cce16f4SKevin Wolf 
7673fc48d09SFrediano Ziglio     qemu_iovec_init(&hd_qiov, qiov->niov);
768585f8587Sbellard 
7693fc48d09SFrediano Ziglio     s->cluster_cache_offset = -1; /* disable compressed cache */
7703fc48d09SFrediano Ziglio 
7713fc48d09SFrediano Ziglio     qemu_co_mutex_lock(&s->lock);
7723fc48d09SFrediano Ziglio 
7733fc48d09SFrediano Ziglio     while (remaining_sectors != 0) {
7743fc48d09SFrediano Ziglio 
775f50f88b9SKevin Wolf         l2meta = NULL;
776cf5c1a23SKevin Wolf 
7773cce16f4SKevin Wolf         trace_qcow2_writev_start_part(qemu_coroutine_self());
7783fc48d09SFrediano Ziglio         index_in_cluster = sector_num & (s->cluster_sectors - 1);
7793fc48d09SFrediano Ziglio         n_end = index_in_cluster + remaining_sectors;
780095a9c58Saliguori         if (s->crypt_method &&
7815ebaa27eSFrediano Ziglio             n_end > QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors) {
782095a9c58Saliguori             n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors;
7835ebaa27eSFrediano Ziglio         }
784095a9c58Saliguori 
7853fc48d09SFrediano Ziglio         ret = qcow2_alloc_cluster_offset(bs, sector_num << 9,
786f50f88b9SKevin Wolf             index_in_cluster, n_end, &cur_nr_sectors, &cluster_offset, &l2meta);
787148da7eaSKevin Wolf         if (ret < 0) {
7883fc48d09SFrediano Ziglio             goto fail;
789148da7eaSKevin Wolf         }
790148da7eaSKevin Wolf 
791c2bdd990SFrediano Ziglio         assert((cluster_offset & 511) == 0);
792148da7eaSKevin Wolf 
7933fc48d09SFrediano Ziglio         qemu_iovec_reset(&hd_qiov);
7941b093c48SMichael Tokarev         qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
795faf575c1SFrediano Ziglio             cur_nr_sectors * 512);
7966f5f060bSKevin Wolf 
797585f8587Sbellard         if (s->crypt_method) {
7983fc48d09SFrediano Ziglio             if (!cluster_data) {
799dea43a65SFrediano Ziglio                 cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS *
800095a9c58Saliguori                                                  s->cluster_size);
801585f8587Sbellard             }
8026f5f060bSKevin Wolf 
8033fc48d09SFrediano Ziglio             assert(hd_qiov.size <=
8045ebaa27eSFrediano Ziglio                    QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
805d5e6b161SMichael Tokarev             qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);
8066f5f060bSKevin Wolf 
8073fc48d09SFrediano Ziglio             qcow2_encrypt_sectors(s, sector_num, cluster_data,
8083fc48d09SFrediano Ziglio                 cluster_data, cur_nr_sectors, 1, &s->aes_encrypt_key);
8096f5f060bSKevin Wolf 
8103fc48d09SFrediano Ziglio             qemu_iovec_reset(&hd_qiov);
8113fc48d09SFrediano Ziglio             qemu_iovec_add(&hd_qiov, cluster_data,
812faf575c1SFrediano Ziglio                 cur_nr_sectors * 512);
813585f8587Sbellard         }
8146f5f060bSKevin Wolf 
81568d100e9SKevin Wolf         qemu_co_mutex_unlock(&s->lock);
81667a7a0ebSKevin Wolf         BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
8173cce16f4SKevin Wolf         trace_qcow2_writev_data(qemu_coroutine_self(),
8183cce16f4SKevin Wolf                                 (cluster_offset >> 9) + index_in_cluster);
81968d100e9SKevin Wolf         ret = bdrv_co_writev(bs->file,
820c2bdd990SFrediano Ziglio                              (cluster_offset >> 9) + index_in_cluster,
8213fc48d09SFrediano Ziglio                              cur_nr_sectors, &hd_qiov);
82268d100e9SKevin Wolf         qemu_co_mutex_lock(&s->lock);
82368d100e9SKevin Wolf         if (ret < 0) {
8243fc48d09SFrediano Ziglio             goto fail;
825171e3d6bSKevin Wolf         }
826f141eafeSaliguori 
827f50f88b9SKevin Wolf         if (l2meta != NULL) {
828cf5c1a23SKevin Wolf             ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
829faf575c1SFrediano Ziglio             if (ret < 0) {
8303fc48d09SFrediano Ziglio                 goto fail;
831faf575c1SFrediano Ziglio             }
832faf575c1SFrediano Ziglio 
8334e95314eSKevin Wolf             /* Take the request off the list of running requests */
8344e95314eSKevin Wolf             if (l2meta->nb_clusters != 0) {
8354e95314eSKevin Wolf                 QLIST_REMOVE(l2meta, next_in_flight);
8364e95314eSKevin Wolf             }
8374e95314eSKevin Wolf 
8384e95314eSKevin Wolf             qemu_co_mutex_unlock(&s->lock);
8394e95314eSKevin Wolf             qemu_co_queue_restart_all(&l2meta->dependent_requests);
8404e95314eSKevin Wolf             qemu_co_mutex_lock(&s->lock);
8414e95314eSKevin Wolf 
842cf5c1a23SKevin Wolf             g_free(l2meta);
843cf5c1a23SKevin Wolf             l2meta = NULL;
844f50f88b9SKevin Wolf         }
8450fa9131aSKevin Wolf 
8463fc48d09SFrediano Ziglio         remaining_sectors -= cur_nr_sectors;
8473fc48d09SFrediano Ziglio         sector_num += cur_nr_sectors;
8483fc48d09SFrediano Ziglio         bytes_done += cur_nr_sectors * 512;
8493cce16f4SKevin Wolf         trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_nr_sectors);
8505ebaa27eSFrediano Ziglio     }
8513fc48d09SFrediano Ziglio     ret = 0;
852faf575c1SFrediano Ziglio 
8533fc48d09SFrediano Ziglio fail:
8544e95314eSKevin Wolf     qemu_co_mutex_unlock(&s->lock);
8554e95314eSKevin Wolf 
856cf5c1a23SKevin Wolf     if (l2meta != NULL) {
8574e95314eSKevin Wolf         if (l2meta->nb_clusters != 0) {
8584e95314eSKevin Wolf             QLIST_REMOVE(l2meta, next_in_flight);
8594e95314eSKevin Wolf         }
8604e95314eSKevin Wolf         qemu_co_queue_restart_all(&l2meta->dependent_requests);
861cf5c1a23SKevin Wolf         g_free(l2meta);
862cf5c1a23SKevin Wolf     }
8630fa9131aSKevin Wolf 
8643fc48d09SFrediano Ziglio     qemu_iovec_destroy(&hd_qiov);
865dea43a65SFrediano Ziglio     qemu_vfree(cluster_data);
8663cce16f4SKevin Wolf     trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
86742496d62SKevin Wolf 
86868d100e9SKevin Wolf     return ret;
869585f8587Sbellard }
870585f8587Sbellard 
8717c80ab3fSJes Sorensen static void qcow2_close(BlockDriverState *bs)
872585f8587Sbellard {
873585f8587Sbellard     BDRVQcowState *s = bs->opaque;
8747267c094SAnthony Liguori     g_free(s->l1_table);
87529c1a730SKevin Wolf 
87629c1a730SKevin Wolf     qcow2_cache_flush(bs, s->l2_table_cache);
87729c1a730SKevin Wolf     qcow2_cache_flush(bs, s->refcount_block_cache);
87829c1a730SKevin Wolf 
879c61d0004SStefan Hajnoczi     qcow2_mark_clean(bs);
880c61d0004SStefan Hajnoczi 
88129c1a730SKevin Wolf     qcow2_cache_destroy(bs, s->l2_table_cache);
88229c1a730SKevin Wolf     qcow2_cache_destroy(bs, s->refcount_block_cache);
88329c1a730SKevin Wolf 
8846744cbabSKevin Wolf     g_free(s->unknown_header_fields);
88575bab85cSKevin Wolf     cleanup_unknown_header_ext(bs);
8866744cbabSKevin Wolf 
8877267c094SAnthony Liguori     g_free(s->cluster_cache);
888dea43a65SFrediano Ziglio     qemu_vfree(s->cluster_data);
889ed6ccf0fSKevin Wolf     qcow2_refcount_close(bs);
89028c1202bSLi Zhi Hui     qcow2_free_snapshots(bs);
891585f8587Sbellard }
892585f8587Sbellard 
89306d9260fSAnthony Liguori static void qcow2_invalidate_cache(BlockDriverState *bs)
89406d9260fSAnthony Liguori {
89506d9260fSAnthony Liguori     BDRVQcowState *s = bs->opaque;
89606d9260fSAnthony Liguori     int flags = s->flags;
89706d9260fSAnthony Liguori     AES_KEY aes_encrypt_key;
89806d9260fSAnthony Liguori     AES_KEY aes_decrypt_key;
89906d9260fSAnthony Liguori     uint32_t crypt_method = 0;
90006d9260fSAnthony Liguori 
90106d9260fSAnthony Liguori     /*
90206d9260fSAnthony Liguori      * Backing files are read-only which makes all of their metadata immutable,
90306d9260fSAnthony Liguori      * that means we don't have to worry about reopening them here.
90406d9260fSAnthony Liguori      */
90506d9260fSAnthony Liguori 
90606d9260fSAnthony Liguori     if (s->crypt_method) {
90706d9260fSAnthony Liguori         crypt_method = s->crypt_method;
90806d9260fSAnthony Liguori         memcpy(&aes_encrypt_key, &s->aes_encrypt_key, sizeof(aes_encrypt_key));
90906d9260fSAnthony Liguori         memcpy(&aes_decrypt_key, &s->aes_decrypt_key, sizeof(aes_decrypt_key));
91006d9260fSAnthony Liguori     }
91106d9260fSAnthony Liguori 
91206d9260fSAnthony Liguori     qcow2_close(bs);
91306d9260fSAnthony Liguori 
91406d9260fSAnthony Liguori     memset(s, 0, sizeof(BDRVQcowState));
91506d9260fSAnthony Liguori     qcow2_open(bs, flags);
91606d9260fSAnthony Liguori 
91706d9260fSAnthony Liguori     if (crypt_method) {
91806d9260fSAnthony Liguori         s->crypt_method = crypt_method;
91906d9260fSAnthony Liguori         memcpy(&s->aes_encrypt_key, &aes_encrypt_key, sizeof(aes_encrypt_key));
92006d9260fSAnthony Liguori         memcpy(&s->aes_decrypt_key, &aes_decrypt_key, sizeof(aes_decrypt_key));
92106d9260fSAnthony Liguori     }
92206d9260fSAnthony Liguori }
92306d9260fSAnthony Liguori 
924e24e49e6SKevin Wolf static size_t header_ext_add(char *buf, uint32_t magic, const void *s,
925e24e49e6SKevin Wolf     size_t len, size_t buflen)
926756e6736SKevin Wolf {
927e24e49e6SKevin Wolf     QCowExtension *ext_backing_fmt = (QCowExtension*) buf;
928e24e49e6SKevin Wolf     size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7);
929756e6736SKevin Wolf 
930e24e49e6SKevin Wolf     if (buflen < ext_len) {
931756e6736SKevin Wolf         return -ENOSPC;
932756e6736SKevin Wolf     }
933756e6736SKevin Wolf 
934e24e49e6SKevin Wolf     *ext_backing_fmt = (QCowExtension) {
935e24e49e6SKevin Wolf         .magic  = cpu_to_be32(magic),
936e24e49e6SKevin Wolf         .len    = cpu_to_be32(len),
937e24e49e6SKevin Wolf     };
938e24e49e6SKevin Wolf     memcpy(buf + sizeof(QCowExtension), s, len);
939756e6736SKevin Wolf 
940e24e49e6SKevin Wolf     return ext_len;
941756e6736SKevin Wolf }
942756e6736SKevin Wolf 
943e24e49e6SKevin Wolf /*
944e24e49e6SKevin Wolf  * Updates the qcow2 header, including the variable length parts of it, i.e.
945e24e49e6SKevin Wolf  * the backing file name and all extensions. qcow2 was not designed to allow
946e24e49e6SKevin Wolf  * such changes, so if we run out of space (we can only use the first cluster)
947e24e49e6SKevin Wolf  * this function may fail.
948e24e49e6SKevin Wolf  *
949e24e49e6SKevin Wolf  * Returns 0 on success, -errno in error cases.
950e24e49e6SKevin Wolf  */
951e24e49e6SKevin Wolf int qcow2_update_header(BlockDriverState *bs)
952e24e49e6SKevin Wolf {
953e24e49e6SKevin Wolf     BDRVQcowState *s = bs->opaque;
954e24e49e6SKevin Wolf     QCowHeader *header;
955e24e49e6SKevin Wolf     char *buf;
956e24e49e6SKevin Wolf     size_t buflen = s->cluster_size;
957e24e49e6SKevin Wolf     int ret;
958e24e49e6SKevin Wolf     uint64_t total_size;
959e24e49e6SKevin Wolf     uint32_t refcount_table_clusters;
9606744cbabSKevin Wolf     size_t header_length;
96175bab85cSKevin Wolf     Qcow2UnknownHeaderExtension *uext;
962e24e49e6SKevin Wolf 
963e24e49e6SKevin Wolf     buf = qemu_blockalign(bs, buflen);
964e24e49e6SKevin Wolf 
965e24e49e6SKevin Wolf     /* Header structure */
966e24e49e6SKevin Wolf     header = (QCowHeader*) buf;
967e24e49e6SKevin Wolf 
968e24e49e6SKevin Wolf     if (buflen < sizeof(*header)) {
969e24e49e6SKevin Wolf         ret = -ENOSPC;
970e24e49e6SKevin Wolf         goto fail;
971756e6736SKevin Wolf     }
972756e6736SKevin Wolf 
9736744cbabSKevin Wolf     header_length = sizeof(*header) + s->unknown_header_fields_size;
974e24e49e6SKevin Wolf     total_size = bs->total_sectors * BDRV_SECTOR_SIZE;
975e24e49e6SKevin Wolf     refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
976e24e49e6SKevin Wolf 
977e24e49e6SKevin Wolf     *header = (QCowHeader) {
9786744cbabSKevin Wolf         /* Version 2 fields */
979e24e49e6SKevin Wolf         .magic                  = cpu_to_be32(QCOW_MAGIC),
9806744cbabSKevin Wolf         .version                = cpu_to_be32(s->qcow_version),
981e24e49e6SKevin Wolf         .backing_file_offset    = 0,
982e24e49e6SKevin Wolf         .backing_file_size      = 0,
983e24e49e6SKevin Wolf         .cluster_bits           = cpu_to_be32(s->cluster_bits),
984e24e49e6SKevin Wolf         .size                   = cpu_to_be64(total_size),
985e24e49e6SKevin Wolf         .crypt_method           = cpu_to_be32(s->crypt_method_header),
986e24e49e6SKevin Wolf         .l1_size                = cpu_to_be32(s->l1_size),
987e24e49e6SKevin Wolf         .l1_table_offset        = cpu_to_be64(s->l1_table_offset),
988e24e49e6SKevin Wolf         .refcount_table_offset  = cpu_to_be64(s->refcount_table_offset),
989e24e49e6SKevin Wolf         .refcount_table_clusters = cpu_to_be32(refcount_table_clusters),
990e24e49e6SKevin Wolf         .nb_snapshots           = cpu_to_be32(s->nb_snapshots),
991e24e49e6SKevin Wolf         .snapshots_offset       = cpu_to_be64(s->snapshots_offset),
9926744cbabSKevin Wolf 
9936744cbabSKevin Wolf         /* Version 3 fields */
9946744cbabSKevin Wolf         .incompatible_features  = cpu_to_be64(s->incompatible_features),
9956744cbabSKevin Wolf         .compatible_features    = cpu_to_be64(s->compatible_features),
9966744cbabSKevin Wolf         .autoclear_features     = cpu_to_be64(s->autoclear_features),
9976744cbabSKevin Wolf         .refcount_order         = cpu_to_be32(3 + REFCOUNT_SHIFT),
9986744cbabSKevin Wolf         .header_length          = cpu_to_be32(header_length),
999e24e49e6SKevin Wolf     };
1000e24e49e6SKevin Wolf 
10016744cbabSKevin Wolf     /* For older versions, write a shorter header */
10026744cbabSKevin Wolf     switch (s->qcow_version) {
10036744cbabSKevin Wolf     case 2:
10046744cbabSKevin Wolf         ret = offsetof(QCowHeader, incompatible_features);
10056744cbabSKevin Wolf         break;
10066744cbabSKevin Wolf     case 3:
10076744cbabSKevin Wolf         ret = sizeof(*header);
10086744cbabSKevin Wolf         break;
10096744cbabSKevin Wolf     default:
1010b6c14762SJim Meyering         ret = -EINVAL;
1011b6c14762SJim Meyering         goto fail;
10126744cbabSKevin Wolf     }
10136744cbabSKevin Wolf 
10146744cbabSKevin Wolf     buf += ret;
10156744cbabSKevin Wolf     buflen -= ret;
10166744cbabSKevin Wolf     memset(buf, 0, buflen);
10176744cbabSKevin Wolf 
10186744cbabSKevin Wolf     /* Preserve any unknown field in the header */
10196744cbabSKevin Wolf     if (s->unknown_header_fields_size) {
10206744cbabSKevin Wolf         if (buflen < s->unknown_header_fields_size) {
10216744cbabSKevin Wolf             ret = -ENOSPC;
10226744cbabSKevin Wolf             goto fail;
10236744cbabSKevin Wolf         }
10246744cbabSKevin Wolf 
10256744cbabSKevin Wolf         memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size);
10266744cbabSKevin Wolf         buf += s->unknown_header_fields_size;
10276744cbabSKevin Wolf         buflen -= s->unknown_header_fields_size;
10286744cbabSKevin Wolf     }
1029e24e49e6SKevin Wolf 
1030e24e49e6SKevin Wolf     /* Backing file format header extension */
1031e24e49e6SKevin Wolf     if (*bs->backing_format) {
1032e24e49e6SKevin Wolf         ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT,
1033e24e49e6SKevin Wolf                              bs->backing_format, strlen(bs->backing_format),
1034e24e49e6SKevin Wolf                              buflen);
1035756e6736SKevin Wolf         if (ret < 0) {
1036756e6736SKevin Wolf             goto fail;
1037756e6736SKevin Wolf         }
1038756e6736SKevin Wolf 
1039e24e49e6SKevin Wolf         buf += ret;
1040e24e49e6SKevin Wolf         buflen -= ret;
1041e24e49e6SKevin Wolf     }
1042756e6736SKevin Wolf 
1043cfcc4c62SKevin Wolf     /* Feature table */
1044cfcc4c62SKevin Wolf     Qcow2Feature features[] = {
1045c61d0004SStefan Hajnoczi         {
1046c61d0004SStefan Hajnoczi             .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
1047c61d0004SStefan Hajnoczi             .bit  = QCOW2_INCOMPAT_DIRTY_BITNR,
1048c61d0004SStefan Hajnoczi             .name = "dirty bit",
1049c61d0004SStefan Hajnoczi         },
1050bfe8043eSStefan Hajnoczi         {
1051bfe8043eSStefan Hajnoczi             .type = QCOW2_FEAT_TYPE_COMPATIBLE,
1052bfe8043eSStefan Hajnoczi             .bit  = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
1053bfe8043eSStefan Hajnoczi             .name = "lazy refcounts",
1054bfe8043eSStefan Hajnoczi         },
1055cfcc4c62SKevin Wolf     };
1056cfcc4c62SKevin Wolf 
1057cfcc4c62SKevin Wolf     ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
1058cfcc4c62SKevin Wolf                          features, sizeof(features), buflen);
1059cfcc4c62SKevin Wolf     if (ret < 0) {
1060cfcc4c62SKevin Wolf         goto fail;
1061cfcc4c62SKevin Wolf     }
1062cfcc4c62SKevin Wolf     buf += ret;
1063cfcc4c62SKevin Wolf     buflen -= ret;
1064cfcc4c62SKevin Wolf 
106575bab85cSKevin Wolf     /* Keep unknown header extensions */
106675bab85cSKevin Wolf     QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
106775bab85cSKevin Wolf         ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
106875bab85cSKevin Wolf         if (ret < 0) {
106975bab85cSKevin Wolf             goto fail;
107075bab85cSKevin Wolf         }
107175bab85cSKevin Wolf 
107275bab85cSKevin Wolf         buf += ret;
107375bab85cSKevin Wolf         buflen -= ret;
107475bab85cSKevin Wolf     }
107575bab85cSKevin Wolf 
1076e24e49e6SKevin Wolf     /* End of header extensions */
1077e24e49e6SKevin Wolf     ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen);
1078756e6736SKevin Wolf     if (ret < 0) {
1079756e6736SKevin Wolf         goto fail;
1080756e6736SKevin Wolf     }
1081756e6736SKevin Wolf 
1082e24e49e6SKevin Wolf     buf += ret;
1083e24e49e6SKevin Wolf     buflen -= ret;
1084e24e49e6SKevin Wolf 
1085e24e49e6SKevin Wolf     /* Backing file name */
1086e24e49e6SKevin Wolf     if (*bs->backing_file) {
1087e24e49e6SKevin Wolf         size_t backing_file_len = strlen(bs->backing_file);
1088e24e49e6SKevin Wolf 
1089e24e49e6SKevin Wolf         if (buflen < backing_file_len) {
1090e24e49e6SKevin Wolf             ret = -ENOSPC;
1091e24e49e6SKevin Wolf             goto fail;
1092e24e49e6SKevin Wolf         }
1093e24e49e6SKevin Wolf 
109400ea1881SJim Meyering         /* Using strncpy is ok here, since buf is not NUL-terminated. */
1095e24e49e6SKevin Wolf         strncpy(buf, bs->backing_file, buflen);
1096e24e49e6SKevin Wolf 
1097e24e49e6SKevin Wolf         header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
1098e24e49e6SKevin Wolf         header->backing_file_size   = cpu_to_be32(backing_file_len);
1099e24e49e6SKevin Wolf     }
1100e24e49e6SKevin Wolf 
1101e24e49e6SKevin Wolf     /* Write the new header */
1102e24e49e6SKevin Wolf     ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size);
1103756e6736SKevin Wolf     if (ret < 0) {
1104756e6736SKevin Wolf         goto fail;
1105756e6736SKevin Wolf     }
1106756e6736SKevin Wolf 
1107756e6736SKevin Wolf     ret = 0;
1108756e6736SKevin Wolf fail:
1109e24e49e6SKevin Wolf     qemu_vfree(header);
1110756e6736SKevin Wolf     return ret;
1111756e6736SKevin Wolf }
1112756e6736SKevin Wolf 
1113756e6736SKevin Wolf static int qcow2_change_backing_file(BlockDriverState *bs,
1114756e6736SKevin Wolf     const char *backing_file, const char *backing_fmt)
1115756e6736SKevin Wolf {
1116e24e49e6SKevin Wolf     pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
1117e24e49e6SKevin Wolf     pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
1118e24e49e6SKevin Wolf 
1119e24e49e6SKevin Wolf     return qcow2_update_header(bs);
1120756e6736SKevin Wolf }
1121756e6736SKevin Wolf 
1122a35e1c17SKevin Wolf static int preallocate(BlockDriverState *bs)
1123a35e1c17SKevin Wolf {
1124a35e1c17SKevin Wolf     uint64_t nb_sectors;
1125a35e1c17SKevin Wolf     uint64_t offset;
1126060bee89SKevin Wolf     uint64_t host_offset = 0;
1127a35e1c17SKevin Wolf     int num;
1128148da7eaSKevin Wolf     int ret;
1129f50f88b9SKevin Wolf     QCowL2Meta *meta;
1130a35e1c17SKevin Wolf 
1131a35e1c17SKevin Wolf     nb_sectors = bdrv_getlength(bs) >> 9;
1132a35e1c17SKevin Wolf     offset = 0;
1133a35e1c17SKevin Wolf 
1134a35e1c17SKevin Wolf     while (nb_sectors) {
1135a35e1c17SKevin Wolf         num = MIN(nb_sectors, INT_MAX >> 9);
1136060bee89SKevin Wolf         ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num,
1137060bee89SKevin Wolf                                          &host_offset, &meta);
1138148da7eaSKevin Wolf         if (ret < 0) {
113919dbcbf7SKevin Wolf             return ret;
1140a35e1c17SKevin Wolf         }
1141a35e1c17SKevin Wolf 
1142f50f88b9SKevin Wolf         ret = qcow2_alloc_cluster_link_l2(bs, meta);
114319dbcbf7SKevin Wolf         if (ret < 0) {
1144f50f88b9SKevin Wolf             qcow2_free_any_clusters(bs, meta->alloc_offset, meta->nb_clusters);
114519dbcbf7SKevin Wolf             return ret;
1146a35e1c17SKevin Wolf         }
1147a35e1c17SKevin Wolf 
1148f214978aSKevin Wolf         /* There are no dependent requests, but we need to remove our request
1149f214978aSKevin Wolf          * from the list of in-flight requests */
1150f50f88b9SKevin Wolf         if (meta != NULL) {
11514e95314eSKevin Wolf             QLIST_REMOVE(meta, next_in_flight);
1152f50f88b9SKevin Wolf         }
1153f214978aSKevin Wolf 
1154a35e1c17SKevin Wolf         /* TODO Preallocate data if requested */
1155a35e1c17SKevin Wolf 
1156a35e1c17SKevin Wolf         nb_sectors -= num;
1157a35e1c17SKevin Wolf         offset += num << 9;
1158a35e1c17SKevin Wolf     }
1159a35e1c17SKevin Wolf 
1160a35e1c17SKevin Wolf     /*
1161a35e1c17SKevin Wolf      * It is expected that the image file is large enough to actually contain
1162a35e1c17SKevin Wolf      * all of the allocated clusters (otherwise we get failing reads after
1163a35e1c17SKevin Wolf      * EOF). Extend the image to the last allocated sector.
1164a35e1c17SKevin Wolf      */
1165060bee89SKevin Wolf     if (host_offset != 0) {
1166ea80b906SKevin Wolf         uint8_t buf[512];
1167ea80b906SKevin Wolf         memset(buf, 0, 512);
1168060bee89SKevin Wolf         ret = bdrv_write(bs->file, (host_offset >> 9) + num - 1, buf, 1);
116919dbcbf7SKevin Wolf         if (ret < 0) {
117019dbcbf7SKevin Wolf             return ret;
117119dbcbf7SKevin Wolf         }
1172a35e1c17SKevin Wolf     }
1173a35e1c17SKevin Wolf 
1174a35e1c17SKevin Wolf     return 0;
1175a35e1c17SKevin Wolf }
1176a35e1c17SKevin Wolf 
11777c80ab3fSJes Sorensen static int qcow2_create2(const char *filename, int64_t total_size,
1178a9420734SKevin Wolf                          const char *backing_file, const char *backing_format,
1179a9420734SKevin Wolf                          int flags, size_t cluster_size, int prealloc,
11806744cbabSKevin Wolf                          QEMUOptionParameter *options, int version)
1181a9420734SKevin Wolf {
11829b2260cbSDong Xu Wang     /* Calculate cluster_bits */
1183a9420734SKevin Wolf     int cluster_bits;
1184a9420734SKevin Wolf     cluster_bits = ffs(cluster_size) - 1;
1185a9420734SKevin Wolf     if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS ||
1186a9420734SKevin Wolf         (1 << cluster_bits) != cluster_size)
1187a9420734SKevin Wolf     {
1188a9420734SKevin Wolf         error_report(
11896daf194dSMarkus Armbruster             "Cluster size must be a power of two between %d and %dk",
1190a9420734SKevin Wolf             1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10));
1191a9420734SKevin Wolf         return -EINVAL;
1192a9420734SKevin Wolf     }
1193a9420734SKevin Wolf 
1194a9420734SKevin Wolf     /*
1195a9420734SKevin Wolf      * Open the image file and write a minimal qcow2 header.
1196a9420734SKevin Wolf      *
1197a9420734SKevin Wolf      * We keep things simple and start with a zero-sized image. We also
1198a9420734SKevin Wolf      * do without refcount blocks or a L1 table for now. We'll fix the
1199a9420734SKevin Wolf      * inconsistency later.
1200a9420734SKevin Wolf      *
1201a9420734SKevin Wolf      * We do need a refcount table because growing the refcount table means
1202a9420734SKevin Wolf      * allocating two new refcount blocks - the seconds of which would be at
1203a9420734SKevin Wolf      * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file
1204a9420734SKevin Wolf      * size for any qcow2 image.
1205a9420734SKevin Wolf      */
1206a9420734SKevin Wolf     BlockDriverState* bs;
1207a9420734SKevin Wolf     QCowHeader header;
1208a9420734SKevin Wolf     uint8_t* refcount_table;
1209a9420734SKevin Wolf     int ret;
1210a9420734SKevin Wolf 
1211a9420734SKevin Wolf     ret = bdrv_create_file(filename, options);
1212a9420734SKevin Wolf     if (ret < 0) {
1213a9420734SKevin Wolf         return ret;
1214a9420734SKevin Wolf     }
1215a9420734SKevin Wolf 
1216a9420734SKevin Wolf     ret = bdrv_file_open(&bs, filename, BDRV_O_RDWR);
1217a9420734SKevin Wolf     if (ret < 0) {
1218a9420734SKevin Wolf         return ret;
1219a9420734SKevin Wolf     }
1220a9420734SKevin Wolf 
1221a9420734SKevin Wolf     /* Write the header */
1222a9420734SKevin Wolf     memset(&header, 0, sizeof(header));
1223a9420734SKevin Wolf     header.magic = cpu_to_be32(QCOW_MAGIC);
12246744cbabSKevin Wolf     header.version = cpu_to_be32(version);
1225a9420734SKevin Wolf     header.cluster_bits = cpu_to_be32(cluster_bits);
1226a9420734SKevin Wolf     header.size = cpu_to_be64(0);
1227a9420734SKevin Wolf     header.l1_table_offset = cpu_to_be64(0);
1228a9420734SKevin Wolf     header.l1_size = cpu_to_be32(0);
1229a9420734SKevin Wolf     header.refcount_table_offset = cpu_to_be64(cluster_size);
1230a9420734SKevin Wolf     header.refcount_table_clusters = cpu_to_be32(1);
12316744cbabSKevin Wolf     header.refcount_order = cpu_to_be32(3 + REFCOUNT_SHIFT);
12326744cbabSKevin Wolf     header.header_length = cpu_to_be32(sizeof(header));
1233a9420734SKevin Wolf 
1234a9420734SKevin Wolf     if (flags & BLOCK_FLAG_ENCRYPT) {
1235a9420734SKevin Wolf         header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
1236a9420734SKevin Wolf     } else {
1237a9420734SKevin Wolf         header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
1238a9420734SKevin Wolf     }
1239a9420734SKevin Wolf 
1240bfe8043eSStefan Hajnoczi     if (flags & BLOCK_FLAG_LAZY_REFCOUNTS) {
1241bfe8043eSStefan Hajnoczi         header.compatible_features |=
1242bfe8043eSStefan Hajnoczi             cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
1243bfe8043eSStefan Hajnoczi     }
1244bfe8043eSStefan Hajnoczi 
1245a9420734SKevin Wolf     ret = bdrv_pwrite(bs, 0, &header, sizeof(header));
1246a9420734SKevin Wolf     if (ret < 0) {
1247a9420734SKevin Wolf         goto out;
1248a9420734SKevin Wolf     }
1249a9420734SKevin Wolf 
1250a9420734SKevin Wolf     /* Write an empty refcount table */
12517267c094SAnthony Liguori     refcount_table = g_malloc0(cluster_size);
1252a9420734SKevin Wolf     ret = bdrv_pwrite(bs, cluster_size, refcount_table, cluster_size);
12537267c094SAnthony Liguori     g_free(refcount_table);
1254a9420734SKevin Wolf 
1255a9420734SKevin Wolf     if (ret < 0) {
1256a9420734SKevin Wolf         goto out;
1257a9420734SKevin Wolf     }
1258a9420734SKevin Wolf 
1259a9420734SKevin Wolf     bdrv_close(bs);
1260a9420734SKevin Wolf 
1261a9420734SKevin Wolf     /*
1262a9420734SKevin Wolf      * And now open the image and make it consistent first (i.e. increase the
1263a9420734SKevin Wolf      * refcount of the cluster that is occupied by the header and the refcount
1264a9420734SKevin Wolf      * table)
1265a9420734SKevin Wolf      */
1266a9420734SKevin Wolf     BlockDriver* drv = bdrv_find_format("qcow2");
1267a9420734SKevin Wolf     assert(drv != NULL);
1268e1a7107fSKevin Wolf     ret = bdrv_open(bs, filename,
1269e1a7107fSKevin Wolf         BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, drv);
1270a9420734SKevin Wolf     if (ret < 0) {
1271a9420734SKevin Wolf         goto out;
1272a9420734SKevin Wolf     }
1273a9420734SKevin Wolf 
1274a9420734SKevin Wolf     ret = qcow2_alloc_clusters(bs, 2 * cluster_size);
1275a9420734SKevin Wolf     if (ret < 0) {
1276a9420734SKevin Wolf         goto out;
1277a9420734SKevin Wolf 
1278a9420734SKevin Wolf     } else if (ret != 0) {
1279a9420734SKevin Wolf         error_report("Huh, first cluster in empty image is already in use?");
1280a9420734SKevin Wolf         abort();
1281a9420734SKevin Wolf     }
1282a9420734SKevin Wolf 
1283a9420734SKevin Wolf     /* Okay, now that we have a valid image, let's give it the right size */
1284a9420734SKevin Wolf     ret = bdrv_truncate(bs, total_size * BDRV_SECTOR_SIZE);
1285a9420734SKevin Wolf     if (ret < 0) {
1286a9420734SKevin Wolf         goto out;
1287a9420734SKevin Wolf     }
1288a9420734SKevin Wolf 
1289a9420734SKevin Wolf     /* Want a backing file? There you go.*/
1290a9420734SKevin Wolf     if (backing_file) {
1291a9420734SKevin Wolf         ret = bdrv_change_backing_file(bs, backing_file, backing_format);
1292a9420734SKevin Wolf         if (ret < 0) {
1293a9420734SKevin Wolf             goto out;
1294a9420734SKevin Wolf         }
1295a9420734SKevin Wolf     }
1296a9420734SKevin Wolf 
1297a9420734SKevin Wolf     /* And if we're supposed to preallocate metadata, do that now */
1298a9420734SKevin Wolf     if (prealloc) {
129915552c4aSZhi Yong Wu         BDRVQcowState *s = bs->opaque;
130015552c4aSZhi Yong Wu         qemu_co_mutex_lock(&s->lock);
1301a9420734SKevin Wolf         ret = preallocate(bs);
130215552c4aSZhi Yong Wu         qemu_co_mutex_unlock(&s->lock);
1303a9420734SKevin Wolf         if (ret < 0) {
1304a9420734SKevin Wolf             goto out;
1305a9420734SKevin Wolf         }
1306a9420734SKevin Wolf     }
1307a9420734SKevin Wolf 
1308a9420734SKevin Wolf     ret = 0;
1309a9420734SKevin Wolf out:
1310a9420734SKevin Wolf     bdrv_delete(bs);
1311a9420734SKevin Wolf     return ret;
1312a9420734SKevin Wolf }
1313de5f3f40SKevin Wolf 
13147c80ab3fSJes Sorensen static int qcow2_create(const char *filename, QEMUOptionParameter *options)
1315de5f3f40SKevin Wolf {
1316de5f3f40SKevin Wolf     const char *backing_file = NULL;
1317de5f3f40SKevin Wolf     const char *backing_fmt = NULL;
1318de5f3f40SKevin Wolf     uint64_t sectors = 0;
1319de5f3f40SKevin Wolf     int flags = 0;
132099cce9faSKevin Wolf     size_t cluster_size = DEFAULT_CLUSTER_SIZE;
1321de5f3f40SKevin Wolf     int prealloc = 0;
13226744cbabSKevin Wolf     int version = 2;
1323de5f3f40SKevin Wolf 
1324de5f3f40SKevin Wolf     /* Read out options */
1325de5f3f40SKevin Wolf     while (options && options->name) {
1326de5f3f40SKevin Wolf         if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
1327de5f3f40SKevin Wolf             sectors = options->value.n / 512;
1328de5f3f40SKevin Wolf         } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
1329de5f3f40SKevin Wolf             backing_file = options->value.s;
1330de5f3f40SKevin Wolf         } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) {
1331de5f3f40SKevin Wolf             backing_fmt = options->value.s;
1332de5f3f40SKevin Wolf         } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) {
1333de5f3f40SKevin Wolf             flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0;
1334de5f3f40SKevin Wolf         } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
1335de5f3f40SKevin Wolf             if (options->value.n) {
1336de5f3f40SKevin Wolf                 cluster_size = options->value.n;
1337de5f3f40SKevin Wolf             }
1338de5f3f40SKevin Wolf         } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
1339de5f3f40SKevin Wolf             if (!options->value.s || !strcmp(options->value.s, "off")) {
1340de5f3f40SKevin Wolf                 prealloc = 0;
1341de5f3f40SKevin Wolf             } else if (!strcmp(options->value.s, "metadata")) {
1342de5f3f40SKevin Wolf                 prealloc = 1;
1343de5f3f40SKevin Wolf             } else {
1344de5f3f40SKevin Wolf                 fprintf(stderr, "Invalid preallocation mode: '%s'\n",
1345de5f3f40SKevin Wolf                     options->value.s);
1346de5f3f40SKevin Wolf                 return -EINVAL;
1347de5f3f40SKevin Wolf             }
13486744cbabSKevin Wolf         } else if (!strcmp(options->name, BLOCK_OPT_COMPAT_LEVEL)) {
13496744cbabSKevin Wolf             if (!options->value.s || !strcmp(options->value.s, "0.10")) {
13506744cbabSKevin Wolf                 version = 2;
13516744cbabSKevin Wolf             } else if (!strcmp(options->value.s, "1.1")) {
13526744cbabSKevin Wolf                 version = 3;
13536744cbabSKevin Wolf             } else {
13546744cbabSKevin Wolf                 fprintf(stderr, "Invalid compatibility level: '%s'\n",
13556744cbabSKevin Wolf                     options->value.s);
13566744cbabSKevin Wolf                 return -EINVAL;
13576744cbabSKevin Wolf             }
1358bfe8043eSStefan Hajnoczi         } else if (!strcmp(options->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
1359bfe8043eSStefan Hajnoczi             flags |= options->value.n ? BLOCK_FLAG_LAZY_REFCOUNTS : 0;
1360de5f3f40SKevin Wolf         }
1361de5f3f40SKevin Wolf         options++;
1362de5f3f40SKevin Wolf     }
1363de5f3f40SKevin Wolf 
1364de5f3f40SKevin Wolf     if (backing_file && prealloc) {
1365de5f3f40SKevin Wolf         fprintf(stderr, "Backing file and preallocation cannot be used at "
1366de5f3f40SKevin Wolf             "the same time\n");
1367de5f3f40SKevin Wolf         return -EINVAL;
1368de5f3f40SKevin Wolf     }
1369de5f3f40SKevin Wolf 
1370bfe8043eSStefan Hajnoczi     if (version < 3 && (flags & BLOCK_FLAG_LAZY_REFCOUNTS)) {
1371bfe8043eSStefan Hajnoczi         fprintf(stderr, "Lazy refcounts only supported with compatibility "
1372bfe8043eSStefan Hajnoczi                 "level 1.1 and above (use compat=1.1 or greater)\n");
1373bfe8043eSStefan Hajnoczi         return -EINVAL;
1374bfe8043eSStefan Hajnoczi     }
1375bfe8043eSStefan Hajnoczi 
13767c80ab3fSJes Sorensen     return qcow2_create2(filename, sectors, backing_file, backing_fmt, flags,
13776744cbabSKevin Wolf                          cluster_size, prealloc, options, version);
1378de5f3f40SKevin Wolf }
1379de5f3f40SKevin Wolf 
13807c80ab3fSJes Sorensen static int qcow2_make_empty(BlockDriverState *bs)
138120d97356SBlue Swirl {
138220d97356SBlue Swirl #if 0
138320d97356SBlue Swirl     /* XXX: not correct */
138420d97356SBlue Swirl     BDRVQcowState *s = bs->opaque;
138520d97356SBlue Swirl     uint32_t l1_length = s->l1_size * sizeof(uint64_t);
138620d97356SBlue Swirl     int ret;
138720d97356SBlue Swirl 
138820d97356SBlue Swirl     memset(s->l1_table, 0, l1_length);
138966f82ceeSKevin Wolf     if (bdrv_pwrite(bs->file, s->l1_table_offset, s->l1_table, l1_length) < 0)
139020d97356SBlue Swirl         return -1;
139166f82ceeSKevin Wolf     ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
139220d97356SBlue Swirl     if (ret < 0)
139320d97356SBlue Swirl         return ret;
139420d97356SBlue Swirl 
139520d97356SBlue Swirl     l2_cache_reset(bs);
139620d97356SBlue Swirl #endif
139720d97356SBlue Swirl     return 0;
139820d97356SBlue Swirl }
139920d97356SBlue Swirl 
1400621f0589SKevin Wolf static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs,
1401621f0589SKevin Wolf     int64_t sector_num, int nb_sectors)
1402621f0589SKevin Wolf {
1403621f0589SKevin Wolf     int ret;
1404621f0589SKevin Wolf     BDRVQcowState *s = bs->opaque;
1405621f0589SKevin Wolf 
1406621f0589SKevin Wolf     /* Emulate misaligned zero writes */
1407621f0589SKevin Wolf     if (sector_num % s->cluster_sectors || nb_sectors % s->cluster_sectors) {
1408621f0589SKevin Wolf         return -ENOTSUP;
1409621f0589SKevin Wolf     }
1410621f0589SKevin Wolf 
1411621f0589SKevin Wolf     /* Whatever is left can use real zero clusters */
1412621f0589SKevin Wolf     qemu_co_mutex_lock(&s->lock);
1413621f0589SKevin Wolf     ret = qcow2_zero_clusters(bs, sector_num << BDRV_SECTOR_BITS,
1414621f0589SKevin Wolf         nb_sectors);
1415621f0589SKevin Wolf     qemu_co_mutex_unlock(&s->lock);
1416621f0589SKevin Wolf 
1417621f0589SKevin Wolf     return ret;
1418621f0589SKevin Wolf }
1419621f0589SKevin Wolf 
14206db39ae2SPaolo Bonzini static coroutine_fn int qcow2_co_discard(BlockDriverState *bs,
14216db39ae2SPaolo Bonzini     int64_t sector_num, int nb_sectors)
14225ea929e3SKevin Wolf {
14236db39ae2SPaolo Bonzini     int ret;
14246db39ae2SPaolo Bonzini     BDRVQcowState *s = bs->opaque;
14256db39ae2SPaolo Bonzini 
14266db39ae2SPaolo Bonzini     qemu_co_mutex_lock(&s->lock);
14276db39ae2SPaolo Bonzini     ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS,
14285ea929e3SKevin Wolf         nb_sectors);
14296db39ae2SPaolo Bonzini     qemu_co_mutex_unlock(&s->lock);
14306db39ae2SPaolo Bonzini     return ret;
14315ea929e3SKevin Wolf }
14325ea929e3SKevin Wolf 
1433419b19d9SStefan Hajnoczi static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
1434419b19d9SStefan Hajnoczi {
1435419b19d9SStefan Hajnoczi     BDRVQcowState *s = bs->opaque;
1436419b19d9SStefan Hajnoczi     int ret, new_l1_size;
1437419b19d9SStefan Hajnoczi 
1438419b19d9SStefan Hajnoczi     if (offset & 511) {
1439259b2173SKevin Wolf         error_report("The new size must be a multiple of 512");
1440419b19d9SStefan Hajnoczi         return -EINVAL;
1441419b19d9SStefan Hajnoczi     }
1442419b19d9SStefan Hajnoczi 
1443419b19d9SStefan Hajnoczi     /* cannot proceed if image has snapshots */
1444419b19d9SStefan Hajnoczi     if (s->nb_snapshots) {
1445259b2173SKevin Wolf         error_report("Can't resize an image which has snapshots");
1446419b19d9SStefan Hajnoczi         return -ENOTSUP;
1447419b19d9SStefan Hajnoczi     }
1448419b19d9SStefan Hajnoczi 
1449419b19d9SStefan Hajnoczi     /* shrinking is currently not supported */
1450419b19d9SStefan Hajnoczi     if (offset < bs->total_sectors * 512) {
1451259b2173SKevin Wolf         error_report("qcow2 doesn't support shrinking images yet");
1452419b19d9SStefan Hajnoczi         return -ENOTSUP;
1453419b19d9SStefan Hajnoczi     }
1454419b19d9SStefan Hajnoczi 
1455419b19d9SStefan Hajnoczi     new_l1_size = size_to_l1(s, offset);
145672893756SStefan Hajnoczi     ret = qcow2_grow_l1_table(bs, new_l1_size, true);
1457419b19d9SStefan Hajnoczi     if (ret < 0) {
1458419b19d9SStefan Hajnoczi         return ret;
1459419b19d9SStefan Hajnoczi     }
1460419b19d9SStefan Hajnoczi 
1461419b19d9SStefan Hajnoczi     /* write updated header.size */
1462419b19d9SStefan Hajnoczi     offset = cpu_to_be64(offset);
14638b3b7206SKevin Wolf     ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
1464419b19d9SStefan Hajnoczi                            &offset, sizeof(uint64_t));
1465419b19d9SStefan Hajnoczi     if (ret < 0) {
1466419b19d9SStefan Hajnoczi         return ret;
1467419b19d9SStefan Hajnoczi     }
1468419b19d9SStefan Hajnoczi 
1469419b19d9SStefan Hajnoczi     s->l1_vm_state_index = new_l1_size;
1470419b19d9SStefan Hajnoczi     return 0;
1471419b19d9SStefan Hajnoczi }
1472419b19d9SStefan Hajnoczi 
147320d97356SBlue Swirl /* XXX: put compressed sectors first, then all the cluster aligned
147420d97356SBlue Swirl    tables to avoid losing bytes in alignment */
14757c80ab3fSJes Sorensen static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
147620d97356SBlue Swirl                                   const uint8_t *buf, int nb_sectors)
147720d97356SBlue Swirl {
147820d97356SBlue Swirl     BDRVQcowState *s = bs->opaque;
147920d97356SBlue Swirl     z_stream strm;
148020d97356SBlue Swirl     int ret, out_len;
148120d97356SBlue Swirl     uint8_t *out_buf;
148220d97356SBlue Swirl     uint64_t cluster_offset;
148320d97356SBlue Swirl 
148420d97356SBlue Swirl     if (nb_sectors == 0) {
148520d97356SBlue Swirl         /* align end of file to a sector boundary to ease reading with
148620d97356SBlue Swirl            sector based I/Os */
148766f82ceeSKevin Wolf         cluster_offset = bdrv_getlength(bs->file);
148820d97356SBlue Swirl         cluster_offset = (cluster_offset + 511) & ~511;
148966f82ceeSKevin Wolf         bdrv_truncate(bs->file, cluster_offset);
149020d97356SBlue Swirl         return 0;
149120d97356SBlue Swirl     }
149220d97356SBlue Swirl 
149320d97356SBlue Swirl     if (nb_sectors != s->cluster_sectors)
149420d97356SBlue Swirl         return -EINVAL;
149520d97356SBlue Swirl 
14967267c094SAnthony Liguori     out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
149720d97356SBlue Swirl 
149820d97356SBlue Swirl     /* best compression, small window, no zlib header */
149920d97356SBlue Swirl     memset(&strm, 0, sizeof(strm));
150020d97356SBlue Swirl     ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
150120d97356SBlue Swirl                        Z_DEFLATED, -12,
150220d97356SBlue Swirl                        9, Z_DEFAULT_STRATEGY);
150320d97356SBlue Swirl     if (ret != 0) {
15048f1efd00SKevin Wolf         ret = -EINVAL;
15058f1efd00SKevin Wolf         goto fail;
150620d97356SBlue Swirl     }
150720d97356SBlue Swirl 
150820d97356SBlue Swirl     strm.avail_in = s->cluster_size;
150920d97356SBlue Swirl     strm.next_in = (uint8_t *)buf;
151020d97356SBlue Swirl     strm.avail_out = s->cluster_size;
151120d97356SBlue Swirl     strm.next_out = out_buf;
151220d97356SBlue Swirl 
151320d97356SBlue Swirl     ret = deflate(&strm, Z_FINISH);
151420d97356SBlue Swirl     if (ret != Z_STREAM_END && ret != Z_OK) {
151520d97356SBlue Swirl         deflateEnd(&strm);
15168f1efd00SKevin Wolf         ret = -EINVAL;
15178f1efd00SKevin Wolf         goto fail;
151820d97356SBlue Swirl     }
151920d97356SBlue Swirl     out_len = strm.next_out - out_buf;
152020d97356SBlue Swirl 
152120d97356SBlue Swirl     deflateEnd(&strm);
152220d97356SBlue Swirl 
152320d97356SBlue Swirl     if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
152420d97356SBlue Swirl         /* could not compress: write normal cluster */
15258f1efd00SKevin Wolf         ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
15268f1efd00SKevin Wolf         if (ret < 0) {
15278f1efd00SKevin Wolf             goto fail;
15288f1efd00SKevin Wolf         }
152920d97356SBlue Swirl     } else {
153020d97356SBlue Swirl         cluster_offset = qcow2_alloc_compressed_cluster_offset(bs,
153120d97356SBlue Swirl             sector_num << 9, out_len);
15328f1efd00SKevin Wolf         if (!cluster_offset) {
15338f1efd00SKevin Wolf             ret = -EIO;
15348f1efd00SKevin Wolf             goto fail;
15358f1efd00SKevin Wolf         }
153620d97356SBlue Swirl         cluster_offset &= s->cluster_offset_mask;
153766f82ceeSKevin Wolf         BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
15388f1efd00SKevin Wolf         ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
15398f1efd00SKevin Wolf         if (ret < 0) {
15408f1efd00SKevin Wolf             goto fail;
154120d97356SBlue Swirl         }
154220d97356SBlue Swirl     }
154320d97356SBlue Swirl 
15448f1efd00SKevin Wolf     ret = 0;
15458f1efd00SKevin Wolf fail:
15467267c094SAnthony Liguori     g_free(out_buf);
15478f1efd00SKevin Wolf     return ret;
154820d97356SBlue Swirl }
154920d97356SBlue Swirl 
1550a968168cSDong Xu Wang static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
155120d97356SBlue Swirl {
155229c1a730SKevin Wolf     BDRVQcowState *s = bs->opaque;
155329c1a730SKevin Wolf     int ret;
155429c1a730SKevin Wolf 
15558b94ff85SPaolo Bonzini     qemu_co_mutex_lock(&s->lock);
155629c1a730SKevin Wolf     ret = qcow2_cache_flush(bs, s->l2_table_cache);
155729c1a730SKevin Wolf     if (ret < 0) {
1558c95de7e2SDong Xu Wang         qemu_co_mutex_unlock(&s->lock);
15598b94ff85SPaolo Bonzini         return ret;
156029c1a730SKevin Wolf     }
156129c1a730SKevin Wolf 
1562bfe8043eSStefan Hajnoczi     if (qcow2_need_accurate_refcounts(s)) {
156329c1a730SKevin Wolf         ret = qcow2_cache_flush(bs, s->refcount_block_cache);
156429c1a730SKevin Wolf         if (ret < 0) {
1565c95de7e2SDong Xu Wang             qemu_co_mutex_unlock(&s->lock);
15668b94ff85SPaolo Bonzini             return ret;
156729c1a730SKevin Wolf         }
1568bfe8043eSStefan Hajnoczi     }
15698b94ff85SPaolo Bonzini     qemu_co_mutex_unlock(&s->lock);
157029c1a730SKevin Wolf 
1571eb489bb1SKevin Wolf     return 0;
1572eb489bb1SKevin Wolf }
1573eb489bb1SKevin Wolf 
15747c80ab3fSJes Sorensen static int64_t qcow2_vm_state_offset(BDRVQcowState *s)
157520d97356SBlue Swirl {
157620d97356SBlue Swirl 	return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits);
157720d97356SBlue Swirl }
157820d97356SBlue Swirl 
15797c80ab3fSJes Sorensen static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
158020d97356SBlue Swirl {
158120d97356SBlue Swirl     BDRVQcowState *s = bs->opaque;
158220d97356SBlue Swirl     bdi->cluster_size = s->cluster_size;
15837c80ab3fSJes Sorensen     bdi->vm_state_offset = qcow2_vm_state_offset(s);
158420d97356SBlue Swirl     return 0;
158520d97356SBlue Swirl }
158620d97356SBlue Swirl 
#if 0
/*
 * Debug helper (compiled out): print the refcounts of all clusters in the
 * image file, collapsing consecutive clusters with the same refcount into
 * one output line.
 */
static void dump_refcounts(BlockDriverState *bs)
{
    BDRVQcowState *s = bs->opaque;
    int64_t nb_clusters, k, k1, size;
    int refcount;

    size = bdrv_getlength(bs->file);
    nb_clusters = size_to_clusters(s, size);
    for(k = 0; k < nb_clusters;) {
        /* k1 remembers where the current run of equal refcounts starts */
        k1 = k;
        refcount = get_refcount(bs, k);
        k++;
        /* Advance k past every following cluster with the same refcount */
        while (k < nb_clusters && get_refcount(bs, k) == refcount)
            k++;
        /*
         * NOTE(review): the first value printed is k, which by now is one
         * past the end of the run, not its start k1 - confirm whether k1
         * was intended before re-enabling this code.
         */
        printf("%" PRId64 ": refcount=%d nb=%" PRId64 "\n", k, refcount,
               k - k1);
    }
}
#endif
160720d97356SBlue Swirl 
16087c80ab3fSJes Sorensen static int qcow2_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
160920d97356SBlue Swirl                               int64_t pos, int size)
161020d97356SBlue Swirl {
161120d97356SBlue Swirl     BDRVQcowState *s = bs->opaque;
161220d97356SBlue Swirl     int growable = bs->growable;
161320d97356SBlue Swirl     int ret;
161420d97356SBlue Swirl 
161566f82ceeSKevin Wolf     BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
161620d97356SBlue Swirl     bs->growable = 1;
16177c80ab3fSJes Sorensen     ret = bdrv_pwrite(bs, qcow2_vm_state_offset(s) + pos, buf, size);
161820d97356SBlue Swirl     bs->growable = growable;
161920d97356SBlue Swirl 
162020d97356SBlue Swirl     return ret;
162120d97356SBlue Swirl }
162220d97356SBlue Swirl 
16237c80ab3fSJes Sorensen static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf,
162420d97356SBlue Swirl                               int64_t pos, int size)
162520d97356SBlue Swirl {
162620d97356SBlue Swirl     BDRVQcowState *s = bs->opaque;
162720d97356SBlue Swirl     int growable = bs->growable;
162820d97356SBlue Swirl     int ret;
162920d97356SBlue Swirl 
163066f82ceeSKevin Wolf     BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
163120d97356SBlue Swirl     bs->growable = 1;
16327c80ab3fSJes Sorensen     ret = bdrv_pread(bs, qcow2_vm_state_offset(s) + pos, buf, size);
163320d97356SBlue Swirl     bs->growable = growable;
163420d97356SBlue Swirl 
163520d97356SBlue Swirl     return ret;
163620d97356SBlue Swirl }
163720d97356SBlue Swirl 
/*
 * Image creation options understood by the qcow2 driver. The list ends with
 * an entry whose name is NULL. Only the cluster size entry carries a
 * default value here.
 */
static QEMUOptionParameter qcow2_create_options[] = {
    {
        .name = BLOCK_OPT_SIZE,
        .type = OPT_SIZE,
        .help = "Virtual disk size"
    },
    {
        .name = BLOCK_OPT_COMPAT_LEVEL,
        .type = OPT_STRING,
        .help = "Compatibility level (0.10 or 1.1)"
    },
    {
        .name = BLOCK_OPT_BACKING_FILE,
        .type = OPT_STRING,
        .help = "File name of a base image"
    },
    {
        .name = BLOCK_OPT_BACKING_FMT,
        .type = OPT_STRING,
        .help = "Image format of the base image"
    },
    {
        .name = BLOCK_OPT_ENCRYPT,
        .type = OPT_FLAG,
        .help = "Encrypt the image"
    },
    {
        .name = BLOCK_OPT_CLUSTER_SIZE,
        .type = OPT_SIZE,
        .help = "qcow2 cluster size",
        .value = { .n = DEFAULT_CLUSTER_SIZE },
    },
    {
        .name = BLOCK_OPT_PREALLOC,
        .type = OPT_STRING,
        .help = "Preallocation mode (allowed values: off, metadata)"
    },
    {
        .name = BLOCK_OPT_LAZY_REFCOUNTS,
        .type = OPT_FLAG,
        .help = "Postpone refcount updates",
    },
    /* List terminator */
    { NULL }
};
168220d97356SBlue Swirl 
/*
 * Block driver description for the "qcow2" format: binds the generic block
 * layer callbacks to their qcow2 implementations.
 */
static BlockDriver bdrv_qcow2 = {
    .format_name        = "qcow2",
    .instance_size      = sizeof(BDRVQcowState),
    .bdrv_probe         = qcow2_probe,
    .bdrv_open          = qcow2_open,
    .bdrv_close         = qcow2_close,
    .bdrv_reopen_prepare  = qcow2_reopen_prepare,
    .bdrv_create        = qcow2_create,
    .bdrv_co_is_allocated = qcow2_co_is_allocated,
    .bdrv_set_key       = qcow2_set_key,
    .bdrv_make_empty    = qcow2_make_empty,

    /* Coroutine based data path */
    .bdrv_co_readv          = qcow2_co_readv,
    .bdrv_co_writev         = qcow2_co_writev,
    .bdrv_co_flush_to_os    = qcow2_co_flush_to_os,

    .bdrv_co_write_zeroes   = qcow2_co_write_zeroes,
    .bdrv_co_discard        = qcow2_co_discard,
    .bdrv_truncate          = qcow2_truncate,
    .bdrv_write_compressed  = qcow2_write_compressed,

    /* Snapshot handling */
    .bdrv_snapshot_create   = qcow2_snapshot_create,
    .bdrv_snapshot_goto     = qcow2_snapshot_goto,
    .bdrv_snapshot_delete   = qcow2_snapshot_delete,
    .bdrv_snapshot_list     = qcow2_snapshot_list,
    .bdrv_snapshot_load_tmp     = qcow2_snapshot_load_tmp,
    .bdrv_get_info      = qcow2_get_info,

    /* VM state storage */
    .bdrv_save_vmstate    = qcow2_save_vmstate,
    .bdrv_load_vmstate    = qcow2_load_vmstate,

    .bdrv_change_backing_file   = qcow2_change_backing_file,

    .bdrv_invalidate_cache      = qcow2_invalidate_cache,

    .create_options = qcow2_create_options,
    .bdrv_check = qcow2_check,
};
172120d97356SBlue Swirl 
/* Register the qcow2 driver with the block layer. */
static void bdrv_qcow2_init(void)
{
    bdrv_register(&bdrv_qcow2);
}

/* Hook the registration function into block driver initialization. */
block_init(bdrv_qcow2_init);
1728