xref: /qemu/block/qcow2.c (revision 419b19d9b427fad5ff2fa886d8cc217f7acee18c)
1585f8587Sbellard /*
2585f8587Sbellard  * Block driver for the QCOW version 2 format
3585f8587Sbellard  *
4585f8587Sbellard  * Copyright (c) 2004-2006 Fabrice Bellard
5585f8587Sbellard  *
6585f8587Sbellard  * Permission is hereby granted, free of charge, to any person obtaining a copy
7585f8587Sbellard  * of this software and associated documentation files (the "Software"), to deal
8585f8587Sbellard  * in the Software without restriction, including without limitation the rights
9585f8587Sbellard  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10585f8587Sbellard  * copies of the Software, and to permit persons to whom the Software is
11585f8587Sbellard  * furnished to do so, subject to the following conditions:
12585f8587Sbellard  *
13585f8587Sbellard  * The above copyright notice and this permission notice shall be included in
14585f8587Sbellard  * all copies or substantial portions of the Software.
15585f8587Sbellard  *
16585f8587Sbellard  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17585f8587Sbellard  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18585f8587Sbellard  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19585f8587Sbellard  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20585f8587Sbellard  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21585f8587Sbellard  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22585f8587Sbellard  * THE SOFTWARE.
23585f8587Sbellard  */
24faf07963Spbrook #include "qemu-common.h"
25585f8587Sbellard #include "block_int.h"
265efa9d5aSAnthony Liguori #include "module.h"
27585f8587Sbellard #include <zlib.h>
28585f8587Sbellard #include "aes.h"
29f7d0fe02SKevin Wolf #include "block/qcow2.h"
30585f8587Sbellard 
/*
  Differences from QCOW:

  - Support for multiple incremental snapshots.
  - Memory management by reference counts.
  - Clusters which have a reference count of one have the bit
    QCOW_OFLAG_COPIED to optimize write performance.
  - Size of compressed clusters is stored in sectors to reduce bit usage
    in the cluster offsets.
  - Support for storing additional data (such as the VM state) in the
    snapshots.
  - If a backing store is used, the cluster size is not constrained
    (could be backported to QCOW).
  - L2 tables always have a size of one cluster.
*/
46585f8587Sbellard 
479b80ddf3Saliguori 
/*
 * Fixed-size header that precedes every qcow2 header extension on disk.
 * qcow_read_extensions() converts both fields from big-endian after
 * reading them; `len` bytes of payload follow the header, and the next
 * extension header is looked for at the following 8-byte boundary.
 */
typedef struct {
    uint32_t magic;     /* extension type, one of QCOW_EXT_MAGIC_* */
    uint32_t len;       /* payload length in bytes, padding not included */
} QCowExtension;

/* Marks the end of the header extension list */
#define QCOW_EXT_MAGIC_END              0
/* Payload is the backing file format name; the reader appends the NUL */
#define QCOW_EXT_MAGIC_BACKING_FORMAT   0xE2792ACA
549b80ddf3Saliguori 
5520d97356SBlue Swirl static int qcow_create(const char *filename, QEMUOptionParameter *options);
56585f8587Sbellard 
57585f8587Sbellard static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
58585f8587Sbellard {
59585f8587Sbellard     const QCowHeader *cow_header = (const void *)buf;
60585f8587Sbellard 
61585f8587Sbellard     if (buf_size >= sizeof(QCowHeader) &&
62585f8587Sbellard         be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
63585f8587Sbellard         be32_to_cpu(cow_header->version) == QCOW_VERSION)
64585f8587Sbellard         return 100;
65585f8587Sbellard     else
66585f8587Sbellard         return 0;
67585f8587Sbellard }
68585f8587Sbellard 
699b80ddf3Saliguori 
709b80ddf3Saliguori /*
719b80ddf3Saliguori  * read qcow2 extension and fill bs
729b80ddf3Saliguori  * start reading from start_offset
739b80ddf3Saliguori  * finish reading upon magic of value 0 or when end_offset reached
749b80ddf3Saliguori  * unknown magic is skipped (future extension this version knows nothing about)
759b80ddf3Saliguori  * return 0 upon success, non-0 otherwise
769b80ddf3Saliguori  */
779b80ddf3Saliguori static int qcow_read_extensions(BlockDriverState *bs, uint64_t start_offset,
789b80ddf3Saliguori                                 uint64_t end_offset)
799b80ddf3Saliguori {
809b80ddf3Saliguori     QCowExtension ext;
819b80ddf3Saliguori     uint64_t offset;
829b80ddf3Saliguori 
839b80ddf3Saliguori #ifdef DEBUG_EXT
849b80ddf3Saliguori     printf("qcow_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
859b80ddf3Saliguori #endif
869b80ddf3Saliguori     offset = start_offset;
879b80ddf3Saliguori     while (offset < end_offset) {
889b80ddf3Saliguori 
899b80ddf3Saliguori #ifdef DEBUG_EXT
909b80ddf3Saliguori         /* Sanity check */
919b80ddf3Saliguori         if (offset > s->cluster_size)
929b80ddf3Saliguori             printf("qcow_handle_extension: suspicious offset %lu\n", offset);
939b80ddf3Saliguori 
949b80ddf3Saliguori         printf("attemting to read extended header in offset %lu\n", offset);
959b80ddf3Saliguori #endif
969b80ddf3Saliguori 
9766f82ceeSKevin Wolf         if (bdrv_pread(bs->file, offset, &ext, sizeof(ext)) != sizeof(ext)) {
984c978075Saliguori             fprintf(stderr, "qcow_handle_extension: ERROR: pread fail from offset %llu\n",
994c978075Saliguori                     (unsigned long long)offset);
1009b80ddf3Saliguori             return 1;
1019b80ddf3Saliguori         }
1029b80ddf3Saliguori         be32_to_cpus(&ext.magic);
1039b80ddf3Saliguori         be32_to_cpus(&ext.len);
1049b80ddf3Saliguori         offset += sizeof(ext);
1059b80ddf3Saliguori #ifdef DEBUG_EXT
1069b80ddf3Saliguori         printf("ext.magic = 0x%x\n", ext.magic);
1079b80ddf3Saliguori #endif
1089b80ddf3Saliguori         switch (ext.magic) {
1099b80ddf3Saliguori         case QCOW_EXT_MAGIC_END:
1109b80ddf3Saliguori             return 0;
111f965509cSaliguori 
112f965509cSaliguori         case QCOW_EXT_MAGIC_BACKING_FORMAT:
113f965509cSaliguori             if (ext.len >= sizeof(bs->backing_format)) {
114f965509cSaliguori                 fprintf(stderr, "ERROR: ext_backing_format: len=%u too large"
1154c978075Saliguori                         " (>=%zu)\n",
116f965509cSaliguori                         ext.len, sizeof(bs->backing_format));
117f965509cSaliguori                 return 2;
118f965509cSaliguori             }
11966f82ceeSKevin Wolf             if (bdrv_pread(bs->file, offset , bs->backing_format,
120f965509cSaliguori                            ext.len) != ext.len)
121f965509cSaliguori                 return 3;
122f965509cSaliguori             bs->backing_format[ext.len] = '\0';
123f965509cSaliguori #ifdef DEBUG_EXT
124f965509cSaliguori             printf("Qcow2: Got format extension %s\n", bs->backing_format);
125f965509cSaliguori #endif
126e1c7f0e3SKevin Wolf             offset = ((offset + ext.len + 7) & ~7);
127f965509cSaliguori             break;
128f965509cSaliguori 
1299b80ddf3Saliguori         default:
1309b80ddf3Saliguori             /* unknown magic -- just skip it */
131e1c7f0e3SKevin Wolf             offset = ((offset + ext.len + 7) & ~7);
1329b80ddf3Saliguori             break;
1339b80ddf3Saliguori         }
1349b80ddf3Saliguori     }
1359b80ddf3Saliguori 
1369b80ddf3Saliguori     return 0;
1379b80ddf3Saliguori }
1389b80ddf3Saliguori 
1399b80ddf3Saliguori 
14066f82ceeSKevin Wolf static int qcow_open(BlockDriverState *bs, int flags)
141585f8587Sbellard {
142585f8587Sbellard     BDRVQcowState *s = bs->opaque;
143*419b19d9SStefan Hajnoczi     int len, i;
144585f8587Sbellard     QCowHeader header;
1459b80ddf3Saliguori     uint64_t ext_end;
146585f8587Sbellard 
14766f82ceeSKevin Wolf     if (bdrv_pread(bs->file, 0, &header, sizeof(header)) != sizeof(header))
148585f8587Sbellard         goto fail;
149585f8587Sbellard     be32_to_cpus(&header.magic);
150585f8587Sbellard     be32_to_cpus(&header.version);
151585f8587Sbellard     be64_to_cpus(&header.backing_file_offset);
152585f8587Sbellard     be32_to_cpus(&header.backing_file_size);
153585f8587Sbellard     be64_to_cpus(&header.size);
154585f8587Sbellard     be32_to_cpus(&header.cluster_bits);
155585f8587Sbellard     be32_to_cpus(&header.crypt_method);
156585f8587Sbellard     be64_to_cpus(&header.l1_table_offset);
157585f8587Sbellard     be32_to_cpus(&header.l1_size);
158585f8587Sbellard     be64_to_cpus(&header.refcount_table_offset);
159585f8587Sbellard     be32_to_cpus(&header.refcount_table_clusters);
160585f8587Sbellard     be64_to_cpus(&header.snapshots_offset);
161585f8587Sbellard     be32_to_cpus(&header.nb_snapshots);
162585f8587Sbellard 
163585f8587Sbellard     if (header.magic != QCOW_MAGIC || header.version != QCOW_VERSION)
164585f8587Sbellard         goto fail;
165d191d12dSStefan Weil     if (header.cluster_bits < MIN_CLUSTER_BITS ||
16673c632edSKevin Wolf         header.cluster_bits > MAX_CLUSTER_BITS)
167585f8587Sbellard         goto fail;
168585f8587Sbellard     if (header.crypt_method > QCOW_CRYPT_AES)
169585f8587Sbellard         goto fail;
170585f8587Sbellard     s->crypt_method_header = header.crypt_method;
171585f8587Sbellard     if (s->crypt_method_header)
172585f8587Sbellard         bs->encrypted = 1;
173585f8587Sbellard     s->cluster_bits = header.cluster_bits;
174585f8587Sbellard     s->cluster_size = 1 << s->cluster_bits;
175585f8587Sbellard     s->cluster_sectors = 1 << (s->cluster_bits - 9);
176585f8587Sbellard     s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
177585f8587Sbellard     s->l2_size = 1 << s->l2_bits;
178585f8587Sbellard     bs->total_sectors = header.size / 512;
179585f8587Sbellard     s->csize_shift = (62 - (s->cluster_bits - 8));
180585f8587Sbellard     s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
181585f8587Sbellard     s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
182585f8587Sbellard     s->refcount_table_offset = header.refcount_table_offset;
183585f8587Sbellard     s->refcount_table_size =
184585f8587Sbellard         header.refcount_table_clusters << (s->cluster_bits - 3);
185585f8587Sbellard 
186585f8587Sbellard     s->snapshots_offset = header.snapshots_offset;
187585f8587Sbellard     s->nb_snapshots = header.nb_snapshots;
188585f8587Sbellard 
189585f8587Sbellard     /* read the level 1 table */
190585f8587Sbellard     s->l1_size = header.l1_size;
191*419b19d9SStefan Hajnoczi     s->l1_vm_state_index = size_to_l1(s, header.size);
192585f8587Sbellard     /* the L1 table must contain at least enough entries to put
193585f8587Sbellard        header.size bytes */
194585f8587Sbellard     if (s->l1_size < s->l1_vm_state_index)
195585f8587Sbellard         goto fail;
196585f8587Sbellard     s->l1_table_offset = header.l1_table_offset;
197d191d12dSStefan Weil     if (s->l1_size > 0) {
1983f6a3ee5SKevin Wolf         s->l1_table = qemu_mallocz(
1993f6a3ee5SKevin Wolf             align_offset(s->l1_size * sizeof(uint64_t), 512));
20066f82ceeSKevin Wolf         if (bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)) !=
201585f8587Sbellard             s->l1_size * sizeof(uint64_t))
202585f8587Sbellard             goto fail;
203585f8587Sbellard         for(i = 0;i < s->l1_size; i++) {
204585f8587Sbellard             be64_to_cpus(&s->l1_table[i]);
205585f8587Sbellard         }
206d191d12dSStefan Weil     }
207585f8587Sbellard     /* alloc L2 cache */
208585f8587Sbellard     s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
209585f8587Sbellard     s->cluster_cache = qemu_malloc(s->cluster_size);
210585f8587Sbellard     /* one more sector for decompressed data alignment */
211095a9c58Saliguori     s->cluster_data = qemu_malloc(QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
212095a9c58Saliguori                                   + 512);
213585f8587Sbellard     s->cluster_cache_offset = -1;
214585f8587Sbellard 
215ed6ccf0fSKevin Wolf     if (qcow2_refcount_init(bs) < 0)
216585f8587Sbellard         goto fail;
217585f8587Sbellard 
21872cf2d4fSBlue Swirl     QLIST_INIT(&s->cluster_allocs);
219f214978aSKevin Wolf 
2209b80ddf3Saliguori     /* read qcow2 extensions */
2219b80ddf3Saliguori     if (header.backing_file_offset)
2229b80ddf3Saliguori         ext_end = header.backing_file_offset;
2239b80ddf3Saliguori     else
2249b80ddf3Saliguori         ext_end = s->cluster_size;
2259b80ddf3Saliguori     if (qcow_read_extensions(bs, sizeof(header), ext_end))
2269b80ddf3Saliguori         goto fail;
2279b80ddf3Saliguori 
228585f8587Sbellard     /* read the backing file name */
229585f8587Sbellard     if (header.backing_file_offset != 0) {
230585f8587Sbellard         len = header.backing_file_size;
231585f8587Sbellard         if (len > 1023)
232585f8587Sbellard             len = 1023;
23366f82ceeSKevin Wolf         if (bdrv_pread(bs->file, header.backing_file_offset, bs->backing_file, len) != len)
234585f8587Sbellard             goto fail;
235585f8587Sbellard         bs->backing_file[len] = '\0';
236585f8587Sbellard     }
237ed6ccf0fSKevin Wolf     if (qcow2_read_snapshots(bs) < 0)
238585f8587Sbellard         goto fail;
239585f8587Sbellard 
240585f8587Sbellard #ifdef DEBUG_ALLOC
24114899cdfSFilip Navara     qcow2_check_refcounts(bs);
242585f8587Sbellard #endif
243585f8587Sbellard     return 0;
244585f8587Sbellard 
245585f8587Sbellard  fail:
246ed6ccf0fSKevin Wolf     qcow2_free_snapshots(bs);
247ed6ccf0fSKevin Wolf     qcow2_refcount_close(bs);
248585f8587Sbellard     qemu_free(s->l1_table);
249585f8587Sbellard     qemu_free(s->l2_cache);
250585f8587Sbellard     qemu_free(s->cluster_cache);
251585f8587Sbellard     qemu_free(s->cluster_data);
252585f8587Sbellard     return -1;
253585f8587Sbellard }
254585f8587Sbellard 
255585f8587Sbellard static int qcow_set_key(BlockDriverState *bs, const char *key)
256585f8587Sbellard {
257585f8587Sbellard     BDRVQcowState *s = bs->opaque;
258585f8587Sbellard     uint8_t keybuf[16];
259585f8587Sbellard     int len, i;
260585f8587Sbellard 
261585f8587Sbellard     memset(keybuf, 0, 16);
262585f8587Sbellard     len = strlen(key);
263585f8587Sbellard     if (len > 16)
264585f8587Sbellard         len = 16;
265585f8587Sbellard     /* XXX: we could compress the chars to 7 bits to increase
266585f8587Sbellard        entropy */
267585f8587Sbellard     for(i = 0;i < len;i++) {
268585f8587Sbellard         keybuf[i] = key[i];
269585f8587Sbellard     }
270585f8587Sbellard     s->crypt_method = s->crypt_method_header;
271585f8587Sbellard 
272585f8587Sbellard     if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
273585f8587Sbellard         return -1;
274585f8587Sbellard     if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
275585f8587Sbellard         return -1;
276585f8587Sbellard #if 0
277585f8587Sbellard     /* test */
278585f8587Sbellard     {
279585f8587Sbellard         uint8_t in[16];
280585f8587Sbellard         uint8_t out[16];
281585f8587Sbellard         uint8_t tmp[16];
282585f8587Sbellard         for(i=0;i<16;i++)
283585f8587Sbellard             in[i] = i;
284585f8587Sbellard         AES_encrypt(in, tmp, &s->aes_encrypt_key);
285585f8587Sbellard         AES_decrypt(tmp, out, &s->aes_decrypt_key);
286585f8587Sbellard         for(i = 0; i < 16; i++)
287585f8587Sbellard             printf(" %02x", tmp[i]);
288585f8587Sbellard         printf("\n");
289585f8587Sbellard         for(i = 0; i < 16; i++)
290585f8587Sbellard             printf(" %02x", out[i]);
291585f8587Sbellard         printf("\n");
292585f8587Sbellard     }
293585f8587Sbellard #endif
294585f8587Sbellard     return 0;
295585f8587Sbellard }
296585f8587Sbellard 
297585f8587Sbellard static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num,
298585f8587Sbellard                              int nb_sectors, int *pnum)
299585f8587Sbellard {
300585f8587Sbellard     uint64_t cluster_offset;
301585f8587Sbellard 
302095a9c58Saliguori     *pnum = nb_sectors;
303ed6ccf0fSKevin Wolf     cluster_offset = qcow2_get_cluster_offset(bs, sector_num << 9, pnum);
304095a9c58Saliguori 
305585f8587Sbellard     return (cluster_offset != 0);
306585f8587Sbellard }
307585f8587Sbellard 
308a9465922Sbellard /* handle reading after the end of the backing file */
309ed6ccf0fSKevin Wolf int qcow2_backing_read1(BlockDriverState *bs,
310a9465922Sbellard                   int64_t sector_num, uint8_t *buf, int nb_sectors)
311a9465922Sbellard {
312a9465922Sbellard     int n1;
313a9465922Sbellard     if ((sector_num + nb_sectors) <= bs->total_sectors)
314a9465922Sbellard         return nb_sectors;
315a9465922Sbellard     if (sector_num >= bs->total_sectors)
316a9465922Sbellard         n1 = 0;
317a9465922Sbellard     else
318a9465922Sbellard         n1 = bs->total_sectors - sector_num;
319a9465922Sbellard     memset(buf + n1 * 512, 0, 512 * (nb_sectors - n1));
320a9465922Sbellard     return n1;
321a9465922Sbellard }
322a9465922Sbellard 
323ce1a14dcSpbrook typedef struct QCowAIOCB {
324ce1a14dcSpbrook     BlockDriverAIOCB common;
325585f8587Sbellard     int64_t sector_num;
326f141eafeSaliguori     QEMUIOVector *qiov;
327585f8587Sbellard     uint8_t *buf;
328f141eafeSaliguori     void *orig_buf;
3297b88e48bSChristoph Hellwig     int remaining_sectors;
3307b88e48bSChristoph Hellwig     int cur_nr_sectors;	/* number of sectors in current iteration */
331585f8587Sbellard     uint64_t cluster_offset;
332585f8587Sbellard     uint8_t *cluster_data;
333585f8587Sbellard     BlockDriverAIOCB *hd_aiocb;
334c87c0672Saliguori     struct iovec hd_iov;
335c87c0672Saliguori     QEMUIOVector hd_qiov;
3361490791fSaliguori     QEMUBH *bh;
337e976c6a1Saliguori     QCowL2Meta l2meta;
33872cf2d4fSBlue Swirl     QLIST_ENTRY(QCowAIOCB) next_depend;
339585f8587Sbellard } QCowAIOCB;
340585f8587Sbellard 
341c16b5a2cSChristoph Hellwig static void qcow_aio_cancel(BlockDriverAIOCB *blockacb)
342c16b5a2cSChristoph Hellwig {
343c16b5a2cSChristoph Hellwig     QCowAIOCB *acb = (QCowAIOCB *)blockacb;
344c16b5a2cSChristoph Hellwig     if (acb->hd_aiocb)
345c16b5a2cSChristoph Hellwig         bdrv_aio_cancel(acb->hd_aiocb);
346c16b5a2cSChristoph Hellwig     qemu_aio_release(acb);
347c16b5a2cSChristoph Hellwig }
348c16b5a2cSChristoph Hellwig 
349c16b5a2cSChristoph Hellwig static AIOPool qcow_aio_pool = {
350c16b5a2cSChristoph Hellwig     .aiocb_size         = sizeof(QCowAIOCB),
351c16b5a2cSChristoph Hellwig     .cancel             = qcow_aio_cancel,
352c16b5a2cSChristoph Hellwig };
353c16b5a2cSChristoph Hellwig 
3541490791fSaliguori static void qcow_aio_read_cb(void *opaque, int ret);
3551490791fSaliguori static void qcow_aio_read_bh(void *opaque)
3561490791fSaliguori {
3571490791fSaliguori     QCowAIOCB *acb = opaque;
3581490791fSaliguori     qemu_bh_delete(acb->bh);
3591490791fSaliguori     acb->bh = NULL;
3601490791fSaliguori     qcow_aio_read_cb(opaque, 0);
3611490791fSaliguori }
3621490791fSaliguori 
363a32ef786Saliguori static int qcow_schedule_bh(QEMUBHFunc *cb, QCowAIOCB *acb)
364a32ef786Saliguori {
365a32ef786Saliguori     if (acb->bh)
366a32ef786Saliguori         return -EIO;
367a32ef786Saliguori 
368a32ef786Saliguori     acb->bh = qemu_bh_new(cb, acb);
369a32ef786Saliguori     if (!acb->bh)
370a32ef786Saliguori         return -EIO;
371a32ef786Saliguori 
372a32ef786Saliguori     qemu_bh_schedule(acb->bh);
373a32ef786Saliguori 
374a32ef786Saliguori     return 0;
375a32ef786Saliguori }
376a32ef786Saliguori 
377585f8587Sbellard static void qcow_aio_read_cb(void *opaque, int ret)
378585f8587Sbellard {
379ce1a14dcSpbrook     QCowAIOCB *acb = opaque;
380ce1a14dcSpbrook     BlockDriverState *bs = acb->common.bs;
381585f8587Sbellard     BDRVQcowState *s = bs->opaque;
382a9465922Sbellard     int index_in_cluster, n1;
383585f8587Sbellard 
384ce1a14dcSpbrook     acb->hd_aiocb = NULL;
385f141eafeSaliguori     if (ret < 0)
386f141eafeSaliguori         goto done;
387585f8587Sbellard 
388585f8587Sbellard     /* post process the read buffer */
389ce1a14dcSpbrook     if (!acb->cluster_offset) {
390585f8587Sbellard         /* nothing to do */
391ce1a14dcSpbrook     } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
392585f8587Sbellard         /* nothing to do */
393585f8587Sbellard     } else {
394585f8587Sbellard         if (s->crypt_method) {
395ed6ccf0fSKevin Wolf             qcow2_encrypt_sectors(s, acb->sector_num, acb->buf, acb->buf,
3967b88e48bSChristoph Hellwig                             acb->cur_nr_sectors, 0,
397585f8587Sbellard                             &s->aes_decrypt_key);
398585f8587Sbellard         }
399585f8587Sbellard     }
400585f8587Sbellard 
4017b88e48bSChristoph Hellwig     acb->remaining_sectors -= acb->cur_nr_sectors;
4027b88e48bSChristoph Hellwig     acb->sector_num += acb->cur_nr_sectors;
4037b88e48bSChristoph Hellwig     acb->buf += acb->cur_nr_sectors * 512;
404585f8587Sbellard 
4057b88e48bSChristoph Hellwig     if (acb->remaining_sectors == 0) {
406585f8587Sbellard         /* request completed */
407f141eafeSaliguori         ret = 0;
408f141eafeSaliguori         goto done;
409585f8587Sbellard     }
410585f8587Sbellard 
411585f8587Sbellard     /* prepare next AIO request */
4127b88e48bSChristoph Hellwig     acb->cur_nr_sectors = acb->remaining_sectors;
4137b88e48bSChristoph Hellwig     acb->cluster_offset = qcow2_get_cluster_offset(bs, acb->sector_num << 9,
4147b88e48bSChristoph Hellwig                                                    &acb->cur_nr_sectors);
415095a9c58Saliguori     index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
416585f8587Sbellard 
417ce1a14dcSpbrook     if (!acb->cluster_offset) {
418585f8587Sbellard         if (bs->backing_hd) {
419585f8587Sbellard             /* read from the base image */
420ed6ccf0fSKevin Wolf             n1 = qcow2_backing_read1(bs->backing_hd, acb->sector_num,
4217b88e48bSChristoph Hellwig                                acb->buf, acb->cur_nr_sectors);
422a9465922Sbellard             if (n1 > 0) {
4233f4cb3d3Sblueswir1                 acb->hd_iov.iov_base = (void *)acb->buf;
4247b88e48bSChristoph Hellwig                 acb->hd_iov.iov_len = acb->cur_nr_sectors * 512;
425c87c0672Saliguori                 qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
42666f82ceeSKevin Wolf                 BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
427c87c0672Saliguori                 acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num,
4287b88e48bSChristoph Hellwig                                     &acb->hd_qiov, acb->cur_nr_sectors,
429c87c0672Saliguori 				    qcow_aio_read_cb, acb);
430ce1a14dcSpbrook                 if (acb->hd_aiocb == NULL)
431f141eafeSaliguori                     goto done;
432585f8587Sbellard             } else {
433a32ef786Saliguori                 ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
434a32ef786Saliguori                 if (ret < 0)
435f141eafeSaliguori                     goto done;
4361490791fSaliguori             }
437a9465922Sbellard         } else {
438585f8587Sbellard             /* Note: in this case, no need to wait */
4397b88e48bSChristoph Hellwig             memset(acb->buf, 0, 512 * acb->cur_nr_sectors);
440a32ef786Saliguori             ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
441a32ef786Saliguori             if (ret < 0)
442f141eafeSaliguori                 goto done;
4431490791fSaliguori         }
444ce1a14dcSpbrook     } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
445585f8587Sbellard         /* add AIO support for compressed blocks ? */
44666f82ceeSKevin Wolf         if (qcow2_decompress_cluster(bs, acb->cluster_offset) < 0)
447f141eafeSaliguori             goto done;
4487b88e48bSChristoph Hellwig         memcpy(acb->buf, s->cluster_cache + index_in_cluster * 512,
4497b88e48bSChristoph Hellwig                512 * acb->cur_nr_sectors);
450a32ef786Saliguori         ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
451a32ef786Saliguori         if (ret < 0)
452f141eafeSaliguori             goto done;
453585f8587Sbellard     } else {
454ce1a14dcSpbrook         if ((acb->cluster_offset & 511) != 0) {
455585f8587Sbellard             ret = -EIO;
456f141eafeSaliguori             goto done;
457585f8587Sbellard         }
458c87c0672Saliguori 
4593f4cb3d3Sblueswir1         acb->hd_iov.iov_base = (void *)acb->buf;
4607b88e48bSChristoph Hellwig         acb->hd_iov.iov_len = acb->cur_nr_sectors * 512;
461c87c0672Saliguori         qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
46266f82ceeSKevin Wolf         BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
46366f82ceeSKevin Wolf         acb->hd_aiocb = bdrv_aio_readv(bs->file,
464ce1a14dcSpbrook                             (acb->cluster_offset >> 9) + index_in_cluster,
4657b88e48bSChristoph Hellwig                             &acb->hd_qiov, acb->cur_nr_sectors,
4667b88e48bSChristoph Hellwig                             qcow_aio_read_cb, acb);
467171e3d6bSKevin Wolf         if (acb->hd_aiocb == NULL) {
468171e3d6bSKevin Wolf             ret = -EIO;
469f141eafeSaliguori             goto done;
470585f8587Sbellard         }
471171e3d6bSKevin Wolf     }
472f141eafeSaliguori 
473f141eafeSaliguori     return;
474f141eafeSaliguori done:
475f141eafeSaliguori     if (acb->qiov->niov > 1) {
476f141eafeSaliguori         qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
477f141eafeSaliguori         qemu_vfree(acb->orig_buf);
478f141eafeSaliguori     }
479f141eafeSaliguori     acb->common.cb(acb->common.opaque, ret);
480f141eafeSaliguori     qemu_aio_release(acb);
481585f8587Sbellard }
482585f8587Sbellard 
483ce1a14dcSpbrook static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
484f141eafeSaliguori         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
485f141eafeSaliguori         BlockDriverCompletionFunc *cb, void *opaque, int is_write)
486585f8587Sbellard {
487ce1a14dcSpbrook     QCowAIOCB *acb;
488585f8587Sbellard 
489c16b5a2cSChristoph Hellwig     acb = qemu_aio_get(&qcow_aio_pool, bs, cb, opaque);
490ce1a14dcSpbrook     if (!acb)
491ce1a14dcSpbrook         return NULL;
492ce1a14dcSpbrook     acb->hd_aiocb = NULL;
493ce1a14dcSpbrook     acb->sector_num = sector_num;
494f141eafeSaliguori     acb->qiov = qiov;
495f141eafeSaliguori     if (qiov->niov > 1) {
496e268ca52Saliguori         acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size);
497f141eafeSaliguori         if (is_write)
498f141eafeSaliguori             qemu_iovec_to_buffer(qiov, acb->buf);
4993f4cb3d3Sblueswir1     } else {
5003f4cb3d3Sblueswir1         acb->buf = (uint8_t *)qiov->iov->iov_base;
5013f4cb3d3Sblueswir1     }
5027b88e48bSChristoph Hellwig     acb->remaining_sectors = nb_sectors;
5037b88e48bSChristoph Hellwig     acb->cur_nr_sectors = 0;
504ce1a14dcSpbrook     acb->cluster_offset = 0;
505e976c6a1Saliguori     acb->l2meta.nb_clusters = 0;
50672cf2d4fSBlue Swirl     QLIST_INIT(&acb->l2meta.dependent_requests);
507ce1a14dcSpbrook     return acb;
508ce1a14dcSpbrook }
509ce1a14dcSpbrook 
510f141eafeSaliguori static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs,
511f141eafeSaliguori         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
512ce1a14dcSpbrook         BlockDriverCompletionFunc *cb, void *opaque)
513ce1a14dcSpbrook {
514ce1a14dcSpbrook     QCowAIOCB *acb;
515ce1a14dcSpbrook 
516f141eafeSaliguori     acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
517ce1a14dcSpbrook     if (!acb)
518ce1a14dcSpbrook         return NULL;
519585f8587Sbellard 
520585f8587Sbellard     qcow_aio_read_cb(acb, 0);
521ce1a14dcSpbrook     return &acb->common;
522585f8587Sbellard }
523585f8587Sbellard 
524f214978aSKevin Wolf static void qcow_aio_write_cb(void *opaque, int ret);
525f214978aSKevin Wolf 
526f214978aSKevin Wolf static void run_dependent_requests(QCowL2Meta *m)
527f214978aSKevin Wolf {
528f214978aSKevin Wolf     QCowAIOCB *req;
529f214978aSKevin Wolf     QCowAIOCB *next;
530f214978aSKevin Wolf 
531f214978aSKevin Wolf     /* Take the request off the list of running requests */
532f214978aSKevin Wolf     if (m->nb_clusters != 0) {
53372cf2d4fSBlue Swirl         QLIST_REMOVE(m, next_in_flight);
534f214978aSKevin Wolf     }
535f214978aSKevin Wolf 
536d4c146f0SStefan Hajnoczi     /* Restart all dependent requests */
537d4c146f0SStefan Hajnoczi     QLIST_FOREACH_SAFE(req, &m->dependent_requests, next_depend, next) {
538f214978aSKevin Wolf         qcow_aio_write_cb(req, 0);
539f214978aSKevin Wolf     }
540f214978aSKevin Wolf 
541f214978aSKevin Wolf     /* Empty the list for the next part of the request */
54272cf2d4fSBlue Swirl     QLIST_INIT(&m->dependent_requests);
543f214978aSKevin Wolf }
544f214978aSKevin Wolf 
545585f8587Sbellard static void qcow_aio_write_cb(void *opaque, int ret)
546585f8587Sbellard {
547ce1a14dcSpbrook     QCowAIOCB *acb = opaque;
548ce1a14dcSpbrook     BlockDriverState *bs = acb->common.bs;
549585f8587Sbellard     BDRVQcowState *s = bs->opaque;
550585f8587Sbellard     int index_in_cluster;
551585f8587Sbellard     const uint8_t *src_buf;
552095a9c58Saliguori     int n_end;
553585f8587Sbellard 
554ce1a14dcSpbrook     acb->hd_aiocb = NULL;
555ce1a14dcSpbrook 
556f214978aSKevin Wolf     if (ret >= 0) {
557148da7eaSKevin Wolf         ret = qcow2_alloc_cluster_link_l2(bs, &acb->l2meta);
558f214978aSKevin Wolf     }
559f214978aSKevin Wolf 
560f214978aSKevin Wolf     run_dependent_requests(&acb->l2meta);
561f214978aSKevin Wolf 
562f141eafeSaliguori     if (ret < 0)
563f141eafeSaliguori         goto done;
564585f8587Sbellard 
5657b88e48bSChristoph Hellwig     acb->remaining_sectors -= acb->cur_nr_sectors;
5667b88e48bSChristoph Hellwig     acb->sector_num += acb->cur_nr_sectors;
5677b88e48bSChristoph Hellwig     acb->buf += acb->cur_nr_sectors * 512;
568585f8587Sbellard 
5697b88e48bSChristoph Hellwig     if (acb->remaining_sectors == 0) {
570585f8587Sbellard         /* request completed */
571f141eafeSaliguori         ret = 0;
572f141eafeSaliguori         goto done;
573585f8587Sbellard     }
574585f8587Sbellard 
575ce1a14dcSpbrook     index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
5767b88e48bSChristoph Hellwig     n_end = index_in_cluster + acb->remaining_sectors;
577095a9c58Saliguori     if (s->crypt_method &&
578095a9c58Saliguori         n_end > QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors)
579095a9c58Saliguori         n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors;
580095a9c58Saliguori 
581148da7eaSKevin Wolf     ret = qcow2_alloc_cluster_offset(bs, acb->sector_num << 9,
5827b88e48bSChristoph Hellwig         index_in_cluster, n_end, &acb->cur_nr_sectors, &acb->l2meta);
583148da7eaSKevin Wolf     if (ret < 0) {
584148da7eaSKevin Wolf         goto done;
585148da7eaSKevin Wolf     }
586148da7eaSKevin Wolf 
587148da7eaSKevin Wolf     acb->cluster_offset = acb->l2meta.cluster_offset;
588f214978aSKevin Wolf 
589f214978aSKevin Wolf     /* Need to wait for another request? If so, we are done for now. */
590148da7eaSKevin Wolf     if (acb->l2meta.nb_clusters == 0 && acb->l2meta.depends_on != NULL) {
59172cf2d4fSBlue Swirl         QLIST_INSERT_HEAD(&acb->l2meta.depends_on->dependent_requests,
592f214978aSKevin Wolf             acb, next_depend);
593f214978aSKevin Wolf         return;
594f214978aSKevin Wolf     }
595f214978aSKevin Wolf 
596148da7eaSKevin Wolf     assert((acb->cluster_offset & 511) == 0);
597148da7eaSKevin Wolf 
598585f8587Sbellard     if (s->crypt_method) {
599ce1a14dcSpbrook         if (!acb->cluster_data) {
600095a9c58Saliguori             acb->cluster_data = qemu_mallocz(QCOW_MAX_CRYPT_CLUSTERS *
601095a9c58Saliguori                                              s->cluster_size);
602585f8587Sbellard         }
603ed6ccf0fSKevin Wolf         qcow2_encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf,
6047b88e48bSChristoph Hellwig                         acb->cur_nr_sectors, 1, &s->aes_encrypt_key);
605ce1a14dcSpbrook         src_buf = acb->cluster_data;
606585f8587Sbellard     } else {
607ce1a14dcSpbrook         src_buf = acb->buf;
608585f8587Sbellard     }
609c87c0672Saliguori     acb->hd_iov.iov_base = (void *)src_buf;
6107b88e48bSChristoph Hellwig     acb->hd_iov.iov_len = acb->cur_nr_sectors * 512;
611c87c0672Saliguori     qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
61266f82ceeSKevin Wolf     BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
61366f82ceeSKevin Wolf     acb->hd_aiocb = bdrv_aio_writev(bs->file,
614e976c6a1Saliguori                                     (acb->cluster_offset >> 9) + index_in_cluster,
6157b88e48bSChristoph Hellwig                                     &acb->hd_qiov, acb->cur_nr_sectors,
616585f8587Sbellard                                     qcow_aio_write_cb, acb);
617171e3d6bSKevin Wolf     if (acb->hd_aiocb == NULL) {
618171e3d6bSKevin Wolf         ret = -EIO;
619c644db3dSKevin Wolf         goto fail;
620171e3d6bSKevin Wolf     }
621f141eafeSaliguori 
622f141eafeSaliguori     return;
623f141eafeSaliguori 
624c644db3dSKevin Wolf fail:
625c644db3dSKevin Wolf     if (acb->l2meta.nb_clusters != 0) {
626c644db3dSKevin Wolf         QLIST_REMOVE(&acb->l2meta, next_in_flight);
627c644db3dSKevin Wolf     }
628f141eafeSaliguori done:
629f141eafeSaliguori     if (acb->qiov->niov > 1)
630f141eafeSaliguori         qemu_vfree(acb->orig_buf);
631f141eafeSaliguori     acb->common.cb(acb->common.opaque, ret);
632f141eafeSaliguori     qemu_aio_release(acb);
633585f8587Sbellard }
634585f8587Sbellard 
635f141eafeSaliguori static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs,
636f141eafeSaliguori         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
637ce1a14dcSpbrook         BlockDriverCompletionFunc *cb, void *opaque)
638585f8587Sbellard {
639585f8587Sbellard     BDRVQcowState *s = bs->opaque;
640ce1a14dcSpbrook     QCowAIOCB *acb;
641585f8587Sbellard 
642585f8587Sbellard     s->cluster_cache_offset = -1; /* disable compressed cache */
643585f8587Sbellard 
644f141eafeSaliguori     acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
645ce1a14dcSpbrook     if (!acb)
646ce1a14dcSpbrook         return NULL;
647585f8587Sbellard 
648585f8587Sbellard     qcow_aio_write_cb(acb, 0);
649ce1a14dcSpbrook     return &acb->common;
650585f8587Sbellard }
651585f8587Sbellard 
652585f8587Sbellard static void qcow_close(BlockDriverState *bs)
653585f8587Sbellard {
654585f8587Sbellard     BDRVQcowState *s = bs->opaque;
655585f8587Sbellard     qemu_free(s->l1_table);
656585f8587Sbellard     qemu_free(s->l2_cache);
657585f8587Sbellard     qemu_free(s->cluster_cache);
658585f8587Sbellard     qemu_free(s->cluster_data);
659ed6ccf0fSKevin Wolf     qcow2_refcount_close(bs);
660585f8587Sbellard }
661585f8587Sbellard 
662756e6736SKevin Wolf /*
663756e6736SKevin Wolf  * Updates the variable length parts of the qcow2 header, i.e. the backing file
664756e6736SKevin Wolf  * name and all extensions. qcow2 was not designed to allow such changes, so if
665756e6736SKevin Wolf  * we run out of space (we can only use the first cluster) this function may
666756e6736SKevin Wolf  * fail.
667756e6736SKevin Wolf  *
668756e6736SKevin Wolf  * Returns 0 on success, -errno in error cases.
669756e6736SKevin Wolf  */
670756e6736SKevin Wolf static int qcow2_update_ext_header(BlockDriverState *bs,
671756e6736SKevin Wolf     const char *backing_file, const char *backing_fmt)
672756e6736SKevin Wolf {
673756e6736SKevin Wolf     size_t backing_file_len = 0;
674756e6736SKevin Wolf     size_t backing_fmt_len = 0;
675756e6736SKevin Wolf     BDRVQcowState *s = bs->opaque;
676756e6736SKevin Wolf     QCowExtension ext_backing_fmt = {0, 0};
677756e6736SKevin Wolf     int ret;
678756e6736SKevin Wolf 
679756e6736SKevin Wolf     /* Backing file format doesn't make sense without a backing file */
680756e6736SKevin Wolf     if (backing_fmt && !backing_file) {
681756e6736SKevin Wolf         return -EINVAL;
682756e6736SKevin Wolf     }
683756e6736SKevin Wolf 
684756e6736SKevin Wolf     /* Prepare the backing file format extension if needed */
685756e6736SKevin Wolf     if (backing_fmt) {
686756e6736SKevin Wolf         ext_backing_fmt.len = cpu_to_be32(strlen(backing_fmt));
687756e6736SKevin Wolf         ext_backing_fmt.magic = cpu_to_be32(QCOW_EXT_MAGIC_BACKING_FORMAT);
688756e6736SKevin Wolf         backing_fmt_len = ((sizeof(ext_backing_fmt)
689756e6736SKevin Wolf             + strlen(backing_fmt) + 7) & ~7);
690756e6736SKevin Wolf     }
691756e6736SKevin Wolf 
692756e6736SKevin Wolf     /* Check if we can fit the new header into the first cluster */
693756e6736SKevin Wolf     if (backing_file) {
694756e6736SKevin Wolf         backing_file_len = strlen(backing_file);
695756e6736SKevin Wolf     }
696756e6736SKevin Wolf 
697756e6736SKevin Wolf     size_t header_size = sizeof(QCowHeader) + backing_file_len
698756e6736SKevin Wolf         + backing_fmt_len;
699756e6736SKevin Wolf 
700756e6736SKevin Wolf     if (header_size > s->cluster_size) {
701756e6736SKevin Wolf         return -ENOSPC;
702756e6736SKevin Wolf     }
703756e6736SKevin Wolf 
704756e6736SKevin Wolf     /* Rewrite backing file name and qcow2 extensions */
705756e6736SKevin Wolf     size_t ext_size = header_size - sizeof(QCowHeader);
706756e6736SKevin Wolf     uint8_t buf[ext_size];
707756e6736SKevin Wolf     size_t offset = 0;
708756e6736SKevin Wolf     size_t backing_file_offset = 0;
709756e6736SKevin Wolf 
710756e6736SKevin Wolf     if (backing_file) {
711756e6736SKevin Wolf         if (backing_fmt) {
712756e6736SKevin Wolf             int padding = backing_fmt_len -
713756e6736SKevin Wolf                 (sizeof(ext_backing_fmt) + strlen(backing_fmt));
714756e6736SKevin Wolf 
715756e6736SKevin Wolf             memcpy(buf + offset, &ext_backing_fmt, sizeof(ext_backing_fmt));
716756e6736SKevin Wolf             offset += sizeof(ext_backing_fmt);
717756e6736SKevin Wolf 
718756e6736SKevin Wolf             memcpy(buf + offset, backing_fmt, strlen(backing_fmt));
719756e6736SKevin Wolf             offset += strlen(backing_fmt);
720756e6736SKevin Wolf 
721756e6736SKevin Wolf             memset(buf + offset, 0, padding);
722756e6736SKevin Wolf             offset += padding;
723756e6736SKevin Wolf         }
724756e6736SKevin Wolf 
725756e6736SKevin Wolf         memcpy(buf + offset, backing_file, backing_file_len);
726756e6736SKevin Wolf         backing_file_offset = sizeof(QCowHeader) + offset;
727756e6736SKevin Wolf     }
728756e6736SKevin Wolf 
72966f82ceeSKevin Wolf     ret = bdrv_pwrite(bs->file, sizeof(QCowHeader), buf, ext_size);
730756e6736SKevin Wolf     if (ret < 0) {
731756e6736SKevin Wolf         goto fail;
732756e6736SKevin Wolf     }
733756e6736SKevin Wolf 
734756e6736SKevin Wolf     /* Update header fields */
735756e6736SKevin Wolf     uint64_t be_backing_file_offset = cpu_to_be64(backing_file_offset);
736756e6736SKevin Wolf     uint32_t be_backing_file_size = cpu_to_be32(backing_file_len);
737756e6736SKevin Wolf 
73866f82ceeSKevin Wolf     ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, backing_file_offset),
739756e6736SKevin Wolf         &be_backing_file_offset, sizeof(uint64_t));
740756e6736SKevin Wolf     if (ret < 0) {
741756e6736SKevin Wolf         goto fail;
742756e6736SKevin Wolf     }
743756e6736SKevin Wolf 
74466f82ceeSKevin Wolf     ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, backing_file_size),
745756e6736SKevin Wolf         &be_backing_file_size, sizeof(uint32_t));
746756e6736SKevin Wolf     if (ret < 0) {
747756e6736SKevin Wolf         goto fail;
748756e6736SKevin Wolf     }
749756e6736SKevin Wolf 
750756e6736SKevin Wolf     ret = 0;
751756e6736SKevin Wolf fail:
752756e6736SKevin Wolf     return ret;
753756e6736SKevin Wolf }
754756e6736SKevin Wolf 
755756e6736SKevin Wolf static int qcow2_change_backing_file(BlockDriverState *bs,
756756e6736SKevin Wolf     const char *backing_file, const char *backing_fmt)
757756e6736SKevin Wolf {
758756e6736SKevin Wolf     return qcow2_update_ext_header(bs, backing_file, backing_fmt);
759756e6736SKevin Wolf }
760756e6736SKevin Wolf 
/*
 * Returns the base-2 logarithm of size, or -1 if size is zero or not a
 * power of two.
 */
static int get_bits_from_size(size_t size)
{
    int bits = 0;

    /* A power of two has exactly one bit set; zero has none */
    if (size == 0 || (size & (size - 1)) != 0) {
        return -1;
    }

    /* Count how often the single set bit can be shifted down to bit 0 */
    for (; size > 1; size >>= 1) {
        bits++;
    }

    return bits;
}
78173c632edSKevin Wolf 
782a35e1c17SKevin Wolf 
783a35e1c17SKevin Wolf static int preallocate(BlockDriverState *bs)
784a35e1c17SKevin Wolf {
785a35e1c17SKevin Wolf     uint64_t nb_sectors;
786a35e1c17SKevin Wolf     uint64_t offset;
787a35e1c17SKevin Wolf     int num;
788148da7eaSKevin Wolf     int ret;
789a35e1c17SKevin Wolf     QCowL2Meta meta;
790a35e1c17SKevin Wolf 
791a35e1c17SKevin Wolf     nb_sectors = bdrv_getlength(bs) >> 9;
792a35e1c17SKevin Wolf     offset = 0;
79372cf2d4fSBlue Swirl     QLIST_INIT(&meta.dependent_requests);
794148da7eaSKevin Wolf     meta.cluster_offset = 0;
795a35e1c17SKevin Wolf 
796a35e1c17SKevin Wolf     while (nb_sectors) {
797a35e1c17SKevin Wolf         num = MIN(nb_sectors, INT_MAX >> 9);
798148da7eaSKevin Wolf         ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num, &meta);
799a35e1c17SKevin Wolf 
800148da7eaSKevin Wolf         if (ret < 0) {
801a35e1c17SKevin Wolf             return -1;
802a35e1c17SKevin Wolf         }
803a35e1c17SKevin Wolf 
804148da7eaSKevin Wolf         if (qcow2_alloc_cluster_link_l2(bs, &meta) < 0) {
805148da7eaSKevin Wolf             qcow2_free_any_clusters(bs, meta.cluster_offset, meta.nb_clusters);
806a35e1c17SKevin Wolf             return -1;
807a35e1c17SKevin Wolf         }
808a35e1c17SKevin Wolf 
809f214978aSKevin Wolf         /* There are no dependent requests, but we need to remove our request
810f214978aSKevin Wolf          * from the list of in-flight requests */
811f214978aSKevin Wolf         run_dependent_requests(&meta);
812f214978aSKevin Wolf 
813a35e1c17SKevin Wolf         /* TODO Preallocate data if requested */
814a35e1c17SKevin Wolf 
815a35e1c17SKevin Wolf         nb_sectors -= num;
816a35e1c17SKevin Wolf         offset += num << 9;
817a35e1c17SKevin Wolf     }
818a35e1c17SKevin Wolf 
819a35e1c17SKevin Wolf     /*
820a35e1c17SKevin Wolf      * It is expected that the image file is large enough to actually contain
821a35e1c17SKevin Wolf      * all of the allocated clusters (otherwise we get failing reads after
822a35e1c17SKevin Wolf      * EOF). Extend the image to the last allocated sector.
823a35e1c17SKevin Wolf      */
824148da7eaSKevin Wolf     if (meta.cluster_offset != 0) {
825ea80b906SKevin Wolf         uint8_t buf[512];
826ea80b906SKevin Wolf         memset(buf, 0, 512);
82766f82ceeSKevin Wolf         bdrv_write(bs->file, (meta.cluster_offset >> 9) + num - 1, buf, 1);
828a35e1c17SKevin Wolf     }
829a35e1c17SKevin Wolf 
830a35e1c17SKevin Wolf     return 0;
831a35e1c17SKevin Wolf }
832a35e1c17SKevin Wolf 
83320d97356SBlue Swirl static int qcow_make_empty(BlockDriverState *bs)
83420d97356SBlue Swirl {
83520d97356SBlue Swirl #if 0
83620d97356SBlue Swirl     /* XXX: not correct */
83720d97356SBlue Swirl     BDRVQcowState *s = bs->opaque;
83820d97356SBlue Swirl     uint32_t l1_length = s->l1_size * sizeof(uint64_t);
83920d97356SBlue Swirl     int ret;
84020d97356SBlue Swirl 
84120d97356SBlue Swirl     memset(s->l1_table, 0, l1_length);
84266f82ceeSKevin Wolf     if (bdrv_pwrite(bs->file, s->l1_table_offset, s->l1_table, l1_length) < 0)
84320d97356SBlue Swirl         return -1;
84466f82ceeSKevin Wolf     ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
84520d97356SBlue Swirl     if (ret < 0)
84620d97356SBlue Swirl         return ret;
84720d97356SBlue Swirl 
84820d97356SBlue Swirl     l2_cache_reset(bs);
84920d97356SBlue Swirl #endif
85020d97356SBlue Swirl     return 0;
85120d97356SBlue Swirl }
85220d97356SBlue Swirl 
853*419b19d9SStefan Hajnoczi static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
854*419b19d9SStefan Hajnoczi {
855*419b19d9SStefan Hajnoczi     BDRVQcowState *s = bs->opaque;
856*419b19d9SStefan Hajnoczi     int ret, new_l1_size;
857*419b19d9SStefan Hajnoczi 
858*419b19d9SStefan Hajnoczi     if (offset & 511) {
859*419b19d9SStefan Hajnoczi         return -EINVAL;
860*419b19d9SStefan Hajnoczi     }
861*419b19d9SStefan Hajnoczi 
862*419b19d9SStefan Hajnoczi     /* cannot proceed if image has snapshots */
863*419b19d9SStefan Hajnoczi     if (s->nb_snapshots) {
864*419b19d9SStefan Hajnoczi         return -ENOTSUP;
865*419b19d9SStefan Hajnoczi     }
866*419b19d9SStefan Hajnoczi 
867*419b19d9SStefan Hajnoczi     /* shrinking is currently not supported */
868*419b19d9SStefan Hajnoczi     if (offset < bs->total_sectors * 512) {
869*419b19d9SStefan Hajnoczi         return -ENOTSUP;
870*419b19d9SStefan Hajnoczi     }
871*419b19d9SStefan Hajnoczi 
872*419b19d9SStefan Hajnoczi     new_l1_size = size_to_l1(s, offset);
873*419b19d9SStefan Hajnoczi     ret = qcow2_grow_l1_table(bs, new_l1_size);
874*419b19d9SStefan Hajnoczi     if (ret < 0) {
875*419b19d9SStefan Hajnoczi         return ret;
876*419b19d9SStefan Hajnoczi     }
877*419b19d9SStefan Hajnoczi 
878*419b19d9SStefan Hajnoczi     /* write updated header.size */
879*419b19d9SStefan Hajnoczi     offset = cpu_to_be64(offset);
880*419b19d9SStefan Hajnoczi     ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, size),
881*419b19d9SStefan Hajnoczi                       &offset, sizeof(uint64_t));
882*419b19d9SStefan Hajnoczi     if (ret < 0) {
883*419b19d9SStefan Hajnoczi         return ret;
884*419b19d9SStefan Hajnoczi     }
885*419b19d9SStefan Hajnoczi 
886*419b19d9SStefan Hajnoczi     s->l1_vm_state_index = new_l1_size;
887*419b19d9SStefan Hajnoczi     return 0;
888*419b19d9SStefan Hajnoczi }
889*419b19d9SStefan Hajnoczi 
89020d97356SBlue Swirl /* XXX: put compressed sectors first, then all the cluster aligned
89120d97356SBlue Swirl    tables to avoid losing bytes in alignment */
89220d97356SBlue Swirl static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
89320d97356SBlue Swirl                                  const uint8_t *buf, int nb_sectors)
89420d97356SBlue Swirl {
89520d97356SBlue Swirl     BDRVQcowState *s = bs->opaque;
89620d97356SBlue Swirl     z_stream strm;
89720d97356SBlue Swirl     int ret, out_len;
89820d97356SBlue Swirl     uint8_t *out_buf;
89920d97356SBlue Swirl     uint64_t cluster_offset;
90020d97356SBlue Swirl 
90120d97356SBlue Swirl     if (nb_sectors == 0) {
90220d97356SBlue Swirl         /* align end of file to a sector boundary to ease reading with
90320d97356SBlue Swirl            sector based I/Os */
90466f82ceeSKevin Wolf         cluster_offset = bdrv_getlength(bs->file);
90520d97356SBlue Swirl         cluster_offset = (cluster_offset + 511) & ~511;
90666f82ceeSKevin Wolf         bdrv_truncate(bs->file, cluster_offset);
90720d97356SBlue Swirl         return 0;
90820d97356SBlue Swirl     }
90920d97356SBlue Swirl 
91020d97356SBlue Swirl     if (nb_sectors != s->cluster_sectors)
91120d97356SBlue Swirl         return -EINVAL;
91220d97356SBlue Swirl 
91320d97356SBlue Swirl     out_buf = qemu_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
91420d97356SBlue Swirl 
91520d97356SBlue Swirl     /* best compression, small window, no zlib header */
91620d97356SBlue Swirl     memset(&strm, 0, sizeof(strm));
91720d97356SBlue Swirl     ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
91820d97356SBlue Swirl                        Z_DEFLATED, -12,
91920d97356SBlue Swirl                        9, Z_DEFAULT_STRATEGY);
92020d97356SBlue Swirl     if (ret != 0) {
92120d97356SBlue Swirl         qemu_free(out_buf);
92220d97356SBlue Swirl         return -1;
92320d97356SBlue Swirl     }
92420d97356SBlue Swirl 
92520d97356SBlue Swirl     strm.avail_in = s->cluster_size;
92620d97356SBlue Swirl     strm.next_in = (uint8_t *)buf;
92720d97356SBlue Swirl     strm.avail_out = s->cluster_size;
92820d97356SBlue Swirl     strm.next_out = out_buf;
92920d97356SBlue Swirl 
93020d97356SBlue Swirl     ret = deflate(&strm, Z_FINISH);
93120d97356SBlue Swirl     if (ret != Z_STREAM_END && ret != Z_OK) {
93220d97356SBlue Swirl         qemu_free(out_buf);
93320d97356SBlue Swirl         deflateEnd(&strm);
93420d97356SBlue Swirl         return -1;
93520d97356SBlue Swirl     }
93620d97356SBlue Swirl     out_len = strm.next_out - out_buf;
93720d97356SBlue Swirl 
93820d97356SBlue Swirl     deflateEnd(&strm);
93920d97356SBlue Swirl 
94020d97356SBlue Swirl     if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
94120d97356SBlue Swirl         /* could not compress: write normal cluster */
94220d97356SBlue Swirl         bdrv_write(bs, sector_num, buf, s->cluster_sectors);
94320d97356SBlue Swirl     } else {
94420d97356SBlue Swirl         cluster_offset = qcow2_alloc_compressed_cluster_offset(bs,
94520d97356SBlue Swirl             sector_num << 9, out_len);
94620d97356SBlue Swirl         if (!cluster_offset)
94720d97356SBlue Swirl             return -1;
94820d97356SBlue Swirl         cluster_offset &= s->cluster_offset_mask;
94966f82ceeSKevin Wolf         BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
95066f82ceeSKevin Wolf         if (bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len) != out_len) {
95120d97356SBlue Swirl             qemu_free(out_buf);
95220d97356SBlue Swirl             return -1;
95320d97356SBlue Swirl         }
95420d97356SBlue Swirl     }
95520d97356SBlue Swirl 
95620d97356SBlue Swirl     qemu_free(out_buf);
95720d97356SBlue Swirl     return 0;
95820d97356SBlue Swirl }
95920d97356SBlue Swirl 
96020d97356SBlue Swirl static void qcow_flush(BlockDriverState *bs)
96120d97356SBlue Swirl {
96266f82ceeSKevin Wolf     bdrv_flush(bs->file);
96320d97356SBlue Swirl }
96420d97356SBlue Swirl 
96520d97356SBlue Swirl static BlockDriverAIOCB *qcow_aio_flush(BlockDriverState *bs,
96620d97356SBlue Swirl          BlockDriverCompletionFunc *cb, void *opaque)
96720d97356SBlue Swirl {
96866f82ceeSKevin Wolf     return bdrv_aio_flush(bs->file, cb, opaque);
96920d97356SBlue Swirl }
97020d97356SBlue Swirl 
97120d97356SBlue Swirl static int64_t qcow_vm_state_offset(BDRVQcowState *s)
97220d97356SBlue Swirl {
97320d97356SBlue Swirl 	return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits);
97420d97356SBlue Swirl }
97520d97356SBlue Swirl 
97620d97356SBlue Swirl static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
97720d97356SBlue Swirl {
97820d97356SBlue Swirl     BDRVQcowState *s = bs->opaque;
97920d97356SBlue Swirl     bdi->cluster_size = s->cluster_size;
98020d97356SBlue Swirl     bdi->vm_state_offset = qcow_vm_state_offset(s);
98120d97356SBlue Swirl     return 0;
98220d97356SBlue Swirl }
98320d97356SBlue Swirl 
98420d97356SBlue Swirl 
98520d97356SBlue Swirl static int qcow_check(BlockDriverState *bs)
98620d97356SBlue Swirl {
98720d97356SBlue Swirl     return qcow2_check_refcounts(bs);
98820d97356SBlue Swirl }
98920d97356SBlue Swirl 
#if 0
/*
 * Debugging aid (compiled out): prints one line for every run of
 * consecutive clusters of the image file that share the same refcount.
 */
static void dump_refcounts(BlockDriverState *bs)
{
    BDRVQcowState *s = bs->opaque;
    int64_t nb_clusters, k, k1, size;
    int refcount;

    size = bdrv_getlength(bs->file);
    nb_clusters = size_to_clusters(s, size);
    for(k = 0; k < nb_clusters;) {
        /* k1 remembers where the current run of equal refcounts starts */
        k1 = k;
        refcount = get_refcount(bs, k);
        k++;
        while (k < nb_clusters && get_refcount(bs, k) == refcount)
            k++;
        /* %lld expects long long, which int64_t need not be: cast */
        printf("%lld: refcount=%d nb=%lld\n", (long long)k, refcount,
               (long long)(k - k1));
    }
}
#endif
100920d97356SBlue Swirl 
101020d97356SBlue Swirl static int qcow_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
101120d97356SBlue Swirl                            int64_t pos, int size)
101220d97356SBlue Swirl {
101320d97356SBlue Swirl     BDRVQcowState *s = bs->opaque;
101420d97356SBlue Swirl     int growable = bs->growable;
101520d97356SBlue Swirl     int ret;
101620d97356SBlue Swirl 
101766f82ceeSKevin Wolf     BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
101820d97356SBlue Swirl     bs->growable = 1;
101920d97356SBlue Swirl     ret = bdrv_pwrite(bs, qcow_vm_state_offset(s) + pos, buf, size);
102020d97356SBlue Swirl     bs->growable = growable;
102120d97356SBlue Swirl 
102220d97356SBlue Swirl     return ret;
102320d97356SBlue Swirl }
102420d97356SBlue Swirl 
102520d97356SBlue Swirl static int qcow_load_vmstate(BlockDriverState *bs, uint8_t *buf,
102620d97356SBlue Swirl                            int64_t pos, int size)
102720d97356SBlue Swirl {
102820d97356SBlue Swirl     BDRVQcowState *s = bs->opaque;
102920d97356SBlue Swirl     int growable = bs->growable;
103020d97356SBlue Swirl     int ret;
103120d97356SBlue Swirl 
103266f82ceeSKevin Wolf     BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
103320d97356SBlue Swirl     bs->growable = 1;
103420d97356SBlue Swirl     ret = bdrv_pread(bs, qcow_vm_state_offset(s) + pos, buf, size);
103520d97356SBlue Swirl     bs->growable = growable;
103620d97356SBlue Swirl 
103720d97356SBlue Swirl     return ret;
103820d97356SBlue Swirl }
103920d97356SBlue Swirl 
104020d97356SBlue Swirl static QEMUOptionParameter qcow_create_options[] = {
104120d97356SBlue Swirl     {
104220d97356SBlue Swirl         .name = BLOCK_OPT_SIZE,
104320d97356SBlue Swirl         .type = OPT_SIZE,
104420d97356SBlue Swirl         .help = "Virtual disk size"
104520d97356SBlue Swirl     },
104620d97356SBlue Swirl     {
104720d97356SBlue Swirl         .name = BLOCK_OPT_BACKING_FILE,
104820d97356SBlue Swirl         .type = OPT_STRING,
104920d97356SBlue Swirl         .help = "File name of a base image"
105020d97356SBlue Swirl     },
105120d97356SBlue Swirl     {
105220d97356SBlue Swirl         .name = BLOCK_OPT_BACKING_FMT,
105320d97356SBlue Swirl         .type = OPT_STRING,
105420d97356SBlue Swirl         .help = "Image format of the base image"
105520d97356SBlue Swirl     },
105620d97356SBlue Swirl     {
105720d97356SBlue Swirl         .name = BLOCK_OPT_ENCRYPT,
105820d97356SBlue Swirl         .type = OPT_FLAG,
105920d97356SBlue Swirl         .help = "Encrypt the image"
106020d97356SBlue Swirl     },
106120d97356SBlue Swirl     {
106220d97356SBlue Swirl         .name = BLOCK_OPT_CLUSTER_SIZE,
106320d97356SBlue Swirl         .type = OPT_SIZE,
106420d97356SBlue Swirl         .help = "qcow2 cluster size"
106520d97356SBlue Swirl     },
106620d97356SBlue Swirl     {
106720d97356SBlue Swirl         .name = BLOCK_OPT_PREALLOC,
106820d97356SBlue Swirl         .type = OPT_STRING,
106920d97356SBlue Swirl         .help = "Preallocation mode (allowed values: off, metadata)"
107020d97356SBlue Swirl     },
107120d97356SBlue Swirl     { NULL }
107220d97356SBlue Swirl };
107320d97356SBlue Swirl 
107420d97356SBlue Swirl static BlockDriver bdrv_qcow2 = {
107520d97356SBlue Swirl     .format_name	= "qcow2",
107620d97356SBlue Swirl     .instance_size	= sizeof(BDRVQcowState),
107720d97356SBlue Swirl     .bdrv_probe		= qcow_probe,
107820d97356SBlue Swirl     .bdrv_open		= qcow_open,
107920d97356SBlue Swirl     .bdrv_close		= qcow_close,
108020d97356SBlue Swirl     .bdrv_create	= qcow_create,
108120d97356SBlue Swirl     .bdrv_flush		= qcow_flush,
108220d97356SBlue Swirl     .bdrv_is_allocated	= qcow_is_allocated,
108320d97356SBlue Swirl     .bdrv_set_key	= qcow_set_key,
108420d97356SBlue Swirl     .bdrv_make_empty	= qcow_make_empty,
108520d97356SBlue Swirl 
108620d97356SBlue Swirl     .bdrv_aio_readv	= qcow_aio_readv,
108720d97356SBlue Swirl     .bdrv_aio_writev	= qcow_aio_writev,
108820d97356SBlue Swirl     .bdrv_aio_flush	= qcow_aio_flush,
1089*419b19d9SStefan Hajnoczi 
1090*419b19d9SStefan Hajnoczi     .bdrv_truncate          = qcow2_truncate,
109120d97356SBlue Swirl     .bdrv_write_compressed  = qcow_write_compressed,
109220d97356SBlue Swirl 
109320d97356SBlue Swirl     .bdrv_snapshot_create   = qcow2_snapshot_create,
109420d97356SBlue Swirl     .bdrv_snapshot_goto     = qcow2_snapshot_goto,
109520d97356SBlue Swirl     .bdrv_snapshot_delete   = qcow2_snapshot_delete,
109620d97356SBlue Swirl     .bdrv_snapshot_list     = qcow2_snapshot_list,
109720d97356SBlue Swirl     .bdrv_get_info	= qcow_get_info,
109820d97356SBlue Swirl 
109920d97356SBlue Swirl     .bdrv_save_vmstate    = qcow_save_vmstate,
110020d97356SBlue Swirl     .bdrv_load_vmstate    = qcow_load_vmstate,
110120d97356SBlue Swirl 
110220d97356SBlue Swirl     .bdrv_change_backing_file   = qcow2_change_backing_file,
110320d97356SBlue Swirl 
110420d97356SBlue Swirl     .create_options = qcow_create_options,
110520d97356SBlue Swirl     .bdrv_check = qcow_check,
110620d97356SBlue Swirl };
110720d97356SBlue Swirl 
1108f965509cSaliguori static int qcow_create2(const char *filename, int64_t total_size,
1109f965509cSaliguori                         const char *backing_file, const char *backing_format,
1110a35e1c17SKevin Wolf                         int flags, size_t cluster_size, int prealloc)
1111585f8587Sbellard {
1112f965509cSaliguori 
1113585f8587Sbellard     int fd, header_size, backing_filename_len, l1_size, i, shift, l2_bits;
11144768fa90SKevin Wolf     int ref_clusters, reftable_clusters, backing_format_len = 0;
1115e1c7f0e3SKevin Wolf     int rounded_ext_bf_len = 0;
1116585f8587Sbellard     QCowHeader header;
1117585f8587Sbellard     uint64_t tmp, offset;
11184768fa90SKevin Wolf     uint64_t old_ref_clusters;
1119585f8587Sbellard     QCowCreateState s1, *s = &s1;
1120f965509cSaliguori     QCowExtension ext_bf = {0, 0};
1121db89119dSKirill A. Shutemov     int ret;
1122585f8587Sbellard 
1123585f8587Sbellard     memset(s, 0, sizeof(*s));
1124585f8587Sbellard 
1125585f8587Sbellard     fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
1126585f8587Sbellard     if (fd < 0)
1127bef57da5SJuan Quintela         return -errno;
1128585f8587Sbellard     memset(&header, 0, sizeof(header));
1129585f8587Sbellard     header.magic = cpu_to_be32(QCOW_MAGIC);
1130585f8587Sbellard     header.version = cpu_to_be32(QCOW_VERSION);
1131585f8587Sbellard     header.size = cpu_to_be64(total_size * 512);
1132585f8587Sbellard     header_size = sizeof(header);
1133585f8587Sbellard     backing_filename_len = 0;
1134585f8587Sbellard     if (backing_file) {
1135f965509cSaliguori         if (backing_format) {
1136f965509cSaliguori             ext_bf.magic = QCOW_EXT_MAGIC_BACKING_FORMAT;
1137f965509cSaliguori             backing_format_len = strlen(backing_format);
1138e1c7f0e3SKevin Wolf             ext_bf.len = backing_format_len;
1139e1c7f0e3SKevin Wolf             rounded_ext_bf_len = (sizeof(ext_bf) + ext_bf.len + 7) & ~7;
1140e1c7f0e3SKevin Wolf             header_size += rounded_ext_bf_len;
1141f965509cSaliguori         }
1142585f8587Sbellard         header.backing_file_offset = cpu_to_be64(header_size);
1143585f8587Sbellard         backing_filename_len = strlen(backing_file);
1144585f8587Sbellard         header.backing_file_size = cpu_to_be32(backing_filename_len);
1145585f8587Sbellard         header_size += backing_filename_len;
1146585f8587Sbellard     }
114773c632edSKevin Wolf 
114873c632edSKevin Wolf     /* Cluster size */
114973c632edSKevin Wolf     s->cluster_bits = get_bits_from_size(cluster_size);
115073c632edSKevin Wolf     if (s->cluster_bits < MIN_CLUSTER_BITS ||
115173c632edSKevin Wolf         s->cluster_bits > MAX_CLUSTER_BITS)
115273c632edSKevin Wolf     {
115373c632edSKevin Wolf         fprintf(stderr, "Cluster size must be a power of two between "
115473c632edSKevin Wolf             "%d and %dk\n",
115573c632edSKevin Wolf             1 << MIN_CLUSTER_BITS,
115673c632edSKevin Wolf             1 << (MAX_CLUSTER_BITS - 10));
115773c632edSKevin Wolf         return -EINVAL;
115873c632edSKevin Wolf     }
1159585f8587Sbellard     s->cluster_size = 1 << s->cluster_bits;
116073c632edSKevin Wolf 
1161585f8587Sbellard     header.cluster_bits = cpu_to_be32(s->cluster_bits);
1162585f8587Sbellard     header_size = (header_size + 7) & ~7;
1163ec36ba14Sths     if (flags & BLOCK_FLAG_ENCRYPT) {
1164585f8587Sbellard         header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
1165585f8587Sbellard     } else {
1166585f8587Sbellard         header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
1167585f8587Sbellard     }
1168585f8587Sbellard     l2_bits = s->cluster_bits - 3;
1169585f8587Sbellard     shift = s->cluster_bits + l2_bits;
1170585f8587Sbellard     l1_size = (((total_size * 512) + (1LL << shift) - 1) >> shift);
1171585f8587Sbellard     offset = align_offset(header_size, s->cluster_size);
1172585f8587Sbellard     s->l1_table_offset = offset;
1173585f8587Sbellard     header.l1_table_offset = cpu_to_be64(s->l1_table_offset);
1174585f8587Sbellard     header.l1_size = cpu_to_be32(l1_size);
117515e6690aSbellard     offset += align_offset(l1_size * sizeof(uint64_t), s->cluster_size);
1176585f8587Sbellard 
11774768fa90SKevin Wolf     /* count how many refcount blocks needed */
11784768fa90SKevin Wolf 
11794768fa90SKevin Wolf #define NUM_CLUSTERS(bytes) \
11804768fa90SKevin Wolf     (((bytes) + (s->cluster_size) - 1) / (s->cluster_size))
11814768fa90SKevin Wolf 
11824768fa90SKevin Wolf     ref_clusters = NUM_CLUSTERS(NUM_CLUSTERS(offset) * sizeof(uint16_t));
11834768fa90SKevin Wolf 
11844768fa90SKevin Wolf     do {
11854768fa90SKevin Wolf         uint64_t image_clusters;
11864768fa90SKevin Wolf         old_ref_clusters = ref_clusters;
11874768fa90SKevin Wolf 
11884768fa90SKevin Wolf         /* Number of clusters used for the refcount table */
11894768fa90SKevin Wolf         reftable_clusters = NUM_CLUSTERS(ref_clusters * sizeof(uint64_t));
11904768fa90SKevin Wolf 
11914768fa90SKevin Wolf         /* Number of clusters that the whole image will have */
11924768fa90SKevin Wolf         image_clusters = NUM_CLUSTERS(offset) + ref_clusters
11934768fa90SKevin Wolf             + reftable_clusters;
11944768fa90SKevin Wolf 
11954768fa90SKevin Wolf         /* Number of refcount blocks needed for the image */
11964768fa90SKevin Wolf         ref_clusters = NUM_CLUSTERS(image_clusters * sizeof(uint16_t));
11974768fa90SKevin Wolf 
11984768fa90SKevin Wolf     } while (ref_clusters != old_ref_clusters);
11994768fa90SKevin Wolf 
12004768fa90SKevin Wolf     s->refcount_table = qemu_mallocz(reftable_clusters * s->cluster_size);
1201585f8587Sbellard 
1202585f8587Sbellard     s->refcount_table_offset = offset;
1203585f8587Sbellard     header.refcount_table_offset = cpu_to_be64(offset);
12044768fa90SKevin Wolf     header.refcount_table_clusters = cpu_to_be32(reftable_clusters);
12054768fa90SKevin Wolf     offset += (reftable_clusters * s->cluster_size);
1206585f8587Sbellard     s->refcount_block_offset = offset;
12072d2431f0Saliguori 
12082d2431f0Saliguori     for (i=0; i < ref_clusters; i++) {
12092d2431f0Saliguori         s->refcount_table[i] = cpu_to_be64(offset);
1210585f8587Sbellard         offset += s->cluster_size;
12112d2431f0Saliguori     }
12122d2431f0Saliguori 
12132d2431f0Saliguori     s->refcount_block = qemu_mallocz(ref_clusters * s->cluster_size);
1214585f8587Sbellard 
1215585f8587Sbellard     /* update refcounts */
1216ed6ccf0fSKevin Wolf     qcow2_create_refcount_update(s, 0, header_size);
1217ed6ccf0fSKevin Wolf     qcow2_create_refcount_update(s, s->l1_table_offset,
1218ed6ccf0fSKevin Wolf         l1_size * sizeof(uint64_t));
12194768fa90SKevin Wolf     qcow2_create_refcount_update(s, s->refcount_table_offset,
12204768fa90SKevin Wolf         reftable_clusters * s->cluster_size);
1221ed6ccf0fSKevin Wolf     qcow2_create_refcount_update(s, s->refcount_block_offset,
1222ed6ccf0fSKevin Wolf         ref_clusters * s->cluster_size);
1223585f8587Sbellard 
1224585f8587Sbellard     /* write all the data */
1225db89119dSKirill A. Shutemov     ret = qemu_write_full(fd, &header, sizeof(header));
1226db89119dSKirill A. Shutemov     if (ret != sizeof(header)) {
1227bef57da5SJuan Quintela         ret = -errno;
1228db89119dSKirill A. Shutemov         goto exit;
1229db89119dSKirill A. Shutemov     }
1230585f8587Sbellard     if (backing_file) {
1231f965509cSaliguori         if (backing_format_len) {
1232f965509cSaliguori             char zero[16];
1233e1c7f0e3SKevin Wolf             int padding = rounded_ext_bf_len - (ext_bf.len + sizeof(ext_bf));
1234f965509cSaliguori 
1235f965509cSaliguori             memset(zero, 0, sizeof(zero));
1236f965509cSaliguori             cpu_to_be32s(&ext_bf.magic);
1237f965509cSaliguori             cpu_to_be32s(&ext_bf.len);
1238db89119dSKirill A. Shutemov             ret = qemu_write_full(fd, &ext_bf, sizeof(ext_bf));
1239db89119dSKirill A. Shutemov             if (ret != sizeof(ext_bf)) {
1240bef57da5SJuan Quintela                 ret = -errno;
1241db89119dSKirill A. Shutemov                 goto exit;
1242db89119dSKirill A. Shutemov             }
1243db89119dSKirill A. Shutemov             ret = qemu_write_full(fd, backing_format, backing_format_len);
1244db89119dSKirill A. Shutemov             if (ret != backing_format_len) {
1245bef57da5SJuan Quintela                 ret = -errno;
1246db89119dSKirill A. Shutemov                 goto exit;
1247db89119dSKirill A. Shutemov             }
1248e1c7f0e3SKevin Wolf             if (padding > 0) {
1249db89119dSKirill A. Shutemov                 ret = qemu_write_full(fd, zero, padding);
1250db89119dSKirill A. Shutemov                 if (ret != padding) {
1251bef57da5SJuan Quintela                     ret = -errno;
1252db89119dSKirill A. Shutemov                     goto exit;
1253f965509cSaliguori                 }
1254f965509cSaliguori             }
1255db89119dSKirill A. Shutemov         }
1256db89119dSKirill A. Shutemov         ret = qemu_write_full(fd, backing_file, backing_filename_len);
1257db89119dSKirill A. Shutemov         if (ret != backing_filename_len) {
1258bef57da5SJuan Quintela             ret = -errno;
1259db89119dSKirill A. Shutemov             goto exit;
1260db89119dSKirill A. Shutemov         }
1261585f8587Sbellard     }
1262585f8587Sbellard     lseek(fd, s->l1_table_offset, SEEK_SET);
1263585f8587Sbellard     tmp = 0;
1264585f8587Sbellard     for(i = 0;i < l1_size; i++) {
1265db89119dSKirill A. Shutemov         ret = qemu_write_full(fd, &tmp, sizeof(tmp));
1266db89119dSKirill A. Shutemov         if (ret != sizeof(tmp)) {
1267bef57da5SJuan Quintela             ret = -errno;
1268db89119dSKirill A. Shutemov             goto exit;
1269db89119dSKirill A. Shutemov         }
1270585f8587Sbellard     }
1271585f8587Sbellard     lseek(fd, s->refcount_table_offset, SEEK_SET);
12724768fa90SKevin Wolf     ret = qemu_write_full(fd, s->refcount_table,
12734768fa90SKevin Wolf         reftable_clusters * s->cluster_size);
12744768fa90SKevin Wolf     if (ret != reftable_clusters * s->cluster_size) {
1275bef57da5SJuan Quintela         ret = -errno;
1276db89119dSKirill A. Shutemov         goto exit;
1277db89119dSKirill A. Shutemov     }
1278585f8587Sbellard 
1279585f8587Sbellard     lseek(fd, s->refcount_block_offset, SEEK_SET);
1280db89119dSKirill A. Shutemov     ret = qemu_write_full(fd, s->refcount_block,
1281db89119dSKirill A. Shutemov 		    ref_clusters * s->cluster_size);
12826f745bdaSKevin Wolf     if (ret != ref_clusters * s->cluster_size) {
1283bef57da5SJuan Quintela         ret = -errno;
1284db89119dSKirill A. Shutemov         goto exit;
1285db89119dSKirill A. Shutemov     }
1286585f8587Sbellard 
1287db89119dSKirill A. Shutemov     ret = 0;
1288db89119dSKirill A. Shutemov exit:
1289585f8587Sbellard     qemu_free(s->refcount_table);
1290585f8587Sbellard     qemu_free(s->refcount_block);
1291585f8587Sbellard     close(fd);
1292a35e1c17SKevin Wolf 
1293a35e1c17SKevin Wolf     /* Preallocate metadata */
12946f745bdaSKevin Wolf     if (ret == 0 && prealloc) {
1295a35e1c17SKevin Wolf         BlockDriverState *bs;
1296a35e1c17SKevin Wolf         bs = bdrv_new("");
1297d6e9098eSKevin Wolf         bdrv_open(bs, filename, BDRV_O_CACHE_WB | BDRV_O_RDWR, &bdrv_qcow2);
1298a35e1c17SKevin Wolf         preallocate(bs);
1299a35e1c17SKevin Wolf         bdrv_close(bs);
1300a35e1c17SKevin Wolf     }
1301a35e1c17SKevin Wolf 
1302db89119dSKirill A. Shutemov     return ret;
1303585f8587Sbellard }
1304585f8587Sbellard 
13050e7e1989SKevin Wolf static int qcow_create(const char *filename, QEMUOptionParameter *options)
1306f965509cSaliguori {
13070e7e1989SKevin Wolf     const char *backing_file = NULL;
13080e7e1989SKevin Wolf     const char *backing_fmt = NULL;
13090e7e1989SKevin Wolf     uint64_t sectors = 0;
13100e7e1989SKevin Wolf     int flags = 0;
13119ccb258eSKevin Wolf     size_t cluster_size = 65536;
1312a35e1c17SKevin Wolf     int prealloc = 0;
13130e7e1989SKevin Wolf 
13140e7e1989SKevin Wolf     /* Read out options */
13150e7e1989SKevin Wolf     while (options && options->name) {
13160e7e1989SKevin Wolf         if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
13170e7e1989SKevin Wolf             sectors = options->value.n / 512;
13180e7e1989SKevin Wolf         } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
13190e7e1989SKevin Wolf             backing_file = options->value.s;
13200e7e1989SKevin Wolf         } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) {
13210e7e1989SKevin Wolf             backing_fmt = options->value.s;
13220e7e1989SKevin Wolf         } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) {
13230e7e1989SKevin Wolf             flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0;
132473c632edSKevin Wolf         } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
132573c632edSKevin Wolf             if (options->value.n) {
132673c632edSKevin Wolf                 cluster_size = options->value.n;
132773c632edSKevin Wolf             }
1328a35e1c17SKevin Wolf         } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
1329a35e1c17SKevin Wolf             if (!options->value.s || !strcmp(options->value.s, "off")) {
1330a35e1c17SKevin Wolf                 prealloc = 0;
1331a35e1c17SKevin Wolf             } else if (!strcmp(options->value.s, "metadata")) {
1332a35e1c17SKevin Wolf                 prealloc = 1;
1333a35e1c17SKevin Wolf             } else {
1334a35e1c17SKevin Wolf                 fprintf(stderr, "Invalid preallocation mode: '%s'\n",
1335a35e1c17SKevin Wolf                     options->value.s);
1336a35e1c17SKevin Wolf                 return -EINVAL;
1337a35e1c17SKevin Wolf             }
13380e7e1989SKevin Wolf         }
13390e7e1989SKevin Wolf         options++;
13400e7e1989SKevin Wolf     }
13410e7e1989SKevin Wolf 
1342a35e1c17SKevin Wolf     if (backing_file && prealloc) {
1343a35e1c17SKevin Wolf         fprintf(stderr, "Backing file and preallocation cannot be used at "
1344a35e1c17SKevin Wolf             "the same time\n");
1345a35e1c17SKevin Wolf         return -EINVAL;
1346a35e1c17SKevin Wolf     }
1347a35e1c17SKevin Wolf 
134873c632edSKevin Wolf     return qcow_create2(filename, sectors, backing_file, backing_fmt, flags,
1349a35e1c17SKevin Wolf         cluster_size, prealloc);
1350f965509cSaliguori }
1351f965509cSaliguori 
13525efa9d5aSAnthony Liguori static void bdrv_qcow2_init(void)
13535efa9d5aSAnthony Liguori {
13545efa9d5aSAnthony Liguori     bdrv_register(&bdrv_qcow2);
13555efa9d5aSAnthony Liguori }
13565efa9d5aSAnthony Liguori 
13575efa9d5aSAnthony Liguori block_init(bdrv_qcow2_init);
1358