#include "kvm/qcow.h" #include "kvm/disk-image.h" #include "kvm/read-write.h" #include "kvm/mutex.h" #include "kvm/util.h" #include #include #include #include #include #include #include #ifdef CONFIG_HAS_ZLIB #include #endif #include #include #include static int l2_table_insert(struct rb_root *root, struct qcow_l2_table *new) { struct rb_node **link = &(root->rb_node), *parent = NULL; u64 offset = new->offset; /* search the tree */ while (*link) { struct qcow_l2_table *t; t = rb_entry(*link, struct qcow_l2_table, node); if (!t) goto error; parent = *link; if (t->offset > offset) link = &(*link)->rb_left; else if (t->offset < offset) link = &(*link)->rb_right; else goto out; } /* add new node */ rb_link_node(&new->node, parent, link); rb_insert_color(&new->node, root); out: return 0; error: return -1; } static struct qcow_l2_table *l2_table_lookup(struct rb_root *root, u64 offset) { struct rb_node *link = root->rb_node; while (link) { struct qcow_l2_table *t; t = rb_entry(link, struct qcow_l2_table, node); if (!t) goto out; if (t->offset > offset) link = link->rb_left; else if (t->offset < offset) link = link->rb_right; else return t; } out: return NULL; } static void l1_table_free_cache(struct qcow_l1_table *l1t) { struct rb_root *r = &l1t->root; struct list_head *pos, *n; struct qcow_l2_table *t; list_for_each_safe(pos, n, &l1t->lru_list) { /* Remove cache table from the list and RB tree */ list_del(pos); t = list_entry(pos, struct qcow_l2_table, list); rb_erase(&t->node, r); /* Free the cached node */ free(t); } } static int qcow_l2_cache_write(struct qcow *q, struct qcow_l2_table *c) { struct qcow_header *header = q->header; u64 size; if (!c->dirty) return 0; size = 1 << header->l2_bits; if (pwrite_in_full(q->fd, c->table, size * sizeof(u64), c->offset) < 0) return -1; c->dirty = 0; return 0; } static int cache_table(struct qcow *q, struct qcow_l2_table *c) { struct qcow_l1_table *l1t = &q->table; struct rb_root *r = &l1t->root; struct qcow_l2_table *lru; if (l1t->nr_cached == MAX_CACHE_NODES) { /* * The node at the head of the list is least recently used * node. Remove it from the list and replaced with a new node. */ lru = list_first_entry(&l1t->lru_list, struct qcow_l2_table, list); if (qcow_l2_cache_write(q, lru) < 0) goto error; /* Remove the node from the cache */ rb_erase(&lru->node, r); list_del_init(&lru->list); l1t->nr_cached--; /* Free the LRUed node */ free(lru); } /* Add new node in RB Tree: Helps in searching faster */ if (l2_table_insert(r, c) < 0) goto error; /* Add in LRU replacement list */ list_add_tail(&c->list, &l1t->lru_list); l1t->nr_cached++; return 0; error: return -1; } static struct qcow_l2_table *l2_table_search(struct qcow *q, u64 offset) { struct qcow_l1_table *l1t = &q->table; struct qcow_l2_table *l2t; l2t = l2_table_lookup(&l1t->root, offset); if (!l2t) return NULL; /* Update the LRU state, by moving the searched node to list tail */ list_move_tail(&l2t->list, &l1t->lru_list); return l2t; } /* Allocates a new node for caching L2 table */ static struct qcow_l2_table *new_cache_table(struct qcow *q, u64 offset) { struct qcow_header *header = q->header; struct qcow_l2_table *c; u64 l2t_sz; u64 size; l2t_sz = 1 << header->l2_bits; size = sizeof(*c) + l2t_sz * sizeof(u64); c = calloc(1, size); if (!c) goto out; c->offset = offset; RB_CLEAR_NODE(&c->node); INIT_LIST_HEAD(&c->list); out: return c; } static inline u64 get_l1_index(struct qcow *q, u64 offset) { struct qcow_header *header = q->header; return offset >> (header->l2_bits + header->cluster_bits); } static inline u64 get_l2_index(struct qcow *q, u64 offset) { struct qcow_header *header = q->header; return (offset >> (header->cluster_bits)) & ((1 << header->l2_bits)-1); } static inline u64 get_cluster_offset(struct qcow *q, u64 offset) { struct qcow_header *header = q->header; return offset & ((1 << header->cluster_bits)-1); } static struct qcow_l2_table *qcow_read_l2_table(struct qcow *q, u64 offset) { struct qcow_header *header = q->header; struct qcow_l2_table *l2t; u64 size; size = 1 << header->l2_bits; /* search an entry for offset in cache */ l2t = l2_table_search(q, offset); if (l2t) return l2t; /* allocate new node for caching l2 table */ l2t = new_cache_table(q, offset); if (!l2t) goto error; /* table not cached: read from the disk */ if (pread_in_full(q->fd, l2t->table, size * sizeof(u64), offset) < 0) goto error; /* cache the table */ if (cache_table(q, l2t) < 0) goto error; return l2t; error: free(l2t); return NULL; } static int qcow_decompress_buffer(u8 *out_buf, int out_buf_size, const u8 *buf, int buf_size) { #ifdef CONFIG_HAS_ZLIB z_stream strm1, *strm = &strm1; int ret, out_len; memset(strm, 0, sizeof(*strm)); strm->next_in = (u8 *)buf; strm->avail_in = buf_size; strm->next_out = out_buf; strm->avail_out = out_buf_size; ret = inflateInit2(strm, -12); if (ret != Z_OK) return -1; ret = inflate(strm, Z_FINISH); out_len = strm->next_out - out_buf; if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || out_len != out_buf_size) { inflateEnd(strm); return -1; } inflateEnd(strm); return 0; #else return -1; #endif } static ssize_t qcow1_read_cluster(struct qcow *q, u64 offset, void *dst, u32 dst_len) { struct qcow_header *header = q->header; struct qcow_l1_table *l1t = &q->table; struct qcow_l2_table *l2t; u64 clust_offset; u64 clust_start; u64 l2t_offset; size_t length; u64 l2t_size; u64 l1_idx; u64 l2_idx; int coffset; int csize; l1_idx = get_l1_index(q, offset); if (l1_idx >= l1t->table_size) return -1; clust_offset = get_cluster_offset(q, offset); if (clust_offset >= q->cluster_size) return -1; length = q->cluster_size - clust_offset; if (length > dst_len) length = dst_len; mutex_lock(&q->mutex); l2t_offset = be64_to_cpu(l1t->l1_table[l1_idx]); if (!l2t_offset) goto zero_cluster; l2t_size = 1 << header->l2_bits; /* read and cache level 2 table */ l2t = qcow_read_l2_table(q, l2t_offset); if (!l2t) goto out_error; l2_idx = get_l2_index(q, offset); if (l2_idx >= l2t_size) goto out_error; clust_start = be64_to_cpu(l2t->table[l2_idx]); if (clust_start & QCOW1_OFLAG_COMPRESSED) { coffset = clust_start & q->cluster_offset_mask; csize = clust_start >> (63 - q->header->cluster_bits); csize &= (q->cluster_size - 1); if (pread_in_full(q->fd, q->cluster_data, csize, coffset) < 0) { goto out_error; } if (qcow_decompress_buffer(q->cluster_cache, q->cluster_size, q->cluster_data, csize) < 0) { goto out_error; } memcpy(dst, q->cluster_cache + clust_offset, length); mutex_unlock(&q->mutex); } else{ if (!clust_start) goto zero_cluster; mutex_unlock(&q->mutex); if (pread_in_full(q->fd, dst, length, clust_start + clust_offset) < 0) return -1; } return length; zero_cluster: mutex_unlock(&q->mutex); memset(dst, 0, length); return length; out_error: mutex_unlock(&q->mutex); length = -1; return -1; } static ssize_t qcow2_read_cluster(struct qcow *q, u64 offset, void *dst, u32 dst_len) { struct qcow_header *header = q->header; struct qcow_l1_table *l1t = &q->table; struct qcow_l2_table *l2t; u64 clust_offset; u64 clust_start; u64 l2t_offset; size_t length; u64 l2t_size; u64 l1_idx; u64 l2_idx; int coffset; int sector_offset; int nb_csectors; int csize; l1_idx = get_l1_index(q, offset); if (l1_idx >= l1t->table_size) return -1; clust_offset = get_cluster_offset(q, offset); if (clust_offset >= q->cluster_size) return -1; length = q->cluster_size - clust_offset; if (length > dst_len) length = dst_len; mutex_lock(&q->mutex); l2t_offset = be64_to_cpu(l1t->l1_table[l1_idx]); l2t_offset &= ~QCOW2_OFLAG_COPIED; if (!l2t_offset) goto zero_cluster; l2t_size = 1 << header->l2_bits; /* read and cache level 2 table */ l2t = qcow_read_l2_table(q, l2t_offset); if (!l2t) goto out_error; l2_idx = get_l2_index(q, offset); if (l2_idx >= l2t_size) goto out_error; clust_start = be64_to_cpu(l2t->table[l2_idx]); if (clust_start & QCOW2_OFLAG_COMPRESSED) { coffset = clust_start & q->cluster_offset_mask; nb_csectors = ((clust_start >> q->csize_shift) & q->csize_mask) + 1; sector_offset = coffset & (SECTOR_SIZE - 1); csize = nb_csectors * SECTOR_SIZE - sector_offset; if (pread_in_full(q->fd, q->cluster_data, nb_csectors * SECTOR_SIZE, coffset & ~(SECTOR_SIZE - 1)) < 0) { goto out_error; } if (qcow_decompress_buffer(q->cluster_cache, q->cluster_size, q->cluster_data + sector_offset, csize) < 0) { goto out_error; } memcpy(dst, q->cluster_cache + clust_offset, length); mutex_unlock(&q->mutex); } else{ clust_start &= QCOW2_OFFSET_MASK; if (!clust_start) goto zero_cluster; mutex_unlock(&q->mutex); if (pread_in_full(q->fd, dst, length, clust_start + clust_offset) < 0) return -1; } return length; zero_cluster: mutex_unlock(&q->mutex); memset(dst, 0, length); return length; out_error: mutex_unlock(&q->mutex); length = -1; return -1; } static ssize_t qcow_read_sector_single(struct disk_image *disk, u64 sector, void *dst, u32 dst_len) { struct qcow *q = disk->priv; struct qcow_header *header = q->header; u32 nr_read; u64 offset; char *buf; u32 nr; buf = dst; nr_read = 0; while (nr_read < dst_len) { offset = sector << SECTOR_SHIFT; if (offset >= header->size) return -1; if (q->version == QCOW1_VERSION) nr = qcow1_read_cluster(q, offset, buf, dst_len - nr_read); else nr = qcow2_read_cluster(q, offset, buf, dst_len - nr_read); if (nr <= 0) return -1; nr_read += nr; buf += nr; sector += (nr >> SECTOR_SHIFT); } return dst_len; } static ssize_t qcow_read_sector(struct disk_image *disk, u64 sector, const struct iovec *iov, int iovcount) { ssize_t nr, total = 0; while (iovcount--) { nr = qcow_read_sector_single(disk, sector, iov->iov_base, iov->iov_len); if (nr != (ssize_t)iov->iov_len) { pr_info("qcow_read_sector error: nr=%ld iov_len=%ld\n", (long)nr, (long)iov->iov_len); return -1; } sector += iov->iov_len >> SECTOR_SHIFT; iov++; total += nr; } return total; } static inline u64 file_size(int fd) { struct stat st; if (fstat(fd, &st) < 0) return 0; return st.st_size; } static inline int qcow_pwrite_sync(int fd, void *buf, size_t count, off_t offset) { if (pwrite_in_full(fd, buf, count, offset) < 0) return -1; return fdatasync(fd); } /* Writes a level 2 table at the end of the file. */ static u64 qcow_write_l2_table(struct qcow *q, u64 *table) { struct qcow_header *header = q->header; u64 clust_sz; u64 f_sz; u64 off; u64 sz; f_sz = file_size(q->fd); if (!f_sz) return 0; sz = 1 << header->l2_bits; clust_sz = 1 << header->cluster_bits; off = ALIGN(f_sz, clust_sz); if (pwrite_in_full(q->fd, table, sz * sizeof(u64), off) < 0) return 0; return off; } static void refcount_table_free_cache(struct qcow_refcount_table *rft) { struct rb_root *r = &rft->root; struct list_head *pos, *n; struct qcow_refcount_block *t; list_for_each_safe(pos, n, &rft->lru_list) { list_del(pos); t = list_entry(pos, struct qcow_refcount_block, list); rb_erase(&t->node, r); free(t); } } static int refcount_block_insert(struct rb_root *root, struct qcow_refcount_block *new) { struct rb_node **link = &(root->rb_node), *parent = NULL; u64 offset = new->offset; /* search the tree */ while (*link) { struct qcow_refcount_block *t; t = rb_entry(*link, struct qcow_refcount_block, node); if (!t) goto error; parent = *link; if (t->offset > offset) link = &(*link)->rb_left; else if (t->offset < offset) link = &(*link)->rb_right; else goto out; } /* add new node */ rb_link_node(&new->node, parent, link); rb_insert_color(&new->node, root); out: return 0; error: return -1; } static int write_refcount_block(struct qcow *q, struct qcow_refcount_block *rfb) { if (!rfb->dirty) return 0; if (pwrite_in_full(q->fd, rfb->entries, rfb->size * sizeof(u16), rfb->offset) < 0) return -1; rfb->dirty = 0; return 0; } static int cache_refcount_block(struct qcow *q, struct qcow_refcount_block *c) { struct qcow_refcount_table *rft = &q->refcount_table; struct rb_root *r = &rft->root; struct qcow_refcount_block *lru; if (rft->nr_cached == MAX_CACHE_NODES) { lru = list_first_entry(&rft->lru_list, struct qcow_refcount_block, list); if (write_refcount_block(q, lru) < 0) goto error; rb_erase(&lru->node, r); list_del_init(&lru->list); rft->nr_cached--; free(lru); } if (refcount_block_insert(r, c) < 0) goto error; list_add_tail(&c->list, &rft->lru_list); rft->nr_cached++; return 0; error: return -1; } static struct qcow_refcount_block *new_refcount_block(struct qcow *q, u64 rfb_offset) { struct qcow_refcount_block *rfb; rfb = malloc(sizeof *rfb + q->cluster_size); if (!rfb) return NULL; rfb->offset = rfb_offset; rfb->size = q->cluster_size / sizeof(u16); RB_CLEAR_NODE(&rfb->node); INIT_LIST_HEAD(&rfb->list); return rfb; } static struct qcow_refcount_block *refcount_block_lookup(struct rb_root *root, u64 offset) { struct rb_node *link = root->rb_node; while (link) { struct qcow_refcount_block *t; t = rb_entry(link, struct qcow_refcount_block, node); if (!t) goto out; if (t->offset > offset) link = link->rb_left; else if (t->offset < offset) link = link->rb_right; else return t; } out: return NULL; } static struct qcow_refcount_block *refcount_block_search(struct qcow *q, u64 offset) { struct qcow_refcount_table *rft = &q->refcount_table; struct qcow_refcount_block *rfb; rfb = refcount_block_lookup(&rft->root, offset); if (!rfb) return NULL; /* Update the LRU state, by moving the searched node to list tail */ list_move_tail(&rfb->list, &rft->lru_list); return rfb; } static struct qcow_refcount_block *qcow_read_refcount_block(struct qcow *q, u64 clust_idx) { struct qcow_header *header = q->header; struct qcow_refcount_table *rft = &q->refcount_table; struct qcow_refcount_block *rfb; u64 rfb_offset; u64 rft_idx; rft_idx = clust_idx >> (header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT); if (rft_idx >= rft->rf_size) return NULL; rfb_offset = be64_to_cpu(rft->rf_table[rft_idx]); rfb = refcount_block_search(q, rfb_offset); if (rfb) return rfb; rfb = new_refcount_block(q, rfb_offset); if (!rfb) return NULL; if (pread_in_full(q->fd, rfb->entries, rfb->size * sizeof(u16), rfb_offset) < 0) goto error_free_rfb; if (cache_refcount_block(q, rfb) < 0) goto error_free_rfb; return rfb; error_free_rfb: free(rfb); return NULL; } /* * QCOW file might grow during a write operation. Not only data but metadata is * also written at the end of the file. Therefore it is necessary to ensure * every write is committed to disk. Hence we use uses qcow_pwrite_sync() to * synchronize the in-core state of QCOW image to disk. * * We also try to restore the image to a consistent state if the metdata * operation fails. The two metadat operations are: level 1 and level 2 table * update. If either of them fails the image is truncated to a consistent state. */ static ssize_t qcow_write_cluster(struct qcow *q, u64 offset, void *buf, u32 src_len) { struct qcow_header *header = q->header; struct qcow_l1_table *l1t = &q->table; struct qcow_l2_table *l2t; u64 clust_start; u64 clust_flags; u64 l2t_offset; u64 clust_off; u64 l2t_size; u64 clust_sz; u64 l1t_idx; u64 l2t_idx; u64 f_sz; u64 len; l2t = NULL; l2t_size = 1 << header->l2_bits; clust_sz = 1 << header->cluster_bits; l1t_idx = get_l1_index(q, offset); if (l1t_idx >= l1t->table_size) return -1; l2t_idx = get_l2_index(q, offset); if (l2t_idx >= l2t_size) return -1; clust_off = get_cluster_offset(q, offset); if (clust_off >= clust_sz) return -1; len = clust_sz - clust_off; if (len > src_len) len = src_len; mutex_lock(&q->mutex); l2t_offset = be64_to_cpu(l1t->l1_table[l1t_idx]); if (l2t_offset & QCOW2_OFLAG_COMPRESSED) { pr_warning("compressed clusters are not supported"); goto error; } if (!(l2t_offset & QCOW2_OFLAG_COPIED)) { pr_warning("L2 copy-on-write clusters are not supported"); goto error; } l2t_offset &= QCOW2_OFFSET_MASK; if (l2t_offset) { /* read and cache l2 table */ l2t = qcow_read_l2_table(q, l2t_offset); if (!l2t) goto error; } else { l2t = new_cache_table(q, l2t_offset); if (!l2t) goto error; /* Capture the state of the consistent QCOW image */ f_sz = file_size(q->fd); if (!f_sz) goto free_cache; /* Write the l2 table of 0's at the end of the file */ l2t_offset = qcow_write_l2_table(q, l2t->table); if (!l2t_offset) goto free_cache; if (cache_table(q, l2t) < 0) { if (ftruncate(q->fd, f_sz) < 0) goto free_cache; goto free_cache; } /* Update the in-core entry */ l1t->l1_table[l1t_idx] = cpu_to_be64(l2t_offset); } /* Capture the state of the consistent QCOW image */ f_sz = file_size(q->fd); if (!f_sz) goto error; clust_start = be64_to_cpu(l2t->table[l2t_idx]); clust_flags = clust_start & QCOW2_OFLAGS_MASK; if (clust_flags & QCOW2_OFLAG_COMPRESSED) { pr_warning("compressed clusters are not supported"); goto error; } clust_start &= QCOW2_OFFSET_MASK; if (!clust_start) { clust_start = ALIGN(f_sz, clust_sz); l2t->table[l2t_idx] = cpu_to_be64(clust_start | QCOW2_OFLAG_COPIED); l2t->dirty = 1; } if (!(clust_flags & QCOW2_OFLAG_COPIED)) { struct qcow_refcount_block *rfb = NULL; u16 clust_refcount; u64 clust_idx; u64 rfb_idx; clust_idx = (clust_start & QCOW2_OFFSET_MASK) >> (header->cluster_bits); rfb = qcow_read_refcount_block(q, clust_idx); if (!rfb) { pr_warning("L1: error while reading refcount table"); goto error; } rfb_idx = clust_idx & (((1ULL << (header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT)) - 1)); if (rfb_idx >= rfb->size) { pr_warning("L1: refcount block index out of bounds"); goto error; } clust_refcount = be16_to_cpu(rfb->entries[rfb_idx]); if (!clust_refcount) { clust_refcount = 1; rfb->entries[rfb_idx] = cpu_to_be16(clust_refcount); rfb->dirty = 1; } if (clust_refcount > 1) { pr_warning("L1 copy-on-write clusters are not supported"); goto error; } } mutex_unlock(&q->mutex); /* Write actual data */ if (pwrite_in_full(q->fd, buf, len, clust_start + clust_off) < 0) return -1; return len; free_cache: free(l2t); error: mutex_unlock(&q->mutex); return -1; } static ssize_t qcow_write_sector_single(struct disk_image *disk, u64 sector, void *src, u32 src_len) { struct qcow *q = disk->priv; struct qcow_header *header = q->header; u32 nr_written; char *buf; u64 offset; ssize_t nr; buf = src; nr_written = 0; offset = sector << SECTOR_SHIFT; while (nr_written < src_len) { if (offset >= header->size) return -1; nr = qcow_write_cluster(q, offset, buf, src_len - nr_written); if (nr < 0) return -1; nr_written += nr; buf += nr; offset += nr; } return nr_written; } static ssize_t qcow_write_sector(struct disk_image *disk, u64 sector, const struct iovec *iov, int iovcount) { ssize_t nr, total = 0; while (iovcount--) { nr = qcow_write_sector_single(disk, sector, iov->iov_base, iov->iov_len); if (nr != (ssize_t)iov->iov_len) { pr_info("qcow_write_sector error: nr=%ld iov_len=%ld\n", (long)nr, (long)iov->iov_len); return -1; } sector += iov->iov_len >> SECTOR_SHIFT; iov++; total += nr; } return total; } static ssize_t qcow_nowrite_sector(struct disk_image *disk, u64 sector, const struct iovec *iov, int iovcount) { /* I/O error */ pr_info("%s: no write support\n", __func__); return -1; } static int qcow_disk_flush(struct disk_image *disk) { struct qcow *q = disk->priv; struct qcow_refcount_table *rft; struct qcow_header *header; struct list_head *pos, *n; struct qcow_l1_table *l1t; header = q->header; l1t = &q->table; rft = &q->refcount_table; mutex_lock(&q->mutex); list_for_each_safe(pos, n, &rft->lru_list) { struct qcow_refcount_block *c = list_entry(pos, struct qcow_refcount_block, list); if (write_refcount_block(q, c) < 0) goto error_unlock; } if (fdatasync(disk->fd) < 0) goto error_unlock; list_for_each_safe(pos, n, &l1t->lru_list) { struct qcow_l2_table *c = list_entry(pos, struct qcow_l2_table, list); if (qcow_l2_cache_write(q, c) < 0) goto error_unlock; } if (fdatasync(disk->fd) < 0) goto error_unlock; if (pwrite_in_full(disk->fd, l1t->l1_table, l1t->table_size * sizeof(u64), header->l1_table_offset) < 0) goto error_unlock; mutex_unlock(&q->mutex); return fsync(disk->fd); error_unlock: mutex_unlock(&q->mutex); return -1; } static int qcow_disk_close(struct disk_image *disk) { struct qcow *q; if (!disk) return 0; q = disk->priv; refcount_table_free_cache(&q->refcount_table); l1_table_free_cache(&q->table); free(q->cluster_data); free(q->cluster_cache); free(q->refcount_table.rf_table); free(q->table.l1_table); free(q->header); free(q); return 0; } static struct disk_image_operations qcow_disk_readonly_ops = { .read_sector = qcow_read_sector, .write_sector = qcow_nowrite_sector, .close = qcow_disk_close, }; static struct disk_image_operations qcow_disk_ops = { .read_sector = qcow_read_sector, .write_sector = qcow_write_sector, .flush = qcow_disk_flush, .close = qcow_disk_close, }; static int qcow_read_refcount_table(struct qcow *q) { struct qcow_header *header = q->header; struct qcow_refcount_table *rft = &q->refcount_table; rft->rf_size = (header->refcount_table_size * q->cluster_size) / sizeof(u64); rft->rf_table = calloc(rft->rf_size, sizeof(u64)); if (!rft->rf_table) return -1; rft->root = RB_ROOT; INIT_LIST_HEAD(&rft->lru_list); return pread_in_full(q->fd, rft->rf_table, sizeof(u64) * rft->rf_size, header->refcount_table_offset); } static int qcow_read_l1_table(struct qcow *q) { struct qcow_header *header = q->header; struct qcow_l1_table *table = &q->table; table->table_size = header->l1_size; table->l1_table = calloc(table->table_size, sizeof(u64)); if (!table->l1_table) return -1; return pread_in_full(q->fd, table->l1_table, sizeof(u64) * table->table_size, header->l1_table_offset); } static void *qcow2_read_header(int fd) { struct qcow2_header_disk f_header; struct qcow_header *header; header = malloc(sizeof(struct qcow_header)); if (!header) return NULL; if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0) { free(header); return NULL; } be32_to_cpus(&f_header.magic); be32_to_cpus(&f_header.version); be64_to_cpus(&f_header.backing_file_offset); be32_to_cpus(&f_header.backing_file_size); be32_to_cpus(&f_header.cluster_bits); be64_to_cpus(&f_header.size); be32_to_cpus(&f_header.crypt_method); be32_to_cpus(&f_header.l1_size); be64_to_cpus(&f_header.l1_table_offset); be64_to_cpus(&f_header.refcount_table_offset); be32_to_cpus(&f_header.refcount_table_clusters); be32_to_cpus(&f_header.nb_snapshots); be64_to_cpus(&f_header.snapshots_offset); *header = (struct qcow_header) { .size = f_header.size, .l1_table_offset = f_header.l1_table_offset, .l1_size = f_header.l1_size, .cluster_bits = f_header.cluster_bits, .l2_bits = f_header.cluster_bits - 3, .refcount_table_offset = f_header.refcount_table_offset, .refcount_table_size = f_header.refcount_table_clusters, }; return header; } static struct disk_image *qcow2_probe(int fd, bool readonly) { struct disk_image *disk_image; struct qcow_l1_table *l1t; struct qcow_header *h; struct qcow *q; q = calloc(1, sizeof(struct qcow)); if (!q) return NULL; mutex_init(&q->mutex); q->fd = fd; l1t = &q->table; l1t->root = RB_ROOT; INIT_LIST_HEAD(&l1t->lru_list); h = q->header = qcow2_read_header(fd); if (!h) goto free_qcow; q->version = QCOW2_VERSION; q->csize_shift = (62 - (q->header->cluster_bits - 8)); q->csize_mask = (1 << (q->header->cluster_bits - 8)) - 1; q->cluster_offset_mask = (1LL << q->csize_shift) - 1; q->cluster_size = 1 << q->header->cluster_bits; q->cluster_data = malloc(q->cluster_size); if (!q->cluster_data) { pr_warning("cluster data malloc error!"); goto free_header; } q->cluster_cache = malloc(q->cluster_size); if (!q->cluster_cache) { pr_warning("cluster cache malloc error!"); goto free_cluster_data; } if (qcow_read_l1_table(q) < 0) goto free_cluster_cache; if (qcow_read_refcount_table(q) < 0) goto free_l1_table; /* * Do not use mmap use read/write instead */ if (readonly) disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_REGULAR); else disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_REGULAR); if (!disk_image) goto free_refcount_table; disk_image->priv = q; return disk_image; free_refcount_table: if (q->refcount_table.rf_table) free(q->refcount_table.rf_table); free_l1_table: if (q->table.l1_table) free(q->table.l1_table); free_cluster_cache: if (q->cluster_cache) free(q->cluster_cache); free_cluster_data: if (q->cluster_data) free(q->cluster_data); free_header: if (q->header) free(q->header); free_qcow: if (q) free(q); return NULL; } static bool qcow2_check_image(int fd) { struct qcow2_header_disk f_header; if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0) return false; be32_to_cpus(&f_header.magic); be32_to_cpus(&f_header.version); if (f_header.magic != QCOW_MAGIC) return false; if (f_header.version != QCOW2_VERSION) return false; return true; } static void *qcow1_read_header(int fd) { struct qcow1_header_disk f_header; struct qcow_header *header; header = malloc(sizeof(struct qcow_header)); if (!header) return NULL; if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0) { free(header); return NULL; } be32_to_cpus(&f_header.magic); be32_to_cpus(&f_header.version); be64_to_cpus(&f_header.backing_file_offset); be32_to_cpus(&f_header.backing_file_size); be32_to_cpus(&f_header.mtime); be64_to_cpus(&f_header.size); be32_to_cpus(&f_header.crypt_method); be64_to_cpus(&f_header.l1_table_offset); *header = (struct qcow_header) { .size = f_header.size, .l1_table_offset = f_header.l1_table_offset, .l1_size = f_header.size / ((1 << f_header.l2_bits) * (1 << f_header.cluster_bits)), .cluster_bits = f_header.cluster_bits, .l2_bits = f_header.l2_bits, }; return header; } static struct disk_image *qcow1_probe(int fd, bool readonly) { struct disk_image *disk_image; struct qcow_l1_table *l1t; struct qcow_header *h; struct qcow *q; q = calloc(1, sizeof(struct qcow)); if (!q) return NULL; mutex_init(&q->mutex); q->fd = fd; l1t = &q->table; l1t->root = RB_ROOT; INIT_LIST_HEAD(&l1t->lru_list); h = q->header = qcow1_read_header(fd); if (!h) goto free_qcow; q->version = QCOW1_VERSION; q->cluster_size = 1 << q->header->cluster_bits; q->cluster_offset_mask = (1LL << (63 - q->header->cluster_bits)) - 1; q->cluster_data = malloc(q->cluster_size); if (!q->cluster_data) { pr_warning("cluster data malloc error!"); goto free_header; } q->cluster_cache = malloc(q->cluster_size); if (!q->cluster_cache) { pr_warning("cluster cache malloc error!"); goto free_cluster_data; } if (qcow_read_l1_table(q) < 0) goto free_cluster_cache; /* * Do not use mmap use read/write instead */ if (readonly) disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_REGULAR); else disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_REGULAR); if (!disk_image) goto free_l1_table; disk_image->priv = q; return disk_image; free_l1_table: if (q->table.l1_table) free(q->table.l1_table); free_cluster_cache: if (q->cluster_cache) free(q->cluster_cache); free_cluster_data: if (q->cluster_data) free(q->cluster_data); free_header: if (q->header) free(q->header); free_qcow: if (q) free(q); return NULL; } static bool qcow1_check_image(int fd) { struct qcow1_header_disk f_header; if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0) return false; be32_to_cpus(&f_header.magic); be32_to_cpus(&f_header.version); if (f_header.magic != QCOW_MAGIC) return false; if (f_header.version != QCOW1_VERSION) return false; return true; } struct disk_image *qcow_probe(int fd, bool readonly) { if (qcow1_check_image(fd)) return qcow1_probe(fd, readonly); if (qcow2_check_image(fd)) return qcow2_probe(fd, readonly); return NULL; }