#include "kvm/qcow.h"

#include "kvm/disk-image.h"
#include "kvm/read-write.h"
#include "kvm/mutex.h"
#include "kvm/util.h"

#include <sys/types.h>
#include <sys/stat.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#ifdef CONFIG_HAS_ZLIB
#include <zlib.h>
#endif

#include <linux/byteorder.h>
#include <linux/kernel.h>
#include <linux/types.h>

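/*
 * Cached L2 tables live in two structures at once: an rbtree keyed by
 * the table's file offset, for fast lookup, and an LRU list so that
 * cache_table() can evict the least recently used entry (writing it
 * back first if it is dirty) once MAX_CACHE_NODES tables are cached.
 */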
static int l2_table_insert(struct rb_root *root, struct qcow_l2_table *new)
{
	struct rb_node **link = &(root->rb_node), *parent = NULL;
	u64 offset = new->offset;

	/* search the tree */
	while (*link) {
		struct qcow_l2_table *t;

		t = rb_entry(*link, struct qcow_l2_table, node);
		if (!t)
			goto error;

		parent = *link;

		if (t->offset > offset)
			link = &(*link)->rb_left;
		else if (t->offset < offset)
			link = &(*link)->rb_right;
		else
			goto out;
	}

	/* add new node */
	rb_link_node(&new->node, parent, link);
	rb_insert_color(&new->node, root);
out:
	return 0;
error:
	return -1;
}

static struct qcow_l2_table *l2_table_lookup(struct rb_root *root, u64 offset)
{
	struct rb_node *link = root->rb_node;

	while (link) {
		struct qcow_l2_table *t;

		t = rb_entry(link, struct qcow_l2_table, node);
		if (!t)
			goto out;

		if (t->offset > offset)
			link = link->rb_left;
		else if (t->offset < offset)
			link = link->rb_right;
		else
			return t;
	}
out:
	return NULL;
}

static void l1_table_free_cache(struct qcow_l1_table *l1t)
{
	struct rb_root *r = &l1t->root;
	struct list_head *pos, *n;
	struct qcow_l2_table *t;

	list_for_each_safe(pos, n, &l1t->lru_list) {
		/* Remove the cached table from the list and the RB tree */
		list_del(pos);
		t = list_entry(pos, struct qcow_l2_table, list);
		rb_erase(&t->node, r);

		/* Free the cached node */
		free(t);
	}
}

static int qcow_l2_cache_write(struct qcow *q, struct qcow_l2_table *c)
{
	struct qcow_header *header = q->header;
	u64 size;

	if (!c->dirty)
		return 0;

	size = 1 << header->l2_bits;

	if (pwrite_in_full(q->fd, c->table, size * sizeof(u64), c->offset) < 0)
		return -1;

	c->dirty = 0;

	return 0;
}

static int cache_table(struct qcow *q, struct qcow_l2_table *c)
{
	struct qcow_l1_table *l1t = &q->table;
	struct rb_root *r = &l1t->root;
	struct qcow_l2_table *lru;

	if (l1t->nr_cached == MAX_CACHE_NODES) {
		/*
		 * The node at the head of the list is the least recently
		 * used node. Remove it from the list and replace it with
		 * a new node.
		 */
		lru = list_first_entry(&l1t->lru_list, struct qcow_l2_table, list);

		if (qcow_l2_cache_write(q, lru) < 0)
			goto error;

		/* Remove the node from the cache */
		rb_erase(&lru->node, r);
		list_del_init(&lru->list);
		l1t->nr_cached--;

		/* Free the evicted node */
		free(lru);
	}

	/* Add the new node to the RB tree: helps make searches faster */
	if (l2_table_insert(r, c) < 0)
		goto error;

	/* Add it to the LRU replacement list */
	list_add_tail(&c->list, &l1t->lru_list);
	l1t->nr_cached++;

	return 0;
error:
	return -1;
}

static struct qcow_l2_table *l2_table_search(struct qcow *q, u64 offset)
{
	struct qcow_l1_table *l1t = &q->table;
	struct qcow_l2_table *l2t;

	l2t = l2_table_lookup(&l1t->root, offset);
	if (!l2t)
		return NULL;

	/* Update the LRU state by moving the searched node to the list tail */
	list_move_tail(&l2t->list, &l1t->lru_list);

	return l2t;
}

/* Allocates a new node for caching an L2 table */
static struct qcow_l2_table *new_cache_table(struct qcow *q, u64 offset)
{
	struct qcow_header *header = q->header;
	struct qcow_l2_table *c;
	u64 l2t_sz;
	u64 size;

	l2t_sz = 1 << header->l2_bits;
	size = sizeof(*c) + l2t_sz * sizeof(u64);
	c = calloc(1, size);
	if (!c)
		goto out;

	c->offset = offset;
	RB_CLEAR_NODE(&c->node);
	INIT_LIST_HEAD(&c->list);
out:
	return c;
}

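/*
 * A guest (virtual) disk offset decomposes into three fields, extracted
 * by the helpers below. For example, with the common qcow2 defaults of
 * cluster_bits = 16 (64 KiB clusters) and l2_bits = 13 (8192 entries per
 * L2 table), the offset 0x23456789 splits as:
 *
 *	l1 index       = 0x23456789 >> (13 + 16)     = 1
 *	l2 index       = (0x23456789 >> 16) & 0x1fff = 0x345
 *	cluster offset = 0x23456789 & 0xffff         = 0x6789
 */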
static inline u64 get_l1_index(struct qcow *q, u64 offset)
{
	struct qcow_header *header = q->header;

	return offset >> (header->l2_bits + header->cluster_bits);
}

static inline u64 get_l2_index(struct qcow *q, u64 offset)
{
	struct qcow_header *header = q->header;

	return (offset >> (header->cluster_bits)) & ((1 << header->l2_bits) - 1);
}

static inline u64 get_cluster_offset(struct qcow *q, u64 offset)
{
	struct qcow_header *header = q->header;

	return offset & ((1 << header->cluster_bits) - 1);
}

static struct qcow_l2_table *qcow_read_l2_table(struct qcow *q, u64 offset)
{
	struct qcow_header *header = q->header;
	struct qcow_l2_table *l2t;
	u64 size;

	size = 1 << header->l2_bits;

	/* search an entry for offset in cache */
	l2t = l2_table_search(q, offset);
	if (l2t)
		return l2t;

	/* allocate new node for caching l2 table */
	l2t = new_cache_table(q, offset);
	if (!l2t)
		goto error;

	/* table not cached: read from the disk */
	if (pread_in_full(q->fd, l2t->table, size * sizeof(u64), offset) < 0)
		goto error;

	/* cache the table */
	if (cache_table(q, l2t) < 0)
		goto error;

	return l2t;
error:
	free(l2t);
	return NULL;
}

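/*
 * qcow stores compressed clusters as raw deflate streams with no zlib
 * header or checksum, hence the negative windowBits value passed to
 * inflateInit2(). Z_BUF_ERROR is tolerated because the input buffer may
 * extend past the end of the compressed data; qcow2 rounds compressed
 * extents up to whole sectors.
 */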
static int qcow_decompress_buffer(u8 *out_buf, int out_buf_size,
	const u8 *buf, int buf_size)
{
#ifdef CONFIG_HAS_ZLIB
	z_stream strm1, *strm = &strm1;
	int ret, out_len;

	memset(strm, 0, sizeof(*strm));

	strm->next_in = (u8 *)buf;
	strm->avail_in = buf_size;
	strm->next_out = out_buf;
	strm->avail_out = out_buf_size;

	ret = inflateInit2(strm, -12);
	if (ret != Z_OK)
		return -1;

	ret = inflate(strm, Z_FINISH);
	out_len = strm->next_out - out_buf;
	if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
	    out_len != out_buf_size) {
		inflateEnd(strm);
		return -1;
	}

	inflateEnd(strm);
	return 0;
#else
	return -1;
#endif
}

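/*
 * In a qcow1 L2 entry, bit 63 (QCOW1_OFLAG_COMPRESSED) marks a
 * compressed cluster; the bits below it hold the compressed byte count
 * and the low bits the host file offset, which the cluster_offset_mask
 * and the (63 - cluster_bits) shift below unpack.
 */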
static ssize_t qcow1_read_cluster(struct qcow *q, u64 offset,
	void *dst, u32 dst_len)
{
	struct qcow_header *header = q->header;
	struct qcow_l1_table *l1t = &q->table;
	struct qcow_l2_table *l2t;
	u64 clust_offset;
	u64 clust_start;
	u64 l2t_offset;
	size_t length;
	u64 l2t_size;
	u64 l1_idx;
	u64 l2_idx;
	int coffset;
	int csize;

	l1_idx = get_l1_index(q, offset);
	if (l1_idx >= l1t->table_size)
		return -1;

	clust_offset = get_cluster_offset(q, offset);
	if (clust_offset >= q->cluster_size)
		return -1;

	length = q->cluster_size - clust_offset;
	if (length > dst_len)
		length = dst_len;

	mutex_lock(&q->mutex);

	l2t_offset = be64_to_cpu(l1t->l1_table[l1_idx]);
	if (!l2t_offset)
		goto zero_cluster;

	l2t_size = 1 << header->l2_bits;

	/* read and cache level 2 table */
	l2t = qcow_read_l2_table(q, l2t_offset);
	if (!l2t)
		goto out_error;

	l2_idx = get_l2_index(q, offset);
	if (l2_idx >= l2t_size)
		goto out_error;

	clust_start = be64_to_cpu(l2t->table[l2_idx]);
	if (clust_start & QCOW1_OFLAG_COMPRESSED) {
		coffset = clust_start & q->cluster_offset_mask;
		csize = clust_start >> (63 - q->header->cluster_bits);
		csize &= (q->cluster_size - 1);

		if (pread_in_full(q->fd, q->cluster_data, csize,
				  coffset) < 0)
			goto out_error;

		if (qcow_decompress_buffer(q->cluster_cache, q->cluster_size,
					   q->cluster_data, csize) < 0)
			goto out_error;

		memcpy(dst, q->cluster_cache + clust_offset, length);
		mutex_unlock(&q->mutex);
	} else {
		if (!clust_start)
			goto zero_cluster;

		mutex_unlock(&q->mutex);

		if (pread_in_full(q->fd, dst, length,
				  clust_start + clust_offset) < 0)
			return -1;
	}

	return length;

zero_cluster:
	mutex_unlock(&q->mutex);
	memset(dst, 0, length);
	return length;

out_error:
	mutex_unlock(&q->mutex);
	return -1;
}

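/*
 * qcow2 L2 entries carry flag bits on top of the host offset:
 * QCOW2_OFLAG_COPIED means the cluster's refcount is 1 (writable in
 * place) and QCOW2_OFLAG_COMPRESSED marks a compressed descriptor
 * holding a byte offset plus a sector count, unpacked here with the
 * csize_shift/csize_mask values computed in qcow2_probe().
 */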
static ssize_t qcow2_read_cluster(struct qcow *q, u64 offset,
	void *dst, u32 dst_len)
{
	struct qcow_header *header = q->header;
	struct qcow_l1_table *l1t = &q->table;
	struct qcow_l2_table *l2t;
	u64 clust_offset;
	u64 clust_start;
	u64 l2t_offset;
	size_t length;
	u64 l2t_size;
	u64 l1_idx;
	u64 l2_idx;
	int coffset;
	int sector_offset;
	int nb_csectors;
	int csize;

	l1_idx = get_l1_index(q, offset);
	if (l1_idx >= l1t->table_size)
		return -1;

	clust_offset = get_cluster_offset(q, offset);
	if (clust_offset >= q->cluster_size)
		return -1;

	length = q->cluster_size - clust_offset;
	if (length > dst_len)
		length = dst_len;

	mutex_lock(&q->mutex);

	l2t_offset = be64_to_cpu(l1t->l1_table[l1_idx]);

	l2t_offset &= ~QCOW2_OFLAG_COPIED;
	if (!l2t_offset)
		goto zero_cluster;

	l2t_size = 1 << header->l2_bits;

	/* read and cache level 2 table */
	l2t = qcow_read_l2_table(q, l2t_offset);
	if (!l2t)
		goto out_error;

	l2_idx = get_l2_index(q, offset);
	if (l2_idx >= l2t_size)
		goto out_error;

	clust_start = be64_to_cpu(l2t->table[l2_idx]);
	if (clust_start & QCOW2_OFLAG_COMPRESSED) {
		coffset = clust_start & q->cluster_offset_mask;
		nb_csectors = ((clust_start >> q->csize_shift)
			& q->csize_mask) + 1;
		sector_offset = coffset & (SECTOR_SIZE - 1);
		csize = nb_csectors * SECTOR_SIZE - sector_offset;

		if (pread_in_full(q->fd, q->cluster_data,
				  nb_csectors * SECTOR_SIZE,
				  coffset & ~(SECTOR_SIZE - 1)) < 0)
			goto out_error;

		if (qcow_decompress_buffer(q->cluster_cache, q->cluster_size,
					   q->cluster_data + sector_offset,
					   csize) < 0)
			goto out_error;

		memcpy(dst, q->cluster_cache + clust_offset, length);
		mutex_unlock(&q->mutex);
	} else {
		clust_start &= QCOW2_OFFSET_MASK;
		if (!clust_start)
			goto zero_cluster;

		mutex_unlock(&q->mutex);

		if (pread_in_full(q->fd, dst, length,
				  clust_start + clust_offset) < 0)
			return -1;
	}

	return length;

zero_cluster:
	mutex_unlock(&q->mutex);
	memset(dst, 0, length);
	return length;

out_error:
	mutex_unlock(&q->mutex);
	return -1;
}

static ssize_t qcow_read_sector_single(struct disk_image *disk, u64 sector,
	void *dst, u32 dst_len)
{
	struct qcow *q = disk->priv;
	struct qcow_header *header = q->header;
	u32 nr_read;
	u64 offset;
	char *buf;
	ssize_t nr;

	buf = dst;
	nr_read = 0;

	while (nr_read < dst_len) {
		offset = sector << SECTOR_SHIFT;
		if (offset >= header->size)
			return -1;

		if (q->version == QCOW1_VERSION)
			nr = qcow1_read_cluster(q, offset, buf,
				dst_len - nr_read);
		else
			nr = qcow2_read_cluster(q, offset, buf,
				dst_len - nr_read);

		if (nr <= 0)
			return -1;

		nr_read += nr;
		buf += nr;
		sector += (nr >> SECTOR_SHIFT);
	}

	return dst_len;
}

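/*
 * Reads are performed one iovec entry at a time; a short read of any
 * entry is treated as a hard error rather than a partial transfer.
 */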
static ssize_t qcow_read_sector(struct disk_image *disk, u64 sector,
	const struct iovec *iov, int iovcount)
{
	ssize_t nr, total = 0;

	while (iovcount--) {
		nr = qcow_read_sector_single(disk, sector, iov->iov_base, iov->iov_len);
		if (nr != (ssize_t)iov->iov_len) {
			pr_info("qcow_read_sector error: nr=%ld iov_len=%ld\n", (long)nr, (long)iov->iov_len);
			return -1;
		}

		sector += iov->iov_len >> SECTOR_SHIFT;
		iov++;
		total += nr;
	}

	return total;
}

static inline u64 file_size(int fd)
{
	struct stat st;

	if (fstat(fd, &st) < 0)
		return 0;

	return st.st_size;
}

static inline int qcow_pwrite_sync(int fd, void *buf, size_t count, off_t offset)
{
	if (pwrite_in_full(fd, buf, count, offset) < 0)
		return -1;

	return fdatasync(fd);
}

/* Writes a level 2 table at the end of the file. */
static u64 qcow_write_l2_table(struct qcow *q, u64 *table)
{
	struct qcow_header *header = q->header;
	u64 clust_sz;
	u64 f_sz;
	u64 off;
	u64 sz;

	f_sz = file_size(q->fd);
	if (!f_sz)
		return 0;

	sz = 1 << header->l2_bits;
	clust_sz = 1 << header->cluster_bits;
	off = ALIGN(f_sz, clust_sz);

	if (pwrite_in_full(q->fd, table, sz * sizeof(u64), off) < 0)
		return 0;

	return off;
}

static void refcount_table_free_cache(struct qcow_refcount_table *rft)
{
	struct rb_root *r = &rft->root;
	struct list_head *pos, *n;
	struct qcow_refcount_block *t;

	list_for_each_safe(pos, n, &rft->lru_list) {
		list_del(pos);
		t = list_entry(pos, struct qcow_refcount_block, list);
		rb_erase(&t->node, r);

		free(t);
	}
}

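/*
 * The refcount block cache below mirrors the L2 table cache above: an
 * rbtree keyed by file offset for lookup, an LRU list for eviction, and
 * dirty blocks written back before they are dropped.
 */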
static int refcount_block_insert(struct rb_root *root, struct qcow_refcount_block *new)
{
	struct rb_node **link = &(root->rb_node), *parent = NULL;
	u64 offset = new->offset;

	/* search the tree */
	while (*link) {
		struct qcow_refcount_block *t;

		t = rb_entry(*link, struct qcow_refcount_block, node);
		if (!t)
			goto error;

		parent = *link;

		if (t->offset > offset)
			link = &(*link)->rb_left;
		else if (t->offset < offset)
			link = &(*link)->rb_right;
		else
			goto out;
	}

	/* add new node */
	rb_link_node(&new->node, parent, link);
	rb_insert_color(&new->node, root);
out:
	return 0;
error:
	return -1;
}

static int write_refcount_block(struct qcow *q, struct qcow_refcount_block *rfb)
{
	if (!rfb->dirty)
		return 0;

	if (pwrite_in_full(q->fd, rfb->entries, rfb->size * sizeof(u16), rfb->offset) < 0)
		return -1;

	rfb->dirty = 0;

	return 0;
}

static int cache_refcount_block(struct qcow *q, struct qcow_refcount_block *c)
{
	struct qcow_refcount_table *rft = &q->refcount_table;
	struct rb_root *r = &rft->root;
	struct qcow_refcount_block *lru;

	if (rft->nr_cached == MAX_CACHE_NODES) {
		lru = list_first_entry(&rft->lru_list, struct qcow_refcount_block, list);

		if (write_refcount_block(q, lru) < 0)
			goto error;

		rb_erase(&lru->node, r);
		list_del_init(&lru->list);
		rft->nr_cached--;

		free(lru);
	}

	if (refcount_block_insert(r, c) < 0)
		goto error;

	list_add_tail(&c->list, &rft->lru_list);
	rft->nr_cached++;

	return 0;
error:
	return -1;
}

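/*
 * Allocates a refcount block covering one cluster: qcow2 refcounts are
 * 16-bit big-endian values, so a block holds cluster_size / 2 entries.
 */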
static struct qcow_refcount_block *new_refcount_block(struct qcow *q, u64 rfb_offset)
{
	struct qcow_refcount_block *rfb;

	rfb = malloc(sizeof *rfb + q->cluster_size);
	if (!rfb)
		return NULL;

	rfb->offset = rfb_offset;
	rfb->size = q->cluster_size / sizeof(u16);
	RB_CLEAR_NODE(&rfb->node);
	INIT_LIST_HEAD(&rfb->list);

	return rfb;
}

static struct qcow_refcount_block *refcount_block_lookup(struct rb_root *root, u64 offset)
{
	struct rb_node *link = root->rb_node;

	while (link) {
		struct qcow_refcount_block *t;

		t = rb_entry(link, struct qcow_refcount_block, node);
		if (!t)
			goto out;

		if (t->offset > offset)
			link = link->rb_left;
		else if (t->offset < offset)
			link = link->rb_right;
		else
			return t;
	}
out:
	return NULL;
}

static struct qcow_refcount_block *refcount_block_search(struct qcow *q, u64 offset)
{
	struct qcow_refcount_table *rft = &q->refcount_table;
	struct qcow_refcount_block *rfb;

	rfb = refcount_block_lookup(&rft->root, offset);
	if (!rfb)
		return NULL;

	/* Update the LRU state, by moving the searched node to list tail */
	list_move_tail(&rfb->list, &rft->lru_list);

	return rfb;
}

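/*
 * Refcount lookup splits a cluster index the same way the L1/L2 lookup
 * splits a guest offset: with 64 KiB clusters and 2-byte entries a
 * block holds 65536 / 2 = 32768 refcounts, so rft_idx = clust_idx >> 15
 * selects the block and the low 15 bits select the entry within it
 * (QCOW_REFCOUNT_BLOCK_SHIFT accounts for the 2-byte entry size).
 */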
static struct qcow_refcount_block *qcow_read_refcount_block(struct qcow *q, u64 clust_idx)
{
	struct qcow_header *header = q->header;
	struct qcow_refcount_table *rft = &q->refcount_table;
	struct qcow_refcount_block *rfb;
	u64 rfb_offset;
	u64 rft_idx;

	rft_idx = clust_idx >> (header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT);
	if (rft_idx >= rft->rf_size)
		return NULL;

	rfb_offset = be64_to_cpu(rft->rf_table[rft_idx]);

	rfb = refcount_block_search(q, rfb_offset);
	if (rfb)
		return rfb;

	rfb = new_refcount_block(q, rfb_offset);
	if (!rfb)
		return NULL;

	if (pread_in_full(q->fd, rfb->entries, rfb->size * sizeof(u16), rfb_offset) < 0)
		goto error_free_rfb;

	if (cache_refcount_block(q, rfb) < 0)
		goto error_free_rfb;

	return rfb;

error_free_rfb:
	free(rfb);

	return NULL;
}

/*
 * A QCOW file might grow during a write operation. Both data and
 * metadata are written at the end of the file, so it is necessary to
 * ensure that every write is committed to disk. Hence we use
 * qcow_pwrite_sync() to synchronize the in-core state of the QCOW image
 * to disk.
 *
 * We also try to restore the image to a consistent state if a metadata
 * operation fails. The two metadata operations are the level 1 and
 * level 2 table updates; if either of them fails, the image is
 * truncated back to a consistent state.
 */
static ssize_t qcow_write_cluster(struct qcow *q, u64 offset, void *buf, u32 src_len)
{
	struct qcow_header *header = q->header;
	struct qcow_l1_table *l1t = &q->table;
	struct qcow_l2_table *l2t;
	u64 clust_start;
	u64 clust_flags;
	u64 l2t_offset;
	u64 clust_off;
	u64 l2t_size;
	u64 clust_sz;
	u64 l1t_idx;
	u64 l2t_idx;
	u64 f_sz;
	u64 len;

	l2t = NULL;
	l2t_size = 1 << header->l2_bits;
	clust_sz = 1 << header->cluster_bits;

	l1t_idx = get_l1_index(q, offset);
	if (l1t_idx >= l1t->table_size)
		return -1;

	l2t_idx = get_l2_index(q, offset);
	if (l2t_idx >= l2t_size)
		return -1;

	clust_off = get_cluster_offset(q, offset);
	if (clust_off >= clust_sz)
		return -1;

	len = clust_sz - clust_off;
	if (len > src_len)
		len = src_len;

	mutex_lock(&q->mutex);

	l2t_offset = be64_to_cpu(l1t->l1_table[l1t_idx]);
	if (l2t_offset & QCOW2_OFLAG_COMPRESSED) {
		pr_warning("compressed clusters are not supported");
		goto error;
	}
	if (!(l2t_offset & QCOW2_OFLAG_COPIED)) {
		pr_warning("L2 copy-on-write clusters are not supported");
		goto error;
	}

	l2t_offset &= QCOW2_OFFSET_MASK;
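	/*
	 * Either path below leaves l2t pointing at a cached L2 table:
	 * reuse the table on disk if the L1 entry names one, otherwise
	 * allocate a zeroed table at the end of the file (rolling the
	 * file size back if caching it fails).
	 */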
	if (l2t_offset) {
		/* read and cache l2 table */
		l2t = qcow_read_l2_table(q, l2t_offset);
		if (!l2t)
			goto error;
	} else {
		l2t = new_cache_table(q, l2t_offset);
		if (!l2t)
			goto error;

		/* Capture the state of the consistent QCOW image */
		f_sz = file_size(q->fd);
		if (!f_sz)
			goto free_cache;

		/* Write the l2 table of 0's at the end of the file */
		l2t_offset = qcow_write_l2_table(q, l2t->table);
		if (!l2t_offset)
			goto free_cache;

		if (cache_table(q, l2t) < 0) {
			if (ftruncate(q->fd, f_sz) < 0)
				goto free_cache;

			goto free_cache;
		}

		/* Update the in-core entry */
		l1t->l1_table[l1t_idx] = cpu_to_be64(l2t_offset);
	}

	/* Capture the state of the consistent QCOW image */
	f_sz = file_size(q->fd);
	if (!f_sz)
		goto error;

	clust_start = be64_to_cpu(l2t->table[l2t_idx]);

	clust_flags = clust_start & QCOW2_OFLAGS_MASK;
	if (clust_flags & QCOW2_OFLAG_COMPRESSED) {
		pr_warning("compressed clusters are not supported");
		goto error;
	}

	clust_start &= QCOW2_OFFSET_MASK;
	if (!clust_start) {
		clust_start = ALIGN(f_sz, clust_sz);
		l2t->table[l2t_idx] = cpu_to_be64(clust_start | QCOW2_OFLAG_COPIED);
		l2t->dirty = 1;
	}

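	/*
	 * Without QCOW2_OFLAG_COPIED the cluster may be shared with a
	 * snapshot. Check its refcount: an unallocated entry is claimed
	 * with refcount 1; anything above 1 would require copy-on-write,
	 * which this implementation does not support.
	 */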
	if (!(clust_flags & QCOW2_OFLAG_COPIED)) {
		struct qcow_refcount_block *rfb = NULL;
		u16 clust_refcount;
		u64 clust_idx;
		u64 rfb_idx;

		clust_idx = (clust_start & QCOW2_OFFSET_MASK)
			>> (header->cluster_bits);

		rfb = qcow_read_refcount_block(q, clust_idx);
		if (!rfb) {
			pr_warning("L1: error while reading refcount table");
			goto error;
		}

		rfb_idx = clust_idx & (((1ULL << (header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT)) - 1));
		if (rfb_idx >= rfb->size) {
			pr_warning("L1: refcount block index out of bounds");
			goto error;
		}

		clust_refcount = be16_to_cpu(rfb->entries[rfb_idx]);
		if (!clust_refcount) {
			clust_refcount = 1;
			rfb->entries[rfb_idx] = cpu_to_be16(clust_refcount);
			rfb->dirty = 1;
		}

		if (clust_refcount > 1) {
			pr_warning("L1 copy-on-write clusters are not supported");
			goto error;
		}
	}

	mutex_unlock(&q->mutex);

	/* Write the actual data */
	if (pwrite_in_full(q->fd, buf, len, clust_start + clust_off) < 0)
		return -1;

	return len;

free_cache:
	free(l2t);
error:
	mutex_unlock(&q->mutex);
	return -1;
}

static ssize_t qcow_write_sector_single(struct disk_image *disk, u64 sector, void *src, u32 src_len)
{
	struct qcow *q = disk->priv;
	struct qcow_header *header = q->header;
	u32 nr_written;
	char *buf;
	u64 offset;
	ssize_t nr;

	buf = src;
	nr_written = 0;
	offset = sector << SECTOR_SHIFT;

	while (nr_written < src_len) {
		if (offset >= header->size)
			return -1;

		nr = qcow_write_cluster(q, offset, buf, src_len - nr_written);
		if (nr < 0)
			return -1;

		nr_written += nr;
		buf += nr;
		offset += nr;
	}

	return nr_written;
}

static ssize_t qcow_write_sector(struct disk_image *disk, u64 sector,
	const struct iovec *iov, int iovcount)
{
	ssize_t nr, total = 0;

	while (iovcount--) {
		nr = qcow_write_sector_single(disk, sector, iov->iov_base, iov->iov_len);
		if (nr != (ssize_t)iov->iov_len) {
			pr_info("qcow_write_sector error: nr=%ld iov_len=%ld\n", (long)nr, (long)iov->iov_len);
			return -1;
		}

		sector += iov->iov_len >> SECTOR_SHIFT;
		iov++;
		total += nr;
	}

	return total;
}

static ssize_t qcow_nowrite_sector(struct disk_image *disk, u64 sector,
	const struct iovec *iov, int iovcount)
{
	/* I/O error */
	pr_info("%s: no write support\n", __func__);
	return -1;
}

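/*
 * Flush ordering matters: dirty refcount blocks and L2 tables are
 * written back and synced with fdatasync() before the L1 table that
 * references them goes out, so a crash cannot leave the L1 table
 * pointing at metadata that never reached the disk.
 */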
static int qcow_disk_flush(struct disk_image *disk)
{
	struct qcow *q = disk->priv;
	struct qcow_refcount_table *rft;
	struct qcow_header *header;
	struct list_head *pos, *n;
	struct qcow_l1_table *l1t;

	header = q->header;
	l1t = &q->table;
	rft = &q->refcount_table;

	mutex_lock(&q->mutex);

	list_for_each_safe(pos, n, &rft->lru_list) {
		struct qcow_refcount_block *c = list_entry(pos, struct qcow_refcount_block, list);

		if (write_refcount_block(q, c) < 0)
			goto error_unlock;
	}

	if (fdatasync(disk->fd) < 0)
		goto error_unlock;

	list_for_each_safe(pos, n, &l1t->lru_list) {
		struct qcow_l2_table *c = list_entry(pos, struct qcow_l2_table, list);

		if (qcow_l2_cache_write(q, c) < 0)
			goto error_unlock;
	}

	if (fdatasync(disk->fd) < 0)
		goto error_unlock;

	if (pwrite_in_full(disk->fd, l1t->l1_table, l1t->table_size * sizeof(u64), header->l1_table_offset) < 0)
		goto error_unlock;

	mutex_unlock(&q->mutex);

	return fsync(disk->fd);

error_unlock:
	mutex_unlock(&q->mutex);
	return -1;
}

static int qcow_disk_close(struct disk_image *disk)
{
	struct qcow *q;

	if (!disk)
		return 0;

	q = disk->priv;

	refcount_table_free_cache(&q->refcount_table);
	l1_table_free_cache(&q->table);
	free(q->cluster_data);
	free(q->cluster_cache);
	free(q->refcount_table.rf_table);
	free(q->table.l1_table);
	free(q->header);
	free(q);

	return 0;
}

static struct disk_image_operations qcow_disk_readonly_ops = {
	.read_sector	= qcow_read_sector,
	.write_sector	= qcow_nowrite_sector,
	.close		= qcow_disk_close,
};

static struct disk_image_operations qcow_disk_ops = {
	.read_sector	= qcow_read_sector,
	.write_sector	= qcow_write_sector,
	.flush		= qcow_disk_flush,
	.close		= qcow_disk_close,
};

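/*
 * header->refcount_table_size is measured in clusters; each cluster of
 * the table holds cluster_size / sizeof(u64) refcount block offsets.
 */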
static int qcow_read_refcount_table(struct qcow *q)
{
	struct qcow_header *header = q->header;
	struct qcow_refcount_table *rft = &q->refcount_table;

	rft->rf_size = (header->refcount_table_size * q->cluster_size)
		/ sizeof(u64);

	rft->rf_table = calloc(rft->rf_size, sizeof(u64));
	if (!rft->rf_table)
		return -1;

	rft->root = RB_ROOT;
	INIT_LIST_HEAD(&rft->lru_list);

	return pread_in_full(q->fd, rft->rf_table, sizeof(u64) * rft->rf_size, header->refcount_table_offset);
}

static int qcow_read_l1_table(struct qcow *q)
{
	struct qcow_header *header = q->header;
	struct qcow_l1_table *table = &q->table;

	table->table_size = header->l1_size;

	table->l1_table = calloc(table->table_size, sizeof(u64));
	if (!table->l1_table)
		return -1;

	return pread_in_full(q->fd, table->l1_table, sizeof(u64) * table->table_size, header->l1_table_offset);
}

static void *qcow2_read_header(int fd)
{
	struct qcow2_header_disk f_header;
	struct qcow_header *header;

	header = malloc(sizeof(struct qcow_header));
	if (!header)
		return NULL;

	if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0) {
		free(header);
		return NULL;
	}

	be32_to_cpus(&f_header.magic);
	be32_to_cpus(&f_header.version);
	be64_to_cpus(&f_header.backing_file_offset);
	be32_to_cpus(&f_header.backing_file_size);
	be32_to_cpus(&f_header.cluster_bits);
	be64_to_cpus(&f_header.size);
	be32_to_cpus(&f_header.crypt_method);
	be32_to_cpus(&f_header.l1_size);
	be64_to_cpus(&f_header.l1_table_offset);
	be64_to_cpus(&f_header.refcount_table_offset);
	be32_to_cpus(&f_header.refcount_table_clusters);
	be32_to_cpus(&f_header.nb_snapshots);
	be64_to_cpus(&f_header.snapshots_offset);

	*header = (struct qcow_header) {
		.size			= f_header.size,
		.l1_table_offset	= f_header.l1_table_offset,
		.l1_size		= f_header.l1_size,
		.cluster_bits		= f_header.cluster_bits,
		.l2_bits		= f_header.cluster_bits - 3,
		.refcount_table_offset	= f_header.refcount_table_offset,
		.refcount_table_size	= f_header.refcount_table_clusters,
	};

	return header;
}

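/*
 * qcow2_probe() wires everything together: it reads the header, derives
 * the shift/mask constants used to unpack compressed cluster
 * descriptors, allocates the scratch buffers those descriptors are
 * decompressed through, and loads the L1 and refcount tables. Note that
 * l2_bits is cluster_bits - 3 because each L2 entry is eight bytes, so
 * exactly one cluster holds an L2 table.
 */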
static struct disk_image *qcow2_probe(int fd, bool readonly)
{
	struct disk_image *disk_image;
	struct qcow_l1_table *l1t;
	struct qcow_header *h;
	struct qcow *q;

	q = calloc(1, sizeof(struct qcow));
	if (!q)
		return NULL;

	mutex_init(&q->mutex);
	q->fd = fd;

	l1t = &q->table;

	l1t->root = RB_ROOT;
	INIT_LIST_HEAD(&l1t->lru_list);

	h = q->header = qcow2_read_header(fd);
	if (!h)
		goto free_qcow;

	q->version = QCOW2_VERSION;
	q->csize_shift = (62 - (q->header->cluster_bits - 8));
	q->csize_mask = (1 << (q->header->cluster_bits - 8)) - 1;
	q->cluster_offset_mask = (1LL << q->csize_shift) - 1;
	q->cluster_size = 1 << q->header->cluster_bits;

	q->cluster_data = malloc(q->cluster_size);
	if (!q->cluster_data) {
		pr_warning("cluster data malloc error!");
		goto free_header;
	}

	q->cluster_cache = malloc(q->cluster_size);
	if (!q->cluster_cache) {
		pr_warning("cluster cache malloc error!");
		goto free_cluster_data;
	}

	if (qcow_read_l1_table(q) < 0)
		goto free_cluster_cache;

	if (qcow_read_refcount_table(q) < 0)
		goto free_l1_table;

	/*
	 * Do not use mmap; use read/write instead.
	 */
	if (readonly)
		disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_REGULAR);
	else
		disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_REGULAR);

	if (!disk_image)
		goto free_refcount_table;
	disk_image->priv = q;

	return disk_image;

free_refcount_table:
	if (q->refcount_table.rf_table)
		free(q->refcount_table.rf_table);
free_l1_table:
	if (q->table.l1_table)
		free(q->table.l1_table);
free_cluster_cache:
	if (q->cluster_cache)
		free(q->cluster_cache);
free_cluster_data:
	if (q->cluster_data)
		free(q->cluster_data);
free_header:
	if (q->header)
		free(q->header);
free_qcow:
	free(q);

	return NULL;
}

static bool qcow2_check_image(int fd)
{
	struct qcow2_header_disk f_header;

	if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0)
		return false;

	be32_to_cpus(&f_header.magic);
	be32_to_cpus(&f_header.version);

	if (f_header.magic != QCOW_MAGIC)
		return false;

	if (f_header.version != QCOW2_VERSION)
		return false;

	return true;
}

static void *qcow1_read_header(int fd)
{
	struct qcow1_header_disk f_header;
	struct qcow_header *header;

	header = malloc(sizeof(struct qcow_header));
	if (!header)
		return NULL;

	if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0) {
		free(header);
		return NULL;
	}

	be32_to_cpus(&f_header.magic);
	be32_to_cpus(&f_header.version);
	be64_to_cpus(&f_header.backing_file_offset);
	be32_to_cpus(&f_header.backing_file_size);
	be32_to_cpus(&f_header.mtime);
	be64_to_cpus(&f_header.size);
	be32_to_cpus(&f_header.crypt_method);
	be64_to_cpus(&f_header.l1_table_offset);

	*header = (struct qcow_header) {
		.size			= f_header.size,
		.l1_table_offset	= f_header.l1_table_offset,
		.l1_size		= f_header.size / ((1 << f_header.l2_bits) * (1 << f_header.cluster_bits)),
		.cluster_bits		= f_header.cluster_bits,
		.l2_bits		= f_header.l2_bits,
	};

	return header;
}

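/*
 * The qcow1 path mirrors qcow2_probe() minus the refcount machinery,
 * which version 1 images do not have. The v1 header also lacks an
 * explicit L1 size, so qcow1_read_header() above derives it from the
 * image size and the number of bytes each L1 entry maps.
 */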
static struct disk_image *qcow1_probe(int fd, bool readonly)
{
	struct disk_image *disk_image;
	struct qcow_l1_table *l1t;
	struct qcow_header *h;
	struct qcow *q;

	q = calloc(1, sizeof(struct qcow));
	if (!q)
		return NULL;

	mutex_init(&q->mutex);
	q->fd = fd;

	l1t = &q->table;

	l1t->root = RB_ROOT;
	INIT_LIST_HEAD(&l1t->lru_list);

	h = q->header = qcow1_read_header(fd);
	if (!h)
		goto free_qcow;

	q->version = QCOW1_VERSION;
	q->cluster_size = 1 << q->header->cluster_bits;
	q->cluster_offset_mask = (1LL << (63 - q->header->cluster_bits)) - 1;

	q->cluster_data = malloc(q->cluster_size);
	if (!q->cluster_data) {
		pr_warning("cluster data malloc error!");
		goto free_header;
	}

	q->cluster_cache = malloc(q->cluster_size);
	if (!q->cluster_cache) {
		pr_warning("cluster cache malloc error!");
		goto free_cluster_data;
	}

	if (qcow_read_l1_table(q) < 0)
		goto free_cluster_cache;

	/*
	 * Do not use mmap; use read/write instead.
	 */
	if (readonly)
		disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_REGULAR);
	else
		disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_REGULAR);

	if (!disk_image)
		goto free_l1_table;
	disk_image->priv = q;

	return disk_image;

free_l1_table:
	if (q->table.l1_table)
		free(q->table.l1_table);
free_cluster_cache:
	if (q->cluster_cache)
		free(q->cluster_cache);
free_cluster_data:
	if (q->cluster_data)
		free(q->cluster_data);
free_header:
	if (q->header)
		free(q->header);
free_qcow:
	free(q);

	return NULL;
}

static bool qcow1_check_image(int fd)
{
	struct qcow1_header_disk f_header;

	if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0)
		return false;

	be32_to_cpus(&f_header.magic);
	be32_to_cpus(&f_header.version);

	if (f_header.magic != QCOW_MAGIC)
		return false;

	if (f_header.version != QCOW1_VERSION)
		return false;

	return true;
}

struct disk_image *qcow_probe(int fd, bool readonly)
{
	if (qcow1_check_image(fd))
		return qcow1_probe(fd, readonly);

	if (qcow2_check_image(fd))
		return qcow2_probe(fd, readonly);

	return NULL;
}
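/*
 * A minimal usage sketch (not part of this file; the generic disk image
 * core is the real caller, trying qcow_probe() among other formats):
 *
 *	int fd = open(filename, readonly ? O_RDONLY : O_RDWR);
 *	struct disk_image *disk = qcow_probe(fd, readonly);
 *	if (!disk)
 *		... fall back to the other image format probes ...
 *
 * On success, the returned disk_image's read_sector/write_sector/flush
 * callbacks (see qcow_disk_ops above) perform all further I/O.
 */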