186835cedSPrasad Joshi #include "kvm/qcow.h" 286835cedSPrasad Joshi 386835cedSPrasad Joshi #include "kvm/disk-image.h" 486835cedSPrasad Joshi #include "kvm/read-write.h" 5c0799eb9SPekka Enberg #include "kvm/mutex.h" 686835cedSPrasad Joshi #include "kvm/util.h" 786835cedSPrasad Joshi 886835cedSPrasad Joshi #include <sys/types.h> 986835cedSPrasad Joshi #include <sys/stat.h> 1086835cedSPrasad Joshi #include <stdbool.h> 1186835cedSPrasad Joshi #include <stdlib.h> 1286835cedSPrasad Joshi #include <string.h> 1386835cedSPrasad Joshi #include <unistd.h> 1486835cedSPrasad Joshi #include <fcntl.h> 152d2179c1SLan Tianyu #include <errno.h> 16af68c51aSLan Tianyu #ifdef CONFIG_HAS_ZLIB 17af68c51aSLan Tianyu #include <zlib.h> 18af68c51aSLan Tianyu #endif 1986835cedSPrasad Joshi 202d2179c1SLan Tianyu #include <linux/err.h> 2186835cedSPrasad Joshi #include <linux/byteorder.h> 22865c675fSPrasad Joshi #include <linux/kernel.h> 230df6b4d9SPekka Enberg #include <linux/types.h> 2486835cedSPrasad Joshi 252d2179c1SLan Tianyu static int update_cluster_refcount(struct qcow *q, u64 clust_idx, u16 append); 262d2179c1SLan Tianyu static int qcow_write_refcount_table(struct qcow *q); 272d2179c1SLan Tianyu static u64 qcow_alloc_clusters(struct qcow *q, u64 size, int update_ref); 282d2179c1SLan Tianyu static void qcow_free_clusters(struct qcow *q, u64 clust_start, u64 size); 29e184700aSLan Tianyu 30e184700aSLan Tianyu static inline int qcow_pwrite_sync(int fd, 31e184700aSLan Tianyu void *buf, size_t count, off_t offset) 32e184700aSLan Tianyu { 33e184700aSLan Tianyu if (pwrite_in_full(fd, buf, count, offset) < 0) 34e184700aSLan Tianyu return -1; 35e184700aSLan Tianyu 36e184700aSLan Tianyu return fdatasync(fd); 37e184700aSLan Tianyu } 38e184700aSLan Tianyu 39e94cdf08SPekka Enberg static int l2_table_insert(struct rb_root *root, struct qcow_l2_table *new) 403309045fSPrasad Joshi { 413309045fSPrasad Joshi struct rb_node **link = &(root->rb_node), *parent = NULL; 423309045fSPrasad Joshi u64 offset = new->offset; 433309045fSPrasad Joshi 443309045fSPrasad Joshi /* search the tree */ 453309045fSPrasad Joshi while (*link) { 46473d58ffSPekka Enberg struct qcow_l2_table *t; 473309045fSPrasad Joshi 48473d58ffSPekka Enberg t = rb_entry(*link, struct qcow_l2_table, node); 493309045fSPrasad Joshi if (!t) 503309045fSPrasad Joshi goto error; 513309045fSPrasad Joshi 523309045fSPrasad Joshi parent = *link; 533309045fSPrasad Joshi 543309045fSPrasad Joshi if (t->offset > offset) 553309045fSPrasad Joshi link = &(*link)->rb_left; 563309045fSPrasad Joshi else if (t->offset < offset) 573309045fSPrasad Joshi link = &(*link)->rb_right; 583309045fSPrasad Joshi else 593309045fSPrasad Joshi goto out; 603309045fSPrasad Joshi } 613309045fSPrasad Joshi 623309045fSPrasad Joshi /* add new node */ 633309045fSPrasad Joshi rb_link_node(&new->node, parent, link); 643309045fSPrasad Joshi rb_insert_color(&new->node, root); 653309045fSPrasad Joshi out: 663309045fSPrasad Joshi return 0; 673309045fSPrasad Joshi error: 683309045fSPrasad Joshi return -1; 693309045fSPrasad Joshi } 703309045fSPrasad Joshi 71e94cdf08SPekka Enberg static struct qcow_l2_table *l2_table_lookup(struct rb_root *root, u64 offset) 723309045fSPrasad Joshi { 733309045fSPrasad Joshi struct rb_node *link = root->rb_node; 743309045fSPrasad Joshi 753309045fSPrasad Joshi while (link) { 76473d58ffSPekka Enberg struct qcow_l2_table *t; 773309045fSPrasad Joshi 78473d58ffSPekka Enberg t = rb_entry(link, struct qcow_l2_table, node); 793309045fSPrasad Joshi if (!t) 803309045fSPrasad Joshi goto out; 813309045fSPrasad Joshi 823309045fSPrasad Joshi if (t->offset > offset) 833309045fSPrasad Joshi link = link->rb_left; 843309045fSPrasad Joshi else if (t->offset < offset) 853309045fSPrasad Joshi link = link->rb_right; 863309045fSPrasad Joshi else 873309045fSPrasad Joshi return t; 883309045fSPrasad Joshi } 893309045fSPrasad Joshi out: 903309045fSPrasad Joshi return NULL; 913309045fSPrasad Joshi } 923309045fSPrasad Joshi 93e94cdf08SPekka Enberg static void l1_table_free_cache(struct qcow_l1_table *l1t) 943309045fSPrasad Joshi { 957b4eb530SPekka Enberg struct rb_root *r = &l1t->root; 963309045fSPrasad Joshi struct list_head *pos, *n; 97473d58ffSPekka Enberg struct qcow_l2_table *t; 983309045fSPrasad Joshi 997b4eb530SPekka Enberg list_for_each_safe(pos, n, &l1t->lru_list) { 1003309045fSPrasad Joshi /* Remove cache table from the list and RB tree */ 1013309045fSPrasad Joshi list_del(pos); 102473d58ffSPekka Enberg t = list_entry(pos, struct qcow_l2_table, list); 1033309045fSPrasad Joshi rb_erase(&t->node, r); 1043309045fSPrasad Joshi 1053309045fSPrasad Joshi /* Free the cached node */ 1063309045fSPrasad Joshi free(t); 1073309045fSPrasad Joshi } 1083309045fSPrasad Joshi } 1093309045fSPrasad Joshi 110a4e46515SPekka Enberg static int qcow_l2_cache_write(struct qcow *q, struct qcow_l2_table *c) 111a4e46515SPekka Enberg { 112a4e46515SPekka Enberg struct qcow_header *header = q->header; 113a4e46515SPekka Enberg u64 size; 114a4e46515SPekka Enberg 115aff88976SPekka Enberg if (!c->dirty) 116aff88976SPekka Enberg return 0; 117aff88976SPekka Enberg 118a4e46515SPekka Enberg size = 1 << header->l2_bits; 119a4e46515SPekka Enberg 120e184700aSLan Tianyu if (qcow_pwrite_sync(q->fd, c->table, 121e184700aSLan Tianyu size * sizeof(u64), c->offset) < 0) 122aff88976SPekka Enberg return -1; 123aff88976SPekka Enberg 124aff88976SPekka Enberg c->dirty = 0; 125aff88976SPekka Enberg 126aff88976SPekka Enberg return 0; 127a4e46515SPekka Enberg } 128a4e46515SPekka Enberg 129473d58ffSPekka Enberg static int cache_table(struct qcow *q, struct qcow_l2_table *c) 1303309045fSPrasad Joshi { 1317b4eb530SPekka Enberg struct qcow_l1_table *l1t = &q->table; 1327b4eb530SPekka Enberg struct rb_root *r = &l1t->root; 133473d58ffSPekka Enberg struct qcow_l2_table *lru; 1343309045fSPrasad Joshi 1357b4eb530SPekka Enberg if (l1t->nr_cached == MAX_CACHE_NODES) { 1363309045fSPrasad Joshi /* 1373309045fSPrasad Joshi * The node at the head of the list is least recently used 1383309045fSPrasad Joshi * node. Remove it from the list and replaced with a new node. 1393309045fSPrasad Joshi */ 1407b4eb530SPekka Enberg lru = list_first_entry(&l1t->lru_list, struct qcow_l2_table, list); 1413309045fSPrasad Joshi 1423309045fSPrasad Joshi /* Remove the node from the cache */ 1433309045fSPrasad Joshi rb_erase(&lru->node, r); 1443309045fSPrasad Joshi list_del_init(&lru->list); 1457b4eb530SPekka Enberg l1t->nr_cached--; 1463309045fSPrasad Joshi 1473309045fSPrasad Joshi /* Free the LRUed node */ 1483309045fSPrasad Joshi free(lru); 1493309045fSPrasad Joshi } 1503309045fSPrasad Joshi 1513309045fSPrasad Joshi /* Add new node in RB Tree: Helps in searching faster */ 152e94cdf08SPekka Enberg if (l2_table_insert(r, c) < 0) 1533309045fSPrasad Joshi goto error; 1543309045fSPrasad Joshi 1553309045fSPrasad Joshi /* Add in LRU replacement list */ 1567b4eb530SPekka Enberg list_add_tail(&c->list, &l1t->lru_list); 1577b4eb530SPekka Enberg l1t->nr_cached++; 1583309045fSPrasad Joshi 1593309045fSPrasad Joshi return 0; 1603309045fSPrasad Joshi error: 1613309045fSPrasad Joshi return -1; 1623309045fSPrasad Joshi } 1633309045fSPrasad Joshi 164e94cdf08SPekka Enberg static struct qcow_l2_table *l2_table_search(struct qcow *q, u64 offset) 1653309045fSPrasad Joshi { 1667b4eb530SPekka Enberg struct qcow_l1_table *l1t = &q->table; 167fe8bdde0SPekka Enberg struct qcow_l2_table *l2t; 1683309045fSPrasad Joshi 169e94cdf08SPekka Enberg l2t = l2_table_lookup(&l1t->root, offset); 170fe8bdde0SPekka Enberg if (!l2t) 171fe8bdde0SPekka Enberg return NULL; 1723309045fSPrasad Joshi 1733309045fSPrasad Joshi /* Update the LRU state, by moving the searched node to list tail */ 1747b4eb530SPekka Enberg list_move_tail(&l2t->list, &l1t->lru_list); 1753309045fSPrasad Joshi 176fe8bdde0SPekka Enberg return l2t; 1773309045fSPrasad Joshi } 1783309045fSPrasad Joshi 1793309045fSPrasad Joshi /* Allocates a new node for caching L2 table */ 180473d58ffSPekka Enberg static struct qcow_l2_table *new_cache_table(struct qcow *q, u64 offset) 1813309045fSPrasad Joshi { 1823309045fSPrasad Joshi struct qcow_header *header = q->header; 183473d58ffSPekka Enberg struct qcow_l2_table *c; 1843309045fSPrasad Joshi u64 l2t_sz; 1853309045fSPrasad Joshi u64 size; 1863309045fSPrasad Joshi 1873309045fSPrasad Joshi l2t_sz = 1 << header->l2_bits; 1883309045fSPrasad Joshi size = sizeof(*c) + l2t_sz * sizeof(u64); 1893309045fSPrasad Joshi c = calloc(1, size); 1903309045fSPrasad Joshi if (!c) 1913309045fSPrasad Joshi goto out; 1923309045fSPrasad Joshi 1933309045fSPrasad Joshi c->offset = offset; 1943309045fSPrasad Joshi RB_CLEAR_NODE(&c->node); 1953309045fSPrasad Joshi INIT_LIST_HEAD(&c->list); 1963309045fSPrasad Joshi out: 1973309045fSPrasad Joshi return c; 1983309045fSPrasad Joshi } 1993309045fSPrasad Joshi 200742fce76SPrasad Joshi static inline u64 get_l1_index(struct qcow *q, u64 offset) 20186835cedSPrasad Joshi { 202ad627d62SPekka Enberg struct qcow_header *header = q->header; 20386835cedSPrasad Joshi 20486835cedSPrasad Joshi return offset >> (header->l2_bits + header->cluster_bits); 20586835cedSPrasad Joshi } 20686835cedSPrasad Joshi 207742fce76SPrasad Joshi static inline u64 get_l2_index(struct qcow *q, u64 offset) 20886835cedSPrasad Joshi { 209ad627d62SPekka Enberg struct qcow_header *header = q->header; 21086835cedSPrasad Joshi 21186835cedSPrasad Joshi return (offset >> (header->cluster_bits)) & ((1 << header->l2_bits)-1); 21286835cedSPrasad Joshi } 21386835cedSPrasad Joshi 214742fce76SPrasad Joshi static inline u64 get_cluster_offset(struct qcow *q, u64 offset) 21586835cedSPrasad Joshi { 216ad627d62SPekka Enberg struct qcow_header *header = q->header; 21786835cedSPrasad Joshi 21886835cedSPrasad Joshi return offset & ((1 << header->cluster_bits)-1); 21986835cedSPrasad Joshi } 22086835cedSPrasad Joshi 221fe8bdde0SPekka Enberg static struct qcow_l2_table *qcow_read_l2_table(struct qcow *q, u64 offset) 2223309045fSPrasad Joshi { 2233309045fSPrasad Joshi struct qcow_header *header = q->header; 224fe8bdde0SPekka Enberg struct qcow_l2_table *l2t; 2253309045fSPrasad Joshi u64 size; 2263309045fSPrasad Joshi 2273309045fSPrasad Joshi size = 1 << header->l2_bits; 2283309045fSPrasad Joshi 2293309045fSPrasad Joshi /* search an entry for offset in cache */ 230e94cdf08SPekka Enberg l2t = l2_table_search(q, offset); 231fe8bdde0SPekka Enberg if (l2t) 232fe8bdde0SPekka Enberg return l2t; 2333309045fSPrasad Joshi 2343309045fSPrasad Joshi /* allocate new node for caching l2 table */ 235fe8bdde0SPekka Enberg l2t = new_cache_table(q, offset); 236fe8bdde0SPekka Enberg if (!l2t) 2373309045fSPrasad Joshi goto error; 2383309045fSPrasad Joshi 2393309045fSPrasad Joshi /* table not cached: read from the disk */ 240fe8bdde0SPekka Enberg if (pread_in_full(q->fd, l2t->table, size * sizeof(u64), offset) < 0) 2413309045fSPrasad Joshi goto error; 2423309045fSPrasad Joshi 2433309045fSPrasad Joshi /* cache the table */ 244fe8bdde0SPekka Enberg if (cache_table(q, l2t) < 0) 2453309045fSPrasad Joshi goto error; 2463309045fSPrasad Joshi 247fe8bdde0SPekka Enberg return l2t; 2483309045fSPrasad Joshi error: 249fe8bdde0SPekka Enberg free(l2t); 250fe8bdde0SPekka Enberg return NULL; 2513309045fSPrasad Joshi } 2523309045fSPrasad Joshi 253af68c51aSLan Tianyu static int qcow_decompress_buffer(u8 *out_buf, int out_buf_size, 254af68c51aSLan Tianyu const u8 *buf, int buf_size) 255af68c51aSLan Tianyu { 256af68c51aSLan Tianyu #ifdef CONFIG_HAS_ZLIB 257af68c51aSLan Tianyu z_stream strm1, *strm = &strm1; 258af68c51aSLan Tianyu int ret, out_len; 259af68c51aSLan Tianyu 260af68c51aSLan Tianyu memset(strm, 0, sizeof(*strm)); 261af68c51aSLan Tianyu 262af68c51aSLan Tianyu strm->next_in = (u8 *)buf; 263af68c51aSLan Tianyu strm->avail_in = buf_size; 264af68c51aSLan Tianyu strm->next_out = out_buf; 265af68c51aSLan Tianyu strm->avail_out = out_buf_size; 266af68c51aSLan Tianyu 267af68c51aSLan Tianyu ret = inflateInit2(strm, -12); 268af68c51aSLan Tianyu if (ret != Z_OK) 269af68c51aSLan Tianyu return -1; 270af68c51aSLan Tianyu 271af68c51aSLan Tianyu ret = inflate(strm, Z_FINISH); 272af68c51aSLan Tianyu out_len = strm->next_out - out_buf; 273af68c51aSLan Tianyu if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || 274af68c51aSLan Tianyu out_len != out_buf_size) { 275af68c51aSLan Tianyu inflateEnd(strm); 276af68c51aSLan Tianyu return -1; 277af68c51aSLan Tianyu } 278af68c51aSLan Tianyu 279af68c51aSLan Tianyu inflateEnd(strm); 280af68c51aSLan Tianyu return 0; 281af68c51aSLan Tianyu #else 282af68c51aSLan Tianyu return -1; 283af68c51aSLan Tianyu #endif 284af68c51aSLan Tianyu } 285af68c51aSLan Tianyu 286af68c51aSLan Tianyu static ssize_t qcow1_read_cluster(struct qcow *q, u64 offset, 287af68c51aSLan Tianyu void *dst, u32 dst_len) 28886835cedSPrasad Joshi { 289ad627d62SPekka Enberg struct qcow_header *header = q->header; 2903fb67b93SPekka Enberg struct qcow_l1_table *l1t = &q->table; 2913fb67b93SPekka Enberg struct qcow_l2_table *l2t; 292742fce76SPrasad Joshi u64 clust_offset; 293742fce76SPrasad Joshi u64 clust_start; 2943fb67b93SPekka Enberg u64 l2t_offset; 295a51948ceSPekka Enberg size_t length; 2963fb67b93SPekka Enberg u64 l2t_size; 297742fce76SPrasad Joshi u64 l1_idx; 298742fce76SPrasad Joshi u64 l2_idx; 299af68c51aSLan Tianyu int coffset; 300af68c51aSLan Tianyu int csize; 30186835cedSPrasad Joshi 302c5e0624bSPrasad Joshi l1_idx = get_l1_index(q, offset); 3033fb67b93SPekka Enberg if (l1_idx >= l1t->table_size) 304c0799eb9SPekka Enberg return -1; 30586835cedSPrasad Joshi 3063dac48d4SPrasad Joshi clust_offset = get_cluster_offset(q, offset); 307af68c51aSLan Tianyu if (clust_offset >= q->cluster_size) 308c0799eb9SPekka Enberg return -1; 3093dac48d4SPrasad Joshi 310af68c51aSLan Tianyu length = q->cluster_size - clust_offset; 3113dac48d4SPrasad Joshi if (length > dst_len) 3123dac48d4SPrasad Joshi length = dst_len; 3133dac48d4SPrasad Joshi 314c0799eb9SPekka Enberg mutex_lock(&q->mutex); 315b2ebe61bSPekka Enberg 3163fb67b93SPekka Enberg l2t_offset = be64_to_cpu(l1t->l1_table[l1_idx]); 3173fb67b93SPekka Enberg if (!l2t_offset) 3183dac48d4SPrasad Joshi goto zero_cluster; 31986835cedSPrasad Joshi 3203fb67b93SPekka Enberg l2t_size = 1 << header->l2_bits; 32186835cedSPrasad Joshi 3223309045fSPrasad Joshi /* read and cache level 2 table */ 3233fb67b93SPekka Enberg l2t = qcow_read_l2_table(q, l2t_offset); 3243fb67b93SPekka Enberg if (!l2t) 325b6edb0ecSSasha Levin goto out_error; 32686835cedSPrasad Joshi 327c5e0624bSPrasad Joshi l2_idx = get_l2_index(q, offset); 3283fb67b93SPekka Enberg if (l2_idx >= l2t_size) 329b6edb0ecSSasha Levin goto out_error; 33086835cedSPrasad Joshi 3313fb67b93SPekka Enberg clust_start = be64_to_cpu(l2t->table[l2_idx]); 332af68c51aSLan Tianyu if (clust_start & QCOW1_OFLAG_COMPRESSED) { 333af68c51aSLan Tianyu coffset = clust_start & q->cluster_offset_mask; 334af68c51aSLan Tianyu csize = clust_start >> (63 - q->header->cluster_bits); 335af68c51aSLan Tianyu csize &= (q->cluster_size - 1); 336af68c51aSLan Tianyu 337af68c51aSLan Tianyu if (pread_in_full(q->fd, q->cluster_data, csize, 3383a60be06SSasha Levin coffset) < 0) 339b2ebe61bSPekka Enberg goto out_error; 340b2ebe61bSPekka Enberg 341af68c51aSLan Tianyu if (qcow_decompress_buffer(q->cluster_cache, q->cluster_size, 3423a60be06SSasha Levin q->cluster_data, csize) < 0) 343af68c51aSLan Tianyu goto out_error; 344af68c51aSLan Tianyu 345af68c51aSLan Tianyu memcpy(dst, q->cluster_cache + clust_offset, length); 346af68c51aSLan Tianyu mutex_unlock(&q->mutex); 347af68c51aSLan Tianyu } else { 34886835cedSPrasad Joshi if (!clust_start) 3493dac48d4SPrasad Joshi goto zero_cluster; 35086835cedSPrasad Joshi 351c0799eb9SPekka Enberg mutex_unlock(&q->mutex); 35286835cedSPrasad Joshi 353af68c51aSLan Tianyu if (pread_in_full(q->fd, dst, length, 354af68c51aSLan Tianyu clust_start + clust_offset) < 0) 355c0799eb9SPekka Enberg return -1; 356af68c51aSLan Tianyu } 357c0799eb9SPekka Enberg 3583dac48d4SPrasad Joshi return length; 35986835cedSPrasad Joshi 360179b71f0SPekka Enberg zero_cluster: 361c0799eb9SPekka Enberg mutex_unlock(&q->mutex); 362179b71f0SPekka Enberg memset(dst, 0, length); 363c0799eb9SPekka Enberg return length; 364179b71f0SPekka Enberg 36586835cedSPrasad Joshi out_error: 366c0799eb9SPekka Enberg mutex_unlock(&q->mutex); 367179b71f0SPekka Enberg length = -1; 368c0799eb9SPekka Enberg return -1; 3693dac48d4SPrasad Joshi } 370b6edb0ecSSasha Levin 371af68c51aSLan Tianyu static ssize_t qcow2_read_cluster(struct qcow *q, u64 offset, 372af68c51aSLan Tianyu void *dst, u32 dst_len) 373af68c51aSLan Tianyu { 374af68c51aSLan Tianyu struct qcow_header *header = q->header; 375af68c51aSLan Tianyu struct qcow_l1_table *l1t = &q->table; 376af68c51aSLan Tianyu struct qcow_l2_table *l2t; 377af68c51aSLan Tianyu u64 clust_offset; 378af68c51aSLan Tianyu u64 clust_start; 379af68c51aSLan Tianyu u64 l2t_offset; 380af68c51aSLan Tianyu size_t length; 381af68c51aSLan Tianyu u64 l2t_size; 382af68c51aSLan Tianyu u64 l1_idx; 383af68c51aSLan Tianyu u64 l2_idx; 384af68c51aSLan Tianyu int coffset; 385af68c51aSLan Tianyu int sector_offset; 386af68c51aSLan Tianyu int nb_csectors; 387af68c51aSLan Tianyu int csize; 388af68c51aSLan Tianyu 389af68c51aSLan Tianyu l1_idx = get_l1_index(q, offset); 390af68c51aSLan Tianyu if (l1_idx >= l1t->table_size) 391af68c51aSLan Tianyu return -1; 392af68c51aSLan Tianyu 393af68c51aSLan Tianyu clust_offset = get_cluster_offset(q, offset); 394af68c51aSLan Tianyu if (clust_offset >= q->cluster_size) 395af68c51aSLan Tianyu return -1; 396af68c51aSLan Tianyu 397af68c51aSLan Tianyu length = q->cluster_size - clust_offset; 398af68c51aSLan Tianyu if (length > dst_len) 399af68c51aSLan Tianyu length = dst_len; 400af68c51aSLan Tianyu 401af68c51aSLan Tianyu mutex_lock(&q->mutex); 402af68c51aSLan Tianyu 403af68c51aSLan Tianyu l2t_offset = be64_to_cpu(l1t->l1_table[l1_idx]); 404af68c51aSLan Tianyu 405af68c51aSLan Tianyu l2t_offset &= ~QCOW2_OFLAG_COPIED; 406af68c51aSLan Tianyu if (!l2t_offset) 407af68c51aSLan Tianyu goto zero_cluster; 408af68c51aSLan Tianyu 409af68c51aSLan Tianyu l2t_size = 1 << header->l2_bits; 410af68c51aSLan Tianyu 411af68c51aSLan Tianyu /* read and cache level 2 table */ 412af68c51aSLan Tianyu l2t = qcow_read_l2_table(q, l2t_offset); 413af68c51aSLan Tianyu if (!l2t) 414af68c51aSLan Tianyu goto out_error; 415af68c51aSLan Tianyu 416af68c51aSLan Tianyu l2_idx = get_l2_index(q, offset); 417af68c51aSLan Tianyu if (l2_idx >= l2t_size) 418af68c51aSLan Tianyu goto out_error; 419af68c51aSLan Tianyu 420af68c51aSLan Tianyu clust_start = be64_to_cpu(l2t->table[l2_idx]); 421af68c51aSLan Tianyu if (clust_start & QCOW2_OFLAG_COMPRESSED) { 422af68c51aSLan Tianyu coffset = clust_start & q->cluster_offset_mask; 423af68c51aSLan Tianyu nb_csectors = ((clust_start >> q->csize_shift) 424af68c51aSLan Tianyu & q->csize_mask) + 1; 425af68c51aSLan Tianyu sector_offset = coffset & (SECTOR_SIZE - 1); 426af68c51aSLan Tianyu csize = nb_csectors * SECTOR_SIZE - sector_offset; 427af68c51aSLan Tianyu 428af68c51aSLan Tianyu if (pread_in_full(q->fd, q->cluster_data, 429af68c51aSLan Tianyu nb_csectors * SECTOR_SIZE, 430af68c51aSLan Tianyu coffset & ~(SECTOR_SIZE - 1)) < 0) { 431af68c51aSLan Tianyu goto out_error; 432af68c51aSLan Tianyu } 433af68c51aSLan Tianyu 434af68c51aSLan Tianyu if (qcow_decompress_buffer(q->cluster_cache, q->cluster_size, 435af68c51aSLan Tianyu q->cluster_data + sector_offset, 436af68c51aSLan Tianyu csize) < 0) { 437af68c51aSLan Tianyu goto out_error; 438af68c51aSLan Tianyu } 439af68c51aSLan Tianyu 440af68c51aSLan Tianyu memcpy(dst, q->cluster_cache + clust_offset, length); 441af68c51aSLan Tianyu mutex_unlock(&q->mutex); 442af68c51aSLan Tianyu } else { 443af68c51aSLan Tianyu clust_start &= QCOW2_OFFSET_MASK; 444af68c51aSLan Tianyu if (!clust_start) 445af68c51aSLan Tianyu goto zero_cluster; 446af68c51aSLan Tianyu 447af68c51aSLan Tianyu mutex_unlock(&q->mutex); 448af68c51aSLan Tianyu 449af68c51aSLan Tianyu if (pread_in_full(q->fd, dst, length, 450af68c51aSLan Tianyu clust_start + clust_offset) < 0) 451af68c51aSLan Tianyu return -1; 452af68c51aSLan Tianyu } 453af68c51aSLan Tianyu 454af68c51aSLan Tianyu return length; 455af68c51aSLan Tianyu 456af68c51aSLan Tianyu zero_cluster: 457af68c51aSLan Tianyu mutex_unlock(&q->mutex); 458af68c51aSLan Tianyu memset(dst, 0, length); 459af68c51aSLan Tianyu return length; 460af68c51aSLan Tianyu 461af68c51aSLan Tianyu out_error: 462af68c51aSLan Tianyu mutex_unlock(&q->mutex); 463af68c51aSLan Tianyu length = -1; 464af68c51aSLan Tianyu return -1; 465af68c51aSLan Tianyu } 466af68c51aSLan Tianyu 4672534c9b6SSasha Levin static ssize_t qcow_read_sector_single(struct disk_image *disk, u64 sector, 468af68c51aSLan Tianyu void *dst, u32 dst_len) 4693dac48d4SPrasad Joshi { 47043835ac9SSasha Levin struct qcow *q = disk->priv; 471ad627d62SPekka Enberg struct qcow_header *header = q->header; 472d8eea993SPekka Enberg u32 nr_read; 4730df6b4d9SPekka Enberg u64 offset; 4740df6b4d9SPekka Enberg char *buf; 4753dac48d4SPrasad Joshi u32 nr; 4763dac48d4SPrasad Joshi 4770df6b4d9SPekka Enberg buf = dst; 478d8eea993SPekka Enberg nr_read = 0; 4790df6b4d9SPekka Enberg 480d8eea993SPekka Enberg while (nr_read < dst_len) { 4813dac48d4SPrasad Joshi offset = sector << SECTOR_SHIFT; 4823dac48d4SPrasad Joshi if (offset >= header->size) 4830df6b4d9SPekka Enberg return -1; 4843dac48d4SPrasad Joshi 485af68c51aSLan Tianyu if (q->version == QCOW1_VERSION) 486af68c51aSLan Tianyu nr = qcow1_read_cluster(q, offset, buf, 487af68c51aSLan Tianyu dst_len - nr_read); 488af68c51aSLan Tianyu else 489af68c51aSLan Tianyu nr = qcow2_read_cluster(q, offset, buf, 490af68c51aSLan Tianyu dst_len - nr_read); 491af68c51aSLan Tianyu 492a51948ceSPekka Enberg if (nr <= 0) 4930df6b4d9SPekka Enberg return -1; 4943dac48d4SPrasad Joshi 495d8eea993SPekka Enberg nr_read += nr; 4963dac48d4SPrasad Joshi buf += nr; 4973dac48d4SPrasad Joshi sector += (nr >> SECTOR_SHIFT); 4983dac48d4SPrasad Joshi } 4990df6b4d9SPekka Enberg 50072133dd2SAsias He return dst_len; 50186835cedSPrasad Joshi } 50286835cedSPrasad Joshi 5032534c9b6SSasha Levin static ssize_t qcow_read_sector(struct disk_image *disk, u64 sector, 5045af21162SSasha Levin const struct iovec *iov, int iovcount, void *param) 5052534c9b6SSasha Levin { 5062534c9b6SSasha Levin ssize_t nr, total = 0; 5072534c9b6SSasha Levin 5082534c9b6SSasha Levin while (iovcount--) { 5092534c9b6SSasha Levin nr = qcow_read_sector_single(disk, sector, iov->iov_base, iov->iov_len); 5102534c9b6SSasha Levin if (nr != (ssize_t)iov->iov_len) { 5112534c9b6SSasha Levin pr_info("qcow_read_sector error: nr=%ld iov_len=%ld\n", (long)nr, (long)iov->iov_len); 5122534c9b6SSasha Levin return -1; 5132534c9b6SSasha Levin } 5142534c9b6SSasha Levin 5152534c9b6SSasha Levin sector += iov->iov_len >> SECTOR_SHIFT; 5162534c9b6SSasha Levin total += nr; 5173a60be06SSasha Levin iov++; 5182534c9b6SSasha Levin } 5192534c9b6SSasha Levin 5202534c9b6SSasha Levin return total; 5212534c9b6SSasha Levin } 5222534c9b6SSasha Levin 5233ecac800SPekka Enberg static void refcount_table_free_cache(struct qcow_refcount_table *rft) 5243ecac800SPekka Enberg { 5253ecac800SPekka Enberg struct rb_root *r = &rft->root; 5263ecac800SPekka Enberg struct list_head *pos, *n; 5273ecac800SPekka Enberg struct qcow_refcount_block *t; 5283ecac800SPekka Enberg 5293ecac800SPekka Enberg list_for_each_safe(pos, n, &rft->lru_list) { 5303ecac800SPekka Enberg list_del(pos); 5313ecac800SPekka Enberg t = list_entry(pos, struct qcow_refcount_block, list); 5323ecac800SPekka Enberg rb_erase(&t->node, r); 5333ecac800SPekka Enberg 5343ecac800SPekka Enberg free(t); 5353ecac800SPekka Enberg } 5363ecac800SPekka Enberg } 5373ecac800SPekka Enberg 5383ecac800SPekka Enberg static int refcount_block_insert(struct rb_root *root, struct qcow_refcount_block *new) 5393ecac800SPekka Enberg { 5403ecac800SPekka Enberg struct rb_node **link = &(root->rb_node), *parent = NULL; 5413ecac800SPekka Enberg u64 offset = new->offset; 5423ecac800SPekka Enberg 5433ecac800SPekka Enberg /* search the tree */ 5443ecac800SPekka Enberg while (*link) { 5453ecac800SPekka Enberg struct qcow_refcount_block *t; 5463ecac800SPekka Enberg 5473ecac800SPekka Enberg t = rb_entry(*link, struct qcow_refcount_block, node); 5483ecac800SPekka Enberg if (!t) 5493ecac800SPekka Enberg goto error; 5503ecac800SPekka Enberg 5513ecac800SPekka Enberg parent = *link; 5523ecac800SPekka Enberg 5533ecac800SPekka Enberg if (t->offset > offset) 5543ecac800SPekka Enberg link = &(*link)->rb_left; 5553ecac800SPekka Enberg else if (t->offset < offset) 5563ecac800SPekka Enberg link = &(*link)->rb_right; 5573ecac800SPekka Enberg else 5583ecac800SPekka Enberg goto out; 5593ecac800SPekka Enberg } 5603ecac800SPekka Enberg 5613ecac800SPekka Enberg /* add new node */ 5623ecac800SPekka Enberg rb_link_node(&new->node, parent, link); 5633ecac800SPekka Enberg rb_insert_color(&new->node, root); 5643ecac800SPekka Enberg out: 5653ecac800SPekka Enberg return 0; 5663ecac800SPekka Enberg error: 5673ecac800SPekka Enberg return -1; 5683ecac800SPekka Enberg } 5693ecac800SPekka Enberg 5703ecac800SPekka Enberg static int write_refcount_block(struct qcow *q, struct qcow_refcount_block *rfb) 5713ecac800SPekka Enberg { 5723ecac800SPekka Enberg if (!rfb->dirty) 5733ecac800SPekka Enberg return 0; 5743ecac800SPekka Enberg 575e184700aSLan Tianyu if (qcow_pwrite_sync(q->fd, rfb->entries, 576e184700aSLan Tianyu rfb->size * sizeof(u16), rfb->offset) < 0) 5773ecac800SPekka Enberg return -1; 5783ecac800SPekka Enberg 5793ecac800SPekka Enberg rfb->dirty = 0; 5803ecac800SPekka Enberg 5813ecac800SPekka Enberg return 0; 5823ecac800SPekka Enberg } 5833ecac800SPekka Enberg 5843ecac800SPekka Enberg static int cache_refcount_block(struct qcow *q, struct qcow_refcount_block *c) 5853ecac800SPekka Enberg { 5863ecac800SPekka Enberg struct qcow_refcount_table *rft = &q->refcount_table; 5873ecac800SPekka Enberg struct rb_root *r = &rft->root; 5883ecac800SPekka Enberg struct qcow_refcount_block *lru; 5893ecac800SPekka Enberg 5903ecac800SPekka Enberg if (rft->nr_cached == MAX_CACHE_NODES) { 5913ecac800SPekka Enberg lru = list_first_entry(&rft->lru_list, struct qcow_refcount_block, list); 5923ecac800SPekka Enberg 5933ecac800SPekka Enberg rb_erase(&lru->node, r); 5943ecac800SPekka Enberg list_del_init(&lru->list); 5953ecac800SPekka Enberg rft->nr_cached--; 5963ecac800SPekka Enberg 5973ecac800SPekka Enberg free(lru); 5983ecac800SPekka Enberg } 5993ecac800SPekka Enberg 6003ecac800SPekka Enberg if (refcount_block_insert(r, c) < 0) 6013ecac800SPekka Enberg goto error; 6023ecac800SPekka Enberg 6033ecac800SPekka Enberg list_add_tail(&c->list, &rft->lru_list); 6043ecac800SPekka Enberg rft->nr_cached++; 6053ecac800SPekka Enberg 6063ecac800SPekka Enberg return 0; 6073ecac800SPekka Enberg error: 6083ecac800SPekka Enberg return -1; 6093ecac800SPekka Enberg } 6103ecac800SPekka Enberg 6113ecac800SPekka Enberg static struct qcow_refcount_block *new_refcount_block(struct qcow *q, u64 rfb_offset) 6123ecac800SPekka Enberg { 6133ecac800SPekka Enberg struct qcow_refcount_block *rfb; 6143ecac800SPekka Enberg 615af68c51aSLan Tianyu rfb = malloc(sizeof *rfb + q->cluster_size); 6163ecac800SPekka Enberg if (!rfb) 6173ecac800SPekka Enberg return NULL; 6183ecac800SPekka Enberg 6193ecac800SPekka Enberg rfb->offset = rfb_offset; 620af68c51aSLan Tianyu rfb->size = q->cluster_size / sizeof(u16); 6213ecac800SPekka Enberg RB_CLEAR_NODE(&rfb->node); 6223ecac800SPekka Enberg INIT_LIST_HEAD(&rfb->list); 6233ecac800SPekka Enberg 6243ecac800SPekka Enberg return rfb; 6253ecac800SPekka Enberg } 6263ecac800SPekka Enberg 6273ecac800SPekka Enberg static struct qcow_refcount_block *refcount_block_lookup(struct rb_root *root, u64 offset) 6283ecac800SPekka Enberg { 6293ecac800SPekka Enberg struct rb_node *link = root->rb_node; 6303ecac800SPekka Enberg 6313ecac800SPekka Enberg while (link) { 6323ecac800SPekka Enberg struct qcow_refcount_block *t; 6333ecac800SPekka Enberg 6343ecac800SPekka Enberg t = rb_entry(link, struct qcow_refcount_block, node); 6353ecac800SPekka Enberg if (!t) 6363ecac800SPekka Enberg goto out; 6373ecac800SPekka Enberg 6383ecac800SPekka Enberg if (t->offset > offset) 6393ecac800SPekka Enberg link = link->rb_left; 6403ecac800SPekka Enberg else if (t->offset < offset) 6413ecac800SPekka Enberg link = link->rb_right; 6423ecac800SPekka Enberg else 6433ecac800SPekka Enberg return t; 6443ecac800SPekka Enberg } 6453ecac800SPekka Enberg out: 6463ecac800SPekka Enberg return NULL; 6473ecac800SPekka Enberg } 6483ecac800SPekka Enberg 6493ecac800SPekka Enberg static struct qcow_refcount_block *refcount_block_search(struct qcow *q, u64 offset) 6503ecac800SPekka Enberg { 6513ecac800SPekka Enberg struct qcow_refcount_table *rft = &q->refcount_table; 6523ecac800SPekka Enberg struct qcow_refcount_block *rfb; 6533ecac800SPekka Enberg 6543ecac800SPekka Enberg rfb = refcount_block_lookup(&rft->root, offset); 6553ecac800SPekka Enberg if (!rfb) 6563ecac800SPekka Enberg return NULL; 6573ecac800SPekka Enberg 6583ecac800SPekka Enberg /* Update the LRU state, by moving the searched node to list tail */ 6593ecac800SPekka Enberg list_move_tail(&rfb->list, &rft->lru_list); 6603ecac800SPekka Enberg 6613ecac800SPekka Enberg return rfb; 6623ecac800SPekka Enberg } 6633ecac800SPekka Enberg 6642d2179c1SLan Tianyu static struct qcow_refcount_block *qcow_grow_refcount_block(struct qcow *q, 6652d2179c1SLan Tianyu u64 clust_idx) 6662d2179c1SLan Tianyu { 6672d2179c1SLan Tianyu struct qcow_header *header = q->header; 6682d2179c1SLan Tianyu struct qcow_refcount_table *rft = &q->refcount_table; 6692d2179c1SLan Tianyu struct qcow_refcount_block *rfb; 6702d2179c1SLan Tianyu u64 new_block_offset; 6712d2179c1SLan Tianyu u64 rft_idx; 6722d2179c1SLan Tianyu 6732d2179c1SLan Tianyu rft_idx = clust_idx >> (header->cluster_bits - 6742d2179c1SLan Tianyu QCOW_REFCOUNT_BLOCK_SHIFT); 6752d2179c1SLan Tianyu 6762d2179c1SLan Tianyu if (rft_idx >= rft->rf_size) { 6772d2179c1SLan Tianyu pr_warning("Don't support grow refcount block table"); 6782d2179c1SLan Tianyu return NULL; 6792d2179c1SLan Tianyu } 6802d2179c1SLan Tianyu 6812d2179c1SLan Tianyu new_block_offset = qcow_alloc_clusters(q, q->cluster_size, 0); 682823c7fd8SAndre Przywara if (new_block_offset == (u64)-1) 6832d2179c1SLan Tianyu return NULL; 6842d2179c1SLan Tianyu 6852d2179c1SLan Tianyu rfb = new_refcount_block(q, new_block_offset); 6862d2179c1SLan Tianyu if (!rfb) 6872d2179c1SLan Tianyu return NULL; 6882d2179c1SLan Tianyu 6892d2179c1SLan Tianyu memset(rfb->entries, 0x00, q->cluster_size); 6902d2179c1SLan Tianyu rfb->dirty = 1; 6912d2179c1SLan Tianyu 6922d2179c1SLan Tianyu /* write refcount block */ 6932d2179c1SLan Tianyu if (write_refcount_block(q, rfb) < 0) 6942d2179c1SLan Tianyu goto free_rfb; 6952d2179c1SLan Tianyu 6962d2179c1SLan Tianyu if (cache_refcount_block(q, rfb) < 0) 6972d2179c1SLan Tianyu goto free_rfb; 6982d2179c1SLan Tianyu 6992d2179c1SLan Tianyu rft->rf_table[rft_idx] = cpu_to_be64(new_block_offset); 7002d2179c1SLan Tianyu if (update_cluster_refcount(q, new_block_offset >> 7012d2179c1SLan Tianyu header->cluster_bits, 1) < 0) 7022d2179c1SLan Tianyu goto recover_rft; 7032d2179c1SLan Tianyu 7042d2179c1SLan Tianyu if (qcow_write_refcount_table(q) < 0) 7052d2179c1SLan Tianyu goto recover_rft; 7062d2179c1SLan Tianyu 7072d2179c1SLan Tianyu return rfb; 7082d2179c1SLan Tianyu 7092d2179c1SLan Tianyu recover_rft: 7102d2179c1SLan Tianyu rft->rf_table[rft_idx] = 0; 7112d2179c1SLan Tianyu free_rfb: 7122d2179c1SLan Tianyu free(rfb); 7132d2179c1SLan Tianyu return NULL; 7142d2179c1SLan Tianyu } 7152d2179c1SLan Tianyu 7163ecac800SPekka Enberg static struct qcow_refcount_block *qcow_read_refcount_block(struct qcow *q, u64 clust_idx) 7173ecac800SPekka Enberg { 7183ecac800SPekka Enberg struct qcow_header *header = q->header; 7193ecac800SPekka Enberg struct qcow_refcount_table *rft = &q->refcount_table; 7203ecac800SPekka Enberg struct qcow_refcount_block *rfb; 7213ecac800SPekka Enberg u64 rfb_offset; 7223ecac800SPekka Enberg u64 rft_idx; 7233ecac800SPekka Enberg 7243ecac800SPekka Enberg rft_idx = clust_idx >> (header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT); 7253ecac800SPekka Enberg if (rft_idx >= rft->rf_size) 7262d2179c1SLan Tianyu return ERR_PTR(-ENOSPC); 7273ecac800SPekka Enberg 7283ecac800SPekka Enberg rfb_offset = be64_to_cpu(rft->rf_table[rft_idx]); 7292d2179c1SLan Tianyu if (!rfb_offset) 7302d2179c1SLan Tianyu return ERR_PTR(-ENOSPC); 731e184700aSLan Tianyu 7323ecac800SPekka Enberg rfb = refcount_block_search(q, rfb_offset); 7333ecac800SPekka Enberg if (rfb) 7343ecac800SPekka Enberg return rfb; 7353ecac800SPekka Enberg 7363ecac800SPekka Enberg rfb = new_refcount_block(q, rfb_offset); 7373ecac800SPekka Enberg if (!rfb) 7383ecac800SPekka Enberg return NULL; 7393ecac800SPekka Enberg 7403ecac800SPekka Enberg if (pread_in_full(q->fd, rfb->entries, rfb->size * sizeof(u16), rfb_offset) < 0) 7413ecac800SPekka Enberg goto error_free_rfb; 7423ecac800SPekka Enberg 7433ecac800SPekka Enberg if (cache_refcount_block(q, rfb) < 0) 7443ecac800SPekka Enberg goto error_free_rfb; 7453ecac800SPekka Enberg 7463ecac800SPekka Enberg return rfb; 7473ecac800SPekka Enberg 7483ecac800SPekka Enberg error_free_rfb: 7493ecac800SPekka Enberg free(rfb); 7503ecac800SPekka Enberg 7513ecac800SPekka Enberg return NULL; 7523ecac800SPekka Enberg } 7533ecac800SPekka Enberg 754e184700aSLan Tianyu static u16 qcow_get_refcount(struct qcow *q, u64 clust_idx) 755e184700aSLan Tianyu { 756e184700aSLan Tianyu struct qcow_refcount_block *rfb = NULL; 757e184700aSLan Tianyu struct qcow_header *header = q->header; 758e184700aSLan Tianyu u64 rfb_idx; 759e184700aSLan Tianyu 760e184700aSLan Tianyu rfb = qcow_read_refcount_block(q, clust_idx); 7612d2179c1SLan Tianyu if (PTR_ERR(rfb) == -ENOSPC) 7622d2179c1SLan Tianyu return 0; 7632d2179c1SLan Tianyu else if (IS_ERR_OR_NULL(rfb)) { 764e184700aSLan Tianyu pr_warning("Error while reading refcount table"); 765e184700aSLan Tianyu return -1; 766e184700aSLan Tianyu } 767e184700aSLan Tianyu 768e184700aSLan Tianyu rfb_idx = clust_idx & (((1ULL << 769e184700aSLan Tianyu (header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT)) - 1)); 770e184700aSLan Tianyu 771e184700aSLan Tianyu if (rfb_idx >= rfb->size) { 772e184700aSLan Tianyu pr_warning("L1: refcount block index out of bounds"); 773e184700aSLan Tianyu return -1; 774e184700aSLan Tianyu } 775e184700aSLan Tianyu 776e184700aSLan Tianyu return be16_to_cpu(rfb->entries[rfb_idx]); 777e184700aSLan Tianyu } 778e184700aSLan Tianyu 779e184700aSLan Tianyu static int update_cluster_refcount(struct qcow *q, u64 clust_idx, u16 append) 780e184700aSLan Tianyu { 781e184700aSLan Tianyu struct qcow_refcount_block *rfb = NULL; 782e184700aSLan Tianyu struct qcow_header *header = q->header; 783e184700aSLan Tianyu u16 refcount; 784e184700aSLan Tianyu u64 rfb_idx; 785e184700aSLan Tianyu 786e184700aSLan Tianyu rfb = qcow_read_refcount_block(q, clust_idx); 7872d2179c1SLan Tianyu if (PTR_ERR(rfb) == -ENOSPC) { 7882d2179c1SLan Tianyu rfb = qcow_grow_refcount_block(q, clust_idx); 789e184700aSLan Tianyu if (!rfb) { 7902d2179c1SLan Tianyu pr_warning("error while growing refcount table"); 7912d2179c1SLan Tianyu return -1; 7922d2179c1SLan Tianyu } 7932d2179c1SLan Tianyu } else if (IS_ERR_OR_NULL(rfb)) { 794e184700aSLan Tianyu pr_warning("error while reading refcount table"); 795e184700aSLan Tianyu return -1; 796e184700aSLan Tianyu } 797e184700aSLan Tianyu 798e184700aSLan Tianyu rfb_idx = clust_idx & (((1ULL << 799e184700aSLan Tianyu (header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT)) - 1)); 800e184700aSLan Tianyu if (rfb_idx >= rfb->size) { 801e184700aSLan Tianyu pr_warning("refcount block index out of bounds"); 802e184700aSLan Tianyu return -1; 803e184700aSLan Tianyu } 804e184700aSLan Tianyu 805e184700aSLan Tianyu refcount = be16_to_cpu(rfb->entries[rfb_idx]) + append; 806e184700aSLan Tianyu rfb->entries[rfb_idx] = cpu_to_be16(refcount); 807e184700aSLan Tianyu rfb->dirty = 1; 808e184700aSLan Tianyu 809e184700aSLan Tianyu /* write refcount block */ 810e184700aSLan Tianyu if (write_refcount_block(q, rfb) < 0) { 811e184700aSLan Tianyu pr_warning("refcount block index out of bounds"); 812e184700aSLan Tianyu return -1; 813e184700aSLan Tianyu } 814e184700aSLan Tianyu 815e184700aSLan Tianyu /* update free_clust_idx since refcount becomes zero */ 816e184700aSLan Tianyu if (!refcount && clust_idx < q->free_clust_idx) 817e184700aSLan Tianyu q->free_clust_idx = clust_idx; 818e184700aSLan Tianyu 819e184700aSLan Tianyu return 0; 820e184700aSLan Tianyu } 821e184700aSLan Tianyu 822e184700aSLan Tianyu static void qcow_free_clusters(struct qcow *q, u64 clust_start, u64 size) 823e184700aSLan Tianyu { 824e184700aSLan Tianyu struct qcow_header *header = q->header; 825e184700aSLan Tianyu u64 start, end, offset; 826e184700aSLan Tianyu 827e184700aSLan Tianyu start = clust_start & ~(q->cluster_size - 1); 828e184700aSLan Tianyu end = (clust_start + size - 1) & ~(q->cluster_size - 1); 829e184700aSLan Tianyu for (offset = start; offset <= end; offset += q->cluster_size) 830e184700aSLan Tianyu update_cluster_refcount(q, offset >> header->cluster_bits, -1); 831e184700aSLan Tianyu } 832e184700aSLan Tianyu 833865c675fSPrasad Joshi /* 834e184700aSLan Tianyu * Allocate clusters according to the size. Find a postion that 835e184700aSLan Tianyu * can satisfy the size. free_clust_idx is initialized to zero and 836e184700aSLan Tianyu * Record last position. 837865c675fSPrasad Joshi */ 8382d2179c1SLan Tianyu static u64 qcow_alloc_clusters(struct qcow *q, u64 size, int update_ref) 839e184700aSLan Tianyu { 840e184700aSLan Tianyu struct qcow_header *header = q->header; 841e184700aSLan Tianyu u16 clust_refcount; 8422d2179c1SLan Tianyu u32 clust_idx = 0, i; 843e184700aSLan Tianyu u64 clust_num; 844e184700aSLan Tianyu 845e184700aSLan Tianyu clust_num = (size + (q->cluster_size - 1)) >> header->cluster_bits; 846e184700aSLan Tianyu 847e184700aSLan Tianyu again: 848e184700aSLan Tianyu for (i = 0; i < clust_num; i++) { 849e184700aSLan Tianyu clust_idx = q->free_clust_idx++; 850e184700aSLan Tianyu clust_refcount = qcow_get_refcount(q, clust_idx); 851823c7fd8SAndre Przywara if (clust_refcount == (u16)-1) 852e184700aSLan Tianyu return -1; 853e184700aSLan Tianyu else if (clust_refcount > 0) 854e184700aSLan Tianyu goto again; 855e184700aSLan Tianyu } 856e184700aSLan Tianyu 8572d2179c1SLan Tianyu clust_idx++; 8582d2179c1SLan Tianyu 8592d2179c1SLan Tianyu if (update_ref) 860e184700aSLan Tianyu for (i = 0; i < clust_num; i++) 861e184700aSLan Tianyu if (update_cluster_refcount(q, 8622d2179c1SLan Tianyu clust_idx - clust_num + i, 1)) 863e184700aSLan Tianyu return -1; 864e184700aSLan Tianyu 8652d2179c1SLan Tianyu return (clust_idx - clust_num) << header->cluster_bits; 866e184700aSLan Tianyu } 867e184700aSLan Tianyu 868e184700aSLan Tianyu static int qcow_write_l1_table(struct qcow *q) 869e184700aSLan Tianyu { 870e184700aSLan Tianyu struct qcow_l1_table *l1t = &q->table; 871e184700aSLan Tianyu struct qcow_header *header = q->header; 872e184700aSLan Tianyu 873e184700aSLan Tianyu if (qcow_pwrite_sync(q->fd, l1t->l1_table, 874e184700aSLan Tianyu l1t->table_size * sizeof(u64), 875e184700aSLan Tianyu header->l1_table_offset) < 0) 876e184700aSLan Tianyu return -1; 877e184700aSLan Tianyu 878e184700aSLan Tianyu return 0; 879e184700aSLan Tianyu } 880e184700aSLan Tianyu 881e184700aSLan Tianyu /* 882e184700aSLan Tianyu * Get l2 table. If the table has been copied, read table directly. 883e184700aSLan Tianyu * If the table exists, allocate a new cluster and copy the table 884e184700aSLan Tianyu * to the new cluster. 885e184700aSLan Tianyu */ 886e184700aSLan Tianyu static int get_cluster_table(struct qcow *q, u64 offset, 887e184700aSLan Tianyu struct qcow_l2_table **result_l2t, u64 *result_l2_index) 888865c675fSPrasad Joshi { 889865c675fSPrasad Joshi struct qcow_header *header = q->header; 8903fb67b93SPekka Enberg struct qcow_l1_table *l1t = &q->table; 891fe8bdde0SPekka Enberg struct qcow_l2_table *l2t; 892865c675fSPrasad Joshi u64 l1t_idx; 893e184700aSLan Tianyu u64 l2t_offset; 894865c675fSPrasad Joshi u64 l2t_idx; 895e184700aSLan Tianyu u64 l2t_size; 896e184700aSLan Tianyu u64 l2t_new_offset; 897865c675fSPrasad Joshi 8983fb67b93SPekka Enberg l2t_size = 1 << header->l2_bits; 899865c675fSPrasad Joshi 900865c675fSPrasad Joshi l1t_idx = get_l1_index(q, offset); 9013fb67b93SPekka Enberg if (l1t_idx >= l1t->table_size) 902c0799eb9SPekka Enberg return -1; 903865c675fSPrasad Joshi 904865c675fSPrasad Joshi l2t_idx = get_l2_index(q, offset); 9053fb67b93SPekka Enberg if (l2t_idx >= l2t_size) 906c0799eb9SPekka Enberg return -1; 907865c675fSPrasad Joshi 908e184700aSLan Tianyu l2t_offset = be64_to_cpu(l1t->l1_table[l1t_idx]); 909e184700aSLan Tianyu if (l2t_offset & QCOW2_OFLAG_COPIED) { 910e184700aSLan Tianyu l2t_offset &= ~QCOW2_OFLAG_COPIED; 911e184700aSLan Tianyu l2t = qcow_read_l2_table(q, l2t_offset); 912e184700aSLan Tianyu if (!l2t) 913e184700aSLan Tianyu goto error; 914e184700aSLan Tianyu } else { 9152d2179c1SLan Tianyu l2t_new_offset = qcow_alloc_clusters(q, 9162d2179c1SLan Tianyu l2t_size*sizeof(u64), 1); 9172d2179c1SLan Tianyu 918823c7fd8SAndre Przywara if (l2t_new_offset != (u64)-1) 919e184700aSLan Tianyu goto error; 920e184700aSLan Tianyu 921e184700aSLan Tianyu l2t = new_cache_table(q, l2t_new_offset); 922e184700aSLan Tianyu if (!l2t) 923e184700aSLan Tianyu goto free_cluster; 924e184700aSLan Tianyu 925e184700aSLan Tianyu if (l2t_offset) { 926e184700aSLan Tianyu l2t = qcow_read_l2_table(q, l2t_offset); 927e184700aSLan Tianyu if (!l2t) 928e184700aSLan Tianyu goto free_cache; 929e184700aSLan Tianyu } else 930e184700aSLan Tianyu memset(l2t->table, 0x00, l2t_size * sizeof(u64)); 931e184700aSLan Tianyu 932e184700aSLan Tianyu /* write l2 table */ 933e184700aSLan Tianyu l2t->dirty = 1; 934e184700aSLan Tianyu if (qcow_l2_cache_write(q, l2t) < 0) 935e184700aSLan Tianyu goto free_cache; 936e184700aSLan Tianyu 937e184700aSLan Tianyu /* cache l2 table */ 938e184700aSLan Tianyu if (cache_table(q, l2t)) 939e184700aSLan Tianyu goto free_cache; 940e184700aSLan Tianyu 941e184700aSLan Tianyu /* update the l1 talble */ 942e184700aSLan Tianyu l1t->l1_table[l1t_idx] = cpu_to_be64(l2t_new_offset 943e184700aSLan Tianyu | QCOW2_OFLAG_COPIED); 944e184700aSLan Tianyu if (qcow_write_l1_table(q)) { 945e184700aSLan Tianyu pr_warning("Update l1 table error"); 946e184700aSLan Tianyu goto free_cache; 947e184700aSLan Tianyu } 948e184700aSLan Tianyu 949e184700aSLan Tianyu /* free old cluster */ 950e184700aSLan Tianyu qcow_free_clusters(q, l2t_offset, q->cluster_size); 951e184700aSLan Tianyu } 952e184700aSLan Tianyu 953e184700aSLan Tianyu *result_l2t = l2t; 954e184700aSLan Tianyu *result_l2_index = l2t_idx; 955e184700aSLan Tianyu 956e184700aSLan Tianyu return 0; 957e184700aSLan Tianyu 958e184700aSLan Tianyu free_cache: 959e184700aSLan Tianyu free(l2t); 960e184700aSLan Tianyu 961e184700aSLan Tianyu free_cluster: 962e184700aSLan Tianyu qcow_free_clusters(q, l2t_new_offset, q->cluster_size); 963e184700aSLan Tianyu 964e184700aSLan Tianyu error: 965e184700aSLan Tianyu return -1; 966e184700aSLan Tianyu } 967e184700aSLan Tianyu 968e184700aSLan Tianyu /* 969e184700aSLan Tianyu * If the cluster has been copied, write data directly. If not, 970e184700aSLan Tianyu * read the original data and write it to the new cluster with 971e184700aSLan Tianyu * modification. 972e184700aSLan Tianyu */ 973e184700aSLan Tianyu static ssize_t qcow_write_cluster(struct qcow *q, u64 offset, 974e184700aSLan Tianyu void *buf, u32 src_len) 975e184700aSLan Tianyu { 976e184700aSLan Tianyu struct qcow_l2_table *l2t; 977e184700aSLan Tianyu u64 clust_new_start; 978e184700aSLan Tianyu u64 clust_start; 979e184700aSLan Tianyu u64 clust_flags; 980e184700aSLan Tianyu u64 clust_off; 981e184700aSLan Tianyu u64 l2t_idx; 982e184700aSLan Tianyu u64 len; 983e184700aSLan Tianyu 984e184700aSLan Tianyu l2t = NULL; 985e184700aSLan Tianyu 986865c675fSPrasad Joshi clust_off = get_cluster_offset(q, offset); 987e184700aSLan Tianyu if (clust_off >= q->cluster_size) 988c0799eb9SPekka Enberg return -1; 989865c675fSPrasad Joshi 990e184700aSLan Tianyu len = q->cluster_size - clust_off; 991865c675fSPrasad Joshi if (len > src_len) 992865c675fSPrasad Joshi len = src_len; 993865c675fSPrasad Joshi 994c0799eb9SPekka Enberg mutex_lock(&q->mutex); 995c0799eb9SPekka Enberg 996e184700aSLan Tianyu if (get_cluster_table(q, offset, &l2t, &l2t_idx)) { 997e184700aSLan Tianyu pr_warning("Get l2 table error"); 998121dd76eSPekka Enberg goto error; 999121dd76eSPekka Enberg } 1000865c675fSPrasad Joshi 1001b2ebe61bSPekka Enberg clust_start = be64_to_cpu(l2t->table[l2t_idx]); 1002af68c51aSLan Tianyu clust_flags = clust_start & QCOW2_OFLAGS_MASK; 1003b2ebe61bSPekka Enberg 1004af68c51aSLan Tianyu clust_start &= QCOW2_OFFSET_MASK; 1005af68c51aSLan Tianyu if (!(clust_flags & QCOW2_OFLAG_COPIED)) { 10062d2179c1SLan Tianyu clust_new_start = qcow_alloc_clusters(q, q->cluster_size, 1); 1007823c7fd8SAndre Przywara if (clust_new_start != (u64)-1) { 1008e184700aSLan Tianyu pr_warning("Cluster alloc error"); 10093ecac800SPekka Enberg goto error; 10103ecac800SPekka Enberg } 10113ecac800SPekka Enberg 1012e184700aSLan Tianyu offset &= ~(q->cluster_size - 1); 10133ecac800SPekka Enberg 1014e184700aSLan Tianyu /* if clust_start is not zero, read the original data*/ 1015e184700aSLan Tianyu if (clust_start) { 1016c0799eb9SPekka Enberg mutex_unlock(&q->mutex); 1017e184700aSLan Tianyu if (qcow2_read_cluster(q, offset, q->copy_buff, 1018e184700aSLan Tianyu q->cluster_size) < 0) { 1019e184700aSLan Tianyu pr_warning("Read copy cluster error"); 1020e184700aSLan Tianyu qcow_free_clusters(q, clust_new_start, 1021e184700aSLan Tianyu q->cluster_size); 1022e184700aSLan Tianyu return -1; 1023e184700aSLan Tianyu } 1024e184700aSLan Tianyu mutex_lock(&q->mutex); 1025e184700aSLan Tianyu } else 1026e184700aSLan Tianyu memset(q->copy_buff, 0x00, q->cluster_size); 1027e184700aSLan Tianyu 1028e184700aSLan Tianyu memcpy(q->copy_buff + clust_off, buf, len); 1029c0799eb9SPekka Enberg 1030a4e46515SPekka Enberg /* Write actual data */ 1031e184700aSLan Tianyu if (pwrite_in_full(q->fd, q->copy_buff, q->cluster_size, 1032e184700aSLan Tianyu clust_new_start) < 0) 1033e184700aSLan Tianyu goto free_cluster; 1034a4e46515SPekka Enberg 1035e184700aSLan Tianyu /* update l2 table*/ 1036e184700aSLan Tianyu l2t->table[l2t_idx] = cpu_to_be64(clust_new_start 1037e184700aSLan Tianyu | QCOW2_OFLAG_COPIED); 1038e184700aSLan Tianyu l2t->dirty = 1; 1039e184700aSLan Tianyu 1040e184700aSLan Tianyu if (qcow_l2_cache_write(q, l2t)) 1041e184700aSLan Tianyu goto free_cluster; 1042e184700aSLan Tianyu 1043e184700aSLan Tianyu /* free old cluster*/ 1044e184700aSLan Tianyu if (clust_flags & QCOW2_OFLAG_COMPRESSED) { 1045e184700aSLan Tianyu int size; 1046e184700aSLan Tianyu size = ((clust_start >> q->csize_shift) & 1047e184700aSLan Tianyu q->csize_mask) + 1; 1048e184700aSLan Tianyu size *= 512; 1049e184700aSLan Tianyu clust_start &= q->cluster_offset_mask; 1050e184700aSLan Tianyu clust_start &= ~511; 1051e184700aSLan Tianyu 1052e184700aSLan Tianyu qcow_free_clusters(q, clust_start, size); 1053e184700aSLan Tianyu } else if (clust_start) 1054e184700aSLan Tianyu qcow_free_clusters(q, clust_start, q->cluster_size); 1055e184700aSLan Tianyu 1056e184700aSLan Tianyu } else { 1057e184700aSLan Tianyu /* Write actual data */ 1058e184700aSLan Tianyu if (pwrite_in_full(q->fd, buf, len, 1059e184700aSLan Tianyu clust_start + clust_off) < 0) 1060e184700aSLan Tianyu goto error; 1061e184700aSLan Tianyu } 1062e184700aSLan Tianyu mutex_unlock(&q->mutex); 1063865c675fSPrasad Joshi return len; 10643309045fSPrasad Joshi 1065e184700aSLan Tianyu free_cluster: 1066e184700aSLan Tianyu qcow_free_clusters(q, clust_new_start, q->cluster_size); 1067e184700aSLan Tianyu 1068865c675fSPrasad Joshi error: 1069c0799eb9SPekka Enberg mutex_unlock(&q->mutex); 1070865c675fSPrasad Joshi return -1; 1071865c675fSPrasad Joshi } 1072865c675fSPrasad Joshi 10732534c9b6SSasha Levin static ssize_t qcow_write_sector_single(struct disk_image *disk, u64 sector, void *src, u32 src_len) 107486835cedSPrasad Joshi { 1075865c675fSPrasad Joshi struct qcow *q = disk->priv; 1076865c675fSPrasad Joshi struct qcow_header *header = q->header; 1077c4acb611SIngo Molnar u32 nr_written; 10780df6b4d9SPekka Enberg char *buf; 1079865c675fSPrasad Joshi u64 offset; 1080865c675fSPrasad Joshi ssize_t nr; 1081865c675fSPrasad Joshi 10820df6b4d9SPekka Enberg buf = src; 10830df6b4d9SPekka Enberg nr_written = 0; 1084865c675fSPrasad Joshi offset = sector << SECTOR_SHIFT; 10850df6b4d9SPekka Enberg 10860df6b4d9SPekka Enberg while (nr_written < src_len) { 1087865c675fSPrasad Joshi if (offset >= header->size) 10880df6b4d9SPekka Enberg return -1; 1089865c675fSPrasad Joshi 1090b1c84095SPekka Enberg nr = qcow_write_cluster(q, offset, buf, src_len - nr_written); 1091865c675fSPrasad Joshi if (nr < 0) 10920df6b4d9SPekka Enberg return -1; 1093865c675fSPrasad Joshi 10940df6b4d9SPekka Enberg nr_written += nr; 1095865c675fSPrasad Joshi buf += nr; 1096865c675fSPrasad Joshi offset += nr; 1097865c675fSPrasad Joshi } 10980df6b4d9SPekka Enberg 109972133dd2SAsias He return nr_written; 110086835cedSPrasad Joshi } 110186835cedSPrasad Joshi 11022534c9b6SSasha Levin static ssize_t qcow_write_sector(struct disk_image *disk, u64 sector, 11035af21162SSasha Levin const struct iovec *iov, int iovcount, void *param) 11042534c9b6SSasha Levin { 11052534c9b6SSasha Levin ssize_t nr, total = 0; 11062534c9b6SSasha Levin 11072534c9b6SSasha Levin while (iovcount--) { 11082534c9b6SSasha Levin nr = qcow_write_sector_single(disk, sector, iov->iov_base, iov->iov_len); 11092534c9b6SSasha Levin if (nr != (ssize_t)iov->iov_len) { 11102534c9b6SSasha Levin pr_info("qcow_write_sector error: nr=%ld iov_len=%ld\n", (long)nr, (long)iov->iov_len); 11112534c9b6SSasha Levin return -1; 11122534c9b6SSasha Levin } 11132534c9b6SSasha Levin 11142534c9b6SSasha Levin sector += iov->iov_len >> SECTOR_SHIFT; 11152534c9b6SSasha Levin iov++; 11162534c9b6SSasha Levin total += nr; 11172534c9b6SSasha Levin } 11182534c9b6SSasha Levin 11192534c9b6SSasha Levin return total; 11202534c9b6SSasha Levin } 11212534c9b6SSasha Levin 1122659f4186SPekka Enberg static int qcow_disk_flush(struct disk_image *disk) 1123659f4186SPekka Enberg { 112473984b11SPekka Enberg struct qcow *q = disk->priv; 11253ecac800SPekka Enberg struct qcow_refcount_table *rft; 1126a4e46515SPekka Enberg struct list_head *pos, *n; 11277b4eb530SPekka Enberg struct qcow_l1_table *l1t; 112873984b11SPekka Enberg 11297b4eb530SPekka Enberg l1t = &q->table; 11303ecac800SPekka Enberg rft = &q->refcount_table; 113173984b11SPekka Enberg 1132a4e46515SPekka Enberg mutex_lock(&q->mutex); 1133a4e46515SPekka Enberg 11343ecac800SPekka Enberg list_for_each_safe(pos, n, &rft->lru_list) { 11353ecac800SPekka Enberg struct qcow_refcount_block *c = list_entry(pos, struct qcow_refcount_block, list); 11363ecac800SPekka Enberg 11373ecac800SPekka Enberg if (write_refcount_block(q, c) < 0) 11383ecac800SPekka Enberg goto error_unlock; 11393ecac800SPekka Enberg } 11403ecac800SPekka Enberg 11417b4eb530SPekka Enberg list_for_each_safe(pos, n, &l1t->lru_list) { 1142a4e46515SPekka Enberg struct qcow_l2_table *c = list_entry(pos, struct qcow_l2_table, list); 1143a4e46515SPekka Enberg 1144a4e46515SPekka Enberg if (qcow_l2_cache_write(q, c) < 0) 1145a4e46515SPekka Enberg goto error_unlock; 1146a4e46515SPekka Enberg } 1147a4e46515SPekka Enberg 1148e184700aSLan Tianyu if (qcow_write_l1_table < 0) 1149a4e46515SPekka Enberg goto error_unlock; 1150a4e46515SPekka Enberg 1151a4e46515SPekka Enberg mutex_unlock(&q->mutex); 115273984b11SPekka Enberg 1153659f4186SPekka Enberg return fsync(disk->fd); 1154a4e46515SPekka Enberg 1155a4e46515SPekka Enberg error_unlock: 1156a4e46515SPekka Enberg mutex_unlock(&q->mutex); 1157a4e46515SPekka Enberg return -1; 1158659f4186SPekka Enberg } 1159659f4186SPekka Enberg 1160b1c84095SPekka Enberg static int qcow_disk_close(struct disk_image *disk) 116186835cedSPrasad Joshi { 116286835cedSPrasad Joshi struct qcow *q; 116386835cedSPrasad Joshi 116443835ac9SSasha Levin if (!disk) 116572133dd2SAsias He return 0; 116686835cedSPrasad Joshi 116743835ac9SSasha Levin q = disk->priv; 116886835cedSPrasad Joshi 11693ecac800SPekka Enberg refcount_table_free_cache(&q->refcount_table); 1170e94cdf08SPekka Enberg l1_table_free_cache(&q->table); 1171e184700aSLan Tianyu free(q->copy_buff); 1172af68c51aSLan Tianyu free(q->cluster_data); 1173af68c51aSLan Tianyu free(q->cluster_cache); 11743ecac800SPekka Enberg free(q->refcount_table.rf_table); 11756c6f79b6SPrasad Joshi free(q->table.l1_table); 117686835cedSPrasad Joshi free(q->header); 117786835cedSPrasad Joshi free(q); 117872133dd2SAsias He 117972133dd2SAsias He return 0; 118086835cedSPrasad Joshi } 118186835cedSPrasad Joshi 1182b1c84095SPekka Enberg static struct disk_image_operations qcow_disk_readonly_ops = { 1183dcd3cd8eSAsias He .read = qcow_read_sector, 1184b1c84095SPekka Enberg .close = qcow_disk_close, 1185f10860caSPekka Enberg }; 1186f10860caSPekka Enberg 1187b1c84095SPekka Enberg static struct disk_image_operations qcow_disk_ops = { 1188dcd3cd8eSAsias He .read = qcow_read_sector, 1189dcd3cd8eSAsias He .write = qcow_write_sector, 1190659f4186SPekka Enberg .flush = qcow_disk_flush, 1191b1c84095SPekka Enberg .close = qcow_disk_close, 119286835cedSPrasad Joshi }; 119386835cedSPrasad Joshi 11943ecac800SPekka Enberg static int qcow_read_refcount_table(struct qcow *q) 11953ecac800SPekka Enberg { 11963ecac800SPekka Enberg struct qcow_header *header = q->header; 11973ecac800SPekka Enberg struct qcow_refcount_table *rft = &q->refcount_table; 11983ecac800SPekka Enberg 1199af68c51aSLan Tianyu rft->rf_size = (header->refcount_table_size * q->cluster_size) 1200af68c51aSLan Tianyu / sizeof(u64); 12013ecac800SPekka Enberg 12023ecac800SPekka Enberg rft->rf_table = calloc(rft->rf_size, sizeof(u64)); 12033ecac800SPekka Enberg if (!rft->rf_table) 12043ecac800SPekka Enberg return -1; 12053ecac800SPekka Enberg 120615542babSAndre Przywara rft->root = (struct rb_root) RB_ROOT; 12073ecac800SPekka Enberg INIT_LIST_HEAD(&rft->lru_list); 12083ecac800SPekka Enberg 12093ecac800SPekka Enberg return pread_in_full(q->fd, rft->rf_table, sizeof(u64) * rft->rf_size, header->refcount_table_offset); 12103ecac800SPekka Enberg } 12113ecac800SPekka Enberg 12122d2179c1SLan Tianyu static int qcow_write_refcount_table(struct qcow *q) 12132d2179c1SLan Tianyu { 12142d2179c1SLan Tianyu struct qcow_header *header = q->header; 12152d2179c1SLan Tianyu struct qcow_refcount_table *rft = &q->refcount_table; 12162d2179c1SLan Tianyu 12172d2179c1SLan Tianyu return qcow_pwrite_sync(q->fd, rft->rf_table, 12182d2179c1SLan Tianyu rft->rf_size * sizeof(u64), header->refcount_table_offset); 12192d2179c1SLan Tianyu } 12202d2179c1SLan Tianyu 122186835cedSPrasad Joshi static int qcow_read_l1_table(struct qcow *q) 122286835cedSPrasad Joshi { 1223ad627d62SPekka Enberg struct qcow_header *header = q->header; 1224473aaa2dSPekka Enberg struct qcow_l1_table *table = &q->table; 122586835cedSPrasad Joshi 1226ad627d62SPekka Enberg table->table_size = header->l1_size; 122786835cedSPrasad Joshi 122800adcc1bSPrasad Joshi table->l1_table = calloc(table->table_size, sizeof(u64)); 122900adcc1bSPrasad Joshi if (!table->l1_table) 123086835cedSPrasad Joshi return -1; 123186835cedSPrasad Joshi 1232659f4186SPekka Enberg return pread_in_full(q->fd, table->l1_table, sizeof(u64) * table->table_size, header->l1_table_offset); 123386835cedSPrasad Joshi } 123486835cedSPrasad Joshi 1235ad627d62SPekka Enberg static void *qcow2_read_header(int fd) 123686835cedSPrasad Joshi { 1237ad627d62SPekka Enberg struct qcow2_header_disk f_header; 1238ad627d62SPekka Enberg struct qcow_header *header; 123986835cedSPrasad Joshi 1240ad627d62SPekka Enberg header = malloc(sizeof(struct qcow_header)); 124186835cedSPrasad Joshi if (!header) 124286835cedSPrasad Joshi return NULL; 124386835cedSPrasad Joshi 12440657f33dSPrasad Joshi if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0) { 12450657f33dSPrasad Joshi free(header); 124686835cedSPrasad Joshi return NULL; 12470657f33dSPrasad Joshi } 124886835cedSPrasad Joshi 1249ad627d62SPekka Enberg be32_to_cpus(&f_header.magic); 1250ad627d62SPekka Enberg be32_to_cpus(&f_header.version); 1251ad627d62SPekka Enberg be64_to_cpus(&f_header.backing_file_offset); 1252ad627d62SPekka Enberg be32_to_cpus(&f_header.backing_file_size); 1253ad627d62SPekka Enberg be32_to_cpus(&f_header.cluster_bits); 1254ad627d62SPekka Enberg be64_to_cpus(&f_header.size); 1255ad627d62SPekka Enberg be32_to_cpus(&f_header.crypt_method); 1256ad627d62SPekka Enberg be32_to_cpus(&f_header.l1_size); 1257ad627d62SPekka Enberg be64_to_cpus(&f_header.l1_table_offset); 1258ad627d62SPekka Enberg be64_to_cpus(&f_header.refcount_table_offset); 1259ad627d62SPekka Enberg be32_to_cpus(&f_header.refcount_table_clusters); 1260ad627d62SPekka Enberg be32_to_cpus(&f_header.nb_snapshots); 1261ad627d62SPekka Enberg be64_to_cpus(&f_header.snapshots_offset); 1262ad627d62SPekka Enberg 1263ad627d62SPekka Enberg *header = (struct qcow_header) { 1264ad627d62SPekka Enberg .size = f_header.size, 1265ad627d62SPekka Enberg .l1_table_offset = f_header.l1_table_offset, 1266ad627d62SPekka Enberg .l1_size = f_header.l1_size, 1267ad627d62SPekka Enberg .cluster_bits = f_header.cluster_bits, 1268ad627d62SPekka Enberg .l2_bits = f_header.cluster_bits - 3, 12693ecac800SPekka Enberg .refcount_table_offset = f_header.refcount_table_offset, 12703ecac800SPekka Enberg .refcount_table_size = f_header.refcount_table_clusters, 1271ad627d62SPekka Enberg }; 1272ad627d62SPekka Enberg 1273ad627d62SPekka Enberg return header; 1274ad627d62SPekka Enberg } 1275ad627d62SPekka Enberg 1276f10860caSPekka Enberg static struct disk_image *qcow2_probe(int fd, bool readonly) 1277ad627d62SPekka Enberg { 1278ad627d62SPekka Enberg struct disk_image *disk_image; 12797b4eb530SPekka Enberg struct qcow_l1_table *l1t; 12807b4eb530SPekka Enberg struct qcow_header *h; 12817b4eb530SPekka Enberg struct qcow *q; 1282ad627d62SPekka Enberg 1283ad627d62SPekka Enberg q = calloc(1, sizeof(struct qcow)); 1284ad627d62SPekka Enberg if (!q) 1285af68c51aSLan Tianyu return NULL; 1286ad627d62SPekka Enberg 1287c0799eb9SPekka Enberg mutex_init(&q->mutex); 1288ad627d62SPekka Enberg q->fd = fd; 12897b4eb530SPekka Enberg 12907b4eb530SPekka Enberg l1t = &q->table; 12917b4eb530SPekka Enberg 129215542babSAndre Przywara l1t->root = (struct rb_root) RB_ROOT; 12937b4eb530SPekka Enberg INIT_LIST_HEAD(&l1t->lru_list); 1294ad627d62SPekka Enberg 1295ad627d62SPekka Enberg h = q->header = qcow2_read_header(fd); 1296ad627d62SPekka Enberg if (!h) 1297af68c51aSLan Tianyu goto free_qcow; 1298af68c51aSLan Tianyu 1299af68c51aSLan Tianyu q->version = QCOW2_VERSION; 1300af68c51aSLan Tianyu q->csize_shift = (62 - (q->header->cluster_bits - 8)); 1301af68c51aSLan Tianyu q->csize_mask = (1 << (q->header->cluster_bits - 8)) - 1; 1302af68c51aSLan Tianyu q->cluster_offset_mask = (1LL << q->csize_shift) - 1; 1303af68c51aSLan Tianyu q->cluster_size = 1 << q->header->cluster_bits; 1304af68c51aSLan Tianyu 1305e184700aSLan Tianyu q->copy_buff = malloc(q->cluster_size); 1306e184700aSLan Tianyu if (!q->copy_buff) { 1307e184700aSLan Tianyu pr_warning("copy buff malloc error"); 1308e184700aSLan Tianyu goto free_header; 1309e184700aSLan Tianyu } 1310e184700aSLan Tianyu 1311af68c51aSLan Tianyu q->cluster_data = malloc(q->cluster_size); 1312af68c51aSLan Tianyu if (!q->cluster_data) { 1313e184700aSLan Tianyu pr_warning("cluster data malloc error"); 1314e184700aSLan Tianyu goto free_copy_buff; 1315af68c51aSLan Tianyu } 1316af68c51aSLan Tianyu 1317af68c51aSLan Tianyu q->cluster_cache = malloc(q->cluster_size); 1318af68c51aSLan Tianyu if (!q->cluster_cache) { 1319e184700aSLan Tianyu pr_warning("cluster cache malloc error"); 1320af68c51aSLan Tianyu goto free_cluster_data; 1321af68c51aSLan Tianyu } 1322ad627d62SPekka Enberg 1323ad627d62SPekka Enberg if (qcow_read_l1_table(q) < 0) 1324af68c51aSLan Tianyu goto free_cluster_cache; 1325ad627d62SPekka Enberg 13263ecac800SPekka Enberg if (qcow_read_refcount_table(q) < 0) 1327af68c51aSLan Tianyu goto free_l1_table; 13283ecac800SPekka Enberg 13297d22135fSAsias He /* 13307d22135fSAsias He * Do not use mmap use read/write instead 13317d22135fSAsias He */ 1332f10860caSPekka Enberg if (readonly) 133338c396e4SSasha Levin disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_REGULAR); 1334f10860caSPekka Enberg else 133538c396e4SSasha Levin disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_REGULAR); 1336f10860caSPekka Enberg 13379f9207c5SSasha Levin if (IS_ERR_OR_NULL(disk_image)) 1338af68c51aSLan Tianyu goto free_refcount_table; 1339f41a132bSSasha Levin 134059e8453aSSasha Levin disk_image->async = 0; 1341ad627d62SPekka Enberg disk_image->priv = q; 1342ad627d62SPekka Enberg 1343ad627d62SPekka Enberg return disk_image; 1344ad627d62SPekka Enberg 1345af68c51aSLan Tianyu free_refcount_table: 1346af68c51aSLan Tianyu if (q->refcount_table.rf_table) 1347af68c51aSLan Tianyu free(q->refcount_table.rf_table); 1348af68c51aSLan Tianyu free_l1_table: 1349af68c51aSLan Tianyu if (q->table.l1_table) 1350ad627d62SPekka Enberg free(q->table.l1_table); 1351af68c51aSLan Tianyu free_cluster_cache: 1352af68c51aSLan Tianyu if (q->cluster_cache) 1353af68c51aSLan Tianyu free(q->cluster_cache); 1354af68c51aSLan Tianyu free_cluster_data: 1355af68c51aSLan Tianyu if (q->cluster_data) 1356af68c51aSLan Tianyu free(q->cluster_data); 1357e184700aSLan Tianyu free_copy_buff: 1358e184700aSLan Tianyu if (q->copy_buff) 1359e184700aSLan Tianyu free(q->copy_buff); 1360af68c51aSLan Tianyu free_header: 1361af68c51aSLan Tianyu if (q->header) 1362ad627d62SPekka Enberg free(q->header); 1363af68c51aSLan Tianyu free_qcow: 1364ad627d62SPekka Enberg free(q); 1365ad627d62SPekka Enberg 1366ad627d62SPekka Enberg return NULL; 1367ad627d62SPekka Enberg } 1368ad627d62SPekka Enberg 1369ad627d62SPekka Enberg static bool qcow2_check_image(int fd) 1370ad627d62SPekka Enberg { 1371ad627d62SPekka Enberg struct qcow2_header_disk f_header; 1372ad627d62SPekka Enberg 1373ad627d62SPekka Enberg if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0) 1374ad627d62SPekka Enberg return false; 1375ad627d62SPekka Enberg 1376ad627d62SPekka Enberg be32_to_cpus(&f_header.magic); 1377ad627d62SPekka Enberg be32_to_cpus(&f_header.version); 1378ad627d62SPekka Enberg 1379ad627d62SPekka Enberg if (f_header.magic != QCOW_MAGIC) 1380ad627d62SPekka Enberg return false; 1381ad627d62SPekka Enberg 1382ad627d62SPekka Enberg if (f_header.version != QCOW2_VERSION) 1383ad627d62SPekka Enberg return false; 1384ad627d62SPekka Enberg 1385ad627d62SPekka Enberg return true; 1386ad627d62SPekka Enberg } 1387ad627d62SPekka Enberg 1388ad627d62SPekka Enberg static void *qcow1_read_header(int fd) 1389ad627d62SPekka Enberg { 1390ad627d62SPekka Enberg struct qcow1_header_disk f_header; 1391ad627d62SPekka Enberg struct qcow_header *header; 1392ad627d62SPekka Enberg 1393ad627d62SPekka Enberg header = malloc(sizeof(struct qcow_header)); 1394ad627d62SPekka Enberg if (!header) 1395ad627d62SPekka Enberg return NULL; 1396ad627d62SPekka Enberg 1397d39cefd2SSasha Levin if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0) { 1398d39cefd2SSasha Levin free(header); 1399ad627d62SPekka Enberg return NULL; 1400d39cefd2SSasha Levin } 1401ad627d62SPekka Enberg 1402ad627d62SPekka Enberg be32_to_cpus(&f_header.magic); 1403ad627d62SPekka Enberg be32_to_cpus(&f_header.version); 1404ad627d62SPekka Enberg be64_to_cpus(&f_header.backing_file_offset); 1405ad627d62SPekka Enberg be32_to_cpus(&f_header.backing_file_size); 1406ad627d62SPekka Enberg be32_to_cpus(&f_header.mtime); 1407ad627d62SPekka Enberg be64_to_cpus(&f_header.size); 1408ad627d62SPekka Enberg be32_to_cpus(&f_header.crypt_method); 1409ad627d62SPekka Enberg be64_to_cpus(&f_header.l1_table_offset); 1410ad627d62SPekka Enberg 1411ad627d62SPekka Enberg *header = (struct qcow_header) { 1412ad627d62SPekka Enberg .size = f_header.size, 1413ad627d62SPekka Enberg .l1_table_offset = f_header.l1_table_offset, 1414ad627d62SPekka Enberg .l1_size = f_header.size / ((1 << f_header.l2_bits) * (1 << f_header.cluster_bits)), 1415ad627d62SPekka Enberg .cluster_bits = f_header.cluster_bits, 1416ad627d62SPekka Enberg .l2_bits = f_header.l2_bits, 1417ad627d62SPekka Enberg }; 141886835cedSPrasad Joshi 141986835cedSPrasad Joshi return header; 142086835cedSPrasad Joshi } 142186835cedSPrasad Joshi 1422f10860caSPekka Enberg static struct disk_image *qcow1_probe(int fd, bool readonly) 142386835cedSPrasad Joshi { 142486835cedSPrasad Joshi struct disk_image *disk_image; 14257b4eb530SPekka Enberg struct qcow_l1_table *l1t; 14267b4eb530SPekka Enberg struct qcow_header *h; 14277b4eb530SPekka Enberg struct qcow *q; 142886835cedSPrasad Joshi 142986835cedSPrasad Joshi q = calloc(1, sizeof(struct qcow)); 143086835cedSPrasad Joshi if (!q) 1431af68c51aSLan Tianyu return NULL; 143286835cedSPrasad Joshi 1433c0799eb9SPekka Enberg mutex_init(&q->mutex); 143486835cedSPrasad Joshi q->fd = fd; 14357b4eb530SPekka Enberg 14367b4eb530SPekka Enberg l1t = &q->table; 14377b4eb530SPekka Enberg 143815542babSAndre Przywara l1t->root = (struct rb_root)RB_ROOT; 14397b4eb530SPekka Enberg INIT_LIST_HEAD(&l1t->lru_list); 1440*ca14d9edSJean-Philippe Brucker INIT_LIST_HEAD(&q->refcount_table.lru_list); 144186835cedSPrasad Joshi 144286835cedSPrasad Joshi h = q->header = qcow1_read_header(fd); 144386835cedSPrasad Joshi if (!h) 1444af68c51aSLan Tianyu goto free_qcow; 1445af68c51aSLan Tianyu 1446af68c51aSLan Tianyu q->version = QCOW1_VERSION; 1447af68c51aSLan Tianyu q->cluster_size = 1 << q->header->cluster_bits; 1448af68c51aSLan Tianyu q->cluster_offset_mask = (1LL << (63 - q->header->cluster_bits)) - 1; 1449e184700aSLan Tianyu q->free_clust_idx = 0; 1450af68c51aSLan Tianyu 1451af68c51aSLan Tianyu q->cluster_data = malloc(q->cluster_size); 1452af68c51aSLan Tianyu if (!q->cluster_data) { 1453e184700aSLan Tianyu pr_warning("cluster data malloc error"); 1454af68c51aSLan Tianyu goto free_header; 1455af68c51aSLan Tianyu } 1456af68c51aSLan Tianyu 1457af68c51aSLan Tianyu q->cluster_cache = malloc(q->cluster_size); 1458af68c51aSLan Tianyu if (!q->cluster_cache) { 1459e184700aSLan Tianyu pr_warning("cluster cache malloc error"); 1460af68c51aSLan Tianyu goto free_cluster_data; 1461af68c51aSLan Tianyu } 146286835cedSPrasad Joshi 146386835cedSPrasad Joshi if (qcow_read_l1_table(q) < 0) 1464af68c51aSLan Tianyu goto free_cluster_cache; 146586835cedSPrasad Joshi 14667d22135fSAsias He /* 14677d22135fSAsias He * Do not use mmap use read/write instead 14687d22135fSAsias He */ 1469f10860caSPekka Enberg if (readonly) 147038c396e4SSasha Levin disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_REGULAR); 1471f10860caSPekka Enberg else 147238c396e4SSasha Levin disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_REGULAR); 1473f10860caSPekka Enberg 147486835cedSPrasad Joshi if (!disk_image) 1475af68c51aSLan Tianyu goto free_l1_table; 1476f41a132bSSasha Levin 1477f41a132bSSasha Levin disk_image->async = 1; 147886835cedSPrasad Joshi disk_image->priv = q; 147986835cedSPrasad Joshi 148086835cedSPrasad Joshi return disk_image; 148186835cedSPrasad Joshi 1482af68c51aSLan Tianyu free_l1_table: 1483af68c51aSLan Tianyu if (q->table.l1_table) 14846c6f79b6SPrasad Joshi free(q->table.l1_table); 1485af68c51aSLan Tianyu free_cluster_cache: 1486af68c51aSLan Tianyu if (q->cluster_cache) 1487af68c51aSLan Tianyu free(q->cluster_cache); 1488af68c51aSLan Tianyu free_cluster_data: 1489af68c51aSLan Tianyu if (q->cluster_data) 1490af68c51aSLan Tianyu free(q->cluster_data); 1491af68c51aSLan Tianyu free_header: 1492af68c51aSLan Tianyu if (q->header) 149386835cedSPrasad Joshi free(q->header); 1494af68c51aSLan Tianyu free_qcow: 149586835cedSPrasad Joshi free(q); 149686835cedSPrasad Joshi 149786835cedSPrasad Joshi return NULL; 149886835cedSPrasad Joshi } 149986835cedSPrasad Joshi 1500ad627d62SPekka Enberg static bool qcow1_check_image(int fd) 150186835cedSPrasad Joshi { 1502ad627d62SPekka Enberg struct qcow1_header_disk f_header; 150386835cedSPrasad Joshi 1504ad627d62SPekka Enberg if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0) 1505ad627d62SPekka Enberg return false; 150686835cedSPrasad Joshi 1507ad627d62SPekka Enberg be32_to_cpus(&f_header.magic); 1508ad627d62SPekka Enberg be32_to_cpus(&f_header.version); 150986835cedSPrasad Joshi 1510ad627d62SPekka Enberg if (f_header.magic != QCOW_MAGIC) 1511ad627d62SPekka Enberg return false; 151286835cedSPrasad Joshi 1513ad627d62SPekka Enberg if (f_header.version != QCOW1_VERSION) 1514ad627d62SPekka Enberg return false; 151586835cedSPrasad Joshi 1516ad627d62SPekka Enberg return true; 151786835cedSPrasad Joshi } 151886835cedSPrasad Joshi 1519f10860caSPekka Enberg struct disk_image *qcow_probe(int fd, bool readonly) 152086835cedSPrasad Joshi { 1521ad627d62SPekka Enberg if (qcow1_check_image(fd)) 1522f10860caSPekka Enberg return qcow1_probe(fd, readonly); 1523ad627d62SPekka Enberg 1524ad627d62SPekka Enberg if (qcow2_check_image(fd)) 1525f10860caSPekka Enberg return qcow2_probe(fd, readonly); 1526ad627d62SPekka Enberg 1527ad627d62SPekka Enberg return NULL; 152886835cedSPrasad Joshi } 1529