186835cedSPrasad Joshi #include "kvm/qcow.h" 286835cedSPrasad Joshi 386835cedSPrasad Joshi #include "kvm/disk-image.h" 486835cedSPrasad Joshi #include "kvm/read-write.h" 5c0799eb9SPekka Enberg #include "kvm/mutex.h" 686835cedSPrasad Joshi #include "kvm/util.h" 786835cedSPrasad Joshi 886835cedSPrasad Joshi #include <sys/types.h> 986835cedSPrasad Joshi #include <sys/stat.h> 1086835cedSPrasad Joshi #include <stdbool.h> 1186835cedSPrasad Joshi #include <stdlib.h> 1286835cedSPrasad Joshi #include <string.h> 1386835cedSPrasad Joshi #include <unistd.h> 1486835cedSPrasad Joshi #include <fcntl.h> 1586835cedSPrasad Joshi 1686835cedSPrasad Joshi #include <linux/byteorder.h> 17865c675fSPrasad Joshi #include <linux/kernel.h> 180df6b4d9SPekka Enberg #include <linux/types.h> 1986835cedSPrasad Joshi 20e94cdf08SPekka Enberg static int l2_table_insert(struct rb_root *root, struct qcow_l2_table *new) 213309045fSPrasad Joshi { 223309045fSPrasad Joshi struct rb_node **link = &(root->rb_node), *parent = NULL; 233309045fSPrasad Joshi u64 offset = new->offset; 243309045fSPrasad Joshi 253309045fSPrasad Joshi /* search the tree */ 263309045fSPrasad Joshi while (*link) { 27473d58ffSPekka Enberg struct qcow_l2_table *t; 283309045fSPrasad Joshi 29473d58ffSPekka Enberg t = rb_entry(*link, struct qcow_l2_table, node); 303309045fSPrasad Joshi if (!t) 313309045fSPrasad Joshi goto error; 323309045fSPrasad Joshi 333309045fSPrasad Joshi parent = *link; 343309045fSPrasad Joshi 353309045fSPrasad Joshi if (t->offset > offset) 363309045fSPrasad Joshi link = &(*link)->rb_left; 373309045fSPrasad Joshi else if (t->offset < offset) 383309045fSPrasad Joshi link = &(*link)->rb_right; 393309045fSPrasad Joshi else 403309045fSPrasad Joshi goto out; 413309045fSPrasad Joshi } 423309045fSPrasad Joshi 433309045fSPrasad Joshi /* add new node */ 443309045fSPrasad Joshi rb_link_node(&new->node, parent, link); 453309045fSPrasad Joshi rb_insert_color(&new->node, root); 463309045fSPrasad Joshi out: 473309045fSPrasad Joshi 
return 0; 483309045fSPrasad Joshi error: 493309045fSPrasad Joshi return -1; 503309045fSPrasad Joshi } 513309045fSPrasad Joshi 52e94cdf08SPekka Enberg static struct qcow_l2_table *l2_table_lookup(struct rb_root *root, u64 offset) 533309045fSPrasad Joshi { 543309045fSPrasad Joshi struct rb_node *link = root->rb_node; 553309045fSPrasad Joshi 563309045fSPrasad Joshi while (link) { 57473d58ffSPekka Enberg struct qcow_l2_table *t; 583309045fSPrasad Joshi 59473d58ffSPekka Enberg t = rb_entry(link, struct qcow_l2_table, node); 603309045fSPrasad Joshi if (!t) 613309045fSPrasad Joshi goto out; 623309045fSPrasad Joshi 633309045fSPrasad Joshi if (t->offset > offset) 643309045fSPrasad Joshi link = link->rb_left; 653309045fSPrasad Joshi else if (t->offset < offset) 663309045fSPrasad Joshi link = link->rb_right; 673309045fSPrasad Joshi else 683309045fSPrasad Joshi return t; 693309045fSPrasad Joshi } 703309045fSPrasad Joshi out: 713309045fSPrasad Joshi return NULL; 723309045fSPrasad Joshi } 733309045fSPrasad Joshi 74e94cdf08SPekka Enberg static void l1_table_free_cache(struct qcow_l1_table *l1t) 753309045fSPrasad Joshi { 767b4eb530SPekka Enberg struct rb_root *r = &l1t->root; 773309045fSPrasad Joshi struct list_head *pos, *n; 78473d58ffSPekka Enberg struct qcow_l2_table *t; 793309045fSPrasad Joshi 807b4eb530SPekka Enberg list_for_each_safe(pos, n, &l1t->lru_list) { 813309045fSPrasad Joshi /* Remove cache table from the list and RB tree */ 823309045fSPrasad Joshi list_del(pos); 83473d58ffSPekka Enberg t = list_entry(pos, struct qcow_l2_table, list); 843309045fSPrasad Joshi rb_erase(&t->node, r); 853309045fSPrasad Joshi 863309045fSPrasad Joshi /* Free the cached node */ 873309045fSPrasad Joshi free(t); 883309045fSPrasad Joshi } 893309045fSPrasad Joshi } 903309045fSPrasad Joshi 91a4e46515SPekka Enberg static int qcow_l2_cache_write(struct qcow *q, struct qcow_l2_table *c) 92a4e46515SPekka Enberg { 93a4e46515SPekka Enberg struct qcow_header *header = q->header; 94a4e46515SPekka Enberg 
u64 size; 95a4e46515SPekka Enberg 96aff88976SPekka Enberg if (!c->dirty) 97aff88976SPekka Enberg return 0; 98aff88976SPekka Enberg 99a4e46515SPekka Enberg size = 1 << header->l2_bits; 100a4e46515SPekka Enberg 101aff88976SPekka Enberg if (pwrite_in_full(q->fd, c->table, size * sizeof(u64), c->offset) < 0) 102aff88976SPekka Enberg return -1; 103aff88976SPekka Enberg 104aff88976SPekka Enberg c->dirty = 0; 105aff88976SPekka Enberg 106aff88976SPekka Enberg return 0; 107a4e46515SPekka Enberg } 108a4e46515SPekka Enberg 109473d58ffSPekka Enberg static int cache_table(struct qcow *q, struct qcow_l2_table *c) 1103309045fSPrasad Joshi { 1117b4eb530SPekka Enberg struct qcow_l1_table *l1t = &q->table; 1127b4eb530SPekka Enberg struct rb_root *r = &l1t->root; 113473d58ffSPekka Enberg struct qcow_l2_table *lru; 1143309045fSPrasad Joshi 1157b4eb530SPekka Enberg if (l1t->nr_cached == MAX_CACHE_NODES) { 1163309045fSPrasad Joshi /* 1173309045fSPrasad Joshi * The node at the head of the list is least recently used 1183309045fSPrasad Joshi * node. Remove it from the list and replaced with a new node. 
1193309045fSPrasad Joshi */ 1207b4eb530SPekka Enberg lru = list_first_entry(&l1t->lru_list, struct qcow_l2_table, list); 1213309045fSPrasad Joshi 122a4e46515SPekka Enberg if (qcow_l2_cache_write(q, lru) < 0) 123a4e46515SPekka Enberg goto error; 124a4e46515SPekka Enberg 1253309045fSPrasad Joshi /* Remove the node from the cache */ 1263309045fSPrasad Joshi rb_erase(&lru->node, r); 1273309045fSPrasad Joshi list_del_init(&lru->list); 1287b4eb530SPekka Enberg l1t->nr_cached--; 1293309045fSPrasad Joshi 1303309045fSPrasad Joshi /* Free the LRUed node */ 1313309045fSPrasad Joshi free(lru); 1323309045fSPrasad Joshi } 1333309045fSPrasad Joshi 1343309045fSPrasad Joshi /* Add new node in RB Tree: Helps in searching faster */ 135e94cdf08SPekka Enberg if (l2_table_insert(r, c) < 0) 1363309045fSPrasad Joshi goto error; 1373309045fSPrasad Joshi 1383309045fSPrasad Joshi /* Add in LRU replacement list */ 1397b4eb530SPekka Enberg list_add_tail(&c->list, &l1t->lru_list); 1407b4eb530SPekka Enberg l1t->nr_cached++; 1413309045fSPrasad Joshi 1423309045fSPrasad Joshi return 0; 1433309045fSPrasad Joshi error: 1443309045fSPrasad Joshi return -1; 1453309045fSPrasad Joshi } 1463309045fSPrasad Joshi 147e94cdf08SPekka Enberg static struct qcow_l2_table *l2_table_search(struct qcow *q, u64 offset) 1483309045fSPrasad Joshi { 1497b4eb530SPekka Enberg struct qcow_l1_table *l1t = &q->table; 150fe8bdde0SPekka Enberg struct qcow_l2_table *l2t; 1513309045fSPrasad Joshi 152e94cdf08SPekka Enberg l2t = l2_table_lookup(&l1t->root, offset); 153fe8bdde0SPekka Enberg if (!l2t) 154fe8bdde0SPekka Enberg return NULL; 1553309045fSPrasad Joshi 1563309045fSPrasad Joshi /* Update the LRU state, by moving the searched node to list tail */ 1577b4eb530SPekka Enberg list_move_tail(&l2t->list, &l1t->lru_list); 1583309045fSPrasad Joshi 159fe8bdde0SPekka Enberg return l2t; 1603309045fSPrasad Joshi } 1613309045fSPrasad Joshi 1623309045fSPrasad Joshi /* Allocates a new node for caching L2 table */ 163473d58ffSPekka Enberg 
static struct qcow_l2_table *new_cache_table(struct qcow *q, u64 offset) 1643309045fSPrasad Joshi { 1653309045fSPrasad Joshi struct qcow_header *header = q->header; 166473d58ffSPekka Enberg struct qcow_l2_table *c; 1673309045fSPrasad Joshi u64 l2t_sz; 1683309045fSPrasad Joshi u64 size; 1693309045fSPrasad Joshi 1703309045fSPrasad Joshi l2t_sz = 1 << header->l2_bits; 1713309045fSPrasad Joshi size = sizeof(*c) + l2t_sz * sizeof(u64); 1723309045fSPrasad Joshi c = calloc(1, size); 1733309045fSPrasad Joshi if (!c) 1743309045fSPrasad Joshi goto out; 1753309045fSPrasad Joshi 1763309045fSPrasad Joshi c->offset = offset; 1773309045fSPrasad Joshi RB_CLEAR_NODE(&c->node); 1783309045fSPrasad Joshi INIT_LIST_HEAD(&c->list); 1793309045fSPrasad Joshi out: 1803309045fSPrasad Joshi return c; 1813309045fSPrasad Joshi } 1823309045fSPrasad Joshi 183742fce76SPrasad Joshi static inline u64 get_l1_index(struct qcow *q, u64 offset) 18486835cedSPrasad Joshi { 185ad627d62SPekka Enberg struct qcow_header *header = q->header; 18686835cedSPrasad Joshi 18786835cedSPrasad Joshi return offset >> (header->l2_bits + header->cluster_bits); 18886835cedSPrasad Joshi } 18986835cedSPrasad Joshi 190742fce76SPrasad Joshi static inline u64 get_l2_index(struct qcow *q, u64 offset) 19186835cedSPrasad Joshi { 192ad627d62SPekka Enberg struct qcow_header *header = q->header; 19386835cedSPrasad Joshi 19486835cedSPrasad Joshi return (offset >> (header->cluster_bits)) & ((1 << header->l2_bits)-1); 19586835cedSPrasad Joshi } 19686835cedSPrasad Joshi 197742fce76SPrasad Joshi static inline u64 get_cluster_offset(struct qcow *q, u64 offset) 19886835cedSPrasad Joshi { 199ad627d62SPekka Enberg struct qcow_header *header = q->header; 20086835cedSPrasad Joshi 20186835cedSPrasad Joshi return offset & ((1 << header->cluster_bits)-1); 20286835cedSPrasad Joshi } 20386835cedSPrasad Joshi 204fe8bdde0SPekka Enberg static struct qcow_l2_table *qcow_read_l2_table(struct qcow *q, u64 offset) 2053309045fSPrasad Joshi { 
2063309045fSPrasad Joshi struct qcow_header *header = q->header; 207fe8bdde0SPekka Enberg struct qcow_l2_table *l2t; 2083309045fSPrasad Joshi u64 size; 2093309045fSPrasad Joshi 2103309045fSPrasad Joshi size = 1 << header->l2_bits; 2113309045fSPrasad Joshi 2123309045fSPrasad Joshi /* search an entry for offset in cache */ 213e94cdf08SPekka Enberg l2t = l2_table_search(q, offset); 214fe8bdde0SPekka Enberg if (l2t) 215fe8bdde0SPekka Enberg return l2t; 2163309045fSPrasad Joshi 2173309045fSPrasad Joshi /* allocate new node for caching l2 table */ 218fe8bdde0SPekka Enberg l2t = new_cache_table(q, offset); 219fe8bdde0SPekka Enberg if (!l2t) 2203309045fSPrasad Joshi goto error; 2213309045fSPrasad Joshi 2223309045fSPrasad Joshi /* table not cached: read from the disk */ 223fe8bdde0SPekka Enberg if (pread_in_full(q->fd, l2t->table, size * sizeof(u64), offset) < 0) 2243309045fSPrasad Joshi goto error; 2253309045fSPrasad Joshi 2263309045fSPrasad Joshi /* cache the table */ 227fe8bdde0SPekka Enberg if (cache_table(q, l2t) < 0) 2283309045fSPrasad Joshi goto error; 2293309045fSPrasad Joshi 230fe8bdde0SPekka Enberg return l2t; 2313309045fSPrasad Joshi error: 232fe8bdde0SPekka Enberg free(l2t); 233fe8bdde0SPekka Enberg return NULL; 2343309045fSPrasad Joshi } 2353309045fSPrasad Joshi 236b1c84095SPekka Enberg static ssize_t qcow_read_cluster(struct qcow *q, u64 offset, void *dst, u32 dst_len) 23786835cedSPrasad Joshi { 238ad627d62SPekka Enberg struct qcow_header *header = q->header; 2393fb67b93SPekka Enberg struct qcow_l1_table *l1t = &q->table; 2403fb67b93SPekka Enberg struct qcow_l2_table *l2t; 2413dac48d4SPrasad Joshi u64 cluster_size; 242742fce76SPrasad Joshi u64 clust_offset; 243742fce76SPrasad Joshi u64 clust_start; 2443fb67b93SPekka Enberg u64 l2t_offset; 245a51948ceSPekka Enberg size_t length; 2463fb67b93SPekka Enberg u64 l2t_size; 247742fce76SPrasad Joshi u64 l1_idx; 248742fce76SPrasad Joshi u64 l2_idx; 24986835cedSPrasad Joshi 250dae803fbSPekka Enberg cluster_size = 1 << 
header->cluster_bits; 25186835cedSPrasad Joshi 252c5e0624bSPrasad Joshi l1_idx = get_l1_index(q, offset); 2533fb67b93SPekka Enberg if (l1_idx >= l1t->table_size) 254c0799eb9SPekka Enberg return -1; 25586835cedSPrasad Joshi 2563dac48d4SPrasad Joshi clust_offset = get_cluster_offset(q, offset); 2573dac48d4SPrasad Joshi if (clust_offset >= cluster_size) 258c0799eb9SPekka Enberg return -1; 2593dac48d4SPrasad Joshi 2603dac48d4SPrasad Joshi length = cluster_size - clust_offset; 2613dac48d4SPrasad Joshi if (length > dst_len) 2623dac48d4SPrasad Joshi length = dst_len; 2633dac48d4SPrasad Joshi 264c0799eb9SPekka Enberg mutex_lock(&q->mutex); 265b2ebe61bSPekka Enberg 2663fb67b93SPekka Enberg l2t_offset = be64_to_cpu(l1t->l1_table[l1_idx]); 2673fb67b93SPekka Enberg if (l2t_offset & QCOW_OFLAG_COMPRESSED) { 268b2ebe61bSPekka Enberg pr_warning("compressed sectors are not supported"); 269b2ebe61bSPekka Enberg goto out_error; 270b2ebe61bSPekka Enberg } 271b2ebe61bSPekka Enberg 2723fb67b93SPekka Enberg l2t_offset &= QCOW_OFFSET_MASK; 2733fb67b93SPekka Enberg if (!l2t_offset) 2743dac48d4SPrasad Joshi goto zero_cluster; 27586835cedSPrasad Joshi 2763fb67b93SPekka Enberg l2t_size = 1 << header->l2_bits; 27786835cedSPrasad Joshi 2783309045fSPrasad Joshi /* read and cache level 2 table */ 2793fb67b93SPekka Enberg l2t = qcow_read_l2_table(q, l2t_offset); 2803fb67b93SPekka Enberg if (!l2t) 281b6edb0ecSSasha Levin goto out_error; 28286835cedSPrasad Joshi 283c5e0624bSPrasad Joshi l2_idx = get_l2_index(q, offset); 2843fb67b93SPekka Enberg if (l2_idx >= l2t_size) 285b6edb0ecSSasha Levin goto out_error; 28686835cedSPrasad Joshi 2873fb67b93SPekka Enberg clust_start = be64_to_cpu(l2t->table[l2_idx]); 288b2ebe61bSPekka Enberg if (clust_start & QCOW_OFLAG_COMPRESSED) { 289b2ebe61bSPekka Enberg pr_warning("compressed sectors are not supported"); 290b2ebe61bSPekka Enberg goto out_error; 291b2ebe61bSPekka Enberg } 292b2ebe61bSPekka Enberg 293b2ebe61bSPekka Enberg clust_start &= QCOW_OFFSET_MASK; 
29486835cedSPrasad Joshi if (!clust_start) 2953dac48d4SPrasad Joshi goto zero_cluster; 29686835cedSPrasad Joshi 297c0799eb9SPekka Enberg mutex_unlock(&q->mutex); 29886835cedSPrasad Joshi 299c0799eb9SPekka Enberg if (pread_in_full(q->fd, dst, length, clust_start + clust_offset) < 0) 300c0799eb9SPekka Enberg return -1; 301c0799eb9SPekka Enberg 3023dac48d4SPrasad Joshi return length; 30386835cedSPrasad Joshi 304179b71f0SPekka Enberg zero_cluster: 305c0799eb9SPekka Enberg mutex_unlock(&q->mutex); 306179b71f0SPekka Enberg memset(dst, 0, length); 307c0799eb9SPekka Enberg return length; 308179b71f0SPekka Enberg 30986835cedSPrasad Joshi out_error: 310c0799eb9SPekka Enberg mutex_unlock(&q->mutex); 311179b71f0SPekka Enberg length = -1; 312c0799eb9SPekka Enberg return -1; 3133dac48d4SPrasad Joshi } 314b6edb0ecSSasha Levin 315b1c84095SPekka Enberg static ssize_t qcow_read_sector(struct disk_image *disk, u64 sector, void *dst, u32 dst_len) 3163dac48d4SPrasad Joshi { 31743835ac9SSasha Levin struct qcow *q = disk->priv; 318ad627d62SPekka Enberg struct qcow_header *header = q->header; 319d8eea993SPekka Enberg u32 nr_read; 3200df6b4d9SPekka Enberg u64 offset; 3210df6b4d9SPekka Enberg char *buf; 3223dac48d4SPrasad Joshi u32 nr; 3233dac48d4SPrasad Joshi 3240df6b4d9SPekka Enberg buf = dst; 325d8eea993SPekka Enberg nr_read = 0; 3260df6b4d9SPekka Enberg 327d8eea993SPekka Enberg while (nr_read < dst_len) { 3283dac48d4SPrasad Joshi offset = sector << SECTOR_SHIFT; 3293dac48d4SPrasad Joshi if (offset >= header->size) 3300df6b4d9SPekka Enberg return -1; 3313dac48d4SPrasad Joshi 332b1c84095SPekka Enberg nr = qcow_read_cluster(q, offset, buf, dst_len - nr_read); 333a51948ceSPekka Enberg if (nr <= 0) 3340df6b4d9SPekka Enberg return -1; 3353dac48d4SPrasad Joshi 336d8eea993SPekka Enberg nr_read += nr; 3373dac48d4SPrasad Joshi buf += nr; 3383dac48d4SPrasad Joshi sector += (nr >> SECTOR_SHIFT); 3393dac48d4SPrasad Joshi } 3400df6b4d9SPekka Enberg 34172133dd2SAsias He return dst_len; 
34286835cedSPrasad Joshi } 34386835cedSPrasad Joshi 344865c675fSPrasad Joshi static inline u64 file_size(int fd) 345865c675fSPrasad Joshi { 346865c675fSPrasad Joshi struct stat st; 3470df6b4d9SPekka Enberg 348865c675fSPrasad Joshi if (fstat(fd, &st) < 0) 349865c675fSPrasad Joshi return 0; 3500df6b4d9SPekka Enberg 351865c675fSPrasad Joshi return st.st_size; 352865c675fSPrasad Joshi } 353865c675fSPrasad Joshi 3540df6b4d9SPekka Enberg static inline int qcow_pwrite_sync(int fd, void *buf, size_t count, off_t offset) 355865c675fSPrasad Joshi { 356865c675fSPrasad Joshi if (pwrite_in_full(fd, buf, count, offset) < 0) 357865c675fSPrasad Joshi return -1; 3580df6b4d9SPekka Enberg 3597d94a719SPekka Enberg return fdatasync(fd); 360865c675fSPrasad Joshi } 361865c675fSPrasad Joshi 362865c675fSPrasad Joshi /* Writes a level 2 table at the end of the file. */ 363b1c84095SPekka Enberg static u64 qcow_write_l2_table(struct qcow *q, u64 *table) 364865c675fSPrasad Joshi { 365865c675fSPrasad Joshi struct qcow_header *header = q->header; 366865c675fSPrasad Joshi u64 clust_sz; 367865c675fSPrasad Joshi u64 f_sz; 3680df6b4d9SPekka Enberg u64 off; 3690df6b4d9SPekka Enberg u64 sz; 370865c675fSPrasad Joshi 371865c675fSPrasad Joshi f_sz = file_size(q->fd); 372865c675fSPrasad Joshi if (!f_sz) 373865c675fSPrasad Joshi return 0; 374865c675fSPrasad Joshi 375865c675fSPrasad Joshi sz = 1 << header->l2_bits; 376865c675fSPrasad Joshi clust_sz = 1 << header->cluster_bits; 377865c675fSPrasad Joshi off = ALIGN(f_sz, clust_sz); 378865c675fSPrasad Joshi 3796fe151aeSPekka Enberg if (pwrite_in_full(q->fd, table, sz * sizeof(u64), off) < 0) 380865c675fSPrasad Joshi return 0; 3810df6b4d9SPekka Enberg 382865c675fSPrasad Joshi return off; 383865c675fSPrasad Joshi } 384865c675fSPrasad Joshi 385*3ecac800SPekka Enberg static void refcount_table_free_cache(struct qcow_refcount_table *rft) 386*3ecac800SPekka Enberg { 387*3ecac800SPekka Enberg struct rb_root *r = &rft->root; 388*3ecac800SPekka Enberg struct list_head 
*pos, *n; 389*3ecac800SPekka Enberg struct qcow_refcount_block *t; 390*3ecac800SPekka Enberg 391*3ecac800SPekka Enberg list_for_each_safe(pos, n, &rft->lru_list) { 392*3ecac800SPekka Enberg list_del(pos); 393*3ecac800SPekka Enberg t = list_entry(pos, struct qcow_refcount_block, list); 394*3ecac800SPekka Enberg rb_erase(&t->node, r); 395*3ecac800SPekka Enberg 396*3ecac800SPekka Enberg free(t); 397*3ecac800SPekka Enberg } 398*3ecac800SPekka Enberg } 399*3ecac800SPekka Enberg 400*3ecac800SPekka Enberg static int refcount_block_insert(struct rb_root *root, struct qcow_refcount_block *new) 401*3ecac800SPekka Enberg { 402*3ecac800SPekka Enberg struct rb_node **link = &(root->rb_node), *parent = NULL; 403*3ecac800SPekka Enberg u64 offset = new->offset; 404*3ecac800SPekka Enberg 405*3ecac800SPekka Enberg /* search the tree */ 406*3ecac800SPekka Enberg while (*link) { 407*3ecac800SPekka Enberg struct qcow_refcount_block *t; 408*3ecac800SPekka Enberg 409*3ecac800SPekka Enberg t = rb_entry(*link, struct qcow_refcount_block, node); 410*3ecac800SPekka Enberg if (!t) 411*3ecac800SPekka Enberg goto error; 412*3ecac800SPekka Enberg 413*3ecac800SPekka Enberg parent = *link; 414*3ecac800SPekka Enberg 415*3ecac800SPekka Enberg if (t->offset > offset) 416*3ecac800SPekka Enberg link = &(*link)->rb_left; 417*3ecac800SPekka Enberg else if (t->offset < offset) 418*3ecac800SPekka Enberg link = &(*link)->rb_right; 419*3ecac800SPekka Enberg else 420*3ecac800SPekka Enberg goto out; 421*3ecac800SPekka Enberg } 422*3ecac800SPekka Enberg 423*3ecac800SPekka Enberg /* add new node */ 424*3ecac800SPekka Enberg rb_link_node(&new->node, parent, link); 425*3ecac800SPekka Enberg rb_insert_color(&new->node, root); 426*3ecac800SPekka Enberg out: 427*3ecac800SPekka Enberg return 0; 428*3ecac800SPekka Enberg error: 429*3ecac800SPekka Enberg return -1; 430*3ecac800SPekka Enberg } 431*3ecac800SPekka Enberg 432*3ecac800SPekka Enberg static int write_refcount_block(struct qcow *q, struct qcow_refcount_block 
*rfb) 433*3ecac800SPekka Enberg { 434*3ecac800SPekka Enberg if (!rfb->dirty) 435*3ecac800SPekka Enberg return 0; 436*3ecac800SPekka Enberg 437*3ecac800SPekka Enberg if (pwrite_in_full(q->fd, rfb->entries, rfb->size * sizeof(u16), rfb->offset) < 0) 438*3ecac800SPekka Enberg return -1; 439*3ecac800SPekka Enberg 440*3ecac800SPekka Enberg rfb->dirty = 0; 441*3ecac800SPekka Enberg 442*3ecac800SPekka Enberg return 0; 443*3ecac800SPekka Enberg } 444*3ecac800SPekka Enberg 445*3ecac800SPekka Enberg static int cache_refcount_block(struct qcow *q, struct qcow_refcount_block *c) 446*3ecac800SPekka Enberg { 447*3ecac800SPekka Enberg struct qcow_refcount_table *rft = &q->refcount_table; 448*3ecac800SPekka Enberg struct rb_root *r = &rft->root; 449*3ecac800SPekka Enberg struct qcow_refcount_block *lru; 450*3ecac800SPekka Enberg 451*3ecac800SPekka Enberg if (rft->nr_cached == MAX_CACHE_NODES) { 452*3ecac800SPekka Enberg lru = list_first_entry(&rft->lru_list, struct qcow_refcount_block, list); 453*3ecac800SPekka Enberg 454*3ecac800SPekka Enberg if (write_refcount_block(q, lru) < 0) 455*3ecac800SPekka Enberg goto error; 456*3ecac800SPekka Enberg 457*3ecac800SPekka Enberg rb_erase(&lru->node, r); 458*3ecac800SPekka Enberg list_del_init(&lru->list); 459*3ecac800SPekka Enberg rft->nr_cached--; 460*3ecac800SPekka Enberg 461*3ecac800SPekka Enberg free(lru); 462*3ecac800SPekka Enberg } 463*3ecac800SPekka Enberg 464*3ecac800SPekka Enberg if (refcount_block_insert(r, c) < 0) 465*3ecac800SPekka Enberg goto error; 466*3ecac800SPekka Enberg 467*3ecac800SPekka Enberg list_add_tail(&c->list, &rft->lru_list); 468*3ecac800SPekka Enberg rft->nr_cached++; 469*3ecac800SPekka Enberg 470*3ecac800SPekka Enberg return 0; 471*3ecac800SPekka Enberg error: 472*3ecac800SPekka Enberg return -1; 473*3ecac800SPekka Enberg } 474*3ecac800SPekka Enberg 475*3ecac800SPekka Enberg static struct qcow_refcount_block *new_refcount_block(struct qcow *q, u64 rfb_offset) 476*3ecac800SPekka Enberg { 477*3ecac800SPekka 
Enberg struct qcow_header *header = q->header; 478*3ecac800SPekka Enberg struct qcow_refcount_block *rfb; 479*3ecac800SPekka Enberg u64 cluster_size; 480*3ecac800SPekka Enberg 481*3ecac800SPekka Enberg cluster_size = 1 << header->cluster_bits; 482*3ecac800SPekka Enberg 483*3ecac800SPekka Enberg rfb = malloc(sizeof *rfb + cluster_size); 484*3ecac800SPekka Enberg if (!rfb) 485*3ecac800SPekka Enberg return NULL; 486*3ecac800SPekka Enberg 487*3ecac800SPekka Enberg rfb->offset = rfb_offset; 488*3ecac800SPekka Enberg rfb->size = cluster_size / sizeof(u16); 489*3ecac800SPekka Enberg RB_CLEAR_NODE(&rfb->node); 490*3ecac800SPekka Enberg INIT_LIST_HEAD(&rfb->list); 491*3ecac800SPekka Enberg 492*3ecac800SPekka Enberg return rfb; 493*3ecac800SPekka Enberg } 494*3ecac800SPekka Enberg 495*3ecac800SPekka Enberg static struct qcow_refcount_block *refcount_block_lookup(struct rb_root *root, u64 offset) 496*3ecac800SPekka Enberg { 497*3ecac800SPekka Enberg struct rb_node *link = root->rb_node; 498*3ecac800SPekka Enberg 499*3ecac800SPekka Enberg while (link) { 500*3ecac800SPekka Enberg struct qcow_refcount_block *t; 501*3ecac800SPekka Enberg 502*3ecac800SPekka Enberg t = rb_entry(link, struct qcow_refcount_block, node); 503*3ecac800SPekka Enberg if (!t) 504*3ecac800SPekka Enberg goto out; 505*3ecac800SPekka Enberg 506*3ecac800SPekka Enberg if (t->offset > offset) 507*3ecac800SPekka Enberg link = link->rb_left; 508*3ecac800SPekka Enberg else if (t->offset < offset) 509*3ecac800SPekka Enberg link = link->rb_right; 510*3ecac800SPekka Enberg else 511*3ecac800SPekka Enberg return t; 512*3ecac800SPekka Enberg } 513*3ecac800SPekka Enberg out: 514*3ecac800SPekka Enberg return NULL; 515*3ecac800SPekka Enberg } 516*3ecac800SPekka Enberg 517*3ecac800SPekka Enberg static struct qcow_refcount_block *refcount_block_search(struct qcow *q, u64 offset) 518*3ecac800SPekka Enberg { 519*3ecac800SPekka Enberg struct qcow_refcount_table *rft = &q->refcount_table; 520*3ecac800SPekka Enberg struct 
qcow_refcount_block *rfb; 521*3ecac800SPekka Enberg 522*3ecac800SPekka Enberg rfb = refcount_block_lookup(&rft->root, offset); 523*3ecac800SPekka Enberg if (!rfb) 524*3ecac800SPekka Enberg return NULL; 525*3ecac800SPekka Enberg 526*3ecac800SPekka Enberg /* Update the LRU state, by moving the searched node to list tail */ 527*3ecac800SPekka Enberg list_move_tail(&rfb->list, &rft->lru_list); 528*3ecac800SPekka Enberg 529*3ecac800SPekka Enberg return rfb; 530*3ecac800SPekka Enberg } 531*3ecac800SPekka Enberg 532*3ecac800SPekka Enberg static struct qcow_refcount_block *qcow_read_refcount_block(struct qcow *q, u64 clust_idx) 533*3ecac800SPekka Enberg { 534*3ecac800SPekka Enberg struct qcow_header *header = q->header; 535*3ecac800SPekka Enberg struct qcow_refcount_table *rft = &q->refcount_table; 536*3ecac800SPekka Enberg struct qcow_refcount_block *rfb; 537*3ecac800SPekka Enberg u64 rfb_offset; 538*3ecac800SPekka Enberg u64 rft_idx; 539*3ecac800SPekka Enberg 540*3ecac800SPekka Enberg rft_idx = clust_idx >> (header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT); 541*3ecac800SPekka Enberg if (rft_idx >= rft->rf_size) 542*3ecac800SPekka Enberg return NULL; 543*3ecac800SPekka Enberg 544*3ecac800SPekka Enberg rfb_offset = be64_to_cpu(rft->rf_table[rft_idx]); 545*3ecac800SPekka Enberg 546*3ecac800SPekka Enberg rfb = refcount_block_search(q, rfb_offset); 547*3ecac800SPekka Enberg if (rfb) 548*3ecac800SPekka Enberg return rfb; 549*3ecac800SPekka Enberg 550*3ecac800SPekka Enberg rfb = new_refcount_block(q, rfb_offset); 551*3ecac800SPekka Enberg if (!rfb) 552*3ecac800SPekka Enberg return NULL; 553*3ecac800SPekka Enberg 554*3ecac800SPekka Enberg if (pread_in_full(q->fd, rfb->entries, rfb->size * sizeof(u16), rfb_offset) < 0) 555*3ecac800SPekka Enberg goto error_free_rfb; 556*3ecac800SPekka Enberg 557*3ecac800SPekka Enberg if (cache_refcount_block(q, rfb) < 0) 558*3ecac800SPekka Enberg goto error_free_rfb; 559*3ecac800SPekka Enberg 560*3ecac800SPekka Enberg return rfb; 
561*3ecac800SPekka Enberg 562*3ecac800SPekka Enberg error_free_rfb: 563*3ecac800SPekka Enberg free(rfb); 564*3ecac800SPekka Enberg 565*3ecac800SPekka Enberg return NULL; 566*3ecac800SPekka Enberg } 567*3ecac800SPekka Enberg 568865c675fSPrasad Joshi /* 569865c675fSPrasad Joshi * QCOW file might grow during a write operation. Not only data but metadata is 570865c675fSPrasad Joshi * also written at the end of the file. Therefore it is necessary to ensure 5710df6b4d9SPekka Enberg * every write is committed to disk. Hence we use uses qcow_pwrite_sync() to 572865c675fSPrasad Joshi * synchronize the in-core state of QCOW image to disk. 573865c675fSPrasad Joshi * 574865c675fSPrasad Joshi * We also try to restore the image to a consistent state if the metdata 575865c675fSPrasad Joshi * operation fails. The two metadat operations are: level 1 and level 2 table 576865c675fSPrasad Joshi * update. If either of them fails the image is truncated to a consistent state. 577865c675fSPrasad Joshi */ 578b1c84095SPekka Enberg static ssize_t qcow_write_cluster(struct qcow *q, u64 offset, void *buf, u32 src_len) 579865c675fSPrasad Joshi { 580865c675fSPrasad Joshi struct qcow_header *header = q->header; 5813fb67b93SPekka Enberg struct qcow_l1_table *l1t = &q->table; 582fe8bdde0SPekka Enberg struct qcow_l2_table *l2t; 5830df6b4d9SPekka Enberg u64 clust_start; 584*3ecac800SPekka Enberg u64 clust_flags; 5853fb67b93SPekka Enberg u64 l2t_offset; 5860df6b4d9SPekka Enberg u64 clust_off; 5873fb67b93SPekka Enberg u64 l2t_size; 588865c675fSPrasad Joshi u64 clust_sz; 589865c675fSPrasad Joshi u64 l1t_idx; 590865c675fSPrasad Joshi u64 l2t_idx; 591865c675fSPrasad Joshi u64 f_sz; 5920df6b4d9SPekka Enberg u64 len; 593865c675fSPrasad Joshi 594fe8bdde0SPekka Enberg l2t = NULL; 5953fb67b93SPekka Enberg l2t_size = 1 << header->l2_bits; 596865c675fSPrasad Joshi clust_sz = 1 << header->cluster_bits; 597865c675fSPrasad Joshi 598865c675fSPrasad Joshi l1t_idx = get_l1_index(q, offset); 5993fb67b93SPekka Enberg if 
(l1t_idx >= l1t->table_size) 600c0799eb9SPekka Enberg return -1; 601865c675fSPrasad Joshi 602865c675fSPrasad Joshi l2t_idx = get_l2_index(q, offset); 6033fb67b93SPekka Enberg if (l2t_idx >= l2t_size) 604c0799eb9SPekka Enberg return -1; 605865c675fSPrasad Joshi 606865c675fSPrasad Joshi clust_off = get_cluster_offset(q, offset); 607865c675fSPrasad Joshi if (clust_off >= clust_sz) 608c0799eb9SPekka Enberg return -1; 609865c675fSPrasad Joshi 610865c675fSPrasad Joshi len = clust_sz - clust_off; 611865c675fSPrasad Joshi if (len > src_len) 612865c675fSPrasad Joshi len = src_len; 613865c675fSPrasad Joshi 614c0799eb9SPekka Enberg mutex_lock(&q->mutex); 615c0799eb9SPekka Enberg 6163fb67b93SPekka Enberg l2t_offset = be64_to_cpu(l1t->l1_table[l1t_idx]); 6173fb67b93SPekka Enberg if (l2t_offset & QCOW_OFLAG_COMPRESSED) { 618121dd76eSPekka Enberg pr_warning("compressed clusters are not supported"); 619121dd76eSPekka Enberg goto error; 620121dd76eSPekka Enberg } 6213fb67b93SPekka Enberg if (!(l2t_offset & QCOW_OFLAG_COPIED)) { 622*3ecac800SPekka Enberg pr_warning("L2 copy-on-write clusters are not supported"); 623b2ebe61bSPekka Enberg goto error; 624b2ebe61bSPekka Enberg } 625b2ebe61bSPekka Enberg 6263fb67b93SPekka Enberg l2t_offset &= QCOW_OFFSET_MASK; 6273fb67b93SPekka Enberg if (l2t_offset) { 6283309045fSPrasad Joshi /* read and cache l2 table */ 6293fb67b93SPekka Enberg l2t = qcow_read_l2_table(q, l2t_offset); 630fe8bdde0SPekka Enberg if (!l2t) 6313309045fSPrasad Joshi goto error; 632865c675fSPrasad Joshi } else { 6333fb67b93SPekka Enberg l2t = new_cache_table(q, l2t_offset); 634fe8bdde0SPekka Enberg if (!l2t) 6353309045fSPrasad Joshi goto error; 6363309045fSPrasad Joshi 6370df6b4d9SPekka Enberg /* Capture the state of the consistent QCOW image */ 638865c675fSPrasad Joshi f_sz = file_size(q->fd); 639865c675fSPrasad Joshi if (!f_sz) 6403309045fSPrasad Joshi goto free_cache; 641865c675fSPrasad Joshi 642865c675fSPrasad Joshi /* Write the l2 table of 0's at the end of the file */ 
		/*
		 * Tail of qcow_write_cluster(): a freshly built L2 table is
		 * written out and published in the in-core L1 table.
		 */
		l2t_offset = qcow_write_l2_table(q, l2t->table);
		if (!l2t_offset)
			goto free_cache;

		if (cache_table(q, l2t) < 0) {
			/*
			 * Caching the new L2 table failed: shrink the file
			 * back to the size captured before the table was
			 * appended; the table is dropped on both paths.
			 */
			if (ftruncate(q->fd, f_sz) < 0)
				goto free_cache;

			goto free_cache;
		}

		/* Update the in-core entry */
		l1t->l1_table[l1t_idx] = cpu_to_be64(l2t_offset);
	}

	/* Capture the state of the consistent QCOW image */
	f_sz = file_size(q->fd);
	if (!f_sz)
		goto error;

	clust_start = be64_to_cpu(l2t->table[l2t_idx]);

	clust_flags = clust_start & QCOW_OFLAGS_MASK;
	if (clust_flags & QCOW_OFLAG_COMPRESSED) {
		pr_warning("compressed clusters are not supported");
		goto error;
	}

	clust_start &= QCOW_OFFSET_MASK;
	if (!clust_start) {
		/* Unallocated cluster: place it at the aligned end of file */
		clust_start = ALIGN(f_sz, clust_sz);
		l2t->table[l2t_idx] = cpu_to_be64(clust_start | QCOW_OFLAG_COPIED);
		l2t->dirty = 1;
	}

	if (!(clust_flags & QCOW_OFLAG_COPIED)) {
		/*
		 * Cluster is not private to this image: consult its
		 * refcount. A shared cluster (refcount > 1) would require
		 * copy-on-write, which is not supported.
		 */
		struct qcow_refcount_block *rfb = NULL;
		u16 clust_refcount;
		u64 clust_idx;
		u64 rfb_idx;

		clust_idx = (clust_start & QCOW_OFFSET_MASK) >> (header->cluster_bits);

		rfb = qcow_read_refcount_block(q, clust_idx);
		if (!rfb) {
			pr_warning("L1: error while reading refcount table");
			goto error;
		}

		/* Index of this cluster inside its refcount block */
		rfb_idx = clust_idx & (((1ULL << (header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT)) - 1));
		if (rfb_idx >= rfb->size) {
			pr_warning("L1: refcount block index out of bounds");
			goto error;
		}

		clust_refcount = be16_to_cpu(rfb->entries[rfb_idx]);
		if (!clust_refcount) {
			/* First use of this cluster: claim it */
			clust_refcount = 1;
			rfb->entries[rfb_idx] = cpu_to_be16(clust_refcount);
			rfb->dirty = 1;
		}

		if (clust_refcount > 1) {
			pr_warning("L1 copy-on-write clusters are not supported");
			goto error;
		}
	}

	mutex_unlock(&q->mutex);

	/* Write actual data */
	if (pwrite_in_full(q->fd, buf, len, clust_start + clust_off) < 0)
		return -1;

	return len;

free_cache:
	free(l2t);
error:
	mutex_unlock(&q->mutex);
	return -1;
}

/*
 * Write 'src_len' bytes from 'src' into the image starting at 'sector',
 * splitting the request across cluster boundaries via qcow_write_cluster().
 *
 * Returns the number of bytes written, or -1 on error (including any
 * attempt to write past the end of the virtual disk).
 */
static ssize_t qcow_write_sector(struct disk_image *disk, u64 sector, void *src, u32 src_len)
{
	struct qcow *q = disk->priv;
	struct qcow_header *header = q->header;
	u32 nr_written;
	char *buf;
	u64 offset;
	ssize_t nr;

	buf = src;
	nr_written = 0;
	offset = sector << SECTOR_SHIFT;

	while (nr_written < src_len) {
		if (offset >= header->size)
			return -1;

		nr = qcow_write_cluster(q, offset, buf, src_len - nr_written);
		if (nr < 0)
			return -1;

		nr_written += nr;
		buf += nr;
		offset += nr;
	}

	return nr_written;
}

/*
 * Write callback installed for read-only images: logs and fails every
 * write request with -1 (reported to the guest as an I/O error).
 */
static ssize_t qcow_nowrite_sector(struct disk_image *disk, u64 sector, void *src, u32 src_len)
{
	/* I/O error */
	pr_info("%s: no write support\n", __func__);
	return -1;
}

/*
 * Flush all dirty metadata caches (refcount blocks, L2 tables, the L1
 * table) and the data to the image file, in an order that keeps the
 * on-disk image consistent at every step.
 */
static int qcow_disk_flush(struct disk_image *disk)
{
	struct qcow *q = disk->priv;
	struct qcow_refcount_table *rft;
	struct qcow_header *header;
	struct list_head *pos, *n;
	struct qcow_l1_table *l1t;

	header = q->header;
	l1t = &q->table;
	rft = &q->refcount_table;
	mutex_lock(&q->mutex);

	/* Write back every cached (possibly dirty) refcount block first */
	list_for_each_safe(pos, n, &rft->lru_list) {
		struct qcow_refcount_block *c = list_entry(pos, struct qcow_refcount_block, list);

		if (write_refcount_block(q, c) < 0)
			goto error_unlock;
	}

	/* Make the refcount blocks stable before touching L2 tables */
	if (fdatasync(disk->fd) < 0)
		goto error_unlock;

	list_for_each_safe(pos, n, &l1t->lru_list) {
		struct qcow_l2_table *c = list_entry(pos, struct qcow_l2_table, list);

		if (qcow_l2_cache_write(q, c) < 0)
			goto error_unlock;
	}

	/* L2 tables must be stable before the L1 table references them */
	if (fdatasync(disk->fd) < 0)
		goto error_unlock;

	if (pwrite_in_full(disk->fd, l1t->l1_table, l1t->table_size * sizeof(u64), header->l1_table_offset) < 0)
		goto error_unlock;

	mutex_unlock(&q->mutex);

	return fsync(disk->fd);

error_unlock:
	mutex_unlock(&q->mutex);
	return -1;
}

/*
 * Free all per-image caches and allocations. A NULL 'disk' is a no-op.
 * Always returns 0.
 *
 * NOTE(review): the image fd itself is presumably closed by the generic
 * disk_image layer, not here — confirm against disk-image.c.
 */
static int qcow_disk_close(struct disk_image *disk)
{
	struct qcow *q;

	if (!disk)
		return 0;

	q = disk->priv;

	refcount_table_free_cache(&q->refcount_table);
	l1_table_free_cache(&q->table);
	free(q->refcount_table.rf_table);
	free(q->table.l1_table);
	free(q->header);
	free(q);

	return 0;
}

/* Operations for images opened read-only: all writes are rejected */
static struct disk_image_operations qcow_disk_readonly_ops = {
	.read_sector	= qcow_read_sector,
	.write_sector	= qcow_nowrite_sector,
	.close		= qcow_disk_close,
};

/* Operations for writable images */
static struct disk_image_operations qcow_disk_ops = {
	.read_sector	= qcow_read_sector,
	.write_sector	= qcow_write_sector,
	.flush		= qcow_disk_flush,
	.close		= qcow_disk_close,
};

/*
 * Load the QCOW2 refcount table from the image file into
 * q->refcount_table and initialize its block cache (RB-tree + LRU list).
 *
 * Returns 0 on success, negative on read failure, -1 on allocation
 * failure.
 */
static int qcow_read_refcount_table(struct qcow *q)
{
	struct qcow_header *header = q->header;
	struct qcow_refcount_table *rft = &q->refcount_table;
	u64 cluster_size;

	/*
	 * NOTE(review): int-width shift — would overflow for
	 * cluster_bits >= 31; consider 1ULL << if such images must be
	 * tolerated.
	 */
	cluster_size = 1 << header->cluster_bits;

	/* refcount_table_size is in clusters; convert to u64 entries */
	rft->rf_size = (header->refcount_table_size * cluster_size) / sizeof(u64);

	rft->rf_table = calloc(rft->rf_size, sizeof(u64));
	if (!rft->rf_table)
		return -1;

	rft->root = RB_ROOT;
	INIT_LIST_HEAD(&rft->lru_list);

	return pread_in_full(q->fd, rft->rf_table, sizeof(u64) * rft->rf_size, header->refcount_table_offset);
858*3ecac800SPekka Enberg } 859*3ecac800SPekka Enberg 86086835cedSPrasad Joshi static int qcow_read_l1_table(struct qcow *q) 86186835cedSPrasad Joshi { 862ad627d62SPekka Enberg struct qcow_header *header = q->header; 863473aaa2dSPekka Enberg struct qcow_l1_table *table = &q->table; 86486835cedSPrasad Joshi 865ad627d62SPekka Enberg table->table_size = header->l1_size; 86686835cedSPrasad Joshi 86700adcc1bSPrasad Joshi table->l1_table = calloc(table->table_size, sizeof(u64)); 86800adcc1bSPrasad Joshi if (!table->l1_table) 86986835cedSPrasad Joshi return -1; 87086835cedSPrasad Joshi 871659f4186SPekka Enberg return pread_in_full(q->fd, table->l1_table, sizeof(u64) * table->table_size, header->l1_table_offset); 87286835cedSPrasad Joshi } 87386835cedSPrasad Joshi 874ad627d62SPekka Enberg static void *qcow2_read_header(int fd) 87586835cedSPrasad Joshi { 876ad627d62SPekka Enberg struct qcow2_header_disk f_header; 877ad627d62SPekka Enberg struct qcow_header *header; 87886835cedSPrasad Joshi 879ad627d62SPekka Enberg header = malloc(sizeof(struct qcow_header)); 88086835cedSPrasad Joshi if (!header) 88186835cedSPrasad Joshi return NULL; 88286835cedSPrasad Joshi 8830657f33dSPrasad Joshi if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0) { 8840657f33dSPrasad Joshi free(header); 88586835cedSPrasad Joshi return NULL; 8860657f33dSPrasad Joshi } 88786835cedSPrasad Joshi 888ad627d62SPekka Enberg be32_to_cpus(&f_header.magic); 889ad627d62SPekka Enberg be32_to_cpus(&f_header.version); 890ad627d62SPekka Enberg be64_to_cpus(&f_header.backing_file_offset); 891ad627d62SPekka Enberg be32_to_cpus(&f_header.backing_file_size); 892ad627d62SPekka Enberg be32_to_cpus(&f_header.cluster_bits); 893ad627d62SPekka Enberg be64_to_cpus(&f_header.size); 894ad627d62SPekka Enberg be32_to_cpus(&f_header.crypt_method); 895ad627d62SPekka Enberg be32_to_cpus(&f_header.l1_size); 896ad627d62SPekka Enberg be64_to_cpus(&f_header.l1_table_offset); 897ad627d62SPekka Enberg 
be64_to_cpus(&f_header.refcount_table_offset); 898ad627d62SPekka Enberg be32_to_cpus(&f_header.refcount_table_clusters); 899ad627d62SPekka Enberg be32_to_cpus(&f_header.nb_snapshots); 900ad627d62SPekka Enberg be64_to_cpus(&f_header.snapshots_offset); 901ad627d62SPekka Enberg 902ad627d62SPekka Enberg *header = (struct qcow_header) { 903ad627d62SPekka Enberg .size = f_header.size, 904ad627d62SPekka Enberg .l1_table_offset = f_header.l1_table_offset, 905ad627d62SPekka Enberg .l1_size = f_header.l1_size, 906ad627d62SPekka Enberg .cluster_bits = f_header.cluster_bits, 907ad627d62SPekka Enberg .l2_bits = f_header.cluster_bits - 3, 908*3ecac800SPekka Enberg .refcount_table_offset = f_header.refcount_table_offset, 909*3ecac800SPekka Enberg .refcount_table_size = f_header.refcount_table_clusters, 910ad627d62SPekka Enberg }; 911ad627d62SPekka Enberg 912ad627d62SPekka Enberg return header; 913ad627d62SPekka Enberg } 914ad627d62SPekka Enberg 915f10860caSPekka Enberg static struct disk_image *qcow2_probe(int fd, bool readonly) 916ad627d62SPekka Enberg { 917ad627d62SPekka Enberg struct disk_image *disk_image; 9187b4eb530SPekka Enberg struct qcow_l1_table *l1t; 9197b4eb530SPekka Enberg struct qcow_header *h; 9207b4eb530SPekka Enberg struct qcow *q; 921ad627d62SPekka Enberg 922ad627d62SPekka Enberg q = calloc(1, sizeof(struct qcow)); 923ad627d62SPekka Enberg if (!q) 924ad627d62SPekka Enberg goto error; 925ad627d62SPekka Enberg 926c0799eb9SPekka Enberg mutex_init(&q->mutex); 927ad627d62SPekka Enberg q->fd = fd; 9287b4eb530SPekka Enberg 9297b4eb530SPekka Enberg l1t = &q->table; 9307b4eb530SPekka Enberg 9317b4eb530SPekka Enberg l1t->root = RB_ROOT; 9327b4eb530SPekka Enberg INIT_LIST_HEAD(&l1t->lru_list); 933ad627d62SPekka Enberg 934ad627d62SPekka Enberg h = q->header = qcow2_read_header(fd); 935ad627d62SPekka Enberg if (!h) 936ad627d62SPekka Enberg goto error; 937ad627d62SPekka Enberg 938ad627d62SPekka Enberg if (qcow_read_l1_table(q) < 0) 939ad627d62SPekka Enberg goto error; 
940ad627d62SPekka Enberg 941*3ecac800SPekka Enberg if (qcow_read_refcount_table(q) < 0) 942*3ecac800SPekka Enberg goto error; 943*3ecac800SPekka Enberg 9447d22135fSAsias He /* 9457d22135fSAsias He * Do not use mmap use read/write instead 9467d22135fSAsias He */ 947f10860caSPekka Enberg if (readonly) 948b1c84095SPekka Enberg disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_NOMMAP); 949f10860caSPekka Enberg else 950b1c84095SPekka Enberg disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_NOMMAP); 951f10860caSPekka Enberg 952ad627d62SPekka Enberg if (!disk_image) 953ad627d62SPekka Enberg goto error; 954ad627d62SPekka Enberg disk_image->priv = q; 955ad627d62SPekka Enberg 956ad627d62SPekka Enberg return disk_image; 957ad627d62SPekka Enberg error: 958ad627d62SPekka Enberg if (!q) 959ad627d62SPekka Enberg return NULL; 960ad627d62SPekka Enberg 961ad627d62SPekka Enberg free(q->table.l1_table); 962ad627d62SPekka Enberg free(q->header); 963ad627d62SPekka Enberg free(q); 964ad627d62SPekka Enberg 965ad627d62SPekka Enberg return NULL; 966ad627d62SPekka Enberg } 967ad627d62SPekka Enberg 968ad627d62SPekka Enberg static bool qcow2_check_image(int fd) 969ad627d62SPekka Enberg { 970ad627d62SPekka Enberg struct qcow2_header_disk f_header; 971ad627d62SPekka Enberg 972ad627d62SPekka Enberg if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0) 973ad627d62SPekka Enberg return false; 974ad627d62SPekka Enberg 975ad627d62SPekka Enberg be32_to_cpus(&f_header.magic); 976ad627d62SPekka Enberg be32_to_cpus(&f_header.version); 977ad627d62SPekka Enberg 978ad627d62SPekka Enberg if (f_header.magic != QCOW_MAGIC) 979ad627d62SPekka Enberg return false; 980ad627d62SPekka Enberg 981ad627d62SPekka Enberg if (f_header.version != QCOW2_VERSION) 982ad627d62SPekka Enberg return false; 983ad627d62SPekka Enberg 984ad627d62SPekka Enberg return true; 985ad627d62SPekka Enberg } 986ad627d62SPekka Enberg 987ad627d62SPekka Enberg static void 
/*
 * Read a QCOW1 on-disk header and convert it to the in-core,
 * host-endian 'struct qcow_header' representation.
 *
 * Returns a malloc()ed header (caller frees) or NULL on failure.
 */
*qcow1_read_header(int fd)
{
	struct qcow1_header_disk f_header;
	struct qcow_header *header;

	header = malloc(sizeof(struct qcow_header));
	if (!header)
		return NULL;

	if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0) {
		free(header);
		return NULL;
	}

	/* All on-disk fields are big-endian */
	be32_to_cpus(&f_header.magic);
	be32_to_cpus(&f_header.version);
	be64_to_cpus(&f_header.backing_file_offset);
	be32_to_cpus(&f_header.backing_file_size);
	be32_to_cpus(&f_header.mtime);
	be64_to_cpus(&f_header.size);
	be32_to_cpus(&f_header.crypt_method);
	be64_to_cpus(&f_header.l1_table_offset);

	*header = (struct qcow_header) {
		.size			= f_header.size,
		.l1_table_offset	= f_header.l1_table_offset,
		/* QCOW1 stores no l1_size; derive it from the virtual size */
		.l1_size		= f_header.size / ((1 << f_header.l2_bits) * (1 << f_header.cluster_bits)),
		.cluster_bits		= f_header.cluster_bits,
		.l2_bits		= f_header.l2_bits,
	};

	return header;
}

/*
 * Build a disk_image for a QCOW1 file: read the header and the L1
 * table, then wire up the (read-only or writable) operation table.
 * QCOW1 has no refcount table, so none is read here.
 *
 * Returns the new disk_image or NULL on failure.
 */
static struct disk_image *qcow1_probe(int fd, bool readonly)
{
	struct disk_image *disk_image;
	struct qcow_l1_table *l1t;
	struct qcow_header *h;
	struct qcow *q;

	q = calloc(1, sizeof(struct qcow));
	if (!q)
		goto error;

	mutex_init(&q->mutex);
	q->fd = fd;

	l1t = &q->table;

	l1t->root = RB_ROOT;
	INIT_LIST_HEAD(&l1t->lru_list);

	h = q->header = qcow1_read_header(fd);
	if (!h)
		goto error;

	if (qcow_read_l1_table(q) < 0)
		goto error;

	/*
	 * Do not use mmap use read/write instead
	 */
	if (readonly)
		disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_NOMMAP);
	else
		disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_NOMMAP);

	if (!disk_image)
		goto error;
	disk_image->priv = q;

	return disk_image;
error:
	if (!q)
		return NULL;

	free(q->table.l1_table);
	free(q->header);
	free(q);

	return NULL;
}

/*
 * Check whether the file looks like a QCOW1 image (magic + version 1).
 */
static bool qcow1_check_image(int fd)
{
	struct qcow1_header_disk f_header;

	if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0)
		return false;

	be32_to_cpus(&f_header.magic);
	be32_to_cpus(&f_header.version);

	if (f_header.magic != QCOW_MAGIC)
		return false;

	if (f_header.version != QCOW1_VERSION)
		return false;

	return true;
}

/*
 * Public entry point: detect the QCOW version of the file behind 'fd'
 * and open it accordingly. Returns NULL if the file is neither QCOW1
 * nor QCOW2.
 */
struct disk_image *qcow_probe(int fd, bool readonly)
{
	if (qcow1_check_image(fd))
		return qcow1_probe(fd, readonly);

	if (qcow2_check_image(fd))
		return qcow2_probe(fd, readonly);

	return NULL;
}