186835cedSPrasad Joshi #include "kvm/qcow.h" 286835cedSPrasad Joshi 386835cedSPrasad Joshi #include "kvm/disk-image.h" 486835cedSPrasad Joshi #include "kvm/read-write.h" 5c0799eb9SPekka Enberg #include "kvm/mutex.h" 686835cedSPrasad Joshi #include "kvm/util.h" 786835cedSPrasad Joshi 886835cedSPrasad Joshi #include <sys/types.h> 986835cedSPrasad Joshi #include <sys/stat.h> 1086835cedSPrasad Joshi #include <stdbool.h> 1186835cedSPrasad Joshi #include <stdlib.h> 1286835cedSPrasad Joshi #include <string.h> 1386835cedSPrasad Joshi #include <unistd.h> 1486835cedSPrasad Joshi #include <fcntl.h> 1586835cedSPrasad Joshi 1686835cedSPrasad Joshi #include <linux/byteorder.h> 17865c675fSPrasad Joshi #include <linux/kernel.h> 180df6b4d9SPekka Enberg #include <linux/types.h> 1986835cedSPrasad Joshi 20473d58ffSPekka Enberg static int insert(struct rb_root *root, struct qcow_l2_table *new) 213309045fSPrasad Joshi { 223309045fSPrasad Joshi struct rb_node **link = &(root->rb_node), *parent = NULL; 233309045fSPrasad Joshi u64 offset = new->offset; 243309045fSPrasad Joshi 253309045fSPrasad Joshi /* search the tree */ 263309045fSPrasad Joshi while (*link) { 27473d58ffSPekka Enberg struct qcow_l2_table *t; 283309045fSPrasad Joshi 29473d58ffSPekka Enberg t = rb_entry(*link, struct qcow_l2_table, node); 303309045fSPrasad Joshi if (!t) 313309045fSPrasad Joshi goto error; 323309045fSPrasad Joshi 333309045fSPrasad Joshi parent = *link; 343309045fSPrasad Joshi 353309045fSPrasad Joshi if (t->offset > offset) 363309045fSPrasad Joshi link = &(*link)->rb_left; 373309045fSPrasad Joshi else if (t->offset < offset) 383309045fSPrasad Joshi link = &(*link)->rb_right; 393309045fSPrasad Joshi else 403309045fSPrasad Joshi goto out; 413309045fSPrasad Joshi } 423309045fSPrasad Joshi 433309045fSPrasad Joshi /* add new node */ 443309045fSPrasad Joshi rb_link_node(&new->node, parent, link); 453309045fSPrasad Joshi rb_insert_color(&new->node, root); 463309045fSPrasad Joshi out: 473309045fSPrasad Joshi return 0; 483309045fSPrasad Joshi error: 493309045fSPrasad Joshi return -1; 503309045fSPrasad Joshi } 513309045fSPrasad Joshi 52473d58ffSPekka Enberg static struct qcow_l2_table *search(struct rb_root *root, u64 offset) 533309045fSPrasad Joshi { 543309045fSPrasad Joshi struct rb_node *link = root->rb_node; 553309045fSPrasad Joshi 563309045fSPrasad Joshi while (link) { 57473d58ffSPekka Enberg struct qcow_l2_table *t; 583309045fSPrasad Joshi 59473d58ffSPekka Enberg t = rb_entry(link, struct qcow_l2_table, node); 603309045fSPrasad Joshi if (!t) 613309045fSPrasad Joshi goto out; 623309045fSPrasad Joshi 633309045fSPrasad Joshi if (t->offset > offset) 643309045fSPrasad Joshi link = link->rb_left; 653309045fSPrasad Joshi else if (t->offset < offset) 663309045fSPrasad Joshi link = link->rb_right; 673309045fSPrasad Joshi else 683309045fSPrasad Joshi return t; 693309045fSPrasad Joshi } 703309045fSPrasad Joshi out: 713309045fSPrasad Joshi return NULL; 723309045fSPrasad Joshi } 733309045fSPrasad Joshi 743309045fSPrasad Joshi static void free_cache(struct qcow *q) 753309045fSPrasad Joshi { 763309045fSPrasad Joshi struct list_head *pos, *n; 77473d58ffSPekka Enberg struct qcow_l2_table *t; 783309045fSPrasad Joshi struct rb_root *r = &q->root; 793309045fSPrasad Joshi 803309045fSPrasad Joshi list_for_each_safe(pos, n, &q->lru_list) { 813309045fSPrasad Joshi /* Remove cache table from the list and RB tree */ 823309045fSPrasad Joshi list_del(pos); 83473d58ffSPekka Enberg t = list_entry(pos, struct qcow_l2_table, list); 843309045fSPrasad Joshi rb_erase(&t->node, r); 853309045fSPrasad Joshi 863309045fSPrasad Joshi /* Free the cached node */ 873309045fSPrasad Joshi free(t); 883309045fSPrasad Joshi } 893309045fSPrasad Joshi } 903309045fSPrasad Joshi 91a4e46515SPekka Enberg static int qcow_l2_cache_write(struct qcow *q, struct qcow_l2_table *c) 92a4e46515SPekka Enberg { 93a4e46515SPekka Enberg struct qcow_header *header = q->header; 94a4e46515SPekka Enberg u64 size; 95a4e46515SPekka Enberg 96aff88976SPekka Enberg if (!c->dirty) 97aff88976SPekka Enberg return 0; 98aff88976SPekka Enberg 99a4e46515SPekka Enberg size = 1 << header->l2_bits; 100a4e46515SPekka Enberg 101aff88976SPekka Enberg if (pwrite_in_full(q->fd, c->table, size * sizeof(u64), c->offset) < 0) 102aff88976SPekka Enberg return -1; 103aff88976SPekka Enberg 104aff88976SPekka Enberg c->dirty = 0; 105aff88976SPekka Enberg 106aff88976SPekka Enberg return 0; 107a4e46515SPekka Enberg } 108a4e46515SPekka Enberg 109473d58ffSPekka Enberg static int cache_table(struct qcow *q, struct qcow_l2_table *c) 1103309045fSPrasad Joshi { 1113309045fSPrasad Joshi struct rb_root *r = &q->root; 112473d58ffSPekka Enberg struct qcow_l2_table *lru; 1133309045fSPrasad Joshi 1143309045fSPrasad Joshi if (q->nr_cached == MAX_CACHE_NODES) { 1153309045fSPrasad Joshi /* 1163309045fSPrasad Joshi * The node at the head of the list is least recently used 1173309045fSPrasad Joshi * node. Remove it from the list and replaced with a new node. 1183309045fSPrasad Joshi */ 119473d58ffSPekka Enberg lru = list_first_entry(&q->lru_list, struct qcow_l2_table, list); 1203309045fSPrasad Joshi 121a4e46515SPekka Enberg if (qcow_l2_cache_write(q, lru) < 0) 122a4e46515SPekka Enberg goto error; 123a4e46515SPekka Enberg 1243309045fSPrasad Joshi /* Remove the node from the cache */ 1253309045fSPrasad Joshi rb_erase(&lru->node, r); 1263309045fSPrasad Joshi list_del_init(&lru->list); 1273309045fSPrasad Joshi q->nr_cached--; 1283309045fSPrasad Joshi 1293309045fSPrasad Joshi /* Free the LRUed node */ 1303309045fSPrasad Joshi free(lru); 1313309045fSPrasad Joshi } 1323309045fSPrasad Joshi 1333309045fSPrasad Joshi /* Add new node in RB Tree: Helps in searching faster */ 1343309045fSPrasad Joshi if (insert(r, c) < 0) 1353309045fSPrasad Joshi goto error; 1363309045fSPrasad Joshi 1373309045fSPrasad Joshi /* Add in LRU replacement list */ 1383309045fSPrasad Joshi list_add_tail(&c->list, &q->lru_list); 1393309045fSPrasad Joshi q->nr_cached++; 1403309045fSPrasad Joshi 1413309045fSPrasad Joshi return 0; 1423309045fSPrasad Joshi error: 1433309045fSPrasad Joshi return -1; 1443309045fSPrasad Joshi } 1453309045fSPrasad Joshi 146fe8bdde0SPekka Enberg static struct qcow_l2_table *search_table(struct qcow *q, u64 offset) 1473309045fSPrasad Joshi { 148fe8bdde0SPekka Enberg struct qcow_l2_table *l2t; 1493309045fSPrasad Joshi 150fe8bdde0SPekka Enberg l2t = search(&q->root, offset); 151fe8bdde0SPekka Enberg if (!l2t) 152fe8bdde0SPekka Enberg return NULL; 1533309045fSPrasad Joshi 1543309045fSPrasad Joshi /* Update the LRU state, by moving the searched node to list tail */ 155fe8bdde0SPekka Enberg list_move_tail(&l2t->list, &q->lru_list); 1563309045fSPrasad Joshi 157fe8bdde0SPekka Enberg return l2t; 1583309045fSPrasad Joshi } 1593309045fSPrasad Joshi 1603309045fSPrasad Joshi /* Allocates a new node for caching L2 table */ 161473d58ffSPekka Enberg static struct qcow_l2_table *new_cache_table(struct qcow *q, u64 offset) 1623309045fSPrasad Joshi { 1633309045fSPrasad Joshi struct qcow_header *header = q->header; 164473d58ffSPekka Enberg struct qcow_l2_table *c; 1653309045fSPrasad Joshi u64 l2t_sz; 1663309045fSPrasad Joshi u64 size; 1673309045fSPrasad Joshi 1683309045fSPrasad Joshi l2t_sz = 1 << header->l2_bits; 1693309045fSPrasad Joshi size = sizeof(*c) + l2t_sz * sizeof(u64); 1703309045fSPrasad Joshi c = calloc(1, size); 1713309045fSPrasad Joshi if (!c) 1723309045fSPrasad Joshi goto out; 1733309045fSPrasad Joshi 1743309045fSPrasad Joshi c->offset = offset; 1753309045fSPrasad Joshi RB_CLEAR_NODE(&c->node); 1763309045fSPrasad Joshi INIT_LIST_HEAD(&c->list); 1773309045fSPrasad Joshi out: 1783309045fSPrasad Joshi return c; 1793309045fSPrasad Joshi } 1803309045fSPrasad Joshi 181742fce76SPrasad Joshi static inline u64 get_l1_index(struct qcow *q, u64 offset) 18286835cedSPrasad Joshi { 183ad627d62SPekka Enberg struct qcow_header *header = q->header; 18486835cedSPrasad Joshi 18586835cedSPrasad Joshi return offset >> (header->l2_bits + header->cluster_bits); 18686835cedSPrasad Joshi } 18786835cedSPrasad Joshi 188742fce76SPrasad Joshi static inline u64 get_l2_index(struct qcow *q, u64 offset) 18986835cedSPrasad Joshi { 190ad627d62SPekka Enberg struct qcow_header *header = q->header; 19186835cedSPrasad Joshi 19286835cedSPrasad Joshi return (offset >> (header->cluster_bits)) & ((1 << header->l2_bits)-1); 19386835cedSPrasad Joshi } 19486835cedSPrasad Joshi 195742fce76SPrasad Joshi static inline u64 get_cluster_offset(struct qcow *q, u64 offset) 19686835cedSPrasad Joshi { 197ad627d62SPekka Enberg struct qcow_header *header = q->header; 19886835cedSPrasad Joshi 19986835cedSPrasad Joshi return offset & ((1 << header->cluster_bits)-1); 20086835cedSPrasad Joshi } 20186835cedSPrasad Joshi 202fe8bdde0SPekka Enberg static struct qcow_l2_table *qcow_read_l2_table(struct qcow *q, u64 offset) 2033309045fSPrasad Joshi { 2043309045fSPrasad Joshi struct qcow_header *header = q->header; 205fe8bdde0SPekka Enberg struct qcow_l2_table *l2t; 2063309045fSPrasad Joshi u64 size; 2073309045fSPrasad Joshi 2083309045fSPrasad Joshi size = 1 << header->l2_bits; 2093309045fSPrasad Joshi 2103309045fSPrasad Joshi /* search an entry for offset in cache */ 211fe8bdde0SPekka Enberg l2t = search_table(q, offset); 212fe8bdde0SPekka Enberg if (l2t) 213fe8bdde0SPekka Enberg return l2t; 2143309045fSPrasad Joshi 2153309045fSPrasad Joshi /* allocate new node for caching l2 table */ 216fe8bdde0SPekka Enberg l2t = new_cache_table(q, offset); 217fe8bdde0SPekka Enberg if (!l2t) 2183309045fSPrasad Joshi goto error; 2193309045fSPrasad Joshi 2203309045fSPrasad Joshi /* table not cached: read from the disk */ 221fe8bdde0SPekka Enberg if (pread_in_full(q->fd, l2t->table, size * sizeof(u64), offset) < 0) 2223309045fSPrasad Joshi goto error; 2233309045fSPrasad Joshi 2243309045fSPrasad Joshi /* cache the table */ 225fe8bdde0SPekka Enberg if (cache_table(q, l2t) < 0) 2263309045fSPrasad Joshi goto error; 2273309045fSPrasad Joshi 228fe8bdde0SPekka Enberg return l2t; 2293309045fSPrasad Joshi error: 230fe8bdde0SPekka Enberg free(l2t); 231fe8bdde0SPekka Enberg return NULL; 2323309045fSPrasad Joshi } 2333309045fSPrasad Joshi 234b1c84095SPekka Enberg static ssize_t qcow_read_cluster(struct qcow *q, u64 offset, void *dst, u32 dst_len) 23586835cedSPrasad Joshi { 236ad627d62SPekka Enberg struct qcow_header *header = q->header; 2373dac48d4SPrasad Joshi struct qcow_table *table = &q->table; 238fe8bdde0SPekka Enberg struct qcow_l2_table *l2_table; 239742fce76SPrasad Joshi u64 l2_table_offset; 240742fce76SPrasad Joshi u64 l2_table_size; 2413dac48d4SPrasad Joshi u64 cluster_size; 242742fce76SPrasad Joshi u64 clust_offset; 243742fce76SPrasad Joshi u64 clust_start; 244a51948ceSPekka Enberg size_t length; 245742fce76SPrasad Joshi u64 l1_idx; 246742fce76SPrasad Joshi u64 l2_idx; 24786835cedSPrasad Joshi 248dae803fbSPekka Enberg cluster_size = 1 << header->cluster_bits; 24986835cedSPrasad Joshi 250c5e0624bSPrasad Joshi l1_idx = get_l1_index(q, offset); 2513dac48d4SPrasad Joshi if (l1_idx >= table->table_size) 252c0799eb9SPekka Enberg return -1; 25386835cedSPrasad Joshi 2543dac48d4SPrasad Joshi clust_offset = get_cluster_offset(q, offset); 2553dac48d4SPrasad Joshi if (clust_offset >= cluster_size) 256c0799eb9SPekka Enberg return -1; 2573dac48d4SPrasad Joshi 2583dac48d4SPrasad Joshi length = cluster_size - clust_offset; 2593dac48d4SPrasad Joshi if (length > dst_len) 2603dac48d4SPrasad Joshi length = dst_len; 2613dac48d4SPrasad Joshi 262c0799eb9SPekka Enberg mutex_lock(&q->mutex); 263*b2ebe61bSPekka Enberg 264*b2ebe61bSPekka Enberg l2_table_offset = be64_to_cpu(table->l1_table[l1_idx]); 265*b2ebe61bSPekka Enberg if (l2_table_offset & QCOW_OFLAG_COMPRESSED) { 266*b2ebe61bSPekka Enberg pr_warning("compressed sectors are not supported"); 267*b2ebe61bSPekka Enberg goto out_error; 268*b2ebe61bSPekka Enberg } 269*b2ebe61bSPekka Enberg 270*b2ebe61bSPekka Enberg l2_table_offset &= QCOW_OFFSET_MASK; 27186835cedSPrasad Joshi if (!l2_table_offset) 2723dac48d4SPrasad Joshi goto zero_cluster; 27386835cedSPrasad Joshi 27486835cedSPrasad Joshi l2_table_size = 1 << header->l2_bits; 27586835cedSPrasad Joshi 2763309045fSPrasad Joshi /* read and cache level 2 table */ 277fe8bdde0SPekka Enberg l2_table = qcow_read_l2_table(q, l2_table_offset); 278fe8bdde0SPekka Enberg if (!l2_table) 279b6edb0ecSSasha Levin goto out_error; 28086835cedSPrasad Joshi 281c5e0624bSPrasad Joshi l2_idx = get_l2_index(q, offset); 28286835cedSPrasad Joshi if (l2_idx >= l2_table_size) 283b6edb0ecSSasha Levin goto out_error; 28486835cedSPrasad Joshi 285*b2ebe61bSPekka Enberg clust_start = be64_to_cpu(l2_table->table[l2_idx]); 286*b2ebe61bSPekka Enberg if (clust_start & QCOW_OFLAG_COMPRESSED) { 287*b2ebe61bSPekka Enberg pr_warning("compressed sectors are not supported"); 288*b2ebe61bSPekka Enberg goto out_error; 289*b2ebe61bSPekka Enberg } 290*b2ebe61bSPekka Enberg 291*b2ebe61bSPekka Enberg clust_start &= QCOW_OFFSET_MASK; 29286835cedSPrasad Joshi if (!clust_start) 2933dac48d4SPrasad Joshi goto zero_cluster; 29486835cedSPrasad Joshi 295c0799eb9SPekka Enberg mutex_unlock(&q->mutex); 29686835cedSPrasad Joshi 297c0799eb9SPekka Enberg if (pread_in_full(q->fd, dst, length, clust_start + clust_offset) < 0) 298c0799eb9SPekka Enberg return -1; 299c0799eb9SPekka Enberg 3003dac48d4SPrasad Joshi return length; 30186835cedSPrasad Joshi 302179b71f0SPekka Enberg zero_cluster: 303c0799eb9SPekka Enberg mutex_unlock(&q->mutex); 304179b71f0SPekka Enberg memset(dst, 0, length); 305c0799eb9SPekka Enberg return length; 306179b71f0SPekka Enberg 30786835cedSPrasad Joshi out_error: 308c0799eb9SPekka Enberg mutex_unlock(&q->mutex); 309179b71f0SPekka Enberg length = -1; 310c0799eb9SPekka Enberg return -1; 3113dac48d4SPrasad Joshi } 312b6edb0ecSSasha Levin 313b1c84095SPekka Enberg static ssize_t qcow_read_sector(struct disk_image *disk, u64 sector, void *dst, u32 dst_len) 3143dac48d4SPrasad Joshi { 31543835ac9SSasha Levin struct qcow *q = disk->priv; 316ad627d62SPekka Enberg struct qcow_header *header = q->header; 317d8eea993SPekka Enberg u32 nr_read; 3180df6b4d9SPekka Enberg u64 offset; 3190df6b4d9SPekka Enberg char *buf; 3203dac48d4SPrasad Joshi u32 nr; 3213dac48d4SPrasad Joshi 3220df6b4d9SPekka Enberg buf = dst; 323d8eea993SPekka Enberg nr_read = 0; 3240df6b4d9SPekka Enberg 325d8eea993SPekka Enberg while (nr_read < dst_len) { 3263dac48d4SPrasad Joshi offset = sector << SECTOR_SHIFT; 3273dac48d4SPrasad Joshi if (offset >= header->size) 3280df6b4d9SPekka Enberg return -1; 3293dac48d4SPrasad Joshi 330b1c84095SPekka Enberg nr = qcow_read_cluster(q, offset, buf, dst_len - nr_read); 331a51948ceSPekka Enberg if (nr <= 0) 3320df6b4d9SPekka Enberg return -1; 3333dac48d4SPrasad Joshi 334d8eea993SPekka Enberg nr_read += nr; 3353dac48d4SPrasad Joshi buf += nr; 3363dac48d4SPrasad Joshi sector += (nr >> SECTOR_SHIFT); 3373dac48d4SPrasad Joshi } 3380df6b4d9SPekka Enberg 33972133dd2SAsias He return dst_len; 34086835cedSPrasad Joshi } 34186835cedSPrasad Joshi 342865c675fSPrasad Joshi static inline u64 file_size(int fd) 343865c675fSPrasad Joshi { 344865c675fSPrasad Joshi struct stat st; 3450df6b4d9SPekka Enberg 346865c675fSPrasad Joshi if (fstat(fd, &st) < 0) 347865c675fSPrasad Joshi return 0; 3480df6b4d9SPekka Enberg 349865c675fSPrasad Joshi return st.st_size; 350865c675fSPrasad Joshi } 351865c675fSPrasad Joshi 3520df6b4d9SPekka Enberg static inline int qcow_pwrite_sync(int fd, void *buf, size_t count, off_t offset) 353865c675fSPrasad Joshi { 354865c675fSPrasad Joshi if (pwrite_in_full(fd, buf, count, offset) < 0) 355865c675fSPrasad Joshi return -1; 3560df6b4d9SPekka Enberg 3577d94a719SPekka Enberg return fdatasync(fd); 358865c675fSPrasad Joshi } 359865c675fSPrasad Joshi 360865c675fSPrasad Joshi /* Writes a level 2 table at the end of the file. */ 361b1c84095SPekka Enberg static u64 qcow_write_l2_table(struct qcow *q, u64 *table) 362865c675fSPrasad Joshi { 363865c675fSPrasad Joshi struct qcow_header *header = q->header; 364865c675fSPrasad Joshi u64 clust_sz; 365865c675fSPrasad Joshi u64 f_sz; 3660df6b4d9SPekka Enberg u64 off; 3670df6b4d9SPekka Enberg u64 sz; 368865c675fSPrasad Joshi 369865c675fSPrasad Joshi f_sz = file_size(q->fd); 370865c675fSPrasad Joshi if (!f_sz) 371865c675fSPrasad Joshi return 0; 372865c675fSPrasad Joshi 373865c675fSPrasad Joshi sz = 1 << header->l2_bits; 374865c675fSPrasad Joshi clust_sz = 1 << header->cluster_bits; 375865c675fSPrasad Joshi off = ALIGN(f_sz, clust_sz); 376865c675fSPrasad Joshi 3776fe151aeSPekka Enberg if (pwrite_in_full(q->fd, table, sz * sizeof(u64), off) < 0) 378865c675fSPrasad Joshi return 0; 3790df6b4d9SPekka Enberg 380865c675fSPrasad Joshi return off; 381865c675fSPrasad Joshi } 382865c675fSPrasad Joshi 383865c675fSPrasad Joshi /* 384865c675fSPrasad Joshi * QCOW file might grow during a write operation. Not only data but metadata is 385865c675fSPrasad Joshi * also written at the end of the file. Therefore it is necessary to ensure 3860df6b4d9SPekka Enberg * every write is committed to disk. Hence we use uses qcow_pwrite_sync() to 387865c675fSPrasad Joshi * synchronize the in-core state of QCOW image to disk. 388865c675fSPrasad Joshi * 389865c675fSPrasad Joshi * We also try to restore the image to a consistent state if the metdata 390865c675fSPrasad Joshi * operation fails. The two metadat operations are: level 1 and level 2 table 391865c675fSPrasad Joshi * update. If either of them fails the image is truncated to a consistent state. 392865c675fSPrasad Joshi */ 393b1c84095SPekka Enberg static ssize_t qcow_write_cluster(struct qcow *q, u64 offset, void *buf, u32 src_len) 394865c675fSPrasad Joshi { 395865c675fSPrasad Joshi struct qcow_header *header = q->header; 396865c675fSPrasad Joshi struct qcow_table *table = &q->table; 397fe8bdde0SPekka Enberg struct qcow_l2_table *l2t; 3980df6b4d9SPekka Enberg u64 clust_start; 3990df6b4d9SPekka Enberg u64 clust_off; 400865c675fSPrasad Joshi u64 clust_sz; 401865c675fSPrasad Joshi u64 l1t_idx; 402865c675fSPrasad Joshi u64 l2t_idx; 4030df6b4d9SPekka Enberg u64 l2t_off; 4040df6b4d9SPekka Enberg u64 l2t_sz; 405865c675fSPrasad Joshi u64 f_sz; 4060df6b4d9SPekka Enberg u64 len; 407865c675fSPrasad Joshi 408fe8bdde0SPekka Enberg l2t = NULL; 409865c675fSPrasad Joshi l2t_sz = 1 << header->l2_bits; 410865c675fSPrasad Joshi clust_sz = 1 << header->cluster_bits; 411865c675fSPrasad Joshi 412865c675fSPrasad Joshi l1t_idx = get_l1_index(q, offset); 413865c675fSPrasad Joshi if (l1t_idx >= table->table_size) 414c0799eb9SPekka Enberg return -1; 415865c675fSPrasad Joshi 416865c675fSPrasad Joshi l2t_idx = get_l2_index(q, offset); 417865c675fSPrasad Joshi if (l2t_idx >= l2t_sz) 418c0799eb9SPekka Enberg return -1; 419865c675fSPrasad Joshi 420865c675fSPrasad Joshi clust_off = get_cluster_offset(q, offset); 421865c675fSPrasad Joshi if (clust_off >= clust_sz) 422c0799eb9SPekka Enberg return -1; 423865c675fSPrasad Joshi 424865c675fSPrasad Joshi len = clust_sz - clust_off; 425865c675fSPrasad Joshi if (len > src_len) 426865c675fSPrasad Joshi len = src_len; 427865c675fSPrasad Joshi 428c0799eb9SPekka Enberg mutex_lock(&q->mutex); 429c0799eb9SPekka Enberg 430*b2ebe61bSPekka Enberg l2t_off = be64_to_cpu(table->l1_table[l1t_idx]); 431*b2ebe61bSPekka Enberg if (l2t_off & QCOW_OFLAG_COMPRESSED) { 432*b2ebe61bSPekka Enberg pr_warning("compressed sectors are not supported"); 433*b2ebe61bSPekka Enberg goto error; 434*b2ebe61bSPekka Enberg } 435*b2ebe61bSPekka Enberg 436*b2ebe61bSPekka Enberg l2t_off &= QCOW_OFFSET_MASK; 437865c675fSPrasad Joshi if (l2t_off) { 4383309045fSPrasad Joshi /* read and cache l2 table */ 439fe8bdde0SPekka Enberg l2t = qcow_read_l2_table(q, l2t_off); 440fe8bdde0SPekka Enberg if (!l2t) 4413309045fSPrasad Joshi goto error; 442865c675fSPrasad Joshi } else { 443fe8bdde0SPekka Enberg l2t = new_cache_table(q, l2t_off); 444fe8bdde0SPekka Enberg if (!l2t) 4453309045fSPrasad Joshi goto error; 4463309045fSPrasad Joshi 4470df6b4d9SPekka Enberg /* Capture the state of the consistent QCOW image */ 448865c675fSPrasad Joshi f_sz = file_size(q->fd); 449865c675fSPrasad Joshi if (!f_sz) 4503309045fSPrasad Joshi goto free_cache; 451865c675fSPrasad Joshi 452865c675fSPrasad Joshi /* Write the l2 table of 0's at the end of the file */ 453fe8bdde0SPekka Enberg l2t_off = qcow_write_l2_table(q, l2t->table); 454865c675fSPrasad Joshi if (!l2t_off) 4553309045fSPrasad Joshi goto free_cache; 456865c675fSPrasad Joshi 457fe8bdde0SPekka Enberg if (cache_table(q, l2t) < 0) { 4583309045fSPrasad Joshi if (ftruncate(q->fd, f_sz) < 0) 4593309045fSPrasad Joshi goto free_cache; 4603309045fSPrasad Joshi 4613309045fSPrasad Joshi goto free_cache; 462865c675fSPrasad Joshi } 463865c675fSPrasad Joshi 4640df6b4d9SPekka Enberg /* Update the in-core entry */ 465659f4186SPekka Enberg table->l1_table[l1t_idx] = cpu_to_be64(l2t_off); 466865c675fSPrasad Joshi } 467865c675fSPrasad Joshi 4680df6b4d9SPekka Enberg /* Capture the state of the consistent QCOW image */ 469865c675fSPrasad Joshi f_sz = file_size(q->fd); 470865c675fSPrasad Joshi if (!f_sz) 4713309045fSPrasad Joshi goto error; 472865c675fSPrasad Joshi 473*b2ebe61bSPekka Enberg clust_start = be64_to_cpu(l2t->table[l2t_idx]); 474*b2ebe61bSPekka Enberg if (clust_start & QCOW_OFLAG_COMPRESSED) { 475*b2ebe61bSPekka Enberg pr_warning("compressed sectors are not supported"); 476*b2ebe61bSPekka Enberg goto error; 477*b2ebe61bSPekka Enberg } 478*b2ebe61bSPekka Enberg 479*b2ebe61bSPekka Enberg clust_start &= QCOW_OFFSET_MASK; 480865c675fSPrasad Joshi if (!clust_start) { 481865c675fSPrasad Joshi clust_start = ALIGN(f_sz, clust_sz); 4824bd7e48bSPekka Enberg l2t->table[l2t_idx] = cpu_to_be64(clust_start); 483aff88976SPekka Enberg l2t->dirty = 1; 484865c675fSPrasad Joshi } 4850df6b4d9SPekka Enberg 486c0799eb9SPekka Enberg mutex_unlock(&q->mutex); 487c0799eb9SPekka Enberg 488a4e46515SPekka Enberg /* Write actual data */ 489a4e46515SPekka Enberg if (pwrite_in_full(q->fd, buf, len, clust_start + clust_off) < 0) 490a4e46515SPekka Enberg return -1; 491a4e46515SPekka Enberg 492865c675fSPrasad Joshi return len; 4933309045fSPrasad Joshi 4943309045fSPrasad Joshi free_cache: 495fe8bdde0SPekka Enberg free(l2t); 496865c675fSPrasad Joshi error: 497c0799eb9SPekka Enberg mutex_unlock(&q->mutex); 498865c675fSPrasad Joshi return -1; 499865c675fSPrasad Joshi } 500865c675fSPrasad Joshi 501b1c84095SPekka Enberg static ssize_t qcow_write_sector(struct disk_image *disk, u64 sector, void *src, u32 src_len) 50286835cedSPrasad Joshi { 503865c675fSPrasad Joshi struct qcow *q = disk->priv; 504865c675fSPrasad Joshi struct qcow_header *header = q->header; 505c4acb611SIngo Molnar u32 nr_written; 5060df6b4d9SPekka Enberg char *buf; 507865c675fSPrasad Joshi u64 offset; 508865c675fSPrasad Joshi ssize_t nr; 509865c675fSPrasad Joshi 5100df6b4d9SPekka Enberg buf = src; 5110df6b4d9SPekka Enberg nr_written = 0; 512865c675fSPrasad Joshi offset = sector << SECTOR_SHIFT; 5130df6b4d9SPekka Enberg 5140df6b4d9SPekka Enberg while (nr_written < src_len) { 515865c675fSPrasad Joshi if (offset >= header->size) 5160df6b4d9SPekka Enberg return -1; 517865c675fSPrasad Joshi 518b1c84095SPekka Enberg nr = qcow_write_cluster(q, offset, buf, src_len - nr_written); 519865c675fSPrasad Joshi if (nr < 0) 5200df6b4d9SPekka Enberg return -1; 521865c675fSPrasad Joshi 5220df6b4d9SPekka Enberg nr_written += nr; 523865c675fSPrasad Joshi buf += nr; 524865c675fSPrasad Joshi offset += nr; 525865c675fSPrasad Joshi } 5260df6b4d9SPekka Enberg 52772133dd2SAsias He return nr_written; 52886835cedSPrasad Joshi } 52986835cedSPrasad Joshi 530b1c84095SPekka Enberg static ssize_t qcow_nowrite_sector(struct disk_image *disk, u64 sector, void *src, u32 src_len) 531f10860caSPekka Enberg { 532f10860caSPekka Enberg /* I/O error */ 533b1c84095SPekka Enberg pr_info("%s: no write support\n", __func__); 534f10860caSPekka Enberg return -1; 535f10860caSPekka Enberg } 536f10860caSPekka Enberg 537659f4186SPekka Enberg static int qcow_disk_flush(struct disk_image *disk) 538659f4186SPekka Enberg { 53973984b11SPekka Enberg struct qcow *q = disk->priv; 54073984b11SPekka Enberg struct qcow_header *header; 541a4e46515SPekka Enberg struct list_head *pos, *n; 54273984b11SPekka Enberg struct qcow_table *table; 54373984b11SPekka Enberg 54473984b11SPekka Enberg header = q->header; 54573984b11SPekka Enberg table = &q->table; 54673984b11SPekka Enberg 547a4e46515SPekka Enberg mutex_lock(&q->mutex); 548a4e46515SPekka Enberg 549a4e46515SPekka Enberg list_for_each_safe(pos, n, &q->lru_list) { 550a4e46515SPekka Enberg struct qcow_l2_table *c = list_entry(pos, struct qcow_l2_table, list); 551a4e46515SPekka Enberg 552a4e46515SPekka Enberg if (qcow_l2_cache_write(q, c) < 0) 553a4e46515SPekka Enberg goto error_unlock; 554a4e46515SPekka Enberg } 555a4e46515SPekka Enberg 556a4e46515SPekka Enberg if (fdatasync(disk->fd) < 0) 557a4e46515SPekka Enberg goto error_unlock; 558a4e46515SPekka Enberg 55973984b11SPekka Enberg if (pwrite_in_full(disk->fd, table->l1_table, table->table_size * sizeof(u64), header->l1_table_offset) < 0) 560a4e46515SPekka Enberg goto error_unlock; 561a4e46515SPekka Enberg 562a4e46515SPekka Enberg mutex_unlock(&q->mutex); 56373984b11SPekka Enberg 564659f4186SPekka Enberg return fsync(disk->fd); 565a4e46515SPekka Enberg 566a4e46515SPekka Enberg error_unlock: 567a4e46515SPekka Enberg mutex_unlock(&q->mutex); 568a4e46515SPekka Enberg return -1; 569659f4186SPekka Enberg } 570659f4186SPekka Enberg 571b1c84095SPekka Enberg static int qcow_disk_close(struct disk_image *disk) 57286835cedSPrasad Joshi { 57386835cedSPrasad Joshi struct qcow *q; 57486835cedSPrasad Joshi 57543835ac9SSasha Levin if (!disk) 57672133dd2SAsias He return 0; 57786835cedSPrasad Joshi 57843835ac9SSasha Levin q = disk->priv; 57986835cedSPrasad Joshi 5803309045fSPrasad Joshi free_cache(q); 5816c6f79b6SPrasad Joshi free(q->table.l1_table); 58286835cedSPrasad Joshi free(q->header); 58386835cedSPrasad Joshi free(q); 58472133dd2SAsias He 58572133dd2SAsias He return 0; 58686835cedSPrasad Joshi } 58786835cedSPrasad Joshi 588b1c84095SPekka Enberg static struct disk_image_operations qcow_disk_readonly_ops = { 589b1c84095SPekka Enberg .read_sector = qcow_read_sector, 590b1c84095SPekka Enberg .write_sector = qcow_nowrite_sector, 591b1c84095SPekka Enberg .close = qcow_disk_close, 592f10860caSPekka Enberg }; 593f10860caSPekka Enberg 594b1c84095SPekka Enberg static struct disk_image_operations qcow_disk_ops = { 595b1c84095SPekka Enberg .read_sector = qcow_read_sector, 596b1c84095SPekka Enberg .write_sector = qcow_write_sector, 597659f4186SPekka Enberg .flush = qcow_disk_flush, 598b1c84095SPekka Enberg .close = qcow_disk_close, 59986835cedSPrasad Joshi }; 60086835cedSPrasad Joshi 60186835cedSPrasad Joshi static int qcow_read_l1_table(struct qcow *q) 60286835cedSPrasad Joshi { 603ad627d62SPekka Enberg struct qcow_header *header = q->header; 60400adcc1bSPrasad Joshi struct qcow_table *table = &q->table; 60586835cedSPrasad Joshi 606ad627d62SPekka Enberg table->table_size = header->l1_size; 60786835cedSPrasad Joshi 60800adcc1bSPrasad Joshi table->l1_table = calloc(table->table_size, sizeof(u64)); 60900adcc1bSPrasad Joshi if (!table->l1_table) 61086835cedSPrasad Joshi return -1; 61186835cedSPrasad Joshi 612659f4186SPekka Enberg return pread_in_full(q->fd, table->l1_table, sizeof(u64) * table->table_size, header->l1_table_offset); 61386835cedSPrasad Joshi } 61486835cedSPrasad Joshi 615ad627d62SPekka Enberg static void *qcow2_read_header(int fd) 61686835cedSPrasad Joshi { 617ad627d62SPekka Enberg struct qcow2_header_disk f_header; 618ad627d62SPekka Enberg struct qcow_header *header; 61986835cedSPrasad Joshi 620ad627d62SPekka Enberg header = malloc(sizeof(struct qcow_header)); 62186835cedSPrasad Joshi if (!header) 62286835cedSPrasad Joshi return NULL; 62386835cedSPrasad Joshi 6240657f33dSPrasad Joshi if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0) { 6250657f33dSPrasad Joshi free(header); 62686835cedSPrasad Joshi return NULL; 6270657f33dSPrasad Joshi } 62886835cedSPrasad Joshi 629ad627d62SPekka Enberg be32_to_cpus(&f_header.magic); 630ad627d62SPekka Enberg be32_to_cpus(&f_header.version); 631ad627d62SPekka Enberg be64_to_cpus(&f_header.backing_file_offset); 632ad627d62SPekka Enberg be32_to_cpus(&f_header.backing_file_size); 633ad627d62SPekka Enberg be32_to_cpus(&f_header.cluster_bits); 634ad627d62SPekka Enberg be64_to_cpus(&f_header.size); 635ad627d62SPekka Enberg be32_to_cpus(&f_header.crypt_method); 636ad627d62SPekka Enberg be32_to_cpus(&f_header.l1_size); 637ad627d62SPekka Enberg be64_to_cpus(&f_header.l1_table_offset); 638ad627d62SPekka Enberg be64_to_cpus(&f_header.refcount_table_offset); 639ad627d62SPekka Enberg be32_to_cpus(&f_header.refcount_table_clusters); 640ad627d62SPekka Enberg be32_to_cpus(&f_header.nb_snapshots); 641ad627d62SPekka Enberg be64_to_cpus(&f_header.snapshots_offset); 642ad627d62SPekka Enberg 643ad627d62SPekka Enberg *header = (struct qcow_header) { 644ad627d62SPekka Enberg .size = f_header.size, 645ad627d62SPekka Enberg .l1_table_offset = f_header.l1_table_offset, 646ad627d62SPekka Enberg .l1_size = f_header.l1_size, 647ad627d62SPekka Enberg .cluster_bits = f_header.cluster_bits, 648ad627d62SPekka Enberg .l2_bits = f_header.cluster_bits - 3, 649ad627d62SPekka Enberg }; 650ad627d62SPekka Enberg 651ad627d62SPekka Enberg return header; 652ad627d62SPekka Enberg } 653ad627d62SPekka Enberg 654f10860caSPekka Enberg static struct disk_image *qcow2_probe(int fd, bool readonly) 655ad627d62SPekka Enberg { 656ad627d62SPekka Enberg struct qcow *q; 657ad627d62SPekka Enberg struct qcow_header *h; 658ad627d62SPekka Enberg struct disk_image *disk_image; 659ad627d62SPekka Enberg 660ad627d62SPekka Enberg q = calloc(1, sizeof(struct qcow)); 661ad627d62SPekka Enberg if (!q) 662ad627d62SPekka Enberg goto error; 663ad627d62SPekka Enberg 664c0799eb9SPekka Enberg mutex_init(&q->mutex); 665ad627d62SPekka Enberg q->fd = fd; 6663309045fSPrasad Joshi q->root = RB_ROOT; 6673309045fSPrasad Joshi INIT_LIST_HEAD(&q->lru_list); 668ad627d62SPekka Enberg 669ad627d62SPekka Enberg h = q->header = qcow2_read_header(fd); 670ad627d62SPekka Enberg if (!h) 671ad627d62SPekka Enberg goto error; 672ad627d62SPekka Enberg 673ad627d62SPekka Enberg if (qcow_read_l1_table(q) < 0) 674ad627d62SPekka Enberg goto error; 675ad627d62SPekka Enberg 6767d22135fSAsias He /* 6777d22135fSAsias He * Do not use mmap use read/write instead 6787d22135fSAsias He */ 679f10860caSPekka Enberg if (readonly) 680b1c84095SPekka Enberg disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_NOMMAP); 681f10860caSPekka Enberg else 682b1c84095SPekka Enberg disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_NOMMAP); 683f10860caSPekka Enberg 684ad627d62SPekka Enberg if (!disk_image) 685ad627d62SPekka Enberg goto error; 686ad627d62SPekka Enberg disk_image->priv = q; 687ad627d62SPekka Enberg 688ad627d62SPekka Enberg return disk_image; 689ad627d62SPekka Enberg error: 690ad627d62SPekka Enberg if (!q) 691ad627d62SPekka Enberg return NULL; 692ad627d62SPekka Enberg 693ad627d62SPekka Enberg free(q->table.l1_table); 694ad627d62SPekka Enberg free(q->header); 695ad627d62SPekka Enberg free(q); 696ad627d62SPekka Enberg 697ad627d62SPekka Enberg return NULL; 698ad627d62SPekka Enberg } 699ad627d62SPekka Enberg 700ad627d62SPekka Enberg static bool qcow2_check_image(int fd) 701ad627d62SPekka Enberg { 702ad627d62SPekka Enberg struct qcow2_header_disk f_header; 703ad627d62SPekka Enberg 704ad627d62SPekka Enberg if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0) 705ad627d62SPekka Enberg return false; 706ad627d62SPekka Enberg 707ad627d62SPekka Enberg be32_to_cpus(&f_header.magic); 708ad627d62SPekka Enberg be32_to_cpus(&f_header.version); 709ad627d62SPekka Enberg 710ad627d62SPekka Enberg if (f_header.magic != QCOW_MAGIC) 711ad627d62SPekka Enberg return false; 712ad627d62SPekka Enberg 713ad627d62SPekka Enberg if (f_header.version != QCOW2_VERSION) 714ad627d62SPekka Enberg return false; 715ad627d62SPekka Enberg 716ad627d62SPekka Enberg return true; 717ad627d62SPekka Enberg } 718ad627d62SPekka Enberg 719ad627d62SPekka Enberg static void *qcow1_read_header(int fd) 720ad627d62SPekka Enberg { 721ad627d62SPekka Enberg struct qcow1_header_disk f_header; 722ad627d62SPekka Enberg struct qcow_header *header; 723ad627d62SPekka Enberg 724ad627d62SPekka Enberg header = malloc(sizeof(struct qcow_header)); 725ad627d62SPekka Enberg if (!header) 726ad627d62SPekka Enberg return NULL; 727ad627d62SPekka Enberg 728d39cefd2SSasha Levin if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0) { 729d39cefd2SSasha Levin free(header); 730ad627d62SPekka Enberg return NULL; 731d39cefd2SSasha Levin } 732ad627d62SPekka Enberg 733ad627d62SPekka Enberg be32_to_cpus(&f_header.magic); 734ad627d62SPekka Enberg be32_to_cpus(&f_header.version); 735ad627d62SPekka Enberg be64_to_cpus(&f_header.backing_file_offset); 736ad627d62SPekka Enberg be32_to_cpus(&f_header.backing_file_size); 737ad627d62SPekka Enberg be32_to_cpus(&f_header.mtime); 738ad627d62SPekka Enberg be64_to_cpus(&f_header.size); 739ad627d62SPekka Enberg be32_to_cpus(&f_header.crypt_method); 740ad627d62SPekka Enberg be64_to_cpus(&f_header.l1_table_offset); 741ad627d62SPekka Enberg 742ad627d62SPekka Enberg *header = (struct qcow_header) { 743ad627d62SPekka Enberg .size = f_header.size, 744ad627d62SPekka Enberg .l1_table_offset = f_header.l1_table_offset, 745ad627d62SPekka Enberg .l1_size = f_header.size / ((1 << f_header.l2_bits) * (1 << f_header.cluster_bits)), 746ad627d62SPekka Enberg .cluster_bits = f_header.cluster_bits, 747ad627d62SPekka Enberg .l2_bits = f_header.l2_bits, 748ad627d62SPekka Enberg }; 74986835cedSPrasad Joshi 75086835cedSPrasad Joshi return header; 75186835cedSPrasad Joshi } 75286835cedSPrasad Joshi 753f10860caSPekka Enberg static struct disk_image *qcow1_probe(int fd, bool readonly) 75486835cedSPrasad Joshi { 75586835cedSPrasad Joshi struct qcow *q; 756ad627d62SPekka Enberg struct qcow_header *h; 75786835cedSPrasad Joshi struct disk_image *disk_image; 75886835cedSPrasad Joshi 75986835cedSPrasad Joshi q = calloc(1, sizeof(struct qcow)); 76086835cedSPrasad Joshi if (!q) 76186835cedSPrasad Joshi goto error; 76286835cedSPrasad Joshi 763c0799eb9SPekka Enberg mutex_init(&q->mutex); 76486835cedSPrasad Joshi q->fd = fd; 7653309045fSPrasad Joshi q->root = RB_ROOT; 7663309045fSPrasad Joshi INIT_LIST_HEAD(&q->lru_list); 76786835cedSPrasad Joshi 76886835cedSPrasad Joshi h = q->header = qcow1_read_header(fd); 76986835cedSPrasad Joshi if (!h) 77086835cedSPrasad Joshi goto error; 77186835cedSPrasad Joshi 77286835cedSPrasad Joshi if (qcow_read_l1_table(q) < 0) 77386835cedSPrasad Joshi goto error; 77486835cedSPrasad Joshi 7757d22135fSAsias He /* 7767d22135fSAsias He * Do not use mmap use read/write instead 7777d22135fSAsias He */ 778f10860caSPekka Enberg if (readonly) 779b1c84095SPekka Enberg disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_NOMMAP); 780f10860caSPekka Enberg else 781b1c84095SPekka Enberg disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_NOMMAP); 782f10860caSPekka Enberg 78386835cedSPrasad Joshi if (!disk_image) 78486835cedSPrasad Joshi goto error; 78586835cedSPrasad Joshi disk_image->priv = q; 78686835cedSPrasad Joshi 78786835cedSPrasad Joshi return disk_image; 78886835cedSPrasad Joshi error: 78986835cedSPrasad Joshi if (!q) 79086835cedSPrasad Joshi return NULL; 79186835cedSPrasad Joshi 7926c6f79b6SPrasad Joshi free(q->table.l1_table); 79386835cedSPrasad Joshi free(q->header); 79486835cedSPrasad Joshi free(q); 79586835cedSPrasad Joshi 79686835cedSPrasad Joshi return NULL; 79786835cedSPrasad Joshi } 79886835cedSPrasad Joshi 799ad627d62SPekka Enberg static bool qcow1_check_image(int fd) 80086835cedSPrasad Joshi { 801ad627d62SPekka Enberg struct qcow1_header_disk f_header; 80286835cedSPrasad Joshi 803ad627d62SPekka Enberg if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0) 804ad627d62SPekka Enberg return false; 80586835cedSPrasad Joshi 806ad627d62SPekka Enberg be32_to_cpus(&f_header.magic); 807ad627d62SPekka Enberg be32_to_cpus(&f_header.version); 80886835cedSPrasad Joshi 809ad627d62SPekka Enberg if (f_header.magic != QCOW_MAGIC) 810ad627d62SPekka Enberg return false; 81186835cedSPrasad Joshi 812ad627d62SPekka Enberg if (f_header.version != QCOW1_VERSION) 813ad627d62SPekka Enberg return false; 81486835cedSPrasad Joshi 815ad627d62SPekka Enberg return true; 81686835cedSPrasad Joshi } 81786835cedSPrasad Joshi 818f10860caSPekka Enberg struct disk_image *qcow_probe(int fd, bool readonly) 81986835cedSPrasad Joshi { 820ad627d62SPekka Enberg if (qcow1_check_image(fd)) 821f10860caSPekka Enberg return qcow1_probe(fd, readonly); 822ad627d62SPekka Enberg 823ad627d62SPekka Enberg if (qcow2_check_image(fd)) 824f10860caSPekka Enberg return qcow2_probe(fd, readonly); 825ad627d62SPekka Enberg 826ad627d62SPekka Enberg return NULL; 82786835cedSPrasad Joshi } 828