186835cedSPrasad Joshi #include "kvm/qcow.h" 286835cedSPrasad Joshi 386835cedSPrasad Joshi #include "kvm/disk-image.h" 486835cedSPrasad Joshi #include "kvm/read-write.h" 5c0799eb9SPekka Enberg #include "kvm/mutex.h" 686835cedSPrasad Joshi #include "kvm/util.h" 786835cedSPrasad Joshi 886835cedSPrasad Joshi #include <sys/types.h> 986835cedSPrasad Joshi #include <sys/stat.h> 1086835cedSPrasad Joshi #include <stdbool.h> 1186835cedSPrasad Joshi #include <stdlib.h> 1286835cedSPrasad Joshi #include <string.h> 1386835cedSPrasad Joshi #include <unistd.h> 1486835cedSPrasad Joshi #include <fcntl.h> 1586835cedSPrasad Joshi 1686835cedSPrasad Joshi #include <linux/byteorder.h> 17865c675fSPrasad Joshi #include <linux/kernel.h> 180df6b4d9SPekka Enberg #include <linux/types.h> 1986835cedSPrasad Joshi 20473d58ffSPekka Enberg static int insert(struct rb_root *root, struct qcow_l2_table *new) 213309045fSPrasad Joshi { 223309045fSPrasad Joshi struct rb_node **link = &(root->rb_node), *parent = NULL; 233309045fSPrasad Joshi u64 offset = new->offset; 243309045fSPrasad Joshi 253309045fSPrasad Joshi /* search the tree */ 263309045fSPrasad Joshi while (*link) { 27473d58ffSPekka Enberg struct qcow_l2_table *t; 283309045fSPrasad Joshi 29473d58ffSPekka Enberg t = rb_entry(*link, struct qcow_l2_table, node); 303309045fSPrasad Joshi if (!t) 313309045fSPrasad Joshi goto error; 323309045fSPrasad Joshi 333309045fSPrasad Joshi parent = *link; 343309045fSPrasad Joshi 353309045fSPrasad Joshi if (t->offset > offset) 363309045fSPrasad Joshi link = &(*link)->rb_left; 373309045fSPrasad Joshi else if (t->offset < offset) 383309045fSPrasad Joshi link = &(*link)->rb_right; 393309045fSPrasad Joshi else 403309045fSPrasad Joshi goto out; 413309045fSPrasad Joshi } 423309045fSPrasad Joshi 433309045fSPrasad Joshi /* add new node */ 443309045fSPrasad Joshi rb_link_node(&new->node, parent, link); 453309045fSPrasad Joshi rb_insert_color(&new->node, root); 463309045fSPrasad Joshi out: 473309045fSPrasad Joshi return 0; 483309045fSPrasad Joshi error: 493309045fSPrasad Joshi return -1; 503309045fSPrasad Joshi } 513309045fSPrasad Joshi 52473d58ffSPekka Enberg static struct qcow_l2_table *search(struct rb_root *root, u64 offset) 533309045fSPrasad Joshi { 543309045fSPrasad Joshi struct rb_node *link = root->rb_node; 553309045fSPrasad Joshi 563309045fSPrasad Joshi while (link) { 57473d58ffSPekka Enberg struct qcow_l2_table *t; 583309045fSPrasad Joshi 59473d58ffSPekka Enberg t = rb_entry(link, struct qcow_l2_table, node); 603309045fSPrasad Joshi if (!t) 613309045fSPrasad Joshi goto out; 623309045fSPrasad Joshi 633309045fSPrasad Joshi if (t->offset > offset) 643309045fSPrasad Joshi link = link->rb_left; 653309045fSPrasad Joshi else if (t->offset < offset) 663309045fSPrasad Joshi link = link->rb_right; 673309045fSPrasad Joshi else 683309045fSPrasad Joshi return t; 693309045fSPrasad Joshi } 703309045fSPrasad Joshi out: 713309045fSPrasad Joshi return NULL; 723309045fSPrasad Joshi } 733309045fSPrasad Joshi 743309045fSPrasad Joshi static void free_cache(struct qcow *q) 753309045fSPrasad Joshi { 763309045fSPrasad Joshi struct list_head *pos, *n; 77473d58ffSPekka Enberg struct qcow_l2_table *t; 783309045fSPrasad Joshi struct rb_root *r = &q->root; 793309045fSPrasad Joshi 803309045fSPrasad Joshi list_for_each_safe(pos, n, &q->lru_list) { 813309045fSPrasad Joshi /* Remove cache table from the list and RB tree */ 823309045fSPrasad Joshi list_del(pos); 83473d58ffSPekka Enberg t = list_entry(pos, struct qcow_l2_table, list); 843309045fSPrasad Joshi rb_erase(&t->node, r); 853309045fSPrasad Joshi 863309045fSPrasad Joshi /* Free the cached node */ 873309045fSPrasad Joshi free(t); 883309045fSPrasad Joshi } 893309045fSPrasad Joshi } 903309045fSPrasad Joshi 91a4e46515SPekka Enberg static int qcow_l2_cache_write(struct qcow *q, struct qcow_l2_table *c) 92a4e46515SPekka Enberg { 93a4e46515SPekka Enberg struct qcow_header *header = q->header; 94a4e46515SPekka Enberg u64 size; 95a4e46515SPekka Enberg 96*aff88976SPekka Enberg if (!c->dirty) 97*aff88976SPekka Enberg return 0; 98*aff88976SPekka Enberg 99a4e46515SPekka Enberg size = 1 << header->l2_bits; 100a4e46515SPekka Enberg 101*aff88976SPekka Enberg if (pwrite_in_full(q->fd, c->table, size * sizeof(u64), c->offset) < 0) 102*aff88976SPekka Enberg return -1; 103*aff88976SPekka Enberg 104*aff88976SPekka Enberg c->dirty = 0; 105*aff88976SPekka Enberg 106*aff88976SPekka Enberg return 0; 107a4e46515SPekka Enberg } 108a4e46515SPekka Enberg 109473d58ffSPekka Enberg static int cache_table(struct qcow *q, struct qcow_l2_table *c) 1103309045fSPrasad Joshi { 1113309045fSPrasad Joshi struct rb_root *r = &q->root; 112473d58ffSPekka Enberg struct qcow_l2_table *lru; 1133309045fSPrasad Joshi 1143309045fSPrasad Joshi if (q->nr_cached == MAX_CACHE_NODES) { 1153309045fSPrasad Joshi /* 1163309045fSPrasad Joshi * The node at the head of the list is least recently used 1173309045fSPrasad Joshi * node. Remove it from the list and replaced with a new node. 1183309045fSPrasad Joshi */ 119473d58ffSPekka Enberg lru = list_first_entry(&q->lru_list, struct qcow_l2_table, list); 1203309045fSPrasad Joshi 121a4e46515SPekka Enberg if (qcow_l2_cache_write(q, lru) < 0) 122a4e46515SPekka Enberg goto error; 123a4e46515SPekka Enberg 1243309045fSPrasad Joshi /* Remove the node from the cache */ 1253309045fSPrasad Joshi rb_erase(&lru->node, r); 1263309045fSPrasad Joshi list_del_init(&lru->list); 1273309045fSPrasad Joshi q->nr_cached--; 1283309045fSPrasad Joshi 1293309045fSPrasad Joshi /* Free the LRUed node */ 1303309045fSPrasad Joshi free(lru); 1313309045fSPrasad Joshi } 1323309045fSPrasad Joshi 1333309045fSPrasad Joshi /* Add new node in RB Tree: Helps in searching faster */ 1343309045fSPrasad Joshi if (insert(r, c) < 0) 1353309045fSPrasad Joshi goto error; 1363309045fSPrasad Joshi 1373309045fSPrasad Joshi /* Add in LRU replacement list */ 1383309045fSPrasad Joshi list_add_tail(&c->list, &q->lru_list); 1393309045fSPrasad Joshi q->nr_cached++; 1403309045fSPrasad Joshi 1413309045fSPrasad Joshi return 0; 1423309045fSPrasad Joshi error: 1433309045fSPrasad Joshi return -1; 1443309045fSPrasad Joshi } 1453309045fSPrasad Joshi 146fe8bdde0SPekka Enberg static struct qcow_l2_table *search_table(struct qcow *q, u64 offset) 1473309045fSPrasad Joshi { 148fe8bdde0SPekka Enberg struct qcow_l2_table *l2t; 1493309045fSPrasad Joshi 150fe8bdde0SPekka Enberg l2t = search(&q->root, offset); 151fe8bdde0SPekka Enberg if (!l2t) 152fe8bdde0SPekka Enberg return NULL; 1533309045fSPrasad Joshi 1543309045fSPrasad Joshi /* Update the LRU state, by moving the searched node to list tail */ 155fe8bdde0SPekka Enberg list_move_tail(&l2t->list, &q->lru_list); 1563309045fSPrasad Joshi 157fe8bdde0SPekka Enberg return l2t; 1583309045fSPrasad Joshi } 1593309045fSPrasad Joshi 1603309045fSPrasad Joshi /* Allocates a new node for caching L2 table */ 161473d58ffSPekka Enberg static struct qcow_l2_table *new_cache_table(struct qcow *q, u64 offset) 1623309045fSPrasad Joshi { 1633309045fSPrasad Joshi struct qcow_header *header = q->header; 164473d58ffSPekka Enberg struct qcow_l2_table *c; 1653309045fSPrasad Joshi u64 l2t_sz; 1663309045fSPrasad Joshi u64 size; 1673309045fSPrasad Joshi 1683309045fSPrasad Joshi l2t_sz = 1 << header->l2_bits; 1693309045fSPrasad Joshi size = sizeof(*c) + l2t_sz * sizeof(u64); 1703309045fSPrasad Joshi c = calloc(1, size); 1713309045fSPrasad Joshi if (!c) 1723309045fSPrasad Joshi goto out; 1733309045fSPrasad Joshi 1743309045fSPrasad Joshi c->offset = offset; 1753309045fSPrasad Joshi RB_CLEAR_NODE(&c->node); 1763309045fSPrasad Joshi INIT_LIST_HEAD(&c->list); 1773309045fSPrasad Joshi out: 1783309045fSPrasad Joshi return c; 1793309045fSPrasad Joshi } 1803309045fSPrasad Joshi 181742fce76SPrasad Joshi static inline u64 get_l1_index(struct qcow *q, u64 offset) 18286835cedSPrasad Joshi { 183ad627d62SPekka Enberg struct qcow_header *header = q->header; 18486835cedSPrasad Joshi 18586835cedSPrasad Joshi return offset >> (header->l2_bits + header->cluster_bits); 18686835cedSPrasad Joshi } 18786835cedSPrasad Joshi 188742fce76SPrasad Joshi static inline u64 get_l2_index(struct qcow *q, u64 offset) 18986835cedSPrasad Joshi { 190ad627d62SPekka Enberg struct qcow_header *header = q->header; 19186835cedSPrasad Joshi 19286835cedSPrasad Joshi return (offset >> (header->cluster_bits)) & ((1 << header->l2_bits)-1); 19386835cedSPrasad Joshi } 19486835cedSPrasad Joshi 195742fce76SPrasad Joshi static inline u64 get_cluster_offset(struct qcow *q, u64 offset) 19686835cedSPrasad Joshi { 197ad627d62SPekka Enberg struct qcow_header *header = q->header; 19886835cedSPrasad Joshi 19986835cedSPrasad Joshi return offset & ((1 << header->cluster_bits)-1); 20086835cedSPrasad Joshi } 20186835cedSPrasad Joshi 202fe8bdde0SPekka Enberg static struct qcow_l2_table *qcow_read_l2_table(struct qcow *q, u64 offset) 2033309045fSPrasad Joshi { 2043309045fSPrasad Joshi struct qcow_header *header = q->header; 205fe8bdde0SPekka Enberg struct qcow_l2_table *l2t; 2063309045fSPrasad Joshi u64 size; 2073309045fSPrasad Joshi 2083309045fSPrasad Joshi size = 1 << header->l2_bits; 2093309045fSPrasad Joshi 2103309045fSPrasad Joshi /* search an entry for offset in cache */ 211fe8bdde0SPekka Enberg l2t = search_table(q, offset); 212fe8bdde0SPekka Enberg if (l2t) 213fe8bdde0SPekka Enberg return l2t; 2143309045fSPrasad Joshi 2153309045fSPrasad Joshi /* allocate new node for caching l2 table */ 216fe8bdde0SPekka Enberg l2t = new_cache_table(q, offset); 217fe8bdde0SPekka Enberg if (!l2t) 2183309045fSPrasad Joshi goto error; 2193309045fSPrasad Joshi 2203309045fSPrasad Joshi /* table not cached: read from the disk */ 221fe8bdde0SPekka Enberg if (pread_in_full(q->fd, l2t->table, size * sizeof(u64), offset) < 0) 2223309045fSPrasad Joshi goto error; 2233309045fSPrasad Joshi 2243309045fSPrasad Joshi /* cache the table */ 225fe8bdde0SPekka Enberg if (cache_table(q, l2t) < 0) 2263309045fSPrasad Joshi goto error; 2273309045fSPrasad Joshi 228fe8bdde0SPekka Enberg return l2t; 2293309045fSPrasad Joshi error: 230fe8bdde0SPekka Enberg free(l2t); 231fe8bdde0SPekka Enberg return NULL; 2323309045fSPrasad Joshi } 2333309045fSPrasad Joshi 234b1c84095SPekka Enberg static ssize_t qcow_read_cluster(struct qcow *q, u64 offset, void *dst, u32 dst_len) 23586835cedSPrasad Joshi { 236ad627d62SPekka Enberg struct qcow_header *header = q->header; 2373dac48d4SPrasad Joshi struct qcow_table *table = &q->table; 238fe8bdde0SPekka Enberg struct qcow_l2_table *l2_table; 239742fce76SPrasad Joshi u64 l2_table_offset; 240742fce76SPrasad Joshi u64 l2_table_size; 2413dac48d4SPrasad Joshi u64 cluster_size; 242742fce76SPrasad Joshi u64 clust_offset; 243742fce76SPrasad Joshi u64 clust_start; 244a51948ceSPekka Enberg size_t length; 245742fce76SPrasad Joshi u64 l1_idx; 246742fce76SPrasad Joshi u64 l2_idx; 24786835cedSPrasad Joshi 248dae803fbSPekka Enberg cluster_size = 1 << header->cluster_bits; 24986835cedSPrasad Joshi 250c5e0624bSPrasad Joshi l1_idx = get_l1_index(q, offset); 2513dac48d4SPrasad Joshi if (l1_idx >= table->table_size) 252c0799eb9SPekka Enberg return -1; 25386835cedSPrasad Joshi 2543dac48d4SPrasad Joshi clust_offset = get_cluster_offset(q, offset); 2553dac48d4SPrasad Joshi if (clust_offset >= cluster_size) 256c0799eb9SPekka Enberg return -1; 2573dac48d4SPrasad Joshi 2583dac48d4SPrasad Joshi length = cluster_size - clust_offset; 2593dac48d4SPrasad Joshi if (length > dst_len) 2603dac48d4SPrasad Joshi length = dst_len; 2613dac48d4SPrasad Joshi 262c0799eb9SPekka Enberg mutex_lock(&q->mutex); 263659f4186SPekka Enberg l2_table_offset = be64_to_cpu(table->l1_table[l1_idx]) & ~header->oflag_mask; 26486835cedSPrasad Joshi if (!l2_table_offset) 2653dac48d4SPrasad Joshi goto zero_cluster; 26686835cedSPrasad Joshi 26786835cedSPrasad Joshi l2_table_size = 1 << header->l2_bits; 26886835cedSPrasad Joshi 2693309045fSPrasad Joshi /* read and cache level 2 table */ 270fe8bdde0SPekka Enberg l2_table = qcow_read_l2_table(q, l2_table_offset); 271fe8bdde0SPekka Enberg if (!l2_table) 272b6edb0ecSSasha Levin goto out_error; 27386835cedSPrasad Joshi 274c5e0624bSPrasad Joshi l2_idx = get_l2_index(q, offset); 27586835cedSPrasad Joshi if (l2_idx >= l2_table_size) 276b6edb0ecSSasha Levin goto out_error; 27786835cedSPrasad Joshi 2784bd7e48bSPekka Enberg clust_start = be64_to_cpu(l2_table->table[l2_idx]) & ~header->oflag_mask; 27986835cedSPrasad Joshi if (!clust_start) 2803dac48d4SPrasad Joshi goto zero_cluster; 28186835cedSPrasad Joshi 282c0799eb9SPekka Enberg mutex_unlock(&q->mutex); 28386835cedSPrasad Joshi 284c0799eb9SPekka Enberg if (pread_in_full(q->fd, dst, length, clust_start + clust_offset) < 0) 285c0799eb9SPekka Enberg return -1; 286c0799eb9SPekka Enberg 2873dac48d4SPrasad Joshi return length; 28886835cedSPrasad Joshi 289179b71f0SPekka Enberg zero_cluster: 290c0799eb9SPekka Enberg mutex_unlock(&q->mutex); 291179b71f0SPekka Enberg memset(dst, 0, length); 292c0799eb9SPekka Enberg return length; 293179b71f0SPekka Enberg 29486835cedSPrasad Joshi out_error: 295c0799eb9SPekka Enberg mutex_unlock(&q->mutex); 296179b71f0SPekka Enberg length = -1; 297c0799eb9SPekka Enberg return -1; 2983dac48d4SPrasad Joshi } 299b6edb0ecSSasha Levin 300b1c84095SPekka Enberg static ssize_t qcow_read_sector(struct disk_image *disk, u64 sector, void *dst, u32 dst_len) 3013dac48d4SPrasad Joshi { 30243835ac9SSasha Levin struct qcow *q = disk->priv; 303ad627d62SPekka Enberg struct qcow_header *header = q->header; 304d8eea993SPekka Enberg u32 nr_read; 3050df6b4d9SPekka Enberg u64 offset; 3060df6b4d9SPekka Enberg char *buf; 3073dac48d4SPrasad Joshi u32 nr; 3083dac48d4SPrasad Joshi 3090df6b4d9SPekka Enberg buf = dst; 310d8eea993SPekka Enberg nr_read = 0; 3110df6b4d9SPekka Enberg 312d8eea993SPekka Enberg while (nr_read < dst_len) { 3133dac48d4SPrasad Joshi offset = sector << SECTOR_SHIFT; 3143dac48d4SPrasad Joshi if (offset >= header->size) 3150df6b4d9SPekka Enberg return -1; 3163dac48d4SPrasad Joshi 317b1c84095SPekka Enberg nr = qcow_read_cluster(q, offset, buf, dst_len - nr_read); 318a51948ceSPekka Enberg if (nr <= 0) 3190df6b4d9SPekka Enberg return -1; 3203dac48d4SPrasad Joshi 321d8eea993SPekka Enberg nr_read += nr; 3223dac48d4SPrasad Joshi buf += nr; 3233dac48d4SPrasad Joshi sector += (nr >> SECTOR_SHIFT); 3243dac48d4SPrasad Joshi } 3250df6b4d9SPekka Enberg 32672133dd2SAsias He return dst_len; 32786835cedSPrasad Joshi } 32886835cedSPrasad Joshi 329865c675fSPrasad Joshi static inline u64 file_size(int fd) 330865c675fSPrasad Joshi { 331865c675fSPrasad Joshi struct stat st; 3320df6b4d9SPekka Enberg 333865c675fSPrasad Joshi if (fstat(fd, &st) < 0) 334865c675fSPrasad Joshi return 0; 3350df6b4d9SPekka Enberg 336865c675fSPrasad Joshi return st.st_size; 337865c675fSPrasad Joshi } 338865c675fSPrasad Joshi 3390df6b4d9SPekka Enberg static inline int qcow_pwrite_sync(int fd, void *buf, size_t count, off_t offset) 340865c675fSPrasad Joshi { 341865c675fSPrasad Joshi if (pwrite_in_full(fd, buf, count, offset) < 0) 342865c675fSPrasad Joshi return -1; 3430df6b4d9SPekka Enberg 3447d94a719SPekka Enberg return fdatasync(fd); 345865c675fSPrasad Joshi } 346865c675fSPrasad Joshi 347865c675fSPrasad Joshi /* Writes a level 2 table at the end of the file. */ 348b1c84095SPekka Enberg static u64 qcow_write_l2_table(struct qcow *q, u64 *table) 349865c675fSPrasad Joshi { 350865c675fSPrasad Joshi struct qcow_header *header = q->header; 351865c675fSPrasad Joshi u64 clust_sz; 352865c675fSPrasad Joshi u64 f_sz; 3530df6b4d9SPekka Enberg u64 off; 3540df6b4d9SPekka Enberg u64 sz; 355865c675fSPrasad Joshi 356865c675fSPrasad Joshi f_sz = file_size(q->fd); 357865c675fSPrasad Joshi if (!f_sz) 358865c675fSPrasad Joshi return 0; 359865c675fSPrasad Joshi 360865c675fSPrasad Joshi sz = 1 << header->l2_bits; 361865c675fSPrasad Joshi clust_sz = 1 << header->cluster_bits; 362865c675fSPrasad Joshi off = ALIGN(f_sz, clust_sz); 363865c675fSPrasad Joshi 3646fe151aeSPekka Enberg if (pwrite_in_full(q->fd, table, sz * sizeof(u64), off) < 0) 365865c675fSPrasad Joshi return 0; 3660df6b4d9SPekka Enberg 367865c675fSPrasad Joshi return off; 368865c675fSPrasad Joshi } 369865c675fSPrasad Joshi 370865c675fSPrasad Joshi /* 371865c675fSPrasad Joshi * QCOW file might grow during a write operation. Not only data but metadata is 372865c675fSPrasad Joshi * also written at the end of the file. Therefore it is necessary to ensure 3730df6b4d9SPekka Enberg * every write is committed to disk. Hence we use uses qcow_pwrite_sync() to 374865c675fSPrasad Joshi * synchronize the in-core state of QCOW image to disk. 375865c675fSPrasad Joshi * 376865c675fSPrasad Joshi * We also try to restore the image to a consistent state if the metdata 377865c675fSPrasad Joshi * operation fails. The two metadat operations are: level 1 and level 2 table 378865c675fSPrasad Joshi * update. If either of them fails the image is truncated to a consistent state. 379865c675fSPrasad Joshi */ 380b1c84095SPekka Enberg static ssize_t qcow_write_cluster(struct qcow *q, u64 offset, void *buf, u32 src_len) 381865c675fSPrasad Joshi { 382865c675fSPrasad Joshi struct qcow_header *header = q->header; 383865c675fSPrasad Joshi struct qcow_table *table = &q->table; 384fe8bdde0SPekka Enberg struct qcow_l2_table *l2t; 3850df6b4d9SPekka Enberg u64 clust_start; 3860df6b4d9SPekka Enberg u64 clust_off; 387865c675fSPrasad Joshi u64 clust_sz; 388865c675fSPrasad Joshi u64 l1t_idx; 389865c675fSPrasad Joshi u64 l2t_idx; 3900df6b4d9SPekka Enberg u64 l2t_off; 3910df6b4d9SPekka Enberg u64 l2t_sz; 392865c675fSPrasad Joshi u64 f_sz; 3930df6b4d9SPekka Enberg u64 len; 394865c675fSPrasad Joshi 395fe8bdde0SPekka Enberg l2t = NULL; 396865c675fSPrasad Joshi l2t_sz = 1 << header->l2_bits; 397865c675fSPrasad Joshi clust_sz = 1 << header->cluster_bits; 398865c675fSPrasad Joshi 399865c675fSPrasad Joshi l1t_idx = get_l1_index(q, offset); 400865c675fSPrasad Joshi if (l1t_idx >= table->table_size) 401c0799eb9SPekka Enberg return -1; 402865c675fSPrasad Joshi 403865c675fSPrasad Joshi l2t_idx = get_l2_index(q, offset); 404865c675fSPrasad Joshi if (l2t_idx >= l2t_sz) 405c0799eb9SPekka Enberg return -1; 406865c675fSPrasad Joshi 407865c675fSPrasad Joshi clust_off = get_cluster_offset(q, offset); 408865c675fSPrasad Joshi if (clust_off >= clust_sz) 409c0799eb9SPekka Enberg return -1; 410865c675fSPrasad Joshi 411865c675fSPrasad Joshi len = clust_sz - clust_off; 412865c675fSPrasad Joshi if (len > src_len) 413865c675fSPrasad Joshi len = src_len; 414865c675fSPrasad Joshi 415c0799eb9SPekka Enberg mutex_lock(&q->mutex); 416c0799eb9SPekka Enberg 417659f4186SPekka Enberg l2t_off = be64_to_cpu(table->l1_table[l1t_idx]) & ~header->oflag_mask; 418865c675fSPrasad Joshi if (l2t_off) { 4193309045fSPrasad Joshi /* read and cache l2 table */ 420fe8bdde0SPekka Enberg l2t = qcow_read_l2_table(q, l2t_off); 421fe8bdde0SPekka Enberg if (!l2t) 4223309045fSPrasad Joshi goto error; 423865c675fSPrasad Joshi } else { 424fe8bdde0SPekka Enberg l2t = new_cache_table(q, l2t_off); 425fe8bdde0SPekka Enberg if (!l2t) 4263309045fSPrasad Joshi goto error; 4273309045fSPrasad Joshi 4280df6b4d9SPekka Enberg /* Capture the state of the consistent QCOW image */ 429865c675fSPrasad Joshi f_sz = file_size(q->fd); 430865c675fSPrasad Joshi if (!f_sz) 4313309045fSPrasad Joshi goto free_cache; 432865c675fSPrasad Joshi 433865c675fSPrasad Joshi /* Write the l2 table of 0's at the end of the file */ 434fe8bdde0SPekka Enberg l2t_off = qcow_write_l2_table(q, l2t->table); 435865c675fSPrasad Joshi if (!l2t_off) 4363309045fSPrasad Joshi goto free_cache; 437865c675fSPrasad Joshi 438fe8bdde0SPekka Enberg if (cache_table(q, l2t) < 0) { 4393309045fSPrasad Joshi if (ftruncate(q->fd, f_sz) < 0) 4403309045fSPrasad Joshi goto free_cache; 4413309045fSPrasad Joshi 4423309045fSPrasad Joshi goto free_cache; 443865c675fSPrasad Joshi } 444865c675fSPrasad Joshi 4450df6b4d9SPekka Enberg /* Update the in-core entry */ 446659f4186SPekka Enberg table->l1_table[l1t_idx] = cpu_to_be64(l2t_off); 447865c675fSPrasad Joshi } 448865c675fSPrasad Joshi 4490df6b4d9SPekka Enberg /* Capture the state of the consistent QCOW image */ 450865c675fSPrasad Joshi f_sz = file_size(q->fd); 451865c675fSPrasad Joshi if (!f_sz) 4523309045fSPrasad Joshi goto error; 453865c675fSPrasad Joshi 4544bd7e48bSPekka Enberg clust_start = be64_to_cpu(l2t->table[l2t_idx]) & ~header->oflag_mask; 455865c675fSPrasad Joshi if (!clust_start) { 456865c675fSPrasad Joshi clust_start = ALIGN(f_sz, clust_sz); 4574bd7e48bSPekka Enberg l2t->table[l2t_idx] = cpu_to_be64(clust_start); 458*aff88976SPekka Enberg l2t->dirty = 1; 459865c675fSPrasad Joshi } 4600df6b4d9SPekka Enberg 461c0799eb9SPekka Enberg mutex_unlock(&q->mutex); 462c0799eb9SPekka Enberg 463a4e46515SPekka Enberg /* Write actual data */ 464a4e46515SPekka Enberg if (pwrite_in_full(q->fd, buf, len, clust_start + clust_off) < 0) 465a4e46515SPekka Enberg return -1; 466a4e46515SPekka Enberg 467865c675fSPrasad Joshi return len; 4683309045fSPrasad Joshi 4693309045fSPrasad Joshi free_cache: 470fe8bdde0SPekka Enberg free(l2t); 471865c675fSPrasad Joshi error: 472c0799eb9SPekka Enberg mutex_unlock(&q->mutex); 473865c675fSPrasad Joshi return -1; 474865c675fSPrasad Joshi } 475865c675fSPrasad Joshi 476b1c84095SPekka Enberg static ssize_t qcow_write_sector(struct disk_image *disk, u64 sector, void *src, u32 src_len) 47786835cedSPrasad Joshi { 478865c675fSPrasad Joshi struct qcow *q = disk->priv; 479865c675fSPrasad Joshi struct qcow_header *header = q->header; 480c4acb611SIngo Molnar u32 nr_written; 4810df6b4d9SPekka Enberg char *buf; 482865c675fSPrasad Joshi u64 offset; 483865c675fSPrasad Joshi ssize_t nr; 484865c675fSPrasad Joshi 4850df6b4d9SPekka Enberg buf = src; 4860df6b4d9SPekka Enberg nr_written = 0; 487865c675fSPrasad Joshi offset = sector << SECTOR_SHIFT; 4880df6b4d9SPekka Enberg 4890df6b4d9SPekka Enberg while (nr_written < src_len) { 490865c675fSPrasad Joshi if (offset >= header->size) 4910df6b4d9SPekka Enberg return -1; 492865c675fSPrasad Joshi 493b1c84095SPekka Enberg nr = qcow_write_cluster(q, offset, buf, src_len - nr_written); 494865c675fSPrasad Joshi if (nr < 0) 4950df6b4d9SPekka Enberg return -1; 496865c675fSPrasad Joshi 4970df6b4d9SPekka Enberg nr_written += nr; 498865c675fSPrasad Joshi buf += nr; 499865c675fSPrasad Joshi offset += nr; 500865c675fSPrasad Joshi } 5010df6b4d9SPekka Enberg 50272133dd2SAsias He return nr_written; 50386835cedSPrasad Joshi } 50486835cedSPrasad Joshi 505b1c84095SPekka Enberg static ssize_t qcow_nowrite_sector(struct disk_image *disk, u64 sector, void *src, u32 src_len) 506f10860caSPekka Enberg { 507f10860caSPekka Enberg /* I/O error */ 508b1c84095SPekka Enberg pr_info("%s: no write support\n", __func__); 509f10860caSPekka Enberg return -1; 510f10860caSPekka Enberg } 511f10860caSPekka Enberg 512659f4186SPekka Enberg static int qcow_disk_flush(struct disk_image *disk) 513659f4186SPekka Enberg { 51473984b11SPekka Enberg struct qcow *q = disk->priv; 51573984b11SPekka Enberg struct qcow_header *header; 516a4e46515SPekka Enberg struct list_head *pos, *n; 51773984b11SPekka Enberg struct qcow_table *table; 51873984b11SPekka Enberg 51973984b11SPekka Enberg header = q->header; 52073984b11SPekka Enberg table = &q->table; 52173984b11SPekka Enberg 522a4e46515SPekka Enberg mutex_lock(&q->mutex); 523a4e46515SPekka Enberg 524a4e46515SPekka Enberg list_for_each_safe(pos, n, &q->lru_list) { 525a4e46515SPekka Enberg struct qcow_l2_table *c = list_entry(pos, struct qcow_l2_table, list); 526a4e46515SPekka Enberg 527a4e46515SPekka Enberg if (qcow_l2_cache_write(q, c) < 0) 528a4e46515SPekka Enberg goto error_unlock; 529a4e46515SPekka Enberg } 530a4e46515SPekka Enberg 531a4e46515SPekka Enberg if (fdatasync(disk->fd) < 0) 532a4e46515SPekka Enberg goto error_unlock; 533a4e46515SPekka Enberg 53473984b11SPekka Enberg if (pwrite_in_full(disk->fd, table->l1_table, table->table_size * sizeof(u64), header->l1_table_offset) < 0) 535a4e46515SPekka Enberg goto error_unlock; 536a4e46515SPekka Enberg 537a4e46515SPekka Enberg mutex_unlock(&q->mutex); 53873984b11SPekka Enberg 539659f4186SPekka Enberg return fsync(disk->fd); 540a4e46515SPekka Enberg 541a4e46515SPekka Enberg error_unlock: 542a4e46515SPekka Enberg mutex_unlock(&q->mutex); 543a4e46515SPekka Enberg return -1; 544659f4186SPekka Enberg } 545659f4186SPekka Enberg 546b1c84095SPekka Enberg static int qcow_disk_close(struct disk_image *disk) 54786835cedSPrasad Joshi { 54886835cedSPrasad Joshi struct qcow *q; 54986835cedSPrasad Joshi 55043835ac9SSasha Levin if (!disk) 55172133dd2SAsias He return 0; 55286835cedSPrasad Joshi 55343835ac9SSasha Levin q = disk->priv; 55486835cedSPrasad Joshi 5553309045fSPrasad Joshi free_cache(q); 5566c6f79b6SPrasad Joshi free(q->table.l1_table); 55786835cedSPrasad Joshi free(q->header); 55886835cedSPrasad Joshi free(q); 55972133dd2SAsias He 56072133dd2SAsias He return 0; 56186835cedSPrasad Joshi } 56286835cedSPrasad Joshi 563b1c84095SPekka Enberg static struct disk_image_operations qcow_disk_readonly_ops = { 564b1c84095SPekka Enberg .read_sector = qcow_read_sector, 565b1c84095SPekka Enberg .write_sector = qcow_nowrite_sector, 566b1c84095SPekka Enberg .close = qcow_disk_close, 567f10860caSPekka Enberg }; 568f10860caSPekka Enberg 569b1c84095SPekka Enberg static struct disk_image_operations qcow_disk_ops = { 570b1c84095SPekka Enberg .read_sector = qcow_read_sector, 571b1c84095SPekka Enberg .write_sector = qcow_write_sector, 572659f4186SPekka Enberg .flush = qcow_disk_flush, 573b1c84095SPekka Enberg .close = qcow_disk_close, 57486835cedSPrasad Joshi }; 57586835cedSPrasad Joshi 57686835cedSPrasad Joshi static int qcow_read_l1_table(struct qcow *q) 57786835cedSPrasad Joshi { 578ad627d62SPekka Enberg struct qcow_header *header = q->header; 57900adcc1bSPrasad Joshi struct qcow_table *table = &q->table; 58086835cedSPrasad Joshi 581ad627d62SPekka Enberg table->table_size = header->l1_size; 58286835cedSPrasad Joshi 58300adcc1bSPrasad Joshi table->l1_table = calloc(table->table_size, sizeof(u64)); 58400adcc1bSPrasad Joshi if (!table->l1_table) 58586835cedSPrasad Joshi return -1; 58686835cedSPrasad Joshi 587659f4186SPekka Enberg return pread_in_full(q->fd, table->l1_table, sizeof(u64) * table->table_size, header->l1_table_offset); 58886835cedSPrasad Joshi } 58986835cedSPrasad Joshi 590ad627d62SPekka Enberg static void *qcow2_read_header(int fd) 59186835cedSPrasad Joshi { 592ad627d62SPekka Enberg struct qcow2_header_disk f_header; 593ad627d62SPekka Enberg struct qcow_header *header; 59486835cedSPrasad Joshi 595ad627d62SPekka Enberg header = malloc(sizeof(struct qcow_header)); 59686835cedSPrasad Joshi if (!header) 59786835cedSPrasad Joshi return NULL; 59886835cedSPrasad Joshi 5990657f33dSPrasad Joshi if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0) { 6000657f33dSPrasad Joshi free(header); 60186835cedSPrasad Joshi return NULL; 6020657f33dSPrasad Joshi } 60386835cedSPrasad Joshi 604ad627d62SPekka Enberg be32_to_cpus(&f_header.magic); 605ad627d62SPekka Enberg be32_to_cpus(&f_header.version); 606ad627d62SPekka Enberg be64_to_cpus(&f_header.backing_file_offset); 607ad627d62SPekka Enberg be32_to_cpus(&f_header.backing_file_size); 608ad627d62SPekka Enberg be32_to_cpus(&f_header.cluster_bits); 609ad627d62SPekka Enberg be64_to_cpus(&f_header.size); 610ad627d62SPekka Enberg be32_to_cpus(&f_header.crypt_method); 611ad627d62SPekka Enberg be32_to_cpus(&f_header.l1_size); 612ad627d62SPekka Enberg be64_to_cpus(&f_header.l1_table_offset); 613ad627d62SPekka Enberg be64_to_cpus(&f_header.refcount_table_offset); 614ad627d62SPekka Enberg be32_to_cpus(&f_header.refcount_table_clusters); 615ad627d62SPekka Enberg be32_to_cpus(&f_header.nb_snapshots); 616ad627d62SPekka Enberg be64_to_cpus(&f_header.snapshots_offset); 617ad627d62SPekka Enberg 618ad627d62SPekka Enberg *header = (struct qcow_header) { 619ad627d62SPekka Enberg .size = f_header.size, 620ad627d62SPekka Enberg .l1_table_offset = f_header.l1_table_offset, 621ad627d62SPekka Enberg .l1_size = f_header.l1_size, 622ad627d62SPekka Enberg .cluster_bits = f_header.cluster_bits, 623ad627d62SPekka Enberg .l2_bits = f_header.cluster_bits - 3, 624ad627d62SPekka Enberg .oflag_mask = QCOW2_OFLAG_MASK, 625ad627d62SPekka Enberg }; 626ad627d62SPekka Enberg 627ad627d62SPekka Enberg return header; 628ad627d62SPekka Enberg } 629ad627d62SPekka Enberg 630f10860caSPekka Enberg static struct disk_image *qcow2_probe(int fd, bool readonly) 631ad627d62SPekka Enberg { 632ad627d62SPekka Enberg struct qcow *q; 633ad627d62SPekka Enberg struct qcow_header *h; 634ad627d62SPekka Enberg struct disk_image *disk_image; 635ad627d62SPekka Enberg 636ad627d62SPekka Enberg q = calloc(1, sizeof(struct qcow)); 637ad627d62SPekka Enberg if (!q) 638ad627d62SPekka Enberg goto error; 639ad627d62SPekka Enberg 640c0799eb9SPekka Enberg mutex_init(&q->mutex); 641ad627d62SPekka Enberg q->fd = fd; 6423309045fSPrasad Joshi q->root = RB_ROOT; 6433309045fSPrasad Joshi INIT_LIST_HEAD(&q->lru_list); 644ad627d62SPekka Enberg 645ad627d62SPekka Enberg h = q->header = qcow2_read_header(fd); 646ad627d62SPekka Enberg if (!h) 647ad627d62SPekka Enberg goto error; 648ad627d62SPekka Enberg 649ad627d62SPekka Enberg if (qcow_read_l1_table(q) < 0) 650ad627d62SPekka Enberg goto error; 651ad627d62SPekka Enberg 6527d22135fSAsias He /* 6537d22135fSAsias He * Do not use mmap use read/write instead 6547d22135fSAsias He */ 655f10860caSPekka Enberg if (readonly) 656b1c84095SPekka Enberg disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_NOMMAP); 657f10860caSPekka Enberg else 658b1c84095SPekka Enberg disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_NOMMAP); 659f10860caSPekka Enberg 660ad627d62SPekka Enberg if (!disk_image) 661ad627d62SPekka Enberg goto error; 662ad627d62SPekka Enberg disk_image->priv = q; 663ad627d62SPekka Enberg 664ad627d62SPekka Enberg return disk_image; 665ad627d62SPekka Enberg error: 666ad627d62SPekka Enberg if (!q) 667ad627d62SPekka Enberg return NULL; 668ad627d62SPekka Enberg 669ad627d62SPekka Enberg free(q->table.l1_table); 670ad627d62SPekka Enberg free(q->header); 671ad627d62SPekka Enberg free(q); 672ad627d62SPekka Enberg 673ad627d62SPekka Enberg return NULL; 674ad627d62SPekka Enberg } 675ad627d62SPekka Enberg 676ad627d62SPekka Enberg static bool qcow2_check_image(int fd) 677ad627d62SPekka Enberg { 678ad627d62SPekka Enberg struct qcow2_header_disk f_header; 679ad627d62SPekka Enberg 680ad627d62SPekka Enberg if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0) 681ad627d62SPekka Enberg return false; 682ad627d62SPekka Enberg 683ad627d62SPekka Enberg be32_to_cpus(&f_header.magic); 684ad627d62SPekka Enberg be32_to_cpus(&f_header.version); 685ad627d62SPekka Enberg 686ad627d62SPekka Enberg if (f_header.magic != QCOW_MAGIC) 687ad627d62SPekka Enberg return false; 688ad627d62SPekka Enberg 689ad627d62SPekka Enberg if (f_header.version != QCOW2_VERSION) 690ad627d62SPekka Enberg return false; 691ad627d62SPekka Enberg 692ad627d62SPekka Enberg return true; 693ad627d62SPekka Enberg } 694ad627d62SPekka Enberg 695ad627d62SPekka Enberg static void *qcow1_read_header(int fd) 696ad627d62SPekka Enberg { 697ad627d62SPekka Enberg struct qcow1_header_disk f_header; 698ad627d62SPekka Enberg struct qcow_header *header; 699ad627d62SPekka Enberg 700ad627d62SPekka Enberg header = malloc(sizeof(struct qcow_header)); 701ad627d62SPekka Enberg if (!header) 702ad627d62SPekka Enberg return NULL; 703ad627d62SPekka Enberg 704d39cefd2SSasha Levin if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0) { 705d39cefd2SSasha Levin free(header); 706ad627d62SPekka Enberg return NULL; 707d39cefd2SSasha Levin } 708ad627d62SPekka Enberg 709ad627d62SPekka Enberg be32_to_cpus(&f_header.magic); 710ad627d62SPekka Enberg be32_to_cpus(&f_header.version); 711ad627d62SPekka Enberg be64_to_cpus(&f_header.backing_file_offset); 712ad627d62SPekka Enberg be32_to_cpus(&f_header.backing_file_size); 713ad627d62SPekka Enberg be32_to_cpus(&f_header.mtime); 714ad627d62SPekka Enberg be64_to_cpus(&f_header.size); 715ad627d62SPekka Enberg be32_to_cpus(&f_header.crypt_method); 716ad627d62SPekka Enberg be64_to_cpus(&f_header.l1_table_offset); 717ad627d62SPekka Enberg 718ad627d62SPekka Enberg *header = (struct qcow_header) { 719ad627d62SPekka Enberg .size = f_header.size, 720ad627d62SPekka Enberg .l1_table_offset = f_header.l1_table_offset, 721ad627d62SPekka Enberg .l1_size = f_header.size / ((1 << f_header.l2_bits) * (1 << f_header.cluster_bits)), 722ad627d62SPekka Enberg .cluster_bits = f_header.cluster_bits, 723ad627d62SPekka Enberg .l2_bits = f_header.l2_bits, 724ad627d62SPekka Enberg .oflag_mask = QCOW1_OFLAG_MASK, 725ad627d62SPekka Enberg }; 72686835cedSPrasad Joshi 72786835cedSPrasad Joshi return header; 72886835cedSPrasad Joshi } 72986835cedSPrasad Joshi 730f10860caSPekka Enberg static struct disk_image *qcow1_probe(int fd, bool readonly) 73186835cedSPrasad Joshi { 73286835cedSPrasad Joshi struct qcow *q; 733ad627d62SPekka Enberg struct qcow_header *h; 73486835cedSPrasad Joshi struct disk_image *disk_image; 73586835cedSPrasad Joshi 73686835cedSPrasad Joshi q = calloc(1, sizeof(struct qcow)); 73786835cedSPrasad Joshi if (!q) 73886835cedSPrasad Joshi goto error; 73986835cedSPrasad Joshi 740c0799eb9SPekka Enberg mutex_init(&q->mutex); 74186835cedSPrasad Joshi q->fd = fd; 7423309045fSPrasad Joshi q->root = RB_ROOT; 7433309045fSPrasad Joshi INIT_LIST_HEAD(&q->lru_list); 74486835cedSPrasad Joshi 74586835cedSPrasad Joshi h = q->header = qcow1_read_header(fd); 74686835cedSPrasad Joshi if (!h) 74786835cedSPrasad Joshi goto error; 74886835cedSPrasad Joshi 74986835cedSPrasad Joshi if (qcow_read_l1_table(q) < 0) 75086835cedSPrasad Joshi goto error; 75186835cedSPrasad Joshi 7527d22135fSAsias He /* 7537d22135fSAsias He * Do not use mmap use read/write instead 7547d22135fSAsias He */ 755f10860caSPekka Enberg if (readonly) 756b1c84095SPekka Enberg disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_NOMMAP); 757f10860caSPekka Enberg else 758b1c84095SPekka Enberg disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_NOMMAP); 759f10860caSPekka Enberg 76086835cedSPrasad Joshi if (!disk_image) 76186835cedSPrasad Joshi goto error; 76286835cedSPrasad Joshi disk_image->priv = q; 76386835cedSPrasad Joshi 76486835cedSPrasad Joshi return disk_image; 76586835cedSPrasad Joshi error: 76686835cedSPrasad Joshi if (!q) 76786835cedSPrasad Joshi return NULL; 76886835cedSPrasad Joshi 7696c6f79b6SPrasad Joshi free(q->table.l1_table); 77086835cedSPrasad Joshi free(q->header); 77186835cedSPrasad Joshi free(q); 77286835cedSPrasad Joshi 77386835cedSPrasad Joshi return NULL; 77486835cedSPrasad Joshi } 77586835cedSPrasad Joshi 776ad627d62SPekka Enberg static bool qcow1_check_image(int fd) 77786835cedSPrasad Joshi { 778ad627d62SPekka Enberg struct qcow1_header_disk f_header; 77986835cedSPrasad Joshi 780ad627d62SPekka Enberg if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0) 781ad627d62SPekka Enberg return false; 78286835cedSPrasad Joshi 783ad627d62SPekka Enberg be32_to_cpus(&f_header.magic); 784ad627d62SPekka Enberg be32_to_cpus(&f_header.version); 78586835cedSPrasad Joshi 786ad627d62SPekka Enberg if (f_header.magic != QCOW_MAGIC) 787ad627d62SPekka Enberg return false; 78886835cedSPrasad Joshi 789ad627d62SPekka Enberg if (f_header.version != QCOW1_VERSION) 790ad627d62SPekka Enberg return false; 79186835cedSPrasad Joshi 792ad627d62SPekka Enberg return true; 79386835cedSPrasad Joshi } 79486835cedSPrasad Joshi 795f10860caSPekka Enberg struct disk_image *qcow_probe(int fd, bool readonly) 79686835cedSPrasad Joshi { 797ad627d62SPekka Enberg if (qcow1_check_image(fd)) 798f10860caSPekka Enberg return qcow1_probe(fd, readonly); 799ad627d62SPekka Enberg 800ad627d62SPekka Enberg if (qcow2_check_image(fd)) 801f10860caSPekka Enberg return qcow2_probe(fd, readonly); 802ad627d62SPekka Enberg 803ad627d62SPekka Enberg return NULL; 80486835cedSPrasad Joshi } 805