186835cedSPrasad Joshi #include "kvm/qcow.h" 286835cedSPrasad Joshi 386835cedSPrasad Joshi #include "kvm/disk-image.h" 486835cedSPrasad Joshi #include "kvm/read-write.h" 5c0799eb9SPekka Enberg #include "kvm/mutex.h" 686835cedSPrasad Joshi #include "kvm/util.h" 786835cedSPrasad Joshi 886835cedSPrasad Joshi #include <sys/types.h> 986835cedSPrasad Joshi #include <sys/stat.h> 1086835cedSPrasad Joshi #include <stdbool.h> 1186835cedSPrasad Joshi #include <stdlib.h> 1286835cedSPrasad Joshi #include <string.h> 1386835cedSPrasad Joshi #include <unistd.h> 1486835cedSPrasad Joshi #include <fcntl.h> 1586835cedSPrasad Joshi 1686835cedSPrasad Joshi #include <linux/byteorder.h> 17865c675fSPrasad Joshi #include <linux/kernel.h> 180df6b4d9SPekka Enberg #include <linux/types.h> 1986835cedSPrasad Joshi 20473d58ffSPekka Enberg static int insert(struct rb_root *root, struct qcow_l2_table *new) 213309045fSPrasad Joshi { 223309045fSPrasad Joshi struct rb_node **link = &(root->rb_node), *parent = NULL; 233309045fSPrasad Joshi u64 offset = new->offset; 243309045fSPrasad Joshi 253309045fSPrasad Joshi /* search the tree */ 263309045fSPrasad Joshi while (*link) { 27473d58ffSPekka Enberg struct qcow_l2_table *t; 283309045fSPrasad Joshi 29473d58ffSPekka Enberg t = rb_entry(*link, struct qcow_l2_table, node); 303309045fSPrasad Joshi if (!t) 313309045fSPrasad Joshi goto error; 323309045fSPrasad Joshi 333309045fSPrasad Joshi parent = *link; 343309045fSPrasad Joshi 353309045fSPrasad Joshi if (t->offset > offset) 363309045fSPrasad Joshi link = &(*link)->rb_left; 373309045fSPrasad Joshi else if (t->offset < offset) 383309045fSPrasad Joshi link = &(*link)->rb_right; 393309045fSPrasad Joshi else 403309045fSPrasad Joshi goto out; 413309045fSPrasad Joshi } 423309045fSPrasad Joshi 433309045fSPrasad Joshi /* add new node */ 443309045fSPrasad Joshi rb_link_node(&new->node, parent, link); 453309045fSPrasad Joshi rb_insert_color(&new->node, root); 463309045fSPrasad Joshi out: 473309045fSPrasad Joshi return 0; 483309045fSPrasad Joshi error: 493309045fSPrasad Joshi return -1; 503309045fSPrasad Joshi } 513309045fSPrasad Joshi 52473d58ffSPekka Enberg static struct qcow_l2_table *search(struct rb_root *root, u64 offset) 533309045fSPrasad Joshi { 543309045fSPrasad Joshi struct rb_node *link = root->rb_node; 553309045fSPrasad Joshi 563309045fSPrasad Joshi while (link) { 57473d58ffSPekka Enberg struct qcow_l2_table *t; 583309045fSPrasad Joshi 59473d58ffSPekka Enberg t = rb_entry(link, struct qcow_l2_table, node); 603309045fSPrasad Joshi if (!t) 613309045fSPrasad Joshi goto out; 623309045fSPrasad Joshi 633309045fSPrasad Joshi if (t->offset > offset) 643309045fSPrasad Joshi link = link->rb_left; 653309045fSPrasad Joshi else if (t->offset < offset) 663309045fSPrasad Joshi link = link->rb_right; 673309045fSPrasad Joshi else 683309045fSPrasad Joshi return t; 693309045fSPrasad Joshi } 703309045fSPrasad Joshi out: 713309045fSPrasad Joshi return NULL; 723309045fSPrasad Joshi } 733309045fSPrasad Joshi 743309045fSPrasad Joshi static void free_cache(struct qcow *q) 753309045fSPrasad Joshi { 763309045fSPrasad Joshi struct list_head *pos, *n; 77473d58ffSPekka Enberg struct qcow_l2_table *t; 783309045fSPrasad Joshi struct rb_root *r = &q->root; 793309045fSPrasad Joshi 803309045fSPrasad Joshi list_for_each_safe(pos, n, &q->lru_list) { 813309045fSPrasad Joshi /* Remove cache table from the list and RB tree */ 823309045fSPrasad Joshi list_del(pos); 83473d58ffSPekka Enberg t = list_entry(pos, struct qcow_l2_table, list); 843309045fSPrasad Joshi rb_erase(&t->node, r); 853309045fSPrasad Joshi 863309045fSPrasad Joshi /* Free the cached node */ 873309045fSPrasad Joshi free(t); 883309045fSPrasad Joshi } 893309045fSPrasad Joshi } 903309045fSPrasad Joshi 91*a4e46515SPekka Enberg static int qcow_l2_cache_write(struct qcow *q, struct qcow_l2_table *c) 92*a4e46515SPekka Enberg { 93*a4e46515SPekka Enberg struct qcow_header *header = q->header; 94*a4e46515SPekka Enberg u64 size; 95*a4e46515SPekka Enberg 96*a4e46515SPekka Enberg size = 1 << header->l2_bits; 97*a4e46515SPekka Enberg 98*a4e46515SPekka Enberg return pwrite_in_full(q->fd, c->table, size * sizeof(u64), c->offset); 99*a4e46515SPekka Enberg } 100*a4e46515SPekka Enberg 101473d58ffSPekka Enberg static int cache_table(struct qcow *q, struct qcow_l2_table *c) 1023309045fSPrasad Joshi { 1033309045fSPrasad Joshi struct rb_root *r = &q->root; 104473d58ffSPekka Enberg struct qcow_l2_table *lru; 1053309045fSPrasad Joshi 1063309045fSPrasad Joshi if (q->nr_cached == MAX_CACHE_NODES) { 1073309045fSPrasad Joshi /* 1083309045fSPrasad Joshi * The node at the head of the list is least recently used 1093309045fSPrasad Joshi * node. Remove it from the list and replaced with a new node. 1103309045fSPrasad Joshi */ 111473d58ffSPekka Enberg lru = list_first_entry(&q->lru_list, struct qcow_l2_table, list); 1123309045fSPrasad Joshi 113*a4e46515SPekka Enberg if (qcow_l2_cache_write(q, lru) < 0) 114*a4e46515SPekka Enberg goto error; 115*a4e46515SPekka Enberg 1163309045fSPrasad Joshi /* Remove the node from the cache */ 1173309045fSPrasad Joshi rb_erase(&lru->node, r); 1183309045fSPrasad Joshi list_del_init(&lru->list); 1193309045fSPrasad Joshi q->nr_cached--; 1203309045fSPrasad Joshi 1213309045fSPrasad Joshi /* Free the LRUed node */ 1223309045fSPrasad Joshi free(lru); 1233309045fSPrasad Joshi } 1243309045fSPrasad Joshi 1253309045fSPrasad Joshi /* Add new node in RB Tree: Helps in searching faster */ 1263309045fSPrasad Joshi if (insert(r, c) < 0) 1273309045fSPrasad Joshi goto error; 1283309045fSPrasad Joshi 1293309045fSPrasad Joshi /* Add in LRU replacement list */ 1303309045fSPrasad Joshi list_add_tail(&c->list, &q->lru_list); 1313309045fSPrasad Joshi q->nr_cached++; 1323309045fSPrasad Joshi 1333309045fSPrasad Joshi return 0; 1343309045fSPrasad Joshi error: 1353309045fSPrasad Joshi return -1; 1363309045fSPrasad Joshi } 1373309045fSPrasad Joshi 138fe8bdde0SPekka Enberg static struct qcow_l2_table *search_table(struct qcow *q, u64 offset) 1393309045fSPrasad Joshi { 140fe8bdde0SPekka Enberg struct qcow_l2_table *l2t; 1413309045fSPrasad Joshi 142fe8bdde0SPekka Enberg l2t = search(&q->root, offset); 143fe8bdde0SPekka Enberg if (!l2t) 144fe8bdde0SPekka Enberg return NULL; 1453309045fSPrasad Joshi 1463309045fSPrasad Joshi /* Update the LRU state, by moving the searched node to list tail */ 147fe8bdde0SPekka Enberg list_move_tail(&l2t->list, &q->lru_list); 1483309045fSPrasad Joshi 149fe8bdde0SPekka Enberg return l2t; 1503309045fSPrasad Joshi } 1513309045fSPrasad Joshi 1523309045fSPrasad Joshi /* Allocates a new node for caching L2 table */ 153473d58ffSPekka Enberg static struct qcow_l2_table *new_cache_table(struct qcow *q, u64 offset) 1543309045fSPrasad Joshi { 1553309045fSPrasad Joshi struct qcow_header *header = q->header; 156473d58ffSPekka Enberg struct qcow_l2_table *c; 1573309045fSPrasad Joshi u64 l2t_sz; 1583309045fSPrasad Joshi u64 size; 1593309045fSPrasad Joshi 1603309045fSPrasad Joshi l2t_sz = 1 << header->l2_bits; 1613309045fSPrasad Joshi size = sizeof(*c) + l2t_sz * sizeof(u64); 1623309045fSPrasad Joshi c = calloc(1, size); 1633309045fSPrasad Joshi if (!c) 1643309045fSPrasad Joshi goto out; 1653309045fSPrasad Joshi 1663309045fSPrasad Joshi c->offset = offset; 1673309045fSPrasad Joshi RB_CLEAR_NODE(&c->node); 1683309045fSPrasad Joshi INIT_LIST_HEAD(&c->list); 1693309045fSPrasad Joshi out: 1703309045fSPrasad Joshi return c; 1713309045fSPrasad Joshi } 1723309045fSPrasad Joshi 173742fce76SPrasad Joshi static inline u64 get_l1_index(struct qcow *q, u64 offset) 17486835cedSPrasad Joshi { 175ad627d62SPekka Enberg struct qcow_header *header = q->header; 17686835cedSPrasad Joshi 17786835cedSPrasad Joshi return offset >> (header->l2_bits + header->cluster_bits); 17886835cedSPrasad Joshi } 17986835cedSPrasad Joshi 180742fce76SPrasad Joshi static inline u64 get_l2_index(struct qcow *q, u64 offset) 18186835cedSPrasad Joshi { 182ad627d62SPekka Enberg struct qcow_header *header = q->header; 18386835cedSPrasad Joshi 18486835cedSPrasad Joshi return (offset >> (header->cluster_bits)) & ((1 << header->l2_bits)-1); 18586835cedSPrasad Joshi } 18686835cedSPrasad Joshi 187742fce76SPrasad Joshi static inline u64 get_cluster_offset(struct qcow *q, u64 offset) 18886835cedSPrasad Joshi { 189ad627d62SPekka Enberg struct qcow_header *header = q->header; 19086835cedSPrasad Joshi 19186835cedSPrasad Joshi return offset & ((1 << header->cluster_bits)-1); 19286835cedSPrasad Joshi } 19386835cedSPrasad Joshi 194fe8bdde0SPekka Enberg static struct qcow_l2_table *qcow_read_l2_table(struct qcow *q, u64 offset) 1953309045fSPrasad Joshi { 1963309045fSPrasad Joshi struct qcow_header *header = q->header; 197fe8bdde0SPekka Enberg struct qcow_l2_table *l2t; 1983309045fSPrasad Joshi u64 size; 1993309045fSPrasad Joshi 2003309045fSPrasad Joshi size = 1 << header->l2_bits; 2013309045fSPrasad Joshi 2023309045fSPrasad Joshi /* search an entry for offset in cache */ 203fe8bdde0SPekka Enberg l2t = search_table(q, offset); 204fe8bdde0SPekka Enberg if (l2t) 205fe8bdde0SPekka Enberg return l2t; 2063309045fSPrasad Joshi 2073309045fSPrasad Joshi /* allocate new node for caching l2 table */ 208fe8bdde0SPekka Enberg l2t = new_cache_table(q, offset); 209fe8bdde0SPekka Enberg if (!l2t) 2103309045fSPrasad Joshi goto error; 2113309045fSPrasad Joshi 2123309045fSPrasad Joshi /* table not cached: read from the disk */ 213fe8bdde0SPekka Enberg if (pread_in_full(q->fd, l2t->table, size * sizeof(u64), offset) < 0) 2143309045fSPrasad Joshi goto error; 2153309045fSPrasad Joshi 2163309045fSPrasad Joshi /* cache the table */ 217fe8bdde0SPekka Enberg if (cache_table(q, l2t) < 0) 2183309045fSPrasad Joshi goto error; 2193309045fSPrasad Joshi 220fe8bdde0SPekka Enberg return l2t; 2213309045fSPrasad Joshi error: 222fe8bdde0SPekka Enberg free(l2t); 223fe8bdde0SPekka Enberg return NULL; 2243309045fSPrasad Joshi } 2253309045fSPrasad Joshi 226b1c84095SPekka Enberg static ssize_t qcow_read_cluster(struct qcow *q, u64 offset, void *dst, u32 dst_len) 22786835cedSPrasad Joshi { 228ad627d62SPekka Enberg struct qcow_header *header = q->header; 2293dac48d4SPrasad Joshi struct qcow_table *table = &q->table; 230fe8bdde0SPekka Enberg struct qcow_l2_table *l2_table; 231742fce76SPrasad Joshi u64 l2_table_offset; 232742fce76SPrasad Joshi u64 l2_table_size; 2333dac48d4SPrasad Joshi u64 cluster_size; 234742fce76SPrasad Joshi u64 clust_offset; 235742fce76SPrasad Joshi u64 clust_start; 236a51948ceSPekka Enberg size_t length; 237742fce76SPrasad Joshi u64 l1_idx; 238742fce76SPrasad Joshi u64 l2_idx; 23986835cedSPrasad Joshi 240dae803fbSPekka Enberg cluster_size = 1 << header->cluster_bits; 24186835cedSPrasad Joshi 242c5e0624bSPrasad Joshi l1_idx = get_l1_index(q, offset); 2433dac48d4SPrasad Joshi if (l1_idx >= table->table_size) 244c0799eb9SPekka Enberg return -1; 24586835cedSPrasad Joshi 2463dac48d4SPrasad Joshi clust_offset = get_cluster_offset(q, offset); 2473dac48d4SPrasad Joshi if (clust_offset >= cluster_size) 248c0799eb9SPekka Enberg return -1; 2493dac48d4SPrasad Joshi 2503dac48d4SPrasad Joshi length = cluster_size - clust_offset; 2513dac48d4SPrasad Joshi if (length > dst_len) 2523dac48d4SPrasad Joshi length = dst_len; 2533dac48d4SPrasad Joshi 254c0799eb9SPekka Enberg mutex_lock(&q->mutex); 255659f4186SPekka Enberg l2_table_offset = be64_to_cpu(table->l1_table[l1_idx]) & ~header->oflag_mask; 25686835cedSPrasad Joshi if (!l2_table_offset) 2573dac48d4SPrasad Joshi goto zero_cluster; 25886835cedSPrasad Joshi 25986835cedSPrasad Joshi l2_table_size = 1 << header->l2_bits; 26086835cedSPrasad Joshi 2613309045fSPrasad Joshi /* read and cache level 2 table */ 262fe8bdde0SPekka Enberg l2_table = qcow_read_l2_table(q, l2_table_offset); 263fe8bdde0SPekka Enberg if (!l2_table) 264b6edb0ecSSasha Levin goto out_error; 26586835cedSPrasad Joshi 266c5e0624bSPrasad Joshi l2_idx = get_l2_index(q, offset); 26786835cedSPrasad Joshi if (l2_idx >= l2_table_size) 268b6edb0ecSSasha Levin goto out_error; 26986835cedSPrasad Joshi 2704bd7e48bSPekka Enberg clust_start = be64_to_cpu(l2_table->table[l2_idx]) & ~header->oflag_mask; 27186835cedSPrasad Joshi if (!clust_start) 2723dac48d4SPrasad Joshi goto zero_cluster; 27386835cedSPrasad Joshi 274c0799eb9SPekka Enberg mutex_unlock(&q->mutex); 27586835cedSPrasad Joshi 276c0799eb9SPekka Enberg if (pread_in_full(q->fd, dst, length, clust_start + clust_offset) < 0) 277c0799eb9SPekka Enberg return -1; 278c0799eb9SPekka Enberg 2793dac48d4SPrasad Joshi return length; 28086835cedSPrasad Joshi 281179b71f0SPekka Enberg zero_cluster: 282c0799eb9SPekka Enberg mutex_unlock(&q->mutex); 283179b71f0SPekka Enberg memset(dst, 0, length); 284c0799eb9SPekka Enberg return length; 285179b71f0SPekka Enberg 28686835cedSPrasad Joshi out_error: 287c0799eb9SPekka Enberg mutex_unlock(&q->mutex); 288179b71f0SPekka Enberg length = -1; 289c0799eb9SPekka Enberg return -1; 2903dac48d4SPrasad Joshi } 291b6edb0ecSSasha Levin 292b1c84095SPekka Enberg static ssize_t qcow_read_sector(struct disk_image *disk, u64 sector, void *dst, u32 dst_len) 2933dac48d4SPrasad Joshi { 29443835ac9SSasha Levin struct qcow *q = disk->priv; 295ad627d62SPekka Enberg struct qcow_header *header = q->header; 296d8eea993SPekka Enberg u32 nr_read; 2970df6b4d9SPekka Enberg u64 offset; 2980df6b4d9SPekka Enberg char *buf; 2993dac48d4SPrasad Joshi u32 nr; 3003dac48d4SPrasad Joshi 3010df6b4d9SPekka Enberg buf = dst; 302d8eea993SPekka Enberg nr_read = 0; 3030df6b4d9SPekka Enberg 304d8eea993SPekka Enberg while (nr_read < dst_len) { 3053dac48d4SPrasad Joshi offset = sector << SECTOR_SHIFT; 3063dac48d4SPrasad Joshi if (offset >= header->size) 3070df6b4d9SPekka Enberg return -1; 3083dac48d4SPrasad Joshi 309b1c84095SPekka Enberg nr = qcow_read_cluster(q, offset, buf, dst_len - nr_read); 310a51948ceSPekka Enberg if (nr <= 0) 3110df6b4d9SPekka Enberg return -1; 3123dac48d4SPrasad Joshi 313d8eea993SPekka Enberg nr_read += nr; 3143dac48d4SPrasad Joshi buf += nr; 3153dac48d4SPrasad Joshi sector += (nr >> SECTOR_SHIFT); 3163dac48d4SPrasad Joshi } 3170df6b4d9SPekka Enberg 31872133dd2SAsias He return dst_len; 31986835cedSPrasad Joshi } 32086835cedSPrasad Joshi 321865c675fSPrasad Joshi static inline u64 file_size(int fd) 322865c675fSPrasad Joshi { 323865c675fSPrasad Joshi struct stat st; 3240df6b4d9SPekka Enberg 325865c675fSPrasad Joshi if (fstat(fd, &st) < 0) 326865c675fSPrasad Joshi return 0; 3270df6b4d9SPekka Enberg 328865c675fSPrasad Joshi return st.st_size; 329865c675fSPrasad Joshi } 330865c675fSPrasad Joshi 3310df6b4d9SPekka Enberg static inline int qcow_pwrite_sync(int fd, void *buf, size_t count, off_t offset) 332865c675fSPrasad Joshi { 333865c675fSPrasad Joshi if (pwrite_in_full(fd, buf, count, offset) < 0) 334865c675fSPrasad Joshi return -1; 3350df6b4d9SPekka Enberg 3367d94a719SPekka Enberg return fdatasync(fd); 337865c675fSPrasad Joshi } 338865c675fSPrasad Joshi 339865c675fSPrasad Joshi /* Writes a level 2 table at the end of the file. */ 340b1c84095SPekka Enberg static u64 qcow_write_l2_table(struct qcow *q, u64 *table) 341865c675fSPrasad Joshi { 342865c675fSPrasad Joshi struct qcow_header *header = q->header; 343865c675fSPrasad Joshi u64 clust_sz; 344865c675fSPrasad Joshi u64 f_sz; 3450df6b4d9SPekka Enberg u64 off; 3460df6b4d9SPekka Enberg u64 sz; 347865c675fSPrasad Joshi 348865c675fSPrasad Joshi f_sz = file_size(q->fd); 349865c675fSPrasad Joshi if (!f_sz) 350865c675fSPrasad Joshi return 0; 351865c675fSPrasad Joshi 352865c675fSPrasad Joshi sz = 1 << header->l2_bits; 353865c675fSPrasad Joshi clust_sz = 1 << header->cluster_bits; 354865c675fSPrasad Joshi off = ALIGN(f_sz, clust_sz); 355865c675fSPrasad Joshi 3566fe151aeSPekka Enberg if (pwrite_in_full(q->fd, table, sz * sizeof(u64), off) < 0) 357865c675fSPrasad Joshi return 0; 3580df6b4d9SPekka Enberg 359865c675fSPrasad Joshi return off; 360865c675fSPrasad Joshi } 361865c675fSPrasad Joshi 362865c675fSPrasad Joshi /* 363865c675fSPrasad Joshi * QCOW file might grow during a write operation. Not only data but metadata is 364865c675fSPrasad Joshi * also written at the end of the file. Therefore it is necessary to ensure 3650df6b4d9SPekka Enberg * every write is committed to disk. Hence we use uses qcow_pwrite_sync() to 366865c675fSPrasad Joshi * synchronize the in-core state of QCOW image to disk. 367865c675fSPrasad Joshi * 368865c675fSPrasad Joshi * We also try to restore the image to a consistent state if the metdata 369865c675fSPrasad Joshi * operation fails. The two metadat operations are: level 1 and level 2 table 370865c675fSPrasad Joshi * update. If either of them fails the image is truncated to a consistent state. 371865c675fSPrasad Joshi */ 372b1c84095SPekka Enberg static ssize_t qcow_write_cluster(struct qcow *q, u64 offset, void *buf, u32 src_len) 373865c675fSPrasad Joshi { 374865c675fSPrasad Joshi struct qcow_header *header = q->header; 375865c675fSPrasad Joshi struct qcow_table *table = &q->table; 376fe8bdde0SPekka Enberg struct qcow_l2_table *l2t; 3770df6b4d9SPekka Enberg u64 clust_start; 3780df6b4d9SPekka Enberg u64 clust_off; 379865c675fSPrasad Joshi u64 clust_sz; 380865c675fSPrasad Joshi u64 l1t_idx; 381865c675fSPrasad Joshi u64 l2t_idx; 3820df6b4d9SPekka Enberg u64 l2t_off; 3830df6b4d9SPekka Enberg u64 l2t_sz; 384865c675fSPrasad Joshi u64 f_sz; 3850df6b4d9SPekka Enberg u64 len; 386865c675fSPrasad Joshi 387fe8bdde0SPekka Enberg l2t = NULL; 388865c675fSPrasad Joshi l2t_sz = 1 << header->l2_bits; 389865c675fSPrasad Joshi clust_sz = 1 << header->cluster_bits; 390865c675fSPrasad Joshi 391865c675fSPrasad Joshi l1t_idx = get_l1_index(q, offset); 392865c675fSPrasad Joshi if (l1t_idx >= table->table_size) 393c0799eb9SPekka Enberg return -1; 394865c675fSPrasad Joshi 395865c675fSPrasad Joshi l2t_idx = get_l2_index(q, offset); 396865c675fSPrasad Joshi if (l2t_idx >= l2t_sz) 397c0799eb9SPekka Enberg return -1; 398865c675fSPrasad Joshi 399865c675fSPrasad Joshi clust_off = get_cluster_offset(q, offset); 400865c675fSPrasad Joshi if (clust_off >= clust_sz) 401c0799eb9SPekka Enberg return -1; 402865c675fSPrasad Joshi 403865c675fSPrasad Joshi len = clust_sz - clust_off; 404865c675fSPrasad Joshi if (len > src_len) 405865c675fSPrasad Joshi len = src_len; 406865c675fSPrasad Joshi 407c0799eb9SPekka Enberg mutex_lock(&q->mutex); 408c0799eb9SPekka Enberg 409659f4186SPekka Enberg l2t_off = be64_to_cpu(table->l1_table[l1t_idx]) & ~header->oflag_mask; 410865c675fSPrasad Joshi if (l2t_off) { 4113309045fSPrasad Joshi /* read and cache l2 table */ 412fe8bdde0SPekka Enberg l2t = qcow_read_l2_table(q, l2t_off); 413fe8bdde0SPekka Enberg if (!l2t) 4143309045fSPrasad Joshi goto error; 415865c675fSPrasad Joshi } else { 416fe8bdde0SPekka Enberg l2t = new_cache_table(q, l2t_off); 417fe8bdde0SPekka Enberg if (!l2t) 4183309045fSPrasad Joshi goto error; 4193309045fSPrasad Joshi 4200df6b4d9SPekka Enberg /* Capture the state of the consistent QCOW image */ 421865c675fSPrasad Joshi f_sz = file_size(q->fd); 422865c675fSPrasad Joshi if (!f_sz) 4233309045fSPrasad Joshi goto free_cache; 424865c675fSPrasad Joshi 425865c675fSPrasad Joshi /* Write the l2 table of 0's at the end of the file */ 426fe8bdde0SPekka Enberg l2t_off = qcow_write_l2_table(q, l2t->table); 427865c675fSPrasad Joshi if (!l2t_off) 4283309045fSPrasad Joshi goto free_cache; 429865c675fSPrasad Joshi 430fe8bdde0SPekka Enberg if (cache_table(q, l2t) < 0) { 4313309045fSPrasad Joshi if (ftruncate(q->fd, f_sz) < 0) 4323309045fSPrasad Joshi goto free_cache; 4333309045fSPrasad Joshi 4343309045fSPrasad Joshi goto free_cache; 435865c675fSPrasad Joshi } 436865c675fSPrasad Joshi 4370df6b4d9SPekka Enberg /* Update the in-core entry */ 438659f4186SPekka Enberg table->l1_table[l1t_idx] = cpu_to_be64(l2t_off); 439865c675fSPrasad Joshi } 440865c675fSPrasad Joshi 4410df6b4d9SPekka Enberg /* Capture the state of the consistent QCOW image */ 442865c675fSPrasad Joshi f_sz = file_size(q->fd); 443865c675fSPrasad Joshi if (!f_sz) 4443309045fSPrasad Joshi goto error; 445865c675fSPrasad Joshi 4464bd7e48bSPekka Enberg clust_start = be64_to_cpu(l2t->table[l2t_idx]) & ~header->oflag_mask; 447865c675fSPrasad Joshi if (!clust_start) { 448865c675fSPrasad Joshi clust_start = ALIGN(f_sz, clust_sz); 4494bd7e48bSPekka Enberg l2t->table[l2t_idx] = cpu_to_be64(clust_start); 450865c675fSPrasad Joshi } 4510df6b4d9SPekka Enberg 452c0799eb9SPekka Enberg mutex_unlock(&q->mutex); 453c0799eb9SPekka Enberg 454*a4e46515SPekka Enberg /* Write actual data */ 455*a4e46515SPekka Enberg if (pwrite_in_full(q->fd, buf, len, clust_start + clust_off) < 0) 456*a4e46515SPekka Enberg return -1; 457*a4e46515SPekka Enberg 458865c675fSPrasad Joshi return len; 4593309045fSPrasad Joshi 4603309045fSPrasad Joshi free_cache: 461fe8bdde0SPekka Enberg free(l2t); 462865c675fSPrasad Joshi error: 463c0799eb9SPekka Enberg mutex_unlock(&q->mutex); 464865c675fSPrasad Joshi return -1; 465865c675fSPrasad Joshi } 466865c675fSPrasad Joshi 467b1c84095SPekka Enberg static ssize_t qcow_write_sector(struct disk_image *disk, u64 sector, void *src, u32 src_len) 46886835cedSPrasad Joshi { 469865c675fSPrasad Joshi struct qcow *q = disk->priv; 470865c675fSPrasad Joshi struct qcow_header *header = q->header; 471c4acb611SIngo Molnar u32 nr_written; 4720df6b4d9SPekka Enberg char *buf; 473865c675fSPrasad Joshi u64 offset; 474865c675fSPrasad Joshi ssize_t nr; 475865c675fSPrasad Joshi 4760df6b4d9SPekka Enberg buf = src; 4770df6b4d9SPekka Enberg nr_written = 0; 478865c675fSPrasad Joshi offset = sector << SECTOR_SHIFT; 4790df6b4d9SPekka Enberg 4800df6b4d9SPekka Enberg while (nr_written < src_len) { 481865c675fSPrasad Joshi if (offset >= header->size) 4820df6b4d9SPekka Enberg return -1; 483865c675fSPrasad Joshi 484b1c84095SPekka Enberg nr = qcow_write_cluster(q, offset, buf, src_len - nr_written); 485865c675fSPrasad Joshi if (nr < 0) 4860df6b4d9SPekka Enberg return -1; 487865c675fSPrasad Joshi 4880df6b4d9SPekka Enberg nr_written += nr; 489865c675fSPrasad Joshi buf += nr; 490865c675fSPrasad Joshi offset += nr; 491865c675fSPrasad Joshi } 4920df6b4d9SPekka Enberg 49372133dd2SAsias He return nr_written; 49486835cedSPrasad Joshi } 49586835cedSPrasad Joshi 496b1c84095SPekka Enberg static ssize_t qcow_nowrite_sector(struct disk_image *disk, u64 sector, void *src, u32 src_len) 497f10860caSPekka Enberg { 498f10860caSPekka Enberg /* I/O error */ 499b1c84095SPekka Enberg pr_info("%s: no write support\n", __func__); 500f10860caSPekka Enberg return -1; 501f10860caSPekka Enberg } 502f10860caSPekka Enberg 503659f4186SPekka Enberg static int qcow_disk_flush(struct disk_image *disk) 504659f4186SPekka Enberg { 50573984b11SPekka Enberg struct qcow *q = disk->priv; 50673984b11SPekka Enberg struct qcow_header *header; 507*a4e46515SPekka Enberg struct list_head *pos, *n; 50873984b11SPekka Enberg struct qcow_table *table; 50973984b11SPekka Enberg 51073984b11SPekka Enberg header = q->header; 51173984b11SPekka Enberg table = &q->table; 51273984b11SPekka Enberg 513*a4e46515SPekka Enberg mutex_lock(&q->mutex); 514*a4e46515SPekka Enberg 515*a4e46515SPekka Enberg list_for_each_safe(pos, n, &q->lru_list) { 516*a4e46515SPekka Enberg struct qcow_l2_table *c = list_entry(pos, struct qcow_l2_table, list); 517*a4e46515SPekka Enberg 518*a4e46515SPekka Enberg if (qcow_l2_cache_write(q, c) < 0) 519*a4e46515SPekka Enberg goto error_unlock; 520*a4e46515SPekka Enberg } 521*a4e46515SPekka Enberg 522*a4e46515SPekka Enberg if (fdatasync(disk->fd) < 0) 523*a4e46515SPekka Enberg goto error_unlock; 524*a4e46515SPekka Enberg 52573984b11SPekka Enberg if (pwrite_in_full(disk->fd, table->l1_table, table->table_size * sizeof(u64), header->l1_table_offset) < 0) 526*a4e46515SPekka Enberg goto error_unlock; 527*a4e46515SPekka Enberg 528*a4e46515SPekka Enberg mutex_unlock(&q->mutex); 52973984b11SPekka Enberg 530659f4186SPekka Enberg return fsync(disk->fd); 531*a4e46515SPekka Enberg 532*a4e46515SPekka Enberg error_unlock: 533*a4e46515SPekka Enberg mutex_unlock(&q->mutex); 534*a4e46515SPekka Enberg return -1; 535659f4186SPekka Enberg } 536659f4186SPekka Enberg 537b1c84095SPekka Enberg static int qcow_disk_close(struct disk_image *disk) 53886835cedSPrasad Joshi { 53986835cedSPrasad Joshi struct qcow *q; 54086835cedSPrasad Joshi 54143835ac9SSasha Levin if (!disk) 54272133dd2SAsias He return 0; 54386835cedSPrasad Joshi 54443835ac9SSasha Levin q = disk->priv; 54586835cedSPrasad Joshi 5463309045fSPrasad Joshi free_cache(q); 5476c6f79b6SPrasad Joshi free(q->table.l1_table); 54886835cedSPrasad Joshi free(q->header); 54986835cedSPrasad Joshi free(q); 55072133dd2SAsias He 55172133dd2SAsias He return 0; 55286835cedSPrasad Joshi } 55386835cedSPrasad Joshi 554b1c84095SPekka Enberg static struct disk_image_operations qcow_disk_readonly_ops = { 555b1c84095SPekka Enberg .read_sector = qcow_read_sector, 556b1c84095SPekka Enberg .write_sector = qcow_nowrite_sector, 557b1c84095SPekka Enberg .close = qcow_disk_close, 558f10860caSPekka Enberg }; 559f10860caSPekka Enberg 560b1c84095SPekka Enberg static struct disk_image_operations qcow_disk_ops = { 561b1c84095SPekka Enberg .read_sector = qcow_read_sector, 562b1c84095SPekka Enberg .write_sector = qcow_write_sector, 563659f4186SPekka Enberg .flush = qcow_disk_flush, 564b1c84095SPekka Enberg .close = qcow_disk_close, 56586835cedSPrasad Joshi }; 56686835cedSPrasad Joshi 56786835cedSPrasad Joshi static int qcow_read_l1_table(struct qcow *q) 56886835cedSPrasad Joshi { 569ad627d62SPekka Enberg struct qcow_header *header = q->header; 57000adcc1bSPrasad Joshi struct qcow_table *table = &q->table; 57186835cedSPrasad Joshi 572ad627d62SPekka Enberg table->table_size = header->l1_size; 57386835cedSPrasad Joshi 57400adcc1bSPrasad Joshi table->l1_table = calloc(table->table_size, sizeof(u64)); 57500adcc1bSPrasad Joshi if (!table->l1_table) 57686835cedSPrasad Joshi return -1; 57786835cedSPrasad Joshi 578659f4186SPekka Enberg return pread_in_full(q->fd, table->l1_table, sizeof(u64) * table->table_size, header->l1_table_offset); 57986835cedSPrasad Joshi } 58086835cedSPrasad Joshi 581ad627d62SPekka Enberg static void *qcow2_read_header(int fd) 58286835cedSPrasad Joshi { 583ad627d62SPekka Enberg struct qcow2_header_disk f_header; 584ad627d62SPekka Enberg struct qcow_header *header; 58586835cedSPrasad Joshi 586ad627d62SPekka Enberg header = malloc(sizeof(struct qcow_header)); 58786835cedSPrasad Joshi if (!header) 58886835cedSPrasad Joshi return NULL; 58986835cedSPrasad Joshi 5900657f33dSPrasad Joshi if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0) { 5910657f33dSPrasad Joshi free(header); 59286835cedSPrasad Joshi return NULL; 5930657f33dSPrasad Joshi } 59486835cedSPrasad Joshi 595ad627d62SPekka Enberg be32_to_cpus(&f_header.magic); 596ad627d62SPekka Enberg be32_to_cpus(&f_header.version); 597ad627d62SPekka Enberg be64_to_cpus(&f_header.backing_file_offset); 598ad627d62SPekka Enberg be32_to_cpus(&f_header.backing_file_size); 599ad627d62SPekka Enberg be32_to_cpus(&f_header.cluster_bits); 600ad627d62SPekka Enberg be64_to_cpus(&f_header.size); 601ad627d62SPekka Enberg be32_to_cpus(&f_header.crypt_method); 602ad627d62SPekka Enberg be32_to_cpus(&f_header.l1_size); 603ad627d62SPekka Enberg be64_to_cpus(&f_header.l1_table_offset); 604ad627d62SPekka Enberg be64_to_cpus(&f_header.refcount_table_offset); 605ad627d62SPekka Enberg be32_to_cpus(&f_header.refcount_table_clusters); 606ad627d62SPekka Enberg be32_to_cpus(&f_header.nb_snapshots); 607ad627d62SPekka Enberg be64_to_cpus(&f_header.snapshots_offset); 608ad627d62SPekka Enberg 609ad627d62SPekka Enberg *header = (struct qcow_header) { 610ad627d62SPekka Enberg .size = f_header.size, 611ad627d62SPekka Enberg .l1_table_offset = f_header.l1_table_offset, 612ad627d62SPekka Enberg .l1_size = f_header.l1_size, 613ad627d62SPekka Enberg .cluster_bits = f_header.cluster_bits, 614ad627d62SPekka Enberg .l2_bits = f_header.cluster_bits - 3, 615ad627d62SPekka Enberg .oflag_mask = QCOW2_OFLAG_MASK, 616ad627d62SPekka Enberg }; 617ad627d62SPekka Enberg 618ad627d62SPekka Enberg return header; 619ad627d62SPekka Enberg } 620ad627d62SPekka Enberg 621f10860caSPekka Enberg static struct disk_image *qcow2_probe(int fd, bool readonly) 622ad627d62SPekka Enberg { 623ad627d62SPekka Enberg struct qcow *q; 624ad627d62SPekka Enberg struct qcow_header *h; 625ad627d62SPekka Enberg struct disk_image *disk_image; 626ad627d62SPekka Enberg 627ad627d62SPekka Enberg q = calloc(1, sizeof(struct qcow)); 628ad627d62SPekka Enberg if (!q) 629ad627d62SPekka Enberg goto error; 630ad627d62SPekka Enberg 631c0799eb9SPekka Enberg mutex_init(&q->mutex); 632ad627d62SPekka Enberg q->fd = fd; 6333309045fSPrasad Joshi q->root = RB_ROOT; 6343309045fSPrasad Joshi INIT_LIST_HEAD(&q->lru_list); 635ad627d62SPekka Enberg 636ad627d62SPekka Enberg h = q->header = qcow2_read_header(fd); 637ad627d62SPekka Enberg if (!h) 638ad627d62SPekka Enberg goto error; 639ad627d62SPekka Enberg 640ad627d62SPekka Enberg if (qcow_read_l1_table(q) < 0) 641ad627d62SPekka Enberg goto error; 642ad627d62SPekka Enberg 6437d22135fSAsias He /* 6447d22135fSAsias He * Do not use mmap use read/write instead 6457d22135fSAsias He */ 646f10860caSPekka Enberg if (readonly) 647b1c84095SPekka Enberg disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_NOMMAP); 648f10860caSPekka Enberg else 649b1c84095SPekka Enberg disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_NOMMAP); 650f10860caSPekka Enberg 651ad627d62SPekka Enberg if (!disk_image) 652ad627d62SPekka Enberg goto error; 653ad627d62SPekka Enberg disk_image->priv = q; 654ad627d62SPekka Enberg 655ad627d62SPekka Enberg return disk_image; 656ad627d62SPekka Enberg error: 657ad627d62SPekka Enberg if (!q) 658ad627d62SPekka Enberg return NULL; 659ad627d62SPekka Enberg 660ad627d62SPekka Enberg free(q->table.l1_table); 661ad627d62SPekka Enberg free(q->header); 662ad627d62SPekka Enberg free(q); 663ad627d62SPekka Enberg 664ad627d62SPekka Enberg return NULL; 665ad627d62SPekka Enberg } 666ad627d62SPekka Enberg 667ad627d62SPekka Enberg static bool qcow2_check_image(int fd) 668ad627d62SPekka Enberg { 669ad627d62SPekka Enberg struct qcow2_header_disk f_header; 670ad627d62SPekka Enberg 671ad627d62SPekka Enberg if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0) 672ad627d62SPekka Enberg return false; 673ad627d62SPekka Enberg 674ad627d62SPekka Enberg be32_to_cpus(&f_header.magic); 675ad627d62SPekka Enberg be32_to_cpus(&f_header.version); 676ad627d62SPekka Enberg 677ad627d62SPekka Enberg if (f_header.magic != QCOW_MAGIC) 678ad627d62SPekka Enberg return false; 679ad627d62SPekka Enberg 680ad627d62SPekka Enberg if (f_header.version != QCOW2_VERSION) 681ad627d62SPekka Enberg return false; 682ad627d62SPekka Enberg 683ad627d62SPekka Enberg return true; 684ad627d62SPekka Enberg } 685ad627d62SPekka Enberg 686ad627d62SPekka Enberg static void *qcow1_read_header(int fd) 687ad627d62SPekka Enberg { 688ad627d62SPekka Enberg struct qcow1_header_disk f_header; 689ad627d62SPekka Enberg struct qcow_header *header; 690ad627d62SPekka Enberg 691ad627d62SPekka Enberg header = malloc(sizeof(struct qcow_header)); 692ad627d62SPekka Enberg if (!header) 693ad627d62SPekka Enberg return NULL; 694ad627d62SPekka Enberg 695d39cefd2SSasha Levin if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0) { 696d39cefd2SSasha Levin free(header); 697ad627d62SPekka Enberg return NULL; 698d39cefd2SSasha Levin } 699ad627d62SPekka Enberg 700ad627d62SPekka Enberg be32_to_cpus(&f_header.magic); 701ad627d62SPekka Enberg be32_to_cpus(&f_header.version); 702ad627d62SPekka Enberg be64_to_cpus(&f_header.backing_file_offset); 703ad627d62SPekka Enberg be32_to_cpus(&f_header.backing_file_size); 704ad627d62SPekka Enberg be32_to_cpus(&f_header.mtime); 705ad627d62SPekka Enberg be64_to_cpus(&f_header.size); 706ad627d62SPekka Enberg be32_to_cpus(&f_header.crypt_method); 707ad627d62SPekka Enberg be64_to_cpus(&f_header.l1_table_offset); 708ad627d62SPekka Enberg 709ad627d62SPekka Enberg *header = (struct qcow_header) { 710ad627d62SPekka Enberg .size = f_header.size, 711ad627d62SPekka Enberg .l1_table_offset = f_header.l1_table_offset, 712ad627d62SPekka Enberg .l1_size = f_header.size / ((1 << f_header.l2_bits) * (1 << f_header.cluster_bits)), 713ad627d62SPekka Enberg .cluster_bits = f_header.cluster_bits, 714ad627d62SPekka Enberg .l2_bits = f_header.l2_bits, 715ad627d62SPekka Enberg .oflag_mask = QCOW1_OFLAG_MASK, 716ad627d62SPekka Enberg }; 71786835cedSPrasad Joshi 71886835cedSPrasad Joshi return header; 71986835cedSPrasad Joshi } 72086835cedSPrasad Joshi 721f10860caSPekka Enberg static struct disk_image *qcow1_probe(int fd, bool readonly) 72286835cedSPrasad Joshi { 72386835cedSPrasad Joshi struct qcow *q; 724ad627d62SPekka Enberg struct qcow_header *h; 72586835cedSPrasad Joshi struct disk_image *disk_image; 72686835cedSPrasad Joshi 72786835cedSPrasad Joshi q = calloc(1, sizeof(struct qcow)); 72886835cedSPrasad Joshi if (!q) 72986835cedSPrasad Joshi goto error; 73086835cedSPrasad Joshi 731c0799eb9SPekka Enberg mutex_init(&q->mutex); 73286835cedSPrasad Joshi q->fd = fd; 7333309045fSPrasad Joshi q->root = RB_ROOT; 7343309045fSPrasad Joshi INIT_LIST_HEAD(&q->lru_list); 73586835cedSPrasad Joshi 73686835cedSPrasad Joshi h = q->header = qcow1_read_header(fd); 73786835cedSPrasad Joshi if (!h) 73886835cedSPrasad Joshi goto error; 73986835cedSPrasad Joshi 74086835cedSPrasad Joshi if (qcow_read_l1_table(q) < 0) 74186835cedSPrasad Joshi goto error; 74286835cedSPrasad Joshi 7437d22135fSAsias He /* 7447d22135fSAsias He * Do not use mmap use read/write instead 7457d22135fSAsias He */ 746f10860caSPekka Enberg if (readonly) 747b1c84095SPekka Enberg disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_NOMMAP); 748f10860caSPekka Enberg else 749b1c84095SPekka Enberg disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_NOMMAP); 750f10860caSPekka Enberg 75186835cedSPrasad Joshi if (!disk_image) 75286835cedSPrasad Joshi goto error; 75386835cedSPrasad Joshi disk_image->priv = q; 75486835cedSPrasad Joshi 75586835cedSPrasad Joshi return disk_image; 75686835cedSPrasad Joshi error: 75786835cedSPrasad Joshi if (!q) 75886835cedSPrasad Joshi return NULL; 75986835cedSPrasad Joshi 7606c6f79b6SPrasad Joshi free(q->table.l1_table); 76186835cedSPrasad Joshi free(q->header); 76286835cedSPrasad Joshi free(q); 76386835cedSPrasad Joshi 76486835cedSPrasad Joshi return NULL; 76586835cedSPrasad Joshi } 76686835cedSPrasad Joshi 767ad627d62SPekka Enberg static bool qcow1_check_image(int fd) 76886835cedSPrasad Joshi { 769ad627d62SPekka Enberg struct qcow1_header_disk f_header; 77086835cedSPrasad Joshi 771ad627d62SPekka Enberg if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0) 772ad627d62SPekka Enberg return false; 77386835cedSPrasad Joshi 774ad627d62SPekka Enberg be32_to_cpus(&f_header.magic); 775ad627d62SPekka Enberg be32_to_cpus(&f_header.version); 77686835cedSPrasad Joshi 777ad627d62SPekka Enberg if (f_header.magic != QCOW_MAGIC) 778ad627d62SPekka Enberg return false; 77986835cedSPrasad Joshi 780ad627d62SPekka Enberg if (f_header.version != QCOW1_VERSION) 781ad627d62SPekka Enberg return false; 78286835cedSPrasad Joshi 783ad627d62SPekka Enberg return true; 78486835cedSPrasad Joshi } 78586835cedSPrasad Joshi 786f10860caSPekka Enberg struct disk_image *qcow_probe(int fd, bool readonly) 78786835cedSPrasad Joshi { 788ad627d62SPekka Enberg if (qcow1_check_image(fd)) 789f10860caSPekka Enberg return qcow1_probe(fd, readonly); 790ad627d62SPekka Enberg 791ad627d62SPekka Enberg if (qcow2_check_image(fd)) 792f10860caSPekka Enberg return qcow2_probe(fd, readonly); 793ad627d62SPekka Enberg 794ad627d62SPekka Enberg return NULL; 79586835cedSPrasad Joshi } 796