186835cedSPrasad Joshi #include "kvm/qcow.h" 286835cedSPrasad Joshi 386835cedSPrasad Joshi #include "kvm/disk-image.h" 486835cedSPrasad Joshi #include "kvm/read-write.h" 5c0799eb9SPekka Enberg #include "kvm/mutex.h" 686835cedSPrasad Joshi #include "kvm/util.h" 786835cedSPrasad Joshi 886835cedSPrasad Joshi #include <sys/types.h> 986835cedSPrasad Joshi #include <sys/stat.h> 1086835cedSPrasad Joshi #include <stdbool.h> 1186835cedSPrasad Joshi #include <stdlib.h> 1286835cedSPrasad Joshi #include <string.h> 1386835cedSPrasad Joshi #include <unistd.h> 1486835cedSPrasad Joshi #include <fcntl.h> 1586835cedSPrasad Joshi 1686835cedSPrasad Joshi #include <linux/byteorder.h> 17865c675fSPrasad Joshi #include <linux/kernel.h> 180df6b4d9SPekka Enberg #include <linux/types.h> 1986835cedSPrasad Joshi 20473d58ffSPekka Enberg static int insert(struct rb_root *root, struct qcow_l2_table *new) 213309045fSPrasad Joshi { 223309045fSPrasad Joshi struct rb_node **link = &(root->rb_node), *parent = NULL; 233309045fSPrasad Joshi u64 offset = new->offset; 243309045fSPrasad Joshi 253309045fSPrasad Joshi /* search the tree */ 263309045fSPrasad Joshi while (*link) { 27473d58ffSPekka Enberg struct qcow_l2_table *t; 283309045fSPrasad Joshi 29473d58ffSPekka Enberg t = rb_entry(*link, struct qcow_l2_table, node); 303309045fSPrasad Joshi if (!t) 313309045fSPrasad Joshi goto error; 323309045fSPrasad Joshi 333309045fSPrasad Joshi parent = *link; 343309045fSPrasad Joshi 353309045fSPrasad Joshi if (t->offset > offset) 363309045fSPrasad Joshi link = &(*link)->rb_left; 373309045fSPrasad Joshi else if (t->offset < offset) 383309045fSPrasad Joshi link = &(*link)->rb_right; 393309045fSPrasad Joshi else 403309045fSPrasad Joshi goto out; 413309045fSPrasad Joshi } 423309045fSPrasad Joshi 433309045fSPrasad Joshi /* add new node */ 443309045fSPrasad Joshi rb_link_node(&new->node, parent, link); 453309045fSPrasad Joshi rb_insert_color(&new->node, root); 463309045fSPrasad Joshi out: 473309045fSPrasad Joshi return 0; 483309045fSPrasad Joshi error: 493309045fSPrasad Joshi return -1; 503309045fSPrasad Joshi } 513309045fSPrasad Joshi 52473d58ffSPekka Enberg static struct qcow_l2_table *search(struct rb_root *root, u64 offset) 533309045fSPrasad Joshi { 543309045fSPrasad Joshi struct rb_node *link = root->rb_node; 553309045fSPrasad Joshi 563309045fSPrasad Joshi while (link) { 57473d58ffSPekka Enberg struct qcow_l2_table *t; 583309045fSPrasad Joshi 59473d58ffSPekka Enberg t = rb_entry(link, struct qcow_l2_table, node); 603309045fSPrasad Joshi if (!t) 613309045fSPrasad Joshi goto out; 623309045fSPrasad Joshi 633309045fSPrasad Joshi if (t->offset > offset) 643309045fSPrasad Joshi link = link->rb_left; 653309045fSPrasad Joshi else if (t->offset < offset) 663309045fSPrasad Joshi link = link->rb_right; 673309045fSPrasad Joshi else 683309045fSPrasad Joshi return t; 693309045fSPrasad Joshi } 703309045fSPrasad Joshi out: 713309045fSPrasad Joshi return NULL; 723309045fSPrasad Joshi } 733309045fSPrasad Joshi 743309045fSPrasad Joshi static void free_cache(struct qcow *q) 753309045fSPrasad Joshi { 763309045fSPrasad Joshi struct list_head *pos, *n; 77473d58ffSPekka Enberg struct qcow_l2_table *t; 783309045fSPrasad Joshi struct rb_root *r = &q->root; 793309045fSPrasad Joshi 803309045fSPrasad Joshi list_for_each_safe(pos, n, &q->lru_list) { 813309045fSPrasad Joshi /* Remove cache table from the list and RB tree */ 823309045fSPrasad Joshi list_del(pos); 83473d58ffSPekka Enberg t = list_entry(pos, struct qcow_l2_table, list); 843309045fSPrasad Joshi rb_erase(&t->node, r); 853309045fSPrasad Joshi 863309045fSPrasad Joshi /* Free the cached node */ 873309045fSPrasad Joshi free(t); 883309045fSPrasad Joshi } 893309045fSPrasad Joshi } 903309045fSPrasad Joshi 91473d58ffSPekka Enberg static int cache_table(struct qcow *q, struct qcow_l2_table *c) 923309045fSPrasad Joshi { 933309045fSPrasad Joshi struct rb_root *r = &q->root; 94473d58ffSPekka Enberg struct qcow_l2_table *lru; 953309045fSPrasad Joshi 963309045fSPrasad Joshi if (q->nr_cached == MAX_CACHE_NODES) { 973309045fSPrasad Joshi /* 983309045fSPrasad Joshi * The node at the head of the list is least recently used 993309045fSPrasad Joshi * node. Remove it from the list and replaced with a new node. 1003309045fSPrasad Joshi */ 101473d58ffSPekka Enberg lru = list_first_entry(&q->lru_list, struct qcow_l2_table, list); 1023309045fSPrasad Joshi 1033309045fSPrasad Joshi /* Remove the node from the cache */ 1043309045fSPrasad Joshi rb_erase(&lru->node, r); 1053309045fSPrasad Joshi list_del_init(&lru->list); 1063309045fSPrasad Joshi q->nr_cached--; 1073309045fSPrasad Joshi 1083309045fSPrasad Joshi /* Free the LRUed node */ 1093309045fSPrasad Joshi free(lru); 1103309045fSPrasad Joshi } 1113309045fSPrasad Joshi 1123309045fSPrasad Joshi /* Add new node in RB Tree: Helps in searching faster */ 1133309045fSPrasad Joshi if (insert(r, c) < 0) 1143309045fSPrasad Joshi goto error; 1153309045fSPrasad Joshi 1163309045fSPrasad Joshi /* Add in LRU replacement list */ 1173309045fSPrasad Joshi list_add_tail(&c->list, &q->lru_list); 1183309045fSPrasad Joshi q->nr_cached++; 1193309045fSPrasad Joshi 1203309045fSPrasad Joshi return 0; 1213309045fSPrasad Joshi error: 1223309045fSPrasad Joshi return -1; 1233309045fSPrasad Joshi } 1243309045fSPrasad Joshi 125fe8bdde0SPekka Enberg static struct qcow_l2_table *search_table(struct qcow *q, u64 offset) 1263309045fSPrasad Joshi { 127fe8bdde0SPekka Enberg struct qcow_l2_table *l2t; 1283309045fSPrasad Joshi 129fe8bdde0SPekka Enberg l2t = search(&q->root, offset); 130fe8bdde0SPekka Enberg if (!l2t) 131fe8bdde0SPekka Enberg return NULL; 1323309045fSPrasad Joshi 1333309045fSPrasad Joshi /* Update the LRU state, by moving the searched node to list tail */ 134fe8bdde0SPekka Enberg list_move_tail(&l2t->list, &q->lru_list); 1353309045fSPrasad Joshi 136fe8bdde0SPekka Enberg return l2t; 1373309045fSPrasad Joshi } 1383309045fSPrasad Joshi 1393309045fSPrasad Joshi /* Allocates a new node for caching L2 table */ 140473d58ffSPekka Enberg static struct qcow_l2_table *new_cache_table(struct qcow *q, u64 offset) 1413309045fSPrasad Joshi { 1423309045fSPrasad Joshi struct qcow_header *header = q->header; 143473d58ffSPekka Enberg struct qcow_l2_table *c; 1443309045fSPrasad Joshi u64 l2t_sz; 1453309045fSPrasad Joshi u64 size; 1463309045fSPrasad Joshi 1473309045fSPrasad Joshi l2t_sz = 1 << header->l2_bits; 1483309045fSPrasad Joshi size = sizeof(*c) + l2t_sz * sizeof(u64); 1493309045fSPrasad Joshi c = calloc(1, size); 1503309045fSPrasad Joshi if (!c) 1513309045fSPrasad Joshi goto out; 1523309045fSPrasad Joshi 1533309045fSPrasad Joshi c->offset = offset; 1543309045fSPrasad Joshi RB_CLEAR_NODE(&c->node); 1553309045fSPrasad Joshi INIT_LIST_HEAD(&c->list); 1563309045fSPrasad Joshi out: 1573309045fSPrasad Joshi return c; 1583309045fSPrasad Joshi } 1593309045fSPrasad Joshi 160742fce76SPrasad Joshi static inline u64 get_l1_index(struct qcow *q, u64 offset) 16186835cedSPrasad Joshi { 162ad627d62SPekka Enberg struct qcow_header *header = q->header; 16386835cedSPrasad Joshi 16486835cedSPrasad Joshi return offset >> (header->l2_bits + header->cluster_bits); 16586835cedSPrasad Joshi } 16686835cedSPrasad Joshi 167742fce76SPrasad Joshi static inline u64 get_l2_index(struct qcow *q, u64 offset) 16886835cedSPrasad Joshi { 169ad627d62SPekka Enberg struct qcow_header *header = q->header; 17086835cedSPrasad Joshi 17186835cedSPrasad Joshi return (offset >> (header->cluster_bits)) & ((1 << header->l2_bits)-1); 17286835cedSPrasad Joshi } 17386835cedSPrasad Joshi 174742fce76SPrasad Joshi static inline u64 get_cluster_offset(struct qcow *q, u64 offset) 17586835cedSPrasad Joshi { 176ad627d62SPekka Enberg struct qcow_header *header = q->header; 17786835cedSPrasad Joshi 17886835cedSPrasad Joshi return offset & ((1 << header->cluster_bits)-1); 17986835cedSPrasad Joshi } 18086835cedSPrasad Joshi 181fe8bdde0SPekka Enberg static struct qcow_l2_table *qcow_read_l2_table(struct qcow *q, u64 offset) 1823309045fSPrasad Joshi { 1833309045fSPrasad Joshi struct qcow_header *header = q->header; 184fe8bdde0SPekka Enberg struct qcow_l2_table *l2t; 1853309045fSPrasad Joshi u64 size; 1863309045fSPrasad Joshi u64 i; 1873309045fSPrasad Joshi 1883309045fSPrasad Joshi size = 1 << header->l2_bits; 1893309045fSPrasad Joshi 1903309045fSPrasad Joshi /* search an entry for offset in cache */ 191fe8bdde0SPekka Enberg l2t = search_table(q, offset); 192fe8bdde0SPekka Enberg if (l2t) 193fe8bdde0SPekka Enberg return l2t; 1943309045fSPrasad Joshi 1953309045fSPrasad Joshi /* allocate new node for caching l2 table */ 196fe8bdde0SPekka Enberg l2t = new_cache_table(q, offset); 197fe8bdde0SPekka Enberg if (!l2t) 1983309045fSPrasad Joshi goto error; 1993309045fSPrasad Joshi 2003309045fSPrasad Joshi /* table not cached: read from the disk */ 201fe8bdde0SPekka Enberg if (pread_in_full(q->fd, l2t->table, size * sizeof(u64), offset) < 0) 2023309045fSPrasad Joshi goto error; 2033309045fSPrasad Joshi 2043309045fSPrasad Joshi /* cache the table */ 205fe8bdde0SPekka Enberg if (cache_table(q, l2t) < 0) 2063309045fSPrasad Joshi goto error; 2073309045fSPrasad Joshi 2083309045fSPrasad Joshi /* change cached table to CPU's byte-order */ 2093309045fSPrasad Joshi for (i = 0; i < size; i++) 210fe8bdde0SPekka Enberg be64_to_cpus(&l2t->table[i]); 2113309045fSPrasad Joshi 212fe8bdde0SPekka Enberg return l2t; 2133309045fSPrasad Joshi error: 214fe8bdde0SPekka Enberg free(l2t); 215fe8bdde0SPekka Enberg return NULL; 2163309045fSPrasad Joshi } 2173309045fSPrasad Joshi 218b1c84095SPekka Enberg static ssize_t qcow_read_cluster(struct qcow *q, u64 offset, void *dst, u32 dst_len) 21986835cedSPrasad Joshi { 220ad627d62SPekka Enberg struct qcow_header *header = q->header; 2213dac48d4SPrasad Joshi struct qcow_table *table = &q->table; 222fe8bdde0SPekka Enberg struct qcow_l2_table *l2_table; 223742fce76SPrasad Joshi u64 l2_table_offset; 224742fce76SPrasad Joshi u64 l2_table_size; 2253dac48d4SPrasad Joshi u64 cluster_size; 226742fce76SPrasad Joshi u64 clust_offset; 227742fce76SPrasad Joshi u64 clust_start; 228a51948ceSPekka Enberg size_t length; 229742fce76SPrasad Joshi u64 l1_idx; 230742fce76SPrasad Joshi u64 l2_idx; 23186835cedSPrasad Joshi 232dae803fbSPekka Enberg cluster_size = 1 << header->cluster_bits; 23386835cedSPrasad Joshi 234c5e0624bSPrasad Joshi l1_idx = get_l1_index(q, offset); 2353dac48d4SPrasad Joshi if (l1_idx >= table->table_size) 236c0799eb9SPekka Enberg return -1; 23786835cedSPrasad Joshi 2383dac48d4SPrasad Joshi clust_offset = get_cluster_offset(q, offset); 2393dac48d4SPrasad Joshi if (clust_offset >= cluster_size) 240c0799eb9SPekka Enberg return -1; 2413dac48d4SPrasad Joshi 2423dac48d4SPrasad Joshi length = cluster_size - clust_offset; 2433dac48d4SPrasad Joshi if (length > dst_len) 2443dac48d4SPrasad Joshi length = dst_len; 2453dac48d4SPrasad Joshi 246c0799eb9SPekka Enberg mutex_lock(&q->mutex); 247659f4186SPekka Enberg l2_table_offset = be64_to_cpu(table->l1_table[l1_idx]) & ~header->oflag_mask; 24886835cedSPrasad Joshi if (!l2_table_offset) 2493dac48d4SPrasad Joshi goto zero_cluster; 25086835cedSPrasad Joshi 25186835cedSPrasad Joshi l2_table_size = 1 << header->l2_bits; 25286835cedSPrasad Joshi 2533309045fSPrasad Joshi /* read and cache level 2 table */ 254fe8bdde0SPekka Enberg l2_table = qcow_read_l2_table(q, l2_table_offset); 255fe8bdde0SPekka Enberg if (!l2_table) 256b6edb0ecSSasha Levin goto out_error; 25786835cedSPrasad Joshi 258c5e0624bSPrasad Joshi l2_idx = get_l2_index(q, offset); 25986835cedSPrasad Joshi if (l2_idx >= l2_table_size) 260b6edb0ecSSasha Levin goto out_error; 26186835cedSPrasad Joshi 262fe8bdde0SPekka Enberg clust_start = l2_table->table[l2_idx] & ~header->oflag_mask; 26386835cedSPrasad Joshi if (!clust_start) 2643dac48d4SPrasad Joshi goto zero_cluster; 26586835cedSPrasad Joshi 266c0799eb9SPekka Enberg mutex_unlock(&q->mutex); 26786835cedSPrasad Joshi 268c0799eb9SPekka Enberg if (pread_in_full(q->fd, dst, length, clust_start + clust_offset) < 0) 269c0799eb9SPekka Enberg return -1; 270c0799eb9SPekka Enberg 2713dac48d4SPrasad Joshi return length; 27286835cedSPrasad Joshi 273179b71f0SPekka Enberg zero_cluster: 274c0799eb9SPekka Enberg mutex_unlock(&q->mutex); 275179b71f0SPekka Enberg memset(dst, 0, length); 276c0799eb9SPekka Enberg return length; 277179b71f0SPekka Enberg 27886835cedSPrasad Joshi out_error: 279c0799eb9SPekka Enberg mutex_unlock(&q->mutex); 280179b71f0SPekka Enberg length = -1; 281c0799eb9SPekka Enberg return -1; 2823dac48d4SPrasad Joshi } 283b6edb0ecSSasha Levin 284b1c84095SPekka Enberg static ssize_t qcow_read_sector(struct disk_image *disk, u64 sector, void *dst, u32 dst_len) 2853dac48d4SPrasad Joshi { 28643835ac9SSasha Levin struct qcow *q = disk->priv; 287ad627d62SPekka Enberg struct qcow_header *header = q->header; 288d8eea993SPekka Enberg u32 nr_read; 2890df6b4d9SPekka Enberg u64 offset; 2900df6b4d9SPekka Enberg char *buf; 2913dac48d4SPrasad Joshi u32 nr; 2923dac48d4SPrasad Joshi 2930df6b4d9SPekka Enberg buf = dst; 294d8eea993SPekka Enberg nr_read = 0; 2950df6b4d9SPekka Enberg 296d8eea993SPekka Enberg while (nr_read < dst_len) { 2973dac48d4SPrasad Joshi offset = sector << SECTOR_SHIFT; 2983dac48d4SPrasad Joshi if (offset >= header->size) 2990df6b4d9SPekka Enberg return -1; 3003dac48d4SPrasad Joshi 301b1c84095SPekka Enberg nr = qcow_read_cluster(q, offset, buf, dst_len - nr_read); 302a51948ceSPekka Enberg if (nr <= 0) 3030df6b4d9SPekka Enberg return -1; 3043dac48d4SPrasad Joshi 305d8eea993SPekka Enberg nr_read += nr; 3063dac48d4SPrasad Joshi buf += nr; 3073dac48d4SPrasad Joshi sector += (nr >> SECTOR_SHIFT); 3083dac48d4SPrasad Joshi } 3090df6b4d9SPekka Enberg 31072133dd2SAsias He return dst_len; 31186835cedSPrasad Joshi } 31286835cedSPrasad Joshi 313865c675fSPrasad Joshi static inline u64 file_size(int fd) 314865c675fSPrasad Joshi { 315865c675fSPrasad Joshi struct stat st; 3160df6b4d9SPekka Enberg 317865c675fSPrasad Joshi if (fstat(fd, &st) < 0) 318865c675fSPrasad Joshi return 0; 3190df6b4d9SPekka Enberg 320865c675fSPrasad Joshi return st.st_size; 321865c675fSPrasad Joshi } 322865c675fSPrasad Joshi 3230df6b4d9SPekka Enberg static inline int qcow_pwrite_sync(int fd, void *buf, size_t count, off_t offset) 324865c675fSPrasad Joshi { 325865c675fSPrasad Joshi if (pwrite_in_full(fd, buf, count, offset) < 0) 326865c675fSPrasad Joshi return -1; 3270df6b4d9SPekka Enberg 3287d94a719SPekka Enberg return fdatasync(fd); 329865c675fSPrasad Joshi } 330865c675fSPrasad Joshi 331865c675fSPrasad Joshi /* Writes a level 2 table at the end of the file. */ 332b1c84095SPekka Enberg static u64 qcow_write_l2_table(struct qcow *q, u64 *table) 333865c675fSPrasad Joshi { 334865c675fSPrasad Joshi struct qcow_header *header = q->header; 335865c675fSPrasad Joshi u64 clust_sz; 336865c675fSPrasad Joshi u64 f_sz; 3370df6b4d9SPekka Enberg u64 off; 3380df6b4d9SPekka Enberg u64 sz; 339865c675fSPrasad Joshi 340865c675fSPrasad Joshi f_sz = file_size(q->fd); 341865c675fSPrasad Joshi if (!f_sz) 342865c675fSPrasad Joshi return 0; 343865c675fSPrasad Joshi 344865c675fSPrasad Joshi sz = 1 << header->l2_bits; 345865c675fSPrasad Joshi clust_sz = 1 << header->cluster_bits; 346865c675fSPrasad Joshi off = ALIGN(f_sz, clust_sz); 347865c675fSPrasad Joshi 3480df6b4d9SPekka Enberg if (qcow_pwrite_sync(q->fd, table, sz * sizeof(u64), off) < 0) 349865c675fSPrasad Joshi return 0; 3500df6b4d9SPekka Enberg 351865c675fSPrasad Joshi return off; 352865c675fSPrasad Joshi } 353865c675fSPrasad Joshi 354865c675fSPrasad Joshi /* 355865c675fSPrasad Joshi * QCOW file might grow during a write operation. Not only data but metadata is 356865c675fSPrasad Joshi * also written at the end of the file. Therefore it is necessary to ensure 3570df6b4d9SPekka Enberg * every write is committed to disk. Hence we use uses qcow_pwrite_sync() to 358865c675fSPrasad Joshi * synchronize the in-core state of QCOW image to disk. 359865c675fSPrasad Joshi * 360865c675fSPrasad Joshi * We also try to restore the image to a consistent state if the metdata 361865c675fSPrasad Joshi * operation fails. The two metadat operations are: level 1 and level 2 table 362865c675fSPrasad Joshi * update. If either of them fails the image is truncated to a consistent state. 363865c675fSPrasad Joshi */ 364b1c84095SPekka Enberg static ssize_t qcow_write_cluster(struct qcow *q, u64 offset, void *buf, u32 src_len) 365865c675fSPrasad Joshi { 366865c675fSPrasad Joshi struct qcow_header *header = q->header; 367865c675fSPrasad Joshi struct qcow_table *table = &q->table; 368fe8bdde0SPekka Enberg struct qcow_l2_table *l2t; 3690df6b4d9SPekka Enberg bool update_meta; 3700df6b4d9SPekka Enberg u64 clust_start; 3710df6b4d9SPekka Enberg u64 clust_off; 372865c675fSPrasad Joshi u64 clust_sz; 373865c675fSPrasad Joshi u64 l1t_idx; 374865c675fSPrasad Joshi u64 l2t_idx; 3750df6b4d9SPekka Enberg u64 l2t_off; 3760df6b4d9SPekka Enberg u64 l2t_sz; 377865c675fSPrasad Joshi u64 f_sz; 3780df6b4d9SPekka Enberg u64 len; 379865c675fSPrasad Joshi u64 t; 380865c675fSPrasad Joshi 381fe8bdde0SPekka Enberg l2t = NULL; 382865c675fSPrasad Joshi l2t_sz = 1 << header->l2_bits; 383865c675fSPrasad Joshi clust_sz = 1 << header->cluster_bits; 384865c675fSPrasad Joshi 385865c675fSPrasad Joshi l1t_idx = get_l1_index(q, offset); 386865c675fSPrasad Joshi if (l1t_idx >= table->table_size) 387c0799eb9SPekka Enberg return -1; 388865c675fSPrasad Joshi 389865c675fSPrasad Joshi l2t_idx = get_l2_index(q, offset); 390865c675fSPrasad Joshi if (l2t_idx >= l2t_sz) 391c0799eb9SPekka Enberg return -1; 392865c675fSPrasad Joshi 393865c675fSPrasad Joshi clust_off = get_cluster_offset(q, offset); 394865c675fSPrasad Joshi if (clust_off >= clust_sz) 395c0799eb9SPekka Enberg return -1; 396865c675fSPrasad Joshi 397865c675fSPrasad Joshi len = clust_sz - clust_off; 398865c675fSPrasad Joshi if (len > src_len) 399865c675fSPrasad Joshi len = src_len; 400865c675fSPrasad Joshi 401c0799eb9SPekka Enberg mutex_lock(&q->mutex); 402c0799eb9SPekka Enberg 403659f4186SPekka Enberg l2t_off = be64_to_cpu(table->l1_table[l1t_idx]) & ~header->oflag_mask; 404865c675fSPrasad Joshi if (l2t_off) { 4053309045fSPrasad Joshi /* read and cache l2 table */ 406fe8bdde0SPekka Enberg l2t = qcow_read_l2_table(q, l2t_off); 407fe8bdde0SPekka Enberg if (!l2t) 4083309045fSPrasad Joshi goto error; 409865c675fSPrasad Joshi } else { 410fe8bdde0SPekka Enberg l2t = new_cache_table(q, l2t_off); 411fe8bdde0SPekka Enberg if (!l2t) 4123309045fSPrasad Joshi goto error; 4133309045fSPrasad Joshi 4140df6b4d9SPekka Enberg /* Capture the state of the consistent QCOW image */ 415865c675fSPrasad Joshi f_sz = file_size(q->fd); 416865c675fSPrasad Joshi if (!f_sz) 4173309045fSPrasad Joshi goto free_cache; 418865c675fSPrasad Joshi 419865c675fSPrasad Joshi /* Write the l2 table of 0's at the end of the file */ 420fe8bdde0SPekka Enberg l2t_off = qcow_write_l2_table(q, l2t->table); 421865c675fSPrasad Joshi if (!l2t_off) 4223309045fSPrasad Joshi goto free_cache; 423865c675fSPrasad Joshi 424fe8bdde0SPekka Enberg if (cache_table(q, l2t) < 0) { 4253309045fSPrasad Joshi if (ftruncate(q->fd, f_sz) < 0) 4263309045fSPrasad Joshi goto free_cache; 4273309045fSPrasad Joshi 4283309045fSPrasad Joshi goto free_cache; 429865c675fSPrasad Joshi } 430865c675fSPrasad Joshi 4310df6b4d9SPekka Enberg /* Update the in-core entry */ 432659f4186SPekka Enberg table->l1_table[l1t_idx] = cpu_to_be64(l2t_off); 433865c675fSPrasad Joshi } 434865c675fSPrasad Joshi 4350df6b4d9SPekka Enberg /* Capture the state of the consistent QCOW image */ 436865c675fSPrasad Joshi f_sz = file_size(q->fd); 437865c675fSPrasad Joshi if (!f_sz) 4383309045fSPrasad Joshi goto error; 439865c675fSPrasad Joshi 440fe8bdde0SPekka Enberg clust_start = l2t->table[l2t_idx] & ~header->oflag_mask; 441865c675fSPrasad Joshi if (!clust_start) { 442865c675fSPrasad Joshi clust_start = ALIGN(f_sz, clust_sz); 443865c675fSPrasad Joshi update_meta = true; 4440df6b4d9SPekka Enberg } else 4450df6b4d9SPekka Enberg update_meta = false; 446865c675fSPrasad Joshi 4470df6b4d9SPekka Enberg /* Write actual data */ 448865c675fSPrasad Joshi if (pwrite_in_full(q->fd, buf, len, clust_start + clust_off) < 0) 449865c675fSPrasad Joshi goto error; 450865c675fSPrasad Joshi 451865c675fSPrasad Joshi if (update_meta) { 452865c675fSPrasad Joshi t = cpu_to_be64(clust_start); 4530df6b4d9SPekka Enberg if (qcow_pwrite_sync(q->fd, &t, sizeof(t), l2t_off + l2t_idx * sizeof(u64)) < 0) { 4540df6b4d9SPekka Enberg /* Restore the file to consistent state */ 455865c675fSPrasad Joshi if (ftruncate(q->fd, f_sz) < 0) 456865c675fSPrasad Joshi goto error; 4570df6b4d9SPekka Enberg 458865c675fSPrasad Joshi goto error; 459865c675fSPrasad Joshi } 4603309045fSPrasad Joshi 4613309045fSPrasad Joshi /* Update the cached level2 entry */ 462fe8bdde0SPekka Enberg l2t->table[l2t_idx] = clust_start; 463865c675fSPrasad Joshi } 4640df6b4d9SPekka Enberg 465c0799eb9SPekka Enberg mutex_unlock(&q->mutex); 466c0799eb9SPekka Enberg 467865c675fSPrasad Joshi return len; 4683309045fSPrasad Joshi 4693309045fSPrasad Joshi free_cache: 470fe8bdde0SPekka Enberg free(l2t); 471865c675fSPrasad Joshi error: 472c0799eb9SPekka Enberg mutex_unlock(&q->mutex); 473865c675fSPrasad Joshi return -1; 474865c675fSPrasad Joshi } 475865c675fSPrasad Joshi 476b1c84095SPekka Enberg static ssize_t qcow_write_sector(struct disk_image *disk, u64 sector, void *src, u32 src_len) 47786835cedSPrasad Joshi { 478865c675fSPrasad Joshi struct qcow *q = disk->priv; 479865c675fSPrasad Joshi struct qcow_header *header = q->header; 480c4acb611SIngo Molnar u32 nr_written; 4810df6b4d9SPekka Enberg char *buf; 482865c675fSPrasad Joshi u64 offset; 483865c675fSPrasad Joshi ssize_t nr; 484865c675fSPrasad Joshi 4850df6b4d9SPekka Enberg buf = src; 4860df6b4d9SPekka Enberg nr_written = 0; 487865c675fSPrasad Joshi offset = sector << SECTOR_SHIFT; 4880df6b4d9SPekka Enberg 4890df6b4d9SPekka Enberg while (nr_written < src_len) { 490865c675fSPrasad Joshi if (offset >= header->size) 4910df6b4d9SPekka Enberg return -1; 492865c675fSPrasad Joshi 493b1c84095SPekka Enberg nr = qcow_write_cluster(q, offset, buf, src_len - nr_written); 494865c675fSPrasad Joshi if (nr < 0) 4950df6b4d9SPekka Enberg return -1; 496865c675fSPrasad Joshi 4970df6b4d9SPekka Enberg nr_written += nr; 498865c675fSPrasad Joshi buf += nr; 499865c675fSPrasad Joshi offset += nr; 500865c675fSPrasad Joshi } 5010df6b4d9SPekka Enberg 50272133dd2SAsias He return nr_written; 50386835cedSPrasad Joshi } 50486835cedSPrasad Joshi 505b1c84095SPekka Enberg static ssize_t qcow_nowrite_sector(struct disk_image *disk, u64 sector, void *src, u32 src_len) 506f10860caSPekka Enberg { 507f10860caSPekka Enberg /* I/O error */ 508b1c84095SPekka Enberg pr_info("%s: no write support\n", __func__); 509f10860caSPekka Enberg return -1; 510f10860caSPekka Enberg } 511f10860caSPekka Enberg 512659f4186SPekka Enberg static int qcow_disk_flush(struct disk_image *disk) 513659f4186SPekka Enberg { 514*73984b11SPekka Enberg struct qcow *q = disk->priv; 515*73984b11SPekka Enberg struct qcow_header *header; 516*73984b11SPekka Enberg struct qcow_table *table; 517*73984b11SPekka Enberg 518*73984b11SPekka Enberg if (fdatasync(disk->fd) < 0) 519*73984b11SPekka Enberg return -1; 520*73984b11SPekka Enberg 521*73984b11SPekka Enberg header = q->header; 522*73984b11SPekka Enberg table = &q->table; 523*73984b11SPekka Enberg 524*73984b11SPekka Enberg if (pwrite_in_full(disk->fd, table->l1_table, table->table_size * sizeof(u64), header->l1_table_offset) < 0) 525*73984b11SPekka Enberg return -1; 526*73984b11SPekka Enberg 527659f4186SPekka Enberg return fsync(disk->fd); 528659f4186SPekka Enberg } 529659f4186SPekka Enberg 530b1c84095SPekka Enberg static int qcow_disk_close(struct disk_image *disk) 53186835cedSPrasad Joshi { 53286835cedSPrasad Joshi struct qcow *q; 53386835cedSPrasad Joshi 53443835ac9SSasha Levin if (!disk) 53572133dd2SAsias He return 0; 53686835cedSPrasad Joshi 53743835ac9SSasha Levin q = disk->priv; 53886835cedSPrasad Joshi 5393309045fSPrasad Joshi free_cache(q); 5406c6f79b6SPrasad Joshi free(q->table.l1_table); 54186835cedSPrasad Joshi free(q->header); 54286835cedSPrasad Joshi free(q); 54372133dd2SAsias He 54472133dd2SAsias He return 0; 54586835cedSPrasad Joshi } 54686835cedSPrasad Joshi 547b1c84095SPekka Enberg static struct disk_image_operations qcow_disk_readonly_ops = { 548b1c84095SPekka Enberg .read_sector = qcow_read_sector, 549b1c84095SPekka Enberg .write_sector = qcow_nowrite_sector, 550b1c84095SPekka Enberg .close = qcow_disk_close, 551f10860caSPekka Enberg }; 552f10860caSPekka Enberg 553b1c84095SPekka Enberg static struct disk_image_operations qcow_disk_ops = { 554b1c84095SPekka Enberg .read_sector = qcow_read_sector, 555b1c84095SPekka Enberg .write_sector = qcow_write_sector, 556659f4186SPekka Enberg .flush = qcow_disk_flush, 557b1c84095SPekka Enberg .close = qcow_disk_close, 55886835cedSPrasad Joshi }; 55986835cedSPrasad Joshi 56086835cedSPrasad Joshi static int qcow_read_l1_table(struct qcow *q) 56186835cedSPrasad Joshi { 562ad627d62SPekka Enberg struct qcow_header *header = q->header; 56300adcc1bSPrasad Joshi struct qcow_table *table = &q->table; 56486835cedSPrasad Joshi 565ad627d62SPekka Enberg table->table_size = header->l1_size; 56686835cedSPrasad Joshi 56700adcc1bSPrasad Joshi table->l1_table = calloc(table->table_size, sizeof(u64)); 56800adcc1bSPrasad Joshi if (!table->l1_table) 56986835cedSPrasad Joshi return -1; 57086835cedSPrasad Joshi 571659f4186SPekka Enberg return pread_in_full(q->fd, table->l1_table, sizeof(u64) * table->table_size, header->l1_table_offset); 57286835cedSPrasad Joshi } 57386835cedSPrasad Joshi 574ad627d62SPekka Enberg static void *qcow2_read_header(int fd) 57586835cedSPrasad Joshi { 576ad627d62SPekka Enberg struct qcow2_header_disk f_header; 577ad627d62SPekka Enberg struct qcow_header *header; 57886835cedSPrasad Joshi 579ad627d62SPekka Enberg header = malloc(sizeof(struct qcow_header)); 58086835cedSPrasad Joshi if (!header) 58186835cedSPrasad Joshi return NULL; 58286835cedSPrasad Joshi 5830657f33dSPrasad Joshi if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0) { 5840657f33dSPrasad Joshi free(header); 58586835cedSPrasad Joshi return NULL; 5860657f33dSPrasad Joshi } 58786835cedSPrasad Joshi 588ad627d62SPekka Enberg be32_to_cpus(&f_header.magic); 589ad627d62SPekka Enberg be32_to_cpus(&f_header.version); 590ad627d62SPekka Enberg be64_to_cpus(&f_header.backing_file_offset); 591ad627d62SPekka Enberg be32_to_cpus(&f_header.backing_file_size); 592ad627d62SPekka Enberg be32_to_cpus(&f_header.cluster_bits); 593ad627d62SPekka Enberg be64_to_cpus(&f_header.size); 594ad627d62SPekka Enberg be32_to_cpus(&f_header.crypt_method); 595ad627d62SPekka Enberg be32_to_cpus(&f_header.l1_size); 596ad627d62SPekka Enberg be64_to_cpus(&f_header.l1_table_offset); 597ad627d62SPekka Enberg be64_to_cpus(&f_header.refcount_table_offset); 598ad627d62SPekka Enberg be32_to_cpus(&f_header.refcount_table_clusters); 599ad627d62SPekka Enberg be32_to_cpus(&f_header.nb_snapshots); 600ad627d62SPekka Enberg be64_to_cpus(&f_header.snapshots_offset); 601ad627d62SPekka Enberg 602ad627d62SPekka Enberg *header = (struct qcow_header) { 603ad627d62SPekka Enberg .size = f_header.size, 604ad627d62SPekka Enberg .l1_table_offset = f_header.l1_table_offset, 605ad627d62SPekka Enberg .l1_size = f_header.l1_size, 606ad627d62SPekka Enberg .cluster_bits = f_header.cluster_bits, 607ad627d62SPekka Enberg .l2_bits = f_header.cluster_bits - 3, 608ad627d62SPekka Enberg .oflag_mask = QCOW2_OFLAG_MASK, 609ad627d62SPekka Enberg }; 610ad627d62SPekka Enberg 611ad627d62SPekka Enberg return header; 612ad627d62SPekka Enberg } 613ad627d62SPekka Enberg 614f10860caSPekka Enberg static struct disk_image *qcow2_probe(int fd, bool readonly) 615ad627d62SPekka Enberg { 616ad627d62SPekka Enberg struct qcow *q; 617ad627d62SPekka Enberg struct qcow_header *h; 618ad627d62SPekka Enberg struct disk_image *disk_image; 619ad627d62SPekka Enberg 620ad627d62SPekka Enberg q = calloc(1, sizeof(struct qcow)); 621ad627d62SPekka Enberg if (!q) 622ad627d62SPekka Enberg goto error; 623ad627d62SPekka Enberg 624c0799eb9SPekka Enberg mutex_init(&q->mutex); 625ad627d62SPekka Enberg q->fd = fd; 6263309045fSPrasad Joshi q->root = RB_ROOT; 6273309045fSPrasad Joshi INIT_LIST_HEAD(&q->lru_list); 628ad627d62SPekka Enberg 629ad627d62SPekka Enberg h = q->header = qcow2_read_header(fd); 630ad627d62SPekka Enberg if (!h) 631ad627d62SPekka Enberg goto error; 632ad627d62SPekka Enberg 633ad627d62SPekka Enberg if (qcow_read_l1_table(q) < 0) 634ad627d62SPekka Enberg goto error; 635ad627d62SPekka Enberg 6367d22135fSAsias He /* 6377d22135fSAsias He * Do not use mmap use read/write instead 6387d22135fSAsias He */ 639f10860caSPekka Enberg if (readonly) 640b1c84095SPekka Enberg disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_NOMMAP); 641f10860caSPekka Enberg else 642b1c84095SPekka Enberg disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_NOMMAP); 643f10860caSPekka Enberg 644ad627d62SPekka Enberg if (!disk_image) 645ad627d62SPekka Enberg goto error; 646ad627d62SPekka Enberg disk_image->priv = q; 647ad627d62SPekka Enberg 648ad627d62SPekka Enberg return disk_image; 649ad627d62SPekka Enberg error: 650ad627d62SPekka Enberg if (!q) 651ad627d62SPekka Enberg return NULL; 652ad627d62SPekka Enberg 653ad627d62SPekka Enberg free(q->table.l1_table); 654ad627d62SPekka Enberg free(q->header); 655ad627d62SPekka Enberg free(q); 656ad627d62SPekka Enberg 657ad627d62SPekka Enberg return NULL; 658ad627d62SPekka Enberg } 659ad627d62SPekka Enberg 660ad627d62SPekka Enberg static bool qcow2_check_image(int fd) 661ad627d62SPekka Enberg { 662ad627d62SPekka Enberg struct qcow2_header_disk f_header; 663ad627d62SPekka Enberg 664ad627d62SPekka Enberg if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0) 665ad627d62SPekka Enberg return false; 666ad627d62SPekka Enberg 667ad627d62SPekka Enberg be32_to_cpus(&f_header.magic); 668ad627d62SPekka Enberg be32_to_cpus(&f_header.version); 669ad627d62SPekka Enberg 670ad627d62SPekka Enberg if (f_header.magic != QCOW_MAGIC) 671ad627d62SPekka Enberg return false; 672ad627d62SPekka Enberg 673ad627d62SPekka Enberg if (f_header.version != QCOW2_VERSION) 674ad627d62SPekka Enberg return false; 675ad627d62SPekka Enberg 676ad627d62SPekka Enberg return true; 677ad627d62SPekka Enberg } 678ad627d62SPekka Enberg 679ad627d62SPekka Enberg static void *qcow1_read_header(int fd) 680ad627d62SPekka Enberg { 681ad627d62SPekka Enberg struct qcow1_header_disk f_header; 682ad627d62SPekka Enberg struct qcow_header *header; 683ad627d62SPekka Enberg 684ad627d62SPekka Enberg header = malloc(sizeof(struct qcow_header)); 685ad627d62SPekka Enberg if (!header) 686ad627d62SPekka Enberg return NULL; 687ad627d62SPekka Enberg 688d39cefd2SSasha Levin if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0) { 689d39cefd2SSasha Levin free(header); 690ad627d62SPekka Enberg return NULL; 691d39cefd2SSasha Levin } 692ad627d62SPekka Enberg 693ad627d62SPekka Enberg be32_to_cpus(&f_header.magic); 694ad627d62SPekka Enberg be32_to_cpus(&f_header.version); 695ad627d62SPekka Enberg be64_to_cpus(&f_header.backing_file_offset); 696ad627d62SPekka Enberg be32_to_cpus(&f_header.backing_file_size); 697ad627d62SPekka Enberg be32_to_cpus(&f_header.mtime); 698ad627d62SPekka Enberg be64_to_cpus(&f_header.size); 699ad627d62SPekka Enberg be32_to_cpus(&f_header.crypt_method); 700ad627d62SPekka Enberg be64_to_cpus(&f_header.l1_table_offset); 701ad627d62SPekka Enberg 702ad627d62SPekka Enberg *header = (struct qcow_header) { 703ad627d62SPekka Enberg .size = f_header.size, 704ad627d62SPekka Enberg .l1_table_offset = f_header.l1_table_offset, 705ad627d62SPekka Enberg .l1_size = f_header.size / ((1 << f_header.l2_bits) * (1 << f_header.cluster_bits)), 706ad627d62SPekka Enberg .cluster_bits = f_header.cluster_bits, 707ad627d62SPekka Enberg .l2_bits = f_header.l2_bits, 708ad627d62SPekka Enberg .oflag_mask = QCOW1_OFLAG_MASK, 709ad627d62SPekka Enberg }; 71086835cedSPrasad Joshi 71186835cedSPrasad Joshi return header; 71286835cedSPrasad Joshi } 71386835cedSPrasad Joshi 714f10860caSPekka Enberg static struct disk_image *qcow1_probe(int fd, bool readonly) 71586835cedSPrasad Joshi { 71686835cedSPrasad Joshi struct qcow *q; 717ad627d62SPekka Enberg struct qcow_header *h; 71886835cedSPrasad Joshi struct disk_image *disk_image; 71986835cedSPrasad Joshi 72086835cedSPrasad Joshi q = calloc(1, sizeof(struct qcow)); 72186835cedSPrasad Joshi if (!q) 72286835cedSPrasad Joshi goto error; 72386835cedSPrasad Joshi 724c0799eb9SPekka Enberg mutex_init(&q->mutex); 72586835cedSPrasad Joshi q->fd = fd; 7263309045fSPrasad Joshi q->root = RB_ROOT; 7273309045fSPrasad Joshi INIT_LIST_HEAD(&q->lru_list); 72886835cedSPrasad Joshi 72986835cedSPrasad Joshi h = q->header = qcow1_read_header(fd); 73086835cedSPrasad Joshi if (!h) 73186835cedSPrasad Joshi goto error; 73286835cedSPrasad Joshi 73386835cedSPrasad Joshi if (qcow_read_l1_table(q) < 0) 73486835cedSPrasad Joshi goto error; 73586835cedSPrasad Joshi 7367d22135fSAsias He /* 7377d22135fSAsias He * Do not use mmap use read/write instead 7387d22135fSAsias He */ 739f10860caSPekka Enberg if (readonly) 740b1c84095SPekka Enberg disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_NOMMAP); 741f10860caSPekka Enberg else 742b1c84095SPekka Enberg disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_NOMMAP); 743f10860caSPekka Enberg 74486835cedSPrasad Joshi if (!disk_image) 74586835cedSPrasad Joshi goto error; 74686835cedSPrasad Joshi disk_image->priv = q; 74786835cedSPrasad Joshi 74886835cedSPrasad Joshi return disk_image; 74986835cedSPrasad Joshi error: 75086835cedSPrasad Joshi if (!q) 75186835cedSPrasad Joshi return NULL; 75286835cedSPrasad Joshi 7536c6f79b6SPrasad Joshi free(q->table.l1_table); 75486835cedSPrasad Joshi free(q->header); 75586835cedSPrasad Joshi free(q); 75686835cedSPrasad Joshi 75786835cedSPrasad Joshi return NULL; 75886835cedSPrasad Joshi } 75986835cedSPrasad Joshi 760ad627d62SPekka Enberg static bool qcow1_check_image(int fd) 76186835cedSPrasad Joshi { 762ad627d62SPekka Enberg struct qcow1_header_disk f_header; 76386835cedSPrasad Joshi 764ad627d62SPekka Enberg if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0) 765ad627d62SPekka Enberg return false; 76686835cedSPrasad Joshi 767ad627d62SPekka Enberg be32_to_cpus(&f_header.magic); 768ad627d62SPekka Enberg be32_to_cpus(&f_header.version); 76986835cedSPrasad Joshi 770ad627d62SPekka Enberg if (f_header.magic != QCOW_MAGIC) 771ad627d62SPekka Enberg return false; 77286835cedSPrasad Joshi 773ad627d62SPekka Enberg if (f_header.version != QCOW1_VERSION) 774ad627d62SPekka Enberg return false; 77586835cedSPrasad Joshi 776ad627d62SPekka Enberg return true; 77786835cedSPrasad Joshi } 77886835cedSPrasad Joshi 779f10860caSPekka Enberg struct disk_image *qcow_probe(int fd, bool readonly) 78086835cedSPrasad Joshi { 781ad627d62SPekka Enberg if (qcow1_check_image(fd)) 782f10860caSPekka Enberg return qcow1_probe(fd, readonly); 783ad627d62SPekka Enberg 784ad627d62SPekka Enberg if (qcow2_check_image(fd)) 785f10860caSPekka Enberg return qcow2_probe(fd, readonly); 786ad627d62SPekka Enberg 787ad627d62SPekka Enberg return NULL; 78886835cedSPrasad Joshi } 789