#include "kvm/qcow.h"

#include "kvm/disk-image.h"
#include "kvm/read-write.h"
#include "kvm/util.h"

#include <sys/types.h>
#include <sys/stat.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include <linux/byteorder.h>
#include <linux/kernel.h>
#include <linux/types.h>

static int insert(struct rb_root *root, struct qcow_l2_table *new)
{
	struct rb_node **link = &(root->rb_node), *parent = NULL;
	u64 offset = new->offset;

	/* search the tree */
	while (*link) {
		struct qcow_l2_table *t;

		t = rb_entry(*link, struct qcow_l2_table, node);
		if (!t)
			goto error;

		parent = *link;

		if (t->offset > offset)
			link = &(*link)->rb_left;
		else if (t->offset < offset)
			link = &(*link)->rb_right;
		else
			goto out;
	}

	/* add new node */
	rb_link_node(&new->node, parent, link);
	rb_insert_color(&new->node, root);
out:
	return 0;
error:
	return -1;
}

static struct qcow_l2_table *search(struct rb_root *root, u64 offset)
{
	struct rb_node *link = root->rb_node;

	while (link) {
		struct qcow_l2_table *t;

		t = rb_entry(link, struct qcow_l2_table, node);
		if (!t)
			goto out;

		if (t->offset > offset)
			link = link->rb_left;
		else if (t->offset < offset)
			link = link->rb_right;
		else
			return t;
	}
out:
	return NULL;
}

static void free_cache(struct qcow *q)
{
	struct list_head *pos, *n;
	struct qcow_l2_table *t;
	struct rb_root *r = &q->root;

	list_for_each_safe(pos, n, &q->lru_list) {
		/* Remove cache table from the list and RB tree */
		list_del(pos);
		t = list_entry(pos, struct qcow_l2_table, list);
		rb_erase(&t->node, r);

		/* Free the cached node */
		free(t);
	}
}

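/*
 * L2 tables read from the image are kept in a small cache so that requests
 * touching the same table do not have to re-read it from disk. Each cached
 * table is indexed two ways: an RB tree keyed by the table's file offset
 * (for lookup) and an LRU list capped at MAX_CACHE_NODES entries (for
 * eviction), maintained by cache_table() and search_table() below.
 */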
static int cache_table(struct qcow *q, struct qcow_l2_table *c)
{
	struct rb_root *r = &q->root;
	struct qcow_l2_table *lru;

	if (q->nr_cached == MAX_CACHE_NODES) {
		/*
		 * The node at the head of the list is the least recently
		 * used node. Remove it from the list and replace it with
		 * the new node.
		 */
		lru = list_first_entry(&q->lru_list, struct qcow_l2_table, list);

		/* Remove the node from the cache */
		rb_erase(&lru->node, r);
		list_del_init(&lru->list);
		q->nr_cached--;

		/* Free the evicted node */
		free(lru);
	}

	/* Add the new node to the RB tree for fast lookup */
	if (insert(r, c) < 0)
		goto error;

	/* Add it to the LRU replacement list */
	list_add_tail(&c->list, &q->lru_list);
	q->nr_cached++;

	return 0;
error:
	return -1;
}

static struct qcow_l2_table *search_table(struct qcow *q, u64 offset)
{
	struct qcow_l2_table *l2t;

	l2t = search(&q->root, offset);
	if (!l2t)
		return NULL;

	/* Update the LRU state by moving the found node to the list tail */
	list_move_tail(&l2t->list, &q->lru_list);

	return l2t;
}

/* Allocates a new node for caching an L2 table */
static struct qcow_l2_table *new_cache_table(struct qcow *q, u64 offset)
{
	struct qcow_header *header = q->header;
	struct qcow_l2_table *c;
	u64 l2t_sz;
	u64 size;

	l2t_sz = 1 << header->l2_bits;
	size = sizeof(*c) + l2t_sz * sizeof(u64);
	c = calloc(1, size);
	if (!c)
		goto out;

	c->offset = offset;
	RB_CLEAR_NODE(&c->node);
	INIT_LIST_HEAD(&c->list);
out:
	return c;
}

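/*
 * A guest offset is split by the helpers below into three fields, from most
 * to least significant: an index into the L1 table, an index into the L2
 * table the L1 entry points to, and a byte offset inside the cluster. For
 * QCOW2 images the header setup below uses l2_bits = cluster_bits - 3, so an
 * image with 64 KiB clusters (cluster_bits = 16, l2_bits = 13) maps offset
 * 0x12345678 to l1_idx 0x0, l2_idx 0x1234 and cluster offset 0x5678.
 */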
static inline u64 get_l1_index(struct qcow *q, u64 offset)
{
	struct qcow_header *header = q->header;

	return offset >> (header->l2_bits + header->cluster_bits);
}

static inline u64 get_l2_index(struct qcow *q, u64 offset)
{
	struct qcow_header *header = q->header;

	return (offset >> (header->cluster_bits)) & ((1 << header->l2_bits)-1);
}

static inline u64 get_cluster_offset(struct qcow *q, u64 offset)
{
	struct qcow_header *header = q->header;

	return offset & ((1 << header->cluster_bits)-1);
}

static struct qcow_l2_table *qcow_read_l2_table(struct qcow *q, u64 offset)
{
	struct qcow_header *header = q->header;
	struct qcow_l2_table *l2t;
	u64 size;
	u64 i;

	size = 1 << header->l2_bits;

	/* search an entry for offset in cache */
	l2t = search_table(q, offset);
	if (l2t)
		return l2t;

	/* allocate new node for caching l2 table */
	l2t = new_cache_table(q, offset);
	if (!l2t)
		goto error;

	/* table not cached: read from the disk */
	if (pread_in_full(q->fd, l2t->table, size * sizeof(u64), offset) < 0)
		goto error;

	/* cache the table */
	if (cache_table(q, l2t) < 0)
		goto error;

	/* change cached table to CPU's byte-order */
	for (i = 0; i < size; i++)
		be64_to_cpus(&l2t->table[i]);

	return l2t;
error:
	free(l2t);
	return NULL;
}

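/*
 * Reads guest data starting at 'offset' without crossing a cluster boundary:
 * at most the bytes remaining in the containing cluster, capped to dst_len,
 * are copied. Clusters that were never allocated read back as zeroes.
 * Returns the number of bytes placed in 'dst', or -1 on error.
 */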
static ssize_t qcow_read_cluster(struct qcow *q, u64 offset, void *dst, u32 dst_len)
{
	struct qcow_header *header = q->header;
	struct qcow_table *table = &q->table;
	struct qcow_l2_table *l2_table;
	u64 l2_table_offset;
	u64 l2_table_size;
	u64 cluster_size;
	u64 clust_offset;
	u64 clust_start;
	size_t length;
	u64 l1_idx;
	u64 l2_idx;

	cluster_size = 1 << header->cluster_bits;

	l1_idx = get_l1_index(q, offset);
	if (l1_idx >= table->table_size)
		goto out_error;

	clust_offset = get_cluster_offset(q, offset);
	if (clust_offset >= cluster_size)
		goto out_error;

	length = cluster_size - clust_offset;
	if (length > dst_len)
		length = dst_len;

	l2_table_offset = table->l1_table[l1_idx] & ~header->oflag_mask;
	if (!l2_table_offset)
		goto zero_cluster;

	l2_table_size = 1 << header->l2_bits;

	/* read and cache level 2 table */
	l2_table = qcow_read_l2_table(q, l2_table_offset);
	if (!l2_table)
		goto out_error;

	l2_idx = get_l2_index(q, offset);
	if (l2_idx >= l2_table_size)
		goto out_error;

	clust_start = l2_table->table[l2_idx] & ~header->oflag_mask;
	if (!clust_start)
		goto zero_cluster;

	if (pread_in_full(q->fd, dst, length, clust_start + clust_offset) < 0)
		goto out_error;

out:
	return length;

zero_cluster:
	memset(dst, 0, length);
	goto out;

out_error:
	length = -1;
	goto out;
}

static ssize_t qcow_read_sector(struct disk_image *disk, u64 sector, void *dst, u32 dst_len)
{
	struct qcow *q = disk->priv;
	struct qcow_header *header = q->header;
	u32 nr_read;
	u64 offset;
	char *buf;
	ssize_t nr;

	buf = dst;
	nr_read = 0;

	while (nr_read < dst_len) {
		offset = sector << SECTOR_SHIFT;
		if (offset >= header->size)
			return -1;

		nr = qcow_read_cluster(q, offset, buf, dst_len - nr_read);
		if (nr <= 0)
			return -1;

		nr_read += nr;
		buf += nr;
		sector += (nr >> SECTOR_SHIFT);
	}

	return dst_len;
}

static inline u64 file_size(int fd)
{
	struct stat st;

	if (fstat(fd, &st) < 0)
		return 0;

	return st.st_size;
}

static inline int qcow_pwrite_sync(int fd, void *buf, size_t count, off_t offset)
{
	if (pwrite_in_full(fd, buf, count, offset) < 0)
		return -1;

	return fdatasync(fd);
}

/* Writes a level 2 table at the end of the file. */
static u64 qcow_write_l2_table(struct qcow *q, u64 *table)
{
	struct qcow_header *header = q->header;
	u64 clust_sz;
	u64 f_sz;
	u64 off;
	u64 sz;

	f_sz = file_size(q->fd);
	if (!f_sz)
		return 0;

	sz = 1 << header->l2_bits;
	clust_sz = 1 << header->cluster_bits;
	off = ALIGN(f_sz, clust_sz);

	if (qcow_pwrite_sync(q->fd, table, sz * sizeof(u64), off) < 0)
		return 0;

	return off;
}

/*
 * A QCOW file might grow during a write operation. Not only data but also
 * metadata is written at the end of the file. Therefore it is necessary to
 * ensure every write is committed to disk. Hence we use qcow_pwrite_sync()
 * to synchronize the in-core state of the QCOW image with the disk.
 *
 * We also try to restore the image to a consistent state if a metadata
 * operation fails. The two metadata operations are the level 1 and level 2
 * table updates; if either of them fails, the image is truncated back to a
 * consistent state.
 */
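/*
 * In outline, qcow_write_cluster() below proceeds as follows:
 *
 *   1. If the L1 entry for the request is empty, append a zero-filled L2
 *      table at the cluster-aligned end of the file (qcow_write_l2_table()
 *      syncs it), then update the on-disk L1 entry with qcow_pwrite_sync()
 *      and cache the new table.
 *   2. If the L2 entry is empty, place the new cluster at the cluster-aligned
 *      end of the file.
 *   3. Write the guest data into the cluster.
 *   4. If a new cluster was allocated in step 2, update the on-disk L2 entry
 *      with qcow_pwrite_sync().
 *
 * When a metadata write fails, the file is truncated back to the size
 * captured before the update in an attempt to return to the previous
 * consistent state.
 */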
static ssize_t qcow_write_cluster(struct qcow *q, u64 offset, void *buf, u32 src_len)
{
	struct qcow_header *header = q->header;
	struct qcow_table *table = &q->table;
	struct qcow_l2_table *l2t;
	bool update_meta;
	u64 clust_start;
	u64 clust_off;
	u64 clust_sz;
	u64 l1t_idx;
	u64 l2t_idx;
	u64 l2t_off;
	u64 l2t_sz;
	u64 f_sz;
	u64 len;
	u64 t;

	l2t = NULL;
	l2t_sz = 1 << header->l2_bits;
	clust_sz = 1 << header->cluster_bits;

	l1t_idx = get_l1_index(q, offset);
	if (l1t_idx >= table->table_size)
		goto error;

	l2t_idx = get_l2_index(q, offset);
	if (l2t_idx >= l2t_sz)
		goto error;

	clust_off = get_cluster_offset(q, offset);
	if (clust_off >= clust_sz)
		goto error;

	len = clust_sz - clust_off;
	if (len > src_len)
		len = src_len;

	l2t_off = table->l1_table[l1t_idx] & ~header->oflag_mask;
	if (l2t_off) {
		/* read and cache the l2 table */
		l2t = qcow_read_l2_table(q, l2t_off);
		if (!l2t)
			goto error;
	} else {
		l2t = new_cache_table(q, l2t_off);
		if (!l2t)
			goto error;

		/* Capture the state of the consistent QCOW image */
		f_sz = file_size(q->fd);
		if (!f_sz)
			goto free_cache;

		/* Write an L2 table of zeroes at the end of the file */
		l2t_off = qcow_write_l2_table(q, l2t->table);
		if (!l2t_off)
			goto free_cache;

		/* Metadata update: update the on-disk level 1 table */
		t = cpu_to_be64(l2t_off);

		if (qcow_pwrite_sync(q->fd, &t, sizeof(t), header->l1_table_offset + l1t_idx * sizeof(u64)) < 0) {
			/* restore the file to a consistent state */
			if (ftruncate(q->fd, f_sz) < 0)
				goto free_cache;

			goto free_cache;
		}

		if (cache_table(q, l2t) < 0) {
			if (ftruncate(q->fd, f_sz) < 0)
				goto free_cache;

			goto free_cache;
		}

		/* Update the in-core entry */
		table->l1_table[l1t_idx] = l2t_off;
	}

	/* Capture the state of the consistent QCOW image */
	f_sz = file_size(q->fd);
	if (!f_sz)
		goto error;

	clust_start = l2t->table[l2t_idx] & ~header->oflag_mask;
	if (!clust_start) {
		clust_start = ALIGN(f_sz, clust_sz);
		update_meta = true;
	} else
		update_meta = false;

	/* Write the actual data */
	if (pwrite_in_full(q->fd, buf, len, clust_start + clust_off) < 0)
		goto error;

	if (update_meta) {
		t = cpu_to_be64(clust_start);
		if (qcow_pwrite_sync(q->fd, &t, sizeof(t), l2t_off + l2t_idx * sizeof(u64)) < 0) {
			/* Restore the file to a consistent state */
			if (ftruncate(q->fd, f_sz) < 0)
				goto error;

			goto error;
		}

		/* Update the cached level 2 entry */
		l2t->table[l2t_idx] = clust_start;
	}

	return len;

free_cache:
	free(l2t);
error:
	return -1;
}

static ssize_t qcow_write_sector(struct disk_image *disk, u64 sector, void *src, u32 src_len)
{
	struct qcow *q = disk->priv;
	struct qcow_header *header = q->header;
	u32 nr_written;
	char *buf;
	u64 offset;
	ssize_t nr;

	buf = src;
	nr_written = 0;
	offset = sector << SECTOR_SHIFT;

	while (nr_written < src_len) {
		if (offset >= header->size)
			return -1;

		nr = qcow_write_cluster(q, offset, buf, src_len - nr_written);
		if (nr < 0)
			return -1;

		nr_written += nr;
		buf += nr;
		offset += nr;
	}

	return nr_written;
}

static ssize_t qcow_nowrite_sector(struct disk_image *disk, u64 sector, void *src, u32 src_len)
{
	/* I/O error */
	pr_info("%s: no write support\n", __func__);
	return -1;
}

static int qcow_disk_close(struct disk_image *disk)
{
	struct qcow *q;

	if (!disk)
		return 0;

	q = disk->priv;

	free_cache(q);
	free(q->table.l1_table);
	free(q->header);
	free(q);

	return 0;
}

static struct disk_image_operations qcow_disk_readonly_ops = {
	.read_sector	= qcow_read_sector,
	.write_sector	= qcow_nowrite_sector,
	.close		= qcow_disk_close,
};

static struct disk_image_operations qcow_disk_ops = {
	.read_sector	= qcow_read_sector,
	.write_sector	= qcow_write_sector,
	.close		= qcow_disk_close,
};

static int qcow_read_l1_table(struct qcow *q)
{
	struct qcow_header *header = q->header;
	struct qcow_table *table = &q->table;
	u64 i;

	table->table_size = header->l1_size;

	table->l1_table = calloc(table->table_size, sizeof(u64));
	if (!table->l1_table)
		return -1;

	if (pread_in_full(q->fd, table->l1_table, sizeof(u64) *
				table->table_size, header->l1_table_offset) < 0)
		return -1;

	for (i = 0; i < table->table_size; i++)
		be64_to_cpus(&table->l1_table[i]);

	return 0;
}

static void *qcow2_read_header(int fd)
{
	struct qcow2_header_disk f_header;
	struct qcow_header *header;

	header = malloc(sizeof(struct qcow_header));
	if (!header)
		return NULL;

	if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0) {
		free(header);
		return NULL;
	}

	be32_to_cpus(&f_header.magic);
	be32_to_cpus(&f_header.version);
	be64_to_cpus(&f_header.backing_file_offset);
	be32_to_cpus(&f_header.backing_file_size);
	be32_to_cpus(&f_header.cluster_bits);
	be64_to_cpus(&f_header.size);
	be32_to_cpus(&f_header.crypt_method);
	be32_to_cpus(&f_header.l1_size);
	be64_to_cpus(&f_header.l1_table_offset);
	be64_to_cpus(&f_header.refcount_table_offset);
	be32_to_cpus(&f_header.refcount_table_clusters);
	be32_to_cpus(&f_header.nb_snapshots);
	be64_to_cpus(&f_header.snapshots_offset);

	*header = (struct qcow_header) {
		.size			= f_header.size,
		.l1_table_offset	= f_header.l1_table_offset,
		.l1_size		= f_header.l1_size,
		.cluster_bits		= f_header.cluster_bits,
		.l2_bits		= f_header.cluster_bits - 3,
		.oflag_mask		= QCOW2_OFLAG_MASK,
	};

	return header;
}

static struct disk_image *qcow2_probe(int fd, bool readonly)
{
	struct qcow *q;
	struct qcow_header *h;
	struct disk_image *disk_image;

	q = calloc(1, sizeof(struct qcow));
	if (!q)
		goto error;

	q->fd = fd;
	q->root = RB_ROOT;
	INIT_LIST_HEAD(&q->lru_list);

	h = q->header = qcow2_read_header(fd);
	if (!h)
		goto error;

	if (qcow_read_l1_table(q) < 0)
		goto error;

	/*
	 * Do not use mmap; use read/write instead.
	 */
	if (readonly)
		disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_NOMMAP);
	else
		disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_NOMMAP);

	if (!disk_image)
		goto error;
	disk_image->priv = q;

	return disk_image;
error:
	if (!q)
		return NULL;

	free(q->table.l1_table);
	free(q->header);
	free(q);

	return NULL;
}

static bool qcow2_check_image(int fd)
{
	struct qcow2_header_disk f_header;

	if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0)
		return false;

	be32_to_cpus(&f_header.magic);
	be32_to_cpus(&f_header.version);

	if (f_header.magic != QCOW_MAGIC)
		return false;

	if (f_header.version != QCOW2_VERSION)
		return false;

	return true;
}

static void *qcow1_read_header(int fd)
{
	struct qcow1_header_disk f_header;
	struct qcow_header *header;

	header = malloc(sizeof(struct qcow_header));
	if (!header)
		return NULL;

	if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0) {
		free(header);
		return NULL;
	}

	be32_to_cpus(&f_header.magic);
	be32_to_cpus(&f_header.version);
	be64_to_cpus(&f_header.backing_file_offset);
	be32_to_cpus(&f_header.backing_file_size);
	be32_to_cpus(&f_header.mtime);
	be64_to_cpus(&f_header.size);
	be32_to_cpus(&f_header.crypt_method);
	be64_to_cpus(&f_header.l1_table_offset);

	*header = (struct qcow_header) {
		.size			= f_header.size,
		.l1_table_offset	= f_header.l1_table_offset,
		.l1_size		= f_header.size / ((1 << f_header.l2_bits) * (1 << f_header.cluster_bits)),
		.cluster_bits		= f_header.cluster_bits,
		.l2_bits		= f_header.l2_bits,
		.oflag_mask		= QCOW1_OFLAG_MASK,
	};

	return header;
}

static struct disk_image *qcow1_probe(int fd, bool readonly)
{
	struct qcow *q;
	struct qcow_header *h;
	struct disk_image *disk_image;

	q = calloc(1, sizeof(struct qcow));
	if (!q)
		goto error;

	q->fd = fd;
	q->root = RB_ROOT;
	INIT_LIST_HEAD(&q->lru_list);

	h = q->header = qcow1_read_header(fd);
	if (!h)
		goto error;

	if (qcow_read_l1_table(q) < 0)
		goto error;

	/*
	 * Do not use mmap; use read/write instead.
	 */
	if (readonly)
		disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_NOMMAP);
	else
		disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_NOMMAP);

	if (!disk_image)
		goto error;
	disk_image->priv = q;

	return disk_image;
error:
	if (!q)
		return NULL;

	free(q->table.l1_table);
	free(q->header);
	free(q);

	return NULL;
}

static bool qcow1_check_image(int fd)
{
	struct qcow1_header_disk f_header;

	if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0)
		return false;

	be32_to_cpus(&f_header.magic);
	be32_to_cpus(&f_header.version);

	if (f_header.magic != QCOW_MAGIC)
		return false;

	if (f_header.version != QCOW1_VERSION)
		return false;

	return true;
}

struct disk_image *qcow_probe(int fd, bool readonly)
{
	if (qcow1_check_image(fd))
		return qcow1_probe(fd, readonly);

	if (qcow2_check_image(fd))
		return qcow2_probe(fd, readonly);

	return NULL;
}
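
/*
 * Illustrative usage sketch (not compiled into the tool): probe an image file
 * and read its first sector through the returned disk_image. This assumes
 * the disk_image definition in "kvm/disk-image.h" exposes the operation table
 * installed above as disk->ops; freeing the disk_image itself is the job of
 * the generic disk-image layer.
 *
 *	int fd = open("guest.qcow2", O_RDONLY);
 *	struct disk_image *disk = (fd < 0) ? NULL : qcow_probe(fd, true);
 *	u8 buf[1 << SECTOR_SHIFT];
 *
 *	if (disk && disk->ops->read_sector(disk, 0, buf, sizeof(buf)) > 0)
 *		... buf now holds the first sector of the guest-visible disk ...
 */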