186835cedSPrasad Joshi #include "kvm/qcow.h" 286835cedSPrasad Joshi 386835cedSPrasad Joshi #include "kvm/disk-image.h" 486835cedSPrasad Joshi #include "kvm/read-write.h" 586835cedSPrasad Joshi #include "kvm/util.h" 686835cedSPrasad Joshi 786835cedSPrasad Joshi #include <sys/types.h> 886835cedSPrasad Joshi #include <sys/stat.h> 986835cedSPrasad Joshi #include <stdbool.h> 1086835cedSPrasad Joshi #include <stdlib.h> 1186835cedSPrasad Joshi #include <string.h> 1286835cedSPrasad Joshi #include <unistd.h> 1386835cedSPrasad Joshi #include <fcntl.h> 1486835cedSPrasad Joshi 1586835cedSPrasad Joshi #include <linux/byteorder.h> 16865c675fSPrasad Joshi #include <linux/kernel.h> 170df6b4d9SPekka Enberg #include <linux/types.h> 1886835cedSPrasad Joshi 19*3309045fSPrasad Joshi static int insert(struct rb_root *root, struct qcow_l2_cache *new) 20*3309045fSPrasad Joshi { 21*3309045fSPrasad Joshi struct rb_node **link = &(root->rb_node), *parent = NULL; 22*3309045fSPrasad Joshi u64 offset = new->offset; 23*3309045fSPrasad Joshi 24*3309045fSPrasad Joshi /* search the tree */ 25*3309045fSPrasad Joshi while (*link) { 26*3309045fSPrasad Joshi struct qcow_l2_cache *t; 27*3309045fSPrasad Joshi 28*3309045fSPrasad Joshi t = rb_entry(*link, struct qcow_l2_cache, node); 29*3309045fSPrasad Joshi if (!t) 30*3309045fSPrasad Joshi goto error; 31*3309045fSPrasad Joshi 32*3309045fSPrasad Joshi parent = *link; 33*3309045fSPrasad Joshi 34*3309045fSPrasad Joshi if (t->offset > offset) 35*3309045fSPrasad Joshi link = &(*link)->rb_left; 36*3309045fSPrasad Joshi else if (t->offset < offset) 37*3309045fSPrasad Joshi link = &(*link)->rb_right; 38*3309045fSPrasad Joshi else 39*3309045fSPrasad Joshi goto out; 40*3309045fSPrasad Joshi } 41*3309045fSPrasad Joshi 42*3309045fSPrasad Joshi /* add new node */ 43*3309045fSPrasad Joshi rb_link_node(&new->node, parent, link); 44*3309045fSPrasad Joshi rb_insert_color(&new->node, root); 45*3309045fSPrasad Joshi out: 46*3309045fSPrasad Joshi return 0; 47*3309045fSPrasad Joshi error: 48*3309045fSPrasad Joshi return -1; 49*3309045fSPrasad Joshi } 50*3309045fSPrasad Joshi 51*3309045fSPrasad Joshi static struct qcow_l2_cache *search(struct rb_root *root, u64 offset) 52*3309045fSPrasad Joshi { 53*3309045fSPrasad Joshi struct rb_node *link = root->rb_node; 54*3309045fSPrasad Joshi 55*3309045fSPrasad Joshi while (link) { 56*3309045fSPrasad Joshi struct qcow_l2_cache *t; 57*3309045fSPrasad Joshi 58*3309045fSPrasad Joshi t = rb_entry(link, struct qcow_l2_cache, node); 59*3309045fSPrasad Joshi if (!t) 60*3309045fSPrasad Joshi goto out; 61*3309045fSPrasad Joshi 62*3309045fSPrasad Joshi if (t->offset > offset) 63*3309045fSPrasad Joshi link = link->rb_left; 64*3309045fSPrasad Joshi else if (t->offset < offset) 65*3309045fSPrasad Joshi link = link->rb_right; 66*3309045fSPrasad Joshi else 67*3309045fSPrasad Joshi return t; 68*3309045fSPrasad Joshi } 69*3309045fSPrasad Joshi out: 70*3309045fSPrasad Joshi return NULL; 71*3309045fSPrasad Joshi } 72*3309045fSPrasad Joshi 73*3309045fSPrasad Joshi static void free_cache(struct qcow *q) 74*3309045fSPrasad Joshi { 75*3309045fSPrasad Joshi struct list_head *pos, *n; 76*3309045fSPrasad Joshi struct qcow_l2_cache *t; 77*3309045fSPrasad Joshi struct rb_root *r = &q->root; 78*3309045fSPrasad Joshi 79*3309045fSPrasad Joshi list_for_each_safe(pos, n, &q->lru_list) { 80*3309045fSPrasad Joshi /* Remove cache table from the list and RB tree */ 81*3309045fSPrasad Joshi list_del(pos); 82*3309045fSPrasad Joshi t = list_entry(pos, struct qcow_l2_cache, list); 83*3309045fSPrasad Joshi rb_erase(&t->node, r); 84*3309045fSPrasad Joshi 85*3309045fSPrasad Joshi /* Free the cached node */ 86*3309045fSPrasad Joshi free(t); 87*3309045fSPrasad Joshi } 88*3309045fSPrasad Joshi } 89*3309045fSPrasad Joshi 90*3309045fSPrasad Joshi static int cache_table(struct qcow *q, struct qcow_l2_cache *c) 91*3309045fSPrasad Joshi { 92*3309045fSPrasad Joshi struct rb_root *r = &q->root; 93*3309045fSPrasad Joshi struct qcow_l2_cache *lru; 94*3309045fSPrasad Joshi 95*3309045fSPrasad Joshi if (q->nr_cached == MAX_CACHE_NODES) { 96*3309045fSPrasad Joshi /* 97*3309045fSPrasad Joshi * The node at the head of the list is least recently used 98*3309045fSPrasad Joshi * node. Remove it from the list and replaced with a new node. 99*3309045fSPrasad Joshi */ 100*3309045fSPrasad Joshi lru = list_first_entry(&q->lru_list, struct qcow_l2_cache, list); 101*3309045fSPrasad Joshi 102*3309045fSPrasad Joshi /* Remove the node from the cache */ 103*3309045fSPrasad Joshi rb_erase(&lru->node, r); 104*3309045fSPrasad Joshi list_del_init(&lru->list); 105*3309045fSPrasad Joshi q->nr_cached--; 106*3309045fSPrasad Joshi 107*3309045fSPrasad Joshi /* Free the LRUed node */ 108*3309045fSPrasad Joshi free(lru); 109*3309045fSPrasad Joshi } 110*3309045fSPrasad Joshi 111*3309045fSPrasad Joshi /* Add new node in RB Tree: Helps in searching faster */ 112*3309045fSPrasad Joshi if (insert(r, c) < 0) 113*3309045fSPrasad Joshi goto error; 114*3309045fSPrasad Joshi 115*3309045fSPrasad Joshi /* Add in LRU replacement list */ 116*3309045fSPrasad Joshi list_add_tail(&c->list, &q->lru_list); 117*3309045fSPrasad Joshi q->nr_cached++; 118*3309045fSPrasad Joshi 119*3309045fSPrasad Joshi return 0; 120*3309045fSPrasad Joshi error: 121*3309045fSPrasad Joshi return -1; 122*3309045fSPrasad Joshi } 123*3309045fSPrasad Joshi 124*3309045fSPrasad Joshi static int search_table(struct qcow *q, u64 **table, u64 offset) 125*3309045fSPrasad Joshi { 126*3309045fSPrasad Joshi struct qcow_l2_cache *c; 127*3309045fSPrasad Joshi 128*3309045fSPrasad Joshi *table = NULL; 129*3309045fSPrasad Joshi 130*3309045fSPrasad Joshi c = search(&q->root, offset); 131*3309045fSPrasad Joshi if (!c) 132*3309045fSPrasad Joshi return -1; 133*3309045fSPrasad Joshi 134*3309045fSPrasad Joshi /* Update the LRU state, by moving the searched node to list tail */ 135*3309045fSPrasad Joshi list_move_tail(&c->list, &q->lru_list); 136*3309045fSPrasad Joshi 137*3309045fSPrasad Joshi *table = c->table; 138*3309045fSPrasad Joshi return 0; 139*3309045fSPrasad Joshi } 140*3309045fSPrasad Joshi 141*3309045fSPrasad Joshi /* Allocates a new node for caching L2 table */ 142*3309045fSPrasad Joshi static struct qcow_l2_cache *new_cache_table(struct qcow *q, u64 offset) 143*3309045fSPrasad Joshi { 144*3309045fSPrasad Joshi struct qcow_header *header = q->header; 145*3309045fSPrasad Joshi struct qcow_l2_cache *c; 146*3309045fSPrasad Joshi u64 l2t_sz; 147*3309045fSPrasad Joshi u64 size; 148*3309045fSPrasad Joshi 149*3309045fSPrasad Joshi l2t_sz = 1 << header->l2_bits; 150*3309045fSPrasad Joshi size = sizeof(*c) + l2t_sz * sizeof(u64); 151*3309045fSPrasad Joshi c = calloc(1, size); 152*3309045fSPrasad Joshi if (!c) 153*3309045fSPrasad Joshi goto out; 154*3309045fSPrasad Joshi 155*3309045fSPrasad Joshi c->offset = offset; 156*3309045fSPrasad Joshi RB_CLEAR_NODE(&c->node); 157*3309045fSPrasad Joshi INIT_LIST_HEAD(&c->list); 158*3309045fSPrasad Joshi out: 159*3309045fSPrasad Joshi return c; 160*3309045fSPrasad Joshi } 161*3309045fSPrasad Joshi 162742fce76SPrasad Joshi static inline u64 get_l1_index(struct qcow *q, u64 offset) 16386835cedSPrasad Joshi { 164ad627d62SPekka Enberg struct qcow_header *header = q->header; 16586835cedSPrasad Joshi 16686835cedSPrasad Joshi return offset >> (header->l2_bits + header->cluster_bits); 16786835cedSPrasad Joshi } 16886835cedSPrasad Joshi 169742fce76SPrasad Joshi static inline u64 get_l2_index(struct qcow *q, u64 offset) 17086835cedSPrasad Joshi { 171ad627d62SPekka Enberg struct qcow_header *header = q->header; 17286835cedSPrasad Joshi 17386835cedSPrasad Joshi return (offset >> (header->cluster_bits)) & ((1 << header->l2_bits)-1); 17486835cedSPrasad Joshi } 17586835cedSPrasad Joshi 176742fce76SPrasad Joshi static inline u64 get_cluster_offset(struct qcow *q, u64 offset) 17786835cedSPrasad Joshi { 178ad627d62SPekka Enberg struct qcow_header *header = q->header; 17986835cedSPrasad Joshi 18086835cedSPrasad Joshi return offset & ((1 << header->cluster_bits)-1); 18186835cedSPrasad Joshi } 18286835cedSPrasad Joshi 183*3309045fSPrasad Joshi static int qcow_read_l2_table(struct qcow *q, u64 **table, u64 offset) 184*3309045fSPrasad Joshi { 185*3309045fSPrasad Joshi struct qcow_header *header = q->header; 186*3309045fSPrasad Joshi struct qcow_l2_cache *c; 187*3309045fSPrasad Joshi u64 size; 188*3309045fSPrasad Joshi u64 i; 189*3309045fSPrasad Joshi u64 *t; 190*3309045fSPrasad Joshi 191*3309045fSPrasad Joshi c = NULL; 192*3309045fSPrasad Joshi *table = NULL; 193*3309045fSPrasad Joshi size = 1 << header->l2_bits; 194*3309045fSPrasad Joshi 195*3309045fSPrasad Joshi /* search an entry for offset in cache */ 196*3309045fSPrasad Joshi if (search_table(q, table, offset) >= 0) 197*3309045fSPrasad Joshi return 0; 198*3309045fSPrasad Joshi 199*3309045fSPrasad Joshi /* allocate new node for caching l2 table */ 200*3309045fSPrasad Joshi c = new_cache_table(q, offset); 201*3309045fSPrasad Joshi if (!c) 202*3309045fSPrasad Joshi goto error; 203*3309045fSPrasad Joshi t = c->table; 204*3309045fSPrasad Joshi 205*3309045fSPrasad Joshi /* table not cached: read from the disk */ 206*3309045fSPrasad Joshi if (pread_in_full(q->fd, t, size * sizeof(u64), offset) < 0) 207*3309045fSPrasad Joshi goto error; 208*3309045fSPrasad Joshi 209*3309045fSPrasad Joshi /* cache the table */ 210*3309045fSPrasad Joshi if (cache_table(q, c) < 0) 211*3309045fSPrasad Joshi goto error; 212*3309045fSPrasad Joshi 213*3309045fSPrasad Joshi /* change cached table to CPU's byte-order */ 214*3309045fSPrasad Joshi for (i = 0; i < size; i++) 215*3309045fSPrasad Joshi be64_to_cpus(&t[i]); 216*3309045fSPrasad Joshi 217*3309045fSPrasad Joshi *table = t; 218*3309045fSPrasad Joshi return 0; 219*3309045fSPrasad Joshi error: 220*3309045fSPrasad Joshi free(c); 221*3309045fSPrasad Joshi return -1; 222*3309045fSPrasad Joshi } 223*3309045fSPrasad Joshi 224b1c84095SPekka Enberg static ssize_t qcow_read_cluster(struct qcow *q, u64 offset, void *dst, u32 dst_len) 22586835cedSPrasad Joshi { 226ad627d62SPekka Enberg struct qcow_header *header = q->header; 2273dac48d4SPrasad Joshi struct qcow_table *table = &q->table; 228742fce76SPrasad Joshi u64 l2_table_offset; 229742fce76SPrasad Joshi u64 l2_table_size; 2303dac48d4SPrasad Joshi u64 cluster_size; 231742fce76SPrasad Joshi u64 clust_offset; 232742fce76SPrasad Joshi u64 clust_start; 233a51948ceSPekka Enberg size_t length; 2340df6b4d9SPekka Enberg u64 *l2_table; 235742fce76SPrasad Joshi u64 l1_idx; 236742fce76SPrasad Joshi u64 l2_idx; 23786835cedSPrasad Joshi 2380df6b4d9SPekka Enberg 239dae803fbSPekka Enberg cluster_size = 1 << header->cluster_bits; 24086835cedSPrasad Joshi 241c5e0624bSPrasad Joshi l1_idx = get_l1_index(q, offset); 2423dac48d4SPrasad Joshi if (l1_idx >= table->table_size) 24386835cedSPrasad Joshi goto out_error; 24486835cedSPrasad Joshi 2453dac48d4SPrasad Joshi clust_offset = get_cluster_offset(q, offset); 2463dac48d4SPrasad Joshi if (clust_offset >= cluster_size) 2473dac48d4SPrasad Joshi goto out_error; 2483dac48d4SPrasad Joshi 2493dac48d4SPrasad Joshi length = cluster_size - clust_offset; 2503dac48d4SPrasad Joshi if (length > dst_len) 2513dac48d4SPrasad Joshi length = dst_len; 2523dac48d4SPrasad Joshi 253ad627d62SPekka Enberg l2_table_offset = table->l1_table[l1_idx] & ~header->oflag_mask; 25486835cedSPrasad Joshi if (!l2_table_offset) 2553dac48d4SPrasad Joshi goto zero_cluster; 25686835cedSPrasad Joshi 25786835cedSPrasad Joshi l2_table_size = 1 << header->l2_bits; 25886835cedSPrasad Joshi 259*3309045fSPrasad Joshi /* read and cache level 2 table */ 260*3309045fSPrasad Joshi if (qcow_read_l2_table(q, &l2_table, l2_table_offset) < 0) 261b6edb0ecSSasha Levin goto out_error; 26286835cedSPrasad Joshi 263c5e0624bSPrasad Joshi l2_idx = get_l2_index(q, offset); 26486835cedSPrasad Joshi if (l2_idx >= l2_table_size) 265b6edb0ecSSasha Levin goto out_error; 26686835cedSPrasad Joshi 267*3309045fSPrasad Joshi clust_start = l2_table[l2_idx] & ~header->oflag_mask; 26886835cedSPrasad Joshi if (!clust_start) 2693dac48d4SPrasad Joshi goto zero_cluster; 27086835cedSPrasad Joshi 2713dac48d4SPrasad Joshi if (pread_in_full(q->fd, dst, length, clust_start + clust_offset) < 0) 272b6edb0ecSSasha Levin goto out_error; 27386835cedSPrasad Joshi 274179b71f0SPekka Enberg out: 2753dac48d4SPrasad Joshi return length; 27686835cedSPrasad Joshi 277179b71f0SPekka Enberg zero_cluster: 278179b71f0SPekka Enberg memset(dst, 0, length); 279179b71f0SPekka Enberg goto out; 280179b71f0SPekka Enberg 28186835cedSPrasad Joshi out_error: 282179b71f0SPekka Enberg length = -1; 283179b71f0SPekka Enberg goto out; 2843dac48d4SPrasad Joshi } 285b6edb0ecSSasha Levin 286b1c84095SPekka Enberg static ssize_t qcow_read_sector(struct disk_image *disk, u64 sector, void *dst, u32 dst_len) 2873dac48d4SPrasad Joshi { 28843835ac9SSasha Levin struct qcow *q = disk->priv; 289ad627d62SPekka Enberg struct qcow_header *header = q->header; 290d8eea993SPekka Enberg u32 nr_read; 2910df6b4d9SPekka Enberg u64 offset; 2920df6b4d9SPekka Enberg char *buf; 2933dac48d4SPrasad Joshi u32 nr; 2943dac48d4SPrasad Joshi 2950df6b4d9SPekka Enberg buf = dst; 296d8eea993SPekka Enberg nr_read = 0; 2970df6b4d9SPekka Enberg 298d8eea993SPekka Enberg while (nr_read < dst_len) { 2993dac48d4SPrasad Joshi offset = sector << SECTOR_SHIFT; 3003dac48d4SPrasad Joshi if (offset >= header->size) 3010df6b4d9SPekka Enberg return -1; 3023dac48d4SPrasad Joshi 303b1c84095SPekka Enberg nr = qcow_read_cluster(q, offset, buf, dst_len - nr_read); 304a51948ceSPekka Enberg if (nr <= 0) 3050df6b4d9SPekka Enberg return -1; 3063dac48d4SPrasad Joshi 307d8eea993SPekka Enberg nr_read += nr; 3083dac48d4SPrasad Joshi buf += nr; 3093dac48d4SPrasad Joshi sector += (nr >> SECTOR_SHIFT); 3103dac48d4SPrasad Joshi } 3110df6b4d9SPekka Enberg 31272133dd2SAsias He return dst_len; 31386835cedSPrasad Joshi } 31486835cedSPrasad Joshi 315865c675fSPrasad Joshi static inline u64 file_size(int fd) 316865c675fSPrasad Joshi { 317865c675fSPrasad Joshi struct stat st; 3180df6b4d9SPekka Enberg 319865c675fSPrasad Joshi if (fstat(fd, &st) < 0) 320865c675fSPrasad Joshi return 0; 3210df6b4d9SPekka Enberg 322865c675fSPrasad Joshi return st.st_size; 323865c675fSPrasad Joshi } 324865c675fSPrasad Joshi 3250df6b4d9SPekka Enberg #define SYNC_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE) 3260df6b4d9SPekka Enberg 3270df6b4d9SPekka Enberg static inline int qcow_pwrite_sync(int fd, void *buf, size_t count, off_t offset) 328865c675fSPrasad Joshi { 329865c675fSPrasad Joshi if (pwrite_in_full(fd, buf, count, offset) < 0) 330865c675fSPrasad Joshi return -1; 3310df6b4d9SPekka Enberg 3320df6b4d9SPekka Enberg return sync_file_range(fd, offset, count, SYNC_FLAGS); 333865c675fSPrasad Joshi } 334865c675fSPrasad Joshi 335865c675fSPrasad Joshi /* Writes a level 2 table at the end of the file. */ 336b1c84095SPekka Enberg static u64 qcow_write_l2_table(struct qcow *q, u64 *table) 337865c675fSPrasad Joshi { 338865c675fSPrasad Joshi struct qcow_header *header = q->header; 339865c675fSPrasad Joshi u64 clust_sz; 340865c675fSPrasad Joshi u64 f_sz; 3410df6b4d9SPekka Enberg u64 off; 3420df6b4d9SPekka Enberg u64 sz; 343865c675fSPrasad Joshi 344865c675fSPrasad Joshi f_sz = file_size(q->fd); 345865c675fSPrasad Joshi if (!f_sz) 346865c675fSPrasad Joshi return 0; 347865c675fSPrasad Joshi 348865c675fSPrasad Joshi sz = 1 << header->l2_bits; 349865c675fSPrasad Joshi clust_sz = 1 << header->cluster_bits; 350865c675fSPrasad Joshi off = ALIGN(f_sz, clust_sz); 351865c675fSPrasad Joshi 3520df6b4d9SPekka Enberg if (qcow_pwrite_sync(q->fd, table, sz * sizeof(u64), off) < 0) 353865c675fSPrasad Joshi return 0; 3540df6b4d9SPekka Enberg 355865c675fSPrasad Joshi return off; 356865c675fSPrasad Joshi } 357865c675fSPrasad Joshi 358865c675fSPrasad Joshi /* 359865c675fSPrasad Joshi * QCOW file might grow during a write operation. Not only data but metadata is 360865c675fSPrasad Joshi * also written at the end of the file. Therefore it is necessary to ensure 3610df6b4d9SPekka Enberg * every write is committed to disk. Hence we use uses qcow_pwrite_sync() to 362865c675fSPrasad Joshi * synchronize the in-core state of QCOW image to disk. 363865c675fSPrasad Joshi * 364865c675fSPrasad Joshi * We also try to restore the image to a consistent state if the metdata 365865c675fSPrasad Joshi * operation fails. The two metadat operations are: level 1 and level 2 table 366865c675fSPrasad Joshi * update. If either of them fails the image is truncated to a consistent state. 367865c675fSPrasad Joshi */ 368b1c84095SPekka Enberg static ssize_t qcow_write_cluster(struct qcow *q, u64 offset, void *buf, u32 src_len) 369865c675fSPrasad Joshi { 370865c675fSPrasad Joshi struct qcow_header *header = q->header; 371865c675fSPrasad Joshi struct qcow_table *table = &q->table; 372*3309045fSPrasad Joshi struct qcow_l2_cache *c; 3730df6b4d9SPekka Enberg bool update_meta; 3740df6b4d9SPekka Enberg u64 clust_start; 3750df6b4d9SPekka Enberg u64 clust_off; 376865c675fSPrasad Joshi u64 clust_sz; 377865c675fSPrasad Joshi u64 l1t_idx; 378865c675fSPrasad Joshi u64 l2t_idx; 3790df6b4d9SPekka Enberg u64 l2t_off; 3800df6b4d9SPekka Enberg u64 l2t_sz; 381865c675fSPrasad Joshi u64 *l2t; 382865c675fSPrasad Joshi u64 f_sz; 3830df6b4d9SPekka Enberg u64 len; 384865c675fSPrasad Joshi u64 t; 385865c675fSPrasad Joshi 386*3309045fSPrasad Joshi c = NULL; 387865c675fSPrasad Joshi l2t_sz = 1 << header->l2_bits; 388865c675fSPrasad Joshi clust_sz = 1 << header->cluster_bits; 389865c675fSPrasad Joshi 390865c675fSPrasad Joshi l1t_idx = get_l1_index(q, offset); 391865c675fSPrasad Joshi if (l1t_idx >= table->table_size) 392865c675fSPrasad Joshi goto error; 393865c675fSPrasad Joshi 394865c675fSPrasad Joshi l2t_idx = get_l2_index(q, offset); 395865c675fSPrasad Joshi if (l2t_idx >= l2t_sz) 396865c675fSPrasad Joshi goto error; 397865c675fSPrasad Joshi 398865c675fSPrasad Joshi clust_off = get_cluster_offset(q, offset); 399865c675fSPrasad Joshi if (clust_off >= clust_sz) 400865c675fSPrasad Joshi goto error; 401865c675fSPrasad Joshi 402865c675fSPrasad Joshi len = clust_sz - clust_off; 403865c675fSPrasad Joshi if (len > src_len) 404865c675fSPrasad Joshi len = src_len; 405865c675fSPrasad Joshi 406865c675fSPrasad Joshi l2t_off = table->l1_table[l1t_idx] & ~header->oflag_mask; 407865c675fSPrasad Joshi if (l2t_off) { 408*3309045fSPrasad Joshi /* read and cache l2 table */ 409*3309045fSPrasad Joshi if (qcow_read_l2_table(q, &l2t, l2t_off) < 0) 410*3309045fSPrasad Joshi goto error; 411865c675fSPrasad Joshi } else { 412*3309045fSPrasad Joshi c = new_cache_table(q, l2t_off); 413*3309045fSPrasad Joshi if (!c) 414*3309045fSPrasad Joshi goto error; 415*3309045fSPrasad Joshi l2t = c->table; 416*3309045fSPrasad Joshi 4170df6b4d9SPekka Enberg /* Capture the state of the consistent QCOW image */ 418865c675fSPrasad Joshi f_sz = file_size(q->fd); 419865c675fSPrasad Joshi if (!f_sz) 420*3309045fSPrasad Joshi goto free_cache; 421865c675fSPrasad Joshi 422865c675fSPrasad Joshi /* Write the l2 table of 0's at the end of the file */ 423b1c84095SPekka Enberg l2t_off = qcow_write_l2_table(q, l2t); 424865c675fSPrasad Joshi if (!l2t_off) 425*3309045fSPrasad Joshi goto free_cache; 426865c675fSPrasad Joshi 427865c675fSPrasad Joshi /* Metadata update: update on disk level 1 table */ 428865c675fSPrasad Joshi t = cpu_to_be64(l2t_off); 4290df6b4d9SPekka Enberg 4300df6b4d9SPekka Enberg if (qcow_pwrite_sync(q->fd, &t, sizeof(t), header->l1_table_offset + l1t_idx * sizeof(u64)) < 0) { 431865c675fSPrasad Joshi /* restore file to consistent state */ 432865c675fSPrasad Joshi if (ftruncate(q->fd, f_sz) < 0) 433*3309045fSPrasad Joshi goto free_cache; 4340df6b4d9SPekka Enberg 435*3309045fSPrasad Joshi goto free_cache; 436*3309045fSPrasad Joshi } 437*3309045fSPrasad Joshi 438*3309045fSPrasad Joshi if (cache_table(q, c) < 0) { 439*3309045fSPrasad Joshi if (ftruncate(q->fd, f_sz) < 0) 440*3309045fSPrasad Joshi goto free_cache; 441*3309045fSPrasad Joshi 442*3309045fSPrasad Joshi goto free_cache; 443865c675fSPrasad Joshi } 444865c675fSPrasad Joshi 4450df6b4d9SPekka Enberg /* Update the in-core entry */ 446865c675fSPrasad Joshi table->l1_table[l1t_idx] = l2t_off; 447865c675fSPrasad Joshi } 448865c675fSPrasad Joshi 4490df6b4d9SPekka Enberg /* Capture the state of the consistent QCOW image */ 450865c675fSPrasad Joshi f_sz = file_size(q->fd); 451865c675fSPrasad Joshi if (!f_sz) 452*3309045fSPrasad Joshi goto error; 453865c675fSPrasad Joshi 454*3309045fSPrasad Joshi clust_start = l2t[l2t_idx] & ~header->oflag_mask; 455865c675fSPrasad Joshi if (!clust_start) { 456865c675fSPrasad Joshi clust_start = ALIGN(f_sz, clust_sz); 457865c675fSPrasad Joshi update_meta = true; 4580df6b4d9SPekka Enberg } else 4590df6b4d9SPekka Enberg update_meta = false; 460865c675fSPrasad Joshi 4610df6b4d9SPekka Enberg /* Write actual data */ 462865c675fSPrasad Joshi if (pwrite_in_full(q->fd, buf, len, clust_start + clust_off) < 0) 463865c675fSPrasad Joshi goto error; 464865c675fSPrasad Joshi 465865c675fSPrasad Joshi if (update_meta) { 466865c675fSPrasad Joshi t = cpu_to_be64(clust_start); 4670df6b4d9SPekka Enberg if (qcow_pwrite_sync(q->fd, &t, sizeof(t), l2t_off + l2t_idx * sizeof(u64)) < 0) { 4680df6b4d9SPekka Enberg /* Restore the file to consistent state */ 469865c675fSPrasad Joshi if (ftruncate(q->fd, f_sz) < 0) 470865c675fSPrasad Joshi goto error; 4710df6b4d9SPekka Enberg 472865c675fSPrasad Joshi goto error; 473865c675fSPrasad Joshi } 474*3309045fSPrasad Joshi 475*3309045fSPrasad Joshi /* Update the cached level2 entry */ 476*3309045fSPrasad Joshi l2t[l2t_idx] = clust_start; 477865c675fSPrasad Joshi } 4780df6b4d9SPekka Enberg 479865c675fSPrasad Joshi return len; 480*3309045fSPrasad Joshi 481*3309045fSPrasad Joshi free_cache: 482*3309045fSPrasad Joshi free(c); 483865c675fSPrasad Joshi error: 484865c675fSPrasad Joshi return -1; 485865c675fSPrasad Joshi } 486865c675fSPrasad Joshi 487b1c84095SPekka Enberg static ssize_t qcow_write_sector(struct disk_image *disk, u64 sector, void *src, u32 src_len) 48886835cedSPrasad Joshi { 489865c675fSPrasad Joshi struct qcow *q = disk->priv; 490865c675fSPrasad Joshi struct qcow_header *header = q->header; 491c4acb611SIngo Molnar u32 nr_written; 4920df6b4d9SPekka Enberg char *buf; 493865c675fSPrasad Joshi u64 offset; 494865c675fSPrasad Joshi ssize_t nr; 495865c675fSPrasad Joshi 4960df6b4d9SPekka Enberg buf = src; 4970df6b4d9SPekka Enberg nr_written = 0; 498865c675fSPrasad Joshi offset = sector << SECTOR_SHIFT; 4990df6b4d9SPekka Enberg 5000df6b4d9SPekka Enberg while (nr_written < src_len) { 501865c675fSPrasad Joshi if (offset >= header->size) 5020df6b4d9SPekka Enberg return -1; 503865c675fSPrasad Joshi 504b1c84095SPekka Enberg nr = qcow_write_cluster(q, offset, buf, src_len - nr_written); 505865c675fSPrasad Joshi if (nr < 0) 5060df6b4d9SPekka Enberg return -1; 507865c675fSPrasad Joshi 5080df6b4d9SPekka Enberg nr_written += nr; 509865c675fSPrasad Joshi buf += nr; 510865c675fSPrasad Joshi offset += nr; 511865c675fSPrasad Joshi } 5120df6b4d9SPekka Enberg 51372133dd2SAsias He return nr_written; 51486835cedSPrasad Joshi } 51586835cedSPrasad Joshi 516b1c84095SPekka Enberg static ssize_t qcow_nowrite_sector(struct disk_image *disk, u64 sector, void *src, u32 src_len) 517f10860caSPekka Enberg { 518f10860caSPekka Enberg /* I/O error */ 519b1c84095SPekka Enberg pr_info("%s: no write support\n", __func__); 520f10860caSPekka Enberg return -1; 521f10860caSPekka Enberg } 522f10860caSPekka Enberg 523b1c84095SPekka Enberg static int qcow_disk_close(struct disk_image *disk) 52486835cedSPrasad Joshi { 52586835cedSPrasad Joshi struct qcow *q; 52686835cedSPrasad Joshi 52743835ac9SSasha Levin if (!disk) 52872133dd2SAsias He return 0; 52986835cedSPrasad Joshi 53043835ac9SSasha Levin q = disk->priv; 53186835cedSPrasad Joshi 532*3309045fSPrasad Joshi free_cache(q); 5336c6f79b6SPrasad Joshi free(q->table.l1_table); 53486835cedSPrasad Joshi free(q->header); 53586835cedSPrasad Joshi free(q); 53672133dd2SAsias He 53772133dd2SAsias He return 0; 53886835cedSPrasad Joshi } 53986835cedSPrasad Joshi 540b1c84095SPekka Enberg static struct disk_image_operations qcow_disk_readonly_ops = { 541b1c84095SPekka Enberg .read_sector = qcow_read_sector, 542b1c84095SPekka Enberg .write_sector = qcow_nowrite_sector, 543b1c84095SPekka Enberg .close = qcow_disk_close, 544f10860caSPekka Enberg }; 545f10860caSPekka Enberg 546b1c84095SPekka Enberg static struct disk_image_operations qcow_disk_ops = { 547b1c84095SPekka Enberg .read_sector = qcow_read_sector, 548b1c84095SPekka Enberg .write_sector = qcow_write_sector, 549b1c84095SPekka Enberg .close = qcow_disk_close, 55086835cedSPrasad Joshi }; 55186835cedSPrasad Joshi 55286835cedSPrasad Joshi static int qcow_read_l1_table(struct qcow *q) 55386835cedSPrasad Joshi { 554ad627d62SPekka Enberg struct qcow_header *header = q->header; 55500adcc1bSPrasad Joshi struct qcow_table *table = &q->table; 55600adcc1bSPrasad Joshi u64 i; 55786835cedSPrasad Joshi 558ad627d62SPekka Enberg table->table_size = header->l1_size; 55986835cedSPrasad Joshi 56000adcc1bSPrasad Joshi table->l1_table = calloc(table->table_size, sizeof(u64)); 56100adcc1bSPrasad Joshi if (!table->l1_table) 56286835cedSPrasad Joshi return -1; 56386835cedSPrasad Joshi 56400adcc1bSPrasad Joshi if (pread_in_full(q->fd, table->l1_table, sizeof(u64) * 56500adcc1bSPrasad Joshi table->table_size, header->l1_table_offset) < 0) 56686835cedSPrasad Joshi return -1; 56786835cedSPrasad Joshi 56800adcc1bSPrasad Joshi for (i = 0; i < table->table_size; i++) 56900adcc1bSPrasad Joshi be64_to_cpus(&table->l1_table[i]); 57000adcc1bSPrasad Joshi 57186835cedSPrasad Joshi return 0; 57286835cedSPrasad Joshi } 57386835cedSPrasad Joshi 574ad627d62SPekka Enberg static void *qcow2_read_header(int fd) 57586835cedSPrasad Joshi { 576ad627d62SPekka Enberg struct qcow2_header_disk f_header; 577ad627d62SPekka Enberg struct qcow_header *header; 57886835cedSPrasad Joshi 579ad627d62SPekka Enberg header = malloc(sizeof(struct qcow_header)); 58086835cedSPrasad Joshi if (!header) 58186835cedSPrasad Joshi return NULL; 58286835cedSPrasad Joshi 5830657f33dSPrasad Joshi if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0) { 5840657f33dSPrasad Joshi free(header); 58586835cedSPrasad Joshi return NULL; 5860657f33dSPrasad Joshi } 58786835cedSPrasad Joshi 588ad627d62SPekka Enberg be32_to_cpus(&f_header.magic); 589ad627d62SPekka Enberg be32_to_cpus(&f_header.version); 590ad627d62SPekka Enberg be64_to_cpus(&f_header.backing_file_offset); 591ad627d62SPekka Enberg be32_to_cpus(&f_header.backing_file_size); 592ad627d62SPekka Enberg be32_to_cpus(&f_header.cluster_bits); 593ad627d62SPekka Enberg be64_to_cpus(&f_header.size); 594ad627d62SPekka Enberg be32_to_cpus(&f_header.crypt_method); 595ad627d62SPekka Enberg be32_to_cpus(&f_header.l1_size); 596ad627d62SPekka Enberg be64_to_cpus(&f_header.l1_table_offset); 597ad627d62SPekka Enberg be64_to_cpus(&f_header.refcount_table_offset); 598ad627d62SPekka Enberg be32_to_cpus(&f_header.refcount_table_clusters); 599ad627d62SPekka Enberg be32_to_cpus(&f_header.nb_snapshots); 600ad627d62SPekka Enberg be64_to_cpus(&f_header.snapshots_offset); 601ad627d62SPekka Enberg 602ad627d62SPekka Enberg *header = (struct qcow_header) { 603ad627d62SPekka Enberg .size = f_header.size, 604ad627d62SPekka Enberg .l1_table_offset = f_header.l1_table_offset, 605ad627d62SPekka Enberg .l1_size = f_header.l1_size, 606ad627d62SPekka Enberg .cluster_bits = f_header.cluster_bits, 607ad627d62SPekka Enberg .l2_bits = f_header.cluster_bits - 3, 608ad627d62SPekka Enberg .oflag_mask = QCOW2_OFLAG_MASK, 609ad627d62SPekka Enberg }; 610ad627d62SPekka Enberg 611ad627d62SPekka Enberg return header; 612ad627d62SPekka Enberg } 613ad627d62SPekka Enberg 614f10860caSPekka Enberg static struct disk_image *qcow2_probe(int fd, bool readonly) 615ad627d62SPekka Enberg { 616ad627d62SPekka Enberg struct qcow *q; 617ad627d62SPekka Enberg struct qcow_header *h; 618ad627d62SPekka Enberg struct disk_image *disk_image; 619ad627d62SPekka Enberg 620ad627d62SPekka Enberg q = calloc(1, sizeof(struct qcow)); 621ad627d62SPekka Enberg if (!q) 622ad627d62SPekka Enberg goto error; 623ad627d62SPekka Enberg 624ad627d62SPekka Enberg q->fd = fd; 625*3309045fSPrasad Joshi q->root = RB_ROOT; 626*3309045fSPrasad Joshi INIT_LIST_HEAD(&q->lru_list); 627ad627d62SPekka Enberg 628ad627d62SPekka Enberg h = q->header = qcow2_read_header(fd); 629ad627d62SPekka Enberg if (!h) 630ad627d62SPekka Enberg goto error; 631ad627d62SPekka Enberg 632ad627d62SPekka Enberg if (qcow_read_l1_table(q) < 0) 633ad627d62SPekka Enberg goto error; 634ad627d62SPekka Enberg 6357d22135fSAsias He /* 6367d22135fSAsias He * Do not use mmap use read/write instead 6377d22135fSAsias He */ 638f10860caSPekka Enberg if (readonly) 639b1c84095SPekka Enberg disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_NOMMAP); 640f10860caSPekka Enberg else 641b1c84095SPekka Enberg disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_NOMMAP); 642f10860caSPekka Enberg 643ad627d62SPekka Enberg if (!disk_image) 644ad627d62SPekka Enberg goto error; 645ad627d62SPekka Enberg disk_image->priv = q; 646ad627d62SPekka Enberg 647ad627d62SPekka Enberg return disk_image; 648ad627d62SPekka Enberg error: 649ad627d62SPekka Enberg if (!q) 650ad627d62SPekka Enberg return NULL; 651ad627d62SPekka Enberg 652ad627d62SPekka Enberg free(q->table.l1_table); 653ad627d62SPekka Enberg free(q->header); 654ad627d62SPekka Enberg free(q); 655ad627d62SPekka Enberg 656ad627d62SPekka Enberg return NULL; 657ad627d62SPekka Enberg } 658ad627d62SPekka Enberg 659ad627d62SPekka Enberg static bool qcow2_check_image(int fd) 660ad627d62SPekka Enberg { 661ad627d62SPekka Enberg struct qcow2_header_disk f_header; 662ad627d62SPekka Enberg 663ad627d62SPekka Enberg if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0) 664ad627d62SPekka Enberg return false; 665ad627d62SPekka Enberg 666ad627d62SPekka Enberg be32_to_cpus(&f_header.magic); 667ad627d62SPekka Enberg be32_to_cpus(&f_header.version); 668ad627d62SPekka Enberg 669ad627d62SPekka Enberg if (f_header.magic != QCOW_MAGIC) 670ad627d62SPekka Enberg return false; 671ad627d62SPekka Enberg 672ad627d62SPekka Enberg if (f_header.version != QCOW2_VERSION) 673ad627d62SPekka Enberg return false; 674ad627d62SPekka Enberg 675ad627d62SPekka Enberg return true; 676ad627d62SPekka Enberg } 677ad627d62SPekka Enberg 678ad627d62SPekka Enberg static void *qcow1_read_header(int fd) 679ad627d62SPekka Enberg { 680ad627d62SPekka Enberg struct qcow1_header_disk f_header; 681ad627d62SPekka Enberg struct qcow_header *header; 682ad627d62SPekka Enberg 683ad627d62SPekka Enberg header = malloc(sizeof(struct qcow_header)); 684ad627d62SPekka Enberg if (!header) 685ad627d62SPekka Enberg return NULL; 686ad627d62SPekka Enberg 687d39cefd2SSasha Levin if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0) { 688d39cefd2SSasha Levin free(header); 689ad627d62SPekka Enberg return NULL; 690d39cefd2SSasha Levin } 691ad627d62SPekka Enberg 692ad627d62SPekka Enberg be32_to_cpus(&f_header.magic); 693ad627d62SPekka Enberg be32_to_cpus(&f_header.version); 694ad627d62SPekka Enberg be64_to_cpus(&f_header.backing_file_offset); 695ad627d62SPekka Enberg be32_to_cpus(&f_header.backing_file_size); 696ad627d62SPekka Enberg be32_to_cpus(&f_header.mtime); 697ad627d62SPekka Enberg be64_to_cpus(&f_header.size); 698ad627d62SPekka Enberg be32_to_cpus(&f_header.crypt_method); 699ad627d62SPekka Enberg be64_to_cpus(&f_header.l1_table_offset); 700ad627d62SPekka Enberg 701ad627d62SPekka Enberg *header = (struct qcow_header) { 702ad627d62SPekka Enberg .size = f_header.size, 703ad627d62SPekka Enberg .l1_table_offset = f_header.l1_table_offset, 704ad627d62SPekka Enberg .l1_size = f_header.size / ((1 << f_header.l2_bits) * (1 << f_header.cluster_bits)), 705ad627d62SPekka Enberg .cluster_bits = f_header.cluster_bits, 706ad627d62SPekka Enberg .l2_bits = f_header.l2_bits, 707ad627d62SPekka Enberg .oflag_mask = QCOW1_OFLAG_MASK, 708ad627d62SPekka Enberg }; 70986835cedSPrasad Joshi 71086835cedSPrasad Joshi return header; 71186835cedSPrasad Joshi } 71286835cedSPrasad Joshi 713f10860caSPekka Enberg static struct disk_image *qcow1_probe(int fd, bool readonly) 71486835cedSPrasad Joshi { 71586835cedSPrasad Joshi struct qcow *q; 716ad627d62SPekka Enberg struct qcow_header *h; 71786835cedSPrasad Joshi struct disk_image *disk_image; 71886835cedSPrasad Joshi 71986835cedSPrasad Joshi q = calloc(1, sizeof(struct qcow)); 72086835cedSPrasad Joshi if (!q) 72186835cedSPrasad Joshi goto error; 72286835cedSPrasad Joshi 72386835cedSPrasad Joshi q->fd = fd; 724*3309045fSPrasad Joshi q->root = RB_ROOT; 725*3309045fSPrasad Joshi INIT_LIST_HEAD(&q->lru_list); 72686835cedSPrasad Joshi 72786835cedSPrasad Joshi h = q->header = qcow1_read_header(fd); 72886835cedSPrasad Joshi if (!h) 72986835cedSPrasad Joshi goto error; 73086835cedSPrasad Joshi 73186835cedSPrasad Joshi if (qcow_read_l1_table(q) < 0) 73286835cedSPrasad Joshi goto error; 73386835cedSPrasad Joshi 7347d22135fSAsias He /* 7357d22135fSAsias He * Do not use mmap use read/write instead 7367d22135fSAsias He */ 737f10860caSPekka Enberg if (readonly) 738b1c84095SPekka Enberg disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_NOMMAP); 739f10860caSPekka Enberg else 740b1c84095SPekka Enberg disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_NOMMAP); 741f10860caSPekka Enberg 74286835cedSPrasad Joshi if (!disk_image) 74386835cedSPrasad Joshi goto error; 74486835cedSPrasad Joshi disk_image->priv = q; 74586835cedSPrasad Joshi 74686835cedSPrasad Joshi return disk_image; 74786835cedSPrasad Joshi error: 74886835cedSPrasad Joshi if (!q) 74986835cedSPrasad Joshi return NULL; 75086835cedSPrasad Joshi 7516c6f79b6SPrasad Joshi free(q->table.l1_table); 75286835cedSPrasad Joshi free(q->header); 75386835cedSPrasad Joshi free(q); 75486835cedSPrasad Joshi 75586835cedSPrasad Joshi return NULL; 75686835cedSPrasad Joshi } 75786835cedSPrasad Joshi 758ad627d62SPekka Enberg static bool qcow1_check_image(int fd) 75986835cedSPrasad Joshi { 760ad627d62SPekka Enberg struct qcow1_header_disk f_header; 76186835cedSPrasad Joshi 762ad627d62SPekka Enberg if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0) 763ad627d62SPekka Enberg return false; 76486835cedSPrasad Joshi 765ad627d62SPekka Enberg be32_to_cpus(&f_header.magic); 766ad627d62SPekka Enberg be32_to_cpus(&f_header.version); 76786835cedSPrasad Joshi 768ad627d62SPekka Enberg if (f_header.magic != QCOW_MAGIC) 769ad627d62SPekka Enberg return false; 77086835cedSPrasad Joshi 771ad627d62SPekka Enberg if (f_header.version != QCOW1_VERSION) 772ad627d62SPekka Enberg return false; 77386835cedSPrasad Joshi 774ad627d62SPekka Enberg return true; 77586835cedSPrasad Joshi } 77686835cedSPrasad Joshi 777f10860caSPekka Enberg struct disk_image *qcow_probe(int fd, bool readonly) 77886835cedSPrasad Joshi { 779ad627d62SPekka Enberg if (qcow1_check_image(fd)) 780f10860caSPekka Enberg return qcow1_probe(fd, readonly); 781ad627d62SPekka Enberg 782ad627d62SPekka Enberg if (qcow2_check_image(fd)) 783f10860caSPekka Enberg return qcow2_probe(fd, readonly); 784ad627d62SPekka Enberg 785ad627d62SPekka Enberg return NULL; 78686835cedSPrasad Joshi } 787