xref: /kvmtool/disk/qcow.c (revision 3ecac800ec294c0f04d27d033f4cb6a9fbf9b71c)
186835cedSPrasad Joshi #include "kvm/qcow.h"
286835cedSPrasad Joshi 
386835cedSPrasad Joshi #include "kvm/disk-image.h"
486835cedSPrasad Joshi #include "kvm/read-write.h"
5c0799eb9SPekka Enberg #include "kvm/mutex.h"
686835cedSPrasad Joshi #include "kvm/util.h"
786835cedSPrasad Joshi 
886835cedSPrasad Joshi #include <sys/types.h>
986835cedSPrasad Joshi #include <sys/stat.h>
1086835cedSPrasad Joshi #include <stdbool.h>
1186835cedSPrasad Joshi #include <stdlib.h>
1286835cedSPrasad Joshi #include <string.h>
1386835cedSPrasad Joshi #include <unistd.h>
1486835cedSPrasad Joshi #include <fcntl.h>
1586835cedSPrasad Joshi 
1686835cedSPrasad Joshi #include <linux/byteorder.h>
17865c675fSPrasad Joshi #include <linux/kernel.h>
180df6b4d9SPekka Enberg #include <linux/types.h>
1986835cedSPrasad Joshi 
20e94cdf08SPekka Enberg static int l2_table_insert(struct rb_root *root, struct qcow_l2_table *new)
213309045fSPrasad Joshi {
223309045fSPrasad Joshi 	struct rb_node **link = &(root->rb_node), *parent = NULL;
233309045fSPrasad Joshi 	u64 offset = new->offset;
243309045fSPrasad Joshi 
253309045fSPrasad Joshi 	/* search the tree */
263309045fSPrasad Joshi 	while (*link) {
27473d58ffSPekka Enberg 		struct qcow_l2_table *t;
283309045fSPrasad Joshi 
29473d58ffSPekka Enberg 		t = rb_entry(*link, struct qcow_l2_table, node);
303309045fSPrasad Joshi 		if (!t)
313309045fSPrasad Joshi 			goto error;
323309045fSPrasad Joshi 
333309045fSPrasad Joshi 		parent = *link;
343309045fSPrasad Joshi 
353309045fSPrasad Joshi 		if (t->offset > offset)
363309045fSPrasad Joshi 			link = &(*link)->rb_left;
373309045fSPrasad Joshi 		else if (t->offset < offset)
383309045fSPrasad Joshi 			link = &(*link)->rb_right;
393309045fSPrasad Joshi 		else
403309045fSPrasad Joshi 			goto out;
413309045fSPrasad Joshi 	}
423309045fSPrasad Joshi 
433309045fSPrasad Joshi 	/* add new node */
443309045fSPrasad Joshi 	rb_link_node(&new->node, parent, link);
453309045fSPrasad Joshi 	rb_insert_color(&new->node, root);
463309045fSPrasad Joshi out:
473309045fSPrasad Joshi 	return 0;
483309045fSPrasad Joshi error:
493309045fSPrasad Joshi 	return -1;
503309045fSPrasad Joshi }
513309045fSPrasad Joshi 
52e94cdf08SPekka Enberg static struct qcow_l2_table *l2_table_lookup(struct rb_root *root, u64 offset)
533309045fSPrasad Joshi {
543309045fSPrasad Joshi 	struct rb_node *link = root->rb_node;
553309045fSPrasad Joshi 
563309045fSPrasad Joshi 	while (link) {
57473d58ffSPekka Enberg 		struct qcow_l2_table *t;
583309045fSPrasad Joshi 
59473d58ffSPekka Enberg 		t = rb_entry(link, struct qcow_l2_table, node);
603309045fSPrasad Joshi 		if (!t)
613309045fSPrasad Joshi 			goto out;
623309045fSPrasad Joshi 
633309045fSPrasad Joshi 		if (t->offset > offset)
643309045fSPrasad Joshi 			link = link->rb_left;
653309045fSPrasad Joshi 		else if (t->offset < offset)
663309045fSPrasad Joshi 			link = link->rb_right;
673309045fSPrasad Joshi 		else
683309045fSPrasad Joshi 			return t;
693309045fSPrasad Joshi 	}
703309045fSPrasad Joshi out:
713309045fSPrasad Joshi 	return NULL;
723309045fSPrasad Joshi }
733309045fSPrasad Joshi 
74e94cdf08SPekka Enberg static void l1_table_free_cache(struct qcow_l1_table *l1t)
753309045fSPrasad Joshi {
767b4eb530SPekka Enberg 	struct rb_root *r = &l1t->root;
773309045fSPrasad Joshi 	struct list_head *pos, *n;
78473d58ffSPekka Enberg 	struct qcow_l2_table *t;
793309045fSPrasad Joshi 
807b4eb530SPekka Enberg 	list_for_each_safe(pos, n, &l1t->lru_list) {
813309045fSPrasad Joshi 		/* Remove cache table from the list and RB tree */
823309045fSPrasad Joshi 		list_del(pos);
83473d58ffSPekka Enberg 		t = list_entry(pos, struct qcow_l2_table, list);
843309045fSPrasad Joshi 		rb_erase(&t->node, r);
853309045fSPrasad Joshi 
863309045fSPrasad Joshi 		/* Free the cached node */
873309045fSPrasad Joshi 		free(t);
883309045fSPrasad Joshi 	}
893309045fSPrasad Joshi }
903309045fSPrasad Joshi 
91a4e46515SPekka Enberg static int qcow_l2_cache_write(struct qcow *q, struct qcow_l2_table *c)
92a4e46515SPekka Enberg {
93a4e46515SPekka Enberg 	struct qcow_header *header = q->header;
94a4e46515SPekka Enberg 	u64 size;
95a4e46515SPekka Enberg 
96aff88976SPekka Enberg 	if (!c->dirty)
97aff88976SPekka Enberg 		return 0;
98aff88976SPekka Enberg 
99a4e46515SPekka Enberg 	size = 1 << header->l2_bits;
100a4e46515SPekka Enberg 
101aff88976SPekka Enberg 	if (pwrite_in_full(q->fd, c->table, size * sizeof(u64), c->offset) < 0)
102aff88976SPekka Enberg 		return -1;
103aff88976SPekka Enberg 
104aff88976SPekka Enberg 	c->dirty = 0;
105aff88976SPekka Enberg 
106aff88976SPekka Enberg 	return 0;
107a4e46515SPekka Enberg }
108a4e46515SPekka Enberg 
109473d58ffSPekka Enberg static int cache_table(struct qcow *q, struct qcow_l2_table *c)
1103309045fSPrasad Joshi {
1117b4eb530SPekka Enberg 	struct qcow_l1_table *l1t = &q->table;
1127b4eb530SPekka Enberg 	struct rb_root *r = &l1t->root;
113473d58ffSPekka Enberg 	struct qcow_l2_table *lru;
1143309045fSPrasad Joshi 
1157b4eb530SPekka Enberg 	if (l1t->nr_cached == MAX_CACHE_NODES) {
1163309045fSPrasad Joshi 		/*
1173309045fSPrasad Joshi 		 * The node at the head of the list is least recently used
1183309045fSPrasad Joshi 		 * node. Remove it from the list and replaced with a new node.
1193309045fSPrasad Joshi 		 */
1207b4eb530SPekka Enberg 		lru = list_first_entry(&l1t->lru_list, struct qcow_l2_table, list);
1213309045fSPrasad Joshi 
122a4e46515SPekka Enberg 		if (qcow_l2_cache_write(q, lru) < 0)
123a4e46515SPekka Enberg 			goto error;
124a4e46515SPekka Enberg 
1253309045fSPrasad Joshi 		/* Remove the node from the cache */
1263309045fSPrasad Joshi 		rb_erase(&lru->node, r);
1273309045fSPrasad Joshi 		list_del_init(&lru->list);
1287b4eb530SPekka Enberg 		l1t->nr_cached--;
1293309045fSPrasad Joshi 
1303309045fSPrasad Joshi 		/* Free the LRUed node */
1313309045fSPrasad Joshi 		free(lru);
1323309045fSPrasad Joshi 	}
1333309045fSPrasad Joshi 
1343309045fSPrasad Joshi 	/* Add new node in RB Tree: Helps in searching faster */
135e94cdf08SPekka Enberg 	if (l2_table_insert(r, c) < 0)
1363309045fSPrasad Joshi 		goto error;
1373309045fSPrasad Joshi 
1383309045fSPrasad Joshi 	/* Add in LRU replacement list */
1397b4eb530SPekka Enberg 	list_add_tail(&c->list, &l1t->lru_list);
1407b4eb530SPekka Enberg 	l1t->nr_cached++;
1413309045fSPrasad Joshi 
1423309045fSPrasad Joshi 	return 0;
1433309045fSPrasad Joshi error:
1443309045fSPrasad Joshi 	return -1;
1453309045fSPrasad Joshi }
1463309045fSPrasad Joshi 
147e94cdf08SPekka Enberg static struct qcow_l2_table *l2_table_search(struct qcow *q, u64 offset)
1483309045fSPrasad Joshi {
1497b4eb530SPekka Enberg 	struct qcow_l1_table *l1t = &q->table;
150fe8bdde0SPekka Enberg 	struct qcow_l2_table *l2t;
1513309045fSPrasad Joshi 
152e94cdf08SPekka Enberg 	l2t = l2_table_lookup(&l1t->root, offset);
153fe8bdde0SPekka Enberg 	if (!l2t)
154fe8bdde0SPekka Enberg 		return NULL;
1553309045fSPrasad Joshi 
1563309045fSPrasad Joshi 	/* Update the LRU state, by moving the searched node to list tail */
1577b4eb530SPekka Enberg 	list_move_tail(&l2t->list, &l1t->lru_list);
1583309045fSPrasad Joshi 
159fe8bdde0SPekka Enberg 	return l2t;
1603309045fSPrasad Joshi }
1613309045fSPrasad Joshi 
1623309045fSPrasad Joshi /* Allocates a new node for caching L2 table */
163473d58ffSPekka Enberg static struct qcow_l2_table *new_cache_table(struct qcow *q, u64 offset)
1643309045fSPrasad Joshi {
1653309045fSPrasad Joshi 	struct qcow_header *header = q->header;
166473d58ffSPekka Enberg 	struct qcow_l2_table *c;
1673309045fSPrasad Joshi 	u64 l2t_sz;
1683309045fSPrasad Joshi 	u64 size;
1693309045fSPrasad Joshi 
1703309045fSPrasad Joshi 	l2t_sz = 1 << header->l2_bits;
1713309045fSPrasad Joshi 	size   = sizeof(*c) + l2t_sz * sizeof(u64);
1723309045fSPrasad Joshi 	c      = calloc(1, size);
1733309045fSPrasad Joshi 	if (!c)
1743309045fSPrasad Joshi 		goto out;
1753309045fSPrasad Joshi 
1763309045fSPrasad Joshi 	c->offset = offset;
1773309045fSPrasad Joshi 	RB_CLEAR_NODE(&c->node);
1783309045fSPrasad Joshi 	INIT_LIST_HEAD(&c->list);
1793309045fSPrasad Joshi out:
1803309045fSPrasad Joshi 	return c;
1813309045fSPrasad Joshi }
1823309045fSPrasad Joshi 
183742fce76SPrasad Joshi static inline u64 get_l1_index(struct qcow *q, u64 offset)
18486835cedSPrasad Joshi {
185ad627d62SPekka Enberg 	struct qcow_header *header = q->header;
18686835cedSPrasad Joshi 
18786835cedSPrasad Joshi 	return offset >> (header->l2_bits + header->cluster_bits);
18886835cedSPrasad Joshi }
18986835cedSPrasad Joshi 
190742fce76SPrasad Joshi static inline u64 get_l2_index(struct qcow *q, u64 offset)
19186835cedSPrasad Joshi {
192ad627d62SPekka Enberg 	struct qcow_header *header = q->header;
19386835cedSPrasad Joshi 
19486835cedSPrasad Joshi 	return (offset >> (header->cluster_bits)) & ((1 << header->l2_bits)-1);
19586835cedSPrasad Joshi }
19686835cedSPrasad Joshi 
197742fce76SPrasad Joshi static inline u64 get_cluster_offset(struct qcow *q, u64 offset)
19886835cedSPrasad Joshi {
199ad627d62SPekka Enberg 	struct qcow_header *header = q->header;
20086835cedSPrasad Joshi 
20186835cedSPrasad Joshi 	return offset & ((1 << header->cluster_bits)-1);
20286835cedSPrasad Joshi }
20386835cedSPrasad Joshi 
204fe8bdde0SPekka Enberg static struct qcow_l2_table *qcow_read_l2_table(struct qcow *q, u64 offset)
2053309045fSPrasad Joshi {
2063309045fSPrasad Joshi 	struct qcow_header *header = q->header;
207fe8bdde0SPekka Enberg 	struct qcow_l2_table *l2t;
2083309045fSPrasad Joshi 	u64 size;
2093309045fSPrasad Joshi 
2103309045fSPrasad Joshi 	size = 1 << header->l2_bits;
2113309045fSPrasad Joshi 
2123309045fSPrasad Joshi 	/* search an entry for offset in cache */
213e94cdf08SPekka Enberg 	l2t = l2_table_search(q, offset);
214fe8bdde0SPekka Enberg 	if (l2t)
215fe8bdde0SPekka Enberg 		return l2t;
2163309045fSPrasad Joshi 
2173309045fSPrasad Joshi 	/* allocate new node for caching l2 table */
218fe8bdde0SPekka Enberg 	l2t = new_cache_table(q, offset);
219fe8bdde0SPekka Enberg 	if (!l2t)
2203309045fSPrasad Joshi 		goto error;
2213309045fSPrasad Joshi 
2223309045fSPrasad Joshi 	/* table not cached: read from the disk */
223fe8bdde0SPekka Enberg 	if (pread_in_full(q->fd, l2t->table, size * sizeof(u64), offset) < 0)
2243309045fSPrasad Joshi 		goto error;
2253309045fSPrasad Joshi 
2263309045fSPrasad Joshi 	/* cache the table */
227fe8bdde0SPekka Enberg 	if (cache_table(q, l2t) < 0)
2283309045fSPrasad Joshi 		goto error;
2293309045fSPrasad Joshi 
230fe8bdde0SPekka Enberg 	return l2t;
2313309045fSPrasad Joshi error:
232fe8bdde0SPekka Enberg 	free(l2t);
233fe8bdde0SPekka Enberg 	return NULL;
2343309045fSPrasad Joshi }
2353309045fSPrasad Joshi 
236b1c84095SPekka Enberg static ssize_t qcow_read_cluster(struct qcow *q, u64 offset, void *dst, u32 dst_len)
23786835cedSPrasad Joshi {
238ad627d62SPekka Enberg 	struct qcow_header *header = q->header;
2393fb67b93SPekka Enberg 	struct qcow_l1_table *l1t = &q->table;
2403fb67b93SPekka Enberg 	struct qcow_l2_table *l2t;
2413dac48d4SPrasad Joshi 	u64 cluster_size;
242742fce76SPrasad Joshi 	u64 clust_offset;
243742fce76SPrasad Joshi 	u64 clust_start;
2443fb67b93SPekka Enberg 	u64 l2t_offset;
245a51948ceSPekka Enberg 	size_t length;
2463fb67b93SPekka Enberg 	u64 l2t_size;
247742fce76SPrasad Joshi 	u64 l1_idx;
248742fce76SPrasad Joshi 	u64 l2_idx;
24986835cedSPrasad Joshi 
250dae803fbSPekka Enberg 	cluster_size = 1 << header->cluster_bits;
25186835cedSPrasad Joshi 
252c5e0624bSPrasad Joshi 	l1_idx = get_l1_index(q, offset);
2533fb67b93SPekka Enberg 	if (l1_idx >= l1t->table_size)
254c0799eb9SPekka Enberg 		return -1;
25586835cedSPrasad Joshi 
2563dac48d4SPrasad Joshi 	clust_offset = get_cluster_offset(q, offset);
2573dac48d4SPrasad Joshi 	if (clust_offset >= cluster_size)
258c0799eb9SPekka Enberg 		return -1;
2593dac48d4SPrasad Joshi 
2603dac48d4SPrasad Joshi 	length = cluster_size - clust_offset;
2613dac48d4SPrasad Joshi 	if (length > dst_len)
2623dac48d4SPrasad Joshi 		length = dst_len;
2633dac48d4SPrasad Joshi 
264c0799eb9SPekka Enberg 	mutex_lock(&q->mutex);
265b2ebe61bSPekka Enberg 
2663fb67b93SPekka Enberg 	l2t_offset = be64_to_cpu(l1t->l1_table[l1_idx]);
2673fb67b93SPekka Enberg 	if (l2t_offset & QCOW_OFLAG_COMPRESSED) {
268b2ebe61bSPekka Enberg 		pr_warning("compressed sectors are not supported");
269b2ebe61bSPekka Enberg 		goto out_error;
270b2ebe61bSPekka Enberg 	}
271b2ebe61bSPekka Enberg 
2723fb67b93SPekka Enberg 	l2t_offset &= QCOW_OFFSET_MASK;
2733fb67b93SPekka Enberg 	if (!l2t_offset)
2743dac48d4SPrasad Joshi 		goto zero_cluster;
27586835cedSPrasad Joshi 
2763fb67b93SPekka Enberg 	l2t_size = 1 << header->l2_bits;
27786835cedSPrasad Joshi 
2783309045fSPrasad Joshi 	/* read and cache level 2 table */
2793fb67b93SPekka Enberg 	l2t = qcow_read_l2_table(q, l2t_offset);
2803fb67b93SPekka Enberg 	if (!l2t)
281b6edb0ecSSasha Levin 		goto out_error;
28286835cedSPrasad Joshi 
283c5e0624bSPrasad Joshi 	l2_idx = get_l2_index(q, offset);
2843fb67b93SPekka Enberg 	if (l2_idx >= l2t_size)
285b6edb0ecSSasha Levin 		goto out_error;
28686835cedSPrasad Joshi 
2873fb67b93SPekka Enberg 	clust_start = be64_to_cpu(l2t->table[l2_idx]);
288b2ebe61bSPekka Enberg 	if (clust_start & QCOW_OFLAG_COMPRESSED) {
289b2ebe61bSPekka Enberg 		pr_warning("compressed sectors are not supported");
290b2ebe61bSPekka Enberg 		goto out_error;
291b2ebe61bSPekka Enberg 	}
292b2ebe61bSPekka Enberg 
293b2ebe61bSPekka Enberg 	clust_start &= QCOW_OFFSET_MASK;
29486835cedSPrasad Joshi 	if (!clust_start)
2953dac48d4SPrasad Joshi 		goto zero_cluster;
29686835cedSPrasad Joshi 
297c0799eb9SPekka Enberg 	mutex_unlock(&q->mutex);
29886835cedSPrasad Joshi 
299c0799eb9SPekka Enberg 	if (pread_in_full(q->fd, dst, length, clust_start + clust_offset) < 0)
300c0799eb9SPekka Enberg 		return -1;
301c0799eb9SPekka Enberg 
3023dac48d4SPrasad Joshi 	return length;
30386835cedSPrasad Joshi 
304179b71f0SPekka Enberg zero_cluster:
305c0799eb9SPekka Enberg 	mutex_unlock(&q->mutex);
306179b71f0SPekka Enberg 	memset(dst, 0, length);
307c0799eb9SPekka Enberg 	return length;
308179b71f0SPekka Enberg 
30986835cedSPrasad Joshi out_error:
310c0799eb9SPekka Enberg 	mutex_unlock(&q->mutex);
311179b71f0SPekka Enberg 	length = -1;
312c0799eb9SPekka Enberg 	return -1;
3133dac48d4SPrasad Joshi }
314b6edb0ecSSasha Levin 
315b1c84095SPekka Enberg static ssize_t qcow_read_sector(struct disk_image *disk, u64 sector, void *dst, u32 dst_len)
3163dac48d4SPrasad Joshi {
31743835ac9SSasha Levin 	struct qcow *q = disk->priv;
318ad627d62SPekka Enberg 	struct qcow_header *header = q->header;
319d8eea993SPekka Enberg 	u32 nr_read;
3200df6b4d9SPekka Enberg 	u64 offset;
3210df6b4d9SPekka Enberg 	char *buf;
3223dac48d4SPrasad Joshi 	u32 nr;
3233dac48d4SPrasad Joshi 
3240df6b4d9SPekka Enberg 	buf		= dst;
325d8eea993SPekka Enberg 	nr_read		= 0;
3260df6b4d9SPekka Enberg 
327d8eea993SPekka Enberg 	while (nr_read < dst_len) {
3283dac48d4SPrasad Joshi 		offset		= sector << SECTOR_SHIFT;
3293dac48d4SPrasad Joshi 		if (offset >= header->size)
3300df6b4d9SPekka Enberg 			return -1;
3313dac48d4SPrasad Joshi 
332b1c84095SPekka Enberg 		nr = qcow_read_cluster(q, offset, buf, dst_len - nr_read);
333a51948ceSPekka Enberg 		if (nr <= 0)
3340df6b4d9SPekka Enberg 			return -1;
3353dac48d4SPrasad Joshi 
336d8eea993SPekka Enberg 		nr_read		+= nr;
3373dac48d4SPrasad Joshi 		buf		+= nr;
3383dac48d4SPrasad Joshi 		sector		+= (nr >> SECTOR_SHIFT);
3393dac48d4SPrasad Joshi 	}
3400df6b4d9SPekka Enberg 
34172133dd2SAsias He 	return dst_len;
34286835cedSPrasad Joshi }
34386835cedSPrasad Joshi 
static inline u64 file_size(int fd)
{
	struct stat st;

	/* Returns 0 both on fstat() failure and for a genuinely empty file. */
	return fstat(fd, &st) < 0 ? 0 : st.st_size;
}
353865c675fSPrasad Joshi 
3540df6b4d9SPekka Enberg static inline int qcow_pwrite_sync(int fd, void *buf, size_t count, off_t offset)
355865c675fSPrasad Joshi {
356865c675fSPrasad Joshi 	if (pwrite_in_full(fd, buf, count, offset) < 0)
357865c675fSPrasad Joshi 		return -1;
3580df6b4d9SPekka Enberg 
3597d94a719SPekka Enberg 	return fdatasync(fd);
360865c675fSPrasad Joshi }
361865c675fSPrasad Joshi 
362865c675fSPrasad Joshi /* Writes a level 2 table at the end of the file. */
363b1c84095SPekka Enberg static u64 qcow_write_l2_table(struct qcow *q, u64 *table)
364865c675fSPrasad Joshi {
365865c675fSPrasad Joshi 	struct qcow_header *header = q->header;
366865c675fSPrasad Joshi 	u64 clust_sz;
367865c675fSPrasad Joshi 	u64 f_sz;
3680df6b4d9SPekka Enberg 	u64 off;
3690df6b4d9SPekka Enberg 	u64 sz;
370865c675fSPrasad Joshi 
371865c675fSPrasad Joshi 	f_sz		= file_size(q->fd);
372865c675fSPrasad Joshi 	if (!f_sz)
373865c675fSPrasad Joshi 		return 0;
374865c675fSPrasad Joshi 
375865c675fSPrasad Joshi 	sz		= 1 << header->l2_bits;
376865c675fSPrasad Joshi 	clust_sz	= 1 << header->cluster_bits;
377865c675fSPrasad Joshi 	off		= ALIGN(f_sz, clust_sz);
378865c675fSPrasad Joshi 
3796fe151aeSPekka Enberg 	if (pwrite_in_full(q->fd, table, sz * sizeof(u64), off) < 0)
380865c675fSPrasad Joshi 		return 0;
3810df6b4d9SPekka Enberg 
382865c675fSPrasad Joshi 	return off;
383865c675fSPrasad Joshi }
384865c675fSPrasad Joshi 
385*3ecac800SPekka Enberg static void refcount_table_free_cache(struct qcow_refcount_table *rft)
386*3ecac800SPekka Enberg {
387*3ecac800SPekka Enberg 	struct rb_root *r = &rft->root;
388*3ecac800SPekka Enberg 	struct list_head *pos, *n;
389*3ecac800SPekka Enberg 	struct qcow_refcount_block *t;
390*3ecac800SPekka Enberg 
391*3ecac800SPekka Enberg 	list_for_each_safe(pos, n, &rft->lru_list) {
392*3ecac800SPekka Enberg 		list_del(pos);
393*3ecac800SPekka Enberg 		t = list_entry(pos, struct qcow_refcount_block, list);
394*3ecac800SPekka Enberg 		rb_erase(&t->node, r);
395*3ecac800SPekka Enberg 
396*3ecac800SPekka Enberg 		free(t);
397*3ecac800SPekka Enberg 	}
398*3ecac800SPekka Enberg }
399*3ecac800SPekka Enberg 
400*3ecac800SPekka Enberg static int refcount_block_insert(struct rb_root *root, struct qcow_refcount_block *new)
401*3ecac800SPekka Enberg {
402*3ecac800SPekka Enberg 	struct rb_node **link = &(root->rb_node), *parent = NULL;
403*3ecac800SPekka Enberg 	u64 offset = new->offset;
404*3ecac800SPekka Enberg 
405*3ecac800SPekka Enberg 	/* search the tree */
406*3ecac800SPekka Enberg 	while (*link) {
407*3ecac800SPekka Enberg 		struct qcow_refcount_block *t;
408*3ecac800SPekka Enberg 
409*3ecac800SPekka Enberg 		t = rb_entry(*link, struct qcow_refcount_block, node);
410*3ecac800SPekka Enberg 		if (!t)
411*3ecac800SPekka Enberg 			goto error;
412*3ecac800SPekka Enberg 
413*3ecac800SPekka Enberg 		parent = *link;
414*3ecac800SPekka Enberg 
415*3ecac800SPekka Enberg 		if (t->offset > offset)
416*3ecac800SPekka Enberg 			link = &(*link)->rb_left;
417*3ecac800SPekka Enberg 		else if (t->offset < offset)
418*3ecac800SPekka Enberg 			link = &(*link)->rb_right;
419*3ecac800SPekka Enberg 		else
420*3ecac800SPekka Enberg 			goto out;
421*3ecac800SPekka Enberg 	}
422*3ecac800SPekka Enberg 
423*3ecac800SPekka Enberg 	/* add new node */
424*3ecac800SPekka Enberg 	rb_link_node(&new->node, parent, link);
425*3ecac800SPekka Enberg 	rb_insert_color(&new->node, root);
426*3ecac800SPekka Enberg out:
427*3ecac800SPekka Enberg 	return 0;
428*3ecac800SPekka Enberg error:
429*3ecac800SPekka Enberg 	return -1;
430*3ecac800SPekka Enberg }
431*3ecac800SPekka Enberg 
432*3ecac800SPekka Enberg static int write_refcount_block(struct qcow *q, struct qcow_refcount_block *rfb)
433*3ecac800SPekka Enberg {
434*3ecac800SPekka Enberg 	if (!rfb->dirty)
435*3ecac800SPekka Enberg 		return 0;
436*3ecac800SPekka Enberg 
437*3ecac800SPekka Enberg 	if (pwrite_in_full(q->fd, rfb->entries, rfb->size * sizeof(u16), rfb->offset) < 0)
438*3ecac800SPekka Enberg 		return -1;
439*3ecac800SPekka Enberg 
440*3ecac800SPekka Enberg 	rfb->dirty = 0;
441*3ecac800SPekka Enberg 
442*3ecac800SPekka Enberg 	return 0;
443*3ecac800SPekka Enberg }
444*3ecac800SPekka Enberg 
445*3ecac800SPekka Enberg static int cache_refcount_block(struct qcow *q, struct qcow_refcount_block *c)
446*3ecac800SPekka Enberg {
447*3ecac800SPekka Enberg 	struct qcow_refcount_table *rft = &q->refcount_table;
448*3ecac800SPekka Enberg 	struct rb_root *r = &rft->root;
449*3ecac800SPekka Enberg 	struct qcow_refcount_block *lru;
450*3ecac800SPekka Enberg 
451*3ecac800SPekka Enberg 	if (rft->nr_cached == MAX_CACHE_NODES) {
452*3ecac800SPekka Enberg 		lru = list_first_entry(&rft->lru_list, struct qcow_refcount_block, list);
453*3ecac800SPekka Enberg 
454*3ecac800SPekka Enberg 		if (write_refcount_block(q, lru) < 0)
455*3ecac800SPekka Enberg 			goto error;
456*3ecac800SPekka Enberg 
457*3ecac800SPekka Enberg 		rb_erase(&lru->node, r);
458*3ecac800SPekka Enberg 		list_del_init(&lru->list);
459*3ecac800SPekka Enberg 		rft->nr_cached--;
460*3ecac800SPekka Enberg 
461*3ecac800SPekka Enberg 		free(lru);
462*3ecac800SPekka Enberg 	}
463*3ecac800SPekka Enberg 
464*3ecac800SPekka Enberg 	if (refcount_block_insert(r, c) < 0)
465*3ecac800SPekka Enberg 		goto error;
466*3ecac800SPekka Enberg 
467*3ecac800SPekka Enberg 	list_add_tail(&c->list, &rft->lru_list);
468*3ecac800SPekka Enberg 	rft->nr_cached++;
469*3ecac800SPekka Enberg 
470*3ecac800SPekka Enberg 	return 0;
471*3ecac800SPekka Enberg error:
472*3ecac800SPekka Enberg 	return -1;
473*3ecac800SPekka Enberg }
474*3ecac800SPekka Enberg 
475*3ecac800SPekka Enberg static struct qcow_refcount_block *new_refcount_block(struct qcow *q, u64 rfb_offset)
476*3ecac800SPekka Enberg {
477*3ecac800SPekka Enberg 	struct qcow_header *header = q->header;
478*3ecac800SPekka Enberg 	struct qcow_refcount_block *rfb;
479*3ecac800SPekka Enberg 	u64 cluster_size;
480*3ecac800SPekka Enberg 
481*3ecac800SPekka Enberg 	cluster_size = 1 << header->cluster_bits;
482*3ecac800SPekka Enberg 
483*3ecac800SPekka Enberg 	rfb = malloc(sizeof *rfb + cluster_size);
484*3ecac800SPekka Enberg 	if (!rfb)
485*3ecac800SPekka Enberg 		return NULL;
486*3ecac800SPekka Enberg 
487*3ecac800SPekka Enberg 	rfb->offset = rfb_offset;
488*3ecac800SPekka Enberg 	rfb->size = cluster_size / sizeof(u16);
489*3ecac800SPekka Enberg 	RB_CLEAR_NODE(&rfb->node);
490*3ecac800SPekka Enberg 	INIT_LIST_HEAD(&rfb->list);
491*3ecac800SPekka Enberg 
492*3ecac800SPekka Enberg 	return rfb;
493*3ecac800SPekka Enberg }
494*3ecac800SPekka Enberg 
495*3ecac800SPekka Enberg static struct qcow_refcount_block *refcount_block_lookup(struct rb_root *root, u64 offset)
496*3ecac800SPekka Enberg {
497*3ecac800SPekka Enberg 	struct rb_node *link = root->rb_node;
498*3ecac800SPekka Enberg 
499*3ecac800SPekka Enberg 	while (link) {
500*3ecac800SPekka Enberg 		struct qcow_refcount_block *t;
501*3ecac800SPekka Enberg 
502*3ecac800SPekka Enberg 		t = rb_entry(link, struct qcow_refcount_block, node);
503*3ecac800SPekka Enberg 		if (!t)
504*3ecac800SPekka Enberg 			goto out;
505*3ecac800SPekka Enberg 
506*3ecac800SPekka Enberg 		if (t->offset > offset)
507*3ecac800SPekka Enberg 			link = link->rb_left;
508*3ecac800SPekka Enberg 		else if (t->offset < offset)
509*3ecac800SPekka Enberg 			link = link->rb_right;
510*3ecac800SPekka Enberg 		else
511*3ecac800SPekka Enberg 			return t;
512*3ecac800SPekka Enberg 	}
513*3ecac800SPekka Enberg out:
514*3ecac800SPekka Enberg 	return NULL;
515*3ecac800SPekka Enberg }
516*3ecac800SPekka Enberg 
517*3ecac800SPekka Enberg static struct qcow_refcount_block *refcount_block_search(struct qcow *q, u64 offset)
518*3ecac800SPekka Enberg {
519*3ecac800SPekka Enberg 	struct qcow_refcount_table *rft = &q->refcount_table;
520*3ecac800SPekka Enberg 	struct qcow_refcount_block *rfb;
521*3ecac800SPekka Enberg 
522*3ecac800SPekka Enberg 	rfb = refcount_block_lookup(&rft->root, offset);
523*3ecac800SPekka Enberg 	if (!rfb)
524*3ecac800SPekka Enberg 		return NULL;
525*3ecac800SPekka Enberg 
526*3ecac800SPekka Enberg 	/* Update the LRU state, by moving the searched node to list tail */
527*3ecac800SPekka Enberg 	list_move_tail(&rfb->list, &rft->lru_list);
528*3ecac800SPekka Enberg 
529*3ecac800SPekka Enberg 	return rfb;
530*3ecac800SPekka Enberg }
531*3ecac800SPekka Enberg 
/*
 * Return the (cached) refcount block covering cluster @clust_idx, reading
 * it from disk on a cache miss. Returns NULL on failure.
 */
static struct qcow_refcount_block *qcow_read_refcount_block(struct qcow *q, u64 clust_idx)
{
	struct qcow_header *header = q->header;
	struct qcow_refcount_table *rft = &q->refcount_table;
	struct qcow_refcount_block *rfb;
	u64 rfb_offset;
	u64 rft_idx;

	/* Each block covers cluster_size / sizeof(u16) clusters. */
	rft_idx = clust_idx >> (header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT);
	if (rft_idx >= rft->rf_size)
		return NULL;

	/*
	 * NOTE(review): rfb_offset == 0 (block not allocated in the image) is
	 * not rejected here and would lead to a read from offset 0 — confirm
	 * callers never reach an unallocated refcount block.
	 */
	rfb_offset = be64_to_cpu(rft->rf_table[rft_idx]);

	/* Fast path: block already cached. */
	rfb = refcount_block_search(q, rfb_offset);
	if (rfb)
		return rfb;

	/* Cache miss: allocate a node and fill it from disk. */
	rfb = new_refcount_block(q, rfb_offset);
	if (!rfb)
		return NULL;

	if (pread_in_full(q->fd, rfb->entries, rfb->size * sizeof(u16), rfb_offset) < 0)
		goto error_free_rfb;

	if (cache_refcount_block(q, rfb) < 0)
		goto error_free_rfb;

	return rfb;

error_free_rfb:
	free(rfb);

	return NULL;
}
567*3ecac800SPekka Enberg 
/*
 * A QCOW file might grow during a write operation. Not only data but metadata
 * is also written at the end of the file. Therefore it is necessary to ensure
 * every write is committed to disk. Hence we use qcow_pwrite_sync() to
 * synchronize the in-core state of the QCOW image to disk.
 *
 * We also try to restore the image to a consistent state if a metadata
 * operation fails. The two metadata operations are: level 1 and level 2 table
 * update. If either of them fails the image is truncated to a consistent state.
 */
/*
 * Write up to @src_len bytes from @buf into the cluster containing @offset,
 * allocating the L2 table and the data cluster at the end of the file if
 * needed. Returns the number of bytes written (bounded by the end of the
 * cluster), or -1 on error.
 */
static ssize_t qcow_write_cluster(struct qcow *q, u64 offset, void *buf, u32 src_len)
{
	struct qcow_header *header = q->header;
	struct qcow_l1_table *l1t = &q->table;
	struct qcow_l2_table *l2t;
	u64 clust_start;
	u64 clust_flags;
	u64 l2t_offset;
	u64 clust_off;
	u64 l2t_size;
	u64 clust_sz;
	u64 l1t_idx;
	u64 l2t_idx;
	u64 f_sz;
	u64 len;

	l2t		= NULL;
	l2t_size	= 1 << header->l2_bits;
	clust_sz	= 1 << header->cluster_bits;

	/* Bounds-check all three index components of the virtual offset. */
	l1t_idx = get_l1_index(q, offset);
	if (l1t_idx >= l1t->table_size)
		return -1;

	l2t_idx = get_l2_index(q, offset);
	if (l2t_idx >= l2t_size)
		return -1;

	clust_off = get_cluster_offset(q, offset);
	if (clust_off >= clust_sz)
		return -1;

	/* Never write past the end of this cluster. */
	len = clust_sz - clust_off;
	if (len > src_len)
		len = src_len;

	mutex_lock(&q->mutex);

	l2t_offset = be64_to_cpu(l1t->l1_table[l1t_idx]);
	if (l2t_offset & QCOW_OFLAG_COMPRESSED) {
		pr_warning("compressed clusters are not supported");
		goto error;
	}
	if (!(l2t_offset & QCOW_OFLAG_COPIED)) {
		pr_warning("L2 copy-on-write clusters are not supported");
		goto error;
	}

	l2t_offset &= QCOW_OFFSET_MASK;
	if (l2t_offset) {
		/* read and cache l2 table */
		l2t = qcow_read_l2_table(q, l2t_offset);
		if (!l2t)
			goto error;
	} else {
		/* No L2 table yet: create a zeroed one at the end of the file. */
		l2t = new_cache_table(q, l2t_offset);
		if (!l2t)
			goto error;

		/* Capture the state of the consistent QCOW image */
		f_sz = file_size(q->fd);
		if (!f_sz)
			goto free_cache;

		/* Write the l2 table of 0's at the end of the file */
		l2t_offset = qcow_write_l2_table(q, l2t->table);
		if (!l2t_offset)
			goto free_cache;

		if (cache_table(q, l2t) < 0) {
			/*
			 * Roll the file back to its pre-allocation size.
			 * NOTE(review): both branches take the same goto; the
			 * ftruncate() failure is effectively ignored here.
			 */
			if (ftruncate(q->fd, f_sz) < 0)
				goto free_cache;

			goto free_cache;
		}

		/* Update the in-core entry */
		l1t->l1_table[l1t_idx] = cpu_to_be64(l2t_offset);
	}

	/* Capture the state of the consistent QCOW image */
	f_sz		= file_size(q->fd);
	if (!f_sz)
		goto error;

	clust_start = be64_to_cpu(l2t->table[l2t_idx]);

	clust_flags = clust_start & QCOW_OFLAGS_MASK;
	if (clust_flags & QCOW_OFLAG_COMPRESSED) {
		pr_warning("compressed clusters are not supported");
		goto error;
	}

	clust_start &= QCOW_OFFSET_MASK;
	if (!clust_start) {
		/* Allocate the data cluster at the cluster-aligned file end. */
		clust_start		= ALIGN(f_sz, clust_sz);
		l2t->table[l2t_idx]	= cpu_to_be64(clust_start | QCOW_OFLAG_COPIED);
		l2t->dirty		= 1;
	}

	if (!(clust_flags & QCOW_OFLAG_COPIED)) {
		struct qcow_refcount_block *rfb = NULL;
		u16 clust_refcount;
		u64 clust_idx;
		u64 rfb_idx;

		clust_idx = (clust_start & QCOW_OFFSET_MASK) >> (header->cluster_bits);

		rfb = qcow_read_refcount_block(q, clust_idx);
		if (!rfb) {
			pr_warning("L1: error while reading refcount table");
			goto error;
		}

		rfb_idx = clust_idx & (((1ULL << (header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT)) - 1));
		if (rfb_idx >= rfb->size) {
			pr_warning("L1: refcount block index out of bounds");
			goto error;
		}

		/* Freshly allocated cluster: mark it referenced once. */
		clust_refcount = be16_to_cpu(rfb->entries[rfb_idx]);
		if (!clust_refcount) {
			clust_refcount = 1;
			rfb->entries[rfb_idx] = cpu_to_be16(clust_refcount);
			rfb->dirty = 1;
		}

		/* Shared clusters would need copy-on-write, which we lack. */
		if (clust_refcount > 1) {
			pr_warning("L1 copy-on-write clusters are not supported");
			goto error;
		}
	}

	mutex_unlock(&q->mutex);

	/* Write actual data */
	if (pwrite_in_full(q->fd, buf, len, clust_start + clust_off) < 0)
		return -1;

	return len;

free_cache:
	free(l2t);
error:
	mutex_unlock(&q->mutex);
	return -1;
}
725865c675fSPrasad Joshi 
726b1c84095SPekka Enberg static ssize_t qcow_write_sector(struct disk_image *disk, u64 sector, void *src, u32 src_len)
72786835cedSPrasad Joshi {
728865c675fSPrasad Joshi 	struct qcow *q = disk->priv;
729865c675fSPrasad Joshi 	struct qcow_header *header = q->header;
730c4acb611SIngo Molnar 	u32 nr_written;
7310df6b4d9SPekka Enberg 	char *buf;
732865c675fSPrasad Joshi 	u64 offset;
733865c675fSPrasad Joshi 	ssize_t nr;
734865c675fSPrasad Joshi 
7350df6b4d9SPekka Enberg 	buf		= src;
7360df6b4d9SPekka Enberg 	nr_written	= 0;
737865c675fSPrasad Joshi 	offset		= sector << SECTOR_SHIFT;
7380df6b4d9SPekka Enberg 
7390df6b4d9SPekka Enberg 	while (nr_written < src_len) {
740865c675fSPrasad Joshi 		if (offset >= header->size)
7410df6b4d9SPekka Enberg 			return -1;
742865c675fSPrasad Joshi 
743b1c84095SPekka Enberg 		nr = qcow_write_cluster(q, offset, buf, src_len - nr_written);
744865c675fSPrasad Joshi 		if (nr < 0)
7450df6b4d9SPekka Enberg 			return -1;
746865c675fSPrasad Joshi 
7470df6b4d9SPekka Enberg 		nr_written	+= nr;
748865c675fSPrasad Joshi 		buf		+= nr;
749865c675fSPrasad Joshi 		offset		+= nr;
750865c675fSPrasad Joshi 	}
7510df6b4d9SPekka Enberg 
75272133dd2SAsias He 	return nr_written;
75386835cedSPrasad Joshi }
75486835cedSPrasad Joshi 
/*
 * Write callback installed for read-only images: always fails, so the
 * guest observes an I/O error instead of silently losing the data.
 */
static ssize_t qcow_nowrite_sector(struct disk_image *disk, u64 sector, void *src, u32 src_len)
{
	/* I/O error */
	pr_info("%s: no write support\n", __func__);
	return -1;
}
761f10860caSPekka Enberg 
/*
 * Flush all dirty metadata and data to stable storage.
 *
 * Ordering matters for crash consistency: dirty refcount blocks are
 * written and synced before the L2 tables, and the L2 tables are written
 * and synced before the L1 table that points at them. A final fsync()
 * covers the L1 write (and file metadata).
 *
 * Returns fsync()'s result on success, -1 on any earlier failure.
 */
static int qcow_disk_flush(struct disk_image *disk)
{
	struct qcow *q = disk->priv;
	struct qcow_refcount_table *rft;
	struct qcow_header *header;
	struct list_head *pos, *n;
	struct qcow_l1_table *l1t;

	header = q->header;
	l1t = &q->table;
	rft = &q->refcount_table;

	mutex_lock(&q->mutex);

	/* Step 1: write out every cached (possibly dirty) refcount block. */
	list_for_each_safe(pos, n, &rft->lru_list) {
		struct qcow_refcount_block *c = list_entry(pos, struct qcow_refcount_block, list);

		if (write_refcount_block(q, c) < 0)
			goto error_unlock;
	}

	/* Barrier: refcounts must be durable before the L2 tables land. */
	if (fdatasync(disk->fd) < 0)
		goto error_unlock;

	/* Step 2: write out every cached L2 table. */
	list_for_each_safe(pos, n, &l1t->lru_list) {
		struct qcow_l2_table *c = list_entry(pos, struct qcow_l2_table, list);

		if (qcow_l2_cache_write(q, c) < 0)
			goto error_unlock;
	}

	/* Barrier: L2 tables must be durable before the L1 table lands. */
	if (fdatasync(disk->fd) < 0)
		goto error_unlock;

	/* Step 3: write the in-core L1 table back to its on-disk location. */
	if (pwrite_in_full(disk->fd, l1t->l1_table, l1t->table_size * sizeof(u64), header->l1_table_offset) < 0)
		goto error_unlock;

	mutex_unlock(&q->mutex);

	/* Final sync covers the L1 write above. */
	return fsync(disk->fd);

error_unlock:
	mutex_unlock(&q->mutex);
	return -1;
}
807659f4186SPekka Enberg 
808b1c84095SPekka Enberg static int qcow_disk_close(struct disk_image *disk)
80986835cedSPrasad Joshi {
81086835cedSPrasad Joshi 	struct qcow *q;
81186835cedSPrasad Joshi 
81243835ac9SSasha Levin 	if (!disk)
81372133dd2SAsias He 		return 0;
81486835cedSPrasad Joshi 
81543835ac9SSasha Levin 	q = disk->priv;
81686835cedSPrasad Joshi 
817*3ecac800SPekka Enberg 	refcount_table_free_cache(&q->refcount_table);
818e94cdf08SPekka Enberg 	l1_table_free_cache(&q->table);
819*3ecac800SPekka Enberg 	free(q->refcount_table.rf_table);
8206c6f79b6SPrasad Joshi 	free(q->table.l1_table);
82186835cedSPrasad Joshi 	free(q->header);
82286835cedSPrasad Joshi 	free(q);
82372133dd2SAsias He 
82472133dd2SAsias He 	return 0;
82586835cedSPrasad Joshi }
82686835cedSPrasad Joshi 
/* Ops for images opened read-only: writes fail with an I/O error. */
static struct disk_image_operations qcow_disk_readonly_ops = {
	.read_sector		= qcow_read_sector,
	.write_sector		= qcow_nowrite_sector,
	.close			= qcow_disk_close,
};
832f10860caSPekka Enberg 
/* Ops for writable images; flush orders metadata writes for consistency. */
static struct disk_image_operations qcow_disk_ops = {
	.read_sector		= qcow_read_sector,
	.write_sector		= qcow_write_sector,
	.flush			= qcow_disk_flush,
	.close			= qcow_disk_close,
};
83986835cedSPrasad Joshi 
840*3ecac800SPekka Enberg static int qcow_read_refcount_table(struct qcow *q)
841*3ecac800SPekka Enberg {
842*3ecac800SPekka Enberg 	struct qcow_header *header = q->header;
843*3ecac800SPekka Enberg 	struct qcow_refcount_table *rft = &q->refcount_table;
844*3ecac800SPekka Enberg 	u64 cluster_size;
845*3ecac800SPekka Enberg 
846*3ecac800SPekka Enberg 	cluster_size = 1 << header->cluster_bits;
847*3ecac800SPekka Enberg 
848*3ecac800SPekka Enberg 	rft->rf_size = (header->refcount_table_size * cluster_size) / sizeof(u64);
849*3ecac800SPekka Enberg 
850*3ecac800SPekka Enberg 	rft->rf_table = calloc(rft->rf_size, sizeof(u64));
851*3ecac800SPekka Enberg 	if (!rft->rf_table)
852*3ecac800SPekka Enberg 		return -1;
853*3ecac800SPekka Enberg 
854*3ecac800SPekka Enberg 	rft->root = RB_ROOT;
855*3ecac800SPekka Enberg 	INIT_LIST_HEAD(&rft->lru_list);
856*3ecac800SPekka Enberg 
857*3ecac800SPekka Enberg 	return pread_in_full(q->fd, rft->rf_table, sizeof(u64) * rft->rf_size, header->refcount_table_offset);
858*3ecac800SPekka Enberg }
859*3ecac800SPekka Enberg 
86086835cedSPrasad Joshi static int qcow_read_l1_table(struct qcow *q)
86186835cedSPrasad Joshi {
862ad627d62SPekka Enberg 	struct qcow_header *header = q->header;
863473aaa2dSPekka Enberg 	struct qcow_l1_table *table = &q->table;
86486835cedSPrasad Joshi 
865ad627d62SPekka Enberg 	table->table_size	= header->l1_size;
86686835cedSPrasad Joshi 
86700adcc1bSPrasad Joshi 	table->l1_table	= calloc(table->table_size, sizeof(u64));
86800adcc1bSPrasad Joshi 	if (!table->l1_table)
86986835cedSPrasad Joshi 		return -1;
87086835cedSPrasad Joshi 
871659f4186SPekka Enberg 	return pread_in_full(q->fd, table->l1_table, sizeof(u64) * table->table_size, header->l1_table_offset);
87286835cedSPrasad Joshi }
87386835cedSPrasad Joshi 
874ad627d62SPekka Enberg static void *qcow2_read_header(int fd)
87586835cedSPrasad Joshi {
876ad627d62SPekka Enberg 	struct qcow2_header_disk f_header;
877ad627d62SPekka Enberg 	struct qcow_header *header;
87886835cedSPrasad Joshi 
879ad627d62SPekka Enberg 	header = malloc(sizeof(struct qcow_header));
88086835cedSPrasad Joshi 	if (!header)
88186835cedSPrasad Joshi 		return NULL;
88286835cedSPrasad Joshi 
8830657f33dSPrasad Joshi 	if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0) {
8840657f33dSPrasad Joshi 		free(header);
88586835cedSPrasad Joshi 		return NULL;
8860657f33dSPrasad Joshi 	}
88786835cedSPrasad Joshi 
888ad627d62SPekka Enberg 	be32_to_cpus(&f_header.magic);
889ad627d62SPekka Enberg 	be32_to_cpus(&f_header.version);
890ad627d62SPekka Enberg 	be64_to_cpus(&f_header.backing_file_offset);
891ad627d62SPekka Enberg 	be32_to_cpus(&f_header.backing_file_size);
892ad627d62SPekka Enberg 	be32_to_cpus(&f_header.cluster_bits);
893ad627d62SPekka Enberg 	be64_to_cpus(&f_header.size);
894ad627d62SPekka Enberg 	be32_to_cpus(&f_header.crypt_method);
895ad627d62SPekka Enberg 	be32_to_cpus(&f_header.l1_size);
896ad627d62SPekka Enberg 	be64_to_cpus(&f_header.l1_table_offset);
897ad627d62SPekka Enberg 	be64_to_cpus(&f_header.refcount_table_offset);
898ad627d62SPekka Enberg 	be32_to_cpus(&f_header.refcount_table_clusters);
899ad627d62SPekka Enberg 	be32_to_cpus(&f_header.nb_snapshots);
900ad627d62SPekka Enberg 	be64_to_cpus(&f_header.snapshots_offset);
901ad627d62SPekka Enberg 
902ad627d62SPekka Enberg 	*header		= (struct qcow_header) {
903ad627d62SPekka Enberg 		.size			= f_header.size,
904ad627d62SPekka Enberg 		.l1_table_offset	= f_header.l1_table_offset,
905ad627d62SPekka Enberg 		.l1_size		= f_header.l1_size,
906ad627d62SPekka Enberg 		.cluster_bits		= f_header.cluster_bits,
907ad627d62SPekka Enberg 		.l2_bits		= f_header.cluster_bits - 3,
908*3ecac800SPekka Enberg 		.refcount_table_offset	= f_header.refcount_table_offset,
909*3ecac800SPekka Enberg 		.refcount_table_size	= f_header.refcount_table_clusters,
910ad627d62SPekka Enberg 	};
911ad627d62SPekka Enberg 
912ad627d62SPekka Enberg 	return header;
913ad627d62SPekka Enberg }
914ad627d62SPekka Enberg 
915f10860caSPekka Enberg static struct disk_image *qcow2_probe(int fd, bool readonly)
916ad627d62SPekka Enberg {
917ad627d62SPekka Enberg 	struct disk_image *disk_image;
9187b4eb530SPekka Enberg 	struct qcow_l1_table *l1t;
9197b4eb530SPekka Enberg 	struct qcow_header *h;
9207b4eb530SPekka Enberg 	struct qcow *q;
921ad627d62SPekka Enberg 
922ad627d62SPekka Enberg 	q = calloc(1, sizeof(struct qcow));
923ad627d62SPekka Enberg 	if (!q)
924ad627d62SPekka Enberg 		goto error;
925ad627d62SPekka Enberg 
926c0799eb9SPekka Enberg 	mutex_init(&q->mutex);
927ad627d62SPekka Enberg 	q->fd = fd;
9287b4eb530SPekka Enberg 
9297b4eb530SPekka Enberg 	l1t = &q->table;
9307b4eb530SPekka Enberg 
9317b4eb530SPekka Enberg 	l1t->root = RB_ROOT;
9327b4eb530SPekka Enberg 	INIT_LIST_HEAD(&l1t->lru_list);
933ad627d62SPekka Enberg 
934ad627d62SPekka Enberg 	h = q->header = qcow2_read_header(fd);
935ad627d62SPekka Enberg 	if (!h)
936ad627d62SPekka Enberg 		goto error;
937ad627d62SPekka Enberg 
938ad627d62SPekka Enberg 	if (qcow_read_l1_table(q) < 0)
939ad627d62SPekka Enberg 		goto error;
940ad627d62SPekka Enberg 
941*3ecac800SPekka Enberg 	if (qcow_read_refcount_table(q) < 0)
942*3ecac800SPekka Enberg 		goto error;
943*3ecac800SPekka Enberg 
9447d22135fSAsias He 	/*
9457d22135fSAsias He 	 * Do not use mmap use read/write instead
9467d22135fSAsias He 	 */
947f10860caSPekka Enberg 	if (readonly)
948b1c84095SPekka Enberg 		disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_NOMMAP);
949f10860caSPekka Enberg 	else
950b1c84095SPekka Enberg 		disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_NOMMAP);
951f10860caSPekka Enberg 
952ad627d62SPekka Enberg 	if (!disk_image)
953ad627d62SPekka Enberg 		goto error;
954ad627d62SPekka Enberg 	disk_image->priv = q;
955ad627d62SPekka Enberg 
956ad627d62SPekka Enberg 	return disk_image;
957ad627d62SPekka Enberg error:
958ad627d62SPekka Enberg 	if (!q)
959ad627d62SPekka Enberg 		return NULL;
960ad627d62SPekka Enberg 
961ad627d62SPekka Enberg 	free(q->table.l1_table);
962ad627d62SPekka Enberg 	free(q->header);
963ad627d62SPekka Enberg 	free(q);
964ad627d62SPekka Enberg 
965ad627d62SPekka Enberg 	return NULL;
966ad627d62SPekka Enberg }
967ad627d62SPekka Enberg 
968ad627d62SPekka Enberg static bool qcow2_check_image(int fd)
969ad627d62SPekka Enberg {
970ad627d62SPekka Enberg 	struct qcow2_header_disk f_header;
971ad627d62SPekka Enberg 
972ad627d62SPekka Enberg 	if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0)
973ad627d62SPekka Enberg 		return false;
974ad627d62SPekka Enberg 
975ad627d62SPekka Enberg 	be32_to_cpus(&f_header.magic);
976ad627d62SPekka Enberg 	be32_to_cpus(&f_header.version);
977ad627d62SPekka Enberg 
978ad627d62SPekka Enberg 	if (f_header.magic != QCOW_MAGIC)
979ad627d62SPekka Enberg 		return false;
980ad627d62SPekka Enberg 
981ad627d62SPekka Enberg 	if (f_header.version != QCOW2_VERSION)
982ad627d62SPekka Enberg 		return false;
983ad627d62SPekka Enberg 
984ad627d62SPekka Enberg 	return true;
985ad627d62SPekka Enberg }
986ad627d62SPekka Enberg 
987ad627d62SPekka Enberg static void *qcow1_read_header(int fd)
988ad627d62SPekka Enberg {
989ad627d62SPekka Enberg 	struct qcow1_header_disk f_header;
990ad627d62SPekka Enberg 	struct qcow_header *header;
991ad627d62SPekka Enberg 
992ad627d62SPekka Enberg 	header = malloc(sizeof(struct qcow_header));
993ad627d62SPekka Enberg 	if (!header)
994ad627d62SPekka Enberg 		return NULL;
995ad627d62SPekka Enberg 
996d39cefd2SSasha Levin 	if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0) {
997d39cefd2SSasha Levin 		free(header);
998ad627d62SPekka Enberg 		return NULL;
999d39cefd2SSasha Levin 	}
1000ad627d62SPekka Enberg 
1001ad627d62SPekka Enberg 	be32_to_cpus(&f_header.magic);
1002ad627d62SPekka Enberg 	be32_to_cpus(&f_header.version);
1003ad627d62SPekka Enberg 	be64_to_cpus(&f_header.backing_file_offset);
1004ad627d62SPekka Enberg 	be32_to_cpus(&f_header.backing_file_size);
1005ad627d62SPekka Enberg 	be32_to_cpus(&f_header.mtime);
1006ad627d62SPekka Enberg 	be64_to_cpus(&f_header.size);
1007ad627d62SPekka Enberg 	be32_to_cpus(&f_header.crypt_method);
1008ad627d62SPekka Enberg 	be64_to_cpus(&f_header.l1_table_offset);
1009ad627d62SPekka Enberg 
1010ad627d62SPekka Enberg 	*header		= (struct qcow_header) {
1011ad627d62SPekka Enberg 		.size			= f_header.size,
1012ad627d62SPekka Enberg 		.l1_table_offset	= f_header.l1_table_offset,
1013ad627d62SPekka Enberg 		.l1_size		= f_header.size / ((1 << f_header.l2_bits) * (1 << f_header.cluster_bits)),
1014ad627d62SPekka Enberg 		.cluster_bits		= f_header.cluster_bits,
1015ad627d62SPekka Enberg 		.l2_bits		= f_header.l2_bits,
1016ad627d62SPekka Enberg 	};
101786835cedSPrasad Joshi 
101886835cedSPrasad Joshi 	return header;
101986835cedSPrasad Joshi }
102086835cedSPrasad Joshi 
1021f10860caSPekka Enberg static struct disk_image *qcow1_probe(int fd, bool readonly)
102286835cedSPrasad Joshi {
102386835cedSPrasad Joshi 	struct disk_image *disk_image;
10247b4eb530SPekka Enberg 	struct qcow_l1_table *l1t;
10257b4eb530SPekka Enberg 	struct qcow_header *h;
10267b4eb530SPekka Enberg 	struct qcow *q;
102786835cedSPrasad Joshi 
102886835cedSPrasad Joshi 	q = calloc(1, sizeof(struct qcow));
102986835cedSPrasad Joshi 	if (!q)
103086835cedSPrasad Joshi 		goto error;
103186835cedSPrasad Joshi 
1032c0799eb9SPekka Enberg 	mutex_init(&q->mutex);
103386835cedSPrasad Joshi 	q->fd = fd;
10347b4eb530SPekka Enberg 
10357b4eb530SPekka Enberg 	l1t = &q->table;
10367b4eb530SPekka Enberg 
10377b4eb530SPekka Enberg 	l1t->root = RB_ROOT;
10387b4eb530SPekka Enberg 	INIT_LIST_HEAD(&l1t->lru_list);
103986835cedSPrasad Joshi 
104086835cedSPrasad Joshi 	h = q->header = qcow1_read_header(fd);
104186835cedSPrasad Joshi 	if (!h)
104286835cedSPrasad Joshi 		goto error;
104386835cedSPrasad Joshi 
104486835cedSPrasad Joshi 	if (qcow_read_l1_table(q) < 0)
104586835cedSPrasad Joshi 		goto error;
104686835cedSPrasad Joshi 
10477d22135fSAsias He 	/*
10487d22135fSAsias He 	 * Do not use mmap use read/write instead
10497d22135fSAsias He 	 */
1050f10860caSPekka Enberg 	if (readonly)
1051b1c84095SPekka Enberg 		disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_NOMMAP);
1052f10860caSPekka Enberg 	else
1053b1c84095SPekka Enberg 		disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_NOMMAP);
1054f10860caSPekka Enberg 
105586835cedSPrasad Joshi 	if (!disk_image)
105686835cedSPrasad Joshi 		goto error;
105786835cedSPrasad Joshi 	disk_image->priv = q;
105886835cedSPrasad Joshi 
105986835cedSPrasad Joshi 	return disk_image;
106086835cedSPrasad Joshi error:
106186835cedSPrasad Joshi 	if (!q)
106286835cedSPrasad Joshi 		return NULL;
106386835cedSPrasad Joshi 
10646c6f79b6SPrasad Joshi 	free(q->table.l1_table);
106586835cedSPrasad Joshi 	free(q->header);
106686835cedSPrasad Joshi 	free(q);
106786835cedSPrasad Joshi 
106886835cedSPrasad Joshi 	return NULL;
106986835cedSPrasad Joshi }
107086835cedSPrasad Joshi 
1071ad627d62SPekka Enberg static bool qcow1_check_image(int fd)
107286835cedSPrasad Joshi {
1073ad627d62SPekka Enberg 	struct qcow1_header_disk f_header;
107486835cedSPrasad Joshi 
1075ad627d62SPekka Enberg 	if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0)
1076ad627d62SPekka Enberg 		return false;
107786835cedSPrasad Joshi 
1078ad627d62SPekka Enberg 	be32_to_cpus(&f_header.magic);
1079ad627d62SPekka Enberg 	be32_to_cpus(&f_header.version);
108086835cedSPrasad Joshi 
1081ad627d62SPekka Enberg 	if (f_header.magic != QCOW_MAGIC)
1082ad627d62SPekka Enberg 		return false;
108386835cedSPrasad Joshi 
1084ad627d62SPekka Enberg 	if (f_header.version != QCOW1_VERSION)
1085ad627d62SPekka Enberg 		return false;
108686835cedSPrasad Joshi 
1087ad627d62SPekka Enberg 	return true;
108886835cedSPrasad Joshi }
108986835cedSPrasad Joshi 
1090f10860caSPekka Enberg struct disk_image *qcow_probe(int fd, bool readonly)
109186835cedSPrasad Joshi {
1092ad627d62SPekka Enberg 	if (qcow1_check_image(fd))
1093f10860caSPekka Enberg 		return qcow1_probe(fd, readonly);
1094ad627d62SPekka Enberg 
1095ad627d62SPekka Enberg 	if (qcow2_check_image(fd))
1096f10860caSPekka Enberg 		return qcow2_probe(fd, readonly);
1097ad627d62SPekka Enberg 
1098ad627d62SPekka Enberg 	return NULL;
109986835cedSPrasad Joshi }
1100