xref: /kvmtool/disk/qcow.c (revision 2534c9b6413c6b4b27669f57f7fc77639d0540a3)
186835cedSPrasad Joshi #include "kvm/qcow.h"
286835cedSPrasad Joshi 
386835cedSPrasad Joshi #include "kvm/disk-image.h"
486835cedSPrasad Joshi #include "kvm/read-write.h"
5c0799eb9SPekka Enberg #include "kvm/mutex.h"
686835cedSPrasad Joshi #include "kvm/util.h"
786835cedSPrasad Joshi 
886835cedSPrasad Joshi #include <sys/types.h>
986835cedSPrasad Joshi #include <sys/stat.h>
1086835cedSPrasad Joshi #include <stdbool.h>
1186835cedSPrasad Joshi #include <stdlib.h>
1286835cedSPrasad Joshi #include <string.h>
1386835cedSPrasad Joshi #include <unistd.h>
1486835cedSPrasad Joshi #include <fcntl.h>
15af68c51aSLan Tianyu #ifdef CONFIG_HAS_ZLIB
16af68c51aSLan Tianyu #include <zlib.h>
17af68c51aSLan Tianyu #endif
1886835cedSPrasad Joshi 
1986835cedSPrasad Joshi #include <linux/byteorder.h>
20865c675fSPrasad Joshi #include <linux/kernel.h>
210df6b4d9SPekka Enberg #include <linux/types.h>
2286835cedSPrasad Joshi 
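/*
 * Cached L2 tables are tracked by two structures: an RB tree keyed by the
 * table's file offset gives fast lookup, while an LRU list (bounded by
 * MAX_CACHE_NODES) decides which cached table is written back and evicted
 * first.
 */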
23e94cdf08SPekka Enberg static int l2_table_insert(struct rb_root *root, struct qcow_l2_table *new)
243309045fSPrasad Joshi {
253309045fSPrasad Joshi 	struct rb_node **link = &(root->rb_node), *parent = NULL;
263309045fSPrasad Joshi 	u64 offset = new->offset;
273309045fSPrasad Joshi 
283309045fSPrasad Joshi 	/* search the tree */
293309045fSPrasad Joshi 	while (*link) {
30473d58ffSPekka Enberg 		struct qcow_l2_table *t;
313309045fSPrasad Joshi 
32473d58ffSPekka Enberg 		t = rb_entry(*link, struct qcow_l2_table, node);
333309045fSPrasad Joshi 		if (!t)
343309045fSPrasad Joshi 			goto error;
353309045fSPrasad Joshi 
363309045fSPrasad Joshi 		parent = *link;
373309045fSPrasad Joshi 
383309045fSPrasad Joshi 		if (t->offset > offset)
393309045fSPrasad Joshi 			link = &(*link)->rb_left;
403309045fSPrasad Joshi 		else if (t->offset < offset)
413309045fSPrasad Joshi 			link = &(*link)->rb_right;
423309045fSPrasad Joshi 		else
433309045fSPrasad Joshi 			goto out;
443309045fSPrasad Joshi 	}
453309045fSPrasad Joshi 
463309045fSPrasad Joshi 	/* add new node */
473309045fSPrasad Joshi 	rb_link_node(&new->node, parent, link);
483309045fSPrasad Joshi 	rb_insert_color(&new->node, root);
493309045fSPrasad Joshi out:
503309045fSPrasad Joshi 	return 0;
513309045fSPrasad Joshi error:
523309045fSPrasad Joshi 	return -1;
533309045fSPrasad Joshi }
543309045fSPrasad Joshi 
55e94cdf08SPekka Enberg static struct qcow_l2_table *l2_table_lookup(struct rb_root *root, u64 offset)
563309045fSPrasad Joshi {
573309045fSPrasad Joshi 	struct rb_node *link = root->rb_node;
583309045fSPrasad Joshi 
593309045fSPrasad Joshi 	while (link) {
60473d58ffSPekka Enberg 		struct qcow_l2_table *t;
613309045fSPrasad Joshi 
62473d58ffSPekka Enberg 		t = rb_entry(link, struct qcow_l2_table, node);
633309045fSPrasad Joshi 		if (!t)
643309045fSPrasad Joshi 			goto out;
653309045fSPrasad Joshi 
663309045fSPrasad Joshi 		if (t->offset > offset)
673309045fSPrasad Joshi 			link = link->rb_left;
683309045fSPrasad Joshi 		else if (t->offset < offset)
693309045fSPrasad Joshi 			link = link->rb_right;
703309045fSPrasad Joshi 		else
713309045fSPrasad Joshi 			return t;
723309045fSPrasad Joshi 	}
733309045fSPrasad Joshi out:
743309045fSPrasad Joshi 	return NULL;
753309045fSPrasad Joshi }
763309045fSPrasad Joshi 
77e94cdf08SPekka Enberg static void l1_table_free_cache(struct qcow_l1_table *l1t)
783309045fSPrasad Joshi {
797b4eb530SPekka Enberg 	struct rb_root *r = &l1t->root;
803309045fSPrasad Joshi 	struct list_head *pos, *n;
81473d58ffSPekka Enberg 	struct qcow_l2_table *t;
823309045fSPrasad Joshi 
837b4eb530SPekka Enberg 	list_for_each_safe(pos, n, &l1t->lru_list) {
843309045fSPrasad Joshi 		/* Remove the cached table from the list and the RB tree */
853309045fSPrasad Joshi 		list_del(pos);
86473d58ffSPekka Enberg 		t = list_entry(pos, struct qcow_l2_table, list);
873309045fSPrasad Joshi 		rb_erase(&t->node, r);
883309045fSPrasad Joshi 
893309045fSPrasad Joshi 		/* Free the cached node */
903309045fSPrasad Joshi 		free(t);
913309045fSPrasad Joshi 	}
923309045fSPrasad Joshi }
933309045fSPrasad Joshi 
94a4e46515SPekka Enberg static int qcow_l2_cache_write(struct qcow *q, struct qcow_l2_table *c)
95a4e46515SPekka Enberg {
96a4e46515SPekka Enberg 	struct qcow_header *header = q->header;
97a4e46515SPekka Enberg 	u64 size;
98a4e46515SPekka Enberg 
99aff88976SPekka Enberg 	if (!c->dirty)
100aff88976SPekka Enberg 		return 0;
101aff88976SPekka Enberg 
102a4e46515SPekka Enberg 	size = 1 << header->l2_bits;
103a4e46515SPekka Enberg 
104aff88976SPekka Enberg 	if (pwrite_in_full(q->fd, c->table, size * sizeof(u64), c->offset) < 0)
105aff88976SPekka Enberg 		return -1;
106aff88976SPekka Enberg 
107aff88976SPekka Enberg 	c->dirty = 0;
108aff88976SPekka Enberg 
109aff88976SPekka Enberg 	return 0;
110a4e46515SPekka Enberg }
111a4e46515SPekka Enberg 
112473d58ffSPekka Enberg static int cache_table(struct qcow *q, struct qcow_l2_table *c)
1133309045fSPrasad Joshi {
1147b4eb530SPekka Enberg 	struct qcow_l1_table *l1t = &q->table;
1157b4eb530SPekka Enberg 	struct rb_root *r = &l1t->root;
116473d58ffSPekka Enberg 	struct qcow_l2_table *lru;
1173309045fSPrasad Joshi 
1187b4eb530SPekka Enberg 	if (l1t->nr_cached == MAX_CACHE_NODES) {
1193309045fSPrasad Joshi 		/*
1203309045fSPrasad Joshi 		 * The node at the head of the list is the least recently used
1213309045fSPrasad Joshi 		 * node. Remove it from the list and replace it with a new one.
1223309045fSPrasad Joshi 		 */
1237b4eb530SPekka Enberg 		lru = list_first_entry(&l1t->lru_list, struct qcow_l2_table, list);
1243309045fSPrasad Joshi 
125a4e46515SPekka Enberg 		if (qcow_l2_cache_write(q, lru) < 0)
126a4e46515SPekka Enberg 			goto error;
127a4e46515SPekka Enberg 
1283309045fSPrasad Joshi 		/* Remove the node from the cache */
1293309045fSPrasad Joshi 		rb_erase(&lru->node, r);
1303309045fSPrasad Joshi 		list_del_init(&lru->list);
1317b4eb530SPekka Enberg 		l1t->nr_cached--;
1323309045fSPrasad Joshi 
1333309045fSPrasad Joshi 		/* Free the evicted node */
1343309045fSPrasad Joshi 		free(lru);
1353309045fSPrasad Joshi 	}
1363309045fSPrasad Joshi 
1373309045fSPrasad Joshi 	/* Add the new node to the RB tree: it makes lookups faster */
138e94cdf08SPekka Enberg 	if (l2_table_insert(r, c) < 0)
1393309045fSPrasad Joshi 		goto error;
1403309045fSPrasad Joshi 
1413309045fSPrasad Joshi 	/* Add in LRU replacement list */
1427b4eb530SPekka Enberg 	list_add_tail(&c->list, &l1t->lru_list);
1437b4eb530SPekka Enberg 	l1t->nr_cached++;
1443309045fSPrasad Joshi 
1453309045fSPrasad Joshi 	return 0;
1463309045fSPrasad Joshi error:
1473309045fSPrasad Joshi 	return -1;
1483309045fSPrasad Joshi }
1493309045fSPrasad Joshi 
150e94cdf08SPekka Enberg static struct qcow_l2_table *l2_table_search(struct qcow *q, u64 offset)
1513309045fSPrasad Joshi {
1527b4eb530SPekka Enberg 	struct qcow_l1_table *l1t = &q->table;
153fe8bdde0SPekka Enberg 	struct qcow_l2_table *l2t;
1543309045fSPrasad Joshi 
155e94cdf08SPekka Enberg 	l2t = l2_table_lookup(&l1t->root, offset);
156fe8bdde0SPekka Enberg 	if (!l2t)
157fe8bdde0SPekka Enberg 		return NULL;
1583309045fSPrasad Joshi 
1593309045fSPrasad Joshi 	/* Update the LRU state by moving the found node to the list tail */
1607b4eb530SPekka Enberg 	list_move_tail(&l2t->list, &l1t->lru_list);
1613309045fSPrasad Joshi 
162fe8bdde0SPekka Enberg 	return l2t;
1633309045fSPrasad Joshi }
1643309045fSPrasad Joshi 
1653309045fSPrasad Joshi /* Allocates a new node for caching an L2 table */
166473d58ffSPekka Enberg static struct qcow_l2_table *new_cache_table(struct qcow *q, u64 offset)
1673309045fSPrasad Joshi {
1683309045fSPrasad Joshi 	struct qcow_header *header = q->header;
169473d58ffSPekka Enberg 	struct qcow_l2_table *c;
1703309045fSPrasad Joshi 	u64 l2t_sz;
1713309045fSPrasad Joshi 	u64 size;
1723309045fSPrasad Joshi 
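	/* The table entries are allocated inline, right after the struct */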
1733309045fSPrasad Joshi 	l2t_sz = 1 << header->l2_bits;
1743309045fSPrasad Joshi 	size   = sizeof(*c) + l2t_sz * sizeof(u64);
1753309045fSPrasad Joshi 	c      = calloc(1, size);
1763309045fSPrasad Joshi 	if (!c)
1773309045fSPrasad Joshi 		goto out;
1783309045fSPrasad Joshi 
1793309045fSPrasad Joshi 	c->offset = offset;
1803309045fSPrasad Joshi 	RB_CLEAR_NODE(&c->node);
1813309045fSPrasad Joshi 	INIT_LIST_HEAD(&c->list);
1823309045fSPrasad Joshi out:
1833309045fSPrasad Joshi 	return c;
1843309045fSPrasad Joshi }
1853309045fSPrasad Joshi 
186742fce76SPrasad Joshi static inline u64 get_l1_index(struct qcow *q, u64 offset)
18786835cedSPrasad Joshi {
188ad627d62SPekka Enberg 	struct qcow_header *header = q->header;
18986835cedSPrasad Joshi 
19086835cedSPrasad Joshi 	return offset >> (header->l2_bits + header->cluster_bits);
19186835cedSPrasad Joshi }
19286835cedSPrasad Joshi 
193742fce76SPrasad Joshi static inline u64 get_l2_index(struct qcow *q, u64 offset)
19486835cedSPrasad Joshi {
195ad627d62SPekka Enberg 	struct qcow_header *header = q->header;
19686835cedSPrasad Joshi 
19786835cedSPrasad Joshi 	return (offset >> (header->cluster_bits)) & ((1 << header->l2_bits)-1);
19886835cedSPrasad Joshi }
19986835cedSPrasad Joshi 
200742fce76SPrasad Joshi static inline u64 get_cluster_offset(struct qcow *q, u64 offset)
20186835cedSPrasad Joshi {
202ad627d62SPekka Enberg 	struct qcow_header *header = q->header;
20386835cedSPrasad Joshi 
20486835cedSPrasad Joshi 	return offset & ((1 << header->cluster_bits)-1);
20586835cedSPrasad Joshi }
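
/*
 * Taken together, the three helpers above decompose a guest offset as:
 *
 *   offset = (l1_index << (l2_bits + cluster_bits))
 *          | (l2_index << cluster_bits)
 *          | cluster_offset
 */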
20686835cedSPrasad Joshi 
207fe8bdde0SPekka Enberg static struct qcow_l2_table *qcow_read_l2_table(struct qcow *q, u64 offset)
2083309045fSPrasad Joshi {
2093309045fSPrasad Joshi 	struct qcow_header *header = q->header;
210fe8bdde0SPekka Enberg 	struct qcow_l2_table *l2t;
2113309045fSPrasad Joshi 	u64 size;
2123309045fSPrasad Joshi 
2133309045fSPrasad Joshi 	size = 1 << header->l2_bits;
2143309045fSPrasad Joshi 
2153309045fSPrasad Joshi 	/* search the cache for an entry at this offset */
216e94cdf08SPekka Enberg 	l2t = l2_table_search(q, offset);
217fe8bdde0SPekka Enberg 	if (l2t)
218fe8bdde0SPekka Enberg 		return l2t;
2193309045fSPrasad Joshi 
2203309045fSPrasad Joshi 	/* allocate new node for caching l2 table */
221fe8bdde0SPekka Enberg 	l2t = new_cache_table(q, offset);
222fe8bdde0SPekka Enberg 	if (!l2t)
2233309045fSPrasad Joshi 		goto error;
2243309045fSPrasad Joshi 
2253309045fSPrasad Joshi 	/* table not cached: read from the disk */
226fe8bdde0SPekka Enberg 	if (pread_in_full(q->fd, l2t->table, size * sizeof(u64), offset) < 0)
2273309045fSPrasad Joshi 		goto error;
2283309045fSPrasad Joshi 
2293309045fSPrasad Joshi 	/* cache the table */
230fe8bdde0SPekka Enberg 	if (cache_table(q, l2t) < 0)
2313309045fSPrasad Joshi 		goto error;
2323309045fSPrasad Joshi 
233fe8bdde0SPekka Enberg 	return l2t;
2343309045fSPrasad Joshi error:
235fe8bdde0SPekka Enberg 	free(l2t);
236fe8bdde0SPekka Enberg 	return NULL;
2373309045fSPrasad Joshi }
2383309045fSPrasad Joshi 
239af68c51aSLan Tianyu static int qcow_decompress_buffer(u8 *out_buf, int out_buf_size,
240af68c51aSLan Tianyu 	const u8 *buf, int buf_size)
241af68c51aSLan Tianyu {
242af68c51aSLan Tianyu #ifdef CONFIG_HAS_ZLIB
243af68c51aSLan Tianyu 	z_stream strm1, *strm = &strm1;
244af68c51aSLan Tianyu 	int ret, out_len;
245af68c51aSLan Tianyu 
246af68c51aSLan Tianyu 	memset(strm, 0, sizeof(*strm));
247af68c51aSLan Tianyu 
248af68c51aSLan Tianyu 	strm->next_in = (u8 *)buf;
249af68c51aSLan Tianyu 	strm->avail_in = buf_size;
250af68c51aSLan Tianyu 	strm->next_out = out_buf;
251af68c51aSLan Tianyu 	strm->avail_out = out_buf_size;
252af68c51aSLan Tianyu 
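	/* Negative windowBits tells zlib to expect a raw deflate stream (no header) */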
253af68c51aSLan Tianyu 	ret = inflateInit2(strm, -12);
254af68c51aSLan Tianyu 	if (ret != Z_OK)
255af68c51aSLan Tianyu 		return -1;
256af68c51aSLan Tianyu 
257af68c51aSLan Tianyu 	ret = inflate(strm, Z_FINISH);
258af68c51aSLan Tianyu 	out_len = strm->next_out - out_buf;
259af68c51aSLan Tianyu 	if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
260af68c51aSLan Tianyu 		out_len != out_buf_size) {
261af68c51aSLan Tianyu 		inflateEnd(strm);
262af68c51aSLan Tianyu 		return -1;
263af68c51aSLan Tianyu 	}
264af68c51aSLan Tianyu 
265af68c51aSLan Tianyu 	inflateEnd(strm);
266af68c51aSLan Tianyu 	return 0;
267af68c51aSLan Tianyu #else
268af68c51aSLan Tianyu 	return -1;
269af68c51aSLan Tianyu #endif
270af68c51aSLan Tianyu }
271af68c51aSLan Tianyu 
272af68c51aSLan Tianyu static ssize_t qcow1_read_cluster(struct qcow *q, u64 offset,
273af68c51aSLan Tianyu 	void *dst, u32 dst_len)
27486835cedSPrasad Joshi {
275ad627d62SPekka Enberg 	struct qcow_header *header = q->header;
2763fb67b93SPekka Enberg 	struct qcow_l1_table *l1t = &q->table;
2773fb67b93SPekka Enberg 	struct qcow_l2_table *l2t;
278742fce76SPrasad Joshi 	u64 clust_offset;
279742fce76SPrasad Joshi 	u64 clust_start;
2803fb67b93SPekka Enberg 	u64 l2t_offset;
281a51948ceSPekka Enberg 	size_t length;
2823fb67b93SPekka Enberg 	u64 l2t_size;
283742fce76SPrasad Joshi 	u64 l1_idx;
284742fce76SPrasad Joshi 	u64 l2_idx;
285af68c51aSLan Tianyu 	int coffset;
286af68c51aSLan Tianyu 	int csize;
28786835cedSPrasad Joshi 
288c5e0624bSPrasad Joshi 	l1_idx = get_l1_index(q, offset);
2893fb67b93SPekka Enberg 	if (l1_idx >= l1t->table_size)
290c0799eb9SPekka Enberg 		return -1;
29186835cedSPrasad Joshi 
2923dac48d4SPrasad Joshi 	clust_offset = get_cluster_offset(q, offset);
293af68c51aSLan Tianyu 	if (clust_offset >= q->cluster_size)
294c0799eb9SPekka Enberg 		return -1;
2953dac48d4SPrasad Joshi 
296af68c51aSLan Tianyu 	length = q->cluster_size - clust_offset;
2973dac48d4SPrasad Joshi 	if (length > dst_len)
2983dac48d4SPrasad Joshi 		length = dst_len;
2993dac48d4SPrasad Joshi 
300c0799eb9SPekka Enberg 	mutex_lock(&q->mutex);
301b2ebe61bSPekka Enberg 
3023fb67b93SPekka Enberg 	l2t_offset = be64_to_cpu(l1t->l1_table[l1_idx]);
3033fb67b93SPekka Enberg 	if (!l2t_offset)
3043dac48d4SPrasad Joshi 		goto zero_cluster;
30586835cedSPrasad Joshi 
3063fb67b93SPekka Enberg 	l2t_size = 1 << header->l2_bits;
30786835cedSPrasad Joshi 
3083309045fSPrasad Joshi 	/* read and cache level 2 table */
3093fb67b93SPekka Enberg 	l2t = qcow_read_l2_table(q, l2t_offset);
3103fb67b93SPekka Enberg 	if (!l2t)
311b6edb0ecSSasha Levin 		goto out_error;
31286835cedSPrasad Joshi 
313c5e0624bSPrasad Joshi 	l2_idx = get_l2_index(q, offset);
3143fb67b93SPekka Enberg 	if (l2_idx >= l2t_size)
315b6edb0ecSSasha Levin 		goto out_error;
31686835cedSPrasad Joshi 
3173fb67b93SPekka Enberg 	clust_start = be64_to_cpu(l2t->table[l2_idx]);
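	/*
	 * QCOW1 compressed cluster descriptor, as decoded below: the low
	 * bits hold the byte offset of the compressed data, and the top
	 * cluster_bits bits (under the compressed flag) hold its size in
	 * bytes.
	 */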
318af68c51aSLan Tianyu 	if (clust_start & QCOW1_OFLAG_COMPRESSED) {
319af68c51aSLan Tianyu 		coffset = clust_start & q->cluster_offset_mask;
320af68c51aSLan Tianyu 		csize = clust_start >> (63 - q->header->cluster_bits);
321af68c51aSLan Tianyu 		csize &= (q->cluster_size - 1);
322af68c51aSLan Tianyu 
323af68c51aSLan Tianyu 		if (pread_in_full(q->fd, q->cluster_data, csize,
324af68c51aSLan Tianyu 				  coffset) < 0) {
325b2ebe61bSPekka Enberg 			goto out_error;
326b2ebe61bSPekka Enberg 		}
327b2ebe61bSPekka Enberg 
328af68c51aSLan Tianyu 		if (qcow_decompress_buffer(q->cluster_cache, q->cluster_size,
329af68c51aSLan Tianyu 					q->cluster_data, csize) < 0) {
330af68c51aSLan Tianyu 			goto out_error;
331af68c51aSLan Tianyu 		}
332af68c51aSLan Tianyu 
333af68c51aSLan Tianyu 		memcpy(dst, q->cluster_cache + clust_offset, length);
334af68c51aSLan Tianyu 		mutex_unlock(&q->mutex);
335af68c51aSLan Tianyu 	} else {
33686835cedSPrasad Joshi 		if (!clust_start)
3373dac48d4SPrasad Joshi 			goto zero_cluster;
33886835cedSPrasad Joshi 
339c0799eb9SPekka Enberg 		mutex_unlock(&q->mutex);
34086835cedSPrasad Joshi 
341af68c51aSLan Tianyu 		if (pread_in_full(q->fd, dst, length,
342af68c51aSLan Tianyu 				  clust_start + clust_offset) < 0)
343c0799eb9SPekka Enberg 			return -1;
344af68c51aSLan Tianyu 	}
345c0799eb9SPekka Enberg 
3463dac48d4SPrasad Joshi 	return length;
34786835cedSPrasad Joshi 
348179b71f0SPekka Enberg zero_cluster:
349c0799eb9SPekka Enberg 	mutex_unlock(&q->mutex);
350179b71f0SPekka Enberg 	memset(dst, 0, length);
351c0799eb9SPekka Enberg 	return length;
352179b71f0SPekka Enberg 
35386835cedSPrasad Joshi out_error:
354c0799eb9SPekka Enberg 	mutex_unlock(&q->mutex);
356c0799eb9SPekka Enberg 	return -1;
3573dac48d4SPrasad Joshi }
358b6edb0ecSSasha Levin 
359af68c51aSLan Tianyu static ssize_t qcow2_read_cluster(struct qcow *q, u64 offset,
360af68c51aSLan Tianyu 	void *dst, u32 dst_len)
361af68c51aSLan Tianyu {
362af68c51aSLan Tianyu 	struct qcow_header *header = q->header;
363af68c51aSLan Tianyu 	struct qcow_l1_table *l1t = &q->table;
364af68c51aSLan Tianyu 	struct qcow_l2_table *l2t;
365af68c51aSLan Tianyu 	u64 clust_offset;
366af68c51aSLan Tianyu 	u64 clust_start;
367af68c51aSLan Tianyu 	u64 l2t_offset;
368af68c51aSLan Tianyu 	size_t length;
369af68c51aSLan Tianyu 	u64 l2t_size;
370af68c51aSLan Tianyu 	u64 l1_idx;
371af68c51aSLan Tianyu 	u64 l2_idx;
372af68c51aSLan Tianyu 	int coffset;
373af68c51aSLan Tianyu 	int sector_offset;
374af68c51aSLan Tianyu 	int nb_csectors;
375af68c51aSLan Tianyu 	int csize;
376af68c51aSLan Tianyu 
377af68c51aSLan Tianyu 	l1_idx = get_l1_index(q, offset);
378af68c51aSLan Tianyu 	if (l1_idx >= l1t->table_size)
379af68c51aSLan Tianyu 		return -1;
380af68c51aSLan Tianyu 
381af68c51aSLan Tianyu 	clust_offset = get_cluster_offset(q, offset);
382af68c51aSLan Tianyu 	if (clust_offset >= q->cluster_size)
383af68c51aSLan Tianyu 		return -1;
384af68c51aSLan Tianyu 
385af68c51aSLan Tianyu 	length = q->cluster_size - clust_offset;
386af68c51aSLan Tianyu 	if (length > dst_len)
387af68c51aSLan Tianyu 		length = dst_len;
388af68c51aSLan Tianyu 
389af68c51aSLan Tianyu 	mutex_lock(&q->mutex);
390af68c51aSLan Tianyu 
391af68c51aSLan Tianyu 	l2t_offset = be64_to_cpu(l1t->l1_table[l1_idx]);
392af68c51aSLan Tianyu 
393af68c51aSLan Tianyu 	l2t_offset &= ~QCOW2_OFLAG_COPIED;
394af68c51aSLan Tianyu 	if (!l2t_offset)
395af68c51aSLan Tianyu 		goto zero_cluster;
396af68c51aSLan Tianyu 
397af68c51aSLan Tianyu 	l2t_size = 1 << header->l2_bits;
398af68c51aSLan Tianyu 
399af68c51aSLan Tianyu 	/* read and cache level 2 table */
400af68c51aSLan Tianyu 	l2t = qcow_read_l2_table(q, l2t_offset);
401af68c51aSLan Tianyu 	if (!l2t)
402af68c51aSLan Tianyu 		goto out_error;
403af68c51aSLan Tianyu 
404af68c51aSLan Tianyu 	l2_idx = get_l2_index(q, offset);
405af68c51aSLan Tianyu 	if (l2_idx >= l2t_size)
406af68c51aSLan Tianyu 		goto out_error;
407af68c51aSLan Tianyu 
408af68c51aSLan Tianyu 	clust_start = be64_to_cpu(l2t->table[l2_idx]);
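	/*
	 * QCOW2 compressed cluster descriptor, as decoded below: the low
	 * bits hold the host byte offset; the bits above csize_shift hold
	 * the number of additional 512-byte sectors occupied by the
	 * compressed data.
	 */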
409af68c51aSLan Tianyu 	if (clust_start & QCOW2_OFLAG_COMPRESSED) {
410af68c51aSLan Tianyu 		coffset = clust_start & q->cluster_offset_mask;
411af68c51aSLan Tianyu 		nb_csectors = ((clust_start >> q->csize_shift)
412af68c51aSLan Tianyu 			& q->csize_mask) + 1;
413af68c51aSLan Tianyu 		sector_offset = coffset & (SECTOR_SIZE - 1);
414af68c51aSLan Tianyu 		csize = nb_csectors * SECTOR_SIZE - sector_offset;
415af68c51aSLan Tianyu 
416af68c51aSLan Tianyu 		if (pread_in_full(q->fd, q->cluster_data,
417af68c51aSLan Tianyu 				  nb_csectors * SECTOR_SIZE,
418af68c51aSLan Tianyu 				  coffset & ~(SECTOR_SIZE - 1)) < 0) {
419af68c51aSLan Tianyu 			goto out_error;
420af68c51aSLan Tianyu 		}
421af68c51aSLan Tianyu 
422af68c51aSLan Tianyu 		if (qcow_decompress_buffer(q->cluster_cache, q->cluster_size,
423af68c51aSLan Tianyu 					q->cluster_data + sector_offset,
424af68c51aSLan Tianyu 					csize) < 0) {
425af68c51aSLan Tianyu 			goto out_error;
426af68c51aSLan Tianyu 		}
427af68c51aSLan Tianyu 
428af68c51aSLan Tianyu 		memcpy(dst, q->cluster_cache + clust_offset, length);
429af68c51aSLan Tianyu 		mutex_unlock(&q->mutex);
430af68c51aSLan Tianyu 	} else {
431af68c51aSLan Tianyu 		clust_start &= QCOW2_OFFSET_MASK;
432af68c51aSLan Tianyu 		if (!clust_start)
433af68c51aSLan Tianyu 			goto zero_cluster;
434af68c51aSLan Tianyu 
435af68c51aSLan Tianyu 		mutex_unlock(&q->mutex);
436af68c51aSLan Tianyu 
437af68c51aSLan Tianyu 		if (pread_in_full(q->fd, dst, length,
438af68c51aSLan Tianyu 				  clust_start + clust_offset) < 0)
439af68c51aSLan Tianyu 			return -1;
440af68c51aSLan Tianyu 	}
441af68c51aSLan Tianyu 
442af68c51aSLan Tianyu 	return length;
443af68c51aSLan Tianyu 
444af68c51aSLan Tianyu zero_cluster:
445af68c51aSLan Tianyu 	mutex_unlock(&q->mutex);
446af68c51aSLan Tianyu 	memset(dst, 0, length);
447af68c51aSLan Tianyu 	return length;
448af68c51aSLan Tianyu 
449af68c51aSLan Tianyu out_error:
450af68c51aSLan Tianyu 	mutex_unlock(&q->mutex);
452af68c51aSLan Tianyu 	return -1;
453af68c51aSLan Tianyu }
454af68c51aSLan Tianyu 
455*2534c9b6SSasha Levin static ssize_t qcow_read_sector_single(struct disk_image *disk, u64 sector,
456af68c51aSLan Tianyu 	void *dst, u32 dst_len)
4573dac48d4SPrasad Joshi {
45843835ac9SSasha Levin 	struct qcow *q = disk->priv;
459ad627d62SPekka Enberg 	struct qcow_header *header = q->header;
460d8eea993SPekka Enberg 	u32 nr_read;
4610df6b4d9SPekka Enberg 	u64 offset;
4620df6b4d9SPekka Enberg 	char *buf;
4633dac48d4SPrasad Joshi 	ssize_t nr;	/* signed: the cluster readers return -1 on error */
4643dac48d4SPrasad Joshi 
4650df6b4d9SPekka Enberg 	buf		= dst;
466d8eea993SPekka Enberg 	nr_read		= 0;
4670df6b4d9SPekka Enberg 
468d8eea993SPekka Enberg 	while (nr_read < dst_len) {
4693dac48d4SPrasad Joshi 		offset		= sector << SECTOR_SHIFT;
4703dac48d4SPrasad Joshi 		if (offset >= header->size)
4710df6b4d9SPekka Enberg 			return -1;
4723dac48d4SPrasad Joshi 
473af68c51aSLan Tianyu 		if (q->version == QCOW1_VERSION)
474af68c51aSLan Tianyu 			nr = qcow1_read_cluster(q, offset, buf,
475af68c51aSLan Tianyu 				dst_len - nr_read);
476af68c51aSLan Tianyu 		else
477af68c51aSLan Tianyu 			nr = qcow2_read_cluster(q, offset, buf,
478af68c51aSLan Tianyu 				dst_len - nr_read);
479af68c51aSLan Tianyu 
480a51948ceSPekka Enberg 		if (nr <= 0)
4810df6b4d9SPekka Enberg 			return -1;
4823dac48d4SPrasad Joshi 
483d8eea993SPekka Enberg 		nr_read		+= nr;
4843dac48d4SPrasad Joshi 		buf		+= nr;
4853dac48d4SPrasad Joshi 		sector		+= (nr >> SECTOR_SHIFT);
4863dac48d4SPrasad Joshi 	}
4870df6b4d9SPekka Enberg 
48872133dd2SAsias He 	return dst_len;
48986835cedSPrasad Joshi }
49086835cedSPrasad Joshi 
491*2534c9b6SSasha Levin static ssize_t qcow_read_sector(struct disk_image *disk, u64 sector,
492*2534c9b6SSasha Levin 				const struct iovec *iov, int iovcount)
493*2534c9b6SSasha Levin {
494*2534c9b6SSasha Levin 	ssize_t nr, total = 0;
495*2534c9b6SSasha Levin 
496*2534c9b6SSasha Levin 	while (iovcount--) {
497*2534c9b6SSasha Levin 		nr = qcow_read_sector_single(disk, sector, iov->iov_base, iov->iov_len);
498*2534c9b6SSasha Levin 		if (nr != (ssize_t)iov->iov_len) {
499*2534c9b6SSasha Levin 			pr_info("qcow_read_sector error: nr=%ld iov_len=%ld\n", (long)nr, (long)iov->iov_len);
500*2534c9b6SSasha Levin 			return -1;
501*2534c9b6SSasha Levin 		}
502*2534c9b6SSasha Levin 
503*2534c9b6SSasha Levin 		sector	+= iov->iov_len >> SECTOR_SHIFT;
504*2534c9b6SSasha Levin 		iov++;
505*2534c9b6SSasha Levin 		total	+= nr;
506*2534c9b6SSasha Levin 	}
507*2534c9b6SSasha Levin 
508*2534c9b6SSasha Levin 	return total;
509*2534c9b6SSasha Levin }
510*2534c9b6SSasha Levin 
511865c675fSPrasad Joshi static inline u64 file_size(int fd)
512865c675fSPrasad Joshi {
513865c675fSPrasad Joshi 	struct stat st;
5140df6b4d9SPekka Enberg 
515865c675fSPrasad Joshi 	if (fstat(fd, &st) < 0)
516865c675fSPrasad Joshi 		return 0;
5170df6b4d9SPekka Enberg 
518865c675fSPrasad Joshi 	return st.st_size;
519865c675fSPrasad Joshi }
520865c675fSPrasad Joshi 
5210df6b4d9SPekka Enberg static inline int qcow_pwrite_sync(int fd, void *buf, size_t count, off_t offset)
522865c675fSPrasad Joshi {
523865c675fSPrasad Joshi 	if (pwrite_in_full(fd, buf, count, offset) < 0)
524865c675fSPrasad Joshi 		return -1;
5250df6b4d9SPekka Enberg 
5267d94a719SPekka Enberg 	return fdatasync(fd);
527865c675fSPrasad Joshi }
528865c675fSPrasad Joshi 
529865c675fSPrasad Joshi /* Writes a level 2 table at the end of the file. */
530b1c84095SPekka Enberg static u64 qcow_write_l2_table(struct qcow *q, u64 *table)
531865c675fSPrasad Joshi {
532865c675fSPrasad Joshi 	struct qcow_header *header = q->header;
533865c675fSPrasad Joshi 	u64 clust_sz;
534865c675fSPrasad Joshi 	u64 f_sz;
5350df6b4d9SPekka Enberg 	u64 off;
5360df6b4d9SPekka Enberg 	u64 sz;
537865c675fSPrasad Joshi 
538865c675fSPrasad Joshi 	f_sz		= file_size(q->fd);
539865c675fSPrasad Joshi 	if (!f_sz)
540865c675fSPrasad Joshi 		return 0;
541865c675fSPrasad Joshi 
542865c675fSPrasad Joshi 	sz		= 1 << header->l2_bits;
543865c675fSPrasad Joshi 	clust_sz	= 1 << header->cluster_bits;
544865c675fSPrasad Joshi 	off		= ALIGN(f_sz, clust_sz);
545865c675fSPrasad Joshi 
5466fe151aeSPekka Enberg 	if (pwrite_in_full(q->fd, table, sz * sizeof(u64), off) < 0)
547865c675fSPrasad Joshi 		return 0;
5480df6b4d9SPekka Enberg 
549865c675fSPrasad Joshi 	return off;
550865c675fSPrasad Joshi }
551865c675fSPrasad Joshi 
5523ecac800SPekka Enberg static void refcount_table_free_cache(struct qcow_refcount_table *rft)
5533ecac800SPekka Enberg {
5543ecac800SPekka Enberg 	struct rb_root *r = &rft->root;
5553ecac800SPekka Enberg 	struct list_head *pos, *n;
5563ecac800SPekka Enberg 	struct qcow_refcount_block *t;
5573ecac800SPekka Enberg 
5583ecac800SPekka Enberg 	list_for_each_safe(pos, n, &rft->lru_list) {
5593ecac800SPekka Enberg 		list_del(pos);
5603ecac800SPekka Enberg 		t = list_entry(pos, struct qcow_refcount_block, list);
5613ecac800SPekka Enberg 		rb_erase(&t->node, r);
5623ecac800SPekka Enberg 
5633ecac800SPekka Enberg 		free(t);
5643ecac800SPekka Enberg 	}
5653ecac800SPekka Enberg }
5663ecac800SPekka Enberg 
5673ecac800SPekka Enberg static int refcount_block_insert(struct rb_root *root, struct qcow_refcount_block *new)
5683ecac800SPekka Enberg {
5693ecac800SPekka Enberg 	struct rb_node **link = &(root->rb_node), *parent = NULL;
5703ecac800SPekka Enberg 	u64 offset = new->offset;
5713ecac800SPekka Enberg 
5723ecac800SPekka Enberg 	/* search the tree */
5733ecac800SPekka Enberg 	while (*link) {
5743ecac800SPekka Enberg 		struct qcow_refcount_block *t;
5753ecac800SPekka Enberg 
5763ecac800SPekka Enberg 		t = rb_entry(*link, struct qcow_refcount_block, node);
5773ecac800SPekka Enberg 		if (!t)
5783ecac800SPekka Enberg 			goto error;
5793ecac800SPekka Enberg 
5803ecac800SPekka Enberg 		parent = *link;
5813ecac800SPekka Enberg 
5823ecac800SPekka Enberg 		if (t->offset > offset)
5833ecac800SPekka Enberg 			link = &(*link)->rb_left;
5843ecac800SPekka Enberg 		else if (t->offset < offset)
5853ecac800SPekka Enberg 			link = &(*link)->rb_right;
5863ecac800SPekka Enberg 		else
5873ecac800SPekka Enberg 			goto out;
5883ecac800SPekka Enberg 	}
5893ecac800SPekka Enberg 
5903ecac800SPekka Enberg 	/* add new node */
5913ecac800SPekka Enberg 	rb_link_node(&new->node, parent, link);
5923ecac800SPekka Enberg 	rb_insert_color(&new->node, root);
5933ecac800SPekka Enberg out:
5943ecac800SPekka Enberg 	return 0;
5953ecac800SPekka Enberg error:
5963ecac800SPekka Enberg 	return -1;
5973ecac800SPekka Enberg }
5983ecac800SPekka Enberg 
5993ecac800SPekka Enberg static int write_refcount_block(struct qcow *q, struct qcow_refcount_block *rfb)
6003ecac800SPekka Enberg {
6013ecac800SPekka Enberg 	if (!rfb->dirty)
6023ecac800SPekka Enberg 		return 0;
6033ecac800SPekka Enberg 
6043ecac800SPekka Enberg 	if (pwrite_in_full(q->fd, rfb->entries, rfb->size * sizeof(u16), rfb->offset) < 0)
6053ecac800SPekka Enberg 		return -1;
6063ecac800SPekka Enberg 
6073ecac800SPekka Enberg 	rfb->dirty = 0;
6083ecac800SPekka Enberg 
6093ecac800SPekka Enberg 	return 0;
6103ecac800SPekka Enberg }
6113ecac800SPekka Enberg 
6123ecac800SPekka Enberg static int cache_refcount_block(struct qcow *q, struct qcow_refcount_block *c)
6133ecac800SPekka Enberg {
6143ecac800SPekka Enberg 	struct qcow_refcount_table *rft = &q->refcount_table;
6153ecac800SPekka Enberg 	struct rb_root *r = &rft->root;
6163ecac800SPekka Enberg 	struct qcow_refcount_block *lru;
6173ecac800SPekka Enberg 
6183ecac800SPekka Enberg 	if (rft->nr_cached == MAX_CACHE_NODES) {
6193ecac800SPekka Enberg 		lru = list_first_entry(&rft->lru_list, struct qcow_refcount_block, list);
6203ecac800SPekka Enberg 
6213ecac800SPekka Enberg 		if (write_refcount_block(q, lru) < 0)
6223ecac800SPekka Enberg 			goto error;
6233ecac800SPekka Enberg 
6243ecac800SPekka Enberg 		rb_erase(&lru->node, r);
6253ecac800SPekka Enberg 		list_del_init(&lru->list);
6263ecac800SPekka Enberg 		rft->nr_cached--;
6273ecac800SPekka Enberg 
6283ecac800SPekka Enberg 		free(lru);
6293ecac800SPekka Enberg 	}
6303ecac800SPekka Enberg 
6313ecac800SPekka Enberg 	if (refcount_block_insert(r, c) < 0)
6323ecac800SPekka Enberg 		goto error;
6333ecac800SPekka Enberg 
6343ecac800SPekka Enberg 	list_add_tail(&c->list, &rft->lru_list);
6353ecac800SPekka Enberg 	rft->nr_cached++;
6363ecac800SPekka Enberg 
6373ecac800SPekka Enberg 	return 0;
6383ecac800SPekka Enberg error:
6393ecac800SPekka Enberg 	return -1;
6403ecac800SPekka Enberg }
6413ecac800SPekka Enberg 
6423ecac800SPekka Enberg static struct qcow_refcount_block *new_refcount_block(struct qcow *q, u64 rfb_offset)
6433ecac800SPekka Enberg {
6443ecac800SPekka Enberg 	struct qcow_refcount_block *rfb;
6453ecac800SPekka Enberg 
646af68c51aSLan Tianyu 	rfb = malloc(sizeof *rfb + q->cluster_size);
6473ecac800SPekka Enberg 	if (!rfb)
6483ecac800SPekka Enberg 		return NULL;
6493ecac800SPekka Enberg 
6503ecac800SPekka Enberg 	rfb->offset = rfb_offset;
651af68c51aSLan Tianyu 	rfb->size = q->cluster_size / sizeof(u16);
6523ecac800SPekka Enberg 	RB_CLEAR_NODE(&rfb->node);
6533ecac800SPekka Enberg 	INIT_LIST_HEAD(&rfb->list);
6543ecac800SPekka Enberg 
6553ecac800SPekka Enberg 	return rfb;
6563ecac800SPekka Enberg }
6573ecac800SPekka Enberg 
6583ecac800SPekka Enberg static struct qcow_refcount_block *refcount_block_lookup(struct rb_root *root, u64 offset)
6593ecac800SPekka Enberg {
6603ecac800SPekka Enberg 	struct rb_node *link = root->rb_node;
6613ecac800SPekka Enberg 
6623ecac800SPekka Enberg 	while (link) {
6633ecac800SPekka Enberg 		struct qcow_refcount_block *t;
6643ecac800SPekka Enberg 
6653ecac800SPekka Enberg 		t = rb_entry(link, struct qcow_refcount_block, node);
6663ecac800SPekka Enberg 		if (!t)
6673ecac800SPekka Enberg 			goto out;
6683ecac800SPekka Enberg 
6693ecac800SPekka Enberg 		if (t->offset > offset)
6703ecac800SPekka Enberg 			link = link->rb_left;
6713ecac800SPekka Enberg 		else if (t->offset < offset)
6723ecac800SPekka Enberg 			link = link->rb_right;
6733ecac800SPekka Enberg 		else
6743ecac800SPekka Enberg 			return t;
6753ecac800SPekka Enberg 	}
6763ecac800SPekka Enberg out:
6773ecac800SPekka Enberg 	return NULL;
6783ecac800SPekka Enberg }
6793ecac800SPekka Enberg 
6803ecac800SPekka Enberg static struct qcow_refcount_block *refcount_block_search(struct qcow *q, u64 offset)
6813ecac800SPekka Enberg {
6823ecac800SPekka Enberg 	struct qcow_refcount_table *rft = &q->refcount_table;
6833ecac800SPekka Enberg 	struct qcow_refcount_block *rfb;
6843ecac800SPekka Enberg 
6853ecac800SPekka Enberg 	rfb = refcount_block_lookup(&rft->root, offset);
6863ecac800SPekka Enberg 	if (!rfb)
6873ecac800SPekka Enberg 		return NULL;
6883ecac800SPekka Enberg 
6893ecac800SPekka Enberg 	/* Update the LRU state by moving the found node to the list tail */
6903ecac800SPekka Enberg 	list_move_tail(&rfb->list, &rft->lru_list);
6913ecac800SPekka Enberg 
6923ecac800SPekka Enberg 	return rfb;
6933ecac800SPekka Enberg }
6943ecac800SPekka Enberg 
6953ecac800SPekka Enberg static struct qcow_refcount_block *qcow_read_refcount_block(struct qcow *q, u64 clust_idx)
6963ecac800SPekka Enberg {
6973ecac800SPekka Enberg 	struct qcow_header *header = q->header;
6983ecac800SPekka Enberg 	struct qcow_refcount_table *rft = &q->refcount_table;
6993ecac800SPekka Enberg 	struct qcow_refcount_block *rfb;
7003ecac800SPekka Enberg 	u64 rfb_offset;
7013ecac800SPekka Enberg 	u64 rft_idx;
7023ecac800SPekka Enberg 
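	/*
	 * Each refcount block fills one cluster with u16 entries, so a
	 * cluster index splits into a refcount-table index (high bits) and
	 * an index into the block (low bits), as computed here and in
	 * qcow_write_cluster().
	 */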
7033ecac800SPekka Enberg 	rft_idx = clust_idx >> (header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT);
7043ecac800SPekka Enberg 	if (rft_idx >= rft->rf_size)
7053ecac800SPekka Enberg 		return NULL;
7063ecac800SPekka Enberg 
7073ecac800SPekka Enberg 	rfb_offset = be64_to_cpu(rft->rf_table[rft_idx]);
7083ecac800SPekka Enberg 
7093ecac800SPekka Enberg 	rfb = refcount_block_search(q, rfb_offset);
7103ecac800SPekka Enberg 	if (rfb)
7113ecac800SPekka Enberg 		return rfb;
7123ecac800SPekka Enberg 
7133ecac800SPekka Enberg 	rfb = new_refcount_block(q, rfb_offset);
7143ecac800SPekka Enberg 	if (!rfb)
7153ecac800SPekka Enberg 		return NULL;
7163ecac800SPekka Enberg 
7173ecac800SPekka Enberg 	if (pread_in_full(q->fd, rfb->entries, rfb->size * sizeof(u16), rfb_offset) < 0)
7183ecac800SPekka Enberg 		goto error_free_rfb;
7193ecac800SPekka Enberg 
7203ecac800SPekka Enberg 	if (cache_refcount_block(q, rfb) < 0)
7213ecac800SPekka Enberg 		goto error_free_rfb;
7223ecac800SPekka Enberg 
7233ecac800SPekka Enberg 	return rfb;
7243ecac800SPekka Enberg 
7253ecac800SPekka Enberg error_free_rfb:
7263ecac800SPekka Enberg 	free(rfb);
7273ecac800SPekka Enberg 
7283ecac800SPekka Enberg 	return NULL;
7293ecac800SPekka Enberg }
7303ecac800SPekka Enberg 
731865c675fSPrasad Joshi /*
732865c675fSPrasad Joshi  * A QCOW file might grow during a write operation: not only data but also
733865c675fSPrasad Joshi  * metadata is written at the end of the file. It is therefore necessary to
7340df6b4d9SPekka Enberg  * ensure every write is committed to disk, hence qcow_pwrite_sync() is
735865c675fSPrasad Joshi  * used to synchronize the in-core state of the QCOW image with the disk.
736865c675fSPrasad Joshi  *
737865c675fSPrasad Joshi  * We also try to restore the image to a consistent state if a metadata
738865c675fSPrasad Joshi  * operation fails. The two metadata operations are the level 1 and level 2
739865c675fSPrasad Joshi  * table updates; if either fails, the image is truncated back to a consistent state.
740865c675fSPrasad Joshi  */
741b1c84095SPekka Enberg static ssize_t qcow_write_cluster(struct qcow *q, u64 offset, void *buf, u32 src_len)
742865c675fSPrasad Joshi {
743865c675fSPrasad Joshi 	struct qcow_header *header = q->header;
7443fb67b93SPekka Enberg 	struct qcow_l1_table *l1t = &q->table;
745fe8bdde0SPekka Enberg 	struct qcow_l2_table *l2t;
7460df6b4d9SPekka Enberg 	u64 clust_start;
7473ecac800SPekka Enberg 	u64 clust_flags;
7483fb67b93SPekka Enberg 	u64 l2t_offset;
7490df6b4d9SPekka Enberg 	u64 clust_off;
7503fb67b93SPekka Enberg 	u64 l2t_size;
751865c675fSPrasad Joshi 	u64 clust_sz;
752865c675fSPrasad Joshi 	u64 l1t_idx;
753865c675fSPrasad Joshi 	u64 l2t_idx;
754865c675fSPrasad Joshi 	u64 f_sz;
7550df6b4d9SPekka Enberg 	u64 len;
756865c675fSPrasad Joshi 
757fe8bdde0SPekka Enberg 	l2t		= NULL;
7583fb67b93SPekka Enberg 	l2t_size	= 1 << header->l2_bits;
759865c675fSPrasad Joshi 	clust_sz	= 1 << header->cluster_bits;
760865c675fSPrasad Joshi 
761865c675fSPrasad Joshi 	l1t_idx = get_l1_index(q, offset);
7623fb67b93SPekka Enberg 	if (l1t_idx >= l1t->table_size)
763c0799eb9SPekka Enberg 		return -1;
764865c675fSPrasad Joshi 
765865c675fSPrasad Joshi 	l2t_idx = get_l2_index(q, offset);
7663fb67b93SPekka Enberg 	if (l2t_idx >= l2t_size)
767c0799eb9SPekka Enberg 		return -1;
768865c675fSPrasad Joshi 
769865c675fSPrasad Joshi 	clust_off = get_cluster_offset(q, offset);
770865c675fSPrasad Joshi 	if (clust_off >= clust_sz)
771c0799eb9SPekka Enberg 		return -1;
772865c675fSPrasad Joshi 
773865c675fSPrasad Joshi 	len = clust_sz - clust_off;
774865c675fSPrasad Joshi 	if (len > src_len)
775865c675fSPrasad Joshi 		len = src_len;
776865c675fSPrasad Joshi 
777c0799eb9SPekka Enberg 	mutex_lock(&q->mutex);
778c0799eb9SPekka Enberg 
7793fb67b93SPekka Enberg 	l2t_offset = be64_to_cpu(l1t->l1_table[l1t_idx]);
780af68c51aSLan Tianyu 	if (l2t_offset & QCOW2_OFLAG_COMPRESSED) {
781121dd76eSPekka Enberg 		pr_warning("compressed clusters are not supported");
782121dd76eSPekka Enberg 		goto error;
783121dd76eSPekka Enberg 	}
784af68c51aSLan Tianyu 	if (!(l2t_offset & QCOW2_OFLAG_COPIED)) {
7853ecac800SPekka Enberg 		pr_warning("L2 copy-on-write clusters are not supported");
786b2ebe61bSPekka Enberg 		goto error;
787b2ebe61bSPekka Enberg 	}
788b2ebe61bSPekka Enberg 
789af68c51aSLan Tianyu 	l2t_offset &= QCOW2_OFFSET_MASK;
7903fb67b93SPekka Enberg 	if (l2t_offset) {
7913309045fSPrasad Joshi 		/* read and cache l2 table */
7923fb67b93SPekka Enberg 		l2t = qcow_read_l2_table(q, l2t_offset);
793fe8bdde0SPekka Enberg 		if (!l2t)
7943309045fSPrasad Joshi 			goto error;
795865c675fSPrasad Joshi 	} else {
7963fb67b93SPekka Enberg 		l2t = new_cache_table(q, l2t_offset);
797fe8bdde0SPekka Enberg 		if (!l2t)
7983309045fSPrasad Joshi 			goto error;
7993309045fSPrasad Joshi 
8000df6b4d9SPekka Enberg 		/* Capture the state of the consistent QCOW image */
801865c675fSPrasad Joshi 		f_sz = file_size(q->fd);
802865c675fSPrasad Joshi 		if (!f_sz)
8033309045fSPrasad Joshi 			goto free_cache;
804865c675fSPrasad Joshi 
805865c675fSPrasad Joshi 		/* Write an L2 table of zeroes at the end of the file */
8063fb67b93SPekka Enberg 		l2t_offset = qcow_write_l2_table(q, l2t->table);
8073fb67b93SPekka Enberg 		if (!l2t_offset)
8083309045fSPrasad Joshi 			goto free_cache;
809865c675fSPrasad Joshi 
810fe8bdde0SPekka Enberg 		if (cache_table(q, l2t) < 0) {
8113309045fSPrasad Joshi 			if (ftruncate(q->fd, f_sz) < 0)
8123309045fSPrasad Joshi 				goto free_cache;
8133309045fSPrasad Joshi 
8143309045fSPrasad Joshi 			goto free_cache;
815865c675fSPrasad Joshi 		}
816865c675fSPrasad Joshi 
8170df6b4d9SPekka Enberg 		/* Update the in-core entry */
8183fb67b93SPekka Enberg 		l1t->l1_table[l1t_idx] = cpu_to_be64(l2t_offset);
819865c675fSPrasad Joshi 	}
820865c675fSPrasad Joshi 
8210df6b4d9SPekka Enberg 	/* Capture the state of the consistent QCOW image */
822865c675fSPrasad Joshi 	f_sz		= file_size(q->fd);
823865c675fSPrasad Joshi 	if (!f_sz)
8243309045fSPrasad Joshi 		goto error;
825865c675fSPrasad Joshi 
826b2ebe61bSPekka Enberg 	clust_start = be64_to_cpu(l2t->table[l2t_idx]);
8273ecac800SPekka Enberg 
828af68c51aSLan Tianyu 	clust_flags = clust_start & QCOW2_OFLAGS_MASK;
829af68c51aSLan Tianyu 	if (clust_flags & QCOW2_OFLAG_COMPRESSED) {
830121dd76eSPekka Enberg 		pr_warning("compressed clusters are not supported");
831121dd76eSPekka Enberg 		goto error;
832121dd76eSPekka Enberg 	}
833b2ebe61bSPekka Enberg 
834af68c51aSLan Tianyu 	clust_start &= QCOW2_OFFSET_MASK;
835865c675fSPrasad Joshi 	if (!clust_start) {
836865c675fSPrasad Joshi 		clust_start		= ALIGN(f_sz, clust_sz);
837af68c51aSLan Tianyu 		l2t->table[l2t_idx]	= cpu_to_be64(clust_start | QCOW2_OFLAG_COPIED);
838aff88976SPekka Enberg 		l2t->dirty		= 1;
839865c675fSPrasad Joshi 	}
8400df6b4d9SPekka Enberg 
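	/*
	 * Without QCOW2_OFLAG_COPIED the cluster may be shared with a
	 * snapshot, so check its refcount and proceed only when this is the
	 * sole reference: copy-on-write is not supported here.
	 */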
841af68c51aSLan Tianyu 	if (!(clust_flags & QCOW2_OFLAG_COPIED)) {
8423ecac800SPekka Enberg 		struct qcow_refcount_block *rfb = NULL;
8433ecac800SPekka Enberg 		u16 clust_refcount;
8443ecac800SPekka Enberg 		u64 clust_idx;
8453ecac800SPekka Enberg 		u64 rfb_idx;
8463ecac800SPekka Enberg 
847af68c51aSLan Tianyu 		clust_idx = (clust_start & QCOW2_OFFSET_MASK)
848af68c51aSLan Tianyu 			>> (header->cluster_bits);
8493ecac800SPekka Enberg 
8503ecac800SPekka Enberg 		rfb = qcow_read_refcount_block(q, clust_idx);
8513ecac800SPekka Enberg 		if (!rfb) {
8523ecac800SPekka Enberg 			pr_warning("L1: error while reading refcount table");
8533ecac800SPekka Enberg 			goto error;
8543ecac800SPekka Enberg 		}
8553ecac800SPekka Enberg 
8563ecac800SPekka Enberg 		rfb_idx = clust_idx & (((1ULL << (header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT)) - 1));
8573ecac800SPekka Enberg 		if (rfb_idx >= rfb->size) {
8583ecac800SPekka Enberg 			pr_warning("L1: refcount block index out of bounds");
8593ecac800SPekka Enberg 			goto error;
8603ecac800SPekka Enberg 		}
8613ecac800SPekka Enberg 
8623ecac800SPekka Enberg 		clust_refcount = be16_to_cpu(rfb->entries[rfb_idx]);
8633ecac800SPekka Enberg 		if (!clust_refcount) {
8643ecac800SPekka Enberg 			clust_refcount = 1;
8653ecac800SPekka Enberg 			rfb->entries[rfb_idx] = cpu_to_be16(clust_refcount);
8663ecac800SPekka Enberg 			rfb->dirty = 1;
8673ecac800SPekka Enberg 		}
8683ecac800SPekka Enberg 
8693ecac800SPekka Enberg 		if (clust_refcount > 1) {
8703ecac800SPekka Enberg 			pr_warning("L1 copy-on-write clusters are not supported");
8713ecac800SPekka Enberg 			goto error;
8723ecac800SPekka Enberg 		}
8733ecac800SPekka Enberg 	}
8743ecac800SPekka Enberg 
875c0799eb9SPekka Enberg 	mutex_unlock(&q->mutex);
876c0799eb9SPekka Enberg 
877a4e46515SPekka Enberg 	/* Write actual data */
878a4e46515SPekka Enberg 	if (pwrite_in_full(q->fd, buf, len, clust_start + clust_off) < 0)
879a4e46515SPekka Enberg 		return -1;
880a4e46515SPekka Enberg 
881865c675fSPrasad Joshi 	return len;
8823309045fSPrasad Joshi 
8833309045fSPrasad Joshi free_cache:
884fe8bdde0SPekka Enberg 	free(l2t);
885865c675fSPrasad Joshi error:
886c0799eb9SPekka Enberg 	mutex_unlock(&q->mutex);
887865c675fSPrasad Joshi 	return -1;
888865c675fSPrasad Joshi }
889865c675fSPrasad Joshi 
890*2534c9b6SSasha Levin static ssize_t qcow_write_sector_single(struct disk_image *disk, u64 sector, void *src, u32 src_len)
89186835cedSPrasad Joshi {
892865c675fSPrasad Joshi 	struct qcow *q = disk->priv;
893865c675fSPrasad Joshi 	struct qcow_header *header = q->header;
894c4acb611SIngo Molnar 	u32 nr_written;
8950df6b4d9SPekka Enberg 	char *buf;
896865c675fSPrasad Joshi 	u64 offset;
897865c675fSPrasad Joshi 	ssize_t nr;
898865c675fSPrasad Joshi 
8990df6b4d9SPekka Enberg 	buf		= src;
9000df6b4d9SPekka Enberg 	nr_written	= 0;
901865c675fSPrasad Joshi 	offset		= sector << SECTOR_SHIFT;
9020df6b4d9SPekka Enberg 
9030df6b4d9SPekka Enberg 	while (nr_written < src_len) {
904865c675fSPrasad Joshi 		if (offset >= header->size)
9050df6b4d9SPekka Enberg 			return -1;
906865c675fSPrasad Joshi 
907b1c84095SPekka Enberg 		nr = qcow_write_cluster(q, offset, buf, src_len - nr_written);
908865c675fSPrasad Joshi 		if (nr < 0)
9090df6b4d9SPekka Enberg 			return -1;
910865c675fSPrasad Joshi 
9110df6b4d9SPekka Enberg 		nr_written	+= nr;
912865c675fSPrasad Joshi 		buf		+= nr;
913865c675fSPrasad Joshi 		offset		+= nr;
914865c675fSPrasad Joshi 	}
9150df6b4d9SPekka Enberg 
91672133dd2SAsias He 	return nr_written;
91786835cedSPrasad Joshi }
91886835cedSPrasad Joshi 
919*2534c9b6SSasha Levin static ssize_t qcow_write_sector(struct disk_image *disk, u64 sector,
920*2534c9b6SSasha Levin 				const struct iovec *iov, int iovcount)
921*2534c9b6SSasha Levin {
922*2534c9b6SSasha Levin 	ssize_t nr, total = 0;
923*2534c9b6SSasha Levin 
924*2534c9b6SSasha Levin 	while (iovcount--) {
925*2534c9b6SSasha Levin 		nr = qcow_write_sector_single(disk, sector, iov->iov_base, iov->iov_len);
926*2534c9b6SSasha Levin 		if (nr != (ssize_t)iov->iov_len) {
927*2534c9b6SSasha Levin 			pr_info("qcow_write_sector error: nr=%ld iov_len=%ld\n", (long)nr, (long)iov->iov_len);
928*2534c9b6SSasha Levin 			return -1;
929*2534c9b6SSasha Levin 		}
930*2534c9b6SSasha Levin 
931*2534c9b6SSasha Levin 		sector	+= iov->iov_len >> SECTOR_SHIFT;
932*2534c9b6SSasha Levin 		iov++;
933*2534c9b6SSasha Levin 		total	+= nr;
934*2534c9b6SSasha Levin 	}
935*2534c9b6SSasha Levin 
936*2534c9b6SSasha Levin 	return total;
937*2534c9b6SSasha Levin }
938*2534c9b6SSasha Levin 
939*2534c9b6SSasha Levin static ssize_t qcow_nowrite_sector(struct disk_image *disk, u64 sector,
940*2534c9b6SSasha Levin 				const struct iovec *iov, int iovcount)
941f10860caSPekka Enberg {
942f10860caSPekka Enberg 	/* I/O error */
943b1c84095SPekka Enberg 	pr_info("%s: no write support\n", __func__);
944f10860caSPekka Enberg 	return -1;
945f10860caSPekka Enberg }
946f10860caSPekka Enberg 
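/*
 * Flush ordering matters for consistency: dirty refcount blocks are
 * written and synced first, then the cached L2 tables, and the L1 table
 * is written only after both have reached the disk.
 */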
947659f4186SPekka Enberg static int qcow_disk_flush(struct disk_image *disk)
948659f4186SPekka Enberg {
94973984b11SPekka Enberg 	struct qcow *q = disk->priv;
9503ecac800SPekka Enberg 	struct qcow_refcount_table *rft;
95173984b11SPekka Enberg 	struct qcow_header *header;
952a4e46515SPekka Enberg 	struct list_head *pos, *n;
9537b4eb530SPekka Enberg 	struct qcow_l1_table *l1t;
95473984b11SPekka Enberg 
95573984b11SPekka Enberg 	header = q->header;
9567b4eb530SPekka Enberg 	l1t = &q->table;
9573ecac800SPekka Enberg 	rft = &q->refcount_table;
95873984b11SPekka Enberg 
959a4e46515SPekka Enberg 	mutex_lock(&q->mutex);
960a4e46515SPekka Enberg 
9613ecac800SPekka Enberg 	list_for_each_safe(pos, n, &rft->lru_list) {
9623ecac800SPekka Enberg 		struct qcow_refcount_block *c = list_entry(pos, struct qcow_refcount_block, list);
9633ecac800SPekka Enberg 
9643ecac800SPekka Enberg 		if (write_refcount_block(q, c) < 0)
9653ecac800SPekka Enberg 			goto error_unlock;
9663ecac800SPekka Enberg 	}
9673ecac800SPekka Enberg 
9683ecac800SPekka Enberg 	if (fdatasync(disk->fd) < 0)
9693ecac800SPekka Enberg 		goto error_unlock;
9703ecac800SPekka Enberg 
9717b4eb530SPekka Enberg 	list_for_each_safe(pos, n, &l1t->lru_list) {
972a4e46515SPekka Enberg 		struct qcow_l2_table *c = list_entry(pos, struct qcow_l2_table, list);
973a4e46515SPekka Enberg 
974a4e46515SPekka Enberg 		if (qcow_l2_cache_write(q, c) < 0)
975a4e46515SPekka Enberg 			goto error_unlock;
976a4e46515SPekka Enberg 	}
977a4e46515SPekka Enberg 
978a4e46515SPekka Enberg 	if (fdatasync(disk->fd) < 0)
979a4e46515SPekka Enberg 		goto error_unlock;
980a4e46515SPekka Enberg 
9817b4eb530SPekka Enberg 	if (pwrite_in_full(disk->fd, l1t->l1_table, l1t->table_size * sizeof(u64), header->l1_table_offset) < 0)
982a4e46515SPekka Enberg 		goto error_unlock;
983a4e46515SPekka Enberg 
984a4e46515SPekka Enberg 	mutex_unlock(&q->mutex);
98573984b11SPekka Enberg 
986659f4186SPekka Enberg 	return fsync(disk->fd);
987a4e46515SPekka Enberg 
988a4e46515SPekka Enberg error_unlock:
989a4e46515SPekka Enberg 	mutex_unlock(&q->mutex);
990a4e46515SPekka Enberg 	return -1;
991659f4186SPekka Enberg }
992659f4186SPekka Enberg 
993b1c84095SPekka Enberg static int qcow_disk_close(struct disk_image *disk)
99486835cedSPrasad Joshi {
99586835cedSPrasad Joshi 	struct qcow *q;
99686835cedSPrasad Joshi 
99743835ac9SSasha Levin 	if (!disk)
99872133dd2SAsias He 		return 0;
99986835cedSPrasad Joshi 
100043835ac9SSasha Levin 	q = disk->priv;
100186835cedSPrasad Joshi 
10023ecac800SPekka Enberg 	refcount_table_free_cache(&q->refcount_table);
1003e94cdf08SPekka Enberg 	l1_table_free_cache(&q->table);
1004af68c51aSLan Tianyu 	free(q->cluster_data);
1005af68c51aSLan Tianyu 	free(q->cluster_cache);
10063ecac800SPekka Enberg 	free(q->refcount_table.rf_table);
10076c6f79b6SPrasad Joshi 	free(q->table.l1_table);
100886835cedSPrasad Joshi 	free(q->header);
100986835cedSPrasad Joshi 	free(q);
101072133dd2SAsias He 
101172133dd2SAsias He 	return 0;
101286835cedSPrasad Joshi }
101386835cedSPrasad Joshi 
1014b1c84095SPekka Enberg static struct disk_image_operations qcow_disk_readonly_ops = {
1015b1c84095SPekka Enberg 	.read_sector		= qcow_read_sector,
1016b1c84095SPekka Enberg 	.write_sector		= qcow_nowrite_sector,
1017b1c84095SPekka Enberg 	.close			= qcow_disk_close,
1018f10860caSPekka Enberg };
1019f10860caSPekka Enberg 
1020b1c84095SPekka Enberg static struct disk_image_operations qcow_disk_ops = {
1021b1c84095SPekka Enberg 	.read_sector		= qcow_read_sector,
1022b1c84095SPekka Enberg 	.write_sector		= qcow_write_sector,
1023659f4186SPekka Enberg 	.flush			= qcow_disk_flush,
1024b1c84095SPekka Enberg 	.close			= qcow_disk_close,
102586835cedSPrasad Joshi };
102686835cedSPrasad Joshi 
10273ecac800SPekka Enberg static int qcow_read_refcount_table(struct qcow *q)
10283ecac800SPekka Enberg {
10293ecac800SPekka Enberg 	struct qcow_header *header = q->header;
10303ecac800SPekka Enberg 	struct qcow_refcount_table *rft = &q->refcount_table;
10313ecac800SPekka Enberg 
1032af68c51aSLan Tianyu 	rft->rf_size = (header->refcount_table_size * q->cluster_size)
1033af68c51aSLan Tianyu 		/ sizeof(u64);
10343ecac800SPekka Enberg 
10353ecac800SPekka Enberg 	rft->rf_table = calloc(rft->rf_size, sizeof(u64));
10363ecac800SPekka Enberg 	if (!rft->rf_table)
10373ecac800SPekka Enberg 		return -1;
10383ecac800SPekka Enberg 
10393ecac800SPekka Enberg 	rft->root = RB_ROOT;
10403ecac800SPekka Enberg 	INIT_LIST_HEAD(&rft->lru_list);
10413ecac800SPekka Enberg 
10423ecac800SPekka Enberg 	return pread_in_full(q->fd, rft->rf_table, sizeof(u64) * rft->rf_size, header->refcount_table_offset);
10433ecac800SPekka Enberg }
10443ecac800SPekka Enberg 
104586835cedSPrasad Joshi static int qcow_read_l1_table(struct qcow *q)
104686835cedSPrasad Joshi {
1047ad627d62SPekka Enberg 	struct qcow_header *header = q->header;
1048473aaa2dSPekka Enberg 	struct qcow_l1_table *table = &q->table;
104986835cedSPrasad Joshi 
1050ad627d62SPekka Enberg 	table->table_size	= header->l1_size;
105186835cedSPrasad Joshi 
105200adcc1bSPrasad Joshi 	table->l1_table	= calloc(table->table_size, sizeof(u64));
105300adcc1bSPrasad Joshi 	if (!table->l1_table)
105486835cedSPrasad Joshi 		return -1;
105586835cedSPrasad Joshi 
1056659f4186SPekka Enberg 	return pread_in_full(q->fd, table->l1_table, sizeof(u64) * table->table_size, header->l1_table_offset);
105786835cedSPrasad Joshi }
105886835cedSPrasad Joshi 
1059ad627d62SPekka Enberg static void *qcow2_read_header(int fd)
106086835cedSPrasad Joshi {
1061ad627d62SPekka Enberg 	struct qcow2_header_disk f_header;
1062ad627d62SPekka Enberg 	struct qcow_header *header;
106386835cedSPrasad Joshi 
1064ad627d62SPekka Enberg 	header = malloc(sizeof(struct qcow_header));
106586835cedSPrasad Joshi 	if (!header)
106686835cedSPrasad Joshi 		return NULL;
106786835cedSPrasad Joshi 
10680657f33dSPrasad Joshi 	if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0) {
10690657f33dSPrasad Joshi 		free(header);
107086835cedSPrasad Joshi 		return NULL;
10710657f33dSPrasad Joshi 	}
107286835cedSPrasad Joshi 
1073ad627d62SPekka Enberg 	be32_to_cpus(&f_header.magic);
1074ad627d62SPekka Enberg 	be32_to_cpus(&f_header.version);
1075ad627d62SPekka Enberg 	be64_to_cpus(&f_header.backing_file_offset);
1076ad627d62SPekka Enberg 	be32_to_cpus(&f_header.backing_file_size);
1077ad627d62SPekka Enberg 	be32_to_cpus(&f_header.cluster_bits);
1078ad627d62SPekka Enberg 	be64_to_cpus(&f_header.size);
1079ad627d62SPekka Enberg 	be32_to_cpus(&f_header.crypt_method);
1080ad627d62SPekka Enberg 	be32_to_cpus(&f_header.l1_size);
1081ad627d62SPekka Enberg 	be64_to_cpus(&f_header.l1_table_offset);
1082ad627d62SPekka Enberg 	be64_to_cpus(&f_header.refcount_table_offset);
1083ad627d62SPekka Enberg 	be32_to_cpus(&f_header.refcount_table_clusters);
1084ad627d62SPekka Enberg 	be32_to_cpus(&f_header.nb_snapshots);
1085ad627d62SPekka Enberg 	be64_to_cpus(&f_header.snapshots_offset);
1086ad627d62SPekka Enberg 
1087ad627d62SPekka Enberg 	*header		= (struct qcow_header) {
1088ad627d62SPekka Enberg 		.size			= f_header.size,
1089ad627d62SPekka Enberg 		.l1_table_offset	= f_header.l1_table_offset,
1090ad627d62SPekka Enberg 		.l1_size		= f_header.l1_size,
1091ad627d62SPekka Enberg 		.cluster_bits		= f_header.cluster_bits,
1092ad627d62SPekka Enberg 		.l2_bits		= f_header.cluster_bits - 3,
10933ecac800SPekka Enberg 		.refcount_table_offset	= f_header.refcount_table_offset,
10943ecac800SPekka Enberg 		.refcount_table_size	= f_header.refcount_table_clusters,
1095ad627d62SPekka Enberg 	};
1096ad627d62SPekka Enberg 
1097ad627d62SPekka Enberg 	return header;
1098ad627d62SPekka Enberg }
1099ad627d62SPekka Enberg 
1100f10860caSPekka Enberg static struct disk_image *qcow2_probe(int fd, bool readonly)
1101ad627d62SPekka Enberg {
1102ad627d62SPekka Enberg 	struct disk_image *disk_image;
11037b4eb530SPekka Enberg 	struct qcow_l1_table *l1t;
11047b4eb530SPekka Enberg 	struct qcow_header *h;
11057b4eb530SPekka Enberg 	struct qcow *q;
1106ad627d62SPekka Enberg 
1107ad627d62SPekka Enberg 	q = calloc(1, sizeof(struct qcow));
1108ad627d62SPekka Enberg 	if (!q)
1109af68c51aSLan Tianyu 		return NULL;
1110ad627d62SPekka Enberg 
1111c0799eb9SPekka Enberg 	mutex_init(&q->mutex);
1112ad627d62SPekka Enberg 	q->fd = fd;
11137b4eb530SPekka Enberg 
11147b4eb530SPekka Enberg 	l1t = &q->table;
11157b4eb530SPekka Enberg 
11167b4eb530SPekka Enberg 	l1t->root = RB_ROOT;
11177b4eb530SPekka Enberg 	INIT_LIST_HEAD(&l1t->lru_list);
1118ad627d62SPekka Enberg 
1119ad627d62SPekka Enberg 	h = q->header = qcow2_read_header(fd);
1120ad627d62SPekka Enberg 	if (!h)
1121af68c51aSLan Tianyu 		goto free_qcow;
1122af68c51aSLan Tianyu 
1123af68c51aSLan Tianyu 	q->version = QCOW2_VERSION;
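	/*
	 * Derived from the cluster size: csize_shift/csize_mask locate the
	 * compressed-sector count inside an L2 entry, and
	 * cluster_offset_mask extracts the host offset below it.
	 */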
1124af68c51aSLan Tianyu 	q->csize_shift = (62 - (q->header->cluster_bits - 8));
1125af68c51aSLan Tianyu 	q->csize_mask = (1 << (q->header->cluster_bits - 8)) - 1;
1126af68c51aSLan Tianyu 	q->cluster_offset_mask = (1LL << q->csize_shift) - 1;
1127af68c51aSLan Tianyu 	q->cluster_size = 1 << q->header->cluster_bits;
1128af68c51aSLan Tianyu 
1129af68c51aSLan Tianyu 	q->cluster_data = malloc(q->cluster_size);
1130af68c51aSLan Tianyu 	if (!q->cluster_data) {
1131af68c51aSLan Tianyu 		pr_warning("cluster data malloc error!");
1132af68c51aSLan Tianyu 		goto free_header;
1133af68c51aSLan Tianyu 	}
1134af68c51aSLan Tianyu 
1135af68c51aSLan Tianyu 	q->cluster_cache = malloc(q->cluster_size);
1136af68c51aSLan Tianyu 	if (!q->cluster_cache) {
1137af68c51aSLan Tianyu 		pr_warning("cluster cache malloc error!");
1138af68c51aSLan Tianyu 		goto free_cluster_data;
1139af68c51aSLan Tianyu 	}
1140ad627d62SPekka Enberg 
1141ad627d62SPekka Enberg 	if (qcow_read_l1_table(q) < 0)
1142af68c51aSLan Tianyu 		goto free_cluster_cache;
1143ad627d62SPekka Enberg 
11443ecac800SPekka Enberg 	if (qcow_read_refcount_table(q) < 0)
1145af68c51aSLan Tianyu 		goto free_l1_table;
11463ecac800SPekka Enberg 
11477d22135fSAsias He 	/*
11487d22135fSAsias He 	 * Do not use mmap; use read/write instead
11497d22135fSAsias He 	 */
1150f10860caSPekka Enberg 	if (readonly)
115138c396e4SSasha Levin 		disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_REGULAR);
1152f10860caSPekka Enberg 	else
115338c396e4SSasha Levin 		disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_REGULAR);
1154f10860caSPekka Enberg 
1155ad627d62SPekka Enberg 	if (!disk_image)
1156af68c51aSLan Tianyu 		goto free_refcount_table;
1157ad627d62SPekka Enberg 	disk_image->priv = q;
1158ad627d62SPekka Enberg 
1159ad627d62SPekka Enberg 	return disk_image;
1160ad627d62SPekka Enberg 
1161af68c51aSLan Tianyu free_refcount_table:
1163af68c51aSLan Tianyu 	free(q->refcount_table.rf_table);
1164af68c51aSLan Tianyu free_l1_table:
1166ad627d62SPekka Enberg 	free(q->table.l1_table);
1167af68c51aSLan Tianyu free_cluster_cache:
1169af68c51aSLan Tianyu 	free(q->cluster_cache);
1170af68c51aSLan Tianyu free_cluster_data:
1172af68c51aSLan Tianyu 	free(q->cluster_data);
1173af68c51aSLan Tianyu free_header:
1175ad627d62SPekka Enberg 	free(q->header);
1176af68c51aSLan Tianyu free_qcow:
1178ad627d62SPekka Enberg 	free(q);
1179ad627d62SPekka Enberg 
1180ad627d62SPekka Enberg 	return NULL;
1181ad627d62SPekka Enberg }
1182ad627d62SPekka Enberg 
1183ad627d62SPekka Enberg static bool qcow2_check_image(int fd)
1184ad627d62SPekka Enberg {
1185ad627d62SPekka Enberg 	struct qcow2_header_disk f_header;
1186ad627d62SPekka Enberg 
1187ad627d62SPekka Enberg 	if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0)
1188ad627d62SPekka Enberg 		return false;
1189ad627d62SPekka Enberg 
1190ad627d62SPekka Enberg 	be32_to_cpus(&f_header.magic);
1191ad627d62SPekka Enberg 	be32_to_cpus(&f_header.version);
1192ad627d62SPekka Enberg 
1193ad627d62SPekka Enberg 	if (f_header.magic != QCOW_MAGIC)
1194ad627d62SPekka Enberg 		return false;
1195ad627d62SPekka Enberg 
1196ad627d62SPekka Enberg 	if (f_header.version != QCOW2_VERSION)
1197ad627d62SPekka Enberg 		return false;
1198ad627d62SPekka Enberg 
1199ad627d62SPekka Enberg 	return true;
1200ad627d62SPekka Enberg }
1201ad627d62SPekka Enberg 
1202ad627d62SPekka Enberg static void *qcow1_read_header(int fd)
1203ad627d62SPekka Enberg {
1204ad627d62SPekka Enberg 	struct qcow1_header_disk f_header;
1205ad627d62SPekka Enberg 	struct qcow_header *header;
1206ad627d62SPekka Enberg 
1207ad627d62SPekka Enberg 	header = malloc(sizeof(struct qcow_header));
1208ad627d62SPekka Enberg 	if (!header)
1209ad627d62SPekka Enberg 		return NULL;
1210ad627d62SPekka Enberg 
1211d39cefd2SSasha Levin 	if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0) {
1212d39cefd2SSasha Levin 		free(header);
1213ad627d62SPekka Enberg 		return NULL;
1214d39cefd2SSasha Levin 	}
1215ad627d62SPekka Enberg 
1216ad627d62SPekka Enberg 	be32_to_cpus(&f_header.magic);
1217ad627d62SPekka Enberg 	be32_to_cpus(&f_header.version);
1218ad627d62SPekka Enberg 	be64_to_cpus(&f_header.backing_file_offset);
1219ad627d62SPekka Enberg 	be32_to_cpus(&f_header.backing_file_size);
1220ad627d62SPekka Enberg 	be32_to_cpus(&f_header.mtime);
1221ad627d62SPekka Enberg 	be64_to_cpus(&f_header.size);
1222ad627d62SPekka Enberg 	be32_to_cpus(&f_header.crypt_method);
1223ad627d62SPekka Enberg 	be64_to_cpus(&f_header.l1_table_offset);
1224ad627d62SPekka Enberg 
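	/*
	 * A QCOW1 header has no explicit L1 size field; derive it from the
	 * image size and the bytes covered by one L1 entry (one full L2
	 * table worth of clusters).
	 */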
1225ad627d62SPekka Enberg 	*header		= (struct qcow_header) {
1226ad627d62SPekka Enberg 		.size			= f_header.size,
1227ad627d62SPekka Enberg 		.l1_table_offset	= f_header.l1_table_offset,
1228ad627d62SPekka Enberg 		.l1_size		= f_header.size / ((1 << f_header.l2_bits) * (1 << f_header.cluster_bits)),
1229ad627d62SPekka Enberg 		.cluster_bits		= f_header.cluster_bits,
1230ad627d62SPekka Enberg 		.l2_bits		= f_header.l2_bits,
1231ad627d62SPekka Enberg 	};
123286835cedSPrasad Joshi 
123386835cedSPrasad Joshi 	return header;
123486835cedSPrasad Joshi }
123586835cedSPrasad Joshi 
1236f10860caSPekka Enberg static struct disk_image *qcow1_probe(int fd, bool readonly)
123786835cedSPrasad Joshi {
123886835cedSPrasad Joshi 	struct disk_image *disk_image;
12397b4eb530SPekka Enberg 	struct qcow_l1_table *l1t;
12407b4eb530SPekka Enberg 	struct qcow_header *h;
12417b4eb530SPekka Enberg 	struct qcow *q;
124286835cedSPrasad Joshi 
124386835cedSPrasad Joshi 	q = calloc(1, sizeof(struct qcow));
124486835cedSPrasad Joshi 	if (!q)
1245af68c51aSLan Tianyu 		return NULL;
124686835cedSPrasad Joshi 
1247c0799eb9SPekka Enberg 	mutex_init(&q->mutex);
124886835cedSPrasad Joshi 	q->fd = fd;
12497b4eb530SPekka Enberg 
12507b4eb530SPekka Enberg 	l1t = &q->table;
12517b4eb530SPekka Enberg 
12527b4eb530SPekka Enberg 	l1t->root = RB_ROOT;
12537b4eb530SPekka Enberg 	INIT_LIST_HEAD(&l1t->lru_list);
125486835cedSPrasad Joshi 
125586835cedSPrasad Joshi 	h = q->header = qcow1_read_header(fd);
125686835cedSPrasad Joshi 	if (!h)
1257af68c51aSLan Tianyu 		goto free_qcow;
1258af68c51aSLan Tianyu 
1259af68c51aSLan Tianyu 	q->version = QCOW1_VERSION;
1260af68c51aSLan Tianyu 	q->cluster_size = 1 << q->header->cluster_bits;
1261af68c51aSLan Tianyu 	q->cluster_offset_mask = (1LL << (63 - q->header->cluster_bits)) - 1;
1262af68c51aSLan Tianyu 
1263af68c51aSLan Tianyu 	q->cluster_data = malloc(q->cluster_size);
1264af68c51aSLan Tianyu 	if (!q->cluster_data) {
1265af68c51aSLan Tianyu 		pr_warning("cluster data malloc error!");
1266af68c51aSLan Tianyu 		goto free_header;
1267af68c51aSLan Tianyu 	}
1268af68c51aSLan Tianyu 
1269af68c51aSLan Tianyu 	q->cluster_cache = malloc(q->cluster_size);
1270af68c51aSLan Tianyu 	if (!q->cluster_cache) {
1271af68c51aSLan Tianyu 		pr_warning("cluster cache malloc error!");
1272af68c51aSLan Tianyu 		goto free_cluster_data;
1273af68c51aSLan Tianyu 	}
127486835cedSPrasad Joshi 
127586835cedSPrasad Joshi 	if (qcow_read_l1_table(q) < 0)
1276af68c51aSLan Tianyu 		goto free_cluster_cache;
127786835cedSPrasad Joshi 
12787d22135fSAsias He 	/*
12797d22135fSAsias He 	 * Do not use mmap; use read/write instead
12807d22135fSAsias He 	 */
1281f10860caSPekka Enberg 	if (readonly)
128238c396e4SSasha Levin 		disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_REGULAR);
1283f10860caSPekka Enberg 	else
128438c396e4SSasha Levin 		disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_REGULAR);
1285f10860caSPekka Enberg 
128686835cedSPrasad Joshi 	if (!disk_image)
1287af68c51aSLan Tianyu 		goto free_l1_table;
128886835cedSPrasad Joshi 	disk_image->priv = q;
128986835cedSPrasad Joshi 
129086835cedSPrasad Joshi 	return disk_image;
129186835cedSPrasad Joshi 
1292af68c51aSLan Tianyu free_l1_table:
12946c6f79b6SPrasad Joshi 	free(q->table.l1_table);
1295af68c51aSLan Tianyu free_cluster_cache:
1297af68c51aSLan Tianyu 	free(q->cluster_cache);
1298af68c51aSLan Tianyu free_cluster_data:
1300af68c51aSLan Tianyu 	free(q->cluster_data);
1301af68c51aSLan Tianyu free_header:
130386835cedSPrasad Joshi 	free(q->header);
1304af68c51aSLan Tianyu free_qcow:
130686835cedSPrasad Joshi 	free(q);
130786835cedSPrasad Joshi 
130886835cedSPrasad Joshi 	return NULL;
130986835cedSPrasad Joshi }
131086835cedSPrasad Joshi 
1311ad627d62SPekka Enberg static bool qcow1_check_image(int fd)
131286835cedSPrasad Joshi {
1313ad627d62SPekka Enberg 	struct qcow1_header_disk f_header;
131486835cedSPrasad Joshi 
1315ad627d62SPekka Enberg 	if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0)
1316ad627d62SPekka Enberg 		return false;
131786835cedSPrasad Joshi 
1318ad627d62SPekka Enberg 	be32_to_cpus(&f_header.magic);
1319ad627d62SPekka Enberg 	be32_to_cpus(&f_header.version);
132086835cedSPrasad Joshi 
1321ad627d62SPekka Enberg 	if (f_header.magic != QCOW_MAGIC)
1322ad627d62SPekka Enberg 		return false;
132386835cedSPrasad Joshi 
1324ad627d62SPekka Enberg 	if (f_header.version != QCOW1_VERSION)
1325ad627d62SPekka Enberg 		return false;
132686835cedSPrasad Joshi 
1327ad627d62SPekka Enberg 	return true;
132886835cedSPrasad Joshi }
132986835cedSPrasad Joshi 
1330f10860caSPekka Enberg struct disk_image *qcow_probe(int fd, bool readonly)
133186835cedSPrasad Joshi {
1332ad627d62SPekka Enberg 	if (qcow1_check_image(fd))
1333f10860caSPekka Enberg 		return qcow1_probe(fd, readonly);
1334ad627d62SPekka Enberg 
1335ad627d62SPekka Enberg 	if (qcow2_check_image(fd))
1336f10860caSPekka Enberg 		return qcow2_probe(fd, readonly);
1337ad627d62SPekka Enberg 
1338ad627d62SPekka Enberg 	return NULL;
133986835cedSPrasad Joshi }