xref: /kvmtool/disk/qcow.c (revision e184700adb686405cbb73f3d9b4902f3ad112019)
186835cedSPrasad Joshi #include "kvm/qcow.h"
286835cedSPrasad Joshi 
386835cedSPrasad Joshi #include "kvm/disk-image.h"
486835cedSPrasad Joshi #include "kvm/read-write.h"
5c0799eb9SPekka Enberg #include "kvm/mutex.h"
686835cedSPrasad Joshi #include "kvm/util.h"
786835cedSPrasad Joshi 
886835cedSPrasad Joshi #include <sys/types.h>
986835cedSPrasad Joshi #include <sys/stat.h>
1086835cedSPrasad Joshi #include <stdbool.h>
1186835cedSPrasad Joshi #include <stdlib.h>
1286835cedSPrasad Joshi #include <string.h>
1386835cedSPrasad Joshi #include <unistd.h>
1486835cedSPrasad Joshi #include <fcntl.h>
15af68c51aSLan Tianyu #ifdef CONFIG_HAS_ZLIB
16af68c51aSLan Tianyu #include <zlib.h>
17af68c51aSLan Tianyu #endif
1886835cedSPrasad Joshi 
1986835cedSPrasad Joshi #include <linux/byteorder.h>
20865c675fSPrasad Joshi #include <linux/kernel.h>
210df6b4d9SPekka Enberg #include <linux/types.h>
2286835cedSPrasad Joshi 
23*e184700aSLan Tianyu 
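/*
 * Write the whole buffer at the given offset, then flush it with
 * fdatasync() so that metadata updates reach the disk before we
 * rely on them.
 */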
24*e184700aSLan Tianyu static inline int qcow_pwrite_sync(int fd,
25*e184700aSLan Tianyu 	void *buf, size_t count, off_t offset)
26*e184700aSLan Tianyu {
27*e184700aSLan Tianyu 	if (pwrite_in_full(fd, buf, count, offset) < 0)
28*e184700aSLan Tianyu 		return -1;
29*e184700aSLan Tianyu 
30*e184700aSLan Tianyu 	return fdatasync(fd);
31*e184700aSLan Tianyu }
32*e184700aSLan Tianyu 
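/*
 * Insert a cached L2 table into the per-image red-black tree, keyed by
 * its file offset. Returns 0 on success (or when an entry for that
 * offset already exists), -1 on error.
 */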
33e94cdf08SPekka Enberg static int l2_table_insert(struct rb_root *root, struct qcow_l2_table *new)
343309045fSPrasad Joshi {
353309045fSPrasad Joshi 	struct rb_node **link = &(root->rb_node), *parent = NULL;
363309045fSPrasad Joshi 	u64 offset = new->offset;
373309045fSPrasad Joshi 
383309045fSPrasad Joshi 	/* search the tree */
393309045fSPrasad Joshi 	while (*link) {
40473d58ffSPekka Enberg 		struct qcow_l2_table *t;
413309045fSPrasad Joshi 
42473d58ffSPekka Enberg 		t = rb_entry(*link, struct qcow_l2_table, node);
433309045fSPrasad Joshi 		if (!t)
443309045fSPrasad Joshi 			goto error;
453309045fSPrasad Joshi 
463309045fSPrasad Joshi 		parent = *link;
473309045fSPrasad Joshi 
483309045fSPrasad Joshi 		if (t->offset > offset)
493309045fSPrasad Joshi 			link = &(*link)->rb_left;
503309045fSPrasad Joshi 		else if (t->offset < offset)
513309045fSPrasad Joshi 			link = &(*link)->rb_right;
523309045fSPrasad Joshi 		else
533309045fSPrasad Joshi 			goto out;
543309045fSPrasad Joshi 	}
553309045fSPrasad Joshi 
563309045fSPrasad Joshi 	/* add new node */
573309045fSPrasad Joshi 	rb_link_node(&new->node, parent, link);
583309045fSPrasad Joshi 	rb_insert_color(&new->node, root);
593309045fSPrasad Joshi out:
603309045fSPrasad Joshi 	return 0;
613309045fSPrasad Joshi error:
623309045fSPrasad Joshi 	return -1;
633309045fSPrasad Joshi }
643309045fSPrasad Joshi 
65e94cdf08SPekka Enberg static struct qcow_l2_table *l2_table_lookup(struct rb_root *root, u64 offset)
663309045fSPrasad Joshi {
673309045fSPrasad Joshi 	struct rb_node *link = root->rb_node;
683309045fSPrasad Joshi 
693309045fSPrasad Joshi 	while (link) {
70473d58ffSPekka Enberg 		struct qcow_l2_table *t;
713309045fSPrasad Joshi 
72473d58ffSPekka Enberg 		t = rb_entry(link, struct qcow_l2_table, node);
733309045fSPrasad Joshi 		if (!t)
743309045fSPrasad Joshi 			goto out;
753309045fSPrasad Joshi 
763309045fSPrasad Joshi 		if (t->offset > offset)
773309045fSPrasad Joshi 			link = link->rb_left;
783309045fSPrasad Joshi 		else if (t->offset < offset)
793309045fSPrasad Joshi 			link = link->rb_right;
803309045fSPrasad Joshi 		else
813309045fSPrasad Joshi 			return t;
823309045fSPrasad Joshi 	}
833309045fSPrasad Joshi out:
843309045fSPrasad Joshi 	return NULL;
853309045fSPrasad Joshi }
863309045fSPrasad Joshi 
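/*
 * Drop the whole L2 table cache: walk the LRU list, unlink each table
 * from the list and the RB tree, and free it.
 */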
87e94cdf08SPekka Enberg static void l1_table_free_cache(struct qcow_l1_table *l1t)
883309045fSPrasad Joshi {
897b4eb530SPekka Enberg 	struct rb_root *r = &l1t->root;
903309045fSPrasad Joshi 	struct list_head *pos, *n;
91473d58ffSPekka Enberg 	struct qcow_l2_table *t;
923309045fSPrasad Joshi 
937b4eb530SPekka Enberg 	list_for_each_safe(pos, n, &l1t->lru_list) {
943309045fSPrasad Joshi 		/* Remove cache table from the list and RB tree */
953309045fSPrasad Joshi 		list_del(pos);
96473d58ffSPekka Enberg 		t = list_entry(pos, struct qcow_l2_table, list);
973309045fSPrasad Joshi 		rb_erase(&t->node, r);
983309045fSPrasad Joshi 
993309045fSPrasad Joshi 		/* Free the cached node */
1003309045fSPrasad Joshi 		free(t);
1013309045fSPrasad Joshi 	}
1023309045fSPrasad Joshi }
1033309045fSPrasad Joshi 
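/*
 * Write a dirty L2 table back to the image synchronously and mark it
 * clean; tables that are not dirty are skipped.
 */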
104a4e46515SPekka Enberg static int qcow_l2_cache_write(struct qcow *q, struct qcow_l2_table *c)
105a4e46515SPekka Enberg {
106a4e46515SPekka Enberg 	struct qcow_header *header = q->header;
107a4e46515SPekka Enberg 	u64 size;
108a4e46515SPekka Enberg 
109aff88976SPekka Enberg 	if (!c->dirty)
110aff88976SPekka Enberg 		return 0;
111aff88976SPekka Enberg 
112a4e46515SPekka Enberg 	size = 1 << header->l2_bits;
113a4e46515SPekka Enberg 
114*e184700aSLan Tianyu 	if (qcow_pwrite_sync(q->fd, c->table,
115*e184700aSLan Tianyu 		size * sizeof(u64), c->offset) < 0)
116aff88976SPekka Enberg 		return -1;
117aff88976SPekka Enberg 
118aff88976SPekka Enberg 	c->dirty = 0;
119aff88976SPekka Enberg 
120aff88976SPekka Enberg 	return 0;
121a4e46515SPekka Enberg }
122a4e46515SPekka Enberg 
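/*
 * Add an L2 table to the cache, evicting the least recently used entry
 * once MAX_CACHE_NODES tables are cached. The evicted table is freed
 * without a writeback; this assumes every dirty table has already been
 * flushed via qcow_l2_cache_write() right after it was modified.
 */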
123473d58ffSPekka Enberg static int cache_table(struct qcow *q, struct qcow_l2_table *c)
1243309045fSPrasad Joshi {
1257b4eb530SPekka Enberg 	struct qcow_l1_table *l1t = &q->table;
1267b4eb530SPekka Enberg 	struct rb_root *r = &l1t->root;
127473d58ffSPekka Enberg 	struct qcow_l2_table *lru;
1283309045fSPrasad Joshi 
1297b4eb530SPekka Enberg 	if (l1t->nr_cached == MAX_CACHE_NODES) {
1303309045fSPrasad Joshi 		/*
1313309045fSPrasad Joshi 		 * The node at the head of the list is the least recently used
1323309045fSPrasad Joshi 		 * node. Remove it from the list and replace it with a new node.
1333309045fSPrasad Joshi 		 */
1347b4eb530SPekka Enberg 		lru = list_first_entry(&l1t->lru_list, struct qcow_l2_table, list);
1353309045fSPrasad Joshi 
1363309045fSPrasad Joshi 		/* Remove the node from the cache */
1373309045fSPrasad Joshi 		rb_erase(&lru->node, r);
1383309045fSPrasad Joshi 		list_del_init(&lru->list);
1397b4eb530SPekka Enberg 		l1t->nr_cached--;
1403309045fSPrasad Joshi 
1413309045fSPrasad Joshi 		/* Free the LRUed node */
1423309045fSPrasad Joshi 		free(lru);
1433309045fSPrasad Joshi 	}
1443309045fSPrasad Joshi 
1453309045fSPrasad Joshi 	/* Add new node in RB Tree: Helps in searching faster */
146e94cdf08SPekka Enberg 	if (l2_table_insert(r, c) < 0)
1473309045fSPrasad Joshi 		goto error;
1483309045fSPrasad Joshi 
1493309045fSPrasad Joshi 	/* Add in LRU replacement list */
1507b4eb530SPekka Enberg 	list_add_tail(&c->list, &l1t->lru_list);
1517b4eb530SPekka Enberg 	l1t->nr_cached++;
1523309045fSPrasad Joshi 
1533309045fSPrasad Joshi 	return 0;
1543309045fSPrasad Joshi error:
1553309045fSPrasad Joshi 	return -1;
1563309045fSPrasad Joshi }
1573309045fSPrasad Joshi 
158e94cdf08SPekka Enberg static struct qcow_l2_table *l2_table_search(struct qcow *q, u64 offset)
1593309045fSPrasad Joshi {
1607b4eb530SPekka Enberg 	struct qcow_l1_table *l1t = &q->table;
161fe8bdde0SPekka Enberg 	struct qcow_l2_table *l2t;
1623309045fSPrasad Joshi 
163e94cdf08SPekka Enberg 	l2t = l2_table_lookup(&l1t->root, offset);
164fe8bdde0SPekka Enberg 	if (!l2t)
165fe8bdde0SPekka Enberg 		return NULL;
1663309045fSPrasad Joshi 
1673309045fSPrasad Joshi 	/* Update the LRU state, by moving the searched node to list tail */
1687b4eb530SPekka Enberg 	list_move_tail(&l2t->list, &l1t->lru_list);
1693309045fSPrasad Joshi 
170fe8bdde0SPekka Enberg 	return l2t;
1713309045fSPrasad Joshi }
1723309045fSPrasad Joshi 
1733309045fSPrasad Joshi /* Allocates a new node for caching L2 table */
174473d58ffSPekka Enberg static struct qcow_l2_table *new_cache_table(struct qcow *q, u64 offset)
1753309045fSPrasad Joshi {
1763309045fSPrasad Joshi 	struct qcow_header *header = q->header;
177473d58ffSPekka Enberg 	struct qcow_l2_table *c;
1783309045fSPrasad Joshi 	u64 l2t_sz;
1793309045fSPrasad Joshi 	u64 size;
1803309045fSPrasad Joshi 
1813309045fSPrasad Joshi 	l2t_sz = 1 << header->l2_bits;
1823309045fSPrasad Joshi 	size   = sizeof(*c) + l2t_sz * sizeof(u64);
1833309045fSPrasad Joshi 	c      = calloc(1, size);
1843309045fSPrasad Joshi 	if (!c)
1853309045fSPrasad Joshi 		goto out;
1863309045fSPrasad Joshi 
1873309045fSPrasad Joshi 	c->offset = offset;
1883309045fSPrasad Joshi 	RB_CLEAR_NODE(&c->node);
1893309045fSPrasad Joshi 	INIT_LIST_HEAD(&c->list);
1903309045fSPrasad Joshi out:
1913309045fSPrasad Joshi 	return c;
1923309045fSPrasad Joshi }
1933309045fSPrasad Joshi 
194742fce76SPrasad Joshi static inline u64 get_l1_index(struct qcow *q, u64 offset)
19586835cedSPrasad Joshi {
196ad627d62SPekka Enberg 	struct qcow_header *header = q->header;
19786835cedSPrasad Joshi 
19886835cedSPrasad Joshi 	return offset >> (header->l2_bits + header->cluster_bits);
19986835cedSPrasad Joshi }
20086835cedSPrasad Joshi 
201742fce76SPrasad Joshi static inline u64 get_l2_index(struct qcow *q, u64 offset)
20286835cedSPrasad Joshi {
203ad627d62SPekka Enberg 	struct qcow_header *header = q->header;
20486835cedSPrasad Joshi 
20586835cedSPrasad Joshi 	return (offset >> (header->cluster_bits)) & ((1 << header->l2_bits)-1);
20686835cedSPrasad Joshi }
20786835cedSPrasad Joshi 
208742fce76SPrasad Joshi static inline u64 get_cluster_offset(struct qcow *q, u64 offset)
20986835cedSPrasad Joshi {
210ad627d62SPekka Enberg 	struct qcow_header *header = q->header;
21186835cedSPrasad Joshi 
21286835cedSPrasad Joshi 	return offset & ((1 << header->cluster_bits)-1);
21386835cedSPrasad Joshi }
21486835cedSPrasad Joshi 
215fe8bdde0SPekka Enberg static struct qcow_l2_table *qcow_read_l2_table(struct qcow *q, u64 offset)
2163309045fSPrasad Joshi {
2173309045fSPrasad Joshi 	struct qcow_header *header = q->header;
218fe8bdde0SPekka Enberg 	struct qcow_l2_table *l2t;
2193309045fSPrasad Joshi 	u64 size;
2203309045fSPrasad Joshi 
2213309045fSPrasad Joshi 	size = 1 << header->l2_bits;
2223309045fSPrasad Joshi 
2233309045fSPrasad Joshi 	/* search an entry for offset in cache */
224e94cdf08SPekka Enberg 	l2t = l2_table_search(q, offset);
225fe8bdde0SPekka Enberg 	if (l2t)
226fe8bdde0SPekka Enberg 		return l2t;
2273309045fSPrasad Joshi 
2283309045fSPrasad Joshi 	/* allocate new node for caching l2 table */
229fe8bdde0SPekka Enberg 	l2t = new_cache_table(q, offset);
230fe8bdde0SPekka Enberg 	if (!l2t)
2313309045fSPrasad Joshi 		goto error;
2323309045fSPrasad Joshi 
2333309045fSPrasad Joshi 	/* table not cached: read from the disk */
234fe8bdde0SPekka Enberg 	if (pread_in_full(q->fd, l2t->table, size * sizeof(u64), offset) < 0)
2353309045fSPrasad Joshi 		goto error;
2363309045fSPrasad Joshi 
2373309045fSPrasad Joshi 	/* cache the table */
238fe8bdde0SPekka Enberg 	if (cache_table(q, l2t) < 0)
2393309045fSPrasad Joshi 		goto error;
2403309045fSPrasad Joshi 
241fe8bdde0SPekka Enberg 	return l2t;
2423309045fSPrasad Joshi error:
243fe8bdde0SPekka Enberg 	free(l2t);
244fe8bdde0SPekka Enberg 	return NULL;
2453309045fSPrasad Joshi }
2463309045fSPrasad Joshi 
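/*
 * Inflate a compressed cluster. The negative window-bits value (-12)
 * tells zlib the input is raw deflate data with no zlib header. The
 * call fails unless the data inflates to exactly out_buf_size bytes.
 */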
247af68c51aSLan Tianyu static int qcow_decompress_buffer(u8 *out_buf, int out_buf_size,
248af68c51aSLan Tianyu 	const u8 *buf, int buf_size)
249af68c51aSLan Tianyu {
250af68c51aSLan Tianyu #ifdef CONFIG_HAS_ZLIB
251af68c51aSLan Tianyu 	z_stream strm1, *strm = &strm1;
252af68c51aSLan Tianyu 	int ret, out_len;
253af68c51aSLan Tianyu 
254af68c51aSLan Tianyu 	memset(strm, 0, sizeof(*strm));
255af68c51aSLan Tianyu 
256af68c51aSLan Tianyu 	strm->next_in = (u8 *)buf;
257af68c51aSLan Tianyu 	strm->avail_in = buf_size;
258af68c51aSLan Tianyu 	strm->next_out = out_buf;
259af68c51aSLan Tianyu 	strm->avail_out = out_buf_size;
260af68c51aSLan Tianyu 
261af68c51aSLan Tianyu 	ret = inflateInit2(strm, -12);
262af68c51aSLan Tianyu 	if (ret != Z_OK)
263af68c51aSLan Tianyu 		return -1;
264af68c51aSLan Tianyu 
265af68c51aSLan Tianyu 	ret = inflate(strm, Z_FINISH);
266af68c51aSLan Tianyu 	out_len = strm->next_out - out_buf;
267af68c51aSLan Tianyu 	if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
268af68c51aSLan Tianyu 		out_len != out_buf_size) {
269af68c51aSLan Tianyu 		inflateEnd(strm);
270af68c51aSLan Tianyu 		return -1;
271af68c51aSLan Tianyu 	}
272af68c51aSLan Tianyu 
273af68c51aSLan Tianyu 	inflateEnd(strm);
274af68c51aSLan Tianyu 	return 0;
275af68c51aSLan Tianyu #else
276af68c51aSLan Tianyu 	return -1;
277af68c51aSLan Tianyu #endif
278af68c51aSLan Tianyu }
279af68c51aSLan Tianyu 
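/*
 * Read up to one cluster at 'offset' into dst. A compressed QCOW1 L2
 * entry keeps the file offset in the low bits (cluster_offset_mask)
 * and the compressed size, in bytes, in the bits above
 * (63 - cluster_bits).
 */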
280af68c51aSLan Tianyu static ssize_t qcow1_read_cluster(struct qcow *q, u64 offset,
281af68c51aSLan Tianyu 	void *dst, u32 dst_len)
28286835cedSPrasad Joshi {
283ad627d62SPekka Enberg 	struct qcow_header *header = q->header;
2843fb67b93SPekka Enberg 	struct qcow_l1_table *l1t = &q->table;
2853fb67b93SPekka Enberg 	struct qcow_l2_table *l2t;
286742fce76SPrasad Joshi 	u64 clust_offset;
287742fce76SPrasad Joshi 	u64 clust_start;
2883fb67b93SPekka Enberg 	u64 l2t_offset;
289a51948ceSPekka Enberg 	size_t length;
2903fb67b93SPekka Enberg 	u64 l2t_size;
291742fce76SPrasad Joshi 	u64 l1_idx;
292742fce76SPrasad Joshi 	u64 l2_idx;
293af68c51aSLan Tianyu 	int coffset;
294af68c51aSLan Tianyu 	int csize;
29586835cedSPrasad Joshi 
296c5e0624bSPrasad Joshi 	l1_idx = get_l1_index(q, offset);
2973fb67b93SPekka Enberg 	if (l1_idx >= l1t->table_size)
298c0799eb9SPekka Enberg 		return -1;
29986835cedSPrasad Joshi 
3003dac48d4SPrasad Joshi 	clust_offset = get_cluster_offset(q, offset);
301af68c51aSLan Tianyu 	if (clust_offset >= q->cluster_size)
302c0799eb9SPekka Enberg 		return -1;
3033dac48d4SPrasad Joshi 
304af68c51aSLan Tianyu 	length = q->cluster_size - clust_offset;
3053dac48d4SPrasad Joshi 	if (length > dst_len)
3063dac48d4SPrasad Joshi 		length = dst_len;
3073dac48d4SPrasad Joshi 
308c0799eb9SPekka Enberg 	mutex_lock(&q->mutex);
309b2ebe61bSPekka Enberg 
3103fb67b93SPekka Enberg 	l2t_offset = be64_to_cpu(l1t->l1_table[l1_idx]);
3113fb67b93SPekka Enberg 	if (!l2t_offset)
3123dac48d4SPrasad Joshi 		goto zero_cluster;
31386835cedSPrasad Joshi 
3143fb67b93SPekka Enberg 	l2t_size = 1 << header->l2_bits;
31586835cedSPrasad Joshi 
3163309045fSPrasad Joshi 	/* read and cache level 2 table */
3173fb67b93SPekka Enberg 	l2t = qcow_read_l2_table(q, l2t_offset);
3183fb67b93SPekka Enberg 	if (!l2t)
319b6edb0ecSSasha Levin 		goto out_error;
32086835cedSPrasad Joshi 
321c5e0624bSPrasad Joshi 	l2_idx = get_l2_index(q, offset);
3223fb67b93SPekka Enberg 	if (l2_idx >= l2t_size)
323b6edb0ecSSasha Levin 		goto out_error;
32486835cedSPrasad Joshi 
3253fb67b93SPekka Enberg 	clust_start = be64_to_cpu(l2t->table[l2_idx]);
326af68c51aSLan Tianyu 	if (clust_start & QCOW1_OFLAG_COMPRESSED) {
327af68c51aSLan Tianyu 		coffset = clust_start & q->cluster_offset_mask;
328af68c51aSLan Tianyu 		csize = clust_start >> (63 - q->header->cluster_bits);
329af68c51aSLan Tianyu 		csize &= (q->cluster_size - 1);
330af68c51aSLan Tianyu 
331af68c51aSLan Tianyu 		if (pread_in_full(q->fd, q->cluster_data, csize,
332af68c51aSLan Tianyu 				  coffset) < 0) {
333b2ebe61bSPekka Enberg 			goto out_error;
334b2ebe61bSPekka Enberg 		}
335b2ebe61bSPekka Enberg 
336af68c51aSLan Tianyu 		if (qcow_decompress_buffer(q->cluster_cache, q->cluster_size,
337af68c51aSLan Tianyu 					q->cluster_data, csize) < 0) {
338af68c51aSLan Tianyu 			goto out_error;
339af68c51aSLan Tianyu 		}
340af68c51aSLan Tianyu 
341af68c51aSLan Tianyu 		memcpy(dst, q->cluster_cache + clust_offset, length);
342af68c51aSLan Tianyu 		mutex_unlock(&q->mutex);
343af68c51aSLan Tianyu 	} else {
34486835cedSPrasad Joshi 		if (!clust_start)
3453dac48d4SPrasad Joshi 			goto zero_cluster;
34686835cedSPrasad Joshi 
347c0799eb9SPekka Enberg 		mutex_unlock(&q->mutex);
34886835cedSPrasad Joshi 
349af68c51aSLan Tianyu 		if (pread_in_full(q->fd, dst, length,
350af68c51aSLan Tianyu 				  clust_start + clust_offset) < 0)
351c0799eb9SPekka Enberg 			return -1;
352af68c51aSLan Tianyu 	}
353c0799eb9SPekka Enberg 
3543dac48d4SPrasad Joshi 	return length;
35586835cedSPrasad Joshi 
356179b71f0SPekka Enberg zero_cluster:
357c0799eb9SPekka Enberg 	mutex_unlock(&q->mutex);
358179b71f0SPekka Enberg 	memset(dst, 0, length);
359c0799eb9SPekka Enberg 	return length;
360179b71f0SPekka Enberg 
36186835cedSPrasad Joshi out_error:
362c0799eb9SPekka Enberg 	mutex_unlock(&q->mutex);
364c0799eb9SPekka Enberg 	return -1;
3653dac48d4SPrasad Joshi }
366b6edb0ecSSasha Levin 
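/*
 * QCOW2 variant of the cluster read. A compressed L2 entry keeps the
 * file offset in the low bits and, above csize_shift, the number of
 * occupied 512-byte sectors minus one; the data may start mid-sector,
 * hence sector_offset.
 */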
367af68c51aSLan Tianyu static ssize_t qcow2_read_cluster(struct qcow *q, u64 offset,
368af68c51aSLan Tianyu 	void *dst, u32 dst_len)
369af68c51aSLan Tianyu {
370af68c51aSLan Tianyu 	struct qcow_header *header = q->header;
371af68c51aSLan Tianyu 	struct qcow_l1_table *l1t = &q->table;
372af68c51aSLan Tianyu 	struct qcow_l2_table *l2t;
373af68c51aSLan Tianyu 	u64 clust_offset;
374af68c51aSLan Tianyu 	u64 clust_start;
375af68c51aSLan Tianyu 	u64 l2t_offset;
376af68c51aSLan Tianyu 	size_t length;
377af68c51aSLan Tianyu 	u64 l2t_size;
378af68c51aSLan Tianyu 	u64 l1_idx;
379af68c51aSLan Tianyu 	u64 l2_idx;
380af68c51aSLan Tianyu 	int coffset;
381af68c51aSLan Tianyu 	int sector_offset;
382af68c51aSLan Tianyu 	int nb_csectors;
383af68c51aSLan Tianyu 	int csize;
384af68c51aSLan Tianyu 
385af68c51aSLan Tianyu 	l1_idx = get_l1_index(q, offset);
386af68c51aSLan Tianyu 	if (l1_idx >= l1t->table_size)
387af68c51aSLan Tianyu 		return -1;
388af68c51aSLan Tianyu 
389af68c51aSLan Tianyu 	clust_offset = get_cluster_offset(q, offset);
390af68c51aSLan Tianyu 	if (clust_offset >= q->cluster_size)
391af68c51aSLan Tianyu 		return -1;
392af68c51aSLan Tianyu 
393af68c51aSLan Tianyu 	length = q->cluster_size - clust_offset;
394af68c51aSLan Tianyu 	if (length > dst_len)
395af68c51aSLan Tianyu 		length = dst_len;
396af68c51aSLan Tianyu 
397af68c51aSLan Tianyu 	mutex_lock(&q->mutex);
398af68c51aSLan Tianyu 
399af68c51aSLan Tianyu 	l2t_offset = be64_to_cpu(l1t->l1_table[l1_idx]);
400af68c51aSLan Tianyu 
401af68c51aSLan Tianyu 	l2t_offset &= ~QCOW2_OFLAG_COPIED;
402af68c51aSLan Tianyu 	if (!l2t_offset)
403af68c51aSLan Tianyu 		goto zero_cluster;
404af68c51aSLan Tianyu 
405af68c51aSLan Tianyu 	l2t_size = 1 << header->l2_bits;
406af68c51aSLan Tianyu 
407af68c51aSLan Tianyu 	/* read and cache level 2 table */
408af68c51aSLan Tianyu 	l2t = qcow_read_l2_table(q, l2t_offset);
409af68c51aSLan Tianyu 	if (!l2t)
410af68c51aSLan Tianyu 		goto out_error;
411af68c51aSLan Tianyu 
412af68c51aSLan Tianyu 	l2_idx = get_l2_index(q, offset);
413af68c51aSLan Tianyu 	if (l2_idx >= l2t_size)
414af68c51aSLan Tianyu 		goto out_error;
415af68c51aSLan Tianyu 
416af68c51aSLan Tianyu 	clust_start = be64_to_cpu(l2t->table[l2_idx]);
417af68c51aSLan Tianyu 	if (clust_start & QCOW2_OFLAG_COMPRESSED) {
418af68c51aSLan Tianyu 		coffset = clust_start & q->cluster_offset_mask;
419af68c51aSLan Tianyu 		nb_csectors = ((clust_start >> q->csize_shift)
420af68c51aSLan Tianyu 			& q->csize_mask) + 1;
421af68c51aSLan Tianyu 		sector_offset = coffset & (SECTOR_SIZE - 1);
422af68c51aSLan Tianyu 		csize = nb_csectors * SECTOR_SIZE - sector_offset;
423af68c51aSLan Tianyu 
424af68c51aSLan Tianyu 		if (pread_in_full(q->fd, q->cluster_data,
425af68c51aSLan Tianyu 				  nb_csectors * SECTOR_SIZE,
426af68c51aSLan Tianyu 				  coffset & ~(SECTOR_SIZE - 1)) < 0) {
427af68c51aSLan Tianyu 			goto out_error;
428af68c51aSLan Tianyu 		}
429af68c51aSLan Tianyu 
430af68c51aSLan Tianyu 		if (qcow_decompress_buffer(q->cluster_cache, q->cluster_size,
431af68c51aSLan Tianyu 					q->cluster_data + sector_offset,
432af68c51aSLan Tianyu 					csize) < 0) {
433af68c51aSLan Tianyu 			goto out_error;
434af68c51aSLan Tianyu 		}
435af68c51aSLan Tianyu 
436af68c51aSLan Tianyu 		memcpy(dst, q->cluster_cache + clust_offset, length);
437af68c51aSLan Tianyu 		mutex_unlock(&q->mutex);
438af68c51aSLan Tianyu 	} else {
439af68c51aSLan Tianyu 		clust_start &= QCOW2_OFFSET_MASK;
440af68c51aSLan Tianyu 		if (!clust_start)
441af68c51aSLan Tianyu 			goto zero_cluster;
442af68c51aSLan Tianyu 
443af68c51aSLan Tianyu 		mutex_unlock(&q->mutex);
444af68c51aSLan Tianyu 
445af68c51aSLan Tianyu 		if (pread_in_full(q->fd, dst, length,
446af68c51aSLan Tianyu 				  clust_start + clust_offset) < 0)
447af68c51aSLan Tianyu 			return -1;
448af68c51aSLan Tianyu 	}
449af68c51aSLan Tianyu 
450af68c51aSLan Tianyu 	return length;
451af68c51aSLan Tianyu 
452af68c51aSLan Tianyu zero_cluster:
453af68c51aSLan Tianyu 	mutex_unlock(&q->mutex);
454af68c51aSLan Tianyu 	memset(dst, 0, length);
455af68c51aSLan Tianyu 	return length;
456af68c51aSLan Tianyu 
457af68c51aSLan Tianyu out_error:
458af68c51aSLan Tianyu 	mutex_unlock(&q->mutex);
460af68c51aSLan Tianyu 	return -1;
461af68c51aSLan Tianyu }
462af68c51aSLan Tianyu 
4632534c9b6SSasha Levin static ssize_t qcow_read_sector_single(struct disk_image *disk, u64 sector,
464af68c51aSLan Tianyu 	void *dst, u32 dst_len)
4653dac48d4SPrasad Joshi {
46643835ac9SSasha Levin 	struct qcow *q = disk->priv;
467ad627d62SPekka Enberg 	struct qcow_header *header = q->header;
468d8eea993SPekka Enberg 	u32 nr_read;
4690df6b4d9SPekka Enberg 	u64 offset;
4700df6b4d9SPekka Enberg 	char *buf;
4713dac48d4SPrasad Joshi 	ssize_t nr;
4723dac48d4SPrasad Joshi 
4730df6b4d9SPekka Enberg 	buf		= dst;
474d8eea993SPekka Enberg 	nr_read		= 0;
4750df6b4d9SPekka Enberg 
476d8eea993SPekka Enberg 	while (nr_read < dst_len) {
4773dac48d4SPrasad Joshi 		offset		= sector << SECTOR_SHIFT;
4783dac48d4SPrasad Joshi 		if (offset >= header->size)
4790df6b4d9SPekka Enberg 			return -1;
4803dac48d4SPrasad Joshi 
481af68c51aSLan Tianyu 		if (q->version == QCOW1_VERSION)
482af68c51aSLan Tianyu 			nr = qcow1_read_cluster(q, offset, buf,
483af68c51aSLan Tianyu 				dst_len - nr_read);
484af68c51aSLan Tianyu 		else
485af68c51aSLan Tianyu 			nr = qcow2_read_cluster(q, offset, buf,
486af68c51aSLan Tianyu 				dst_len - nr_read);
487af68c51aSLan Tianyu 
488a51948ceSPekka Enberg 		if (nr <= 0)
4890df6b4d9SPekka Enberg 			return -1;
4903dac48d4SPrasad Joshi 
491d8eea993SPekka Enberg 		nr_read		+= nr;
4923dac48d4SPrasad Joshi 		buf		+= nr;
4933dac48d4SPrasad Joshi 		sector		+= (nr >> SECTOR_SHIFT);
4943dac48d4SPrasad Joshi 	}
4950df6b4d9SPekka Enberg 
49672133dd2SAsias He 	return dst_len;
49786835cedSPrasad Joshi }
49886835cedSPrasad Joshi 
4992534c9b6SSasha Levin static ssize_t qcow_read_sector(struct disk_image *disk, u64 sector,
5005af21162SSasha Levin 				const struct iovec *iov, int iovcount, void *param)
5012534c9b6SSasha Levin {
5022534c9b6SSasha Levin 	ssize_t nr, total = 0;
5032534c9b6SSasha Levin 
5042534c9b6SSasha Levin 	while (iovcount--) {
5052534c9b6SSasha Levin 		nr = qcow_read_sector_single(disk, sector, iov->iov_base, iov->iov_len);
5062534c9b6SSasha Levin 		if (nr != (ssize_t)iov->iov_len) {
5072534c9b6SSasha Levin 			pr_info("qcow_read_sector error: nr=%ld iov_len=%ld\n", (long)nr, (long)iov->iov_len);
5082534c9b6SSasha Levin 			return -1;
5092534c9b6SSasha Levin 		}
5102534c9b6SSasha Levin 
5112534c9b6SSasha Levin 		sector	+= iov->iov_len >> SECTOR_SHIFT;
5122534c9b6SSasha Levin 		iov++;
5132534c9b6SSasha Levin 		total	+= nr;
5142534c9b6SSasha Levin 	}
5152534c9b6SSasha Levin 
5162534c9b6SSasha Levin 	return total;
5172534c9b6SSasha Levin }
5182534c9b6SSasha Levin 
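/*
 * Refcount blocks are cached with the same RB-tree plus LRU scheme
 * used for the L2 tables above.
 */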
5193ecac800SPekka Enberg static void refcount_table_free_cache(struct qcow_refcount_table *rft)
5203ecac800SPekka Enberg {
5213ecac800SPekka Enberg 	struct rb_root *r = &rft->root;
5223ecac800SPekka Enberg 	struct list_head *pos, *n;
5233ecac800SPekka Enberg 	struct qcow_refcount_block *t;
5243ecac800SPekka Enberg 
5253ecac800SPekka Enberg 	list_for_each_safe(pos, n, &rft->lru_list) {
5263ecac800SPekka Enberg 		list_del(pos);
5273ecac800SPekka Enberg 		t = list_entry(pos, struct qcow_refcount_block, list);
5283ecac800SPekka Enberg 		rb_erase(&t->node, r);
5293ecac800SPekka Enberg 
5303ecac800SPekka Enberg 		free(t);
5313ecac800SPekka Enberg 	}
5323ecac800SPekka Enberg }
5333ecac800SPekka Enberg 
5343ecac800SPekka Enberg static int refcount_block_insert(struct rb_root *root, struct qcow_refcount_block *new)
5353ecac800SPekka Enberg {
5363ecac800SPekka Enberg 	struct rb_node **link = &(root->rb_node), *parent = NULL;
5373ecac800SPekka Enberg 	u64 offset = new->offset;
5383ecac800SPekka Enberg 
5393ecac800SPekka Enberg 	/* search the tree */
5403ecac800SPekka Enberg 	while (*link) {
5413ecac800SPekka Enberg 		struct qcow_refcount_block *t;
5423ecac800SPekka Enberg 
5433ecac800SPekka Enberg 		t = rb_entry(*link, struct qcow_refcount_block, node);
5443ecac800SPekka Enberg 		if (!t)
5453ecac800SPekka Enberg 			goto error;
5463ecac800SPekka Enberg 
5473ecac800SPekka Enberg 		parent = *link;
5483ecac800SPekka Enberg 
5493ecac800SPekka Enberg 		if (t->offset > offset)
5503ecac800SPekka Enberg 			link = &(*link)->rb_left;
5513ecac800SPekka Enberg 		else if (t->offset < offset)
5523ecac800SPekka Enberg 			link = &(*link)->rb_right;
5533ecac800SPekka Enberg 		else
5543ecac800SPekka Enberg 			goto out;
5553ecac800SPekka Enberg 	}
5563ecac800SPekka Enberg 
5573ecac800SPekka Enberg 	/* add new node */
5583ecac800SPekka Enberg 	rb_link_node(&new->node, parent, link);
5593ecac800SPekka Enberg 	rb_insert_color(&new->node, root);
5603ecac800SPekka Enberg out:
5613ecac800SPekka Enberg 	return 0;
5623ecac800SPekka Enberg error:
5633ecac800SPekka Enberg 	return -1;
5643ecac800SPekka Enberg }
5653ecac800SPekka Enberg 
5663ecac800SPekka Enberg static int write_refcount_block(struct qcow *q, struct qcow_refcount_block *rfb)
5673ecac800SPekka Enberg {
5683ecac800SPekka Enberg 	if (!rfb->dirty)
5693ecac800SPekka Enberg 		return 0;
5703ecac800SPekka Enberg 
571*e184700aSLan Tianyu 	if (qcow_pwrite_sync(q->fd, rfb->entries,
572*e184700aSLan Tianyu 		rfb->size * sizeof(u16), rfb->offset) < 0)
5733ecac800SPekka Enberg 		return -1;
5743ecac800SPekka Enberg 
5753ecac800SPekka Enberg 	rfb->dirty = 0;
5763ecac800SPekka Enberg 
5773ecac800SPekka Enberg 	return 0;
5783ecac800SPekka Enberg }
5793ecac800SPekka Enberg 
5803ecac800SPekka Enberg static int cache_refcount_block(struct qcow *q, struct qcow_refcount_block *c)
5813ecac800SPekka Enberg {
5823ecac800SPekka Enberg 	struct qcow_refcount_table *rft = &q->refcount_table;
5833ecac800SPekka Enberg 	struct rb_root *r = &rft->root;
5843ecac800SPekka Enberg 	struct qcow_refcount_block *lru;
5853ecac800SPekka Enberg 
5863ecac800SPekka Enberg 	if (rft->nr_cached == MAX_CACHE_NODES) {
5873ecac800SPekka Enberg 		lru = list_first_entry(&rft->lru_list, struct qcow_refcount_block, list);
5883ecac800SPekka Enberg 
5893ecac800SPekka Enberg 		rb_erase(&lru->node, r);
5903ecac800SPekka Enberg 		list_del_init(&lru->list);
5913ecac800SPekka Enberg 		rft->nr_cached--;
5923ecac800SPekka Enberg 
5933ecac800SPekka Enberg 		free(lru);
5943ecac800SPekka Enberg 	}
5953ecac800SPekka Enberg 
5963ecac800SPekka Enberg 	if (refcount_block_insert(r, c) < 0)
5973ecac800SPekka Enberg 		goto error;
5983ecac800SPekka Enberg 
5993ecac800SPekka Enberg 	list_add_tail(&c->list, &rft->lru_list);
6003ecac800SPekka Enberg 	rft->nr_cached++;
6013ecac800SPekka Enberg 
6023ecac800SPekka Enberg 	return 0;
6033ecac800SPekka Enberg error:
6043ecac800SPekka Enberg 	return -1;
6053ecac800SPekka Enberg }
6063ecac800SPekka Enberg 
6073ecac800SPekka Enberg static struct qcow_refcount_block *new_refcount_block(struct qcow *q, u64 rfb_offset)
6083ecac800SPekka Enberg {
6093ecac800SPekka Enberg 	struct qcow_refcount_block *rfb;
6103ecac800SPekka Enberg 
611af68c51aSLan Tianyu 	rfb = malloc(sizeof *rfb + q->cluster_size);
6123ecac800SPekka Enberg 	if (!rfb)
6133ecac800SPekka Enberg 		return NULL;
6143ecac800SPekka Enberg 
6153ecac800SPekka Enberg 	rfb->offset = rfb_offset;
616af68c51aSLan Tianyu 	rfb->size = q->cluster_size / sizeof(u16);
6173ecac800SPekka Enberg 	RB_CLEAR_NODE(&rfb->node);
6183ecac800SPekka Enberg 	INIT_LIST_HEAD(&rfb->list);
6193ecac800SPekka Enberg 
6203ecac800SPekka Enberg 	return rfb;
6213ecac800SPekka Enberg }
6223ecac800SPekka Enberg 
6233ecac800SPekka Enberg static struct qcow_refcount_block *refcount_block_lookup(struct rb_root *root, u64 offset)
6243ecac800SPekka Enberg {
6253ecac800SPekka Enberg 	struct rb_node *link = root->rb_node;
6263ecac800SPekka Enberg 
6273ecac800SPekka Enberg 	while (link) {
6283ecac800SPekka Enberg 		struct qcow_refcount_block *t;
6293ecac800SPekka Enberg 
6303ecac800SPekka Enberg 		t = rb_entry(link, struct qcow_refcount_block, node);
6313ecac800SPekka Enberg 		if (!t)
6323ecac800SPekka Enberg 			goto out;
6333ecac800SPekka Enberg 
6343ecac800SPekka Enberg 		if (t->offset > offset)
6353ecac800SPekka Enberg 			link = link->rb_left;
6363ecac800SPekka Enberg 		else if (t->offset < offset)
6373ecac800SPekka Enberg 			link = link->rb_right;
6383ecac800SPekka Enberg 		else
6393ecac800SPekka Enberg 			return t;
6403ecac800SPekka Enberg 	}
6413ecac800SPekka Enberg out:
6423ecac800SPekka Enberg 	return NULL;
6433ecac800SPekka Enberg }
6443ecac800SPekka Enberg 
6453ecac800SPekka Enberg static struct qcow_refcount_block *refcount_block_search(struct qcow *q, u64 offset)
6463ecac800SPekka Enberg {
6473ecac800SPekka Enberg 	struct qcow_refcount_table *rft = &q->refcount_table;
6483ecac800SPekka Enberg 	struct qcow_refcount_block *rfb;
6493ecac800SPekka Enberg 
6503ecac800SPekka Enberg 	rfb = refcount_block_lookup(&rft->root, offset);
6513ecac800SPekka Enberg 	if (!rfb)
6523ecac800SPekka Enberg 		return NULL;
6533ecac800SPekka Enberg 
6543ecac800SPekka Enberg 	/* Update the LRU state, by moving the searched node to list tail */
6553ecac800SPekka Enberg 	list_move_tail(&rfb->list, &rft->lru_list);
6563ecac800SPekka Enberg 
6573ecac800SPekka Enberg 	return rfb;
6583ecac800SPekka Enberg }
6593ecac800SPekka Enberg 
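/*
 * Map a cluster index to its refcount block. Each block is one cluster
 * of u16 entries, so a single block covers
 * 1 << (cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT) clusters. The block
 * is read from disk and cached on first use.
 */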
6603ecac800SPekka Enberg static struct qcow_refcount_block *qcow_read_refcount_block(struct qcow *q, u64 clust_idx)
6613ecac800SPekka Enberg {
6623ecac800SPekka Enberg 	struct qcow_header *header = q->header;
6633ecac800SPekka Enberg 	struct qcow_refcount_table *rft = &q->refcount_table;
6643ecac800SPekka Enberg 	struct qcow_refcount_block *rfb;
6653ecac800SPekka Enberg 	u64 rfb_offset;
6663ecac800SPekka Enberg 	u64 rft_idx;
6673ecac800SPekka Enberg 
6683ecac800SPekka Enberg 	rft_idx = clust_idx >> (header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT);
6693ecac800SPekka Enberg 	if (rft_idx >= rft->rf_size)
6703ecac800SPekka Enberg 		return NULL;
6713ecac800SPekka Enberg 
6723ecac800SPekka Enberg 	rfb_offset = be64_to_cpu(rft->rf_table[rft_idx]);
6733ecac800SPekka Enberg 
674*e184700aSLan Tianyu 	if (!rfb_offset) {
675*e184700aSLan Tianyu 		pr_warning("growing the refcount table is not supported");
676*e184700aSLan Tianyu 		return NULL;
677*e184700aSLan Tianyu 	}
678*e184700aSLan Tianyu 
6793ecac800SPekka Enberg 	rfb = refcount_block_search(q, rfb_offset);
6803ecac800SPekka Enberg 	if (rfb)
6813ecac800SPekka Enberg 		return rfb;
6823ecac800SPekka Enberg 
6833ecac800SPekka Enberg 	rfb = new_refcount_block(q, rfb_offset);
6843ecac800SPekka Enberg 	if (!rfb)
6853ecac800SPekka Enberg 		return NULL;
6863ecac800SPekka Enberg 
6873ecac800SPekka Enberg 	if (pread_in_full(q->fd, rfb->entries, rfb->size * sizeof(u16), rfb_offset) < 0)
6883ecac800SPekka Enberg 		goto error_free_rfb;
6893ecac800SPekka Enberg 
6903ecac800SPekka Enberg 	if (cache_refcount_block(q, rfb) < 0)
6913ecac800SPekka Enberg 		goto error_free_rfb;
6923ecac800SPekka Enberg 
6933ecac800SPekka Enberg 	return rfb;
6943ecac800SPekka Enberg 
6953ecac800SPekka Enberg error_free_rfb:
6963ecac800SPekka Enberg 	free(rfb);
6973ecac800SPekka Enberg 
6983ecac800SPekka Enberg 	return NULL;
6993ecac800SPekka Enberg }
7003ecac800SPekka Enberg 
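/* Return the refcount of the given cluster, or -1 on error. */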
701*e184700aSLan Tianyu static int qcow_get_refcount(struct qcow *q, u64 clust_idx)
702*e184700aSLan Tianyu {
703*e184700aSLan Tianyu 	struct qcow_refcount_block *rfb = NULL;
704*e184700aSLan Tianyu 	struct qcow_header *header = q->header;
705*e184700aSLan Tianyu 	u64 rfb_idx;
706*e184700aSLan Tianyu 
707*e184700aSLan Tianyu 	rfb = qcow_read_refcount_block(q, clust_idx);
708*e184700aSLan Tianyu 	if (!rfb) {
709*e184700aSLan Tianyu 		pr_warning("Error while reading refcount table");
710*e184700aSLan Tianyu 		return -1;
711*e184700aSLan Tianyu 	}
712*e184700aSLan Tianyu 
713*e184700aSLan Tianyu 	rfb_idx = clust_idx & (((1ULL <<
714*e184700aSLan Tianyu 		(header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT)) - 1));
715*e184700aSLan Tianyu 
716*e184700aSLan Tianyu 	if (rfb_idx >= rfb->size) {
717*e184700aSLan Tianyu 		pr_warning("refcount block index out of bounds");
718*e184700aSLan Tianyu 		return -1;
719*e184700aSLan Tianyu 	}
720*e184700aSLan Tianyu 
721*e184700aSLan Tianyu 	return be16_to_cpu(rfb->entries[rfb_idx]);
722*e184700aSLan Tianyu }
723*e184700aSLan Tianyu 
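/*
 * Adjust a cluster's refcount by 'append' and write the refcount block
 * back synchronously. Callers pass -1 to drop a reference; the u16
 * addition wraps around, which yields the intended decrement.
 */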
724*e184700aSLan Tianyu static int update_cluster_refcount(struct qcow *q, u64 clust_idx, u16 append)
725*e184700aSLan Tianyu {
726*e184700aSLan Tianyu 	struct qcow_refcount_block *rfb = NULL;
727*e184700aSLan Tianyu 	struct qcow_header *header = q->header;
728*e184700aSLan Tianyu 	u16 refcount;
729*e184700aSLan Tianyu 	u64 rfb_idx;
730*e184700aSLan Tianyu 
731*e184700aSLan Tianyu 	rfb = qcow_read_refcount_block(q, clust_idx);
732*e184700aSLan Tianyu 	if (!rfb) {
733*e184700aSLan Tianyu 		pr_warning("error while reading refcount table");
734*e184700aSLan Tianyu 		return -1;
735*e184700aSLan Tianyu 	}
736*e184700aSLan Tianyu 
737*e184700aSLan Tianyu 	rfb_idx = clust_idx & (((1ULL <<
738*e184700aSLan Tianyu 		(header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT)) - 1));
739*e184700aSLan Tianyu 	if (rfb_idx >= rfb->size) {
740*e184700aSLan Tianyu 		pr_warning("refcount block index out of bounds");
741*e184700aSLan Tianyu 		return -1;
742*e184700aSLan Tianyu 	}
743*e184700aSLan Tianyu 
744*e184700aSLan Tianyu 	refcount = be16_to_cpu(rfb->entries[rfb_idx]) + append;
745*e184700aSLan Tianyu 	rfb->entries[rfb_idx] = cpu_to_be16(refcount);
746*e184700aSLan Tianyu 	rfb->dirty = 1;
747*e184700aSLan Tianyu 
748*e184700aSLan Tianyu 	/* write refcount block */
749*e184700aSLan Tianyu 	if (write_refcount_block(q, rfb) < 0) {
750*e184700aSLan Tianyu 		pr_warning("error while writing refcount block");
751*e184700aSLan Tianyu 		return -1;
752*e184700aSLan Tianyu 	}
753*e184700aSLan Tianyu 
754*e184700aSLan Tianyu 	/* update free_clust_idx if the refcount becomes zero */
755*e184700aSLan Tianyu 	if (!refcount && clust_idx < q->free_clust_idx)
756*e184700aSLan Tianyu 		q->free_clust_idx = clust_idx;
757*e184700aSLan Tianyu 
758*e184700aSLan Tianyu 	return 0;
759*e184700aSLan Tianyu }
760*e184700aSLan Tianyu 
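/*
 * Drop one reference from every cluster that overlaps the range
 * [clust_start, clust_start + size).
 */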
761*e184700aSLan Tianyu static void qcow_free_clusters(struct qcow *q, u64 clust_start, u64 size)
762*e184700aSLan Tianyu {
763*e184700aSLan Tianyu 	struct qcow_header *header = q->header;
764*e184700aSLan Tianyu 	u64 start, end, offset;
765*e184700aSLan Tianyu 
766*e184700aSLan Tianyu 	start = clust_start & ~(q->cluster_size - 1);
767*e184700aSLan Tianyu 	end = (clust_start + size - 1) & ~(q->cluster_size - 1);
768*e184700aSLan Tianyu 	for (offset = start; offset <= end; offset += q->cluster_size)
769*e184700aSLan Tianyu 		update_cluster_refcount(q, offset >> header->cluster_bits, -1);
770*e184700aSLan Tianyu }
771*e184700aSLan Tianyu 
772865c675fSPrasad Joshi /*
773*e184700aSLan Tianyu  * Allocate clusters according to the size. Find a position that
774*e184700aSLan Tianyu  * can satisfy the size. free_clust_idx is initialized to zero and
775*e184700aSLan Tianyu  * records the last scanned position.
776865c675fSPrasad Joshi  */
777*e184700aSLan Tianyu static s64 qcow_alloc_clusters(struct qcow *q, u64 size)
778*e184700aSLan Tianyu {
779*e184700aSLan Tianyu 	struct qcow_header *header = q->header;
780*e184700aSLan Tianyu 	int clust_refcount;
781*e184700aSLan Tianyu 	u64 clust_idx, i;
782*e184700aSLan Tianyu 	u64 clust_num;
783*e184700aSLan Tianyu 
784*e184700aSLan Tianyu 	clust_num = (size + (q->cluster_size - 1)) >> header->cluster_bits;
785*e184700aSLan Tianyu 
786*e184700aSLan Tianyu again:
787*e184700aSLan Tianyu 	for (i = 0; i < clust_num; i++) {
788*e184700aSLan Tianyu 		clust_idx = q->free_clust_idx++;
789*e184700aSLan Tianyu 		clust_refcount = qcow_get_refcount(q, clust_idx);
790*e184700aSLan Tianyu 		if (clust_refcount < 0)
791*e184700aSLan Tianyu 			return -1;
792*e184700aSLan Tianyu 		else if (clust_refcount > 0)
793*e184700aSLan Tianyu 			goto again;
794*e184700aSLan Tianyu 	}
795*e184700aSLan Tianyu 
796*e184700aSLan Tianyu 	for (i = 0; i < clust_num; i++)
797*e184700aSLan Tianyu 		if (update_cluster_refcount(q,
798*e184700aSLan Tianyu 			q->free_clust_idx - clust_num + i, 1))
799*e184700aSLan Tianyu 			return -1;
800*e184700aSLan Tianyu 
801*e184700aSLan Tianyu 	return (q->free_clust_idx - clust_num) << header->cluster_bits;
802*e184700aSLan Tianyu }
803*e184700aSLan Tianyu 
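/* Write the in-memory L1 table back to its on-disk location. */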
804*e184700aSLan Tianyu static int qcow_write_l1_table(struct qcow *q)
805*e184700aSLan Tianyu {
806*e184700aSLan Tianyu 	struct qcow_l1_table *l1t = &q->table;
807*e184700aSLan Tianyu 	struct qcow_header *header = q->header;
808*e184700aSLan Tianyu 
809*e184700aSLan Tianyu 	if (qcow_pwrite_sync(q->fd, l1t->l1_table,
810*e184700aSLan Tianyu 		l1t->table_size * sizeof(u64),
811*e184700aSLan Tianyu 		header->l1_table_offset) < 0)
812*e184700aSLan Tianyu 		return -1;
813*e184700aSLan Tianyu 
814*e184700aSLan Tianyu 	return 0;
815*e184700aSLan Tianyu }
816*e184700aSLan Tianyu 
817*e184700aSLan Tianyu /*
818*e184700aSLan Tianyu  * Get the l2 table. If the table has already been copied, read it
819*e184700aSLan Tianyu  * directly. Otherwise allocate a new cluster and copy the existing
820*e184700aSLan Tianyu  * table into it (copy-on-write) before updating the l1 table.
821*e184700aSLan Tianyu  */
822*e184700aSLan Tianyu static int get_cluster_table(struct qcow *q, u64 offset,
823*e184700aSLan Tianyu 	struct qcow_l2_table **result_l2t, u64 *result_l2_index)
824865c675fSPrasad Joshi {
825865c675fSPrasad Joshi 	struct qcow_header *header = q->header;
8263fb67b93SPekka Enberg 	struct qcow_l1_table *l1t = &q->table;
827fe8bdde0SPekka Enberg 	struct qcow_l2_table *l2t;
828865c675fSPrasad Joshi 	u64 l1t_idx;
829*e184700aSLan Tianyu 	u64 l2t_offset;
830865c675fSPrasad Joshi 	u64 l2t_idx;
831*e184700aSLan Tianyu 	u64 l2t_size;
832*e184700aSLan Tianyu 	s64 l2t_new_offset;
833865c675fSPrasad Joshi 
8343fb67b93SPekka Enberg 	l2t_size = 1 << header->l2_bits;
835865c675fSPrasad Joshi 
836865c675fSPrasad Joshi 	l1t_idx = get_l1_index(q, offset);
8373fb67b93SPekka Enberg 	if (l1t_idx >= l1t->table_size)
838c0799eb9SPekka Enberg 		return -1;
839865c675fSPrasad Joshi 
840865c675fSPrasad Joshi 	l2t_idx = get_l2_index(q, offset);
8413fb67b93SPekka Enberg 	if (l2t_idx >= l2t_size)
842c0799eb9SPekka Enberg 		return -1;
843865c675fSPrasad Joshi 
844*e184700aSLan Tianyu 	l2t_offset = be64_to_cpu(l1t->l1_table[l1t_idx]);
845*e184700aSLan Tianyu 	if (l2t_offset & QCOW2_OFLAG_COPIED) {
846*e184700aSLan Tianyu 		l2t_offset &= ~QCOW2_OFLAG_COPIED;
847*e184700aSLan Tianyu 		l2t = qcow_read_l2_table(q, l2t_offset);
848*e184700aSLan Tianyu 		if (!l2t)
849*e184700aSLan Tianyu 			goto error;
850*e184700aSLan Tianyu 	} else {
851*e184700aSLan Tianyu 		l2t_new_offset = qcow_alloc_clusters(q, l2t_size*sizeof(u64));
852*e184700aSLan Tianyu 		if (l2t_new_offset < 0)
853*e184700aSLan Tianyu 			goto error;
854*e184700aSLan Tianyu 
855*e184700aSLan Tianyu 		l2t = new_cache_table(q, l2t_new_offset);
856*e184700aSLan Tianyu 		if (!l2t)
857*e184700aSLan Tianyu 			goto free_cluster;
858*e184700aSLan Tianyu 
859*e184700aSLan Tianyu 		if (l2t_offset) {
			struct qcow_l2_table *l2t_old;

			/*
			 * Copy the existing table into the new cluster.
			 * Overwriting l2t here would leak the new cache
			 * entry and write the table back to its old offset.
			 */
860*e184700aSLan Tianyu 			l2t_old = qcow_read_l2_table(q, l2t_offset);
861*e184700aSLan Tianyu 			if (!l2t_old)
862*e184700aSLan Tianyu 				goto free_cache;

			memcpy(l2t->table, l2t_old->table,
			       l2t_size * sizeof(u64));
863*e184700aSLan Tianyu 		} else
864*e184700aSLan Tianyu 			memset(l2t->table, 0x00, l2t_size * sizeof(u64));
865*e184700aSLan Tianyu 
866*e184700aSLan Tianyu 		/* write l2 table */
867*e184700aSLan Tianyu 		l2t->dirty = 1;
868*e184700aSLan Tianyu 		if (qcow_l2_cache_write(q, l2t) < 0)
869*e184700aSLan Tianyu 			goto free_cache;
870*e184700aSLan Tianyu 
871*e184700aSLan Tianyu 		/* cache l2 table */
872*e184700aSLan Tianyu 		if (cache_table(q, l2t))
873*e184700aSLan Tianyu 			goto free_cache;
874*e184700aSLan Tianyu 
875*e184700aSLan Tianyu 		/* update the l1 table */
876*e184700aSLan Tianyu 		l1t->l1_table[l1t_idx] = cpu_to_be64(l2t_new_offset
877*e184700aSLan Tianyu 			| QCOW2_OFLAG_COPIED);
878*e184700aSLan Tianyu 		if (qcow_write_l1_table(q)) {
879*e184700aSLan Tianyu 			pr_warning("Update l1 table error");
880*e184700aSLan Tianyu 			goto free_cache;
881*e184700aSLan Tianyu 		}
882*e184700aSLan Tianyu 
883*e184700aSLan Tianyu 		/* free old cluster */
884*e184700aSLan Tianyu 		qcow_free_clusters(q, l2t_offset, q->cluster_size);
885*e184700aSLan Tianyu 	}
886*e184700aSLan Tianyu 
887*e184700aSLan Tianyu 	*result_l2t = l2t;
888*e184700aSLan Tianyu 	*result_l2_index = l2t_idx;
889*e184700aSLan Tianyu 
890*e184700aSLan Tianyu 	return 0;
891*e184700aSLan Tianyu 
892*e184700aSLan Tianyu free_cache:
893*e184700aSLan Tianyu 	free(l2t);
894*e184700aSLan Tianyu 
895*e184700aSLan Tianyu free_cluster:
896*e184700aSLan Tianyu 	qcow_free_clusters(q, l2t_new_offset, q->cluster_size);
897*e184700aSLan Tianyu 
898*e184700aSLan Tianyu error:
899*e184700aSLan Tianyu 	return -1;
900*e184700aSLan Tianyu }
901*e184700aSLan Tianyu 
902*e184700aSLan Tianyu /*
903*e184700aSLan Tianyu  * If the cluster has been copied, write data directly. If not,
904*e184700aSLan Tianyu  * read the original data and write it to the new cluster with
905*e184700aSLan Tianyu  * modification.
906*e184700aSLan Tianyu  */
907*e184700aSLan Tianyu static ssize_t qcow_write_cluster(struct qcow *q, u64 offset,
908*e184700aSLan Tianyu 		void *buf, u32 src_len)
909*e184700aSLan Tianyu {
910*e184700aSLan Tianyu 	struct qcow_l2_table *l2t;
911*e184700aSLan Tianyu 	s64 clust_new_start;
912*e184700aSLan Tianyu 	u64 clust_start;
913*e184700aSLan Tianyu 	u64 clust_flags;
914*e184700aSLan Tianyu 	u64 clust_off;
915*e184700aSLan Tianyu 	u64 l2t_idx;
916*e184700aSLan Tianyu 	u64 len;
917*e184700aSLan Tianyu 
918*e184700aSLan Tianyu 	l2t = NULL;
919*e184700aSLan Tianyu 
920865c675fSPrasad Joshi 	clust_off = get_cluster_offset(q, offset);
921*e184700aSLan Tianyu 	if (clust_off >= q->cluster_size)
922c0799eb9SPekka Enberg 		return -1;
923865c675fSPrasad Joshi 
924*e184700aSLan Tianyu 	len = q->cluster_size - clust_off;
925865c675fSPrasad Joshi 	if (len > src_len)
926865c675fSPrasad Joshi 		len = src_len;
927865c675fSPrasad Joshi 
928c0799eb9SPekka Enberg 	mutex_lock(&q->mutex);
929c0799eb9SPekka Enberg 
930*e184700aSLan Tianyu 	if (get_cluster_table(q, offset, &l2t, &l2t_idx)) {
931*e184700aSLan Tianyu 		pr_warning("Get l2 table error");
932121dd76eSPekka Enberg 		goto error;
933121dd76eSPekka Enberg 	}
934865c675fSPrasad Joshi 
935b2ebe61bSPekka Enberg 	clust_start = be64_to_cpu(l2t->table[l2t_idx]);
936af68c51aSLan Tianyu 	clust_flags = clust_start & QCOW2_OFLAGS_MASK;
937b2ebe61bSPekka Enberg 
938af68c51aSLan Tianyu 	clust_start &= QCOW2_OFFSET_MASK;
939af68c51aSLan Tianyu 	if (!(clust_flags & QCOW2_OFLAG_COPIED)) {
940*e184700aSLan Tianyu 		clust_new_start	= qcow_alloc_clusters(q, q->cluster_size);
941*e184700aSLan Tianyu 		if (clust_new_start < 0) {
942*e184700aSLan Tianyu 			pr_warning("Cluster alloc error");
9433ecac800SPekka Enberg 			goto error;
9443ecac800SPekka Enberg 		}
9453ecac800SPekka Enberg 
946*e184700aSLan Tianyu 		offset &= ~(q->cluster_size - 1);
9473ecac800SPekka Enberg 
948*e184700aSLan Tianyu 		/* if clust_start is not zero, read the original data */
949*e184700aSLan Tianyu 		if (clust_start) {
950c0799eb9SPekka Enberg 			mutex_unlock(&q->mutex);
951*e184700aSLan Tianyu 			if (qcow2_read_cluster(q, offset, q->copy_buff,
952*e184700aSLan Tianyu 				q->cluster_size) < 0) {
953*e184700aSLan Tianyu 				pr_warning("Read copy cluster error");
954*e184700aSLan Tianyu 				qcow_free_clusters(q, clust_new_start,
955*e184700aSLan Tianyu 					q->cluster_size);
956*e184700aSLan Tianyu 				return -1;
957*e184700aSLan Tianyu 			}
958*e184700aSLan Tianyu 			mutex_lock(&q->mutex);
959*e184700aSLan Tianyu 		} else
960*e184700aSLan Tianyu 			memset(q->copy_buff, 0x00, q->cluster_size);
961*e184700aSLan Tianyu 
962*e184700aSLan Tianyu 		memcpy(q->copy_buff + clust_off, buf, len);
963c0799eb9SPekka Enberg 
964a4e46515SPekka Enberg 		 /* Write actual data */
965*e184700aSLan Tianyu 		if (pwrite_in_full(q->fd, q->copy_buff, q->cluster_size,
966*e184700aSLan Tianyu 			clust_new_start) < 0)
967*e184700aSLan Tianyu 			goto free_cluster;
968a4e46515SPekka Enberg 
969*e184700aSLan Tianyu 		/* update l2 table*/
970*e184700aSLan Tianyu 		l2t->table[l2t_idx] = cpu_to_be64(clust_new_start
971*e184700aSLan Tianyu 			| QCOW2_OFLAG_COPIED);
972*e184700aSLan Tianyu 		l2t->dirty = 1;
973*e184700aSLan Tianyu 
974*e184700aSLan Tianyu 		if (qcow_l2_cache_write(q, l2t))
975*e184700aSLan Tianyu 			goto free_cluster;
976*e184700aSLan Tianyu 
977*e184700aSLan Tianyu 		/* free old cluster*/
978*e184700aSLan Tianyu 		if (clust_flags & QCOW2_OFLAG_COMPRESSED) {
979*e184700aSLan Tianyu 			int size;
980*e184700aSLan Tianyu 			size = ((clust_start >> q->csize_shift) &
981*e184700aSLan Tianyu 				q->csize_mask) + 1;
982*e184700aSLan Tianyu 			size *= 512;
983*e184700aSLan Tianyu 			clust_start &= q->cluster_offset_mask;
984*e184700aSLan Tianyu 			clust_start &= ~511;
985*e184700aSLan Tianyu 
986*e184700aSLan Tianyu 			qcow_free_clusters(q, clust_start, size);
987*e184700aSLan Tianyu 		} else if (clust_start)
988*e184700aSLan Tianyu 			qcow_free_clusters(q, clust_start, q->cluster_size);
989*e184700aSLan Tianyu 
990*e184700aSLan Tianyu 	} else {
991*e184700aSLan Tianyu 		/* Write actual data */
992*e184700aSLan Tianyu 		if (pwrite_in_full(q->fd, buf, len,
993*e184700aSLan Tianyu 			clust_start + clust_off) < 0)
994*e184700aSLan Tianyu 			goto error;
995*e184700aSLan Tianyu 	}
996*e184700aSLan Tianyu 	mutex_unlock(&q->mutex);
997865c675fSPrasad Joshi 	return len;
9983309045fSPrasad Joshi 
999*e184700aSLan Tianyu free_cluster:
1000*e184700aSLan Tianyu 	qcow_free_clusters(q, clust_new_start, q->cluster_size);
1001*e184700aSLan Tianyu 
1002865c675fSPrasad Joshi error:
1003c0799eb9SPekka Enberg 	mutex_unlock(&q->mutex);
1004865c675fSPrasad Joshi 	return -1;
1005865c675fSPrasad Joshi }
1006865c675fSPrasad Joshi 
10072534c9b6SSasha Levin static ssize_t qcow_write_sector_single(struct disk_image *disk, u64 sector, void *src, u32 src_len)
100886835cedSPrasad Joshi {
1009865c675fSPrasad Joshi 	struct qcow *q = disk->priv;
1010865c675fSPrasad Joshi 	struct qcow_header *header = q->header;
1011c4acb611SIngo Molnar 	u32 nr_written;
10120df6b4d9SPekka Enberg 	char *buf;
1013865c675fSPrasad Joshi 	u64 offset;
1014865c675fSPrasad Joshi 	ssize_t nr;
1015865c675fSPrasad Joshi 
10160df6b4d9SPekka Enberg 	buf		= src;
10170df6b4d9SPekka Enberg 	nr_written	= 0;
1018865c675fSPrasad Joshi 	offset		= sector << SECTOR_SHIFT;
10190df6b4d9SPekka Enberg 
10200df6b4d9SPekka Enberg 	while (nr_written < src_len) {
1021865c675fSPrasad Joshi 		if (offset >= header->size)
10220df6b4d9SPekka Enberg 			return -1;
1023865c675fSPrasad Joshi 
1024b1c84095SPekka Enberg 		nr = qcow_write_cluster(q, offset, buf, src_len - nr_written);
1025865c675fSPrasad Joshi 		if (nr < 0)
10260df6b4d9SPekka Enberg 			return -1;
1027865c675fSPrasad Joshi 
10280df6b4d9SPekka Enberg 		nr_written	+= nr;
1029865c675fSPrasad Joshi 		buf		+= nr;
1030865c675fSPrasad Joshi 		offset		+= nr;
1031865c675fSPrasad Joshi 	}
10320df6b4d9SPekka Enberg 
103372133dd2SAsias He 	return nr_written;
103486835cedSPrasad Joshi }
103586835cedSPrasad Joshi 
10362534c9b6SSasha Levin static ssize_t qcow_write_sector(struct disk_image *disk, u64 sector,
10375af21162SSasha Levin 				const struct iovec *iov, int iovcount, void *param)
10382534c9b6SSasha Levin {
10392534c9b6SSasha Levin 	ssize_t nr, total = 0;
10402534c9b6SSasha Levin 
10412534c9b6SSasha Levin 	while (iovcount--) {
10422534c9b6SSasha Levin 		nr = qcow_write_sector_single(disk, sector, iov->iov_base, iov->iov_len);
10432534c9b6SSasha Levin 		if (nr != (ssize_t)iov->iov_len) {
10442534c9b6SSasha Levin 			pr_info("qcow_write_sector error: nr=%ld iov_len=%ld\n", (long)nr, (long)iov->iov_len);
10452534c9b6SSasha Levin 			return -1;
10462534c9b6SSasha Levin 		}
10472534c9b6SSasha Levin 
10482534c9b6SSasha Levin 		sector	+= iov->iov_len >> SECTOR_SHIFT;
10492534c9b6SSasha Levin 		iov++;
10502534c9b6SSasha Levin 		total	+= nr;
10512534c9b6SSasha Levin 	}
10522534c9b6SSasha Levin 
10532534c9b6SSasha Levin 	return total;
10542534c9b6SSasha Levin }
10552534c9b6SSasha Levin 
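/*
 * Write back every dirty refcount block and L2 table, rewrite the L1
 * table, and finally fsync() the image so all metadata reaches disk.
 */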
1056659f4186SPekka Enberg static int qcow_disk_flush(struct disk_image *disk)
1057659f4186SPekka Enberg {
105873984b11SPekka Enberg 	struct qcow *q = disk->priv;
10593ecac800SPekka Enberg 	struct qcow_refcount_table *rft;
1060a4e46515SPekka Enberg 	struct list_head *pos, *n;
10617b4eb530SPekka Enberg 	struct qcow_l1_table *l1t;
106273984b11SPekka Enberg 
10637b4eb530SPekka Enberg 	l1t = &q->table;
10643ecac800SPekka Enberg 	rft = &q->refcount_table;
106573984b11SPekka Enberg 
1066a4e46515SPekka Enberg 	mutex_lock(&q->mutex);
1067a4e46515SPekka Enberg 
10683ecac800SPekka Enberg 	list_for_each_safe(pos, n, &rft->lru_list) {
10693ecac800SPekka Enberg 		struct qcow_refcount_block *c = list_entry(pos, struct qcow_refcount_block, list);
10703ecac800SPekka Enberg 
10713ecac800SPekka Enberg 		if (write_refcount_block(q, c) < 0)
10723ecac800SPekka Enberg 			goto error_unlock;
10733ecac800SPekka Enberg 	}
10743ecac800SPekka Enberg 
10757b4eb530SPekka Enberg 	list_for_each_safe(pos, n, &l1t->lru_list) {
1076a4e46515SPekka Enberg 		struct qcow_l2_table *c = list_entry(pos, struct qcow_l2_table, list);
1077a4e46515SPekka Enberg 
1078a4e46515SPekka Enberg 		if (qcow_l2_cache_write(q, c) < 0)
1079a4e46515SPekka Enberg 			goto error_unlock;
1080a4e46515SPekka Enberg 	}
1081a4e46515SPekka Enberg 
1082*e184700aSLan Tianyu 	if (qcow_write_l1_table(q) < 0)
1083a4e46515SPekka Enberg 		goto error_unlock;
1084a4e46515SPekka Enberg 
1085a4e46515SPekka Enberg 	mutex_unlock(&q->mutex);
108673984b11SPekka Enberg 
1087659f4186SPekka Enberg 	return fsync(disk->fd);
1088a4e46515SPekka Enberg 
1089a4e46515SPekka Enberg error_unlock:
1090a4e46515SPekka Enberg 	mutex_unlock(&q->mutex);
1091a4e46515SPekka Enberg 	return -1;
1092659f4186SPekka Enberg }
1093659f4186SPekka Enberg 
1094b1c84095SPekka Enberg static int qcow_disk_close(struct disk_image *disk)
109586835cedSPrasad Joshi {
109686835cedSPrasad Joshi 	struct qcow *q;
109786835cedSPrasad Joshi 
109843835ac9SSasha Levin 	if (!disk)
109972133dd2SAsias He 		return 0;
110086835cedSPrasad Joshi 
110143835ac9SSasha Levin 	q = disk->priv;
110286835cedSPrasad Joshi 
11033ecac800SPekka Enberg 	refcount_table_free_cache(&q->refcount_table);
1104e94cdf08SPekka Enberg 	l1_table_free_cache(&q->table);
1105*e184700aSLan Tianyu 	free(q->copy_buff);
1106af68c51aSLan Tianyu 	free(q->cluster_data);
1107af68c51aSLan Tianyu 	free(q->cluster_cache);
11083ecac800SPekka Enberg 	free(q->refcount_table.rf_table);
11096c6f79b6SPrasad Joshi 	free(q->table.l1_table);
111086835cedSPrasad Joshi 	free(q->header);
111186835cedSPrasad Joshi 	free(q);
111272133dd2SAsias He 
111372133dd2SAsias He 	return 0;
111486835cedSPrasad Joshi }
111586835cedSPrasad Joshi 
1116b1c84095SPekka Enberg static struct disk_image_operations qcow_disk_readonly_ops = {
1117b1c84095SPekka Enberg 	.read_sector		= qcow_read_sector,
1118b1c84095SPekka Enberg 	.close			= qcow_disk_close,
1119f10860caSPekka Enberg };
1120f10860caSPekka Enberg 
1121b1c84095SPekka Enberg static struct disk_image_operations qcow_disk_ops = {
1122b1c84095SPekka Enberg 	.read_sector		= qcow_read_sector,
1123b1c84095SPekka Enberg 	.write_sector		= qcow_write_sector,
1124659f4186SPekka Enberg 	.flush			= qcow_disk_flush,
1125b1c84095SPekka Enberg 	.close			= qcow_disk_close,
112686835cedSPrasad Joshi };
112786835cedSPrasad Joshi 
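/*
 * Load the refcount table. header->refcount_table_size holds the
 * number of clusters the table occupies, so its size in u64 entries is
 * refcount_table_size * cluster_size / sizeof(u64).
 */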
11283ecac800SPekka Enberg static int qcow_read_refcount_table(struct qcow *q)
11293ecac800SPekka Enberg {
11303ecac800SPekka Enberg 	struct qcow_header *header = q->header;
11313ecac800SPekka Enberg 	struct qcow_refcount_table *rft = &q->refcount_table;
11323ecac800SPekka Enberg 
1133af68c51aSLan Tianyu 	rft->rf_size = (header->refcount_table_size * q->cluster_size)
1134af68c51aSLan Tianyu 		/ sizeof(u64);
11353ecac800SPekka Enberg 
11363ecac800SPekka Enberg 	rft->rf_table = calloc(rft->rf_size, sizeof(u64));
11373ecac800SPekka Enberg 	if (!rft->rf_table)
11383ecac800SPekka Enberg 		return -1;
11393ecac800SPekka Enberg 
11403ecac800SPekka Enberg 	rft->root = RB_ROOT;
11413ecac800SPekka Enberg 	INIT_LIST_HEAD(&rft->lru_list);
11423ecac800SPekka Enberg 
11433ecac800SPekka Enberg 	return pread_in_full(q->fd, rft->rf_table, sizeof(u64) * rft->rf_size, header->refcount_table_offset);
11443ecac800SPekka Enberg }
11453ecac800SPekka Enberg 
114686835cedSPrasad Joshi static int qcow_read_l1_table(struct qcow *q)
114786835cedSPrasad Joshi {
1148ad627d62SPekka Enberg 	struct qcow_header *header = q->header;
1149473aaa2dSPekka Enberg 	struct qcow_l1_table *table = &q->table;
115086835cedSPrasad Joshi 
1151ad627d62SPekka Enberg 	table->table_size	= header->l1_size;
115286835cedSPrasad Joshi 
115300adcc1bSPrasad Joshi 	table->l1_table	= calloc(table->table_size, sizeof(u64));
115400adcc1bSPrasad Joshi 	if (!table->l1_table)
115586835cedSPrasad Joshi 		return -1;
115686835cedSPrasad Joshi 
1157659f4186SPekka Enberg 	return pread_in_full(q->fd, table->l1_table, sizeof(u64) * table->table_size, header->l1_table_offset);
115886835cedSPrasad Joshi }
115986835cedSPrasad Joshi 
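/*
 * Read the on-disk QCOW2 header, convert each field from big-endian,
 * and translate it into the version-independent struct qcow_header.
 */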
1160ad627d62SPekka Enberg static void *qcow2_read_header(int fd)
116186835cedSPrasad Joshi {
1162ad627d62SPekka Enberg 	struct qcow2_header_disk f_header;
1163ad627d62SPekka Enberg 	struct qcow_header *header;
116486835cedSPrasad Joshi 
1165ad627d62SPekka Enberg 	header = malloc(sizeof(struct qcow_header));
116686835cedSPrasad Joshi 	if (!header)
116786835cedSPrasad Joshi 		return NULL;
116886835cedSPrasad Joshi 
11690657f33dSPrasad Joshi 	if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0) {
11700657f33dSPrasad Joshi 		free(header);
117186835cedSPrasad Joshi 		return NULL;
11720657f33dSPrasad Joshi 	}
117386835cedSPrasad Joshi 
1174ad627d62SPekka Enberg 	be32_to_cpus(&f_header.magic);
1175ad627d62SPekka Enberg 	be32_to_cpus(&f_header.version);
1176ad627d62SPekka Enberg 	be64_to_cpus(&f_header.backing_file_offset);
1177ad627d62SPekka Enberg 	be32_to_cpus(&f_header.backing_file_size);
1178ad627d62SPekka Enberg 	be32_to_cpus(&f_header.cluster_bits);
1179ad627d62SPekka Enberg 	be64_to_cpus(&f_header.size);
1180ad627d62SPekka Enberg 	be32_to_cpus(&f_header.crypt_method);
1181ad627d62SPekka Enberg 	be32_to_cpus(&f_header.l1_size);
1182ad627d62SPekka Enberg 	be64_to_cpus(&f_header.l1_table_offset);
1183ad627d62SPekka Enberg 	be64_to_cpus(&f_header.refcount_table_offset);
1184ad627d62SPekka Enberg 	be32_to_cpus(&f_header.refcount_table_clusters);
1185ad627d62SPekka Enberg 	be32_to_cpus(&f_header.nb_snapshots);
1186ad627d62SPekka Enberg 	be64_to_cpus(&f_header.snapshots_offset);
1187ad627d62SPekka Enberg 
1188ad627d62SPekka Enberg 	*header		= (struct qcow_header) {
1189ad627d62SPekka Enberg 		.size			= f_header.size,
1190ad627d62SPekka Enberg 		.l1_table_offset	= f_header.l1_table_offset,
1191ad627d62SPekka Enberg 		.l1_size		= f_header.l1_size,
1192ad627d62SPekka Enberg 		.cluster_bits		= f_header.cluster_bits,
1193ad627d62SPekka Enberg 		.l2_bits		= f_header.cluster_bits - 3,
11943ecac800SPekka Enberg 		.refcount_table_offset	= f_header.refcount_table_offset,
11953ecac800SPekka Enberg 		.refcount_table_size	= f_header.refcount_table_clusters,
1196ad627d62SPekka Enberg 	};
1197ad627d62SPekka Enberg 
1198ad627d62SPekka Enberg 	return header;
1199ad627d62SPekka Enberg }
1200ad627d62SPekka Enberg 
1201f10860caSPekka Enberg static struct disk_image *qcow2_probe(int fd, bool readonly)
1202ad627d62SPekka Enberg {
1203ad627d62SPekka Enberg 	struct disk_image *disk_image;
12047b4eb530SPekka Enberg 	struct qcow_l1_table *l1t;
12057b4eb530SPekka Enberg 	struct qcow_header *h;
12067b4eb530SPekka Enberg 	struct qcow *q;
1207ad627d62SPekka Enberg 
1208ad627d62SPekka Enberg 	q = calloc(1, sizeof(struct qcow));
1209ad627d62SPekka Enberg 	if (!q)
1210af68c51aSLan Tianyu 		return NULL;
1211ad627d62SPekka Enberg 
1212c0799eb9SPekka Enberg 	mutex_init(&q->mutex);
1213ad627d62SPekka Enberg 	q->fd = fd;
12147b4eb530SPekka Enberg 
12157b4eb530SPekka Enberg 	l1t = &q->table;
12167b4eb530SPekka Enberg 
12177b4eb530SPekka Enberg 	l1t->root = RB_ROOT;
12187b4eb530SPekka Enberg 	INIT_LIST_HEAD(&l1t->lru_list);
1219ad627d62SPekka Enberg 
1220ad627d62SPekka Enberg 	h = q->header = qcow2_read_header(fd);
1221ad627d62SPekka Enberg 	if (!h)
1222af68c51aSLan Tianyu 		goto free_qcow;
1223af68c51aSLan Tianyu 
1224af68c51aSLan Tianyu 	q->version = QCOW2_VERSION;
1225af68c51aSLan Tianyu 	q->csize_shift = (62 - (q->header->cluster_bits - 8));
1226af68c51aSLan Tianyu 	q->csize_mask = (1 << (q->header->cluster_bits - 8)) - 1;
1227af68c51aSLan Tianyu 	q->cluster_offset_mask = (1LL << q->csize_shift) - 1;
1228af68c51aSLan Tianyu 	q->cluster_size = 1 << q->header->cluster_bits;
1229af68c51aSLan Tianyu 
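	/*
	 * Scratch buffer used to duplicate a cluster's contents when a
	 * copy-on-write cluster is written to.
	 */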
1230*e184700aSLan Tianyu 	q->copy_buff = malloc(q->cluster_size);
1231*e184700aSLan Tianyu 	if (!q->copy_buff) {
1232*e184700aSLan Tianyu 		pr_warning("failed to allocate copy-on-write buffer");
1233*e184700aSLan Tianyu 		goto free_header;
1234*e184700aSLan Tianyu 	}
1235*e184700aSLan Tianyu 
1236af68c51aSLan Tianyu 	q->cluster_data = malloc(q->cluster_size);
1237af68c51aSLan Tianyu 	if (!q->cluster_data) {
1238*e184700aSLan Tianyu 		pr_warning("failed to allocate cluster data buffer");
1239*e184700aSLan Tianyu 		goto free_copy_buff;
1240af68c51aSLan Tianyu 	}
1241af68c51aSLan Tianyu 
1242af68c51aSLan Tianyu 	q->cluster_cache = malloc(q->cluster_size);
1243af68c51aSLan Tianyu 	if (!q->cluster_cache) {
1244*e184700aSLan Tianyu 		pr_warning("failed to allocate cluster cache buffer");
1245af68c51aSLan Tianyu 		goto free_cluster_data;
1246af68c51aSLan Tianyu 	}
1247ad627d62SPekka Enberg 
1248ad627d62SPekka Enberg 	if (qcow_read_l1_table(q) < 0)
1249af68c51aSLan Tianyu 		goto free_cluster_cache;
1250ad627d62SPekka Enberg 
12513ecac800SPekka Enberg 	if (qcow_read_refcount_table(q) < 0)
1252af68c51aSLan Tianyu 		goto free_l1_table;
12533ecac800SPekka Enberg 
12547d22135fSAsias He 	/*
12557d22135fSAsias He 	 * Do not use mmap; use read/write instead.
12567d22135fSAsias He 	 */
1257f10860caSPekka Enberg 	if (readonly)
125838c396e4SSasha Levin 		disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_REGULAR);
1259f10860caSPekka Enberg 	else
126038c396e4SSasha Levin 		disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_REGULAR);
1261f10860caSPekka Enberg 
1262ad627d62SPekka Enberg 	if (!disk_image)
1263af68c51aSLan Tianyu 		goto free_refcount_table;
1264f41a132bSSasha Levin 
126559e8453aSSasha Levin 	disk_image->async = 0;
1266ad627d62SPekka Enberg 	disk_image->priv = q;
1267ad627d62SPekka Enberg 
1268ad627d62SPekka Enberg 	return disk_image;
1269ad627d62SPekka Enberg 
1270af68c51aSLan Tianyu free_refcount_table:
1272af68c51aSLan Tianyu 	free(q->refcount_table.rf_table);
1273af68c51aSLan Tianyu free_l1_table:
1275ad627d62SPekka Enberg 	free(q->table.l1_table);
1276af68c51aSLan Tianyu free_cluster_cache:
1278af68c51aSLan Tianyu 	free(q->cluster_cache);
1279af68c51aSLan Tianyu free_cluster_data:
1281af68c51aSLan Tianyu 	free(q->cluster_data);
1282*e184700aSLan Tianyu free_copy_buff:
1284*e184700aSLan Tianyu 	free(q->copy_buff);
1285af68c51aSLan Tianyu free_header:
1287ad627d62SPekka Enberg 	free(q->header);
1288af68c51aSLan Tianyu free_qcow:
1290ad627d62SPekka Enberg 	free(q);
1291ad627d62SPekka Enberg 
1292ad627d62SPekka Enberg 	return NULL;
1293ad627d62SPekka Enberg }
1294ad627d62SPekka Enberg 
1295ad627d62SPekka Enberg static bool qcow2_check_image(int fd)
1296ad627d62SPekka Enberg {
1297ad627d62SPekka Enberg 	struct qcow2_header_disk f_header;
1298ad627d62SPekka Enberg 
1299ad627d62SPekka Enberg 	if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0)
1300ad627d62SPekka Enberg 		return false;
1301ad627d62SPekka Enberg 
1302ad627d62SPekka Enberg 	be32_to_cpus(&f_header.magic);
1303ad627d62SPekka Enberg 	be32_to_cpus(&f_header.version);
1304ad627d62SPekka Enberg 
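	/* v1 and v2 share the "QFI\xfb" magic; only the version field differs. */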
1305ad627d62SPekka Enberg 	if (f_header.magic != QCOW_MAGIC)
1306ad627d62SPekka Enberg 		return false;
1307ad627d62SPekka Enberg 
1308ad627d62SPekka Enberg 	if (f_header.version != QCOW2_VERSION)
1309ad627d62SPekka Enberg 		return false;
1310ad627d62SPekka Enberg 
1311ad627d62SPekka Enberg 	return true;
1312ad627d62SPekka Enberg }
1313ad627d62SPekka Enberg 
1314ad627d62SPekka Enberg static void *qcow1_read_header(int fd)
1315ad627d62SPekka Enberg {
1316ad627d62SPekka Enberg 	struct qcow1_header_disk f_header;
1317ad627d62SPekka Enberg 	struct qcow_header *header;
1318ad627d62SPekka Enberg 
1319ad627d62SPekka Enberg 	header = malloc(sizeof(struct qcow_header));
1320ad627d62SPekka Enberg 	if (!header)
1321ad627d62SPekka Enberg 		return NULL;
1322ad627d62SPekka Enberg 
1323d39cefd2SSasha Levin 	if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0) {
1324d39cefd2SSasha Levin 		free(header);
1325ad627d62SPekka Enberg 		return NULL;
1326d39cefd2SSasha Levin 	}
1327ad627d62SPekka Enberg 
1328ad627d62SPekka Enberg 	be32_to_cpus(&f_header.magic);
1329ad627d62SPekka Enberg 	be32_to_cpus(&f_header.version);
1330ad627d62SPekka Enberg 	be64_to_cpus(&f_header.backing_file_offset);
1331ad627d62SPekka Enberg 	be32_to_cpus(&f_header.backing_file_size);
1332ad627d62SPekka Enberg 	be32_to_cpus(&f_header.mtime);
1333ad627d62SPekka Enberg 	be64_to_cpus(&f_header.size);
1334ad627d62SPekka Enberg 	be32_to_cpus(&f_header.crypt_method);
1335ad627d62SPekka Enberg 	be64_to_cpus(&f_header.l1_table_offset);
1336ad627d62SPekka Enberg 
1337ad627d62SPekka Enberg 	*header		= (struct qcow_header) {
1338ad627d62SPekka Enberg 		.size			= f_header.size,
1339ad627d62SPekka Enberg 		.l1_table_offset	= f_header.l1_table_offset,
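		/*
		 * Number of L1 entries needed to map the whole image: each
		 * entry covers an L2 table's worth of data, i.e.
		 * 2^(l2_bits + cluster_bits) bytes. Round up so a trailing
		 * partial span still gets an entry.
		 */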
1340ad627d62SPekka Enberg 		.l1_size		= (f_header.size + (1ULL << (f_header.l2_bits + f_header.cluster_bits)) - 1) >> (f_header.l2_bits + f_header.cluster_bits),
1341ad627d62SPekka Enberg 		.cluster_bits		= f_header.cluster_bits,
1342ad627d62SPekka Enberg 		.l2_bits		= f_header.l2_bits,
1343ad627d62SPekka Enberg 	};
134486835cedSPrasad Joshi 
134586835cedSPrasad Joshi 	return header;
134686835cedSPrasad Joshi }
134786835cedSPrasad Joshi 
1348f10860caSPekka Enberg static struct disk_image *qcow1_probe(int fd, bool readonly)
134986835cedSPrasad Joshi {
135086835cedSPrasad Joshi 	struct disk_image *disk_image;
13517b4eb530SPekka Enberg 	struct qcow_l1_table *l1t;
13527b4eb530SPekka Enberg 	struct qcow_header *h;
13537b4eb530SPekka Enberg 	struct qcow *q;
135486835cedSPrasad Joshi 
135586835cedSPrasad Joshi 	q = calloc(1, sizeof(struct qcow));
135686835cedSPrasad Joshi 	if (!q)
1357af68c51aSLan Tianyu 		return NULL;
135886835cedSPrasad Joshi 
1359c0799eb9SPekka Enberg 	mutex_init(&q->mutex);
136086835cedSPrasad Joshi 	q->fd = fd;
13617b4eb530SPekka Enberg 
13627b4eb530SPekka Enberg 	l1t = &q->table;
13637b4eb530SPekka Enberg 
13647b4eb530SPekka Enberg 	l1t->root = RB_ROOT;
13657b4eb530SPekka Enberg 	INIT_LIST_HEAD(&l1t->lru_list);
136686835cedSPrasad Joshi 
136786835cedSPrasad Joshi 	h = q->header = qcow1_read_header(fd);
136886835cedSPrasad Joshi 	if (!h)
1369af68c51aSLan Tianyu 		goto free_qcow;
1370af68c51aSLan Tianyu 
1371af68c51aSLan Tianyu 	q->version = QCOW1_VERSION;
1372af68c51aSLan Tianyu 	q->cluster_size = 1 << q->header->cluster_bits;
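	/*
	 * In a QCOW1 table entry, bit 63 flags a compressed cluster; the
	 * low 63 - cluster_bits bits hold the host offset (which this mask
	 * extracts) and the bits in between encode the compressed size.
	 */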
1373af68c51aSLan Tianyu 	q->cluster_offset_mask = (1LL << (63 - q->header->cluster_bits)) - 1;
1374*e184700aSLan Tianyu 	q->free_clust_idx = 0;
1375af68c51aSLan Tianyu 
1376af68c51aSLan Tianyu 	q->cluster_data = malloc(q->cluster_size);
1377af68c51aSLan Tianyu 	if (!q->cluster_data) {
1378*e184700aSLan Tianyu 		pr_warning("failed to allocate cluster data buffer");
1379af68c51aSLan Tianyu 		goto free_header;
1380af68c51aSLan Tianyu 	}
1381af68c51aSLan Tianyu 
1382af68c51aSLan Tianyu 	q->cluster_cache = malloc(q->cluster_size);
1383af68c51aSLan Tianyu 	if (!q->cluster_cache) {
1384*e184700aSLan Tianyu 		pr_warning("failed to allocate cluster cache buffer");
1385af68c51aSLan Tianyu 		goto free_cluster_data;
1386af68c51aSLan Tianyu 	}
138786835cedSPrasad Joshi 
138886835cedSPrasad Joshi 	if (qcow_read_l1_table(q) < 0)
1389af68c51aSLan Tianyu 		goto free_cluster_cache;
139086835cedSPrasad Joshi 
13917d22135fSAsias He 	/*
13927d22135fSAsias He 	 * Do not use mmap; use read/write instead.
13937d22135fSAsias He 	 */
1394f10860caSPekka Enberg 	if (readonly)
139538c396e4SSasha Levin 		disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_REGULAR);
1396f10860caSPekka Enberg 	else
139738c396e4SSasha Levin 		disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_REGULAR);
1398f10860caSPekka Enberg 
139986835cedSPrasad Joshi 	if (!disk_image)
1400af68c51aSLan Tianyu 		goto free_l1_table;
1401f41a132bSSasha Levin 
1402f41a132bSSasha Levin 	disk_image->async = 1;
140386835cedSPrasad Joshi 	disk_image->priv = q;
140486835cedSPrasad Joshi 
140586835cedSPrasad Joshi 	return disk_image;
140686835cedSPrasad Joshi 
1407af68c51aSLan Tianyu free_l1_table:
14096c6f79b6SPrasad Joshi 	free(q->table.l1_table);
1410af68c51aSLan Tianyu free_cluster_cache:
1412af68c51aSLan Tianyu 	free(q->cluster_cache);
1413af68c51aSLan Tianyu free_cluster_data:
1415af68c51aSLan Tianyu 	free(q->cluster_data);
1416af68c51aSLan Tianyu free_header:
141886835cedSPrasad Joshi 	free(q->header);
1419af68c51aSLan Tianyu free_qcow:
142186835cedSPrasad Joshi 	free(q);
142286835cedSPrasad Joshi 
142386835cedSPrasad Joshi 	return NULL;
142486835cedSPrasad Joshi }
142586835cedSPrasad Joshi 
1426ad627d62SPekka Enberg static bool qcow1_check_image(int fd)
142786835cedSPrasad Joshi {
1428ad627d62SPekka Enberg 	struct qcow1_header_disk f_header;
142986835cedSPrasad Joshi 
1430ad627d62SPekka Enberg 	if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0)
1431ad627d62SPekka Enberg 		return false;
143286835cedSPrasad Joshi 
1433ad627d62SPekka Enberg 	be32_to_cpus(&f_header.magic);
1434ad627d62SPekka Enberg 	be32_to_cpus(&f_header.version);
143586835cedSPrasad Joshi 
1436ad627d62SPekka Enberg 	if (f_header.magic != QCOW_MAGIC)
1437ad627d62SPekka Enberg 		return false;
143886835cedSPrasad Joshi 
1439ad627d62SPekka Enberg 	if (f_header.version != QCOW1_VERSION)
1440ad627d62SPekka Enberg 		return false;
144186835cedSPrasad Joshi 
1442ad627d62SPekka Enberg 	return true;
144386835cedSPrasad Joshi }
144486835cedSPrasad Joshi 
1445f10860caSPekka Enberg struct disk_image *qcow_probe(int fd, bool readonly)
144686835cedSPrasad Joshi {
1447ad627d62SPekka Enberg 	if (qcow1_check_image(fd))
1448f10860caSPekka Enberg 		return qcow1_probe(fd, readonly);
1449ad627d62SPekka Enberg 
1450ad627d62SPekka Enberg 	if (qcow2_check_image(fd))
1451f10860caSPekka Enberg 		return qcow2_probe(fd, readonly);
1452ad627d62SPekka Enberg 
1453ad627d62SPekka Enberg 	return NULL;
145486835cedSPrasad Joshi }
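
/*
 * Illustrative usage sketch, not part of this file's API surface; in
 * kvmtool the probing is driven from disk_image__open() in disk/core.c:
 *
 *	int fd = open(filename, readonly ? O_RDONLY : O_RDWR);
 *	struct disk_image *disk;
 *
 *	if (fd < 0)
 *		return NULL;
 *
 *	disk = qcow_probe(fd, readonly);
 *	if (!disk)
 *		... not a qcow image; fall back to the next format handler ...
 */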
1455