xref: /kvmtool/disk/qcow.c (revision d62e8ee002eb1ef50d0ca9dfa4f0c04782e53879)
1 #include "kvm/qcow.h"
2 
3 #include "kvm/disk-image.h"
4 #include "kvm/read-write.h"
5 #include "kvm/mutex.h"
6 #include "kvm/util.h"
7 
8 #include <sys/types.h>
9 #include <sys/stat.h>
10 #include <stdbool.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <unistd.h>
14 #include <fcntl.h>
15 #include <errno.h>
16 #ifdef CONFIG_HAS_ZLIB
17 #include <zlib.h>
18 #endif
19 
20 #include <linux/err.h>
21 #include <linux/byteorder.h>
22 #include <linux/kernel.h>
23 #include <linux/types.h>
24 
25 static int update_cluster_refcount(struct qcow *q, u64 clust_idx, u16 append);
26 static int qcow_write_refcount_table(struct qcow *q);
27 static u64 qcow_alloc_clusters(struct qcow *q, u64 size, int update_ref);
28 static void  qcow_free_clusters(struct qcow *q, u64 clust_start, u64 size);
29 
30 static inline int qcow_pwrite_sync(int fd,
31 	void *buf, size_t count, off_t offset)
32 {
33 	if (pwrite_in_full(fd, buf, count, offset) < 0)
34 		return -1;
35 
36 	return fdatasync(fd);
37 }
38 
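/*
 * Note: qcow_pwrite_sync() is the helper used for all metadata updates
 * below (L1 table, cached L2 tables, refcount blocks and the refcount
 * table); the pwrite is followed by fdatasync() so the metadata is on
 * disk before any structure that depends on it is updated.
 */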
39 static int l2_table_insert(struct rb_root *root, struct qcow_l2_table *new)
40 {
41 	struct rb_node **link = &(root->rb_node), *parent = NULL;
42 	u64 offset = new->offset;
43 
44 	/* search the tree */
45 	while (*link) {
46 		struct qcow_l2_table *t;
47 
48 		t = rb_entry(*link, struct qcow_l2_table, node);
49 		if (!t)
50 			goto error;
51 
52 		parent = *link;
53 
54 		if (t->offset > offset)
55 			link = &(*link)->rb_left;
56 		else if (t->offset < offset)
57 			link = &(*link)->rb_right;
58 		else
59 			goto out;
60 	}
61 
62 	/* add new node */
63 	rb_link_node(&new->node, parent, link);
64 	rb_insert_color(&new->node, root);
65 out:
66 	return 0;
67 error:
68 	return -1;
69 }
70 
71 static struct qcow_l2_table *l2_table_lookup(struct rb_root *root, u64 offset)
72 {
73 	struct rb_node *link = root->rb_node;
74 
75 	while (link) {
76 		struct qcow_l2_table *t;
77 
78 		t = rb_entry(link, struct qcow_l2_table, node);
79 		if (!t)
80 			goto out;
81 
82 		if (t->offset > offset)
83 			link = link->rb_left;
84 		else if (t->offset < offset)
85 			link = link->rb_right;
86 		else
87 			return t;
88 	}
89 out:
90 	return NULL;
91 }
92 
93 static void l1_table_free_cache(struct qcow_l1_table *l1t)
94 {
95 	struct rb_root *r = &l1t->root;
96 	struct list_head *pos, *n;
97 	struct qcow_l2_table *t;
98 
99 	list_for_each_safe(pos, n, &l1t->lru_list) {
100 		/* Remove cache table from the list and RB tree */
101 		list_del(pos);
102 		t = list_entry(pos, struct qcow_l2_table, list);
103 		rb_erase(&t->node, r);
104 
105 		/* Free the cached node */
106 		free(t);
107 	}
108 }
109 
110 static int qcow_l2_cache_write(struct qcow *q, struct qcow_l2_table *c)
111 {
112 	struct qcow_header *header = q->header;
113 	u64 size;
114 
115 	if (!c->dirty)
116 		return 0;
117 
118 	size = 1 << header->l2_bits;
119 
120 	if (qcow_pwrite_sync(q->fd, c->table,
121 		size * sizeof(u64), c->offset) < 0)
122 		return -1;
123 
124 	c->dirty = 0;
125 
126 	return 0;
127 }
128 
129 static int cache_table(struct qcow *q, struct qcow_l2_table *c)
130 {
131 	struct qcow_l1_table *l1t = &q->table;
132 	struct rb_root *r = &l1t->root;
133 	struct qcow_l2_table *lru;
134 
135 	if (l1t->nr_cached == MAX_CACHE_NODES) {
136 		/*
137 		 * The node at the head of the list is the least recently used
138 		 * node. Remove it from the list and replace it with a new node.
139 		 */
140 		lru = list_first_entry(&l1t->lru_list, struct qcow_l2_table, list);
141 
142 		/* Remove the node from the cache */
143 		rb_erase(&lru->node, r);
144 		list_del_init(&lru->list);
145 		l1t->nr_cached--;
146 
147 		/* Free the LRUed node */
148 		free(lru);
149 	}
150 
151 	/* Add new node in RB Tree: Helps in searching faster */
152 	if (l2_table_insert(r, c) < 0)
153 		goto error;
154 
155 	/* Add in LRU replacement list */
156 	list_add_tail(&c->list, &l1t->lru_list);
157 	l1t->nr_cached++;
158 
159 	return 0;
160 error:
161 	return -1;
162 }
163 
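/*
 * Cache design note: cached L2 tables are indexed two ways. The RB tree
 * keyed by file offset gives fast lookup, while lru_list keeps entries
 * in recency order so that cache_table() above can evict the
 * list_first_entry() victim once MAX_CACHE_NODES tables are cached.
 */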
164 static struct qcow_l2_table *l2_table_search(struct qcow *q, u64 offset)
165 {
166 	struct qcow_l1_table *l1t = &q->table;
167 	struct qcow_l2_table *l2t;
168 
169 	l2t = l2_table_lookup(&l1t->root, offset);
170 	if (!l2t)
171 		return NULL;
172 
173 	/* Update the LRU state, by moving the searched node to list tail */
174 	list_move_tail(&l2t->list, &l1t->lru_list);
175 
176 	return l2t;
177 }
178 
179 /* Allocates a new node for caching L2 table */
180 static struct qcow_l2_table *new_cache_table(struct qcow *q, u64 offset)
181 {
182 	struct qcow_header *header = q->header;
183 	struct qcow_l2_table *c;
184 	u64 l2t_sz;
185 	u64 size;
186 
187 	l2t_sz = 1 << header->l2_bits;
188 	size   = sizeof(*c) + l2t_sz * sizeof(u64);
189 	c      = calloc(1, size);
190 	if (!c)
191 		goto out;
192 
193 	c->offset = offset;
194 	RB_CLEAR_NODE(&c->node);
195 	INIT_LIST_HEAD(&c->list);
196 out:
197 	return c;
198 }
199 
200 static inline u64 get_l1_index(struct qcow *q, u64 offset)
201 {
202 	struct qcow_header *header = q->header;
203 
204 	return offset >> (header->l2_bits + header->cluster_bits);
205 }
206 
207 static inline u64 get_l2_index(struct qcow *q, u64 offset)
208 {
209 	struct qcow_header *header = q->header;
210 
211 	return (offset >> (header->cluster_bits)) & ((1 << header->l2_bits)-1);
212 }
213 
214 static inline u64 get_cluster_offset(struct qcow *q, u64 offset)
215 {
216 	struct qcow_header *header = q->header;
217 
218 	return offset & ((1 << header->cluster_bits)-1);
219 }
220 
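/*
 * Worked example of the offset split implemented by the three helpers
 * above (values assume the common qcow2 defaults cluster_bits = 16 and
 * l2_bits = cluster_bits - 3 = 13; other images may differ):
 *
 *	guest offset    0x12345678
 *	cluster offset  0x12345678 & 0xffff          = 0x5678
 *	l2 index        (0x12345678 >> 16) & 0x1fff  = 0x1234
 *	l1 index        0x12345678 >> (13 + 16)      = 0
 */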
221 static struct qcow_l2_table *qcow_read_l2_table(struct qcow *q, u64 offset)
222 {
223 	struct qcow_header *header = q->header;
224 	struct qcow_l2_table *l2t;
225 	u64 size;
226 
227 	size = 1 << header->l2_bits;
228 
229 	/* search an entry for offset in cache */
230 	l2t = l2_table_search(q, offset);
231 	if (l2t)
232 		return l2t;
233 
234 	/* allocate new node for caching l2 table */
235 	l2t = new_cache_table(q, offset);
236 	if (!l2t)
237 		goto error;
238 
239 	/* table not cached: read from the disk */
240 	if (pread_in_full(q->fd, l2t->table, size * sizeof(u64), offset) < 0)
241 		goto error;
242 
243 	/* cache the table */
244 	if (cache_table(q, l2t) < 0)
245 		goto error;
246 
247 	return l2t;
248 error:
249 	free(l2t);
250 	return NULL;
251 }
252 
253 static int qcow_decompress_buffer(u8 *out_buf, int out_buf_size,
254 	const u8 *buf, int buf_size)
255 {
256 #ifdef CONFIG_HAS_ZLIB
257 	z_stream strm1, *strm = &strm1;
258 	int ret, out_len;
259 
260 	memset(strm, 0, sizeof(*strm));
261 
262 	strm->next_in	= (u8 *)buf;
263 	strm->avail_in	= buf_size;
264 	strm->next_out	= out_buf;
265 	strm->avail_out	= out_buf_size;
266 
267 	ret = inflateInit2(strm, -12);
268 	if (ret != Z_OK)
269 		return -1;
270 
271 	ret = inflate(strm, Z_FINISH);
272 	out_len = strm->next_out - out_buf;
273 	if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
274 		out_len != out_buf_size) {
275 		inflateEnd(strm);
276 		return -1;
277 	}
278 
279 	inflateEnd(strm);
280 	return 0;
281 #else
282 	return -1;
283 #endif
284 }
285 
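/*
 * Note on the decompressor above: a negative windowBits value passed to
 * inflateInit2() selects raw deflate data (no zlib header or trailer),
 * and -12 gives a 4 KiB (2^12) window, which matches how qcow images
 * store compressed clusters.
 */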
286 static ssize_t qcow1_read_cluster(struct qcow *q, u64 offset,
287 	void *dst, u32 dst_len)
288 {
289 	struct qcow_header *header = q->header;
290 	struct qcow_l1_table *l1t = &q->table;
291 	struct qcow_l2_table *l2t;
292 	u64 clust_offset;
293 	u64 clust_start;
294 	u64 l2t_offset;
295 	size_t length;
296 	u64 l2t_size;
297 	u64 l1_idx;
298 	u64 l2_idx;
299 	int coffset;
300 	int csize;
301 
302 	l1_idx = get_l1_index(q, offset);
303 	if (l1_idx >= l1t->table_size)
304 		return -1;
305 
306 	clust_offset = get_cluster_offset(q, offset);
307 	if (clust_offset >= q->cluster_size)
308 		return -1;
309 
310 	length = q->cluster_size - clust_offset;
311 	if (length > dst_len)
312 		length = dst_len;
313 
314 	mutex_lock(&q->mutex);
315 
316 	l2t_offset = be64_to_cpu(l1t->l1_table[l1_idx]);
317 	if (!l2t_offset)
318 		goto zero_cluster;
319 
320 	l2t_size = 1 << header->l2_bits;
321 
322 	/* read and cache level 2 table */
323 	l2t = qcow_read_l2_table(q, l2t_offset);
324 	if (!l2t)
325 		goto out_error;
326 
327 	l2_idx = get_l2_index(q, offset);
328 	if (l2_idx >= l2t_size)
329 		goto out_error;
330 
331 	clust_start = be64_to_cpu(l2t->table[l2_idx]);
332 	if (clust_start & QCOW1_OFLAG_COMPRESSED) {
333 		coffset	= clust_start & q->cluster_offset_mask;
334 		csize	= clust_start >> (63 - q->header->cluster_bits);
335 		csize	&= (q->cluster_size - 1);
336 
337 		if (pread_in_full(q->fd, q->cluster_data, csize,
338 				  coffset) < 0)
339 			goto out_error;
340 
341 		if (qcow_decompress_buffer(q->cluster_cache, q->cluster_size,
342 					q->cluster_data, csize) < 0)
343 			goto out_error;
344 
345 		memcpy(dst, q->cluster_cache + clust_offset, length);
346 		mutex_unlock(&q->mutex);
347 	} else {
348 		if (!clust_start)
349 			goto zero_cluster;
350 
351 		mutex_unlock(&q->mutex);
352 
353 		if (pread_in_full(q->fd, dst, length,
354 				  clust_start + clust_offset) < 0)
355 			return -1;
356 	}
357 
358 	return length;
359 
360 zero_cluster:
361 	mutex_unlock(&q->mutex);
362 	memset(dst, 0, length);
363 	return length;
364 
365 out_error:
366 	mutex_unlock(&q->mutex);
367 	length = -1;
368 	return -1;
369 }
370 
371 static ssize_t qcow2_read_cluster(struct qcow *q, u64 offset,
372 	void *dst, u32 dst_len)
373 {
374 	struct qcow_header *header = q->header;
375 	struct qcow_l1_table *l1t = &q->table;
376 	struct qcow_l2_table *l2t;
377 	u64 clust_offset;
378 	u64 clust_start;
379 	u64 l2t_offset;
380 	size_t length;
381 	u64 l2t_size;
382 	u64 l1_idx;
383 	u64 l2_idx;
384 	int coffset;
385 	int sector_offset;
386 	int nb_csectors;
387 	int csize;
388 
389 	l1_idx = get_l1_index(q, offset);
390 	if (l1_idx >= l1t->table_size)
391 		return -1;
392 
393 	clust_offset = get_cluster_offset(q, offset);
394 	if (clust_offset >= q->cluster_size)
395 		return -1;
396 
397 	length = q->cluster_size - clust_offset;
398 	if (length > dst_len)
399 		length = dst_len;
400 
401 	mutex_lock(&q->mutex);
402 
403 	l2t_offset = be64_to_cpu(l1t->l1_table[l1_idx]);
404 
405 	l2t_offset &= ~QCOW2_OFLAG_COPIED;
406 	if (!l2t_offset)
407 		goto zero_cluster;
408 
409 	l2t_size = 1 << header->l2_bits;
410 
411 	/* read and cache level 2 table */
412 	l2t = qcow_read_l2_table(q, l2t_offset);
413 	if (!l2t)
414 		goto out_error;
415 
416 	l2_idx = get_l2_index(q, offset);
417 	if (l2_idx >= l2t_size)
418 		goto out_error;
419 
420 	clust_start = be64_to_cpu(l2t->table[l2_idx]);
421 	if (clust_start & QCOW2_OFLAG_COMPRESSED) {
422 		coffset = clust_start & q->cluster_offset_mask;
423 		nb_csectors = ((clust_start >> q->csize_shift)
424 			& q->csize_mask) + 1;
425 		sector_offset = coffset & (SECTOR_SIZE - 1);
426 		csize = nb_csectors * SECTOR_SIZE - sector_offset;
427 
428 		if (pread_in_full(q->fd, q->cluster_data,
429 				  nb_csectors * SECTOR_SIZE,
430 				  coffset & ~(SECTOR_SIZE - 1)) < 0) {
431 			goto out_error;
432 		}
433 
434 		if (qcow_decompress_buffer(q->cluster_cache, q->cluster_size,
435 					q->cluster_data + sector_offset,
436 					csize) < 0) {
437 			goto out_error;
438 		}
439 
440 		memcpy(dst, q->cluster_cache + clust_offset, length);
441 		mutex_unlock(&q->mutex);
442 	} else {
443 		clust_start &= QCOW2_OFFSET_MASK;
444 		if (!clust_start)
445 			goto zero_cluster;
446 
447 		mutex_unlock(&q->mutex);
448 
449 		if (pread_in_full(q->fd, dst, length,
450 				  clust_start + clust_offset) < 0)
451 			return -1;
452 	}
453 
454 	return length;
455 
456 zero_cluster:
457 	mutex_unlock(&q->mutex);
458 	memset(dst, 0, length);
459 	return length;
460 
461 out_error:
462 	mutex_unlock(&q->mutex);
463 	length = -1;
464 	return -1;
465 }
466 
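/*
 * Sketch of the compressed L2 entry layout decoded above (bit positions
 * assume cluster_bits = 16, so csize_shift = 62 - 8 = 54 and
 * csize_mask = 0xff as computed in qcow2_probe()):
 *
 *	bits  0..53   byte offset of the compressed data in the image file
 *	bits 54..61   number of additional 512-byte sectors occupied
 *	bit  62       QCOW2_OFLAG_COMPRESSED
 *	bit  63       QCOW2_OFLAG_COPIED
 */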
467 static ssize_t qcow_read_sector_single(struct disk_image *disk, u64 sector,
468 	void *dst, u32 dst_len)
469 {
470 	struct qcow *q = disk->priv;
471 	struct qcow_header *header = q->header;
472 	u32 nr_read;
473 	u64 offset;
474 	char *buf;
475 	ssize_t nr;
476 
477 	buf = dst;
478 	nr_read = 0;
479 
480 	while (nr_read < dst_len) {
481 		offset = sector << SECTOR_SHIFT;
482 		if (offset >= header->size)
483 			return -1;
484 
485 		if (q->version == QCOW1_VERSION)
486 			nr = qcow1_read_cluster(q, offset, buf,
487 				dst_len - nr_read);
488 		else
489 			nr = qcow2_read_cluster(q, offset, buf,
490 				dst_len - nr_read);
491 
492 		if (nr <= 0)
493 			return -1;
494 
495 		nr_read	+= nr;
496 		buf	+= nr;
497 		sector	+= (nr >> SECTOR_SHIFT);
498 	}
499 
500 	return dst_len;
501 }
502 
503 static ssize_t qcow_read_sector(struct disk_image *disk, u64 sector,
504 				const struct iovec *iov, int iovcount, void *param)
505 {
506 	ssize_t nr, total = 0;
507 
508 	while (iovcount--) {
509 		nr = qcow_read_sector_single(disk, sector, iov->iov_base, iov->iov_len);
510 		if (nr != (ssize_t)iov->iov_len) {
511 			pr_info("qcow_read_sector error: nr=%ld iov_len=%ld\n", (long)nr, (long)iov->iov_len);
512 			return -1;
513 		}
514 
515 		sector += iov->iov_len >> SECTOR_SHIFT;
516 		total += nr;
517 		iov++;
518 	}
519 
520 	return total;
521 }
522 
523 static void refcount_table_free_cache(struct qcow_refcount_table *rft)
524 {
525 	struct rb_root *r = &rft->root;
526 	struct list_head *pos, *n;
527 	struct qcow_refcount_block *t;
528 
529 	list_for_each_safe(pos, n, &rft->lru_list) {
530 		list_del(pos);
531 		t = list_entry(pos, struct qcow_refcount_block, list);
532 		rb_erase(&t->node, r);
533 
534 		free(t);
535 	}
536 }
537 
538 static int refcount_block_insert(struct rb_root *root, struct qcow_refcount_block *new)
539 {
540 	struct rb_node **link = &(root->rb_node), *parent = NULL;
541 	u64 offset = new->offset;
542 
543 	/* search the tree */
544 	while (*link) {
545 		struct qcow_refcount_block *t;
546 
547 		t = rb_entry(*link, struct qcow_refcount_block, node);
548 		if (!t)
549 			goto error;
550 
551 		parent = *link;
552 
553 		if (t->offset > offset)
554 			link = &(*link)->rb_left;
555 		else if (t->offset < offset)
556 			link = &(*link)->rb_right;
557 		else
558 			goto out;
559 	}
560 
561 	/* add new node */
562 	rb_link_node(&new->node, parent, link);
563 	rb_insert_color(&new->node, root);
564 out:
565 	return 0;
566 error:
567 	return -1;
568 }
569 
570 static int write_refcount_block(struct qcow *q, struct qcow_refcount_block *rfb)
571 {
572 	if (!rfb->dirty)
573 		return 0;
574 
575 	if (qcow_pwrite_sync(q->fd, rfb->entries,
576 		rfb->size * sizeof(u16), rfb->offset) < 0)
577 		return -1;
578 
579 	rfb->dirty = 0;
580 
581 	return 0;
582 }
583 
584 static int cache_refcount_block(struct qcow *q, struct qcow_refcount_block *c)
585 {
586 	struct qcow_refcount_table *rft = &q->refcount_table;
587 	struct rb_root *r = &rft->root;
588 	struct qcow_refcount_block *lru;
589 
590 	if (rft->nr_cached == MAX_CACHE_NODES) {
591 		lru = list_first_entry(&rft->lru_list, struct qcow_refcount_block, list);
592 
593 		rb_erase(&lru->node, r);
594 		list_del_init(&lru->list);
595 		rft->nr_cached--;
596 
597 		free(lru);
598 	}
599 
600 	if (refcount_block_insert(r, c) < 0)
601 		goto error;
602 
603 	list_add_tail(&c->list, &rft->lru_list);
604 	rft->nr_cached++;
605 
606 	return 0;
607 error:
608 	return -1;
609 }
610 
611 static struct qcow_refcount_block *new_refcount_block(struct qcow *q, u64 rfb_offset)
612 {
613 	struct qcow_refcount_block *rfb;
614 
615 	rfb = malloc(sizeof *rfb + q->cluster_size);
616 	if (!rfb)
617 		return NULL;
618 
619 	rfb->offset = rfb_offset;
620 	rfb->size = q->cluster_size / sizeof(u16);
621 	RB_CLEAR_NODE(&rfb->node);
622 	INIT_LIST_HEAD(&rfb->list);
623 
624 	return rfb;
625 }
626 
627 static struct qcow_refcount_block *refcount_block_lookup(struct rb_root *root, u64 offset)
628 {
629 	struct rb_node *link = root->rb_node;
630 
631 	while (link) {
632 		struct qcow_refcount_block *t;
633 
634 		t = rb_entry(link, struct qcow_refcount_block, node);
635 		if (!t)
636 			goto out;
637 
638 		if (t->offset > offset)
639 			link = link->rb_left;
640 		else if (t->offset < offset)
641 			link = link->rb_right;
642 		else
643 			return t;
644 	}
645 out:
646 	return NULL;
647 }
648 
649 static struct qcow_refcount_block *refcount_block_search(struct qcow *q, u64 offset)
650 {
651 	struct qcow_refcount_table *rft = &q->refcount_table;
652 	struct qcow_refcount_block *rfb;
653 
654 	rfb = refcount_block_lookup(&rft->root, offset);
655 	if (!rfb)
656 		return NULL;
657 
658 	/* Update the LRU state, by moving the searched node to list tail */
659 	list_move_tail(&rfb->list, &rft->lru_list);
660 
661 	return rfb;
662 }
663 
664 static struct qcow_refcount_block *qcow_grow_refcount_block(struct qcow *q,
665 	u64 clust_idx)
666 {
667 	struct qcow_header *header = q->header;
668 	struct qcow_refcount_table *rft = &q->refcount_table;
669 	struct qcow_refcount_block *rfb;
670 	u64 new_block_offset;
671 	u64 rft_idx;
672 
673 	rft_idx = clust_idx >> (header->cluster_bits -
674 		QCOW_REFCOUNT_BLOCK_SHIFT);
675 
676 	if (rft_idx >= rft->rf_size) {
677 		pr_warning("Don't support grow refcount block table");
678 		return NULL;
679 	}
680 
681 	new_block_offset = qcow_alloc_clusters(q, q->cluster_size, 0);
682 	if (new_block_offset == (u64)-1)
683 		return NULL;
684 
685 	rfb = new_refcount_block(q, new_block_offset);
686 	if (!rfb)
687 		return NULL;
688 
689 	memset(rfb->entries, 0x00, q->cluster_size);
690 	rfb->dirty = 1;
691 
692 	/* write refcount block */
693 	if (write_refcount_block(q, rfb) < 0)
694 		goto free_rfb;
695 
696 	if (cache_refcount_block(q, rfb) < 0)
697 		goto free_rfb;
698 
699 	rft->rf_table[rft_idx] = cpu_to_be64(new_block_offset);
700 	if (update_cluster_refcount(q, new_block_offset >>
701 		    header->cluster_bits, 1) < 0)
702 		goto recover_rft;
703 
704 	if (qcow_write_refcount_table(q) < 0)
705 		goto recover_rft;
706 
707 	return rfb;
708 
709 recover_rft:
710 	rft->rf_table[rft_idx] = 0;
711 free_rfb:
712 	free(rfb);
713 	return NULL;
714 }
715 
716 static struct qcow_refcount_block *qcow_read_refcount_block(struct qcow *q, u64 clust_idx)
717 {
718 	struct qcow_header *header = q->header;
719 	struct qcow_refcount_table *rft = &q->refcount_table;
720 	struct qcow_refcount_block *rfb;
721 	u64 rfb_offset;
722 	u64 rft_idx;
723 
724 	rft_idx = clust_idx >> (header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT);
725 	if (rft_idx >= rft->rf_size)
726 		return ERR_PTR(-ENOSPC);
727 
728 	rfb_offset = be64_to_cpu(rft->rf_table[rft_idx]);
729 	if (!rfb_offset)
730 		return ERR_PTR(-ENOSPC);
731 
732 	rfb = refcount_block_search(q, rfb_offset);
733 	if (rfb)
734 		return rfb;
735 
736 	rfb = new_refcount_block(q, rfb_offset);
737 	if (!rfb)
738 		return NULL;
739 
740 	if (pread_in_full(q->fd, rfb->entries, rfb->size * sizeof(u16), rfb_offset) < 0)
741 		goto error_free_rfb;
742 
743 	if (cache_refcount_block(q, rfb) < 0)
744 		goto error_free_rfb;
745 
746 	return rfb;
747 
748 error_free_rfb:
749 	free(rfb);
750 
751 	return NULL;
752 }
753 
754 static u16 qcow_get_refcount(struct qcow *q, u64 clust_idx)
755 {
756 	struct qcow_refcount_block *rfb = NULL;
757 	struct qcow_header *header = q->header;
758 	u64 rfb_idx;
759 
760 	rfb = qcow_read_refcount_block(q, clust_idx);
761 	if (PTR_ERR(rfb) == -ENOSPC)
762 		return 0;
763 	else if (IS_ERR_OR_NULL(rfb)) {
764 		pr_warning("Error while reading refcount table");
765 		return -1;
766 	}
767 
768 	rfb_idx = clust_idx & (((1ULL <<
769 		(header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT)) - 1));
770 
771 	if (rfb_idx >= rfb->size) {
772 		pr_warning("L1: refcount block index out of bounds");
773 		return -1;
774 	}
775 
776 	return be16_to_cpu(rfb->entries[rfb_idx]);
777 }
778 
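/*
 * Worked example of the refcount indexing above, assuming
 * QCOW_REFCOUNT_BLOCK_SHIFT is 1 (16-bit refcount entries) and
 * cluster_bits = 16, so one refcount block covers 2^15 clusters:
 *
 *	clust_idx = 40000
 *	rft_idx   = 40000 >> 15      = 1     (slot in the refcount table)
 *	rfb_idx   = 40000 & 0x7fff   = 7232  (entry within that block)
 */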
779 static int update_cluster_refcount(struct qcow *q, u64 clust_idx, u16 append)
780 {
781 	struct qcow_refcount_block *rfb = NULL;
782 	struct qcow_header *header = q->header;
783 	u16 refcount;
784 	u64 rfb_idx;
785 
786 	rfb = qcow_read_refcount_block(q, clust_idx);
787 	if (PTR_ERR(rfb) == -ENOSPC) {
788 		rfb = qcow_grow_refcount_block(q, clust_idx);
789 		if (!rfb) {
790 			pr_warning("error while growing refcount table");
791 			return -1;
792 		}
793 	} else if (IS_ERR_OR_NULL(rfb)) {
794 		pr_warning("error while reading refcount table");
795 		return -1;
796 	}
797 
798 	rfb_idx = clust_idx & (((1ULL <<
799 		(header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT)) - 1));
800 	if (rfb_idx >= rfb->size) {
801 		pr_warning("refcount block index out of bounds");
802 		return -1;
803 	}
804 
805 	refcount = be16_to_cpu(rfb->entries[rfb_idx]) + append;
806 	rfb->entries[rfb_idx] = cpu_to_be16(refcount);
807 	rfb->dirty = 1;
808 
809 	/* write refcount block */
810 	if (write_refcount_block(q, rfb) < 0) {
811 		pr_warning("refcount block index out of bounds");
812 		return -1;
813 	}
814 
815 	/* update free_clust_idx since refcount becomes zero */
816 	if (!refcount && clust_idx < q->free_clust_idx)
817 		q->free_clust_idx = clust_idx;
818 
819 	return 0;
820 }
821 
822 static void  qcow_free_clusters(struct qcow *q, u64 clust_start, u64 size)
823 {
824 	struct qcow_header *header = q->header;
825 	u64 start, end, offset;
826 
827 	start = clust_start & ~(q->cluster_size - 1);
828 	end = (clust_start + size - 1) & ~(q->cluster_size - 1);
829 	for (offset = start; offset <= end; offset += q->cluster_size)
830 		update_cluster_refcount(q, offset >> header->cluster_bits, -1);
831 }
832 
833 /*
834  * Allocate clusters according to the size: find a position that can
835  * satisfy the request. free_clust_idx is initialized to zero and
836  * records the last scanned position.
837  */
838 static u64 qcow_alloc_clusters(struct qcow *q, u64 size, int update_ref)
839 {
840 	struct qcow_header *header = q->header;
841 	u16 clust_refcount;
842 	u32 clust_idx = 0, i;
843 	u64 clust_num;
844 
845 	clust_num = (size + (q->cluster_size - 1)) >> header->cluster_bits;
846 
847 again:
848 	for (i = 0; i < clust_num; i++) {
849 		clust_idx = q->free_clust_idx++;
850 		clust_refcount = qcow_get_refcount(q, clust_idx);
851 		if (clust_refcount == (u16)-1)
852 			return -1;
853 		else if (clust_refcount > 0)
854 			goto again;
855 	}
856 
857 	clust_idx++;
858 
859 	if (update_ref)
860 		for (i = 0; i < clust_num; i++)
861 			if (update_cluster_refcount(q,
862 				clust_idx - clust_num + i, 1))
863 				return -1;
864 
865 	return (clust_idx - clust_num) << header->cluster_bits;
866 }
867 
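/*
 * Example of the allocator above, assuming cluster_size = 64 KiB: a
 * request for 200 KiB rounds up to clust_num = 4. The scan restarts at
 * "again" whenever a candidate cluster is already referenced, so it
 * effectively looks for 4 consecutive clusters with refcount 0 starting
 * from free_clust_idx and returns the byte offset of the first one,
 * i.e. (clust_idx - clust_num) << cluster_bits.
 */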
868 static int qcow_write_l1_table(struct qcow *q)
869 {
870 	struct qcow_l1_table *l1t = &q->table;
871 	struct qcow_header *header = q->header;
872 
873 	if (qcow_pwrite_sync(q->fd, l1t->l1_table,
874 		l1t->table_size * sizeof(u64),
875 		header->l1_table_offset) < 0)
876 		return -1;
877 
878 	return 0;
879 }
880 
881 /*
882  * Get the L2 table. If the table has already been copied
883  * (QCOW2_OFLAG_COPIED), read it directly. Otherwise allocate a new
884  * cluster and copy the existing table into the new cluster.
885  */
886 static int get_cluster_table(struct qcow *q, u64 offset,
887 	struct qcow_l2_table **result_l2t, u64 *result_l2_index)
888 {
889 	struct qcow_header *header = q->header;
890 	struct qcow_l1_table *l1t = &q->table;
891 	struct qcow_l2_table *l2t;
892 	u64 l1t_idx;
893 	u64 l2t_offset;
894 	u64 l2t_idx;
895 	u64 l2t_size;
896 	u64 l2t_new_offset;
897 
898 	l2t_size = 1 << header->l2_bits;
899 
900 	l1t_idx = get_l1_index(q, offset);
901 	if (l1t_idx >= l1t->table_size)
902 		return -1;
903 
904 	l2t_idx = get_l2_index(q, offset);
905 	if (l2t_idx >= l2t_size)
906 		return -1;
907 
908 	l2t_offset = be64_to_cpu(l1t->l1_table[l1t_idx]);
909 	if (l2t_offset & QCOW2_OFLAG_COPIED) {
910 		l2t_offset &= ~QCOW2_OFLAG_COPIED;
911 		l2t = qcow_read_l2_table(q, l2t_offset);
912 		if (!l2t)
913 			goto error;
914 	} else {
915 		l2t_new_offset = qcow_alloc_clusters(q,
916 			l2t_size*sizeof(u64), 1);
917 
918 		if (l2t_new_offset == (u64)-1)
919 			goto error;
920 
921 		l2t = new_cache_table(q, l2t_new_offset);
922 		if (!l2t)
923 			goto free_cluster;
924 
925 		if (l2t_offset) {
926 			l2t = qcow_read_l2_table(q, l2t_offset);
927 			if (!l2t)
928 				goto free_cache;
929 		} else
930 			memset(l2t->table, 0x00, l2t_size * sizeof(u64));
931 
932 		/* write l2 table */
933 		l2t->dirty = 1;
934 		if (qcow_l2_cache_write(q, l2t) < 0)
935 			goto free_cache;
936 
937 		/* cache l2 table */
938 		if (cache_table(q, l2t))
939 			goto free_cache;
940 
941 		/* update the l1 table */
942 		l1t->l1_table[l1t_idx] = cpu_to_be64(l2t_new_offset
943 			| QCOW2_OFLAG_COPIED);
944 		if (qcow_write_l1_table(q)) {
945 			pr_warning("Update l1 table error");
946 			goto free_cache;
947 		}
948 
949 		/* free old cluster */
950 		qcow_free_clusters(q, l2t_offset, q->cluster_size);
951 	}
952 
953 	*result_l2t = l2t;
954 	*result_l2_index = l2t_idx;
955 
956 	return 0;
957 
958 free_cache:
959 	free(l2t);
960 
961 free_cluster:
962 	qcow_free_clusters(q, l2t_new_offset, q->cluster_size);
963 
964 error:
965 	return -1;
966 }
967 
968 /*
969  * If the cluster has been copied, write data directly. If not,
970  * read the original data and write it to the new cluster with
971  * modification.
972  */
973 static ssize_t qcow_write_cluster(struct qcow *q, u64 offset,
974 		void *buf, u32 src_len)
975 {
976 	struct qcow_l2_table *l2t;
977 	u64 clust_new_start;
978 	u64 clust_start;
979 	u64 clust_flags;
980 	u64 clust_off;
981 	u64 l2t_idx;
982 	u64 len;
983 
984 	l2t = NULL;
985 
986 	clust_off = get_cluster_offset(q, offset);
987 	if (clust_off >= q->cluster_size)
988 		return -1;
989 
990 	len = q->cluster_size - clust_off;
991 	if (len > src_len)
992 		len = src_len;
993 
994 	mutex_lock(&q->mutex);
995 
996 	if (get_cluster_table(q, offset, &l2t, &l2t_idx)) {
997 		pr_warning("Get l2 table error");
998 		goto error;
999 	}
1000 
1001 	clust_start = be64_to_cpu(l2t->table[l2t_idx]);
1002 	clust_flags = clust_start & QCOW2_OFLAGS_MASK;
1003 
1004 	clust_start &= QCOW2_OFFSET_MASK;
1005 	if (!(clust_flags & QCOW2_OFLAG_COPIED)) {
1006 		clust_new_start	= qcow_alloc_clusters(q, q->cluster_size, 1);
1007 		if (clust_new_start == (u64)-1) {
1008 			pr_warning("Cluster alloc error");
1009 			goto error;
1010 		}
1011 
1012 		offset &= ~(q->cluster_size - 1);
1013 
1014 		/* if clust_start is not zero, read the original data*/
1015 		if (clust_start) {
1016 			mutex_unlock(&q->mutex);
1017 			if (qcow2_read_cluster(q, offset, q->copy_buff,
1018 				q->cluster_size) < 0) {
1019 				pr_warning("Read copy cluster error");
1020 				qcow_free_clusters(q, clust_new_start,
1021 					q->cluster_size);
1022 				return -1;
1023 			}
1024 			mutex_lock(&q->mutex);
1025 		} else
1026 			memset(q->copy_buff, 0x00, q->cluster_size);
1027 
1028 		memcpy(q->copy_buff + clust_off, buf, len);
1029 
1030 		 /* Write actual data */
1031 		if (pwrite_in_full(q->fd, q->copy_buff, q->cluster_size,
1032 			clust_new_start) < 0)
1033 			goto free_cluster;
1034 
1035 		/* update l2 table*/
1036 		l2t->table[l2t_idx] = cpu_to_be64(clust_new_start
1037 			| QCOW2_OFLAG_COPIED);
1038 		l2t->dirty = 1;
1039 
1040 		if (qcow_l2_cache_write(q, l2t))
1041 			goto free_cluster;
1042 
1043 		/* free old cluster*/
1044 		if (clust_flags & QCOW2_OFLAG_COMPRESSED) {
1045 			int size;
1046 			size = ((clust_start >> q->csize_shift) &
1047 				q->csize_mask) + 1;
1048 			size *= 512;
1049 			clust_start &= q->cluster_offset_mask;
1050 			clust_start &= ~511;
1051 
1052 			qcow_free_clusters(q, clust_start, size);
1053 		} else if (clust_start)
1054 			qcow_free_clusters(q, clust_start, q->cluster_size);
1055 
1056 	} else {
1057 		/* Write actual data */
1058 		if (pwrite_in_full(q->fd, buf, len,
1059 			clust_start + clust_off) < 0)
1060 			goto error;
1061 	}
1062 	mutex_unlock(&q->mutex);
1063 	return len;
1064 
1065 free_cluster:
1066 	qcow_free_clusters(q, clust_new_start, q->cluster_size);
1067 
1068 error:
1069 	mutex_unlock(&q->mutex);
1070 	return -1;
1071 }
1072 
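/*
 * Write path summary (a sketch of the function above): a cluster whose
 * L2 entry lacks QCOW2_OFLAG_COPIED is copied on write. A fresh cluster
 * is allocated, the old contents (or zeroes) are merged with the new
 * data in copy_buff, the L2 entry is repointed with QCOW2_OFLAG_COPIED
 * set, and the old cluster's refcount is dropped via
 * qcow_free_clusters().
 */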
1073 static ssize_t qcow_write_sector_single(struct disk_image *disk, u64 sector, void *src, u32 src_len)
1074 {
1075 	struct qcow *q = disk->priv;
1076 	struct qcow_header *header = q->header;
1077 	u32 nr_written;
1078 	char *buf;
1079 	u64 offset;
1080 	ssize_t nr;
1081 
1082 	buf		= src;
1083 	nr_written	= 0;
1084 	offset		= sector << SECTOR_SHIFT;
1085 
1086 	while (nr_written < src_len) {
1087 		if (offset >= header->size)
1088 			return -1;
1089 
1090 		nr = qcow_write_cluster(q, offset, buf, src_len - nr_written);
1091 		if (nr < 0)
1092 			return -1;
1093 
1094 		nr_written	+= nr;
1095 		buf		+= nr;
1096 		offset		+= nr;
1097 	}
1098 
1099 	return nr_written;
1100 }
1101 
1102 static ssize_t qcow_write_sector(struct disk_image *disk, u64 sector,
1103 				const struct iovec *iov, int iovcount, void *param)
1104 {
1105 	ssize_t nr, total = 0;
1106 
1107 	while (iovcount--) {
1108 		nr = qcow_write_sector_single(disk, sector, iov->iov_base, iov->iov_len);
1109 		if (nr != (ssize_t)iov->iov_len) {
1110 			pr_info("qcow_write_sector error: nr=%ld iov_len=%ld\n", (long)nr, (long)iov->iov_len);
1111 			return -1;
1112 		}
1113 
1114 		sector	+= iov->iov_len >> SECTOR_SHIFT;
1115 		iov++;
1116 		total	+= nr;
1117 	}
1118 
1119 	return total;
1120 }
1121 
1122 static int qcow_disk_flush(struct disk_image *disk)
1123 {
1124 	struct qcow *q = disk->priv;
1125 	struct qcow_refcount_table *rft;
1126 	struct list_head *pos, *n;
1127 	struct qcow_l1_table *l1t;
1128 
1129 	l1t = &q->table;
1130 	rft = &q->refcount_table;
1131 
1132 	mutex_lock(&q->mutex);
1133 
1134 	list_for_each_safe(pos, n, &rft->lru_list) {
1135 		struct qcow_refcount_block *c = list_entry(pos, struct qcow_refcount_block, list);
1136 
1137 		if (write_refcount_block(q, c) < 0)
1138 			goto error_unlock;
1139 	}
1140 
1141 	list_for_each_safe(pos, n, &l1t->lru_list) {
1142 		struct qcow_l2_table *c = list_entry(pos, struct qcow_l2_table, list);
1143 
1144 		if (qcow_l2_cache_write(q, c) < 0)
1145 			goto error_unlock;
1146 	}
1147 
1148 	if (qcow_write_l1_table(q) < 0)
1149 		goto error_unlock;
1150 
1151 	mutex_unlock(&q->mutex);
1152 
1153 	return fsync(disk->fd);
1154 
1155 error_unlock:
1156 	mutex_unlock(&q->mutex);
1157 	return -1;
1158 }
1159 
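/*
 * Flush ordering above: dirty refcount blocks are written out first,
 * then cached L2 tables, then the L1 table, and finally fsync() flushes
 * the data clusters themselves.
 */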
1160 static int qcow_disk_close(struct disk_image *disk)
1161 {
1162 	struct qcow *q;
1163 
1164 	if (!disk)
1165 		return 0;
1166 
1167 	q = disk->priv;
1168 
1169 	refcount_table_free_cache(&q->refcount_table);
1170 	l1_table_free_cache(&q->table);
1171 	free(q->copy_buff);
1172 	free(q->cluster_data);
1173 	free(q->cluster_cache);
1174 	free(q->refcount_table.rf_table);
1175 	free(q->table.l1_table);
1176 	free(q->header);
1177 	free(q);
1178 
1179 	return 0;
1180 }
1181 
1182 static struct disk_image_operations qcow_disk_readonly_ops = {
1183 	.read	= qcow_read_sector,
1184 	.close	= qcow_disk_close,
1185 };
1186 
1187 static struct disk_image_operations qcow_disk_ops = {
1188 	.read	= qcow_read_sector,
1189 	.write	= qcow_write_sector,
1190 	.flush	= qcow_disk_flush,
1191 	.close	= qcow_disk_close,
1192 };
1193 
1194 static int qcow_read_refcount_table(struct qcow *q)
1195 {
1196 	struct qcow_header *header = q->header;
1197 	struct qcow_refcount_table *rft = &q->refcount_table;
1198 
1199 	rft->rf_size = (header->refcount_table_size * q->cluster_size)
1200 		/ sizeof(u64);
1201 
1202 	rft->rf_table = calloc(rft->rf_size, sizeof(u64));
1203 	if (!rft->rf_table)
1204 		return -1;
1205 
1206 	rft->root = (struct rb_root) RB_ROOT;
1207 	INIT_LIST_HEAD(&rft->lru_list);
1208 
1209 	return pread_in_full(q->fd, rft->rf_table, sizeof(u64) * rft->rf_size, header->refcount_table_offset);
1210 }
1211 
1212 static int qcow_write_refcount_table(struct qcow *q)
1213 {
1214 	struct qcow_header *header = q->header;
1215 	struct qcow_refcount_table *rft = &q->refcount_table;
1216 
1217 	return qcow_pwrite_sync(q->fd, rft->rf_table,
1218 		rft->rf_size * sizeof(u64), header->refcount_table_offset);
1219 }
1220 
1221 static int qcow_read_l1_table(struct qcow *q)
1222 {
1223 	struct qcow_header *header = q->header;
1224 	struct qcow_l1_table *table = &q->table;
1225 
1226 	table->table_size = header->l1_size;
1227 
1228 	table->l1_table	= calloc(table->table_size, sizeof(u64));
1229 	if (!table->l1_table)
1230 		return -1;
1231 
1232 	return pread_in_full(q->fd, table->l1_table, sizeof(u64) * table->table_size, header->l1_table_offset);
1233 }
1234 
1235 static void *qcow2_read_header(int fd)
1236 {
1237 	struct qcow2_header_disk f_header;
1238 	struct qcow_header *header;
1239 
1240 	header = malloc(sizeof(struct qcow_header));
1241 	if (!header)
1242 		return NULL;
1243 
1244 	if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0) {
1245 		free(header);
1246 		return NULL;
1247 	}
1248 
1249 	be32_to_cpus(&f_header.magic);
1250 	be32_to_cpus(&f_header.version);
1251 	be64_to_cpus(&f_header.backing_file_offset);
1252 	be32_to_cpus(&f_header.backing_file_size);
1253 	be32_to_cpus(&f_header.cluster_bits);
1254 	be64_to_cpus(&f_header.size);
1255 	be32_to_cpus(&f_header.crypt_method);
1256 	be32_to_cpus(&f_header.l1_size);
1257 	be64_to_cpus(&f_header.l1_table_offset);
1258 	be64_to_cpus(&f_header.refcount_table_offset);
1259 	be32_to_cpus(&f_header.refcount_table_clusters);
1260 	be32_to_cpus(&f_header.nb_snapshots);
1261 	be64_to_cpus(&f_header.snapshots_offset);
1262 
1263 	*header		= (struct qcow_header) {
1264 		.size			= f_header.size,
1265 		.l1_table_offset	= f_header.l1_table_offset,
1266 		.l1_size		= f_header.l1_size,
1267 		.cluster_bits		= f_header.cluster_bits,
1268 		.l2_bits		= f_header.cluster_bits - 3,
1269 		.refcount_table_offset	= f_header.refcount_table_offset,
1270 		.refcount_table_size	= f_header.refcount_table_clusters,
1271 	};
1272 
1273 	return header;
1274 }
1275 
1276 static struct disk_image *qcow2_probe(int fd, bool readonly)
1277 {
1278 	struct disk_image *disk_image;
1279 	struct qcow_l1_table *l1t;
1280 	struct qcow_header *h;
1281 	struct qcow *q;
1282 
1283 	q = calloc(1, sizeof(struct qcow));
1284 	if (!q)
1285 		return NULL;
1286 
1287 	mutex_init(&q->mutex);
1288 	q->fd = fd;
1289 
1290 	l1t = &q->table;
1291 
1292 	l1t->root = (struct rb_root) RB_ROOT;
1293 	INIT_LIST_HEAD(&l1t->lru_list);
1294 
1295 	h = q->header = qcow2_read_header(fd);
1296 	if (!h)
1297 		goto free_qcow;
1298 
1299 	q->version = QCOW2_VERSION;
1300 	q->csize_shift = (62 - (q->header->cluster_bits - 8));
1301 	q->csize_mask = (1 << (q->header->cluster_bits - 8)) - 1;
1302 	q->cluster_offset_mask = (1LL << q->csize_shift) - 1;
1303 	q->cluster_size = 1 << q->header->cluster_bits;
1304 
1305 	q->copy_buff = malloc(q->cluster_size);
1306 	if (!q->copy_buff) {
1307 		pr_warning("copy buff malloc error");
1308 		goto free_header;
1309 	}
1310 
1311 	q->cluster_data = malloc(q->cluster_size);
1312 	if (!q->cluster_data) {
1313 		pr_warning("cluster data malloc error");
1314 		goto free_copy_buff;
1315 	}
1316 
1317 	q->cluster_cache = malloc(q->cluster_size);
1318 	if (!q->cluster_cache) {
1319 		pr_warning("cluster cache malloc error");
1320 		goto free_cluster_data;
1321 	}
1322 
1323 	if (qcow_read_l1_table(q) < 0)
1324 		goto free_cluster_cache;
1325 
1326 	if (qcow_read_refcount_table(q) < 0)
1327 		goto free_l1_table;
1328 
1329 	/*
1330 	 * Do not use mmap; use read/write instead.
1331 	 */
1332 	if (readonly)
1333 		disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_REGULAR);
1334 	else
1335 		disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_REGULAR);
1336 
1337 	if (IS_ERR_OR_NULL(disk_image))
1338 		goto free_refcount_table;
1339 
1340 	disk_image->priv = q;
1341 
1342 	return disk_image;
1343 
1344 free_refcount_table:
1345 	if (q->refcount_table.rf_table)
1346 		free(q->refcount_table.rf_table);
1347 free_l1_table:
1348 	if (q->table.l1_table)
1349 		free(q->table.l1_table);
1350 free_cluster_cache:
1351 	if (q->cluster_cache)
1352 		free(q->cluster_cache);
1353 free_cluster_data:
1354 	if (q->cluster_data)
1355 		free(q->cluster_data);
1356 free_copy_buff:
1357 	if (q->copy_buff)
1358 		free(q->copy_buff);
1359 free_header:
1360 	if (q->header)
1361 		free(q->header);
1362 free_qcow:
1363 	free(q);
1364 
1365 	return NULL;
1366 }
1367 
1368 static bool qcow2_check_image(int fd)
1369 {
1370 	struct qcow2_header_disk f_header;
1371 
1372 	if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0)
1373 		return false;
1374 
1375 	be32_to_cpus(&f_header.magic);
1376 	be32_to_cpus(&f_header.version);
1377 
1378 	if (f_header.magic != QCOW_MAGIC)
1379 		return false;
1380 
1381 	if (f_header.version != QCOW2_VERSION)
1382 		return false;
1383 
1384 	return true;
1385 }
1386 
1387 static void *qcow1_read_header(int fd)
1388 {
1389 	struct qcow1_header_disk f_header;
1390 	struct qcow_header *header;
1391 
1392 	header = malloc(sizeof(struct qcow_header));
1393 	if (!header)
1394 		return NULL;
1395 
1396 	if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0) {
1397 		free(header);
1398 		return NULL;
1399 	}
1400 
1401 	be32_to_cpus(&f_header.magic);
1402 	be32_to_cpus(&f_header.version);
1403 	be64_to_cpus(&f_header.backing_file_offset);
1404 	be32_to_cpus(&f_header.backing_file_size);
1405 	be32_to_cpus(&f_header.mtime);
1406 	be64_to_cpus(&f_header.size);
1407 	be32_to_cpus(&f_header.crypt_method);
1408 	be64_to_cpus(&f_header.l1_table_offset);
1409 
1410 	*header		= (struct qcow_header) {
1411 		.size			= f_header.size,
1412 		.l1_table_offset	= f_header.l1_table_offset,
1413 		.l1_size		= f_header.size / ((1 << f_header.l2_bits) * (1 << f_header.cluster_bits)),
1414 		.cluster_bits		= f_header.cluster_bits,
1415 		.l2_bits		= f_header.l2_bits,
1416 	};
1417 
1418 	return header;
1419 }
1420 
1421 static struct disk_image *qcow1_probe(int fd, bool readonly)
1422 {
1423 	struct disk_image *disk_image;
1424 	struct qcow_l1_table *l1t;
1425 	struct qcow_header *h;
1426 	struct qcow *q;
1427 
1428 	q = calloc(1, sizeof(struct qcow));
1429 	if (!q)
1430 		return NULL;
1431 
1432 	mutex_init(&q->mutex);
1433 	q->fd = fd;
1434 
1435 	l1t = &q->table;
1436 
1437 	l1t->root = (struct rb_root)RB_ROOT;
1438 	INIT_LIST_HEAD(&l1t->lru_list);
1439 	INIT_LIST_HEAD(&q->refcount_table.lru_list);
1440 
1441 	h = q->header = qcow1_read_header(fd);
1442 	if (!h)
1443 		goto free_qcow;
1444 
1445 	q->version = QCOW1_VERSION;
1446 	q->cluster_size = 1 << q->header->cluster_bits;
1447 	q->cluster_offset_mask = (1LL << (63 - q->header->cluster_bits)) - 1;
1448 	q->free_clust_idx = 0;
1449 
1450 	q->cluster_data = malloc(q->cluster_size);
1451 	if (!q->cluster_data) {
1452 		pr_warning("cluster data malloc error");
1453 		goto free_header;
1454 	}
1455 
1456 	q->cluster_cache = malloc(q->cluster_size);
1457 	if (!q->cluster_cache) {
1458 		pr_warning("cluster cache malloc error");
1459 		goto free_cluster_data;
1460 	}
1461 
1462 	if (qcow_read_l1_table(q) < 0)
1463 		goto free_cluster_cache;
1464 
1465 	/*
1466 	 * Do not use mmap; use read/write instead.
1467 	 */
1468 	if (readonly)
1469 		disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_REGULAR);
1470 	else
1471 		disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_REGULAR);
1472 
1473 	if (!disk_image)
1474 		goto free_l1_table;
1475 
1476 	disk_image->priv = q;
1477 
1478 	return disk_image;
1479 
1480 free_l1_table:
1481 	if (q->table.l1_table)
1482 		free(q->table.l1_table);
1483 free_cluster_cache:
1484 	if (q->cluster_cache)
1485 		free(q->cluster_cache);
1486 free_cluster_data:
1487 	if (q->cluster_data)
1488 		free(q->cluster_data);
1489 free_header:
1490 	if (q->header)
1491 		free(q->header);
1492 free_qcow:
1493 	free(q);
1494 
1495 	return NULL;
1496 }
1497 
1498 static bool qcow1_check_image(int fd)
1499 {
1500 	struct qcow1_header_disk f_header;
1501 
1502 	if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0)
1503 		return false;
1504 
1505 	be32_to_cpus(&f_header.magic);
1506 	be32_to_cpus(&f_header.version);
1507 
1508 	if (f_header.magic != QCOW_MAGIC)
1509 		return false;
1510 
1511 	if (f_header.version != QCOW1_VERSION)
1512 		return false;
1513 
1514 	return true;
1515 }
1516 
1517 struct disk_image *qcow_probe(int fd, bool readonly)
1518 {
1519 	if (qcow1_check_image(fd))
1520 		return qcow1_probe(fd, readonly);
1521 
1522 	if (qcow2_check_image(fd))
1523 		return qcow2_probe(fd, readonly);
1524 
1525 	return NULL;
1526 }
1527
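/*
 * Minimal usage sketch (hypothetical caller, not part of this file):
 * the disk-image core is expected to hand qcow_probe() an already-open
 * file descriptor and use the returned disk_image operations for I/O.
 *
 *	int fd = open("guest.qcow2", O_RDWR);
 *	struct disk_image *disk = qcow_probe(fd, false);
 *	if (!disk)
 *		pr_warning("not a qcow image");
 */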