xref: /kvmtool/virtio/blk.c (revision 867b15ccd7dae9ba7a174f97d4fe76e90a79d957)
1416b2c2dSAsias He #include "kvm/virtio-blk.h"
2b30d05adSPekka Enberg 
331638bcaSCyrill Gorcunov #include "kvm/virtio-pci-dev.h"
45a24a9f2SPekka Enberg #include "kvm/disk-image.h"
54ef0f4d6SPekka Enberg #include "kvm/mutex.h"
6fe99fd4eSPekka Enberg #include "kvm/util.h"
78b1ff07eSPekka Enberg #include "kvm/kvm.h"
8b30d05adSPekka Enberg #include "kvm/pci.h"
9fb0957f2SSasha Levin #include "kvm/threadpool.h"
10ec75b82fSSasha Levin #include "kvm/ioeventfd.h"
11404d164bSSasha Levin #include "kvm/guest_compat.h"
12427948d5SSasha Levin #include "kvm/virtio-pci.h"
13f41a132bSSasha Levin #include "kvm/virtio.h"
14b30d05adSPekka Enberg 
1520c64ecaSPekka Enberg #include <linux/virtio_ring.h>
1620c64ecaSPekka Enberg #include <linux/virtio_blk.h>
17427948d5SSasha Levin #include <linux/kernel.h>
18ebe9ac19SSasha Levin #include <linux/list.h>
193fdf659dSSasha Levin #include <linux/types.h>
200528c2a7SPekka Enberg #include <pthread.h>
214155ba8cSPekka Enberg 
#define VIRTIO_BLK_MAX_DEV		4

/* Number of descriptors in the (single) request virtqueue. */
#define VIRTIO_BLK_QUEUE_SIZE		256
#define NUM_VIRT_QUEUES			1

/*
 * The request header and the status byte each consume one entry,
 * so two entries of the queue are not available for data segments.
 */
#define DISK_SEG_MAX			(VIRTIO_BLK_QUEUE_SIZE - 2)
3010eca11dSPekka Enberg 
318b52f877SSasha Levin struct blk_dev_req {
324749e795SSasha Levin 	struct virt_queue		*vq;
33fe2a70d1SSasha Levin 	struct blk_dev			*bdev;
3469971b13SSasha Levin 	struct iovec			iov[VIRTIO_BLK_QUEUE_SIZE];
3569971b13SSasha Levin 	u16				out, in, head;
368b52f877SSasha Levin 	struct kvm			*kvm;
374749e795SSasha Levin };
384749e795SSasha Levin 
39fe2a70d1SSasha Levin struct blk_dev {
40d3476f7dSSasha Levin 	struct mutex			mutex;
418b52f877SSasha Levin 
42ebe9ac19SSasha Levin 	struct list_head		list;
430528c2a7SPekka Enberg 
4402eca50cSAsias He 	struct virtio_device		vdev;
4540ce993fSPekka Enberg 	struct virtio_blk_config	blk_config;
46*867b15ccSJean-Philippe Brucker 	u64				capacity;
4738605e1cSSasha Levin 	struct disk_image		*disk;
48427948d5SSasha Levin 	u32				features;
4910eca11dSPekka Enberg 
5045e47970SAsias He 	struct virt_queue		vqs[NUM_VIRT_QUEUES];
518b52f877SSasha Levin 	struct blk_dev_req		reqs[VIRTIO_BLK_QUEUE_SIZE];
525ac1178bSAsias He 
535ac1178bSAsias He 	pthread_t			io_thread;
545ac1178bSAsias He 	int				io_efd;
555ac1178bSAsias He 
565ac1178bSAsias He 	struct kvm			*kvm;
57fbc2fbf9SPekka Enberg };
58fbc2fbf9SPekka Enberg 
/* Id returned by virtio_compat_add_message(); -1 until a message is added. */
static int compat_id = -1;
/* Every blk_dev created by virtio_blk__init_one(). */
static LIST_HEAD(bdevs);
6140ce993fSPekka Enberg 
628b52f877SSasha Levin void virtio_blk_complete(void *param, long len)
638b52f877SSasha Levin {
648b52f877SSasha Levin 	struct blk_dev_req *req = param;
658b52f877SSasha Levin 	struct blk_dev *bdev = req->bdev;
668b52f877SSasha Levin 	int queueid = req->vq - bdev->vqs;
673fdf659dSSasha Levin 	u8 *status;
688b52f877SSasha Levin 
698b52f877SSasha Levin 	/* status */
708b52f877SSasha Levin 	status	= req->iov[req->out + req->in - 1].iov_base;
718b52f877SSasha Levin 	*status	= (len < 0) ? VIRTIO_BLK_S_IOERR : VIRTIO_BLK_S_OK;
728b52f877SSasha Levin 
738b52f877SSasha Levin 	mutex_lock(&bdev->mutex);
748b52f877SSasha Levin 	virt_queue__set_used_elem(req->vq, req->head, len);
758b52f877SSasha Levin 	mutex_unlock(&bdev->mutex);
768b52f877SSasha Levin 
777ab3d207SSasha Levin 	if (virtio_queue__should_signal(&bdev->vqs[queueid]))
7802eca50cSAsias He 		bdev->vdev.ops->signal_vq(req->kvm, &bdev->vdev, queueid);
798b52f877SSasha Levin }
808b52f877SSasha Levin 
8101dafc9eSMarc Zyngier static void virtio_blk_do_io_request(struct kvm *kvm, struct virt_queue *vq, struct blk_dev_req *req)
828b52f877SSasha Levin {
838b52f877SSasha Levin 	struct virtio_blk_outhdr *req_hdr;
8469971b13SSasha Levin 	ssize_t block_cnt;
8569971b13SSasha Levin 	struct blk_dev *bdev;
8669971b13SSasha Levin 	struct iovec *iov;
87f41a132bSSasha Levin 	u16 out, in;
8801dafc9eSMarc Zyngier 	u32 type;
8901dafc9eSMarc Zyngier 	u64 sector;
904155ba8cSPekka Enberg 
9169971b13SSasha Levin 	block_cnt	= -1;
928b52f877SSasha Levin 	bdev		= req->bdev;
938b52f877SSasha Levin 	iov		= req->iov;
948b52f877SSasha Levin 	out		= req->out;
958b52f877SSasha Levin 	in		= req->in;
968b52f877SSasha Levin 	req_hdr		= iov[0].iov_base;
9703110ff3SAsias He 
9801dafc9eSMarc Zyngier 	type = virtio_guest_to_host_u32(vq, req_hdr->type);
9901dafc9eSMarc Zyngier 	sector = virtio_guest_to_host_u64(vq, req_hdr->sector);
10001dafc9eSMarc Zyngier 
10101dafc9eSMarc Zyngier 	switch (type) {
10203110ff3SAsias He 	case VIRTIO_BLK_T_IN:
10301dafc9eSMarc Zyngier 		block_cnt = disk_image__read(bdev->disk, sector,
10434239c78SAsias He 				iov + 1, in + out - 2, req);
105258dd093SPekka Enberg 		break;
10603110ff3SAsias He 	case VIRTIO_BLK_T_OUT:
10701dafc9eSMarc Zyngier 		block_cnt = disk_image__write(bdev->disk, sector,
10834239c78SAsias He 				iov + 1, in + out - 2, req);
109258dd093SPekka Enberg 		break;
11029084a74SPrasad Joshi 	case VIRTIO_BLK_T_FLUSH:
11129084a74SPrasad Joshi 		block_cnt = disk_image__flush(bdev->disk);
112fb434ac3SSasha Levin 		virtio_blk_complete(req, block_cnt);
11329084a74SPrasad Joshi 		break;
114ff6462e8SSasha Levin 	case VIRTIO_BLK_T_GET_ID:
115ff6462e8SSasha Levin 		block_cnt = VIRTIO_BLK_ID_BYTES;
11634239c78SAsias He 		disk_image__get_serial(bdev->disk,
11734239c78SAsias He 				(iov + 1)->iov_base, &block_cnt);
118fb434ac3SSasha Levin 		virtio_blk_complete(req, block_cnt);
119ff6462e8SSasha Levin 		break;
120258dd093SPekka Enberg 	default:
12101dafc9eSMarc Zyngier 		pr_warning("request type %d", type);
12270b53f25SSasha Levin 		block_cnt	= -1;
123407475bfSPekka Enberg 		break;
12403110ff3SAsias He 	}
1254155ba8cSPekka Enberg }
1264155ba8cSPekka Enberg 
12769971b13SSasha Levin static void virtio_blk_do_io(struct kvm *kvm, struct virt_queue *vq, struct blk_dev *bdev)
12845e47970SAsias He {
1292fddfdb5SAsias He 	struct blk_dev_req *req;
1302fddfdb5SAsias He 	u16 head;
131407475bfSPekka Enberg 
1322fddfdb5SAsias He 	while (virt_queue__available(vq)) {
1332fddfdb5SAsias He 		head		= virt_queue__pop(vq);
1342fddfdb5SAsias He 		req		= &bdev->reqs[head];
13534239c78SAsias He 		req->head	= virt_queue__get_head_iov(vq, req->iov, &req->out,
13634239c78SAsias He 					&req->in, head, kvm);
1372fddfdb5SAsias He 		req->vq		= vq;
13845e47970SAsias He 
13901dafc9eSMarc Zyngier 		virtio_blk_do_io_request(kvm, vq, req);
14069971b13SSasha Levin 	}
1414baf6f73SSasha Levin }
1420528c2a7SPekka Enberg 
143c5ae742bSSasha Levin static u8 *get_config(struct kvm *kvm, void *dev)
144427948d5SSasha Levin {
145427948d5SSasha Levin 	struct blk_dev *bdev = dev;
146427948d5SSasha Levin 
147c5ae742bSSasha Levin 	return ((u8 *)(&bdev->blk_config));
148427948d5SSasha Levin }
149427948d5SSasha Levin 
150e4730284SMartin Radev static size_t get_config_size(struct kvm *kvm, void *dev)
151e4730284SMartin Radev {
152e4730284SMartin Radev 	struct blk_dev *bdev = dev;
153e4730284SMartin Radev 
154e4730284SMartin Radev 	return sizeof(bdev->blk_config);
155e4730284SMartin Radev }
156e4730284SMartin Radev 
157427948d5SSasha Levin static u32 get_host_features(struct kvm *kvm, void *dev)
158427948d5SSasha Levin {
1595c5cae75SJean-Philippe Brucker 	struct blk_dev *bdev = dev;
1605c5cae75SJean-Philippe Brucker 
1617ab3d207SSasha Levin 	return	1UL << VIRTIO_BLK_F_SEG_MAX
1627ab3d207SSasha Levin 		| 1UL << VIRTIO_BLK_F_FLUSH
163754c8ce3SSasha Levin 		| 1UL << VIRTIO_RING_F_EVENT_IDX
1645c5cae75SJean-Philippe Brucker 		| 1UL << VIRTIO_RING_F_INDIRECT_DESC
1655c5cae75SJean-Philippe Brucker 		| (bdev->disk->readonly ? 1UL << VIRTIO_BLK_F_RO : 0);
166427948d5SSasha Levin }
167427948d5SSasha Levin 
168427948d5SSasha Levin static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
169427948d5SSasha Levin {
170427948d5SSasha Levin 	struct blk_dev *bdev = dev;
171427948d5SSasha Levin 
172427948d5SSasha Levin 	bdev->features = features;
173427948d5SSasha Levin }
174427948d5SSasha Levin 
17595242e44SJean-Philippe Brucker static void notify_status(struct kvm *kvm, void *dev, u32 status)
17695242e44SJean-Philippe Brucker {
177*867b15ccSJean-Philippe Brucker 	struct blk_dev *bdev = dev;
178*867b15ccSJean-Philippe Brucker 	struct virtio_blk_config *conf = &bdev->blk_config;
179*867b15ccSJean-Philippe Brucker 
180*867b15ccSJean-Philippe Brucker 	if (!(status & VIRTIO__STATUS_CONFIG))
181*867b15ccSJean-Philippe Brucker 		return;
182*867b15ccSJean-Philippe Brucker 
183*867b15ccSJean-Philippe Brucker 	conf->capacity = virtio_host_to_guest_u64(&bdev->vdev, bdev->capacity);
184*867b15ccSJean-Philippe Brucker 	conf->seg_max = virtio_host_to_guest_u32(&bdev->vdev, DISK_SEG_MAX);
18595242e44SJean-Philippe Brucker }
18695242e44SJean-Philippe Brucker 
1875ac1178bSAsias He static void *virtio_blk_thread(void *dev)
1885ac1178bSAsias He {
1895ac1178bSAsias He 	struct blk_dev *bdev = dev;
1905ac1178bSAsias He 	u64 data;
191a7aa454eSSasha Levin 	int r;
1925ac1178bSAsias He 
193a4d8c55eSSasha Levin 	kvm__set_thread_name("virtio-blk-io");
194a4d8c55eSSasha Levin 
1955ac1178bSAsias He 	while (1) {
196a7aa454eSSasha Levin 		r = read(bdev->io_efd, &data, sizeof(u64));
197a7aa454eSSasha Levin 		if (r < 0)
198a7aa454eSSasha Levin 			continue;
1995ac1178bSAsias He 		virtio_blk_do_io(bdev->kvm, &bdev->vqs[0], bdev);
2005ac1178bSAsias He 	}
2015ac1178bSAsias He 
2025ac1178bSAsias He 	pthread_exit(NULL);
2035ac1178bSAsias He 	return NULL;
2045ac1178bSAsias He }
2055ac1178bSAsias He 
206609ee906SJean-Philippe Brucker static int init_vq(struct kvm *kvm, void *dev, u32 vq)
2076730b51fSJean-Philippe Brucker {
2086730b51fSJean-Philippe Brucker 	unsigned int i;
2096730b51fSJean-Philippe Brucker 	struct blk_dev *bdev = dev;
2106730b51fSJean-Philippe Brucker 
2116730b51fSJean-Philippe Brucker 	compat__remove_message(compat_id);
2126730b51fSJean-Philippe Brucker 
213fd41cde0SJean-Philippe Brucker 	virtio_init_device_vq(kvm, &bdev->vdev, &bdev->vqs[vq],
214609ee906SJean-Philippe Brucker 			      VIRTIO_BLK_QUEUE_SIZE);
2156730b51fSJean-Philippe Brucker 
2166730b51fSJean-Philippe Brucker 	if (vq != 0)
2176730b51fSJean-Philippe Brucker 		return 0;
2186730b51fSJean-Philippe Brucker 
2196730b51fSJean-Philippe Brucker 	for (i = 0; i < ARRAY_SIZE(bdev->reqs); i++) {
2206730b51fSJean-Philippe Brucker 		bdev->reqs[i] = (struct blk_dev_req) {
2216730b51fSJean-Philippe Brucker 			.bdev = bdev,
2226730b51fSJean-Philippe Brucker 			.kvm = kvm,
2236730b51fSJean-Philippe Brucker 		};
2246730b51fSJean-Philippe Brucker 	}
2256730b51fSJean-Philippe Brucker 
2266730b51fSJean-Philippe Brucker 	mutex_init(&bdev->mutex);
2276730b51fSJean-Philippe Brucker 	bdev->io_efd = eventfd(0, 0);
2286730b51fSJean-Philippe Brucker 	if (bdev->io_efd < 0)
2296730b51fSJean-Philippe Brucker 		return -errno;
2306730b51fSJean-Philippe Brucker 
2316730b51fSJean-Philippe Brucker 	if (pthread_create(&bdev->io_thread, NULL, virtio_blk_thread, bdev))
2326730b51fSJean-Philippe Brucker 		return -errno;
2336730b51fSJean-Philippe Brucker 
2346730b51fSJean-Philippe Brucker 	return 0;
2356730b51fSJean-Philippe Brucker }
2366730b51fSJean-Philippe Brucker 
2376730b51fSJean-Philippe Brucker static void exit_vq(struct kvm *kvm, void *dev, u32 vq)
2386730b51fSJean-Philippe Brucker {
2396730b51fSJean-Philippe Brucker 	struct blk_dev *bdev = dev;
2406730b51fSJean-Philippe Brucker 
2416730b51fSJean-Philippe Brucker 	if (vq != 0)
2426730b51fSJean-Philippe Brucker 		return;
2436730b51fSJean-Philippe Brucker 
2446730b51fSJean-Philippe Brucker 	close(bdev->io_efd);
2456730b51fSJean-Philippe Brucker 	pthread_cancel(bdev->io_thread);
2466730b51fSJean-Philippe Brucker 	pthread_join(bdev->io_thread, NULL);
2473f218e89SJean-Philippe Brucker 
2483f218e89SJean-Philippe Brucker 	disk_image__wait(bdev->disk);
2496730b51fSJean-Philippe Brucker }
2506730b51fSJean-Philippe Brucker 
251427948d5SSasha Levin static int notify_vq(struct kvm *kvm, void *dev, u32 vq)
252427948d5SSasha Levin {
253427948d5SSasha Levin 	struct blk_dev *bdev = dev;
2545ac1178bSAsias He 	u64 data = 1;
255a7aa454eSSasha Levin 	int r;
256427948d5SSasha Levin 
257a7aa454eSSasha Levin 	r = write(bdev->io_efd, &data, sizeof(data));
258a7aa454eSSasha Levin 	if (r < 0)
259a7aa454eSSasha Levin 		return r;
260427948d5SSasha Levin 
261427948d5SSasha Levin 	return 0;
262427948d5SSasha Levin }
263427948d5SSasha Levin 
26453fbb17bSJean-Philippe Brucker static struct virt_queue *get_vq(struct kvm *kvm, void *dev, u32 vq)
265427948d5SSasha Levin {
266427948d5SSasha Levin 	struct blk_dev *bdev = dev;
267427948d5SSasha Levin 
26853fbb17bSJean-Philippe Brucker 	return &bdev->vqs[vq];
269427948d5SSasha Levin }
270427948d5SSasha Levin 
271427948d5SSasha Levin static int get_size_vq(struct kvm *kvm, void *dev, u32 vq)
272427948d5SSasha Levin {
273ffcc904aSAsias He 	/* FIXME: dynamic */
274427948d5SSasha Levin 	return VIRTIO_BLK_QUEUE_SIZE;
275427948d5SSasha Levin }
276427948d5SSasha Levin 
277ffcc904aSAsias He static int set_size_vq(struct kvm *kvm, void *dev, u32 vq, int size)
278ffcc904aSAsias He {
279ffcc904aSAsias He 	/* FIXME: dynamic */
280ffcc904aSAsias He 	return size;
281ffcc904aSAsias He }
282ffcc904aSAsias He 
28331e0eaccSMartin Radev static unsigned int get_vq_count(struct kvm *kvm, void *dev)
284b98ac591SJean-Philippe Brucker {
285b98ac591SJean-Philippe Brucker 	return NUM_VIRT_QUEUES;
286b98ac591SJean-Philippe Brucker }
287b98ac591SJean-Philippe Brucker 
28815542babSAndre Przywara static struct virtio_ops blk_dev_virtio_ops = {
2891c47ce69SSasha Levin 	.get_config		= get_config,
290e4730284SMartin Radev 	.get_config_size	= get_config_size,
2911c47ce69SSasha Levin 	.get_host_features	= get_host_features,
2921c47ce69SSasha Levin 	.set_guest_features	= set_guest_features,
293b98ac591SJean-Philippe Brucker 	.get_vq_count		= get_vq_count,
2941c47ce69SSasha Levin 	.init_vq		= init_vq,
2956730b51fSJean-Philippe Brucker 	.exit_vq		= exit_vq,
29695242e44SJean-Philippe Brucker 	.notify_status		= notify_status,
2971c47ce69SSasha Levin 	.notify_vq		= notify_vq,
29853fbb17bSJean-Philippe Brucker 	.get_vq			= get_vq,
2991c47ce69SSasha Levin 	.get_size_vq		= get_size_vq,
300ffcc904aSAsias He 	.set_size_vq		= set_size_vq,
3011c47ce69SSasha Levin };
3021c47ce69SSasha Levin 
3039f9207c5SSasha Levin static int virtio_blk__init_one(struct kvm *kvm, struct disk_image *disk)
3044749e795SSasha Levin {
305fe2a70d1SSasha Levin 	struct blk_dev *bdev;
306db927775SAlexandru Elisei 	int r;
3074749e795SSasha Levin 
3084749e795SSasha Levin 	if (!disk)
3099f9207c5SSasha Levin 		return -EINVAL;
3104749e795SSasha Levin 
311ebe9ac19SSasha Levin 	bdev = calloc(1, sizeof(struct blk_dev));
312ebe9ac19SSasha Levin 	if (bdev == NULL)
3139f9207c5SSasha Levin 		return -ENOMEM;
3144749e795SSasha Levin 
315fe2a70d1SSasha Levin 	*bdev = (struct blk_dev) {
3164749e795SSasha Levin 		.disk			= disk,
3174749e795SSasha Levin 		.capacity		= disk->size / SECTOR_SIZE,
3185ac1178bSAsias He 		.kvm			= kvm,
319427948d5SSasha Levin 	};
320427948d5SSasha Levin 
321db927775SAlexandru Elisei 	list_add_tail(&bdev->list, &bdevs);
322db927775SAlexandru Elisei 
323db927775SAlexandru Elisei 	r = virtio_init(kvm, bdev, &bdev->vdev, &blk_dev_virtio_ops,
324d97dadecSWill Deacon 			VIRTIO_DEFAULT_TRANS(kvm), PCI_DEVICE_ID_VIRTIO_BLK,
325ae06ce71SWill Deacon 			VIRTIO_ID_BLOCK, PCI_CLASS_BLK);
326db927775SAlexandru Elisei 	if (r < 0)
327db927775SAlexandru Elisei 		return r;
328ebe9ac19SSasha Levin 
329fb434ac3SSasha Levin 	disk_image__set_callback(bdev->disk, virtio_blk_complete);
330fb434ac3SSasha Levin 
331d278197dSAsias He 	if (compat_id == -1)
33252f34d2cSAsias He 		compat_id = virtio_compat_add_message("virtio-blk", "CONFIG_VIRTIO_BLK");
3335ac1178bSAsias He 
3349f9207c5SSasha Levin 	return 0;
335b30d05adSPekka Enberg }
336bcb6aacaSPrasad Joshi 
3379f9207c5SSasha Levin static int virtio_blk__exit_one(struct kvm *kvm, struct blk_dev *bdev)
338bcb6aacaSPrasad Joshi {
3399f9207c5SSasha Levin 	list_del(&bdev->list);
3409f9207c5SSasha Levin 	free(bdev);
341bcb6aacaSPrasad Joshi 
3429f9207c5SSasha Levin 	return 0;
343bcb6aacaSPrasad Joshi }
344a0a1e3c2SPrasad Joshi 
3459f9207c5SSasha Levin int virtio_blk__init(struct kvm *kvm)
3469f9207c5SSasha Levin {
3479f9207c5SSasha Levin 	int i, r = 0;
3489f9207c5SSasha Levin 
3499f9207c5SSasha Levin 	for (i = 0; i < kvm->nr_disks; i++) {
350a67da3beSAsias He 		if (kvm->disks[i]->wwpn)
351a67da3beSAsias He 			continue;
3529f9207c5SSasha Levin 		r = virtio_blk__init_one(kvm, kvm->disks[i]);
3539f9207c5SSasha Levin 		if (r < 0)
3549f9207c5SSasha Levin 			goto cleanup;
3559f9207c5SSasha Levin 	}
3569f9207c5SSasha Levin 
3579f9207c5SSasha Levin 	return 0;
3589f9207c5SSasha Levin cleanup:
359db927775SAlexandru Elisei 	virtio_blk__exit(kvm);
360db927775SAlexandru Elisei 	return r;
3619f9207c5SSasha Levin }
36249a8afd1SSasha Levin virtio_dev_init(virtio_blk__init);
3639f9207c5SSasha Levin 
3649f9207c5SSasha Levin int virtio_blk__exit(struct kvm *kvm)
365a0a1e3c2SPrasad Joshi {
366ebe9ac19SSasha Levin 	while (!list_empty(&bdevs)) {
367ebe9ac19SSasha Levin 		struct blk_dev *bdev;
368a0a1e3c2SPrasad Joshi 
369ebe9ac19SSasha Levin 		bdev = list_first_entry(&bdevs, struct blk_dev, list);
3709f9207c5SSasha Levin 		virtio_blk__exit_one(kvm, bdev);
371ebe9ac19SSasha Levin 	}
3729f9207c5SSasha Levin 
3739f9207c5SSasha Levin 	return 0;
374a0a1e3c2SPrasad Joshi }
37549a8afd1SSasha Levin virtio_dev_exit(virtio_blk__exit);
376