xref: /kvmtool/virtio/blk.c (revision ff6462e8088a2f399231b34d5f665ee9d287b1b3)
1416b2c2dSAsias He #include "kvm/virtio-blk.h"
2b30d05adSPekka Enberg 
331638bcaSCyrill Gorcunov #include "kvm/virtio-pci-dev.h"
42449f6e3SSasha Levin #include "kvm/irq.h"
55a24a9f2SPekka Enberg #include "kvm/disk-image.h"
639d6af07SAsias He #include "kvm/virtio.h"
7b30d05adSPekka Enberg #include "kvm/ioport.h"
84ef0f4d6SPekka Enberg #include "kvm/mutex.h"
9fe99fd4eSPekka Enberg #include "kvm/util.h"
108b1ff07eSPekka Enberg #include "kvm/kvm.h"
11b30d05adSPekka Enberg #include "kvm/pci.h"
12fb0957f2SSasha Levin #include "kvm/threadpool.h"
13ec75b82fSSasha Levin #include "kvm/ioeventfd.h"
14b30d05adSPekka Enberg 
1520c64ecaSPekka Enberg #include <linux/virtio_ring.h>
1620c64ecaSPekka Enberg #include <linux/virtio_blk.h>
170528c2a7SPekka Enberg 
18ebe9ac19SSasha Levin #include <linux/list.h>
193fdf659dSSasha Levin #include <linux/types.h>
200528c2a7SPekka Enberg #include <pthread.h>
214155ba8cSPekka Enberg 
/* Presumably the maximum number of block devices; nothing in this file checks it. */
#define VIRTIO_BLK_MAX_DEV		4

/* Number of virtqueues per device; sizes the vqs[] array in struct blk_dev. */
#define NUM_VIRT_QUEUES			1

/* Ring size reported to the guest and used when initialising the vring. */
#define VIRTIO_BLK_QUEUE_SIZE		128

/*
 * Value advertised as seg_max: the request header and the status byte
 * consume two entries of every descriptor chain.
 */
#define DISK_SEG_MAX			(VIRTIO_BLK_QUEUE_SIZE - 2)
3010eca11dSPekka Enberg 
31fe2a70d1SSasha Levin struct blk_dev_job {
324749e795SSasha Levin 	struct virt_queue		*vq;
33fe2a70d1SSasha Levin 	struct blk_dev			*bdev;
3469971b13SSasha Levin 	struct iovec			iov[VIRTIO_BLK_QUEUE_SIZE];
3569971b13SSasha Levin 	u16				out, in, head;
36df0c7f57SSasha Levin 	struct thread_pool__job		job_id;
374749e795SSasha Levin };
384749e795SSasha Levin 
39fe2a70d1SSasha Levin struct blk_dev {
400528c2a7SPekka Enberg 	pthread_mutex_t			mutex;
41ebe9ac19SSasha Levin 	struct list_head		list;
420528c2a7SPekka Enberg 
4340ce993fSPekka Enberg 	struct virtio_blk_config	blk_config;
4438605e1cSSasha Levin 	struct disk_image		*disk;
45ebe9ac19SSasha Levin 	u64				base_addr;
463fdf659dSSasha Levin 	u32				host_features;
473fdf659dSSasha Levin 	u32				guest_features;
483fdf659dSSasha Levin 	u16				config_vector;
493fdf659dSSasha Levin 	u8				status;
50ebfc7327SAsias He 	u8				isr;
5147bf1d0fSPekka Enberg 
5247bf1d0fSPekka Enberg 	/* virtio queue */
533fdf659dSSasha Levin 	u16				queue_selector;
5410eca11dSPekka Enberg 
5545e47970SAsias He 	struct virt_queue		vqs[NUM_VIRT_QUEUES];
5669971b13SSasha Levin 	struct blk_dev_job		jobs[VIRTIO_BLK_QUEUE_SIZE];
5769971b13SSasha Levin 	u16				job_idx;
58ef1f02f2SSasha Levin 	struct pci_device_header	pci_hdr;
59fbc2fbf9SPekka Enberg };
60fbc2fbf9SPekka Enberg 
61ebe9ac19SSasha Levin static LIST_HEAD(bdevs);
6240ce993fSPekka Enberg 
63407475bfSPekka Enberg static bool virtio_blk_dev_in(struct blk_dev *bdev, void *data, unsigned long offset, int size, u32 count)
6440ce993fSPekka Enberg {
65fe2a70d1SSasha Levin 	u8 *config_space = (u8 *) &bdev->blk_config;
6640ce993fSPekka Enberg 
6740ce993fSPekka Enberg 	if (size != 1 || count != 1)
6840ce993fSPekka Enberg 		return false;
6940ce993fSPekka Enberg 
70b8f43678SSasha Levin 	ioport__write8(data, config_space[offset - VIRTIO_MSI_CONFIG_VECTOR]);
7140ce993fSPekka Enberg 
7240ce993fSPekka Enberg 	return true;
7340ce993fSPekka Enberg }
7440ce993fSPekka Enberg 
753d62dea6SSasha Levin static bool virtio_blk_pci_io_in(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size, u32 count)
76fbc2fbf9SPekka Enberg {
77407475bfSPekka Enberg 	struct blk_dev *bdev;
78ebe9ac19SSasha Levin 	u16 offset;
790528c2a7SPekka Enberg 	bool ret = true;
800528c2a7SPekka Enberg 
81ebe9ac19SSasha Levin 	bdev	= ioport->priv;
82ebe9ac19SSasha Levin 	offset	= port - bdev->base_addr;
834749e795SSasha Levin 
84fe2a70d1SSasha Levin 	mutex_lock(&bdev->mutex);
85fbc2fbf9SPekka Enberg 
86fbc2fbf9SPekka Enberg 	switch (offset) {
87fbc2fbf9SPekka Enberg 	case VIRTIO_PCI_HOST_FEATURES:
88fe2a70d1SSasha Levin 		ioport__write32(data, bdev->host_features);
89fbc2fbf9SPekka Enberg 		break;
90fbc2fbf9SPekka Enberg 	case VIRTIO_PCI_GUEST_FEATURES:
910528c2a7SPekka Enberg 		ret		= false;
929ee67e60SAsias He 		break;
93fbc2fbf9SPekka Enberg 	case VIRTIO_PCI_QUEUE_PFN:
94fe2a70d1SSasha Levin 		ioport__write32(data, bdev->vqs[bdev->queue_selector].pfn);
958b1ff07eSPekka Enberg 		break;
96fbc2fbf9SPekka Enberg 	case VIRTIO_PCI_QUEUE_NUM:
9710eca11dSPekka Enberg 		ioport__write16(data, VIRTIO_BLK_QUEUE_SIZE);
988b1ff07eSPekka Enberg 		break;
99fbc2fbf9SPekka Enberg 	case VIRTIO_PCI_QUEUE_SEL:
100fbc2fbf9SPekka Enberg 	case VIRTIO_PCI_QUEUE_NOTIFY:
1010528c2a7SPekka Enberg 		ret		= false;
1029ee67e60SAsias He 		break;
103fbc2fbf9SPekka Enberg 	case VIRTIO_PCI_STATUS:
104fe2a70d1SSasha Levin 		ioport__write8(data, bdev->status);
105fbc2fbf9SPekka Enberg 		break;
106fbc2fbf9SPekka Enberg 	case VIRTIO_PCI_ISR:
107ebfc7327SAsias He 		ioport__write8(data, bdev->isr);
10843835ac9SSasha Levin 		kvm__irq_line(kvm, bdev->pci_hdr.irq_line, VIRTIO_IRQ_LOW);
109ebfc7327SAsias He 		bdev->isr = VIRTIO_IRQ_LOW;
1107e61688eSPekka Enberg 		break;
111fbc2fbf9SPekka Enberg 	case VIRTIO_MSI_CONFIG_VECTOR:
112fe2a70d1SSasha Levin 		ioport__write16(data, bdev->config_vector);
11340ce993fSPekka Enberg 		break;
114fbc2fbf9SPekka Enberg 	default:
115407475bfSPekka Enberg 		ret = virtio_blk_dev_in(bdev, data, offset, size, count);
116407475bfSPekka Enberg 		break;
117fbc2fbf9SPekka Enberg 	};
118fbc2fbf9SPekka Enberg 
119fe2a70d1SSasha Levin 	mutex_unlock(&bdev->mutex);
1200528c2a7SPekka Enberg 
1210528c2a7SPekka Enberg 	return ret;
122fbc2fbf9SPekka Enberg }
123fbc2fbf9SPekka Enberg 
12469971b13SSasha Levin static void virtio_blk_do_io_request(struct kvm *kvm, void *param)
1254155ba8cSPekka Enberg {
1264155ba8cSPekka Enberg 	struct virtio_blk_outhdr *req;
1273fdf659dSSasha Levin 	u8 *status;
12869971b13SSasha Levin 	ssize_t block_cnt;
12969971b13SSasha Levin 	struct blk_dev_job *job;
13069971b13SSasha Levin 	struct blk_dev *bdev;
13169971b13SSasha Levin 	struct virt_queue *queue;
13269971b13SSasha Levin 	struct iovec *iov;
13369971b13SSasha Levin 	u16 out, in, head;
1344155ba8cSPekka Enberg 
13569971b13SSasha Levin 	block_cnt	= -1;
13669971b13SSasha Levin 	job		= param;
13769971b13SSasha Levin 	bdev		= job->bdev;
13869971b13SSasha Levin 	queue		= job->vq;
13969971b13SSasha Levin 	iov		= job->iov;
14069971b13SSasha Levin 	out		= job->out;
14169971b13SSasha Levin 	in		= job->in;
14269971b13SSasha Levin 	head		= job->head;
14345e47970SAsias He 	req		= iov[0].iov_base;
14403110ff3SAsias He 
145258dd093SPekka Enberg 	switch (req->type) {
14603110ff3SAsias He 	case VIRTIO_BLK_T_IN:
147b8861977SAsias He 		block_cnt	= disk_image__read(bdev->disk, req->sector, iov + 1, in + out - 2);
148258dd093SPekka Enberg 		break;
14903110ff3SAsias He 	case VIRTIO_BLK_T_OUT:
150b8861977SAsias He 		block_cnt	= disk_image__write(bdev->disk, req->sector, iov + 1, in + out - 2);
151258dd093SPekka Enberg 		break;
15229084a74SPrasad Joshi 	case VIRTIO_BLK_T_FLUSH:
15329084a74SPrasad Joshi 		block_cnt       = disk_image__flush(bdev->disk);
15429084a74SPrasad Joshi 		break;
155*ff6462e8SSasha Levin 	case VIRTIO_BLK_T_GET_ID:
156*ff6462e8SSasha Levin 		block_cnt	= VIRTIO_BLK_ID_BYTES;
157*ff6462e8SSasha Levin 		disk_image__get_serial(bdev->disk, (iov + 1)->iov_base, &block_cnt);
158*ff6462e8SSasha Levin 		break;
159258dd093SPekka Enberg 	default:
1604542f276SCyrill Gorcunov 		pr_warning("request type %d", req->type);
16170b53f25SSasha Levin 		block_cnt	= -1;
162407475bfSPekka Enberg 		break;
16303110ff3SAsias He 	}
16403110ff3SAsias He 
16545e47970SAsias He 	/* status */
16645e47970SAsias He 	status			= iov[out + in - 1].iov_base;
16770b53f25SSasha Levin 	*status			= (block_cnt < 0) ? VIRTIO_BLK_S_IOERR : VIRTIO_BLK_S_OK;
16803110ff3SAsias He 
16969971b13SSasha Levin 	mutex_lock(&bdev->mutex);
17045e47970SAsias He 	virt_queue__set_used_elem(queue, head, block_cnt);
17169971b13SSasha Levin 	mutex_unlock(&bdev->mutex);
1724155ba8cSPekka Enberg 
17369971b13SSasha Levin 	virt_queue__trigger_irq(queue, bdev->pci_hdr.irq_line, &bdev->isr, kvm);
1744155ba8cSPekka Enberg }
1754155ba8cSPekka Enberg 
17669971b13SSasha Levin static void virtio_blk_do_io(struct kvm *kvm, struct virt_queue *vq, struct blk_dev *bdev)
17745e47970SAsias He {
17869971b13SSasha Levin 	while (virt_queue__available(vq)) {
17969971b13SSasha Levin 		struct blk_dev_job *job = &bdev->jobs[bdev->job_idx++ % VIRTIO_BLK_QUEUE_SIZE];
180407475bfSPekka Enberg 
18169971b13SSasha Levin 		*job			= (struct blk_dev_job) {
18269971b13SSasha Levin 			.vq			= vq,
18369971b13SSasha Levin 			.bdev			= bdev,
18469971b13SSasha Levin 		};
18569971b13SSasha Levin 		job->head = virt_queue__get_iov(vq, job->iov, &job->out, &job->in, kvm);
18645e47970SAsias He 
18769971b13SSasha Levin 		thread_pool__init_job(&job->job_id, kvm, virtio_blk_do_io_request, job);
18869971b13SSasha Levin 		thread_pool__do_job(&job->job_id);
18969971b13SSasha Levin 	}
1904baf6f73SSasha Levin }
1910528c2a7SPekka Enberg 
1923d62dea6SSasha Levin static bool virtio_blk_pci_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size, u32 count)
193fbc2fbf9SPekka Enberg {
194407475bfSPekka Enberg 	struct blk_dev *bdev;
195ebe9ac19SSasha Levin 	u16 offset;
1960528c2a7SPekka Enberg 	bool ret = true;
1970528c2a7SPekka Enberg 
198ebe9ac19SSasha Levin 	bdev	= ioport->priv;
199ebe9ac19SSasha Levin 	offset	= port - bdev->base_addr;
2004749e795SSasha Levin 
201fe2a70d1SSasha Levin 	mutex_lock(&bdev->mutex);
202fbc2fbf9SPekka Enberg 
203fbc2fbf9SPekka Enberg 	switch (offset) {
204fbc2fbf9SPekka Enberg 	case VIRTIO_PCI_GUEST_FEATURES:
205fe2a70d1SSasha Levin 		bdev->guest_features	= ioport__read32(data);
206fbc2fbf9SPekka Enberg 		break;
20710eca11dSPekka Enberg 	case VIRTIO_PCI_QUEUE_PFN: {
20810eca11dSPekka Enberg 		struct virt_queue *queue;
20910eca11dSPekka Enberg 		void *p;
21010eca11dSPekka Enberg 
211fe2a70d1SSasha Levin 		queue			= &bdev->vqs[bdev->queue_selector];
21210eca11dSPekka Enberg 		queue->pfn		= ioport__read32(data);
21343835ac9SSasha Levin 		p			= guest_pfn_to_host(kvm, queue->pfn);
21410eca11dSPekka Enberg 
215b8f43678SSasha Levin 		vring_init(&queue->vring, VIRTIO_BLK_QUEUE_SIZE, p, VIRTIO_PCI_VRING_ALIGN);
21610eca11dSPekka Enberg 
2177e61688eSPekka Enberg 		break;
21810eca11dSPekka Enberg 	}
219fbc2fbf9SPekka Enberg 	case VIRTIO_PCI_QUEUE_SEL:
220fe2a70d1SSasha Levin 		bdev->queue_selector	= ioport__read16(data);
2217e61688eSPekka Enberg 		break;
22210eca11dSPekka Enberg 	case VIRTIO_PCI_QUEUE_NOTIFY: {
2233fdf659dSSasha Levin 		u16 queue_index;
224407475bfSPekka Enberg 
22510eca11dSPekka Enberg 		queue_index		= ioport__read16(data);
22669971b13SSasha Levin 		virtio_blk_do_io(kvm, &bdev->vqs[queue_index], bdev);
227407475bfSPekka Enberg 
2287e61688eSPekka Enberg 		break;
22910eca11dSPekka Enberg 	}
230fbc2fbf9SPekka Enberg 	case VIRTIO_PCI_STATUS:
231fe2a70d1SSasha Levin 		bdev->status		= ioport__read8(data);
232fbc2fbf9SPekka Enberg 		break;
233fbc2fbf9SPekka Enberg 	case VIRTIO_MSI_CONFIG_VECTOR:
234fe2a70d1SSasha Levin 		bdev->config_vector	= VIRTIO_MSI_NO_VECTOR;
23540ce993fSPekka Enberg 		break;
236fbc2fbf9SPekka Enberg 	case VIRTIO_MSI_QUEUE_VECTOR:
23740ce993fSPekka Enberg 		break;
238fbc2fbf9SPekka Enberg 	default:
2390528c2a7SPekka Enberg 		ret			= false;
240407475bfSPekka Enberg 		break;
241fbc2fbf9SPekka Enberg 	};
242fbc2fbf9SPekka Enberg 
243fe2a70d1SSasha Levin 	mutex_unlock(&bdev->mutex);
2440528c2a7SPekka Enberg 
2450528c2a7SPekka Enberg 	return ret;
246fbc2fbf9SPekka Enberg }
247fbc2fbf9SPekka Enberg 
248416b2c2dSAsias He static struct ioport_operations virtio_blk_io_ops = {
249416b2c2dSAsias He 	.io_in	= virtio_blk_pci_io_in,
250416b2c2dSAsias He 	.io_out	= virtio_blk_pci_io_out,
251fbc2fbf9SPekka Enberg };
252fbc2fbf9SPekka Enberg 
253ec75b82fSSasha Levin static void ioevent_callback(struct kvm *kvm, void *param)
254ec75b82fSSasha Levin {
25569971b13SSasha Levin 	struct blk_dev *bdev = param;
256ec75b82fSSasha Levin 
25769971b13SSasha Levin 	virtio_blk_do_io(kvm, &bdev->vqs[0], bdev);
258ec75b82fSSasha Levin }
259ec75b82fSSasha Levin 
26043835ac9SSasha Levin void virtio_blk__init(struct kvm *kvm, struct disk_image *disk)
2614749e795SSasha Levin {
2624749e795SSasha Levin 	u16 blk_dev_base_addr;
263ec75b82fSSasha Levin 	u8 dev, pin, line, i;
264fe2a70d1SSasha Levin 	struct blk_dev *bdev;
265ec75b82fSSasha Levin 	struct ioevent ioevent;
2664749e795SSasha Levin 
2674749e795SSasha Levin 	if (!disk)
2684749e795SSasha Levin 		return;
2694749e795SSasha Levin 
270ebe9ac19SSasha Levin 	bdev = calloc(1, sizeof(struct blk_dev));
271ebe9ac19SSasha Levin 	if (bdev == NULL)
272fe2a70d1SSasha Levin 		die("Failed allocating bdev");
2734749e795SSasha Levin 
274ebe9ac19SSasha Levin 	blk_dev_base_addr	= ioport__register(IOPORT_EMPTY, &virtio_blk_io_ops, IOPORT_SIZE, bdev);
2754749e795SSasha Levin 
276fe2a70d1SSasha Levin 	*bdev			= (struct blk_dev) {
2774749e795SSasha Levin 		.mutex				= PTHREAD_MUTEX_INITIALIZER,
2784749e795SSasha Levin 		.disk				= disk,
279ebe9ac19SSasha Levin 		.base_addr			= blk_dev_base_addr,
2804749e795SSasha Levin 		.blk_config			= (struct virtio_blk_config) {
2814749e795SSasha Levin 			.capacity		= disk->size / SECTOR_SIZE,
2823d7831a1SAsias He 			.seg_max		= DISK_SEG_MAX,
2834749e795SSasha Levin 		},
284ef1f02f2SSasha Levin 		.pci_hdr = (struct pci_device_header) {
285b30d05adSPekka Enberg 			.vendor_id		= PCI_VENDOR_ID_REDHAT_QUMRANET,
286b30d05adSPekka Enberg 			.device_id		= PCI_DEVICE_ID_VIRTIO_BLK,
287b30d05adSPekka Enberg 			.header_type		= PCI_HEADER_TYPE_NORMAL,
288b30d05adSPekka Enberg 			.revision_id		= 0,
289b30d05adSPekka Enberg 			.class			= 0x010000,
290b30d05adSPekka Enberg 			.subsys_vendor_id	= PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET,
2910a7ab0c6SSasha Levin 			.subsys_id		= VIRTIO_ID_BLOCK,
2924749e795SSasha Levin 			.bar[0]			= blk_dev_base_addr | PCI_BASE_ADDRESS_SPACE_IO,
2934749e795SSasha Levin 		},
2943d7831a1SAsias He 		/*
2953d7831a1SAsias He 		 * Note we don't set VIRTIO_BLK_F_GEOMETRY here so the
2963d7831a1SAsias He 		 * guest kernel will compute disk geometry by own, the
2973d7831a1SAsias He 		 * same applies to VIRTIO_BLK_F_BLK_SIZE
2983d7831a1SAsias He 		 */
29929084a74SPrasad Joshi 		.host_features			= (1UL << VIRTIO_BLK_F_SEG_MAX | 1UL << VIRTIO_BLK_F_FLUSH),
300b30d05adSPekka Enberg 	};
301b30d05adSPekka Enberg 
302ebe9ac19SSasha Levin 	list_add_tail(&bdev->list, &bdevs);
303ebe9ac19SSasha Levin 
3040a7ab0c6SSasha Levin 	if (irq__register_device(VIRTIO_ID_BLOCK, &dev, &pin, &line) < 0)
3052449f6e3SSasha Levin 		return;
3062449f6e3SSasha Levin 
307ef1f02f2SSasha Levin 	bdev->pci_hdr.irq_pin	= pin;
308ef1f02f2SSasha Levin 	bdev->pci_hdr.irq_line	= line;
3092449f6e3SSasha Levin 
310ef1f02f2SSasha Levin 	pci__register(&bdev->pci_hdr, dev);
311ec75b82fSSasha Levin 
312ec75b82fSSasha Levin 	for (i = 0; i < NUM_VIRT_QUEUES; i++) {
313ec75b82fSSasha Levin 		ioevent = (struct ioevent) {
314ec75b82fSSasha Levin 			.io_addr		= blk_dev_base_addr + VIRTIO_PCI_QUEUE_NOTIFY,
315ec75b82fSSasha Levin 			.io_len			= sizeof(u16),
316ec75b82fSSasha Levin 			.fn			= ioevent_callback,
317ec75b82fSSasha Levin 			.datamatch		= i,
31869971b13SSasha Levin 			.fn_ptr			= bdev,
319ec75b82fSSasha Levin 			.fn_kvm			= kvm,
320ec75b82fSSasha Levin 			.fd			= eventfd(0, 0),
321ec75b82fSSasha Levin 		};
322ec75b82fSSasha Levin 
323ec75b82fSSasha Levin 		ioeventfd__add_event(&ioevent);
324ec75b82fSSasha Levin 	}
325b30d05adSPekka Enberg }
326bcb6aacaSPrasad Joshi 
327bcb6aacaSPrasad Joshi void virtio_blk__init_all(struct kvm *kvm)
328bcb6aacaSPrasad Joshi {
329bcb6aacaSPrasad Joshi 	int i;
330bcb6aacaSPrasad Joshi 
331bcb6aacaSPrasad Joshi 	for (i = 0; i < kvm->nr_disks; i++)
332bcb6aacaSPrasad Joshi 		virtio_blk__init(kvm, kvm->disks[i]);
333bcb6aacaSPrasad Joshi }
334a0a1e3c2SPrasad Joshi 
335a0a1e3c2SPrasad Joshi void virtio_blk__delete_all(struct kvm *kvm)
336a0a1e3c2SPrasad Joshi {
337ebe9ac19SSasha Levin 	while (!list_empty(&bdevs)) {
338ebe9ac19SSasha Levin 		struct blk_dev *bdev;
339a0a1e3c2SPrasad Joshi 
340ebe9ac19SSasha Levin 		bdev = list_first_entry(&bdevs, struct blk_dev, list);
341ec75b82fSSasha Levin 		ioeventfd__del_event(bdev->base_addr + VIRTIO_PCI_QUEUE_NOTIFY, 0);
342ebe9ac19SSasha Levin 		list_del(&bdev->list);
343ebe9ac19SSasha Levin 		free(bdev);
344ebe9ac19SSasha Levin 	}
345a0a1e3c2SPrasad Joshi }
346