xref: /kvmtool/virtio/blk.c (revision 69971b13fe759fc5c3ae3f0c24f3e3808271a9c2)
1416b2c2dSAsias He #include "kvm/virtio-blk.h"
2b30d05adSPekka Enberg 
331638bcaSCyrill Gorcunov #include "kvm/virtio-pci-dev.h"
42449f6e3SSasha Levin #include "kvm/irq.h"
55a24a9f2SPekka Enberg #include "kvm/disk-image.h"
639d6af07SAsias He #include "kvm/virtio.h"
7b30d05adSPekka Enberg #include "kvm/ioport.h"
84ef0f4d6SPekka Enberg #include "kvm/mutex.h"
9fe99fd4eSPekka Enberg #include "kvm/util.h"
108b1ff07eSPekka Enberg #include "kvm/kvm.h"
11b30d05adSPekka Enberg #include "kvm/pci.h"
12fb0957f2SSasha Levin #include "kvm/threadpool.h"
13ec75b82fSSasha Levin #include "kvm/ioeventfd.h"
14b30d05adSPekka Enberg 
1520c64ecaSPekka Enberg #include <linux/virtio_ring.h>
1620c64ecaSPekka Enberg #include <linux/virtio_blk.h>
170528c2a7SPekka Enberg 
18ebe9ac19SSasha Levin #include <linux/list.h>
193fdf659dSSasha Levin #include <linux/types.h>
200528c2a7SPekka Enberg #include <pthread.h>
214155ba8cSPekka Enberg 
224749e795SSasha Levin #define VIRTIO_BLK_MAX_DEV		4
2310eca11dSPekka Enberg #define NUM_VIRT_QUEUES			1
2410eca11dSPekka Enberg 
2503110ff3SAsias He #define VIRTIO_BLK_QUEUE_SIZE		128
263d7831a1SAsias He /*
273d7831a1SAsias He  * the header and status consume two entries
283d7831a1SAsias He  */
293d7831a1SAsias He #define DISK_SEG_MAX			(VIRTIO_BLK_QUEUE_SIZE - 2)
3010eca11dSPekka Enberg 
31fe2a70d1SSasha Levin struct blk_dev_job {
324749e795SSasha Levin 	struct virt_queue		*vq;
33fe2a70d1SSasha Levin 	struct blk_dev			*bdev;
34*69971b13SSasha Levin 	struct iovec			iov[VIRTIO_BLK_QUEUE_SIZE];
35*69971b13SSasha Levin 	u16				out, in, head;
36df0c7f57SSasha Levin 	struct thread_pool__job		job_id;
374749e795SSasha Levin };
384749e795SSasha Levin 
39fe2a70d1SSasha Levin struct blk_dev {
400528c2a7SPekka Enberg 	pthread_mutex_t			mutex;
41ebe9ac19SSasha Levin 	struct list_head		list;
420528c2a7SPekka Enberg 
4340ce993fSPekka Enberg 	struct virtio_blk_config	blk_config;
4438605e1cSSasha Levin 	struct disk_image		*disk;
45ebe9ac19SSasha Levin 	u64				base_addr;
463fdf659dSSasha Levin 	u32				host_features;
473fdf659dSSasha Levin 	u32				guest_features;
483fdf659dSSasha Levin 	u16				config_vector;
493fdf659dSSasha Levin 	u8				status;
50ebfc7327SAsias He 	u8				isr;
5147bf1d0fSPekka Enberg 
5247bf1d0fSPekka Enberg 	/* virtio queue */
533fdf659dSSasha Levin 	u16				queue_selector;
5410eca11dSPekka Enberg 
5545e47970SAsias He 	struct virt_queue		vqs[NUM_VIRT_QUEUES];
56*69971b13SSasha Levin 	struct blk_dev_job		jobs[VIRTIO_BLK_QUEUE_SIZE];
57*69971b13SSasha Levin 	u16				job_idx;
58ef1f02f2SSasha Levin 	struct pci_device_header	pci_hdr;
59fbc2fbf9SPekka Enberg };
60fbc2fbf9SPekka Enberg 
61ebe9ac19SSasha Levin static LIST_HEAD(bdevs);
6240ce993fSPekka Enberg 
63407475bfSPekka Enberg static bool virtio_blk_dev_in(struct blk_dev *bdev, void *data, unsigned long offset, int size, u32 count)
6440ce993fSPekka Enberg {
65fe2a70d1SSasha Levin 	u8 *config_space = (u8 *) &bdev->blk_config;
6640ce993fSPekka Enberg 
6740ce993fSPekka Enberg 	if (size != 1 || count != 1)
6840ce993fSPekka Enberg 		return false;
6940ce993fSPekka Enberg 
70b8f43678SSasha Levin 	ioport__write8(data, config_space[offset - VIRTIO_MSI_CONFIG_VECTOR]);
7140ce993fSPekka Enberg 
7240ce993fSPekka Enberg 	return true;
7340ce993fSPekka Enberg }
7440ce993fSPekka Enberg 
753d62dea6SSasha Levin static bool virtio_blk_pci_io_in(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size, u32 count)
76fbc2fbf9SPekka Enberg {
77407475bfSPekka Enberg 	struct blk_dev *bdev;
78ebe9ac19SSasha Levin 	u16 offset;
790528c2a7SPekka Enberg 	bool ret = true;
800528c2a7SPekka Enberg 
81ebe9ac19SSasha Levin 	bdev	= ioport->priv;
82ebe9ac19SSasha Levin 	offset	= port - bdev->base_addr;
834749e795SSasha Levin 
84fe2a70d1SSasha Levin 	mutex_lock(&bdev->mutex);
85fbc2fbf9SPekka Enberg 
86fbc2fbf9SPekka Enberg 	switch (offset) {
87fbc2fbf9SPekka Enberg 	case VIRTIO_PCI_HOST_FEATURES:
88fe2a70d1SSasha Levin 		ioport__write32(data, bdev->host_features);
89fbc2fbf9SPekka Enberg 		break;
90fbc2fbf9SPekka Enberg 	case VIRTIO_PCI_GUEST_FEATURES:
910528c2a7SPekka Enberg 		ret		= false;
929ee67e60SAsias He 		break;
93fbc2fbf9SPekka Enberg 	case VIRTIO_PCI_QUEUE_PFN:
94fe2a70d1SSasha Levin 		ioport__write32(data, bdev->vqs[bdev->queue_selector].pfn);
958b1ff07eSPekka Enberg 		break;
96fbc2fbf9SPekka Enberg 	case VIRTIO_PCI_QUEUE_NUM:
9710eca11dSPekka Enberg 		ioport__write16(data, VIRTIO_BLK_QUEUE_SIZE);
988b1ff07eSPekka Enberg 		break;
99fbc2fbf9SPekka Enberg 	case VIRTIO_PCI_QUEUE_SEL:
100fbc2fbf9SPekka Enberg 	case VIRTIO_PCI_QUEUE_NOTIFY:
1010528c2a7SPekka Enberg 		ret		= false;
1029ee67e60SAsias He 		break;
103fbc2fbf9SPekka Enberg 	case VIRTIO_PCI_STATUS:
104fe2a70d1SSasha Levin 		ioport__write8(data, bdev->status);
105fbc2fbf9SPekka Enberg 		break;
106fbc2fbf9SPekka Enberg 	case VIRTIO_PCI_ISR:
107ebfc7327SAsias He 		ioport__write8(data, bdev->isr);
10843835ac9SSasha Levin 		kvm__irq_line(kvm, bdev->pci_hdr.irq_line, VIRTIO_IRQ_LOW);
109ebfc7327SAsias He 		bdev->isr = VIRTIO_IRQ_LOW;
1107e61688eSPekka Enberg 		break;
111fbc2fbf9SPekka Enberg 	case VIRTIO_MSI_CONFIG_VECTOR:
112fe2a70d1SSasha Levin 		ioport__write16(data, bdev->config_vector);
11340ce993fSPekka Enberg 		break;
114fbc2fbf9SPekka Enberg 	default:
115407475bfSPekka Enberg 		ret = virtio_blk_dev_in(bdev, data, offset, size, count);
116407475bfSPekka Enberg 		break;
117fbc2fbf9SPekka Enberg 	};
118fbc2fbf9SPekka Enberg 
119fe2a70d1SSasha Levin 	mutex_unlock(&bdev->mutex);
1200528c2a7SPekka Enberg 
1210528c2a7SPekka Enberg 	return ret;
122fbc2fbf9SPekka Enberg }
123fbc2fbf9SPekka Enberg 
124*69971b13SSasha Levin static void virtio_blk_do_io_request(struct kvm *kvm, void *param)
1254155ba8cSPekka Enberg {
1264155ba8cSPekka Enberg 	struct virtio_blk_outhdr *req;
1273fdf659dSSasha Levin 	u8 *status;
128*69971b13SSasha Levin 	ssize_t block_cnt;
129*69971b13SSasha Levin 	struct blk_dev_job *job;
130*69971b13SSasha Levin 	struct blk_dev *bdev;
131*69971b13SSasha Levin 	struct virt_queue *queue;
132*69971b13SSasha Levin 	struct iovec *iov;
133*69971b13SSasha Levin 	u16 out, in, head;
1344155ba8cSPekka Enberg 
135*69971b13SSasha Levin 	block_cnt	= -1;
136*69971b13SSasha Levin 	job		= param;
137*69971b13SSasha Levin 	bdev		= job->bdev;
138*69971b13SSasha Levin 	queue		= job->vq;
139*69971b13SSasha Levin 	iov		= job->iov;
140*69971b13SSasha Levin 	out		= job->out;
141*69971b13SSasha Levin 	in		= job->in;
142*69971b13SSasha Levin 	head		= job->head;
14345e47970SAsias He 	req		= iov[0].iov_base;
14403110ff3SAsias He 
145258dd093SPekka Enberg 	switch (req->type) {
14603110ff3SAsias He 	case VIRTIO_BLK_T_IN:
147b8861977SAsias He 		block_cnt	= disk_image__read(bdev->disk, req->sector, iov + 1, in + out - 2);
148258dd093SPekka Enberg 		break;
14903110ff3SAsias He 	case VIRTIO_BLK_T_OUT:
150b8861977SAsias He 		block_cnt	= disk_image__write(bdev->disk, req->sector, iov + 1, in + out - 2);
151258dd093SPekka Enberg 		break;
15229084a74SPrasad Joshi 	case VIRTIO_BLK_T_FLUSH:
15329084a74SPrasad Joshi 		block_cnt       = disk_image__flush(bdev->disk);
15429084a74SPrasad Joshi 		break;
155258dd093SPekka Enberg 	default:
1564542f276SCyrill Gorcunov 		pr_warning("request type %d", req->type);
15770b53f25SSasha Levin 		block_cnt	= -1;
158407475bfSPekka Enberg 		break;
15903110ff3SAsias He 	}
16003110ff3SAsias He 
16145e47970SAsias He 	/* status */
16245e47970SAsias He 	status			= iov[out + in - 1].iov_base;
16370b53f25SSasha Levin 	*status			= (block_cnt < 0) ? VIRTIO_BLK_S_IOERR : VIRTIO_BLK_S_OK;
16403110ff3SAsias He 
165*69971b13SSasha Levin 	mutex_lock(&bdev->mutex);
16645e47970SAsias He 	virt_queue__set_used_elem(queue, head, block_cnt);
167*69971b13SSasha Levin 	mutex_unlock(&bdev->mutex);
1684155ba8cSPekka Enberg 
169*69971b13SSasha Levin 	virt_queue__trigger_irq(queue, bdev->pci_hdr.irq_line, &bdev->isr, kvm);
1704155ba8cSPekka Enberg }
1714155ba8cSPekka Enberg 
172*69971b13SSasha Levin static void virtio_blk_do_io(struct kvm *kvm, struct virt_queue *vq, struct blk_dev *bdev)
17345e47970SAsias He {
174*69971b13SSasha Levin 	while (virt_queue__available(vq)) {
175*69971b13SSasha Levin 		struct blk_dev_job *job = &bdev->jobs[bdev->job_idx++ % VIRTIO_BLK_QUEUE_SIZE];
176407475bfSPekka Enberg 
177*69971b13SSasha Levin 		*job			= (struct blk_dev_job) {
178*69971b13SSasha Levin 			.vq			= vq,
179*69971b13SSasha Levin 			.bdev			= bdev,
180*69971b13SSasha Levin 		};
181*69971b13SSasha Levin 		job->head = virt_queue__get_iov(vq, job->iov, &job->out, &job->in, kvm);
18245e47970SAsias He 
183*69971b13SSasha Levin 		thread_pool__init_job(&job->job_id, kvm, virtio_blk_do_io_request, job);
184*69971b13SSasha Levin 		thread_pool__do_job(&job->job_id);
185*69971b13SSasha Levin 	}
1864baf6f73SSasha Levin }
1870528c2a7SPekka Enberg 
1883d62dea6SSasha Levin static bool virtio_blk_pci_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size, u32 count)
189fbc2fbf9SPekka Enberg {
190407475bfSPekka Enberg 	struct blk_dev *bdev;
191ebe9ac19SSasha Levin 	u16 offset;
1920528c2a7SPekka Enberg 	bool ret = true;
1930528c2a7SPekka Enberg 
194ebe9ac19SSasha Levin 	bdev	= ioport->priv;
195ebe9ac19SSasha Levin 	offset	= port - bdev->base_addr;
1964749e795SSasha Levin 
197fe2a70d1SSasha Levin 	mutex_lock(&bdev->mutex);
198fbc2fbf9SPekka Enberg 
199fbc2fbf9SPekka Enberg 	switch (offset) {
200fbc2fbf9SPekka Enberg 	case VIRTIO_PCI_GUEST_FEATURES:
201fe2a70d1SSasha Levin 		bdev->guest_features	= ioport__read32(data);
202fbc2fbf9SPekka Enberg 		break;
20310eca11dSPekka Enberg 	case VIRTIO_PCI_QUEUE_PFN: {
20410eca11dSPekka Enberg 		struct virt_queue *queue;
20510eca11dSPekka Enberg 		void *p;
20610eca11dSPekka Enberg 
207fe2a70d1SSasha Levin 		queue			= &bdev->vqs[bdev->queue_selector];
20810eca11dSPekka Enberg 		queue->pfn		= ioport__read32(data);
20943835ac9SSasha Levin 		p			= guest_pfn_to_host(kvm, queue->pfn);
21010eca11dSPekka Enberg 
211b8f43678SSasha Levin 		vring_init(&queue->vring, VIRTIO_BLK_QUEUE_SIZE, p, VIRTIO_PCI_VRING_ALIGN);
21210eca11dSPekka Enberg 
2137e61688eSPekka Enberg 		break;
21410eca11dSPekka Enberg 	}
215fbc2fbf9SPekka Enberg 	case VIRTIO_PCI_QUEUE_SEL:
216fe2a70d1SSasha Levin 		bdev->queue_selector	= ioport__read16(data);
2177e61688eSPekka Enberg 		break;
21810eca11dSPekka Enberg 	case VIRTIO_PCI_QUEUE_NOTIFY: {
2193fdf659dSSasha Levin 		u16 queue_index;
220407475bfSPekka Enberg 
22110eca11dSPekka Enberg 		queue_index		= ioport__read16(data);
222*69971b13SSasha Levin 		virtio_blk_do_io(kvm, &bdev->vqs[queue_index], bdev);
223407475bfSPekka Enberg 
2247e61688eSPekka Enberg 		break;
22510eca11dSPekka Enberg 	}
226fbc2fbf9SPekka Enberg 	case VIRTIO_PCI_STATUS:
227fe2a70d1SSasha Levin 		bdev->status		= ioport__read8(data);
228fbc2fbf9SPekka Enberg 		break;
229fbc2fbf9SPekka Enberg 	case VIRTIO_MSI_CONFIG_VECTOR:
230fe2a70d1SSasha Levin 		bdev->config_vector	= VIRTIO_MSI_NO_VECTOR;
23140ce993fSPekka Enberg 		break;
232fbc2fbf9SPekka Enberg 	case VIRTIO_MSI_QUEUE_VECTOR:
23340ce993fSPekka Enberg 		break;
234fbc2fbf9SPekka Enberg 	default:
2350528c2a7SPekka Enberg 		ret			= false;
236407475bfSPekka Enberg 		break;
237fbc2fbf9SPekka Enberg 	};
238fbc2fbf9SPekka Enberg 
239fe2a70d1SSasha Levin 	mutex_unlock(&bdev->mutex);
2400528c2a7SPekka Enberg 
2410528c2a7SPekka Enberg 	return ret;
242fbc2fbf9SPekka Enberg }
243fbc2fbf9SPekka Enberg 
244416b2c2dSAsias He static struct ioport_operations virtio_blk_io_ops = {
245416b2c2dSAsias He 	.io_in	= virtio_blk_pci_io_in,
246416b2c2dSAsias He 	.io_out	= virtio_blk_pci_io_out,
247fbc2fbf9SPekka Enberg };
248fbc2fbf9SPekka Enberg 
249ec75b82fSSasha Levin static void ioevent_callback(struct kvm *kvm, void *param)
250ec75b82fSSasha Levin {
251*69971b13SSasha Levin 	struct blk_dev *bdev = param;
252ec75b82fSSasha Levin 
253*69971b13SSasha Levin 	virtio_blk_do_io(kvm, &bdev->vqs[0], bdev);
254ec75b82fSSasha Levin }
255ec75b82fSSasha Levin 
25643835ac9SSasha Levin void virtio_blk__init(struct kvm *kvm, struct disk_image *disk)
2574749e795SSasha Levin {
2584749e795SSasha Levin 	u16 blk_dev_base_addr;
259ec75b82fSSasha Levin 	u8 dev, pin, line, i;
260fe2a70d1SSasha Levin 	struct blk_dev *bdev;
261ec75b82fSSasha Levin 	struct ioevent ioevent;
2624749e795SSasha Levin 
2634749e795SSasha Levin 	if (!disk)
2644749e795SSasha Levin 		return;
2654749e795SSasha Levin 
266ebe9ac19SSasha Levin 	bdev = calloc(1, sizeof(struct blk_dev));
267ebe9ac19SSasha Levin 	if (bdev == NULL)
268fe2a70d1SSasha Levin 		die("Failed allocating bdev");
2694749e795SSasha Levin 
270ebe9ac19SSasha Levin 	blk_dev_base_addr	= ioport__register(IOPORT_EMPTY, &virtio_blk_io_ops, IOPORT_SIZE, bdev);
2714749e795SSasha Levin 
272fe2a70d1SSasha Levin 	*bdev			= (struct blk_dev) {
2734749e795SSasha Levin 		.mutex				= PTHREAD_MUTEX_INITIALIZER,
2744749e795SSasha Levin 		.disk				= disk,
275ebe9ac19SSasha Levin 		.base_addr			= blk_dev_base_addr,
2764749e795SSasha Levin 		.blk_config			= (struct virtio_blk_config) {
2774749e795SSasha Levin 			.capacity		= disk->size / SECTOR_SIZE,
2783d7831a1SAsias He 			.seg_max		= DISK_SEG_MAX,
2794749e795SSasha Levin 		},
280ef1f02f2SSasha Levin 		.pci_hdr = (struct pci_device_header) {
281b30d05adSPekka Enberg 			.vendor_id		= PCI_VENDOR_ID_REDHAT_QUMRANET,
282b30d05adSPekka Enberg 			.device_id		= PCI_DEVICE_ID_VIRTIO_BLK,
283b30d05adSPekka Enberg 			.header_type		= PCI_HEADER_TYPE_NORMAL,
284b30d05adSPekka Enberg 			.revision_id		= 0,
285b30d05adSPekka Enberg 			.class			= 0x010000,
286b30d05adSPekka Enberg 			.subsys_vendor_id	= PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET,
2870a7ab0c6SSasha Levin 			.subsys_id		= VIRTIO_ID_BLOCK,
2884749e795SSasha Levin 			.bar[0]			= blk_dev_base_addr | PCI_BASE_ADDRESS_SPACE_IO,
2894749e795SSasha Levin 		},
2903d7831a1SAsias He 		/*
2913d7831a1SAsias He 		 * Note we don't set VIRTIO_BLK_F_GEOMETRY here so the
2923d7831a1SAsias He 		 * guest kernel will compute disk geometry by own, the
2933d7831a1SAsias He 		 * same applies to VIRTIO_BLK_F_BLK_SIZE
2943d7831a1SAsias He 		 */
29529084a74SPrasad Joshi 		.host_features			= (1UL << VIRTIO_BLK_F_SEG_MAX | 1UL << VIRTIO_BLK_F_FLUSH),
296b30d05adSPekka Enberg 	};
297b30d05adSPekka Enberg 
298ebe9ac19SSasha Levin 	list_add_tail(&bdev->list, &bdevs);
299ebe9ac19SSasha Levin 
3000a7ab0c6SSasha Levin 	if (irq__register_device(VIRTIO_ID_BLOCK, &dev, &pin, &line) < 0)
3012449f6e3SSasha Levin 		return;
3022449f6e3SSasha Levin 
303ef1f02f2SSasha Levin 	bdev->pci_hdr.irq_pin	= pin;
304ef1f02f2SSasha Levin 	bdev->pci_hdr.irq_line	= line;
3052449f6e3SSasha Levin 
306ef1f02f2SSasha Levin 	pci__register(&bdev->pci_hdr, dev);
307ec75b82fSSasha Levin 
308ec75b82fSSasha Levin 	for (i = 0; i < NUM_VIRT_QUEUES; i++) {
309ec75b82fSSasha Levin 		ioevent = (struct ioevent) {
310ec75b82fSSasha Levin 			.io_addr		= blk_dev_base_addr + VIRTIO_PCI_QUEUE_NOTIFY,
311ec75b82fSSasha Levin 			.io_len			= sizeof(u16),
312ec75b82fSSasha Levin 			.fn			= ioevent_callback,
313ec75b82fSSasha Levin 			.datamatch		= i,
314*69971b13SSasha Levin 			.fn_ptr			= bdev,
315ec75b82fSSasha Levin 			.fn_kvm			= kvm,
316ec75b82fSSasha Levin 			.fd			= eventfd(0, 0),
317ec75b82fSSasha Levin 		};
318ec75b82fSSasha Levin 
319ec75b82fSSasha Levin 		ioeventfd__add_event(&ioevent);
320ec75b82fSSasha Levin 	}
321b30d05adSPekka Enberg }
322bcb6aacaSPrasad Joshi 
323bcb6aacaSPrasad Joshi void virtio_blk__init_all(struct kvm *kvm)
324bcb6aacaSPrasad Joshi {
325bcb6aacaSPrasad Joshi 	int i;
326bcb6aacaSPrasad Joshi 
327bcb6aacaSPrasad Joshi 	for (i = 0; i < kvm->nr_disks; i++)
328bcb6aacaSPrasad Joshi 		virtio_blk__init(kvm, kvm->disks[i]);
329bcb6aacaSPrasad Joshi }
330a0a1e3c2SPrasad Joshi 
331a0a1e3c2SPrasad Joshi void virtio_blk__delete_all(struct kvm *kvm)
332a0a1e3c2SPrasad Joshi {
333ebe9ac19SSasha Levin 	while (!list_empty(&bdevs)) {
334ebe9ac19SSasha Levin 		struct blk_dev *bdev;
335a0a1e3c2SPrasad Joshi 
336ebe9ac19SSasha Levin 		bdev = list_first_entry(&bdevs, struct blk_dev, list);
337ec75b82fSSasha Levin 		ioeventfd__del_event(bdev->base_addr + VIRTIO_PCI_QUEUE_NOTIFY, 0);
338ebe9ac19SSasha Levin 		list_del(&bdev->list);
339ebe9ac19SSasha Levin 		free(bdev);
340ebe9ac19SSasha Levin 	}
341a0a1e3c2SPrasad Joshi }
342