/* kvmtool: virtio/core.c (revision b17552ee6c9728c20c9d0bd037ef134277daaa40) */
#include <linux/virtio_ring.h>
#include <linux/types.h>
#include <sys/uio.h>
#include <stdlib.h>

#include "kvm/guest_compat.h"
#include "kvm/barrier.h"
#include "kvm/virtio.h"
#include "kvm/virtio-pci.h"
#include "kvm/virtio-mmio.h"
#include "kvm/util.h"
#include "kvm/kvm.h"


const char *virtio_trans_name(enum virtio_trans trans)
{
	if (trans == VIRTIO_PCI || trans == VIRTIO_PCI_LEGACY)
		return "pci";
	else if (trans == VIRTIO_MMIO || trans == VIRTIO_MMIO_LEGACY)
		return "mmio";
	return "unknown";
}

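/*
 * Option parser for the --virtio-transport, --virtio-legacy and --force-pci
 * command line switches: translate the argument into an enum virtio_trans.
 */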
int virtio_transport_parser(const struct option *opt, const char *arg, int unset)
{
	enum virtio_trans *type = opt->value;
	struct kvm *kvm;

	if (!strcmp(opt->long_name, "virtio-transport")) {
		if (!strcmp(arg, "pci")) {
			*type = VIRTIO_PCI;
		} else if (!strcmp(arg, "pci-legacy")) {
			*type = VIRTIO_PCI_LEGACY;
#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
		} else if (!strcmp(arg, "mmio")) {
			*type = VIRTIO_MMIO;
		} else if (!strcmp(arg, "mmio-legacy")) {
			*type = VIRTIO_MMIO_LEGACY;
#endif
		} else {
			pr_err("virtio-transport: unknown type \"%s\"\n", arg);
			return -1;
		}
	} else if (!strcmp(opt->long_name, "virtio-legacy")) {
		*type = VIRTIO_PCI_LEGACY;
	} else if (!strcmp(opt->long_name, "force-pci")) {
		kvm = opt->ptr;
		kvm->cfg.virtio_transport = VIRTIO_PCI;
	}

	return 0;
}

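/*
 * Publish 'jump' previously filled used elements to the guest by advancing
 * the used ring index.
 */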
void virt_queue__used_idx_advance(struct virt_queue *queue, u16 jump)
{
	u16 idx = virtio_guest_to_host_u16(queue->endian,
					   queue->vring.used->idx);

	/*
	 * Use wmb to ensure that the used element has been updated with its
	 * head and len before we advance idx: once the index moves, the
	 * guest is free to consume the element.
	 */
	wmb();
	idx += jump;
	queue->vring.used->idx = virtio_host_to_guest_u16(queue->endian, idx);
}

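/*
 * Fill the used ring entry at (used->idx + offset) with a completed chain's
 * head descriptor index and length, without advancing the used index; the
 * index is published later via virt_queue__used_idx_advance().
 */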
struct vring_used_elem *
virt_queue__set_used_elem_no_update(struct virt_queue *queue, u32 head,
				    u32 len, u16 offset)
{
	struct vring_used_elem *used_elem;
	u16 idx = virtio_guest_to_host_u16(queue->endian, queue->vring.used->idx);

	idx += offset;
	used_elem	= &queue->vring.used->ring[idx % queue->vring.num];
	used_elem->id	= virtio_host_to_guest_u32(queue->endian, head);
	used_elem->len	= virtio_host_to_guest_u32(queue->endian, len);

	return used_elem;
}

struct vring_used_elem *virt_queue__set_used_elem(struct virt_queue *queue, u32 head, u32 len)
{
	struct vring_used_elem *used_elem;

	used_elem = virt_queue__set_used_elem_no_update(queue, head, len, 0);
	virt_queue__used_idx_advance(queue, 1);

	return used_elem;
}

static inline bool virt_desc__test_flag(struct virt_queue *vq,
					struct vring_desc *desc, u16 flag)
{
	return !!(virtio_guest_to_host_u16(vq->endian, desc->flags) & flag);
}

/*
 * Each buffer in the virtqueues is actually a chain of descriptors.  This
 * function returns the next descriptor in the chain, or max if we're at the
 * end.
 */
static unsigned next_desc(struct virt_queue *vq, struct vring_desc *desc,
			  unsigned int i, unsigned int max)
{
	unsigned int next;

	/* If this descriptor says it doesn't chain, we're done. */
	if (!virt_desc__test_flag(vq, &desc[i], VRING_DESC_F_NEXT))
		return max;

	next = virtio_guest_to_host_u16(vq->endian, desc[i].next);

	/* Make sure the guest isn't leading us off the end of the descriptor table. */
	return min(next, max);
}

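/*
 * Walk the descriptor chain starting at 'head', following an indirect table
 * if there is one, and translate each descriptor into a host iovec entry
 * while counting device-readable (out) and device-writable (in) buffers.
 */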
u16 virt_queue__get_head_iov(struct virt_queue *vq, struct iovec iov[], u16 *out, u16 *in, u16 head, struct kvm *kvm)
{
	struct vring_desc *desc;
	u16 idx;
	u16 max;

	idx = head;
	*out = *in = 0;
	max = vq->vring.num;
	desc = vq->vring.desc;

	if (virt_desc__test_flag(vq, &desc[idx], VRING_DESC_F_INDIRECT)) {
		max = virtio_guest_to_host_u32(vq->endian, desc[idx].len) / sizeof(struct vring_desc);
		desc = guest_flat_to_host(kvm, virtio_guest_to_host_u64(vq->endian, desc[idx].addr));
		idx = 0;
	}

	do {
		/* Translate this descriptor into a host iovec entry. */
		iov[*out + *in].iov_len = virtio_guest_to_host_u32(vq->endian, desc[idx].len);
		iov[*out + *in].iov_base = guest_flat_to_host(kvm,
							      virtio_guest_to_host_u64(vq->endian, desc[idx].addr));
		/* If this is an input descriptor, increment that count. */
		if (virt_desc__test_flag(vq, &desc[idx], VRING_DESC_F_WRITE))
			(*in)++;
		else
			(*out)++;
	} while ((idx = next_desc(vq, desc, idx, max)) != max);

	return head;
}

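/* Pop the next available descriptor chain and convert it into host iovecs. */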
u16 virt_queue__get_iov(struct virt_queue *vq, struct iovec iov[], u16 *out, u16 *in, struct kvm *kvm)
{
	u16 head;

	head = virt_queue__pop(vq);

	return virt_queue__get_head_iov(vq, iov, out, in, head, kvm);
}

/*
 * 'in' and 'out' are relative to the guest: 'in' buffers are writable by the
 * device (the guest reads them in), 'out' buffers are readable by the device.
 */
u16 virt_queue__get_inout_iov(struct kvm *kvm, struct virt_queue *queue,
			      struct iovec in_iov[], struct iovec out_iov[],
			      u16 *in, u16 *out)
{
	struct vring_desc *desc;
	u16 head, idx;

	idx = head = virt_queue__pop(queue);
	*out = *in = 0;
	do {
		u64 addr;
		desc = virt_queue__get_desc(queue, idx);
		addr = virtio_guest_to_host_u64(queue->endian, desc->addr);
		if (virt_desc__test_flag(queue, desc, VRING_DESC_F_WRITE)) {
			in_iov[*in].iov_base = guest_flat_to_host(kvm, addr);
			in_iov[*in].iov_len = virtio_guest_to_host_u32(queue->endian, desc->len);
			(*in)++;
		} else {
			out_iov[*out].iov_base = guest_flat_to_host(kvm, addr);
			out_iov[*out].iov_len = virtio_guest_to_host_u32(queue->endian, desc->len);
			(*out)++;
		}
		if (virt_desc__test_flag(queue, desc, VRING_DESC_F_NEXT))
			idx = virtio_guest_to_host_u16(queue->endian, desc->next);
		else
			break;
	} while (1);

	return head;
}

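/*
 * Map a virtqueue's rings into host memory. Legacy transports describe the
 * ring with a guest page frame number and an alignment; modern transports
 * provide separate 64-bit addresses for the descriptor table and the
 * available and used rings.
 */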
void virtio_init_device_vq(struct kvm *kvm, struct virtio_device *vdev,
			   struct virt_queue *vq, size_t nr_descs)
{
	struct vring_addr *addr = &vq->vring_addr;

	vq->endian		= vdev->endian;
	vq->use_event_idx	= (vdev->features & (1UL << VIRTIO_RING_F_EVENT_IDX));
	vq->enabled		= true;

	if (addr->legacy) {
		unsigned long base = (u64)addr->pfn * addr->pgsize;
		void *p = guest_flat_to_host(kvm, base);

		vring_init(&vq->vring, nr_descs, p, addr->align);
	} else {
		u64 desc = (u64)addr->desc_hi << 32 | addr->desc_lo;
		u64 avail = (u64)addr->avail_hi << 32 | addr->avail_lo;
		u64 used = (u64)addr->used_hi << 32 | addr->used_lo;

		vq->vring = (struct vring) {
			.desc	= guest_flat_to_host(kvm, desc),
			.used	= guest_flat_to_host(kvm, used),
			.avail	= guest_flat_to_host(kvm, avail),
			.num	= nr_descs,
		};
	}
}

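/*
 * Give the device a chance to clean up a virtqueue, then clear the queue
 * state so it can be set up again.
 */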
void virtio_exit_vq(struct kvm *kvm, struct virtio_device *vdev,
			   void *dev, int num)
{
	struct virt_queue *vq = vdev->ops->get_vq(kvm, dev, num);

	if (vq->enabled && vdev->ops->exit_vq)
		vdev->ops->exit_vq(kvm, dev, num);
	memset(vq, 0, sizeof(*vq));
}

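/*
 * With the legacy PCI layout, the first 4 bytes of the device-specific
 * region hold the MSI-X vector registers when MSI-X is enabled, which
 * pushes the device configuration back by 4 bytes. Classify the access and
 * return the offset within the targeted region.
 */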
int virtio__get_dev_specific_field(int offset, bool msix, u32 *config_off)
{
	if (msix) {
		if (offset < 4)
			return VIRTIO_PCI_O_MSIX;
		else
			offset -= 4;
	}

	*config_off = offset;

	return VIRTIO_PCI_O_CONFIG;
}

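/*
 * Decide whether the guest should be interrupted for newly used buffers,
 * honouring VRING_AVAIL_F_NO_INTERRUPT or, when negotiated, EVENT_IDX.
 */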
bool virtio_queue__should_signal(struct virt_queue *vq)
{
	u16 old_idx, new_idx, event_idx;

	/*
	 * Use mb to ensure the used idx has been updated before we signal the
	 * guest, and that we don't read a stale value for used_event. Without
	 * it we might skip a notification the guest is waiting for, or the
	 * guest may ignore the interrupt because it doesn't yet see the
	 * updated idx.
	 */
	mb();

	if (!vq->use_event_idx) {
		/*
		 * When VIRTIO_RING_F_EVENT_IDX isn't negotiated, interrupt the
		 * guest if it didn't explicitly request to be left alone.
		 */
		return !(virtio_guest_to_host_u16(vq->endian, vq->vring.avail->flags) &
			 VRING_AVAIL_F_NO_INTERRUPT);
	}

	old_idx		= vq->last_used_signalled;
	new_idx		= virtio_guest_to_host_u16(vq->endian, vq->vring.used->idx);
	event_idx	= virtio_guest_to_host_u16(vq->endian, vring_used_event(&vq->vring));

	if (vring_need_event(event_idx, new_idx, old_idx)) {
		vq->last_used_signalled = new_idx;
		return true;
	}

	return false;
}

void virtio_set_guest_features(struct kvm *kvm, struct virtio_device *vdev,
			       void *dev, u64 features)
{
	/* TODO: fail negotiation if features & ~host_features */

	vdev->features |= features;
}

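/*
 * Record a device status write from the guest and translate it into extended
 * VIRTIO__STATUS_* hints so backends only see meaningful transitions in their
 * notify_status() callback.
 */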
void virtio_notify_status(struct kvm *kvm, struct virtio_device *vdev,
			  void *dev, u8 status)
{
	u32 ext_status = status;

	vdev->status &= ~VIRTIO_CONFIG_S_MASK;
	vdev->status |= status;

	/* Add a few hints to help devices */
	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) &&
	    !(vdev->status & VIRTIO__STATUS_START)) {
		vdev->status |= VIRTIO__STATUS_START;
		ext_status |= VIRTIO__STATUS_START;

	} else if (!status && (vdev->status & VIRTIO__STATUS_START)) {
		vdev->status &= ~VIRTIO__STATUS_START;
		ext_status |= VIRTIO__STATUS_STOP;

		/*
		 * Reset virtqueues and stop all traffic now, so that the device
		 * can safely reset the backend in notify_status().
		 */
		vdev->ops->reset(kvm, vdev);
	}
	if (!status)
		ext_status |= VIRTIO__STATUS_CONFIG;

	if (vdev->ops->notify_status)
		vdev->ops->notify_status(kvm, dev, ext_status);
}

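/*
 * Bounds-checked access to the device-specific configuration space: copy
 * 'size' bytes between 'data' and the config area at 'offset', in the
 * direction selected by 'is_write'. Only 1, 2, 4 or 8 byte accesses are
 * accepted.
 */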
bool virtio_access_config(struct kvm *kvm, struct virtio_device *vdev,
			  void *dev, unsigned long offset, void *data,
			  size_t size, bool is_write)
{
	void *in, *out, *config;
	size_t config_size = vdev->ops->get_config_size(kvm, dev);

	if (WARN_ONCE(offset + size > config_size,
		      "Config access offset (%lu) is beyond config size (%zu)\n",
		      offset, config_size))
		return false;

	config = vdev->ops->get_config(kvm, dev) + offset;

	in = is_write ? data : config;
	out = is_write ? config : data;

	switch (size) {
	case 1:
		*(u8 *)out = *(u8 *)in;
		break;
	case 2:
		*(u16 *)out = *(u16 *)in;
		break;
	case 4:
		*(u32 *)out = *(u32 *)in;
		break;
	case 8:
		*(u64 *)out = *(u64 *)in;
		break;
	default:
		WARN_ONCE(1, "%s: invalid access size\n", __func__);
		return false;
	}

	return true;
}

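/*
 * Allocate the transport-specific state for a new device and wire up the
 * transport callbacks (signal_vq, signal_config, init, exit, reset) before
 * calling the transport's init function.
 */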
int virtio_init(struct kvm *kvm, void *dev, struct virtio_device *vdev,
		struct virtio_ops *ops, enum virtio_trans trans,
		int device_id, int subsys_id, int class)
{
	void *virtio;
	int r;

	switch (trans) {
	case VIRTIO_PCI_LEGACY:
		vdev->legacy			= true;
		/* fall through */
	case VIRTIO_PCI:
		virtio = calloc(sizeof(struct virtio_pci), 1);
		if (!virtio)
			return -ENOMEM;
		vdev->virtio			= virtio;
		vdev->ops			= ops;
		vdev->ops->signal_vq		= virtio_pci__signal_vq;
		vdev->ops->signal_config	= virtio_pci__signal_config;
		vdev->ops->init			= virtio_pci__init;
		vdev->ops->exit			= virtio_pci__exit;
		vdev->ops->reset		= virtio_pci__reset;
		r = vdev->ops->init(kvm, dev, vdev, device_id, subsys_id, class);
		break;
	case VIRTIO_MMIO_LEGACY:
		vdev->legacy			= true;
		/* fall through */
	case VIRTIO_MMIO:
		virtio = calloc(sizeof(struct virtio_mmio), 1);
		if (!virtio)
			return -ENOMEM;
		vdev->virtio			= virtio;
		vdev->ops			= ops;
		vdev->ops->signal_vq		= virtio_mmio_signal_vq;
		vdev->ops->signal_config	= virtio_mmio_signal_config;
		vdev->ops->init			= virtio_mmio_init;
		vdev->ops->exit			= virtio_mmio_exit;
		vdev->ops->reset		= virtio_mmio_reset;
		r = vdev->ops->init(kvm, dev, vdev, device_id, subsys_id, class);
		break;
	default:
		r = -1;
	}

	return r;
}

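/*
 * Register a compat warning that is printed when the guest kernel never
 * initializes the device, pointing the user at the kernel config option
 * required to enable it.
 */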
int virtio_compat_add_message(const char *device, const char *config)
{
	int len = 1024;
	int compat_id;
	char *title;
	char *desc;

	title = malloc(len);
	if (!title)
		return -ENOMEM;

	desc = malloc(len);
	if (!desc) {
		free(title);
		return -ENOMEM;
	}

	snprintf(title, len, "%s device was not detected.", device);
	snprintf(desc,  len, "While you have requested a %s device, "
			     "the guest kernel did not initialize it.\n"
			     "\tPlease make sure that the guest kernel was "
			     "compiled with %s=y enabled in .config.",
			     device, config);

	compat_id = compat__add_message(title, desc);

	free(desc);
	free(title);

	return compat_id;
}