xref: /kvmtool/virtio/core.c (revision 867b15ccd7dae9ba7a174f97d4fe76e90a79d957)
#include <linux/virtio_ring.h>
#include <linux/types.h>
#include <sys/uio.h>
#include <stdlib.h>

#include "kvm/guest_compat.h"
#include "kvm/barrier.h"
#include "kvm/virtio.h"
#include "kvm/virtio-pci.h"
#include "kvm/virtio-mmio.h"
#include "kvm/util.h"
#include "kvm/kvm.h"

const char *virtio_trans_name(enum virtio_trans trans)
{
	if (trans == VIRTIO_PCI)
		return "pci";
	else if (trans == VIRTIO_MMIO)
		return "mmio";
	return "unknown";
}

void virt_queue__used_idx_advance(struct virt_queue *queue, u16 jump)
{
	u16 idx = virtio_guest_to_host_u16(queue, queue->vring.used->idx);

	/*
	 * Use a wmb() to ensure the used element has been updated with its
	 * head and len before we advance idx: as soon as idx moves, the
	 * guest is free to consume the element.
	 */
	wmb();
	idx += jump;
	queue->vring.used->idx = virtio_host_to_guest_u16(queue, idx);
}

struct vring_used_elem *
virt_queue__set_used_elem_no_update(struct virt_queue *queue, u32 head,
				    u32 len, u16 offset)
{
	struct vring_used_elem *used_elem;
	u16 idx = virtio_guest_to_host_u16(queue, queue->vring.used->idx);

	idx += offset;
	used_elem	= &queue->vring.used->ring[idx % queue->vring.num];
	used_elem->id	= virtio_host_to_guest_u32(queue, head);
	used_elem->len	= virtio_host_to_guest_u32(queue, len);

	return used_elem;
}

struct vring_used_elem *virt_queue__set_used_elem(struct virt_queue *queue, u32 head, u32 len)
{
	struct vring_used_elem *used_elem;

	used_elem = virt_queue__set_used_elem_no_update(queue, head, len, 0);
	virt_queue__used_idx_advance(queue, 1);

	return used_elem;
}
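
/*
 * Example (sketch, not part of core.c): a backend that completes several
 * buffers at once can fill multiple used elements with
 * virt_queue__set_used_elem_no_update() and then publish them with a
 * single index update, so one barrier and one idx store cover the whole
 * batch. demo_complete_batch() and its arrays are hypothetical.
 */
static void demo_complete_batch(struct virt_queue *vq, u32 *heads,
				u32 *lens, u16 count)
{
	u16 i;

	for (i = 0; i < count; i++)
		virt_queue__set_used_elem_no_update(vq, heads[i], lens[i], i);

	/* Guest sees all 'count' elements at once. */
	virt_queue__used_idx_advance(vq, count);
}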

static inline bool virt_desc__test_flag(struct virt_queue *vq,
					struct vring_desc *desc, u16 flag)
{
	return !!(virtio_guest_to_host_u16(vq, desc->flags) & flag);
}

/*
 * Each buffer in the virtqueues is actually a chain of descriptors.  This
 * function returns the next descriptor in the chain, or max if we're at the
 * end.
 */
static unsigned next_desc(struct virt_queue *vq, struct vring_desc *desc,
			  unsigned int i, unsigned int max)
{
	unsigned int next;

	/* If this descriptor says it doesn't chain, we're done. */
	if (!virt_desc__test_flag(vq, &desc[i], VRING_DESC_F_NEXT))
		return max;

	next = virtio_guest_to_host_u16(vq, desc[i].next);

	/* Ensure the guest isn't leading us off the end of the descriptor table. */
	return min(next, max);
}

u16 virt_queue__get_head_iov(struct virt_queue *vq, struct iovec iov[], u16 *out, u16 *in, u16 head, struct kvm *kvm)
{
	struct vring_desc *desc;
	u16 idx;
	u16 max;

	idx = head;
	*out = *in = 0;
	max = vq->vring.num;
	desc = vq->vring.desc;

	if (virt_desc__test_flag(vq, &desc[idx], VRING_DESC_F_INDIRECT)) {
		max = virtio_guest_to_host_u32(vq, desc[idx].len) / sizeof(struct vring_desc);
		desc = guest_flat_to_host(kvm, virtio_guest_to_host_u64(vq, desc[idx].addr));
		idx = 0;
	}

	do {
		/* Translate this descriptor into a host iovec entry. */
		iov[*out + *in].iov_len = virtio_guest_to_host_u32(vq, desc[idx].len);
		iov[*out + *in].iov_base = guest_flat_to_host(kvm,
							      virtio_guest_to_host_u64(vq, desc[idx].addr));
		/* If this is an input descriptor, increment that count. */
		if (virt_desc__test_flag(vq, &desc[idx], VRING_DESC_F_WRITE))
			(*in)++;
		else
			(*out)++;
	} while ((idx = next_desc(vq, desc, idx, max)) != max);

	return head;
}

u16 virt_queue__get_iov(struct virt_queue *vq, struct iovec iov[], u16 *out, u16 *in, struct kvm *kvm)
{
	u16 head;

	head = virt_queue__pop(vq);

	return virt_queue__get_head_iov(vq, iov, out, in, head, kvm);
}
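
/*
 * Example (sketch, not part of core.c): the typical consume loop of a
 * device backend. Everything prefixed demo_ is hypothetical, as is
 * VIRTIO_DEMO_QUEUE_SIZE; a real device sizes the iovec array to its
 * queue depth.
 */
#define VIRTIO_DEMO_QUEUE_SIZE	256

/*
 * Hypothetical backend hook: consume/fill the buffers, return the number
 * of bytes written to the "in" (host-to-guest) entries.
 */
static u32 demo_process(struct iovec *iov, u16 out, u16 in);

static void demo_consume_queue(struct kvm *kvm, struct virt_queue *vq)
{
	struct iovec iov[VIRTIO_DEMO_QUEUE_SIZE];
	u16 out, in, head;

	while (virt_queue__available(vq)) {
		head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
		virt_queue__set_used_elem(vq, head, demo_process(iov, out, in));
	}
}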

/*
 * "in" and "out" are relative to the guest: "in" buffers are written by the
 * host, "out" buffers are read by it.
 */
u16 virt_queue__get_inout_iov(struct kvm *kvm, struct virt_queue *queue,
			      struct iovec in_iov[], struct iovec out_iov[],
			      u16 *in, u16 *out)
{
	struct vring_desc *desc;
	u16 head, idx;

	idx = head = virt_queue__pop(queue);
	*out = *in = 0;
	do {
		u64 addr;

		desc = virt_queue__get_desc(queue, idx);
		addr = virtio_guest_to_host_u64(queue, desc->addr);
		if (virt_desc__test_flag(queue, desc, VRING_DESC_F_WRITE)) {
			in_iov[*in].iov_base = guest_flat_to_host(kvm, addr);
			in_iov[*in].iov_len = virtio_guest_to_host_u32(queue, desc->len);
			(*in)++;
		} else {
			out_iov[*out].iov_base = guest_flat_to_host(kvm, addr);
			out_iov[*out].iov_len = virtio_guest_to_host_u32(queue, desc->len);
			(*out)++;
		}
		if (virt_desc__test_flag(queue, desc, VRING_DESC_F_NEXT))
			idx = virtio_guest_to_host_u16(queue, desc->next);
		else
			break;
	} while (1);

	return head;
}
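
/*
 * Example (sketch): virt_queue__get_inout_iov() suits request/response
 * devices that read a request header from the out iovecs and write the
 * reply (data plus status) into the in iovecs. demo_handle_request() is
 * hypothetical; VIRTIO_DEMO_QUEUE_SIZE is from the sketch above.
 */
static void demo_handle_request(struct kvm *kvm, struct virt_queue *vq)
{
	struct iovec in_iov[VIRTIO_DEMO_QUEUE_SIZE];
	struct iovec out_iov[VIRTIO_DEMO_QUEUE_SIZE];
	u16 in, out, head;
	u32 written = 0;

	head = virt_queue__get_inout_iov(kvm, vq, in_iov, out_iov, &in, &out);

	/*
	 * Parse the request from out_iov[0..out-1], fill in_iov[0..in-1]
	 * with the reply, and account the bytes written in 'written'.
	 */

	virt_queue__set_used_elem(vq, head, written);
}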

void virtio_init_device_vq(struct kvm *kvm, struct virtio_device *vdev,
			   struct virt_queue *vq, size_t nr_descs)
{
	struct vring_addr *addr = &vq->vring_addr;

	vq->endian		= vdev->endian;
	/* VIRTIO_RING_F_EVENT_IDX is a bit number, so test it as a mask. */
	vq->use_event_idx	= (vdev->features & (1UL << VIRTIO_RING_F_EVENT_IDX));
	vq->enabled		= true;

	if (addr->legacy) {
		unsigned long base = (u64)addr->pfn * addr->pgsize;
		void *p = guest_flat_to_host(kvm, base);

		vring_init(&vq->vring, nr_descs, p, addr->align);
	} else {
		u64 desc = (u64)addr->desc_hi << 32 | addr->desc_lo;
		u64 avail = (u64)addr->avail_hi << 32 | addr->avail_lo;
		u64 used = (u64)addr->used_hi << 32 | addr->used_lo;

		vq->vring = (struct vring) {
			.desc	= guest_flat_to_host(kvm, desc),
			.used	= guest_flat_to_host(kvm, used),
			.avail	= guest_flat_to_host(kvm, avail),
			.num	= nr_descs,
		};
	}
}
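
/*
 * Example (sketch): in the legacy layout above, vring_init() (from
 * <linux/virtio_ring.h>) carves a single contiguous guest region into the
 * three rings: descriptors first, then the avail ring, then, at the next
 * 'align' boundary, the used ring. demo_used_offset() is a hypothetical
 * illustration of that arithmetic; vring_init() remains authoritative.
 */
static unsigned long demo_used_offset(unsigned int num, unsigned long align)
{
	/* flags + idx + ring[num] + used_event, all u16 */
	unsigned long avail_end = num * sizeof(struct vring_desc)
				  + sizeof(u16) * (3 + num);

	/* 'align' is a power of two per the virtio spec */
	return (avail_end + align - 1) & ~(align - 1);
}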

void virtio_exit_vq(struct kvm *kvm, struct virtio_device *vdev,
		    void *dev, int num)
{
	struct virt_queue *vq = vdev->ops->get_vq(kvm, dev, num);

	if (vq->enabled && vdev->ops->exit_vq)
		vdev->ops->exit_vq(kvm, dev, num);
	memset(vq, 0, sizeof(*vq));
}

/*
 * In the legacy virtio-PCI layout, the MSI-X vector registers (4 bytes) sit
 * in front of the device-specific configuration when MSI-X is enabled;
 * split the access accordingly.
 */
int virtio__get_dev_specific_field(int offset, bool msix, u32 *config_off)
{
	if (msix) {
		if (offset < 4)
			return VIRTIO_PCI_O_MSIX;
		else
			offset -= 4;
	}

	*config_off = offset;

	return VIRTIO_PCI_O_CONFIG;
}
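
/*
 * Example (sketch): how a legacy virtio-PCI register handler might use the
 * helper above for a read from the device-specific region. demo_config_in()
 * is hypothetical.
 */
static void demo_config_in(struct kvm *kvm, struct virtio_device *vdev,
			   void *dev, unsigned long offset, void *data,
			   size_t size, bool msix)
{
	u32 config_offset;

	if (virtio__get_dev_specific_field(offset, msix,
					   &config_offset) == VIRTIO_PCI_O_MSIX) {
		/* First 4 bytes: the MSI-X config/queue vector registers. */
		return;
	}

	/* VIRTIO_PCI_O_CONFIG: forward to the device config space. */
	virtio_access_config(kvm, vdev, dev, config_offset, data, size, false);
}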

bool virtio_queue__should_signal(struct virt_queue *vq)
{
	u16 old_idx, new_idx, event_idx;

	/*
	 * Use a mb() to ensure the used idx has been published before we
	 * decide whether to signal the guest, and so that we don't read a
	 * stale value for used_event. Without it we might skip a
	 * notification we need to send, or the guest might ignore the queue
	 * because it doesn't see the updated idx yet.
	 */
	mb();

	if (!vq->use_event_idx) {
		/*
		 * When VIRTIO_RING_F_EVENT_IDX isn't negotiated, interrupt the
		 * guest if it didn't explicitly request to be left alone.
		 */
		return !(virtio_guest_to_host_u16(vq, vq->vring.avail->flags) &
			 VRING_AVAIL_F_NO_INTERRUPT);
	}

	old_idx		= vq->last_used_signalled;
	new_idx		= virtio_guest_to_host_u16(vq, vq->vring.used->idx);
	event_idx	= virtio_guest_to_host_u16(vq, vring_used_event(&vq->vring));

	if (vring_need_event(event_idx, new_idx, old_idx)) {
		vq->last_used_signalled = new_idx;
		return true;
	}

	return false;
}
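
/*
 * Example (sketch): the usual completion path of a backend. With
 * VIRTIO_RING_F_EVENT_IDX, vring_need_event() (from <linux/virtio_ring.h>)
 * boils down to the wrap-safe check
 * (u16)(new_idx - event_idx - 1) < (u16)(new_idx - old_idx), i.e. we
 * crossed the guest's requested event index since the last signal.
 * demo_complete_and_notify() is hypothetical.
 */
static void demo_complete_and_notify(struct kvm *kvm,
				     struct virtio_device *vdev,
				     struct virt_queue *vq, u32 vq_num,
				     u32 head, u32 len)
{
	virt_queue__set_used_elem(vq, head, len);

	if (virtio_queue__should_signal(vq))
		vdev->ops->signal_vq(kvm, vdev, vq_num);
}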

void virtio_set_guest_features(struct kvm *kvm, struct virtio_device *vdev,
			       void *dev, u32 features)
{
	/* TODO: fail negotiation if features & ~host_features */

	vdev->features = features;
	vdev->ops->set_guest_features(kvm, dev, features);
}
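
/*
 * Example (sketch): the TODO above could be implemented by rejecting bits
 * the device never offered, along these lines (host_features is a
 * hypothetical field recording the offered feature set):
 *
 *	if (features & ~vdev->host_features)
 *		return -EINVAL;
 */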

void virtio_notify_status(struct kvm *kvm, struct virtio_device *vdev,
			  void *dev, u8 status)
{
	u32 ext_status = status;

	vdev->status &= ~VIRTIO_CONFIG_S_MASK;
	vdev->status |= status;

	/* Add a few hints to help devices */
	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) &&
	    !(vdev->status & VIRTIO__STATUS_START)) {
		vdev->status |= VIRTIO__STATUS_START;
		ext_status |= VIRTIO__STATUS_START;

	} else if (!status && (vdev->status & VIRTIO__STATUS_START)) {
		vdev->status &= ~VIRTIO__STATUS_START;
		ext_status |= VIRTIO__STATUS_STOP;

		/*
		 * Reset virtqueues and stop all traffic now, so that the device
		 * can safely reset the backend in notify_status().
		 */
		vdev->ops->reset(kvm, vdev);
	}
	if (!status)
		ext_status |= VIRTIO__STATUS_CONFIG;

	if (vdev->ops->notify_status)
		vdev->ops->notify_status(kvm, dev, ext_status);
}
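
/*
 * Example (sketch): a device's notify_status() callback can key off the
 * extended bits set above to start and stop its backend exactly once.
 * demo_notify_status() is hypothetical.
 */
static void demo_notify_status(struct kvm *kvm, void *dev, u32 status)
{
	if (status & VIRTIO__STATUS_START) {
		/* Driver wrote DRIVER_OK: start backend I/O (threads, fds). */
	} else if (status & VIRTIO__STATUS_STOP) {
		/* Driver reset the device: quiesce and flush the backend. */
	}

	if (status & VIRTIO__STATUS_CONFIG) {
		/* Device was reset: restore default config-space contents. */
	}
}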

bool virtio_access_config(struct kvm *kvm, struct virtio_device *vdev,
			  void *dev, unsigned long offset, void *data,
			  size_t size, bool is_write)
{
	void *in, *out, *config;
	size_t config_size = vdev->ops->get_config_size(kvm, dev);

	if (WARN_ONCE(offset + size > config_size,
		      "Config access offset (%lu) is beyond config size (%zu)\n",
		      offset, config_size))
		return false;

	config = vdev->ops->get_config(kvm, dev) + offset;

	in = is_write ? data : config;
	out = is_write ? config : data;

	switch (size) {
	case 1:
		*(u8 *)out = *(u8 *)in;
		break;
	case 2:
		*(u16 *)out = *(u16 *)in;
		break;
	case 4:
		*(u32 *)out = *(u32 *)in;
		break;
	case 8:
		*(u64 *)out = *(u64 *)in;
		break;
	default:
		WARN_ONCE(1, "%s: invalid access size\n", __func__);
		return false;
	}

	return true;
}
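
/*
 * Example (sketch): a transport's config-window handler can delegate
 * directly to virtio_access_config(), which validates both the bounds and
 * the access size. demo_config_window() is hypothetical.
 */
static void demo_config_window(struct kvm *kvm, struct virtio_device *vdev,
			       void *dev, unsigned long offset, void *data,
			       size_t size, bool is_write)
{
	if (!virtio_access_config(kvm, vdev, dev, offset, data, size,
				  is_write) && !is_write)
		/* Reject odd or out-of-bounds accesses; reads return zero. */
		memset(data, 0, size);
}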

int virtio_init(struct kvm *kvm, void *dev, struct virtio_device *vdev,
		struct virtio_ops *ops, enum virtio_trans trans,
		int device_id, int subsys_id, int class)
{
	void *virtio;
	int r;

	switch (trans) {
	case VIRTIO_PCI:
		virtio = calloc(1, sizeof(struct virtio_pci));
		if (!virtio)
			return -ENOMEM;
		vdev->virtio			= virtio;
		vdev->ops			= ops;
		vdev->ops->signal_vq		= virtio_pci__signal_vq;
		vdev->ops->signal_config	= virtio_pci__signal_config;
		vdev->ops->init			= virtio_pci__init;
		vdev->ops->exit			= virtio_pci__exit;
		vdev->ops->reset		= virtio_pci__reset;
		r = vdev->ops->init(kvm, dev, vdev, device_id, subsys_id, class);
		break;
	case VIRTIO_MMIO:
		virtio = calloc(1, sizeof(struct virtio_mmio));
		if (!virtio)
			return -ENOMEM;
		vdev->virtio			= virtio;
		vdev->ops			= ops;
		vdev->ops->signal_vq		= virtio_mmio_signal_vq;
		vdev->ops->signal_config	= virtio_mmio_signal_config;
		vdev->ops->init			= virtio_mmio_init;
		vdev->ops->exit			= virtio_mmio_exit;
		vdev->ops->reset		= virtio_mmio_reset;
		r = vdev->ops->init(kvm, dev, vdev, device_id, subsys_id, class);
		break;
	default:
		r = -1;
	}

	return r;
}
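
/*
 * Example (sketch): bringing up a device. virtio_init() allocates the
 * transport state and fills in the transport callbacks (signal_vq,
 * signal_config, init, exit, reset); the caller provides the
 * device-specific ones in its ops table. demo_vdev, demo_dev_ops and
 * demo_device_init() are hypothetical; the ID constants are those used by
 * the in-tree devices (see include/kvm/virtio-pci-dev.h).
 */
static struct virtio_device demo_vdev;

static struct virtio_ops demo_dev_ops = {
	/*
	 * .get_config, .get_config_size, .init_vq, .notify_vq, .get_vq,
	 * ... : device-specific callbacks go here.
	 */
};

static int demo_device_init(struct kvm *kvm, void *demo_dev)
{
	return virtio_init(kvm, demo_dev, &demo_vdev, &demo_dev_ops,
			   VIRTIO_PCI, PCI_DEVICE_ID_VIRTIO_NET,
			   VIRTIO_ID_NET, PCI_CLASS_NET);
}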

int virtio_compat_add_message(const char *device, const char *config)
{
	int len = 1024;
	int compat_id;
	char *title;
	char *desc;

	title = malloc(len);
	if (!title)
		return -ENOMEM;

	desc = malloc(len);
	if (!desc) {
		free(title);
		return -ENOMEM;
	}

	snprintf(title, len, "%s device was not detected.", device);
	snprintf(desc,  len, "While you have requested a %s device, "
			     "the guest kernel did not initialize it.\n"
			     "\tPlease make sure that the guest kernel was "
			     "compiled with %s=y enabled in its .config.",
			     device, config);

	compat_id = compat__add_message(title, desc);

	free(desc);
	free(title);

	return compat_id;
}
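
/*
 * Example: in-tree devices register a compat message when they are set up
 * and drop it once the guest driver touches the device, along the lines of:
 *
 *	compat_id = virtio_compat_add_message("virtio-net",
 *					      "CONFIG_VIRTIO_NET");
 *	...
 *	compat__remove_message(compat_id);
 */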
397