#include <linux/virtio_ring.h>
#include <linux/types.h>
#include <sys/uio.h>
#include <stdlib.h>

#include "kvm/guest_compat.h"
#include "kvm/barrier.h"
#include "kvm/virtio.h"
#include "kvm/virtio-pci.h"
#include "kvm/virtio-mmio.h"
#include "kvm/util.h"
#include "kvm/kvm.h"


const char *virtio_trans_name(enum virtio_trans trans)
{
	if (trans == VIRTIO_PCI || trans == VIRTIO_PCI_LEGACY)
		return "pci";
	else if (trans == VIRTIO_MMIO || trans == VIRTIO_MMIO_LEGACY)
		return "mmio";
	return "unknown";
}

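/*
 * Option-parsing callback for the virtio transport command-line switches.
 * "--virtio-transport" selects pci/pci-legacy (and, on architectures that
 * support it, mmio/mmio-legacy), while the older "--virtio-legacy" and
 * "--force-pci" switches are handled as shortcuts for compatibility.
 */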
int virtio_transport_parser(const struct option *opt, const char *arg, int unset)
{
	enum virtio_trans *type = opt->value;
	struct kvm *kvm;

	if (!strcmp(opt->long_name, "virtio-transport")) {
		if (!strcmp(arg, "pci")) {
			*type = VIRTIO_PCI;
		} else if (!strcmp(arg, "pci-legacy")) {
			*type = VIRTIO_PCI_LEGACY;
#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
		} else if (!strcmp(arg, "mmio")) {
			*type = VIRTIO_MMIO;
		} else if (!strcmp(arg, "mmio-legacy")) {
			*type = VIRTIO_MMIO_LEGACY;
#endif
		} else {
			pr_err("virtio-transport: unknown type \"%s\"\n", arg);
			return -1;
		}
	} else if (!strcmp(opt->long_name, "virtio-legacy")) {
		*type = VIRTIO_PCI_LEGACY;
	} else if (!strcmp(opt->long_name, "force-pci")) {
		kvm = opt->ptr;
		kvm->cfg.virtio_transport = VIRTIO_PCI;
	}

	return 0;
}

void virt_queue__used_idx_advance(struct virt_queue *queue, u16 jump)
{
	u16 idx = virtio_guest_to_host_u16(queue->endian,
					   queue->vring.used->idx);

	/*
	 * Use wmb to ensure that the used element was updated with head and
	 * len. We need a wmb here since we can't advance idx unless we're
	 * ready to pass the used element to the guest.
	 */
	wmb();
	idx += jump;
	queue->vring.used->idx = virtio_host_to_guest_u16(queue->endian, idx);
}

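/*
 * Fill in the used ring entry at (used->idx + offset) for a completed
 * descriptor chain, without publishing the new index to the guest. The
 * caller is expected to advance used->idx afterwards, e.g. with
 * virt_queue__used_idx_advance().
 */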
struct vring_used_elem *
virt_queue__set_used_elem_no_update(struct virt_queue *queue, u32 head,
				    u32 len, u16 offset)
{
	struct vring_used_elem *used_elem;
	u16 idx = virtio_guest_to_host_u16(queue->endian, queue->vring.used->idx);

	idx += offset;
	used_elem = &queue->vring.used->ring[idx % queue->vring.num];
	used_elem->id = virtio_host_to_guest_u32(queue->endian, head);
	used_elem->len = virtio_host_to_guest_u32(queue->endian, len);

	return used_elem;
}

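/*
 * Mark a single descriptor chain as used and immediately publish it to the
 * guest by advancing the used index by one.
 */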
struct vring_used_elem *virt_queue__set_used_elem(struct virt_queue *queue, u32 head, u32 len)
{
	struct vring_used_elem *used_elem;

	used_elem = virt_queue__set_used_elem_no_update(queue, head, len, 0);
	virt_queue__used_idx_advance(queue, 1);

	return used_elem;
}

static inline bool virt_desc__test_flag(struct virt_queue *vq,
					struct vring_desc *desc, u16 flag)
{
	return !!(virtio_guest_to_host_u16(vq->endian, desc->flags) & flag);
}

/*
 * Each buffer in the virtqueues is actually a chain of descriptors. This
 * function returns the next descriptor in the chain, or max if we're at the
 * end.
 */
static unsigned next_desc(struct virt_queue *vq, struct vring_desc *desc,
			  unsigned int i, unsigned int max)
{
	unsigned int next;

	/* If this descriptor says it doesn't chain, we're done. */
	if (!virt_desc__test_flag(vq, &desc[i], VRING_DESC_F_NEXT))
		return max;

	next = virtio_guest_to_host_u16(vq->endian, desc[i].next);

	/* Ensure they're not leading us off the end of the descriptors. */
	return min(next, max);
}

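/*
 * Translate the descriptor chain starting at 'head' (including indirect
 * descriptor tables) into host iovecs. Device-readable ("out") buffers and
 * device-writable ("in") buffers are counted separately; the chain's head
 * index is returned so it can later be placed in the used ring.
 */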
u16 virt_queue__get_head_iov(struct virt_queue *vq, struct iovec iov[], u16 *out, u16 *in, u16 head, struct kvm *kvm)
{
	struct vring_desc *desc;
	u16 idx;
	u16 max;

	idx = head;
	*out = *in = 0;
	max = vq->vring.num;
	desc = vq->vring.desc;

	if (virt_desc__test_flag(vq, &desc[idx], VRING_DESC_F_INDIRECT)) {
		max = virtio_guest_to_host_u32(vq->endian, desc[idx].len) / sizeof(struct vring_desc);
		desc = guest_flat_to_host(kvm, virtio_guest_to_host_u64(vq->endian, desc[idx].addr));
		idx = 0;
	}

	do {
		/* Grab the first descriptor, and check it's OK. */
		iov[*out + *in].iov_len = virtio_guest_to_host_u32(vq->endian, desc[idx].len);
		iov[*out + *in].iov_base = guest_flat_to_host(kvm,
				virtio_guest_to_host_u64(vq->endian, desc[idx].addr));
		/* If this is an input descriptor, increment that count. */
		if (virt_desc__test_flag(vq, &desc[idx], VRING_DESC_F_WRITE))
			(*in)++;
		else
			(*out)++;
	} while ((idx = next_desc(vq, desc, idx, max)) != max);

	return head;
}

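/*
 * Pop the next available descriptor chain off the queue and translate it
 * into host iovecs. A typical device loop might look roughly like:
 *
 *	while (virt_queue__available(vq)) {
 *		head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
 *		... process iov[0 .. out + in) ...
 *		virt_queue__set_used_elem(vq, head, len);
 *	}
 *
 * (sketch only; the exact helpers a device backend uses may vary)
 */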
u16 virt_queue__get_iov(struct virt_queue *vq, struct iovec iov[], u16 *out, u16 *in, struct kvm *kvm)
{
	u16 head;

	head = virt_queue__pop(vq);

	return virt_queue__get_head_iov(vq, iov, out, in, head, kvm);
}

/* in and out are relative to guest */
u16 virt_queue__get_inout_iov(struct kvm *kvm, struct virt_queue *queue,
			      struct iovec in_iov[], struct iovec out_iov[],
			      u16 *in, u16 *out)
{
	struct vring_desc *desc;
	u16 head, idx;

	idx = head = virt_queue__pop(queue);
	*out = *in = 0;
	do {
		u64 addr;
		desc = virt_queue__get_desc(queue, idx);
		addr = virtio_guest_to_host_u64(queue->endian, desc->addr);
		if (virt_desc__test_flag(queue, desc, VRING_DESC_F_WRITE)) {
			in_iov[*in].iov_base = guest_flat_to_host(kvm, addr);
			in_iov[*in].iov_len = virtio_guest_to_host_u32(queue->endian, desc->len);
			(*in)++;
		} else {
			out_iov[*out].iov_base = guest_flat_to_host(kvm, addr);
			out_iov[*out].iov_len = virtio_guest_to_host_u32(queue->endian, desc->len);
			(*out)++;
		}
		if (virt_desc__test_flag(queue, desc, VRING_DESC_F_NEXT))
			idx = virtio_guest_to_host_u16(queue->endian, desc->next);
		else
			break;
	} while (1);

	return head;
}

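/*
 * Map the guest-supplied ring addresses and initialize the host view of a
 * virtqueue. Legacy devices provide a single page frame number with a fixed
 * layout (vring_init()), whereas modern devices hand us separate addresses
 * for the descriptor table, available ring and used ring.
 */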
void virtio_init_device_vq(struct kvm *kvm, struct virtio_device *vdev,
			   struct virt_queue *vq, size_t nr_descs)
{
	struct vring_addr *addr = &vq->vring_addr;

	vq->endian = vdev->endian;
	vq->use_event_idx = (vdev->features & (1UL << VIRTIO_RING_F_EVENT_IDX));
	vq->enabled = true;
	vq->vdev = vdev;

	if (addr->legacy) {
		unsigned long base = (u64)addr->pfn * addr->pgsize;
		void *p = guest_flat_to_host(kvm, base);

		vring_init(&vq->vring, nr_descs, p, addr->align);
	} else {
		u64 desc = (u64)addr->desc_hi << 32 | addr->desc_lo;
		u64 avail = (u64)addr->avail_hi << 32 | addr->avail_lo;
		u64 used = (u64)addr->used_hi << 32 | addr->used_lo;

		vq->vring = (struct vring) {
			.desc	= guest_flat_to_host(kvm, desc),
			.used	= guest_flat_to_host(kvm, used),
			.avail	= guest_flat_to_host(kvm, avail),
			.num	= nr_descs,
		};
	}
}

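/*
 * Tear down a single virtqueue: give the device backend a chance to clean up
 * via exit_vq(), then clear the host state so the queue is seen as disabled.
 */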
void virtio_exit_vq(struct kvm *kvm, struct virtio_device *vdev,
		    void *dev, int num)
{
	struct virt_queue *vq = vdev->ops->get_vq(kvm, dev, num);

	if (vq->enabled && vdev->ops->exit_vq)
		vdev->ops->exit_vq(kvm, dev, num);
	memset(vq, 0, sizeof(*vq));
}

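/*
 * Classify an access to the device-specific region of a legacy virtio-pci
 * BAR: with MSI-X enabled the first four bytes belong to the MSI-X vector
 * registers, and remaining offsets are shifted down to index the device
 * config space.
 */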
int virtio__get_dev_specific_field(int offset, bool msix, u32 *config_off)
{
	if (msix) {
		if (offset < 4)
			return VIRTIO_PCI_O_MSIX;
		else
			offset -= 4;
	}

	*config_off = offset;

	return VIRTIO_PCI_O_CONFIG;
}

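/*
 * Decide whether the guest needs to be interrupted after new used elements
 * have been published, honouring either VRING_AVAIL_F_NO_INTERRUPT or, when
 * VIRTIO_RING_F_EVENT_IDX was negotiated, the used_event index.
 */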
bool virtio_queue__should_signal(struct virt_queue *vq)
{
	u16 old_idx, new_idx, event_idx;

	/*
	 * Use mb to ensure the used idx has been increased before we signal
	 * the guest, and that we don't read a stale value for used_event.
	 * Without a mb here we might not send a notification that we need to
	 * send, or the guest may ignore the queue since it won't see an
	 * updated idx.
	 */
	mb();

	if (!vq->use_event_idx) {
		/*
		 * When VIRTIO_RING_F_EVENT_IDX isn't negotiated, interrupt the
		 * guest if it didn't explicitly request to be left alone.
		 */
		return !(virtio_guest_to_host_u16(vq->endian, vq->vring.avail->flags) &
			 VRING_AVAIL_F_NO_INTERRUPT);
	}

	old_idx = vq->last_used_signalled;
	new_idx = virtio_guest_to_host_u16(vq->endian, vq->vring.used->idx);
	event_idx = virtio_guest_to_host_u16(vq->endian, vring_used_event(&vq->vring));

	if (vring_need_event(event_idx, new_idx, old_idx)) {
		vq->last_used_signalled = new_idx;
		return true;
	}

	return false;
}

void virtio_set_guest_features(struct kvm *kvm, struct virtio_device *vdev,
			       void *dev, u64 features)
{
	/* TODO: fail negotiation if features & ~host_features */

	vdev->features |= features;
}

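/*
 * Track the device status register and derive the internal START/STOP/CONFIG
 * hints from it before forwarding the extended status to the device's
 * notify_status() callback.
 */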
void virtio_notify_status(struct kvm *kvm, struct virtio_device *vdev,
			  void *dev, u8 status)
{
	u32 ext_status = status;

	vdev->status &= ~VIRTIO_CONFIG_S_MASK;
	vdev->status |= status;

	/* Add a few hints to help devices */
	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) &&
	    !(vdev->status & VIRTIO__STATUS_START)) {
		vdev->status |= VIRTIO__STATUS_START;
		ext_status |= VIRTIO__STATUS_START;

	} else if (!status && (vdev->status & VIRTIO__STATUS_START)) {
		vdev->status &= ~VIRTIO__STATUS_START;
		ext_status |= VIRTIO__STATUS_STOP;

		/*
		 * Reset virtqueues and stop all traffic now, so that the
		 * device can safely reset the backend in notify_status().
		 */
		vdev->ops->reset(kvm, vdev);
	}
	if (!status)
		ext_status |= VIRTIO__STATUS_CONFIG;

	if (vdev->ops->notify_status)
		vdev->ops->notify_status(kvm, dev, ext_status);
}

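/*
 * Perform a bounds-checked read or write of the device-specific config
 * space. Only naturally sized accesses of 1, 2, 4 or 8 bytes are accepted;
 * anything else triggers a one-time warning and fails.
 */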
bool virtio_access_config(struct kvm *kvm, struct virtio_device *vdev,
			  void *dev, unsigned long offset, void *data,
			  size_t size, bool is_write)
{
	void *in, *out, *config;
	size_t config_size = vdev->ops->get_config_size(kvm, dev);

	if (WARN_ONCE(offset + size > config_size,
		      "Config access offset (%lu) is beyond config size (%zu)\n",
		      offset, config_size))
		return false;

	config = vdev->ops->get_config(kvm, dev) + offset;

	in = is_write ? data : config;
	out = is_write ? config : data;

	switch (size) {
	case 1:
		*(u8 *)out = *(u8 *)in;
		break;
	case 2:
		*(u16 *)out = *(u16 *)in;
		break;
	case 4:
		*(u32 *)out = *(u32 *)in;
		break;
	case 8:
		*(u64 *)out = *(u64 *)in;
		break;
	default:
		WARN_ONCE(1, "%s: invalid access size\n", __func__);
		return false;
	}

	return true;
}

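/*
 * Allocate and initialize the transport layer (PCI or MMIO, modern or
 * legacy) for a device, wiring the transport-specific callbacks into the
 * device's virtio_ops before calling the transport init routine.
 */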
int virtio_init(struct kvm *kvm, void *dev, struct virtio_device *vdev,
		struct virtio_ops *ops, enum virtio_trans trans,
		int device_id, int subsys_id, int class)
{
	void *virtio;
	int r;

	switch (trans) {
	case VIRTIO_PCI_LEGACY:
		vdev->legacy = true;
		/* fall through */
	case VIRTIO_PCI:
		virtio = calloc(sizeof(struct virtio_pci), 1);
		if (!virtio)
			return -ENOMEM;
		vdev->virtio = virtio;
		vdev->ops = ops;
		vdev->ops->signal_vq = virtio_pci__signal_vq;
		vdev->ops->signal_config = virtio_pci__signal_config;
		vdev->ops->init = virtio_pci__init;
		vdev->ops->exit = virtio_pci__exit;
		vdev->ops->reset = virtio_pci__reset;
		r = vdev->ops->init(kvm, dev, vdev, device_id, subsys_id, class);
		break;
	case VIRTIO_MMIO_LEGACY:
		vdev->legacy = true;
		/* fall through */
	case VIRTIO_MMIO:
		virtio = calloc(sizeof(struct virtio_mmio), 1);
		if (!virtio)
			return -ENOMEM;
		vdev->virtio = virtio;
		vdev->ops = ops;
		vdev->ops->signal_vq = virtio_mmio_signal_vq;
		vdev->ops->signal_config = virtio_mmio_signal_config;
		vdev->ops->init = virtio_mmio_init;
		vdev->ops->exit = virtio_mmio_exit;
		vdev->ops->reset = virtio_mmio_reset;
		r = vdev->ops->init(kvm, dev, vdev, device_id, subsys_id, class);
		break;
	default:
		r = -1;
	}

	return r;
}

void virtio_exit(struct kvm *kvm, struct virtio_device *vdev)
{
	if (vdev->ops && vdev->ops->exit)
		vdev->ops->exit(kvm, vdev);
}

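/*
 * Register a compatibility message that warns the user when the guest never
 * initializes the device, pointing at the kernel config option needed to
 * enable it.
 */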
int virtio_compat_add_message(const char *device, const char *config)
{
	int len = 1024;
	int compat_id;
	char *title;
	char *desc;

	title = malloc(len);
	if (!title)
		return -ENOMEM;

	desc = malloc(len);
	if (!desc) {
		free(title);
		return -ENOMEM;
	}

	snprintf(title, len, "%s device was not detected.", device);
	snprintf(desc, len, "While you have requested a %s device, "
		 "the guest kernel did not initialize it.\n"
		 "\tPlease make sure that the guest kernel was "
		 "compiled with %s=y enabled in .config.",
		 device, config);

	compat_id = compat__add_message(title, desc);

	free(desc);
	free(title);

	return compat_id;
}