/*
 * Virtio PCI Bindings
 *
 * Copyright IBM, Corp. 2007
 * Copyright (c) 2009 CodeSourcery
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Paul Brook        <paul@codesourcery.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"

#include "exec/memop.h"
#include "standard-headers/linux/virtio_pci.h"
#include "standard-headers/linux/virtio_ids.h"
#include "hw/boards.h"
#include "hw/virtio/virtio.h"
#include "migration/qemu-file-types.h"
#include "hw/pci/pci.h"
#include "hw/pci/pci_bus.h"
#include "hw/qdev-properties.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/log.h"
#include "qemu/module.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
#include "hw/loader.h"
#include "system/kvm.h"
#include "hw/virtio/virtio-pci.h"
#include "qemu/range.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/visitor.h"
#include "system/replay.h"
#include "trace.h"

#define VIRTIO_PCI_REGION_SIZE(dev)     VIRTIO_PCI_CONFIG_OFF(msix_present(dev))

#undef VIRTIO_PCI_CONFIG

/* The remaining space is defined by each driver as the per-driver
 * configuration space */
#define VIRTIO_PCI_CONFIG_SIZE(dev)     VIRTIO_PCI_CONFIG_OFF(msix_enabled(dev))
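
/*
 * A rough map of the legacy BAR, for orientation (a sketch following the
 * legacy virtio layout): the common header occupies the first 20 bytes,
 * MSI-X adds two 16-bit vector registers at offsets 20 and 22, and the
 * device-specific config starts right after, i.e. at 20 without MSI-X and
 * 24 with MSI-X enabled. That is why VIRTIO_PCI_CONFIG_SIZE() keys off
 * msix_enabled().
 */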

static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size,
                               VirtIOPCIProxy *dev);
static void virtio_pci_reset(DeviceState *qdev);

/* virtio device */
/* DeviceState to VirtIOPCIProxy. For use off data-path. TODO: use QOM. */
static inline VirtIOPCIProxy *to_virtio_pci_proxy(DeviceState *d)
{
    return container_of(d, VirtIOPCIProxy, pci_dev.qdev);
}

/* DeviceState to VirtIOPCIProxy. Note: used on datapath,
 * be careful and test performance if you change this.
 */
static inline VirtIOPCIProxy *to_virtio_pci_proxy_fast(DeviceState *d)
{
    return container_of(d, VirtIOPCIProxy, pci_dev.qdev);
}

static void virtio_pci_notify(DeviceState *d, uint16_t vector)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy_fast(d);

    if (msix_enabled(&proxy->pci_dev)) {
        if (vector != VIRTIO_NO_VECTOR) {
            msix_notify(&proxy->pci_dev, vector);
        }
    } else {
        VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
        pci_set_irq(&proxy->pci_dev, qatomic_read(&vdev->isr) & 1);
    }
}
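
/*
 * Interrupt delivery thus has two paths: with MSI-X enabled the vector is
 * signalled directly (VIRTIO_NO_VECTOR suppresses it); otherwise bit 0 of
 * the ISR drives the level-triggered INTx line, which the guest clears by
 * reading the ISR register (see virtio_ioport_read()/virtio_pci_isr_read()
 * below).
 */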

static void virtio_pci_save_config(DeviceState *d, QEMUFile *f)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    pci_device_save(&proxy->pci_dev, f);
    msix_save(&proxy->pci_dev, f);
    if (msix_present(&proxy->pci_dev)) {
        qemu_put_be16(f, vdev->config_vector);
    }
}

static const VMStateDescription vmstate_virtio_pci_modern_queue_state = {
    .name = "virtio_pci/modern_queue_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT16(num, VirtIOPCIQueue),
        VMSTATE_UNUSED(1), /* enabled was stored as be16 */
        VMSTATE_BOOL(enabled, VirtIOPCIQueue),
        VMSTATE_UINT32_ARRAY(desc, VirtIOPCIQueue, 2),
        VMSTATE_UINT32_ARRAY(avail, VirtIOPCIQueue, 2),
        VMSTATE_UINT32_ARRAY(used, VirtIOPCIQueue, 2),
        VMSTATE_END_OF_LIST()
    }
};

static bool virtio_pci_modern_state_needed(void *opaque)
{
    VirtIOPCIProxy *proxy = opaque;

    return virtio_pci_modern(proxy);
}

static const VMStateDescription vmstate_virtio_pci_modern_state_sub = {
    .name = "virtio_pci/modern_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_pci_modern_state_needed,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT32(dfselect, VirtIOPCIProxy),
        VMSTATE_UINT32(gfselect, VirtIOPCIProxy),
        VMSTATE_UINT32_ARRAY(guest_features, VirtIOPCIProxy, 2),
        VMSTATE_STRUCT_ARRAY(vqs, VirtIOPCIProxy, VIRTIO_QUEUE_MAX, 0,
                             vmstate_virtio_pci_modern_queue_state,
                             VirtIOPCIQueue),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_pci = {
    .name = "virtio_pci",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (const VMStateField[]) {
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription * const []) {
        &vmstate_virtio_pci_modern_state_sub,
        NULL
    }
};
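
/*
 * Note the top-level "virtio_pci" vmstate above carries no fields of its
 * own: the modern state travels as a subsection, which the migration code
 * only emits when .needed returns true, so a legacy-only proxy keeps its
 * stream format unchanged.
 */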

static bool virtio_pci_has_extra_state(DeviceState *d)
{
    return true;
}

static void virtio_pci_save_extra_state(DeviceState *d, QEMUFile *f)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);

    vmstate_save_state(f, &vmstate_virtio_pci, proxy, NULL);
}

static int virtio_pci_load_extra_state(DeviceState *d, QEMUFile *f)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);

    return vmstate_load_state(f, &vmstate_virtio_pci, proxy, 1);
}

static void virtio_pci_save_queue(DeviceState *d, int n, QEMUFile *f)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    if (msix_present(&proxy->pci_dev)) {
        qemu_put_be16(f, virtio_queue_vector(vdev, n));
    }
}

static int virtio_pci_load_config(DeviceState *d, QEMUFile *f)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    uint16_t vector;
    int ret;

    ret = pci_device_load(&proxy->pci_dev, f);
    if (ret) {
        return ret;
    }
    msix_unuse_all_vectors(&proxy->pci_dev);
    msix_load(&proxy->pci_dev, f);
    if (msix_present(&proxy->pci_dev)) {
        qemu_get_be16s(f, &vector);

        if (vector != VIRTIO_NO_VECTOR && vector >= proxy->nvectors) {
            return -EINVAL;
        }
    } else {
        vector = VIRTIO_NO_VECTOR;
    }
    vdev->config_vector = vector;
    if (vector != VIRTIO_NO_VECTOR) {
        msix_vector_use(&proxy->pci_dev, vector);
    }
    return 0;
}

static int virtio_pci_load_queue(DeviceState *d, int n, QEMUFile *f)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    uint16_t vector;

    if (msix_present(&proxy->pci_dev)) {
        qemu_get_be16s(f, &vector);
        if (vector != VIRTIO_NO_VECTOR && vector >= proxy->nvectors) {
            return -EINVAL;
        }
    } else {
        vector = VIRTIO_NO_VECTOR;
    }
    virtio_queue_set_vector(vdev, n, vector);
    if (vector != VIRTIO_NO_VECTOR) {
        msix_vector_use(&proxy->pci_dev, vector);
    }

    return 0;
}

typedef struct VirtIOPCIIDInfo {
    /* virtio id */
    uint16_t vdev_id;
    /* pci device id for the transitional device */
    uint16_t trans_devid;
    uint16_t class_id;
} VirtIOPCIIDInfo;

static const VirtIOPCIIDInfo virtio_pci_id_info[] = {
    {
        .vdev_id = VIRTIO_ID_CRYPTO,
        .class_id = PCI_CLASS_OTHERS,
    }, {
        .vdev_id = VIRTIO_ID_FS,
        .class_id = PCI_CLASS_STORAGE_OTHER,
    }, {
        .vdev_id = VIRTIO_ID_NET,
        .trans_devid = PCI_DEVICE_ID_VIRTIO_NET,
        .class_id = PCI_CLASS_NETWORK_ETHERNET,
    }, {
        .vdev_id = VIRTIO_ID_BLOCK,
        .trans_devid = PCI_DEVICE_ID_VIRTIO_BLOCK,
        .class_id = PCI_CLASS_STORAGE_SCSI,
    }, {
        .vdev_id = VIRTIO_ID_CONSOLE,
        .trans_devid = PCI_DEVICE_ID_VIRTIO_CONSOLE,
        .class_id = PCI_CLASS_COMMUNICATION_OTHER,
    }, {
        .vdev_id = VIRTIO_ID_SCSI,
        .trans_devid = PCI_DEVICE_ID_VIRTIO_SCSI,
        .class_id = PCI_CLASS_STORAGE_SCSI
    }, {
        .vdev_id = VIRTIO_ID_9P,
        .trans_devid = PCI_DEVICE_ID_VIRTIO_9P,
        .class_id = PCI_BASE_CLASS_NETWORK,
    }, {
        .vdev_id = VIRTIO_ID_BALLOON,
        .trans_devid = PCI_DEVICE_ID_VIRTIO_BALLOON,
        .class_id = PCI_CLASS_OTHERS,
    }, {
        .vdev_id = VIRTIO_ID_RNG,
        .trans_devid = PCI_DEVICE_ID_VIRTIO_RNG,
        .class_id = PCI_CLASS_OTHERS,
    },
};
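
/*
 * Example: for a transitional virtio-net device the table above yields
 * trans_devid PCI_DEVICE_ID_VIRTIO_NET and class PCI_CLASS_NETWORK_ETHERNET;
 * modern-only IDs such as VIRTIO_ID_FS have no transitional device ID, so
 * trans_devid stays 0 there.
 */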

static const VirtIOPCIIDInfo *virtio_pci_get_id_info(uint16_t vdev_id)
{
    const VirtIOPCIIDInfo *info = NULL;
    int i;

    for (i = 0; i < ARRAY_SIZE(virtio_pci_id_info); i++) {
        if (virtio_pci_id_info[i].vdev_id == vdev_id) {
            info = &virtio_pci_id_info[i];
            break;
        }
    }

    if (!info) {
        /* The device id is invalid or not added to the id_info yet. */
        error_report("Invalid virtio device(id %u)", vdev_id);
        abort();
    }

    return info;
}

/*
 * Get the Transitional Device ID for the specific device, return
 * zero if the device is non-transitional.
 */
uint16_t virtio_pci_get_trans_devid(uint16_t device_id)
{
    return virtio_pci_get_id_info(device_id)->trans_devid;
}

/*
 * Get the Class ID for the specific device.
 */
uint16_t virtio_pci_get_class_id(uint16_t device_id)
{
    return virtio_pci_get_id_info(device_id)->class_id;
}

static bool virtio_pci_ioeventfd_enabled(DeviceState *d)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);

    return (proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD) != 0;
}

#define QEMU_VIRTIO_PCI_QUEUE_MEM_MULT 0x1000

static inline int virtio_pci_queue_mem_mult(struct VirtIOPCIProxy *proxy)
{
    return (proxy->flags & VIRTIO_PCI_FLAG_PAGE_PER_VQ) ?
        QEMU_VIRTIO_PCI_QUEUE_MEM_MULT : 4;
}
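
/*
 * With VIRTIO_PCI_FLAG_PAGE_PER_VQ each queue's notify register sits in its
 * own 4KiB page (so a backend can map or trap each doorbell separately);
 * otherwise queues are packed at the minimal 4-byte stride. For example,
 * queue 3 notifies at offset 3 * 0x1000 or 3 * 4 within the notify region,
 * respectively.
 */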

static int virtio_pci_ioeventfd_assign(DeviceState *d, EventNotifier *notifier,
                                       int n, bool assign)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtQueue *vq = virtio_get_queue(vdev, n);
    bool legacy = virtio_pci_legacy(proxy);
    bool modern = virtio_pci_modern(proxy);
    bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
    MemoryRegion *modern_mr = &proxy->notify.mr;
    MemoryRegion *modern_notify_mr = &proxy->notify_pio.mr;
    MemoryRegion *legacy_mr = &proxy->bar;
    hwaddr modern_addr = virtio_pci_queue_mem_mult(proxy) *
                         virtio_get_queue_index(vq);
    hwaddr legacy_addr = VIRTIO_PCI_QUEUE_NOTIFY;

    if (assign) {
        if (modern) {
            memory_region_add_eventfd(modern_mr, modern_addr, 0,
                                      false, n, notifier);
            if (modern_pio) {
                memory_region_add_eventfd(modern_notify_mr, 0, 2,
                                          true, n, notifier);
            }
        }
        if (legacy) {
            memory_region_add_eventfd(legacy_mr, legacy_addr, 2,
                                      true, n, notifier);
        }
    } else {
        if (modern) {
            memory_region_del_eventfd(modern_mr, modern_addr, 0,
                                      false, n, notifier);
            if (modern_pio) {
                memory_region_del_eventfd(modern_notify_mr, 0, 2,
                                          true, n, notifier);
            }
        }
        if (legacy) {
            memory_region_del_eventfd(legacy_mr, legacy_addr, 2,
                                      true, n, notifier);
        }
    }
    return 0;
}
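
/*
 * The eventfd wiring above is what makes ioeventfd work: once a notifier is
 * bound to the notify address, a guest write there can be completed by KVM
 * by signalling the eventfd instead of exiting to the device model, and the
 * queue is then serviced from the event loop. The legacy and modern-PIO
 * bindings match a 2-byte write of the queue index (data match), while the
 * modern MMIO binding matches any write at the per-queue offset.
 */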

static void virtio_pci_start_ioeventfd(VirtIOPCIProxy *proxy)
{
    virtio_bus_start_ioeventfd(&proxy->bus);
}

static void virtio_pci_stop_ioeventfd(VirtIOPCIProxy *proxy)
{
    virtio_bus_stop_ioeventfd(&proxy->bus);
}

static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
{
    VirtIOPCIProxy *proxy = opaque;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    uint16_t vector, vq_idx;
    hwaddr pa;

    switch (addr) {
    case VIRTIO_PCI_GUEST_FEATURES:
        /* Guest does not negotiate properly? We have to assume nothing. */
        if (val & (1 << VIRTIO_F_BAD_FEATURE)) {
            val = virtio_bus_get_vdev_bad_features(&proxy->bus);
        }
        virtio_set_features(vdev, val);
        break;
    case VIRTIO_PCI_QUEUE_PFN:
        pa = (hwaddr)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT;
        if (pa == 0) {
            virtio_pci_reset(DEVICE(proxy));
        } else {
            virtio_queue_set_addr(vdev, vdev->queue_sel, pa);
        }
        break;
    case VIRTIO_PCI_QUEUE_SEL:
        if (val < VIRTIO_QUEUE_MAX) {
            vdev->queue_sel = val;
        }
        break;
    case VIRTIO_PCI_QUEUE_NOTIFY:
        vq_idx = val;
        if (vq_idx < VIRTIO_QUEUE_MAX && virtio_queue_get_num(vdev, vq_idx)) {
            if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA)) {
                VirtQueue *vq = virtio_get_queue(vdev, vq_idx);

                virtio_queue_set_shadow_avail_idx(vq, val >> 16);
            }
            virtio_queue_notify(vdev, vq_idx);
        }
        break;
    case VIRTIO_PCI_STATUS:
        if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) {
            virtio_pci_stop_ioeventfd(proxy);
        }

        virtio_set_status(vdev, val & 0xFF);

        if (val & VIRTIO_CONFIG_S_DRIVER_OK) {
            virtio_pci_start_ioeventfd(proxy);
        }

        if (vdev->status == 0) {
            virtio_pci_reset(DEVICE(proxy));
        }

        /* Linux before 2.6.34 drives the device without enabling
           the PCI device bus master bit. Enable it automatically
           for the guest. This is a PCI spec violation but so is
           initiating DMA with the bus master bit clear. */
        if (val == (VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER)) {
            pci_default_write_config(&proxy->pci_dev, PCI_COMMAND,
                                     proxy->pci_dev.config[PCI_COMMAND] |
                                     PCI_COMMAND_MASTER, 1);
        }
        break;
    case VIRTIO_MSI_CONFIG_VECTOR:
        if (vdev->config_vector != VIRTIO_NO_VECTOR) {
            msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
        }
        /* Make it possible for the guest to discover that an error
           took place. */
        if (val < proxy->nvectors) {
            msix_vector_use(&proxy->pci_dev, val);
        } else {
            val = VIRTIO_NO_VECTOR;
        }
        vdev->config_vector = val;
        break;
    case VIRTIO_MSI_QUEUE_VECTOR:
        vector = virtio_queue_vector(vdev, vdev->queue_sel);
        if (vector != VIRTIO_NO_VECTOR) {
            msix_vector_unuse(&proxy->pci_dev, vector);
        }
        /* Make it possible for the guest to discover that an error
           took place. */
        if (val < proxy->nvectors) {
            msix_vector_use(&proxy->pci_dev, val);
        } else {
            val = VIRTIO_NO_VECTOR;
        }
        virtio_queue_set_vector(vdev, vdev->queue_sel, val);
        break;
    default:
        qemu_log_mask(LOG_GUEST_ERROR,
                      "%s: unexpected address 0x%x value 0x%x\n",
                      __func__, addr, val);
        break;
    }
}

static uint32_t virtio_ioport_read(VirtIOPCIProxy *proxy, uint32_t addr)
{
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    uint32_t ret = 0xFFFFFFFF;

    switch (addr) {
    case VIRTIO_PCI_HOST_FEATURES:
        ret = vdev->host_features;
        break;
    case VIRTIO_PCI_GUEST_FEATURES:
        ret = vdev->guest_features;
        break;
    case VIRTIO_PCI_QUEUE_PFN:
        ret = virtio_queue_get_addr(vdev, vdev->queue_sel)
              >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
        break;
    case VIRTIO_PCI_QUEUE_NUM:
        ret = virtio_queue_get_num(vdev, vdev->queue_sel);
        break;
    case VIRTIO_PCI_QUEUE_SEL:
        ret = vdev->queue_sel;
        break;
    case VIRTIO_PCI_STATUS:
        ret = vdev->status;
        break;
    case VIRTIO_PCI_ISR:
        /* reading from the ISR also clears it. */
        ret = qatomic_xchg(&vdev->isr, 0);
        pci_irq_deassert(&proxy->pci_dev);
        break;
    case VIRTIO_MSI_CONFIG_VECTOR:
        ret = vdev->config_vector;
        break;
    case VIRTIO_MSI_QUEUE_VECTOR:
        ret = virtio_queue_vector(vdev, vdev->queue_sel);
        break;
    default:
        break;
    }

    return ret;
}

static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr,
                                       unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    uint32_t config = VIRTIO_PCI_CONFIG_SIZE(&proxy->pci_dev);
    uint64_t val = 0;

    if (vdev == NULL) {
        return UINT64_MAX;
    }

    if (addr < config) {
        return virtio_ioport_read(proxy, addr);
    }
    addr -= config;

    switch (size) {
    case 1:
        val = virtio_config_readb(vdev, addr);
        break;
    case 2:
        val = virtio_config_readw(vdev, addr);
        if (virtio_is_big_endian(vdev)) {
            val = bswap16(val);
        }
        break;
    case 4:
        val = virtio_config_readl(vdev, addr);
        if (virtio_is_big_endian(vdev)) {
            val = bswap32(val);
        }
        break;
    }
    return val;
}

static void virtio_pci_config_write(void *opaque, hwaddr addr,
                                    uint64_t val, unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    uint32_t config = VIRTIO_PCI_CONFIG_SIZE(&proxy->pci_dev);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    if (vdev == NULL) {
        return;
    }

    if (addr < config) {
        virtio_ioport_write(proxy, addr, val);
        return;
    }
    addr -= config;
    /*
     * Virtio-PCI is odd. Ioports are LE but config space is target native
     * endian.
     */
    switch (size) {
    case 1:
        virtio_config_writeb(vdev, addr, val);
        break;
    case 2:
        if (virtio_is_big_endian(vdev)) {
            val = bswap16(val);
        }
        virtio_config_writew(vdev, addr, val);
        break;
    case 4:
        if (virtio_is_big_endian(vdev)) {
            val = bswap32(val);
        }
        virtio_config_writel(vdev, addr, val);
        break;
    }
}

static const MemoryRegionOps virtio_pci_config_ops = {
    .read = virtio_pci_config_read,
    .write = virtio_pci_config_write,
    .impl = {
        .min_access_size = 1,
        .max_access_size = 4,
    },
    .endianness = DEVICE_LITTLE_ENDIAN,
};

static MemoryRegion *virtio_address_space_lookup(VirtIOPCIProxy *proxy,
                                                 hwaddr *off, int len)
{
    int i;
    VirtIOPCIRegion *reg;

    for (i = 0; i < ARRAY_SIZE(proxy->regs); ++i) {
        reg = &proxy->regs[i];
        if (*off >= reg->offset &&
            *off + len <= reg->offset + reg->size) {
            MemoryRegionSection mrs = memory_region_find(&reg->mr,
                                                         *off - reg->offset,
                                                         len);
            assert(mrs.mr);
            *off = mrs.offset_within_region;
            memory_region_unref(mrs.mr);
            return mrs.mr;
        }
    }

    return NULL;
}

/* Below are generic functions to do memcpy from/to an address space,
 * without byteswaps, with input validation.
 *
 * As regular address_space_* APIs all do some kind of byteswap at least for
 * some host/target combinations, we are forced to explicitly convert to a
 * known-endianness integer value.
 * It doesn't really matter which endian format to go through, so the code
 * below selects the endian that causes the least amount of work on the given
 * host.
 *
 * Note: host pointer must be aligned.
 */
static
void virtio_address_space_write(VirtIOPCIProxy *proxy, hwaddr addr,
                                const uint8_t *buf, int len)
{
    uint64_t val;
    MemoryRegion *mr;

    /* address_space_* APIs assume an aligned address.
     * As address is under guest control, handle illegal values.
     */
    addr &= ~(len - 1);

    mr = virtio_address_space_lookup(proxy, &addr, len);
    if (!mr) {
        return;
    }

    /* Make sure caller aligned buf properly */
    assert(!(((uintptr_t)buf) & (len - 1)));

    switch (len) {
    case 1:
        val = pci_get_byte(buf);
        break;
    case 2:
        val = pci_get_word(buf);
        break;
    case 4:
        val = pci_get_long(buf);
        break;
    default:
        /* As length is under guest control, handle illegal values. */
        return;
    }
    memory_region_dispatch_write(mr, addr, val, size_memop(len) | MO_LE,
                                 MEMTXATTRS_UNSPECIFIED);
}

static void
virtio_address_space_read(VirtIOPCIProxy *proxy, hwaddr addr,
                          uint8_t *buf, int len)
{
    uint64_t val;
    MemoryRegion *mr;

    /* address_space_* APIs assume an aligned address.
     * As address is under guest control, handle illegal values.
     */
    addr &= ~(len - 1);

    mr = virtio_address_space_lookup(proxy, &addr, len);
    if (!mr) {
        return;
    }

    /* Make sure caller aligned buf properly */
    assert(!(((uintptr_t)buf) & (len - 1)));

    memory_region_dispatch_read(mr, addr, &val, size_memop(len) | MO_LE,
                                MEMTXATTRS_UNSPECIFIED);
    switch (len) {
    case 1:
        pci_set_byte(buf, val);
        break;
    case 2:
        pci_set_word(buf, val);
        break;
    case 4:
        pci_set_long(buf, val);
        break;
    default:
        /* As length is under guest control, handle illegal values. */
        break;
    }
}

static void virtio_pci_ats_ctrl_trigger(PCIDevice *pci_dev, bool enable)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    vdev->device_iotlb_enabled = enable;

    if (k->toggle_device_iotlb) {
        k->toggle_device_iotlb(vdev);
    }
}

static void pcie_ats_config_write(PCIDevice *dev, uint32_t address,
                                  uint32_t val, int len)
{
    uint32_t off;
    uint16_t ats_cap = dev->exp.ats_cap;

    if (!ats_cap || address < ats_cap) {
        return;
    }
    off = address - ats_cap;
    if (off >= PCI_EXT_CAP_ATS_SIZEOF) {
        return;
    }

    if (range_covers_byte(off, len, PCI_ATS_CTRL + 1)) {
        virtio_pci_ats_ctrl_trigger(dev, !!(val & PCI_ATS_CTRL_ENABLE));
    }
}

static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
                                uint32_t val, int len)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    struct virtio_pci_cfg_cap *cfg;

    pci_default_write_config(pci_dev, address, val, len);

    if (proxy->flags & VIRTIO_PCI_FLAG_INIT_FLR) {
        pcie_cap_flr_write_config(pci_dev, address, val, len);
    }

    if (proxy->flags & VIRTIO_PCI_FLAG_ATS) {
        pcie_ats_config_write(pci_dev, address, val, len);
    }

    if (range_covers_byte(address, len, PCI_COMMAND)) {
        if (!(pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
            virtio_set_disabled(vdev, true);
            virtio_pci_stop_ioeventfd(proxy);
            virtio_set_status(vdev, vdev->status & ~VIRTIO_CONFIG_S_DRIVER_OK);
        } else {
            virtio_set_disabled(vdev, false);
        }
    }

    if (proxy->config_cap &&
        ranges_overlap(address, len, proxy->config_cap +
                       offsetof(struct virtio_pci_cfg_cap, pci_cfg_data),
                       sizeof cfg->pci_cfg_data)) {
        uint32_t off;
        uint32_t caplen;

        cfg = (void *)(proxy->pci_dev.config + proxy->config_cap);
        off = le32_to_cpu(cfg->cap.offset);
        caplen = le32_to_cpu(cfg->cap.length);

        if (caplen == 1 || caplen == 2 || caplen == 4) {
            assert(caplen <= sizeof cfg->pci_cfg_data);
            virtio_address_space_write(proxy, off, cfg->pci_cfg_data, caplen);
        }
    }
}

static uint32_t virtio_read_config(PCIDevice *pci_dev,
                                   uint32_t address, int len)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
    struct virtio_pci_cfg_cap *cfg;

    if (proxy->config_cap &&
        ranges_overlap(address, len, proxy->config_cap +
                       offsetof(struct virtio_pci_cfg_cap, pci_cfg_data),
                       sizeof cfg->pci_cfg_data)) {
        uint32_t off;
        uint32_t caplen;

        cfg = (void *)(proxy->pci_dev.config + proxy->config_cap);
        off = le32_to_cpu(cfg->cap.offset);
        caplen = le32_to_cpu(cfg->cap.length);

        if (caplen == 1 || caplen == 2 || caplen == 4) {
            assert(caplen <= sizeof cfg->pci_cfg_data);
            virtio_address_space_read(proxy, off, cfg->pci_cfg_data, caplen);
        }
    }

    return pci_default_read_config(pci_dev, address, len);
}
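
/*
 * The two config-space hooks above implement the VIRTIO_PCI_CAP_PCI_CFG
 * window: the guest programs cfg->cap.bar/offset/length and then reads or
 * writes cfg->pci_cfg_data, which QEMU forwards into the corresponding BAR
 * region via virtio_address_space_read()/write(). Only 1-, 2- and 4-byte
 * lengths are honoured; anything else is silently ignored, since the length
 * is guest controlled.
 */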

static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
                                        unsigned int vector)
{
    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
    int ret;

    if (irqfd->users == 0) {
        KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state);
        ret = kvm_irqchip_add_msi_route(&c, vector, &proxy->pci_dev);
        if (ret < 0) {
            return ret;
        }
        kvm_irqchip_commit_route_changes(&c);
        irqfd->virq = ret;
    }
    irqfd->users++;
    return 0;
}

static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
                                             unsigned int vector)
{
    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
    if (--irqfd->users == 0) {
        kvm_irqchip_release_virq(kvm_state, irqfd->virq);
    }
}

static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy,
                                    EventNotifier *n,
                                    unsigned int vector)
{
    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
    return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, irqfd->virq);
}

static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy,
                                         EventNotifier *n,
                                         unsigned int vector)
{
    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
    int ret;

    ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, irqfd->virq);
    assert(ret == 0);
}

static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no,
                                   EventNotifier **n, unsigned int *vector)
{
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtQueue *vq;

    if (!proxy->vector_irqfd && vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) {
        return -1;
    }

    if (queue_no == VIRTIO_CONFIG_IRQ_IDX) {
        *n = virtio_config_get_guest_notifier(vdev);
        *vector = vdev->config_vector;
    } else {
        if (!virtio_queue_get_num(vdev, queue_no)) {
            return -1;
        }
        *vector = virtio_queue_vector(vdev, queue_no);
        vq = virtio_get_queue(vdev, queue_no);
        *n = virtio_queue_get_guest_notifier(vq);
    }
    return 0;
}

static int kvm_virtio_pci_vector_use_one(VirtIOPCIProxy *proxy, int queue_no)
{
    unsigned int vector;
    int ret;
    EventNotifier *n;
    PCIDevice *dev = &proxy->pci_dev;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector);
    if (ret < 0) {
        return ret;
    }
    if (vector >= msix_nr_vectors_allocated(dev)) {
        return 0;
    }
    ret = kvm_virtio_pci_vq_vector_use(proxy, vector);
    if (ret < 0) {
        return ret;
    }
    /*
     * If guest supports masking, set up irqfd now.
     * Otherwise, delay until unmasked in the frontend.
     */
    if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
        ret = kvm_virtio_pci_irqfd_use(proxy, n, vector);
        if (ret < 0) {
            kvm_virtio_pci_vq_vector_release(proxy, vector);
            return ret;
        }
    }

    return 0;
}

static int kvm_virtio_pci_vector_vq_use(VirtIOPCIProxy *proxy, int nvqs)
{
    int queue_no;
    int ret = 0;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    for (queue_no = 0; queue_no < nvqs; queue_no++) {
        if (!virtio_queue_get_num(vdev, queue_no)) {
            return -1;
        }
        ret = kvm_virtio_pci_vector_use_one(proxy, queue_no);
    }
    return ret;
}

static int kvm_virtio_pci_vector_config_use(VirtIOPCIProxy *proxy)
{
    return kvm_virtio_pci_vector_use_one(proxy, VIRTIO_CONFIG_IRQ_IDX);
}

static void kvm_virtio_pci_vector_release_one(VirtIOPCIProxy *proxy,
                                              int queue_no)
{
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    unsigned int vector;
    EventNotifier *n;
    int ret;
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    PCIDevice *dev = &proxy->pci_dev;

    ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector);
    if (ret < 0) {
        return;
    }
    if (vector >= msix_nr_vectors_allocated(dev)) {
        return;
    }
    if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
        kvm_virtio_pci_irqfd_release(proxy, n, vector);
    }
    kvm_virtio_pci_vq_vector_release(proxy, vector);
}

static void kvm_virtio_pci_vector_vq_release(VirtIOPCIProxy *proxy, int nvqs)
{
    int queue_no;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    for (queue_no = 0; queue_no < nvqs; queue_no++) {
        if (!virtio_queue_get_num(vdev, queue_no)) {
            break;
        }
        kvm_virtio_pci_vector_release_one(proxy, queue_no);
    }
}

static void kvm_virtio_pci_vector_config_release(VirtIOPCIProxy *proxy)
{
    kvm_virtio_pci_vector_release_one(proxy, VIRTIO_CONFIG_IRQ_IDX);
}

static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy,
                                        unsigned int queue_no,
                                        unsigned int vector,
                                        MSIMessage msg,
                                        EventNotifier *n)
{
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    VirtIOIRQFD *irqfd;
    int ret = 0;

    if (proxy->vector_irqfd) {
        irqfd = &proxy->vector_irqfd[vector];
        if (irqfd->msg.data != msg.data || irqfd->msg.address != msg.address) {
            ret = kvm_irqchip_update_msi_route(kvm_state, irqfd->virq, msg,
                                               &proxy->pci_dev);
            if (ret < 0) {
                return ret;
            }
            kvm_irqchip_commit_routes(kvm_state);
        }
    }

    /* If the guest supports masking, the irqfd is already set up; unmask it.
     * Otherwise, set it up now.
     */
    if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
        k->guest_notifier_mask(vdev, queue_no, false);
        /* Test after unmasking to avoid losing events. */
        if (k->guest_notifier_pending &&
            k->guest_notifier_pending(vdev, queue_no)) {
            event_notifier_set(n);
        }
    } else {
        ret = kvm_virtio_pci_irqfd_use(proxy, n, vector);
    }
    return ret;
}

static void virtio_pci_one_vector_mask(VirtIOPCIProxy *proxy,
                                       unsigned int queue_no,
                                       unsigned int vector,
                                       EventNotifier *n)
{
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    /* If the guest supports masking, keep the irqfd but mask it.
     * Otherwise, clean it up now.
     */
    if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
        k->guest_notifier_mask(vdev, queue_no, true);
    } else {
        kvm_virtio_pci_irqfd_release(proxy, n, vector);
    }
}
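
/*
 * Taken together, the mask/unmask pair trades teardown cost for correctness:
 * when the device can mask at the source (guest_notifier_mask), the irqfd
 * stays installed across mask/unmask and only the frontend filter is
 * toggled; otherwise the irqfd itself is released on mask and re-created on
 * unmask, with a pending check after unmasking so no event is lost in
 * between.
 */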

static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector,
                                    MSIMessage msg)
{
    VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtQueue *vq = virtio_vector_first_queue(vdev, vector);
    EventNotifier *n;
    int ret, index, unmasked = 0;

    while (vq) {
        index = virtio_get_queue_index(vq);
        if (!virtio_queue_get_num(vdev, index)) {
            break;
        }
        if (index < proxy->nvqs_with_notifiers) {
            n = virtio_queue_get_guest_notifier(vq);
            ret = virtio_pci_one_vector_unmask(proxy, index, vector, msg, n);
            if (ret < 0) {
                goto undo;
            }
            ++unmasked;
        }
        vq = virtio_vector_next_queue(vq);
    }
    /* unmask config intr */
    if (vector == vdev->config_vector) {
        n = virtio_config_get_guest_notifier(vdev);
        ret = virtio_pci_one_vector_unmask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector,
                                           msg, n);
        if (ret < 0) {
            goto undo_config;
        }
    }
    return 0;
undo_config:
    n = virtio_config_get_guest_notifier(vdev);
    virtio_pci_one_vector_mask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, n);
undo:
    vq = virtio_vector_first_queue(vdev, vector);
    while (vq && unmasked >= 0) {
        index = virtio_get_queue_index(vq);
        if (index < proxy->nvqs_with_notifiers) {
            n = virtio_queue_get_guest_notifier(vq);
            virtio_pci_one_vector_mask(proxy, index, vector, n);
            --unmasked;
        }
        vq = virtio_vector_next_queue(vq);
    }
    return ret;
}

static void virtio_pci_vector_mask(PCIDevice *dev, unsigned vector)
{
    VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtQueue *vq = virtio_vector_first_queue(vdev, vector);
    EventNotifier *n;
    int index;

    while (vq) {
        index = virtio_get_queue_index(vq);
        n = virtio_queue_get_guest_notifier(vq);
        if (!virtio_queue_get_num(vdev, index)) {
            break;
        }
        if (index < proxy->nvqs_with_notifiers) {
            virtio_pci_one_vector_mask(proxy, index, vector, n);
        }
        vq = virtio_vector_next_queue(vq);
    }

    if (vector == vdev->config_vector) {
        n = virtio_config_get_guest_notifier(vdev);
        virtio_pci_one_vector_mask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, n);
    }
}

static void virtio_pci_vector_poll(PCIDevice *dev,
                                   unsigned int vector_start,
                                   unsigned int vector_end)
{
    VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    int queue_no;
    unsigned int vector;
    EventNotifier *notifier;
    int ret;

    for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) {
        ret = virtio_pci_get_notifier(proxy, queue_no, &notifier, &vector);
        if (ret < 0) {
            break;
        }
        if (vector < vector_start || vector >= vector_end ||
            !msix_is_masked(dev, vector)) {
            continue;
        }
        if (k->guest_notifier_pending) {
            if (k->guest_notifier_pending(vdev, queue_no)) {
                msix_set_pending(dev, vector);
            }
        } else if (event_notifier_test_and_clear(notifier)) {
            msix_set_pending(dev, vector);
        }
    }
    /* poll the config intr */
    ret = virtio_pci_get_notifier(proxy, VIRTIO_CONFIG_IRQ_IDX, &notifier,
                                  &vector);
    if (ret < 0) {
        return;
    }
    if (vector < vector_start || vector >= vector_end ||
        !msix_is_masked(dev, vector)) {
        return;
    }
    if (k->guest_notifier_pending) {
        if (k->guest_notifier_pending(vdev, VIRTIO_CONFIG_IRQ_IDX)) {
            msix_set_pending(dev, vector);
        }
    } else if (event_notifier_test_and_clear(notifier)) {
        msix_set_pending(dev, vector);
    }
}

void virtio_pci_set_guest_notifier_fd_handler(VirtIODevice *vdev, VirtQueue *vq,
                                              int n, bool assign,
                                              bool with_irqfd)
{
    if (n == VIRTIO_CONFIG_IRQ_IDX) {
        virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd);
    } else {
        virtio_queue_set_guest_notifier_fd_handler(vq, assign, with_irqfd);
    }
}

static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign,
                                         bool with_irqfd)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
    VirtQueue *vq = NULL;
    EventNotifier *notifier = NULL;

    if (n == VIRTIO_CONFIG_IRQ_IDX) {
        notifier = virtio_config_get_guest_notifier(vdev);
    } else {
        vq = virtio_get_queue(vdev, n);
        notifier = virtio_queue_get_guest_notifier(vq);
    }

    if (assign) {
        int r = event_notifier_init(notifier, 0);
        if (r < 0) {
            return r;
        }
        virtio_pci_set_guest_notifier_fd_handler(vdev, vq, n, true, with_irqfd);
    } else {
        virtio_pci_set_guest_notifier_fd_handler(vdev, vq, n, false,
                                                 with_irqfd);
        event_notifier_cleanup(notifier);
    }

    if (!msix_enabled(&proxy->pci_dev) &&
        vdev->use_guest_notifier_mask &&
        vdc->guest_notifier_mask) {
        vdc->guest_notifier_mask(vdev, n, !assign);
    }

    return 0;
}

static bool virtio_pci_query_guest_notifiers(DeviceState *d)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);

    if (msix_enabled(&proxy->pci_dev)) {
        return true;
    } else {
        return pci_irq_disabled(&proxy->pci_dev);
    }
}

static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    int r, n;
    bool with_irqfd = msix_enabled(&proxy->pci_dev) &&
        kvm_msi_via_irqfd_enabled();

    nvqs = MIN(nvqs, VIRTIO_QUEUE_MAX);

    /*
     * When deassigning, pass a consistent nvqs value to avoid leaking
     * notifiers. But first check we've actually been configured, exit
     * early if we haven't.
     */
    if (!assign && !proxy->nvqs_with_notifiers) {
        return 0;
    }
    assert(assign || nvqs == proxy->nvqs_with_notifiers);

    proxy->nvqs_with_notifiers = nvqs;

    /* Must unset vector notifier while guest notifier is still assigned */
    if ((proxy->vector_irqfd ||
         (vdev->use_guest_notifier_mask && k->guest_notifier_mask)) &&
        !assign) {
        msix_unset_vector_notifiers(&proxy->pci_dev);
        if (proxy->vector_irqfd) {
            kvm_virtio_pci_vector_vq_release(proxy, nvqs);
            kvm_virtio_pci_vector_config_release(proxy);
            g_free(proxy->vector_irqfd);
            proxy->vector_irqfd = NULL;
        }
    }

    for (n = 0; n < nvqs; n++) {
        if (!virtio_queue_get_num(vdev, n)) {
            break;
        }

        r = virtio_pci_set_guest_notifier(d, n, assign, with_irqfd);
        if (r < 0) {
            goto assign_error;
        }
    }
    r = virtio_pci_set_guest_notifier(d, VIRTIO_CONFIG_IRQ_IDX, assign,
                                      with_irqfd);
    if (r < 0) {
        goto config_assign_error;
    }
    /* Must set vector notifier after guest notifier has been assigned */
    if ((with_irqfd ||
         (vdev->use_guest_notifier_mask && k->guest_notifier_mask)) &&
        assign) {
        if (with_irqfd) {
            proxy->vector_irqfd =
                g_malloc0(sizeof(*proxy->vector_irqfd) *
                          msix_nr_vectors_allocated(&proxy->pci_dev));
            r = kvm_virtio_pci_vector_vq_use(proxy, nvqs);
            if (r < 0) {
                goto config_assign_error;
            }
            r = kvm_virtio_pci_vector_config_use(proxy);
            if (r < 0) {
                goto config_error;
            }
        }

        r = msix_set_vector_notifiers(&proxy->pci_dev, virtio_pci_vector_unmask,
                                      virtio_pci_vector_mask,
                                      virtio_pci_vector_poll);
        if (r < 0) {
            goto notifiers_error;
        }
    }

    return 0;

notifiers_error:
    if (with_irqfd) {
        assert(assign);
        kvm_virtio_pci_vector_vq_release(proxy, nvqs);
    }
config_error:
    if (with_irqfd) {
        kvm_virtio_pci_vector_config_release(proxy);
    }
config_assign_error:
    virtio_pci_set_guest_notifier(d, VIRTIO_CONFIG_IRQ_IDX, !assign,
                                  with_irqfd);
assign_error:
    /* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */
    assert(assign);
    while (--n >= 0) {
        virtio_pci_set_guest_notifier(d, n, !assign, with_irqfd);
    }
    g_free(proxy->vector_irqfd);
    proxy->vector_irqfd = NULL;
    return r;
}

static int virtio_pci_set_host_notifier_mr(DeviceState *d, int n,
                                           MemoryRegion *mr, bool assign)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    int offset;

    if (n >= VIRTIO_QUEUE_MAX || !virtio_pci_modern(proxy) ||
        virtio_pci_queue_mem_mult(proxy) != memory_region_size(mr)) {
        return -1;
    }

    if (assign) {
        offset = virtio_pci_queue_mem_mult(proxy) * n;
        memory_region_add_subregion_overlap(&proxy->notify.mr, offset, mr, 1);
    } else {
        memory_region_del_subregion(&proxy->notify.mr, mr);
    }

    return 0;
}

static void virtio_pci_vmstate_change(DeviceState *d, bool running)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    if (running) {
        /* Old QEMU versions did not set bus master enable on status write.
         * Detect DRIVER set and enable it.
         */
        if ((proxy->flags & VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION) &&
            (vdev->status & VIRTIO_CONFIG_S_DRIVER) &&
            !(proxy->pci_dev.config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
            pci_default_write_config(&proxy->pci_dev, PCI_COMMAND,
                                     proxy->pci_dev.config[PCI_COMMAND] |
                                     PCI_COMMAND_MASTER, 1);
        }
        virtio_pci_start_ioeventfd(proxy);
    } else {
        virtio_pci_stop_ioeventfd(proxy);
    }
}

/*
 * virtio-pci: This is the PCIDevice which has a virtio-pci-bus.
 */

static int virtio_pci_query_nvectors(DeviceState *d)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);

    return proxy->nvectors;
}

static AddressSpace *virtio_pci_get_dma_as(DeviceState *d)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
    PCIDevice *dev = &proxy->pci_dev;

    return pci_get_address_space(dev);
}

static bool virtio_pci_iommu_enabled(DeviceState *d)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
    PCIDevice *dev = &proxy->pci_dev;
    AddressSpace *dma_as = pci_device_iommu_address_space(dev);

    if (dma_as == &address_space_memory) {
        return false;
    }

    return true;
}

static bool virtio_pci_queue_enabled(DeviceState *d, int n)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        return proxy->vqs[n].enabled;
    }

    return virtio_queue_enabled_legacy(vdev, n);
}

static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy,
                                  struct virtio_pci_cap *cap)
{
    PCIDevice *dev = &proxy->pci_dev;
    int offset;

    offset = pci_add_capability(dev, PCI_CAP_ID_VNDR, 0,
                                cap->cap_len, &error_abort);

    assert(cap->cap_len >= sizeof *cap);
    memcpy(dev->config + offset + PCI_CAP_FLAGS, &cap->cap_len,
           cap->cap_len - PCI_CAP_FLAGS);

    return offset;
}
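
/*
 * Note the memcpy above starts at PCI_CAP_FLAGS: pci_add_capability() has
 * already filled in the capability ID and next pointer, so only the bytes
 * from cap_len onwards are taken from the template the caller built.
 */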

static void virtio_pci_set_vector(VirtIODevice *vdev,
                                  VirtIOPCIProxy *proxy,
                                  int queue_no, uint16_t old_vector,
                                  uint16_t new_vector)
{
    bool kvm_irqfd = (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        msix_enabled(&proxy->pci_dev) && kvm_msi_via_irqfd_enabled();

    if (new_vector == old_vector) {
        return;
    }

    /*
     * If the device uses irqfd and the vector changes after DRIVER_OK is
     * set, we need to release the old vector and set up the new one.
     * Otherwise we just need to set the new vector on the device.
     */
    if (kvm_irqfd && old_vector != VIRTIO_NO_VECTOR) {
        kvm_virtio_pci_vector_release_one(proxy, queue_no);
    }
    /* Set the new vector on the device. */
    if (queue_no == VIRTIO_CONFIG_IRQ_IDX) {
        vdev->config_vector = new_vector;
    } else {
        virtio_queue_set_vector(vdev, queue_no, new_vector);
    }
    /* If the new vector changed, we need to set it up. */
    if (kvm_irqfd && new_vector != VIRTIO_NO_VECTOR) {
        kvm_virtio_pci_vector_use_one(proxy, queue_no);
    }
}

int virtio_pci_add_shm_cap(VirtIOPCIProxy *proxy,
                           uint8_t bar, uint64_t offset, uint64_t length,
                           uint8_t id)
{
    struct virtio_pci_cap64 cap = {
        .cap.cap_len = sizeof cap,
        .cap.cfg_type = VIRTIO_PCI_CAP_SHARED_MEMORY_CFG,
    };

    cap.cap.bar = bar;
    cap.cap.length = cpu_to_le32(length);
    cap.length_hi = cpu_to_le32(length >> 32);
    cap.cap.offset = cpu_to_le32(offset);
    cap.offset_hi = cpu_to_le32(offset >> 32);
    cap.cap.id = id;
    return virtio_pci_add_mem_cap(proxy, &cap.cap);
}
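
/*
 * Hypothetical usage sketch (shm_id is a placeholder, and 1 GiB stands in
 * for a real region size): a device exposing a shared-memory region in
 * BAR 4, e.g. a DAX-style cache window, would call
 *
 *     virtio_pci_add_shm_cap(proxy, 4, 0, 1ULL << 30, shm_id);
 *
 * advertising a VIRTIO_PCI_CAP_SHARED_MEMORY_CFG capability with the 64-bit
 * offset/length split across the cap64 fields as above.
 */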

static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr,
                                       unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    uint32_t val = 0;
    int i;

    if (vdev == NULL) {
        return UINT64_MAX;
    }

    switch (addr) {
    case VIRTIO_PCI_COMMON_DFSELECT:
        val = proxy->dfselect;
        break;
    case VIRTIO_PCI_COMMON_DF:
        if (proxy->dfselect <= 1) {
            VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);

            val = (vdev->host_features & ~vdc->legacy_features) >>
                (32 * proxy->dfselect);
        }
        break;
    case VIRTIO_PCI_COMMON_GFSELECT:
        val = proxy->gfselect;
        break;
    case VIRTIO_PCI_COMMON_GF:
        if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) {
            val = proxy->guest_features[proxy->gfselect];
        }
        break;
    case VIRTIO_PCI_COMMON_MSIX:
        val = vdev->config_vector;
        break;
    case VIRTIO_PCI_COMMON_NUMQ:
        for (i = 0; i < VIRTIO_QUEUE_MAX; ++i) {
            if (virtio_queue_get_num(vdev, i)) {
                val = i + 1;
            }
        }
        break;
    case VIRTIO_PCI_COMMON_STATUS:
        val = vdev->status;
        break;
    case VIRTIO_PCI_COMMON_CFGGENERATION:
        val = vdev->generation;
        break;
    case VIRTIO_PCI_COMMON_Q_SELECT:
        val = vdev->queue_sel;
        break;
    case VIRTIO_PCI_COMMON_Q_SIZE:
        val = virtio_queue_get_num(vdev, vdev->queue_sel);
        break;
    case VIRTIO_PCI_COMMON_Q_MSIX:
        val = virtio_queue_vector(vdev, vdev->queue_sel);
        break;
    case VIRTIO_PCI_COMMON_Q_ENABLE:
        val = proxy->vqs[vdev->queue_sel].enabled;
        break;
    case VIRTIO_PCI_COMMON_Q_NOFF:
        /* Simply map queues in order */
        val = vdev->queue_sel;
        break;
    case VIRTIO_PCI_COMMON_Q_DESCLO:
        val = proxy->vqs[vdev->queue_sel].desc[0];
        break;
    case VIRTIO_PCI_COMMON_Q_DESCHI:
        val = proxy->vqs[vdev->queue_sel].desc[1];
        break;
    case VIRTIO_PCI_COMMON_Q_AVAILLO:
        val = proxy->vqs[vdev->queue_sel].avail[0];
        break;
    case VIRTIO_PCI_COMMON_Q_AVAILHI:
        val = proxy->vqs[vdev->queue_sel].avail[1];
        break;
    case VIRTIO_PCI_COMMON_Q_USEDLO:
        val = proxy->vqs[vdev->queue_sel].used[0];
        break;
    case VIRTIO_PCI_COMMON_Q_USEDHI:
        val = proxy->vqs[vdev->queue_sel].used[1];
        break;
    case VIRTIO_PCI_COMMON_Q_RESET:
        val = proxy->vqs[vdev->queue_sel].reset;
        break;
    default:
        val = 0;
    }

    return val;
}

static void virtio_pci_common_write(void *opaque, hwaddr addr,
                                    uint64_t val, unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    uint16_t vector;

    if (vdev == NULL) {
        return;
    }

    switch (addr) {
    case VIRTIO_PCI_COMMON_DFSELECT:
        proxy->dfselect = val;
        break;
    case VIRTIO_PCI_COMMON_GFSELECT:
        proxy->gfselect = val;
        break;
    case VIRTIO_PCI_COMMON_GF:
        if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) {
            proxy->guest_features[proxy->gfselect] = val;
            virtio_set_features(vdev,
                                (((uint64_t)proxy->guest_features[1]) << 32) |
                                proxy->guest_features[0]);
        }
        break;
    case VIRTIO_PCI_COMMON_MSIX:
        if (vdev->config_vector != VIRTIO_NO_VECTOR) {
            msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
        }
        /* Make it possible for the guest to discover that an error
           took place. */
        if (val < proxy->nvectors) {
            msix_vector_use(&proxy->pci_dev, val);
        } else {
            val = VIRTIO_NO_VECTOR;
        }
        virtio_pci_set_vector(vdev, proxy, VIRTIO_CONFIG_IRQ_IDX,
                              vdev->config_vector, val);
        break;
    case VIRTIO_PCI_COMMON_STATUS:
        if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) {
            virtio_pci_stop_ioeventfd(proxy);
        }

        virtio_set_status(vdev, val & 0xFF);

        if (val & VIRTIO_CONFIG_S_DRIVER_OK) {
            virtio_pci_start_ioeventfd(proxy);
        }

        if (vdev->status == 0) {
            virtio_pci_reset(DEVICE(proxy));
        }

        break;
    case VIRTIO_PCI_COMMON_Q_SELECT:
        if (val < VIRTIO_QUEUE_MAX) {
            vdev->queue_sel = val;
        }
        break;
    case VIRTIO_PCI_COMMON_Q_SIZE:
        proxy->vqs[vdev->queue_sel].num = val;
        virtio_queue_set_num(vdev, vdev->queue_sel,
                             proxy->vqs[vdev->queue_sel].num);
        virtio_init_region_cache(vdev, vdev->queue_sel);
        break;
    case VIRTIO_PCI_COMMON_Q_MSIX:
        vector = virtio_queue_vector(vdev, vdev->queue_sel);
        if (vector != VIRTIO_NO_VECTOR) {
            msix_vector_unuse(&proxy->pci_dev, vector);
        }
        /* Make it possible for the guest to discover that an error
           took place. */
        if (val < proxy->nvectors) {
            msix_vector_use(&proxy->pci_dev, val);
        } else {
            val = VIRTIO_NO_VECTOR;
        }
        virtio_pci_set_vector(vdev, proxy, vdev->queue_sel, vector, val);
        break;
    case VIRTIO_PCI_COMMON_Q_ENABLE:
        if (val == 1) {
            virtio_queue_set_num(vdev, vdev->queue_sel,
                                 proxy->vqs[vdev->queue_sel].num);
            virtio_queue_set_rings(vdev, vdev->queue_sel,
                ((uint64_t)proxy->vqs[vdev->queue_sel].desc[1]) << 32 |
                proxy->vqs[vdev->queue_sel].desc[0],
                ((uint64_t)proxy->vqs[vdev->queue_sel].avail[1]) << 32 |
                proxy->vqs[vdev->queue_sel].avail[0],
                ((uint64_t)proxy->vqs[vdev->queue_sel].used[1]) << 32 |
                proxy->vqs[vdev->queue_sel].used[0]);
            proxy->vqs[vdev->queue_sel].enabled = 1;
            proxy->vqs[vdev->queue_sel].reset = 0;
            virtio_queue_enable(vdev, vdev->queue_sel);
        } else {
            virtio_error(vdev, "wrong value for queue_enable %"PRIx64, val);
        }
        break;
    case VIRTIO_PCI_COMMON_Q_DESCLO:
        proxy->vqs[vdev->queue_sel].desc[0] = val;
        break;
    case VIRTIO_PCI_COMMON_Q_DESCHI:
        proxy->vqs[vdev->queue_sel].desc[1] = val;
        break;
    case VIRTIO_PCI_COMMON_Q_AVAILLO:
        proxy->vqs[vdev->queue_sel].avail[0] = val;
        break;
    case VIRTIO_PCI_COMMON_Q_AVAILHI:
        proxy->vqs[vdev->queue_sel].avail[1] = val;
        break;
    case VIRTIO_PCI_COMMON_Q_USEDLO:
        proxy->vqs[vdev->queue_sel].used[0] = val;
        break;
    case VIRTIO_PCI_COMMON_Q_USEDHI:
        proxy->vqs[vdev->queue_sel].used[1] = val;
        break;
    case VIRTIO_PCI_COMMON_Q_RESET:
        if (val == 1) {
            proxy->vqs[vdev->queue_sel].reset = 1;

            virtio_queue_reset(vdev, vdev->queue_sel);

            proxy->vqs[vdev->queue_sel].reset = 0;
            proxy->vqs[vdev->queue_sel].enabled = 0;
        }
        break;
    default:
        break;
    }
}
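
/*
 * A modern driver's queue bring-up therefore looks like: write Q_SELECT,
 * optionally shrink Q_SIZE, program Q_DESCLO/HI, Q_AVAILLO/HI and
 * Q_USEDLO/HI, then write 1 to Q_ENABLE, at which point the shadow
 * addresses collected above are committed via virtio_queue_set_rings().
 */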
1700
1701
virtio_pci_notify_read(void * opaque,hwaddr addr,unsigned size)1702 static uint64_t virtio_pci_notify_read(void *opaque, hwaddr addr,
1703 unsigned size)
1704 {
1705 VirtIOPCIProxy *proxy = opaque;
1706 if (virtio_bus_get_device(&proxy->bus) == NULL) {
1707 return UINT64_MAX;
1708 }
1709
1710 return 0;
1711 }
1712
virtio_pci_notify_write(void * opaque,hwaddr addr,uint64_t val,unsigned size)1713 static void virtio_pci_notify_write(void *opaque, hwaddr addr,
1714 uint64_t val, unsigned size)
1715 {
1716 VirtIOPCIProxy *proxy = opaque;
1717 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1718
1719 unsigned queue = addr / virtio_pci_queue_mem_mult(proxy);
1720
1721 if (vdev != NULL && queue < VIRTIO_QUEUE_MAX) {
1722 trace_virtio_pci_notify_write(addr, val, size);
1723 virtio_queue_notify(vdev, queue);
1724 }
1725 }
1726
virtio_pci_notify_write_pio(void * opaque,hwaddr addr,uint64_t val,unsigned size)1727 static void virtio_pci_notify_write_pio(void *opaque, hwaddr addr,
1728 uint64_t val, unsigned size)
1729 {
1730 VirtIOPCIProxy *proxy = opaque;
1731 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1732
1733 unsigned queue = val;
1734
1735 if (vdev != NULL && queue < VIRTIO_QUEUE_MAX) {
1736 trace_virtio_pci_notify_write_pio(addr, val, size);
1737 virtio_queue_notify(vdev, queue);
1738 }
1739 }
1740
virtio_pci_isr_read(void * opaque,hwaddr addr,unsigned size)1741 static uint64_t virtio_pci_isr_read(void *opaque, hwaddr addr,
1742 unsigned size)
1743 {
1744 VirtIOPCIProxy *proxy = opaque;
1745 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1746 uint64_t val;
1747
1748 if (vdev == NULL) {
1749 return UINT64_MAX;
1750 }
1751
1752 val = qatomic_xchg(&vdev->isr, 0);
1753 pci_irq_deassert(&proxy->pci_dev);
1754 return val;
1755 }
1756
virtio_pci_isr_write(void * opaque,hwaddr addr,uint64_t val,unsigned size)1757 static void virtio_pci_isr_write(void *opaque, hwaddr addr,
1758 uint64_t val, unsigned size)
1759 {
1760 }
1761
virtio_pci_device_read(void * opaque,hwaddr addr,unsigned size)1762 static uint64_t virtio_pci_device_read(void *opaque, hwaddr addr,
1763 unsigned size)
1764 {
1765 VirtIOPCIProxy *proxy = opaque;
1766 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1767 uint64_t val;
1768
1769 if (vdev == NULL) {
1770 return UINT64_MAX;
1771 }
1772
1773 switch (size) {
1774 case 1:
1775 val = virtio_config_modern_readb(vdev, addr);
1776 break;
1777 case 2:
1778 val = virtio_config_modern_readw(vdev, addr);
1779 break;
1780 case 4:
1781 val = virtio_config_modern_readl(vdev, addr);
1782 break;
1783 default:
1784 val = 0;
1785 break;
1786 }
1787 return val;
1788 }
1789
virtio_pci_device_write(void * opaque,hwaddr addr,uint64_t val,unsigned size)1790 static void virtio_pci_device_write(void *opaque, hwaddr addr,
1791 uint64_t val, unsigned size)
1792 {
1793 VirtIOPCIProxy *proxy = opaque;
1794 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
1795
1796 if (vdev == NULL) {
1797 return;
1798 }
1799
1800 switch (size) {
1801 case 1:
1802 virtio_config_modern_writeb(vdev, addr, val);
1803 break;
1804 case 2:
1805 virtio_config_modern_writew(vdev, addr, val);
1806 break;
1807 case 4:
1808 virtio_config_modern_writel(vdev, addr, val);
1809 break;
1810 }
1811 }
1812
static void virtio_pci_modern_regions_init(VirtIOPCIProxy *proxy,
                                           const char *vdev_name)
{
    static const MemoryRegionOps common_ops = {
        .read = virtio_pci_common_read,
        .write = virtio_pci_common_write,
        .impl = {
            .min_access_size = 1,
            .max_access_size = 4,
        },
        .endianness = DEVICE_LITTLE_ENDIAN,
    };
    static const MemoryRegionOps isr_ops = {
        .read = virtio_pci_isr_read,
        .write = virtio_pci_isr_write,
        .impl = {
            .min_access_size = 1,
            .max_access_size = 4,
        },
        .endianness = DEVICE_LITTLE_ENDIAN,
    };
    static const MemoryRegionOps device_ops = {
        .read = virtio_pci_device_read,
        .write = virtio_pci_device_write,
        .impl = {
            .min_access_size = 1,
            .max_access_size = 4,
        },
        .endianness = DEVICE_LITTLE_ENDIAN,
    };
    static const MemoryRegionOps notify_ops = {
        .read = virtio_pci_notify_read,
        .write = virtio_pci_notify_write,
        .impl = {
            .min_access_size = 1,
            .max_access_size = 4,
        },
        .endianness = DEVICE_LITTLE_ENDIAN,
    };
    static const MemoryRegionOps notify_pio_ops = {
        .read = virtio_pci_notify_read,
        .write = virtio_pci_notify_write_pio,
        .impl = {
            .min_access_size = 1,
            .max_access_size = 4,
        },
        .endianness = DEVICE_LITTLE_ENDIAN,
    };
    g_autoptr(GString) name = g_string_new(NULL);

    g_string_printf(name, "virtio-pci-common-%s", vdev_name);
    memory_region_init_io(&proxy->common.mr, OBJECT(proxy),
                          &common_ops,
                          proxy,
                          name->str,
                          proxy->common.size);

    g_string_printf(name, "virtio-pci-isr-%s", vdev_name);
    memory_region_init_io(&proxy->isr.mr, OBJECT(proxy),
                          &isr_ops,
                          proxy,
                          name->str,
                          proxy->isr.size);

    g_string_printf(name, "virtio-pci-device-%s", vdev_name);
    memory_region_init_io(&proxy->device.mr, OBJECT(proxy),
                          &device_ops,
                          proxy,
                          name->str,
                          proxy->device.size);

    g_string_printf(name, "virtio-pci-notify-%s", vdev_name);
    memory_region_init_io(&proxy->notify.mr, OBJECT(proxy),
                          &notify_ops,
                          proxy,
                          name->str,
                          proxy->notify.size);

    g_string_printf(name, "virtio-pci-notify-pio-%s", vdev_name);
    memory_region_init_io(&proxy->notify_pio.mr, OBJECT(proxy),
                          &notify_pio_ops,
                          proxy,
                          name->str,
                          proxy->notify_pio.size);
}

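/*
 * Map one capability region into the given BAR and advertise it with a
 * vendor-specific PCI capability; offset and length are stored
 * little-endian as mandated by the modern virtio layout.
 */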
static void virtio_pci_modern_region_map(VirtIOPCIProxy *proxy,
                                         VirtIOPCIRegion *region,
                                         struct virtio_pci_cap *cap,
                                         MemoryRegion *mr,
                                         uint8_t bar)
{
    memory_region_add_subregion(mr, region->offset, &region->mr);

    cap->cfg_type = region->type;
    cap->bar = bar;
    cap->offset = cpu_to_le32(region->offset);
    cap->length = cpu_to_le32(region->size);
    virtio_pci_add_mem_cap(proxy, cap);
}

static void virtio_pci_modern_mem_region_map(VirtIOPCIProxy *proxy,
                                             VirtIOPCIRegion *region,
                                             struct virtio_pci_cap *cap)
{
    virtio_pci_modern_region_map(proxy, region, cap,
                                 &proxy->modern_bar, proxy->modern_mem_bar_idx);
}

static void virtio_pci_modern_io_region_map(VirtIOPCIProxy *proxy,
                                            VirtIOPCIRegion *region,
                                            struct virtio_pci_cap *cap)
{
    virtio_pci_modern_region_map(proxy, region, cap,
                                 &proxy->io_bar, proxy->modern_io_bar_idx);
}

static void virtio_pci_modern_mem_region_unmap(VirtIOPCIProxy *proxy,
                                               VirtIOPCIRegion *region)
{
    memory_region_del_subregion(&proxy->modern_bar,
                                &region->mr);
}

static void virtio_pci_modern_io_region_unmap(VirtIOPCIProxy *proxy,
                                              VirtIOPCIRegion *region)
{
    memory_region_del_subregion(&proxy->io_bar,
                                &region->mr);
}

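/*
 * Called by virtio-bus while the device is being plugged, before
 * virtio_pci_device_plugged(): the transport adds the feature bits it
 * can offer on top of the device's own host features.
 */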
static void virtio_pci_pre_plugged(DeviceState *d, Error **errp)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    if (virtio_pci_modern(proxy)) {
        virtio_add_feature(&vdev->host_features, VIRTIO_F_VERSION_1);
    }

    virtio_add_feature(&vdev->host_features, VIRTIO_F_BAD_FEATURE);
}

/* This is called by virtio-bus just after the device is plugged. */
static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
    VirtioBusState *bus = &proxy->bus;
    bool legacy = virtio_pci_legacy(proxy);
    bool modern;
    bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
    uint8_t *config;
    uint32_t size;
    VirtIODevice *vdev = virtio_bus_get_device(bus);
    int16_t res;

    /*
     * Virtio capabilities present without
     * VIRTIO_F_VERSION_1 confuse guests
     */
    if (!proxy->ignore_backend_features &&
            !virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
        virtio_pci_disable_modern(proxy);

        if (!legacy) {
            error_setg(errp, "Device doesn't support modern mode, and legacy"
                             " mode is disabled");
            error_append_hint(errp, "Set disable-legacy to off\n");

            return;
        }
    }

    modern = virtio_pci_modern(proxy);

    config = proxy->pci_dev.config;
    if (proxy->class_code) {
        pci_config_set_class(config, proxy->class_code);
    }

    if (legacy) {
        if (!virtio_legacy_allowed(vdev)) {
            /*
             * To avoid migration issues, we allow legacy mode when legacy
             * check is disabled in the old machine types (< 5.1).
             */
            if (virtio_legacy_check_disabled(vdev)) {
                warn_report("device is modern-only, but for backward "
                            "compatibility legacy is allowed");
            } else {
                error_setg(errp,
                           "device is modern-only, use disable-legacy=on");
                return;
            }
        }
        if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
            error_setg(errp, "VIRTIO_F_IOMMU_PLATFORM was supported by"
                       " neither legacy nor transitional device");
            return;
        }
        /*
         * Legacy and transitional devices use specific subsystem IDs.
         * Note that the subsystem vendor ID (config + PCI_SUBSYSTEM_VENDOR_ID)
         * is set to PCI_SUBVENDOR_ID_REDHAT_QUMRANET by default.
         */
        pci_set_word(config + PCI_SUBSYSTEM_ID, virtio_bus_get_vdev_id(bus));
        if (proxy->trans_devid) {
            pci_config_set_device_id(config, proxy->trans_devid);
        }
    } else {
        /* pure virtio-1.0 */
        pci_set_word(config + PCI_VENDOR_ID,
                     PCI_VENDOR_ID_REDHAT_QUMRANET);
        pci_set_word(config + PCI_DEVICE_ID,
                     PCI_DEVICE_ID_VIRTIO_10_BASE + virtio_bus_get_vdev_id(bus));
        pci_config_set_revision(config, 1);
    }
    config[PCI_INTERRUPT_PIN] = 1;

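    /*
     * Modern layout: the common, ISR, device and notify structures are
     * exposed through vendor-specific capabilities pointing into the
     * memory BAR; an optional PIO notify capability and the PCI config
     * access capability are added as well.
     */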
    if (modern) {
        struct virtio_pci_cap cap = {
            .cap_len = sizeof cap,
        };
        struct virtio_pci_notify_cap notify = {
            .cap.cap_len = sizeof notify,
            .notify_off_multiplier =
                cpu_to_le32(virtio_pci_queue_mem_mult(proxy)),
        };
        struct virtio_pci_cfg_cap cfg = {
            .cap.cap_len = sizeof cfg,
            .cap.cfg_type = VIRTIO_PCI_CAP_PCI_CFG,
        };
        struct virtio_pci_notify_cap notify_pio = {
            .cap.cap_len = sizeof notify,
            .notify_off_multiplier = cpu_to_le32(0x0),
        };

        struct virtio_pci_cfg_cap *cfg_mask;

        virtio_pci_modern_regions_init(proxy, vdev->name);

        virtio_pci_modern_mem_region_map(proxy, &proxy->common, &cap);
        virtio_pci_modern_mem_region_map(proxy, &proxy->isr, &cap);
        virtio_pci_modern_mem_region_map(proxy, &proxy->device, &cap);
        virtio_pci_modern_mem_region_map(proxy, &proxy->notify, &notify.cap);

        if (modern_pio) {
            memory_region_init(&proxy->io_bar, OBJECT(proxy),
                               "virtio-pci-io", 0x4);
            address_space_init(&proxy->modern_cfg_io_as, &proxy->io_bar,
                               "virtio-pci-cfg-io-as");

            pci_register_bar(&proxy->pci_dev, proxy->modern_io_bar_idx,
                             PCI_BASE_ADDRESS_SPACE_IO, &proxy->io_bar);

            virtio_pci_modern_io_region_map(proxy, &proxy->notify_pio,
                                            &notify_pio.cap);
        }

        pci_register_bar(&proxy->pci_dev, proxy->modern_mem_bar_idx,
                         PCI_BASE_ADDRESS_SPACE_MEMORY |
                         PCI_BASE_ADDRESS_MEM_PREFETCH |
                         PCI_BASE_ADDRESS_MEM_TYPE_64,
                         &proxy->modern_bar);

        proxy->config_cap = virtio_pci_add_mem_cap(proxy, &cfg.cap);
        cfg_mask = (void *)(proxy->pci_dev.wmask + proxy->config_cap);
        pci_set_byte(&cfg_mask->cap.bar, ~0x0);
        pci_set_long((uint8_t *)&cfg_mask->cap.offset, ~0x0);
        pci_set_long((uint8_t *)&cfg_mask->cap.length, ~0x0);
        pci_set_long(cfg_mask->pci_cfg_data, ~0x0);
    }

    if (proxy->nvectors) {
        int err = msix_init_exclusive_bar(&proxy->pci_dev, proxy->nvectors,
                                          proxy->msix_bar_idx, NULL);
        if (err) {
            /* Notice when a system that supports MSI-X can't initialize it */
            if (err != -ENOTSUP) {
                warn_report("unable to init msix vectors to %" PRIu32,
                            proxy->nvectors);
            }
            proxy->nvectors = 0;
        }
    }

    proxy->pci_dev.config_write = virtio_write_config;
    proxy->pci_dev.config_read = virtio_read_config;

    if (legacy) {
        size = VIRTIO_PCI_REGION_SIZE(&proxy->pci_dev)
            + virtio_bus_get_vdev_config_len(bus);
        size = pow2ceil(size);

        memory_region_init_io(&proxy->bar, OBJECT(proxy),
                              &virtio_pci_config_ops,
                              proxy, "virtio-pci", size);

        pci_register_bar(&proxy->pci_dev, proxy->legacy_io_bar_idx,
                         PCI_BASE_ADDRESS_SPACE_IO, &proxy->bar);
    }

    if (pci_is_vf(&proxy->pci_dev)) {
        pcie_ari_init(&proxy->pci_dev, proxy->last_pcie_cap_offset);
        proxy->last_pcie_cap_offset += PCI_ARI_SIZEOF;
    } else {
        res = pcie_sriov_pf_init_from_user_created_vfs(
            &proxy->pci_dev, proxy->last_pcie_cap_offset, errp);
        if (res > 0) {
            proxy->last_pcie_cap_offset += res;
            virtio_add_feature(&vdev->host_features, VIRTIO_F_SR_IOV);
        }
    }
}

static void virtio_pci_device_unplugged(DeviceState *d)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
    bool modern = virtio_pci_modern(proxy);
    bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;

    virtio_pci_stop_ioeventfd(proxy);

    if (modern) {
        virtio_pci_modern_mem_region_unmap(proxy, &proxy->common);
        virtio_pci_modern_mem_region_unmap(proxy, &proxy->isr);
        virtio_pci_modern_mem_region_unmap(proxy, &proxy->device);
        virtio_pci_modern_mem_region_unmap(proxy, &proxy->notify);
        if (modern_pio) {
            virtio_pci_modern_io_region_unmap(proxy, &proxy->notify_pio);
        }
    }
}

static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
    VirtioPCIClass *k = VIRTIO_PCI_GET_CLASS(pci_dev);
    bool pcie_port = pci_bus_is_express(pci_get_bus(pci_dev)) &&
                     !pci_bus_is_root(pci_get_bus(pci_dev));

    /* fd-based ioevents can't be synchronized in record/replay */
    if (replay_mode != REPLAY_MODE_NONE) {
        proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD;
    }

    /*
     * virtio pci bar layout used by default.
     * subclasses can re-arrange things if needed.
     *
     * region 0   -- virtio legacy io bar
     * region 1   -- msi-x bar
     * region 2   -- virtio modern io bar (off by default)
     * region 4+5 -- virtio modern memory (64bit) bar
     *
     */
    proxy->legacy_io_bar_idx = 0;
    proxy->msix_bar_idx = 1;
    proxy->modern_io_bar_idx = 2;
    proxy->modern_mem_bar_idx = 4;

    proxy->common.offset = 0x0;
    proxy->common.size = 0x1000;
    proxy->common.type = VIRTIO_PCI_CAP_COMMON_CFG;

    proxy->isr.offset = 0x1000;
    proxy->isr.size = 0x1000;
    proxy->isr.type = VIRTIO_PCI_CAP_ISR_CFG;

    proxy->device.offset = 0x2000;
    proxy->device.size = 0x1000;
    proxy->device.type = VIRTIO_PCI_CAP_DEVICE_CFG;

    proxy->notify.offset = 0x3000;
    proxy->notify.size = virtio_pci_queue_mem_mult(proxy) * VIRTIO_QUEUE_MAX;
    proxy->notify.type = VIRTIO_PCI_CAP_NOTIFY_CFG;

    proxy->notify_pio.offset = 0x0;
    proxy->notify_pio.size = 0x4;
    proxy->notify_pio.type = VIRTIO_PCI_CAP_NOTIFY_CFG;

    /* subclasses can enforce modern, so do this unconditionally */
    memory_region_init(&proxy->modern_bar, OBJECT(proxy), "virtio-pci",
                       /* PCI BAR regions must be powers of 2 */
                       pow2ceil(proxy->notify.offset + proxy->notify.size));

    address_space_init(&proxy->modern_cfg_mem_as, &proxy->modern_bar,
                       "virtio-pci-cfg-mem-as");

    if (proxy->disable_legacy == ON_OFF_AUTO_AUTO) {
        proxy->disable_legacy = pcie_port ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
    }

    if (!virtio_pci_modern(proxy) && !virtio_pci_legacy(proxy)) {
        error_setg(errp, "device cannot work as neither modern nor legacy mode"
                   " is enabled");
        error_append_hint(errp, "Set either disable-modern or disable-legacy"
                          " to off\n");
        return;
    }

    if (pcie_port && pci_is_express(pci_dev)) {
        int pos;
        proxy->last_pcie_cap_offset = PCI_CONFIG_SPACE_SIZE;

        pos = pcie_endpoint_cap_init(pci_dev, 0);
        assert(pos > 0);

        pos = pci_pm_init(pci_dev, 0, errp);
        if (pos < 0) {
            return;
        }

        /*
         * Indicates that this function complies with revision 1.2 of the
         * PCI Power Management Interface Specification.
         */
        pci_set_word(pci_dev->config + pos + PCI_PM_PMC, 0x3);

        if (proxy->flags & VIRTIO_PCI_FLAG_AER) {
            pcie_aer_init(pci_dev, PCI_ERR_VER, proxy->last_pcie_cap_offset,
                          PCI_ERR_SIZEOF, NULL);
            proxy->last_pcie_cap_offset += PCI_ERR_SIZEOF;
        }

        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_DEVERR) {
            /* Init error enabling flags */
            pcie_cap_deverr_init(pci_dev);
        }

        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_LNKCTL) {
            /* Init Link Control Register */
            pcie_cap_lnkctl_init(pci_dev);
        }

        if (proxy->flags & VIRTIO_PCI_FLAG_PM_NO_SOFT_RESET) {
            pci_set_word(pci_dev->config + pos + PCI_PM_CTRL,
                         PCI_PM_CTRL_NO_SOFT_RESET);
        }

        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_PM) {
            /* Init Power Management Control Register */
            pci_set_word(pci_dev->wmask + pos + PCI_PM_CTRL,
                         PCI_PM_CTRL_STATE_MASK);
        }

        if (proxy->flags & VIRTIO_PCI_FLAG_ATS) {
            pcie_ats_init(pci_dev, proxy->last_pcie_cap_offset,
                          proxy->flags & VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED);
            proxy->last_pcie_cap_offset += PCI_EXT_CAP_ATS_SIZEOF;
        }

        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_FLR) {
            /* Set Function Level Reset capability bit */
            pcie_cap_flr_init(pci_dev);
        }
    } else {
        /*
         * make future invocations of pci_is_express() return false
         * and pci_config_size() return PCI_CONFIG_SPACE_SIZE.
         */
        pci_dev->cap_present &= ~QEMU_PCI_CAP_EXPRESS;
    }

    virtio_pci_bus_new(&proxy->bus, sizeof(proxy->bus), proxy);
    if (k->realize) {
        k->realize(proxy, errp);
    }
}

static void virtio_pci_exit(PCIDevice *pci_dev)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
    bool pcie_port = pci_bus_is_express(pci_get_bus(pci_dev)) &&
                     !pci_bus_is_root(pci_get_bus(pci_dev));
    bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;

    pcie_sriov_pf_exit(&proxy->pci_dev);
    msix_uninit_exclusive_bar(pci_dev);
    if (proxy->flags & VIRTIO_PCI_FLAG_AER && pcie_port &&
        pci_is_express(pci_dev)) {
        pcie_aer_exit(pci_dev);
    }
    address_space_destroy(&proxy->modern_cfg_mem_as);
    if (modern_pio) {
        address_space_destroy(&proxy->modern_cfg_io_as);
    }
}

static void virtio_pci_reset(DeviceState *qdev)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
    VirtioBusState *bus = VIRTIO_BUS(&proxy->bus);
    int i;

    virtio_bus_reset(bus);
    msix_unuse_all_vectors(&proxy->pci_dev);

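    /*
     * Clear the transport's shadow copy of the modern virtqueue state;
     * the core virtio device state was reset by virtio_bus_reset() above.
     */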
    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        proxy->vqs[i].enabled = 0;
        proxy->vqs[i].reset = 0;
        proxy->vqs[i].num = 0;
        proxy->vqs[i].desc[0] = proxy->vqs[i].desc[1] = 0;
        proxy->vqs[i].avail[0] = proxy->vqs[i].avail[1] = 0;
        proxy->vqs[i].used[0] = proxy->vqs[i].used[1] = 0;
    }
}

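/*
 * PCI PM: when the device reports No_Soft_Reset and is in the D3hot power
 * state, a transition back to D0 must not reset its internal state.
 */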
static bool virtio_pci_no_soft_reset(PCIDevice *dev)
{
    uint16_t pmcsr;

    if (!pci_is_express(dev) || !(dev->cap_present & QEMU_PCI_CAP_PM)) {
        return false;
    }

    pmcsr = pci_get_word(dev->config + dev->pm_cap + PCI_PM_CTRL);

    /*
     * When the No_Soft_Reset bit is set and the device
     * is in D3hot state, don't reset the device
     */
    return (pmcsr & PCI_PM_CTRL_NO_SOFT_RESET) &&
           (pmcsr & PCI_PM_CTRL_STATE_MASK) == 3;
}

static void virtio_pci_bus_reset_hold(Object *obj, ResetType type)
{
    PCIDevice *dev = PCI_DEVICE(obj);
    DeviceState *qdev = DEVICE(obj);

    if (virtio_pci_no_soft_reset(dev)) {
        return;
    }

    virtio_pci_reset(qdev);

    if (pci_is_express(dev)) {
        VirtIOPCIProxy *proxy = VIRTIO_PCI(dev);

        pcie_cap_deverr_reset(dev);
        pcie_cap_lnkctl_reset(dev);

        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_PM) {
            pci_word_test_and_clear_mask(
                dev->config + dev->pm_cap + PCI_PM_CTRL,
                PCI_PM_CTRL_STATE_MASK);
        }
    }
}

static const Property virtio_pci_properties[] = {
    DEFINE_PROP_BIT("virtio-pci-bus-master-bug-migration", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, false),
    DEFINE_PROP_BIT("modern-pio-notify", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, false),
    DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false),
    DEFINE_PROP_BOOL("x-ignore-backend-features", VirtIOPCIProxy,
                     ignore_backend_features, false),
    DEFINE_PROP_BIT("ats", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_ATS_BIT, false),
    DEFINE_PROP_BIT("x-ats-page-aligned", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED_BIT, true),
    DEFINE_PROP_BIT("x-pcie-deverr-init", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_INIT_DEVERR_BIT, true),
    DEFINE_PROP_BIT("x-pcie-lnkctl-init", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_INIT_LNKCTL_BIT, true),
    DEFINE_PROP_BIT("x-pcie-pm-init", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_INIT_PM_BIT, true),
    DEFINE_PROP_BIT("x-pcie-pm-no-soft-reset", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_PM_NO_SOFT_RESET_BIT, false),
    DEFINE_PROP_BIT("x-pcie-flr-init", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_INIT_FLR_BIT, true),
    DEFINE_PROP_BIT("aer", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_AER_BIT, false),
};

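/*
 * Modern-capable proxies are flagged as PCI Express before the parent
 * realize runs; virtio_pci_realize() drops the flag again if the device
 * does not end up behind a PCI Express port.
 */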
static void virtio_pci_dc_realize(DeviceState *qdev, Error **errp)
{
    VirtioPCIClass *vpciklass = VIRTIO_PCI_GET_CLASS(qdev);
    VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
    PCIDevice *pci_dev = &proxy->pci_dev;

    if (virtio_pci_modern(proxy)) {
        pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
    }

    vpciklass->parent_dc_realize(qdev, errp);
}

static int virtio_pci_sync_config(DeviceState *dev, Error **errp)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(dev);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    return qdev_sync_config(DEVICE(vdev), errp);
}

static void virtio_pci_class_init(ObjectClass *klass, const void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
    VirtioPCIClass *vpciklass = VIRTIO_PCI_CLASS(klass);
    ResettableClass *rc = RESETTABLE_CLASS(klass);

    device_class_set_props(dc, virtio_pci_properties);
    k->realize = virtio_pci_realize;
    k->exit = virtio_pci_exit;
    k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
    k->revision = VIRTIO_PCI_ABI_VERSION;
    k->class_id = PCI_CLASS_OTHERS;
    device_class_set_parent_realize(dc, virtio_pci_dc_realize,
                                    &vpciklass->parent_dc_realize);
    rc->phases.hold = virtio_pci_bus_reset_hold;
    dc->sync_config = virtio_pci_sync_config;
}

static const TypeInfo virtio_pci_info = {
    .name = TYPE_VIRTIO_PCI,
    .parent = TYPE_PCI_DEVICE,
    .instance_size = sizeof(VirtIOPCIProxy),
    .class_init = virtio_pci_class_init,
    .class_size = sizeof(VirtioPCIClass),
    .abstract = true,
};

static const Property virtio_pci_generic_properties[] = {
    DEFINE_PROP_ON_OFF_AUTO("disable-legacy", VirtIOPCIProxy, disable_legacy,
                            ON_OFF_AUTO_AUTO),
    DEFINE_PROP_BOOL("disable-modern", VirtIOPCIProxy, disable_modern, false),
};

static void virtio_pci_base_class_init(ObjectClass *klass, const void *data)
{
    const VirtioPCIDeviceTypeInfo *t = data;
    if (t->class_init) {
        t->class_init(klass, NULL);
    }
}

static void virtio_pci_generic_class_init(ObjectClass *klass, const void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    device_class_set_props(dc, virtio_pci_generic_properties);
}

static void virtio_pci_transitional_instance_init(Object *obj)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(obj);

    proxy->disable_legacy = ON_OFF_AUTO_OFF;
    proxy->disable_modern = false;
}

static void virtio_pci_non_transitional_instance_init(Object *obj)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(obj);

    proxy->disable_legacy = ON_OFF_AUTO_ON;
    proxy->disable_modern = false;
}

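/*
 * Register the QOM types for one virtio-pci device family.  A hypothetical
 * caller (all names below are illustrative, not from this file) would look
 * like:
 *
 *     static const VirtioPCIDeviceTypeInfo virtio_foo_pci_info = {
 *         .base_name             = "virtio-foo-pci-base",
 *         .generic_name          = "virtio-foo-pci",
 *         .transitional_name     = "virtio-foo-pci-transitional",
 *         .non_transitional_name = "virtio-foo-pci-non-transitional",
 *         .instance_size = sizeof(VirtIOFooPCI),
 *         .instance_init = virtio_foo_pci_instance_init,
 *         .class_init    = virtio_foo_pci_class_init,
 *     };
 *     virtio_pci_types_register(&virtio_foo_pci_info);
 */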
void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t)
{
    char *base_name = NULL;
    TypeInfo base_type_info = {
        .name = t->base_name,
        .parent = t->parent ? t->parent : TYPE_VIRTIO_PCI,
        .instance_size = t->instance_size,
        .instance_init = t->instance_init,
        .instance_finalize = t->instance_finalize,
        .class_size = t->class_size,
        .abstract = true,
        .interfaces = t->interfaces,
    };
    TypeInfo generic_type_info = {
        .name = t->generic_name,
        .parent = base_type_info.name,
        .class_init = virtio_pci_generic_class_init,
        .interfaces = (const InterfaceInfo[]) {
            { INTERFACE_PCIE_DEVICE },
            { INTERFACE_CONVENTIONAL_PCI_DEVICE },
            { }
        },
    };

    if (!base_type_info.name) {
        /* No base type -> register a single generic device type */
        /* use intermediate %s-base-type to add generic device props */
        base_name = g_strdup_printf("%s-base-type", t->generic_name);
        base_type_info.name = base_name;
        base_type_info.class_init = virtio_pci_generic_class_init;

        generic_type_info.parent = base_name;
        generic_type_info.class_init = virtio_pci_base_class_init;
        generic_type_info.class_data = t;

        assert(!t->non_transitional_name);
        assert(!t->transitional_name);
    } else {
        base_type_info.class_init = virtio_pci_base_class_init;
        base_type_info.class_data = t;
    }

    type_register_static(&base_type_info);
    if (generic_type_info.name) {
        type_register_static(&generic_type_info);
    }

    if (t->non_transitional_name) {
        const TypeInfo non_transitional_type_info = {
            .name = t->non_transitional_name,
            .parent = base_type_info.name,
            .instance_init = virtio_pci_non_transitional_instance_init,
            .interfaces = (const InterfaceInfo[]) {
                { INTERFACE_PCIE_DEVICE },
                { INTERFACE_CONVENTIONAL_PCI_DEVICE },
                { }
            },
        };
        type_register_static(&non_transitional_type_info);
    }

    if (t->transitional_name) {
        const TypeInfo transitional_type_info = {
            .name = t->transitional_name,
            .parent = base_type_info.name,
            .instance_init = virtio_pci_transitional_instance_init,
            .interfaces = (const InterfaceInfo[]) {
                /*
                 * Transitional virtio devices work only as Conventional PCI
                 * devices because they require PIO ports.
                 */
                { INTERFACE_CONVENTIONAL_PCI_DEVICE },
                { }
            },
        };
        type_register_static(&transitional_type_info);
    }
    g_free(base_name);
}

unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues)
{
    /*
     * 1:1 vq to vCPU mapping is ideal because the same vCPU that submitted
     * virtqueue buffers can handle their completion. When a different vCPU
     * handles completion it may need to IPI the vCPU that submitted the
     * request and this adds overhead.
     *
     * Virtqueues consume guest RAM and MSI-X vectors. This is wasteful in
     * guests with very many vCPUs and a device that is only used by a few
     * vCPUs. Unfortunately optimizing that case requires manual pinning inside
     * the guest, so those users might as well manually set the number of
     * queues. There is no upper limit that can be applied automatically and
     * doing so arbitrarily would result in a sudden performance drop once the
     * threshold number of vCPUs is exceeded.
     */
    unsigned num_queues = current_machine->smp.cpus;

    /*
     * The maximum number of MSI-X vectors is PCI_MSIX_FLAGS_QSIZE + 1, but the
     * config change interrupt and the fixed virtqueues must be taken into
     * account too.
     */
    num_queues = MIN(num_queues, PCI_MSIX_FLAGS_QSIZE - fixed_queues);

    /*
     * There is a limit to how many virtqueues a device can have.
     */
    return MIN(num_queues, VIRTIO_QUEUE_MAX - fixed_queues);
}

/* virtio-pci-bus */

static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size,
                               VirtIOPCIProxy *dev)
{
    DeviceState *qdev = DEVICE(dev);
    char virtio_bus_name[] = "virtio-bus";

    qbus_init(bus, bus_size, TYPE_VIRTIO_PCI_BUS, qdev, virtio_bus_name);
}

static void virtio_pci_bus_class_init(ObjectClass *klass, const void *data)
{
    BusClass *bus_class = BUS_CLASS(klass);
    VirtioBusClass *k = VIRTIO_BUS_CLASS(klass);
    bus_class->max_dev = 1;
    k->notify = virtio_pci_notify;
    k->save_config = virtio_pci_save_config;
    k->load_config = virtio_pci_load_config;
    k->save_queue = virtio_pci_save_queue;
    k->load_queue = virtio_pci_load_queue;
    k->save_extra_state = virtio_pci_save_extra_state;
    k->load_extra_state = virtio_pci_load_extra_state;
    k->has_extra_state = virtio_pci_has_extra_state;
    k->query_guest_notifiers = virtio_pci_query_guest_notifiers;
    k->set_guest_notifiers = virtio_pci_set_guest_notifiers;
    k->set_host_notifier_mr = virtio_pci_set_host_notifier_mr;
    k->vmstate_change = virtio_pci_vmstate_change;
    k->pre_plugged = virtio_pci_pre_plugged;
    k->device_plugged = virtio_pci_device_plugged;
    k->device_unplugged = virtio_pci_device_unplugged;
    k->query_nvectors = virtio_pci_query_nvectors;
    k->ioeventfd_enabled = virtio_pci_ioeventfd_enabled;
    k->ioeventfd_assign = virtio_pci_ioeventfd_assign;
    k->get_dma_as = virtio_pci_get_dma_as;
    k->iommu_enabled = virtio_pci_iommu_enabled;
    k->queue_enabled = virtio_pci_queue_enabled;
}

static const TypeInfo virtio_pci_bus_info = {
    .name = TYPE_VIRTIO_PCI_BUS,
    .parent = TYPE_VIRTIO_BUS,
    .instance_size = sizeof(VirtioPCIBusState),
    .class_size = sizeof(VirtioPCIBusClass),
    .class_init = virtio_pci_bus_class_init,
};

static void virtio_pci_register_types(void)
{
    /* Base types: */
    type_register_static(&virtio_pci_bus_info);
    type_register_static(&virtio_pci_info);
}

type_init(virtio_pci_register_types)