Lines Matching +full:config +full:- +full:host

 * the COPYING file in the top-level directory.
 *
 * Based on qemu-kvm device-assignment:
 *  Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)

#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "migration/qemu-file.h"
#include "vfio-migration-internal.h"
#include "vfio-helpers.h"

#define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"

 * been serviced and the time gap is long enough, we re-enable mmaps for
 * regular interrupts and see much better latency by staying in non-mmap
 * other options with the x-intx-mmap-timeout-ms parameter (a value of
 * zero disables the timer).

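/*
 * For illustration (not from this file): the timer is tuned per device on
 * the QEMU command line; a hypothetical host device at 0000:01:00.0 with
 * the timer disabled would look like:
 *
 *   -device vfio-pci,host=0000:01:00.0,x-intx-mmap-timeout-ms=0
 */
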
/* vfio_intx_mmap_enable() */
    if (vdev->intx.pending) {
        timer_mod(vdev->intx.mmap_timer,
                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + vdev->intx.mmap_timeout);

/* vfio_intx_interrupt() */
    if (!event_notifier_test_and_clear(&vdev->intx.interrupt)) {

    trace_vfio_intx_interrupt(vdev->vbasedev.name, 'A' + vdev->intx.pin);

    vdev->intx.pending = true;
    pci_irq_assert(&vdev->pdev);

    if (vdev->intx.mmap_timeout) {
        timer_mod(vdev->intx.mmap_timer,
                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + vdev->intx.mmap_timeout);

/* vfio_intx_eoi() */
    if (!vdev->intx.pending) {

    trace_vfio_intx_eoi(vbasedev->name);

    vdev->intx.pending = false;
    pci_irq_deassert(&vdev->pdev);

/* vfio_intx_enable_kvm() */
    int irq_fd = event_notifier_get_fd(&vdev->intx.interrupt);

    if (vdev->no_kvm_intx || !kvm_irqfds_enabled() ||
        vdev->intx.route.mode != PCI_INTX_ENABLED ||

    vfio_device_irq_mask(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX);
    vdev->intx.pending = false;
    pci_irq_deassert(&vdev->pdev);

    if (event_notifier_init(&vdev->intx.unmask, 0)) {

                                           &vdev->intx.interrupt,
                                           &vdev->intx.unmask,
                                           vdev->intx.route.irq)) {

    if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0,
                                       event_notifier_get_fd(&vdev->intx.unmask),

    vfio_device_irq_unmask(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX);

    vdev->intx.kvm_accel = true;

    trace_vfio_intx_enable_kvm(vdev->vbasedev.name);

    kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &vdev->intx.interrupt,
                                          vdev->intx.route.irq);

    event_notifier_cleanup(&vdev->intx.unmask);

    vfio_device_irq_unmask(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX);

/* vfio_intx_disable_kvm() */
    if (!vdev->intx.kvm_accel) {

     * interrupts, QEMU IRQ de-asserted.

    vfio_device_irq_mask(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX);
    vdev->intx.pending = false;
    pci_irq_deassert(&vdev->pdev);

    if (kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &vdev->intx.interrupt,
                                              vdev->intx.route.irq)) {

    event_notifier_cleanup(&vdev->intx.unmask);

    qemu_set_fd_handler(event_notifier_get_fd(&vdev->intx.interrupt),

    vdev->intx.kvm_accel = false;

    /* If we've missed an event, let it re-fire through QEMU */
    vfio_device_irq_unmask(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX);

    trace_vfio_intx_disable_kvm(vdev->vbasedev.name);

/* vfio_intx_update() */
    trace_vfio_intx_update(vdev->vbasedev.name,
                           vdev->intx.route.irq, route->irq);

    vdev->intx.route = *route;

    if (route->mode != PCI_INTX_ENABLED) {

        warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);

    /* Re-enable the interrupt in case we missed an EOI */
    vfio_intx_eoi(&vdev->vbasedev);

/* vfio_intx_routing_notifier() */
    if (vdev->interrupt != VFIO_INT_INTx) {

    route = pci_device_route_intx_to_irq(&vdev->pdev, vdev->intx.pin);

    if (pci_intx_route_changed(&vdev->intx.route, &route)) {

/* vfio_irqchip_change() */
    vfio_intx_update(vdev, &vdev->intx.route);

/* vfio_intx_enable() */
    uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1);

    vdev->intx.pin = pin - 1; /* Pin A (1) -> irq[0] */
    pci_config_set_interrupt_pin(vdev->pdev.config, pin);

    vdev->intx.route = pci_device_route_intx_to_irq(&vdev->pdev,
                                                    vdev->intx.pin);

    ret = event_notifier_init(&vdev->intx.interrupt, 0);
        error_setg_errno(errp, -ret, "event_notifier_init failed");
    fd = event_notifier_get_fd(&vdev->intx.interrupt);

    if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0,
        event_notifier_cleanup(&vdev->intx.interrupt);

        warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);

    vdev->interrupt = VFIO_INT_INTx;

    trace_vfio_intx_enable(vdev->vbasedev.name);

/* vfio_intx_disable() */
    timer_del(vdev->intx.mmap_timer);

    vfio_device_irq_disable(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX);
    vdev->intx.pending = false;
    pci_irq_deassert(&vdev->pdev);

    fd = event_notifier_get_fd(&vdev->intx.interrupt);

    event_notifier_cleanup(&vdev->intx.interrupt);

    vdev->interrupt = VFIO_INT_NONE;

    trace_vfio_intx_disable(vdev->vbasedev.name);

/* vfio_msi_interrupt() */
    VFIOPCIDevice *vdev = vector->vdev;

    int nr = vector - vdev->msi_vectors;

    if (!event_notifier_test_and_clear(&vector->interrupt)) {

    if (vdev->interrupt == VFIO_INT_MSIX) {

        if (msix_is_masked(&vdev->pdev, nr)) {
            set_bit(nr, vdev->msix->pending);
            memory_region_set_enabled(&vdev->pdev.msix_pba_mmio, true);
            trace_vfio_msix_pba_enable(vdev->vbasedev.name);

    } else if (vdev->interrupt == VFIO_INT_MSI) {

    msg = get_msg(&vdev->pdev, nr);
    trace_vfio_msi_interrupt(vdev->vbasedev.name, nr, msg.address, msg.data);
    notify(&vdev->pdev, nr);

/*
 * Get MSI-X enabled, but no vector enabled, by setting vector 0 with an invalid
 * fd to kernel.
 */

/* vfio_enable_msix_no_vec() */
    irq_set->argsz = argsz;
    irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
    irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
    irq_set->start = 0;
    irq_set->count = 1;
    fd = (int32_t *)&irq_set->data;
    *fd = -1;

    return vdev->vbasedev.io_ops->set_irqs(&vdev->vbasedev, irq_set);

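/*
 * For reference, the same VFIO_DEVICE_SET_IRQS UAPI used outside QEMU: a
 * minimal sketch wiring MSI vector 0 of an already-opened VFIO device fd
 * to an eventfd ('device_fd' is hypothetical, error handling omitted):
 */
#include <linux/vfio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>

static int wire_msi_vector0(int device_fd)
{
    size_t argsz = sizeof(struct vfio_irq_set) + sizeof(int32_t);
    struct vfio_irq_set *irq_set = malloc(argsz);
    int32_t efd = eventfd(0, 0);          /* signalled on each interrupt */
    int ret;

    irq_set->argsz = argsz;
    irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
    irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
    irq_set->start = 0;
    irq_set->count = 1;
    memcpy(&irq_set->data, &efd, sizeof(efd));

    ret = ioctl(device_fd, VFIO_DEVICE_SET_IRQS, irq_set);
    free(irq_set);
    return ret;
}
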
/* vfio_enable_vectors() */
     * If dynamic MSI-X allocation is supported, the vectors to be allocated
     * and enabled can be scattered.  Before the kernel enables MSI-X, setting
     * nr_vectors causes all these vectors to be allocated on the host.
     * MSI-X enabled first, then set vectors with a potentially sparse set of

    if (msix && !vdev->msix->noresize) {

    argsz = sizeof(*irq_set) + (vdev->nr_vectors * sizeof(*fds));

    irq_set->argsz = argsz;
    irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
    irq_set->index = msix ? VFIO_PCI_MSIX_IRQ_INDEX : VFIO_PCI_MSI_IRQ_INDEX;
    irq_set->start = 0;
    irq_set->count = vdev->nr_vectors;
    fds = (int32_t *)&irq_set->data;

    for (i = 0; i < vdev->nr_vectors; i++) {
        int fd = -1;

         * MSI vs MSI-X - The guest has direct access to MSI mask and pending
         * MSI-X mask and pending bits are emulated, so we want to use the

        if (vdev->msi_vectors[i].use) {
            if (vdev->msi_vectors[i].virq < 0 ||
                (msix && msix_is_masked(&vdev->pdev, i))) {
                fd = event_notifier_get_fd(&vdev->msi_vectors[i].interrupt);

                fd = event_notifier_get_fd(&vdev->msi_vectors[i].kvm_interrupt);

    ret = vdev->vbasedev.io_ops->set_irqs(&vdev->vbasedev, irq_set);

/* vfio_add_kvm_msi_virq() */
    if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi)) {

    vector->virq = kvm_irqchip_add_msi_route(&vfio_route_change,
                                             vector_n, &vdev->pdev);

/* vfio_connect_kvm_msi_virq() */
    if (vector->virq < 0) {

    if (event_notifier_init(&vector->kvm_interrupt, 0)) {

    if (kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, &vector->kvm_interrupt,
                                           NULL, vector->virq) < 0) {

    event_notifier_cleanup(&vector->kvm_interrupt);

    kvm_irqchip_release_virq(kvm_state, vector->virq);
    vector->virq = -1;

/* vfio_remove_kvm_msi_virq() */
    kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &vector->kvm_interrupt,
                                          vector->virq);
    kvm_irqchip_release_virq(kvm_state, vector->virq);
    vector->virq = -1;
    event_notifier_cleanup(&vector->kvm_interrupt);

/* vfio_update_kvm_msi_virq() */
    kvm_irqchip_update_msi_route(kvm_state, vector->virq, msg, pdev);

/* set_irq_signalling() */
    if (vector->virq >= 0) {
        fd = event_notifier_get_fd(&vector->kvm_interrupt);

        fd = event_notifier_get_fd(&vector->interrupt);

        error_reportf_err(err, VFIO_MSG_PREFIX, vbasedev->name);

/* vfio_msix_vector_do_use() */
    bool resizing = !!(vdev->nr_vectors < nr + 1);

    trace_vfio_msix_vector_do_use(vdev->vbasedev.name, nr);

    vector = &vdev->msi_vectors[nr];

    if (!vector->use) {
        vector->vdev = vdev;
        vector->virq = -1;
        if (event_notifier_init(&vector->interrupt, 0)) {

        vector->use = true;

    qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),

    if (vector->virq >= 0) {

    if (vdev->defer_kvm_irq_routing) {

     * host allocate all possible MSI vectors for a device if they're not
     * When dynamic allocation is supported, let the host only allocate

        vdev->nr_vectors = nr + 1;

    if (!vdev->defer_kvm_irq_routing) {
        if (vdev->msix->noresize && resizing) {
            vfio_device_irq_disable(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX);

                             strerror(-ret));

            set_irq_signalling(&vdev->vbasedev, vector, nr);

    clear_bit(nr, vdev->msix->pending);
    if (find_first_bit(vdev->msix->pending,
                       vdev->nr_vectors) == vdev->nr_vectors) {
        memory_region_set_enabled(&vdev->pdev.msix_pba_mmio, false);
        trace_vfio_msix_pba_disable(vdev->vbasedev.name);

/* vfio_msix_vector_release() */
    VFIOMSIVector *vector = &vdev->msi_vectors[nr];

    trace_vfio_msix_vector_release(vdev->vbasedev.name, nr);

     * the KVM setup in place, simply switch VFIO to use the non-bypass
     * eventfd.  We'll then fire the interrupt through QEMU and the MSI-X
     * be re-asserted on unmask.  Nothing to do if already using QEMU mode.

    if (vector->virq >= 0) {
        int32_t fd = event_notifier_get_fd(&vector->interrupt);

        if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX,

            error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);

/* vfio_prepare_kvm_msi_virq_batch() */
    assert(!vdev->defer_kvm_irq_routing);
    vdev->defer_kvm_irq_routing = true;

/* vfio_commit_kvm_msi_virq_batch() */
    assert(vdev->defer_kvm_irq_routing);
    vdev->defer_kvm_irq_routing = false;

    for (i = 0; i < vdev->nr_vectors; i++) {
        vfio_connect_kvm_msi_virq(&vdev->msi_vectors[i]);

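/*
 * Usage sketch: vfio_msix_enable() brackets the vector-use callbacks with
 * these two helpers, so per-vector kvm_irqchip_add_msi_route() calls are
 * committed to KVM once rather than once per vector:
 *
 *   vfio_prepare_kvm_msi_virq_batch(vdev);
 *   msix_set_vector_notifiers(&vdev->pdev, ...);
 *   vfio_commit_kvm_msi_virq_batch(vdev);
 */
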
/* vfio_msix_enable() */
    vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->msix->entries);

    vdev->interrupt = VFIO_INT_MSIX;

     * Setting vector notifiers triggers synchronous vector-use

    if (msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use,

    if (vdev->nr_vectors) {

                     strerror(-ret));

     * physical state of the device and expect that enabling MSI-X from the
     * guest enables the same on the host.  When our guest is Linux, the
     * MSI-X capability, but leaves the vector table masked.  We therefore
     * to switch the physical device into MSI-X mode because that may come a
     * invalid fd to make the physical device MSI-X enabled, but with no

        error_report("vfio: failed to enable MSI-X, %s",
                     strerror(-ret));

    trace_vfio_msix_enable(vdev->vbasedev.name);

/* vfio_msi_enable() */
    vdev->nr_vectors = msi_nr_vectors_allocated(&vdev->pdev);

    vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->nr_vectors);

    for (i = 0; i < vdev->nr_vectors; i++) {
        VFIOMSIVector *vector = &vdev->msi_vectors[i];

        vector->vdev = vdev;
        vector->virq = -1;
        vector->use = true;

        if (event_notifier_init(&vector->interrupt, 0)) {

        qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),

    vdev->interrupt = VFIO_INT_MSI;

                     strerror(-ret));

                         "MSI vectors, retry with %d", vdev->nr_vectors, ret);

        vdev->nr_vectors = ret;

    trace_vfio_msi_enable(vdev->vbasedev.name, vdev->nr_vectors);

/* vfio_msi_disable_common() */
    for (i = 0; i < vdev->nr_vectors; i++) {
        VFIOMSIVector *vector = &vdev->msi_vectors[i];
        if (vdev->msi_vectors[i].use) {
            if (vector->virq >= 0) {

            qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),

            event_notifier_cleanup(&vector->interrupt);

    g_free(vdev->msi_vectors);
    vdev->msi_vectors = NULL;
    vdev->nr_vectors = 0;
    vdev->interrupt = VFIO_INT_NONE;

/* vfio_msix_disable() */
    msix_unset_vector_notifiers(&vdev->pdev);

     * MSI-X will only release vectors if MSI-X is still enabled on the

    for (i = 0; i < vdev->nr_vectors; i++) {
        if (vdev->msi_vectors[i].use) {
            vfio_msix_vector_release(&vdev->pdev, i);
            msix_vector_unuse(&vdev->pdev, i);

     * Always clear MSI-X IRQ index.  A PF device could have enabled
     * MSI-X with no vectors.  See vfio_msix_enable().

    vfio_device_irq_disable(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX);

        error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);

    memset(vdev->msix->pending, 0,
           BITS_TO_LONGS(vdev->msix->entries) * sizeof(unsigned long));

    trace_vfio_msix_disable(vdev->vbasedev.name);

/* vfio_msi_disable() */
    vfio_device_irq_disable(&vdev->vbasedev, VFIO_PCI_MSI_IRQ_INDEX);

        error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);

    trace_vfio_msi_disable(vdev->vbasedev.name);

/* vfio_update_msi() */
    for (i = 0; i < vdev->nr_vectors; i++) {
        VFIOMSIVector *vector = &vdev->msi_vectors[i];

        if (!vector->use || vector->virq < 0) {

        msg = msi_get_message(&vdev->pdev, i);
        vfio_update_kvm_msi_virq(vector, msg, &vdev->pdev);

/* vfio_pci_load_rom() */
    VFIODevice *vbasedev = &vdev->vbasedev;

        error_report("vfio: Error getting ROM info: %s", strerror(-ret));

    trace_vfio_pci_load_rom(vbasedev->name, (unsigned long)reg_info->size,
                            (unsigned long)reg_info->offset,
                            (unsigned long)reg_info->flags);

    vdev->rom_size = size = reg_info->size;
    vdev->rom_offset = reg_info->offset;

    if (!vdev->rom_size) {
        vdev->rom_read_failed = true;
        error_report("vfio-pci: Cannot read device rom at %s", vbasedev->name);

    vdev->rom = g_malloc(size);
    memset(vdev->rom, 0xff, size);

        bytes = vbasedev->io_ops->region_read(vbasedev,
                                              off, size, vdev->rom + off);

            size -= bytes;

        if (bytes == -EINTR || bytes == -EAGAIN) {

    if (pci_get_word(vdev->rom) == 0xaa55 &&
        pci_get_word(vdev->rom + 0x18) + 8 < vdev->rom_size &&
        !memcmp(vdev->rom + pci_get_word(vdev->rom + 0x18), "PCIR", 4)) {

        vid = pci_get_word(vdev->rom + pci_get_word(vdev->rom + 0x18) + 4);
        did = pci_get_word(vdev->rom + pci_get_word(vdev->rom + 0x18) + 6);

        if (vid == vdev->vendor_id && did != vdev->device_id) {

            uint8_t csum, *data = vdev->rom;

            pci_set_word(vdev->rom + pci_get_word(vdev->rom + 0x18) + 6,
                         vdev->device_id);

            for (csum = 0, i = 0; i < vdev->rom_size; i++) {

            data[6] = -csum;

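/*
 * The fixup above keeps the option ROM valid: an x86 option ROM's bytes
 * must sum to zero modulo 256, and the code reuses a reserved header byte
 * (offset 6) as the adjustment.  A self-contained sketch of the same
 * recomputation:
 */
static void rom_fixup_checksum(uint8_t *rom, size_t size)
{
    uint8_t csum = 0;
    size_t i;

    rom[6] = 0;                 /* zero the adjustment byte before summing */
    for (i = 0; i < size; i++) {
        csum += rom[i];
    }
    rom[6] = -csum;             /* whole image now sums to zero mod 256 */
}
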
977 /* "Raw" read of underlying config space. */
981 return vdev->vbasedev.io_ops->region_read(&vdev->vbasedev, in vfio_pci_config_space_read()
986 /* "Raw" write of underlying config space. */
990 return vdev->vbasedev.io_ops->region_write(&vdev->vbasedev, in vfio_pci_config_space_write()
1007 if (unlikely(!vdev->rom && !vdev->rom_read_failed)) { in vfio_rom_read()
1011 memcpy(&val, vdev->rom + addr, in vfio_rom_read()
1012 (addr < vdev->rom_size) ? MIN(size, vdev->rom_size - addr) : 0); in vfio_rom_read()
1029 trace_vfio_rom_read(vdev->vbasedev.name, addr, size, data); in vfio_rom_read()
/* vfio_pci_size_rom() */
    VFIODevice *vbasedev = &vdev->vbasedev;

    if (vdev->pdev.romfile || !vdev->pdev.rom_bar) {

        if (vfio_opt_rom_in_denylist(vdev) && vdev->pdev.romfile) {
                         vdev->vbasedev.name);

        error_report("%s(%s) ROM access failed", __func__, vbasedev->name);

        if (vdev->pdev.rom_bar > 0) {
                        vdev->vbasedev.name);

                        vdev->vbasedev.name);

    trace_vfio_pci_size_rom(vdev->vbasedev.name, size);

    name = g_strdup_printf("vfio[%s].rom", vdev->vbasedev.name);

    memory_region_init_io(&vdev->pdev.rom, OBJECT(vdev),

    pci_register_bar(&vdev->pdev, PCI_ROM_SLOT,
                     PCI_BASE_ADDRESS_SPACE_MEMORY, &vdev->pdev.rom);

    vdev->rom_read_failed = false;

/* vfio_vga_write() */
    VFIOVGA *vga = container_of(region, VFIOVGA, region[region->nr]);

    off_t offset = vga->fd_offset + region->offset + addr;

    if (pwrite(vga->fd, &buf, size, offset) != size) {
                     __func__, region->offset + addr, data, size);

    trace_vfio_vga_write(region->offset + addr, data, size);

/* vfio_vga_read() */
    VFIOVGA *vga = container_of(region, VFIOVGA, region[region->nr]);

    off_t offset = vga->fd_offset + region->offset + addr;

    if (pread(vga->fd, &buf, size, offset) != size) {
                     __func__, region->offset + addr, size);
        return (uint64_t)-1;

    trace_vfio_vga_read(region->offset + addr, size, data);

 * Expand the memory region of a sub-page (size < PAGE_SIZE) MMIO BAR to page
 * size if the BAR sits in an exclusive page on the host, so that we can map
 * this BAR to the guest.  But this sub-page BAR may not occupy an exclusive
 * with the sub-page BAR in the guest.  Besides, we should also recover the
 * size of this sub-page BAR when its base address is changed in the guest

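/*
 * Worked example (assuming 4 KiB host pages): a 0x100-byte BAR can only be
 * mmap'd when the remaining 0xf00 bytes of its host page belong to no other
 * device; the region is then expanded to 0x1000 bytes so the guest mapping
 * covers a whole page, and shrunk back if the guest later programs a base
 * address that is no longer page aligned.
 */
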
/* vfio_sub_page_bar_update_mapping() */
    VFIORegion *region = &vdev->bars[bar].region;

    uint64_t size = region->size;

    if (region->nr_mmaps != 1 || !region->mmaps[0].mmap ||
        region->mmaps[0].size != region->size) {

    r = &pdev->io_regions[bar];
    bar_addr = r->addr;
    base_mr = vdev->bars[bar].mr;
    region_mr = region->mem;
    mmap_mr = &region->mmaps[0].mem;

    if (vdev->bars[bar].size < size) {

    if (size != vdev->bars[bar].size && memory_region_is_mapped(base_mr)) {
        memory_region_del_subregion(r->address_space, base_mr);
        memory_region_add_subregion_overlap(r->address_space,

/*
 * PCI config space
 */

/* vfio_pci_read_config() */
    VFIODevice *vbasedev = &vdev->vbasedev;

    memcpy(&emu_bits, vdev->emulated_config_bits + addr, len);

    if (~emu_bits & (0xffffffffU >> (32 - len * 8))) {

                     __func__, vbasedev->name, addr, len,
        return -1;

    trace_vfio_pci_read_config(vdev->vbasedev.name, addr, len, val);

/* vfio_pci_write_config() */
    VFIODevice *vbasedev = &vdev->vbasedev;

    trace_vfio_pci_write_config(vdev->vbasedev.name, addr, val, len);

                     __func__, vbasedev->name, addr, val, len,

    /* MSI/MSI-X Enabling/Disabling */
    if (pdev->cap_present & QEMU_PCI_CAP_MSI &&
        ranges_overlap(addr, len, pdev->msi_cap, vdev->msi_cap_size)) {

    } else if (pdev->cap_present & QEMU_PCI_CAP_MSIX &&
               ranges_overlap(addr, len, pdev->msix_cap, MSIX_CAP_LENGTH)) {

        pcibus_t old_addr[PCI_NUM_REGIONS - 1];

            old_addr[bar] = pdev->io_regions[bar].addr;

            if (old_addr[bar] != pdev->io_regions[bar].addr &&
                vdev->bars[bar].region.size > 0 &&
                vdev->bars[bar].region.size < qemu_real_host_page_size()) {

/* vfio_disable_interrupts() */
    if (vdev->interrupt == VFIO_INT_MSIX) {
    } else if (vdev->interrupt == VFIO_INT_MSI) {

    if (vdev->interrupt == VFIO_INT_INTx) {

/* vfio_msi_setup() */
    trace_vfio_msi_setup(vdev->vbasedev.name, pos);

    ret = msi_init(&vdev->pdev, pos, entries, msi_64bit, msi_maskbit, &err);

    if (ret == -ENOTSUP) {

    vdev->msi_cap_size = 0xa + (msi_maskbit ? 0xa : 0) + (msi_64bit ? 0x4 : 0);

/* vfio_pci_fixup_msix_region() */
    VFIORegion *region = &vdev->bars[vdev->msix->table_bar].region;

     * If the host driver allows mapping of the MSI-X data, we are going to

    if (vfio_device_has_region_cap(&vdev->vbasedev, region->nr,

    if (region->nr_mmaps != 1 || region->mmaps[0].offset ||
        region->size != region->mmaps[0].size) {

    /* MSI-X table start and end aligned to host page size */
    start = vdev->msix->table_offset & qemu_real_host_page_mask();
    end = REAL_HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset +
                               (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE));

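    /*
     * Worked example (4 KiB host pages): table_offset = 0x800 with 16
     * entries (16 * 16 = 256 bytes) gives start = 0x0 and end = 0x1000,
     * i.e. the table claims the BAR's entire first page even though it
     * occupies only 256 bytes of it.
     */
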
     * Does the MSI-X table cover the beginning of the BAR?  The whole BAR?
     * NB - Host page size is necessarily a power of two and so is the PCI
     * BAR (not counting EA yet), therefore if we have host page aligned

     * must be at least host page sized and therefore mmap'able.

    if (end >= region->size) {
        region->nr_mmaps = 0;
        g_free(region->mmaps);
        region->mmaps = NULL;
        trace_vfio_msix_fixup(vdev->vbasedev.name,
                              vdev->msix->table_bar, 0, 0);

        region->mmaps[0].offset = end;
        region->mmaps[0].size = region->size - end;
        trace_vfio_msix_fixup(vdev->vbasedev.name,
                              vdev->msix->table_bar, region->mmaps[0].offset,
                              region->mmaps[0].offset + region->mmaps[0].size);

    } else if (end >= region->size) {
        region->mmaps[0].size = start;
        trace_vfio_msix_fixup(vdev->vbasedev.name,
                              vdev->msix->table_bar, region->mmaps[0].offset,
                              region->mmaps[0].offset + region->mmaps[0].size);

        region->nr_mmaps = 2;
        region->mmaps = g_renew(VFIOMmap, region->mmaps, 2);

        memcpy(&region->mmaps[1], &region->mmaps[0], sizeof(VFIOMmap));

        region->mmaps[0].size = start;
        trace_vfio_msix_fixup(vdev->vbasedev.name,
                              vdev->msix->table_bar, region->mmaps[0].offset,
                              region->mmaps[0].offset + region->mmaps[0].size);

        region->mmaps[1].offset = end;
        region->mmaps[1].size = region->size - end;
        trace_vfio_msix_fixup(vdev->vbasedev.name,
                              vdev->msix->table_bar, region->mmaps[1].offset,
                              region->mmaps[1].offset + region->mmaps[1].size);

/* vfio_pci_relocate_msix() */
    int target_bar = -1;

    if (!vdev->msix || vdev->msix_relo == OFF_AUTO_PCIBAR_OFF) {

    /* The actual minimum size of MSI-X structures */
    msix_sz = (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE) +
              (QEMU_ALIGN_UP(vdev->msix->entries, 64) / 8);
    /* Round up to host pages, we don't want to share a page */

    if (vdev->msix_relo == OFF_AUTO_PCIBAR_AUTO) {

            error_setg(errp, "No automatic MSI-X relocation available for "
                       "device %04x:%04x", vdev->vendor_id, vdev->device_id);

        target_bar = (int)(vdev->msix_relo - OFF_AUTO_PCIBAR_BAR0);

    /* I/O port BARs cannot host MSI-X structures */
    if (vdev->bars[target_bar].ioport) {
        error_setg(errp, "Invalid MSI-X relocation BAR %d, "

    /* Cannot use a BAR in the "shadow" of a 64-bit BAR */
    if (!vdev->bars[target_bar].size &&
        target_bar > 0 && vdev->bars[target_bar - 1].mem64) {
        error_setg(errp, "Invalid MSI-X relocation BAR %d, "
                   "consumed by 64-bit BAR %d", target_bar, target_bar - 1);

    /* 2GB max size for 32-bit BARs, cannot double if already > 1G */
    if (vdev->bars[target_bar].size > 1 * GiB &&
        !vdev->bars[target_bar].mem64) {
        error_setg(errp, "Invalid MSI-X relocation BAR %d, "
                   "no space to extend 32-bit BAR", target_bar);

     * prefetchable since QEMU MSI-X emulation has no read side effects

    if (!vdev->bars[target_bar].size) {
        if (target_bar < (PCI_ROM_SLOT - 1) &&
            !vdev->bars[target_bar + 1].size) {
            vdev->bars[target_bar].mem64 = true;
            vdev->bars[target_bar].type = PCI_BASE_ADDRESS_MEM_TYPE_64;

        vdev->bars[target_bar].type |= PCI_BASE_ADDRESS_MEM_PREFETCH;
        vdev->bars[target_bar].size = msix_sz;
        vdev->msix->table_offset = 0;

        vdev->bars[target_bar].size = MAX(vdev->bars[target_bar].size * 2,

        /*
         * Due to above size calc, MSI-X always starts halfway into the BAR,
         * which will always be a separate host page.
         */
        vdev->msix->table_offset = vdev->bars[target_bar].size / 2;

    vdev->msix->table_bar = target_bar;
    vdev->msix->pba_bar = target_bar;
    /* Requires 8-byte alignment, but PCI_MSIX_ENTRY_SIZE guarantees that */
    vdev->msix->pba_offset = vdev->msix->table_offset +
                             (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE);

    trace_vfio_msix_relo(vdev->vbasedev.name,
                         vdev->msix->table_bar, vdev->msix->table_offset);

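/*
 * For illustration (not from this file): relocation is requested per device
 * with the x-msix-relocation property (off, auto, bar0..bar5), e.g. for a
 * hypothetical device:
 *
 *   -device vfio-pci,host=0000:01:00.0,x-msix-relocation=bar2
 */
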
 * capabilities into the chain.  In order to set up MSI-X we need a
 * attempt to mmap the MSI-X table area, which VFIO won't allow, we
 * need to first look for where the MSI-X table lives.  So we
 * unfortunately split MSI-X setup across two functions.

/* vfio_msix_early_setup() */
    pos = pci_find_capability(&vdev->pdev, PCI_CAP_ID_MSIX);

    msix->table_bar = table & PCI_MSIX_FLAGS_BIRMASK;
    msix->table_offset = table & ~PCI_MSIX_FLAGS_BIRMASK;
    msix->pba_bar = pba & PCI_MSIX_FLAGS_BIRMASK;
    msix->pba_offset = pba & ~PCI_MSIX_FLAGS_BIRMASK;
    msix->entries = (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1;

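    /*
     * Worked example: table = 0x00002003 decodes to table_bar = 3 (the low
     * BIR bits) and table_offset = 0x2000; ctrl = 0x003f gives
     * entries = 0x3f + 1 = 64.
     */
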
    ret = vfio_device_get_irq_info(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX,
        error_setg_errno(errp, -ret, "failed to get MSI-X irq info");

    msix->noresize = !!(irq_info.flags & VFIO_IRQ_INFO_NORESIZE);

    if (msix->pba_offset >= vdev->bars[msix->pba_bar].region.size) {

        if (vdev->vendor_id == PCI_VENDOR_ID_CHELSIO &&
            (vdev->device_id & 0xff00) == 0x5800) {
            msix->pba_offset = 0x1000;

            msix->pba_offset = 0xb400;
        } else if (vdev->msix_relo == OFF_AUTO_PCIBAR_OFF) {

    trace_vfio_msix_early_setup(vdev->vbasedev.name, pos, msix->table_bar,
                                msix->table_offset, msix->entries,
                                msix->noresize);
    vdev->msix = msix;

/* vfio_msix_setup() */
    vdev->msix->pending = g_new0(unsigned long,
                                 BITS_TO_LONGS(vdev->msix->entries));
    ret = msix_init(&vdev->pdev, vdev->msix->entries,
                    vdev->bars[vdev->msix->table_bar].mr,
                    vdev->msix->table_bar, vdev->msix->table_offset,
                    vdev->bars[vdev->msix->pba_bar].mr,
                    vdev->msix->pba_bar, vdev->msix->pba_offset, pos,

    if (ret == -ENOTSUP) {

     * MSI-X structures and avoid overlapping non-MSI-X related registers.
     * For an assigned device, this hopefully means that emulation of MSI-X

     * vector-use notifier is called, which occurs on unmask, we test whether

    memory_region_set_enabled(&vdev->pdev.msix_pba_mmio, false);

     * cannot be mapped because of a host page size bigger than the MSI-X table

                            "vfio-no-msix-emulation", NULL)) {
        memory_region_set_enabled(&vdev->pdev.msix_table_mmio, false);

/* vfio_teardown_msi() */
    msi_uninit(&vdev->pdev);

    if (vdev->msix) {
        msix_uninit(&vdev->pdev,
                    vdev->bars[vdev->msix->table_bar].mr,
                    vdev->bars[vdev->msix->pba_bar].mr);
        g_free(vdev->msix->pending);

/* vfio_mmap_set_enabled() */
    vfio_region_mmaps_set_enabled(&vdev->bars[i].region, enabled);

/* vfio_bar_prepare() */
    VFIOBAR *bar = &vdev->bars[nr];

    if (!bar->region.size) {

    bar->ioport = (pci_bar & PCI_BASE_ADDRESS_SPACE_IO);
    bar->mem64 = bar->ioport ? 0 : (pci_bar & PCI_BASE_ADDRESS_MEM_TYPE_64);
    bar->type = pci_bar & (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK :
    bar->size = bar->region.size;

/* vfio_bar_register() */
    VFIOBAR *bar = &vdev->bars[nr];

    if (!bar->size) {

    bar->mr = g_new0(MemoryRegion, 1);
    name = g_strdup_printf("%s base BAR %d", vdev->vbasedev.name, nr);
    memory_region_init_io(bar->mr, OBJECT(vdev), NULL, NULL, name, bar->size);

    if (bar->region.size) {
        memory_region_add_subregion(bar->mr, 0, bar->region.mem);

        if (vfio_region_mmap(&bar->region)) {
                         vdev->vbasedev.name, nr);

    pci_register_bar(&vdev->pdev, nr, bar->type, bar->mr);

/* vfio_bars_exit() */
    VFIOBAR *bar = &vdev->bars[i];

    vfio_region_exit(&bar->region);
    if (bar->region.size) {
        memory_region_del_subregion(bar->mr, bar->region.mem);

    if (vdev->vga) {
        pci_unregister_vga(&vdev->pdev);

/* vfio_bars_finalize() */
    VFIOBAR *bar = &vdev->bars[i];

    vfio_region_finalize(&bar->region);
    if (bar->mr) {
        assert(bar->size);
        object_unparent(OBJECT(bar->mr));
        g_free(bar->mr);
        bar->mr = NULL;

    if (vdev->vga) {
        for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
            object_unparent(OBJECT(&vdev->vga->region[i].mem));
        g_free(vdev->vga);

/* vfio_std_cap_max_size() */
    for (tmp = pdev->config[PCI_CAPABILITY_LIST]; tmp;
         tmp = pdev->config[tmp + PCI_CAP_LIST_NEXT]) {

    return next - pos;

static uint16_t vfio_ext_cap_max_size(const uint8_t *config, uint16_t pos)

         tmp = PCI_EXT_CAP_NEXT(pci_get_long(config + tmp))) {

    return next - pos;

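/*
 * For reference, the standard capability list these helpers walk is a
 * singly linked list inside the first 256 bytes of config space.  A minimal
 * lookup over a raw config buffer (a sketch, not this file's code):
 */
static uint8_t cap_find(const uint8_t *cfg, uint8_t cap_id)
{
    uint8_t pos = cfg[PCI_CAPABILITY_LIST];     /* list head at 0x34 */

    while (pos) {
        if (cfg[pos] == cap_id) {               /* capability ID byte */
            return pos;
        }
        pos = cfg[pos + PCI_CAP_LIST_NEXT];     /* next pointer at pos + 1 */
    }
    return 0;                                   /* not found */
}
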
/* vfio_add_emulated_word() */
    vfio_set_word_bits(vdev->pdev.config + pos, val, mask);
    vfio_set_word_bits(vdev->pdev.wmask + pos, ~mask, mask);
    vfio_set_word_bits(vdev->emulated_config_bits + pos, mask, mask);

/* vfio_add_emulated_long() */
    vfio_set_long_bits(vdev->pdev.config + pos, val, mask);
    vfio_set_long_bits(vdev->pdev.wmask + pos, ~mask, mask);
    vfio_set_long_bits(vdev->emulated_config_bits + pos, mask, mask);

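/*
 * Worked example (hypothetical values): vfio_add_emulated_word(vdev,
 * PCI_VENDOR_ID, 0x1af4, ~0) stores 0x1af4 in QEMU's copy of config space,
 * clears the write mask so guest writes are dropped, and sets all 16
 * emulated_config_bits so reads are served from QEMU's copy instead of the
 * device.
 */
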
/* vfio_pci_enable_rp_atomics() */
    PCIBus *bus = pci_get_bus(&vdev->pdev);
    PCIDevice *parent = bus->parent_dev;

    if (pci_bus_is_root(bus) || !parent || !parent->exp.exp_cap ||
        vdev->pdev.devfn ||
        vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {

    pos = parent->config + parent->exp.exp_cap + PCI_EXP_DEVCAP2;

    info = vfio_get_device_info(vdev->vbasedev.fd);

    if (cap->flags & VFIO_PCI_ATOMIC_COMP32) {
    if (cap->flags & VFIO_PCI_ATOMIC_COMP64) {
    if (cap->flags & VFIO_PCI_ATOMIC_COMP128) {

    vdev->clear_parent_atomics_on_exit = true;

/* vfio_pci_disable_rp_atomics() */
    if (vdev->clear_parent_atomics_on_exit) {
        PCIDevice *parent = pci_get_bus(&vdev->pdev)->parent_dev;
        uint8_t *pos = parent->config + parent->exp.exp_cap + PCI_EXP_DEVCAP2;

/* vfio_setup_pcie_cap() */
    flags = pci_get_word(vdev->pdev.config + pos + PCI_CAP_FLAGS);

    if (!pci_bus_is_express(pci_get_bus(&vdev->pdev))) {
        PCIBus *bus = pci_get_bus(&vdev->pdev);

         * as-is on non-express buses.  The reason being that some drivers
         * valid transitions between bus types.  An express device on a non-

    } else if (pci_bus_is_root(pci_get_bus(&vdev->pdev))) {

     * Intel 82599 SR-IOV VFs report an invalid PCIe capability version 0
     * config space.

    pos = pci_add_capability(&vdev->pdev, PCI_CAP_ID_EXP, pos, size,

    vdev->pdev.exp.exp_cap = pos;

/* vfio_check_pcie_flr() */
    uint32_t cap = pci_get_long(vdev->pdev.config + pos + PCI_EXP_DEVCAP);

    trace_vfio_check_pcie_flr(vdev->vbasedev.name);
    vdev->has_flr = true;

/* vfio_check_pm_reset() */
    uint16_t csr = pci_get_word(vdev->pdev.config + pos + PCI_PM_CTRL);

    trace_vfio_check_pm_reset(vdev->vbasedev.name);
    vdev->has_pm_reset = true;

/* vfio_check_af_flr() */
    uint8_t cap = pci_get_byte(vdev->pdev.config + pos + PCI_AF_CAP);

    trace_vfio_check_af_flr(vdev->vbasedev.name);
    vdev->has_flr = true;

/* vfio_add_vendor_specific_cap() */
    PCIDevice *pdev = &vdev->pdev;

     * Exempt the config space check for Vendor Specific Information during
     * The config space check is still enforced for the 3-byte VSC header.

    if (vdev->skip_vsc_check && size > 3) {
        memset(pdev->cmask + pos + 3, 0, size - 3);

/* vfio_add_std_cap() */
    PCIDevice *pdev = &vdev->pdev;

    cap_id = pdev->config[pos];
    next = pdev->config[pos + PCI_CAP_LIST_NEXT];

     * Since QEMU doesn't actually handle many of the config accesses,
     * This is also why we pre-calculate size above, as cached config space

    pdev->config[PCI_CAPABILITY_LIST] = 0;
    vdev->emulated_config_bits[PCI_CAPABILITY_LIST] = 0xff;
    vdev->emulated_config_bits[PCI_STATUS] |= PCI_STATUS_CAP_LIST;

    pci_set_byte(vdev->emulated_config_bits + pos + PCI_CAP_LIST_NEXT, 0xff);

     * PCI-core config space emulation needs write access to the power

    pci_set_word(pdev->wmask + pos + PCI_PM_CTRL, PCI_PM_CTRL_STATE_MASK);

/* vfio_setup_rebar_ecap() */
    ctrl = pci_get_long(vdev->pdev.config + pos + PCI_REBAR_CTRL);

    ctrl = pci_get_long(vdev->pdev.config + pos + PCI_REBAR_CTRL + (i * 8));

     * might need an opt-in or reservation scheme in the kernel.

    return -EINVAL;

/* vfio_add_ext_cap() */
    PCIDevice *pdev = &vdev->pdev;

    uint8_t *config;

        !pci_get_long(pdev->config + PCI_CONFIG_SPACE_SIZE)) {

     * physical device, we cache the config space to avoid overwriting
     * the original config space when we parse the extended capabilities.

    config = g_memdup(pdev->config, vdev->config_size);

     * capability ID, version, AND next pointer.  A non-zero next pointer

    pci_set_long(pdev->config + PCI_CONFIG_SPACE_SIZE,
    pci_set_long(pdev->wmask + PCI_CONFIG_SPACE_SIZE, 0);
    pci_set_long(vdev->emulated_config_bits + PCI_CONFIG_SPACE_SIZE, ~0);

         next = PCI_EXT_CAP_NEXT(pci_get_long(config + next))) {
        header = pci_get_long(config + next);

         * recognize.  Since QEMU doesn't actually handle many of the config

        size = vfio_ext_cap_max_size(config, next);

        pci_long_test_and_set_mask(vdev->emulated_config_bits + next,

        case PCI_EXT_CAP_ID_SRIOV: /* Read-only VF BARs confuse OVMF */
            trace_vfio_add_ext_cap_dropped(vdev->vbasedev.name, cap_id, next);

    if (pci_get_word(pdev->config + PCI_CONFIG_SPACE_SIZE) == 0xFFFF) {
        pci_set_word(pdev->config + PCI_CONFIG_SPACE_SIZE, 0);

    g_free(config);

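/*
 * Worked example: an extended capability header of 0x14010001 decodes as
 * cap_id = 0x0001 (AER), version = 1, next = 0x140, matching the
 * PCI_EXT_CAP_NEXT() accessor used in the loop above.
 */
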
/* vfio_add_capabilities() */
    PCIDevice *pdev = &vdev->pdev;

    if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST) ||
        !pdev->config[PCI_CAPABILITY_LIST]) {

    if (!vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST], errp)) {

/* vfio_pci_pre_reset() */
    PCIDevice *pdev = &vdev->pdev;

    if (pdev->pm_cap) {

        pmcsr = vfio_pci_read_config(pdev, pdev->pm_cap + PCI_PM_CTRL, 2);

        vfio_pci_write_config(pdev, pdev->pm_cap + PCI_PM_CTRL, pmcsr, 2);

        pmcsr = vfio_pci_read_config(pdev, pdev->pm_cap + PCI_PM_CTRL, 2);

/* vfio_pci_post_reset() */
    VFIODevice *vbasedev = &vdev->vbasedev;

        error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);

    for (nr = 0; nr < PCI_NUM_REGIONS - 1; ++nr) {

                     vbasedev->name, nr, strwriteerror(ret));

/* vfio_pci_host_match() */
    sprintf(tmp, "%04x:%02x:%02x.%1x", addr->domain,
            addr->bus, addr->slot, addr->function);

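/*
 * Example: domain 0, bus 0x01, slot 0x00, function 0 formats as
 * "0000:01:00.0", the same form as the device's name in sysfs.
 */
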
/* vfio_pci_get_pci_hot_reset_info() */
    info->argsz = sizeof(*info);

    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
        ret = -errno;

        if (!vdev->has_pm_reset) {
                       "no available reset mechanism.", vdev->vbasedev.name);

    count = info->count;
    info = g_realloc(info, sizeof(*info) + (count * sizeof(info->devices[0])));
    info->argsz = sizeof(*info) + (count * sizeof(info->devices[0]));

    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
        ret = -errno;

/* vfio_pci_hot_reset() */
    VFIODevice *vbasedev = &vdev->vbasedev;
    const VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(vbasedev->bcontainer);

    return vioc->pci_hot_reset(vbasedev, single);

 * We want to differentiate hot reset of multiple in-use devices vs hot reset
 * of a single in-use device.  VFIO_DEVICE_RESET will already handle the case
 * of doing hot resets when there is only a single device per bus.  The in-use
 * multiple devices, but only a single in-use device, means that we can call
 * it from our bus ->reset() callback since the extent is effectively a single
 * are multiple in-use devices, we can only trigger the hot reset during a
 * path where both our reset handler and ->reset() callback are used.  Calling
 * _one() will only do a hot reset for the one in-use device case, calling

/* vfio_pci_compute_needs_reset() */
    if (!vbasedev->reset_works || (!vdev->has_flr && vdev->has_pm_reset)) {
        vbasedev->needs_reset = true;

/* vfio_display_migration_needed() */
    return vdev->ramfb_migrate == ON_OFF_AUTO_ON ||
           (vdev->ramfb_migrate == ON_OFF_AUTO_AUTO && vdev->enable_ramfb);

/* vfio_pci_load_config() */
    PCIDevice *pdev = &vdev->pdev;
    pcibus_t old_addr[PCI_NUM_REGIONS - 1];

    old_addr[bar] = pdev->io_regions[bar].addr;

                          pci_get_word(pdev->config + PCI_COMMAND), 2);

    if (old_addr[bar] != pdev->io_regions[bar].addr &&
        vdev->bars[bar].region.size > 0 &&
        vdev->bars[bar].region.size < qemu_real_host_page_size()) {

/* vfio_populate_vga() */
    VFIODevice *vbasedev = &vdev->vbasedev;

        error_setg_errno(errp, -ret,

    if (!(reg_info->flags & VFIO_REGION_INFO_FLAG_READ) ||
        !(reg_info->flags & VFIO_REGION_INFO_FLAG_WRITE) ||
        reg_info->size < 0xbffff + 1) {
                   (unsigned long)reg_info->flags,
                   (unsigned long)reg_info->size);

    vdev->vga = g_new0(VFIOVGA, 1);

    vdev->vga->fd_offset = reg_info->offset;
    vdev->vga->fd = vdev->vbasedev.fd;

    vdev->vga->region[QEMU_PCI_VGA_MEM].offset = QEMU_PCI_VGA_MEM_BASE;
    vdev->vga->region[QEMU_PCI_VGA_MEM].nr = QEMU_PCI_VGA_MEM;
    QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_MEM].quirks);

    memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_MEM].mem,
                          &vdev->vga->region[QEMU_PCI_VGA_MEM],
                          "vfio-vga-mmio@0xa0000",

    vdev->vga->region[QEMU_PCI_VGA_IO_LO].offset = QEMU_PCI_VGA_IO_LO_BASE;
    vdev->vga->region[QEMU_PCI_VGA_IO_LO].nr = QEMU_PCI_VGA_IO_LO;
    QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].quirks);

    memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem,
                          &vdev->vga->region[QEMU_PCI_VGA_IO_LO],
                          "vfio-vga-io@0x3b0",

    vdev->vga->region[QEMU_PCI_VGA_IO_HI].offset = QEMU_PCI_VGA_IO_HI_BASE;
    vdev->vga->region[QEMU_PCI_VGA_IO_HI].nr = QEMU_PCI_VGA_IO_HI;
    QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks);

    memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
                          &vdev->vga->region[QEMU_PCI_VGA_IO_HI],
                          "vfio-vga-io@0x3c0",

    pci_register_vga(&vdev->pdev, &vdev->vga->region[QEMU_PCI_VGA_MEM].mem,
                     &vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem,
                     &vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem);

/* vfio_populate_device() */
    VFIODevice *vbasedev = &vdev->vbasedev;

    int i, ret = -1;

    if (!(vbasedev->flags & VFIO_DEVICE_FLAGS_PCI)) {

    if (vbasedev->num_regions < VFIO_PCI_CONFIG_REGION_INDEX + 1) {
                   vbasedev->num_regions);

    if (vbasedev->num_irqs < VFIO_PCI_MSIX_IRQ_INDEX + 1) {
        error_setg(errp, "unexpected number of irqs %u", vbasedev->num_irqs);

        char *name = g_strdup_printf("%s BAR %d", vbasedev->name, i);

                                &vdev->bars[i].region, i, name);

            error_setg_errno(errp, -ret, "failed to get region %d info", i);

        QLIST_INIT(&vdev->bars[i].quirks);

        error_setg_errno(errp, -ret, "failed to get config info");

    trace_vfio_populate_device_config(vdev->vbasedev.name,
                                      (unsigned long)reg_info->size,
                                      (unsigned long)reg_info->offset,
                                      (unsigned long)reg_info->flags);

    vdev->config_size = reg_info->size;
    if (vdev->config_size == PCI_CONFIG_SPACE_SIZE) {
        vdev->pdev.cap_present &= ~QEMU_PCI_CAP_EXPRESS;
    vdev->config_offset = reg_info->offset;

    if (vdev->features & VFIO_FEATURE_ENABLE_VGA) {
                             "requested feature x-vga\n");

        trace_vfio_populate_device_get_irq_info_failure(strerror(-ret));

    vdev->pci_aer = true;

               vbasedev->name);

/* vfio_pci_put_device() */
    g_free(vdev->emulated_config_bits);
    g_free(vdev->rom);

     * g_free(vdev->igd_opregion);

    vfio_device_detach(&vdev->vbasedev);

    g_free(vdev->vbasedev.name);
    g_free(vdev->msix);

/* vfio_err_notifier_handler() */
    if (!event_notifier_test_and_clear(&vdev->err_notifier)) {

    error_report("%s(%s) Unrecoverable error detected. Please collect any data possible and then kill the guest", __func__, vdev->vbasedev.name);

/* vfio_register_err_notifier() */
    if (!vdev->pci_aer) {

    if (event_notifier_init(&vdev->err_notifier, 0)) {
        vdev->pci_aer = false;

    fd = event_notifier_get_fd(&vdev->err_notifier);

    if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_ERR_IRQ_INDEX, 0,
        error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
        event_notifier_cleanup(&vdev->err_notifier);
        vdev->pci_aer = false;

/* vfio_unregister_err_notifier() */
    if (!vdev->pci_aer) {

    if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_ERR_IRQ_INDEX, 0,
                                       VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) {
        error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);

    qemu_set_fd_handler(event_notifier_get_fd(&vdev->err_notifier),
    event_notifier_cleanup(&vdev->err_notifier);

/* vfio_req_notifier_handler() */
    if (!event_notifier_test_and_clear(&vdev->req_notifier)) {

    warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);

/* vfio_register_req_notifier() */
    if (!(vdev->features & VFIO_FEATURE_ENABLE_REQ)) {

    ret = vfio_device_get_irq_info(&vdev->vbasedev, VFIO_PCI_REQ_IRQ_INDEX,

    if (event_notifier_init(&vdev->req_notifier, 0)) {

    fd = event_notifier_get_fd(&vdev->req_notifier);

    if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_REQ_IRQ_INDEX, 0,
        error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
        event_notifier_cleanup(&vdev->req_notifier);

    vdev->req_enabled = true;

/* vfio_unregister_req_notifier() */
    if (!vdev->req_enabled) {

    if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_REQ_IRQ_INDEX, 0,
                                       VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) {
        error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);

    qemu_set_fd_handler(event_notifier_get_fd(&vdev->req_notifier),
    event_notifier_cleanup(&vdev->req_notifier);

    vdev->req_enabled = false;

/* vfio_pci_config_setup() */
    PCIDevice *pdev = &vdev->pdev;
    VFIODevice *vbasedev = &vdev->vbasedev;

    config_space_size = MIN(pci_config_size(&vdev->pdev), vdev->config_size);

    /* Get a copy of config space */
                                     vdev->pdev.config);

        ret = ret < 0 ? -ret : EFAULT;
        error_setg_errno(errp, ret, "failed to read device config space");

    vdev->emulated_config_bits = g_malloc0(vdev->config_size);

    memset(vdev->emulated_config_bits + PCI_ROM_ADDRESS, 0xff, 4);

    memset(vdev->emulated_config_bits + PCI_BASE_ADDRESS_0, 0xff, 6 * 4);
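/*
 * Illustrative sketch (simplified, assumed byte-at-a-time form): how an
 * emulated-bits mask like the one initialized above merges QEMU's copy of
 * config space with what the physical device returns. The memsets mark the
 * ROM and BAR registers as fully emulated, so the guest only ever sees
 * QEMU's virtual addresses there, never residual host addresses.
 */
#include <stdint.h>

static uint8_t merge_config_byte_sketch(uint8_t emulated, uint8_t physical,
                                        uint8_t emu_mask)
{
    /* Mask bits set -> emulated copy wins; clear -> hardware value wins */
    return (emulated & emu_mask) | (physical & ~emu_mask);
}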
     * device ID is managed by the vendor and need only be a 16-bit value.
     * Allow any 16-bit value for subsystem so they can be hidden or changed.
     */
    if (vdev->vendor_id != PCI_ANY_ID) {
        if (vdev->vendor_id >= 0xffff) {

        vfio_add_emulated_word(vdev, PCI_VENDOR_ID, vdev->vendor_id, ~0);
        trace_vfio_pci_emulated_vendor_id(vbasedev->name, vdev->vendor_id);
    } else {
        vdev->vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID);
    }

    if (vdev->device_id != PCI_ANY_ID) {
        if (vdev->device_id > 0xffff) {

        vfio_add_emulated_word(vdev, PCI_DEVICE_ID, vdev->device_id, ~0);
        trace_vfio_pci_emulated_device_id(vbasedev->name, vdev->device_id);
    } else {
        vdev->device_id = pci_get_word(pdev->config + PCI_DEVICE_ID);
    }

    if (vdev->sub_vendor_id != PCI_ANY_ID) {
        if (vdev->sub_vendor_id > 0xffff) {

        vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_VENDOR_ID,
                               vdev->sub_vendor_id, ~0);
        trace_vfio_pci_emulated_sub_vendor_id(vbasedev->name,
                                              vdev->sub_vendor_id);
    }

    if (vdev->sub_device_id != PCI_ANY_ID) {
        if (vdev->sub_device_id > 0xffff) {

        vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_ID, vdev->sub_device_id, ~0);
        trace_vfio_pci_emulated_sub_device_id(vbasedev->name,
                                              vdev->sub_device_id);
    }
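/*
 * Usage sketch (assumed command line, not from this file): the overrides
 * above are driven by the experimental x-pci-* properties, e.g.:
 *
 *   -device vfio-pci,host=0000:01:00.0,x-pci-vendor-id=0x1234,
 *           x-pci-device-id=0x5678
 *
 * Each override only patches the emulated config space that the guest
 * reads; the physical device's IDs are untouched.
 */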
    /* QEMU can change multi-function devices to single function, or reverse */
    vdev->emulated_config_bits[PCI_HEADER_TYPE] =
                                              PCI_HEADER_TYPE_MULTI_FUNCTION;

    if (vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
        vdev->pdev.config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
    } else {
        vdev->pdev.config[PCI_HEADER_TYPE] &= ~PCI_HEADER_TYPE_MULTI_FUNCTION;
    }

    /*
     * Clear host resource mapping info.  If we choose not to register a
     * BAR, such as might be the case with the option ROM, we can get
     * confusing, unwritable, residual addresses from the host here.
     */
    memset(&vdev->pdev.config[PCI_BASE_ADDRESS_0], 0, 24);
    memset(&vdev->pdev.config[PCI_ROM_ADDRESS], 0, 4);
/* vfio_interrupt_setup() */
    PCIDevice *pdev = &vdev->pdev;

    if (pdev->cap_present & QEMU_PCI_CAP_MSIX) {
        memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff,
               MSIX_CAP_LENGTH);
    }

    if (pdev->cap_present & QEMU_PCI_CAP_MSI) {
        memset(vdev->emulated_config_bits + pdev->msi_cap, 0xff,
               vdev->msi_cap_size);
    }

    if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) {
        vdev->intx.mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
                                             vfio_intx_mmap_enable, vdev);
        pci_device_set_intx_routing_notifier(&vdev->pdev,
                                             vfio_intx_routing_notifier);
        vdev->irqchip_change_notifier.notify = vfio_irqchip_change;
        kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier);
        if (!vfio_intx_enable(vdev, errp)) {
            timer_free(vdev->intx.mmap_timer);
            pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
            kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier);
/* vfio_pci_realize() */
    VFIODevice *vbasedev = &vdev->vbasedev;

    if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
        if (!(~vdev->host.domain || ~vdev->host.bus ||
              ~vdev->host.slot || ~vdev->host.function)) {
            error_setg(errp, "No provided host device");
            error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
#ifdef CONFIG_IOMMUFD
                              "or -device vfio-pci,fd=DEVICE_FD "
#endif
                              "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");

        vbasedev->sysfsdev =
            g_strdup_printf("/sys/bus/pci/devices/%04x:%02x:%02x.%01x",
                            vdev->host.domain, vdev->host.bus,
                            vdev->host.slot, vdev->host.function);
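/*
 * Example (assumed values): host=0000:01:00.0 yields
 * vbasedev->sysfsdev = "/sys/bus/pci/devices/0000:01:00.0", the same sysfs
 * node an administrator binds to the vfio-pci host driver before assignment.
 */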
     * the x-balloon-allowed option unless this is minimally an mdev device.
     */
    vbasedev->mdev = vfio_device_is_mdev(vbasedev);

    trace_vfio_mdev(vbasedev->name, vbasedev->mdev);

    if (vbasedev->ram_block_discard_allowed && !vbasedev->mdev) {
        error_setg(errp, "x-balloon-allowed only potentially compatible "

    if (!qemu_uuid_is_null(&vdev->vf_token)) {
        qemu_uuid_unparse(&vdev->vf_token, uuid);
        name = g_strdup_printf("%s vf_token=%s", vbasedev->name, uuid);
    } else {
        name = g_strdup(vbasedev->name);
    }

    if (!vbasedev->mdev &&
        !pci_device_set_iommu_device(pdev, vbasedev->hiod, errp)) {

    if (vdev->vga) {

    if (vdev->display != ON_OFF_AUTO_OFF) {

    if (vdev->enable_ramfb && vdev->dpy == NULL) {

    if (vdev->display_xres || vdev->display_yres) {
        if (vdev->dpy == NULL) {

        if (vdev->dpy->edid_regs == NULL) {

    if (vdev->ramfb_migrate == ON_OFF_AUTO_ON && !vdev->enable_ramfb) {
        warn_report("x-ramfb-migrate=on but ramfb=off. "
                    "Forcing x-ramfb-migrate to off.");
        vdev->ramfb_migrate = ON_OFF_AUTO_OFF;
    }
    if (vbasedev->enable_migration == ON_OFF_AUTO_OFF) {
        if (vdev->ramfb_migrate == ON_OFF_AUTO_AUTO) {
            vdev->ramfb_migrate = ON_OFF_AUTO_OFF;
        } else if (vdev->ramfb_migrate == ON_OFF_AUTO_ON) {
            error_setg(errp, "x-ramfb-migrate requires enable-migration");

    if (!pdev->failover_pair_id) {

    if (vdev->interrupt == VFIO_INT_INTx) {

    pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
    if (vdev->irqchip_change_notifier.notify) {
        kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier);
    }
    if (vdev->intx.mmap_timer) {
        timer_free(vdev->intx.mmap_timer);
    }

    if (!vbasedev->mdev) {

    error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name);
/* vfio_exitfn() */
    VFIODevice *vbasedev = &vdev->vbasedev;

    pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
    if (vdev->irqchip_change_notifier.notify) {
        kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier);
    }

    if (vdev->intx.mmap_timer) {
        timer_free(vdev->intx.mmap_timer);

    if (!vbasedev->mdev) {
/* vfio_pci_reset() */
    trace_vfio_pci_reset(vdev->vbasedev.name);

    if (vdev->display != ON_OFF_AUTO_OFF) {

    if (vdev->resetfn && !vdev->resetfn(vdev)) {

    if (vdev->vbasedev.reset_works &&
        (vdev->has_flr || !vdev->has_pm_reset) &&
        !ioctl(vdev->vbasedev.fd, VFIO_DEVICE_RESET)) {
        trace_vfio_pci_reset_flr(vdev->vbasedev.name);

    if (vdev->vbasedev.reset_works && vdev->has_pm_reset &&
        !ioctl(vdev->vbasedev.fd, VFIO_DEVICE_RESET)) {
        trace_vfio_pci_reset_pm(vdev->vbasedev.name);
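/*
 * Illustrative sketch (not from this file): how userspace discovers whether
 * the kernel offers a function-level reset at all; reset_works above caches
 * the equivalent answer. "device_fd" is an assumed VFIO device fd.
 */
#include <sys/ioctl.h>
#include <linux/vfio.h>

static int try_device_reset_sketch(int device_fd)
{
    struct vfio_device_info info = { .argsz = sizeof(info) };

    if (ioctl(device_fd, VFIO_DEVICE_GET_INFO, &info) < 0) {
        return -1;
    }
    if (!(info.flags & VFIO_DEVICE_FLAGS_RESET)) {
        return -1; /* no reset usable on this device alone */
    }
    return ioctl(device_fd, VFIO_DEVICE_RESET);
}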
/* vfio_instance_init() */
    VFIODevice *vbasedev = &vdev->vbasedev;

    device_add_bootindex_property(obj, &vdev->bootindex,
                                  "bootindex", NULL,
                                  &pci_dev->qdev);
    vdev->host.domain = ~0U;
    vdev->host.bus = ~0U;
    vdev->host.slot = ~0U;
    vdev->host.function = ~0U;
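    /*
     * ~0U marks each host address component as "unset" so that
     * vfio_pci_realize() above can detect that none of host=, fd= or
     * sysfsdev= was supplied.
     */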
    vdev->nv_gpudirect_clique = 0xFF;

    pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
/* vfio_pci_base_dev_class_init() */
    dc->desc = "VFIO PCI base device";
    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    pdc->exit = vfio_exitfn;
    pdc->config_read = vfio_pci_read_config;
    pdc->config_write = vfio_pci_write_config;
/* vfio_pci_dev_properties[] */
    DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host),
    DEFINE_PROP_UUID_NODEFAULT("vf-token", VFIOPCIDevice, vf_token),

    DEFINE_PROP_ON_OFF_AUTO("x-pre-copy-dirty-page-tracking", VFIOPCIDevice,

    DEFINE_PROP_ON_OFF_AUTO("x-device-dirty-page-tracking", VFIOPCIDevice,

    DEFINE_PROP_UINT32("x-intx-mmap-timeout-ms", VFIOPCIDevice,

    DEFINE_PROP_BIT("x-vga", VFIOPCIDevice, features,

    DEFINE_PROP_BIT("x-req", VFIOPCIDevice, features,

    DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features,

    DEFINE_PROP_BIT("x-igd-lpc", VFIOPCIDevice, features,

    DEFINE_PROP_ON_OFF_AUTO("x-igd-legacy-mode", VFIOPCIDevice,

    DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice,

    DEFINE_PROP("x-migration-multifd-transfer", VFIOPCIDevice,

    DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice,

    DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
    DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice,

    DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false),
    DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false),
    DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false),
    DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice,

    DEFINE_PROP_BOOL("x-no-kvm-ioeventfd", VFIOPCIDevice, no_kvm_ioeventfd,

    DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd,

    DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice, vendor_id, PCI_ANY_ID),
    DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice, device_id, PCI_ANY_ID),
    DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice,

    DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice,

    DEFINE_PROP_UINT32("x-igd-gms", VFIOPCIDevice, igd_gms, 0),
    DEFINE_PROP_UNSIGNED_NODEFAULT("x-nv-gpudirect-clique", VFIOPCIDevice,

    DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice, msix_relo,

    DEFINE_PROP_BOOL("skip-vsc-check", VFIOPCIDevice, skip_vsc_check, true),
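/*
 * Usage sketch (assumed command line): combining a few of the properties
 * above, e.g. disabling MMIO mmaps for tracing and stretching the INTx
 * mmap timeout:
 *
 *   -device vfio-pci,host=0000:01:00.0,x-no-mmap=on,
 *           x-intx-mmap-timeout-ms=2000
 */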
/* vfio_pci_set_fd() */
    vfio_device_set_fd(&vdev->vbasedev, str, errp);
/* vfio_pci_dev_class_init() */
    dc->desc = "VFIO-based PCI device assignment";
    pdc->realize = vfio_pci_realize;

                                          "host",
                                          "Host PCI address [domain:]<bus:slot.function> of assigned device");

                                          "x-intx-mmap-timeout-ms",
                                          "(milliseconds) to re-enable device direct access "

                                          "x-vga",

                                          "x-req",

                                          "x-no-mmap",

                                          "x-no-kvm-intx",
                                          "Disable direct VFIO->KVM INTx injection. Allows one to "

                                          "x-no-kvm-msi",
                                          "Disable direct VFIO->KVM MSI injection. Allows one to "

                                          "x-no-kvm-msix",
                                          "Disable direct VFIO->KVM MSIx injection. Allows one to "

                                          "x-pci-vendor-id",

                                          "x-pci-device-id",

                                          "x-pci-sub-vendor-id",

                                          "x-pci-sub-device-id",

                                          "Host sysfs path of assigned device");

                                          "x-igd-opregion",
                                          "Expose host IGD OpRegion to guest");

                                          "x-igd-gms",

                                          "x-nv-gpudirect-clique",
                                          "clique for device [0-15]");

                                          "x-no-geforce-quirks",

                                          "x-msix-relocation",
                                          "Specify MSI-X MMIO relocation to the end of specified "

                                          "x-no-kvm-ioeventfd",

                                          "x-no-vfio-ioeventfd",

                                          "x-balloon-allowed",

                                          "x-pre-copy-dirty-page-tracking",

    object_class_property_set_description(klass, /* 5.2, 8.0 non-experimental */
                                          "enable-migration",
                                          "Enable device migration. Also requires a host VFIO PCI "

                                          "vf-token",

                                          "Set host IOMMUFD backend device");

                                          "x-device-dirty-page-tracking",
                                          "container-based dirty page tracking");

                                          "migration-events",

                                          "skip-vsc-check",
                                          "Skip config space check for Vendor Specific Capability. "

                                          "x-migration-multifd-transfer",
    DEFINE_PROP_ON_OFF_AUTO("x-ramfb-migrate", VFIOPCIDevice, ramfb_migrate,

/* vfio_pci_nohotplug_dev_class_init() */
    dc->hotpluggable = false;

                                          "Enable ramfb to provide pre-boot graphics for devices "

                                          "x-ramfb-migrate",

/* register_vfio_pci_dev_type() */
     * Ordinary ON_OFF_AUTO property isn't runtime-mutable, but source VM can