Lines Matching +full:- +full:- +full:enable +full:- +full:sparse
10 * the COPYING file in the top-level directory.
12 * Based on qemu-kvm device-assignment:
18 * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
30 #include "hw/qdev-properties.h"
31 #include "hw/qdev-properties-system.h"
32 #include "hw/vfio/vfio-cpr.h"
36 #include "qemu/error-report.h"
37 #include "qemu/main-loop.h"
47 #include "migration/qemu-file.h"
49 #include "vfio-migration-internal.h"
50 #include "vfio-helpers.h"
52 #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
75 error_setg_errno(errp, -ret, "vfio_notifier_init %s failed", name); in vfio_notifier_init()
96 * been serviced and the time gap is long enough, we re-enable mmaps for
100 * regular interrupts and see much better latency by staying in non-mmap
103 * other options with the x-intx-mmap-timeout-ms parameter (a value of
110 if (vdev->intx.pending) { in vfio_intx_mmap_enable()
111 timer_mod(vdev->intx.mmap_timer, in vfio_intx_mmap_enable()
112 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + vdev->intx.mmap_timeout); in vfio_intx_mmap_enable()
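The mmap re-enable above is a one-shot timer that keeps pushing its own deadline out while INTx traffic is still arriving, and only restores mmaps once the device has been quiet for the full timeout. A minimal sketch of that pattern against QEMU's virtual-clock timer API (the callback name intx_mmap_enable_cb is illustrative; timer_new_ms, timer_mod, qemu_clock_get_ms and vfio_mmap_set_enabled are the calls visible in this file):

    static void intx_mmap_enable_cb(void *opaque)
    {
        VFIOPCIDevice *vdev = opaque;

        if (vdev->intx.pending) {
            /* Device still busy: re-arm instead of restoring mmaps. */
            timer_mod(vdev->intx.mmap_timer,
                      qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
                      vdev->intx.mmap_timeout);
            return;
        }
        vfio_mmap_set_enabled(vdev, true);
    }

    /* Armed once at setup, e.g.: */
    vdev->intx.mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
                                         intx_mmap_enable_cb, vdev);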
123 if (!event_notifier_test_and_clear(&vdev->intx.interrupt)) { in vfio_intx_interrupt()
127 trace_vfio_intx_interrupt(vdev->vbasedev.name, 'A' + vdev->intx.pin); in vfio_intx_interrupt()
129 vdev->intx.pending = true; in vfio_intx_interrupt()
130 pci_irq_assert(&vdev->pdev); in vfio_intx_interrupt()
132 if (vdev->intx.mmap_timeout) { in vfio_intx_interrupt()
133 timer_mod(vdev->intx.mmap_timer, in vfio_intx_interrupt()
134 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + vdev->intx.mmap_timeout); in vfio_intx_interrupt()
142 if (!vdev->intx.pending) { in vfio_pci_intx_eoi()
146 trace_vfio_pci_intx_eoi(vbasedev->name); in vfio_pci_intx_eoi()
148 vdev->intx.pending = false; in vfio_pci_intx_eoi()
149 pci_irq_deassert(&vdev->pdev); in vfio_pci_intx_eoi()
156 int irq_fd = event_notifier_get_fd(&vdev->intx.interrupt); in vfio_intx_enable_kvm()
158 if (vdev->no_kvm_intx || !kvm_irqfds_enabled() || in vfio_intx_enable_kvm()
159 vdev->intx.route.mode != PCI_INTX_ENABLED || in vfio_intx_enable_kvm()
166 vfio_device_irq_mask(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX); in vfio_intx_enable_kvm()
167 vdev->intx.pending = false; in vfio_intx_enable_kvm()
168 pci_irq_deassert(&vdev->pdev); in vfio_intx_enable_kvm()
171 if (!vfio_notifier_init(vdev, &vdev->intx.unmask, "intx-unmask", 0, errp)) { in vfio_intx_enable_kvm()
176 &vdev->intx.interrupt, in vfio_intx_enable_kvm()
177 &vdev->intx.unmask, in vfio_intx_enable_kvm()
178 vdev->intx.route.irq)) { in vfio_intx_enable_kvm()
183 if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0, in vfio_intx_enable_kvm()
185 event_notifier_get_fd(&vdev->intx.unmask), in vfio_intx_enable_kvm()
191 vfio_device_irq_unmask(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX); in vfio_intx_enable_kvm()
193 vdev->intx.kvm_accel = true; in vfio_intx_enable_kvm()
195 trace_vfio_intx_enable_kvm(vdev->vbasedev.name); in vfio_intx_enable_kvm()
200 kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &vdev->intx.interrupt, in vfio_intx_enable_kvm()
201 vdev->intx.route.irq); in vfio_intx_enable_kvm()
203 vfio_notifier_cleanup(vdev, &vdev->intx.unmask, "intx-unmask", 0); in vfio_intx_enable_kvm()
206 vfio_device_irq_unmask(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX); in vfio_intx_enable_kvm()
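The KVM bypass path above masks INTx, installs the interrupt eventfd as a resampling irqfd, then registers a second eventfd with the UNMASK action so the kernel unmasks the line when KVM resamples. A standalone sketch of that unmask registration in raw VFIO UAPI terms (assumes an open device_fd and an existing unmask_efd; this mirrors, not reuses, the vfio_device_irq_set_signaling() helper):

    #include <linux/vfio.h>
    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/ioctl.h>

    /* Sketch: tell the kernel to unmask INTx whenever unmask_efd is
     * signaled (by KVM's resampler), instead of unmasking from QEMU. */
    static int intx_set_unmask_eventfd(int device_fd, int32_t unmask_efd)
    {
        size_t argsz = sizeof(struct vfio_irq_set) + sizeof(int32_t);
        struct vfio_irq_set *set = calloc(1, argsz);
        int ret;

        set->argsz = argsz;
        set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_UNMASK;
        set->index = VFIO_PCI_INTX_IRQ_INDEX;
        set->start = 0;
        set->count = 1;
        memcpy(&set->data, &unmask_efd, sizeof(unmask_efd));

        ret = ioctl(device_fd, VFIO_DEVICE_SET_IRQS, set);
        free(set);
        return ret;
    }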
216 if (vdev->no_kvm_intx || !kvm_irqfds_enabled() || in vfio_cpr_intx_enable_kvm()
217 vdev->intx.route.mode != PCI_INTX_ENABLED || in vfio_cpr_intx_enable_kvm()
222 if (!vfio_notifier_init(vdev, &vdev->intx.unmask, "intx-unmask", 0, errp)) { in vfio_cpr_intx_enable_kvm()
227 &vdev->intx.interrupt, in vfio_cpr_intx_enable_kvm()
228 &vdev->intx.unmask, in vfio_cpr_intx_enable_kvm()
229 vdev->intx.route.irq)) { in vfio_cpr_intx_enable_kvm()
231 vfio_notifier_cleanup(vdev, &vdev->intx.unmask, "intx-unmask", 0); in vfio_cpr_intx_enable_kvm()
235 vdev->intx.kvm_accel = true; in vfio_cpr_intx_enable_kvm()
236 trace_vfio_intx_enable_kvm(vdev->vbasedev.name); in vfio_cpr_intx_enable_kvm()
246 if (!vdev->intx.kvm_accel) { in vfio_intx_disable_kvm()
252 * interrupts, QEMU IRQ de-asserted. in vfio_intx_disable_kvm()
254 vfio_device_irq_mask(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX); in vfio_intx_disable_kvm()
255 vdev->intx.pending = false; in vfio_intx_disable_kvm()
256 pci_irq_deassert(&vdev->pdev); in vfio_intx_disable_kvm()
259 if (kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &vdev->intx.interrupt, in vfio_intx_disable_kvm()
260 vdev->intx.route.irq)) { in vfio_intx_disable_kvm()
265 vfio_notifier_cleanup(vdev, &vdev->intx.unmask, "intx-unmask", 0); in vfio_intx_disable_kvm()
268 qemu_set_fd_handler(event_notifier_get_fd(&vdev->intx.interrupt), in vfio_intx_disable_kvm()
271 vdev->intx.kvm_accel = false; in vfio_intx_disable_kvm()
273 /* If we've missed an event, let it re-fire through QEMU */ in vfio_intx_disable_kvm()
274 vfio_device_irq_unmask(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX); in vfio_intx_disable_kvm()
276 trace_vfio_intx_disable_kvm(vdev->vbasedev.name); in vfio_intx_disable_kvm()
284 trace_vfio_intx_update(vdev->vbasedev.name, in vfio_intx_update()
285 vdev->intx.route.irq, route->irq); in vfio_intx_update()
289 vdev->intx.route = *route; in vfio_intx_update()
291 if (route->mode != PCI_INTX_ENABLED) { in vfio_intx_update()
296 warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); in vfio_intx_update()
299 /* Re-enable the interrupt in case we missed an EOI */ in vfio_intx_update()
300 vfio_pci_intx_eoi(&vdev->vbasedev); in vfio_intx_update()
308 if (vdev->interrupt != VFIO_INT_INTx) { in vfio_intx_routing_notifier()
312 route = pci_device_route_intx_to_irq(&vdev->pdev, vdev->intx.pin); in vfio_intx_routing_notifier()
314 if (pci_intx_route_changed(&vdev->intx.route, &route)) { in vfio_intx_routing_notifier()
324 vfio_intx_update(vdev, &vdev->intx.route); in vfio_irqchip_change()
329 uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1); in vfio_intx_enable()
346 vdev->intx.pin = pin - 1; /* Pin A (1) -> irq[0] */ in vfio_intx_enable()
347 pci_config_set_interrupt_pin(vdev->pdev.config, pin); in vfio_intx_enable()
355 vdev->intx.route = pci_device_route_intx_to_irq(&vdev->pdev, in vfio_intx_enable()
356 vdev->intx.pin); in vfio_intx_enable()
360 if (!vfio_notifier_init(vdev, &vdev->intx.interrupt, "intx-interrupt", 0, in vfio_intx_enable()
364 fd = event_notifier_get_fd(&vdev->intx.interrupt); in vfio_intx_enable()
370 warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); in vfio_intx_enable()
375 if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0, in vfio_intx_enable()
378 vfio_notifier_cleanup(vdev, &vdev->intx.interrupt, "intx-interrupt", 0); in vfio_intx_enable()
383 warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); in vfio_intx_enable()
387 vdev->interrupt = VFIO_INT_INTx; in vfio_intx_enable()
389 trace_vfio_intx_enable(vdev->vbasedev.name); in vfio_intx_enable()
397 timer_del(vdev->intx.mmap_timer); in vfio_intx_disable()
399 vfio_device_irq_disable(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX); in vfio_intx_disable()
400 vdev->intx.pending = false; in vfio_intx_disable()
401 pci_irq_deassert(&vdev->pdev); in vfio_intx_disable()
404 fd = event_notifier_get_fd(&vdev->intx.interrupt); in vfio_intx_disable()
406 vfio_notifier_cleanup(vdev, &vdev->intx.interrupt, "intx-interrupt", 0); in vfio_intx_disable()
408 vdev->interrupt = VFIO_INT_NONE; in vfio_intx_disable()
410 trace_vfio_intx_disable(vdev->vbasedev.name); in vfio_intx_disable()
424 VFIOPCIDevice *vdev = vector->vdev; in vfio_msi_interrupt()
428 int nr = vector - vdev->msi_vectors; in vfio_msi_interrupt()
430 if (!event_notifier_test_and_clear(&vector->interrupt)) { in vfio_msi_interrupt()
434 if (vdev->interrupt == VFIO_INT_MSIX) { in vfio_msi_interrupt()
438 /* A masked vector firing needs to use the PBA, enable it */ in vfio_msi_interrupt()
439 if (msix_is_masked(&vdev->pdev, nr)) { in vfio_msi_interrupt()
440 set_bit(nr, vdev->msix->pending); in vfio_msi_interrupt()
441 memory_region_set_enabled(&vdev->pdev.msix_pba_mmio, true); in vfio_msi_interrupt()
442 trace_vfio_msix_pba_enable(vdev->vbasedev.name); in vfio_msi_interrupt()
444 } else if (vdev->interrupt == VFIO_INT_MSI) { in vfio_msi_interrupt()
451 msg = get_msg(&vdev->pdev, nr); in vfio_msi_interrupt()
452 trace_vfio_msi_interrupt(vdev->vbasedev.name, nr, msg.address, msg.data); in vfio_msi_interrupt()
453 notify(&vdev->pdev, nr); in vfio_msi_interrupt()
458 VFIOMSIVector *vector = &vdev->msi_vectors[nr]; in vfio_pci_msi_set_handler()
459 int fd = event_notifier_get_fd(&vector->interrupt); in vfio_pci_msi_set_handler()
465 * Get MSI-X enabled, but no vector enabled, by setting vector 0 with an invalid
477 irq_set->argsz = argsz; in vfio_enable_msix_no_vec()
478 irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | in vfio_enable_msix_no_vec()
480 irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; in vfio_enable_msix_no_vec()
481 irq_set->start = 0; in vfio_enable_msix_no_vec()
482 irq_set->count = 1; in vfio_enable_msix_no_vec()
483 fd = (int32_t *)&irq_set->data; in vfio_enable_msix_no_vec()
484 *fd = -1; in vfio_enable_msix_no_vec()
486 return vdev->vbasedev.io_ops->set_irqs(&vdev->vbasedev, irq_set); in vfio_enable_msix_no_vec()
496 * If dynamic MSI-X allocation is supported, the vectors to be allocated in vfio_enable_vectors()
497 * and enabled can be scattered. Before kernel enabling MSI-X, setting in vfio_enable_vectors()
501 * MSI-X enabled first, then set vectors with a potentially sparse set of in vfio_enable_vectors()
502 * eventfds to enable interrupts only when enabled in guest. in vfio_enable_vectors()
504 if (msix && !vdev->msix->noresize) { in vfio_enable_vectors()
512 argsz = sizeof(*irq_set) + (vdev->nr_vectors * sizeof(*fds)); in vfio_enable_vectors()
515 irq_set->argsz = argsz; in vfio_enable_vectors()
516 irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; in vfio_enable_vectors()
517 irq_set->index = msix ? VFIO_PCI_MSIX_IRQ_INDEX : VFIO_PCI_MSI_IRQ_INDEX; in vfio_enable_vectors()
518 irq_set->start = 0; in vfio_enable_vectors()
519 irq_set->count = vdev->nr_vectors; in vfio_enable_vectors()
520 fds = (int32_t *)&irq_set->data; in vfio_enable_vectors()
522 for (i = 0; i < vdev->nr_vectors; i++) { in vfio_enable_vectors()
523 int fd = -1; in vfio_enable_vectors()
526 * MSI vs MSI-X - The guest has direct access to MSI mask and pending in vfio_enable_vectors()
528 * MSI-X mask and pending bits are emulated, so we want to use the in vfio_enable_vectors()
531 if (vdev->msi_vectors[i].use) { in vfio_enable_vectors()
532 if (vdev->msi_vectors[i].virq < 0 || in vfio_enable_vectors()
533 (msix && msix_is_masked(&vdev->pdev, i))) { in vfio_enable_vectors()
534 fd = event_notifier_get_fd(&vdev->msi_vectors[i].interrupt); in vfio_enable_vectors()
536 fd = event_notifier_get_fd(&vdev->msi_vectors[i].kvm_interrupt); in vfio_enable_vectors()
543 ret = vdev->vbasedev.io_ops->set_irqs(&vdev->vbasedev, irq_set); in vfio_enable_vectors()
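Per the comment above, when the kernel lacks dynamic MSI-X allocation the whole vector range is (re)written in one call, with -1 standing in for vectors the guest has not unmasked yet. A raw-UAPI sketch of that single sparse-trigger call (assumes device_fd, nvec, and an efds[] array where unused slots hold -1; error handling trimmed):

    #include <linux/vfio.h>
    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/ioctl.h>

    /* Sketch of the call behind vfio_enable_vectors(): one
     * VFIO_DEVICE_SET_IRQS covering vectors [0, nvec), where a slot
     * carrying fd -1 stays allocated but cannot fire. */
    static int msix_set_triggers(int device_fd, const int32_t *efds, int nvec)
    {
        size_t argsz = sizeof(struct vfio_irq_set) + nvec * sizeof(int32_t);
        struct vfio_irq_set *set = calloc(1, argsz);
        int ret;

        set->argsz = argsz;
        set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
        set->index = VFIO_PCI_MSIX_IRQ_INDEX;
        set->start = 0;
        set->count = nvec;
        memcpy(&set->data, efds, nvec * sizeof(int32_t));

        ret = ioctl(device_fd, VFIO_DEVICE_SET_IRQS, set);
        free(set);
        return ret;
    }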
553 if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi)) { in vfio_pci_add_kvm_msi_virq()
557 vector->virq = kvm_irqchip_add_msi_route(&vfio_route_change, in vfio_pci_add_kvm_msi_virq()
558 vector_n, &vdev->pdev); in vfio_pci_add_kvm_msi_virq()
565 if (vector->virq < 0) { in vfio_connect_kvm_msi_virq()
569 if (!vfio_notifier_init(vector->vdev, &vector->kvm_interrupt, name, nr, in vfio_connect_kvm_msi_virq()
574 if (kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, &vector->kvm_interrupt, in vfio_connect_kvm_msi_virq()
575 NULL, vector->virq) < 0) { in vfio_connect_kvm_msi_virq()
582 vfio_notifier_cleanup(vector->vdev, &vector->kvm_interrupt, name, nr); in vfio_connect_kvm_msi_virq()
584 kvm_irqchip_release_virq(kvm_state, vector->virq); in vfio_connect_kvm_msi_virq()
585 vector->virq = -1; in vfio_connect_kvm_msi_virq()
591 kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &vector->kvm_interrupt, in vfio_remove_kvm_msi_virq()
592 vector->virq); in vfio_remove_kvm_msi_virq()
593 kvm_irqchip_release_virq(kvm_state, vector->virq); in vfio_remove_kvm_msi_virq()
594 vector->virq = -1; in vfio_remove_kvm_msi_virq()
595 vfio_notifier_cleanup(vdev, &vector->kvm_interrupt, "kvm_interrupt", nr); in vfio_remove_kvm_msi_virq()
601 kvm_irqchip_update_msi_route(kvm_state, vector->virq, msg, pdev); in vfio_update_kvm_msi_virq()
611 if (vector->virq >= 0) { in set_irq_signalling()
612 fd = event_notifier_get_fd(&vector->kvm_interrupt); in set_irq_signalling()
614 fd = event_notifier_get_fd(&vector->interrupt); in set_irq_signalling()
620 error_reportf_err(err, VFIO_MSG_PREFIX, vbasedev->name); in set_irq_signalling()
626 VFIOMSIVector *vector = &vdev->msi_vectors[nr]; in vfio_pci_vector_init()
627 PCIDevice *pdev = &vdev->pdev; in vfio_pci_vector_init()
630 vector->vdev = vdev; in vfio_pci_vector_init()
631 vector->virq = -1; in vfio_pci_vector_init()
632 if (!vfio_notifier_init(vdev, &vector->interrupt, "interrupt", nr, in vfio_pci_vector_init()
636 vector->use = true; in vfio_pci_vector_init()
637 if (vdev->interrupt == VFIO_INT_MSIX) { in vfio_pci_vector_init()
648 bool resizing = !!(vdev->nr_vectors < nr + 1); in vfio_msix_vector_do_use()
650 trace_vfio_msix_vector_do_use(vdev->vbasedev.name, nr); in vfio_msix_vector_do_use()
652 vector = &vdev->msi_vectors[nr]; in vfio_msix_vector_do_use()
654 if (!vector->use) { in vfio_msix_vector_do_use()
658 qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt), in vfio_msix_vector_do_use()
662 * Attempt to enable route through KVM irqchip, in vfio_msix_vector_do_use()
665 if (vector->virq >= 0) { in vfio_msix_vector_do_use()
673 if (vdev->defer_kvm_irq_routing) { in vfio_msix_vector_do_use()
691 * and enable a vector when it is in use in guest. nr_vectors represents in vfio_msix_vector_do_use()
696 vdev->nr_vectors = nr + 1; in vfio_msix_vector_do_use()
699 if (!vdev->defer_kvm_irq_routing) { in vfio_msix_vector_do_use()
700 if (vdev->msix->noresize && resizing) { in vfio_msix_vector_do_use()
701 vfio_device_irq_disable(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX); in vfio_msix_vector_do_use()
704 error_report("vfio: failed to enable vectors, %s", in vfio_msix_vector_do_use()
705 strerror(-ret)); in vfio_msix_vector_do_use()
708 set_irq_signalling(&vdev->vbasedev, vector, nr); in vfio_msix_vector_do_use()
713 clear_bit(nr, vdev->msix->pending); in vfio_msix_vector_do_use()
714 if (find_first_bit(vdev->msix->pending, in vfio_msix_vector_do_use()
715 vdev->nr_vectors) == vdev->nr_vectors) { in vfio_msix_vector_do_use()
716 memory_region_set_enabled(&vdev->pdev.msix_pba_mmio, false); in vfio_msix_vector_do_use()
717 trace_vfio_msix_pba_disable(vdev->vbasedev.name); in vfio_msix_vector_do_use()
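The pending-bit handling here is plain bitmap bookkeeping: a masked vector that fires sets its bit and makes the PBA region visible, unmasking clears the bit, and once no bits remain the PBA is hidden again. A self-contained sketch of the same bookkeeping in portable C (pba_set/pba_clear/pba_needed are illustrative names, not the QEMU bitops API):

    #include <limits.h>
    #include <stdbool.h>

    #define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

    static void pba_set(unsigned long *pending, int nr)
    {
        pending[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
    }

    static void pba_clear(unsigned long *pending, int nr)
    {
        pending[nr / BITS_PER_LONG] &= ~(1UL << (nr % BITS_PER_LONG));
    }

    /* True while any vector still has its pending bit set, i.e. the
     * emulated PBA must stay visible to the guest. */
    static bool pba_needed(const unsigned long *pending, int nvec)
    {
        for (int i = 0; i < nvec; i++) {
            if (pending[i / BITS_PER_LONG] & (1UL << (i % BITS_PER_LONG))) {
                return true;
            }
        }
        return false;
    }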
741 VFIOMSIVector *vector = &vdev->msi_vectors[nr]; in vfio_msix_vector_release()
743 trace_vfio_msix_vector_release(vdev->vbasedev.name, nr); in vfio_msix_vector_release()
748 * the KVM setup in place, simply switch VFIO to use the non-bypass in vfio_msix_vector_release()
749 * eventfd. We'll then fire the interrupt through QEMU and the MSI-X in vfio_msix_vector_release()
751 * be re-asserted on unmask. Nothing to do if already using QEMU mode. in vfio_msix_vector_release()
753 if (vector->virq >= 0) { in vfio_msix_vector_release()
754 int32_t fd = event_notifier_get_fd(&vector->interrupt); in vfio_msix_vector_release()
757 if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX, in vfio_msix_vector_release()
760 error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); in vfio_msix_vector_release()
767 msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use, in vfio_pci_msix_set_notifiers()
773 assert(!vdev->defer_kvm_irq_routing); in vfio_pci_prepare_kvm_msi_virq_batch()
774 vdev->defer_kvm_irq_routing = true; in vfio_pci_prepare_kvm_msi_virq_batch()
782 assert(vdev->defer_kvm_irq_routing); in vfio_pci_commit_kvm_msi_virq_batch()
783 vdev->defer_kvm_irq_routing = false; in vfio_pci_commit_kvm_msi_virq_batch()
787 for (i = 0; i < vdev->nr_vectors; i++) { in vfio_pci_commit_kvm_msi_virq_batch()
788 vfio_connect_kvm_msi_virq(&vdev->msi_vectors[i], i); in vfio_pci_commit_kvm_msi_virq_batch()
798 vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->msix->entries); in vfio_msix_enable()
800 vdev->interrupt = VFIO_INT_MSIX; in vfio_msix_enable()
803 * Setting vector notifiers triggers synchronous vector-use in vfio_msix_enable()
810 if (msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use, in vfio_msix_enable()
817 if (vdev->nr_vectors) { in vfio_msix_enable()
820 error_report("vfio: failed to enable vectors, %s", in vfio_msix_enable()
821 strerror(-ret)); in vfio_msix_enable()
826 * physical state of the device and expect that enabling MSI-X from the in vfio_msix_enable()
829 * MSI-X capability, but leaves the vector table masked. We therefore in vfio_msix_enable()
831 * to switch the physical device into MSI-X mode because that may come a in vfio_msix_enable()
833 * invalid fd to make the physical device MSI-X enabled, but with no in vfio_msix_enable()
838 error_report("vfio: failed to enable MSI-X, %s", in vfio_msix_enable()
839 strerror(-ret)); in vfio_msix_enable()
843 trace_vfio_msix_enable(vdev->vbasedev.name); in vfio_msix_enable()
852 vdev->nr_vectors = msi_nr_vectors_allocated(&vdev->pdev); in vfio_msi_enable()
855 * Setting vector notifiers needs to enable route for each vector. in vfio_msi_enable()
861 vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->nr_vectors); in vfio_msi_enable()
863 for (i = 0; i < vdev->nr_vectors; i++) { in vfio_msi_enable()
864 VFIOMSIVector *vector = &vdev->msi_vectors[i]; in vfio_msi_enable()
867 vector->vdev = vdev; in vfio_msi_enable()
868 vector->virq = -1; in vfio_msi_enable()
869 vector->use = true; in vfio_msi_enable()
871 if (!vfio_notifier_init(vdev, &vector->interrupt, "interrupt", i, in vfio_msi_enable()
876 qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt), in vfio_msi_enable()
880 * Attempt to enable route through KVM irqchip, in vfio_msi_enable()
889 vdev->interrupt = VFIO_INT_MSI; in vfio_msi_enable()
895 strerror(-ret)); in vfio_msi_enable()
897 error_report("vfio: Error: Failed to enable %d " in vfio_msi_enable()
898 "MSI vectors, retry with %d", vdev->nr_vectors, ret); in vfio_msi_enable()
904 vdev->nr_vectors = ret; in vfio_msi_enable()
913 error_report("vfio: Error: Failed to enable MSI"); in vfio_msi_enable()
918 trace_vfio_msi_enable(vdev->vbasedev.name, vdev->nr_vectors); in vfio_msi_enable()
925 for (i = 0; i < vdev->nr_vectors; i++) { in vfio_msi_disable_common()
926 VFIOMSIVector *vector = &vdev->msi_vectors[i]; in vfio_msi_disable_common()
927 if (vdev->msi_vectors[i].use) { in vfio_msi_disable_common()
928 if (vector->virq >= 0) { in vfio_msi_disable_common()
931 qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt), in vfio_msi_disable_common()
933 vfio_notifier_cleanup(vdev, &vector->interrupt, "interrupt", i); in vfio_msi_disable_common()
937 g_free(vdev->msi_vectors); in vfio_msi_disable_common()
938 vdev->msi_vectors = NULL; in vfio_msi_disable_common()
939 vdev->nr_vectors = 0; in vfio_msi_disable_common()
940 vdev->interrupt = VFIO_INT_NONE; in vfio_msi_disable_common()
948 msix_unset_vector_notifiers(&vdev->pdev); in vfio_msix_disable()
951 * MSI-X will only release vectors if MSI-X is still enabled on the in vfio_msix_disable()
954 for (i = 0; i < vdev->nr_vectors; i++) { in vfio_msix_disable()
955 if (vdev->msi_vectors[i].use) { in vfio_msix_disable()
956 vfio_msix_vector_release(&vdev->pdev, i); in vfio_msix_disable()
957 msix_vector_unuse(&vdev->pdev, i); in vfio_msix_disable()
962 * Always clear MSI-X IRQ index. A PF device could have enabled in vfio_msix_disable()
963 * MSI-X with no vectors. See vfio_msix_enable(). in vfio_msix_disable()
965 vfio_device_irq_disable(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX); in vfio_msix_disable()
969 error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); in vfio_msix_disable()
972 memset(vdev->msix->pending, 0, in vfio_msix_disable()
973 BITS_TO_LONGS(vdev->msix->entries) * sizeof(unsigned long)); in vfio_msix_disable()
975 trace_vfio_msix_disable(vdev->vbasedev.name); in vfio_msix_disable()
982 vfio_device_irq_disable(&vdev->vbasedev, VFIO_PCI_MSI_IRQ_INDEX); in vfio_msi_disable()
986 error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); in vfio_msi_disable()
989 trace_vfio_msi_disable(vdev->vbasedev.name); in vfio_msi_disable()
996 for (i = 0; i < vdev->nr_vectors; i++) { in vfio_update_msi()
997 VFIOMSIVector *vector = &vdev->msi_vectors[i]; in vfio_update_msi()
1000 if (!vector->use || vector->virq < 0) { in vfio_update_msi()
1004 msg = msi_get_message(&vdev->pdev, i); in vfio_update_msi()
1005 vfio_update_kvm_msi_virq(vector, msg, &vdev->pdev); in vfio_update_msi()
1011 VFIODevice *vbasedev = &vdev->vbasedev; in vfio_pci_load_rom()
1022 error_report("vfio: Error getting ROM info: %s", strerror(-ret)); in vfio_pci_load_rom()
1026 trace_vfio_pci_load_rom(vbasedev->name, (unsigned long)reg_info->size, in vfio_pci_load_rom()
1027 (unsigned long)reg_info->offset, in vfio_pci_load_rom()
1028 (unsigned long)reg_info->flags); in vfio_pci_load_rom()
1030 vdev->rom_size = size = reg_info->size; in vfio_pci_load_rom()
1031 vdev->rom_offset = reg_info->offset; in vfio_pci_load_rom()
1033 if (!vdev->rom_size) { in vfio_pci_load_rom()
1034 vdev->rom_read_failed = true; in vfio_pci_load_rom()
1035 error_report("vfio-pci: Cannot read device rom at %s", vbasedev->name); in vfio_pci_load_rom()
1042 vdev->rom = g_malloc(size); in vfio_pci_load_rom()
1043 memset(vdev->rom, 0xff, size); in vfio_pci_load_rom()
1046 bytes = vbasedev->io_ops->region_read(vbasedev, in vfio_pci_load_rom()
1048 off, size, vdev->rom + off); in vfio_pci_load_rom()
1054 size -= bytes; in vfio_pci_load_rom()
1056 if (bytes == -EINTR || bytes == -EAGAIN) { in vfio_pci_load_rom()
1072 if (pci_get_word(vdev->rom) == 0xaa55 && in vfio_pci_load_rom()
1073 pci_get_word(vdev->rom + 0x18) + 8 < vdev->rom_size && in vfio_pci_load_rom()
1074 !memcmp(vdev->rom + pci_get_word(vdev->rom + 0x18), "PCIR", 4)) { in vfio_pci_load_rom()
1077 vid = pci_get_word(vdev->rom + pci_get_word(vdev->rom + 0x18) + 4); in vfio_pci_load_rom()
1078 did = pci_get_word(vdev->rom + pci_get_word(vdev->rom + 0x18) + 6); in vfio_pci_load_rom()
1080 if (vid == vdev->vendor_id && did != vdev->device_id) { in vfio_pci_load_rom()
1082 uint8_t csum, *data = vdev->rom; in vfio_pci_load_rom()
1084 pci_set_word(vdev->rom + pci_get_word(vdev->rom + 0x18) + 6, in vfio_pci_load_rom()
1085 vdev->device_id); in vfio_pci_load_rom()
1088 for (csum = 0, i = 0; i < vdev->rom_size; i++) { in vfio_pci_load_rom()
1092 data[6] = -csum; in vfio_pci_load_rom()
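The fixup above rewrites the device ID inside the ROM's PCI data structure ("PCIR") and then repairs the image checksum: all bytes of the image must sum to zero mod 256, and the code uses offset 6 of the header as the adjusting byte. A standalone sketch of that repair step:

    #include <stddef.h>
    #include <stdint.h>

    /* Sketch of the checksum repair done after patching the ROM: zero
     * the checksum byte, sum the whole image, then store the value
     * that makes the total wrap to zero. */
    static void rom_fix_checksum(uint8_t *rom, size_t size)
    {
        uint8_t csum = 0;

        rom[6] = 0;                 /* checksum byte, as in the code above */
        for (size_t i = 0; i < size; i++) {
            csum += rom[i];
        }
        rom[6] = -csum;             /* bytes now sum to 0 mod 256 */
    }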
1101 return vdev->vbasedev.io_ops->region_read(&vdev->vbasedev, in vfio_pci_config_space_read()
1110 return vdev->vbasedev.io_ops->region_write(&vdev->vbasedev, in vfio_pci_config_space_write()
1127 if (unlikely(!vdev->rom && !vdev->rom_read_failed)) { in vfio_rom_read()
1131 memcpy(&val, vdev->rom + addr, in vfio_rom_read()
1132 (addr < vdev->rom_size) ? MIN(size, vdev->rom_size - addr) : 0); in vfio_rom_read()
1149 trace_vfio_rom_read(vdev->vbasedev.name, addr, size, data); in vfio_rom_read()
1167 VFIODevice *vbasedev = &vdev->vbasedev; in vfio_pci_size_rom()
1171 if (vdev->pdev.romfile || !vdev->pdev.rom_bar) { in vfio_pci_size_rom()
1173 if (vfio_opt_rom_in_denylist(vdev) && vdev->pdev.romfile) { in vfio_pci_size_rom()
1176 vdev->vbasedev.name); in vfio_pci_size_rom()
1191 error_report("%s(%s) ROM access failed", __func__, vbasedev->name); in vfio_pci_size_rom()
1202 if (vdev->pdev.rom_bar > 0) { in vfio_pci_size_rom()
1205 vdev->vbasedev.name); in vfio_pci_size_rom()
1211 vdev->vbasedev.name); in vfio_pci_size_rom()
1217 trace_vfio_pci_size_rom(vdev->vbasedev.name, size); in vfio_pci_size_rom()
1219 name = g_strdup_printf("vfio[%s].rom", vdev->vbasedev.name); in vfio_pci_size_rom()
1221 memory_region_init_io(&vdev->pdev.rom, OBJECT(vdev), in vfio_pci_size_rom()
1225 pci_register_bar(&vdev->pdev, PCI_ROM_SLOT, in vfio_pci_size_rom()
1226 PCI_BASE_ADDRESS_SPACE_MEMORY, &vdev->pdev.rom); in vfio_pci_size_rom()
1228 vdev->rom_read_failed = false; in vfio_pci_size_rom()
1235 VFIOVGA *vga = container_of(region, VFIOVGA, region[region->nr]); in vfio_vga_write()
1242 off_t offset = vga->fd_offset + region->offset + addr; in vfio_vga_write()
1259 if (pwrite(vga->fd, &buf, size, offset) != size) { in vfio_vga_write()
1261 __func__, region->offset + addr, data, size); in vfio_vga_write()
1264 trace_vfio_vga_write(region->offset + addr, data, size); in vfio_vga_write()
1270 VFIOVGA *vga = container_of(region, VFIOVGA, region[region->nr]); in vfio_vga_read()
1278 off_t offset = vga->fd_offset + region->offset + addr; in vfio_vga_read()
1280 if (pread(vga->fd, &buf, size, offset) != size) { in vfio_vga_read()
1282 __func__, region->offset + addr, size); in vfio_vga_read()
1283 return (uint64_t)-1; in vfio_vga_read()
1301 trace_vfio_vga_read(region->offset + addr, size, data); in vfio_vga_read()
1313 * Expand memory region of sub-page(size < PAGE_SIZE) MMIO BAR to page
1315 * this BAR to guest. But this sub-page BAR may not occupy an exclusive
1318 * with the sub-page BAR in guest. Besides, we should also recover the
1319 * size of this sub-page BAR when its base address is changed in guest
1325 VFIORegion *region = &vdev->bars[bar].region; in vfio_sub_page_bar_update_mapping()
1329 uint64_t size = region->size; in vfio_sub_page_bar_update_mapping()
1332 if (region->nr_mmaps != 1 || !region->mmaps[0].mmap || in vfio_sub_page_bar_update_mapping()
1333 region->mmaps[0].size != region->size) { in vfio_sub_page_bar_update_mapping()
1337 r = &pdev->io_regions[bar]; in vfio_sub_page_bar_update_mapping()
1338 bar_addr = r->addr; in vfio_sub_page_bar_update_mapping()
1339 base_mr = vdev->bars[bar].mr; in vfio_sub_page_bar_update_mapping()
1340 region_mr = region->mem; in vfio_sub_page_bar_update_mapping()
1341 mmap_mr = &region->mmaps[0].mem; in vfio_sub_page_bar_update_mapping()
1351 if (vdev->bars[bar].size < size) { in vfio_sub_page_bar_update_mapping()
1356 if (size != vdev->bars[bar].size && memory_region_is_mapped(base_mr)) { in vfio_sub_page_bar_update_mapping()
1357 memory_region_del_subregion(r->address_space, base_mr); in vfio_sub_page_bar_update_mapping()
1358 memory_region_add_subregion_overlap(r->address_space, in vfio_sub_page_bar_update_mapping()
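The resize logic keys off the host page size: a BAR smaller than one page is exposed as a full page (when its guest base address is page aligned) so the single mmap can be handed through, and it shrinks back to its natural size otherwise. A simplified sketch of just the size decision (POSIX sysconf; the real function also tears down and re-inserts the memory regions):

    #include <stdint.h>
    #include <unistd.h>

    /* Sketch: size to expose for a sub-page BAR -- one full host page
     * when the BAR is smaller than a page, else its natural size. */
    static uint64_t subpage_bar_size(uint64_t bar_size)
    {
        uint64_t page = (uint64_t)sysconf(_SC_PAGESIZE);

        return bar_size < page ? page : bar_size;
    }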
1371 VFIODevice *vbasedev = &vdev->vbasedev; in vfio_pci_read_config()
1374 memcpy(&emu_bits, vdev->emulated_config_bits + addr, len); in vfio_pci_read_config()
1381 if (~emu_bits & (0xffffffffU >> (32 - len * 8))) { in vfio_pci_read_config()
1387 __func__, vbasedev->name, addr, len, in vfio_pci_read_config()
1389 return -1; in vfio_pci_read_config()
1396 trace_vfio_pci_read_config(vdev->vbasedev.name, addr, len, val); in vfio_pci_read_config()
1405 VFIODevice *vbasedev = &vdev->vbasedev; in vfio_pci_write_config()
1409 trace_vfio_pci_write_config(vdev->vbasedev.name, addr, val, len); in vfio_pci_write_config()
1415 __func__, vbasedev->name, addr, val, len, in vfio_pci_write_config()
1419 /* MSI/MSI-X Enabling/Disabling */ in vfio_pci_write_config()
1420 if (pdev->cap_present & QEMU_PCI_CAP_MSI && in vfio_pci_write_config()
1421 ranges_overlap(addr, len, pdev->msi_cap, vdev->msi_cap_size)) { in vfio_pci_write_config()
1439 } else if (pdev->cap_present & QEMU_PCI_CAP_MSIX && in vfio_pci_write_config()
1440 ranges_overlap(addr, len, pdev->msix_cap, MSIX_CAP_LENGTH)) { in vfio_pci_write_config()
1454 pcibus_t old_addr[PCI_NUM_REGIONS - 1]; in vfio_pci_write_config()
1458 old_addr[bar] = pdev->io_regions[bar].addr; in vfio_pci_write_config()
1464 if (old_addr[bar] != pdev->io_regions[bar].addr && in vfio_pci_write_config()
1465 vdev->bars[bar].region.size > 0 && in vfio_pci_write_config()
1466 vdev->bars[bar].region.size < qemu_real_host_page_size()) { in vfio_pci_write_config()
1486 if (vdev->interrupt == VFIO_INT_MSIX) { in vfio_disable_interrupts()
1488 } else if (vdev->interrupt == VFIO_INT_MSI) { in vfio_disable_interrupts()
1492 if (vdev->interrupt == VFIO_INT_INTx) { in vfio_disable_interrupts()
1517 trace_vfio_msi_setup(vdev->vbasedev.name, pos); in vfio_msi_setup()
1519 ret = msi_init(&vdev->pdev, pos, entries, msi_64bit, msi_maskbit, &err); in vfio_msi_setup()
1521 if (ret == -ENOTSUP) { in vfio_msi_setup()
1527 vdev->msi_cap_size = 0xa + (msi_maskbit ? 0xa : 0) + (msi_64bit ? 0x4 : 0); in vfio_msi_setup()
1535 VFIORegion *region = &vdev->bars[vdev->msix->table_bar].region; in vfio_pci_fixup_msix_region()
1541 if (vfio_device_has_region_cap(&vdev->vbasedev, region->nr, in vfio_pci_fixup_msix_region()
1550 if (region->nr_mmaps != 1 || region->mmaps[0].offset || in vfio_pci_fixup_msix_region()
1551 region->size != region->mmaps[0].size) { in vfio_pci_fixup_msix_region()
1555 /* MSI-X table start and end aligned to host page size */ in vfio_pci_fixup_msix_region()
1556 start = vdev->msix->table_offset & qemu_real_host_page_mask(); in vfio_pci_fixup_msix_region()
1557 end = REAL_HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset + in vfio_pci_fixup_msix_region()
1558 (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE)); in vfio_pci_fixup_msix_region()
1561 * Does the MSI-X table cover the beginning of the BAR? The whole BAR? in vfio_pci_fixup_msix_region()
1562 * NB - Host page size is necessarily a power of two and so is the PCI in vfio_pci_fixup_msix_region()
1568 if (end >= region->size) { in vfio_pci_fixup_msix_region()
1569 region->nr_mmaps = 0; in vfio_pci_fixup_msix_region()
1570 g_free(region->mmaps); in vfio_pci_fixup_msix_region()
1571 region->mmaps = NULL; in vfio_pci_fixup_msix_region()
1572 trace_vfio_msix_fixup(vdev->vbasedev.name, in vfio_pci_fixup_msix_region()
1573 vdev->msix->table_bar, 0, 0); in vfio_pci_fixup_msix_region()
1575 region->mmaps[0].offset = end; in vfio_pci_fixup_msix_region()
1576 region->mmaps[0].size = region->size - end; in vfio_pci_fixup_msix_region()
1577 trace_vfio_msix_fixup(vdev->vbasedev.name, in vfio_pci_fixup_msix_region()
1578 vdev->msix->table_bar, region->mmaps[0].offset, in vfio_pci_fixup_msix_region()
1579 region->mmaps[0].offset + region->mmaps[0].size); in vfio_pci_fixup_msix_region()
1583 } else if (end >= region->size) { in vfio_pci_fixup_msix_region()
1584 region->mmaps[0].size = start; in vfio_pci_fixup_msix_region()
1585 trace_vfio_msix_fixup(vdev->vbasedev.name, in vfio_pci_fixup_msix_region()
1586 vdev->msix->table_bar, region->mmaps[0].offset, in vfio_pci_fixup_msix_region()
1587 region->mmaps[0].offset + region->mmaps[0].size); in vfio_pci_fixup_msix_region()
1591 region->nr_mmaps = 2; in vfio_pci_fixup_msix_region()
1592 region->mmaps = g_renew(VFIOMmap, region->mmaps, 2); in vfio_pci_fixup_msix_region()
1594 memcpy(&region->mmaps[1], &region->mmaps[0], sizeof(VFIOMmap)); in vfio_pci_fixup_msix_region()
1596 region->mmaps[0].size = start; in vfio_pci_fixup_msix_region()
1597 trace_vfio_msix_fixup(vdev->vbasedev.name, in vfio_pci_fixup_msix_region()
1598 vdev->msix->table_bar, region->mmaps[0].offset, in vfio_pci_fixup_msix_region()
1599 region->mmaps[0].offset + region->mmaps[0].size); in vfio_pci_fixup_msix_region()
1601 region->mmaps[1].offset = end; in vfio_pci_fixup_msix_region()
1602 region->mmaps[1].size = region->size - end; in vfio_pci_fixup_msix_region()
1603 trace_vfio_msix_fixup(vdev->vbasedev.name, in vfio_pci_fixup_msix_region()
1604 vdev->msix->table_bar, region->mmaps[1].offset, in vfio_pci_fixup_msix_region()
1605 region->mmaps[1].offset + region->mmaps[1].size); in vfio_pci_fixup_msix_region()
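The fixup walks four cases: table spans the whole BAR (drop all mmaps), table at the start (mmap only the tail), table at the end (mmap only the head), table in the middle (two mmaps). A standalone sketch computing the usable ranges, given the page-aligned [start, end) span of the table computed above:

    #include <stdint.h>

    struct range { uint64_t off, len; };

    /* Sketch: given a BAR of bar_size bytes and the page-aligned
     * [start, end) span of the MSI-X table within it, emit the
     * mmap-able remainder.  Returns the number of ranges (0, 1 or 2);
     * 0 means the table covers the whole BAR. */
    static int msix_mmap_ranges(uint64_t bar_size, uint64_t start,
                                uint64_t end, struct range out[2])
    {
        int n = 0;

        if (start > 0) {                   /* head before the table */
            out[n].off = 0;
            out[n].len = start;
            n++;
        }
        if (end < bar_size) {              /* tail after the table */
            out[n].off = end;
            out[n].len = bar_size - end;
            n++;
        }
        return n;
    }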
1611 int target_bar = -1; in vfio_pci_relocate_msix()
1614 if (!vdev->msix || vdev->msix_relo == OFF_AUTO_PCIBAR_OFF) { in vfio_pci_relocate_msix()
1618 /* The actual minimum size of MSI-X structures */ in vfio_pci_relocate_msix()
1619 msix_sz = (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE) + in vfio_pci_relocate_msix()
1620 (QEMU_ALIGN_UP(vdev->msix->entries, 64) / 8); in vfio_pci_relocate_msix()
1626 if (vdev->msix_relo == OFF_AUTO_PCIBAR_AUTO) { in vfio_pci_relocate_msix()
1636 error_setg(errp, "No automatic MSI-X relocation available for " in vfio_pci_relocate_msix()
1637 "device %04x:%04x", vdev->vendor_id, vdev->device_id); in vfio_pci_relocate_msix()
1641 target_bar = (int)(vdev->msix_relo - OFF_AUTO_PCIBAR_BAR0); in vfio_pci_relocate_msix()
1644 /* I/O port BARs cannot host MSI-X structures */ in vfio_pci_relocate_msix()
1645 if (vdev->bars[target_bar].ioport) { in vfio_pci_relocate_msix()
1646 error_setg(errp, "Invalid MSI-X relocation BAR %d, " in vfio_pci_relocate_msix()
1651 /* Cannot use a BAR in the "shadow" of a 64-bit BAR */ in vfio_pci_relocate_msix()
1652 if (!vdev->bars[target_bar].size && in vfio_pci_relocate_msix()
1653 target_bar > 0 && vdev->bars[target_bar - 1].mem64) { in vfio_pci_relocate_msix()
1654 error_setg(errp, "Invalid MSI-X relocation BAR %d, " in vfio_pci_relocate_msix()
1655 "consumed by 64-bit BAR %d", target_bar, target_bar - 1); in vfio_pci_relocate_msix()
1659 /* 2GB max size for 32-bit BARs, cannot double if already > 1G */ in vfio_pci_relocate_msix()
1660 if (vdev->bars[target_bar].size > 1 * GiB && in vfio_pci_relocate_msix()
1661 !vdev->bars[target_bar].mem64) { in vfio_pci_relocate_msix()
1662 error_setg(errp, "Invalid MSI-X relocation BAR %d, " in vfio_pci_relocate_msix()
1663 "no space to extend 32-bit BAR", target_bar); in vfio_pci_relocate_msix()
1669 * prefetchable since QEMU MSI-X emulation has no read side effects in vfio_pci_relocate_msix()
1672 if (!vdev->bars[target_bar].size) { in vfio_pci_relocate_msix()
1673 if (target_bar < (PCI_ROM_SLOT - 1) && in vfio_pci_relocate_msix()
1674 !vdev->bars[target_bar + 1].size) { in vfio_pci_relocate_msix()
1675 vdev->bars[target_bar].mem64 = true; in vfio_pci_relocate_msix()
1676 vdev->bars[target_bar].type = PCI_BASE_ADDRESS_MEM_TYPE_64; in vfio_pci_relocate_msix()
1678 vdev->bars[target_bar].type |= PCI_BASE_ADDRESS_MEM_PREFETCH; in vfio_pci_relocate_msix()
1679 vdev->bars[target_bar].size = msix_sz; in vfio_pci_relocate_msix()
1680 vdev->msix->table_offset = 0; in vfio_pci_relocate_msix()
1682 vdev->bars[target_bar].size = MAX(vdev->bars[target_bar].size * 2, in vfio_pci_relocate_msix()
1685 * Due to above size calc, MSI-X always starts halfway into the BAR, in vfio_pci_relocate_msix()
1688 vdev->msix->table_offset = vdev->bars[target_bar].size / 2; in vfio_pci_relocate_msix()
1691 vdev->msix->table_bar = target_bar; in vfio_pci_relocate_msix()
1692 vdev->msix->pba_bar = target_bar; in vfio_pci_relocate_msix()
1693 /* Requires 8-byte alignment, but PCI_MSIX_ENTRY_SIZE guarantees that */ in vfio_pci_relocate_msix()
1694 vdev->msix->pba_offset = vdev->msix->table_offset + in vfio_pci_relocate_msix()
1695 (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE); in vfio_pci_relocate_msix()
1697 trace_vfio_msix_relo(vdev->vbasedev.name, in vfio_pci_relocate_msix()
1698 vdev->msix->table_bar, vdev->msix->table_offset); in vfio_pci_relocate_msix()
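The msix_sz computation above is the minimum footprint of the two MSI-X structures: a 16-byte table entry per vector, plus one PBA bit per vector rounded up to 64-bit words. As a worked form:

    #include <stddef.h>

    #define MSIX_ENTRY_SIZE 16   /* PCI_MSIX_ENTRY_SIZE */

    /* Sketch of the msix_sz math: table + QWORD-aligned PBA.  For
     * example, 96 vectors -> 96*16 + (96 aligned up to 128)/8
     *                      = 1536 + 16 bytes. */
    static size_t msix_min_sz(unsigned entries)
    {
        size_t table = (size_t)entries * MSIX_ENTRY_SIZE;
        size_t pba   = ((entries + 63) / 64) * 8;

        return table + pba;
    }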
1704 * capabilities into the chain. In order to setup MSI-X we need a
1706 * attempt to mmap the MSI-X table area, which VFIO won't allow, we
1707 * need to first look for where the MSI-X table lives. So we
1708 * unfortunately split MSI-X setup across two functions.
1719 pos = pci_find_capability(&vdev->pdev, PCI_CAP_ID_MSIX); in vfio_msix_early_setup()
1752 msix->table_bar = table & PCI_MSIX_FLAGS_BIRMASK; in vfio_msix_early_setup()
1753 msix->table_offset = table & ~PCI_MSIX_FLAGS_BIRMASK; in vfio_msix_early_setup()
1754 msix->pba_bar = pba & PCI_MSIX_FLAGS_BIRMASK; in vfio_msix_early_setup()
1755 msix->pba_offset = pba & ~PCI_MSIX_FLAGS_BIRMASK; in vfio_msix_early_setup()
1756 msix->entries = (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1; in vfio_msix_early_setup()
1758 ret = vfio_device_get_irq_info(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX, in vfio_msix_early_setup()
1761 error_setg_errno(errp, -ret, "failed to get MSI-X irq info"); in vfio_msix_early_setup()
1766 msix->noresize = !!(irq_info.flags & VFIO_IRQ_INFO_NORESIZE); in vfio_msix_early_setup()
1773 if (msix->pba_offset >= vdev->bars[msix->pba_bar].region.size) { in vfio_msix_early_setup()
1780 if (vdev->vendor_id == PCI_VENDOR_ID_CHELSIO && in vfio_msix_early_setup()
1781 (vdev->device_id & 0xff00) == 0x5800) { in vfio_msix_early_setup()
1782 msix->pba_offset = 0x1000; in vfio_msix_early_setup()
1790 msix->pba_offset = 0xb400; in vfio_msix_early_setup()
1791 } else if (vdev->msix_relo == OFF_AUTO_PCIBAR_OFF) { in vfio_msix_early_setup()
1799 trace_vfio_msix_early_setup(vdev->vbasedev.name, pos, msix->table_bar, in vfio_msix_early_setup()
1800 msix->table_offset, msix->entries, in vfio_msix_early_setup()
1801 msix->noresize); in vfio_msix_early_setup()
1802 vdev->msix = msix; in vfio_msix_early_setup()
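The decode above pulls everything out of three capability registers: the BIR (which BAR holds the structure) lives in the low three bits, the offset in the remaining bits, and the table size in the low 11 bits of the control word. A standalone sketch over a raw config-space buffer (assumes the <linux/pci_regs.h> register offsets and a little-endian host, since config space is little-endian):

    #include <linux/pci_regs.h>
    #include <stdint.h>
    #include <string.h>

    struct msix_info {
        unsigned entries, table_bar, pba_bar;
        uint32_t table_offset, pba_offset;
    };

    /* Sketch: decode the MSI-X capability at 'pos' in raw config
     * space, mirroring the masks used in vfio_msix_early_setup().
     * memcpy assumes a little-endian host. */
    static void msix_decode(const uint8_t *cfg, unsigned pos,
                            struct msix_info *m)
    {
        uint16_t ctrl;
        uint32_t table, pba;

        memcpy(&ctrl, cfg + pos + PCI_MSIX_FLAGS, sizeof(ctrl));
        memcpy(&table, cfg + pos + PCI_MSIX_TABLE, sizeof(table));
        memcpy(&pba, cfg + pos + PCI_MSIX_PBA, sizeof(pba));

        m->entries      = (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1;
        m->table_bar    = table & PCI_MSIX_FLAGS_BIRMASK;
        m->table_offset = table & ~PCI_MSIX_FLAGS_BIRMASK;
        m->pba_bar      = pba & PCI_MSIX_FLAGS_BIRMASK;
        m->pba_offset   = pba & ~PCI_MSIX_FLAGS_BIRMASK;
    }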
1814 vdev->msix->pending = g_new0(unsigned long, in vfio_msix_setup()
1815 BITS_TO_LONGS(vdev->msix->entries)); in vfio_msix_setup()
1816 ret = msix_init(&vdev->pdev, vdev->msix->entries, in vfio_msix_setup()
1817 vdev->bars[vdev->msix->table_bar].mr, in vfio_msix_setup()
1818 vdev->msix->table_bar, vdev->msix->table_offset, in vfio_msix_setup()
1819 vdev->bars[vdev->msix->pba_bar].mr, in vfio_msix_setup()
1820 vdev->msix->pba_bar, vdev->msix->pba_offset, pos, in vfio_msix_setup()
1823 if (ret == -ENOTSUP) { in vfio_msix_setup()
1834 * MSI-X structures and avoid overlapping non-MSI-X related registers. in vfio_msix_setup()
1835 * For an assigned device, this hopefully means that emulation of MSI-X in vfio_msix_setup()
1844 * here and only enable it if a masked vector fires through QEMU. As the in vfio_msix_setup()
1845 * vector-use notifier is called, which occurs on unmask, we test whether in vfio_msix_setup()
1848 memory_region_set_enabled(&vdev->pdev.msix_pba_mmio, false); in vfio_msix_setup()
1859 "vfio-no-msix-emulation", NULL)) { in vfio_msix_setup()
1860 memory_region_set_enabled(&vdev->pdev.msix_table_mmio, false); in vfio_msix_setup()
1868 msi_uninit(&vdev->pdev); in vfio_pci_teardown_msi()
1870 if (vdev->msix) { in vfio_pci_teardown_msi()
1871 msix_uninit(&vdev->pdev, in vfio_pci_teardown_msi()
1872 vdev->bars[vdev->msix->table_bar].mr, in vfio_pci_teardown_msi()
1873 vdev->bars[vdev->msix->pba_bar].mr); in vfio_pci_teardown_msi()
1874 g_free(vdev->msix->pending); in vfio_pci_teardown_msi()
1886 vfio_region_mmaps_set_enabled(&vdev->bars[i].region, enabled); in vfio_mmap_set_enabled()
1892 VFIOBAR *bar = &vdev->bars[nr]; in vfio_bar_prepare()
1898 if (!bar->region.size) { in vfio_bar_prepare()
1911 bar->ioport = (pci_bar & PCI_BASE_ADDRESS_SPACE_IO); in vfio_bar_prepare()
1912 bar->mem64 = bar->ioport ? 0 : (pci_bar & PCI_BASE_ADDRESS_MEM_TYPE_64); in vfio_bar_prepare()
1913 bar->type = pci_bar & (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK : in vfio_bar_prepare()
1915 bar->size = bar->region.size; in vfio_bar_prepare()
1918 bar->region.post_wr = (bar->ioport == 0); in vfio_bar_prepare()
1932 VFIOBAR *bar = &vdev->bars[nr]; in vfio_bar_register()
1935 if (!bar->size) { in vfio_bar_register()
1939 bar->mr = g_new0(MemoryRegion, 1); in vfio_bar_register()
1940 name = g_strdup_printf("%s base BAR %d", vdev->vbasedev.name, nr); in vfio_bar_register()
1941 memory_region_init_io(bar->mr, OBJECT(vdev), NULL, NULL, name, bar->size); in vfio_bar_register()
1944 if (bar->region.size) { in vfio_bar_register()
1945 memory_region_add_subregion(bar->mr, 0, bar->region.mem); in vfio_bar_register()
1947 if (vfio_region_mmap(&bar->region)) { in vfio_bar_register()
1949 vdev->vbasedev.name, nr); in vfio_bar_register()
1953 pci_register_bar(&vdev->pdev, nr, bar->type, bar->mr); in vfio_bar_register()
1970 VFIOBAR *bar = &vdev->bars[i]; in vfio_pci_bars_exit()
1973 vfio_region_exit(&bar->region); in vfio_pci_bars_exit()
1974 if (bar->region.size) { in vfio_pci_bars_exit()
1975 memory_region_del_subregion(bar->mr, bar->region.mem); in vfio_pci_bars_exit()
1979 if (vdev->vga) { in vfio_pci_bars_exit()
1980 pci_unregister_vga(&vdev->pdev); in vfio_pci_bars_exit()
1990 VFIOBAR *bar = &vdev->bars[i]; in vfio_bars_finalize()
1993 vfio_region_finalize(&bar->region); in vfio_bars_finalize()
1994 if (bar->mr) { in vfio_bars_finalize()
1995 assert(bar->size); in vfio_bars_finalize()
1996 object_unparent(OBJECT(bar->mr)); in vfio_bars_finalize()
1997 g_free(bar->mr); in vfio_bars_finalize()
1998 bar->mr = NULL; in vfio_bars_finalize()
2002 if (vdev->vga) { in vfio_bars_finalize()
2004 for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) { in vfio_bars_finalize()
2005 object_unparent(OBJECT(&vdev->vga->region[i].mem)); in vfio_bars_finalize()
2007 g_free(vdev->vga); in vfio_bars_finalize()
2019 for (tmp = pdev->config[PCI_CAPABILITY_LIST]; tmp; in vfio_std_cap_max_size()
2020 tmp = pdev->config[tmp + PCI_CAP_LIST_NEXT]) { in vfio_std_cap_max_size()
2026 return next - pos; in vfio_std_cap_max_size()
2041 return next - pos; in vfio_ext_cap_max_size()
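Both helpers size a capability the same way: scan the whole list for the nearest capability that starts after this one (or fall back to the end of config space) and subtract. A standalone sketch of the standard-capability version:

    #include <stdint.h>

    #define PCI_CAPABILITY_LIST   0x34
    #define PCI_CAP_LIST_NEXT     1
    #define PCI_CONFIG_SPACE_SIZE 0x100

    /* Sketch: size of the capability at 'pos' = distance to the
     * closest capability that follows it, or to the end of standard
     * config space when none does. */
    static uint8_t std_cap_size(const uint8_t *cfg, uint8_t pos)
    {
        uint16_t next = PCI_CONFIG_SPACE_SIZE;

        for (uint8_t tmp = cfg[PCI_CAPABILITY_LIST]; tmp;
             tmp = cfg[tmp + PCI_CAP_LIST_NEXT]) {
            if (tmp > pos && tmp < next) {
                next = tmp;
            }
        }
        return next - pos;
    }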
2052 vfio_set_word_bits(vdev->pdev.config + pos, val, mask); in vfio_add_emulated_word()
2053 vfio_set_word_bits(vdev->pdev.wmask + pos, ~mask, mask); in vfio_add_emulated_word()
2054 vfio_set_word_bits(vdev->emulated_config_bits + pos, mask, mask); in vfio_add_emulated_word()
2065 vfio_set_long_bits(vdev->pdev.config + pos, val, mask); in vfio_add_emulated_long()
2066 vfio_set_long_bits(vdev->pdev.wmask + pos, ~mask, mask); in vfio_add_emulated_long()
2067 vfio_set_long_bits(vdev->emulated_config_bits + pos, mask, mask); in vfio_add_emulated_long()
2074 PCIBus *bus = pci_get_bus(&vdev->pdev); in vfio_pci_enable_rp_atomics()
2075 PCIDevice *parent = bus->parent_dev; in vfio_pci_enable_rp_atomics()
2087 if (pci_bus_is_root(bus) || !parent || !parent->exp.exp_cap || in vfio_pci_enable_rp_atomics()
2090 vdev->pdev.devfn || in vfio_pci_enable_rp_atomics()
2091 vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) { in vfio_pci_enable_rp_atomics()
2095 pos = parent->config + parent->exp.exp_cap + PCI_EXP_DEVCAP2; in vfio_pci_enable_rp_atomics()
2104 info = vfio_get_device_info(vdev->vbasedev.fd); in vfio_pci_enable_rp_atomics()
2115 if (cap->flags & VFIO_PCI_ATOMIC_COMP32) { in vfio_pci_enable_rp_atomics()
2118 if (cap->flags & VFIO_PCI_ATOMIC_COMP64) { in vfio_pci_enable_rp_atomics()
2121 if (cap->flags & VFIO_PCI_ATOMIC_COMP128) { in vfio_pci_enable_rp_atomics()
2130 vdev->clear_parent_atomics_on_exit = true; in vfio_pci_enable_rp_atomics()
2135 if (vdev->clear_parent_atomics_on_exit) { in vfio_pci_disable_rp_atomics()
2136 PCIDevice *parent = pci_get_bus(&vdev->pdev)->parent_dev; in vfio_pci_disable_rp_atomics()
2137 uint8_t *pos = parent->config + parent->exp.exp_cap + PCI_EXP_DEVCAP2; in vfio_pci_disable_rp_atomics()
2151 flags = pci_get_word(vdev->pdev.config + pos + PCI_CAP_FLAGS); in vfio_setup_pcie_cap()
2163 if (!pci_bus_is_express(pci_get_bus(&vdev->pdev))) { in vfio_setup_pcie_cap()
2164 PCIBus *bus = pci_get_bus(&vdev->pdev); in vfio_setup_pcie_cap()
2169 * as-is on non-express buses. The reason being that some drivers in vfio_setup_pcie_cap()
2180 * valid transitions between bus types. An express device on a non- in vfio_setup_pcie_cap()
2196 } else if (pci_bus_is_root(pci_get_bus(&vdev->pdev))) { in vfio_setup_pcie_cap()
2253 * Intel 82599 SR-IOV VFs report an invalid PCIe capability version 0 in vfio_setup_pcie_cap()
2264 pos = pci_add_capability(&vdev->pdev, PCI_CAP_ID_EXP, pos, size, in vfio_setup_pcie_cap()
2270 vdev->pdev.exp.exp_cap = pos; in vfio_setup_pcie_cap()
2277 uint32_t cap = pci_get_long(vdev->pdev.config + pos + PCI_EXP_DEVCAP); in vfio_check_pcie_flr()
2280 trace_vfio_check_pcie_flr(vdev->vbasedev.name); in vfio_check_pcie_flr()
2281 vdev->has_flr = true; in vfio_check_pcie_flr()
2287 uint16_t csr = pci_get_word(vdev->pdev.config + pos + PCI_PM_CTRL); in vfio_check_pm_reset()
2290 trace_vfio_check_pm_reset(vdev->vbasedev.name); in vfio_check_pm_reset()
2291 vdev->has_pm_reset = true; in vfio_check_pm_reset()
2297 uint8_t cap = pci_get_byte(vdev->pdev.config + pos + PCI_AF_CAP); in vfio_check_af_flr()
2300 trace_vfio_check_af_flr(vdev->vbasedev.name); in vfio_check_af_flr()
2301 vdev->has_flr = true; in vfio_check_af_flr()
2308 PCIDevice *pdev = &vdev->pdev; in vfio_add_vendor_specific_cap()
2320 if (vdev->skip_vsc_check && size > 3) { in vfio_add_vendor_specific_cap()
2321 memset(pdev->cmask + pos + 3, 0, size - 3); in vfio_add_vendor_specific_cap()
2330 PCIDevice *pdev = &vdev->pdev; in vfio_add_std_cap()
2334 cap_id = pdev->config[pos]; in vfio_add_std_cap()
2335 next = pdev->config[pos + PCI_CAP_LIST_NEXT]; in vfio_add_std_cap()
2349 * This is also why we pre-calculate size above as cached config space in vfio_add_std_cap()
2358 pdev->config[PCI_CAPABILITY_LIST] = 0; in vfio_add_std_cap()
2359 vdev->emulated_config_bits[PCI_CAPABILITY_LIST] = 0xff; in vfio_add_std_cap()
2360 vdev->emulated_config_bits[PCI_STATUS] |= PCI_STATUS_CAP_LIST; in vfio_add_std_cap()
2371 pci_set_byte(vdev->emulated_config_bits + pos + PCI_CAP_LIST_NEXT, 0xff); in vfio_add_std_cap()
2388 * PCI-core config space emulation needs write access to the power in vfio_add_std_cap()
2391 pci_set_word(pdev->wmask + pos + PCI_PM_CTRL, PCI_PM_CTRL_STATE_MASK); in vfio_add_std_cap()
2419 ctrl = pci_get_long(vdev->pdev.config + pos + PCI_REBAR_CTRL); in vfio_setup_rebar_ecap()
2426 ctrl = pci_get_long(vdev->pdev.config + pos + PCI_REBAR_CTRL + (i * 8)); in vfio_setup_rebar_ecap()
2438 * might need an opt-in or reservation scheme in the kernel. in vfio_setup_rebar_ecap()
2441 return -EINVAL; in vfio_setup_rebar_ecap()
2464 PCIDevice *pdev = &vdev->pdev; in vfio_add_ext_cap()
2472 !pci_get_long(pdev->config + PCI_CONFIG_SPACE_SIZE)) { in vfio_add_ext_cap()
2482 config = g_memdup(pdev->config, vdev->config_size); in vfio_add_ext_cap()
2498 * capability ID, version, AND next pointer. A non-zero next pointer in vfio_add_ext_cap()
2508 pci_set_long(pdev->config + PCI_CONFIG_SPACE_SIZE, in vfio_add_ext_cap()
2510 pci_set_long(pdev->wmask + PCI_CONFIG_SPACE_SIZE, 0); in vfio_add_ext_cap()
2511 pci_set_long(vdev->emulated_config_bits + PCI_CONFIG_SPACE_SIZE, ~0); in vfio_add_ext_cap()
2528 pci_long_test_and_set_mask(vdev->emulated_config_bits + next, in vfio_add_ext_cap()
2533 case PCI_EXT_CAP_ID_SRIOV: /* Read-only VF BARs confuse OVMF */ in vfio_add_ext_cap()
2535 trace_vfio_add_ext_cap_dropped(vdev->vbasedev.name, cap_id, next); in vfio_add_ext_cap()
2549 if (pci_get_word(pdev->config + PCI_CONFIG_SPACE_SIZE) == 0xFFFF) { in vfio_add_ext_cap()
2550 pci_set_word(pdev->config + PCI_CONFIG_SPACE_SIZE, 0); in vfio_add_ext_cap()
2558 PCIDevice *pdev = &vdev->pdev; in vfio_pci_add_capabilities()
2560 if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST) || in vfio_pci_add_capabilities()
2561 !pdev->config[PCI_CAPABILITY_LIST]) { in vfio_pci_add_capabilities()
2565 if (!vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST], errp)) { in vfio_pci_add_capabilities()
2575 PCIDevice *pdev = &vdev->pdev; in vfio_pci_pre_reset()
2590 if (pdev->pm_cap) { in vfio_pci_pre_reset()
2594 pmcsr = vfio_pci_read_config(pdev, pdev->pm_cap + PCI_PM_CTRL, 2); in vfio_pci_pre_reset()
2598 vfio_pci_write_config(pdev, pdev->pm_cap + PCI_PM_CTRL, pmcsr, 2); in vfio_pci_pre_reset()
2600 pmcsr = vfio_pci_read_config(pdev, pdev->pm_cap + PCI_PM_CTRL, 2); in vfio_pci_pre_reset()
2612 VFIODevice *vbasedev = &vdev->vbasedev; in vfio_pci_post_reset()
2617 error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); in vfio_pci_post_reset()
2620 for (nr = 0; nr < PCI_NUM_REGIONS - 1; ++nr) { in vfio_pci_post_reset()
2628 vbasedev->name, nr, strwriteerror(ret)); in vfio_pci_post_reset()
2639 sprintf(tmp, "%04x:%02x:%02x.%1x", addr->domain, in vfio_pci_host_match()
2640 addr->bus, addr->slot, addr->function); in vfio_pci_host_match()
2654 info->argsz = sizeof(*info); in vfio_pci_get_pci_hot_reset_info()
2656 ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info); in vfio_pci_get_pci_hot_reset_info()
2658 ret = -errno; in vfio_pci_get_pci_hot_reset_info()
2660 if (!vdev->has_pm_reset) { in vfio_pci_get_pci_hot_reset_info()
2662 "no available reset mechanism.", vdev->vbasedev.name); in vfio_pci_get_pci_hot_reset_info()
2667 count = info->count; in vfio_pci_get_pci_hot_reset_info()
2668 info = g_realloc(info, sizeof(*info) + (count * sizeof(info->devices[0]))); in vfio_pci_get_pci_hot_reset_info()
2669 info->argsz = sizeof(*info) + (count * sizeof(info->devices[0])); in vfio_pci_get_pci_hot_reset_info()
2671 ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info); in vfio_pci_get_pci_hot_reset_info()
2673 ret = -errno; in vfio_pci_get_pci_hot_reset_info()
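This is the standard VFIO variable-size ioctl dance: call once with just the header so the kernel can report the count (failing with ENOSPC), grow the buffer to fit, and call again. A standalone sketch of the pattern:

    #include <linux/vfio.h>
    #include <errno.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>

    /* Sketch of the two-call pattern above; returns a malloc'd info
     * blob with info->count dependent devices, or NULL on error. */
    static struct vfio_pci_hot_reset_info *get_hot_reset_info(int device_fd)
    {
        struct vfio_pci_hot_reset_info *info = calloc(1, sizeof(*info));
        size_t argsz;

        info->argsz = sizeof(*info);
        /* First call only fills info->count and fails with ENOSPC. */
        if (ioctl(device_fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info) &&
            errno != ENOSPC) {
            free(info);
            return NULL;
        }

        argsz = sizeof(*info) +
                info->count * sizeof(struct vfio_pci_dependent_device);
        info = realloc(info, argsz);
        info->argsz = argsz;

        if (ioctl(device_fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info)) {
            free(info);
            return NULL;
        }
        return info;
    }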
2685 VFIODevice *vbasedev = &vdev->vbasedev; in vfio_pci_hot_reset()
2686 const VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(vbasedev->bcontainer); in vfio_pci_hot_reset()
2688 return vioc->pci_hot_reset(vbasedev, single); in vfio_pci_hot_reset()
2692 * We want to differentiate hot reset of multiple in-use devices vs hot reset
2693 * of a single in-use device. VFIO_DEVICE_RESET will already handle the case
2694 * of doing hot resets when there is only a single device per bus. The in-use
2696 * multiple devices, but only a single in-use device, means that we can call
2697 * it from our bus ->reset() callback since the extent is effectively a single
2699 * are multiple in-use devices, we can only trigger the hot reset during a
2702 * path where both our reset handler and ->reset() callback are used. Calling
2703 * _one() will only do a hot reset for the one in-use devices case, calling
2720 if (!vbasedev->reset_works || (!vdev->has_flr && vdev->has_pm_reset)) { in vfio_pci_compute_needs_reset()
2721 vbasedev->needs_reset = true; in vfio_pci_compute_needs_reset()
2750 return vdev->ramfb_migrate == ON_OFF_AUTO_ON || in vfio_display_migration_needed()
2751 (vdev->ramfb_migrate == ON_OFF_AUTO_AUTO && vdev->enable_ramfb); in vfio_display_migration_needed()
2792 PCIDevice *pdev = &vdev->pdev; in vfio_pci_load_config()
2793 pcibus_t old_addr[PCI_NUM_REGIONS - 1]; in vfio_pci_load_config()
2797 old_addr[bar] = pdev->io_regions[bar].addr; in vfio_pci_load_config()
2806 pci_get_word(pdev->config + PCI_COMMAND), 2); in vfio_pci_load_config()
2813 if (old_addr[bar] != pdev->io_regions[bar].addr && in vfio_pci_load_config()
2814 vdev->bars[bar].region.size > 0 && in vfio_pci_load_config()
2815 vdev->bars[bar].region.size < qemu_real_host_page_size()) { in vfio_pci_load_config()
2840 VFIODevice *vbasedev = &vdev->vbasedev; in vfio_populate_vga()
2846 error_setg_errno(errp, -ret, in vfio_populate_vga()
2852 if (!(reg_info->flags & VFIO_REGION_INFO_FLAG_READ) || in vfio_populate_vga()
2853 !(reg_info->flags & VFIO_REGION_INFO_FLAG_WRITE) || in vfio_populate_vga()
2854 reg_info->size < 0xbffff + 1) { in vfio_populate_vga()
2856 (unsigned long)reg_info->flags, in vfio_populate_vga()
2857 (unsigned long)reg_info->size); in vfio_populate_vga()
2861 vdev->vga = g_new0(VFIOVGA, 1); in vfio_populate_vga()
2863 vdev->vga->fd_offset = reg_info->offset; in vfio_populate_vga()
2864 vdev->vga->fd = vdev->vbasedev.fd; in vfio_populate_vga()
2866 vdev->vga->region[QEMU_PCI_VGA_MEM].offset = QEMU_PCI_VGA_MEM_BASE; in vfio_populate_vga()
2867 vdev->vga->region[QEMU_PCI_VGA_MEM].nr = QEMU_PCI_VGA_MEM; in vfio_populate_vga()
2868 QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_MEM].quirks); in vfio_populate_vga()
2870 memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_MEM].mem, in vfio_populate_vga()
2872 &vdev->vga->region[QEMU_PCI_VGA_MEM], in vfio_populate_vga()
2873 "vfio-vga-mmio@0xa0000", in vfio_populate_vga()
2876 vdev->vga->region[QEMU_PCI_VGA_IO_LO].offset = QEMU_PCI_VGA_IO_LO_BASE; in vfio_populate_vga()
2877 vdev->vga->region[QEMU_PCI_VGA_IO_LO].nr = QEMU_PCI_VGA_IO_LO; in vfio_populate_vga()
2878 QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].quirks); in vfio_populate_vga()
2880 memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem, in vfio_populate_vga()
2882 &vdev->vga->region[QEMU_PCI_VGA_IO_LO], in vfio_populate_vga()
2883 "vfio-vga-io@0x3b0", in vfio_populate_vga()
2886 vdev->vga->region[QEMU_PCI_VGA_IO_HI].offset = QEMU_PCI_VGA_IO_HI_BASE; in vfio_populate_vga()
2887 vdev->vga->region[QEMU_PCI_VGA_IO_HI].nr = QEMU_PCI_VGA_IO_HI; in vfio_populate_vga()
2888 QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks); in vfio_populate_vga()
2890 memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem, in vfio_populate_vga()
2892 &vdev->vga->region[QEMU_PCI_VGA_IO_HI], in vfio_populate_vga()
2893 "vfio-vga-io@0x3c0", in vfio_populate_vga()
2896 pci_register_vga(&vdev->pdev, &vdev->vga->region[QEMU_PCI_VGA_MEM].mem, in vfio_populate_vga()
2897 &vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem, in vfio_populate_vga()
2898 &vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem); in vfio_populate_vga()
2905 VFIODevice *vbasedev = &vdev->vbasedev; in vfio_pci_populate_device()
2908 int i, ret = -1; in vfio_pci_populate_device()
2911 if (!(vbasedev->flags & VFIO_DEVICE_FLAGS_PCI)) { in vfio_pci_populate_device()
2916 if (vbasedev->num_regions < VFIO_PCI_CONFIG_REGION_INDEX + 1) { in vfio_pci_populate_device()
2918 vbasedev->num_regions); in vfio_pci_populate_device()
2922 if (vbasedev->num_irqs < VFIO_PCI_MSIX_IRQ_INDEX + 1) { in vfio_pci_populate_device()
2923 error_setg(errp, "unexpected number of irqs %u", vbasedev->num_irqs); in vfio_pci_populate_device()
2928 char *name = g_strdup_printf("%s BAR %d", vbasedev->name, i); in vfio_pci_populate_device()
2931 &vdev->bars[i].region, i, name); in vfio_pci_populate_device()
2935 error_setg_errno(errp, -ret, "failed to get region %d info", i); in vfio_pci_populate_device()
2939 QLIST_INIT(&vdev->bars[i].quirks); in vfio_pci_populate_device()
2945 error_setg_errno(errp, -ret, "failed to get config info"); in vfio_pci_populate_device()
2949 trace_vfio_pci_populate_device_config(vdev->vbasedev.name, in vfio_pci_populate_device()
2950 (unsigned long)reg_info->size, in vfio_pci_populate_device()
2951 (unsigned long)reg_info->offset, in vfio_pci_populate_device()
2952 (unsigned long)reg_info->flags); in vfio_pci_populate_device()
2954 vdev->config_size = reg_info->size; in vfio_pci_populate_device()
2955 if (vdev->config_size == PCI_CONFIG_SPACE_SIZE) { in vfio_pci_populate_device()
2956 vdev->pdev.cap_present &= ~QEMU_PCI_CAP_EXPRESS; in vfio_pci_populate_device()
2958 vdev->config_offset = reg_info->offset; in vfio_pci_populate_device()
2960 if (vdev->features & VFIO_FEATURE_ENABLE_VGA) { in vfio_pci_populate_device()
2963 "requested feature x-vga\n"); in vfio_pci_populate_device()
2971 trace_vfio_pci_populate_device_get_irq_info_failure(strerror(-ret)); in vfio_pci_populate_device()
2973 vdev->pci_aer = true; in vfio_pci_populate_device()
2976 "Could not enable error recovery for the device", in vfio_pci_populate_device()
2977 vbasedev->name); in vfio_pci_populate_device()
2987 g_free(vdev->emulated_config_bits); in vfio_pci_put_device()
2988 g_free(vdev->rom); in vfio_pci_put_device()
2994 * g_free(vdev->igd_opregion); in vfio_pci_put_device()
2997 vfio_device_detach(&vdev->vbasedev); in vfio_pci_put_device()
2999 vfio_device_free_name(&vdev->vbasedev); in vfio_pci_put_device()
3000 g_free(vdev->msix); in vfio_pci_put_device()
3007 if (!event_notifier_test_and_clear(&vdev->err_notifier)) { in vfio_err_notifier_handler()
3020 error_report("%s(%s) Unrecoverable error detected. Please collect any data possible and then kill the guest", __func__, vdev->vbasedev.name); in vfio_err_notifier_handler()
3036 if (!vdev->pci_aer) { in vfio_pci_register_err_notifier()
3040 if (!vfio_notifier_init(vdev, &vdev->err_notifier, "err_notifier", 0, in vfio_pci_register_err_notifier()
3043 vdev->pci_aer = false; in vfio_pci_register_err_notifier()
3047 fd = event_notifier_get_fd(&vdev->err_notifier); in vfio_pci_register_err_notifier()
3055 if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_ERR_IRQ_INDEX, 0, in vfio_pci_register_err_notifier()
3057 error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); in vfio_pci_register_err_notifier()
3059 vfio_notifier_cleanup(vdev, &vdev->err_notifier, "err_notifier", 0); in vfio_pci_register_err_notifier()
3060 vdev->pci_aer = false; in vfio_pci_register_err_notifier()
3068 if (!vdev->pci_aer) { in vfio_unregister_err_notifier()
3072 if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_ERR_IRQ_INDEX, 0, in vfio_unregister_err_notifier()
3073 VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) { in vfio_unregister_err_notifier()
3074 error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); in vfio_unregister_err_notifier()
3076 qemu_set_fd_handler(event_notifier_get_fd(&vdev->err_notifier), in vfio_unregister_err_notifier()
3078 vfio_notifier_cleanup(vdev, &vdev->err_notifier, "err_notifier", 0); in vfio_unregister_err_notifier()
3086 if (!event_notifier_test_and_clear(&vdev->req_notifier)) { in vfio_req_notifier_handler()
3092 warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); in vfio_req_notifier_handler()
3103 if (!(vdev->features & VFIO_FEATURE_ENABLE_REQ)) { in vfio_pci_register_req_notifier()
3107 ret = vfio_device_get_irq_info(&vdev->vbasedev, VFIO_PCI_REQ_IRQ_INDEX, in vfio_pci_register_req_notifier()
3113 if (!vfio_notifier_init(vdev, &vdev->req_notifier, "req_notifier", 0, in vfio_pci_register_req_notifier()
3119 fd = event_notifier_get_fd(&vdev->req_notifier); in vfio_pci_register_req_notifier()
3124 vdev->req_enabled = true; in vfio_pci_register_req_notifier()
3128 if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_REQ_IRQ_INDEX, 0, in vfio_pci_register_req_notifier()
3130 error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); in vfio_pci_register_req_notifier()
3132 vfio_notifier_cleanup(vdev, &vdev->req_notifier, "req_notifier", 0); in vfio_pci_register_req_notifier()
3134 vdev->req_enabled = true; in vfio_pci_register_req_notifier()
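Registration is skipped when the kernel does not expose a request IRQ; vfio_device_get_irq_info() above is a thin wrapper over VFIO_DEVICE_GET_IRQ_INFO. A small sketch of that availability probe (hypothetical helper, UAPI only):

#include <sys/ioctl.h>
#include <linux/vfio.h>

static int req_irq_available(int device_fd)
{
    struct vfio_irq_info info = {
        .argsz = sizeof(info),
        .index = VFIO_PCI_REQ_IRQ_INDEX,
    };

    /* Mirrors the check in pci.c: bail out on error or count < 1 */
    if (ioctl(device_fd, VFIO_DEVICE_GET_IRQ_INFO, &info) < 0) {
        return 0;
    }
    return info.count >= 1;
}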
3142 if (!vdev->req_enabled) { in vfio_unregister_req_notifier()
3146 if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_REQ_IRQ_INDEX, 0, in vfio_unregister_req_notifier()
3147 VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) { in vfio_unregister_req_notifier()
3148 error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); in vfio_unregister_req_notifier()
3150 qemu_set_fd_handler(event_notifier_get_fd(&vdev->req_notifier), in vfio_unregister_req_notifier()
3152 vfio_notifier_cleanup(vdev, &vdev->req_notifier, "req_notifier", 0); in vfio_unregister_req_notifier()
3154 vdev->req_enabled = false; in vfio_unregister_req_notifier()
3159 PCIDevice *pdev = &vdev->pdev; in vfio_pci_config_setup()
3160 VFIODevice *vbasedev = &vdev->vbasedev; in vfio_pci_config_setup()
3164 config_space_size = MIN(pci_config_size(&vdev->pdev), vdev->config_size); in vfio_pci_config_setup()
3168 vdev->pdev.config); in vfio_pci_config_setup()
3170 ret = ret < 0 ? -ret : EFAULT; in vfio_pci_config_setup()
3176 vdev->emulated_config_bits = g_malloc0(vdev->config_size); in vfio_pci_config_setup()
3179 memset(vdev->emulated_config_bits + PCI_ROM_ADDRESS, 0xff, 4); in vfio_pci_config_setup()
3181 memset(vdev->emulated_config_bits + PCI_BASE_ADDRESS_0, 0xff, 6 * 4); in vfio_pci_config_setup()
3185 * device ID is managed by the vendor and need only be a 16-bit value. in vfio_pci_config_setup()
3186 * Allow any 16-bit value for subsystem so they can be hidden or changed. in vfio_pci_config_setup()
3188 if (vdev->vendor_id != PCI_ANY_ID) { in vfio_pci_config_setup()
3189 if (vdev->vendor_id >= 0xffff) { in vfio_pci_config_setup()
3193 vfio_add_emulated_word(vdev, PCI_VENDOR_ID, vdev->vendor_id, ~0); in vfio_pci_config_setup()
3194 trace_vfio_pci_emulated_vendor_id(vbasedev->name, vdev->vendor_id); in vfio_pci_config_setup()
3196 vdev->vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID); in vfio_pci_config_setup()
3199 if (vdev->device_id != PCI_ANY_ID) { in vfio_pci_config_setup()
3200 if (vdev->device_id > 0xffff) { in vfio_pci_config_setup()
3204 vfio_add_emulated_word(vdev, PCI_DEVICE_ID, vdev->device_id, ~0); in vfio_pci_config_setup()
3205 trace_vfio_pci_emulated_device_id(vbasedev->name, vdev->device_id); in vfio_pci_config_setup()
3207 vdev->device_id = pci_get_word(pdev->config + PCI_DEVICE_ID); in vfio_pci_config_setup()
3210 if (vdev->sub_vendor_id != PCI_ANY_ID) { in vfio_pci_config_setup()
3211 if (vdev->sub_vendor_id > 0xffff) { in vfio_pci_config_setup()
3216 vdev->sub_vendor_id, ~0); in vfio_pci_config_setup()
3217 trace_vfio_pci_emulated_sub_vendor_id(vbasedev->name, in vfio_pci_config_setup()
3218 vdev->sub_vendor_id); in vfio_pci_config_setup()
3221 if (vdev->sub_device_id != PCI_ANY_ID) { in vfio_pci_config_setup()
3222 if (vdev->sub_device_id > 0xffff) { in vfio_pci_config_setup()
3226 vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_ID, vdev->sub_device_id, ~0); in vfio_pci_config_setup()
3227 trace_vfio_pci_emulated_sub_device_id(vbasedev->name, in vfio_pci_config_setup()
3228 vdev->sub_device_id); in vfio_pci_config_setup()
3231 /* QEMU can change multi-function devices to single function, or reverse */ in vfio_pci_config_setup()
3232 vdev->emulated_config_bits[PCI_HEADER_TYPE] = in vfio_pci_config_setup()
3236 if (vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) { in vfio_pci_config_setup()
3237 vdev->pdev.config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION; in vfio_pci_config_setup()
3239 vdev->pdev.config[PCI_HEADER_TYPE] &= ~PCI_HEADER_TYPE_MULTI_FUNCTION; in vfio_pci_config_setup()
3247 memset(&vdev->pdev.config[PCI_BASE_ADDRESS_0], 0, 24); in vfio_pci_config_setup()
3248 memset(&vdev->pdev.config[PCI_ROM_ADDRESS], 0, 4); in vfio_pci_config_setup()
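The memset()s above mark the BARs, expansion ROM, and ID registers as fully emulated in emulated_config_bits; on every config access the set bits select QEMU's copy and the clear bits select the physical device. A minimal illustration of that overlay (the function name is the editor's, not pci.c's):

#include <stdint.h>

static uint32_t overlay_config(uint32_t phys_val, uint32_t emu_val,
                               uint32_t emu_bits)
{
    /* 1-bits come from QEMU's emulated config, 0-bits from hardware */
    return (emu_val & emu_bits) | (phys_val & ~emu_bits);
}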
3265 PCIDevice *pdev = &vdev->pdev; in vfio_pci_interrupt_setup()
3268 if (pdev->cap_present & QEMU_PCI_CAP_MSIX) { in vfio_pci_interrupt_setup()
3269 memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff, in vfio_pci_interrupt_setup()
3273 if (pdev->cap_present & QEMU_PCI_CAP_MSI) { in vfio_pci_interrupt_setup()
3274 memset(vdev->emulated_config_bits + pdev->msi_cap, 0xff, in vfio_pci_interrupt_setup()
3275 vdev->msi_cap_size); in vfio_pci_interrupt_setup()
3278 if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) { in vfio_pci_interrupt_setup()
3279 vdev->intx.mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, in vfio_pci_interrupt_setup()
3281 pci_device_set_intx_routing_notifier(&vdev->pdev, in vfio_pci_interrupt_setup()
3283 vdev->irqchip_change_notifier.notify = vfio_irqchip_change; in vfio_pci_interrupt_setup()
3284 kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier); in vfio_pci_interrupt_setup()
3292 timer_free(vdev->intx.mmap_timer); in vfio_pci_interrupt_setup()
3293 pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); in vfio_pci_interrupt_setup()
3294 kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); in vfio_pci_interrupt_setup()
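INTx plumbing is only set up when the device reports a non-zero PCI_INTERRUPT_PIN. A tiny sketch of that probe and the pin-to-letter mapping used by the INTx trace points, assuming a raw config-space buffer (illustrative helper):

#include <stdint.h>
#include <linux/pci_regs.h>

static char intx_pin_name(const uint8_t *cfg)
{
    uint8_t pin = cfg[PCI_INTERRUPT_PIN];  /* 0 = none, 1..4 = INTA..INTD */

    return pin ? 'A' + pin - 1 : '-';
}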
3305 VFIODevice *vbasedev = &vdev->vbasedev; in vfio_pci_realize()
3310 if (vbasedev->fd < 0 && !vbasedev->sysfsdev) { in vfio_pci_realize()
3311 if (!(~vdev->host.domain || ~vdev->host.bus || in vfio_pci_realize()
3312 ~vdev->host.slot || ~vdev->host.function)) { in vfio_pci_realize()
3314 error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F " in vfio_pci_realize()
3316 "or -device vfio-pci,fd=DEVICE_FD " in vfio_pci_realize()
3318 "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n"); in vfio_pci_realize()
3321 vbasedev->sysfsdev = in vfio_pci_realize()
3323 vdev->host.domain, vdev->host.bus, in vfio_pci_realize()
3324 vdev->host.slot, vdev->host.function); in vfio_pci_realize()
3335 * the x-balloon-allowed option unless this is minimally an mdev device. in vfio_pci_realize()
3337 vbasedev->mdev = vfio_device_is_mdev(vbasedev); in vfio_pci_realize()
3339 trace_vfio_mdev(vbasedev->name, vbasedev->mdev); in vfio_pci_realize()
3341 if (vbasedev->ram_block_discard_allowed && !vbasedev->mdev) { in vfio_pci_realize()
3342 error_setg(errp, "x-balloon-allowed only potentially compatible " in vfio_pci_realize()
3347 if (!qemu_uuid_is_null(&vdev->vf_token)) { in vfio_pci_realize()
3348 qemu_uuid_unparse(&vdev->vf_token, uuid); in vfio_pci_realize()
3349 name = g_strdup_printf("%s vf_token=%s", vbasedev->name, uuid); in vfio_pci_realize()
3351 name = g_strdup(vbasedev->name); in vfio_pci_realize()
3367 if (!vbasedev->mdev && in vfio_pci_realize()
3368 !pci_device_set_iommu_device(pdev, vbasedev->hiod, errp)) { in vfio_pci_realize()
3381 if (vdev->vga) { in vfio_pci_realize()
3393 if (vdev->display != ON_OFF_AUTO_OFF) { in vfio_pci_realize()
3398 if (vdev->enable_ramfb && vdev->dpy == NULL) { in vfio_pci_realize()
3402 if (vdev->display_xres || vdev->display_yres) { in vfio_pci_realize()
3403 if (vdev->dpy == NULL) { in vfio_pci_realize()
3407 if (vdev->dpy->edid_regs == NULL) { in vfio_pci_realize()
3413 if (vdev->ramfb_migrate == ON_OFF_AUTO_ON && !vdev->enable_ramfb) { in vfio_pci_realize()
3414 warn_report("x-ramfb-migrate=on but ramfb=off. " in vfio_pci_realize()
3415 "Forcing x-ramfb-migrate to off."); in vfio_pci_realize()
3416 vdev->ramfb_migrate = ON_OFF_AUTO_OFF; in vfio_pci_realize()
3418 if (vbasedev->enable_migration == ON_OFF_AUTO_OFF) { in vfio_pci_realize()
3419 if (vdev->ramfb_migrate == ON_OFF_AUTO_AUTO) { in vfio_pci_realize()
3420 vdev->ramfb_migrate = ON_OFF_AUTO_OFF; in vfio_pci_realize()
3421 } else if (vdev->ramfb_migrate == ON_OFF_AUTO_ON) { in vfio_pci_realize()
3422 error_setg(errp, "x-ramfb-migrate requires enable-migration"); in vfio_pci_realize()
3427 if (!pdev->failover_pair_id) { in vfio_pci_realize()
3440 if (vdev->interrupt == VFIO_INT_INTx) { in vfio_pci_realize()
3443 pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); in vfio_pci_realize()
3444 if (vdev->irqchip_change_notifier.notify) { in vfio_pci_realize()
3445 kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); in vfio_pci_realize()
3447 if (vdev->intx.mmap_timer) { in vfio_pci_realize()
3448 timer_free(vdev->intx.mmap_timer); in vfio_pci_realize()
3451 if (!vbasedev->mdev) { in vfio_pci_realize()
3458 error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name); in vfio_pci_realize()
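When neither fd= nor sysfsdev= is given, the host=DDDD:BB:DD.F address is expanded into a sysfs path as shown above. A hedged sketch of parsing that form back into its components (editor's helper, not QEMU's):

#include <stdio.h>

static int parse_pci_host_addr(const char *str, unsigned *domain,
                               unsigned *bus, unsigned *slot, unsigned *fn)
{
    /* Accepts the DDDD:BB:DD.F form from the error hint above */
    return sscanf(str, "%x:%x:%x.%x", domain, bus, slot, fn) == 4 ? 0 : -1;
}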
3471 VFIODevice *vbasedev = &vdev->vbasedev; in vfio_exitfn()
3475 pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); in vfio_exitfn()
3476 if (vdev->irqchip_change_notifier.notify) { in vfio_exitfn()
3477 kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); in vfio_exitfn()
3480 if (vdev->intx.mmap_timer) { in vfio_exitfn()
3481 timer_free(vdev->intx.mmap_timer); in vfio_exitfn()
3487 if (!vbasedev->mdev) { in vfio_exitfn()
3501 trace_vfio_pci_reset(vdev->vbasedev.name); in vfio_pci_reset()
3505 if (vdev->display != ON_OFF_AUTO_OFF) { in vfio_pci_reset()
3509 if (vdev->resetfn && !vdev->resetfn(vdev)) { in vfio_pci_reset()
3513 if (vdev->vbasedev.reset_works && in vfio_pci_reset()
3514 (vdev->has_flr || !vdev->has_pm_reset) && in vfio_pci_reset()
3515 !ioctl(vdev->vbasedev.fd, VFIO_DEVICE_RESET)) { in vfio_pci_reset()
3516 trace_vfio_pci_reset_flr(vdev->vbasedev.name); in vfio_pci_reset()
3526 if (vdev->vbasedev.reset_works && vdev->has_pm_reset && in vfio_pci_reset()
3527 !ioctl(vdev->vbasedev.fd, VFIO_DEVICE_RESET)) { in vfio_pci_reset()
3528 trace_vfio_pci_reset_pm(vdev->vbasedev.name); in vfio_pci_reset()
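Both reset paths funnel into the same VFIO_DEVICE_RESET ioctl; reset_works caches whether VFIO_DEVICE_GET_INFO advertised VFIO_DEVICE_FLAGS_RESET. A standalone sketch of the attempt (hypothetical helper name):

#include <sys/ioctl.h>
#include <linux/vfio.h>

static int try_device_reset(int device_fd)
{
    /* Only meaningful if the device reported VFIO_DEVICE_FLAGS_RESET */
    return ioctl(device_fd, VFIO_DEVICE_RESET);
}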
3540 VFIODevice *vbasedev = &vdev->vbasedev; in vfio_instance_init()
3542 device_add_bootindex_property(obj, &vdev->bootindex, in vfio_instance_init()
3544 &pci_dev->qdev); in vfio_instance_init()
3545 vdev->host.domain = ~0U; in vfio_instance_init()
3546 vdev->host.bus = ~0U; in vfio_instance_init()
3547 vdev->host.slot = ~0U; in vfio_instance_init()
3548 vdev->host.function = ~0U; in vfio_instance_init()
3553 vdev->nv_gpudirect_clique = 0xFF; in vfio_instance_init()
3557 pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; in vfio_instance_init()
3564 pci_dev->cap_present |= QEMU_PCI_SKIP_RESET_ON_CPR; in vfio_instance_init()
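The host.* fields are initialized to ~0U as "unset" sentinels, which is what the all-ones test in vfio_pci_realize() detects before insisting on fd= or sysfsdev=. An equivalent form of that test, for illustration only:

#include <stdbool.h>
#include <stdint.h>

static bool host_addr_unset(uint32_t domain, uint32_t bus,
                            uint32_t slot, uint32_t function)
{
    /* True only when every field still holds the ~0U sentinel */
    return (~domain | ~bus | ~slot | ~function) == 0;
}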
3572 dc->desc = "VFIO PCI base device"; in vfio_pci_base_dev_class_init()
3573 set_bit(DEVICE_CATEGORY_MISC, dc->categories); in vfio_pci_base_dev_class_init()
3574 pdc->exit = vfio_exitfn; in vfio_pci_base_dev_class_init()
3575 pdc->config_read = vfio_pci_read_config; in vfio_pci_base_dev_class_init()
3576 pdc->config_write = vfio_pci_write_config; in vfio_pci_base_dev_class_init()
3596 DEFINE_PROP_UUID_NODEFAULT("vf-token", VFIOPCIDevice, vf_token),
3598 DEFINE_PROP_ON_OFF_AUTO("x-pre-copy-dirty-page-tracking", VFIOPCIDevice,
3601 DEFINE_PROP_ON_OFF_AUTO("x-device-dirty-page-tracking", VFIOPCIDevice,
3608 DEFINE_PROP_UINT32("x-intx-mmap-timeout-ms", VFIOPCIDevice,
3610 DEFINE_PROP_BIT("x-vga", VFIOPCIDevice, features,
3612 DEFINE_PROP_BIT("x-req", VFIOPCIDevice, features,
3614 DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features,
3616 DEFINE_PROP_BIT("x-igd-lpc", VFIOPCIDevice, features,
3618 DEFINE_PROP_ON_OFF_AUTO("x-igd-legacy-mode", VFIOPCIDevice,
3620 DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice,
3622 DEFINE_PROP("x-migration-multifd-transfer", VFIOPCIDevice,
3626 DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice,
3628 DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
3629 DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice,
3631 DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false),
3632 DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false),
3633 DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false),
3634 DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice,
3636 DEFINE_PROP_BOOL("x-no-kvm-ioeventfd", VFIOPCIDevice, no_kvm_ioeventfd,
3638 DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd,
3640 DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice, vendor_id, PCI_ANY_ID),
3641 DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice, device_id, PCI_ANY_ID),
3642 DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice,
3644 DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice,
3646 DEFINE_PROP_UINT32("x-igd-gms", VFIOPCIDevice, igd_gms, 0),
3647 DEFINE_PROP_UNSIGNED_NODEFAULT("x-nv-gpudirect-clique", VFIOPCIDevice,
3650 DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice, msix_relo,
3656 DEFINE_PROP_BOOL("skip-vsc-check", VFIOPCIDevice, skip_vsc_check, true),
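Hedged usage note (editor's addition): the properties above are set per device on the QEMU command line, and the experimental x- options are unstable and may change between releases. For example:

/*
 * qemu-system-x86_64 ... \
 *     -device vfio-pci,host=0000:01:00.0,x-no-mmap=on,x-vga=on
 */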
3663 vfio_device_set_fd(&vdev->vbasedev, str, errp); in vfio_pci_set_fd()
3677 dc->vmsd = &vfio_cpr_pci_vmstate; in vfio_pci_dev_class_init()
3678 dc->desc = "VFIO-based PCI device assignment"; in vfio_pci_dev_class_init()
3679 pdc->realize = vfio_pci_realize; in vfio_pci_dev_class_init()
3685 "x-intx-mmap-timeout-ms", in vfio_pci_dev_class_init()
3687 "(milliseconds) to re-enable device direct access " in vfio_pci_dev_class_init()
3690 "x-vga", in vfio_pci_dev_class_init()
3693 "x-req", in vfio_pci_dev_class_init()
3696 "x-no-mmap", in vfio_pci_dev_class_init()
3700 "x-no-kvm-intx", in vfio_pci_dev_class_init()
3701 "Disable direct VFIO->KVM INTx injection. Allows to " in vfio_pci_dev_class_init()
3704 "x-no-kvm-msi", in vfio_pci_dev_class_init()
3705 "Disable direct VFIO->KVM MSI injection. Allows to " in vfio_pci_dev_class_init()
3708 "x-no-kvm-msix", in vfio_pci_dev_class_init()
3709 "Disable direct VFIO->KVM MSIx injection. Allows to " in vfio_pci_dev_class_init()
3712 "x-pci-vendor-id", in vfio_pci_dev_class_init()
3715 "x-pci-device-id", in vfio_pci_dev_class_init()
3718 "x-pci-sub-vendor-id", in vfio_pci_dev_class_init()
3722 "x-pci-sub-device-id", in vfio_pci_dev_class_init()
3729 "x-igd-opregion", in vfio_pci_dev_class_init()
3732 "x-igd-gms", in vfio_pci_dev_class_init()
3735 "x-nv-gpudirect-clique", in vfio_pci_dev_class_init()
3737 "clique for device [0-15]"); in vfio_pci_dev_class_init()
3739 "x-no-geforce-quirks", in vfio_pci_dev_class_init()
3744 "Enable display support for device, ex. vGPU"); in vfio_pci_dev_class_init()
3746 "x-msix-relocation", in vfio_pci_dev_class_init()
3747 "Specify MSI-X MMIO relocation to the end of specified " in vfio_pci_dev_class_init()
3751 "x-no-kvm-ioeventfd", in vfio_pci_dev_class_init()
3754 "x-no-vfio-ioeventfd", in vfio_pci_dev_class_init()
3758 "x-balloon-allowed", in vfio_pci_dev_class_init()
3767 "x-pre-copy-dirty-page-tracking", in vfio_pci_dev_class_init()
3770 object_class_property_set_description(klass, /* 5.2, 8.0 non-experimental */ in vfio_pci_dev_class_init()
3771 "enable-migration", in vfio_pci_dev_class_init()
3775 "vf-token", in vfio_pci_dev_class_init()
3784 "x-device-dirty-page-tracking", in vfio_pci_dev_class_init()
3786 "container-based dirty page tracking"); in vfio_pci_dev_class_init()
3788 "migration-events", in vfio_pci_dev_class_init()
3792 "skip-vsc-check", in vfio_pci_dev_class_init()
3797 "x-migration-multifd-transfer", in vfio_pci_dev_class_init()
3812 DEFINE_PROP_ON_OFF_AUTO("x-ramfb-migrate", VFIOPCIDevice, ramfb_migrate,
3822 dc->hotpluggable = false; in vfio_pci_nohotplug_dev_class_init()
3826 "Enable ramfb to provide pre-boot graphics for devices " in vfio_pci_nohotplug_dev_class_init()
3829 "x-ramfb-migrate", in vfio_pci_nohotplug_dev_class_init()
3844 * Ordinary ON_OFF_AUTO property isn't runtime-mutable, but source VM can in register_vfio_pci_dev_type()