16078a454SJean-Philippe Brucker #include "kvm/irq.h" 26078a454SJean-Philippe Brucker #include "kvm/kvm.h" 36078a454SJean-Philippe Brucker #include "kvm/kvm-cpu.h" 46078a454SJean-Philippe Brucker #include "kvm/vfio.h" 56078a454SJean-Philippe Brucker 66078a454SJean-Philippe Brucker #include <sys/ioctl.h> 76078a454SJean-Philippe Brucker #include <sys/eventfd.h> 8*c9888d95SJean-Philippe Brucker #include <sys/resource.h> 9*c9888d95SJean-Philippe Brucker #include <sys/time.h> 106078a454SJean-Philippe Brucker 116078a454SJean-Philippe Brucker /* Wrapper around UAPI vfio_irq_set */ 126078a454SJean-Philippe Brucker struct vfio_irq_eventfd { 136078a454SJean-Philippe Brucker struct vfio_irq_set irq; 146078a454SJean-Philippe Brucker int fd; 156078a454SJean-Philippe Brucker }; 166078a454SJean-Philippe Brucker 17*c9888d95SJean-Philippe Brucker #define msi_is_enabled(state) ((state) & VFIO_PCI_MSI_STATE_ENABLED) 18*c9888d95SJean-Philippe Brucker #define msi_is_masked(state) ((state) & VFIO_PCI_MSI_STATE_MASKED) 19*c9888d95SJean-Philippe Brucker #define msi_is_empty(state) ((state) & VFIO_PCI_MSI_STATE_EMPTY) 20*c9888d95SJean-Philippe Brucker 21*c9888d95SJean-Philippe Brucker #define msi_update_state(state, val, bit) \ 22*c9888d95SJean-Philippe Brucker (state) = (val) ? (state) | bit : (state) & ~bit; 23*c9888d95SJean-Philippe Brucker #define msi_set_enabled(state, val) \ 24*c9888d95SJean-Philippe Brucker msi_update_state(state, val, VFIO_PCI_MSI_STATE_ENABLED) 25*c9888d95SJean-Philippe Brucker #define msi_set_masked(state, val) \ 26*c9888d95SJean-Philippe Brucker msi_update_state(state, val, VFIO_PCI_MSI_STATE_MASKED) 27*c9888d95SJean-Philippe Brucker #define msi_set_empty(state, val) \ 28*c9888d95SJean-Philippe Brucker msi_update_state(state, val, VFIO_PCI_MSI_STATE_EMPTY) 29*c9888d95SJean-Philippe Brucker 30*c9888d95SJean-Philippe Brucker static void vfio_pci_disable_intx(struct kvm *kvm, struct vfio_device *vdev); 31*c9888d95SJean-Philippe Brucker 32*c9888d95SJean-Philippe Brucker static int vfio_pci_enable_msis(struct kvm *kvm, struct vfio_device *vdev) 33*c9888d95SJean-Philippe Brucker { 34*c9888d95SJean-Philippe Brucker size_t i; 35*c9888d95SJean-Philippe Brucker int ret = 0; 36*c9888d95SJean-Philippe Brucker int *eventfds; 37*c9888d95SJean-Philippe Brucker struct vfio_pci_device *pdev = &vdev->pci; 38*c9888d95SJean-Philippe Brucker struct vfio_pci_msi_common *msis = &pdev->msix; 39*c9888d95SJean-Philippe Brucker struct vfio_irq_eventfd single = { 40*c9888d95SJean-Philippe Brucker .irq = { 41*c9888d95SJean-Philippe Brucker .argsz = sizeof(single), 42*c9888d95SJean-Philippe Brucker .flags = VFIO_IRQ_SET_DATA_EVENTFD | 43*c9888d95SJean-Philippe Brucker VFIO_IRQ_SET_ACTION_TRIGGER, 44*c9888d95SJean-Philippe Brucker .index = msis->info.index, 45*c9888d95SJean-Philippe Brucker .count = 1, 46*c9888d95SJean-Philippe Brucker }, 47*c9888d95SJean-Philippe Brucker }; 48*c9888d95SJean-Philippe Brucker 49*c9888d95SJean-Philippe Brucker if (!msi_is_enabled(msis->virt_state)) 50*c9888d95SJean-Philippe Brucker return 0; 51*c9888d95SJean-Philippe Brucker 52*c9888d95SJean-Philippe Brucker if (pdev->irq_modes & VFIO_PCI_IRQ_MODE_INTX) { 53*c9888d95SJean-Philippe Brucker /* 54*c9888d95SJean-Philippe Brucker * PCI (and VFIO) forbids enabling INTx, MSI or MSIX at the same 55*c9888d95SJean-Philippe Brucker * time. Since INTx has to be enabled from the start (we don't 56*c9888d95SJean-Philippe Brucker * have a reliable way to know when the user starts using it), 57*c9888d95SJean-Philippe Brucker * disable it now. 58*c9888d95SJean-Philippe Brucker */ 59*c9888d95SJean-Philippe Brucker vfio_pci_disable_intx(kvm, vdev); 60*c9888d95SJean-Philippe Brucker /* Permanently disable INTx */ 61*c9888d95SJean-Philippe Brucker pdev->irq_modes &= ~VFIO_PCI_IRQ_MODE_INTX; 62*c9888d95SJean-Philippe Brucker } 63*c9888d95SJean-Philippe Brucker 64*c9888d95SJean-Philippe Brucker eventfds = (void *)msis->irq_set + sizeof(struct vfio_irq_set); 65*c9888d95SJean-Philippe Brucker 66*c9888d95SJean-Philippe Brucker /* 67*c9888d95SJean-Philippe Brucker * Initial registration of the full range. This enables the physical 68*c9888d95SJean-Philippe Brucker * MSI/MSI-X capability, which might have desired side effects. For 69*c9888d95SJean-Philippe Brucker * instance when assigning virtio legacy devices, enabling the MSI 70*c9888d95SJean-Philippe Brucker * capability modifies the config space layout! 71*c9888d95SJean-Philippe Brucker * 72*c9888d95SJean-Philippe Brucker * As an optimization, only update MSIs when guest unmasks the 73*c9888d95SJean-Philippe Brucker * capability. This greatly reduces the initialization time for Linux 74*c9888d95SJean-Philippe Brucker * guest with 2048+ MSIs. Linux guest starts by enabling the MSI-X cap 75*c9888d95SJean-Philippe Brucker * masked, then fills individual vectors, then unmasks the whole 76*c9888d95SJean-Philippe Brucker * function. So we only do one VFIO ioctl when enabling for the first 77*c9888d95SJean-Philippe Brucker * time, and then one when unmasking. 78*c9888d95SJean-Philippe Brucker * 79*c9888d95SJean-Philippe Brucker * phys_state is empty when it is enabled but no vector has been 80*c9888d95SJean-Philippe Brucker * registered via SET_IRQS yet. 81*c9888d95SJean-Philippe Brucker */ 82*c9888d95SJean-Philippe Brucker if (!msi_is_enabled(msis->phys_state) || 83*c9888d95SJean-Philippe Brucker (!msi_is_masked(msis->virt_state) && 84*c9888d95SJean-Philippe Brucker msi_is_empty(msis->phys_state))) { 85*c9888d95SJean-Philippe Brucker bool empty = true; 86*c9888d95SJean-Philippe Brucker 87*c9888d95SJean-Philippe Brucker for (i = 0; i < msis->nr_entries; i++) { 88*c9888d95SJean-Philippe Brucker eventfds[i] = msis->entries[i].gsi >= 0 ? 89*c9888d95SJean-Philippe Brucker msis->entries[i].eventfd : -1; 90*c9888d95SJean-Philippe Brucker 91*c9888d95SJean-Philippe Brucker if (eventfds[i] >= 0) 92*c9888d95SJean-Philippe Brucker empty = false; 93*c9888d95SJean-Philippe Brucker } 94*c9888d95SJean-Philippe Brucker 95*c9888d95SJean-Philippe Brucker ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, msis->irq_set); 96*c9888d95SJean-Philippe Brucker if (ret < 0) { 97*c9888d95SJean-Philippe Brucker perror("VFIO_DEVICE_SET_IRQS(multi)"); 98*c9888d95SJean-Philippe Brucker return ret; 99*c9888d95SJean-Philippe Brucker } 100*c9888d95SJean-Philippe Brucker 101*c9888d95SJean-Philippe Brucker msi_set_enabled(msis->phys_state, true); 102*c9888d95SJean-Philippe Brucker msi_set_empty(msis->phys_state, empty); 103*c9888d95SJean-Philippe Brucker 104*c9888d95SJean-Philippe Brucker return 0; 105*c9888d95SJean-Philippe Brucker } 106*c9888d95SJean-Philippe Brucker 107*c9888d95SJean-Philippe Brucker if (msi_is_masked(msis->virt_state)) { 108*c9888d95SJean-Philippe Brucker /* TODO: if phys_state is not empty nor masked, mask all vectors */ 109*c9888d95SJean-Philippe Brucker return 0; 110*c9888d95SJean-Philippe Brucker } 111*c9888d95SJean-Philippe Brucker 112*c9888d95SJean-Philippe Brucker /* Update individual vectors to avoid breaking those in use */ 113*c9888d95SJean-Philippe Brucker for (i = 0; i < msis->nr_entries; i++) { 114*c9888d95SJean-Philippe Brucker struct vfio_pci_msi_entry *entry = &msis->entries[i]; 115*c9888d95SJean-Philippe Brucker int fd = entry->gsi >= 0 ? entry->eventfd : -1; 116*c9888d95SJean-Philippe Brucker 117*c9888d95SJean-Philippe Brucker if (fd == eventfds[i]) 118*c9888d95SJean-Philippe Brucker continue; 119*c9888d95SJean-Philippe Brucker 120*c9888d95SJean-Philippe Brucker single.irq.start = i; 121*c9888d95SJean-Philippe Brucker single.fd = fd; 122*c9888d95SJean-Philippe Brucker 123*c9888d95SJean-Philippe Brucker ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &single); 124*c9888d95SJean-Philippe Brucker if (ret < 0) { 125*c9888d95SJean-Philippe Brucker perror("VFIO_DEVICE_SET_IRQS(single)"); 126*c9888d95SJean-Philippe Brucker break; 127*c9888d95SJean-Philippe Brucker } 128*c9888d95SJean-Philippe Brucker 129*c9888d95SJean-Philippe Brucker eventfds[i] = fd; 130*c9888d95SJean-Philippe Brucker 131*c9888d95SJean-Philippe Brucker if (msi_is_empty(msis->phys_state) && fd >= 0) 132*c9888d95SJean-Philippe Brucker msi_set_empty(msis->phys_state, false); 133*c9888d95SJean-Philippe Brucker } 134*c9888d95SJean-Philippe Brucker 135*c9888d95SJean-Philippe Brucker return ret; 136*c9888d95SJean-Philippe Brucker } 137*c9888d95SJean-Philippe Brucker 138*c9888d95SJean-Philippe Brucker static int vfio_pci_disable_msis(struct kvm *kvm, struct vfio_device *vdev) 139*c9888d95SJean-Philippe Brucker { 140*c9888d95SJean-Philippe Brucker int ret; 141*c9888d95SJean-Philippe Brucker struct vfio_pci_device *pdev = &vdev->pci; 142*c9888d95SJean-Philippe Brucker struct vfio_pci_msi_common *msis = &pdev->msix; 143*c9888d95SJean-Philippe Brucker struct vfio_irq_set irq_set = { 144*c9888d95SJean-Philippe Brucker .argsz = sizeof(irq_set), 145*c9888d95SJean-Philippe Brucker .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER, 146*c9888d95SJean-Philippe Brucker .index = msis->info.index, 147*c9888d95SJean-Philippe Brucker .start = 0, 148*c9888d95SJean-Philippe Brucker .count = 0, 149*c9888d95SJean-Philippe Brucker }; 150*c9888d95SJean-Philippe Brucker 151*c9888d95SJean-Philippe Brucker if (!msi_is_enabled(msis->phys_state)) 152*c9888d95SJean-Philippe Brucker return 0; 153*c9888d95SJean-Philippe Brucker 154*c9888d95SJean-Philippe Brucker ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); 155*c9888d95SJean-Philippe Brucker if (ret < 0) { 156*c9888d95SJean-Philippe Brucker perror("VFIO_DEVICE_SET_IRQS(NONE)"); 157*c9888d95SJean-Philippe Brucker return ret; 158*c9888d95SJean-Philippe Brucker } 159*c9888d95SJean-Philippe Brucker 160*c9888d95SJean-Philippe Brucker msi_set_enabled(msis->phys_state, false); 161*c9888d95SJean-Philippe Brucker msi_set_empty(msis->phys_state, true); 162*c9888d95SJean-Philippe Brucker 163*c9888d95SJean-Philippe Brucker return 0; 164*c9888d95SJean-Philippe Brucker } 165*c9888d95SJean-Philippe Brucker 166*c9888d95SJean-Philippe Brucker static int vfio_pci_update_msi_entry(struct kvm *kvm, struct vfio_device *vdev, 167*c9888d95SJean-Philippe Brucker struct vfio_pci_msi_entry *entry) 168*c9888d95SJean-Philippe Brucker { 169*c9888d95SJean-Philippe Brucker int ret; 170*c9888d95SJean-Philippe Brucker 171*c9888d95SJean-Philippe Brucker if (entry->eventfd < 0) { 172*c9888d95SJean-Philippe Brucker entry->eventfd = eventfd(0, 0); 173*c9888d95SJean-Philippe Brucker if (entry->eventfd < 0) { 174*c9888d95SJean-Philippe Brucker ret = -errno; 175*c9888d95SJean-Philippe Brucker vfio_dev_err(vdev, "cannot create eventfd"); 176*c9888d95SJean-Philippe Brucker return ret; 177*c9888d95SJean-Philippe Brucker } 178*c9888d95SJean-Philippe Brucker } 179*c9888d95SJean-Philippe Brucker 180*c9888d95SJean-Philippe Brucker /* Allocate IRQ if necessary */ 181*c9888d95SJean-Philippe Brucker if (entry->gsi < 0) { 182*c9888d95SJean-Philippe Brucker int ret = irq__add_msix_route(kvm, &entry->config.msg, 183*c9888d95SJean-Philippe Brucker vdev->dev_hdr.dev_num << 3); 184*c9888d95SJean-Philippe Brucker if (ret < 0) { 185*c9888d95SJean-Philippe Brucker vfio_dev_err(vdev, "cannot create MSI-X route"); 186*c9888d95SJean-Philippe Brucker return ret; 187*c9888d95SJean-Philippe Brucker } 188*c9888d95SJean-Philippe Brucker entry->gsi = ret; 189*c9888d95SJean-Philippe Brucker } else { 190*c9888d95SJean-Philippe Brucker irq__update_msix_route(kvm, entry->gsi, &entry->config.msg); 191*c9888d95SJean-Philippe Brucker } 192*c9888d95SJean-Philippe Brucker 193*c9888d95SJean-Philippe Brucker /* 194*c9888d95SJean-Philippe Brucker * MSI masking is unimplemented in VFIO, so we have to handle it by 195*c9888d95SJean-Philippe Brucker * disabling/enabling IRQ route instead. We do it on the KVM side rather 196*c9888d95SJean-Philippe Brucker * than VFIO, because: 197*c9888d95SJean-Philippe Brucker * - it is 8x faster 198*c9888d95SJean-Philippe Brucker * - it allows to decouple masking logic from capability state. 199*c9888d95SJean-Philippe Brucker * - in masked state, after removing irqfd route, we could easily plug 200*c9888d95SJean-Philippe Brucker * the eventfd in a local handler, in order to serve Pending Bit reads 201*c9888d95SJean-Philippe Brucker * to the guest. 202*c9888d95SJean-Philippe Brucker * 203*c9888d95SJean-Philippe Brucker * So entry->phys_state is masked when there is no active irqfd route. 204*c9888d95SJean-Philippe Brucker */ 205*c9888d95SJean-Philippe Brucker if (msi_is_masked(entry->virt_state) == msi_is_masked(entry->phys_state)) 206*c9888d95SJean-Philippe Brucker return 0; 207*c9888d95SJean-Philippe Brucker 208*c9888d95SJean-Philippe Brucker if (msi_is_masked(entry->phys_state)) { 209*c9888d95SJean-Philippe Brucker ret = irq__add_irqfd(kvm, entry->gsi, entry->eventfd, -1); 210*c9888d95SJean-Philippe Brucker if (ret < 0) { 211*c9888d95SJean-Philippe Brucker vfio_dev_err(vdev, "cannot setup irqfd"); 212*c9888d95SJean-Philippe Brucker return ret; 213*c9888d95SJean-Philippe Brucker } 214*c9888d95SJean-Philippe Brucker } else { 215*c9888d95SJean-Philippe Brucker irq__del_irqfd(kvm, entry->gsi, entry->eventfd); 216*c9888d95SJean-Philippe Brucker } 217*c9888d95SJean-Philippe Brucker 218*c9888d95SJean-Philippe Brucker msi_set_masked(entry->phys_state, msi_is_masked(entry->virt_state)); 219*c9888d95SJean-Philippe Brucker 220*c9888d95SJean-Philippe Brucker return 0; 221*c9888d95SJean-Philippe Brucker } 222*c9888d95SJean-Philippe Brucker 223*c9888d95SJean-Philippe Brucker static void vfio_pci_msix_pba_access(struct kvm_cpu *vcpu, u64 addr, u8 *data, 224*c9888d95SJean-Philippe Brucker u32 len, u8 is_write, void *ptr) 225*c9888d95SJean-Philippe Brucker { 226*c9888d95SJean-Philippe Brucker struct vfio_pci_device *pdev = ptr; 227*c9888d95SJean-Philippe Brucker struct vfio_pci_msix_pba *pba = &pdev->msix_pba; 228*c9888d95SJean-Philippe Brucker u64 offset = addr - pba->guest_phys_addr; 229*c9888d95SJean-Philippe Brucker struct vfio_device *vdev = container_of(pdev, struct vfio_device, pci); 230*c9888d95SJean-Philippe Brucker 231*c9888d95SJean-Philippe Brucker if (is_write) 232*c9888d95SJean-Philippe Brucker return; 233*c9888d95SJean-Philippe Brucker 234*c9888d95SJean-Philippe Brucker /* 235*c9888d95SJean-Philippe Brucker * TODO: emulate PBA. Hardware MSI-X is never masked, so reading the PBA 236*c9888d95SJean-Philippe Brucker * is completely useless here. Note that Linux doesn't use PBA. 237*c9888d95SJean-Philippe Brucker */ 238*c9888d95SJean-Philippe Brucker if (pread(vdev->fd, data, len, pba->offset + offset) != (ssize_t)len) 239*c9888d95SJean-Philippe Brucker vfio_dev_err(vdev, "cannot access MSIX PBA\n"); 240*c9888d95SJean-Philippe Brucker } 241*c9888d95SJean-Philippe Brucker 242*c9888d95SJean-Philippe Brucker static void vfio_pci_msix_table_access(struct kvm_cpu *vcpu, u64 addr, u8 *data, 243*c9888d95SJean-Philippe Brucker u32 len, u8 is_write, void *ptr) 244*c9888d95SJean-Philippe Brucker { 245*c9888d95SJean-Philippe Brucker struct kvm *kvm = vcpu->kvm; 246*c9888d95SJean-Philippe Brucker struct vfio_pci_msi_entry *entry; 247*c9888d95SJean-Philippe Brucker struct vfio_pci_device *pdev = ptr; 248*c9888d95SJean-Philippe Brucker struct vfio_device *vdev = container_of(pdev, struct vfio_device, pci); 249*c9888d95SJean-Philippe Brucker 250*c9888d95SJean-Philippe Brucker u64 offset = addr - pdev->msix_table.guest_phys_addr; 251*c9888d95SJean-Philippe Brucker 252*c9888d95SJean-Philippe Brucker size_t vector = offset / PCI_MSIX_ENTRY_SIZE; 253*c9888d95SJean-Philippe Brucker off_t field = offset % PCI_MSIX_ENTRY_SIZE; 254*c9888d95SJean-Philippe Brucker 255*c9888d95SJean-Philippe Brucker /* 256*c9888d95SJean-Philippe Brucker * PCI spec says that software must use aligned 4 or 8 bytes accesses 257*c9888d95SJean-Philippe Brucker * for the MSI-X tables. 258*c9888d95SJean-Philippe Brucker */ 259*c9888d95SJean-Philippe Brucker if ((len != 4 && len != 8) || addr & (len - 1)) { 260*c9888d95SJean-Philippe Brucker vfio_dev_warn(vdev, "invalid MSI-X table access"); 261*c9888d95SJean-Philippe Brucker return; 262*c9888d95SJean-Philippe Brucker } 263*c9888d95SJean-Philippe Brucker 264*c9888d95SJean-Philippe Brucker entry = &pdev->msix.entries[vector]; 265*c9888d95SJean-Philippe Brucker 266*c9888d95SJean-Philippe Brucker mutex_lock(&pdev->msix.mutex); 267*c9888d95SJean-Philippe Brucker 268*c9888d95SJean-Philippe Brucker if (!is_write) { 269*c9888d95SJean-Philippe Brucker memcpy(data, (void *)&entry->config + field, len); 270*c9888d95SJean-Philippe Brucker goto out_unlock; 271*c9888d95SJean-Philippe Brucker } 272*c9888d95SJean-Philippe Brucker 273*c9888d95SJean-Philippe Brucker memcpy((void *)&entry->config + field, data, len); 274*c9888d95SJean-Philippe Brucker 275*c9888d95SJean-Philippe Brucker /* 276*c9888d95SJean-Philippe Brucker * Check if access touched the vector control register, which is at the 277*c9888d95SJean-Philippe Brucker * end of the MSI-X entry. 278*c9888d95SJean-Philippe Brucker */ 279*c9888d95SJean-Philippe Brucker if (field + len <= PCI_MSIX_ENTRY_VECTOR_CTRL) 280*c9888d95SJean-Philippe Brucker goto out_unlock; 281*c9888d95SJean-Philippe Brucker 282*c9888d95SJean-Philippe Brucker msi_set_masked(entry->virt_state, entry->config.ctrl & 283*c9888d95SJean-Philippe Brucker PCI_MSIX_ENTRY_CTRL_MASKBIT); 284*c9888d95SJean-Philippe Brucker 285*c9888d95SJean-Philippe Brucker if (vfio_pci_update_msi_entry(kvm, vdev, entry) < 0) 286*c9888d95SJean-Philippe Brucker /* Not much we can do here. */ 287*c9888d95SJean-Philippe Brucker vfio_dev_err(vdev, "failed to configure MSIX vector %zu", vector); 288*c9888d95SJean-Philippe Brucker 289*c9888d95SJean-Philippe Brucker /* Update the physical capability if necessary */ 290*c9888d95SJean-Philippe Brucker if (vfio_pci_enable_msis(kvm, vdev)) 291*c9888d95SJean-Philippe Brucker vfio_dev_err(vdev, "cannot enable MSIX"); 292*c9888d95SJean-Philippe Brucker 293*c9888d95SJean-Philippe Brucker out_unlock: 294*c9888d95SJean-Philippe Brucker mutex_unlock(&pdev->msix.mutex); 295*c9888d95SJean-Philippe Brucker } 296*c9888d95SJean-Philippe Brucker 297*c9888d95SJean-Philippe Brucker static void vfio_pci_msix_cap_write(struct kvm *kvm, 298*c9888d95SJean-Philippe Brucker struct vfio_device *vdev, u8 off, 299*c9888d95SJean-Philippe Brucker void *data, int sz) 300*c9888d95SJean-Philippe Brucker { 301*c9888d95SJean-Philippe Brucker struct vfio_pci_device *pdev = &vdev->pci; 302*c9888d95SJean-Philippe Brucker off_t enable_pos = PCI_MSIX_FLAGS + 1; 303*c9888d95SJean-Philippe Brucker bool enable; 304*c9888d95SJean-Philippe Brucker u16 flags; 305*c9888d95SJean-Philippe Brucker 306*c9888d95SJean-Philippe Brucker off -= pdev->msix.pos; 307*c9888d95SJean-Philippe Brucker 308*c9888d95SJean-Philippe Brucker /* Check if access intersects with the MSI-X Enable bit */ 309*c9888d95SJean-Philippe Brucker if (off > enable_pos || off + sz <= enable_pos) 310*c9888d95SJean-Philippe Brucker return; 311*c9888d95SJean-Philippe Brucker 312*c9888d95SJean-Philippe Brucker /* Read byte that contains the Enable bit */ 313*c9888d95SJean-Philippe Brucker flags = *(u8 *)(data + enable_pos - off) << 8; 314*c9888d95SJean-Philippe Brucker 315*c9888d95SJean-Philippe Brucker mutex_lock(&pdev->msix.mutex); 316*c9888d95SJean-Philippe Brucker 317*c9888d95SJean-Philippe Brucker msi_set_masked(pdev->msix.virt_state, flags & PCI_MSIX_FLAGS_MASKALL); 318*c9888d95SJean-Philippe Brucker enable = flags & PCI_MSIX_FLAGS_ENABLE; 319*c9888d95SJean-Philippe Brucker msi_set_enabled(pdev->msix.virt_state, enable); 320*c9888d95SJean-Philippe Brucker 321*c9888d95SJean-Philippe Brucker if (enable && vfio_pci_enable_msis(kvm, vdev)) 322*c9888d95SJean-Philippe Brucker vfio_dev_err(vdev, "cannot enable MSIX"); 323*c9888d95SJean-Philippe Brucker else if (!enable && vfio_pci_disable_msis(kvm, vdev)) 324*c9888d95SJean-Philippe Brucker vfio_dev_err(vdev, "cannot disable MSIX"); 325*c9888d95SJean-Philippe Brucker 326*c9888d95SJean-Philippe Brucker mutex_unlock(&pdev->msix.mutex); 327*c9888d95SJean-Philippe Brucker } 328*c9888d95SJean-Philippe Brucker 3296078a454SJean-Philippe Brucker static void vfio_pci_cfg_read(struct kvm *kvm, struct pci_device_header *pci_hdr, 3306078a454SJean-Philippe Brucker u8 offset, void *data, int sz) 3316078a454SJean-Philippe Brucker { 3326078a454SJean-Philippe Brucker struct vfio_region_info *info; 3336078a454SJean-Philippe Brucker struct vfio_pci_device *pdev; 3346078a454SJean-Philippe Brucker struct vfio_device *vdev; 3356078a454SJean-Philippe Brucker char base[sz]; 3366078a454SJean-Philippe Brucker 3376078a454SJean-Philippe Brucker pdev = container_of(pci_hdr, struct vfio_pci_device, hdr); 3386078a454SJean-Philippe Brucker vdev = container_of(pdev, struct vfio_device, pci); 3396078a454SJean-Philippe Brucker info = &vdev->regions[VFIO_PCI_CONFIG_REGION_INDEX].info; 3406078a454SJean-Philippe Brucker 3416078a454SJean-Philippe Brucker /* Dummy read in case of side-effects */ 3426078a454SJean-Philippe Brucker if (pread(vdev->fd, base, sz, info->offset + offset) != sz) 3436078a454SJean-Philippe Brucker vfio_dev_warn(vdev, "failed to read %d bytes from Configuration Space at 0x%x", 3446078a454SJean-Philippe Brucker sz, offset); 3456078a454SJean-Philippe Brucker } 3466078a454SJean-Philippe Brucker 3476078a454SJean-Philippe Brucker static void vfio_pci_cfg_write(struct kvm *kvm, struct pci_device_header *pci_hdr, 3486078a454SJean-Philippe Brucker u8 offset, void *data, int sz) 3496078a454SJean-Philippe Brucker { 3506078a454SJean-Philippe Brucker struct vfio_region_info *info; 3516078a454SJean-Philippe Brucker struct vfio_pci_device *pdev; 3526078a454SJean-Philippe Brucker struct vfio_device *vdev; 3536078a454SJean-Philippe Brucker void *base = pci_hdr; 3546078a454SJean-Philippe Brucker 3556078a454SJean-Philippe Brucker pdev = container_of(pci_hdr, struct vfio_pci_device, hdr); 3566078a454SJean-Philippe Brucker vdev = container_of(pdev, struct vfio_device, pci); 3576078a454SJean-Philippe Brucker info = &vdev->regions[VFIO_PCI_CONFIG_REGION_INDEX].info; 3586078a454SJean-Philippe Brucker 3596078a454SJean-Philippe Brucker if (pwrite(vdev->fd, data, sz, info->offset + offset) != sz) 3606078a454SJean-Philippe Brucker vfio_dev_warn(vdev, "Failed to write %d bytes to Configuration Space at 0x%x", 3616078a454SJean-Philippe Brucker sz, offset); 3626078a454SJean-Philippe Brucker 363*c9888d95SJean-Philippe Brucker /* Handle MSI write now, since it might update the hardware capability */ 364*c9888d95SJean-Philippe Brucker if (pdev->irq_modes & VFIO_PCI_IRQ_MODE_MSIX) 365*c9888d95SJean-Philippe Brucker vfio_pci_msix_cap_write(kvm, vdev, offset, data, sz); 366*c9888d95SJean-Philippe Brucker 3676078a454SJean-Philippe Brucker if (pread(vdev->fd, base + offset, sz, info->offset + offset) != sz) 3686078a454SJean-Philippe Brucker vfio_dev_warn(vdev, "Failed to read %d bytes from Configuration Space at 0x%x", 3696078a454SJean-Philippe Brucker sz, offset); 3706078a454SJean-Philippe Brucker } 3716078a454SJean-Philippe Brucker 372*c9888d95SJean-Philippe Brucker static ssize_t vfio_pci_cap_size(struct pci_cap_hdr *cap_hdr) 373*c9888d95SJean-Philippe Brucker { 374*c9888d95SJean-Philippe Brucker switch (cap_hdr->type) { 375*c9888d95SJean-Philippe Brucker case PCI_CAP_ID_MSIX: 376*c9888d95SJean-Philippe Brucker return PCI_CAP_MSIX_SIZEOF; 377*c9888d95SJean-Philippe Brucker default: 378*c9888d95SJean-Philippe Brucker pr_err("unknown PCI capability 0x%x", cap_hdr->type); 379*c9888d95SJean-Philippe Brucker return 0; 380*c9888d95SJean-Philippe Brucker } 381*c9888d95SJean-Philippe Brucker } 382*c9888d95SJean-Philippe Brucker 383*c9888d95SJean-Philippe Brucker static int vfio_pci_add_cap(struct vfio_device *vdev, u8 *virt_hdr, 384*c9888d95SJean-Philippe Brucker struct pci_cap_hdr *cap, off_t pos) 385*c9888d95SJean-Philippe Brucker { 386*c9888d95SJean-Philippe Brucker struct pci_cap_hdr *last; 387*c9888d95SJean-Philippe Brucker struct pci_device_header *hdr = &vdev->pci.hdr; 388*c9888d95SJean-Philippe Brucker 389*c9888d95SJean-Philippe Brucker cap->next = 0; 390*c9888d95SJean-Philippe Brucker 391*c9888d95SJean-Philippe Brucker if (!hdr->capabilities) { 392*c9888d95SJean-Philippe Brucker hdr->capabilities = pos; 393*c9888d95SJean-Philippe Brucker hdr->status |= PCI_STATUS_CAP_LIST; 394*c9888d95SJean-Philippe Brucker } else { 395*c9888d95SJean-Philippe Brucker last = PCI_CAP(virt_hdr, hdr->capabilities); 396*c9888d95SJean-Philippe Brucker 397*c9888d95SJean-Philippe Brucker while (last->next) 398*c9888d95SJean-Philippe Brucker last = PCI_CAP(virt_hdr, last->next); 399*c9888d95SJean-Philippe Brucker 400*c9888d95SJean-Philippe Brucker last->next = pos; 401*c9888d95SJean-Philippe Brucker } 402*c9888d95SJean-Philippe Brucker 403*c9888d95SJean-Philippe Brucker memcpy(virt_hdr + pos, cap, vfio_pci_cap_size(cap)); 404*c9888d95SJean-Philippe Brucker 405*c9888d95SJean-Philippe Brucker return 0; 406*c9888d95SJean-Philippe Brucker } 407*c9888d95SJean-Philippe Brucker 4086078a454SJean-Philippe Brucker static int vfio_pci_parse_caps(struct vfio_device *vdev) 4096078a454SJean-Philippe Brucker { 410*c9888d95SJean-Philippe Brucker int ret; 411*c9888d95SJean-Philippe Brucker size_t size; 412*c9888d95SJean-Philippe Brucker u8 pos, next; 413*c9888d95SJean-Philippe Brucker struct pci_cap_hdr *cap; 414*c9888d95SJean-Philippe Brucker u8 virt_hdr[PCI_DEV_CFG_SIZE]; 4156078a454SJean-Philippe Brucker struct vfio_pci_device *pdev = &vdev->pci; 4166078a454SJean-Philippe Brucker 4176078a454SJean-Philippe Brucker if (!(pdev->hdr.status & PCI_STATUS_CAP_LIST)) 4186078a454SJean-Philippe Brucker return 0; 4196078a454SJean-Philippe Brucker 420*c9888d95SJean-Philippe Brucker memset(virt_hdr, 0, PCI_DEV_CFG_SIZE); 421*c9888d95SJean-Philippe Brucker 422*c9888d95SJean-Philippe Brucker pos = pdev->hdr.capabilities & ~3; 423*c9888d95SJean-Philippe Brucker 4246078a454SJean-Philippe Brucker pdev->hdr.status &= ~PCI_STATUS_CAP_LIST; 4256078a454SJean-Philippe Brucker pdev->hdr.capabilities = 0; 4266078a454SJean-Philippe Brucker 427*c9888d95SJean-Philippe Brucker for (; pos; pos = next) { 428*c9888d95SJean-Philippe Brucker if (pos >= PCI_DEV_CFG_SIZE) { 429*c9888d95SJean-Philippe Brucker vfio_dev_warn(vdev, "ignoring cap outside of config space"); 430*c9888d95SJean-Philippe Brucker return -EINVAL; 431*c9888d95SJean-Philippe Brucker } 432*c9888d95SJean-Philippe Brucker 433*c9888d95SJean-Philippe Brucker cap = PCI_CAP(&pdev->hdr, pos); 434*c9888d95SJean-Philippe Brucker next = cap->next; 435*c9888d95SJean-Philippe Brucker 436*c9888d95SJean-Philippe Brucker switch (cap->type) { 437*c9888d95SJean-Philippe Brucker case PCI_CAP_ID_MSIX: 438*c9888d95SJean-Philippe Brucker ret = vfio_pci_add_cap(vdev, virt_hdr, cap, pos); 439*c9888d95SJean-Philippe Brucker if (ret) 440*c9888d95SJean-Philippe Brucker return ret; 441*c9888d95SJean-Philippe Brucker 442*c9888d95SJean-Philippe Brucker pdev->msix.pos = pos; 443*c9888d95SJean-Philippe Brucker pdev->irq_modes |= VFIO_PCI_IRQ_MODE_MSIX; 444*c9888d95SJean-Philippe Brucker break; 445*c9888d95SJean-Philippe Brucker } 446*c9888d95SJean-Philippe Brucker } 447*c9888d95SJean-Philippe Brucker 448*c9888d95SJean-Philippe Brucker /* Wipe remaining capabilities */ 449*c9888d95SJean-Philippe Brucker pos = PCI_STD_HEADER_SIZEOF; 450*c9888d95SJean-Philippe Brucker size = PCI_DEV_CFG_SIZE - PCI_STD_HEADER_SIZEOF; 451*c9888d95SJean-Philippe Brucker memcpy((void *)&pdev->hdr + pos, virt_hdr + pos, size); 4526078a454SJean-Philippe Brucker 4536078a454SJean-Philippe Brucker return 0; 4546078a454SJean-Philippe Brucker } 4556078a454SJean-Philippe Brucker 4566078a454SJean-Philippe Brucker static int vfio_pci_parse_cfg_space(struct vfio_device *vdev) 4576078a454SJean-Philippe Brucker { 458*c9888d95SJean-Philippe Brucker ssize_t sz = PCI_DEV_CFG_SIZE; 4596078a454SJean-Philippe Brucker struct vfio_region_info *info; 4606078a454SJean-Philippe Brucker struct vfio_pci_device *pdev = &vdev->pci; 4616078a454SJean-Philippe Brucker 4626078a454SJean-Philippe Brucker if (vdev->info.num_regions < VFIO_PCI_CONFIG_REGION_INDEX) { 4636078a454SJean-Philippe Brucker vfio_dev_err(vdev, "Config Space not found"); 4646078a454SJean-Philippe Brucker return -ENODEV; 4656078a454SJean-Philippe Brucker } 4666078a454SJean-Philippe Brucker 4676078a454SJean-Philippe Brucker info = &vdev->regions[VFIO_PCI_CONFIG_REGION_INDEX].info; 4686078a454SJean-Philippe Brucker *info = (struct vfio_region_info) { 4696078a454SJean-Philippe Brucker .argsz = sizeof(*info), 4706078a454SJean-Philippe Brucker .index = VFIO_PCI_CONFIG_REGION_INDEX, 4716078a454SJean-Philippe Brucker }; 4726078a454SJean-Philippe Brucker 4736078a454SJean-Philippe Brucker ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, info); 4746078a454SJean-Philippe Brucker if (!info->size) { 4756078a454SJean-Philippe Brucker vfio_dev_err(vdev, "Config Space has size zero?!"); 4766078a454SJean-Philippe Brucker return -EINVAL; 4776078a454SJean-Philippe Brucker } 4786078a454SJean-Philippe Brucker 479*c9888d95SJean-Philippe Brucker /* Read standard headers and capabilities */ 4806078a454SJean-Philippe Brucker if (pread(vdev->fd, &pdev->hdr, sz, info->offset) != sz) { 4816078a454SJean-Philippe Brucker vfio_dev_err(vdev, "failed to read %zd bytes of Config Space", sz); 4826078a454SJean-Philippe Brucker return -EIO; 4836078a454SJean-Philippe Brucker } 4846078a454SJean-Philippe Brucker 4856078a454SJean-Philippe Brucker /* Strip bit 7, that indicates multifunction */ 4866078a454SJean-Philippe Brucker pdev->hdr.header_type &= 0x7f; 4876078a454SJean-Philippe Brucker 4886078a454SJean-Philippe Brucker if (pdev->hdr.header_type != PCI_HEADER_TYPE_NORMAL) { 4896078a454SJean-Philippe Brucker vfio_dev_err(vdev, "unsupported header type %u", 4906078a454SJean-Philippe Brucker pdev->hdr.header_type); 4916078a454SJean-Philippe Brucker return -EOPNOTSUPP; 4926078a454SJean-Philippe Brucker } 4936078a454SJean-Philippe Brucker 494*c9888d95SJean-Philippe Brucker if (pdev->hdr.irq_pin) 495*c9888d95SJean-Philippe Brucker pdev->irq_modes |= VFIO_PCI_IRQ_MODE_INTX; 496*c9888d95SJean-Philippe Brucker 4976078a454SJean-Philippe Brucker vfio_pci_parse_caps(vdev); 4986078a454SJean-Philippe Brucker 4996078a454SJean-Philippe Brucker return 0; 5006078a454SJean-Philippe Brucker } 5016078a454SJean-Philippe Brucker 5026078a454SJean-Philippe Brucker static int vfio_pci_fixup_cfg_space(struct vfio_device *vdev) 5036078a454SJean-Philippe Brucker { 5046078a454SJean-Philippe Brucker int i; 5056078a454SJean-Philippe Brucker ssize_t hdr_sz; 506*c9888d95SJean-Philippe Brucker struct msix_cap *msix; 5076078a454SJean-Philippe Brucker struct vfio_region_info *info; 5086078a454SJean-Philippe Brucker struct vfio_pci_device *pdev = &vdev->pci; 5096078a454SJean-Philippe Brucker 5106078a454SJean-Philippe Brucker /* Enable exclusively MMIO and bus mastering */ 5116078a454SJean-Philippe Brucker pdev->hdr.command &= ~PCI_COMMAND_IO; 5126078a454SJean-Philippe Brucker pdev->hdr.command |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER; 5136078a454SJean-Philippe Brucker 5146078a454SJean-Philippe Brucker /* Initialise the BARs */ 5156078a454SJean-Philippe Brucker for (i = VFIO_PCI_BAR0_REGION_INDEX; i <= VFIO_PCI_BAR5_REGION_INDEX; ++i) { 5166078a454SJean-Philippe Brucker struct vfio_region *region = &vdev->regions[i]; 5176078a454SJean-Philippe Brucker u64 base = region->guest_phys_addr; 5186078a454SJean-Philippe Brucker 5196078a454SJean-Philippe Brucker if (!base) 5206078a454SJean-Philippe Brucker continue; 5216078a454SJean-Philippe Brucker 5226078a454SJean-Philippe Brucker pdev->hdr.bar_size[i] = region->info.size; 5236078a454SJean-Philippe Brucker 5246078a454SJean-Philippe Brucker /* Construct a fake reg to match what we've mapped. */ 5256078a454SJean-Philippe Brucker pdev->hdr.bar[i] = (base & PCI_BASE_ADDRESS_MEM_MASK) | 5266078a454SJean-Philippe Brucker PCI_BASE_ADDRESS_SPACE_MEMORY | 5276078a454SJean-Philippe Brucker PCI_BASE_ADDRESS_MEM_TYPE_32; 5286078a454SJean-Philippe Brucker } 5296078a454SJean-Philippe Brucker 5306078a454SJean-Philippe Brucker /* I really can't be bothered to support cardbus. */ 5316078a454SJean-Philippe Brucker pdev->hdr.card_bus = 0; 5326078a454SJean-Philippe Brucker 5336078a454SJean-Philippe Brucker /* 5346078a454SJean-Philippe Brucker * Nuke the expansion ROM for now. If we want to do this properly, 5356078a454SJean-Philippe Brucker * we need to save its size somewhere and map into the guest. 5366078a454SJean-Philippe Brucker */ 5376078a454SJean-Philippe Brucker pdev->hdr.exp_rom_bar = 0; 5386078a454SJean-Philippe Brucker 539*c9888d95SJean-Philippe Brucker /* Plumb in our fake MSI-X capability, if we have it. */ 540*c9888d95SJean-Philippe Brucker msix = pci_find_cap(&pdev->hdr, PCI_CAP_ID_MSIX); 541*c9888d95SJean-Philippe Brucker if (msix) { 542*c9888d95SJean-Philippe Brucker /* Add a shortcut to the PBA region for the MMIO handler */ 543*c9888d95SJean-Philippe Brucker int pba_index = VFIO_PCI_BAR0_REGION_INDEX + pdev->msix_pba.bar; 544*c9888d95SJean-Philippe Brucker pdev->msix_pba.offset = vdev->regions[pba_index].info.offset + 545*c9888d95SJean-Philippe Brucker (msix->pba_offset & PCI_MSIX_PBA_OFFSET); 546*c9888d95SJean-Philippe Brucker 547*c9888d95SJean-Philippe Brucker /* Tidy up the capability */ 548*c9888d95SJean-Philippe Brucker msix->table_offset &= PCI_MSIX_TABLE_BIR; 549*c9888d95SJean-Philippe Brucker msix->pba_offset &= PCI_MSIX_PBA_BIR; 550*c9888d95SJean-Philippe Brucker if (pdev->msix_table.bar == pdev->msix_pba.bar) 551*c9888d95SJean-Philippe Brucker msix->pba_offset |= pdev->msix_table.size & 552*c9888d95SJean-Philippe Brucker PCI_MSIX_PBA_OFFSET; 553*c9888d95SJean-Philippe Brucker } 554*c9888d95SJean-Philippe Brucker 5556078a454SJean-Philippe Brucker /* Install our fake Configuration Space */ 5566078a454SJean-Philippe Brucker info = &vdev->regions[VFIO_PCI_CONFIG_REGION_INDEX].info; 5576078a454SJean-Philippe Brucker hdr_sz = PCI_DEV_CFG_SIZE; 5586078a454SJean-Philippe Brucker if (pwrite(vdev->fd, &pdev->hdr, hdr_sz, info->offset) != hdr_sz) { 5596078a454SJean-Philippe Brucker vfio_dev_err(vdev, "failed to write %zd bytes to Config Space", 5606078a454SJean-Philippe Brucker hdr_sz); 5616078a454SJean-Philippe Brucker return -EIO; 5626078a454SJean-Philippe Brucker } 5636078a454SJean-Philippe Brucker 5646078a454SJean-Philippe Brucker /* Register callbacks for cfg accesses */ 5656078a454SJean-Philippe Brucker pdev->hdr.cfg_ops = (struct pci_config_operations) { 5666078a454SJean-Philippe Brucker .read = vfio_pci_cfg_read, 5676078a454SJean-Philippe Brucker .write = vfio_pci_cfg_write, 5686078a454SJean-Philippe Brucker }; 5696078a454SJean-Philippe Brucker 5706078a454SJean-Philippe Brucker pdev->hdr.irq_type = IRQ_TYPE_LEVEL_HIGH; 5716078a454SJean-Philippe Brucker 5726078a454SJean-Philippe Brucker return 0; 5736078a454SJean-Philippe Brucker } 5746078a454SJean-Philippe Brucker 575*c9888d95SJean-Philippe Brucker static int vfio_pci_create_msix_table(struct kvm *kvm, 576*c9888d95SJean-Philippe Brucker struct vfio_pci_device *pdev) 577*c9888d95SJean-Philippe Brucker { 578*c9888d95SJean-Philippe Brucker int ret; 579*c9888d95SJean-Philippe Brucker size_t i; 580*c9888d95SJean-Philippe Brucker size_t mmio_size; 581*c9888d95SJean-Philippe Brucker size_t nr_entries; 582*c9888d95SJean-Philippe Brucker struct vfio_pci_msi_entry *entries; 583*c9888d95SJean-Philippe Brucker struct vfio_pci_msix_pba *pba = &pdev->msix_pba; 584*c9888d95SJean-Philippe Brucker struct vfio_pci_msix_table *table = &pdev->msix_table; 585*c9888d95SJean-Philippe Brucker struct msix_cap *msix = PCI_CAP(&pdev->hdr, pdev->msix.pos); 586*c9888d95SJean-Philippe Brucker 587*c9888d95SJean-Philippe Brucker table->bar = msix->table_offset & PCI_MSIX_TABLE_BIR; 588*c9888d95SJean-Philippe Brucker pba->bar = msix->pba_offset & PCI_MSIX_TABLE_BIR; 589*c9888d95SJean-Philippe Brucker 590*c9888d95SJean-Philippe Brucker /* 591*c9888d95SJean-Philippe Brucker * KVM needs memory regions to be multiple of and aligned on PAGE_SIZE. 592*c9888d95SJean-Philippe Brucker */ 593*c9888d95SJean-Philippe Brucker nr_entries = (msix->ctrl & PCI_MSIX_FLAGS_QSIZE) + 1; 594*c9888d95SJean-Philippe Brucker table->size = ALIGN(nr_entries * PCI_MSIX_ENTRY_SIZE, PAGE_SIZE); 595*c9888d95SJean-Philippe Brucker pba->size = ALIGN(DIV_ROUND_UP(nr_entries, 64), PAGE_SIZE); 596*c9888d95SJean-Philippe Brucker 597*c9888d95SJean-Philippe Brucker entries = calloc(nr_entries, sizeof(struct vfio_pci_msi_entry)); 598*c9888d95SJean-Philippe Brucker if (!entries) 599*c9888d95SJean-Philippe Brucker return -ENOMEM; 600*c9888d95SJean-Philippe Brucker 601*c9888d95SJean-Philippe Brucker for (i = 0; i < nr_entries; i++) 602*c9888d95SJean-Philippe Brucker entries[i].config.ctrl = PCI_MSIX_ENTRY_CTRL_MASKBIT; 603*c9888d95SJean-Philippe Brucker 604*c9888d95SJean-Philippe Brucker /* 605*c9888d95SJean-Philippe Brucker * To ease MSI-X cap configuration in case they share the same BAR, 606*c9888d95SJean-Philippe Brucker * collapse table and pending array. The size of the BAR regions must be 607*c9888d95SJean-Philippe Brucker * powers of two. 608*c9888d95SJean-Philippe Brucker */ 609*c9888d95SJean-Philippe Brucker mmio_size = roundup_pow_of_two(table->size + pba->size); 610*c9888d95SJean-Philippe Brucker table->guest_phys_addr = pci_get_io_space_block(mmio_size); 611*c9888d95SJean-Philippe Brucker if (!table->guest_phys_addr) { 612*c9888d95SJean-Philippe Brucker pr_err("cannot allocate IO space"); 613*c9888d95SJean-Philippe Brucker ret = -ENOMEM; 614*c9888d95SJean-Philippe Brucker goto out_free; 615*c9888d95SJean-Philippe Brucker } 616*c9888d95SJean-Philippe Brucker pba->guest_phys_addr = table->guest_phys_addr + table->size; 617*c9888d95SJean-Philippe Brucker 618*c9888d95SJean-Philippe Brucker ret = kvm__register_mmio(kvm, table->guest_phys_addr, table->size, 619*c9888d95SJean-Philippe Brucker false, vfio_pci_msix_table_access, pdev); 620*c9888d95SJean-Philippe Brucker if (ret < 0) 621*c9888d95SJean-Philippe Brucker goto out_free; 622*c9888d95SJean-Philippe Brucker 623*c9888d95SJean-Philippe Brucker /* 624*c9888d95SJean-Philippe Brucker * We could map the physical PBA directly into the guest, but it's 625*c9888d95SJean-Philippe Brucker * likely smaller than a page, and we can only hand full pages to the 626*c9888d95SJean-Philippe Brucker * guest. Even though the PCI spec disallows sharing a page used for 627*c9888d95SJean-Philippe Brucker * MSI-X with any other resource, it allows to share the same page 628*c9888d95SJean-Philippe Brucker * between MSI-X table and PBA. For the sake of isolation, create a 629*c9888d95SJean-Philippe Brucker * virtual PBA. 630*c9888d95SJean-Philippe Brucker */ 631*c9888d95SJean-Philippe Brucker ret = kvm__register_mmio(kvm, pba->guest_phys_addr, pba->size, false, 632*c9888d95SJean-Philippe Brucker vfio_pci_msix_pba_access, pdev); 633*c9888d95SJean-Philippe Brucker if (ret < 0) 634*c9888d95SJean-Philippe Brucker goto out_free; 635*c9888d95SJean-Philippe Brucker 636*c9888d95SJean-Philippe Brucker pdev->msix.entries = entries; 637*c9888d95SJean-Philippe Brucker pdev->msix.nr_entries = nr_entries; 638*c9888d95SJean-Philippe Brucker 639*c9888d95SJean-Philippe Brucker return 0; 640*c9888d95SJean-Philippe Brucker 641*c9888d95SJean-Philippe Brucker out_free: 642*c9888d95SJean-Philippe Brucker free(entries); 643*c9888d95SJean-Philippe Brucker 644*c9888d95SJean-Philippe Brucker return ret; 645*c9888d95SJean-Philippe Brucker } 646*c9888d95SJean-Philippe Brucker 6476078a454SJean-Philippe Brucker static int vfio_pci_configure_bar(struct kvm *kvm, struct vfio_device *vdev, 6486078a454SJean-Philippe Brucker size_t nr) 6496078a454SJean-Philippe Brucker { 6506078a454SJean-Philippe Brucker int ret; 6516078a454SJean-Philippe Brucker size_t map_size; 652*c9888d95SJean-Philippe Brucker struct vfio_pci_device *pdev = &vdev->pci; 6536078a454SJean-Philippe Brucker struct vfio_region *region = &vdev->regions[nr]; 6546078a454SJean-Philippe Brucker 6556078a454SJean-Philippe Brucker if (nr >= vdev->info.num_regions) 6566078a454SJean-Philippe Brucker return 0; 6576078a454SJean-Philippe Brucker 6586078a454SJean-Philippe Brucker region->info = (struct vfio_region_info) { 6596078a454SJean-Philippe Brucker .argsz = sizeof(region->info), 6606078a454SJean-Philippe Brucker .index = nr, 6616078a454SJean-Philippe Brucker }; 6626078a454SJean-Philippe Brucker 6636078a454SJean-Philippe Brucker ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, ®ion->info); 6646078a454SJean-Philippe Brucker if (ret) { 6656078a454SJean-Philippe Brucker ret = -errno; 6666078a454SJean-Philippe Brucker vfio_dev_err(vdev, "cannot get info for BAR %zu", nr); 6676078a454SJean-Philippe Brucker return ret; 6686078a454SJean-Philippe Brucker } 6696078a454SJean-Philippe Brucker 6706078a454SJean-Philippe Brucker /* Ignore invalid or unimplemented regions */ 6716078a454SJean-Philippe Brucker if (!region->info.size) 6726078a454SJean-Philippe Brucker return 0; 6736078a454SJean-Philippe Brucker 674*c9888d95SJean-Philippe Brucker if (pdev->irq_modes & VFIO_PCI_IRQ_MODE_MSIX) { 675*c9888d95SJean-Philippe Brucker /* Trap and emulate MSI-X table */ 676*c9888d95SJean-Philippe Brucker if (nr == pdev->msix_table.bar) { 677*c9888d95SJean-Philippe Brucker region->guest_phys_addr = pdev->msix_table.guest_phys_addr; 678*c9888d95SJean-Philippe Brucker return 0; 679*c9888d95SJean-Philippe Brucker } else if (nr == pdev->msix_pba.bar) { 680*c9888d95SJean-Philippe Brucker region->guest_phys_addr = pdev->msix_pba.guest_phys_addr; 681*c9888d95SJean-Philippe Brucker return 0; 682*c9888d95SJean-Philippe Brucker } 683*c9888d95SJean-Philippe Brucker } 684*c9888d95SJean-Philippe Brucker 6856078a454SJean-Philippe Brucker /* Grab some MMIO space in the guest */ 6866078a454SJean-Philippe Brucker map_size = ALIGN(region->info.size, PAGE_SIZE); 6876078a454SJean-Philippe Brucker region->guest_phys_addr = pci_get_io_space_block(map_size); 6886078a454SJean-Philippe Brucker 6896078a454SJean-Philippe Brucker /* 6906078a454SJean-Philippe Brucker * Map the BARs into the guest. We'll later need to update 6916078a454SJean-Philippe Brucker * configuration space to reflect our allocation. 6926078a454SJean-Philippe Brucker */ 6936078a454SJean-Philippe Brucker ret = vfio_map_region(kvm, vdev, region); 6946078a454SJean-Philippe Brucker if (ret) 6956078a454SJean-Philippe Brucker return ret; 6966078a454SJean-Philippe Brucker 6976078a454SJean-Philippe Brucker return 0; 6986078a454SJean-Philippe Brucker } 6996078a454SJean-Philippe Brucker 7006078a454SJean-Philippe Brucker static int vfio_pci_configure_dev_regions(struct kvm *kvm, 7016078a454SJean-Philippe Brucker struct vfio_device *vdev) 7026078a454SJean-Philippe Brucker { 7036078a454SJean-Philippe Brucker int ret; 7046078a454SJean-Philippe Brucker u32 bar; 7056078a454SJean-Philippe Brucker size_t i; 7066078a454SJean-Philippe Brucker bool is_64bit = false; 7076078a454SJean-Philippe Brucker struct vfio_pci_device *pdev = &vdev->pci; 7086078a454SJean-Philippe Brucker 7096078a454SJean-Philippe Brucker ret = vfio_pci_parse_cfg_space(vdev); 7106078a454SJean-Philippe Brucker if (ret) 7116078a454SJean-Philippe Brucker return ret; 7126078a454SJean-Philippe Brucker 713*c9888d95SJean-Philippe Brucker if (pdev->irq_modes & VFIO_PCI_IRQ_MODE_MSIX) { 714*c9888d95SJean-Philippe Brucker ret = vfio_pci_create_msix_table(kvm, pdev); 715*c9888d95SJean-Philippe Brucker if (ret) 716*c9888d95SJean-Philippe Brucker return ret; 717*c9888d95SJean-Philippe Brucker } 718*c9888d95SJean-Philippe Brucker 7196078a454SJean-Philippe Brucker for (i = VFIO_PCI_BAR0_REGION_INDEX; i <= VFIO_PCI_BAR5_REGION_INDEX; ++i) { 7206078a454SJean-Philippe Brucker /* Ignore top half of 64-bit BAR */ 7216078a454SJean-Philippe Brucker if (i % 2 && is_64bit) 7226078a454SJean-Philippe Brucker continue; 7236078a454SJean-Philippe Brucker 7246078a454SJean-Philippe Brucker ret = vfio_pci_configure_bar(kvm, vdev, i); 7256078a454SJean-Philippe Brucker if (ret) 7266078a454SJean-Philippe Brucker return ret; 7276078a454SJean-Philippe Brucker 7286078a454SJean-Philippe Brucker bar = pdev->hdr.bar[i]; 7296078a454SJean-Philippe Brucker is_64bit = (bar & PCI_BASE_ADDRESS_SPACE) == 7306078a454SJean-Philippe Brucker PCI_BASE_ADDRESS_SPACE_MEMORY && 7316078a454SJean-Philippe Brucker bar & PCI_BASE_ADDRESS_MEM_TYPE_64; 7326078a454SJean-Philippe Brucker } 7336078a454SJean-Philippe Brucker 7346078a454SJean-Philippe Brucker /* We've configured the BARs, fake up a Configuration Space */ 7356078a454SJean-Philippe Brucker return vfio_pci_fixup_cfg_space(vdev); 7366078a454SJean-Philippe Brucker } 7376078a454SJean-Philippe Brucker 738*c9888d95SJean-Philippe Brucker /* 739*c9888d95SJean-Philippe Brucker * Attempt to update the FD limit, if opening an eventfd for each IRQ vector 740*c9888d95SJean-Philippe Brucker * would hit the limit. Which is likely to happen when a device uses 2048 MSIs. 741*c9888d95SJean-Philippe Brucker */ 742*c9888d95SJean-Philippe Brucker static int vfio_pci_reserve_irq_fds(size_t num) 743*c9888d95SJean-Philippe Brucker { 744*c9888d95SJean-Philippe Brucker /* 745*c9888d95SJean-Philippe Brucker * I counted around 27 fds under normal load. Let's add 100 for good 746*c9888d95SJean-Philippe Brucker * measure. 747*c9888d95SJean-Philippe Brucker */ 748*c9888d95SJean-Philippe Brucker static size_t needed = 128; 749*c9888d95SJean-Philippe Brucker struct rlimit fd_limit, new_limit; 750*c9888d95SJean-Philippe Brucker 751*c9888d95SJean-Philippe Brucker needed += num; 752*c9888d95SJean-Philippe Brucker 753*c9888d95SJean-Philippe Brucker if (getrlimit(RLIMIT_NOFILE, &fd_limit)) { 754*c9888d95SJean-Philippe Brucker perror("getrlimit(RLIMIT_NOFILE)"); 755*c9888d95SJean-Philippe Brucker return 0; 756*c9888d95SJean-Philippe Brucker } 757*c9888d95SJean-Philippe Brucker 758*c9888d95SJean-Philippe Brucker if (fd_limit.rlim_cur >= needed) 759*c9888d95SJean-Philippe Brucker return 0; 760*c9888d95SJean-Philippe Brucker 761*c9888d95SJean-Philippe Brucker new_limit.rlim_cur = needed; 762*c9888d95SJean-Philippe Brucker 763*c9888d95SJean-Philippe Brucker if (fd_limit.rlim_max < needed) 764*c9888d95SJean-Philippe Brucker /* Try to bump hard limit (root only) */ 765*c9888d95SJean-Philippe Brucker new_limit.rlim_max = needed; 766*c9888d95SJean-Philippe Brucker else 767*c9888d95SJean-Philippe Brucker new_limit.rlim_max = fd_limit.rlim_max; 768*c9888d95SJean-Philippe Brucker 769*c9888d95SJean-Philippe Brucker if (setrlimit(RLIMIT_NOFILE, &new_limit)) { 770*c9888d95SJean-Philippe Brucker perror("setrlimit(RLIMIT_NOFILE)"); 771*c9888d95SJean-Philippe Brucker pr_warning("not enough FDs for full MSI-X support (estimated need: %zu)", 772*c9888d95SJean-Philippe Brucker (size_t)(needed - fd_limit.rlim_cur)); 773*c9888d95SJean-Philippe Brucker } 774*c9888d95SJean-Philippe Brucker 775*c9888d95SJean-Philippe Brucker return 0; 776*c9888d95SJean-Philippe Brucker } 777*c9888d95SJean-Philippe Brucker 778*c9888d95SJean-Philippe Brucker static int vfio_pci_init_msis(struct kvm *kvm, struct vfio_device *vdev, 779*c9888d95SJean-Philippe Brucker struct vfio_pci_msi_common *msis) 780*c9888d95SJean-Philippe Brucker { 781*c9888d95SJean-Philippe Brucker int ret; 782*c9888d95SJean-Philippe Brucker size_t i; 783*c9888d95SJean-Philippe Brucker int *eventfds; 784*c9888d95SJean-Philippe Brucker size_t irq_set_size; 785*c9888d95SJean-Philippe Brucker struct vfio_pci_msi_entry *entry; 786*c9888d95SJean-Philippe Brucker size_t nr_entries = msis->nr_entries; 787*c9888d95SJean-Philippe Brucker 788*c9888d95SJean-Philippe Brucker ret = ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, &msis->info); 789*c9888d95SJean-Philippe Brucker if (ret || &msis->info.count == 0) { 790*c9888d95SJean-Philippe Brucker vfio_dev_err(vdev, "no MSI reported by VFIO"); 791*c9888d95SJean-Philippe Brucker return -ENODEV; 792*c9888d95SJean-Philippe Brucker } 793*c9888d95SJean-Philippe Brucker 794*c9888d95SJean-Philippe Brucker if (!(msis->info.flags & VFIO_IRQ_INFO_EVENTFD)) { 795*c9888d95SJean-Philippe Brucker vfio_dev_err(vdev, "interrupt not EVENTFD capable"); 796*c9888d95SJean-Philippe Brucker return -EINVAL; 797*c9888d95SJean-Philippe Brucker } 798*c9888d95SJean-Philippe Brucker 799*c9888d95SJean-Philippe Brucker if (msis->info.count != nr_entries) { 800*c9888d95SJean-Philippe Brucker vfio_dev_err(vdev, "invalid number of MSIs reported by VFIO"); 801*c9888d95SJean-Philippe Brucker return -EINVAL; 802*c9888d95SJean-Philippe Brucker } 803*c9888d95SJean-Philippe Brucker 804*c9888d95SJean-Philippe Brucker mutex_init(&msis->mutex); 805*c9888d95SJean-Philippe Brucker 806*c9888d95SJean-Philippe Brucker vfio_pci_reserve_irq_fds(nr_entries); 807*c9888d95SJean-Philippe Brucker 808*c9888d95SJean-Philippe Brucker irq_set_size = sizeof(struct vfio_irq_set) + nr_entries * sizeof(int); 809*c9888d95SJean-Philippe Brucker msis->irq_set = malloc(irq_set_size); 810*c9888d95SJean-Philippe Brucker if (!msis->irq_set) 811*c9888d95SJean-Philippe Brucker return -ENOMEM; 812*c9888d95SJean-Philippe Brucker 813*c9888d95SJean-Philippe Brucker *msis->irq_set = (struct vfio_irq_set) { 814*c9888d95SJean-Philippe Brucker .argsz = irq_set_size, 815*c9888d95SJean-Philippe Brucker .flags = VFIO_IRQ_SET_DATA_EVENTFD | 816*c9888d95SJean-Philippe Brucker VFIO_IRQ_SET_ACTION_TRIGGER, 817*c9888d95SJean-Philippe Brucker .index = msis->info.index, 818*c9888d95SJean-Philippe Brucker .start = 0, 819*c9888d95SJean-Philippe Brucker .count = nr_entries, 820*c9888d95SJean-Philippe Brucker }; 821*c9888d95SJean-Philippe Brucker 822*c9888d95SJean-Philippe Brucker eventfds = (void *)msis->irq_set + sizeof(struct vfio_irq_set); 823*c9888d95SJean-Philippe Brucker 824*c9888d95SJean-Philippe Brucker for (i = 0; i < nr_entries; i++) { 825*c9888d95SJean-Philippe Brucker entry = &msis->entries[i]; 826*c9888d95SJean-Philippe Brucker entry->gsi = -1; 827*c9888d95SJean-Philippe Brucker entry->eventfd = -1; 828*c9888d95SJean-Philippe Brucker msi_set_masked(entry->virt_state, true); 829*c9888d95SJean-Philippe Brucker msi_set_masked(entry->phys_state, true); 830*c9888d95SJean-Philippe Brucker eventfds[i] = -1; 831*c9888d95SJean-Philippe Brucker } 832*c9888d95SJean-Philippe Brucker 833*c9888d95SJean-Philippe Brucker return 0; 834*c9888d95SJean-Philippe Brucker } 835*c9888d95SJean-Philippe Brucker 836*c9888d95SJean-Philippe Brucker static void vfio_pci_disable_intx(struct kvm *kvm, struct vfio_device *vdev) 837*c9888d95SJean-Philippe Brucker { 838*c9888d95SJean-Philippe Brucker struct vfio_pci_device *pdev = &vdev->pci; 839*c9888d95SJean-Philippe Brucker int gsi = pdev->intx_gsi; 840*c9888d95SJean-Philippe Brucker struct vfio_irq_set irq_set = { 841*c9888d95SJean-Philippe Brucker .argsz = sizeof(irq_set), 842*c9888d95SJean-Philippe Brucker .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER, 843*c9888d95SJean-Philippe Brucker .index = VFIO_PCI_INTX_IRQ_INDEX, 844*c9888d95SJean-Philippe Brucker }; 845*c9888d95SJean-Philippe Brucker 846*c9888d95SJean-Philippe Brucker pr_debug("user requested MSI, disabling INTx %d", gsi); 847*c9888d95SJean-Philippe Brucker 848*c9888d95SJean-Philippe Brucker ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); 849*c9888d95SJean-Philippe Brucker irq__del_irqfd(kvm, gsi, pdev->intx_fd); 850*c9888d95SJean-Philippe Brucker 851*c9888d95SJean-Philippe Brucker close(pdev->intx_fd); 852*c9888d95SJean-Philippe Brucker } 853*c9888d95SJean-Philippe Brucker 8546078a454SJean-Philippe Brucker static int vfio_pci_enable_intx(struct kvm *kvm, struct vfio_device *vdev) 8556078a454SJean-Philippe Brucker { 8566078a454SJean-Philippe Brucker int ret; 8576078a454SJean-Philippe Brucker int trigger_fd, unmask_fd; 8586078a454SJean-Philippe Brucker struct vfio_irq_eventfd trigger; 8596078a454SJean-Philippe Brucker struct vfio_irq_eventfd unmask; 8606078a454SJean-Philippe Brucker struct vfio_pci_device *pdev = &vdev->pci; 8616078a454SJean-Philippe Brucker int gsi = pdev->hdr.irq_line - KVM_IRQ_OFFSET; 8626078a454SJean-Philippe Brucker 8636078a454SJean-Philippe Brucker struct vfio_irq_info irq_info = { 8646078a454SJean-Philippe Brucker .argsz = sizeof(irq_info), 8656078a454SJean-Philippe Brucker .index = VFIO_PCI_INTX_IRQ_INDEX, 8666078a454SJean-Philippe Brucker }; 8676078a454SJean-Philippe Brucker 868*c9888d95SJean-Philippe Brucker vfio_pci_reserve_irq_fds(2); 869*c9888d95SJean-Philippe Brucker 8706078a454SJean-Philippe Brucker ret = ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); 8716078a454SJean-Philippe Brucker if (ret || irq_info.count == 0) { 8726078a454SJean-Philippe Brucker vfio_dev_err(vdev, "no INTx reported by VFIO"); 8736078a454SJean-Philippe Brucker return -ENODEV; 8746078a454SJean-Philippe Brucker } 8756078a454SJean-Philippe Brucker 8766078a454SJean-Philippe Brucker if (!(irq_info.flags & VFIO_IRQ_INFO_EVENTFD)) { 8776078a454SJean-Philippe Brucker vfio_dev_err(vdev, "interrupt not eventfd capable"); 8786078a454SJean-Philippe Brucker return -EINVAL; 8796078a454SJean-Philippe Brucker } 8806078a454SJean-Philippe Brucker 8816078a454SJean-Philippe Brucker if (!(irq_info.flags & VFIO_IRQ_INFO_AUTOMASKED)) { 8826078a454SJean-Philippe Brucker vfio_dev_err(vdev, "INTx interrupt not AUTOMASKED"); 8836078a454SJean-Philippe Brucker return -EINVAL; 8846078a454SJean-Philippe Brucker } 8856078a454SJean-Philippe Brucker 8866078a454SJean-Philippe Brucker /* 8876078a454SJean-Philippe Brucker * PCI IRQ is level-triggered, so we use two eventfds. trigger_fd 8886078a454SJean-Philippe Brucker * signals an interrupt from host to guest, and unmask_fd signals the 8896078a454SJean-Philippe Brucker * deassertion of the line from guest to host. 8906078a454SJean-Philippe Brucker */ 8916078a454SJean-Philippe Brucker trigger_fd = eventfd(0, 0); 8926078a454SJean-Philippe Brucker if (trigger_fd < 0) { 8936078a454SJean-Philippe Brucker vfio_dev_err(vdev, "failed to create trigger eventfd"); 8946078a454SJean-Philippe Brucker return trigger_fd; 8956078a454SJean-Philippe Brucker } 8966078a454SJean-Philippe Brucker 8976078a454SJean-Philippe Brucker unmask_fd = eventfd(0, 0); 8986078a454SJean-Philippe Brucker if (unmask_fd < 0) { 8996078a454SJean-Philippe Brucker vfio_dev_err(vdev, "failed to create unmask eventfd"); 9006078a454SJean-Philippe Brucker close(trigger_fd); 9016078a454SJean-Philippe Brucker return unmask_fd; 9026078a454SJean-Philippe Brucker } 9036078a454SJean-Philippe Brucker 9046078a454SJean-Philippe Brucker ret = irq__add_irqfd(kvm, gsi, trigger_fd, unmask_fd); 9056078a454SJean-Philippe Brucker if (ret) 9066078a454SJean-Philippe Brucker goto err_close; 9076078a454SJean-Philippe Brucker 9086078a454SJean-Philippe Brucker trigger.irq = (struct vfio_irq_set) { 9096078a454SJean-Philippe Brucker .argsz = sizeof(trigger), 9106078a454SJean-Philippe Brucker .flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER, 9116078a454SJean-Philippe Brucker .index = VFIO_PCI_INTX_IRQ_INDEX, 9126078a454SJean-Philippe Brucker .start = 0, 9136078a454SJean-Philippe Brucker .count = 1, 9146078a454SJean-Philippe Brucker }; 9156078a454SJean-Philippe Brucker trigger.fd = trigger_fd; 9166078a454SJean-Philippe Brucker 9176078a454SJean-Philippe Brucker ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &trigger); 9186078a454SJean-Philippe Brucker if (ret < 0) { 9196078a454SJean-Philippe Brucker vfio_dev_err(vdev, "failed to setup VFIO IRQ"); 9206078a454SJean-Philippe Brucker goto err_delete_line; 9216078a454SJean-Philippe Brucker } 9226078a454SJean-Philippe Brucker 9236078a454SJean-Philippe Brucker unmask.irq = (struct vfio_irq_set) { 9246078a454SJean-Philippe Brucker .argsz = sizeof(unmask), 9256078a454SJean-Philippe Brucker .flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_UNMASK, 9266078a454SJean-Philippe Brucker .index = VFIO_PCI_INTX_IRQ_INDEX, 9276078a454SJean-Philippe Brucker .start = 0, 9286078a454SJean-Philippe Brucker .count = 1, 9296078a454SJean-Philippe Brucker }; 9306078a454SJean-Philippe Brucker unmask.fd = unmask_fd; 9316078a454SJean-Philippe Brucker 9326078a454SJean-Philippe Brucker ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &unmask); 9336078a454SJean-Philippe Brucker if (ret < 0) { 9346078a454SJean-Philippe Brucker vfio_dev_err(vdev, "failed to setup unmask IRQ"); 9356078a454SJean-Philippe Brucker goto err_remove_event; 9366078a454SJean-Philippe Brucker } 9376078a454SJean-Philippe Brucker 938*c9888d95SJean-Philippe Brucker pdev->intx_fd = trigger_fd; 939*c9888d95SJean-Philippe Brucker /* Guest is going to ovewrite our irq_line... */ 940*c9888d95SJean-Philippe Brucker pdev->intx_gsi = gsi; 941*c9888d95SJean-Philippe Brucker 9426078a454SJean-Philippe Brucker return 0; 9436078a454SJean-Philippe Brucker 9446078a454SJean-Philippe Brucker err_remove_event: 9456078a454SJean-Philippe Brucker /* Remove trigger event */ 9466078a454SJean-Philippe Brucker trigger.irq.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; 9476078a454SJean-Philippe Brucker trigger.irq.count = 0; 9486078a454SJean-Philippe Brucker ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &trigger); 9496078a454SJean-Philippe Brucker 9506078a454SJean-Philippe Brucker err_delete_line: 9516078a454SJean-Philippe Brucker irq__del_irqfd(kvm, gsi, trigger_fd); 9526078a454SJean-Philippe Brucker 9536078a454SJean-Philippe Brucker err_close: 9546078a454SJean-Philippe Brucker close(trigger_fd); 9556078a454SJean-Philippe Brucker close(unmask_fd); 9566078a454SJean-Philippe Brucker return ret; 9576078a454SJean-Philippe Brucker } 9586078a454SJean-Philippe Brucker 9596078a454SJean-Philippe Brucker static int vfio_pci_configure_dev_irqs(struct kvm *kvm, struct vfio_device *vdev) 9606078a454SJean-Philippe Brucker { 961*c9888d95SJean-Philippe Brucker int ret = 0; 9626078a454SJean-Philippe Brucker struct vfio_pci_device *pdev = &vdev->pci; 9636078a454SJean-Philippe Brucker 964*c9888d95SJean-Philippe Brucker if (pdev->irq_modes & VFIO_PCI_IRQ_MODE_MSIX) { 965*c9888d95SJean-Philippe Brucker pdev->msix.info = (struct vfio_irq_info) { 966*c9888d95SJean-Philippe Brucker .argsz = sizeof(pdev->msix.info), 967*c9888d95SJean-Philippe Brucker .index = VFIO_PCI_MSIX_IRQ_INDEX, 9686078a454SJean-Philippe Brucker }; 969*c9888d95SJean-Philippe Brucker ret = vfio_pci_init_msis(kvm, vdev, &pdev->msix); 970*c9888d95SJean-Philippe Brucker if (ret) 971*c9888d95SJean-Philippe Brucker return ret; 9726078a454SJean-Philippe Brucker } 9736078a454SJean-Philippe Brucker 974*c9888d95SJean-Philippe Brucker if (pdev->irq_modes & VFIO_PCI_IRQ_MODE_INTX) 975*c9888d95SJean-Philippe Brucker ret = vfio_pci_enable_intx(kvm, vdev); 976*c9888d95SJean-Philippe Brucker 977*c9888d95SJean-Philippe Brucker return ret; 9786078a454SJean-Philippe Brucker } 9796078a454SJean-Philippe Brucker 9806078a454SJean-Philippe Brucker int vfio_pci_setup_device(struct kvm *kvm, struct vfio_device *vdev) 9816078a454SJean-Philippe Brucker { 9826078a454SJean-Philippe Brucker int ret; 9836078a454SJean-Philippe Brucker 9846078a454SJean-Philippe Brucker ret = vfio_pci_configure_dev_regions(kvm, vdev); 9856078a454SJean-Philippe Brucker if (ret) { 9866078a454SJean-Philippe Brucker vfio_dev_err(vdev, "failed to configure regions"); 9876078a454SJean-Philippe Brucker return ret; 9886078a454SJean-Philippe Brucker } 9896078a454SJean-Philippe Brucker 9906078a454SJean-Philippe Brucker vdev->dev_hdr = (struct device_header) { 9916078a454SJean-Philippe Brucker .bus_type = DEVICE_BUS_PCI, 9926078a454SJean-Philippe Brucker .data = &vdev->pci.hdr, 9936078a454SJean-Philippe Brucker }; 9946078a454SJean-Philippe Brucker 9956078a454SJean-Philippe Brucker ret = device__register(&vdev->dev_hdr); 9966078a454SJean-Philippe Brucker if (ret) { 9976078a454SJean-Philippe Brucker vfio_dev_err(vdev, "failed to register VFIO device"); 9986078a454SJean-Philippe Brucker return ret; 9996078a454SJean-Philippe Brucker } 10006078a454SJean-Philippe Brucker 10016078a454SJean-Philippe Brucker ret = vfio_pci_configure_dev_irqs(kvm, vdev); 10026078a454SJean-Philippe Brucker if (ret) { 10036078a454SJean-Philippe Brucker vfio_dev_err(vdev, "failed to configure IRQs"); 10046078a454SJean-Philippe Brucker return ret; 10056078a454SJean-Philippe Brucker } 10066078a454SJean-Philippe Brucker 10076078a454SJean-Philippe Brucker return 0; 10086078a454SJean-Philippe Brucker } 10096078a454SJean-Philippe Brucker 10106078a454SJean-Philippe Brucker void vfio_pci_teardown_device(struct kvm *kvm, struct vfio_device *vdev) 10116078a454SJean-Philippe Brucker { 10126078a454SJean-Philippe Brucker size_t i; 1013*c9888d95SJean-Philippe Brucker struct vfio_pci_device *pdev = &vdev->pci; 10146078a454SJean-Philippe Brucker 10156078a454SJean-Philippe Brucker for (i = 0; i < vdev->info.num_regions; i++) 10166078a454SJean-Philippe Brucker vfio_unmap_region(kvm, &vdev->regions[i]); 10176078a454SJean-Philippe Brucker 10186078a454SJean-Philippe Brucker device__unregister(&vdev->dev_hdr); 1019*c9888d95SJean-Philippe Brucker 1020*c9888d95SJean-Philippe Brucker free(pdev->msix.irq_set); 1021*c9888d95SJean-Philippe Brucker free(pdev->msix.entries); 10226078a454SJean-Philippe Brucker } 1023