xref: /qemu/hw/vfio/pci-quirks.c (revision 11b5ce95beecfd51d1b17858d23fe9cbb0b5783f)
1c00d61d8SAlex Williamson /*
2c00d61d8SAlex Williamson  * device quirks for PCI devices
3c00d61d8SAlex Williamson  *
4c00d61d8SAlex Williamson  * Copyright Red Hat, Inc. 2012-2015
5c00d61d8SAlex Williamson  *
6c00d61d8SAlex Williamson  * Authors:
7c00d61d8SAlex Williamson  *  Alex Williamson <alex.williamson@redhat.com>
8c00d61d8SAlex Williamson  *
9c00d61d8SAlex Williamson  * This work is licensed under the terms of the GNU GPL, version 2.  See
10c00d61d8SAlex Williamson  * the COPYING file in the top-level directory.
11c00d61d8SAlex Williamson  */
12c00d61d8SAlex Williamson 
13c6eacb1aSPeter Maydell #include "qemu/osdep.h"
142becc36aSPaolo Bonzini #include CONFIG_DEVICES
15475fbf0aSTony Nguyen #include "exec/memop.h"
16e0255bb1SPhilippe Mathieu-Daudé #include "qemu/units.h"
1724202d2bSPrasad J Pandit #include "qemu/log.h"
18c4c45e94SAlex Williamson #include "qemu/error-report.h"
19c958c51dSAlex Williamson #include "qemu/main-loop.h"
200b8fa32fSMarkus Armbruster #include "qemu/module.h"
21c4c45e94SAlex Williamson #include "qemu/range.h"
22c4c45e94SAlex Williamson #include "qapi/error.h"
23dfbee78dSAlex Williamson #include "qapi/visitor.h"
242b1dbd0dSAlex Williamson #include <sys/ioctl.h>
25c4c45e94SAlex Williamson #include "hw/nvram/fw_cfg.h"
26a27bd6c7SMarkus Armbruster #include "hw/qdev-properties.h"
27c00d61d8SAlex Williamson #include "pci.h"
28c00d61d8SAlex Williamson #include "trace.h"
29c00d61d8SAlex Williamson 
/*
 * List of device ids/vendor ids for which to disable
 * option rom loading. This avoids guest hangs during rom
 * execution, as noticed with the BCM 57810 card, for lack of a
 * better way to handle such issues.
 * The user can still override by specifying a romfile or
 * rombar=1.
 * Please see https://bugs.launchpad.net/qemu/+bug/1284874
 * for an analysis of the 57810 card hang. When adding
 * a new vendor id/device id combination below, please also add
 * your card/environment details and information that could
 * help in debugging to the bug tracking this issue.
 */
static const struct {
    uint32_t vendor;    /* PCI vendor ID to match */
    uint32_t device;    /* PCI device ID to match */
} rom_denylist[] = {
    { .vendor = 0x14e4, .device = 0x168e }, /* Broadcom BCM 57810 */
};
49c00d61d8SAlex Williamson 
504eda914cSPhilippe Mathieu-Daudé bool vfio_opt_rom_in_denylist(VFIOPCIDevice *vdev)
51c00d61d8SAlex Williamson {
52056dfcb6SAlex Williamson     int i;
53c00d61d8SAlex Williamson 
544eda914cSPhilippe Mathieu-Daudé     for (i = 0 ; i < ARRAY_SIZE(rom_denylist); i++) {
554eda914cSPhilippe Mathieu-Daudé         if (vfio_pci_is(vdev, rom_denylist[i].vendor, rom_denylist[i].device)) {
564eda914cSPhilippe Mathieu-Daudé             trace_vfio_quirk_rom_in_denylist(vdev->vbasedev.name,
574eda914cSPhilippe Mathieu-Daudé                                              rom_denylist[i].vendor,
584eda914cSPhilippe Mathieu-Daudé                                              rom_denylist[i].device);
59c00d61d8SAlex Williamson             return true;
60c00d61d8SAlex Williamson         }
61c00d61d8SAlex Williamson     }
62c00d61d8SAlex Williamson     return false;
63c00d61d8SAlex Williamson }
64c00d61d8SAlex Williamson 
65c00d61d8SAlex Williamson /*
660e54f24aSAlex Williamson  * Device specific region quirks (mostly backdoors to PCI config space)
67c00d61d8SAlex Williamson  */
68c00d61d8SAlex Williamson 
/*
 * The generic window quirks operate on an address and data register,
 * vfio_generic_window_address_quirk handles the address register and
 * vfio_generic_window_data_quirk handles the data register.  These ops
 * pass reads and writes through to hardware until a value matching the
 * stored address match/mask is written.  When this occurs, the data
 * register accesses emulated PCI config space for the device rather than
 * passing through accesses.  This enables devices where PCI config space
 * is accessible behind a window register to maintain the virtualization
 * provided through vfio.
 */
/*
 * One address pattern recognised by the window address register handler:
 * a written value selects config space when its bits outside @mask equal
 * @match.
 */
typedef struct VFIOConfigWindowMatch {
    uint32_t match; /* value the non-@mask bits of the address must equal */
    uint32_t mask;  /* bits of the address kept as the config space offset */
} VFIOConfigWindowMatch;
840e54f24aSAlex Williamson 
850e54f24aSAlex Williamson typedef struct VFIOConfigWindowQuirk {
860e54f24aSAlex Williamson     struct VFIOPCIDevice *vdev;
870e54f24aSAlex Williamson 
880e54f24aSAlex Williamson     uint32_t address_val;
890e54f24aSAlex Williamson 
900e54f24aSAlex Williamson     uint32_t address_offset;
910e54f24aSAlex Williamson     uint32_t data_offset;
920e54f24aSAlex Williamson 
930e54f24aSAlex Williamson     bool window_enabled;
940e54f24aSAlex Williamson     uint8_t bar;
950e54f24aSAlex Williamson 
960e54f24aSAlex Williamson     MemoryRegion *addr_mem;
970e54f24aSAlex Williamson     MemoryRegion *data_mem;
980e54f24aSAlex Williamson 
990e54f24aSAlex Williamson     uint32_t nr_matches;
1000e54f24aSAlex Williamson     VFIOConfigWindowMatch matches[];
1010e54f24aSAlex Williamson } VFIOConfigWindowQuirk;
1020e54f24aSAlex Williamson 
1030e54f24aSAlex Williamson static uint64_t vfio_generic_window_quirk_address_read(void *opaque,
1040e54f24aSAlex Williamson                                                        hwaddr addr,
1050e54f24aSAlex Williamson                                                        unsigned size)
1060e54f24aSAlex Williamson {
1070e54f24aSAlex Williamson     VFIOConfigWindowQuirk *window = opaque;
1080e54f24aSAlex Williamson     VFIOPCIDevice *vdev = window->vdev;
1090e54f24aSAlex Williamson 
1100e54f24aSAlex Williamson     return vfio_region_read(&vdev->bars[window->bar].region,
1110e54f24aSAlex Williamson                             addr + window->address_offset, size);
1120e54f24aSAlex Williamson }
1130e54f24aSAlex Williamson 
1140e54f24aSAlex Williamson static void vfio_generic_window_quirk_address_write(void *opaque, hwaddr addr,
1150e54f24aSAlex Williamson                                                     uint64_t data,
1160e54f24aSAlex Williamson                                                     unsigned size)
1170e54f24aSAlex Williamson {
1180e54f24aSAlex Williamson     VFIOConfigWindowQuirk *window = opaque;
1190e54f24aSAlex Williamson     VFIOPCIDevice *vdev = window->vdev;
1200e54f24aSAlex Williamson     int i;
1210e54f24aSAlex Williamson 
1220e54f24aSAlex Williamson     window->window_enabled = false;
1230e54f24aSAlex Williamson 
1240e54f24aSAlex Williamson     vfio_region_write(&vdev->bars[window->bar].region,
1250e54f24aSAlex Williamson                       addr + window->address_offset, data, size);
1260e54f24aSAlex Williamson 
1270e54f24aSAlex Williamson     for (i = 0; i < window->nr_matches; i++) {
1280e54f24aSAlex Williamson         if ((data & ~window->matches[i].mask) == window->matches[i].match) {
1290e54f24aSAlex Williamson             window->window_enabled = true;
1300e54f24aSAlex Williamson             window->address_val = data & window->matches[i].mask;
1310e54f24aSAlex Williamson             trace_vfio_quirk_generic_window_address_write(vdev->vbasedev.name,
1320e54f24aSAlex Williamson                                     memory_region_name(window->addr_mem), data);
1330e54f24aSAlex Williamson             break;
1340e54f24aSAlex Williamson         }
1350e54f24aSAlex Williamson     }
1360e54f24aSAlex Williamson }
1370e54f24aSAlex Williamson 
1380e54f24aSAlex Williamson static const MemoryRegionOps vfio_generic_window_address_quirk = {
1390e54f24aSAlex Williamson     .read = vfio_generic_window_quirk_address_read,
1400e54f24aSAlex Williamson     .write = vfio_generic_window_quirk_address_write,
1410e54f24aSAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
1420e54f24aSAlex Williamson };
1430e54f24aSAlex Williamson 
1440e54f24aSAlex Williamson static uint64_t vfio_generic_window_quirk_data_read(void *opaque,
1450e54f24aSAlex Williamson                                                     hwaddr addr, unsigned size)
1460e54f24aSAlex Williamson {
1470e54f24aSAlex Williamson     VFIOConfigWindowQuirk *window = opaque;
1480e54f24aSAlex Williamson     VFIOPCIDevice *vdev = window->vdev;
1490e54f24aSAlex Williamson     uint64_t data;
1500e54f24aSAlex Williamson 
1510e54f24aSAlex Williamson     /* Always read data reg, discard if window enabled */
1520e54f24aSAlex Williamson     data = vfio_region_read(&vdev->bars[window->bar].region,
1530e54f24aSAlex Williamson                             addr + window->data_offset, size);
1540e54f24aSAlex Williamson 
1550e54f24aSAlex Williamson     if (window->window_enabled) {
1560e54f24aSAlex Williamson         data = vfio_pci_read_config(&vdev->pdev, window->address_val, size);
1570e54f24aSAlex Williamson         trace_vfio_quirk_generic_window_data_read(vdev->vbasedev.name,
1580e54f24aSAlex Williamson                                     memory_region_name(window->data_mem), data);
1590e54f24aSAlex Williamson     }
1600e54f24aSAlex Williamson 
1610e54f24aSAlex Williamson     return data;
1620e54f24aSAlex Williamson }
1630e54f24aSAlex Williamson 
1640e54f24aSAlex Williamson static void vfio_generic_window_quirk_data_write(void *opaque, hwaddr addr,
1650e54f24aSAlex Williamson                                                  uint64_t data, unsigned size)
1660e54f24aSAlex Williamson {
1670e54f24aSAlex Williamson     VFIOConfigWindowQuirk *window = opaque;
1680e54f24aSAlex Williamson     VFIOPCIDevice *vdev = window->vdev;
1690e54f24aSAlex Williamson 
1700e54f24aSAlex Williamson     if (window->window_enabled) {
1710e54f24aSAlex Williamson         vfio_pci_write_config(&vdev->pdev, window->address_val, data, size);
1720e54f24aSAlex Williamson         trace_vfio_quirk_generic_window_data_write(vdev->vbasedev.name,
1730e54f24aSAlex Williamson                                     memory_region_name(window->data_mem), data);
1740e54f24aSAlex Williamson         return;
1750e54f24aSAlex Williamson     }
1760e54f24aSAlex Williamson 
1770e54f24aSAlex Williamson     vfio_region_write(&vdev->bars[window->bar].region,
1780e54f24aSAlex Williamson                       addr + window->data_offset, data, size);
1790e54f24aSAlex Williamson }
1800e54f24aSAlex Williamson 
1810e54f24aSAlex Williamson static const MemoryRegionOps vfio_generic_window_data_quirk = {
1820e54f24aSAlex Williamson     .read = vfio_generic_window_quirk_data_read,
1830e54f24aSAlex Williamson     .write = vfio_generic_window_quirk_data_write,
1840e54f24aSAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
1850e54f24aSAlex Williamson };
1860e54f24aSAlex Williamson 
1870d38fb1cSAlex Williamson /*
1880d38fb1cSAlex Williamson  * The generic mirror quirk handles devices which expose PCI config space
1890d38fb1cSAlex Williamson  * through a region within a BAR.  When enabled, reads and writes are
1900d38fb1cSAlex Williamson  * redirected through to emulated PCI config space.  XXX if PCI config space
1910d38fb1cSAlex Williamson  * used memory regions, this could just be an alias.
1920d38fb1cSAlex Williamson  */
1930d38fb1cSAlex Williamson typedef struct VFIOConfigMirrorQuirk {
1940d38fb1cSAlex Williamson     struct VFIOPCIDevice *vdev;
1950d38fb1cSAlex Williamson     uint32_t offset;
1960d38fb1cSAlex Williamson     uint8_t bar;
1970d38fb1cSAlex Williamson     MemoryRegion *mem;
198c958c51dSAlex Williamson     uint8_t data[];
1990d38fb1cSAlex Williamson } VFIOConfigMirrorQuirk;
2000d38fb1cSAlex Williamson 
2010d38fb1cSAlex Williamson static uint64_t vfio_generic_quirk_mirror_read(void *opaque,
2020d38fb1cSAlex Williamson                                                hwaddr addr, unsigned size)
2030d38fb1cSAlex Williamson {
2040d38fb1cSAlex Williamson     VFIOConfigMirrorQuirk *mirror = opaque;
2050d38fb1cSAlex Williamson     VFIOPCIDevice *vdev = mirror->vdev;
2060d38fb1cSAlex Williamson     uint64_t data;
2070d38fb1cSAlex Williamson 
2080d38fb1cSAlex Williamson     /* Read and discard in case the hardware cares */
2090d38fb1cSAlex Williamson     (void)vfio_region_read(&vdev->bars[mirror->bar].region,
2100d38fb1cSAlex Williamson                            addr + mirror->offset, size);
2110d38fb1cSAlex Williamson 
2120d38fb1cSAlex Williamson     data = vfio_pci_read_config(&vdev->pdev, addr, size);
2130d38fb1cSAlex Williamson     trace_vfio_quirk_generic_mirror_read(vdev->vbasedev.name,
2140d38fb1cSAlex Williamson                                          memory_region_name(mirror->mem),
2150d38fb1cSAlex Williamson                                          addr, data);
2160d38fb1cSAlex Williamson     return data;
2170d38fb1cSAlex Williamson }
2180d38fb1cSAlex Williamson 
2190d38fb1cSAlex Williamson static void vfio_generic_quirk_mirror_write(void *opaque, hwaddr addr,
2200d38fb1cSAlex Williamson                                             uint64_t data, unsigned size)
2210d38fb1cSAlex Williamson {
2220d38fb1cSAlex Williamson     VFIOConfigMirrorQuirk *mirror = opaque;
2230d38fb1cSAlex Williamson     VFIOPCIDevice *vdev = mirror->vdev;
2240d38fb1cSAlex Williamson 
2250d38fb1cSAlex Williamson     vfio_pci_write_config(&vdev->pdev, addr, data, size);
2260d38fb1cSAlex Williamson     trace_vfio_quirk_generic_mirror_write(vdev->vbasedev.name,
2270d38fb1cSAlex Williamson                                           memory_region_name(mirror->mem),
2280d38fb1cSAlex Williamson                                           addr, data);
2290d38fb1cSAlex Williamson }
2300d38fb1cSAlex Williamson 
2310d38fb1cSAlex Williamson static const MemoryRegionOps vfio_generic_mirror_quirk = {
2320d38fb1cSAlex Williamson     .read = vfio_generic_quirk_mirror_read,
2330d38fb1cSAlex Williamson     .write = vfio_generic_quirk_mirror_write,
2340d38fb1cSAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
2350d38fb1cSAlex Williamson };
2360d38fb1cSAlex Williamson 
/*
 * Is range1 [first1, first1 + len1) fully contained within
 * range2 [first2, first2 + len2)?
 *
 * The test compares start offsets and lengths instead of end addresses, so
 * a range whose end would exceed UINT64_MAX cannot wrap around and be
 * reported as contained.
 */
static bool vfio_range_contained(uint64_t first1, uint64_t len1,
                                 uint64_t first2, uint64_t len2)
{
    /* range1 must not start before range2 nor be longer than it */
    if (first1 < first2 || len1 > len2) {
        return false;
    }

    /* Both subtractions are safe given the checks above */
    return first1 - first2 <= len2 - len1;
}
242c00d61d8SAlex Williamson 
/* PCI vendor ID used to match ATI devices */
#define PCI_VENDOR_ID_ATI 0x1002
244c00d61d8SAlex Williamson 
245c00d61d8SAlex Williamson /*
246c00d61d8SAlex Williamson  * Radeon HD cards (HD5450 & HD7850) report the upper byte of the I/O port BAR
247c00d61d8SAlex Williamson  * through VGA register 0x3c3.  On newer cards, the I/O port BAR is always
248c00d61d8SAlex Williamson  * BAR4 (older cards like the X550 used BAR1, but we don't care to support
249c00d61d8SAlex Williamson  * those).  Note that on bare metal, a read of 0x3c3 doesn't always return the
250c00d61d8SAlex Williamson  * I/O port BAR address.  Originally this was coded to return the virtual BAR
251c00d61d8SAlex Williamson  * address only if the physical register read returns the actual BAR address,
252c00d61d8SAlex Williamson  * but users have reported greater success if we return the virtual address
253c00d61d8SAlex Williamson  * unconditionally.
254c00d61d8SAlex Williamson  */
255c00d61d8SAlex Williamson static uint64_t vfio_ati_3c3_quirk_read(void *opaque,
256c00d61d8SAlex Williamson                                         hwaddr addr, unsigned size)
257c00d61d8SAlex Williamson {
258b946d286SAlex Williamson     VFIOPCIDevice *vdev = opaque;
259c00d61d8SAlex Williamson     uint64_t data = vfio_pci_read_config(&vdev->pdev,
260b946d286SAlex Williamson                                          PCI_BASE_ADDRESS_4 + 1, size);
261b946d286SAlex Williamson 
262b946d286SAlex Williamson     trace_vfio_quirk_ati_3c3_read(vdev->vbasedev.name, data);
263c00d61d8SAlex Williamson 
264c00d61d8SAlex Williamson     return data;
265c00d61d8SAlex Williamson }
266c00d61d8SAlex Williamson 
26724202d2bSPrasad J Pandit static void vfio_ati_3c3_quirk_write(void *opaque, hwaddr addr,
26824202d2bSPrasad J Pandit                                         uint64_t data, unsigned size)
26924202d2bSPrasad J Pandit {
27024202d2bSPrasad J Pandit     qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid access\n", __func__);
27124202d2bSPrasad J Pandit }
27224202d2bSPrasad J Pandit 
273c00d61d8SAlex Williamson static const MemoryRegionOps vfio_ati_3c3_quirk = {
274c00d61d8SAlex Williamson     .read = vfio_ati_3c3_quirk_read,
27524202d2bSPrasad J Pandit     .write = vfio_ati_3c3_quirk_write,
276c00d61d8SAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
277c00d61d8SAlex Williamson };
278c00d61d8SAlex Williamson 
27929d62771SThomas Huth VFIOQuirk *vfio_quirk_alloc(int nr_mem)
280bcf3c3d0SAlex Williamson {
281bcf3c3d0SAlex Williamson     VFIOQuirk *quirk = g_new0(VFIOQuirk, 1);
282c958c51dSAlex Williamson     QLIST_INIT(&quirk->ioeventfds);
283bcf3c3d0SAlex Williamson     quirk->mem = g_new0(MemoryRegion, nr_mem);
284bcf3c3d0SAlex Williamson     quirk->nr_mem = nr_mem;
285bcf3c3d0SAlex Williamson 
286bcf3c3d0SAlex Williamson     return quirk;
287bcf3c3d0SAlex Williamson }
288bcf3c3d0SAlex Williamson 
2892b1dbd0dSAlex Williamson static void vfio_ioeventfd_exit(VFIOPCIDevice *vdev, VFIOIOEventFD *ioeventfd)
290c958c51dSAlex Williamson {
291c958c51dSAlex Williamson     QLIST_REMOVE(ioeventfd, next);
292c958c51dSAlex Williamson     memory_region_del_eventfd(ioeventfd->mr, ioeventfd->addr, ioeventfd->size,
293c958c51dSAlex Williamson                               true, ioeventfd->data, &ioeventfd->e);
2942b1dbd0dSAlex Williamson 
2952b1dbd0dSAlex Williamson     if (ioeventfd->vfio) {
2962b1dbd0dSAlex Williamson         struct vfio_device_ioeventfd vfio_ioeventfd;
2972b1dbd0dSAlex Williamson 
2982b1dbd0dSAlex Williamson         vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd);
2992b1dbd0dSAlex Williamson         vfio_ioeventfd.flags = ioeventfd->size;
3002b1dbd0dSAlex Williamson         vfio_ioeventfd.data = ioeventfd->data;
3012b1dbd0dSAlex Williamson         vfio_ioeventfd.offset = ioeventfd->region->fd_offset +
3022b1dbd0dSAlex Williamson                                 ioeventfd->region_addr;
3032b1dbd0dSAlex Williamson         vfio_ioeventfd.fd = -1;
3042b1dbd0dSAlex Williamson 
3052b1dbd0dSAlex Williamson         if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd)) {
3062b1dbd0dSAlex Williamson             error_report("Failed to remove vfio ioeventfd for %s+0x%"
3072b1dbd0dSAlex Williamson                          HWADDR_PRIx"[%d]:0x%"PRIx64" (%m)",
3082b1dbd0dSAlex Williamson                          memory_region_name(ioeventfd->mr), ioeventfd->addr,
3092b1dbd0dSAlex Williamson                          ioeventfd->size, ioeventfd->data);
3102b1dbd0dSAlex Williamson         }
3112b1dbd0dSAlex Williamson     } else {
3122b1dbd0dSAlex Williamson         qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
3132b1dbd0dSAlex Williamson                             NULL, NULL, NULL);
3142b1dbd0dSAlex Williamson     }
3152b1dbd0dSAlex Williamson 
316c958c51dSAlex Williamson     event_notifier_cleanup(&ioeventfd->e);
317c958c51dSAlex Williamson     trace_vfio_ioeventfd_exit(memory_region_name(ioeventfd->mr),
318c958c51dSAlex Williamson                               (uint64_t)ioeventfd->addr, ioeventfd->size,
319c958c51dSAlex Williamson                               ioeventfd->data);
320c958c51dSAlex Williamson     g_free(ioeventfd);
321c958c51dSAlex Williamson }
322c958c51dSAlex Williamson 
323c958c51dSAlex Williamson static void vfio_drop_dynamic_eventfds(VFIOPCIDevice *vdev, VFIOQuirk *quirk)
324c958c51dSAlex Williamson {
325c958c51dSAlex Williamson     VFIOIOEventFD *ioeventfd, *tmp;
326c958c51dSAlex Williamson 
327c958c51dSAlex Williamson     QLIST_FOREACH_SAFE(ioeventfd, &quirk->ioeventfds, next, tmp) {
328c958c51dSAlex Williamson         if (ioeventfd->dynamic) {
3292b1dbd0dSAlex Williamson             vfio_ioeventfd_exit(vdev, ioeventfd);
330c958c51dSAlex Williamson         }
331c958c51dSAlex Williamson     }
332c958c51dSAlex Williamson }
333c958c51dSAlex Williamson 
334c958c51dSAlex Williamson static void vfio_ioeventfd_handler(void *opaque)
335c958c51dSAlex Williamson {
336c958c51dSAlex Williamson     VFIOIOEventFD *ioeventfd = opaque;
337c958c51dSAlex Williamson 
338c958c51dSAlex Williamson     if (event_notifier_test_and_clear(&ioeventfd->e)) {
339c958c51dSAlex Williamson         vfio_region_write(ioeventfd->region, ioeventfd->region_addr,
340c958c51dSAlex Williamson                           ioeventfd->data, ioeventfd->size);
341c958c51dSAlex Williamson         trace_vfio_ioeventfd_handler(memory_region_name(ioeventfd->mr),
342c958c51dSAlex Williamson                                      (uint64_t)ioeventfd->addr, ioeventfd->size,
343c958c51dSAlex Williamson                                      ioeventfd->data);
344c958c51dSAlex Williamson     }
345c958c51dSAlex Williamson }
346c958c51dSAlex Williamson 
347c958c51dSAlex Williamson static VFIOIOEventFD *vfio_ioeventfd_init(VFIOPCIDevice *vdev,
348c958c51dSAlex Williamson                                           MemoryRegion *mr, hwaddr addr,
349c958c51dSAlex Williamson                                           unsigned size, uint64_t data,
350c958c51dSAlex Williamson                                           VFIORegion *region,
351c958c51dSAlex Williamson                                           hwaddr region_addr, bool dynamic)
352c958c51dSAlex Williamson {
353c958c51dSAlex Williamson     VFIOIOEventFD *ioeventfd;
354c958c51dSAlex Williamson 
355c958c51dSAlex Williamson     if (vdev->no_kvm_ioeventfd) {
356c958c51dSAlex Williamson         return NULL;
357c958c51dSAlex Williamson     }
358c958c51dSAlex Williamson 
359c958c51dSAlex Williamson     ioeventfd = g_malloc0(sizeof(*ioeventfd));
360c958c51dSAlex Williamson 
361c958c51dSAlex Williamson     if (event_notifier_init(&ioeventfd->e, 0)) {
362c958c51dSAlex Williamson         g_free(ioeventfd);
363c958c51dSAlex Williamson         return NULL;
364c958c51dSAlex Williamson     }
365c958c51dSAlex Williamson 
366c958c51dSAlex Williamson     /*
367c958c51dSAlex Williamson      * MemoryRegion and relative offset, plus additional ioeventfd setup
368c958c51dSAlex Williamson      * parameters for configuring and later tearing down KVM ioeventfd.
369c958c51dSAlex Williamson      */
370c958c51dSAlex Williamson     ioeventfd->mr = mr;
371c958c51dSAlex Williamson     ioeventfd->addr = addr;
372c958c51dSAlex Williamson     ioeventfd->size = size;
373c958c51dSAlex Williamson     ioeventfd->data = data;
374c958c51dSAlex Williamson     ioeventfd->dynamic = dynamic;
375c958c51dSAlex Williamson     /*
376c958c51dSAlex Williamson      * VFIORegion and relative offset for implementing the userspace
377c958c51dSAlex Williamson      * handler.  data & size fields shared for both uses.
378c958c51dSAlex Williamson      */
379c958c51dSAlex Williamson     ioeventfd->region = region;
380c958c51dSAlex Williamson     ioeventfd->region_addr = region_addr;
381c958c51dSAlex Williamson 
3822b1dbd0dSAlex Williamson     if (!vdev->no_vfio_ioeventfd) {
3832b1dbd0dSAlex Williamson         struct vfio_device_ioeventfd vfio_ioeventfd;
3842b1dbd0dSAlex Williamson 
3852b1dbd0dSAlex Williamson         vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd);
3862b1dbd0dSAlex Williamson         vfio_ioeventfd.flags = ioeventfd->size;
3872b1dbd0dSAlex Williamson         vfio_ioeventfd.data = ioeventfd->data;
3882b1dbd0dSAlex Williamson         vfio_ioeventfd.offset = ioeventfd->region->fd_offset +
3892b1dbd0dSAlex Williamson                                 ioeventfd->region_addr;
3902b1dbd0dSAlex Williamson         vfio_ioeventfd.fd = event_notifier_get_fd(&ioeventfd->e);
3912b1dbd0dSAlex Williamson 
3922b1dbd0dSAlex Williamson         ioeventfd->vfio = !ioctl(vdev->vbasedev.fd,
3932b1dbd0dSAlex Williamson                                  VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd);
3942b1dbd0dSAlex Williamson     }
3952b1dbd0dSAlex Williamson 
3962b1dbd0dSAlex Williamson     if (!ioeventfd->vfio) {
397c958c51dSAlex Williamson         qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
398c958c51dSAlex Williamson                             vfio_ioeventfd_handler, NULL, ioeventfd);
3992b1dbd0dSAlex Williamson     }
4002b1dbd0dSAlex Williamson 
401c958c51dSAlex Williamson     memory_region_add_eventfd(ioeventfd->mr, ioeventfd->addr, ioeventfd->size,
402c958c51dSAlex Williamson                               true, ioeventfd->data, &ioeventfd->e);
403c958c51dSAlex Williamson     trace_vfio_ioeventfd_init(memory_region_name(mr), (uint64_t)addr,
4042b1dbd0dSAlex Williamson                               size, data, ioeventfd->vfio);
405c958c51dSAlex Williamson 
406c958c51dSAlex Williamson     return ioeventfd;
407c958c51dSAlex Williamson }
408c958c51dSAlex Williamson 
409c00d61d8SAlex Williamson static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev)
410c00d61d8SAlex Williamson {
411c00d61d8SAlex Williamson     VFIOQuirk *quirk;
412c00d61d8SAlex Williamson 
413c00d61d8SAlex Williamson     /*
414c00d61d8SAlex Williamson      * As long as the BAR is >= 256 bytes it will be aligned such that the
415c00d61d8SAlex Williamson      * lower byte is always zero.  Filter out anything else, if it exists.
416c00d61d8SAlex Williamson      */
417b946d286SAlex Williamson     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
418b946d286SAlex Williamson         !vdev->bars[4].ioport || vdev->bars[4].region.size < 256) {
419c00d61d8SAlex Williamson         return;
420c00d61d8SAlex Williamson     }
421c00d61d8SAlex Williamson 
422bcf3c3d0SAlex Williamson     quirk = vfio_quirk_alloc(1);
423c00d61d8SAlex Williamson 
424b946d286SAlex Williamson     memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, vdev,
425c00d61d8SAlex Williamson                           "vfio-ati-3c3-quirk", 1);
4262d82f8a3SAlex Williamson     memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
4278c4f2348SAlex Williamson                                 3 /* offset 3 bytes from 0x3c0 */, quirk->mem);
428c00d61d8SAlex Williamson 
4292d82f8a3SAlex Williamson     QLIST_INSERT_HEAD(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks,
430c00d61d8SAlex Williamson                       quirk, next);
431c00d61d8SAlex Williamson 
432b946d286SAlex Williamson     trace_vfio_quirk_ati_3c3_probe(vdev->vbasedev.name);
433c00d61d8SAlex Williamson }
434c00d61d8SAlex Williamson 
435c00d61d8SAlex Williamson /*
4360e54f24aSAlex Williamson  * Newer ATI/AMD devices, including HD5450 and HD7850, have a mirror to PCI
437c00d61d8SAlex Williamson  * config space through MMIO BAR2 at offset 0x4000.  Nothing seems to access
438c00d61d8SAlex Williamson  * the MMIO space directly, but a window to this space is provided through
439c00d61d8SAlex Williamson  * I/O port BAR4.  Offset 0x0 is the address register and offset 0x4 is the
440c00d61d8SAlex Williamson  * data register.  When the address is programmed to a range of 0x4000-0x4fff
441c00d61d8SAlex Williamson  * PCI configuration space is available.  Experimentation seems to indicate
4420e54f24aSAlex Williamson  * that read-only may be provided by hardware.
443c00d61d8SAlex Williamson  */
4440e54f24aSAlex Williamson static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr)
445c00d61d8SAlex Williamson {
446c00d61d8SAlex Williamson     VFIOQuirk *quirk;
4470e54f24aSAlex Williamson     VFIOConfigWindowQuirk *window;
448c00d61d8SAlex Williamson 
4490e54f24aSAlex Williamson     /* This windows doesn't seem to be used except by legacy VGA code */
4500e54f24aSAlex Williamson     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
4514d3fc4fdSAlex Williamson         !vdev->vga || nr != 4) {
452c00d61d8SAlex Williamson         return;
453c00d61d8SAlex Williamson     }
454c00d61d8SAlex Williamson 
455bcf3c3d0SAlex Williamson     quirk = vfio_quirk_alloc(2);
4560e54f24aSAlex Williamson     window = quirk->data = g_malloc0(sizeof(*window) +
4570e54f24aSAlex Williamson                                      sizeof(VFIOConfigWindowMatch));
4580e54f24aSAlex Williamson     window->vdev = vdev;
4590e54f24aSAlex Williamson     window->address_offset = 0;
4600e54f24aSAlex Williamson     window->data_offset = 4;
4610e54f24aSAlex Williamson     window->nr_matches = 1;
4620e54f24aSAlex Williamson     window->matches[0].match = 0x4000;
463f5793fd9SAlex Williamson     window->matches[0].mask = vdev->config_size - 1;
4640e54f24aSAlex Williamson     window->bar = nr;
4650e54f24aSAlex Williamson     window->addr_mem = &quirk->mem[0];
4660e54f24aSAlex Williamson     window->data_mem = &quirk->mem[1];
467c00d61d8SAlex Williamson 
4680e54f24aSAlex Williamson     memory_region_init_io(window->addr_mem, OBJECT(vdev),
4690e54f24aSAlex Williamson                           &vfio_generic_window_address_quirk, window,
4700e54f24aSAlex Williamson                           "vfio-ati-bar4-window-address-quirk", 4);
471db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
4720e54f24aSAlex Williamson                                         window->address_offset,
4730e54f24aSAlex Williamson                                         window->addr_mem, 1);
4740e54f24aSAlex Williamson 
4750e54f24aSAlex Williamson     memory_region_init_io(window->data_mem, OBJECT(vdev),
4760e54f24aSAlex Williamson                           &vfio_generic_window_data_quirk, window,
4770e54f24aSAlex Williamson                           "vfio-ati-bar4-window-data-quirk", 4);
478db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
4790e54f24aSAlex Williamson                                         window->data_offset,
4800e54f24aSAlex Williamson                                         window->data_mem, 1);
481c00d61d8SAlex Williamson 
482c00d61d8SAlex Williamson     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
483c00d61d8SAlex Williamson 
4840e54f24aSAlex Williamson     trace_vfio_quirk_ati_bar4_probe(vdev->vbasedev.name);
485c00d61d8SAlex Williamson }
486c00d61d8SAlex Williamson 
487c00d61d8SAlex Williamson /*
4880d38fb1cSAlex Williamson  * Trap the BAR2 MMIO mirror to config space as well.
489c00d61d8SAlex Williamson  */
4900d38fb1cSAlex Williamson static void vfio_probe_ati_bar2_quirk(VFIOPCIDevice *vdev, int nr)
491c00d61d8SAlex Williamson {
492c00d61d8SAlex Williamson     VFIOQuirk *quirk;
4930d38fb1cSAlex Williamson     VFIOConfigMirrorQuirk *mirror;
494c00d61d8SAlex Williamson 
495c00d61d8SAlex Williamson     /* Only enable on newer devices where BAR2 is 64bit */
4960d38fb1cSAlex Williamson     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
4974d3fc4fdSAlex Williamson         !vdev->vga || nr != 2 || !vdev->bars[2].mem64) {
498c00d61d8SAlex Williamson         return;
499c00d61d8SAlex Williamson     }
500c00d61d8SAlex Williamson 
501bcf3c3d0SAlex Williamson     quirk = vfio_quirk_alloc(1);
5020d38fb1cSAlex Williamson     mirror = quirk->data = g_malloc0(sizeof(*mirror));
503bcf3c3d0SAlex Williamson     mirror->mem = quirk->mem;
5040d38fb1cSAlex Williamson     mirror->vdev = vdev;
5050d38fb1cSAlex Williamson     mirror->offset = 0x4000;
5060d38fb1cSAlex Williamson     mirror->bar = nr;
507c00d61d8SAlex Williamson 
5080d38fb1cSAlex Williamson     memory_region_init_io(mirror->mem, OBJECT(vdev),
5090d38fb1cSAlex Williamson                           &vfio_generic_mirror_quirk, mirror,
5100d38fb1cSAlex Williamson                           "vfio-ati-bar2-4000-quirk", PCI_CONFIG_SPACE_SIZE);
511db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
5120d38fb1cSAlex Williamson                                         mirror->offset, mirror->mem, 1);
513c00d61d8SAlex Williamson 
514c00d61d8SAlex Williamson     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
515c00d61d8SAlex Williamson 
5160d38fb1cSAlex Williamson     trace_vfio_quirk_ati_bar2_probe(vdev->vbasedev.name);
517c00d61d8SAlex Williamson }
518c00d61d8SAlex Williamson 
519c00d61d8SAlex Williamson /*
520c00d61d8SAlex Williamson  * Older ATI/AMD cards like the X550 have a similar window to that above.
521c00d61d8SAlex Williamson  * I/O port BAR1 provides a window to a mirror of PCI config space located
522c00d61d8SAlex Williamson  * in BAR2 at offset 0xf00.  We don't care to support such older cards, but
523c00d61d8SAlex Williamson  * note it for future reference.
524c00d61d8SAlex Williamson  */
525c00d61d8SAlex Williamson 
/*
 * Nvidia has several different methods to get to config space; the
 * nouveau project has several of these documented here:
 * https://github.com/pathscale/envytools/tree/master/hwdocs
 *
 * The first quirk is actually not documented in envytools and is found
 * on 10de:01d1 (NVIDIA Corporation G72 [GeForce 7300 LE]).  This is an
 * NV46 chipset.  The backdoor uses the legacy VGA I/O ports to access
 * the mirror of PCI config space found at BAR0 offset 0x1800.  The access
 * sequence first writes 0x338 to I/O port 0x3d4.  The target offset is
 * then written to 0x3d0.  Finally 0x538 is written for a read and 0x738
 * is written for a write to 0x3d4.  The BAR0 offset is then accessible
 * through 0x3d0.  This quirk doesn't seem to be necessary on newer cards
 * that use the I/O port BAR5 window but it doesn't hurt to leave it.
 */
/*
 * State machine for the 0x3d4/0x3d0 backdoor sequence.  NONE must stay 0
 * so that a zero-initialised quirk starts out idle.
 */
typedef enum {
    NONE = 0,
    SELECT,
    WINDOW,
    READ,
    WRITE
} VFIONvidia3d0State;

/*
 * Printable state names for tracing, indexed by VFIONvidia3d0State.  The
 * designated initialisers keep each string tied to its enumerator and the
 * table itself is read-only.
 */
static const char * const nv3d0_states[] = {
    [NONE]   = "NONE",
    [SELECT] = "SELECT",
    [WINDOW] = "WINDOW",
    [READ]   = "READ",
    [WRITE]  = "WRITE",
};
5446029a424SAlex Williamson 
5456029a424SAlex Williamson typedef struct VFIONvidia3d0Quirk {
5466029a424SAlex Williamson     VFIOPCIDevice *vdev;
5476029a424SAlex Williamson     VFIONvidia3d0State state;
5486029a424SAlex Williamson     uint32_t offset;
5496029a424SAlex Williamson } VFIONvidia3d0Quirk;
5506029a424SAlex Williamson 
5516029a424SAlex Williamson static uint64_t vfio_nvidia_3d4_quirk_read(void *opaque,
5526029a424SAlex Williamson                                            hwaddr addr, unsigned size)
5536029a424SAlex Williamson {
5546029a424SAlex Williamson     VFIONvidia3d0Quirk *quirk = opaque;
5556029a424SAlex Williamson     VFIOPCIDevice *vdev = quirk->vdev;
5566029a424SAlex Williamson 
5576029a424SAlex Williamson     quirk->state = NONE;
5586029a424SAlex Williamson 
5592d82f8a3SAlex Williamson     return vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
5606029a424SAlex Williamson                          addr + 0x14, size);
5616029a424SAlex Williamson }
5626029a424SAlex Williamson 
5636029a424SAlex Williamson static void vfio_nvidia_3d4_quirk_write(void *opaque, hwaddr addr,
5646029a424SAlex Williamson                                         uint64_t data, unsigned size)
5656029a424SAlex Williamson {
5666029a424SAlex Williamson     VFIONvidia3d0Quirk *quirk = opaque;
5676029a424SAlex Williamson     VFIOPCIDevice *vdev = quirk->vdev;
5686029a424SAlex Williamson     VFIONvidia3d0State old_state = quirk->state;
5696029a424SAlex Williamson 
5706029a424SAlex Williamson     quirk->state = NONE;
5716029a424SAlex Williamson 
5726029a424SAlex Williamson     switch (data) {
5736029a424SAlex Williamson     case 0x338:
5746029a424SAlex Williamson         if (old_state == NONE) {
5756029a424SAlex Williamson             quirk->state = SELECT;
5766029a424SAlex Williamson             trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
5776029a424SAlex Williamson                                               nv3d0_states[quirk->state]);
5786029a424SAlex Williamson         }
5796029a424SAlex Williamson         break;
5806029a424SAlex Williamson     case 0x538:
5816029a424SAlex Williamson         if (old_state == WINDOW) {
5826029a424SAlex Williamson             quirk->state = READ;
5836029a424SAlex Williamson             trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
5846029a424SAlex Williamson                                               nv3d0_states[quirk->state]);
5856029a424SAlex Williamson         }
5866029a424SAlex Williamson         break;
5876029a424SAlex Williamson     case 0x738:
5886029a424SAlex Williamson         if (old_state == WINDOW) {
5896029a424SAlex Williamson             quirk->state = WRITE;
5906029a424SAlex Williamson             trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
5916029a424SAlex Williamson                                               nv3d0_states[quirk->state]);
5926029a424SAlex Williamson         }
5936029a424SAlex Williamson         break;
5946029a424SAlex Williamson     }
5956029a424SAlex Williamson 
5962d82f8a3SAlex Williamson     vfio_vga_write(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
5976029a424SAlex Williamson                    addr + 0x14, data, size);
5986029a424SAlex Williamson }
5996029a424SAlex Williamson 
6006029a424SAlex Williamson static const MemoryRegionOps vfio_nvidia_3d4_quirk = {
6016029a424SAlex Williamson     .read = vfio_nvidia_3d4_quirk_read,
6026029a424SAlex Williamson     .write = vfio_nvidia_3d4_quirk_write,
6036029a424SAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
604c00d61d8SAlex Williamson };
605c00d61d8SAlex Williamson 
606c00d61d8SAlex Williamson static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque,
607c00d61d8SAlex Williamson                                            hwaddr addr, unsigned size)
608c00d61d8SAlex Williamson {
6096029a424SAlex Williamson     VFIONvidia3d0Quirk *quirk = opaque;
610c00d61d8SAlex Williamson     VFIOPCIDevice *vdev = quirk->vdev;
6116029a424SAlex Williamson     VFIONvidia3d0State old_state = quirk->state;
6122d82f8a3SAlex Williamson     uint64_t data = vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
6136029a424SAlex Williamson                                   addr + 0x10, size);
614c00d61d8SAlex Williamson 
6156029a424SAlex Williamson     quirk->state = NONE;
6166029a424SAlex Williamson 
6176029a424SAlex Williamson     if (old_state == READ &&
6186029a424SAlex Williamson         (quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) {
6196029a424SAlex Williamson         uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1);
6206029a424SAlex Williamson 
6216029a424SAlex Williamson         data = vfio_pci_read_config(&vdev->pdev, offset, size);
6226029a424SAlex Williamson         trace_vfio_quirk_nvidia_3d0_read(vdev->vbasedev.name,
6236029a424SAlex Williamson                                          offset, size, data);
624c00d61d8SAlex Williamson     }
625c00d61d8SAlex Williamson 
626c00d61d8SAlex Williamson     return data;
627c00d61d8SAlex Williamson }
628c00d61d8SAlex Williamson 
629c00d61d8SAlex Williamson static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr,
630c00d61d8SAlex Williamson                                         uint64_t data, unsigned size)
631c00d61d8SAlex Williamson {
6326029a424SAlex Williamson     VFIONvidia3d0Quirk *quirk = opaque;
633c00d61d8SAlex Williamson     VFIOPCIDevice *vdev = quirk->vdev;
6346029a424SAlex Williamson     VFIONvidia3d0State old_state = quirk->state;
635c00d61d8SAlex Williamson 
6366029a424SAlex Williamson     quirk->state = NONE;
6376029a424SAlex Williamson 
6386029a424SAlex Williamson     if (old_state == SELECT) {
6396029a424SAlex Williamson         quirk->offset = (uint32_t)data;
6406029a424SAlex Williamson         quirk->state = WINDOW;
6416029a424SAlex Williamson         trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
6426029a424SAlex Williamson                                           nv3d0_states[quirk->state]);
6436029a424SAlex Williamson     } else if (old_state == WRITE) {
6446029a424SAlex Williamson         if ((quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) {
6456029a424SAlex Williamson             uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1);
6466029a424SAlex Williamson 
6476029a424SAlex Williamson             vfio_pci_write_config(&vdev->pdev, offset, data, size);
6486029a424SAlex Williamson             trace_vfio_quirk_nvidia_3d0_write(vdev->vbasedev.name,
6496029a424SAlex Williamson                                               offset, data, size);
650c00d61d8SAlex Williamson             return;
651c00d61d8SAlex Williamson         }
652c00d61d8SAlex Williamson     }
653c00d61d8SAlex Williamson 
6542d82f8a3SAlex Williamson     vfio_vga_write(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
6556029a424SAlex Williamson                    addr + 0x10, data, size);
656c00d61d8SAlex Williamson }
657c00d61d8SAlex Williamson 
658c00d61d8SAlex Williamson static const MemoryRegionOps vfio_nvidia_3d0_quirk = {
659c00d61d8SAlex Williamson     .read = vfio_nvidia_3d0_quirk_read,
660c00d61d8SAlex Williamson     .write = vfio_nvidia_3d0_quirk_write,
661c00d61d8SAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
662c00d61d8SAlex Williamson };
663c00d61d8SAlex Williamson 
664c00d61d8SAlex Williamson static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev)
665c00d61d8SAlex Williamson {
666c00d61d8SAlex Williamson     VFIOQuirk *quirk;
6676029a424SAlex Williamson     VFIONvidia3d0Quirk *data;
668c00d61d8SAlex Williamson 
669db32d0f4SAlex Williamson     if (vdev->no_geforce_quirks ||
670db32d0f4SAlex Williamson         !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
671c00d61d8SAlex Williamson         !vdev->bars[1].region.size) {
672c00d61d8SAlex Williamson         return;
673c00d61d8SAlex Williamson     }
674c00d61d8SAlex Williamson 
675bcf3c3d0SAlex Williamson     quirk = vfio_quirk_alloc(2);
6766029a424SAlex Williamson     quirk->data = data = g_malloc0(sizeof(*data));
6776029a424SAlex Williamson     data->vdev = vdev;
678c00d61d8SAlex Williamson 
6796029a424SAlex Williamson     memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_nvidia_3d4_quirk,
6806029a424SAlex Williamson                           data, "vfio-nvidia-3d4-quirk", 2);
6812d82f8a3SAlex Williamson     memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
6826029a424SAlex Williamson                                 0x14 /* 0x3c0 + 0x14 */, &quirk->mem[0]);
6836029a424SAlex Williamson 
6846029a424SAlex Williamson     memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_nvidia_3d0_quirk,
6856029a424SAlex Williamson                           data, "vfio-nvidia-3d0-quirk", 2);
6862d82f8a3SAlex Williamson     memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
6876029a424SAlex Williamson                                 0x10 /* 0x3c0 + 0x10 */, &quirk->mem[1]);
688c00d61d8SAlex Williamson 
6892d82f8a3SAlex Williamson     QLIST_INSERT_HEAD(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks,
690c00d61d8SAlex Williamson                       quirk, next);
691c00d61d8SAlex Williamson 
6926029a424SAlex Williamson     trace_vfio_quirk_nvidia_3d0_probe(vdev->vbasedev.name);
693c00d61d8SAlex Williamson }
694c00d61d8SAlex Williamson 
695c00d61d8SAlex Williamson /*
696c00d61d8SAlex Williamson  * The second quirk is documented in envytools.  The I/O port BAR5 is just
697c00d61d8SAlex Williamson  * a set of address/data ports to the MMIO BARs.  The BAR we care about is
698c00d61d8SAlex Williamson  * again BAR0.  This backdoor is apparently a bit newer than the one above
699c00d61d8SAlex Williamson  * so we need to not only trap 256 bytes @0x1800, but all of PCI config
700c00d61d8SAlex Williamson  * space, including extended space is available at the 4k @0x88000.
701c00d61d8SAlex Williamson  */
7020e54f24aSAlex Williamson typedef struct VFIONvidiaBAR5Quirk {
7030e54f24aSAlex Williamson     uint32_t master;
7040e54f24aSAlex Williamson     uint32_t enable;
7050e54f24aSAlex Williamson     MemoryRegion *addr_mem;
7060e54f24aSAlex Williamson     MemoryRegion *data_mem;
7070e54f24aSAlex Williamson     bool enabled;
7080e54f24aSAlex Williamson     VFIOConfigWindowQuirk window; /* last for match data */
7090e54f24aSAlex Williamson } VFIONvidiaBAR5Quirk;
710c00d61d8SAlex Williamson 
7110e54f24aSAlex Williamson static void vfio_nvidia_bar5_enable(VFIONvidiaBAR5Quirk *bar5)
7120e54f24aSAlex Williamson {
7130e54f24aSAlex Williamson     VFIOPCIDevice *vdev = bar5->window.vdev;
7140e54f24aSAlex Williamson 
7150e54f24aSAlex Williamson     if (((bar5->master & bar5->enable) & 0x1) == bar5->enabled) {
7160e54f24aSAlex Williamson         return;
7170e54f24aSAlex Williamson     }
7180e54f24aSAlex Williamson 
7190e54f24aSAlex Williamson     bar5->enabled = !bar5->enabled;
7200e54f24aSAlex Williamson     trace_vfio_quirk_nvidia_bar5_state(vdev->vbasedev.name,
7210e54f24aSAlex Williamson                                        bar5->enabled ?  "Enable" : "Disable");
7220e54f24aSAlex Williamson     memory_region_set_enabled(bar5->addr_mem, bar5->enabled);
7230e54f24aSAlex Williamson     memory_region_set_enabled(bar5->data_mem, bar5->enabled);
7240e54f24aSAlex Williamson }
7250e54f24aSAlex Williamson 
7260e54f24aSAlex Williamson static uint64_t vfio_nvidia_bar5_quirk_master_read(void *opaque,
7270e54f24aSAlex Williamson                                                    hwaddr addr, unsigned size)
7280e54f24aSAlex Williamson {
7290e54f24aSAlex Williamson     VFIONvidiaBAR5Quirk *bar5 = opaque;
7300e54f24aSAlex Williamson     VFIOPCIDevice *vdev = bar5->window.vdev;
7310e54f24aSAlex Williamson 
7320e54f24aSAlex Williamson     return vfio_region_read(&vdev->bars[5].region, addr, size);
7330e54f24aSAlex Williamson }
7340e54f24aSAlex Williamson 
7350e54f24aSAlex Williamson static void vfio_nvidia_bar5_quirk_master_write(void *opaque, hwaddr addr,
736c00d61d8SAlex Williamson                                                 uint64_t data, unsigned size)
737c00d61d8SAlex Williamson {
7380e54f24aSAlex Williamson     VFIONvidiaBAR5Quirk *bar5 = opaque;
7390e54f24aSAlex Williamson     VFIOPCIDevice *vdev = bar5->window.vdev;
740c00d61d8SAlex Williamson 
7410e54f24aSAlex Williamson     vfio_region_write(&vdev->bars[5].region, addr, data, size);
7420e54f24aSAlex Williamson 
7430e54f24aSAlex Williamson     bar5->master = data;
7440e54f24aSAlex Williamson     vfio_nvidia_bar5_enable(bar5);
745c00d61d8SAlex Williamson }
746c00d61d8SAlex Williamson 
7470e54f24aSAlex Williamson static const MemoryRegionOps vfio_nvidia_bar5_quirk_master = {
7480e54f24aSAlex Williamson     .read = vfio_nvidia_bar5_quirk_master_read,
7490e54f24aSAlex Williamson     .write = vfio_nvidia_bar5_quirk_master_write,
750c00d61d8SAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
751c00d61d8SAlex Williamson };
752c00d61d8SAlex Williamson 
7530e54f24aSAlex Williamson static uint64_t vfio_nvidia_bar5_quirk_enable_read(void *opaque,
7540e54f24aSAlex Williamson                                                    hwaddr addr, unsigned size)
755c00d61d8SAlex Williamson {
7560e54f24aSAlex Williamson     VFIONvidiaBAR5Quirk *bar5 = opaque;
7570e54f24aSAlex Williamson     VFIOPCIDevice *vdev = bar5->window.vdev;
758c00d61d8SAlex Williamson 
7590e54f24aSAlex Williamson     return vfio_region_read(&vdev->bars[5].region, addr + 4, size);
7600e54f24aSAlex Williamson }
7610e54f24aSAlex Williamson 
7620e54f24aSAlex Williamson static void vfio_nvidia_bar5_quirk_enable_write(void *opaque, hwaddr addr,
7630e54f24aSAlex Williamson                                                 uint64_t data, unsigned size)
7640e54f24aSAlex Williamson {
7650e54f24aSAlex Williamson     VFIONvidiaBAR5Quirk *bar5 = opaque;
7660e54f24aSAlex Williamson     VFIOPCIDevice *vdev = bar5->window.vdev;
7670e54f24aSAlex Williamson 
7680e54f24aSAlex Williamson     vfio_region_write(&vdev->bars[5].region, addr + 4, data, size);
7690e54f24aSAlex Williamson 
7700e54f24aSAlex Williamson     bar5->enable = data;
7710e54f24aSAlex Williamson     vfio_nvidia_bar5_enable(bar5);
7720e54f24aSAlex Williamson }
7730e54f24aSAlex Williamson 
7740e54f24aSAlex Williamson static const MemoryRegionOps vfio_nvidia_bar5_quirk_enable = {
7750e54f24aSAlex Williamson     .read = vfio_nvidia_bar5_quirk_enable_read,
7760e54f24aSAlex Williamson     .write = vfio_nvidia_bar5_quirk_enable_write,
7770e54f24aSAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
7780e54f24aSAlex Williamson };
7790e54f24aSAlex Williamson 
7800e54f24aSAlex Williamson static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
7810e54f24aSAlex Williamson {
7820e54f24aSAlex Williamson     VFIOQuirk *quirk;
7830e54f24aSAlex Williamson     VFIONvidiaBAR5Quirk *bar5;
7840e54f24aSAlex Williamson     VFIOConfigWindowQuirk *window;
7850e54f24aSAlex Williamson 
786db32d0f4SAlex Williamson     if (vdev->no_geforce_quirks ||
787db32d0f4SAlex Williamson         !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
7888f419c5bSAlex Williamson         !vdev->vga || nr != 5 || !vdev->bars[5].ioport) {
789c00d61d8SAlex Williamson         return;
790c00d61d8SAlex Williamson     }
791c00d61d8SAlex Williamson 
792bcf3c3d0SAlex Williamson     quirk = vfio_quirk_alloc(4);
7930e54f24aSAlex Williamson     bar5 = quirk->data = g_malloc0(sizeof(*bar5) +
7940e54f24aSAlex Williamson                                    (sizeof(VFIOConfigWindowMatch) * 2));
7950e54f24aSAlex Williamson     window = &bar5->window;
796c00d61d8SAlex Williamson 
7970e54f24aSAlex Williamson     window->vdev = vdev;
7980e54f24aSAlex Williamson     window->address_offset = 0x8;
7990e54f24aSAlex Williamson     window->data_offset = 0xc;
8000e54f24aSAlex Williamson     window->nr_matches = 2;
8010e54f24aSAlex Williamson     window->matches[0].match = 0x1800;
8020e54f24aSAlex Williamson     window->matches[0].mask = PCI_CONFIG_SPACE_SIZE - 1;
8030e54f24aSAlex Williamson     window->matches[1].match = 0x88000;
804f5793fd9SAlex Williamson     window->matches[1].mask = vdev->config_size - 1;
8050e54f24aSAlex Williamson     window->bar = nr;
8060e54f24aSAlex Williamson     window->addr_mem = bar5->addr_mem = &quirk->mem[0];
8070e54f24aSAlex Williamson     window->data_mem = bar5->data_mem = &quirk->mem[1];
8080e54f24aSAlex Williamson 
8090e54f24aSAlex Williamson     memory_region_init_io(window->addr_mem, OBJECT(vdev),
8100e54f24aSAlex Williamson                           &vfio_generic_window_address_quirk, window,
8110e54f24aSAlex Williamson                           "vfio-nvidia-bar5-window-address-quirk", 4);
812db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
8130e54f24aSAlex Williamson                                         window->address_offset,
8140e54f24aSAlex Williamson                                         window->addr_mem, 1);
8150e54f24aSAlex Williamson     memory_region_set_enabled(window->addr_mem, false);
8160e54f24aSAlex Williamson 
8170e54f24aSAlex Williamson     memory_region_init_io(window->data_mem, OBJECT(vdev),
8180e54f24aSAlex Williamson                           &vfio_generic_window_data_quirk, window,
8190e54f24aSAlex Williamson                           "vfio-nvidia-bar5-window-data-quirk", 4);
820db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
8210e54f24aSAlex Williamson                                         window->data_offset,
8220e54f24aSAlex Williamson                                         window->data_mem, 1);
8230e54f24aSAlex Williamson     memory_region_set_enabled(window->data_mem, false);
8240e54f24aSAlex Williamson 
8250e54f24aSAlex Williamson     memory_region_init_io(&quirk->mem[2], OBJECT(vdev),
8260e54f24aSAlex Williamson                           &vfio_nvidia_bar5_quirk_master, bar5,
8270e54f24aSAlex Williamson                           "vfio-nvidia-bar5-master-quirk", 4);
828db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
8290e54f24aSAlex Williamson                                         0, &quirk->mem[2], 1);
8300e54f24aSAlex Williamson 
8310e54f24aSAlex Williamson     memory_region_init_io(&quirk->mem[3], OBJECT(vdev),
8320e54f24aSAlex Williamson                           &vfio_nvidia_bar5_quirk_enable, bar5,
8330e54f24aSAlex Williamson                           "vfio-nvidia-bar5-enable-quirk", 4);
834db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
8350e54f24aSAlex Williamson                                         4, &quirk->mem[3], 1);
836c00d61d8SAlex Williamson 
837c00d61d8SAlex Williamson     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
838c00d61d8SAlex Williamson 
8390e54f24aSAlex Williamson     trace_vfio_quirk_nvidia_bar5_probe(vdev->vbasedev.name);
840c00d61d8SAlex Williamson }
841c00d61d8SAlex Williamson 
842c958c51dSAlex Williamson typedef struct LastDataSet {
843c958c51dSAlex Williamson     VFIOQuirk *quirk;
844c958c51dSAlex Williamson     hwaddr addr;
845c958c51dSAlex Williamson     uint64_t data;
846c958c51dSAlex Williamson     unsigned size;
847c958c51dSAlex Williamson     int hits;
848c958c51dSAlex Williamson     int added;
849c958c51dSAlex Williamson } LastDataSet;
850c958c51dSAlex Williamson 
851c958c51dSAlex Williamson #define MAX_DYN_IOEVENTFD 10
852c958c51dSAlex Williamson #define HITS_FOR_IOEVENTFD 10
853c958c51dSAlex Williamson 
8540d38fb1cSAlex Williamson /*
8550d38fb1cSAlex Williamson  * Finally, BAR0 itself.  We want to redirect any accesses to either
8560d38fb1cSAlex Williamson  * 0x1800 or 0x88000 through the PCI config space access functions.
8570d38fb1cSAlex Williamson  */
8580d38fb1cSAlex Williamson static void vfio_nvidia_quirk_mirror_write(void *opaque, hwaddr addr,
859c00d61d8SAlex Williamson                                            uint64_t data, unsigned size)
860c00d61d8SAlex Williamson {
8610d38fb1cSAlex Williamson     VFIOConfigMirrorQuirk *mirror = opaque;
8620d38fb1cSAlex Williamson     VFIOPCIDevice *vdev = mirror->vdev;
863c00d61d8SAlex Williamson     PCIDevice *pdev = &vdev->pdev;
864c958c51dSAlex Williamson     LastDataSet *last = (LastDataSet *)&mirror->data;
865c00d61d8SAlex Williamson 
8660d38fb1cSAlex Williamson     vfio_generic_quirk_mirror_write(opaque, addr, data, size);
867c00d61d8SAlex Williamson 
868c00d61d8SAlex Williamson     /*
869c00d61d8SAlex Williamson      * Nvidia seems to acknowledge MSI interrupts by writing 0xff to the
870c00d61d8SAlex Williamson      * MSI capability ID register.  Both the ID and next register are
871c00d61d8SAlex Williamson      * read-only, so we allow writes covering either of those to real hw.
872c00d61d8SAlex Williamson      */
873c00d61d8SAlex Williamson     if ((pdev->cap_present & QEMU_PCI_CAP_MSI) &&
874c00d61d8SAlex Williamson         vfio_range_contained(addr, size, pdev->msi_cap, PCI_MSI_FLAGS)) {
8750d38fb1cSAlex Williamson         vfio_region_write(&vdev->bars[mirror->bar].region,
8760d38fb1cSAlex Williamson                           addr + mirror->offset, data, size);
8770d38fb1cSAlex Williamson         trace_vfio_quirk_nvidia_bar0_msi_ack(vdev->vbasedev.name);
878c00d61d8SAlex Williamson     }
879c958c51dSAlex Williamson 
880c958c51dSAlex Williamson     /*
881c958c51dSAlex Williamson      * Automatically add an ioeventfd to handle any repeated write with the
882c958c51dSAlex Williamson      * same data and size above the standard PCI config space header.  This is
883c958c51dSAlex Williamson      * primarily expected to accelerate the MSI-ACK behavior, such as noted
884c958c51dSAlex Williamson      * above.  Current hardware/drivers should trigger an ioeventfd at config
885c958c51dSAlex Williamson      * offset 0x704 (region offset 0x88704), with data 0x0, size 4.
886c958c51dSAlex Williamson      *
887c958c51dSAlex Williamson      * The criteria of 10 successive hits is arbitrary but reliably adds the
888c958c51dSAlex Williamson      * MSI-ACK region.  Note that as some writes are bypassed via the ioeventfd,
889c958c51dSAlex Williamson      * the remaining ones have a greater chance of being seen successively.
890c958c51dSAlex Williamson      * To avoid the pathological case of burning up all of QEMU's open file
891c958c51dSAlex Williamson      * handles, arbitrarily limit this algorithm from adding no more than 10
892c958c51dSAlex Williamson      * ioeventfds, print an error if we would have added an 11th, and then
893c958c51dSAlex Williamson      * stop counting.
894c958c51dSAlex Williamson      */
895c958c51dSAlex Williamson     if (!vdev->no_kvm_ioeventfd &&
896c958c51dSAlex Williamson         addr >= PCI_STD_HEADER_SIZEOF && last->added <= MAX_DYN_IOEVENTFD) {
897c958c51dSAlex Williamson         if (addr != last->addr || data != last->data || size != last->size) {
898c958c51dSAlex Williamson             last->addr = addr;
899c958c51dSAlex Williamson             last->data = data;
900c958c51dSAlex Williamson             last->size = size;
901c958c51dSAlex Williamson             last->hits = 1;
902c958c51dSAlex Williamson         } else if (++last->hits >= HITS_FOR_IOEVENTFD) {
903c958c51dSAlex Williamson             if (last->added < MAX_DYN_IOEVENTFD) {
904c958c51dSAlex Williamson                 VFIOIOEventFD *ioeventfd;
905c958c51dSAlex Williamson                 ioeventfd = vfio_ioeventfd_init(vdev, mirror->mem, addr, size,
906c958c51dSAlex Williamson                                         data, &vdev->bars[mirror->bar].region,
907c958c51dSAlex Williamson                                         mirror->offset + addr, true);
908c958c51dSAlex Williamson                 if (ioeventfd) {
909c958c51dSAlex Williamson                     VFIOQuirk *quirk = last->quirk;
910c958c51dSAlex Williamson 
911c958c51dSAlex Williamson                     QLIST_INSERT_HEAD(&quirk->ioeventfds, ioeventfd, next);
912c958c51dSAlex Williamson                     last->added++;
913c958c51dSAlex Williamson                 }
914c958c51dSAlex Williamson             } else {
915c958c51dSAlex Williamson                 last->added++;
916c958c51dSAlex Williamson                 warn_report("NVIDIA ioeventfd queue full for %s, unable to "
917c958c51dSAlex Williamson                             "accelerate 0x%"HWADDR_PRIx", data 0x%"PRIx64", "
918c958c51dSAlex Williamson                             "size %u", vdev->vbasedev.name, addr, data, size);
919c958c51dSAlex Williamson             }
920c958c51dSAlex Williamson         }
921c958c51dSAlex Williamson     }
922c00d61d8SAlex Williamson }
923c00d61d8SAlex Williamson 
9240d38fb1cSAlex Williamson static const MemoryRegionOps vfio_nvidia_mirror_quirk = {
9250d38fb1cSAlex Williamson     .read = vfio_generic_quirk_mirror_read,
9260d38fb1cSAlex Williamson     .write = vfio_nvidia_quirk_mirror_write,
927c00d61d8SAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
928c00d61d8SAlex Williamson };
929c00d61d8SAlex Williamson 
930c958c51dSAlex Williamson static void vfio_nvidia_bar0_quirk_reset(VFIOPCIDevice *vdev, VFIOQuirk *quirk)
931c958c51dSAlex Williamson {
932c958c51dSAlex Williamson     VFIOConfigMirrorQuirk *mirror = quirk->data;
933c958c51dSAlex Williamson     LastDataSet *last = (LastDataSet *)&mirror->data;
934c958c51dSAlex Williamson 
935c958c51dSAlex Williamson     last->addr = last->data = last->size = last->hits = last->added = 0;
936c958c51dSAlex Williamson 
937c958c51dSAlex Williamson     vfio_drop_dynamic_eventfds(vdev, quirk);
938c958c51dSAlex Williamson }
939c958c51dSAlex Williamson 
9400d38fb1cSAlex Williamson static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
941c00d61d8SAlex Williamson {
942c00d61d8SAlex Williamson     VFIOQuirk *quirk;
9430d38fb1cSAlex Williamson     VFIOConfigMirrorQuirk *mirror;
944c958c51dSAlex Williamson     LastDataSet *last;
945c00d61d8SAlex Williamson 
946db32d0f4SAlex Williamson     if (vdev->no_geforce_quirks ||
947db32d0f4SAlex Williamson         !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
9480d38fb1cSAlex Williamson         !vfio_is_vga(vdev) || nr != 0) {
949c00d61d8SAlex Williamson         return;
950c00d61d8SAlex Williamson     }
951c00d61d8SAlex Williamson 
952bcf3c3d0SAlex Williamson     quirk = vfio_quirk_alloc(1);
953c958c51dSAlex Williamson     quirk->reset = vfio_nvidia_bar0_quirk_reset;
954c958c51dSAlex Williamson     mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet));
955bcf3c3d0SAlex Williamson     mirror->mem = quirk->mem;
9560d38fb1cSAlex Williamson     mirror->vdev = vdev;
9570d38fb1cSAlex Williamson     mirror->offset = 0x88000;
9580d38fb1cSAlex Williamson     mirror->bar = nr;
959c958c51dSAlex Williamson     last = (LastDataSet *)&mirror->data;
960c958c51dSAlex Williamson     last->quirk = quirk;
961c00d61d8SAlex Williamson 
9620d38fb1cSAlex Williamson     memory_region_init_io(mirror->mem, OBJECT(vdev),
9630d38fb1cSAlex Williamson                           &vfio_nvidia_mirror_quirk, mirror,
9640d38fb1cSAlex Williamson                           "vfio-nvidia-bar0-88000-mirror-quirk",
965f5793fd9SAlex Williamson                           vdev->config_size);
966db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
9670d38fb1cSAlex Williamson                                         mirror->offset, mirror->mem, 1);
968c00d61d8SAlex Williamson 
969c00d61d8SAlex Williamson     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
970c00d61d8SAlex Williamson 
9710d38fb1cSAlex Williamson     /* The 0x1800 offset mirror only seems to get used by legacy VGA */
9724d3fc4fdSAlex Williamson     if (vdev->vga) {
973bcf3c3d0SAlex Williamson         quirk = vfio_quirk_alloc(1);
974c958c51dSAlex Williamson         quirk->reset = vfio_nvidia_bar0_quirk_reset;
975c958c51dSAlex Williamson         mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet));
976bcf3c3d0SAlex Williamson         mirror->mem = quirk->mem;
9770d38fb1cSAlex Williamson         mirror->vdev = vdev;
9780d38fb1cSAlex Williamson         mirror->offset = 0x1800;
9790d38fb1cSAlex Williamson         mirror->bar = nr;
980c958c51dSAlex Williamson         last = (LastDataSet *)&mirror->data;
981c958c51dSAlex Williamson         last->quirk = quirk;
982c00d61d8SAlex Williamson 
9830d38fb1cSAlex Williamson         memory_region_init_io(mirror->mem, OBJECT(vdev),
9840d38fb1cSAlex Williamson                               &vfio_nvidia_mirror_quirk, mirror,
9850d38fb1cSAlex Williamson                               "vfio-nvidia-bar0-1800-mirror-quirk",
9860d38fb1cSAlex Williamson                               PCI_CONFIG_SPACE_SIZE);
987db0da029SAlex Williamson         memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
9880d38fb1cSAlex Williamson                                             mirror->offset, mirror->mem, 1);
989c00d61d8SAlex Williamson 
990c00d61d8SAlex Williamson         QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
9910d38fb1cSAlex Williamson     }
992c00d61d8SAlex Williamson 
9930d38fb1cSAlex Williamson     trace_vfio_quirk_nvidia_bar0_probe(vdev->vbasedev.name);
994c00d61d8SAlex Williamson }
995c00d61d8SAlex Williamson 
996c00d61d8SAlex Williamson /*
997c00d61d8SAlex Williamson  * TODO - Some Nvidia devices provide config access to their companion HDA
998c00d61d8SAlex Williamson  * device and even to their parent bridge via these config space mirrors.
999c00d61d8SAlex Williamson  * Add quirks for those regions.
1000c00d61d8SAlex Williamson  */
1001c00d61d8SAlex Williamson 
/* PCI vendor ID used to match Realtek devices for the RTL8168 quirk. */
#define PCI_VENDOR_ID_REALTEK 0x10ec
1003c00d61d8SAlex Williamson 
1004c00d61d8SAlex Williamson /*
1005c00d61d8SAlex Williamson  * RTL8168 devices have a backdoor that can access the MSI-X table.  At BAR2
1006c00d61d8SAlex Williamson  * offset 0x70 there is a dword data register, offset 0x74 is a dword address
1007c00d61d8SAlex Williamson  * register.  According to the Linux r8169 driver, the MSI-X table is addressed
1008c00d61d8SAlex Williamson  * when the "type" portion of the address register is set to 0x1.  This appears
1009c00d61d8SAlex Williamson  * to be bits 16:30.  Bit 31 is both a write indicator and some sort of
1010c00d61d8SAlex Williamson  * "address latched" indicator.  Bits 12:15 are a mask field, which we can
1011c00d61d8SAlex Williamson  * ignore because the MSI-X table should always be accessed as a dword (full
1012c00d61d8SAlex Williamson  * mask).  Bits 0:11 is offset within the type.
1013c00d61d8SAlex Williamson  *
1014c00d61d8SAlex Williamson  * Example trace:
1015c00d61d8SAlex Williamson  *
1016c00d61d8SAlex Williamson  * Read from MSI-X table offset 0
1017c00d61d8SAlex Williamson  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x1f000, 4) // store read addr
1018c00d61d8SAlex Williamson  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x8001f000 // latch
1019c00d61d8SAlex Williamson  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x70, 4) = 0xfee00398 // read data
1020c00d61d8SAlex Williamson  *
1021c00d61d8SAlex Williamson  * Write 0xfee00000 to MSI-X table offset 0
1022c00d61d8SAlex Williamson  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x70, 0xfee00000, 4) // write data
1023c00d61d8SAlex Williamson  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x8001f000, 4) // do write
1024c00d61d8SAlex Williamson  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x1f000 // complete
1025c00d61d8SAlex Williamson  */
1026954258a5SAlex Williamson typedef struct VFIOrtl8168Quirk {
1027954258a5SAlex Williamson     VFIOPCIDevice *vdev;
1028954258a5SAlex Williamson     uint32_t addr;
1029954258a5SAlex Williamson     uint32_t data;
1030954258a5SAlex Williamson     bool enabled;
1031954258a5SAlex Williamson } VFIOrtl8168Quirk;
1032954258a5SAlex Williamson 
1033954258a5SAlex Williamson static uint64_t vfio_rtl8168_quirk_address_read(void *opaque,
1034c00d61d8SAlex Williamson                                                 hwaddr addr, unsigned size)
1035c00d61d8SAlex Williamson {
1036954258a5SAlex Williamson     VFIOrtl8168Quirk *rtl = opaque;
1037954258a5SAlex Williamson     VFIOPCIDevice *vdev = rtl->vdev;
1038954258a5SAlex Williamson     uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x74, size);
1039c00d61d8SAlex Williamson 
1040954258a5SAlex Williamson     if (rtl->enabled) {
1041954258a5SAlex Williamson         data = rtl->addr ^ 0x80000000U; /* latch/complete */
1042954258a5SAlex Williamson         trace_vfio_quirk_rtl8168_fake_latch(vdev->vbasedev.name, data);
1043c00d61d8SAlex Williamson     }
1044c00d61d8SAlex Williamson 
1045954258a5SAlex Williamson     return data;
1046c00d61d8SAlex Williamson }
1047c00d61d8SAlex Williamson 
1048954258a5SAlex Williamson static void vfio_rtl8168_quirk_address_write(void *opaque, hwaddr addr,
1049c00d61d8SAlex Williamson                                              uint64_t data, unsigned size)
1050c00d61d8SAlex Williamson {
1051954258a5SAlex Williamson     VFIOrtl8168Quirk *rtl = opaque;
1052954258a5SAlex Williamson     VFIOPCIDevice *vdev = rtl->vdev;
1053c00d61d8SAlex Williamson 
1054954258a5SAlex Williamson     rtl->enabled = false;
1055954258a5SAlex Williamson 
1056c00d61d8SAlex Williamson     if ((data & 0x7fff0000) == 0x10000) { /* MSI-X table */
1057954258a5SAlex Williamson         rtl->enabled = true;
1058954258a5SAlex Williamson         rtl->addr = (uint32_t)data;
1059c00d61d8SAlex Williamson 
1060c00d61d8SAlex Williamson         if (data & 0x80000000U) { /* Do write */
1061c00d61d8SAlex Williamson             if (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) {
1062c00d61d8SAlex Williamson                 hwaddr offset = data & 0xfff;
1063954258a5SAlex Williamson                 uint64_t val = rtl->data;
1064c00d61d8SAlex Williamson 
1065954258a5SAlex Williamson                 trace_vfio_quirk_rtl8168_msix_write(vdev->vbasedev.name,
1066c00d61d8SAlex Williamson                                                     (uint16_t)offset, val);
1067c00d61d8SAlex Williamson 
1068c00d61d8SAlex Williamson                 /* Write to the proper guest MSI-X table instead */
1069c00d61d8SAlex Williamson                 memory_region_dispatch_write(&vdev->pdev.msix_table_mmio,
1070d5d680caSTony Nguyen                                              offset, val,
1071d5d680caSTony Nguyen                                              size_memop(size) | MO_LE,
1072c00d61d8SAlex Williamson                                              MEMTXATTRS_UNSPECIFIED);
1073c00d61d8SAlex Williamson             }
1074c00d61d8SAlex Williamson             return; /* Do not write guest MSI-X data to hardware */
1075c00d61d8SAlex Williamson         }
1076c00d61d8SAlex Williamson     }
1077c00d61d8SAlex Williamson 
1078954258a5SAlex Williamson     vfio_region_write(&vdev->bars[2].region, addr + 0x74, data, size);
1079c00d61d8SAlex Williamson }
1080c00d61d8SAlex Williamson 
1081954258a5SAlex Williamson static const MemoryRegionOps vfio_rtl_address_quirk = {
1082954258a5SAlex Williamson     .read = vfio_rtl8168_quirk_address_read,
1083954258a5SAlex Williamson     .write = vfio_rtl8168_quirk_address_write,
1084c00d61d8SAlex Williamson     .valid = {
1085c00d61d8SAlex Williamson         .min_access_size = 4,
1086c00d61d8SAlex Williamson         .max_access_size = 4,
1087c00d61d8SAlex Williamson         .unaligned = false,
1088c00d61d8SAlex Williamson     },
1089c00d61d8SAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
1090c00d61d8SAlex Williamson };
1091c00d61d8SAlex Williamson 
1092954258a5SAlex Williamson static uint64_t vfio_rtl8168_quirk_data_read(void *opaque,
1093954258a5SAlex Williamson                                              hwaddr addr, unsigned size)
1094c00d61d8SAlex Williamson {
1095954258a5SAlex Williamson     VFIOrtl8168Quirk *rtl = opaque;
1096954258a5SAlex Williamson     VFIOPCIDevice *vdev = rtl->vdev;
109731e6a7b1SThorsten Kohfeldt     uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x70, size);
1098c00d61d8SAlex Williamson 
1099954258a5SAlex Williamson     if (rtl->enabled && (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) {
1100954258a5SAlex Williamson         hwaddr offset = rtl->addr & 0xfff;
1101954258a5SAlex Williamson         memory_region_dispatch_read(&vdev->pdev.msix_table_mmio, offset,
1102d5d680caSTony Nguyen                                     &data, size_memop(size) | MO_LE,
1103475fbf0aSTony Nguyen                                     MEMTXATTRS_UNSPECIFIED);
1104954258a5SAlex Williamson         trace_vfio_quirk_rtl8168_msix_read(vdev->vbasedev.name, offset, data);
1105954258a5SAlex Williamson     }
1106954258a5SAlex Williamson 
1107954258a5SAlex Williamson     return data;
1108954258a5SAlex Williamson }
1109954258a5SAlex Williamson 
1110954258a5SAlex Williamson static void vfio_rtl8168_quirk_data_write(void *opaque, hwaddr addr,
1111954258a5SAlex Williamson                                           uint64_t data, unsigned size)
1112954258a5SAlex Williamson {
1113954258a5SAlex Williamson     VFIOrtl8168Quirk *rtl = opaque;
1114954258a5SAlex Williamson     VFIOPCIDevice *vdev = rtl->vdev;
1115954258a5SAlex Williamson 
1116954258a5SAlex Williamson     rtl->data = (uint32_t)data;
1117954258a5SAlex Williamson 
1118954258a5SAlex Williamson     vfio_region_write(&vdev->bars[2].region, addr + 0x70, data, size);
1119954258a5SAlex Williamson }
1120954258a5SAlex Williamson 
1121954258a5SAlex Williamson static const MemoryRegionOps vfio_rtl_data_quirk = {
1122954258a5SAlex Williamson     .read = vfio_rtl8168_quirk_data_read,
1123954258a5SAlex Williamson     .write = vfio_rtl8168_quirk_data_write,
1124954258a5SAlex Williamson     .valid = {
1125954258a5SAlex Williamson         .min_access_size = 4,
1126954258a5SAlex Williamson         .max_access_size = 4,
1127954258a5SAlex Williamson         .unaligned = false,
1128954258a5SAlex Williamson     },
1129954258a5SAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
1130954258a5SAlex Williamson };
1131954258a5SAlex Williamson 
1132954258a5SAlex Williamson static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr)
1133954258a5SAlex Williamson {
1134954258a5SAlex Williamson     VFIOQuirk *quirk;
1135954258a5SAlex Williamson     VFIOrtl8168Quirk *rtl;
1136954258a5SAlex Williamson 
1137954258a5SAlex Williamson     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_REALTEK, 0x8168) || nr != 2) {
1138c00d61d8SAlex Williamson         return;
1139c00d61d8SAlex Williamson     }
1140c00d61d8SAlex Williamson 
1141bcf3c3d0SAlex Williamson     quirk = vfio_quirk_alloc(2);
1142954258a5SAlex Williamson     quirk->data = rtl = g_malloc0(sizeof(*rtl));
1143954258a5SAlex Williamson     rtl->vdev = vdev;
1144c00d61d8SAlex Williamson 
1145954258a5SAlex Williamson     memory_region_init_io(&quirk->mem[0], OBJECT(vdev),
1146954258a5SAlex Williamson                           &vfio_rtl_address_quirk, rtl,
1147954258a5SAlex Williamson                           "vfio-rtl8168-window-address-quirk", 4);
1148db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
1149954258a5SAlex Williamson                                         0x74, &quirk->mem[0], 1);
1150954258a5SAlex Williamson 
1151954258a5SAlex Williamson     memory_region_init_io(&quirk->mem[1], OBJECT(vdev),
1152954258a5SAlex Williamson                           &vfio_rtl_data_quirk, rtl,
1153954258a5SAlex Williamson                           "vfio-rtl8168-window-data-quirk", 4);
1154db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
1155954258a5SAlex Williamson                                         0x70, &quirk->mem[1], 1);
1156c00d61d8SAlex Williamson 
1157c00d61d8SAlex Williamson     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
1158c00d61d8SAlex Williamson 
1159954258a5SAlex Williamson     trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name);
1160c00d61d8SAlex Williamson }
1161c00d61d8SAlex Williamson 
/* Config space offset of the IGD ASL Storage Register */
#define IGD_ASLS 0xfc
1163c4c45e94SAlex Williamson 
1164c4c45e94SAlex Williamson /*
1165c4c45e94SAlex Williamson  * The OpRegion includes the Video BIOS Table, which seems important for
1166c4c45e94SAlex Williamson  * telling the driver what sort of outputs it has.  Without this, the device
1167c4c45e94SAlex Williamson  * may work in the guest, but we may not get output.  This also requires BIOS
1168c4c45e94SAlex Williamson  * support to reserve and populate a section of guest memory sufficient for
1169c4c45e94SAlex Williamson  * the table and to write the base address of that memory to the ASLS register
1170c4c45e94SAlex Williamson  * of the IGD device.
1171c4c45e94SAlex Williamson  */
1172d3c6a18bSZhenzhong Duan bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
11737237011dSEric Auger                                 struct vfio_region_info *info, Error **errp)
1174c4c45e94SAlex Williamson {
1175c4c45e94SAlex Williamson     int ret;
1176c4c45e94SAlex Williamson 
1177c4c45e94SAlex Williamson     vdev->igd_opregion = g_malloc0(info->size);
1178c4c45e94SAlex Williamson     ret = pread(vdev->vbasedev.fd, vdev->igd_opregion,
1179c4c45e94SAlex Williamson                 info->size, info->offset);
1180c4c45e94SAlex Williamson     if (ret != info->size) {
11817237011dSEric Auger         error_setg(errp, "failed to read IGD OpRegion");
1182c4c45e94SAlex Williamson         g_free(vdev->igd_opregion);
1183c4c45e94SAlex Williamson         vdev->igd_opregion = NULL;
1184d3c6a18bSZhenzhong Duan         return false;
1185c4c45e94SAlex Williamson     }
1186c4c45e94SAlex Williamson 
1187c4c45e94SAlex Williamson     /*
1188c4c45e94SAlex Williamson      * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to
1189c4c45e94SAlex Williamson      * allocate 32bit reserved memory for, copy these contents into, and write
1190c4c45e94SAlex Williamson      * the reserved memory base address to the device ASLS register at 0xFC.
1191c4c45e94SAlex Williamson      * Alignment of this reserved region seems flexible, but using a 4k page
1192c4c45e94SAlex Williamson      * alignment seems to work well.  This interface assumes a single IGD
1193c4c45e94SAlex Williamson      * device, which may be at VM address 00:02.0 in legacy mode or another
1194c4c45e94SAlex Williamson      * address in UPT mode.
1195c4c45e94SAlex Williamson      *
1196c4c45e94SAlex Williamson      * NB, there may be future use cases discovered where the VM should have
1197c4c45e94SAlex Williamson      * direct interaction with the host OpRegion, in which case the write to
1198c4c45e94SAlex Williamson      * the ASLS register would trigger MemoryRegion setup to enable that.
1199c4c45e94SAlex Williamson      */
1200c4c45e94SAlex Williamson     fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion",
1201c4c45e94SAlex Williamson                     vdev->igd_opregion, info->size);
1202c4c45e94SAlex Williamson 
1203c4c45e94SAlex Williamson     trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name);
1204c4c45e94SAlex Williamson 
1205c4c45e94SAlex Williamson     pci_set_long(vdev->pdev.config + IGD_ASLS, 0);
1206c4c45e94SAlex Williamson     pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0);
1207c4c45e94SAlex Williamson     pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0);
1208c4c45e94SAlex Williamson 
1209d3c6a18bSZhenzhong Duan     return true;
1210c4c45e94SAlex Williamson }
1211c4c45e94SAlex Williamson 
1212c4c45e94SAlex Williamson /*
1213c00d61d8SAlex Williamson  * Common quirk probe entry points.
1214c00d61d8SAlex Williamson  */
1215c00d61d8SAlex Williamson void vfio_vga_quirk_setup(VFIOPCIDevice *vdev)
1216c00d61d8SAlex Williamson {
1217c00d61d8SAlex Williamson     vfio_vga_probe_ati_3c3_quirk(vdev);
1218c00d61d8SAlex Williamson     vfio_vga_probe_nvidia_3d0_quirk(vdev);
1219c00d61d8SAlex Williamson }
1220c00d61d8SAlex Williamson 
12212d82f8a3SAlex Williamson void vfio_vga_quirk_exit(VFIOPCIDevice *vdev)
1222c00d61d8SAlex Williamson {
1223c00d61d8SAlex Williamson     VFIOQuirk *quirk;
12248c4f2348SAlex Williamson     int i, j;
1225c00d61d8SAlex Williamson 
12262d82f8a3SAlex Williamson     for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
12272d82f8a3SAlex Williamson         QLIST_FOREACH(quirk, &vdev->vga->region[i].quirks, next) {
12288c4f2348SAlex Williamson             for (j = 0; j < quirk->nr_mem; j++) {
12292d82f8a3SAlex Williamson                 memory_region_del_subregion(&vdev->vga->region[i].mem,
12308c4f2348SAlex Williamson                                             &quirk->mem[j]);
12318c4f2348SAlex Williamson             }
1232c00d61d8SAlex Williamson         }
1233c00d61d8SAlex Williamson     }
1234c00d61d8SAlex Williamson }
1235c00d61d8SAlex Williamson 
12362d82f8a3SAlex Williamson void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev)
1237c00d61d8SAlex Williamson {
12388c4f2348SAlex Williamson     int i, j;
1239c00d61d8SAlex Williamson 
12402d82f8a3SAlex Williamson     for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
12412d82f8a3SAlex Williamson         while (!QLIST_EMPTY(&vdev->vga->region[i].quirks)) {
12422d82f8a3SAlex Williamson             VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga->region[i].quirks);
1243c00d61d8SAlex Williamson             QLIST_REMOVE(quirk, next);
12448c4f2348SAlex Williamson             for (j = 0; j < quirk->nr_mem; j++) {
12458c4f2348SAlex Williamson                 object_unparent(OBJECT(&quirk->mem[j]));
12468c4f2348SAlex Williamson             }
12478c4f2348SAlex Williamson             g_free(quirk->mem);
12488c4f2348SAlex Williamson             g_free(quirk->data);
1249c00d61d8SAlex Williamson             g_free(quirk);
1250c00d61d8SAlex Williamson         }
1251c00d61d8SAlex Williamson     }
1252c00d61d8SAlex Williamson }
1253c00d61d8SAlex Williamson 
1254c00d61d8SAlex Williamson void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
1255c00d61d8SAlex Williamson {
12560e54f24aSAlex Williamson     vfio_probe_ati_bar4_quirk(vdev, nr);
12570d38fb1cSAlex Williamson     vfio_probe_ati_bar2_quirk(vdev, nr);
12580e54f24aSAlex Williamson     vfio_probe_nvidia_bar5_quirk(vdev, nr);
12590d38fb1cSAlex Williamson     vfio_probe_nvidia_bar0_quirk(vdev, nr);
1260954258a5SAlex Williamson     vfio_probe_rtl8168_bar2_quirk(vdev, nr);
126129d62771SThomas Huth #ifdef CONFIG_VFIO_IGD
1262*11b5ce95SCorvin Köhne     vfio_probe_igd_bar0_quirk(vdev, nr);
1263c4c45e94SAlex Williamson     vfio_probe_igd_bar4_quirk(vdev, nr);
126429d62771SThomas Huth #endif
1265c00d61d8SAlex Williamson }
1266c00d61d8SAlex Williamson 
12672d82f8a3SAlex Williamson void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr)
1268c00d61d8SAlex Williamson {
1269c00d61d8SAlex Williamson     VFIOBAR *bar = &vdev->bars[nr];
1270c00d61d8SAlex Williamson     VFIOQuirk *quirk;
12718c4f2348SAlex Williamson     int i;
1272c00d61d8SAlex Williamson 
1273c00d61d8SAlex Williamson     QLIST_FOREACH(quirk, &bar->quirks, next) {
1274c958c51dSAlex Williamson         while (!QLIST_EMPTY(&quirk->ioeventfds)) {
12752b1dbd0dSAlex Williamson             vfio_ioeventfd_exit(vdev, QLIST_FIRST(&quirk->ioeventfds));
1276c958c51dSAlex Williamson         }
1277c958c51dSAlex Williamson 
12788c4f2348SAlex Williamson         for (i = 0; i < quirk->nr_mem; i++) {
1279db0da029SAlex Williamson             memory_region_del_subregion(bar->region.mem, &quirk->mem[i]);
12808c4f2348SAlex Williamson         }
1281c00d61d8SAlex Williamson     }
1282c00d61d8SAlex Williamson }
1283c00d61d8SAlex Williamson 
12842d82f8a3SAlex Williamson void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr)
1285c00d61d8SAlex Williamson {
1286c00d61d8SAlex Williamson     VFIOBAR *bar = &vdev->bars[nr];
12878c4f2348SAlex Williamson     int i;
1288c00d61d8SAlex Williamson 
1289c00d61d8SAlex Williamson     while (!QLIST_EMPTY(&bar->quirks)) {
1290c00d61d8SAlex Williamson         VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks);
1291c00d61d8SAlex Williamson         QLIST_REMOVE(quirk, next);
12928c4f2348SAlex Williamson         for (i = 0; i < quirk->nr_mem; i++) {
12938c4f2348SAlex Williamson             object_unparent(OBJECT(&quirk->mem[i]));
12948c4f2348SAlex Williamson         }
12958c4f2348SAlex Williamson         g_free(quirk->mem);
12968c4f2348SAlex Williamson         g_free(quirk->data);
1297c00d61d8SAlex Williamson         g_free(quirk);
1298c00d61d8SAlex Williamson     }
1299c00d61d8SAlex Williamson }
1300c9c50009SAlex Williamson 
1301c9c50009SAlex Williamson /*
1302c9c50009SAlex Williamson  * Reset quirks
1303c9c50009SAlex Williamson  */
1304469d02deSAlex Williamson void vfio_quirk_reset(VFIOPCIDevice *vdev)
1305469d02deSAlex Williamson {
1306469d02deSAlex Williamson     int i;
1307469d02deSAlex Williamson 
1308469d02deSAlex Williamson     for (i = 0; i < PCI_ROM_SLOT; i++) {
1309469d02deSAlex Williamson         VFIOQuirk *quirk;
1310469d02deSAlex Williamson         VFIOBAR *bar = &vdev->bars[i];
1311469d02deSAlex Williamson 
1312469d02deSAlex Williamson         QLIST_FOREACH(quirk, &bar->quirks, next) {
1313469d02deSAlex Williamson             if (quirk->reset) {
1314469d02deSAlex Williamson                 quirk->reset(vdev, quirk);
1315469d02deSAlex Williamson             }
1316469d02deSAlex Williamson         }
1317469d02deSAlex Williamson     }
1318469d02deSAlex Williamson }
1319c9c50009SAlex Williamson 
1320c9c50009SAlex Williamson /*
1321c9c50009SAlex Williamson  * AMD Radeon PCI config reset, based on Linux:
1322c9c50009SAlex Williamson  *   drivers/gpu/drm/radeon/ci_smc.c:ci_is_smc_running()
1323c9c50009SAlex Williamson  *   drivers/gpu/drm/radeon/radeon_device.c:radeon_pci_config_reset
1324c9c50009SAlex Williamson  *   drivers/gpu/drm/radeon/ci_smc.c:ci_reset_smc()
1325c9c50009SAlex Williamson  *   drivers/gpu/drm/radeon/ci_smc.c:ci_stop_smc_clock()
1326c9c50009SAlex Williamson  * IDs: include/drm/drm_pciids.h
1327c9c50009SAlex Williamson  * Registers: http://cgit.freedesktop.org/~agd5f/linux/commit/?id=4e2aa447f6f0
1328c9c50009SAlex Williamson  *
1329c9c50009SAlex Williamson  * Bonaire and Hawaii GPUs do not respond to a bus reset.  This is a bug in the
1330c9c50009SAlex Williamson  * hardware that should be fixed on future ASICs.  The symptom of this is that
1331c9c50009SAlex Williamson  * once the accelerated driver loads, Windows guests will bsod on subsequent
1332c9c50009SAlex Williamson  * attempts to load the driver, such as after VM reset or shutdown/restart.  To
1333c9c50009SAlex Williamson  * work around this, we do an AMD specific PCI config reset, followed by an SMC
1334c9c50009SAlex Williamson  * reset.  The PCI config reset only works if SMC firmware is running, so we
1335c9c50009SAlex Williamson  * have a dependency on the state of the device as to whether this reset will
1336c9c50009SAlex Williamson  * be effective.  There are still cases where we won't be able to kick the
1337c9c50009SAlex Williamson  * device into working, but this greatly improves the usability overall.  The
1338c9c50009SAlex Williamson  * config reset magic is relatively common on AMD GPUs, but the setup and SMC
1339c9c50009SAlex Williamson  * poking is largely ASIC specific.
1340c9c50009SAlex Williamson  */
1341c9c50009SAlex Williamson static bool vfio_radeon_smc_is_running(VFIOPCIDevice *vdev)
1342c9c50009SAlex Williamson {
1343c9c50009SAlex Williamson     uint32_t clk, pc_c;
1344c9c50009SAlex Williamson 
1345c9c50009SAlex Williamson     /*
1346c9c50009SAlex Williamson      * Registers 200h and 204h are index and data registers for accessing
1347c9c50009SAlex Williamson      * indirect configuration registers within the device.
1348c9c50009SAlex Williamson      */
1349c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
1350c9c50009SAlex Williamson     clk = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1351c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000370, 4);
1352c9c50009SAlex Williamson     pc_c = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1353c9c50009SAlex Williamson 
1354c9c50009SAlex Williamson     return (!(clk & 1) && (0x20100 <= pc_c));
1355c9c50009SAlex Williamson }
1356c9c50009SAlex Williamson 
1357c9c50009SAlex Williamson /*
1358c9c50009SAlex Williamson  * The scope of a config reset is controlled by a mode bit in the misc register
1359c9c50009SAlex Williamson  * and a fuse, exposed as a bit in another register.  The fuse is the default
1360631ba5a1SCai Huoqing  * (0 = GFX, 1 = whole GPU), the misc bit is a toggle, with the formula
1361c9c50009SAlex Williamson  * scope = !(misc ^ fuse), where the resulting scope is defined the same as
1362c9c50009SAlex Williamson  * the fuse.  A truth table therefore tells us that if misc == fuse, we need
1363c9c50009SAlex Williamson  * to flip the value of the bit in the misc register.
1364c9c50009SAlex Williamson  */
1365c9c50009SAlex Williamson static void vfio_radeon_set_gfx_only_reset(VFIOPCIDevice *vdev)
1366c9c50009SAlex Williamson {
1367c9c50009SAlex Williamson     uint32_t misc, fuse;
1368c9c50009SAlex Williamson     bool a, b;
1369c9c50009SAlex Williamson 
1370c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x200, 0xc00c0000, 4);
1371c9c50009SAlex Williamson     fuse = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1372c9c50009SAlex Williamson     b = fuse & 64;
1373c9c50009SAlex Williamson 
1374c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x200, 0xc0000010, 4);
1375c9c50009SAlex Williamson     misc = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1376c9c50009SAlex Williamson     a = misc & 2;
1377c9c50009SAlex Williamson 
1378c9c50009SAlex Williamson     if (a == b) {
1379c9c50009SAlex Williamson         vfio_region_write(&vdev->bars[5].region, 0x204, misc ^ 2, 4);
1380c9c50009SAlex Williamson         vfio_region_read(&vdev->bars[5].region, 0x204, 4); /* flush */
1381c9c50009SAlex Williamson     }
1382c9c50009SAlex Williamson }
1383c9c50009SAlex Williamson 
1384c9c50009SAlex Williamson static int vfio_radeon_reset(VFIOPCIDevice *vdev)
1385c9c50009SAlex Williamson {
1386c9c50009SAlex Williamson     PCIDevice *pdev = &vdev->pdev;
1387c9c50009SAlex Williamson     int i, ret = 0;
1388c9c50009SAlex Williamson     uint32_t data;
1389c9c50009SAlex Williamson 
1390c9c50009SAlex Williamson     /* Defer to a kernel implemented reset */
1391c9c50009SAlex Williamson     if (vdev->vbasedev.reset_works) {
1392c9c50009SAlex Williamson         trace_vfio_quirk_ati_bonaire_reset_skipped(vdev->vbasedev.name);
1393c9c50009SAlex Williamson         return -ENODEV;
1394c9c50009SAlex Williamson     }
1395c9c50009SAlex Williamson 
1396c9c50009SAlex Williamson     /* Enable only memory BAR access */
1397c9c50009SAlex Williamson     vfio_pci_write_config(pdev, PCI_COMMAND, PCI_COMMAND_MEMORY, 2);
1398c9c50009SAlex Williamson 
1399c9c50009SAlex Williamson     /* Reset only works if SMC firmware is loaded and running */
1400c9c50009SAlex Williamson     if (!vfio_radeon_smc_is_running(vdev)) {
1401c9c50009SAlex Williamson         ret = -EINVAL;
1402c9c50009SAlex Williamson         trace_vfio_quirk_ati_bonaire_reset_no_smc(vdev->vbasedev.name);
1403c9c50009SAlex Williamson         goto out;
1404c9c50009SAlex Williamson     }
1405c9c50009SAlex Williamson 
1406c9c50009SAlex Williamson     /* Make sure only the GFX function is reset */
1407c9c50009SAlex Williamson     vfio_radeon_set_gfx_only_reset(vdev);
1408c9c50009SAlex Williamson 
1409c9c50009SAlex Williamson     /* AMD PCI config reset */
1410c9c50009SAlex Williamson     vfio_pci_write_config(pdev, 0x7c, 0x39d5e86b, 4);
1411c9c50009SAlex Williamson     usleep(100);
1412c9c50009SAlex Williamson 
1413c9c50009SAlex Williamson     /* Read back the memory size to make sure we're out of reset */
1414c9c50009SAlex Williamson     for (i = 0; i < 100000; i++) {
1415c9c50009SAlex Williamson         if (vfio_region_read(&vdev->bars[5].region, 0x5428, 4) != 0xffffffff) {
1416c9c50009SAlex Williamson             goto reset_smc;
1417c9c50009SAlex Williamson         }
1418c9c50009SAlex Williamson         usleep(1);
1419c9c50009SAlex Williamson     }
1420c9c50009SAlex Williamson 
1421c9c50009SAlex Williamson     trace_vfio_quirk_ati_bonaire_reset_timeout(vdev->vbasedev.name);
1422c9c50009SAlex Williamson 
1423c9c50009SAlex Williamson reset_smc:
1424c9c50009SAlex Williamson     /* Reset SMC */
1425c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000000, 4);
1426c9c50009SAlex Williamson     data = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1427c9c50009SAlex Williamson     data |= 1;
1428c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x204, data, 4);
1429c9c50009SAlex Williamson 
1430c9c50009SAlex Williamson     /* Disable SMC clock */
1431c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
1432c9c50009SAlex Williamson     data = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1433c9c50009SAlex Williamson     data |= 1;
1434c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x204, data, 4);
1435c9c50009SAlex Williamson 
1436c9c50009SAlex Williamson     trace_vfio_quirk_ati_bonaire_reset_done(vdev->vbasedev.name);
1437c9c50009SAlex Williamson 
1438c9c50009SAlex Williamson out:
1439c9c50009SAlex Williamson     /* Restore PCI command register */
1440c9c50009SAlex Williamson     vfio_pci_write_config(pdev, PCI_COMMAND, 0, 2);
1441c9c50009SAlex Williamson 
1442c9c50009SAlex Williamson     return ret;
1443c9c50009SAlex Williamson }
1444c9c50009SAlex Williamson 
1445c9c50009SAlex Williamson void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev)
1446c9c50009SAlex Williamson {
1447ff635e37SAlex Williamson     switch (vdev->vendor_id) {
1448c9c50009SAlex Williamson     case 0x1002:
1449ff635e37SAlex Williamson         switch (vdev->device_id) {
1450c9c50009SAlex Williamson         /* Bonaire */
1451c9c50009SAlex Williamson         case 0x6649: /* Bonaire [FirePro W5100] */
1452c9c50009SAlex Williamson         case 0x6650:
1453c9c50009SAlex Williamson         case 0x6651:
1454c9c50009SAlex Williamson         case 0x6658: /* Bonaire XTX [Radeon R7 260X] */
1455c9c50009SAlex Williamson         case 0x665c: /* Bonaire XT [Radeon HD 7790/8770 / R9 260 OEM] */
1456c9c50009SAlex Williamson         case 0x665d: /* Bonaire [Radeon R7 200 Series] */
1457c9c50009SAlex Williamson         /* Hawaii */
1458c9c50009SAlex Williamson         case 0x67A0: /* Hawaii XT GL [FirePro W9100] */
1459c9c50009SAlex Williamson         case 0x67A1: /* Hawaii PRO GL [FirePro W8100] */
1460c9c50009SAlex Williamson         case 0x67A2:
1461c9c50009SAlex Williamson         case 0x67A8:
1462c9c50009SAlex Williamson         case 0x67A9:
1463c9c50009SAlex Williamson         case 0x67AA:
1464c9c50009SAlex Williamson         case 0x67B0: /* Hawaii XT [Radeon R9 290X] */
1465c9c50009SAlex Williamson         case 0x67B1: /* Hawaii PRO [Radeon R9 290] */
1466c9c50009SAlex Williamson         case 0x67B8:
1467c9c50009SAlex Williamson         case 0x67B9:
1468c9c50009SAlex Williamson         case 0x67BA:
1469c9c50009SAlex Williamson         case 0x67BE:
1470c9c50009SAlex Williamson             vdev->resetfn = vfio_radeon_reset;
1471c9c50009SAlex Williamson             trace_vfio_quirk_ati_bonaire_reset(vdev->vbasedev.name);
1472c9c50009SAlex Williamson             break;
1473c9c50009SAlex Williamson         }
1474c9c50009SAlex Williamson         break;
1475c9c50009SAlex Williamson     }
1476c9c50009SAlex Williamson }
1477dfbee78dSAlex Williamson 
1478dfbee78dSAlex Williamson /*
1479dfbee78dSAlex Williamson  * The NVIDIA GPUDirect P2P Vendor capability allows the user to specify
1480dfbee78dSAlex Williamson  * devices as a member of a clique.  Devices within the same clique ID
1481dfbee78dSAlex Williamson  * are capable of direct P2P.  It's the user's responsibility that this
1482dfbee78dSAlex Williamson  * is correct.  The spec says that this may reside at any unused config
1483dfbee78dSAlex Williamson  * offset, but reserves and recommends hypervisors place this at C8h.
1484dfbee78dSAlex Williamson  * The spec also states that the hypervisor should place this capability
1485dfbee78dSAlex Williamson  * at the end of the capability list, thus next is defined as 0h.
1486dfbee78dSAlex Williamson  *
1487dfbee78dSAlex Williamson  * +----------------+----------------+----------------+----------------+
1488dfbee78dSAlex Williamson  * | sig 7:0 ('P')  |  vndr len (8h) |    next (0h)   |   cap id (9h)  |
1489dfbee78dSAlex Williamson  * +----------------+----------------+----------------+----------------+
1490dfbee78dSAlex Williamson  * | rsvd 15:7(0h),id 6:3,ver 2:0(0h)|          sig 23:8 ('P2')        |
1491dfbee78dSAlex Williamson  * +---------------------------------+---------------------------------+
1492dfbee78dSAlex Williamson  *
1493dfbee78dSAlex Williamson  * https://lists.gnu.org/archive/html/qemu-devel/2017-08/pdfUda5iEpgOS.pdf
1494f6b30c19SAlex Williamson  *
1495f6b30c19SAlex Williamson  * Specification for Turing and later GPU architectures:
1496f6b30c19SAlex Williamson  * https://lists.gnu.org/archive/html/qemu-devel/2023-06/pdf142OR4O4c2.pdf
1497dfbee78dSAlex Williamson  */
1498dfbee78dSAlex Williamson static void get_nv_gpudirect_clique_id(Object *obj, Visitor *v,
1499dfbee78dSAlex Williamson                                        const char *name, void *opaque,
1500dfbee78dSAlex Williamson                                        Error **errp)
1501dfbee78dSAlex Williamson {
1502dfbee78dSAlex Williamson     Property *prop = opaque;
15031e198715SEduardo Habkost     uint8_t *ptr = object_field_prop_ptr(obj, prop);
1504dfbee78dSAlex Williamson 
1505dfbee78dSAlex Williamson     visit_type_uint8(v, name, ptr, errp);
1506dfbee78dSAlex Williamson }
1507dfbee78dSAlex Williamson 
1508dfbee78dSAlex Williamson static void set_nv_gpudirect_clique_id(Object *obj, Visitor *v,
1509dfbee78dSAlex Williamson                                        const char *name, void *opaque,
1510dfbee78dSAlex Williamson                                        Error **errp)
1511dfbee78dSAlex Williamson {
1512dfbee78dSAlex Williamson     Property *prop = opaque;
15131e198715SEduardo Habkost     uint8_t value, *ptr = object_field_prop_ptr(obj, prop);
1514dfbee78dSAlex Williamson 
1515668f62ecSMarkus Armbruster     if (!visit_type_uint8(v, name, &value, errp)) {
1516dfbee78dSAlex Williamson         return;
1517dfbee78dSAlex Williamson     }
1518dfbee78dSAlex Williamson 
1519dfbee78dSAlex Williamson     if (value & ~0xF) {
1520dfbee78dSAlex Williamson         error_setg(errp, "Property %s: valid range 0-15", name);
1521dfbee78dSAlex Williamson         return;
1522dfbee78dSAlex Williamson     }
1523dfbee78dSAlex Williamson 
1524dfbee78dSAlex Williamson     *ptr = value;
1525dfbee78dSAlex Williamson }
1526dfbee78dSAlex Williamson 
1527dfbee78dSAlex Williamson const PropertyInfo qdev_prop_nv_gpudirect_clique = {
1528dfbee78dSAlex Williamson     .name = "uint4",
1529dfbee78dSAlex Williamson     .description = "NVIDIA GPUDirect Clique ID (0 - 15)",
1530dfbee78dSAlex Williamson     .get = get_nv_gpudirect_clique_id,
1531dfbee78dSAlex Williamson     .set = set_nv_gpudirect_clique_id,
1532dfbee78dSAlex Williamson };
1533dfbee78dSAlex Williamson 
15340ddcb39cSAlex Williamson static bool is_valid_std_cap_offset(uint8_t pos)
15350ddcb39cSAlex Williamson {
15360ddcb39cSAlex Williamson     return (pos >= PCI_STD_HEADER_SIZEOF &&
15370ddcb39cSAlex Williamson             pos <= (PCI_CFG_SPACE_SIZE - PCI_CAP_SIZEOF));
15380ddcb39cSAlex Williamson }
15390ddcb39cSAlex Williamson 
15400a0bda0aSZhenzhong Duan static bool vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
1541dfbee78dSAlex Williamson {
154244765508SZhao Liu     ERRP_GUARD();
1543dfbee78dSAlex Williamson     PCIDevice *pdev = &vdev->pdev;
1544f6b30c19SAlex Williamson     int ret, pos;
1545f6b30c19SAlex Williamson     bool c8_conflict = false, d4_conflict = false;
1546f6b30c19SAlex Williamson     uint8_t tmp;
1547dfbee78dSAlex Williamson 
1548dfbee78dSAlex Williamson     if (vdev->nv_gpudirect_clique == 0xFF) {
15490a0bda0aSZhenzhong Duan         return true;
1550dfbee78dSAlex Williamson     }
1551dfbee78dSAlex Williamson 
1552dfbee78dSAlex Williamson     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID)) {
1553dfbee78dSAlex Williamson         error_setg(errp, "NVIDIA GPUDirect Clique ID: invalid device vendor");
15540a0bda0aSZhenzhong Duan         return false;
1555dfbee78dSAlex Williamson     }
1556dfbee78dSAlex Williamson 
1557dfbee78dSAlex Williamson     if (pci_get_byte(pdev->config + PCI_CLASS_DEVICE + 1) !=
1558dfbee78dSAlex Williamson         PCI_BASE_CLASS_DISPLAY) {
1559dfbee78dSAlex Williamson         error_setg(errp, "NVIDIA GPUDirect Clique ID: unsupported PCI class");
15600a0bda0aSZhenzhong Duan         return false;
1561dfbee78dSAlex Williamson     }
1562dfbee78dSAlex Williamson 
1563f6b30c19SAlex Williamson     /*
1564f6b30c19SAlex Williamson      * Per the updated specification above, it's recommended to use offset
1565f6b30c19SAlex Williamson      * D4h for Turing and later GPU architectures due to a conflict of the
1566f6b30c19SAlex Williamson      * MSI-X capability at C8h.  We don't know how to determine the GPU
1567f6b30c19SAlex Williamson      * architecture, instead we walk the capability chain to mark conflicts
1568f6b30c19SAlex Williamson      * and choose one or error based on the result.
1569f6b30c19SAlex Williamson      *
1570f6b30c19SAlex Williamson      * NB. Cap list head in pdev->config is already cleared, read from device.
1571f6b30c19SAlex Williamson      */
1572f6b30c19SAlex Williamson     ret = pread(vdev->vbasedev.fd, &tmp, 1,
1573f6b30c19SAlex Williamson                 vdev->config_offset + PCI_CAPABILITY_LIST);
15740ddcb39cSAlex Williamson     if (ret != 1 || !is_valid_std_cap_offset(tmp)) {
1575f6b30c19SAlex Williamson         error_setg(errp, "NVIDIA GPUDirect Clique ID: error getting cap list");
15760a0bda0aSZhenzhong Duan         return false;
1577f6b30c19SAlex Williamson     }
1578f6b30c19SAlex Williamson 
1579f6b30c19SAlex Williamson     do {
1580f6b30c19SAlex Williamson         if (tmp == 0xC8) {
1581f6b30c19SAlex Williamson             c8_conflict = true;
1582f6b30c19SAlex Williamson         } else if (tmp == 0xD4) {
1583f6b30c19SAlex Williamson             d4_conflict = true;
1584f6b30c19SAlex Williamson         }
1585f6b30c19SAlex Williamson         tmp = pdev->config[tmp + PCI_CAP_LIST_NEXT];
15860ddcb39cSAlex Williamson     } while (is_valid_std_cap_offset(tmp));
1587f6b30c19SAlex Williamson 
1588f6b30c19SAlex Williamson     if (!c8_conflict) {
1589f6b30c19SAlex Williamson         pos = 0xC8;
1590f6b30c19SAlex Williamson     } else if (!d4_conflict) {
1591f6b30c19SAlex Williamson         pos = 0xD4;
1592f6b30c19SAlex Williamson     } else {
1593f6b30c19SAlex Williamson         error_setg(errp, "NVIDIA GPUDirect Clique ID: invalid config space");
15940a0bda0aSZhenzhong Duan         return false;
1595f6b30c19SAlex Williamson     }
1596f6b30c19SAlex Williamson 
1597dfbee78dSAlex Williamson     ret = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, 8, errp);
1598dfbee78dSAlex Williamson     if (ret < 0) {
1599dfbee78dSAlex Williamson         error_prepend(errp, "Failed to add NVIDIA GPUDirect cap: ");
16000a0bda0aSZhenzhong Duan         return false;
1601dfbee78dSAlex Williamson     }
1602dfbee78dSAlex Williamson 
1603dfbee78dSAlex Williamson     memset(vdev->emulated_config_bits + pos, 0xFF, 8);
1604dfbee78dSAlex Williamson     pos += PCI_CAP_FLAGS;
1605dfbee78dSAlex Williamson     pci_set_byte(pdev->config + pos++, 8);
1606dfbee78dSAlex Williamson     pci_set_byte(pdev->config + pos++, 'P');
1607dfbee78dSAlex Williamson     pci_set_byte(pdev->config + pos++, '2');
1608dfbee78dSAlex Williamson     pci_set_byte(pdev->config + pos++, 'P');
1609dfbee78dSAlex Williamson     pci_set_byte(pdev->config + pos++, vdev->nv_gpudirect_clique << 3);
1610dfbee78dSAlex Williamson     pci_set_byte(pdev->config + pos, 0);
1611dfbee78dSAlex Williamson 
16120a0bda0aSZhenzhong Duan     return true;
1613dfbee78dSAlex Williamson }
1614dfbee78dSAlex Williamson 
1615ee7932b0SJon Derrick /*
1616ee7932b0SJon Derrick  * The VMD endpoint provides a real PCIe domain to the guest and the guest
1617ee7932b0SJon Derrick  * kernel performs enumeration of the VMD sub-device domain. Guest transactions
1618ee7932b0SJon Derrick  * to VMD sub-devices go through MMU translation from guest addresses to
1619ee7932b0SJon Derrick  * physical addresses. When MMIO goes to an endpoint after being translated to
1620ee7932b0SJon Derrick  * physical addresses, the bridge rejects the transaction because the window
1621ee7932b0SJon Derrick  * has been programmed with guest addresses.
1622ee7932b0SJon Derrick  *
1623ee7932b0SJon Derrick  * VMD can use the Host Physical Address in order to correctly program the
1624ee7932b0SJon Derrick  * bridge windows in its PCIe domain. VMD device 28C0 has HPA shadow registers
1625ee7932b0SJon Derrick  * located at offset 0x2000 in MEMBAR2 (BAR 4). This quirk provides the HPA
1626ee7932b0SJon Derrick  * shadow registers in a vendor-specific capability register for devices
1627ee7932b0SJon Derrick  * without native support. The position of 0xE8-0xFF is in the reserved range
1628ee7932b0SJon Derrick  * of the VMD device capability space following the Power Management
1629ee7932b0SJon Derrick  * Capability.
1630ee7932b0SJon Derrick  */
1631ee7932b0SJon Derrick #define VMD_SHADOW_CAP_VER 1
1632ee7932b0SJon Derrick #define VMD_SHADOW_CAP_LEN 24
16330a0bda0aSZhenzhong Duan static bool vfio_add_vmd_shadow_cap(VFIOPCIDevice *vdev, Error **errp)
1634ee7932b0SJon Derrick {
163544765508SZhao Liu     ERRP_GUARD();
1636ee7932b0SJon Derrick     uint8_t membar_phys[16];
1637ee7932b0SJon Derrick     int ret, pos = 0xE8;
1638ee7932b0SJon Derrick 
1639ee7932b0SJon Derrick     if (!(vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, 0x201D) ||
1640ee7932b0SJon Derrick           vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, 0x467F) ||
1641ee7932b0SJon Derrick           vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, 0x4C3D) ||
1642ee7932b0SJon Derrick           vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, 0x9A0B))) {
16430a0bda0aSZhenzhong Duan         return true;
1644ee7932b0SJon Derrick     }
1645ee7932b0SJon Derrick 
1646ee7932b0SJon Derrick     ret = pread(vdev->vbasedev.fd, membar_phys, 16,
1647ee7932b0SJon Derrick                 vdev->config_offset + PCI_BASE_ADDRESS_2);
1648ee7932b0SJon Derrick     if (ret != 16) {
1649ee7932b0SJon Derrick         error_report("VMD %s cannot read MEMBARs (%d)",
1650ee7932b0SJon Derrick                      vdev->vbasedev.name, ret);
16510a0bda0aSZhenzhong Duan         return false;
1652ee7932b0SJon Derrick     }
1653ee7932b0SJon Derrick 
1654ee7932b0SJon Derrick     ret = pci_add_capability(&vdev->pdev, PCI_CAP_ID_VNDR, pos,
1655ee7932b0SJon Derrick                              VMD_SHADOW_CAP_LEN, errp);
1656ee7932b0SJon Derrick     if (ret < 0) {
1657ee7932b0SJon Derrick         error_prepend(errp, "Failed to add VMD MEMBAR Shadow cap: ");
16580a0bda0aSZhenzhong Duan         return false;
1659ee7932b0SJon Derrick     }
1660ee7932b0SJon Derrick 
1661ee7932b0SJon Derrick     memset(vdev->emulated_config_bits + pos, 0xFF, VMD_SHADOW_CAP_LEN);
1662ee7932b0SJon Derrick     pos += PCI_CAP_FLAGS;
1663ee7932b0SJon Derrick     pci_set_byte(vdev->pdev.config + pos++, VMD_SHADOW_CAP_LEN);
1664ee7932b0SJon Derrick     pci_set_byte(vdev->pdev.config + pos++, VMD_SHADOW_CAP_VER);
1665ee7932b0SJon Derrick     pci_set_long(vdev->pdev.config + pos, 0x53484457); /* SHDW */
1666ee7932b0SJon Derrick     memcpy(vdev->pdev.config + pos + 4, membar_phys, 16);
1667ee7932b0SJon Derrick 
16680a0bda0aSZhenzhong Duan     return true;
1669ee7932b0SJon Derrick }
1670ee7932b0SJon Derrick 
16710a0bda0aSZhenzhong Duan bool vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp)
1672ee7932b0SJon Derrick {
16730a0bda0aSZhenzhong Duan     if (!vfio_add_nv_gpudirect_cap(vdev, errp)) {
16740a0bda0aSZhenzhong Duan         return false;
1675ee7932b0SJon Derrick     }
1676ee7932b0SJon Derrick 
16770a0bda0aSZhenzhong Duan     if (!vfio_add_vmd_shadow_cap(vdev, errp)) {
16780a0bda0aSZhenzhong Duan         return false;
1679ee7932b0SJon Derrick     }
1680ee7932b0SJon Derrick 
16810a0bda0aSZhenzhong Duan     return true;
1682ee7932b0SJon Derrick }
1683