xref: /qemu/hw/vfio/pci-quirks.c (revision 24202d2b561c3b4c48bd28383c8c34b4ac66c2bf)
1c00d61d8SAlex Williamson /*
2c00d61d8SAlex Williamson  * device quirks for PCI devices
3c00d61d8SAlex Williamson  *
4c00d61d8SAlex Williamson  * Copyright Red Hat, Inc. 2012-2015
5c00d61d8SAlex Williamson  *
6c00d61d8SAlex Williamson  * Authors:
7c00d61d8SAlex Williamson  *  Alex Williamson <alex.williamson@redhat.com>
8c00d61d8SAlex Williamson  *
9c00d61d8SAlex Williamson  * This work is licensed under the terms of the GNU GPL, version 2.  See
10c00d61d8SAlex Williamson  * the COPYING file in the top-level directory.
11c00d61d8SAlex Williamson  */
12c00d61d8SAlex Williamson 
13c6eacb1aSPeter Maydell #include "qemu/osdep.h"
142becc36aSPaolo Bonzini #include CONFIG_DEVICES
15475fbf0aSTony Nguyen #include "exec/memop.h"
16e0255bb1SPhilippe Mathieu-Daudé #include "qemu/units.h"
17*24202d2bSPrasad J Pandit #include "qemu/log.h"
18c4c45e94SAlex Williamson #include "qemu/error-report.h"
19c958c51dSAlex Williamson #include "qemu/main-loop.h"
200b8fa32fSMarkus Armbruster #include "qemu/module.h"
21c4c45e94SAlex Williamson #include "qemu/range.h"
22c4c45e94SAlex Williamson #include "qapi/error.h"
23dfbee78dSAlex Williamson #include "qapi/visitor.h"
242b1dbd0dSAlex Williamson #include <sys/ioctl.h>
25650d103dSMarkus Armbruster #include "hw/hw.h"
26c4c45e94SAlex Williamson #include "hw/nvram/fw_cfg.h"
27a27bd6c7SMarkus Armbruster #include "hw/qdev-properties.h"
28c00d61d8SAlex Williamson #include "pci.h"
29c00d61d8SAlex Williamson #include "trace.h"
30c00d61d8SAlex Williamson 
/*
 * List of device ids/vendor ids for which to disable
 * option rom loading. This avoids the guest hangs during rom
 * execution as noticed with the BCM 57810 card, for lack of a
 * better way to handle such issues.
 * The user can still override by specifying a romfile or
 * rombar=1.
 * Please see https://bugs.launchpad.net/qemu/+bug/1284874
 * for an analysis of the 57810 card hang. When adding
 * a new vendor id/device id combination below, please also add
 * your card/environment details and information that could
 * help in debugging to the bug tracking this issue.
 */
/* Vendor/device ID pairs checked by vfio_blacklist_opt_rom(). */
static const struct {
    uint32_t vendor;    /* PCI vendor ID */
    uint32_t device;    /* PCI device ID */
} romblacklist[] = {
    /* Broadcom BCM 57810 */
    { .vendor = 0x14e4, .device = 0x168e },
};
50c00d61d8SAlex Williamson 
51c00d61d8SAlex Williamson bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev)
52c00d61d8SAlex Williamson {
53056dfcb6SAlex Williamson     int i;
54c00d61d8SAlex Williamson 
55056dfcb6SAlex Williamson     for (i = 0 ; i < ARRAY_SIZE(romblacklist); i++) {
56056dfcb6SAlex Williamson         if (vfio_pci_is(vdev, romblacklist[i].vendor, romblacklist[i].device)) {
57056dfcb6SAlex Williamson             trace_vfio_quirk_rom_blacklisted(vdev->vbasedev.name,
58056dfcb6SAlex Williamson                                              romblacklist[i].vendor,
59056dfcb6SAlex Williamson                                              romblacklist[i].device);
60c00d61d8SAlex Williamson             return true;
61c00d61d8SAlex Williamson         }
62c00d61d8SAlex Williamson     }
63c00d61d8SAlex Williamson     return false;
64c00d61d8SAlex Williamson }
65c00d61d8SAlex Williamson 
66c00d61d8SAlex Williamson /*
670e54f24aSAlex Williamson  * Device specific region quirks (mostly backdoors to PCI config space)
68c00d61d8SAlex Williamson  */
69c00d61d8SAlex Williamson 
/*
 * The generic window quirks operate on an address and data register,
 * vfio_generic_window_address_quirk handles the address register and
 * vfio_generic_window_data_quirk handles the data register.  These ops
 * pass reads and writes through to hardware until a value matching the
 * stored address match/mask is written.  When this occurs, the data
 * register accesses emulated PCI config space for the device rather than
 * passing through accesses.  This enables devices where PCI config space
 * is accessible behind a window register to maintain the virtualization
 * provided through vfio.
 */
/* One match rule applied to values written to a window address register. */
typedef struct VFIOConfigWindowMatch {
    /* Value the write must equal once the @mask bits are cleared */
    uint32_t match;
    /* Bits of the written value retained as the window address */
    uint32_t mask;
} VFIOConfigWindowMatch;
850e54f24aSAlex Williamson 
860e54f24aSAlex Williamson typedef struct VFIOConfigWindowQuirk {
870e54f24aSAlex Williamson     struct VFIOPCIDevice *vdev;
880e54f24aSAlex Williamson 
890e54f24aSAlex Williamson     uint32_t address_val;
900e54f24aSAlex Williamson 
910e54f24aSAlex Williamson     uint32_t address_offset;
920e54f24aSAlex Williamson     uint32_t data_offset;
930e54f24aSAlex Williamson 
940e54f24aSAlex Williamson     bool window_enabled;
950e54f24aSAlex Williamson     uint8_t bar;
960e54f24aSAlex Williamson 
970e54f24aSAlex Williamson     MemoryRegion *addr_mem;
980e54f24aSAlex Williamson     MemoryRegion *data_mem;
990e54f24aSAlex Williamson 
1000e54f24aSAlex Williamson     uint32_t nr_matches;
1010e54f24aSAlex Williamson     VFIOConfigWindowMatch matches[];
1020e54f24aSAlex Williamson } VFIOConfigWindowQuirk;
1030e54f24aSAlex Williamson 
1040e54f24aSAlex Williamson static uint64_t vfio_generic_window_quirk_address_read(void *opaque,
1050e54f24aSAlex Williamson                                                        hwaddr addr,
1060e54f24aSAlex Williamson                                                        unsigned size)
1070e54f24aSAlex Williamson {
1080e54f24aSAlex Williamson     VFIOConfigWindowQuirk *window = opaque;
1090e54f24aSAlex Williamson     VFIOPCIDevice *vdev = window->vdev;
1100e54f24aSAlex Williamson 
1110e54f24aSAlex Williamson     return vfio_region_read(&vdev->bars[window->bar].region,
1120e54f24aSAlex Williamson                             addr + window->address_offset, size);
1130e54f24aSAlex Williamson }
1140e54f24aSAlex Williamson 
1150e54f24aSAlex Williamson static void vfio_generic_window_quirk_address_write(void *opaque, hwaddr addr,
1160e54f24aSAlex Williamson                                                     uint64_t data,
1170e54f24aSAlex Williamson                                                     unsigned size)
1180e54f24aSAlex Williamson {
1190e54f24aSAlex Williamson     VFIOConfigWindowQuirk *window = opaque;
1200e54f24aSAlex Williamson     VFIOPCIDevice *vdev = window->vdev;
1210e54f24aSAlex Williamson     int i;
1220e54f24aSAlex Williamson 
1230e54f24aSAlex Williamson     window->window_enabled = false;
1240e54f24aSAlex Williamson 
1250e54f24aSAlex Williamson     vfio_region_write(&vdev->bars[window->bar].region,
1260e54f24aSAlex Williamson                       addr + window->address_offset, data, size);
1270e54f24aSAlex Williamson 
1280e54f24aSAlex Williamson     for (i = 0; i < window->nr_matches; i++) {
1290e54f24aSAlex Williamson         if ((data & ~window->matches[i].mask) == window->matches[i].match) {
1300e54f24aSAlex Williamson             window->window_enabled = true;
1310e54f24aSAlex Williamson             window->address_val = data & window->matches[i].mask;
1320e54f24aSAlex Williamson             trace_vfio_quirk_generic_window_address_write(vdev->vbasedev.name,
1330e54f24aSAlex Williamson                                     memory_region_name(window->addr_mem), data);
1340e54f24aSAlex Williamson             break;
1350e54f24aSAlex Williamson         }
1360e54f24aSAlex Williamson     }
1370e54f24aSAlex Williamson }
1380e54f24aSAlex Williamson 
1390e54f24aSAlex Williamson static const MemoryRegionOps vfio_generic_window_address_quirk = {
1400e54f24aSAlex Williamson     .read = vfio_generic_window_quirk_address_read,
1410e54f24aSAlex Williamson     .write = vfio_generic_window_quirk_address_write,
1420e54f24aSAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
1430e54f24aSAlex Williamson };
1440e54f24aSAlex Williamson 
1450e54f24aSAlex Williamson static uint64_t vfio_generic_window_quirk_data_read(void *opaque,
1460e54f24aSAlex Williamson                                                     hwaddr addr, unsigned size)
1470e54f24aSAlex Williamson {
1480e54f24aSAlex Williamson     VFIOConfigWindowQuirk *window = opaque;
1490e54f24aSAlex Williamson     VFIOPCIDevice *vdev = window->vdev;
1500e54f24aSAlex Williamson     uint64_t data;
1510e54f24aSAlex Williamson 
1520e54f24aSAlex Williamson     /* Always read data reg, discard if window enabled */
1530e54f24aSAlex Williamson     data = vfio_region_read(&vdev->bars[window->bar].region,
1540e54f24aSAlex Williamson                             addr + window->data_offset, size);
1550e54f24aSAlex Williamson 
1560e54f24aSAlex Williamson     if (window->window_enabled) {
1570e54f24aSAlex Williamson         data = vfio_pci_read_config(&vdev->pdev, window->address_val, size);
1580e54f24aSAlex Williamson         trace_vfio_quirk_generic_window_data_read(vdev->vbasedev.name,
1590e54f24aSAlex Williamson                                     memory_region_name(window->data_mem), data);
1600e54f24aSAlex Williamson     }
1610e54f24aSAlex Williamson 
1620e54f24aSAlex Williamson     return data;
1630e54f24aSAlex Williamson }
1640e54f24aSAlex Williamson 
1650e54f24aSAlex Williamson static void vfio_generic_window_quirk_data_write(void *opaque, hwaddr addr,
1660e54f24aSAlex Williamson                                                  uint64_t data, unsigned size)
1670e54f24aSAlex Williamson {
1680e54f24aSAlex Williamson     VFIOConfigWindowQuirk *window = opaque;
1690e54f24aSAlex Williamson     VFIOPCIDevice *vdev = window->vdev;
1700e54f24aSAlex Williamson 
1710e54f24aSAlex Williamson     if (window->window_enabled) {
1720e54f24aSAlex Williamson         vfio_pci_write_config(&vdev->pdev, window->address_val, data, size);
1730e54f24aSAlex Williamson         trace_vfio_quirk_generic_window_data_write(vdev->vbasedev.name,
1740e54f24aSAlex Williamson                                     memory_region_name(window->data_mem), data);
1750e54f24aSAlex Williamson         return;
1760e54f24aSAlex Williamson     }
1770e54f24aSAlex Williamson 
1780e54f24aSAlex Williamson     vfio_region_write(&vdev->bars[window->bar].region,
1790e54f24aSAlex Williamson                       addr + window->data_offset, data, size);
1800e54f24aSAlex Williamson }
1810e54f24aSAlex Williamson 
1820e54f24aSAlex Williamson static const MemoryRegionOps vfio_generic_window_data_quirk = {
1830e54f24aSAlex Williamson     .read = vfio_generic_window_quirk_data_read,
1840e54f24aSAlex Williamson     .write = vfio_generic_window_quirk_data_write,
1850e54f24aSAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
1860e54f24aSAlex Williamson };
1870e54f24aSAlex Williamson 
1880d38fb1cSAlex Williamson /*
1890d38fb1cSAlex Williamson  * The generic mirror quirk handles devices which expose PCI config space
1900d38fb1cSAlex Williamson  * through a region within a BAR.  When enabled, reads and writes are
1910d38fb1cSAlex Williamson  * redirected through to emulated PCI config space.  XXX if PCI config space
1920d38fb1cSAlex Williamson  * used memory regions, this could just be an alias.
1930d38fb1cSAlex Williamson  */
1940d38fb1cSAlex Williamson typedef struct VFIOConfigMirrorQuirk {
1950d38fb1cSAlex Williamson     struct VFIOPCIDevice *vdev;
1960d38fb1cSAlex Williamson     uint32_t offset;
1970d38fb1cSAlex Williamson     uint8_t bar;
1980d38fb1cSAlex Williamson     MemoryRegion *mem;
199c958c51dSAlex Williamson     uint8_t data[];
2000d38fb1cSAlex Williamson } VFIOConfigMirrorQuirk;
2010d38fb1cSAlex Williamson 
2020d38fb1cSAlex Williamson static uint64_t vfio_generic_quirk_mirror_read(void *opaque,
2030d38fb1cSAlex Williamson                                                hwaddr addr, unsigned size)
2040d38fb1cSAlex Williamson {
2050d38fb1cSAlex Williamson     VFIOConfigMirrorQuirk *mirror = opaque;
2060d38fb1cSAlex Williamson     VFIOPCIDevice *vdev = mirror->vdev;
2070d38fb1cSAlex Williamson     uint64_t data;
2080d38fb1cSAlex Williamson 
2090d38fb1cSAlex Williamson     /* Read and discard in case the hardware cares */
2100d38fb1cSAlex Williamson     (void)vfio_region_read(&vdev->bars[mirror->bar].region,
2110d38fb1cSAlex Williamson                            addr + mirror->offset, size);
2120d38fb1cSAlex Williamson 
2130d38fb1cSAlex Williamson     data = vfio_pci_read_config(&vdev->pdev, addr, size);
2140d38fb1cSAlex Williamson     trace_vfio_quirk_generic_mirror_read(vdev->vbasedev.name,
2150d38fb1cSAlex Williamson                                          memory_region_name(mirror->mem),
2160d38fb1cSAlex Williamson                                          addr, data);
2170d38fb1cSAlex Williamson     return data;
2180d38fb1cSAlex Williamson }
2190d38fb1cSAlex Williamson 
2200d38fb1cSAlex Williamson static void vfio_generic_quirk_mirror_write(void *opaque, hwaddr addr,
2210d38fb1cSAlex Williamson                                             uint64_t data, unsigned size)
2220d38fb1cSAlex Williamson {
2230d38fb1cSAlex Williamson     VFIOConfigMirrorQuirk *mirror = opaque;
2240d38fb1cSAlex Williamson     VFIOPCIDevice *vdev = mirror->vdev;
2250d38fb1cSAlex Williamson 
2260d38fb1cSAlex Williamson     vfio_pci_write_config(&vdev->pdev, addr, data, size);
2270d38fb1cSAlex Williamson     trace_vfio_quirk_generic_mirror_write(vdev->vbasedev.name,
2280d38fb1cSAlex Williamson                                           memory_region_name(mirror->mem),
2290d38fb1cSAlex Williamson                                           addr, data);
2300d38fb1cSAlex Williamson }
2310d38fb1cSAlex Williamson 
2320d38fb1cSAlex Williamson static const MemoryRegionOps vfio_generic_mirror_quirk = {
2330d38fb1cSAlex Williamson     .read = vfio_generic_quirk_mirror_read,
2340d38fb1cSAlex Williamson     .write = vfio_generic_quirk_mirror_write,
2350d38fb1cSAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
2360d38fb1cSAlex Williamson };
2370d38fb1cSAlex Williamson 
/*
 * Is range1 [first1, first1 + len1) fully contained within
 * range2 [first2, first2 + len2)?
 *
 * The check compares the offset of range1 inside range2 against the spare
 * length instead of comparing end addresses, so a range whose end would
 * exceed UINT64_MAX cannot wrap around and be reported as contained.
 * For inputs whose ends do not overflow the result is the same as
 * "first1 >= first2 && first1 + len1 <= first2 + len2".
 */
static bool vfio_range_contained(uint64_t first1, uint64_t len1,
                                 uint64_t first2, uint64_t len2)
{
    /* Both subtractions are guarded by the preceding comparisons */
    return first1 >= first2 && len1 <= len2 &&
           first1 - first2 <= len2 - len1;
}
243c00d61d8SAlex Williamson 
/* PCI vendor ID used by the ATI quirk probes to select devices */
#define PCI_VENDOR_ID_ATI               0x1002
245c00d61d8SAlex Williamson 
246c00d61d8SAlex Williamson /*
247c00d61d8SAlex Williamson  * Radeon HD cards (HD5450 & HD7850) report the upper byte of the I/O port BAR
248c00d61d8SAlex Williamson  * through VGA register 0x3c3.  On newer cards, the I/O port BAR is always
249c00d61d8SAlex Williamson  * BAR4 (older cards like the X550 used BAR1, but we don't care to support
250c00d61d8SAlex Williamson  * those).  Note that on bare metal, a read of 0x3c3 doesn't always return the
251c00d61d8SAlex Williamson  * I/O port BAR address.  Originally this was coded to return the virtual BAR
252c00d61d8SAlex Williamson  * address only if the physical register read returns the actual BAR address,
253c00d61d8SAlex Williamson  * but users have reported greater success if we return the virtual address
254c00d61d8SAlex Williamson  * unconditionally.
255c00d61d8SAlex Williamson  */
256c00d61d8SAlex Williamson static uint64_t vfio_ati_3c3_quirk_read(void *opaque,
257c00d61d8SAlex Williamson                                         hwaddr addr, unsigned size)
258c00d61d8SAlex Williamson {
259b946d286SAlex Williamson     VFIOPCIDevice *vdev = opaque;
260c00d61d8SAlex Williamson     uint64_t data = vfio_pci_read_config(&vdev->pdev,
261b946d286SAlex Williamson                                          PCI_BASE_ADDRESS_4 + 1, size);
262b946d286SAlex Williamson 
263b946d286SAlex Williamson     trace_vfio_quirk_ati_3c3_read(vdev->vbasedev.name, data);
264c00d61d8SAlex Williamson 
265c00d61d8SAlex Williamson     return data;
266c00d61d8SAlex Williamson }
267c00d61d8SAlex Williamson 
268*24202d2bSPrasad J Pandit static void vfio_ati_3c3_quirk_write(void *opaque, hwaddr addr,
269*24202d2bSPrasad J Pandit                                         uint64_t data, unsigned size)
270*24202d2bSPrasad J Pandit {
271*24202d2bSPrasad J Pandit     qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid access\n", __func__);
272*24202d2bSPrasad J Pandit }
273*24202d2bSPrasad J Pandit 
274c00d61d8SAlex Williamson static const MemoryRegionOps vfio_ati_3c3_quirk = {
275c00d61d8SAlex Williamson     .read = vfio_ati_3c3_quirk_read,
276*24202d2bSPrasad J Pandit     .write = vfio_ati_3c3_quirk_write,
277c00d61d8SAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
278c00d61d8SAlex Williamson };
279c00d61d8SAlex Williamson 
28029d62771SThomas Huth VFIOQuirk *vfio_quirk_alloc(int nr_mem)
281bcf3c3d0SAlex Williamson {
282bcf3c3d0SAlex Williamson     VFIOQuirk *quirk = g_new0(VFIOQuirk, 1);
283c958c51dSAlex Williamson     QLIST_INIT(&quirk->ioeventfds);
284bcf3c3d0SAlex Williamson     quirk->mem = g_new0(MemoryRegion, nr_mem);
285bcf3c3d0SAlex Williamson     quirk->nr_mem = nr_mem;
286bcf3c3d0SAlex Williamson 
287bcf3c3d0SAlex Williamson     return quirk;
288bcf3c3d0SAlex Williamson }
289bcf3c3d0SAlex Williamson 
2902b1dbd0dSAlex Williamson static void vfio_ioeventfd_exit(VFIOPCIDevice *vdev, VFIOIOEventFD *ioeventfd)
291c958c51dSAlex Williamson {
292c958c51dSAlex Williamson     QLIST_REMOVE(ioeventfd, next);
293c958c51dSAlex Williamson     memory_region_del_eventfd(ioeventfd->mr, ioeventfd->addr, ioeventfd->size,
294c958c51dSAlex Williamson                               true, ioeventfd->data, &ioeventfd->e);
2952b1dbd0dSAlex Williamson 
2962b1dbd0dSAlex Williamson     if (ioeventfd->vfio) {
2972b1dbd0dSAlex Williamson         struct vfio_device_ioeventfd vfio_ioeventfd;
2982b1dbd0dSAlex Williamson 
2992b1dbd0dSAlex Williamson         vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd);
3002b1dbd0dSAlex Williamson         vfio_ioeventfd.flags = ioeventfd->size;
3012b1dbd0dSAlex Williamson         vfio_ioeventfd.data = ioeventfd->data;
3022b1dbd0dSAlex Williamson         vfio_ioeventfd.offset = ioeventfd->region->fd_offset +
3032b1dbd0dSAlex Williamson                                 ioeventfd->region_addr;
3042b1dbd0dSAlex Williamson         vfio_ioeventfd.fd = -1;
3052b1dbd0dSAlex Williamson 
3062b1dbd0dSAlex Williamson         if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd)) {
3072b1dbd0dSAlex Williamson             error_report("Failed to remove vfio ioeventfd for %s+0x%"
3082b1dbd0dSAlex Williamson                          HWADDR_PRIx"[%d]:0x%"PRIx64" (%m)",
3092b1dbd0dSAlex Williamson                          memory_region_name(ioeventfd->mr), ioeventfd->addr,
3102b1dbd0dSAlex Williamson                          ioeventfd->size, ioeventfd->data);
3112b1dbd0dSAlex Williamson         }
3122b1dbd0dSAlex Williamson     } else {
3132b1dbd0dSAlex Williamson         qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
3142b1dbd0dSAlex Williamson                             NULL, NULL, NULL);
3152b1dbd0dSAlex Williamson     }
3162b1dbd0dSAlex Williamson 
317c958c51dSAlex Williamson     event_notifier_cleanup(&ioeventfd->e);
318c958c51dSAlex Williamson     trace_vfio_ioeventfd_exit(memory_region_name(ioeventfd->mr),
319c958c51dSAlex Williamson                               (uint64_t)ioeventfd->addr, ioeventfd->size,
320c958c51dSAlex Williamson                               ioeventfd->data);
321c958c51dSAlex Williamson     g_free(ioeventfd);
322c958c51dSAlex Williamson }
323c958c51dSAlex Williamson 
324c958c51dSAlex Williamson static void vfio_drop_dynamic_eventfds(VFIOPCIDevice *vdev, VFIOQuirk *quirk)
325c958c51dSAlex Williamson {
326c958c51dSAlex Williamson     VFIOIOEventFD *ioeventfd, *tmp;
327c958c51dSAlex Williamson 
328c958c51dSAlex Williamson     QLIST_FOREACH_SAFE(ioeventfd, &quirk->ioeventfds, next, tmp) {
329c958c51dSAlex Williamson         if (ioeventfd->dynamic) {
3302b1dbd0dSAlex Williamson             vfio_ioeventfd_exit(vdev, ioeventfd);
331c958c51dSAlex Williamson         }
332c958c51dSAlex Williamson     }
333c958c51dSAlex Williamson }
334c958c51dSAlex Williamson 
335c958c51dSAlex Williamson static void vfio_ioeventfd_handler(void *opaque)
336c958c51dSAlex Williamson {
337c958c51dSAlex Williamson     VFIOIOEventFD *ioeventfd = opaque;
338c958c51dSAlex Williamson 
339c958c51dSAlex Williamson     if (event_notifier_test_and_clear(&ioeventfd->e)) {
340c958c51dSAlex Williamson         vfio_region_write(ioeventfd->region, ioeventfd->region_addr,
341c958c51dSAlex Williamson                           ioeventfd->data, ioeventfd->size);
342c958c51dSAlex Williamson         trace_vfio_ioeventfd_handler(memory_region_name(ioeventfd->mr),
343c958c51dSAlex Williamson                                      (uint64_t)ioeventfd->addr, ioeventfd->size,
344c958c51dSAlex Williamson                                      ioeventfd->data);
345c958c51dSAlex Williamson     }
346c958c51dSAlex Williamson }
347c958c51dSAlex Williamson 
348c958c51dSAlex Williamson static VFIOIOEventFD *vfio_ioeventfd_init(VFIOPCIDevice *vdev,
349c958c51dSAlex Williamson                                           MemoryRegion *mr, hwaddr addr,
350c958c51dSAlex Williamson                                           unsigned size, uint64_t data,
351c958c51dSAlex Williamson                                           VFIORegion *region,
352c958c51dSAlex Williamson                                           hwaddr region_addr, bool dynamic)
353c958c51dSAlex Williamson {
354c958c51dSAlex Williamson     VFIOIOEventFD *ioeventfd;
355c958c51dSAlex Williamson 
356c958c51dSAlex Williamson     if (vdev->no_kvm_ioeventfd) {
357c958c51dSAlex Williamson         return NULL;
358c958c51dSAlex Williamson     }
359c958c51dSAlex Williamson 
360c958c51dSAlex Williamson     ioeventfd = g_malloc0(sizeof(*ioeventfd));
361c958c51dSAlex Williamson 
362c958c51dSAlex Williamson     if (event_notifier_init(&ioeventfd->e, 0)) {
363c958c51dSAlex Williamson         g_free(ioeventfd);
364c958c51dSAlex Williamson         return NULL;
365c958c51dSAlex Williamson     }
366c958c51dSAlex Williamson 
367c958c51dSAlex Williamson     /*
368c958c51dSAlex Williamson      * MemoryRegion and relative offset, plus additional ioeventfd setup
369c958c51dSAlex Williamson      * parameters for configuring and later tearing down KVM ioeventfd.
370c958c51dSAlex Williamson      */
371c958c51dSAlex Williamson     ioeventfd->mr = mr;
372c958c51dSAlex Williamson     ioeventfd->addr = addr;
373c958c51dSAlex Williamson     ioeventfd->size = size;
374c958c51dSAlex Williamson     ioeventfd->data = data;
375c958c51dSAlex Williamson     ioeventfd->dynamic = dynamic;
376c958c51dSAlex Williamson     /*
377c958c51dSAlex Williamson      * VFIORegion and relative offset for implementing the userspace
378c958c51dSAlex Williamson      * handler.  data & size fields shared for both uses.
379c958c51dSAlex Williamson      */
380c958c51dSAlex Williamson     ioeventfd->region = region;
381c958c51dSAlex Williamson     ioeventfd->region_addr = region_addr;
382c958c51dSAlex Williamson 
3832b1dbd0dSAlex Williamson     if (!vdev->no_vfio_ioeventfd) {
3842b1dbd0dSAlex Williamson         struct vfio_device_ioeventfd vfio_ioeventfd;
3852b1dbd0dSAlex Williamson 
3862b1dbd0dSAlex Williamson         vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd);
3872b1dbd0dSAlex Williamson         vfio_ioeventfd.flags = ioeventfd->size;
3882b1dbd0dSAlex Williamson         vfio_ioeventfd.data = ioeventfd->data;
3892b1dbd0dSAlex Williamson         vfio_ioeventfd.offset = ioeventfd->region->fd_offset +
3902b1dbd0dSAlex Williamson                                 ioeventfd->region_addr;
3912b1dbd0dSAlex Williamson         vfio_ioeventfd.fd = event_notifier_get_fd(&ioeventfd->e);
3922b1dbd0dSAlex Williamson 
3932b1dbd0dSAlex Williamson         ioeventfd->vfio = !ioctl(vdev->vbasedev.fd,
3942b1dbd0dSAlex Williamson                                  VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd);
3952b1dbd0dSAlex Williamson     }
3962b1dbd0dSAlex Williamson 
3972b1dbd0dSAlex Williamson     if (!ioeventfd->vfio) {
398c958c51dSAlex Williamson         qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
399c958c51dSAlex Williamson                             vfio_ioeventfd_handler, NULL, ioeventfd);
4002b1dbd0dSAlex Williamson     }
4012b1dbd0dSAlex Williamson 
402c958c51dSAlex Williamson     memory_region_add_eventfd(ioeventfd->mr, ioeventfd->addr, ioeventfd->size,
403c958c51dSAlex Williamson                               true, ioeventfd->data, &ioeventfd->e);
404c958c51dSAlex Williamson     trace_vfio_ioeventfd_init(memory_region_name(mr), (uint64_t)addr,
4052b1dbd0dSAlex Williamson                               size, data, ioeventfd->vfio);
406c958c51dSAlex Williamson 
407c958c51dSAlex Williamson     return ioeventfd;
408c958c51dSAlex Williamson }
409c958c51dSAlex Williamson 
410c00d61d8SAlex Williamson static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev)
411c00d61d8SAlex Williamson {
412c00d61d8SAlex Williamson     VFIOQuirk *quirk;
413c00d61d8SAlex Williamson 
414c00d61d8SAlex Williamson     /*
415c00d61d8SAlex Williamson      * As long as the BAR is >= 256 bytes it will be aligned such that the
416c00d61d8SAlex Williamson      * lower byte is always zero.  Filter out anything else, if it exists.
417c00d61d8SAlex Williamson      */
418b946d286SAlex Williamson     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
419b946d286SAlex Williamson         !vdev->bars[4].ioport || vdev->bars[4].region.size < 256) {
420c00d61d8SAlex Williamson         return;
421c00d61d8SAlex Williamson     }
422c00d61d8SAlex Williamson 
423bcf3c3d0SAlex Williamson     quirk = vfio_quirk_alloc(1);
424c00d61d8SAlex Williamson 
425b946d286SAlex Williamson     memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, vdev,
426c00d61d8SAlex Williamson                           "vfio-ati-3c3-quirk", 1);
4272d82f8a3SAlex Williamson     memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
4288c4f2348SAlex Williamson                                 3 /* offset 3 bytes from 0x3c0 */, quirk->mem);
429c00d61d8SAlex Williamson 
4302d82f8a3SAlex Williamson     QLIST_INSERT_HEAD(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks,
431c00d61d8SAlex Williamson                       quirk, next);
432c00d61d8SAlex Williamson 
433b946d286SAlex Williamson     trace_vfio_quirk_ati_3c3_probe(vdev->vbasedev.name);
434c00d61d8SAlex Williamson }
435c00d61d8SAlex Williamson 
436c00d61d8SAlex Williamson /*
4370e54f24aSAlex Williamson  * Newer ATI/AMD devices, including HD5450 and HD7850, have a mirror to PCI
438c00d61d8SAlex Williamson  * config space through MMIO BAR2 at offset 0x4000.  Nothing seems to access
439c00d61d8SAlex Williamson  * the MMIO space directly, but a window to this space is provided through
440c00d61d8SAlex Williamson  * I/O port BAR4.  Offset 0x0 is the address register and offset 0x4 is the
441c00d61d8SAlex Williamson  * data register.  When the address is programmed to a range of 0x4000-0x4fff
442c00d61d8SAlex Williamson  * PCI configuration space is available.  Experimentation seems to indicate
4430e54f24aSAlex Williamson  * that read-only may be provided by hardware.
444c00d61d8SAlex Williamson  */
4450e54f24aSAlex Williamson static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr)
446c00d61d8SAlex Williamson {
447c00d61d8SAlex Williamson     VFIOQuirk *quirk;
4480e54f24aSAlex Williamson     VFIOConfigWindowQuirk *window;
449c00d61d8SAlex Williamson 
4500e54f24aSAlex Williamson     /* This windows doesn't seem to be used except by legacy VGA code */
4510e54f24aSAlex Williamson     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
4524d3fc4fdSAlex Williamson         !vdev->vga || nr != 4) {
453c00d61d8SAlex Williamson         return;
454c00d61d8SAlex Williamson     }
455c00d61d8SAlex Williamson 
456bcf3c3d0SAlex Williamson     quirk = vfio_quirk_alloc(2);
4570e54f24aSAlex Williamson     window = quirk->data = g_malloc0(sizeof(*window) +
4580e54f24aSAlex Williamson                                      sizeof(VFIOConfigWindowMatch));
4590e54f24aSAlex Williamson     window->vdev = vdev;
4600e54f24aSAlex Williamson     window->address_offset = 0;
4610e54f24aSAlex Williamson     window->data_offset = 4;
4620e54f24aSAlex Williamson     window->nr_matches = 1;
4630e54f24aSAlex Williamson     window->matches[0].match = 0x4000;
464f5793fd9SAlex Williamson     window->matches[0].mask = vdev->config_size - 1;
4650e54f24aSAlex Williamson     window->bar = nr;
4660e54f24aSAlex Williamson     window->addr_mem = &quirk->mem[0];
4670e54f24aSAlex Williamson     window->data_mem = &quirk->mem[1];
468c00d61d8SAlex Williamson 
4690e54f24aSAlex Williamson     memory_region_init_io(window->addr_mem, OBJECT(vdev),
4700e54f24aSAlex Williamson                           &vfio_generic_window_address_quirk, window,
4710e54f24aSAlex Williamson                           "vfio-ati-bar4-window-address-quirk", 4);
472db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
4730e54f24aSAlex Williamson                                         window->address_offset,
4740e54f24aSAlex Williamson                                         window->addr_mem, 1);
4750e54f24aSAlex Williamson 
4760e54f24aSAlex Williamson     memory_region_init_io(window->data_mem, OBJECT(vdev),
4770e54f24aSAlex Williamson                           &vfio_generic_window_data_quirk, window,
4780e54f24aSAlex Williamson                           "vfio-ati-bar4-window-data-quirk", 4);
479db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
4800e54f24aSAlex Williamson                                         window->data_offset,
4810e54f24aSAlex Williamson                                         window->data_mem, 1);
482c00d61d8SAlex Williamson 
483c00d61d8SAlex Williamson     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
484c00d61d8SAlex Williamson 
4850e54f24aSAlex Williamson     trace_vfio_quirk_ati_bar4_probe(vdev->vbasedev.name);
486c00d61d8SAlex Williamson }
487c00d61d8SAlex Williamson 
488c00d61d8SAlex Williamson /*
4890d38fb1cSAlex Williamson  * Trap the BAR2 MMIO mirror to config space as well.
490c00d61d8SAlex Williamson  */
4910d38fb1cSAlex Williamson static void vfio_probe_ati_bar2_quirk(VFIOPCIDevice *vdev, int nr)
492c00d61d8SAlex Williamson {
493c00d61d8SAlex Williamson     VFIOQuirk *quirk;
4940d38fb1cSAlex Williamson     VFIOConfigMirrorQuirk *mirror;
495c00d61d8SAlex Williamson 
496c00d61d8SAlex Williamson     /* Only enable on newer devices where BAR2 is 64bit */
4970d38fb1cSAlex Williamson     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
4984d3fc4fdSAlex Williamson         !vdev->vga || nr != 2 || !vdev->bars[2].mem64) {
499c00d61d8SAlex Williamson         return;
500c00d61d8SAlex Williamson     }
501c00d61d8SAlex Williamson 
502bcf3c3d0SAlex Williamson     quirk = vfio_quirk_alloc(1);
5030d38fb1cSAlex Williamson     mirror = quirk->data = g_malloc0(sizeof(*mirror));
504bcf3c3d0SAlex Williamson     mirror->mem = quirk->mem;
5050d38fb1cSAlex Williamson     mirror->vdev = vdev;
5060d38fb1cSAlex Williamson     mirror->offset = 0x4000;
5070d38fb1cSAlex Williamson     mirror->bar = nr;
508c00d61d8SAlex Williamson 
5090d38fb1cSAlex Williamson     memory_region_init_io(mirror->mem, OBJECT(vdev),
5100d38fb1cSAlex Williamson                           &vfio_generic_mirror_quirk, mirror,
5110d38fb1cSAlex Williamson                           "vfio-ati-bar2-4000-quirk", PCI_CONFIG_SPACE_SIZE);
512db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
5130d38fb1cSAlex Williamson                                         mirror->offset, mirror->mem, 1);
514c00d61d8SAlex Williamson 
515c00d61d8SAlex Williamson     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
516c00d61d8SAlex Williamson 
5170d38fb1cSAlex Williamson     trace_vfio_quirk_ati_bar2_probe(vdev->vbasedev.name);
518c00d61d8SAlex Williamson }
519c00d61d8SAlex Williamson 
520c00d61d8SAlex Williamson /*
521c00d61d8SAlex Williamson  * Older ATI/AMD cards like the X550 have a similar window to that above.
522c00d61d8SAlex Williamson  * I/O port BAR1 provides a window to a mirror of PCI config space located
523c00d61d8SAlex Williamson  * in BAR2 at offset 0xf00.  We don't care to support such older cards, but
524c00d61d8SAlex Williamson  * note it for future reference.
525c00d61d8SAlex Williamson  */
526c00d61d8SAlex Williamson 
/*
 * Nvidia has several different methods to get to config space; the
 * nouveau project has several of these documented here:
 * https://github.com/pathscale/envytools/tree/master/hwdocs
 *
 * The first quirk is actually not documented in envytools and is found
 * on 10de:01d1 (NVIDIA Corporation G72 [GeForce 7300 LE]).  This is an
 * NV46 chipset.  The backdoor uses the legacy VGA I/O ports to access
 * the mirror of PCI config space found at BAR0 offset 0x1800.  The access
 * sequence first writes 0x338 to I/O port 0x3d4.  The target offset is
 * then written to 0x3d0.  Finally 0x538 is written for a read and 0x738
 * is written for a write to 0x3d4.  The BAR0 offset is then accessible
 * through 0x3d0.  This quirk doesn't seem to be necessary on newer cards
 * that use the I/O port BAR5 window but it doesn't hurt to leave it.
 */
/*
 * Steps of the 0x3d4/0x3d0 backdoor access sequence.  NONE is kept at 0 so
 * that a zero-filled quirk structure starts out idle.
 */
typedef enum {
    NONE = 0,
    SELECT,
    WINDOW,
    READ,
    WRITE,
} VFIONvidia3d0State;

/*
 * Printable names for tracing, indexed by VFIONvidia3d0State.  Designated
 * initializers keep each name tied to its enumerator, and the table itself
 * is read-only.
 */
static const char * const nv3d0_states[] = {
    [NONE]   = "NONE",
    [SELECT] = "SELECT",
    [WINDOW] = "WINDOW",
    [READ]   = "READ",
    [WRITE]  = "WRITE",
};
5456029a424SAlex Williamson 
5466029a424SAlex Williamson typedef struct VFIONvidia3d0Quirk {
5476029a424SAlex Williamson     VFIOPCIDevice *vdev;
5486029a424SAlex Williamson     VFIONvidia3d0State state;
5496029a424SAlex Williamson     uint32_t offset;
5506029a424SAlex Williamson } VFIONvidia3d0Quirk;
5516029a424SAlex Williamson 
5526029a424SAlex Williamson static uint64_t vfio_nvidia_3d4_quirk_read(void *opaque,
5536029a424SAlex Williamson                                            hwaddr addr, unsigned size)
5546029a424SAlex Williamson {
5556029a424SAlex Williamson     VFIONvidia3d0Quirk *quirk = opaque;
5566029a424SAlex Williamson     VFIOPCIDevice *vdev = quirk->vdev;
5576029a424SAlex Williamson 
5586029a424SAlex Williamson     quirk->state = NONE;
5596029a424SAlex Williamson 
5602d82f8a3SAlex Williamson     return vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
5616029a424SAlex Williamson                          addr + 0x14, size);
5626029a424SAlex Williamson }
5636029a424SAlex Williamson 
5646029a424SAlex Williamson static void vfio_nvidia_3d4_quirk_write(void *opaque, hwaddr addr,
5656029a424SAlex Williamson                                         uint64_t data, unsigned size)
5666029a424SAlex Williamson {
5676029a424SAlex Williamson     VFIONvidia3d0Quirk *quirk = opaque;
5686029a424SAlex Williamson     VFIOPCIDevice *vdev = quirk->vdev;
5696029a424SAlex Williamson     VFIONvidia3d0State old_state = quirk->state;
5706029a424SAlex Williamson 
5716029a424SAlex Williamson     quirk->state = NONE;
5726029a424SAlex Williamson 
5736029a424SAlex Williamson     switch (data) {
5746029a424SAlex Williamson     case 0x338:
5756029a424SAlex Williamson         if (old_state == NONE) {
5766029a424SAlex Williamson             quirk->state = SELECT;
5776029a424SAlex Williamson             trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
5786029a424SAlex Williamson                                               nv3d0_states[quirk->state]);
5796029a424SAlex Williamson         }
5806029a424SAlex Williamson         break;
5816029a424SAlex Williamson     case 0x538:
5826029a424SAlex Williamson         if (old_state == WINDOW) {
5836029a424SAlex Williamson             quirk->state = READ;
5846029a424SAlex Williamson             trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
5856029a424SAlex Williamson                                               nv3d0_states[quirk->state]);
5866029a424SAlex Williamson         }
5876029a424SAlex Williamson         break;
5886029a424SAlex Williamson     case 0x738:
5896029a424SAlex Williamson         if (old_state == WINDOW) {
5906029a424SAlex Williamson             quirk->state = WRITE;
5916029a424SAlex Williamson             trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
5926029a424SAlex Williamson                                               nv3d0_states[quirk->state]);
5936029a424SAlex Williamson         }
5946029a424SAlex Williamson         break;
5956029a424SAlex Williamson     }
5966029a424SAlex Williamson 
5972d82f8a3SAlex Williamson     vfio_vga_write(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
5986029a424SAlex Williamson                    addr + 0x14, data, size);
5996029a424SAlex Williamson }
6006029a424SAlex Williamson 
6016029a424SAlex Williamson static const MemoryRegionOps vfio_nvidia_3d4_quirk = {
6026029a424SAlex Williamson     .read = vfio_nvidia_3d4_quirk_read,
6036029a424SAlex Williamson     .write = vfio_nvidia_3d4_quirk_write,
6046029a424SAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
605c00d61d8SAlex Williamson };
606c00d61d8SAlex Williamson 
607c00d61d8SAlex Williamson static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque,
608c00d61d8SAlex Williamson                                            hwaddr addr, unsigned size)
609c00d61d8SAlex Williamson {
6106029a424SAlex Williamson     VFIONvidia3d0Quirk *quirk = opaque;
611c00d61d8SAlex Williamson     VFIOPCIDevice *vdev = quirk->vdev;
6126029a424SAlex Williamson     VFIONvidia3d0State old_state = quirk->state;
6132d82f8a3SAlex Williamson     uint64_t data = vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
6146029a424SAlex Williamson                                   addr + 0x10, size);
615c00d61d8SAlex Williamson 
6166029a424SAlex Williamson     quirk->state = NONE;
6176029a424SAlex Williamson 
6186029a424SAlex Williamson     if (old_state == READ &&
6196029a424SAlex Williamson         (quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) {
6206029a424SAlex Williamson         uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1);
6216029a424SAlex Williamson 
6226029a424SAlex Williamson         data = vfio_pci_read_config(&vdev->pdev, offset, size);
6236029a424SAlex Williamson         trace_vfio_quirk_nvidia_3d0_read(vdev->vbasedev.name,
6246029a424SAlex Williamson                                          offset, size, data);
625c00d61d8SAlex Williamson     }
626c00d61d8SAlex Williamson 
627c00d61d8SAlex Williamson     return data;
628c00d61d8SAlex Williamson }
629c00d61d8SAlex Williamson 
630c00d61d8SAlex Williamson static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr,
631c00d61d8SAlex Williamson                                         uint64_t data, unsigned size)
632c00d61d8SAlex Williamson {
6336029a424SAlex Williamson     VFIONvidia3d0Quirk *quirk = opaque;
634c00d61d8SAlex Williamson     VFIOPCIDevice *vdev = quirk->vdev;
6356029a424SAlex Williamson     VFIONvidia3d0State old_state = quirk->state;
636c00d61d8SAlex Williamson 
6376029a424SAlex Williamson     quirk->state = NONE;
6386029a424SAlex Williamson 
6396029a424SAlex Williamson     if (old_state == SELECT) {
6406029a424SAlex Williamson         quirk->offset = (uint32_t)data;
6416029a424SAlex Williamson         quirk->state = WINDOW;
6426029a424SAlex Williamson         trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
6436029a424SAlex Williamson                                           nv3d0_states[quirk->state]);
6446029a424SAlex Williamson     } else if (old_state == WRITE) {
6456029a424SAlex Williamson         if ((quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) {
6466029a424SAlex Williamson             uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1);
6476029a424SAlex Williamson 
6486029a424SAlex Williamson             vfio_pci_write_config(&vdev->pdev, offset, data, size);
6496029a424SAlex Williamson             trace_vfio_quirk_nvidia_3d0_write(vdev->vbasedev.name,
6506029a424SAlex Williamson                                               offset, data, size);
651c00d61d8SAlex Williamson             return;
652c00d61d8SAlex Williamson         }
653c00d61d8SAlex Williamson     }
654c00d61d8SAlex Williamson 
6552d82f8a3SAlex Williamson     vfio_vga_write(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
6566029a424SAlex Williamson                    addr + 0x10, data, size);
657c00d61d8SAlex Williamson }
658c00d61d8SAlex Williamson 
659c00d61d8SAlex Williamson static const MemoryRegionOps vfio_nvidia_3d0_quirk = {
660c00d61d8SAlex Williamson     .read = vfio_nvidia_3d0_quirk_read,
661c00d61d8SAlex Williamson     .write = vfio_nvidia_3d0_quirk_write,
662c00d61d8SAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
663c00d61d8SAlex Williamson };
664c00d61d8SAlex Williamson 
665c00d61d8SAlex Williamson static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev)
666c00d61d8SAlex Williamson {
667c00d61d8SAlex Williamson     VFIOQuirk *quirk;
6686029a424SAlex Williamson     VFIONvidia3d0Quirk *data;
669c00d61d8SAlex Williamson 
670db32d0f4SAlex Williamson     if (vdev->no_geforce_quirks ||
671db32d0f4SAlex Williamson         !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
672c00d61d8SAlex Williamson         !vdev->bars[1].region.size) {
673c00d61d8SAlex Williamson         return;
674c00d61d8SAlex Williamson     }
675c00d61d8SAlex Williamson 
676bcf3c3d0SAlex Williamson     quirk = vfio_quirk_alloc(2);
6776029a424SAlex Williamson     quirk->data = data = g_malloc0(sizeof(*data));
6786029a424SAlex Williamson     data->vdev = vdev;
679c00d61d8SAlex Williamson 
6806029a424SAlex Williamson     memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_nvidia_3d4_quirk,
6816029a424SAlex Williamson                           data, "vfio-nvidia-3d4-quirk", 2);
6822d82f8a3SAlex Williamson     memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
6836029a424SAlex Williamson                                 0x14 /* 0x3c0 + 0x14 */, &quirk->mem[0]);
6846029a424SAlex Williamson 
6856029a424SAlex Williamson     memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_nvidia_3d0_quirk,
6866029a424SAlex Williamson                           data, "vfio-nvidia-3d0-quirk", 2);
6872d82f8a3SAlex Williamson     memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
6886029a424SAlex Williamson                                 0x10 /* 0x3c0 + 0x10 */, &quirk->mem[1]);
689c00d61d8SAlex Williamson 
6902d82f8a3SAlex Williamson     QLIST_INSERT_HEAD(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks,
691c00d61d8SAlex Williamson                       quirk, next);
692c00d61d8SAlex Williamson 
6936029a424SAlex Williamson     trace_vfio_quirk_nvidia_3d0_probe(vdev->vbasedev.name);
694c00d61d8SAlex Williamson }
695c00d61d8SAlex Williamson 
/*
 * The second quirk is documented in envytools.  The I/O port BAR5 is just
 * a set of address/data ports to the MMIO BARs.  The BAR we care about is
 * again BAR0.  This backdoor is apparently a bit newer than the one above
 * so we need to trap not only the 256 bytes @0x1800, but also all of PCI
 * config space, including extended space, which is available in the 4k
 * @0x88000.
 */
7030e54f24aSAlex Williamson typedef struct VFIONvidiaBAR5Quirk {
7040e54f24aSAlex Williamson     uint32_t master;
7050e54f24aSAlex Williamson     uint32_t enable;
7060e54f24aSAlex Williamson     MemoryRegion *addr_mem;
7070e54f24aSAlex Williamson     MemoryRegion *data_mem;
7080e54f24aSAlex Williamson     bool enabled;
7090e54f24aSAlex Williamson     VFIOConfigWindowQuirk window; /* last for match data */
7100e54f24aSAlex Williamson } VFIONvidiaBAR5Quirk;
711c00d61d8SAlex Williamson 
7120e54f24aSAlex Williamson static void vfio_nvidia_bar5_enable(VFIONvidiaBAR5Quirk *bar5)
7130e54f24aSAlex Williamson {
7140e54f24aSAlex Williamson     VFIOPCIDevice *vdev = bar5->window.vdev;
7150e54f24aSAlex Williamson 
7160e54f24aSAlex Williamson     if (((bar5->master & bar5->enable) & 0x1) == bar5->enabled) {
7170e54f24aSAlex Williamson         return;
7180e54f24aSAlex Williamson     }
7190e54f24aSAlex Williamson 
7200e54f24aSAlex Williamson     bar5->enabled = !bar5->enabled;
7210e54f24aSAlex Williamson     trace_vfio_quirk_nvidia_bar5_state(vdev->vbasedev.name,
7220e54f24aSAlex Williamson                                        bar5->enabled ?  "Enable" : "Disable");
7230e54f24aSAlex Williamson     memory_region_set_enabled(bar5->addr_mem, bar5->enabled);
7240e54f24aSAlex Williamson     memory_region_set_enabled(bar5->data_mem, bar5->enabled);
7250e54f24aSAlex Williamson }
7260e54f24aSAlex Williamson 
7270e54f24aSAlex Williamson static uint64_t vfio_nvidia_bar5_quirk_master_read(void *opaque,
7280e54f24aSAlex Williamson                                                    hwaddr addr, unsigned size)
7290e54f24aSAlex Williamson {
7300e54f24aSAlex Williamson     VFIONvidiaBAR5Quirk *bar5 = opaque;
7310e54f24aSAlex Williamson     VFIOPCIDevice *vdev = bar5->window.vdev;
7320e54f24aSAlex Williamson 
7330e54f24aSAlex Williamson     return vfio_region_read(&vdev->bars[5].region, addr, size);
7340e54f24aSAlex Williamson }
7350e54f24aSAlex Williamson 
7360e54f24aSAlex Williamson static void vfio_nvidia_bar5_quirk_master_write(void *opaque, hwaddr addr,
737c00d61d8SAlex Williamson                                                 uint64_t data, unsigned size)
738c00d61d8SAlex Williamson {
7390e54f24aSAlex Williamson     VFIONvidiaBAR5Quirk *bar5 = opaque;
7400e54f24aSAlex Williamson     VFIOPCIDevice *vdev = bar5->window.vdev;
741c00d61d8SAlex Williamson 
7420e54f24aSAlex Williamson     vfio_region_write(&vdev->bars[5].region, addr, data, size);
7430e54f24aSAlex Williamson 
7440e54f24aSAlex Williamson     bar5->master = data;
7450e54f24aSAlex Williamson     vfio_nvidia_bar5_enable(bar5);
746c00d61d8SAlex Williamson }
747c00d61d8SAlex Williamson 
7480e54f24aSAlex Williamson static const MemoryRegionOps vfio_nvidia_bar5_quirk_master = {
7490e54f24aSAlex Williamson     .read = vfio_nvidia_bar5_quirk_master_read,
7500e54f24aSAlex Williamson     .write = vfio_nvidia_bar5_quirk_master_write,
751c00d61d8SAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
752c00d61d8SAlex Williamson };
753c00d61d8SAlex Williamson 
7540e54f24aSAlex Williamson static uint64_t vfio_nvidia_bar5_quirk_enable_read(void *opaque,
7550e54f24aSAlex Williamson                                                    hwaddr addr, unsigned size)
756c00d61d8SAlex Williamson {
7570e54f24aSAlex Williamson     VFIONvidiaBAR5Quirk *bar5 = opaque;
7580e54f24aSAlex Williamson     VFIOPCIDevice *vdev = bar5->window.vdev;
759c00d61d8SAlex Williamson 
7600e54f24aSAlex Williamson     return vfio_region_read(&vdev->bars[5].region, addr + 4, size);
7610e54f24aSAlex Williamson }
7620e54f24aSAlex Williamson 
7630e54f24aSAlex Williamson static void vfio_nvidia_bar5_quirk_enable_write(void *opaque, hwaddr addr,
7640e54f24aSAlex Williamson                                                 uint64_t data, unsigned size)
7650e54f24aSAlex Williamson {
7660e54f24aSAlex Williamson     VFIONvidiaBAR5Quirk *bar5 = opaque;
7670e54f24aSAlex Williamson     VFIOPCIDevice *vdev = bar5->window.vdev;
7680e54f24aSAlex Williamson 
7690e54f24aSAlex Williamson     vfio_region_write(&vdev->bars[5].region, addr + 4, data, size);
7700e54f24aSAlex Williamson 
7710e54f24aSAlex Williamson     bar5->enable = data;
7720e54f24aSAlex Williamson     vfio_nvidia_bar5_enable(bar5);
7730e54f24aSAlex Williamson }
7740e54f24aSAlex Williamson 
7750e54f24aSAlex Williamson static const MemoryRegionOps vfio_nvidia_bar5_quirk_enable = {
7760e54f24aSAlex Williamson     .read = vfio_nvidia_bar5_quirk_enable_read,
7770e54f24aSAlex Williamson     .write = vfio_nvidia_bar5_quirk_enable_write,
7780e54f24aSAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
7790e54f24aSAlex Williamson };
7800e54f24aSAlex Williamson 
7810e54f24aSAlex Williamson static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
7820e54f24aSAlex Williamson {
7830e54f24aSAlex Williamson     VFIOQuirk *quirk;
7840e54f24aSAlex Williamson     VFIONvidiaBAR5Quirk *bar5;
7850e54f24aSAlex Williamson     VFIOConfigWindowQuirk *window;
7860e54f24aSAlex Williamson 
787db32d0f4SAlex Williamson     if (vdev->no_geforce_quirks ||
788db32d0f4SAlex Williamson         !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
7898f419c5bSAlex Williamson         !vdev->vga || nr != 5 || !vdev->bars[5].ioport) {
790c00d61d8SAlex Williamson         return;
791c00d61d8SAlex Williamson     }
792c00d61d8SAlex Williamson 
793bcf3c3d0SAlex Williamson     quirk = vfio_quirk_alloc(4);
7940e54f24aSAlex Williamson     bar5 = quirk->data = g_malloc0(sizeof(*bar5) +
7950e54f24aSAlex Williamson                                    (sizeof(VFIOConfigWindowMatch) * 2));
7960e54f24aSAlex Williamson     window = &bar5->window;
797c00d61d8SAlex Williamson 
7980e54f24aSAlex Williamson     window->vdev = vdev;
7990e54f24aSAlex Williamson     window->address_offset = 0x8;
8000e54f24aSAlex Williamson     window->data_offset = 0xc;
8010e54f24aSAlex Williamson     window->nr_matches = 2;
8020e54f24aSAlex Williamson     window->matches[0].match = 0x1800;
8030e54f24aSAlex Williamson     window->matches[0].mask = PCI_CONFIG_SPACE_SIZE - 1;
8040e54f24aSAlex Williamson     window->matches[1].match = 0x88000;
805f5793fd9SAlex Williamson     window->matches[1].mask = vdev->config_size - 1;
8060e54f24aSAlex Williamson     window->bar = nr;
8070e54f24aSAlex Williamson     window->addr_mem = bar5->addr_mem = &quirk->mem[0];
8080e54f24aSAlex Williamson     window->data_mem = bar5->data_mem = &quirk->mem[1];
8090e54f24aSAlex Williamson 
8100e54f24aSAlex Williamson     memory_region_init_io(window->addr_mem, OBJECT(vdev),
8110e54f24aSAlex Williamson                           &vfio_generic_window_address_quirk, window,
8120e54f24aSAlex Williamson                           "vfio-nvidia-bar5-window-address-quirk", 4);
813db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
8140e54f24aSAlex Williamson                                         window->address_offset,
8150e54f24aSAlex Williamson                                         window->addr_mem, 1);
8160e54f24aSAlex Williamson     memory_region_set_enabled(window->addr_mem, false);
8170e54f24aSAlex Williamson 
8180e54f24aSAlex Williamson     memory_region_init_io(window->data_mem, OBJECT(vdev),
8190e54f24aSAlex Williamson                           &vfio_generic_window_data_quirk, window,
8200e54f24aSAlex Williamson                           "vfio-nvidia-bar5-window-data-quirk", 4);
821db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
8220e54f24aSAlex Williamson                                         window->data_offset,
8230e54f24aSAlex Williamson                                         window->data_mem, 1);
8240e54f24aSAlex Williamson     memory_region_set_enabled(window->data_mem, false);
8250e54f24aSAlex Williamson 
8260e54f24aSAlex Williamson     memory_region_init_io(&quirk->mem[2], OBJECT(vdev),
8270e54f24aSAlex Williamson                           &vfio_nvidia_bar5_quirk_master, bar5,
8280e54f24aSAlex Williamson                           "vfio-nvidia-bar5-master-quirk", 4);
829db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
8300e54f24aSAlex Williamson                                         0, &quirk->mem[2], 1);
8310e54f24aSAlex Williamson 
8320e54f24aSAlex Williamson     memory_region_init_io(&quirk->mem[3], OBJECT(vdev),
8330e54f24aSAlex Williamson                           &vfio_nvidia_bar5_quirk_enable, bar5,
8340e54f24aSAlex Williamson                           "vfio-nvidia-bar5-enable-quirk", 4);
835db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
8360e54f24aSAlex Williamson                                         4, &quirk->mem[3], 1);
837c00d61d8SAlex Williamson 
838c00d61d8SAlex Williamson     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
839c00d61d8SAlex Williamson 
8400e54f24aSAlex Williamson     trace_vfio_quirk_nvidia_bar5_probe(vdev->vbasedev.name);
841c00d61d8SAlex Williamson }
842c00d61d8SAlex Williamson 
843c958c51dSAlex Williamson typedef struct LastDataSet {
844c958c51dSAlex Williamson     VFIOQuirk *quirk;
845c958c51dSAlex Williamson     hwaddr addr;
846c958c51dSAlex Williamson     uint64_t data;
847c958c51dSAlex Williamson     unsigned size;
848c958c51dSAlex Williamson     int hits;
849c958c51dSAlex Williamson     int added;
850c958c51dSAlex Williamson } LastDataSet;
851c958c51dSAlex Williamson 
852c958c51dSAlex Williamson #define MAX_DYN_IOEVENTFD 10
853c958c51dSAlex Williamson #define HITS_FOR_IOEVENTFD 10
854c958c51dSAlex Williamson 
8550d38fb1cSAlex Williamson /*
8560d38fb1cSAlex Williamson  * Finally, BAR0 itself.  We want to redirect any accesses to either
8570d38fb1cSAlex Williamson  * 0x1800 or 0x88000 through the PCI config space access functions.
8580d38fb1cSAlex Williamson  */
8590d38fb1cSAlex Williamson static void vfio_nvidia_quirk_mirror_write(void *opaque, hwaddr addr,
860c00d61d8SAlex Williamson                                            uint64_t data, unsigned size)
861c00d61d8SAlex Williamson {
8620d38fb1cSAlex Williamson     VFIOConfigMirrorQuirk *mirror = opaque;
8630d38fb1cSAlex Williamson     VFIOPCIDevice *vdev = mirror->vdev;
864c00d61d8SAlex Williamson     PCIDevice *pdev = &vdev->pdev;
865c958c51dSAlex Williamson     LastDataSet *last = (LastDataSet *)&mirror->data;
866c00d61d8SAlex Williamson 
8670d38fb1cSAlex Williamson     vfio_generic_quirk_mirror_write(opaque, addr, data, size);
868c00d61d8SAlex Williamson 
869c00d61d8SAlex Williamson     /*
870c00d61d8SAlex Williamson      * Nvidia seems to acknowledge MSI interrupts by writing 0xff to the
871c00d61d8SAlex Williamson      * MSI capability ID register.  Both the ID and next register are
872c00d61d8SAlex Williamson      * read-only, so we allow writes covering either of those to real hw.
873c00d61d8SAlex Williamson      */
874c00d61d8SAlex Williamson     if ((pdev->cap_present & QEMU_PCI_CAP_MSI) &&
875c00d61d8SAlex Williamson         vfio_range_contained(addr, size, pdev->msi_cap, PCI_MSI_FLAGS)) {
8760d38fb1cSAlex Williamson         vfio_region_write(&vdev->bars[mirror->bar].region,
8770d38fb1cSAlex Williamson                           addr + mirror->offset, data, size);
8780d38fb1cSAlex Williamson         trace_vfio_quirk_nvidia_bar0_msi_ack(vdev->vbasedev.name);
879c00d61d8SAlex Williamson     }
880c958c51dSAlex Williamson 
881c958c51dSAlex Williamson     /*
882c958c51dSAlex Williamson      * Automatically add an ioeventfd to handle any repeated write with the
883c958c51dSAlex Williamson      * same data and size above the standard PCI config space header.  This is
884c958c51dSAlex Williamson      * primarily expected to accelerate the MSI-ACK behavior, such as noted
885c958c51dSAlex Williamson      * above.  Current hardware/drivers should trigger an ioeventfd at config
886c958c51dSAlex Williamson      * offset 0x704 (region offset 0x88704), with data 0x0, size 4.
887c958c51dSAlex Williamson      *
888c958c51dSAlex Williamson      * The criteria of 10 successive hits is arbitrary but reliably adds the
889c958c51dSAlex Williamson      * MSI-ACK region.  Note that as some writes are bypassed via the ioeventfd,
890c958c51dSAlex Williamson      * the remaining ones have a greater chance of being seen successively.
891c958c51dSAlex Williamson      * To avoid the pathological case of burning up all of QEMU's open file
892c958c51dSAlex Williamson      * handles, arbitrarily limit this algorithm from adding no more than 10
893c958c51dSAlex Williamson      * ioeventfds, print an error if we would have added an 11th, and then
894c958c51dSAlex Williamson      * stop counting.
895c958c51dSAlex Williamson      */
896c958c51dSAlex Williamson     if (!vdev->no_kvm_ioeventfd &&
897c958c51dSAlex Williamson         addr >= PCI_STD_HEADER_SIZEOF && last->added <= MAX_DYN_IOEVENTFD) {
898c958c51dSAlex Williamson         if (addr != last->addr || data != last->data || size != last->size) {
899c958c51dSAlex Williamson             last->addr = addr;
900c958c51dSAlex Williamson             last->data = data;
901c958c51dSAlex Williamson             last->size = size;
902c958c51dSAlex Williamson             last->hits = 1;
903c958c51dSAlex Williamson         } else if (++last->hits >= HITS_FOR_IOEVENTFD) {
904c958c51dSAlex Williamson             if (last->added < MAX_DYN_IOEVENTFD) {
905c958c51dSAlex Williamson                 VFIOIOEventFD *ioeventfd;
906c958c51dSAlex Williamson                 ioeventfd = vfio_ioeventfd_init(vdev, mirror->mem, addr, size,
907c958c51dSAlex Williamson                                         data, &vdev->bars[mirror->bar].region,
908c958c51dSAlex Williamson                                         mirror->offset + addr, true);
909c958c51dSAlex Williamson                 if (ioeventfd) {
910c958c51dSAlex Williamson                     VFIOQuirk *quirk = last->quirk;
911c958c51dSAlex Williamson 
912c958c51dSAlex Williamson                     QLIST_INSERT_HEAD(&quirk->ioeventfds, ioeventfd, next);
913c958c51dSAlex Williamson                     last->added++;
914c958c51dSAlex Williamson                 }
915c958c51dSAlex Williamson             } else {
916c958c51dSAlex Williamson                 last->added++;
917c958c51dSAlex Williamson                 warn_report("NVIDIA ioeventfd queue full for %s, unable to "
918c958c51dSAlex Williamson                             "accelerate 0x%"HWADDR_PRIx", data 0x%"PRIx64", "
919c958c51dSAlex Williamson                             "size %u", vdev->vbasedev.name, addr, data, size);
920c958c51dSAlex Williamson             }
921c958c51dSAlex Williamson         }
922c958c51dSAlex Williamson     }
923c00d61d8SAlex Williamson }
924c00d61d8SAlex Williamson 
9250d38fb1cSAlex Williamson static const MemoryRegionOps vfio_nvidia_mirror_quirk = {
9260d38fb1cSAlex Williamson     .read = vfio_generic_quirk_mirror_read,
9270d38fb1cSAlex Williamson     .write = vfio_nvidia_quirk_mirror_write,
928c00d61d8SAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
929c00d61d8SAlex Williamson };
930c00d61d8SAlex Williamson 
931c958c51dSAlex Williamson static void vfio_nvidia_bar0_quirk_reset(VFIOPCIDevice *vdev, VFIOQuirk *quirk)
932c958c51dSAlex Williamson {
933c958c51dSAlex Williamson     VFIOConfigMirrorQuirk *mirror = quirk->data;
934c958c51dSAlex Williamson     LastDataSet *last = (LastDataSet *)&mirror->data;
935c958c51dSAlex Williamson 
936c958c51dSAlex Williamson     last->addr = last->data = last->size = last->hits = last->added = 0;
937c958c51dSAlex Williamson 
938c958c51dSAlex Williamson     vfio_drop_dynamic_eventfds(vdev, quirk);
939c958c51dSAlex Williamson }
940c958c51dSAlex Williamson 
9410d38fb1cSAlex Williamson static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
942c00d61d8SAlex Williamson {
943c00d61d8SAlex Williamson     VFIOQuirk *quirk;
9440d38fb1cSAlex Williamson     VFIOConfigMirrorQuirk *mirror;
945c958c51dSAlex Williamson     LastDataSet *last;
946c00d61d8SAlex Williamson 
947db32d0f4SAlex Williamson     if (vdev->no_geforce_quirks ||
948db32d0f4SAlex Williamson         !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
9490d38fb1cSAlex Williamson         !vfio_is_vga(vdev) || nr != 0) {
950c00d61d8SAlex Williamson         return;
951c00d61d8SAlex Williamson     }
952c00d61d8SAlex Williamson 
953bcf3c3d0SAlex Williamson     quirk = vfio_quirk_alloc(1);
954c958c51dSAlex Williamson     quirk->reset = vfio_nvidia_bar0_quirk_reset;
955c958c51dSAlex Williamson     mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet));
956bcf3c3d0SAlex Williamson     mirror->mem = quirk->mem;
9570d38fb1cSAlex Williamson     mirror->vdev = vdev;
9580d38fb1cSAlex Williamson     mirror->offset = 0x88000;
9590d38fb1cSAlex Williamson     mirror->bar = nr;
960c958c51dSAlex Williamson     last = (LastDataSet *)&mirror->data;
961c958c51dSAlex Williamson     last->quirk = quirk;
962c00d61d8SAlex Williamson 
9630d38fb1cSAlex Williamson     memory_region_init_io(mirror->mem, OBJECT(vdev),
9640d38fb1cSAlex Williamson                           &vfio_nvidia_mirror_quirk, mirror,
9650d38fb1cSAlex Williamson                           "vfio-nvidia-bar0-88000-mirror-quirk",
966f5793fd9SAlex Williamson                           vdev->config_size);
967db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
9680d38fb1cSAlex Williamson                                         mirror->offset, mirror->mem, 1);
969c00d61d8SAlex Williamson 
970c00d61d8SAlex Williamson     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
971c00d61d8SAlex Williamson 
9720d38fb1cSAlex Williamson     /* The 0x1800 offset mirror only seems to get used by legacy VGA */
9734d3fc4fdSAlex Williamson     if (vdev->vga) {
974bcf3c3d0SAlex Williamson         quirk = vfio_quirk_alloc(1);
975c958c51dSAlex Williamson         quirk->reset = vfio_nvidia_bar0_quirk_reset;
976c958c51dSAlex Williamson         mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet));
977bcf3c3d0SAlex Williamson         mirror->mem = quirk->mem;
9780d38fb1cSAlex Williamson         mirror->vdev = vdev;
9790d38fb1cSAlex Williamson         mirror->offset = 0x1800;
9800d38fb1cSAlex Williamson         mirror->bar = nr;
981c958c51dSAlex Williamson         last = (LastDataSet *)&mirror->data;
982c958c51dSAlex Williamson         last->quirk = quirk;
983c00d61d8SAlex Williamson 
9840d38fb1cSAlex Williamson         memory_region_init_io(mirror->mem, OBJECT(vdev),
9850d38fb1cSAlex Williamson                               &vfio_nvidia_mirror_quirk, mirror,
9860d38fb1cSAlex Williamson                               "vfio-nvidia-bar0-1800-mirror-quirk",
9870d38fb1cSAlex Williamson                               PCI_CONFIG_SPACE_SIZE);
988db0da029SAlex Williamson         memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
9890d38fb1cSAlex Williamson                                             mirror->offset, mirror->mem, 1);
990c00d61d8SAlex Williamson 
991c00d61d8SAlex Williamson         QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
9920d38fb1cSAlex Williamson     }
993c00d61d8SAlex Williamson 
9940d38fb1cSAlex Williamson     trace_vfio_quirk_nvidia_bar0_probe(vdev->vbasedev.name);
995c00d61d8SAlex Williamson }
996c00d61d8SAlex Williamson 
997c00d61d8SAlex Williamson /*
998c00d61d8SAlex Williamson  * TODO - Some Nvidia devices provide config access to their companion HDA
999c00d61d8SAlex Williamson  * device and even to their parent bridge via these config space mirrors.
1000c00d61d8SAlex Williamson  * Add quirks for those regions.
1001c00d61d8SAlex Williamson  */
1002c00d61d8SAlex Williamson 
/* PCI vendor ID matched by the RTL8168 quirk probe below */
#define PCI_VENDOR_ID_REALTEK 0x10ec
1004c00d61d8SAlex Williamson 
1005c00d61d8SAlex Williamson /*
1006c00d61d8SAlex Williamson  * RTL8168 devices have a backdoor that can access the MSI-X table.  At BAR2
1007c00d61d8SAlex Williamson  * offset 0x70 there is a dword data register, offset 0x74 is a dword address
1008c00d61d8SAlex Williamson  * register.  According to the Linux r8169 driver, the MSI-X table is addressed
1009c00d61d8SAlex Williamson  * when the "type" portion of the address register is set to 0x1.  This appears
1010c00d61d8SAlex Williamson  * to be bits 16:30.  Bit 31 is both a write indicator and some sort of
1011c00d61d8SAlex Williamson  * "address latched" indicator.  Bits 12:15 are a mask field, which we can
1012c00d61d8SAlex Williamson  * ignore because the MSI-X table should always be accessed as a dword (full
1013c00d61d8SAlex Williamson  * mask).  Bits 0:11 is offset within the type.
1014c00d61d8SAlex Williamson  *
1015c00d61d8SAlex Williamson  * Example trace:
1016c00d61d8SAlex Williamson  *
1017c00d61d8SAlex Williamson  * Read from MSI-X table offset 0
1018c00d61d8SAlex Williamson  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x1f000, 4) // store read addr
1019c00d61d8SAlex Williamson  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x8001f000 // latch
1020c00d61d8SAlex Williamson  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x70, 4) = 0xfee00398 // read data
1021c00d61d8SAlex Williamson  *
1022c00d61d8SAlex Williamson  * Write 0xfee00000 to MSI-X table offset 0
1023c00d61d8SAlex Williamson  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x70, 0xfee00000, 4) // write data
1024c00d61d8SAlex Williamson  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x8001f000, 4) // do write
1025c00d61d8SAlex Williamson  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x1f000 // complete
1026c00d61d8SAlex Williamson  */
1027954258a5SAlex Williamson typedef struct VFIOrtl8168Quirk {
1028954258a5SAlex Williamson     VFIOPCIDevice *vdev;
1029954258a5SAlex Williamson     uint32_t addr;
1030954258a5SAlex Williamson     uint32_t data;
1031954258a5SAlex Williamson     bool enabled;
1032954258a5SAlex Williamson } VFIOrtl8168Quirk;
1033954258a5SAlex Williamson 
1034954258a5SAlex Williamson static uint64_t vfio_rtl8168_quirk_address_read(void *opaque,
1035c00d61d8SAlex Williamson                                                 hwaddr addr, unsigned size)
1036c00d61d8SAlex Williamson {
1037954258a5SAlex Williamson     VFIOrtl8168Quirk *rtl = opaque;
1038954258a5SAlex Williamson     VFIOPCIDevice *vdev = rtl->vdev;
1039954258a5SAlex Williamson     uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x74, size);
1040c00d61d8SAlex Williamson 
1041954258a5SAlex Williamson     if (rtl->enabled) {
1042954258a5SAlex Williamson         data = rtl->addr ^ 0x80000000U; /* latch/complete */
1043954258a5SAlex Williamson         trace_vfio_quirk_rtl8168_fake_latch(vdev->vbasedev.name, data);
1044c00d61d8SAlex Williamson     }
1045c00d61d8SAlex Williamson 
1046954258a5SAlex Williamson     return data;
1047c00d61d8SAlex Williamson }
1048c00d61d8SAlex Williamson 
1049954258a5SAlex Williamson static void vfio_rtl8168_quirk_address_write(void *opaque, hwaddr addr,
1050c00d61d8SAlex Williamson                                              uint64_t data, unsigned size)
1051c00d61d8SAlex Williamson {
1052954258a5SAlex Williamson     VFIOrtl8168Quirk *rtl = opaque;
1053954258a5SAlex Williamson     VFIOPCIDevice *vdev = rtl->vdev;
1054c00d61d8SAlex Williamson 
1055954258a5SAlex Williamson     rtl->enabled = false;
1056954258a5SAlex Williamson 
1057c00d61d8SAlex Williamson     if ((data & 0x7fff0000) == 0x10000) { /* MSI-X table */
1058954258a5SAlex Williamson         rtl->enabled = true;
1059954258a5SAlex Williamson         rtl->addr = (uint32_t)data;
1060c00d61d8SAlex Williamson 
1061c00d61d8SAlex Williamson         if (data & 0x80000000U) { /* Do write */
1062c00d61d8SAlex Williamson             if (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) {
1063c00d61d8SAlex Williamson                 hwaddr offset = data & 0xfff;
1064954258a5SAlex Williamson                 uint64_t val = rtl->data;
1065c00d61d8SAlex Williamson 
1066954258a5SAlex Williamson                 trace_vfio_quirk_rtl8168_msix_write(vdev->vbasedev.name,
1067c00d61d8SAlex Williamson                                                     (uint16_t)offset, val);
1068c00d61d8SAlex Williamson 
1069c00d61d8SAlex Williamson                 /* Write to the proper guest MSI-X table instead */
1070c00d61d8SAlex Williamson                 memory_region_dispatch_write(&vdev->pdev.msix_table_mmio,
1071d5d680caSTony Nguyen                                              offset, val,
1072d5d680caSTony Nguyen                                              size_memop(size) | MO_LE,
1073c00d61d8SAlex Williamson                                              MEMTXATTRS_UNSPECIFIED);
1074c00d61d8SAlex Williamson             }
1075c00d61d8SAlex Williamson             return; /* Do not write guest MSI-X data to hardware */
1076c00d61d8SAlex Williamson         }
1077c00d61d8SAlex Williamson     }
1078c00d61d8SAlex Williamson 
1079954258a5SAlex Williamson     vfio_region_write(&vdev->bars[2].region, addr + 0x74, data, size);
1080c00d61d8SAlex Williamson }
1081c00d61d8SAlex Williamson 
1082954258a5SAlex Williamson static const MemoryRegionOps vfio_rtl_address_quirk = {
1083954258a5SAlex Williamson     .read = vfio_rtl8168_quirk_address_read,
1084954258a5SAlex Williamson     .write = vfio_rtl8168_quirk_address_write,
1085c00d61d8SAlex Williamson     .valid = {
1086c00d61d8SAlex Williamson         .min_access_size = 4,
1087c00d61d8SAlex Williamson         .max_access_size = 4,
1088c00d61d8SAlex Williamson         .unaligned = false,
1089c00d61d8SAlex Williamson     },
1090c00d61d8SAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
1091c00d61d8SAlex Williamson };
1092c00d61d8SAlex Williamson 
1093954258a5SAlex Williamson static uint64_t vfio_rtl8168_quirk_data_read(void *opaque,
1094954258a5SAlex Williamson                                              hwaddr addr, unsigned size)
1095c00d61d8SAlex Williamson {
1096954258a5SAlex Williamson     VFIOrtl8168Quirk *rtl = opaque;
1097954258a5SAlex Williamson     VFIOPCIDevice *vdev = rtl->vdev;
109831e6a7b1SThorsten Kohfeldt     uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x70, size);
1099c00d61d8SAlex Williamson 
1100954258a5SAlex Williamson     if (rtl->enabled && (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) {
1101954258a5SAlex Williamson         hwaddr offset = rtl->addr & 0xfff;
1102954258a5SAlex Williamson         memory_region_dispatch_read(&vdev->pdev.msix_table_mmio, offset,
1103d5d680caSTony Nguyen                                     &data, size_memop(size) | MO_LE,
1104475fbf0aSTony Nguyen                                     MEMTXATTRS_UNSPECIFIED);
1105954258a5SAlex Williamson         trace_vfio_quirk_rtl8168_msix_read(vdev->vbasedev.name, offset, data);
1106954258a5SAlex Williamson     }
1107954258a5SAlex Williamson 
1108954258a5SAlex Williamson     return data;
1109954258a5SAlex Williamson }
1110954258a5SAlex Williamson 
1111954258a5SAlex Williamson static void vfio_rtl8168_quirk_data_write(void *opaque, hwaddr addr,
1112954258a5SAlex Williamson                                           uint64_t data, unsigned size)
1113954258a5SAlex Williamson {
1114954258a5SAlex Williamson     VFIOrtl8168Quirk *rtl = opaque;
1115954258a5SAlex Williamson     VFIOPCIDevice *vdev = rtl->vdev;
1116954258a5SAlex Williamson 
1117954258a5SAlex Williamson     rtl->data = (uint32_t)data;
1118954258a5SAlex Williamson 
1119954258a5SAlex Williamson     vfio_region_write(&vdev->bars[2].region, addr + 0x70, data, size);
1120954258a5SAlex Williamson }
1121954258a5SAlex Williamson 
1122954258a5SAlex Williamson static const MemoryRegionOps vfio_rtl_data_quirk = {
1123954258a5SAlex Williamson     .read = vfio_rtl8168_quirk_data_read,
1124954258a5SAlex Williamson     .write = vfio_rtl8168_quirk_data_write,
1125954258a5SAlex Williamson     .valid = {
1126954258a5SAlex Williamson         .min_access_size = 4,
1127954258a5SAlex Williamson         .max_access_size = 4,
1128954258a5SAlex Williamson         .unaligned = false,
1129954258a5SAlex Williamson     },
1130954258a5SAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
1131954258a5SAlex Williamson };
1132954258a5SAlex Williamson 
1133954258a5SAlex Williamson static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr)
1134954258a5SAlex Williamson {
1135954258a5SAlex Williamson     VFIOQuirk *quirk;
1136954258a5SAlex Williamson     VFIOrtl8168Quirk *rtl;
1137954258a5SAlex Williamson 
1138954258a5SAlex Williamson     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_REALTEK, 0x8168) || nr != 2) {
1139c00d61d8SAlex Williamson         return;
1140c00d61d8SAlex Williamson     }
1141c00d61d8SAlex Williamson 
1142bcf3c3d0SAlex Williamson     quirk = vfio_quirk_alloc(2);
1143954258a5SAlex Williamson     quirk->data = rtl = g_malloc0(sizeof(*rtl));
1144954258a5SAlex Williamson     rtl->vdev = vdev;
1145c00d61d8SAlex Williamson 
1146954258a5SAlex Williamson     memory_region_init_io(&quirk->mem[0], OBJECT(vdev),
1147954258a5SAlex Williamson                           &vfio_rtl_address_quirk, rtl,
1148954258a5SAlex Williamson                           "vfio-rtl8168-window-address-quirk", 4);
1149db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
1150954258a5SAlex Williamson                                         0x74, &quirk->mem[0], 1);
1151954258a5SAlex Williamson 
1152954258a5SAlex Williamson     memory_region_init_io(&quirk->mem[1], OBJECT(vdev),
1153954258a5SAlex Williamson                           &vfio_rtl_data_quirk, rtl,
1154954258a5SAlex Williamson                           "vfio-rtl8168-window-data-quirk", 4);
1155db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
1156954258a5SAlex Williamson                                         0x70, &quirk->mem[1], 1);
1157c00d61d8SAlex Williamson 
1158c00d61d8SAlex Williamson     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
1159c00d61d8SAlex Williamson 
1160954258a5SAlex Williamson     trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name);
1161c00d61d8SAlex Williamson }
1162c00d61d8SAlex Williamson 
/* Config space offset of the IGD ASL Storage (ASLS) register */
#define IGD_ASLS 0xfc
1164c4c45e94SAlex Williamson 
1165c4c45e94SAlex Williamson /*
1166c4c45e94SAlex Williamson  * The OpRegion includes the Video BIOS Table, which seems important for
1167c4c45e94SAlex Williamson  * telling the driver what sort of outputs it has.  Without this, the device
1168c4c45e94SAlex Williamson  * may work in the guest, but we may not get output.  This also requires BIOS
1169c4c45e94SAlex Williamson  * support to reserve and populate a section of guest memory sufficient for
1170c4c45e94SAlex Williamson  * the table and to write the base address of that memory to the ASLS register
1171c4c45e94SAlex Williamson  * of the IGD device.
1172c4c45e94SAlex Williamson  */
11736ced0bbaSAlex Williamson int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
11747237011dSEric Auger                                struct vfio_region_info *info, Error **errp)
1175c4c45e94SAlex Williamson {
1176c4c45e94SAlex Williamson     int ret;
1177c4c45e94SAlex Williamson 
1178c4c45e94SAlex Williamson     vdev->igd_opregion = g_malloc0(info->size);
1179c4c45e94SAlex Williamson     ret = pread(vdev->vbasedev.fd, vdev->igd_opregion,
1180c4c45e94SAlex Williamson                 info->size, info->offset);
1181c4c45e94SAlex Williamson     if (ret != info->size) {
11827237011dSEric Auger         error_setg(errp, "failed to read IGD OpRegion");
1183c4c45e94SAlex Williamson         g_free(vdev->igd_opregion);
1184c4c45e94SAlex Williamson         vdev->igd_opregion = NULL;
1185c4c45e94SAlex Williamson         return -EINVAL;
1186c4c45e94SAlex Williamson     }
1187c4c45e94SAlex Williamson 
1188c4c45e94SAlex Williamson     /*
1189c4c45e94SAlex Williamson      * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to
1190c4c45e94SAlex Williamson      * allocate 32bit reserved memory for, copy these contents into, and write
1191c4c45e94SAlex Williamson      * the reserved memory base address to the device ASLS register at 0xFC.
1192c4c45e94SAlex Williamson      * Alignment of this reserved region seems flexible, but using a 4k page
1193c4c45e94SAlex Williamson      * alignment seems to work well.  This interface assumes a single IGD
1194c4c45e94SAlex Williamson      * device, which may be at VM address 00:02.0 in legacy mode or another
1195c4c45e94SAlex Williamson      * address in UPT mode.
1196c4c45e94SAlex Williamson      *
1197c4c45e94SAlex Williamson      * NB, there may be future use cases discovered where the VM should have
1198c4c45e94SAlex Williamson      * direct interaction with the host OpRegion, in which case the write to
1199c4c45e94SAlex Williamson      * the ASLS register would trigger MemoryRegion setup to enable that.
1200c4c45e94SAlex Williamson      */
1201c4c45e94SAlex Williamson     fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion",
1202c4c45e94SAlex Williamson                     vdev->igd_opregion, info->size);
1203c4c45e94SAlex Williamson 
1204c4c45e94SAlex Williamson     trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name);
1205c4c45e94SAlex Williamson 
1206c4c45e94SAlex Williamson     pci_set_long(vdev->pdev.config + IGD_ASLS, 0);
1207c4c45e94SAlex Williamson     pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0);
1208c4c45e94SAlex Williamson     pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0);
1209c4c45e94SAlex Williamson 
1210c4c45e94SAlex Williamson     return 0;
1211c4c45e94SAlex Williamson }
1212c4c45e94SAlex Williamson 
1213c4c45e94SAlex Williamson /*
1214c00d61d8SAlex Williamson  * Common quirk probe entry points.
1215c00d61d8SAlex Williamson  */
1216c00d61d8SAlex Williamson void vfio_vga_quirk_setup(VFIOPCIDevice *vdev)
1217c00d61d8SAlex Williamson {
1218c00d61d8SAlex Williamson     vfio_vga_probe_ati_3c3_quirk(vdev);
1219c00d61d8SAlex Williamson     vfio_vga_probe_nvidia_3d0_quirk(vdev);
1220c00d61d8SAlex Williamson }
1221c00d61d8SAlex Williamson 
12222d82f8a3SAlex Williamson void vfio_vga_quirk_exit(VFIOPCIDevice *vdev)
1223c00d61d8SAlex Williamson {
1224c00d61d8SAlex Williamson     VFIOQuirk *quirk;
12258c4f2348SAlex Williamson     int i, j;
1226c00d61d8SAlex Williamson 
12272d82f8a3SAlex Williamson     for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
12282d82f8a3SAlex Williamson         QLIST_FOREACH(quirk, &vdev->vga->region[i].quirks, next) {
12298c4f2348SAlex Williamson             for (j = 0; j < quirk->nr_mem; j++) {
12302d82f8a3SAlex Williamson                 memory_region_del_subregion(&vdev->vga->region[i].mem,
12318c4f2348SAlex Williamson                                             &quirk->mem[j]);
12328c4f2348SAlex Williamson             }
1233c00d61d8SAlex Williamson         }
1234c00d61d8SAlex Williamson     }
1235c00d61d8SAlex Williamson }
1236c00d61d8SAlex Williamson 
12372d82f8a3SAlex Williamson void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev)
1238c00d61d8SAlex Williamson {
12398c4f2348SAlex Williamson     int i, j;
1240c00d61d8SAlex Williamson 
12412d82f8a3SAlex Williamson     for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
12422d82f8a3SAlex Williamson         while (!QLIST_EMPTY(&vdev->vga->region[i].quirks)) {
12432d82f8a3SAlex Williamson             VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga->region[i].quirks);
1244c00d61d8SAlex Williamson             QLIST_REMOVE(quirk, next);
12458c4f2348SAlex Williamson             for (j = 0; j < quirk->nr_mem; j++) {
12468c4f2348SAlex Williamson                 object_unparent(OBJECT(&quirk->mem[j]));
12478c4f2348SAlex Williamson             }
12488c4f2348SAlex Williamson             g_free(quirk->mem);
12498c4f2348SAlex Williamson             g_free(quirk->data);
1250c00d61d8SAlex Williamson             g_free(quirk);
1251c00d61d8SAlex Williamson         }
1252c00d61d8SAlex Williamson     }
1253c00d61d8SAlex Williamson }
1254c00d61d8SAlex Williamson 
1255c00d61d8SAlex Williamson void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
1256c00d61d8SAlex Williamson {
12570e54f24aSAlex Williamson     vfio_probe_ati_bar4_quirk(vdev, nr);
12580d38fb1cSAlex Williamson     vfio_probe_ati_bar2_quirk(vdev, nr);
12590e54f24aSAlex Williamson     vfio_probe_nvidia_bar5_quirk(vdev, nr);
12600d38fb1cSAlex Williamson     vfio_probe_nvidia_bar0_quirk(vdev, nr);
1261954258a5SAlex Williamson     vfio_probe_rtl8168_bar2_quirk(vdev, nr);
126229d62771SThomas Huth #ifdef CONFIG_VFIO_IGD
1263c4c45e94SAlex Williamson     vfio_probe_igd_bar4_quirk(vdev, nr);
126429d62771SThomas Huth #endif
1265c00d61d8SAlex Williamson }
1266c00d61d8SAlex Williamson 
12672d82f8a3SAlex Williamson void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr)
1268c00d61d8SAlex Williamson {
1269c00d61d8SAlex Williamson     VFIOBAR *bar = &vdev->bars[nr];
1270c00d61d8SAlex Williamson     VFIOQuirk *quirk;
12718c4f2348SAlex Williamson     int i;
1272c00d61d8SAlex Williamson 
1273c00d61d8SAlex Williamson     QLIST_FOREACH(quirk, &bar->quirks, next) {
1274c958c51dSAlex Williamson         while (!QLIST_EMPTY(&quirk->ioeventfds)) {
12752b1dbd0dSAlex Williamson             vfio_ioeventfd_exit(vdev, QLIST_FIRST(&quirk->ioeventfds));
1276c958c51dSAlex Williamson         }
1277c958c51dSAlex Williamson 
12788c4f2348SAlex Williamson         for (i = 0; i < quirk->nr_mem; i++) {
1279db0da029SAlex Williamson             memory_region_del_subregion(bar->region.mem, &quirk->mem[i]);
12808c4f2348SAlex Williamson         }
1281c00d61d8SAlex Williamson     }
1282c00d61d8SAlex Williamson }
1283c00d61d8SAlex Williamson 
12842d82f8a3SAlex Williamson void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr)
1285c00d61d8SAlex Williamson {
1286c00d61d8SAlex Williamson     VFIOBAR *bar = &vdev->bars[nr];
12878c4f2348SAlex Williamson     int i;
1288c00d61d8SAlex Williamson 
1289c00d61d8SAlex Williamson     while (!QLIST_EMPTY(&bar->quirks)) {
1290c00d61d8SAlex Williamson         VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks);
1291c00d61d8SAlex Williamson         QLIST_REMOVE(quirk, next);
12928c4f2348SAlex Williamson         for (i = 0; i < quirk->nr_mem; i++) {
12938c4f2348SAlex Williamson             object_unparent(OBJECT(&quirk->mem[i]));
12948c4f2348SAlex Williamson         }
12958c4f2348SAlex Williamson         g_free(quirk->mem);
12968c4f2348SAlex Williamson         g_free(quirk->data);
1297c00d61d8SAlex Williamson         g_free(quirk);
1298c00d61d8SAlex Williamson     }
1299c00d61d8SAlex Williamson }
1300c9c50009SAlex Williamson 
1301c9c50009SAlex Williamson /*
1302c9c50009SAlex Williamson  * Reset quirks
1303c9c50009SAlex Williamson  */
1304469d02deSAlex Williamson void vfio_quirk_reset(VFIOPCIDevice *vdev)
1305469d02deSAlex Williamson {
1306469d02deSAlex Williamson     int i;
1307469d02deSAlex Williamson 
1308469d02deSAlex Williamson     for (i = 0; i < PCI_ROM_SLOT; i++) {
1309469d02deSAlex Williamson         VFIOQuirk *quirk;
1310469d02deSAlex Williamson         VFIOBAR *bar = &vdev->bars[i];
1311469d02deSAlex Williamson 
1312469d02deSAlex Williamson         QLIST_FOREACH(quirk, &bar->quirks, next) {
1313469d02deSAlex Williamson             if (quirk->reset) {
1314469d02deSAlex Williamson                 quirk->reset(vdev, quirk);
1315469d02deSAlex Williamson             }
1316469d02deSAlex Williamson         }
1317469d02deSAlex Williamson     }
1318469d02deSAlex Williamson }
1319c9c50009SAlex Williamson 
1320c9c50009SAlex Williamson /*
1321c9c50009SAlex Williamson  * AMD Radeon PCI config reset, based on Linux:
1322c9c50009SAlex Williamson  *   drivers/gpu/drm/radeon/ci_smc.c:ci_is_smc_running()
1323c9c50009SAlex Williamson  *   drivers/gpu/drm/radeon/radeon_device.c:radeon_pci_config_reset
1324c9c50009SAlex Williamson  *   drivers/gpu/drm/radeon/ci_smc.c:ci_reset_smc()
1325c9c50009SAlex Williamson  *   drivers/gpu/drm/radeon/ci_smc.c:ci_stop_smc_clock()
1326c9c50009SAlex Williamson  * IDs: include/drm/drm_pciids.h
1327c9c50009SAlex Williamson  * Registers: http://cgit.freedesktop.org/~agd5f/linux/commit/?id=4e2aa447f6f0
1328c9c50009SAlex Williamson  *
 * Bonaire and Hawaii GPUs do not respond to a bus reset.  This is a bug in the
 * hardware that should be fixed on future ASICs.  The symptom of this is that
 * once the accelerated driver loads, Windows guests will bsod on subsequent
 * attempts to load the driver, such as after VM reset or shutdown/restart.  To
 * work around this, we do an AMD specific PCI config reset, followed by an SMC
1334c9c50009SAlex Williamson  * reset.  The PCI config reset only works if SMC firmware is running, so we
1335c9c50009SAlex Williamson  * have a dependency on the state of the device as to whether this reset will
1336c9c50009SAlex Williamson  * be effective.  There are still cases where we won't be able to kick the
1337c9c50009SAlex Williamson  * device into working, but this greatly improves the usability overall.  The
1338c9c50009SAlex Williamson  * config reset magic is relatively common on AMD GPUs, but the setup and SMC
1339c9c50009SAlex Williamson  * poking is largely ASIC specific.
1340c9c50009SAlex Williamson  */
1341c9c50009SAlex Williamson static bool vfio_radeon_smc_is_running(VFIOPCIDevice *vdev)
1342c9c50009SAlex Williamson {
1343c9c50009SAlex Williamson     uint32_t clk, pc_c;
1344c9c50009SAlex Williamson 
1345c9c50009SAlex Williamson     /*
1346c9c50009SAlex Williamson      * Registers 200h and 204h are index and data registers for accessing
1347c9c50009SAlex Williamson      * indirect configuration registers within the device.
1348c9c50009SAlex Williamson      */
1349c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
1350c9c50009SAlex Williamson     clk = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1351c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000370, 4);
1352c9c50009SAlex Williamson     pc_c = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1353c9c50009SAlex Williamson 
1354c9c50009SAlex Williamson     return (!(clk & 1) && (0x20100 <= pc_c));
1355c9c50009SAlex Williamson }
1356c9c50009SAlex Williamson 
1357c9c50009SAlex Williamson /*
1358c9c50009SAlex Williamson  * The scope of a config reset is controlled by a mode bit in the misc register
1359c9c50009SAlex Williamson  * and a fuse, exposed as a bit in another register.  The fuse is the default
 * (0 = GFX, 1 = whole GPU), the misc bit is a toggle, with the formula
1361c9c50009SAlex Williamson  * scope = !(misc ^ fuse), where the resulting scope is defined the same as
1362c9c50009SAlex Williamson  * the fuse.  A truth table therefore tells us that if misc == fuse, we need
1363c9c50009SAlex Williamson  * to flip the value of the bit in the misc register.
1364c9c50009SAlex Williamson  */
1365c9c50009SAlex Williamson static void vfio_radeon_set_gfx_only_reset(VFIOPCIDevice *vdev)
1366c9c50009SAlex Williamson {
1367c9c50009SAlex Williamson     uint32_t misc, fuse;
1368c9c50009SAlex Williamson     bool a, b;
1369c9c50009SAlex Williamson 
1370c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x200, 0xc00c0000, 4);
1371c9c50009SAlex Williamson     fuse = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1372c9c50009SAlex Williamson     b = fuse & 64;
1373c9c50009SAlex Williamson 
1374c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x200, 0xc0000010, 4);
1375c9c50009SAlex Williamson     misc = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1376c9c50009SAlex Williamson     a = misc & 2;
1377c9c50009SAlex Williamson 
1378c9c50009SAlex Williamson     if (a == b) {
1379c9c50009SAlex Williamson         vfio_region_write(&vdev->bars[5].region, 0x204, misc ^ 2, 4);
1380c9c50009SAlex Williamson         vfio_region_read(&vdev->bars[5].region, 0x204, 4); /* flush */
1381c9c50009SAlex Williamson     }
1382c9c50009SAlex Williamson }
1383c9c50009SAlex Williamson 
1384c9c50009SAlex Williamson static int vfio_radeon_reset(VFIOPCIDevice *vdev)
1385c9c50009SAlex Williamson {
1386c9c50009SAlex Williamson     PCIDevice *pdev = &vdev->pdev;
1387c9c50009SAlex Williamson     int i, ret = 0;
1388c9c50009SAlex Williamson     uint32_t data;
1389c9c50009SAlex Williamson 
1390c9c50009SAlex Williamson     /* Defer to a kernel implemented reset */
1391c9c50009SAlex Williamson     if (vdev->vbasedev.reset_works) {
1392c9c50009SAlex Williamson         trace_vfio_quirk_ati_bonaire_reset_skipped(vdev->vbasedev.name);
1393c9c50009SAlex Williamson         return -ENODEV;
1394c9c50009SAlex Williamson     }
1395c9c50009SAlex Williamson 
1396c9c50009SAlex Williamson     /* Enable only memory BAR access */
1397c9c50009SAlex Williamson     vfio_pci_write_config(pdev, PCI_COMMAND, PCI_COMMAND_MEMORY, 2);
1398c9c50009SAlex Williamson 
1399c9c50009SAlex Williamson     /* Reset only works if SMC firmware is loaded and running */
1400c9c50009SAlex Williamson     if (!vfio_radeon_smc_is_running(vdev)) {
1401c9c50009SAlex Williamson         ret = -EINVAL;
1402c9c50009SAlex Williamson         trace_vfio_quirk_ati_bonaire_reset_no_smc(vdev->vbasedev.name);
1403c9c50009SAlex Williamson         goto out;
1404c9c50009SAlex Williamson     }
1405c9c50009SAlex Williamson 
1406c9c50009SAlex Williamson     /* Make sure only the GFX function is reset */
1407c9c50009SAlex Williamson     vfio_radeon_set_gfx_only_reset(vdev);
1408c9c50009SAlex Williamson 
1409c9c50009SAlex Williamson     /* AMD PCI config reset */
1410c9c50009SAlex Williamson     vfio_pci_write_config(pdev, 0x7c, 0x39d5e86b, 4);
1411c9c50009SAlex Williamson     usleep(100);
1412c9c50009SAlex Williamson 
1413c9c50009SAlex Williamson     /* Read back the memory size to make sure we're out of reset */
1414c9c50009SAlex Williamson     for (i = 0; i < 100000; i++) {
1415c9c50009SAlex Williamson         if (vfio_region_read(&vdev->bars[5].region, 0x5428, 4) != 0xffffffff) {
1416c9c50009SAlex Williamson             goto reset_smc;
1417c9c50009SAlex Williamson         }
1418c9c50009SAlex Williamson         usleep(1);
1419c9c50009SAlex Williamson     }
1420c9c50009SAlex Williamson 
1421c9c50009SAlex Williamson     trace_vfio_quirk_ati_bonaire_reset_timeout(vdev->vbasedev.name);
1422c9c50009SAlex Williamson 
1423c9c50009SAlex Williamson reset_smc:
1424c9c50009SAlex Williamson     /* Reset SMC */
1425c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000000, 4);
1426c9c50009SAlex Williamson     data = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1427c9c50009SAlex Williamson     data |= 1;
1428c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x204, data, 4);
1429c9c50009SAlex Williamson 
1430c9c50009SAlex Williamson     /* Disable SMC clock */
1431c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
1432c9c50009SAlex Williamson     data = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1433c9c50009SAlex Williamson     data |= 1;
1434c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x204, data, 4);
1435c9c50009SAlex Williamson 
1436c9c50009SAlex Williamson     trace_vfio_quirk_ati_bonaire_reset_done(vdev->vbasedev.name);
1437c9c50009SAlex Williamson 
1438c9c50009SAlex Williamson out:
1439c9c50009SAlex Williamson     /* Restore PCI command register */
1440c9c50009SAlex Williamson     vfio_pci_write_config(pdev, PCI_COMMAND, 0, 2);
1441c9c50009SAlex Williamson 
1442c9c50009SAlex Williamson     return ret;
1443c9c50009SAlex Williamson }
1444c9c50009SAlex Williamson 
1445c9c50009SAlex Williamson void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev)
1446c9c50009SAlex Williamson {
1447ff635e37SAlex Williamson     switch (vdev->vendor_id) {
1448c9c50009SAlex Williamson     case 0x1002:
1449ff635e37SAlex Williamson         switch (vdev->device_id) {
1450c9c50009SAlex Williamson         /* Bonaire */
1451c9c50009SAlex Williamson         case 0x6649: /* Bonaire [FirePro W5100] */
1452c9c50009SAlex Williamson         case 0x6650:
1453c9c50009SAlex Williamson         case 0x6651:
1454c9c50009SAlex Williamson         case 0x6658: /* Bonaire XTX [Radeon R7 260X] */
1455c9c50009SAlex Williamson         case 0x665c: /* Bonaire XT [Radeon HD 7790/8770 / R9 260 OEM] */
1456c9c50009SAlex Williamson         case 0x665d: /* Bonaire [Radeon R7 200 Series] */
1457c9c50009SAlex Williamson         /* Hawaii */
1458c9c50009SAlex Williamson         case 0x67A0: /* Hawaii XT GL [FirePro W9100] */
1459c9c50009SAlex Williamson         case 0x67A1: /* Hawaii PRO GL [FirePro W8100] */
1460c9c50009SAlex Williamson         case 0x67A2:
1461c9c50009SAlex Williamson         case 0x67A8:
1462c9c50009SAlex Williamson         case 0x67A9:
1463c9c50009SAlex Williamson         case 0x67AA:
1464c9c50009SAlex Williamson         case 0x67B0: /* Hawaii XT [Radeon R9 290X] */
1465c9c50009SAlex Williamson         case 0x67B1: /* Hawaii PRO [Radeon R9 290] */
1466c9c50009SAlex Williamson         case 0x67B8:
1467c9c50009SAlex Williamson         case 0x67B9:
1468c9c50009SAlex Williamson         case 0x67BA:
1469c9c50009SAlex Williamson         case 0x67BE:
1470c9c50009SAlex Williamson             vdev->resetfn = vfio_radeon_reset;
1471c9c50009SAlex Williamson             trace_vfio_quirk_ati_bonaire_reset(vdev->vbasedev.name);
1472c9c50009SAlex Williamson             break;
1473c9c50009SAlex Williamson         }
1474c9c50009SAlex Williamson         break;
1475c9c50009SAlex Williamson     }
1476c9c50009SAlex Williamson }
1477dfbee78dSAlex Williamson 
1478dfbee78dSAlex Williamson /*
1479dfbee78dSAlex Williamson  * The NVIDIA GPUDirect P2P Vendor capability allows the user to specify
1480dfbee78dSAlex Williamson  * devices as a member of a clique.  Devices within the same clique ID
1481dfbee78dSAlex Williamson  * are capable of direct P2P.  It's the user's responsibility that this
1482dfbee78dSAlex Williamson  * is correct.  The spec says that this may reside at any unused config
1483dfbee78dSAlex Williamson  * offset, but reserves and recommends hypervisors place this at C8h.
1484dfbee78dSAlex Williamson  * The spec also states that the hypervisor should place this capability
1485dfbee78dSAlex Williamson  * at the end of the capability list, thus next is defined as 0h.
1486dfbee78dSAlex Williamson  *
1487dfbee78dSAlex Williamson  * +----------------+----------------+----------------+----------------+
1488dfbee78dSAlex Williamson  * | sig 7:0 ('P')  |  vndr len (8h) |    next (0h)   |   cap id (9h)  |
1489dfbee78dSAlex Williamson  * +----------------+----------------+----------------+----------------+
1490dfbee78dSAlex Williamson  * | rsvd 15:7(0h),id 6:3,ver 2:0(0h)|          sig 23:8 ('P2')        |
1491dfbee78dSAlex Williamson  * +---------------------------------+---------------------------------+
1492dfbee78dSAlex Williamson  *
1493dfbee78dSAlex Williamson  * https://lists.gnu.org/archive/html/qemu-devel/2017-08/pdfUda5iEpgOS.pdf
1494dfbee78dSAlex Williamson  */
1495dfbee78dSAlex Williamson static void get_nv_gpudirect_clique_id(Object *obj, Visitor *v,
1496dfbee78dSAlex Williamson                                        const char *name, void *opaque,
1497dfbee78dSAlex Williamson                                        Error **errp)
1498dfbee78dSAlex Williamson {
1499dfbee78dSAlex Williamson     Property *prop = opaque;
15001e198715SEduardo Habkost     uint8_t *ptr = object_field_prop_ptr(obj, prop);
1501dfbee78dSAlex Williamson 
1502dfbee78dSAlex Williamson     visit_type_uint8(v, name, ptr, errp);
1503dfbee78dSAlex Williamson }
1504dfbee78dSAlex Williamson 
1505dfbee78dSAlex Williamson static void set_nv_gpudirect_clique_id(Object *obj, Visitor *v,
1506dfbee78dSAlex Williamson                                        const char *name, void *opaque,
1507dfbee78dSAlex Williamson                                        Error **errp)
1508dfbee78dSAlex Williamson {
1509dfbee78dSAlex Williamson     Property *prop = opaque;
15101e198715SEduardo Habkost     uint8_t value, *ptr = object_field_prop_ptr(obj, prop);
1511dfbee78dSAlex Williamson 
1512668f62ecSMarkus Armbruster     if (!visit_type_uint8(v, name, &value, errp)) {
1513dfbee78dSAlex Williamson         return;
1514dfbee78dSAlex Williamson     }
1515dfbee78dSAlex Williamson 
1516dfbee78dSAlex Williamson     if (value & ~0xF) {
1517dfbee78dSAlex Williamson         error_setg(errp, "Property %s: valid range 0-15", name);
1518dfbee78dSAlex Williamson         return;
1519dfbee78dSAlex Williamson     }
1520dfbee78dSAlex Williamson 
1521dfbee78dSAlex Williamson     *ptr = value;
1522dfbee78dSAlex Williamson }
1523dfbee78dSAlex Williamson 
1524dfbee78dSAlex Williamson const PropertyInfo qdev_prop_nv_gpudirect_clique = {
1525dfbee78dSAlex Williamson     .name = "uint4",
1526dfbee78dSAlex Williamson     .description = "NVIDIA GPUDirect Clique ID (0 - 15)",
1527dfbee78dSAlex Williamson     .get = get_nv_gpudirect_clique_id,
1528dfbee78dSAlex Williamson     .set = set_nv_gpudirect_clique_id,
1529dfbee78dSAlex Williamson };
1530dfbee78dSAlex Williamson 
1531dfbee78dSAlex Williamson static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
1532dfbee78dSAlex Williamson {
1533dfbee78dSAlex Williamson     PCIDevice *pdev = &vdev->pdev;
1534dfbee78dSAlex Williamson     int ret, pos = 0xC8;
1535dfbee78dSAlex Williamson 
1536dfbee78dSAlex Williamson     if (vdev->nv_gpudirect_clique == 0xFF) {
1537dfbee78dSAlex Williamson         return 0;
1538dfbee78dSAlex Williamson     }
1539dfbee78dSAlex Williamson 
1540dfbee78dSAlex Williamson     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID)) {
1541dfbee78dSAlex Williamson         error_setg(errp, "NVIDIA GPUDirect Clique ID: invalid device vendor");
1542dfbee78dSAlex Williamson         return -EINVAL;
1543dfbee78dSAlex Williamson     }
1544dfbee78dSAlex Williamson 
1545dfbee78dSAlex Williamson     if (pci_get_byte(pdev->config + PCI_CLASS_DEVICE + 1) !=
1546dfbee78dSAlex Williamson         PCI_BASE_CLASS_DISPLAY) {
1547dfbee78dSAlex Williamson         error_setg(errp, "NVIDIA GPUDirect Clique ID: unsupported PCI class");
1548dfbee78dSAlex Williamson         return -EINVAL;
1549dfbee78dSAlex Williamson     }
1550dfbee78dSAlex Williamson 
1551dfbee78dSAlex Williamson     ret = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, 8, errp);
1552dfbee78dSAlex Williamson     if (ret < 0) {
1553dfbee78dSAlex Williamson         error_prepend(errp, "Failed to add NVIDIA GPUDirect cap: ");
1554dfbee78dSAlex Williamson         return ret;
1555dfbee78dSAlex Williamson     }
1556dfbee78dSAlex Williamson 
1557dfbee78dSAlex Williamson     memset(vdev->emulated_config_bits + pos, 0xFF, 8);
1558dfbee78dSAlex Williamson     pos += PCI_CAP_FLAGS;
1559dfbee78dSAlex Williamson     pci_set_byte(pdev->config + pos++, 8);
1560dfbee78dSAlex Williamson     pci_set_byte(pdev->config + pos++, 'P');
1561dfbee78dSAlex Williamson     pci_set_byte(pdev->config + pos++, '2');
1562dfbee78dSAlex Williamson     pci_set_byte(pdev->config + pos++, 'P');
1563dfbee78dSAlex Williamson     pci_set_byte(pdev->config + pos++, vdev->nv_gpudirect_clique << 3);
1564dfbee78dSAlex Williamson     pci_set_byte(pdev->config + pos, 0);
1565dfbee78dSAlex Williamson 
1566dfbee78dSAlex Williamson     return 0;
1567dfbee78dSAlex Williamson }
1568dfbee78dSAlex Williamson 
1569ec132efaSAlexey Kardashevskiy static void vfio_pci_nvlink2_get_tgt(Object *obj, Visitor *v,
1570ec132efaSAlexey Kardashevskiy                                      const char *name,
1571ec132efaSAlexey Kardashevskiy                                      void *opaque, Error **errp)
1572ec132efaSAlexey Kardashevskiy {
1573ec132efaSAlexey Kardashevskiy     uint64_t tgt = (uintptr_t) opaque;
1574ec132efaSAlexey Kardashevskiy     visit_type_uint64(v, name, &tgt, errp);
1575ec132efaSAlexey Kardashevskiy }
1576ec132efaSAlexey Kardashevskiy 
1577ec132efaSAlexey Kardashevskiy static void vfio_pci_nvlink2_get_link_speed(Object *obj, Visitor *v,
1578ec132efaSAlexey Kardashevskiy                                                  const char *name,
1579ec132efaSAlexey Kardashevskiy                                                  void *opaque, Error **errp)
1580ec132efaSAlexey Kardashevskiy {
1581ec132efaSAlexey Kardashevskiy     uint32_t link_speed = (uint32_t)(uintptr_t) opaque;
1582ec132efaSAlexey Kardashevskiy     visit_type_uint32(v, name, &link_speed, errp);
1583ec132efaSAlexey Kardashevskiy }
1584ec132efaSAlexey Kardashevskiy 
1585ec132efaSAlexey Kardashevskiy int vfio_pci_nvidia_v100_ram_init(VFIOPCIDevice *vdev, Error **errp)
1586ec132efaSAlexey Kardashevskiy {
1587ec132efaSAlexey Kardashevskiy     int ret;
1588ec132efaSAlexey Kardashevskiy     void *p;
1589ec132efaSAlexey Kardashevskiy     struct vfio_region_info *nv2reg = NULL;
1590ec132efaSAlexey Kardashevskiy     struct vfio_info_cap_header *hdr;
1591ec132efaSAlexey Kardashevskiy     struct vfio_region_info_cap_nvlink2_ssatgt *cap;
1592ec132efaSAlexey Kardashevskiy     VFIOQuirk *quirk;
1593ec132efaSAlexey Kardashevskiy 
1594ec132efaSAlexey Kardashevskiy     ret = vfio_get_dev_region_info(&vdev->vbasedev,
1595ec132efaSAlexey Kardashevskiy                                    VFIO_REGION_TYPE_PCI_VENDOR_TYPE |
1596ec132efaSAlexey Kardashevskiy                                    PCI_VENDOR_ID_NVIDIA,
1597ec132efaSAlexey Kardashevskiy                                    VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM,
1598ec132efaSAlexey Kardashevskiy                                    &nv2reg);
1599ec132efaSAlexey Kardashevskiy     if (ret) {
1600ec132efaSAlexey Kardashevskiy         return ret;
1601ec132efaSAlexey Kardashevskiy     }
1602ec132efaSAlexey Kardashevskiy 
1603ec132efaSAlexey Kardashevskiy     hdr = vfio_get_region_info_cap(nv2reg, VFIO_REGION_INFO_CAP_NVLINK2_SSATGT);
1604ec132efaSAlexey Kardashevskiy     if (!hdr) {
1605ec132efaSAlexey Kardashevskiy         ret = -ENODEV;
1606ec132efaSAlexey Kardashevskiy         goto free_exit;
1607ec132efaSAlexey Kardashevskiy     }
1608ec132efaSAlexey Kardashevskiy     cap = (void *) hdr;
1609ec132efaSAlexey Kardashevskiy 
16109c7c0407SLeonardo Bras     p = mmap(NULL, nv2reg->size, PROT_READ | PROT_WRITE,
1611ec132efaSAlexey Kardashevskiy              MAP_SHARED, vdev->vbasedev.fd, nv2reg->offset);
1612ec132efaSAlexey Kardashevskiy     if (p == MAP_FAILED) {
1613ec132efaSAlexey Kardashevskiy         ret = -errno;
1614ec132efaSAlexey Kardashevskiy         goto free_exit;
1615ec132efaSAlexey Kardashevskiy     }
1616ec132efaSAlexey Kardashevskiy 
1617ec132efaSAlexey Kardashevskiy     quirk = vfio_quirk_alloc(1);
1618ec132efaSAlexey Kardashevskiy     memory_region_init_ram_ptr(&quirk->mem[0], OBJECT(vdev), "nvlink2-mr",
1619ec132efaSAlexey Kardashevskiy                                nv2reg->size, p);
1620ec132efaSAlexey Kardashevskiy     QLIST_INSERT_HEAD(&vdev->bars[0].quirks, quirk, next);
1621ec132efaSAlexey Kardashevskiy 
1622ec132efaSAlexey Kardashevskiy     object_property_add(OBJECT(vdev), "nvlink2-tgt", "uint64",
1623ec132efaSAlexey Kardashevskiy                         vfio_pci_nvlink2_get_tgt, NULL, NULL,
1624d2623129SMarkus Armbruster                         (void *) (uintptr_t) cap->tgt);
1625ec132efaSAlexey Kardashevskiy     trace_vfio_pci_nvidia_gpu_setup_quirk(vdev->vbasedev.name, cap->tgt,
1626ec132efaSAlexey Kardashevskiy                                           nv2reg->size);
1627ec132efaSAlexey Kardashevskiy free_exit:
1628ec132efaSAlexey Kardashevskiy     g_free(nv2reg);
1629ec132efaSAlexey Kardashevskiy 
1630ec132efaSAlexey Kardashevskiy     return ret;
1631ec132efaSAlexey Kardashevskiy }
1632ec132efaSAlexey Kardashevskiy 
1633ec132efaSAlexey Kardashevskiy int vfio_pci_nvlink2_init(VFIOPCIDevice *vdev, Error **errp)
1634ec132efaSAlexey Kardashevskiy {
1635ec132efaSAlexey Kardashevskiy     int ret;
1636ec132efaSAlexey Kardashevskiy     void *p;
1637ec132efaSAlexey Kardashevskiy     struct vfio_region_info *atsdreg = NULL;
1638ec132efaSAlexey Kardashevskiy     struct vfio_info_cap_header *hdr;
1639ec132efaSAlexey Kardashevskiy     struct vfio_region_info_cap_nvlink2_ssatgt *captgt;
1640ec132efaSAlexey Kardashevskiy     struct vfio_region_info_cap_nvlink2_lnkspd *capspeed;
1641ec132efaSAlexey Kardashevskiy     VFIOQuirk *quirk;
1642ec132efaSAlexey Kardashevskiy 
1643ec132efaSAlexey Kardashevskiy     ret = vfio_get_dev_region_info(&vdev->vbasedev,
1644ec132efaSAlexey Kardashevskiy                                    VFIO_REGION_TYPE_PCI_VENDOR_TYPE |
1645ec132efaSAlexey Kardashevskiy                                    PCI_VENDOR_ID_IBM,
1646ec132efaSAlexey Kardashevskiy                                    VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD,
1647ec132efaSAlexey Kardashevskiy                                    &atsdreg);
1648ec132efaSAlexey Kardashevskiy     if (ret) {
1649ec132efaSAlexey Kardashevskiy         return ret;
1650ec132efaSAlexey Kardashevskiy     }
1651ec132efaSAlexey Kardashevskiy 
1652ec132efaSAlexey Kardashevskiy     hdr = vfio_get_region_info_cap(atsdreg,
1653ec132efaSAlexey Kardashevskiy                                    VFIO_REGION_INFO_CAP_NVLINK2_SSATGT);
1654ec132efaSAlexey Kardashevskiy     if (!hdr) {
1655ec132efaSAlexey Kardashevskiy         ret = -ENODEV;
1656ec132efaSAlexey Kardashevskiy         goto free_exit;
1657ec132efaSAlexey Kardashevskiy     }
1658ec132efaSAlexey Kardashevskiy     captgt = (void *) hdr;
1659ec132efaSAlexey Kardashevskiy 
1660ec132efaSAlexey Kardashevskiy     hdr = vfio_get_region_info_cap(atsdreg,
1661ec132efaSAlexey Kardashevskiy                                    VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD);
1662ec132efaSAlexey Kardashevskiy     if (!hdr) {
1663ec132efaSAlexey Kardashevskiy         ret = -ENODEV;
1664ec132efaSAlexey Kardashevskiy         goto free_exit;
1665ec132efaSAlexey Kardashevskiy     }
1666ec132efaSAlexey Kardashevskiy     capspeed = (void *) hdr;
1667ec132efaSAlexey Kardashevskiy 
1668ec132efaSAlexey Kardashevskiy     /* Some NVLink bridges may not have assigned ATSD */
1669ec132efaSAlexey Kardashevskiy     if (atsdreg->size) {
16709c7c0407SLeonardo Bras         p = mmap(NULL, atsdreg->size, PROT_READ | PROT_WRITE,
1671ec132efaSAlexey Kardashevskiy                  MAP_SHARED, vdev->vbasedev.fd, atsdreg->offset);
1672ec132efaSAlexey Kardashevskiy         if (p == MAP_FAILED) {
1673ec132efaSAlexey Kardashevskiy             ret = -errno;
1674ec132efaSAlexey Kardashevskiy             goto free_exit;
1675ec132efaSAlexey Kardashevskiy         }
1676ec132efaSAlexey Kardashevskiy 
1677ec132efaSAlexey Kardashevskiy         quirk = vfio_quirk_alloc(1);
1678ec132efaSAlexey Kardashevskiy         memory_region_init_ram_device_ptr(&quirk->mem[0], OBJECT(vdev),
1679ec132efaSAlexey Kardashevskiy                                           "nvlink2-atsd-mr", atsdreg->size, p);
1680ec132efaSAlexey Kardashevskiy         QLIST_INSERT_HEAD(&vdev->bars[0].quirks, quirk, next);
1681ec132efaSAlexey Kardashevskiy     }
1682ec132efaSAlexey Kardashevskiy 
1683ec132efaSAlexey Kardashevskiy     object_property_add(OBJECT(vdev), "nvlink2-tgt", "uint64",
1684ec132efaSAlexey Kardashevskiy                         vfio_pci_nvlink2_get_tgt, NULL, NULL,
1685d2623129SMarkus Armbruster                         (void *) (uintptr_t) captgt->tgt);
1686ec132efaSAlexey Kardashevskiy     trace_vfio_pci_nvlink2_setup_quirk_ssatgt(vdev->vbasedev.name, captgt->tgt,
1687ec132efaSAlexey Kardashevskiy                                               atsdreg->size);
1688ec132efaSAlexey Kardashevskiy 
1689ec132efaSAlexey Kardashevskiy     object_property_add(OBJECT(vdev), "nvlink2-link-speed", "uint32",
1690ec132efaSAlexey Kardashevskiy                         vfio_pci_nvlink2_get_link_speed, NULL, NULL,
1691d2623129SMarkus Armbruster                         (void *) (uintptr_t) capspeed->link_speed);
1692ec132efaSAlexey Kardashevskiy     trace_vfio_pci_nvlink2_setup_quirk_lnkspd(vdev->vbasedev.name,
1693ec132efaSAlexey Kardashevskiy                                               capspeed->link_speed);
1694ec132efaSAlexey Kardashevskiy free_exit:
1695ec132efaSAlexey Kardashevskiy     g_free(atsdreg);
1696ec132efaSAlexey Kardashevskiy 
1697ec132efaSAlexey Kardashevskiy     return ret;
1698ec132efaSAlexey Kardashevskiy }
1699ee7932b0SJon Derrick 
1700ee7932b0SJon Derrick /*
1701ee7932b0SJon Derrick  * The VMD endpoint provides a real PCIe domain to the guest and the guest
1702ee7932b0SJon Derrick  * kernel performs enumeration of the VMD sub-device domain. Guest transactions
1703ee7932b0SJon Derrick  * to VMD sub-devices go through MMU translation from guest addresses to
1704ee7932b0SJon Derrick  * physical addresses. When MMIO goes to an endpoint after being translated to
1705ee7932b0SJon Derrick  * physical addresses, the bridge rejects the transaction because the window
1706ee7932b0SJon Derrick  * has been programmed with guest addresses.
1707ee7932b0SJon Derrick  *
1708ee7932b0SJon Derrick  * VMD can use the Host Physical Address in order to correctly program the
1709ee7932b0SJon Derrick  * bridge windows in its PCIe domain. VMD device 28C0 has HPA shadow registers
1710ee7932b0SJon Derrick  * located at offset 0x2000 in MEMBAR2 (BAR 4). This quirk provides the HPA
1711ee7932b0SJon Derrick  * shadow registers in a vendor-specific capability register for devices
1712ee7932b0SJon Derrick  * without native support. The position of 0xE8-0xFF is in the reserved range
1713ee7932b0SJon Derrick  * of the VMD device capability space following the Power Management
1714ee7932b0SJon Derrick  * Capability.
1715ee7932b0SJon Derrick  */
1716ee7932b0SJon Derrick #define VMD_SHADOW_CAP_VER 1
1717ee7932b0SJon Derrick #define VMD_SHADOW_CAP_LEN 24
1718ee7932b0SJon Derrick static int vfio_add_vmd_shadow_cap(VFIOPCIDevice *vdev, Error **errp)
1719ee7932b0SJon Derrick {
1720ee7932b0SJon Derrick     uint8_t membar_phys[16];
1721ee7932b0SJon Derrick     int ret, pos = 0xE8;
1722ee7932b0SJon Derrick 
1723ee7932b0SJon Derrick     if (!(vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, 0x201D) ||
1724ee7932b0SJon Derrick           vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, 0x467F) ||
1725ee7932b0SJon Derrick           vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, 0x4C3D) ||
1726ee7932b0SJon Derrick           vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, 0x9A0B))) {
1727ee7932b0SJon Derrick         return 0;
1728ee7932b0SJon Derrick     }
1729ee7932b0SJon Derrick 
1730ee7932b0SJon Derrick     ret = pread(vdev->vbasedev.fd, membar_phys, 16,
1731ee7932b0SJon Derrick                 vdev->config_offset + PCI_BASE_ADDRESS_2);
1732ee7932b0SJon Derrick     if (ret != 16) {
1733ee7932b0SJon Derrick         error_report("VMD %s cannot read MEMBARs (%d)",
1734ee7932b0SJon Derrick                      vdev->vbasedev.name, ret);
1735ee7932b0SJon Derrick         return -EFAULT;
1736ee7932b0SJon Derrick     }
1737ee7932b0SJon Derrick 
1738ee7932b0SJon Derrick     ret = pci_add_capability(&vdev->pdev, PCI_CAP_ID_VNDR, pos,
1739ee7932b0SJon Derrick                              VMD_SHADOW_CAP_LEN, errp);
1740ee7932b0SJon Derrick     if (ret < 0) {
1741ee7932b0SJon Derrick         error_prepend(errp, "Failed to add VMD MEMBAR Shadow cap: ");
1742ee7932b0SJon Derrick         return ret;
1743ee7932b0SJon Derrick     }
1744ee7932b0SJon Derrick 
1745ee7932b0SJon Derrick     memset(vdev->emulated_config_bits + pos, 0xFF, VMD_SHADOW_CAP_LEN);
1746ee7932b0SJon Derrick     pos += PCI_CAP_FLAGS;
1747ee7932b0SJon Derrick     pci_set_byte(vdev->pdev.config + pos++, VMD_SHADOW_CAP_LEN);
1748ee7932b0SJon Derrick     pci_set_byte(vdev->pdev.config + pos++, VMD_SHADOW_CAP_VER);
1749ee7932b0SJon Derrick     pci_set_long(vdev->pdev.config + pos, 0x53484457); /* SHDW */
1750ee7932b0SJon Derrick     memcpy(vdev->pdev.config + pos + 4, membar_phys, 16);
1751ee7932b0SJon Derrick 
1752ee7932b0SJon Derrick     return 0;
1753ee7932b0SJon Derrick }
1754ee7932b0SJon Derrick 
1755ee7932b0SJon Derrick int vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp)
1756ee7932b0SJon Derrick {
1757ee7932b0SJon Derrick     int ret;
1758ee7932b0SJon Derrick 
1759ee7932b0SJon Derrick     ret = vfio_add_nv_gpudirect_cap(vdev, errp);
1760ee7932b0SJon Derrick     if (ret) {
1761ee7932b0SJon Derrick         return ret;
1762ee7932b0SJon Derrick     }
1763ee7932b0SJon Derrick 
1764ee7932b0SJon Derrick     ret = vfio_add_vmd_shadow_cap(vdev, errp);
1765ee7932b0SJon Derrick     if (ret) {
1766ee7932b0SJon Derrick         return ret;
1767ee7932b0SJon Derrick     }
1768ee7932b0SJon Derrick 
1769ee7932b0SJon Derrick     return 0;
1770ee7932b0SJon Derrick }
1771