xref: /qemu/hw/vfio/pci-quirks.c (revision 1e198715e12ae86c4942a0a2d1df29beabccc295)
1c00d61d8SAlex Williamson /*
2c00d61d8SAlex Williamson  * device quirks for PCI devices
3c00d61d8SAlex Williamson  *
4c00d61d8SAlex Williamson  * Copyright Red Hat, Inc. 2012-2015
5c00d61d8SAlex Williamson  *
6c00d61d8SAlex Williamson  * Authors:
7c00d61d8SAlex Williamson  *  Alex Williamson <alex.williamson@redhat.com>
8c00d61d8SAlex Williamson  *
9c00d61d8SAlex Williamson  * This work is licensed under the terms of the GNU GPL, version 2.  See
10c00d61d8SAlex Williamson  * the COPYING file in the top-level directory.
11c00d61d8SAlex Williamson  */
12c00d61d8SAlex Williamson 
13c6eacb1aSPeter Maydell #include "qemu/osdep.h"
142becc36aSPaolo Bonzini #include CONFIG_DEVICES
15475fbf0aSTony Nguyen #include "exec/memop.h"
16e0255bb1SPhilippe Mathieu-Daudé #include "qemu/units.h"
17c4c45e94SAlex Williamson #include "qemu/error-report.h"
18c958c51dSAlex Williamson #include "qemu/main-loop.h"
190b8fa32fSMarkus Armbruster #include "qemu/module.h"
20c4c45e94SAlex Williamson #include "qemu/range.h"
21c4c45e94SAlex Williamson #include "qapi/error.h"
22dfbee78dSAlex Williamson #include "qapi/visitor.h"
232b1dbd0dSAlex Williamson #include <sys/ioctl.h>
24650d103dSMarkus Armbruster #include "hw/hw.h"
25c4c45e94SAlex Williamson #include "hw/nvram/fw_cfg.h"
26a27bd6c7SMarkus Armbruster #include "hw/qdev-properties.h"
27c00d61d8SAlex Williamson #include "pci.h"
28c00d61d8SAlex Williamson #include "trace.h"
29c00d61d8SAlex Williamson 
/*
 * List of device ids/vendor ids for which to disable
 * option rom loading. This avoids the guest hangs during rom
 * execution as noticed with the BCM 57810 card, for lack of a
 * better way to handle such issues.
 * The user can still override by specifying a romfile or
 * rombar=1.
 * Please see https://bugs.launchpad.net/qemu/+bug/1284874
 * for an analysis of the 57810 card hang. When adding
 * a new vendor id/device id combination below, please also add
 * your card/environment details and information that could
 * help in debugging to the bug tracking this issue.
 */
/* Vendor/device ID pairs checked by vfio_blacklist_opt_rom(). */
static const struct {
    uint32_t vendor;
    uint32_t device;
} romblacklist[] = {
    { .vendor = 0x14e4, .device = 0x168e }, /* Broadcom BCM 57810 */
};
49c00d61d8SAlex Williamson 
50c00d61d8SAlex Williamson bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev)
51c00d61d8SAlex Williamson {
52056dfcb6SAlex Williamson     int i;
53c00d61d8SAlex Williamson 
54056dfcb6SAlex Williamson     for (i = 0 ; i < ARRAY_SIZE(romblacklist); i++) {
55056dfcb6SAlex Williamson         if (vfio_pci_is(vdev, romblacklist[i].vendor, romblacklist[i].device)) {
56056dfcb6SAlex Williamson             trace_vfio_quirk_rom_blacklisted(vdev->vbasedev.name,
57056dfcb6SAlex Williamson                                              romblacklist[i].vendor,
58056dfcb6SAlex Williamson                                              romblacklist[i].device);
59c00d61d8SAlex Williamson             return true;
60c00d61d8SAlex Williamson         }
61c00d61d8SAlex Williamson     }
62c00d61d8SAlex Williamson     return false;
63c00d61d8SAlex Williamson }
64c00d61d8SAlex Williamson 
65c00d61d8SAlex Williamson /*
660e54f24aSAlex Williamson  * Device specific region quirks (mostly backdoors to PCI config space)
67c00d61d8SAlex Williamson  */
68c00d61d8SAlex Williamson 
/*
 * The generic window quirks operate on an address and data register,
 * vfio_generic_window_address_quirk handles the address register and
 * vfio_generic_window_data_quirk handles the data register.  These ops
 * pass reads and writes through to hardware until a value matching the
 * stored address match/mask is written.  When this occurs, data register
 * accesses are directed to emulated PCI config space for the device rather
 * than being passed through.  This enables devices where PCI config space
 * is accessible behind a window register to maintain the virtualization
 * provided through vfio.
 */
/*
 * One address pattern recognised by the window address register handler.
 * A written value matches when (value & ~mask) == match.
 */
typedef struct VFIOConfigWindowMatch {
    uint32_t match; /* expected value of the written address with mask bits cleared */
    uint32_t mask;  /* bits of the written address kept as the config space offset */
} VFIOConfigWindowMatch;
840e54f24aSAlex Williamson 
/*
 * State shared by the address and data register handlers of one config
 * space window.
 */
typedef struct VFIOConfigWindowQuirk {
    struct VFIOPCIDevice *vdev; /* device whose BAR and config space are accessed */

    uint32_t address_val; /* (written address & mask) saved by the last matching address write */

    uint32_t address_offset; /* added to the access address for address register pass-through */
    uint32_t data_offset;    /* added to the access address for data register pass-through */

    bool window_enabled; /* cleared on every address write, set again if the value matches */
    uint8_t bar;         /* index into vdev->bars[] */

    MemoryRegion *addr_mem; /* region whose name is used in address register traces */
    MemoryRegion *data_mem; /* region whose name is used in data register traces */

    uint32_t nr_matches;             /* number of entries in matches[] */
    VFIOConfigWindowMatch matches[]; /* patterns tested in order on each address write */
} VFIOConfigWindowQuirk;
1020e54f24aSAlex Williamson 
1030e54f24aSAlex Williamson static uint64_t vfio_generic_window_quirk_address_read(void *opaque,
1040e54f24aSAlex Williamson                                                        hwaddr addr,
1050e54f24aSAlex Williamson                                                        unsigned size)
1060e54f24aSAlex Williamson {
1070e54f24aSAlex Williamson     VFIOConfigWindowQuirk *window = opaque;
1080e54f24aSAlex Williamson     VFIOPCIDevice *vdev = window->vdev;
1090e54f24aSAlex Williamson 
1100e54f24aSAlex Williamson     return vfio_region_read(&vdev->bars[window->bar].region,
1110e54f24aSAlex Williamson                             addr + window->address_offset, size);
1120e54f24aSAlex Williamson }
1130e54f24aSAlex Williamson 
1140e54f24aSAlex Williamson static void vfio_generic_window_quirk_address_write(void *opaque, hwaddr addr,
1150e54f24aSAlex Williamson                                                     uint64_t data,
1160e54f24aSAlex Williamson                                                     unsigned size)
1170e54f24aSAlex Williamson {
1180e54f24aSAlex Williamson     VFIOConfigWindowQuirk *window = opaque;
1190e54f24aSAlex Williamson     VFIOPCIDevice *vdev = window->vdev;
1200e54f24aSAlex Williamson     int i;
1210e54f24aSAlex Williamson 
1220e54f24aSAlex Williamson     window->window_enabled = false;
1230e54f24aSAlex Williamson 
1240e54f24aSAlex Williamson     vfio_region_write(&vdev->bars[window->bar].region,
1250e54f24aSAlex Williamson                       addr + window->address_offset, data, size);
1260e54f24aSAlex Williamson 
1270e54f24aSAlex Williamson     for (i = 0; i < window->nr_matches; i++) {
1280e54f24aSAlex Williamson         if ((data & ~window->matches[i].mask) == window->matches[i].match) {
1290e54f24aSAlex Williamson             window->window_enabled = true;
1300e54f24aSAlex Williamson             window->address_val = data & window->matches[i].mask;
1310e54f24aSAlex Williamson             trace_vfio_quirk_generic_window_address_write(vdev->vbasedev.name,
1320e54f24aSAlex Williamson                                     memory_region_name(window->addr_mem), data);
1330e54f24aSAlex Williamson             break;
1340e54f24aSAlex Williamson         }
1350e54f24aSAlex Williamson     }
1360e54f24aSAlex Williamson }
1370e54f24aSAlex Williamson 
/* MemoryRegionOps for the window address register (little-endian device). */
static const MemoryRegionOps vfio_generic_window_address_quirk = {
    .read = vfio_generic_window_quirk_address_read,
    .write = vfio_generic_window_quirk_address_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1430e54f24aSAlex Williamson 
1440e54f24aSAlex Williamson static uint64_t vfio_generic_window_quirk_data_read(void *opaque,
1450e54f24aSAlex Williamson                                                     hwaddr addr, unsigned size)
1460e54f24aSAlex Williamson {
1470e54f24aSAlex Williamson     VFIOConfigWindowQuirk *window = opaque;
1480e54f24aSAlex Williamson     VFIOPCIDevice *vdev = window->vdev;
1490e54f24aSAlex Williamson     uint64_t data;
1500e54f24aSAlex Williamson 
1510e54f24aSAlex Williamson     /* Always read data reg, discard if window enabled */
1520e54f24aSAlex Williamson     data = vfio_region_read(&vdev->bars[window->bar].region,
1530e54f24aSAlex Williamson                             addr + window->data_offset, size);
1540e54f24aSAlex Williamson 
1550e54f24aSAlex Williamson     if (window->window_enabled) {
1560e54f24aSAlex Williamson         data = vfio_pci_read_config(&vdev->pdev, window->address_val, size);
1570e54f24aSAlex Williamson         trace_vfio_quirk_generic_window_data_read(vdev->vbasedev.name,
1580e54f24aSAlex Williamson                                     memory_region_name(window->data_mem), data);
1590e54f24aSAlex Williamson     }
1600e54f24aSAlex Williamson 
1610e54f24aSAlex Williamson     return data;
1620e54f24aSAlex Williamson }
1630e54f24aSAlex Williamson 
1640e54f24aSAlex Williamson static void vfio_generic_window_quirk_data_write(void *opaque, hwaddr addr,
1650e54f24aSAlex Williamson                                                  uint64_t data, unsigned size)
1660e54f24aSAlex Williamson {
1670e54f24aSAlex Williamson     VFIOConfigWindowQuirk *window = opaque;
1680e54f24aSAlex Williamson     VFIOPCIDevice *vdev = window->vdev;
1690e54f24aSAlex Williamson 
1700e54f24aSAlex Williamson     if (window->window_enabled) {
1710e54f24aSAlex Williamson         vfio_pci_write_config(&vdev->pdev, window->address_val, data, size);
1720e54f24aSAlex Williamson         trace_vfio_quirk_generic_window_data_write(vdev->vbasedev.name,
1730e54f24aSAlex Williamson                                     memory_region_name(window->data_mem), data);
1740e54f24aSAlex Williamson         return;
1750e54f24aSAlex Williamson     }
1760e54f24aSAlex Williamson 
1770e54f24aSAlex Williamson     vfio_region_write(&vdev->bars[window->bar].region,
1780e54f24aSAlex Williamson                       addr + window->data_offset, data, size);
1790e54f24aSAlex Williamson }
1800e54f24aSAlex Williamson 
/* MemoryRegionOps for the window data register (little-endian device). */
static const MemoryRegionOps vfio_generic_window_data_quirk = {
    .read = vfio_generic_window_quirk_data_read,
    .write = vfio_generic_window_quirk_data_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1860e54f24aSAlex Williamson 
1870d38fb1cSAlex Williamson /*
1880d38fb1cSAlex Williamson  * The generic mirror quirk handles devices which expose PCI config space
1890d38fb1cSAlex Williamson  * through a region within a BAR.  When enabled, reads and writes are
1900d38fb1cSAlex Williamson  * redirected through to emulated PCI config space.  XXX if PCI config space
1910d38fb1cSAlex Williamson  * used memory regions, this could just be an alias.
1920d38fb1cSAlex Williamson  */
/* State for one config space mirror located inside a BAR. */
typedef struct VFIOConfigMirrorQuirk {
    struct VFIOPCIDevice *vdev; /* device whose BAR and config space are accessed */
    uint32_t offset;            /* added to the access address for the hardware read */
    uint8_t bar;                /* index into vdev->bars[] */
    MemoryRegion *mem;          /* region whose name is used in mirror traces */
    uint8_t data[];             /* trailing storage; not used by the generic mirror handlers */
} VFIOConfigMirrorQuirk;
2000d38fb1cSAlex Williamson 
2010d38fb1cSAlex Williamson static uint64_t vfio_generic_quirk_mirror_read(void *opaque,
2020d38fb1cSAlex Williamson                                                hwaddr addr, unsigned size)
2030d38fb1cSAlex Williamson {
2040d38fb1cSAlex Williamson     VFIOConfigMirrorQuirk *mirror = opaque;
2050d38fb1cSAlex Williamson     VFIOPCIDevice *vdev = mirror->vdev;
2060d38fb1cSAlex Williamson     uint64_t data;
2070d38fb1cSAlex Williamson 
2080d38fb1cSAlex Williamson     /* Read and discard in case the hardware cares */
2090d38fb1cSAlex Williamson     (void)vfio_region_read(&vdev->bars[mirror->bar].region,
2100d38fb1cSAlex Williamson                            addr + mirror->offset, size);
2110d38fb1cSAlex Williamson 
2120d38fb1cSAlex Williamson     data = vfio_pci_read_config(&vdev->pdev, addr, size);
2130d38fb1cSAlex Williamson     trace_vfio_quirk_generic_mirror_read(vdev->vbasedev.name,
2140d38fb1cSAlex Williamson                                          memory_region_name(mirror->mem),
2150d38fb1cSAlex Williamson                                          addr, data);
2160d38fb1cSAlex Williamson     return data;
2170d38fb1cSAlex Williamson }
2180d38fb1cSAlex Williamson 
2190d38fb1cSAlex Williamson static void vfio_generic_quirk_mirror_write(void *opaque, hwaddr addr,
2200d38fb1cSAlex Williamson                                             uint64_t data, unsigned size)
2210d38fb1cSAlex Williamson {
2220d38fb1cSAlex Williamson     VFIOConfigMirrorQuirk *mirror = opaque;
2230d38fb1cSAlex Williamson     VFIOPCIDevice *vdev = mirror->vdev;
2240d38fb1cSAlex Williamson 
2250d38fb1cSAlex Williamson     vfio_pci_write_config(&vdev->pdev, addr, data, size);
2260d38fb1cSAlex Williamson     trace_vfio_quirk_generic_mirror_write(vdev->vbasedev.name,
2270d38fb1cSAlex Williamson                                           memory_region_name(mirror->mem),
2280d38fb1cSAlex Williamson                                           addr, data);
2290d38fb1cSAlex Williamson }
2300d38fb1cSAlex Williamson 
/* MemoryRegionOps for a config space mirror region (little-endian device). */
static const MemoryRegionOps vfio_generic_mirror_quirk = {
    .read = vfio_generic_quirk_mirror_read,
    .write = vfio_generic_quirk_mirror_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
2360d38fb1cSAlex Williamson 
/*
 * Is range1 fully contained within range2?
 *
 * Each range is the half-open interval [first, first + len).  The test
 * compares the start offset and the lengths instead of the end addresses, so
 * a range whose end would exceed UINT64_MAX cannot wrap around and be
 * reported as contained.  For ranges that do not wrap the result is the same
 * as comparing first1 + len1 against first2 + len2.
 */
static bool vfio_range_contained(uint64_t first1, uint64_t len1,
                                 uint64_t first2, uint64_t len2)
{
    /* Both subtractions are guarded by the preceding comparisons */
    return first1 >= first2 && len1 <= len2 &&
           first1 - first2 <= len2 - len1;
}
242c00d61d8SAlex Williamson 
/* PCI vendor ID passed to vfio_pci_is() by the ATI quirk probes. */
#define PCI_VENDOR_ID_ATI               0x1002
244c00d61d8SAlex Williamson 
245c00d61d8SAlex Williamson /*
246c00d61d8SAlex Williamson  * Radeon HD cards (HD5450 & HD7850) report the upper byte of the I/O port BAR
247c00d61d8SAlex Williamson  * through VGA register 0x3c3.  On newer cards, the I/O port BAR is always
248c00d61d8SAlex Williamson  * BAR4 (older cards like the X550 used BAR1, but we don't care to support
249c00d61d8SAlex Williamson  * those).  Note that on bare metal, a read of 0x3c3 doesn't always return the
250c00d61d8SAlex Williamson  * I/O port BAR address.  Originally this was coded to return the virtual BAR
251c00d61d8SAlex Williamson  * address only if the physical register read returns the actual BAR address,
252c00d61d8SAlex Williamson  * but users have reported greater success if we return the virtual address
253c00d61d8SAlex Williamson  * unconditionally.
254c00d61d8SAlex Williamson  */
255c00d61d8SAlex Williamson static uint64_t vfio_ati_3c3_quirk_read(void *opaque,
256c00d61d8SAlex Williamson                                         hwaddr addr, unsigned size)
257c00d61d8SAlex Williamson {
258b946d286SAlex Williamson     VFIOPCIDevice *vdev = opaque;
259c00d61d8SAlex Williamson     uint64_t data = vfio_pci_read_config(&vdev->pdev,
260b946d286SAlex Williamson                                          PCI_BASE_ADDRESS_4 + 1, size);
261b946d286SAlex Williamson 
262b946d286SAlex Williamson     trace_vfio_quirk_ati_3c3_read(vdev->vbasedev.name, data);
263c00d61d8SAlex Williamson 
264c00d61d8SAlex Williamson     return data;
265c00d61d8SAlex Williamson }
266c00d61d8SAlex Williamson 
/* MemoryRegionOps for the ATI 0x3c3 quirk; only a read handler is provided. */
static const MemoryRegionOps vfio_ati_3c3_quirk = {
    .read = vfio_ati_3c3_quirk_read,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
271c00d61d8SAlex Williamson 
27229d62771SThomas Huth VFIOQuirk *vfio_quirk_alloc(int nr_mem)
273bcf3c3d0SAlex Williamson {
274bcf3c3d0SAlex Williamson     VFIOQuirk *quirk = g_new0(VFIOQuirk, 1);
275c958c51dSAlex Williamson     QLIST_INIT(&quirk->ioeventfds);
276bcf3c3d0SAlex Williamson     quirk->mem = g_new0(MemoryRegion, nr_mem);
277bcf3c3d0SAlex Williamson     quirk->nr_mem = nr_mem;
278bcf3c3d0SAlex Williamson 
279bcf3c3d0SAlex Williamson     return quirk;
280bcf3c3d0SAlex Williamson }
281bcf3c3d0SAlex Williamson 
2822b1dbd0dSAlex Williamson static void vfio_ioeventfd_exit(VFIOPCIDevice *vdev, VFIOIOEventFD *ioeventfd)
283c958c51dSAlex Williamson {
284c958c51dSAlex Williamson     QLIST_REMOVE(ioeventfd, next);
285c958c51dSAlex Williamson     memory_region_del_eventfd(ioeventfd->mr, ioeventfd->addr, ioeventfd->size,
286c958c51dSAlex Williamson                               true, ioeventfd->data, &ioeventfd->e);
2872b1dbd0dSAlex Williamson 
2882b1dbd0dSAlex Williamson     if (ioeventfd->vfio) {
2892b1dbd0dSAlex Williamson         struct vfio_device_ioeventfd vfio_ioeventfd;
2902b1dbd0dSAlex Williamson 
2912b1dbd0dSAlex Williamson         vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd);
2922b1dbd0dSAlex Williamson         vfio_ioeventfd.flags = ioeventfd->size;
2932b1dbd0dSAlex Williamson         vfio_ioeventfd.data = ioeventfd->data;
2942b1dbd0dSAlex Williamson         vfio_ioeventfd.offset = ioeventfd->region->fd_offset +
2952b1dbd0dSAlex Williamson                                 ioeventfd->region_addr;
2962b1dbd0dSAlex Williamson         vfio_ioeventfd.fd = -1;
2972b1dbd0dSAlex Williamson 
2982b1dbd0dSAlex Williamson         if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd)) {
2992b1dbd0dSAlex Williamson             error_report("Failed to remove vfio ioeventfd for %s+0x%"
3002b1dbd0dSAlex Williamson                          HWADDR_PRIx"[%d]:0x%"PRIx64" (%m)",
3012b1dbd0dSAlex Williamson                          memory_region_name(ioeventfd->mr), ioeventfd->addr,
3022b1dbd0dSAlex Williamson                          ioeventfd->size, ioeventfd->data);
3032b1dbd0dSAlex Williamson         }
3042b1dbd0dSAlex Williamson     } else {
3052b1dbd0dSAlex Williamson         qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
3062b1dbd0dSAlex Williamson                             NULL, NULL, NULL);
3072b1dbd0dSAlex Williamson     }
3082b1dbd0dSAlex Williamson 
309c958c51dSAlex Williamson     event_notifier_cleanup(&ioeventfd->e);
310c958c51dSAlex Williamson     trace_vfio_ioeventfd_exit(memory_region_name(ioeventfd->mr),
311c958c51dSAlex Williamson                               (uint64_t)ioeventfd->addr, ioeventfd->size,
312c958c51dSAlex Williamson                               ioeventfd->data);
313c958c51dSAlex Williamson     g_free(ioeventfd);
314c958c51dSAlex Williamson }
315c958c51dSAlex Williamson 
316c958c51dSAlex Williamson static void vfio_drop_dynamic_eventfds(VFIOPCIDevice *vdev, VFIOQuirk *quirk)
317c958c51dSAlex Williamson {
318c958c51dSAlex Williamson     VFIOIOEventFD *ioeventfd, *tmp;
319c958c51dSAlex Williamson 
320c958c51dSAlex Williamson     QLIST_FOREACH_SAFE(ioeventfd, &quirk->ioeventfds, next, tmp) {
321c958c51dSAlex Williamson         if (ioeventfd->dynamic) {
3222b1dbd0dSAlex Williamson             vfio_ioeventfd_exit(vdev, ioeventfd);
323c958c51dSAlex Williamson         }
324c958c51dSAlex Williamson     }
325c958c51dSAlex Williamson }
326c958c51dSAlex Williamson 
327c958c51dSAlex Williamson static void vfio_ioeventfd_handler(void *opaque)
328c958c51dSAlex Williamson {
329c958c51dSAlex Williamson     VFIOIOEventFD *ioeventfd = opaque;
330c958c51dSAlex Williamson 
331c958c51dSAlex Williamson     if (event_notifier_test_and_clear(&ioeventfd->e)) {
332c958c51dSAlex Williamson         vfio_region_write(ioeventfd->region, ioeventfd->region_addr,
333c958c51dSAlex Williamson                           ioeventfd->data, ioeventfd->size);
334c958c51dSAlex Williamson         trace_vfio_ioeventfd_handler(memory_region_name(ioeventfd->mr),
335c958c51dSAlex Williamson                                      (uint64_t)ioeventfd->addr, ioeventfd->size,
336c958c51dSAlex Williamson                                      ioeventfd->data);
337c958c51dSAlex Williamson     }
338c958c51dSAlex Williamson }
339c958c51dSAlex Williamson 
340c958c51dSAlex Williamson static VFIOIOEventFD *vfio_ioeventfd_init(VFIOPCIDevice *vdev,
341c958c51dSAlex Williamson                                           MemoryRegion *mr, hwaddr addr,
342c958c51dSAlex Williamson                                           unsigned size, uint64_t data,
343c958c51dSAlex Williamson                                           VFIORegion *region,
344c958c51dSAlex Williamson                                           hwaddr region_addr, bool dynamic)
345c958c51dSAlex Williamson {
346c958c51dSAlex Williamson     VFIOIOEventFD *ioeventfd;
347c958c51dSAlex Williamson 
348c958c51dSAlex Williamson     if (vdev->no_kvm_ioeventfd) {
349c958c51dSAlex Williamson         return NULL;
350c958c51dSAlex Williamson     }
351c958c51dSAlex Williamson 
352c958c51dSAlex Williamson     ioeventfd = g_malloc0(sizeof(*ioeventfd));
353c958c51dSAlex Williamson 
354c958c51dSAlex Williamson     if (event_notifier_init(&ioeventfd->e, 0)) {
355c958c51dSAlex Williamson         g_free(ioeventfd);
356c958c51dSAlex Williamson         return NULL;
357c958c51dSAlex Williamson     }
358c958c51dSAlex Williamson 
359c958c51dSAlex Williamson     /*
360c958c51dSAlex Williamson      * MemoryRegion and relative offset, plus additional ioeventfd setup
361c958c51dSAlex Williamson      * parameters for configuring and later tearing down KVM ioeventfd.
362c958c51dSAlex Williamson      */
363c958c51dSAlex Williamson     ioeventfd->mr = mr;
364c958c51dSAlex Williamson     ioeventfd->addr = addr;
365c958c51dSAlex Williamson     ioeventfd->size = size;
366c958c51dSAlex Williamson     ioeventfd->data = data;
367c958c51dSAlex Williamson     ioeventfd->dynamic = dynamic;
368c958c51dSAlex Williamson     /*
369c958c51dSAlex Williamson      * VFIORegion and relative offset for implementing the userspace
370c958c51dSAlex Williamson      * handler.  data & size fields shared for both uses.
371c958c51dSAlex Williamson      */
372c958c51dSAlex Williamson     ioeventfd->region = region;
373c958c51dSAlex Williamson     ioeventfd->region_addr = region_addr;
374c958c51dSAlex Williamson 
3752b1dbd0dSAlex Williamson     if (!vdev->no_vfio_ioeventfd) {
3762b1dbd0dSAlex Williamson         struct vfio_device_ioeventfd vfio_ioeventfd;
3772b1dbd0dSAlex Williamson 
3782b1dbd0dSAlex Williamson         vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd);
3792b1dbd0dSAlex Williamson         vfio_ioeventfd.flags = ioeventfd->size;
3802b1dbd0dSAlex Williamson         vfio_ioeventfd.data = ioeventfd->data;
3812b1dbd0dSAlex Williamson         vfio_ioeventfd.offset = ioeventfd->region->fd_offset +
3822b1dbd0dSAlex Williamson                                 ioeventfd->region_addr;
3832b1dbd0dSAlex Williamson         vfio_ioeventfd.fd = event_notifier_get_fd(&ioeventfd->e);
3842b1dbd0dSAlex Williamson 
3852b1dbd0dSAlex Williamson         ioeventfd->vfio = !ioctl(vdev->vbasedev.fd,
3862b1dbd0dSAlex Williamson                                  VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd);
3872b1dbd0dSAlex Williamson     }
3882b1dbd0dSAlex Williamson 
3892b1dbd0dSAlex Williamson     if (!ioeventfd->vfio) {
390c958c51dSAlex Williamson         qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
391c958c51dSAlex Williamson                             vfio_ioeventfd_handler, NULL, ioeventfd);
3922b1dbd0dSAlex Williamson     }
3932b1dbd0dSAlex Williamson 
394c958c51dSAlex Williamson     memory_region_add_eventfd(ioeventfd->mr, ioeventfd->addr, ioeventfd->size,
395c958c51dSAlex Williamson                               true, ioeventfd->data, &ioeventfd->e);
396c958c51dSAlex Williamson     trace_vfio_ioeventfd_init(memory_region_name(mr), (uint64_t)addr,
3972b1dbd0dSAlex Williamson                               size, data, ioeventfd->vfio);
398c958c51dSAlex Williamson 
399c958c51dSAlex Williamson     return ioeventfd;
400c958c51dSAlex Williamson }
401c958c51dSAlex Williamson 
402c00d61d8SAlex Williamson static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev)
403c00d61d8SAlex Williamson {
404c00d61d8SAlex Williamson     VFIOQuirk *quirk;
405c00d61d8SAlex Williamson 
406c00d61d8SAlex Williamson     /*
407c00d61d8SAlex Williamson      * As long as the BAR is >= 256 bytes it will be aligned such that the
408c00d61d8SAlex Williamson      * lower byte is always zero.  Filter out anything else, if it exists.
409c00d61d8SAlex Williamson      */
410b946d286SAlex Williamson     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
411b946d286SAlex Williamson         !vdev->bars[4].ioport || vdev->bars[4].region.size < 256) {
412c00d61d8SAlex Williamson         return;
413c00d61d8SAlex Williamson     }
414c00d61d8SAlex Williamson 
415bcf3c3d0SAlex Williamson     quirk = vfio_quirk_alloc(1);
416c00d61d8SAlex Williamson 
417b946d286SAlex Williamson     memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, vdev,
418c00d61d8SAlex Williamson                           "vfio-ati-3c3-quirk", 1);
4192d82f8a3SAlex Williamson     memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
4208c4f2348SAlex Williamson                                 3 /* offset 3 bytes from 0x3c0 */, quirk->mem);
421c00d61d8SAlex Williamson 
4222d82f8a3SAlex Williamson     QLIST_INSERT_HEAD(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks,
423c00d61d8SAlex Williamson                       quirk, next);
424c00d61d8SAlex Williamson 
425b946d286SAlex Williamson     trace_vfio_quirk_ati_3c3_probe(vdev->vbasedev.name);
426c00d61d8SAlex Williamson }
427c00d61d8SAlex Williamson 
428c00d61d8SAlex Williamson /*
4290e54f24aSAlex Williamson  * Newer ATI/AMD devices, including HD5450 and HD7850, have a mirror to PCI
430c00d61d8SAlex Williamson  * config space through MMIO BAR2 at offset 0x4000.  Nothing seems to access
431c00d61d8SAlex Williamson  * the MMIO space directly, but a window to this space is provided through
432c00d61d8SAlex Williamson  * I/O port BAR4.  Offset 0x0 is the address register and offset 0x4 is the
433c00d61d8SAlex Williamson  * data register.  When the address is programmed to a range of 0x4000-0x4fff
434c00d61d8SAlex Williamson  * PCI configuration space is available.  Experimentation seems to indicate
4350e54f24aSAlex Williamson  * that read-only may be provided by hardware.
436c00d61d8SAlex Williamson  */
4370e54f24aSAlex Williamson static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr)
438c00d61d8SAlex Williamson {
439c00d61d8SAlex Williamson     VFIOQuirk *quirk;
4400e54f24aSAlex Williamson     VFIOConfigWindowQuirk *window;
441c00d61d8SAlex Williamson 
4420e54f24aSAlex Williamson     /* This windows doesn't seem to be used except by legacy VGA code */
4430e54f24aSAlex Williamson     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
4444d3fc4fdSAlex Williamson         !vdev->vga || nr != 4) {
445c00d61d8SAlex Williamson         return;
446c00d61d8SAlex Williamson     }
447c00d61d8SAlex Williamson 
448bcf3c3d0SAlex Williamson     quirk = vfio_quirk_alloc(2);
4490e54f24aSAlex Williamson     window = quirk->data = g_malloc0(sizeof(*window) +
4500e54f24aSAlex Williamson                                      sizeof(VFIOConfigWindowMatch));
4510e54f24aSAlex Williamson     window->vdev = vdev;
4520e54f24aSAlex Williamson     window->address_offset = 0;
4530e54f24aSAlex Williamson     window->data_offset = 4;
4540e54f24aSAlex Williamson     window->nr_matches = 1;
4550e54f24aSAlex Williamson     window->matches[0].match = 0x4000;
456f5793fd9SAlex Williamson     window->matches[0].mask = vdev->config_size - 1;
4570e54f24aSAlex Williamson     window->bar = nr;
4580e54f24aSAlex Williamson     window->addr_mem = &quirk->mem[0];
4590e54f24aSAlex Williamson     window->data_mem = &quirk->mem[1];
460c00d61d8SAlex Williamson 
4610e54f24aSAlex Williamson     memory_region_init_io(window->addr_mem, OBJECT(vdev),
4620e54f24aSAlex Williamson                           &vfio_generic_window_address_quirk, window,
4630e54f24aSAlex Williamson                           "vfio-ati-bar4-window-address-quirk", 4);
464db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
4650e54f24aSAlex Williamson                                         window->address_offset,
4660e54f24aSAlex Williamson                                         window->addr_mem, 1);
4670e54f24aSAlex Williamson 
4680e54f24aSAlex Williamson     memory_region_init_io(window->data_mem, OBJECT(vdev),
4690e54f24aSAlex Williamson                           &vfio_generic_window_data_quirk, window,
4700e54f24aSAlex Williamson                           "vfio-ati-bar4-window-data-quirk", 4);
471db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
4720e54f24aSAlex Williamson                                         window->data_offset,
4730e54f24aSAlex Williamson                                         window->data_mem, 1);
474c00d61d8SAlex Williamson 
475c00d61d8SAlex Williamson     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
476c00d61d8SAlex Williamson 
4770e54f24aSAlex Williamson     trace_vfio_quirk_ati_bar4_probe(vdev->vbasedev.name);
478c00d61d8SAlex Williamson }
479c00d61d8SAlex Williamson 
480c00d61d8SAlex Williamson /*
4810d38fb1cSAlex Williamson  * Trap the BAR2 MMIO mirror to config space as well.
482c00d61d8SAlex Williamson  */
4830d38fb1cSAlex Williamson static void vfio_probe_ati_bar2_quirk(VFIOPCIDevice *vdev, int nr)
484c00d61d8SAlex Williamson {
485c00d61d8SAlex Williamson     VFIOQuirk *quirk;
4860d38fb1cSAlex Williamson     VFIOConfigMirrorQuirk *mirror;
487c00d61d8SAlex Williamson 
488c00d61d8SAlex Williamson     /* Only enable on newer devices where BAR2 is 64bit */
4890d38fb1cSAlex Williamson     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
4904d3fc4fdSAlex Williamson         !vdev->vga || nr != 2 || !vdev->bars[2].mem64) {
491c00d61d8SAlex Williamson         return;
492c00d61d8SAlex Williamson     }
493c00d61d8SAlex Williamson 
494bcf3c3d0SAlex Williamson     quirk = vfio_quirk_alloc(1);
4950d38fb1cSAlex Williamson     mirror = quirk->data = g_malloc0(sizeof(*mirror));
496bcf3c3d0SAlex Williamson     mirror->mem = quirk->mem;
4970d38fb1cSAlex Williamson     mirror->vdev = vdev;
4980d38fb1cSAlex Williamson     mirror->offset = 0x4000;
4990d38fb1cSAlex Williamson     mirror->bar = nr;
500c00d61d8SAlex Williamson 
5010d38fb1cSAlex Williamson     memory_region_init_io(mirror->mem, OBJECT(vdev),
5020d38fb1cSAlex Williamson                           &vfio_generic_mirror_quirk, mirror,
5030d38fb1cSAlex Williamson                           "vfio-ati-bar2-4000-quirk", PCI_CONFIG_SPACE_SIZE);
504db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
5050d38fb1cSAlex Williamson                                         mirror->offset, mirror->mem, 1);
506c00d61d8SAlex Williamson 
507c00d61d8SAlex Williamson     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
508c00d61d8SAlex Williamson 
5090d38fb1cSAlex Williamson     trace_vfio_quirk_ati_bar2_probe(vdev->vbasedev.name);
510c00d61d8SAlex Williamson }
511c00d61d8SAlex Williamson 
512c00d61d8SAlex Williamson /*
513c00d61d8SAlex Williamson  * Older ATI/AMD cards like the X550 have a similar window to that above.
514c00d61d8SAlex Williamson  * I/O port BAR1 provides a window to a mirror of PCI config space located
515c00d61d8SAlex Williamson  * in BAR2 at offset 0xf00.  We don't care to support such older cards, but
516c00d61d8SAlex Williamson  * note it for future reference.
517c00d61d8SAlex Williamson  */
518c00d61d8SAlex Williamson 
519c00d61d8SAlex Williamson /*
/*
 * Nvidia has several different methods to get to config space, the
 * nouveau project has several of these documented here:
 * https://github.com/pathscale/envytools/tree/master/hwdocs
 *
 * The first quirk is actually not documented in envytools and is found
 * on 10de:01d1 (NVIDIA Corporation G72 [GeForce 7300 LE]).  This is an
 * NV46 chipset.  The backdoor uses the legacy VGA I/O ports to access
 * the mirror of PCI config space found at BAR0 offset 0x1800.  The access
 * sequence first writes 0x338 to I/O port 0x3d4.  The target offset is
 * then written to 0x3d0.  Finally 0x538 is written for a read and 0x738
 * is written for a write to 0x3d4.  The BAR0 offset is then accessible
 * through 0x3d0.  This quirk doesn't seem to be necessary on newer cards
 * that use the I/O port BAR5 window but it doesn't hurt to leave it.
 */
/* Progress through the 0x3d4/0x3d0 backdoor access sequence. */
typedef enum {
    NONE = 0,
    SELECT,
    WINDOW,
    READ,
    WRITE,
} VFIONvidia3d0State;

/* Printable state names for tracing, indexed by VFIONvidia3d0State. */
static const char *nv3d0_states[] = {
    [NONE]   = "NONE",
    [SELECT] = "SELECT",
    [WINDOW] = "WINDOW",
    [READ]   = "READ",
    [WRITE]  = "WRITE",
};
5376029a424SAlex Williamson 
5386029a424SAlex Williamson typedef struct VFIONvidia3d0Quirk {
5396029a424SAlex Williamson     VFIOPCIDevice *vdev;
5406029a424SAlex Williamson     VFIONvidia3d0State state;
5416029a424SAlex Williamson     uint32_t offset;
5426029a424SAlex Williamson } VFIONvidia3d0Quirk;
5436029a424SAlex Williamson 
5446029a424SAlex Williamson static uint64_t vfio_nvidia_3d4_quirk_read(void *opaque,
5456029a424SAlex Williamson                                            hwaddr addr, unsigned size)
5466029a424SAlex Williamson {
5476029a424SAlex Williamson     VFIONvidia3d0Quirk *quirk = opaque;
5486029a424SAlex Williamson     VFIOPCIDevice *vdev = quirk->vdev;
5496029a424SAlex Williamson 
5506029a424SAlex Williamson     quirk->state = NONE;
5516029a424SAlex Williamson 
5522d82f8a3SAlex Williamson     return vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
5536029a424SAlex Williamson                          addr + 0x14, size);
5546029a424SAlex Williamson }
5556029a424SAlex Williamson 
5566029a424SAlex Williamson static void vfio_nvidia_3d4_quirk_write(void *opaque, hwaddr addr,
5576029a424SAlex Williamson                                         uint64_t data, unsigned size)
5586029a424SAlex Williamson {
5596029a424SAlex Williamson     VFIONvidia3d0Quirk *quirk = opaque;
5606029a424SAlex Williamson     VFIOPCIDevice *vdev = quirk->vdev;
5616029a424SAlex Williamson     VFIONvidia3d0State old_state = quirk->state;
5626029a424SAlex Williamson 
5636029a424SAlex Williamson     quirk->state = NONE;
5646029a424SAlex Williamson 
5656029a424SAlex Williamson     switch (data) {
5666029a424SAlex Williamson     case 0x338:
5676029a424SAlex Williamson         if (old_state == NONE) {
5686029a424SAlex Williamson             quirk->state = SELECT;
5696029a424SAlex Williamson             trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
5706029a424SAlex Williamson                                               nv3d0_states[quirk->state]);
5716029a424SAlex Williamson         }
5726029a424SAlex Williamson         break;
5736029a424SAlex Williamson     case 0x538:
5746029a424SAlex Williamson         if (old_state == WINDOW) {
5756029a424SAlex Williamson             quirk->state = READ;
5766029a424SAlex Williamson             trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
5776029a424SAlex Williamson                                               nv3d0_states[quirk->state]);
5786029a424SAlex Williamson         }
5796029a424SAlex Williamson         break;
5806029a424SAlex Williamson     case 0x738:
5816029a424SAlex Williamson         if (old_state == WINDOW) {
5826029a424SAlex Williamson             quirk->state = WRITE;
5836029a424SAlex Williamson             trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
5846029a424SAlex Williamson                                               nv3d0_states[quirk->state]);
5856029a424SAlex Williamson         }
5866029a424SAlex Williamson         break;
5876029a424SAlex Williamson     }
5886029a424SAlex Williamson 
5892d82f8a3SAlex Williamson     vfio_vga_write(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
5906029a424SAlex Williamson                    addr + 0x14, data, size);
5916029a424SAlex Williamson }
5926029a424SAlex Williamson 
5936029a424SAlex Williamson static const MemoryRegionOps vfio_nvidia_3d4_quirk = {
5946029a424SAlex Williamson     .read = vfio_nvidia_3d4_quirk_read,
5956029a424SAlex Williamson     .write = vfio_nvidia_3d4_quirk_write,
5966029a424SAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
597c00d61d8SAlex Williamson };
598c00d61d8SAlex Williamson 
599c00d61d8SAlex Williamson static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque,
600c00d61d8SAlex Williamson                                            hwaddr addr, unsigned size)
601c00d61d8SAlex Williamson {
6026029a424SAlex Williamson     VFIONvidia3d0Quirk *quirk = opaque;
603c00d61d8SAlex Williamson     VFIOPCIDevice *vdev = quirk->vdev;
6046029a424SAlex Williamson     VFIONvidia3d0State old_state = quirk->state;
6052d82f8a3SAlex Williamson     uint64_t data = vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
6066029a424SAlex Williamson                                   addr + 0x10, size);
607c00d61d8SAlex Williamson 
6086029a424SAlex Williamson     quirk->state = NONE;
6096029a424SAlex Williamson 
6106029a424SAlex Williamson     if (old_state == READ &&
6116029a424SAlex Williamson         (quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) {
6126029a424SAlex Williamson         uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1);
6136029a424SAlex Williamson 
6146029a424SAlex Williamson         data = vfio_pci_read_config(&vdev->pdev, offset, size);
6156029a424SAlex Williamson         trace_vfio_quirk_nvidia_3d0_read(vdev->vbasedev.name,
6166029a424SAlex Williamson                                          offset, size, data);
617c00d61d8SAlex Williamson     }
618c00d61d8SAlex Williamson 
619c00d61d8SAlex Williamson     return data;
620c00d61d8SAlex Williamson }
621c00d61d8SAlex Williamson 
622c00d61d8SAlex Williamson static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr,
623c00d61d8SAlex Williamson                                         uint64_t data, unsigned size)
624c00d61d8SAlex Williamson {
6256029a424SAlex Williamson     VFIONvidia3d0Quirk *quirk = opaque;
626c00d61d8SAlex Williamson     VFIOPCIDevice *vdev = quirk->vdev;
6276029a424SAlex Williamson     VFIONvidia3d0State old_state = quirk->state;
628c00d61d8SAlex Williamson 
6296029a424SAlex Williamson     quirk->state = NONE;
6306029a424SAlex Williamson 
6316029a424SAlex Williamson     if (old_state == SELECT) {
6326029a424SAlex Williamson         quirk->offset = (uint32_t)data;
6336029a424SAlex Williamson         quirk->state = WINDOW;
6346029a424SAlex Williamson         trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
6356029a424SAlex Williamson                                           nv3d0_states[quirk->state]);
6366029a424SAlex Williamson     } else if (old_state == WRITE) {
6376029a424SAlex Williamson         if ((quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) {
6386029a424SAlex Williamson             uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1);
6396029a424SAlex Williamson 
6406029a424SAlex Williamson             vfio_pci_write_config(&vdev->pdev, offset, data, size);
6416029a424SAlex Williamson             trace_vfio_quirk_nvidia_3d0_write(vdev->vbasedev.name,
6426029a424SAlex Williamson                                               offset, data, size);
643c00d61d8SAlex Williamson             return;
644c00d61d8SAlex Williamson         }
645c00d61d8SAlex Williamson     }
646c00d61d8SAlex Williamson 
6472d82f8a3SAlex Williamson     vfio_vga_write(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
6486029a424SAlex Williamson                    addr + 0x10, data, size);
649c00d61d8SAlex Williamson }
650c00d61d8SAlex Williamson 
651c00d61d8SAlex Williamson static const MemoryRegionOps vfio_nvidia_3d0_quirk = {
652c00d61d8SAlex Williamson     .read = vfio_nvidia_3d0_quirk_read,
653c00d61d8SAlex Williamson     .write = vfio_nvidia_3d0_quirk_write,
654c00d61d8SAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
655c00d61d8SAlex Williamson };
656c00d61d8SAlex Williamson 
657c00d61d8SAlex Williamson static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev)
658c00d61d8SAlex Williamson {
659c00d61d8SAlex Williamson     VFIOQuirk *quirk;
6606029a424SAlex Williamson     VFIONvidia3d0Quirk *data;
661c00d61d8SAlex Williamson 
662db32d0f4SAlex Williamson     if (vdev->no_geforce_quirks ||
663db32d0f4SAlex Williamson         !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
664c00d61d8SAlex Williamson         !vdev->bars[1].region.size) {
665c00d61d8SAlex Williamson         return;
666c00d61d8SAlex Williamson     }
667c00d61d8SAlex Williamson 
668bcf3c3d0SAlex Williamson     quirk = vfio_quirk_alloc(2);
6696029a424SAlex Williamson     quirk->data = data = g_malloc0(sizeof(*data));
6706029a424SAlex Williamson     data->vdev = vdev;
671c00d61d8SAlex Williamson 
6726029a424SAlex Williamson     memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_nvidia_3d4_quirk,
6736029a424SAlex Williamson                           data, "vfio-nvidia-3d4-quirk", 2);
6742d82f8a3SAlex Williamson     memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
6756029a424SAlex Williamson                                 0x14 /* 0x3c0 + 0x14 */, &quirk->mem[0]);
6766029a424SAlex Williamson 
6776029a424SAlex Williamson     memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_nvidia_3d0_quirk,
6786029a424SAlex Williamson                           data, "vfio-nvidia-3d0-quirk", 2);
6792d82f8a3SAlex Williamson     memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
6806029a424SAlex Williamson                                 0x10 /* 0x3c0 + 0x10 */, &quirk->mem[1]);
681c00d61d8SAlex Williamson 
6822d82f8a3SAlex Williamson     QLIST_INSERT_HEAD(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks,
683c00d61d8SAlex Williamson                       quirk, next);
684c00d61d8SAlex Williamson 
6856029a424SAlex Williamson     trace_vfio_quirk_nvidia_3d0_probe(vdev->vbasedev.name);
686c00d61d8SAlex Williamson }
687c00d61d8SAlex Williamson 
688c00d61d8SAlex Williamson /*
/*
 * The second quirk is documented in envytools.  The I/O port BAR5 is just
 * a set of address/data ports to the MMIO BARs.  The BAR we care about is
 * again BAR0.  This backdoor is apparently a bit newer than the one above,
 * so we need to trap not only the 256 bytes @0x1800 but also the 4k
 * @0x88000, where all of PCI config space, including extended space, is
 * available.
 */
6950e54f24aSAlex Williamson typedef struct VFIONvidiaBAR5Quirk {
6960e54f24aSAlex Williamson     uint32_t master;
6970e54f24aSAlex Williamson     uint32_t enable;
6980e54f24aSAlex Williamson     MemoryRegion *addr_mem;
6990e54f24aSAlex Williamson     MemoryRegion *data_mem;
7000e54f24aSAlex Williamson     bool enabled;
7010e54f24aSAlex Williamson     VFIOConfigWindowQuirk window; /* last for match data */
7020e54f24aSAlex Williamson } VFIONvidiaBAR5Quirk;
703c00d61d8SAlex Williamson 
7040e54f24aSAlex Williamson static void vfio_nvidia_bar5_enable(VFIONvidiaBAR5Quirk *bar5)
7050e54f24aSAlex Williamson {
7060e54f24aSAlex Williamson     VFIOPCIDevice *vdev = bar5->window.vdev;
7070e54f24aSAlex Williamson 
7080e54f24aSAlex Williamson     if (((bar5->master & bar5->enable) & 0x1) == bar5->enabled) {
7090e54f24aSAlex Williamson         return;
7100e54f24aSAlex Williamson     }
7110e54f24aSAlex Williamson 
7120e54f24aSAlex Williamson     bar5->enabled = !bar5->enabled;
7130e54f24aSAlex Williamson     trace_vfio_quirk_nvidia_bar5_state(vdev->vbasedev.name,
7140e54f24aSAlex Williamson                                        bar5->enabled ?  "Enable" : "Disable");
7150e54f24aSAlex Williamson     memory_region_set_enabled(bar5->addr_mem, bar5->enabled);
7160e54f24aSAlex Williamson     memory_region_set_enabled(bar5->data_mem, bar5->enabled);
7170e54f24aSAlex Williamson }
7180e54f24aSAlex Williamson 
7190e54f24aSAlex Williamson static uint64_t vfio_nvidia_bar5_quirk_master_read(void *opaque,
7200e54f24aSAlex Williamson                                                    hwaddr addr, unsigned size)
7210e54f24aSAlex Williamson {
7220e54f24aSAlex Williamson     VFIONvidiaBAR5Quirk *bar5 = opaque;
7230e54f24aSAlex Williamson     VFIOPCIDevice *vdev = bar5->window.vdev;
7240e54f24aSAlex Williamson 
7250e54f24aSAlex Williamson     return vfio_region_read(&vdev->bars[5].region, addr, size);
7260e54f24aSAlex Williamson }
7270e54f24aSAlex Williamson 
7280e54f24aSAlex Williamson static void vfio_nvidia_bar5_quirk_master_write(void *opaque, hwaddr addr,
729c00d61d8SAlex Williamson                                                 uint64_t data, unsigned size)
730c00d61d8SAlex Williamson {
7310e54f24aSAlex Williamson     VFIONvidiaBAR5Quirk *bar5 = opaque;
7320e54f24aSAlex Williamson     VFIOPCIDevice *vdev = bar5->window.vdev;
733c00d61d8SAlex Williamson 
7340e54f24aSAlex Williamson     vfio_region_write(&vdev->bars[5].region, addr, data, size);
7350e54f24aSAlex Williamson 
7360e54f24aSAlex Williamson     bar5->master = data;
7370e54f24aSAlex Williamson     vfio_nvidia_bar5_enable(bar5);
738c00d61d8SAlex Williamson }
739c00d61d8SAlex Williamson 
7400e54f24aSAlex Williamson static const MemoryRegionOps vfio_nvidia_bar5_quirk_master = {
7410e54f24aSAlex Williamson     .read = vfio_nvidia_bar5_quirk_master_read,
7420e54f24aSAlex Williamson     .write = vfio_nvidia_bar5_quirk_master_write,
743c00d61d8SAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
744c00d61d8SAlex Williamson };
745c00d61d8SAlex Williamson 
7460e54f24aSAlex Williamson static uint64_t vfio_nvidia_bar5_quirk_enable_read(void *opaque,
7470e54f24aSAlex Williamson                                                    hwaddr addr, unsigned size)
748c00d61d8SAlex Williamson {
7490e54f24aSAlex Williamson     VFIONvidiaBAR5Quirk *bar5 = opaque;
7500e54f24aSAlex Williamson     VFIOPCIDevice *vdev = bar5->window.vdev;
751c00d61d8SAlex Williamson 
7520e54f24aSAlex Williamson     return vfio_region_read(&vdev->bars[5].region, addr + 4, size);
7530e54f24aSAlex Williamson }
7540e54f24aSAlex Williamson 
7550e54f24aSAlex Williamson static void vfio_nvidia_bar5_quirk_enable_write(void *opaque, hwaddr addr,
7560e54f24aSAlex Williamson                                                 uint64_t data, unsigned size)
7570e54f24aSAlex Williamson {
7580e54f24aSAlex Williamson     VFIONvidiaBAR5Quirk *bar5 = opaque;
7590e54f24aSAlex Williamson     VFIOPCIDevice *vdev = bar5->window.vdev;
7600e54f24aSAlex Williamson 
7610e54f24aSAlex Williamson     vfio_region_write(&vdev->bars[5].region, addr + 4, data, size);
7620e54f24aSAlex Williamson 
7630e54f24aSAlex Williamson     bar5->enable = data;
7640e54f24aSAlex Williamson     vfio_nvidia_bar5_enable(bar5);
7650e54f24aSAlex Williamson }
7660e54f24aSAlex Williamson 
7670e54f24aSAlex Williamson static const MemoryRegionOps vfio_nvidia_bar5_quirk_enable = {
7680e54f24aSAlex Williamson     .read = vfio_nvidia_bar5_quirk_enable_read,
7690e54f24aSAlex Williamson     .write = vfio_nvidia_bar5_quirk_enable_write,
7700e54f24aSAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
7710e54f24aSAlex Williamson };
7720e54f24aSAlex Williamson 
7730e54f24aSAlex Williamson static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
7740e54f24aSAlex Williamson {
7750e54f24aSAlex Williamson     VFIOQuirk *quirk;
7760e54f24aSAlex Williamson     VFIONvidiaBAR5Quirk *bar5;
7770e54f24aSAlex Williamson     VFIOConfigWindowQuirk *window;
7780e54f24aSAlex Williamson 
779db32d0f4SAlex Williamson     if (vdev->no_geforce_quirks ||
780db32d0f4SAlex Williamson         !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
7818f419c5bSAlex Williamson         !vdev->vga || nr != 5 || !vdev->bars[5].ioport) {
782c00d61d8SAlex Williamson         return;
783c00d61d8SAlex Williamson     }
784c00d61d8SAlex Williamson 
785bcf3c3d0SAlex Williamson     quirk = vfio_quirk_alloc(4);
7860e54f24aSAlex Williamson     bar5 = quirk->data = g_malloc0(sizeof(*bar5) +
7870e54f24aSAlex Williamson                                    (sizeof(VFIOConfigWindowMatch) * 2));
7880e54f24aSAlex Williamson     window = &bar5->window;
789c00d61d8SAlex Williamson 
7900e54f24aSAlex Williamson     window->vdev = vdev;
7910e54f24aSAlex Williamson     window->address_offset = 0x8;
7920e54f24aSAlex Williamson     window->data_offset = 0xc;
7930e54f24aSAlex Williamson     window->nr_matches = 2;
7940e54f24aSAlex Williamson     window->matches[0].match = 0x1800;
7950e54f24aSAlex Williamson     window->matches[0].mask = PCI_CONFIG_SPACE_SIZE - 1;
7960e54f24aSAlex Williamson     window->matches[1].match = 0x88000;
797f5793fd9SAlex Williamson     window->matches[1].mask = vdev->config_size - 1;
7980e54f24aSAlex Williamson     window->bar = nr;
7990e54f24aSAlex Williamson     window->addr_mem = bar5->addr_mem = &quirk->mem[0];
8000e54f24aSAlex Williamson     window->data_mem = bar5->data_mem = &quirk->mem[1];
8010e54f24aSAlex Williamson 
8020e54f24aSAlex Williamson     memory_region_init_io(window->addr_mem, OBJECT(vdev),
8030e54f24aSAlex Williamson                           &vfio_generic_window_address_quirk, window,
8040e54f24aSAlex Williamson                           "vfio-nvidia-bar5-window-address-quirk", 4);
805db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
8060e54f24aSAlex Williamson                                         window->address_offset,
8070e54f24aSAlex Williamson                                         window->addr_mem, 1);
8080e54f24aSAlex Williamson     memory_region_set_enabled(window->addr_mem, false);
8090e54f24aSAlex Williamson 
8100e54f24aSAlex Williamson     memory_region_init_io(window->data_mem, OBJECT(vdev),
8110e54f24aSAlex Williamson                           &vfio_generic_window_data_quirk, window,
8120e54f24aSAlex Williamson                           "vfio-nvidia-bar5-window-data-quirk", 4);
813db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
8140e54f24aSAlex Williamson                                         window->data_offset,
8150e54f24aSAlex Williamson                                         window->data_mem, 1);
8160e54f24aSAlex Williamson     memory_region_set_enabled(window->data_mem, false);
8170e54f24aSAlex Williamson 
8180e54f24aSAlex Williamson     memory_region_init_io(&quirk->mem[2], OBJECT(vdev),
8190e54f24aSAlex Williamson                           &vfio_nvidia_bar5_quirk_master, bar5,
8200e54f24aSAlex Williamson                           "vfio-nvidia-bar5-master-quirk", 4);
821db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
8220e54f24aSAlex Williamson                                         0, &quirk->mem[2], 1);
8230e54f24aSAlex Williamson 
8240e54f24aSAlex Williamson     memory_region_init_io(&quirk->mem[3], OBJECT(vdev),
8250e54f24aSAlex Williamson                           &vfio_nvidia_bar5_quirk_enable, bar5,
8260e54f24aSAlex Williamson                           "vfio-nvidia-bar5-enable-quirk", 4);
827db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
8280e54f24aSAlex Williamson                                         4, &quirk->mem[3], 1);
829c00d61d8SAlex Williamson 
830c00d61d8SAlex Williamson     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
831c00d61d8SAlex Williamson 
8320e54f24aSAlex Williamson     trace_vfio_quirk_nvidia_bar5_probe(vdev->vbasedev.name);
833c00d61d8SAlex Williamson }
834c00d61d8SAlex Williamson 
835c958c51dSAlex Williamson typedef struct LastDataSet {
836c958c51dSAlex Williamson     VFIOQuirk *quirk;
837c958c51dSAlex Williamson     hwaddr addr;
838c958c51dSAlex Williamson     uint64_t data;
839c958c51dSAlex Williamson     unsigned size;
840c958c51dSAlex Williamson     int hits;
841c958c51dSAlex Williamson     int added;
842c958c51dSAlex Williamson } LastDataSet;
843c958c51dSAlex Williamson 
/* Upper bound on dynamically added ioeventfds per mirror quirk */
#define MAX_DYN_IOEVENTFD 10
/* Successive identical writes required before an ioeventfd is added */
#define HITS_FOR_IOEVENTFD 10
846c958c51dSAlex Williamson 
8470d38fb1cSAlex Williamson /*
8480d38fb1cSAlex Williamson  * Finally, BAR0 itself.  We want to redirect any accesses to either
8490d38fb1cSAlex Williamson  * 0x1800 or 0x88000 through the PCI config space access functions.
8500d38fb1cSAlex Williamson  */
8510d38fb1cSAlex Williamson static void vfio_nvidia_quirk_mirror_write(void *opaque, hwaddr addr,
852c00d61d8SAlex Williamson                                            uint64_t data, unsigned size)
853c00d61d8SAlex Williamson {
8540d38fb1cSAlex Williamson     VFIOConfigMirrorQuirk *mirror = opaque;
8550d38fb1cSAlex Williamson     VFIOPCIDevice *vdev = mirror->vdev;
856c00d61d8SAlex Williamson     PCIDevice *pdev = &vdev->pdev;
857c958c51dSAlex Williamson     LastDataSet *last = (LastDataSet *)&mirror->data;
858c00d61d8SAlex Williamson 
8590d38fb1cSAlex Williamson     vfio_generic_quirk_mirror_write(opaque, addr, data, size);
860c00d61d8SAlex Williamson 
861c00d61d8SAlex Williamson     /*
862c00d61d8SAlex Williamson      * Nvidia seems to acknowledge MSI interrupts by writing 0xff to the
863c00d61d8SAlex Williamson      * MSI capability ID register.  Both the ID and next register are
864c00d61d8SAlex Williamson      * read-only, so we allow writes covering either of those to real hw.
865c00d61d8SAlex Williamson      */
866c00d61d8SAlex Williamson     if ((pdev->cap_present & QEMU_PCI_CAP_MSI) &&
867c00d61d8SAlex Williamson         vfio_range_contained(addr, size, pdev->msi_cap, PCI_MSI_FLAGS)) {
8680d38fb1cSAlex Williamson         vfio_region_write(&vdev->bars[mirror->bar].region,
8690d38fb1cSAlex Williamson                           addr + mirror->offset, data, size);
8700d38fb1cSAlex Williamson         trace_vfio_quirk_nvidia_bar0_msi_ack(vdev->vbasedev.name);
871c00d61d8SAlex Williamson     }
872c958c51dSAlex Williamson 
873c958c51dSAlex Williamson     /*
874c958c51dSAlex Williamson      * Automatically add an ioeventfd to handle any repeated write with the
875c958c51dSAlex Williamson      * same data and size above the standard PCI config space header.  This is
876c958c51dSAlex Williamson      * primarily expected to accelerate the MSI-ACK behavior, such as noted
877c958c51dSAlex Williamson      * above.  Current hardware/drivers should trigger an ioeventfd at config
878c958c51dSAlex Williamson      * offset 0x704 (region offset 0x88704), with data 0x0, size 4.
879c958c51dSAlex Williamson      *
880c958c51dSAlex Williamson      * The criteria of 10 successive hits is arbitrary but reliably adds the
881c958c51dSAlex Williamson      * MSI-ACK region.  Note that as some writes are bypassed via the ioeventfd,
882c958c51dSAlex Williamson      * the remaining ones have a greater chance of being seen successively.
883c958c51dSAlex Williamson      * To avoid the pathological case of burning up all of QEMU's open file
884c958c51dSAlex Williamson      * handles, arbitrarily limit this algorithm from adding no more than 10
885c958c51dSAlex Williamson      * ioeventfds, print an error if we would have added an 11th, and then
886c958c51dSAlex Williamson      * stop counting.
887c958c51dSAlex Williamson      */
888c958c51dSAlex Williamson     if (!vdev->no_kvm_ioeventfd &&
889c958c51dSAlex Williamson         addr >= PCI_STD_HEADER_SIZEOF && last->added <= MAX_DYN_IOEVENTFD) {
890c958c51dSAlex Williamson         if (addr != last->addr || data != last->data || size != last->size) {
891c958c51dSAlex Williamson             last->addr = addr;
892c958c51dSAlex Williamson             last->data = data;
893c958c51dSAlex Williamson             last->size = size;
894c958c51dSAlex Williamson             last->hits = 1;
895c958c51dSAlex Williamson         } else if (++last->hits >= HITS_FOR_IOEVENTFD) {
896c958c51dSAlex Williamson             if (last->added < MAX_DYN_IOEVENTFD) {
897c958c51dSAlex Williamson                 VFIOIOEventFD *ioeventfd;
898c958c51dSAlex Williamson                 ioeventfd = vfio_ioeventfd_init(vdev, mirror->mem, addr, size,
899c958c51dSAlex Williamson                                         data, &vdev->bars[mirror->bar].region,
900c958c51dSAlex Williamson                                         mirror->offset + addr, true);
901c958c51dSAlex Williamson                 if (ioeventfd) {
902c958c51dSAlex Williamson                     VFIOQuirk *quirk = last->quirk;
903c958c51dSAlex Williamson 
904c958c51dSAlex Williamson                     QLIST_INSERT_HEAD(&quirk->ioeventfds, ioeventfd, next);
905c958c51dSAlex Williamson                     last->added++;
906c958c51dSAlex Williamson                 }
907c958c51dSAlex Williamson             } else {
908c958c51dSAlex Williamson                 last->added++;
909c958c51dSAlex Williamson                 warn_report("NVIDIA ioeventfd queue full for %s, unable to "
910c958c51dSAlex Williamson                             "accelerate 0x%"HWADDR_PRIx", data 0x%"PRIx64", "
911c958c51dSAlex Williamson                             "size %u", vdev->vbasedev.name, addr, data, size);
912c958c51dSAlex Williamson             }
913c958c51dSAlex Williamson         }
914c958c51dSAlex Williamson     }
915c00d61d8SAlex Williamson }
916c00d61d8SAlex Williamson 
9170d38fb1cSAlex Williamson static const MemoryRegionOps vfio_nvidia_mirror_quirk = {
9180d38fb1cSAlex Williamson     .read = vfio_generic_quirk_mirror_read,
9190d38fb1cSAlex Williamson     .write = vfio_nvidia_quirk_mirror_write,
920c00d61d8SAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
921c00d61d8SAlex Williamson };
922c00d61d8SAlex Williamson 
923c958c51dSAlex Williamson static void vfio_nvidia_bar0_quirk_reset(VFIOPCIDevice *vdev, VFIOQuirk *quirk)
924c958c51dSAlex Williamson {
925c958c51dSAlex Williamson     VFIOConfigMirrorQuirk *mirror = quirk->data;
926c958c51dSAlex Williamson     LastDataSet *last = (LastDataSet *)&mirror->data;
927c958c51dSAlex Williamson 
928c958c51dSAlex Williamson     last->addr = last->data = last->size = last->hits = last->added = 0;
929c958c51dSAlex Williamson 
930c958c51dSAlex Williamson     vfio_drop_dynamic_eventfds(vdev, quirk);
931c958c51dSAlex Williamson }
932c958c51dSAlex Williamson 
9330d38fb1cSAlex Williamson static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
934c00d61d8SAlex Williamson {
935c00d61d8SAlex Williamson     VFIOQuirk *quirk;
9360d38fb1cSAlex Williamson     VFIOConfigMirrorQuirk *mirror;
937c958c51dSAlex Williamson     LastDataSet *last;
938c00d61d8SAlex Williamson 
939db32d0f4SAlex Williamson     if (vdev->no_geforce_quirks ||
940db32d0f4SAlex Williamson         !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
9410d38fb1cSAlex Williamson         !vfio_is_vga(vdev) || nr != 0) {
942c00d61d8SAlex Williamson         return;
943c00d61d8SAlex Williamson     }
944c00d61d8SAlex Williamson 
945bcf3c3d0SAlex Williamson     quirk = vfio_quirk_alloc(1);
946c958c51dSAlex Williamson     quirk->reset = vfio_nvidia_bar0_quirk_reset;
947c958c51dSAlex Williamson     mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet));
948bcf3c3d0SAlex Williamson     mirror->mem = quirk->mem;
9490d38fb1cSAlex Williamson     mirror->vdev = vdev;
9500d38fb1cSAlex Williamson     mirror->offset = 0x88000;
9510d38fb1cSAlex Williamson     mirror->bar = nr;
952c958c51dSAlex Williamson     last = (LastDataSet *)&mirror->data;
953c958c51dSAlex Williamson     last->quirk = quirk;
954c00d61d8SAlex Williamson 
9550d38fb1cSAlex Williamson     memory_region_init_io(mirror->mem, OBJECT(vdev),
9560d38fb1cSAlex Williamson                           &vfio_nvidia_mirror_quirk, mirror,
9570d38fb1cSAlex Williamson                           "vfio-nvidia-bar0-88000-mirror-quirk",
958f5793fd9SAlex Williamson                           vdev->config_size);
959db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
9600d38fb1cSAlex Williamson                                         mirror->offset, mirror->mem, 1);
961c00d61d8SAlex Williamson 
962c00d61d8SAlex Williamson     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
963c00d61d8SAlex Williamson 
9640d38fb1cSAlex Williamson     /* The 0x1800 offset mirror only seems to get used by legacy VGA */
9654d3fc4fdSAlex Williamson     if (vdev->vga) {
966bcf3c3d0SAlex Williamson         quirk = vfio_quirk_alloc(1);
967c958c51dSAlex Williamson         quirk->reset = vfio_nvidia_bar0_quirk_reset;
968c958c51dSAlex Williamson         mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet));
969bcf3c3d0SAlex Williamson         mirror->mem = quirk->mem;
9700d38fb1cSAlex Williamson         mirror->vdev = vdev;
9710d38fb1cSAlex Williamson         mirror->offset = 0x1800;
9720d38fb1cSAlex Williamson         mirror->bar = nr;
973c958c51dSAlex Williamson         last = (LastDataSet *)&mirror->data;
974c958c51dSAlex Williamson         last->quirk = quirk;
975c00d61d8SAlex Williamson 
9760d38fb1cSAlex Williamson         memory_region_init_io(mirror->mem, OBJECT(vdev),
9770d38fb1cSAlex Williamson                               &vfio_nvidia_mirror_quirk, mirror,
9780d38fb1cSAlex Williamson                               "vfio-nvidia-bar0-1800-mirror-quirk",
9790d38fb1cSAlex Williamson                               PCI_CONFIG_SPACE_SIZE);
980db0da029SAlex Williamson         memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
9810d38fb1cSAlex Williamson                                             mirror->offset, mirror->mem, 1);
982c00d61d8SAlex Williamson 
983c00d61d8SAlex Williamson         QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
9840d38fb1cSAlex Williamson     }
985c00d61d8SAlex Williamson 
9860d38fb1cSAlex Williamson     trace_vfio_quirk_nvidia_bar0_probe(vdev->vbasedev.name);
987c00d61d8SAlex Williamson }
988c00d61d8SAlex Williamson 
989c00d61d8SAlex Williamson /*
990c00d61d8SAlex Williamson  * TODO - Some Nvidia devices provide config access to their companion HDA
991c00d61d8SAlex Williamson  * device and even to their parent bridge via these config space mirrors.
992c00d61d8SAlex Williamson  * Add quirks for those regions.
993c00d61d8SAlex Williamson  */
994c00d61d8SAlex Williamson 
/* PCI vendor ID used to match Realtek devices */
#define PCI_VENDOR_ID_REALTEK 0x10ec
996c00d61d8SAlex Williamson 
997c00d61d8SAlex Williamson /*
998c00d61d8SAlex Williamson  * RTL8168 devices have a backdoor that can access the MSI-X table.  At BAR2
999c00d61d8SAlex Williamson  * offset 0x70 there is a dword data register, offset 0x74 is a dword address
1000c00d61d8SAlex Williamson  * register.  According to the Linux r8169 driver, the MSI-X table is addressed
1001c00d61d8SAlex Williamson  * when the "type" portion of the address register is set to 0x1.  This appears
1002c00d61d8SAlex Williamson  * to be bits 16:30.  Bit 31 is both a write indicator and some sort of
1003c00d61d8SAlex Williamson  * "address latched" indicator.  Bits 12:15 are a mask field, which we can
1004c00d61d8SAlex Williamson  * ignore because the MSI-X table should always be accessed as a dword (full
1005c00d61d8SAlex Williamson  * mask).  Bits 0:11 is offset within the type.
1006c00d61d8SAlex Williamson  *
1007c00d61d8SAlex Williamson  * Example trace:
1008c00d61d8SAlex Williamson  *
1009c00d61d8SAlex Williamson  * Read from MSI-X table offset 0
1010c00d61d8SAlex Williamson  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x1f000, 4) // store read addr
1011c00d61d8SAlex Williamson  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x8001f000 // latch
1012c00d61d8SAlex Williamson  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x70, 4) = 0xfee00398 // read data
1013c00d61d8SAlex Williamson  *
1014c00d61d8SAlex Williamson  * Write 0xfee00000 to MSI-X table offset 0
1015c00d61d8SAlex Williamson  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x70, 0xfee00000, 4) // write data
1016c00d61d8SAlex Williamson  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x8001f000, 4) // do write
1017c00d61d8SAlex Williamson  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x1f000 // complete
1018c00d61d8SAlex Williamson  */
1019954258a5SAlex Williamson typedef struct VFIOrtl8168Quirk {
1020954258a5SAlex Williamson     VFIOPCIDevice *vdev;
1021954258a5SAlex Williamson     uint32_t addr;
1022954258a5SAlex Williamson     uint32_t data;
1023954258a5SAlex Williamson     bool enabled;
1024954258a5SAlex Williamson } VFIOrtl8168Quirk;
1025954258a5SAlex Williamson 
1026954258a5SAlex Williamson static uint64_t vfio_rtl8168_quirk_address_read(void *opaque,
1027c00d61d8SAlex Williamson                                                 hwaddr addr, unsigned size)
1028c00d61d8SAlex Williamson {
1029954258a5SAlex Williamson     VFIOrtl8168Quirk *rtl = opaque;
1030954258a5SAlex Williamson     VFIOPCIDevice *vdev = rtl->vdev;
1031954258a5SAlex Williamson     uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x74, size);
1032c00d61d8SAlex Williamson 
1033954258a5SAlex Williamson     if (rtl->enabled) {
1034954258a5SAlex Williamson         data = rtl->addr ^ 0x80000000U; /* latch/complete */
1035954258a5SAlex Williamson         trace_vfio_quirk_rtl8168_fake_latch(vdev->vbasedev.name, data);
1036c00d61d8SAlex Williamson     }
1037c00d61d8SAlex Williamson 
1038954258a5SAlex Williamson     return data;
1039c00d61d8SAlex Williamson }
1040c00d61d8SAlex Williamson 
1041954258a5SAlex Williamson static void vfio_rtl8168_quirk_address_write(void *opaque, hwaddr addr,
1042c00d61d8SAlex Williamson                                              uint64_t data, unsigned size)
1043c00d61d8SAlex Williamson {
1044954258a5SAlex Williamson     VFIOrtl8168Quirk *rtl = opaque;
1045954258a5SAlex Williamson     VFIOPCIDevice *vdev = rtl->vdev;
1046c00d61d8SAlex Williamson 
1047954258a5SAlex Williamson     rtl->enabled = false;
1048954258a5SAlex Williamson 
1049c00d61d8SAlex Williamson     if ((data & 0x7fff0000) == 0x10000) { /* MSI-X table */
1050954258a5SAlex Williamson         rtl->enabled = true;
1051954258a5SAlex Williamson         rtl->addr = (uint32_t)data;
1052c00d61d8SAlex Williamson 
1053c00d61d8SAlex Williamson         if (data & 0x80000000U) { /* Do write */
1054c00d61d8SAlex Williamson             if (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) {
1055c00d61d8SAlex Williamson                 hwaddr offset = data & 0xfff;
1056954258a5SAlex Williamson                 uint64_t val = rtl->data;
1057c00d61d8SAlex Williamson 
1058954258a5SAlex Williamson                 trace_vfio_quirk_rtl8168_msix_write(vdev->vbasedev.name,
1059c00d61d8SAlex Williamson                                                     (uint16_t)offset, val);
1060c00d61d8SAlex Williamson 
1061c00d61d8SAlex Williamson                 /* Write to the proper guest MSI-X table instead */
1062c00d61d8SAlex Williamson                 memory_region_dispatch_write(&vdev->pdev.msix_table_mmio,
1063d5d680caSTony Nguyen                                              offset, val,
1064d5d680caSTony Nguyen                                              size_memop(size) | MO_LE,
1065c00d61d8SAlex Williamson                                              MEMTXATTRS_UNSPECIFIED);
1066c00d61d8SAlex Williamson             }
1067c00d61d8SAlex Williamson             return; /* Do not write guest MSI-X data to hardware */
1068c00d61d8SAlex Williamson         }
1069c00d61d8SAlex Williamson     }
1070c00d61d8SAlex Williamson 
1071954258a5SAlex Williamson     vfio_region_write(&vdev->bars[2].region, addr + 0x74, data, size);
1072c00d61d8SAlex Williamson }
1073c00d61d8SAlex Williamson 
1074954258a5SAlex Williamson static const MemoryRegionOps vfio_rtl_address_quirk = {
1075954258a5SAlex Williamson     .read = vfio_rtl8168_quirk_address_read,
1076954258a5SAlex Williamson     .write = vfio_rtl8168_quirk_address_write,
1077c00d61d8SAlex Williamson     .valid = {
1078c00d61d8SAlex Williamson         .min_access_size = 4,
1079c00d61d8SAlex Williamson         .max_access_size = 4,
1080c00d61d8SAlex Williamson         .unaligned = false,
1081c00d61d8SAlex Williamson     },
1082c00d61d8SAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
1083c00d61d8SAlex Williamson };
1084c00d61d8SAlex Williamson 
1085954258a5SAlex Williamson static uint64_t vfio_rtl8168_quirk_data_read(void *opaque,
1086954258a5SAlex Williamson                                              hwaddr addr, unsigned size)
1087c00d61d8SAlex Williamson {
1088954258a5SAlex Williamson     VFIOrtl8168Quirk *rtl = opaque;
1089954258a5SAlex Williamson     VFIOPCIDevice *vdev = rtl->vdev;
109031e6a7b1SThorsten Kohfeldt     uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x70, size);
1091c00d61d8SAlex Williamson 
1092954258a5SAlex Williamson     if (rtl->enabled && (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) {
1093954258a5SAlex Williamson         hwaddr offset = rtl->addr & 0xfff;
1094954258a5SAlex Williamson         memory_region_dispatch_read(&vdev->pdev.msix_table_mmio, offset,
1095d5d680caSTony Nguyen                                     &data, size_memop(size) | MO_LE,
1096475fbf0aSTony Nguyen                                     MEMTXATTRS_UNSPECIFIED);
1097954258a5SAlex Williamson         trace_vfio_quirk_rtl8168_msix_read(vdev->vbasedev.name, offset, data);
1098954258a5SAlex Williamson     }
1099954258a5SAlex Williamson 
1100954258a5SAlex Williamson     return data;
1101954258a5SAlex Williamson }
1102954258a5SAlex Williamson 
1103954258a5SAlex Williamson static void vfio_rtl8168_quirk_data_write(void *opaque, hwaddr addr,
1104954258a5SAlex Williamson                                           uint64_t data, unsigned size)
1105954258a5SAlex Williamson {
1106954258a5SAlex Williamson     VFIOrtl8168Quirk *rtl = opaque;
1107954258a5SAlex Williamson     VFIOPCIDevice *vdev = rtl->vdev;
1108954258a5SAlex Williamson 
1109954258a5SAlex Williamson     rtl->data = (uint32_t)data;
1110954258a5SAlex Williamson 
1111954258a5SAlex Williamson     vfio_region_write(&vdev->bars[2].region, addr + 0x70, data, size);
1112954258a5SAlex Williamson }
1113954258a5SAlex Williamson 
1114954258a5SAlex Williamson static const MemoryRegionOps vfio_rtl_data_quirk = {
1115954258a5SAlex Williamson     .read = vfio_rtl8168_quirk_data_read,
1116954258a5SAlex Williamson     .write = vfio_rtl8168_quirk_data_write,
1117954258a5SAlex Williamson     .valid = {
1118954258a5SAlex Williamson         .min_access_size = 4,
1119954258a5SAlex Williamson         .max_access_size = 4,
1120954258a5SAlex Williamson         .unaligned = false,
1121954258a5SAlex Williamson     },
1122954258a5SAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
1123954258a5SAlex Williamson };
1124954258a5SAlex Williamson 
1125954258a5SAlex Williamson static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr)
1126954258a5SAlex Williamson {
1127954258a5SAlex Williamson     VFIOQuirk *quirk;
1128954258a5SAlex Williamson     VFIOrtl8168Quirk *rtl;
1129954258a5SAlex Williamson 
1130954258a5SAlex Williamson     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_REALTEK, 0x8168) || nr != 2) {
1131c00d61d8SAlex Williamson         return;
1132c00d61d8SAlex Williamson     }
1133c00d61d8SAlex Williamson 
1134bcf3c3d0SAlex Williamson     quirk = vfio_quirk_alloc(2);
1135954258a5SAlex Williamson     quirk->data = rtl = g_malloc0(sizeof(*rtl));
1136954258a5SAlex Williamson     rtl->vdev = vdev;
1137c00d61d8SAlex Williamson 
1138954258a5SAlex Williamson     memory_region_init_io(&quirk->mem[0], OBJECT(vdev),
1139954258a5SAlex Williamson                           &vfio_rtl_address_quirk, rtl,
1140954258a5SAlex Williamson                           "vfio-rtl8168-window-address-quirk", 4);
1141db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
1142954258a5SAlex Williamson                                         0x74, &quirk->mem[0], 1);
1143954258a5SAlex Williamson 
1144954258a5SAlex Williamson     memory_region_init_io(&quirk->mem[1], OBJECT(vdev),
1145954258a5SAlex Williamson                           &vfio_rtl_data_quirk, rtl,
1146954258a5SAlex Williamson                           "vfio-rtl8168-window-data-quirk", 4);
1147db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
1148954258a5SAlex Williamson                                         0x70, &quirk->mem[1], 1);
1149c00d61d8SAlex Williamson 
1150c00d61d8SAlex Williamson     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
1151c00d61d8SAlex Williamson 
1152954258a5SAlex Williamson     trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name);
1153c00d61d8SAlex Williamson }
1154c00d61d8SAlex Williamson 
/* Config space offset of the IGD ASL Storage Register */
#define IGD_ASLS 0xfc
1156c4c45e94SAlex Williamson 
1157c4c45e94SAlex Williamson /*
1158c4c45e94SAlex Williamson  * The OpRegion includes the Video BIOS Table, which seems important for
1159c4c45e94SAlex Williamson  * telling the driver what sort of outputs it has.  Without this, the device
1160c4c45e94SAlex Williamson  * may work in the guest, but we may not get output.  This also requires BIOS
1161c4c45e94SAlex Williamson  * support to reserve and populate a section of guest memory sufficient for
1162c4c45e94SAlex Williamson  * the table and to write the base address of that memory to the ASLS register
1163c4c45e94SAlex Williamson  * of the IGD device.
1164c4c45e94SAlex Williamson  */
11656ced0bbaSAlex Williamson int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
11667237011dSEric Auger                                struct vfio_region_info *info, Error **errp)
1167c4c45e94SAlex Williamson {
1168c4c45e94SAlex Williamson     int ret;
1169c4c45e94SAlex Williamson 
1170c4c45e94SAlex Williamson     vdev->igd_opregion = g_malloc0(info->size);
1171c4c45e94SAlex Williamson     ret = pread(vdev->vbasedev.fd, vdev->igd_opregion,
1172c4c45e94SAlex Williamson                 info->size, info->offset);
1173c4c45e94SAlex Williamson     if (ret != info->size) {
11747237011dSEric Auger         error_setg(errp, "failed to read IGD OpRegion");
1175c4c45e94SAlex Williamson         g_free(vdev->igd_opregion);
1176c4c45e94SAlex Williamson         vdev->igd_opregion = NULL;
1177c4c45e94SAlex Williamson         return -EINVAL;
1178c4c45e94SAlex Williamson     }
1179c4c45e94SAlex Williamson 
1180c4c45e94SAlex Williamson     /*
1181c4c45e94SAlex Williamson      * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to
1182c4c45e94SAlex Williamson      * allocate 32bit reserved memory for, copy these contents into, and write
1183c4c45e94SAlex Williamson      * the reserved memory base address to the device ASLS register at 0xFC.
1184c4c45e94SAlex Williamson      * Alignment of this reserved region seems flexible, but using a 4k page
1185c4c45e94SAlex Williamson      * alignment seems to work well.  This interface assumes a single IGD
1186c4c45e94SAlex Williamson      * device, which may be at VM address 00:02.0 in legacy mode or another
1187c4c45e94SAlex Williamson      * address in UPT mode.
1188c4c45e94SAlex Williamson      *
1189c4c45e94SAlex Williamson      * NB, there may be future use cases discovered where the VM should have
1190c4c45e94SAlex Williamson      * direct interaction with the host OpRegion, in which case the write to
1191c4c45e94SAlex Williamson      * the ASLS register would trigger MemoryRegion setup to enable that.
1192c4c45e94SAlex Williamson      */
1193c4c45e94SAlex Williamson     fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion",
1194c4c45e94SAlex Williamson                     vdev->igd_opregion, info->size);
1195c4c45e94SAlex Williamson 
1196c4c45e94SAlex Williamson     trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name);
1197c4c45e94SAlex Williamson 
1198c4c45e94SAlex Williamson     pci_set_long(vdev->pdev.config + IGD_ASLS, 0);
1199c4c45e94SAlex Williamson     pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0);
1200c4c45e94SAlex Williamson     pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0);
1201c4c45e94SAlex Williamson 
1202c4c45e94SAlex Williamson     return 0;
1203c4c45e94SAlex Williamson }
1204c4c45e94SAlex Williamson 
1205c4c45e94SAlex Williamson /*
1206c00d61d8SAlex Williamson  * Common quirk probe entry points.
1207c00d61d8SAlex Williamson  */
1208c00d61d8SAlex Williamson void vfio_vga_quirk_setup(VFIOPCIDevice *vdev)
1209c00d61d8SAlex Williamson {
1210c00d61d8SAlex Williamson     vfio_vga_probe_ati_3c3_quirk(vdev);
1211c00d61d8SAlex Williamson     vfio_vga_probe_nvidia_3d0_quirk(vdev);
1212c00d61d8SAlex Williamson }
1213c00d61d8SAlex Williamson 
12142d82f8a3SAlex Williamson void vfio_vga_quirk_exit(VFIOPCIDevice *vdev)
1215c00d61d8SAlex Williamson {
1216c00d61d8SAlex Williamson     VFIOQuirk *quirk;
12178c4f2348SAlex Williamson     int i, j;
1218c00d61d8SAlex Williamson 
12192d82f8a3SAlex Williamson     for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
12202d82f8a3SAlex Williamson         QLIST_FOREACH(quirk, &vdev->vga->region[i].quirks, next) {
12218c4f2348SAlex Williamson             for (j = 0; j < quirk->nr_mem; j++) {
12222d82f8a3SAlex Williamson                 memory_region_del_subregion(&vdev->vga->region[i].mem,
12238c4f2348SAlex Williamson                                             &quirk->mem[j]);
12248c4f2348SAlex Williamson             }
1225c00d61d8SAlex Williamson         }
1226c00d61d8SAlex Williamson     }
1227c00d61d8SAlex Williamson }
1228c00d61d8SAlex Williamson 
12292d82f8a3SAlex Williamson void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev)
1230c00d61d8SAlex Williamson {
12318c4f2348SAlex Williamson     int i, j;
1232c00d61d8SAlex Williamson 
12332d82f8a3SAlex Williamson     for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
12342d82f8a3SAlex Williamson         while (!QLIST_EMPTY(&vdev->vga->region[i].quirks)) {
12352d82f8a3SAlex Williamson             VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga->region[i].quirks);
1236c00d61d8SAlex Williamson             QLIST_REMOVE(quirk, next);
12378c4f2348SAlex Williamson             for (j = 0; j < quirk->nr_mem; j++) {
12388c4f2348SAlex Williamson                 object_unparent(OBJECT(&quirk->mem[j]));
12398c4f2348SAlex Williamson             }
12408c4f2348SAlex Williamson             g_free(quirk->mem);
12418c4f2348SAlex Williamson             g_free(quirk->data);
1242c00d61d8SAlex Williamson             g_free(quirk);
1243c00d61d8SAlex Williamson         }
1244c00d61d8SAlex Williamson     }
1245c00d61d8SAlex Williamson }
1246c00d61d8SAlex Williamson 
1247c00d61d8SAlex Williamson void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
1248c00d61d8SAlex Williamson {
12490e54f24aSAlex Williamson     vfio_probe_ati_bar4_quirk(vdev, nr);
12500d38fb1cSAlex Williamson     vfio_probe_ati_bar2_quirk(vdev, nr);
12510e54f24aSAlex Williamson     vfio_probe_nvidia_bar5_quirk(vdev, nr);
12520d38fb1cSAlex Williamson     vfio_probe_nvidia_bar0_quirk(vdev, nr);
1253954258a5SAlex Williamson     vfio_probe_rtl8168_bar2_quirk(vdev, nr);
125429d62771SThomas Huth #ifdef CONFIG_VFIO_IGD
1255c4c45e94SAlex Williamson     vfio_probe_igd_bar4_quirk(vdev, nr);
125629d62771SThomas Huth #endif
1257c00d61d8SAlex Williamson }
1258c00d61d8SAlex Williamson 
12592d82f8a3SAlex Williamson void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr)
1260c00d61d8SAlex Williamson {
1261c00d61d8SAlex Williamson     VFIOBAR *bar = &vdev->bars[nr];
1262c00d61d8SAlex Williamson     VFIOQuirk *quirk;
12638c4f2348SAlex Williamson     int i;
1264c00d61d8SAlex Williamson 
1265c00d61d8SAlex Williamson     QLIST_FOREACH(quirk, &bar->quirks, next) {
1266c958c51dSAlex Williamson         while (!QLIST_EMPTY(&quirk->ioeventfds)) {
12672b1dbd0dSAlex Williamson             vfio_ioeventfd_exit(vdev, QLIST_FIRST(&quirk->ioeventfds));
1268c958c51dSAlex Williamson         }
1269c958c51dSAlex Williamson 
12708c4f2348SAlex Williamson         for (i = 0; i < quirk->nr_mem; i++) {
1271db0da029SAlex Williamson             memory_region_del_subregion(bar->region.mem, &quirk->mem[i]);
12728c4f2348SAlex Williamson         }
1273c00d61d8SAlex Williamson     }
1274c00d61d8SAlex Williamson }
1275c00d61d8SAlex Williamson 
12762d82f8a3SAlex Williamson void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr)
1277c00d61d8SAlex Williamson {
1278c00d61d8SAlex Williamson     VFIOBAR *bar = &vdev->bars[nr];
12798c4f2348SAlex Williamson     int i;
1280c00d61d8SAlex Williamson 
1281c00d61d8SAlex Williamson     while (!QLIST_EMPTY(&bar->quirks)) {
1282c00d61d8SAlex Williamson         VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks);
1283c00d61d8SAlex Williamson         QLIST_REMOVE(quirk, next);
12848c4f2348SAlex Williamson         for (i = 0; i < quirk->nr_mem; i++) {
12858c4f2348SAlex Williamson             object_unparent(OBJECT(&quirk->mem[i]));
12868c4f2348SAlex Williamson         }
12878c4f2348SAlex Williamson         g_free(quirk->mem);
12888c4f2348SAlex Williamson         g_free(quirk->data);
1289c00d61d8SAlex Williamson         g_free(quirk);
1290c00d61d8SAlex Williamson     }
1291c00d61d8SAlex Williamson }
1292c9c50009SAlex Williamson 
1293c9c50009SAlex Williamson /*
1294c9c50009SAlex Williamson  * Reset quirks
1295c9c50009SAlex Williamson  */
1296469d02deSAlex Williamson void vfio_quirk_reset(VFIOPCIDevice *vdev)
1297469d02deSAlex Williamson {
1298469d02deSAlex Williamson     int i;
1299469d02deSAlex Williamson 
1300469d02deSAlex Williamson     for (i = 0; i < PCI_ROM_SLOT; i++) {
1301469d02deSAlex Williamson         VFIOQuirk *quirk;
1302469d02deSAlex Williamson         VFIOBAR *bar = &vdev->bars[i];
1303469d02deSAlex Williamson 
1304469d02deSAlex Williamson         QLIST_FOREACH(quirk, &bar->quirks, next) {
1305469d02deSAlex Williamson             if (quirk->reset) {
1306469d02deSAlex Williamson                 quirk->reset(vdev, quirk);
1307469d02deSAlex Williamson             }
1308469d02deSAlex Williamson         }
1309469d02deSAlex Williamson     }
1310469d02deSAlex Williamson }
1311c9c50009SAlex Williamson 
1312c9c50009SAlex Williamson /*
1313c9c50009SAlex Williamson  * AMD Radeon PCI config reset, based on Linux:
1314c9c50009SAlex Williamson  *   drivers/gpu/drm/radeon/ci_smc.c:ci_is_smc_running()
1315c9c50009SAlex Williamson  *   drivers/gpu/drm/radeon/radeon_device.c:radeon_pci_config_reset
1316c9c50009SAlex Williamson  *   drivers/gpu/drm/radeon/ci_smc.c:ci_reset_smc()
1317c9c50009SAlex Williamson  *   drivers/gpu/drm/radeon/ci_smc.c:ci_stop_smc_clock()
1318c9c50009SAlex Williamson  * IDs: include/drm/drm_pciids.h
1319c9c50009SAlex Williamson  * Registers: http://cgit.freedesktop.org/~agd5f/linux/commit/?id=4e2aa447f6f0
1320c9c50009SAlex Williamson  *
1321c9c50009SAlex Williamson  * Bonaire and Hawaii GPUs do not respond to a bus reset.  This is a bug in the
1322c9c50009SAlex Williamson  * hardware that should be fixed on future ASICs.  The symptom of this is that
1323c9c50009SAlex Williamson  * once the accerlated driver loads, Windows guests will bsod on subsequent
1324c9c50009SAlex Williamson  * attmpts to load the driver, such as after VM reset or shutdown/restart.  To
1325c9c50009SAlex Williamson  * work around this, we do an AMD specific PCI config reset, followed by an SMC
1326c9c50009SAlex Williamson  * reset.  The PCI config reset only works if SMC firmware is running, so we
1327c9c50009SAlex Williamson  * have a dependency on the state of the device as to whether this reset will
1328c9c50009SAlex Williamson  * be effective.  There are still cases where we won't be able to kick the
1329c9c50009SAlex Williamson  * device into working, but this greatly improves the usability overall.  The
1330c9c50009SAlex Williamson  * config reset magic is relatively common on AMD GPUs, but the setup and SMC
1331c9c50009SAlex Williamson  * poking is largely ASIC specific.
1332c9c50009SAlex Williamson  */
1333c9c50009SAlex Williamson static bool vfio_radeon_smc_is_running(VFIOPCIDevice *vdev)
1334c9c50009SAlex Williamson {
1335c9c50009SAlex Williamson     uint32_t clk, pc_c;
1336c9c50009SAlex Williamson 
1337c9c50009SAlex Williamson     /*
1338c9c50009SAlex Williamson      * Registers 200h and 204h are index and data registers for accessing
1339c9c50009SAlex Williamson      * indirect configuration registers within the device.
1340c9c50009SAlex Williamson      */
1341c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
1342c9c50009SAlex Williamson     clk = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1343c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000370, 4);
1344c9c50009SAlex Williamson     pc_c = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1345c9c50009SAlex Williamson 
1346c9c50009SAlex Williamson     return (!(clk & 1) && (0x20100 <= pc_c));
1347c9c50009SAlex Williamson }
1348c9c50009SAlex Williamson 
1349c9c50009SAlex Williamson /*
1350c9c50009SAlex Williamson  * The scope of a config reset is controlled by a mode bit in the misc register
1351c9c50009SAlex Williamson  * and a fuse, exposed as a bit in another register.  The fuse is the default
1352c9c50009SAlex Williamson  * (0 = GFX, 1 = whole GPU), the misc bit is a toggle, with the forumula
1353c9c50009SAlex Williamson  * scope = !(misc ^ fuse), where the resulting scope is defined the same as
1354c9c50009SAlex Williamson  * the fuse.  A truth table therefore tells us that if misc == fuse, we need
1355c9c50009SAlex Williamson  * to flip the value of the bit in the misc register.
1356c9c50009SAlex Williamson  */
1357c9c50009SAlex Williamson static void vfio_radeon_set_gfx_only_reset(VFIOPCIDevice *vdev)
1358c9c50009SAlex Williamson {
1359c9c50009SAlex Williamson     uint32_t misc, fuse;
1360c9c50009SAlex Williamson     bool a, b;
1361c9c50009SAlex Williamson 
1362c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x200, 0xc00c0000, 4);
1363c9c50009SAlex Williamson     fuse = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1364c9c50009SAlex Williamson     b = fuse & 64;
1365c9c50009SAlex Williamson 
1366c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x200, 0xc0000010, 4);
1367c9c50009SAlex Williamson     misc = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1368c9c50009SAlex Williamson     a = misc & 2;
1369c9c50009SAlex Williamson 
1370c9c50009SAlex Williamson     if (a == b) {
1371c9c50009SAlex Williamson         vfio_region_write(&vdev->bars[5].region, 0x204, misc ^ 2, 4);
1372c9c50009SAlex Williamson         vfio_region_read(&vdev->bars[5].region, 0x204, 4); /* flush */
1373c9c50009SAlex Williamson     }
1374c9c50009SAlex Williamson }
1375c9c50009SAlex Williamson 
1376c9c50009SAlex Williamson static int vfio_radeon_reset(VFIOPCIDevice *vdev)
1377c9c50009SAlex Williamson {
1378c9c50009SAlex Williamson     PCIDevice *pdev = &vdev->pdev;
1379c9c50009SAlex Williamson     int i, ret = 0;
1380c9c50009SAlex Williamson     uint32_t data;
1381c9c50009SAlex Williamson 
1382c9c50009SAlex Williamson     /* Defer to a kernel implemented reset */
1383c9c50009SAlex Williamson     if (vdev->vbasedev.reset_works) {
1384c9c50009SAlex Williamson         trace_vfio_quirk_ati_bonaire_reset_skipped(vdev->vbasedev.name);
1385c9c50009SAlex Williamson         return -ENODEV;
1386c9c50009SAlex Williamson     }
1387c9c50009SAlex Williamson 
1388c9c50009SAlex Williamson     /* Enable only memory BAR access */
1389c9c50009SAlex Williamson     vfio_pci_write_config(pdev, PCI_COMMAND, PCI_COMMAND_MEMORY, 2);
1390c9c50009SAlex Williamson 
1391c9c50009SAlex Williamson     /* Reset only works if SMC firmware is loaded and running */
1392c9c50009SAlex Williamson     if (!vfio_radeon_smc_is_running(vdev)) {
1393c9c50009SAlex Williamson         ret = -EINVAL;
1394c9c50009SAlex Williamson         trace_vfio_quirk_ati_bonaire_reset_no_smc(vdev->vbasedev.name);
1395c9c50009SAlex Williamson         goto out;
1396c9c50009SAlex Williamson     }
1397c9c50009SAlex Williamson 
1398c9c50009SAlex Williamson     /* Make sure only the GFX function is reset */
1399c9c50009SAlex Williamson     vfio_radeon_set_gfx_only_reset(vdev);
1400c9c50009SAlex Williamson 
1401c9c50009SAlex Williamson     /* AMD PCI config reset */
1402c9c50009SAlex Williamson     vfio_pci_write_config(pdev, 0x7c, 0x39d5e86b, 4);
1403c9c50009SAlex Williamson     usleep(100);
1404c9c50009SAlex Williamson 
1405c9c50009SAlex Williamson     /* Read back the memory size to make sure we're out of reset */
1406c9c50009SAlex Williamson     for (i = 0; i < 100000; i++) {
1407c9c50009SAlex Williamson         if (vfio_region_read(&vdev->bars[5].region, 0x5428, 4) != 0xffffffff) {
1408c9c50009SAlex Williamson             goto reset_smc;
1409c9c50009SAlex Williamson         }
1410c9c50009SAlex Williamson         usleep(1);
1411c9c50009SAlex Williamson     }
1412c9c50009SAlex Williamson 
1413c9c50009SAlex Williamson     trace_vfio_quirk_ati_bonaire_reset_timeout(vdev->vbasedev.name);
1414c9c50009SAlex Williamson 
1415c9c50009SAlex Williamson reset_smc:
1416c9c50009SAlex Williamson     /* Reset SMC */
1417c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000000, 4);
1418c9c50009SAlex Williamson     data = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1419c9c50009SAlex Williamson     data |= 1;
1420c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x204, data, 4);
1421c9c50009SAlex Williamson 
1422c9c50009SAlex Williamson     /* Disable SMC clock */
1423c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
1424c9c50009SAlex Williamson     data = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1425c9c50009SAlex Williamson     data |= 1;
1426c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x204, data, 4);
1427c9c50009SAlex Williamson 
1428c9c50009SAlex Williamson     trace_vfio_quirk_ati_bonaire_reset_done(vdev->vbasedev.name);
1429c9c50009SAlex Williamson 
1430c9c50009SAlex Williamson out:
1431c9c50009SAlex Williamson     /* Restore PCI command register */
1432c9c50009SAlex Williamson     vfio_pci_write_config(pdev, PCI_COMMAND, 0, 2);
1433c9c50009SAlex Williamson 
1434c9c50009SAlex Williamson     return ret;
1435c9c50009SAlex Williamson }
1436c9c50009SAlex Williamson 
1437c9c50009SAlex Williamson void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev)
1438c9c50009SAlex Williamson {
1439ff635e37SAlex Williamson     switch (vdev->vendor_id) {
1440c9c50009SAlex Williamson     case 0x1002:
1441ff635e37SAlex Williamson         switch (vdev->device_id) {
1442c9c50009SAlex Williamson         /* Bonaire */
1443c9c50009SAlex Williamson         case 0x6649: /* Bonaire [FirePro W5100] */
1444c9c50009SAlex Williamson         case 0x6650:
1445c9c50009SAlex Williamson         case 0x6651:
1446c9c50009SAlex Williamson         case 0x6658: /* Bonaire XTX [Radeon R7 260X] */
1447c9c50009SAlex Williamson         case 0x665c: /* Bonaire XT [Radeon HD 7790/8770 / R9 260 OEM] */
1448c9c50009SAlex Williamson         case 0x665d: /* Bonaire [Radeon R7 200 Series] */
1449c9c50009SAlex Williamson         /* Hawaii */
1450c9c50009SAlex Williamson         case 0x67A0: /* Hawaii XT GL [FirePro W9100] */
1451c9c50009SAlex Williamson         case 0x67A1: /* Hawaii PRO GL [FirePro W8100] */
1452c9c50009SAlex Williamson         case 0x67A2:
1453c9c50009SAlex Williamson         case 0x67A8:
1454c9c50009SAlex Williamson         case 0x67A9:
1455c9c50009SAlex Williamson         case 0x67AA:
1456c9c50009SAlex Williamson         case 0x67B0: /* Hawaii XT [Radeon R9 290X] */
1457c9c50009SAlex Williamson         case 0x67B1: /* Hawaii PRO [Radeon R9 290] */
1458c9c50009SAlex Williamson         case 0x67B8:
1459c9c50009SAlex Williamson         case 0x67B9:
1460c9c50009SAlex Williamson         case 0x67BA:
1461c9c50009SAlex Williamson         case 0x67BE:
1462c9c50009SAlex Williamson             vdev->resetfn = vfio_radeon_reset;
1463c9c50009SAlex Williamson             trace_vfio_quirk_ati_bonaire_reset(vdev->vbasedev.name);
1464c9c50009SAlex Williamson             break;
1465c9c50009SAlex Williamson         }
1466c9c50009SAlex Williamson         break;
1467c9c50009SAlex Williamson     }
1468c9c50009SAlex Williamson }
1469dfbee78dSAlex Williamson 
1470dfbee78dSAlex Williamson /*
1471dfbee78dSAlex Williamson  * The NVIDIA GPUDirect P2P Vendor capability allows the user to specify
1472dfbee78dSAlex Williamson  * devices as a member of a clique.  Devices within the same clique ID
1473dfbee78dSAlex Williamson  * are capable of direct P2P.  It's the user's responsibility that this
1474dfbee78dSAlex Williamson  * is correct.  The spec says that this may reside at any unused config
1475dfbee78dSAlex Williamson  * offset, but reserves and recommends hypervisors place this at C8h.
1476dfbee78dSAlex Williamson  * The spec also states that the hypervisor should place this capability
1477dfbee78dSAlex Williamson  * at the end of the capability list, thus next is defined as 0h.
1478dfbee78dSAlex Williamson  *
1479dfbee78dSAlex Williamson  * +----------------+----------------+----------------+----------------+
1480dfbee78dSAlex Williamson  * | sig 7:0 ('P')  |  vndr len (8h) |    next (0h)   |   cap id (9h)  |
1481dfbee78dSAlex Williamson  * +----------------+----------------+----------------+----------------+
1482dfbee78dSAlex Williamson  * | rsvd 15:7(0h),id 6:3,ver 2:0(0h)|          sig 23:8 ('P2')        |
1483dfbee78dSAlex Williamson  * +---------------------------------+---------------------------------+
1484dfbee78dSAlex Williamson  *
1485dfbee78dSAlex Williamson  * https://lists.gnu.org/archive/html/qemu-devel/2017-08/pdfUda5iEpgOS.pdf
1486dfbee78dSAlex Williamson  */
1487dfbee78dSAlex Williamson static void get_nv_gpudirect_clique_id(Object *obj, Visitor *v,
1488dfbee78dSAlex Williamson                                        const char *name, void *opaque,
1489dfbee78dSAlex Williamson                                        Error **errp)
1490dfbee78dSAlex Williamson {
1491dfbee78dSAlex Williamson     Property *prop = opaque;
1492*1e198715SEduardo Habkost     uint8_t *ptr = object_field_prop_ptr(obj, prop);
1493dfbee78dSAlex Williamson 
1494dfbee78dSAlex Williamson     visit_type_uint8(v, name, ptr, errp);
1495dfbee78dSAlex Williamson }
1496dfbee78dSAlex Williamson 
1497dfbee78dSAlex Williamson static void set_nv_gpudirect_clique_id(Object *obj, Visitor *v,
1498dfbee78dSAlex Williamson                                        const char *name, void *opaque,
1499dfbee78dSAlex Williamson                                        Error **errp)
1500dfbee78dSAlex Williamson {
1501dfbee78dSAlex Williamson     Property *prop = opaque;
1502*1e198715SEduardo Habkost     uint8_t value, *ptr = object_field_prop_ptr(obj, prop);
1503dfbee78dSAlex Williamson 
1504668f62ecSMarkus Armbruster     if (!visit_type_uint8(v, name, &value, errp)) {
1505dfbee78dSAlex Williamson         return;
1506dfbee78dSAlex Williamson     }
1507dfbee78dSAlex Williamson 
1508dfbee78dSAlex Williamson     if (value & ~0xF) {
1509dfbee78dSAlex Williamson         error_setg(errp, "Property %s: valid range 0-15", name);
1510dfbee78dSAlex Williamson         return;
1511dfbee78dSAlex Williamson     }
1512dfbee78dSAlex Williamson 
1513dfbee78dSAlex Williamson     *ptr = value;
1514dfbee78dSAlex Williamson }
1515dfbee78dSAlex Williamson 
1516dfbee78dSAlex Williamson const PropertyInfo qdev_prop_nv_gpudirect_clique = {
1517dfbee78dSAlex Williamson     .name = "uint4",
1518dfbee78dSAlex Williamson     .description = "NVIDIA GPUDirect Clique ID (0 - 15)",
1519dfbee78dSAlex Williamson     .get = get_nv_gpudirect_clique_id,
1520dfbee78dSAlex Williamson     .set = set_nv_gpudirect_clique_id,
1521dfbee78dSAlex Williamson };
1522dfbee78dSAlex Williamson 
1523dfbee78dSAlex Williamson static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
1524dfbee78dSAlex Williamson {
1525dfbee78dSAlex Williamson     PCIDevice *pdev = &vdev->pdev;
1526dfbee78dSAlex Williamson     int ret, pos = 0xC8;
1527dfbee78dSAlex Williamson 
1528dfbee78dSAlex Williamson     if (vdev->nv_gpudirect_clique == 0xFF) {
1529dfbee78dSAlex Williamson         return 0;
1530dfbee78dSAlex Williamson     }
1531dfbee78dSAlex Williamson 
1532dfbee78dSAlex Williamson     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID)) {
1533dfbee78dSAlex Williamson         error_setg(errp, "NVIDIA GPUDirect Clique ID: invalid device vendor");
1534dfbee78dSAlex Williamson         return -EINVAL;
1535dfbee78dSAlex Williamson     }
1536dfbee78dSAlex Williamson 
1537dfbee78dSAlex Williamson     if (pci_get_byte(pdev->config + PCI_CLASS_DEVICE + 1) !=
1538dfbee78dSAlex Williamson         PCI_BASE_CLASS_DISPLAY) {
1539dfbee78dSAlex Williamson         error_setg(errp, "NVIDIA GPUDirect Clique ID: unsupported PCI class");
1540dfbee78dSAlex Williamson         return -EINVAL;
1541dfbee78dSAlex Williamson     }
1542dfbee78dSAlex Williamson 
1543dfbee78dSAlex Williamson     ret = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, 8, errp);
1544dfbee78dSAlex Williamson     if (ret < 0) {
1545dfbee78dSAlex Williamson         error_prepend(errp, "Failed to add NVIDIA GPUDirect cap: ");
1546dfbee78dSAlex Williamson         return ret;
1547dfbee78dSAlex Williamson     }
1548dfbee78dSAlex Williamson 
1549dfbee78dSAlex Williamson     memset(vdev->emulated_config_bits + pos, 0xFF, 8);
1550dfbee78dSAlex Williamson     pos += PCI_CAP_FLAGS;
1551dfbee78dSAlex Williamson     pci_set_byte(pdev->config + pos++, 8);
1552dfbee78dSAlex Williamson     pci_set_byte(pdev->config + pos++, 'P');
1553dfbee78dSAlex Williamson     pci_set_byte(pdev->config + pos++, '2');
1554dfbee78dSAlex Williamson     pci_set_byte(pdev->config + pos++, 'P');
1555dfbee78dSAlex Williamson     pci_set_byte(pdev->config + pos++, vdev->nv_gpudirect_clique << 3);
1556dfbee78dSAlex Williamson     pci_set_byte(pdev->config + pos, 0);
1557dfbee78dSAlex Williamson 
1558dfbee78dSAlex Williamson     return 0;
1559dfbee78dSAlex Williamson }
1560dfbee78dSAlex Williamson 
1561ec132efaSAlexey Kardashevskiy static void vfio_pci_nvlink2_get_tgt(Object *obj, Visitor *v,
1562ec132efaSAlexey Kardashevskiy                                      const char *name,
1563ec132efaSAlexey Kardashevskiy                                      void *opaque, Error **errp)
1564ec132efaSAlexey Kardashevskiy {
1565ec132efaSAlexey Kardashevskiy     uint64_t tgt = (uintptr_t) opaque;
1566ec132efaSAlexey Kardashevskiy     visit_type_uint64(v, name, &tgt, errp);
1567ec132efaSAlexey Kardashevskiy }
1568ec132efaSAlexey Kardashevskiy 
1569ec132efaSAlexey Kardashevskiy static void vfio_pci_nvlink2_get_link_speed(Object *obj, Visitor *v,
1570ec132efaSAlexey Kardashevskiy                                                  const char *name,
1571ec132efaSAlexey Kardashevskiy                                                  void *opaque, Error **errp)
1572ec132efaSAlexey Kardashevskiy {
1573ec132efaSAlexey Kardashevskiy     uint32_t link_speed = (uint32_t)(uintptr_t) opaque;
1574ec132efaSAlexey Kardashevskiy     visit_type_uint32(v, name, &link_speed, errp);
1575ec132efaSAlexey Kardashevskiy }
1576ec132efaSAlexey Kardashevskiy 
1577ec132efaSAlexey Kardashevskiy int vfio_pci_nvidia_v100_ram_init(VFIOPCIDevice *vdev, Error **errp)
1578ec132efaSAlexey Kardashevskiy {
1579ec132efaSAlexey Kardashevskiy     int ret;
1580ec132efaSAlexey Kardashevskiy     void *p;
1581ec132efaSAlexey Kardashevskiy     struct vfio_region_info *nv2reg = NULL;
1582ec132efaSAlexey Kardashevskiy     struct vfio_info_cap_header *hdr;
1583ec132efaSAlexey Kardashevskiy     struct vfio_region_info_cap_nvlink2_ssatgt *cap;
1584ec132efaSAlexey Kardashevskiy     VFIOQuirk *quirk;
1585ec132efaSAlexey Kardashevskiy 
1586ec132efaSAlexey Kardashevskiy     ret = vfio_get_dev_region_info(&vdev->vbasedev,
1587ec132efaSAlexey Kardashevskiy                                    VFIO_REGION_TYPE_PCI_VENDOR_TYPE |
1588ec132efaSAlexey Kardashevskiy                                    PCI_VENDOR_ID_NVIDIA,
1589ec132efaSAlexey Kardashevskiy                                    VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM,
1590ec132efaSAlexey Kardashevskiy                                    &nv2reg);
1591ec132efaSAlexey Kardashevskiy     if (ret) {
1592ec132efaSAlexey Kardashevskiy         return ret;
1593ec132efaSAlexey Kardashevskiy     }
1594ec132efaSAlexey Kardashevskiy 
1595ec132efaSAlexey Kardashevskiy     hdr = vfio_get_region_info_cap(nv2reg, VFIO_REGION_INFO_CAP_NVLINK2_SSATGT);
1596ec132efaSAlexey Kardashevskiy     if (!hdr) {
1597ec132efaSAlexey Kardashevskiy         ret = -ENODEV;
1598ec132efaSAlexey Kardashevskiy         goto free_exit;
1599ec132efaSAlexey Kardashevskiy     }
1600ec132efaSAlexey Kardashevskiy     cap = (void *) hdr;
1601ec132efaSAlexey Kardashevskiy 
16029c7c0407SLeonardo Bras     p = mmap(NULL, nv2reg->size, PROT_READ | PROT_WRITE,
1603ec132efaSAlexey Kardashevskiy              MAP_SHARED, vdev->vbasedev.fd, nv2reg->offset);
1604ec132efaSAlexey Kardashevskiy     if (p == MAP_FAILED) {
1605ec132efaSAlexey Kardashevskiy         ret = -errno;
1606ec132efaSAlexey Kardashevskiy         goto free_exit;
1607ec132efaSAlexey Kardashevskiy     }
1608ec132efaSAlexey Kardashevskiy 
1609ec132efaSAlexey Kardashevskiy     quirk = vfio_quirk_alloc(1);
1610ec132efaSAlexey Kardashevskiy     memory_region_init_ram_ptr(&quirk->mem[0], OBJECT(vdev), "nvlink2-mr",
1611ec132efaSAlexey Kardashevskiy                                nv2reg->size, p);
1612ec132efaSAlexey Kardashevskiy     QLIST_INSERT_HEAD(&vdev->bars[0].quirks, quirk, next);
1613ec132efaSAlexey Kardashevskiy 
1614ec132efaSAlexey Kardashevskiy     object_property_add(OBJECT(vdev), "nvlink2-tgt", "uint64",
1615ec132efaSAlexey Kardashevskiy                         vfio_pci_nvlink2_get_tgt, NULL, NULL,
1616d2623129SMarkus Armbruster                         (void *) (uintptr_t) cap->tgt);
1617ec132efaSAlexey Kardashevskiy     trace_vfio_pci_nvidia_gpu_setup_quirk(vdev->vbasedev.name, cap->tgt,
1618ec132efaSAlexey Kardashevskiy                                           nv2reg->size);
1619ec132efaSAlexey Kardashevskiy free_exit:
1620ec132efaSAlexey Kardashevskiy     g_free(nv2reg);
1621ec132efaSAlexey Kardashevskiy 
1622ec132efaSAlexey Kardashevskiy     return ret;
1623ec132efaSAlexey Kardashevskiy }
1624ec132efaSAlexey Kardashevskiy 
1625ec132efaSAlexey Kardashevskiy int vfio_pci_nvlink2_init(VFIOPCIDevice *vdev, Error **errp)
1626ec132efaSAlexey Kardashevskiy {
1627ec132efaSAlexey Kardashevskiy     int ret;
1628ec132efaSAlexey Kardashevskiy     void *p;
1629ec132efaSAlexey Kardashevskiy     struct vfio_region_info *atsdreg = NULL;
1630ec132efaSAlexey Kardashevskiy     struct vfio_info_cap_header *hdr;
1631ec132efaSAlexey Kardashevskiy     struct vfio_region_info_cap_nvlink2_ssatgt *captgt;
1632ec132efaSAlexey Kardashevskiy     struct vfio_region_info_cap_nvlink2_lnkspd *capspeed;
1633ec132efaSAlexey Kardashevskiy     VFIOQuirk *quirk;
1634ec132efaSAlexey Kardashevskiy 
1635ec132efaSAlexey Kardashevskiy     ret = vfio_get_dev_region_info(&vdev->vbasedev,
1636ec132efaSAlexey Kardashevskiy                                    VFIO_REGION_TYPE_PCI_VENDOR_TYPE |
1637ec132efaSAlexey Kardashevskiy                                    PCI_VENDOR_ID_IBM,
1638ec132efaSAlexey Kardashevskiy                                    VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD,
1639ec132efaSAlexey Kardashevskiy                                    &atsdreg);
1640ec132efaSAlexey Kardashevskiy     if (ret) {
1641ec132efaSAlexey Kardashevskiy         return ret;
1642ec132efaSAlexey Kardashevskiy     }
1643ec132efaSAlexey Kardashevskiy 
1644ec132efaSAlexey Kardashevskiy     hdr = vfio_get_region_info_cap(atsdreg,
1645ec132efaSAlexey Kardashevskiy                                    VFIO_REGION_INFO_CAP_NVLINK2_SSATGT);
1646ec132efaSAlexey Kardashevskiy     if (!hdr) {
1647ec132efaSAlexey Kardashevskiy         ret = -ENODEV;
1648ec132efaSAlexey Kardashevskiy         goto free_exit;
1649ec132efaSAlexey Kardashevskiy     }
1650ec132efaSAlexey Kardashevskiy     captgt = (void *) hdr;
1651ec132efaSAlexey Kardashevskiy 
1652ec132efaSAlexey Kardashevskiy     hdr = vfio_get_region_info_cap(atsdreg,
1653ec132efaSAlexey Kardashevskiy                                    VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD);
1654ec132efaSAlexey Kardashevskiy     if (!hdr) {
1655ec132efaSAlexey Kardashevskiy         ret = -ENODEV;
1656ec132efaSAlexey Kardashevskiy         goto free_exit;
1657ec132efaSAlexey Kardashevskiy     }
1658ec132efaSAlexey Kardashevskiy     capspeed = (void *) hdr;
1659ec132efaSAlexey Kardashevskiy 
1660ec132efaSAlexey Kardashevskiy     /* Some NVLink bridges may not have assigned ATSD */
1661ec132efaSAlexey Kardashevskiy     if (atsdreg->size) {
16629c7c0407SLeonardo Bras         p = mmap(NULL, atsdreg->size, PROT_READ | PROT_WRITE,
1663ec132efaSAlexey Kardashevskiy                  MAP_SHARED, vdev->vbasedev.fd, atsdreg->offset);
1664ec132efaSAlexey Kardashevskiy         if (p == MAP_FAILED) {
1665ec132efaSAlexey Kardashevskiy             ret = -errno;
1666ec132efaSAlexey Kardashevskiy             goto free_exit;
1667ec132efaSAlexey Kardashevskiy         }
1668ec132efaSAlexey Kardashevskiy 
1669ec132efaSAlexey Kardashevskiy         quirk = vfio_quirk_alloc(1);
1670ec132efaSAlexey Kardashevskiy         memory_region_init_ram_device_ptr(&quirk->mem[0], OBJECT(vdev),
1671ec132efaSAlexey Kardashevskiy                                           "nvlink2-atsd-mr", atsdreg->size, p);
1672ec132efaSAlexey Kardashevskiy         QLIST_INSERT_HEAD(&vdev->bars[0].quirks, quirk, next);
1673ec132efaSAlexey Kardashevskiy     }
1674ec132efaSAlexey Kardashevskiy 
1675ec132efaSAlexey Kardashevskiy     object_property_add(OBJECT(vdev), "nvlink2-tgt", "uint64",
1676ec132efaSAlexey Kardashevskiy                         vfio_pci_nvlink2_get_tgt, NULL, NULL,
1677d2623129SMarkus Armbruster                         (void *) (uintptr_t) captgt->tgt);
1678ec132efaSAlexey Kardashevskiy     trace_vfio_pci_nvlink2_setup_quirk_ssatgt(vdev->vbasedev.name, captgt->tgt,
1679ec132efaSAlexey Kardashevskiy                                               atsdreg->size);
1680ec132efaSAlexey Kardashevskiy 
1681ec132efaSAlexey Kardashevskiy     object_property_add(OBJECT(vdev), "nvlink2-link-speed", "uint32",
1682ec132efaSAlexey Kardashevskiy                         vfio_pci_nvlink2_get_link_speed, NULL, NULL,
1683d2623129SMarkus Armbruster                         (void *) (uintptr_t) capspeed->link_speed);
1684ec132efaSAlexey Kardashevskiy     trace_vfio_pci_nvlink2_setup_quirk_lnkspd(vdev->vbasedev.name,
1685ec132efaSAlexey Kardashevskiy                                               capspeed->link_speed);
1686ec132efaSAlexey Kardashevskiy free_exit:
1687ec132efaSAlexey Kardashevskiy     g_free(atsdreg);
1688ec132efaSAlexey Kardashevskiy 
1689ec132efaSAlexey Kardashevskiy     return ret;
1690ec132efaSAlexey Kardashevskiy }
1691ee7932b0SJon Derrick 
1692ee7932b0SJon Derrick /*
1693ee7932b0SJon Derrick  * The VMD endpoint provides a real PCIe domain to the guest and the guest
1694ee7932b0SJon Derrick  * kernel performs enumeration of the VMD sub-device domain. Guest transactions
1695ee7932b0SJon Derrick  * to VMD sub-devices go through MMU translation from guest addresses to
1696ee7932b0SJon Derrick  * physical addresses. When MMIO goes to an endpoint after being translated to
1697ee7932b0SJon Derrick  * physical addresses, the bridge rejects the transaction because the window
1698ee7932b0SJon Derrick  * has been programmed with guest addresses.
1699ee7932b0SJon Derrick  *
1700ee7932b0SJon Derrick  * VMD can use the Host Physical Address in order to correctly program the
1701ee7932b0SJon Derrick  * bridge windows in its PCIe domain. VMD device 28C0 has HPA shadow registers
1702ee7932b0SJon Derrick  * located at offset 0x2000 in MEMBAR2 (BAR 4). This quirk provides the HPA
1703ee7932b0SJon Derrick  * shadow registers in a vendor-specific capability register for devices
1704ee7932b0SJon Derrick  * without native support. The position of 0xE8-0xFF is in the reserved range
1705ee7932b0SJon Derrick  * of the VMD device capability space following the Power Management
1706ee7932b0SJon Derrick  * Capability.
1707ee7932b0SJon Derrick  */
1708ee7932b0SJon Derrick #define VMD_SHADOW_CAP_VER 1
1709ee7932b0SJon Derrick #define VMD_SHADOW_CAP_LEN 24
1710ee7932b0SJon Derrick static int vfio_add_vmd_shadow_cap(VFIOPCIDevice *vdev, Error **errp)
1711ee7932b0SJon Derrick {
1712ee7932b0SJon Derrick     uint8_t membar_phys[16];
1713ee7932b0SJon Derrick     int ret, pos = 0xE8;
1714ee7932b0SJon Derrick 
1715ee7932b0SJon Derrick     if (!(vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, 0x201D) ||
1716ee7932b0SJon Derrick           vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, 0x467F) ||
1717ee7932b0SJon Derrick           vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, 0x4C3D) ||
1718ee7932b0SJon Derrick           vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, 0x9A0B))) {
1719ee7932b0SJon Derrick         return 0;
1720ee7932b0SJon Derrick     }
1721ee7932b0SJon Derrick 
1722ee7932b0SJon Derrick     ret = pread(vdev->vbasedev.fd, membar_phys, 16,
1723ee7932b0SJon Derrick                 vdev->config_offset + PCI_BASE_ADDRESS_2);
1724ee7932b0SJon Derrick     if (ret != 16) {
1725ee7932b0SJon Derrick         error_report("VMD %s cannot read MEMBARs (%d)",
1726ee7932b0SJon Derrick                      vdev->vbasedev.name, ret);
1727ee7932b0SJon Derrick         return -EFAULT;
1728ee7932b0SJon Derrick     }
1729ee7932b0SJon Derrick 
1730ee7932b0SJon Derrick     ret = pci_add_capability(&vdev->pdev, PCI_CAP_ID_VNDR, pos,
1731ee7932b0SJon Derrick                              VMD_SHADOW_CAP_LEN, errp);
1732ee7932b0SJon Derrick     if (ret < 0) {
1733ee7932b0SJon Derrick         error_prepend(errp, "Failed to add VMD MEMBAR Shadow cap: ");
1734ee7932b0SJon Derrick         return ret;
1735ee7932b0SJon Derrick     }
1736ee7932b0SJon Derrick 
1737ee7932b0SJon Derrick     memset(vdev->emulated_config_bits + pos, 0xFF, VMD_SHADOW_CAP_LEN);
1738ee7932b0SJon Derrick     pos += PCI_CAP_FLAGS;
1739ee7932b0SJon Derrick     pci_set_byte(vdev->pdev.config + pos++, VMD_SHADOW_CAP_LEN);
1740ee7932b0SJon Derrick     pci_set_byte(vdev->pdev.config + pos++, VMD_SHADOW_CAP_VER);
1741ee7932b0SJon Derrick     pci_set_long(vdev->pdev.config + pos, 0x53484457); /* SHDW */
1742ee7932b0SJon Derrick     memcpy(vdev->pdev.config + pos + 4, membar_phys, 16);
1743ee7932b0SJon Derrick 
1744ee7932b0SJon Derrick     return 0;
1745ee7932b0SJon Derrick }
1746ee7932b0SJon Derrick 
1747ee7932b0SJon Derrick int vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp)
1748ee7932b0SJon Derrick {
1749ee7932b0SJon Derrick     int ret;
1750ee7932b0SJon Derrick 
1751ee7932b0SJon Derrick     ret = vfio_add_nv_gpudirect_cap(vdev, errp);
1752ee7932b0SJon Derrick     if (ret) {
1753ee7932b0SJon Derrick         return ret;
1754ee7932b0SJon Derrick     }
1755ee7932b0SJon Derrick 
1756ee7932b0SJon Derrick     ret = vfio_add_vmd_shadow_cap(vdev, errp);
1757ee7932b0SJon Derrick     if (ret) {
1758ee7932b0SJon Derrick         return ret;
1759ee7932b0SJon Derrick     }
1760ee7932b0SJon Derrick 
1761ee7932b0SJon Derrick     return 0;
1762ee7932b0SJon Derrick }
1763