xref: /qemu/hw/vfio/pci-quirks.c (revision ee7932b0bba2062a0fad1e70d576ef13c4906c83)
1c00d61d8SAlex Williamson /*
2c00d61d8SAlex Williamson  * device quirks for PCI devices
3c00d61d8SAlex Williamson  *
4c00d61d8SAlex Williamson  * Copyright Red Hat, Inc. 2012-2015
5c00d61d8SAlex Williamson  *
6c00d61d8SAlex Williamson  * Authors:
7c00d61d8SAlex Williamson  *  Alex Williamson <alex.williamson@redhat.com>
8c00d61d8SAlex Williamson  *
9c00d61d8SAlex Williamson  * This work is licensed under the terms of the GNU GPL, version 2.  See
10c00d61d8SAlex Williamson  * the COPYING file in the top-level directory.
11c00d61d8SAlex Williamson  */
12c00d61d8SAlex Williamson 
13c6eacb1aSPeter Maydell #include "qemu/osdep.h"
14475fbf0aSTony Nguyen #include "exec/memop.h"
15e0255bb1SPhilippe Mathieu-Daudé #include "qemu/units.h"
16c4c45e94SAlex Williamson #include "qemu/error-report.h"
17c958c51dSAlex Williamson #include "qemu/main-loop.h"
180b8fa32fSMarkus Armbruster #include "qemu/module.h"
19c4c45e94SAlex Williamson #include "qemu/range.h"
20c4c45e94SAlex Williamson #include "qapi/error.h"
21dfbee78dSAlex Williamson #include "qapi/visitor.h"
222b1dbd0dSAlex Williamson #include <sys/ioctl.h>
23650d103dSMarkus Armbruster #include "hw/hw.h"
24c4c45e94SAlex Williamson #include "hw/nvram/fw_cfg.h"
25a27bd6c7SMarkus Armbruster #include "hw/qdev-properties.h"
26c00d61d8SAlex Williamson #include "pci.h"
27c00d61d8SAlex Williamson #include "trace.h"
28c00d61d8SAlex Williamson 
/*
 * List of device ids/vendor ids for which to disable
 * option rom loading. This avoids the guest hangs during rom
 * execution as noticed with the BCM 57810 card, for lack of a
 * better way to handle such issues.
 * The user can still override by specifying a romfile or
 * rombar=1.
 * Please see https://bugs.launchpad.net/qemu/+bug/1284874
 * for an analysis of the 57810 card hang. When adding
 * a new vendor id/device id combination below, please also add
 * your card/environment details and information that could
 * help in debugging to the bug tracking this issue.
 */
/* PCI vendor/device ID pairs whose option ROM is not loaded by default. */
static const struct {
    uint32_t vendor;    /* PCI vendor ID */
    uint32_t device;    /* PCI device ID */
} romblacklist[] = {
    { .vendor = 0x14e4, .device = 0x168e }, /* Broadcom BCM 57810 */
};
48c00d61d8SAlex Williamson 
49c00d61d8SAlex Williamson bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev)
50c00d61d8SAlex Williamson {
51056dfcb6SAlex Williamson     int i;
52c00d61d8SAlex Williamson 
53056dfcb6SAlex Williamson     for (i = 0 ; i < ARRAY_SIZE(romblacklist); i++) {
54056dfcb6SAlex Williamson         if (vfio_pci_is(vdev, romblacklist[i].vendor, romblacklist[i].device)) {
55056dfcb6SAlex Williamson             trace_vfio_quirk_rom_blacklisted(vdev->vbasedev.name,
56056dfcb6SAlex Williamson                                              romblacklist[i].vendor,
57056dfcb6SAlex Williamson                                              romblacklist[i].device);
58c00d61d8SAlex Williamson             return true;
59c00d61d8SAlex Williamson         }
60c00d61d8SAlex Williamson     }
61c00d61d8SAlex Williamson     return false;
62c00d61d8SAlex Williamson }
63c00d61d8SAlex Williamson 
64c00d61d8SAlex Williamson /*
650e54f24aSAlex Williamson  * Device specific region quirks (mostly backdoors to PCI config space)
66c00d61d8SAlex Williamson  */
67c00d61d8SAlex Williamson 
/*
 * The generic window quirks operate on an address and data register,
 * vfio_generic_window_address_quirk handles the address register and
 * vfio_generic_window_data_quirk handles the data register.  These ops
 * pass reads and writes through to hardware until a value matching the
 * stored address match/mask is written.  When this occurs, the data
 * register accesses emulated PCI config space for the device rather than
 * passing through accesses.  This enables devices where PCI config space
 * is accessible behind a window register to maintain the virtualization
 * provided through vfio.
 */
/*
 * One address pattern recognised by a config window quirk.  A value written
 * to the address register selects the window when its bits outside @mask
 * equal @match; the bits inside @mask become the config space offset.
 */
typedef struct VFIOConfigWindowMatch {
    uint32_t match;     /* expected value of (written address & ~mask) */
    uint32_t mask;      /* bits of the written address kept as the offset */
} VFIOConfigWindowMatch;
830e54f24aSAlex Williamson 
840e54f24aSAlex Williamson typedef struct VFIOConfigWindowQuirk {
850e54f24aSAlex Williamson     struct VFIOPCIDevice *vdev;
860e54f24aSAlex Williamson 
870e54f24aSAlex Williamson     uint32_t address_val;
880e54f24aSAlex Williamson 
890e54f24aSAlex Williamson     uint32_t address_offset;
900e54f24aSAlex Williamson     uint32_t data_offset;
910e54f24aSAlex Williamson 
920e54f24aSAlex Williamson     bool window_enabled;
930e54f24aSAlex Williamson     uint8_t bar;
940e54f24aSAlex Williamson 
950e54f24aSAlex Williamson     MemoryRegion *addr_mem;
960e54f24aSAlex Williamson     MemoryRegion *data_mem;
970e54f24aSAlex Williamson 
980e54f24aSAlex Williamson     uint32_t nr_matches;
990e54f24aSAlex Williamson     VFIOConfigWindowMatch matches[];
1000e54f24aSAlex Williamson } VFIOConfigWindowQuirk;
1010e54f24aSAlex Williamson 
1020e54f24aSAlex Williamson static uint64_t vfio_generic_window_quirk_address_read(void *opaque,
1030e54f24aSAlex Williamson                                                        hwaddr addr,
1040e54f24aSAlex Williamson                                                        unsigned size)
1050e54f24aSAlex Williamson {
1060e54f24aSAlex Williamson     VFIOConfigWindowQuirk *window = opaque;
1070e54f24aSAlex Williamson     VFIOPCIDevice *vdev = window->vdev;
1080e54f24aSAlex Williamson 
1090e54f24aSAlex Williamson     return vfio_region_read(&vdev->bars[window->bar].region,
1100e54f24aSAlex Williamson                             addr + window->address_offset, size);
1110e54f24aSAlex Williamson }
1120e54f24aSAlex Williamson 
1130e54f24aSAlex Williamson static void vfio_generic_window_quirk_address_write(void *opaque, hwaddr addr,
1140e54f24aSAlex Williamson                                                     uint64_t data,
1150e54f24aSAlex Williamson                                                     unsigned size)
1160e54f24aSAlex Williamson {
1170e54f24aSAlex Williamson     VFIOConfigWindowQuirk *window = opaque;
1180e54f24aSAlex Williamson     VFIOPCIDevice *vdev = window->vdev;
1190e54f24aSAlex Williamson     int i;
1200e54f24aSAlex Williamson 
1210e54f24aSAlex Williamson     window->window_enabled = false;
1220e54f24aSAlex Williamson 
1230e54f24aSAlex Williamson     vfio_region_write(&vdev->bars[window->bar].region,
1240e54f24aSAlex Williamson                       addr + window->address_offset, data, size);
1250e54f24aSAlex Williamson 
1260e54f24aSAlex Williamson     for (i = 0; i < window->nr_matches; i++) {
1270e54f24aSAlex Williamson         if ((data & ~window->matches[i].mask) == window->matches[i].match) {
1280e54f24aSAlex Williamson             window->window_enabled = true;
1290e54f24aSAlex Williamson             window->address_val = data & window->matches[i].mask;
1300e54f24aSAlex Williamson             trace_vfio_quirk_generic_window_address_write(vdev->vbasedev.name,
1310e54f24aSAlex Williamson                                     memory_region_name(window->addr_mem), data);
1320e54f24aSAlex Williamson             break;
1330e54f24aSAlex Williamson         }
1340e54f24aSAlex Williamson     }
1350e54f24aSAlex Williamson }
1360e54f24aSAlex Williamson 
1370e54f24aSAlex Williamson static const MemoryRegionOps vfio_generic_window_address_quirk = {
1380e54f24aSAlex Williamson     .read = vfio_generic_window_quirk_address_read,
1390e54f24aSAlex Williamson     .write = vfio_generic_window_quirk_address_write,
1400e54f24aSAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
1410e54f24aSAlex Williamson };
1420e54f24aSAlex Williamson 
1430e54f24aSAlex Williamson static uint64_t vfio_generic_window_quirk_data_read(void *opaque,
1440e54f24aSAlex Williamson                                                     hwaddr addr, unsigned size)
1450e54f24aSAlex Williamson {
1460e54f24aSAlex Williamson     VFIOConfigWindowQuirk *window = opaque;
1470e54f24aSAlex Williamson     VFIOPCIDevice *vdev = window->vdev;
1480e54f24aSAlex Williamson     uint64_t data;
1490e54f24aSAlex Williamson 
1500e54f24aSAlex Williamson     /* Always read data reg, discard if window enabled */
1510e54f24aSAlex Williamson     data = vfio_region_read(&vdev->bars[window->bar].region,
1520e54f24aSAlex Williamson                             addr + window->data_offset, size);
1530e54f24aSAlex Williamson 
1540e54f24aSAlex Williamson     if (window->window_enabled) {
1550e54f24aSAlex Williamson         data = vfio_pci_read_config(&vdev->pdev, window->address_val, size);
1560e54f24aSAlex Williamson         trace_vfio_quirk_generic_window_data_read(vdev->vbasedev.name,
1570e54f24aSAlex Williamson                                     memory_region_name(window->data_mem), data);
1580e54f24aSAlex Williamson     }
1590e54f24aSAlex Williamson 
1600e54f24aSAlex Williamson     return data;
1610e54f24aSAlex Williamson }
1620e54f24aSAlex Williamson 
1630e54f24aSAlex Williamson static void vfio_generic_window_quirk_data_write(void *opaque, hwaddr addr,
1640e54f24aSAlex Williamson                                                  uint64_t data, unsigned size)
1650e54f24aSAlex Williamson {
1660e54f24aSAlex Williamson     VFIOConfigWindowQuirk *window = opaque;
1670e54f24aSAlex Williamson     VFIOPCIDevice *vdev = window->vdev;
1680e54f24aSAlex Williamson 
1690e54f24aSAlex Williamson     if (window->window_enabled) {
1700e54f24aSAlex Williamson         vfio_pci_write_config(&vdev->pdev, window->address_val, data, size);
1710e54f24aSAlex Williamson         trace_vfio_quirk_generic_window_data_write(vdev->vbasedev.name,
1720e54f24aSAlex Williamson                                     memory_region_name(window->data_mem), data);
1730e54f24aSAlex Williamson         return;
1740e54f24aSAlex Williamson     }
1750e54f24aSAlex Williamson 
1760e54f24aSAlex Williamson     vfio_region_write(&vdev->bars[window->bar].region,
1770e54f24aSAlex Williamson                       addr + window->data_offset, data, size);
1780e54f24aSAlex Williamson }
1790e54f24aSAlex Williamson 
1800e54f24aSAlex Williamson static const MemoryRegionOps vfio_generic_window_data_quirk = {
1810e54f24aSAlex Williamson     .read = vfio_generic_window_quirk_data_read,
1820e54f24aSAlex Williamson     .write = vfio_generic_window_quirk_data_write,
1830e54f24aSAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
1840e54f24aSAlex Williamson };
1850e54f24aSAlex Williamson 
1860d38fb1cSAlex Williamson /*
1870d38fb1cSAlex Williamson  * The generic mirror quirk handles devices which expose PCI config space
1880d38fb1cSAlex Williamson  * through a region within a BAR.  When enabled, reads and writes are
1890d38fb1cSAlex Williamson  * redirected through to emulated PCI config space.  XXX if PCI config space
1900d38fb1cSAlex Williamson  * used memory regions, this could just be an alias.
1910d38fb1cSAlex Williamson  */
1920d38fb1cSAlex Williamson typedef struct VFIOConfigMirrorQuirk {
1930d38fb1cSAlex Williamson     struct VFIOPCIDevice *vdev;
1940d38fb1cSAlex Williamson     uint32_t offset;
1950d38fb1cSAlex Williamson     uint8_t bar;
1960d38fb1cSAlex Williamson     MemoryRegion *mem;
197c958c51dSAlex Williamson     uint8_t data[];
1980d38fb1cSAlex Williamson } VFIOConfigMirrorQuirk;
1990d38fb1cSAlex Williamson 
2000d38fb1cSAlex Williamson static uint64_t vfio_generic_quirk_mirror_read(void *opaque,
2010d38fb1cSAlex Williamson                                                hwaddr addr, unsigned size)
2020d38fb1cSAlex Williamson {
2030d38fb1cSAlex Williamson     VFIOConfigMirrorQuirk *mirror = opaque;
2040d38fb1cSAlex Williamson     VFIOPCIDevice *vdev = mirror->vdev;
2050d38fb1cSAlex Williamson     uint64_t data;
2060d38fb1cSAlex Williamson 
2070d38fb1cSAlex Williamson     /* Read and discard in case the hardware cares */
2080d38fb1cSAlex Williamson     (void)vfio_region_read(&vdev->bars[mirror->bar].region,
2090d38fb1cSAlex Williamson                            addr + mirror->offset, size);
2100d38fb1cSAlex Williamson 
2110d38fb1cSAlex Williamson     data = vfio_pci_read_config(&vdev->pdev, addr, size);
2120d38fb1cSAlex Williamson     trace_vfio_quirk_generic_mirror_read(vdev->vbasedev.name,
2130d38fb1cSAlex Williamson                                          memory_region_name(mirror->mem),
2140d38fb1cSAlex Williamson                                          addr, data);
2150d38fb1cSAlex Williamson     return data;
2160d38fb1cSAlex Williamson }
2170d38fb1cSAlex Williamson 
2180d38fb1cSAlex Williamson static void vfio_generic_quirk_mirror_write(void *opaque, hwaddr addr,
2190d38fb1cSAlex Williamson                                             uint64_t data, unsigned size)
2200d38fb1cSAlex Williamson {
2210d38fb1cSAlex Williamson     VFIOConfigMirrorQuirk *mirror = opaque;
2220d38fb1cSAlex Williamson     VFIOPCIDevice *vdev = mirror->vdev;
2230d38fb1cSAlex Williamson 
2240d38fb1cSAlex Williamson     vfio_pci_write_config(&vdev->pdev, addr, data, size);
2250d38fb1cSAlex Williamson     trace_vfio_quirk_generic_mirror_write(vdev->vbasedev.name,
2260d38fb1cSAlex Williamson                                           memory_region_name(mirror->mem),
2270d38fb1cSAlex Williamson                                           addr, data);
2280d38fb1cSAlex Williamson }
2290d38fb1cSAlex Williamson 
2300d38fb1cSAlex Williamson static const MemoryRegionOps vfio_generic_mirror_quirk = {
2310d38fb1cSAlex Williamson     .read = vfio_generic_quirk_mirror_read,
2320d38fb1cSAlex Williamson     .write = vfio_generic_quirk_mirror_write,
2330d38fb1cSAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
2340d38fb1cSAlex Williamson };
2350d38fb1cSAlex Williamson 
/*
 * Is range1 fully contained within range2?
 *
 * Each range is described by its first address (@first1, @first2) and its
 * length (@len1, @len2).  The comparison is done with subtractions only, so
 * a range that ends at or beyond the top of the 64-bit space cannot wrap
 * around and be reported as contained by mistake.
 *
 * Returns true if every address of range1 also lies in range2.
 */
static bool vfio_range_contained(uint64_t first1, uint64_t len1,
                                 uint64_t first2, uint64_t len2)
{
    /* range1 must not start before range2 nor be longer than it */
    if (first1 < first2 || len1 > len2) {
        return false;
    }

    /* Equivalent to first1 + len1 <= first2 + len2, minus the overflow */
    return first1 - first2 <= len2 - len1;
}
241c00d61d8SAlex Williamson 
/* PCI vendor ID matched by the ATI/AMD quirks in this file */
#define PCI_VENDOR_ID_ATI               0x1002
243c00d61d8SAlex Williamson 
244c00d61d8SAlex Williamson /*
245c00d61d8SAlex Williamson  * Radeon HD cards (HD5450 & HD7850) report the upper byte of the I/O port BAR
246c00d61d8SAlex Williamson  * through VGA register 0x3c3.  On newer cards, the I/O port BAR is always
247c00d61d8SAlex Williamson  * BAR4 (older cards like the X550 used BAR1, but we don't care to support
248c00d61d8SAlex Williamson  * those).  Note that on bare metal, a read of 0x3c3 doesn't always return the
249c00d61d8SAlex Williamson  * I/O port BAR address.  Originally this was coded to return the virtual BAR
250c00d61d8SAlex Williamson  * address only if the physical register read returns the actual BAR address,
251c00d61d8SAlex Williamson  * but users have reported greater success if we return the virtual address
252c00d61d8SAlex Williamson  * unconditionally.
253c00d61d8SAlex Williamson  */
254c00d61d8SAlex Williamson static uint64_t vfio_ati_3c3_quirk_read(void *opaque,
255c00d61d8SAlex Williamson                                         hwaddr addr, unsigned size)
256c00d61d8SAlex Williamson {
257b946d286SAlex Williamson     VFIOPCIDevice *vdev = opaque;
258c00d61d8SAlex Williamson     uint64_t data = vfio_pci_read_config(&vdev->pdev,
259b946d286SAlex Williamson                                          PCI_BASE_ADDRESS_4 + 1, size);
260b946d286SAlex Williamson 
261b946d286SAlex Williamson     trace_vfio_quirk_ati_3c3_read(vdev->vbasedev.name, data);
262c00d61d8SAlex Williamson 
263c00d61d8SAlex Williamson     return data;
264c00d61d8SAlex Williamson }
265c00d61d8SAlex Williamson 
266c00d61d8SAlex Williamson static const MemoryRegionOps vfio_ati_3c3_quirk = {
267c00d61d8SAlex Williamson     .read = vfio_ati_3c3_quirk_read,
268c00d61d8SAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
269c00d61d8SAlex Williamson };
270c00d61d8SAlex Williamson 
27129d62771SThomas Huth VFIOQuirk *vfio_quirk_alloc(int nr_mem)
272bcf3c3d0SAlex Williamson {
273bcf3c3d0SAlex Williamson     VFIOQuirk *quirk = g_new0(VFIOQuirk, 1);
274c958c51dSAlex Williamson     QLIST_INIT(&quirk->ioeventfds);
275bcf3c3d0SAlex Williamson     quirk->mem = g_new0(MemoryRegion, nr_mem);
276bcf3c3d0SAlex Williamson     quirk->nr_mem = nr_mem;
277bcf3c3d0SAlex Williamson 
278bcf3c3d0SAlex Williamson     return quirk;
279bcf3c3d0SAlex Williamson }
280bcf3c3d0SAlex Williamson 
2812b1dbd0dSAlex Williamson static void vfio_ioeventfd_exit(VFIOPCIDevice *vdev, VFIOIOEventFD *ioeventfd)
282c958c51dSAlex Williamson {
283c958c51dSAlex Williamson     QLIST_REMOVE(ioeventfd, next);
284c958c51dSAlex Williamson     memory_region_del_eventfd(ioeventfd->mr, ioeventfd->addr, ioeventfd->size,
285c958c51dSAlex Williamson                               true, ioeventfd->data, &ioeventfd->e);
2862b1dbd0dSAlex Williamson 
2872b1dbd0dSAlex Williamson     if (ioeventfd->vfio) {
2882b1dbd0dSAlex Williamson         struct vfio_device_ioeventfd vfio_ioeventfd;
2892b1dbd0dSAlex Williamson 
2902b1dbd0dSAlex Williamson         vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd);
2912b1dbd0dSAlex Williamson         vfio_ioeventfd.flags = ioeventfd->size;
2922b1dbd0dSAlex Williamson         vfio_ioeventfd.data = ioeventfd->data;
2932b1dbd0dSAlex Williamson         vfio_ioeventfd.offset = ioeventfd->region->fd_offset +
2942b1dbd0dSAlex Williamson                                 ioeventfd->region_addr;
2952b1dbd0dSAlex Williamson         vfio_ioeventfd.fd = -1;
2962b1dbd0dSAlex Williamson 
2972b1dbd0dSAlex Williamson         if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd)) {
2982b1dbd0dSAlex Williamson             error_report("Failed to remove vfio ioeventfd for %s+0x%"
2992b1dbd0dSAlex Williamson                          HWADDR_PRIx"[%d]:0x%"PRIx64" (%m)",
3002b1dbd0dSAlex Williamson                          memory_region_name(ioeventfd->mr), ioeventfd->addr,
3012b1dbd0dSAlex Williamson                          ioeventfd->size, ioeventfd->data);
3022b1dbd0dSAlex Williamson         }
3032b1dbd0dSAlex Williamson     } else {
3042b1dbd0dSAlex Williamson         qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
3052b1dbd0dSAlex Williamson                             NULL, NULL, NULL);
3062b1dbd0dSAlex Williamson     }
3072b1dbd0dSAlex Williamson 
308c958c51dSAlex Williamson     event_notifier_cleanup(&ioeventfd->e);
309c958c51dSAlex Williamson     trace_vfio_ioeventfd_exit(memory_region_name(ioeventfd->mr),
310c958c51dSAlex Williamson                               (uint64_t)ioeventfd->addr, ioeventfd->size,
311c958c51dSAlex Williamson                               ioeventfd->data);
312c958c51dSAlex Williamson     g_free(ioeventfd);
313c958c51dSAlex Williamson }
314c958c51dSAlex Williamson 
315c958c51dSAlex Williamson static void vfio_drop_dynamic_eventfds(VFIOPCIDevice *vdev, VFIOQuirk *quirk)
316c958c51dSAlex Williamson {
317c958c51dSAlex Williamson     VFIOIOEventFD *ioeventfd, *tmp;
318c958c51dSAlex Williamson 
319c958c51dSAlex Williamson     QLIST_FOREACH_SAFE(ioeventfd, &quirk->ioeventfds, next, tmp) {
320c958c51dSAlex Williamson         if (ioeventfd->dynamic) {
3212b1dbd0dSAlex Williamson             vfio_ioeventfd_exit(vdev, ioeventfd);
322c958c51dSAlex Williamson         }
323c958c51dSAlex Williamson     }
324c958c51dSAlex Williamson }
325c958c51dSAlex Williamson 
326c958c51dSAlex Williamson static void vfio_ioeventfd_handler(void *opaque)
327c958c51dSAlex Williamson {
328c958c51dSAlex Williamson     VFIOIOEventFD *ioeventfd = opaque;
329c958c51dSAlex Williamson 
330c958c51dSAlex Williamson     if (event_notifier_test_and_clear(&ioeventfd->e)) {
331c958c51dSAlex Williamson         vfio_region_write(ioeventfd->region, ioeventfd->region_addr,
332c958c51dSAlex Williamson                           ioeventfd->data, ioeventfd->size);
333c958c51dSAlex Williamson         trace_vfio_ioeventfd_handler(memory_region_name(ioeventfd->mr),
334c958c51dSAlex Williamson                                      (uint64_t)ioeventfd->addr, ioeventfd->size,
335c958c51dSAlex Williamson                                      ioeventfd->data);
336c958c51dSAlex Williamson     }
337c958c51dSAlex Williamson }
338c958c51dSAlex Williamson 
339c958c51dSAlex Williamson static VFIOIOEventFD *vfio_ioeventfd_init(VFIOPCIDevice *vdev,
340c958c51dSAlex Williamson                                           MemoryRegion *mr, hwaddr addr,
341c958c51dSAlex Williamson                                           unsigned size, uint64_t data,
342c958c51dSAlex Williamson                                           VFIORegion *region,
343c958c51dSAlex Williamson                                           hwaddr region_addr, bool dynamic)
344c958c51dSAlex Williamson {
345c958c51dSAlex Williamson     VFIOIOEventFD *ioeventfd;
346c958c51dSAlex Williamson 
347c958c51dSAlex Williamson     if (vdev->no_kvm_ioeventfd) {
348c958c51dSAlex Williamson         return NULL;
349c958c51dSAlex Williamson     }
350c958c51dSAlex Williamson 
351c958c51dSAlex Williamson     ioeventfd = g_malloc0(sizeof(*ioeventfd));
352c958c51dSAlex Williamson 
353c958c51dSAlex Williamson     if (event_notifier_init(&ioeventfd->e, 0)) {
354c958c51dSAlex Williamson         g_free(ioeventfd);
355c958c51dSAlex Williamson         return NULL;
356c958c51dSAlex Williamson     }
357c958c51dSAlex Williamson 
358c958c51dSAlex Williamson     /*
359c958c51dSAlex Williamson      * MemoryRegion and relative offset, plus additional ioeventfd setup
360c958c51dSAlex Williamson      * parameters for configuring and later tearing down KVM ioeventfd.
361c958c51dSAlex Williamson      */
362c958c51dSAlex Williamson     ioeventfd->mr = mr;
363c958c51dSAlex Williamson     ioeventfd->addr = addr;
364c958c51dSAlex Williamson     ioeventfd->size = size;
365c958c51dSAlex Williamson     ioeventfd->data = data;
366c958c51dSAlex Williamson     ioeventfd->dynamic = dynamic;
367c958c51dSAlex Williamson     /*
368c958c51dSAlex Williamson      * VFIORegion and relative offset for implementing the userspace
369c958c51dSAlex Williamson      * handler.  data & size fields shared for both uses.
370c958c51dSAlex Williamson      */
371c958c51dSAlex Williamson     ioeventfd->region = region;
372c958c51dSAlex Williamson     ioeventfd->region_addr = region_addr;
373c958c51dSAlex Williamson 
3742b1dbd0dSAlex Williamson     if (!vdev->no_vfio_ioeventfd) {
3752b1dbd0dSAlex Williamson         struct vfio_device_ioeventfd vfio_ioeventfd;
3762b1dbd0dSAlex Williamson 
3772b1dbd0dSAlex Williamson         vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd);
3782b1dbd0dSAlex Williamson         vfio_ioeventfd.flags = ioeventfd->size;
3792b1dbd0dSAlex Williamson         vfio_ioeventfd.data = ioeventfd->data;
3802b1dbd0dSAlex Williamson         vfio_ioeventfd.offset = ioeventfd->region->fd_offset +
3812b1dbd0dSAlex Williamson                                 ioeventfd->region_addr;
3822b1dbd0dSAlex Williamson         vfio_ioeventfd.fd = event_notifier_get_fd(&ioeventfd->e);
3832b1dbd0dSAlex Williamson 
3842b1dbd0dSAlex Williamson         ioeventfd->vfio = !ioctl(vdev->vbasedev.fd,
3852b1dbd0dSAlex Williamson                                  VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd);
3862b1dbd0dSAlex Williamson     }
3872b1dbd0dSAlex Williamson 
3882b1dbd0dSAlex Williamson     if (!ioeventfd->vfio) {
389c958c51dSAlex Williamson         qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
390c958c51dSAlex Williamson                             vfio_ioeventfd_handler, NULL, ioeventfd);
3912b1dbd0dSAlex Williamson     }
3922b1dbd0dSAlex Williamson 
393c958c51dSAlex Williamson     memory_region_add_eventfd(ioeventfd->mr, ioeventfd->addr, ioeventfd->size,
394c958c51dSAlex Williamson                               true, ioeventfd->data, &ioeventfd->e);
395c958c51dSAlex Williamson     trace_vfio_ioeventfd_init(memory_region_name(mr), (uint64_t)addr,
3962b1dbd0dSAlex Williamson                               size, data, ioeventfd->vfio);
397c958c51dSAlex Williamson 
398c958c51dSAlex Williamson     return ioeventfd;
399c958c51dSAlex Williamson }
400c958c51dSAlex Williamson 
401c00d61d8SAlex Williamson static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev)
402c00d61d8SAlex Williamson {
403c00d61d8SAlex Williamson     VFIOQuirk *quirk;
404c00d61d8SAlex Williamson 
405c00d61d8SAlex Williamson     /*
406c00d61d8SAlex Williamson      * As long as the BAR is >= 256 bytes it will be aligned such that the
407c00d61d8SAlex Williamson      * lower byte is always zero.  Filter out anything else, if it exists.
408c00d61d8SAlex Williamson      */
409b946d286SAlex Williamson     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
410b946d286SAlex Williamson         !vdev->bars[4].ioport || vdev->bars[4].region.size < 256) {
411c00d61d8SAlex Williamson         return;
412c00d61d8SAlex Williamson     }
413c00d61d8SAlex Williamson 
414bcf3c3d0SAlex Williamson     quirk = vfio_quirk_alloc(1);
415c00d61d8SAlex Williamson 
416b946d286SAlex Williamson     memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, vdev,
417c00d61d8SAlex Williamson                           "vfio-ati-3c3-quirk", 1);
4182d82f8a3SAlex Williamson     memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
4198c4f2348SAlex Williamson                                 3 /* offset 3 bytes from 0x3c0 */, quirk->mem);
420c00d61d8SAlex Williamson 
4212d82f8a3SAlex Williamson     QLIST_INSERT_HEAD(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks,
422c00d61d8SAlex Williamson                       quirk, next);
423c00d61d8SAlex Williamson 
424b946d286SAlex Williamson     trace_vfio_quirk_ati_3c3_probe(vdev->vbasedev.name);
425c00d61d8SAlex Williamson }
426c00d61d8SAlex Williamson 
/*
 * Newer ATI/AMD devices, including HD5450 and HD7850, have a mirror to PCI
 * config space through MMIO BAR2 at offset 0x4000.  Nothing seems to access
 * the MMIO space directly, but a window to this space is provided through
 * I/O port BAR4.  Offset 0x0 is the address register and offset 0x4 is the
 * data register.  When the address is programmed to a range of 0x4000-0x4fff,
 * PCI configuration space is available.  Experimentation seems to indicate
 * that the hardware may provide read-only access.
 */
4360e54f24aSAlex Williamson static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr)
437c00d61d8SAlex Williamson {
438c00d61d8SAlex Williamson     VFIOQuirk *quirk;
4390e54f24aSAlex Williamson     VFIOConfigWindowQuirk *window;
440c00d61d8SAlex Williamson 
4410e54f24aSAlex Williamson     /* This windows doesn't seem to be used except by legacy VGA code */
4420e54f24aSAlex Williamson     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
4434d3fc4fdSAlex Williamson         !vdev->vga || nr != 4) {
444c00d61d8SAlex Williamson         return;
445c00d61d8SAlex Williamson     }
446c00d61d8SAlex Williamson 
447bcf3c3d0SAlex Williamson     quirk = vfio_quirk_alloc(2);
4480e54f24aSAlex Williamson     window = quirk->data = g_malloc0(sizeof(*window) +
4490e54f24aSAlex Williamson                                      sizeof(VFIOConfigWindowMatch));
4500e54f24aSAlex Williamson     window->vdev = vdev;
4510e54f24aSAlex Williamson     window->address_offset = 0;
4520e54f24aSAlex Williamson     window->data_offset = 4;
4530e54f24aSAlex Williamson     window->nr_matches = 1;
4540e54f24aSAlex Williamson     window->matches[0].match = 0x4000;
455f5793fd9SAlex Williamson     window->matches[0].mask = vdev->config_size - 1;
4560e54f24aSAlex Williamson     window->bar = nr;
4570e54f24aSAlex Williamson     window->addr_mem = &quirk->mem[0];
4580e54f24aSAlex Williamson     window->data_mem = &quirk->mem[1];
459c00d61d8SAlex Williamson 
4600e54f24aSAlex Williamson     memory_region_init_io(window->addr_mem, OBJECT(vdev),
4610e54f24aSAlex Williamson                           &vfio_generic_window_address_quirk, window,
4620e54f24aSAlex Williamson                           "vfio-ati-bar4-window-address-quirk", 4);
463db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
4640e54f24aSAlex Williamson                                         window->address_offset,
4650e54f24aSAlex Williamson                                         window->addr_mem, 1);
4660e54f24aSAlex Williamson 
4670e54f24aSAlex Williamson     memory_region_init_io(window->data_mem, OBJECT(vdev),
4680e54f24aSAlex Williamson                           &vfio_generic_window_data_quirk, window,
4690e54f24aSAlex Williamson                           "vfio-ati-bar4-window-data-quirk", 4);
470db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
4710e54f24aSAlex Williamson                                         window->data_offset,
4720e54f24aSAlex Williamson                                         window->data_mem, 1);
473c00d61d8SAlex Williamson 
474c00d61d8SAlex Williamson     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
475c00d61d8SAlex Williamson 
4760e54f24aSAlex Williamson     trace_vfio_quirk_ati_bar4_probe(vdev->vbasedev.name);
477c00d61d8SAlex Williamson }
478c00d61d8SAlex Williamson 
479c00d61d8SAlex Williamson /*
4800d38fb1cSAlex Williamson  * Trap the BAR2 MMIO mirror to config space as well.
481c00d61d8SAlex Williamson  */
4820d38fb1cSAlex Williamson static void vfio_probe_ati_bar2_quirk(VFIOPCIDevice *vdev, int nr)
483c00d61d8SAlex Williamson {
484c00d61d8SAlex Williamson     VFIOQuirk *quirk;
4850d38fb1cSAlex Williamson     VFIOConfigMirrorQuirk *mirror;
486c00d61d8SAlex Williamson 
487c00d61d8SAlex Williamson     /* Only enable on newer devices where BAR2 is 64bit */
4880d38fb1cSAlex Williamson     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
4894d3fc4fdSAlex Williamson         !vdev->vga || nr != 2 || !vdev->bars[2].mem64) {
490c00d61d8SAlex Williamson         return;
491c00d61d8SAlex Williamson     }
492c00d61d8SAlex Williamson 
493bcf3c3d0SAlex Williamson     quirk = vfio_quirk_alloc(1);
4940d38fb1cSAlex Williamson     mirror = quirk->data = g_malloc0(sizeof(*mirror));
495bcf3c3d0SAlex Williamson     mirror->mem = quirk->mem;
4960d38fb1cSAlex Williamson     mirror->vdev = vdev;
4970d38fb1cSAlex Williamson     mirror->offset = 0x4000;
4980d38fb1cSAlex Williamson     mirror->bar = nr;
499c00d61d8SAlex Williamson 
5000d38fb1cSAlex Williamson     memory_region_init_io(mirror->mem, OBJECT(vdev),
5010d38fb1cSAlex Williamson                           &vfio_generic_mirror_quirk, mirror,
5020d38fb1cSAlex Williamson                           "vfio-ati-bar2-4000-quirk", PCI_CONFIG_SPACE_SIZE);
503db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
5040d38fb1cSAlex Williamson                                         mirror->offset, mirror->mem, 1);
505c00d61d8SAlex Williamson 
506c00d61d8SAlex Williamson     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
507c00d61d8SAlex Williamson 
5080d38fb1cSAlex Williamson     trace_vfio_quirk_ati_bar2_probe(vdev->vbasedev.name);
509c00d61d8SAlex Williamson }
510c00d61d8SAlex Williamson 
511c00d61d8SAlex Williamson /*
512c00d61d8SAlex Williamson  * Older ATI/AMD cards like the X550 have a similar window to that above.
513c00d61d8SAlex Williamson  * I/O port BAR1 provides a window to a mirror of PCI config space located
514c00d61d8SAlex Williamson  * in BAR2 at offset 0xf00.  We don't care to support such older cards, but
515c00d61d8SAlex Williamson  * note it for future reference.
516c00d61d8SAlex Williamson  */
517c00d61d8SAlex Williamson 
/*
 * Nvidia has several different methods to get to config space, the
 * nouveau project has several of these documented here:
 * https://github.com/pathscale/envytools/tree/master/hwdocs
 *
 * The first quirk is actually not documented in envytools and is found
 * on 10de:01d1 (NVIDIA Corporation G72 [GeForce 7300 LE]).  This is an
 * NV46 chipset.  The backdoor uses the legacy VGA I/O ports to access
 * the mirror of PCI config space found at BAR0 offset 0x1800.  The access
 * sequence first writes 0x338 to I/O port 0x3d4.  The target offset is
 * then written to 0x3d0.  Finally 0x538 is written for a read and 0x738
 * is written for a write to 0x3d4.  The BAR0 offset is then accessible
 * through 0x3d0.  This quirk doesn't seem to be necessary on newer cards
 * that use the I/O port BAR5 window but it doesn't hurt to leave it.
 */
typedef enum {NONE = 0, SELECT, WINDOW, READ, WRITE} VFIONvidia3d0State;

/*
 * Printable names for VFIONvidia3d0State, used for tracing.  The table is
 * indexed by state value; designated initializers keep each name bound to
 * its enumerator, and the array itself is read-only.
 */
static const char *const nv3d0_states[] = {
    [NONE]   = "NONE",
    [SELECT] = "SELECT",
    [WINDOW] = "WINDOW",
    [READ]   = "READ",
    [WRITE]  = "WRITE",
};
5366029a424SAlex Williamson 
5376029a424SAlex Williamson typedef struct VFIONvidia3d0Quirk {
5386029a424SAlex Williamson     VFIOPCIDevice *vdev;
5396029a424SAlex Williamson     VFIONvidia3d0State state;
5406029a424SAlex Williamson     uint32_t offset;
5416029a424SAlex Williamson } VFIONvidia3d0Quirk;
5426029a424SAlex Williamson 
5436029a424SAlex Williamson static uint64_t vfio_nvidia_3d4_quirk_read(void *opaque,
5446029a424SAlex Williamson                                            hwaddr addr, unsigned size)
5456029a424SAlex Williamson {
5466029a424SAlex Williamson     VFIONvidia3d0Quirk *quirk = opaque;
5476029a424SAlex Williamson     VFIOPCIDevice *vdev = quirk->vdev;
5486029a424SAlex Williamson 
5496029a424SAlex Williamson     quirk->state = NONE;
5506029a424SAlex Williamson 
5512d82f8a3SAlex Williamson     return vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
5526029a424SAlex Williamson                          addr + 0x14, size);
5536029a424SAlex Williamson }
5546029a424SAlex Williamson 
5556029a424SAlex Williamson static void vfio_nvidia_3d4_quirk_write(void *opaque, hwaddr addr,
5566029a424SAlex Williamson                                         uint64_t data, unsigned size)
5576029a424SAlex Williamson {
5586029a424SAlex Williamson     VFIONvidia3d0Quirk *quirk = opaque;
5596029a424SAlex Williamson     VFIOPCIDevice *vdev = quirk->vdev;
5606029a424SAlex Williamson     VFIONvidia3d0State old_state = quirk->state;
5616029a424SAlex Williamson 
5626029a424SAlex Williamson     quirk->state = NONE;
5636029a424SAlex Williamson 
5646029a424SAlex Williamson     switch (data) {
5656029a424SAlex Williamson     case 0x338:
5666029a424SAlex Williamson         if (old_state == NONE) {
5676029a424SAlex Williamson             quirk->state = SELECT;
5686029a424SAlex Williamson             trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
5696029a424SAlex Williamson                                               nv3d0_states[quirk->state]);
5706029a424SAlex Williamson         }
5716029a424SAlex Williamson         break;
5726029a424SAlex Williamson     case 0x538:
5736029a424SAlex Williamson         if (old_state == WINDOW) {
5746029a424SAlex Williamson             quirk->state = READ;
5756029a424SAlex Williamson             trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
5766029a424SAlex Williamson                                               nv3d0_states[quirk->state]);
5776029a424SAlex Williamson         }
5786029a424SAlex Williamson         break;
5796029a424SAlex Williamson     case 0x738:
5806029a424SAlex Williamson         if (old_state == WINDOW) {
5816029a424SAlex Williamson             quirk->state = WRITE;
5826029a424SAlex Williamson             trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
5836029a424SAlex Williamson                                               nv3d0_states[quirk->state]);
5846029a424SAlex Williamson         }
5856029a424SAlex Williamson         break;
5866029a424SAlex Williamson     }
5876029a424SAlex Williamson 
5882d82f8a3SAlex Williamson     vfio_vga_write(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
5896029a424SAlex Williamson                    addr + 0x14, data, size);
5906029a424SAlex Williamson }
5916029a424SAlex Williamson 
5926029a424SAlex Williamson static const MemoryRegionOps vfio_nvidia_3d4_quirk = {
5936029a424SAlex Williamson     .read = vfio_nvidia_3d4_quirk_read,
5946029a424SAlex Williamson     .write = vfio_nvidia_3d4_quirk_write,
5956029a424SAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
596c00d61d8SAlex Williamson };
597c00d61d8SAlex Williamson 
598c00d61d8SAlex Williamson static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque,
599c00d61d8SAlex Williamson                                            hwaddr addr, unsigned size)
600c00d61d8SAlex Williamson {
6016029a424SAlex Williamson     VFIONvidia3d0Quirk *quirk = opaque;
602c00d61d8SAlex Williamson     VFIOPCIDevice *vdev = quirk->vdev;
6036029a424SAlex Williamson     VFIONvidia3d0State old_state = quirk->state;
6042d82f8a3SAlex Williamson     uint64_t data = vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
6056029a424SAlex Williamson                                   addr + 0x10, size);
606c00d61d8SAlex Williamson 
6076029a424SAlex Williamson     quirk->state = NONE;
6086029a424SAlex Williamson 
6096029a424SAlex Williamson     if (old_state == READ &&
6106029a424SAlex Williamson         (quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) {
6116029a424SAlex Williamson         uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1);
6126029a424SAlex Williamson 
6136029a424SAlex Williamson         data = vfio_pci_read_config(&vdev->pdev, offset, size);
6146029a424SAlex Williamson         trace_vfio_quirk_nvidia_3d0_read(vdev->vbasedev.name,
6156029a424SAlex Williamson                                          offset, size, data);
616c00d61d8SAlex Williamson     }
617c00d61d8SAlex Williamson 
618c00d61d8SAlex Williamson     return data;
619c00d61d8SAlex Williamson }
620c00d61d8SAlex Williamson 
621c00d61d8SAlex Williamson static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr,
622c00d61d8SAlex Williamson                                         uint64_t data, unsigned size)
623c00d61d8SAlex Williamson {
6246029a424SAlex Williamson     VFIONvidia3d0Quirk *quirk = opaque;
625c00d61d8SAlex Williamson     VFIOPCIDevice *vdev = quirk->vdev;
6266029a424SAlex Williamson     VFIONvidia3d0State old_state = quirk->state;
627c00d61d8SAlex Williamson 
6286029a424SAlex Williamson     quirk->state = NONE;
6296029a424SAlex Williamson 
6306029a424SAlex Williamson     if (old_state == SELECT) {
6316029a424SAlex Williamson         quirk->offset = (uint32_t)data;
6326029a424SAlex Williamson         quirk->state = WINDOW;
6336029a424SAlex Williamson         trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
6346029a424SAlex Williamson                                           nv3d0_states[quirk->state]);
6356029a424SAlex Williamson     } else if (old_state == WRITE) {
6366029a424SAlex Williamson         if ((quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) {
6376029a424SAlex Williamson             uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1);
6386029a424SAlex Williamson 
6396029a424SAlex Williamson             vfio_pci_write_config(&vdev->pdev, offset, data, size);
6406029a424SAlex Williamson             trace_vfio_quirk_nvidia_3d0_write(vdev->vbasedev.name,
6416029a424SAlex Williamson                                               offset, data, size);
642c00d61d8SAlex Williamson             return;
643c00d61d8SAlex Williamson         }
644c00d61d8SAlex Williamson     }
645c00d61d8SAlex Williamson 
6462d82f8a3SAlex Williamson     vfio_vga_write(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
6476029a424SAlex Williamson                    addr + 0x10, data, size);
648c00d61d8SAlex Williamson }
649c00d61d8SAlex Williamson 
650c00d61d8SAlex Williamson static const MemoryRegionOps vfio_nvidia_3d0_quirk = {
651c00d61d8SAlex Williamson     .read = vfio_nvidia_3d0_quirk_read,
652c00d61d8SAlex Williamson     .write = vfio_nvidia_3d0_quirk_write,
653c00d61d8SAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
654c00d61d8SAlex Williamson };
655c00d61d8SAlex Williamson 
656c00d61d8SAlex Williamson static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev)
657c00d61d8SAlex Williamson {
658c00d61d8SAlex Williamson     VFIOQuirk *quirk;
6596029a424SAlex Williamson     VFIONvidia3d0Quirk *data;
660c00d61d8SAlex Williamson 
661db32d0f4SAlex Williamson     if (vdev->no_geforce_quirks ||
662db32d0f4SAlex Williamson         !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
663c00d61d8SAlex Williamson         !vdev->bars[1].region.size) {
664c00d61d8SAlex Williamson         return;
665c00d61d8SAlex Williamson     }
666c00d61d8SAlex Williamson 
667bcf3c3d0SAlex Williamson     quirk = vfio_quirk_alloc(2);
6686029a424SAlex Williamson     quirk->data = data = g_malloc0(sizeof(*data));
6696029a424SAlex Williamson     data->vdev = vdev;
670c00d61d8SAlex Williamson 
6716029a424SAlex Williamson     memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_nvidia_3d4_quirk,
6726029a424SAlex Williamson                           data, "vfio-nvidia-3d4-quirk", 2);
6732d82f8a3SAlex Williamson     memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
6746029a424SAlex Williamson                                 0x14 /* 0x3c0 + 0x14 */, &quirk->mem[0]);
6756029a424SAlex Williamson 
6766029a424SAlex Williamson     memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_nvidia_3d0_quirk,
6776029a424SAlex Williamson                           data, "vfio-nvidia-3d0-quirk", 2);
6782d82f8a3SAlex Williamson     memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
6796029a424SAlex Williamson                                 0x10 /* 0x3c0 + 0x10 */, &quirk->mem[1]);
680c00d61d8SAlex Williamson 
6812d82f8a3SAlex Williamson     QLIST_INSERT_HEAD(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks,
682c00d61d8SAlex Williamson                       quirk, next);
683c00d61d8SAlex Williamson 
6846029a424SAlex Williamson     trace_vfio_quirk_nvidia_3d0_probe(vdev->vbasedev.name);
685c00d61d8SAlex Williamson }
686c00d61d8SAlex Williamson 
687c00d61d8SAlex Williamson /*
688c00d61d8SAlex Williamson  * The second quirk is documented in envytools.  The I/O port BAR5 is just
689c00d61d8SAlex Williamson  * a set of address/data ports to the MMIO BARs.  The BAR we care about is
690c00d61d8SAlex Williamson  * again BAR0.  This backdoor is apparently a bit newer than the one above
691c00d61d8SAlex Williamson  * so we need to not only trap 256 bytes @0x1800, but all of PCI config
692c00d61d8SAlex Williamson  * space, including extended space is available at the 4k @0x88000.
693c00d61d8SAlex Williamson  */
6940e54f24aSAlex Williamson typedef struct VFIONvidiaBAR5Quirk {
6950e54f24aSAlex Williamson     uint32_t master;
6960e54f24aSAlex Williamson     uint32_t enable;
6970e54f24aSAlex Williamson     MemoryRegion *addr_mem;
6980e54f24aSAlex Williamson     MemoryRegion *data_mem;
6990e54f24aSAlex Williamson     bool enabled;
7000e54f24aSAlex Williamson     VFIOConfigWindowQuirk window; /* last for match data */
7010e54f24aSAlex Williamson } VFIONvidiaBAR5Quirk;
702c00d61d8SAlex Williamson 
7030e54f24aSAlex Williamson static void vfio_nvidia_bar5_enable(VFIONvidiaBAR5Quirk *bar5)
7040e54f24aSAlex Williamson {
7050e54f24aSAlex Williamson     VFIOPCIDevice *vdev = bar5->window.vdev;
7060e54f24aSAlex Williamson 
7070e54f24aSAlex Williamson     if (((bar5->master & bar5->enable) & 0x1) == bar5->enabled) {
7080e54f24aSAlex Williamson         return;
7090e54f24aSAlex Williamson     }
7100e54f24aSAlex Williamson 
7110e54f24aSAlex Williamson     bar5->enabled = !bar5->enabled;
7120e54f24aSAlex Williamson     trace_vfio_quirk_nvidia_bar5_state(vdev->vbasedev.name,
7130e54f24aSAlex Williamson                                        bar5->enabled ?  "Enable" : "Disable");
7140e54f24aSAlex Williamson     memory_region_set_enabled(bar5->addr_mem, bar5->enabled);
7150e54f24aSAlex Williamson     memory_region_set_enabled(bar5->data_mem, bar5->enabled);
7160e54f24aSAlex Williamson }
7170e54f24aSAlex Williamson 
7180e54f24aSAlex Williamson static uint64_t vfio_nvidia_bar5_quirk_master_read(void *opaque,
7190e54f24aSAlex Williamson                                                    hwaddr addr, unsigned size)
7200e54f24aSAlex Williamson {
7210e54f24aSAlex Williamson     VFIONvidiaBAR5Quirk *bar5 = opaque;
7220e54f24aSAlex Williamson     VFIOPCIDevice *vdev = bar5->window.vdev;
7230e54f24aSAlex Williamson 
7240e54f24aSAlex Williamson     return vfio_region_read(&vdev->bars[5].region, addr, size);
7250e54f24aSAlex Williamson }
7260e54f24aSAlex Williamson 
7270e54f24aSAlex Williamson static void vfio_nvidia_bar5_quirk_master_write(void *opaque, hwaddr addr,
728c00d61d8SAlex Williamson                                                 uint64_t data, unsigned size)
729c00d61d8SAlex Williamson {
7300e54f24aSAlex Williamson     VFIONvidiaBAR5Quirk *bar5 = opaque;
7310e54f24aSAlex Williamson     VFIOPCIDevice *vdev = bar5->window.vdev;
732c00d61d8SAlex Williamson 
7330e54f24aSAlex Williamson     vfio_region_write(&vdev->bars[5].region, addr, data, size);
7340e54f24aSAlex Williamson 
7350e54f24aSAlex Williamson     bar5->master = data;
7360e54f24aSAlex Williamson     vfio_nvidia_bar5_enable(bar5);
737c00d61d8SAlex Williamson }
738c00d61d8SAlex Williamson 
7390e54f24aSAlex Williamson static const MemoryRegionOps vfio_nvidia_bar5_quirk_master = {
7400e54f24aSAlex Williamson     .read = vfio_nvidia_bar5_quirk_master_read,
7410e54f24aSAlex Williamson     .write = vfio_nvidia_bar5_quirk_master_write,
742c00d61d8SAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
743c00d61d8SAlex Williamson };
744c00d61d8SAlex Williamson 
7450e54f24aSAlex Williamson static uint64_t vfio_nvidia_bar5_quirk_enable_read(void *opaque,
7460e54f24aSAlex Williamson                                                    hwaddr addr, unsigned size)
747c00d61d8SAlex Williamson {
7480e54f24aSAlex Williamson     VFIONvidiaBAR5Quirk *bar5 = opaque;
7490e54f24aSAlex Williamson     VFIOPCIDevice *vdev = bar5->window.vdev;
750c00d61d8SAlex Williamson 
7510e54f24aSAlex Williamson     return vfio_region_read(&vdev->bars[5].region, addr + 4, size);
7520e54f24aSAlex Williamson }
7530e54f24aSAlex Williamson 
7540e54f24aSAlex Williamson static void vfio_nvidia_bar5_quirk_enable_write(void *opaque, hwaddr addr,
7550e54f24aSAlex Williamson                                                 uint64_t data, unsigned size)
7560e54f24aSAlex Williamson {
7570e54f24aSAlex Williamson     VFIONvidiaBAR5Quirk *bar5 = opaque;
7580e54f24aSAlex Williamson     VFIOPCIDevice *vdev = bar5->window.vdev;
7590e54f24aSAlex Williamson 
7600e54f24aSAlex Williamson     vfio_region_write(&vdev->bars[5].region, addr + 4, data, size);
7610e54f24aSAlex Williamson 
7620e54f24aSAlex Williamson     bar5->enable = data;
7630e54f24aSAlex Williamson     vfio_nvidia_bar5_enable(bar5);
7640e54f24aSAlex Williamson }
7650e54f24aSAlex Williamson 
7660e54f24aSAlex Williamson static const MemoryRegionOps vfio_nvidia_bar5_quirk_enable = {
7670e54f24aSAlex Williamson     .read = vfio_nvidia_bar5_quirk_enable_read,
7680e54f24aSAlex Williamson     .write = vfio_nvidia_bar5_quirk_enable_write,
7690e54f24aSAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
7700e54f24aSAlex Williamson };
7710e54f24aSAlex Williamson 
7720e54f24aSAlex Williamson static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
7730e54f24aSAlex Williamson {
7740e54f24aSAlex Williamson     VFIOQuirk *quirk;
7750e54f24aSAlex Williamson     VFIONvidiaBAR5Quirk *bar5;
7760e54f24aSAlex Williamson     VFIOConfigWindowQuirk *window;
7770e54f24aSAlex Williamson 
778db32d0f4SAlex Williamson     if (vdev->no_geforce_quirks ||
779db32d0f4SAlex Williamson         !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
7808f419c5bSAlex Williamson         !vdev->vga || nr != 5 || !vdev->bars[5].ioport) {
781c00d61d8SAlex Williamson         return;
782c00d61d8SAlex Williamson     }
783c00d61d8SAlex Williamson 
784bcf3c3d0SAlex Williamson     quirk = vfio_quirk_alloc(4);
7850e54f24aSAlex Williamson     bar5 = quirk->data = g_malloc0(sizeof(*bar5) +
7860e54f24aSAlex Williamson                                    (sizeof(VFIOConfigWindowMatch) * 2));
7870e54f24aSAlex Williamson     window = &bar5->window;
788c00d61d8SAlex Williamson 
7890e54f24aSAlex Williamson     window->vdev = vdev;
7900e54f24aSAlex Williamson     window->address_offset = 0x8;
7910e54f24aSAlex Williamson     window->data_offset = 0xc;
7920e54f24aSAlex Williamson     window->nr_matches = 2;
7930e54f24aSAlex Williamson     window->matches[0].match = 0x1800;
7940e54f24aSAlex Williamson     window->matches[0].mask = PCI_CONFIG_SPACE_SIZE - 1;
7950e54f24aSAlex Williamson     window->matches[1].match = 0x88000;
796f5793fd9SAlex Williamson     window->matches[1].mask = vdev->config_size - 1;
7970e54f24aSAlex Williamson     window->bar = nr;
7980e54f24aSAlex Williamson     window->addr_mem = bar5->addr_mem = &quirk->mem[0];
7990e54f24aSAlex Williamson     window->data_mem = bar5->data_mem = &quirk->mem[1];
8000e54f24aSAlex Williamson 
8010e54f24aSAlex Williamson     memory_region_init_io(window->addr_mem, OBJECT(vdev),
8020e54f24aSAlex Williamson                           &vfio_generic_window_address_quirk, window,
8030e54f24aSAlex Williamson                           "vfio-nvidia-bar5-window-address-quirk", 4);
804db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
8050e54f24aSAlex Williamson                                         window->address_offset,
8060e54f24aSAlex Williamson                                         window->addr_mem, 1);
8070e54f24aSAlex Williamson     memory_region_set_enabled(window->addr_mem, false);
8080e54f24aSAlex Williamson 
8090e54f24aSAlex Williamson     memory_region_init_io(window->data_mem, OBJECT(vdev),
8100e54f24aSAlex Williamson                           &vfio_generic_window_data_quirk, window,
8110e54f24aSAlex Williamson                           "vfio-nvidia-bar5-window-data-quirk", 4);
812db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
8130e54f24aSAlex Williamson                                         window->data_offset,
8140e54f24aSAlex Williamson                                         window->data_mem, 1);
8150e54f24aSAlex Williamson     memory_region_set_enabled(window->data_mem, false);
8160e54f24aSAlex Williamson 
8170e54f24aSAlex Williamson     memory_region_init_io(&quirk->mem[2], OBJECT(vdev),
8180e54f24aSAlex Williamson                           &vfio_nvidia_bar5_quirk_master, bar5,
8190e54f24aSAlex Williamson                           "vfio-nvidia-bar5-master-quirk", 4);
820db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
8210e54f24aSAlex Williamson                                         0, &quirk->mem[2], 1);
8220e54f24aSAlex Williamson 
8230e54f24aSAlex Williamson     memory_region_init_io(&quirk->mem[3], OBJECT(vdev),
8240e54f24aSAlex Williamson                           &vfio_nvidia_bar5_quirk_enable, bar5,
8250e54f24aSAlex Williamson                           "vfio-nvidia-bar5-enable-quirk", 4);
826db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
8270e54f24aSAlex Williamson                                         4, &quirk->mem[3], 1);
828c00d61d8SAlex Williamson 
829c00d61d8SAlex Williamson     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
830c00d61d8SAlex Williamson 
8310e54f24aSAlex Williamson     trace_vfio_quirk_nvidia_bar5_probe(vdev->vbasedev.name);
832c00d61d8SAlex Williamson }
833c00d61d8SAlex Williamson 
834c958c51dSAlex Williamson typedef struct LastDataSet {
835c958c51dSAlex Williamson     VFIOQuirk *quirk;
836c958c51dSAlex Williamson     hwaddr addr;
837c958c51dSAlex Williamson     uint64_t data;
838c958c51dSAlex Williamson     unsigned size;
839c958c51dSAlex Williamson     int hits;
840c958c51dSAlex Williamson     int added;
841c958c51dSAlex Williamson } LastDataSet;
842c958c51dSAlex Williamson 
843c958c51dSAlex Williamson #define MAX_DYN_IOEVENTFD 10
844c958c51dSAlex Williamson #define HITS_FOR_IOEVENTFD 10
845c958c51dSAlex Williamson 
8460d38fb1cSAlex Williamson /*
8470d38fb1cSAlex Williamson  * Finally, BAR0 itself.  We want to redirect any accesses to either
8480d38fb1cSAlex Williamson  * 0x1800 or 0x88000 through the PCI config space access functions.
8490d38fb1cSAlex Williamson  */
8500d38fb1cSAlex Williamson static void vfio_nvidia_quirk_mirror_write(void *opaque, hwaddr addr,
851c00d61d8SAlex Williamson                                            uint64_t data, unsigned size)
852c00d61d8SAlex Williamson {
8530d38fb1cSAlex Williamson     VFIOConfigMirrorQuirk *mirror = opaque;
8540d38fb1cSAlex Williamson     VFIOPCIDevice *vdev = mirror->vdev;
855c00d61d8SAlex Williamson     PCIDevice *pdev = &vdev->pdev;
856c958c51dSAlex Williamson     LastDataSet *last = (LastDataSet *)&mirror->data;
857c00d61d8SAlex Williamson 
8580d38fb1cSAlex Williamson     vfio_generic_quirk_mirror_write(opaque, addr, data, size);
859c00d61d8SAlex Williamson 
860c00d61d8SAlex Williamson     /*
861c00d61d8SAlex Williamson      * Nvidia seems to acknowledge MSI interrupts by writing 0xff to the
862c00d61d8SAlex Williamson      * MSI capability ID register.  Both the ID and next register are
863c00d61d8SAlex Williamson      * read-only, so we allow writes covering either of those to real hw.
864c00d61d8SAlex Williamson      */
865c00d61d8SAlex Williamson     if ((pdev->cap_present & QEMU_PCI_CAP_MSI) &&
866c00d61d8SAlex Williamson         vfio_range_contained(addr, size, pdev->msi_cap, PCI_MSI_FLAGS)) {
8670d38fb1cSAlex Williamson         vfio_region_write(&vdev->bars[mirror->bar].region,
8680d38fb1cSAlex Williamson                           addr + mirror->offset, data, size);
8690d38fb1cSAlex Williamson         trace_vfio_quirk_nvidia_bar0_msi_ack(vdev->vbasedev.name);
870c00d61d8SAlex Williamson     }
871c958c51dSAlex Williamson 
872c958c51dSAlex Williamson     /*
873c958c51dSAlex Williamson      * Automatically add an ioeventfd to handle any repeated write with the
874c958c51dSAlex Williamson      * same data and size above the standard PCI config space header.  This is
875c958c51dSAlex Williamson      * primarily expected to accelerate the MSI-ACK behavior, such as noted
876c958c51dSAlex Williamson      * above.  Current hardware/drivers should trigger an ioeventfd at config
877c958c51dSAlex Williamson      * offset 0x704 (region offset 0x88704), with data 0x0, size 4.
878c958c51dSAlex Williamson      *
879c958c51dSAlex Williamson      * The criteria of 10 successive hits is arbitrary but reliably adds the
880c958c51dSAlex Williamson      * MSI-ACK region.  Note that as some writes are bypassed via the ioeventfd,
881c958c51dSAlex Williamson      * the remaining ones have a greater chance of being seen successively.
882c958c51dSAlex Williamson      * To avoid the pathological case of burning up all of QEMU's open file
883c958c51dSAlex Williamson      * handles, arbitrarily limit this algorithm from adding no more than 10
884c958c51dSAlex Williamson      * ioeventfds, print an error if we would have added an 11th, and then
885c958c51dSAlex Williamson      * stop counting.
886c958c51dSAlex Williamson      */
887c958c51dSAlex Williamson     if (!vdev->no_kvm_ioeventfd &&
888c958c51dSAlex Williamson         addr >= PCI_STD_HEADER_SIZEOF && last->added <= MAX_DYN_IOEVENTFD) {
889c958c51dSAlex Williamson         if (addr != last->addr || data != last->data || size != last->size) {
890c958c51dSAlex Williamson             last->addr = addr;
891c958c51dSAlex Williamson             last->data = data;
892c958c51dSAlex Williamson             last->size = size;
893c958c51dSAlex Williamson             last->hits = 1;
894c958c51dSAlex Williamson         } else if (++last->hits >= HITS_FOR_IOEVENTFD) {
895c958c51dSAlex Williamson             if (last->added < MAX_DYN_IOEVENTFD) {
896c958c51dSAlex Williamson                 VFIOIOEventFD *ioeventfd;
897c958c51dSAlex Williamson                 ioeventfd = vfio_ioeventfd_init(vdev, mirror->mem, addr, size,
898c958c51dSAlex Williamson                                         data, &vdev->bars[mirror->bar].region,
899c958c51dSAlex Williamson                                         mirror->offset + addr, true);
900c958c51dSAlex Williamson                 if (ioeventfd) {
901c958c51dSAlex Williamson                     VFIOQuirk *quirk = last->quirk;
902c958c51dSAlex Williamson 
903c958c51dSAlex Williamson                     QLIST_INSERT_HEAD(&quirk->ioeventfds, ioeventfd, next);
904c958c51dSAlex Williamson                     last->added++;
905c958c51dSAlex Williamson                 }
906c958c51dSAlex Williamson             } else {
907c958c51dSAlex Williamson                 last->added++;
908c958c51dSAlex Williamson                 warn_report("NVIDIA ioeventfd queue full for %s, unable to "
909c958c51dSAlex Williamson                             "accelerate 0x%"HWADDR_PRIx", data 0x%"PRIx64", "
910c958c51dSAlex Williamson                             "size %u", vdev->vbasedev.name, addr, data, size);
911c958c51dSAlex Williamson             }
912c958c51dSAlex Williamson         }
913c958c51dSAlex Williamson     }
914c00d61d8SAlex Williamson }
915c00d61d8SAlex Williamson 
9160d38fb1cSAlex Williamson static const MemoryRegionOps vfio_nvidia_mirror_quirk = {
9170d38fb1cSAlex Williamson     .read = vfio_generic_quirk_mirror_read,
9180d38fb1cSAlex Williamson     .write = vfio_nvidia_quirk_mirror_write,
919c00d61d8SAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
920c00d61d8SAlex Williamson };
921c00d61d8SAlex Williamson 
922c958c51dSAlex Williamson static void vfio_nvidia_bar0_quirk_reset(VFIOPCIDevice *vdev, VFIOQuirk *quirk)
923c958c51dSAlex Williamson {
924c958c51dSAlex Williamson     VFIOConfigMirrorQuirk *mirror = quirk->data;
925c958c51dSAlex Williamson     LastDataSet *last = (LastDataSet *)&mirror->data;
926c958c51dSAlex Williamson 
927c958c51dSAlex Williamson     last->addr = last->data = last->size = last->hits = last->added = 0;
928c958c51dSAlex Williamson 
929c958c51dSAlex Williamson     vfio_drop_dynamic_eventfds(vdev, quirk);
930c958c51dSAlex Williamson }
931c958c51dSAlex Williamson 
9320d38fb1cSAlex Williamson static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
933c00d61d8SAlex Williamson {
934c00d61d8SAlex Williamson     VFIOQuirk *quirk;
9350d38fb1cSAlex Williamson     VFIOConfigMirrorQuirk *mirror;
936c958c51dSAlex Williamson     LastDataSet *last;
937c00d61d8SAlex Williamson 
938db32d0f4SAlex Williamson     if (vdev->no_geforce_quirks ||
939db32d0f4SAlex Williamson         !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
9400d38fb1cSAlex Williamson         !vfio_is_vga(vdev) || nr != 0) {
941c00d61d8SAlex Williamson         return;
942c00d61d8SAlex Williamson     }
943c00d61d8SAlex Williamson 
944bcf3c3d0SAlex Williamson     quirk = vfio_quirk_alloc(1);
945c958c51dSAlex Williamson     quirk->reset = vfio_nvidia_bar0_quirk_reset;
946c958c51dSAlex Williamson     mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet));
947bcf3c3d0SAlex Williamson     mirror->mem = quirk->mem;
9480d38fb1cSAlex Williamson     mirror->vdev = vdev;
9490d38fb1cSAlex Williamson     mirror->offset = 0x88000;
9500d38fb1cSAlex Williamson     mirror->bar = nr;
951c958c51dSAlex Williamson     last = (LastDataSet *)&mirror->data;
952c958c51dSAlex Williamson     last->quirk = quirk;
953c00d61d8SAlex Williamson 
9540d38fb1cSAlex Williamson     memory_region_init_io(mirror->mem, OBJECT(vdev),
9550d38fb1cSAlex Williamson                           &vfio_nvidia_mirror_quirk, mirror,
9560d38fb1cSAlex Williamson                           "vfio-nvidia-bar0-88000-mirror-quirk",
957f5793fd9SAlex Williamson                           vdev->config_size);
958db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
9590d38fb1cSAlex Williamson                                         mirror->offset, mirror->mem, 1);
960c00d61d8SAlex Williamson 
961c00d61d8SAlex Williamson     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
962c00d61d8SAlex Williamson 
9630d38fb1cSAlex Williamson     /* The 0x1800 offset mirror only seems to get used by legacy VGA */
9644d3fc4fdSAlex Williamson     if (vdev->vga) {
965bcf3c3d0SAlex Williamson         quirk = vfio_quirk_alloc(1);
966c958c51dSAlex Williamson         quirk->reset = vfio_nvidia_bar0_quirk_reset;
967c958c51dSAlex Williamson         mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet));
968bcf3c3d0SAlex Williamson         mirror->mem = quirk->mem;
9690d38fb1cSAlex Williamson         mirror->vdev = vdev;
9700d38fb1cSAlex Williamson         mirror->offset = 0x1800;
9710d38fb1cSAlex Williamson         mirror->bar = nr;
972c958c51dSAlex Williamson         last = (LastDataSet *)&mirror->data;
973c958c51dSAlex Williamson         last->quirk = quirk;
974c00d61d8SAlex Williamson 
9750d38fb1cSAlex Williamson         memory_region_init_io(mirror->mem, OBJECT(vdev),
9760d38fb1cSAlex Williamson                               &vfio_nvidia_mirror_quirk, mirror,
9770d38fb1cSAlex Williamson                               "vfio-nvidia-bar0-1800-mirror-quirk",
9780d38fb1cSAlex Williamson                               PCI_CONFIG_SPACE_SIZE);
979db0da029SAlex Williamson         memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
9800d38fb1cSAlex Williamson                                             mirror->offset, mirror->mem, 1);
981c00d61d8SAlex Williamson 
982c00d61d8SAlex Williamson         QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
9830d38fb1cSAlex Williamson     }
984c00d61d8SAlex Williamson 
9850d38fb1cSAlex Williamson     trace_vfio_quirk_nvidia_bar0_probe(vdev->vbasedev.name);
986c00d61d8SAlex Williamson }
987c00d61d8SAlex Williamson 
988c00d61d8SAlex Williamson /*
989c00d61d8SAlex Williamson  * TODO - Some Nvidia devices provide config access to their companion HDA
990c00d61d8SAlex Williamson  * device and even to their parent bridge via these config space mirrors.
991c00d61d8SAlex Williamson  * Add quirks for those regions.
992c00d61d8SAlex Williamson  */
993c00d61d8SAlex Williamson 
994c00d61d8SAlex Williamson #define PCI_VENDOR_ID_REALTEK 0x10ec
995c00d61d8SAlex Williamson 
996c00d61d8SAlex Williamson /*
997c00d61d8SAlex Williamson  * RTL8168 devices have a backdoor that can access the MSI-X table.  At BAR2
998c00d61d8SAlex Williamson  * offset 0x70 there is a dword data register, offset 0x74 is a dword address
999c00d61d8SAlex Williamson  * register.  According to the Linux r8169 driver, the MSI-X table is addressed
1000c00d61d8SAlex Williamson  * when the "type" portion of the address register is set to 0x1.  This appears
1001c00d61d8SAlex Williamson  * to be bits 16:30.  Bit 31 is both a write indicator and some sort of
1002c00d61d8SAlex Williamson  * "address latched" indicator.  Bits 12:15 are a mask field, which we can
1003c00d61d8SAlex Williamson  * ignore because the MSI-X table should always be accessed as a dword (full
1004c00d61d8SAlex Williamson  * mask).  Bits 0:11 is offset within the type.
1005c00d61d8SAlex Williamson  *
1006c00d61d8SAlex Williamson  * Example trace:
1007c00d61d8SAlex Williamson  *
1008c00d61d8SAlex Williamson  * Read from MSI-X table offset 0
1009c00d61d8SAlex Williamson  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x1f000, 4) // store read addr
1010c00d61d8SAlex Williamson  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x8001f000 // latch
1011c00d61d8SAlex Williamson  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x70, 4) = 0xfee00398 // read data
1012c00d61d8SAlex Williamson  *
1013c00d61d8SAlex Williamson  * Write 0xfee00000 to MSI-X table offset 0
1014c00d61d8SAlex Williamson  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x70, 0xfee00000, 4) // write data
1015c00d61d8SAlex Williamson  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x8001f000, 4) // do write
1016c00d61d8SAlex Williamson  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x1f000 // complete
1017c00d61d8SAlex Williamson  */
1018954258a5SAlex Williamson typedef struct VFIOrtl8168Quirk {
1019954258a5SAlex Williamson     VFIOPCIDevice *vdev;
1020954258a5SAlex Williamson     uint32_t addr;
1021954258a5SAlex Williamson     uint32_t data;
1022954258a5SAlex Williamson     bool enabled;
1023954258a5SAlex Williamson } VFIOrtl8168Quirk;
1024954258a5SAlex Williamson 
1025954258a5SAlex Williamson static uint64_t vfio_rtl8168_quirk_address_read(void *opaque,
1026c00d61d8SAlex Williamson                                                 hwaddr addr, unsigned size)
1027c00d61d8SAlex Williamson {
1028954258a5SAlex Williamson     VFIOrtl8168Quirk *rtl = opaque;
1029954258a5SAlex Williamson     VFIOPCIDevice *vdev = rtl->vdev;
1030954258a5SAlex Williamson     uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x74, size);
1031c00d61d8SAlex Williamson 
1032954258a5SAlex Williamson     if (rtl->enabled) {
1033954258a5SAlex Williamson         data = rtl->addr ^ 0x80000000U; /* latch/complete */
1034954258a5SAlex Williamson         trace_vfio_quirk_rtl8168_fake_latch(vdev->vbasedev.name, data);
1035c00d61d8SAlex Williamson     }
1036c00d61d8SAlex Williamson 
1037954258a5SAlex Williamson     return data;
1038c00d61d8SAlex Williamson }
1039c00d61d8SAlex Williamson 
1040954258a5SAlex Williamson static void vfio_rtl8168_quirk_address_write(void *opaque, hwaddr addr,
1041c00d61d8SAlex Williamson                                              uint64_t data, unsigned size)
1042c00d61d8SAlex Williamson {
1043954258a5SAlex Williamson     VFIOrtl8168Quirk *rtl = opaque;
1044954258a5SAlex Williamson     VFIOPCIDevice *vdev = rtl->vdev;
1045c00d61d8SAlex Williamson 
1046954258a5SAlex Williamson     rtl->enabled = false;
1047954258a5SAlex Williamson 
1048c00d61d8SAlex Williamson     if ((data & 0x7fff0000) == 0x10000) { /* MSI-X table */
1049954258a5SAlex Williamson         rtl->enabled = true;
1050954258a5SAlex Williamson         rtl->addr = (uint32_t)data;
1051c00d61d8SAlex Williamson 
1052c00d61d8SAlex Williamson         if (data & 0x80000000U) { /* Do write */
1053c00d61d8SAlex Williamson             if (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) {
1054c00d61d8SAlex Williamson                 hwaddr offset = data & 0xfff;
1055954258a5SAlex Williamson                 uint64_t val = rtl->data;
1056c00d61d8SAlex Williamson 
1057954258a5SAlex Williamson                 trace_vfio_quirk_rtl8168_msix_write(vdev->vbasedev.name,
1058c00d61d8SAlex Williamson                                                     (uint16_t)offset, val);
1059c00d61d8SAlex Williamson 
1060c00d61d8SAlex Williamson                 /* Write to the proper guest MSI-X table instead */
1061c00d61d8SAlex Williamson                 memory_region_dispatch_write(&vdev->pdev.msix_table_mmio,
1062d5d680caSTony Nguyen                                              offset, val,
1063d5d680caSTony Nguyen                                              size_memop(size) | MO_LE,
1064c00d61d8SAlex Williamson                                              MEMTXATTRS_UNSPECIFIED);
1065c00d61d8SAlex Williamson             }
1066c00d61d8SAlex Williamson             return; /* Do not write guest MSI-X data to hardware */
1067c00d61d8SAlex Williamson         }
1068c00d61d8SAlex Williamson     }
1069c00d61d8SAlex Williamson 
1070954258a5SAlex Williamson     vfio_region_write(&vdev->bars[2].region, addr + 0x74, data, size);
1071c00d61d8SAlex Williamson }
1072c00d61d8SAlex Williamson 
1073954258a5SAlex Williamson static const MemoryRegionOps vfio_rtl_address_quirk = {
1074954258a5SAlex Williamson     .read = vfio_rtl8168_quirk_address_read,
1075954258a5SAlex Williamson     .write = vfio_rtl8168_quirk_address_write,
1076c00d61d8SAlex Williamson     .valid = {
1077c00d61d8SAlex Williamson         .min_access_size = 4,
1078c00d61d8SAlex Williamson         .max_access_size = 4,
1079c00d61d8SAlex Williamson         .unaligned = false,
1080c00d61d8SAlex Williamson     },
1081c00d61d8SAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
1082c00d61d8SAlex Williamson };
1083c00d61d8SAlex Williamson 
1084954258a5SAlex Williamson static uint64_t vfio_rtl8168_quirk_data_read(void *opaque,
1085954258a5SAlex Williamson                                              hwaddr addr, unsigned size)
1086c00d61d8SAlex Williamson {
1087954258a5SAlex Williamson     VFIOrtl8168Quirk *rtl = opaque;
1088954258a5SAlex Williamson     VFIOPCIDevice *vdev = rtl->vdev;
108931e6a7b1SThorsten Kohfeldt     uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x70, size);
1090c00d61d8SAlex Williamson 
1091954258a5SAlex Williamson     if (rtl->enabled && (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) {
1092954258a5SAlex Williamson         hwaddr offset = rtl->addr & 0xfff;
1093954258a5SAlex Williamson         memory_region_dispatch_read(&vdev->pdev.msix_table_mmio, offset,
1094d5d680caSTony Nguyen                                     &data, size_memop(size) | MO_LE,
1095475fbf0aSTony Nguyen                                     MEMTXATTRS_UNSPECIFIED);
1096954258a5SAlex Williamson         trace_vfio_quirk_rtl8168_msix_read(vdev->vbasedev.name, offset, data);
1097954258a5SAlex Williamson     }
1098954258a5SAlex Williamson 
1099954258a5SAlex Williamson     return data;
1100954258a5SAlex Williamson }
1101954258a5SAlex Williamson 
1102954258a5SAlex Williamson static void vfio_rtl8168_quirk_data_write(void *opaque, hwaddr addr,
1103954258a5SAlex Williamson                                           uint64_t data, unsigned size)
1104954258a5SAlex Williamson {
1105954258a5SAlex Williamson     VFIOrtl8168Quirk *rtl = opaque;
1106954258a5SAlex Williamson     VFIOPCIDevice *vdev = rtl->vdev;
1107954258a5SAlex Williamson 
1108954258a5SAlex Williamson     rtl->data = (uint32_t)data;
1109954258a5SAlex Williamson 
1110954258a5SAlex Williamson     vfio_region_write(&vdev->bars[2].region, addr + 0x70, data, size);
1111954258a5SAlex Williamson }
1112954258a5SAlex Williamson 
1113954258a5SAlex Williamson static const MemoryRegionOps vfio_rtl_data_quirk = {
1114954258a5SAlex Williamson     .read = vfio_rtl8168_quirk_data_read,
1115954258a5SAlex Williamson     .write = vfio_rtl8168_quirk_data_write,
1116954258a5SAlex Williamson     .valid = {
1117954258a5SAlex Williamson         .min_access_size = 4,
1118954258a5SAlex Williamson         .max_access_size = 4,
1119954258a5SAlex Williamson         .unaligned = false,
1120954258a5SAlex Williamson     },
1121954258a5SAlex Williamson     .endianness = DEVICE_LITTLE_ENDIAN,
1122954258a5SAlex Williamson };
1123954258a5SAlex Williamson 
1124954258a5SAlex Williamson static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr)
1125954258a5SAlex Williamson {
1126954258a5SAlex Williamson     VFIOQuirk *quirk;
1127954258a5SAlex Williamson     VFIOrtl8168Quirk *rtl;
1128954258a5SAlex Williamson 
1129954258a5SAlex Williamson     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_REALTEK, 0x8168) || nr != 2) {
1130c00d61d8SAlex Williamson         return;
1131c00d61d8SAlex Williamson     }
1132c00d61d8SAlex Williamson 
1133bcf3c3d0SAlex Williamson     quirk = vfio_quirk_alloc(2);
1134954258a5SAlex Williamson     quirk->data = rtl = g_malloc0(sizeof(*rtl));
1135954258a5SAlex Williamson     rtl->vdev = vdev;
1136c00d61d8SAlex Williamson 
1137954258a5SAlex Williamson     memory_region_init_io(&quirk->mem[0], OBJECT(vdev),
1138954258a5SAlex Williamson                           &vfio_rtl_address_quirk, rtl,
1139954258a5SAlex Williamson                           "vfio-rtl8168-window-address-quirk", 4);
1140db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
1141954258a5SAlex Williamson                                         0x74, &quirk->mem[0], 1);
1142954258a5SAlex Williamson 
1143954258a5SAlex Williamson     memory_region_init_io(&quirk->mem[1], OBJECT(vdev),
1144954258a5SAlex Williamson                           &vfio_rtl_data_quirk, rtl,
1145954258a5SAlex Williamson                           "vfio-rtl8168-window-data-quirk", 4);
1146db0da029SAlex Williamson     memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
1147954258a5SAlex Williamson                                         0x70, &quirk->mem[1], 1);
1148c00d61d8SAlex Williamson 
1149c00d61d8SAlex Williamson     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
1150c00d61d8SAlex Williamson 
1151954258a5SAlex Williamson     trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name);
1152c00d61d8SAlex Williamson }
1153c00d61d8SAlex Williamson 
1154c4c45e94SAlex Williamson #define IGD_ASLS 0xfc /* ASL Storage Register */
1155c4c45e94SAlex Williamson 
1156c4c45e94SAlex Williamson /*
1157c4c45e94SAlex Williamson  * The OpRegion includes the Video BIOS Table, which seems important for
1158c4c45e94SAlex Williamson  * telling the driver what sort of outputs it has.  Without this, the device
1159c4c45e94SAlex Williamson  * may work in the guest, but we may not get output.  This also requires BIOS
1160c4c45e94SAlex Williamson  * support to reserve and populate a section of guest memory sufficient for
1161c4c45e94SAlex Williamson  * the table and to write the base address of that memory to the ASLS register
1162c4c45e94SAlex Williamson  * of the IGD device.
1163c4c45e94SAlex Williamson  */
11646ced0bbaSAlex Williamson int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
11657237011dSEric Auger                                struct vfio_region_info *info, Error **errp)
1166c4c45e94SAlex Williamson {
1167c4c45e94SAlex Williamson     int ret;
1168c4c45e94SAlex Williamson 
1169c4c45e94SAlex Williamson     vdev->igd_opregion = g_malloc0(info->size);
1170c4c45e94SAlex Williamson     ret = pread(vdev->vbasedev.fd, vdev->igd_opregion,
1171c4c45e94SAlex Williamson                 info->size, info->offset);
1172c4c45e94SAlex Williamson     if (ret != info->size) {
11737237011dSEric Auger         error_setg(errp, "failed to read IGD OpRegion");
1174c4c45e94SAlex Williamson         g_free(vdev->igd_opregion);
1175c4c45e94SAlex Williamson         vdev->igd_opregion = NULL;
1176c4c45e94SAlex Williamson         return -EINVAL;
1177c4c45e94SAlex Williamson     }
1178c4c45e94SAlex Williamson 
1179c4c45e94SAlex Williamson     /*
1180c4c45e94SAlex Williamson      * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to
1181c4c45e94SAlex Williamson      * allocate 32bit reserved memory for, copy these contents into, and write
1182c4c45e94SAlex Williamson      * the reserved memory base address to the device ASLS register at 0xFC.
1183c4c45e94SAlex Williamson      * Alignment of this reserved region seems flexible, but using a 4k page
1184c4c45e94SAlex Williamson      * alignment seems to work well.  This interface assumes a single IGD
1185c4c45e94SAlex Williamson      * device, which may be at VM address 00:02.0 in legacy mode or another
1186c4c45e94SAlex Williamson      * address in UPT mode.
1187c4c45e94SAlex Williamson      *
1188c4c45e94SAlex Williamson      * NB, there may be future use cases discovered where the VM should have
1189c4c45e94SAlex Williamson      * direct interaction with the host OpRegion, in which case the write to
1190c4c45e94SAlex Williamson      * the ASLS register would trigger MemoryRegion setup to enable that.
1191c4c45e94SAlex Williamson      */
1192c4c45e94SAlex Williamson     fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion",
1193c4c45e94SAlex Williamson                     vdev->igd_opregion, info->size);
1194c4c45e94SAlex Williamson 
1195c4c45e94SAlex Williamson     trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name);
1196c4c45e94SAlex Williamson 
1197c4c45e94SAlex Williamson     pci_set_long(vdev->pdev.config + IGD_ASLS, 0);
1198c4c45e94SAlex Williamson     pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0);
1199c4c45e94SAlex Williamson     pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0);
1200c4c45e94SAlex Williamson 
1201c4c45e94SAlex Williamson     return 0;
1202c4c45e94SAlex Williamson }
1203c4c45e94SAlex Williamson 
1204c4c45e94SAlex Williamson /*
1205c00d61d8SAlex Williamson  * Common quirk probe entry points.
1206c00d61d8SAlex Williamson  */
1207c00d61d8SAlex Williamson void vfio_vga_quirk_setup(VFIOPCIDevice *vdev)
1208c00d61d8SAlex Williamson {
1209c00d61d8SAlex Williamson     vfio_vga_probe_ati_3c3_quirk(vdev);
1210c00d61d8SAlex Williamson     vfio_vga_probe_nvidia_3d0_quirk(vdev);
1211c00d61d8SAlex Williamson }
1212c00d61d8SAlex Williamson 
12132d82f8a3SAlex Williamson void vfio_vga_quirk_exit(VFIOPCIDevice *vdev)
1214c00d61d8SAlex Williamson {
1215c00d61d8SAlex Williamson     VFIOQuirk *quirk;
12168c4f2348SAlex Williamson     int i, j;
1217c00d61d8SAlex Williamson 
12182d82f8a3SAlex Williamson     for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
12192d82f8a3SAlex Williamson         QLIST_FOREACH(quirk, &vdev->vga->region[i].quirks, next) {
12208c4f2348SAlex Williamson             for (j = 0; j < quirk->nr_mem; j++) {
12212d82f8a3SAlex Williamson                 memory_region_del_subregion(&vdev->vga->region[i].mem,
12228c4f2348SAlex Williamson                                             &quirk->mem[j]);
12238c4f2348SAlex Williamson             }
1224c00d61d8SAlex Williamson         }
1225c00d61d8SAlex Williamson     }
1226c00d61d8SAlex Williamson }
1227c00d61d8SAlex Williamson 
12282d82f8a3SAlex Williamson void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev)
1229c00d61d8SAlex Williamson {
12308c4f2348SAlex Williamson     int i, j;
1231c00d61d8SAlex Williamson 
12322d82f8a3SAlex Williamson     for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
12332d82f8a3SAlex Williamson         while (!QLIST_EMPTY(&vdev->vga->region[i].quirks)) {
12342d82f8a3SAlex Williamson             VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga->region[i].quirks);
1235c00d61d8SAlex Williamson             QLIST_REMOVE(quirk, next);
12368c4f2348SAlex Williamson             for (j = 0; j < quirk->nr_mem; j++) {
12378c4f2348SAlex Williamson                 object_unparent(OBJECT(&quirk->mem[j]));
12388c4f2348SAlex Williamson             }
12398c4f2348SAlex Williamson             g_free(quirk->mem);
12408c4f2348SAlex Williamson             g_free(quirk->data);
1241c00d61d8SAlex Williamson             g_free(quirk);
1242c00d61d8SAlex Williamson         }
1243c00d61d8SAlex Williamson     }
1244c00d61d8SAlex Williamson }
1245c00d61d8SAlex Williamson 
1246c00d61d8SAlex Williamson void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
1247c00d61d8SAlex Williamson {
12480e54f24aSAlex Williamson     vfio_probe_ati_bar4_quirk(vdev, nr);
12490d38fb1cSAlex Williamson     vfio_probe_ati_bar2_quirk(vdev, nr);
12500e54f24aSAlex Williamson     vfio_probe_nvidia_bar5_quirk(vdev, nr);
12510d38fb1cSAlex Williamson     vfio_probe_nvidia_bar0_quirk(vdev, nr);
1252954258a5SAlex Williamson     vfio_probe_rtl8168_bar2_quirk(vdev, nr);
125329d62771SThomas Huth #ifdef CONFIG_VFIO_IGD
1254c4c45e94SAlex Williamson     vfio_probe_igd_bar4_quirk(vdev, nr);
125529d62771SThomas Huth #endif
1256c00d61d8SAlex Williamson }
1257c00d61d8SAlex Williamson 
12582d82f8a3SAlex Williamson void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr)
1259c00d61d8SAlex Williamson {
1260c00d61d8SAlex Williamson     VFIOBAR *bar = &vdev->bars[nr];
1261c00d61d8SAlex Williamson     VFIOQuirk *quirk;
12628c4f2348SAlex Williamson     int i;
1263c00d61d8SAlex Williamson 
1264c00d61d8SAlex Williamson     QLIST_FOREACH(quirk, &bar->quirks, next) {
1265c958c51dSAlex Williamson         while (!QLIST_EMPTY(&quirk->ioeventfds)) {
12662b1dbd0dSAlex Williamson             vfio_ioeventfd_exit(vdev, QLIST_FIRST(&quirk->ioeventfds));
1267c958c51dSAlex Williamson         }
1268c958c51dSAlex Williamson 
12698c4f2348SAlex Williamson         for (i = 0; i < quirk->nr_mem; i++) {
1270db0da029SAlex Williamson             memory_region_del_subregion(bar->region.mem, &quirk->mem[i]);
12718c4f2348SAlex Williamson         }
1272c00d61d8SAlex Williamson     }
1273c00d61d8SAlex Williamson }
1274c00d61d8SAlex Williamson 
12752d82f8a3SAlex Williamson void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr)
1276c00d61d8SAlex Williamson {
1277c00d61d8SAlex Williamson     VFIOBAR *bar = &vdev->bars[nr];
12788c4f2348SAlex Williamson     int i;
1279c00d61d8SAlex Williamson 
1280c00d61d8SAlex Williamson     while (!QLIST_EMPTY(&bar->quirks)) {
1281c00d61d8SAlex Williamson         VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks);
1282c00d61d8SAlex Williamson         QLIST_REMOVE(quirk, next);
12838c4f2348SAlex Williamson         for (i = 0; i < quirk->nr_mem; i++) {
12848c4f2348SAlex Williamson             object_unparent(OBJECT(&quirk->mem[i]));
12858c4f2348SAlex Williamson         }
12868c4f2348SAlex Williamson         g_free(quirk->mem);
12878c4f2348SAlex Williamson         g_free(quirk->data);
1288c00d61d8SAlex Williamson         g_free(quirk);
1289c00d61d8SAlex Williamson     }
1290c00d61d8SAlex Williamson }
1291c9c50009SAlex Williamson 
1292c9c50009SAlex Williamson /*
1293c9c50009SAlex Williamson  * Reset quirks
1294c9c50009SAlex Williamson  */
1295469d02deSAlex Williamson void vfio_quirk_reset(VFIOPCIDevice *vdev)
1296469d02deSAlex Williamson {
1297469d02deSAlex Williamson     int i;
1298469d02deSAlex Williamson 
1299469d02deSAlex Williamson     for (i = 0; i < PCI_ROM_SLOT; i++) {
1300469d02deSAlex Williamson         VFIOQuirk *quirk;
1301469d02deSAlex Williamson         VFIOBAR *bar = &vdev->bars[i];
1302469d02deSAlex Williamson 
1303469d02deSAlex Williamson         QLIST_FOREACH(quirk, &bar->quirks, next) {
1304469d02deSAlex Williamson             if (quirk->reset) {
1305469d02deSAlex Williamson                 quirk->reset(vdev, quirk);
1306469d02deSAlex Williamson             }
1307469d02deSAlex Williamson         }
1308469d02deSAlex Williamson     }
1309469d02deSAlex Williamson }
1310c9c50009SAlex Williamson 
1311c9c50009SAlex Williamson /*
1312c9c50009SAlex Williamson  * AMD Radeon PCI config reset, based on Linux:
1313c9c50009SAlex Williamson  *   drivers/gpu/drm/radeon/ci_smc.c:ci_is_smc_running()
1314c9c50009SAlex Williamson  *   drivers/gpu/drm/radeon/radeon_device.c:radeon_pci_config_reset
1315c9c50009SAlex Williamson  *   drivers/gpu/drm/radeon/ci_smc.c:ci_reset_smc()
1316c9c50009SAlex Williamson  *   drivers/gpu/drm/radeon/ci_smc.c:ci_stop_smc_clock()
1317c9c50009SAlex Williamson  * IDs: include/drm/drm_pciids.h
1318c9c50009SAlex Williamson  * Registers: http://cgit.freedesktop.org/~agd5f/linux/commit/?id=4e2aa447f6f0
1319c9c50009SAlex Williamson  *
1320c9c50009SAlex Williamson  * Bonaire and Hawaii GPUs do not respond to a bus reset.  This is a bug in the
1321c9c50009SAlex Williamson  * hardware that should be fixed on future ASICs.  The symptom of this is that
1322c9c50009SAlex Williamson  * once the accerlated driver loads, Windows guests will bsod on subsequent
1323c9c50009SAlex Williamson  * attmpts to load the driver, such as after VM reset or shutdown/restart.  To
1324c9c50009SAlex Williamson  * work around this, we do an AMD specific PCI config reset, followed by an SMC
1325c9c50009SAlex Williamson  * reset.  The PCI config reset only works if SMC firmware is running, so we
1326c9c50009SAlex Williamson  * have a dependency on the state of the device as to whether this reset will
1327c9c50009SAlex Williamson  * be effective.  There are still cases where we won't be able to kick the
1328c9c50009SAlex Williamson  * device into working, but this greatly improves the usability overall.  The
1329c9c50009SAlex Williamson  * config reset magic is relatively common on AMD GPUs, but the setup and SMC
1330c9c50009SAlex Williamson  * poking is largely ASIC specific.
1331c9c50009SAlex Williamson  */
1332c9c50009SAlex Williamson static bool vfio_radeon_smc_is_running(VFIOPCIDevice *vdev)
1333c9c50009SAlex Williamson {
1334c9c50009SAlex Williamson     uint32_t clk, pc_c;
1335c9c50009SAlex Williamson 
1336c9c50009SAlex Williamson     /*
1337c9c50009SAlex Williamson      * Registers 200h and 204h are index and data registers for accessing
1338c9c50009SAlex Williamson      * indirect configuration registers within the device.
1339c9c50009SAlex Williamson      */
1340c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
1341c9c50009SAlex Williamson     clk = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1342c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000370, 4);
1343c9c50009SAlex Williamson     pc_c = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1344c9c50009SAlex Williamson 
1345c9c50009SAlex Williamson     return (!(clk & 1) && (0x20100 <= pc_c));
1346c9c50009SAlex Williamson }
1347c9c50009SAlex Williamson 
1348c9c50009SAlex Williamson /*
1349c9c50009SAlex Williamson  * The scope of a config reset is controlled by a mode bit in the misc register
1350c9c50009SAlex Williamson  * and a fuse, exposed as a bit in another register.  The fuse is the default
1351c9c50009SAlex Williamson  * (0 = GFX, 1 = whole GPU), the misc bit is a toggle, with the forumula
1352c9c50009SAlex Williamson  * scope = !(misc ^ fuse), where the resulting scope is defined the same as
1353c9c50009SAlex Williamson  * the fuse.  A truth table therefore tells us that if misc == fuse, we need
1354c9c50009SAlex Williamson  * to flip the value of the bit in the misc register.
1355c9c50009SAlex Williamson  */
1356c9c50009SAlex Williamson static void vfio_radeon_set_gfx_only_reset(VFIOPCIDevice *vdev)
1357c9c50009SAlex Williamson {
1358c9c50009SAlex Williamson     uint32_t misc, fuse;
1359c9c50009SAlex Williamson     bool a, b;
1360c9c50009SAlex Williamson 
1361c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x200, 0xc00c0000, 4);
1362c9c50009SAlex Williamson     fuse = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1363c9c50009SAlex Williamson     b = fuse & 64;
1364c9c50009SAlex Williamson 
1365c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x200, 0xc0000010, 4);
1366c9c50009SAlex Williamson     misc = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1367c9c50009SAlex Williamson     a = misc & 2;
1368c9c50009SAlex Williamson 
1369c9c50009SAlex Williamson     if (a == b) {
1370c9c50009SAlex Williamson         vfio_region_write(&vdev->bars[5].region, 0x204, misc ^ 2, 4);
1371c9c50009SAlex Williamson         vfio_region_read(&vdev->bars[5].region, 0x204, 4); /* flush */
1372c9c50009SAlex Williamson     }
1373c9c50009SAlex Williamson }
1374c9c50009SAlex Williamson 
1375c9c50009SAlex Williamson static int vfio_radeon_reset(VFIOPCIDevice *vdev)
1376c9c50009SAlex Williamson {
1377c9c50009SAlex Williamson     PCIDevice *pdev = &vdev->pdev;
1378c9c50009SAlex Williamson     int i, ret = 0;
1379c9c50009SAlex Williamson     uint32_t data;
1380c9c50009SAlex Williamson 
1381c9c50009SAlex Williamson     /* Defer to a kernel implemented reset */
1382c9c50009SAlex Williamson     if (vdev->vbasedev.reset_works) {
1383c9c50009SAlex Williamson         trace_vfio_quirk_ati_bonaire_reset_skipped(vdev->vbasedev.name);
1384c9c50009SAlex Williamson         return -ENODEV;
1385c9c50009SAlex Williamson     }
1386c9c50009SAlex Williamson 
1387c9c50009SAlex Williamson     /* Enable only memory BAR access */
1388c9c50009SAlex Williamson     vfio_pci_write_config(pdev, PCI_COMMAND, PCI_COMMAND_MEMORY, 2);
1389c9c50009SAlex Williamson 
1390c9c50009SAlex Williamson     /* Reset only works if SMC firmware is loaded and running */
1391c9c50009SAlex Williamson     if (!vfio_radeon_smc_is_running(vdev)) {
1392c9c50009SAlex Williamson         ret = -EINVAL;
1393c9c50009SAlex Williamson         trace_vfio_quirk_ati_bonaire_reset_no_smc(vdev->vbasedev.name);
1394c9c50009SAlex Williamson         goto out;
1395c9c50009SAlex Williamson     }
1396c9c50009SAlex Williamson 
1397c9c50009SAlex Williamson     /* Make sure only the GFX function is reset */
1398c9c50009SAlex Williamson     vfio_radeon_set_gfx_only_reset(vdev);
1399c9c50009SAlex Williamson 
1400c9c50009SAlex Williamson     /* AMD PCI config reset */
1401c9c50009SAlex Williamson     vfio_pci_write_config(pdev, 0x7c, 0x39d5e86b, 4);
1402c9c50009SAlex Williamson     usleep(100);
1403c9c50009SAlex Williamson 
1404c9c50009SAlex Williamson     /* Read back the memory size to make sure we're out of reset */
1405c9c50009SAlex Williamson     for (i = 0; i < 100000; i++) {
1406c9c50009SAlex Williamson         if (vfio_region_read(&vdev->bars[5].region, 0x5428, 4) != 0xffffffff) {
1407c9c50009SAlex Williamson             goto reset_smc;
1408c9c50009SAlex Williamson         }
1409c9c50009SAlex Williamson         usleep(1);
1410c9c50009SAlex Williamson     }
1411c9c50009SAlex Williamson 
1412c9c50009SAlex Williamson     trace_vfio_quirk_ati_bonaire_reset_timeout(vdev->vbasedev.name);
1413c9c50009SAlex Williamson 
1414c9c50009SAlex Williamson reset_smc:
1415c9c50009SAlex Williamson     /* Reset SMC */
1416c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000000, 4);
1417c9c50009SAlex Williamson     data = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1418c9c50009SAlex Williamson     data |= 1;
1419c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x204, data, 4);
1420c9c50009SAlex Williamson 
1421c9c50009SAlex Williamson     /* Disable SMC clock */
1422c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
1423c9c50009SAlex Williamson     data = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1424c9c50009SAlex Williamson     data |= 1;
1425c9c50009SAlex Williamson     vfio_region_write(&vdev->bars[5].region, 0x204, data, 4);
1426c9c50009SAlex Williamson 
1427c9c50009SAlex Williamson     trace_vfio_quirk_ati_bonaire_reset_done(vdev->vbasedev.name);
1428c9c50009SAlex Williamson 
1429c9c50009SAlex Williamson out:
1430c9c50009SAlex Williamson     /* Restore PCI command register */
1431c9c50009SAlex Williamson     vfio_pci_write_config(pdev, PCI_COMMAND, 0, 2);
1432c9c50009SAlex Williamson 
1433c9c50009SAlex Williamson     return ret;
1434c9c50009SAlex Williamson }
1435c9c50009SAlex Williamson 
1436c9c50009SAlex Williamson void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev)
1437c9c50009SAlex Williamson {
1438ff635e37SAlex Williamson     switch (vdev->vendor_id) {
1439c9c50009SAlex Williamson     case 0x1002:
1440ff635e37SAlex Williamson         switch (vdev->device_id) {
1441c9c50009SAlex Williamson         /* Bonaire */
1442c9c50009SAlex Williamson         case 0x6649: /* Bonaire [FirePro W5100] */
1443c9c50009SAlex Williamson         case 0x6650:
1444c9c50009SAlex Williamson         case 0x6651:
1445c9c50009SAlex Williamson         case 0x6658: /* Bonaire XTX [Radeon R7 260X] */
1446c9c50009SAlex Williamson         case 0x665c: /* Bonaire XT [Radeon HD 7790/8770 / R9 260 OEM] */
1447c9c50009SAlex Williamson         case 0x665d: /* Bonaire [Radeon R7 200 Series] */
1448c9c50009SAlex Williamson         /* Hawaii */
1449c9c50009SAlex Williamson         case 0x67A0: /* Hawaii XT GL [FirePro W9100] */
1450c9c50009SAlex Williamson         case 0x67A1: /* Hawaii PRO GL [FirePro W8100] */
1451c9c50009SAlex Williamson         case 0x67A2:
1452c9c50009SAlex Williamson         case 0x67A8:
1453c9c50009SAlex Williamson         case 0x67A9:
1454c9c50009SAlex Williamson         case 0x67AA:
1455c9c50009SAlex Williamson         case 0x67B0: /* Hawaii XT [Radeon R9 290X] */
1456c9c50009SAlex Williamson         case 0x67B1: /* Hawaii PRO [Radeon R9 290] */
1457c9c50009SAlex Williamson         case 0x67B8:
1458c9c50009SAlex Williamson         case 0x67B9:
1459c9c50009SAlex Williamson         case 0x67BA:
1460c9c50009SAlex Williamson         case 0x67BE:
1461c9c50009SAlex Williamson             vdev->resetfn = vfio_radeon_reset;
1462c9c50009SAlex Williamson             trace_vfio_quirk_ati_bonaire_reset(vdev->vbasedev.name);
1463c9c50009SAlex Williamson             break;
1464c9c50009SAlex Williamson         }
1465c9c50009SAlex Williamson         break;
1466c9c50009SAlex Williamson     }
1467c9c50009SAlex Williamson }
1468dfbee78dSAlex Williamson 
1469dfbee78dSAlex Williamson /*
1470dfbee78dSAlex Williamson  * The NVIDIA GPUDirect P2P Vendor capability allows the user to specify
1471dfbee78dSAlex Williamson  * devices as a member of a clique.  Devices within the same clique ID
1472dfbee78dSAlex Williamson  * are capable of direct P2P.  It's the user's responsibility that this
1473dfbee78dSAlex Williamson  * is correct.  The spec says that this may reside at any unused config
1474dfbee78dSAlex Williamson  * offset, but reserves and recommends hypervisors place this at C8h.
1475dfbee78dSAlex Williamson  * The spec also states that the hypervisor should place this capability
1476dfbee78dSAlex Williamson  * at the end of the capability list, thus next is defined as 0h.
1477dfbee78dSAlex Williamson  *
1478dfbee78dSAlex Williamson  * +----------------+----------------+----------------+----------------+
1479dfbee78dSAlex Williamson  * | sig 7:0 ('P')  |  vndr len (8h) |    next (0h)   |   cap id (9h)  |
1480dfbee78dSAlex Williamson  * +----------------+----------------+----------------+----------------+
1481dfbee78dSAlex Williamson  * | rsvd 15:7(0h),id 6:3,ver 2:0(0h)|          sig 23:8 ('P2')        |
1482dfbee78dSAlex Williamson  * +---------------------------------+---------------------------------+
1483dfbee78dSAlex Williamson  *
1484dfbee78dSAlex Williamson  * https://lists.gnu.org/archive/html/qemu-devel/2017-08/pdfUda5iEpgOS.pdf
1485dfbee78dSAlex Williamson  */
1486dfbee78dSAlex Williamson static void get_nv_gpudirect_clique_id(Object *obj, Visitor *v,
1487dfbee78dSAlex Williamson                                        const char *name, void *opaque,
1488dfbee78dSAlex Williamson                                        Error **errp)
1489dfbee78dSAlex Williamson {
1490dfbee78dSAlex Williamson     DeviceState *dev = DEVICE(obj);
1491dfbee78dSAlex Williamson     Property *prop = opaque;
1492dfbee78dSAlex Williamson     uint8_t *ptr = qdev_get_prop_ptr(dev, prop);
1493dfbee78dSAlex Williamson 
1494dfbee78dSAlex Williamson     visit_type_uint8(v, name, ptr, errp);
1495dfbee78dSAlex Williamson }
1496dfbee78dSAlex Williamson 
1497dfbee78dSAlex Williamson static void set_nv_gpudirect_clique_id(Object *obj, Visitor *v,
1498dfbee78dSAlex Williamson                                        const char *name, void *opaque,
1499dfbee78dSAlex Williamson                                        Error **errp)
1500dfbee78dSAlex Williamson {
1501dfbee78dSAlex Williamson     DeviceState *dev = DEVICE(obj);
1502dfbee78dSAlex Williamson     Property *prop = opaque;
1503dfbee78dSAlex Williamson     uint8_t value, *ptr = qdev_get_prop_ptr(dev, prop);
1504dfbee78dSAlex Williamson     Error *local_err = NULL;
1505dfbee78dSAlex Williamson 
1506dfbee78dSAlex Williamson     if (dev->realized) {
1507dfbee78dSAlex Williamson         qdev_prop_set_after_realize(dev, name, errp);
1508dfbee78dSAlex Williamson         return;
1509dfbee78dSAlex Williamson     }
1510dfbee78dSAlex Williamson 
1511dfbee78dSAlex Williamson     visit_type_uint8(v, name, &value, &local_err);
1512dfbee78dSAlex Williamson     if (local_err) {
1513dfbee78dSAlex Williamson         error_propagate(errp, local_err);
1514dfbee78dSAlex Williamson         return;
1515dfbee78dSAlex Williamson     }
1516dfbee78dSAlex Williamson 
1517dfbee78dSAlex Williamson     if (value & ~0xF) {
1518dfbee78dSAlex Williamson         error_setg(errp, "Property %s: valid range 0-15", name);
1519dfbee78dSAlex Williamson         return;
1520dfbee78dSAlex Williamson     }
1521dfbee78dSAlex Williamson 
1522dfbee78dSAlex Williamson     *ptr = value;
1523dfbee78dSAlex Williamson }
1524dfbee78dSAlex Williamson 
1525dfbee78dSAlex Williamson const PropertyInfo qdev_prop_nv_gpudirect_clique = {
1526dfbee78dSAlex Williamson     .name = "uint4",
1527dfbee78dSAlex Williamson     .description = "NVIDIA GPUDirect Clique ID (0 - 15)",
1528dfbee78dSAlex Williamson     .get = get_nv_gpudirect_clique_id,
1529dfbee78dSAlex Williamson     .set = set_nv_gpudirect_clique_id,
1530dfbee78dSAlex Williamson };
1531dfbee78dSAlex Williamson 
1532dfbee78dSAlex Williamson static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
1533dfbee78dSAlex Williamson {
1534dfbee78dSAlex Williamson     PCIDevice *pdev = &vdev->pdev;
1535dfbee78dSAlex Williamson     int ret, pos = 0xC8;
1536dfbee78dSAlex Williamson 
1537dfbee78dSAlex Williamson     if (vdev->nv_gpudirect_clique == 0xFF) {
1538dfbee78dSAlex Williamson         return 0;
1539dfbee78dSAlex Williamson     }
1540dfbee78dSAlex Williamson 
1541dfbee78dSAlex Williamson     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID)) {
1542dfbee78dSAlex Williamson         error_setg(errp, "NVIDIA GPUDirect Clique ID: invalid device vendor");
1543dfbee78dSAlex Williamson         return -EINVAL;
1544dfbee78dSAlex Williamson     }
1545dfbee78dSAlex Williamson 
1546dfbee78dSAlex Williamson     if (pci_get_byte(pdev->config + PCI_CLASS_DEVICE + 1) !=
1547dfbee78dSAlex Williamson         PCI_BASE_CLASS_DISPLAY) {
1548dfbee78dSAlex Williamson         error_setg(errp, "NVIDIA GPUDirect Clique ID: unsupported PCI class");
1549dfbee78dSAlex Williamson         return -EINVAL;
1550dfbee78dSAlex Williamson     }
1551dfbee78dSAlex Williamson 
1552dfbee78dSAlex Williamson     ret = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, 8, errp);
1553dfbee78dSAlex Williamson     if (ret < 0) {
1554dfbee78dSAlex Williamson         error_prepend(errp, "Failed to add NVIDIA GPUDirect cap: ");
1555dfbee78dSAlex Williamson         return ret;
1556dfbee78dSAlex Williamson     }
1557dfbee78dSAlex Williamson 
1558dfbee78dSAlex Williamson     memset(vdev->emulated_config_bits + pos, 0xFF, 8);
1559dfbee78dSAlex Williamson     pos += PCI_CAP_FLAGS;
1560dfbee78dSAlex Williamson     pci_set_byte(pdev->config + pos++, 8);
1561dfbee78dSAlex Williamson     pci_set_byte(pdev->config + pos++, 'P');
1562dfbee78dSAlex Williamson     pci_set_byte(pdev->config + pos++, '2');
1563dfbee78dSAlex Williamson     pci_set_byte(pdev->config + pos++, 'P');
1564dfbee78dSAlex Williamson     pci_set_byte(pdev->config + pos++, vdev->nv_gpudirect_clique << 3);
1565dfbee78dSAlex Williamson     pci_set_byte(pdev->config + pos, 0);
1566dfbee78dSAlex Williamson 
1567dfbee78dSAlex Williamson     return 0;
1568dfbee78dSAlex Williamson }
1569dfbee78dSAlex Williamson 
1570ec132efaSAlexey Kardashevskiy static void vfio_pci_nvlink2_get_tgt(Object *obj, Visitor *v,
1571ec132efaSAlexey Kardashevskiy                                      const char *name,
1572ec132efaSAlexey Kardashevskiy                                      void *opaque, Error **errp)
1573ec132efaSAlexey Kardashevskiy {
1574ec132efaSAlexey Kardashevskiy     uint64_t tgt = (uintptr_t) opaque;
1575ec132efaSAlexey Kardashevskiy     visit_type_uint64(v, name, &tgt, errp);
1576ec132efaSAlexey Kardashevskiy }
1577ec132efaSAlexey Kardashevskiy 
1578ec132efaSAlexey Kardashevskiy static void vfio_pci_nvlink2_get_link_speed(Object *obj, Visitor *v,
1579ec132efaSAlexey Kardashevskiy                                                  const char *name,
1580ec132efaSAlexey Kardashevskiy                                                  void *opaque, Error **errp)
1581ec132efaSAlexey Kardashevskiy {
1582ec132efaSAlexey Kardashevskiy     uint32_t link_speed = (uint32_t)(uintptr_t) opaque;
1583ec132efaSAlexey Kardashevskiy     visit_type_uint32(v, name, &link_speed, errp);
1584ec132efaSAlexey Kardashevskiy }
1585ec132efaSAlexey Kardashevskiy 
1586ec132efaSAlexey Kardashevskiy int vfio_pci_nvidia_v100_ram_init(VFIOPCIDevice *vdev, Error **errp)
1587ec132efaSAlexey Kardashevskiy {
1588ec132efaSAlexey Kardashevskiy     int ret;
1589ec132efaSAlexey Kardashevskiy     void *p;
1590ec132efaSAlexey Kardashevskiy     struct vfio_region_info *nv2reg = NULL;
1591ec132efaSAlexey Kardashevskiy     struct vfio_info_cap_header *hdr;
1592ec132efaSAlexey Kardashevskiy     struct vfio_region_info_cap_nvlink2_ssatgt *cap;
1593ec132efaSAlexey Kardashevskiy     VFIOQuirk *quirk;
1594ec132efaSAlexey Kardashevskiy 
1595ec132efaSAlexey Kardashevskiy     ret = vfio_get_dev_region_info(&vdev->vbasedev,
1596ec132efaSAlexey Kardashevskiy                                    VFIO_REGION_TYPE_PCI_VENDOR_TYPE |
1597ec132efaSAlexey Kardashevskiy                                    PCI_VENDOR_ID_NVIDIA,
1598ec132efaSAlexey Kardashevskiy                                    VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM,
1599ec132efaSAlexey Kardashevskiy                                    &nv2reg);
1600ec132efaSAlexey Kardashevskiy     if (ret) {
1601ec132efaSAlexey Kardashevskiy         return ret;
1602ec132efaSAlexey Kardashevskiy     }
1603ec132efaSAlexey Kardashevskiy 
1604ec132efaSAlexey Kardashevskiy     hdr = vfio_get_region_info_cap(nv2reg, VFIO_REGION_INFO_CAP_NVLINK2_SSATGT);
1605ec132efaSAlexey Kardashevskiy     if (!hdr) {
1606ec132efaSAlexey Kardashevskiy         ret = -ENODEV;
1607ec132efaSAlexey Kardashevskiy         goto free_exit;
1608ec132efaSAlexey Kardashevskiy     }
1609ec132efaSAlexey Kardashevskiy     cap = (void *) hdr;
1610ec132efaSAlexey Kardashevskiy 
16119c7c0407SLeonardo Bras     p = mmap(NULL, nv2reg->size, PROT_READ | PROT_WRITE,
1612ec132efaSAlexey Kardashevskiy              MAP_SHARED, vdev->vbasedev.fd, nv2reg->offset);
1613ec132efaSAlexey Kardashevskiy     if (p == MAP_FAILED) {
1614ec132efaSAlexey Kardashevskiy         ret = -errno;
1615ec132efaSAlexey Kardashevskiy         goto free_exit;
1616ec132efaSAlexey Kardashevskiy     }
1617ec132efaSAlexey Kardashevskiy 
1618ec132efaSAlexey Kardashevskiy     quirk = vfio_quirk_alloc(1);
1619ec132efaSAlexey Kardashevskiy     memory_region_init_ram_ptr(&quirk->mem[0], OBJECT(vdev), "nvlink2-mr",
1620ec132efaSAlexey Kardashevskiy                                nv2reg->size, p);
1621ec132efaSAlexey Kardashevskiy     QLIST_INSERT_HEAD(&vdev->bars[0].quirks, quirk, next);
1622ec132efaSAlexey Kardashevskiy 
1623ec132efaSAlexey Kardashevskiy     object_property_add(OBJECT(vdev), "nvlink2-tgt", "uint64",
1624ec132efaSAlexey Kardashevskiy                         vfio_pci_nvlink2_get_tgt, NULL, NULL,
1625d2623129SMarkus Armbruster                         (void *) (uintptr_t) cap->tgt);
1626ec132efaSAlexey Kardashevskiy     trace_vfio_pci_nvidia_gpu_setup_quirk(vdev->vbasedev.name, cap->tgt,
1627ec132efaSAlexey Kardashevskiy                                           nv2reg->size);
1628ec132efaSAlexey Kardashevskiy free_exit:
1629ec132efaSAlexey Kardashevskiy     g_free(nv2reg);
1630ec132efaSAlexey Kardashevskiy 
1631ec132efaSAlexey Kardashevskiy     return ret;
1632ec132efaSAlexey Kardashevskiy }
1633ec132efaSAlexey Kardashevskiy 
1634ec132efaSAlexey Kardashevskiy int vfio_pci_nvlink2_init(VFIOPCIDevice *vdev, Error **errp)
1635ec132efaSAlexey Kardashevskiy {
1636ec132efaSAlexey Kardashevskiy     int ret;
1637ec132efaSAlexey Kardashevskiy     void *p;
1638ec132efaSAlexey Kardashevskiy     struct vfio_region_info *atsdreg = NULL;
1639ec132efaSAlexey Kardashevskiy     struct vfio_info_cap_header *hdr;
1640ec132efaSAlexey Kardashevskiy     struct vfio_region_info_cap_nvlink2_ssatgt *captgt;
1641ec132efaSAlexey Kardashevskiy     struct vfio_region_info_cap_nvlink2_lnkspd *capspeed;
1642ec132efaSAlexey Kardashevskiy     VFIOQuirk *quirk;
1643ec132efaSAlexey Kardashevskiy 
1644ec132efaSAlexey Kardashevskiy     ret = vfio_get_dev_region_info(&vdev->vbasedev,
1645ec132efaSAlexey Kardashevskiy                                    VFIO_REGION_TYPE_PCI_VENDOR_TYPE |
1646ec132efaSAlexey Kardashevskiy                                    PCI_VENDOR_ID_IBM,
1647ec132efaSAlexey Kardashevskiy                                    VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD,
1648ec132efaSAlexey Kardashevskiy                                    &atsdreg);
1649ec132efaSAlexey Kardashevskiy     if (ret) {
1650ec132efaSAlexey Kardashevskiy         return ret;
1651ec132efaSAlexey Kardashevskiy     }
1652ec132efaSAlexey Kardashevskiy 
1653ec132efaSAlexey Kardashevskiy     hdr = vfio_get_region_info_cap(atsdreg,
1654ec132efaSAlexey Kardashevskiy                                    VFIO_REGION_INFO_CAP_NVLINK2_SSATGT);
1655ec132efaSAlexey Kardashevskiy     if (!hdr) {
1656ec132efaSAlexey Kardashevskiy         ret = -ENODEV;
1657ec132efaSAlexey Kardashevskiy         goto free_exit;
1658ec132efaSAlexey Kardashevskiy     }
1659ec132efaSAlexey Kardashevskiy     captgt = (void *) hdr;
1660ec132efaSAlexey Kardashevskiy 
1661ec132efaSAlexey Kardashevskiy     hdr = vfio_get_region_info_cap(atsdreg,
1662ec132efaSAlexey Kardashevskiy                                    VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD);
1663ec132efaSAlexey Kardashevskiy     if (!hdr) {
1664ec132efaSAlexey Kardashevskiy         ret = -ENODEV;
1665ec132efaSAlexey Kardashevskiy         goto free_exit;
1666ec132efaSAlexey Kardashevskiy     }
1667ec132efaSAlexey Kardashevskiy     capspeed = (void *) hdr;
1668ec132efaSAlexey Kardashevskiy 
1669ec132efaSAlexey Kardashevskiy     /* Some NVLink bridges may not have assigned ATSD */
1670ec132efaSAlexey Kardashevskiy     if (atsdreg->size) {
16719c7c0407SLeonardo Bras         p = mmap(NULL, atsdreg->size, PROT_READ | PROT_WRITE,
1672ec132efaSAlexey Kardashevskiy                  MAP_SHARED, vdev->vbasedev.fd, atsdreg->offset);
1673ec132efaSAlexey Kardashevskiy         if (p == MAP_FAILED) {
1674ec132efaSAlexey Kardashevskiy             ret = -errno;
1675ec132efaSAlexey Kardashevskiy             goto free_exit;
1676ec132efaSAlexey Kardashevskiy         }
1677ec132efaSAlexey Kardashevskiy 
1678ec132efaSAlexey Kardashevskiy         quirk = vfio_quirk_alloc(1);
1679ec132efaSAlexey Kardashevskiy         memory_region_init_ram_device_ptr(&quirk->mem[0], OBJECT(vdev),
1680ec132efaSAlexey Kardashevskiy                                           "nvlink2-atsd-mr", atsdreg->size, p);
1681ec132efaSAlexey Kardashevskiy         QLIST_INSERT_HEAD(&vdev->bars[0].quirks, quirk, next);
1682ec132efaSAlexey Kardashevskiy     }
1683ec132efaSAlexey Kardashevskiy 
1684ec132efaSAlexey Kardashevskiy     object_property_add(OBJECT(vdev), "nvlink2-tgt", "uint64",
1685ec132efaSAlexey Kardashevskiy                         vfio_pci_nvlink2_get_tgt, NULL, NULL,
1686d2623129SMarkus Armbruster                         (void *) (uintptr_t) captgt->tgt);
1687ec132efaSAlexey Kardashevskiy     trace_vfio_pci_nvlink2_setup_quirk_ssatgt(vdev->vbasedev.name, captgt->tgt,
1688ec132efaSAlexey Kardashevskiy                                               atsdreg->size);
1689ec132efaSAlexey Kardashevskiy 
1690ec132efaSAlexey Kardashevskiy     object_property_add(OBJECT(vdev), "nvlink2-link-speed", "uint32",
1691ec132efaSAlexey Kardashevskiy                         vfio_pci_nvlink2_get_link_speed, NULL, NULL,
1692d2623129SMarkus Armbruster                         (void *) (uintptr_t) capspeed->link_speed);
1693ec132efaSAlexey Kardashevskiy     trace_vfio_pci_nvlink2_setup_quirk_lnkspd(vdev->vbasedev.name,
1694ec132efaSAlexey Kardashevskiy                                               capspeed->link_speed);
1695ec132efaSAlexey Kardashevskiy free_exit:
1696ec132efaSAlexey Kardashevskiy     g_free(atsdreg);
1697ec132efaSAlexey Kardashevskiy 
1698ec132efaSAlexey Kardashevskiy     return ret;
1699ec132efaSAlexey Kardashevskiy }
1700*ee7932b0SJon Derrick 
1701*ee7932b0SJon Derrick /*
1702*ee7932b0SJon Derrick  * The VMD endpoint provides a real PCIe domain to the guest and the guest
1703*ee7932b0SJon Derrick  * kernel performs enumeration of the VMD sub-device domain. Guest transactions
1704*ee7932b0SJon Derrick  * to VMD sub-devices go through MMU translation from guest addresses to
1705*ee7932b0SJon Derrick  * physical addresses. When MMIO goes to an endpoint after being translated to
1706*ee7932b0SJon Derrick  * physical addresses, the bridge rejects the transaction because the window
1707*ee7932b0SJon Derrick  * has been programmed with guest addresses.
1708*ee7932b0SJon Derrick  *
1709*ee7932b0SJon Derrick  * VMD can use the Host Physical Address in order to correctly program the
1710*ee7932b0SJon Derrick  * bridge windows in its PCIe domain. VMD device 28C0 has HPA shadow registers
1711*ee7932b0SJon Derrick  * located at offset 0x2000 in MEMBAR2 (BAR 4). This quirk provides the HPA
1712*ee7932b0SJon Derrick  * shadow registers in a vendor-specific capability register for devices
1713*ee7932b0SJon Derrick  * without native support. The position of 0xE8-0xFF is in the reserved range
1714*ee7932b0SJon Derrick  * of the VMD device capability space following the Power Management
1715*ee7932b0SJon Derrick  * Capability.
1716*ee7932b0SJon Derrick  */
1717*ee7932b0SJon Derrick #define VMD_SHADOW_CAP_VER 1
1718*ee7932b0SJon Derrick #define VMD_SHADOW_CAP_LEN 24
1719*ee7932b0SJon Derrick static int vfio_add_vmd_shadow_cap(VFIOPCIDevice *vdev, Error **errp)
1720*ee7932b0SJon Derrick {
1721*ee7932b0SJon Derrick     uint8_t membar_phys[16];
1722*ee7932b0SJon Derrick     int ret, pos = 0xE8;
1723*ee7932b0SJon Derrick 
1724*ee7932b0SJon Derrick     if (!(vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, 0x201D) ||
1725*ee7932b0SJon Derrick           vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, 0x467F) ||
1726*ee7932b0SJon Derrick           vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, 0x4C3D) ||
1727*ee7932b0SJon Derrick           vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, 0x9A0B))) {
1728*ee7932b0SJon Derrick         return 0;
1729*ee7932b0SJon Derrick     }
1730*ee7932b0SJon Derrick 
1731*ee7932b0SJon Derrick     ret = pread(vdev->vbasedev.fd, membar_phys, 16,
1732*ee7932b0SJon Derrick                 vdev->config_offset + PCI_BASE_ADDRESS_2);
1733*ee7932b0SJon Derrick     if (ret != 16) {
1734*ee7932b0SJon Derrick         error_report("VMD %s cannot read MEMBARs (%d)",
1735*ee7932b0SJon Derrick                      vdev->vbasedev.name, ret);
1736*ee7932b0SJon Derrick         return -EFAULT;
1737*ee7932b0SJon Derrick     }
1738*ee7932b0SJon Derrick 
1739*ee7932b0SJon Derrick     ret = pci_add_capability(&vdev->pdev, PCI_CAP_ID_VNDR, pos,
1740*ee7932b0SJon Derrick                              VMD_SHADOW_CAP_LEN, errp);
1741*ee7932b0SJon Derrick     if (ret < 0) {
1742*ee7932b0SJon Derrick         error_prepend(errp, "Failed to add VMD MEMBAR Shadow cap: ");
1743*ee7932b0SJon Derrick         return ret;
1744*ee7932b0SJon Derrick     }
1745*ee7932b0SJon Derrick 
1746*ee7932b0SJon Derrick     memset(vdev->emulated_config_bits + pos, 0xFF, VMD_SHADOW_CAP_LEN);
1747*ee7932b0SJon Derrick     pos += PCI_CAP_FLAGS;
1748*ee7932b0SJon Derrick     pci_set_byte(vdev->pdev.config + pos++, VMD_SHADOW_CAP_LEN);
1749*ee7932b0SJon Derrick     pci_set_byte(vdev->pdev.config + pos++, VMD_SHADOW_CAP_VER);
1750*ee7932b0SJon Derrick     pci_set_long(vdev->pdev.config + pos, 0x53484457); /* SHDW */
1751*ee7932b0SJon Derrick     memcpy(vdev->pdev.config + pos + 4, membar_phys, 16);
1752*ee7932b0SJon Derrick 
1753*ee7932b0SJon Derrick     return 0;
1754*ee7932b0SJon Derrick }
1755*ee7932b0SJon Derrick 
1756*ee7932b0SJon Derrick int vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp)
1757*ee7932b0SJon Derrick {
1758*ee7932b0SJon Derrick     int ret;
1759*ee7932b0SJon Derrick 
1760*ee7932b0SJon Derrick     ret = vfio_add_nv_gpudirect_cap(vdev, errp);
1761*ee7932b0SJon Derrick     if (ret) {
1762*ee7932b0SJon Derrick         return ret;
1763*ee7932b0SJon Derrick     }
1764*ee7932b0SJon Derrick 
1765*ee7932b0SJon Derrick     ret = vfio_add_vmd_shadow_cap(vdev, errp);
1766*ee7932b0SJon Derrick     if (ret) {
1767*ee7932b0SJon Derrick         return ret;
1768*ee7932b0SJon Derrick     }
1769*ee7932b0SJon Derrick 
1770*ee7932b0SJon Derrick     return 0;
1771*ee7932b0SJon Derrick }
1772