xref: /qemu/hw/vfio/pci-quirks.c (revision b946d286114e09a81c303c7ec8ec3f7b33dff9e8)
1 /*
2  * device quirks for PCI devices
3  *
4  * Copyright Red Hat, Inc. 2012-2015
5  *
6  * Authors:
7  *  Alex Williamson <alex.williamson@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  */
12 
13 #include "pci.h"
14 #include "trace.h"
15 #include "qemu/range.h"
16 
17 #define PCI_ANY_ID (~0)
18 
19 /* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */
20 static bool vfio_pci_is(VFIOPCIDevice *vdev, uint32_t vendor, uint32_t device)
21 {
22     PCIDevice *pdev = &vdev->pdev;
23 
24     return (vendor == PCI_ANY_ID ||
25             vendor == pci_get_word(pdev->config + PCI_VENDOR_ID)) &&
26            (device == PCI_ANY_ID ||
27             device == pci_get_word(pdev->config + PCI_DEVICE_ID));
28 }
29 
/*
 * Vendor/device ID pairs for which option ROM loading is disabled.
 *
 * This avoids guest hangs during ROM execution, as noticed with the
 * BCM 57810 card, for lack of a better way to handle such issues.
 * The user can still override by specifying a romfile or rombar=1.
 *
 * Please see https://bugs.launchpad.net/qemu/+bug/1284874 for an analysis
 * of the 57810 card hang.  When adding a new vendor id/device id
 * combination below, please also add your card/environment details and
 * any information that could help in debugging to the bug tracking this
 * issue.
 */
static const struct {
    uint32_t vendor;
    uint32_t device;
} romblacklist[] = {
    { .vendor = 0x14e4, .device = 0x168e }, /* Broadcom BCM 57810 */
};
49 
50 bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev)
51 {
52     int i;
53 
54     for (i = 0 ; i < ARRAY_SIZE(romblacklist); i++) {
55         if (vfio_pci_is(vdev, romblacklist[i].vendor, romblacklist[i].device)) {
56             trace_vfio_quirk_rom_blacklisted(vdev->vbasedev.name,
57                                              romblacklist[i].vendor,
58                                              romblacklist[i].device);
59             return true;
60         }
61     }
62     return false;
63 }
64 
65 /*
66  * Device specific quirks
67  */
68 
/*
 * Is range1 fully contained within range2?
 *
 * Ranges are [first, first + len).  The check is written so that no
 * intermediate sum is formed: "first1 + len1 <= first2 + len2" can wrap
 * around in uint64_t and wrongly report an out-of-range access as
 * contained.  Instead compare the offset of range1 inside range2 against
 * the room that is left once range1's length is accounted for.
 *
 * Returns true if every byte of range1 lies inside range2.
 */
static bool vfio_range_contained(uint64_t first1, uint64_t len1,
                                 uint64_t first2, uint64_t len2)
{
    if (first1 < first2 || len1 > len2) {
        return false;
    }

    /* Both subtractions are safe thanks to the checks above */
    return first1 - first2 <= len2 - len1;
}
74 
/*
 * Returns true when @mask is non-zero and every bit of @mask is set in
 * @flags.  An empty mask never counts as enabled.
 */
static bool vfio_flags_enabled(uint8_t flags, uint8_t mask)
{
    if (mask == 0) {
        return false;
    }

    return (flags & mask) == mask;
}
79 
80 static uint64_t vfio_generic_window_quirk_read(void *opaque,
81                                                hwaddr addr, unsigned size)
82 {
83     VFIOLegacyQuirk *quirk = opaque;
84     VFIOPCIDevice *vdev = quirk->vdev;
85     uint64_t data;
86 
87     if (vfio_flags_enabled(quirk->data.flags, quirk->data.read_flags) &&
88         ranges_overlap(addr, size,
89                        quirk->data.data_offset, quirk->data.data_size)) {
90         hwaddr offset = addr - quirk->data.data_offset;
91 
92         if (!vfio_range_contained(addr, size, quirk->data.data_offset,
93                                   quirk->data.data_size)) {
94             hw_error("%s: window data read not fully contained: %s",
95                      __func__, memory_region_name(quirk->mem));
96         }
97 
98         data = vfio_pci_read_config(&vdev->pdev,
99                                     quirk->data.address_val + offset, size);
100 
101         trace_vfio_generic_window_quirk_read(memory_region_name(quirk->mem),
102                                              vdev->vbasedev.name,
103                                              quirk->data.bar,
104                                              addr, size, data);
105     } else {
106         data = vfio_region_read(&vdev->bars[quirk->data.bar].region,
107                                 addr + quirk->data.base_offset, size);
108     }
109 
110     return data;
111 }
112 
113 static void vfio_generic_window_quirk_write(void *opaque, hwaddr addr,
114                                             uint64_t data, unsigned size)
115 {
116     VFIOLegacyQuirk *quirk = opaque;
117     VFIOPCIDevice *vdev = quirk->vdev;
118 
119     if (ranges_overlap(addr, size,
120                        quirk->data.address_offset, quirk->data.address_size)) {
121 
122         if (addr != quirk->data.address_offset) {
123             hw_error("%s: offset write into address window: %s",
124                      __func__, memory_region_name(quirk->mem));
125         }
126 
127         if ((data & ~quirk->data.address_mask) == quirk->data.address_match) {
128             quirk->data.flags |= quirk->data.write_flags |
129                                  quirk->data.read_flags;
130             quirk->data.address_val = data & quirk->data.address_mask;
131         } else {
132             quirk->data.flags &= ~(quirk->data.write_flags |
133                                    quirk->data.read_flags);
134         }
135     }
136 
137     if (vfio_flags_enabled(quirk->data.flags, quirk->data.write_flags) &&
138         ranges_overlap(addr, size,
139                        quirk->data.data_offset, quirk->data.data_size)) {
140         hwaddr offset = addr - quirk->data.data_offset;
141 
142         if (!vfio_range_contained(addr, size, quirk->data.data_offset,
143                                   quirk->data.data_size)) {
144             hw_error("%s: window data write not fully contained: %s",
145                      __func__, memory_region_name(quirk->mem));
146         }
147 
148         vfio_pci_write_config(&vdev->pdev,
149                               quirk->data.address_val + offset, data, size);
150         trace_vfio_generic_window_quirk_write(memory_region_name(quirk->mem),
151                                               vdev->vbasedev.name,
152                                               quirk->data.bar,
153                                               addr, data, size);
154         return;
155     }
156 
157     vfio_region_write(&vdev->bars[quirk->data.bar].region,
158                    addr + quirk->data.base_offset, data, size);
159 }
160 
161 static const MemoryRegionOps vfio_generic_window_quirk = {
162     .read = vfio_generic_window_quirk_read,
163     .write = vfio_generic_window_quirk_write,
164     .endianness = DEVICE_LITTLE_ENDIAN,
165 };
166 
167 static uint64_t vfio_generic_quirk_read(void *opaque,
168                                         hwaddr addr, unsigned size)
169 {
170     VFIOLegacyQuirk *quirk = opaque;
171     VFIOPCIDevice *vdev = quirk->vdev;
172     hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK;
173     hwaddr offset = quirk->data.address_match & ~TARGET_PAGE_MASK;
174     uint64_t data;
175 
176     if (vfio_flags_enabled(quirk->data.flags, quirk->data.read_flags) &&
177         ranges_overlap(addr, size, offset, quirk->data.address_mask + 1)) {
178         if (!vfio_range_contained(addr, size, offset,
179                                   quirk->data.address_mask + 1)) {
180             hw_error("%s: read not fully contained: %s",
181                      __func__, memory_region_name(quirk->mem));
182         }
183 
184         data = vfio_pci_read_config(&vdev->pdev, addr - offset, size);
185 
186         trace_vfio_generic_quirk_read(memory_region_name(quirk->mem),
187                                       vdev->vbasedev.name, quirk->data.bar,
188                                       addr + base, size, data);
189     } else {
190         data = vfio_region_read(&vdev->bars[quirk->data.bar].region,
191                                 addr + base, size);
192     }
193 
194     return data;
195 }
196 
197 static void vfio_generic_quirk_write(void *opaque, hwaddr addr,
198                                      uint64_t data, unsigned size)
199 {
200     VFIOLegacyQuirk *quirk = opaque;
201     VFIOPCIDevice *vdev = quirk->vdev;
202     hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK;
203     hwaddr offset = quirk->data.address_match & ~TARGET_PAGE_MASK;
204 
205     if (vfio_flags_enabled(quirk->data.flags, quirk->data.write_flags) &&
206         ranges_overlap(addr, size, offset, quirk->data.address_mask + 1)) {
207         if (!vfio_range_contained(addr, size, offset,
208                                   quirk->data.address_mask + 1)) {
209             hw_error("%s: write not fully contained: %s",
210                      __func__, memory_region_name(quirk->mem));
211         }
212 
213         vfio_pci_write_config(&vdev->pdev, addr - offset, data, size);
214 
215         trace_vfio_generic_quirk_write(memory_region_name(quirk->mem),
216                                        vdev->vbasedev.name, quirk->data.bar,
217                                        addr + base, data, size);
218     } else {
219         vfio_region_write(&vdev->bars[quirk->data.bar].region,
220                           addr + base, data, size);
221     }
222 }
223 
224 static const MemoryRegionOps vfio_generic_quirk = {
225     .read = vfio_generic_quirk_read,
226     .write = vfio_generic_quirk_write,
227     .endianness = DEVICE_LITTLE_ENDIAN,
228 };
229 
/* PCI vendor ID used to match the ATI/AMD quirks below */
#define PCI_VENDOR_ID_ATI 0x1002
231 
232 /*
233  * Radeon HD cards (HD5450 & HD7850) report the upper byte of the I/O port BAR
234  * through VGA register 0x3c3.  On newer cards, the I/O port BAR is always
235  * BAR4 (older cards like the X550 used BAR1, but we don't care to support
236  * those).  Note that on bare metal, a read of 0x3c3 doesn't always return the
237  * I/O port BAR address.  Originally this was coded to return the virtual BAR
238  * address only if the physical register read returns the actual BAR address,
239  * but users have reported greater success if we return the virtual address
240  * unconditionally.
241  */
242 static uint64_t vfio_ati_3c3_quirk_read(void *opaque,
243                                         hwaddr addr, unsigned size)
244 {
245     VFIOPCIDevice *vdev = opaque;
246     uint64_t data = vfio_pci_read_config(&vdev->pdev,
247                                          PCI_BASE_ADDRESS_4 + 1, size);
248 
249     trace_vfio_quirk_ati_3c3_read(vdev->vbasedev.name, data);
250 
251     return data;
252 }
253 
254 static const MemoryRegionOps vfio_ati_3c3_quirk = {
255     .read = vfio_ati_3c3_quirk_read,
256     .endianness = DEVICE_LITTLE_ENDIAN,
257 };
258 
259 static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev)
260 {
261     VFIOQuirk *quirk;
262 
263     /*
264      * As long as the BAR is >= 256 bytes it will be aligned such that the
265      * lower byte is always zero.  Filter out anything else, if it exists.
266      */
267     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
268         !vdev->bars[4].ioport || vdev->bars[4].region.size < 256) {
269         return;
270     }
271 
272     quirk = g_malloc0(sizeof(*quirk));
273     quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
274     quirk->nr_mem = 1;
275 
276     memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, vdev,
277                           "vfio-ati-3c3-quirk", 1);
278     memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
279                                 3 /* offset 3 bytes from 0x3c0 */, quirk->mem);
280 
281     QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks,
282                       quirk, next);
283 
284     trace_vfio_quirk_ati_3c3_probe(vdev->vbasedev.name);
285 }
286 
287 /*
288  * Newer ATI/AMD devices, including HD5450 and HD7850, have a window to PCI
289  * config space through MMIO BAR2 at offset 0x4000.  Nothing seems to access
290  * the MMIO space directly, but a window to this space is provided through
291  * I/O port BAR4.  Offset 0x0 is the address register and offset 0x4 is the
292  * data register.  When the address is programmed to a range of 0x4000-0x4fff
293  * PCI configuration space is available.  Experimentation seems to indicate
294  * that only read-only access is provided, but we drop writes when the window
295  * is enabled to config space nonetheless.
296  */
297 static void vfio_probe_ati_bar4_window_quirk(VFIOPCIDevice *vdev, int nr)
298 {
299     PCIDevice *pdev = &vdev->pdev;
300     VFIOQuirk *quirk;
301     VFIOLegacyQuirk *legacy;
302 
303     if (!vdev->has_vga || nr != 4 ||
304         pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) {
305         return;
306     }
307 
308     quirk = g_malloc0(sizeof(*quirk));
309     quirk->data = legacy = g_malloc0(sizeof(*legacy));
310     quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
311     quirk->nr_mem = 1;
312     legacy->vdev = vdev;
313     legacy->data.address_size = 4;
314     legacy->data.data_offset = 4;
315     legacy->data.data_size = 4;
316     legacy->data.address_match = 0x4000;
317     legacy->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1;
318     legacy->data.bar = nr;
319     legacy->data.read_flags = legacy->data.write_flags = 1;
320 
321     memory_region_init_io(quirk->mem, OBJECT(vdev),
322                           &vfio_generic_window_quirk, legacy,
323                           "vfio-ati-bar4-window-quirk", 8);
324     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
325                           legacy->data.base_offset, quirk->mem, 1);
326 
327     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
328 
329     trace_vfio_probe_ati_bar4_window_quirk(vdev->vbasedev.name);
330 }
331 
332 /*
333  * Trap the BAR2 MMIO window to config space as well.
334  */
335 static void vfio_probe_ati_bar2_4000_quirk(VFIOPCIDevice *vdev, int nr)
336 {
337     PCIDevice *pdev = &vdev->pdev;
338     VFIOQuirk *quirk;
339     VFIOLegacyQuirk *legacy;
340 
341     /* Only enable on newer devices where BAR2 is 64bit */
342     if (!vdev->has_vga || nr != 2 || !vdev->bars[2].mem64 ||
343         pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) {
344         return;
345     }
346 
347     quirk = g_malloc0(sizeof(*quirk));
348     quirk->data = legacy = g_malloc0(sizeof(*legacy));
349     quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
350     quirk->nr_mem = 1;
351     legacy->vdev = vdev;
352     legacy->data.flags = legacy->data.read_flags = legacy->data.write_flags = 1;
353     legacy->data.address_match = 0x4000;
354     legacy->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1;
355     legacy->data.bar = nr;
356 
357     memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_generic_quirk, legacy,
358                           "vfio-ati-bar2-4000-quirk",
359                           TARGET_PAGE_ALIGN(legacy->data.address_mask + 1));
360     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
361                           legacy->data.address_match & TARGET_PAGE_MASK,
362                           quirk->mem, 1);
363 
364     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
365 
366     trace_vfio_probe_ati_bar2_4000_quirk(vdev->vbasedev.name);
367 }
368 
369 /*
370  * Older ATI/AMD cards like the X550 have a similar window to that above.
371  * I/O port BAR1 provides a window to a mirror of PCI config space located
372  * in BAR2 at offset 0xf00.  We don't care to support such older cards, but
373  * note it for future reference.
374  */
375 
/* PCI vendor ID used to match the Nvidia quirks below */
#define PCI_VENDOR_ID_NVIDIA 0x10de
377 
/*
 * Nvidia has several different methods to get to config space; the
 * nouveau project has several of these documented here:
 * https://github.com/pathscale/envytools/tree/master/hwdocs
 *
 * The first quirk is actually not documented in envytools and is found
 * on 10de:01d1 (NVIDIA Corporation G72 [GeForce 7300 LE]).  This is an
 * NV46 chipset.  The backdoor uses the legacy VGA I/O ports to access
 * the mirror of PCI config space found at BAR0 offset 0x1800.  The access
 * sequence first writes 0x338 to I/O port 0x3d4.  The target offset is
 * then written to 0x3d0.  Finally 0x538 is written for a read and 0x738
 * is written for a write to 0x3d4.  The BAR0 offset is then accessible
 * through 0x3d0.  This quirk doesn't seem to be necessary on newer cards
 * that use the I/O port BAR5 window but it doesn't hurt to leave it.
 */
/* States of the 0x3d0/0x3d4 access sequence, stored in quirk->data.flags */
enum {
    NV_3D0_NONE   = 0, /* idle */
    NV_3D0_SELECT = 1, /* 0x338 was written to the address port */
    NV_3D0_WINDOW = 2, /* a matching target offset was latched */
    NV_3D0_READ   = 3, /* 0x538 written: next data port read is redirected */
    NV_3D0_WRITE  = 4, /* 0x738 written: next data port write is redirected */
};
400 
401 static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque,
402                                            hwaddr addr, unsigned size)
403 {
404     VFIOLegacyQuirk *quirk = opaque;
405     VFIOPCIDevice *vdev = quirk->vdev;
406     PCIDevice *pdev = &vdev->pdev;
407     uint64_t data = vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
408                                   addr + quirk->data.base_offset, size);
409 
410     if (quirk->data.flags == NV_3D0_READ && addr == quirk->data.data_offset) {
411         data = vfio_pci_read_config(pdev, quirk->data.address_val, size);
412         trace_vfio_nvidia_3d0_quirk_read(size, data);
413     }
414 
415     quirk->data.flags = NV_3D0_NONE;
416 
417     return data;
418 }
419 
420 static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr,
421                                         uint64_t data, unsigned size)
422 {
423     VFIOLegacyQuirk *quirk = opaque;
424     VFIOPCIDevice *vdev = quirk->vdev;
425     PCIDevice *pdev = &vdev->pdev;
426 
427     switch (quirk->data.flags) {
428     case NV_3D0_NONE:
429         if (addr == quirk->data.address_offset && data == 0x338) {
430             quirk->data.flags = NV_3D0_SELECT;
431         }
432         break;
433     case NV_3D0_SELECT:
434         quirk->data.flags = NV_3D0_NONE;
435         if (addr == quirk->data.data_offset &&
436             (data & ~quirk->data.address_mask) == quirk->data.address_match) {
437             quirk->data.flags = NV_3D0_WINDOW;
438             quirk->data.address_val = data & quirk->data.address_mask;
439         }
440         break;
441     case NV_3D0_WINDOW:
442         quirk->data.flags = NV_3D0_NONE;
443         if (addr == quirk->data.address_offset) {
444             if (data == 0x538) {
445                 quirk->data.flags = NV_3D0_READ;
446             } else if (data == 0x738) {
447                 quirk->data.flags = NV_3D0_WRITE;
448             }
449         }
450         break;
451     case NV_3D0_WRITE:
452         quirk->data.flags = NV_3D0_NONE;
453         if (addr == quirk->data.data_offset) {
454             vfio_pci_write_config(pdev, quirk->data.address_val, data, size);
455             trace_vfio_nvidia_3d0_quirk_write(data, size);
456             return;
457         }
458         break;
459     }
460 
461     vfio_vga_write(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
462                    addr + quirk->data.base_offset, data, size);
463 }
464 
465 static const MemoryRegionOps vfio_nvidia_3d0_quirk = {
466     .read = vfio_nvidia_3d0_quirk_read,
467     .write = vfio_nvidia_3d0_quirk_write,
468     .endianness = DEVICE_LITTLE_ENDIAN,
469 };
470 
471 static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev)
472 {
473     PCIDevice *pdev = &vdev->pdev;
474     VFIOQuirk *quirk;
475     VFIOLegacyQuirk *legacy;
476 
477     if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA ||
478         !vdev->bars[1].region.size) {
479         return;
480     }
481 
482     quirk = g_malloc0(sizeof(*quirk));
483     quirk->data = legacy = g_malloc0(sizeof(*legacy));
484     quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
485     quirk->nr_mem = 1;
486     legacy->vdev = vdev;
487     legacy->data.base_offset = 0x10;
488     legacy->data.address_offset = 4;
489     legacy->data.address_size = 2;
490     legacy->data.address_match = 0x1800;
491     legacy->data.address_mask = PCI_CONFIG_SPACE_SIZE - 1;
492     legacy->data.data_offset = 0;
493     legacy->data.data_size = 4;
494 
495     memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_nvidia_3d0_quirk,
496                           legacy, "vfio-nvidia-3d0-quirk", 6);
497     memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
498                                 legacy->data.base_offset, quirk->mem);
499 
500     QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks,
501                       quirk, next);
502 
503     trace_vfio_vga_probe_nvidia_3d0_quirk(vdev->vbasedev.name);
504 }
505 
/*
 * The second quirk is documented in envytools.  The I/O port BAR5 is just
 * a set of address/data ports to the MMIO BARs.  The BAR we care about is
 * again BAR0.  This backdoor is apparently a bit newer than the one above,
 * so we need to trap not only the 256 bytes @0x1800, but also the 4k
 * @0x88000, where all of PCI config space, including extended space, is
 * available.
 */
/* Flag bits tracked in quirk->data.flags for the BAR5 window */
enum {
    NV_BAR5_ADDRESS = 0x1, /* address register points at a trapped range */
    NV_BAR5_ENABLE  = 0x2, /* bit 0 of the register at offset 0x4 is set */
    NV_BAR5_MASTER  = 0x4, /* bit 0 of the register at offset 0x0 is set */
    NV_BAR5_VALID   = NV_BAR5_ADDRESS | NV_BAR5_ENABLE | NV_BAR5_MASTER,
};
519 
520 static void vfio_nvidia_bar5_window_quirk_write(void *opaque, hwaddr addr,
521                                                 uint64_t data, unsigned size)
522 {
523     VFIOLegacyQuirk *quirk = opaque;
524 
525     switch (addr) {
526     case 0x0:
527         if (data & 0x1) {
528             quirk->data.flags |= NV_BAR5_MASTER;
529         } else {
530             quirk->data.flags &= ~NV_BAR5_MASTER;
531         }
532         break;
533     case 0x4:
534         if (data & 0x1) {
535             quirk->data.flags |= NV_BAR5_ENABLE;
536         } else {
537             quirk->data.flags &= ~NV_BAR5_ENABLE;
538         }
539         break;
540     case 0x8:
541         if (quirk->data.flags & NV_BAR5_MASTER) {
542             if ((data & ~0xfff) == 0x88000) {
543                 quirk->data.flags |= NV_BAR5_ADDRESS;
544                 quirk->data.address_val = data & 0xfff;
545             } else if ((data & ~0xff) == 0x1800) {
546                 quirk->data.flags |= NV_BAR5_ADDRESS;
547                 quirk->data.address_val = data & 0xff;
548             } else {
549                 quirk->data.flags &= ~NV_BAR5_ADDRESS;
550             }
551         }
552         break;
553     }
554 
555     vfio_generic_window_quirk_write(opaque, addr, data, size);
556 }
557 
558 static const MemoryRegionOps vfio_nvidia_bar5_window_quirk = {
559     .read = vfio_generic_window_quirk_read,
560     .write = vfio_nvidia_bar5_window_quirk_write,
561     .valid.min_access_size = 4,
562     .endianness = DEVICE_LITTLE_ENDIAN,
563 };
564 
565 static void vfio_probe_nvidia_bar5_window_quirk(VFIOPCIDevice *vdev, int nr)
566 {
567     PCIDevice *pdev = &vdev->pdev;
568     VFIOQuirk *quirk;
569     VFIOLegacyQuirk *legacy;
570 
571     if (!vdev->has_vga || nr != 5 ||
572         pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA) {
573         return;
574     }
575 
576     quirk = g_malloc0(sizeof(*quirk));
577     quirk->data = legacy = g_malloc0(sizeof(*legacy));
578     quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
579     quirk->nr_mem = 1;
580     legacy->vdev = vdev;
581     legacy->data.read_flags = legacy->data.write_flags = NV_BAR5_VALID;
582     legacy->data.address_offset = 0x8;
583     legacy->data.address_size = 0; /* actually 4, but avoids generic code */
584     legacy->data.data_offset = 0xc;
585     legacy->data.data_size = 4;
586     legacy->data.bar = nr;
587 
588     memory_region_init_io(quirk->mem, OBJECT(vdev),
589                           &vfio_nvidia_bar5_window_quirk, legacy,
590                           "vfio-nvidia-bar5-window-quirk", 16);
591     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
592                                         0, quirk->mem, 1);
593 
594     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
595 
596     trace_vfio_probe_nvidia_bar5_window_quirk(vdev->vbasedev.name);
597 }
598 
599 static void vfio_nvidia_88000_quirk_write(void *opaque, hwaddr addr,
600                                           uint64_t data, unsigned size)
601 {
602     VFIOLegacyQuirk *quirk = opaque;
603     VFIOPCIDevice *vdev = quirk->vdev;
604     PCIDevice *pdev = &vdev->pdev;
605     hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK;
606 
607     vfio_generic_quirk_write(opaque, addr, data, size);
608 
609     /*
610      * Nvidia seems to acknowledge MSI interrupts by writing 0xff to the
611      * MSI capability ID register.  Both the ID and next register are
612      * read-only, so we allow writes covering either of those to real hw.
613      * NB - only fixed for the 0x88000 MMIO window.
614      */
615     if ((pdev->cap_present & QEMU_PCI_CAP_MSI) &&
616         vfio_range_contained(addr, size, pdev->msi_cap, PCI_MSI_FLAGS)) {
617         vfio_region_write(&vdev->bars[quirk->data.bar].region,
618                           addr + base, data, size);
619     }
620 }
621 
622 static const MemoryRegionOps vfio_nvidia_88000_quirk = {
623     .read = vfio_generic_quirk_read,
624     .write = vfio_nvidia_88000_quirk_write,
625     .endianness = DEVICE_LITTLE_ENDIAN,
626 };
627 
628 /*
629  * Finally, BAR0 itself.  We want to redirect any accesses to either
630  * 0x1800 or 0x88000 through the PCI config space access functions.
631  *
632  * NB - quirk at a page granularity or else they don't seem to work when
633  *      BARs are mmap'd
634  *
635  * Here's offset 0x88000...
636  */
637 static void vfio_probe_nvidia_bar0_88000_quirk(VFIOPCIDevice *vdev, int nr)
638 {
639     PCIDevice *pdev = &vdev->pdev;
640     VFIOQuirk *quirk;
641     VFIOLegacyQuirk *legacy;
642     uint16_t vendor, class;
643 
644     vendor = pci_get_word(pdev->config + PCI_VENDOR_ID);
645     class = pci_get_word(pdev->config + PCI_CLASS_DEVICE);
646 
647     if (nr != 0 || vendor != PCI_VENDOR_ID_NVIDIA ||
648         class != PCI_CLASS_DISPLAY_VGA) {
649         return;
650     }
651 
652     quirk = g_malloc0(sizeof(*quirk));
653     quirk->data = legacy = g_malloc0(sizeof(*legacy));
654     quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
655     quirk->nr_mem = 1;
656     legacy->vdev = vdev;
657     legacy->data.flags = legacy->data.read_flags = legacy->data.write_flags = 1;
658     legacy->data.address_match = 0x88000;
659     legacy->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1;
660     legacy->data.bar = nr;
661 
662     memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_nvidia_88000_quirk,
663                           legacy, "vfio-nvidia-bar0-88000-quirk",
664                           TARGET_PAGE_ALIGN(legacy->data.address_mask + 1));
665     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
666                           legacy->data.address_match & TARGET_PAGE_MASK,
667                           quirk->mem, 1);
668 
669     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
670 
671     trace_vfio_probe_nvidia_bar0_88000_quirk(vdev->vbasedev.name);
672 }
673 
674 /*
675  * And here's the same for BAR0 offset 0x1800...
676  */
677 static void vfio_probe_nvidia_bar0_1800_quirk(VFIOPCIDevice *vdev, int nr)
678 {
679     PCIDevice *pdev = &vdev->pdev;
680     VFIOQuirk *quirk;
681     VFIOLegacyQuirk *legacy;
682 
683     if (!vdev->has_vga || nr != 0 ||
684         pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA) {
685         return;
686     }
687 
688     /* Log the chipset ID */
689     trace_vfio_probe_nvidia_bar0_1800_quirk_id(
690             (unsigned int)(vfio_region_read(&vdev->bars[0].region, 0, 4) >> 20)
691             & 0xff);
692 
693     quirk = g_malloc0(sizeof(*quirk));
694     quirk->data = legacy = g_malloc0(sizeof(*legacy));
695     quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
696     quirk->nr_mem = 1;
697     legacy->vdev = vdev;
698     legacy->data.flags = legacy->data.read_flags = legacy->data.write_flags = 1;
699     legacy->data.address_match = 0x1800;
700     legacy->data.address_mask = PCI_CONFIG_SPACE_SIZE - 1;
701     legacy->data.bar = nr;
702 
703     memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_generic_quirk, legacy,
704                           "vfio-nvidia-bar0-1800-quirk",
705                           TARGET_PAGE_ALIGN(legacy->data.address_mask + 1));
706     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
707                           legacy->data.address_match & TARGET_PAGE_MASK,
708                           quirk->mem, 1);
709 
710     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
711 
712     trace_vfio_probe_nvidia_bar0_1800_quirk(vdev->vbasedev.name);
713 }
714 
715 /*
716  * TODO - Some Nvidia devices provide config access to their companion HDA
717  * device and even to their parent bridge via these config space mirrors.
718  * Add quirks for those regions.
719  */
720 
/* PCI vendor ID used to match the Realtek quirk below */
#define PCI_VENDOR_ID_REALTEK 0x10ec
722 
723 /*
724  * RTL8168 devices have a backdoor that can access the MSI-X table.  At BAR2
725  * offset 0x70 there is a dword data register, offset 0x74 is a dword address
726  * register.  According to the Linux r8169 driver, the MSI-X table is addressed
727  * when the "type" portion of the address register is set to 0x1.  This appears
728  * to be bits 16:30.  Bit 31 is both a write indicator and some sort of
729  * "address latched" indicator.  Bits 12:15 are a mask field, which we can
730  * ignore because the MSI-X table should always be accessed as a dword (full
731  * mask).  Bits 0:11 is offset within the type.
732  *
733  * Example trace:
734  *
735  * Read from MSI-X table offset 0
736  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x1f000, 4) // store read addr
737  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x8001f000 // latch
738  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x70, 4) = 0xfee00398 // read data
739  *
740  * Write 0xfee00000 to MSI-X table offset 0
741  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x70, 0xfee00000, 4) // write data
742  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x8001f000, 4) // do write
743  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x1f000 // complete
744  */
745 static uint64_t vfio_rtl8168_window_quirk_read(void *opaque,
746                                                hwaddr addr, unsigned size)
747 {
748     VFIOLegacyQuirk *quirk = opaque;
749     VFIOPCIDevice *vdev = quirk->vdev;
750     uint64_t val = 0;
751 
752     if (!quirk->data.flags) { /* Non-MSI-X table access */
753         return vfio_region_read(&vdev->bars[quirk->data.bar].region,
754                                 addr + 0x70, size);
755     }
756 
757     switch (addr) {
758     case 4: /* address */
759         val = quirk->data.address_match ^ 0x80000000U; /* latch/complete */
760         break;
761     case 0: /* data */
762         if ((vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) {
763             memory_region_dispatch_read(&vdev->pdev.msix_table_mmio,
764                                 (hwaddr)(quirk->data.address_match & 0xfff),
765                                 &val, size, MEMTXATTRS_UNSPECIFIED);
766         }
767         break;
768     }
769 
770     trace_vfio_rtl8168_quirk_read(vdev->vbasedev.name,
771                                   addr ? "address" : "data", val);
772     return val;
773 }
774 
775 static void vfio_rtl8168_window_quirk_write(void *opaque, hwaddr addr,
776                                             uint64_t data, unsigned size)
777 {
778     VFIOLegacyQuirk *quirk = opaque;
779     VFIOPCIDevice *vdev = quirk->vdev;
780 
781     switch (addr) {
782     case 4: /* address */
783         if ((data & 0x7fff0000) == 0x10000) { /* MSI-X table */
784             quirk->data.flags = 1; /* Activate reads */
785             quirk->data.address_match = data;
786 
787             trace_vfio_rtl8168_quirk_write(vdev->vbasedev.name, data);
788 
789             if (data & 0x80000000U) { /* Do write */
790                 if (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) {
791                     hwaddr offset = data & 0xfff;
792                     uint64_t val = quirk->data.address_mask;
793 
794                     trace_vfio_rtl8168_quirk_msix(vdev->vbasedev.name,
795                                                   (uint16_t)offset, val);
796 
797                     /* Write to the proper guest MSI-X table instead */
798                     memory_region_dispatch_write(&vdev->pdev.msix_table_mmio,
799                                                  offset, val, size,
800                                                  MEMTXATTRS_UNSPECIFIED);
801                 }
802                 return; /* Do not write guest MSI-X data to hardware */
803             }
804         } else {
805             quirk->data.flags = 0; /* De-activate reads, non-MSI-X */
806         }
807         break;
808     case 0: /* data */
809         quirk->data.address_mask = data;
810         break;
811     }
812 
813     vfio_region_write(&vdev->bars[quirk->data.bar].region,
814                       addr + 0x70, data, size);
815 }
816 
817 static const MemoryRegionOps vfio_rtl8168_window_quirk = {
818     .read = vfio_rtl8168_window_quirk_read,
819     .write = vfio_rtl8168_window_quirk_write,
820     .valid = {
821         .min_access_size = 4,
822         .max_access_size = 4,
823         .unaligned = false,
824     },
825     .endianness = DEVICE_LITTLE_ENDIAN,
826 };
827 
828 static void vfio_probe_rtl8168_bar2_window_quirk(VFIOPCIDevice *vdev, int nr)
829 {
830     PCIDevice *pdev = &vdev->pdev;
831     VFIOQuirk *quirk;
832     VFIOLegacyQuirk *legacy;
833 
834     if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_REALTEK ||
835         pci_get_word(pdev->config + PCI_DEVICE_ID) != 0x8168 || nr != 2) {
836         return;
837     }
838 
839     quirk = g_malloc0(sizeof(*quirk));
840     quirk->data = legacy = g_malloc0(sizeof(*legacy));
841     quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
842     quirk->nr_mem = 1;
843     legacy->vdev = vdev;
844     legacy->data.bar = nr;
845 
846     memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_rtl8168_window_quirk,
847                           legacy, "vfio-rtl8168-window-quirk", 8);
848     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
849                                         0x70, quirk->mem, 1);
850 
851     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
852 
853     trace_vfio_rtl8168_quirk_enable(vdev->vbasedev.name);
854 }
855 
856 /*
857  * Common quirk probe entry points.
858  */
/*
 * Run every VGA quirk probe for @vdev, in the order listed below.
 * NOTE(review): the probes are defined outside this excerpt; presumably
 * each one returns early when the device does not match - confirm.
 */
void vfio_vga_quirk_setup(VFIOPCIDevice *vdev)
{
    vfio_vga_probe_ati_3c3_quirk(vdev);
    vfio_vga_probe_nvidia_3d0_quirk(vdev);
}
864 
865 void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev)
866 {
867     VFIOQuirk *quirk;
868     int i, j;
869 
870     for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) {
871         QLIST_FOREACH(quirk, &vdev->vga.region[i].quirks, next) {
872             for (j = 0; j < quirk->nr_mem; j++) {
873                 memory_region_del_subregion(&vdev->vga.region[i].mem,
874                                             &quirk->mem[j]);
875             }
876         }
877     }
878 }
879 
880 void vfio_vga_quirk_free(VFIOPCIDevice *vdev)
881 {
882     int i, j;
883 
884     for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) {
885         while (!QLIST_EMPTY(&vdev->vga.region[i].quirks)) {
886             VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga.region[i].quirks);
887             QLIST_REMOVE(quirk, next);
888             for (j = 0; j < quirk->nr_mem; j++) {
889                 object_unparent(OBJECT(&quirk->mem[j]));
890             }
891             g_free(quirk->mem);
892             g_free(quirk->data);
893             g_free(quirk);
894         }
895     }
896 }
897 
/*
 * Run every BAR quirk probe for BAR @nr of @vdev, in the order listed
 * below.  vfio_probe_rtl8168_bar2_window_quirk() returns without doing
 * anything when the device or BAR index does not match.
 * NOTE(review): the other probes are defined outside this excerpt and
 * presumably filter the same way - confirm.
 */
void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
{
    vfio_probe_ati_bar4_window_quirk(vdev, nr);
    vfio_probe_ati_bar2_4000_quirk(vdev, nr);
    vfio_probe_nvidia_bar5_window_quirk(vdev, nr);
    vfio_probe_nvidia_bar0_88000_quirk(vdev, nr);
    vfio_probe_nvidia_bar0_1800_quirk(vdev, nr);
    vfio_probe_rtl8168_bar2_window_quirk(vdev, nr);
}
907 
908 void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr)
909 {
910     VFIOBAR *bar = &vdev->bars[nr];
911     VFIOQuirk *quirk;
912     int i;
913 
914     QLIST_FOREACH(quirk, &bar->quirks, next) {
915         for (i = 0; i < quirk->nr_mem; i++) {
916             memory_region_del_subregion(&bar->region.mem, &quirk->mem[i]);
917         }
918     }
919 }
920 
921 void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr)
922 {
923     VFIOBAR *bar = &vdev->bars[nr];
924     int i;
925 
926     while (!QLIST_EMPTY(&bar->quirks)) {
927         VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks);
928         QLIST_REMOVE(quirk, next);
929         for (i = 0; i < quirk->nr_mem; i++) {
930             object_unparent(OBJECT(&quirk->mem[i]));
931         }
932         g_free(quirk->mem);
933         g_free(quirk->data);
934         g_free(quirk);
935     }
936 }
937