xref: /qemu/hw/vfio/pci-quirks.c (revision 056dfcb695cde3c62b7dc1d5ed6d2e38b3a73e29)
1 /*
2  * device quirks for PCI devices
3  *
4  * Copyright Red Hat, Inc. 2012-2015
5  *
6  * Authors:
7  *  Alex Williamson <alex.williamson@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  */
12 
13 #include "pci.h"
14 #include "trace.h"
15 #include "qemu/range.h"
16 
17 #define PCI_ANY_ID (~0)
18 
19 /* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */
20 static bool vfio_pci_is(VFIOPCIDevice *vdev, uint32_t vendor, uint32_t device)
21 {
22     PCIDevice *pdev = &vdev->pdev;
23 
24     return (vendor == PCI_ANY_ID ||
25             vendor == pci_get_word(pdev->config + PCI_VENDOR_ID)) &&
26            (device == PCI_ANY_ID ||
27             device == pci_get_word(pdev->config + PCI_DEVICE_ID));
28 }
29 
/*
 * Vendor ID/device ID pairs for which option ROM loading is disabled.
 * This avoids the guest hang during ROM execution that was noticed with
 * the BCM 57810 card, for lack of a better way to handle such issues.
 * The user can still override this by specifying a romfile or rombar=1.
 *
 * Please see https://bugs.launchpad.net/qemu/+bug/1284874 for an analysis
 * of the 57810 card hang.  When adding a new vendor ID/device ID pair
 * below, please also add your card/environment details, and any
 * information that could help in debugging, to the bug tracking this
 * issue.
 */
static const struct {
    uint32_t vendor;
    uint32_t device;
} romblacklist[] = {
    /* Broadcom BCM 57810 */
    { .vendor = 0x14e4, .device = 0x168e },
};
49 
50 bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev)
51 {
52     int i;
53 
54     for (i = 0 ; i < ARRAY_SIZE(romblacklist); i++) {
55         if (vfio_pci_is(vdev, romblacklist[i].vendor, romblacklist[i].device)) {
56             trace_vfio_quirk_rom_blacklisted(vdev->vbasedev.name,
57                                              romblacklist[i].vendor,
58                                              romblacklist[i].device);
59             return true;
60         }
61     }
62     return false;
63 }
64 
65 /*
66  * Device specific quirks
67  */
68 
/*
 * Is range1, [first1, first1 + len1), fully contained within
 * range2, [first2, first2 + len2)?
 *
 * The test is done with subtractions only.  Computing first + len directly
 * can wrap around in uint64_t, which would make a range near the top of the
 * address space appear to sit inside a low range.
 */
static bool vfio_range_contained(uint64_t first1, uint64_t len1,
                                 uint64_t first2, uint64_t len2)
{
    /* range1 must not start before range2 or be longer than it */
    if (first1 < first2 || len1 > len2) {
        return false;
    }

    /* Neither subtraction can wrap after the checks above */
    return first1 - first2 <= len2 - len1;
}
74 
/*
 * Return true when @mask is non-zero and every bit of @mask is set in
 * @flags.  An empty mask never counts as enabled.
 */
static bool vfio_flags_enabled(uint8_t flags, uint8_t mask)
{
    if (!mask) {
        return false;
    }

    return (flags & mask) == mask;
}
79 
80 static uint64_t vfio_generic_window_quirk_read(void *opaque,
81                                                hwaddr addr, unsigned size)
82 {
83     VFIOQuirk *quirk = opaque;
84     VFIOPCIDevice *vdev = quirk->vdev;
85     uint64_t data;
86 
87     if (vfio_flags_enabled(quirk->data.flags, quirk->data.read_flags) &&
88         ranges_overlap(addr, size,
89                        quirk->data.data_offset, quirk->data.data_size)) {
90         hwaddr offset = addr - quirk->data.data_offset;
91 
92         if (!vfio_range_contained(addr, size, quirk->data.data_offset,
93                                   quirk->data.data_size)) {
94             hw_error("%s: window data read not fully contained: %s",
95                      __func__, memory_region_name(&quirk->mem));
96         }
97 
98         data = vfio_pci_read_config(&vdev->pdev,
99                                     quirk->data.address_val + offset, size);
100 
101         trace_vfio_generic_window_quirk_read(memory_region_name(&quirk->mem),
102                                              vdev->vbasedev.name,
103                                              quirk->data.bar,
104                                              addr, size, data);
105     } else {
106         data = vfio_region_read(&vdev->bars[quirk->data.bar].region,
107                                 addr + quirk->data.base_offset, size);
108     }
109 
110     return data;
111 }
112 
113 static void vfio_generic_window_quirk_write(void *opaque, hwaddr addr,
114                                             uint64_t data, unsigned size)
115 {
116     VFIOQuirk *quirk = opaque;
117     VFIOPCIDevice *vdev = quirk->vdev;
118 
119     if (ranges_overlap(addr, size,
120                        quirk->data.address_offset, quirk->data.address_size)) {
121 
122         if (addr != quirk->data.address_offset) {
123             hw_error("%s: offset write into address window: %s",
124                      __func__, memory_region_name(&quirk->mem));
125         }
126 
127         if ((data & ~quirk->data.address_mask) == quirk->data.address_match) {
128             quirk->data.flags |= quirk->data.write_flags |
129                                  quirk->data.read_flags;
130             quirk->data.address_val = data & quirk->data.address_mask;
131         } else {
132             quirk->data.flags &= ~(quirk->data.write_flags |
133                                    quirk->data.read_flags);
134         }
135     }
136 
137     if (vfio_flags_enabled(quirk->data.flags, quirk->data.write_flags) &&
138         ranges_overlap(addr, size,
139                        quirk->data.data_offset, quirk->data.data_size)) {
140         hwaddr offset = addr - quirk->data.data_offset;
141 
142         if (!vfio_range_contained(addr, size, quirk->data.data_offset,
143                                   quirk->data.data_size)) {
144             hw_error("%s: window data write not fully contained: %s",
145                      __func__, memory_region_name(&quirk->mem));
146         }
147 
148         vfio_pci_write_config(&vdev->pdev,
149                               quirk->data.address_val + offset, data, size);
150         trace_vfio_generic_window_quirk_write(memory_region_name(&quirk->mem),
151                                               vdev->vbasedev.name,
152                                               quirk->data.bar,
153                                               addr, data, size);
154         return;
155     }
156 
157     vfio_region_write(&vdev->bars[quirk->data.bar].region,
158                    addr + quirk->data.base_offset, data, size);
159 }
160 
161 static const MemoryRegionOps vfio_generic_window_quirk = {
162     .read = vfio_generic_window_quirk_read,
163     .write = vfio_generic_window_quirk_write,
164     .endianness = DEVICE_LITTLE_ENDIAN,
165 };
166 
167 static uint64_t vfio_generic_quirk_read(void *opaque,
168                                         hwaddr addr, unsigned size)
169 {
170     VFIOQuirk *quirk = opaque;
171     VFIOPCIDevice *vdev = quirk->vdev;
172     hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK;
173     hwaddr offset = quirk->data.address_match & ~TARGET_PAGE_MASK;
174     uint64_t data;
175 
176     if (vfio_flags_enabled(quirk->data.flags, quirk->data.read_flags) &&
177         ranges_overlap(addr, size, offset, quirk->data.address_mask + 1)) {
178         if (!vfio_range_contained(addr, size, offset,
179                                   quirk->data.address_mask + 1)) {
180             hw_error("%s: read not fully contained: %s",
181                      __func__, memory_region_name(&quirk->mem));
182         }
183 
184         data = vfio_pci_read_config(&vdev->pdev, addr - offset, size);
185 
186         trace_vfio_generic_quirk_read(memory_region_name(&quirk->mem),
187                                       vdev->vbasedev.name, quirk->data.bar,
188                                       addr + base, size, data);
189     } else {
190         data = vfio_region_read(&vdev->bars[quirk->data.bar].region,
191                                 addr + base, size);
192     }
193 
194     return data;
195 }
196 
197 static void vfio_generic_quirk_write(void *opaque, hwaddr addr,
198                                      uint64_t data, unsigned size)
199 {
200     VFIOQuirk *quirk = opaque;
201     VFIOPCIDevice *vdev = quirk->vdev;
202     hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK;
203     hwaddr offset = quirk->data.address_match & ~TARGET_PAGE_MASK;
204 
205     if (vfio_flags_enabled(quirk->data.flags, quirk->data.write_flags) &&
206         ranges_overlap(addr, size, offset, quirk->data.address_mask + 1)) {
207         if (!vfio_range_contained(addr, size, offset,
208                                   quirk->data.address_mask + 1)) {
209             hw_error("%s: write not fully contained: %s",
210                      __func__, memory_region_name(&quirk->mem));
211         }
212 
213         vfio_pci_write_config(&vdev->pdev, addr - offset, data, size);
214 
215         trace_vfio_generic_quirk_write(memory_region_name(&quirk->mem),
216                                        vdev->vbasedev.name, quirk->data.bar,
217                                        addr + base, data, size);
218     } else {
219         vfio_region_write(&vdev->bars[quirk->data.bar].region,
220                           addr + base, data, size);
221     }
222 }
223 
224 static const MemoryRegionOps vfio_generic_quirk = {
225     .read = vfio_generic_quirk_read,
226     .write = vfio_generic_quirk_write,
227     .endianness = DEVICE_LITTLE_ENDIAN,
228 };
229 
230 #define PCI_VENDOR_ID_ATI               0x1002
231 
232 /*
233  * Radeon HD cards (HD5450 & HD7850) report the upper byte of the I/O port BAR
234  * through VGA register 0x3c3.  On newer cards, the I/O port BAR is always
235  * BAR4 (older cards like the X550 used BAR1, but we don't care to support
236  * those).  Note that on bare metal, a read of 0x3c3 doesn't always return the
237  * I/O port BAR address.  Originally this was coded to return the virtual BAR
238  * address only if the physical register read returns the actual BAR address,
239  * but users have reported greater success if we return the virtual address
240  * unconditionally.
241  */
242 static uint64_t vfio_ati_3c3_quirk_read(void *opaque,
243                                         hwaddr addr, unsigned size)
244 {
245     VFIOQuirk *quirk = opaque;
246     VFIOPCIDevice *vdev = quirk->vdev;
247     uint64_t data = vfio_pci_read_config(&vdev->pdev,
248                                          PCI_BASE_ADDRESS_0 + (4 * 4) + 1,
249                                          size);
250     trace_vfio_ati_3c3_quirk_read(data);
251 
252     return data;
253 }
254 
255 static const MemoryRegionOps vfio_ati_3c3_quirk = {
256     .read = vfio_ati_3c3_quirk_read,
257     .endianness = DEVICE_LITTLE_ENDIAN,
258 };
259 
260 static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev)
261 {
262     PCIDevice *pdev = &vdev->pdev;
263     VFIOQuirk *quirk;
264 
265     if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) {
266         return;
267     }
268 
269     /*
270      * As long as the BAR is >= 256 bytes it will be aligned such that the
271      * lower byte is always zero.  Filter out anything else, if it exists.
272      */
273     if (!vdev->bars[4].ioport || vdev->bars[4].region.size < 256) {
274         return;
275     }
276 
277     quirk = g_malloc0(sizeof(*quirk));
278     quirk->vdev = vdev;
279 
280     memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, quirk,
281                           "vfio-ati-3c3-quirk", 1);
282     memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
283                                 3 /* offset 3 bytes from 0x3c0 */, &quirk->mem);
284 
285     QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks,
286                       quirk, next);
287 
288     trace_vfio_vga_probe_ati_3c3_quirk(vdev->vbasedev.name);
289 }
290 
291 /*
292  * Newer ATI/AMD devices, including HD5450 and HD7850, have a window to PCI
293  * config space through MMIO BAR2 at offset 0x4000.  Nothing seems to access
294  * the MMIO space directly, but a window to this space is provided through
295  * I/O port BAR4.  Offset 0x0 is the address register and offset 0x4 is the
296  * data register.  When the address is programmed to a range of 0x4000-0x4fff
297  * PCI configuration space is available.  Experimentation seems to indicate
298  * that only read-only access is provided, but we drop writes when the window
299  * is enabled to config space nonetheless.
300  */
301 static void vfio_probe_ati_bar4_window_quirk(VFIOPCIDevice *vdev, int nr)
302 {
303     PCIDevice *pdev = &vdev->pdev;
304     VFIOQuirk *quirk;
305 
306     if (!vdev->has_vga || nr != 4 ||
307         pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) {
308         return;
309     }
310 
311     quirk = g_malloc0(sizeof(*quirk));
312     quirk->vdev = vdev;
313     quirk->data.address_size = 4;
314     quirk->data.data_offset = 4;
315     quirk->data.data_size = 4;
316     quirk->data.address_match = 0x4000;
317     quirk->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1;
318     quirk->data.bar = nr;
319     quirk->data.read_flags = quirk->data.write_flags = 1;
320 
321     memory_region_init_io(&quirk->mem, OBJECT(vdev),
322                           &vfio_generic_window_quirk, quirk,
323                           "vfio-ati-bar4-window-quirk", 8);
324     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
325                           quirk->data.base_offset, &quirk->mem, 1);
326 
327     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
328 
329     trace_vfio_probe_ati_bar4_window_quirk(vdev->vbasedev.name);
330 }
331 
332 /*
333  * Trap the BAR2 MMIO window to config space as well.
334  */
335 static void vfio_probe_ati_bar2_4000_quirk(VFIOPCIDevice *vdev, int nr)
336 {
337     PCIDevice *pdev = &vdev->pdev;
338     VFIOQuirk *quirk;
339 
340     /* Only enable on newer devices where BAR2 is 64bit */
341     if (!vdev->has_vga || nr != 2 || !vdev->bars[2].mem64 ||
342         pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) {
343         return;
344     }
345 
346     quirk = g_malloc0(sizeof(*quirk));
347     quirk->vdev = vdev;
348     quirk->data.flags = quirk->data.read_flags = quirk->data.write_flags = 1;
349     quirk->data.address_match = 0x4000;
350     quirk->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1;
351     quirk->data.bar = nr;
352 
353     memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_generic_quirk, quirk,
354                           "vfio-ati-bar2-4000-quirk",
355                           TARGET_PAGE_ALIGN(quirk->data.address_mask + 1));
356     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
357                           quirk->data.address_match & TARGET_PAGE_MASK,
358                           &quirk->mem, 1);
359 
360     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
361 
362     trace_vfio_probe_ati_bar2_4000_quirk(vdev->vbasedev.name);
363 }
364 
365 /*
366  * Older ATI/AMD cards like the X550 have a similar window to that above.
367  * I/O port BAR1 provides a window to a mirror of PCI config space located
368  * in BAR2 at offset 0xf00.  We don't care to support such older cards, but
369  * note it for future reference.
370  */
371 
#define PCI_VENDOR_ID_NVIDIA                    0x10de

/*
 * Nvidia has several different methods to get to config space; the
 * nouveau project has several of these documented here:
 * https://github.com/pathscale/envytools/tree/master/hwdocs
 *
 * The first quirk is actually not documented in envytools and is found
 * on 10de:01d1 (NVIDIA Corporation G72 [GeForce 7300 LE]).  This is an
 * NV46 chipset.  The backdoor uses the legacy VGA I/O ports to access
 * the mirror of PCI config space found at BAR0 offset 0x1800.  The access
 * sequence first writes 0x338 to I/O port 0x3d4.  The target offset is
 * then written to 0x3d0.  Finally 0x538 is written for a read and 0x738
 * is written for a write to 0x3d4.  The BAR0 offset is then accessible
 * through 0x3d0.  This quirk doesn't seem to be necessary on newer cards
 * that use the I/O port BAR5 window but it doesn't hurt to leave it.
 *
 * The states below track progress through that access sequence.
 */
enum {
    NV_3D0_NONE   = 0,  /* idle, no sequence in progress */
    NV_3D0_SELECT = 1,  /* 0x338 seen, waiting for the target offset */
    NV_3D0_WINDOW = 2,  /* offset latched, waiting for 0x538 or 0x738 */
    NV_3D0_READ   = 3,  /* next data port read comes from config space */
    NV_3D0_WRITE  = 4,  /* next data port write goes to config space */
};
396 
397 static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque,
398                                            hwaddr addr, unsigned size)
399 {
400     VFIOQuirk *quirk = opaque;
401     VFIOPCIDevice *vdev = quirk->vdev;
402     PCIDevice *pdev = &vdev->pdev;
403     uint64_t data = vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
404                                   addr + quirk->data.base_offset, size);
405 
406     if (quirk->data.flags == NV_3D0_READ && addr == quirk->data.data_offset) {
407         data = vfio_pci_read_config(pdev, quirk->data.address_val, size);
408         trace_vfio_nvidia_3d0_quirk_read(size, data);
409     }
410 
411     quirk->data.flags = NV_3D0_NONE;
412 
413     return data;
414 }
415 
416 static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr,
417                                         uint64_t data, unsigned size)
418 {
419     VFIOQuirk *quirk = opaque;
420     VFIOPCIDevice *vdev = quirk->vdev;
421     PCIDevice *pdev = &vdev->pdev;
422 
423     switch (quirk->data.flags) {
424     case NV_3D0_NONE:
425         if (addr == quirk->data.address_offset && data == 0x338) {
426             quirk->data.flags = NV_3D0_SELECT;
427         }
428         break;
429     case NV_3D0_SELECT:
430         quirk->data.flags = NV_3D0_NONE;
431         if (addr == quirk->data.data_offset &&
432             (data & ~quirk->data.address_mask) == quirk->data.address_match) {
433             quirk->data.flags = NV_3D0_WINDOW;
434             quirk->data.address_val = data & quirk->data.address_mask;
435         }
436         break;
437     case NV_3D0_WINDOW:
438         quirk->data.flags = NV_3D0_NONE;
439         if (addr == quirk->data.address_offset) {
440             if (data == 0x538) {
441                 quirk->data.flags = NV_3D0_READ;
442             } else if (data == 0x738) {
443                 quirk->data.flags = NV_3D0_WRITE;
444             }
445         }
446         break;
447     case NV_3D0_WRITE:
448         quirk->data.flags = NV_3D0_NONE;
449         if (addr == quirk->data.data_offset) {
450             vfio_pci_write_config(pdev, quirk->data.address_val, data, size);
451             trace_vfio_nvidia_3d0_quirk_write(data, size);
452             return;
453         }
454         break;
455     }
456 
457     vfio_vga_write(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
458                    addr + quirk->data.base_offset, data, size);
459 }
460 
461 static const MemoryRegionOps vfio_nvidia_3d0_quirk = {
462     .read = vfio_nvidia_3d0_quirk_read,
463     .write = vfio_nvidia_3d0_quirk_write,
464     .endianness = DEVICE_LITTLE_ENDIAN,
465 };
466 
467 static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev)
468 {
469     PCIDevice *pdev = &vdev->pdev;
470     VFIOQuirk *quirk;
471 
472     if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA ||
473         !vdev->bars[1].region.size) {
474         return;
475     }
476 
477     quirk = g_malloc0(sizeof(*quirk));
478     quirk->vdev = vdev;
479     quirk->data.base_offset = 0x10;
480     quirk->data.address_offset = 4;
481     quirk->data.address_size = 2;
482     quirk->data.address_match = 0x1800;
483     quirk->data.address_mask = PCI_CONFIG_SPACE_SIZE - 1;
484     quirk->data.data_offset = 0;
485     quirk->data.data_size = 4;
486 
487     memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_nvidia_3d0_quirk,
488                           quirk, "vfio-nvidia-3d0-quirk", 6);
489     memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
490                                 quirk->data.base_offset, &quirk->mem);
491 
492     QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks,
493                       quirk, next);
494 
495     trace_vfio_vga_probe_nvidia_3d0_quirk(vdev->vbasedev.name);
496 }
497 
/*
 * The second quirk is documented in envytools.  The I/O port BAR5 is just
 * a set of address/data ports to the MMIO BARs.  The BAR we care about is
 * again BAR0.  This backdoor is apparently a bit newer than the one above,
 * so we need to trap not only the 256 bytes @0x1800 but also the 4k
 * @0x88000, where all of PCI config space, including extended space, is
 * available.
 *
 * These are the bits tracked in the quirk's flags; the window is only
 * usable when all three are set.
 */
enum {
    NV_BAR5_ADDRESS = 1 << 0,
    NV_BAR5_ENABLE  = 1 << 1,
    NV_BAR5_MASTER  = 1 << 2,
    NV_BAR5_VALID   = NV_BAR5_ADDRESS | NV_BAR5_ENABLE | NV_BAR5_MASTER,
};
511 
512 static void vfio_nvidia_bar5_window_quirk_write(void *opaque, hwaddr addr,
513                                                 uint64_t data, unsigned size)
514 {
515     VFIOQuirk *quirk = opaque;
516 
517     switch (addr) {
518     case 0x0:
519         if (data & 0x1) {
520             quirk->data.flags |= NV_BAR5_MASTER;
521         } else {
522             quirk->data.flags &= ~NV_BAR5_MASTER;
523         }
524         break;
525     case 0x4:
526         if (data & 0x1) {
527             quirk->data.flags |= NV_BAR5_ENABLE;
528         } else {
529             quirk->data.flags &= ~NV_BAR5_ENABLE;
530         }
531         break;
532     case 0x8:
533         if (quirk->data.flags & NV_BAR5_MASTER) {
534             if ((data & ~0xfff) == 0x88000) {
535                 quirk->data.flags |= NV_BAR5_ADDRESS;
536                 quirk->data.address_val = data & 0xfff;
537             } else if ((data & ~0xff) == 0x1800) {
538                 quirk->data.flags |= NV_BAR5_ADDRESS;
539                 quirk->data.address_val = data & 0xff;
540             } else {
541                 quirk->data.flags &= ~NV_BAR5_ADDRESS;
542             }
543         }
544         break;
545     }
546 
547     vfio_generic_window_quirk_write(opaque, addr, data, size);
548 }
549 
550 static const MemoryRegionOps vfio_nvidia_bar5_window_quirk = {
551     .read = vfio_generic_window_quirk_read,
552     .write = vfio_nvidia_bar5_window_quirk_write,
553     .valid.min_access_size = 4,
554     .endianness = DEVICE_LITTLE_ENDIAN,
555 };
556 
557 static void vfio_probe_nvidia_bar5_window_quirk(VFIOPCIDevice *vdev, int nr)
558 {
559     PCIDevice *pdev = &vdev->pdev;
560     VFIOQuirk *quirk;
561 
562     if (!vdev->has_vga || nr != 5 ||
563         pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA) {
564         return;
565     }
566 
567     quirk = g_malloc0(sizeof(*quirk));
568     quirk->vdev = vdev;
569     quirk->data.read_flags = quirk->data.write_flags = NV_BAR5_VALID;
570     quirk->data.address_offset = 0x8;
571     quirk->data.address_size = 0; /* actually 4, but avoids generic code */
572     quirk->data.data_offset = 0xc;
573     quirk->data.data_size = 4;
574     quirk->data.bar = nr;
575 
576     memory_region_init_io(&quirk->mem, OBJECT(vdev),
577                           &vfio_nvidia_bar5_window_quirk, quirk,
578                           "vfio-nvidia-bar5-window-quirk", 16);
579     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
580                                         0, &quirk->mem, 1);
581 
582     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
583 
584     trace_vfio_probe_nvidia_bar5_window_quirk(vdev->vbasedev.name);
585 }
586 
587 static void vfio_nvidia_88000_quirk_write(void *opaque, hwaddr addr,
588                                           uint64_t data, unsigned size)
589 {
590     VFIOQuirk *quirk = opaque;
591     VFIOPCIDevice *vdev = quirk->vdev;
592     PCIDevice *pdev = &vdev->pdev;
593     hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK;
594 
595     vfio_generic_quirk_write(opaque, addr, data, size);
596 
597     /*
598      * Nvidia seems to acknowledge MSI interrupts by writing 0xff to the
599      * MSI capability ID register.  Both the ID and next register are
600      * read-only, so we allow writes covering either of those to real hw.
601      * NB - only fixed for the 0x88000 MMIO window.
602      */
603     if ((pdev->cap_present & QEMU_PCI_CAP_MSI) &&
604         vfio_range_contained(addr, size, pdev->msi_cap, PCI_MSI_FLAGS)) {
605         vfio_region_write(&vdev->bars[quirk->data.bar].region,
606                           addr + base, data, size);
607     }
608 }
609 
610 static const MemoryRegionOps vfio_nvidia_88000_quirk = {
611     .read = vfio_generic_quirk_read,
612     .write = vfio_nvidia_88000_quirk_write,
613     .endianness = DEVICE_LITTLE_ENDIAN,
614 };
615 
616 /*
617  * Finally, BAR0 itself.  We want to redirect any accesses to either
618  * 0x1800 or 0x88000 through the PCI config space access functions.
619  *
620  * NB - quirk at a page granularity or else they don't seem to work when
621  *      BARs are mmap'd
622  *
623  * Here's offset 0x88000...
624  */
625 static void vfio_probe_nvidia_bar0_88000_quirk(VFIOPCIDevice *vdev, int nr)
626 {
627     PCIDevice *pdev = &vdev->pdev;
628     VFIOQuirk *quirk;
629     uint16_t vendor, class;
630 
631     vendor = pci_get_word(pdev->config + PCI_VENDOR_ID);
632     class = pci_get_word(pdev->config + PCI_CLASS_DEVICE);
633 
634     if (nr != 0 || vendor != PCI_VENDOR_ID_NVIDIA ||
635         class != PCI_CLASS_DISPLAY_VGA) {
636         return;
637     }
638 
639     quirk = g_malloc0(sizeof(*quirk));
640     quirk->vdev = vdev;
641     quirk->data.flags = quirk->data.read_flags = quirk->data.write_flags = 1;
642     quirk->data.address_match = 0x88000;
643     quirk->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1;
644     quirk->data.bar = nr;
645 
646     memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_nvidia_88000_quirk,
647                           quirk, "vfio-nvidia-bar0-88000-quirk",
648                           TARGET_PAGE_ALIGN(quirk->data.address_mask + 1));
649     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
650                           quirk->data.address_match & TARGET_PAGE_MASK,
651                           &quirk->mem, 1);
652 
653     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
654 
655     trace_vfio_probe_nvidia_bar0_88000_quirk(vdev->vbasedev.name);
656 }
657 
658 /*
659  * And here's the same for BAR0 offset 0x1800...
660  */
661 static void vfio_probe_nvidia_bar0_1800_quirk(VFIOPCIDevice *vdev, int nr)
662 {
663     PCIDevice *pdev = &vdev->pdev;
664     VFIOQuirk *quirk;
665 
666     if (!vdev->has_vga || nr != 0 ||
667         pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA) {
668         return;
669     }
670 
671     /* Log the chipset ID */
672     trace_vfio_probe_nvidia_bar0_1800_quirk_id(
673             (unsigned int)(vfio_region_read(&vdev->bars[0].region, 0, 4) >> 20)
674             & 0xff);
675 
676     quirk = g_malloc0(sizeof(*quirk));
677     quirk->vdev = vdev;
678     quirk->data.flags = quirk->data.read_flags = quirk->data.write_flags = 1;
679     quirk->data.address_match = 0x1800;
680     quirk->data.address_mask = PCI_CONFIG_SPACE_SIZE - 1;
681     quirk->data.bar = nr;
682 
683     memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_generic_quirk, quirk,
684                           "vfio-nvidia-bar0-1800-quirk",
685                           TARGET_PAGE_ALIGN(quirk->data.address_mask + 1));
686     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
687                           quirk->data.address_match & TARGET_PAGE_MASK,
688                           &quirk->mem, 1);
689 
690     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
691 
692     trace_vfio_probe_nvidia_bar0_1800_quirk(vdev->vbasedev.name);
693 }
694 
695 /*
696  * TODO - Some Nvidia devices provide config access to their companion HDA
697  * device and even to their parent bridge via these config space mirrors.
698  * Add quirks for those regions.
699  */
700 
701 #define PCI_VENDOR_ID_REALTEK 0x10ec
702 
703 /*
704  * RTL8168 devices have a backdoor that can access the MSI-X table.  At BAR2
705  * offset 0x70 there is a dword data register, offset 0x74 is a dword address
706  * register.  According to the Linux r8169 driver, the MSI-X table is addressed
707  * when the "type" portion of the address register is set to 0x1.  This appears
708  * to be bits 16:30.  Bit 31 is both a write indicator and some sort of
709  * "address latched" indicator.  Bits 12:15 are a mask field, which we can
710  * ignore because the MSI-X table should always be accessed as a dword (full
711  * mask).  Bits 0:11 is offset within the type.
712  *
713  * Example trace:
714  *
715  * Read from MSI-X table offset 0
716  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x1f000, 4) // store read addr
717  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x8001f000 // latch
718  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x70, 4) = 0xfee00398 // read data
719  *
720  * Write 0xfee00000 to MSI-X table offset 0
721  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x70, 0xfee00000, 4) // write data
722  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x8001f000, 4) // do write
723  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x1f000 // complete
724  */
725 static uint64_t vfio_rtl8168_window_quirk_read(void *opaque,
726                                                hwaddr addr, unsigned size)
727 {
728     VFIOQuirk *quirk = opaque;
729     VFIOPCIDevice *vdev = quirk->vdev;
730     uint64_t val = 0;
731 
732     if (!quirk->data.flags) { /* Non-MSI-X table access */
733         return vfio_region_read(&vdev->bars[quirk->data.bar].region,
734                                 addr + 0x70, size);
735     }
736 
737     switch (addr) {
738     case 4: /* address */
739         val = quirk->data.address_match ^ 0x80000000U; /* latch/complete */
740         break;
741     case 0: /* data */
742         if ((vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) {
743             memory_region_dispatch_read(&vdev->pdev.msix_table_mmio,
744                                 (hwaddr)(quirk->data.address_match & 0xfff),
745                                 &val, size, MEMTXATTRS_UNSPECIFIED);
746         }
747         break;
748     }
749 
750     trace_vfio_rtl8168_quirk_read(vdev->vbasedev.name,
751                                   addr ? "address" : "data", val);
752     return val;
753 }
754 
755 static void vfio_rtl8168_window_quirk_write(void *opaque, hwaddr addr,
756                                             uint64_t data, unsigned size)
757 {
758     VFIOQuirk *quirk = opaque;
759     VFIOPCIDevice *vdev = quirk->vdev;
760 
761     switch (addr) {
762     case 4: /* address */
763         if ((data & 0x7fff0000) == 0x10000) { /* MSI-X table */
764             quirk->data.flags = 1; /* Activate reads */
765             quirk->data.address_match = data;
766 
767             trace_vfio_rtl8168_quirk_write(vdev->vbasedev.name, data);
768 
769             if (data & 0x80000000U) { /* Do write */
770                 if (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) {
771                     hwaddr offset = data & 0xfff;
772                     uint64_t val = quirk->data.address_mask;
773 
774                     trace_vfio_rtl8168_quirk_msix(vdev->vbasedev.name,
775                                                   (uint16_t)offset, val);
776 
777                     /* Write to the proper guest MSI-X table instead */
778                     memory_region_dispatch_write(&vdev->pdev.msix_table_mmio,
779                                                  offset, val, size,
780                                                  MEMTXATTRS_UNSPECIFIED);
781                 }
782                 return; /* Do not write guest MSI-X data to hardware */
783             }
784         } else {
785             quirk->data.flags = 0; /* De-activate reads, non-MSI-X */
786         }
787         break;
788     case 0: /* data */
789         quirk->data.address_mask = data;
790         break;
791     }
792 
793     vfio_region_write(&vdev->bars[quirk->data.bar].region,
794                       addr + 0x70, data, size);
795 }
796 
797 static const MemoryRegionOps vfio_rtl8168_window_quirk = {
798     .read = vfio_rtl8168_window_quirk_read,
799     .write = vfio_rtl8168_window_quirk_write,
800     .valid = {
801         .min_access_size = 4,
802         .max_access_size = 4,
803         .unaligned = false,
804     },
805     .endianness = DEVICE_LITTLE_ENDIAN,
806 };
807 
808 static void vfio_probe_rtl8168_bar2_window_quirk(VFIOPCIDevice *vdev, int nr)
809 {
810     PCIDevice *pdev = &vdev->pdev;
811     VFIOQuirk *quirk;
812 
813     if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_REALTEK ||
814         pci_get_word(pdev->config + PCI_DEVICE_ID) != 0x8168 || nr != 2) {
815         return;
816     }
817 
818     quirk = g_malloc0(sizeof(*quirk));
819     quirk->vdev = vdev;
820     quirk->data.bar = nr;
821 
822     memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_rtl8168_window_quirk,
823                           quirk, "vfio-rtl8168-window-quirk", 8);
824     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
825                                         0x70, &quirk->mem, 1);
826 
827     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
828 
829     trace_vfio_rtl8168_quirk_enable(vdev->vbasedev.name);
830 }
831 
832 /*
833  * Common quirk probe entry points.
834  */
835 void vfio_vga_quirk_setup(VFIOPCIDevice *vdev)
836 {
837     vfio_vga_probe_ati_3c3_quirk(vdev);
838     vfio_vga_probe_nvidia_3d0_quirk(vdev);
839 }
840 
841 void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev)
842 {
843     VFIOQuirk *quirk;
844     int i;
845 
846     for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) {
847         QLIST_FOREACH(quirk, &vdev->vga.region[i].quirks, next) {
848             memory_region_del_subregion(&vdev->vga.region[i].mem, &quirk->mem);
849         }
850     }
851 }
852 
853 void vfio_vga_quirk_free(VFIOPCIDevice *vdev)
854 {
855     int i;
856 
857     for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) {
858         while (!QLIST_EMPTY(&vdev->vga.region[i].quirks)) {
859             VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga.region[i].quirks);
860             object_unparent(OBJECT(&quirk->mem));
861             QLIST_REMOVE(quirk, next);
862             g_free(quirk);
863         }
864     }
865 }
866 
867 void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
868 {
869     vfio_probe_ati_bar4_window_quirk(vdev, nr);
870     vfio_probe_ati_bar2_4000_quirk(vdev, nr);
871     vfio_probe_nvidia_bar5_window_quirk(vdev, nr);
872     vfio_probe_nvidia_bar0_88000_quirk(vdev, nr);
873     vfio_probe_nvidia_bar0_1800_quirk(vdev, nr);
874     vfio_probe_rtl8168_bar2_window_quirk(vdev, nr);
875 }
876 
877 void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr)
878 {
879     VFIOBAR *bar = &vdev->bars[nr];
880     VFIOQuirk *quirk;
881 
882     QLIST_FOREACH(quirk, &bar->quirks, next) {
883         memory_region_del_subregion(&bar->region.mem, &quirk->mem);
884     }
885 }
886 
887 void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr)
888 {
889     VFIOBAR *bar = &vdev->bars[nr];
890 
891     while (!QLIST_EMPTY(&bar->quirks)) {
892         VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks);
893         object_unparent(OBJECT(&quirk->mem));
894         QLIST_REMOVE(quirk, next);
895         g_free(quirk);
896     }
897 }
898