xref: /qemu/hw/vfio/pci-quirks.c (revision 8c4f234853d9d438dc1733ca98674b1139a87c99)
1 /*
2  * device quirks for PCI devices
3  *
4  * Copyright Red Hat, Inc. 2012-2015
5  *
6  * Authors:
7  *  Alex Williamson <alex.williamson@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  */
12 
13 #include "pci.h"
14 #include "trace.h"
15 #include "qemu/range.h"
16 
17 #define PCI_ANY_ID (~0)
18 
19 /* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */
20 static bool vfio_pci_is(VFIOPCIDevice *vdev, uint32_t vendor, uint32_t device)
21 {
22     PCIDevice *pdev = &vdev->pdev;
23 
24     return (vendor == PCI_ANY_ID ||
25             vendor == pci_get_word(pdev->config + PCI_VENDOR_ID)) &&
26            (device == PCI_ANY_ID ||
27             device == pci_get_word(pdev->config + PCI_DEVICE_ID));
28 }
29 
/*
 * List of vendor/device IDs for which option ROM loading is disabled.
 * This avoids guest hangs during ROM execution, as seen with the
 * BCM 57810 card, for lack of a better way to handle such issues.
 * The user can still override this by specifying a romfile or rombar=1.
 * Please see https://bugs.launchpad.net/qemu/+bug/1284874
 * for an analysis of the 57810 card hang.  When adding a new
 * vendor id/device id combination below, please also add your
 * card/environment details, and any information that could help in
 * debugging, to the bug tracking this issue.
 */
static const struct {
    uint32_t vendor;
    uint32_t device;
} romblacklist[] = {
    { .vendor = 0x14e4, .device = 0x168e }, /* Broadcom BCM 57810 */
};
49 
50 bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev)
51 {
52     int i;
53 
54     for (i = 0 ; i < ARRAY_SIZE(romblacklist); i++) {
55         if (vfio_pci_is(vdev, romblacklist[i].vendor, romblacklist[i].device)) {
56             trace_vfio_quirk_rom_blacklisted(vdev->vbasedev.name,
57                                              romblacklist[i].vendor,
58                                              romblacklist[i].device);
59             return true;
60         }
61     }
62     return false;
63 }
64 
65 /*
66  * Device specific quirks
67  */
68 
/*
 * Is range1 fully contained within range2?
 *
 * Each range is described by its first offset and its length in bytes.
 *
 * The comparison is done with subtractions only.  The naive form
 * "first1 + len1 <= first2 + len2" wraps around when either range reaches
 * the top of the 64-bit space, which can report a wrapped range1 as
 * contained or reject a range1 that really lies inside a range2 ending at
 * the very top.  Once first1 >= first2 and len1 <= len2 are established,
 * neither subtraction below can underflow.
 */
static bool vfio_range_contained(uint64_t first1, uint64_t len1,
                                 uint64_t first2, uint64_t len2)
{
    return first1 >= first2 && len1 <= len2 &&
           first1 - first2 <= len2 - len1;
}
74 
/*
 * Return true only when @mask is non-zero and every bit set in @mask is
 * also set in @flags.  An empty mask never counts as enabled.
 */
static bool vfio_flags_enabled(uint8_t flags, uint8_t mask)
{
    if (!mask) {
        return false;
    }

    return (flags & mask) == mask;
}
79 
80 static uint64_t vfio_generic_window_quirk_read(void *opaque,
81                                                hwaddr addr, unsigned size)
82 {
83     VFIOLegacyQuirk *quirk = opaque;
84     VFIOPCIDevice *vdev = quirk->vdev;
85     uint64_t data;
86 
87     if (vfio_flags_enabled(quirk->data.flags, quirk->data.read_flags) &&
88         ranges_overlap(addr, size,
89                        quirk->data.data_offset, quirk->data.data_size)) {
90         hwaddr offset = addr - quirk->data.data_offset;
91 
92         if (!vfio_range_contained(addr, size, quirk->data.data_offset,
93                                   quirk->data.data_size)) {
94             hw_error("%s: window data read not fully contained: %s",
95                      __func__, memory_region_name(quirk->mem));
96         }
97 
98         data = vfio_pci_read_config(&vdev->pdev,
99                                     quirk->data.address_val + offset, size);
100 
101         trace_vfio_generic_window_quirk_read(memory_region_name(quirk->mem),
102                                              vdev->vbasedev.name,
103                                              quirk->data.bar,
104                                              addr, size, data);
105     } else {
106         data = vfio_region_read(&vdev->bars[quirk->data.bar].region,
107                                 addr + quirk->data.base_offset, size);
108     }
109 
110     return data;
111 }
112 
113 static void vfio_generic_window_quirk_write(void *opaque, hwaddr addr,
114                                             uint64_t data, unsigned size)
115 {
116     VFIOLegacyQuirk *quirk = opaque;
117     VFIOPCIDevice *vdev = quirk->vdev;
118 
119     if (ranges_overlap(addr, size,
120                        quirk->data.address_offset, quirk->data.address_size)) {
121 
122         if (addr != quirk->data.address_offset) {
123             hw_error("%s: offset write into address window: %s",
124                      __func__, memory_region_name(quirk->mem));
125         }
126 
127         if ((data & ~quirk->data.address_mask) == quirk->data.address_match) {
128             quirk->data.flags |= quirk->data.write_flags |
129                                  quirk->data.read_flags;
130             quirk->data.address_val = data & quirk->data.address_mask;
131         } else {
132             quirk->data.flags &= ~(quirk->data.write_flags |
133                                    quirk->data.read_flags);
134         }
135     }
136 
137     if (vfio_flags_enabled(quirk->data.flags, quirk->data.write_flags) &&
138         ranges_overlap(addr, size,
139                        quirk->data.data_offset, quirk->data.data_size)) {
140         hwaddr offset = addr - quirk->data.data_offset;
141 
142         if (!vfio_range_contained(addr, size, quirk->data.data_offset,
143                                   quirk->data.data_size)) {
144             hw_error("%s: window data write not fully contained: %s",
145                      __func__, memory_region_name(quirk->mem));
146         }
147 
148         vfio_pci_write_config(&vdev->pdev,
149                               quirk->data.address_val + offset, data, size);
150         trace_vfio_generic_window_quirk_write(memory_region_name(quirk->mem),
151                                               vdev->vbasedev.name,
152                                               quirk->data.bar,
153                                               addr, data, size);
154         return;
155     }
156 
157     vfio_region_write(&vdev->bars[quirk->data.bar].region,
158                    addr + quirk->data.base_offset, data, size);
159 }
160 
161 static const MemoryRegionOps vfio_generic_window_quirk = {
162     .read = vfio_generic_window_quirk_read,
163     .write = vfio_generic_window_quirk_write,
164     .endianness = DEVICE_LITTLE_ENDIAN,
165 };
166 
167 static uint64_t vfio_generic_quirk_read(void *opaque,
168                                         hwaddr addr, unsigned size)
169 {
170     VFIOLegacyQuirk *quirk = opaque;
171     VFIOPCIDevice *vdev = quirk->vdev;
172     hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK;
173     hwaddr offset = quirk->data.address_match & ~TARGET_PAGE_MASK;
174     uint64_t data;
175 
176     if (vfio_flags_enabled(quirk->data.flags, quirk->data.read_flags) &&
177         ranges_overlap(addr, size, offset, quirk->data.address_mask + 1)) {
178         if (!vfio_range_contained(addr, size, offset,
179                                   quirk->data.address_mask + 1)) {
180             hw_error("%s: read not fully contained: %s",
181                      __func__, memory_region_name(quirk->mem));
182         }
183 
184         data = vfio_pci_read_config(&vdev->pdev, addr - offset, size);
185 
186         trace_vfio_generic_quirk_read(memory_region_name(quirk->mem),
187                                       vdev->vbasedev.name, quirk->data.bar,
188                                       addr + base, size, data);
189     } else {
190         data = vfio_region_read(&vdev->bars[quirk->data.bar].region,
191                                 addr + base, size);
192     }
193 
194     return data;
195 }
196 
197 static void vfio_generic_quirk_write(void *opaque, hwaddr addr,
198                                      uint64_t data, unsigned size)
199 {
200     VFIOLegacyQuirk *quirk = opaque;
201     VFIOPCIDevice *vdev = quirk->vdev;
202     hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK;
203     hwaddr offset = quirk->data.address_match & ~TARGET_PAGE_MASK;
204 
205     if (vfio_flags_enabled(quirk->data.flags, quirk->data.write_flags) &&
206         ranges_overlap(addr, size, offset, quirk->data.address_mask + 1)) {
207         if (!vfio_range_contained(addr, size, offset,
208                                   quirk->data.address_mask + 1)) {
209             hw_error("%s: write not fully contained: %s",
210                      __func__, memory_region_name(quirk->mem));
211         }
212 
213         vfio_pci_write_config(&vdev->pdev, addr - offset, data, size);
214 
215         trace_vfio_generic_quirk_write(memory_region_name(quirk->mem),
216                                        vdev->vbasedev.name, quirk->data.bar,
217                                        addr + base, data, size);
218     } else {
219         vfio_region_write(&vdev->bars[quirk->data.bar].region,
220                           addr + base, data, size);
221     }
222 }
223 
224 static const MemoryRegionOps vfio_generic_quirk = {
225     .read = vfio_generic_quirk_read,
226     .write = vfio_generic_quirk_write,
227     .endianness = DEVICE_LITTLE_ENDIAN,
228 };
229 
230 #define PCI_VENDOR_ID_ATI               0x1002
231 
232 /*
233  * Radeon HD cards (HD5450 & HD7850) report the upper byte of the I/O port BAR
234  * through VGA register 0x3c3.  On newer cards, the I/O port BAR is always
235  * BAR4 (older cards like the X550 used BAR1, but we don't care to support
236  * those).  Note that on bare metal, a read of 0x3c3 doesn't always return the
237  * I/O port BAR address.  Originally this was coded to return the virtual BAR
238  * address only if the physical register read returns the actual BAR address,
239  * but users have reported greater success if we return the virtual address
240  * unconditionally.
241  */
242 static uint64_t vfio_ati_3c3_quirk_read(void *opaque,
243                                         hwaddr addr, unsigned size)
244 {
245     VFIOLegacyQuirk *quirk = opaque;
246     VFIOPCIDevice *vdev = quirk->vdev;
247     uint64_t data = vfio_pci_read_config(&vdev->pdev,
248                                          PCI_BASE_ADDRESS_0 + (4 * 4) + 1,
249                                          size);
250     trace_vfio_ati_3c3_quirk_read(data);
251 
252     return data;
253 }
254 
255 static const MemoryRegionOps vfio_ati_3c3_quirk = {
256     .read = vfio_ati_3c3_quirk_read,
257     .endianness = DEVICE_LITTLE_ENDIAN,
258 };
259 
260 static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev)
261 {
262     PCIDevice *pdev = &vdev->pdev;
263     VFIOQuirk *quirk;
264     VFIOLegacyQuirk *legacy;
265 
266     if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) {
267         return;
268     }
269 
270     /*
271      * As long as the BAR is >= 256 bytes it will be aligned such that the
272      * lower byte is always zero.  Filter out anything else, if it exists.
273      */
274     if (!vdev->bars[4].ioport || vdev->bars[4].region.size < 256) {
275         return;
276     }
277 
278     quirk = g_malloc0(sizeof(*quirk));
279     legacy = quirk->data = g_malloc0(sizeof(*legacy));
280     quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
281     quirk->nr_mem = 1;
282     legacy->vdev = vdev;
283 
284     memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, legacy,
285                           "vfio-ati-3c3-quirk", 1);
286     memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
287                                 3 /* offset 3 bytes from 0x3c0 */, quirk->mem);
288 
289     QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks,
290                       quirk, next);
291 
292     trace_vfio_vga_probe_ati_3c3_quirk(vdev->vbasedev.name);
293 }
294 
295 /*
296  * Newer ATI/AMD devices, including HD5450 and HD7850, have a window to PCI
297  * config space through MMIO BAR2 at offset 0x4000.  Nothing seems to access
298  * the MMIO space directly, but a window to this space is provided through
299  * I/O port BAR4.  Offset 0x0 is the address register and offset 0x4 is the
300  * data register.  When the address is programmed to a range of 0x4000-0x4fff
301  * PCI configuration space is available.  Experimentation seems to indicate
302  * that only read-only access is provided, but we drop writes when the window
303  * is enabled to config space nonetheless.
304  */
305 static void vfio_probe_ati_bar4_window_quirk(VFIOPCIDevice *vdev, int nr)
306 {
307     PCIDevice *pdev = &vdev->pdev;
308     VFIOQuirk *quirk;
309     VFIOLegacyQuirk *legacy;
310 
311     if (!vdev->has_vga || nr != 4 ||
312         pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) {
313         return;
314     }
315 
316     quirk = g_malloc0(sizeof(*quirk));
317     quirk->data = legacy = g_malloc0(sizeof(*legacy));
318     quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
319     quirk->nr_mem = 1;
320     legacy->vdev = vdev;
321     legacy->data.address_size = 4;
322     legacy->data.data_offset = 4;
323     legacy->data.data_size = 4;
324     legacy->data.address_match = 0x4000;
325     legacy->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1;
326     legacy->data.bar = nr;
327     legacy->data.read_flags = legacy->data.write_flags = 1;
328 
329     memory_region_init_io(quirk->mem, OBJECT(vdev),
330                           &vfio_generic_window_quirk, legacy,
331                           "vfio-ati-bar4-window-quirk", 8);
332     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
333                           legacy->data.base_offset, quirk->mem, 1);
334 
335     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
336 
337     trace_vfio_probe_ati_bar4_window_quirk(vdev->vbasedev.name);
338 }
339 
340 /*
341  * Trap the BAR2 MMIO window to config space as well.
342  */
343 static void vfio_probe_ati_bar2_4000_quirk(VFIOPCIDevice *vdev, int nr)
344 {
345     PCIDevice *pdev = &vdev->pdev;
346     VFIOQuirk *quirk;
347     VFIOLegacyQuirk *legacy;
348 
349     /* Only enable on newer devices where BAR2 is 64bit */
350     if (!vdev->has_vga || nr != 2 || !vdev->bars[2].mem64 ||
351         pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) {
352         return;
353     }
354 
355     quirk = g_malloc0(sizeof(*quirk));
356     quirk->data = legacy = g_malloc0(sizeof(*legacy));
357     quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
358     quirk->nr_mem = 1;
359     legacy->vdev = vdev;
360     legacy->data.flags = legacy->data.read_flags = legacy->data.write_flags = 1;
361     legacy->data.address_match = 0x4000;
362     legacy->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1;
363     legacy->data.bar = nr;
364 
365     memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_generic_quirk, legacy,
366                           "vfio-ati-bar2-4000-quirk",
367                           TARGET_PAGE_ALIGN(legacy->data.address_mask + 1));
368     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
369                           legacy->data.address_match & TARGET_PAGE_MASK,
370                           quirk->mem, 1);
371 
372     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
373 
374     trace_vfio_probe_ati_bar2_4000_quirk(vdev->vbasedev.name);
375 }
376 
377 /*
378  * Older ATI/AMD cards like the X550 have a similar window to that above.
379  * I/O port BAR1 provides a window to a mirror of PCI config space located
380  * in BAR2 at offset 0xf00.  We don't care to support such older cards, but
381  * note it for future reference.
382  */
383 
384 #define PCI_VENDOR_ID_NVIDIA                    0x10de
385 
/*
 * Nvidia has several different methods to get to config space, the
 * nouveau project has several of these documented here:
 * https://github.com/pathscale/envytools/tree/master/hwdocs
 *
 * The first quirk is actually not documented in envytools and is found
 * on 10de:01d1 (NVIDIA Corporation G72 [GeForce 7300 LE]).  This is an
 * NV46 chipset.  The backdoor uses the legacy VGA I/O ports to access
 * the mirror of PCI config space found at BAR0 offset 0x1800.  The access
 * sequence first writes 0x338 to I/O port 0x3d4.  The target offset is
 * then written to 0x3d0.  Finally 0x538 is written for a read and 0x738
 * is written for a write to 0x3d4.  The BAR0 offset is then accessible
 * through 0x3d0.  This quirk doesn't seem to be necessary on newer cards
 * that use the I/O port BAR5 window but it doesn't hurt to leave it.
 */
enum {
    NV_3D0_NONE   = 0, /* idle, no sequence in progress */
    NV_3D0_SELECT = 1, /* 0x338 was written to the address port */
    NV_3D0_WINDOW = 2, /* a matching target offset was latched */
    NV_3D0_READ   = 3, /* 0x538 was written: next data read is trapped */
    NV_3D0_WRITE  = 4, /* 0x738 was written: next data write is trapped */
};
408 
409 static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque,
410                                            hwaddr addr, unsigned size)
411 {
412     VFIOLegacyQuirk *quirk = opaque;
413     VFIOPCIDevice *vdev = quirk->vdev;
414     PCIDevice *pdev = &vdev->pdev;
415     uint64_t data = vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
416                                   addr + quirk->data.base_offset, size);
417 
418     if (quirk->data.flags == NV_3D0_READ && addr == quirk->data.data_offset) {
419         data = vfio_pci_read_config(pdev, quirk->data.address_val, size);
420         trace_vfio_nvidia_3d0_quirk_read(size, data);
421     }
422 
423     quirk->data.flags = NV_3D0_NONE;
424 
425     return data;
426 }
427 
428 static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr,
429                                         uint64_t data, unsigned size)
430 {
431     VFIOLegacyQuirk *quirk = opaque;
432     VFIOPCIDevice *vdev = quirk->vdev;
433     PCIDevice *pdev = &vdev->pdev;
434 
435     switch (quirk->data.flags) {
436     case NV_3D0_NONE:
437         if (addr == quirk->data.address_offset && data == 0x338) {
438             quirk->data.flags = NV_3D0_SELECT;
439         }
440         break;
441     case NV_3D0_SELECT:
442         quirk->data.flags = NV_3D0_NONE;
443         if (addr == quirk->data.data_offset &&
444             (data & ~quirk->data.address_mask) == quirk->data.address_match) {
445             quirk->data.flags = NV_3D0_WINDOW;
446             quirk->data.address_val = data & quirk->data.address_mask;
447         }
448         break;
449     case NV_3D0_WINDOW:
450         quirk->data.flags = NV_3D0_NONE;
451         if (addr == quirk->data.address_offset) {
452             if (data == 0x538) {
453                 quirk->data.flags = NV_3D0_READ;
454             } else if (data == 0x738) {
455                 quirk->data.flags = NV_3D0_WRITE;
456             }
457         }
458         break;
459     case NV_3D0_WRITE:
460         quirk->data.flags = NV_3D0_NONE;
461         if (addr == quirk->data.data_offset) {
462             vfio_pci_write_config(pdev, quirk->data.address_val, data, size);
463             trace_vfio_nvidia_3d0_quirk_write(data, size);
464             return;
465         }
466         break;
467     }
468 
469     vfio_vga_write(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
470                    addr + quirk->data.base_offset, data, size);
471 }
472 
473 static const MemoryRegionOps vfio_nvidia_3d0_quirk = {
474     .read = vfio_nvidia_3d0_quirk_read,
475     .write = vfio_nvidia_3d0_quirk_write,
476     .endianness = DEVICE_LITTLE_ENDIAN,
477 };
478 
479 static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev)
480 {
481     PCIDevice *pdev = &vdev->pdev;
482     VFIOQuirk *quirk;
483     VFIOLegacyQuirk *legacy;
484 
485     if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA ||
486         !vdev->bars[1].region.size) {
487         return;
488     }
489 
490     quirk = g_malloc0(sizeof(*quirk));
491     quirk->data = legacy = g_malloc0(sizeof(*legacy));
492     quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
493     quirk->nr_mem = 1;
494     legacy->vdev = vdev;
495     legacy->data.base_offset = 0x10;
496     legacy->data.address_offset = 4;
497     legacy->data.address_size = 2;
498     legacy->data.address_match = 0x1800;
499     legacy->data.address_mask = PCI_CONFIG_SPACE_SIZE - 1;
500     legacy->data.data_offset = 0;
501     legacy->data.data_size = 4;
502 
503     memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_nvidia_3d0_quirk,
504                           legacy, "vfio-nvidia-3d0-quirk", 6);
505     memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
506                                 legacy->data.base_offset, quirk->mem);
507 
508     QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks,
509                       quirk, next);
510 
511     trace_vfio_vga_probe_nvidia_3d0_quirk(vdev->vbasedev.name);
512 }
513 
/*
 * The second quirk is documented in envytools.  The I/O port BAR5 is just
 * a set of address/data ports to the MMIO BARs.  The BAR we care about is
 * again BAR0.  This backdoor is apparently a bit newer than the one above,
 * so we need to trap not only the 256 bytes @0x1800 but also the 4k
 * @0x88000, where all of PCI config space, including extended space, is
 * available.
 */
enum {
    NV_BAR5_ADDRESS = 1 << 0,
    NV_BAR5_ENABLE  = 1 << 1,
    NV_BAR5_MASTER  = 1 << 2,
    NV_BAR5_VALID   = NV_BAR5_ADDRESS | NV_BAR5_ENABLE | NV_BAR5_MASTER,
};
527 
528 static void vfio_nvidia_bar5_window_quirk_write(void *opaque, hwaddr addr,
529                                                 uint64_t data, unsigned size)
530 {
531     VFIOLegacyQuirk *quirk = opaque;
532 
533     switch (addr) {
534     case 0x0:
535         if (data & 0x1) {
536             quirk->data.flags |= NV_BAR5_MASTER;
537         } else {
538             quirk->data.flags &= ~NV_BAR5_MASTER;
539         }
540         break;
541     case 0x4:
542         if (data & 0x1) {
543             quirk->data.flags |= NV_BAR5_ENABLE;
544         } else {
545             quirk->data.flags &= ~NV_BAR5_ENABLE;
546         }
547         break;
548     case 0x8:
549         if (quirk->data.flags & NV_BAR5_MASTER) {
550             if ((data & ~0xfff) == 0x88000) {
551                 quirk->data.flags |= NV_BAR5_ADDRESS;
552                 quirk->data.address_val = data & 0xfff;
553             } else if ((data & ~0xff) == 0x1800) {
554                 quirk->data.flags |= NV_BAR5_ADDRESS;
555                 quirk->data.address_val = data & 0xff;
556             } else {
557                 quirk->data.flags &= ~NV_BAR5_ADDRESS;
558             }
559         }
560         break;
561     }
562 
563     vfio_generic_window_quirk_write(opaque, addr, data, size);
564 }
565 
566 static const MemoryRegionOps vfio_nvidia_bar5_window_quirk = {
567     .read = vfio_generic_window_quirk_read,
568     .write = vfio_nvidia_bar5_window_quirk_write,
569     .valid.min_access_size = 4,
570     .endianness = DEVICE_LITTLE_ENDIAN,
571 };
572 
573 static void vfio_probe_nvidia_bar5_window_quirk(VFIOPCIDevice *vdev, int nr)
574 {
575     PCIDevice *pdev = &vdev->pdev;
576     VFIOQuirk *quirk;
577     VFIOLegacyQuirk *legacy;
578 
579     if (!vdev->has_vga || nr != 5 ||
580         pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA) {
581         return;
582     }
583 
584     quirk = g_malloc0(sizeof(*quirk));
585     quirk->data = legacy = g_malloc0(sizeof(*legacy));
586     quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
587     quirk->nr_mem = 1;
588     legacy->vdev = vdev;
589     legacy->data.read_flags = legacy->data.write_flags = NV_BAR5_VALID;
590     legacy->data.address_offset = 0x8;
591     legacy->data.address_size = 0; /* actually 4, but avoids generic code */
592     legacy->data.data_offset = 0xc;
593     legacy->data.data_size = 4;
594     legacy->data.bar = nr;
595 
596     memory_region_init_io(quirk->mem, OBJECT(vdev),
597                           &vfio_nvidia_bar5_window_quirk, legacy,
598                           "vfio-nvidia-bar5-window-quirk", 16);
599     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
600                                         0, quirk->mem, 1);
601 
602     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
603 
604     trace_vfio_probe_nvidia_bar5_window_quirk(vdev->vbasedev.name);
605 }
606 
607 static void vfio_nvidia_88000_quirk_write(void *opaque, hwaddr addr,
608                                           uint64_t data, unsigned size)
609 {
610     VFIOLegacyQuirk *quirk = opaque;
611     VFIOPCIDevice *vdev = quirk->vdev;
612     PCIDevice *pdev = &vdev->pdev;
613     hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK;
614 
615     vfio_generic_quirk_write(opaque, addr, data, size);
616 
617     /*
618      * Nvidia seems to acknowledge MSI interrupts by writing 0xff to the
619      * MSI capability ID register.  Both the ID and next register are
620      * read-only, so we allow writes covering either of those to real hw.
621      * NB - only fixed for the 0x88000 MMIO window.
622      */
623     if ((pdev->cap_present & QEMU_PCI_CAP_MSI) &&
624         vfio_range_contained(addr, size, pdev->msi_cap, PCI_MSI_FLAGS)) {
625         vfio_region_write(&vdev->bars[quirk->data.bar].region,
626                           addr + base, data, size);
627     }
628 }
629 
630 static const MemoryRegionOps vfio_nvidia_88000_quirk = {
631     .read = vfio_generic_quirk_read,
632     .write = vfio_nvidia_88000_quirk_write,
633     .endianness = DEVICE_LITTLE_ENDIAN,
634 };
635 
636 /*
637  * Finally, BAR0 itself.  We want to redirect any accesses to either
638  * 0x1800 or 0x88000 through the PCI config space access functions.
639  *
640  * NB - quirk at a page granularity or else they don't seem to work when
641  *      BARs are mmap'd
642  *
643  * Here's offset 0x88000...
644  */
645 static void vfio_probe_nvidia_bar0_88000_quirk(VFIOPCIDevice *vdev, int nr)
646 {
647     PCIDevice *pdev = &vdev->pdev;
648     VFIOQuirk *quirk;
649     VFIOLegacyQuirk *legacy;
650     uint16_t vendor, class;
651 
652     vendor = pci_get_word(pdev->config + PCI_VENDOR_ID);
653     class = pci_get_word(pdev->config + PCI_CLASS_DEVICE);
654 
655     if (nr != 0 || vendor != PCI_VENDOR_ID_NVIDIA ||
656         class != PCI_CLASS_DISPLAY_VGA) {
657         return;
658     }
659 
660     quirk = g_malloc0(sizeof(*quirk));
661     quirk->data = legacy = g_malloc0(sizeof(*legacy));
662     quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
663     quirk->nr_mem = 1;
664     legacy->vdev = vdev;
665     legacy->data.flags = legacy->data.read_flags = legacy->data.write_flags = 1;
666     legacy->data.address_match = 0x88000;
667     legacy->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1;
668     legacy->data.bar = nr;
669 
670     memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_nvidia_88000_quirk,
671                           legacy, "vfio-nvidia-bar0-88000-quirk",
672                           TARGET_PAGE_ALIGN(legacy->data.address_mask + 1));
673     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
674                           legacy->data.address_match & TARGET_PAGE_MASK,
675                           quirk->mem, 1);
676 
677     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
678 
679     trace_vfio_probe_nvidia_bar0_88000_quirk(vdev->vbasedev.name);
680 }
681 
682 /*
683  * And here's the same for BAR0 offset 0x1800...
684  */
685 static void vfio_probe_nvidia_bar0_1800_quirk(VFIOPCIDevice *vdev, int nr)
686 {
687     PCIDevice *pdev = &vdev->pdev;
688     VFIOQuirk *quirk;
689     VFIOLegacyQuirk *legacy;
690 
691     if (!vdev->has_vga || nr != 0 ||
692         pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA) {
693         return;
694     }
695 
696     /* Log the chipset ID */
697     trace_vfio_probe_nvidia_bar0_1800_quirk_id(
698             (unsigned int)(vfio_region_read(&vdev->bars[0].region, 0, 4) >> 20)
699             & 0xff);
700 
701     quirk = g_malloc0(sizeof(*quirk));
702     quirk->data = legacy = g_malloc0(sizeof(*legacy));
703     quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
704     quirk->nr_mem = 1;
705     legacy->vdev = vdev;
706     legacy->data.flags = legacy->data.read_flags = legacy->data.write_flags = 1;
707     legacy->data.address_match = 0x1800;
708     legacy->data.address_mask = PCI_CONFIG_SPACE_SIZE - 1;
709     legacy->data.bar = nr;
710 
711     memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_generic_quirk, legacy,
712                           "vfio-nvidia-bar0-1800-quirk",
713                           TARGET_PAGE_ALIGN(legacy->data.address_mask + 1));
714     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
715                           legacy->data.address_match & TARGET_PAGE_MASK,
716                           quirk->mem, 1);
717 
718     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
719 
720     trace_vfio_probe_nvidia_bar0_1800_quirk(vdev->vbasedev.name);
721 }
722 
723 /*
724  * TODO - Some Nvidia devices provide config access to their companion HDA
725  * device and even to their parent bridge via these config space mirrors.
726  * Add quirks for those regions.
727  */
728 
729 #define PCI_VENDOR_ID_REALTEK 0x10ec
730 
731 /*
732  * RTL8168 devices have a backdoor that can access the MSI-X table.  At BAR2
733  * offset 0x70 there is a dword data register, offset 0x74 is a dword address
734  * register.  According to the Linux r8169 driver, the MSI-X table is addressed
735  * when the "type" portion of the address register is set to 0x1.  This appears
736  * to be bits 16:30.  Bit 31 is both a write indicator and some sort of
737  * "address latched" indicator.  Bits 12:15 are a mask field, which we can
738  * ignore because the MSI-X table should always be accessed as a dword (full
739  * mask).  Bits 0:11 is offset within the type.
740  *
741  * Example trace:
742  *
743  * Read from MSI-X table offset 0
744  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x1f000, 4) // store read addr
745  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x8001f000 // latch
746  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x70, 4) = 0xfee00398 // read data
747  *
748  * Write 0xfee00000 to MSI-X table offset 0
749  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x70, 0xfee00000, 4) // write data
750  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x8001f000, 4) // do write
751  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x1f000 // complete
752  */
753 static uint64_t vfio_rtl8168_window_quirk_read(void *opaque,
754                                                hwaddr addr, unsigned size)
755 {
756     VFIOLegacyQuirk *quirk = opaque;
757     VFIOPCIDevice *vdev = quirk->vdev;
758     uint64_t val = 0;
759 
760     if (!quirk->data.flags) { /* Non-MSI-X table access */
761         return vfio_region_read(&vdev->bars[quirk->data.bar].region,
762                                 addr + 0x70, size);
763     }
764 
765     switch (addr) {
766     case 4: /* address */
767         val = quirk->data.address_match ^ 0x80000000U; /* latch/complete */
768         break;
769     case 0: /* data */
770         if ((vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) {
771             memory_region_dispatch_read(&vdev->pdev.msix_table_mmio,
772                                 (hwaddr)(quirk->data.address_match & 0xfff),
773                                 &val, size, MEMTXATTRS_UNSPECIFIED);
774         }
775         break;
776     }
777 
778     trace_vfio_rtl8168_quirk_read(vdev->vbasedev.name,
779                                   addr ? "address" : "data", val);
780     return val;
781 }
782 
783 static void vfio_rtl8168_window_quirk_write(void *opaque, hwaddr addr,
784                                             uint64_t data, unsigned size)
785 {
786     VFIOLegacyQuirk *quirk = opaque;
787     VFIOPCIDevice *vdev = quirk->vdev;
788 
789     switch (addr) {
790     case 4: /* address */
791         if ((data & 0x7fff0000) == 0x10000) { /* MSI-X table */
792             quirk->data.flags = 1; /* Activate reads */
793             quirk->data.address_match = data;
794 
795             trace_vfio_rtl8168_quirk_write(vdev->vbasedev.name, data);
796 
797             if (data & 0x80000000U) { /* Do write */
798                 if (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) {
799                     hwaddr offset = data & 0xfff;
800                     uint64_t val = quirk->data.address_mask;
801 
802                     trace_vfio_rtl8168_quirk_msix(vdev->vbasedev.name,
803                                                   (uint16_t)offset, val);
804 
805                     /* Write to the proper guest MSI-X table instead */
806                     memory_region_dispatch_write(&vdev->pdev.msix_table_mmio,
807                                                  offset, val, size,
808                                                  MEMTXATTRS_UNSPECIFIED);
809                 }
810                 return; /* Do not write guest MSI-X data to hardware */
811             }
812         } else {
813             quirk->data.flags = 0; /* De-activate reads, non-MSI-X */
814         }
815         break;
816     case 0: /* data */
817         quirk->data.address_mask = data;
818         break;
819     }
820 
821     vfio_region_write(&vdev->bars[quirk->data.bar].region,
822                       addr + 0x70, data, size);
823 }
824 
825 static const MemoryRegionOps vfio_rtl8168_window_quirk = {
826     .read = vfio_rtl8168_window_quirk_read,
827     .write = vfio_rtl8168_window_quirk_write,
828     .valid = {
829         .min_access_size = 4,
830         .max_access_size = 4,
831         .unaligned = false,
832     },
833     .endianness = DEVICE_LITTLE_ENDIAN,
834 };
835 
836 static void vfio_probe_rtl8168_bar2_window_quirk(VFIOPCIDevice *vdev, int nr)
837 {
838     PCIDevice *pdev = &vdev->pdev;
839     VFIOQuirk *quirk;
840     VFIOLegacyQuirk *legacy;
841 
842     if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_REALTEK ||
843         pci_get_word(pdev->config + PCI_DEVICE_ID) != 0x8168 || nr != 2) {
844         return;
845     }
846 
847     quirk = g_malloc0(sizeof(*quirk));
848     quirk->data = legacy = g_malloc0(sizeof(*legacy));
849     quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
850     quirk->nr_mem = 1;
851     legacy->vdev = vdev;
852     legacy->data.bar = nr;
853 
854     memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_rtl8168_window_quirk,
855                           legacy, "vfio-rtl8168-window-quirk", 8);
856     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
857                                         0x70, quirk->mem, 1);
858 
859     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
860 
861     trace_vfio_rtl8168_quirk_enable(vdev->vbasedev.name);
862 }
863 
864 /*
865  * Common quirk probe entry points.
866  */
/*
 * Run every VGA quirk probe against @vdev.
 *
 * The probes are defined elsewhere in this file; presumably each one checks
 * whether the device matches and installs nothing otherwise (verify against
 * the probe definitions).
 */
void vfio_vga_quirk_setup(VFIOPCIDevice *vdev)
{
    vfio_vga_probe_ati_3c3_quirk(vdev);
    vfio_vga_probe_nvidia_3d0_quirk(vdev);
}
872 
873 void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev)
874 {
875     VFIOQuirk *quirk;
876     int i, j;
877 
878     for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) {
879         QLIST_FOREACH(quirk, &vdev->vga.region[i].quirks, next) {
880             for (j = 0; j < quirk->nr_mem; j++) {
881                 memory_region_del_subregion(&vdev->vga.region[i].mem,
882                                             &quirk->mem[j]);
883             }
884         }
885     }
886 }
887 
888 void vfio_vga_quirk_free(VFIOPCIDevice *vdev)
889 {
890     int i, j;
891 
892     for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) {
893         while (!QLIST_EMPTY(&vdev->vga.region[i].quirks)) {
894             VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga.region[i].quirks);
895             QLIST_REMOVE(quirk, next);
896             for (j = 0; j < quirk->nr_mem; j++) {
897                 object_unparent(OBJECT(&quirk->mem[j]));
898             }
899             g_free(quirk->mem);
900             g_free(quirk->data);
901             g_free(quirk);
902         }
903     }
904 }
905 
/*
 * Run every BAR quirk probe against BAR @nr of @vdev.
 *
 * Each probe receives the BAR index and decides for itself whether to
 * install anything; the RTL8168 probe, for example, returns immediately
 * unless the device IDs match and @nr is 2.  The other probes are defined
 * elsewhere in this file and presumably filter the same way.
 */
void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
{
    vfio_probe_ati_bar4_window_quirk(vdev, nr);
    vfio_probe_ati_bar2_4000_quirk(vdev, nr);
    vfio_probe_nvidia_bar5_window_quirk(vdev, nr);
    vfio_probe_nvidia_bar0_88000_quirk(vdev, nr);
    vfio_probe_nvidia_bar0_1800_quirk(vdev, nr);
    vfio_probe_rtl8168_bar2_window_quirk(vdev, nr);
}
915 
916 void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr)
917 {
918     VFIOBAR *bar = &vdev->bars[nr];
919     VFIOQuirk *quirk;
920     int i;
921 
922     QLIST_FOREACH(quirk, &bar->quirks, next) {
923         for (i = 0; i < quirk->nr_mem; i++) {
924             memory_region_del_subregion(&bar->region.mem, &quirk->mem[i]);
925         }
926     }
927 }
928 
929 void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr)
930 {
931     VFIOBAR *bar = &vdev->bars[nr];
932     int i;
933 
934     while (!QLIST_EMPTY(&bar->quirks)) {
935         VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks);
936         QLIST_REMOVE(quirk, next);
937         for (i = 0; i < quirk->nr_mem; i++) {
938             object_unparent(OBJECT(&quirk->mem[i]));
939         }
940         g_free(quirk->mem);
941         g_free(quirk->data);
942         g_free(quirk);
943     }
944 }
945