xref: /qemu/hw/vfio/pci-quirks.c (revision c00d61d8fa22b096b15e19ee2fde846ffc1c0b5d)
1 /*
2  * device quirks for PCI devices
3  *
4  * Copyright Red Hat, Inc. 2012-2015
5  *
6  * Authors:
7  *  Alex Williamson <alex.williamson@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  */
12 
13 #include "pci.h"
14 #include "trace.h"
15 #include "qemu/range.h"
16 
/*
 * List of device ids/vendor ids for which to disable
 * option rom loading. This avoids guest hangs during rom
 * execution, as noticed with the BCM 57810 card, for lack of a
 * better way to handle such issues.
 * The user can still override by specifying a romfile or
 * rombar=1.
 * Please see https://bugs.launchpad.net/qemu/+bug/1284874
 * for an analysis of the 57810 card hang. When adding
 * a new vendor id/device id combination below, please also add
 * your card/environment details and information that could
 * help in debugging to the bug tracking this issue.
 */
30 static const VFIORomBlacklistEntry romblacklist[] = {
31     /* Broadcom BCM 57810 */
32     { 0x14e4, 0x168e }
33 };
34 
35 bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev)
36 {
37     PCIDevice *pdev = &vdev->pdev;
38     uint16_t vendor_id, device_id;
39     int count = 0;
40 
41     vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID);
42     device_id = pci_get_word(pdev->config + PCI_DEVICE_ID);
43 
44     while (count < ARRAY_SIZE(romblacklist)) {
45         if (romblacklist[count].vendor_id == vendor_id &&
46             romblacklist[count].device_id == device_id) {
47                 return true;
48         }
49         count++;
50     }
51 
52     return false;
53 }
54 
55 /*
56  * Device specific quirks
57  */
58 
/*
 * Is range1 fully contained within range2?
 *
 * Each range is given as a (first, len) pair of 64-bit values.  The check
 * compares the offset of range1 inside range2 against the spare length
 * instead of comparing end addresses, so neither first1 + len1 nor
 * first2 + len2 is ever computed.  This keeps the answer correct for ranges
 * that reach (or would run past) the top of the 64-bit space, where the
 * end-address sums would wrap around.
 *
 * Returns true when every byte of range1 lies inside range2, false
 * otherwise.  A zero-length range1 is contained when first1 lies within
 * [first2, first2 + len2].
 */
static bool vfio_range_contained(uint64_t first1, uint64_t len1,
                                 uint64_t first2, uint64_t len2)
{
    if (first1 < first2 || len1 > len2) {
        return false;
    }

    /* Both subtractions are safe: first1 >= first2 and len2 >= len1 */
    return first1 - first2 <= len2 - len1;
}
64 
/*
 * Check whether every bit of @mask is set in @flags.
 *
 * An empty mask never matches: the result is false when @mask is zero.
 */
static bool vfio_flags_enabled(uint8_t flags, uint8_t mask)
{
    if (mask == 0) {
        return false;
    }

    return (flags & mask) == mask;
}
69 
70 static uint64_t vfio_generic_window_quirk_read(void *opaque,
71                                                hwaddr addr, unsigned size)
72 {
73     VFIOQuirk *quirk = opaque;
74     VFIOPCIDevice *vdev = quirk->vdev;
75     uint64_t data;
76 
77     if (vfio_flags_enabled(quirk->data.flags, quirk->data.read_flags) &&
78         ranges_overlap(addr, size,
79                        quirk->data.data_offset, quirk->data.data_size)) {
80         hwaddr offset = addr - quirk->data.data_offset;
81 
82         if (!vfio_range_contained(addr, size, quirk->data.data_offset,
83                                   quirk->data.data_size)) {
84             hw_error("%s: window data read not fully contained: %s",
85                      __func__, memory_region_name(&quirk->mem));
86         }
87 
88         data = vfio_pci_read_config(&vdev->pdev,
89                                     quirk->data.address_val + offset, size);
90 
91         trace_vfio_generic_window_quirk_read(memory_region_name(&quirk->mem),
92                                              vdev->vbasedev.name,
93                                              quirk->data.bar,
94                                              addr, size, data);
95     } else {
96         data = vfio_region_read(&vdev->bars[quirk->data.bar].region,
97                                 addr + quirk->data.base_offset, size);
98     }
99 
100     return data;
101 }
102 
103 static void vfio_generic_window_quirk_write(void *opaque, hwaddr addr,
104                                             uint64_t data, unsigned size)
105 {
106     VFIOQuirk *quirk = opaque;
107     VFIOPCIDevice *vdev = quirk->vdev;
108 
109     if (ranges_overlap(addr, size,
110                        quirk->data.address_offset, quirk->data.address_size)) {
111 
112         if (addr != quirk->data.address_offset) {
113             hw_error("%s: offset write into address window: %s",
114                      __func__, memory_region_name(&quirk->mem));
115         }
116 
117         if ((data & ~quirk->data.address_mask) == quirk->data.address_match) {
118             quirk->data.flags |= quirk->data.write_flags |
119                                  quirk->data.read_flags;
120             quirk->data.address_val = data & quirk->data.address_mask;
121         } else {
122             quirk->data.flags &= ~(quirk->data.write_flags |
123                                    quirk->data.read_flags);
124         }
125     }
126 
127     if (vfio_flags_enabled(quirk->data.flags, quirk->data.write_flags) &&
128         ranges_overlap(addr, size,
129                        quirk->data.data_offset, quirk->data.data_size)) {
130         hwaddr offset = addr - quirk->data.data_offset;
131 
132         if (!vfio_range_contained(addr, size, quirk->data.data_offset,
133                                   quirk->data.data_size)) {
134             hw_error("%s: window data write not fully contained: %s",
135                      __func__, memory_region_name(&quirk->mem));
136         }
137 
138         vfio_pci_write_config(&vdev->pdev,
139                               quirk->data.address_val + offset, data, size);
140         trace_vfio_generic_window_quirk_write(memory_region_name(&quirk->mem),
141                                               vdev->vbasedev.name,
142                                               quirk->data.bar,
143                                               addr, data, size);
144         return;
145     }
146 
147     vfio_region_write(&vdev->bars[quirk->data.bar].region,
148                    addr + quirk->data.base_offset, data, size);
149 }
150 
151 static const MemoryRegionOps vfio_generic_window_quirk = {
152     .read = vfio_generic_window_quirk_read,
153     .write = vfio_generic_window_quirk_write,
154     .endianness = DEVICE_LITTLE_ENDIAN,
155 };
156 
157 static uint64_t vfio_generic_quirk_read(void *opaque,
158                                         hwaddr addr, unsigned size)
159 {
160     VFIOQuirk *quirk = opaque;
161     VFIOPCIDevice *vdev = quirk->vdev;
162     hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK;
163     hwaddr offset = quirk->data.address_match & ~TARGET_PAGE_MASK;
164     uint64_t data;
165 
166     if (vfio_flags_enabled(quirk->data.flags, quirk->data.read_flags) &&
167         ranges_overlap(addr, size, offset, quirk->data.address_mask + 1)) {
168         if (!vfio_range_contained(addr, size, offset,
169                                   quirk->data.address_mask + 1)) {
170             hw_error("%s: read not fully contained: %s",
171                      __func__, memory_region_name(&quirk->mem));
172         }
173 
174         data = vfio_pci_read_config(&vdev->pdev, addr - offset, size);
175 
176         trace_vfio_generic_quirk_read(memory_region_name(&quirk->mem),
177                                       vdev->vbasedev.name, quirk->data.bar,
178                                       addr + base, size, data);
179     } else {
180         data = vfio_region_read(&vdev->bars[quirk->data.bar].region,
181                                 addr + base, size);
182     }
183 
184     return data;
185 }
186 
187 static void vfio_generic_quirk_write(void *opaque, hwaddr addr,
188                                      uint64_t data, unsigned size)
189 {
190     VFIOQuirk *quirk = opaque;
191     VFIOPCIDevice *vdev = quirk->vdev;
192     hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK;
193     hwaddr offset = quirk->data.address_match & ~TARGET_PAGE_MASK;
194 
195     if (vfio_flags_enabled(quirk->data.flags, quirk->data.write_flags) &&
196         ranges_overlap(addr, size, offset, quirk->data.address_mask + 1)) {
197         if (!vfio_range_contained(addr, size, offset,
198                                   quirk->data.address_mask + 1)) {
199             hw_error("%s: write not fully contained: %s",
200                      __func__, memory_region_name(&quirk->mem));
201         }
202 
203         vfio_pci_write_config(&vdev->pdev, addr - offset, data, size);
204 
205         trace_vfio_generic_quirk_write(memory_region_name(&quirk->mem),
206                                        vdev->vbasedev.name, quirk->data.bar,
207                                        addr + base, data, size);
208     } else {
209         vfio_region_write(&vdev->bars[quirk->data.bar].region,
210                           addr + base, data, size);
211     }
212 }
213 
214 static const MemoryRegionOps vfio_generic_quirk = {
215     .read = vfio_generic_quirk_read,
216     .write = vfio_generic_quirk_write,
217     .endianness = DEVICE_LITTLE_ENDIAN,
218 };
219 
/* PCI vendor ID checked by the ATI/AMD quirk probes */
#define PCI_VENDOR_ID_ATI 0x1002
221 
222 /*
223  * Radeon HD cards (HD5450 & HD7850) report the upper byte of the I/O port BAR
224  * through VGA register 0x3c3.  On newer cards, the I/O port BAR is always
225  * BAR4 (older cards like the X550 used BAR1, but we don't care to support
226  * those).  Note that on bare metal, a read of 0x3c3 doesn't always return the
227  * I/O port BAR address.  Originally this was coded to return the virtual BAR
228  * address only if the physical register read returns the actual BAR address,
229  * but users have reported greater success if we return the virtual address
230  * unconditionally.
231  */
232 static uint64_t vfio_ati_3c3_quirk_read(void *opaque,
233                                         hwaddr addr, unsigned size)
234 {
235     VFIOQuirk *quirk = opaque;
236     VFIOPCIDevice *vdev = quirk->vdev;
237     uint64_t data = vfio_pci_read_config(&vdev->pdev,
238                                          PCI_BASE_ADDRESS_0 + (4 * 4) + 1,
239                                          size);
240     trace_vfio_ati_3c3_quirk_read(data);
241 
242     return data;
243 }
244 
245 static const MemoryRegionOps vfio_ati_3c3_quirk = {
246     .read = vfio_ati_3c3_quirk_read,
247     .endianness = DEVICE_LITTLE_ENDIAN,
248 };
249 
250 static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev)
251 {
252     PCIDevice *pdev = &vdev->pdev;
253     VFIOQuirk *quirk;
254 
255     if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) {
256         return;
257     }
258 
259     /*
260      * As long as the BAR is >= 256 bytes it will be aligned such that the
261      * lower byte is always zero.  Filter out anything else, if it exists.
262      */
263     if (!vdev->bars[4].ioport || vdev->bars[4].region.size < 256) {
264         return;
265     }
266 
267     quirk = g_malloc0(sizeof(*quirk));
268     quirk->vdev = vdev;
269 
270     memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, quirk,
271                           "vfio-ati-3c3-quirk", 1);
272     memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
273                                 3 /* offset 3 bytes from 0x3c0 */, &quirk->mem);
274 
275     QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks,
276                       quirk, next);
277 
278     trace_vfio_vga_probe_ati_3c3_quirk(vdev->vbasedev.name);
279 }
280 
281 /*
282  * Newer ATI/AMD devices, including HD5450 and HD7850, have a window to PCI
283  * config space through MMIO BAR2 at offset 0x4000.  Nothing seems to access
284  * the MMIO space directly, but a window to this space is provided through
285  * I/O port BAR4.  Offset 0x0 is the address register and offset 0x4 is the
286  * data register.  When the address is programmed to a range of 0x4000-0x4fff
287  * PCI configuration space is available.  Experimentation seems to indicate
288  * that only read-only access is provided, but we drop writes when the window
289  * is enabled to config space nonetheless.
290  */
291 static void vfio_probe_ati_bar4_window_quirk(VFIOPCIDevice *vdev, int nr)
292 {
293     PCIDevice *pdev = &vdev->pdev;
294     VFIOQuirk *quirk;
295 
296     if (!vdev->has_vga || nr != 4 ||
297         pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) {
298         return;
299     }
300 
301     quirk = g_malloc0(sizeof(*quirk));
302     quirk->vdev = vdev;
303     quirk->data.address_size = 4;
304     quirk->data.data_offset = 4;
305     quirk->data.data_size = 4;
306     quirk->data.address_match = 0x4000;
307     quirk->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1;
308     quirk->data.bar = nr;
309     quirk->data.read_flags = quirk->data.write_flags = 1;
310 
311     memory_region_init_io(&quirk->mem, OBJECT(vdev),
312                           &vfio_generic_window_quirk, quirk,
313                           "vfio-ati-bar4-window-quirk", 8);
314     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
315                           quirk->data.base_offset, &quirk->mem, 1);
316 
317     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
318 
319     trace_vfio_probe_ati_bar4_window_quirk(vdev->vbasedev.name);
320 }
321 
322 /*
323  * Trap the BAR2 MMIO window to config space as well.
324  */
325 static void vfio_probe_ati_bar2_4000_quirk(VFIOPCIDevice *vdev, int nr)
326 {
327     PCIDevice *pdev = &vdev->pdev;
328     VFIOQuirk *quirk;
329 
330     /* Only enable on newer devices where BAR2 is 64bit */
331     if (!vdev->has_vga || nr != 2 || !vdev->bars[2].mem64 ||
332         pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) {
333         return;
334     }
335 
336     quirk = g_malloc0(sizeof(*quirk));
337     quirk->vdev = vdev;
338     quirk->data.flags = quirk->data.read_flags = quirk->data.write_flags = 1;
339     quirk->data.address_match = 0x4000;
340     quirk->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1;
341     quirk->data.bar = nr;
342 
343     memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_generic_quirk, quirk,
344                           "vfio-ati-bar2-4000-quirk",
345                           TARGET_PAGE_ALIGN(quirk->data.address_mask + 1));
346     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
347                           quirk->data.address_match & TARGET_PAGE_MASK,
348                           &quirk->mem, 1);
349 
350     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
351 
352     trace_vfio_probe_ati_bar2_4000_quirk(vdev->vbasedev.name);
353 }
354 
355 /*
356  * Older ATI/AMD cards like the X550 have a similar window to that above.
357  * I/O port BAR1 provides a window to a mirror of PCI config space located
358  * in BAR2 at offset 0xf00.  We don't care to support such older cards, but
359  * note it for future reference.
360  */
361 
/* PCI vendor ID checked by the Nvidia quirk probes */
#define PCI_VENDOR_ID_NVIDIA 0x10de
363 
/*
 * Nvidia has several different methods to get to config space, the
 * nouveau project has several of these documented here:
 * https://github.com/pathscale/envytools/tree/master/hwdocs
 *
 * The first quirk is actually not documented in envytools and is found
 * on 10de:01d1 (NVIDIA Corporation G72 [GeForce 7300 LE]).  This is an
 * NV46 chipset.  The backdoor uses the legacy VGA I/O ports to access
 * the mirror of PCI config space found at BAR0 offset 0x1800.  The access
 * sequence first writes 0x338 to I/O port 0x3d4.  The target offset is
 * then written to 0x3d0.  Finally 0x538 is written for a read and 0x738
 * is written for a write to 0x3d4.  The BAR0 offset is then accessible
 * through 0x3d0.  This quirk doesn't seem to be necessary on newer cards
 * that use the I/O port BAR5 window but it doesn't hurt to leave it.
 */
/* States of the 0x3d0/0x3d4 backdoor access sequence */
enum {
    NV_3D0_NONE   = 0,
    NV_3D0_SELECT = 1,
    NV_3D0_WINDOW = 2,
    NV_3D0_READ   = 3,
    NV_3D0_WRITE  = 4,
};
386 
387 static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque,
388                                            hwaddr addr, unsigned size)
389 {
390     VFIOQuirk *quirk = opaque;
391     VFIOPCIDevice *vdev = quirk->vdev;
392     PCIDevice *pdev = &vdev->pdev;
393     uint64_t data = vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
394                                   addr + quirk->data.base_offset, size);
395 
396     if (quirk->data.flags == NV_3D0_READ && addr == quirk->data.data_offset) {
397         data = vfio_pci_read_config(pdev, quirk->data.address_val, size);
398         trace_vfio_nvidia_3d0_quirk_read(size, data);
399     }
400 
401     quirk->data.flags = NV_3D0_NONE;
402 
403     return data;
404 }
405 
406 static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr,
407                                         uint64_t data, unsigned size)
408 {
409     VFIOQuirk *quirk = opaque;
410     VFIOPCIDevice *vdev = quirk->vdev;
411     PCIDevice *pdev = &vdev->pdev;
412 
413     switch (quirk->data.flags) {
414     case NV_3D0_NONE:
415         if (addr == quirk->data.address_offset && data == 0x338) {
416             quirk->data.flags = NV_3D0_SELECT;
417         }
418         break;
419     case NV_3D0_SELECT:
420         quirk->data.flags = NV_3D0_NONE;
421         if (addr == quirk->data.data_offset &&
422             (data & ~quirk->data.address_mask) == quirk->data.address_match) {
423             quirk->data.flags = NV_3D0_WINDOW;
424             quirk->data.address_val = data & quirk->data.address_mask;
425         }
426         break;
427     case NV_3D0_WINDOW:
428         quirk->data.flags = NV_3D0_NONE;
429         if (addr == quirk->data.address_offset) {
430             if (data == 0x538) {
431                 quirk->data.flags = NV_3D0_READ;
432             } else if (data == 0x738) {
433                 quirk->data.flags = NV_3D0_WRITE;
434             }
435         }
436         break;
437     case NV_3D0_WRITE:
438         quirk->data.flags = NV_3D0_NONE;
439         if (addr == quirk->data.data_offset) {
440             vfio_pci_write_config(pdev, quirk->data.address_val, data, size);
441             trace_vfio_nvidia_3d0_quirk_write(data, size);
442             return;
443         }
444         break;
445     }
446 
447     vfio_vga_write(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
448                    addr + quirk->data.base_offset, data, size);
449 }
450 
451 static const MemoryRegionOps vfio_nvidia_3d0_quirk = {
452     .read = vfio_nvidia_3d0_quirk_read,
453     .write = vfio_nvidia_3d0_quirk_write,
454     .endianness = DEVICE_LITTLE_ENDIAN,
455 };
456 
457 static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev)
458 {
459     PCIDevice *pdev = &vdev->pdev;
460     VFIOQuirk *quirk;
461 
462     if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA ||
463         !vdev->bars[1].region.size) {
464         return;
465     }
466 
467     quirk = g_malloc0(sizeof(*quirk));
468     quirk->vdev = vdev;
469     quirk->data.base_offset = 0x10;
470     quirk->data.address_offset = 4;
471     quirk->data.address_size = 2;
472     quirk->data.address_match = 0x1800;
473     quirk->data.address_mask = PCI_CONFIG_SPACE_SIZE - 1;
474     quirk->data.data_offset = 0;
475     quirk->data.data_size = 4;
476 
477     memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_nvidia_3d0_quirk,
478                           quirk, "vfio-nvidia-3d0-quirk", 6);
479     memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
480                                 quirk->data.base_offset, &quirk->mem);
481 
482     QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks,
483                       quirk, next);
484 
485     trace_vfio_vga_probe_nvidia_3d0_quirk(vdev->vbasedev.name);
486 }
487 
/*
 * The second quirk is documented in envytools.  The I/O port BAR5 is just
 * a set of address/data ports to the MMIO BARs.  The BAR we care about is
 * again BAR0.  This backdoor is apparently a bit newer than the one above,
 * so we need to trap not only the 256 bytes @0x1800, but also all of PCI
 * config space, including extended space, which is available at the 4k
 * @0x88000.
 */
/* State bits tracked for the BAR5 window; VALID is all three combined */
enum {
    NV_BAR5_ADDRESS = 0x1,
    NV_BAR5_ENABLE  = 0x2,
    NV_BAR5_MASTER  = 0x4,
    NV_BAR5_VALID   = NV_BAR5_ADDRESS | NV_BAR5_ENABLE | NV_BAR5_MASTER,
};
501 
502 static void vfio_nvidia_bar5_window_quirk_write(void *opaque, hwaddr addr,
503                                                 uint64_t data, unsigned size)
504 {
505     VFIOQuirk *quirk = opaque;
506 
507     switch (addr) {
508     case 0x0:
509         if (data & 0x1) {
510             quirk->data.flags |= NV_BAR5_MASTER;
511         } else {
512             quirk->data.flags &= ~NV_BAR5_MASTER;
513         }
514         break;
515     case 0x4:
516         if (data & 0x1) {
517             quirk->data.flags |= NV_BAR5_ENABLE;
518         } else {
519             quirk->data.flags &= ~NV_BAR5_ENABLE;
520         }
521         break;
522     case 0x8:
523         if (quirk->data.flags & NV_BAR5_MASTER) {
524             if ((data & ~0xfff) == 0x88000) {
525                 quirk->data.flags |= NV_BAR5_ADDRESS;
526                 quirk->data.address_val = data & 0xfff;
527             } else if ((data & ~0xff) == 0x1800) {
528                 quirk->data.flags |= NV_BAR5_ADDRESS;
529                 quirk->data.address_val = data & 0xff;
530             } else {
531                 quirk->data.flags &= ~NV_BAR5_ADDRESS;
532             }
533         }
534         break;
535     }
536 
537     vfio_generic_window_quirk_write(opaque, addr, data, size);
538 }
539 
540 static const MemoryRegionOps vfio_nvidia_bar5_window_quirk = {
541     .read = vfio_generic_window_quirk_read,
542     .write = vfio_nvidia_bar5_window_quirk_write,
543     .valid.min_access_size = 4,
544     .endianness = DEVICE_LITTLE_ENDIAN,
545 };
546 
547 static void vfio_probe_nvidia_bar5_window_quirk(VFIOPCIDevice *vdev, int nr)
548 {
549     PCIDevice *pdev = &vdev->pdev;
550     VFIOQuirk *quirk;
551 
552     if (!vdev->has_vga || nr != 5 ||
553         pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA) {
554         return;
555     }
556 
557     quirk = g_malloc0(sizeof(*quirk));
558     quirk->vdev = vdev;
559     quirk->data.read_flags = quirk->data.write_flags = NV_BAR5_VALID;
560     quirk->data.address_offset = 0x8;
561     quirk->data.address_size = 0; /* actually 4, but avoids generic code */
562     quirk->data.data_offset = 0xc;
563     quirk->data.data_size = 4;
564     quirk->data.bar = nr;
565 
566     memory_region_init_io(&quirk->mem, OBJECT(vdev),
567                           &vfio_nvidia_bar5_window_quirk, quirk,
568                           "vfio-nvidia-bar5-window-quirk", 16);
569     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
570                                         0, &quirk->mem, 1);
571 
572     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
573 
574     trace_vfio_probe_nvidia_bar5_window_quirk(vdev->vbasedev.name);
575 }
576 
577 static void vfio_nvidia_88000_quirk_write(void *opaque, hwaddr addr,
578                                           uint64_t data, unsigned size)
579 {
580     VFIOQuirk *quirk = opaque;
581     VFIOPCIDevice *vdev = quirk->vdev;
582     PCIDevice *pdev = &vdev->pdev;
583     hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK;
584 
585     vfio_generic_quirk_write(opaque, addr, data, size);
586 
587     /*
588      * Nvidia seems to acknowledge MSI interrupts by writing 0xff to the
589      * MSI capability ID register.  Both the ID and next register are
590      * read-only, so we allow writes covering either of those to real hw.
591      * NB - only fixed for the 0x88000 MMIO window.
592      */
593     if ((pdev->cap_present & QEMU_PCI_CAP_MSI) &&
594         vfio_range_contained(addr, size, pdev->msi_cap, PCI_MSI_FLAGS)) {
595         vfio_region_write(&vdev->bars[quirk->data.bar].region,
596                           addr + base, data, size);
597     }
598 }
599 
600 static const MemoryRegionOps vfio_nvidia_88000_quirk = {
601     .read = vfio_generic_quirk_read,
602     .write = vfio_nvidia_88000_quirk_write,
603     .endianness = DEVICE_LITTLE_ENDIAN,
604 };
605 
606 /*
607  * Finally, BAR0 itself.  We want to redirect any accesses to either
608  * 0x1800 or 0x88000 through the PCI config space access functions.
609  *
610  * NB - quirk at a page granularity or else they don't seem to work when
611  *      BARs are mmap'd
612  *
613  * Here's offset 0x88000...
614  */
615 static void vfio_probe_nvidia_bar0_88000_quirk(VFIOPCIDevice *vdev, int nr)
616 {
617     PCIDevice *pdev = &vdev->pdev;
618     VFIOQuirk *quirk;
619     uint16_t vendor, class;
620 
621     vendor = pci_get_word(pdev->config + PCI_VENDOR_ID);
622     class = pci_get_word(pdev->config + PCI_CLASS_DEVICE);
623 
624     if (nr != 0 || vendor != PCI_VENDOR_ID_NVIDIA ||
625         class != PCI_CLASS_DISPLAY_VGA) {
626         return;
627     }
628 
629     quirk = g_malloc0(sizeof(*quirk));
630     quirk->vdev = vdev;
631     quirk->data.flags = quirk->data.read_flags = quirk->data.write_flags = 1;
632     quirk->data.address_match = 0x88000;
633     quirk->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1;
634     quirk->data.bar = nr;
635 
636     memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_nvidia_88000_quirk,
637                           quirk, "vfio-nvidia-bar0-88000-quirk",
638                           TARGET_PAGE_ALIGN(quirk->data.address_mask + 1));
639     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
640                           quirk->data.address_match & TARGET_PAGE_MASK,
641                           &quirk->mem, 1);
642 
643     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
644 
645     trace_vfio_probe_nvidia_bar0_88000_quirk(vdev->vbasedev.name);
646 }
647 
648 /*
649  * And here's the same for BAR0 offset 0x1800...
650  */
651 static void vfio_probe_nvidia_bar0_1800_quirk(VFIOPCIDevice *vdev, int nr)
652 {
653     PCIDevice *pdev = &vdev->pdev;
654     VFIOQuirk *quirk;
655 
656     if (!vdev->has_vga || nr != 0 ||
657         pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA) {
658         return;
659     }
660 
661     /* Log the chipset ID */
662     trace_vfio_probe_nvidia_bar0_1800_quirk_id(
663             (unsigned int)(vfio_region_read(&vdev->bars[0].region, 0, 4) >> 20)
664             & 0xff);
665 
666     quirk = g_malloc0(sizeof(*quirk));
667     quirk->vdev = vdev;
668     quirk->data.flags = quirk->data.read_flags = quirk->data.write_flags = 1;
669     quirk->data.address_match = 0x1800;
670     quirk->data.address_mask = PCI_CONFIG_SPACE_SIZE - 1;
671     quirk->data.bar = nr;
672 
673     memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_generic_quirk, quirk,
674                           "vfio-nvidia-bar0-1800-quirk",
675                           TARGET_PAGE_ALIGN(quirk->data.address_mask + 1));
676     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
677                           quirk->data.address_match & TARGET_PAGE_MASK,
678                           &quirk->mem, 1);
679 
680     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
681 
682     trace_vfio_probe_nvidia_bar0_1800_quirk(vdev->vbasedev.name);
683 }
684 
685 /*
686  * TODO - Some Nvidia devices provide config access to their companion HDA
687  * device and even to their parent bridge via these config space mirrors.
688  * Add quirks for those regions.
689  */
690 
/* PCI vendor ID checked by the RTL8168 quirk probe */
#define PCI_VENDOR_ID_REALTEK 0x10ec
692 
693 /*
694  * RTL8168 devices have a backdoor that can access the MSI-X table.  At BAR2
695  * offset 0x70 there is a dword data register, offset 0x74 is a dword address
696  * register.  According to the Linux r8169 driver, the MSI-X table is addressed
697  * when the "type" portion of the address register is set to 0x1.  This appears
698  * to be bits 16:30.  Bit 31 is both a write indicator and some sort of
699  * "address latched" indicator.  Bits 12:15 are a mask field, which we can
700  * ignore because the MSI-X table should always be accessed as a dword (full
701  * mask).  Bits 0:11 is offset within the type.
702  *
703  * Example trace:
704  *
705  * Read from MSI-X table offset 0
706  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x1f000, 4) // store read addr
707  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x8001f000 // latch
708  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x70, 4) = 0xfee00398 // read data
709  *
710  * Write 0xfee00000 to MSI-X table offset 0
711  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x70, 0xfee00000, 4) // write data
712  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x8001f000, 4) // do write
713  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x1f000 // complete
714  */
715 static uint64_t vfio_rtl8168_window_quirk_read(void *opaque,
716                                                hwaddr addr, unsigned size)
717 {
718     VFIOQuirk *quirk = opaque;
719     VFIOPCIDevice *vdev = quirk->vdev;
720     uint64_t val = 0;
721 
722     if (!quirk->data.flags) { /* Non-MSI-X table access */
723         return vfio_region_read(&vdev->bars[quirk->data.bar].region,
724                                 addr + 0x70, size);
725     }
726 
727     switch (addr) {
728     case 4: /* address */
729         val = quirk->data.address_match ^ 0x80000000U; /* latch/complete */
730         break;
731     case 0: /* data */
732         if ((vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) {
733             memory_region_dispatch_read(&vdev->pdev.msix_table_mmio,
734                                 (hwaddr)(quirk->data.address_match & 0xfff),
735                                 &val, size, MEMTXATTRS_UNSPECIFIED);
736         }
737         break;
738     }
739 
740     trace_vfio_rtl8168_quirk_read(vdev->vbasedev.name,
741                                   addr ? "address" : "data", val);
742     return val;
743 }
744 
745 static void vfio_rtl8168_window_quirk_write(void *opaque, hwaddr addr,
746                                             uint64_t data, unsigned size)
747 {
748     VFIOQuirk *quirk = opaque;
749     VFIOPCIDevice *vdev = quirk->vdev;
750 
751     switch (addr) {
752     case 4: /* address */
753         if ((data & 0x7fff0000) == 0x10000) { /* MSI-X table */
754             quirk->data.flags = 1; /* Activate reads */
755             quirk->data.address_match = data;
756 
757             trace_vfio_rtl8168_quirk_write(vdev->vbasedev.name, data);
758 
759             if (data & 0x80000000U) { /* Do write */
760                 if (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) {
761                     hwaddr offset = data & 0xfff;
762                     uint64_t val = quirk->data.address_mask;
763 
764                     trace_vfio_rtl8168_quirk_msix(vdev->vbasedev.name,
765                                                   (uint16_t)offset, val);
766 
767                     /* Write to the proper guest MSI-X table instead */
768                     memory_region_dispatch_write(&vdev->pdev.msix_table_mmio,
769                                                  offset, val, size,
770                                                  MEMTXATTRS_UNSPECIFIED);
771                 }
772                 return; /* Do not write guest MSI-X data to hardware */
773             }
774         } else {
775             quirk->data.flags = 0; /* De-activate reads, non-MSI-X */
776         }
777         break;
778     case 0: /* data */
779         quirk->data.address_mask = data;
780         break;
781     }
782 
783     vfio_region_write(&vdev->bars[quirk->data.bar].region,
784                       addr + 0x70, data, size);
785 }
786 
787 static const MemoryRegionOps vfio_rtl8168_window_quirk = {
788     .read = vfio_rtl8168_window_quirk_read,
789     .write = vfio_rtl8168_window_quirk_write,
790     .valid = {
791         .min_access_size = 4,
792         .max_access_size = 4,
793         .unaligned = false,
794     },
795     .endianness = DEVICE_LITTLE_ENDIAN,
796 };
797 
798 static void vfio_probe_rtl8168_bar2_window_quirk(VFIOPCIDevice *vdev, int nr)
799 {
800     PCIDevice *pdev = &vdev->pdev;
801     VFIOQuirk *quirk;
802 
803     if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_REALTEK ||
804         pci_get_word(pdev->config + PCI_DEVICE_ID) != 0x8168 || nr != 2) {
805         return;
806     }
807 
808     quirk = g_malloc0(sizeof(*quirk));
809     quirk->vdev = vdev;
810     quirk->data.bar = nr;
811 
812     memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_rtl8168_window_quirk,
813                           quirk, "vfio-rtl8168-window-quirk", 8);
814     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
815                                         0x70, &quirk->mem, 1);
816 
817     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
818 
819     trace_vfio_rtl8168_quirk_enable(vdev->vbasedev.name);
820 }
821 
822 /*
823  * Common quirk probe entry points.
824  */
825 void vfio_vga_quirk_setup(VFIOPCIDevice *vdev)
826 {
827     vfio_vga_probe_ati_3c3_quirk(vdev);
828     vfio_vga_probe_nvidia_3d0_quirk(vdev);
829 }
830 
831 void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev)
832 {
833     VFIOQuirk *quirk;
834     int i;
835 
836     for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) {
837         QLIST_FOREACH(quirk, &vdev->vga.region[i].quirks, next) {
838             memory_region_del_subregion(&vdev->vga.region[i].mem, &quirk->mem);
839         }
840     }
841 }
842 
843 void vfio_vga_quirk_free(VFIOPCIDevice *vdev)
844 {
845     int i;
846 
847     for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) {
848         while (!QLIST_EMPTY(&vdev->vga.region[i].quirks)) {
849             VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga.region[i].quirks);
850             object_unparent(OBJECT(&quirk->mem));
851             QLIST_REMOVE(quirk, next);
852             g_free(quirk);
853         }
854     }
855 }
856 
857 void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
858 {
859     vfio_probe_ati_bar4_window_quirk(vdev, nr);
860     vfio_probe_ati_bar2_4000_quirk(vdev, nr);
861     vfio_probe_nvidia_bar5_window_quirk(vdev, nr);
862     vfio_probe_nvidia_bar0_88000_quirk(vdev, nr);
863     vfio_probe_nvidia_bar0_1800_quirk(vdev, nr);
864     vfio_probe_rtl8168_bar2_window_quirk(vdev, nr);
865 }
866 
867 void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr)
868 {
869     VFIOBAR *bar = &vdev->bars[nr];
870     VFIOQuirk *quirk;
871 
872     QLIST_FOREACH(quirk, &bar->quirks, next) {
873         memory_region_del_subregion(&bar->region.mem, &quirk->mem);
874     }
875 }
876 
877 void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr)
878 {
879     VFIOBAR *bar = &vdev->bars[nr];
880 
881     while (!QLIST_EMPTY(&bar->quirks)) {
882         VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks);
883         object_unparent(OBJECT(&quirk->mem));
884         QLIST_REMOVE(quirk, next);
885         g_free(quirk);
886     }
887 }
888