xref: /qemu/hw/vfio/pci-quirks.c (revision 954258a5f11b51abd1ceed7c96d1204d4cef1353)
1 /*
2  * device quirks for PCI devices
3  *
4  * Copyright Red Hat, Inc. 2012-2015
5  *
6  * Authors:
7  *  Alex Williamson <alex.williamson@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  */
12 
13 #include "pci.h"
14 #include "trace.h"
15 #include "qemu/range.h"
16 
/* Wildcard ID (all bits set); vfio_pci_is() treats it as "match anything". */
#define PCI_ANY_ID (~0)
18 
19 /* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */
20 static bool vfio_pci_is(VFIOPCIDevice *vdev, uint32_t vendor, uint32_t device)
21 {
22     PCIDevice *pdev = &vdev->pdev;
23 
24     return (vendor == PCI_ANY_ID ||
25             vendor == pci_get_word(pdev->config + PCI_VENDOR_ID)) &&
26            (device == PCI_ANY_ID ||
27             device == pci_get_word(pdev->config + PCI_DEVICE_ID));
28 }
29 
/*
 * List of device ids/vendor ids for which to disable
 * option rom loading. This avoids the guest hangs during rom
 * execution as noticed with the BCM 57810 card, for lack of a
 * better way to handle such issues.
 * The user can still override by specifying a romfile or
 * rombar=1.
 * Please see https://bugs.launchpad.net/qemu/+bug/1284874
 * for an analysis of the 57810 card hang. When adding
 * a new vendor id/device id combination below, please also add
 * your card/environment details and information that could
 * help in debugging to the bug tracking this issue.
 */
static const struct {
    uint32_t vendor;
    uint32_t device;
} romblacklist[] = {
    { 0x14e4, 0x168e }, /* Broadcom BCM 57810 */
};
49 
50 bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev)
51 {
52     int i;
53 
54     for (i = 0 ; i < ARRAY_SIZE(romblacklist); i++) {
55         if (vfio_pci_is(vdev, romblacklist[i].vendor, romblacklist[i].device)) {
56             trace_vfio_quirk_rom_blacklisted(vdev->vbasedev.name,
57                                              romblacklist[i].vendor,
58                                              romblacklist[i].device);
59             return true;
60         }
61     }
62     return false;
63 }
64 
65 /*
66  * Device specific quirks
67  */
68 
/*
 * Is range1 fully contained within range2?
 *
 * Each range is described by its first address and its length in bytes.
 * The comparison uses subtraction only, so it gives the right answer even
 * when first + len would wrap around the top of the 64-bit address space.
 *
 * Returns true when every byte of range1 lies inside range2.
 */
static bool vfio_range_contained(uint64_t first1, uint64_t len1,
                                 uint64_t first2, uint64_t len2)
{
    /* range1 must start inside range2 and cannot be longer than range2... */
    if (first1 < first2 || len1 > len2) {
        return false;
    }

    /* ...and its start offset must leave room for all len1 bytes. */
    return first1 - first2 <= len2 - len1;
}
74 
/*
 * Return true when @mask is non-zero and every bit of @mask is also set in
 * @flags.  A zero mask never counts as enabled.
 */
static bool vfio_flags_enabled(uint8_t flags, uint8_t mask)
{
    if (mask == 0) {
        return false;
    }

    return (flags & mask) == mask;
}
79 
80 static uint64_t vfio_generic_window_quirk_read(void *opaque,
81                                                hwaddr addr, unsigned size)
82 {
83     VFIOLegacyQuirk *quirk = opaque;
84     VFIOPCIDevice *vdev = quirk->vdev;
85     uint64_t data;
86 
87     if (vfio_flags_enabled(quirk->data.flags, quirk->data.read_flags) &&
88         ranges_overlap(addr, size,
89                        quirk->data.data_offset, quirk->data.data_size)) {
90         hwaddr offset = addr - quirk->data.data_offset;
91 
92         if (!vfio_range_contained(addr, size, quirk->data.data_offset,
93                                   quirk->data.data_size)) {
94             hw_error("%s: window data read not fully contained: %s",
95                      __func__, memory_region_name(quirk->mem));
96         }
97 
98         data = vfio_pci_read_config(&vdev->pdev,
99                                     quirk->data.address_val + offset, size);
100 
101         trace_vfio_generic_window_quirk_read(memory_region_name(quirk->mem),
102                                              vdev->vbasedev.name,
103                                              quirk->data.bar,
104                                              addr, size, data);
105     } else {
106         data = vfio_region_read(&vdev->bars[quirk->data.bar].region,
107                                 addr + quirk->data.base_offset, size);
108     }
109 
110     return data;
111 }
112 
113 static void vfio_generic_window_quirk_write(void *opaque, hwaddr addr,
114                                             uint64_t data, unsigned size)
115 {
116     VFIOLegacyQuirk *quirk = opaque;
117     VFIOPCIDevice *vdev = quirk->vdev;
118 
119     if (ranges_overlap(addr, size,
120                        quirk->data.address_offset, quirk->data.address_size)) {
121 
122         if (addr != quirk->data.address_offset) {
123             hw_error("%s: offset write into address window: %s",
124                      __func__, memory_region_name(quirk->mem));
125         }
126 
127         if ((data & ~quirk->data.address_mask) == quirk->data.address_match) {
128             quirk->data.flags |= quirk->data.write_flags |
129                                  quirk->data.read_flags;
130             quirk->data.address_val = data & quirk->data.address_mask;
131         } else {
132             quirk->data.flags &= ~(quirk->data.write_flags |
133                                    quirk->data.read_flags);
134         }
135     }
136 
137     if (vfio_flags_enabled(quirk->data.flags, quirk->data.write_flags) &&
138         ranges_overlap(addr, size,
139                        quirk->data.data_offset, quirk->data.data_size)) {
140         hwaddr offset = addr - quirk->data.data_offset;
141 
142         if (!vfio_range_contained(addr, size, quirk->data.data_offset,
143                                   quirk->data.data_size)) {
144             hw_error("%s: window data write not fully contained: %s",
145                      __func__, memory_region_name(quirk->mem));
146         }
147 
148         vfio_pci_write_config(&vdev->pdev,
149                               quirk->data.address_val + offset, data, size);
150         trace_vfio_generic_window_quirk_write(memory_region_name(quirk->mem),
151                                               vdev->vbasedev.name,
152                                               quirk->data.bar,
153                                               addr, data, size);
154         return;
155     }
156 
157     vfio_region_write(&vdev->bars[quirk->data.bar].region,
158                    addr + quirk->data.base_offset, data, size);
159 }
160 
161 static const MemoryRegionOps vfio_generic_window_quirk = {
162     .read = vfio_generic_window_quirk_read,
163     .write = vfio_generic_window_quirk_write,
164     .endianness = DEVICE_LITTLE_ENDIAN,
165 };
166 
167 static uint64_t vfio_generic_quirk_read(void *opaque,
168                                         hwaddr addr, unsigned size)
169 {
170     VFIOLegacyQuirk *quirk = opaque;
171     VFIOPCIDevice *vdev = quirk->vdev;
172     hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK;
173     hwaddr offset = quirk->data.address_match & ~TARGET_PAGE_MASK;
174     uint64_t data;
175 
176     if (vfio_flags_enabled(quirk->data.flags, quirk->data.read_flags) &&
177         ranges_overlap(addr, size, offset, quirk->data.address_mask + 1)) {
178         if (!vfio_range_contained(addr, size, offset,
179                                   quirk->data.address_mask + 1)) {
180             hw_error("%s: read not fully contained: %s",
181                      __func__, memory_region_name(quirk->mem));
182         }
183 
184         data = vfio_pci_read_config(&vdev->pdev, addr - offset, size);
185 
186         trace_vfio_generic_quirk_read(memory_region_name(quirk->mem),
187                                       vdev->vbasedev.name, quirk->data.bar,
188                                       addr + base, size, data);
189     } else {
190         data = vfio_region_read(&vdev->bars[quirk->data.bar].region,
191                                 addr + base, size);
192     }
193 
194     return data;
195 }
196 
197 static void vfio_generic_quirk_write(void *opaque, hwaddr addr,
198                                      uint64_t data, unsigned size)
199 {
200     VFIOLegacyQuirk *quirk = opaque;
201     VFIOPCIDevice *vdev = quirk->vdev;
202     hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK;
203     hwaddr offset = quirk->data.address_match & ~TARGET_PAGE_MASK;
204 
205     if (vfio_flags_enabled(quirk->data.flags, quirk->data.write_flags) &&
206         ranges_overlap(addr, size, offset, quirk->data.address_mask + 1)) {
207         if (!vfio_range_contained(addr, size, offset,
208                                   quirk->data.address_mask + 1)) {
209             hw_error("%s: write not fully contained: %s",
210                      __func__, memory_region_name(quirk->mem));
211         }
212 
213         vfio_pci_write_config(&vdev->pdev, addr - offset, data, size);
214 
215         trace_vfio_generic_quirk_write(memory_region_name(quirk->mem),
216                                        vdev->vbasedev.name, quirk->data.bar,
217                                        addr + base, data, size);
218     } else {
219         vfio_region_write(&vdev->bars[quirk->data.bar].region,
220                           addr + base, data, size);
221     }
222 }
223 
224 static const MemoryRegionOps vfio_generic_quirk = {
225     .read = vfio_generic_quirk_read,
226     .write = vfio_generic_quirk_write,
227     .endianness = DEVICE_LITTLE_ENDIAN,
228 };
229 
/* Vendor ID the ATI quirks compare against the PCI_VENDOR_ID config word. */
#define PCI_VENDOR_ID_ATI               0x1002
231 
232 /*
233  * Radeon HD cards (HD5450 & HD7850) report the upper byte of the I/O port BAR
234  * through VGA register 0x3c3.  On newer cards, the I/O port BAR is always
235  * BAR4 (older cards like the X550 used BAR1, but we don't care to support
236  * those).  Note that on bare metal, a read of 0x3c3 doesn't always return the
237  * I/O port BAR address.  Originally this was coded to return the virtual BAR
238  * address only if the physical register read returns the actual BAR address,
239  * but users have reported greater success if we return the virtual address
240  * unconditionally.
241  */
242 static uint64_t vfio_ati_3c3_quirk_read(void *opaque,
243                                         hwaddr addr, unsigned size)
244 {
245     VFIOPCIDevice *vdev = opaque;
246     uint64_t data = vfio_pci_read_config(&vdev->pdev,
247                                          PCI_BASE_ADDRESS_4 + 1, size);
248 
249     trace_vfio_quirk_ati_3c3_read(vdev->vbasedev.name, data);
250 
251     return data;
252 }
253 
254 static const MemoryRegionOps vfio_ati_3c3_quirk = {
255     .read = vfio_ati_3c3_quirk_read,
256     .endianness = DEVICE_LITTLE_ENDIAN,
257 };
258 
259 static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev)
260 {
261     VFIOQuirk *quirk;
262 
263     /*
264      * As long as the BAR is >= 256 bytes it will be aligned such that the
265      * lower byte is always zero.  Filter out anything else, if it exists.
266      */
267     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
268         !vdev->bars[4].ioport || vdev->bars[4].region.size < 256) {
269         return;
270     }
271 
272     quirk = g_malloc0(sizeof(*quirk));
273     quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
274     quirk->nr_mem = 1;
275 
276     memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, vdev,
277                           "vfio-ati-3c3-quirk", 1);
278     memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
279                                 3 /* offset 3 bytes from 0x3c0 */, quirk->mem);
280 
281     QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks,
282                       quirk, next);
283 
284     trace_vfio_quirk_ati_3c3_probe(vdev->vbasedev.name);
285 }
286 
287 /*
288  * Newer ATI/AMD devices, including HD5450 and HD7850, have a window to PCI
289  * config space through MMIO BAR2 at offset 0x4000.  Nothing seems to access
290  * the MMIO space directly, but a window to this space is provided through
291  * I/O port BAR4.  Offset 0x0 is the address register and offset 0x4 is the
292  * data register.  When the address is programmed to a range of 0x4000-0x4fff
293  * PCI configuration space is available.  Experimentation seems to indicate
294  * that only read-only access is provided, but we drop writes when the window
295  * is enabled to config space nonetheless.
296  */
297 static void vfio_probe_ati_bar4_window_quirk(VFIOPCIDevice *vdev, int nr)
298 {
299     PCIDevice *pdev = &vdev->pdev;
300     VFIOQuirk *quirk;
301     VFIOLegacyQuirk *legacy;
302 
303     if (!vdev->has_vga || nr != 4 ||
304         pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) {
305         return;
306     }
307 
308     quirk = g_malloc0(sizeof(*quirk));
309     quirk->data = legacy = g_malloc0(sizeof(*legacy));
310     quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
311     quirk->nr_mem = 1;
312     legacy->vdev = vdev;
313     legacy->data.address_size = 4;
314     legacy->data.data_offset = 4;
315     legacy->data.data_size = 4;
316     legacy->data.address_match = 0x4000;
317     legacy->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1;
318     legacy->data.bar = nr;
319     legacy->data.read_flags = legacy->data.write_flags = 1;
320 
321     memory_region_init_io(quirk->mem, OBJECT(vdev),
322                           &vfio_generic_window_quirk, legacy,
323                           "vfio-ati-bar4-window-quirk", 8);
324     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
325                           legacy->data.base_offset, quirk->mem, 1);
326 
327     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
328 
329     trace_vfio_probe_ati_bar4_window_quirk(vdev->vbasedev.name);
330 }
331 
332 /*
333  * Trap the BAR2 MMIO window to config space as well.
334  */
335 static void vfio_probe_ati_bar2_4000_quirk(VFIOPCIDevice *vdev, int nr)
336 {
337     PCIDevice *pdev = &vdev->pdev;
338     VFIOQuirk *quirk;
339     VFIOLegacyQuirk *legacy;
340 
341     /* Only enable on newer devices where BAR2 is 64bit */
342     if (!vdev->has_vga || nr != 2 || !vdev->bars[2].mem64 ||
343         pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) {
344         return;
345     }
346 
347     quirk = g_malloc0(sizeof(*quirk));
348     quirk->data = legacy = g_malloc0(sizeof(*legacy));
349     quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
350     quirk->nr_mem = 1;
351     legacy->vdev = vdev;
352     legacy->data.flags = legacy->data.read_flags = legacy->data.write_flags = 1;
353     legacy->data.address_match = 0x4000;
354     legacy->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1;
355     legacy->data.bar = nr;
356 
357     memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_generic_quirk, legacy,
358                           "vfio-ati-bar2-4000-quirk",
359                           TARGET_PAGE_ALIGN(legacy->data.address_mask + 1));
360     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
361                           legacy->data.address_match & TARGET_PAGE_MASK,
362                           quirk->mem, 1);
363 
364     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
365 
366     trace_vfio_probe_ati_bar2_4000_quirk(vdev->vbasedev.name);
367 }
368 
369 /*
370  * Older ATI/AMD cards like the X550 have a similar window to that above.
371  * I/O port BAR1 provides a window to a mirror of PCI config space located
372  * in BAR2 at offset 0xf00.  We don't care to support such older cards, but
373  * note it for future reference.
374  */
375 
/* Vendor ID the Nvidia quirks compare against the PCI_VENDOR_ID config word. */
#define PCI_VENDOR_ID_NVIDIA                    0x10de
377 
378 /*
 * Nvidia has several different methods to get to config space, the
 * nouveau project has several of these documented here:
381  * https://github.com/pathscale/envytools/tree/master/hwdocs
382  *
383  * The first quirk is actually not documented in envytools and is found
384  * on 10de:01d1 (NVIDIA Corporation G72 [GeForce 7300 LE]).  This is an
385  * NV46 chipset.  The backdoor uses the legacy VGA I/O ports to access
386  * the mirror of PCI config space found at BAR0 offset 0x1800.  The access
387  * sequence first writes 0x338 to I/O port 0x3d4.  The target offset is
388  * then written to 0x3d0.  Finally 0x538 is written for a read and 0x738
389  * is written for a write to 0x3d4.  The BAR0 offset is then accessible
390  * through 0x3d0.  This quirk doesn't seem to be necessary on newer cards
391  * that use the I/O port BAR5 window but it doesn't hurt to leave it.
392  */
/*
 * Progress of the 0x3d4/0x3d0 backdoor access sequence:
 *   NONE   - no sequence in progress
 *   SELECT - 0x338 was written to 0x3d4
 *   WINDOW - the target offset was then written to 0x3d0
 *   READ   - 0x538 was then written to 0x3d4
 *   WRITE  - 0x738 was then written to 0x3d4
 */
typedef enum {
    NONE = 0,
    SELECT,
    WINDOW,
    READ,
    WRITE,
} VFIONvidia3d0State;

/* Printable state names for tracing, indexed by VFIONvidia3d0State */
static const char *nv3d0_states[] = {
    [NONE]   = "NONE",
    [SELECT] = "SELECT",
    [WINDOW] = "WINDOW",
    [READ]   = "READ",
    [WRITE]  = "WRITE",
};
396 
/* State shared by the 0x3d4 and 0x3d0 quirk regions of one device. */
typedef struct VFIONvidia3d0Quirk {
    VFIOPCIDevice *vdev;        /* device the quirk belongs to */
    VFIONvidia3d0State state;   /* current step of the access sequence */
    uint32_t offset;            /* value written to 0x3d0 in SELECT state */
} VFIONvidia3d0Quirk;
402 
403 static uint64_t vfio_nvidia_3d4_quirk_read(void *opaque,
404                                            hwaddr addr, unsigned size)
405 {
406     VFIONvidia3d0Quirk *quirk = opaque;
407     VFIOPCIDevice *vdev = quirk->vdev;
408 
409     quirk->state = NONE;
410 
411     return vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
412                          addr + 0x14, size);
413 }
414 
415 static void vfio_nvidia_3d4_quirk_write(void *opaque, hwaddr addr,
416                                         uint64_t data, unsigned size)
417 {
418     VFIONvidia3d0Quirk *quirk = opaque;
419     VFIOPCIDevice *vdev = quirk->vdev;
420     VFIONvidia3d0State old_state = quirk->state;
421 
422     quirk->state = NONE;
423 
424     switch (data) {
425     case 0x338:
426         if (old_state == NONE) {
427             quirk->state = SELECT;
428             trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
429                                               nv3d0_states[quirk->state]);
430         }
431         break;
432     case 0x538:
433         if (old_state == WINDOW) {
434             quirk->state = READ;
435             trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
436                                               nv3d0_states[quirk->state]);
437         }
438         break;
439     case 0x738:
440         if (old_state == WINDOW) {
441             quirk->state = WRITE;
442             trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
443                                               nv3d0_states[quirk->state]);
444         }
445         break;
446     }
447 
448     vfio_vga_write(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
449                    addr + 0x14, data, size);
450 }
451 
452 static const MemoryRegionOps vfio_nvidia_3d4_quirk = {
453     .read = vfio_nvidia_3d4_quirk_read,
454     .write = vfio_nvidia_3d4_quirk_write,
455     .endianness = DEVICE_LITTLE_ENDIAN,
456 };
457 
458 static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque,
459                                            hwaddr addr, unsigned size)
460 {
461     VFIONvidia3d0Quirk *quirk = opaque;
462     VFIOPCIDevice *vdev = quirk->vdev;
463     VFIONvidia3d0State old_state = quirk->state;
464     uint64_t data = vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
465                                   addr + 0x10, size);
466 
467     quirk->state = NONE;
468 
469     if (old_state == READ &&
470         (quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) {
471         uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1);
472 
473         data = vfio_pci_read_config(&vdev->pdev, offset, size);
474         trace_vfio_quirk_nvidia_3d0_read(vdev->vbasedev.name,
475                                          offset, size, data);
476     }
477 
478     return data;
479 }
480 
481 static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr,
482                                         uint64_t data, unsigned size)
483 {
484     VFIONvidia3d0Quirk *quirk = opaque;
485     VFIOPCIDevice *vdev = quirk->vdev;
486     VFIONvidia3d0State old_state = quirk->state;
487 
488     quirk->state = NONE;
489 
490     if (old_state == SELECT) {
491         quirk->offset = (uint32_t)data;
492         quirk->state = WINDOW;
493         trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
494                                           nv3d0_states[quirk->state]);
495     } else if (old_state == WRITE) {
496         if ((quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) {
497             uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1);
498 
499             vfio_pci_write_config(&vdev->pdev, offset, data, size);
500             trace_vfio_quirk_nvidia_3d0_write(vdev->vbasedev.name,
501                                               offset, data, size);
502             return;
503         }
504     }
505 
506     vfio_vga_write(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
507                    addr + 0x10, data, size);
508 }
509 
510 static const MemoryRegionOps vfio_nvidia_3d0_quirk = {
511     .read = vfio_nvidia_3d0_quirk_read,
512     .write = vfio_nvidia_3d0_quirk_write,
513     .endianness = DEVICE_LITTLE_ENDIAN,
514 };
515 
516 static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev)
517 {
518     VFIOQuirk *quirk;
519     VFIONvidia3d0Quirk *data;
520 
521     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
522         !vdev->bars[1].region.size) {
523         return;
524     }
525 
526     quirk = g_malloc0(sizeof(*quirk));
527     quirk->data = data = g_malloc0(sizeof(*data));
528     quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 2);
529     quirk->nr_mem = 2;
530     data->vdev = vdev;
531 
532     memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_nvidia_3d4_quirk,
533                           data, "vfio-nvidia-3d4-quirk", 2);
534     memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
535                                 0x14 /* 0x3c0 + 0x14 */, &quirk->mem[0]);
536 
537     memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_nvidia_3d0_quirk,
538                           data, "vfio-nvidia-3d0-quirk", 2);
539     memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
540                                 0x10 /* 0x3c0 + 0x10 */, &quirk->mem[1]);
541 
542     QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks,
543                       quirk, next);
544 
545     trace_vfio_quirk_nvidia_3d0_probe(vdev->vbasedev.name);
546 }
547 
548 /*
549  * The second quirk is documented in envytools.  The I/O port BAR5 is just
550  * a set of address/data ports to the MMIO BARs.  The BAR we care about is
551  * again BAR0.  This backdoor is apparently a bit newer than the one above
552  * so we need to not only trap 256 bytes @0x1800, but all of PCI config
553  * space, including extended space is available at the 4k @0x88000.
554  */
/*
 * Flag bits tracked for the BAR5 window; the window is trapped only once
 * all three bits are set (NV_BAR5_VALID).
 */
enum {
    NV_BAR5_ADDRESS = 1 << 0,   /* address register points at a mirror */
    NV_BAR5_ENABLE  = 1 << 1,   /* bit 0 of offset 0x4 is set */
    NV_BAR5_MASTER  = 1 << 2,   /* bit 0 of offset 0x0 is set */
    NV_BAR5_VALID   = NV_BAR5_ADDRESS | NV_BAR5_ENABLE | NV_BAR5_MASTER,
};
561 
562 static void vfio_nvidia_bar5_window_quirk_write(void *opaque, hwaddr addr,
563                                                 uint64_t data, unsigned size)
564 {
565     VFIOLegacyQuirk *quirk = opaque;
566 
567     switch (addr) {
568     case 0x0:
569         if (data & 0x1) {
570             quirk->data.flags |= NV_BAR5_MASTER;
571         } else {
572             quirk->data.flags &= ~NV_BAR5_MASTER;
573         }
574         break;
575     case 0x4:
576         if (data & 0x1) {
577             quirk->data.flags |= NV_BAR5_ENABLE;
578         } else {
579             quirk->data.flags &= ~NV_BAR5_ENABLE;
580         }
581         break;
582     case 0x8:
583         if (quirk->data.flags & NV_BAR5_MASTER) {
584             if ((data & ~0xfff) == 0x88000) {
585                 quirk->data.flags |= NV_BAR5_ADDRESS;
586                 quirk->data.address_val = data & 0xfff;
587             } else if ((data & ~0xff) == 0x1800) {
588                 quirk->data.flags |= NV_BAR5_ADDRESS;
589                 quirk->data.address_val = data & 0xff;
590             } else {
591                 quirk->data.flags &= ~NV_BAR5_ADDRESS;
592             }
593         }
594         break;
595     }
596 
597     vfio_generic_window_quirk_write(opaque, addr, data, size);
598 }
599 
600 static const MemoryRegionOps vfio_nvidia_bar5_window_quirk = {
601     .read = vfio_generic_window_quirk_read,
602     .write = vfio_nvidia_bar5_window_quirk_write,
603     .valid.min_access_size = 4,
604     .endianness = DEVICE_LITTLE_ENDIAN,
605 };
606 
607 static void vfio_probe_nvidia_bar5_window_quirk(VFIOPCIDevice *vdev, int nr)
608 {
609     PCIDevice *pdev = &vdev->pdev;
610     VFIOQuirk *quirk;
611     VFIOLegacyQuirk *legacy;
612 
613     if (!vdev->has_vga || nr != 5 ||
614         pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA) {
615         return;
616     }
617 
618     quirk = g_malloc0(sizeof(*quirk));
619     quirk->data = legacy = g_malloc0(sizeof(*legacy));
620     quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
621     quirk->nr_mem = 1;
622     legacy->vdev = vdev;
623     legacy->data.read_flags = legacy->data.write_flags = NV_BAR5_VALID;
624     legacy->data.address_offset = 0x8;
625     legacy->data.address_size = 0; /* actually 4, but avoids generic code */
626     legacy->data.data_offset = 0xc;
627     legacy->data.data_size = 4;
628     legacy->data.bar = nr;
629 
630     memory_region_init_io(quirk->mem, OBJECT(vdev),
631                           &vfio_nvidia_bar5_window_quirk, legacy,
632                           "vfio-nvidia-bar5-window-quirk", 16);
633     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
634                                         0, quirk->mem, 1);
635 
636     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
637 
638     trace_vfio_probe_nvidia_bar5_window_quirk(vdev->vbasedev.name);
639 }
640 
641 static void vfio_nvidia_88000_quirk_write(void *opaque, hwaddr addr,
642                                           uint64_t data, unsigned size)
643 {
644     VFIOLegacyQuirk *quirk = opaque;
645     VFIOPCIDevice *vdev = quirk->vdev;
646     PCIDevice *pdev = &vdev->pdev;
647     hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK;
648 
649     vfio_generic_quirk_write(opaque, addr, data, size);
650 
651     /*
652      * Nvidia seems to acknowledge MSI interrupts by writing 0xff to the
653      * MSI capability ID register.  Both the ID and next register are
654      * read-only, so we allow writes covering either of those to real hw.
655      * NB - only fixed for the 0x88000 MMIO window.
656      */
657     if ((pdev->cap_present & QEMU_PCI_CAP_MSI) &&
658         vfio_range_contained(addr, size, pdev->msi_cap, PCI_MSI_FLAGS)) {
659         vfio_region_write(&vdev->bars[quirk->data.bar].region,
660                           addr + base, data, size);
661     }
662 }
663 
664 static const MemoryRegionOps vfio_nvidia_88000_quirk = {
665     .read = vfio_generic_quirk_read,
666     .write = vfio_nvidia_88000_quirk_write,
667     .endianness = DEVICE_LITTLE_ENDIAN,
668 };
669 
670 /*
671  * Finally, BAR0 itself.  We want to redirect any accesses to either
672  * 0x1800 or 0x88000 through the PCI config space access functions.
673  *
674  * NB - quirk at a page granularity or else they don't seem to work when
675  *      BARs are mmap'd
676  *
677  * Here's offset 0x88000...
678  */
679 static void vfio_probe_nvidia_bar0_88000_quirk(VFIOPCIDevice *vdev, int nr)
680 {
681     PCIDevice *pdev = &vdev->pdev;
682     VFIOQuirk *quirk;
683     VFIOLegacyQuirk *legacy;
684     uint16_t vendor, class;
685 
686     vendor = pci_get_word(pdev->config + PCI_VENDOR_ID);
687     class = pci_get_word(pdev->config + PCI_CLASS_DEVICE);
688 
689     if (nr != 0 || vendor != PCI_VENDOR_ID_NVIDIA ||
690         class != PCI_CLASS_DISPLAY_VGA) {
691         return;
692     }
693 
694     quirk = g_malloc0(sizeof(*quirk));
695     quirk->data = legacy = g_malloc0(sizeof(*legacy));
696     quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
697     quirk->nr_mem = 1;
698     legacy->vdev = vdev;
699     legacy->data.flags = legacy->data.read_flags = legacy->data.write_flags = 1;
700     legacy->data.address_match = 0x88000;
701     legacy->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1;
702     legacy->data.bar = nr;
703 
704     memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_nvidia_88000_quirk,
705                           legacy, "vfio-nvidia-bar0-88000-quirk",
706                           TARGET_PAGE_ALIGN(legacy->data.address_mask + 1));
707     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
708                           legacy->data.address_match & TARGET_PAGE_MASK,
709                           quirk->mem, 1);
710 
711     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
712 
713     trace_vfio_probe_nvidia_bar0_88000_quirk(vdev->vbasedev.name);
714 }
715 
716 /*
717  * And here's the same for BAR0 offset 0x1800...
718  */
719 static void vfio_probe_nvidia_bar0_1800_quirk(VFIOPCIDevice *vdev, int nr)
720 {
721     PCIDevice *pdev = &vdev->pdev;
722     VFIOQuirk *quirk;
723     VFIOLegacyQuirk *legacy;
724 
725     if (!vdev->has_vga || nr != 0 ||
726         pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA) {
727         return;
728     }
729 
730     /* Log the chipset ID */
731     trace_vfio_probe_nvidia_bar0_1800_quirk_id(
732             (unsigned int)(vfio_region_read(&vdev->bars[0].region, 0, 4) >> 20)
733             & 0xff);
734 
735     quirk = g_malloc0(sizeof(*quirk));
736     quirk->data = legacy = g_malloc0(sizeof(*legacy));
737     quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
738     quirk->nr_mem = 1;
739     legacy->vdev = vdev;
740     legacy->data.flags = legacy->data.read_flags = legacy->data.write_flags = 1;
741     legacy->data.address_match = 0x1800;
742     legacy->data.address_mask = PCI_CONFIG_SPACE_SIZE - 1;
743     legacy->data.bar = nr;
744 
745     memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_generic_quirk, legacy,
746                           "vfio-nvidia-bar0-1800-quirk",
747                           TARGET_PAGE_ALIGN(legacy->data.address_mask + 1));
748     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
749                           legacy->data.address_match & TARGET_PAGE_MASK,
750                           quirk->mem, 1);
751 
752     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
753 
754     trace_vfio_probe_nvidia_bar0_1800_quirk(vdev->vbasedev.name);
755 }
756 
757 /*
758  * TODO - Some Nvidia devices provide config access to their companion HDA
759  * device and even to their parent bridge via these config space mirrors.
760  * Add quirks for those regions.
761  */
762 
/*
 * Realtek vendor ID.  NOTE(review): presumably checked by the RTL8168 quirk
 * probe, which is not visible in this part of the file -- confirm.
 */
#define PCI_VENDOR_ID_REALTEK 0x10ec
764 
765 /*
766  * RTL8168 devices have a backdoor that can access the MSI-X table.  At BAR2
767  * offset 0x70 there is a dword data register, offset 0x74 is a dword address
768  * register.  According to the Linux r8169 driver, the MSI-X table is addressed
769  * when the "type" portion of the address register is set to 0x1.  This appears
770  * to be bits 16:30.  Bit 31 is both a write indicator and some sort of
771  * "address latched" indicator.  Bits 12:15 are a mask field, which we can
772  * ignore because the MSI-X table should always be accessed as a dword (full
773  * mask).  Bits 0:11 is offset within the type.
774  *
775  * Example trace:
776  *
777  * Read from MSI-X table offset 0
778  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x1f000, 4) // store read addr
779  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x8001f000 // latch
780  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x70, 4) = 0xfee00398 // read data
781  *
782  * Write 0xfee00000 to MSI-X table offset 0
783  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x70, 0xfee00000, 4) // write data
784  * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x8001f000, 4) // do write
785  * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x1f000 // complete
786  */
/* State for the RTL8168 MSI-X table backdoor quirk. */
typedef struct VFIOrtl8168Quirk {
    VFIOPCIDevice *vdev;    /* device the quirk belongs to */
    uint32_t addr;          /* last address-register value that selected
                             * the MSI-X table */
    uint32_t data;          /* value sent to the MSI-X table on a "do write";
                             * NOTE(review): presumably latched by the data
                             * register handler, not visible here -- confirm */
    bool enabled;           /* true while the last address write selected
                             * the MSI-X table */
} VFIOrtl8168Quirk;
793 
794 static uint64_t vfio_rtl8168_quirk_address_read(void *opaque,
795                                                 hwaddr addr, unsigned size)
796 {
797     VFIOrtl8168Quirk *rtl = opaque;
798     VFIOPCIDevice *vdev = rtl->vdev;
799     uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x74, size);
800 
801     if (rtl->enabled) {
802         data = rtl->addr ^ 0x80000000U; /* latch/complete */
803         trace_vfio_quirk_rtl8168_fake_latch(vdev->vbasedev.name, data);
804     }
805 
806     return data;
807 }
808 
809 static void vfio_rtl8168_quirk_address_write(void *opaque, hwaddr addr,
810                                              uint64_t data, unsigned size)
811 {
812     VFIOrtl8168Quirk *rtl = opaque;
813     VFIOPCIDevice *vdev = rtl->vdev;
814 
815     rtl->enabled = false;
816 
817     if ((data & 0x7fff0000) == 0x10000) { /* MSI-X table */
818         rtl->enabled = true;
819         rtl->addr = (uint32_t)data;
820 
821         if (data & 0x80000000U) { /* Do write */
822             if (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) {
823                 hwaddr offset = data & 0xfff;
824                 uint64_t val = rtl->data;
825 
826                 trace_vfio_quirk_rtl8168_msix_write(vdev->vbasedev.name,
827                                                     (uint16_t)offset, val);
828 
829                 /* Write to the proper guest MSI-X table instead */
830                 memory_region_dispatch_write(&vdev->pdev.msix_table_mmio,
831                                              offset, val, size,
832                                              MEMTXATTRS_UNSPECIFIED);
833             }
834             return; /* Do not write guest MSI-X data to hardware */
835         }
836     }
837 
838     vfio_region_write(&vdev->bars[2].region, addr + 0x74, data, size);
839 }
840 
841 static const MemoryRegionOps vfio_rtl_address_quirk = {
842     .read = vfio_rtl8168_quirk_address_read,
843     .write = vfio_rtl8168_quirk_address_write,
844     .valid = {
845         .min_access_size = 4,
846         .max_access_size = 4,
847         .unaligned = false,
848     },
849     .endianness = DEVICE_LITTLE_ENDIAN,
850 };
851 
852 static uint64_t vfio_rtl8168_quirk_data_read(void *opaque,
853                                              hwaddr addr, unsigned size)
854 {
855     VFIOrtl8168Quirk *rtl = opaque;
856     VFIOPCIDevice *vdev = rtl->vdev;
857     uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x74, size);
858 
859     if (rtl->enabled && (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) {
860         hwaddr offset = rtl->addr & 0xfff;
861         memory_region_dispatch_read(&vdev->pdev.msix_table_mmio, offset,
862                                     &data, size, MEMTXATTRS_UNSPECIFIED);
863         trace_vfio_quirk_rtl8168_msix_read(vdev->vbasedev.name, offset, data);
864     }
865 
866     return data;
867 }
868 
869 static void vfio_rtl8168_quirk_data_write(void *opaque, hwaddr addr,
870                                           uint64_t data, unsigned size)
871 {
872     VFIOrtl8168Quirk *rtl = opaque;
873     VFIOPCIDevice *vdev = rtl->vdev;
874 
875     rtl->data = (uint32_t)data;
876 
877     vfio_region_write(&vdev->bars[2].region, addr + 0x70, data, size);
878 }
879 
880 static const MemoryRegionOps vfio_rtl_data_quirk = {
881     .read = vfio_rtl8168_quirk_data_read,
882     .write = vfio_rtl8168_quirk_data_write,
883     .valid = {
884         .min_access_size = 4,
885         .max_access_size = 4,
886         .unaligned = false,
887     },
888     .endianness = DEVICE_LITTLE_ENDIAN,
889 };
890 
891 static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr)
892 {
893     VFIOQuirk *quirk;
894     VFIOrtl8168Quirk *rtl;
895 
896     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_REALTEK, 0x8168) || nr != 2) {
897         return;
898     }
899 
900     quirk = g_malloc0(sizeof(*quirk));
901     quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 2);
902     quirk->nr_mem = 2;
903     quirk->data = rtl = g_malloc0(sizeof(*rtl));
904     rtl->vdev = vdev;
905 
906     memory_region_init_io(&quirk->mem[0], OBJECT(vdev),
907                           &vfio_rtl_address_quirk, rtl,
908                           "vfio-rtl8168-window-address-quirk", 4);
909     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
910                                         0x74, &quirk->mem[0], 1);
911 
912     memory_region_init_io(&quirk->mem[1], OBJECT(vdev),
913                           &vfio_rtl_data_quirk, rtl,
914                           "vfio-rtl8168-window-data-quirk", 4);
915     memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
916                                         0x70, &quirk->mem[1], 1);
917 
918     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
919 
920     trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name);
921 }
922 
923 /*
924  * Common quirk probe entry points.
925  */
926 void vfio_vga_quirk_setup(VFIOPCIDevice *vdev)
927 {
928     vfio_vga_probe_ati_3c3_quirk(vdev);
929     vfio_vga_probe_nvidia_3d0_quirk(vdev);
930 }
931 
932 void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev)
933 {
934     VFIOQuirk *quirk;
935     int i, j;
936 
937     for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) {
938         QLIST_FOREACH(quirk, &vdev->vga.region[i].quirks, next) {
939             for (j = 0; j < quirk->nr_mem; j++) {
940                 memory_region_del_subregion(&vdev->vga.region[i].mem,
941                                             &quirk->mem[j]);
942             }
943         }
944     }
945 }
946 
947 void vfio_vga_quirk_free(VFIOPCIDevice *vdev)
948 {
949     int i, j;
950 
951     for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) {
952         while (!QLIST_EMPTY(&vdev->vga.region[i].quirks)) {
953             VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga.region[i].quirks);
954             QLIST_REMOVE(quirk, next);
955             for (j = 0; j < quirk->nr_mem; j++) {
956                 object_unparent(OBJECT(&quirk->mem[j]));
957             }
958             g_free(quirk->mem);
959             g_free(quirk->data);
960             g_free(quirk);
961         }
962     }
963 }
964 
965 void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
966 {
967     vfio_probe_ati_bar4_window_quirk(vdev, nr);
968     vfio_probe_ati_bar2_4000_quirk(vdev, nr);
969     vfio_probe_nvidia_bar5_window_quirk(vdev, nr);
970     vfio_probe_nvidia_bar0_88000_quirk(vdev, nr);
971     vfio_probe_nvidia_bar0_1800_quirk(vdev, nr);
972     vfio_probe_rtl8168_bar2_quirk(vdev, nr);
973 }
974 
975 void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr)
976 {
977     VFIOBAR *bar = &vdev->bars[nr];
978     VFIOQuirk *quirk;
979     int i;
980 
981     QLIST_FOREACH(quirk, &bar->quirks, next) {
982         for (i = 0; i < quirk->nr_mem; i++) {
983             memory_region_del_subregion(&bar->region.mem, &quirk->mem[i]);
984         }
985     }
986 }
987 
988 void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr)
989 {
990     VFIOBAR *bar = &vdev->bars[nr];
991     int i;
992 
993     while (!QLIST_EMPTY(&bar->quirks)) {
994         VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks);
995         QLIST_REMOVE(quirk, next);
996         for (i = 0; i < quirk->nr_mem; i++) {
997             object_unparent(OBJECT(&quirk->mem[i]));
998         }
999         g_free(quirk->mem);
1000         g_free(quirk->data);
1001         g_free(quirk);
1002     }
1003 }
1004