1 /* 2 * device quirks for PCI devices 3 * 4 * Copyright Red Hat, Inc. 2012-2015 5 * 6 * Authors: 7 * Alex Williamson <alex.williamson@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. See 10 * the COPYING file in the top-level directory. 11 */ 12 13 #include "pci.h" 14 #include "trace.h" 15 #include "qemu/range.h" 16 17 #define PCI_ANY_ID (~0) 18 19 /* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */ 20 static bool vfio_pci_is(VFIOPCIDevice *vdev, uint32_t vendor, uint32_t device) 21 { 22 PCIDevice *pdev = &vdev->pdev; 23 24 return (vendor == PCI_ANY_ID || 25 vendor == pci_get_word(pdev->config + PCI_VENDOR_ID)) && 26 (device == PCI_ANY_ID || 27 device == pci_get_word(pdev->config + PCI_DEVICE_ID)); 28 } 29 30 /* 31 * List of device ids/vendor ids for which to disable 32 * option rom loading. This avoids the guest hangs during rom 33 * execution as noticed with the BCM 57810 card for lack of a 34 * more better way to handle such issues. 35 * The user can still override by specifying a romfile or 36 * rombar=1. 37 * Please see https://bugs.launchpad.net/qemu/+bug/1284874 38 * for an analysis of the 57810 card hang. When adding 39 * a new vendor id/device id combination below, please also add 40 * your card/environment details and information that could 41 * help in debugging to the bug tracking this issue 42 */ 43 static const struct { 44 uint32_t vendor; 45 uint32_t device; 46 } romblacklist[] = { 47 { 0x14e4, 0x168e }, /* Broadcom BCM 57810 */ 48 }; 49 50 bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev) 51 { 52 int i; 53 54 for (i = 0 ; i < ARRAY_SIZE(romblacklist); i++) { 55 if (vfio_pci_is(vdev, romblacklist[i].vendor, romblacklist[i].device)) { 56 trace_vfio_quirk_rom_blacklisted(vdev->vbasedev.name, 57 romblacklist[i].vendor, 58 romblacklist[i].device); 59 return true; 60 } 61 } 62 return false; 63 } 64 65 /* 66 * Device specific quirks 67 */ 68 69 /* Is range1 fully contained within range2? */ 70 static bool vfio_range_contained(uint64_t first1, uint64_t len1, 71 uint64_t first2, uint64_t len2) { 72 return (first1 >= first2 && first1 + len1 <= first2 + len2); 73 } 74 75 static bool vfio_flags_enabled(uint8_t flags, uint8_t mask) 76 { 77 return (mask && (flags & mask) == mask); 78 } 79 80 static uint64_t vfio_generic_window_quirk_read(void *opaque, 81 hwaddr addr, unsigned size) 82 { 83 VFIOLegacyQuirk *quirk = opaque; 84 VFIOPCIDevice *vdev = quirk->vdev; 85 uint64_t data; 86 87 if (vfio_flags_enabled(quirk->data.flags, quirk->data.read_flags) && 88 ranges_overlap(addr, size, 89 quirk->data.data_offset, quirk->data.data_size)) { 90 hwaddr offset = addr - quirk->data.data_offset; 91 92 if (!vfio_range_contained(addr, size, quirk->data.data_offset, 93 quirk->data.data_size)) { 94 hw_error("%s: window data read not fully contained: %s", 95 __func__, memory_region_name(quirk->mem)); 96 } 97 98 data = vfio_pci_read_config(&vdev->pdev, 99 quirk->data.address_val + offset, size); 100 101 trace_vfio_generic_window_quirk_read(memory_region_name(quirk->mem), 102 vdev->vbasedev.name, 103 quirk->data.bar, 104 addr, size, data); 105 } else { 106 data = vfio_region_read(&vdev->bars[quirk->data.bar].region, 107 addr + quirk->data.base_offset, size); 108 } 109 110 return data; 111 } 112 113 static void vfio_generic_window_quirk_write(void *opaque, hwaddr addr, 114 uint64_t data, unsigned size) 115 { 116 VFIOLegacyQuirk *quirk = opaque; 117 VFIOPCIDevice *vdev = quirk->vdev; 118 119 if (ranges_overlap(addr, size, 120 quirk->data.address_offset, quirk->data.address_size)) { 121 122 if (addr != quirk->data.address_offset) { 123 hw_error("%s: offset write into address window: %s", 124 __func__, memory_region_name(quirk->mem)); 125 } 126 127 if ((data & ~quirk->data.address_mask) == quirk->data.address_match) { 128 quirk->data.flags |= quirk->data.write_flags | 129 quirk->data.read_flags; 130 quirk->data.address_val = data & quirk->data.address_mask; 131 } else { 132 quirk->data.flags &= ~(quirk->data.write_flags | 133 quirk->data.read_flags); 134 } 135 } 136 137 if (vfio_flags_enabled(quirk->data.flags, quirk->data.write_flags) && 138 ranges_overlap(addr, size, 139 quirk->data.data_offset, quirk->data.data_size)) { 140 hwaddr offset = addr - quirk->data.data_offset; 141 142 if (!vfio_range_contained(addr, size, quirk->data.data_offset, 143 quirk->data.data_size)) { 144 hw_error("%s: window data write not fully contained: %s", 145 __func__, memory_region_name(quirk->mem)); 146 } 147 148 vfio_pci_write_config(&vdev->pdev, 149 quirk->data.address_val + offset, data, size); 150 trace_vfio_generic_window_quirk_write(memory_region_name(quirk->mem), 151 vdev->vbasedev.name, 152 quirk->data.bar, 153 addr, data, size); 154 return; 155 } 156 157 vfio_region_write(&vdev->bars[quirk->data.bar].region, 158 addr + quirk->data.base_offset, data, size); 159 } 160 161 static const MemoryRegionOps vfio_generic_window_quirk = { 162 .read = vfio_generic_window_quirk_read, 163 .write = vfio_generic_window_quirk_write, 164 .endianness = DEVICE_LITTLE_ENDIAN, 165 }; 166 167 static uint64_t vfio_generic_quirk_read(void *opaque, 168 hwaddr addr, unsigned size) 169 { 170 VFIOLegacyQuirk *quirk = opaque; 171 VFIOPCIDevice *vdev = quirk->vdev; 172 hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK; 173 hwaddr offset = quirk->data.address_match & ~TARGET_PAGE_MASK; 174 uint64_t data; 175 176 if (vfio_flags_enabled(quirk->data.flags, quirk->data.read_flags) && 177 ranges_overlap(addr, size, offset, quirk->data.address_mask + 1)) { 178 if (!vfio_range_contained(addr, size, offset, 179 quirk->data.address_mask + 1)) { 180 hw_error("%s: read not fully contained: %s", 181 __func__, memory_region_name(quirk->mem)); 182 } 183 184 data = vfio_pci_read_config(&vdev->pdev, addr - offset, size); 185 186 trace_vfio_generic_quirk_read(memory_region_name(quirk->mem), 187 vdev->vbasedev.name, quirk->data.bar, 188 addr + base, size, data); 189 } else { 190 data = vfio_region_read(&vdev->bars[quirk->data.bar].region, 191 addr + base, size); 192 } 193 194 return data; 195 } 196 197 static void vfio_generic_quirk_write(void *opaque, hwaddr addr, 198 uint64_t data, unsigned size) 199 { 200 VFIOLegacyQuirk *quirk = opaque; 201 VFIOPCIDevice *vdev = quirk->vdev; 202 hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK; 203 hwaddr offset = quirk->data.address_match & ~TARGET_PAGE_MASK; 204 205 if (vfio_flags_enabled(quirk->data.flags, quirk->data.write_flags) && 206 ranges_overlap(addr, size, offset, quirk->data.address_mask + 1)) { 207 if (!vfio_range_contained(addr, size, offset, 208 quirk->data.address_mask + 1)) { 209 hw_error("%s: write not fully contained: %s", 210 __func__, memory_region_name(quirk->mem)); 211 } 212 213 vfio_pci_write_config(&vdev->pdev, addr - offset, data, size); 214 215 trace_vfio_generic_quirk_write(memory_region_name(quirk->mem), 216 vdev->vbasedev.name, quirk->data.bar, 217 addr + base, data, size); 218 } else { 219 vfio_region_write(&vdev->bars[quirk->data.bar].region, 220 addr + base, data, size); 221 } 222 } 223 224 static const MemoryRegionOps vfio_generic_quirk = { 225 .read = vfio_generic_quirk_read, 226 .write = vfio_generic_quirk_write, 227 .endianness = DEVICE_LITTLE_ENDIAN, 228 }; 229 230 #define PCI_VENDOR_ID_ATI 0x1002 231 232 /* 233 * Radeon HD cards (HD5450 & HD7850) report the upper byte of the I/O port BAR 234 * through VGA register 0x3c3. On newer cards, the I/O port BAR is always 235 * BAR4 (older cards like the X550 used BAR1, but we don't care to support 236 * those). Note that on bare metal, a read of 0x3c3 doesn't always return the 237 * I/O port BAR address. Originally this was coded to return the virtual BAR 238 * address only if the physical register read returns the actual BAR address, 239 * but users have reported greater success if we return the virtual address 240 * unconditionally. 241 */ 242 static uint64_t vfio_ati_3c3_quirk_read(void *opaque, 243 hwaddr addr, unsigned size) 244 { 245 VFIOPCIDevice *vdev = opaque; 246 uint64_t data = vfio_pci_read_config(&vdev->pdev, 247 PCI_BASE_ADDRESS_4 + 1, size); 248 249 trace_vfio_quirk_ati_3c3_read(vdev->vbasedev.name, data); 250 251 return data; 252 } 253 254 static const MemoryRegionOps vfio_ati_3c3_quirk = { 255 .read = vfio_ati_3c3_quirk_read, 256 .endianness = DEVICE_LITTLE_ENDIAN, 257 }; 258 259 static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev) 260 { 261 VFIOQuirk *quirk; 262 263 /* 264 * As long as the BAR is >= 256 bytes it will be aligned such that the 265 * lower byte is always zero. Filter out anything else, if it exists. 266 */ 267 if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) || 268 !vdev->bars[4].ioport || vdev->bars[4].region.size < 256) { 269 return; 270 } 271 272 quirk = g_malloc0(sizeof(*quirk)); 273 quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 1); 274 quirk->nr_mem = 1; 275 276 memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, vdev, 277 "vfio-ati-3c3-quirk", 1); 278 memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem, 279 3 /* offset 3 bytes from 0x3c0 */, quirk->mem); 280 281 QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks, 282 quirk, next); 283 284 trace_vfio_quirk_ati_3c3_probe(vdev->vbasedev.name); 285 } 286 287 /* 288 * Newer ATI/AMD devices, including HD5450 and HD7850, have a window to PCI 289 * config space through MMIO BAR2 at offset 0x4000. Nothing seems to access 290 * the MMIO space directly, but a window to this space is provided through 291 * I/O port BAR4. Offset 0x0 is the address register and offset 0x4 is the 292 * data register. When the address is programmed to a range of 0x4000-0x4fff 293 * PCI configuration space is available. Experimentation seems to indicate 294 * that only read-only access is provided, but we drop writes when the window 295 * is enabled to config space nonetheless. 296 */ 297 static void vfio_probe_ati_bar4_window_quirk(VFIOPCIDevice *vdev, int nr) 298 { 299 PCIDevice *pdev = &vdev->pdev; 300 VFIOQuirk *quirk; 301 VFIOLegacyQuirk *legacy; 302 303 if (!vdev->has_vga || nr != 4 || 304 pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) { 305 return; 306 } 307 308 quirk = g_malloc0(sizeof(*quirk)); 309 quirk->data = legacy = g_malloc0(sizeof(*legacy)); 310 quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1); 311 quirk->nr_mem = 1; 312 legacy->vdev = vdev; 313 legacy->data.address_size = 4; 314 legacy->data.data_offset = 4; 315 legacy->data.data_size = 4; 316 legacy->data.address_match = 0x4000; 317 legacy->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1; 318 legacy->data.bar = nr; 319 legacy->data.read_flags = legacy->data.write_flags = 1; 320 321 memory_region_init_io(quirk->mem, OBJECT(vdev), 322 &vfio_generic_window_quirk, legacy, 323 "vfio-ati-bar4-window-quirk", 8); 324 memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, 325 legacy->data.base_offset, quirk->mem, 1); 326 327 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); 328 329 trace_vfio_probe_ati_bar4_window_quirk(vdev->vbasedev.name); 330 } 331 332 /* 333 * Trap the BAR2 MMIO window to config space as well. 334 */ 335 static void vfio_probe_ati_bar2_4000_quirk(VFIOPCIDevice *vdev, int nr) 336 { 337 PCIDevice *pdev = &vdev->pdev; 338 VFIOQuirk *quirk; 339 VFIOLegacyQuirk *legacy; 340 341 /* Only enable on newer devices where BAR2 is 64bit */ 342 if (!vdev->has_vga || nr != 2 || !vdev->bars[2].mem64 || 343 pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) { 344 return; 345 } 346 347 quirk = g_malloc0(sizeof(*quirk)); 348 quirk->data = legacy = g_malloc0(sizeof(*legacy)); 349 quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1); 350 quirk->nr_mem = 1; 351 legacy->vdev = vdev; 352 legacy->data.flags = legacy->data.read_flags = legacy->data.write_flags = 1; 353 legacy->data.address_match = 0x4000; 354 legacy->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1; 355 legacy->data.bar = nr; 356 357 memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_generic_quirk, legacy, 358 "vfio-ati-bar2-4000-quirk", 359 TARGET_PAGE_ALIGN(legacy->data.address_mask + 1)); 360 memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, 361 legacy->data.address_match & TARGET_PAGE_MASK, 362 quirk->mem, 1); 363 364 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); 365 366 trace_vfio_probe_ati_bar2_4000_quirk(vdev->vbasedev.name); 367 } 368 369 /* 370 * Older ATI/AMD cards like the X550 have a similar window to that above. 371 * I/O port BAR1 provides a window to a mirror of PCI config space located 372 * in BAR2 at offset 0xf00. We don't care to support such older cards, but 373 * note it for future reference. 374 */ 375 376 #define PCI_VENDOR_ID_NVIDIA 0x10de 377 378 /* 379 * Nvidia has several different methods to get to config space, the 380 * nouveu project has several of these documented here: 381 * https://github.com/pathscale/envytools/tree/master/hwdocs 382 * 383 * The first quirk is actually not documented in envytools and is found 384 * on 10de:01d1 (NVIDIA Corporation G72 [GeForce 7300 LE]). This is an 385 * NV46 chipset. The backdoor uses the legacy VGA I/O ports to access 386 * the mirror of PCI config space found at BAR0 offset 0x1800. The access 387 * sequence first writes 0x338 to I/O port 0x3d4. The target offset is 388 * then written to 0x3d0. Finally 0x538 is written for a read and 0x738 389 * is written for a write to 0x3d4. The BAR0 offset is then accessible 390 * through 0x3d0. This quirk doesn't seem to be necessary on newer cards 391 * that use the I/O port BAR5 window but it doesn't hurt to leave it. 392 */ 393 enum { 394 NV_3D0_NONE = 0, 395 NV_3D0_SELECT, 396 NV_3D0_WINDOW, 397 NV_3D0_READ, 398 NV_3D0_WRITE, 399 }; 400 401 static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque, 402 hwaddr addr, unsigned size) 403 { 404 VFIOLegacyQuirk *quirk = opaque; 405 VFIOPCIDevice *vdev = quirk->vdev; 406 PCIDevice *pdev = &vdev->pdev; 407 uint64_t data = vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI], 408 addr + quirk->data.base_offset, size); 409 410 if (quirk->data.flags == NV_3D0_READ && addr == quirk->data.data_offset) { 411 data = vfio_pci_read_config(pdev, quirk->data.address_val, size); 412 trace_vfio_nvidia_3d0_quirk_read(size, data); 413 } 414 415 quirk->data.flags = NV_3D0_NONE; 416 417 return data; 418 } 419 420 static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr, 421 uint64_t data, unsigned size) 422 { 423 VFIOLegacyQuirk *quirk = opaque; 424 VFIOPCIDevice *vdev = quirk->vdev; 425 PCIDevice *pdev = &vdev->pdev; 426 427 switch (quirk->data.flags) { 428 case NV_3D0_NONE: 429 if (addr == quirk->data.address_offset && data == 0x338) { 430 quirk->data.flags = NV_3D0_SELECT; 431 } 432 break; 433 case NV_3D0_SELECT: 434 quirk->data.flags = NV_3D0_NONE; 435 if (addr == quirk->data.data_offset && 436 (data & ~quirk->data.address_mask) == quirk->data.address_match) { 437 quirk->data.flags = NV_3D0_WINDOW; 438 quirk->data.address_val = data & quirk->data.address_mask; 439 } 440 break; 441 case NV_3D0_WINDOW: 442 quirk->data.flags = NV_3D0_NONE; 443 if (addr == quirk->data.address_offset) { 444 if (data == 0x538) { 445 quirk->data.flags = NV_3D0_READ; 446 } else if (data == 0x738) { 447 quirk->data.flags = NV_3D0_WRITE; 448 } 449 } 450 break; 451 case NV_3D0_WRITE: 452 quirk->data.flags = NV_3D0_NONE; 453 if (addr == quirk->data.data_offset) { 454 vfio_pci_write_config(pdev, quirk->data.address_val, data, size); 455 trace_vfio_nvidia_3d0_quirk_write(data, size); 456 return; 457 } 458 break; 459 } 460 461 vfio_vga_write(&vdev->vga.region[QEMU_PCI_VGA_IO_HI], 462 addr + quirk->data.base_offset, data, size); 463 } 464 465 static const MemoryRegionOps vfio_nvidia_3d0_quirk = { 466 .read = vfio_nvidia_3d0_quirk_read, 467 .write = vfio_nvidia_3d0_quirk_write, 468 .endianness = DEVICE_LITTLE_ENDIAN, 469 }; 470 471 static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev) 472 { 473 PCIDevice *pdev = &vdev->pdev; 474 VFIOQuirk *quirk; 475 VFIOLegacyQuirk *legacy; 476 477 if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA || 478 !vdev->bars[1].region.size) { 479 return; 480 } 481 482 quirk = g_malloc0(sizeof(*quirk)); 483 quirk->data = legacy = g_malloc0(sizeof(*legacy)); 484 quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1); 485 quirk->nr_mem = 1; 486 legacy->vdev = vdev; 487 legacy->data.base_offset = 0x10; 488 legacy->data.address_offset = 4; 489 legacy->data.address_size = 2; 490 legacy->data.address_match = 0x1800; 491 legacy->data.address_mask = PCI_CONFIG_SPACE_SIZE - 1; 492 legacy->data.data_offset = 0; 493 legacy->data.data_size = 4; 494 495 memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_nvidia_3d0_quirk, 496 legacy, "vfio-nvidia-3d0-quirk", 6); 497 memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem, 498 legacy->data.base_offset, quirk->mem); 499 500 QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks, 501 quirk, next); 502 503 trace_vfio_vga_probe_nvidia_3d0_quirk(vdev->vbasedev.name); 504 } 505 506 /* 507 * The second quirk is documented in envytools. The I/O port BAR5 is just 508 * a set of address/data ports to the MMIO BARs. The BAR we care about is 509 * again BAR0. This backdoor is apparently a bit newer than the one above 510 * so we need to not only trap 256 bytes @0x1800, but all of PCI config 511 * space, including extended space is available at the 4k @0x88000. 512 */ 513 enum { 514 NV_BAR5_ADDRESS = 0x1, 515 NV_BAR5_ENABLE = 0x2, 516 NV_BAR5_MASTER = 0x4, 517 NV_BAR5_VALID = 0x7, 518 }; 519 520 static void vfio_nvidia_bar5_window_quirk_write(void *opaque, hwaddr addr, 521 uint64_t data, unsigned size) 522 { 523 VFIOLegacyQuirk *quirk = opaque; 524 525 switch (addr) { 526 case 0x0: 527 if (data & 0x1) { 528 quirk->data.flags |= NV_BAR5_MASTER; 529 } else { 530 quirk->data.flags &= ~NV_BAR5_MASTER; 531 } 532 break; 533 case 0x4: 534 if (data & 0x1) { 535 quirk->data.flags |= NV_BAR5_ENABLE; 536 } else { 537 quirk->data.flags &= ~NV_BAR5_ENABLE; 538 } 539 break; 540 case 0x8: 541 if (quirk->data.flags & NV_BAR5_MASTER) { 542 if ((data & ~0xfff) == 0x88000) { 543 quirk->data.flags |= NV_BAR5_ADDRESS; 544 quirk->data.address_val = data & 0xfff; 545 } else if ((data & ~0xff) == 0x1800) { 546 quirk->data.flags |= NV_BAR5_ADDRESS; 547 quirk->data.address_val = data & 0xff; 548 } else { 549 quirk->data.flags &= ~NV_BAR5_ADDRESS; 550 } 551 } 552 break; 553 } 554 555 vfio_generic_window_quirk_write(opaque, addr, data, size); 556 } 557 558 static const MemoryRegionOps vfio_nvidia_bar5_window_quirk = { 559 .read = vfio_generic_window_quirk_read, 560 .write = vfio_nvidia_bar5_window_quirk_write, 561 .valid.min_access_size = 4, 562 .endianness = DEVICE_LITTLE_ENDIAN, 563 }; 564 565 static void vfio_probe_nvidia_bar5_window_quirk(VFIOPCIDevice *vdev, int nr) 566 { 567 PCIDevice *pdev = &vdev->pdev; 568 VFIOQuirk *quirk; 569 VFIOLegacyQuirk *legacy; 570 571 if (!vdev->has_vga || nr != 5 || 572 pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA) { 573 return; 574 } 575 576 quirk = g_malloc0(sizeof(*quirk)); 577 quirk->data = legacy = g_malloc0(sizeof(*legacy)); 578 quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1); 579 quirk->nr_mem = 1; 580 legacy->vdev = vdev; 581 legacy->data.read_flags = legacy->data.write_flags = NV_BAR5_VALID; 582 legacy->data.address_offset = 0x8; 583 legacy->data.address_size = 0; /* actually 4, but avoids generic code */ 584 legacy->data.data_offset = 0xc; 585 legacy->data.data_size = 4; 586 legacy->data.bar = nr; 587 588 memory_region_init_io(quirk->mem, OBJECT(vdev), 589 &vfio_nvidia_bar5_window_quirk, legacy, 590 "vfio-nvidia-bar5-window-quirk", 16); 591 memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, 592 0, quirk->mem, 1); 593 594 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); 595 596 trace_vfio_probe_nvidia_bar5_window_quirk(vdev->vbasedev.name); 597 } 598 599 static void vfio_nvidia_88000_quirk_write(void *opaque, hwaddr addr, 600 uint64_t data, unsigned size) 601 { 602 VFIOLegacyQuirk *quirk = opaque; 603 VFIOPCIDevice *vdev = quirk->vdev; 604 PCIDevice *pdev = &vdev->pdev; 605 hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK; 606 607 vfio_generic_quirk_write(opaque, addr, data, size); 608 609 /* 610 * Nvidia seems to acknowledge MSI interrupts by writing 0xff to the 611 * MSI capability ID register. Both the ID and next register are 612 * read-only, so we allow writes covering either of those to real hw. 613 * NB - only fixed for the 0x88000 MMIO window. 614 */ 615 if ((pdev->cap_present & QEMU_PCI_CAP_MSI) && 616 vfio_range_contained(addr, size, pdev->msi_cap, PCI_MSI_FLAGS)) { 617 vfio_region_write(&vdev->bars[quirk->data.bar].region, 618 addr + base, data, size); 619 } 620 } 621 622 static const MemoryRegionOps vfio_nvidia_88000_quirk = { 623 .read = vfio_generic_quirk_read, 624 .write = vfio_nvidia_88000_quirk_write, 625 .endianness = DEVICE_LITTLE_ENDIAN, 626 }; 627 628 /* 629 * Finally, BAR0 itself. We want to redirect any accesses to either 630 * 0x1800 or 0x88000 through the PCI config space access functions. 631 * 632 * NB - quirk at a page granularity or else they don't seem to work when 633 * BARs are mmap'd 634 * 635 * Here's offset 0x88000... 636 */ 637 static void vfio_probe_nvidia_bar0_88000_quirk(VFIOPCIDevice *vdev, int nr) 638 { 639 PCIDevice *pdev = &vdev->pdev; 640 VFIOQuirk *quirk; 641 VFIOLegacyQuirk *legacy; 642 uint16_t vendor, class; 643 644 vendor = pci_get_word(pdev->config + PCI_VENDOR_ID); 645 class = pci_get_word(pdev->config + PCI_CLASS_DEVICE); 646 647 if (nr != 0 || vendor != PCI_VENDOR_ID_NVIDIA || 648 class != PCI_CLASS_DISPLAY_VGA) { 649 return; 650 } 651 652 quirk = g_malloc0(sizeof(*quirk)); 653 quirk->data = legacy = g_malloc0(sizeof(*legacy)); 654 quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1); 655 quirk->nr_mem = 1; 656 legacy->vdev = vdev; 657 legacy->data.flags = legacy->data.read_flags = legacy->data.write_flags = 1; 658 legacy->data.address_match = 0x88000; 659 legacy->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1; 660 legacy->data.bar = nr; 661 662 memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_nvidia_88000_quirk, 663 legacy, "vfio-nvidia-bar0-88000-quirk", 664 TARGET_PAGE_ALIGN(legacy->data.address_mask + 1)); 665 memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, 666 legacy->data.address_match & TARGET_PAGE_MASK, 667 quirk->mem, 1); 668 669 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); 670 671 trace_vfio_probe_nvidia_bar0_88000_quirk(vdev->vbasedev.name); 672 } 673 674 /* 675 * And here's the same for BAR0 offset 0x1800... 676 */ 677 static void vfio_probe_nvidia_bar0_1800_quirk(VFIOPCIDevice *vdev, int nr) 678 { 679 PCIDevice *pdev = &vdev->pdev; 680 VFIOQuirk *quirk; 681 VFIOLegacyQuirk *legacy; 682 683 if (!vdev->has_vga || nr != 0 || 684 pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA) { 685 return; 686 } 687 688 /* Log the chipset ID */ 689 trace_vfio_probe_nvidia_bar0_1800_quirk_id( 690 (unsigned int)(vfio_region_read(&vdev->bars[0].region, 0, 4) >> 20) 691 & 0xff); 692 693 quirk = g_malloc0(sizeof(*quirk)); 694 quirk->data = legacy = g_malloc0(sizeof(*legacy)); 695 quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1); 696 quirk->nr_mem = 1; 697 legacy->vdev = vdev; 698 legacy->data.flags = legacy->data.read_flags = legacy->data.write_flags = 1; 699 legacy->data.address_match = 0x1800; 700 legacy->data.address_mask = PCI_CONFIG_SPACE_SIZE - 1; 701 legacy->data.bar = nr; 702 703 memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_generic_quirk, legacy, 704 "vfio-nvidia-bar0-1800-quirk", 705 TARGET_PAGE_ALIGN(legacy->data.address_mask + 1)); 706 memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, 707 legacy->data.address_match & TARGET_PAGE_MASK, 708 quirk->mem, 1); 709 710 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); 711 712 trace_vfio_probe_nvidia_bar0_1800_quirk(vdev->vbasedev.name); 713 } 714 715 /* 716 * TODO - Some Nvidia devices provide config access to their companion HDA 717 * device and even to their parent bridge via these config space mirrors. 718 * Add quirks for those regions. 719 */ 720 721 #define PCI_VENDOR_ID_REALTEK 0x10ec 722 723 /* 724 * RTL8168 devices have a backdoor that can access the MSI-X table. At BAR2 725 * offset 0x70 there is a dword data register, offset 0x74 is a dword address 726 * register. According to the Linux r8169 driver, the MSI-X table is addressed 727 * when the "type" portion of the address register is set to 0x1. This appears 728 * to be bits 16:30. Bit 31 is both a write indicator and some sort of 729 * "address latched" indicator. Bits 12:15 are a mask field, which we can 730 * ignore because the MSI-X table should always be accessed as a dword (full 731 * mask). Bits 0:11 is offset within the type. 732 * 733 * Example trace: 734 * 735 * Read from MSI-X table offset 0 736 * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x1f000, 4) // store read addr 737 * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x8001f000 // latch 738 * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x70, 4) = 0xfee00398 // read data 739 * 740 * Write 0xfee00000 to MSI-X table offset 0 741 * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x70, 0xfee00000, 4) // write data 742 * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x8001f000, 4) // do write 743 * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x1f000 // complete 744 */ 745 static uint64_t vfio_rtl8168_window_quirk_read(void *opaque, 746 hwaddr addr, unsigned size) 747 { 748 VFIOLegacyQuirk *quirk = opaque; 749 VFIOPCIDevice *vdev = quirk->vdev; 750 uint64_t val = 0; 751 752 if (!quirk->data.flags) { /* Non-MSI-X table access */ 753 return vfio_region_read(&vdev->bars[quirk->data.bar].region, 754 addr + 0x70, size); 755 } 756 757 switch (addr) { 758 case 4: /* address */ 759 val = quirk->data.address_match ^ 0x80000000U; /* latch/complete */ 760 break; 761 case 0: /* data */ 762 if ((vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) { 763 memory_region_dispatch_read(&vdev->pdev.msix_table_mmio, 764 (hwaddr)(quirk->data.address_match & 0xfff), 765 &val, size, MEMTXATTRS_UNSPECIFIED); 766 } 767 break; 768 } 769 770 trace_vfio_rtl8168_quirk_read(vdev->vbasedev.name, 771 addr ? "address" : "data", val); 772 return val; 773 } 774 775 static void vfio_rtl8168_window_quirk_write(void *opaque, hwaddr addr, 776 uint64_t data, unsigned size) 777 { 778 VFIOLegacyQuirk *quirk = opaque; 779 VFIOPCIDevice *vdev = quirk->vdev; 780 781 switch (addr) { 782 case 4: /* address */ 783 if ((data & 0x7fff0000) == 0x10000) { /* MSI-X table */ 784 quirk->data.flags = 1; /* Activate reads */ 785 quirk->data.address_match = data; 786 787 trace_vfio_rtl8168_quirk_write(vdev->vbasedev.name, data); 788 789 if (data & 0x80000000U) { /* Do write */ 790 if (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) { 791 hwaddr offset = data & 0xfff; 792 uint64_t val = quirk->data.address_mask; 793 794 trace_vfio_rtl8168_quirk_msix(vdev->vbasedev.name, 795 (uint16_t)offset, val); 796 797 /* Write to the proper guest MSI-X table instead */ 798 memory_region_dispatch_write(&vdev->pdev.msix_table_mmio, 799 offset, val, size, 800 MEMTXATTRS_UNSPECIFIED); 801 } 802 return; /* Do not write guest MSI-X data to hardware */ 803 } 804 } else { 805 quirk->data.flags = 0; /* De-activate reads, non-MSI-X */ 806 } 807 break; 808 case 0: /* data */ 809 quirk->data.address_mask = data; 810 break; 811 } 812 813 vfio_region_write(&vdev->bars[quirk->data.bar].region, 814 addr + 0x70, data, size); 815 } 816 817 static const MemoryRegionOps vfio_rtl8168_window_quirk = { 818 .read = vfio_rtl8168_window_quirk_read, 819 .write = vfio_rtl8168_window_quirk_write, 820 .valid = { 821 .min_access_size = 4, 822 .max_access_size = 4, 823 .unaligned = false, 824 }, 825 .endianness = DEVICE_LITTLE_ENDIAN, 826 }; 827 828 static void vfio_probe_rtl8168_bar2_window_quirk(VFIOPCIDevice *vdev, int nr) 829 { 830 PCIDevice *pdev = &vdev->pdev; 831 VFIOQuirk *quirk; 832 VFIOLegacyQuirk *legacy; 833 834 if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_REALTEK || 835 pci_get_word(pdev->config + PCI_DEVICE_ID) != 0x8168 || nr != 2) { 836 return; 837 } 838 839 quirk = g_malloc0(sizeof(*quirk)); 840 quirk->data = legacy = g_malloc0(sizeof(*legacy)); 841 quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1); 842 quirk->nr_mem = 1; 843 legacy->vdev = vdev; 844 legacy->data.bar = nr; 845 846 memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_rtl8168_window_quirk, 847 legacy, "vfio-rtl8168-window-quirk", 8); 848 memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, 849 0x70, quirk->mem, 1); 850 851 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); 852 853 trace_vfio_rtl8168_quirk_enable(vdev->vbasedev.name); 854 } 855 856 /* 857 * Common quirk probe entry points. 858 */ 859 void vfio_vga_quirk_setup(VFIOPCIDevice *vdev) 860 { 861 vfio_vga_probe_ati_3c3_quirk(vdev); 862 vfio_vga_probe_nvidia_3d0_quirk(vdev); 863 } 864 865 void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev) 866 { 867 VFIOQuirk *quirk; 868 int i, j; 869 870 for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) { 871 QLIST_FOREACH(quirk, &vdev->vga.region[i].quirks, next) { 872 for (j = 0; j < quirk->nr_mem; j++) { 873 memory_region_del_subregion(&vdev->vga.region[i].mem, 874 &quirk->mem[j]); 875 } 876 } 877 } 878 } 879 880 void vfio_vga_quirk_free(VFIOPCIDevice *vdev) 881 { 882 int i, j; 883 884 for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) { 885 while (!QLIST_EMPTY(&vdev->vga.region[i].quirks)) { 886 VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga.region[i].quirks); 887 QLIST_REMOVE(quirk, next); 888 for (j = 0; j < quirk->nr_mem; j++) { 889 object_unparent(OBJECT(&quirk->mem[j])); 890 } 891 g_free(quirk->mem); 892 g_free(quirk->data); 893 g_free(quirk); 894 } 895 } 896 } 897 898 void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr) 899 { 900 vfio_probe_ati_bar4_window_quirk(vdev, nr); 901 vfio_probe_ati_bar2_4000_quirk(vdev, nr); 902 vfio_probe_nvidia_bar5_window_quirk(vdev, nr); 903 vfio_probe_nvidia_bar0_88000_quirk(vdev, nr); 904 vfio_probe_nvidia_bar0_1800_quirk(vdev, nr); 905 vfio_probe_rtl8168_bar2_window_quirk(vdev, nr); 906 } 907 908 void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr) 909 { 910 VFIOBAR *bar = &vdev->bars[nr]; 911 VFIOQuirk *quirk; 912 int i; 913 914 QLIST_FOREACH(quirk, &bar->quirks, next) { 915 for (i = 0; i < quirk->nr_mem; i++) { 916 memory_region_del_subregion(&bar->region.mem, &quirk->mem[i]); 917 } 918 } 919 } 920 921 void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr) 922 { 923 VFIOBAR *bar = &vdev->bars[nr]; 924 int i; 925 926 while (!QLIST_EMPTY(&bar->quirks)) { 927 VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks); 928 QLIST_REMOVE(quirk, next); 929 for (i = 0; i < quirk->nr_mem; i++) { 930 object_unparent(OBJECT(&quirk->mem[i])); 931 } 932 g_free(quirk->mem); 933 g_free(quirk->data); 934 g_free(quirk); 935 } 936 } 937