1 #include "kvm/devices.h" 2 #include "kvm/pci.h" 3 #include "kvm/ioport.h" 4 #include "kvm/irq.h" 5 #include "kvm/util.h" 6 #include "kvm/kvm.h" 7 8 #include <linux/err.h> 9 #include <assert.h> 10 11 static u32 pci_config_address_bits; 12 13 /* This is within our PCI gap - in an unused area. 14 * Note this is a PCI *bus address*, is used to assign BARs etc.! 15 * (That's why it can still 32bit even with 64bit guests-- 64bit 16 * PCI isn't currently supported.) 17 */ 18 static u32 mmio_blocks = KVM_PCI_MMIO_AREA; 19 static u16 io_port_blocks = PCI_IOPORT_START; 20 21 u16 pci_get_io_port_block(u32 size) 22 { 23 u16 port = ALIGN(io_port_blocks, PCI_IO_SIZE); 24 25 io_port_blocks = port + size; 26 return port; 27 } 28 29 /* 30 * BARs must be naturally aligned, so enforce this in the allocator. 31 */ 32 u32 pci_get_mmio_block(u32 size) 33 { 34 u32 block = ALIGN(mmio_blocks, size); 35 mmio_blocks = block + size; 36 return block; 37 } 38 39 void *pci_find_cap(struct pci_device_header *hdr, u8 cap_type) 40 { 41 u8 pos; 42 struct pci_cap_hdr *cap; 43 44 pci_for_each_cap(pos, cap, hdr) { 45 if (cap->type == cap_type) 46 return cap; 47 } 48 49 return NULL; 50 } 51 52 int pci__assign_irq(struct pci_device_header *pci_hdr) 53 { 54 /* 55 * PCI supports only INTA#,B#,C#,D# per device. 56 * 57 * A#,B#,C#,D# are allowed for multifunctional devices so stick 58 * with A# for our single function devices. 59 */ 60 pci_hdr->irq_pin = 1; 61 pci_hdr->irq_line = irq__alloc_line(); 62 63 if (!pci_hdr->irq_type) 64 pci_hdr->irq_type = IRQ_TYPE_EDGE_RISING; 65 66 return pci_hdr->irq_line; 67 } 68 69 static bool pci_bar_is_implemented(struct pci_device_header *pci_hdr, int bar_num) 70 { 71 return pci__bar_size(pci_hdr, bar_num); 72 } 73 74 static bool pci_bar_is_active(struct pci_device_header *pci_hdr, int bar_num) 75 { 76 return pci_hdr->bar_active[bar_num]; 77 } 78 79 static void *pci_config_address_ptr(u16 port) 80 { 81 unsigned long offset; 82 void *base; 83 84 offset = port - PCI_CONFIG_ADDRESS; 85 base = &pci_config_address_bits; 86 87 return base + offset; 88 } 89 90 static bool pci_config_address_out(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size) 91 { 92 void *p = pci_config_address_ptr(port); 93 94 memcpy(p, data, size); 95 96 return true; 97 } 98 99 static bool pci_config_address_in(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size) 100 { 101 void *p = pci_config_address_ptr(port); 102 103 memcpy(data, p, size); 104 105 return true; 106 } 107 108 static struct ioport_operations pci_config_address_ops = { 109 .io_in = pci_config_address_in, 110 .io_out = pci_config_address_out, 111 }; 112 113 static bool pci_device_exists(u8 bus_number, u8 device_number, u8 function_number) 114 { 115 union pci_config_address pci_config_address; 116 117 pci_config_address.w = ioport__read32(&pci_config_address_bits); 118 119 if (pci_config_address.bus_number != bus_number) 120 return false; 121 122 if (pci_config_address.function_number != function_number) 123 return false; 124 125 return !IS_ERR_OR_NULL(device__find_dev(DEVICE_BUS_PCI, device_number)); 126 } 127 128 static bool pci_config_data_out(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size) 129 { 130 union pci_config_address pci_config_address; 131 132 if (size > 4) 133 size = 4; 134 135 pci_config_address.w = ioport__read32(&pci_config_address_bits); 136 /* 137 * If someone accesses PCI configuration space offsets that are not 138 * aligned to 4 bytes, it uses ioports to signify that. 139 */ 140 pci_config_address.reg_offset = port - PCI_CONFIG_DATA; 141 142 pci__config_wr(vcpu->kvm, pci_config_address, data, size); 143 144 return true; 145 } 146 147 static bool pci_config_data_in(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size) 148 { 149 union pci_config_address pci_config_address; 150 151 if (size > 4) 152 size = 4; 153 154 pci_config_address.w = ioport__read32(&pci_config_address_bits); 155 /* 156 * If someone accesses PCI configuration space offsets that are not 157 * aligned to 4 bytes, it uses ioports to signify that. 158 */ 159 pci_config_address.reg_offset = port - PCI_CONFIG_DATA; 160 161 pci__config_rd(vcpu->kvm, pci_config_address, data, size); 162 163 return true; 164 } 165 166 static struct ioport_operations pci_config_data_ops = { 167 .io_in = pci_config_data_in, 168 .io_out = pci_config_data_out, 169 }; 170 171 static int pci_activate_bar(struct kvm *kvm, struct pci_device_header *pci_hdr, 172 int bar_num) 173 { 174 int r = 0; 175 176 if (pci_bar_is_active(pci_hdr, bar_num)) 177 goto out; 178 179 r = pci_hdr->bar_activate_fn(kvm, pci_hdr, bar_num, pci_hdr->data); 180 if (r < 0) { 181 pci_dev_warn(pci_hdr, "Error activating emulation for BAR %d", 182 bar_num); 183 goto out; 184 } 185 pci_hdr->bar_active[bar_num] = true; 186 187 out: 188 return r; 189 } 190 191 static int pci_deactivate_bar(struct kvm *kvm, struct pci_device_header *pci_hdr, 192 int bar_num) 193 { 194 int r = 0; 195 196 if (!pci_bar_is_active(pci_hdr, bar_num)) 197 goto out; 198 199 r = pci_hdr->bar_deactivate_fn(kvm, pci_hdr, bar_num, pci_hdr->data); 200 if (r < 0) { 201 pci_dev_warn(pci_hdr, "Error deactivating emulation for BAR %d", 202 bar_num); 203 goto out; 204 } 205 pci_hdr->bar_active[bar_num] = false; 206 207 out: 208 return r; 209 } 210 211 static void pci_config_command_wr(struct kvm *kvm, 212 struct pci_device_header *pci_hdr, 213 u16 new_command) 214 { 215 int i; 216 bool toggle_io, toggle_mem; 217 218 toggle_io = (pci_hdr->command ^ new_command) & PCI_COMMAND_IO; 219 toggle_mem = (pci_hdr->command ^ new_command) & PCI_COMMAND_MEMORY; 220 221 for (i = 0; i < 6; i++) { 222 if (!pci_bar_is_implemented(pci_hdr, i)) 223 continue; 224 225 if (toggle_io && pci__bar_is_io(pci_hdr, i)) { 226 if (__pci__io_space_enabled(new_command)) 227 pci_activate_bar(kvm, pci_hdr, i); 228 else 229 pci_deactivate_bar(kvm, pci_hdr, i); 230 } 231 232 if (toggle_mem && pci__bar_is_memory(pci_hdr, i)) { 233 if (__pci__memory_space_enabled(new_command)) 234 pci_activate_bar(kvm, pci_hdr, i); 235 else 236 pci_deactivate_bar(kvm, pci_hdr, i); 237 } 238 } 239 240 pci_hdr->command = new_command; 241 } 242 243 static int pci_toggle_bar_regions(bool activate, struct kvm *kvm, u32 start, u32 size) 244 { 245 struct device_header *dev_hdr; 246 struct pci_device_header *tmp_hdr; 247 u32 tmp_start, tmp_size; 248 int i, r; 249 250 dev_hdr = device__first_dev(DEVICE_BUS_PCI); 251 while (dev_hdr) { 252 tmp_hdr = dev_hdr->data; 253 for (i = 0; i < 6; i++) { 254 if (!pci_bar_is_implemented(tmp_hdr, i)) 255 continue; 256 257 tmp_start = pci__bar_address(tmp_hdr, i); 258 tmp_size = pci__bar_size(tmp_hdr, i); 259 if (tmp_start + tmp_size <= start || 260 tmp_start >= start + size) 261 continue; 262 263 if (activate) 264 r = pci_activate_bar(kvm, tmp_hdr, i); 265 else 266 r = pci_deactivate_bar(kvm, tmp_hdr, i); 267 if (r < 0) 268 return r; 269 } 270 dev_hdr = device__next_dev(dev_hdr); 271 } 272 273 return 0; 274 } 275 276 static inline int pci_activate_bar_regions(struct kvm *kvm, u32 start, u32 size) 277 { 278 return pci_toggle_bar_regions(true, kvm, start, size); 279 } 280 281 static inline int pci_deactivate_bar_regions(struct kvm *kvm, u32 start, u32 size) 282 { 283 return pci_toggle_bar_regions(false, kvm, start, size); 284 } 285 286 static void pci_config_bar_wr(struct kvm *kvm, 287 struct pci_device_header *pci_hdr, int bar_num, 288 u32 value) 289 { 290 u32 old_addr, new_addr, bar_size; 291 u32 mask; 292 int r; 293 294 if (pci__bar_is_io(pci_hdr, bar_num)) 295 mask = (u32)PCI_BASE_ADDRESS_IO_MASK; 296 else 297 mask = (u32)PCI_BASE_ADDRESS_MEM_MASK; 298 299 /* 300 * If the kernel masks the BAR, it will expect to find the size of the 301 * BAR there next time it reads from it. After the kernel reads the 302 * size, it will write the address back. 303 * 304 * According to the PCI local bus specification REV 3.0: The number of 305 * upper bits that a device actually implements depends on how much of 306 * the address space the device will respond to. A device that wants a 1 307 * MB memory address space (using a 32-bit base address register) would 308 * build the top 12 bits of the address register, hardwiring the other 309 * bits to 0. 310 * 311 * Furthermore, software can determine how much address space the device 312 * requires by writing a value of all 1's to the register and then 313 * reading the value back. The device will return 0's in all don't-care 314 * address bits, effectively specifying the address space required. 315 * 316 * Software computes the size of the address space with the formula 317 * S = ~B + 1, where S is the memory size and B is the value read from 318 * the BAR. This means that the BAR value that kvmtool should return is 319 * B = ~(S - 1). 320 */ 321 if (value == 0xffffffff) { 322 value = ~(pci__bar_size(pci_hdr, bar_num) - 1); 323 /* Preserve the special bits. */ 324 value = (value & mask) | (pci_hdr->bar[bar_num] & ~mask); 325 pci_hdr->bar[bar_num] = value; 326 return; 327 } 328 329 value = (value & mask) | (pci_hdr->bar[bar_num] & ~mask); 330 331 /* Don't toggle emulation when region type access is disbled. */ 332 if (pci__bar_is_io(pci_hdr, bar_num) && 333 !pci__io_space_enabled(pci_hdr)) { 334 pci_hdr->bar[bar_num] = value; 335 return; 336 } 337 338 if (pci__bar_is_memory(pci_hdr, bar_num) && 339 !pci__memory_space_enabled(pci_hdr)) { 340 pci_hdr->bar[bar_num] = value; 341 return; 342 } 343 344 /* 345 * BAR reassignment can be done while device access is enabled and 346 * memory regions for different devices can overlap as long as no access 347 * is made to the overlapping memory regions. To implement BAR 348 * reasignment, we deactivate emulation for the region described by the 349 * BAR value that the guest is changing, we disable emulation for the 350 * regions that overlap with the new one (by scanning through all PCI 351 * devices), we enable emulation for the new BAR value and finally we 352 * enable emulation for all device regions that were overlapping with 353 * the old value. 354 */ 355 old_addr = pci__bar_address(pci_hdr, bar_num); 356 new_addr = __pci__bar_address(value); 357 bar_size = pci__bar_size(pci_hdr, bar_num); 358 359 r = pci_deactivate_bar(kvm, pci_hdr, bar_num); 360 if (r < 0) 361 return; 362 363 r = pci_deactivate_bar_regions(kvm, new_addr, bar_size); 364 if (r < 0) { 365 /* 366 * We cannot update the BAR because of an overlapping region 367 * that failed to deactivate emulation, so keep the old BAR 368 * value and re-activate emulation for it. 369 */ 370 pci_activate_bar(kvm, pci_hdr, bar_num); 371 return; 372 } 373 374 pci_hdr->bar[bar_num] = value; 375 r = pci_activate_bar(kvm, pci_hdr, bar_num); 376 if (r < 0) { 377 /* 378 * New region cannot be emulated, re-enable the regions that 379 * were overlapping. 380 */ 381 pci_activate_bar_regions(kvm, new_addr, bar_size); 382 return; 383 } 384 385 pci_activate_bar_regions(kvm, old_addr, bar_size); 386 } 387 388 void pci__config_wr(struct kvm *kvm, union pci_config_address addr, void *data, int size) 389 { 390 void *base; 391 u8 bar, offset; 392 struct pci_device_header *pci_hdr; 393 u8 dev_num = addr.device_number; 394 u32 value = 0; 395 396 if (!pci_device_exists(addr.bus_number, dev_num, 0)) 397 return; 398 399 offset = addr.w & PCI_DEV_CFG_MASK; 400 base = pci_hdr = device__find_dev(DEVICE_BUS_PCI, dev_num)->data; 401 402 if (pci_hdr->cfg_ops.write) 403 pci_hdr->cfg_ops.write(kvm, pci_hdr, offset, data, size); 404 405 /* 406 * legacy hack: ignore writes to uninitialized regions (e.g. ROM BAR). 407 * Not very nice but has been working so far. 408 */ 409 if (*(u32 *)(base + offset) == 0) 410 return; 411 412 if (offset == PCI_COMMAND) { 413 memcpy(&value, data, size); 414 pci_config_command_wr(kvm, pci_hdr, (u16)value); 415 return; 416 } 417 418 bar = (offset - PCI_BAR_OFFSET(0)) / sizeof(u32); 419 if (bar < 6) { 420 memcpy(&value, data, size); 421 pci_config_bar_wr(kvm, pci_hdr, bar, value); 422 return; 423 } 424 425 memcpy(base + offset, data, size); 426 } 427 428 void pci__config_rd(struct kvm *kvm, union pci_config_address addr, void *data, int size) 429 { 430 u8 offset; 431 struct pci_device_header *pci_hdr; 432 u8 dev_num = addr.device_number; 433 434 if (pci_device_exists(addr.bus_number, dev_num, 0)) { 435 pci_hdr = device__find_dev(DEVICE_BUS_PCI, dev_num)->data; 436 offset = addr.w & PCI_DEV_CFG_MASK; 437 438 if (pci_hdr->cfg_ops.read) 439 pci_hdr->cfg_ops.read(kvm, pci_hdr, offset, data, size); 440 441 memcpy(data, (void *)pci_hdr + offset, size); 442 } else { 443 memset(data, 0xff, size); 444 } 445 } 446 447 static void pci_config_mmio_access(struct kvm_cpu *vcpu, u64 addr, u8 *data, 448 u32 len, u8 is_write, void *kvm) 449 { 450 union pci_config_address cfg_addr; 451 452 addr -= KVM_PCI_CFG_AREA; 453 cfg_addr.w = (u32)addr; 454 cfg_addr.enable_bit = 1; 455 456 if (len > 4) 457 len = 4; 458 459 if (is_write) 460 pci__config_wr(kvm, cfg_addr, data, len); 461 else 462 pci__config_rd(kvm, cfg_addr, data, len); 463 } 464 465 struct pci_device_header *pci__find_dev(u8 dev_num) 466 { 467 struct device_header *hdr = device__find_dev(DEVICE_BUS_PCI, dev_num); 468 469 if (IS_ERR_OR_NULL(hdr)) 470 return NULL; 471 472 return hdr->data; 473 } 474 475 int pci__register_bar_regions(struct kvm *kvm, struct pci_device_header *pci_hdr, 476 bar_activate_fn_t bar_activate_fn, 477 bar_deactivate_fn_t bar_deactivate_fn, void *data) 478 { 479 int i, r; 480 481 assert(bar_activate_fn && bar_deactivate_fn); 482 483 pci_hdr->bar_activate_fn = bar_activate_fn; 484 pci_hdr->bar_deactivate_fn = bar_deactivate_fn; 485 pci_hdr->data = data; 486 487 for (i = 0; i < 6; i++) { 488 if (!pci_bar_is_implemented(pci_hdr, i)) 489 continue; 490 491 assert(!pci_bar_is_active(pci_hdr, i)); 492 493 if (pci__bar_is_io(pci_hdr, i) && 494 pci__io_space_enabled(pci_hdr)) { 495 r = pci_activate_bar(kvm, pci_hdr, i); 496 if (r < 0) 497 return r; 498 } 499 500 if (pci__bar_is_memory(pci_hdr, i) && 501 pci__memory_space_enabled(pci_hdr)) { 502 r = pci_activate_bar(kvm, pci_hdr, i); 503 if (r < 0) 504 return r; 505 } 506 } 507 508 return 0; 509 } 510 511 int pci__init(struct kvm *kvm) 512 { 513 int r; 514 515 r = ioport__register(kvm, PCI_CONFIG_DATA + 0, &pci_config_data_ops, 4, NULL); 516 if (r < 0) 517 return r; 518 519 r = ioport__register(kvm, PCI_CONFIG_ADDRESS + 0, &pci_config_address_ops, 4, NULL); 520 if (r < 0) 521 goto err_unregister_data; 522 523 r = kvm__register_mmio(kvm, KVM_PCI_CFG_AREA, PCI_CFG_SIZE, false, 524 pci_config_mmio_access, kvm); 525 if (r < 0) 526 goto err_unregister_addr; 527 528 return 0; 529 530 err_unregister_addr: 531 ioport__unregister(kvm, PCI_CONFIG_ADDRESS); 532 err_unregister_data: 533 ioport__unregister(kvm, PCI_CONFIG_DATA); 534 return r; 535 } 536 dev_base_init(pci__init); 537 538 int pci__exit(struct kvm *kvm) 539 { 540 ioport__unregister(kvm, PCI_CONFIG_DATA); 541 ioport__unregister(kvm, PCI_CONFIG_ADDRESS); 542 543 return 0; 544 } 545 dev_base_exit(pci__exit); 546