1 #include "kvm/devices.h" 2 #include "kvm/pci.h" 3 #include "kvm/ioport.h" 4 #include "kvm/irq.h" 5 #include "kvm/util.h" 6 #include "kvm/kvm.h" 7 8 #include <linux/err.h> 9 #include <assert.h> 10 11 static u32 pci_config_address_bits; 12 13 /* This is within our PCI gap - in an unused area. 14 * Note this is a PCI *bus address*, is used to assign BARs etc.! 15 * (That's why it can still 32bit even with 64bit guests-- 64bit 16 * PCI isn't currently supported.) 17 */ 18 static u32 mmio_blocks = KVM_PCI_MMIO_AREA; 19 static u16 io_port_blocks = PCI_IOPORT_START; 20 21 u16 pci_get_io_port_block(u32 size) 22 { 23 u16 port = ALIGN(io_port_blocks, PCI_IO_SIZE); 24 25 io_port_blocks = port + size; 26 return port; 27 } 28 29 /* 30 * BARs must be naturally aligned, so enforce this in the allocator. 31 */ 32 u32 pci_get_mmio_block(u32 size) 33 { 34 u32 block = ALIGN(mmio_blocks, size); 35 mmio_blocks = block + size; 36 return block; 37 } 38 39 void *pci_find_cap(struct pci_device_header *hdr, u8 cap_type) 40 { 41 u8 pos; 42 struct pci_cap_hdr *cap; 43 44 pci_for_each_cap(pos, cap, hdr) { 45 if (cap->type == cap_type) 46 return cap; 47 } 48 49 return NULL; 50 } 51 52 int pci__assign_irq(struct pci_device_header *pci_hdr) 53 { 54 /* 55 * PCI supports only INTA#,B#,C#,D# per device. 56 * 57 * A#,B#,C#,D# are allowed for multifunctional devices so stick 58 * with A# for our single function devices. 59 */ 60 pci_hdr->irq_pin = 1; 61 pci_hdr->irq_line = irq__alloc_line(); 62 63 if (!pci_hdr->irq_type) 64 pci_hdr->irq_type = IRQ_TYPE_EDGE_RISING; 65 66 return pci_hdr->irq_line; 67 } 68 69 static bool pci_bar_is_implemented(struct pci_device_header *pci_hdr, int bar_num) 70 { 71 return pci__bar_size(pci_hdr, bar_num); 72 } 73 74 static bool pci_bar_is_active(struct pci_device_header *pci_hdr, int bar_num) 75 { 76 return pci_hdr->bar_active[bar_num]; 77 } 78 79 static void *pci_config_address_ptr(u16 port) 80 { 81 unsigned long offset; 82 void *base; 83 84 offset = port - PCI_CONFIG_ADDRESS; 85 base = &pci_config_address_bits; 86 87 return base + offset; 88 } 89 90 static void pci_config_address_mmio(struct kvm_cpu *vcpu, u64 addr, u8 *data, 91 u32 len, u8 is_write, void *ptr) 92 { 93 void *p = pci_config_address_ptr(addr); 94 95 if (is_write) 96 memcpy(p, data, len); 97 else 98 memcpy(data, p, len); 99 } 100 static bool pci_device_exists(u8 bus_number, u8 device_number, u8 function_number) 101 { 102 union pci_config_address pci_config_address; 103 104 pci_config_address.w = ioport__read32(&pci_config_address_bits); 105 106 if (pci_config_address.bus_number != bus_number) 107 return false; 108 109 if (pci_config_address.function_number != function_number) 110 return false; 111 112 return !IS_ERR_OR_NULL(device__find_dev(DEVICE_BUS_PCI, device_number)); 113 } 114 115 static void pci_config_data_mmio(struct kvm_cpu *vcpu, u64 addr, u8 *data, 116 u32 len, u8 is_write, void *kvm) 117 { 118 union pci_config_address pci_config_address; 119 120 if (len > 4) 121 len = 4; 122 123 pci_config_address.w = ioport__read32(&pci_config_address_bits); 124 /* 125 * If someone accesses PCI configuration space offsets that are not 126 * aligned to 4 bytes, it uses ioports to signify that. 127 */ 128 pci_config_address.reg_offset = addr - PCI_CONFIG_DATA; 129 130 if (is_write) 131 pci__config_wr(vcpu->kvm, pci_config_address, data, len); 132 else 133 pci__config_rd(vcpu->kvm, pci_config_address, data, len); 134 } 135 136 static int pci_activate_bar(struct kvm *kvm, struct pci_device_header *pci_hdr, 137 int bar_num) 138 { 139 int r = 0; 140 141 if (pci_bar_is_active(pci_hdr, bar_num)) 142 goto out; 143 144 r = pci_hdr->bar_activate_fn(kvm, pci_hdr, bar_num, pci_hdr->data); 145 if (r < 0) { 146 pci_dev_warn(pci_hdr, "Error activating emulation for BAR %d", 147 bar_num); 148 goto out; 149 } 150 pci_hdr->bar_active[bar_num] = true; 151 152 out: 153 return r; 154 } 155 156 static int pci_deactivate_bar(struct kvm *kvm, struct pci_device_header *pci_hdr, 157 int bar_num) 158 { 159 int r = 0; 160 161 if (!pci_bar_is_active(pci_hdr, bar_num)) 162 goto out; 163 164 r = pci_hdr->bar_deactivate_fn(kvm, pci_hdr, bar_num, pci_hdr->data); 165 if (r < 0) { 166 pci_dev_warn(pci_hdr, "Error deactivating emulation for BAR %d", 167 bar_num); 168 goto out; 169 } 170 pci_hdr->bar_active[bar_num] = false; 171 172 out: 173 return r; 174 } 175 176 static void pci_config_command_wr(struct kvm *kvm, 177 struct pci_device_header *pci_hdr, 178 u16 new_command) 179 { 180 int i; 181 bool toggle_io, toggle_mem; 182 183 toggle_io = (pci_hdr->command ^ new_command) & PCI_COMMAND_IO; 184 toggle_mem = (pci_hdr->command ^ new_command) & PCI_COMMAND_MEMORY; 185 186 for (i = 0; i < 6; i++) { 187 if (!pci_bar_is_implemented(pci_hdr, i)) 188 continue; 189 190 if (toggle_io && pci__bar_is_io(pci_hdr, i)) { 191 if (__pci__io_space_enabled(new_command)) 192 pci_activate_bar(kvm, pci_hdr, i); 193 else 194 pci_deactivate_bar(kvm, pci_hdr, i); 195 } 196 197 if (toggle_mem && pci__bar_is_memory(pci_hdr, i)) { 198 if (__pci__memory_space_enabled(new_command)) 199 pci_activate_bar(kvm, pci_hdr, i); 200 else 201 pci_deactivate_bar(kvm, pci_hdr, i); 202 } 203 } 204 205 pci_hdr->command = new_command; 206 } 207 208 static int pci_toggle_bar_regions(bool activate, struct kvm *kvm, u32 start, u32 size) 209 { 210 struct device_header *dev_hdr; 211 struct pci_device_header *tmp_hdr; 212 u32 tmp_start, tmp_size; 213 int i, r; 214 215 dev_hdr = device__first_dev(DEVICE_BUS_PCI); 216 while (dev_hdr) { 217 tmp_hdr = dev_hdr->data; 218 for (i = 0; i < 6; i++) { 219 if (!pci_bar_is_implemented(tmp_hdr, i)) 220 continue; 221 222 tmp_start = pci__bar_address(tmp_hdr, i); 223 tmp_size = pci__bar_size(tmp_hdr, i); 224 if (tmp_start + tmp_size <= start || 225 tmp_start >= start + size) 226 continue; 227 228 if (activate) 229 r = pci_activate_bar(kvm, tmp_hdr, i); 230 else 231 r = pci_deactivate_bar(kvm, tmp_hdr, i); 232 if (r < 0) 233 return r; 234 } 235 dev_hdr = device__next_dev(dev_hdr); 236 } 237 238 return 0; 239 } 240 241 static inline int pci_activate_bar_regions(struct kvm *kvm, u32 start, u32 size) 242 { 243 return pci_toggle_bar_regions(true, kvm, start, size); 244 } 245 246 static inline int pci_deactivate_bar_regions(struct kvm *kvm, u32 start, u32 size) 247 { 248 return pci_toggle_bar_regions(false, kvm, start, size); 249 } 250 251 static void pci_config_bar_wr(struct kvm *kvm, 252 struct pci_device_header *pci_hdr, int bar_num, 253 u32 value) 254 { 255 u32 old_addr, new_addr, bar_size; 256 u32 mask; 257 int r; 258 259 if (pci__bar_is_io(pci_hdr, bar_num)) 260 mask = (u32)PCI_BASE_ADDRESS_IO_MASK; 261 else 262 mask = (u32)PCI_BASE_ADDRESS_MEM_MASK; 263 264 /* 265 * If the kernel masks the BAR, it will expect to find the size of the 266 * BAR there next time it reads from it. After the kernel reads the 267 * size, it will write the address back. 268 * 269 * According to the PCI local bus specification REV 3.0: The number of 270 * upper bits that a device actually implements depends on how much of 271 * the address space the device will respond to. A device that wants a 1 272 * MB memory address space (using a 32-bit base address register) would 273 * build the top 12 bits of the address register, hardwiring the other 274 * bits to 0. 275 * 276 * Furthermore, software can determine how much address space the device 277 * requires by writing a value of all 1's to the register and then 278 * reading the value back. The device will return 0's in all don't-care 279 * address bits, effectively specifying the address space required. 280 * 281 * Software computes the size of the address space with the formula 282 * S = ~B + 1, where S is the memory size and B is the value read from 283 * the BAR. This means that the BAR value that kvmtool should return is 284 * B = ~(S - 1). 285 */ 286 if (value == 0xffffffff) { 287 value = ~(pci__bar_size(pci_hdr, bar_num) - 1); 288 /* Preserve the special bits. */ 289 value = (value & mask) | (pci_hdr->bar[bar_num] & ~mask); 290 pci_hdr->bar[bar_num] = value; 291 return; 292 } 293 294 value = (value & mask) | (pci_hdr->bar[bar_num] & ~mask); 295 296 /* Don't toggle emulation when region type access is disbled. */ 297 if (pci__bar_is_io(pci_hdr, bar_num) && 298 !pci__io_space_enabled(pci_hdr)) { 299 pci_hdr->bar[bar_num] = value; 300 return; 301 } 302 303 if (pci__bar_is_memory(pci_hdr, bar_num) && 304 !pci__memory_space_enabled(pci_hdr)) { 305 pci_hdr->bar[bar_num] = value; 306 return; 307 } 308 309 /* 310 * BAR reassignment can be done while device access is enabled and 311 * memory regions for different devices can overlap as long as no access 312 * is made to the overlapping memory regions. To implement BAR 313 * reasignment, we deactivate emulation for the region described by the 314 * BAR value that the guest is changing, we disable emulation for the 315 * regions that overlap with the new one (by scanning through all PCI 316 * devices), we enable emulation for the new BAR value and finally we 317 * enable emulation for all device regions that were overlapping with 318 * the old value. 319 */ 320 old_addr = pci__bar_address(pci_hdr, bar_num); 321 new_addr = __pci__bar_address(value); 322 bar_size = pci__bar_size(pci_hdr, bar_num); 323 324 r = pci_deactivate_bar(kvm, pci_hdr, bar_num); 325 if (r < 0) 326 return; 327 328 r = pci_deactivate_bar_regions(kvm, new_addr, bar_size); 329 if (r < 0) { 330 /* 331 * We cannot update the BAR because of an overlapping region 332 * that failed to deactivate emulation, so keep the old BAR 333 * value and re-activate emulation for it. 334 */ 335 pci_activate_bar(kvm, pci_hdr, bar_num); 336 return; 337 } 338 339 pci_hdr->bar[bar_num] = value; 340 r = pci_activate_bar(kvm, pci_hdr, bar_num); 341 if (r < 0) { 342 /* 343 * New region cannot be emulated, re-enable the regions that 344 * were overlapping. 345 */ 346 pci_activate_bar_regions(kvm, new_addr, bar_size); 347 return; 348 } 349 350 pci_activate_bar_regions(kvm, old_addr, bar_size); 351 } 352 353 void pci__config_wr(struct kvm *kvm, union pci_config_address addr, void *data, int size) 354 { 355 void *base; 356 u8 bar, offset; 357 struct pci_device_header *pci_hdr; 358 u8 dev_num = addr.device_number; 359 u32 value = 0; 360 361 if (!pci_device_exists(addr.bus_number, dev_num, 0)) 362 return; 363 364 offset = addr.w & PCI_DEV_CFG_MASK; 365 base = pci_hdr = device__find_dev(DEVICE_BUS_PCI, dev_num)->data; 366 367 if (pci_hdr->cfg_ops.write) 368 pci_hdr->cfg_ops.write(kvm, pci_hdr, offset, data, size); 369 370 /* 371 * legacy hack: ignore writes to uninitialized regions (e.g. ROM BAR). 372 * Not very nice but has been working so far. 373 */ 374 if (*(u32 *)(base + offset) == 0) 375 return; 376 377 if (offset == PCI_COMMAND) { 378 memcpy(&value, data, size); 379 pci_config_command_wr(kvm, pci_hdr, (u16)value); 380 return; 381 } 382 383 bar = (offset - PCI_BAR_OFFSET(0)) / sizeof(u32); 384 if (bar < 6) { 385 memcpy(&value, data, size); 386 pci_config_bar_wr(kvm, pci_hdr, bar, value); 387 return; 388 } 389 390 memcpy(base + offset, data, size); 391 } 392 393 void pci__config_rd(struct kvm *kvm, union pci_config_address addr, void *data, int size) 394 { 395 u8 offset; 396 struct pci_device_header *pci_hdr; 397 u8 dev_num = addr.device_number; 398 399 if (pci_device_exists(addr.bus_number, dev_num, 0)) { 400 pci_hdr = device__find_dev(DEVICE_BUS_PCI, dev_num)->data; 401 offset = addr.w & PCI_DEV_CFG_MASK; 402 403 if (pci_hdr->cfg_ops.read) 404 pci_hdr->cfg_ops.read(kvm, pci_hdr, offset, data, size); 405 406 memcpy(data, (void *)pci_hdr + offset, size); 407 } else { 408 memset(data, 0xff, size); 409 } 410 } 411 412 static void pci_config_mmio_access(struct kvm_cpu *vcpu, u64 addr, u8 *data, 413 u32 len, u8 is_write, void *kvm) 414 { 415 union pci_config_address cfg_addr; 416 417 addr -= KVM_PCI_CFG_AREA; 418 cfg_addr.w = (u32)addr; 419 cfg_addr.enable_bit = 1; 420 421 if (len > 4) 422 len = 4; 423 424 if (is_write) 425 pci__config_wr(kvm, cfg_addr, data, len); 426 else 427 pci__config_rd(kvm, cfg_addr, data, len); 428 } 429 430 struct pci_device_header *pci__find_dev(u8 dev_num) 431 { 432 struct device_header *hdr = device__find_dev(DEVICE_BUS_PCI, dev_num); 433 434 if (IS_ERR_OR_NULL(hdr)) 435 return NULL; 436 437 return hdr->data; 438 } 439 440 int pci__register_bar_regions(struct kvm *kvm, struct pci_device_header *pci_hdr, 441 bar_activate_fn_t bar_activate_fn, 442 bar_deactivate_fn_t bar_deactivate_fn, void *data) 443 { 444 int i, r; 445 446 assert(bar_activate_fn && bar_deactivate_fn); 447 448 pci_hdr->bar_activate_fn = bar_activate_fn; 449 pci_hdr->bar_deactivate_fn = bar_deactivate_fn; 450 pci_hdr->data = data; 451 452 for (i = 0; i < 6; i++) { 453 if (!pci_bar_is_implemented(pci_hdr, i)) 454 continue; 455 456 assert(!pci_bar_is_active(pci_hdr, i)); 457 458 if (pci__bar_is_io(pci_hdr, i) && 459 pci__io_space_enabled(pci_hdr)) { 460 r = pci_activate_bar(kvm, pci_hdr, i); 461 if (r < 0) 462 return r; 463 } 464 465 if (pci__bar_is_memory(pci_hdr, i) && 466 pci__memory_space_enabled(pci_hdr)) { 467 r = pci_activate_bar(kvm, pci_hdr, i); 468 if (r < 0) 469 return r; 470 } 471 } 472 473 return 0; 474 } 475 476 int pci__init(struct kvm *kvm) 477 { 478 int r; 479 480 r = kvm__register_pio(kvm, PCI_CONFIG_DATA, 4, 481 pci_config_data_mmio, NULL); 482 if (r < 0) 483 return r; 484 r = kvm__register_pio(kvm, PCI_CONFIG_ADDRESS, 4, 485 pci_config_address_mmio, NULL); 486 if (r < 0) 487 goto err_unregister_data; 488 489 r = kvm__register_mmio(kvm, KVM_PCI_CFG_AREA, PCI_CFG_SIZE, false, 490 pci_config_mmio_access, kvm); 491 if (r < 0) 492 goto err_unregister_addr; 493 494 return 0; 495 496 err_unregister_addr: 497 kvm__deregister_pio(kvm, PCI_CONFIG_ADDRESS); 498 err_unregister_data: 499 kvm__deregister_pio(kvm, PCI_CONFIG_DATA); 500 return r; 501 } 502 dev_base_init(pci__init); 503 504 int pci__exit(struct kvm *kvm) 505 { 506 kvm__deregister_pio(kvm, PCI_CONFIG_DATA); 507 kvm__deregister_pio(kvm, PCI_CONFIG_ADDRESS); 508 509 return 0; 510 } 511 dev_base_exit(pci__exit); 512