#include "kvm/devices.h"
#include "kvm/pci.h"
#include "kvm/ioport.h"
#include "kvm/irq.h"
#include "kvm/util.h"
#include "kvm/kvm.h"

#include <linux/err.h>
#include <assert.h>

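/*
 * Last value the guest wrote to the CONFIG_ADDRESS register; the
 * CONFIG_DATA handler combines it with the access offset to pick the
 * device and register to touch.
 */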
static u32 pci_config_address_bits;

/* This is within our PCI gap - in an unused area.
 * Note this is a PCI *bus address*; it is used to assign BARs etc.!
 * (That's why it can still be 32-bit even with 64-bit guests -- 64-bit
 * PCI isn't currently supported.)
 */
static u32 mmio_blocks = KVM_PCI_MMIO_AREA;
static u16 io_port_blocks = PCI_IOPORT_START;

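/*
 * Allocate a block of "size" bytes from the PCI I/O port space, aligned
 * to PCI_IO_SIZE.
 */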
u16 pci_get_io_port_block(u32 size)
{
	u16 port = ALIGN(io_port_blocks, PCI_IO_SIZE);

	io_port_blocks = port + size;
	return port;
}

/*
 * BARs must be naturally aligned, so enforce this in the allocator.
 */
u32 pci_get_mmio_block(u32 size)
{
	u32 block = ALIGN(mmio_blocks, size);
	mmio_blocks = block + size;
	return block;
}

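/*
 * Walk the device's capability list and return the first capability of
 * the requested type, or NULL if the device doesn't advertise it.
 */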
void *pci_find_cap(struct pci_device_header *hdr, u8 cap_type)
{
	u8 pos;
	struct pci_cap_hdr *cap;

	pci_for_each_cap(pos, cap, hdr) {
		if (cap->type == cap_type)
			return cap;
	}

	return NULL;
}

int pci__assign_irq(struct pci_device_header *pci_hdr)
{
	/*
	 * PCI supports only INTA#, B#, C# and D# per device.
	 *
	 * A#, B#, C# and D# are allowed for multifunctional devices, so stick
	 * with A# for our single-function devices.
	 */
	pci_hdr->irq_pin = 1;
	pci_hdr->irq_line = irq__alloc_line();

	if (!pci_hdr->irq_type)
		pci_hdr->irq_type = IRQ_TYPE_LEVEL_HIGH;

	return pci_hdr->irq_line;
}

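/* A BAR is implemented if, and only if, it has a non-zero size. */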
static bool pci_bar_is_implemented(struct pci_device_header *pci_hdr, int bar_num)
{
	return pci__bar_size(pci_hdr, bar_num);
}

static bool pci_bar_is_active(struct pci_device_header *pci_hdr, int bar_num)
{
	return pci_hdr->bar_active[bar_num];
}

static void *pci_config_address_ptr(u16 port)
{
	unsigned long offset;
	void *base;

	offset = port - PCI_CONFIG_ADDRESS;
	base = &pci_config_address_bits;

	return base + offset;
}

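/*
 * PIO handler for the CONFIG_ADDRESS register: reads and writes simply
 * access the latched pci_config_address_bits value above.
 */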
static void pci_config_address_mmio(struct kvm_cpu *vcpu, u64 addr, u8 *data,
				    u32 len, u8 is_write, void *ptr)
{
	void *p = pci_config_address_ptr(addr);

	if (is_write)
		memcpy(p, data, len);
	else
		memcpy(data, p, len);
}

static bool pci_device_exists(u8 bus_number, u8 device_number, u8 function_number)
{
	union pci_config_address pci_config_address;

	pci_config_address.w = ioport__read32(&pci_config_address_bits);

	if (pci_config_address.bus_number != bus_number)
		return false;

	if (pci_config_address.function_number != function_number)
		return false;

	return !IS_ERR_OR_NULL(device__find_dev(DEVICE_BUS_PCI, device_number));
}

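/*
 * PIO handler for the CONFIG_DATA register: combine the latched
 * CONFIG_ADDRESS value with the offset implied by the port that was
 * accessed, then forward to the generic config space accessors.
 */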
static void pci_config_data_mmio(struct kvm_cpu *vcpu, u64 addr, u8 *data,
				 u32 len, u8 is_write, void *kvm)
{
	union pci_config_address pci_config_address;

	pci_config_address.w = ioport__read32(&pci_config_address_bits);
	/*
	 * If the guest accesses a PCI configuration space offset that is not
	 * aligned to 4 bytes, the offset within the register is encoded in
	 * the I/O port used for the access.
	 */
	pci_config_address.reg_offset = addr - PCI_CONFIG_DATA;

	/* Ensure the access does not cross a 4-byte boundary */
	len = min(len, 4U - pci_config_address.reg_offset);

	if (is_write)
		pci__config_wr(vcpu->kvm, pci_config_address, data, len);
	else
		pci__config_rd(vcpu->kvm, pci_config_address, data, len);
}

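/*
 * (De)activate emulation for a single BAR through the device's
 * bar_activate_fn/bar_deactivate_fn callbacks, keeping bar_active[] in
 * sync. Both helpers are no-ops if the BAR is already in the desired state.
 */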
static int pci_activate_bar(struct kvm *kvm, struct pci_device_header *pci_hdr,
			    int bar_num)
{
	int r = 0;

	if (pci_bar_is_active(pci_hdr, bar_num))
		goto out;

	r = pci_hdr->bar_activate_fn(kvm, pci_hdr, bar_num, pci_hdr->data);
	if (r < 0) {
		pci_dev_warn(pci_hdr, "Error activating emulation for BAR %d",
			     bar_num);
		goto out;
	}
	pci_hdr->bar_active[bar_num] = true;

out:
	return r;
}

static int pci_deactivate_bar(struct kvm *kvm, struct pci_device_header *pci_hdr,
			      int bar_num)
{
	int r = 0;

	if (!pci_bar_is_active(pci_hdr, bar_num))
		goto out;

	r = pci_hdr->bar_deactivate_fn(kvm, pci_hdr, bar_num, pci_hdr->data);
	if (r < 0) {
		pci_dev_warn(pci_hdr, "Error deactivating emulation for BAR %d",
			     bar_num);
		goto out;
	}
	pci_hdr->bar_active[bar_num] = false;

out:
	return r;
}

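/*
 * A write to the Command register may flip the I/O or memory decode bits;
 * when that happens, activate or deactivate emulation for every
 * implemented BAR of the matching type before latching the new value.
 */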
static void pci_config_command_wr(struct kvm *kvm,
				  struct pci_device_header *pci_hdr,
				  u16 new_command)
{
	int i;
	bool toggle_io, toggle_mem;

	toggle_io = (pci_hdr->command ^ new_command) & PCI_COMMAND_IO;
	toggle_mem = (pci_hdr->command ^ new_command) & PCI_COMMAND_MEMORY;

	for (i = 0; i < 6; i++) {
		if (!pci_bar_is_implemented(pci_hdr, i))
			continue;

		if (toggle_io && pci__bar_is_io(pci_hdr, i)) {
			if (__pci__io_space_enabled(new_command))
				pci_activate_bar(kvm, pci_hdr, i);
			else
				pci_deactivate_bar(kvm, pci_hdr, i);
		}

		if (toggle_mem && pci__bar_is_memory(pci_hdr, i)) {
			if (__pci__memory_space_enabled(new_command))
				pci_activate_bar(kvm, pci_hdr, i);
			else
				pci_deactivate_bar(kvm, pci_hdr, i);
		}
	}

	pci_hdr->command = new_command;
}

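/*
 * Toggle emulation for every implemented BAR, on any PCI device, that
 * overlaps the [start, start + size) region.
 */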
static int pci_toggle_bar_regions(bool activate, struct kvm *kvm, u32 start, u32 size)
{
	struct device_header *dev_hdr;
	struct pci_device_header *tmp_hdr;
	u32 tmp_start, tmp_size;
	int i, r;

	dev_hdr = device__first_dev(DEVICE_BUS_PCI);
	while (dev_hdr) {
		tmp_hdr = dev_hdr->data;
		for (i = 0; i < 6; i++) {
			if (!pci_bar_is_implemented(tmp_hdr, i))
				continue;

			tmp_start = pci__bar_address(tmp_hdr, i);
			tmp_size = pci__bar_size(tmp_hdr, i);
			if (tmp_start + tmp_size <= start ||
			    tmp_start >= start + size)
				continue;

			if (activate)
				r = pci_activate_bar(kvm, tmp_hdr, i);
			else
				r = pci_deactivate_bar(kvm, tmp_hdr, i);
			if (r < 0)
				return r;
		}
		dev_hdr = device__next_dev(dev_hdr);
	}

	return 0;
}

static inline int pci_activate_bar_regions(struct kvm *kvm, u32 start, u32 size)
{
	return pci_toggle_bar_regions(true, kvm, start, size);
}

static inline int pci_deactivate_bar_regions(struct kvm *kvm, u32 start, u32 size)
{
	return pci_toggle_bar_regions(false, kvm, start, size);
}

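/*
 * Handle a guest write to one of the 32-bit BARs: answer size-probe writes
 * (all 1's) and, for a real address change, move the emulated region while
 * taking care of overlaps with other devices' BARs.
 */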
static void pci_config_bar_wr(struct kvm *kvm,
			      struct pci_device_header *pci_hdr, int bar_num,
			      u32 value)
{
	u32 old_addr, new_addr, bar_size;
	u32 mask;
	int r;

	if (pci__bar_is_io(pci_hdr, bar_num))
		mask = (u32)PCI_BASE_ADDRESS_IO_MASK;
	else
		mask = (u32)PCI_BASE_ADDRESS_MEM_MASK;

	/*
	 * If the kernel masks the BAR, it will expect to find the size of the
	 * BAR there next time it reads from it. After the kernel reads the
	 * size, it will write the address back.
	 *
	 * According to the PCI local bus specification REV 3.0: The number of
	 * upper bits that a device actually implements depends on how much of
	 * the address space the device will respond to. A device that wants a 1
	 * MB memory address space (using a 32-bit base address register) would
	 * build the top 12 bits of the address register, hardwiring the other
	 * bits to 0.
	 *
	 * Furthermore, software can determine how much address space the device
	 * requires by writing a value of all 1's to the register and then
	 * reading the value back. The device will return 0's in all don't-care
	 * address bits, effectively specifying the address space required.
	 *
	 * Software computes the size of the address space with the formula
	 * S = ~B + 1, where S is the memory size and B is the value read from
	 * the BAR. This means that the BAR value that kvmtool should return is
	 * B = ~(S - 1).
	 */
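	/*
	 * For example, a 1 MB BAR has S = 0x100000, so the size-probe read
	 * returns B = ~(0x100000 - 1) = 0xfff00000 in the address bits, with
	 * the read-only type bits in the low part of the BAR preserved below.
	 */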
	if (value == 0xffffffff) {
		value = ~(pci__bar_size(pci_hdr, bar_num) - 1);
		/* Preserve the special bits. */
		value = (value & mask) | (pci_hdr->bar[bar_num] & ~mask);
		pci_hdr->bar[bar_num] = value;
		return;
	}

	value = (value & mask) | (pci_hdr->bar[bar_num] & ~mask);

	/* Don't toggle emulation when region type access is disabled. */
	if (pci__bar_is_io(pci_hdr, bar_num) &&
	    !pci__io_space_enabled(pci_hdr)) {
		pci_hdr->bar[bar_num] = value;
		return;
	}

	if (pci__bar_is_memory(pci_hdr, bar_num) &&
	    !pci__memory_space_enabled(pci_hdr)) {
		pci_hdr->bar[bar_num] = value;
		return;
	}

	/*
	 * BAR reassignment can be done while device access is enabled and
	 * memory regions for different devices can overlap as long as no access
	 * is made to the overlapping memory regions. To implement BAR
	 * reassignment, we deactivate emulation for the region described by the
	 * BAR value that the guest is changing, we disable emulation for the
	 * regions that overlap with the new one (by scanning through all PCI
	 * devices), we enable emulation for the new BAR value and finally we
	 * enable emulation for all device regions that were overlapping with
	 * the old value.
	 */
	old_addr = pci__bar_address(pci_hdr, bar_num);
	new_addr = __pci__bar_address(value);
	bar_size = pci__bar_size(pci_hdr, bar_num);

	r = pci_deactivate_bar(kvm, pci_hdr, bar_num);
	if (r < 0)
		return;

	r = pci_deactivate_bar_regions(kvm, new_addr, bar_size);
	if (r < 0) {
		/*
		 * We cannot update the BAR because of an overlapping region
		 * that failed to deactivate emulation, so keep the old BAR
		 * value and re-activate emulation for it.
		 */
		pci_activate_bar(kvm, pci_hdr, bar_num);
		return;
	}

	pci_hdr->bar[bar_num] = value;
	r = pci_activate_bar(kvm, pci_hdr, bar_num);
	if (r < 0) {
		/*
		 * New region cannot be emulated, re-enable the regions that
		 * were overlapping.
		 */
		pci_activate_bar_regions(kvm, new_addr, bar_size);
		return;
	}

	pci_activate_bar_regions(kvm, old_addr, bar_size);
}

/*
 * Bits that are writable in the config space header.
 * Write-1-to-clear Status bits are missing since we never set them.
 */
static const u8 pci_config_writable[PCI_STD_HEADER_SIZEOF] = {
	[PCI_COMMAND] =
		PCI_COMMAND_IO |
		PCI_COMMAND_MEMORY |
		PCI_COMMAND_MASTER |
		PCI_COMMAND_PARITY,
	[PCI_COMMAND + 1] =
		(PCI_COMMAND_SERR |
		 PCI_COMMAND_INTX_DISABLE) >> 8,
	[PCI_INTERRUPT_LINE] = 0xff,
	[PCI_BASE_ADDRESS_0 ... PCI_BASE_ADDRESS_5 + 3] = 0xff,
	[PCI_CACHE_LINE_SIZE] = 0xff,
};

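/*
 * Config space write for a device: drop writes that only touch read-only
 * bytes of the standard header, let the device's cfg_ops.write hook observe
 * the access, handle the Command register and the BARs specially, and copy
 * everything else straight into the header.
 */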
void pci__config_wr(struct kvm *kvm, union pci_config_address addr, void *data, int size)
{
	void *base;
	u8 bar;
	u16 offset;
	struct pci_device_header *pci_hdr;
	u8 dev_num = addr.device_number;
	u32 value = 0, mask = 0;

	if (!pci_device_exists(addr.bus_number, dev_num, 0))
		return;

	offset = addr.w & PCI_DEV_CFG_MASK;
	base = pci_hdr = device__find_dev(DEVICE_BUS_PCI, dev_num)->data;

	/* We don't sanity-check capabilities for the moment */
	if (offset < PCI_STD_HEADER_SIZEOF) {
		memcpy(&mask, pci_config_writable + offset, size);
		if (!mask)
			return;
	}

	if (pci_hdr->cfg_ops.write)
		pci_hdr->cfg_ops.write(kvm, pci_hdr, offset, data, size);

	if (offset == PCI_COMMAND) {
		memcpy(&value, data, size);
		pci_config_command_wr(kvm, pci_hdr, (u16)value & mask);
		return;
	}

	bar = (offset - PCI_BAR_OFFSET(0)) / sizeof(u32);
	if (bar < 6) {
		memcpy(&value, data, size);
		pci_config_bar_wr(kvm, pci_hdr, bar, value);
		return;
	}

	memcpy(base + offset, data, size);
}

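/*
 * Config space read for a device: let the device's cfg_ops.read hook run
 * first, then return the bytes from the cached header. Reads from
 * non-existent devices return all 1's, as on real hardware.
 */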
void pci__config_rd(struct kvm *kvm, union pci_config_address addr, void *data, int size)
{
	u16 offset;
	struct pci_device_header *pci_hdr;
	u8 dev_num = addr.device_number;

	if (pci_device_exists(addr.bus_number, dev_num, 0)) {
		pci_hdr = device__find_dev(DEVICE_BUS_PCI, dev_num)->data;
		offset = addr.w & PCI_DEV_CFG_MASK;

		if (pci_hdr->cfg_ops.read)
			pci_hdr->cfg_ops.read(kvm, pci_hdr, offset, data, size);

		memcpy(data, (void *)pci_hdr + offset, size);
	} else {
		memset(data, 0xff, size);
	}
}

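/*
 * Handler for the memory-mapped configuration window at KVM_PCI_CFG_AREA:
 * the offset into the window encodes the bus/device/function and register
 * being accessed.
 */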
static void pci_config_mmio_access(struct kvm_cpu *vcpu, u64 addr, u8 *data,
				   u32 len, u8 is_write, void *kvm)
{
	union pci_config_address cfg_addr;

	addr -= KVM_PCI_CFG_AREA;
	cfg_addr.w = (u32)addr;
	cfg_addr.enable_bit = 1;

	/*
	 * To prevent some overflows, reject accesses that cross a 4-byte
	 * boundary. The PCIe specification says:
	 *
	 * "Root Complex implementations are not required to support the
	 * generation of Configuration Requests from accesses that cross DW
	 * [4 bytes] boundaries."
	 */
	if ((addr & 3) + len > 4)
		return;

	if (is_write)
		pci__config_wr(kvm, cfg_addr, data, len);
	else
		pci__config_rd(kvm, cfg_addr, data, len);
}

struct pci_device_header *pci__find_dev(u8 dev_num)
{
	struct device_header *hdr = device__find_dev(DEVICE_BUS_PCI, dev_num);

	if (IS_ERR_OR_NULL(hdr))
		return NULL;

	return hdr->data;
}

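/*
 * Register the BAR activate/deactivate callbacks for a device and
 * immediately activate emulation for every implemented BAR whose address
 * space type is currently enabled in the Command register.
 */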
int pci__register_bar_regions(struct kvm *kvm, struct pci_device_header *pci_hdr,
			      bar_activate_fn_t bar_activate_fn,
			      bar_deactivate_fn_t bar_deactivate_fn, void *data)
{
	int i, r;

	assert(bar_activate_fn && bar_deactivate_fn);

	pci_hdr->bar_activate_fn = bar_activate_fn;
	pci_hdr->bar_deactivate_fn = bar_deactivate_fn;
	pci_hdr->data = data;

	for (i = 0; i < 6; i++) {
		if (!pci_bar_is_implemented(pci_hdr, i))
			continue;

		assert(!pci_bar_is_active(pci_hdr, i));

		if (pci__bar_is_io(pci_hdr, i) &&
		    pci__io_space_enabled(pci_hdr)) {
			r = pci_activate_bar(kvm, pci_hdr, i);
			if (r < 0)
				return r;
		}

		if (pci__bar_is_memory(pci_hdr, i) &&
		    pci__memory_space_enabled(pci_hdr)) {
			r = pci_activate_bar(kvm, pci_hdr, i);
			if (r < 0)
				return r;
		}
	}

	return 0;
}

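/*
 * Register the CONFIG_ADDRESS/CONFIG_DATA port handlers and the
 * memory-mapped configuration window.
 */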
int pci__init(struct kvm *kvm)
{
	int r;

	r = kvm__register_pio(kvm, PCI_CONFIG_DATA, 4,
			      pci_config_data_mmio, NULL);
	if (r < 0)
		return r;
	r = kvm__register_pio(kvm, PCI_CONFIG_ADDRESS, 4,
			      pci_config_address_mmio, NULL);
	if (r < 0)
		goto err_unregister_data;

	r = kvm__register_mmio(kvm, KVM_PCI_CFG_AREA, PCI_CFG_SIZE, false,
			       pci_config_mmio_access, kvm);
	if (r < 0)
		goto err_unregister_addr;

	return 0;

err_unregister_addr:
	kvm__deregister_pio(kvm, PCI_CONFIG_ADDRESS);
err_unregister_data:
	kvm__deregister_pio(kvm, PCI_CONFIG_DATA);
	return r;
}
dev_base_init(pci__init);

int pci__exit(struct kvm *kvm)
{
	kvm__deregister_pio(kvm, PCI_CONFIG_DATA);
	kvm__deregister_pio(kvm, PCI_CONFIG_ADDRESS);
	kvm__deregister_mmio(kvm, KVM_PCI_CFG_AREA);

	return 0;
}
dev_base_exit(pci__exit);