xref: /kvmtool/pci.c (revision 465edc9d0fab23f46e9b83af1403577ba98e1937)
#include "kvm/devices.h"
#include "kvm/pci.h"
#include "kvm/ioport.h"
#include "kvm/irq.h"
#include "kvm/util.h"
#include "kvm/kvm.h"

#include <linux/err.h>
#include <assert.h>

static u32 pci_config_address_bits;

/* This is within our PCI gap - in an unused area.
 * Note this is a PCI *bus address*; it is used to assign BARs, etc.
 * (That's why it can still be 32-bit even with 64-bit guests: 64-bit
 * PCI isn't currently supported.)
 */
static u32 mmio_blocks			= KVM_PCI_MMIO_AREA;
static u16 io_port_blocks		= PCI_IOPORT_START;

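/*
 * Simple bump allocator for guest I/O port space: align the cursor to
 * PCI_IO_SIZE, hand out [port, port + size) and never reclaim it.
 */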
u16 pci_get_io_port_block(u32 size)
{
	u16 port = ALIGN(io_port_blocks, PCI_IO_SIZE);

	io_port_blocks = port + size;
	return port;
}

/*
 * BARs must be naturally aligned, so enforce this in the allocator.
 */
u32 pci_get_mmio_block(u32 size)
{
	u32 block = ALIGN(mmio_blocks, size);
	mmio_blocks = block + size;
	return block;
}

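/*
 * Walk the device's capability list and return the first capability of
 * the requested type, or NULL if the device doesn't advertise one.
 */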
void *pci_find_cap(struct pci_device_header *hdr, u8 cap_type)
{
	u8 pos;
	struct pci_cap_hdr *cap;

	pci_for_each_cap(pos, cap, hdr) {
		if (cap->type == cap_type)
			return cap;
	}

	return NULL;
}

int pci__assign_irq(struct pci_device_header *pci_hdr)
{
	/*
	 * PCI supports only the INTA#, INTB#, INTC# and INTD# interrupt
	 * pins per device.
	 *
	 * All four pins are only needed for multifunction devices, so
	 * stick with INTA# for our single-function devices.
	 */
	pci_hdr->irq_pin	= 1;
	pci_hdr->irq_line	= irq__alloc_line();

	if (!pci_hdr->irq_type)
		pci_hdr->irq_type = IRQ_TYPE_EDGE_RISING;

	return pci_hdr->irq_line;
}

static bool pci_bar_is_implemented(struct pci_device_header *pci_hdr, int bar_num)
{
	return pci__bar_size(pci_hdr, bar_num);
}

static bool pci_bar_is_active(struct pci_device_header *pci_hdr, int bar_num)
{
	return pci_hdr->bar_active[bar_num];
}

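/*
 * Return a pointer into the 32-bit CONFIG_ADDRESS latch at the byte the
 * guest is addressing, so sub-dword accesses hit the right bytes.
 */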
static void *pci_config_address_ptr(u16 port)
{
	unsigned long offset;
	void *base;

	offset	= port - PCI_CONFIG_ADDRESS;
	base	= &pci_config_address_bits;

	return base + offset;
}

static bool pci_config_address_out(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size)
{
	void *p = pci_config_address_ptr(port);

	memcpy(p, data, size);

	return true;
}

static bool pci_config_address_in(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size)
{
	void *p = pci_config_address_ptr(port);

	memcpy(data, p, size);

	return true;
}

static struct ioport_operations pci_config_address_ops = {
	.io_in	= pci_config_address_in,
	.io_out	= pci_config_address_out,
};

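/*
 * A device "exists" when the latched CONFIG_ADDRESS points at the right
 * bus and function, and a device is registered at that slot on the PCI bus.
 */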
static bool pci_device_exists(u8 bus_number, u8 device_number, u8 function_number)
{
	union pci_config_address pci_config_address;

	pci_config_address.w = ioport__read32(&pci_config_address_bits);

	if (pci_config_address.bus_number != bus_number)
		return false;

	if (pci_config_address.function_number != function_number)
		return false;

	return !IS_ERR_OR_NULL(device__find_dev(DEVICE_BUS_PCI, device_number));
}

static bool pci_config_data_out(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size)
{
	union pci_config_address pci_config_address;

	if (size > 4)
		size = 4;

	pci_config_address.w = ioport__read32(&pci_config_address_bits);
	/*
	 * Accesses to configuration space offsets that are not aligned to
	 * 4 bytes come in on the ports above PCI_CONFIG_DATA; the port
	 * offset selects the byte within the 32-bit register.
	 */
	pci_config_address.reg_offset = port - PCI_CONFIG_DATA;

	pci__config_wr(vcpu->kvm, pci_config_address, data, size);

	return true;
}

static bool pci_config_data_in(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size)
{
	union pci_config_address pci_config_address;

	if (size > 4)
		size = 4;

	pci_config_address.w = ioport__read32(&pci_config_address_bits);
	/*
	 * Accesses to configuration space offsets that are not aligned to
	 * 4 bytes come in on the ports above PCI_CONFIG_DATA; the port
	 * offset selects the byte within the 32-bit register.
	 */
	pci_config_address.reg_offset = port - PCI_CONFIG_DATA;

	pci__config_rd(vcpu->kvm, pci_config_address, data, size);

	return true;
}

static struct ioport_operations pci_config_data_ops = {
	.io_in	= pci_config_data_in,
	.io_out	= pci_config_data_out,
};

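/*
 * Idempotent wrappers around the device's BAR emulation callbacks:
 * bar_active[] tracks the current state, so a callback never runs twice
 * for the same transition.
 */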
static int pci_activate_bar(struct kvm *kvm, struct pci_device_header *pci_hdr,
			    int bar_num)
{
	int r = 0;

	if (pci_bar_is_active(pci_hdr, bar_num))
		goto out;

	r = pci_hdr->bar_activate_fn(kvm, pci_hdr, bar_num, pci_hdr->data);
	if (r < 0) {
		pci_dev_warn(pci_hdr, "Error activating emulation for BAR %d",
			     bar_num);
		goto out;
	}
	pci_hdr->bar_active[bar_num] = true;

out:
	return r;
}

static int pci_deactivate_bar(struct kvm *kvm, struct pci_device_header *pci_hdr,
			      int bar_num)
{
	int r = 0;

	if (!pci_bar_is_active(pci_hdr, bar_num))
		goto out;

	r = pci_hdr->bar_deactivate_fn(kvm, pci_hdr, bar_num, pci_hdr->data);
	if (r < 0) {
		pci_dev_warn(pci_hdr, "Error deactivating emulation for BAR %d",
			     bar_num);
		goto out;
	}
	pci_hdr->bar_active[bar_num] = false;

out:
	return r;
}

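/*
 * Guest write to the COMMAND register. When the I/O or memory decode
 * bit changes, toggle emulation for every implemented BAR of that type.
 */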
static void pci_config_command_wr(struct kvm *kvm,
				  struct pci_device_header *pci_hdr,
				  u16 new_command)
{
	int i;
	bool toggle_io, toggle_mem;

	toggle_io = (pci_hdr->command ^ new_command) & PCI_COMMAND_IO;
	toggle_mem = (pci_hdr->command ^ new_command) & PCI_COMMAND_MEMORY;

	for (i = 0; i < 6; i++) {
		if (!pci_bar_is_implemented(pci_hdr, i))
			continue;

		if (toggle_io && pci__bar_is_io(pci_hdr, i)) {
			if (__pci__io_space_enabled(new_command))
				pci_activate_bar(kvm, pci_hdr, i);
			else
				pci_deactivate_bar(kvm, pci_hdr, i);
		}

		if (toggle_mem && pci__bar_is_memory(pci_hdr, i)) {
			if (__pci__memory_space_enabled(new_command))
				pci_activate_bar(kvm, pci_hdr, i);
			else
				pci_deactivate_bar(kvm, pci_hdr, i);
		}
	}

	pci_hdr->command = new_command;
}

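/*
 * Walk all PCI devices and toggle emulation for every implemented BAR
 * that overlaps the region [start, start + size).
 */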
static int pci_toggle_bar_regions(bool activate, struct kvm *kvm, u32 start, u32 size)
{
	struct device_header *dev_hdr;
	struct pci_device_header *tmp_hdr;
	u32 tmp_start, tmp_size;
	int i, r;

	dev_hdr = device__first_dev(DEVICE_BUS_PCI);
	while (dev_hdr) {
		tmp_hdr = dev_hdr->data;
		for (i = 0; i < 6; i++) {
			if (!pci_bar_is_implemented(tmp_hdr, i))
				continue;

			tmp_start = pci__bar_address(tmp_hdr, i);
			tmp_size = pci__bar_size(tmp_hdr, i);
			if (tmp_start + tmp_size <= start ||
			    tmp_start >= start + size)
				continue;

			if (activate)
				r = pci_activate_bar(kvm, tmp_hdr, i);
			else
				r = pci_deactivate_bar(kvm, tmp_hdr, i);
			if (r < 0)
				return r;
		}
		dev_hdr = device__next_dev(dev_hdr);
	}

	return 0;
}

static inline int pci_activate_bar_regions(struct kvm *kvm, u32 start, u32 size)
{
	return pci_toggle_bar_regions(true, kvm, start, size);
}

static inline int pci_deactivate_bar_regions(struct kvm *kvm, u32 start, u32 size)
{
	return pci_toggle_bar_regions(false, kvm, start, size);
}

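/*
 * Guest write to a BAR. Three cases: a size probe (all ones), a plain
 * value update while decoding for the BAR's type is disabled, and a
 * live reassignment, which must handle overlapping regions (see below).
 */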
static void pci_config_bar_wr(struct kvm *kvm,
			      struct pci_device_header *pci_hdr, int bar_num,
			      u32 value)
{
	u32 old_addr, new_addr, bar_size;
	u32 mask;
	int r;

	if (pci__bar_is_io(pci_hdr, bar_num))
		mask = (u32)PCI_BASE_ADDRESS_IO_MASK;
	else
		mask = (u32)PCI_BASE_ADDRESS_MEM_MASK;

	/*
	 * If the kernel masks the BAR, it will expect to find the size of the
	 * BAR there next time it reads from it. After the kernel reads the
	 * size, it will write the address back.
	 *
	 * According to the PCI local bus specification, rev. 3.0: the number
	 * of upper bits that a device actually implements depends on how much
	 * of the address space the device will respond to. A device that
	 * wants a 1 MB memory address space (using a 32-bit base address
	 * register) would build the top 12 bits of the address register,
	 * hardwiring the other bits to 0.
	 *
	 * Furthermore, software can determine how much address space the
	 * device requires by writing a value of all 1's to the register and
	 * then reading the value back. The device will return 0's in all
	 * don't-care address bits, effectively specifying the address space
	 * required.
	 *
	 * Software computes the size of the address space with the formula
	 * S = ~B + 1, where S is the memory size and B is the value read from
	 * the BAR. This means that the BAR value that kvmtool should return
	 * is B = ~(S - 1).
	 */
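	/*
	 * For example, a 1 MB memory BAR has S = 0x100000, so the value read
	 * back after a size probe is B = ~(S - 1) = 0xfff00000, plus whatever
	 * the mask above preserves in the low type bits.
	 */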
	if (value == 0xffffffff) {
		value = ~(pci__bar_size(pci_hdr, bar_num) - 1);
		/* Preserve the special bits. */
		value = (value & mask) | (pci_hdr->bar[bar_num] & ~mask);
		pci_hdr->bar[bar_num] = value;
		return;
	}

	value = (value & mask) | (pci_hdr->bar[bar_num] & ~mask);

	/* Don't toggle emulation when region type access is disabled. */
	if (pci__bar_is_io(pci_hdr, bar_num) &&
	    !pci__io_space_enabled(pci_hdr)) {
		pci_hdr->bar[bar_num] = value;
		return;
	}

	if (pci__bar_is_memory(pci_hdr, bar_num) &&
	    !pci__memory_space_enabled(pci_hdr)) {
		pci_hdr->bar[bar_num] = value;
		return;
	}

	/*
	 * BAR reassignment can be done while device access is enabled and
	 * memory regions for different devices can overlap as long as no
	 * access is made to the overlapping memory regions. To implement BAR
	 * reassignment, we deactivate emulation for the region described by
	 * the BAR value that the guest is changing, disable emulation for the
	 * regions that overlap with the new one (by scanning through all PCI
	 * devices), enable emulation for the new BAR value, and finally
	 * re-enable emulation for all device regions that were overlapping
	 * with the old value.
	 */
	old_addr = pci__bar_address(pci_hdr, bar_num);
	new_addr = __pci__bar_address(value);
	bar_size = pci__bar_size(pci_hdr, bar_num);

	r = pci_deactivate_bar(kvm, pci_hdr, bar_num);
	if (r < 0)
		return;

	r = pci_deactivate_bar_regions(kvm, new_addr, bar_size);
	if (r < 0) {
		/*
		 * We cannot update the BAR because of an overlapping region
		 * that failed to deactivate emulation, so keep the old BAR
		 * value and re-activate emulation for it.
		 */
		pci_activate_bar(kvm, pci_hdr, bar_num);
		return;
	}

	pci_hdr->bar[bar_num] = value;
	r = pci_activate_bar(kvm, pci_hdr, bar_num);
	if (r < 0) {
		/*
		 * The new region cannot be emulated; re-enable the regions
		 * that were overlapping.
		 */
		pci_activate_bar_regions(kvm, new_addr, bar_size);
		return;
	}

	pci_activate_bar_regions(kvm, old_addr, bar_size);
}

void pci__config_wr(struct kvm *kvm, union pci_config_address addr, void *data, int size)
{
	void *base;
	u8 bar, offset;
	struct pci_device_header *pci_hdr;
	u8 dev_num = addr.device_number;
	u32 value = 0;

	if (!pci_device_exists(addr.bus_number, dev_num, 0))
		return;

	offset = addr.w & PCI_DEV_CFG_MASK;
	base = pci_hdr = device__find_dev(DEVICE_BUS_PCI, dev_num)->data;

	if (pci_hdr->cfg_ops.write)
		pci_hdr->cfg_ops.write(kvm, pci_hdr, offset, data, size);

	/*
	 * Legacy hack: ignore writes to uninitialized regions (e.g. ROM BAR).
	 * Not very nice, but it has been working so far.
	 */
	if (*(u32 *)(base + offset) == 0)
		return;

	if (offset == PCI_COMMAND) {
		memcpy(&value, data, size);
		pci_config_command_wr(kvm, pci_hdr, (u16)value);
		return;
	}

	bar = (offset - PCI_BAR_OFFSET(0)) / sizeof(u32);
	if (bar < 6) {
		memcpy(&value, data, size);
		pci_config_bar_wr(kvm, pci_hdr, bar, value);
		return;
	}

	memcpy(base + offset, data, size);
}

void pci__config_rd(struct kvm *kvm, union pci_config_address addr, void *data, int size)
{
	u8 offset;
	struct pci_device_header *pci_hdr;
	u8 dev_num = addr.device_number;

	if (pci_device_exists(addr.bus_number, dev_num, 0)) {
		pci_hdr = device__find_dev(DEVICE_BUS_PCI, dev_num)->data;
		offset = addr.w & PCI_DEV_CFG_MASK;

		if (pci_hdr->cfg_ops.read)
			pci_hdr->cfg_ops.read(kvm, pci_hdr, offset, data, size);

		memcpy(data, (void *)pci_hdr + offset, size);
	} else {
		memset(data, 0xff, size);
	}
}

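/*
 * MMIO window into configuration space: the offset into KVM_PCI_CFG_AREA
 * uses the same bus/device/function/register layout as CONFIG_ADDRESS.
 */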
static void pci_config_mmio_access(struct kvm_cpu *vcpu, u64 addr, u8 *data,
				   u32 len, u8 is_write, void *kvm)
{
	union pci_config_address cfg_addr;

	addr			-= KVM_PCI_CFG_AREA;
	cfg_addr.w		= (u32)addr;
	cfg_addr.enable_bit	= 1;

	if (len > 4)
		len = 4;

	if (is_write)
		pci__config_wr(kvm, cfg_addr, data, len);
	else
		pci__config_rd(kvm, cfg_addr, data, len);
}

struct pci_device_header *pci__find_dev(u8 dev_num)
{
	struct device_header *hdr = device__find_dev(DEVICE_BUS_PCI, dev_num);

	if (IS_ERR_OR_NULL(hdr))
		return NULL;

	return hdr->data;
}

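/*
 * Register the BAR emulation callbacks for a device and immediately
 * activate every implemented BAR whose address space type is already
 * enabled in the COMMAND register.
 */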
int pci__register_bar_regions(struct kvm *kvm, struct pci_device_header *pci_hdr,
			      bar_activate_fn_t bar_activate_fn,
			      bar_deactivate_fn_t bar_deactivate_fn, void *data)
{
	int i, r;

	assert(bar_activate_fn && bar_deactivate_fn);

	pci_hdr->bar_activate_fn = bar_activate_fn;
	pci_hdr->bar_deactivate_fn = bar_deactivate_fn;
	pci_hdr->data = data;

	for (i = 0; i < 6; i++) {
		if (!pci_bar_is_implemented(pci_hdr, i))
			continue;

		assert(!pci_bar_is_active(pci_hdr, i));

		if (pci__bar_is_io(pci_hdr, i) &&
		    pci__io_space_enabled(pci_hdr)) {
			r = pci_activate_bar(kvm, pci_hdr, i);
			if (r < 0)
				return r;
		}

		if (pci__bar_is_memory(pci_hdr, i) &&
		    pci__memory_space_enabled(pci_hdr)) {
			r = pci_activate_bar(kvm, pci_hdr, i);
			if (r < 0)
				return r;
		}
	}

	return 0;
}

int pci__init(struct kvm *kvm)
{
	int r;

	r = ioport__register(kvm, PCI_CONFIG_DATA + 0, &pci_config_data_ops, 4, NULL);
	if (r < 0)
		return r;

	r = ioport__register(kvm, PCI_CONFIG_ADDRESS + 0, &pci_config_address_ops, 4, NULL);
	if (r < 0)
		goto err_unregister_data;

	r = kvm__register_mmio(kvm, KVM_PCI_CFG_AREA, PCI_CFG_SIZE, false,
			       pci_config_mmio_access, kvm);
	if (r < 0)
		goto err_unregister_addr;

	return 0;

err_unregister_addr:
	ioport__unregister(kvm, PCI_CONFIG_ADDRESS);
err_unregister_data:
	ioport__unregister(kvm, PCI_CONFIG_DATA);
	return r;
}
dev_base_init(pci__init);

int pci__exit(struct kvm *kvm)
{
	ioport__unregister(kvm, PCI_CONFIG_DATA);
	ioport__unregister(kvm, PCI_CONFIG_ADDRESS);

	return 0;
}
dev_base_exit(pci__exit);