xref: /kvmtool/pci.c (revision e69b7663b06e8af9cc2dae16e6ec906a64c3c63d)
#include "kvm/devices.h"
#include "kvm/pci.h"
#include "kvm/ioport.h"
#include "kvm/irq.h"
#include "kvm/util.h"
#include "kvm/kvm.h"

#include <linux/err.h>
#include <assert.h>

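/* Last value the guest wrote to the CONFIG_ADDRESS register. */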
static u32 pci_config_address_bits;

/* This is within our PCI gap - in an unused area.
 * Note this is a PCI *bus address* and is used to assign BARs etc.!
 * (That's why it can still be 32-bit even with 64-bit guests -- 64-bit
 * PCI isn't currently supported.)
 */
static u32 mmio_blocks			= KVM_PCI_MMIO_AREA;
static u16 io_port_blocks		= PCI_IOPORT_START;

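/*
 * Bump allocator for guest I/O port space: returns a block of @size ports
 * aligned to PCI_IO_SIZE. Allocations are never freed.
 */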
u16 pci_get_io_port_block(u32 size)
{
	u16 port = ALIGN(io_port_blocks, PCI_IO_SIZE);

	io_port_blocks = port + size;
	return port;
}

/*
 * BARs must be naturally aligned, so enforce this in the allocator.
 */
u32 pci_get_mmio_block(u32 size)
{
	u32 block = ALIGN(mmio_blocks, size);
	mmio_blocks = block + size;
	return block;
}

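/*
 * Walk the device's capability list and return the first capability of
 * type @cap_type, or NULL if the device doesn't advertise one.
 */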
void *pci_find_cap(struct pci_device_header *hdr, u8 cap_type)
{
	u8 pos;
	struct pci_cap_hdr *cap;

	pci_for_each_cap(pos, cap, hdr) {
		if (cap->type == cap_type)
			return cap;
	}

	return NULL;
}

int pci__assign_irq(struct pci_device_header *pci_hdr)
{
	/*
	 * PCI supports only INTA#,B#,C#,D# per device.
	 *
	 * All of A#, B#, C# and D# are allowed for multifunction devices,
	 * so stick with A# for our single-function devices.
	 */
	pci_hdr->irq_pin	= 1;
	pci_hdr->irq_line	= irq__alloc_line();

	if (!pci_hdr->irq_type)
		pci_hdr->irq_type = IRQ_TYPE_EDGE_RISING;

	return pci_hdr->irq_line;
}

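/* A BAR is implemented iff it was given a non-zero size. */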
static bool pci_bar_is_implemented(struct pci_device_header *pci_hdr, int bar_num)
{
	return pci__bar_size(pci_hdr, bar_num);
}

static bool pci_bar_is_active(struct pci_device_header *pci_hdr, int bar_num)
{
	return pci_hdr->bar_active[bar_num];
}

static void *pci_config_address_ptr(u16 port)
{
	unsigned long offset;
	void *base;

	offset	= port - PCI_CONFIG_ADDRESS;
	base	= &pci_config_address_bits;

	return base + offset;
}

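/*
 * Emulate guest accesses to the 4-byte CONFIG_ADDRESS register.
 * Sub-dword accesses are handled by offsetting into the backing dword.
 */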
static void pci_config_address_mmio(struct kvm_cpu *vcpu, u64 addr, u8 *data,
				    u32 len, u8 is_write, void *ptr)
{
	void *p = pci_config_address_ptr(addr);

	if (is_write)
		memcpy(p, data, len);
	else
		memcpy(data, p, len);
}
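
/*
 * Check whether the guest-programmed CONFIG_ADDRESS register currently
 * selects @bus_number and @function_number, and whether a device is
 * registered at @device_number on the PCI bus.
 */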
static bool pci_device_exists(u8 bus_number, u8 device_number, u8 function_number)
{
	union pci_config_address pci_config_address;

	pci_config_address.w = ioport__read32(&pci_config_address_bits);

	if (pci_config_address.bus_number != bus_number)
		return false;

	if (pci_config_address.function_number != function_number)
		return false;

	return !IS_ERR_OR_NULL(device__find_dev(DEVICE_BUS_PCI, device_number));
}

static void pci_config_data_mmio(struct kvm_cpu *vcpu, u64 addr, u8 *data,
				 u32 len, u8 is_write, void *kvm)
{
	union pci_config_address pci_config_address;

	if (len > 4)
		len = 4;

	pci_config_address.w = ioport__read32(&pci_config_address_bits);
	/*
	 * An access to a PCI configuration space offset that is not aligned
	 * to 4 bytes shows up here as an access past PCI_CONFIG_DATA; fold
	 * that port offset back into the register offset.
	 */
	pci_config_address.reg_offset = addr - PCI_CONFIG_DATA;

	if (is_write)
		pci__config_wr(vcpu->kvm, pci_config_address, data, len);
	else
		pci__config_rd(vcpu->kvm, pci_config_address, data, len);
}

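/*
 * Enable emulation for a BAR's region via the device's bar_activate_fn
 * callback. Does nothing if the BAR is already active.
 */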
static int pci_activate_bar(struct kvm *kvm, struct pci_device_header *pci_hdr,
			    int bar_num)
{
	int r = 0;

	if (pci_bar_is_active(pci_hdr, bar_num))
		goto out;

	r = pci_hdr->bar_activate_fn(kvm, pci_hdr, bar_num, pci_hdr->data);
	if (r < 0) {
		pci_dev_warn(pci_hdr, "Error activating emulation for BAR %d",
			     bar_num);
		goto out;
	}
	pci_hdr->bar_active[bar_num] = true;

out:
	return r;
}

static int pci_deactivate_bar(struct kvm *kvm, struct pci_device_header *pci_hdr,
			      int bar_num)
{
	int r = 0;

	if (!pci_bar_is_active(pci_hdr, bar_num))
		goto out;

	r = pci_hdr->bar_deactivate_fn(kvm, pci_hdr, bar_num, pci_hdr->data);
	if (r < 0) {
		pci_dev_warn(pci_hdr, "Error deactivating emulation for BAR %d",
			     bar_num);
		goto out;
	}
	pci_hdr->bar_active[bar_num] = false;

out:
	return r;
}

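/*
 * Handle a guest write to the COMMAND register: when the I/O or memory
 * space enable bit is toggled, activate or deactivate emulation for every
 * implemented BAR of the matching type.
 */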
static void pci_config_command_wr(struct kvm *kvm,
				  struct pci_device_header *pci_hdr,
				  u16 new_command)
{
	int i;
	bool toggle_io, toggle_mem;

	toggle_io = (pci_hdr->command ^ new_command) & PCI_COMMAND_IO;
	toggle_mem = (pci_hdr->command ^ new_command) & PCI_COMMAND_MEMORY;

	for (i = 0; i < 6; i++) {
		if (!pci_bar_is_implemented(pci_hdr, i))
			continue;

		if (toggle_io && pci__bar_is_io(pci_hdr, i)) {
			if (__pci__io_space_enabled(new_command))
				pci_activate_bar(kvm, pci_hdr, i);
			else
				pci_deactivate_bar(kvm, pci_hdr, i);
		}

		if (toggle_mem && pci__bar_is_memory(pci_hdr, i)) {
			if (__pci__memory_space_enabled(new_command))
				pci_activate_bar(kvm, pci_hdr, i);
			else
				pci_deactivate_bar(kvm, pci_hdr, i);
		}
	}

	pci_hdr->command = new_command;
}

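/*
 * Walk all PCI devices and toggle emulation for every implemented BAR
 * whose region overlaps [start, start + size).
 */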
static int pci_toggle_bar_regions(bool activate, struct kvm *kvm, u32 start, u32 size)
{
	struct device_header *dev_hdr;
	struct pci_device_header *tmp_hdr;
	u32 tmp_start, tmp_size;
	int i, r;

	dev_hdr = device__first_dev(DEVICE_BUS_PCI);
	while (dev_hdr) {
		tmp_hdr = dev_hdr->data;
		for (i = 0; i < 6; i++) {
			if (!pci_bar_is_implemented(tmp_hdr, i))
				continue;

			tmp_start = pci__bar_address(tmp_hdr, i);
			tmp_size = pci__bar_size(tmp_hdr, i);
			if (tmp_start + tmp_size <= start ||
			    tmp_start >= start + size)
				continue;

			if (activate)
				r = pci_activate_bar(kvm, tmp_hdr, i);
			else
				r = pci_deactivate_bar(kvm, tmp_hdr, i);
			if (r < 0)
				return r;
		}
		dev_hdr = device__next_dev(dev_hdr);
	}

	return 0;
}

static inline int pci_activate_bar_regions(struct kvm *kvm, u32 start, u32 size)
{
	return pci_toggle_bar_regions(true, kvm, start, size);
}

static inline int pci_deactivate_bar_regions(struct kvm *kvm, u32 start, u32 size)
{
	return pci_toggle_bar_regions(false, kvm, start, size);
}

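/*
 * Handle a guest write to a BAR: answer size probes (all-ones writes) and
 * move the emulated region when the guest changes the BAR address.
 */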
static void pci_config_bar_wr(struct kvm *kvm,
			      struct pci_device_header *pci_hdr, int bar_num,
			      u32 value)
{
	u32 old_addr, new_addr, bar_size;
	u32 mask;
	int r;

	if (pci__bar_is_io(pci_hdr, bar_num))
		mask = (u32)PCI_BASE_ADDRESS_IO_MASK;
	else
		mask = (u32)PCI_BASE_ADDRESS_MEM_MASK;

	/*
	 * If the kernel masks the BAR, it will expect to find the size of the
	 * BAR there next time it reads from it. After the kernel reads the
	 * size, it will write the address back.
	 *
	 * According to the PCI local bus specification REV 3.0: The number of
	 * upper bits that a device actually implements depends on how much of
	 * the address space the device will respond to. A device that wants a 1
	 * MB memory address space (using a 32-bit base address register) would
	 * build the top 12 bits of the address register, hardwiring the other
	 * bits to 0.
	 *
	 * Furthermore, software can determine how much address space the device
	 * requires by writing a value of all 1's to the register and then
	 * reading the value back. The device will return 0's in all don't-care
	 * address bits, effectively specifying the address space required.
	 *
	 * Software computes the size of the address space with the formula
	 * S = ~B + 1, where S is the memory size and B is the value read from
	 * the BAR. This means that the BAR value that kvmtool should return is
	 * B = ~(S - 1).
	 */
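	/*
	 * Worked example: a 1 MB BAR has S = 0x00100000, so after an
	 * all-ones write the value read back is B = ~(S - 1) =
	 * ~0x000FFFFF = 0xFFF00000, i.e. only the top 12 bits are set.
	 */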
	if (value == 0xffffffff) {
		value = ~(pci__bar_size(pci_hdr, bar_num) - 1);
		/* Preserve the special bits. */
		value = (value & mask) | (pci_hdr->bar[bar_num] & ~mask);
		pci_hdr->bar[bar_num] = value;
		return;
	}

	value = (value & mask) | (pci_hdr->bar[bar_num] & ~mask);

	/* Don't toggle emulation when region type access is disabled. */
	if (pci__bar_is_io(pci_hdr, bar_num) &&
	    !pci__io_space_enabled(pci_hdr)) {
		pci_hdr->bar[bar_num] = value;
		return;
	}

	if (pci__bar_is_memory(pci_hdr, bar_num) &&
	    !pci__memory_space_enabled(pci_hdr)) {
		pci_hdr->bar[bar_num] = value;
		return;
	}

	/*
	 * BAR reassignment can be done while device access is enabled, and
	 * memory regions for different devices can overlap as long as no
	 * access is made to the overlapping regions. To implement BAR
	 * reassignment:
	 *
	 * 1. Deactivate emulation for the region described by the BAR value
	 *    that the guest is changing.
	 * 2. Deactivate emulation for the regions that overlap with the new
	 *    value (by scanning through all PCI devices).
	 * 3. Activate emulation for the new BAR value.
	 * 4. Re-activate emulation for all device regions that were
	 *    overlapping with the old value.
	 */
	old_addr = pci__bar_address(pci_hdr, bar_num);
	new_addr = __pci__bar_address(value);
	bar_size = pci__bar_size(pci_hdr, bar_num);

	r = pci_deactivate_bar(kvm, pci_hdr, bar_num);
	if (r < 0)
		return;

	r = pci_deactivate_bar_regions(kvm, new_addr, bar_size);
	if (r < 0) {
		/*
		 * We cannot update the BAR because of an overlapping region
		 * that failed to deactivate emulation, so keep the old BAR
		 * value and re-activate emulation for it.
		 */
		pci_activate_bar(kvm, pci_hdr, bar_num);
		return;
	}

	pci_hdr->bar[bar_num] = value;
	r = pci_activate_bar(kvm, pci_hdr, bar_num);
	if (r < 0) {
		/*
		 * New region cannot be emulated, re-enable the regions that
		 * were overlapping.
		 */
		pci_activate_bar_regions(kvm, new_addr, bar_size);
		return;
	}

	pci_activate_bar_regions(kvm, old_addr, bar_size);
}

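/*
 * Dispatch a guest write to PCI configuration space: call the device's
 * config write hook first, then emulate the COMMAND and BAR registers,
 * and fall back to a raw copy into the header for everything else.
 */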
void pci__config_wr(struct kvm *kvm, union pci_config_address addr, void *data, int size)
{
	void *base;
	u8 bar;
	u16 offset;
	struct pci_device_header *pci_hdr;
	u8 dev_num = addr.device_number;
	u32 value = 0;

	if (!pci_device_exists(addr.bus_number, dev_num, 0))
		return;

	offset = addr.w & PCI_DEV_CFG_MASK;
	base = pci_hdr = device__find_dev(DEVICE_BUS_PCI, dev_num)->data;

	if (pci_hdr->cfg_ops.write)
		pci_hdr->cfg_ops.write(kvm, pci_hdr, offset, data, size);

	/*
	 * Legacy hack: ignore writes to uninitialized regions (e.g. ROM BAR).
	 * Not very nice, but it has been working so far.
	 */
	if (*(u32 *)(base + offset) == 0)
		return;

	if (offset == PCI_COMMAND) {
		memcpy(&value, data, size);
		pci_config_command_wr(kvm, pci_hdr, (u16)value);
		return;
	}

	bar = (offset - PCI_BAR_OFFSET(0)) / sizeof(u32);
	if (bar < 6) {
		memcpy(&value, data, size);
		pci_config_bar_wr(kvm, pci_hdr, bar, value);
		return;
	}

	memcpy(base + offset, data, size);
}

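/*
 * Dispatch a guest read from PCI configuration space. Reads from a
 * non-existent device return all ones, as on real hardware.
 */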
void pci__config_rd(struct kvm *kvm, union pci_config_address addr, void *data, int size)
{
	u16 offset;
	struct pci_device_header *pci_hdr;
	u8 dev_num = addr.device_number;

	if (pci_device_exists(addr.bus_number, dev_num, 0)) {
		pci_hdr = device__find_dev(DEVICE_BUS_PCI, dev_num)->data;
		offset = addr.w & PCI_DEV_CFG_MASK;

		if (pci_hdr->cfg_ops.read)
			pci_hdr->cfg_ops.read(kvm, pci_hdr, offset, data, size);

		memcpy(data, (void *)pci_hdr + offset, size);
	} else {
		memset(data, 0xff, size);
	}
}

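/*
 * Handle accesses to the MMIO PCI configuration window: the offset into
 * KVM_PCI_CFG_AREA encodes bus, device, function and register, so it maps
 * directly onto a CONFIG_ADDRESS value.
 */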
static void pci_config_mmio_access(struct kvm_cpu *vcpu, u64 addr, u8 *data,
				   u32 len, u8 is_write, void *kvm)
{
	union pci_config_address cfg_addr;

	addr			-= KVM_PCI_CFG_AREA;
	cfg_addr.w		= (u32)addr;
	cfg_addr.enable_bit	= 1;

	if (len > 4)
		len = 4;

	if (is_write)
		pci__config_wr(kvm, cfg_addr, data, len);
	else
		pci__config_rd(kvm, cfg_addr, data, len);
}

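/* Return the header for the PCI device at @dev_num, or NULL if absent. */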
struct pci_device_header *pci__find_dev(u8 dev_num)
{
	struct device_header *hdr = device__find_dev(DEVICE_BUS_PCI, dev_num);

	if (IS_ERR_OR_NULL(hdr))
		return NULL;

	return hdr->data;
}

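/*
 * Register the activate/deactivate callbacks used to emulate the device's
 * BAR regions, and immediately activate every implemented BAR whose
 * address space type is already enabled in the COMMAND register.
 */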
int pci__register_bar_regions(struct kvm *kvm, struct pci_device_header *pci_hdr,
			      bar_activate_fn_t bar_activate_fn,
			      bar_deactivate_fn_t bar_deactivate_fn, void *data)
{
	int i, r;

	assert(bar_activate_fn && bar_deactivate_fn);

	pci_hdr->bar_activate_fn = bar_activate_fn;
	pci_hdr->bar_deactivate_fn = bar_deactivate_fn;
	pci_hdr->data = data;

	for (i = 0; i < 6; i++) {
		if (!pci_bar_is_implemented(pci_hdr, i))
			continue;

		assert(!pci_bar_is_active(pci_hdr, i));

		if (pci__bar_is_io(pci_hdr, i) &&
		    pci__io_space_enabled(pci_hdr)) {
			r = pci_activate_bar(kvm, pci_hdr, i);
			if (r < 0)
				return r;
		}

		if (pci__bar_is_memory(pci_hdr, i) &&
		    pci__memory_space_enabled(pci_hdr)) {
			r = pci_activate_bar(kvm, pci_hdr, i);
			if (r < 0)
				return r;
		}
	}

	return 0;
}

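/*
 * Register the CONFIG_ADDRESS/CONFIG_DATA I/O ports and the MMIO
 * configuration window with KVM.
 */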
int pci__init(struct kvm *kvm)
{
	int r;

	r = kvm__register_pio(kvm, PCI_CONFIG_DATA, 4,
				 pci_config_data_mmio, NULL);
	if (r < 0)
		return r;
	r = kvm__register_pio(kvm, PCI_CONFIG_ADDRESS, 4,
				 pci_config_address_mmio, NULL);
	if (r < 0)
		goto err_unregister_data;

	r = kvm__register_mmio(kvm, KVM_PCI_CFG_AREA, PCI_CFG_SIZE, false,
			       pci_config_mmio_access, kvm);
	if (r < 0)
		goto err_unregister_addr;

	return 0;

err_unregister_addr:
	kvm__deregister_pio(kvm, PCI_CONFIG_ADDRESS);
err_unregister_data:
	kvm__deregister_pio(kvm, PCI_CONFIG_DATA);
	return r;
}
dev_base_init(pci__init);

int pci__exit(struct kvm *kvm)
{
	kvm__deregister_pio(kvm, PCI_CONFIG_DATA);
	kvm__deregister_pio(kvm, PCI_CONFIG_ADDRESS);

	return 0;
}
dev_base_exit(pci__exit);