xref: /kvmtool/pci.c (revision 46e04130d264261fde1e016c83694b10e62c651f)
#include "kvm/devices.h"
#include "kvm/pci.h"
#include "kvm/ioport.h"
#include "kvm/irq.h"
#include "kvm/util.h"
#include "kvm/kvm.h"

#include <linux/err.h>
#include <assert.h>

static u32 pci_config_address_bits;

/* This is within our PCI gap - in an unused area.
 * Note this is a PCI *bus address*; it is used to assign BARs etc.
 * (That's why it can still be 32 bit even with 64 bit guests:
 * 64 bit PCI isn't currently supported.)
 */
static u32 mmio_blocks			= KVM_PCI_MMIO_AREA;
static u16 io_port_blocks		= PCI_IOPORT_START;

u16 pci_get_io_port_block(u32 size)
{
	u16 port = ALIGN(io_port_blocks, PCI_IO_SIZE);

	io_port_blocks = port + size;
	return port;
}
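
/*
 * Note that IO port blocks are handed out aligned to the fixed
 * PCI_IO_SIZE granule rather than to the requested size; e.g. assuming
 * PCI_IO_SIZE is 0x100, two successive calls asking for 0x10 ports
 * each return bases 0x100 apart.
 */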

/*
 * BARs must be naturally aligned, so enforce this in the allocator.
 */
u32 pci_get_mmio_block(u32 size)
{
	u32 block = ALIGN(mmio_blocks, size);
	mmio_blocks = block + size;
	return block;
}
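
/*
 * A worked example (addresses hypothetical): ALIGN() rounds up to the
 * next multiple of the requested size, which is what gives each BAR
 * its natural alignment. With mmio_blocks at 0xd2000100, asking for a
 * 0x1000 byte BAR returns 0xd2001000 and advances mmio_blocks to
 * 0xd2002000; a following request for 0x4000 bytes then returns
 * 0xd2004000, aligned to its own size.
 */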

void *pci_find_cap(struct pci_device_header *hdr, u8 cap_type)
{
	u8 pos;
	struct pci_cap_hdr *cap;

	pci_for_each_cap(pos, cap, hdr) {
		if (cap->type == cap_type)
			return cap;
	}

	return NULL;
}
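
/*
 * pci_for_each_cap() (kvm/pci.h) follows the config space capability
 * list from the Capabilities Pointer, so e.g.
 * pci_find_cap(hdr, PCI_CAP_ID_MSIX) returns a pointer to the MSI-X
 * capability structure, or NULL if the device doesn't expose one.
 */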

int pci__assign_irq(struct pci_device_header *pci_hdr)
{
	/*
	 * PCI supports only the INTA#, INTB#, INTC# and INTD# pins per
	 * device.
	 *
	 * Using all four pins only makes sense for multi-function
	 * devices, so stick with INTA# for our single-function devices.
	 */
	pci_hdr->irq_pin	= 1;
	pci_hdr->irq_line	= irq__alloc_line();

	if (!pci_hdr->irq_type)
		pci_hdr->irq_type = IRQ_TYPE_EDGE_RISING;

	return pci_hdr->irq_line;
}
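
/*
 * For reference: in the config header, an Interrupt Pin value of 1
 * means INTA# (2-4 map to INTB#-INTD#, 0 means no pin), and the
 * allocated line is what the guest later reads back from the
 * Interrupt Line register.
 */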

static bool pci_bar_is_implemented(struct pci_device_header *pci_hdr, int bar_num)
{
	return pci__bar_size(pci_hdr, bar_num);
}

static void *pci_config_address_ptr(u16 port)
{
	unsigned long offset;
	void *base;

	offset	= port - PCI_CONFIG_ADDRESS;
	base	= &pci_config_address_bits;

	return base + offset;
}
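
/*
 * Example: a byte access to port PCI_CONFIG_ADDRESS + 2 yields
 * offset == 2, i.e. a pointer to bits 16-23 of the (little-endian)
 * address dword, which is the bus number field of
 * union pci_config_address. This lets guests assemble the address
 * register with sub-dword accesses.
 */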

static bool pci_config_address_out(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size)
{
	void *p = pci_config_address_ptr(port);

	memcpy(p, data, size);

	return true;
}

static bool pci_config_address_in(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size)
{
	void *p = pci_config_address_ptr(port);

	memcpy(data, p, size);

	return true;
}

static struct ioport_operations pci_config_address_ops = {
	.io_in	= pci_config_address_in,
	.io_out	= pci_config_address_out,
};

static bool pci_device_exists(u8 bus_number, u8 device_number, u8 function_number)
{
	union pci_config_address pci_config_address;

	pci_config_address.w = ioport__read32(&pci_config_address_bits);

	if (pci_config_address.bus_number != bus_number)
		return false;

	if (pci_config_address.function_number != function_number)
		return false;

	return !IS_ERR_OR_NULL(device__find_dev(DEVICE_BUS_PCI, device_number));
}

static bool pci_config_data_out(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size)
{
	union pci_config_address pci_config_address;

	if (size > 4)
		size = 4;

	pci_config_address.w = ioport__read32(&pci_config_address_bits);
	/*
	 * Guests access PCI configuration space offsets that are not
	 * aligned to 4 bytes through the ports following PCI_CONFIG_DATA;
	 * recover the low bits of the offset from the port number.
	 */
	pci_config_address.reg_offset = port - PCI_CONFIG_DATA;

	pci__config_wr(vcpu->kvm, pci_config_address, data, size);

	return true;
}

static bool pci_config_data_in(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size)
{
	union pci_config_address pci_config_address;

	if (size > 4)
		size = 4;

	pci_config_address.w = ioport__read32(&pci_config_address_bits);
	/*
	 * Guests access PCI configuration space offsets that are not
	 * aligned to 4 bytes through the ports following PCI_CONFIG_DATA;
	 * recover the low bits of the offset from the port number.
	 */
	pci_config_address.reg_offset = port - PCI_CONFIG_DATA;

	pci__config_rd(vcpu->kvm, pci_config_address, data, size);

	return true;
}

static struct ioport_operations pci_config_data_ops = {
	.io_in	= pci_config_data_in,
	.io_out	= pci_config_data_out,
};
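
/*
 * A worked example of the legacy two-port mechanism implemented above
 * (device number hypothetical): to read the 16 bit Vendor ID of bus 0,
 * device 3, function 0, a guest does
 *
 *	outl(0x80001800, PCI_CONFIG_ADDRESS);	// enable bit | (3 << 11)
 *	id = inw(PCI_CONFIG_DATA);		// config offset 0
 *
 * and a byte-sized read of config offset 0x3d (Interrupt Pin) would
 * program register number 0x3c and then use inb(PCI_CONFIG_DATA + 1).
 */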

static void pci_config_command_wr(struct kvm *kvm,
				  struct pci_device_header *pci_hdr,
				  u16 new_command)
{
	int i;
	bool toggle_io, toggle_mem;

	toggle_io = (pci_hdr->command ^ new_command) & PCI_COMMAND_IO;
	toggle_mem = (pci_hdr->command ^ new_command) & PCI_COMMAND_MEMORY;

	for (i = 0; i < 6; i++) {
		if (!pci_bar_is_implemented(pci_hdr, i))
			continue;

		if (toggle_io && pci__bar_is_io(pci_hdr, i)) {
			if (__pci__io_space_enabled(new_command))
				pci_hdr->bar_activate_fn(kvm, pci_hdr, i,
							 pci_hdr->data);
			else
				pci_hdr->bar_deactivate_fn(kvm, pci_hdr, i,
							   pci_hdr->data);
		}

		if (toggle_mem && pci__bar_is_memory(pci_hdr, i)) {
			if (__pci__memory_space_enabled(new_command))
				pci_hdr->bar_activate_fn(kvm, pci_hdr, i,
							 pci_hdr->data);
			else
				pci_hdr->bar_deactivate_fn(kvm, pci_hdr, i,
							   pci_hdr->data);
		}
	}

	pci_hdr->command = new_command;
}
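
/*
 * This is what makes BAR reassignment work: a well-behaved guest
 * clears PCI_COMMAND_IO/PCI_COMMAND_MEMORY before rewriting a BAR and
 * sets it again afterwards, so the device's deactivate/activate
 * callbacks fire and its emulation moves to the new address.
 */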

void pci__config_wr(struct kvm *kvm, union pci_config_address addr, void *data, int size)
{
	void *base;
	u8 bar, offset;
	struct pci_device_header *pci_hdr;
	u8 dev_num = addr.device_number;
	u32 value = 0;
	u32 mask;

	if (!pci_device_exists(addr.bus_number, dev_num, 0))
		return;

	offset = addr.w & PCI_DEV_CFG_MASK;
	base = pci_hdr = device__find_dev(DEVICE_BUS_PCI, dev_num)->data;

	if (pci_hdr->cfg_ops.write)
		pci_hdr->cfg_ops.write(kvm, pci_hdr, offset, data, size);

	/*
	 * legacy hack: ignore writes to uninitialized regions (e.g. ROM BAR).
	 * Not very nice but has been working so far.
	 */
	if (*(u32 *)(base + offset) == 0)
		return;

	if (offset == PCI_COMMAND) {
		memcpy(&value, data, size);
		pci_config_command_wr(kvm, pci_hdr, (u16)value);
		return;
	}

	bar = (offset - PCI_BAR_OFFSET(0)) / sizeof(u32);

	/*
	 * If the kernel masks the BAR, it will expect to find the size of the
	 * BAR there next time it reads from it. After the kernel reads the
	 * size, it will write the address back.
	 */
	if (bar < 6) {
		if (pci__bar_is_io(pci_hdr, bar))
			mask = (u32)PCI_BASE_ADDRESS_IO_MASK;
		else
			mask = (u32)PCI_BASE_ADDRESS_MEM_MASK;
		/*
		 * According to the PCI local bus specification REV 3.0:
		 * The number of upper bits that a device actually implements
		 * depends on how much of the address space the device will
		 * respond to. A device that wants a 1 MB memory address space
		 * (using a 32-bit base address register) would build the top
		 * 12 bits of the address register, hardwiring the other bits
		 * to 0.
		 *
		 * Furthermore, software can determine how much address space
		 * the device requires by writing a value of all 1's to the
		 * register and then reading the value back. The device will
		 * return 0's in all don't-care address bits, effectively
		 * specifying the address space required.
		 *
		 * Software computes the size of the address space with the
		 * formula S = ~B + 1, where S is the memory size and B is the
		 * value read from the BAR. This means that the BAR value that
		 * kvmtool should return is B = ~(S - 1).
		 */
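		/*
		 * Worked example: for a 1 MB memory BAR, S = 0x100000, so
		 * the value returned while the BAR is masked is
		 * B = ~(0x100000 - 1) = 0xfff00000, and the guest gets back
		 * S = ~0xfff00000 + 1 = 0x100000.
		 */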
		memcpy(&value, data, size);
		if (value == 0xffffffff)
			value = ~(pci__bar_size(pci_hdr, bar) - 1);
		/* Preserve the special bits. */
		value = (value & mask) | (pci_hdr->bar[bar] & ~mask);
		memcpy(base + offset, &value, size);
	} else {
		memcpy(base + offset, data, size);
	}
}

void pci__config_rd(struct kvm *kvm, union pci_config_address addr, void *data, int size)
{
	u8 offset;
	struct pci_device_header *pci_hdr;
	u8 dev_num = addr.device_number;

	if (pci_device_exists(addr.bus_number, dev_num, 0)) {
		pci_hdr = device__find_dev(DEVICE_BUS_PCI, dev_num)->data;
		offset = addr.w & PCI_DEV_CFG_MASK;

		if (pci_hdr->cfg_ops.read)
			pci_hdr->cfg_ops.read(kvm, pci_hdr, offset, data, size);

		memcpy(data, (void *)pci_hdr + offset, size);
	} else {
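		/*
		 * Reads from non-existent devices return all ones, the
		 * same as a master abort on real hardware; an all-ones
		 * Vendor ID is how guests detect an empty slot.
		 */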
		memset(data, 0xff, size);
	}
}

static void pci_config_mmio_access(struct kvm_cpu *vcpu, u64 addr, u8 *data,
				   u32 len, u8 is_write, void *kvm)
{
	union pci_config_address cfg_addr;

	addr			-= KVM_PCI_CFG_AREA;
	cfg_addr.w		= (u32)addr;
	cfg_addr.enable_bit	= 1;

	if (len > 4)
		len = 4;

	if (is_write)
		pci__config_wr(kvm, cfg_addr, data, len);
	else
		pci__config_rd(kvm, cfg_addr, data, len);
}
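
/*
 * The MMIO config window mirrors the bit layout of
 * union pci_config_address, so (a sketch) an access at
 *
 *	KVM_PCI_CFG_AREA + (bus << 16) + (dev << 11) + (fn << 8) + reg
 *
 * hits config space offset 'reg' of that device; e.g. a 4 byte read
 * at KVM_PCI_CFG_AREA + (3 << 11) returns the Vendor and Device ID of
 * device 3 on bus 0.
 */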

struct pci_device_header *pci__find_dev(u8 dev_num)
{
	struct device_header *hdr = device__find_dev(DEVICE_BUS_PCI, dev_num);

	if (IS_ERR_OR_NULL(hdr))
		return NULL;

	return hdr->data;
}

int pci__register_bar_regions(struct kvm *kvm, struct pci_device_header *pci_hdr,
			      bar_activate_fn_t bar_activate_fn,
			      bar_deactivate_fn_t bar_deactivate_fn, void *data)
{
	int i, r;

	assert(bar_activate_fn && bar_deactivate_fn);

	pci_hdr->bar_activate_fn = bar_activate_fn;
	pci_hdr->bar_deactivate_fn = bar_deactivate_fn;
	pci_hdr->data = data;

	for (i = 0; i < 6; i++) {
		if (!pci_bar_is_implemented(pci_hdr, i))
			continue;

		if (pci__bar_is_io(pci_hdr, i) &&
		    pci__io_space_enabled(pci_hdr)) {
			r = bar_activate_fn(kvm, pci_hdr, i, data);
			if (r < 0)
				return r;
		}

		if (pci__bar_is_memory(pci_hdr, i) &&
		    pci__memory_space_enabled(pci_hdr)) {
			r = bar_activate_fn(kvm, pci_hdr, i, data);
			if (r < 0)
				return r;
		}
	}

	return 0;
}
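
/*
 * Typical use (a sketch; the my_* names are hypothetical): a device
 * registers callbacks that plug the emulation behind each BAR in and
 * out, e.g.
 *
 *	static int my_bar_activate(struct kvm *kvm,
 *				   struct pci_device_header *pci_hdr,
 *				   int bar_num, void *data)
 *	{
 *		return kvm__register_mmio(kvm,
 *					  pci__bar_address(pci_hdr, bar_num),
 *					  pci__bar_size(pci_hdr, bar_num),
 *					  false, my_mmio_handler, data);
 *	}
 *
 * so that a BAR is only live while the guest keeps the matching
 * decode bit set in the command register.
 */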

int pci__init(struct kvm *kvm)
{
	int r;

	r = ioport__register(kvm, PCI_CONFIG_DATA + 0, &pci_config_data_ops, 4, NULL);
	if (r < 0)
		return r;

	r = ioport__register(kvm, PCI_CONFIG_ADDRESS + 0, &pci_config_address_ops, 4, NULL);
	if (r < 0)
		goto err_unregister_data;

	r = kvm__register_mmio(kvm, KVM_PCI_CFG_AREA, PCI_CFG_SIZE, false,
			       pci_config_mmio_access, kvm);
	if (r < 0)
		goto err_unregister_addr;

	return 0;

err_unregister_addr:
	ioport__unregister(kvm, PCI_CONFIG_ADDRESS);
err_unregister_data:
	ioport__unregister(kvm, PCI_CONFIG_DATA);
	return r;
}
dev_base_init(pci__init);

int pci__exit(struct kvm *kvm)
{
	ioport__unregister(kvm, PCI_CONFIG_DATA);
	ioport__unregister(kvm, PCI_CONFIG_ADDRESS);

	return 0;
}
dev_base_exit(pci__exit);