xref: /kvmtool/powerpc/spapr_pci.c (revision a1166a18a97f27dbab035e9bf0f6c8c856fe2de1)
1 /*
2  * SPAPR PHB emulation, RTAS interface to PCI config space, device tree nodes
3  * for enumerated devices.
4  *
5  * Borrowed heavily from QEMU's spapr_pci.c,
6  * Copyright (c) 2011 Alexey Kardashevskiy, IBM Corporation.
7  * Copyright (c) 2011 David Gibson, IBM Corporation.
8  *
9  * Modifications copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
10  *
11  * This program is free software; you can redistribute it and/or modify it
12  * under the terms of the GNU General Public License version 2 as published
13  * by the Free Software Foundation.
14  */
15 
16 #include "spapr.h"
17 #include "spapr_pci.h"
18 #include "kvm/devices.h"
19 #include "kvm/fdt.h"
20 #include "kvm/util.h"
21 #include "kvm/pci.h"
22 
23 #include <linux/pci_regs.h>
24 #include <linux/byteorder.h>
25 
26 
/*
 * PHB debug tracing.  Enable the DEBUG_PHB define below to have
 * phb_dprintf() print to stderr; when it is left disabled every call
 * expands to an empty statement and its arguments are never evaluated.
 */
/* #define DEBUG_PHB yes */
#if defined(DEBUG_PHB)
#define phb_dprintf(fmt, ...)					\
	do {							\
		fprintf(stderr, fmt, ## __VA_ARGS__);		\
	} while (0)
#else
#define phb_dprintf(fmt, ...)	do { } while (0)
#endif
35 
36 static const uint32_t bars[] = {
37 	PCI_BASE_ADDRESS_0, PCI_BASE_ADDRESS_1,
38 	PCI_BASE_ADDRESS_2, PCI_BASE_ADDRESS_3,
39 	PCI_BASE_ADDRESS_4, PCI_BASE_ADDRESS_5
40 	/*, PCI_ROM_ADDRESS*/
41 };
42 
43 #define PCI_NUM_REGIONS		7
44 
/*
 * Macros to operate with address in OF binding to PCI.
 *
 * b_x(x, p, l) keeps the low 'l' bits of 'x' and places them at bit
 * position 'p'.  The arithmetic is carried out in uint32_t: with plain int
 * operands b_n(1) would shift a 1 into the sign bit (1 << 31), which is
 * undefined behaviour in C.  Negative arguments such as b_ddddd(-1) still
 * yield an all-ones field because the conversion to uint32_t happens before
 * the mask is applied.
 */
#define b_x(x, p, l)	(((uint32_t)(x) & (((uint32_t)1 << (l)) - 1)) << (p))
#define b_n(x)		b_x((x), 31, 1) /* 0 if relocatable */
#define b_p(x)		b_x((x), 30, 1) /* 1 if prefetchable */
#define b_t(x)		b_x((x), 29, 1) /* 1 if the address is aliased */
#define b_ss(x)		b_x((x), 24, 2) /* the space code */
#define b_bbbbbbbb(x)	b_x((x), 16, 8) /* bus number */
#define b_ddddd(x)	b_x((x), 11, 5) /* device number */
#define b_fff(x)	b_x((x), 8, 3)	/* function number */
#define b_rrrrrrrr(x)	b_x((x), 0, 8)	/* register number */

/* Space codes accepted by b_ss() */
#define SS_M64		3
#define SS_M32		2
#define SS_IO		1
#define SS_CONFIG	0
60 
61 
62 static struct spapr_phb phb;
63 
64 
65 static void rtas_ibm_read_pci_config(struct kvm_cpu *vcpu,
66 				     uint32_t token, uint32_t nargs,
67 				     target_ulong args,
68 				     uint32_t nret, target_ulong rets)
69 {
70 	uint32_t val = 0;
71 	uint64_t buid = ((uint64_t)rtas_ld(vcpu->kvm, args, 1) << 32) | rtas_ld(vcpu->kvm, args, 2);
72 	union pci_config_address addr = { .w = rtas_ld(vcpu->kvm, args, 0) };
73 	struct pci_device_header *dev = pci__find_dev(addr.device_number);
74 	uint32_t size = rtas_ld(vcpu->kvm, args, 3);
75 
76 	if (buid != phb.buid || !dev || (size > 4)) {
77 		phb_dprintf("- cfgRd buid 0x%lx cfg addr 0x%x size %d not found\n",
78 			    buid, addr.w, size);
79 
80 		rtas_st(vcpu->kvm, rets, 0, -1);
81 		return;
82 	}
83 	pci__config_rd(vcpu->kvm, addr, &val, size);
84 	/* It appears this wants a byteswapped result... */
85 	switch (size) {
86 	case 4:
87 		val = le32_to_cpu(val);
88 		break;
89 	case 2:
90 		val = le16_to_cpu(val>>16);
91 		break;
92 	case 1:
93 		val = val >> 24;
94 		break;
95 	}
96 	phb_dprintf("- cfgRd buid 0x%lx addr 0x%x (/%d): b%d,d%d,f%d,r0x%x, val 0x%x\n",
97 		    buid, addr.w, size, addr.bus_number, addr.device_number, addr.function_number,
98 		    addr.register_number, val);
99 
100 	rtas_st(vcpu->kvm, rets, 0, 0);
101 	rtas_st(vcpu->kvm, rets, 1, val);
102 }
103 
104 static void rtas_read_pci_config(struct kvm_cpu *vcpu,
105 				 uint32_t token, uint32_t nargs,
106 				 target_ulong args,
107 				 uint32_t nret, target_ulong rets)
108 {
109 	uint32_t val;
110 	union pci_config_address addr = { .w = rtas_ld(vcpu->kvm, args, 0) };
111 	struct pci_device_header *dev = pci__find_dev(addr.device_number);
112 	uint32_t size = rtas_ld(vcpu->kvm, args, 1);
113 
114 	if (!dev || (size > 4)) {
115 		rtas_st(vcpu->kvm, rets, 0, -1);
116 		return;
117 	}
118 	pci__config_rd(vcpu->kvm, addr, &val, size);
119 	switch (size) {
120 	case 4:
121 		val = le32_to_cpu(val);
122 		break;
123 	case 2:
124 		val = le16_to_cpu(val>>16); /* We're yuck-endian. */
125 		break;
126 	case 1:
127 		val = val >> 24;
128 		break;
129 	}
130 	phb_dprintf("- cfgRd addr 0x%x size %d, val 0x%x\n", addr.w, size, val);
131 	rtas_st(vcpu->kvm, rets, 0, 0);
132 	rtas_st(vcpu->kvm, rets, 1, val);
133 }
134 
135 static void rtas_ibm_write_pci_config(struct kvm_cpu *vcpu,
136 				      uint32_t token, uint32_t nargs,
137 				      target_ulong args,
138 				      uint32_t nret, target_ulong rets)
139 {
140 	uint64_t buid = ((uint64_t)rtas_ld(vcpu->kvm, args, 1) << 32) | rtas_ld(vcpu->kvm, args, 2);
141 	union pci_config_address addr = { .w = rtas_ld(vcpu->kvm, args, 0) };
142 	struct pci_device_header *dev = pci__find_dev(addr.device_number);
143 	uint32_t size = rtas_ld(vcpu->kvm, args, 3);
144 	uint32_t val = rtas_ld(vcpu->kvm, args, 4);
145 
146 	if (buid != phb.buid || !dev || (size > 4)) {
147 		phb_dprintf("- cfgWr buid 0x%lx cfg addr 0x%x/%d error (val 0x%x)\n",
148 			    buid, addr.w, size, val);
149 
150 		rtas_st(vcpu->kvm, rets, 0, -1);
151 		return;
152 	}
153 	phb_dprintf("- cfgWr buid 0x%lx addr 0x%x (/%d): b%d,d%d,f%d,r0x%x, val 0x%x\n",
154 		    buid, addr.w, size, addr.bus_number, addr.device_number, addr.function_number,
155 		    addr.register_number, val);
156 	switch (size) {
157 	case 4:
158 		val = le32_to_cpu(val);
159 		break;
160 	case 2:
161 		val = le16_to_cpu(val) << 16;
162 		break;
163 	case 1:
164 		val = val >> 24;
165 		break;
166 	}
167 	pci__config_wr(vcpu->kvm, addr, &val, size);
168 	rtas_st(vcpu->kvm, rets, 0, 0);
169 }
170 
171 static void rtas_write_pci_config(struct kvm_cpu *vcpu,
172 				  uint32_t token, uint32_t nargs,
173 				  target_ulong args,
174 				  uint32_t nret, target_ulong rets)
175 {
176 	union pci_config_address addr = { .w = rtas_ld(vcpu->kvm, args, 0) };
177 	struct pci_device_header *dev = pci__find_dev(addr.device_number);
178 	uint32_t size = rtas_ld(vcpu->kvm, args, 1);
179 	uint32_t val = rtas_ld(vcpu->kvm, args, 2);
180 
181 	if (!dev || (size > 4)) {
182 		rtas_st(vcpu->kvm, rets, 0, -1);
183 		return;
184 	}
185 
186 	phb_dprintf("- cfgWr addr 0x%x (/%d): b%d,d%d,f%d,r0x%x, val 0x%x\n",
187 		    addr.w, size, addr.bus_number, addr.device_number, addr.function_number,
188 		    addr.register_number, val);
189 	switch (size) {
190 	case 4:
191 		val = le32_to_cpu(val);
192 		break;
193 	case 2:
194 		val = le16_to_cpu(val) << 16;
195 		break;
196 	case 1:
197 		val = val >> 24;
198 		break;
199 	}
200 	pci__config_wr(vcpu->kvm, addr, &val, size);
201 	rtas_st(vcpu->kvm, rets, 0, 0);
202 }
203 
204 void spapr_create_phb(struct kvm *kvm,
205 		      const char *busname, uint64_t buid,
206 		      uint64_t mem_win_addr, uint64_t mem_win_size,
207 		      uint64_t io_win_addr, uint64_t io_win_size)
208 {
209 	/*
210 	 * Since kvmtool doesn't really have any concept of buses etc.,
211 	 * there's nothing to register here.  Just register RTAS.
212 	 */
213 	spapr_rtas_register("read-pci-config", rtas_read_pci_config);
214 	spapr_rtas_register("write-pci-config", rtas_write_pci_config);
215 	spapr_rtas_register("ibm,read-pci-config", rtas_ibm_read_pci_config);
216 	spapr_rtas_register("ibm,write-pci-config", rtas_ibm_write_pci_config);
217 
218 	phb.buid = buid;
219 	phb.mem_addr = mem_win_addr;
220 	phb.mem_size = mem_win_size;
221 	phb.io_addr  = io_win_addr;
222 	phb.io_size  = io_win_size;
223 
224 	kvm->arch.phb = &phb;
225 }
226 
227 static uint32_t bar_to_ss(unsigned long bar)
228 {
229 	if ((bar & PCI_BASE_ADDRESS_SPACE) ==
230 	    PCI_BASE_ADDRESS_SPACE_IO)
231 		return SS_IO;
232 	else if (bar & PCI_BASE_ADDRESS_MEM_TYPE_64)
233 		return SS_M64;
234 	else
235 		return SS_M32;
236 }
237 
238 static unsigned long bar_to_addr(unsigned long bar)
239 {
240 	if ((bar & PCI_BASE_ADDRESS_SPACE) ==
241 	    PCI_BASE_ADDRESS_SPACE_IO)
242 		return bar & PCI_BASE_ADDRESS_IO_MASK;
243 	else
244 		return bar & PCI_BASE_ADDRESS_MEM_MASK;
245 }
246 
247 int spapr_populate_pci_devices(struct kvm *kvm,
248 			       uint32_t xics_phandle,
249 			       void *fdt)
250 {
251 	int bus_off, node_off = 0, devid, fn, i, n, devices;
252 	struct device_header *dev_hdr;
253 	char nodename[256];
254 	struct {
255 		uint32_t hi;
256 		uint64_t addr;
257 		uint64_t size;
258 	} __attribute__((packed)) reg[PCI_NUM_REGIONS + 1],
259 		  assigned_addresses[PCI_NUM_REGIONS];
260 	uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) };
261 	struct {
262 		uint32_t hi;
263 		uint64_t child;
264 		uint64_t parent;
265 		uint64_t size;
266 	} __attribute__((packed)) ranges[] = {
267 		{
268 			cpu_to_be32(b_ss(1)), cpu_to_be64(0),
269 			cpu_to_be64(phb.io_addr),
270 			cpu_to_be64(phb.io_size),
271 		},
272 		{
273 			cpu_to_be32(b_ss(2)), cpu_to_be64(0),
274 			cpu_to_be64(phb.mem_addr),
275 			cpu_to_be64(phb.mem_size),
276 		},
277 	};
278 	uint64_t bus_reg[] = { cpu_to_be64(phb.buid), 0 };
279 	uint32_t interrupt_map_mask[] = {
280 		cpu_to_be32(b_ddddd(-1)|b_fff(-1)), 0x0, 0x0, 0x0};
281 	uint32_t interrupt_map[SPAPR_PCI_NUM_LSI][7];
282 
283 	/* Start populating the FDT */
284 	sprintf(nodename, "pci@%" PRIx64, phb.buid);
285 	bus_off = fdt_add_subnode(fdt, 0, nodename);
286 	if (bus_off < 0) {
287 		die("error making bus subnode, %s\n", fdt_strerror(bus_off));
288 		return bus_off;
289 	}
290 
291 	/* Write PHB properties */
292 	_FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci"));
293 	_FDT(fdt_setprop_string(fdt, bus_off, "compatible", "IBM,Logical_PHB"));
294 	_FDT(fdt_setprop_cell(fdt, bus_off, "#address-cells", 0x3));
295 	_FDT(fdt_setprop_cell(fdt, bus_off, "#size-cells", 0x2));
296 	_FDT(fdt_setprop_cell(fdt, bus_off, "#interrupt-cells", 0x1));
297 	_FDT(fdt_setprop(fdt, bus_off, "used-by-rtas", NULL, 0));
298 	_FDT(fdt_setprop(fdt, bus_off, "bus-range", &bus_range, sizeof(bus_range)));
299 	_FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof(ranges)));
300 	_FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg)));
301 	_FDT(fdt_setprop(fdt, bus_off, "interrupt-map-mask",
302 			 &interrupt_map_mask, sizeof(interrupt_map_mask)));
303 
304 	/* Populate PCI devices and allocate IRQs */
305 	devices = 0;
306 	dev_hdr = device__first_dev(DEVICE_BUS_PCI);
307 	while (dev_hdr) {
308 		uint32_t *irqmap = interrupt_map[devices];
309 		struct pci_device_header *hdr = dev_hdr->data;
310 
311 		if (!hdr)
312 			continue;
313 
314 		devid = dev_hdr->dev_num;
315 		fn = 0; /* kvmtool doesn't yet do multifunction devices */
316 
317 		sprintf(nodename, "pci@%u,%u", devid, fn);
318 
319 		/* Allocate interrupt from the map */
320 		if (devid > SPAPR_PCI_NUM_LSI)	{
321 			die("Unexpected behaviour in spapr_populate_pci_devices,"
322 			    "wrong devid %u\n", devid);
323 		}
324 		irqmap[0] = cpu_to_be32(b_ddddd(devid)|b_fff(fn));
325 		irqmap[1] = 0;
326 		irqmap[2] = 0;
327 		irqmap[3] = 0;
328 		irqmap[4] = cpu_to_be32(xics_phandle);
329 		/*
330 		 * This is nasty; the PCI devs are set up such that their own
331 		 * header's irq_line indicates the direct XICS IRQ number to
332 		 * use.  There REALLY needs to be a hierarchical system in place
333 		 * to 'raise' an IRQ on the bridge which indexes/looks up which
334 		 * XICS IRQ to fire.
335 		 */
336 		irqmap[5] = cpu_to_be32(hdr->irq_line);
337 		irqmap[6] = cpu_to_be32(0x8);
338 
339 		/* Add node to FDT */
340 		node_off = fdt_add_subnode(fdt, bus_off, nodename);
341 		if (node_off < 0) {
342 			die("error making node subnode, %s\n", fdt_strerror(bus_off));
343 			return node_off;
344 		}
345 
346 		_FDT(fdt_setprop_cell(fdt, node_off, "vendor-id",
347 				      le16_to_cpu(hdr->vendor_id)));
348 		_FDT(fdt_setprop_cell(fdt, node_off, "device-id",
349 				      le16_to_cpu(hdr->device_id)));
350 		_FDT(fdt_setprop_cell(fdt, node_off, "revision-id",
351 				      hdr->revision_id));
352 		_FDT(fdt_setprop_cell(fdt, node_off, "class-code",
353 				      hdr->class[0] | (hdr->class[1] << 8) | (hdr->class[2] << 16)));
354 		_FDT(fdt_setprop_cell(fdt, node_off, "subsystem-id",
355 				      le16_to_cpu(hdr->subsys_id)));
356 		_FDT(fdt_setprop_cell(fdt, node_off, "subsystem-vendor-id",
357 				      le16_to_cpu(hdr->subsys_vendor_id)));
358 
359 		/* Config space region comes first */
360 		reg[0].hi = cpu_to_be32(
361 			b_n(0) |
362 			b_p(0) |
363 			b_t(0) |
364 			b_ss(SS_CONFIG) |
365 			b_bbbbbbbb(0) |
366 			b_ddddd(devid) |
367 			b_fff(fn));
368 		reg[0].addr = 0;
369 		reg[0].size = 0;
370 
371 		n = 0;
372 		/* Six BARs, no ROM supported, addresses are 32bit */
373 		for (i = 0; i < 6; ++i) {
374 			if (0 == hdr->bar[i]) {
375 				continue;
376 			}
377 
378 			reg[n+1].hi = cpu_to_be32(
379 				b_n(0) |
380 				b_p(0) |
381 				b_t(0) |
382 				b_ss(bar_to_ss(le32_to_cpu(hdr->bar[i]))) |
383 				b_bbbbbbbb(0) |
384 				b_ddddd(devid) |
385 				b_fff(fn) |
386 				b_rrrrrrrr(bars[i]));
387 			reg[n+1].addr = 0;
388 			reg[n+1].size = cpu_to_be64(hdr->bar_size[i]);
389 
390 			assigned_addresses[n].hi = cpu_to_be32(
391 				b_n(1) |
392 				b_p(0) |
393 				b_t(0) |
394 				b_ss(bar_to_ss(le32_to_cpu(hdr->bar[i]))) |
395 				b_bbbbbbbb(0) |
396 				b_ddddd(devid) |
397 				b_fff(fn) |
398 				b_rrrrrrrr(bars[i]));
399 
400 			/*
401 			 * Writing zeroes to assigned_addresses causes the guest kernel to
402 			 * reassign BARs
403 			 */
404 			assigned_addresses[n].addr = cpu_to_be64(bar_to_addr(le32_to_cpu(hdr->bar[i])));
405 			assigned_addresses[n].size = reg[n+1].size;
406 
407 			++n;
408 		}
409 		_FDT(fdt_setprop(fdt, node_off, "reg", reg, sizeof(reg[0])*(n+1)));
410 		_FDT(fdt_setprop(fdt, node_off, "assigned-addresses",
411 				 assigned_addresses,
412 				 sizeof(assigned_addresses[0])*(n)));
413 		_FDT(fdt_setprop_cell(fdt, node_off, "interrupts",
414 				      hdr->irq_pin));
415 
416 		/* We don't set ibm,dma-window property as we don't have an IOMMU. */
417 
418 		++devices;
419 		dev_hdr = device__next_dev(dev_hdr);
420 	}
421 
422 	/* Write interrupt map */
423 	_FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
424 			 devices * sizeof(interrupt_map[0])));
425 
426 	return 0;
427 }
428