xref: /kvmtool/powerpc/spapr_pci.c (revision c481cfd579e5039b5ed6443c4905a43e53fd91aa)
1 /*
2  * SPAPR PHB emulation, RTAS interface to PCI config space, device tree nodes
3  * for enumerated devices.
4  *
5  * Borrowed heavily from QEMU's spapr_pci.c,
6  * Copyright (c) 2011 Alexey Kardashevskiy, IBM Corporation.
7  * Copyright (c) 2011 David Gibson, IBM Corporation.
8  *
9  * Modifications copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
10  *
11  * This program is free software; you can redistribute it and/or modify it
12  * under the terms of the GNU General Public License version 2 as published
13  * by the Free Software Foundation.
14  */
15 
16 #include "spapr.h"
17 #include "spapr_pci.h"
18 #include "kvm/util.h"
19 #include "kvm/pci.h"
20 #include "libfdt.h"
21 
22 #include <linux/pci_regs.h>
23 #include <linux/byteorder.h>
24 
25 
/*
 * Debug tracing for the PHB emulation.
 *
 * When DEBUG_PHB is defined (e.g. by enabling the line below),
 * phb_dprintf() forwards its printf-style arguments to stderr.  Otherwise
 * it expands to an empty statement and its arguments are not evaluated.
 */
/* #define DEBUG_PHB yes */
#ifdef DEBUG_PHB
#define phb_dprintf(fmt, ...)					\
	do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define phb_dprintf(fmt, ...)			\
	do { } while (0)
#endif
34 
/*
 * Config-space register offsets of the six base address registers,
 * indexed by BAR number.  The expansion ROM register is deliberately
 * left out (see the commented-out entry).
 */
static const uint32_t bars[] = {
	PCI_BASE_ADDRESS_0, PCI_BASE_ADDRESS_1,
	PCI_BASE_ADDRESS_2, PCI_BASE_ADDRESS_3,
	PCI_BASE_ADDRESS_4, PCI_BASE_ADDRESS_5
	/*, PCI_ROM_ADDRESS*/
};

/*
 * Capacity used for the per-device "reg" and "assigned-addresses" arrays
 * in spapr_populate_pci_devices().
 * NOTE(review): presumably six BARs plus the expansion ROM -- confirm.
 */
#define PCI_NUM_REGIONS		7
43 
/*
 * Macros to operate with address in OF binding to PCI.
 *
 * b_x(x, p, l) keeps the low `l` bits of `x` and places them at bit
 * position `p`.  The arithmetic is done in uint32_t so that placing a bit
 * at position 31 (b_n(1)) does not left-shift into the sign bit of an int,
 * which would be undefined behaviour.  Negative arguments such as -1 are
 * converted to unsigned first and therefore select "all ones" in the field.
 */
#define b_x(x, p, l)	(((uint32_t)(x) & (((uint32_t)1 << (l)) - 1)) << (p))
#define b_n(x)		b_x((x), 31, 1) /* 0 if relocatable */
#define b_p(x)		b_x((x), 30, 1) /* 1 if prefetchable */
#define b_t(x)		b_x((x), 29, 1) /* 1 if the address is aliased */
#define b_ss(x)		b_x((x), 24, 2) /* the space code */
#define b_bbbbbbbb(x)	b_x((x), 16, 8) /* bus number */
#define b_ddddd(x)	b_x((x), 11, 5) /* device number */
#define b_fff(x)	b_x((x), 8, 3)	/* function number */
#define b_rrrrrrrr(x)	b_x((x), 0, 8)	/* register number */
54 
/* Values for the two-bit space code ("ss") field encoded by b_ss(). */
#define SS_M64		3
#define SS_M32		2
#define SS_IO		1
#define SS_CONFIG	0
59 
60 
/*
 * The single PHB instance of this file: filled in by spapr_create_phb()
 * and consulted by the RTAS handlers and the device tree code.
 */
static struct spapr_phb phb;
62 
63 
64 static void rtas_ibm_read_pci_config(struct kvm_cpu *vcpu,
65 				     uint32_t token, uint32_t nargs,
66 				     target_ulong args,
67 				     uint32_t nret, target_ulong rets)
68 {
69 	uint32_t val = 0;
70 	uint64_t buid = ((uint64_t)rtas_ld(vcpu->kvm, args, 1) << 32) | rtas_ld(vcpu->kvm, args, 2);
71 	union pci_config_address addr = { .w = rtas_ld(vcpu->kvm, args, 0) };
72 	struct pci_device_header *dev = pci__find_dev(addr.device_number);
73 	uint32_t size = rtas_ld(vcpu->kvm, args, 3);
74 
75 	if (buid != phb.buid || !dev || (size > 4)) {
76 		phb_dprintf("- cfgRd buid 0x%lx cfg addr 0x%x size %d not found\n",
77 			    buid, addr.w, size);
78 
79 		rtas_st(vcpu->kvm, rets, 0, -1);
80 		return;
81 	}
82 	pci__config_rd(vcpu->kvm, addr, &val, size);
83 	/* It appears this wants a byteswapped result... */
84 	switch (size) {
85 	case 4:
86 		val = le32_to_cpu(val);
87 		break;
88 	case 2:
89 		val = le16_to_cpu(val>>16);
90 		break;
91 	case 1:
92 		val = val >> 24;
93 		break;
94 	}
95 	phb_dprintf("- cfgRd buid 0x%lx addr 0x%x (/%d): b%d,d%d,f%d,r0x%x, val 0x%x\n",
96 		    buid, addr.w, size, addr.bus_number, addr.device_number, addr.function_number,
97 		    addr.register_number, val);
98 
99 	rtas_st(vcpu->kvm, rets, 0, 0);
100 	rtas_st(vcpu->kvm, rets, 1, val);
101 }
102 
103 static void rtas_read_pci_config(struct kvm_cpu *vcpu,
104 				 uint32_t token, uint32_t nargs,
105 				 target_ulong args,
106 				 uint32_t nret, target_ulong rets)
107 {
108 	uint32_t val;
109 	union pci_config_address addr = { .w = rtas_ld(vcpu->kvm, args, 0) };
110 	struct pci_device_header *dev = pci__find_dev(addr.device_number);
111 	uint32_t size = rtas_ld(vcpu->kvm, args, 1);
112 
113 	if (!dev || (size > 4)) {
114 		rtas_st(vcpu->kvm, rets, 0, -1);
115 		return;
116 	}
117 	pci__config_rd(vcpu->kvm, addr, &val, size);
118 	switch (size) {
119 	case 4:
120 		val = le32_to_cpu(val);
121 		break;
122 	case 2:
123 		val = le16_to_cpu(val>>16); /* We're yuck-endian. */
124 		break;
125 	case 1:
126 		val = val >> 24;
127 		break;
128 	}
129 	phb_dprintf("- cfgRd addr 0x%x size %d, val 0x%x\n", addr.w, size, val);
130 	rtas_st(vcpu->kvm, rets, 0, 0);
131 	rtas_st(vcpu->kvm, rets, 1, val);
132 }
133 
134 static void rtas_ibm_write_pci_config(struct kvm_cpu *vcpu,
135 				      uint32_t token, uint32_t nargs,
136 				      target_ulong args,
137 				      uint32_t nret, target_ulong rets)
138 {
139 	uint64_t buid = ((uint64_t)rtas_ld(vcpu->kvm, args, 1) << 32) | rtas_ld(vcpu->kvm, args, 2);
140 	union pci_config_address addr = { .w = rtas_ld(vcpu->kvm, args, 0) };
141 	struct pci_device_header *dev = pci__find_dev(addr.device_number);
142 	uint32_t size = rtas_ld(vcpu->kvm, args, 3);
143 	uint32_t val = rtas_ld(vcpu->kvm, args, 4);
144 
145 	if (buid != phb.buid || !dev || (size > 4)) {
146 		phb_dprintf("- cfgWr buid 0x%lx cfg addr 0x%x/%d error (val 0x%x)\n",
147 			    buid, addr.w, size, val);
148 
149 		rtas_st(vcpu->kvm, rets, 0, -1);
150 		return;
151 	}
152 	phb_dprintf("- cfgWr buid 0x%lx addr 0x%x (/%d): b%d,d%d,f%d,r0x%x, val 0x%x\n",
153 		    buid, addr.w, size, addr.bus_number, addr.device_number, addr.function_number,
154 		    addr.register_number, val);
155 	switch (size) {
156 	case 4:
157 		val = le32_to_cpu(val);
158 		break;
159 	case 2:
160 		val = le16_to_cpu(val) << 16;
161 		break;
162 	case 1:
163 		val = val >> 24;
164 		break;
165 	}
166 	pci__config_wr(vcpu->kvm, addr, &val, size);
167 	rtas_st(vcpu->kvm, rets, 0, 0);
168 }
169 
170 static void rtas_write_pci_config(struct kvm_cpu *vcpu,
171 				  uint32_t token, uint32_t nargs,
172 				  target_ulong args,
173 				  uint32_t nret, target_ulong rets)
174 {
175 	union pci_config_address addr = { .w = rtas_ld(vcpu->kvm, args, 0) };
176 	struct pci_device_header *dev = pci__find_dev(addr.device_number);
177 	uint32_t size = rtas_ld(vcpu->kvm, args, 1);
178 	uint32_t val = rtas_ld(vcpu->kvm, args, 2);
179 
180 	if (!dev || (size > 4)) {
181 		rtas_st(vcpu->kvm, rets, 0, -1);
182 		return;
183 	}
184 
185 	phb_dprintf("- cfgWr addr 0x%x (/%d): b%d,d%d,f%d,r0x%x, val 0x%x\n",
186 		    addr.w, size, addr.bus_number, addr.device_number, addr.function_number,
187 		    addr.register_number, val);
188 	switch (size) {
189 	case 4:
190 		val = le32_to_cpu(val);
191 		break;
192 	case 2:
193 		val = le16_to_cpu(val) << 16;
194 		break;
195 	case 1:
196 		val = val >> 24;
197 		break;
198 	}
199 	pci__config_wr(vcpu->kvm, addr, &val, size);
200 	rtas_st(vcpu->kvm, rets, 0, 0);
201 }
202 
203 void spapr_create_phb(struct kvm *kvm,
204 		      const char *busname, uint64_t buid,
205 		      uint64_t mem_win_addr, uint64_t mem_win_size,
206 		      uint64_t io_win_addr, uint64_t io_win_size)
207 {
208 	/*
209 	 * Since kvmtool doesn't really have any concept of buses etc.,
210 	 * there's nothing to register here.  Just register RTAS.
211 	 */
212 	spapr_rtas_register("read-pci-config", rtas_read_pci_config);
213 	spapr_rtas_register("write-pci-config", rtas_write_pci_config);
214 	spapr_rtas_register("ibm,read-pci-config", rtas_ibm_read_pci_config);
215 	spapr_rtas_register("ibm,write-pci-config", rtas_ibm_write_pci_config);
216 
217 	phb.buid = buid;
218 	phb.mem_addr = mem_win_addr;
219 	phb.mem_size = mem_win_size;
220 	phb.io_addr  = io_win_addr;
221 	phb.io_size  = io_win_size;
222 
223 	kvm->phb = &phb;
224 }
225 
226 static uint32_t bar_to_ss(unsigned long bar)
227 {
228 	if ((bar & PCI_BASE_ADDRESS_SPACE) ==
229 	    PCI_BASE_ADDRESS_SPACE_IO)
230 		return SS_IO;
231 	else if (bar & PCI_BASE_ADDRESS_MEM_TYPE_64)
232 		return SS_M64;
233 	else
234 		return SS_M32;
235 }
236 
/*
 * Strip the flag bits from a raw BAR value and return the address part,
 * using the IO mask for IO BARs and the memory mask otherwise.
 */
static unsigned long bar_to_addr(unsigned long bar)
{
	unsigned long addr_mask = PCI_BASE_ADDRESS_MEM_MASK;

	if ((bar & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO)
		addr_mask = PCI_BASE_ADDRESS_IO_MASK;

	return bar & addr_mask;
}
245 
246 int spapr_populate_pci_devices(struct kvm *kvm,
247 			       uint32_t xics_phandle,
248 			       void *fdt)
249 {
250 	int bus_off, node_off = 0, devid, fn, i, n, devices;
251 	char nodename[256];
252 	struct {
253 		uint32_t hi;
254 		uint64_t addr;
255 		uint64_t size;
256 	} __attribute__((packed)) reg[PCI_NUM_REGIONS + 1],
257 		  assigned_addresses[PCI_NUM_REGIONS];
258 	uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) };
259 	struct {
260 		uint32_t hi;
261 		uint64_t child;
262 		uint64_t parent;
263 		uint64_t size;
264 	} __attribute__((packed)) ranges[] = {
265 		{
266 			cpu_to_be32(b_ss(1)), cpu_to_be64(0),
267 			cpu_to_be64(phb.io_addr),
268 			cpu_to_be64(phb.io_size),
269 		},
270 		{
271 			cpu_to_be32(b_ss(2)), cpu_to_be64(0),
272 			cpu_to_be64(phb.mem_addr),
273 			cpu_to_be64(phb.mem_size),
274 		},
275 	};
276 	uint64_t bus_reg[] = { cpu_to_be64(phb.buid), 0 };
277 	uint32_t interrupt_map_mask[] = {
278 		cpu_to_be32(b_ddddd(-1)|b_fff(-1)), 0x0, 0x0, 0x0};
279 	uint32_t interrupt_map[SPAPR_PCI_NUM_LSI][7];
280 
281 	/* Start populating the FDT */
282 	sprintf(nodename, "pci@%" PRIx64, phb.buid);
283 	bus_off = fdt_add_subnode(fdt, 0, nodename);
284 	if (bus_off < 0) {
285 		die("error making bus subnode, %s\n", fdt_strerror(bus_off));
286 		return bus_off;
287 	}
288 
289 	/* Write PHB properties */
290 	_FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci"));
291 	_FDT(fdt_setprop_string(fdt, bus_off, "compatible", "IBM,Logical_PHB"));
292 	_FDT(fdt_setprop_cell(fdt, bus_off, "#address-cells", 0x3));
293 	_FDT(fdt_setprop_cell(fdt, bus_off, "#size-cells", 0x2));
294 	_FDT(fdt_setprop_cell(fdt, bus_off, "#interrupt-cells", 0x1));
295 	_FDT(fdt_setprop(fdt, bus_off, "used-by-rtas", NULL, 0));
296 	_FDT(fdt_setprop(fdt, bus_off, "bus-range", &bus_range, sizeof(bus_range)));
297 	_FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof(ranges)));
298 	_FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg)));
299 	_FDT(fdt_setprop(fdt, bus_off, "interrupt-map-mask",
300 			 &interrupt_map_mask, sizeof(interrupt_map_mask)));
301 
302 	/* Populate PCI devices and allocate IRQs */
303 	devices = 0;
304 
305 	for (devid = 0; devid < PCI_MAX_DEVICES; devid++) {
306 		uint32_t *irqmap = interrupt_map[devices];
307 		struct pci_device_header *hdr = pci__find_dev(devid);
308 
309 		if (!hdr)
310 			continue;
311 
312 		fn = 0; /* kvmtool doesn't yet do multifunction devices */
313 
314 		sprintf(nodename, "pci@%u,%u", devid, fn);
315 
316 		/* Allocate interrupt from the map */
317 		if (devid > SPAPR_PCI_NUM_LSI)	{
318 			die("Unexpected behaviour in spapr_populate_pci_devices,"
319 			    "wrong devid %u\n", devid);
320 		}
321 		irqmap[0] = cpu_to_be32(b_ddddd(devid)|b_fff(fn));
322 		irqmap[1] = 0;
323 		irqmap[2] = 0;
324 		irqmap[3] = 0;
325 		irqmap[4] = cpu_to_be32(xics_phandle);
326 		/*
327 		 * This is nasty; the PCI devs are set up such that their own
328 		 * header's irq_line indicates the direct XICS IRQ number to
329 		 * use.  There REALLY needs to be a hierarchical system in place
330 		 * to 'raise' an IRQ on the bridge which indexes/looks up which
331 		 * XICS IRQ to fire.
332 		 */
333 		irqmap[5] = cpu_to_be32(hdr->irq_line);
334 		irqmap[6] = cpu_to_be32(0x8);
335 
336 		/* Add node to FDT */
337 		node_off = fdt_add_subnode(fdt, bus_off, nodename);
338 		if (node_off < 0) {
339 			die("error making node subnode, %s\n", fdt_strerror(bus_off));
340 			return node_off;
341 		}
342 
343 		_FDT(fdt_setprop_cell(fdt, node_off, "vendor-id",
344 				      le16_to_cpu(hdr->vendor_id)));
345 		_FDT(fdt_setprop_cell(fdt, node_off, "device-id",
346 				      le16_to_cpu(hdr->device_id)));
347 		_FDT(fdt_setprop_cell(fdt, node_off, "revision-id",
348 				      hdr->revision_id));
349 		_FDT(fdt_setprop_cell(fdt, node_off, "class-code",
350 				      hdr->class[0] | (hdr->class[1] << 8) | (hdr->class[2] << 16)));
351 		_FDT(fdt_setprop_cell(fdt, node_off, "subsystem-id",
352 				      le16_to_cpu(hdr->subsys_id)));
353 		_FDT(fdt_setprop_cell(fdt, node_off, "subsystem-vendor-id",
354 				      le16_to_cpu(hdr->subsys_vendor_id)));
355 
356 		/* Config space region comes first */
357 		reg[0].hi = cpu_to_be32(
358 			b_n(0) |
359 			b_p(0) |
360 			b_t(0) |
361 			b_ss(SS_CONFIG) |
362 			b_bbbbbbbb(0) |
363 			b_ddddd(devid) |
364 			b_fff(fn));
365 		reg[0].addr = 0;
366 		reg[0].size = 0;
367 
368 		n = 0;
369 		/* Six BARs, no ROM supported, addresses are 32bit */
370 		for (i = 0; i < 6; ++i) {
371 			if (0 == hdr->bar[i]) {
372 				continue;
373 			}
374 
375 			reg[n+1].hi = cpu_to_be32(
376 				b_n(0) |
377 				b_p(0) |
378 				b_t(0) |
379 				b_ss(bar_to_ss(le32_to_cpu(hdr->bar[i]))) |
380 				b_bbbbbbbb(0) |
381 				b_ddddd(devid) |
382 				b_fff(fn) |
383 				b_rrrrrrrr(bars[i]));
384 			reg[n+1].addr = 0;
385 			reg[n+1].size = cpu_to_be64(hdr->bar_size[i]);
386 
387 			assigned_addresses[n].hi = cpu_to_be32(
388 				b_n(1) |
389 				b_p(0) |
390 				b_t(0) |
391 				b_ss(bar_to_ss(le32_to_cpu(hdr->bar[i]))) |
392 				b_bbbbbbbb(0) |
393 				b_ddddd(devid) |
394 				b_fff(fn) |
395 				b_rrrrrrrr(bars[i]));
396 
397 			/*
398 			 * Writing zeroes to assigned_addresses causes the guest kernel to
399 			 * reassign BARs
400 			 */
401 			assigned_addresses[n].addr = cpu_to_be64(bar_to_addr(le32_to_cpu(hdr->bar[i])));
402 			assigned_addresses[n].size = reg[n+1].size;
403 
404 			++n;
405 		}
406 		_FDT(fdt_setprop(fdt, node_off, "reg", reg, sizeof(reg[0])*(n+1)));
407 		_FDT(fdt_setprop(fdt, node_off, "assigned-addresses",
408 				 assigned_addresses,
409 				 sizeof(assigned_addresses[0])*(n)));
410 		_FDT(fdt_setprop_cell(fdt, node_off, "interrupts",
411 				      hdr->irq_pin));
412 
413 		/* We don't set ibm,dma-window property as we don't have an IOMMU. */
414 
415 		++devices;
416 	}
417 
418 	/* Write interrupt map */
419 	_FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
420 			 devices * sizeof(interrupt_map[0])));
421 
422 	return 0;
423 }
424