xref: /qemu/hw/ppc/spapr_pci.c (revision cca7fad5765251fece44cd230156a101867522dd)
1 /*
2  * QEMU sPAPR PCI host originated from Uninorth PCI host
3  *
4  * Copyright (c) 2011 Alexey Kardashevskiy, IBM Corporation.
5  * Copyright (C) 2011 David Gibson, IBM Corporation.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 #include "hw/hw.h"
26 #include "hw/pci/pci.h"
27 #include "hw/pci/msi.h"
28 #include "hw/pci/msix.h"
29 #include "hw/pci/pci_host.h"
30 #include "hw/ppc/spapr.h"
31 #include "hw/pci-host/spapr.h"
32 #include "exec/address-spaces.h"
33 #include <libfdt.h>
34 #include "trace.h"
35 #include "qemu/error-report.h"
36 
37 #include "hw/pci/pci_bus.h"
38 
/*
 * Function codes of the ibm,change-msi RTAS call.
 * Copied from the kernel arch/powerpc/platforms/pseries/msi.c
 */
enum {
    RTAS_QUERY_FN       = 0,
    RTAS_CHANGE_FN      = 1,
    RTAS_RESET_FN       = 2,
    RTAS_CHANGE_MSI_FN  = 3,
    RTAS_CHANGE_MSIX_FN = 4,
};

/* Interrupt types to return on RTAS_CHANGE_* */
enum {
    RTAS_TYPE_MSI  = 1,
    RTAS_TYPE_MSIX = 2,
};
49 
50 static sPAPRPHBState *find_phb(sPAPREnvironment *spapr, uint64_t buid)
51 {
52     sPAPRPHBState *sphb;
53 
54     QLIST_FOREACH(sphb, &spapr->phbs, list) {
55         if (sphb->buid != buid) {
56             continue;
57         }
58         return sphb;
59     }
60 
61     return NULL;
62 }
63 
64 static PCIDevice *find_dev(sPAPREnvironment *spapr, uint64_t buid,
65                            uint32_t config_addr)
66 {
67     sPAPRPHBState *sphb = find_phb(spapr, buid);
68     PCIHostState *phb = PCI_HOST_BRIDGE(sphb);
69     int bus_num = (config_addr >> 16) & 0xFF;
70     int devfn = (config_addr >> 8) & 0xFF;
71 
72     if (!phb) {
73         return NULL;
74     }
75 
76     return pci_find_device(phb->bus, bus_num, devfn);
77 }
78 
/*
 * Convert an RTAS config address argument into a config space offset.
 *
 * The low 8 bits are the register number; bits 31:28 of @arg carry the
 * extended register number and end up in bits 11:8 of the result.
 */
static uint32_t rtas_pci_cfgaddr(uint32_t arg)
{
    uint32_t ext_reg = (arg >> 20) & 0xf00;
    uint32_t reg = arg & 0xff;

    return ext_reg | reg;
}
84 
85 static void finish_read_pci_config(sPAPREnvironment *spapr, uint64_t buid,
86                                    uint32_t addr, uint32_t size,
87                                    target_ulong rets)
88 {
89     PCIDevice *pci_dev;
90     uint32_t val;
91 
92     if ((size != 1) && (size != 2) && (size != 4)) {
93         /* access must be 1, 2 or 4 bytes */
94         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
95         return;
96     }
97 
98     pci_dev = find_dev(spapr, buid, addr);
99     addr = rtas_pci_cfgaddr(addr);
100 
101     if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) {
102         /* Access must be to a valid device, within bounds and
103          * naturally aligned */
104         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
105         return;
106     }
107 
108     val = pci_host_config_read_common(pci_dev, addr,
109                                       pci_config_size(pci_dev), size);
110 
111     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
112     rtas_st(rets, 1, val);
113 }
114 
115 static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
116                                      uint32_t token, uint32_t nargs,
117                                      target_ulong args,
118                                      uint32_t nret, target_ulong rets)
119 {
120     uint64_t buid;
121     uint32_t size, addr;
122 
123     if ((nargs != 4) || (nret != 2)) {
124         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
125         return;
126     }
127 
128     buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
129     size = rtas_ld(args, 3);
130     addr = rtas_ld(args, 0);
131 
132     finish_read_pci_config(spapr, buid, addr, size, rets);
133 }
134 
135 static void rtas_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
136                                  uint32_t token, uint32_t nargs,
137                                  target_ulong args,
138                                  uint32_t nret, target_ulong rets)
139 {
140     uint32_t size, addr;
141 
142     if ((nargs != 2) || (nret != 2)) {
143         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
144         return;
145     }
146 
147     size = rtas_ld(args, 1);
148     addr = rtas_ld(args, 0);
149 
150     finish_read_pci_config(spapr, 0, addr, size, rets);
151 }
152 
153 static void finish_write_pci_config(sPAPREnvironment *spapr, uint64_t buid,
154                                     uint32_t addr, uint32_t size,
155                                     uint32_t val, target_ulong rets)
156 {
157     PCIDevice *pci_dev;
158 
159     if ((size != 1) && (size != 2) && (size != 4)) {
160         /* access must be 1, 2 or 4 bytes */
161         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
162         return;
163     }
164 
165     pci_dev = find_dev(spapr, buid, addr);
166     addr = rtas_pci_cfgaddr(addr);
167 
168     if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) {
169         /* Access must be to a valid device, within bounds and
170          * naturally aligned */
171         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
172         return;
173     }
174 
175     pci_host_config_write_common(pci_dev, addr, pci_config_size(pci_dev),
176                                  val, size);
177 
178     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
179 }
180 
181 static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
182                                       uint32_t token, uint32_t nargs,
183                                       target_ulong args,
184                                       uint32_t nret, target_ulong rets)
185 {
186     uint64_t buid;
187     uint32_t val, size, addr;
188 
189     if ((nargs != 5) || (nret != 1)) {
190         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
191         return;
192     }
193 
194     buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
195     val = rtas_ld(args, 4);
196     size = rtas_ld(args, 3);
197     addr = rtas_ld(args, 0);
198 
199     finish_write_pci_config(spapr, buid, addr, size, val, rets);
200 }
201 
202 static void rtas_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
203                                   uint32_t token, uint32_t nargs,
204                                   target_ulong args,
205                                   uint32_t nret, target_ulong rets)
206 {
207     uint32_t val, size, addr;
208 
209     if ((nargs != 3) || (nret != 1)) {
210         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
211         return;
212     }
213 
214 
215     val = rtas_ld(args, 2);
216     size = rtas_ld(args, 1);
217     addr = rtas_ld(args, 0);
218 
219     finish_write_pci_config(spapr, 0, addr, size, val, rets);
220 }
221 
222 /*
223  * Find an entry with config_addr or returns the empty one if not found AND
224  * alloc_new is set.
225  * At the moment the msi_table entries are never released so there is
226  * no point to look till the end of the list if we need to find the free entry.
227  */
228 static int spapr_msicfg_find(sPAPRPHBState *phb, uint32_t config_addr,
229                              bool alloc_new)
230 {
231     int i;
232 
233     for (i = 0; i < SPAPR_MSIX_MAX_DEVS; ++i) {
234         if (!phb->msi_table[i].nvec) {
235             break;
236         }
237         if (phb->msi_table[i].config_addr == config_addr) {
238             return i;
239         }
240     }
241     if ((i < SPAPR_MSIX_MAX_DEVS) && alloc_new) {
242         trace_spapr_pci_msi("Allocating new MSI config", i, config_addr);
243         return i;
244     }
245 
246     return -1;
247 }
248 
249 /*
250  * Set MSI/MSIX message data.
251  * This is required for msi_notify()/msix_notify() which
252  * will write at the addresses via spapr_msi_write().
253  */
254 static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix,
255                              unsigned first_irq, unsigned req_num)
256 {
257     unsigned i;
258     MSIMessage msg = { .address = addr, .data = first_irq };
259 
260     if (!msix) {
261         msi_set_message(pdev, msg);
262         trace_spapr_pci_msi_setup(pdev->name, 0, msg.address);
263         return;
264     }
265 
266     for (i = 0; i < req_num; ++i, ++msg.data) {
267         msix_set_message(pdev, i, msg);
268         trace_spapr_pci_msi_setup(pdev->name, i, msg.address);
269     }
270 }
271 
272 static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
273                                 uint32_t token, uint32_t nargs,
274                                 target_ulong args, uint32_t nret,
275                                 target_ulong rets)
276 {
277     uint32_t config_addr = rtas_ld(args, 0);
278     uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
279     unsigned int func = rtas_ld(args, 3);
280     unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */
281     unsigned int seq_num = rtas_ld(args, 5);
282     unsigned int ret_intr_type;
283     int ndev, irq, max_irqs = 0;
284     sPAPRPHBState *phb = NULL;
285     PCIDevice *pdev = NULL;
286 
287     switch (func) {
288     case RTAS_CHANGE_MSI_FN:
289     case RTAS_CHANGE_FN:
290         ret_intr_type = RTAS_TYPE_MSI;
291         break;
292     case RTAS_CHANGE_MSIX_FN:
293         ret_intr_type = RTAS_TYPE_MSIX;
294         break;
295     default:
296         error_report("rtas_ibm_change_msi(%u) is not implemented", func);
297         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
298         return;
299     }
300 
301     /* Fins sPAPRPHBState */
302     phb = find_phb(spapr, buid);
303     if (phb) {
304         pdev = find_dev(spapr, buid, config_addr);
305     }
306     if (!phb || !pdev) {
307         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
308         return;
309     }
310 
311     /* Releasing MSIs */
312     if (!req_num) {
313         ndev = spapr_msicfg_find(phb, config_addr, false);
314         if (ndev < 0) {
315             trace_spapr_pci_msi("MSI has not been enabled", -1, config_addr);
316             rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
317             return;
318         }
319         trace_spapr_pci_msi("Released MSIs", ndev, config_addr);
320         rtas_st(rets, 0, RTAS_OUT_SUCCESS);
321         rtas_st(rets, 1, 0);
322         return;
323     }
324 
325     /* Enabling MSI */
326 
327     /* Find a device number in the map to add or reuse the existing one */
328     ndev = spapr_msicfg_find(phb, config_addr, true);
329     if (ndev >= SPAPR_MSIX_MAX_DEVS || ndev < 0) {
330         error_report("No free entry for a new MSI device");
331         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
332         return;
333     }
334     trace_spapr_pci_msi("Configuring MSI", ndev, config_addr);
335 
336     /* Check if the device supports as many IRQs as requested */
337     if (ret_intr_type == RTAS_TYPE_MSI) {
338         max_irqs = msi_nr_vectors_allocated(pdev);
339     } else if (ret_intr_type == RTAS_TYPE_MSIX) {
340         max_irqs = pdev->msix_entries_nr;
341     }
342     if (!max_irqs) {
343         error_report("Requested interrupt type %d is not enabled for device#%d",
344                      ret_intr_type, ndev);
345         rtas_st(rets, 0, -1); /* Hardware error */
346         return;
347     }
348     /* Correct the number if the guest asked for too many */
349     if (req_num > max_irqs) {
350         req_num = max_irqs;
351     }
352 
353     /* Check if there is an old config and MSI number has not changed */
354     if (phb->msi_table[ndev].nvec && (req_num != phb->msi_table[ndev].nvec)) {
355         /* Unexpected behaviour */
356         error_report("Cannot reuse MSI config for device#%d", ndev);
357         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
358         return;
359     }
360 
361     /* There is no cached config, allocate MSIs */
362     if (!phb->msi_table[ndev].nvec) {
363         irq = spapr_allocate_irq_block(req_num, false,
364                                        ret_intr_type == RTAS_TYPE_MSI);
365         if (irq < 0) {
366             error_report("Cannot allocate MSIs for device#%d", ndev);
367             rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
368             return;
369         }
370         phb->msi_table[ndev].irq = irq;
371         phb->msi_table[ndev].nvec = req_num;
372         phb->msi_table[ndev].config_addr = config_addr;
373     }
374 
375     /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */
376     spapr_msi_setmsg(pdev, spapr->msi_win_addr, ret_intr_type == RTAS_TYPE_MSIX,
377                      phb->msi_table[ndev].irq, req_num);
378 
379     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
380     rtas_st(rets, 1, req_num);
381     rtas_st(rets, 2, ++seq_num);
382     rtas_st(rets, 3, ret_intr_type);
383 
384     trace_spapr_pci_rtas_ibm_change_msi(func, req_num);
385 }
386 
387 static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
388                                                    sPAPREnvironment *spapr,
389                                                    uint32_t token,
390                                                    uint32_t nargs,
391                                                    target_ulong args,
392                                                    uint32_t nret,
393                                                    target_ulong rets)
394 {
395     uint32_t config_addr = rtas_ld(args, 0);
396     uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
397     unsigned int intr_src_num = -1, ioa_intr_num = rtas_ld(args, 3);
398     int ndev;
399     sPAPRPHBState *phb = NULL;
400 
401     /* Fins sPAPRPHBState */
402     phb = find_phb(spapr, buid);
403     if (!phb) {
404         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
405         return;
406     }
407 
408     /* Find device descriptor and start IRQ */
409     ndev = spapr_msicfg_find(phb, config_addr, false);
410     if (ndev < 0) {
411         trace_spapr_pci_msi("MSI has not been enabled", -1, config_addr);
412         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
413         return;
414     }
415 
416     intr_src_num = phb->msi_table[ndev].irq + ioa_intr_num;
417     trace_spapr_pci_rtas_ibm_query_interrupt_source_number(ioa_intr_num,
418                                                            intr_src_num);
419 
420     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
421     rtas_st(rets, 1, intr_src_num);
422     rtas_st(rets, 2, 1);/* 0 == level; 1 == edge */
423 }
424 
425 static int pci_spapr_swizzle(int slot, int pin)
426 {
427     return (slot + pin) % PCI_NUM_PINS;
428 }
429 
430 static int pci_spapr_map_irq(PCIDevice *pci_dev, int irq_num)
431 {
432     /*
433      * Here we need to convert pci_dev + irq_num to some unique value
434      * which is less than number of IRQs on the specific bus (4).  We
435      * use standard PCI swizzling, that is (slot number + pin number)
436      * % 4.
437      */
438     return pci_spapr_swizzle(PCI_SLOT(pci_dev->devfn), irq_num);
439 }
440 
441 static void pci_spapr_set_irq(void *opaque, int irq_num, int level)
442 {
443     /*
444      * Here we use the number returned by pci_spapr_map_irq to find a
445      * corresponding qemu_irq.
446      */
447     sPAPRPHBState *phb = opaque;
448 
449     trace_spapr_pci_lsi_set(phb->dtbusname, irq_num, phb->lsi_table[irq_num].irq);
450     qemu_set_irq(spapr_phb_lsi_qirq(phb, irq_num), level);
451 }
452 
453 static PCIINTxRoute spapr_route_intx_pin_to_irq(void *opaque, int pin)
454 {
455     sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(opaque);
456     PCIINTxRoute route;
457 
458     route.mode = PCI_INTX_ENABLED;
459     route.irq = sphb->lsi_table[pin].irq;
460 
461     return route;
462 }
463 
464 /*
465  * MSI/MSIX memory region implementation.
466  * The handler handles both MSI and MSIX.
467  * For MSI-X, the vector number is encoded as a part of the address,
468  * data is set to 0.
469  * For MSI, the vector number is encoded in least bits in data.
470  */
471 static void spapr_msi_write(void *opaque, hwaddr addr,
472                             uint64_t data, unsigned size)
473 {
474     uint32_t irq = data;
475 
476     trace_spapr_pci_msi_write(addr, data, irq);
477 
478     qemu_irq_pulse(xics_get_qirq(spapr->icp, irq));
479 }
480 
481 static const MemoryRegionOps spapr_msi_ops = {
482     /* There is no .read as the read result is undefined by PCI spec */
483     .read = NULL,
484     .write = spapr_msi_write,
485     .endianness = DEVICE_LITTLE_ENDIAN
486 };
487 
488 void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr)
489 {
490     uint64_t window_size = 4096;
491 
492     /*
493      * As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
494      * we need to allocate some memory to catch those writes coming
495      * from msi_notify()/msix_notify().
496      * As MSIMessage:addr is going to be the same and MSIMessage:data
497      * is going to be a VIRQ number, 4 bytes of the MSI MR will only
498      * be used.
499      *
500      * For KVM we want to ensure that this memory is a full page so that
501      * our memory slot is of page size granularity.
502      */
503 #ifdef CONFIG_KVM
504     if (kvm_enabled()) {
505         window_size = getpagesize();
506     }
507 #endif
508 
509     spapr->msi_win_addr = addr;
510     memory_region_init_io(&spapr->msiwindow, NULL, &spapr_msi_ops, spapr,
511                           "msi", window_size);
512     memory_region_add_subregion(get_system_memory(), spapr->msi_win_addr,
513                                 &spapr->msiwindow);
514 }
515 
516 /*
517  * PHB PCI device
518  */
519 static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
520 {
521     sPAPRPHBState *phb = opaque;
522 
523     return &phb->iommu_as;
524 }
525 
526 static void spapr_phb_realize(DeviceState *dev, Error **errp)
527 {
528     SysBusDevice *s = SYS_BUS_DEVICE(dev);
529     sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
530     PCIHostState *phb = PCI_HOST_BRIDGE(s);
531     sPAPRPHBClass *info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(s);
532     char *namebuf;
533     int i;
534     PCIBus *bus;
535 
536     if (sphb->index != -1) {
537         hwaddr windows_base;
538 
539         if ((sphb->buid != -1) || (sphb->dma_liobn != -1)
540             || (sphb->mem_win_addr != -1)
541             || (sphb->io_win_addr != -1)) {
542             error_setg(errp, "Either \"index\" or other parameters must"
543                        " be specified for PAPR PHB, not both");
544             return;
545         }
546 
547         sphb->buid = SPAPR_PCI_BASE_BUID + sphb->index;
548         sphb->dma_liobn = SPAPR_PCI_BASE_LIOBN + sphb->index;
549 
550         windows_base = SPAPR_PCI_WINDOW_BASE
551             + sphb->index * SPAPR_PCI_WINDOW_SPACING;
552         sphb->mem_win_addr = windows_base + SPAPR_PCI_MMIO_WIN_OFF;
553         sphb->io_win_addr = windows_base + SPAPR_PCI_IO_WIN_OFF;
554     }
555 
556     if (sphb->buid == -1) {
557         error_setg(errp, "BUID not specified for PHB");
558         return;
559     }
560 
561     if (sphb->dma_liobn == -1) {
562         error_setg(errp, "LIOBN not specified for PHB");
563         return;
564     }
565 
566     if (sphb->mem_win_addr == -1) {
567         error_setg(errp, "Memory window address not specified for PHB");
568         return;
569     }
570 
571     if (sphb->io_win_addr == -1) {
572         error_setg(errp, "IO window address not specified for PHB");
573         return;
574     }
575 
576     if (find_phb(spapr, sphb->buid)) {
577         error_setg(errp, "PCI host bridges must have unique BUIDs");
578         return;
579     }
580 
581     sphb->dtbusname = g_strdup_printf("pci@%" PRIx64, sphb->buid);
582 
583     namebuf = alloca(strlen(sphb->dtbusname) + 32);
584 
585     /* Initialize memory regions */
586     sprintf(namebuf, "%s.mmio", sphb->dtbusname);
587     memory_region_init(&sphb->memspace, OBJECT(sphb), namebuf, UINT64_MAX);
588 
589     sprintf(namebuf, "%s.mmio-alias", sphb->dtbusname);
590     memory_region_init_alias(&sphb->memwindow, OBJECT(sphb),
591                              namebuf, &sphb->memspace,
592                              SPAPR_PCI_MEM_WIN_BUS_OFFSET, sphb->mem_win_size);
593     memory_region_add_subregion(get_system_memory(), sphb->mem_win_addr,
594                                 &sphb->memwindow);
595 
596     /* Initialize IO regions */
597     sprintf(namebuf, "%s.io", sphb->dtbusname);
598     memory_region_init(&sphb->iospace, OBJECT(sphb),
599                        namebuf, SPAPR_PCI_IO_WIN_SIZE);
600 
601     sprintf(namebuf, "%s.io-alias", sphb->dtbusname);
602     memory_region_init_alias(&sphb->iowindow, OBJECT(sphb), namebuf,
603                              &sphb->iospace, 0, SPAPR_PCI_IO_WIN_SIZE);
604     memory_region_add_subregion(get_system_memory(), sphb->io_win_addr,
605                                 &sphb->iowindow);
606 
607     bus = pci_register_bus(dev, NULL,
608                            pci_spapr_set_irq, pci_spapr_map_irq, sphb,
609                            &sphb->memspace, &sphb->iospace,
610                            PCI_DEVFN(0, 0), PCI_NUM_PINS, TYPE_PCI_BUS);
611     phb->bus = bus;
612 
613     /*
614      * Initialize PHB address space.
615      * By default there will be at least one subregion for default
616      * 32bit DMA window.
617      * Later the guest might want to create another DMA window
618      * which will become another memory subregion.
619      */
620     sprintf(namebuf, "%s.iommu-root", sphb->dtbusname);
621 
622     memory_region_init(&sphb->iommu_root, OBJECT(sphb),
623                        namebuf, UINT64_MAX);
624     address_space_init(&sphb->iommu_as, &sphb->iommu_root,
625                        sphb->dtbusname);
626 
627     pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb);
628 
629     pci_bus_set_route_irq_fn(bus, spapr_route_intx_pin_to_irq);
630 
631     QLIST_INSERT_HEAD(&spapr->phbs, sphb, list);
632 
633     /* Initialize the LSI table */
634     for (i = 0; i < PCI_NUM_PINS; i++) {
635         uint32_t irq;
636 
637         irq = spapr_allocate_lsi(0);
638         if (!irq) {
639             error_setg(errp, "spapr_allocate_lsi failed");
640             return;
641         }
642 
643         sphb->lsi_table[i].irq = irq;
644     }
645 
646     if (!info->finish_realize) {
647         error_setg(errp, "finish_realize not defined");
648         return;
649     }
650 
651     info->finish_realize(sphb, errp);
652 }
653 
654 static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp)
655 {
656     sphb->dma_window_start = 0;
657     sphb->dma_window_size = 0x40000000;
658     sphb->tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
659                                      sphb->dma_window_size);
660     if (!sphb->tcet) {
661         error_setg(errp, "Unable to create TCE table for %s",
662                    sphb->dtbusname);
663         return ;
664     }
665 
666     /* Register default 32bit DMA window */
667     memory_region_add_subregion(&sphb->iommu_root, 0,
668                                 spapr_tce_get_iommu(sphb->tcet));
669 }
670 
671 static void spapr_phb_reset(DeviceState *qdev)
672 {
673     SysBusDevice *s = SYS_BUS_DEVICE(qdev);
674     sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
675 
676     /* Reset the IOMMU state */
677     device_reset(DEVICE(sphb->tcet));
678 }
679 
/*
 * User-settable properties of the PHB.
 *
 * A default of -1 marks a parameter as "not specified";
 * spapr_phb_realize() either derives such parameters from "index" or
 * rejects the device when one of them is still -1.
 */
static Property spapr_phb_properties[] = {
    /* When set, BUID, LIOBN and both window addresses are derived from it */
    DEFINE_PROP_INT32("index", sPAPRPHBState, index, -1),
    DEFINE_PROP_UINT64("buid", sPAPRPHBState, buid, -1),
    DEFINE_PROP_UINT32("liobn", sPAPRPHBState, dma_liobn, -1),
    DEFINE_PROP_UINT64("mem_win_addr", sPAPRPHBState, mem_win_addr, -1),
    DEFINE_PROP_UINT64("mem_win_size", sPAPRPHBState, mem_win_size,
                       SPAPR_PCI_MMIO_WIN_SIZE),
    DEFINE_PROP_UINT64("io_win_addr", sPAPRPHBState, io_win_addr, -1),
    /*
     * NOTE(review): spapr_phb_realize() sizes the IO space with
     * SPAPR_PCI_IO_WIN_SIZE and does not read io_win_size - confirm
     * whether this property is meant to have an effect.
     */
    DEFINE_PROP_UINT64("io_win_size", sPAPRPHBState, io_win_size,
                       SPAPR_PCI_IO_WIN_SIZE),
    DEFINE_PROP_END_OF_LIST(),
};
692 
/*
 * Migration description of one LSI table entry (struct spapr_pci_lsi).
 * Used for the elements of sPAPRPHBState.lsi_table in vmstate_spapr_pci.
 */
static const VMStateDescription vmstate_spapr_pci_lsi = {
    .name = "spapr_pci/lsi",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        /*
         * NOTE(review): presumably the _EQUAL variant requires the
         * incoming value to match the local one - confirm against the
         * VMSTATE_UINT32_EQUAL definition.
         */
        VMSTATE_UINT32_EQUAL(irq, struct spapr_pci_lsi),

        VMSTATE_END_OF_LIST()
    },
};
703 
704 static const VMStateDescription vmstate_spapr_pci_msi = {
705     .name = "spapr_pci/lsi",
706     .version_id = 1,
707     .minimum_version_id = 1,
708     .fields = (VMStateField[]) {
709         VMSTATE_UINT32(config_addr, struct spapr_pci_msi),
710         VMSTATE_UINT32(irq, struct spapr_pci_msi),
711         VMSTATE_UINT32(nvec, struct spapr_pci_msi),
712 
713         VMSTATE_END_OF_LIST()
714     },
715 };
716 
/*
 * Migration description of the PHB (sPAPRPHBState), installed as the
 * device's vmsd in spapr_phb_class_init().
 *
 * The configuration parameters use the _EQUAL field variants.
 * NOTE(review): presumably those make migration fail when source and
 * destination were configured differently - confirm against the
 * VMSTATE_*_EQUAL definitions.
 */
static const VMStateDescription vmstate_spapr_pci = {
    .name = "spapr_pci",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64_EQUAL(buid, sPAPRPHBState),
        VMSTATE_UINT32_EQUAL(dma_liobn, sPAPRPHBState),
        VMSTATE_UINT64_EQUAL(mem_win_addr, sPAPRPHBState),
        VMSTATE_UINT64_EQUAL(mem_win_size, sPAPRPHBState),
        VMSTATE_UINT64_EQUAL(io_win_addr, sPAPRPHBState),
        VMSTATE_UINT64_EQUAL(io_win_size, sPAPRPHBState),
        /* One LSI entry per PCI interrupt pin */
        VMSTATE_STRUCT_ARRAY(lsi_table, sPAPRPHBState, PCI_NUM_PINS, 0,
                             vmstate_spapr_pci_lsi, struct spapr_pci_lsi),
        /* The whole MSI table, used and unused entries alike */
        VMSTATE_STRUCT_ARRAY(msi_table, sPAPRPHBState, SPAPR_MSIX_MAX_DEVS, 0,
                             vmstate_spapr_pci_msi, struct spapr_pci_msi),

        VMSTATE_END_OF_LIST()
    },
};
736 
737 static const char *spapr_phb_root_bus_path(PCIHostState *host_bridge,
738                                            PCIBus *rootbus)
739 {
740     sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(host_bridge);
741 
742     return sphb->dtbusname;
743 }
744 
745 static void spapr_phb_class_init(ObjectClass *klass, void *data)
746 {
747     PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
748     DeviceClass *dc = DEVICE_CLASS(klass);
749     sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass);
750 
751     hc->root_bus_path = spapr_phb_root_bus_path;
752     dc->realize = spapr_phb_realize;
753     dc->props = spapr_phb_properties;
754     dc->reset = spapr_phb_reset;
755     dc->vmsd = &vmstate_spapr_pci;
756     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
757     dc->cannot_instantiate_with_device_add_yet = false;
758     spc->finish_realize = spapr_phb_finish_realize;
759 }
760 
761 static const TypeInfo spapr_phb_info = {
762     .name          = TYPE_SPAPR_PCI_HOST_BRIDGE,
763     .parent        = TYPE_PCI_HOST_BRIDGE,
764     .instance_size = sizeof(sPAPRPHBState),
765     .class_init    = spapr_phb_class_init,
766     .class_size    = sizeof(sPAPRPHBClass),
767 };
768 
769 PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index)
770 {
771     DeviceState *dev;
772 
773     dev = qdev_create(NULL, TYPE_SPAPR_PCI_HOST_BRIDGE);
774     qdev_prop_set_uint32(dev, "index", index);
775     qdev_init_nofail(dev);
776 
777     return PCI_HOST_BRIDGE(dev);
778 }
779 
/*
 * Macros to operate with address in OF binding to PCI.
 *
 * b_x(x, p, l) keeps the low @l bits of @x and places them at bit
 * position @p.  The arithmetic is done on uint32_t so that filling
 * bit 31 (b_n) does not shift into the sign bit of a signed int.
 */
#define b_x(x, p, l)    (((uint32_t)(x) & ((1u << (l)) - 1)) << (p))
#define b_n(x)          b_x((x), 31, 1) /* 0 if relocatable */
#define b_p(x)          b_x((x), 30, 1) /* 1 if prefetchable */
#define b_t(x)          b_x((x), 29, 1) /* 1 if the address is aliased */
#define b_ss(x)         b_x((x), 24, 2) /* the space code */
#define b_bbbbbbbb(x)   b_x((x), 16, 8) /* bus number */
#define b_ddddd(x)      b_x((x), 11, 5) /* device number */
#define b_fff(x)        b_x((x), 8, 3)  /* function number */
#define b_rrrrrrrr(x)   b_x((x), 0, 8)  /* register number */
790 
791 int spapr_populate_pci_dt(sPAPRPHBState *phb,
792                           uint32_t xics_phandle,
793                           void *fdt)
794 {
795     int bus_off, i, j;
796     char nodename[256];
797     uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) };
798     struct {
799         uint32_t hi;
800         uint64_t child;
801         uint64_t parent;
802         uint64_t size;
803     } QEMU_PACKED ranges[] = {
804         {
805             cpu_to_be32(b_ss(1)), cpu_to_be64(0),
806             cpu_to_be64(phb->io_win_addr),
807             cpu_to_be64(memory_region_size(&phb->iospace)),
808         },
809         {
810             cpu_to_be32(b_ss(2)), cpu_to_be64(SPAPR_PCI_MEM_WIN_BUS_OFFSET),
811             cpu_to_be64(phb->mem_win_addr),
812             cpu_to_be64(memory_region_size(&phb->memwindow)),
813         },
814     };
815     uint64_t bus_reg[] = { cpu_to_be64(phb->buid), 0 };
816     uint32_t interrupt_map_mask[] = {
817         cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)};
818     uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7];
819 
820     /* Start populating the FDT */
821     sprintf(nodename, "pci@%" PRIx64, phb->buid);
822     bus_off = fdt_add_subnode(fdt, 0, nodename);
823     if (bus_off < 0) {
824         return bus_off;
825     }
826 
827 #define _FDT(exp) \
828     do { \
829         int ret = (exp);                                           \
830         if (ret < 0) {                                             \
831             return ret;                                            \
832         }                                                          \
833     } while (0)
834 
835     /* Write PHB properties */
836     _FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci"));
837     _FDT(fdt_setprop_string(fdt, bus_off, "compatible", "IBM,Logical_PHB"));
838     _FDT(fdt_setprop_cell(fdt, bus_off, "#address-cells", 0x3));
839     _FDT(fdt_setprop_cell(fdt, bus_off, "#size-cells", 0x2));
840     _FDT(fdt_setprop_cell(fdt, bus_off, "#interrupt-cells", 0x1));
841     _FDT(fdt_setprop(fdt, bus_off, "used-by-rtas", NULL, 0));
842     _FDT(fdt_setprop(fdt, bus_off, "bus-range", &bus_range, sizeof(bus_range)));
843     _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof(ranges)));
844     _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg)));
845     _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1));
846 
847     /* Build the interrupt-map, this must matches what is done
848      * in pci_spapr_map_irq
849      */
850     _FDT(fdt_setprop(fdt, bus_off, "interrupt-map-mask",
851                      &interrupt_map_mask, sizeof(interrupt_map_mask)));
852     for (i = 0; i < PCI_SLOT_MAX; i++) {
853         for (j = 0; j < PCI_NUM_PINS; j++) {
854             uint32_t *irqmap = interrupt_map[i*PCI_NUM_PINS + j];
855             int lsi_num = pci_spapr_swizzle(i, j);
856 
857             irqmap[0] = cpu_to_be32(b_ddddd(i)|b_fff(0));
858             irqmap[1] = 0;
859             irqmap[2] = 0;
860             irqmap[3] = cpu_to_be32(j+1);
861             irqmap[4] = cpu_to_be32(xics_phandle);
862             irqmap[5] = cpu_to_be32(phb->lsi_table[lsi_num].irq);
863             irqmap[6] = cpu_to_be32(0x8);
864         }
865     }
866     /* Write interrupt map */
867     _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
868                      sizeof(interrupt_map)));
869 
870     spapr_dma_dt(fdt, bus_off, "ibm,dma-window",
871                  phb->dma_liobn, phb->dma_window_start,
872                  phb->dma_window_size);
873 
874     return 0;
875 }
876 
877 void spapr_pci_rtas_init(void)
878 {
879     spapr_rtas_register("read-pci-config", rtas_read_pci_config);
880     spapr_rtas_register("write-pci-config", rtas_write_pci_config);
881     spapr_rtas_register("ibm,read-pci-config", rtas_ibm_read_pci_config);
882     spapr_rtas_register("ibm,write-pci-config", rtas_ibm_write_pci_config);
883     if (msi_supported) {
884         spapr_rtas_register("ibm,query-interrupt-source-number",
885                             rtas_ibm_query_interrupt_source_number);
886         spapr_rtas_register("ibm,change-msi", rtas_ibm_change_msi);
887     }
888 }
889 
890 static void spapr_pci_register_types(void)
891 {
892     type_register_static(&spapr_phb_info);
893 }
894 
895 type_init(spapr_pci_register_types)
896