/*
 * QEMU sPAPR PCI host originated from Uninorth PCI host
 *
 * Copyright (c) 2011 Alexey Kardashevskiy, IBM Corporation.
 * Copyright (C) 2011 David Gibson, IBM Corporation.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "hw/hw.h"
#include "hw/pci/pci.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
#include "hw/pci/pci_host.h"
#include "hw/ppc/spapr.h"
#include "hw/pci-host/spapr.h"
#include "exec/address-spaces.h"
#include <libfdt.h>
#include "trace.h"
#include "qemu/error-report.h"
#include "qapi/qmp/qerror.h"

#include "hw/pci/pci_bus.h"
#include "hw/ppc/spapr_drc.h"
#include "sysemu/device_tree.h"

/* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */
#define RTAS_QUERY_FN           0
#define RTAS_CHANGE_FN          1
#define RTAS_RESET_FN           2
#define RTAS_CHANGE_MSI_FN      3
#define RTAS_CHANGE_MSIX_FN     4

/* Interrupt types to return on RTAS_CHANGE_* */
#define RTAS_TYPE_MSI           1
#define RTAS_TYPE_MSIX          2

#define _FDT(exp) \
    do { \
        int ret = (exp); \
        if (ret < 0) { \
            return ret; \
        } \
    } while (0)

sPAPRPHBState *spapr_pci_find_phb(sPAPREnvironment *spapr, uint64_t buid)
{
    sPAPRPHBState *sphb;

    QLIST_FOREACH(sphb, &spapr->phbs, list) {
        if (sphb->buid != buid) {
            continue;
        }
        return sphb;
    }

    return NULL;
}

PCIDevice *spapr_pci_find_dev(sPAPREnvironment *spapr, uint64_t buid,
                              uint32_t config_addr)
{
    sPAPRPHBState *sphb = spapr_pci_find_phb(spapr, buid);
    PCIHostState *phb = PCI_HOST_BRIDGE(sphb);
    int bus_num = (config_addr >> 16) & 0xFF;
    int devfn = (config_addr >> 8) & 0xFF;

    if (!phb) {
        return NULL;
    }

    return pci_find_device(phb->bus, bus_num, devfn);
}
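/*
 * A PAPR RTAS config address packs the whole device location into one
 * word: bits 0-7 hold the config register number, bits 8-15 the devfn,
 * bits 16-23 the bus number, and bits 28-31 the upper four bits of the
 * extended (12-bit) register number. spapr_pci_find_dev() above
 * consumes the bus/devfn fields; rtas_pci_cfgaddr() below recovers the
 * register offset within config space.
 */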
static uint32_t rtas_pci_cfgaddr(uint32_t arg)
{
    /* This handles the encoding of extended config space addresses */
    return ((arg >> 20) & 0xf00) | (arg & 0xff);
}

static void finish_read_pci_config(sPAPREnvironment *spapr, uint64_t buid,
                                   uint32_t addr, uint32_t size,
                                   target_ulong rets)
{
    PCIDevice *pci_dev;
    uint32_t val;

    if ((size != 1) && (size != 2) && (size != 4)) {
        /* access must be 1, 2 or 4 bytes */
        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
        return;
    }

    pci_dev = spapr_pci_find_dev(spapr, buid, addr);
    addr = rtas_pci_cfgaddr(addr);

    if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) {
        /* Access must be to a valid device, within bounds and
         * naturally aligned */
        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
        return;
    }

    val = pci_host_config_read_common(pci_dev, addr,
                                      pci_config_size(pci_dev), size);

    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
    rtas_st(rets, 1, val);
}

static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
                                     uint32_t token, uint32_t nargs,
                                     target_ulong args,
                                     uint32_t nret, target_ulong rets)
{
    uint64_t buid;
    uint32_t size, addr;

    if ((nargs != 4) || (nret != 2)) {
        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
        return;
    }

    buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
    size = rtas_ld(args, 3);
    addr = rtas_ld(args, 0);

    finish_read_pci_config(spapr, buid, addr, size, rets);
}

static void rtas_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
                                 uint32_t token, uint32_t nargs,
                                 target_ulong args,
                                 uint32_t nret, target_ulong rets)
{
    uint32_t size, addr;

    if ((nargs != 2) || (nret != 2)) {
        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
        return;
    }

    size = rtas_ld(args, 1);
    addr = rtas_ld(args, 0);

    finish_read_pci_config(spapr, 0, addr, size, rets);
}

static void finish_write_pci_config(sPAPREnvironment *spapr, uint64_t buid,
                                    uint32_t addr, uint32_t size,
                                    uint32_t val, target_ulong rets)
{
    PCIDevice *pci_dev;

    if ((size != 1) && (size != 2) && (size != 4)) {
        /* access must be 1, 2 or 4 bytes */
        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
        return;
    }

    pci_dev = spapr_pci_find_dev(spapr, buid, addr);
    addr = rtas_pci_cfgaddr(addr);

    if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) {
        /* Access must be to a valid device, within bounds and
         * naturally aligned */
        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
        return;
    }

    pci_host_config_write_common(pci_dev, addr, pci_config_size(pci_dev),
                                 val, size);

    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
}

static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
                                      uint32_t token, uint32_t nargs,
                                      target_ulong args,
                                      uint32_t nret, target_ulong rets)
{
    uint64_t buid;
    uint32_t val, size, addr;

    if ((nargs != 5) || (nret != 1)) {
        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
        return;
    }

    buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
    val = rtas_ld(args, 4);
    size = rtas_ld(args, 3);
    addr = rtas_ld(args, 0);

    finish_write_pci_config(spapr, buid, addr, size, val, rets);
}

static void rtas_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
                                  uint32_t token, uint32_t nargs,
                                  target_ulong args,
                                  uint32_t nret, target_ulong rets)
{
    uint32_t val, size, addr;

    if ((nargs != 3) || (nret != 1)) {
        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
        return;
    }

    val = rtas_ld(args, 2);
    size = rtas_ld(args, 1);
    addr = rtas_ld(args, 0);

    finish_write_pci_config(spapr, 0, addr, size, val, rets);
}

/*
 * Set MSI/MSIX message data.
 * This is required for msi_notify()/msix_notify() which
 * will write at the addresses via spapr_msi_write().
 *
 * If hwaddr == 0, all entries will have .data == first_irq i.e.
 * table will be reset.
 */
static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix,
                             unsigned first_irq, unsigned req_num)
{
    unsigned i;
    MSIMessage msg = { .address = addr, .data = first_irq };

    if (!msix) {
        msi_set_message(pdev, msg);
        trace_spapr_pci_msi_setup(pdev->name, 0, msg.address);
        return;
    }

    for (i = 0; i < req_num; ++i) {
        msix_set_message(pdev, i, msg);
        trace_spapr_pci_msi_setup(pdev->name, i, msg.address);
        if (addr) {
            ++msg.data;
        }
    }
}
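/*
 * "ibm,change-msi" RTAS call.
 *
 * Inputs, as read below: config address, BUID (hi/lo), requested
 * function (RTAS_CHANGE_*_FN), number of interrupts (0 releases the
 * device's MSIs) and sequence number. Outputs: status, the number of
 * interrupts actually granted, the next sequence number and the
 * interrupt type (RTAS_TYPE_MSI or RTAS_TYPE_MSIX).
 */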
static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
                                uint32_t token, uint32_t nargs,
                                target_ulong args, uint32_t nret,
                                target_ulong rets)
{
    uint32_t config_addr = rtas_ld(args, 0);
    uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
    unsigned int func = rtas_ld(args, 3);
    unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */
    unsigned int seq_num = rtas_ld(args, 5);
    unsigned int ret_intr_type;
    unsigned int irq, max_irqs = 0, num = 0;
    sPAPRPHBState *phb = NULL;
    PCIDevice *pdev = NULL;
    spapr_pci_msi *msi;
    int *config_addr_key;

    switch (func) {
    case RTAS_CHANGE_MSI_FN:
    case RTAS_CHANGE_FN:
        ret_intr_type = RTAS_TYPE_MSI;
        break;
    case RTAS_CHANGE_MSIX_FN:
        ret_intr_type = RTAS_TYPE_MSIX;
        break;
    default:
        error_report("rtas_ibm_change_msi(%u) is not implemented", func);
        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
        return;
    }

    /* Find sPAPRPHBState */
    phb = spapr_pci_find_phb(spapr, buid);
    if (phb) {
        pdev = spapr_pci_find_dev(spapr, buid, config_addr);
    }
    if (!phb || !pdev) {
        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
        return;
    }

    /* Releasing MSIs */
    if (!req_num) {
        msi = (spapr_pci_msi *) g_hash_table_lookup(phb->msi, &config_addr);
        if (!msi) {
            trace_spapr_pci_msi("Releasing wrong config", config_addr);
            rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
            return;
        }

        xics_free(spapr->icp, msi->first_irq, msi->num);
        if (msi_present(pdev)) {
            spapr_msi_setmsg(pdev, 0, false, 0, num);
        }
        if (msix_present(pdev)) {
            spapr_msi_setmsg(pdev, 0, true, 0, num);
        }
        g_hash_table_remove(phb->msi, &config_addr);

        trace_spapr_pci_msi("Released MSIs", config_addr);
        rtas_st(rets, 0, RTAS_OUT_SUCCESS);
        rtas_st(rets, 1, 0);
        return;
    }

    /* Enabling MSI */

    /* Check if the device supports as many IRQs as requested */
    if (ret_intr_type == RTAS_TYPE_MSI) {
        max_irqs = msi_nr_vectors_allocated(pdev);
    } else if (ret_intr_type == RTAS_TYPE_MSIX) {
        max_irqs = pdev->msix_entries_nr;
    }
    if (!max_irqs) {
        error_report("Requested interrupt type %d is not enabled for device %x",
                     ret_intr_type, config_addr);
        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
        return;
    }
    /* Correct the number if the guest asked for too many */
    if (req_num > max_irqs) {
        trace_spapr_pci_msi_retry(config_addr, req_num, max_irqs);
        req_num = max_irqs;
        irq = 0; /* to avoid misleading trace */
        goto out;
    }

    /* Allocate MSIs */
    irq = xics_alloc_block(spapr->icp, 0, req_num, false,
                           ret_intr_type == RTAS_TYPE_MSI);
    if (!irq) {
        error_report("Cannot allocate MSIs for device %x", config_addr);
        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
        return;
    }

    /* Set up MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */
    spapr_msi_setmsg(pdev, SPAPR_PCI_MSI_WINDOW,
                     ret_intr_type == RTAS_TYPE_MSIX, irq, req_num);

    /* Add MSI device to cache */
    msi = g_new(spapr_pci_msi, 1);
    msi->first_irq = irq;
    msi->num = req_num;
    config_addr_key = g_new(int, 1);
    *config_addr_key = config_addr;
    g_hash_table_insert(phb->msi, config_addr_key, msi);

out:
    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
    rtas_st(rets, 1, req_num);
    rtas_st(rets, 2, ++seq_num);
    rtas_st(rets, 3, ret_intr_type);

    trace_spapr_pci_rtas_ibm_change_msi(config_addr, func, req_num, irq);
}
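/*
 * "ibm,query-interrupt-source-number" RTAS call: translates a device's
 * MSI vector number into the global XICS interrupt source number
 * (first_irq + vector), as cached by rtas_ibm_change_msi() above.
 */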
static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
                                                   sPAPREnvironment *spapr,
                                                   uint32_t token,
                                                   uint32_t nargs,
                                                   target_ulong args,
                                                   uint32_t nret,
                                                   target_ulong rets)
{
    uint32_t config_addr = rtas_ld(args, 0);
    uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
    unsigned int intr_src_num = -1, ioa_intr_num = rtas_ld(args, 3);
    sPAPRPHBState *phb = NULL;
    PCIDevice *pdev = NULL;
    spapr_pci_msi *msi;

    /* Find sPAPRPHBState */
    phb = spapr_pci_find_phb(spapr, buid);
    if (phb) {
        pdev = spapr_pci_find_dev(spapr, buid, config_addr);
    }
    if (!phb || !pdev) {
        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
        return;
    }

    /* Find device descriptor and start IRQ */
    msi = (spapr_pci_msi *) g_hash_table_lookup(phb->msi, &config_addr);
    if (!msi || !msi->first_irq || !msi->num || (ioa_intr_num >= msi->num)) {
        trace_spapr_pci_msi("Failed to return vector", config_addr);
        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
        return;
    }
    intr_src_num = msi->first_irq + ioa_intr_num;
    trace_spapr_pci_rtas_ibm_query_interrupt_source_number(ioa_intr_num,
                                                           intr_src_num);

    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
    rtas_st(rets, 1, intr_src_num);
    rtas_st(rets, 2, 1); /* 0 == level; 1 == edge */
}
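/*
 * EEH (Enhanced Error Handling) RTAS calls. These all delegate to the
 * optional sPAPRPHBClass hooks (eeh_set_option, eeh_get_state, ...),
 * which only a subclass, such as the VFIO host bridge, is expected to
 * implement; the emulated PHB leaves them NULL, so the calls fail with
 * a parameter error.
 */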
static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu,
                                    sPAPREnvironment *spapr,
                                    uint32_t token, uint32_t nargs,
                                    target_ulong args, uint32_t nret,
                                    target_ulong rets)
{
    sPAPRPHBState *sphb;
    sPAPRPHBClass *spc;
    uint32_t addr, option;
    uint64_t buid;
    int ret;

    if ((nargs != 4) || (nret != 1)) {
        goto param_error_exit;
    }

    buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
    addr = rtas_ld(args, 0);
    option = rtas_ld(args, 3);

    sphb = spapr_pci_find_phb(spapr, buid);
    if (!sphb) {
        goto param_error_exit;
    }

    spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
    if (!spc->eeh_set_option) {
        goto param_error_exit;
    }

    ret = spc->eeh_set_option(sphb, addr, option);
    rtas_st(rets, 0, ret);
    return;

param_error_exit:
    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
}

static void rtas_ibm_get_config_addr_info2(PowerPCCPU *cpu,
                                           sPAPREnvironment *spapr,
                                           uint32_t token, uint32_t nargs,
                                           target_ulong args, uint32_t nret,
                                           target_ulong rets)
{
    sPAPRPHBState *sphb;
    sPAPRPHBClass *spc;
    PCIDevice *pdev;
    uint32_t addr, option;
    uint64_t buid;

    if ((nargs != 4) || (nret != 2)) {
        goto param_error_exit;
    }

    buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
    sphb = spapr_pci_find_phb(spapr, buid);
    if (!sphb) {
        goto param_error_exit;
    }

    spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
    if (!spc->eeh_set_option) {
        goto param_error_exit;
    }

    /*
     * We always have PE address of form "00BB0001". "BB"
     * represents the bus number of PE's primary bus.
     */
    option = rtas_ld(args, 3);
    switch (option) {
    case RTAS_GET_PE_ADDR:
        addr = rtas_ld(args, 0);
        pdev = spapr_pci_find_dev(spapr, buid, addr);
        if (!pdev) {
            goto param_error_exit;
        }

        rtas_st(rets, 1, (pci_bus_num(pdev->bus) << 16) + 1);
        break;
    case RTAS_GET_PE_MODE:
        rtas_st(rets, 1, RTAS_PE_MODE_SHARED);
        break;
    default:
        goto param_error_exit;
    }

    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
    return;

param_error_exit:
    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
}

static void rtas_ibm_read_slot_reset_state2(PowerPCCPU *cpu,
                                            sPAPREnvironment *spapr,
                                            uint32_t token, uint32_t nargs,
                                            target_ulong args, uint32_t nret,
                                            target_ulong rets)
{
    sPAPRPHBState *sphb;
    sPAPRPHBClass *spc;
    uint64_t buid;
    int state, ret;

    if ((nargs != 3) || (nret != 4 && nret != 5)) {
        goto param_error_exit;
    }

    buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
    sphb = spapr_pci_find_phb(spapr, buid);
    if (!sphb) {
        goto param_error_exit;
    }

    spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
    if (!spc->eeh_get_state) {
        goto param_error_exit;
    }

    ret = spc->eeh_get_state(sphb, &state);
    rtas_st(rets, 0, ret);
    if (ret != RTAS_OUT_SUCCESS) {
        return;
    }

    rtas_st(rets, 1, state);
    rtas_st(rets, 2, RTAS_EEH_SUPPORT);
    rtas_st(rets, 3, RTAS_EEH_PE_UNAVAIL_INFO);
    if (nret >= 5) {
        rtas_st(rets, 4, RTAS_EEH_PE_RECOVER_INFO);
    }
    return;

param_error_exit:
    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
}

static void rtas_ibm_set_slot_reset(PowerPCCPU *cpu,
                                    sPAPREnvironment *spapr,
                                    uint32_t token, uint32_t nargs,
                                    target_ulong args, uint32_t nret,
                                    target_ulong rets)
{
    sPAPRPHBState *sphb;
    sPAPRPHBClass *spc;
    uint32_t option;
    uint64_t buid;
    int ret;

    if ((nargs != 4) || (nret != 1)) {
        goto param_error_exit;
    }

    buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
    option = rtas_ld(args, 3);
    sphb = spapr_pci_find_phb(spapr, buid);
    if (!sphb) {
        goto param_error_exit;
    }

    spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
    if (!spc->eeh_reset) {
        goto param_error_exit;
    }

    ret = spc->eeh_reset(sphb, option);
    rtas_st(rets, 0, ret);
    return;

param_error_exit:
    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
}

static void rtas_ibm_configure_pe(PowerPCCPU *cpu,
                                  sPAPREnvironment *spapr,
                                  uint32_t token, uint32_t nargs,
                                  target_ulong args, uint32_t nret,
                                  target_ulong rets)
{
    sPAPRPHBState *sphb;
    sPAPRPHBClass *spc;
    uint64_t buid;
    int ret;

    if ((nargs != 3) || (nret != 1)) {
        goto param_error_exit;
    }

    buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
    sphb = spapr_pci_find_phb(spapr, buid);
    if (!sphb) {
        goto param_error_exit;
    }

    spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
    if (!spc->eeh_configure) {
        goto param_error_exit;
    }

    ret = spc->eeh_configure(sphb);
    rtas_st(rets, 0, ret);
    return;

param_error_exit:
    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
}

/* Stub: error log retrieval is to be supported properly later */
static void rtas_ibm_slot_error_detail(PowerPCCPU *cpu,
                                       sPAPREnvironment *spapr,
                                       uint32_t token, uint32_t nargs,
                                       target_ulong args, uint32_t nret,
                                       target_ulong rets)
{
    sPAPRPHBState *sphb;
    sPAPRPHBClass *spc;
    int option;
    uint64_t buid;

    if ((nargs != 8) || (nret != 1)) {
        goto param_error_exit;
    }

    buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
    sphb = spapr_pci_find_phb(spapr, buid);
    if (!sphb) {
        goto param_error_exit;
    }

    spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
    if (!spc->eeh_set_option) {
        goto param_error_exit;
    }

    option = rtas_ld(args, 7);
    switch (option) {
    case RTAS_SLOT_TEMP_ERR_LOG:
    case RTAS_SLOT_PERM_ERR_LOG:
        break;
    default:
        goto param_error_exit;
    }

    /* We don't have error log yet */
    rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND);
    return;

param_error_exit:
    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
}
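/*
 * Standard PCI INTx swizzling. For example, a device in slot 3 using
 * pin INTB (irq_num 1) ends up on LSI (3 + 1) % 4 = 0. The same
 * formula is used below when building the "interrupt-map" device tree
 * property in spapr_populate_pci_dt(), so the two must stay in sync.
 */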
static int pci_spapr_swizzle(int slot, int pin)
{
    return (slot + pin) % PCI_NUM_PINS;
}

static int pci_spapr_map_irq(PCIDevice *pci_dev, int irq_num)
{
    /*
     * Here we need to convert pci_dev + irq_num to some unique value
     * which is less than number of IRQs on the specific bus (4). We
     * use standard PCI swizzling, that is (slot number + pin number)
     * % 4.
     */
    return pci_spapr_swizzle(PCI_SLOT(pci_dev->devfn), irq_num);
}

static void pci_spapr_set_irq(void *opaque, int irq_num, int level)
{
    /*
     * Here we use the number returned by pci_spapr_map_irq to find a
     * corresponding qemu_irq.
     */
    sPAPRPHBState *phb = opaque;

    trace_spapr_pci_lsi_set(phb->dtbusname, irq_num,
                            phb->lsi_table[irq_num].irq);
    qemu_set_irq(spapr_phb_lsi_qirq(phb, irq_num), level);
}

static PCIINTxRoute spapr_route_intx_pin_to_irq(void *opaque, int pin)
{
    sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(opaque);
    PCIINTxRoute route;

    route.mode = PCI_INTX_ENABLED;
    route.irq = sphb->lsi_table[pin].irq;

    return route;
}
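/*
 * End-to-end MSI delivery: spapr_msi_setmsg() programmed each vector
 * with address == SPAPR_PCI_MSI_WINDOW and data == a global XICS IRQ
 * number, so when a device calls msi_notify()/msix_notify() the
 * resulting DMA write lands in the msiwindow memory region created at
 * realize time and reaches spapr_msi_write() below, which simply
 * pulses the corresponding XICS qirq.
 */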
/*
 * MSI/MSIX memory region implementation.
 * The handler handles both MSI and MSIX.
 * For MSI-X, the vector number is encoded as a part of the address,
 * data is set to 0.
 * For MSI, the vector number is encoded in least bits in data.
 */
static void spapr_msi_write(void *opaque, hwaddr addr,
                            uint64_t data, unsigned size)
{
    uint32_t irq = data;

    trace_spapr_pci_msi_write(addr, data, irq);

    qemu_irq_pulse(xics_get_qirq(spapr->icp, irq));
}

static const MemoryRegionOps spapr_msi_ops = {
    /* There is no .read as the read result is undefined by PCI spec */
    .read = NULL,
    .write = spapr_msi_write,
    .endianness = DEVICE_LITTLE_ENDIAN
};

/*
 * PHB PCI device
 */
static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
{
    sPAPRPHBState *phb = opaque;

    return &phb->iommu_as;
}

/* Macros to operate with address in OF binding to PCI */
#define b_x(x, p, l)    (((x) & ((1 << (l)) - 1)) << (p))
#define b_n(x)          b_x((x), 31, 1) /* 0 if relocatable */
#define b_p(x)          b_x((x), 30, 1) /* 1 if prefetchable */
#define b_t(x)          b_x((x), 29, 1) /* 1 if the address is aliased */
#define b_ss(x)         b_x((x), 24, 2) /* the space code */
#define b_bbbbbbbb(x)   b_x((x), 16, 8) /* bus number */
#define b_ddddd(x)      b_x((x), 11, 5) /* device number */
#define b_fff(x)        b_x((x), 8, 3)  /* function number */
#define b_rrrrrrrr(x)   b_x((x), 0, 8)  /* register number */

/* for 'reg'/'assigned-addresses' OF properties */
#define RESOURCE_CELLS_SIZE     2
#define RESOURCE_CELLS_ADDRESS  3

typedef struct ResourceFields {
    uint32_t phys_hi;
    uint32_t phys_mid;
    uint32_t phys_lo;
    uint32_t size_hi;
    uint32_t size_lo;
} QEMU_PACKED ResourceFields;

typedef struct ResourceProps {
    ResourceFields reg[8];
    ResourceFields assigned[7];
    uint32_t reg_len;
    uint32_t assigned_len;
} ResourceProps;

/* fill in the 'reg'/'assigned-addresses' OF properties for
 * a PCI device. 'reg' describes resource requirements for a
 * device's IO/MEM regions, 'assigned-addresses' describes the
 * actual resource assignments.
 *
 * the properties are arrays of ('phys-addr', 'size') pairs describing
 * the addressable regions of the PCI device, where 'phys-addr' is a
 * RESOURCE_CELLS_ADDRESS-tuple of 32-bit integers corresponding to
 * (phys.hi, phys.mid, phys.lo), and 'size' is a
 * RESOURCE_CELLS_SIZE-tuple corresponding to (size.hi, size.lo).
 *
 * phys.hi = 0xYYXXXXZZ, where:
 *   0xYY = npt000ss
 *          |||    |
 *          |||    +-- space code: 1 if IO region, 2 if MEM region
 *          ||+------- for non-relocatable IO: 1 if aliased
 *          ||         for relocatable IO: 1 if below 64KB
 *          ||         for MEM: 1 if below 1MB
 *          |+-------- 1 if region is prefetchable
 *          +--------- 1 if region is non-relocatable
 *   0xXXXX = bbbbbbbb dddddfff, encoding bus, slot, and function
 *            bits respectively
 *   0xZZ = rrrrrrrr, the register number of the BAR corresponding
 *          to the region
 *
 * phys.mid and phys.lo correspond respectively to the hi/lo portions
 * of the actual address of the region.
 *
 * how the phys-addr/size values are used differs slightly between
 * 'reg' and 'assigned-addresses' properties. namely, 'reg' has
 * an additional description for the config space region of the
 * device, and in the case of QEMU has n=0 and phys.mid=phys.lo=0
 * to describe the region as relocatable, with an address-mapping
 * that corresponds directly to the PHB's address space for the
 * resource. 'assigned-addresses' always has n=1 set with an absolute
 * address assigned for the resource. in general, 'assigned-addresses'
 * won't be populated, since addresses for PCI devices are generally
 * unmapped initially and left to the guest to assign.
 *
 * note also that addresses defined in these properties are, at least
 * for PAPR guests, relative to the PHB's IO/MEM windows, and
 * correspond directly to the addresses in the BARs.
 *
 * in accordance with PCI Bus Binding to Open Firmware,
 * IEEE Std 1275-1994, section 4.1.1, as implemented by PAPR+ v2.7,
 * Appendix C.
 */
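/*
 * Worked example (illustrative): a 32-bit, non-prefetchable memory
 * BAR 1 on device 2, function 0 of bus 0 yields
 *   phys.hi = b_ss(2) | b_ddddd(2) | b_rrrrrrrr(0x14)
 *           = 0x02000000 | 0x00001000 | 0x14 = 0x02001014
 * (0x14 being the config space offset of BAR 1).
 */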
static void populate_resource_props(PCIDevice *d, ResourceProps *rp)
{
    int bus_num = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(d))));
    uint32_t dev_id = (b_bbbbbbbb(bus_num) |
                       b_ddddd(PCI_SLOT(d->devfn)) |
                       b_fff(PCI_FUNC(d->devfn)));
    ResourceFields *reg, *assigned;
    int i, reg_idx = 0, assigned_idx = 0;

    /* config space region */
    reg = &rp->reg[reg_idx++];
    reg->phys_hi = cpu_to_be32(dev_id);
    reg->phys_mid = 0;
    reg->phys_lo = 0;
    reg->size_hi = 0;
    reg->size_lo = 0;

    for (i = 0; i < PCI_NUM_REGIONS; i++) {
        if (!d->io_regions[i].size) {
            continue;
        }

        reg = &rp->reg[reg_idx++];

        reg->phys_hi = cpu_to_be32(dev_id | b_rrrrrrrr(pci_bar(d, i)));
        if (d->io_regions[i].type & PCI_BASE_ADDRESS_SPACE_IO) {
            reg->phys_hi |= cpu_to_be32(b_ss(1));
        } else {
            reg->phys_hi |= cpu_to_be32(b_ss(2));
        }
        reg->phys_mid = 0;
        reg->phys_lo = 0;
        reg->size_hi = cpu_to_be32(d->io_regions[i].size >> 32);
        reg->size_lo = cpu_to_be32(d->io_regions[i].size);

        if (d->io_regions[i].addr == PCI_BAR_UNMAPPED) {
            continue;
        }

        assigned = &rp->assigned[assigned_idx++];
        /* reg->phys_hi is already big-endian, so OR in the n bit in the
         * same representation rather than byte-swapping it twice */
        assigned->phys_hi = reg->phys_hi | cpu_to_be32(b_n(1));
        assigned->phys_mid = cpu_to_be32(d->io_regions[i].addr >> 32);
        assigned->phys_lo = cpu_to_be32(d->io_regions[i].addr);
        assigned->size_hi = reg->size_hi;
        assigned->size_lo = reg->size_lo;
    }

    rp->reg_len = reg_idx * sizeof(ResourceFields);
    rp->assigned_len = assigned_idx * sizeof(ResourceFields);
}

static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
                                       int phb_index, int drc_index,
                                       const char *drc_name)
{
    ResourceProps rp;
    bool is_bridge = false;
    int pci_status;

    if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) ==
        PCI_HEADER_TYPE_BRIDGE) {
        is_bridge = true;
    }

    /* in accordance with PAPR+ v2.7 13.6.3, Table 181 */
    _FDT(fdt_setprop_cell(fdt, offset, "vendor-id",
                          pci_default_read_config(dev, PCI_VENDOR_ID, 2)));
    _FDT(fdt_setprop_cell(fdt, offset, "device-id",
                          pci_default_read_config(dev, PCI_DEVICE_ID, 2)));
    _FDT(fdt_setprop_cell(fdt, offset, "revision-id",
                          pci_default_read_config(dev, PCI_REVISION_ID, 1)));
    _FDT(fdt_setprop_cell(fdt, offset, "class-code",
                          pci_default_read_config(dev, PCI_CLASS_DEVICE, 2)
                            << 8));
    if (pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)) {
        _FDT(fdt_setprop_cell(fdt, offset, "interrupts",
                 pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)));
    }

    if (!is_bridge) {
        _FDT(fdt_setprop_cell(fdt, offset, "min-grant",
            pci_default_read_config(dev, PCI_MIN_GNT, 1)));
        _FDT(fdt_setprop_cell(fdt, offset, "max-latency",
            pci_default_read_config(dev, PCI_MAX_LAT, 1)));
    }

    if (pci_default_read_config(dev, PCI_SUBSYSTEM_ID, 2)) {
        _FDT(fdt_setprop_cell(fdt, offset, "subsystem-id",
                 pci_default_read_config(dev, PCI_SUBSYSTEM_ID, 2)));
    }

    if (pci_default_read_config(dev, PCI_SUBSYSTEM_VENDOR_ID, 2)) {
        _FDT(fdt_setprop_cell(fdt, offset, "subsystem-vendor-id",
                 pci_default_read_config(dev, PCI_SUBSYSTEM_VENDOR_ID, 2)));
    }

    _FDT(fdt_setprop_cell(fdt, offset, "cache-line-size",
                          pci_default_read_config(dev, PCI_CACHE_LINE_SIZE, 1)));

    /* the following fdt cells are derived from the PCI status register */
    pci_status = pci_default_read_config(dev, PCI_STATUS, 2);
    _FDT(fdt_setprop_cell(fdt, offset, "devsel-speed",
                          PCI_STATUS_DEVSEL_MASK & pci_status));

    if (pci_status & PCI_STATUS_FAST_BACK) {
        _FDT(fdt_setprop(fdt, offset, "fast-back-to-back", NULL, 0));
    }
    if (pci_status & PCI_STATUS_66MHZ) {
        _FDT(fdt_setprop(fdt, offset, "66mhz-capable", NULL, 0));
    }
    if (pci_status & PCI_STATUS_UDF) {
        _FDT(fdt_setprop(fdt, offset, "udf-supported", NULL, 0));
    }

    /* NOTE: this is normally generated by firmware via path/unit name,
     * but in our case we must set it manually since it does not get
     * processed by OF beforehand
     */
    _FDT(fdt_setprop_string(fdt, offset, "name", "pci"));
    _FDT(fdt_setprop(fdt, offset, "ibm,loc-code", drc_name, strlen(drc_name)));
    _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index));

    _FDT(fdt_setprop_cell(fdt, offset, "#address-cells",
                          RESOURCE_CELLS_ADDRESS));
    _FDT(fdt_setprop_cell(fdt, offset, "#size-cells",
                          RESOURCE_CELLS_SIZE));
    _FDT(fdt_setprop_cell(fdt, offset, "ibm,req#msi-x",
                          RESOURCE_CELLS_SIZE));

    populate_resource_props(dev, &rp);
    _FDT(fdt_setprop(fdt, offset, "reg", (uint8_t *)rp.reg, rp.reg_len));
    _FDT(fdt_setprop(fdt, offset, "assigned-addresses",
                     (uint8_t *)rp.assigned, rp.assigned_len));

    return 0;
}
/* create OF node for pci device and required OF DT properties */
static void *spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev,
                                       int drc_index, const char *drc_name,
                                       int *dt_offset)
{
    void *fdt;
    int offset, ret, fdt_size;
    int slot = PCI_SLOT(dev->devfn);
    int func = PCI_FUNC(dev->devfn);
    char nodename[512];

    fdt = create_device_tree(&fdt_size);
    if (func != 0) {
        sprintf(nodename, "pci@%d,%d", slot, func);
    } else {
        sprintf(nodename, "pci@%d", slot);
    }
    offset = fdt_add_subnode(fdt, 0, nodename);
    ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb->index, drc_index,
                                      drc_name);
    g_assert(!ret);

    *dt_offset = offset;
    return fdt;
}

static void spapr_phb_add_pci_device(sPAPRDRConnector *drc,
                                     sPAPRPHBState *phb,
                                     PCIDevice *pdev,
                                     Error **errp)
{
    sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
    DeviceState *dev = DEVICE(pdev);
    int drc_index = drck->get_index(drc);
    const char *drc_name = drck->get_name(drc);
    void *fdt = NULL;
    int fdt_start_offset = 0;

    /* boot-time devices get their device tree node created by SLOF, but for
     * hotplugged devices we need QEMU to generate it so the guest can fetch
     * it via RTAS
     */
    if (dev->hotplugged) {
        fdt = spapr_create_pci_child_dt(phb, pdev, drc_index, drc_name,
                                        &fdt_start_offset);
    }

    drck->attach(drc, DEVICE(pdev),
                 fdt, fdt_start_offset, !dev->hotplugged, errp);
    if (*errp) {
        g_free(fdt);
    }
}

static void spapr_phb_remove_pci_device_cb(DeviceState *dev, void *opaque)
{
    /* some guest kernel versions do not wait for completion of a device
     * cleanup (generally done asynchronously by the kernel) before
     * signaling to QEMU that the device is safe, but instead sleep
     * for some 'safe' period of time. unfortunately on a busy host
     * this sleep isn't guaranteed to be long enough, resulting in
     * bad things like IRQ lines being left asserted during final
     * device removal. to deal with this we call reset just prior
     * to finalizing the device, which will put the device back into
     * an 'idle' state, as the device cleanup code expects.
     */
    pci_device_reset(PCI_DEVICE(dev));
    object_unparent(OBJECT(dev));
}

static void spapr_phb_remove_pci_device(sPAPRDRConnector *drc,
                                        sPAPRPHBState *phb,
                                        PCIDevice *pdev,
                                        Error **errp)
{
    sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);

    drck->detach(drc, DEVICE(pdev), spapr_phb_remove_pci_device_cb, phb, errp);
}
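/*
 * DRC indexes for PCI devices are encoded as
 * (phb->index << 16) | (busnr << 8) | devfn, matching the connectors
 * pre-created at realize time (which cover the root bus, busnr 0).
 */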
static sPAPRDRConnector *spapr_phb_get_pci_drc(sPAPRPHBState *phb,
                                               PCIDevice *pdev)
{
    uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))));
    return spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_PCI,
                                    (phb->index << 16) |
                                    (busnr << 8) |
                                    pdev->devfn);
}

static void spapr_phb_hot_plug_child(HotplugHandler *plug_handler,
                                     DeviceState *plugged_dev, Error **errp)
{
    sPAPRPHBState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler));
    PCIDevice *pdev = PCI_DEVICE(plugged_dev);
    sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev);
    Error *local_err = NULL;

    /* if DR is disabled we don't need to do anything in the case of
     * hotplug or coldplug callbacks
     */
    if (!phb->dr_enabled) {
        /* if this is a hotplug operation initiated by the user
         * we need to let them know it's not enabled
         */
        if (plugged_dev->hotplugged) {
            error_set(errp, QERR_BUS_NO_HOTPLUG,
                      object_get_typename(OBJECT(phb)));
        }
        return;
    }

    g_assert(drc);

    spapr_phb_add_pci_device(drc, phb, pdev, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }
}

static void spapr_phb_hot_unplug_child(HotplugHandler *plug_handler,
                                       DeviceState *plugged_dev, Error **errp)
{
    sPAPRPHBState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler));
    PCIDevice *pdev = PCI_DEVICE(plugged_dev);
    sPAPRDRConnectorClass *drck;
    sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev);
    Error *local_err = NULL;

    if (!phb->dr_enabled) {
        error_set(errp, QERR_BUS_NO_HOTPLUG,
                  object_get_typename(OBJECT(phb)));
        return;
    }

    g_assert(drc);

    drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
    if (!drck->release_pending(drc)) {
        spapr_phb_remove_pci_device(drc, phb, pdev, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }
}
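/*
 * PHB realize: resolves the "index" shorthand into concrete BUID,
 * LIOBN and window addresses, maps the MMIO/IO windows into the system
 * address space, registers the PCI root bus, and sets up the per-PHB
 * DMA address space, MSI window, LSIs and (optionally) DR connectors.
 */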
static void spapr_phb_realize(DeviceState *dev, Error **errp)
{
    SysBusDevice *s = SYS_BUS_DEVICE(dev);
    sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
    PCIHostState *phb = PCI_HOST_BRIDGE(s);
    sPAPRPHBClass *info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(s);
    char *namebuf;
    int i;
    PCIBus *bus;
    uint64_t msi_window_size = 4096;

    if (sphb->index != (uint32_t)-1) {
        hwaddr windows_base;

        if ((sphb->buid != (uint64_t)-1) || (sphb->dma_liobn != (uint32_t)-1)
            || (sphb->mem_win_addr != (hwaddr)-1)
            || (sphb->io_win_addr != (hwaddr)-1)) {
            error_setg(errp, "Either \"index\" or other parameters must"
                       " be specified for PAPR PHB, not both");
            return;
        }

        if (sphb->index > SPAPR_PCI_MAX_INDEX) {
            error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)",
                       SPAPR_PCI_MAX_INDEX);
            return;
        }

        sphb->buid = SPAPR_PCI_BASE_BUID + sphb->index;
        sphb->dma_liobn = SPAPR_PCI_LIOBN(sphb->index, 0);

        windows_base = SPAPR_PCI_WINDOW_BASE
            + sphb->index * SPAPR_PCI_WINDOW_SPACING;
        sphb->mem_win_addr = windows_base + SPAPR_PCI_MMIO_WIN_OFF;
        sphb->io_win_addr = windows_base + SPAPR_PCI_IO_WIN_OFF;
    }

    if (sphb->buid == (uint64_t)-1) {
        error_setg(errp, "BUID not specified for PHB");
        return;
    }

    if (sphb->dma_liobn == (uint32_t)-1) {
        error_setg(errp, "LIOBN not specified for PHB");
        return;
    }

    if (sphb->mem_win_addr == (hwaddr)-1) {
        error_setg(errp, "Memory window address not specified for PHB");
        return;
    }

    if (sphb->io_win_addr == (hwaddr)-1) {
        error_setg(errp, "IO window address not specified for PHB");
        return;
    }

    if (spapr_pci_find_phb(spapr, sphb->buid)) {
        error_setg(errp, "PCI host bridges must have unique BUIDs");
        return;
    }

    sphb->dtbusname = g_strdup_printf("pci@%" PRIx64, sphb->buid);

    namebuf = alloca(strlen(sphb->dtbusname) + 32);

    /* Initialize memory regions */
    sprintf(namebuf, "%s.mmio", sphb->dtbusname);
    memory_region_init(&sphb->memspace, OBJECT(sphb), namebuf, UINT64_MAX);

    sprintf(namebuf, "%s.mmio-alias", sphb->dtbusname);
    memory_region_init_alias(&sphb->memwindow, OBJECT(sphb),
                             namebuf, &sphb->memspace,
                             SPAPR_PCI_MEM_WIN_BUS_OFFSET, sphb->mem_win_size);
    memory_region_add_subregion(get_system_memory(), sphb->mem_win_addr,
                                &sphb->memwindow);

    /* Initialize IO regions */
    sprintf(namebuf, "%s.io", sphb->dtbusname);
    memory_region_init(&sphb->iospace, OBJECT(sphb),
                       namebuf, SPAPR_PCI_IO_WIN_SIZE);

    sprintf(namebuf, "%s.io-alias", sphb->dtbusname);
    memory_region_init_alias(&sphb->iowindow, OBJECT(sphb), namebuf,
                             &sphb->iospace, 0, SPAPR_PCI_IO_WIN_SIZE);
    memory_region_add_subregion(get_system_memory(), sphb->io_win_addr,
                                &sphb->iowindow);

    bus = pci_register_bus(dev, NULL,
                           pci_spapr_set_irq, pci_spapr_map_irq, sphb,
                           &sphb->memspace, &sphb->iospace,
                           PCI_DEVFN(0, 0), PCI_NUM_PINS, TYPE_PCI_BUS);
    phb->bus = bus;
    qbus_set_hotplug_handler(BUS(phb->bus), DEVICE(sphb), NULL);

    /*
     * Initialize PHB address space.
     * By default there will be at least one subregion for default
     * 32bit DMA window.
     * Later the guest might want to create another DMA window
     * which will become another memory subregion.
     */
    sprintf(namebuf, "%s.iommu-root", sphb->dtbusname);

    memory_region_init(&sphb->iommu_root, OBJECT(sphb),
                       namebuf, UINT64_MAX);
    address_space_init(&sphb->iommu_as, &sphb->iommu_root,
                       sphb->dtbusname);

    /*
     * As MSI/MSIX interrupts are triggered by writing to MSI/MSIX vectors,
     * we need to allocate some memory to catch those writes coming
     * from msi_notify()/msix_notify().
     * As MSIMessage:addr is going to be the same and MSIMessage:data
     * is going to be a VIRQ number, only 4 bytes of the MSI MR will
     * actually be used.
     *
     * For KVM we want to ensure that this memory is a full page so that
     * our memory slot is of page size granularity.
     */
#ifdef CONFIG_KVM
    if (kvm_enabled()) {
        msi_window_size = getpagesize();
    }
#endif

    memory_region_init_io(&sphb->msiwindow, NULL, &spapr_msi_ops, spapr,
                          "msi", msi_window_size);
    memory_region_add_subregion(&sphb->iommu_root, SPAPR_PCI_MSI_WINDOW,
                                &sphb->msiwindow);

    pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb);

    pci_bus_set_route_irq_fn(bus, spapr_route_intx_pin_to_irq);

    QLIST_INSERT_HEAD(&spapr->phbs, sphb, list);

    /* Initialize the LSI table */
    for (i = 0; i < PCI_NUM_PINS; i++) {
        uint32_t irq;

        irq = xics_alloc_block(spapr->icp, 0, 1, true, false);
        if (!irq) {
            error_setg(errp, "spapr_allocate_lsi failed");
            return;
        }

        sphb->lsi_table[i].irq = irq;
    }

    /* allocate connectors for child PCI devices */
    if (sphb->dr_enabled) {
        for (i = 0; i < PCI_SLOT_MAX * 8; i++) {
            spapr_dr_connector_new(OBJECT(phb),
                                   SPAPR_DR_CONNECTOR_TYPE_PCI,
                                   (sphb->index << 16) | i);
        }
    }

    if (!info->finish_realize) {
        error_setg(errp, "finish_realize not defined");
        return;
    }

    info->finish_realize(sphb, errp);

    sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free);
}
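/*
 * The default finish_realize hook creates the guest-visible 32-bit DMA
 * window: a TCE table covering SPAPR_PCI_DMA32_SIZE bytes in
 * SPAPR_TCE_PAGE_SHIFT-sized pages, mapped at bus address 0 inside
 * iommu_root.
 */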
static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp)
{
    sPAPRTCETable *tcet;
    uint32_t nb_table;

    nb_table = SPAPR_PCI_DMA32_SIZE >> SPAPR_TCE_PAGE_SHIFT;
    tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
                               0, SPAPR_TCE_PAGE_SHIFT, nb_table, false);
    if (!tcet) {
        error_setg(errp, "Unable to create TCE table for %s",
                   sphb->dtbusname);
        return;
    }

    /* Register default 32bit DMA window */
    memory_region_add_subregion(&sphb->iommu_root, 0,
                                spapr_tce_get_iommu(tcet));
}

static int spapr_phb_children_reset(Object *child, void *opaque)
{
    DeviceState *dev = (DeviceState *) object_dynamic_cast(child, TYPE_DEVICE);

    if (dev) {
        device_reset(dev);
    }

    return 0;
}

static void spapr_phb_reset(DeviceState *qdev)
{
    /* Reset the IOMMU state */
    object_child_foreach(OBJECT(qdev), spapr_phb_children_reset, NULL);
}

static Property spapr_phb_properties[] = {
    DEFINE_PROP_UINT32("index", sPAPRPHBState, index, -1),
    DEFINE_PROP_UINT64("buid", sPAPRPHBState, buid, -1),
    DEFINE_PROP_UINT32("liobn", sPAPRPHBState, dma_liobn, -1),
    DEFINE_PROP_UINT64("mem_win_addr", sPAPRPHBState, mem_win_addr, -1),
    DEFINE_PROP_UINT64("mem_win_size", sPAPRPHBState, mem_win_size,
                       SPAPR_PCI_MMIO_WIN_SIZE),
    DEFINE_PROP_UINT64("io_win_addr", sPAPRPHBState, io_win_addr, -1),
    DEFINE_PROP_UINT64("io_win_size", sPAPRPHBState, io_win_size,
                       SPAPR_PCI_IO_WIN_SIZE),
    DEFINE_PROP_BOOL("dynamic-reconfiguration", sPAPRPHBState, dr_enabled,
                     true),
    DEFINE_PROP_END_OF_LIST(),
};

static const VMStateDescription vmstate_spapr_pci_lsi = {
    .name = "spapr_pci/lsi",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_EQUAL(irq, struct spapr_pci_lsi),

        VMSTATE_END_OF_LIST()
    },
};
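/*
 * The MSI configuration cache is a GHashTable, which cannot be
 * migrated directly: pre_save flattens it into the msi_devs array
 * described by vmstate_spapr_pci_msi below, and post_load rebuilds the
 * hash table from that array.
 */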
static const VMStateDescription vmstate_spapr_pci_msi = {
    .name = "spapr_pci/msi",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(key, spapr_pci_msi_mig),
        VMSTATE_UINT32(value.first_irq, spapr_pci_msi_mig),
        VMSTATE_UINT32(value.num, spapr_pci_msi_mig),
        VMSTATE_END_OF_LIST()
    },
};

static void spapr_pci_fill_msi_devs(gpointer key, gpointer value,
                                    gpointer opaque)
{
    sPAPRPHBState *sphb = opaque;

    sphb->msi_devs[sphb->msi_devs_num].key = *(uint32_t *) key;
    sphb->msi_devs[sphb->msi_devs_num].value = *(spapr_pci_msi *) value;
    sphb->msi_devs_num++;
}

static void spapr_pci_pre_save(void *opaque)
{
    sPAPRPHBState *sphb = opaque;
    int msi_devs_num;

    if (sphb->msi_devs) {
        g_free(sphb->msi_devs);
        sphb->msi_devs = NULL;
    }
    sphb->msi_devs_num = 0;
    msi_devs_num = g_hash_table_size(sphb->msi);
    if (!msi_devs_num) {
        return;
    }
    sphb->msi_devs = g_malloc(msi_devs_num * sizeof(spapr_pci_msi_mig));

    g_hash_table_foreach(sphb->msi, spapr_pci_fill_msi_devs, sphb);
    assert(sphb->msi_devs_num == msi_devs_num);
}

static int spapr_pci_post_load(void *opaque, int version_id)
{
    sPAPRPHBState *sphb = opaque;
    gpointer key, value;
    int i;

    for (i = 0; i < sphb->msi_devs_num; ++i) {
        key = g_memdup(&sphb->msi_devs[i].key,
                       sizeof(sphb->msi_devs[i].key));
        value = g_memdup(&sphb->msi_devs[i].value,
                         sizeof(sphb->msi_devs[i].value));
        g_hash_table_insert(sphb->msi, key, value);
    }
    if (sphb->msi_devs) {
        g_free(sphb->msi_devs);
        sphb->msi_devs = NULL;
    }
    sphb->msi_devs_num = 0;

    return 0;
}

static const VMStateDescription vmstate_spapr_pci = {
    .name = "spapr_pci",
    .version_id = 2,
    .minimum_version_id = 2,
    .pre_save = spapr_pci_pre_save,
    .post_load = spapr_pci_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64_EQUAL(buid, sPAPRPHBState),
        VMSTATE_UINT32_EQUAL(dma_liobn, sPAPRPHBState),
        VMSTATE_UINT64_EQUAL(mem_win_addr, sPAPRPHBState),
        VMSTATE_UINT64_EQUAL(mem_win_size, sPAPRPHBState),
        VMSTATE_UINT64_EQUAL(io_win_addr, sPAPRPHBState),
        VMSTATE_UINT64_EQUAL(io_win_size, sPAPRPHBState),
        VMSTATE_STRUCT_ARRAY(lsi_table, sPAPRPHBState, PCI_NUM_PINS, 0,
                             vmstate_spapr_pci_lsi, struct spapr_pci_lsi),
        VMSTATE_INT32(msi_devs_num, sPAPRPHBState),
        VMSTATE_STRUCT_VARRAY_ALLOC(msi_devs, sPAPRPHBState, msi_devs_num, 0,
                                    vmstate_spapr_pci_msi, spapr_pci_msi_mig),
        VMSTATE_END_OF_LIST()
    },
};

static const char *spapr_phb_root_bus_path(PCIHostState *host_bridge,
                                           PCIBus *rootbus)
{
    sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(host_bridge);

    return sphb->dtbusname;
}
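/*
 * finish_realize (and the EEH hooks) are class methods so that
 * variants of this PHB, such as the VFIO host bridge, can override
 * them while sharing the rest of the realize path.
 */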
static void spapr_phb_class_init(ObjectClass *klass, void *data)
{
    PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
    DeviceClass *dc = DEVICE_CLASS(klass);
    sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass);
    HotplugHandlerClass *hp = HOTPLUG_HANDLER_CLASS(klass);

    hc->root_bus_path = spapr_phb_root_bus_path;
    dc->realize = spapr_phb_realize;
    dc->props = spapr_phb_properties;
    dc->reset = spapr_phb_reset;
    dc->vmsd = &vmstate_spapr_pci;
    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
    dc->cannot_instantiate_with_device_add_yet = false;
    spc->finish_realize = spapr_phb_finish_realize;
    hp->plug = spapr_phb_hot_plug_child;
    hp->unplug = spapr_phb_hot_unplug_child;
}

static const TypeInfo spapr_phb_info = {
    .name          = TYPE_SPAPR_PCI_HOST_BRIDGE,
    .parent        = TYPE_PCI_HOST_BRIDGE,
    .instance_size = sizeof(sPAPRPHBState),
    .class_init    = spapr_phb_class_init,
    .class_size    = sizeof(sPAPRPHBClass),
    .interfaces    = (InterfaceInfo[]) {
        { TYPE_HOTPLUG_HANDLER },
        { }
    }
};

PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index)
{
    DeviceState *dev;

    dev = qdev_create(NULL, TYPE_SPAPR_PCI_HOST_BRIDGE);
    qdev_prop_set_uint32(dev, "index", index);
    qdev_init_nofail(dev);

    return PCI_HOST_BRIDGE(dev);
}
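/*
 * Build the PHB's own device tree node: the "ranges" property
 * advertises the IO window, the 32-bit MMIO window at bus address
 * SPAPR_PCI_MEM_WIN_BUS_OFFSET and, when the memory window is large
 * enough, a 64-bit MMIO range starting at 4 GiB.
 */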
int spapr_populate_pci_dt(sPAPRPHBState *phb,
                          uint32_t xics_phandle,
                          void *fdt)
{
    int bus_off, i, j, ret;
    char nodename[256];
    uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) };
    const uint64_t mmiosize = memory_region_size(&phb->memwindow);
    const uint64_t w32max = (1ULL << 32) - SPAPR_PCI_MEM_WIN_BUS_OFFSET;
    const uint64_t w32size = MIN(w32max, mmiosize);
    const uint64_t w64size = (mmiosize > w32size) ? (mmiosize - w32size) : 0;
    struct {
        uint32_t hi;
        uint64_t child;
        uint64_t parent;
        uint64_t size;
    } QEMU_PACKED ranges[] = {
        {
            cpu_to_be32(b_ss(1)), cpu_to_be64(0),
            cpu_to_be64(phb->io_win_addr),
            cpu_to_be64(memory_region_size(&phb->iospace)),
        },
        {
            cpu_to_be32(b_ss(2)), cpu_to_be64(SPAPR_PCI_MEM_WIN_BUS_OFFSET),
            cpu_to_be64(phb->mem_win_addr),
            cpu_to_be64(w32size),
        },
        {
            cpu_to_be32(b_ss(3)), cpu_to_be64(1ULL << 32),
            cpu_to_be64(phb->mem_win_addr + w32size),
            cpu_to_be64(w64size)
        },
    };
    const unsigned sizeof_ranges = (w64size ? 3 : 2) * sizeof(ranges[0]);
    uint64_t bus_reg[] = { cpu_to_be64(phb->buid), 0 };
    uint32_t interrupt_map_mask[] = {
        cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)};
    uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7];
    sPAPRTCETable *tcet;

    /* Start populating the FDT */
    sprintf(nodename, "pci@%" PRIx64, phb->buid);
    bus_off = fdt_add_subnode(fdt, 0, nodename);
    if (bus_off < 0) {
        return bus_off;
    }

    /* Write PHB properties */
    _FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci"));
    _FDT(fdt_setprop_string(fdt, bus_off, "compatible", "IBM,Logical_PHB"));
    _FDT(fdt_setprop_cell(fdt, bus_off, "#address-cells", 0x3));
    _FDT(fdt_setprop_cell(fdt, bus_off, "#size-cells", 0x2));
    _FDT(fdt_setprop_cell(fdt, bus_off, "#interrupt-cells", 0x1));
    _FDT(fdt_setprop(fdt, bus_off, "used-by-rtas", NULL, 0));
    _FDT(fdt_setprop(fdt, bus_off, "bus-range", &bus_range, sizeof(bus_range)));
    _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof_ranges));
    _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg)));
    _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1));
    _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi", XICS_IRQS));

    /* Build the interrupt-map, this must match what is done
     * in pci_spapr_map_irq
     */
    _FDT(fdt_setprop(fdt, bus_off, "interrupt-map-mask",
                     &interrupt_map_mask, sizeof(interrupt_map_mask)));
    for (i = 0; i < PCI_SLOT_MAX; i++) {
        for (j = 0; j < PCI_NUM_PINS; j++) {
            uint32_t *irqmap = interrupt_map[i*PCI_NUM_PINS + j];
            int lsi_num = pci_spapr_swizzle(i, j);

            irqmap[0] = cpu_to_be32(b_ddddd(i)|b_fff(0));
            irqmap[1] = 0;
            irqmap[2] = 0;
            irqmap[3] = cpu_to_be32(j+1);
            irqmap[4] = cpu_to_be32(xics_phandle);
            irqmap[5] = cpu_to_be32(phb->lsi_table[lsi_num].irq);
            irqmap[6] = cpu_to_be32(0x8);
        }
    }
    /* Write interrupt map */
    _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
                     sizeof(interrupt_map)));

    tcet = spapr_tce_find_by_liobn(SPAPR_PCI_LIOBN(phb->index, 0));
    spapr_dma_dt(fdt, bus_off, "ibm,dma-window",
                 tcet->liobn, tcet->bus_offset,
                 tcet->nb_table << tcet->page_shift);

    ret = spapr_drc_populate_dt(fdt, bus_off, OBJECT(phb),
                                SPAPR_DR_CONNECTOR_TYPE_PCI);
    if (ret) {
        return ret;
    }

    return 0;
}
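/*
 * Register all PCI RTAS handlers with the sPAPR core; the MSI-related
 * calls are only registered when msi_supported is set.
 */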
void spapr_pci_rtas_init(void)
{
    spapr_rtas_register(RTAS_READ_PCI_CONFIG, "read-pci-config",
                        rtas_read_pci_config);
    spapr_rtas_register(RTAS_WRITE_PCI_CONFIG, "write-pci-config",
                        rtas_write_pci_config);
    spapr_rtas_register(RTAS_IBM_READ_PCI_CONFIG, "ibm,read-pci-config",
                        rtas_ibm_read_pci_config);
    spapr_rtas_register(RTAS_IBM_WRITE_PCI_CONFIG, "ibm,write-pci-config",
                        rtas_ibm_write_pci_config);
    if (msi_supported) {
        spapr_rtas_register(RTAS_IBM_QUERY_INTERRUPT_SOURCE_NUMBER,
                            "ibm,query-interrupt-source-number",
                            rtas_ibm_query_interrupt_source_number);
        spapr_rtas_register(RTAS_IBM_CHANGE_MSI, "ibm,change-msi",
                            rtas_ibm_change_msi);
    }

    spapr_rtas_register(RTAS_IBM_SET_EEH_OPTION,
                        "ibm,set-eeh-option",
                        rtas_ibm_set_eeh_option);
    spapr_rtas_register(RTAS_IBM_GET_CONFIG_ADDR_INFO2,
                        "ibm,get-config-addr-info2",
                        rtas_ibm_get_config_addr_info2);
    spapr_rtas_register(RTAS_IBM_READ_SLOT_RESET_STATE2,
                        "ibm,read-slot-reset-state2",
                        rtas_ibm_read_slot_reset_state2);
    spapr_rtas_register(RTAS_IBM_SET_SLOT_RESET,
                        "ibm,set-slot-reset",
                        rtas_ibm_set_slot_reset);
    spapr_rtas_register(RTAS_IBM_CONFIGURE_PE,
                        "ibm,configure-pe",
                        rtas_ibm_configure_pe);
    spapr_rtas_register(RTAS_IBM_SLOT_ERROR_DETAIL,
                        "ibm,slot-error-detail",
                        rtas_ibm_slot_error_detail);
}

static void spapr_pci_register_types(void)
{
    type_register_static(&spapr_phb_info);
}

type_init(spapr_pci_register_types)

static int spapr_switch_one_vga(DeviceState *dev, void *opaque)
{
    bool be = *(bool *)opaque;

    if (object_dynamic_cast(OBJECT(dev), "VGA")
        || object_dynamic_cast(OBJECT(dev), "secondary-vga")) {
        object_property_set_bool(OBJECT(dev), be, "big-endian-framebuffer",
                                 &error_abort);
    }
    return 0;
}

void spapr_pci_switch_vga(bool big_endian)
{
    sPAPRPHBState *sphb;

    /*
     * For backward compatibility with existing guests, we switch
     * the endianness of the VGA controller when changing the guest
     * interrupt mode
     */
    QLIST_FOREACH(sphb, &spapr->phbs, list) {
        BusState *bus = &PCI_HOST_BRIDGE(sphb)->bus->qbus;
        qbus_walk_children(bus, spapr_switch_one_vga, NULL, NULL, NULL,
                           &big_endian);
    }
}