1 /* 2 * PowerPC implementation of KVM hooks 3 * 4 * Copyright IBM Corp. 2007 5 * Copyright (C) 2011 Freescale Semiconductor, Inc. 6 * 7 * Authors: 8 * Jerone Young <jyoung5@us.ibm.com> 9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> 10 * Hollis Blanchard <hollisb@us.ibm.com> 11 * 12 * This work is licensed under the terms of the GNU GPL, version 2 or later. 13 * See the COPYING file in the top-level directory. 14 * 15 */ 16 17 #include "qemu/osdep.h" 18 #include <dirent.h> 19 #include <sys/ioctl.h> 20 #include <sys/vfs.h> 21 22 #include <linux/kvm.h> 23 24 #include "qemu-common.h" 25 #include "qemu/error-report.h" 26 #include "cpu.h" 27 #include "cpu-models.h" 28 #include "qemu/timer.h" 29 #include "sysemu/sysemu.h" 30 #include "sysemu/hw_accel.h" 31 #include "kvm_ppc.h" 32 #include "sysemu/cpus.h" 33 #include "sysemu/device_tree.h" 34 #include "mmu-hash64.h" 35 36 #include "hw/sysbus.h" 37 #include "hw/ppc/spapr.h" 38 #include "hw/ppc/spapr_vio.h" 39 #include "hw/ppc/spapr_cpu_core.h" 40 #include "hw/ppc/ppc.h" 41 #include "sysemu/watchdog.h" 42 #include "trace.h" 43 #include "exec/gdbstub.h" 44 #include "exec/memattrs.h" 45 #include "exec/ram_addr.h" 46 #include "sysemu/hostmem.h" 47 #include "qemu/cutils.h" 48 #include "qemu/mmap-alloc.h" 49 #if defined(TARGET_PPC64) 50 #include "hw/ppc/spapr_cpu_core.h" 51 #endif 52 #include "elf.h" 53 #include "sysemu/kvm_int.h" 54 55 //#define DEBUG_KVM 56 57 #ifdef DEBUG_KVM 58 #define DPRINTF(fmt, ...) \ 59 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) 60 #else 61 #define DPRINTF(fmt, ...) \ 62 do { } while (0) 63 #endif 64 65 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/" 66 67 const KVMCapabilityInfo kvm_arch_required_capabilities[] = { 68 KVM_CAP_LAST_INFO 69 }; 70 71 static int cap_interrupt_unset = false; 72 static int cap_interrupt_level = false; 73 static int cap_segstate; 74 static int cap_booke_sregs; 75 static int cap_ppc_smt; 76 static int cap_ppc_rma; 77 static int cap_spapr_tce; 78 static int cap_spapr_tce_64; 79 static int cap_spapr_multitce; 80 static int cap_spapr_vfio; 81 static int cap_hior; 82 static int cap_one_reg; 83 static int cap_epr; 84 static int cap_ppc_watchdog; 85 static int cap_papr; 86 static int cap_htab_fd; 87 static int cap_fixup_hcalls; 88 static int cap_htm; /* Hardware transactional memory support */ 89 90 static uint32_t debug_inst_opcode; 91 92 /* XXX We have a race condition where we actually have a level triggered 93 * interrupt, but the infrastructure can't expose that yet, so the guest 94 * takes but ignores it, goes to sleep and never gets notified that there's 95 * still an interrupt pending. 96 * 97 * As a quick workaround, let's just wake up again 20 ms after we injected 98 * an interrupt. That way we can assure that we're always reinjecting 99 * interrupts in case the guest swallowed them. 100 */ 101 static QEMUTimer *idle_timer; 102 103 static void kvm_kick_cpu(void *opaque) 104 { 105 PowerPCCPU *cpu = opaque; 106 107 qemu_cpu_kick(CPU(cpu)); 108 } 109 110 /* Check whether we are running with KVM-PR (instead of KVM-HV). This 111 * should only be used for fallback tests - generally we should use 112 * explicit capabilities for the features we want, rather than 113 * assuming what is/isn't available depending on the KVM variant. 
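 *
 * A minimal usage sketch (illustrative only, mirroring how this helper is
 * called later in this file); an explicit capability check is preferred
 * whenever one exists:
 *
 *     if (kvmppc_is_pr(cs->kvm_state)) {
 *         ... assume the conservative KVM-PR feature set ...
 *     }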
*/ 114 static bool kvmppc_is_pr(KVMState *ks) 115 { 116 /* Assume KVM-PR if the GET_PVINFO capability is available */ 117 return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0; 118 } 119 120 static int kvm_ppc_register_host_cpu_type(void); 121 122 int kvm_arch_init(MachineState *ms, KVMState *s) 123 { 124 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ); 125 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL); 126 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE); 127 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS); 128 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT); 129 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA); 130 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE); 131 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64); 132 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE); 133 cap_spapr_vfio = false; 134 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG); 135 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR); 136 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR); 137 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG); 138 /* Note: we don't set cap_papr here, because this capability is 139 * only activated after this by kvmppc_set_papr() */ 140 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD); 141 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL); 142 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM); 143 144 if (!cap_interrupt_level) { 145 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the " 146 "VM to stall at times!\n"); 147 } 148 149 kvm_ppc_register_host_cpu_type(); 150 151 return 0; 152 } 153 154 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s) 155 { 156 return 0; 157 } 158 159 static int kvm_arch_sync_sregs(PowerPCCPU *cpu) 160 { 161 CPUPPCState *cenv = &cpu->env; 162 CPUState *cs = CPU(cpu); 163 struct kvm_sregs sregs; 164 int ret; 165 166 if (cenv->excp_model == POWERPC_EXCP_BOOKE) { 167 /* What we're really trying to say is "if we're on BookE, we use 168 the native PVR for now". This is the only sane way to check 169 it though, so we potentially confuse users that they can run 170 BookE guests on BookS. 
           Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}

/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}


#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - Check whether we are running "PR" KVM which only supports 4K
     *   and 16M pages, but supports them regardless of the backing
     *   store characteristics. We also don't support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
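     *
     * For reference, the encodings this fallback ends up advertising
     * (taken from the code below, not from any kernel-provided SMMU
     * info) are:
     *
     *     page_shift 12 (4K)  : slb_enc 0,          pte_enc 0
     *     page_shift 16 (64K) : slb_enc 0x110,      pte_enc 1  (HV, MMU 2.06/2.07 only)
     *     page_shift 24 (16M) : slb_enc SLB_VSID_L, pte_enc 0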
263 */ 264 if (kvmppc_is_pr(cs->kvm_state)) { 265 /* No flags */ 266 info->flags = 0; 267 info->slb_size = 64; 268 269 /* Standard 4k base page size segment */ 270 info->sps[0].page_shift = 12; 271 info->sps[0].slb_enc = 0; 272 info->sps[0].enc[0].page_shift = 12; 273 info->sps[0].enc[0].pte_enc = 0; 274 275 /* Standard 16M large page size segment */ 276 info->sps[1].page_shift = 24; 277 info->sps[1].slb_enc = SLB_VSID_L; 278 info->sps[1].enc[0].page_shift = 24; 279 info->sps[1].enc[0].pte_enc = 0; 280 } else { 281 int i = 0; 282 283 /* HV KVM has backing store size restrictions */ 284 info->flags = KVM_PPC_PAGE_SIZES_REAL; 285 286 if (env->mmu_model & POWERPC_MMU_1TSEG) { 287 info->flags |= KVM_PPC_1T_SEGMENTS; 288 } 289 290 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 || 291 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) { 292 info->slb_size = 32; 293 } else { 294 info->slb_size = 64; 295 } 296 297 /* Standard 4k base page size segment */ 298 info->sps[i].page_shift = 12; 299 info->sps[i].slb_enc = 0; 300 info->sps[i].enc[0].page_shift = 12; 301 info->sps[i].enc[0].pte_enc = 0; 302 i++; 303 304 /* 64K on MMU 2.06 and later */ 305 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 || 306 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) { 307 info->sps[i].page_shift = 16; 308 info->sps[i].slb_enc = 0x110; 309 info->sps[i].enc[0].page_shift = 16; 310 info->sps[i].enc[0].pte_enc = 1; 311 i++; 312 } 313 314 /* Standard 16M large page size segment */ 315 info->sps[i].page_shift = 24; 316 info->sps[i].slb_enc = SLB_VSID_L; 317 info->sps[i].enc[0].page_shift = 24; 318 info->sps[i].enc[0].pte_enc = 0; 319 } 320 } 321 322 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info) 323 { 324 CPUState *cs = CPU(cpu); 325 int ret; 326 327 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) { 328 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info); 329 if (ret == 0) { 330 return; 331 } 332 } 333 334 kvm_get_fallback_smmu_info(cpu, info); 335 } 336 337 struct ppc_radix_page_info *kvm_get_radix_page_info(void) 338 { 339 KVMState *s = KVM_STATE(current_machine->accelerator); 340 struct ppc_radix_page_info *radix_page_info; 341 struct kvm_ppc_rmmu_info rmmu_info; 342 int i; 343 344 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) { 345 return NULL; 346 } 347 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) { 348 return NULL; 349 } 350 radix_page_info = g_malloc0(sizeof(*radix_page_info)); 351 radix_page_info->count = 0; 352 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) { 353 if (rmmu_info.ap_encodings[i]) { 354 radix_page_info->entries[i] = rmmu_info.ap_encodings[i]; 355 radix_page_info->count++; 356 } 357 } 358 return radix_page_info; 359 } 360 361 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift) 362 { 363 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) { 364 return true; 365 } 366 367 return (1ul << shift) <= rampgsize; 368 } 369 370 static long max_cpu_page_size; 371 372 static void kvm_fixup_page_sizes(PowerPCCPU *cpu) 373 { 374 static struct kvm_ppc_smmu_info smmu_info; 375 static bool has_smmu_info; 376 CPUPPCState *env = &cpu->env; 377 int iq, ik, jq, jk; 378 bool has_64k_pages = false; 379 380 /* We only handle page sizes for 64-bit server guests for now */ 381 if (!(env->mmu_model & POWERPC_MMU_64)) { 382 return; 383 } 384 385 /* Collect MMU info from kernel if not already */ 386 if (!has_smmu_info) { 387 kvm_get_smmu_info(cpu, &smmu_info); 388 has_smmu_info = true; 389 } 390 391 
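    /*
     * Worked example (illustrative) of the kvm_valid_page_size() check
     * used in the loop below: with KVM_PPC_PAGE_SIZES_REAL set and guest
     * RAM backed by 64K pages (rampgsize == 0x10000), shifts 12 (4K) and
     * 16 (64K) pass, while shift 24 (16M) is rejected because
     * (1ul << 24) > 0x10000.
     */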
if (!max_cpu_page_size) { 392 max_cpu_page_size = qemu_getrampagesize(); 393 } 394 395 /* Convert to QEMU form */ 396 memset(&env->sps, 0, sizeof(env->sps)); 397 398 /* If we have HV KVM, we need to forbid CI large pages if our 399 * host page size is smaller than 64K. 400 */ 401 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) { 402 env->ci_large_pages = getpagesize() >= 0x10000; 403 } 404 405 /* 406 * XXX This loop should be an entry wide AND of the capabilities that 407 * the selected CPU has with the capabilities that KVM supports. 408 */ 409 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) { 410 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq]; 411 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik]; 412 413 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size, 414 ksps->page_shift)) { 415 continue; 416 } 417 qsps->page_shift = ksps->page_shift; 418 qsps->slb_enc = ksps->slb_enc; 419 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) { 420 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size, 421 ksps->enc[jk].page_shift)) { 422 continue; 423 } 424 if (ksps->enc[jk].page_shift == 16) { 425 has_64k_pages = true; 426 } 427 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift; 428 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc; 429 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) { 430 break; 431 } 432 } 433 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) { 434 break; 435 } 436 } 437 env->slb_nr = smmu_info.slb_size; 438 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) { 439 env->mmu_model &= ~POWERPC_MMU_1TSEG; 440 } 441 if (!has_64k_pages) { 442 env->mmu_model &= ~POWERPC_MMU_64K; 443 } 444 } 445 446 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path) 447 { 448 Object *mem_obj = object_resolve_path(obj_path, NULL); 449 char *mempath = object_property_get_str(mem_obj, "mem-path", NULL); 450 long pagesize; 451 452 if (mempath) { 453 pagesize = qemu_mempath_getpagesize(mempath); 454 } else { 455 pagesize = getpagesize(); 456 } 457 458 return pagesize >= max_cpu_page_size; 459 } 460 461 #else /* defined (TARGET_PPC64) */ 462 463 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu) 464 { 465 } 466 467 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path) 468 { 469 return true; 470 } 471 472 #endif /* !defined (TARGET_PPC64) */ 473 474 unsigned long kvm_arch_vcpu_id(CPUState *cpu) 475 { 476 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu)); 477 } 478 479 /* e500 supports 2 h/w breakpoint and 2 watchpoint. 480 * book3s supports only 1 watchpoint, so array size 481 * of 4 is sufficient for now. 
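 * (e500's worst case of 2 breakpoints + 2 watchpoints is what the
 * MAX_HW_BKPTS value below is sized for.)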
482 */ 483 #define MAX_HW_BKPTS 4 484 485 static struct HWBreakpoint { 486 target_ulong addr; 487 int type; 488 } hw_debug_points[MAX_HW_BKPTS]; 489 490 static CPUWatchpoint hw_watchpoint; 491 492 /* Default there is no breakpoint and watchpoint supported */ 493 static int max_hw_breakpoint; 494 static int max_hw_watchpoint; 495 static int nb_hw_breakpoint; 496 static int nb_hw_watchpoint; 497 498 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv) 499 { 500 if (cenv->excp_model == POWERPC_EXCP_BOOKE) { 501 max_hw_breakpoint = 2; 502 max_hw_watchpoint = 2; 503 } 504 505 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) { 506 fprintf(stderr, "Error initializing h/w breakpoints\n"); 507 return; 508 } 509 } 510 511 int kvm_arch_init_vcpu(CPUState *cs) 512 { 513 PowerPCCPU *cpu = POWERPC_CPU(cs); 514 CPUPPCState *cenv = &cpu->env; 515 int ret; 516 517 /* Gather server mmu info from KVM and update the CPU state */ 518 kvm_fixup_page_sizes(cpu); 519 520 /* Synchronize sregs with kvm */ 521 ret = kvm_arch_sync_sregs(cpu); 522 if (ret) { 523 if (ret == -EINVAL) { 524 error_report("Register sync failed... If you're using kvm-hv.ko," 525 " only \"-cpu host\" is possible"); 526 } 527 return ret; 528 } 529 530 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu); 531 532 switch (cenv->mmu_model) { 533 case POWERPC_MMU_BOOKE206: 534 /* This target supports access to KVM's guest TLB */ 535 ret = kvm_booke206_tlb_init(cpu); 536 break; 537 case POWERPC_MMU_2_07: 538 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) { 539 /* KVM-HV has transactional memory on POWER8 also without the 540 * KVM_CAP_PPC_HTM extension, so enable it here instead as 541 * long as it's availble to userspace on the host. */ 542 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) { 543 cap_htm = true; 544 } 545 } 546 break; 547 default: 548 break; 549 } 550 551 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode); 552 kvmppc_hw_debug_points_init(cenv); 553 554 return ret; 555 } 556 557 static void kvm_sw_tlb_put(PowerPCCPU *cpu) 558 { 559 CPUPPCState *env = &cpu->env; 560 CPUState *cs = CPU(cpu); 561 struct kvm_dirty_tlb dirty_tlb; 562 unsigned char *bitmap; 563 int ret; 564 565 if (!env->kvm_sw_tlb) { 566 return; 567 } 568 569 bitmap = g_malloc((env->nb_tlb + 7) / 8); 570 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8); 571 572 dirty_tlb.bitmap = (uintptr_t)bitmap; 573 dirty_tlb.num_dirty = env->nb_tlb; 574 575 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb); 576 if (ret) { 577 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n", 578 __func__, strerror(-ret)); 579 } 580 581 g_free(bitmap); 582 } 583 584 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr) 585 { 586 PowerPCCPU *cpu = POWERPC_CPU(cs); 587 CPUPPCState *env = &cpu->env; 588 union { 589 uint32_t u32; 590 uint64_t u64; 591 } val; 592 struct kvm_one_reg reg = { 593 .id = id, 594 .addr = (uintptr_t) &val, 595 }; 596 int ret; 597 598 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 599 if (ret != 0) { 600 trace_kvm_failed_spr_get(spr, strerror(errno)); 601 } else { 602 switch (id & KVM_REG_SIZE_MASK) { 603 case KVM_REG_SIZE_U32: 604 env->spr[spr] = val.u32; 605 break; 606 607 case KVM_REG_SIZE_U64: 608 env->spr[spr] = val.u64; 609 break; 610 611 default: 612 /* Don't handle this size yet */ 613 abort(); 614 } 615 } 616 } 617 618 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr) 619 { 620 PowerPCCPU *cpu = POWERPC_CPU(cs); 621 CPUPPCState *env = &cpu->env; 622 union { 623 uint32_t u32; 624 uint64_t u64; 625 } val; 626 struct 
kvm_one_reg reg = { 627 .id = id, 628 .addr = (uintptr_t) &val, 629 }; 630 int ret; 631 632 switch (id & KVM_REG_SIZE_MASK) { 633 case KVM_REG_SIZE_U32: 634 val.u32 = env->spr[spr]; 635 break; 636 637 case KVM_REG_SIZE_U64: 638 val.u64 = env->spr[spr]; 639 break; 640 641 default: 642 /* Don't handle this size yet */ 643 abort(); 644 } 645 646 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 647 if (ret != 0) { 648 trace_kvm_failed_spr_set(spr, strerror(errno)); 649 } 650 } 651 652 static int kvm_put_fp(CPUState *cs) 653 { 654 PowerPCCPU *cpu = POWERPC_CPU(cs); 655 CPUPPCState *env = &cpu->env; 656 struct kvm_one_reg reg; 657 int i; 658 int ret; 659 660 if (env->insns_flags & PPC_FLOAT) { 661 uint64_t fpscr = env->fpscr; 662 bool vsx = !!(env->insns_flags2 & PPC2_VSX); 663 664 reg.id = KVM_REG_PPC_FPSCR; 665 reg.addr = (uintptr_t)&fpscr; 666 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 667 if (ret < 0) { 668 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno)); 669 return ret; 670 } 671 672 for (i = 0; i < 32; i++) { 673 uint64_t vsr[2]; 674 675 #ifdef HOST_WORDS_BIGENDIAN 676 vsr[0] = float64_val(env->fpr[i]); 677 vsr[1] = env->vsr[i]; 678 #else 679 vsr[0] = env->vsr[i]; 680 vsr[1] = float64_val(env->fpr[i]); 681 #endif 682 reg.addr = (uintptr_t) &vsr; 683 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i); 684 685 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 686 if (ret < 0) { 687 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR", 688 i, strerror(errno)); 689 return ret; 690 } 691 } 692 } 693 694 if (env->insns_flags & PPC_ALTIVEC) { 695 reg.id = KVM_REG_PPC_VSCR; 696 reg.addr = (uintptr_t)&env->vscr; 697 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 698 if (ret < 0) { 699 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno)); 700 return ret; 701 } 702 703 for (i = 0; i < 32; i++) { 704 reg.id = KVM_REG_PPC_VR(i); 705 reg.addr = (uintptr_t)&env->avr[i]; 706 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 707 if (ret < 0) { 708 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno)); 709 return ret; 710 } 711 } 712 } 713 714 return 0; 715 } 716 717 static int kvm_get_fp(CPUState *cs) 718 { 719 PowerPCCPU *cpu = POWERPC_CPU(cs); 720 CPUPPCState *env = &cpu->env; 721 struct kvm_one_reg reg; 722 int i; 723 int ret; 724 725 if (env->insns_flags & PPC_FLOAT) { 726 uint64_t fpscr; 727 bool vsx = !!(env->insns_flags2 & PPC2_VSX); 728 729 reg.id = KVM_REG_PPC_FPSCR; 730 reg.addr = (uintptr_t)&fpscr; 731 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 732 if (ret < 0) { 733 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno)); 734 return ret; 735 } else { 736 env->fpscr = fpscr; 737 } 738 739 for (i = 0; i < 32; i++) { 740 uint64_t vsr[2]; 741 742 reg.addr = (uintptr_t) &vsr; 743 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i); 744 745 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 746 if (ret < 0) { 747 DPRINTF("Unable to get %s%d from KVM: %s\n", 748 vsx ? 
"VSR" : "FPR", i, strerror(errno)); 749 return ret; 750 } else { 751 #ifdef HOST_WORDS_BIGENDIAN 752 env->fpr[i] = vsr[0]; 753 if (vsx) { 754 env->vsr[i] = vsr[1]; 755 } 756 #else 757 env->fpr[i] = vsr[1]; 758 if (vsx) { 759 env->vsr[i] = vsr[0]; 760 } 761 #endif 762 } 763 } 764 } 765 766 if (env->insns_flags & PPC_ALTIVEC) { 767 reg.id = KVM_REG_PPC_VSCR; 768 reg.addr = (uintptr_t)&env->vscr; 769 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 770 if (ret < 0) { 771 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno)); 772 return ret; 773 } 774 775 for (i = 0; i < 32; i++) { 776 reg.id = KVM_REG_PPC_VR(i); 777 reg.addr = (uintptr_t)&env->avr[i]; 778 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 779 if (ret < 0) { 780 DPRINTF("Unable to get VR%d from KVM: %s\n", 781 i, strerror(errno)); 782 return ret; 783 } 784 } 785 } 786 787 return 0; 788 } 789 790 #if defined(TARGET_PPC64) 791 static int kvm_get_vpa(CPUState *cs) 792 { 793 PowerPCCPU *cpu = POWERPC_CPU(cs); 794 CPUPPCState *env = &cpu->env; 795 struct kvm_one_reg reg; 796 int ret; 797 798 reg.id = KVM_REG_PPC_VPA_ADDR; 799 reg.addr = (uintptr_t)&env->vpa_addr; 800 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 801 if (ret < 0) { 802 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno)); 803 return ret; 804 } 805 806 assert((uintptr_t)&env->slb_shadow_size 807 == ((uintptr_t)&env->slb_shadow_addr + 8)); 808 reg.id = KVM_REG_PPC_VPA_SLB; 809 reg.addr = (uintptr_t)&env->slb_shadow_addr; 810 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 811 if (ret < 0) { 812 DPRINTF("Unable to get SLB shadow state from KVM: %s\n", 813 strerror(errno)); 814 return ret; 815 } 816 817 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8)); 818 reg.id = KVM_REG_PPC_VPA_DTL; 819 reg.addr = (uintptr_t)&env->dtl_addr; 820 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 821 if (ret < 0) { 822 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n", 823 strerror(errno)); 824 return ret; 825 } 826 827 return 0; 828 } 829 830 static int kvm_put_vpa(CPUState *cs) 831 { 832 PowerPCCPU *cpu = POWERPC_CPU(cs); 833 CPUPPCState *env = &cpu->env; 834 struct kvm_one_reg reg; 835 int ret; 836 837 /* SLB shadow or DTL can't be registered unless a master VPA is 838 * registered. That means when restoring state, if a VPA *is* 839 * registered, we need to set that up first. 
If not, we need to 840 * deregister the others before deregistering the master VPA */ 841 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr)); 842 843 if (env->vpa_addr) { 844 reg.id = KVM_REG_PPC_VPA_ADDR; 845 reg.addr = (uintptr_t)&env->vpa_addr; 846 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 847 if (ret < 0) { 848 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno)); 849 return ret; 850 } 851 } 852 853 assert((uintptr_t)&env->slb_shadow_size 854 == ((uintptr_t)&env->slb_shadow_addr + 8)); 855 reg.id = KVM_REG_PPC_VPA_SLB; 856 reg.addr = (uintptr_t)&env->slb_shadow_addr; 857 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 858 if (ret < 0) { 859 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno)); 860 return ret; 861 } 862 863 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8)); 864 reg.id = KVM_REG_PPC_VPA_DTL; 865 reg.addr = (uintptr_t)&env->dtl_addr; 866 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 867 if (ret < 0) { 868 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n", 869 strerror(errno)); 870 return ret; 871 } 872 873 if (!env->vpa_addr) { 874 reg.id = KVM_REG_PPC_VPA_ADDR; 875 reg.addr = (uintptr_t)&env->vpa_addr; 876 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 877 if (ret < 0) { 878 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno)); 879 return ret; 880 } 881 } 882 883 return 0; 884 } 885 #endif /* TARGET_PPC64 */ 886 887 int kvmppc_put_books_sregs(PowerPCCPU *cpu) 888 { 889 CPUPPCState *env = &cpu->env; 890 struct kvm_sregs sregs; 891 int i; 892 893 sregs.pvr = env->spr[SPR_PVR]; 894 895 sregs.u.s.sdr1 = env->spr[SPR_SDR1]; 896 897 /* Sync SLB */ 898 #ifdef TARGET_PPC64 899 for (i = 0; i < ARRAY_SIZE(env->slb); i++) { 900 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid; 901 if (env->slb[i].esid & SLB_ESID_V) { 902 sregs.u.s.ppc64.slb[i].slbe |= i; 903 } 904 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid; 905 } 906 #endif 907 908 /* Sync SRs */ 909 for (i = 0; i < 16; i++) { 910 sregs.u.s.ppc32.sr[i] = env->sr[i]; 911 } 912 913 /* Sync BATs */ 914 for (i = 0; i < 8; i++) { 915 /* Beware. 
We have to swap upper and lower bits here */ 916 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32) 917 | env->DBAT[1][i]; 918 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32) 919 | env->IBAT[1][i]; 920 } 921 922 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs); 923 } 924 925 int kvm_arch_put_registers(CPUState *cs, int level) 926 { 927 PowerPCCPU *cpu = POWERPC_CPU(cs); 928 CPUPPCState *env = &cpu->env; 929 struct kvm_regs regs; 930 int ret; 931 int i; 932 933 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s); 934 if (ret < 0) { 935 return ret; 936 } 937 938 regs.ctr = env->ctr; 939 regs.lr = env->lr; 940 regs.xer = cpu_read_xer(env); 941 regs.msr = env->msr; 942 regs.pc = env->nip; 943 944 regs.srr0 = env->spr[SPR_SRR0]; 945 regs.srr1 = env->spr[SPR_SRR1]; 946 947 regs.sprg0 = env->spr[SPR_SPRG0]; 948 regs.sprg1 = env->spr[SPR_SPRG1]; 949 regs.sprg2 = env->spr[SPR_SPRG2]; 950 regs.sprg3 = env->spr[SPR_SPRG3]; 951 regs.sprg4 = env->spr[SPR_SPRG4]; 952 regs.sprg5 = env->spr[SPR_SPRG5]; 953 regs.sprg6 = env->spr[SPR_SPRG6]; 954 regs.sprg7 = env->spr[SPR_SPRG7]; 955 956 regs.pid = env->spr[SPR_BOOKE_PID]; 957 958 for (i = 0;i < 32; i++) 959 regs.gpr[i] = env->gpr[i]; 960 961 regs.cr = 0; 962 for (i = 0; i < 8; i++) { 963 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i)); 964 } 965 966 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, ®s); 967 if (ret < 0) 968 return ret; 969 970 kvm_put_fp(cs); 971 972 if (env->tlb_dirty) { 973 kvm_sw_tlb_put(cpu); 974 env->tlb_dirty = false; 975 } 976 977 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) { 978 ret = kvmppc_put_books_sregs(cpu); 979 if (ret < 0) { 980 return ret; 981 } 982 } 983 984 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) { 985 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR); 986 } 987 988 if (cap_one_reg) { 989 int i; 990 991 /* We deliberately ignore errors here, for kernels which have 992 * the ONE_REG calls, but don't support the specific 993 * registers, there's a reasonable chance things will still 994 * work, at least until we try to migrate. 
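         *
         * An SPR whose spr_cb[].one_reg_id is 0 simply has no ONE_REG
         * mapping and is skipped; for the rest, the register width is
         * encoded in the id itself (kvm_put_one_spr() dispatches on
         * id & KVM_REG_SIZE_MASK).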
*/ 995 for (i = 0; i < 1024; i++) { 996 uint64_t id = env->spr_cb[i].one_reg_id; 997 998 if (id != 0) { 999 kvm_put_one_spr(cs, id, i); 1000 } 1001 } 1002 1003 #ifdef TARGET_PPC64 1004 if (msr_ts) { 1005 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1006 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1007 } 1008 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1009 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1010 } 1011 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1012 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1013 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1014 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1015 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1016 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1017 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1018 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1019 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1020 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1021 } 1022 1023 if (cap_papr) { 1024 if (kvm_put_vpa(cs) < 0) { 1025 DPRINTF("Warning: Unable to set VPA information to KVM\n"); 1026 } 1027 } 1028 1029 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1030 #endif /* TARGET_PPC64 */ 1031 } 1032 1033 return ret; 1034 } 1035 1036 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor) 1037 { 1038 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR]; 1039 } 1040 1041 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu) 1042 { 1043 CPUPPCState *env = &cpu->env; 1044 struct kvm_sregs sregs; 1045 int ret; 1046 1047 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1048 if (ret < 0) { 1049 return ret; 1050 } 1051 1052 if (sregs.u.e.features & KVM_SREGS_E_BASE) { 1053 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0; 1054 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1; 1055 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr; 1056 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear; 1057 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr; 1058 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr; 1059 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr; 1060 env->spr[SPR_DECR] = sregs.u.e.dec; 1061 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff; 1062 env->spr[SPR_TBU] = sregs.u.e.tb >> 32; 1063 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave; 1064 } 1065 1066 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) { 1067 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir; 1068 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0; 1069 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1; 1070 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar; 1071 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr; 1072 } 1073 1074 if (sregs.u.e.features & KVM_SREGS_E_64) { 1075 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr; 1076 } 1077 1078 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) { 1079 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8; 1080 } 1081 1082 if (sregs.u.e.features & KVM_SREGS_E_IVOR) { 1083 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0]; 1084 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0); 1085 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1]; 1086 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1); 1087 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2]; 1088 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2); 1089 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3]; 1090 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3); 1091 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4]; 1092 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4); 1093 
env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5]; 1094 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5); 1095 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6]; 1096 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6); 1097 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7]; 1098 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7); 1099 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8]; 1100 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8); 1101 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9]; 1102 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9); 1103 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10]; 1104 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10); 1105 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11]; 1106 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11); 1107 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12]; 1108 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12); 1109 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13]; 1110 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13); 1111 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14]; 1112 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14); 1113 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15]; 1114 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15); 1115 1116 if (sregs.u.e.features & KVM_SREGS_E_SPE) { 1117 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0]; 1118 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32); 1119 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1]; 1120 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33); 1121 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2]; 1122 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34); 1123 } 1124 1125 if (sregs.u.e.features & KVM_SREGS_E_PM) { 1126 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3]; 1127 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35); 1128 } 1129 1130 if (sregs.u.e.features & KVM_SREGS_E_PC) { 1131 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4]; 1132 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36); 1133 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5]; 1134 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37); 1135 } 1136 } 1137 1138 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) { 1139 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0; 1140 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1; 1141 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2; 1142 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff; 1143 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4; 1144 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6; 1145 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32; 1146 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg; 1147 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0]; 1148 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1]; 1149 } 1150 1151 if (sregs.u.e.features & KVM_SREGS_EXP) { 1152 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr; 1153 } 1154 1155 if (sregs.u.e.features & KVM_SREGS_E_PD) { 1156 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc; 1157 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc; 1158 } 1159 1160 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { 1161 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr; 1162 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar; 1163 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0; 1164 1165 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) { 1166 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1; 1167 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2; 1168 } 1169 } 1170 1171 return 0; 1172 } 1173 1174 static int 
kvmppc_get_books_sregs(PowerPCCPU *cpu) 1175 { 1176 CPUPPCState *env = &cpu->env; 1177 struct kvm_sregs sregs; 1178 int ret; 1179 int i; 1180 1181 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1182 if (ret < 0) { 1183 return ret; 1184 } 1185 1186 if (!cpu->vhyp) { 1187 ppc_store_sdr1(env, sregs.u.s.sdr1); 1188 } 1189 1190 /* Sync SLB */ 1191 #ifdef TARGET_PPC64 1192 /* 1193 * The packed SLB array we get from KVM_GET_SREGS only contains 1194 * information about valid entries. So we flush our internal copy 1195 * to get rid of stale ones, then put all valid SLB entries back 1196 * in. 1197 */ 1198 memset(env->slb, 0, sizeof(env->slb)); 1199 for (i = 0; i < ARRAY_SIZE(env->slb); i++) { 1200 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe; 1201 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv; 1202 /* 1203 * Only restore valid entries 1204 */ 1205 if (rb & SLB_ESID_V) { 1206 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs); 1207 } 1208 } 1209 #endif 1210 1211 /* Sync SRs */ 1212 for (i = 0; i < 16; i++) { 1213 env->sr[i] = sregs.u.s.ppc32.sr[i]; 1214 } 1215 1216 /* Sync BATs */ 1217 for (i = 0; i < 8; i++) { 1218 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff; 1219 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32; 1220 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff; 1221 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32; 1222 } 1223 1224 return 0; 1225 } 1226 1227 int kvm_arch_get_registers(CPUState *cs) 1228 { 1229 PowerPCCPU *cpu = POWERPC_CPU(cs); 1230 CPUPPCState *env = &cpu->env; 1231 struct kvm_regs regs; 1232 uint32_t cr; 1233 int i, ret; 1234 1235 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s); 1236 if (ret < 0) 1237 return ret; 1238 1239 cr = regs.cr; 1240 for (i = 7; i >= 0; i--) { 1241 env->crf[i] = cr & 15; 1242 cr >>= 4; 1243 } 1244 1245 env->ctr = regs.ctr; 1246 env->lr = regs.lr; 1247 cpu_write_xer(env, regs.xer); 1248 env->msr = regs.msr; 1249 env->nip = regs.pc; 1250 1251 env->spr[SPR_SRR0] = regs.srr0; 1252 env->spr[SPR_SRR1] = regs.srr1; 1253 1254 env->spr[SPR_SPRG0] = regs.sprg0; 1255 env->spr[SPR_SPRG1] = regs.sprg1; 1256 env->spr[SPR_SPRG2] = regs.sprg2; 1257 env->spr[SPR_SPRG3] = regs.sprg3; 1258 env->spr[SPR_SPRG4] = regs.sprg4; 1259 env->spr[SPR_SPRG5] = regs.sprg5; 1260 env->spr[SPR_SPRG6] = regs.sprg6; 1261 env->spr[SPR_SPRG7] = regs.sprg7; 1262 1263 env->spr[SPR_BOOKE_PID] = regs.pid; 1264 1265 for (i = 0;i < 32; i++) 1266 env->gpr[i] = regs.gpr[i]; 1267 1268 kvm_get_fp(cs); 1269 1270 if (cap_booke_sregs) { 1271 ret = kvmppc_get_booke_sregs(cpu); 1272 if (ret < 0) { 1273 return ret; 1274 } 1275 } 1276 1277 if (cap_segstate) { 1278 ret = kvmppc_get_books_sregs(cpu); 1279 if (ret < 0) { 1280 return ret; 1281 } 1282 } 1283 1284 if (cap_hior) { 1285 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR); 1286 } 1287 1288 if (cap_one_reg) { 1289 int i; 1290 1291 /* We deliberately ignore errors here, for kernels which have 1292 * the ONE_REG calls, but don't support the specific 1293 * registers, there's a reasonable chance things will still 1294 * work, at least until we try to migrate. 
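         *
         * The msr_ts-guarded block further below only runs when MSR[TS]
         * is non-zero, i.e. the vCPU was in a transactional or suspended
         * state, since the checkpointed tm_* registers only carry
         * meaningful data in that case (mirroring the put side above).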
*/ 1295 for (i = 0; i < 1024; i++) { 1296 uint64_t id = env->spr_cb[i].one_reg_id; 1297 1298 if (id != 0) { 1299 kvm_get_one_spr(cs, id, i); 1300 } 1301 } 1302 1303 #ifdef TARGET_PPC64 1304 if (msr_ts) { 1305 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1306 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1307 } 1308 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1309 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1310 } 1311 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1312 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1313 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1314 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1315 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1316 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1317 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1318 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1319 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1320 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1321 } 1322 1323 if (cap_papr) { 1324 if (kvm_get_vpa(cs) < 0) { 1325 DPRINTF("Warning: Unable to get VPA information from KVM\n"); 1326 } 1327 } 1328 1329 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1330 #endif 1331 } 1332 1333 return 0; 1334 } 1335 1336 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level) 1337 { 1338 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET; 1339 1340 if (irq != PPC_INTERRUPT_EXT) { 1341 return 0; 1342 } 1343 1344 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) { 1345 return 0; 1346 } 1347 1348 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq); 1349 1350 return 0; 1351 } 1352 1353 #if defined(TARGET_PPCEMB) 1354 #define PPC_INPUT_INT PPC40x_INPUT_INT 1355 #elif defined(TARGET_PPC64) 1356 #define PPC_INPUT_INT PPC970_INPUT_INT 1357 #else 1358 #define PPC_INPUT_INT PPC6xx_INPUT_INT 1359 #endif 1360 1361 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) 1362 { 1363 PowerPCCPU *cpu = POWERPC_CPU(cs); 1364 CPUPPCState *env = &cpu->env; 1365 int r; 1366 unsigned irq; 1367 1368 qemu_mutex_lock_iothread(); 1369 1370 /* PowerPC QEMU tracks the various core input pins (interrupt, critical 1371 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */ 1372 if (!cap_interrupt_level && 1373 run->ready_for_interrupt_injection && 1374 (cs->interrupt_request & CPU_INTERRUPT_HARD) && 1375 (env->irq_input_state & (1<<PPC_INPUT_INT))) 1376 { 1377 /* For now KVM disregards the 'irq' argument. However, in the 1378 * future KVM could cache it in-kernel to avoid a heavyweight exit 1379 * when reading the UIC. 1380 */ 1381 irq = KVM_INTERRUPT_SET; 1382 1383 DPRINTF("injected interrupt %d\n", irq); 1384 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq); 1385 if (r < 0) { 1386 printf("cpu %d fail inject %x\n", cs->cpu_index, irq); 1387 } 1388 1389 /* Always wake up soon in case the interrupt was level based */ 1390 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1391 (NANOSECONDS_PER_SECOND / 50)); 1392 } 1393 1394 /* We don't know if there are more interrupts pending after this. However, 1395 * the guest will return to userspace in the course of handling this one 1396 * anyways, so we will get a chance to deliver the rest. 
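     *
     * (The timer_mod() above re-arms idle_timer 20 ms from now:
     * NANOSECONDS_PER_SECOND / 50 == 20000000 ns, matching the
     * workaround described next to idle_timer's declaration at the top
     * of this file.)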
*/ 1397 1398 qemu_mutex_unlock_iothread(); 1399 } 1400 1401 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) 1402 { 1403 return MEMTXATTRS_UNSPECIFIED; 1404 } 1405 1406 int kvm_arch_process_async_events(CPUState *cs) 1407 { 1408 return cs->halted; 1409 } 1410 1411 static int kvmppc_handle_halt(PowerPCCPU *cpu) 1412 { 1413 CPUState *cs = CPU(cpu); 1414 CPUPPCState *env = &cpu->env; 1415 1416 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) { 1417 cs->halted = 1; 1418 cs->exception_index = EXCP_HLT; 1419 } 1420 1421 return 0; 1422 } 1423 1424 /* map dcr access to existing qemu dcr emulation */ 1425 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data) 1426 { 1427 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) 1428 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn); 1429 1430 return 0; 1431 } 1432 1433 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data) 1434 { 1435 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) 1436 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn); 1437 1438 return 0; 1439 } 1440 1441 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1442 { 1443 /* Mixed endian case is not handled */ 1444 uint32_t sc = debug_inst_opcode; 1445 1446 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1447 sizeof(sc), 0) || 1448 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) { 1449 return -EINVAL; 1450 } 1451 1452 return 0; 1453 } 1454 1455 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1456 { 1457 uint32_t sc; 1458 1459 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) || 1460 sc != debug_inst_opcode || 1461 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1462 sizeof(sc), 1)) { 1463 return -EINVAL; 1464 } 1465 1466 return 0; 1467 } 1468 1469 static int find_hw_breakpoint(target_ulong addr, int type) 1470 { 1471 int n; 1472 1473 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1474 <= ARRAY_SIZE(hw_debug_points)); 1475 1476 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1477 if (hw_debug_points[n].addr == addr && 1478 hw_debug_points[n].type == type) { 1479 return n; 1480 } 1481 } 1482 1483 return -1; 1484 } 1485 1486 static int find_hw_watchpoint(target_ulong addr, int *flag) 1487 { 1488 int n; 1489 1490 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS); 1491 if (n >= 0) { 1492 *flag = BP_MEM_ACCESS; 1493 return n; 1494 } 1495 1496 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE); 1497 if (n >= 0) { 1498 *flag = BP_MEM_WRITE; 1499 return n; 1500 } 1501 1502 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ); 1503 if (n >= 0) { 1504 *flag = BP_MEM_READ; 1505 return n; 1506 } 1507 1508 return -1; 1509 } 1510 1511 int kvm_arch_insert_hw_breakpoint(target_ulong addr, 1512 target_ulong len, int type) 1513 { 1514 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) { 1515 return -ENOBUFS; 1516 } 1517 1518 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr; 1519 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type; 1520 1521 switch (type) { 1522 case GDB_BREAKPOINT_HW: 1523 if (nb_hw_breakpoint >= max_hw_breakpoint) { 1524 return -ENOBUFS; 1525 } 1526 1527 if (find_hw_breakpoint(addr, type) >= 0) { 1528 return -EEXIST; 1529 } 1530 1531 nb_hw_breakpoint++; 1532 break; 1533 1534 case GDB_WATCHPOINT_WRITE: 1535 case GDB_WATCHPOINT_READ: 1536 case GDB_WATCHPOINT_ACCESS: 1537 if (nb_hw_watchpoint >= max_hw_watchpoint) { 1538 
return -ENOBUFS; 1539 } 1540 1541 if (find_hw_breakpoint(addr, type) >= 0) { 1542 return -EEXIST; 1543 } 1544 1545 nb_hw_watchpoint++; 1546 break; 1547 1548 default: 1549 return -ENOSYS; 1550 } 1551 1552 return 0; 1553 } 1554 1555 int kvm_arch_remove_hw_breakpoint(target_ulong addr, 1556 target_ulong len, int type) 1557 { 1558 int n; 1559 1560 n = find_hw_breakpoint(addr, type); 1561 if (n < 0) { 1562 return -ENOENT; 1563 } 1564 1565 switch (type) { 1566 case GDB_BREAKPOINT_HW: 1567 nb_hw_breakpoint--; 1568 break; 1569 1570 case GDB_WATCHPOINT_WRITE: 1571 case GDB_WATCHPOINT_READ: 1572 case GDB_WATCHPOINT_ACCESS: 1573 nb_hw_watchpoint--; 1574 break; 1575 1576 default: 1577 return -ENOSYS; 1578 } 1579 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint]; 1580 1581 return 0; 1582 } 1583 1584 void kvm_arch_remove_all_hw_breakpoints(void) 1585 { 1586 nb_hw_breakpoint = nb_hw_watchpoint = 0; 1587 } 1588 1589 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg) 1590 { 1591 int n; 1592 1593 /* Software Breakpoint updates */ 1594 if (kvm_sw_breakpoints_active(cs)) { 1595 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; 1596 } 1597 1598 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1599 <= ARRAY_SIZE(hw_debug_points)); 1600 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp)); 1601 1602 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1603 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; 1604 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp)); 1605 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1606 switch (hw_debug_points[n].type) { 1607 case GDB_BREAKPOINT_HW: 1608 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT; 1609 break; 1610 case GDB_WATCHPOINT_WRITE: 1611 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE; 1612 break; 1613 case GDB_WATCHPOINT_READ: 1614 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ; 1615 break; 1616 case GDB_WATCHPOINT_ACCESS: 1617 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE | 1618 KVMPPC_DEBUG_WATCH_READ; 1619 break; 1620 default: 1621 cpu_abort(cs, "Unsupported breakpoint type\n"); 1622 } 1623 dbg->arch.bp[n].addr = hw_debug_points[n].addr; 1624 } 1625 } 1626 } 1627 1628 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run) 1629 { 1630 CPUState *cs = CPU(cpu); 1631 CPUPPCState *env = &cpu->env; 1632 struct kvm_debug_exit_arch *arch_info = &run->debug.arch; 1633 int handle = 0; 1634 int n; 1635 int flag = 0; 1636 1637 if (cs->singlestep_enabled) { 1638 handle = 1; 1639 } else if (arch_info->status) { 1640 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1641 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) { 1642 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW); 1643 if (n >= 0) { 1644 handle = 1; 1645 } 1646 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ | 1647 KVMPPC_DEBUG_WATCH_WRITE)) { 1648 n = find_hw_watchpoint(arch_info->address, &flag); 1649 if (n >= 0) { 1650 handle = 1; 1651 cs->watchpoint_hit = &hw_watchpoint; 1652 hw_watchpoint.vaddr = hw_debug_points[n].addr; 1653 hw_watchpoint.flags = flag; 1654 } 1655 } 1656 } 1657 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) { 1658 handle = 1; 1659 } else { 1660 /* QEMU is not able to handle debug exception, so inject 1661 * program exception to guest; 1662 * Yes program exception NOT debug exception !! 1663 * When QEMU is using debug resources then debug exception must 1664 * be always set. To achieve this we set MSR_DE and also set 1665 * MSRP_DEP so guest cannot change MSR_DE. 
1666 * When emulating debug resource for guest we want guest 1667 * to control MSR_DE (enable/disable debug interrupt on need). 1668 * Supporting both configurations are NOT possible. 1669 * So the result is that we cannot share debug resources 1670 * between QEMU and Guest on BOOKE architecture. 1671 * In the current design QEMU gets the priority over guest, 1672 * this means that if QEMU is using debug resources then guest 1673 * cannot use them; 1674 * For software breakpoint QEMU uses a privileged instruction; 1675 * So there cannot be any reason that we are here for guest 1676 * set debug exception, only possibility is guest executed a 1677 * privileged / illegal instruction and that's why we are 1678 * injecting a program interrupt. 1679 */ 1680 1681 cpu_synchronize_state(cs); 1682 /* env->nip is PC, so increment this by 4 to use 1683 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4. 1684 */ 1685 env->nip += 4; 1686 cs->exception_index = POWERPC_EXCP_PROGRAM; 1687 env->error_code = POWERPC_EXCP_INVAL; 1688 ppc_cpu_do_interrupt(cs); 1689 } 1690 1691 return handle; 1692 } 1693 1694 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) 1695 { 1696 PowerPCCPU *cpu = POWERPC_CPU(cs); 1697 CPUPPCState *env = &cpu->env; 1698 int ret; 1699 1700 qemu_mutex_lock_iothread(); 1701 1702 switch (run->exit_reason) { 1703 case KVM_EXIT_DCR: 1704 if (run->dcr.is_write) { 1705 DPRINTF("handle dcr write\n"); 1706 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data); 1707 } else { 1708 DPRINTF("handle dcr read\n"); 1709 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data); 1710 } 1711 break; 1712 case KVM_EXIT_HLT: 1713 DPRINTF("handle halt\n"); 1714 ret = kvmppc_handle_halt(cpu); 1715 break; 1716 #if defined(TARGET_PPC64) 1717 case KVM_EXIT_PAPR_HCALL: 1718 DPRINTF("handle PAPR hypercall\n"); 1719 run->papr_hcall.ret = spapr_hypercall(cpu, 1720 run->papr_hcall.nr, 1721 run->papr_hcall.args); 1722 ret = 0; 1723 break; 1724 #endif 1725 case KVM_EXIT_EPR: 1726 DPRINTF("handle epr\n"); 1727 run->epr.epr = ldl_phys(cs->as, env->mpic_iack); 1728 ret = 0; 1729 break; 1730 case KVM_EXIT_WATCHDOG: 1731 DPRINTF("handle watchdog expiry\n"); 1732 watchdog_perform_action(); 1733 ret = 0; 1734 break; 1735 1736 case KVM_EXIT_DEBUG: 1737 DPRINTF("handle debug exception\n"); 1738 if (kvm_handle_debug(cpu, run)) { 1739 ret = EXCP_DEBUG; 1740 break; 1741 } 1742 /* re-enter, this exception was guest-internal */ 1743 ret = 0; 1744 break; 1745 1746 default: 1747 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); 1748 ret = -1; 1749 break; 1750 } 1751 1752 qemu_mutex_unlock_iothread(); 1753 return ret; 1754 } 1755 1756 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits) 1757 { 1758 CPUState *cs = CPU(cpu); 1759 uint32_t bits = tsr_bits; 1760 struct kvm_one_reg reg = { 1761 .id = KVM_REG_PPC_OR_TSR, 1762 .addr = (uintptr_t) &bits, 1763 }; 1764 1765 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1766 } 1767 1768 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits) 1769 { 1770 1771 CPUState *cs = CPU(cpu); 1772 uint32_t bits = tsr_bits; 1773 struct kvm_one_reg reg = { 1774 .id = KVM_REG_PPC_CLEAR_TSR, 1775 .addr = (uintptr_t) &bits, 1776 }; 1777 1778 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1779 } 1780 1781 int kvmppc_set_tcr(PowerPCCPU *cpu) 1782 { 1783 CPUState *cs = CPU(cpu); 1784 CPUPPCState *env = &cpu->env; 1785 uint32_t tcr = env->spr[SPR_BOOKE_TCR]; 1786 1787 struct kvm_one_reg reg = { 1788 .id = KVM_REG_PPC_TCR, 1789 .addr = (uintptr_t) &tcr, 
1790 }; 1791 1792 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1793 } 1794 1795 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu) 1796 { 1797 CPUState *cs = CPU(cpu); 1798 int ret; 1799 1800 if (!kvm_enabled()) { 1801 return -1; 1802 } 1803 1804 if (!cap_ppc_watchdog) { 1805 printf("warning: KVM does not support watchdog"); 1806 return -1; 1807 } 1808 1809 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0); 1810 if (ret < 0) { 1811 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n", 1812 __func__, strerror(-ret)); 1813 return ret; 1814 } 1815 1816 return ret; 1817 } 1818 1819 static int read_cpuinfo(const char *field, char *value, int len) 1820 { 1821 FILE *f; 1822 int ret = -1; 1823 int field_len = strlen(field); 1824 char line[512]; 1825 1826 f = fopen("/proc/cpuinfo", "r"); 1827 if (!f) { 1828 return -1; 1829 } 1830 1831 do { 1832 if (!fgets(line, sizeof(line), f)) { 1833 break; 1834 } 1835 if (!strncmp(line, field, field_len)) { 1836 pstrcpy(value, len, line); 1837 ret = 0; 1838 break; 1839 } 1840 } while(*line); 1841 1842 fclose(f); 1843 1844 return ret; 1845 } 1846 1847 uint32_t kvmppc_get_tbfreq(void) 1848 { 1849 char line[512]; 1850 char *ns; 1851 uint32_t retval = NANOSECONDS_PER_SECOND; 1852 1853 if (read_cpuinfo("timebase", line, sizeof(line))) { 1854 return retval; 1855 } 1856 1857 if (!(ns = strchr(line, ':'))) { 1858 return retval; 1859 } 1860 1861 ns++; 1862 1863 return atoi(ns); 1864 } 1865 1866 bool kvmppc_get_host_serial(char **value) 1867 { 1868 return g_file_get_contents("/proc/device-tree/system-id", value, NULL, 1869 NULL); 1870 } 1871 1872 bool kvmppc_get_host_model(char **value) 1873 { 1874 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL); 1875 } 1876 1877 /* Try to find a device tree node for a CPU with clock-frequency property */ 1878 static int kvmppc_find_cpu_dt(char *buf, int buf_len) 1879 { 1880 struct dirent *dirp; 1881 DIR *dp; 1882 1883 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) { 1884 printf("Can't open directory " PROC_DEVTREE_CPU "\n"); 1885 return -1; 1886 } 1887 1888 buf[0] = '\0'; 1889 while ((dirp = readdir(dp)) != NULL) { 1890 FILE *f; 1891 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU, 1892 dirp->d_name); 1893 f = fopen(buf, "r"); 1894 if (f) { 1895 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name); 1896 fclose(f); 1897 break; 1898 } 1899 buf[0] = '\0'; 1900 } 1901 closedir(dp); 1902 if (buf[0] == '\0') { 1903 printf("Unknown host!\n"); 1904 return -1; 1905 } 1906 1907 return 0; 1908 } 1909 1910 static uint64_t kvmppc_read_int_dt(const char *filename) 1911 { 1912 union { 1913 uint32_t v32; 1914 uint64_t v64; 1915 } u; 1916 FILE *f; 1917 int len; 1918 1919 f = fopen(filename, "rb"); 1920 if (!f) { 1921 return -1; 1922 } 1923 1924 len = fread(&u, 1, sizeof(u), f); 1925 fclose(f); 1926 switch (len) { 1927 case 4: 1928 /* property is a 32-bit quantity */ 1929 return be32_to_cpu(u.v32); 1930 case 8: 1931 return be64_to_cpu(u.v64); 1932 } 1933 1934 return 0; 1935 } 1936 1937 /* Read a CPU node property from the host device tree that's a single 1938 * integer (32-bit or 64-bit). 
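 * ("clock-frequency", "ibm,vmx" and "ibm,dfp" below are read this way.)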
Returns 0 if anything goes wrong 1939 * (can't find or open the property, or doesn't understand the 1940 * format) */ 1941 static uint64_t kvmppc_read_int_cpu_dt(const char *propname) 1942 { 1943 char buf[PATH_MAX], *tmp; 1944 uint64_t val; 1945 1946 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) { 1947 return -1; 1948 } 1949 1950 tmp = g_strdup_printf("%s/%s", buf, propname); 1951 val = kvmppc_read_int_dt(tmp); 1952 g_free(tmp); 1953 1954 return val; 1955 } 1956 1957 uint64_t kvmppc_get_clockfreq(void) 1958 { 1959 return kvmppc_read_int_cpu_dt("clock-frequency"); 1960 } 1961 1962 uint32_t kvmppc_get_vmx(void) 1963 { 1964 return kvmppc_read_int_cpu_dt("ibm,vmx"); 1965 } 1966 1967 uint32_t kvmppc_get_dfp(void) 1968 { 1969 return kvmppc_read_int_cpu_dt("ibm,dfp"); 1970 } 1971 1972 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo) 1973 { 1974 PowerPCCPU *cpu = ppc_env_get_cpu(env); 1975 CPUState *cs = CPU(cpu); 1976 1977 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) && 1978 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) { 1979 return 0; 1980 } 1981 1982 return 1; 1983 } 1984 1985 int kvmppc_get_hasidle(CPUPPCState *env) 1986 { 1987 struct kvm_ppc_pvinfo pvinfo; 1988 1989 if (!kvmppc_get_pvinfo(env, &pvinfo) && 1990 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) { 1991 return 1; 1992 } 1993 1994 return 0; 1995 } 1996 1997 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len) 1998 { 1999 uint32_t *hc = (uint32_t*)buf; 2000 struct kvm_ppc_pvinfo pvinfo; 2001 2002 if (!kvmppc_get_pvinfo(env, &pvinfo)) { 2003 memcpy(buf, pvinfo.hcall, buf_len); 2004 return 0; 2005 } 2006 2007 /* 2008 * Fallback to always fail hypercalls regardless of endianness: 2009 * 2010 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian) 2011 * li r3, -1 2012 * b .+8 (becomes nop in wrong endian) 2013 * bswap32(li r3, -1) 2014 */ 2015 2016 hc[0] = cpu_to_be32(0x08000048); 2017 hc[1] = cpu_to_be32(0x3860ffff); 2018 hc[2] = cpu_to_be32(0x48000008); 2019 hc[3] = cpu_to_be32(bswap32(0x3860ffff)); 2020 2021 return 1; 2022 } 2023 2024 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall) 2025 { 2026 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1); 2027 } 2028 2029 void kvmppc_enable_logical_ci_hcalls(void) 2030 { 2031 /* 2032 * FIXME: it would be nice if we could detect the cases where 2033 * we're using a device which requires the in kernel 2034 * implementation of these hcalls, but the kernel lacks them and 2035 * produce a warning. 
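     *
     * (H_LOGICAL_CI_LOAD and H_LOGICAL_CI_STORE are the PAPR hypercalls
     * for cache-inhibited, i.e. MMIO-style, guest loads and stores.)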
2036 */ 2037 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD); 2038 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE); 2039 } 2040 2041 void kvmppc_enable_set_mode_hcall(void) 2042 { 2043 kvmppc_enable_hcall(kvm_state, H_SET_MODE); 2044 } 2045 2046 void kvmppc_enable_clear_ref_mod_hcalls(void) 2047 { 2048 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF); 2049 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD); 2050 } 2051 2052 void kvmppc_set_papr(PowerPCCPU *cpu) 2053 { 2054 CPUState *cs = CPU(cpu); 2055 int ret; 2056 2057 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0); 2058 if (ret) { 2059 error_report("This vCPU type or KVM version does not support PAPR"); 2060 exit(1); 2061 } 2062 2063 /* Update the capability flag so we sync the right information 2064 * with kvm */ 2065 cap_papr = 1; 2066 } 2067 2068 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr) 2069 { 2070 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr); 2071 } 2072 2073 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy) 2074 { 2075 CPUState *cs = CPU(cpu); 2076 int ret; 2077 2078 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy); 2079 if (ret && mpic_proxy) { 2080 error_report("This KVM version does not support EPR"); 2081 exit(1); 2082 } 2083 } 2084 2085 int kvmppc_smt_threads(void) 2086 { 2087 return cap_ppc_smt ? cap_ppc_smt : 1; 2088 } 2089 2090 #ifdef TARGET_PPC64 2091 off_t kvmppc_alloc_rma(void **rma) 2092 { 2093 off_t size; 2094 int fd; 2095 struct kvm_allocate_rma ret; 2096 2097 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported 2098 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but 2099 * not necessary on this hardware 2100 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware 2101 * 2102 * FIXME: We should allow the user to force contiguous RMA 2103 * allocation in the cap_ppc_rma==1 case. 
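 *
 * Illustrative caller sketch (the surrounding machine-setup code is
 * an assumption, not part of this file): the return value is the
 * number of bytes actually mapped, 0 when no contiguous RMA is
 * required, and -1 on failure, so a caller can do e.g.
 *
 *     rma_alloc_size = kvmppc_alloc_rma(&rma);
 *     if (rma_alloc_size > 0) {
 *         ... back the first rma_alloc_size bytes of guest RAM
 *             with the host mapping returned in rma ...
 *     }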
2104 */ 2105 if (cap_ppc_rma < 2) { 2106 return 0; 2107 } 2108 2109 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret); 2110 if (fd < 0) { 2111 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n", 2112 strerror(errno)); 2113 return -1; 2114 } 2115 2116 size = MIN(ret.rma_size, 256ul << 20); 2117 2118 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); 2119 if (*rma == MAP_FAILED) { 2120 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno)); 2121 return -1; 2122 }; 2123 2124 return size; 2125 } 2126 2127 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift) 2128 { 2129 struct kvm_ppc_smmu_info info; 2130 long rampagesize, best_page_shift; 2131 int i; 2132 2133 if (cap_ppc_rma >= 2) { 2134 return current_size; 2135 } 2136 2137 /* Find the largest hardware supported page size that's less than 2138 * or equal to the (logical) backing page size of guest RAM */ 2139 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info); 2140 rampagesize = qemu_getrampagesize(); 2141 best_page_shift = 0; 2142 2143 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) { 2144 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i]; 2145 2146 if (!sps->page_shift) { 2147 continue; 2148 } 2149 2150 if ((sps->page_shift > best_page_shift) 2151 && ((1UL << sps->page_shift) <= rampagesize)) { 2152 best_page_shift = sps->page_shift; 2153 } 2154 } 2155 2156 return MIN(current_size, 2157 1ULL << (best_page_shift + hash_shift - 7)); 2158 } 2159 #endif 2160 2161 bool kvmppc_spapr_use_multitce(void) 2162 { 2163 return cap_spapr_multitce; 2164 } 2165 2166 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift, 2167 uint64_t bus_offset, uint32_t nb_table, 2168 int *pfd, bool need_vfio) 2169 { 2170 long len; 2171 int fd; 2172 void *table; 2173 2174 /* Must set fd to -1 so we don't try to munmap when called for 2175 * destroying the table, which the upper layers -will- do 2176 */ 2177 *pfd = -1; 2178 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) { 2179 return NULL; 2180 } 2181 2182 if (cap_spapr_tce_64) { 2183 struct kvm_create_spapr_tce_64 args = { 2184 .liobn = liobn, 2185 .page_shift = page_shift, 2186 .offset = bus_offset >> page_shift, 2187 .size = nb_table, 2188 .flags = 0 2189 }; 2190 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args); 2191 if (fd < 0) { 2192 fprintf(stderr, 2193 "KVM: Failed to create TCE64 table for liobn 0x%x\n", 2194 liobn); 2195 return NULL; 2196 } 2197 } else if (cap_spapr_tce) { 2198 uint64_t window_size = (uint64_t) nb_table << page_shift; 2199 struct kvm_create_spapr_tce args = { 2200 .liobn = liobn, 2201 .window_size = window_size, 2202 }; 2203 if ((window_size != args.window_size) || bus_offset) { 2204 return NULL; 2205 } 2206 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args); 2207 if (fd < 0) { 2208 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n", 2209 liobn); 2210 return NULL; 2211 } 2212 } else { 2213 return NULL; 2214 } 2215 2216 len = nb_table * sizeof(uint64_t); 2217 /* FIXME: round this up to page size */ 2218 2219 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); 2220 if (table == MAP_FAILED) { 2221 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n", 2222 liobn); 2223 close(fd); 2224 return NULL; 2225 } 2226 2227 *pfd = fd; 2228 return table; 2229 } 2230 2231 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table) 2232 { 2233 long len; 2234 2235 if (fd < 0) { 2236 return -1; 2237 } 2238 2239 len = nb_table * sizeof(uint64_t); 2240 if ((munmap(table, len) < 
0) || 2241 (close(fd) < 0)) { 2242 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s", 2243 strerror(errno)); 2244 /* Leak the table */ 2245 } 2246 2247 return 0; 2248 } 2249 2250 int kvmppc_reset_htab(int shift_hint) 2251 { 2252 uint32_t shift = shift_hint; 2253 2254 if (!kvm_enabled()) { 2255 /* Full emulation, tell caller to allocate htab itself */ 2256 return 0; 2257 } 2258 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) { 2259 int ret; 2260 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift); 2261 if (ret == -ENOTTY) { 2262 /* At least some versions of PR KVM advertise the 2263 * capability, but don't implement the ioctl(). Oops. 2264 * Return 0 so that we allocate the htab in qemu, as is 2265 * correct for PR. */ 2266 return 0; 2267 } else if (ret < 0) { 2268 return ret; 2269 } 2270 return shift; 2271 } 2272 2273 /* We have a kernel that predates the htab reset calls. For PR 2274 * KVM, we need to allocate the htab ourselves, for an HV KVM of 2275 * this era, it has allocated a 16MB fixed size hash table already. */ 2276 if (kvmppc_is_pr(kvm_state)) { 2277 /* PR - tell caller to allocate htab */ 2278 return 0; 2279 } else { 2280 /* HV - assume 16MB kernel allocated htab */ 2281 return 24; 2282 } 2283 } 2284 2285 static inline uint32_t mfpvr(void) 2286 { 2287 uint32_t pvr; 2288 2289 asm ("mfpvr %0" 2290 : "=r"(pvr)); 2291 return pvr; 2292 } 2293 2294 static void alter_insns(uint64_t *word, uint64_t flags, bool on) 2295 { 2296 if (on) { 2297 *word |= flags; 2298 } else { 2299 *word &= ~flags; 2300 } 2301 } 2302 2303 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data) 2304 { 2305 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); 2306 uint32_t vmx = kvmppc_get_vmx(); 2307 uint32_t dfp = kvmppc_get_dfp(); 2308 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size"); 2309 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size"); 2310 2311 /* Now fix up the class with information we can query from the host */ 2312 pcc->pvr = mfpvr(); 2313 2314 if (vmx != -1) { 2315 /* Only override when we know what the host supports */ 2316 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0); 2317 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1); 2318 } 2319 if (dfp != -1) { 2320 /* Only override when we know what the host supports */ 2321 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp); 2322 } 2323 2324 if (dcache_size != -1) { 2325 pcc->l1_dcache_size = dcache_size; 2326 } 2327 2328 if (icache_size != -1) { 2329 pcc->l1_icache_size = icache_size; 2330 } 2331 2332 #if defined(TARGET_PPC64) 2333 pcc->radix_page_info = kvm_get_radix_page_info(); 2334 #endif /* defined(TARGET_PPC64) */ 2335 } 2336 2337 bool kvmppc_has_cap_epr(void) 2338 { 2339 return cap_epr; 2340 } 2341 2342 bool kvmppc_has_cap_htab_fd(void) 2343 { 2344 return cap_htab_fd; 2345 } 2346 2347 bool kvmppc_has_cap_fixup_hcalls(void) 2348 { 2349 return cap_fixup_hcalls; 2350 } 2351 2352 bool kvmppc_has_cap_htm(void) 2353 { 2354 return cap_htm; 2355 } 2356 2357 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc) 2358 { 2359 ObjectClass *oc = OBJECT_CLASS(pcc); 2360 2361 while (oc && !object_class_is_abstract(oc)) { 2362 oc = object_class_get_parent(oc); 2363 } 2364 assert(oc); 2365 2366 return POWERPC_CPU_CLASS(oc); 2367 } 2368 2369 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) 2370 { 2371 uint32_t host_pvr = mfpvr(); 2372 PowerPCCPUClass *pvr_pcc; 2373 2374 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr); 2375 if (pvr_pcc == NULL) { 2376 pvr_pcc = 
ppc_cpu_class_by_pvr_mask(host_pvr); 2377 } 2378 2379 return pvr_pcc; 2380 } 2381 2382 static int kvm_ppc_register_host_cpu_type(void) 2383 { 2384 TypeInfo type_info = { 2385 .name = TYPE_HOST_POWERPC_CPU, 2386 .class_init = kvmppc_host_cpu_class_init, 2387 }; 2388 PowerPCCPUClass *pvr_pcc; 2389 DeviceClass *dc; 2390 int i; 2391 2392 pvr_pcc = kvm_ppc_get_host_cpu_class(); 2393 if (pvr_pcc == NULL) { 2394 return -1; 2395 } 2396 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc)); 2397 type_register(&type_info); 2398 2399 #if defined(TARGET_PPC64) 2400 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host"); 2401 type_info.parent = TYPE_SPAPR_CPU_CORE, 2402 type_info.instance_size = sizeof(sPAPRCPUCore); 2403 type_info.instance_init = NULL; 2404 type_info.class_init = spapr_cpu_core_class_init; 2405 type_info.class_data = (void *) "host"; 2406 type_register(&type_info); 2407 g_free((void *)type_info.name); 2408 #endif 2409 2410 /* 2411 * Update generic CPU family class alias (e.g. on a POWER8NVL host, 2412 * we want "POWER8" to be a "family" alias that points to the current 2413 * host CPU type, too) 2414 */ 2415 dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc)); 2416 for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) { 2417 if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) { 2418 ObjectClass *oc = OBJECT_CLASS(pvr_pcc); 2419 char *suffix; 2420 2421 ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc)); 2422 suffix = strstr(ppc_cpu_aliases[i].model, "-"TYPE_POWERPC_CPU); 2423 if (suffix) { 2424 *suffix = 0; 2425 } 2426 ppc_cpu_aliases[i].oc = oc; 2427 break; 2428 } 2429 } 2430 2431 return 0; 2432 } 2433 2434 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function) 2435 { 2436 struct kvm_rtas_token_args args = { 2437 .token = token, 2438 }; 2439 2440 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) { 2441 return -ENOENT; 2442 } 2443 2444 strncpy(args.name, function, sizeof(args.name)); 2445 2446 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args); 2447 } 2448 2449 int kvmppc_get_htab_fd(bool write) 2450 { 2451 struct kvm_get_htab_fd s = { 2452 .flags = write ? KVM_GET_HTAB_WRITE : 0, 2453 .start_index = 0, 2454 }; 2455 2456 if (!cap_htab_fd) { 2457 fprintf(stderr, "KVM version doesn't support saving the hash table\n"); 2458 return -1; 2459 } 2460 2461 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s); 2462 } 2463 2464 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns) 2465 { 2466 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); 2467 uint8_t buf[bufsize]; 2468 ssize_t rc; 2469 2470 do { 2471 rc = read(fd, buf, bufsize); 2472 if (rc < 0) { 2473 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n", 2474 strerror(errno)); 2475 return rc; 2476 } else if (rc) { 2477 uint8_t *buffer = buf; 2478 ssize_t n = rc; 2479 while (n) { 2480 struct kvm_get_htab_header *head = 2481 (struct kvm_get_htab_header *) buffer; 2482 size_t chunksize = sizeof(*head) + 2483 HASH_PTE_SIZE_64 * head->n_valid; 2484 2485 qemu_put_be32(f, head->index); 2486 qemu_put_be16(f, head->n_valid); 2487 qemu_put_be16(f, head->n_invalid); 2488 qemu_put_buffer(f, (void *)(head + 1), 2489 HASH_PTE_SIZE_64 * head->n_valid); 2490 2491 buffer += chunksize; 2492 n -= chunksize; 2493 } 2494 } 2495 } while ((rc != 0) 2496 && ((max_ns < 0) 2497 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns))); 2498 2499 return (rc == 0) ? 
1 : 0; 2500 } 2501 2502 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index, 2503 uint16_t n_valid, uint16_t n_invalid) 2504 { 2505 struct kvm_get_htab_header *buf; 2506 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64; 2507 ssize_t rc; 2508 2509 buf = alloca(chunksize); 2510 buf->index = index; 2511 buf->n_valid = n_valid; 2512 buf->n_invalid = n_invalid; 2513 2514 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid); 2515 2516 rc = write(fd, buf, chunksize); 2517 if (rc < 0) { 2518 fprintf(stderr, "Error writing KVM hash table: %s\n", 2519 strerror(errno)); 2520 return rc; 2521 } 2522 if (rc != chunksize) { 2523 /* We should never get a short write on a single chunk */ 2524 fprintf(stderr, "Short write, restoring KVM hash table\n"); 2525 return -1; 2526 } 2527 return 0; 2528 } 2529 2530 bool kvm_arch_stop_on_emulation_error(CPUState *cpu) 2531 { 2532 return true; 2533 } 2534 2535 void kvm_arch_init_irq_routing(KVMState *s) 2536 { 2537 } 2538 2539 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n) 2540 { 2541 struct kvm_get_htab_fd ghf = { 2542 .flags = 0, 2543 .start_index = ptex, 2544 }; 2545 int fd, rc; 2546 int i; 2547 2548 fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf); 2549 if (fd < 0) { 2550 hw_error("kvmppc_read_hptes: Unable to open HPT fd"); 2551 } 2552 2553 i = 0; 2554 while (i < n) { 2555 struct kvm_get_htab_header *hdr; 2556 int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP; 2557 char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64]; 2558 2559 rc = read(fd, buf, sizeof(buf)); 2560 if (rc < 0) { 2561 hw_error("kvmppc_read_hptes: Unable to read HPTEs"); 2562 } 2563 2564 hdr = (struct kvm_get_htab_header *)buf; 2565 while ((i < n) && ((char *)hdr < (buf + rc))) { 2566 int invalid = hdr->n_invalid; 2567 2568 if (hdr->index != (ptex + i)) { 2569 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32 2570 " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i); 2571 } 2572 2573 memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid); 2574 i += hdr->n_valid; 2575 2576 if ((n - i) < invalid) { 2577 invalid = n - i; 2578 } 2579 memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64); 2580 i += hdr->n_invalid; 2581 2582 hdr = (struct kvm_get_htab_header *) 2583 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid); 2584 } 2585 } 2586 2587 close(fd); 2588 } 2589 2590 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1) 2591 { 2592 int fd, rc; 2593 struct kvm_get_htab_fd ghf; 2594 struct { 2595 struct kvm_get_htab_header hdr; 2596 uint64_t pte0; 2597 uint64_t pte1; 2598 } buf; 2599 2600 ghf.flags = 0; 2601 ghf.start_index = 0; /* Ignored */ 2602 fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf); 2603 if (fd < 0) { 2604 hw_error("kvmppc_write_hpte: Unable to open HPT fd"); 2605 } 2606 2607 buf.hdr.n_valid = 1; 2608 buf.hdr.n_invalid = 0; 2609 buf.hdr.index = ptex; 2610 buf.pte0 = cpu_to_be64(pte0); 2611 buf.pte1 = cpu_to_be64(pte1); 2612 2613 rc = write(fd, &buf, sizeof(buf)); 2614 if (rc != sizeof(buf)) { 2615 hw_error("kvmppc_write_hpte: Unable to update KVM HPT"); 2616 } 2617 close(fd); 2618 } 2619 2620 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, 2621 uint64_t address, uint32_t data, PCIDevice *dev) 2622 { 2623 return 0; 2624 } 2625 2626 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, 2627 int vector, PCIDevice *dev) 2628 { 2629 return 0; 2630 } 2631 2632 int kvm_arch_release_virq_post(int virq) 2633 { 2634 return 0; 2635 } 2636 2637 int kvm_arch_msi_data_to_gsi(uint32_t data) 
2638 { 2639 return data & 0xffff; 2640 } 2641 2642 int kvmppc_enable_hwrng(void) 2643 { 2644 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) { 2645 return -1; 2646 } 2647 2648 return kvmppc_enable_hcall(kvm_state, H_RANDOM); 2649 } 2650
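/*
 * Illustrative, non-compiled sketch of how the HPT streaming helpers
 * above fit together during migration.  The buffer-size and
 * time-slice constants, the QEMUFile handle f, and the loop control
 * are assumptions about the caller and are not defined in this file.
 *
 *     // Source side: open a read-only fd on the guest hash table
 *     // and stream it in bounded time slices.
 *     int fd = kvmppc_get_htab_fd(false);
 *     do {
 *         rc = kvmppc_save_htab(f, fd, BUFSIZE, MAX_SLICE_NS);
 *         // rc == 1: the kernel reported the end of the table,
 *         // rc == 0: the time budget ran out, come back later,
 *         // rc <  0: read error, abort the migration.
 *     } while (rc == 0 && more_passes_allowed);
 *
 *     // Destination side: each incoming chunk carries the same
 *     // kvm_get_htab_header layout (index, n_valid, n_invalid)
 *     // written out by kvmppc_save_htab(), and is replayed through
 *     // a write fd obtained with kvmppc_get_htab_fd(true).
 *     kvmppc_load_htab_chunk(f, wfd, index, n_valid, n_invalid);
 */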