1 /* 2 * PowerPC implementation of KVM hooks 3 * 4 * Copyright IBM Corp. 2007 5 * Copyright (C) 2011 Freescale Semiconductor, Inc. 6 * 7 * Authors: 8 * Jerone Young <jyoung5@us.ibm.com> 9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> 10 * Hollis Blanchard <hollisb@us.ibm.com> 11 * 12 * This work is licensed under the terms of the GNU GPL, version 2 or later. 13 * See the COPYING file in the top-level directory. 14 * 15 */ 16 17 #include "qemu/osdep.h" 18 #include <dirent.h> 19 #include <sys/ioctl.h> 20 #include <sys/vfs.h> 21 22 #include <linux/kvm.h> 23 24 #include "qemu-common.h" 25 #include "qapi/error.h" 26 #include "qemu/error-report.h" 27 #include "cpu.h" 28 #include "cpu-models.h" 29 #include "qemu/timer.h" 30 #include "sysemu/sysemu.h" 31 #include "sysemu/hw_accel.h" 32 #include "kvm_ppc.h" 33 #include "sysemu/cpus.h" 34 #include "sysemu/device_tree.h" 35 #include "mmu-hash64.h" 36 37 #include "hw/sysbus.h" 38 #include "hw/ppc/spapr.h" 39 #include "hw/ppc/spapr_vio.h" 40 #include "hw/ppc/spapr_cpu_core.h" 41 #include "hw/ppc/ppc.h" 42 #include "sysemu/watchdog.h" 43 #include "trace.h" 44 #include "exec/gdbstub.h" 45 #include "exec/memattrs.h" 46 #include "exec/ram_addr.h" 47 #include "sysemu/hostmem.h" 48 #include "qemu/cutils.h" 49 #include "qemu/mmap-alloc.h" 50 #include "elf.h" 51 #include "sysemu/kvm_int.h" 52 53 //#define DEBUG_KVM 54 55 #ifdef DEBUG_KVM 56 #define DPRINTF(fmt, ...) \ 57 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) 58 #else 59 #define DPRINTF(fmt, ...) \ 60 do { } while (0) 61 #endif 62 63 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/" 64 65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = { 66 KVM_CAP_LAST_INFO 67 }; 68 69 static int cap_interrupt_unset = false; 70 static int cap_interrupt_level = false; 71 static int cap_segstate; 72 static int cap_booke_sregs; 73 static int cap_ppc_smt; 74 static int cap_ppc_smt_possible; 75 static int cap_ppc_rma; 76 static int cap_spapr_tce; 77 static int cap_spapr_tce_64; 78 static int cap_spapr_multitce; 79 static int cap_spapr_vfio; 80 static int cap_hior; 81 static int cap_one_reg; 82 static int cap_epr; 83 static int cap_ppc_watchdog; 84 static int cap_papr; 85 static int cap_htab_fd; 86 static int cap_fixup_hcalls; 87 static int cap_htm; /* Hardware transactional memory support */ 88 static int cap_mmu_radix; 89 static int cap_mmu_hash_v3; 90 static int cap_resize_hpt; 91 static int cap_ppc_pvr_compat; 92 static int cap_ppc_safe_cache; 93 static int cap_ppc_safe_bounds_check; 94 static int cap_ppc_safe_indirect_branch; 95 96 static uint32_t debug_inst_opcode; 97 98 /* XXX We have a race condition where we actually have a level triggered 99 * interrupt, but the infrastructure can't expose that yet, so the guest 100 * takes but ignores it, goes to sleep and never gets notified that there's 101 * still an interrupt pending. 102 * 103 * As a quick workaround, let's just wake up again 20 ms after we injected 104 * an interrupt. That way we can assure that we're always reinjecting 105 * interrupts in case the guest swallowed them. 106 */ 107 static QEMUTimer *idle_timer; 108 109 static void kvm_kick_cpu(void *opaque) 110 { 111 PowerPCCPU *cpu = opaque; 112 113 qemu_cpu_kick(CPU(cpu)); 114 } 115 116 /* Check whether we are running with KVM-PR (instead of KVM-HV). This 117 * should only be used for fallback tests - generally we should use 118 * explicit capabilities for the features we want, rather than 119 * assuming what is/isn't available depending on the KVM variant. 
*/ 120 static bool kvmppc_is_pr(KVMState *ks) 121 { 122 /* Assume KVM-PR if the GET_PVINFO capability is available */ 123 return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0; 124 } 125 126 static int kvm_ppc_register_host_cpu_type(MachineState *ms); 127 static void kvmppc_get_cpu_characteristics(KVMState *s); 128 129 int kvm_arch_init(MachineState *ms, KVMState *s) 130 { 131 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ); 132 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL); 133 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE); 134 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS); 135 cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE); 136 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA); 137 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE); 138 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64); 139 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE); 140 cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO); 141 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG); 142 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR); 143 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR); 144 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG); 145 /* Note: we don't set cap_papr here, because this capability is 146 * only activated after this by kvmppc_set_papr() */ 147 cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD); 148 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL); 149 cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT); 150 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM); 151 cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX); 152 cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3); 153 cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); 154 kvmppc_get_cpu_characteristics(s); 155 /* 156 * Note: setting it to false because there is not such capability 157 * in KVM at this moment. 158 * 159 * TODO: call kvm_vm_check_extension() with the right capability 160 * after the kernel starts implementing it.*/ 161 cap_ppc_pvr_compat = false; 162 163 if (!cap_interrupt_level) { 164 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the " 165 "VM to stall at times!\n"); 166 } 167 168 kvm_ppc_register_host_cpu_type(ms); 169 170 return 0; 171 } 172 173 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s) 174 { 175 return 0; 176 } 177 178 static int kvm_arch_sync_sregs(PowerPCCPU *cpu) 179 { 180 CPUPPCState *cenv = &cpu->env; 181 CPUState *cs = CPU(cpu); 182 struct kvm_sregs sregs; 183 int ret; 184 185 if (cenv->excp_model == POWERPC_EXCP_BOOKE) { 186 /* What we're really trying to say is "if we're on BookE, we use 187 the native PVR for now". This is the only sane way to check 188 it though, so we potentially confuse users that they can run 189 BookE guests on BookS. 
Let's hope nobody dares enough :) */ 190 return 0; 191 } else { 192 if (!cap_segstate) { 193 fprintf(stderr, "kvm error: missing PVR setting capability\n"); 194 return -ENOSYS; 195 } 196 } 197 198 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs); 199 if (ret) { 200 return ret; 201 } 202 203 sregs.pvr = cenv->spr[SPR_PVR]; 204 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs); 205 } 206 207 /* Set up a shared TLB array with KVM */ 208 static int kvm_booke206_tlb_init(PowerPCCPU *cpu) 209 { 210 CPUPPCState *env = &cpu->env; 211 CPUState *cs = CPU(cpu); 212 struct kvm_book3e_206_tlb_params params = {}; 213 struct kvm_config_tlb cfg = {}; 214 unsigned int entries = 0; 215 int ret, i; 216 217 if (!kvm_enabled() || 218 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) { 219 return 0; 220 } 221 222 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN); 223 224 for (i = 0; i < BOOKE206_MAX_TLBN; i++) { 225 params.tlb_sizes[i] = booke206_tlb_size(env, i); 226 params.tlb_ways[i] = booke206_tlb_ways(env, i); 227 entries += params.tlb_sizes[i]; 228 } 229 230 assert(entries == env->nb_tlb); 231 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t)); 232 233 env->tlb_dirty = true; 234 235 cfg.array = (uintptr_t)env->tlb.tlbm; 236 cfg.array_len = sizeof(ppcmas_tlb_t) * entries; 237 cfg.params = (uintptr_t)¶ms; 238 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV; 239 240 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg); 241 if (ret < 0) { 242 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n", 243 __func__, strerror(-ret)); 244 return ret; 245 } 246 247 env->kvm_sw_tlb = true; 248 return 0; 249 } 250 251 252 #if defined(TARGET_PPC64) 253 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu, 254 struct kvm_ppc_smmu_info *info) 255 { 256 CPUPPCState *env = &cpu->env; 257 CPUState *cs = CPU(cpu); 258 259 memset(info, 0, sizeof(*info)); 260 261 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so 262 * need to "guess" what the supported page sizes are. 263 * 264 * For that to work we make a few assumptions: 265 * 266 * - Check whether we are running "PR" KVM which only supports 4K 267 * and 16M pages, but supports them regardless of the backing 268 * store characteritics. We also don't support 1T segments. 269 * 270 * This is safe as if HV KVM ever supports that capability or PR 271 * KVM grows supports for more page/segment sizes, those versions 272 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we 273 * will not hit this fallback 274 * 275 * - Else we are running HV KVM. This means we only support page 276 * sizes that fit in the backing store. Additionally we only 277 * advertize 64K pages if the processor is ARCH 2.06 and we assume 278 * P7 encodings for the SLB and hash table. Here too, we assume 279 * support for any newer processor will mean a kernel that 280 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit 281 * this fallback. 
282 */ 283 if (kvmppc_is_pr(cs->kvm_state)) { 284 /* No flags */ 285 info->flags = 0; 286 info->slb_size = 64; 287 288 /* Standard 4k base page size segment */ 289 info->sps[0].page_shift = 12; 290 info->sps[0].slb_enc = 0; 291 info->sps[0].enc[0].page_shift = 12; 292 info->sps[0].enc[0].pte_enc = 0; 293 294 /* Standard 16M large page size segment */ 295 info->sps[1].page_shift = 24; 296 info->sps[1].slb_enc = SLB_VSID_L; 297 info->sps[1].enc[0].page_shift = 24; 298 info->sps[1].enc[0].pte_enc = 0; 299 } else { 300 int i = 0; 301 302 /* HV KVM has backing store size restrictions */ 303 info->flags = KVM_PPC_PAGE_SIZES_REAL; 304 305 if (env->mmu_model & POWERPC_MMU_1TSEG) { 306 info->flags |= KVM_PPC_1T_SEGMENTS; 307 } 308 309 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 || 310 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) { 311 info->slb_size = 32; 312 } else { 313 info->slb_size = 64; 314 } 315 316 /* Standard 4k base page size segment */ 317 info->sps[i].page_shift = 12; 318 info->sps[i].slb_enc = 0; 319 info->sps[i].enc[0].page_shift = 12; 320 info->sps[i].enc[0].pte_enc = 0; 321 i++; 322 323 /* 64K on MMU 2.06 and later */ 324 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 || 325 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) { 326 info->sps[i].page_shift = 16; 327 info->sps[i].slb_enc = 0x110; 328 info->sps[i].enc[0].page_shift = 16; 329 info->sps[i].enc[0].pte_enc = 1; 330 i++; 331 } 332 333 /* Standard 16M large page size segment */ 334 info->sps[i].page_shift = 24; 335 info->sps[i].slb_enc = SLB_VSID_L; 336 info->sps[i].enc[0].page_shift = 24; 337 info->sps[i].enc[0].pte_enc = 0; 338 } 339 } 340 341 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info) 342 { 343 CPUState *cs = CPU(cpu); 344 int ret; 345 346 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) { 347 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info); 348 if (ret == 0) { 349 return; 350 } 351 } 352 353 kvm_get_fallback_smmu_info(cpu, info); 354 } 355 356 struct ppc_radix_page_info *kvm_get_radix_page_info(void) 357 { 358 KVMState *s = KVM_STATE(current_machine->accelerator); 359 struct ppc_radix_page_info *radix_page_info; 360 struct kvm_ppc_rmmu_info rmmu_info; 361 int i; 362 363 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) { 364 return NULL; 365 } 366 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) { 367 return NULL; 368 } 369 radix_page_info = g_malloc0(sizeof(*radix_page_info)); 370 radix_page_info->count = 0; 371 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) { 372 if (rmmu_info.ap_encodings[i]) { 373 radix_page_info->entries[i] = rmmu_info.ap_encodings[i]; 374 radix_page_info->count++; 375 } 376 } 377 return radix_page_info; 378 } 379 380 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, 381 bool radix, bool gtse, 382 uint64_t proc_tbl) 383 { 384 CPUState *cs = CPU(cpu); 385 int ret; 386 uint64_t flags = 0; 387 struct kvm_ppc_mmuv3_cfg cfg = { 388 .process_table = proc_tbl, 389 }; 390 391 if (radix) { 392 flags |= KVM_PPC_MMUV3_RADIX; 393 } 394 if (gtse) { 395 flags |= KVM_PPC_MMUV3_GTSE; 396 } 397 cfg.flags = flags; 398 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg); 399 switch (ret) { 400 case 0: 401 return H_SUCCESS; 402 case -EINVAL: 403 return H_PARAMETER; 404 case -ENODEV: 405 return H_NOT_AVAILABLE; 406 default: 407 return H_HARDWARE; 408 } 409 } 410 411 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift) 412 { 413 if (!(flags & 
KVM_PPC_PAGE_SIZES_REAL)) { 414 return true; 415 } 416 417 return (1ul << shift) <= rampgsize; 418 } 419 420 static long max_cpu_page_size; 421 422 static void kvm_fixup_page_sizes(PowerPCCPU *cpu) 423 { 424 static struct kvm_ppc_smmu_info smmu_info; 425 static bool has_smmu_info; 426 CPUPPCState *env = &cpu->env; 427 int iq, ik, jq, jk; 428 429 /* We only handle page sizes for 64-bit server guests for now */ 430 if (!(env->mmu_model & POWERPC_MMU_64)) { 431 return; 432 } 433 434 /* Collect MMU info from kernel if not already */ 435 if (!has_smmu_info) { 436 kvm_get_smmu_info(cpu, &smmu_info); 437 has_smmu_info = true; 438 } 439 440 if (!max_cpu_page_size) { 441 max_cpu_page_size = qemu_getrampagesize(); 442 } 443 444 /* Convert to QEMU form */ 445 memset(&env->sps, 0, sizeof(env->sps)); 446 447 /* If we have HV KVM, we need to forbid CI large pages if our 448 * host page size is smaller than 64K. 449 */ 450 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) { 451 env->ci_large_pages = getpagesize() >= 0x10000; 452 } 453 454 /* 455 * XXX This loop should be an entry wide AND of the capabilities that 456 * the selected CPU has with the capabilities that KVM supports. 457 */ 458 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) { 459 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq]; 460 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik]; 461 462 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size, 463 ksps->page_shift)) { 464 continue; 465 } 466 qsps->page_shift = ksps->page_shift; 467 qsps->slb_enc = ksps->slb_enc; 468 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) { 469 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size, 470 ksps->enc[jk].page_shift)) { 471 continue; 472 } 473 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift; 474 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc; 475 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) { 476 break; 477 } 478 } 479 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) { 480 break; 481 } 482 } 483 env->slb_nr = smmu_info.slb_size; 484 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) { 485 env->mmu_model &= ~POWERPC_MMU_1TSEG; 486 } 487 } 488 489 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path) 490 { 491 Object *mem_obj = object_resolve_path(obj_path, NULL); 492 long pagesize = host_memory_backend_pagesize(MEMORY_BACKEND(mem_obj)); 493 494 return pagesize >= max_cpu_page_size; 495 } 496 497 #else /* defined (TARGET_PPC64) */ 498 499 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu) 500 { 501 } 502 503 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path) 504 { 505 return true; 506 } 507 508 #endif /* !defined (TARGET_PPC64) */ 509 510 unsigned long kvm_arch_vcpu_id(CPUState *cpu) 511 { 512 return POWERPC_CPU(cpu)->vcpu_id; 513 } 514 515 /* e500 supports 2 h/w breakpoint and 2 watchpoint. 516 * book3s supports only 1 watchpoint, so array size 517 * of 4 is sufficient for now. 
518 */ 519 #define MAX_HW_BKPTS 4 520 521 static struct HWBreakpoint { 522 target_ulong addr; 523 int type; 524 } hw_debug_points[MAX_HW_BKPTS]; 525 526 static CPUWatchpoint hw_watchpoint; 527 528 /* Default there is no breakpoint and watchpoint supported */ 529 static int max_hw_breakpoint; 530 static int max_hw_watchpoint; 531 static int nb_hw_breakpoint; 532 static int nb_hw_watchpoint; 533 534 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv) 535 { 536 if (cenv->excp_model == POWERPC_EXCP_BOOKE) { 537 max_hw_breakpoint = 2; 538 max_hw_watchpoint = 2; 539 } 540 541 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) { 542 fprintf(stderr, "Error initializing h/w breakpoints\n"); 543 return; 544 } 545 } 546 547 int kvm_arch_init_vcpu(CPUState *cs) 548 { 549 PowerPCCPU *cpu = POWERPC_CPU(cs); 550 CPUPPCState *cenv = &cpu->env; 551 int ret; 552 553 /* Gather server mmu info from KVM and update the CPU state */ 554 kvm_fixup_page_sizes(cpu); 555 556 /* Synchronize sregs with kvm */ 557 ret = kvm_arch_sync_sregs(cpu); 558 if (ret) { 559 if (ret == -EINVAL) { 560 error_report("Register sync failed... If you're using kvm-hv.ko," 561 " only \"-cpu host\" is possible"); 562 } 563 return ret; 564 } 565 566 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu); 567 568 switch (cenv->mmu_model) { 569 case POWERPC_MMU_BOOKE206: 570 /* This target supports access to KVM's guest TLB */ 571 ret = kvm_booke206_tlb_init(cpu); 572 break; 573 case POWERPC_MMU_2_07: 574 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) { 575 /* KVM-HV has transactional memory on POWER8 also without the 576 * KVM_CAP_PPC_HTM extension, so enable it here instead as 577 * long as it's availble to userspace on the host. */ 578 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) { 579 cap_htm = true; 580 } 581 } 582 break; 583 default: 584 break; 585 } 586 587 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode); 588 kvmppc_hw_debug_points_init(cenv); 589 590 return ret; 591 } 592 593 static void kvm_sw_tlb_put(PowerPCCPU *cpu) 594 { 595 CPUPPCState *env = &cpu->env; 596 CPUState *cs = CPU(cpu); 597 struct kvm_dirty_tlb dirty_tlb; 598 unsigned char *bitmap; 599 int ret; 600 601 if (!env->kvm_sw_tlb) { 602 return; 603 } 604 605 bitmap = g_malloc((env->nb_tlb + 7) / 8); 606 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8); 607 608 dirty_tlb.bitmap = (uintptr_t)bitmap; 609 dirty_tlb.num_dirty = env->nb_tlb; 610 611 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb); 612 if (ret) { 613 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n", 614 __func__, strerror(-ret)); 615 } 616 617 g_free(bitmap); 618 } 619 620 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr) 621 { 622 PowerPCCPU *cpu = POWERPC_CPU(cs); 623 CPUPPCState *env = &cpu->env; 624 union { 625 uint32_t u32; 626 uint64_t u64; 627 } val; 628 struct kvm_one_reg reg = { 629 .id = id, 630 .addr = (uintptr_t) &val, 631 }; 632 int ret; 633 634 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 635 if (ret != 0) { 636 trace_kvm_failed_spr_get(spr, strerror(errno)); 637 } else { 638 switch (id & KVM_REG_SIZE_MASK) { 639 case KVM_REG_SIZE_U32: 640 env->spr[spr] = val.u32; 641 break; 642 643 case KVM_REG_SIZE_U64: 644 env->spr[spr] = val.u64; 645 break; 646 647 default: 648 /* Don't handle this size yet */ 649 abort(); 650 } 651 } 652 } 653 654 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr) 655 { 656 PowerPCCPU *cpu = POWERPC_CPU(cs); 657 CPUPPCState *env = &cpu->env; 658 union { 659 uint32_t u32; 660 uint64_t u64; 661 } val; 662 struct 
kvm_one_reg reg = { 663 .id = id, 664 .addr = (uintptr_t) &val, 665 }; 666 int ret; 667 668 switch (id & KVM_REG_SIZE_MASK) { 669 case KVM_REG_SIZE_U32: 670 val.u32 = env->spr[spr]; 671 break; 672 673 case KVM_REG_SIZE_U64: 674 val.u64 = env->spr[spr]; 675 break; 676 677 default: 678 /* Don't handle this size yet */ 679 abort(); 680 } 681 682 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 683 if (ret != 0) { 684 trace_kvm_failed_spr_set(spr, strerror(errno)); 685 } 686 } 687 688 static int kvm_put_fp(CPUState *cs) 689 { 690 PowerPCCPU *cpu = POWERPC_CPU(cs); 691 CPUPPCState *env = &cpu->env; 692 struct kvm_one_reg reg; 693 int i; 694 int ret; 695 696 if (env->insns_flags & PPC_FLOAT) { 697 uint64_t fpscr = env->fpscr; 698 bool vsx = !!(env->insns_flags2 & PPC2_VSX); 699 700 reg.id = KVM_REG_PPC_FPSCR; 701 reg.addr = (uintptr_t)&fpscr; 702 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 703 if (ret < 0) { 704 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno)); 705 return ret; 706 } 707 708 for (i = 0; i < 32; i++) { 709 uint64_t vsr[2]; 710 711 #ifdef HOST_WORDS_BIGENDIAN 712 vsr[0] = float64_val(env->fpr[i]); 713 vsr[1] = env->vsr[i]; 714 #else 715 vsr[0] = env->vsr[i]; 716 vsr[1] = float64_val(env->fpr[i]); 717 #endif 718 reg.addr = (uintptr_t) &vsr; 719 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i); 720 721 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 722 if (ret < 0) { 723 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR", 724 i, strerror(errno)); 725 return ret; 726 } 727 } 728 } 729 730 if (env->insns_flags & PPC_ALTIVEC) { 731 reg.id = KVM_REG_PPC_VSCR; 732 reg.addr = (uintptr_t)&env->vscr; 733 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 734 if (ret < 0) { 735 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno)); 736 return ret; 737 } 738 739 for (i = 0; i < 32; i++) { 740 reg.id = KVM_REG_PPC_VR(i); 741 reg.addr = (uintptr_t)&env->avr[i]; 742 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 743 if (ret < 0) { 744 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno)); 745 return ret; 746 } 747 } 748 } 749 750 return 0; 751 } 752 753 static int kvm_get_fp(CPUState *cs) 754 { 755 PowerPCCPU *cpu = POWERPC_CPU(cs); 756 CPUPPCState *env = &cpu->env; 757 struct kvm_one_reg reg; 758 int i; 759 int ret; 760 761 if (env->insns_flags & PPC_FLOAT) { 762 uint64_t fpscr; 763 bool vsx = !!(env->insns_flags2 & PPC2_VSX); 764 765 reg.id = KVM_REG_PPC_FPSCR; 766 reg.addr = (uintptr_t)&fpscr; 767 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 768 if (ret < 0) { 769 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno)); 770 return ret; 771 } else { 772 env->fpscr = fpscr; 773 } 774 775 for (i = 0; i < 32; i++) { 776 uint64_t vsr[2]; 777 778 reg.addr = (uintptr_t) &vsr; 779 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i); 780 781 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 782 if (ret < 0) { 783 DPRINTF("Unable to get %s%d from KVM: %s\n", 784 vsx ? 
"VSR" : "FPR", i, strerror(errno)); 785 return ret; 786 } else { 787 #ifdef HOST_WORDS_BIGENDIAN 788 env->fpr[i] = vsr[0]; 789 if (vsx) { 790 env->vsr[i] = vsr[1]; 791 } 792 #else 793 env->fpr[i] = vsr[1]; 794 if (vsx) { 795 env->vsr[i] = vsr[0]; 796 } 797 #endif 798 } 799 } 800 } 801 802 if (env->insns_flags & PPC_ALTIVEC) { 803 reg.id = KVM_REG_PPC_VSCR; 804 reg.addr = (uintptr_t)&env->vscr; 805 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 806 if (ret < 0) { 807 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno)); 808 return ret; 809 } 810 811 for (i = 0; i < 32; i++) { 812 reg.id = KVM_REG_PPC_VR(i); 813 reg.addr = (uintptr_t)&env->avr[i]; 814 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 815 if (ret < 0) { 816 DPRINTF("Unable to get VR%d from KVM: %s\n", 817 i, strerror(errno)); 818 return ret; 819 } 820 } 821 } 822 823 return 0; 824 } 825 826 #if defined(TARGET_PPC64) 827 static int kvm_get_vpa(CPUState *cs) 828 { 829 PowerPCCPU *cpu = POWERPC_CPU(cs); 830 CPUPPCState *env = &cpu->env; 831 struct kvm_one_reg reg; 832 int ret; 833 834 reg.id = KVM_REG_PPC_VPA_ADDR; 835 reg.addr = (uintptr_t)&env->vpa_addr; 836 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 837 if (ret < 0) { 838 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno)); 839 return ret; 840 } 841 842 assert((uintptr_t)&env->slb_shadow_size 843 == ((uintptr_t)&env->slb_shadow_addr + 8)); 844 reg.id = KVM_REG_PPC_VPA_SLB; 845 reg.addr = (uintptr_t)&env->slb_shadow_addr; 846 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 847 if (ret < 0) { 848 DPRINTF("Unable to get SLB shadow state from KVM: %s\n", 849 strerror(errno)); 850 return ret; 851 } 852 853 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8)); 854 reg.id = KVM_REG_PPC_VPA_DTL; 855 reg.addr = (uintptr_t)&env->dtl_addr; 856 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 857 if (ret < 0) { 858 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n", 859 strerror(errno)); 860 return ret; 861 } 862 863 return 0; 864 } 865 866 static int kvm_put_vpa(CPUState *cs) 867 { 868 PowerPCCPU *cpu = POWERPC_CPU(cs); 869 CPUPPCState *env = &cpu->env; 870 struct kvm_one_reg reg; 871 int ret; 872 873 /* SLB shadow or DTL can't be registered unless a master VPA is 874 * registered. That means when restoring state, if a VPA *is* 875 * registered, we need to set that up first. 
If not, we need to 876 * deregister the others before deregistering the master VPA */ 877 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr)); 878 879 if (env->vpa_addr) { 880 reg.id = KVM_REG_PPC_VPA_ADDR; 881 reg.addr = (uintptr_t)&env->vpa_addr; 882 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 883 if (ret < 0) { 884 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno)); 885 return ret; 886 } 887 } 888 889 assert((uintptr_t)&env->slb_shadow_size 890 == ((uintptr_t)&env->slb_shadow_addr + 8)); 891 reg.id = KVM_REG_PPC_VPA_SLB; 892 reg.addr = (uintptr_t)&env->slb_shadow_addr; 893 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 894 if (ret < 0) { 895 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno)); 896 return ret; 897 } 898 899 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8)); 900 reg.id = KVM_REG_PPC_VPA_DTL; 901 reg.addr = (uintptr_t)&env->dtl_addr; 902 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 903 if (ret < 0) { 904 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n", 905 strerror(errno)); 906 return ret; 907 } 908 909 if (!env->vpa_addr) { 910 reg.id = KVM_REG_PPC_VPA_ADDR; 911 reg.addr = (uintptr_t)&env->vpa_addr; 912 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 913 if (ret < 0) { 914 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno)); 915 return ret; 916 } 917 } 918 919 return 0; 920 } 921 #endif /* TARGET_PPC64 */ 922 923 int kvmppc_put_books_sregs(PowerPCCPU *cpu) 924 { 925 CPUPPCState *env = &cpu->env; 926 struct kvm_sregs sregs; 927 int i; 928 929 sregs.pvr = env->spr[SPR_PVR]; 930 931 if (cpu->vhyp) { 932 PPCVirtualHypervisorClass *vhc = 933 PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); 934 sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp); 935 } else { 936 sregs.u.s.sdr1 = env->spr[SPR_SDR1]; 937 } 938 939 /* Sync SLB */ 940 #ifdef TARGET_PPC64 941 for (i = 0; i < ARRAY_SIZE(env->slb); i++) { 942 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid; 943 if (env->slb[i].esid & SLB_ESID_V) { 944 sregs.u.s.ppc64.slb[i].slbe |= i; 945 } 946 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid; 947 } 948 #endif 949 950 /* Sync SRs */ 951 for (i = 0; i < 16; i++) { 952 sregs.u.s.ppc32.sr[i] = env->sr[i]; 953 } 954 955 /* Sync BATs */ 956 for (i = 0; i < 8; i++) { 957 /* Beware. 
We have to swap upper and lower bits here */ 958 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32) 959 | env->DBAT[1][i]; 960 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32) 961 | env->IBAT[1][i]; 962 } 963 964 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs); 965 } 966 967 int kvm_arch_put_registers(CPUState *cs, int level) 968 { 969 PowerPCCPU *cpu = POWERPC_CPU(cs); 970 CPUPPCState *env = &cpu->env; 971 struct kvm_regs regs; 972 int ret; 973 int i; 974 975 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s); 976 if (ret < 0) { 977 return ret; 978 } 979 980 regs.ctr = env->ctr; 981 regs.lr = env->lr; 982 regs.xer = cpu_read_xer(env); 983 regs.msr = env->msr; 984 regs.pc = env->nip; 985 986 regs.srr0 = env->spr[SPR_SRR0]; 987 regs.srr1 = env->spr[SPR_SRR1]; 988 989 regs.sprg0 = env->spr[SPR_SPRG0]; 990 regs.sprg1 = env->spr[SPR_SPRG1]; 991 regs.sprg2 = env->spr[SPR_SPRG2]; 992 regs.sprg3 = env->spr[SPR_SPRG3]; 993 regs.sprg4 = env->spr[SPR_SPRG4]; 994 regs.sprg5 = env->spr[SPR_SPRG5]; 995 regs.sprg6 = env->spr[SPR_SPRG6]; 996 regs.sprg7 = env->spr[SPR_SPRG7]; 997 998 regs.pid = env->spr[SPR_BOOKE_PID]; 999 1000 for (i = 0;i < 32; i++) 1001 regs.gpr[i] = env->gpr[i]; 1002 1003 regs.cr = 0; 1004 for (i = 0; i < 8; i++) { 1005 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i)); 1006 } 1007 1008 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, ®s); 1009 if (ret < 0) 1010 return ret; 1011 1012 kvm_put_fp(cs); 1013 1014 if (env->tlb_dirty) { 1015 kvm_sw_tlb_put(cpu); 1016 env->tlb_dirty = false; 1017 } 1018 1019 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) { 1020 ret = kvmppc_put_books_sregs(cpu); 1021 if (ret < 0) { 1022 return ret; 1023 } 1024 } 1025 1026 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) { 1027 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR); 1028 } 1029 1030 if (cap_one_reg) { 1031 int i; 1032 1033 /* We deliberately ignore errors here, for kernels which have 1034 * the ONE_REG calls, but don't support the specific 1035 * registers, there's a reasonable chance things will still 1036 * work, at least until we try to migrate. 
*/ 1037 for (i = 0; i < 1024; i++) { 1038 uint64_t id = env->spr_cb[i].one_reg_id; 1039 1040 if (id != 0) { 1041 kvm_put_one_spr(cs, id, i); 1042 } 1043 } 1044 1045 #ifdef TARGET_PPC64 1046 if (msr_ts) { 1047 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1048 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1049 } 1050 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1051 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1052 } 1053 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1054 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1055 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1056 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1057 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1058 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1059 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1060 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1061 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1062 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1063 } 1064 1065 if (cap_papr) { 1066 if (kvm_put_vpa(cs) < 0) { 1067 DPRINTF("Warning: Unable to set VPA information to KVM\n"); 1068 } 1069 } 1070 1071 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1072 #endif /* TARGET_PPC64 */ 1073 } 1074 1075 return ret; 1076 } 1077 1078 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor) 1079 { 1080 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR]; 1081 } 1082 1083 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu) 1084 { 1085 CPUPPCState *env = &cpu->env; 1086 struct kvm_sregs sregs; 1087 int ret; 1088 1089 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1090 if (ret < 0) { 1091 return ret; 1092 } 1093 1094 if (sregs.u.e.features & KVM_SREGS_E_BASE) { 1095 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0; 1096 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1; 1097 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr; 1098 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear; 1099 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr; 1100 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr; 1101 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr; 1102 env->spr[SPR_DECR] = sregs.u.e.dec; 1103 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff; 1104 env->spr[SPR_TBU] = sregs.u.e.tb >> 32; 1105 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave; 1106 } 1107 1108 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) { 1109 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir; 1110 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0; 1111 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1; 1112 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar; 1113 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr; 1114 } 1115 1116 if (sregs.u.e.features & KVM_SREGS_E_64) { 1117 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr; 1118 } 1119 1120 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) { 1121 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8; 1122 } 1123 1124 if (sregs.u.e.features & KVM_SREGS_E_IVOR) { 1125 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0]; 1126 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0); 1127 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1]; 1128 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1); 1129 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2]; 1130 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2); 1131 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3]; 1132 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3); 1133 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4]; 1134 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4); 1135 
env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5]; 1136 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5); 1137 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6]; 1138 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6); 1139 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7]; 1140 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7); 1141 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8]; 1142 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8); 1143 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9]; 1144 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9); 1145 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10]; 1146 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10); 1147 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11]; 1148 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11); 1149 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12]; 1150 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12); 1151 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13]; 1152 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13); 1153 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14]; 1154 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14); 1155 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15]; 1156 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15); 1157 1158 if (sregs.u.e.features & KVM_SREGS_E_SPE) { 1159 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0]; 1160 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32); 1161 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1]; 1162 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33); 1163 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2]; 1164 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34); 1165 } 1166 1167 if (sregs.u.e.features & KVM_SREGS_E_PM) { 1168 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3]; 1169 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35); 1170 } 1171 1172 if (sregs.u.e.features & KVM_SREGS_E_PC) { 1173 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4]; 1174 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36); 1175 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5]; 1176 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37); 1177 } 1178 } 1179 1180 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) { 1181 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0; 1182 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1; 1183 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2; 1184 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff; 1185 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4; 1186 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6; 1187 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32; 1188 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg; 1189 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0]; 1190 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1]; 1191 } 1192 1193 if (sregs.u.e.features & KVM_SREGS_EXP) { 1194 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr; 1195 } 1196 1197 if (sregs.u.e.features & KVM_SREGS_E_PD) { 1198 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc; 1199 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc; 1200 } 1201 1202 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { 1203 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr; 1204 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar; 1205 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0; 1206 1207 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) { 1208 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1; 1209 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2; 1210 } 1211 } 1212 1213 return 0; 1214 } 1215 1216 static int 
kvmppc_get_books_sregs(PowerPCCPU *cpu) 1217 { 1218 CPUPPCState *env = &cpu->env; 1219 struct kvm_sregs sregs; 1220 int ret; 1221 int i; 1222 1223 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1224 if (ret < 0) { 1225 return ret; 1226 } 1227 1228 if (!cpu->vhyp) { 1229 ppc_store_sdr1(env, sregs.u.s.sdr1); 1230 } 1231 1232 /* Sync SLB */ 1233 #ifdef TARGET_PPC64 1234 /* 1235 * The packed SLB array we get from KVM_GET_SREGS only contains 1236 * information about valid entries. So we flush our internal copy 1237 * to get rid of stale ones, then put all valid SLB entries back 1238 * in. 1239 */ 1240 memset(env->slb, 0, sizeof(env->slb)); 1241 for (i = 0; i < ARRAY_SIZE(env->slb); i++) { 1242 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe; 1243 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv; 1244 /* 1245 * Only restore valid entries 1246 */ 1247 if (rb & SLB_ESID_V) { 1248 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs); 1249 } 1250 } 1251 #endif 1252 1253 /* Sync SRs */ 1254 for (i = 0; i < 16; i++) { 1255 env->sr[i] = sregs.u.s.ppc32.sr[i]; 1256 } 1257 1258 /* Sync BATs */ 1259 for (i = 0; i < 8; i++) { 1260 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff; 1261 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32; 1262 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff; 1263 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32; 1264 } 1265 1266 return 0; 1267 } 1268 1269 int kvm_arch_get_registers(CPUState *cs) 1270 { 1271 PowerPCCPU *cpu = POWERPC_CPU(cs); 1272 CPUPPCState *env = &cpu->env; 1273 struct kvm_regs regs; 1274 uint32_t cr; 1275 int i, ret; 1276 1277 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s); 1278 if (ret < 0) 1279 return ret; 1280 1281 cr = regs.cr; 1282 for (i = 7; i >= 0; i--) { 1283 env->crf[i] = cr & 15; 1284 cr >>= 4; 1285 } 1286 1287 env->ctr = regs.ctr; 1288 env->lr = regs.lr; 1289 cpu_write_xer(env, regs.xer); 1290 env->msr = regs.msr; 1291 env->nip = regs.pc; 1292 1293 env->spr[SPR_SRR0] = regs.srr0; 1294 env->spr[SPR_SRR1] = regs.srr1; 1295 1296 env->spr[SPR_SPRG0] = regs.sprg0; 1297 env->spr[SPR_SPRG1] = regs.sprg1; 1298 env->spr[SPR_SPRG2] = regs.sprg2; 1299 env->spr[SPR_SPRG3] = regs.sprg3; 1300 env->spr[SPR_SPRG4] = regs.sprg4; 1301 env->spr[SPR_SPRG5] = regs.sprg5; 1302 env->spr[SPR_SPRG6] = regs.sprg6; 1303 env->spr[SPR_SPRG7] = regs.sprg7; 1304 1305 env->spr[SPR_BOOKE_PID] = regs.pid; 1306 1307 for (i = 0;i < 32; i++) 1308 env->gpr[i] = regs.gpr[i]; 1309 1310 kvm_get_fp(cs); 1311 1312 if (cap_booke_sregs) { 1313 ret = kvmppc_get_booke_sregs(cpu); 1314 if (ret < 0) { 1315 return ret; 1316 } 1317 } 1318 1319 if (cap_segstate) { 1320 ret = kvmppc_get_books_sregs(cpu); 1321 if (ret < 0) { 1322 return ret; 1323 } 1324 } 1325 1326 if (cap_hior) { 1327 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR); 1328 } 1329 1330 if (cap_one_reg) { 1331 int i; 1332 1333 /* We deliberately ignore errors here, for kernels which have 1334 * the ONE_REG calls, but don't support the specific 1335 * registers, there's a reasonable chance things will still 1336 * work, at least until we try to migrate. 
*/ 1337 for (i = 0; i < 1024; i++) { 1338 uint64_t id = env->spr_cb[i].one_reg_id; 1339 1340 if (id != 0) { 1341 kvm_get_one_spr(cs, id, i); 1342 } 1343 } 1344 1345 #ifdef TARGET_PPC64 1346 if (msr_ts) { 1347 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1348 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1349 } 1350 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1351 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1352 } 1353 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1354 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1355 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1356 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1357 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1358 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1359 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1360 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1361 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1362 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1363 } 1364 1365 if (cap_papr) { 1366 if (kvm_get_vpa(cs) < 0) { 1367 DPRINTF("Warning: Unable to get VPA information from KVM\n"); 1368 } 1369 } 1370 1371 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1372 #endif 1373 } 1374 1375 return 0; 1376 } 1377 1378 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level) 1379 { 1380 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET; 1381 1382 if (irq != PPC_INTERRUPT_EXT) { 1383 return 0; 1384 } 1385 1386 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) { 1387 return 0; 1388 } 1389 1390 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq); 1391 1392 return 0; 1393 } 1394 1395 #if defined(TARGET_PPCEMB) 1396 #define PPC_INPUT_INT PPC40x_INPUT_INT 1397 #elif defined(TARGET_PPC64) 1398 #define PPC_INPUT_INT PPC970_INPUT_INT 1399 #else 1400 #define PPC_INPUT_INT PPC6xx_INPUT_INT 1401 #endif 1402 1403 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) 1404 { 1405 PowerPCCPU *cpu = POWERPC_CPU(cs); 1406 CPUPPCState *env = &cpu->env; 1407 int r; 1408 unsigned irq; 1409 1410 qemu_mutex_lock_iothread(); 1411 1412 /* PowerPC QEMU tracks the various core input pins (interrupt, critical 1413 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */ 1414 if (!cap_interrupt_level && 1415 run->ready_for_interrupt_injection && 1416 (cs->interrupt_request & CPU_INTERRUPT_HARD) && 1417 (env->irq_input_state & (1<<PPC_INPUT_INT))) 1418 { 1419 /* For now KVM disregards the 'irq' argument. However, in the 1420 * future KVM could cache it in-kernel to avoid a heavyweight exit 1421 * when reading the UIC. 1422 */ 1423 irq = KVM_INTERRUPT_SET; 1424 1425 DPRINTF("injected interrupt %d\n", irq); 1426 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq); 1427 if (r < 0) { 1428 printf("cpu %d fail inject %x\n", cs->cpu_index, irq); 1429 } 1430 1431 /* Always wake up soon in case the interrupt was level based */ 1432 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1433 (NANOSECONDS_PER_SECOND / 50)); 1434 } 1435 1436 /* We don't know if there are more interrupts pending after this. However, 1437 * the guest will return to userspace in the course of handling this one 1438 * anyways, so we will get a chance to deliver the rest. 
*/ 1439 1440 qemu_mutex_unlock_iothread(); 1441 } 1442 1443 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) 1444 { 1445 return MEMTXATTRS_UNSPECIFIED; 1446 } 1447 1448 int kvm_arch_process_async_events(CPUState *cs) 1449 { 1450 return cs->halted; 1451 } 1452 1453 static int kvmppc_handle_halt(PowerPCCPU *cpu) 1454 { 1455 CPUState *cs = CPU(cpu); 1456 CPUPPCState *env = &cpu->env; 1457 1458 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) { 1459 cs->halted = 1; 1460 cs->exception_index = EXCP_HLT; 1461 } 1462 1463 return 0; 1464 } 1465 1466 /* map dcr access to existing qemu dcr emulation */ 1467 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data) 1468 { 1469 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) 1470 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn); 1471 1472 return 0; 1473 } 1474 1475 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data) 1476 { 1477 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) 1478 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn); 1479 1480 return 0; 1481 } 1482 1483 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1484 { 1485 /* Mixed endian case is not handled */ 1486 uint32_t sc = debug_inst_opcode; 1487 1488 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1489 sizeof(sc), 0) || 1490 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) { 1491 return -EINVAL; 1492 } 1493 1494 return 0; 1495 } 1496 1497 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1498 { 1499 uint32_t sc; 1500 1501 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) || 1502 sc != debug_inst_opcode || 1503 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1504 sizeof(sc), 1)) { 1505 return -EINVAL; 1506 } 1507 1508 return 0; 1509 } 1510 1511 static int find_hw_breakpoint(target_ulong addr, int type) 1512 { 1513 int n; 1514 1515 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1516 <= ARRAY_SIZE(hw_debug_points)); 1517 1518 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1519 if (hw_debug_points[n].addr == addr && 1520 hw_debug_points[n].type == type) { 1521 return n; 1522 } 1523 } 1524 1525 return -1; 1526 } 1527 1528 static int find_hw_watchpoint(target_ulong addr, int *flag) 1529 { 1530 int n; 1531 1532 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS); 1533 if (n >= 0) { 1534 *flag = BP_MEM_ACCESS; 1535 return n; 1536 } 1537 1538 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE); 1539 if (n >= 0) { 1540 *flag = BP_MEM_WRITE; 1541 return n; 1542 } 1543 1544 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ); 1545 if (n >= 0) { 1546 *flag = BP_MEM_READ; 1547 return n; 1548 } 1549 1550 return -1; 1551 } 1552 1553 int kvm_arch_insert_hw_breakpoint(target_ulong addr, 1554 target_ulong len, int type) 1555 { 1556 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) { 1557 return -ENOBUFS; 1558 } 1559 1560 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr; 1561 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type; 1562 1563 switch (type) { 1564 case GDB_BREAKPOINT_HW: 1565 if (nb_hw_breakpoint >= max_hw_breakpoint) { 1566 return -ENOBUFS; 1567 } 1568 1569 if (find_hw_breakpoint(addr, type) >= 0) { 1570 return -EEXIST; 1571 } 1572 1573 nb_hw_breakpoint++; 1574 break; 1575 1576 case GDB_WATCHPOINT_WRITE: 1577 case GDB_WATCHPOINT_READ: 1578 case GDB_WATCHPOINT_ACCESS: 1579 if (nb_hw_watchpoint >= max_hw_watchpoint) { 1580 
return -ENOBUFS; 1581 } 1582 1583 if (find_hw_breakpoint(addr, type) >= 0) { 1584 return -EEXIST; 1585 } 1586 1587 nb_hw_watchpoint++; 1588 break; 1589 1590 default: 1591 return -ENOSYS; 1592 } 1593 1594 return 0; 1595 } 1596 1597 int kvm_arch_remove_hw_breakpoint(target_ulong addr, 1598 target_ulong len, int type) 1599 { 1600 int n; 1601 1602 n = find_hw_breakpoint(addr, type); 1603 if (n < 0) { 1604 return -ENOENT; 1605 } 1606 1607 switch (type) { 1608 case GDB_BREAKPOINT_HW: 1609 nb_hw_breakpoint--; 1610 break; 1611 1612 case GDB_WATCHPOINT_WRITE: 1613 case GDB_WATCHPOINT_READ: 1614 case GDB_WATCHPOINT_ACCESS: 1615 nb_hw_watchpoint--; 1616 break; 1617 1618 default: 1619 return -ENOSYS; 1620 } 1621 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint]; 1622 1623 return 0; 1624 } 1625 1626 void kvm_arch_remove_all_hw_breakpoints(void) 1627 { 1628 nb_hw_breakpoint = nb_hw_watchpoint = 0; 1629 } 1630 1631 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg) 1632 { 1633 int n; 1634 1635 /* Software Breakpoint updates */ 1636 if (kvm_sw_breakpoints_active(cs)) { 1637 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; 1638 } 1639 1640 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1641 <= ARRAY_SIZE(hw_debug_points)); 1642 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp)); 1643 1644 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1645 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; 1646 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp)); 1647 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1648 switch (hw_debug_points[n].type) { 1649 case GDB_BREAKPOINT_HW: 1650 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT; 1651 break; 1652 case GDB_WATCHPOINT_WRITE: 1653 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE; 1654 break; 1655 case GDB_WATCHPOINT_READ: 1656 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ; 1657 break; 1658 case GDB_WATCHPOINT_ACCESS: 1659 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE | 1660 KVMPPC_DEBUG_WATCH_READ; 1661 break; 1662 default: 1663 cpu_abort(cs, "Unsupported breakpoint type\n"); 1664 } 1665 dbg->arch.bp[n].addr = hw_debug_points[n].addr; 1666 } 1667 } 1668 } 1669 1670 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run) 1671 { 1672 CPUState *cs = CPU(cpu); 1673 CPUPPCState *env = &cpu->env; 1674 struct kvm_debug_exit_arch *arch_info = &run->debug.arch; 1675 int handle = 0; 1676 int n; 1677 int flag = 0; 1678 1679 if (cs->singlestep_enabled) { 1680 handle = 1; 1681 } else if (arch_info->status) { 1682 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1683 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) { 1684 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW); 1685 if (n >= 0) { 1686 handle = 1; 1687 } 1688 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ | 1689 KVMPPC_DEBUG_WATCH_WRITE)) { 1690 n = find_hw_watchpoint(arch_info->address, &flag); 1691 if (n >= 0) { 1692 handle = 1; 1693 cs->watchpoint_hit = &hw_watchpoint; 1694 hw_watchpoint.vaddr = hw_debug_points[n].addr; 1695 hw_watchpoint.flags = flag; 1696 } 1697 } 1698 } 1699 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) { 1700 handle = 1; 1701 } else { 1702 /* QEMU is not able to handle debug exception, so inject 1703 * program exception to guest; 1704 * Yes program exception NOT debug exception !! 1705 * When QEMU is using debug resources then debug exception must 1706 * be always set. To achieve this we set MSR_DE and also set 1707 * MSRP_DEP so guest cannot change MSR_DE. 
1708 * When emulating debug resource for guest we want guest 1709 * to control MSR_DE (enable/disable debug interrupt on need). 1710 * Supporting both configurations are NOT possible. 1711 * So the result is that we cannot share debug resources 1712 * between QEMU and Guest on BOOKE architecture. 1713 * In the current design QEMU gets the priority over guest, 1714 * this means that if QEMU is using debug resources then guest 1715 * cannot use them; 1716 * For software breakpoint QEMU uses a privileged instruction; 1717 * So there cannot be any reason that we are here for guest 1718 * set debug exception, only possibility is guest executed a 1719 * privileged / illegal instruction and that's why we are 1720 * injecting a program interrupt. 1721 */ 1722 1723 cpu_synchronize_state(cs); 1724 /* env->nip is PC, so increment this by 4 to use 1725 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4. 1726 */ 1727 env->nip += 4; 1728 cs->exception_index = POWERPC_EXCP_PROGRAM; 1729 env->error_code = POWERPC_EXCP_INVAL; 1730 ppc_cpu_do_interrupt(cs); 1731 } 1732 1733 return handle; 1734 } 1735 1736 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) 1737 { 1738 PowerPCCPU *cpu = POWERPC_CPU(cs); 1739 CPUPPCState *env = &cpu->env; 1740 int ret; 1741 1742 qemu_mutex_lock_iothread(); 1743 1744 switch (run->exit_reason) { 1745 case KVM_EXIT_DCR: 1746 if (run->dcr.is_write) { 1747 DPRINTF("handle dcr write\n"); 1748 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data); 1749 } else { 1750 DPRINTF("handle dcr read\n"); 1751 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data); 1752 } 1753 break; 1754 case KVM_EXIT_HLT: 1755 DPRINTF("handle halt\n"); 1756 ret = kvmppc_handle_halt(cpu); 1757 break; 1758 #if defined(TARGET_PPC64) 1759 case KVM_EXIT_PAPR_HCALL: 1760 DPRINTF("handle PAPR hypercall\n"); 1761 run->papr_hcall.ret = spapr_hypercall(cpu, 1762 run->papr_hcall.nr, 1763 run->papr_hcall.args); 1764 ret = 0; 1765 break; 1766 #endif 1767 case KVM_EXIT_EPR: 1768 DPRINTF("handle epr\n"); 1769 run->epr.epr = ldl_phys(cs->as, env->mpic_iack); 1770 ret = 0; 1771 break; 1772 case KVM_EXIT_WATCHDOG: 1773 DPRINTF("handle watchdog expiry\n"); 1774 watchdog_perform_action(); 1775 ret = 0; 1776 break; 1777 1778 case KVM_EXIT_DEBUG: 1779 DPRINTF("handle debug exception\n"); 1780 if (kvm_handle_debug(cpu, run)) { 1781 ret = EXCP_DEBUG; 1782 break; 1783 } 1784 /* re-enter, this exception was guest-internal */ 1785 ret = 0; 1786 break; 1787 1788 default: 1789 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); 1790 ret = -1; 1791 break; 1792 } 1793 1794 qemu_mutex_unlock_iothread(); 1795 return ret; 1796 } 1797 1798 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits) 1799 { 1800 CPUState *cs = CPU(cpu); 1801 uint32_t bits = tsr_bits; 1802 struct kvm_one_reg reg = { 1803 .id = KVM_REG_PPC_OR_TSR, 1804 .addr = (uintptr_t) &bits, 1805 }; 1806 1807 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1808 } 1809 1810 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits) 1811 { 1812 1813 CPUState *cs = CPU(cpu); 1814 uint32_t bits = tsr_bits; 1815 struct kvm_one_reg reg = { 1816 .id = KVM_REG_PPC_CLEAR_TSR, 1817 .addr = (uintptr_t) &bits, 1818 }; 1819 1820 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1821 } 1822 1823 int kvmppc_set_tcr(PowerPCCPU *cpu) 1824 { 1825 CPUState *cs = CPU(cpu); 1826 CPUPPCState *env = &cpu->env; 1827 uint32_t tcr = env->spr[SPR_BOOKE_TCR]; 1828 1829 struct kvm_one_reg reg = { 1830 .id = KVM_REG_PPC_TCR, 1831 .addr = (uintptr_t) &tcr, 
1832 }; 1833 1834 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1835 } 1836 1837 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu) 1838 { 1839 CPUState *cs = CPU(cpu); 1840 int ret; 1841 1842 if (!kvm_enabled()) { 1843 return -1; 1844 } 1845 1846 if (!cap_ppc_watchdog) { 1847 printf("warning: KVM does not support watchdog"); 1848 return -1; 1849 } 1850 1851 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0); 1852 if (ret < 0) { 1853 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n", 1854 __func__, strerror(-ret)); 1855 return ret; 1856 } 1857 1858 return ret; 1859 } 1860 1861 static int read_cpuinfo(const char *field, char *value, int len) 1862 { 1863 FILE *f; 1864 int ret = -1; 1865 int field_len = strlen(field); 1866 char line[512]; 1867 1868 f = fopen("/proc/cpuinfo", "r"); 1869 if (!f) { 1870 return -1; 1871 } 1872 1873 do { 1874 if (!fgets(line, sizeof(line), f)) { 1875 break; 1876 } 1877 if (!strncmp(line, field, field_len)) { 1878 pstrcpy(value, len, line); 1879 ret = 0; 1880 break; 1881 } 1882 } while(*line); 1883 1884 fclose(f); 1885 1886 return ret; 1887 } 1888 1889 uint32_t kvmppc_get_tbfreq(void) 1890 { 1891 char line[512]; 1892 char *ns; 1893 uint32_t retval = NANOSECONDS_PER_SECOND; 1894 1895 if (read_cpuinfo("timebase", line, sizeof(line))) { 1896 return retval; 1897 } 1898 1899 if (!(ns = strchr(line, ':'))) { 1900 return retval; 1901 } 1902 1903 ns++; 1904 1905 return atoi(ns); 1906 } 1907 1908 bool kvmppc_get_host_serial(char **value) 1909 { 1910 return g_file_get_contents("/proc/device-tree/system-id", value, NULL, 1911 NULL); 1912 } 1913 1914 bool kvmppc_get_host_model(char **value) 1915 { 1916 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL); 1917 } 1918 1919 /* Try to find a device tree node for a CPU with clock-frequency property */ 1920 static int kvmppc_find_cpu_dt(char *buf, int buf_len) 1921 { 1922 struct dirent *dirp; 1923 DIR *dp; 1924 1925 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) { 1926 printf("Can't open directory " PROC_DEVTREE_CPU "\n"); 1927 return -1; 1928 } 1929 1930 buf[0] = '\0'; 1931 while ((dirp = readdir(dp)) != NULL) { 1932 FILE *f; 1933 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU, 1934 dirp->d_name); 1935 f = fopen(buf, "r"); 1936 if (f) { 1937 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name); 1938 fclose(f); 1939 break; 1940 } 1941 buf[0] = '\0'; 1942 } 1943 closedir(dp); 1944 if (buf[0] == '\0') { 1945 printf("Unknown host!\n"); 1946 return -1; 1947 } 1948 1949 return 0; 1950 } 1951 1952 static uint64_t kvmppc_read_int_dt(const char *filename) 1953 { 1954 union { 1955 uint32_t v32; 1956 uint64_t v64; 1957 } u; 1958 FILE *f; 1959 int len; 1960 1961 f = fopen(filename, "rb"); 1962 if (!f) { 1963 return -1; 1964 } 1965 1966 len = fread(&u, 1, sizeof(u), f); 1967 fclose(f); 1968 switch (len) { 1969 case 4: 1970 /* property is a 32-bit quantity */ 1971 return be32_to_cpu(u.v32); 1972 case 8: 1973 return be64_to_cpu(u.v64); 1974 } 1975 1976 return 0; 1977 } 1978 1979 /* Read a CPU node property from the host device tree that's a single 1980 * integer (32-bit or 64-bit). 
Returns 0 if anything goes wrong 1981 * (can't find or open the property, or doesn't understand the 1982 * format) */ 1983 static uint64_t kvmppc_read_int_cpu_dt(const char *propname) 1984 { 1985 char buf[PATH_MAX], *tmp; 1986 uint64_t val; 1987 1988 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) { 1989 return -1; 1990 } 1991 1992 tmp = g_strdup_printf("%s/%s", buf, propname); 1993 val = kvmppc_read_int_dt(tmp); 1994 g_free(tmp); 1995 1996 return val; 1997 } 1998 1999 uint64_t kvmppc_get_clockfreq(void) 2000 { 2001 return kvmppc_read_int_cpu_dt("clock-frequency"); 2002 } 2003 2004 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo) 2005 { 2006 PowerPCCPU *cpu = ppc_env_get_cpu(env); 2007 CPUState *cs = CPU(cpu); 2008 2009 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) && 2010 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) { 2011 return 0; 2012 } 2013 2014 return 1; 2015 } 2016 2017 int kvmppc_get_hasidle(CPUPPCState *env) 2018 { 2019 struct kvm_ppc_pvinfo pvinfo; 2020 2021 if (!kvmppc_get_pvinfo(env, &pvinfo) && 2022 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) { 2023 return 1; 2024 } 2025 2026 return 0; 2027 } 2028 2029 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len) 2030 { 2031 uint32_t *hc = (uint32_t*)buf; 2032 struct kvm_ppc_pvinfo pvinfo; 2033 2034 if (!kvmppc_get_pvinfo(env, &pvinfo)) { 2035 memcpy(buf, pvinfo.hcall, buf_len); 2036 return 0; 2037 } 2038 2039 /* 2040 * Fallback to always fail hypercalls regardless of endianness: 2041 * 2042 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian) 2043 * li r3, -1 2044 * b .+8 (becomes nop in wrong endian) 2045 * bswap32(li r3, -1) 2046 */ 2047 2048 hc[0] = cpu_to_be32(0x08000048); 2049 hc[1] = cpu_to_be32(0x3860ffff); 2050 hc[2] = cpu_to_be32(0x48000008); 2051 hc[3] = cpu_to_be32(bswap32(0x3860ffff)); 2052 2053 return 1; 2054 } 2055 2056 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall) 2057 { 2058 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1); 2059 } 2060 2061 void kvmppc_enable_logical_ci_hcalls(void) 2062 { 2063 /* 2064 * FIXME: it would be nice if we could detect the cases where 2065 * we're using a device which requires the in kernel 2066 * implementation of these hcalls, but the kernel lacks them and 2067 * produce a warning. 
2068 */ 2069 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD); 2070 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE); 2071 } 2072 2073 void kvmppc_enable_set_mode_hcall(void) 2074 { 2075 kvmppc_enable_hcall(kvm_state, H_SET_MODE); 2076 } 2077 2078 void kvmppc_enable_clear_ref_mod_hcalls(void) 2079 { 2080 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF); 2081 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD); 2082 } 2083 2084 void kvmppc_set_papr(PowerPCCPU *cpu) 2085 { 2086 CPUState *cs = CPU(cpu); 2087 int ret; 2088 2089 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0); 2090 if (ret) { 2091 error_report("This vCPU type or KVM version does not support PAPR"); 2092 exit(1); 2093 } 2094 2095 /* Update the capability flag so we sync the right information 2096 * with kvm */ 2097 cap_papr = 1; 2098 } 2099 2100 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr) 2101 { 2102 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr); 2103 } 2104 2105 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy) 2106 { 2107 CPUState *cs = CPU(cpu); 2108 int ret; 2109 2110 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy); 2111 if (ret && mpic_proxy) { 2112 error_report("This KVM version does not support EPR"); 2113 exit(1); 2114 } 2115 } 2116 2117 int kvmppc_smt_threads(void) 2118 { 2119 return cap_ppc_smt ? cap_ppc_smt : 1; 2120 } 2121 2122 int kvmppc_set_smt_threads(int smt) 2123 { 2124 int ret; 2125 2126 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0); 2127 if (!ret) { 2128 cap_ppc_smt = smt; 2129 } 2130 return ret; 2131 } 2132 2133 void kvmppc_hint_smt_possible(Error **errp) 2134 { 2135 int i; 2136 GString *g; 2137 char *s; 2138 2139 assert(kvm_enabled()); 2140 if (cap_ppc_smt_possible) { 2141 g = g_string_new("Available VSMT modes:"); 2142 for (i = 63; i >= 0; i--) { 2143 if ((1UL << i) & cap_ppc_smt_possible) { 2144 g_string_append_printf(g, " %lu", (1UL << i)); 2145 } 2146 } 2147 s = g_string_free(g, false); 2148 error_append_hint(errp, "%s.\n", s); 2149 g_free(s); 2150 } else { 2151 error_append_hint(errp, 2152 "This KVM seems to be too old to support VSMT.\n"); 2153 } 2154 } 2155 2156 2157 #ifdef TARGET_PPC64 2158 off_t kvmppc_alloc_rma(void **rma) 2159 { 2160 off_t size; 2161 int fd; 2162 struct kvm_allocate_rma ret; 2163 2164 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported 2165 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but 2166 * not necessary on this hardware 2167 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware 2168 * 2169 * FIXME: We should allow the user to force contiguous RMA 2170 * allocation in the cap_ppc_rma==1 case. 
2171 */ 2172 if (cap_ppc_rma < 2) { 2173 return 0; 2174 } 2175 2176 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret); 2177 if (fd < 0) { 2178 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n", 2179 strerror(errno)); 2180 return -1; 2181 } 2182 2183 size = MIN(ret.rma_size, 256ul << 20); 2184 2185 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); 2186 if (*rma == MAP_FAILED) { 2187 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno)); 2188 return -1; 2189 }; 2190 2191 return size; 2192 } 2193 2194 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift) 2195 { 2196 struct kvm_ppc_smmu_info info; 2197 long rampagesize, best_page_shift; 2198 int i; 2199 2200 if (cap_ppc_rma >= 2) { 2201 return current_size; 2202 } 2203 2204 /* Find the largest hardware supported page size that's less than 2205 * or equal to the (logical) backing page size of guest RAM */ 2206 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info); 2207 rampagesize = qemu_getrampagesize(); 2208 best_page_shift = 0; 2209 2210 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) { 2211 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i]; 2212 2213 if (!sps->page_shift) { 2214 continue; 2215 } 2216 2217 if ((sps->page_shift > best_page_shift) 2218 && ((1UL << sps->page_shift) <= rampagesize)) { 2219 best_page_shift = sps->page_shift; 2220 } 2221 } 2222 2223 return MIN(current_size, 2224 1ULL << (best_page_shift + hash_shift - 7)); 2225 } 2226 #endif 2227 2228 bool kvmppc_spapr_use_multitce(void) 2229 { 2230 return cap_spapr_multitce; 2231 } 2232 2233 int kvmppc_spapr_enable_inkernel_multitce(void) 2234 { 2235 int ret; 2236 2237 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0, 2238 H_PUT_TCE_INDIRECT, 1); 2239 if (!ret) { 2240 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0, 2241 H_STUFF_TCE, 1); 2242 } 2243 2244 return ret; 2245 } 2246 2247 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift, 2248 uint64_t bus_offset, uint32_t nb_table, 2249 int *pfd, bool need_vfio) 2250 { 2251 long len; 2252 int fd; 2253 void *table; 2254 2255 /* Must set fd to -1 so we don't try to munmap when called for 2256 * destroying the table, which the upper layers -will- do 2257 */ 2258 *pfd = -1; 2259 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) { 2260 return NULL; 2261 } 2262 2263 if (cap_spapr_tce_64) { 2264 struct kvm_create_spapr_tce_64 args = { 2265 .liobn = liobn, 2266 .page_shift = page_shift, 2267 .offset = bus_offset >> page_shift, 2268 .size = nb_table, 2269 .flags = 0 2270 }; 2271 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args); 2272 if (fd < 0) { 2273 fprintf(stderr, 2274 "KVM: Failed to create TCE64 table for liobn 0x%x\n", 2275 liobn); 2276 return NULL; 2277 } 2278 } else if (cap_spapr_tce) { 2279 uint64_t window_size = (uint64_t) nb_table << page_shift; 2280 struct kvm_create_spapr_tce args = { 2281 .liobn = liobn, 2282 .window_size = window_size, 2283 }; 2284 if ((window_size != args.window_size) || bus_offset) { 2285 return NULL; 2286 } 2287 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args); 2288 if (fd < 0) { 2289 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n", 2290 liobn); 2291 return NULL; 2292 } 2293 } else { 2294 return NULL; 2295 } 2296 2297 len = nb_table * sizeof(uint64_t); 2298 /* FIXME: round this up to page size */ 2299 2300 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); 2301 if (table == MAP_FAILED) { 2302 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n", 
2303 liobn); 2304 close(fd); 2305 return NULL; 2306 } 2307 2308 *pfd = fd; 2309 return table; 2310 } 2311 2312 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table) 2313 { 2314 long len; 2315 2316 if (fd < 0) { 2317 return -1; 2318 } 2319 2320 len = nb_table * sizeof(uint64_t); 2321 if ((munmap(table, len) < 0) || 2322 (close(fd) < 0)) { 2323 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s", 2324 strerror(errno)); 2325 /* Leak the table */ 2326 } 2327 2328 return 0; 2329 } 2330 2331 int kvmppc_reset_htab(int shift_hint) 2332 { 2333 uint32_t shift = shift_hint; 2334 2335 if (!kvm_enabled()) { 2336 /* Full emulation, tell caller to allocate htab itself */ 2337 return 0; 2338 } 2339 if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) { 2340 int ret; 2341 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift); 2342 if (ret == -ENOTTY) { 2343 /* At least some versions of PR KVM advertise the 2344 * capability, but don't implement the ioctl(). Oops. 2345 * Return 0 so that we allocate the htab in qemu, as is 2346 * correct for PR. */ 2347 return 0; 2348 } else if (ret < 0) { 2349 return ret; 2350 } 2351 return shift; 2352 } 2353 2354 /* We have a kernel that predates the htab reset calls. For PR 2355 * KVM, we need to allocate the htab ourselves, for an HV KVM of 2356 * this era, it has allocated a 16MB fixed size hash table already. */ 2357 if (kvmppc_is_pr(kvm_state)) { 2358 /* PR - tell caller to allocate htab */ 2359 return 0; 2360 } else { 2361 /* HV - assume 16MB kernel allocated htab */ 2362 return 24; 2363 } 2364 } 2365 2366 static inline uint32_t mfpvr(void) 2367 { 2368 uint32_t pvr; 2369 2370 asm ("mfpvr %0" 2371 : "=r"(pvr)); 2372 return pvr; 2373 } 2374 2375 static void alter_insns(uint64_t *word, uint64_t flags, bool on) 2376 { 2377 if (on) { 2378 *word |= flags; 2379 } else { 2380 *word &= ~flags; 2381 } 2382 } 2383 2384 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data) 2385 { 2386 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); 2387 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size"); 2388 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size"); 2389 2390 /* Now fix up the class with information we can query from the host */ 2391 pcc->pvr = mfpvr(); 2392 2393 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, 2394 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC); 2395 alter_insns(&pcc->insns_flags2, PPC2_VSX, 2396 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX); 2397 alter_insns(&pcc->insns_flags2, PPC2_DFP, 2398 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP); 2399 2400 if (dcache_size != -1) { 2401 pcc->l1_dcache_size = dcache_size; 2402 } 2403 2404 if (icache_size != -1) { 2405 pcc->l1_icache_size = icache_size; 2406 } 2407 2408 #if defined(TARGET_PPC64) 2409 pcc->radix_page_info = kvm_get_radix_page_info(); 2410 2411 if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) { 2412 /* 2413 * POWER9 DD1 has some bugs which make it not really ISA 3.00 2414 * compliant. More importantly, advertising ISA 3.00 2415 * architected mode may prevent guests from activating 2416 * necessary DD1 workarounds. 
2417 */ 2418 pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07 2419 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05); 2420 } 2421 #endif /* defined(TARGET_PPC64) */ 2422 } 2423 2424 bool kvmppc_has_cap_epr(void) 2425 { 2426 return cap_epr; 2427 } 2428 2429 bool kvmppc_has_cap_fixup_hcalls(void) 2430 { 2431 return cap_fixup_hcalls; 2432 } 2433 2434 bool kvmppc_has_cap_htm(void) 2435 { 2436 return cap_htm; 2437 } 2438 2439 bool kvmppc_has_cap_mmu_radix(void) 2440 { 2441 return cap_mmu_radix; 2442 } 2443 2444 bool kvmppc_has_cap_mmu_hash_v3(void) 2445 { 2446 return cap_mmu_hash_v3; 2447 } 2448 2449 static void kvmppc_get_cpu_characteristics(KVMState *s) 2450 { 2451 struct kvm_ppc_cpu_char c; 2452 int ret; 2453 2454 /* Assume broken */ 2455 cap_ppc_safe_cache = 0; 2456 cap_ppc_safe_bounds_check = 0; 2457 cap_ppc_safe_indirect_branch = 0; 2458 2459 ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR); 2460 if (!ret) { 2461 return; 2462 } 2463 ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c); 2464 if (ret < 0) { 2465 return; 2466 } 2467 /* Parse and set cap_ppc_safe_cache */ 2468 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) { 2469 cap_ppc_safe_cache = 2; 2470 } else if ((c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) && 2471 (c.character & c.character_mask 2472 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) { 2473 cap_ppc_safe_cache = 1; 2474 } 2475 /* Parse and set cap_ppc_safe_bounds_check */ 2476 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) { 2477 cap_ppc_safe_bounds_check = 2; 2478 } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) { 2479 cap_ppc_safe_bounds_check = 1; 2480 } 2481 /* Parse and set cap_ppc_safe_indirect_branch */ 2482 if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) { 2483 cap_ppc_safe_indirect_branch = SPAPR_CAP_FIXED_CCD; 2484 } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) { 2485 cap_ppc_safe_indirect_branch = SPAPR_CAP_FIXED_IBS; 2486 } 2487 } 2488 2489 int kvmppc_get_cap_safe_cache(void) 2490 { 2491 return cap_ppc_safe_cache; 2492 } 2493 2494 int kvmppc_get_cap_safe_bounds_check(void) 2495 { 2496 return cap_ppc_safe_bounds_check; 2497 } 2498 2499 int kvmppc_get_cap_safe_indirect_branch(void) 2500 { 2501 return cap_ppc_safe_indirect_branch; 2502 } 2503 2504 bool kvmppc_has_cap_spapr_vfio(void) 2505 { 2506 return cap_spapr_vfio; 2507 } 2508 2509 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) 2510 { 2511 uint32_t host_pvr = mfpvr(); 2512 PowerPCCPUClass *pvr_pcc; 2513 2514 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr); 2515 if (pvr_pcc == NULL) { 2516 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr); 2517 } 2518 2519 return pvr_pcc; 2520 } 2521 2522 static int kvm_ppc_register_host_cpu_type(MachineState *ms) 2523 { 2524 TypeInfo type_info = { 2525 .name = TYPE_HOST_POWERPC_CPU, 2526 .class_init = kvmppc_host_cpu_class_init, 2527 }; 2528 MachineClass *mc = MACHINE_GET_CLASS(ms); 2529 PowerPCCPUClass *pvr_pcc; 2530 ObjectClass *oc; 2531 DeviceClass *dc; 2532 int i; 2533 2534 pvr_pcc = kvm_ppc_get_host_cpu_class(); 2535 if (pvr_pcc == NULL) { 2536 return -1; 2537 } 2538 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc)); 2539 type_register(&type_info); 2540 if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) { 2541 /* override TCG default cpu type with 'host' cpu model */ 2542 mc->default_cpu_type = TYPE_HOST_POWERPC_CPU; 2543 } 2544 2545 oc = object_class_by_name(type_info.name); 2546 g_assert(oc); 2547 2548 /* 2549 * Update 
generic CPU family class alias (e.g. on a POWER8NVL host, 2550 * we want "POWER8" to be a "family" alias that points to the current 2551 * host CPU type, too) 2552 */ 2553 dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc)); 2554 for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) { 2555 if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) { 2556 char *suffix; 2557 2558 ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc)); 2559 suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX); 2560 if (suffix) { 2561 *suffix = 0; 2562 } 2563 break; 2564 } 2565 } 2566 2567 return 0; 2568 } 2569 2570 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function) 2571 { 2572 struct kvm_rtas_token_args args = { 2573 .token = token, 2574 }; 2575 2576 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) { 2577 return -ENOENT; 2578 } 2579 2580 strncpy(args.name, function, sizeof(args.name)); 2581 2582 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args); 2583 } 2584 2585 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp) 2586 { 2587 struct kvm_get_htab_fd s = { 2588 .flags = write ? KVM_GET_HTAB_WRITE : 0, 2589 .start_index = index, 2590 }; 2591 int ret; 2592 2593 if (!cap_htab_fd) { 2594 error_setg(errp, "KVM version doesn't support %s the HPT", 2595 write ? "writing" : "reading"); 2596 return -ENOTSUP; 2597 } 2598 2599 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s); 2600 if (ret < 0) { 2601 error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s", 2602 write ? "writing" : "reading", write ? "to" : "from", 2603 strerror(errno)); 2604 return -errno; 2605 } 2606 2607 return ret; 2608 } 2609 2610 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns) 2611 { 2612 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); 2613 uint8_t buf[bufsize]; 2614 ssize_t rc; 2615 2616 do { 2617 rc = read(fd, buf, bufsize); 2618 if (rc < 0) { 2619 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n", 2620 strerror(errno)); 2621 return rc; 2622 } else if (rc) { 2623 uint8_t *buffer = buf; 2624 ssize_t n = rc; 2625 while (n) { 2626 struct kvm_get_htab_header *head = 2627 (struct kvm_get_htab_header *) buffer; 2628 size_t chunksize = sizeof(*head) + 2629 HASH_PTE_SIZE_64 * head->n_valid; 2630 2631 qemu_put_be32(f, head->index); 2632 qemu_put_be16(f, head->n_valid); 2633 qemu_put_be16(f, head->n_invalid); 2634 qemu_put_buffer(f, (void *)(head + 1), 2635 HASH_PTE_SIZE_64 * head->n_valid); 2636 2637 buffer += chunksize; 2638 n -= chunksize; 2639 } 2640 } 2641 } while ((rc != 0) 2642 && ((max_ns < 0) 2643 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns))); 2644 2645 return (rc == 0) ? 
1 : 0; 2646 } 2647 2648 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index, 2649 uint16_t n_valid, uint16_t n_invalid) 2650 { 2651 struct kvm_get_htab_header *buf; 2652 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64; 2653 ssize_t rc; 2654 2655 buf = alloca(chunksize); 2656 buf->index = index; 2657 buf->n_valid = n_valid; 2658 buf->n_invalid = n_invalid; 2659 2660 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid); 2661 2662 rc = write(fd, buf, chunksize); 2663 if (rc < 0) { 2664 fprintf(stderr, "Error writing KVM hash table: %s\n", 2665 strerror(errno)); 2666 return rc; 2667 } 2668 if (rc != chunksize) { 2669 /* We should never get a short write on a single chunk */ 2670 fprintf(stderr, "Short write, restoring KVM hash table\n"); 2671 return -1; 2672 } 2673 return 0; 2674 } 2675 2676 bool kvm_arch_stop_on_emulation_error(CPUState *cpu) 2677 { 2678 return true; 2679 } 2680 2681 void kvm_arch_init_irq_routing(KVMState *s) 2682 { 2683 } 2684 2685 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n) 2686 { 2687 int fd, rc; 2688 int i; 2689 2690 fd = kvmppc_get_htab_fd(false, ptex, &error_abort); 2691 2692 i = 0; 2693 while (i < n) { 2694 struct kvm_get_htab_header *hdr; 2695 int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP; 2696 char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64]; 2697 2698 rc = read(fd, buf, sizeof(buf)); 2699 if (rc < 0) { 2700 hw_error("kvmppc_read_hptes: Unable to read HPTEs"); 2701 } 2702 2703 hdr = (struct kvm_get_htab_header *)buf; 2704 while ((i < n) && ((char *)hdr < (buf + rc))) { 2705 int invalid = hdr->n_invalid, valid = hdr->n_valid; 2706 2707 if (hdr->index != (ptex + i)) { 2708 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32 2709 " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i); 2710 } 2711 2712 if (n - i < valid) { 2713 valid = n - i; 2714 } 2715 memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid); 2716 i += valid; 2717 2718 if ((n - i) < invalid) { 2719 invalid = n - i; 2720 } 2721 memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64); 2722 i += invalid; 2723 2724 hdr = (struct kvm_get_htab_header *) 2725 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid); 2726 } 2727 } 2728 2729 close(fd); 2730 } 2731 2732 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1) 2733 { 2734 int fd, rc; 2735 struct { 2736 struct kvm_get_htab_header hdr; 2737 uint64_t pte0; 2738 uint64_t pte1; 2739 } buf; 2740 2741 fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort); 2742 2743 buf.hdr.n_valid = 1; 2744 buf.hdr.n_invalid = 0; 2745 buf.hdr.index = ptex; 2746 buf.pte0 = cpu_to_be64(pte0); 2747 buf.pte1 = cpu_to_be64(pte1); 2748 2749 rc = write(fd, &buf, sizeof(buf)); 2750 if (rc != sizeof(buf)) { 2751 hw_error("kvmppc_write_hpte: Unable to update KVM HPT"); 2752 } 2753 close(fd); 2754 } 2755 2756 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, 2757 uint64_t address, uint32_t data, PCIDevice *dev) 2758 { 2759 return 0; 2760 } 2761 2762 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, 2763 int vector, PCIDevice *dev) 2764 { 2765 return 0; 2766 } 2767 2768 int kvm_arch_release_virq_post(int virq) 2769 { 2770 return 0; 2771 } 2772 2773 int kvm_arch_msi_data_to_gsi(uint32_t data) 2774 { 2775 return data & 0xffff; 2776 } 2777 2778 int kvmppc_enable_hwrng(void) 2779 { 2780 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) { 2781 return -1; 2782 } 2783 2784 return kvmppc_enable_hcall(kvm_state, H_RANDOM); 2785 } 2786 2787 void 
kvmppc_check_papr_resize_hpt(Error **errp) 2788 { 2789 if (!kvm_enabled()) { 2790 return; /* No KVM, we're good */ 2791 } 2792 2793 if (cap_resize_hpt) { 2794 return; /* Kernel has explicit support, we're good */ 2795 } 2796 2797 /* Otherwise fallback on looking for PR KVM */ 2798 if (kvmppc_is_pr(kvm_state)) { 2799 return; 2800 } 2801 2802 error_setg(errp, 2803 "Hash page table resizing not available with this KVM version"); 2804 } 2805 2806 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift) 2807 { 2808 CPUState *cs = CPU(cpu); 2809 struct kvm_ppc_resize_hpt rhpt = { 2810 .flags = flags, 2811 .shift = shift, 2812 }; 2813 2814 if (!cap_resize_hpt) { 2815 return -ENOSYS; 2816 } 2817 2818 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt); 2819 } 2820 2821 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift) 2822 { 2823 CPUState *cs = CPU(cpu); 2824 struct kvm_ppc_resize_hpt rhpt = { 2825 .flags = flags, 2826 .shift = shift, 2827 }; 2828 2829 if (!cap_resize_hpt) { 2830 return -ENOSYS; 2831 } 2832 2833 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt); 2834 } 2835 2836 /* 2837 * This is a helper function to detect a post migration scenario 2838 * in which a guest, running as KVM-HV, freezes in cpu_post_load because 2839 * the guest kernel can't handle a PVR value other than the actual host 2840 * PVR in KVM_SET_SREGS, even if pvr_match() returns true. 2841 * 2842 * If we don't have cap_ppc_pvr_compat and we're not running in PR 2843 * (so, we're HV), return true. The workaround itself is done in 2844 * cpu_post_load. 2845 * 2846 * The order here is important: we'll only check for KVM PR as a 2847 * fallback if the guest kernel can't handle the situation itself. 2848 * We need to avoid as much as possible querying the running KVM type 2849 * in QEMU level. 2850 */ 2851 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu) 2852 { 2853 CPUState *cs = CPU(cpu); 2854 2855 if (!kvm_enabled()) { 2856 return false; 2857 } 2858 2859 if (cap_ppc_pvr_compat) { 2860 return false; 2861 } 2862 2863 return !kvmppc_is_pr(cs->kvm_state); 2864 } 2865
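/*
 * Illustrative usage sketch (hypothetical, not part of the upstream file):
 * the host-introspection helpers defined earlier are thin wrappers around
 * /proc/cpuinfo and the flattened device tree, and callers are expected to
 * handle the failure values themselves.  The function below is invented
 * purely to show the intended call pattern and the ownership rule that the
 * string getters hand back g_malloc'd buffers which the caller must g_free.
 *
 *     static void example_dump_host_info(void)
 *     {
 *         char *model = NULL;
 *         uint64_t clockfreq;
 *
 *         // kvmppc_get_host_model() wraps g_file_get_contents() on
 *         // /proc/device-tree/model and returns true on success, in which
 *         // case 'model' points to a g_malloc'd copy of the property.
 *         if (kvmppc_get_host_model(&model)) {
 *             fprintf(stderr, "host model: %s\n", model);
 *             g_free(model);
 *         }
 *
 *         // kvmppc_read_int_cpu_dt() locates a CPU node via
 *         // kvmppc_find_cpu_dt() and parses the named property as a
 *         // big-endian 32- or 64-bit integer; it yields 0 or -1 when the
 *         // node or property cannot be found or parsed, so callers such
 *         // as kvmppc_get_clockfreq() simply pass the value through.
 *         clockfreq = kvmppc_read_int_cpu_dt("clock-frequency");
 *         fprintf(stderr, "clock-frequency: %" PRIu64 "\n", clockfreq);
 *     }
 */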