1 /* 2 * PowerPC implementation of KVM hooks 3 * 4 * Copyright IBM Corp. 2007 5 * Copyright (C) 2011 Freescale Semiconductor, Inc. 6 * 7 * Authors: 8 * Jerone Young <jyoung5@us.ibm.com> 9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> 10 * Hollis Blanchard <hollisb@us.ibm.com> 11 * 12 * This work is licensed under the terms of the GNU GPL, version 2 or later. 13 * See the COPYING file in the top-level directory. 14 * 15 */ 16 17 #include "qemu/osdep.h" 18 #include <dirent.h> 19 #include <sys/ioctl.h> 20 #include <sys/vfs.h> 21 22 #include <linux/kvm.h> 23 24 #include "qemu-common.h" 25 #include "qapi/error.h" 26 #include "qemu/error-report.h" 27 #include "cpu.h" 28 #include "cpu-models.h" 29 #include "qemu/timer.h" 30 #include "sysemu/sysemu.h" 31 #include "sysemu/hw_accel.h" 32 #include "kvm_ppc.h" 33 #include "sysemu/cpus.h" 34 #include "sysemu/device_tree.h" 35 #include "mmu-hash64.h" 36 37 #include "hw/sysbus.h" 38 #include "hw/ppc/spapr.h" 39 #include "hw/ppc/spapr_vio.h" 40 #include "hw/ppc/spapr_cpu_core.h" 41 #include "hw/ppc/ppc.h" 42 #include "sysemu/watchdog.h" 43 #include "trace.h" 44 #include "exec/gdbstub.h" 45 #include "exec/memattrs.h" 46 #include "exec/ram_addr.h" 47 #include "sysemu/hostmem.h" 48 #include "qemu/cutils.h" 49 #include "qemu/mmap-alloc.h" 50 #include "elf.h" 51 #include "sysemu/kvm_int.h" 52 53 //#define DEBUG_KVM 54 55 #ifdef DEBUG_KVM 56 #define DPRINTF(fmt, ...) \ 57 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) 58 #else 59 #define DPRINTF(fmt, ...) \ 60 do { } while (0) 61 #endif 62 63 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/" 64 65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = { 66 KVM_CAP_LAST_INFO 67 }; 68 69 static int cap_interrupt_unset = false; 70 static int cap_interrupt_level = false; 71 static int cap_segstate; 72 static int cap_booke_sregs; 73 static int cap_ppc_smt; 74 static int cap_ppc_smt_possible; 75 static int cap_ppc_rma; 76 static int cap_spapr_tce; 77 static int cap_spapr_tce_64; 78 static int cap_spapr_multitce; 79 static int cap_spapr_vfio; 80 static int cap_hior; 81 static int cap_one_reg; 82 static int cap_epr; 83 static int cap_ppc_watchdog; 84 static int cap_papr; 85 static int cap_htab_fd; 86 static int cap_fixup_hcalls; 87 static int cap_htm; /* Hardware transactional memory support */ 88 static int cap_mmu_radix; 89 static int cap_mmu_hash_v3; 90 static int cap_resize_hpt; 91 static int cap_ppc_pvr_compat; 92 static int cap_ppc_safe_cache; 93 static int cap_ppc_safe_bounds_check; 94 static int cap_ppc_safe_indirect_branch; 95 96 static uint32_t debug_inst_opcode; 97 98 /* XXX We have a race condition where we actually have a level triggered 99 * interrupt, but the infrastructure can't expose that yet, so the guest 100 * takes but ignores it, goes to sleep and never gets notified that there's 101 * still an interrupt pending. 102 * 103 * As a quick workaround, let's just wake up again 20 ms after we injected 104 * an interrupt. That way we can assure that we're always reinjecting 105 * interrupts in case the guest swallowed them. 106 */ 107 static QEMUTimer *idle_timer; 108 109 static void kvm_kick_cpu(void *opaque) 110 { 111 PowerPCCPU *cpu = opaque; 112 113 qemu_cpu_kick(CPU(cpu)); 114 } 115 116 /* Check whether we are running with KVM-PR (instead of KVM-HV). This 117 * should only be used for fallback tests - generally we should use 118 * explicit capabilities for the features we want, rather than 119 * assuming what is/isn't available depending on the KVM variant. 
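 *
 * (Background: KVM-HV runs the guest using the CPU's hypervisor mode,
 * while KVM-PR runs it in problem state and emulates privileged
 * operations in the host kernel; the ePAPR hypercall info returned by
 * KVM_CAP_PPC_GET_PVINFO is only provided by the PR implementation,
 * which is what the check below relies on.)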
*/ 120 static bool kvmppc_is_pr(KVMState *ks) 121 { 122 /* Assume KVM-PR if the GET_PVINFO capability is available */ 123 return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0; 124 } 125 126 static int kvm_ppc_register_host_cpu_type(MachineState *ms); 127 static void kvmppc_get_cpu_characteristics(KVMState *s); 128 129 int kvm_arch_init(MachineState *ms, KVMState *s) 130 { 131 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ); 132 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL); 133 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE); 134 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS); 135 cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE); 136 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA); 137 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE); 138 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64); 139 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE); 140 cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO); 141 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG); 142 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR); 143 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR); 144 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG); 145 /* Note: we don't set cap_papr here, because this capability is 146 * only activated after this by kvmppc_set_papr() */ 147 cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD); 148 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL); 149 cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT); 150 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM); 151 cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX); 152 cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3); 153 cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); 154 kvmppc_get_cpu_characteristics(s); 155 /* 156 * Note: setting it to false because there is not such capability 157 * in KVM at this moment. 158 * 159 * TODO: call kvm_vm_check_extension() with the right capability 160 * after the kernel starts implementing it.*/ 161 cap_ppc_pvr_compat = false; 162 163 if (!cap_interrupt_level) { 164 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the " 165 "VM to stall at times!\n"); 166 } 167 168 kvm_ppc_register_host_cpu_type(ms); 169 170 return 0; 171 } 172 173 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s) 174 { 175 return 0; 176 } 177 178 static int kvm_arch_sync_sregs(PowerPCCPU *cpu) 179 { 180 CPUPPCState *cenv = &cpu->env; 181 CPUState *cs = CPU(cpu); 182 struct kvm_sregs sregs; 183 int ret; 184 185 if (cenv->excp_model == POWERPC_EXCP_BOOKE) { 186 /* What we're really trying to say is "if we're on BookE, we use 187 the native PVR for now". This is the only sane way to check 188 it though, so we potentially confuse users that they can run 189 BookE guests on BookS. 
   Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}

/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}


#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - Check whether we are running "PR" KVM which only supports 4K
     *   and 16M pages, but supports them regardless of the backing
     *   store characteristics. We also don't support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback.
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
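     *
     * (For reference: each sps[] entry filled in below pairs a segment
     *  base page size with its SLB encoding and the page-size encodings
     *  a hash PTE may use within such a segment; the 64K entry uses the
     *  POWER7 values slb_enc = 0x110 / pte_enc = 1 assumed above.)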
282 */ 283 if (kvmppc_is_pr(cs->kvm_state)) { 284 /* No flags */ 285 info->flags = 0; 286 info->slb_size = 64; 287 288 /* Standard 4k base page size segment */ 289 info->sps[0].page_shift = 12; 290 info->sps[0].slb_enc = 0; 291 info->sps[0].enc[0].page_shift = 12; 292 info->sps[0].enc[0].pte_enc = 0; 293 294 /* Standard 16M large page size segment */ 295 info->sps[1].page_shift = 24; 296 info->sps[1].slb_enc = SLB_VSID_L; 297 info->sps[1].enc[0].page_shift = 24; 298 info->sps[1].enc[0].pte_enc = 0; 299 } else { 300 int i = 0; 301 302 /* HV KVM has backing store size restrictions */ 303 info->flags = KVM_PPC_PAGE_SIZES_REAL; 304 305 if (env->mmu_model & POWERPC_MMU_1TSEG) { 306 info->flags |= KVM_PPC_1T_SEGMENTS; 307 } 308 309 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 || 310 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) { 311 info->slb_size = 32; 312 } else { 313 info->slb_size = 64; 314 } 315 316 /* Standard 4k base page size segment */ 317 info->sps[i].page_shift = 12; 318 info->sps[i].slb_enc = 0; 319 info->sps[i].enc[0].page_shift = 12; 320 info->sps[i].enc[0].pte_enc = 0; 321 i++; 322 323 /* 64K on MMU 2.06 and later */ 324 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 || 325 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) { 326 info->sps[i].page_shift = 16; 327 info->sps[i].slb_enc = 0x110; 328 info->sps[i].enc[0].page_shift = 16; 329 info->sps[i].enc[0].pte_enc = 1; 330 i++; 331 } 332 333 /* Standard 16M large page size segment */ 334 info->sps[i].page_shift = 24; 335 info->sps[i].slb_enc = SLB_VSID_L; 336 info->sps[i].enc[0].page_shift = 24; 337 info->sps[i].enc[0].pte_enc = 0; 338 } 339 } 340 341 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info) 342 { 343 CPUState *cs = CPU(cpu); 344 int ret; 345 346 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) { 347 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info); 348 if (ret == 0) { 349 return; 350 } 351 } 352 353 kvm_get_fallback_smmu_info(cpu, info); 354 } 355 356 struct ppc_radix_page_info *kvm_get_radix_page_info(void) 357 { 358 KVMState *s = KVM_STATE(current_machine->accelerator); 359 struct ppc_radix_page_info *radix_page_info; 360 struct kvm_ppc_rmmu_info rmmu_info; 361 int i; 362 363 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) { 364 return NULL; 365 } 366 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) { 367 return NULL; 368 } 369 radix_page_info = g_malloc0(sizeof(*radix_page_info)); 370 radix_page_info->count = 0; 371 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) { 372 if (rmmu_info.ap_encodings[i]) { 373 radix_page_info->entries[i] = rmmu_info.ap_encodings[i]; 374 radix_page_info->count++; 375 } 376 } 377 return radix_page_info; 378 } 379 380 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, 381 bool radix, bool gtse, 382 uint64_t proc_tbl) 383 { 384 CPUState *cs = CPU(cpu); 385 int ret; 386 uint64_t flags = 0; 387 struct kvm_ppc_mmuv3_cfg cfg = { 388 .process_table = proc_tbl, 389 }; 390 391 if (radix) { 392 flags |= KVM_PPC_MMUV3_RADIX; 393 } 394 if (gtse) { 395 flags |= KVM_PPC_MMUV3_GTSE; 396 } 397 cfg.flags = flags; 398 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg); 399 switch (ret) { 400 case 0: 401 return H_SUCCESS; 402 case -EINVAL: 403 return H_PARAMETER; 404 case -ENODEV: 405 return H_NOT_AVAILABLE; 406 default: 407 return H_HARDWARE; 408 } 409 } 410 411 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift) 412 { 413 if (!(flags & 
KVM_PPC_PAGE_SIZES_REAL)) { 414 return true; 415 } 416 417 return (1ul << shift) <= rampgsize; 418 } 419 420 static long max_cpu_page_size; 421 422 static void kvm_fixup_page_sizes(PowerPCCPU *cpu) 423 { 424 static struct kvm_ppc_smmu_info smmu_info; 425 static bool has_smmu_info; 426 CPUPPCState *env = &cpu->env; 427 int iq, ik, jq, jk; 428 bool has_64k_pages = false; 429 430 /* We only handle page sizes for 64-bit server guests for now */ 431 if (!(env->mmu_model & POWERPC_MMU_64)) { 432 return; 433 } 434 435 /* Collect MMU info from kernel if not already */ 436 if (!has_smmu_info) { 437 kvm_get_smmu_info(cpu, &smmu_info); 438 has_smmu_info = true; 439 } 440 441 if (!max_cpu_page_size) { 442 max_cpu_page_size = qemu_getrampagesize(); 443 } 444 445 /* Convert to QEMU form */ 446 memset(&env->sps, 0, sizeof(env->sps)); 447 448 /* If we have HV KVM, we need to forbid CI large pages if our 449 * host page size is smaller than 64K. 450 */ 451 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) { 452 env->ci_large_pages = getpagesize() >= 0x10000; 453 } 454 455 /* 456 * XXX This loop should be an entry wide AND of the capabilities that 457 * the selected CPU has with the capabilities that KVM supports. 458 */ 459 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) { 460 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq]; 461 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik]; 462 463 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size, 464 ksps->page_shift)) { 465 continue; 466 } 467 qsps->page_shift = ksps->page_shift; 468 qsps->slb_enc = ksps->slb_enc; 469 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) { 470 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size, 471 ksps->enc[jk].page_shift)) { 472 continue; 473 } 474 if (ksps->enc[jk].page_shift == 16) { 475 has_64k_pages = true; 476 } 477 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift; 478 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc; 479 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) { 480 break; 481 } 482 } 483 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) { 484 break; 485 } 486 } 487 env->slb_nr = smmu_info.slb_size; 488 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) { 489 env->mmu_model &= ~POWERPC_MMU_1TSEG; 490 } 491 if (!has_64k_pages) { 492 env->mmu_model &= ~POWERPC_MMU_64K; 493 } 494 } 495 496 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path) 497 { 498 Object *mem_obj = object_resolve_path(obj_path, NULL); 499 char *mempath = object_property_get_str(mem_obj, "mem-path", NULL); 500 long pagesize; 501 502 pagesize = qemu_mempath_getpagesize(mempath); 503 g_free(mempath); 504 505 return pagesize >= max_cpu_page_size; 506 } 507 508 #else /* defined (TARGET_PPC64) */ 509 510 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu) 511 { 512 } 513 514 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path) 515 { 516 return true; 517 } 518 519 #endif /* !defined (TARGET_PPC64) */ 520 521 unsigned long kvm_arch_vcpu_id(CPUState *cpu) 522 { 523 return POWERPC_CPU(cpu)->vcpu_id; 524 } 525 526 /* e500 supports 2 h/w breakpoint and 2 watchpoint. 527 * book3s supports only 1 watchpoint, so array size 528 * of 4 is sufficient for now. 
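 *
 * Breakpoints and watchpoints share the hw_debug_points[] array below in
 * insertion order; the per-entry type field is what tells them apart.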
 */
#define MAX_HW_BKPTS 4

static struct HWBreakpoint {
    target_ulong addr;
    int type;
} hw_debug_points[MAX_HW_BKPTS];

static CPUWatchpoint hw_watchpoint;

/* By default there is no breakpoint and no watchpoint supported */
static int max_hw_breakpoint;
static int max_hw_watchpoint;
static int nb_hw_breakpoint;
static int nb_hw_watchpoint;

static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
{
    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        max_hw_breakpoint = 2;
        max_hw_watchpoint = 2;
    }

    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
        fprintf(stderr, "Error initializing h/w breakpoints\n");
        return;
    }
}

int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        if (ret == -EINVAL) {
            error_report("Register sync failed... If you're using kvm-hv.ko,"
                         " only \"-cpu host\" is possible");
        }
        return ret;
    }

    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);

    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        /* This target supports access to KVM's guest TLB */
        ret = kvm_booke206_tlb_init(cpu);
        break;
    case POWERPC_MMU_2_07:
        if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
            /* KVM-HV has transactional memory on POWER8 also without the
             * KVM_CAP_PPC_HTM extension, so enable it here instead as
             * long as it's available to userspace on the host. */
            if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
                cap_htm = true;
            }
        }
        break;
    default:
        break;
    }

    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
    kvmppc_hw_debug_points_init(cenv);

    return ret;
}

static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}

static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_get(spr, strerror(errno));
    } else {
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            env->spr[spr] = val.u32;
            break;

        case KVM_REG_SIZE_U64:
            env->spr[spr] = val.u64;
            break;

        default:
            /* Don't handle this size yet */
            abort();
        }
    }
}

static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct
kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_set(spr, strerror(errno));
    }
}

static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

#ifdef HOST_WORDS_BIGENDIAN
            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
#else
            vsr[0] = env->vsr[i];
            vsr[1] = float64_val(env->fpr[i]);
#endif
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get %s%d from KVM: %s\n",
                        vsx ?
"VSR" : "FPR", i, strerror(errno)); 796 return ret; 797 } else { 798 #ifdef HOST_WORDS_BIGENDIAN 799 env->fpr[i] = vsr[0]; 800 if (vsx) { 801 env->vsr[i] = vsr[1]; 802 } 803 #else 804 env->fpr[i] = vsr[1]; 805 if (vsx) { 806 env->vsr[i] = vsr[0]; 807 } 808 #endif 809 } 810 } 811 } 812 813 if (env->insns_flags & PPC_ALTIVEC) { 814 reg.id = KVM_REG_PPC_VSCR; 815 reg.addr = (uintptr_t)&env->vscr; 816 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 817 if (ret < 0) { 818 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno)); 819 return ret; 820 } 821 822 for (i = 0; i < 32; i++) { 823 reg.id = KVM_REG_PPC_VR(i); 824 reg.addr = (uintptr_t)&env->avr[i]; 825 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 826 if (ret < 0) { 827 DPRINTF("Unable to get VR%d from KVM: %s\n", 828 i, strerror(errno)); 829 return ret; 830 } 831 } 832 } 833 834 return 0; 835 } 836 837 #if defined(TARGET_PPC64) 838 static int kvm_get_vpa(CPUState *cs) 839 { 840 PowerPCCPU *cpu = POWERPC_CPU(cs); 841 CPUPPCState *env = &cpu->env; 842 struct kvm_one_reg reg; 843 int ret; 844 845 reg.id = KVM_REG_PPC_VPA_ADDR; 846 reg.addr = (uintptr_t)&env->vpa_addr; 847 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 848 if (ret < 0) { 849 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno)); 850 return ret; 851 } 852 853 assert((uintptr_t)&env->slb_shadow_size 854 == ((uintptr_t)&env->slb_shadow_addr + 8)); 855 reg.id = KVM_REG_PPC_VPA_SLB; 856 reg.addr = (uintptr_t)&env->slb_shadow_addr; 857 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 858 if (ret < 0) { 859 DPRINTF("Unable to get SLB shadow state from KVM: %s\n", 860 strerror(errno)); 861 return ret; 862 } 863 864 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8)); 865 reg.id = KVM_REG_PPC_VPA_DTL; 866 reg.addr = (uintptr_t)&env->dtl_addr; 867 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 868 if (ret < 0) { 869 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n", 870 strerror(errno)); 871 return ret; 872 } 873 874 return 0; 875 } 876 877 static int kvm_put_vpa(CPUState *cs) 878 { 879 PowerPCCPU *cpu = POWERPC_CPU(cs); 880 CPUPPCState *env = &cpu->env; 881 struct kvm_one_reg reg; 882 int ret; 883 884 /* SLB shadow or DTL can't be registered unless a master VPA is 885 * registered. That means when restoring state, if a VPA *is* 886 * registered, we need to set that up first. 
     * If not, we need to
     * deregister the others before deregistering the master VPA */
    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));

    if (env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
                strerror(errno));
        return ret;
    }

    if (!env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    return 0;
}
#endif /* TARGET_PPC64 */

int kvmppc_put_books_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int i;

    sregs.pvr = env->spr[SPR_PVR];

    if (cpu->vhyp) {
        PPCVirtualHypervisorClass *vhc =
            PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
        sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
    } else {
        sregs.u.s.sdr1 = env->spr[SPR_SDR1];
    }

    /* Sync SLB */
#ifdef TARGET_PPC64
    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
        sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
        if (env->slb[i].esid & SLB_ESID_V) {
            sregs.u.s.ppc64.slb[i].slbe |= i;
        }
        sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
    }
#endif

    /* Sync SRs */
    for (i = 0; i < 16; i++) {
        sregs.u.s.ppc32.sr[i] = env->sr[i];
    }

    /* Sync BATs */
    for (i = 0; i < 8; i++) {
        /* Beware.
           We have to swap upper and lower bits here */
        sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
            | env->DBAT[1][i];
        sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
            | env->IBAT[1][i];
    }

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
}

int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr = env->lr;
    regs.xer = cpu_read_xer(env);
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    regs.cr = 0;
    for (i = 0; i < 8; i++) {
        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    kvm_put_fp(cs);

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        ret = kvmppc_put_books_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate.
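         *
         * (Each spr_cb[] entry carries the ONE_REG id that was associated
         *  with that SPR when the CPU model registered it; an id of 0 just
         *  means the SPR has no KVM counterpart and the loop skips it.)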
*/ 1048 for (i = 0; i < 1024; i++) { 1049 uint64_t id = env->spr_cb[i].one_reg_id; 1050 1051 if (id != 0) { 1052 kvm_put_one_spr(cs, id, i); 1053 } 1054 } 1055 1056 #ifdef TARGET_PPC64 1057 if (msr_ts) { 1058 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1059 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1060 } 1061 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1062 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1063 } 1064 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1065 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1066 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1067 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1068 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1069 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1070 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1071 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1072 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1073 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1074 } 1075 1076 if (cap_papr) { 1077 if (kvm_put_vpa(cs) < 0) { 1078 DPRINTF("Warning: Unable to set VPA information to KVM\n"); 1079 } 1080 } 1081 1082 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1083 #endif /* TARGET_PPC64 */ 1084 } 1085 1086 return ret; 1087 } 1088 1089 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor) 1090 { 1091 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR]; 1092 } 1093 1094 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu) 1095 { 1096 CPUPPCState *env = &cpu->env; 1097 struct kvm_sregs sregs; 1098 int ret; 1099 1100 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1101 if (ret < 0) { 1102 return ret; 1103 } 1104 1105 if (sregs.u.e.features & KVM_SREGS_E_BASE) { 1106 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0; 1107 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1; 1108 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr; 1109 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear; 1110 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr; 1111 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr; 1112 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr; 1113 env->spr[SPR_DECR] = sregs.u.e.dec; 1114 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff; 1115 env->spr[SPR_TBU] = sregs.u.e.tb >> 32; 1116 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave; 1117 } 1118 1119 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) { 1120 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir; 1121 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0; 1122 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1; 1123 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar; 1124 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr; 1125 } 1126 1127 if (sregs.u.e.features & KVM_SREGS_E_64) { 1128 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr; 1129 } 1130 1131 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) { 1132 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8; 1133 } 1134 1135 if (sregs.u.e.features & KVM_SREGS_E_IVOR) { 1136 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0]; 1137 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0); 1138 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1]; 1139 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1); 1140 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2]; 1141 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2); 1142 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3]; 1143 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3); 1144 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4]; 1145 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4); 1146 
env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5]; 1147 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5); 1148 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6]; 1149 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6); 1150 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7]; 1151 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7); 1152 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8]; 1153 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8); 1154 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9]; 1155 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9); 1156 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10]; 1157 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10); 1158 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11]; 1159 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11); 1160 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12]; 1161 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12); 1162 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13]; 1163 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13); 1164 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14]; 1165 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14); 1166 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15]; 1167 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15); 1168 1169 if (sregs.u.e.features & KVM_SREGS_E_SPE) { 1170 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0]; 1171 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32); 1172 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1]; 1173 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33); 1174 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2]; 1175 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34); 1176 } 1177 1178 if (sregs.u.e.features & KVM_SREGS_E_PM) { 1179 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3]; 1180 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35); 1181 } 1182 1183 if (sregs.u.e.features & KVM_SREGS_E_PC) { 1184 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4]; 1185 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36); 1186 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5]; 1187 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37); 1188 } 1189 } 1190 1191 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) { 1192 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0; 1193 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1; 1194 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2; 1195 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff; 1196 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4; 1197 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6; 1198 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32; 1199 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg; 1200 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0]; 1201 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1]; 1202 } 1203 1204 if (sregs.u.e.features & KVM_SREGS_EXP) { 1205 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr; 1206 } 1207 1208 if (sregs.u.e.features & KVM_SREGS_E_PD) { 1209 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc; 1210 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc; 1211 } 1212 1213 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { 1214 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr; 1215 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar; 1216 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0; 1217 1218 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) { 1219 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1; 1220 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2; 1221 } 1222 } 1223 1224 return 0; 1225 } 1226 1227 static int 
kvmppc_get_books_sregs(PowerPCCPU *cpu) 1228 { 1229 CPUPPCState *env = &cpu->env; 1230 struct kvm_sregs sregs; 1231 int ret; 1232 int i; 1233 1234 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1235 if (ret < 0) { 1236 return ret; 1237 } 1238 1239 if (!cpu->vhyp) { 1240 ppc_store_sdr1(env, sregs.u.s.sdr1); 1241 } 1242 1243 /* Sync SLB */ 1244 #ifdef TARGET_PPC64 1245 /* 1246 * The packed SLB array we get from KVM_GET_SREGS only contains 1247 * information about valid entries. So we flush our internal copy 1248 * to get rid of stale ones, then put all valid SLB entries back 1249 * in. 1250 */ 1251 memset(env->slb, 0, sizeof(env->slb)); 1252 for (i = 0; i < ARRAY_SIZE(env->slb); i++) { 1253 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe; 1254 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv; 1255 /* 1256 * Only restore valid entries 1257 */ 1258 if (rb & SLB_ESID_V) { 1259 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs); 1260 } 1261 } 1262 #endif 1263 1264 /* Sync SRs */ 1265 for (i = 0; i < 16; i++) { 1266 env->sr[i] = sregs.u.s.ppc32.sr[i]; 1267 } 1268 1269 /* Sync BATs */ 1270 for (i = 0; i < 8; i++) { 1271 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff; 1272 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32; 1273 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff; 1274 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32; 1275 } 1276 1277 return 0; 1278 } 1279 1280 int kvm_arch_get_registers(CPUState *cs) 1281 { 1282 PowerPCCPU *cpu = POWERPC_CPU(cs); 1283 CPUPPCState *env = &cpu->env; 1284 struct kvm_regs regs; 1285 uint32_t cr; 1286 int i, ret; 1287 1288 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s); 1289 if (ret < 0) 1290 return ret; 1291 1292 cr = regs.cr; 1293 for (i = 7; i >= 0; i--) { 1294 env->crf[i] = cr & 15; 1295 cr >>= 4; 1296 } 1297 1298 env->ctr = regs.ctr; 1299 env->lr = regs.lr; 1300 cpu_write_xer(env, regs.xer); 1301 env->msr = regs.msr; 1302 env->nip = regs.pc; 1303 1304 env->spr[SPR_SRR0] = regs.srr0; 1305 env->spr[SPR_SRR1] = regs.srr1; 1306 1307 env->spr[SPR_SPRG0] = regs.sprg0; 1308 env->spr[SPR_SPRG1] = regs.sprg1; 1309 env->spr[SPR_SPRG2] = regs.sprg2; 1310 env->spr[SPR_SPRG3] = regs.sprg3; 1311 env->spr[SPR_SPRG4] = regs.sprg4; 1312 env->spr[SPR_SPRG5] = regs.sprg5; 1313 env->spr[SPR_SPRG6] = regs.sprg6; 1314 env->spr[SPR_SPRG7] = regs.sprg7; 1315 1316 env->spr[SPR_BOOKE_PID] = regs.pid; 1317 1318 for (i = 0;i < 32; i++) 1319 env->gpr[i] = regs.gpr[i]; 1320 1321 kvm_get_fp(cs); 1322 1323 if (cap_booke_sregs) { 1324 ret = kvmppc_get_booke_sregs(cpu); 1325 if (ret < 0) { 1326 return ret; 1327 } 1328 } 1329 1330 if (cap_segstate) { 1331 ret = kvmppc_get_books_sregs(cpu); 1332 if (ret < 0) { 1333 return ret; 1334 } 1335 } 1336 1337 if (cap_hior) { 1338 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR); 1339 } 1340 1341 if (cap_one_reg) { 1342 int i; 1343 1344 /* We deliberately ignore errors here, for kernels which have 1345 * the ONE_REG calls, but don't support the specific 1346 * registers, there's a reasonable chance things will still 1347 * work, at least until we try to migrate. 
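         *
         * (The transactional-memory checkpoint registers further down are
         *  only transferred while the MSR transaction-state field is
         *  non-zero, i.e. while a transaction is active or suspended.)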
*/ 1348 for (i = 0; i < 1024; i++) { 1349 uint64_t id = env->spr_cb[i].one_reg_id; 1350 1351 if (id != 0) { 1352 kvm_get_one_spr(cs, id, i); 1353 } 1354 } 1355 1356 #ifdef TARGET_PPC64 1357 if (msr_ts) { 1358 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1359 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1360 } 1361 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1362 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1363 } 1364 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1365 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1366 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1367 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1368 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1369 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1370 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1371 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1372 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1373 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1374 } 1375 1376 if (cap_papr) { 1377 if (kvm_get_vpa(cs) < 0) { 1378 DPRINTF("Warning: Unable to get VPA information from KVM\n"); 1379 } 1380 } 1381 1382 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1383 #endif 1384 } 1385 1386 return 0; 1387 } 1388 1389 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level) 1390 { 1391 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET; 1392 1393 if (irq != PPC_INTERRUPT_EXT) { 1394 return 0; 1395 } 1396 1397 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) { 1398 return 0; 1399 } 1400 1401 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq); 1402 1403 return 0; 1404 } 1405 1406 #if defined(TARGET_PPCEMB) 1407 #define PPC_INPUT_INT PPC40x_INPUT_INT 1408 #elif defined(TARGET_PPC64) 1409 #define PPC_INPUT_INT PPC970_INPUT_INT 1410 #else 1411 #define PPC_INPUT_INT PPC6xx_INPUT_INT 1412 #endif 1413 1414 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) 1415 { 1416 PowerPCCPU *cpu = POWERPC_CPU(cs); 1417 CPUPPCState *env = &cpu->env; 1418 int r; 1419 unsigned irq; 1420 1421 qemu_mutex_lock_iothread(); 1422 1423 /* PowerPC QEMU tracks the various core input pins (interrupt, critical 1424 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */ 1425 if (!cap_interrupt_level && 1426 run->ready_for_interrupt_injection && 1427 (cs->interrupt_request & CPU_INTERRUPT_HARD) && 1428 (env->irq_input_state & (1<<PPC_INPUT_INT))) 1429 { 1430 /* For now KVM disregards the 'irq' argument. However, in the 1431 * future KVM could cache it in-kernel to avoid a heavyweight exit 1432 * when reading the UIC. 1433 */ 1434 irq = KVM_INTERRUPT_SET; 1435 1436 DPRINTF("injected interrupt %d\n", irq); 1437 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq); 1438 if (r < 0) { 1439 printf("cpu %d fail inject %x\n", cs->cpu_index, irq); 1440 } 1441 1442 /* Always wake up soon in case the interrupt was level based */ 1443 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1444 (NANOSECONDS_PER_SECOND / 50)); 1445 } 1446 1447 /* We don't know if there are more interrupts pending after this. However, 1448 * the guest will return to userspace in the course of handling this one 1449 * anyways, so we will get a chance to deliver the rest. 
*/ 1450 1451 qemu_mutex_unlock_iothread(); 1452 } 1453 1454 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) 1455 { 1456 return MEMTXATTRS_UNSPECIFIED; 1457 } 1458 1459 int kvm_arch_process_async_events(CPUState *cs) 1460 { 1461 return cs->halted; 1462 } 1463 1464 static int kvmppc_handle_halt(PowerPCCPU *cpu) 1465 { 1466 CPUState *cs = CPU(cpu); 1467 CPUPPCState *env = &cpu->env; 1468 1469 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) { 1470 cs->halted = 1; 1471 cs->exception_index = EXCP_HLT; 1472 } 1473 1474 return 0; 1475 } 1476 1477 /* map dcr access to existing qemu dcr emulation */ 1478 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data) 1479 { 1480 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) 1481 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn); 1482 1483 return 0; 1484 } 1485 1486 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data) 1487 { 1488 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) 1489 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn); 1490 1491 return 0; 1492 } 1493 1494 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1495 { 1496 /* Mixed endian case is not handled */ 1497 uint32_t sc = debug_inst_opcode; 1498 1499 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1500 sizeof(sc), 0) || 1501 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) { 1502 return -EINVAL; 1503 } 1504 1505 return 0; 1506 } 1507 1508 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1509 { 1510 uint32_t sc; 1511 1512 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) || 1513 sc != debug_inst_opcode || 1514 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1515 sizeof(sc), 1)) { 1516 return -EINVAL; 1517 } 1518 1519 return 0; 1520 } 1521 1522 static int find_hw_breakpoint(target_ulong addr, int type) 1523 { 1524 int n; 1525 1526 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1527 <= ARRAY_SIZE(hw_debug_points)); 1528 1529 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1530 if (hw_debug_points[n].addr == addr && 1531 hw_debug_points[n].type == type) { 1532 return n; 1533 } 1534 } 1535 1536 return -1; 1537 } 1538 1539 static int find_hw_watchpoint(target_ulong addr, int *flag) 1540 { 1541 int n; 1542 1543 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS); 1544 if (n >= 0) { 1545 *flag = BP_MEM_ACCESS; 1546 return n; 1547 } 1548 1549 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE); 1550 if (n >= 0) { 1551 *flag = BP_MEM_WRITE; 1552 return n; 1553 } 1554 1555 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ); 1556 if (n >= 0) { 1557 *flag = BP_MEM_READ; 1558 return n; 1559 } 1560 1561 return -1; 1562 } 1563 1564 int kvm_arch_insert_hw_breakpoint(target_ulong addr, 1565 target_ulong len, int type) 1566 { 1567 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) { 1568 return -ENOBUFS; 1569 } 1570 1571 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr; 1572 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type; 1573 1574 switch (type) { 1575 case GDB_BREAKPOINT_HW: 1576 if (nb_hw_breakpoint >= max_hw_breakpoint) { 1577 return -ENOBUFS; 1578 } 1579 1580 if (find_hw_breakpoint(addr, type) >= 0) { 1581 return -EEXIST; 1582 } 1583 1584 nb_hw_breakpoint++; 1585 break; 1586 1587 case GDB_WATCHPOINT_WRITE: 1588 case GDB_WATCHPOINT_READ: 1589 case GDB_WATCHPOINT_ACCESS: 1590 if (nb_hw_watchpoint >= max_hw_watchpoint) { 1591 
return -ENOBUFS; 1592 } 1593 1594 if (find_hw_breakpoint(addr, type) >= 0) { 1595 return -EEXIST; 1596 } 1597 1598 nb_hw_watchpoint++; 1599 break; 1600 1601 default: 1602 return -ENOSYS; 1603 } 1604 1605 return 0; 1606 } 1607 1608 int kvm_arch_remove_hw_breakpoint(target_ulong addr, 1609 target_ulong len, int type) 1610 { 1611 int n; 1612 1613 n = find_hw_breakpoint(addr, type); 1614 if (n < 0) { 1615 return -ENOENT; 1616 } 1617 1618 switch (type) { 1619 case GDB_BREAKPOINT_HW: 1620 nb_hw_breakpoint--; 1621 break; 1622 1623 case GDB_WATCHPOINT_WRITE: 1624 case GDB_WATCHPOINT_READ: 1625 case GDB_WATCHPOINT_ACCESS: 1626 nb_hw_watchpoint--; 1627 break; 1628 1629 default: 1630 return -ENOSYS; 1631 } 1632 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint]; 1633 1634 return 0; 1635 } 1636 1637 void kvm_arch_remove_all_hw_breakpoints(void) 1638 { 1639 nb_hw_breakpoint = nb_hw_watchpoint = 0; 1640 } 1641 1642 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg) 1643 { 1644 int n; 1645 1646 /* Software Breakpoint updates */ 1647 if (kvm_sw_breakpoints_active(cs)) { 1648 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; 1649 } 1650 1651 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1652 <= ARRAY_SIZE(hw_debug_points)); 1653 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp)); 1654 1655 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1656 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; 1657 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp)); 1658 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1659 switch (hw_debug_points[n].type) { 1660 case GDB_BREAKPOINT_HW: 1661 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT; 1662 break; 1663 case GDB_WATCHPOINT_WRITE: 1664 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE; 1665 break; 1666 case GDB_WATCHPOINT_READ: 1667 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ; 1668 break; 1669 case GDB_WATCHPOINT_ACCESS: 1670 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE | 1671 KVMPPC_DEBUG_WATCH_READ; 1672 break; 1673 default: 1674 cpu_abort(cs, "Unsupported breakpoint type\n"); 1675 } 1676 dbg->arch.bp[n].addr = hw_debug_points[n].addr; 1677 } 1678 } 1679 } 1680 1681 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run) 1682 { 1683 CPUState *cs = CPU(cpu); 1684 CPUPPCState *env = &cpu->env; 1685 struct kvm_debug_exit_arch *arch_info = &run->debug.arch; 1686 int handle = 0; 1687 int n; 1688 int flag = 0; 1689 1690 if (cs->singlestep_enabled) { 1691 handle = 1; 1692 } else if (arch_info->status) { 1693 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1694 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) { 1695 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW); 1696 if (n >= 0) { 1697 handle = 1; 1698 } 1699 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ | 1700 KVMPPC_DEBUG_WATCH_WRITE)) { 1701 n = find_hw_watchpoint(arch_info->address, &flag); 1702 if (n >= 0) { 1703 handle = 1; 1704 cs->watchpoint_hit = &hw_watchpoint; 1705 hw_watchpoint.vaddr = hw_debug_points[n].addr; 1706 hw_watchpoint.flags = flag; 1707 } 1708 } 1709 } 1710 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) { 1711 handle = 1; 1712 } else { 1713 /* QEMU is not able to handle debug exception, so inject 1714 * program exception to guest; 1715 * Yes program exception NOT debug exception !! 1716 * When QEMU is using debug resources then debug exception must 1717 * be always set. To achieve this we set MSR_DE and also set 1718 * MSRP_DEP so guest cannot change MSR_DE. 
1719 * When emulating debug resource for guest we want guest 1720 * to control MSR_DE (enable/disable debug interrupt on need). 1721 * Supporting both configurations are NOT possible. 1722 * So the result is that we cannot share debug resources 1723 * between QEMU and Guest on BOOKE architecture. 1724 * In the current design QEMU gets the priority over guest, 1725 * this means that if QEMU is using debug resources then guest 1726 * cannot use them; 1727 * For software breakpoint QEMU uses a privileged instruction; 1728 * So there cannot be any reason that we are here for guest 1729 * set debug exception, only possibility is guest executed a 1730 * privileged / illegal instruction and that's why we are 1731 * injecting a program interrupt. 1732 */ 1733 1734 cpu_synchronize_state(cs); 1735 /* env->nip is PC, so increment this by 4 to use 1736 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4. 1737 */ 1738 env->nip += 4; 1739 cs->exception_index = POWERPC_EXCP_PROGRAM; 1740 env->error_code = POWERPC_EXCP_INVAL; 1741 ppc_cpu_do_interrupt(cs); 1742 } 1743 1744 return handle; 1745 } 1746 1747 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) 1748 { 1749 PowerPCCPU *cpu = POWERPC_CPU(cs); 1750 CPUPPCState *env = &cpu->env; 1751 int ret; 1752 1753 qemu_mutex_lock_iothread(); 1754 1755 switch (run->exit_reason) { 1756 case KVM_EXIT_DCR: 1757 if (run->dcr.is_write) { 1758 DPRINTF("handle dcr write\n"); 1759 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data); 1760 } else { 1761 DPRINTF("handle dcr read\n"); 1762 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data); 1763 } 1764 break; 1765 case KVM_EXIT_HLT: 1766 DPRINTF("handle halt\n"); 1767 ret = kvmppc_handle_halt(cpu); 1768 break; 1769 #if defined(TARGET_PPC64) 1770 case KVM_EXIT_PAPR_HCALL: 1771 DPRINTF("handle PAPR hypercall\n"); 1772 run->papr_hcall.ret = spapr_hypercall(cpu, 1773 run->papr_hcall.nr, 1774 run->papr_hcall.args); 1775 ret = 0; 1776 break; 1777 #endif 1778 case KVM_EXIT_EPR: 1779 DPRINTF("handle epr\n"); 1780 run->epr.epr = ldl_phys(cs->as, env->mpic_iack); 1781 ret = 0; 1782 break; 1783 case KVM_EXIT_WATCHDOG: 1784 DPRINTF("handle watchdog expiry\n"); 1785 watchdog_perform_action(); 1786 ret = 0; 1787 break; 1788 1789 case KVM_EXIT_DEBUG: 1790 DPRINTF("handle debug exception\n"); 1791 if (kvm_handle_debug(cpu, run)) { 1792 ret = EXCP_DEBUG; 1793 break; 1794 } 1795 /* re-enter, this exception was guest-internal */ 1796 ret = 0; 1797 break; 1798 1799 default: 1800 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); 1801 ret = -1; 1802 break; 1803 } 1804 1805 qemu_mutex_unlock_iothread(); 1806 return ret; 1807 } 1808 1809 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits) 1810 { 1811 CPUState *cs = CPU(cpu); 1812 uint32_t bits = tsr_bits; 1813 struct kvm_one_reg reg = { 1814 .id = KVM_REG_PPC_OR_TSR, 1815 .addr = (uintptr_t) &bits, 1816 }; 1817 1818 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1819 } 1820 1821 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits) 1822 { 1823 1824 CPUState *cs = CPU(cpu); 1825 uint32_t bits = tsr_bits; 1826 struct kvm_one_reg reg = { 1827 .id = KVM_REG_PPC_CLEAR_TSR, 1828 .addr = (uintptr_t) &bits, 1829 }; 1830 1831 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1832 } 1833 1834 int kvmppc_set_tcr(PowerPCCPU *cpu) 1835 { 1836 CPUState *cs = CPU(cpu); 1837 CPUPPCState *env = &cpu->env; 1838 uint32_t tcr = env->spr[SPR_BOOKE_TCR]; 1839 1840 struct kvm_one_reg reg = { 1841 .id = KVM_REG_PPC_TCR, 1842 .addr = (uintptr_t) &tcr, 
1843 }; 1844 1845 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1846 } 1847 1848 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu) 1849 { 1850 CPUState *cs = CPU(cpu); 1851 int ret; 1852 1853 if (!kvm_enabled()) { 1854 return -1; 1855 } 1856 1857 if (!cap_ppc_watchdog) { 1858 printf("warning: KVM does not support watchdog"); 1859 return -1; 1860 } 1861 1862 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0); 1863 if (ret < 0) { 1864 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n", 1865 __func__, strerror(-ret)); 1866 return ret; 1867 } 1868 1869 return ret; 1870 } 1871 1872 static int read_cpuinfo(const char *field, char *value, int len) 1873 { 1874 FILE *f; 1875 int ret = -1; 1876 int field_len = strlen(field); 1877 char line[512]; 1878 1879 f = fopen("/proc/cpuinfo", "r"); 1880 if (!f) { 1881 return -1; 1882 } 1883 1884 do { 1885 if (!fgets(line, sizeof(line), f)) { 1886 break; 1887 } 1888 if (!strncmp(line, field, field_len)) { 1889 pstrcpy(value, len, line); 1890 ret = 0; 1891 break; 1892 } 1893 } while(*line); 1894 1895 fclose(f); 1896 1897 return ret; 1898 } 1899 1900 uint32_t kvmppc_get_tbfreq(void) 1901 { 1902 char line[512]; 1903 char *ns; 1904 uint32_t retval = NANOSECONDS_PER_SECOND; 1905 1906 if (read_cpuinfo("timebase", line, sizeof(line))) { 1907 return retval; 1908 } 1909 1910 if (!(ns = strchr(line, ':'))) { 1911 return retval; 1912 } 1913 1914 ns++; 1915 1916 return atoi(ns); 1917 } 1918 1919 bool kvmppc_get_host_serial(char **value) 1920 { 1921 return g_file_get_contents("/proc/device-tree/system-id", value, NULL, 1922 NULL); 1923 } 1924 1925 bool kvmppc_get_host_model(char **value) 1926 { 1927 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL); 1928 } 1929 1930 /* Try to find a device tree node for a CPU with clock-frequency property */ 1931 static int kvmppc_find_cpu_dt(char *buf, int buf_len) 1932 { 1933 struct dirent *dirp; 1934 DIR *dp; 1935 1936 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) { 1937 printf("Can't open directory " PROC_DEVTREE_CPU "\n"); 1938 return -1; 1939 } 1940 1941 buf[0] = '\0'; 1942 while ((dirp = readdir(dp)) != NULL) { 1943 FILE *f; 1944 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU, 1945 dirp->d_name); 1946 f = fopen(buf, "r"); 1947 if (f) { 1948 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name); 1949 fclose(f); 1950 break; 1951 } 1952 buf[0] = '\0'; 1953 } 1954 closedir(dp); 1955 if (buf[0] == '\0') { 1956 printf("Unknown host!\n"); 1957 return -1; 1958 } 1959 1960 return 0; 1961 } 1962 1963 static uint64_t kvmppc_read_int_dt(const char *filename) 1964 { 1965 union { 1966 uint32_t v32; 1967 uint64_t v64; 1968 } u; 1969 FILE *f; 1970 int len; 1971 1972 f = fopen(filename, "rb"); 1973 if (!f) { 1974 return -1; 1975 } 1976 1977 len = fread(&u, 1, sizeof(u), f); 1978 fclose(f); 1979 switch (len) { 1980 case 4: 1981 /* property is a 32-bit quantity */ 1982 return be32_to_cpu(u.v32); 1983 case 8: 1984 return be64_to_cpu(u.v64); 1985 } 1986 1987 return 0; 1988 } 1989 1990 /* Read a CPU node property from the host device tree that's a single 1991 * integer (32-bit or 64-bit). 
Returns 0 if anything goes wrong 1992 * (can't find or open the property, or doesn't understand the 1993 * format) */ 1994 static uint64_t kvmppc_read_int_cpu_dt(const char *propname) 1995 { 1996 char buf[PATH_MAX], *tmp; 1997 uint64_t val; 1998 1999 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) { 2000 return -1; 2001 } 2002 2003 tmp = g_strdup_printf("%s/%s", buf, propname); 2004 val = kvmppc_read_int_dt(tmp); 2005 g_free(tmp); 2006 2007 return val; 2008 } 2009 2010 uint64_t kvmppc_get_clockfreq(void) 2011 { 2012 return kvmppc_read_int_cpu_dt("clock-frequency"); 2013 } 2014 2015 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo) 2016 { 2017 PowerPCCPU *cpu = ppc_env_get_cpu(env); 2018 CPUState *cs = CPU(cpu); 2019 2020 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) && 2021 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) { 2022 return 0; 2023 } 2024 2025 return 1; 2026 } 2027 2028 int kvmppc_get_hasidle(CPUPPCState *env) 2029 { 2030 struct kvm_ppc_pvinfo pvinfo; 2031 2032 if (!kvmppc_get_pvinfo(env, &pvinfo) && 2033 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) { 2034 return 1; 2035 } 2036 2037 return 0; 2038 } 2039 2040 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len) 2041 { 2042 uint32_t *hc = (uint32_t*)buf; 2043 struct kvm_ppc_pvinfo pvinfo; 2044 2045 if (!kvmppc_get_pvinfo(env, &pvinfo)) { 2046 memcpy(buf, pvinfo.hcall, buf_len); 2047 return 0; 2048 } 2049 2050 /* 2051 * Fallback to always fail hypercalls regardless of endianness: 2052 * 2053 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian) 2054 * li r3, -1 2055 * b .+8 (becomes nop in wrong endian) 2056 * bswap32(li r3, -1) 2057 */ 2058 2059 hc[0] = cpu_to_be32(0x08000048); 2060 hc[1] = cpu_to_be32(0x3860ffff); 2061 hc[2] = cpu_to_be32(0x48000008); 2062 hc[3] = cpu_to_be32(bswap32(0x3860ffff)); 2063 2064 return 1; 2065 } 2066 2067 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall) 2068 { 2069 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1); 2070 } 2071 2072 void kvmppc_enable_logical_ci_hcalls(void) 2073 { 2074 /* 2075 * FIXME: it would be nice if we could detect the cases where 2076 * we're using a device which requires the in kernel 2077 * implementation of these hcalls, but the kernel lacks them and 2078 * produce a warning. 
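     *
     * (For context: H_LOGICAL_CI_LOAD/STORE are the PAPR hypercalls a
     * guest uses for cache-inhibited, i.e. MMIO, loads and stores, and
     * the calls below ask KVM to handle them in the kernel, which is what
     * devices with in-kernel acceleration require.)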
2079 */ 2080 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD); 2081 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE); 2082 } 2083 2084 void kvmppc_enable_set_mode_hcall(void) 2085 { 2086 kvmppc_enable_hcall(kvm_state, H_SET_MODE); 2087 } 2088 2089 void kvmppc_enable_clear_ref_mod_hcalls(void) 2090 { 2091 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF); 2092 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD); 2093 } 2094 2095 void kvmppc_set_papr(PowerPCCPU *cpu) 2096 { 2097 CPUState *cs = CPU(cpu); 2098 int ret; 2099 2100 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0); 2101 if (ret) { 2102 error_report("This vCPU type or KVM version does not support PAPR"); 2103 exit(1); 2104 } 2105 2106 /* Update the capability flag so we sync the right information 2107 * with kvm */ 2108 cap_papr = 1; 2109 } 2110 2111 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr) 2112 { 2113 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr); 2114 } 2115 2116 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy) 2117 { 2118 CPUState *cs = CPU(cpu); 2119 int ret; 2120 2121 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy); 2122 if (ret && mpic_proxy) { 2123 error_report("This KVM version does not support EPR"); 2124 exit(1); 2125 } 2126 } 2127 2128 int kvmppc_smt_threads(void) 2129 { 2130 return cap_ppc_smt ? cap_ppc_smt : 1; 2131 } 2132 2133 int kvmppc_set_smt_threads(int smt) 2134 { 2135 int ret; 2136 2137 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0); 2138 if (!ret) { 2139 cap_ppc_smt = smt; 2140 } 2141 return ret; 2142 } 2143 2144 void kvmppc_hint_smt_possible(Error **errp) 2145 { 2146 int i; 2147 GString *g; 2148 char *s; 2149 2150 assert(kvm_enabled()); 2151 if (cap_ppc_smt_possible) { 2152 g = g_string_new("Available VSMT modes:"); 2153 for (i = 63; i >= 0; i--) { 2154 if ((1UL << i) & cap_ppc_smt_possible) { 2155 g_string_append_printf(g, " %lu", (1UL << i)); 2156 } 2157 } 2158 s = g_string_free(g, false); 2159 error_append_hint(errp, "%s.\n", s); 2160 g_free(s); 2161 } else { 2162 error_append_hint(errp, 2163 "This KVM seems to be too old to support VSMT.\n"); 2164 } 2165 } 2166 2167 2168 #ifdef TARGET_PPC64 2169 off_t kvmppc_alloc_rma(void **rma) 2170 { 2171 off_t size; 2172 int fd; 2173 struct kvm_allocate_rma ret; 2174 2175 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported 2176 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but 2177 * not necessary on this hardware 2178 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware 2179 * 2180 * FIXME: We should allow the user to force contiguous RMA 2181 * allocation in the cap_ppc_rma==1 case. 
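 * One way to do that (a sketch only; "force_contiguous_rma" is a
 * hypothetical new option, nothing in this file implements it) would be to
 * widen the early-return test below:
 *
 *   if (cap_ppc_rma < 2 && !force_contiguous_rma) {
 *       return 0;
 *   }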
2182 */ 2183 if (cap_ppc_rma < 2) { 2184 return 0; 2185 } 2186 2187 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret); 2188 if (fd < 0) { 2189 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n", 2190 strerror(errno)); 2191 return -1; 2192 } 2193 2194 size = MIN(ret.rma_size, 256ul << 20); 2195 2196 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); 2197 if (*rma == MAP_FAILED) { 2198 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno)); 2199 return -1; 2200 }; 2201 2202 return size; 2203 } 2204 2205 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift) 2206 { 2207 struct kvm_ppc_smmu_info info; 2208 long rampagesize, best_page_shift; 2209 int i; 2210 2211 if (cap_ppc_rma >= 2) { 2212 return current_size; 2213 } 2214 2215 /* Find the largest hardware supported page size that's less than 2216 * or equal to the (logical) backing page size of guest RAM */ 2217 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info); 2218 rampagesize = qemu_getrampagesize(); 2219 best_page_shift = 0; 2220 2221 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) { 2222 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i]; 2223 2224 if (!sps->page_shift) { 2225 continue; 2226 } 2227 2228 if ((sps->page_shift > best_page_shift) 2229 && ((1UL << sps->page_shift) <= rampagesize)) { 2230 best_page_shift = sps->page_shift; 2231 } 2232 } 2233 2234 return MIN(current_size, 2235 1ULL << (best_page_shift + hash_shift - 7)); 2236 } 2237 #endif 2238 2239 bool kvmppc_spapr_use_multitce(void) 2240 { 2241 return cap_spapr_multitce; 2242 } 2243 2244 int kvmppc_spapr_enable_inkernel_multitce(void) 2245 { 2246 int ret; 2247 2248 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0, 2249 H_PUT_TCE_INDIRECT, 1); 2250 if (!ret) { 2251 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0, 2252 H_STUFF_TCE, 1); 2253 } 2254 2255 return ret; 2256 } 2257 2258 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift, 2259 uint64_t bus_offset, uint32_t nb_table, 2260 int *pfd, bool need_vfio) 2261 { 2262 long len; 2263 int fd; 2264 void *table; 2265 2266 /* Must set fd to -1 so we don't try to munmap when called for 2267 * destroying the table, which the upper layers -will- do 2268 */ 2269 *pfd = -1; 2270 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) { 2271 return NULL; 2272 } 2273 2274 if (cap_spapr_tce_64) { 2275 struct kvm_create_spapr_tce_64 args = { 2276 .liobn = liobn, 2277 .page_shift = page_shift, 2278 .offset = bus_offset >> page_shift, 2279 .size = nb_table, 2280 .flags = 0 2281 }; 2282 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args); 2283 if (fd < 0) { 2284 fprintf(stderr, 2285 "KVM: Failed to create TCE64 table for liobn 0x%x\n", 2286 liobn); 2287 return NULL; 2288 } 2289 } else if (cap_spapr_tce) { 2290 uint64_t window_size = (uint64_t) nb_table << page_shift; 2291 struct kvm_create_spapr_tce args = { 2292 .liobn = liobn, 2293 .window_size = window_size, 2294 }; 2295 if ((window_size != args.window_size) || bus_offset) { 2296 return NULL; 2297 } 2298 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args); 2299 if (fd < 0) { 2300 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n", 2301 liobn); 2302 return NULL; 2303 } 2304 } else { 2305 return NULL; 2306 } 2307 2308 len = nb_table * sizeof(uint64_t); 2309 /* FIXME: round this up to page size */ 2310 2311 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); 2312 if (table == MAP_FAILED) { 2313 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n", 
2314 liobn); 2315 close(fd); 2316 return NULL; 2317 } 2318 2319 *pfd = fd; 2320 return table; 2321 } 2322 2323 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table) 2324 { 2325 long len; 2326 2327 if (fd < 0) { 2328 return -1; 2329 } 2330 2331 len = nb_table * sizeof(uint64_t); 2332 if ((munmap(table, len) < 0) || 2333 (close(fd) < 0)) { 2334 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s", 2335 strerror(errno)); 2336 /* Leak the table */ 2337 } 2338 2339 return 0; 2340 } 2341 2342 int kvmppc_reset_htab(int shift_hint) 2343 { 2344 uint32_t shift = shift_hint; 2345 2346 if (!kvm_enabled()) { 2347 /* Full emulation, tell caller to allocate htab itself */ 2348 return 0; 2349 } 2350 if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) { 2351 int ret; 2352 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift); 2353 if (ret == -ENOTTY) { 2354 /* At least some versions of PR KVM advertise the 2355 * capability, but don't implement the ioctl(). Oops. 2356 * Return 0 so that we allocate the htab in qemu, as is 2357 * correct for PR. */ 2358 return 0; 2359 } else if (ret < 0) { 2360 return ret; 2361 } 2362 return shift; 2363 } 2364 2365 /* We have a kernel that predates the htab reset calls. For PR 2366 * KVM, we need to allocate the htab ourselves, for an HV KVM of 2367 * this era, it has allocated a 16MB fixed size hash table already. */ 2368 if (kvmppc_is_pr(kvm_state)) { 2369 /* PR - tell caller to allocate htab */ 2370 return 0; 2371 } else { 2372 /* HV - assume 16MB kernel allocated htab */ 2373 return 24; 2374 } 2375 } 2376 2377 static inline uint32_t mfpvr(void) 2378 { 2379 uint32_t pvr; 2380 2381 asm ("mfpvr %0" 2382 : "=r"(pvr)); 2383 return pvr; 2384 } 2385 2386 static void alter_insns(uint64_t *word, uint64_t flags, bool on) 2387 { 2388 if (on) { 2389 *word |= flags; 2390 } else { 2391 *word &= ~flags; 2392 } 2393 } 2394 2395 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data) 2396 { 2397 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); 2398 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size"); 2399 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size"); 2400 2401 /* Now fix up the class with information we can query from the host */ 2402 pcc->pvr = mfpvr(); 2403 2404 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, 2405 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC); 2406 alter_insns(&pcc->insns_flags2, PPC2_VSX, 2407 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX); 2408 alter_insns(&pcc->insns_flags2, PPC2_DFP, 2409 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP); 2410 2411 if (dcache_size != -1) { 2412 pcc->l1_dcache_size = dcache_size; 2413 } 2414 2415 if (icache_size != -1) { 2416 pcc->l1_icache_size = icache_size; 2417 } 2418 2419 #if defined(TARGET_PPC64) 2420 pcc->radix_page_info = kvm_get_radix_page_info(); 2421 2422 if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) { 2423 /* 2424 * POWER9 DD1 has some bugs which make it not really ISA 3.00 2425 * compliant. More importantly, advertising ISA 3.00 2426 * architected mode may prevent guests from activating 2427 * necessary DD1 workarounds. 
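 * Hence the masking below: clearing every PCR_COMPAT_* bit means QEMU never
 * offers an architected compat mode on DD1, so the guest always sees the raw
 * DD1 PVR and can apply its own workarounds.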
2428 */ 2429 pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07 2430 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05); 2431 } 2432 #endif /* defined(TARGET_PPC64) */ 2433 } 2434 2435 bool kvmppc_has_cap_epr(void) 2436 { 2437 return cap_epr; 2438 } 2439 2440 bool kvmppc_has_cap_fixup_hcalls(void) 2441 { 2442 return cap_fixup_hcalls; 2443 } 2444 2445 bool kvmppc_has_cap_htm(void) 2446 { 2447 return cap_htm; 2448 } 2449 2450 bool kvmppc_has_cap_mmu_radix(void) 2451 { 2452 return cap_mmu_radix; 2453 } 2454 2455 bool kvmppc_has_cap_mmu_hash_v3(void) 2456 { 2457 return cap_mmu_hash_v3; 2458 } 2459 2460 static void kvmppc_get_cpu_characteristics(KVMState *s) 2461 { 2462 struct kvm_ppc_cpu_char c; 2463 int ret; 2464 2465 /* Assume broken */ 2466 cap_ppc_safe_cache = 0; 2467 cap_ppc_safe_bounds_check = 0; 2468 cap_ppc_safe_indirect_branch = 0; 2469 2470 ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR); 2471 if (!ret) { 2472 return; 2473 } 2474 ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c); 2475 if (ret < 0) { 2476 return; 2477 } 2478 /* Parse and set cap_ppc_safe_cache */ 2479 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) { 2480 cap_ppc_safe_cache = 2; 2481 } else if ((c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) && 2482 (c.character & c.character_mask 2483 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) { 2484 cap_ppc_safe_cache = 1; 2485 } 2486 /* Parse and set cap_ppc_safe_bounds_check */ 2487 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) { 2488 cap_ppc_safe_bounds_check = 2; 2489 } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) { 2490 cap_ppc_safe_bounds_check = 1; 2491 } 2492 /* Parse and set cap_ppc_safe_indirect_branch */ 2493 if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) { 2494 cap_ppc_safe_indirect_branch = SPAPR_CAP_FIXED_CCD; 2495 } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) { 2496 cap_ppc_safe_indirect_branch = SPAPR_CAP_FIXED_IBS; 2497 } 2498 } 2499 2500 int kvmppc_get_cap_safe_cache(void) 2501 { 2502 return cap_ppc_safe_cache; 2503 } 2504 2505 int kvmppc_get_cap_safe_bounds_check(void) 2506 { 2507 return cap_ppc_safe_bounds_check; 2508 } 2509 2510 int kvmppc_get_cap_safe_indirect_branch(void) 2511 { 2512 return cap_ppc_safe_indirect_branch; 2513 } 2514 2515 bool kvmppc_has_cap_spapr_vfio(void) 2516 { 2517 return cap_spapr_vfio; 2518 } 2519 2520 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) 2521 { 2522 uint32_t host_pvr = mfpvr(); 2523 PowerPCCPUClass *pvr_pcc; 2524 2525 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr); 2526 if (pvr_pcc == NULL) { 2527 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr); 2528 } 2529 2530 return pvr_pcc; 2531 } 2532 2533 static int kvm_ppc_register_host_cpu_type(MachineState *ms) 2534 { 2535 TypeInfo type_info = { 2536 .name = TYPE_HOST_POWERPC_CPU, 2537 .class_init = kvmppc_host_cpu_class_init, 2538 }; 2539 MachineClass *mc = MACHINE_GET_CLASS(ms); 2540 PowerPCCPUClass *pvr_pcc; 2541 ObjectClass *oc; 2542 DeviceClass *dc; 2543 int i; 2544 2545 pvr_pcc = kvm_ppc_get_host_cpu_class(); 2546 if (pvr_pcc == NULL) { 2547 return -1; 2548 } 2549 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc)); 2550 type_register(&type_info); 2551 if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) { 2552 /* override TCG default cpu type with 'host' cpu model */ 2553 mc->default_cpu_type = TYPE_HOST_POWERPC_CPU; 2554 } 2555 2556 oc = object_class_by_name(type_info.name); 2557 g_assert(oc); 2558 2559 /* 2560 * Update 
generic CPU family class alias (e.g. on a POWER8NVL host, 2561 * we want "POWER8" to be a "family" alias that points to the current 2562 * host CPU type, too) 2563 */ 2564 dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc)); 2565 for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) { 2566 if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) { 2567 char *suffix; 2568 2569 ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc)); 2570 suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX); 2571 if (suffix) { 2572 *suffix = 0; 2573 } 2574 break; 2575 } 2576 } 2577 2578 return 0; 2579 } 2580 2581 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function) 2582 { 2583 struct kvm_rtas_token_args args = { 2584 .token = token, 2585 }; 2586 2587 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) { 2588 return -ENOENT; 2589 } 2590 2591 strncpy(args.name, function, sizeof(args.name)); 2592 2593 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args); 2594 } 2595 2596 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp) 2597 { 2598 struct kvm_get_htab_fd s = { 2599 .flags = write ? KVM_GET_HTAB_WRITE : 0, 2600 .start_index = index, 2601 }; 2602 int ret; 2603 2604 if (!cap_htab_fd) { 2605 error_setg(errp, "KVM version doesn't support %s the HPT", 2606 write ? "writing" : "reading"); 2607 return -ENOTSUP; 2608 } 2609 2610 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s); 2611 if (ret < 0) { 2612 error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s", 2613 write ? "writing" : "reading", write ? "to" : "from", 2614 strerror(errno)); 2615 return -errno; 2616 } 2617 2618 return ret; 2619 } 2620 2621 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns) 2622 { 2623 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); 2624 uint8_t buf[bufsize]; 2625 ssize_t rc; 2626 2627 do { 2628 rc = read(fd, buf, bufsize); 2629 if (rc < 0) { 2630 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n", 2631 strerror(errno)); 2632 return rc; 2633 } else if (rc) { 2634 uint8_t *buffer = buf; 2635 ssize_t n = rc; 2636 while (n) { 2637 struct kvm_get_htab_header *head = 2638 (struct kvm_get_htab_header *) buffer; 2639 size_t chunksize = sizeof(*head) + 2640 HASH_PTE_SIZE_64 * head->n_valid; 2641 2642 qemu_put_be32(f, head->index); 2643 qemu_put_be16(f, head->n_valid); 2644 qemu_put_be16(f, head->n_invalid); 2645 qemu_put_buffer(f, (void *)(head + 1), 2646 HASH_PTE_SIZE_64 * head->n_valid); 2647 2648 buffer += chunksize; 2649 n -= chunksize; 2650 } 2651 } 2652 } while ((rc != 0) 2653 && ((max_ns < 0) 2654 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns))); 2655 2656 return (rc == 0) ? 
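/* 1: nothing left to read, save complete; 0: stopped on the max_ns time budget */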
1 : 0; 2657 } 2658 2659 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index, 2660 uint16_t n_valid, uint16_t n_invalid) 2661 { 2662 struct kvm_get_htab_header *buf; 2663 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64; 2664 ssize_t rc; 2665 2666 buf = alloca(chunksize); 2667 buf->index = index; 2668 buf->n_valid = n_valid; 2669 buf->n_invalid = n_invalid; 2670 2671 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid); 2672 2673 rc = write(fd, buf, chunksize); 2674 if (rc < 0) { 2675 fprintf(stderr, "Error writing KVM hash table: %s\n", 2676 strerror(errno)); 2677 return rc; 2678 } 2679 if (rc != chunksize) { 2680 /* We should never get a short write on a single chunk */ 2681 fprintf(stderr, "Short write, restoring KVM hash table\n"); 2682 return -1; 2683 } 2684 return 0; 2685 } 2686 2687 bool kvm_arch_stop_on_emulation_error(CPUState *cpu) 2688 { 2689 return true; 2690 } 2691 2692 void kvm_arch_init_irq_routing(KVMState *s) 2693 { 2694 } 2695 2696 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n) 2697 { 2698 int fd, rc; 2699 int i; 2700 2701 fd = kvmppc_get_htab_fd(false, ptex, &error_abort); 2702 2703 i = 0; 2704 while (i < n) { 2705 struct kvm_get_htab_header *hdr; 2706 int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP; 2707 char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64]; 2708 2709 rc = read(fd, buf, sizeof(buf)); 2710 if (rc < 0) { 2711 hw_error("kvmppc_read_hptes: Unable to read HPTEs"); 2712 } 2713 2714 hdr = (struct kvm_get_htab_header *)buf; 2715 while ((i < n) && ((char *)hdr < (buf + rc))) { 2716 int invalid = hdr->n_invalid, valid = hdr->n_valid; 2717 2718 if (hdr->index != (ptex + i)) { 2719 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32 2720 " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i); 2721 } 2722 2723 if (n - i < valid) { 2724 valid = n - i; 2725 } 2726 memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid); 2727 i += valid; 2728 2729 if ((n - i) < invalid) { 2730 invalid = n - i; 2731 } 2732 memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64); 2733 i += invalid; 2734 2735 hdr = (struct kvm_get_htab_header *) 2736 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid); 2737 } 2738 } 2739 2740 close(fd); 2741 } 2742 2743 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1) 2744 { 2745 int fd, rc; 2746 struct { 2747 struct kvm_get_htab_header hdr; 2748 uint64_t pte0; 2749 uint64_t pte1; 2750 } buf; 2751 2752 fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort); 2753 2754 buf.hdr.n_valid = 1; 2755 buf.hdr.n_invalid = 0; 2756 buf.hdr.index = ptex; 2757 buf.pte0 = cpu_to_be64(pte0); 2758 buf.pte1 = cpu_to_be64(pte1); 2759 2760 rc = write(fd, &buf, sizeof(buf)); 2761 if (rc != sizeof(buf)) { 2762 hw_error("kvmppc_write_hpte: Unable to update KVM HPT"); 2763 } 2764 close(fd); 2765 } 2766 2767 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, 2768 uint64_t address, uint32_t data, PCIDevice *dev) 2769 { 2770 return 0; 2771 } 2772 2773 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, 2774 int vector, PCIDevice *dev) 2775 { 2776 return 0; 2777 } 2778 2779 int kvm_arch_release_virq_post(int virq) 2780 { 2781 return 0; 2782 } 2783 2784 int kvm_arch_msi_data_to_gsi(uint32_t data) 2785 { 2786 return data & 0xffff; 2787 } 2788 2789 int kvmppc_enable_hwrng(void) 2790 { 2791 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) { 2792 return -1; 2793 } 2794 2795 return kvmppc_enable_hcall(kvm_state, H_RANDOM); 2796 } 2797 2798 void 
kvmppc_check_papr_resize_hpt(Error **errp) 2799 { 2800 if (!kvm_enabled()) { 2801 return; /* No KVM, we're good */ 2802 } 2803 2804 if (cap_resize_hpt) { 2805 return; /* Kernel has explicit support, we're good */ 2806 } 2807 2808 /* Otherwise fallback on looking for PR KVM */ 2809 if (kvmppc_is_pr(kvm_state)) { 2810 return; 2811 } 2812 2813 error_setg(errp, 2814 "Hash page table resizing not available with this KVM version"); 2815 } 2816 2817 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift) 2818 { 2819 CPUState *cs = CPU(cpu); 2820 struct kvm_ppc_resize_hpt rhpt = { 2821 .flags = flags, 2822 .shift = shift, 2823 }; 2824 2825 if (!cap_resize_hpt) { 2826 return -ENOSYS; 2827 } 2828 2829 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt); 2830 } 2831 2832 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift) 2833 { 2834 CPUState *cs = CPU(cpu); 2835 struct kvm_ppc_resize_hpt rhpt = { 2836 .flags = flags, 2837 .shift = shift, 2838 }; 2839 2840 if (!cap_resize_hpt) { 2841 return -ENOSYS; 2842 } 2843 2844 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt); 2845 } 2846 2847 /* 2848 * This is a helper function to detect a post migration scenario 2849 * in which a guest, running as KVM-HV, freezes in cpu_post_load because 2850 * the guest kernel can't handle a PVR value other than the actual host 2851 * PVR in KVM_SET_SREGS, even if pvr_match() returns true. 2852 * 2853 * If we don't have cap_ppc_pvr_compat and we're not running in PR 2854 * (so, we're HV), return true. The workaround itself is done in 2855 * cpu_post_load. 2856 * 2857 * The order here is important: we'll only check for KVM PR as a 2858 * fallback if the guest kernel can't handle the situation itself. 2859 * We need to avoid as much as possible querying the running KVM type 2860 * in QEMU level. 2861 */ 2862 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu) 2863 { 2864 CPUState *cs = CPU(cpu); 2865 2866 if (!kvm_enabled()) { 2867 return false; 2868 } 2869 2870 if (cap_ppc_pvr_compat) { 2871 return false; 2872 } 2873 2874 return !kvmppc_is_pr(cs->kvm_state); 2875 } 2876
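/*
 * For reference, the consumer of kvmppc_pvr_workaround_required() sits in the
 * migration post-load path, as noted above. A sketch of such a caller
 * (illustrative only; the real code lives outside this file):
 *
 *   if (kvmppc_pvr_workaround_required(cpu)) {
 *       env->spr[SPR_PVR] = env->spr_cb[SPR_PVR].default_value;
 *   }
 */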