/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <dirent.h>
#include <sys/ioctl.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "cpu.h"
#include "cpu-models.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/hw_accel.h"
#include "kvm_ppc.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "mmu-hash64.h"

#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "hw/ppc/spapr_cpu_core.h"
#include "hw/ppc/ppc.h"
#include "sysemu/watchdog.h"
#include "trace.h"
#include "exec/gdbstub.h"
#include "exec/memattrs.h"
#include "exec/ram_addr.h"
#include "sysemu/hostmem.h"
#include "qemu/cutils.h"
#include "qemu/mmap-alloc.h"
#include "elf.h"
#include "sysemu/kvm_int.h"

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_smt_possible;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_tce_64;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;
static int cap_htm;             /* Hardware transactional memory support */
static int cap_mmu_radix;
static int cap_mmu_hash_v3;
static int cap_resize_hpt;
static int cap_ppc_pvr_compat;
static int cap_ppc_safe_cache;
static int cap_ppc_safe_bounds_check;
static int cap_ppc_safe_indirect_branch;

static uint32_t debug_inst_opcode;

/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;

static void kvm_kick_cpu(void *opaque)
{
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
}

/* Check whether we are running with KVM-PR (instead of KVM-HV). This
 * should only be used for fallback tests - generally we should use
 * explicit capabilities for the features we want, rather than
 * assuming what is/isn't available depending on the KVM variant.
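 *
 * (For instance, kvm_arch_init_vcpu() below only applies its POWER8 HTM
 * fallback when "!cap_htm && !kvmppc_is_pr(cs->kvm_state)" holds, i.e. when
 * we already know we are on KVM-HV.)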
 */
static bool kvmppc_is_pr(KVMState *ks)
{
    /* Assume KVM-PR if the GET_PVINFO capability is available */
    return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
}

static int kvm_ppc_register_host_cpu_type(MachineState *ms);
static void kvmppc_get_cpu_characteristics(KVMState *s);

int kvm_arch_init(MachineState *ms, KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
    cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr() */
    cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
    cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
    cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
    cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
    cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
    kvmppc_get_cpu_characteristics(s);
    /*
     * Note: setting it to false because there is no such capability
     * in KVM at this moment.
     *
     * TODO: call kvm_vm_check_extension() with the right capability
     * after the kernel starts implementing it.
     */
    cap_ppc_pvr_compat = false;

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type(ms);

    return 0;
}

int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
{
    return 0;
}

static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we may mislead users into thinking they can run
           BookE guests on BookS.
           Let's hope nobody dares to try. :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}

/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}


#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - Check whether we are running "PR" KVM which only supports 4K
     *   and 16M pages, but supports them regardless of the backing
     *   store characteristics. We also don't support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback.
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
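     *
     * In practice the code below therefore reports only the 4K and 16M
     * page sizes under PR KVM, while under HV KVM it also sets
     * KVM_PPC_PAGE_SIZES_REAL and additionally reports a 64K size (SLB
     * encoding 0x110, PTE encoding 1) on 2.06/2.07 MMUs.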
282 */ 283 if (kvmppc_is_pr(cs->kvm_state)) { 284 /* No flags */ 285 info->flags = 0; 286 info->slb_size = 64; 287 288 /* Standard 4k base page size segment */ 289 info->sps[0].page_shift = 12; 290 info->sps[0].slb_enc = 0; 291 info->sps[0].enc[0].page_shift = 12; 292 info->sps[0].enc[0].pte_enc = 0; 293 294 /* Standard 16M large page size segment */ 295 info->sps[1].page_shift = 24; 296 info->sps[1].slb_enc = SLB_VSID_L; 297 info->sps[1].enc[0].page_shift = 24; 298 info->sps[1].enc[0].pte_enc = 0; 299 } else { 300 int i = 0; 301 302 /* HV KVM has backing store size restrictions */ 303 info->flags = KVM_PPC_PAGE_SIZES_REAL; 304 305 if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)) { 306 info->flags |= KVM_PPC_1T_SEGMENTS; 307 } 308 309 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 || 310 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) { 311 info->slb_size = 32; 312 } else { 313 info->slb_size = 64; 314 } 315 316 /* Standard 4k base page size segment */ 317 info->sps[i].page_shift = 12; 318 info->sps[i].slb_enc = 0; 319 info->sps[i].enc[0].page_shift = 12; 320 info->sps[i].enc[0].pte_enc = 0; 321 i++; 322 323 /* 64K on MMU 2.06 and later */ 324 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 || 325 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) { 326 info->sps[i].page_shift = 16; 327 info->sps[i].slb_enc = 0x110; 328 info->sps[i].enc[0].page_shift = 16; 329 info->sps[i].enc[0].pte_enc = 1; 330 i++; 331 } 332 333 /* Standard 16M large page size segment */ 334 info->sps[i].page_shift = 24; 335 info->sps[i].slb_enc = SLB_VSID_L; 336 info->sps[i].enc[0].page_shift = 24; 337 info->sps[i].enc[0].pte_enc = 0; 338 } 339 } 340 341 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info) 342 { 343 CPUState *cs = CPU(cpu); 344 int ret; 345 346 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) { 347 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info); 348 if (ret == 0) { 349 return; 350 } 351 } 352 353 kvm_get_fallback_smmu_info(cpu, info); 354 } 355 356 struct ppc_radix_page_info *kvm_get_radix_page_info(void) 357 { 358 KVMState *s = KVM_STATE(current_machine->accelerator); 359 struct ppc_radix_page_info *radix_page_info; 360 struct kvm_ppc_rmmu_info rmmu_info; 361 int i; 362 363 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) { 364 return NULL; 365 } 366 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) { 367 return NULL; 368 } 369 radix_page_info = g_malloc0(sizeof(*radix_page_info)); 370 radix_page_info->count = 0; 371 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) { 372 if (rmmu_info.ap_encodings[i]) { 373 radix_page_info->entries[i] = rmmu_info.ap_encodings[i]; 374 radix_page_info->count++; 375 } 376 } 377 return radix_page_info; 378 } 379 380 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, 381 bool radix, bool gtse, 382 uint64_t proc_tbl) 383 { 384 CPUState *cs = CPU(cpu); 385 int ret; 386 uint64_t flags = 0; 387 struct kvm_ppc_mmuv3_cfg cfg = { 388 .process_table = proc_tbl, 389 }; 390 391 if (radix) { 392 flags |= KVM_PPC_MMUV3_RADIX; 393 } 394 if (gtse) { 395 flags |= KVM_PPC_MMUV3_GTSE; 396 } 397 cfg.flags = flags; 398 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg); 399 switch (ret) { 400 case 0: 401 return H_SUCCESS; 402 case -EINVAL: 403 return H_PARAMETER; 404 case -ENODEV: 405 return H_NOT_AVAILABLE; 406 default: 407 return H_HARDWARE; 408 } 409 } 410 411 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift) 412 { 413 if (!(flags & 
KVM_PPC_PAGE_SIZES_REAL)) { 414 return true; 415 } 416 417 return (1ul << shift) <= rampgsize; 418 } 419 420 static long max_cpu_page_size; 421 422 static void kvm_fixup_page_sizes(PowerPCCPU *cpu) 423 { 424 static struct kvm_ppc_smmu_info smmu_info; 425 static bool has_smmu_info; 426 CPUPPCState *env = &cpu->env; 427 int iq, ik, jq, jk; 428 429 /* We only handle page sizes for 64-bit server guests for now */ 430 if (!(env->mmu_model & POWERPC_MMU_64)) { 431 return; 432 } 433 434 /* Collect MMU info from kernel if not already */ 435 if (!has_smmu_info) { 436 kvm_get_smmu_info(cpu, &smmu_info); 437 has_smmu_info = true; 438 } 439 440 if (!max_cpu_page_size) { 441 max_cpu_page_size = qemu_getrampagesize(); 442 } 443 444 /* Convert to QEMU form */ 445 memset(cpu->hash64_opts->sps, 0, sizeof(*cpu->hash64_opts->sps)); 446 447 /* If we have HV KVM, we need to forbid CI large pages if our 448 * host page size is smaller than 64K. 449 */ 450 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) { 451 if (getpagesize() >= 0x10000) { 452 cpu->hash64_opts->flags |= PPC_HASH64_CI_LARGEPAGE; 453 } else { 454 cpu->hash64_opts->flags &= ~PPC_HASH64_CI_LARGEPAGE; 455 } 456 } 457 458 /* 459 * XXX This loop should be an entry wide AND of the capabilities that 460 * the selected CPU has with the capabilities that KVM supports. 461 */ 462 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) { 463 PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq]; 464 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik]; 465 466 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size, 467 ksps->page_shift)) { 468 continue; 469 } 470 qsps->page_shift = ksps->page_shift; 471 qsps->slb_enc = ksps->slb_enc; 472 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) { 473 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size, 474 ksps->enc[jk].page_shift)) { 475 continue; 476 } 477 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift; 478 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc; 479 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) { 480 break; 481 } 482 } 483 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) { 484 break; 485 } 486 } 487 env->slb_nr = smmu_info.slb_size; 488 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) { 489 cpu->hash64_opts->flags &= ~PPC_HASH64_1TSEG; 490 } 491 } 492 493 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path) 494 { 495 Object *mem_obj = object_resolve_path(obj_path, NULL); 496 long pagesize = host_memory_backend_pagesize(MEMORY_BACKEND(mem_obj)); 497 498 return pagesize >= max_cpu_page_size; 499 } 500 501 #else /* defined (TARGET_PPC64) */ 502 503 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu) 504 { 505 } 506 507 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path) 508 { 509 return true; 510 } 511 512 #endif /* !defined (TARGET_PPC64) */ 513 514 unsigned long kvm_arch_vcpu_id(CPUState *cpu) 515 { 516 return POWERPC_CPU(cpu)->vcpu_id; 517 } 518 519 /* e500 supports 2 h/w breakpoint and 2 watchpoint. 520 * book3s supports only 1 watchpoint, so array size 521 * of 4 is sufficient for now. 
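 * Only kvmppc_hw_debug_points_init() below sets non-zero limits, and it
 * does so only for the BookE exception model; on other models
 * max_hw_breakpoint and max_hw_watchpoint keep their default of 0.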
522 */ 523 #define MAX_HW_BKPTS 4 524 525 static struct HWBreakpoint { 526 target_ulong addr; 527 int type; 528 } hw_debug_points[MAX_HW_BKPTS]; 529 530 static CPUWatchpoint hw_watchpoint; 531 532 /* Default there is no breakpoint and watchpoint supported */ 533 static int max_hw_breakpoint; 534 static int max_hw_watchpoint; 535 static int nb_hw_breakpoint; 536 static int nb_hw_watchpoint; 537 538 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv) 539 { 540 if (cenv->excp_model == POWERPC_EXCP_BOOKE) { 541 max_hw_breakpoint = 2; 542 max_hw_watchpoint = 2; 543 } 544 545 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) { 546 fprintf(stderr, "Error initializing h/w breakpoints\n"); 547 return; 548 } 549 } 550 551 int kvm_arch_init_vcpu(CPUState *cs) 552 { 553 PowerPCCPU *cpu = POWERPC_CPU(cs); 554 CPUPPCState *cenv = &cpu->env; 555 int ret; 556 557 /* Gather server mmu info from KVM and update the CPU state */ 558 kvm_fixup_page_sizes(cpu); 559 560 /* Synchronize sregs with kvm */ 561 ret = kvm_arch_sync_sregs(cpu); 562 if (ret) { 563 if (ret == -EINVAL) { 564 error_report("Register sync failed... If you're using kvm-hv.ko," 565 " only \"-cpu host\" is possible"); 566 } 567 return ret; 568 } 569 570 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu); 571 572 switch (cenv->mmu_model) { 573 case POWERPC_MMU_BOOKE206: 574 /* This target supports access to KVM's guest TLB */ 575 ret = kvm_booke206_tlb_init(cpu); 576 break; 577 case POWERPC_MMU_2_07: 578 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) { 579 /* KVM-HV has transactional memory on POWER8 also without the 580 * KVM_CAP_PPC_HTM extension, so enable it here instead as 581 * long as it's availble to userspace on the host. */ 582 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) { 583 cap_htm = true; 584 } 585 } 586 break; 587 default: 588 break; 589 } 590 591 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode); 592 kvmppc_hw_debug_points_init(cenv); 593 594 return ret; 595 } 596 597 static void kvm_sw_tlb_put(PowerPCCPU *cpu) 598 { 599 CPUPPCState *env = &cpu->env; 600 CPUState *cs = CPU(cpu); 601 struct kvm_dirty_tlb dirty_tlb; 602 unsigned char *bitmap; 603 int ret; 604 605 if (!env->kvm_sw_tlb) { 606 return; 607 } 608 609 bitmap = g_malloc((env->nb_tlb + 7) / 8); 610 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8); 611 612 dirty_tlb.bitmap = (uintptr_t)bitmap; 613 dirty_tlb.num_dirty = env->nb_tlb; 614 615 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb); 616 if (ret) { 617 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n", 618 __func__, strerror(-ret)); 619 } 620 621 g_free(bitmap); 622 } 623 624 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr) 625 { 626 PowerPCCPU *cpu = POWERPC_CPU(cs); 627 CPUPPCState *env = &cpu->env; 628 union { 629 uint32_t u32; 630 uint64_t u64; 631 } val; 632 struct kvm_one_reg reg = { 633 .id = id, 634 .addr = (uintptr_t) &val, 635 }; 636 int ret; 637 638 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 639 if (ret != 0) { 640 trace_kvm_failed_spr_get(spr, strerror(errno)); 641 } else { 642 switch (id & KVM_REG_SIZE_MASK) { 643 case KVM_REG_SIZE_U32: 644 env->spr[spr] = val.u32; 645 break; 646 647 case KVM_REG_SIZE_U64: 648 env->spr[spr] = val.u64; 649 break; 650 651 default: 652 /* Don't handle this size yet */ 653 abort(); 654 } 655 } 656 } 657 658 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr) 659 { 660 PowerPCCPU *cpu = POWERPC_CPU(cs); 661 CPUPPCState *env = &cpu->env; 662 union { 663 uint32_t u32; 664 uint64_t u64; 665 } val; 666 struct 
kvm_one_reg reg = { 667 .id = id, 668 .addr = (uintptr_t) &val, 669 }; 670 int ret; 671 672 switch (id & KVM_REG_SIZE_MASK) { 673 case KVM_REG_SIZE_U32: 674 val.u32 = env->spr[spr]; 675 break; 676 677 case KVM_REG_SIZE_U64: 678 val.u64 = env->spr[spr]; 679 break; 680 681 default: 682 /* Don't handle this size yet */ 683 abort(); 684 } 685 686 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 687 if (ret != 0) { 688 trace_kvm_failed_spr_set(spr, strerror(errno)); 689 } 690 } 691 692 static int kvm_put_fp(CPUState *cs) 693 { 694 PowerPCCPU *cpu = POWERPC_CPU(cs); 695 CPUPPCState *env = &cpu->env; 696 struct kvm_one_reg reg; 697 int i; 698 int ret; 699 700 if (env->insns_flags & PPC_FLOAT) { 701 uint64_t fpscr = env->fpscr; 702 bool vsx = !!(env->insns_flags2 & PPC2_VSX); 703 704 reg.id = KVM_REG_PPC_FPSCR; 705 reg.addr = (uintptr_t)&fpscr; 706 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 707 if (ret < 0) { 708 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno)); 709 return ret; 710 } 711 712 for (i = 0; i < 32; i++) { 713 uint64_t vsr[2]; 714 715 #ifdef HOST_WORDS_BIGENDIAN 716 vsr[0] = float64_val(env->fpr[i]); 717 vsr[1] = env->vsr[i]; 718 #else 719 vsr[0] = env->vsr[i]; 720 vsr[1] = float64_val(env->fpr[i]); 721 #endif 722 reg.addr = (uintptr_t) &vsr; 723 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i); 724 725 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 726 if (ret < 0) { 727 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR", 728 i, strerror(errno)); 729 return ret; 730 } 731 } 732 } 733 734 if (env->insns_flags & PPC_ALTIVEC) { 735 reg.id = KVM_REG_PPC_VSCR; 736 reg.addr = (uintptr_t)&env->vscr; 737 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 738 if (ret < 0) { 739 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno)); 740 return ret; 741 } 742 743 for (i = 0; i < 32; i++) { 744 reg.id = KVM_REG_PPC_VR(i); 745 reg.addr = (uintptr_t)&env->avr[i]; 746 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 747 if (ret < 0) { 748 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno)); 749 return ret; 750 } 751 } 752 } 753 754 return 0; 755 } 756 757 static int kvm_get_fp(CPUState *cs) 758 { 759 PowerPCCPU *cpu = POWERPC_CPU(cs); 760 CPUPPCState *env = &cpu->env; 761 struct kvm_one_reg reg; 762 int i; 763 int ret; 764 765 if (env->insns_flags & PPC_FLOAT) { 766 uint64_t fpscr; 767 bool vsx = !!(env->insns_flags2 & PPC2_VSX); 768 769 reg.id = KVM_REG_PPC_FPSCR; 770 reg.addr = (uintptr_t)&fpscr; 771 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 772 if (ret < 0) { 773 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno)); 774 return ret; 775 } else { 776 env->fpscr = fpscr; 777 } 778 779 for (i = 0; i < 32; i++) { 780 uint64_t vsr[2]; 781 782 reg.addr = (uintptr_t) &vsr; 783 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i); 784 785 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 786 if (ret < 0) { 787 DPRINTF("Unable to get %s%d from KVM: %s\n", 788 vsx ? 
"VSR" : "FPR", i, strerror(errno)); 789 return ret; 790 } else { 791 #ifdef HOST_WORDS_BIGENDIAN 792 env->fpr[i] = vsr[0]; 793 if (vsx) { 794 env->vsr[i] = vsr[1]; 795 } 796 #else 797 env->fpr[i] = vsr[1]; 798 if (vsx) { 799 env->vsr[i] = vsr[0]; 800 } 801 #endif 802 } 803 } 804 } 805 806 if (env->insns_flags & PPC_ALTIVEC) { 807 reg.id = KVM_REG_PPC_VSCR; 808 reg.addr = (uintptr_t)&env->vscr; 809 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 810 if (ret < 0) { 811 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno)); 812 return ret; 813 } 814 815 for (i = 0; i < 32; i++) { 816 reg.id = KVM_REG_PPC_VR(i); 817 reg.addr = (uintptr_t)&env->avr[i]; 818 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 819 if (ret < 0) { 820 DPRINTF("Unable to get VR%d from KVM: %s\n", 821 i, strerror(errno)); 822 return ret; 823 } 824 } 825 } 826 827 return 0; 828 } 829 830 #if defined(TARGET_PPC64) 831 static int kvm_get_vpa(CPUState *cs) 832 { 833 PowerPCCPU *cpu = POWERPC_CPU(cs); 834 CPUPPCState *env = &cpu->env; 835 struct kvm_one_reg reg; 836 int ret; 837 838 reg.id = KVM_REG_PPC_VPA_ADDR; 839 reg.addr = (uintptr_t)&env->vpa_addr; 840 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 841 if (ret < 0) { 842 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno)); 843 return ret; 844 } 845 846 assert((uintptr_t)&env->slb_shadow_size 847 == ((uintptr_t)&env->slb_shadow_addr + 8)); 848 reg.id = KVM_REG_PPC_VPA_SLB; 849 reg.addr = (uintptr_t)&env->slb_shadow_addr; 850 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 851 if (ret < 0) { 852 DPRINTF("Unable to get SLB shadow state from KVM: %s\n", 853 strerror(errno)); 854 return ret; 855 } 856 857 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8)); 858 reg.id = KVM_REG_PPC_VPA_DTL; 859 reg.addr = (uintptr_t)&env->dtl_addr; 860 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 861 if (ret < 0) { 862 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n", 863 strerror(errno)); 864 return ret; 865 } 866 867 return 0; 868 } 869 870 static int kvm_put_vpa(CPUState *cs) 871 { 872 PowerPCCPU *cpu = POWERPC_CPU(cs); 873 CPUPPCState *env = &cpu->env; 874 struct kvm_one_reg reg; 875 int ret; 876 877 /* SLB shadow or DTL can't be registered unless a master VPA is 878 * registered. That means when restoring state, if a VPA *is* 879 * registered, we need to set that up first. 
If not, we need to 880 * deregister the others before deregistering the master VPA */ 881 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr)); 882 883 if (env->vpa_addr) { 884 reg.id = KVM_REG_PPC_VPA_ADDR; 885 reg.addr = (uintptr_t)&env->vpa_addr; 886 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 887 if (ret < 0) { 888 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno)); 889 return ret; 890 } 891 } 892 893 assert((uintptr_t)&env->slb_shadow_size 894 == ((uintptr_t)&env->slb_shadow_addr + 8)); 895 reg.id = KVM_REG_PPC_VPA_SLB; 896 reg.addr = (uintptr_t)&env->slb_shadow_addr; 897 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 898 if (ret < 0) { 899 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno)); 900 return ret; 901 } 902 903 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8)); 904 reg.id = KVM_REG_PPC_VPA_DTL; 905 reg.addr = (uintptr_t)&env->dtl_addr; 906 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 907 if (ret < 0) { 908 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n", 909 strerror(errno)); 910 return ret; 911 } 912 913 if (!env->vpa_addr) { 914 reg.id = KVM_REG_PPC_VPA_ADDR; 915 reg.addr = (uintptr_t)&env->vpa_addr; 916 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 917 if (ret < 0) { 918 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno)); 919 return ret; 920 } 921 } 922 923 return 0; 924 } 925 #endif /* TARGET_PPC64 */ 926 927 int kvmppc_put_books_sregs(PowerPCCPU *cpu) 928 { 929 CPUPPCState *env = &cpu->env; 930 struct kvm_sregs sregs; 931 int i; 932 933 sregs.pvr = env->spr[SPR_PVR]; 934 935 if (cpu->vhyp) { 936 PPCVirtualHypervisorClass *vhc = 937 PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); 938 sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp); 939 } else { 940 sregs.u.s.sdr1 = env->spr[SPR_SDR1]; 941 } 942 943 /* Sync SLB */ 944 #ifdef TARGET_PPC64 945 for (i = 0; i < ARRAY_SIZE(env->slb); i++) { 946 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid; 947 if (env->slb[i].esid & SLB_ESID_V) { 948 sregs.u.s.ppc64.slb[i].slbe |= i; 949 } 950 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid; 951 } 952 #endif 953 954 /* Sync SRs */ 955 for (i = 0; i < 16; i++) { 956 sregs.u.s.ppc32.sr[i] = env->sr[i]; 957 } 958 959 /* Sync BATs */ 960 for (i = 0; i < 8; i++) { 961 /* Beware. 
We have to swap upper and lower bits here */ 962 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32) 963 | env->DBAT[1][i]; 964 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32) 965 | env->IBAT[1][i]; 966 } 967 968 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs); 969 } 970 971 int kvm_arch_put_registers(CPUState *cs, int level) 972 { 973 PowerPCCPU *cpu = POWERPC_CPU(cs); 974 CPUPPCState *env = &cpu->env; 975 struct kvm_regs regs; 976 int ret; 977 int i; 978 979 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s); 980 if (ret < 0) { 981 return ret; 982 } 983 984 regs.ctr = env->ctr; 985 regs.lr = env->lr; 986 regs.xer = cpu_read_xer(env); 987 regs.msr = env->msr; 988 regs.pc = env->nip; 989 990 regs.srr0 = env->spr[SPR_SRR0]; 991 regs.srr1 = env->spr[SPR_SRR1]; 992 993 regs.sprg0 = env->spr[SPR_SPRG0]; 994 regs.sprg1 = env->spr[SPR_SPRG1]; 995 regs.sprg2 = env->spr[SPR_SPRG2]; 996 regs.sprg3 = env->spr[SPR_SPRG3]; 997 regs.sprg4 = env->spr[SPR_SPRG4]; 998 regs.sprg5 = env->spr[SPR_SPRG5]; 999 regs.sprg6 = env->spr[SPR_SPRG6]; 1000 regs.sprg7 = env->spr[SPR_SPRG7]; 1001 1002 regs.pid = env->spr[SPR_BOOKE_PID]; 1003 1004 for (i = 0;i < 32; i++) 1005 regs.gpr[i] = env->gpr[i]; 1006 1007 regs.cr = 0; 1008 for (i = 0; i < 8; i++) { 1009 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i)); 1010 } 1011 1012 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, ®s); 1013 if (ret < 0) 1014 return ret; 1015 1016 kvm_put_fp(cs); 1017 1018 if (env->tlb_dirty) { 1019 kvm_sw_tlb_put(cpu); 1020 env->tlb_dirty = false; 1021 } 1022 1023 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) { 1024 ret = kvmppc_put_books_sregs(cpu); 1025 if (ret < 0) { 1026 return ret; 1027 } 1028 } 1029 1030 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) { 1031 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR); 1032 } 1033 1034 if (cap_one_reg) { 1035 int i; 1036 1037 /* We deliberately ignore errors here, for kernels which have 1038 * the ONE_REG calls, but don't support the specific 1039 * registers, there's a reasonable chance things will still 1040 * work, at least until we try to migrate. 
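 * The loop below walks all 1024 SPR slots and only issues a ONE_REG write
 * for those SPRs that have a one_reg_id registered in spr_cb.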
*/ 1041 for (i = 0; i < 1024; i++) { 1042 uint64_t id = env->spr_cb[i].one_reg_id; 1043 1044 if (id != 0) { 1045 kvm_put_one_spr(cs, id, i); 1046 } 1047 } 1048 1049 #ifdef TARGET_PPC64 1050 if (msr_ts) { 1051 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1052 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1053 } 1054 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1055 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1056 } 1057 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1058 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1059 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1060 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1061 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1062 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1063 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1064 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1065 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1066 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1067 } 1068 1069 if (cap_papr) { 1070 if (kvm_put_vpa(cs) < 0) { 1071 DPRINTF("Warning: Unable to set VPA information to KVM\n"); 1072 } 1073 } 1074 1075 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1076 #endif /* TARGET_PPC64 */ 1077 } 1078 1079 return ret; 1080 } 1081 1082 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor) 1083 { 1084 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR]; 1085 } 1086 1087 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu) 1088 { 1089 CPUPPCState *env = &cpu->env; 1090 struct kvm_sregs sregs; 1091 int ret; 1092 1093 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1094 if (ret < 0) { 1095 return ret; 1096 } 1097 1098 if (sregs.u.e.features & KVM_SREGS_E_BASE) { 1099 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0; 1100 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1; 1101 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr; 1102 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear; 1103 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr; 1104 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr; 1105 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr; 1106 env->spr[SPR_DECR] = sregs.u.e.dec; 1107 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff; 1108 env->spr[SPR_TBU] = sregs.u.e.tb >> 32; 1109 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave; 1110 } 1111 1112 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) { 1113 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir; 1114 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0; 1115 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1; 1116 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar; 1117 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr; 1118 } 1119 1120 if (sregs.u.e.features & KVM_SREGS_E_64) { 1121 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr; 1122 } 1123 1124 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) { 1125 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8; 1126 } 1127 1128 if (sregs.u.e.features & KVM_SREGS_E_IVOR) { 1129 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0]; 1130 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0); 1131 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1]; 1132 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1); 1133 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2]; 1134 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2); 1135 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3]; 1136 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3); 1137 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4]; 1138 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4); 1139 
env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5]; 1140 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5); 1141 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6]; 1142 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6); 1143 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7]; 1144 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7); 1145 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8]; 1146 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8); 1147 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9]; 1148 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9); 1149 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10]; 1150 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10); 1151 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11]; 1152 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11); 1153 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12]; 1154 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12); 1155 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13]; 1156 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13); 1157 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14]; 1158 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14); 1159 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15]; 1160 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15); 1161 1162 if (sregs.u.e.features & KVM_SREGS_E_SPE) { 1163 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0]; 1164 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32); 1165 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1]; 1166 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33); 1167 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2]; 1168 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34); 1169 } 1170 1171 if (sregs.u.e.features & KVM_SREGS_E_PM) { 1172 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3]; 1173 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35); 1174 } 1175 1176 if (sregs.u.e.features & KVM_SREGS_E_PC) { 1177 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4]; 1178 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36); 1179 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5]; 1180 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37); 1181 } 1182 } 1183 1184 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) { 1185 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0; 1186 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1; 1187 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2; 1188 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff; 1189 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4; 1190 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6; 1191 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32; 1192 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg; 1193 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0]; 1194 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1]; 1195 } 1196 1197 if (sregs.u.e.features & KVM_SREGS_EXP) { 1198 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr; 1199 } 1200 1201 if (sregs.u.e.features & KVM_SREGS_E_PD) { 1202 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc; 1203 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc; 1204 } 1205 1206 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { 1207 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr; 1208 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar; 1209 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0; 1210 1211 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) { 1212 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1; 1213 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2; 1214 } 1215 } 1216 1217 return 0; 1218 } 1219 1220 static int 
kvmppc_get_books_sregs(PowerPCCPU *cpu) 1221 { 1222 CPUPPCState *env = &cpu->env; 1223 struct kvm_sregs sregs; 1224 int ret; 1225 int i; 1226 1227 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1228 if (ret < 0) { 1229 return ret; 1230 } 1231 1232 if (!cpu->vhyp) { 1233 ppc_store_sdr1(env, sregs.u.s.sdr1); 1234 } 1235 1236 /* Sync SLB */ 1237 #ifdef TARGET_PPC64 1238 /* 1239 * The packed SLB array we get from KVM_GET_SREGS only contains 1240 * information about valid entries. So we flush our internal copy 1241 * to get rid of stale ones, then put all valid SLB entries back 1242 * in. 1243 */ 1244 memset(env->slb, 0, sizeof(env->slb)); 1245 for (i = 0; i < ARRAY_SIZE(env->slb); i++) { 1246 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe; 1247 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv; 1248 /* 1249 * Only restore valid entries 1250 */ 1251 if (rb & SLB_ESID_V) { 1252 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs); 1253 } 1254 } 1255 #endif 1256 1257 /* Sync SRs */ 1258 for (i = 0; i < 16; i++) { 1259 env->sr[i] = sregs.u.s.ppc32.sr[i]; 1260 } 1261 1262 /* Sync BATs */ 1263 for (i = 0; i < 8; i++) { 1264 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff; 1265 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32; 1266 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff; 1267 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32; 1268 } 1269 1270 return 0; 1271 } 1272 1273 int kvm_arch_get_registers(CPUState *cs) 1274 { 1275 PowerPCCPU *cpu = POWERPC_CPU(cs); 1276 CPUPPCState *env = &cpu->env; 1277 struct kvm_regs regs; 1278 uint32_t cr; 1279 int i, ret; 1280 1281 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s); 1282 if (ret < 0) 1283 return ret; 1284 1285 cr = regs.cr; 1286 for (i = 7; i >= 0; i--) { 1287 env->crf[i] = cr & 15; 1288 cr >>= 4; 1289 } 1290 1291 env->ctr = regs.ctr; 1292 env->lr = regs.lr; 1293 cpu_write_xer(env, regs.xer); 1294 env->msr = regs.msr; 1295 env->nip = regs.pc; 1296 1297 env->spr[SPR_SRR0] = regs.srr0; 1298 env->spr[SPR_SRR1] = regs.srr1; 1299 1300 env->spr[SPR_SPRG0] = regs.sprg0; 1301 env->spr[SPR_SPRG1] = regs.sprg1; 1302 env->spr[SPR_SPRG2] = regs.sprg2; 1303 env->spr[SPR_SPRG3] = regs.sprg3; 1304 env->spr[SPR_SPRG4] = regs.sprg4; 1305 env->spr[SPR_SPRG5] = regs.sprg5; 1306 env->spr[SPR_SPRG6] = regs.sprg6; 1307 env->spr[SPR_SPRG7] = regs.sprg7; 1308 1309 env->spr[SPR_BOOKE_PID] = regs.pid; 1310 1311 for (i = 0;i < 32; i++) 1312 env->gpr[i] = regs.gpr[i]; 1313 1314 kvm_get_fp(cs); 1315 1316 if (cap_booke_sregs) { 1317 ret = kvmppc_get_booke_sregs(cpu); 1318 if (ret < 0) { 1319 return ret; 1320 } 1321 } 1322 1323 if (cap_segstate) { 1324 ret = kvmppc_get_books_sregs(cpu); 1325 if (ret < 0) { 1326 return ret; 1327 } 1328 } 1329 1330 if (cap_hior) { 1331 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR); 1332 } 1333 1334 if (cap_one_reg) { 1335 int i; 1336 1337 /* We deliberately ignore errors here, for kernels which have 1338 * the ONE_REG calls, but don't support the specific 1339 * registers, there's a reasonable chance things will still 1340 * work, at least until we try to migrate. 
*/ 1341 for (i = 0; i < 1024; i++) { 1342 uint64_t id = env->spr_cb[i].one_reg_id; 1343 1344 if (id != 0) { 1345 kvm_get_one_spr(cs, id, i); 1346 } 1347 } 1348 1349 #ifdef TARGET_PPC64 1350 if (msr_ts) { 1351 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1352 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1353 } 1354 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1355 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1356 } 1357 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1358 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1359 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1360 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1361 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1362 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1363 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1364 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1365 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1366 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1367 } 1368 1369 if (cap_papr) { 1370 if (kvm_get_vpa(cs) < 0) { 1371 DPRINTF("Warning: Unable to get VPA information from KVM\n"); 1372 } 1373 } 1374 1375 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1376 #endif 1377 } 1378 1379 return 0; 1380 } 1381 1382 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level) 1383 { 1384 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET; 1385 1386 if (irq != PPC_INTERRUPT_EXT) { 1387 return 0; 1388 } 1389 1390 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) { 1391 return 0; 1392 } 1393 1394 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq); 1395 1396 return 0; 1397 } 1398 1399 #if defined(TARGET_PPCEMB) 1400 #define PPC_INPUT_INT PPC40x_INPUT_INT 1401 #elif defined(TARGET_PPC64) 1402 #define PPC_INPUT_INT PPC970_INPUT_INT 1403 #else 1404 #define PPC_INPUT_INT PPC6xx_INPUT_INT 1405 #endif 1406 1407 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) 1408 { 1409 PowerPCCPU *cpu = POWERPC_CPU(cs); 1410 CPUPPCState *env = &cpu->env; 1411 int r; 1412 unsigned irq; 1413 1414 qemu_mutex_lock_iothread(); 1415 1416 /* PowerPC QEMU tracks the various core input pins (interrupt, critical 1417 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */ 1418 if (!cap_interrupt_level && 1419 run->ready_for_interrupt_injection && 1420 (cs->interrupt_request & CPU_INTERRUPT_HARD) && 1421 (env->irq_input_state & (1<<PPC_INPUT_INT))) 1422 { 1423 /* For now KVM disregards the 'irq' argument. However, in the 1424 * future KVM could cache it in-kernel to avoid a heavyweight exit 1425 * when reading the UIC. 1426 */ 1427 irq = KVM_INTERRUPT_SET; 1428 1429 DPRINTF("injected interrupt %d\n", irq); 1430 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq); 1431 if (r < 0) { 1432 printf("cpu %d fail inject %x\n", cs->cpu_index, irq); 1433 } 1434 1435 /* Always wake up soon in case the interrupt was level based */ 1436 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1437 (NANOSECONDS_PER_SECOND / 50)); 1438 } 1439 1440 /* We don't know if there are more interrupts pending after this. However, 1441 * the guest will return to userspace in the course of handling this one 1442 * anyways, so we will get a chance to deliver the rest. 
*/ 1443 1444 qemu_mutex_unlock_iothread(); 1445 } 1446 1447 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) 1448 { 1449 return MEMTXATTRS_UNSPECIFIED; 1450 } 1451 1452 int kvm_arch_process_async_events(CPUState *cs) 1453 { 1454 return cs->halted; 1455 } 1456 1457 static int kvmppc_handle_halt(PowerPCCPU *cpu) 1458 { 1459 CPUState *cs = CPU(cpu); 1460 CPUPPCState *env = &cpu->env; 1461 1462 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) { 1463 cs->halted = 1; 1464 cs->exception_index = EXCP_HLT; 1465 } 1466 1467 return 0; 1468 } 1469 1470 /* map dcr access to existing qemu dcr emulation */ 1471 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data) 1472 { 1473 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) 1474 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn); 1475 1476 return 0; 1477 } 1478 1479 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data) 1480 { 1481 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) 1482 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn); 1483 1484 return 0; 1485 } 1486 1487 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1488 { 1489 /* Mixed endian case is not handled */ 1490 uint32_t sc = debug_inst_opcode; 1491 1492 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1493 sizeof(sc), 0) || 1494 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) { 1495 return -EINVAL; 1496 } 1497 1498 return 0; 1499 } 1500 1501 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1502 { 1503 uint32_t sc; 1504 1505 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) || 1506 sc != debug_inst_opcode || 1507 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1508 sizeof(sc), 1)) { 1509 return -EINVAL; 1510 } 1511 1512 return 0; 1513 } 1514 1515 static int find_hw_breakpoint(target_ulong addr, int type) 1516 { 1517 int n; 1518 1519 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1520 <= ARRAY_SIZE(hw_debug_points)); 1521 1522 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1523 if (hw_debug_points[n].addr == addr && 1524 hw_debug_points[n].type == type) { 1525 return n; 1526 } 1527 } 1528 1529 return -1; 1530 } 1531 1532 static int find_hw_watchpoint(target_ulong addr, int *flag) 1533 { 1534 int n; 1535 1536 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS); 1537 if (n >= 0) { 1538 *flag = BP_MEM_ACCESS; 1539 return n; 1540 } 1541 1542 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE); 1543 if (n >= 0) { 1544 *flag = BP_MEM_WRITE; 1545 return n; 1546 } 1547 1548 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ); 1549 if (n >= 0) { 1550 *flag = BP_MEM_READ; 1551 return n; 1552 } 1553 1554 return -1; 1555 } 1556 1557 int kvm_arch_insert_hw_breakpoint(target_ulong addr, 1558 target_ulong len, int type) 1559 { 1560 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) { 1561 return -ENOBUFS; 1562 } 1563 1564 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr; 1565 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type; 1566 1567 switch (type) { 1568 case GDB_BREAKPOINT_HW: 1569 if (nb_hw_breakpoint >= max_hw_breakpoint) { 1570 return -ENOBUFS; 1571 } 1572 1573 if (find_hw_breakpoint(addr, type) >= 0) { 1574 return -EEXIST; 1575 } 1576 1577 nb_hw_breakpoint++; 1578 break; 1579 1580 case GDB_WATCHPOINT_WRITE: 1581 case GDB_WATCHPOINT_READ: 1582 case GDB_WATCHPOINT_ACCESS: 1583 if (nb_hw_watchpoint >= max_hw_watchpoint) { 1584 
return -ENOBUFS; 1585 } 1586 1587 if (find_hw_breakpoint(addr, type) >= 0) { 1588 return -EEXIST; 1589 } 1590 1591 nb_hw_watchpoint++; 1592 break; 1593 1594 default: 1595 return -ENOSYS; 1596 } 1597 1598 return 0; 1599 } 1600 1601 int kvm_arch_remove_hw_breakpoint(target_ulong addr, 1602 target_ulong len, int type) 1603 { 1604 int n; 1605 1606 n = find_hw_breakpoint(addr, type); 1607 if (n < 0) { 1608 return -ENOENT; 1609 } 1610 1611 switch (type) { 1612 case GDB_BREAKPOINT_HW: 1613 nb_hw_breakpoint--; 1614 break; 1615 1616 case GDB_WATCHPOINT_WRITE: 1617 case GDB_WATCHPOINT_READ: 1618 case GDB_WATCHPOINT_ACCESS: 1619 nb_hw_watchpoint--; 1620 break; 1621 1622 default: 1623 return -ENOSYS; 1624 } 1625 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint]; 1626 1627 return 0; 1628 } 1629 1630 void kvm_arch_remove_all_hw_breakpoints(void) 1631 { 1632 nb_hw_breakpoint = nb_hw_watchpoint = 0; 1633 } 1634 1635 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg) 1636 { 1637 int n; 1638 1639 /* Software Breakpoint updates */ 1640 if (kvm_sw_breakpoints_active(cs)) { 1641 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; 1642 } 1643 1644 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1645 <= ARRAY_SIZE(hw_debug_points)); 1646 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp)); 1647 1648 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1649 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; 1650 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp)); 1651 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1652 switch (hw_debug_points[n].type) { 1653 case GDB_BREAKPOINT_HW: 1654 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT; 1655 break; 1656 case GDB_WATCHPOINT_WRITE: 1657 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE; 1658 break; 1659 case GDB_WATCHPOINT_READ: 1660 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ; 1661 break; 1662 case GDB_WATCHPOINT_ACCESS: 1663 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE | 1664 KVMPPC_DEBUG_WATCH_READ; 1665 break; 1666 default: 1667 cpu_abort(cs, "Unsupported breakpoint type\n"); 1668 } 1669 dbg->arch.bp[n].addr = hw_debug_points[n].addr; 1670 } 1671 } 1672 } 1673 1674 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run) 1675 { 1676 CPUState *cs = CPU(cpu); 1677 CPUPPCState *env = &cpu->env; 1678 struct kvm_debug_exit_arch *arch_info = &run->debug.arch; 1679 int handle = 0; 1680 int n; 1681 int flag = 0; 1682 1683 if (cs->singlestep_enabled) { 1684 handle = 1; 1685 } else if (arch_info->status) { 1686 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1687 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) { 1688 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW); 1689 if (n >= 0) { 1690 handle = 1; 1691 } 1692 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ | 1693 KVMPPC_DEBUG_WATCH_WRITE)) { 1694 n = find_hw_watchpoint(arch_info->address, &flag); 1695 if (n >= 0) { 1696 handle = 1; 1697 cs->watchpoint_hit = &hw_watchpoint; 1698 hw_watchpoint.vaddr = hw_debug_points[n].addr; 1699 hw_watchpoint.flags = flag; 1700 } 1701 } 1702 } 1703 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) { 1704 handle = 1; 1705 } else { 1706 /* QEMU is not able to handle debug exception, so inject 1707 * program exception to guest; 1708 * Yes program exception NOT debug exception !! 1709 * When QEMU is using debug resources then debug exception must 1710 * be always set. To achieve this we set MSR_DE and also set 1711 * MSRP_DEP so guest cannot change MSR_DE. 
1712 * When emulating debug resource for guest we want guest 1713 * to control MSR_DE (enable/disable debug interrupt on need). 1714 * Supporting both configurations are NOT possible. 1715 * So the result is that we cannot share debug resources 1716 * between QEMU and Guest on BOOKE architecture. 1717 * In the current design QEMU gets the priority over guest, 1718 * this means that if QEMU is using debug resources then guest 1719 * cannot use them; 1720 * For software breakpoint QEMU uses a privileged instruction; 1721 * So there cannot be any reason that we are here for guest 1722 * set debug exception, only possibility is guest executed a 1723 * privileged / illegal instruction and that's why we are 1724 * injecting a program interrupt. 1725 */ 1726 1727 cpu_synchronize_state(cs); 1728 /* env->nip is PC, so increment this by 4 to use 1729 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4. 1730 */ 1731 env->nip += 4; 1732 cs->exception_index = POWERPC_EXCP_PROGRAM; 1733 env->error_code = POWERPC_EXCP_INVAL; 1734 ppc_cpu_do_interrupt(cs); 1735 } 1736 1737 return handle; 1738 } 1739 1740 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) 1741 { 1742 PowerPCCPU *cpu = POWERPC_CPU(cs); 1743 CPUPPCState *env = &cpu->env; 1744 int ret; 1745 1746 qemu_mutex_lock_iothread(); 1747 1748 switch (run->exit_reason) { 1749 case KVM_EXIT_DCR: 1750 if (run->dcr.is_write) { 1751 DPRINTF("handle dcr write\n"); 1752 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data); 1753 } else { 1754 DPRINTF("handle dcr read\n"); 1755 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data); 1756 } 1757 break; 1758 case KVM_EXIT_HLT: 1759 DPRINTF("handle halt\n"); 1760 ret = kvmppc_handle_halt(cpu); 1761 break; 1762 #if defined(TARGET_PPC64) 1763 case KVM_EXIT_PAPR_HCALL: 1764 DPRINTF("handle PAPR hypercall\n"); 1765 run->papr_hcall.ret = spapr_hypercall(cpu, 1766 run->papr_hcall.nr, 1767 run->papr_hcall.args); 1768 ret = 0; 1769 break; 1770 #endif 1771 case KVM_EXIT_EPR: 1772 DPRINTF("handle epr\n"); 1773 run->epr.epr = ldl_phys(cs->as, env->mpic_iack); 1774 ret = 0; 1775 break; 1776 case KVM_EXIT_WATCHDOG: 1777 DPRINTF("handle watchdog expiry\n"); 1778 watchdog_perform_action(); 1779 ret = 0; 1780 break; 1781 1782 case KVM_EXIT_DEBUG: 1783 DPRINTF("handle debug exception\n"); 1784 if (kvm_handle_debug(cpu, run)) { 1785 ret = EXCP_DEBUG; 1786 break; 1787 } 1788 /* re-enter, this exception was guest-internal */ 1789 ret = 0; 1790 break; 1791 1792 default: 1793 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); 1794 ret = -1; 1795 break; 1796 } 1797 1798 qemu_mutex_unlock_iothread(); 1799 return ret; 1800 } 1801 1802 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits) 1803 { 1804 CPUState *cs = CPU(cpu); 1805 uint32_t bits = tsr_bits; 1806 struct kvm_one_reg reg = { 1807 .id = KVM_REG_PPC_OR_TSR, 1808 .addr = (uintptr_t) &bits, 1809 }; 1810 1811 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1812 } 1813 1814 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits) 1815 { 1816 1817 CPUState *cs = CPU(cpu); 1818 uint32_t bits = tsr_bits; 1819 struct kvm_one_reg reg = { 1820 .id = KVM_REG_PPC_CLEAR_TSR, 1821 .addr = (uintptr_t) &bits, 1822 }; 1823 1824 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1825 } 1826 1827 int kvmppc_set_tcr(PowerPCCPU *cpu) 1828 { 1829 CPUState *cs = CPU(cpu); 1830 CPUPPCState *env = &cpu->env; 1831 uint32_t tcr = env->spr[SPR_BOOKE_TCR]; 1832 1833 struct kvm_one_reg reg = { 1834 .id = KVM_REG_PPC_TCR, 1835 .addr = (uintptr_t) &tcr, 
1836 }; 1837 1838 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1839 } 1840 1841 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu) 1842 { 1843 CPUState *cs = CPU(cpu); 1844 int ret; 1845 1846 if (!kvm_enabled()) { 1847 return -1; 1848 } 1849 1850 if (!cap_ppc_watchdog) { 1851 printf("warning: KVM does not support watchdog"); 1852 return -1; 1853 } 1854 1855 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0); 1856 if (ret < 0) { 1857 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n", 1858 __func__, strerror(-ret)); 1859 return ret; 1860 } 1861 1862 return ret; 1863 } 1864 1865 static int read_cpuinfo(const char *field, char *value, int len) 1866 { 1867 FILE *f; 1868 int ret = -1; 1869 int field_len = strlen(field); 1870 char line[512]; 1871 1872 f = fopen("/proc/cpuinfo", "r"); 1873 if (!f) { 1874 return -1; 1875 } 1876 1877 do { 1878 if (!fgets(line, sizeof(line), f)) { 1879 break; 1880 } 1881 if (!strncmp(line, field, field_len)) { 1882 pstrcpy(value, len, line); 1883 ret = 0; 1884 break; 1885 } 1886 } while(*line); 1887 1888 fclose(f); 1889 1890 return ret; 1891 } 1892 1893 uint32_t kvmppc_get_tbfreq(void) 1894 { 1895 char line[512]; 1896 char *ns; 1897 uint32_t retval = NANOSECONDS_PER_SECOND; 1898 1899 if (read_cpuinfo("timebase", line, sizeof(line))) { 1900 return retval; 1901 } 1902 1903 if (!(ns = strchr(line, ':'))) { 1904 return retval; 1905 } 1906 1907 ns++; 1908 1909 return atoi(ns); 1910 } 1911 1912 bool kvmppc_get_host_serial(char **value) 1913 { 1914 return g_file_get_contents("/proc/device-tree/system-id", value, NULL, 1915 NULL); 1916 } 1917 1918 bool kvmppc_get_host_model(char **value) 1919 { 1920 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL); 1921 } 1922 1923 /* Try to find a device tree node for a CPU with clock-frequency property */ 1924 static int kvmppc_find_cpu_dt(char *buf, int buf_len) 1925 { 1926 struct dirent *dirp; 1927 DIR *dp; 1928 1929 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) { 1930 printf("Can't open directory " PROC_DEVTREE_CPU "\n"); 1931 return -1; 1932 } 1933 1934 buf[0] = '\0'; 1935 while ((dirp = readdir(dp)) != NULL) { 1936 FILE *f; 1937 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU, 1938 dirp->d_name); 1939 f = fopen(buf, "r"); 1940 if (f) { 1941 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name); 1942 fclose(f); 1943 break; 1944 } 1945 buf[0] = '\0'; 1946 } 1947 closedir(dp); 1948 if (buf[0] == '\0') { 1949 printf("Unknown host!\n"); 1950 return -1; 1951 } 1952 1953 return 0; 1954 } 1955 1956 static uint64_t kvmppc_read_int_dt(const char *filename) 1957 { 1958 union { 1959 uint32_t v32; 1960 uint64_t v64; 1961 } u; 1962 FILE *f; 1963 int len; 1964 1965 f = fopen(filename, "rb"); 1966 if (!f) { 1967 return -1; 1968 } 1969 1970 len = fread(&u, 1, sizeof(u), f); 1971 fclose(f); 1972 switch (len) { 1973 case 4: 1974 /* property is a 32-bit quantity */ 1975 return be32_to_cpu(u.v32); 1976 case 8: 1977 return be64_to_cpu(u.v64); 1978 } 1979 1980 return 0; 1981 } 1982 1983 /* Read a CPU node property from the host device tree that's a single 1984 * integer (32-bit or 64-bit). 
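 * For example, kvmppc_get_clockfreq() below uses this helper to read the
 * "clock-frequency" property.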
Returns 0 if anything goes wrong 1985 * (can't find or open the property, or doesn't understand the 1986 * format) */ 1987 static uint64_t kvmppc_read_int_cpu_dt(const char *propname) 1988 { 1989 char buf[PATH_MAX], *tmp; 1990 uint64_t val; 1991 1992 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) { 1993 return -1; 1994 } 1995 1996 tmp = g_strdup_printf("%s/%s", buf, propname); 1997 val = kvmppc_read_int_dt(tmp); 1998 g_free(tmp); 1999 2000 return val; 2001 } 2002 2003 uint64_t kvmppc_get_clockfreq(void) 2004 { 2005 return kvmppc_read_int_cpu_dt("clock-frequency"); 2006 } 2007 2008 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo) 2009 { 2010 PowerPCCPU *cpu = ppc_env_get_cpu(env); 2011 CPUState *cs = CPU(cpu); 2012 2013 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) && 2014 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) { 2015 return 0; 2016 } 2017 2018 return 1; 2019 } 2020 2021 int kvmppc_get_hasidle(CPUPPCState *env) 2022 { 2023 struct kvm_ppc_pvinfo pvinfo; 2024 2025 if (!kvmppc_get_pvinfo(env, &pvinfo) && 2026 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) { 2027 return 1; 2028 } 2029 2030 return 0; 2031 } 2032 2033 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len) 2034 { 2035 uint32_t *hc = (uint32_t*)buf; 2036 struct kvm_ppc_pvinfo pvinfo; 2037 2038 if (!kvmppc_get_pvinfo(env, &pvinfo)) { 2039 memcpy(buf, pvinfo.hcall, buf_len); 2040 return 0; 2041 } 2042 2043 /* 2044 * Fallback to always fail hypercalls regardless of endianness: 2045 * 2046 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian) 2047 * li r3, -1 2048 * b .+8 (becomes nop in wrong endian) 2049 * bswap32(li r3, -1) 2050 */ 2051 2052 hc[0] = cpu_to_be32(0x08000048); 2053 hc[1] = cpu_to_be32(0x3860ffff); 2054 hc[2] = cpu_to_be32(0x48000008); 2055 hc[3] = cpu_to_be32(bswap32(0x3860ffff)); 2056 2057 return 1; 2058 } 2059 2060 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall) 2061 { 2062 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1); 2063 } 2064 2065 void kvmppc_enable_logical_ci_hcalls(void) 2066 { 2067 /* 2068 * FIXME: it would be nice if we could detect the cases where 2069 * we're using a device which requires the in kernel 2070 * implementation of these hcalls, but the kernel lacks them and 2071 * produce a warning. 
2072 */ 2073 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD); 2074 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE); 2075 } 2076 2077 void kvmppc_enable_set_mode_hcall(void) 2078 { 2079 kvmppc_enable_hcall(kvm_state, H_SET_MODE); 2080 } 2081 2082 void kvmppc_enable_clear_ref_mod_hcalls(void) 2083 { 2084 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF); 2085 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD); 2086 } 2087 2088 void kvmppc_set_papr(PowerPCCPU *cpu) 2089 { 2090 CPUState *cs = CPU(cpu); 2091 int ret; 2092 2093 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0); 2094 if (ret) { 2095 error_report("This vCPU type or KVM version does not support PAPR"); 2096 exit(1); 2097 } 2098 2099 /* Update the capability flag so we sync the right information 2100 * with kvm */ 2101 cap_papr = 1; 2102 } 2103 2104 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr) 2105 { 2106 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr); 2107 } 2108 2109 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy) 2110 { 2111 CPUState *cs = CPU(cpu); 2112 int ret; 2113 2114 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy); 2115 if (ret && mpic_proxy) { 2116 error_report("This KVM version does not support EPR"); 2117 exit(1); 2118 } 2119 } 2120 2121 int kvmppc_smt_threads(void) 2122 { 2123 return cap_ppc_smt ? cap_ppc_smt : 1; 2124 } 2125 2126 int kvmppc_set_smt_threads(int smt) 2127 { 2128 int ret; 2129 2130 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0); 2131 if (!ret) { 2132 cap_ppc_smt = smt; 2133 } 2134 return ret; 2135 } 2136 2137 void kvmppc_hint_smt_possible(Error **errp) 2138 { 2139 int i; 2140 GString *g; 2141 char *s; 2142 2143 assert(kvm_enabled()); 2144 if (cap_ppc_smt_possible) { 2145 g = g_string_new("Available VSMT modes:"); 2146 for (i = 63; i >= 0; i--) { 2147 if ((1UL << i) & cap_ppc_smt_possible) { 2148 g_string_append_printf(g, " %lu", (1UL << i)); 2149 } 2150 } 2151 s = g_string_free(g, false); 2152 error_append_hint(errp, "%s.\n", s); 2153 g_free(s); 2154 } else { 2155 error_append_hint(errp, 2156 "This KVM seems to be too old to support VSMT.\n"); 2157 } 2158 } 2159 2160 2161 #ifdef TARGET_PPC64 2162 off_t kvmppc_alloc_rma(void **rma) 2163 { 2164 off_t size; 2165 int fd; 2166 struct kvm_allocate_rma ret; 2167 2168 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported 2169 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but 2170 * not necessary on this hardware 2171 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware 2172 * 2173 * FIXME: We should allow the user to force contiguous RMA 2174 * allocation in the cap_ppc_rma==1 case. 
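     *
     * Until then we only issue KVM_ALLOCATE_RMA when the hardware
     * actually requires it (cap_ppc_rma == 2); otherwise we return 0
     * below and the caller is expected to back the RMA with ordinary
     * guest memory.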
2175 */ 2176 if (cap_ppc_rma < 2) { 2177 return 0; 2178 } 2179 2180 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret); 2181 if (fd < 0) { 2182 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n", 2183 strerror(errno)); 2184 return -1; 2185 } 2186 2187 size = MIN(ret.rma_size, 256ul << 20); 2188 2189 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); 2190 if (*rma == MAP_FAILED) { 2191 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno)); 2192 return -1; 2193 }; 2194 2195 return size; 2196 } 2197 2198 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift) 2199 { 2200 struct kvm_ppc_smmu_info info; 2201 long rampagesize, best_page_shift; 2202 int i; 2203 2204 if (cap_ppc_rma >= 2) { 2205 return current_size; 2206 } 2207 2208 /* Find the largest hardware supported page size that's less than 2209 * or equal to the (logical) backing page size of guest RAM */ 2210 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info); 2211 rampagesize = qemu_getrampagesize(); 2212 best_page_shift = 0; 2213 2214 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) { 2215 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i]; 2216 2217 if (!sps->page_shift) { 2218 continue; 2219 } 2220 2221 if ((sps->page_shift > best_page_shift) 2222 && ((1UL << sps->page_shift) <= rampagesize)) { 2223 best_page_shift = sps->page_shift; 2224 } 2225 } 2226 2227 return MIN(current_size, 2228 1ULL << (best_page_shift + hash_shift - 7)); 2229 } 2230 #endif 2231 2232 bool kvmppc_spapr_use_multitce(void) 2233 { 2234 return cap_spapr_multitce; 2235 } 2236 2237 int kvmppc_spapr_enable_inkernel_multitce(void) 2238 { 2239 int ret; 2240 2241 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0, 2242 H_PUT_TCE_INDIRECT, 1); 2243 if (!ret) { 2244 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0, 2245 H_STUFF_TCE, 1); 2246 } 2247 2248 return ret; 2249 } 2250 2251 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift, 2252 uint64_t bus_offset, uint32_t nb_table, 2253 int *pfd, bool need_vfio) 2254 { 2255 long len; 2256 int fd; 2257 void *table; 2258 2259 /* Must set fd to -1 so we don't try to munmap when called for 2260 * destroying the table, which the upper layers -will- do 2261 */ 2262 *pfd = -1; 2263 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) { 2264 return NULL; 2265 } 2266 2267 if (cap_spapr_tce_64) { 2268 struct kvm_create_spapr_tce_64 args = { 2269 .liobn = liobn, 2270 .page_shift = page_shift, 2271 .offset = bus_offset >> page_shift, 2272 .size = nb_table, 2273 .flags = 0 2274 }; 2275 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args); 2276 if (fd < 0) { 2277 fprintf(stderr, 2278 "KVM: Failed to create TCE64 table for liobn 0x%x\n", 2279 liobn); 2280 return NULL; 2281 } 2282 } else if (cap_spapr_tce) { 2283 uint64_t window_size = (uint64_t) nb_table << page_shift; 2284 struct kvm_create_spapr_tce args = { 2285 .liobn = liobn, 2286 .window_size = window_size, 2287 }; 2288 if ((window_size != args.window_size) || bus_offset) { 2289 return NULL; 2290 } 2291 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args); 2292 if (fd < 0) { 2293 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n", 2294 liobn); 2295 return NULL; 2296 } 2297 } else { 2298 return NULL; 2299 } 2300 2301 len = nb_table * sizeof(uint64_t); 2302 /* FIXME: round this up to page size */ 2303 2304 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); 2305 if (table == MAP_FAILED) { 2306 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n", 
2307 liobn); 2308 close(fd); 2309 return NULL; 2310 } 2311 2312 *pfd = fd; 2313 return table; 2314 } 2315 2316 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table) 2317 { 2318 long len; 2319 2320 if (fd < 0) { 2321 return -1; 2322 } 2323 2324 len = nb_table * sizeof(uint64_t); 2325 if ((munmap(table, len) < 0) || 2326 (close(fd) < 0)) { 2327 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s", 2328 strerror(errno)); 2329 /* Leak the table */ 2330 } 2331 2332 return 0; 2333 } 2334 2335 int kvmppc_reset_htab(int shift_hint) 2336 { 2337 uint32_t shift = shift_hint; 2338 2339 if (!kvm_enabled()) { 2340 /* Full emulation, tell caller to allocate htab itself */ 2341 return 0; 2342 } 2343 if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) { 2344 int ret; 2345 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift); 2346 if (ret == -ENOTTY) { 2347 /* At least some versions of PR KVM advertise the 2348 * capability, but don't implement the ioctl(). Oops. 2349 * Return 0 so that we allocate the htab in qemu, as is 2350 * correct for PR. */ 2351 return 0; 2352 } else if (ret < 0) { 2353 return ret; 2354 } 2355 return shift; 2356 } 2357 2358 /* We have a kernel that predates the htab reset calls. For PR 2359 * KVM, we need to allocate the htab ourselves, for an HV KVM of 2360 * this era, it has allocated a 16MB fixed size hash table already. */ 2361 if (kvmppc_is_pr(kvm_state)) { 2362 /* PR - tell caller to allocate htab */ 2363 return 0; 2364 } else { 2365 /* HV - assume 16MB kernel allocated htab */ 2366 return 24; 2367 } 2368 } 2369 2370 static inline uint32_t mfpvr(void) 2371 { 2372 uint32_t pvr; 2373 2374 asm ("mfpvr %0" 2375 : "=r"(pvr)); 2376 return pvr; 2377 } 2378 2379 static void alter_insns(uint64_t *word, uint64_t flags, bool on) 2380 { 2381 if (on) { 2382 *word |= flags; 2383 } else { 2384 *word &= ~flags; 2385 } 2386 } 2387 2388 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data) 2389 { 2390 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); 2391 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size"); 2392 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size"); 2393 2394 /* Now fix up the class with information we can query from the host */ 2395 pcc->pvr = mfpvr(); 2396 2397 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, 2398 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC); 2399 alter_insns(&pcc->insns_flags2, PPC2_VSX, 2400 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX); 2401 alter_insns(&pcc->insns_flags2, PPC2_DFP, 2402 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP); 2403 2404 if (dcache_size != -1) { 2405 pcc->l1_dcache_size = dcache_size; 2406 } 2407 2408 if (icache_size != -1) { 2409 pcc->l1_icache_size = icache_size; 2410 } 2411 2412 #if defined(TARGET_PPC64) 2413 pcc->radix_page_info = kvm_get_radix_page_info(); 2414 2415 if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) { 2416 /* 2417 * POWER9 DD1 has some bugs which make it not really ISA 3.00 2418 * compliant. More importantly, advertising ISA 3.00 2419 * architected mode may prevent guests from activating 2420 * necessary DD1 workarounds. 
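         *
         * Clearing these PCR_COMPAT_* bits below means the
         * corresponding architected compatibility modes are simply
         * never offered to a guest when running on a DD1 host.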
2421 */ 2422 pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07 2423 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05); 2424 } 2425 #endif /* defined(TARGET_PPC64) */ 2426 } 2427 2428 bool kvmppc_has_cap_epr(void) 2429 { 2430 return cap_epr; 2431 } 2432 2433 bool kvmppc_has_cap_fixup_hcalls(void) 2434 { 2435 return cap_fixup_hcalls; 2436 } 2437 2438 bool kvmppc_has_cap_htm(void) 2439 { 2440 return cap_htm; 2441 } 2442 2443 bool kvmppc_has_cap_mmu_radix(void) 2444 { 2445 return cap_mmu_radix; 2446 } 2447 2448 bool kvmppc_has_cap_mmu_hash_v3(void) 2449 { 2450 return cap_mmu_hash_v3; 2451 } 2452 2453 static void kvmppc_get_cpu_characteristics(KVMState *s) 2454 { 2455 struct kvm_ppc_cpu_char c; 2456 int ret; 2457 2458 /* Assume broken */ 2459 cap_ppc_safe_cache = 0; 2460 cap_ppc_safe_bounds_check = 0; 2461 cap_ppc_safe_indirect_branch = 0; 2462 2463 ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR); 2464 if (!ret) { 2465 return; 2466 } 2467 ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c); 2468 if (ret < 0) { 2469 return; 2470 } 2471 /* Parse and set cap_ppc_safe_cache */ 2472 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) { 2473 cap_ppc_safe_cache = 2; 2474 } else if ((c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) && 2475 (c.character & c.character_mask 2476 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) { 2477 cap_ppc_safe_cache = 1; 2478 } 2479 /* Parse and set cap_ppc_safe_bounds_check */ 2480 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) { 2481 cap_ppc_safe_bounds_check = 2; 2482 } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) { 2483 cap_ppc_safe_bounds_check = 1; 2484 } 2485 /* Parse and set cap_ppc_safe_indirect_branch */ 2486 if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) { 2487 cap_ppc_safe_indirect_branch = SPAPR_CAP_FIXED_CCD; 2488 } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) { 2489 cap_ppc_safe_indirect_branch = SPAPR_CAP_FIXED_IBS; 2490 } 2491 } 2492 2493 int kvmppc_get_cap_safe_cache(void) 2494 { 2495 return cap_ppc_safe_cache; 2496 } 2497 2498 int kvmppc_get_cap_safe_bounds_check(void) 2499 { 2500 return cap_ppc_safe_bounds_check; 2501 } 2502 2503 int kvmppc_get_cap_safe_indirect_branch(void) 2504 { 2505 return cap_ppc_safe_indirect_branch; 2506 } 2507 2508 bool kvmppc_has_cap_spapr_vfio(void) 2509 { 2510 return cap_spapr_vfio; 2511 } 2512 2513 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) 2514 { 2515 uint32_t host_pvr = mfpvr(); 2516 PowerPCCPUClass *pvr_pcc; 2517 2518 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr); 2519 if (pvr_pcc == NULL) { 2520 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr); 2521 } 2522 2523 return pvr_pcc; 2524 } 2525 2526 static int kvm_ppc_register_host_cpu_type(MachineState *ms) 2527 { 2528 TypeInfo type_info = { 2529 .name = TYPE_HOST_POWERPC_CPU, 2530 .class_init = kvmppc_host_cpu_class_init, 2531 }; 2532 MachineClass *mc = MACHINE_GET_CLASS(ms); 2533 PowerPCCPUClass *pvr_pcc; 2534 ObjectClass *oc; 2535 DeviceClass *dc; 2536 int i; 2537 2538 pvr_pcc = kvm_ppc_get_host_cpu_class(); 2539 if (pvr_pcc == NULL) { 2540 return -1; 2541 } 2542 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc)); 2543 type_register(&type_info); 2544 if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) { 2545 /* override TCG default cpu type with 'host' cpu model */ 2546 mc->default_cpu_type = TYPE_HOST_POWERPC_CPU; 2547 } 2548 2549 oc = object_class_by_name(type_info.name); 2550 g_assert(oc); 2551 2552 /* 2553 * Update 
generic CPU family class alias (e.g. on a POWER8NVL host, 2554 * we want "POWER8" to be a "family" alias that points to the current 2555 * host CPU type, too) 2556 */ 2557 dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc)); 2558 for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) { 2559 if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) { 2560 char *suffix; 2561 2562 ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc)); 2563 suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX); 2564 if (suffix) { 2565 *suffix = 0; 2566 } 2567 break; 2568 } 2569 } 2570 2571 return 0; 2572 } 2573 2574 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function) 2575 { 2576 struct kvm_rtas_token_args args = { 2577 .token = token, 2578 }; 2579 2580 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) { 2581 return -ENOENT; 2582 } 2583 2584 strncpy(args.name, function, sizeof(args.name)); 2585 2586 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args); 2587 } 2588 2589 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp) 2590 { 2591 struct kvm_get_htab_fd s = { 2592 .flags = write ? KVM_GET_HTAB_WRITE : 0, 2593 .start_index = index, 2594 }; 2595 int ret; 2596 2597 if (!cap_htab_fd) { 2598 error_setg(errp, "KVM version doesn't support %s the HPT", 2599 write ? "writing" : "reading"); 2600 return -ENOTSUP; 2601 } 2602 2603 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s); 2604 if (ret < 0) { 2605 error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s", 2606 write ? "writing" : "reading", write ? "to" : "from", 2607 strerror(errno)); 2608 return -errno; 2609 } 2610 2611 return ret; 2612 } 2613 2614 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns) 2615 { 2616 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); 2617 uint8_t buf[bufsize]; 2618 ssize_t rc; 2619 2620 do { 2621 rc = read(fd, buf, bufsize); 2622 if (rc < 0) { 2623 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n", 2624 strerror(errno)); 2625 return rc; 2626 } else if (rc) { 2627 uint8_t *buffer = buf; 2628 ssize_t n = rc; 2629 while (n) { 2630 struct kvm_get_htab_header *head = 2631 (struct kvm_get_htab_header *) buffer; 2632 size_t chunksize = sizeof(*head) + 2633 HASH_PTE_SIZE_64 * head->n_valid; 2634 2635 qemu_put_be32(f, head->index); 2636 qemu_put_be16(f, head->n_valid); 2637 qemu_put_be16(f, head->n_invalid); 2638 qemu_put_buffer(f, (void *)(head + 1), 2639 HASH_PTE_SIZE_64 * head->n_valid); 2640 2641 buffer += chunksize; 2642 n -= chunksize; 2643 } 2644 } 2645 } while ((rc != 0) 2646 && ((max_ns < 0) 2647 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns))); 2648 2649 return (rc == 0) ? 
1 : 0; 2650 } 2651 2652 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index, 2653 uint16_t n_valid, uint16_t n_invalid) 2654 { 2655 struct kvm_get_htab_header *buf; 2656 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64; 2657 ssize_t rc; 2658 2659 buf = alloca(chunksize); 2660 buf->index = index; 2661 buf->n_valid = n_valid; 2662 buf->n_invalid = n_invalid; 2663 2664 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid); 2665 2666 rc = write(fd, buf, chunksize); 2667 if (rc < 0) { 2668 fprintf(stderr, "Error writing KVM hash table: %s\n", 2669 strerror(errno)); 2670 return rc; 2671 } 2672 if (rc != chunksize) { 2673 /* We should never get a short write on a single chunk */ 2674 fprintf(stderr, "Short write, restoring KVM hash table\n"); 2675 return -1; 2676 } 2677 return 0; 2678 } 2679 2680 bool kvm_arch_stop_on_emulation_error(CPUState *cpu) 2681 { 2682 return true; 2683 } 2684 2685 void kvm_arch_init_irq_routing(KVMState *s) 2686 { 2687 } 2688 2689 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n) 2690 { 2691 int fd, rc; 2692 int i; 2693 2694 fd = kvmppc_get_htab_fd(false, ptex, &error_abort); 2695 2696 i = 0; 2697 while (i < n) { 2698 struct kvm_get_htab_header *hdr; 2699 int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP; 2700 char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64]; 2701 2702 rc = read(fd, buf, sizeof(buf)); 2703 if (rc < 0) { 2704 hw_error("kvmppc_read_hptes: Unable to read HPTEs"); 2705 } 2706 2707 hdr = (struct kvm_get_htab_header *)buf; 2708 while ((i < n) && ((char *)hdr < (buf + rc))) { 2709 int invalid = hdr->n_invalid, valid = hdr->n_valid; 2710 2711 if (hdr->index != (ptex + i)) { 2712 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32 2713 " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i); 2714 } 2715 2716 if (n - i < valid) { 2717 valid = n - i; 2718 } 2719 memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid); 2720 i += valid; 2721 2722 if ((n - i) < invalid) { 2723 invalid = n - i; 2724 } 2725 memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64); 2726 i += invalid; 2727 2728 hdr = (struct kvm_get_htab_header *) 2729 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid); 2730 } 2731 } 2732 2733 close(fd); 2734 } 2735 2736 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1) 2737 { 2738 int fd, rc; 2739 struct { 2740 struct kvm_get_htab_header hdr; 2741 uint64_t pte0; 2742 uint64_t pte1; 2743 } buf; 2744 2745 fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort); 2746 2747 buf.hdr.n_valid = 1; 2748 buf.hdr.n_invalid = 0; 2749 buf.hdr.index = ptex; 2750 buf.pte0 = cpu_to_be64(pte0); 2751 buf.pte1 = cpu_to_be64(pte1); 2752 2753 rc = write(fd, &buf, sizeof(buf)); 2754 if (rc != sizeof(buf)) { 2755 hw_error("kvmppc_write_hpte: Unable to update KVM HPT"); 2756 } 2757 close(fd); 2758 } 2759 2760 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, 2761 uint64_t address, uint32_t data, PCIDevice *dev) 2762 { 2763 return 0; 2764 } 2765 2766 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, 2767 int vector, PCIDevice *dev) 2768 { 2769 return 0; 2770 } 2771 2772 int kvm_arch_release_virq_post(int virq) 2773 { 2774 return 0; 2775 } 2776 2777 int kvm_arch_msi_data_to_gsi(uint32_t data) 2778 { 2779 return data & 0xffff; 2780 } 2781 2782 int kvmppc_enable_hwrng(void) 2783 { 2784 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) { 2785 return -1; 2786 } 2787 2788 return kvmppc_enable_hcall(kvm_state, H_RANDOM); 2789 } 2790 2791 void 
kvmppc_check_papr_resize_hpt(Error **errp) 2792 { 2793 if (!kvm_enabled()) { 2794 return; /* No KVM, we're good */ 2795 } 2796 2797 if (cap_resize_hpt) { 2798 return; /* Kernel has explicit support, we're good */ 2799 } 2800 2801 /* Otherwise fallback on looking for PR KVM */ 2802 if (kvmppc_is_pr(kvm_state)) { 2803 return; 2804 } 2805 2806 error_setg(errp, 2807 "Hash page table resizing not available with this KVM version"); 2808 } 2809 2810 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift) 2811 { 2812 CPUState *cs = CPU(cpu); 2813 struct kvm_ppc_resize_hpt rhpt = { 2814 .flags = flags, 2815 .shift = shift, 2816 }; 2817 2818 if (!cap_resize_hpt) { 2819 return -ENOSYS; 2820 } 2821 2822 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt); 2823 } 2824 2825 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift) 2826 { 2827 CPUState *cs = CPU(cpu); 2828 struct kvm_ppc_resize_hpt rhpt = { 2829 .flags = flags, 2830 .shift = shift, 2831 }; 2832 2833 if (!cap_resize_hpt) { 2834 return -ENOSYS; 2835 } 2836 2837 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt); 2838 } 2839 2840 /* 2841 * This is a helper function to detect a post migration scenario 2842 * in which a guest, running as KVM-HV, freezes in cpu_post_load because 2843 * the guest kernel can't handle a PVR value other than the actual host 2844 * PVR in KVM_SET_SREGS, even if pvr_match() returns true. 2845 * 2846 * If we don't have cap_ppc_pvr_compat and we're not running in PR 2847 * (so, we're HV), return true. The workaround itself is done in 2848 * cpu_post_load. 2849 * 2850 * The order here is important: we'll only check for KVM PR as a 2851 * fallback if the guest kernel can't handle the situation itself. 2852 * We need to avoid as much as possible querying the running KVM type 2853 * in QEMU level. 2854 */ 2855 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu) 2856 { 2857 CPUState *cs = CPU(cpu); 2858 2859 if (!kvm_enabled()) { 2860 return false; 2861 } 2862 2863 if (cap_ppc_pvr_compat) { 2864 return false; 2865 } 2866 2867 return !kvmppc_is_pr(cs->kvm_state); 2868 } 2869
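/*
 * Illustrative sketch (an assumption for this note, not code from this
 * file): the consumer of kvmppc_pvr_workaround_required() is the
 * cpu_post_load() migration hook in the machine-state code, which is
 * expected to discard the incoming PVR and fall back to the host
 * default when the predicate fires, roughly:
 *
 *     if (kvmppc_pvr_workaround_required(cpu)) {
 *         env->spr[SPR_PVR] = env->spr_cb[SPR_PVR].default_value;
 *     }
 *
 * The field and helper names in the snippet are illustrative; the
 * migration code remains the authoritative implementation.
 */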