1 /* 2 * PowerPC implementation of KVM hooks 3 * 4 * Copyright IBM Corp. 2007 5 * Copyright (C) 2011 Freescale Semiconductor, Inc. 6 * 7 * Authors: 8 * Jerone Young <jyoung5@us.ibm.com> 9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> 10 * Hollis Blanchard <hollisb@us.ibm.com> 11 * 12 * This work is licensed under the terms of the GNU GPL, version 2 or later. 13 * See the COPYING file in the top-level directory. 14 * 15 */ 16 17 #include "qemu/osdep.h" 18 #include <dirent.h> 19 #include <sys/ioctl.h> 20 #include <sys/vfs.h> 21 22 #include <linux/kvm.h> 23 24 #include "qemu-common.h" 25 #include "qapi/error.h" 26 #include "qemu/error-report.h" 27 #include "cpu.h" 28 #include "cpu-models.h" 29 #include "qemu/timer.h" 30 #include "sysemu/sysemu.h" 31 #include "sysemu/hw_accel.h" 32 #include "kvm_ppc.h" 33 #include "sysemu/cpus.h" 34 #include "sysemu/device_tree.h" 35 #include "mmu-hash64.h" 36 37 #include "hw/sysbus.h" 38 #include "hw/ppc/spapr.h" 39 #include "hw/ppc/spapr_cpu_core.h" 40 #include "hw/ppc/ppc.h" 41 #include "sysemu/watchdog.h" 42 #include "trace.h" 43 #include "exec/gdbstub.h" 44 #include "exec/memattrs.h" 45 #include "exec/ram_addr.h" 46 #include "sysemu/hostmem.h" 47 #include "qemu/cutils.h" 48 #include "qemu/mmap-alloc.h" 49 #include "elf.h" 50 #include "sysemu/kvm_int.h" 51 52 //#define DEBUG_KVM 53 54 #ifdef DEBUG_KVM 55 #define DPRINTF(fmt, ...) \ 56 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) 57 #else 58 #define DPRINTF(fmt, ...) \ 59 do { } while (0) 60 #endif 61 62 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/" 63 64 const KVMCapabilityInfo kvm_arch_required_capabilities[] = { 65 KVM_CAP_LAST_INFO 66 }; 67 68 static int cap_interrupt_unset = false; 69 static int cap_interrupt_level = false; 70 static int cap_segstate; 71 static int cap_booke_sregs; 72 static int cap_ppc_smt; 73 static int cap_ppc_smt_possible; 74 static int cap_spapr_tce; 75 static int cap_spapr_tce_64; 76 static int cap_spapr_multitce; 77 static int cap_spapr_vfio; 78 static int cap_hior; 79 static int cap_one_reg; 80 static int cap_epr; 81 static int cap_ppc_watchdog; 82 static int cap_papr; 83 static int cap_htab_fd; 84 static int cap_fixup_hcalls; 85 static int cap_htm; /* Hardware transactional memory support */ 86 static int cap_mmu_radix; 87 static int cap_mmu_hash_v3; 88 static int cap_resize_hpt; 89 static int cap_ppc_pvr_compat; 90 static int cap_ppc_safe_cache; 91 static int cap_ppc_safe_bounds_check; 92 static int cap_ppc_safe_indirect_branch; 93 static int cap_ppc_count_cache_flush_assist; 94 static int cap_ppc_nested_kvm_hv; 95 static int cap_large_decr; 96 97 static uint32_t debug_inst_opcode; 98 99 /* XXX We have a race condition where we actually have a level triggered 100 * interrupt, but the infrastructure can't expose that yet, so the guest 101 * takes but ignores it, goes to sleep and never gets notified that there's 102 * still an interrupt pending. 103 * 104 * As a quick workaround, let's just wake up again 20 ms after we injected 105 * an interrupt. That way we can assure that we're always reinjecting 106 * interrupts in case the guest swallowed them. 107 */ 108 static QEMUTimer *idle_timer; 109 110 static void kvm_kick_cpu(void *opaque) 111 { 112 PowerPCCPU *cpu = opaque; 113 114 qemu_cpu_kick(CPU(cpu)); 115 } 116 117 /* Check whether we are running with KVM-PR (instead of KVM-HV). 
This 118 * should only be used for fallback tests - generally we should use 119 * explicit capabilities for the features we want, rather than 120 * assuming what is/isn't available depending on the KVM variant. */ 121 static bool kvmppc_is_pr(KVMState *ks) 122 { 123 /* Assume KVM-PR if the GET_PVINFO capability is available */ 124 return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0; 125 } 126 127 static int kvm_ppc_register_host_cpu_type(MachineState *ms); 128 static void kvmppc_get_cpu_characteristics(KVMState *s); 129 static int kvmppc_get_dec_bits(void); 130 131 int kvm_arch_init(MachineState *ms, KVMState *s) 132 { 133 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ); 134 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL); 135 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE); 136 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS); 137 cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE); 138 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE); 139 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64); 140 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE); 141 cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO); 142 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG); 143 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR); 144 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR); 145 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG); 146 /* Note: we don't set cap_papr here, because this capability is 147 * only activated after this by kvmppc_set_papr() */ 148 cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD); 149 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL); 150 cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT); 151 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM); 152 cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX); 153 cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3); 154 cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); 155 kvmppc_get_cpu_characteristics(s); 156 cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV); 157 cap_large_decr = kvmppc_get_dec_bits(); 158 /* 159 * Note: setting it to false because there is not such capability 160 * in KVM at this moment. 161 * 162 * TODO: call kvm_vm_check_extension() with the right capability 163 * after the kernel starts implementing it.*/ 164 cap_ppc_pvr_compat = false; 165 166 if (!cap_interrupt_level) { 167 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the " 168 "VM to stall at times!\n"); 169 } 170 171 kvm_ppc_register_host_cpu_type(ms); 172 173 return 0; 174 } 175 176 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s) 177 { 178 return 0; 179 } 180 181 static int kvm_arch_sync_sregs(PowerPCCPU *cpu) 182 { 183 CPUPPCState *cenv = &cpu->env; 184 CPUState *cs = CPU(cpu); 185 struct kvm_sregs sregs; 186 int ret; 187 188 if (cenv->excp_model == POWERPC_EXCP_BOOKE) { 189 /* What we're really trying to say is "if we're on BookE, we use 190 the native PVR for now". This is the only sane way to check 191 it though, so we potentially confuse users that they can run 192 BookE guests on BookS. 
Let's hope nobody dares enough :) */ 193 return 0; 194 } else { 195 if (!cap_segstate) { 196 fprintf(stderr, "kvm error: missing PVR setting capability\n"); 197 return -ENOSYS; 198 } 199 } 200 201 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs); 202 if (ret) { 203 return ret; 204 } 205 206 sregs.pvr = cenv->spr[SPR_PVR]; 207 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs); 208 } 209 210 /* Set up a shared TLB array with KVM */ 211 static int kvm_booke206_tlb_init(PowerPCCPU *cpu) 212 { 213 CPUPPCState *env = &cpu->env; 214 CPUState *cs = CPU(cpu); 215 struct kvm_book3e_206_tlb_params params = {}; 216 struct kvm_config_tlb cfg = {}; 217 unsigned int entries = 0; 218 int ret, i; 219 220 if (!kvm_enabled() || 221 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) { 222 return 0; 223 } 224 225 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN); 226 227 for (i = 0; i < BOOKE206_MAX_TLBN; i++) { 228 params.tlb_sizes[i] = booke206_tlb_size(env, i); 229 params.tlb_ways[i] = booke206_tlb_ways(env, i); 230 entries += params.tlb_sizes[i]; 231 } 232 233 assert(entries == env->nb_tlb); 234 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t)); 235 236 env->tlb_dirty = true; 237 238 cfg.array = (uintptr_t)env->tlb.tlbm; 239 cfg.array_len = sizeof(ppcmas_tlb_t) * entries; 240 cfg.params = (uintptr_t)¶ms; 241 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV; 242 243 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg); 244 if (ret < 0) { 245 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n", 246 __func__, strerror(-ret)); 247 return ret; 248 } 249 250 env->kvm_sw_tlb = true; 251 return 0; 252 } 253 254 255 #if defined(TARGET_PPC64) 256 static void kvm_get_smmu_info(struct kvm_ppc_smmu_info *info, Error **errp) 257 { 258 int ret; 259 260 assert(kvm_state != NULL); 261 262 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) { 263 error_setg(errp, "KVM doesn't expose the MMU features it supports"); 264 error_append_hint(errp, "Consider switching to a newer KVM\n"); 265 return; 266 } 267 268 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_SMMU_INFO, info); 269 if (ret == 0) { 270 return; 271 } 272 273 error_setg_errno(errp, -ret, 274 "KVM failed to provide the MMU features it supports"); 275 } 276 277 struct ppc_radix_page_info *kvm_get_radix_page_info(void) 278 { 279 KVMState *s = KVM_STATE(current_machine->accelerator); 280 struct ppc_radix_page_info *radix_page_info; 281 struct kvm_ppc_rmmu_info rmmu_info; 282 int i; 283 284 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) { 285 return NULL; 286 } 287 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) { 288 return NULL; 289 } 290 radix_page_info = g_malloc0(sizeof(*radix_page_info)); 291 radix_page_info->count = 0; 292 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) { 293 if (rmmu_info.ap_encodings[i]) { 294 radix_page_info->entries[i] = rmmu_info.ap_encodings[i]; 295 radix_page_info->count++; 296 } 297 } 298 return radix_page_info; 299 } 300 301 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, 302 bool radix, bool gtse, 303 uint64_t proc_tbl) 304 { 305 CPUState *cs = CPU(cpu); 306 int ret; 307 uint64_t flags = 0; 308 struct kvm_ppc_mmuv3_cfg cfg = { 309 .process_table = proc_tbl, 310 }; 311 312 if (radix) { 313 flags |= KVM_PPC_MMUV3_RADIX; 314 } 315 if (gtse) { 316 flags |= KVM_PPC_MMUV3_GTSE; 317 } 318 cfg.flags = flags; 319 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg); 320 switch (ret) { 321 case 0: 322 return H_SUCCESS; 323 case -EINVAL: 324 return H_PARAMETER; 325 case 
-ENODEV: 326 return H_NOT_AVAILABLE; 327 default: 328 return H_HARDWARE; 329 } 330 } 331 332 bool kvmppc_hpt_needs_host_contiguous_pages(void) 333 { 334 static struct kvm_ppc_smmu_info smmu_info; 335 336 if (!kvm_enabled()) { 337 return false; 338 } 339 340 kvm_get_smmu_info(&smmu_info, &error_fatal); 341 return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL); 342 } 343 344 void kvm_check_mmu(PowerPCCPU *cpu, Error **errp) 345 { 346 struct kvm_ppc_smmu_info smmu_info; 347 int iq, ik, jq, jk; 348 Error *local_err = NULL; 349 350 /* For now, we only have anything to check on hash64 MMUs */ 351 if (!cpu->hash64_opts || !kvm_enabled()) { 352 return; 353 } 354 355 kvm_get_smmu_info(&smmu_info, &local_err); 356 if (local_err) { 357 error_propagate(errp, local_err); 358 return; 359 } 360 361 if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG) 362 && !(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) { 363 error_setg(errp, 364 "KVM does not support 1TiB segments which guest expects"); 365 return; 366 } 367 368 if (smmu_info.slb_size < cpu->hash64_opts->slb_size) { 369 error_setg(errp, "KVM only supports %u SLB entries, but guest needs %u", 370 smmu_info.slb_size, cpu->hash64_opts->slb_size); 371 return; 372 } 373 374 /* 375 * Verify that every pagesize supported by the cpu model is 376 * supported by KVM with the same encodings 377 */ 378 for (iq = 0; iq < ARRAY_SIZE(cpu->hash64_opts->sps); iq++) { 379 PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq]; 380 struct kvm_ppc_one_seg_page_size *ksps; 381 382 for (ik = 0; ik < ARRAY_SIZE(smmu_info.sps); ik++) { 383 if (qsps->page_shift == smmu_info.sps[ik].page_shift) { 384 break; 385 } 386 } 387 if (ik >= ARRAY_SIZE(smmu_info.sps)) { 388 error_setg(errp, "KVM doesn't support for base page shift %u", 389 qsps->page_shift); 390 return; 391 } 392 393 ksps = &smmu_info.sps[ik]; 394 if (ksps->slb_enc != qsps->slb_enc) { 395 error_setg(errp, 396 "KVM uses SLB encoding 0x%x for page shift %u, but guest expects 0x%x", 397 ksps->slb_enc, ksps->page_shift, qsps->slb_enc); 398 return; 399 } 400 401 for (jq = 0; jq < ARRAY_SIZE(qsps->enc); jq++) { 402 for (jk = 0; jk < ARRAY_SIZE(ksps->enc); jk++) { 403 if (qsps->enc[jq].page_shift == ksps->enc[jk].page_shift) { 404 break; 405 } 406 } 407 408 if (jk >= ARRAY_SIZE(ksps->enc)) { 409 error_setg(errp, "KVM doesn't support page shift %u/%u", 410 qsps->enc[jq].page_shift, qsps->page_shift); 411 return; 412 } 413 if (qsps->enc[jq].pte_enc != ksps->enc[jk].pte_enc) { 414 error_setg(errp, 415 "KVM uses PTE encoding 0x%x for page shift %u/%u, but guest expects 0x%x", 416 ksps->enc[jk].pte_enc, qsps->enc[jq].page_shift, 417 qsps->page_shift, qsps->enc[jq].pte_enc); 418 return; 419 } 420 } 421 } 422 423 if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) { 424 /* Mostly what guest pagesizes we can use are related to the 425 * host pages used to map guest RAM, which is handled in the 426 * platform code. Cache-Inhibited largepages (64k) however are 427 * used for I/O, so if they're mapped to the host at all it 428 * will be a normal mapping, not a special hugepage one used 429 * for RAM. */ 430 if (getpagesize() < 0x10000) { 431 error_setg(errp, 432 "KVM can't supply 64kiB CI pages, which guest expects"); 433 } 434 } 435 } 436 #endif /* !defined (TARGET_PPC64) */ 437 438 unsigned long kvm_arch_vcpu_id(CPUState *cpu) 439 { 440 return POWERPC_CPU(cpu)->vcpu_id; 441 } 442 443 /* e500 supports 2 h/w breakpoint and 2 watchpoint. 444 * book3s supports only 1 watchpoint, so array size 445 * of 4 is sufficient for now. 
446 */ 447 #define MAX_HW_BKPTS 4 448 449 static struct HWBreakpoint { 450 target_ulong addr; 451 int type; 452 } hw_debug_points[MAX_HW_BKPTS]; 453 454 static CPUWatchpoint hw_watchpoint; 455 456 /* Default there is no breakpoint and watchpoint supported */ 457 static int max_hw_breakpoint; 458 static int max_hw_watchpoint; 459 static int nb_hw_breakpoint; 460 static int nb_hw_watchpoint; 461 462 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv) 463 { 464 if (cenv->excp_model == POWERPC_EXCP_BOOKE) { 465 max_hw_breakpoint = 2; 466 max_hw_watchpoint = 2; 467 } 468 469 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) { 470 fprintf(stderr, "Error initializing h/w breakpoints\n"); 471 return; 472 } 473 } 474 475 int kvm_arch_init_vcpu(CPUState *cs) 476 { 477 PowerPCCPU *cpu = POWERPC_CPU(cs); 478 CPUPPCState *cenv = &cpu->env; 479 int ret; 480 481 /* Synchronize sregs with kvm */ 482 ret = kvm_arch_sync_sregs(cpu); 483 if (ret) { 484 if (ret == -EINVAL) { 485 error_report("Register sync failed... If you're using kvm-hv.ko," 486 " only \"-cpu host\" is possible"); 487 } 488 return ret; 489 } 490 491 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu); 492 493 switch (cenv->mmu_model) { 494 case POWERPC_MMU_BOOKE206: 495 /* This target supports access to KVM's guest TLB */ 496 ret = kvm_booke206_tlb_init(cpu); 497 break; 498 case POWERPC_MMU_2_07: 499 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) { 500 /* KVM-HV has transactional memory on POWER8 also without the 501 * KVM_CAP_PPC_HTM extension, so enable it here instead as 502 * long as it's availble to userspace on the host. */ 503 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) { 504 cap_htm = true; 505 } 506 } 507 break; 508 default: 509 break; 510 } 511 512 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode); 513 kvmppc_hw_debug_points_init(cenv); 514 515 return ret; 516 } 517 518 static void kvm_sw_tlb_put(PowerPCCPU *cpu) 519 { 520 CPUPPCState *env = &cpu->env; 521 CPUState *cs = CPU(cpu); 522 struct kvm_dirty_tlb dirty_tlb; 523 unsigned char *bitmap; 524 int ret; 525 526 if (!env->kvm_sw_tlb) { 527 return; 528 } 529 530 bitmap = g_malloc((env->nb_tlb + 7) / 8); 531 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8); 532 533 dirty_tlb.bitmap = (uintptr_t)bitmap; 534 dirty_tlb.num_dirty = env->nb_tlb; 535 536 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb); 537 if (ret) { 538 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n", 539 __func__, strerror(-ret)); 540 } 541 542 g_free(bitmap); 543 } 544 545 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr) 546 { 547 PowerPCCPU *cpu = POWERPC_CPU(cs); 548 CPUPPCState *env = &cpu->env; 549 union { 550 uint32_t u32; 551 uint64_t u64; 552 } val; 553 struct kvm_one_reg reg = { 554 .id = id, 555 .addr = (uintptr_t) &val, 556 }; 557 int ret; 558 559 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); 560 if (ret != 0) { 561 trace_kvm_failed_spr_get(spr, strerror(errno)); 562 } else { 563 switch (id & KVM_REG_SIZE_MASK) { 564 case KVM_REG_SIZE_U32: 565 env->spr[spr] = val.u32; 566 break; 567 568 case KVM_REG_SIZE_U64: 569 env->spr[spr] = val.u64; 570 break; 571 572 default: 573 /* Don't handle this size yet */ 574 abort(); 575 } 576 } 577 } 578 579 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr) 580 { 581 PowerPCCPU *cpu = POWERPC_CPU(cs); 582 CPUPPCState *env = &cpu->env; 583 union { 584 uint32_t u32; 585 uint64_t u64; 586 } val; 587 struct kvm_one_reg reg = { 588 .id = id, 589 .addr = (uintptr_t) &val, 590 }; 591 int ret; 592 593 switch 
(id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_set(spr, strerror(errno));
    }
}

static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];
            uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
            uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);

#ifdef HOST_WORDS_BIGENDIAN
            vsr[0] = float64_val(*fpr);
            vsr[1] = *vsrl;
#else
            vsr[0] = *vsrl;
            vsr[1] = float64_val(*fpr);
#endif
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];
            uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
            uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get %s%d from KVM: %s\n",
                        vsx ? "VSR" : "FPR", i, strerror(errno));
                return ret;
            } else {
#ifdef HOST_WORDS_BIGENDIAN
                *fpr = vsr[0];
                if (vsx) {
                    *vsrl = vsr[1];
                }
#else
                *fpr = vsr[1];
                if (vsx) {
                    *vsrl = vsr[0];
                }
#endif
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get VR%d from KVM: %s\n",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

#if defined(TARGET_PPC64)
static int kvm_get_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
    struct kvm_one_reg reg;
    int ret;

    reg.id = KVM_REG_PPC_VPA_ADDR;
    reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&spapr_cpu->slb_shadow_size
           == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    assert((uintptr_t)&spapr_cpu->dtl_size
           == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    return 0;
}

static int kvm_put_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
    struct kvm_one_reg reg;
    int ret;

    /* SLB shadow or DTL can't be registered unless a master VPA is
     * registered. That means when restoring state, if a VPA *is*
     * registered, we need to set that up first. If not, we need to
     * deregister the others before deregistering the master VPA */
    assert(spapr_cpu->vpa_addr
           || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));

    if (spapr_cpu->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    assert((uintptr_t)&spapr_cpu->slb_shadow_size
           == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&spapr_cpu->dtl_size
           == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
                strerror(errno));
        return ret;
    }

    if (!spapr_cpu->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    return 0;
}
#endif /* TARGET_PPC64 */
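/*
 * Push the Book3S segment state (PVR, SDR1 or the vhyp-encoded HPT,
 * the SLB, the 32-bit segment registers and the BATs) from the QEMU
 * CPU state into KVM via KVM_SET_SREGS.
 */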
int kvmppc_put_books_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int i;

    sregs.pvr = env->spr[SPR_PVR];

    if (cpu->vhyp) {
        PPCVirtualHypervisorClass *vhc =
            PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
        sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
    } else {
        sregs.u.s.sdr1 = env->spr[SPR_SDR1];
    }

    /* Sync SLB */
#ifdef TARGET_PPC64
    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
        sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
        if (env->slb[i].esid & SLB_ESID_V) {
            sregs.u.s.ppc64.slb[i].slbe |= i;
        }
        sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
    }
#endif

    /* Sync SRs */
    for (i = 0; i < 16; i++) {
        sregs.u.s.ppc32.sr[i] = env->sr[i];
    }

    /* Sync BATs */
    for (i = 0; i < 8; i++) {
        /* Beware. We have to swap upper and lower bits here */
        sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
            | env->DBAT[1][i];
        sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
            | env->IBAT[1][i];
    }

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
}

int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr = env->lr;
    regs.xer = cpu_read_xer(env);
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    regs.cr = 0;
    for (i = 0; i < 8; i++) {
        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    kvm_put_fp(cs);

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        ret = kvmppc_put_books_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate.
*/ 969 for (i = 0; i < 1024; i++) { 970 uint64_t id = env->spr_cb[i].one_reg_id; 971 972 if (id != 0) { 973 kvm_put_one_spr(cs, id, i); 974 } 975 } 976 977 #ifdef TARGET_PPC64 978 if (msr_ts) { 979 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 980 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 981 } 982 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 983 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 984 } 985 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 986 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 987 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 988 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 989 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 990 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 991 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 992 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 993 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 994 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 995 } 996 997 if (cap_papr) { 998 if (kvm_put_vpa(cs) < 0) { 999 DPRINTF("Warning: Unable to set VPA information to KVM\n"); 1000 } 1001 } 1002 1003 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1004 #endif /* TARGET_PPC64 */ 1005 } 1006 1007 return ret; 1008 } 1009 1010 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor) 1011 { 1012 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR]; 1013 } 1014 1015 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu) 1016 { 1017 CPUPPCState *env = &cpu->env; 1018 struct kvm_sregs sregs; 1019 int ret; 1020 1021 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); 1022 if (ret < 0) { 1023 return ret; 1024 } 1025 1026 if (sregs.u.e.features & KVM_SREGS_E_BASE) { 1027 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0; 1028 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1; 1029 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr; 1030 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear; 1031 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr; 1032 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr; 1033 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr; 1034 env->spr[SPR_DECR] = sregs.u.e.dec; 1035 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff; 1036 env->spr[SPR_TBU] = sregs.u.e.tb >> 32; 1037 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave; 1038 } 1039 1040 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) { 1041 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir; 1042 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0; 1043 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1; 1044 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar; 1045 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr; 1046 } 1047 1048 if (sregs.u.e.features & KVM_SREGS_E_64) { 1049 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr; 1050 } 1051 1052 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) { 1053 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8; 1054 } 1055 1056 if (sregs.u.e.features & KVM_SREGS_E_IVOR) { 1057 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0]; 1058 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0); 1059 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1]; 1060 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1); 1061 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2]; 1062 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2); 1063 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3]; 1064 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3); 1065 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4]; 1066 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4); 1067 env->spr[SPR_BOOKE_IVOR5] = 
sregs.u.e.ivor_low[5]; 1068 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5); 1069 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6]; 1070 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6); 1071 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7]; 1072 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7); 1073 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8]; 1074 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8); 1075 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9]; 1076 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9); 1077 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10]; 1078 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10); 1079 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11]; 1080 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11); 1081 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12]; 1082 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12); 1083 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13]; 1084 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13); 1085 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14]; 1086 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14); 1087 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15]; 1088 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15); 1089 1090 if (sregs.u.e.features & KVM_SREGS_E_SPE) { 1091 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0]; 1092 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32); 1093 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1]; 1094 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33); 1095 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2]; 1096 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34); 1097 } 1098 1099 if (sregs.u.e.features & KVM_SREGS_E_PM) { 1100 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3]; 1101 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35); 1102 } 1103 1104 if (sregs.u.e.features & KVM_SREGS_E_PC) { 1105 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4]; 1106 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36); 1107 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5]; 1108 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37); 1109 } 1110 } 1111 1112 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) { 1113 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0; 1114 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1; 1115 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2; 1116 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff; 1117 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4; 1118 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6; 1119 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32; 1120 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg; 1121 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0]; 1122 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1]; 1123 } 1124 1125 if (sregs.u.e.features & KVM_SREGS_EXP) { 1126 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr; 1127 } 1128 1129 if (sregs.u.e.features & KVM_SREGS_E_PD) { 1130 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc; 1131 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc; 1132 } 1133 1134 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { 1135 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr; 1136 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar; 1137 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0; 1138 1139 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) { 1140 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1; 1141 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2; 1142 } 1143 } 1144 1145 return 0; 1146 } 1147 1148 static int kvmppc_get_books_sregs(PowerPCCPU *cpu) 1149 { 1150 
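    /* Pull the Book3S segment state (SDR1, SLB, SRs, BATs) back out of
     * KVM via KVM_GET_SREGS; the inverse of kvmppc_put_books_sregs(). */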
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
    if (ret < 0) {
        return ret;
    }

    if (!cpu->vhyp) {
        ppc_store_sdr1(env, sregs.u.s.sdr1);
    }

    /* Sync SLB */
#ifdef TARGET_PPC64
    /*
     * The packed SLB array we get from KVM_GET_SREGS only contains
     * information about valid entries. So we flush our internal copy
     * to get rid of stale ones, then put all valid SLB entries back
     * in.
     */
    memset(env->slb, 0, sizeof(env->slb));
    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
        target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
        target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
        /*
         * Only restore valid entries
         */
        if (rb & SLB_ESID_V) {
            ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
        }
    }
#endif

    /* Sync SRs */
    for (i = 0; i < 16; i++) {
        env->sr[i] = sregs.u.s.ppc32.sr[i];
    }

    /* Sync BATs */
    for (i = 0; i < 8; i++) {
        env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
        env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
        env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
        env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
    }

    return 0;
}

int kvm_arch_get_registers(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    cpu_write_xer(env, regs.xer);
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++) {
        env->gpr[i] = regs.gpr[i];
    }

    kvm_get_fp(cs);

    if (cap_booke_sregs) {
        ret = kvmppc_get_booke_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_segstate) {
        ret = kvmppc_get_books_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_hior) {
        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate.
*/ 1269 for (i = 0; i < 1024; i++) { 1270 uint64_t id = env->spr_cb[i].one_reg_id; 1271 1272 if (id != 0) { 1273 kvm_get_one_spr(cs, id, i); 1274 } 1275 } 1276 1277 #ifdef TARGET_PPC64 1278 if (msr_ts) { 1279 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) { 1280 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]); 1281 } 1282 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) { 1283 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]); 1284 } 1285 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr); 1286 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr); 1287 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr); 1288 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr); 1289 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr); 1290 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr); 1291 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave); 1292 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr); 1293 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr); 1294 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar); 1295 } 1296 1297 if (cap_papr) { 1298 if (kvm_get_vpa(cs) < 0) { 1299 DPRINTF("Warning: Unable to get VPA information from KVM\n"); 1300 } 1301 } 1302 1303 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset); 1304 #endif 1305 } 1306 1307 return 0; 1308 } 1309 1310 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level) 1311 { 1312 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET; 1313 1314 if (irq != PPC_INTERRUPT_EXT) { 1315 return 0; 1316 } 1317 1318 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) { 1319 return 0; 1320 } 1321 1322 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq); 1323 1324 return 0; 1325 } 1326 1327 #if defined(TARGET_PPC64) 1328 #define PPC_INPUT_INT PPC970_INPUT_INT 1329 #else 1330 #define PPC_INPUT_INT PPC6xx_INPUT_INT 1331 #endif 1332 1333 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) 1334 { 1335 PowerPCCPU *cpu = POWERPC_CPU(cs); 1336 CPUPPCState *env = &cpu->env; 1337 int r; 1338 unsigned irq; 1339 1340 qemu_mutex_lock_iothread(); 1341 1342 /* PowerPC QEMU tracks the various core input pins (interrupt, critical 1343 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */ 1344 if (!cap_interrupt_level && 1345 run->ready_for_interrupt_injection && 1346 (cs->interrupt_request & CPU_INTERRUPT_HARD) && 1347 (env->irq_input_state & (1<<PPC_INPUT_INT))) 1348 { 1349 /* For now KVM disregards the 'irq' argument. However, in the 1350 * future KVM could cache it in-kernel to avoid a heavyweight exit 1351 * when reading the UIC. 1352 */ 1353 irq = KVM_INTERRUPT_SET; 1354 1355 DPRINTF("injected interrupt %d\n", irq); 1356 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq); 1357 if (r < 0) { 1358 printf("cpu %d fail inject %x\n", cs->cpu_index, irq); 1359 } 1360 1361 /* Always wake up soon in case the interrupt was level based */ 1362 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1363 (NANOSECONDS_PER_SECOND / 50)); 1364 } 1365 1366 /* We don't know if there are more interrupts pending after this. However, 1367 * the guest will return to userspace in the course of handling this one 1368 * anyways, so we will get a chance to deliver the rest. 
*/ 1369 1370 qemu_mutex_unlock_iothread(); 1371 } 1372 1373 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) 1374 { 1375 return MEMTXATTRS_UNSPECIFIED; 1376 } 1377 1378 int kvm_arch_process_async_events(CPUState *cs) 1379 { 1380 return cs->halted; 1381 } 1382 1383 static int kvmppc_handle_halt(PowerPCCPU *cpu) 1384 { 1385 CPUState *cs = CPU(cpu); 1386 CPUPPCState *env = &cpu->env; 1387 1388 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) { 1389 cs->halted = 1; 1390 cs->exception_index = EXCP_HLT; 1391 } 1392 1393 return 0; 1394 } 1395 1396 /* map dcr access to existing qemu dcr emulation */ 1397 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data) 1398 { 1399 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) 1400 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn); 1401 1402 return 0; 1403 } 1404 1405 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data) 1406 { 1407 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) 1408 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn); 1409 1410 return 0; 1411 } 1412 1413 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1414 { 1415 /* Mixed endian case is not handled */ 1416 uint32_t sc = debug_inst_opcode; 1417 1418 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1419 sizeof(sc), 0) || 1420 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) { 1421 return -EINVAL; 1422 } 1423 1424 return 0; 1425 } 1426 1427 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) 1428 { 1429 uint32_t sc; 1430 1431 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) || 1432 sc != debug_inst_opcode || 1433 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1434 sizeof(sc), 1)) { 1435 return -EINVAL; 1436 } 1437 1438 return 0; 1439 } 1440 1441 static int find_hw_breakpoint(target_ulong addr, int type) 1442 { 1443 int n; 1444 1445 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1446 <= ARRAY_SIZE(hw_debug_points)); 1447 1448 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1449 if (hw_debug_points[n].addr == addr && 1450 hw_debug_points[n].type == type) { 1451 return n; 1452 } 1453 } 1454 1455 return -1; 1456 } 1457 1458 static int find_hw_watchpoint(target_ulong addr, int *flag) 1459 { 1460 int n; 1461 1462 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS); 1463 if (n >= 0) { 1464 *flag = BP_MEM_ACCESS; 1465 return n; 1466 } 1467 1468 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE); 1469 if (n >= 0) { 1470 *flag = BP_MEM_WRITE; 1471 return n; 1472 } 1473 1474 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ); 1475 if (n >= 0) { 1476 *flag = BP_MEM_READ; 1477 return n; 1478 } 1479 1480 return -1; 1481 } 1482 1483 int kvm_arch_insert_hw_breakpoint(target_ulong addr, 1484 target_ulong len, int type) 1485 { 1486 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) { 1487 return -ENOBUFS; 1488 } 1489 1490 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr; 1491 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type; 1492 1493 switch (type) { 1494 case GDB_BREAKPOINT_HW: 1495 if (nb_hw_breakpoint >= max_hw_breakpoint) { 1496 return -ENOBUFS; 1497 } 1498 1499 if (find_hw_breakpoint(addr, type) >= 0) { 1500 return -EEXIST; 1501 } 1502 1503 nb_hw_breakpoint++; 1504 break; 1505 1506 case GDB_WATCHPOINT_WRITE: 1507 case GDB_WATCHPOINT_READ: 1508 case GDB_WATCHPOINT_ACCESS: 1509 if (nb_hw_watchpoint >= max_hw_watchpoint) { 1510 
return -ENOBUFS; 1511 } 1512 1513 if (find_hw_breakpoint(addr, type) >= 0) { 1514 return -EEXIST; 1515 } 1516 1517 nb_hw_watchpoint++; 1518 break; 1519 1520 default: 1521 return -ENOSYS; 1522 } 1523 1524 return 0; 1525 } 1526 1527 int kvm_arch_remove_hw_breakpoint(target_ulong addr, 1528 target_ulong len, int type) 1529 { 1530 int n; 1531 1532 n = find_hw_breakpoint(addr, type); 1533 if (n < 0) { 1534 return -ENOENT; 1535 } 1536 1537 switch (type) { 1538 case GDB_BREAKPOINT_HW: 1539 nb_hw_breakpoint--; 1540 break; 1541 1542 case GDB_WATCHPOINT_WRITE: 1543 case GDB_WATCHPOINT_READ: 1544 case GDB_WATCHPOINT_ACCESS: 1545 nb_hw_watchpoint--; 1546 break; 1547 1548 default: 1549 return -ENOSYS; 1550 } 1551 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint]; 1552 1553 return 0; 1554 } 1555 1556 void kvm_arch_remove_all_hw_breakpoints(void) 1557 { 1558 nb_hw_breakpoint = nb_hw_watchpoint = 0; 1559 } 1560 1561 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg) 1562 { 1563 int n; 1564 1565 /* Software Breakpoint updates */ 1566 if (kvm_sw_breakpoints_active(cs)) { 1567 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; 1568 } 1569 1570 assert((nb_hw_breakpoint + nb_hw_watchpoint) 1571 <= ARRAY_SIZE(hw_debug_points)); 1572 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp)); 1573 1574 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1575 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; 1576 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp)); 1577 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) { 1578 switch (hw_debug_points[n].type) { 1579 case GDB_BREAKPOINT_HW: 1580 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT; 1581 break; 1582 case GDB_WATCHPOINT_WRITE: 1583 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE; 1584 break; 1585 case GDB_WATCHPOINT_READ: 1586 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ; 1587 break; 1588 case GDB_WATCHPOINT_ACCESS: 1589 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE | 1590 KVMPPC_DEBUG_WATCH_READ; 1591 break; 1592 default: 1593 cpu_abort(cs, "Unsupported breakpoint type\n"); 1594 } 1595 dbg->arch.bp[n].addr = hw_debug_points[n].addr; 1596 } 1597 } 1598 } 1599 1600 static int kvm_handle_hw_breakpoint(CPUState *cs, 1601 struct kvm_debug_exit_arch *arch_info) 1602 { 1603 int handle = 0; 1604 int n; 1605 int flag = 0; 1606 1607 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) { 1608 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) { 1609 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW); 1610 if (n >= 0) { 1611 handle = 1; 1612 } 1613 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ | 1614 KVMPPC_DEBUG_WATCH_WRITE)) { 1615 n = find_hw_watchpoint(arch_info->address, &flag); 1616 if (n >= 0) { 1617 handle = 1; 1618 cs->watchpoint_hit = &hw_watchpoint; 1619 hw_watchpoint.vaddr = hw_debug_points[n].addr; 1620 hw_watchpoint.flags = flag; 1621 } 1622 } 1623 } 1624 return handle; 1625 } 1626 1627 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run) 1628 { 1629 CPUState *cs = CPU(cpu); 1630 CPUPPCState *env = &cpu->env; 1631 struct kvm_debug_exit_arch *arch_info = &run->debug.arch; 1632 int handle = 0; 1633 1634 if (cs->singlestep_enabled) { 1635 handle = 1; 1636 } else if (arch_info->status) { 1637 handle = kvm_handle_hw_breakpoint(cs, arch_info); 1638 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) { 1639 handle = 1; 1640 } else { 1641 /* QEMU is not able to handle debug exception, so inject 1642 * program exception to guest; 1643 * Yes 
program exception NOT debug exception !! 1644 * When QEMU is using debug resources then debug exception must 1645 * be always set. To achieve this we set MSR_DE and also set 1646 * MSRP_DEP so guest cannot change MSR_DE. 1647 * When emulating debug resource for guest we want guest 1648 * to control MSR_DE (enable/disable debug interrupt on need). 1649 * Supporting both configurations are NOT possible. 1650 * So the result is that we cannot share debug resources 1651 * between QEMU and Guest on BOOKE architecture. 1652 * In the current design QEMU gets the priority over guest, 1653 * this means that if QEMU is using debug resources then guest 1654 * cannot use them; 1655 * For software breakpoint QEMU uses a privileged instruction; 1656 * So there cannot be any reason that we are here for guest 1657 * set debug exception, only possibility is guest executed a 1658 * privileged / illegal instruction and that's why we are 1659 * injecting a program interrupt. 1660 */ 1661 1662 cpu_synchronize_state(cs); 1663 /* env->nip is PC, so increment this by 4 to use 1664 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4. 1665 */ 1666 env->nip += 4; 1667 cs->exception_index = POWERPC_EXCP_PROGRAM; 1668 env->error_code = POWERPC_EXCP_INVAL; 1669 ppc_cpu_do_interrupt(cs); 1670 } 1671 1672 return handle; 1673 } 1674 1675 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) 1676 { 1677 PowerPCCPU *cpu = POWERPC_CPU(cs); 1678 CPUPPCState *env = &cpu->env; 1679 int ret; 1680 1681 qemu_mutex_lock_iothread(); 1682 1683 switch (run->exit_reason) { 1684 case KVM_EXIT_DCR: 1685 if (run->dcr.is_write) { 1686 DPRINTF("handle dcr write\n"); 1687 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data); 1688 } else { 1689 DPRINTF("handle dcr read\n"); 1690 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data); 1691 } 1692 break; 1693 case KVM_EXIT_HLT: 1694 DPRINTF("handle halt\n"); 1695 ret = kvmppc_handle_halt(cpu); 1696 break; 1697 #if defined(TARGET_PPC64) 1698 case KVM_EXIT_PAPR_HCALL: 1699 DPRINTF("handle PAPR hypercall\n"); 1700 run->papr_hcall.ret = spapr_hypercall(cpu, 1701 run->papr_hcall.nr, 1702 run->papr_hcall.args); 1703 ret = 0; 1704 break; 1705 #endif 1706 case KVM_EXIT_EPR: 1707 DPRINTF("handle epr\n"); 1708 run->epr.epr = ldl_phys(cs->as, env->mpic_iack); 1709 ret = 0; 1710 break; 1711 case KVM_EXIT_WATCHDOG: 1712 DPRINTF("handle watchdog expiry\n"); 1713 watchdog_perform_action(); 1714 ret = 0; 1715 break; 1716 1717 case KVM_EXIT_DEBUG: 1718 DPRINTF("handle debug exception\n"); 1719 if (kvm_handle_debug(cpu, run)) { 1720 ret = EXCP_DEBUG; 1721 break; 1722 } 1723 /* re-enter, this exception was guest-internal */ 1724 ret = 0; 1725 break; 1726 1727 default: 1728 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); 1729 ret = -1; 1730 break; 1731 } 1732 1733 qemu_mutex_unlock_iothread(); 1734 return ret; 1735 } 1736 1737 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits) 1738 { 1739 CPUState *cs = CPU(cpu); 1740 uint32_t bits = tsr_bits; 1741 struct kvm_one_reg reg = { 1742 .id = KVM_REG_PPC_OR_TSR, 1743 .addr = (uintptr_t) &bits, 1744 }; 1745 1746 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1747 } 1748 1749 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits) 1750 { 1751 1752 CPUState *cs = CPU(cpu); 1753 uint32_t bits = tsr_bits; 1754 struct kvm_one_reg reg = { 1755 .id = KVM_REG_PPC_CLEAR_TSR, 1756 .addr = (uintptr_t) &bits, 1757 }; 1758 1759 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1760 } 1761 1762 int kvmppc_set_tcr(PowerPCCPU 
*cpu) 1763 { 1764 CPUState *cs = CPU(cpu); 1765 CPUPPCState *env = &cpu->env; 1766 uint32_t tcr = env->spr[SPR_BOOKE_TCR]; 1767 1768 struct kvm_one_reg reg = { 1769 .id = KVM_REG_PPC_TCR, 1770 .addr = (uintptr_t) &tcr, 1771 }; 1772 1773 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); 1774 } 1775 1776 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu) 1777 { 1778 CPUState *cs = CPU(cpu); 1779 int ret; 1780 1781 if (!kvm_enabled()) { 1782 return -1; 1783 } 1784 1785 if (!cap_ppc_watchdog) { 1786 printf("warning: KVM does not support watchdog"); 1787 return -1; 1788 } 1789 1790 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0); 1791 if (ret < 0) { 1792 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n", 1793 __func__, strerror(-ret)); 1794 return ret; 1795 } 1796 1797 return ret; 1798 } 1799 1800 static int read_cpuinfo(const char *field, char *value, int len) 1801 { 1802 FILE *f; 1803 int ret = -1; 1804 int field_len = strlen(field); 1805 char line[512]; 1806 1807 f = fopen("/proc/cpuinfo", "r"); 1808 if (!f) { 1809 return -1; 1810 } 1811 1812 do { 1813 if (!fgets(line, sizeof(line), f)) { 1814 break; 1815 } 1816 if (!strncmp(line, field, field_len)) { 1817 pstrcpy(value, len, line); 1818 ret = 0; 1819 break; 1820 } 1821 } while(*line); 1822 1823 fclose(f); 1824 1825 return ret; 1826 } 1827 1828 uint32_t kvmppc_get_tbfreq(void) 1829 { 1830 char line[512]; 1831 char *ns; 1832 uint32_t retval = NANOSECONDS_PER_SECOND; 1833 1834 if (read_cpuinfo("timebase", line, sizeof(line))) { 1835 return retval; 1836 } 1837 1838 if (!(ns = strchr(line, ':'))) { 1839 return retval; 1840 } 1841 1842 ns++; 1843 1844 return atoi(ns); 1845 } 1846 1847 bool kvmppc_get_host_serial(char **value) 1848 { 1849 return g_file_get_contents("/proc/device-tree/system-id", value, NULL, 1850 NULL); 1851 } 1852 1853 bool kvmppc_get_host_model(char **value) 1854 { 1855 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL); 1856 } 1857 1858 /* Try to find a device tree node for a CPU with clock-frequency property */ 1859 static int kvmppc_find_cpu_dt(char *buf, int buf_len) 1860 { 1861 struct dirent *dirp; 1862 DIR *dp; 1863 1864 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) { 1865 printf("Can't open directory " PROC_DEVTREE_CPU "\n"); 1866 return -1; 1867 } 1868 1869 buf[0] = '\0'; 1870 while ((dirp = readdir(dp)) != NULL) { 1871 FILE *f; 1872 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU, 1873 dirp->d_name); 1874 f = fopen(buf, "r"); 1875 if (f) { 1876 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name); 1877 fclose(f); 1878 break; 1879 } 1880 buf[0] = '\0'; 1881 } 1882 closedir(dp); 1883 if (buf[0] == '\0') { 1884 printf("Unknown host!\n"); 1885 return -1; 1886 } 1887 1888 return 0; 1889 } 1890 1891 static uint64_t kvmppc_read_int_dt(const char *filename) 1892 { 1893 union { 1894 uint32_t v32; 1895 uint64_t v64; 1896 } u; 1897 FILE *f; 1898 int len; 1899 1900 f = fopen(filename, "rb"); 1901 if (!f) { 1902 return -1; 1903 } 1904 1905 len = fread(&u, 1, sizeof(u), f); 1906 fclose(f); 1907 switch (len) { 1908 case 4: 1909 /* property is a 32-bit quantity */ 1910 return be32_to_cpu(u.v32); 1911 case 8: 1912 return be64_to_cpu(u.v64); 1913 } 1914 1915 return 0; 1916 } 1917 1918 /* Read a CPU node property from the host device tree that's a single 1919 * integer (32-bit or 64-bit). 
Returns 0 if anything goes wrong 1920 * (can't find or open the property, or doesn't understand the 1921 * format) */ 1922 static uint64_t kvmppc_read_int_cpu_dt(const char *propname) 1923 { 1924 char buf[PATH_MAX], *tmp; 1925 uint64_t val; 1926 1927 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) { 1928 return -1; 1929 } 1930 1931 tmp = g_strdup_printf("%s/%s", buf, propname); 1932 val = kvmppc_read_int_dt(tmp); 1933 g_free(tmp); 1934 1935 return val; 1936 } 1937 1938 uint64_t kvmppc_get_clockfreq(void) 1939 { 1940 return kvmppc_read_int_cpu_dt("clock-frequency"); 1941 } 1942 1943 static int kvmppc_get_dec_bits(void) 1944 { 1945 int nr_bits = kvmppc_read_int_cpu_dt("ibm,dec-bits"); 1946 1947 if (nr_bits > 0) { 1948 return nr_bits; 1949 } 1950 return 0; 1951 } 1952 1953 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo) 1954 { 1955 PowerPCCPU *cpu = ppc_env_get_cpu(env); 1956 CPUState *cs = CPU(cpu); 1957 1958 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) && 1959 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) { 1960 return 0; 1961 } 1962 1963 return 1; 1964 } 1965 1966 int kvmppc_get_hasidle(CPUPPCState *env) 1967 { 1968 struct kvm_ppc_pvinfo pvinfo; 1969 1970 if (!kvmppc_get_pvinfo(env, &pvinfo) && 1971 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) { 1972 return 1; 1973 } 1974 1975 return 0; 1976 } 1977 1978 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len) 1979 { 1980 uint32_t *hc = (uint32_t*)buf; 1981 struct kvm_ppc_pvinfo pvinfo; 1982 1983 if (!kvmppc_get_pvinfo(env, &pvinfo)) { 1984 memcpy(buf, pvinfo.hcall, buf_len); 1985 return 0; 1986 } 1987 1988 /* 1989 * Fallback to always fail hypercalls regardless of endianness: 1990 * 1991 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian) 1992 * li r3, -1 1993 * b .+8 (becomes nop in wrong endian) 1994 * bswap32(li r3, -1) 1995 */ 1996 1997 hc[0] = cpu_to_be32(0x08000048); 1998 hc[1] = cpu_to_be32(0x3860ffff); 1999 hc[2] = cpu_to_be32(0x48000008); 2000 hc[3] = cpu_to_be32(bswap32(0x3860ffff)); 2001 2002 return 1; 2003 } 2004 2005 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall) 2006 { 2007 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1); 2008 } 2009 2010 void kvmppc_enable_logical_ci_hcalls(void) 2011 { 2012 /* 2013 * FIXME: it would be nice if we could detect the cases where 2014 * we're using a device which requires the in kernel 2015 * implementation of these hcalls, but the kernel lacks them and 2016 * produce a warning. 

static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
{
    return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
}

void kvmppc_enable_logical_ci_hcalls(void)
{
    /*
     * FIXME: it would be nice if we could detect the cases where
     * we're using a device which requires the in kernel
     * implementation of these hcalls, but the kernel lacks them and
     * produce a warning.
     */
    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
}

void kvmppc_enable_set_mode_hcall(void)
{
    kvmppc_enable_hcall(kvm_state, H_SET_MODE);
}

void kvmppc_enable_clear_ref_mod_hcalls(void)
{
    kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
    kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
}

void kvmppc_set_papr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (!kvm_enabled()) {
        return;
    }

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
    if (ret) {
        error_report("This vCPU type or KVM version does not support PAPR");
        exit(1);
    }

    /* Update the capability flag so we sync the right information
     * with kvm */
    cap_papr = 1;
}

int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
{
    return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
}

void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
{
    CPUState *cs = CPU(cpu);
    int ret;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
    if (ret && mpic_proxy) {
        error_report("This KVM version does not support EPR");
        exit(1);
    }
}

int kvmppc_smt_threads(void)
{
    return cap_ppc_smt ? cap_ppc_smt : 1;
}

int kvmppc_set_smt_threads(int smt)
{
    int ret;

    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
    if (!ret) {
        cap_ppc_smt = smt;
    }
    return ret;
}

void kvmppc_hint_smt_possible(Error **errp)
{
    int i;
    GString *g;
    char *s;

    assert(kvm_enabled());
    if (cap_ppc_smt_possible) {
        g = g_string_new("Available VSMT modes:");
        for (i = 63; i >= 0; i--) {
            if ((1UL << i) & cap_ppc_smt_possible) {
                g_string_append_printf(g, " %lu", (1UL << i));
            }
        }
        s = g_string_free(g, false);
        error_append_hint(errp, "%s.\n", s);
        g_free(s);
    } else {
        error_append_hint(errp,
                          "This KVM seems to be too old to support VSMT.\n");
    }
}


#ifdef TARGET_PPC64
uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    struct kvm_ppc_smmu_info info;
    long rampagesize, best_page_shift;
    int i;

    /* Find the largest hardware supported page size that's less than
     * or equal to the (logical) backing page size of guest RAM */
    kvm_get_smmu_info(&info, &error_fatal);
    rampagesize = qemu_getrampagesize();
    best_page_shift = 0;

    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];

        if (!sps->page_shift) {
            continue;
        }

        if ((sps->page_shift > best_page_shift)
            && ((1UL << sps->page_shift) <= rampagesize)) {
            best_page_shift = sps->page_shift;
        }
    }

    return MIN(current_size,
               1ULL << (best_page_shift + hash_shift - 7));
}
#endif
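
/*
 * Worked example of the clamp above (numbers are illustrative only): with
 * a 16 MiB HPT the caller passes hash_shift = 24, and with 64 KiB backing
 * pages best_page_shift is 16, so the RMA is capped at
 * 1ULL << (16 + 24 - 7), i.e. 8 GiB, or at current_size if that is smaller.
 */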

bool kvmppc_spapr_use_multitce(void)
{
    return cap_spapr_multitce;
}

int kvmppc_spapr_enable_inkernel_multitce(void)
{
    int ret;

    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
                            H_PUT_TCE_INDIRECT, 1);
    if (!ret) {
        ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
                                H_STUFF_TCE, 1);
    }

    return ret;
}

void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
                              uint64_t bus_offset, uint32_t nb_table,
                              int *pfd, bool need_vfio)
{
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
        return NULL;
    }

    if (cap_spapr_tce_64) {
        struct kvm_create_spapr_tce_64 args = {
            .liobn = liobn,
            .page_shift = page_shift,
            .offset = bus_offset >> page_shift,
            .size = nb_table,
            .flags = 0
        };
        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
        if (fd < 0) {
            fprintf(stderr,
                    "KVM: Failed to create TCE64 table for liobn 0x%x\n",
                    liobn);
            return NULL;
        }
    } else if (cap_spapr_tce) {
        uint64_t window_size = (uint64_t) nb_table << page_shift;
        struct kvm_create_spapr_tce args = {
            .liobn = liobn,
            .window_size = window_size,
        };
        if ((window_size != args.window_size) || bus_offset) {
            return NULL;
        }
        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
        if (fd < 0) {
            fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                    liobn);
            return NULL;
        }
    } else {
        return NULL;
    }

    len = nb_table * sizeof(uint64_t);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}

int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
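
/*
 * Size arithmetic for the TCE tables above, as an illustration: each TCE
 * entry is a uint64_t, so a 1 GiB DMA window with 4 KiB IOMMU pages needs
 * nb_table = (1 GiB >> 12) = 262144 entries, i.e. a 2 MiB table, which is
 * what gets mmap()ed from the KVM fd and munmap()ed again on removal.
 */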

int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;
        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl().  Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls.  For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table already. */
    if (kvmppc_is_pr(kvm_state)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab, i.e. a shift of 24 */
        return 24;
    }
}

static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}

static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}

static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */
    pcc->pvr = mfpvr();

    alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
    alter_insns(&pcc->insns_flags2, PPC2_VSX,
                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
    alter_insns(&pcc->insns_flags2, PPC2_DFP,
                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;
    }

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;
    }

#if defined(TARGET_PPC64)
    pcc->radix_page_info = kvm_get_radix_page_info();

    if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
        /*
         * POWER9 DD1 has some bugs which make it not really ISA 3.00
         * compliant. More importantly, advertising ISA 3.00
         * architected mode may prevent guests from activating
         * necessary DD1 workarounds.
         */
        pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
                                | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
    }
#endif /* defined(TARGET_PPC64) */
}

bool kvmppc_has_cap_epr(void)
{
    return cap_epr;
}

bool kvmppc_has_cap_fixup_hcalls(void)
{
    return cap_fixup_hcalls;
}

bool kvmppc_has_cap_htm(void)
{
    return cap_htm;
}

bool kvmppc_has_cap_mmu_radix(void)
{
    return cap_mmu_radix;
}

bool kvmppc_has_cap_mmu_hash_v3(void)
{
    return cap_mmu_hash_v3;
}

static bool kvmppc_power8_host(void)
{
    bool ret = false;
#ifdef TARGET_PPC64
    {
        uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();
        ret = (base_pvr == CPU_POWERPC_POWER8E_BASE) ||
              (base_pvr == CPU_POWERPC_POWER8NVL_BASE) ||
              (base_pvr == CPU_POWERPC_POWER8_BASE);
    }
#endif /* TARGET_PPC64 */
    return ret;
}
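
/*
 * Note on the KVM_PPC_GET_CPU_CHAR parsing below: a bit set in
 * character_mask / behaviour_mask means KVM actually reported that bit,
 * and the corresponding bit in character / behaviour carries its value,
 * hence the "value & mask & FLAG" pattern.  The small integers returned
 * by these parse functions match the SPAPR_CAP_BROKEN (0),
 * SPAPR_CAP_WORKAROUND (1) and SPAPR_CAP_FIXED (2) values;
 * parse_cap_ppc_safe_indirect_branch() spells out its cases with the
 * named SPAPR_CAP_* constants.
 */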

static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
{
    bool l1d_thread_priv_req = !kvmppc_power8_host();

    if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
        return 2;
    } else if ((!l1d_thread_priv_req ||
                c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
               (c.character & c.character_mask
                & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
        return 1;
    }

    return 0;
}

static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
{
    if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
        return 2;
    } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
        return 1;
    }

    return 0;
}

static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
{
    if ((~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_FLUSH_COUNT_CACHE) &&
        (~c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) &&
        (~c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED)) {
        return SPAPR_CAP_FIXED_NA;
    } else if (c.behaviour & c.behaviour_mask & H_CPU_BEHAV_FLUSH_COUNT_CACHE) {
        return SPAPR_CAP_WORKAROUND;
    } else if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
        return SPAPR_CAP_FIXED_CCD;
    } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
        return SPAPR_CAP_FIXED_IBS;
    }

    return 0;
}

static int parse_cap_ppc_count_cache_flush_assist(struct kvm_ppc_cpu_char c)
{
    if (c.character & c.character_mask & H_CPU_CHAR_BCCTR_FLUSH_ASSIST) {
        return 1;
    }
    return 0;
}

static void kvmppc_get_cpu_characteristics(KVMState *s)
{
    struct kvm_ppc_cpu_char c;
    int ret;

    /* Assume broken */
    cap_ppc_safe_cache = 0;
    cap_ppc_safe_bounds_check = 0;
    cap_ppc_safe_indirect_branch = 0;

    ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
    if (!ret) {
        return;
    }
    ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
    if (ret < 0) {
        return;
    }

    cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
    cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
    cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
    cap_ppc_count_cache_flush_assist =
        parse_cap_ppc_count_cache_flush_assist(c);
}

int kvmppc_get_cap_safe_cache(void)
{
    return cap_ppc_safe_cache;
}

int kvmppc_get_cap_safe_bounds_check(void)
{
    return cap_ppc_safe_bounds_check;
}

int kvmppc_get_cap_safe_indirect_branch(void)
{
    return cap_ppc_safe_indirect_branch;
}

int kvmppc_get_cap_count_cache_flush_assist(void)
{
    return cap_ppc_count_cache_flush_assist;
}

bool kvmppc_has_cap_nested_kvm_hv(void)
{
    return !!cap_ppc_nested_kvm_hv;
}

int kvmppc_set_cap_nested_kvm_hv(int enable)
{
    return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_NESTED_HV, 0, enable);
}

bool kvmppc_has_cap_spapr_vfio(void)
{
    return cap_spapr_vfio;
}

int kvmppc_get_cap_large_decr(void)
{
    return cap_large_decr;
}
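
/*
 * kvmppc_enable_cap_large_decr() below toggles the large decrementer by
 * flipping LPCR_LD and then reading LPCR back: if the bit did not stick
 * (for instance because the kernel masks out LPCR bits it does not
 * support), the function reports failure rather than silently running
 * with the wrong decrementer width.
 */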

int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable)
{
    CPUState *cs = CPU(cpu);
    uint64_t lpcr;

    kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
    /* Do we need to modify the LPCR? */
    if (!!(lpcr & LPCR_LD) != !!enable) {
        if (enable) {
            lpcr |= LPCR_LD;
        } else {
            lpcr &= ~LPCR_LD;
        }
        kvm_set_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
        kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);

        if (!!(lpcr & LPCR_LD) != !!enable) {
            return -1;
        }
    }

    return 0;
}

PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
{
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc == NULL) {
        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
    }

    return pvr_pcc;
}

static int kvm_ppc_register_host_cpu_type(MachineState *ms)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .class_init = kvmppc_host_cpu_class_init,
    };
    MachineClass *mc = MACHINE_GET_CLASS(ms);
    PowerPCCPUClass *pvr_pcc;
    ObjectClass *oc;
    DeviceClass *dc;
    int i;

    pvr_pcc = kvm_ppc_get_host_cpu_class();
    if (pvr_pcc == NULL) {
        return -1;
    }
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);
    if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
        /* override TCG default cpu type with 'host' cpu model */
        mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
    }

    oc = object_class_by_name(type_info.name);
    g_assert(oc);

    /*
     * Update generic CPU family class alias (e.g. on a POWER8NVL host,
     * we want "POWER8" to be a "family" alias that points to the current
     * host CPU type, too)
     */
    dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
    for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
        if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
            char *suffix;

            ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
            suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
            if (suffix) {
                *suffix = 0;
            }
            break;
        }
    }

    return 0;
}

int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
{
    struct kvm_rtas_token_args args = {
        .token = token,
    };

    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
        return -ENOENT;
    }

    strncpy(args.name, function, sizeof(args.name));

    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
}

int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
{
    struct kvm_get_htab_fd s = {
        .flags = write ? KVM_GET_HTAB_WRITE : 0,
        .start_index = index,
    };
    int ret;

    if (!cap_htab_fd) {
        error_setg(errp, "KVM version doesn't support %s the HPT",
                   write ? "writing" : "reading");
        return -ENOTSUP;
    }

    ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
    if (ret < 0) {
        error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
                   write ? "writing" : "reading", write ? "to" : "from",
                   strerror(errno));
        return -errno;
    }

    return ret;
}

int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
{
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    uint8_t buf[bufsize];
    ssize_t rc;

    do {
        rc = read(fd, buf, bufsize);
        if (rc < 0) {
            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
                    strerror(errno));
            return rc;
        } else if (rc) {
            uint8_t *buffer = buf;
            ssize_t n = rc;
            while (n) {
                struct kvm_get_htab_header *head =
                    (struct kvm_get_htab_header *) buffer;
                size_t chunksize = sizeof(*head) +
                    HASH_PTE_SIZE_64 * head->n_valid;

                qemu_put_be32(f, head->index);
                qemu_put_be16(f, head->n_valid);
                qemu_put_be16(f, head->n_invalid);
                qemu_put_buffer(f, (void *)(head + 1),
                                HASH_PTE_SIZE_64 * head->n_valid);

                buffer += chunksize;
                n -= chunksize;
            }
        }
    } while ((rc != 0)
             && ((max_ns < 0)
                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));

    return (rc == 0) ? 1 : 0;
}

int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
                           uint16_t n_valid, uint16_t n_invalid)
{
    struct kvm_get_htab_header *buf;
    size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
    ssize_t rc;

    buf = alloca(chunksize);
    buf->index = index;
    buf->n_valid = n_valid;
    buf->n_invalid = n_invalid;

    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);

    rc = write(fd, buf, chunksize);
    if (rc < 0) {
        fprintf(stderr, "Error writing KVM hash table: %s\n",
                strerror(errno));
        return rc;
    }
    if (rc != chunksize) {
        /* We should never get a short write on a single chunk */
        fprintf(stderr, "Short write, restoring KVM hash table\n");
        return -1;
    }
    return 0;
}

bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

void kvm_arch_init_irq_routing(KVMState *s)
{
}

void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
{
    int fd, rc;
    int i;

    fd = kvmppc_get_htab_fd(false, ptex, &error_abort);

    i = 0;
    while (i < n) {
        struct kvm_get_htab_header *hdr;
        int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
        char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];

        rc = read(fd, buf, sizeof(buf));
        if (rc < 0) {
            hw_error("kvmppc_read_hptes: Unable to read HPTEs");
        }

        hdr = (struct kvm_get_htab_header *)buf;
        while ((i < n) && ((char *)hdr < (buf + rc))) {
            int invalid = hdr->n_invalid, valid = hdr->n_valid;

            if (hdr->index != (ptex + i)) {
                hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
                         " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
            }

            if (n - i < valid) {
                valid = n - i;
            }
            memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
            i += valid;

            if ((n - i) < invalid) {
                invalid = n - i;
            }
            memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
            i += invalid;

            hdr = (struct kvm_get_htab_header *)
                ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
        }
    }

    close(fd);
}

void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
{
    int fd, rc;
    struct {
        struct kvm_get_htab_header hdr;
        uint64_t pte0;
        uint64_t pte1;
    } buf;

    fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);

    buf.hdr.n_valid = 1;
    buf.hdr.n_invalid = 0;
    buf.hdr.index = ptex;
    buf.pte0 = cpu_to_be64(pte0);
    buf.pte1 = cpu_to_be64(pte1);

    rc = write(fd, &buf, sizeof(buf));
    if (rc != sizeof(buf)) {
        hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
    }
    close(fd);
}

int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
                                int vector, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_release_virq_post(int virq)
{
    return 0;
}

int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return data & 0xffff;
}

int kvmppc_enable_hwrng(void)
{
    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
        return -1;
    }

    return kvmppc_enable_hcall(kvm_state, H_RANDOM);
}

void kvmppc_check_papr_resize_hpt(Error **errp)
{
    if (!kvm_enabled()) {
        return; /* No KVM, we're good */
    }

    if (cap_resize_hpt) {
        return; /* Kernel has explicit support, we're good */
    }

    /* Otherwise fallback on looking for PR KVM */
    if (kvmppc_is_pr(kvm_state)) {
        return;
    }

    error_setg(errp,
               "Hash page table resizing not available with this KVM version");
}

int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
{
    CPUState *cs = CPU(cpu);
    struct kvm_ppc_resize_hpt rhpt = {
        .flags = flags,
        .shift = shift,
    };

    if (!cap_resize_hpt) {
        return -ENOSYS;
    }

    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
}

int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
{
    CPUState *cs = CPU(cpu);
    struct kvm_ppc_resize_hpt rhpt = {
        .flags = flags,
        .shift = shift,
    };

    if (!cap_resize_hpt) {
        return -ENOSYS;
    }

    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
}
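
/*
 * The two resize entry points above mirror the PAPR HPT resizing flow:
 * the prepare step asks KVM to start building a hash table of the new
 * size (2^shift bytes) and the commit step switches the guest over to it,
 * backing the H_RESIZE_HPT_PREPARE and H_RESIZE_HPT_COMMIT hypercalls
 * handled by the sPAPR code.
 */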

/*
 * This is a helper function to detect a post-migration scenario in which
 * a guest, running as KVM-HV, freezes in cpu_post_load because the guest
 * kernel can't handle a PVR value other than the actual host PVR in
 * KVM_SET_SREGS, even if pvr_match() returns true.
 *
 * If we don't have cap_ppc_pvr_compat and we're not running in PR
 * (so, we're HV), return true. The workaround itself is done in
 * cpu_post_load.
 *
 * The order here is important: we'll only check for KVM PR as a
 * fallback if the guest kernel can't handle the situation itself.
 * We want to avoid, as much as possible, querying the running KVM
 * type at the QEMU level.
 */
bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);

    if (!kvm_enabled()) {
        return false;
    }

    if (cap_ppc_pvr_compat) {
        return false;
    }

    return !kvmppc_is_pr(cs->kvm_state);
}

void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online)
{
    CPUState *cs = CPU(cpu);

    if (kvm_enabled()) {
        kvm_set_one_reg(cs, KVM_REG_PPC_ONLINE, &online);
    }
}