xref: /qemu/target/ppc/kvm.c (revision 77d361b13c19fdf881bff044a5bec99108cf2da2)
1 /*
2  * PowerPC implementation of KVM hooks
3  *
4  * Copyright IBM Corp. 2007
5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
6  *
7  * Authors:
8  *  Jerone Young <jyoung5@us.ibm.com>
9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10  *  Hollis Blanchard <hollisb@us.ibm.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
13  * See the COPYING file in the top-level directory.
14  *
15  */
16 
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21 
22 #include <linux/kvm.h>
23 
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
36 
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
50 #include "elf.h"
51 #include "sysemu/kvm_int.h"
52 
/* Define DEBUG_KVM to get verbose per-call diagnostics on stderr. */
//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
/* Expands to an empty statement when debugging is disabled. */
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

/* Host sysfs/procfs path where the device tree exposes CPU nodes. */
#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
64 
/* No KVM capabilities are hard-required on PPC; the list is just the
 * terminator. */
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

/* Cached results of KVM capability probes, filled in by kvm_arch_init()
 * (except cap_papr, which kvmppc_set_papr() activates, and cap_htm, which
 * kvm_arch_init_vcpu() may also turn on from host HWCAP2). */
static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_smt_possible;
static int cap_spapr_tce;
static int cap_spapr_tce_64;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;
static int cap_htm;             /* Hardware transactional memory support */
static int cap_mmu_radix;
static int cap_mmu_hash_v3;
static int cap_resize_hpt;
static int cap_ppc_pvr_compat;
static int cap_ppc_safe_cache;
static int cap_ppc_safe_bounds_check;
static int cap_ppc_safe_indirect_branch;

/* Software-breakpoint opcode fetched from KVM_REG_PPC_DEBUG_INST. */
static uint32_t debug_inst_opcode;

/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;
107 
108 static void kvm_kick_cpu(void *opaque)
109 {
110     PowerPCCPU *cpu = opaque;
111 
112     qemu_cpu_kick(CPU(cpu));
113 }
114 
115 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
116  * should only be used for fallback tests - generally we should use
117  * explicit capabilities for the features we want, rather than
118  * assuming what is/isn't available depending on the KVM variant. */
119 static bool kvmppc_is_pr(KVMState *ks)
120 {
121     /* Assume KVM-PR if the GET_PVINFO capability is available */
122     return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
123 }
124 
125 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
126 static void kvmppc_get_cpu_characteristics(KVMState *s);
127 
/*
 * One-time architecture init: probe every PPC-relevant KVM capability into
 * the cap_* globals, query speculation-mitigation characteristics, and
 * register the host CPU class.  Always returns 0 (probing cannot fail).
 */
int kvm_arch_init(MachineState *ms, KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
    cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr() */
    cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
    cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
    cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
    cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
    cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
    /* Fills in cap_ppc_safe_cache / _bounds_check / _indirect_branch. */
    kvmppc_get_cpu_characteristics(s);
    /*
     * Note: setting it to false because there is not such capability
     * in KVM at this moment.
     *
     * TODO: call kvm_vm_check_extension() with the right capability
     * after the kernel starts implementing it.*/
    cap_ppc_pvr_compat = false;

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type(ms);

    return 0;
}
170 
/* No in-kernel irqchip creation step is needed on PPC; report success. */
int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
{
    return 0;
}
175 
176 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
177 {
178     CPUPPCState *cenv = &cpu->env;
179     CPUState *cs = CPU(cpu);
180     struct kvm_sregs sregs;
181     int ret;
182 
183     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
184         /* What we're really trying to say is "if we're on BookE, we use
185            the native PVR for now". This is the only sane way to check
186            it though, so we potentially confuse users that they can run
187            BookE guests on BookS. Let's hope nobody dares enough :) */
188         return 0;
189     } else {
190         if (!cap_segstate) {
191             fprintf(stderr, "kvm error: missing PVR setting capability\n");
192             return -ENOSYS;
193         }
194     }
195 
196     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
197     if (ret) {
198         return ret;
199     }
200 
201     sregs.pvr = cenv->spr[SPR_PVR];
202     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
203 }
204 
/* Set up a shared TLB array with KVM.
 *
 * Describes the geometry of the guest's BookE 2.06 TLBs to KVM and hands
 * it QEMU's backing array via KVM_CAP_SW_TLB, so host and QEMU share one
 * TLB representation.  Returns 0 on success (or when the capability is
 * absent, in which case nothing is shared) and a negative errno otherwise.
 */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    /* Mirror each TLB's size/associativity into the KVM param struct. */
    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    /* The shared array only works if both sides agree on entry count
     * and on the in-memory entry layout. */
    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}
248 
249 
250 #if defined(TARGET_PPC64)
/*
 * Synthesize an smmu_info when the kernel predates KVM_PPC_GET_SMMU_INFO.
 * Fills *info with conservative page/segment-size encodings based on
 * whether we are on PR or HV KVM (see the big comment below).
 */
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - Check whether we are running "PR" KVM which only supports 4K
     *   and 16M pages, but supports them regardless of the backing
     *   store characteritics. We also don't support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows supports for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertize 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvmppc_is_pr(cs->kvm_state)) {
        /* No flags */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        if (env->mmu_model == POWERPC_MMU_2_06 ||
            env->mmu_model == POWERPC_MMU_2_07) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 and later */
        if (env->mmu_model == POWERPC_MMU_2_06 ||
            env->mmu_model == POWERPC_MMU_2_07) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}
338 
339 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
340 {
341     CPUState *cs = CPU(cpu);
342     int ret;
343 
344     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
345         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
346         if (ret == 0) {
347             return;
348         }
349     }
350 
351     kvm_get_fallback_smmu_info(cpu, info);
352 }
353 
354 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
355 {
356     KVMState *s = KVM_STATE(current_machine->accelerator);
357     struct ppc_radix_page_info *radix_page_info;
358     struct kvm_ppc_rmmu_info rmmu_info;
359     int i;
360 
361     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
362         return NULL;
363     }
364     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
365         return NULL;
366     }
367     radix_page_info = g_malloc0(sizeof(*radix_page_info));
368     radix_page_info->count = 0;
369     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
370         if (rmmu_info.ap_encodings[i]) {
371             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
372             radix_page_info->count++;
373         }
374     }
375     return radix_page_info;
376 }
377 
378 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
379                                      bool radix, bool gtse,
380                                      uint64_t proc_tbl)
381 {
382     CPUState *cs = CPU(cpu);
383     int ret;
384     uint64_t flags = 0;
385     struct kvm_ppc_mmuv3_cfg cfg = {
386         .process_table = proc_tbl,
387     };
388 
389     if (radix) {
390         flags |= KVM_PPC_MMUV3_RADIX;
391     }
392     if (gtse) {
393         flags |= KVM_PPC_MMUV3_GTSE;
394     }
395     cfg.flags = flags;
396     ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
397     switch (ret) {
398     case 0:
399         return H_SUCCESS;
400     case -EINVAL:
401         return H_PARAMETER;
402     case -ENODEV:
403         return H_NOT_AVAILABLE;
404     default:
405         return H_HARDWARE;
406     }
407 }
408 
409 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
410 {
411     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
412         return true;
413     }
414 
415     return (1ul << shift) <= rampgsize;
416 }
417 
418 static long max_cpu_page_size;
419 
/*
 * Rebuild cpu->hash64_opts->sps so it only advertises page/segment sizes
 * that KVM (and, under HV, the RAM backing store) can actually support.
 * The kernel's smmu_info is cached in function-static storage, so it is
 * fetched once and reused for every vCPU.
 */
static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    CPUPPCState *env = &cpu->env;
    int iq, ik, jq, jk;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(cpu, &smmu_info);
        has_smmu_info = true;
    }

    if (!max_cpu_page_size) {
        max_cpu_page_size = qemu_getrampagesize();
    }

    /* Convert to QEMU form */
    memset(cpu->hash64_opts->sps, 0, sizeof(*cpu->hash64_opts->sps));

    /* If we have HV KVM, we need to forbid CI large pages if our
     * host page size is smaller than 64K.
     */
    if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
        if (getpagesize() >= 0x10000) {
            cpu->hash64_opts->flags |= PPC_HASH64_CI_LARGEPAGE;
        } else {
            cpu->hash64_opts->flags &= ~PPC_HASH64_CI_LARGEPAGE;
        }
    }

    /*
     * XXX This loop should be an entry wide AND of the capabilities that
     *     the selected CPU has with the capabilities that KVM supports.
     */
    /* ik/jk index the kernel's table, iq/jq the (possibly shorter) QEMU
     * table; entries failing the page-size check are skipped, compacting
     * the QEMU-side arrays. */
    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    cpu->hash64_opts->slb_size = smmu_info.slb_size;
    if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
        cpu->hash64_opts->flags &= ~PPC_HASH64_1TSEG;
    }
}
490 
/*
 * Check that the memory backend at obj_path uses pages at least as large
 * as the maximum CPU page size (as cached by kvm_fixup_page_sizes()).
 *
 * NOTE(review): object_resolve_path() can return NULL for an unknown
 * path; callers appear to pass only validated backend paths — confirm,
 * otherwise MEMORY_BACKEND(NULL) would be dereferenced below.
 */
bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
{
    Object *mem_obj = object_resolve_path(obj_path, NULL);
    long pagesize = host_memory_backend_pagesize(MEMORY_BACKEND(mem_obj));

    return pagesize >= max_cpu_page_size;
}
498 
499 #else /* defined (TARGET_PPC64) */
500 
/* No page-size fixup is needed on non-64-bit targets. */
static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
}
504 
/* Without 64-bit hash MMU constraints, any backend page size is fine. */
bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
{
    return true;
}
509 
510 #endif /* !defined (TARGET_PPC64) */
511 
512 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
513 {
514     return POWERPC_CPU(cpu)->vcpu_id;
515 }
516 
/* e500 supports 2 h/w breakpoint and 2 watchpoint.
 * book3s supports only 1 watchpoint, so array size
 * of 4 is sufficient for now.
 */
#define MAX_HW_BKPTS 4

/* Currently-armed hardware breakpoints/watchpoints, shared by all vCPUs. */
static struct HWBreakpoint {
    target_ulong addr;
    int type;
} hw_debug_points[MAX_HW_BKPTS];

static CPUWatchpoint hw_watchpoint;

/* Default there is no breakpoint and watchpoint supported */
static int max_hw_breakpoint;   /* limits advertised by the CPU model */
static int max_hw_watchpoint;
static int nb_hw_breakpoint;    /* counts of points currently in use */
static int nb_hw_watchpoint;
535 
536 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
537 {
538     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
539         max_hw_breakpoint = 2;
540         max_hw_watchpoint = 2;
541     }
542 
543     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
544         fprintf(stderr, "Error initializing h/w breakpoints\n");
545         return;
546     }
547 }
548 
/*
 * Per-vCPU KVM initialization: fix up page sizes, push sregs (the PVR),
 * arm the interrupt-reinjection idle timer, do MMU-model specific setup,
 * and cache the debug instruction opcode.  Returns 0 or a negative errno.
 */
int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        if (ret == -EINVAL) {
            error_report("Register sync failed... If you're using kvm-hv.ko,"
                         " only \"-cpu host\" is possible");
        }
        return ret;
    }

    /* Workaround for the level-triggered-irq race, see idle_timer above. */
    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);

    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        /* This target supports access to KVM's guest TLB */
        ret = kvm_booke206_tlb_init(cpu);
        break;
    case POWERPC_MMU_2_07:
        if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
            /* KVM-HV has transactional memory on POWER8 also without the
             * KVM_CAP_PPC_HTM extension, so enable it here instead as
             * long as it's availble to userspace on the host. */
            if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
                cap_htm = true;
            }
        }
        break;
    default:
        break;
    }

    /* Fetch the breakpoint opcode and size the h/w debug point tables. */
    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
    kvmppc_hw_debug_points_init(cenv);

    return ret;
}
594 
595 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
596 {
597     CPUPPCState *env = &cpu->env;
598     CPUState *cs = CPU(cpu);
599     struct kvm_dirty_tlb dirty_tlb;
600     unsigned char *bitmap;
601     int ret;
602 
603     if (!env->kvm_sw_tlb) {
604         return;
605     }
606 
607     bitmap = g_malloc((env->nb_tlb + 7) / 8);
608     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
609 
610     dirty_tlb.bitmap = (uintptr_t)bitmap;
611     dirty_tlb.num_dirty = env->nb_tlb;
612 
613     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
614     if (ret) {
615         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
616                 __func__, strerror(-ret));
617     }
618 
619     g_free(bitmap);
620 }
621 
622 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
623 {
624     PowerPCCPU *cpu = POWERPC_CPU(cs);
625     CPUPPCState *env = &cpu->env;
626     union {
627         uint32_t u32;
628         uint64_t u64;
629     } val;
630     struct kvm_one_reg reg = {
631         .id = id,
632         .addr = (uintptr_t) &val,
633     };
634     int ret;
635 
636     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
637     if (ret != 0) {
638         trace_kvm_failed_spr_get(spr, strerror(errno));
639     } else {
640         switch (id & KVM_REG_SIZE_MASK) {
641         case KVM_REG_SIZE_U32:
642             env->spr[spr] = val.u32;
643             break;
644 
645         case KVM_REG_SIZE_U64:
646             env->spr[spr] = val.u64;
647             break;
648 
649         default:
650             /* Don't handle this size yet */
651             abort();
652         }
653     }
654 }
655 
656 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
657 {
658     PowerPCCPU *cpu = POWERPC_CPU(cs);
659     CPUPPCState *env = &cpu->env;
660     union {
661         uint32_t u32;
662         uint64_t u64;
663     } val;
664     struct kvm_one_reg reg = {
665         .id = id,
666         .addr = (uintptr_t) &val,
667     };
668     int ret;
669 
670     switch (id & KVM_REG_SIZE_MASK) {
671     case KVM_REG_SIZE_U32:
672         val.u32 = env->spr[spr];
673         break;
674 
675     case KVM_REG_SIZE_U64:
676         val.u64 = env->spr[spr];
677         break;
678 
679     default:
680         /* Don't handle this size yet */
681         abort();
682     }
683 
684     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
685     if (ret != 0) {
686         trace_kvm_failed_spr_set(spr, strerror(errno));
687     }
688 }
689 
/*
 * Push the FP/VSX and Altivec state from env into KVM with SET_ONE_REG
 * calls, guarded by the CPU's insns_flags.  Returns 0 or the first
 * failing ioctl's (negative) return value.
 */
static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            /* KVM's VSR layout puts the FPR half first on big-endian
             * hosts and second on little-endian ones. */
#ifdef HOST_WORDS_BIGENDIAN
            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
#else
            vsr[0] = env->vsr[i];
            vsr[1] = float64_val(env->fpr[i]);
#endif
            reg.addr = (uintptr_t) &vsr;
            /* With VSX, the full 128-bit VSR is transferred; otherwise
             * only the 64-bit FPR. */
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}
754 
/*
 * Fetch the FP/VSX and Altivec state from KVM into env with GET_ONE_REG
 * calls, mirroring kvm_put_fp().  Returns 0 or the first failing ioctl's
 * (negative) return value.
 */
static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get %s%d from KVM: %s\n",
                        vsx ? "VSR" : "FPR", i, strerror(errno));
                return ret;
            } else {
                /* Unpack the host-endian-dependent VSR halves; see the
                 * matching packing in kvm_put_fp(). */
#ifdef HOST_WORDS_BIGENDIAN
                env->fpr[i] = vsr[0];
                if (vsx) {
                    env->vsr[i] = vsr[1];
                }
#else
                env->fpr[i] = vsr[1];
                if (vsx) {
                    env->vsr[i] = vsr[0];
                }
#endif
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get VR%d from KVM: %s\n",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}
827 
#if defined(TARGET_PPC64)
/*
 * Fetch the VPA (Virtual Processor Area), SLB shadow and dispatch trace
 * log registrations from KVM into the sPAPR per-CPU state.  The asserts
 * check the size/addr field pairs are adjacent, since KVM reads each pair
 * through a single 128-bit one_reg starting at the addr field.
 */
static int kvm_get_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
    struct kvm_one_reg reg;
    int ret;

    reg.id = KVM_REG_PPC_VPA_ADDR;
    reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&spapr_cpu->slb_shadow_size
           == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    assert((uintptr_t)&spapr_cpu->dtl_size
           == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    return 0;
}
868 
/*
 * Push the VPA, SLB shadow and dispatch trace log registrations to KVM.
 * The master VPA must be registered before (and deregistered after) its
 * dependents, hence the conditional set at the top and bottom.
 */
static int kvm_put_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
    struct kvm_one_reg reg;
    int ret;

    /* SLB shadow or DTL can't be registered unless a master VPA is
     * registered.  That means when restoring state, if a VPA *is*
     * registered, we need to set that up first.  If not, we need to
     * deregister the others before deregistering the master VPA */
    assert(spapr_cpu->vpa_addr
           || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));

    if (spapr_cpu->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    /* Field-pair adjacency is required: KVM writes each size/addr pair
     * through one 128-bit one_reg starting at the addr field. */
    assert((uintptr_t)&spapr_cpu->slb_shadow_size
           == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&spapr_cpu->dtl_size
           == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
                strerror(errno));
        return ret;
    }

    /* Deregister the master VPA last, after its dependents (see above). */
    if (!spapr_cpu->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    return 0;
}
926 #endif /* TARGET_PPC64 */
927 
928 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
929 {
930     CPUPPCState *env = &cpu->env;
931     struct kvm_sregs sregs;
932     int i;
933 
934     sregs.pvr = env->spr[SPR_PVR];
935 
936     if (cpu->vhyp) {
937         PPCVirtualHypervisorClass *vhc =
938             PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
939         sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
940     } else {
941         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
942     }
943 
944     /* Sync SLB */
945 #ifdef TARGET_PPC64
946     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
947         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
948         if (env->slb[i].esid & SLB_ESID_V) {
949             sregs.u.s.ppc64.slb[i].slbe |= i;
950         }
951         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
952     }
953 #endif
954 
955     /* Sync SRs */
956     for (i = 0; i < 16; i++) {
957         sregs.u.s.ppc32.sr[i] = env->sr[i];
958     }
959 
960     /* Sync BATs */
961     for (i = 0; i < 8; i++) {
962         /* Beware. We have to swap upper and lower bits here */
963         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
964             | env->DBAT[1][i];
965         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
966             | env->IBAT[1][i];
967     }
968 
969     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
970 }
971 
972 int kvm_arch_put_registers(CPUState *cs, int level)
973 {
974     PowerPCCPU *cpu = POWERPC_CPU(cs);
975     CPUPPCState *env = &cpu->env;
976     struct kvm_regs regs;
977     int ret;
978     int i;
979 
980     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
981     if (ret < 0) {
982         return ret;
983     }
984 
985     regs.ctr = env->ctr;
986     regs.lr  = env->lr;
987     regs.xer = cpu_read_xer(env);
988     regs.msr = env->msr;
989     regs.pc = env->nip;
990 
991     regs.srr0 = env->spr[SPR_SRR0];
992     regs.srr1 = env->spr[SPR_SRR1];
993 
994     regs.sprg0 = env->spr[SPR_SPRG0];
995     regs.sprg1 = env->spr[SPR_SPRG1];
996     regs.sprg2 = env->spr[SPR_SPRG2];
997     regs.sprg3 = env->spr[SPR_SPRG3];
998     regs.sprg4 = env->spr[SPR_SPRG4];
999     regs.sprg5 = env->spr[SPR_SPRG5];
1000     regs.sprg6 = env->spr[SPR_SPRG6];
1001     regs.sprg7 = env->spr[SPR_SPRG7];
1002 
1003     regs.pid = env->spr[SPR_BOOKE_PID];
1004 
1005     for (i = 0;i < 32; i++)
1006         regs.gpr[i] = env->gpr[i];
1007 
1008     regs.cr = 0;
1009     for (i = 0; i < 8; i++) {
1010         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1011     }
1012 
1013     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1014     if (ret < 0)
1015         return ret;
1016 
1017     kvm_put_fp(cs);
1018 
1019     if (env->tlb_dirty) {
1020         kvm_sw_tlb_put(cpu);
1021         env->tlb_dirty = false;
1022     }
1023 
1024     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1025         ret = kvmppc_put_books_sregs(cpu);
1026         if (ret < 0) {
1027             return ret;
1028         }
1029     }
1030 
1031     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1032         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1033     }
1034 
1035     if (cap_one_reg) {
1036         int i;
1037 
1038         /* We deliberately ignore errors here, for kernels which have
1039          * the ONE_REG calls, but don't support the specific
1040          * registers, there's a reasonable chance things will still
1041          * work, at least until we try to migrate. */
1042         for (i = 0; i < 1024; i++) {
1043             uint64_t id = env->spr_cb[i].one_reg_id;
1044 
1045             if (id != 0) {
1046                 kvm_put_one_spr(cs, id, i);
1047             }
1048         }
1049 
1050 #ifdef TARGET_PPC64
1051         if (msr_ts) {
1052             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1053                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1054             }
1055             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1056                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1057             }
1058             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1059             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1060             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1061             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1062             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1063             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1064             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1065             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1066             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1067             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1068         }
1069 
1070         if (cap_papr) {
1071             if (kvm_put_vpa(cs) < 0) {
1072                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1073             }
1074         }
1075 
1076         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1077 #endif /* TARGET_PPC64 */
1078     }
1079 
1080     return ret;
1081 }
1082 
1083 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1084 {
1085      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1086 }
1087 
/*
 * Fetch the BookE sregs from KVM and copy back every register group the
 * kernel advertises through sregs.u.e.features.  Each synced IVOR also
 * recomputes the corresponding exception vector via kvm_sync_excp().
 * Returns 0 on success or a negative errno from the GET_SREGS ioctl.
 */
static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int ret;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
    if (ret < 0) {
        return ret;
    }

    /* Base set: exception save/restore, timer control and timebase */
    if (sregs.u.e.features & KVM_SREGS_E_BASE) {
        env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
        env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
        env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
        env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
        env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
        env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
        env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
        env->spr[SPR_DECR] = sregs.u.e.dec;
        /* 64-bit timebase is split across the TBL/TBU pair */
        env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
        env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
        env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
    }

    /* Arch 2.06 additions */
    if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
        env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
        env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
        env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
        env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
        env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_64) {
        env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
        env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
    }

    /* Interrupt vector offsets; refresh the derived exception vectors too */
    if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
        env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
        kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
        env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
        kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
        env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
        kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
        env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
        kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
        env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
        kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
        env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
        kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
        env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
        kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
        env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
        kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
        env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
        kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
        env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
        kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
        env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
        kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
        env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
        kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
        env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
        kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
        env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
        kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
        env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
        kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
        env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
        kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);

        /* SPE/embedded-FP vectors */
        if (sregs.u.e.features & KVM_SREGS_E_SPE) {
            env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
            kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
            env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
            kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
            env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
        }

        /* Performance-monitor vector */
        if (sregs.u.e.features & KVM_SREGS_E_PM) {
            env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
        }

        /* Doorbell vectors */
        if (sregs.u.e.features & KVM_SREGS_E_PC) {
            env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
            kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
            env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
        }
    }

    /* MMU assist (MAS) registers; MAS3/MAS7 share one 64-bit field */
    if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
        env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
        env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
        env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
        env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
        env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
        env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
        env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
        env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
        env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
        env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
    }

    if (sregs.u.e.features & KVM_SREGS_EXP) {
        env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_PD) {
        env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
        env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
    }

    /* Freescale implementation-specific registers */
    if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
        env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
        env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
        env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

        if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
            env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
            env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
        }
    }

    return 0;
}
1220 
1221 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1222 {
1223     CPUPPCState *env = &cpu->env;
1224     struct kvm_sregs sregs;
1225     int ret;
1226     int i;
1227 
1228     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1229     if (ret < 0) {
1230         return ret;
1231     }
1232 
1233     if (!cpu->vhyp) {
1234         ppc_store_sdr1(env, sregs.u.s.sdr1);
1235     }
1236 
1237     /* Sync SLB */
1238 #ifdef TARGET_PPC64
1239     /*
1240      * The packed SLB array we get from KVM_GET_SREGS only contains
1241      * information about valid entries. So we flush our internal copy
1242      * to get rid of stale ones, then put all valid SLB entries back
1243      * in.
1244      */
1245     memset(env->slb, 0, sizeof(env->slb));
1246     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1247         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1248         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1249         /*
1250          * Only restore valid entries
1251          */
1252         if (rb & SLB_ESID_V) {
1253             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1254         }
1255     }
1256 #endif
1257 
1258     /* Sync SRs */
1259     for (i = 0; i < 16; i++) {
1260         env->sr[i] = sregs.u.s.ppc32.sr[i];
1261     }
1262 
1263     /* Sync BATs */
1264     for (i = 0; i < 8; i++) {
1265         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1266         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1267         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1268         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1269     }
1270 
1271     return 0;
1272 }
1273 
1274 int kvm_arch_get_registers(CPUState *cs)
1275 {
1276     PowerPCCPU *cpu = POWERPC_CPU(cs);
1277     CPUPPCState *env = &cpu->env;
1278     struct kvm_regs regs;
1279     uint32_t cr;
1280     int i, ret;
1281 
1282     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1283     if (ret < 0)
1284         return ret;
1285 
1286     cr = regs.cr;
1287     for (i = 7; i >= 0; i--) {
1288         env->crf[i] = cr & 15;
1289         cr >>= 4;
1290     }
1291 
1292     env->ctr = regs.ctr;
1293     env->lr = regs.lr;
1294     cpu_write_xer(env, regs.xer);
1295     env->msr = regs.msr;
1296     env->nip = regs.pc;
1297 
1298     env->spr[SPR_SRR0] = regs.srr0;
1299     env->spr[SPR_SRR1] = regs.srr1;
1300 
1301     env->spr[SPR_SPRG0] = regs.sprg0;
1302     env->spr[SPR_SPRG1] = regs.sprg1;
1303     env->spr[SPR_SPRG2] = regs.sprg2;
1304     env->spr[SPR_SPRG3] = regs.sprg3;
1305     env->spr[SPR_SPRG4] = regs.sprg4;
1306     env->spr[SPR_SPRG5] = regs.sprg5;
1307     env->spr[SPR_SPRG6] = regs.sprg6;
1308     env->spr[SPR_SPRG7] = regs.sprg7;
1309 
1310     env->spr[SPR_BOOKE_PID] = regs.pid;
1311 
1312     for (i = 0;i < 32; i++)
1313         env->gpr[i] = regs.gpr[i];
1314 
1315     kvm_get_fp(cs);
1316 
1317     if (cap_booke_sregs) {
1318         ret = kvmppc_get_booke_sregs(cpu);
1319         if (ret < 0) {
1320             return ret;
1321         }
1322     }
1323 
1324     if (cap_segstate) {
1325         ret = kvmppc_get_books_sregs(cpu);
1326         if (ret < 0) {
1327             return ret;
1328         }
1329     }
1330 
1331     if (cap_hior) {
1332         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1333     }
1334 
1335     if (cap_one_reg) {
1336         int i;
1337 
1338         /* We deliberately ignore errors here, for kernels which have
1339          * the ONE_REG calls, but don't support the specific
1340          * registers, there's a reasonable chance things will still
1341          * work, at least until we try to migrate. */
1342         for (i = 0; i < 1024; i++) {
1343             uint64_t id = env->spr_cb[i].one_reg_id;
1344 
1345             if (id != 0) {
1346                 kvm_get_one_spr(cs, id, i);
1347             }
1348         }
1349 
1350 #ifdef TARGET_PPC64
1351         if (msr_ts) {
1352             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1353                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1354             }
1355             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1356                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1357             }
1358             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1359             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1360             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1361             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1362             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1363             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1364             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1365             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1366             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1367             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1368         }
1369 
1370         if (cap_papr) {
1371             if (kvm_get_vpa(cs) < 0) {
1372                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1373             }
1374         }
1375 
1376         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1377 #endif
1378     }
1379 
1380     return 0;
1381 }
1382 
1383 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1384 {
1385     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1386 
1387     if (irq != PPC_INTERRUPT_EXT) {
1388         return 0;
1389     }
1390 
1391     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1392         return 0;
1393     }
1394 
1395     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1396 
1397     return 0;
1398 }
1399 
1400 #if defined(TARGET_PPCEMB)
1401 #define PPC_INPUT_INT PPC40x_INPUT_INT
1402 #elif defined(TARGET_PPC64)
1403 #define PPC_INPUT_INT PPC970_INPUT_INT
1404 #else
1405 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1406 #endif
1407 
/*
 * Called just before the vcpu re-enters the guest.  On kernels without
 * the level-interrupt capability (cap_interrupt_level), pending external
 * interrupts must be injected by hand here; a short timer is re-armed so
 * we come back soon in case the source was level-triggered.
 */
void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int r;
    unsigned irq;

    qemu_mutex_lock_iothread();

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1<<PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        DPRINTF("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                       (NANOSECONDS_PER_SECOND / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyways, so we will get a chance to deliver the rest. */

    qemu_mutex_unlock_iothread();
}
1447 
/* No PPC-specific post-run processing; report unspecified memory attrs. */
MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
{
    return MEMTXATTRS_UNSPECIFIED;
}
1452 
/* Report the vcpu's halted state to the generic KVM run loop. */
int kvm_arch_process_async_events(CPUState *cs)
{
    return cs->halted;
}
1457 
/*
 * Handle a KVM_EXIT_HLT: put the vcpu to sleep unless an interrupt is
 * already pending with external interrupts enabled (MSR[EE]), in which
 * case we resume so it gets delivered.  Always returns 0.
 */
static int kvmppc_handle_halt(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env; /* read implicitly by the msr_ee macro */

    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        cs->halted = 1;
        cs->exception_index = EXCP_HLT;
    }

    return 0;
}
1470 
1471 /* map dcr access to existing qemu dcr emulation */
1472 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1473 {
1474     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1475         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1476 
1477     return 0;
1478 }
1479 
1480 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1481 {
1482     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1483         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1484 
1485     return 0;
1486 }
1487 
1488 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1489 {
1490     /* Mixed endian case is not handled */
1491     uint32_t sc = debug_inst_opcode;
1492 
1493     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1494                             sizeof(sc), 0) ||
1495         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1496         return -EINVAL;
1497     }
1498 
1499     return 0;
1500 }
1501 
1502 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1503 {
1504     uint32_t sc;
1505 
1506     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1507         sc != debug_inst_opcode ||
1508         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1509                             sizeof(sc), 1)) {
1510         return -EINVAL;
1511     }
1512 
1513     return 0;
1514 }
1515 
1516 static int find_hw_breakpoint(target_ulong addr, int type)
1517 {
1518     int n;
1519 
1520     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1521            <= ARRAY_SIZE(hw_debug_points));
1522 
1523     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1524         if (hw_debug_points[n].addr == addr &&
1525              hw_debug_points[n].type == type) {
1526             return n;
1527         }
1528     }
1529 
1530     return -1;
1531 }
1532 
1533 static int find_hw_watchpoint(target_ulong addr, int *flag)
1534 {
1535     int n;
1536 
1537     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1538     if (n >= 0) {
1539         *flag = BP_MEM_ACCESS;
1540         return n;
1541     }
1542 
1543     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1544     if (n >= 0) {
1545         *flag = BP_MEM_WRITE;
1546         return n;
1547     }
1548 
1549     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1550     if (n >= 0) {
1551         *flag = BP_MEM_READ;
1552         return n;
1553     }
1554 
1555     return -1;
1556 }
1557 
1558 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1559                                   target_ulong len, int type)
1560 {
1561     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1562         return -ENOBUFS;
1563     }
1564 
1565     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1566     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1567 
1568     switch (type) {
1569     case GDB_BREAKPOINT_HW:
1570         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1571             return -ENOBUFS;
1572         }
1573 
1574         if (find_hw_breakpoint(addr, type) >= 0) {
1575             return -EEXIST;
1576         }
1577 
1578         nb_hw_breakpoint++;
1579         break;
1580 
1581     case GDB_WATCHPOINT_WRITE:
1582     case GDB_WATCHPOINT_READ:
1583     case GDB_WATCHPOINT_ACCESS:
1584         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1585             return -ENOBUFS;
1586         }
1587 
1588         if (find_hw_breakpoint(addr, type) >= 0) {
1589             return -EEXIST;
1590         }
1591 
1592         nb_hw_watchpoint++;
1593         break;
1594 
1595     default:
1596         return -ENOSYS;
1597     }
1598 
1599     return 0;
1600 }
1601 
1602 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1603                                   target_ulong len, int type)
1604 {
1605     int n;
1606 
1607     n = find_hw_breakpoint(addr, type);
1608     if (n < 0) {
1609         return -ENOENT;
1610     }
1611 
1612     switch (type) {
1613     case GDB_BREAKPOINT_HW:
1614         nb_hw_breakpoint--;
1615         break;
1616 
1617     case GDB_WATCHPOINT_WRITE:
1618     case GDB_WATCHPOINT_READ:
1619     case GDB_WATCHPOINT_ACCESS:
1620         nb_hw_watchpoint--;
1621         break;
1622 
1623     default:
1624         return -ENOSYS;
1625     }
1626     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1627 
1628     return 0;
1629 }
1630 
/* Drop every registered hardware breakpoint and watchpoint at once. */
void kvm_arch_remove_all_hw_breakpoints(void)
{
    nb_hw_breakpoint = nb_hw_watchpoint = 0;
}
1635 
/*
 * Populate the kvm_guest_debug control block from the currently
 * registered software and hardware breakpoints/watchpoints before the
 * vcpu enters the guest.
 */
void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
{
    int n;

    /* Software Breakpoint updates */
    if (kvm_sw_breakpoints_active(cs)) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
    }

    assert((nb_hw_breakpoint + nb_hw_watchpoint)
           <= ARRAY_SIZE(hw_debug_points));
    assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));

    /* Hardware breakpoint/watchpoint updates: translate each entry's GDB
     * type into the KVMPPC debug type flags */
    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
        memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
        for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
            switch (hw_debug_points[n].type) {
            case GDB_BREAKPOINT_HW:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
                break;
            case GDB_WATCHPOINT_WRITE:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
                break;
            case GDB_WATCHPOINT_READ:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
                break;
            case GDB_WATCHPOINT_ACCESS:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
                                        KVMPPC_DEBUG_WATCH_READ;
                break;
            default:
                cpu_abort(cs, "Unsupported breakpoint type\n");
            }
            dbg->arch.bp[n].addr = hw_debug_points[n].addr;
        }
    }
}
1674 
/*
 * Handle a KVM_EXIT_DEBUG.  Returns non-zero when the debug event belongs
 * to QEMU's gdbstub (single-step, or one of our sw/hw break/watchpoints),
 * so the caller reports EXCP_DEBUG.  Returns 0 after injecting a program
 * interrupt when the event was raised by the guest itself.
 */
static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
    int handle = 0;
    int n;
    int flag = 0;

    if (cs->singlestep_enabled) {
        handle = 1;
    } else if (arch_info->status) {
        /* Hardware debug event: match it against our registered points */
        if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
            if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
                n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
                if (n >= 0) {
                    handle = 1;
                }
            } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
                                            KVMPPC_DEBUG_WATCH_WRITE)) {
                n = find_hw_watchpoint(arch_info->address,  &flag);
                if (n >= 0) {
                    handle = 1;
                    cs->watchpoint_hit = &hw_watchpoint;
                    hw_watchpoint.vaddr = hw_debug_points[n].addr;
                    hw_watchpoint.flags = flag;
                }
            }
        }
    } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
        handle = 1;
    } else {
        /* QEMU is not able to handle debug exception, so inject
         * program exception to guest;
         * Yes program exception NOT debug exception !!
         * When QEMU is using debug resources then debug exception must
         * be always set. To achieve this we set MSR_DE and also set
         * MSRP_DEP so guest cannot change MSR_DE.
         * When emulating debug resource for guest we want guest
         * to control MSR_DE (enable/disable debug interrupt on need).
         * Supporting both configurations are NOT possible.
         * So the result is that we cannot share debug resources
         * between QEMU and Guest on BOOKE architecture.
         * In the current design QEMU gets the priority over guest,
         * this means that if QEMU is using debug resources then guest
         * cannot use them;
         * For software breakpoint QEMU uses a privileged instruction;
         * So there cannot be any reason that we are here for guest
         * set debug exception, only possibility is guest executed a
         * privileged / illegal instruction and that's why we are
         * injecting a program interrupt.
         */

        cpu_synchronize_state(cs);
        /* env->nip is PC, so increment this by 4 to use
         * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
         */
        env->nip += 4;
        cs->exception_index = POWERPC_EXCP_PROGRAM;
        env->error_code = POWERPC_EXCP_INVAL;
        ppc_cpu_do_interrupt(cs);
    }

    return handle;
}
1740 
/*
 * Dispatch on the KVM exit reason, holding the iothread lock throughout.
 * Returns 0 to re-enter the guest, EXCP_DEBUG to hand control to the
 * gdbstub, or -1 for exit reasons we don't know how to handle.
 */
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    qemu_mutex_lock_iothread();

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            DPRINTF("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            DPRINTF("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        DPRINTF("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        DPRINTF("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        DPRINTF("handle epr\n");
        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        DPRINTF("handle watchdog expiry\n");
        watchdog_perform_action();
        ret = 0;
        break;

    case KVM_EXIT_DEBUG:
        DPRINTF("handle debug exception\n");
        if (kvm_handle_debug(cpu, run)) {
            ret = EXCP_DEBUG;
            break;
        }
        /* re-enter, this exception was guest-internal */
        ret = 0;
        break;

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    qemu_mutex_unlock_iothread();
    return ret;
}
1802 
1803 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1804 {
1805     CPUState *cs = CPU(cpu);
1806     uint32_t bits = tsr_bits;
1807     struct kvm_one_reg reg = {
1808         .id = KVM_REG_PPC_OR_TSR,
1809         .addr = (uintptr_t) &bits,
1810     };
1811 
1812     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1813 }
1814 
1815 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1816 {
1817 
1818     CPUState *cs = CPU(cpu);
1819     uint32_t bits = tsr_bits;
1820     struct kvm_one_reg reg = {
1821         .id = KVM_REG_PPC_CLEAR_TSR,
1822         .addr = (uintptr_t) &bits,
1823     };
1824 
1825     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1826 }
1827 
1828 int kvmppc_set_tcr(PowerPCCPU *cpu)
1829 {
1830     CPUState *cs = CPU(cpu);
1831     CPUPPCState *env = &cpu->env;
1832     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1833 
1834     struct kvm_one_reg reg = {
1835         .id = KVM_REG_PPC_TCR,
1836         .addr = (uintptr_t) &tcr,
1837     };
1838 
1839     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1840 }
1841 
1842 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1843 {
1844     CPUState *cs = CPU(cpu);
1845     int ret;
1846 
1847     if (!kvm_enabled()) {
1848         return -1;
1849     }
1850 
1851     if (!cap_ppc_watchdog) {
1852         printf("warning: KVM does not support watchdog");
1853         return -1;
1854     }
1855 
1856     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1857     if (ret < 0) {
1858         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1859                 __func__, strerror(-ret));
1860         return ret;
1861     }
1862 
1863     return ret;
1864 }
1865 
/*
 * Scan /proc/cpuinfo for a line starting with 'field' and copy that
 * whole line into 'value' (at most len bytes, NUL-terminated).
 * Returns 0 when found, -1 otherwise.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    size_t field_len = strlen(field);
    int ret = -1;
    FILE *f = fopen("/proc/cpuinfo", "r");

    if (!f) {
        return -1;
    }

    for (;;) {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
        /* Preserve the original guard: stop on an empty line read. */
        if (line[0] == '\0') {
            break;
        }
    }

    fclose(f);

    return ret;
}
1893 
1894 uint32_t kvmppc_get_tbfreq(void)
1895 {
1896     char line[512];
1897     char *ns;
1898     uint32_t retval = NANOSECONDS_PER_SECOND;
1899 
1900     if (read_cpuinfo("timebase", line, sizeof(line))) {
1901         return retval;
1902     }
1903 
1904     if (!(ns = strchr(line, ':'))) {
1905         return retval;
1906     }
1907 
1908     ns++;
1909 
1910     return atoi(ns);
1911 }
1912 
1913 bool kvmppc_get_host_serial(char **value)
1914 {
1915     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1916                                NULL);
1917 }
1918 
1919 bool kvmppc_get_host_model(char **value)
1920 {
1921     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1922 }
1923 
1924 /* Try to find a device tree node for a CPU with clock-frequency property */
1925 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1926 {
1927     struct dirent *dirp;
1928     DIR *dp;
1929 
1930     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1931         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1932         return -1;
1933     }
1934 
1935     buf[0] = '\0';
1936     while ((dirp = readdir(dp)) != NULL) {
1937         FILE *f;
1938         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1939                  dirp->d_name);
1940         f = fopen(buf, "r");
1941         if (f) {
1942             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1943             fclose(f);
1944             break;
1945         }
1946         buf[0] = '\0';
1947     }
1948     closedir(dp);
1949     if (buf[0] == '\0') {
1950         printf("Unknown host!\n");
1951         return -1;
1952     }
1953 
1954     return 0;
1955 }
1956 
/*
 * Read a single big-endian integer property (4 or 8 bytes) from a
 * device tree file.  Returns (uint64_t)-1 if the file can't be opened
 * and 0 if the size isn't one of the two understood formats.
 */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    size_t len;
    FILE *f = fopen(filename, "rb");

    if (f == NULL) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);

    if (len == 4) {
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    }
    if (len == 8) {
        return be64_to_cpu(u.v64);
    }

    return 0;
}
1983 
1984 /* Read a CPU node property from the host device tree that's a single
1985  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1986  * (can't find or open the property, or doesn't understand the
1987  * format) */
1988 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1989 {
1990     char buf[PATH_MAX], *tmp;
1991     uint64_t val;
1992 
1993     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1994         return -1;
1995     }
1996 
1997     tmp = g_strdup_printf("%s/%s", buf, propname);
1998     val = kvmppc_read_int_dt(tmp);
1999     g_free(tmp);
2000 
2001     return val;
2002 }
2003 
/* Host CPU clock frequency from the device tree (see
 * kvmppc_read_int_cpu_dt() for the error conventions). */
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}
2008 
2009 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2010  {
2011      PowerPCCPU *cpu = ppc_env_get_cpu(env);
2012      CPUState *cs = CPU(cpu);
2013 
2014     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2015         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2016         return 0;
2017     }
2018 
2019     return 1;
2020 }
2021 
2022 int kvmppc_get_hasidle(CPUPPCState *env)
2023 {
2024     struct kvm_ppc_pvinfo pvinfo;
2025 
2026     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2027         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2028         return 1;
2029     }
2030 
2031     return 0;
2032 }
2033 
2034 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2035 {
2036     uint32_t *hc = (uint32_t*)buf;
2037     struct kvm_ppc_pvinfo pvinfo;
2038 
2039     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2040         memcpy(buf, pvinfo.hcall, buf_len);
2041         return 0;
2042     }
2043 
2044     /*
2045      * Fallback to always fail hypercalls regardless of endianness:
2046      *
2047      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2048      *     li r3, -1
2049      *     b .+8       (becomes nop in wrong endian)
2050      *     bswap32(li r3, -1)
2051      */
2052 
2053     hc[0] = cpu_to_be32(0x08000048);
2054     hc[1] = cpu_to_be32(0x3860ffff);
2055     hc[2] = cpu_to_be32(0x48000008);
2056     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2057 
2058     return 1;
2059 }
2060 
/* Whitelist a single hypercall for in-kernel handling by KVM. */
static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
{
    return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
}

/* Enable in-kernel handling of the logical cache-inhibited
 * load/store hcalls. */
void kvmppc_enable_logical_ci_hcalls(void)
{
    /*
     * FIXME: it would be nice if we could detect the cases where
     * we're using a device which requires the in kernel
     * implementation of these hcalls, but the kernel lacks them and
     * produce a warning.
     */
    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
}

/* Enable in-kernel handling of H_SET_MODE. */
void kvmppc_enable_set_mode_hcall(void)
{
    kvmppc_enable_hcall(kvm_state, H_SET_MODE);
}

/* Enable in-kernel handling of H_CLEAR_REF / H_CLEAR_MOD. */
void kvmppc_enable_clear_ref_mod_hcalls(void)
{
    kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
    kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
}
2088 
2089 void kvmppc_set_papr(PowerPCCPU *cpu)
2090 {
2091     CPUState *cs = CPU(cpu);
2092     int ret;
2093 
2094     if (!kvm_enabled()) {
2095         return;
2096     }
2097 
2098     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2099     if (ret) {
2100         error_report("This vCPU type or KVM version does not support PAPR");
2101         exit(1);
2102     }
2103 
2104     /* Update the capability flag so we sync the right information
2105      * with kvm */
2106     cap_papr = 1;
2107 }
2108 
/* Set the guest's architected compatibility PVR in KVM. */
int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
{
    return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
}
2113 
2114 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2115 {
2116     CPUState *cs = CPU(cpu);
2117     int ret;
2118 
2119     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2120     if (ret && mpic_proxy) {
2121         error_report("This KVM version does not support EPR");
2122         exit(1);
2123     }
2124 }
2125 
2126 int kvmppc_smt_threads(void)
2127 {
2128     return cap_ppc_smt ? cap_ppc_smt : 1;
2129 }
2130 
2131 int kvmppc_set_smt_threads(int smt)
2132 {
2133     int ret;
2134 
2135     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2136     if (!ret) {
2137         cap_ppc_smt = smt;
2138     }
2139     return ret;
2140 }
2141 
2142 void kvmppc_hint_smt_possible(Error **errp)
2143 {
2144     int i;
2145     GString *g;
2146     char *s;
2147 
2148     assert(kvm_enabled());
2149     if (cap_ppc_smt_possible) {
2150         g = g_string_new("Available VSMT modes:");
2151         for (i = 63; i >= 0; i--) {
2152             if ((1UL << i) & cap_ppc_smt_possible) {
2153                 g_string_append_printf(g, " %lu", (1UL << i));
2154             }
2155         }
2156         s = g_string_free(g, false);
2157         error_append_hint(errp, "%s.\n", s);
2158         g_free(s);
2159     } else {
2160         error_append_hint(errp,
2161                           "This KVM seems to be too old to support VSMT.\n");
2162     }
2163 }
2164 
2165 
#ifdef TARGET_PPC64
/* Clamp the requested RMA size to what the host MMU page sizes allow. */
uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    struct kvm_ppc_smmu_info info;
    long rampagesize;
    long best_page_shift = 0;
    int i;

    /* Find the largest hardware supported page size that's less than
     * or equal to the (logical) backing page size of guest RAM */
    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
    rampagesize = qemu_getrampagesize();

    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
        long shift = info.sps[i].page_shift;

        if (!shift) {
            continue;
        }
        if (shift > best_page_shift && (1UL << shift) <= rampagesize) {
            best_page_shift = shift;
        }
    }

    return MIN(current_size,
               1ULL << (best_page_shift + hash_shift - 7));
}
#endif
2196 
/* Whether KVM advertises in-kernel multi-TCE hcall support. */
bool kvmppc_spapr_use_multitce(void)
{
    return cap_spapr_multitce;
}
2201 
2202 int kvmppc_spapr_enable_inkernel_multitce(void)
2203 {
2204     int ret;
2205 
2206     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2207                             H_PUT_TCE_INDIRECT, 1);
2208     if (!ret) {
2209         ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2210                                 H_STUFF_TCE, 1);
2211     }
2212 
2213     return ret;
2214 }
2215 
/*
 * Create an in-kernel TCE (IOMMU) table for the given LIOBN and mmap()
 * it into QEMU's address space.
 *
 * On success returns the mapped table with the backing fd stored in
 * *pfd; on failure returns NULL, leaving *pfd == -1.
 */
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
                              uint64_t bus_offset, uint32_t nb_table,
                              int *pfd, bool need_vfio)
{
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
        return NULL;
    }

    if (cap_spapr_tce_64) {
        /* Newer ioctl: takes a bus offset and a 64-bit table size. */
        struct kvm_create_spapr_tce_64 args = {
            .liobn = liobn,
            .page_shift = page_shift,
            .offset = bus_offset >> page_shift,
            .size = nb_table,
            .flags = 0
        };
        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
        if (fd < 0) {
            fprintf(stderr,
                    "KVM: Failed to create TCE64 table for liobn 0x%x\n",
                    liobn);
            return NULL;
        }
    } else if (cap_spapr_tce) {
        /* Legacy ioctl: window size is a 32-bit field, no bus offset. */
        uint64_t window_size = (uint64_t) nb_table << page_shift;
        struct kvm_create_spapr_tce args = {
            .liobn = liobn,
            .window_size = window_size,
        };
        /* Bail out if the size was truncated by the 32-bit field or a
         * bus offset was requested (unsupported by this ioctl). */
        if ((window_size != args.window_size) || bus_offset) {
            return NULL;
        }
        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
        if (fd < 0) {
            fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                    liobn);
            return NULL;
        }
    } else {
        /* NOTE(review): looks unreachable — !cap_spapr_tce already
         * returned NULL above; confirm before removing. */
        return NULL;
    }

    len = nb_table * sizeof(uint64_t);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}
2280 
/*
 * Unmap and release a TCE table created by kvmppc_create_spapr_tce().
 * fd < 0 means the table was never created via KVM (see *pfd = -1 in
 * the create path) and there is nothing to do.  Always returns 0 for
 * a valid fd; failures are reported but the table is deliberately
 * leaked rather than left half-destroyed.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;
    bool munmap_failed, close_failed;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    /* Evaluate both calls unconditionally: the old short-circuit '||'
     * skipped close() whenever munmap() failed, leaking the fd too. */
    munmap_failed = munmap(table, len) < 0;
    close_failed = close(fd) < 0;
    if (munmap_failed || close_failed) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
2299 
2300 int kvmppc_reset_htab(int shift_hint)
2301 {
2302     uint32_t shift = shift_hint;
2303 
2304     if (!kvm_enabled()) {
2305         /* Full emulation, tell caller to allocate htab itself */
2306         return 0;
2307     }
2308     if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2309         int ret;
2310         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2311         if (ret == -ENOTTY) {
2312             /* At least some versions of PR KVM advertise the
2313              * capability, but don't implement the ioctl().  Oops.
2314              * Return 0 so that we allocate the htab in qemu, as is
2315              * correct for PR. */
2316             return 0;
2317         } else if (ret < 0) {
2318             return ret;
2319         }
2320         return shift;
2321     }
2322 
2323     /* We have a kernel that predates the htab reset calls.  For PR
2324      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2325      * this era, it has allocated a 16MB fixed size hash table already. */
2326     if (kvmppc_is_pr(kvm_state)) {
2327         /* PR - tell caller to allocate htab */
2328         return 0;
2329     } else {
2330         /* HV - assume 16MB kernel allocated htab */
2331         return 24;
2332     }
2333 }
2334 
/* Read the host Processor Version Register via the mfpvr instruction. */
static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}
2343 
/* Set (on) or clear (!on) the given flag bits in *word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
2352 
/*
 * Class init for the "host" CPU type: fill in the class with values
 * probed from the host (PVR, optional instruction groups, cache sizes).
 */
static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    /* kvmppc_read_int_cpu_dt() yields (uint64_t)-1 on failure, which
     * truncates to UINT32_MAX here; the != -1 checks below rely on
     * the matching unsigned conversion of -1. */
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */
    pcc->pvr = mfpvr();

    /* Advertise optional instruction groups only if the host HWCAP
     * says the hardware has them. */
    alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
    alter_insns(&pcc->insns_flags2, PPC2_VSX,
                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
    alter_insns(&pcc->insns_flags2, PPC2_DFP,
                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;
    }

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;
    }

#if defined(TARGET_PPC64)
    pcc->radix_page_info = kvm_get_radix_page_info();

    if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
        /*
         * POWER9 DD1 has some bugs which make it not really ISA 3.00
         * compliant.  More importantly, advertising ISA 3.00
         * architected mode may prevent guests from activating
         * necessary DD1 workarounds.
         */
        pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
                                | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
    }
#endif /* defined(TARGET_PPC64) */
}
2392 
/* Accessor for the cached KVM_CAP_PPC_EPR probe result. */
bool kvmppc_has_cap_epr(void)
{
    return cap_epr;
}

/* Accessor for the cached hcall-fixup capability probe result. */
bool kvmppc_has_cap_fixup_hcalls(void)
{
    return cap_fixup_hcalls;
}

/* Accessor for the cached hardware transactional memory capability. */
bool kvmppc_has_cap_htm(void)
{
    return cap_htm;
}

/* Accessor for the cached radix MMU capability. */
bool kvmppc_has_cap_mmu_radix(void)
{
    return cap_mmu_radix;
}

/* Accessor for the cached hash MMU v3 capability. */
bool kvmppc_has_cap_mmu_hash_v3(void)
{
    return cap_mmu_hash_v3;
}
2417 
/* True iff the host PVR base matches one of the POWER8 variants. */
static bool kvmppc_power8_host(void)
{
    bool ret = false;
#ifdef TARGET_PPC64
    uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();

    switch (base_pvr) {
    case CPU_POWERPC_POWER8E_BASE:
    case CPU_POWERPC_POWER8NVL_BASE:
    case CPU_POWERPC_POWER8_BASE:
        ret = true;
        break;
    default:
        break;
    }
#endif /* TARGET_PPC64 */
    return ret;
}
2431 
2432 static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
2433 {
2434     bool l1d_thread_priv_req = !kvmppc_power8_host();
2435 
2436     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2437         return 2;
2438     } else if ((!l1d_thread_priv_req ||
2439                 c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2440                (c.character & c.character_mask
2441                 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2442         return 1;
2443     }
2444 
2445     return 0;
2446 }
2447 
2448 static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
2449 {
2450     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2451         return 2;
2452     } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2453         return 1;
2454     }
2455 
2456     return 0;
2457 }
2458 
2459 static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
2460 {
2461     if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2462         return  SPAPR_CAP_FIXED_CCD;
2463     } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2464         return SPAPR_CAP_FIXED_IBS;
2465     }
2466 
2467     return 0;
2468 }
2469 
2470 static void kvmppc_get_cpu_characteristics(KVMState *s)
2471 {
2472     struct kvm_ppc_cpu_char c;
2473     int ret;
2474 
2475     /* Assume broken */
2476     cap_ppc_safe_cache = 0;
2477     cap_ppc_safe_bounds_check = 0;
2478     cap_ppc_safe_indirect_branch = 0;
2479 
2480     ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2481     if (!ret) {
2482         return;
2483     }
2484     ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2485     if (ret < 0) {
2486         return;
2487     }
2488 
2489     cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
2490     cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
2491     cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
2492 }
2493 
/* Cached result of the cpu-characteristics probe: cache safety. */
int kvmppc_get_cap_safe_cache(void)
{
    return cap_ppc_safe_cache;
}

/* Cached result of the cpu-characteristics probe: bounds checking. */
int kvmppc_get_cap_safe_bounds_check(void)
{
    return cap_ppc_safe_bounds_check;
}

/* Cached result of the cpu-characteristics probe: indirect branches. */
int kvmppc_get_cap_safe_indirect_branch(void)
{
    return cap_ppc_safe_indirect_branch;
}

/* Whether KVM supports VFIO-backed sPAPR TCE tables. */
bool kvmppc_has_cap_spapr_vfio(void)
{
    return cap_spapr_vfio;
}
2513 
2514 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2515 {
2516     uint32_t host_pvr = mfpvr();
2517     PowerPCCPUClass *pvr_pcc;
2518 
2519     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2520     if (pvr_pcc == NULL) {
2521         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2522     }
2523 
2524     return pvr_pcc;
2525 }
2526 
/*
 * Register the "host" CPU type, subclassing the class that matches the
 * host PVR, and repoint the generic family alias at it.  Returns 0 on
 * success, -1 when no CPU class matches the host PVR.
 */
static int kvm_ppc_register_host_cpu_type(MachineState *ms)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .class_init = kvmppc_host_cpu_class_init,
    };
    MachineClass *mc = MACHINE_GET_CLASS(ms);
    PowerPCCPUClass *pvr_pcc;
    ObjectClass *oc;
    DeviceClass *dc;
    int i;

    pvr_pcc = kvm_ppc_get_host_cpu_class();
    if (pvr_pcc == NULL) {
        return -1;
    }
    /* "host" becomes a subclass of the PVR-matched class. */
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);
    if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
        /* override TCG default cpu type with 'host' cpu model */
        mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
    }

    oc = object_class_by_name(type_info.name);
    g_assert(oc);

    /*
     * Update generic CPU family class alias (e.g. on a POWER8NVL host,
     * we want "POWER8" to be a "family" alias that points to the current
     * host CPU type, too)
     */
    dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
    for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
        if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
            char *suffix;

            /* Strip the type suffix so the alias names a bare model. */
            ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
            suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
            if (suffix) {
                *suffix = 0;
            }
            break;
        }
    }

    return 0;
}
2574 
2575 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2576 {
2577     struct kvm_rtas_token_args args = {
2578         .token = token,
2579     };
2580 
2581     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2582         return -ENOENT;
2583     }
2584 
2585     strncpy(args.name, function, sizeof(args.name));
2586 
2587     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2588 }
2589 
2590 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2591 {
2592     struct kvm_get_htab_fd s = {
2593         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2594         .start_index = index,
2595     };
2596     int ret;
2597 
2598     if (!cap_htab_fd) {
2599         error_setg(errp, "KVM version doesn't support %s the HPT",
2600                    write ? "writing" : "reading");
2601         return -ENOTSUP;
2602     }
2603 
2604     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2605     if (ret < 0) {
2606         error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2607                    write ? "writing" : "reading", write ? "to" : "from",
2608                    strerror(errno));
2609         return -errno;
2610     }
2611 
2612     return ret;
2613 }
2614 
/*
 * Stream the guest hash page table from the KVM HTAB fd into the
 * migration stream as (index, n_valid, n_invalid, PTEs...) records.
 *
 * Reads until the fd reports end of data (rc == 0) or max_ns of wall
 * clock time has elapsed (max_ns < 0 disables the time limit).
 * Returns 1 when the whole table was sent, 0 when interrupted by the
 * time limit, or a negative value on read failure.
 *
 * NOTE(review): buf is a VLA sized by the caller's bufsize — assumed
 * modest enough for the stack; confirm against callers.
 */
int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
{
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    uint8_t buf[bufsize];
    ssize_t rc;

    do {
        rc = read(fd, buf, bufsize);
        if (rc < 0) {
            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
                    strerror(errno));
            return rc;
        } else if (rc) {
            /* The kernel packs one or more variable-size chunks into
             * each read; walk them and re-emit each in big-endian. */
            uint8_t *buffer = buf;
            ssize_t n = rc;
            while (n) {
                struct kvm_get_htab_header *head =
                    (struct kvm_get_htab_header *) buffer;
                size_t chunksize = sizeof(*head) +
                     HASH_PTE_SIZE_64 * head->n_valid;

                qemu_put_be32(f, head->index);
                qemu_put_be16(f, head->n_valid);
                qemu_put_be16(f, head->n_invalid);
                qemu_put_buffer(f, (void *)(head + 1),
                                HASH_PTE_SIZE_64 * head->n_valid);

                buffer += chunksize;
                n -= chunksize;
            }
        }
    } while ((rc != 0)
             && ((max_ns < 0)
                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));

    return (rc == 0) ? 1 : 0;
}
2652 
2653 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2654                            uint16_t n_valid, uint16_t n_invalid)
2655 {
2656     struct kvm_get_htab_header *buf;
2657     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2658     ssize_t rc;
2659 
2660     buf = alloca(chunksize);
2661     buf->index = index;
2662     buf->n_valid = n_valid;
2663     buf->n_invalid = n_invalid;
2664 
2665     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2666 
2667     rc = write(fd, buf, chunksize);
2668     if (rc < 0) {
2669         fprintf(stderr, "Error writing KVM hash table: %s\n",
2670                 strerror(errno));
2671         return rc;
2672     }
2673     if (rc != chunksize) {
2674         /* We should never get a short write on a single chunk */
2675         fprintf(stderr, "Short write, restoring KVM hash table\n");
2676         return -1;
2677     }
2678     return 0;
2679 }
2680 
/* Always stop the vcpu and report emulation failures. */
bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

/* No arch-specific IRQ routing setup is needed on ppc. */
void kvm_arch_init_irq_routing(KVMState *s)
{
}
2689 
/*
 * Read n HPTEs from the guest hash page table, starting at index ptex,
 * into hptes[].  Ranges the kernel reports as invalid are zero-filled.
 * Any failure is fatal (hw_error / error_abort).
 */
void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
{
    int fd, rc;
    int i;

    /* error_abort: failing to open the HTAB fd aborts QEMU. */
    fd = kvmppc_get_htab_fd(false, ptex, &error_abort);

    i = 0;
    while (i < n) {
        struct kvm_get_htab_header *hdr;
        int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
        char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];

        rc = read(fd, buf, sizeof(buf));
        if (rc < 0) {
            hw_error("kvmppc_read_hptes: Unable to read HPTEs");
        }
        /* NOTE(review): rc == 0 (EOF) would make this loop spin
         * forever; the kernel is presumably guaranteed to return data
         * for a valid range — confirm. */

        hdr = (struct kvm_get_htab_header *)buf;
        while ((i < n) && ((char *)hdr < (buf + rc))) {
            int invalid = hdr->n_invalid, valid = hdr->n_valid;

            /* The kernel must hand back consecutive indices. */
            if (hdr->index != (ptex + i)) {
                hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
                         " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
            }

            /* Clamp the valid run to the caller's window, then copy. */
            if (n - i < valid) {
                valid = n - i;
            }
            memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
            i += valid;

            /* Invalid HPTEs carry no payload; zero them locally. */
            if ((n - i) < invalid) {
                invalid = n - i;
            }
            memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
            i += invalid;

            /* Advance past the header and its valid-PTE payload. */
            hdr = (struct kvm_get_htab_header *)
                ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
        }
    }

    close(fd);
}
2736 
2737 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2738 {
2739     int fd, rc;
2740     struct {
2741         struct kvm_get_htab_header hdr;
2742         uint64_t pte0;
2743         uint64_t pte1;
2744     } buf;
2745 
2746     fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2747 
2748     buf.hdr.n_valid = 1;
2749     buf.hdr.n_invalid = 0;
2750     buf.hdr.index = ptex;
2751     buf.pte0 = cpu_to_be64(pte0);
2752     buf.pte1 = cpu_to_be64(pte1);
2753 
2754     rc = write(fd, &buf, sizeof(buf));
2755     if (rc != sizeof(buf)) {
2756         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2757     }
2758     close(fd);
2759 }
2760 
/* No arch-specific fixups are applied to MSI routes on ppc. */
int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data, PCIDevice *dev)
{
    return 0;
}

/* Nothing to do after an MSI route is added on ppc. */
int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
                                int vector, PCIDevice *dev)
{
    return 0;
}

/* Nothing to do after a virq is released on ppc. */
int kvm_arch_release_virq_post(int virq)
{
    return 0;
}

/* MSI data maps directly to a GSI; keep the low 16 bits. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return data & 0xffff;
}
2782 
2783 int kvmppc_enable_hwrng(void)
2784 {
2785     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2786         return -1;
2787     }
2788 
2789     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2790 }
2791 
2792 void kvmppc_check_papr_resize_hpt(Error **errp)
2793 {
2794     if (!kvm_enabled()) {
2795         return; /* No KVM, we're good */
2796     }
2797 
2798     if (cap_resize_hpt) {
2799         return; /* Kernel has explicit support, we're good */
2800     }
2801 
2802     /* Otherwise fallback on looking for PR KVM */
2803     if (kvmppc_is_pr(kvm_state)) {
2804         return;
2805     }
2806 
2807     error_setg(errp,
2808                "Hash page table resizing not available with this KVM version");
2809 }
2810 
2811 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2812 {
2813     CPUState *cs = CPU(cpu);
2814     struct kvm_ppc_resize_hpt rhpt = {
2815         .flags = flags,
2816         .shift = shift,
2817     };
2818 
2819     if (!cap_resize_hpt) {
2820         return -ENOSYS;
2821     }
2822 
2823     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2824 }
2825 
2826 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2827 {
2828     CPUState *cs = CPU(cpu);
2829     struct kvm_ppc_resize_hpt rhpt = {
2830         .flags = flags,
2831         .shift = shift,
2832     };
2833 
2834     if (!cap_resize_hpt) {
2835         return -ENOSYS;
2836     }
2837 
2838     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2839 }
2840 
2841 /*
2842  * This is a helper function to detect a post migration scenario
2843  * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2844  * the guest kernel can't handle a PVR value other than the actual host
2845  * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2846  *
2847  * If we don't have cap_ppc_pvr_compat and we're not running in PR
2848  * (so, we're HV), return true. The workaround itself is done in
2849  * cpu_post_load.
2850  *
2851  * The order here is important: we'll only check for KVM PR as a
2852  * fallback if the guest kernel can't handle the situation itself.
2853  * We need to avoid as much as possible querying the running KVM type
2854  * in QEMU level.
2855  */
2856 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2857 {
2858     CPUState *cs = CPU(cpu);
2859 
2860     if (!kvm_enabled()) {
2861         return false;
2862     }
2863 
2864     if (cap_ppc_pvr_compat) {
2865         return false;
2866     }
2867 
2868     return !kvmppc_is_pr(cs->kvm_state);
2869 }
2870