xref: /qemu/target/ppc/kvm.c (revision 0de6e2a3ca2e1215a2d62d8d796589d27eca91d0)
1 /*
2  * PowerPC implementation of KVM hooks
3  *
4  * Copyright IBM Corp. 2007
5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
6  *
7  * Authors:
8  *  Jerone Young <jyoung5@us.ibm.com>
9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10  *  Hollis Blanchard <hollisb@us.ibm.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
13  * See the COPYING file in the top-level directory.
14  *
15  */
16 
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21 
22 #include <linux/kvm.h>
23 
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
36 
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
50 #include "elf.h"
51 #include "sysemu/kvm_int.h"
52 
53 //#define DEBUG_KVM
54 
55 #ifdef DEBUG_KVM
56 #define DPRINTF(fmt, ...) \
57     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
58 #else
59 #define DPRINTF(fmt, ...) \
60     do { } while (0)
61 #endif
62 
63 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
64 
65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
66     KVM_CAP_LAST_INFO
67 };
68 
69 static int cap_interrupt_unset = false;
70 static int cap_interrupt_level = false;
71 static int cap_segstate;
72 static int cap_booke_sregs;
73 static int cap_ppc_smt;
74 static int cap_ppc_smt_possible;
75 static int cap_ppc_rma;
76 static int cap_spapr_tce;
77 static int cap_spapr_tce_64;
78 static int cap_spapr_multitce;
79 static int cap_spapr_vfio;
80 static int cap_hior;
81 static int cap_one_reg;
82 static int cap_epr;
83 static int cap_ppc_watchdog;
84 static int cap_papr;
85 static int cap_htab_fd;
86 static int cap_fixup_hcalls;
87 static int cap_htm;             /* Hardware transactional memory support */
88 static int cap_mmu_radix;
89 static int cap_mmu_hash_v3;
90 static int cap_resize_hpt;
91 static int cap_ppc_pvr_compat;
92 static int cap_ppc_safe_cache;
93 static int cap_ppc_safe_bounds_check;
94 static int cap_ppc_safe_indirect_branch;
95 
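/* Software breakpoint trap opcode, fetched from KVM via KVM_REG_PPC_DEBUG_INST
 * in kvm_arch_init_vcpu(). */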
96 static uint32_t debug_inst_opcode;
97 
98 /* XXX We have a race condition where we actually have a level triggered
99  *     interrupt, but the infrastructure can't expose that yet, so the guest
100  *     takes but ignores it, goes to sleep and never gets notified that there's
101  *     still an interrupt pending.
102  *
103  *     As a quick workaround, let's just wake up again 20 ms after we injected
104  *     an interrupt. That way we can ensure that we're always reinjecting
105  *     interrupts in case the guest swallowed them.
106  */
107 static QEMUTimer *idle_timer;
108 
109 static void kvm_kick_cpu(void *opaque)
110 {
111     PowerPCCPU *cpu = opaque;
112 
113     qemu_cpu_kick(CPU(cpu));
114 }
115 
116 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
117  * should only be used for fallback tests - generally we should use
118  * explicit capabilities for the features we want, rather than
119  * assuming what is/isn't available depending on the KVM variant. */
120 static bool kvmppc_is_pr(KVMState *ks)
121 {
122     /* Assume KVM-PR if the GET_PVINFO capability is available */
123     return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
124 }
125 
126 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
127 static void kvmppc_get_cpu_characteristics(KVMState *s);
128 
129 int kvm_arch_init(MachineState *ms, KVMState *s)
130 {
131     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
132     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
133     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
134     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
135     cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
136     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
137     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
138     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
139     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
140     cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
141     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
142     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
143     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
144     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
145     /* Note: we don't set cap_papr here, because this capability is
146      * only activated later, by kvmppc_set_papr() */
147     cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
148     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
149     cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
150     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
151     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
152     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
153     cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
154     kvmppc_get_cpu_characteristics(s);
155     /*
156      * Note: setting it to false because there is no such capability
157      * in KVM at this moment.
158      *
159      * TODO: call kvm_vm_check_extension() with the right capability
160      * after the kernel starts implementing it. */
161     cap_ppc_pvr_compat = false;
162 
163     if (!cap_interrupt_level) {
164         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
165                         "VM to stall at times!\n");
166     }
167 
168     kvm_ppc_register_host_cpu_type(ms);
169 
170     return 0;
171 }
172 
173 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
174 {
175     return 0;
176 }
177 
178 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
179 {
180     CPUPPCState *cenv = &cpu->env;
181     CPUState *cs = CPU(cpu);
182     struct kvm_sregs sregs;
183     int ret;
184 
185     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
186         /* What we're really trying to say is "if we're on BookE, we use
187            the native PVR for now". This is the only sane way to check
188            it though, so we may mislead users into thinking they can run
189            BookE guests on BookS. Let's hope nobody dares enough :) */
190         return 0;
191     } else {
192         if (!cap_segstate) {
193             fprintf(stderr, "kvm error: missing PVR setting capability\n");
194             return -ENOSYS;
195         }
196     }
197 
198     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
199     if (ret) {
200         return ret;
201     }
202 
203     sregs.pvr = cenv->spr[SPR_PVR];
204     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
205 }
206 
207 /* Set up a shared TLB array with KVM */
208 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
209 {
210     CPUPPCState *env = &cpu->env;
211     CPUState *cs = CPU(cpu);
212     struct kvm_book3e_206_tlb_params params = {};
213     struct kvm_config_tlb cfg = {};
214     unsigned int entries = 0;
215     int ret, i;
216 
217     if (!kvm_enabled() ||
218         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
219         return 0;
220     }
221 
222     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
223 
224     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
225         params.tlb_sizes[i] = booke206_tlb_size(env, i);
226         params.tlb_ways[i] = booke206_tlb_ways(env, i);
227         entries += params.tlb_sizes[i];
228     }
229 
230     assert(entries == env->nb_tlb);
231     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
232 
233     env->tlb_dirty = true;
234 
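    /* Register QEMU's own TLB entry array with KVM; KVM_DIRTY_TLB (see
     * kvm_sw_tlb_put()) is later used to tell KVM which entries QEMU has
     * modified. */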
235     cfg.array = (uintptr_t)env->tlb.tlbm;
236     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
237     cfg.params = (uintptr_t)&params;
238     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
239 
240     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
241     if (ret < 0) {
242         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
243                 __func__, strerror(-ret));
244         return ret;
245     }
246 
247     env->kvm_sw_tlb = true;
248     return 0;
249 }
250 
251 
252 #if defined(TARGET_PPC64)
253 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
254                                        struct kvm_ppc_smmu_info *info)
255 {
256     CPUPPCState *env = &cpu->env;
257     CPUState *cs = CPU(cpu);
258 
259     memset(info, 0, sizeof(*info));
260 
261     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
262      * we need to "guess" what the supported page sizes are.
263      *
264      * For that to work we make a few assumptions:
265      *
266      * - Check whether we are running "PR" KVM which only supports 4K
267      *   and 16M pages, but supports them regardless of the backing
268  *   store characteristics. We also don't support 1T segments.
269      *
270  *   This is safe because if HV KVM ever supports that capability or PR
271  *   KVM grows support for more page/segment sizes, those versions
272  *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
273  *   will not hit this fallback.
274      *
275      * - Else we are running HV KVM. This means we only support page
276      *   sizes that fit in the backing store. Additionally we only
277  *   advertise 64K pages if the processor is ARCH 2.06 and we assume
278      *   P7 encodings for the SLB and hash table. Here too, we assume
279      *   support for any newer processor will mean a kernel that
280      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
281      *   this fallback.
282      */
283     if (kvmppc_is_pr(cs->kvm_state)) {
284         /* No flags */
285         info->flags = 0;
286         info->slb_size = 64;
287 
288         /* Standard 4k base page size segment */
289         info->sps[0].page_shift = 12;
290         info->sps[0].slb_enc = 0;
291         info->sps[0].enc[0].page_shift = 12;
292         info->sps[0].enc[0].pte_enc = 0;
293 
294         /* Standard 16M large page size segment */
295         info->sps[1].page_shift = 24;
296         info->sps[1].slb_enc = SLB_VSID_L;
297         info->sps[1].enc[0].page_shift = 24;
298         info->sps[1].enc[0].pte_enc = 0;
299     } else {
300         int i = 0;
301 
302         /* HV KVM has backing store size restrictions */
303         info->flags = KVM_PPC_PAGE_SIZES_REAL;
304 
305         if (env->mmu_model & POWERPC_MMU_1TSEG) {
306             info->flags |= KVM_PPC_1T_SEGMENTS;
307         }
308 
309         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
310            POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
311             info->slb_size = 32;
312         } else {
313             info->slb_size = 64;
314         }
315 
316         /* Standard 4k base page size segment */
317         info->sps[i].page_shift = 12;
318         info->sps[i].slb_enc = 0;
319         info->sps[i].enc[0].page_shift = 12;
320         info->sps[i].enc[0].pte_enc = 0;
321         i++;
322 
323         /* 64K on MMU 2.06 and later */
324         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
325             POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
326             info->sps[i].page_shift = 16;
327             info->sps[i].slb_enc = 0x110;
328             info->sps[i].enc[0].page_shift = 16;
329             info->sps[i].enc[0].pte_enc = 1;
330             i++;
331         }
332 
333         /* Standard 16M large page size segment */
334         info->sps[i].page_shift = 24;
335         info->sps[i].slb_enc = SLB_VSID_L;
336         info->sps[i].enc[0].page_shift = 24;
337         info->sps[i].enc[0].pte_enc = 0;
338     }
339 }
340 
341 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
342 {
343     CPUState *cs = CPU(cpu);
344     int ret;
345 
346     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
347         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
348         if (ret == 0) {
349             return;
350         }
351     }
352 
353     kvm_get_fallback_smmu_info(cpu, info);
354 }
355 
356 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
357 {
358     KVMState *s = KVM_STATE(current_machine->accelerator);
359     struct ppc_radix_page_info *radix_page_info;
360     struct kvm_ppc_rmmu_info rmmu_info;
361     int i;
362 
363     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
364         return NULL;
365     }
366     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
367         return NULL;
368     }
369     radix_page_info = g_malloc0(sizeof(*radix_page_info));
370     radix_page_info->count = 0;
371     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
372         if (rmmu_info.ap_encodings[i]) {
373             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
374             radix_page_info->count++;
375         }
376     }
377     return radix_page_info;
378 }
379 
380 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
381                                      bool radix, bool gtse,
382                                      uint64_t proc_tbl)
383 {
384     CPUState *cs = CPU(cpu);
385     int ret;
386     uint64_t flags = 0;
387     struct kvm_ppc_mmuv3_cfg cfg = {
388         .process_table = proc_tbl,
389     };
390 
391     if (radix) {
392         flags |= KVM_PPC_MMUV3_RADIX;
393     }
394     if (gtse) {
395         flags |= KVM_PPC_MMUV3_GTSE;
396     }
397     cfg.flags = flags;
398     ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
399     switch (ret) {
400     case 0:
401         return H_SUCCESS;
402     case -EINVAL:
403         return H_PARAMETER;
404     case -ENODEV:
405         return H_NOT_AVAILABLE;
406     default:
407         return H_HARDWARE;
408     }
409 }
410 
411 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
412 {
413     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
414         return true;
415     }
416 
417     return (1ul << shift) <= rampgsize;
418 }
419 
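/* Page size of the memory backing guest RAM; with HV KVM ("real" page sizes)
 * the guest cannot use page sizes larger than this. */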
420 static long max_cpu_page_size;
421 
422 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
423 {
424     static struct kvm_ppc_smmu_info smmu_info;
425     static bool has_smmu_info;
426     CPUPPCState *env = &cpu->env;
427     int iq, ik, jq, jk;
428     bool has_64k_pages = false;
429 
430     /* We only handle page sizes for 64-bit server guests for now */
431     if (!(env->mmu_model & POWERPC_MMU_64)) {
432         return;
433     }
434 
435     /* Collect MMU info from kernel if not already */
436     if (!has_smmu_info) {
437         kvm_get_smmu_info(cpu, &smmu_info);
438         has_smmu_info = true;
439     }
440 
441     if (!max_cpu_page_size) {
442         max_cpu_page_size = qemu_getrampagesize();
443     }
444 
445     /* Convert to QEMU form */
446     memset(&env->sps, 0, sizeof(env->sps));
447 
448     /* If we have HV KVM, we need to forbid CI large pages if our
449      * host page size is smaller than 64K.
450      */
451     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
452         env->ci_large_pages = getpagesize() >= 0x10000;
453     }
454 
455     /*
456      * XXX This loop should be an entry wide AND of the capabilities that
457      *     the selected CPU has with the capabilities that KVM supports.
458      */
459     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
460         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
461         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
462 
463         if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
464                                  ksps->page_shift)) {
465             continue;
466         }
467         qsps->page_shift = ksps->page_shift;
468         qsps->slb_enc = ksps->slb_enc;
469         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
470             if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
471                                      ksps->enc[jk].page_shift)) {
472                 continue;
473             }
474             if (ksps->enc[jk].page_shift == 16) {
475                 has_64k_pages = true;
476             }
477             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
478             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
479             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
480                 break;
481             }
482         }
483         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
484             break;
485         }
486     }
487     env->slb_nr = smmu_info.slb_size;
488     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
489         env->mmu_model &= ~POWERPC_MMU_1TSEG;
490     }
491     if (!has_64k_pages) {
492         env->mmu_model &= ~POWERPC_MMU_64K;
493     }
494 }
495 
496 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
497 {
498     Object *mem_obj = object_resolve_path(obj_path, NULL);
499     char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
500     long pagesize;
501 
502     pagesize = qemu_mempath_getpagesize(mempath);
503     g_free(mempath);
504 
505     return pagesize >= max_cpu_page_size;
506 }
507 
508 #else /* defined (TARGET_PPC64) */
509 
510 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
511 {
512 }
513 
514 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
515 {
516     return true;
517 }
518 
519 #endif /* !defined (TARGET_PPC64) */
520 
521 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
522 {
523     return POWERPC_CPU(cpu)->vcpu_id;
524 }
525 
526 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
527  * book3s supports only 1 watchpoint, so an array size
528  * of 4 is sufficient for now.
529  */
530 #define MAX_HW_BKPTS 4
531 
532 static struct HWBreakpoint {
533     target_ulong addr;
534     int type;
535 } hw_debug_points[MAX_HW_BKPTS];
536 
537 static CPUWatchpoint hw_watchpoint;
538 
539 /* By default no hardware breakpoints or watchpoints are supported */
540 static int max_hw_breakpoint;
541 static int max_hw_watchpoint;
542 static int nb_hw_breakpoint;
543 static int nb_hw_watchpoint;
544 
545 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
546 {
547     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
548         max_hw_breakpoint = 2;
549         max_hw_watchpoint = 2;
550     }
551 
552     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
553         fprintf(stderr, "Error initializing h/w breakpoints\n");
554         return;
555     }
556 }
557 
558 int kvm_arch_init_vcpu(CPUState *cs)
559 {
560     PowerPCCPU *cpu = POWERPC_CPU(cs);
561     CPUPPCState *cenv = &cpu->env;
562     int ret;
563 
564     /* Gather server mmu info from KVM and update the CPU state */
565     kvm_fixup_page_sizes(cpu);
566 
567     /* Synchronize sregs with kvm */
568     ret = kvm_arch_sync_sregs(cpu);
569     if (ret) {
570         if (ret == -EINVAL) {
571             error_report("Register sync failed... If you're using kvm-hv.ko,"
572                          " only \"-cpu host\" is possible");
573         }
574         return ret;
575     }
576 
577     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
578 
579     switch (cenv->mmu_model) {
580     case POWERPC_MMU_BOOKE206:
581         /* This target supports access to KVM's guest TLB */
582         ret = kvm_booke206_tlb_init(cpu);
583         break;
584     case POWERPC_MMU_2_07:
585         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
586             /* KVM-HV has transactional memory on POWER8 even without the
587              * KVM_CAP_PPC_HTM extension, so enable it here instead as
588              * long as it's available to userspace on the host. */
589             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
590                 cap_htm = true;
591             }
592         }
593         break;
594     default:
595         break;
596     }
597 
598     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
599     kvmppc_hw_debug_points_init(cenv);
600 
601     return ret;
602 }
603 
604 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
605 {
606     CPUPPCState *env = &cpu->env;
607     CPUState *cs = CPU(cpu);
608     struct kvm_dirty_tlb dirty_tlb;
609     unsigned char *bitmap;
610     int ret;
611 
612     if (!env->kvm_sw_tlb) {
613         return;
614     }
615 
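    /* Build an all-ones bitmap so every TLB entry is flagged dirty and KVM
     * re-reads the whole shadow array. */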
616     bitmap = g_malloc((env->nb_tlb + 7) / 8);
617     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
618 
619     dirty_tlb.bitmap = (uintptr_t)bitmap;
620     dirty_tlb.num_dirty = env->nb_tlb;
621 
622     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
623     if (ret) {
624         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
625                 __func__, strerror(-ret));
626     }
627 
628     g_free(bitmap);
629 }
630 
631 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
632 {
633     PowerPCCPU *cpu = POWERPC_CPU(cs);
634     CPUPPCState *env = &cpu->env;
635     union {
636         uint32_t u32;
637         uint64_t u64;
638     } val;
639     struct kvm_one_reg reg = {
640         .id = id,
641         .addr = (uintptr_t) &val,
642     };
643     int ret;
644 
645     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
646     if (ret != 0) {
647         trace_kvm_failed_spr_get(spr, strerror(errno));
648     } else {
649         switch (id & KVM_REG_SIZE_MASK) {
650         case KVM_REG_SIZE_U32:
651             env->spr[spr] = val.u32;
652             break;
653 
654         case KVM_REG_SIZE_U64:
655             env->spr[spr] = val.u64;
656             break;
657 
658         default:
659             /* Don't handle this size yet */
660             abort();
661         }
662     }
663 }
664 
665 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
666 {
667     PowerPCCPU *cpu = POWERPC_CPU(cs);
668     CPUPPCState *env = &cpu->env;
669     union {
670         uint32_t u32;
671         uint64_t u64;
672     } val;
673     struct kvm_one_reg reg = {
674         .id = id,
675         .addr = (uintptr_t) &val,
676     };
677     int ret;
678 
679     switch (id & KVM_REG_SIZE_MASK) {
680     case KVM_REG_SIZE_U32:
681         val.u32 = env->spr[spr];
682         break;
683 
684     case KVM_REG_SIZE_U64:
685         val.u64 = env->spr[spr];
686         break;
687 
688     default:
689         /* Don't handle this size yet */
690         abort();
691     }
692 
693     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
694     if (ret != 0) {
695         trace_kvm_failed_spr_set(spr, strerror(errno));
696     }
697 }
698 
699 static int kvm_put_fp(CPUState *cs)
700 {
701     PowerPCCPU *cpu = POWERPC_CPU(cs);
702     CPUPPCState *env = &cpu->env;
703     struct kvm_one_reg reg;
704     int i;
705     int ret;
706 
707     if (env->insns_flags & PPC_FLOAT) {
708         uint64_t fpscr = env->fpscr;
709         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
710 
711         reg.id = KVM_REG_PPC_FPSCR;
712         reg.addr = (uintptr_t)&fpscr;
713         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
714         if (ret < 0) {
715             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
716             return ret;
717         }
718 
719         for (i = 0; i < 32; i++) {
720             uint64_t vsr[2];
721 
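            /* The FPR is doubleword 0 of the corresponding VSR and
             * env->vsr[i] is doubleword 1; their order in the buffer
             * depends on host endianness. */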
722 #ifdef HOST_WORDS_BIGENDIAN
723             vsr[0] = float64_val(env->fpr[i]);
724             vsr[1] = env->vsr[i];
725 #else
726             vsr[0] = env->vsr[i];
727             vsr[1] = float64_val(env->fpr[i]);
728 #endif
729             reg.addr = (uintptr_t) &vsr;
730             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
731 
732             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
733             if (ret < 0) {
734                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
735                         i, strerror(errno));
736                 return ret;
737             }
738         }
739     }
740 
741     if (env->insns_flags & PPC_ALTIVEC) {
742         reg.id = KVM_REG_PPC_VSCR;
743         reg.addr = (uintptr_t)&env->vscr;
744         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
745         if (ret < 0) {
746             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
747             return ret;
748         }
749 
750         for (i = 0; i < 32; i++) {
751             reg.id = KVM_REG_PPC_VR(i);
752             reg.addr = (uintptr_t)&env->avr[i];
753             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
754             if (ret < 0) {
755                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
756                 return ret;
757             }
758         }
759     }
760 
761     return 0;
762 }
763 
764 static int kvm_get_fp(CPUState *cs)
765 {
766     PowerPCCPU *cpu = POWERPC_CPU(cs);
767     CPUPPCState *env = &cpu->env;
768     struct kvm_one_reg reg;
769     int i;
770     int ret;
771 
772     if (env->insns_flags & PPC_FLOAT) {
773         uint64_t fpscr;
774         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
775 
776         reg.id = KVM_REG_PPC_FPSCR;
777         reg.addr = (uintptr_t)&fpscr;
778         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
779         if (ret < 0) {
780             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
781             return ret;
782         } else {
783             env->fpscr = fpscr;
784         }
785 
786         for (i = 0; i < 32; i++) {
787             uint64_t vsr[2];
788 
789             reg.addr = (uintptr_t) &vsr;
790             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
791 
792             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
793             if (ret < 0) {
794                 DPRINTF("Unable to get %s%d from KVM: %s\n",
795                         vsx ? "VSR" : "FPR", i, strerror(errno));
796                 return ret;
797             } else {
798 #ifdef HOST_WORDS_BIGENDIAN
799                 env->fpr[i] = vsr[0];
800                 if (vsx) {
801                     env->vsr[i] = vsr[1];
802                 }
803 #else
804                 env->fpr[i] = vsr[1];
805                 if (vsx) {
806                     env->vsr[i] = vsr[0];
807                 }
808 #endif
809             }
810         }
811     }
812 
813     if (env->insns_flags & PPC_ALTIVEC) {
814         reg.id = KVM_REG_PPC_VSCR;
815         reg.addr = (uintptr_t)&env->vscr;
816         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
817         if (ret < 0) {
818             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
819             return ret;
820         }
821 
822         for (i = 0; i < 32; i++) {
823             reg.id = KVM_REG_PPC_VR(i);
824             reg.addr = (uintptr_t)&env->avr[i];
825             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
826             if (ret < 0) {
827                 DPRINTF("Unable to get VR%d from KVM: %s\n",
828                         i, strerror(errno));
829                 return ret;
830             }
831         }
832     }
833 
834     return 0;
835 }
836 
837 #if defined(TARGET_PPC64)
838 static int kvm_get_vpa(CPUState *cs)
839 {
840     PowerPCCPU *cpu = POWERPC_CPU(cs);
841     CPUPPCState *env = &cpu->env;
842     struct kvm_one_reg reg;
843     int ret;
844 
845     reg.id = KVM_REG_PPC_VPA_ADDR;
846     reg.addr = (uintptr_t)&env->vpa_addr;
847     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
848     if (ret < 0) {
849         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
850         return ret;
851     }
852 
853     assert((uintptr_t)&env->slb_shadow_size
854            == ((uintptr_t)&env->slb_shadow_addr + 8));
855     reg.id = KVM_REG_PPC_VPA_SLB;
856     reg.addr = (uintptr_t)&env->slb_shadow_addr;
857     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
858     if (ret < 0) {
859         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
860                 strerror(errno));
861         return ret;
862     }
863 
864     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
865     reg.id = KVM_REG_PPC_VPA_DTL;
866     reg.addr = (uintptr_t)&env->dtl_addr;
867     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
868     if (ret < 0) {
869         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
870                 strerror(errno));
871         return ret;
872     }
873 
874     return 0;
875 }
876 
877 static int kvm_put_vpa(CPUState *cs)
878 {
879     PowerPCCPU *cpu = POWERPC_CPU(cs);
880     CPUPPCState *env = &cpu->env;
881     struct kvm_one_reg reg;
882     int ret;
883 
884     /* SLB shadow or DTL can't be registered unless a master VPA is
885      * registered.  That means when restoring state, if a VPA *is*
886      * registered, we need to set that up first.  If not, we need to
887      * deregister the others before deregistering the master VPA */
888     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
889 
890     if (env->vpa_addr) {
891         reg.id = KVM_REG_PPC_VPA_ADDR;
892         reg.addr = (uintptr_t)&env->vpa_addr;
893         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
894         if (ret < 0) {
895             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
896             return ret;
897         }
898     }
899 
900     assert((uintptr_t)&env->slb_shadow_size
901            == ((uintptr_t)&env->slb_shadow_addr + 8));
902     reg.id = KVM_REG_PPC_VPA_SLB;
903     reg.addr = (uintptr_t)&env->slb_shadow_addr;
904     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
905     if (ret < 0) {
906         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
907         return ret;
908     }
909 
910     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
911     reg.id = KVM_REG_PPC_VPA_DTL;
912     reg.addr = (uintptr_t)&env->dtl_addr;
913     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
914     if (ret < 0) {
915         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
916                 strerror(errno));
917         return ret;
918     }
919 
920     if (!env->vpa_addr) {
921         reg.id = KVM_REG_PPC_VPA_ADDR;
922         reg.addr = (uintptr_t)&env->vpa_addr;
923         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
924         if (ret < 0) {
925             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
926             return ret;
927         }
928     }
929 
930     return 0;
931 }
932 #endif /* TARGET_PPC64 */
933 
934 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
935 {
936     CPUPPCState *env = &cpu->env;
937     struct kvm_sregs sregs;
938     int i;
939 
940     sregs.pvr = env->spr[SPR_PVR];
941 
942     if (cpu->vhyp) {
943         PPCVirtualHypervisorClass *vhc =
944             PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
945         sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
946     } else {
947         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
948     }
949 
950     /* Sync SLB */
951 #ifdef TARGET_PPC64
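    /* slbe holds the ESID; for valid entries the SLB index is OR'd into its
     * low bits. */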
952     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
953         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
954         if (env->slb[i].esid & SLB_ESID_V) {
955             sregs.u.s.ppc64.slb[i].slbe |= i;
956         }
957         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
958     }
959 #endif
960 
961     /* Sync SRs */
962     for (i = 0; i < 16; i++) {
963         sregs.u.s.ppc32.sr[i] = env->sr[i];
964     }
965 
966     /* Sync BATs */
967     for (i = 0; i < 8; i++) {
968         /* Beware. We have to swap upper and lower bits here */
969         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
970             | env->DBAT[1][i];
971         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
972             | env->IBAT[1][i];
973     }
974 
975     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
976 }
977 
978 int kvm_arch_put_registers(CPUState *cs, int level)
979 {
980     PowerPCCPU *cpu = POWERPC_CPU(cs);
981     CPUPPCState *env = &cpu->env;
982     struct kvm_regs regs;
983     int ret;
984     int i;
985 
986     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
987     if (ret < 0) {
988         return ret;
989     }
990 
991     regs.ctr = env->ctr;
992     regs.lr  = env->lr;
993     regs.xer = cpu_read_xer(env);
994     regs.msr = env->msr;
995     regs.pc = env->nip;
996 
997     regs.srr0 = env->spr[SPR_SRR0];
998     regs.srr1 = env->spr[SPR_SRR1];
999 
1000     regs.sprg0 = env->spr[SPR_SPRG0];
1001     regs.sprg1 = env->spr[SPR_SPRG1];
1002     regs.sprg2 = env->spr[SPR_SPRG2];
1003     regs.sprg3 = env->spr[SPR_SPRG3];
1004     regs.sprg4 = env->spr[SPR_SPRG4];
1005     regs.sprg5 = env->spr[SPR_SPRG5];
1006     regs.sprg6 = env->spr[SPR_SPRG6];
1007     regs.sprg7 = env->spr[SPR_SPRG7];
1008 
1009     regs.pid = env->spr[SPR_BOOKE_PID];
1010 
1011     for (i = 0; i < 32; i++)
1012         regs.gpr[i] = env->gpr[i];
1013 
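    /* Pack the eight 4-bit CR fields into the single 32-bit CR image,
     * with CR0 in the most significant nibble. */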
1014     regs.cr = 0;
1015     for (i = 0; i < 8; i++) {
1016         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1017     }
1018 
1019     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1020     if (ret < 0)
1021         return ret;
1022 
1023     kvm_put_fp(cs);
1024 
1025     if (env->tlb_dirty) {
1026         kvm_sw_tlb_put(cpu);
1027         env->tlb_dirty = false;
1028     }
1029 
1030     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1031         ret = kvmppc_put_books_sregs(cpu);
1032         if (ret < 0) {
1033             return ret;
1034         }
1035     }
1036 
1037     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1038         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1039     }
1040 
1041     if (cap_one_reg) {
1042         int i;
1043 
1044         /* We deliberately ignore errors here: for kernels which have
1045          * the ONE_REG calls but don't support the specific
1046          * registers, there's a reasonable chance things will still
1047          * work, at least until we try to migrate. */
1048         for (i = 0; i < 1024; i++) {
1049             uint64_t id = env->spr_cb[i].one_reg_id;
1050 
1051             if (id != 0) {
1052                 kvm_put_one_spr(cs, id, i);
1053             }
1054         }
1055 
1056 #ifdef TARGET_PPC64
1057         if (msr_ts) {
1058             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1059                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1060             }
1061             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1062                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1063             }
1064             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1065             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1066             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1067             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1068             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1069             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1070             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1071             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1072             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1073             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1074         }
1075 
1076         if (cap_papr) {
1077             if (kvm_put_vpa(cs) < 0) {
1078                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1079             }
1080         }
1081 
1082         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1083 #endif /* TARGET_PPC64 */
1084     }
1085 
1086     return ret;
1087 }
1088 
1089 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1090 {
1091      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1092 }
1093 
1094 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1095 {
1096     CPUPPCState *env = &cpu->env;
1097     struct kvm_sregs sregs;
1098     int ret;
1099 
1100     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1101     if (ret < 0) {
1102         return ret;
1103     }
1104 
1105     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1106         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1107         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1108         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1109         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1110         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1111         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1112         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1113         env->spr[SPR_DECR] = sregs.u.e.dec;
1114         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1115         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1116         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1117     }
1118 
1119     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1120         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1121         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1122         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1123         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1124         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1125     }
1126 
1127     if (sregs.u.e.features & KVM_SREGS_E_64) {
1128         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1129     }
1130 
1131     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1132         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1133     }
1134 
1135     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1136         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1137         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1138         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1139         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1140         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1141         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1142         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1143         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1144         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1145         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1146         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1147         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1148         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1149         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1150         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1151         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1152         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1153         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1154         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1155         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1156         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1157         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1158         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1159         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1160         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1161         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1162         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1163         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1164         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1165         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1166         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1167         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1168 
1169         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1170             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1171             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1172             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1173             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1174             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1175             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1176         }
1177 
1178         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1179             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1180             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1181         }
1182 
1183         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1184             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1185             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1186             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1187             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1188         }
1189     }
1190 
1191     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1192         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1193         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1194         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1195         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1196         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1197         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1198         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1199         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1200         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1201         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1202     }
1203 
1204     if (sregs.u.e.features & KVM_SREGS_EXP) {
1205         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1206     }
1207 
1208     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1209         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1210         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1211     }
1212 
1213     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1214         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1215         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1216         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1217 
1218         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1219             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1220             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1221         }
1222     }
1223 
1224     return 0;
1225 }
1226 
1227 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1228 {
1229     CPUPPCState *env = &cpu->env;
1230     struct kvm_sregs sregs;
1231     int ret;
1232     int i;
1233 
1234     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1235     if (ret < 0) {
1236         return ret;
1237     }
1238 
1239     if (!cpu->vhyp) {
1240         ppc_store_sdr1(env, sregs.u.s.sdr1);
1241     }
1242 
1243     /* Sync SLB */
1244 #ifdef TARGET_PPC64
1245     /*
1246      * The packed SLB array we get from KVM_GET_SREGS only contains
1247      * information about valid entries. So we flush our internal copy
1248      * to get rid of stale ones, then put all valid SLB entries back
1249      * in.
1250      */
1251     memset(env->slb, 0, sizeof(env->slb));
1252     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1253         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1254         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1255         /*
1256          * Only restore valid entries
1257          */
1258         if (rb & SLB_ESID_V) {
1259             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1260         }
1261     }
1262 #endif
1263 
1264     /* Sync SRs */
1265     for (i = 0; i < 16; i++) {
1266         env->sr[i] = sregs.u.s.ppc32.sr[i];
1267     }
1268 
1269     /* Sync BATs */
1270     for (i = 0; i < 8; i++) {
1271         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1272         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1273         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1274         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1275     }
1276 
1277     return 0;
1278 }
1279 
1280 int kvm_arch_get_registers(CPUState *cs)
1281 {
1282     PowerPCCPU *cpu = POWERPC_CPU(cs);
1283     CPUPPCState *env = &cpu->env;
1284     struct kvm_regs regs;
1285     uint32_t cr;
1286     int i, ret;
1287 
1288     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1289     if (ret < 0)
1290         return ret;
1291 
1292     cr = regs.cr;
1293     for (i = 7; i >= 0; i--) {
1294         env->crf[i] = cr & 15;
1295         cr >>= 4;
1296     }
1297 
1298     env->ctr = regs.ctr;
1299     env->lr = regs.lr;
1300     cpu_write_xer(env, regs.xer);
1301     env->msr = regs.msr;
1302     env->nip = regs.pc;
1303 
1304     env->spr[SPR_SRR0] = regs.srr0;
1305     env->spr[SPR_SRR1] = regs.srr1;
1306 
1307     env->spr[SPR_SPRG0] = regs.sprg0;
1308     env->spr[SPR_SPRG1] = regs.sprg1;
1309     env->spr[SPR_SPRG2] = regs.sprg2;
1310     env->spr[SPR_SPRG3] = regs.sprg3;
1311     env->spr[SPR_SPRG4] = regs.sprg4;
1312     env->spr[SPR_SPRG5] = regs.sprg5;
1313     env->spr[SPR_SPRG6] = regs.sprg6;
1314     env->spr[SPR_SPRG7] = regs.sprg7;
1315 
1316     env->spr[SPR_BOOKE_PID] = regs.pid;
1317 
1318     for (i = 0; i < 32; i++)
1319         env->gpr[i] = regs.gpr[i];
1320 
1321     kvm_get_fp(cs);
1322 
1323     if (cap_booke_sregs) {
1324         ret = kvmppc_get_booke_sregs(cpu);
1325         if (ret < 0) {
1326             return ret;
1327         }
1328     }
1329 
1330     if (cap_segstate) {
1331         ret = kvmppc_get_books_sregs(cpu);
1332         if (ret < 0) {
1333             return ret;
1334         }
1335     }
1336 
1337     if (cap_hior) {
1338         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1339     }
1340 
1341     if (cap_one_reg) {
1342         int i;
1343 
1344         /* We deliberately ignore errors here: for kernels which have
1345          * the ONE_REG calls but don't support the specific
1346          * registers, there's a reasonable chance things will still
1347          * work, at least until we try to migrate. */
1348         for (i = 0; i < 1024; i++) {
1349             uint64_t id = env->spr_cb[i].one_reg_id;
1350 
1351             if (id != 0) {
1352                 kvm_get_one_spr(cs, id, i);
1353             }
1354         }
1355 
1356 #ifdef TARGET_PPC64
1357         if (msr_ts) {
1358             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1359                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1360             }
1361             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1362                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1363             }
1364             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1365             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1366             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1367             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1368             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1369             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1370             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1371             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1372             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1373             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1374         }
1375 
1376         if (cap_papr) {
1377             if (kvm_get_vpa(cs) < 0) {
1378                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1379             }
1380         }
1381 
1382         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1383 #endif
1384     }
1385 
1386     return 0;
1387 }
1388 
1389 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1390 {
1391     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1392 
1393     if (irq != PPC_INTERRUPT_EXT) {
1394         return 0;
1395     }
1396 
1397     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1398         return 0;
1399     }
1400 
1401     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1402 
1403     return 0;
1404 }
1405 
1406 #if defined(TARGET_PPCEMB)
1407 #define PPC_INPUT_INT PPC40x_INPUT_INT
1408 #elif defined(TARGET_PPC64)
1409 #define PPC_INPUT_INT PPC970_INPUT_INT
1410 #else
1411 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1412 #endif
1413 
1414 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1415 {
1416     PowerPCCPU *cpu = POWERPC_CPU(cs);
1417     CPUPPCState *env = &cpu->env;
1418     int r;
1419     unsigned irq;
1420 
1421     qemu_mutex_lock_iothread();
1422 
1423     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1424      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1425     if (!cap_interrupt_level &&
1426         run->ready_for_interrupt_injection &&
1427         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1428         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1429     {
1430         /* For now KVM disregards the 'irq' argument. However, in the
1431          * future KVM could cache it in-kernel to avoid a heavyweight exit
1432          * when reading the UIC.
1433          */
1434         irq = KVM_INTERRUPT_SET;
1435 
1436         DPRINTF("injected interrupt %d\n", irq);
1437         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1438         if (r < 0) {
1439             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1440         }
1441 
1442         /* Always wake up soon in case the interrupt was level based */
1443         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1444                        (NANOSECONDS_PER_SECOND / 50));
1445     }
1446 
1447     /* We don't know if there are more interrupts pending after this. However,
1448      * the guest will return to userspace in the course of handling this one
1449      * anyway, so we will get a chance to deliver the rest. */
1450 
1451     qemu_mutex_unlock_iothread();
1452 }
1453 
1454 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1455 {
1456     return MEMTXATTRS_UNSPECIFIED;
1457 }
1458 
1459 int kvm_arch_process_async_events(CPUState *cs)
1460 {
1461     return cs->halted;
1462 }
1463 
1464 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1465 {
1466     CPUState *cs = CPU(cpu);
1467     CPUPPCState *env = &cpu->env;
1468 
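    /* Go idle only if external interrupts are enabled and none is pending;
     * otherwise return to the guest straight away. */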
1469     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1470         cs->halted = 1;
1471         cs->exception_index = EXCP_HLT;
1472     }
1473 
1474     return 0;
1475 }
1476 
1477 /* map dcr access to existing qemu dcr emulation */
1478 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1479 {
1480     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1481         fprintf(stderr, "Read from unhandled DCR (0x%x)\n", dcrn);
1482 
1483     return 0;
1484 }
1485 
1486 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1487 {
1488     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1489         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1490 
1491     return 0;
1492 }
1493 
1494 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1495 {
1496     /* Mixed endian case is not handled */
1497     uint32_t sc = debug_inst_opcode;
1498 
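    /* Save the original instruction and replace it with the trap opcode that
     * KVM provided for software breakpoints. */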
1499     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1500                             sizeof(sc), 0) ||
1501         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1502         return -EINVAL;
1503     }
1504 
1505     return 0;
1506 }
1507 
1508 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1509 {
1510     uint32_t sc;
1511 
1512     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1513         sc != debug_inst_opcode ||
1514         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1515                             sizeof(sc), 1)) {
1516         return -EINVAL;
1517     }
1518 
1519     return 0;
1520 }
1521 
1522 static int find_hw_breakpoint(target_ulong addr, int type)
1523 {
1524     int n;
1525 
1526     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1527            <= ARRAY_SIZE(hw_debug_points));
1528 
1529     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1530         if (hw_debug_points[n].addr == addr &&
1531              hw_debug_points[n].type == type) {
1532             return n;
1533         }
1534     }
1535 
1536     return -1;
1537 }
1538 
1539 static int find_hw_watchpoint(target_ulong addr, int *flag)
1540 {
1541     int n;
1542 
1543     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1544     if (n >= 0) {
1545         *flag = BP_MEM_ACCESS;
1546         return n;
1547     }
1548 
1549     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1550     if (n >= 0) {
1551         *flag = BP_MEM_WRITE;
1552         return n;
1553     }
1554 
1555     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1556     if (n >= 0) {
1557         *flag = BP_MEM_READ;
1558         return n;
1559     }
1560 
1561     return -1;
1562 }
1563 
1564 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1565                                   target_ulong len, int type)
1566 {
1567     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1568         return -ENOBUFS;
1569     }
1570 
1571     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1572     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1573 
1574     switch (type) {
1575     case GDB_BREAKPOINT_HW:
1576         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1577             return -ENOBUFS;
1578         }
1579 
1580         if (find_hw_breakpoint(addr, type) >= 0) {
1581             return -EEXIST;
1582         }
1583 
1584         nb_hw_breakpoint++;
1585         break;
1586 
1587     case GDB_WATCHPOINT_WRITE:
1588     case GDB_WATCHPOINT_READ:
1589     case GDB_WATCHPOINT_ACCESS:
1590         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1591             return -ENOBUFS;
1592         }
1593 
1594         if (find_hw_breakpoint(addr, type) >= 0) {
1595             return -EEXIST;
1596         }
1597 
1598         nb_hw_watchpoint++;
1599         break;
1600 
1601     default:
1602         return -ENOSYS;
1603     }
1604 
1605     return 0;
1606 }
1607 
1608 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1609                                   target_ulong len, int type)
1610 {
1611     int n;
1612 
1613     n = find_hw_breakpoint(addr, type);
1614     if (n < 0) {
1615         return -ENOENT;
1616     }
1617 
1618     switch (type) {
1619     case GDB_BREAKPOINT_HW:
1620         nb_hw_breakpoint--;
1621         break;
1622 
1623     case GDB_WATCHPOINT_WRITE:
1624     case GDB_WATCHPOINT_READ:
1625     case GDB_WATCHPOINT_ACCESS:
1626         nb_hw_watchpoint--;
1627         break;
1628 
1629     default:
1630         return -ENOSYS;
1631     }
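    /* Keep hw_debug_points[] dense by moving the last entry into the slot
     * that was just freed. */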
1632     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1633 
1634     return 0;
1635 }
1636 
1637 void kvm_arch_remove_all_hw_breakpoints(void)
1638 {
1639     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1640 }
1641 
1642 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1643 {
1644     int n;
1645 
1646     /* Software Breakpoint updates */
1647     if (kvm_sw_breakpoints_active(cs)) {
1648         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1649     }
1650 
1651     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1652            <= ARRAY_SIZE(hw_debug_points));
1653     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1654 
1655     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1656         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1657         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1658         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1659             switch (hw_debug_points[n].type) {
1660             case GDB_BREAKPOINT_HW:
1661                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1662                 break;
1663             case GDB_WATCHPOINT_WRITE:
1664                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1665                 break;
1666             case GDB_WATCHPOINT_READ:
1667                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1668                 break;
1669             case GDB_WATCHPOINT_ACCESS:
1670                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1671                                         KVMPPC_DEBUG_WATCH_READ;
1672                 break;
1673             default:
1674                 cpu_abort(cs, "Unsupported breakpoint type\n");
1675             }
1676             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1677         }
1678     }
1679 }
1680 
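/* Handle a KVM_EXIT_DEBUG exit.  Returns non-zero if the exit was
 * triggered by QEMU's own debug resources (single-stepping, or a
 * hardware or software breakpoint/watchpoint we own), 0 if it should
 * be treated as a guest-internal event (see the comment below for the
 * program-interrupt injection case). */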
1681 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1682 {
1683     CPUState *cs = CPU(cpu);
1684     CPUPPCState *env = &cpu->env;
1685     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1686     int handle = 0;
1687     int n;
1688     int flag = 0;
1689 
1690     if (cs->singlestep_enabled) {
1691         handle = 1;
1692     } else if (arch_info->status) {
1693         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1694             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1695                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1696                 if (n >= 0) {
1697                     handle = 1;
1698                 }
1699             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1700                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1701                 n = find_hw_watchpoint(arch_info->address, &flag);
1702                 if (n >= 0) {
1703                     handle = 1;
1704                     cs->watchpoint_hit = &hw_watchpoint;
1705                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1706                     hw_watchpoint.flags = flag;
1707                 }
1708             }
1709         }
1710     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1711         handle = 1;
1712     } else {
1713         /* QEMU is not able to handle this debug exception, so inject
1714          * a program exception into the guest;
1715          * yes, a program exception, NOT a debug exception!
1716          * When QEMU is using debug resources, debug exceptions must
1717          * always be delivered to QEMU. To achieve this we set MSR_DE
1718          * and also MSRP_DEP so the guest cannot change MSR_DE.
1719          * When emulating debug resources for the guest we instead want
1720          * the guest to control MSR_DE (enable/disable debug interrupts
1721          * on demand).  Supporting both configurations at once is NOT
1722          * possible, so debug resources cannot be shared between QEMU
1723          * and the guest on the BookE architecture.
1724          * In the current design QEMU gets priority over the guest,
1725          * meaning that if QEMU is using the debug resources then the
1726          * guest cannot use them.
1727          * For software breakpoints QEMU uses a privileged instruction,
1728          * so there is no way we can get here because the guest set a
1729          * debug exception; the only possibility is that the guest
1730          * executed a privileged / illegal instruction, which is why we
1731          * inject a program interrupt.
1732          */
1733 
1734         cpu_synchronize_state(cs);
1735         /* env->nip is PC, so increment this by 4 to use
1736          * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1737          */
1738         env->nip += 4;
1739         cs->exception_index = POWERPC_EXCP_PROGRAM;
1740         env->error_code = POWERPC_EXCP_INVAL;
1741         ppc_cpu_do_interrupt(cs);
1742     }
1743 
1744     return handle;
1745 }
1746 
1747 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1748 {
1749     PowerPCCPU *cpu = POWERPC_CPU(cs);
1750     CPUPPCState *env = &cpu->env;
1751     int ret;
1752 
1753     qemu_mutex_lock_iothread();
1754 
1755     switch (run->exit_reason) {
1756     case KVM_EXIT_DCR:
1757         if (run->dcr.is_write) {
1758             DPRINTF("handle dcr write\n");
1759             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1760         } else {
1761             DPRINTF("handle dcr read\n");
1762             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1763         }
1764         break;
1765     case KVM_EXIT_HLT:
1766         DPRINTF("handle halt\n");
1767         ret = kvmppc_handle_halt(cpu);
1768         break;
1769 #if defined(TARGET_PPC64)
1770     case KVM_EXIT_PAPR_HCALL:
1771         DPRINTF("handle PAPR hypercall\n");
1772         run->papr_hcall.ret = spapr_hypercall(cpu,
1773                                               run->papr_hcall.nr,
1774                                               run->papr_hcall.args);
1775         ret = 0;
1776         break;
1777 #endif
1778     case KVM_EXIT_EPR:
1779         DPRINTF("handle epr\n");
1780         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1781         ret = 0;
1782         break;
1783     case KVM_EXIT_WATCHDOG:
1784         DPRINTF("handle watchdog expiry\n");
1785         watchdog_perform_action();
1786         ret = 0;
1787         break;
1788 
1789     case KVM_EXIT_DEBUG:
1790         DPRINTF("handle debug exception\n");
1791         if (kvm_handle_debug(cpu, run)) {
1792             ret = EXCP_DEBUG;
1793             break;
1794         }
1795         /* re-enter, this exception was guest-internal */
1796         ret = 0;
1797         break;
1798 
1799     default:
1800         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1801         ret = -1;
1802         break;
1803     }
1804 
1805     qemu_mutex_unlock_iothread();
1806     return ret;
1807 }
1808 
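/* Set the given bits in the guest's BookE Timer Status Register via
 * the KVM_REG_PPC_OR_TSR one_reg interface. */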
1809 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1810 {
1811     CPUState *cs = CPU(cpu);
1812     uint32_t bits = tsr_bits;
1813     struct kvm_one_reg reg = {
1814         .id = KVM_REG_PPC_OR_TSR,
1815         .addr = (uintptr_t) &bits,
1816     };
1817 
1818     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1819 }
1820 
1821 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1822 {
1824     CPUState *cs = CPU(cpu);
1825     uint32_t bits = tsr_bits;
1826     struct kvm_one_reg reg = {
1827         .id = KVM_REG_PPC_CLEAR_TSR,
1828         .addr = (uintptr_t) &bits,
1829     };
1830 
1831     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1832 }
1833 
1834 int kvmppc_set_tcr(PowerPCCPU *cpu)
1835 {
1836     CPUState *cs = CPU(cpu);
1837     CPUPPCState *env = &cpu->env;
1838     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1839 
1840     struct kvm_one_reg reg = {
1841         .id = KVM_REG_PPC_TCR,
1842         .addr = (uintptr_t) &tcr,
1843     };
1844 
1845     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1846 }
1847 
1848 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1849 {
1850     CPUState *cs = CPU(cpu);
1851     int ret;
1852 
1853     if (!kvm_enabled()) {
1854         return -1;
1855     }
1856 
1857     if (!cap_ppc_watchdog) {
1858         printf("warning: KVM does not support watchdog\n");
1859         return -1;
1860     }
1861 
1862     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1863     if (ret < 0) {
1864         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1865                 __func__, strerror(-ret));
1866         return ret;
1867     }
1868 
1869     return ret;
1870 }
1871 
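/* Look for a line starting with @field in /proc/cpuinfo and copy it
 * (including the field name) into @value.  Returns 0 on success, -1
 * if the file can't be opened or the field isn't present. */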
1872 static int read_cpuinfo(const char *field, char *value, int len)
1873 {
1874     FILE *f;
1875     int ret = -1;
1876     int field_len = strlen(field);
1877     char line[512];
1878 
1879     f = fopen("/proc/cpuinfo", "r");
1880     if (!f) {
1881         return -1;
1882     }
1883 
1884     do {
1885         if (!fgets(line, sizeof(line), f)) {
1886             break;
1887         }
1888         if (!strncmp(line, field, field_len)) {
1889             pstrcpy(value, len, line);
1890             ret = 0;
1891             break;
1892         }
1893     } while (*line);
1894 
1895     fclose(f);
1896 
1897     return ret;
1898 }
1899 
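/* Return the host timebase frequency from /proc/cpuinfo, falling back
 * to NANOSECONDS_PER_SECOND if it can't be determined. */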
1900 uint32_t kvmppc_get_tbfreq(void)
1901 {
1902     char line[512];
1903     char *ns;
1904     uint32_t retval = NANOSECONDS_PER_SECOND;
1905 
1906     if (read_cpuinfo("timebase", line, sizeof(line))) {
1907         return retval;
1908     }
1909 
1910     if (!(ns = strchr(line, ':'))) {
1911         return retval;
1912     }
1913 
1914     ns++;
1915 
1916     return atoi(ns);
1917 }
1918 
1919 bool kvmppc_get_host_serial(char **value)
1920 {
1921     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1922                                NULL);
1923 }
1924 
1925 bool kvmppc_get_host_model(char **value)
1926 {
1927     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1928 }
1929 
1930 /* Try to find a device tree node for a CPU with clock-frequency property */
1931 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1932 {
1933     struct dirent *dirp;
1934     DIR *dp;
1935 
1936     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1937         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1938         return -1;
1939     }
1940 
1941     buf[0] = '\0';
1942     while ((dirp = readdir(dp)) != NULL) {
1943         FILE *f;
1944         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1945                  dirp->d_name);
1946         f = fopen(buf, "r");
1947         if (f) {
1948             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1949             fclose(f);
1950             break;
1951         }
1952         buf[0] = '\0';
1953     }
1954     closedir(dp);
1955     if (buf[0] == '\0') {
1956         printf("Unknown host!\n");
1957         return -1;
1958     }
1959 
1960     return 0;
1961 }
1962 
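/* Read a single big-endian integer (32-bit or 64-bit) property from a
 * device tree file.  Returns -1 if the file can't be opened and 0 if
 * the property has an unexpected length. */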
1963 static uint64_t kvmppc_read_int_dt(const char *filename)
1964 {
1965     union {
1966         uint32_t v32;
1967         uint64_t v64;
1968     } u;
1969     FILE *f;
1970     int len;
1971 
1972     f = fopen(filename, "rb");
1973     if (!f) {
1974         return -1;
1975     }
1976 
1977     len = fread(&u, 1, sizeof(u), f);
1978     fclose(f);
1979     switch (len) {
1980     case 4:
1981         /* property is a 32-bit quantity */
1982         return be32_to_cpu(u.v32);
1983     case 8:
1984         return be64_to_cpu(u.v64);
1985     }
1986 
1987     return 0;
1988 }
1989 
1990 /* Read a CPU node property from the host device tree that's a single
1991  * integer (32-bit or 64-bit).  Returns -1 if the node or property
1992  * can't be found or opened, and 0 if the property format isn't
1993  * understood. */
1994 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1995 {
1996     char buf[PATH_MAX], *tmp;
1997     uint64_t val;
1998 
1999     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
2000         return -1;
2001     }
2002 
2003     tmp = g_strdup_printf("%s/%s", buf, propname);
2004     val = kvmppc_read_int_dt(tmp);
2005     g_free(tmp);
2006 
2007     return val;
2008 }
2009 
2010 uint64_t kvmppc_get_clockfreq(void)
2011 {
2012     return kvmppc_read_int_cpu_dt("clock-frequency");
2013 }
2014 
2015 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2016 {
2017     PowerPCCPU *cpu = ppc_env_get_cpu(env);
2018     CPUState *cs = CPU(cpu);
2019 
2020     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2021         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2022         return 0;
2023     }
2024 
2025     return 1;
2026 }
2027 
2028 int kvmppc_get_hasidle(CPUPPCState *env)
2029 {
2030     struct kvm_ppc_pvinfo pvinfo;
2031 
2032     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2033         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2034         return 1;
2035     }
2036 
2037     return 0;
2038 }
2039 
2040 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2041 {
2042     uint32_t *hc = (uint32_t *)buf;
2043     struct kvm_ppc_pvinfo pvinfo;
2044 
2045     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2046         memcpy(buf, pvinfo.hcall, buf_len);
2047         return 0;
2048     }
2049 
2050     /*
2051      * Fall back to hypercalls that always fail, regardless of endianness:
2052      *
2053      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2054      *     li r3, -1
2055      *     b .+8       (becomes nop in wrong endian)
2056      *     bswap32(li r3, -1)
2057      */
2058 
2059     hc[0] = cpu_to_be32(0x08000048);
2060     hc[1] = cpu_to_be32(0x3860ffff);
2061     hc[2] = cpu_to_be32(0x48000008);
2062     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2063 
2064     return 1;
2065 }
2066 
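/* Enable in-kernel handling of a single hypercall through
 * KVM_CAP_PPC_ENABLE_HCALL. */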
2067 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2068 {
2069     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2070 }
2071 
2072 void kvmppc_enable_logical_ci_hcalls(void)
2073 {
2074     /*
2075      * FIXME: it would be nice if we could detect the cases where
2076      * we're using a device which requires the in-kernel
2077      * implementation of these hcalls but the kernel lacks them,
2078      * and produce a warning.
2079      */
2080     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2081     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2082 }
2083 
2084 void kvmppc_enable_set_mode_hcall(void)
2085 {
2086     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2087 }
2088 
2089 void kvmppc_enable_clear_ref_mod_hcalls(void)
2090 {
2091     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2092     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2093 }
2094 
2095 void kvmppc_set_papr(PowerPCCPU *cpu)
2096 {
2097     CPUState *cs = CPU(cpu);
2098     int ret;
2099 
2100     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2101     if (ret) {
2102         error_report("This vCPU type or KVM version does not support PAPR");
2103         exit(1);
2104     }
2105 
2106     /* Update the capability flag so we sync the right information
2107      * with kvm */
2108     cap_papr = 1;
2109 }
2110 
2111 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2112 {
2113     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2114 }
2115 
2116 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2117 {
2118     CPUState *cs = CPU(cpu);
2119     int ret;
2120 
2121     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2122     if (ret && mpic_proxy) {
2123         error_report("This KVM version does not support EPR");
2124         exit(1);
2125     }
2126 }
2127 
2128 int kvmppc_smt_threads(void)
2129 {
2130     return cap_ppc_smt ? cap_ppc_smt : 1;
2131 }
2132 
2133 int kvmppc_set_smt_threads(int smt)
2134 {
2135     int ret;
2136 
2137     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2138     if (!ret) {
2139         cap_ppc_smt = smt;
2140     }
2141     return ret;
2142 }
2143 
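/* Append a hint listing the VSMT modes supported by the host kernel
 * (or a note that VSMT is not supported) to an error. */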
2144 void kvmppc_hint_smt_possible(Error **errp)
2145 {
2146     int i;
2147     GString *g;
2148     char *s;
2149 
2150     assert(kvm_enabled());
2151     if (cap_ppc_smt_possible) {
2152         g = g_string_new("Available VSMT modes:");
2153         for (i = 63; i >= 0; i--) {
2154             if ((1UL << i) & cap_ppc_smt_possible) {
2155                 g_string_append_printf(g, " %lu", (1UL << i));
2156             }
2157         }
2158         s = g_string_free(g, false);
2159         error_append_hint(errp, "%s.\n", s);
2160         g_free(s);
2161     } else {
2162         error_append_hint(errp,
2163                           "This KVM seems to be too old to support VSMT.\n");
2164     }
2165 }
2166 
2167 
2168 #ifdef TARGET_PPC64
2169 off_t kvmppc_alloc_rma(void **rma)
2170 {
2171     off_t size;
2172     int fd;
2173     struct kvm_allocate_rma ret;
2174 
2175     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2176      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2177      *                      not necessary on this hardware
2178      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2179      *
2180      * FIXME: We should allow the user to force contiguous RMA
2181      * allocation in the cap_ppc_rma==1 case.
2182      */
2183     if (cap_ppc_rma < 2) {
2184         return 0;
2185     }
2186 
2187     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2188     if (fd < 0) {
2189         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2190                 strerror(errno));
2191         return -1;
2192     }
2193 
2194     size = MIN(ret.rma_size, 256ul << 20);
2195 
2196     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2197     if (*rma == MAP_FAILED) {
2198         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2199         return -1;
2200     };
2201 
2202     return size;
2203 }
2204 
2205 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2206 {
2207     struct kvm_ppc_smmu_info info;
2208     long rampagesize, best_page_shift;
2209     int i;
2210 
2211     if (cap_ppc_rma >= 2) {
2212         return current_size;
2213     }
2214 
2215     /* Find the largest hardware supported page size that's less than
2216      * or equal to the (logical) backing page size of guest RAM */
2217     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2218     rampagesize = qemu_getrampagesize();
2219     best_page_shift = 0;
2220 
2221     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2222         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2223 
2224         if (!sps->page_shift) {
2225             continue;
2226         }
2227 
2228         if ((sps->page_shift > best_page_shift)
2229             && ((1UL << sps->page_shift) <= rampagesize)) {
2230             best_page_shift = sps->page_shift;
2231         }
2232     }
2233 
2234     return MIN(current_size,
2235                1ULL << (best_page_shift + hash_shift - 7));
2236 }
2237 #endif
2238 
2239 bool kvmppc_spapr_use_multitce(void)
2240 {
2241     return cap_spapr_multitce;
2242 }
2243 
2244 int kvmppc_spapr_enable_inkernel_multitce(void)
2245 {
2246     int ret;
2247 
2248     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2249                             H_PUT_TCE_INDIRECT, 1);
2250     if (!ret) {
2251         ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2252                                 H_STUFF_TCE, 1);
2253     }
2254 
2255     return ret;
2256 }
2257 
2258 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2259                               uint64_t bus_offset, uint32_t nb_table,
2260                               int *pfd, bool need_vfio)
2261 {
2262     long len;
2263     int fd;
2264     void *table;
2265 
2266     /* Must set fd to -1 so we don't try to munmap when called for
2267      * destroying the table, which the upper layers -will- do
2268      */
2269     *pfd = -1;
2270     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2271         return NULL;
2272     }
2273 
2274     if (cap_spapr_tce_64) {
2275         struct kvm_create_spapr_tce_64 args = {
2276             .liobn = liobn,
2277             .page_shift = page_shift,
2278             .offset = bus_offset >> page_shift,
2279             .size = nb_table,
2280             .flags = 0
2281         };
2282         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2283         if (fd < 0) {
2284             fprintf(stderr,
2285                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2286                     liobn);
2287             return NULL;
2288         }
2289     } else if (cap_spapr_tce) {
2290         uint64_t window_size = (uint64_t) nb_table << page_shift;
2291         struct kvm_create_spapr_tce args = {
2292             .liobn = liobn,
2293             .window_size = window_size,
2294         };
2295         if ((window_size != args.window_size) || bus_offset) {
2296             return NULL;
2297         }
2298         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2299         if (fd < 0) {
2300             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2301                     liobn);
2302             return NULL;
2303         }
2304     } else {
2305         return NULL;
2306     }
2307 
2308     len = nb_table * sizeof(uint64_t);
2309     /* FIXME: round this up to page size */
2310 
2311     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2312     if (table == MAP_FAILED) {
2313         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2314                 liobn);
2315         close(fd);
2316         return NULL;
2317     }
2318 
2319     *pfd = fd;
2320     return table;
2321 }
2322 
2323 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2324 {
2325     long len;
2326 
2327     if (fd < 0) {
2328         return -1;
2329     }
2330 
2331     len = nb_table * sizeof(uint64_t);
2332     if ((munmap(table, len) < 0) ||
2333         (close(fd) < 0)) {
2334         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2335                 strerror(errno));
2336         /* Leak the table */
2337     }
2338 
2339     return 0;
2340 }
2341 
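/* Allocate or reset the hash page table.  Returns the shift of a
 * kernel-managed HPT, 0 if the caller (QEMU) should allocate the HPT
 * itself, or a negative errno on failure. */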
2342 int kvmppc_reset_htab(int shift_hint)
2343 {
2344     uint32_t shift = shift_hint;
2345 
2346     if (!kvm_enabled()) {
2347         /* Full emulation, tell caller to allocate htab itself */
2348         return 0;
2349     }
2350     if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2351         int ret;
2352         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2353         if (ret == -ENOTTY) {
2354             /* At least some versions of PR KVM advertise the
2355              * capability, but don't implement the ioctl().  Oops.
2356              * Return 0 so that we allocate the htab in qemu, as is
2357              * correct for PR. */
2358             return 0;
2359         } else if (ret < 0) {
2360             return ret;
2361         }
2362         return shift;
2363     }
2364 
2365     /* We have a kernel that predates the htab reset calls.  For PR
2366      * KVM, we need to allocate the htab ourselves; an HV KVM of this
2367      * era will already have allocated a fixed 16MB hash table. */
2368     if (kvmppc_is_pr(kvm_state)) {
2369         /* PR - tell caller to allocate htab */
2370         return 0;
2371     } else {
2372         /* HV - assume 16MB kernel allocated htab */
2373         return 24;
2374     }
2375 }
2376 
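/* Read the host Processor Version Register. */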
2377 static inline uint32_t mfpvr(void)
2378 {
2379     uint32_t pvr;
2380 
2381     asm ("mfpvr %0"
2382          : "=r"(pvr));
2383     return pvr;
2384 }
2385 
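/* Set or clear the given flag bits in an insns_flags word. */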
2386 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2387 {
2388     if (on) {
2389         *word |= flags;
2390     } else {
2391         *word &= ~flags;
2392     }
2393 }
2394 
2395 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2396 {
2397     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2398     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2399     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2400 
2401     /* Now fix up the class with information we can query from the host */
2402     pcc->pvr = mfpvr();
2403 
2404     alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2405                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2406     alter_insns(&pcc->insns_flags2, PPC2_VSX,
2407                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2408     alter_insns(&pcc->insns_flags2, PPC2_DFP,
2409                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2410 
2411     if (dcache_size != -1) {
2412         pcc->l1_dcache_size = dcache_size;
2413     }
2414 
2415     if (icache_size != -1) {
2416         pcc->l1_icache_size = icache_size;
2417     }
2418 
2419 #if defined(TARGET_PPC64)
2420     pcc->radix_page_info = kvm_get_radix_page_info();
2421 
2422     if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2423         /*
2424          * POWER9 DD1 has some bugs which make it not really ISA 3.00
2425          * compliant.  More importantly, advertising ISA 3.00
2426          * architected mode may prevent guests from activating
2427          * necessary DD1 workarounds.
2428          */
2429         pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2430                                 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2431     }
2432 #endif /* defined(TARGET_PPC64) */
2433 }
2434 
2435 bool kvmppc_has_cap_epr(void)
2436 {
2437     return cap_epr;
2438 }
2439 
2440 bool kvmppc_has_cap_fixup_hcalls(void)
2441 {
2442     return cap_fixup_hcalls;
2443 }
2444 
2445 bool kvmppc_has_cap_htm(void)
2446 {
2447     return cap_htm;
2448 }
2449 
2450 bool kvmppc_has_cap_mmu_radix(void)
2451 {
2452     return cap_mmu_radix;
2453 }
2454 
2455 bool kvmppc_has_cap_mmu_hash_v3(void)
2456 {
2457     return cap_mmu_hash_v3;
2458 }
2459 
2460 static void kvmppc_get_cpu_characteristics(KVMState *s)
2461 {
2462     struct kvm_ppc_cpu_char c;
2463     int ret;
2464 
2465     /* Assume broken */
2466     cap_ppc_safe_cache = 0;
2467     cap_ppc_safe_bounds_check = 0;
2468     cap_ppc_safe_indirect_branch = 0;
2469 
2470     ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2471     if (!ret) {
2472         return;
2473     }
2474     ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2475     if (ret < 0) {
2476         return;
2477     }
2478     /* Parse and set cap_ppc_safe_cache */
2479     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2480         cap_ppc_safe_cache = 2;
2481     } else if ((c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2482                (c.character & c.character_mask
2483                 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2484         cap_ppc_safe_cache = 1;
2485     }
2486     /* Parse and set cap_ppc_safe_bounds_check */
2487     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2488         cap_ppc_safe_bounds_check = 2;
2489     } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2490         cap_ppc_safe_bounds_check = 1;
2491     }
2492     /* Parse and set cap_ppc_safe_indirect_branch */
2493     if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2494         cap_ppc_safe_indirect_branch = SPAPR_CAP_FIXED_CCD;
2495     } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2496         cap_ppc_safe_indirect_branch = SPAPR_CAP_FIXED_IBS;
2497     }
2498 }
2499 
2500 int kvmppc_get_cap_safe_cache(void)
2501 {
2502     return cap_ppc_safe_cache;
2503 }
2504 
2505 int kvmppc_get_cap_safe_bounds_check(void)
2506 {
2507     return cap_ppc_safe_bounds_check;
2508 }
2509 
2510 int kvmppc_get_cap_safe_indirect_branch(void)
2511 {
2512     return cap_ppc_safe_indirect_branch;
2513 }
2514 
2515 bool kvmppc_has_cap_spapr_vfio(void)
2516 {
2517     return cap_spapr_vfio;
2518 }
2519 
2520 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2521 {
2522     uint32_t host_pvr = mfpvr();
2523     PowerPCCPUClass *pvr_pcc;
2524 
2525     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2526     if (pvr_pcc == NULL) {
2527         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2528     }
2529 
2530     return pvr_pcc;
2531 }
2532 
2533 static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2534 {
2535     TypeInfo type_info = {
2536         .name = TYPE_HOST_POWERPC_CPU,
2537         .class_init = kvmppc_host_cpu_class_init,
2538     };
2539     MachineClass *mc = MACHINE_GET_CLASS(ms);
2540     PowerPCCPUClass *pvr_pcc;
2541     ObjectClass *oc;
2542     DeviceClass *dc;
2543     int i;
2544 
2545     pvr_pcc = kvm_ppc_get_host_cpu_class();
2546     if (pvr_pcc == NULL) {
2547         return -1;
2548     }
2549     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2550     type_register(&type_info);
2551     if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2552         /* override TCG default cpu type with 'host' cpu model */
2553         mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2554     }
2555 
2556     oc = object_class_by_name(type_info.name);
2557     g_assert(oc);
2558 
2559     /*
2560      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2561      * we want "POWER8" to be a "family" alias that points to the current
2562      * host CPU type, too)
2563      */
2564     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2565     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2566         if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2567             char *suffix;
2568 
2569             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2570             suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2571             if (suffix) {
2572                 *suffix = 0;
2573             }
2574             break;
2575         }
2576     }
2577 
2578     return 0;
2579 }
2580 
2581 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2582 {
2583     struct kvm_rtas_token_args args = {
2584         .token = token,
2585     };
2586 
2587     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2588         return -ENOENT;
2589     }
2590 
2591     strncpy(args.name, function, sizeof(args.name));
2592 
2593     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2594 }
2595 
2596 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2597 {
2598     struct kvm_get_htab_fd s = {
2599         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2600         .start_index = index,
2601     };
2602     int ret;
2603 
2604     if (!cap_htab_fd) {
2605         error_setg(errp, "KVM version doesn't support %s the HPT",
2606                    write ? "writing" : "reading");
2607         return -ENOTSUP;
2608     }
2609 
2610     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2611     if (ret < 0) {
2612         error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2613                    write ? "writing" : "reading", write ? "to" : "from",
2614                    strerror(errno));
2615         return -errno;
2616     }
2617 
2618     return ret;
2619 }
2620 
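/* Stream the hash page table from the KVM HTAB fd into the migration
 * stream as a series of (index, n_valid, n_invalid, PTEs) records.
 * Returns 1 once the whole table has been read, 0 if max_ns elapsed
 * first, or a negative value on read error. */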
2621 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2622 {
2623     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2624     uint8_t buf[bufsize];
2625     ssize_t rc;
2626 
2627     do {
2628         rc = read(fd, buf, bufsize);
2629         if (rc < 0) {
2630             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2631                     strerror(errno));
2632             return rc;
2633         } else if (rc) {
2634             uint8_t *buffer = buf;
2635             ssize_t n = rc;
2636             while (n) {
2637                 struct kvm_get_htab_header *head =
2638                     (struct kvm_get_htab_header *) buffer;
2639                 size_t chunksize = sizeof(*head) +
2640                      HASH_PTE_SIZE_64 * head->n_valid;
2641 
2642                 qemu_put_be32(f, head->index);
2643                 qemu_put_be16(f, head->n_valid);
2644                 qemu_put_be16(f, head->n_invalid);
2645                 qemu_put_buffer(f, (void *)(head + 1),
2646                                 HASH_PTE_SIZE_64 * head->n_valid);
2647 
2648                 buffer += chunksize;
2649                 n -= chunksize;
2650             }
2651         }
2652     } while ((rc != 0)
2653              && ((max_ns < 0)
2654                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2655 
2656     return (rc == 0) ? 1 : 0;
2657 }
2658 
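/* Feed one (index, n_valid, n_invalid) HPT record from the migration
 * stream back into the KVM HTAB fd. */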
2659 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2660                            uint16_t n_valid, uint16_t n_invalid)
2661 {
2662     struct kvm_get_htab_header *buf;
2663     size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
2664     ssize_t rc;
2665 
2666     buf = alloca(chunksize);
2667     buf->index = index;
2668     buf->n_valid = n_valid;
2669     buf->n_invalid = n_invalid;
2670 
2671     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);
2672 
2673     rc = write(fd, buf, chunksize);
2674     if (rc < 0) {
2675         fprintf(stderr, "Error writing KVM hash table: %s\n",
2676                 strerror(errno));
2677         return rc;
2678     }
2679     if (rc != chunksize) {
2680         /* We should never get a short write on a single chunk */
2681         fprintf(stderr, "Short write, restoring KVM hash table\n");
2682         return -1;
2683     }
2684     return 0;
2685 }
2686 
2687 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2688 {
2689     return true;
2690 }
2691 
2692 void kvm_arch_init_irq_routing(KVMState *s)
2693 {
2694 }
2695 
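/* Read @n HPTEs starting at index @ptex through the KVM HTAB fd,
 * storing them in @hptes.  Ranges reported as invalid by the kernel
 * are zero-filled. */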
2696 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2697 {
2698     int fd, rc;
2699     int i;
2700 
2701     fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2702 
2703     i = 0;
2704     while (i < n) {
2705         struct kvm_get_htab_header *hdr;
2706         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2707         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2708 
2709         rc = read(fd, buf, sizeof(buf));
2710         if (rc < 0) {
2711             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2712         }
2713 
2714         hdr = (struct kvm_get_htab_header *)buf;
2715         while ((i < n) && ((char *)hdr < (buf + rc))) {
2716             int invalid = hdr->n_invalid, valid = hdr->n_valid;
2717 
2718             if (hdr->index != (ptex + i)) {
2719                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2720                          " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2721             }
2722 
2723             if (n - i < valid) {
2724                 valid = n - i;
2725             }
2726             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
2727             i += valid;
2728 
2729             if ((n - i) < invalid) {
2730                 invalid = n - i;
2731             }
2732             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2733             i += invalid;
2734 
2735             hdr = (struct kvm_get_htab_header *)
2736                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2737         }
2738     }
2739 
2740     close(fd);
2741 }
2742 
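/* Write a single HPTE at index @ptex through the KVM HTAB fd. */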
2743 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2744 {
2745     int fd, rc;
2746     struct {
2747         struct kvm_get_htab_header hdr;
2748         uint64_t pte0;
2749         uint64_t pte1;
2750     } buf;
2751 
2752     fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2753 
2754     buf.hdr.n_valid = 1;
2755     buf.hdr.n_invalid = 0;
2756     buf.hdr.index = ptex;
2757     buf.pte0 = cpu_to_be64(pte0);
2758     buf.pte1 = cpu_to_be64(pte1);
2759 
2760     rc = write(fd, &buf, sizeof(buf));
2761     if (rc != sizeof(buf)) {
2762         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2763     }
2764     close(fd);
2765 }
2766 
2767 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2768                              uint64_t address, uint32_t data, PCIDevice *dev)
2769 {
2770     return 0;
2771 }
2772 
2773 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2774                                 int vector, PCIDevice *dev)
2775 {
2776     return 0;
2777 }
2778 
2779 int kvm_arch_release_virq_post(int virq)
2780 {
2781     return 0;
2782 }
2783 
2784 int kvm_arch_msi_data_to_gsi(uint32_t data)
2785 {
2786     return data & 0xffff;
2787 }
2788 
2789 int kvmppc_enable_hwrng(void)
2790 {
2791     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2792         return -1;
2793     }
2794 
2795     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2796 }
2797 
2798 void kvmppc_check_papr_resize_hpt(Error **errp)
2799 {
2800     if (!kvm_enabled()) {
2801         return; /* No KVM, we're good */
2802     }
2803 
2804     if (cap_resize_hpt) {
2805         return; /* Kernel has explicit support, we're good */
2806     }
2807 
2808     /* Otherwise fallback on looking for PR KVM */
2809     if (kvmppc_is_pr(kvm_state)) {
2810         return;
2811     }
2812 
2813     error_setg(errp,
2814                "Hash page table resizing not available with this KVM version");
2815 }
2816 
2817 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2818 {
2819     CPUState *cs = CPU(cpu);
2820     struct kvm_ppc_resize_hpt rhpt = {
2821         .flags = flags,
2822         .shift = shift,
2823     };
2824 
2825     if (!cap_resize_hpt) {
2826         return -ENOSYS;
2827     }
2828 
2829     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2830 }
2831 
2832 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2833 {
2834     CPUState *cs = CPU(cpu);
2835     struct kvm_ppc_resize_hpt rhpt = {
2836         .flags = flags,
2837         .shift = shift,
2838     };
2839 
2840     if (!cap_resize_hpt) {
2841         return -ENOSYS;
2842     }
2843 
2844     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2845 }
2846 
2847 /*
2848  * This is a helper function to detect a post migration scenario
2849  * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2850  * the guest kernel can't handle a PVR value other than the actual host
2851  * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2852  *
2853  * If we don't have cap_ppc_pvr_compat and we're not running in PR
2854  * (so, we're HV), return true. The workaround itself is done in
2855  * cpu_post_load.
2856  *
2857  * The order here is important: we'll only check for KVM PR as a
2858  * fallback if the guest kernel can't handle the situation itself.
2859  * We need to avoid as much as possible querying the running KVM type
2860  * in QEMU level.
2861  */
2862 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2863 {
2864     CPUState *cs = CPU(cpu);
2865 
2866     if (!kvm_enabled()) {
2867         return false;
2868     }
2869 
2870     if (cap_ppc_pvr_compat) {
2871         return false;
2872     }
2873 
2874     return !kvmppc_is_pr(cs->kvm_state);
2875 }
2876