1 /*
2  * PowerPC implementation of KVM hooks
3  *
4  * Copyright IBM Corp. 2007
5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
6  *
7  * Authors:
8  *  Jerone Young <jyoung5@us.ibm.com>
9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10  *  Hollis Blanchard <hollisb@us.ibm.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
13  * See the COPYING file in the top-level directory.
14  *
15  */
16 
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21 
22 #include <linux/kvm.h>
23 
24 #include "qemu-common.h"
25 #include "qemu/error-report.h"
26 #include "cpu.h"
27 #include "cpu-models.h"
28 #include "qemu/timer.h"
29 #include "sysemu/sysemu.h"
30 #include "sysemu/hw_accel.h"
31 #include "kvm_ppc.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
35 
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/spapr_cpu_core.h"
40 #include "hw/ppc/ppc.h"
41 #include "sysemu/watchdog.h"
42 #include "trace.h"
43 #include "exec/gdbstub.h"
44 #include "exec/memattrs.h"
45 #include "exec/ram_addr.h"
46 #include "sysemu/hostmem.h"
47 #include "qemu/cutils.h"
48 #include "qemu/mmap-alloc.h"
49 #if defined(TARGET_PPC64)
50 #include "hw/ppc/spapr_cpu_core.h"
51 #endif
52 #include "elf.h"
53 #include "sysemu/kvm_int.h"
54 
55 //#define DEBUG_KVM
56 
57 #ifdef DEBUG_KVM
58 #define DPRINTF(fmt, ...) \
59     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
60 #else
61 #define DPRINTF(fmt, ...) \
62     do { } while (0)
63 #endif
64 
65 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
66 
67 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
68     KVM_CAP_LAST_INFO
69 };
70 
71 static int cap_interrupt_unset = false;
72 static int cap_interrupt_level = false;
73 static int cap_segstate;
74 static int cap_booke_sregs;
75 static int cap_ppc_smt;
76 static int cap_ppc_rma;
77 static int cap_spapr_tce;
78 static int cap_spapr_tce_64;
79 static int cap_spapr_multitce;
80 static int cap_spapr_vfio;
81 static int cap_hior;
82 static int cap_one_reg;
83 static int cap_epr;
84 static int cap_ppc_watchdog;
85 static int cap_papr;
86 static int cap_htab_fd;
87 static int cap_fixup_hcalls;
88 static int cap_htm;             /* Hardware transactional memory support */
89 
90 static uint32_t debug_inst_opcode;
91 
92 /* XXX We have a race condition where we actually have a level triggered
93  *     interrupt, but the infrastructure can't expose that yet, so the guest
 94  *     takes the interrupt but ignores it, goes to sleep and never gets
 95  *     notified that there's still an interrupt pending.
96  *
97  *     As a quick workaround, let's just wake up again 20 ms after we injected
 98  *     an interrupt. That way we can ensure that we're always reinjecting
99  *     interrupts in case the guest swallowed them.
100  */
101 static QEMUTimer *idle_timer;
102 
103 static void kvm_kick_cpu(void *opaque)
104 {
105     PowerPCCPU *cpu = opaque;
106 
107     qemu_cpu_kick(CPU(cpu));
108 }
109 
110 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
111  * should only be used for fallback tests - generally we should use
112  * explicit capabilities for the features we want, rather than
113  * assuming what is/isn't available depending on the KVM variant. */
114 static bool kvmppc_is_pr(KVMState *ks)
115 {
116     /* Assume KVM-PR if the GET_PVINFO capability is available */
117     return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
118 }
119 
120 static int kvm_ppc_register_host_cpu_type(void);
121 
122 int kvm_arch_init(MachineState *ms, KVMState *s)
123 {
124     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
125     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
126     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
127     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
128     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
129     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
130     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
131     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
132     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
133     cap_spapr_vfio = false;
134     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
135     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
136     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
137     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
138     /* Note: we don't set cap_papr here, because this capability is
 139      * only activated later, by kvmppc_set_papr() */
140     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
141     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
142     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
143 
144     if (!cap_interrupt_level) {
145         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
146                         "VM to stall at times!\n");
147     }
148 
149     kvm_ppc_register_host_cpu_type();
150 
151     return 0;
152 }
153 
154 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
155 {
156     return 0;
157 }
158 
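/* Push the guest PVR to KVM via the SREGS interface; BookE always runs
 * with the host's native PVR, so this is a no-op there. */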
159 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
160 {
161     CPUPPCState *cenv = &cpu->env;
162     CPUState *cs = CPU(cpu);
163     struct kvm_sregs sregs;
164     int ret;
165 
166     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
167         /* What we're really trying to say is "if we're on BookE, we use
168            the native PVR for now". This is the only sane way to check
 169            it though, so we may mislead users into thinking they can run
 170            BookE guests on BookS. Let's hope nobody dares to try :) */
171         return 0;
172     } else {
173         if (!cap_segstate) {
174             fprintf(stderr, "kvm error: missing PVR setting capability\n");
175             return -ENOSYS;
176         }
177     }
178 
179     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
180     if (ret) {
181         return ret;
182     }
183 
184     sregs.pvr = cenv->spr[SPR_PVR];
185     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
186 }
187 
188 /* Set up a shared TLB array with KVM */
189 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
190 {
191     CPUPPCState *env = &cpu->env;
192     CPUState *cs = CPU(cpu);
193     struct kvm_book3e_206_tlb_params params = {};
194     struct kvm_config_tlb cfg = {};
195     unsigned int entries = 0;
196     int ret, i;
197 
198     if (!kvm_enabled() ||
199         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
200         return 0;
201     }
202 
203     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
204 
205     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
206         params.tlb_sizes[i] = booke206_tlb_size(env, i);
207         params.tlb_ways[i] = booke206_tlb_ways(env, i);
208         entries += params.tlb_sizes[i];
209     }
210 
211     assert(entries == env->nb_tlb);
212     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
213 
214     env->tlb_dirty = true;
215 
216     cfg.array = (uintptr_t)env->tlb.tlbm;
217     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
218     cfg.params = (uintptr_t)&params;
219     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
220 
221     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
222     if (ret < 0) {
223         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
224                 __func__, strerror(-ret));
225         return ret;
226     }
227 
228     env->kvm_sw_tlb = true;
229     return 0;
230 }
231 
232 
233 #if defined(TARGET_PPC64)
234 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
235                                        struct kvm_ppc_smmu_info *info)
236 {
237     CPUPPCState *env = &cpu->env;
238     CPUState *cs = CPU(cpu);
239 
240     memset(info, 0, sizeof(*info));
241 
242     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 243      * we need to "guess" what the supported page sizes are.
244      *
245      * For that to work we make a few assumptions:
246      *
247      * - Check whether we are running "PR" KVM which only supports 4K
248      *   and 16M pages, but supports them regardless of the backing
 249      *   store characteristics. We also don't support 1T segments.
250      *
 251      *   This is safe because if HV KVM ever supports that capability or
 252      *   PR KVM grows support for more page/segment sizes, those versions
 253      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 254      *   will not hit this fallback.
255      *
256      * - Else we are running HV KVM. This means we only support page
257      *   sizes that fit in the backing store. Additionally we only
 258      *   advertise 64K pages if the processor is ARCH 2.06 and we assume
259      *   P7 encodings for the SLB and hash table. Here too, we assume
260      *   support for any newer processor will mean a kernel that
261      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
262      *   this fallback.
263      */
264     if (kvmppc_is_pr(cs->kvm_state)) {
265         /* No flags */
266         info->flags = 0;
267         info->slb_size = 64;
268 
269         /* Standard 4k base page size segment */
270         info->sps[0].page_shift = 12;
271         info->sps[0].slb_enc = 0;
272         info->sps[0].enc[0].page_shift = 12;
273         info->sps[0].enc[0].pte_enc = 0;
274 
275         /* Standard 16M large page size segment */
276         info->sps[1].page_shift = 24;
277         info->sps[1].slb_enc = SLB_VSID_L;
278         info->sps[1].enc[0].page_shift = 24;
279         info->sps[1].enc[0].pte_enc = 0;
280     } else {
281         int i = 0;
282 
283         /* HV KVM has backing store size restrictions */
284         info->flags = KVM_PPC_PAGE_SIZES_REAL;
285 
286         if (env->mmu_model & POWERPC_MMU_1TSEG) {
287             info->flags |= KVM_PPC_1T_SEGMENTS;
288         }
289 
290         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
291            POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
292             info->slb_size = 32;
293         } else {
294             info->slb_size = 64;
295         }
296 
297         /* Standard 4k base page size segment */
298         info->sps[i].page_shift = 12;
299         info->sps[i].slb_enc = 0;
300         info->sps[i].enc[0].page_shift = 12;
301         info->sps[i].enc[0].pte_enc = 0;
302         i++;
303 
304         /* 64K on MMU 2.06 and later */
305         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
306             POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
307             info->sps[i].page_shift = 16;
308             info->sps[i].slb_enc = 0x110;
309             info->sps[i].enc[0].page_shift = 16;
310             info->sps[i].enc[0].pte_enc = 1;
311             i++;
312         }
313 
314         /* Standard 16M large page size segment */
315         info->sps[i].page_shift = 24;
316         info->sps[i].slb_enc = SLB_VSID_L;
317         info->sps[i].enc[0].page_shift = 24;
318         info->sps[i].enc[0].pte_enc = 0;
319     }
320 }
321 
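/* Query KVM for the supported segment and page sizes, falling back to
 * conservative guesses on kernels without KVM_CAP_PPC_GET_SMMU_INFO. */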
322 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
323 {
324     CPUState *cs = CPU(cpu);
325     int ret;
326 
327     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
328         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
329         if (ret == 0) {
330             return;
331         }
332     }
333 
334     kvm_get_fallback_smmu_info(cpu, info);
335 }
336 
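/* Fetch the radix MMU page size encodings from KVM, or return NULL if
 * the host kernel has no radix MMU support. */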
337 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
338 {
339     KVMState *s = KVM_STATE(current_machine->accelerator);
340     struct ppc_radix_page_info *radix_page_info;
341     struct kvm_ppc_rmmu_info rmmu_info;
342     int i;
343 
344     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
345         return NULL;
346     }
347     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
348         return NULL;
349     }
350     radix_page_info = g_malloc0(sizeof(*radix_page_info));
351     radix_page_info->count = 0;
352     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
353         if (rmmu_info.ap_encodings[i]) {
354             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
355             radix_page_info->count++;
356         }
357     }
358     return radix_page_info;
359 }
360 
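/* With KVM_PPC_PAGE_SIZES_REAL (HV KVM) a page size is only usable if
 * it does not exceed the page size backing guest RAM; otherwise any
 * size is acceptable. */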
361 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
362 {
363     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
364         return true;
365     }
366 
367     return (1ul << shift) <= rampgsize;
368 }
369 
370 static long max_cpu_page_size;
371 
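/* Narrow the CPU's advertised segment/page sizes down to what both KVM
 * and the RAM backing page size can support (64-bit server MMUs only). */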
372 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
373 {
374     static struct kvm_ppc_smmu_info smmu_info;
375     static bool has_smmu_info;
376     CPUPPCState *env = &cpu->env;
377     int iq, ik, jq, jk;
378     bool has_64k_pages = false;
379 
380     /* We only handle page sizes for 64-bit server guests for now */
381     if (!(env->mmu_model & POWERPC_MMU_64)) {
382         return;
383     }
384 
385     /* Collect MMU info from kernel if not already */
386     if (!has_smmu_info) {
387         kvm_get_smmu_info(cpu, &smmu_info);
388         has_smmu_info = true;
389     }
390 
391     if (!max_cpu_page_size) {
392         max_cpu_page_size = qemu_getrampagesize();
393     }
394 
395     /* Convert to QEMU form */
396     memset(&env->sps, 0, sizeof(env->sps));
397 
398     /* If we have HV KVM, we need to forbid CI large pages if our
399      * host page size is smaller than 64K.
400      */
401     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
402         env->ci_large_pages = getpagesize() >= 0x10000;
403     }
404 
405     /*
 406      * XXX This loop should be an entry-wide AND of the capabilities that
407      *     the selected CPU has with the capabilities that KVM supports.
408      */
409     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
410         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
411         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
412 
413         if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
414                                  ksps->page_shift)) {
415             continue;
416         }
417         qsps->page_shift = ksps->page_shift;
418         qsps->slb_enc = ksps->slb_enc;
419         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
420             if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
421                                      ksps->enc[jk].page_shift)) {
422                 continue;
423             }
424             if (ksps->enc[jk].page_shift == 16) {
425                 has_64k_pages = true;
426             }
427             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
428             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
429             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
430                 break;
431             }
432         }
433         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
434             break;
435         }
436     }
437     env->slb_nr = smmu_info.slb_size;
438     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
439         env->mmu_model &= ~POWERPC_MMU_1TSEG;
440     }
441     if (!has_64k_pages) {
442         env->mmu_model &= ~POWERPC_MMU_64K;
443     }
444 }
445 
446 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
447 {
448     Object *mem_obj = object_resolve_path(obj_path, NULL);
449     char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
450     long pagesize;
451 
452     if (mempath) {
453         pagesize = qemu_mempath_getpagesize(mempath);
454     } else {
455         pagesize = getpagesize();
456     }
457 
458     return pagesize >= max_cpu_page_size;
459 }
460 
461 #else /* defined (TARGET_PPC64) */
462 
463 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
464 {
465 }
466 
467 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
468 {
469     return true;
470 }
471 
472 #endif /* !defined (TARGET_PPC64) */
473 
474 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
475 {
476     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
477 }
478 
 479 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
 480  * book3s supports only 1 watchpoint, so an array size
 481  * of 4 is sufficient for now.
482  */
483 #define MAX_HW_BKPTS 4
484 
485 static struct HWBreakpoint {
486     target_ulong addr;
487     int type;
488 } hw_debug_points[MAX_HW_BKPTS];
489 
490 static CPUWatchpoint hw_watchpoint;
491 
 492 /* By default no hardware breakpoints or watchpoints are supported */
493 static int max_hw_breakpoint;
494 static int max_hw_watchpoint;
495 static int nb_hw_breakpoint;
496 static int nb_hw_watchpoint;
497 
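/* Only BookE (e500) currently advertises hardware debug registers to
 * QEMU: 2 breakpoints and 2 watchpoints; other families leave the
 * limits at 0. */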
498 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
499 {
500     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
501         max_hw_breakpoint = 2;
502         max_hw_watchpoint = 2;
503     }
504 
505     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
506         fprintf(stderr, "Error initializing h/w breakpoints\n");
507         return;
508     }
509 }
510 
511 int kvm_arch_init_vcpu(CPUState *cs)
512 {
513     PowerPCCPU *cpu = POWERPC_CPU(cs);
514     CPUPPCState *cenv = &cpu->env;
515     int ret;
516 
517     /* Gather server mmu info from KVM and update the CPU state */
518     kvm_fixup_page_sizes(cpu);
519 
520     /* Synchronize sregs with kvm */
521     ret = kvm_arch_sync_sregs(cpu);
522     if (ret) {
523         if (ret == -EINVAL) {
524             error_report("Register sync failed... If you're using kvm-hv.ko,"
525                          " only \"-cpu host\" is possible");
526         }
527         return ret;
528     }
529 
530     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
531 
532     switch (cenv->mmu_model) {
533     case POWERPC_MMU_BOOKE206:
534         /* This target supports access to KVM's guest TLB */
535         ret = kvm_booke206_tlb_init(cpu);
536         break;
537     case POWERPC_MMU_2_07:
538         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
 539             /* KVM-HV supports transactional memory on POWER8 even without
 540              * the KVM_CAP_PPC_HTM extension, so enable it here instead as
 541              * long as it's available to userspace on the host. */
542             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
543                 cap_htm = true;
544             }
545         }
546         break;
547     default:
548         break;
549     }
550 
551     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
552     kvmppc_hw_debug_points_init(cenv);
553 
554     return ret;
555 }
556 
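/* Mark every entry of the shared software TLB array dirty so that KVM
 * re-reads the whole array on the next vcpu entry. */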
557 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
558 {
559     CPUPPCState *env = &cpu->env;
560     CPUState *cs = CPU(cpu);
561     struct kvm_dirty_tlb dirty_tlb;
562     unsigned char *bitmap;
563     int ret;
564 
565     if (!env->kvm_sw_tlb) {
566         return;
567     }
568 
569     bitmap = g_malloc((env->nb_tlb + 7) / 8);
570     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
571 
572     dirty_tlb.bitmap = (uintptr_t)bitmap;
573     dirty_tlb.num_dirty = env->nb_tlb;
574 
575     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
576     if (ret) {
577         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
578                 __func__, strerror(-ret));
579     }
580 
581     g_free(bitmap);
582 }
583 
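/* Read one SPR from KVM through the ONE_REG interface; the register
 * width is encoded in the id (kvm_put_one_spr below is the write
 * counterpart). */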
584 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
585 {
586     PowerPCCPU *cpu = POWERPC_CPU(cs);
587     CPUPPCState *env = &cpu->env;
588     union {
589         uint32_t u32;
590         uint64_t u64;
591     } val;
592     struct kvm_one_reg reg = {
593         .id = id,
594         .addr = (uintptr_t) &val,
595     };
596     int ret;
597 
598     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
599     if (ret != 0) {
600         trace_kvm_failed_spr_get(spr, strerror(errno));
601     } else {
602         switch (id & KVM_REG_SIZE_MASK) {
603         case KVM_REG_SIZE_U32:
604             env->spr[spr] = val.u32;
605             break;
606 
607         case KVM_REG_SIZE_U64:
608             env->spr[spr] = val.u64;
609             break;
610 
611         default:
612             /* Don't handle this size yet */
613             abort();
614         }
615     }
616 }
617 
618 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
619 {
620     PowerPCCPU *cpu = POWERPC_CPU(cs);
621     CPUPPCState *env = &cpu->env;
622     union {
623         uint32_t u32;
624         uint64_t u64;
625     } val;
626     struct kvm_one_reg reg = {
627         .id = id,
628         .addr = (uintptr_t) &val,
629     };
630     int ret;
631 
632     switch (id & KVM_REG_SIZE_MASK) {
633     case KVM_REG_SIZE_U32:
634         val.u32 = env->spr[spr];
635         break;
636 
637     case KVM_REG_SIZE_U64:
638         val.u64 = env->spr[spr];
639         break;
640 
641     default:
642         /* Don't handle this size yet */
643         abort();
644     }
645 
646     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
647     if (ret != 0) {
648         trace_kvm_failed_spr_set(spr, strerror(errno));
649     }
650 }
651 
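/* Copy the guest floating-point, VSX and Altivec state (FPSCR,
 * FPRs/VSRs, VSCR, VRs) from QEMU to KVM. */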
652 static int kvm_put_fp(CPUState *cs)
653 {
654     PowerPCCPU *cpu = POWERPC_CPU(cs);
655     CPUPPCState *env = &cpu->env;
656     struct kvm_one_reg reg;
657     int i;
658     int ret;
659 
660     if (env->insns_flags & PPC_FLOAT) {
661         uint64_t fpscr = env->fpscr;
662         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
663 
664         reg.id = KVM_REG_PPC_FPSCR;
665         reg.addr = (uintptr_t)&fpscr;
666         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
667         if (ret < 0) {
668             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
669             return ret;
670         }
671 
672         for (i = 0; i < 32; i++) {
673             uint64_t vsr[2];
674 
675 #ifdef HOST_WORDS_BIGENDIAN
676             vsr[0] = float64_val(env->fpr[i]);
677             vsr[1] = env->vsr[i];
678 #else
679             vsr[0] = env->vsr[i];
680             vsr[1] = float64_val(env->fpr[i]);
681 #endif
682             reg.addr = (uintptr_t) &vsr;
683             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
684 
685             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
686             if (ret < 0) {
687                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
688                         i, strerror(errno));
689                 return ret;
690             }
691         }
692     }
693 
694     if (env->insns_flags & PPC_ALTIVEC) {
695         reg.id = KVM_REG_PPC_VSCR;
696         reg.addr = (uintptr_t)&env->vscr;
697         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
698         if (ret < 0) {
699             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
700             return ret;
701         }
702 
703         for (i = 0; i < 32; i++) {
704             reg.id = KVM_REG_PPC_VR(i);
705             reg.addr = (uintptr_t)&env->avr[i];
706             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
707             if (ret < 0) {
708                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
709                 return ret;
710             }
711         }
712     }
713 
714     return 0;
715 }
716 
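/* Copy the guest floating-point, VSX and Altivec state from KVM back
 * into the QEMU CPU state. */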
717 static int kvm_get_fp(CPUState *cs)
718 {
719     PowerPCCPU *cpu = POWERPC_CPU(cs);
720     CPUPPCState *env = &cpu->env;
721     struct kvm_one_reg reg;
722     int i;
723     int ret;
724 
725     if (env->insns_flags & PPC_FLOAT) {
726         uint64_t fpscr;
727         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
728 
729         reg.id = KVM_REG_PPC_FPSCR;
730         reg.addr = (uintptr_t)&fpscr;
731         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
732         if (ret < 0) {
733             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
734             return ret;
735         } else {
736             env->fpscr = fpscr;
737         }
738 
739         for (i = 0; i < 32; i++) {
740             uint64_t vsr[2];
741 
742             reg.addr = (uintptr_t) &vsr;
743             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
744 
745             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
746             if (ret < 0) {
747                 DPRINTF("Unable to get %s%d from KVM: %s\n",
748                         vsx ? "VSR" : "FPR", i, strerror(errno));
749                 return ret;
750             } else {
751 #ifdef HOST_WORDS_BIGENDIAN
752                 env->fpr[i] = vsr[0];
753                 if (vsx) {
754                     env->vsr[i] = vsr[1];
755                 }
756 #else
757                 env->fpr[i] = vsr[1];
758                 if (vsx) {
759                     env->vsr[i] = vsr[0];
760                 }
761 #endif
762             }
763         }
764     }
765 
766     if (env->insns_flags & PPC_ALTIVEC) {
767         reg.id = KVM_REG_PPC_VSCR;
768         reg.addr = (uintptr_t)&env->vscr;
769         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
770         if (ret < 0) {
771             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
772             return ret;
773         }
774 
775         for (i = 0; i < 32; i++) {
776             reg.id = KVM_REG_PPC_VR(i);
777             reg.addr = (uintptr_t)&env->avr[i];
778             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
779             if (ret < 0) {
780                 DPRINTF("Unable to get VR%d from KVM: %s\n",
781                         i, strerror(errno));
782                 return ret;
783             }
784         }
785     }
786 
787     return 0;
788 }
789 
790 #if defined(TARGET_PPC64)
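/* Read the registration state of the PAPR per-vCPU areas (VPA, SLB
 * shadow buffer, dispatch trace log) from KVM. */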
791 static int kvm_get_vpa(CPUState *cs)
792 {
793     PowerPCCPU *cpu = POWERPC_CPU(cs);
794     CPUPPCState *env = &cpu->env;
795     struct kvm_one_reg reg;
796     int ret;
797 
798     reg.id = KVM_REG_PPC_VPA_ADDR;
799     reg.addr = (uintptr_t)&env->vpa_addr;
800     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
801     if (ret < 0) {
802         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
803         return ret;
804     }
805 
806     assert((uintptr_t)&env->slb_shadow_size
807            == ((uintptr_t)&env->slb_shadow_addr + 8));
808     reg.id = KVM_REG_PPC_VPA_SLB;
809     reg.addr = (uintptr_t)&env->slb_shadow_addr;
810     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
811     if (ret < 0) {
812         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
813                 strerror(errno));
814         return ret;
815     }
816 
817     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
818     reg.id = KVM_REG_PPC_VPA_DTL;
819     reg.addr = (uintptr_t)&env->dtl_addr;
820     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
821     if (ret < 0) {
822         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
823                 strerror(errno));
824         return ret;
825     }
826 
827     return 0;
828 }
829 
830 static int kvm_put_vpa(CPUState *cs)
831 {
832     PowerPCCPU *cpu = POWERPC_CPU(cs);
833     CPUPPCState *env = &cpu->env;
834     struct kvm_one_reg reg;
835     int ret;
836 
837     /* SLB shadow or DTL can't be registered unless a master VPA is
838      * registered.  That means when restoring state, if a VPA *is*
839      * registered, we need to set that up first.  If not, we need to
840      * deregister the others before deregistering the master VPA */
841     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
842 
843     if (env->vpa_addr) {
844         reg.id = KVM_REG_PPC_VPA_ADDR;
845         reg.addr = (uintptr_t)&env->vpa_addr;
846         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
847         if (ret < 0) {
848             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
849             return ret;
850         }
851     }
852 
853     assert((uintptr_t)&env->slb_shadow_size
854            == ((uintptr_t)&env->slb_shadow_addr + 8));
855     reg.id = KVM_REG_PPC_VPA_SLB;
856     reg.addr = (uintptr_t)&env->slb_shadow_addr;
857     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
858     if (ret < 0) {
859         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
860         return ret;
861     }
862 
863     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
864     reg.id = KVM_REG_PPC_VPA_DTL;
865     reg.addr = (uintptr_t)&env->dtl_addr;
866     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
867     if (ret < 0) {
868         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
869                 strerror(errno));
870         return ret;
871     }
872 
873     if (!env->vpa_addr) {
874         reg.id = KVM_REG_PPC_VPA_ADDR;
875         reg.addr = (uintptr_t)&env->vpa_addr;
876         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
877         if (ret < 0) {
878             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
879             return ret;
880         }
881     }
882 
883     return 0;
884 }
885 #endif /* TARGET_PPC64 */
886 
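/* Write the Book3S segment state (PVR, SDR1, SLB, segment registers
 * and BATs) to KVM in a single KVM_SET_SREGS call. */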
887 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
888 {
889     CPUPPCState *env = &cpu->env;
890     struct kvm_sregs sregs;
891     int i;
892 
893     sregs.pvr = env->spr[SPR_PVR];
894 
895     sregs.u.s.sdr1 = env->spr[SPR_SDR1];
896 
897     /* Sync SLB */
898 #ifdef TARGET_PPC64
899     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
900         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
901         if (env->slb[i].esid & SLB_ESID_V) {
902             sregs.u.s.ppc64.slb[i].slbe |= i;
903         }
904         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
905     }
906 #endif
907 
908     /* Sync SRs */
909     for (i = 0; i < 16; i++) {
910         sregs.u.s.ppc32.sr[i] = env->sr[i];
911     }
912 
913     /* Sync BATs */
914     for (i = 0; i < 8; i++) {
915         /* Beware. We have to swap upper and lower bits here */
916         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
917             | env->DBAT[1][i];
918         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
919             | env->IBAT[1][i];
920     }
921 
922     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
923 }
924 
925 int kvm_arch_put_registers(CPUState *cs, int level)
926 {
927     PowerPCCPU *cpu = POWERPC_CPU(cs);
928     CPUPPCState *env = &cpu->env;
929     struct kvm_regs regs;
930     int ret;
931     int i;
932 
933     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
934     if (ret < 0) {
935         return ret;
936     }
937 
938     regs.ctr = env->ctr;
939     regs.lr  = env->lr;
940     regs.xer = cpu_read_xer(env);
941     regs.msr = env->msr;
942     regs.pc = env->nip;
943 
944     regs.srr0 = env->spr[SPR_SRR0];
945     regs.srr1 = env->spr[SPR_SRR1];
946 
947     regs.sprg0 = env->spr[SPR_SPRG0];
948     regs.sprg1 = env->spr[SPR_SPRG1];
949     regs.sprg2 = env->spr[SPR_SPRG2];
950     regs.sprg3 = env->spr[SPR_SPRG3];
951     regs.sprg4 = env->spr[SPR_SPRG4];
952     regs.sprg5 = env->spr[SPR_SPRG5];
953     regs.sprg6 = env->spr[SPR_SPRG6];
954     regs.sprg7 = env->spr[SPR_SPRG7];
955 
956     regs.pid = env->spr[SPR_BOOKE_PID];
957 
 958     for (i = 0; i < 32; i++)
959         regs.gpr[i] = env->gpr[i];
960 
961     regs.cr = 0;
962     for (i = 0; i < 8; i++) {
963         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
964     }
965 
966     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
967     if (ret < 0)
968         return ret;
969 
970     kvm_put_fp(cs);
971 
972     if (env->tlb_dirty) {
973         kvm_sw_tlb_put(cpu);
974         env->tlb_dirty = false;
975     }
976 
977     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
978         ret = kvmppc_put_books_sregs(cpu);
979         if (ret < 0) {
980             return ret;
981         }
982     }
983 
984     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
985         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
986     }
987 
988     if (cap_one_reg) {
989         int i;
990 
 991         /* We deliberately ignore errors here: for kernels which have
 992          * the ONE_REG calls but don't support the specific
 993          * registers, there's a reasonable chance things will still
 994          * work, at least until we try to migrate. */
995         for (i = 0; i < 1024; i++) {
996             uint64_t id = env->spr_cb[i].one_reg_id;
997 
998             if (id != 0) {
999                 kvm_put_one_spr(cs, id, i);
1000             }
1001         }
1002 
1003 #ifdef TARGET_PPC64
1004         if (msr_ts) {
1005             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1006                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1007             }
1008             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1009                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1010             }
1011             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1012             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1013             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1014             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1015             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1016             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1017             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1018             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1019             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1020             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1021         }
1022 
1023         if (cap_papr) {
1024             if (kvm_put_vpa(cs) < 0) {
1025                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1026             }
1027         }
1028 
1029         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1030 #endif /* TARGET_PPC64 */
1031     }
1032 
1033     return ret;
1034 }
1035 
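/* Recompute a BookE exception vector as the IVOR offset plus the IVPR base. */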
1036 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1037 {
1038      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1039 }
1040 
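/* Read back the BookE special registers this kernel advertises (feature
 * flags in sregs.u.e.features) into the QEMU CPU state. */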
1041 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1042 {
1043     CPUPPCState *env = &cpu->env;
1044     struct kvm_sregs sregs;
1045     int ret;
1046 
1047     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1048     if (ret < 0) {
1049         return ret;
1050     }
1051 
1052     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1053         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1054         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1055         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1056         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1057         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1058         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1059         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1060         env->spr[SPR_DECR] = sregs.u.e.dec;
1061         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1062         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1063         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1064     }
1065 
1066     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1067         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1068         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1069         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1070         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1071         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1072     }
1073 
1074     if (sregs.u.e.features & KVM_SREGS_E_64) {
1075         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1076     }
1077 
1078     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1079         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1080     }
1081 
1082     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1083         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1084         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1085         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1086         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1087         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1088         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1089         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1090         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1091         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1092         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1093         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1094         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1095         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1096         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1097         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1098         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1099         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1100         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1101         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1102         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1103         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1104         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1105         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1106         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1107         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1108         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1109         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1110         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1111         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1112         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1113         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1114         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1115 
1116         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1117             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1118             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1119             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1120             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1121             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1122             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1123         }
1124 
1125         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1126             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1127             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1128         }
1129 
1130         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1131             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1132             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1133             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1134             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1135         }
1136     }
1137 
1138     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1139         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1140         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1141         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1142         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1143         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1144         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1145         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1146         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1147         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1148         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1149     }
1150 
1151     if (sregs.u.e.features & KVM_SREGS_EXP) {
1152         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1153     }
1154 
1155     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1156         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1157         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1158     }
1159 
1160     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1161         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1162         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1163         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1164 
1165         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1166             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1167             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1168         }
1169     }
1170 
1171     return 0;
1172 }
1173 
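/* Read back the Book3S segment state (SDR1, SLB, segment registers and
 * BATs) from KVM into the QEMU CPU state. */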
1174 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1175 {
1176     CPUPPCState *env = &cpu->env;
1177     struct kvm_sregs sregs;
1178     int ret;
1179     int i;
1180 
1181     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1182     if (ret < 0) {
1183         return ret;
1184     }
1185 
1186     if (!cpu->vhyp) {
1187         ppc_store_sdr1(env, sregs.u.s.sdr1);
1188     }
1189 
1190     /* Sync SLB */
1191 #ifdef TARGET_PPC64
1192     /*
1193      * The packed SLB array we get from KVM_GET_SREGS only contains
1194      * information about valid entries. So we flush our internal copy
1195      * to get rid of stale ones, then put all valid SLB entries back
1196      * in.
1197      */
1198     memset(env->slb, 0, sizeof(env->slb));
1199     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1200         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1201         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1202         /*
1203          * Only restore valid entries
1204          */
1205         if (rb & SLB_ESID_V) {
1206             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1207         }
1208     }
1209 #endif
1210 
1211     /* Sync SRs */
1212     for (i = 0; i < 16; i++) {
1213         env->sr[i] = sregs.u.s.ppc32.sr[i];
1214     }
1215 
1216     /* Sync BATs */
1217     for (i = 0; i < 8; i++) {
1218         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1219         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1220         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1221         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1222     }
1223 
1224     return 0;
1225 }
1226 
1227 int kvm_arch_get_registers(CPUState *cs)
1228 {
1229     PowerPCCPU *cpu = POWERPC_CPU(cs);
1230     CPUPPCState *env = &cpu->env;
1231     struct kvm_regs regs;
1232     uint32_t cr;
1233     int i, ret;
1234 
1235     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1236     if (ret < 0)
1237         return ret;
1238 
1239     cr = regs.cr;
1240     for (i = 7; i >= 0; i--) {
1241         env->crf[i] = cr & 15;
1242         cr >>= 4;
1243     }
1244 
1245     env->ctr = regs.ctr;
1246     env->lr = regs.lr;
1247     cpu_write_xer(env, regs.xer);
1248     env->msr = regs.msr;
1249     env->nip = regs.pc;
1250 
1251     env->spr[SPR_SRR0] = regs.srr0;
1252     env->spr[SPR_SRR1] = regs.srr1;
1253 
1254     env->spr[SPR_SPRG0] = regs.sprg0;
1255     env->spr[SPR_SPRG1] = regs.sprg1;
1256     env->spr[SPR_SPRG2] = regs.sprg2;
1257     env->spr[SPR_SPRG3] = regs.sprg3;
1258     env->spr[SPR_SPRG4] = regs.sprg4;
1259     env->spr[SPR_SPRG5] = regs.sprg5;
1260     env->spr[SPR_SPRG6] = regs.sprg6;
1261     env->spr[SPR_SPRG7] = regs.sprg7;
1262 
1263     env->spr[SPR_BOOKE_PID] = regs.pid;
1264 
 1265     for (i = 0; i < 32; i++)
1266         env->gpr[i] = regs.gpr[i];
1267 
1268     kvm_get_fp(cs);
1269 
1270     if (cap_booke_sregs) {
1271         ret = kvmppc_get_booke_sregs(cpu);
1272         if (ret < 0) {
1273             return ret;
1274         }
1275     }
1276 
1277     if (cap_segstate) {
1278         ret = kvmppc_get_books_sregs(cpu);
1279         if (ret < 0) {
1280             return ret;
1281         }
1282     }
1283 
1284     if (cap_hior) {
1285         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1286     }
1287 
1288     if (cap_one_reg) {
1289         int i;
1290 
 1291         /* We deliberately ignore errors here: for kernels which have
 1292          * the ONE_REG calls but don't support the specific
 1293          * registers, there's a reasonable chance things will still
 1294          * work, at least until we try to migrate. */
1295         for (i = 0; i < 1024; i++) {
1296             uint64_t id = env->spr_cb[i].one_reg_id;
1297 
1298             if (id != 0) {
1299                 kvm_get_one_spr(cs, id, i);
1300             }
1301         }
1302 
1303 #ifdef TARGET_PPC64
1304         if (msr_ts) {
1305             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1306                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1307             }
1308             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1309                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1310             }
1311             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1312             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1313             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1314             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1315             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1316             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1317             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1318             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1319             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1320             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1321         }
1322 
1323         if (cap_papr) {
1324             if (kvm_get_vpa(cs) < 0) {
1325                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1326             }
1327         }
1328 
1329         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1330 #endif
1331     }
1332 
1333     return 0;
1334 }
1335 
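/* Assert or deassert the vcpu's external interrupt line in KVM; all
 * other interrupt sources are handled entirely inside QEMU. */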
1336 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1337 {
1338     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1339 
1340     if (irq != PPC_INTERRUPT_EXT) {
1341         return 0;
1342     }
1343 
1344     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1345         return 0;
1346     }
1347 
1348     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1349 
1350     return 0;
1351 }
1352 
1353 #if defined(TARGET_PPCEMB)
1354 #define PPC_INPUT_INT PPC40x_INPUT_INT
1355 #elif defined(TARGET_PPC64)
1356 #define PPC_INPUT_INT PPC970_INPUT_INT
1357 #else
1358 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1359 #endif
1360 
1361 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1362 {
1363     PowerPCCPU *cpu = POWERPC_CPU(cs);
1364     CPUPPCState *env = &cpu->env;
1365     int r;
1366     unsigned irq;
1367 
1368     qemu_mutex_lock_iothread();
1369 
1370     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1371      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1372     if (!cap_interrupt_level &&
1373         run->ready_for_interrupt_injection &&
1374         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1375         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1376     {
1377         /* For now KVM disregards the 'irq' argument. However, in the
1378          * future KVM could cache it in-kernel to avoid a heavyweight exit
1379          * when reading the UIC.
1380          */
1381         irq = KVM_INTERRUPT_SET;
1382 
1383         DPRINTF("injected interrupt %d\n", irq);
1384         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1385         if (r < 0) {
1386             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1387         }
1388 
1389         /* Always wake up soon in case the interrupt was level based */
1390         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1391                        (NANOSECONDS_PER_SECOND / 50));
1392     }
1393 
1394     /* We don't know if there are more interrupts pending after this. However,
1395      * the guest will return to userspace in the course of handling this one
 1396      * anyway, so we will get a chance to deliver the rest. */
1397 
1398     qemu_mutex_unlock_iothread();
1399 }
1400 
1401 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1402 {
1403     return MEMTXATTRS_UNSPECIFIED;
1404 }
1405 
1406 int kvm_arch_process_async_events(CPUState *cs)
1407 {
1408     return cs->halted;
1409 }
1410 
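/* The guest idled: halt the vcpu unless a hard interrupt is already
 * pending or external interrupts are disabled (MSR_EE clear). */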
1411 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1412 {
1413     CPUState *cs = CPU(cpu);
1414     CPUPPCState *env = &cpu->env;
1415 
1416     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1417         cs->halted = 1;
1418         cs->exception_index = EXCP_HLT;
1419     }
1420 
1421     return 0;
1422 }
1423 
 1424 /* Map DCR accesses to the existing QEMU DCR emulation */
1425 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1426 {
1427     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1428         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1429 
1430     return 0;
1431 }
1432 
1433 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1434 {
1435     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1436         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1437 
1438     return 0;
1439 }
1440 
1441 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1442 {
1443     /* Mixed endian case is not handled */
1444     uint32_t sc = debug_inst_opcode;
1445 
1446     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1447                             sizeof(sc), 0) ||
1448         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1449         return -EINVAL;
1450     }
1451 
1452     return 0;
1453 }
1454 
1455 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1456 {
1457     uint32_t sc;
1458 
1459     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1460         sc != debug_inst_opcode ||
1461         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1462                             sizeof(sc), 1)) {
1463         return -EINVAL;
1464     }
1465 
1466     return 0;
1467 }
1468 
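/* Return the index of the registered hardware breakpoint/watchpoint
 * matching addr and type, or -1 if none does. */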
1469 static int find_hw_breakpoint(target_ulong addr, int type)
1470 {
1471     int n;
1472 
1473     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1474            <= ARRAY_SIZE(hw_debug_points));
1475 
1476     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1477         if (hw_debug_points[n].addr == addr &&
1478              hw_debug_points[n].type == type) {
1479             return n;
1480         }
1481     }
1482 
1483     return -1;
1484 }
1485 
1486 static int find_hw_watchpoint(target_ulong addr, int *flag)
1487 {
1488     int n;
1489 
1490     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1491     if (n >= 0) {
1492         *flag = BP_MEM_ACCESS;
1493         return n;
1494     }
1495 
1496     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1497     if (n >= 0) {
1498         *flag = BP_MEM_WRITE;
1499         return n;
1500     }
1501 
1502     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1503     if (n >= 0) {
1504         *flag = BP_MEM_READ;
1505         return n;
1506     }
1507 
1508     return -1;
1509 }
1510 
1511 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1512                                   target_ulong len, int type)
1513 {
1514     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1515         return -ENOBUFS;
1516     }
1517 
1518     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1519     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1520 
1521     switch (type) {
1522     case GDB_BREAKPOINT_HW:
1523         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1524             return -ENOBUFS;
1525         }
1526 
1527         if (find_hw_breakpoint(addr, type) >= 0) {
1528             return -EEXIST;
1529         }
1530 
1531         nb_hw_breakpoint++;
1532         break;
1533 
1534     case GDB_WATCHPOINT_WRITE:
1535     case GDB_WATCHPOINT_READ:
1536     case GDB_WATCHPOINT_ACCESS:
1537         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1538             return -ENOBUFS;
1539         }
1540 
1541         if (find_hw_breakpoint(addr, type) >= 0) {
1542             return -EEXIST;
1543         }
1544 
1545         nb_hw_watchpoint++;
1546         break;
1547 
1548     default:
1549         return -ENOSYS;
1550     }
1551 
1552     return 0;
1553 }
1554 
1555 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1556                                   target_ulong len, int type)
1557 {
1558     int n;
1559 
1560     n = find_hw_breakpoint(addr, type);
1561     if (n < 0) {
1562         return -ENOENT;
1563     }
1564 
1565     switch (type) {
1566     case GDB_BREAKPOINT_HW:
1567         nb_hw_breakpoint--;
1568         break;
1569 
1570     case GDB_WATCHPOINT_WRITE:
1571     case GDB_WATCHPOINT_READ:
1572     case GDB_WATCHPOINT_ACCESS:
1573         nb_hw_watchpoint--;
1574         break;
1575 
1576     default:
1577         return -ENOSYS;
1578     }
1579     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1580 
1581     return 0;
1582 }
1583 
1584 void kvm_arch_remove_all_hw_breakpoints(void)
1585 {
1586     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1587 }
1588 
1589 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1590 {
1591     int n;
1592 
1593     /* Software Breakpoint updates */
1594     if (kvm_sw_breakpoints_active(cs)) {
1595         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1596     }
1597 
1598     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1599            <= ARRAY_SIZE(hw_debug_points));
1600     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1601 
1602     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1603         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1604         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1605         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1606             switch (hw_debug_points[n].type) {
1607             case GDB_BREAKPOINT_HW:
1608                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1609                 break;
1610             case GDB_WATCHPOINT_WRITE:
1611                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1612                 break;
1613             case GDB_WATCHPOINT_READ:
1614                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1615                 break;
1616             case GDB_WATCHPOINT_ACCESS:
1617                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1618                                         KVMPPC_DEBUG_WATCH_READ;
1619                 break;
1620             default:
1621                 cpu_abort(cs, "Unsupported breakpoint type\n");
1622             }
1623             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1624         }
1625     }
1626 }
1627 
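/* Decide whether a KVM_EXIT_DEBUG exit belongs to QEMU (single-step or
 * one of our hardware/software breakpoints; report it to the gdbstub)
 * or to the guest (turn it into a program interrupt and resume). */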
1628 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1629 {
1630     CPUState *cs = CPU(cpu);
1631     CPUPPCState *env = &cpu->env;
1632     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1633     int handle = 0;
1634     int n;
1635     int flag = 0;
1636 
1637     if (cs->singlestep_enabled) {
1638         handle = 1;
1639     } else if (arch_info->status) {
1640         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1641             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1642                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1643                 if (n >= 0) {
1644                     handle = 1;
1645                 }
1646             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1647                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1648                 n = find_hw_watchpoint(arch_info->address,  &flag);
1649                 if (n >= 0) {
1650                     handle = 1;
1651                     cs->watchpoint_hit = &hw_watchpoint;
1652                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1653                     hw_watchpoint.flags = flag;
1654                 }
1655             }
1656         }
1657     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1658         handle = 1;
1659     } else {
 1660         /* QEMU is not able to handle this debug exception, so inject a
 1661          * program exception into the guest;
 1662          * yes, a program exception, NOT a debug exception !!
 1663          * When QEMU is using the debug resources, debug exceptions must
 1664          * always be delivered to it. To achieve this we set MSR_DE and
 1665          * also set MSRP_DEP so the guest cannot change MSR_DE.
 1666          * When emulating debug resources for the guest, we want the
 1667          * guest to control MSR_DE (enable/disable the debug interrupt
 1668          * on demand). Supporting both configurations at once is NOT
 1669          * possible, so we cannot share the debug resources between
 1670          * QEMU and the guest on the BookE architecture.
 1671          * In the current design QEMU gets priority over the guest: if
 1672          * QEMU is using the debug resources then the guest cannot use
 1673          * them.
 1674          * For software breakpoints QEMU uses a privileged instruction,
 1675          * so there is no way we end up here because the guest set up a
 1676          * debug exception; the only possibility is that the guest
 1677          * executed a privileged / illegal instruction, and that's why
 1678          * we are injecting a program interrupt.
 1679          */
1680 
1681         cpu_synchronize_state(cs);
1682         /* env->nip is PC, so increment this by 4 to use
1683          * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1684          */
1685         env->nip += 4;
1686         cs->exception_index = POWERPC_EXCP_PROGRAM;
1687         env->error_code = POWERPC_EXCP_INVAL;
1688         ppc_cpu_do_interrupt(cs);
1689     }
1690 
1691     return handle;
1692 }
1693 
1694 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1695 {
1696     PowerPCCPU *cpu = POWERPC_CPU(cs);
1697     CPUPPCState *env = &cpu->env;
1698     int ret;
1699 
1700     qemu_mutex_lock_iothread();
1701 
1702     switch (run->exit_reason) {
1703     case KVM_EXIT_DCR:
1704         if (run->dcr.is_write) {
1705             DPRINTF("handle dcr write\n");
1706             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1707         } else {
1708             DPRINTF("handle dcr read\n");
1709             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1710         }
1711         break;
1712     case KVM_EXIT_HLT:
1713         DPRINTF("handle halt\n");
1714         ret = kvmppc_handle_halt(cpu);
1715         break;
1716 #if defined(TARGET_PPC64)
1717     case KVM_EXIT_PAPR_HCALL:
1718         DPRINTF("handle PAPR hypercall\n");
1719         run->papr_hcall.ret = spapr_hypercall(cpu,
1720                                               run->papr_hcall.nr,
1721                                               run->papr_hcall.args);
1722         ret = 0;
1723         break;
1724 #endif
1725     case KVM_EXIT_EPR:
1726         DPRINTF("handle epr\n");
1727         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1728         ret = 0;
1729         break;
1730     case KVM_EXIT_WATCHDOG:
1731         DPRINTF("handle watchdog expiry\n");
1732         watchdog_perform_action();
1733         ret = 0;
1734         break;
1735 
1736     case KVM_EXIT_DEBUG:
1737         DPRINTF("handle debug exception\n");
1738         if (kvm_handle_debug(cpu, run)) {
1739             ret = EXCP_DEBUG;
1740             break;
1741         }
1742         /* re-enter, this exception was guest-internal */
1743         ret = 0;
1744         break;
1745 
1746     default:
1747         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1748         ret = -1;
1749         break;
1750     }
1751 
1752     qemu_mutex_unlock_iothread();
1753     return ret;
1754 }
1755 
1756 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1757 {
1758     CPUState *cs = CPU(cpu);
1759     uint32_t bits = tsr_bits;
1760     struct kvm_one_reg reg = {
1761         .id = KVM_REG_PPC_OR_TSR,
1762         .addr = (uintptr_t) &bits,
1763     };
1764 
1765     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1766 }
1767 
1768 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1769 {
1771     CPUState *cs = CPU(cpu);
1772     uint32_t bits = tsr_bits;
1773     struct kvm_one_reg reg = {
1774         .id = KVM_REG_PPC_CLEAR_TSR,
1775         .addr = (uintptr_t) &bits,
1776     };
1777 
1778     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1779 }
1780 
1781 int kvmppc_set_tcr(PowerPCCPU *cpu)
1782 {
1783     CPUState *cs = CPU(cpu);
1784     CPUPPCState *env = &cpu->env;
1785     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1786 
1787     struct kvm_one_reg reg = {
1788         .id = KVM_REG_PPC_TCR,
1789         .addr = (uintptr_t) &tcr,
1790     };
1791 
1792     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1793 }
1794 
1795 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1796 {
1797     CPUState *cs = CPU(cpu);
1798     int ret;
1799 
1800     if (!kvm_enabled()) {
1801         return -1;
1802     }
1803 
1804     if (!cap_ppc_watchdog) {
1805         printf("warning: KVM does not support watchdog\n");
1806         return -1;
1807     }
1808 
1809     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1810     if (ret < 0) {
1811         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1812                 __func__, strerror(-ret));
1813         return ret;
1814     }
1815 
1816     return ret;
1817 }
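
/*
 * A minimal sketch of how a caller might combine the BookE timer/watchdog
 * helpers above (hypothetical wiring, for illustration only):
 *
 *     if (kvmppc_booke_watchdog_enable(cpu) == 0) {
 *         kvmppc_set_tcr(cpu);                  // push SPR_BOOKE_TCR to KVM
 *     }
 *     kvmppc_or_tsr_bits(cpu, tsr_mask);        // OR caller-chosen bits into TSR
 *     kvmppc_clear_tsr_bits(cpu, tsr_mask);     // or clear them again
 */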
1818 
1819 static int read_cpuinfo(const char *field, char *value, int len)
1820 {
1821     FILE *f;
1822     int ret = -1;
1823     int field_len = strlen(field);
1824     char line[512];
1825 
1826     f = fopen("/proc/cpuinfo", "r");
1827     if (!f) {
1828         return -1;
1829     }
1830 
1831     do {
1832         if (!fgets(line, sizeof(line), f)) {
1833             break;
1834         }
1835         if (!strncmp(line, field, field_len)) {
1836             pstrcpy(value, len, line);
1837             ret = 0;
1838             break;
1839         }
1840     } while (*line);
1841 
1842     fclose(f);
1843 
1844     return ret;
1845 }
1846 
1847 uint32_t kvmppc_get_tbfreq(void)
1848 {
1849     char line[512];
1850     char *ns;
1851     uint32_t retval = NANOSECONDS_PER_SECOND;
1852 
1853     if (read_cpuinfo("timebase", line, sizeof(line))) {
1854         return retval;
1855     }
1856 
1857     if (!(ns = strchr(line, ':'))) {
1858         return retval;
1859     }
1860 
1861     ns++;
1862 
1863     return atoi(ns);
1864 }
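
/*
 * For example (illustrative value only), a /proc/cpuinfo line of the form
 *
 *     timebase        : 512000000
 *
 * is matched by read_cpuinfo() on the "timebase" prefix, and the atoi()
 * on the text after the ':' yields 512000000.  If the field or the ':'
 * separator is missing, the NANOSECONDS_PER_SECOND fallback above is
 * returned instead.
 */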
1865 
1866 bool kvmppc_get_host_serial(char **value)
1867 {
1868     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1869                                NULL);
1870 }
1871 
1872 bool kvmppc_get_host_model(char **value)
1873 {
1874     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1875 }
1876 
1877 /* Try to find a device tree node for a CPU with clock-frequency property */
1878 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1879 {
1880     struct dirent *dirp;
1881     DIR *dp;
1882 
1883     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1884         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1885         return -1;
1886     }
1887 
1888     buf[0] = '\0';
1889     while ((dirp = readdir(dp)) != NULL) {
1890         FILE *f;
1891         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1892                  dirp->d_name);
1893         f = fopen(buf, "r");
1894         if (f) {
1895             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1896             fclose(f);
1897             break;
1898         }
1899         buf[0] = '\0';
1900     }
1901     closedir(dp);
1902     if (buf[0] == '\0') {
1903         printf("Unknown host!\n");
1904         return -1;
1905     }
1906 
1907     return 0;
1908 }
1909 
1910 static uint64_t kvmppc_read_int_dt(const char *filename)
1911 {
1912     union {
1913         uint32_t v32;
1914         uint64_t v64;
1915     } u;
1916     FILE *f;
1917     int len;
1918 
1919     f = fopen(filename, "rb");
1920     if (!f) {
1921         return -1;
1922     }
1923 
1924     len = fread(&u, 1, sizeof(u), f);
1925     fclose(f);
1926     switch (len) {
1927     case 4:
1928         /* property is a 32-bit quantity */
1929         return be32_to_cpu(u.v32);
1930     case 8:
1931         return be64_to_cpu(u.v64);
1932     }
1933 
1934     return 0;
1935 }
1936 
1937 /* Read a CPU node property from the host device tree that's a single
1938  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1939  * (can't find or open the property, or doesn't understand the
1940  * format) */
1941 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1942 {
1943     char buf[PATH_MAX], *tmp;
1944     uint64_t val;
1945 
1946     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1947         return -1;
1948     }
1949 
1950     tmp = g_strdup_printf("%s/%s", buf, propname);
1951     val = kvmppc_read_int_dt(tmp);
1952     g_free(tmp);
1953 
1954     return val;
1955 }
1956 
1957 uint64_t kvmppc_get_clockfreq(void)
1958 {
1959     return kvmppc_read_int_cpu_dt("clock-frequency");
1960 }
1961 
1962 uint32_t kvmppc_get_vmx(void)
1963 {
1964     return kvmppc_read_int_cpu_dt("ibm,vmx");
1965 }
1966 
1967 uint32_t kvmppc_get_dfp(void)
1968 {
1969     return kvmppc_read_int_cpu_dt("ibm,dfp");
1970 }
1971 
1972 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1973 {
1974     PowerPCCPU *cpu = ppc_env_get_cpu(env);
1975     CPUState *cs = CPU(cpu);
1976 
1977     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1978         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1979         return 0;
1980     }
1981 
1982     return 1;
1983 }
1984 
1985 int kvmppc_get_hasidle(CPUPPCState *env)
1986 {
1987     struct kvm_ppc_pvinfo pvinfo;
1988 
1989     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1990         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1991         return 1;
1992     }
1993 
1994     return 0;
1995 }
1996 
1997 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1998 {
1999     uint32_t *hc = (uint32_t *)buf;
2000     struct kvm_ppc_pvinfo pvinfo;
2001 
2002     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2003         memcpy(buf, pvinfo.hcall, buf_len);
2004         return 0;
2005     }
2006 
2007     /*
2008      * Fallback to always fail hypercalls regardless of endianness:
2009      *
2010      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2011      *     li r3, -1
2012      *     b .+8       (becomes nop in wrong endian)
2013      *     bswap32(li r3, -1)
2014      */
2015 
2016     hc[0] = cpu_to_be32(0x08000048);
2017     hc[1] = cpu_to_be32(0x3860ffff);
2018     hc[2] = cpu_to_be32(0x48000008);
2019     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2020 
2021     return 1;
2022 }
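
/*
 * A short trace of the fallback sequence above: in the "good" endianness
 * the guest executes tdi (which never traps), li r3, -1, then b .+8 over
 * the byte-swapped word; in the "wrong" endianness the first word reads
 * as b .+8, which skips to the third word (now a no-op tdi), after which
 * the byte-swapped word decodes as li r3, -1.  Either way the sequence
 * leaves -1 in r3, i.e. the hypercall always fails.
 */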
2023 
2024 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2025 {
2026     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2027 }
2028 
2029 void kvmppc_enable_logical_ci_hcalls(void)
2030 {
2031     /*
2032      * FIXME: it would be nice if we could detect the cases where
2033      * we're using a device which requires the in-kernel
2034      * implementation of these hcalls, but the kernel lacks them, and
2035      * produce a warning.
2036      */
2037     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2038     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2039 }
2040 
2041 void kvmppc_enable_set_mode_hcall(void)
2042 {
2043     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2044 }
2045 
2046 void kvmppc_enable_clear_ref_mod_hcalls(void)
2047 {
2048     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2049     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2050 }
2051 
2052 void kvmppc_set_papr(PowerPCCPU *cpu)
2053 {
2054     CPUState *cs = CPU(cpu);
2055     int ret;
2056 
2057     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2058     if (ret) {
2059         error_report("This vCPU type or KVM version does not support PAPR");
2060         exit(1);
2061     }
2062 
2063     /* Update the capability flag so we sync the right information
2064      * with kvm */
2065     cap_papr = 1;
2066 }
2067 
2068 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2069 {
2070     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2071 }
2072 
2073 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2074 {
2075     CPUState *cs = CPU(cpu);
2076     int ret;
2077 
2078     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2079     if (ret && mpic_proxy) {
2080         error_report("This KVM version does not support EPR");
2081         exit(1);
2082     }
2083 }
2084 
2085 int kvmppc_smt_threads(void)
2086 {
2087     return cap_ppc_smt ? cap_ppc_smt : 1;
2088 }
2089 
2090 #ifdef TARGET_PPC64
2091 off_t kvmppc_alloc_rma(void **rma)
2092 {
2093     off_t size;
2094     int fd;
2095     struct kvm_allocate_rma ret;
2096 
2097     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2098      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2099      *                      not necessary on this hardware
2100      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2101      *
2102      * FIXME: We should allow the user to force contiguous RMA
2103      * allocation in the cap_ppc_rma==1 case.
2104      */
2105     if (cap_ppc_rma < 2) {
2106         return 0;
2107     }
2108 
2109     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2110     if (fd < 0) {
2111         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2112                 strerror(errno));
2113         return -1;
2114     }
2115 
2116     size = MIN(ret.rma_size, 256ul << 20);
2117 
2118     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2119     if (*rma == MAP_FAILED) {
2120         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2121         return -1;
2122     };
2123 
2124     return size;
2125 }
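
/*
 * A minimal caller sketch for kvmppc_alloc_rma() (hypothetical usage):
 *
 *     void *rma = NULL;
 *     off_t rma_size = kvmppc_alloc_rma(&rma);
 *
 *     // rma_size == 0: no contiguous RMA required, use ordinary RAM
 *     // rma_size  < 0: the allocation or mapping failed
 *     // rma_size  > 0: rma points at an rma_size-byte shared mapping
 */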
2126 
2127 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2128 {
2129     struct kvm_ppc_smmu_info info;
2130     long rampagesize, best_page_shift;
2131     int i;
2132 
2133     if (cap_ppc_rma >= 2) {
2134         return current_size;
2135     }
2136 
2137     /* Find the largest hardware supported page size that's less than
2138      * or equal to the (logical) backing page size of guest RAM */
2139     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2140     rampagesize = qemu_getrampagesize();
2141     best_page_shift = 0;
2142 
2143     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2144         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2145 
2146         if (!sps->page_shift) {
2147             continue;
2148         }
2149 
2150         if ((sps->page_shift > best_page_shift)
2151             && ((1UL << sps->page_shift) <= rampagesize)) {
2152             best_page_shift = sps->page_shift;
2153         }
2154     }
2155 
2156     return MIN(current_size,
2157                1ULL << (best_page_shift + hash_shift - 7));
2158 }
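
/*
 * Worked example of the clamp above (illustrative numbers only): with
 * best_page_shift == 12 (4 KiB backing pages) and hash_shift == 28
 * (a 256 MiB hash table), the cap is
 * 1ULL << (12 + 28 - 7) == 1ULL << 33 == 8 GiB, so any current_size at
 * or below 8 GiB is returned unchanged.
 */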
2159 #endif
2160 
2161 bool kvmppc_spapr_use_multitce(void)
2162 {
2163     return cap_spapr_multitce;
2164 }
2165 
2166 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2167                               uint64_t bus_offset, uint32_t nb_table,
2168                               int *pfd, bool need_vfio)
2169 {
2170     long len;
2171     int fd;
2172     void *table;
2173 
2174     /* Must set fd to -1 so we don't try to munmap when called for
2175      * destroying the table, which the upper layers -will- do
2176      */
2177     *pfd = -1;
2178     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2179         return NULL;
2180     }
2181 
2182     if (cap_spapr_tce_64) {
2183         struct kvm_create_spapr_tce_64 args = {
2184             .liobn = liobn,
2185             .page_shift = page_shift,
2186             .offset = bus_offset >> page_shift,
2187             .size = nb_table,
2188             .flags = 0
2189         };
2190         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2191         if (fd < 0) {
2192             fprintf(stderr,
2193                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2194                     liobn);
2195             return NULL;
2196         }
2197     } else if (cap_spapr_tce) {
2198         uint64_t window_size = (uint64_t) nb_table << page_shift;
2199         struct kvm_create_spapr_tce args = {
2200             .liobn = liobn,
2201             .window_size = window_size,
2202         };
2203         if ((window_size != args.window_size) || bus_offset) {
2204             return NULL;
2205         }
2206         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2207         if (fd < 0) {
2208             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2209                     liobn);
2210             return NULL;
2211         }
2212     } else {
2213         return NULL;
2214     }
2215 
2216     len = nb_table * sizeof(uint64_t);
2217     /* FIXME: round this up to page size */
2218 
2219     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2220     if (table == MAP_FAILED) {
2221         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2222                 liobn);
2223         close(fd);
2224         return NULL;
2225     }
2226 
2227     *pfd = fd;
2228     return table;
2229 }
2230 
2231 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2232 {
2233     long len;
2234 
2235     if (fd < 0) {
2236         return -1;
2237     }
2238 
2239     len = nb_table * sizeof(uint64_t);
2240     if ((munmap(table, len) < 0) ||
2241         (close(fd) < 0)) {
2242         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2243                 strerror(errno));
2244         /* Leak the table */
2245     }
2246 
2247     return 0;
2248 }
2249 
2250 int kvmppc_reset_htab(int shift_hint)
2251 {
2252     uint32_t shift = shift_hint;
2253 
2254     if (!kvm_enabled()) {
2255         /* Full emulation, tell caller to allocate htab itself */
2256         return 0;
2257     }
2258     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2259         int ret;
2260         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2261         if (ret == -ENOTTY) {
2262             /* At least some versions of PR KVM advertise the
2263              * capability, but don't implement the ioctl().  Oops.
2264              * Return 0 so that we allocate the htab in qemu, as is
2265              * correct for PR. */
2266             return 0;
2267         } else if (ret < 0) {
2268             return ret;
2269         }
2270         return shift;
2271     }
2272 
2273     /* We have a kernel that predates the htab reset calls.  For PR
2274      * KVM, we need to allocate the htab ourselves; an HV KVM of this
2275      * era has already allocated a fixed 16MB hash table. */
2276     if (kvmppc_is_pr(kvm_state)) {
2277         /* PR - tell caller to allocate htab */
2278         return 0;
2279     } else {
2280         /* HV - assume 16MB kernel allocated htab */
2281         return 24;
2282     }
2283 }
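
/*
 * Return-value convention of kvmppc_reset_htab(), as a hypothetical
 * caller might consume it:
 *
 *     int shift = kvmppc_reset_htab(hint);    // hint: desired size shift
 *
 *     if (shift > 0) {
 *         // the kernel owns a (1UL << shift) byte hash table
 *     } else if (shift == 0) {
 *         // allocate the hash table in QEMU (full emulation or PR KVM)
 *     } else {
 *         // shift is a negative errno from the ioctl
 *     }
 */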
2284 
2285 static inline uint32_t mfpvr(void)
2286 {
2287     uint32_t pvr;
2288 
2289     asm ("mfpvr %0"
2290          : "=r"(pvr));
2291     return pvr;
2292 }
2293 
2294 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2295 {
2296     if (on) {
2297         *word |= flags;
2298     } else {
2299         *word &= ~flags;
2300     }
2301 }
2302 
2303 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2304 {
2305     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2306     uint32_t vmx = kvmppc_get_vmx();
2307     uint32_t dfp = kvmppc_get_dfp();
2308     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2309     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2310 
2311     /* Now fix up the class with information we can query from the host */
2312     pcc->pvr = mfpvr();
2313 
2314     if (vmx != -1) {
2315         /* Only override when we know what the host supports */
2316         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2317         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2318     }
2319     if (dfp != -1) {
2320         /* Only override when we know what the host supports */
2321         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2322     }
2323 
2324     if (dcache_size != -1) {
2325         pcc->l1_dcache_size = dcache_size;
2326     }
2327 
2328     if (icache_size != -1) {
2329         pcc->l1_icache_size = icache_size;
2330     }
2331 
2332 #if defined(TARGET_PPC64)
2333     pcc->radix_page_info = kvm_get_radix_page_info();
2334 #endif /* defined(TARGET_PPC64) */
2335 }
2336 
2337 bool kvmppc_has_cap_epr(void)
2338 {
2339     return cap_epr;
2340 }
2341 
2342 bool kvmppc_has_cap_htab_fd(void)
2343 {
2344     return cap_htab_fd;
2345 }
2346 
2347 bool kvmppc_has_cap_fixup_hcalls(void)
2348 {
2349     return cap_fixup_hcalls;
2350 }
2351 
2352 bool kvmppc_has_cap_htm(void)
2353 {
2354     return cap_htm;
2355 }
2356 
2357 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2358 {
2359     ObjectClass *oc = OBJECT_CLASS(pcc);
2360 
2361     while (oc && !object_class_is_abstract(oc)) {
2362         oc = object_class_get_parent(oc);
2363     }
2364     assert(oc);
2365 
2366     return POWERPC_CPU_CLASS(oc);
2367 }
2368 
2369 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2370 {
2371     uint32_t host_pvr = mfpvr();
2372     PowerPCCPUClass *pvr_pcc;
2373 
2374     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2375     if (pvr_pcc == NULL) {
2376         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2377     }
2378 
2379     return pvr_pcc;
2380 }
2381 
2382 static int kvm_ppc_register_host_cpu_type(void)
2383 {
2384     TypeInfo type_info = {
2385         .name = TYPE_HOST_POWERPC_CPU,
2386         .class_init = kvmppc_host_cpu_class_init,
2387     };
2388     PowerPCCPUClass *pvr_pcc;
2389     DeviceClass *dc;
2390     int i;
2391 
2392     pvr_pcc = kvm_ppc_get_host_cpu_class();
2393     if (pvr_pcc == NULL) {
2394         return -1;
2395     }
2396     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2397     type_register(&type_info);
2398 
2399 #if defined(TARGET_PPC64)
2400     type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2401     type_info.parent = TYPE_SPAPR_CPU_CORE;
2402     type_info.instance_size = sizeof(sPAPRCPUCore);
2403     type_info.instance_init = NULL;
2404     type_info.class_init = spapr_cpu_core_class_init;
2405     type_info.class_data = (void *) "host";
2406     type_register(&type_info);
2407     g_free((void *)type_info.name);
2408 #endif
2409 
2410     /*
2411      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2412      * we want "POWER8" to be a "family" alias that points to the current
2413      * host CPU type, too)
2414      */
2415     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2416     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2417         if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2418             ObjectClass *oc = OBJECT_CLASS(pvr_pcc);
2419             char *suffix;
2420 
2421             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2422             suffix = strstr(ppc_cpu_aliases[i].model, "-"TYPE_POWERPC_CPU);
2423             if (suffix) {
2424                 *suffix = 0;
2425             }
2426             ppc_cpu_aliases[i].oc = oc;
2427             break;
2428         }
2429     }
2430 
2431     return 0;
2432 }
2433 
2434 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2435 {
2436     struct kvm_rtas_token_args args = {
2437         .token = token,
2438     };
2439 
2440     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2441         return -ENOENT;
2442     }
2443 
2444     strncpy(args.name, function, sizeof(args.name));
2445 
2446     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2447 }
2448 
2449 int kvmppc_get_htab_fd(bool write)
2450 {
2451     struct kvm_get_htab_fd s = {
2452         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2453         .start_index = 0,
2454     };
2455 
2456     if (!cap_htab_fd) {
2457         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2458         return -1;
2459     }
2460 
2461     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2462 }
2463 
2464 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2465 {
2466     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2467     uint8_t buf[bufsize];
2468     ssize_t rc;
2469 
2470     do {
2471         rc = read(fd, buf, bufsize);
2472         if (rc < 0) {
2473             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2474                     strerror(errno));
2475             return rc;
2476         } else if (rc) {
2477             uint8_t *buffer = buf;
2478             ssize_t n = rc;
2479             while (n) {
2480                 struct kvm_get_htab_header *head =
2481                     (struct kvm_get_htab_header *) buffer;
2482                 size_t chunksize = sizeof(*head) +
2483                      HASH_PTE_SIZE_64 * head->n_valid;
2484 
2485                 qemu_put_be32(f, head->index);
2486                 qemu_put_be16(f, head->n_valid);
2487                 qemu_put_be16(f, head->n_invalid);
2488                 qemu_put_buffer(f, (void *)(head + 1),
2489                                 HASH_PTE_SIZE_64 * head->n_valid);
2490 
2491                 buffer += chunksize;
2492                 n -= chunksize;
2493             }
2494         }
2495     } while ((rc != 0)
2496              && ((max_ns < 0)
2497                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2498 
2499     return (rc == 0) ? 1 : 0;
2500 }
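
/*
 * The byte stream read from the HTAB fd above is a sequence of chunks,
 * each laid out exactly as the loop parses it:
 *
 *     struct kvm_get_htab_header      (index, n_valid, n_invalid)
 *     n_valid * HASH_PTE_SIZE_64      bytes of valid HPTEs
 *
 * kvmppc_load_htab_chunk() below writes a single chunk of the same shape
 * back through the fd, and kvmppc_read_hptes() further down walks the
 * same layout when fetching HPTEs from the kernel.
 */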
2501 
2502 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2503                            uint16_t n_valid, uint16_t n_invalid)
2504 {
2505     struct kvm_get_htab_header *buf;
2506     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2507     ssize_t rc;
2508 
2509     buf = alloca(chunksize);
2510     buf->index = index;
2511     buf->n_valid = n_valid;
2512     buf->n_invalid = n_invalid;
2513 
2514     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2515 
2516     rc = write(fd, buf, chunksize);
2517     if (rc < 0) {
2518         fprintf(stderr, "Error writing KVM hash table: %s\n",
2519                 strerror(errno));
2520         return rc;
2521     }
2522     if (rc != chunksize) {
2523         /* We should never get a short write on a single chunk */
2524         fprintf(stderr, "Short write, restoring KVM hash table\n");
2525         return -1;
2526     }
2527     return 0;
2528 }
2529 
2530 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2531 {
2532     return true;
2533 }
2534 
2535 void kvm_arch_init_irq_routing(KVMState *s)
2536 {
2537 }
2538 
2539 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2540 {
2541     struct kvm_get_htab_fd ghf = {
2542         .flags = 0,
2543         .start_index = ptex,
2544     };
2545     int fd, rc;
2546     int i;
2547 
2548     fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2549     if (fd < 0) {
2550         hw_error("kvmppc_read_hptes: Unable to open HPT fd");
2551     }
2552 
2553     i = 0;
2554     while (i < n) {
2555         struct kvm_get_htab_header *hdr;
2556         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2557         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2558 
2559         rc = read(fd, buf, sizeof(buf));
2560         if (rc < 0) {
2561             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2562         }
2563 
2564         hdr = (struct kvm_get_htab_header *)buf;
2565         while ((i < n) && ((char *)hdr < (buf + rc))) {
2566             int invalid = hdr->n_invalid;
2567 
2568             if (hdr->index != (ptex + i)) {
2569                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2570                          " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
2571             }
2572 
2573             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
2574             i += hdr->n_valid;
2575 
2576             if ((n - i) < invalid) {
2577                 invalid = n - i;
2578             }
2579             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2580             i += hdr->n_invalid;
2581 
2582             hdr = (struct kvm_get_htab_header *)
2583                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2584         }
2585     }
2586 
2587     close(fd);
2588 }
2589 
2590 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2591 {
2592     int fd, rc;
2593     struct kvm_get_htab_fd ghf;
2594     struct {
2595         struct kvm_get_htab_header hdr;
2596         uint64_t pte0;
2597         uint64_t pte1;
2598     } buf;
2599 
2600     ghf.flags = 0;
2601     ghf.start_index = 0;     /* Ignored */
2602     fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2603     if (fd < 0) {
2604         hw_error("kvmppc_write_hpte: Unable to open HPT fd");
2605     }
2606 
2607     buf.hdr.n_valid = 1;
2608     buf.hdr.n_invalid = 0;
2609     buf.hdr.index = ptex;
2610     buf.pte0 = cpu_to_be64(pte0);
2611     buf.pte1 = cpu_to_be64(pte1);
2612 
2613     rc = write(fd, &buf, sizeof(buf));
2614     if (rc != sizeof(buf)) {
2615         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2616     }
2617     close(fd);
2618 }
2619 
2620 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2621                              uint64_t address, uint32_t data, PCIDevice *dev)
2622 {
2623     return 0;
2624 }
2625 
2626 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2627                                 int vector, PCIDevice *dev)
2628 {
2629     return 0;
2630 }
2631 
2632 int kvm_arch_release_virq_post(int virq)
2633 {
2634     return 0;
2635 }
2636 
2637 int kvm_arch_msi_data_to_gsi(uint32_t data)
2638 {
2639     return data & 0xffff;
2640 }
2641 
2642 int kvmppc_enable_hwrng(void)
2643 {
2644     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2645         return -1;
2646     }
2647 
2648     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2649 }
2650