1 /*
2  * PowerPC implementation of KVM hooks
3  *
4  * Copyright IBM Corp. 2007
5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
6  *
7  * Authors:
8  *  Jerone Young <jyoung5@us.ibm.com>
9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10  *  Hollis Blanchard <hollisb@us.ibm.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
13  * See the COPYING file in the top-level directory.
14  *
15  */
16 
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21 
22 #include <linux/kvm.h>
23 
24 #include "qemu-common.h"
25 #include "qemu/error-report.h"
26 #include "cpu.h"
27 #include "cpu-models.h"
28 #include "qemu/timer.h"
29 #include "sysemu/sysemu.h"
30 #include "sysemu/hw_accel.h"
31 #include "kvm_ppc.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
35 
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/spapr_cpu_core.h"
40 #include "hw/ppc/ppc.h"
41 #include "sysemu/watchdog.h"
42 #include "trace.h"
43 #include "exec/gdbstub.h"
44 #include "exec/memattrs.h"
45 #include "exec/ram_addr.h"
46 #include "sysemu/hostmem.h"
47 #include "qemu/cutils.h"
48 #include "qemu/mmap-alloc.h"
49 #if defined(TARGET_PPC64)
50 #include "hw/ppc/spapr_cpu_core.h"
51 #endif
52 #include "elf.h"
53 
54 //#define DEBUG_KVM
55 
56 #ifdef DEBUG_KVM
57 #define DPRINTF(fmt, ...) \
58     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
59 #else
60 #define DPRINTF(fmt, ...) \
61     do { } while (0)
62 #endif
63 
64 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
65 
66 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
67     KVM_CAP_LAST_INFO
68 };
69 
70 static int cap_interrupt_unset = false;
71 static int cap_interrupt_level = false;
72 static int cap_segstate;
73 static int cap_booke_sregs;
74 static int cap_ppc_smt;
75 static int cap_ppc_rma;
76 static int cap_spapr_tce;
77 static int cap_spapr_tce_64;
78 static int cap_spapr_multitce;
79 static int cap_spapr_vfio;
80 static int cap_hior;
81 static int cap_one_reg;
82 static int cap_epr;
83 static int cap_ppc_watchdog;
84 static int cap_papr;
85 static int cap_htab_fd;
86 static int cap_fixup_hcalls;
87 static int cap_htm;             /* Hardware transactional memory support */
88 
89 static uint32_t debug_inst_opcode;
90 
91 /* XXX We have a race condition where we actually have a level-triggered
92  *     interrupt, but the infrastructure can't expose that yet, so the guest
93  *     takes the interrupt but ignores it, goes to sleep and never gets
94  *     notified that there's still an interrupt pending.
95  *
96  *     As a quick workaround, let's just wake up again 20 ms after we injected
97  *     an interrupt. That way we can ensure that we're always reinjecting
98  *     interrupts in case the guest swallowed them.
99  */
100 static QEMUTimer *idle_timer;
101 
102 static void kvm_kick_cpu(void *opaque)
103 {
104     PowerPCCPU *cpu = opaque;
105 
106     qemu_cpu_kick(CPU(cpu));
107 }
108 
109 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
110  * should only be used for fallback tests - generally we should use
111  * explicit capabilities for the features we want, rather than
112  * assuming what is/isn't available depending on the KVM variant. */
113 static bool kvmppc_is_pr(KVMState *ks)
114 {
115     /* Assume KVM-PR if the GET_PVINFO capability is available */
116     return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
117 }
118 
119 static int kvm_ppc_register_host_cpu_type(void);
120 
121 int kvm_arch_init(MachineState *ms, KVMState *s)
122 {
123     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
124     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
125     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
126     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
127     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
128     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
129     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
130     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
131     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
132     cap_spapr_vfio = false;
133     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
134     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
135     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
136     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
137     /* Note: we don't set cap_papr here, because this capability is
138      * only activated after this by kvmppc_set_papr() */
139     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
140     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
141     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
142 
143     if (!cap_interrupt_level) {
144         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
145                         "VM to stall at times!\n");
146     }
147 
148     kvm_ppc_register_host_cpu_type();
149 
150     return 0;
151 }
152 
153 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
154 {
155     return 0;
156 }
157 
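/*
 * Push the guest's PVR to KVM via the sregs interface so that (PR) KVM
 * knows which processor it is supposed to emulate.  On BookE the sregs
 * layout is completely different and the native PVR is used instead, so
 * this is skipped there.
 */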
158 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
159 {
160     CPUPPCState *cenv = &cpu->env;
161     CPUState *cs = CPU(cpu);
162     struct kvm_sregs sregs;
163     int ret;
164 
165     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
166         /* What we're really trying to say is "if we're on BookE, we use
167            the native PVR for now". This is the only sane way to check
168            it though, so we may mislead users into thinking they can run
169            BookE guests on BookS. Let's hope nobody is daring enough :) */
170         return 0;
171     } else {
172         if (!cap_segstate) {
173             fprintf(stderr, "kvm error: missing PVR setting capability\n");
174             return -ENOSYS;
175         }
176     }
177 
178     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
179     if (ret) {
180         return ret;
181     }
182 
183     sregs.pvr = cenv->spr[SPR_PVR];
184     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
185 }
186 
187 /* Set up a shared TLB array with KVM */
188 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
189 {
190     CPUPPCState *env = &cpu->env;
191     CPUState *cs = CPU(cpu);
192     struct kvm_book3e_206_tlb_params params = {};
193     struct kvm_config_tlb cfg = {};
194     unsigned int entries = 0;
195     int ret, i;
196 
197     if (!kvm_enabled() ||
198         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
199         return 0;
200     }
201 
202     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
203 
204     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
205         params.tlb_sizes[i] = booke206_tlb_size(env, i);
206         params.tlb_ways[i] = booke206_tlb_ways(env, i);
207         entries += params.tlb_sizes[i];
208     }
209 
210     assert(entries == env->nb_tlb);
211     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
212 
213     env->tlb_dirty = true;
214 
215     cfg.array = (uintptr_t)env->tlb.tlbm;
216     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
217     cfg.params = (uintptr_t)&params;
218     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
219 
220     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
221     if (ret < 0) {
222         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
223                 __func__, strerror(-ret));
224         return ret;
225     }
226 
227     env->kvm_sw_tlb = true;
228     return 0;
229 }
230 
231 
232 #if defined(TARGET_PPC64)
233 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
234                                        struct kvm_ppc_smmu_info *info)
235 {
236     CPUPPCState *env = &cpu->env;
237     CPUState *cs = CPU(cpu);
238 
239     memset(info, 0, sizeof(*info));
240 
241     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
242      * need to "guess" what the supported page sizes are.
243      *
244      * For that to work we make a few assumptions:
245      *
246      * - Check whether we are running "PR" KVM, which only supports 4K
247      *   and 16M pages, but supports them regardless of the backing
248      *   store characteristics. We also don't support 1T segments.
249      *
250      *   This is safe because if HV KVM ever supports that capability or
251      *   PR KVM grows support for more page/segment sizes, those versions
252      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
253      *   will not hit this fallback.
254      *
255      * - Otherwise we are running HV KVM. This means we only support page
256      *   sizes that fit in the backing store. Additionally we only
257      *   advertise 64K pages if the processor is ARCH 2.06 and we assume
258      *   P7 encodings for the SLB and hash table. Here too, we assume
259      *   support for any newer processor will mean a kernel that
260      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
261      *   this fallback.
262      */
263     if (kvmppc_is_pr(cs->kvm_state)) {
264         /* No flags */
265         info->flags = 0;
266         info->slb_size = 64;
267 
268         /* Standard 4k base page size segment */
269         info->sps[0].page_shift = 12;
270         info->sps[0].slb_enc = 0;
271         info->sps[0].enc[0].page_shift = 12;
272         info->sps[0].enc[0].pte_enc = 0;
273 
274         /* Standard 16M large page size segment */
275         info->sps[1].page_shift = 24;
276         info->sps[1].slb_enc = SLB_VSID_L;
277         info->sps[1].enc[0].page_shift = 24;
278         info->sps[1].enc[0].pte_enc = 0;
279     } else {
280         int i = 0;
281 
282         /* HV KVM has backing store size restrictions */
283         info->flags = KVM_PPC_PAGE_SIZES_REAL;
284 
285         if (env->mmu_model & POWERPC_MMU_1TSEG) {
286             info->flags |= KVM_PPC_1T_SEGMENTS;
287         }
288 
289         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
290            POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
291             info->slb_size = 32;
292         } else {
293             info->slb_size = 64;
294         }
295 
296         /* Standard 4k base page size segment */
297         info->sps[i].page_shift = 12;
298         info->sps[i].slb_enc = 0;
299         info->sps[i].enc[0].page_shift = 12;
300         info->sps[i].enc[0].pte_enc = 0;
301         i++;
302 
303         /* 64K on MMU 2.06 and later */
304         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
305             POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
306             info->sps[i].page_shift = 16;
307             info->sps[i].slb_enc = 0x110;
308             info->sps[i].enc[0].page_shift = 16;
309             info->sps[i].enc[0].pte_enc = 1;
310             i++;
311         }
312 
313         /* Standard 16M large page size segment */
314         info->sps[i].page_shift = 24;
315         info->sps[i].slb_enc = SLB_VSID_L;
316         info->sps[i].enc[0].page_shift = 24;
317         info->sps[i].enc[0].pte_enc = 0;
318     }
319 }
320 
321 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
322 {
323     CPUState *cs = CPU(cpu);
324     int ret;
325 
326     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
327         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
328         if (ret == 0) {
329             return;
330         }
331     }
332 
333     kvm_get_fallback_smmu_info(cpu, info);
334 }
335 
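/*
 * A page size is usable if either the kernel places no restriction
 * (no KVM_PPC_PAGE_SIZES_REAL, i.e. KVM-PR) or the page is no larger
 * than the pages backing guest RAM (rampgsize).
 */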
336 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
337 {
338     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
339         return true;
340     }
341 
342     return (1ul << shift) <= rampgsize;
343 }
344 
345 static long max_cpu_page_size;
346 
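/*
 * Filter the segment/page size encodings reported by the kernel (or
 * guessed by the fallback above) against what the RAM backend can
 * provide, and store the result in env->sps so that the 64-bit hash MMU
 * code only ever offers encodings KVM can actually handle.
 */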
347 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
348 {
349     static struct kvm_ppc_smmu_info smmu_info;
350     static bool has_smmu_info;
351     CPUPPCState *env = &cpu->env;
352     int iq, ik, jq, jk;
353     bool has_64k_pages = false;
354 
355     /* We only handle page sizes for 64-bit server guests for now */
356     if (!(env->mmu_model & POWERPC_MMU_64)) {
357         return;
358     }
359 
360     /* Collect MMU info from kernel if not already */
361     if (!has_smmu_info) {
362         kvm_get_smmu_info(cpu, &smmu_info);
363         has_smmu_info = true;
364     }
365 
366     if (!max_cpu_page_size) {
367         max_cpu_page_size = qemu_getrampagesize();
368     }
369 
370     /* Convert to QEMU form */
371     memset(&env->sps, 0, sizeof(env->sps));
372 
373     /* If we have HV KVM, we need to forbid CI large pages if our
374      * host page size is smaller than 64K.
375      */
376     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
377         env->ci_large_pages = getpagesize() >= 0x10000;
378     }
379 
380     /*
381      * XXX This loop should be an entry wide AND of the capabilities that
382      *     the selected CPU has with the capabilities that KVM supports.
383      */
384     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
385         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
386         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
387 
388         if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
389                                  ksps->page_shift)) {
390             continue;
391         }
392         qsps->page_shift = ksps->page_shift;
393         qsps->slb_enc = ksps->slb_enc;
394         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
395             if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
396                                      ksps->enc[jk].page_shift)) {
397                 continue;
398             }
399             if (ksps->enc[jk].page_shift == 16) {
400                 has_64k_pages = true;
401             }
402             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
403             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
404             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
405                 break;
406             }
407         }
408         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
409             break;
410         }
411     }
412     env->slb_nr = smmu_info.slb_size;
413     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
414         env->mmu_model &= ~POWERPC_MMU_1TSEG;
415     }
416     if (!has_64k_pages) {
417         env->mmu_model &= ~POWERPC_MMU_64K;
418     }
419 }
420 
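/*
 * Check that the page size of the memory backend at @obj_path is at
 * least as large as the biggest page size the guest CPU might use;
 * with KVM-HV, guest pages cannot be backed by smaller host pages.
 */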
421 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
422 {
423     Object *mem_obj = object_resolve_path(obj_path, NULL);
424     char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
425     long pagesize;
426 
427     if (mempath) {
428         pagesize = qemu_mempath_getpagesize(mempath);
429     } else {
430         pagesize = getpagesize();
431     }
432 
433     return pagesize >= max_cpu_page_size;
434 }
435 
436 #else /* defined (TARGET_PPC64) */
437 
438 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
439 {
440 }
441 
442 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
443 {
444     return true;
445 }
446 
447 #endif /* !defined (TARGET_PPC64) */
448 
449 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
450 {
451     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
452 }
453 
454 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
455  * book3s supports only 1 watchpoint, so array size
456  * of 4 is sufficient for now.
457  */
458 #define MAX_HW_BKPTS 4
459 
460 static struct HWBreakpoint {
461     target_ulong addr;
462     int type;
463 } hw_debug_points[MAX_HW_BKPTS];
464 
465 static CPUWatchpoint hw_watchpoint;
466 
467 /* By default no breakpoints or watchpoints are supported */
468 static int max_hw_breakpoint;
469 static int max_hw_watchpoint;
470 static int nb_hw_breakpoint;
471 static int nb_hw_watchpoint;
472 
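/*
 * Only BookE (e500) advertises hardware breakpoint/watchpoint slots
 * here; for other CPU families the maxima are left at zero.
 */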
473 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
474 {
475     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
476         max_hw_breakpoint = 2;
477         max_hw_watchpoint = 2;
478     }
479 
480     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
481         fprintf(stderr, "Error initializing h/w breakpoints\n");
482         return;
483     }
484 }
485 
486 int kvm_arch_init_vcpu(CPUState *cs)
487 {
488     PowerPCCPU *cpu = POWERPC_CPU(cs);
489     CPUPPCState *cenv = &cpu->env;
490     int ret;
491 
492     /* Gather server mmu info from KVM and update the CPU state */
493     kvm_fixup_page_sizes(cpu);
494 
495     /* Synchronize sregs with kvm */
496     ret = kvm_arch_sync_sregs(cpu);
497     if (ret) {
498         if (ret == -EINVAL) {
499             error_report("Register sync failed... If you're using kvm-hv.ko,"
500                          " only \"-cpu host\" is possible");
501         }
502         return ret;
503     }
504 
505     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
506 
507     switch (cenv->mmu_model) {
508     case POWERPC_MMU_BOOKE206:
509         /* This target supports access to KVM's guest TLB */
510         ret = kvm_booke206_tlb_init(cpu);
511         break;
512     case POWERPC_MMU_2_07:
513         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
514             /* KVM-HV has transactional memory on POWER8 even without the
515              * KVM_CAP_PPC_HTM extension, so enable it here instead as
516              * long as it's available to userspace on the host. */
517             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
518                 cap_htm = true;
519             }
520         }
521         break;
522     default:
523         break;
524     }
525 
526     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
527     kvmppc_hw_debug_points_init(cenv);
528 
529     return ret;
530 }
531 
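/*
 * Write QEMU's copy of the shared BookE 2.06 TLB back to KVM by marking
 * every entry dirty; the bitmap holds one bit per TLB entry, hence the
 * (nb_tlb + 7) / 8 byte allocation.
 */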
532 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
533 {
534     CPUPPCState *env = &cpu->env;
535     CPUState *cs = CPU(cpu);
536     struct kvm_dirty_tlb dirty_tlb;
537     unsigned char *bitmap;
538     int ret;
539 
540     if (!env->kvm_sw_tlb) {
541         return;
542     }
543 
544     bitmap = g_malloc((env->nb_tlb + 7) / 8);
545     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
546 
547     dirty_tlb.bitmap = (uintptr_t)bitmap;
548     dirty_tlb.num_dirty = env->nb_tlb;
549 
550     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
551     if (ret) {
552         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
553                 __func__, strerror(-ret));
554     }
555 
556     g_free(bitmap);
557 }
558 
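/*
 * Read a single SPR from KVM through the ONE_REG interface and store it
 * in env->spr[].  The register width (32 or 64 bit) is encoded in @id,
 * e.g. as used later in this file:
 *
 *     kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 */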
559 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
560 {
561     PowerPCCPU *cpu = POWERPC_CPU(cs);
562     CPUPPCState *env = &cpu->env;
563     union {
564         uint32_t u32;
565         uint64_t u64;
566     } val;
567     struct kvm_one_reg reg = {
568         .id = id,
569         .addr = (uintptr_t) &val,
570     };
571     int ret;
572 
573     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
574     if (ret != 0) {
575         trace_kvm_failed_spr_get(spr, strerror(errno));
576     } else {
577         switch (id & KVM_REG_SIZE_MASK) {
578         case KVM_REG_SIZE_U32:
579             env->spr[spr] = val.u32;
580             break;
581 
582         case KVM_REG_SIZE_U64:
583             env->spr[spr] = val.u64;
584             break;
585 
586         default:
587             /* Don't handle this size yet */
588             abort();
589         }
590     }
591 }
592 
593 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
594 {
595     PowerPCCPU *cpu = POWERPC_CPU(cs);
596     CPUPPCState *env = &cpu->env;
597     union {
598         uint32_t u32;
599         uint64_t u64;
600     } val;
601     struct kvm_one_reg reg = {
602         .id = id,
603         .addr = (uintptr_t) &val,
604     };
605     int ret;
606 
607     switch (id & KVM_REG_SIZE_MASK) {
608     case KVM_REG_SIZE_U32:
609         val.u32 = env->spr[spr];
610         break;
611 
612     case KVM_REG_SIZE_U64:
613         val.u64 = env->spr[spr];
614         break;
615 
616     default:
617         /* Don't handle this size yet */
618         abort();
619     }
620 
621     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
622     if (ret != 0) {
623         trace_kvm_failed_spr_set(spr, strerror(errno));
624     }
625 }
626 
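/*
 * Write the FP/VSX and Altivec state to KVM.  Each VSX register is
 * transferred as two 64-bit doublewords with the FPR in doubleword 0,
 * which is why the vsr[] halves are swapped on little-endian hosts
 * below.
 */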
627 static int kvm_put_fp(CPUState *cs)
628 {
629     PowerPCCPU *cpu = POWERPC_CPU(cs);
630     CPUPPCState *env = &cpu->env;
631     struct kvm_one_reg reg;
632     int i;
633     int ret;
634 
635     if (env->insns_flags & PPC_FLOAT) {
636         uint64_t fpscr = env->fpscr;
637         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
638 
639         reg.id = KVM_REG_PPC_FPSCR;
640         reg.addr = (uintptr_t)&fpscr;
641         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
642         if (ret < 0) {
643             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
644             return ret;
645         }
646 
647         for (i = 0; i < 32; i++) {
648             uint64_t vsr[2];
649 
650 #ifdef HOST_WORDS_BIGENDIAN
651             vsr[0] = float64_val(env->fpr[i]);
652             vsr[1] = env->vsr[i];
653 #else
654             vsr[0] = env->vsr[i];
655             vsr[1] = float64_val(env->fpr[i]);
656 #endif
657             reg.addr = (uintptr_t) &vsr;
658             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
659 
660             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
661             if (ret < 0) {
662                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
663                         i, strerror(errno));
664                 return ret;
665             }
666         }
667     }
668 
669     if (env->insns_flags & PPC_ALTIVEC) {
670         reg.id = KVM_REG_PPC_VSCR;
671         reg.addr = (uintptr_t)&env->vscr;
672         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
673         if (ret < 0) {
674             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
675             return ret;
676         }
677 
678         for (i = 0; i < 32; i++) {
679             reg.id = KVM_REG_PPC_VR(i);
680             reg.addr = (uintptr_t)&env->avr[i];
681             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
682             if (ret < 0) {
683                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
684                 return ret;
685             }
686         }
687     }
688 
689     return 0;
690 }
691 
692 static int kvm_get_fp(CPUState *cs)
693 {
694     PowerPCCPU *cpu = POWERPC_CPU(cs);
695     CPUPPCState *env = &cpu->env;
696     struct kvm_one_reg reg;
697     int i;
698     int ret;
699 
700     if (env->insns_flags & PPC_FLOAT) {
701         uint64_t fpscr;
702         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
703 
704         reg.id = KVM_REG_PPC_FPSCR;
705         reg.addr = (uintptr_t)&fpscr;
706         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
707         if (ret < 0) {
708             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
709             return ret;
710         } else {
711             env->fpscr = fpscr;
712         }
713 
714         for (i = 0; i < 32; i++) {
715             uint64_t vsr[2];
716 
717             reg.addr = (uintptr_t) &vsr;
718             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
719 
720             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
721             if (ret < 0) {
722                 DPRINTF("Unable to get %s%d from KVM: %s\n",
723                         vsx ? "VSR" : "FPR", i, strerror(errno));
724                 return ret;
725             } else {
726 #ifdef HOST_WORDS_BIGENDIAN
727                 env->fpr[i] = vsr[0];
728                 if (vsx) {
729                     env->vsr[i] = vsr[1];
730                 }
731 #else
732                 env->fpr[i] = vsr[1];
733                 if (vsx) {
734                     env->vsr[i] = vsr[0];
735                 }
736 #endif
737             }
738         }
739     }
740 
741     if (env->insns_flags & PPC_ALTIVEC) {
742         reg.id = KVM_REG_PPC_VSCR;
743         reg.addr = (uintptr_t)&env->vscr;
744         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
745         if (ret < 0) {
746             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
747             return ret;
748         }
749 
750         for (i = 0; i < 32; i++) {
751             reg.id = KVM_REG_PPC_VR(i);
752             reg.addr = (uintptr_t)&env->avr[i];
753             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
754             if (ret < 0) {
755                 DPRINTF("Unable to get VR%d from KVM: %s\n",
756                         i, strerror(errno));
757                 return ret;
758             }
759         }
760     }
761 
762     return 0;
763 }
764 
765 #if defined(TARGET_PPC64)
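/*
 * The VPA (Virtual Processor Area), SLB shadow buffer and dispatch trace
 * log are per-vCPU memory areas that a PAPR guest registers with the
 * hypervisor.  KVM exposes their guest addresses as ONE_REG registers;
 * the asserts below depend on the size field directly following the
 * address field in CPUPPCState, since both are transferred together.
 */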
766 static int kvm_get_vpa(CPUState *cs)
767 {
768     PowerPCCPU *cpu = POWERPC_CPU(cs);
769     CPUPPCState *env = &cpu->env;
770     struct kvm_one_reg reg;
771     int ret;
772 
773     reg.id = KVM_REG_PPC_VPA_ADDR;
774     reg.addr = (uintptr_t)&env->vpa_addr;
775     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
776     if (ret < 0) {
777         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
778         return ret;
779     }
780 
781     assert((uintptr_t)&env->slb_shadow_size
782            == ((uintptr_t)&env->slb_shadow_addr + 8));
783     reg.id = KVM_REG_PPC_VPA_SLB;
784     reg.addr = (uintptr_t)&env->slb_shadow_addr;
785     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
786     if (ret < 0) {
787         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
788                 strerror(errno));
789         return ret;
790     }
791 
792     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
793     reg.id = KVM_REG_PPC_VPA_DTL;
794     reg.addr = (uintptr_t)&env->dtl_addr;
795     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
796     if (ret < 0) {
797         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
798                 strerror(errno));
799         return ret;
800     }
801 
802     return 0;
803 }
804 
805 static int kvm_put_vpa(CPUState *cs)
806 {
807     PowerPCCPU *cpu = POWERPC_CPU(cs);
808     CPUPPCState *env = &cpu->env;
809     struct kvm_one_reg reg;
810     int ret;
811 
812     /* SLB shadow or DTL can't be registered unless a master VPA is
813      * registered.  That means when restoring state, if a VPA *is*
814      * registered, we need to set that up first.  If not, we need to
815      * deregister the others before deregistering the master VPA */
816     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
817 
818     if (env->vpa_addr) {
819         reg.id = KVM_REG_PPC_VPA_ADDR;
820         reg.addr = (uintptr_t)&env->vpa_addr;
821         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
822         if (ret < 0) {
823             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
824             return ret;
825         }
826     }
827 
828     assert((uintptr_t)&env->slb_shadow_size
829            == ((uintptr_t)&env->slb_shadow_addr + 8));
830     reg.id = KVM_REG_PPC_VPA_SLB;
831     reg.addr = (uintptr_t)&env->slb_shadow_addr;
832     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
833     if (ret < 0) {
834         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
835         return ret;
836     }
837 
838     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
839     reg.id = KVM_REG_PPC_VPA_DTL;
840     reg.addr = (uintptr_t)&env->dtl_addr;
841     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
842     if (ret < 0) {
843         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
844                 strerror(errno));
845         return ret;
846     }
847 
848     if (!env->vpa_addr) {
849         reg.id = KVM_REG_PPC_VPA_ADDR;
850         reg.addr = (uintptr_t)&env->vpa_addr;
851         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
852         if (ret < 0) {
853             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
854             return ret;
855         }
856     }
857 
858     return 0;
859 }
860 #endif /* TARGET_PPC64 */
861 
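/*
 * Push the Book3S segment state (PVR, SDR1, SLB, segment registers and
 * BATs) to KVM via KVM_SET_SREGS.
 */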
862 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
863 {
864     CPUPPCState *env = &cpu->env;
865     struct kvm_sregs sregs;
866     int i;
867 
868     sregs.pvr = env->spr[SPR_PVR];
869 
870     sregs.u.s.sdr1 = env->spr[SPR_SDR1];
871 
872     /* Sync SLB */
873 #ifdef TARGET_PPC64
874     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
875         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
876         if (env->slb[i].esid & SLB_ESID_V) {
877             sregs.u.s.ppc64.slb[i].slbe |= i;
878         }
879         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
880     }
881 #endif
882 
883     /* Sync SRs */
884     for (i = 0; i < 16; i++) {
885         sregs.u.s.ppc32.sr[i] = env->sr[i];
886     }
887 
888     /* Sync BATs */
889     for (i = 0; i < 8; i++) {
890         /* Beware. We have to swap upper and lower bits here */
891         /* Beware: we have to combine the upper and lower BAT words here */
892             | env->DBAT[1][i];
893         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
894             | env->IBAT[1][i];
895     }
896 
897     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
898 }
899 
900 int kvm_arch_put_registers(CPUState *cs, int level)
901 {
902     PowerPCCPU *cpu = POWERPC_CPU(cs);
903     CPUPPCState *env = &cpu->env;
904     struct kvm_regs regs;
905     int ret;
906     int i;
907 
908     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
909     if (ret < 0) {
910         return ret;
911     }
912 
913     regs.ctr = env->ctr;
914     regs.lr  = env->lr;
915     regs.xer = cpu_read_xer(env);
916     regs.msr = env->msr;
917     regs.pc = env->nip;
918 
919     regs.srr0 = env->spr[SPR_SRR0];
920     regs.srr1 = env->spr[SPR_SRR1];
921 
922     regs.sprg0 = env->spr[SPR_SPRG0];
923     regs.sprg1 = env->spr[SPR_SPRG1];
924     regs.sprg2 = env->spr[SPR_SPRG2];
925     regs.sprg3 = env->spr[SPR_SPRG3];
926     regs.sprg4 = env->spr[SPR_SPRG4];
927     regs.sprg5 = env->spr[SPR_SPRG5];
928     regs.sprg6 = env->spr[SPR_SPRG6];
929     regs.sprg7 = env->spr[SPR_SPRG7];
930 
931     regs.pid = env->spr[SPR_BOOKE_PID];
932 
933     for (i = 0; i < 32; i++)
934         regs.gpr[i] = env->gpr[i];
935 
936     regs.cr = 0;
937     for (i = 0; i < 8; i++) {
938         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
939     }
940 
941     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
942     if (ret < 0)
943         return ret;
944 
945     kvm_put_fp(cs);
946 
947     if (env->tlb_dirty) {
948         kvm_sw_tlb_put(cpu);
949         env->tlb_dirty = false;
950     }
951 
952     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
953         ret = kvmppc_put_books_sregs(cpu);
954         if (ret < 0) {
955             return ret;
956         }
957     }
958 
959     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
960         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
961     }
962 
963     if (cap_one_reg) {
964         int i;
965 
966         /* We deliberately ignore errors here: for kernels which have
967          * the ONE_REG calls but don't support the specific
968          * registers, there's a reasonable chance things will still
969          * work, at least until we try to migrate. */
970         for (i = 0; i < 1024; i++) {
971             uint64_t id = env->spr_cb[i].one_reg_id;
972 
973             if (id != 0) {
974                 kvm_put_one_spr(cs, id, i);
975             }
976         }
977 
978 #ifdef TARGET_PPC64
979         if (msr_ts) {
980             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
981                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
982             }
983             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
984                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
985             }
986             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
987             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
988             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
989             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
990             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
991             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
992             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
993             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
994             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
995             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
996         }
997 
998         if (cap_papr) {
999             if (kvm_put_vpa(cs) < 0) {
1000                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1001             }
1002         }
1003 
1004         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1005 #endif /* TARGET_PPC64 */
1006     }
1007 
1008     return ret;
1009 }
1010 
1011 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1012 {
1013     env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1014 }
1015 
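/*
 * Fetch the BookE sregs from KVM.  Every block is guarded by a feature
 * bit in sregs.u.e.features, so only state the kernel actually provides
 * is copied back into env.
 */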
1016 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1017 {
1018     CPUPPCState *env = &cpu->env;
1019     struct kvm_sregs sregs;
1020     int ret;
1021 
1022     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1023     if (ret < 0) {
1024         return ret;
1025     }
1026 
1027     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1028         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1029         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1030         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1031         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1032         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1033         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1034         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1035         env->spr[SPR_DECR] = sregs.u.e.dec;
1036         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1037         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1038         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1039     }
1040 
1041     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1042         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1043         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1044         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1045         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1046         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1047     }
1048 
1049     if (sregs.u.e.features & KVM_SREGS_E_64) {
1050         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1051     }
1052 
1053     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1054         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1055     }
1056 
1057     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1058         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1059         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1060         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1061         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1062         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1063         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1064         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1065         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1066         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1067         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1068         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1069         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1070         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1071         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1072         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1073         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1074         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1075         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1076         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1077         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1078         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1079         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1080         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1081         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1082         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1083         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1084         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1085         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1086         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1087         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1088         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1089         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1090 
1091         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1092             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1093             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1094             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1095             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1096             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1097             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1098         }
1099 
1100         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1101             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1102             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1103         }
1104 
1105         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1106             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1107             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1108             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1109             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1110         }
1111     }
1112 
1113     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1114         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1115         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1116         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1117         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1118         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1119         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1120         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1121         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1122         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1123         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1124     }
1125 
1126     if (sregs.u.e.features & KVM_SREGS_EXP) {
1127         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1128     }
1129 
1130     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1131         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1132         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1133     }
1134 
1135     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1136         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1137         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1138         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1139 
1140         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1141             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1142             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1143         }
1144     }
1145 
1146     return 0;
1147 }
1148 
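/*
 * Fetch the Book3S segment state (SDR1, SLB, segment registers and BATs)
 * from KVM and rebuild QEMU's copy of it.
 */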
1149 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1150 {
1151     CPUPPCState *env = &cpu->env;
1152     struct kvm_sregs sregs;
1153     int ret;
1154     int i;
1155 
1156     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1157     if (ret < 0) {
1158         return ret;
1159     }
1160 
1161     if (!cpu->vhyp) {
1162         ppc_store_sdr1(env, sregs.u.s.sdr1);
1163     }
1164 
1165     /* Sync SLB */
1166 #ifdef TARGET_PPC64
1167     /*
1168      * The packed SLB array we get from KVM_GET_SREGS only contains
1169      * information about valid entries. So we flush our internal copy
1170      * to get rid of stale ones, then put all valid SLB entries back
1171      * in.
1172      */
1173     memset(env->slb, 0, sizeof(env->slb));
1174     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1175         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1176         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1177         /*
1178          * Only restore valid entries
1179          */
1180         if (rb & SLB_ESID_V) {
1181             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1182         }
1183     }
1184 #endif
1185 
1186     /* Sync SRs */
1187     for (i = 0; i < 16; i++) {
1188         env->sr[i] = sregs.u.s.ppc32.sr[i];
1189     }
1190 
1191     /* Sync BATs */
1192     for (i = 0; i < 8; i++) {
1193         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1194         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1195         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1196         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1197     }
1198 
1199     return 0;
1200 }
1201 
1202 int kvm_arch_get_registers(CPUState *cs)
1203 {
1204     PowerPCCPU *cpu = POWERPC_CPU(cs);
1205     CPUPPCState *env = &cpu->env;
1206     struct kvm_regs regs;
1207     uint32_t cr;
1208     int i, ret;
1209 
1210     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1211     if (ret < 0)
1212         return ret;
1213 
1214     cr = regs.cr;
1215     for (i = 7; i >= 0; i--) {
1216         env->crf[i] = cr & 15;
1217         cr >>= 4;
1218     }
1219 
1220     env->ctr = regs.ctr;
1221     env->lr = regs.lr;
1222     cpu_write_xer(env, regs.xer);
1223     env->msr = regs.msr;
1224     env->nip = regs.pc;
1225 
1226     env->spr[SPR_SRR0] = regs.srr0;
1227     env->spr[SPR_SRR1] = regs.srr1;
1228 
1229     env->spr[SPR_SPRG0] = regs.sprg0;
1230     env->spr[SPR_SPRG1] = regs.sprg1;
1231     env->spr[SPR_SPRG2] = regs.sprg2;
1232     env->spr[SPR_SPRG3] = regs.sprg3;
1233     env->spr[SPR_SPRG4] = regs.sprg4;
1234     env->spr[SPR_SPRG5] = regs.sprg5;
1235     env->spr[SPR_SPRG6] = regs.sprg6;
1236     env->spr[SPR_SPRG7] = regs.sprg7;
1237 
1238     env->spr[SPR_BOOKE_PID] = regs.pid;
1239 
1240     for (i = 0; i < 32; i++)
1241         env->gpr[i] = regs.gpr[i];
1242 
1243     kvm_get_fp(cs);
1244 
1245     if (cap_booke_sregs) {
1246         ret = kvmppc_get_booke_sregs(cpu);
1247         if (ret < 0) {
1248             return ret;
1249         }
1250     }
1251 
1252     if (cap_segstate) {
1253         ret = kvmppc_get_books_sregs(cpu);
1254         if (ret < 0) {
1255             return ret;
1256         }
1257     }
1258 
1259     if (cap_hior) {
1260         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1261     }
1262 
1263     if (cap_one_reg) {
1264         int i;
1265 
1266         /* We deliberately ignore errors here: for kernels which have
1267          * the ONE_REG calls but don't support the specific
1268          * registers, there's a reasonable chance things will still
1269          * work, at least until we try to migrate. */
1270         for (i = 0; i < 1024; i++) {
1271             uint64_t id = env->spr_cb[i].one_reg_id;
1272 
1273             if (id != 0) {
1274                 kvm_get_one_spr(cs, id, i);
1275             }
1276         }
1277 
1278 #ifdef TARGET_PPC64
1279         if (msr_ts) {
1280             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1281                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1282             }
1283             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1284                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1285             }
1286             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1287             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1288             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1289             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1290             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1291             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1292             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1293             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1294             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1295             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1296         }
1297 
1298         if (cap_papr) {
1299             if (kvm_get_vpa(cs) < 0) {
1300                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1301             }
1302         }
1303 
1304         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1305 #endif
1306     }
1307 
1308     return 0;
1309 }
1310 
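/*
 * Raise or lower a vCPU's external interrupt line in KVM.  Only
 * PPC_INTERRUPT_EXT is forwarded to the kernel; all other interrupt
 * sources are left to QEMU's own interrupt delivery.
 */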
1311 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1312 {
1313     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1314 
1315     if (irq != PPC_INTERRUPT_EXT) {
1316         return 0;
1317     }
1318 
1319     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1320         return 0;
1321     }
1322 
1323     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1324 
1325     return 0;
1326 }
1327 
1328 #if defined(TARGET_PPCEMB)
1329 #define PPC_INPUT_INT PPC40x_INPUT_INT
1330 #elif defined(TARGET_PPC64)
1331 #define PPC_INPUT_INT PPC970_INPUT_INT
1332 #else
1333 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1334 #endif
1335 
1336 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1337 {
1338     PowerPCCPU *cpu = POWERPC_CPU(cs);
1339     CPUPPCState *env = &cpu->env;
1340     int r;
1341     unsigned irq;
1342 
1343     qemu_mutex_lock_iothread();
1344 
1345     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1346      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1347     if (!cap_interrupt_level &&
1348         run->ready_for_interrupt_injection &&
1349         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1350         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1351     {
1352         /* For now KVM disregards the 'irq' argument. However, in the
1353          * future KVM could cache it in-kernel to avoid a heavyweight exit
1354          * when reading the UIC.
1355          */
1356         irq = KVM_INTERRUPT_SET;
1357 
1358         DPRINTF("injected interrupt %d\n", irq);
1359         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1360         if (r < 0) {
1361             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1362         }
1363 
1364         /* Always wake up soon in case the interrupt was level based */
1365         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1366                        (NANOSECONDS_PER_SECOND / 50));
1367     }
1368 
1369     /* We don't know if there are more interrupts pending after this. However,
1370      * the guest will return to userspace in the course of handling this one
1371      * anyway, so we will get a chance to deliver the rest. */
1372 
1373     qemu_mutex_unlock_iothread();
1374 }
1375 
1376 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1377 {
1378     return MEMTXATTRS_UNSPECIFIED;
1379 }
1380 
1381 int kvm_arch_process_async_events(CPUState *cs)
1382 {
1383     return cs->halted;
1384 }
1385 
1386 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1387 {
1388     CPUState *cs = CPU(cpu);
1389     CPUPPCState *env = &cpu->env;
1390 
1391     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1392         cs->halted = 1;
1393         cs->exception_index = EXCP_HLT;
1394     }
1395 
1396     return 0;
1397 }
1398 
1399 /* map dcr access to existing qemu dcr emulation */
1400 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1401 {
1402     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1403         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1404 
1405     return 0;
1406 }
1407 
1408 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1409 {
1410     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1411         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1412 
1413     return 0;
1414 }
1415 
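/*
 * Software breakpoints: save the original instruction and patch in the
 * trap opcode that KVM advertised through KVM_REG_PPC_DEBUG_INST (read
 * into debug_inst_opcode in kvm_arch_init_vcpu()).
 */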
1416 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1417 {
1418     /* Mixed endian case is not handled */
1419     uint32_t sc = debug_inst_opcode;
1420 
1421     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1422                             sizeof(sc), 0) ||
1423         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1424         return -EINVAL;
1425     }
1426 
1427     return 0;
1428 }
1429 
1430 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1431 {
1432     uint32_t sc;
1433 
1434     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1435         sc != debug_inst_opcode ||
1436         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1437                             sizeof(sc), 1)) {
1438         return -EINVAL;
1439     }
1440 
1441     return 0;
1442 }
1443 
1444 static int find_hw_breakpoint(target_ulong addr, int type)
1445 {
1446     int n;
1447 
1448     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1449            <= ARRAY_SIZE(hw_debug_points));
1450 
1451     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1452         if (hw_debug_points[n].addr == addr &&
1453              hw_debug_points[n].type == type) {
1454             return n;
1455         }
1456     }
1457 
1458     return -1;
1459 }
1460 
1461 static int find_hw_watchpoint(target_ulong addr, int *flag)
1462 {
1463     int n;
1464 
1465     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1466     if (n >= 0) {
1467         *flag = BP_MEM_ACCESS;
1468         return n;
1469     }
1470 
1471     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1472     if (n >= 0) {
1473         *flag = BP_MEM_WRITE;
1474         return n;
1475     }
1476 
1477     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1478     if (n >= 0) {
1479         *flag = BP_MEM_READ;
1480         return n;
1481     }
1482 
1483     return -1;
1484 }
1485 
1486 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1487                                   target_ulong len, int type)
1488 {
1489     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1490         return -ENOBUFS;
1491     }
1492 
1493     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1494     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1495 
1496     switch (type) {
1497     case GDB_BREAKPOINT_HW:
1498         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1499             return -ENOBUFS;
1500         }
1501 
1502         if (find_hw_breakpoint(addr, type) >= 0) {
1503             return -EEXIST;
1504         }
1505 
1506         nb_hw_breakpoint++;
1507         break;
1508 
1509     case GDB_WATCHPOINT_WRITE:
1510     case GDB_WATCHPOINT_READ:
1511     case GDB_WATCHPOINT_ACCESS:
1512         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1513             return -ENOBUFS;
1514         }
1515 
1516         if (find_hw_breakpoint(addr, type) >= 0) {
1517             return -EEXIST;
1518         }
1519 
1520         nb_hw_watchpoint++;
1521         break;
1522 
1523     default:
1524         return -ENOSYS;
1525     }
1526 
1527     return 0;
1528 }
1529 
1530 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1531                                   target_ulong len, int type)
1532 {
1533     int n;
1534 
1535     n = find_hw_breakpoint(addr, type);
1536     if (n < 0) {
1537         return -ENOENT;
1538     }
1539 
1540     switch (type) {
1541     case GDB_BREAKPOINT_HW:
1542         nb_hw_breakpoint--;
1543         break;
1544 
1545     case GDB_WATCHPOINT_WRITE:
1546     case GDB_WATCHPOINT_READ:
1547     case GDB_WATCHPOINT_ACCESS:
1548         nb_hw_watchpoint--;
1549         break;
1550 
1551     default:
1552         return -ENOSYS;
1553     }
1554     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1555 
1556     return 0;
1557 }
1558 
1559 void kvm_arch_remove_all_hw_breakpoints(void)
1560 {
1561     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1562 }
1563 
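/*
 * Translate QEMU's software/hardware breakpoint and watchpoint
 * bookkeeping into the kvm_guest_debug structure that the generic code
 * hands to KVM_SET_GUEST_DEBUG.
 */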
1564 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1565 {
1566     int n;
1567 
1568     /* Software Breakpoint updates */
1569     if (kvm_sw_breakpoints_active(cs)) {
1570         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1571     }
1572 
1573     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1574            <= ARRAY_SIZE(hw_debug_points));
1575     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1576 
1577     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1578         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1579         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1580         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1581             switch (hw_debug_points[n].type) {
1582             case GDB_BREAKPOINT_HW:
1583                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1584                 break;
1585             case GDB_WATCHPOINT_WRITE:
1586                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1587                 break;
1588             case GDB_WATCHPOINT_READ:
1589                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1590                 break;
1591             case GDB_WATCHPOINT_ACCESS:
1592                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1593                                         KVMPPC_DEBUG_WATCH_READ;
1594                 break;
1595             default:
1596                 cpu_abort(cs, "Unsupported breakpoint type\n");
1597             }
1598             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1599         }
1600     }
1601 }
1602 
1603 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1604 {
1605     CPUState *cs = CPU(cpu);
1606     CPUPPCState *env = &cpu->env;
1607     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1608     int handle = 0;
1609     int n;
1610     int flag = 0;
1611 
1612     if (cs->singlestep_enabled) {
1613         handle = 1;
1614     } else if (arch_info->status) {
1615         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1616             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1617                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1618                 if (n >= 0) {
1619                     handle = 1;
1620                 }
1621             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1622                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1623                 n = find_hw_watchpoint(arch_info->address,  &flag);
1624                 if (n >= 0) {
1625                     handle = 1;
1626                     cs->watchpoint_hit = &hw_watchpoint;
1627                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1628                     hw_watchpoint.flags = flag;
1629                 }
1630             }
1631         }
1632     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1633         handle = 1;
1634     } else {
1635         /* QEMU is not able to handle this debug exception, so inject
1636          * a program exception into the guest;
1637          * yes, a program exception, NOT a debug exception!
1638          * When QEMU is using the debug resources then debug interrupts
1639          * must always stay enabled. To achieve this we set MSR_DE and
1640          * also set MSRP_DEP so the guest cannot change MSR_DE.
1641          * When emulating the debug resources for the guest we want the
1642          * guest to control MSR_DE (enable/disable the debug interrupt as
1643          * needed). Supporting both configurations at once is NOT possible,
1644          * so we cannot share the debug resources between QEMU and the
1645          * guest on BookE.
1646          * In the current design QEMU gets priority over the guest: if
1647          * QEMU is using the debug resources then the guest cannot use
1648          * them.
1649          * For software breakpoints QEMU uses a privileged instruction, so
1650          * there is no way we can end up here because the guest set up a
1651          * debug exception; the only possibility is that the guest executed
1652          * a privileged / illegal instruction and that's why we are
1653          * injecting a program interrupt.
1654          */
1655 
1656         cpu_synchronize_state(cs);
1657         /* env->nip is PC, so increment this by 4 to use
1658      * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1659          */
1660         env->nip += 4;
1661         cs->exception_index = POWERPC_EXCP_PROGRAM;
1662         env->error_code = POWERPC_EXCP_INVAL;
1663         ppc_cpu_do_interrupt(cs);
1664     }
1665 
1666     return handle;
1667 }
1668 
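     /* Dispatch a KVM exit to the appropriate handler: DCR accesses, halt,
      * PAPR hypercalls, EPR reads, watchdog expiry and debug exits.
      * Returns EXCP_DEBUG to stop in the debugger, 0 to re-enter the guest,
      * or a negative value for an unhandled exit reason. */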
1669 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1670 {
1671     PowerPCCPU *cpu = POWERPC_CPU(cs);
1672     CPUPPCState *env = &cpu->env;
1673     int ret;
1674 
1675     qemu_mutex_lock_iothread();
1676 
1677     switch (run->exit_reason) {
1678     case KVM_EXIT_DCR:
1679         if (run->dcr.is_write) {
1680             DPRINTF("handle dcr write\n");
1681             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1682         } else {
1683             DPRINTF("handle dcr read\n");
1684             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1685         }
1686         break;
1687     case KVM_EXIT_HLT:
1688         DPRINTF("handle halt\n");
1689         ret = kvmppc_handle_halt(cpu);
1690         break;
1691 #if defined(TARGET_PPC64)
1692     case KVM_EXIT_PAPR_HCALL:
1693         DPRINTF("handle PAPR hypercall\n");
1694         run->papr_hcall.ret = spapr_hypercall(cpu,
1695                                               run->papr_hcall.nr,
1696                                               run->papr_hcall.args);
1697         ret = 0;
1698         break;
1699 #endif
1700     case KVM_EXIT_EPR:
1701         DPRINTF("handle epr\n");
1702         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1703         ret = 0;
1704         break;
1705     case KVM_EXIT_WATCHDOG:
1706         DPRINTF("handle watchdog expiry\n");
1707         watchdog_perform_action();
1708         ret = 0;
1709         break;
1710 
1711     case KVM_EXIT_DEBUG:
1712         DPRINTF("handle debug exception\n");
1713         if (kvm_handle_debug(cpu, run)) {
1714             ret = EXCP_DEBUG;
1715             break;
1716         }
1717         /* re-enter, this exception was guest-internal */
1718         ret = 0;
1719         break;
1720 
1721     default:
1722         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1723         ret = -1;
1724         break;
1725     }
1726 
1727     qemu_mutex_unlock_iothread();
1728     return ret;
1729 }
1730 
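     /* OR the given bits into the guest's BookE Timer Status Register via
      * the KVM_REG_PPC_OR_TSR one-reg interface. */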
1731 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1732 {
1733     CPUState *cs = CPU(cpu);
1734     uint32_t bits = tsr_bits;
1735     struct kvm_one_reg reg = {
1736         .id = KVM_REG_PPC_OR_TSR,
1737         .addr = (uintptr_t) &bits,
1738     };
1739 
1740     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1741 }
1742 
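     /* Clear the given bits in the guest's BookE Timer Status Register via
      * the KVM_REG_PPC_CLEAR_TSR one-reg interface. */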
1743 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1744 {
1746     CPUState *cs = CPU(cpu);
1747     uint32_t bits = tsr_bits;
1748     struct kvm_one_reg reg = {
1749         .id = KVM_REG_PPC_CLEAR_TSR,
1750         .addr = (uintptr_t) &bits,
1751     };
1752 
1753     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1754 }
1755 
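     /* Push QEMU's copy of the BookE Timer Control Register
      * (SPR_BOOKE_TCR) into KVM via the KVM_REG_PPC_TCR one-reg
      * interface. */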
1756 int kvmppc_set_tcr(PowerPCCPU *cpu)
1757 {
1758     CPUState *cs = CPU(cpu);
1759     CPUPPCState *env = &cpu->env;
1760     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1761 
1762     struct kvm_one_reg reg = {
1763         .id = KVM_REG_PPC_TCR,
1764         .addr = (uintptr_t) &tcr,
1765     };
1766 
1767     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1768 }
1769 
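     /* Enable the in-kernel BookE watchdog emulation
      * (KVM_CAP_PPC_BOOKE_WATCHDOG) for this vCPU.  Returns a negative
      * value when KVM is not in use or the capability is unavailable. */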
1770 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1771 {
1772     CPUState *cs = CPU(cpu);
1773     int ret;
1774 
1775     if (!kvm_enabled()) {
1776         return -1;
1777     }
1778 
1779     if (!cap_ppc_watchdog) {
1780         printf("warning: KVM does not support watchdog\n");
1781         return -1;
1782     }
1783 
1784     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1785     if (ret < 0) {
1786         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1787                 __func__, strerror(-ret));
1788         return ret;
1789     }
1790 
1791     return ret;
1792 }
1793 
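     /* Search /proc/cpuinfo for a line starting with 'field' and copy that
      * line into 'value' (at most 'len' bytes).  Returns 0 on success, -1
      * if the file can't be opened or the field isn't found. */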
1794 static int read_cpuinfo(const char *field, char *value, int len)
1795 {
1796     FILE *f;
1797     int ret = -1;
1798     int field_len = strlen(field);
1799     char line[512];
1800 
1801     f = fopen("/proc/cpuinfo", "r");
1802     if (!f) {
1803         return -1;
1804     }
1805 
1806     do {
1807         if (!fgets(line, sizeof(line), f)) {
1808             break;
1809         }
1810         if (!strncmp(line, field, field_len)) {
1811             pstrcpy(value, len, line);
1812             ret = 0;
1813             break;
1814         }
1815     } while (*line);
1816 
1817     fclose(f);
1818 
1819     return ret;
1820 }
1821 
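     /* Return the host timebase frequency, parsed from the "timebase" line
      * of /proc/cpuinfo, falling back to NANOSECONDS_PER_SECOND when it
      * can't be determined. */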
1822 uint32_t kvmppc_get_tbfreq(void)
1823 {
1824     char line[512];
1825     char *ns;
1826     uint32_t retval = NANOSECONDS_PER_SECOND;
1827 
1828     if (read_cpuinfo("timebase", line, sizeof(line))) {
1829         return retval;
1830     }
1831 
1832     if (!(ns = strchr(line, ':'))) {
1833         return retval;
1834     }
1835 
1836     ns++;
1837 
1838     return atoi(ns);
1839 }
1840 
1841 bool kvmppc_get_host_serial(char **value)
1842 {
1843     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1844                                NULL);
1845 }
1846 
1847 bool kvmppc_get_host_model(char **value)
1848 {
1849     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1850 }
1851 
1852 /* Try to find a device tree node for a CPU with a clock-frequency property */
1853 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1854 {
1855     struct dirent *dirp;
1856     DIR *dp;
1857 
1858     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1859         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1860         return -1;
1861     }
1862 
1863     buf[0] = '\0';
1864     while ((dirp = readdir(dp)) != NULL) {
1865         FILE *f;
1866         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1867                  dirp->d_name);
1868         f = fopen(buf, "r");
1869         if (f) {
1870             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1871             fclose(f);
1872             break;
1873         }
1874         buf[0] = '\0';
1875     }
1876     closedir(dp);
1877     if (buf[0] == '\0') {
1878         printf("Unknown host!\n");
1879         return -1;
1880     }
1881 
1882     return 0;
1883 }
1884 
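     /* Read a device tree property file containing a single big-endian
      * integer.  Returns the value for 4- or 8-byte properties, -1 if the
      * file can't be opened, and 0 for any other size. */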
1885 static uint64_t kvmppc_read_int_dt(const char *filename)
1886 {
1887     union {
1888         uint32_t v32;
1889         uint64_t v64;
1890     } u;
1891     FILE *f;
1892     int len;
1893 
1894     f = fopen(filename, "rb");
1895     if (!f) {
1896         return -1;
1897     }
1898 
1899     len = fread(&u, 1, sizeof(u), f);
1900     fclose(f);
1901     switch (len) {
1902     case 4:
1903         /* property is a 32-bit quantity */
1904         return be32_to_cpu(u.v32);
1905     case 8:
1906         return be64_to_cpu(u.v64);
1907     }
1908 
1909     return 0;
1910 }
1911 
1912 /* Read a CPU node property from the host device tree that's a single
1913  * integer (32-bit or 64-bit).  Returns -1 (all ones) if the CPU node
1914  * can't be found or the property file can't be opened, and 0 if the
1915  * property's size isn't understood. */
1916 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1917 {
1918     char buf[PATH_MAX], *tmp;
1919     uint64_t val;
1920 
1921     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1922         return -1;
1923     }
1924 
1925     tmp = g_strdup_printf("%s/%s", buf, propname);
1926     val = kvmppc_read_int_dt(tmp);
1927     g_free(tmp);
1928 
1929     return val;
1930 }
1931 
1932 uint64_t kvmppc_get_clockfreq(void)
1933 {
1934     return kvmppc_read_int_cpu_dt("clock-frequency");
1935 }
1936 
1937 uint32_t kvmppc_get_vmx(void)
1938 {
1939     return kvmppc_read_int_cpu_dt("ibm,vmx");
1940 }
1941 
1942 uint32_t kvmppc_get_dfp(void)
1943 {
1944     return kvmppc_read_int_cpu_dt("ibm,dfp");
1945 }
1946 
1947 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1948 {
1949     PowerPCCPU *cpu = ppc_env_get_cpu(env);
1950     CPUState *cs = CPU(cpu);
1951 
1952     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1953         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1954         return 0;
1955     }
1956 
1957     return 1;
1958 }
1959 
1960 int kvmppc_get_hasidle(CPUPPCState *env)
1961 {
1962     struct kvm_ppc_pvinfo pvinfo;
1963 
1964     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1965         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1966         return 1;
1967     }
1968 
1969     return 0;
1970 }
1971 
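     /* Fill 'buf' with the guest-visible hypercall instruction sequence.
      * If KVM provides one via KVM_PPC_GET_PVINFO it is copied verbatim
      * and 0 is returned; otherwise an always-failing sequence (returning
      * -1 in r3 for either endianness) is written and 1 is returned. */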
1972 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1973 {
1974     uint32_t *hc = (uint32_t *)buf;
1975     struct kvm_ppc_pvinfo pvinfo;
1976 
1977     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1978         memcpy(buf, pvinfo.hcall, buf_len);
1979         return 0;
1980     }
1981 
1982     /*
1983      * Fallback to always fail hypercalls regardless of endianness:
1984      *
1985      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1986      *     li r3, -1
1987      *     b .+8       (becomes nop in wrong endian)
1988      *     bswap32(li r3, -1)
1989      */
1990 
1991     hc[0] = cpu_to_be32(0x08000048);
1992     hc[1] = cpu_to_be32(0x3860ffff);
1993     hc[2] = cpu_to_be32(0x48000008);
1994     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1995 
1996     return 1;
1997 }
1998 
1999 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2000 {
2001     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2002 }
2003 
2004 void kvmppc_enable_logical_ci_hcalls(void)
2005 {
2006     /*
2007      * FIXME: it would be nice to detect the cases where we're using
2008      * a device which requires the in-kernel implementation of these
2009      * hcalls but the kernel lacks it, and to produce a warning in
2010      * that case.
2011      */
2012     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2013     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2014 }
2015 
2016 void kvmppc_enable_set_mode_hcall(void)
2017 {
2018     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2019 }
2020 
2021 void kvmppc_enable_clear_ref_mod_hcalls(void)
2022 {
2023     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2024     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2025 }
2026 
2027 void kvmppc_set_papr(PowerPCCPU *cpu)
2028 {
2029     CPUState *cs = CPU(cpu);
2030     int ret;
2031 
2032     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2033     if (ret) {
2034         error_report("This vCPU type or KVM version does not support PAPR");
2035         exit(1);
2036     }
2037 
2038     /* Update the capability flag so we sync the right information
2039      * with kvm */
2040     cap_papr = 1;
2041 }
2042 
2043 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2044 {
2045     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2046 }
2047 
2048 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2049 {
2050     CPUState *cs = CPU(cpu);
2051     int ret;
2052 
2053     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2054     if (ret && mpic_proxy) {
2055         error_report("This KVM version does not support EPR");
2056         exit(1);
2057     }
2058 }
2059 
2060 int kvmppc_smt_threads(void)
2061 {
2062     return cap_ppc_smt ? cap_ppc_smt : 1;
2063 }
2064 
2065 #ifdef TARGET_PPC64
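     /* Allocate a contiguous Real Mode Area through KVM_ALLOCATE_RMA and
      * map it into QEMU.  Returns the size actually mapped (capped at
      * 256MB), 0 when the host doesn't require a contiguous RMA, or -1 on
      * error. */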
2066 off_t kvmppc_alloc_rma(void **rma)
2067 {
2068     off_t size;
2069     int fd;
2070     struct kvm_allocate_rma ret;
2071 
2072     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2073      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2074      *                      not necessary on this hardware
2075      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2076      *
2077      * FIXME: We should allow the user to force contiguous RMA
2078      * allocation in the cap_ppc_rma==1 case.
2079      */
2080     if (cap_ppc_rma < 2) {
2081         return 0;
2082     }
2083 
2084     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2085     if (fd < 0) {
2086         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2087                 strerror(errno));
2088         return -1;
2089     }
2090 
2091     size = MIN(ret.rma_size, 256ul << 20);
2092 
2093     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2094     if (*rma == MAP_FAILED) {
2095         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2096         return -1;
2097     }
2098 
2099     return size;
2100 }
2101 
2102 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2103 {
2104     struct kvm_ppc_smmu_info info;
2105     long rampagesize, best_page_shift;
2106     int i;
2107 
2108     if (cap_ppc_rma >= 2) {
2109         return current_size;
2110     }
2111 
2112     /* Find the largest hardware supported page size that's less than
2113      * or equal to the (logical) backing page size of guest RAM */
2114     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2115     rampagesize = qemu_getrampagesize();
2116     best_page_shift = 0;
2117 
2118     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2119         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2120 
2121         if (!sps->page_shift) {
2122             continue;
2123         }
2124 
2125         if ((sps->page_shift > best_page_shift)
2126             && ((1UL << sps->page_shift) <= rampagesize)) {
2127             best_page_shift = sps->page_shift;
2128         }
2129     }
2130 
2131     return MIN(current_size,
2132                1ULL << (best_page_shift + hash_shift - 7));
2133 }
2134 #endif
2135 
2136 bool kvmppc_spapr_use_multitce(void)
2137 {
2138     return cap_spapr_multitce;
2139 }
2140 
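     /* Create an in-kernel TCE table for the given LIOBN (using the 64-bit
      * ioctl when available) and mmap it into QEMU.  On success the file
      * descriptor is returned through *pfd along with the mapped table; on
      * any failure NULL is returned and the caller must provide the table
      * itself. */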
2141 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2142                               uint64_t bus_offset, uint32_t nb_table,
2143                               int *pfd, bool need_vfio)
2144 {
2145     long len;
2146     int fd;
2147     void *table;
2148 
2149     /* Must set fd to -1 so we don't try to munmap when called for
2150      * destroying the table, which the upper layers -will- do
2151      */
2152     *pfd = -1;
2153     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2154         return NULL;
2155     }
2156 
2157     if (cap_spapr_tce_64) {
2158         struct kvm_create_spapr_tce_64 args = {
2159             .liobn = liobn,
2160             .page_shift = page_shift,
2161             .offset = bus_offset >> page_shift,
2162             .size = nb_table,
2163             .flags = 0
2164         };
2165         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2166         if (fd < 0) {
2167             fprintf(stderr,
2168                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2169                     liobn);
2170             return NULL;
2171         }
2172     } else if (cap_spapr_tce) {
2173         uint64_t window_size = (uint64_t) nb_table << page_shift;
2174         struct kvm_create_spapr_tce args = {
2175             .liobn = liobn,
2176             .window_size = window_size,
2177         };
2178         if ((window_size != args.window_size) || bus_offset) {
2179             return NULL;
2180         }
2181         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2182         if (fd < 0) {
2183             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2184                     liobn);
2185             return NULL;
2186         }
2187     } else {
2188         return NULL;
2189     }
2190 
2191     len = nb_table * sizeof(uint64_t);
2192     /* FIXME: round this up to page size */
2193 
2194     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2195     if (table == MAP_FAILED) {
2196         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2197                 liobn);
2198         close(fd);
2199         return NULL;
2200     }
2201 
2202     *pfd = fd;
2203     return table;
2204 }
2205 
2206 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2207 {
2208     long len;
2209 
2210     if (fd < 0) {
2211         return -1;
2212     }
2213 
2214     len = nb_table * sizeof(uint64_t);
2215     if ((munmap(table, len) < 0) ||
2216         (close(fd) < 0)) {
2217         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2218                 strerror(errno));
2219         /* Leak the table */
2220     }
2221 
2222     return 0;
2223 }
2224 
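     /* Ask KVM to allocate or reset the guest hash page table, using
      * shift_hint as the requested order.  Returns the order actually
      * allocated (or assumed, for old HV kernels), 0 when QEMU must
      * allocate the HPT itself (TCG or PR KVM), or a negative error. */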
2225 int kvmppc_reset_htab(int shift_hint)
2226 {
2227     uint32_t shift = shift_hint;
2228 
2229     if (!kvm_enabled()) {
2230         /* Full emulation, tell caller to allocate htab itself */
2231         return 0;
2232     }
2233     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2234         int ret;
2235         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2236         if (ret == -ENOTTY) {
2237             /* At least some versions of PR KVM advertise the
2238              * capability, but don't implement the ioctl().  Oops.
2239              * Return 0 so that we allocate the htab in qemu, as is
2240              * correct for PR. */
2241             return 0;
2242         } else if (ret < 0) {
2243             return ret;
2244         }
2245         return shift;
2246     }
2247 
2248     /* We have a kernel that predates the htab reset calls.  For PR
2249      * KVM we need to allocate the htab ourselves; an HV KVM of this
2250      * era has already allocated a fixed-size 16MB hash table. */
2251     if (kvmppc_is_pr(kvm_state)) {
2252         /* PR - tell caller to allocate htab */
2253         return 0;
2254     } else {
2255         /* HV - assume 16MB kernel allocated htab */
2256         return 24;
2257     }
2258 }
2259 
2260 static inline uint32_t mfpvr(void)
2261 {
2262     uint32_t pvr;
2263 
2264     asm ("mfpvr %0"
2265          : "=r"(pvr));
2266     return pvr;
2267 }
2268 
2269 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2270 {
2271     if (on) {
2272         *word |= flags;
2273     } else {
2274         *word &= ~flags;
2275     }
2276 }
2277 
2278 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2279 {
2280     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2281     uint32_t vmx = kvmppc_get_vmx();
2282     uint32_t dfp = kvmppc_get_dfp();
2283     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2284     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2285 
2286     /* Now fix up the class with information we can query from the host */
2287     pcc->pvr = mfpvr();
2288 
2289     if (vmx != -1) {
2290         /* Only override when we know what the host supports */
2291         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2292         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2293     }
2294     if (dfp != -1) {
2295         /* Only override when we know what the host supports */
2296         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2297     }
2298 
2299     if (dcache_size != -1) {
2300         pcc->l1_dcache_size = dcache_size;
2301     }
2302 
2303     if (icache_size != -1) {
2304         pcc->l1_icache_size = icache_size;
2305     }
2306 }
2307 
2308 bool kvmppc_has_cap_epr(void)
2309 {
2310     return cap_epr;
2311 }
2312 
2313 bool kvmppc_has_cap_htab_fd(void)
2314 {
2315     return cap_htab_fd;
2316 }
2317 
2318 bool kvmppc_has_cap_fixup_hcalls(void)
2319 {
2320     return cap_fixup_hcalls;
2321 }
2322 
2323 bool kvmppc_has_cap_htm(void)
2324 {
2325     return cap_htm;
2326 }
2327 
2328 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2329 {
2330     ObjectClass *oc = OBJECT_CLASS(pcc);
2331 
2332     while (oc && !object_class_is_abstract(oc)) {
2333         oc = object_class_get_parent(oc);
2334     }
2335     assert(oc);
2336 
2337     return POWERPC_CPU_CLASS(oc);
2338 }
2339 
2340 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2341 {
2342     uint32_t host_pvr = mfpvr();
2343     PowerPCCPUClass *pvr_pcc;
2344 
2345     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2346     if (pvr_pcc == NULL) {
2347         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2348     }
2349 
2350     return pvr_pcc;
2351 }
2352 
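     /* Register the "host" CPU class (and, on ppc64, the matching
      * "host-spapr-cpu-core" type) based on the PVR of the CPU we're
      * running on, and point the generic CPU family alias at it. */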
2353 static int kvm_ppc_register_host_cpu_type(void)
2354 {
2355     TypeInfo type_info = {
2356         .name = TYPE_HOST_POWERPC_CPU,
2357         .class_init = kvmppc_host_cpu_class_init,
2358     };
2359     PowerPCCPUClass *pvr_pcc;
2360     DeviceClass *dc;
2361     int i;
2362 
2363     pvr_pcc = kvm_ppc_get_host_cpu_class();
2364     if (pvr_pcc == NULL) {
2365         return -1;
2366     }
2367     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2368     type_register(&type_info);
2369 
2370 #if defined(TARGET_PPC64)
2371     type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2372     type_info.parent = TYPE_SPAPR_CPU_CORE;
2373     type_info.instance_size = sizeof(sPAPRCPUCore);
2374     type_info.instance_init = NULL;
2375     type_info.class_init = spapr_cpu_core_class_init;
2376     type_info.class_data = (void *) "host";
2377     type_register(&type_info);
2378     g_free((void *)type_info.name);
2379 #endif
2380 
2381     /*
2382      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2383      * we want "POWER8" to be a "family" alias that points to the current
2384      * host CPU type, too)
2385      */
2386     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2387     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2388         if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2389             ObjectClass *oc = OBJECT_CLASS(pvr_pcc);
2390             char *suffix;
2391 
2392             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2393             suffix = strstr(ppc_cpu_aliases[i].model, "-"TYPE_POWERPC_CPU);
2394             if (suffix) {
2395                 *suffix = 0;
2396             }
2397             ppc_cpu_aliases[i].oc = oc;
2398             break;
2399         }
2400     }
2401 
2402     return 0;
2403 }
2404 
2405 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2406 {
2407     struct kvm_rtas_token_args args = {
2408         .token = token,
2409     };
2410 
2411     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2412         return -ENOENT;
2413     }
2414 
2415     strncpy(args.name, function, sizeof(args.name));
2416 
2417     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2418 }
2419 
2420 int kvmppc_get_htab_fd(bool write)
2421 {
2422     struct kvm_get_htab_fd s = {
2423         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2424         .start_index = 0,
2425     };
2426 
2427     if (!cap_htab_fd) {
2428         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2429         return -1;
2430     }
2431 
2432     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2433 }
2434 
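     /* Stream HPT chunks read from the KVM HTAB fd into the migration
      * stream, stopping after roughly max_ns nanoseconds.  Each chunk is
      * emitted as a be32 index, be16 n_valid, be16 n_invalid, followed by
      * n_valid * HASH_PTE_SIZE_64 bytes of PTE data.  Returns 1 once the
      * whole table has been read, 0 if time ran out first, or a negative
      * value on a read error. */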
2435 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2436 {
2437     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2438     uint8_t buf[bufsize];
2439     ssize_t rc;
2440 
2441     do {
2442         rc = read(fd, buf, bufsize);
2443         if (rc < 0) {
2444             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2445                     strerror(errno));
2446             return rc;
2447         } else if (rc) {
2448             uint8_t *buffer = buf;
2449             ssize_t n = rc;
2450             while (n) {
2451                 struct kvm_get_htab_header *head =
2452                     (struct kvm_get_htab_header *) buffer;
2453                 size_t chunksize = sizeof(*head) +
2454                      HASH_PTE_SIZE_64 * head->n_valid;
2455 
2456                 qemu_put_be32(f, head->index);
2457                 qemu_put_be16(f, head->n_valid);
2458                 qemu_put_be16(f, head->n_invalid);
2459                 qemu_put_buffer(f, (void *)(head + 1),
2460                                 HASH_PTE_SIZE_64 * head->n_valid);
2461 
2462                 buffer += chunksize;
2463                 n -= chunksize;
2464             }
2465         }
2466     } while ((rc != 0)
2467              && ((max_ns < 0)
2468                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2469 
2470     return (rc == 0) ? 1 : 0;
2471 }
2472 
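     /* Read one HPT chunk from the migration stream and write it to the
      * KVM HTAB fd.  Returns 0 on success, a negative value on error. */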
2473 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2474                            uint16_t n_valid, uint16_t n_invalid)
2475 {
2476     struct kvm_get_htab_header *buf;
2477     size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
2478     ssize_t rc;
2479 
2480     buf = alloca(chunksize);
2481     buf->index = index;
2482     buf->n_valid = n_valid;
2483     buf->n_invalid = n_invalid;
2484 
2485     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);
2486 
2487     rc = write(fd, buf, chunksize);
2488     if (rc < 0) {
2489         fprintf(stderr, "Error writing KVM hash table: %s\n",
2490                 strerror(errno));
2491         return rc;
2492     }
2493     if (rc != chunksize) {
2494         /* We should never get a short write on a single chunk */
2495         fprintf(stderr, "Short write, restoring KVM hash table\n");
2496         return -1;
2497     }
2498     return 0;
2499 }
2500 
2501 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2502 {
2503     return true;
2504 }
2505 
2506 void kvm_arch_init_irq_routing(KVMState *s)
2507 {
2508 }
2509 
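     /* Read 'n' HPTEs starting at index 'ptex' from KVM through the HTAB
      * fd into 'hptes', zero-filling the entries the kernel reports as
      * invalid. */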
2510 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2511 {
2512     struct kvm_get_htab_fd ghf = {
2513         .flags = 0,
2514         .start_index = ptex,
2515     };
2516     int fd, rc;
2517     int i;
2518 
2519     fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2520     if (fd < 0) {
2521         hw_error("kvmppc_read_hptes: Unable to open HPT fd");
2522     }
2523 
2524     i = 0;
2525     while (i < n) {
2526         struct kvm_get_htab_header *hdr;
2527         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2528         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2529 
2530         rc = read(fd, buf, sizeof(buf));
2531         if (rc < 0) {
2532             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2533         }
2534 
2535         hdr = (struct kvm_get_htab_header *)buf;
2536         while ((i < n) && ((char *)hdr < (buf + rc))) {
2537             int invalid = hdr->n_invalid;
2538 
2539             if (hdr->index != (ptex + i)) {
2540                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2541                          " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
2542             }
2543 
2544             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
2545             i += hdr->n_valid;
2546 
2547             if ((n - i) < invalid) {
2548                 invalid = n - i;
2549             }
2550             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2551             i += hdr->n_invalid;
2552 
2553             hdr = (struct kvm_get_htab_header *)
2554                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2555         }
2556     }
2557 
2558     close(fd);
2559 }
2560 
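     /* Write a single HPTE (pte0/pte1, converted to big-endian) at index
      * 'ptex' through the KVM HTAB fd. */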
2561 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2562 {
2563     int fd, rc;
2564     struct kvm_get_htab_fd ghf;
2565     struct {
2566         struct kvm_get_htab_header hdr;
2567         uint64_t pte0;
2568         uint64_t pte1;
2569     } buf;
2570 
2571     ghf.flags = 0;
2572     ghf.start_index = 0;     /* Ignored */
2573     fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2574     if (fd < 0) {
2575         hw_error("kvmppc_write_hpte: Unable to open HPT fd");
2576     }
2577 
2578     buf.hdr.n_valid = 1;
2579     buf.hdr.n_invalid = 0;
2580     buf.hdr.index = ptex;
2581     buf.pte0 = cpu_to_be64(pte0);
2582     buf.pte1 = cpu_to_be64(pte1);
2583 
2584     rc = write(fd, &buf, sizeof(buf));
2585     if (rc != sizeof(buf)) {
2586         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2587     }
2588     close(fd);
2589 }
2590 
2591 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2592                              uint64_t address, uint32_t data, PCIDevice *dev)
2593 {
2594     return 0;
2595 }
2596 
2597 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2598                                 int vector, PCIDevice *dev)
2599 {
2600     return 0;
2601 }
2602 
2603 int kvm_arch_release_virq_post(int virq)
2604 {
2605     return 0;
2606 }
2607 
2608 int kvm_arch_msi_data_to_gsi(uint32_t data)
2609 {
2610     return data & 0xffff;
2611 }
2612 
2613 int kvmppc_enable_hwrng(void)
2614 {
2615     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2616         return -1;
2617     }
2618 
2619     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2620 }
2621