xref: /qemu/target/ppc/kvm.c (revision dc71b55956b45a4aa6f280b57a3088d169bfc636)
1 /*
2  * PowerPC implementation of KVM hooks
3  *
4  * Copyright IBM Corp. 2007
5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
6  *
7  * Authors:
8  *  Jerone Young <jyoung5@us.ibm.com>
9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10  *  Hollis Blanchard <hollisb@us.ibm.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
13  * See the COPYING file in the top-level directory.
14  *
15  */
16 
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21 
22 #include <linux/kvm.h>
23 
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
36 
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
50 #include "elf.h"
51 #include "sysemu/kvm_int.h"
52 
53 //#define DEBUG_KVM
54 
55 #ifdef DEBUG_KVM
56 #define DPRINTF(fmt, ...) \
57     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
58 #else
59 #define DPRINTF(fmt, ...) \
60     do { } while (0)
61 #endif
62 
63 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
64 
65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
66     KVM_CAP_LAST_INFO
67 };
68 
69 static int cap_interrupt_unset = false;
70 static int cap_interrupt_level = false;
71 static int cap_segstate;
72 static int cap_booke_sregs;
73 static int cap_ppc_smt;
74 static int cap_ppc_smt_possible;
75 static int cap_ppc_rma;
76 static int cap_spapr_tce;
77 static int cap_spapr_tce_64;
78 static int cap_spapr_multitce;
79 static int cap_spapr_vfio;
80 static int cap_hior;
81 static int cap_one_reg;
82 static int cap_epr;
83 static int cap_ppc_watchdog;
84 static int cap_papr;
85 static int cap_htab_fd;
86 static int cap_fixup_hcalls;
87 static int cap_htm;             /* Hardware transactional memory support */
88 static int cap_mmu_radix;
89 static int cap_mmu_hash_v3;
90 static int cap_resize_hpt;
91 static int cap_ppc_pvr_compat;
92 static int cap_ppc_safe_cache;
93 static int cap_ppc_safe_bounds_check;
94 static int cap_ppc_safe_indirect_branch;
95 
96 static uint32_t debug_inst_opcode;
97 
98 /* XXX We have a race condition where we actually have a level triggered
99  *     interrupt, but the infrastructure can't expose that yet, so the guest
100  *     takes it but ignores it, goes to sleep and never gets notified that there's
101  *     still an interrupt pending.
102  *
103  *     As a quick workaround, let's just wake up again 20 ms after we injected
104  *     an interrupt. That way we can ensure that we're always reinjecting
105  *     interrupts in case the guest swallowed them.
106  */
107 static QEMUTimer *idle_timer;
108 
109 static void kvm_kick_cpu(void *opaque)
110 {
111     PowerPCCPU *cpu = opaque;
112 
113     qemu_cpu_kick(CPU(cpu));
114 }
115 
116 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
117  * should only be used for fallback tests - generally we should use
118  * explicit capabilities for the features we want, rather than
119  * assuming what is/isn't available depending on the KVM variant. */
120 static bool kvmppc_is_pr(KVMState *ks)
121 {
122     /* Assume KVM-PR if the GET_PVINFO capability is available */
123     return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
124 }
125 
126 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
127 static void kvmppc_get_cpu_characteristics(KVMState *s);
128 
129 int kvm_arch_init(MachineState *ms, KVMState *s)
130 {
131     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
132     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
133     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
134     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
135     cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
136     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
137     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
138     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
139     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
140     cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
141     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
142     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
143     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
144     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
145     /* Note: we don't set cap_papr here, because this capability is
146      * only activated later, by kvmppc_set_papr() */
147     cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
148     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
149     cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
150     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
151     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
152     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
153     cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
154     kvmppc_get_cpu_characteristics(s);
155     /*
156      * Note: setting it to false because there is no such capability
157      * in KVM at this moment.
158      *
159      * TODO: call kvm_vm_check_extension() with the right capability
160      * after the kernel starts implementing it. */
161     cap_ppc_pvr_compat = false;
162 
163     if (!cap_interrupt_level) {
164         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
165                         "VM to stall at times!\n");
166     }
167 
168     kvm_ppc_register_host_cpu_type(ms);
169 
170     return 0;
171 }
172 
173 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
174 {
175     return 0;
176 }
177 
178 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
179 {
180     CPUPPCState *cenv = &cpu->env;
181     CPUState *cs = CPU(cpu);
182     struct kvm_sregs sregs;
183     int ret;
184 
185     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
186         /* What we're really trying to say is "if we're on BookE, we use
187            the native PVR for now". This is the only sane way to check
188            it though, so we potentially mislead users into thinking they
189            can run BookE guests on BookS. Let's hope nobody dares to try :) */
190         return 0;
191     } else {
192         if (!cap_segstate) {
193             fprintf(stderr, "kvm error: missing PVR setting capability\n");
194             return -ENOSYS;
195         }
196     }
197 
198     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
199     if (ret) {
200         return ret;
201     }
202 
203     sregs.pvr = cenv->spr[SPR_PVR];
204     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
205 }
206 
207 /* Set up a shared TLB array with KVM */
208 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
209 {
210     CPUPPCState *env = &cpu->env;
211     CPUState *cs = CPU(cpu);
212     struct kvm_book3e_206_tlb_params params = {};
213     struct kvm_config_tlb cfg = {};
214     unsigned int entries = 0;
215     int ret, i;
216 
217     if (!kvm_enabled() ||
218         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
219         return 0;
220     }
221 
222     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
223 
224     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
225         params.tlb_sizes[i] = booke206_tlb_size(env, i);
226         params.tlb_ways[i] = booke206_tlb_ways(env, i);
227         entries += params.tlb_sizes[i];
228     }
229 
230     assert(entries == env->nb_tlb);
231     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
232 
233     env->tlb_dirty = true;
234 
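    /*
     * Point KVM at our MAS-format TLB array so that the kernel and QEMU
     * share the same backing store; dirty entries are pushed back to the
     * kernel with KVM_DIRTY_TLB (see kvm_sw_tlb_put()).
     */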
235     cfg.array = (uintptr_t)env->tlb.tlbm;
236     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
237     cfg.params = (uintptr_t)&params;
238     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
239 
240     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
241     if (ret < 0) {
242         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
243                 __func__, strerror(-ret));
244         return ret;
245     }
246 
247     env->kvm_sw_tlb = true;
248     return 0;
249 }
250 
251 
252 #if defined(TARGET_PPC64)
253 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
254                                        struct kvm_ppc_smmu_info *info)
255 {
256     CPUPPCState *env = &cpu->env;
257     CPUState *cs = CPU(cpu);
258 
259     memset(info, 0, sizeof(*info));
260 
261     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
262      * need to "guess" what the supported page sizes are.
263      *
264      * For that to work we make a few assumptions:
265      *
266      * - Check whether we are running "PR" KVM which only supports 4K
267      *   and 16M pages, but supports them regardless of the backing
268  *   store characteristics. We also don't support 1T segments.
269  *
270  *   This is safe because if HV KVM ever supports that capability or
271  *   PR KVM grows support for more page/segment sizes, those versions
272  *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
273  *   will not hit this fallback.
274  *
275  * - Else we are running HV KVM. This means we only support page
276  *   sizes that fit in the backing store. Additionally we only
277  *   advertise 64K pages if the processor is ARCH 2.06 and we assume
278      *   P7 encodings for the SLB and hash table. Here too, we assume
279      *   support for any newer processor will mean a kernel that
280      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
281      *   this fallback.
282      */
283     if (kvmppc_is_pr(cs->kvm_state)) {
284         /* No flags */
285         info->flags = 0;
286         info->slb_size = 64;
287 
288         /* Standard 4k base page size segment */
289         info->sps[0].page_shift = 12;
290         info->sps[0].slb_enc = 0;
291         info->sps[0].enc[0].page_shift = 12;
292         info->sps[0].enc[0].pte_enc = 0;
293 
294         /* Standard 16M large page size segment */
295         info->sps[1].page_shift = 24;
296         info->sps[1].slb_enc = SLB_VSID_L;
297         info->sps[1].enc[0].page_shift = 24;
298         info->sps[1].enc[0].pte_enc = 0;
299     } else {
300         int i = 0;
301 
302         /* HV KVM has backing store size restrictions */
303         info->flags = KVM_PPC_PAGE_SIZES_REAL;
304 
305         if (env->mmu_model & POWERPC_MMU_1TSEG) {
306             info->flags |= KVM_PPC_1T_SEGMENTS;
307         }
308 
309         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
310            POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
311             info->slb_size = 32;
312         } else {
313             info->slb_size = 64;
314         }
315 
316         /* Standard 4k base page size segment */
317         info->sps[i].page_shift = 12;
318         info->sps[i].slb_enc = 0;
319         info->sps[i].enc[0].page_shift = 12;
320         info->sps[i].enc[0].pte_enc = 0;
321         i++;
322 
323         /* 64K on MMU 2.06 and later */
324         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
325             POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
326             info->sps[i].page_shift = 16;
327             info->sps[i].slb_enc = 0x110;
328             info->sps[i].enc[0].page_shift = 16;
329             info->sps[i].enc[0].pte_enc = 1;
330             i++;
331         }
332 
333         /* Standard 16M large page size segment */
334         info->sps[i].page_shift = 24;
335         info->sps[i].slb_enc = SLB_VSID_L;
336         info->sps[i].enc[0].page_shift = 24;
337         info->sps[i].enc[0].pte_enc = 0;
338     }
339 }
340 
341 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
342 {
343     CPUState *cs = CPU(cpu);
344     int ret;
345 
346     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
347         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
348         if (ret == 0) {
349             return;
350         }
351     }
352 
353     kvm_get_fallback_smmu_info(cpu, info);
354 }
355 
356 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
357 {
358     KVMState *s = KVM_STATE(current_machine->accelerator);
359     struct ppc_radix_page_info *radix_page_info;
360     struct kvm_ppc_rmmu_info rmmu_info;
361     int i;
362 
363     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
364         return NULL;
365     }
366     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
367         return NULL;
368     }
369     radix_page_info = g_malloc0(sizeof(*radix_page_info));
370     radix_page_info->count = 0;
371     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
372         if (rmmu_info.ap_encodings[i]) {
373             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
374             radix_page_info->count++;
375         }
376     }
377     return radix_page_info;
378 }
379 
380 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
381                                      bool radix, bool gtse,
382                                      uint64_t proc_tbl)
383 {
384     CPUState *cs = CPU(cpu);
385     int ret;
386     uint64_t flags = 0;
387     struct kvm_ppc_mmuv3_cfg cfg = {
388         .process_table = proc_tbl,
389     };
390 
391     if (radix) {
392         flags |= KVM_PPC_MMUV3_RADIX;
393     }
394     if (gtse) {
395         flags |= KVM_PPC_MMUV3_GTSE;
396     }
397     cfg.flags = flags;
398     ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
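    /* Translate the ioctl result into a PAPR hcall return code for the caller. */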
399     switch (ret) {
400     case 0:
401         return H_SUCCESS;
402     case -EINVAL:
403         return H_PARAMETER;
404     case -ENODEV:
405         return H_NOT_AVAILABLE;
406     default:
407         return H_HARDWARE;
408     }
409 }
410 
411 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
412 {
413     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
414         return true;
415     }
416 
417     return (1ul << shift) <= rampgsize;
418 }
419 
420 static long max_cpu_page_size;
421 
422 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
423 {
424     static struct kvm_ppc_smmu_info smmu_info;
425     static bool has_smmu_info;
426     CPUPPCState *env = &cpu->env;
427     int iq, ik, jq, jk;
428 
429     /* We only handle page sizes for 64-bit server guests for now */
430     if (!(env->mmu_model & POWERPC_MMU_64)) {
431         return;
432     }
433 
434     /* Collect MMU info from kernel if not already */
435     if (!has_smmu_info) {
436         kvm_get_smmu_info(cpu, &smmu_info);
437         has_smmu_info = true;
438     }
439 
440     if (!max_cpu_page_size) {
441         max_cpu_page_size = qemu_getrampagesize();
442     }
443 
444     /* Convert to QEMU form */
445     memset(&env->sps, 0, sizeof(env->sps));
446 
447     /* If we have HV KVM, we need to forbid CI large pages if our
448      * host page size is smaller than 64K.
449      */
450     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
451         env->ci_large_pages = getpagesize() >= 0x10000;
452     }
453 
454     /*
455      * XXX This loop should be an entry wide AND of the capabilities that
456      *     the selected CPU has with the capabilities that KVM supports.
457      */
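    /* For now, copy every segment/page size combination reported by KVM into
     * env->sps, skipping (on HV KVM) sizes larger than the backing RAM page
     * size, and compacting the array as we go. */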
458     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
459         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
460         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
461 
462         if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
463                                  ksps->page_shift)) {
464             continue;
465         }
466         qsps->page_shift = ksps->page_shift;
467         qsps->slb_enc = ksps->slb_enc;
468         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
469             if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
470                                      ksps->enc[jk].page_shift)) {
471                 continue;
472             }
473             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
474             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
475             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
476                 break;
477             }
478         }
479         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
480             break;
481         }
482     }
483     env->slb_nr = smmu_info.slb_size;
484     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
485         env->mmu_model &= ~POWERPC_MMU_1TSEG;
486     }
487 }
488 
489 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
490 {
491     Object *mem_obj = object_resolve_path(obj_path, NULL);
492     long pagesize = host_memory_backend_pagesize(MEMORY_BACKEND(mem_obj));
493 
494     return pagesize >= max_cpu_page_size;
495 }
496 
497 #else /* defined (TARGET_PPC64) */
498 
499 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
500 {
501 }
502 
503 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
504 {
505     return true;
506 }
507 
508 #endif /* !defined (TARGET_PPC64) */
509 
510 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
511 {
512     return POWERPC_CPU(cpu)->vcpu_id;
513 }
514 
515 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
516  * book3s supports only 1 watchpoint, so an array size
517  * of 4 is sufficient for now.
518  */
519 #define MAX_HW_BKPTS 4
520 
521 static struct HWBreakpoint {
522     target_ulong addr;
523     int type;
524 } hw_debug_points[MAX_HW_BKPTS];
525 
526 static CPUWatchpoint hw_watchpoint;
527 
528 /* By default no breakpoints or watchpoints are supported */
529 static int max_hw_breakpoint;
530 static int max_hw_watchpoint;
531 static int nb_hw_breakpoint;
532 static int nb_hw_watchpoint;
533 
534 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
535 {
536     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
537         max_hw_breakpoint = 2;
538         max_hw_watchpoint = 2;
539     }
540 
541     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
542         fprintf(stderr, "Error initializing h/w breakpoints\n");
543         return;
544     }
545 }
546 
547 int kvm_arch_init_vcpu(CPUState *cs)
548 {
549     PowerPCCPU *cpu = POWERPC_CPU(cs);
550     CPUPPCState *cenv = &cpu->env;
551     int ret;
552 
553     /* Gather server mmu info from KVM and update the CPU state */
554     kvm_fixup_page_sizes(cpu);
555 
556     /* Synchronize sregs with kvm */
557     ret = kvm_arch_sync_sregs(cpu);
558     if (ret) {
559         if (ret == -EINVAL) {
560             error_report("Register sync failed... If you're using kvm-hv.ko,"
561                          " only \"-cpu host\" is possible");
562         }
563         return ret;
564     }
565 
566     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
567 
568     switch (cenv->mmu_model) {
569     case POWERPC_MMU_BOOKE206:
570         /* This target supports access to KVM's guest TLB */
571         ret = kvm_booke206_tlb_init(cpu);
572         break;
573     case POWERPC_MMU_2_07:
574         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
575             /* KVM-HV has transactional memory on POWER8 even without the
576              * KVM_CAP_PPC_HTM extension, so enable it here instead as
577              * long as it's available to userspace on the host. */
578             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
579                 cap_htm = true;
580             }
581         }
582         break;
583     default:
584         break;
585     }
586 
587     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
588     kvmppc_hw_debug_points_init(cenv);
589 
590     return ret;
591 }
592 
593 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
594 {
595     CPUPPCState *env = &cpu->env;
596     CPUState *cs = CPU(cpu);
597     struct kvm_dirty_tlb dirty_tlb;
598     unsigned char *bitmap;
599     int ret;
600 
601     if (!env->kvm_sw_tlb) {
602         return;
603     }
604 
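    /* Mark every entry dirty so that KVM rereads the complete shared TLB
     * array rather than just the entries we know have changed. */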
605     bitmap = g_malloc((env->nb_tlb + 7) / 8);
606     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
607 
608     dirty_tlb.bitmap = (uintptr_t)bitmap;
609     dirty_tlb.num_dirty = env->nb_tlb;
610 
611     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
612     if (ret) {
613         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
614                 __func__, strerror(-ret));
615     }
616 
617     g_free(bitmap);
618 }
619 
620 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
621 {
622     PowerPCCPU *cpu = POWERPC_CPU(cs);
623     CPUPPCState *env = &cpu->env;
624     union {
625         uint32_t u32;
626         uint64_t u64;
627     } val;
628     struct kvm_one_reg reg = {
629         .id = id,
630         .addr = (uintptr_t) &val,
631     };
632     int ret;
633 
634     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
635     if (ret != 0) {
636         trace_kvm_failed_spr_get(spr, strerror(errno));
637     } else {
638         switch (id & KVM_REG_SIZE_MASK) {
639         case KVM_REG_SIZE_U32:
640             env->spr[spr] = val.u32;
641             break;
642 
643         case KVM_REG_SIZE_U64:
644             env->spr[spr] = val.u64;
645             break;
646 
647         default:
648             /* Don't handle this size yet */
649             abort();
650         }
651     }
652 }
653 
654 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
655 {
656     PowerPCCPU *cpu = POWERPC_CPU(cs);
657     CPUPPCState *env = &cpu->env;
658     union {
659         uint32_t u32;
660         uint64_t u64;
661     } val;
662     struct kvm_one_reg reg = {
663         .id = id,
664         .addr = (uintptr_t) &val,
665     };
666     int ret;
667 
668     switch (id & KVM_REG_SIZE_MASK) {
669     case KVM_REG_SIZE_U32:
670         val.u32 = env->spr[spr];
671         break;
672 
673     case KVM_REG_SIZE_U64:
674         val.u64 = env->spr[spr];
675         break;
676 
677     default:
678         /* Don't handle this size yet */
679         abort();
680     }
681 
682     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
683     if (ret != 0) {
684         trace_kvm_failed_spr_set(spr, strerror(errno));
685     }
686 }
687 
688 static int kvm_put_fp(CPUState *cs)
689 {
690     PowerPCCPU *cpu = POWERPC_CPU(cs);
691     CPUPPCState *env = &cpu->env;
692     struct kvm_one_reg reg;
693     int i;
694     int ret;
695 
696     if (env->insns_flags & PPC_FLOAT) {
697         uint64_t fpscr = env->fpscr;
698         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
699 
700         reg.id = KVM_REG_PPC_FPSCR;
701         reg.addr = (uintptr_t)&fpscr;
702         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
703         if (ret < 0) {
704             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
705             return ret;
706         }
707 
708         for (i = 0; i < 32; i++) {
709             uint64_t vsr[2];
710 
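            /*
             * Each VSR is built from the FPR plus the VSX doubleword held in
             * env->vsr[]; the doubleword order in the buffer handed to KVM
             * depends on the host byte order, hence the #ifdef below.
             */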
711 #ifdef HOST_WORDS_BIGENDIAN
712             vsr[0] = float64_val(env->fpr[i]);
713             vsr[1] = env->vsr[i];
714 #else
715             vsr[0] = env->vsr[i];
716             vsr[1] = float64_val(env->fpr[i]);
717 #endif
718             reg.addr = (uintptr_t) &vsr;
719             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
720 
721             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
722             if (ret < 0) {
723                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
724                         i, strerror(errno));
725                 return ret;
726             }
727         }
728     }
729 
730     if (env->insns_flags & PPC_ALTIVEC) {
731         reg.id = KVM_REG_PPC_VSCR;
732         reg.addr = (uintptr_t)&env->vscr;
733         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
734         if (ret < 0) {
735             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
736             return ret;
737         }
738 
739         for (i = 0; i < 32; i++) {
740             reg.id = KVM_REG_PPC_VR(i);
741             reg.addr = (uintptr_t)&env->avr[i];
742             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
743             if (ret < 0) {
744                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
745                 return ret;
746             }
747         }
748     }
749 
750     return 0;
751 }
752 
753 static int kvm_get_fp(CPUState *cs)
754 {
755     PowerPCCPU *cpu = POWERPC_CPU(cs);
756     CPUPPCState *env = &cpu->env;
757     struct kvm_one_reg reg;
758     int i;
759     int ret;
760 
761     if (env->insns_flags & PPC_FLOAT) {
762         uint64_t fpscr;
763         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
764 
765         reg.id = KVM_REG_PPC_FPSCR;
766         reg.addr = (uintptr_t)&fpscr;
767         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
768         if (ret < 0) {
769             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
770             return ret;
771         } else {
772             env->fpscr = fpscr;
773         }
774 
775         for (i = 0; i < 32; i++) {
776             uint64_t vsr[2];
777 
778             reg.addr = (uintptr_t) &vsr;
779             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
780 
781             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
782             if (ret < 0) {
783                 DPRINTF("Unable to get %s%d from KVM: %s\n",
784                         vsx ? "VSR" : "FPR", i, strerror(errno));
785                 return ret;
786             } else {
787 #ifdef HOST_WORDS_BIGENDIAN
788                 env->fpr[i] = vsr[0];
789                 if (vsx) {
790                     env->vsr[i] = vsr[1];
791                 }
792 #else
793                 env->fpr[i] = vsr[1];
794                 if (vsx) {
795                     env->vsr[i] = vsr[0];
796                 }
797 #endif
798             }
799         }
800     }
801 
802     if (env->insns_flags & PPC_ALTIVEC) {
803         reg.id = KVM_REG_PPC_VSCR;
804         reg.addr = (uintptr_t)&env->vscr;
805         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
806         if (ret < 0) {
807             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
808             return ret;
809         }
810 
811         for (i = 0; i < 32; i++) {
812             reg.id = KVM_REG_PPC_VR(i);
813             reg.addr = (uintptr_t)&env->avr[i];
814             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
815             if (ret < 0) {
816                 DPRINTF("Unable to get VR%d from KVM: %s\n",
817                         i, strerror(errno));
818                 return ret;
819             }
820         }
821     }
822 
823     return 0;
824 }
825 
826 #if defined(TARGET_PPC64)
827 static int kvm_get_vpa(CPUState *cs)
828 {
829     PowerPCCPU *cpu = POWERPC_CPU(cs);
830     CPUPPCState *env = &cpu->env;
831     struct kvm_one_reg reg;
832     int ret;
833 
834     reg.id = KVM_REG_PPC_VPA_ADDR;
835     reg.addr = (uintptr_t)&env->vpa_addr;
836     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
837     if (ret < 0) {
838         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
839         return ret;
840     }
841 
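    /* The SLB shadow (and the DTL below) is described to KVM as an
     * address/size pair read from a single buffer, so the asserts check
     * that our fields are laid out contiguously in that order. */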
842     assert((uintptr_t)&env->slb_shadow_size
843            == ((uintptr_t)&env->slb_shadow_addr + 8));
844     reg.id = KVM_REG_PPC_VPA_SLB;
845     reg.addr = (uintptr_t)&env->slb_shadow_addr;
846     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
847     if (ret < 0) {
848         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
849                 strerror(errno));
850         return ret;
851     }
852 
853     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
854     reg.id = KVM_REG_PPC_VPA_DTL;
855     reg.addr = (uintptr_t)&env->dtl_addr;
856     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
857     if (ret < 0) {
858         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
859                 strerror(errno));
860         return ret;
861     }
862 
863     return 0;
864 }
865 
866 static int kvm_put_vpa(CPUState *cs)
867 {
868     PowerPCCPU *cpu = POWERPC_CPU(cs);
869     CPUPPCState *env = &cpu->env;
870     struct kvm_one_reg reg;
871     int ret;
872 
873     /* SLB shadow or DTL can't be registered unless a master VPA is
874      * registered.  That means when restoring state, if a VPA *is*
875      * registered, we need to set that up first.  If not, we need to
876      * deregister the others before deregistering the master VPA. */
877     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
878 
879     if (env->vpa_addr) {
880         reg.id = KVM_REG_PPC_VPA_ADDR;
881         reg.addr = (uintptr_t)&env->vpa_addr;
882         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
883         if (ret < 0) {
884             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
885             return ret;
886         }
887     }
888 
889     assert((uintptr_t)&env->slb_shadow_size
890            == ((uintptr_t)&env->slb_shadow_addr + 8));
891     reg.id = KVM_REG_PPC_VPA_SLB;
892     reg.addr = (uintptr_t)&env->slb_shadow_addr;
893     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
894     if (ret < 0) {
895         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
896         return ret;
897     }
898 
899     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
900     reg.id = KVM_REG_PPC_VPA_DTL;
901     reg.addr = (uintptr_t)&env->dtl_addr;
902     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
903     if (ret < 0) {
904         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
905                 strerror(errno));
906         return ret;
907     }
908 
909     if (!env->vpa_addr) {
910         reg.id = KVM_REG_PPC_VPA_ADDR;
911         reg.addr = (uintptr_t)&env->vpa_addr;
912         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
913         if (ret < 0) {
914             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
915             return ret;
916         }
917     }
918 
919     return 0;
920 }
921 #endif /* TARGET_PPC64 */
922 
923 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
924 {
925     CPUPPCState *env = &cpu->env;
926     struct kvm_sregs sregs;
927     int i;
928 
929     sregs.pvr = env->spr[SPR_PVR];
930 
931     if (cpu->vhyp) {
932         PPCVirtualHypervisorClass *vhc =
933             PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
934         sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
935     } else {
936         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
937     }
938 
939     /* Sync SLB */
940 #ifdef TARGET_PPC64
941     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
942         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
943         if (env->slb[i].esid & SLB_ESID_V) {
944             sregs.u.s.ppc64.slb[i].slbe |= i;
945         }
946         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
947     }
948 #endif
949 
950     /* Sync SRs */
951     for (i = 0; i < 16; i++) {
952         sregs.u.s.ppc32.sr[i] = env->sr[i];
953     }
954 
955     /* Sync BATs */
956     for (i = 0; i < 8; i++) {
957         /* Beware. We have to swap upper and lower bits here */
958         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
959             | env->DBAT[1][i];
960         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
961             | env->IBAT[1][i];
962     }
963 
964     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
965 }
966 
967 int kvm_arch_put_registers(CPUState *cs, int level)
968 {
969     PowerPCCPU *cpu = POWERPC_CPU(cs);
970     CPUPPCState *env = &cpu->env;
971     struct kvm_regs regs;
972     int ret;
973     int i;
974 
975     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
976     if (ret < 0) {
977         return ret;
978     }
979 
980     regs.ctr = env->ctr;
981     regs.lr  = env->lr;
982     regs.xer = cpu_read_xer(env);
983     regs.msr = env->msr;
984     regs.pc = env->nip;
985 
986     regs.srr0 = env->spr[SPR_SRR0];
987     regs.srr1 = env->spr[SPR_SRR1];
988 
989     regs.sprg0 = env->spr[SPR_SPRG0];
990     regs.sprg1 = env->spr[SPR_SPRG1];
991     regs.sprg2 = env->spr[SPR_SPRG2];
992     regs.sprg3 = env->spr[SPR_SPRG3];
993     regs.sprg4 = env->spr[SPR_SPRG4];
994     regs.sprg5 = env->spr[SPR_SPRG5];
995     regs.sprg6 = env->spr[SPR_SPRG6];
996     regs.sprg7 = env->spr[SPR_SPRG7];
997 
998     regs.pid = env->spr[SPR_BOOKE_PID];
999 
1000     for (i = 0; i < 32; i++)
1001         regs.gpr[i] = env->gpr[i];
1002 
1003     regs.cr = 0;
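    /* Pack the eight 4-bit CR fields into the single 32-bit CR image,
     * with CR0 ending up in the most significant nibble. */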
1004     for (i = 0; i < 8; i++) {
1005         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1006     }
1007 
1008     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1009     if (ret < 0)
1010         return ret;
1011 
1012     kvm_put_fp(cs);
1013 
1014     if (env->tlb_dirty) {
1015         kvm_sw_tlb_put(cpu);
1016         env->tlb_dirty = false;
1017     }
1018 
1019     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1020         ret = kvmppc_put_books_sregs(cpu);
1021         if (ret < 0) {
1022             return ret;
1023         }
1024     }
1025 
1026     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1027         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1028     }
1029 
1030     if (cap_one_reg) {
1031         int i;
1032 
1033         /* We deliberately ignore errors here: for kernels which have
1034          * the ONE_REG calls but don't support the specific
1035          * registers, there's a reasonable chance things will still
1036          * work, at least until we try to migrate. */
1037         for (i = 0; i < 1024; i++) {
1038             uint64_t id = env->spr_cb[i].one_reg_id;
1039 
1040             if (id != 0) {
1041                 kvm_put_one_spr(cs, id, i);
1042             }
1043         }
1044 
1045 #ifdef TARGET_PPC64
1046         if (msr_ts) {
1047             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1048                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1049             }
1050             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1051                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1052             }
1053             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1054             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1055             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1056             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1057             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1058             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1059             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1060             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1061             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1062             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1063         }
1064 
1065         if (cap_papr) {
1066             if (kvm_put_vpa(cs) < 0) {
1067                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1068             }
1069         }
1070 
1071         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1072 #endif /* TARGET_PPC64 */
1073     }
1074 
1075     return ret;
1076 }
1077 
1078 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1079 {
1080      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1081 }
1082 
1083 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1084 {
1085     CPUPPCState *env = &cpu->env;
1086     struct kvm_sregs sregs;
1087     int ret;
1088 
1089     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1090     if (ret < 0) {
1091         return ret;
1092     }
1093 
1094     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1095         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1096         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1097         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1098         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1099         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1100         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1101         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1102         env->spr[SPR_DECR] = sregs.u.e.dec;
1103         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1104         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1105         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1106     }
1107 
1108     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1109         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1110         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1111         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1112         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1113         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1114     }
1115 
1116     if (sregs.u.e.features & KVM_SREGS_E_64) {
1117         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1118     }
1119 
1120     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1121         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1122     }
1123 
1124     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1125         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1126         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1127         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1128         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1129         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1130         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1131         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1132         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1133         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1134         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1135         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1136         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1137         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1138         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1139         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1140         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1141         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1142         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1143         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1144         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1145         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1146         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1147         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1148         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1149         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1150         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1151         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1152         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1153         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1154         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1155         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1156         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1157 
1158         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1159             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1160             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1161             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1162             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1163             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1164             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1165         }
1166 
1167         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1168             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1169             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1170         }
1171 
1172         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1173             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1174             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1175             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1176             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1177         }
1178     }
1179 
1180     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1181         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1182         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1183         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1184         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1185         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1186         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1187         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1188         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1189         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1190         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1191     }
1192 
1193     if (sregs.u.e.features & KVM_SREGS_EXP) {
1194         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1195     }
1196 
1197     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1198         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1199         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1200     }
1201 
1202     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1203         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1204         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1205         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1206 
1207         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1208             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1209             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1210         }
1211     }
1212 
1213     return 0;
1214 }
1215 
1216 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1217 {
1218     CPUPPCState *env = &cpu->env;
1219     struct kvm_sregs sregs;
1220     int ret;
1221     int i;
1222 
1223     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1224     if (ret < 0) {
1225         return ret;
1226     }
1227 
1228     if (!cpu->vhyp) {
1229         ppc_store_sdr1(env, sregs.u.s.sdr1);
1230     }
1231 
1232     /* Sync SLB */
1233 #ifdef TARGET_PPC64
1234     /*
1235      * The packed SLB array we get from KVM_GET_SREGS only contains
1236      * information about valid entries. So we flush our internal copy
1237      * to get rid of stale ones, then put all valid SLB entries back
1238      * in.
1239      */
1240     memset(env->slb, 0, sizeof(env->slb));
1241     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1242         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1243         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1244         /*
1245          * Only restore valid entries
1246          */
1247         if (rb & SLB_ESID_V) {
1248             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1249         }
1250     }
1251 #endif
1252 
1253     /* Sync SRs */
1254     for (i = 0; i < 16; i++) {
1255         env->sr[i] = sregs.u.s.ppc32.sr[i];
1256     }
1257 
1258     /* Sync BATs */
1259     for (i = 0; i < 8; i++) {
1260         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1261         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1262         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1263         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1264     }
1265 
1266     return 0;
1267 }
1268 
1269 int kvm_arch_get_registers(CPUState *cs)
1270 {
1271     PowerPCCPU *cpu = POWERPC_CPU(cs);
1272     CPUPPCState *env = &cpu->env;
1273     struct kvm_regs regs;
1274     uint32_t cr;
1275     int i, ret;
1276 
1277     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1278     if (ret < 0)
1279         return ret;
1280 
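    /* Split the 32-bit CR image back into the eight 4-bit CR fields,
     * working upwards from CR7 in the least significant nibble. */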
1281     cr = regs.cr;
1282     for (i = 7; i >= 0; i--) {
1283         env->crf[i] = cr & 15;
1284         cr >>= 4;
1285     }
1286 
1287     env->ctr = regs.ctr;
1288     env->lr = regs.lr;
1289     cpu_write_xer(env, regs.xer);
1290     env->msr = regs.msr;
1291     env->nip = regs.pc;
1292 
1293     env->spr[SPR_SRR0] = regs.srr0;
1294     env->spr[SPR_SRR1] = regs.srr1;
1295 
1296     env->spr[SPR_SPRG0] = regs.sprg0;
1297     env->spr[SPR_SPRG1] = regs.sprg1;
1298     env->spr[SPR_SPRG2] = regs.sprg2;
1299     env->spr[SPR_SPRG3] = regs.sprg3;
1300     env->spr[SPR_SPRG4] = regs.sprg4;
1301     env->spr[SPR_SPRG5] = regs.sprg5;
1302     env->spr[SPR_SPRG6] = regs.sprg6;
1303     env->spr[SPR_SPRG7] = regs.sprg7;
1304 
1305     env->spr[SPR_BOOKE_PID] = regs.pid;
1306 
1307     for (i = 0; i < 32; i++)
1308         env->gpr[i] = regs.gpr[i];
1309 
1310     kvm_get_fp(cs);
1311 
1312     if (cap_booke_sregs) {
1313         ret = kvmppc_get_booke_sregs(cpu);
1314         if (ret < 0) {
1315             return ret;
1316         }
1317     }
1318 
1319     if (cap_segstate) {
1320         ret = kvmppc_get_books_sregs(cpu);
1321         if (ret < 0) {
1322             return ret;
1323         }
1324     }
1325 
1326     if (cap_hior) {
1327         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1328     }
1329 
1330     if (cap_one_reg) {
1331         int i;
1332 
1333         /* We deliberately ignore errors here: for kernels which have
1334          * the ONE_REG calls but don't support the specific
1335          * registers, there's a reasonable chance things will still
1336          * work, at least until we try to migrate. */
1337         for (i = 0; i < 1024; i++) {
1338             uint64_t id = env->spr_cb[i].one_reg_id;
1339 
1340             if (id != 0) {
1341                 kvm_get_one_spr(cs, id, i);
1342             }
1343         }
1344 
1345 #ifdef TARGET_PPC64
1346         if (msr_ts) {
1347             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1348                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1349             }
1350             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1351                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1352             }
1353             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1354             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1355             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1356             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1357             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1358             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1359             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1360             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1361             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1362             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1363         }
1364 
1365         if (cap_papr) {
1366             if (kvm_get_vpa(cs) < 0) {
1367                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1368             }
1369         }
1370 
1371         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1372 #endif
1373     }
1374 
1375     return 0;
1376 }
1377 
1378 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1379 {
1380     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1381 
1382     if (irq != PPC_INTERRUPT_EXT) {
1383         return 0;
1384     }
1385 
1386     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1387         return 0;
1388     }
1389 
1390     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1391 
1392     return 0;
1393 }
1394 
1395 #if defined(TARGET_PPCEMB)
1396 #define PPC_INPUT_INT PPC40x_INPUT_INT
1397 #elif defined(TARGET_PPC64)
1398 #define PPC_INPUT_INT PPC970_INPUT_INT
1399 #else
1400 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1401 #endif
1402 
1403 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1404 {
1405     PowerPCCPU *cpu = POWERPC_CPU(cs);
1406     CPUPPCState *env = &cpu->env;
1407     int r;
1408     unsigned irq;
1409 
1410     qemu_mutex_lock_iothread();
1411 
1412     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1413      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1414     if (!cap_interrupt_level &&
1415         run->ready_for_interrupt_injection &&
1416         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1417         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1418     {
1419         /* For now KVM disregards the 'irq' argument. However, in the
1420          * future KVM could cache it in-kernel to avoid a heavyweight exit
1421          * when reading the UIC.
1422          */
1423         irq = KVM_INTERRUPT_SET;
1424 
1425         DPRINTF("injected interrupt %d\n", irq);
1426         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1427         if (r < 0) {
1428             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1429         }
1430 
1431         /* Always wake up soon in case the interrupt was level based */
1432         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1433                        (NANOSECONDS_PER_SECOND / 50));
1434     }
1435 
1436     /* We don't know if there are more interrupts pending after this. However,
1437      * the guest will return to userspace in the course of handling this one
1438      * anyway, so we will get a chance to deliver the rest. */
1439 
1440     qemu_mutex_unlock_iothread();
1441 }
1442 
1443 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1444 {
1445     return MEMTXATTRS_UNSPECIFIED;
1446 }
1447 
1448 int kvm_arch_process_async_events(CPUState *cs)
1449 {
1450     return cs->halted;
1451 }
1452 
1453 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1454 {
1455     CPUState *cs = CPU(cpu);
1456     CPUPPCState *env = &cpu->env;
1457 
1458     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1459         cs->halted = 1;
1460         cs->exception_index = EXCP_HLT;
1461     }
1462 
1463     return 0;
1464 }
1465 
1466 /* map dcr access to existing qemu dcr emulation */
1467 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1468 {
1469     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1470         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1471 
1472     return 0;
1473 }
1474 
1475 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1476 {
1477     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1478         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1479 
1480     return 0;
1481 }
1482 
1483 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1484 {
1485     /* Mixed endian case is not handled */
1486     uint32_t sc = debug_inst_opcode;
1487 
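    /* Save the original instruction and patch in the trap opcode that KVM
     * reported through KVM_REG_PPC_DEBUG_INST at vcpu init time. */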
1488     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1489                             sizeof(sc), 0) ||
1490         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1491         return -EINVAL;
1492     }
1493 
1494     return 0;
1495 }
1496 
1497 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1498 {
1499     uint32_t sc;
1500 
1501     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1502         sc != debug_inst_opcode ||
1503         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1504                             sizeof(sc), 1)) {
1505         return -EINVAL;
1506     }
1507 
1508     return 0;
1509 }
1510 
1511 static int find_hw_breakpoint(target_ulong addr, int type)
1512 {
1513     int n;
1514 
1515     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1516            <= ARRAY_SIZE(hw_debug_points));
1517 
1518     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1519         if (hw_debug_points[n].addr == addr &&
1520              hw_debug_points[n].type == type) {
1521             return n;
1522         }
1523     }
1524 
1525     return -1;
1526 }
1527 
1528 static int find_hw_watchpoint(target_ulong addr, int *flag)
1529 {
1530     int n;
1531 
1532     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1533     if (n >= 0) {
1534         *flag = BP_MEM_ACCESS;
1535         return n;
1536     }
1537 
1538     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1539     if (n >= 0) {
1540         *flag = BP_MEM_WRITE;
1541         return n;
1542     }
1543 
1544     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1545     if (n >= 0) {
1546         *flag = BP_MEM_READ;
1547         return n;
1548     }
1549 
1550     return -1;
1551 }
1552 
1553 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1554                                   target_ulong len, int type)
1555 {
1556     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1557         return -ENOBUFS;
1558     }
1559 
1560     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1561     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1562 
1563     switch (type) {
1564     case GDB_BREAKPOINT_HW:
1565         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1566             return -ENOBUFS;
1567         }
1568 
1569         if (find_hw_breakpoint(addr, type) >= 0) {
1570             return -EEXIST;
1571         }
1572 
1573         nb_hw_breakpoint++;
1574         break;
1575 
1576     case GDB_WATCHPOINT_WRITE:
1577     case GDB_WATCHPOINT_READ:
1578     case GDB_WATCHPOINT_ACCESS:
1579         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1580             return -ENOBUFS;
1581         }
1582 
1583         if (find_hw_breakpoint(addr, type) >= 0) {
1584             return -EEXIST;
1585         }
1586 
1587         nb_hw_watchpoint++;
1588         break;
1589 
1590     default:
1591         return -ENOSYS;
1592     }
1593 
1594     return 0;
1595 }
1596 
1597 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1598                                   target_ulong len, int type)
1599 {
1600     int n;
1601 
1602     n = find_hw_breakpoint(addr, type);
1603     if (n < 0) {
1604         return -ENOENT;
1605     }
1606 
1607     switch (type) {
1608     case GDB_BREAKPOINT_HW:
1609         nb_hw_breakpoint--;
1610         break;
1611 
1612     case GDB_WATCHPOINT_WRITE:
1613     case GDB_WATCHPOINT_READ:
1614     case GDB_WATCHPOINT_ACCESS:
1615         nb_hw_watchpoint--;
1616         break;
1617 
1618     default:
1619         return -ENOSYS;
1620     }
1621     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1622 
1623     return 0;
1624 }
1625 
1626 void kvm_arch_remove_all_hw_breakpoints(void)
1627 {
1628     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1629 }
1630 
1631 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1632 {
1633     int n;
1634 
1635     /* Software Breakpoint updates */
1636     if (kvm_sw_breakpoints_active(cs)) {
1637         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1638     }
1639 
1640     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1641            <= ARRAY_SIZE(hw_debug_points));
1642     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1643 
1644     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1645         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1646         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1647         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1648             switch (hw_debug_points[n].type) {
1649             case GDB_BREAKPOINT_HW:
1650                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1651                 break;
1652             case GDB_WATCHPOINT_WRITE:
1653                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1654                 break;
1655             case GDB_WATCHPOINT_READ:
1656                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1657                 break;
1658             case GDB_WATCHPOINT_ACCESS:
1659                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1660                                         KVMPPC_DEBUG_WATCH_READ;
1661                 break;
1662             default:
1663                 cpu_abort(cs, "Unsupported breakpoint type\n");
1664             }
1665             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1666         }
1667     }
1668 }
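
/*
 * Worked example (addresses hypothetical): with one hardware breakpoint
 * registered at 0x100 and one write watchpoint at 0x2000, the loop above
 * ends up with
 *
 *     dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
 *     dbg->arch.bp[0] = { .addr = 0x100,  .type = KVMPPC_DEBUG_BREAKPOINT };
 *     dbg->arch.bp[1] = { .addr = 0x2000, .type = KVMPPC_DEBUG_WATCH_WRITE };
 *
 * while the remaining dbg->arch.bp[] slots stay zeroed by the memset().
 */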
1669 
1670 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1671 {
1672     CPUState *cs = CPU(cpu);
1673     CPUPPCState *env = &cpu->env;
1674     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1675     int handle = 0;
1676     int n;
1677     int flag = 0;
1678 
1679     if (cs->singlestep_enabled) {
1680         handle = 1;
1681     } else if (arch_info->status) {
1682         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1683             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1684                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1685                 if (n >= 0) {
1686                     handle = 1;
1687                 }
1688             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1689                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1690                 n = find_hw_watchpoint(arch_info->address,  &flag);
1691                 if (n >= 0) {
1692                     handle = 1;
1693                     cs->watchpoint_hit = &hw_watchpoint;
1694                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1695                     hw_watchpoint.flags = flag;
1696                 }
1697             }
1698         }
1699     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1700         handle = 1;
1701     } else {
1702         /* QEMU cannot handle this debug exception itself, so inject
1703          * a program exception into the guest;
1704          * yes, a program exception, NOT a debug exception!
1705          * While QEMU owns the debug resources, debug exceptions must
1706          * always be delivered to it.  To achieve this we set MSR_DE and
1707          * also set MSRP_DEP so the guest cannot change MSR_DE.
1708          * When emulating debug resources for the guest we instead want
1709          * the guest to control MSR_DE (enabling/disabling debug
1710          * interrupts on demand).  Supporting both configurations at
1711          * once is NOT possible, so debug resources cannot be shared
1712          * between QEMU and the guest on the BookE architecture.
1713          * In the current design QEMU takes priority over the guest:
1714          * if QEMU is using the debug resources, the guest cannot use
1715          * them.
1716          * For software breakpoints QEMU uses a privileged instruction,
1717          * so we cannot be here because the guest set up a debug
1718          * exception; the only remaining possibility is that the guest
1719          * executed a privileged / illegal instruction, which is why we
1720          * inject a program interrupt.
1721          */
1722 
1723         cpu_synchronize_state(cs);
1724         /* env->nip is PC, so increment this by 4 to use
1725          * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1726          */
1727         env->nip += 4;
1728         cs->exception_index = POWERPC_EXCP_PROGRAM;
1729         env->error_code = POWERPC_EXCP_INVAL;
1730         ppc_cpu_do_interrupt(cs);
1731     }
1732 
1733     return handle;
1734 }
1735 
1736 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1737 {
1738     PowerPCCPU *cpu = POWERPC_CPU(cs);
1739     CPUPPCState *env = &cpu->env;
1740     int ret;
1741 
1742     qemu_mutex_lock_iothread();
1743 
1744     switch (run->exit_reason) {
1745     case KVM_EXIT_DCR:
1746         if (run->dcr.is_write) {
1747             DPRINTF("handle dcr write\n");
1748             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1749         } else {
1750             DPRINTF("handle dcr read\n");
1751             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1752         }
1753         break;
1754     case KVM_EXIT_HLT:
1755         DPRINTF("handle halt\n");
1756         ret = kvmppc_handle_halt(cpu);
1757         break;
1758 #if defined(TARGET_PPC64)
1759     case KVM_EXIT_PAPR_HCALL:
1760         DPRINTF("handle PAPR hypercall\n");
1761         run->papr_hcall.ret = spapr_hypercall(cpu,
1762                                               run->papr_hcall.nr,
1763                                               run->papr_hcall.args);
1764         ret = 0;
1765         break;
1766 #endif
1767     case KVM_EXIT_EPR:
1768         DPRINTF("handle epr\n");
1769         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1770         ret = 0;
1771         break;
1772     case KVM_EXIT_WATCHDOG:
1773         DPRINTF("handle watchdog expiry\n");
1774         watchdog_perform_action();
1775         ret = 0;
1776         break;
1777 
1778     case KVM_EXIT_DEBUG:
1779         DPRINTF("handle debug exception\n");
1780         if (kvm_handle_debug(cpu, run)) {
1781             ret = EXCP_DEBUG;
1782             break;
1783         }
1784         /* re-enter, this exception was guest-internal */
1785         ret = 0;
1786         break;
1787 
1788     default:
1789         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1790         ret = -1;
1791         break;
1792     }
1793 
1794     qemu_mutex_unlock_iothread();
1795     return ret;
1796 }
1797 
1798 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1799 {
1800     CPUState *cs = CPU(cpu);
1801     uint32_t bits = tsr_bits;
1802     struct kvm_one_reg reg = {
1803         .id = KVM_REG_PPC_OR_TSR,
1804         .addr = (uintptr_t) &bits,
1805     };
1806 
1807     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1808 }
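
/*
 * This helper and the two below follow the same KVM one-reg pattern: point
 * a struct kvm_one_reg at a local copy of the value and issue the vcpu
 * ioctl.  A minimal sketch of the pattern, assuming a hypothetical 32-bit
 * register id REG_ID:
 *
 *     uint32_t val = ...;
 *     struct kvm_one_reg reg = {
 *         .id   = REG_ID,
 *         .addr = (uintptr_t) &val,
 *     };
 *     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 */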
1809 
1810 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1811 {
1812 
1813     CPUState *cs = CPU(cpu);
1814     uint32_t bits = tsr_bits;
1815     struct kvm_one_reg reg = {
1816         .id = KVM_REG_PPC_CLEAR_TSR,
1817         .addr = (uintptr_t) &bits,
1818     };
1819 
1820     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1821 }
1822 
1823 int kvmppc_set_tcr(PowerPCCPU *cpu)
1824 {
1825     CPUState *cs = CPU(cpu);
1826     CPUPPCState *env = &cpu->env;
1827     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1828 
1829     struct kvm_one_reg reg = {
1830         .id = KVM_REG_PPC_TCR,
1831         .addr = (uintptr_t) &tcr,
1832     };
1833 
1834     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1835 }
1836 
1837 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1838 {
1839     CPUState *cs = CPU(cpu);
1840     int ret;
1841 
1842     if (!kvm_enabled()) {
1843         return -1;
1844     }
1845 
1846     if (!cap_ppc_watchdog) {
1847         printf("warning: KVM does not support watchdog\n");
1848         return -1;
1849     }
1850 
1851     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1852     if (ret < 0) {
1853         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1854                 __func__, strerror(-ret));
1855         return ret;
1856     }
1857 
1858     return ret;
1859 }
1860 
1861 static int read_cpuinfo(const char *field, char *value, int len)
1862 {
1863     FILE *f;
1864     int ret = -1;
1865     int field_len = strlen(field);
1866     char line[512];
1867 
1868     f = fopen("/proc/cpuinfo", "r");
1869     if (!f) {
1870         return -1;
1871     }
1872 
1873     do {
1874         if (!fgets(line, sizeof(line), f)) {
1875             break;
1876         }
1877         if (!strncmp(line, field, field_len)) {
1878             pstrcpy(value, len, line);
1879             ret = 0;
1880             break;
1881         }
1882     } while (*line);
1883 
1884     fclose(f);
1885 
1886     return ret;
1887 }
1888 
1889 uint32_t kvmppc_get_tbfreq(void)
1890 {
1891     char line[512];
1892     char *ns;
1893     uint32_t retval = NANOSECONDS_PER_SECOND;
1894 
1895     if (read_cpuinfo("timebase", line, sizeof(line))) {
1896         return retval;
1897     }
1898 
1899     if (!(ns = strchr(line, ':'))) {
1900         return retval;
1901     }
1902 
1903     ns++;
1904 
1905     return atoi(ns);
1906 }
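
/*
 * Example (value hypothetical): on a Power host /proc/cpuinfo contains a
 * line such as
 *
 *     timebase        : 512000000
 *
 * read_cpuinfo() copies that line into 'line', strchr() skips to just
 * past the ':', and atoi() converts the remainder to 512000000 Hz.
 */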
1907 
1908 bool kvmppc_get_host_serial(char **value)
1909 {
1910     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1911                                NULL);
1912 }
1913 
1914 bool kvmppc_get_host_model(char **value)
1915 {
1916     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1917 }
1918 
1919 /* Try to find a device tree node for a CPU with clock-frequency property */
1920 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1921 {
1922     struct dirent *dirp;
1923     DIR *dp;
1924 
1925     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1926         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1927         return -1;
1928     }
1929 
1930     buf[0] = '\0';
1931     while ((dirp = readdir(dp)) != NULL) {
1932         FILE *f;
1933         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1934                  dirp->d_name);
1935         f = fopen(buf, "r");
1936         if (f) {
1937             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1938             fclose(f);
1939             break;
1940         }
1941         buf[0] = '\0';
1942     }
1943     closedir(dp);
1944     if (buf[0] == '\0') {
1945         printf("Unknown host!\n");
1946         return -1;
1947     }
1948 
1949     return 0;
1950 }
1951 
1952 static uint64_t kvmppc_read_int_dt(const char *filename)
1953 {
1954     union {
1955         uint32_t v32;
1956         uint64_t v64;
1957     } u;
1958     FILE *f;
1959     int len;
1960 
1961     f = fopen(filename, "rb");
1962     if (!f) {
1963         return -1;
1964     }
1965 
1966     len = fread(&u, 1, sizeof(u), f);
1967     fclose(f);
1968     switch (len) {
1969     case 4:
1970         /* property is a 32-bit quantity */
1971         return be32_to_cpu(u.v32);
1972     case 8:
1973         return be64_to_cpu(u.v64);
1974     }
1975 
1976     return 0;
1977 }
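
/*
 * Example (bytes hypothetical): device tree properties are stored
 * big-endian, so a 4-byte property file containing
 *
 *     0x1d 0xcd 0x65 0x00
 *
 * decodes to 0x1dcd6500 == 500000000 regardless of host byte order;
 * an 8-byte property goes through be64_to_cpu() instead.
 */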
1978 
1979 /* Read a CPU node property from the host device tree that's a single
1980  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1981  * (can't find or open the property, or doesn't understand the
1982  * format) */
1983 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1984 {
1985     char buf[PATH_MAX], *tmp;
1986     uint64_t val;
1987 
1988     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1989         return -1;
1990     }
1991 
1992     tmp = g_strdup_printf("%s/%s", buf, propname);
1993     val = kvmppc_read_int_dt(tmp);
1994     g_free(tmp);
1995 
1996     return val;
1997 }
1998 
1999 uint64_t kvmppc_get_clockfreq(void)
2000 {
2001     return kvmppc_read_int_cpu_dt("clock-frequency");
2002 }
2003 
2004 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2005 {
2006     PowerPCCPU *cpu = ppc_env_get_cpu(env);
2007     CPUState *cs = CPU(cpu);
2008 
2009     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2010         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2011         return 0;
2012     }
2013 
2014     return 1;
2015 }
2016 
2017 int kvmppc_get_hasidle(CPUPPCState *env)
2018 {
2019     struct kvm_ppc_pvinfo pvinfo;
2020 
2021     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2022         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2023         return 1;
2024     }
2025 
2026     return 0;
2027 }
2028 
2029 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2030 {
2031     uint32_t *hc = (uint32_t*)buf;
2032     struct kvm_ppc_pvinfo pvinfo;
2033 
2034     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2035         memcpy(buf, pvinfo.hcall, buf_len);
2036         return 0;
2037     }
2038 
2039     /*
2040      * Fallback to always fail hypercalls regardless of endianness:
2041      *
2042      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2043      *     li r3, -1
2044      *     b .+8       (becomes nop in wrong endian)
2045      *     bswap32(li r3, -1)
2046      */
2047 
2048     hc[0] = cpu_to_be32(0x08000048);
2049     hc[1] = cpu_to_be32(0x3860ffff);
2050     hc[2] = cpu_to_be32(0x48000008);
2051     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2052 
2053     return 1;
2054 }
2055 
2056 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2057 {
2058     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2059 }
2060 
2061 void kvmppc_enable_logical_ci_hcalls(void)
2062 {
2063     /*
2064      * FIXME: it would be nice to detect the case where we're
2065      * using a device that requires the in-kernel implementation
2066      * of these hcalls but the kernel lacks it, and produce a
2067      * warning.
2068      */
2069     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2070     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2071 }
2072 
2073 void kvmppc_enable_set_mode_hcall(void)
2074 {
2075     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2076 }
2077 
2078 void kvmppc_enable_clear_ref_mod_hcalls(void)
2079 {
2080     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2081     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2082 }
2083 
2084 void kvmppc_set_papr(PowerPCCPU *cpu)
2085 {
2086     CPUState *cs = CPU(cpu);
2087     int ret;
2088 
2089     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2090     if (ret) {
2091         error_report("This vCPU type or KVM version does not support PAPR");
2092         exit(1);
2093     }
2094 
2095     /* Update the capability flag so we sync the right information
2096      * with kvm */
2097     cap_papr = 1;
2098 }
2099 
2100 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2101 {
2102     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2103 }
2104 
2105 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2106 {
2107     CPUState *cs = CPU(cpu);
2108     int ret;
2109 
2110     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2111     if (ret && mpic_proxy) {
2112         error_report("This KVM version does not support EPR");
2113         exit(1);
2114     }
2115 }
2116 
2117 int kvmppc_smt_threads(void)
2118 {
2119     return cap_ppc_smt ? cap_ppc_smt : 1;
2120 }
2121 
2122 int kvmppc_set_smt_threads(int smt)
2123 {
2124     int ret;
2125 
2126     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2127     if (!ret) {
2128         cap_ppc_smt = smt;
2129     }
2130     return ret;
2131 }
2132 
2133 void kvmppc_hint_smt_possible(Error **errp)
2134 {
2135     int i;
2136     GString *g;
2137     char *s;
2138 
2139     assert(kvm_enabled());
2140     if (cap_ppc_smt_possible) {
2141         g = g_string_new("Available VSMT modes:");
2142         for (i = 63; i >= 0; i--) {
2143             if ((1UL << i) & cap_ppc_smt_possible) {
2144                 g_string_append_printf(g, " %lu", (1UL << i));
2145             }
2146         }
2147         s = g_string_free(g, false);
2148         error_append_hint(errp, "%s.\n", s);
2149         g_free(s);
2150     } else {
2151         error_append_hint(errp,
2152                           "This KVM seems to be too old to support VSMT.\n");
2153     }
2154 }
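
/*
 * Worked example (mask hypothetical): if cap_ppc_smt_possible has bits
 * 0-3 set (modes 1, 2, 4 and 8 supported), the loop walks the bits from
 * high to low and the resulting hint reads
 *
 *     Available VSMT modes: 8 4 2 1.
 */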
2155 
2156 
2157 #ifdef TARGET_PPC64
2158 off_t kvmppc_alloc_rma(void **rma)
2159 {
2160     off_t size;
2161     int fd;
2162     struct kvm_allocate_rma ret;
2163 
2164     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2165      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2166      *                      not necessary on this hardware
2167      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2168      *
2169      * FIXME: We should allow the user to force contiguous RMA
2170      * allocation in the cap_ppc_rma==1 case.
2171      */
2172     if (cap_ppc_rma < 2) {
2173         return 0;
2174     }
2175 
2176     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2177     if (fd < 0) {
2178         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2179                 strerror(errno));
2180         return -1;
2181     }
2182 
2183     size = MIN(ret.rma_size, 256ul << 20);
2184 
2185     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2186     if (*rma == MAP_FAILED) {
2187         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2188         return -1;
2189     }
2190 
2191     return size;
2192 }
2193 
2194 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2195 {
2196     struct kvm_ppc_smmu_info info;
2197     long rampagesize, best_page_shift;
2198     int i;
2199 
2200     if (cap_ppc_rma >= 2) {
2201         return current_size;
2202     }
2203 
2204     /* Find the largest hardware supported page size that's less than
2205      * or equal to the (logical) backing page size of guest RAM */
2206     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2207     rampagesize = qemu_getrampagesize();
2208     best_page_shift = 0;
2209 
2210     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2211         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2212 
2213         if (!sps->page_shift) {
2214             continue;
2215         }
2216 
2217         if ((sps->page_shift > best_page_shift)
2218             && ((1UL << sps->page_shift) <= rampagesize)) {
2219             best_page_shift = sps->page_shift;
2220         }
2221     }
2222 
2223     return MIN(current_size,
2224                1ULL << (best_page_shift + hash_shift - 7));
2225 }
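
/*
 * Worked example (values hypothetical): with 64 KiB backing pages
 * (best_page_shift == 16) and hash_shift == 24, the clamp above is
 * 1ULL << (16 + 24 - 7) == 8 GiB, so current_size is only reduced if it
 * exceeds that.
 */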
2226 #endif
2227 
2228 bool kvmppc_spapr_use_multitce(void)
2229 {
2230     return cap_spapr_multitce;
2231 }
2232 
2233 int kvmppc_spapr_enable_inkernel_multitce(void)
2234 {
2235     int ret;
2236 
2237     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2238                             H_PUT_TCE_INDIRECT, 1);
2239     if (!ret) {
2240         ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2241                                 H_STUFF_TCE, 1);
2242     }
2243 
2244     return ret;
2245 }
2246 
2247 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2248                               uint64_t bus_offset, uint32_t nb_table,
2249                               int *pfd, bool need_vfio)
2250 {
2251     long len;
2252     int fd;
2253     void *table;
2254 
2255     /* Must set fd to -1 so we don't try to munmap when called for
2256      * destroying the table, which the upper layers -will- do
2257      */
2258     *pfd = -1;
2259     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2260         return NULL;
2261     }
2262 
2263     if (cap_spapr_tce_64) {
2264         struct kvm_create_spapr_tce_64 args = {
2265             .liobn = liobn,
2266             .page_shift = page_shift,
2267             .offset = bus_offset >> page_shift,
2268             .size = nb_table,
2269             .flags = 0
2270         };
2271         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2272         if (fd < 0) {
2273             fprintf(stderr,
2274                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2275                     liobn);
2276             return NULL;
2277         }
2278     } else if (cap_spapr_tce) {
2279         uint64_t window_size = (uint64_t) nb_table << page_shift;
2280         struct kvm_create_spapr_tce args = {
2281             .liobn = liobn,
2282             .window_size = window_size,
2283         };
2284         if ((window_size != args.window_size) || bus_offset) {
2285             return NULL;
2286         }
2287         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2288         if (fd < 0) {
2289             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2290                     liobn);
2291             return NULL;
2292         }
2293     } else {
2294         return NULL;
2295     }
2296 
2297     len = nb_table * sizeof(uint64_t);
2298     /* FIXME: round this up to page size */
2299 
2300     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2301     if (table == MAP_FAILED) {
2302         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2303                 liobn);
2304         close(fd);
2305         return NULL;
2306     }
2307 
2308     *pfd = fd;
2309     return table;
2310 }
2311 
2312 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2313 {
2314     long len;
2315 
2316     if (fd < 0) {
2317         return -1;
2318     }
2319 
2320     len = nb_table * sizeof(uint64_t);
2321     if ((munmap(table, len) < 0) ||
2322         (close(fd) < 0)) {
2323         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
2324                 strerror(errno));
2325         /* Leak the table */
2326     }
2327 
2328     return 0;
2329 }
2330 
2331 int kvmppc_reset_htab(int shift_hint)
2332 {
2333     uint32_t shift = shift_hint;
2334 
2335     if (!kvm_enabled()) {
2336         /* Full emulation, tell caller to allocate htab itself */
2337         return 0;
2338     }
2339     if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2340         int ret;
2341         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2342         if (ret == -ENOTTY) {
2343             /* At least some versions of PR KVM advertise the
2344              * capability, but don't implement the ioctl().  Oops.
2345              * Return 0 so that we allocate the htab in qemu, as is
2346              * correct for PR. */
2347             return 0;
2348         } else if (ret < 0) {
2349             return ret;
2350         }
2351         return shift;
2352     }
2353 
2354     /* We have a kernel that predates the htab reset calls.  For PR
2355      * KVM we need to allocate the htab ourselves; an HV KVM of this
2356      * era has already allocated a fixed 16MB hash table. */
2357     if (kvmppc_is_pr(kvm_state)) {
2358         /* PR - tell caller to allocate htab */
2359         return 0;
2360     } else {
2361         /* HV - assume 16MB kernel allocated htab */
2362         return 24;
2363     }
2364 }
2365 
2366 static inline uint32_t mfpvr(void)
2367 {
2368     uint32_t pvr;
2369 
2370     asm ("mfpvr %0"
2371          : "=r"(pvr));
2372     return pvr;
2373 }
2374 
2375 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2376 {
2377     if (on) {
2378         *word |= flags;
2379     } else {
2380         *word &= ~flags;
2381     }
2382 }
2383 
2384 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2385 {
2386     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2387     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2388     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2389 
2390     /* Now fix up the class with information we can query from the host */
2391     pcc->pvr = mfpvr();
2392 
2393     alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2394                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2395     alter_insns(&pcc->insns_flags2, PPC2_VSX,
2396                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2397     alter_insns(&pcc->insns_flags2, PPC2_DFP,
2398                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2399 
2400     if (dcache_size != -1) {
2401         pcc->l1_dcache_size = dcache_size;
2402     }
2403 
2404     if (icache_size != -1) {
2405         pcc->l1_icache_size = icache_size;
2406     }
2407 
2408 #if defined(TARGET_PPC64)
2409     pcc->radix_page_info = kvm_get_radix_page_info();
2410 
2411     if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2412         /*
2413          * POWER9 DD1 has some bugs which make it not really ISA 3.00
2414          * compliant.  More importantly, advertising ISA 3.00
2415          * architected mode may prevent guests from activating
2416          * necessary DD1 workarounds.
2417          */
2418         pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2419                                 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2420     }
2421 #endif /* defined(TARGET_PPC64) */
2422 }
2423 
2424 bool kvmppc_has_cap_epr(void)
2425 {
2426     return cap_epr;
2427 }
2428 
2429 bool kvmppc_has_cap_fixup_hcalls(void)
2430 {
2431     return cap_fixup_hcalls;
2432 }
2433 
2434 bool kvmppc_has_cap_htm(void)
2435 {
2436     return cap_htm;
2437 }
2438 
2439 bool kvmppc_has_cap_mmu_radix(void)
2440 {
2441     return cap_mmu_radix;
2442 }
2443 
2444 bool kvmppc_has_cap_mmu_hash_v3(void)
2445 {
2446     return cap_mmu_hash_v3;
2447 }
2448 
2449 static void kvmppc_get_cpu_characteristics(KVMState *s)
2450 {
2451     struct kvm_ppc_cpu_char c;
2452     int ret;
2453 
2454     /* Assume broken */
2455     cap_ppc_safe_cache = 0;
2456     cap_ppc_safe_bounds_check = 0;
2457     cap_ppc_safe_indirect_branch = 0;
2458 
2459     ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2460     if (!ret) {
2461         return;
2462     }
2463     ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2464     if (ret < 0) {
2465         return;
2466     }
2467     /* Parse and set cap_ppc_safe_cache */
2468     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2469         cap_ppc_safe_cache = 2;
2470     } else if ((c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2471                (c.character & c.character_mask
2472                 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2473         cap_ppc_safe_cache = 1;
2474     }
2475     /* Parse and set cap_ppc_safe_bounds_check */
2476     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2477         cap_ppc_safe_bounds_check = 2;
2478     } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2479         cap_ppc_safe_bounds_check = 1;
2480     }
2481     /* Parse and set cap_ppc_safe_indirect_branch */
2482     if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2483         cap_ppc_safe_indirect_branch = SPAPR_CAP_FIXED_CCD;
2484     } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2485         cap_ppc_safe_indirect_branch = SPAPR_CAP_FIXED_IBS;
2486     }
2487 }
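
/*
 * For cap_ppc_safe_cache and cap_ppc_safe_bounds_check the values set
 * above follow the sPAPR capability convention used elsewhere in QEMU:
 * 0 means assume vulnerable (or no information), 1 means a software
 * workaround is available, 2 means the hardware is safe without a
 * workaround.  cap_ppc_safe_indirect_branch instead records which fixed
 * mitigation the host reports (SPAPR_CAP_FIXED_CCD for a disabled count
 * cache, SPAPR_CAP_FIXED_IBS for serialised indirect branches).
 */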
2488 
2489 int kvmppc_get_cap_safe_cache(void)
2490 {
2491     return cap_ppc_safe_cache;
2492 }
2493 
2494 int kvmppc_get_cap_safe_bounds_check(void)
2495 {
2496     return cap_ppc_safe_bounds_check;
2497 }
2498 
2499 int kvmppc_get_cap_safe_indirect_branch(void)
2500 {
2501     return cap_ppc_safe_indirect_branch;
2502 }
2503 
2504 bool kvmppc_has_cap_spapr_vfio(void)
2505 {
2506     return cap_spapr_vfio;
2507 }
2508 
2509 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2510 {
2511     uint32_t host_pvr = mfpvr();
2512     PowerPCCPUClass *pvr_pcc;
2513 
2514     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2515     if (pvr_pcc == NULL) {
2516         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2517     }
2518 
2519     return pvr_pcc;
2520 }
2521 
2522 static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2523 {
2524     TypeInfo type_info = {
2525         .name = TYPE_HOST_POWERPC_CPU,
2526         .class_init = kvmppc_host_cpu_class_init,
2527     };
2528     MachineClass *mc = MACHINE_GET_CLASS(ms);
2529     PowerPCCPUClass *pvr_pcc;
2530     ObjectClass *oc;
2531     DeviceClass *dc;
2532     int i;
2533 
2534     pvr_pcc = kvm_ppc_get_host_cpu_class();
2535     if (pvr_pcc == NULL) {
2536         return -1;
2537     }
2538     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2539     type_register(&type_info);
2540     if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2541         /* override TCG default cpu type with 'host' cpu model */
2542         mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2543     }
2544 
2545     oc = object_class_by_name(type_info.name);
2546     g_assert(oc);
2547 
2548     /*
2549      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2550      * we want "POWER8" to be a "family" alias that points to the current
2551      * host CPU type, too)
2552      */
2553     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2554     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2555         if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2556             char *suffix;
2557 
2558             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2559             suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2560             if (suffix) {
2561                 *suffix = 0;
2562             }
2563             break;
2564         }
2565     }
2566 
2567     return 0;
2568 }
2569 
2570 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2571 {
2572     struct kvm_rtas_token_args args = {
2573         .token = token,
2574     };
2575 
2576     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2577         return -ENOENT;
2578     }
2579 
2580     strncpy(args.name, function, sizeof(args.name));
2581 
2582     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2583 }
2584 
2585 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2586 {
2587     struct kvm_get_htab_fd s = {
2588         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2589         .start_index = index,
2590     };
2591     int ret;
2592 
2593     if (!cap_htab_fd) {
2594         error_setg(errp, "KVM version doesn't support %s the HPT",
2595                    write ? "writing" : "reading");
2596         return -ENOTSUP;
2597     }
2598 
2599     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2600     if (ret < 0) {
2601         error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2602                    write ? "writing" : "reading", write ? "to" : "from",
2603                    strerror(errno));
2604         return -errno;
2605     }
2606 
2607     return ret;
2608 }
2609 
2610 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2611 {
2612     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2613     uint8_t buf[bufsize];
2614     ssize_t rc;
2615 
2616     do {
2617         rc = read(fd, buf, bufsize);
2618         if (rc < 0) {
2619             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2620                     strerror(errno));
2621             return rc;
2622         } else if (rc) {
2623             uint8_t *buffer = buf;
2624             ssize_t n = rc;
2625             while (n) {
2626                 struct kvm_get_htab_header *head =
2627                     (struct kvm_get_htab_header *) buffer;
2628                 size_t chunksize = sizeof(*head) +
2629                      HASH_PTE_SIZE_64 * head->n_valid;
2630 
2631                 qemu_put_be32(f, head->index);
2632                 qemu_put_be16(f, head->n_valid);
2633                 qemu_put_be16(f, head->n_invalid);
2634                 qemu_put_buffer(f, (void *)(head + 1),
2635                                 HASH_PTE_SIZE_64 * head->n_valid);
2636 
2637                 buffer += chunksize;
2638                 n -= chunksize;
2639             }
2640         }
2641     } while ((rc != 0)
2642              && ((max_ns < 0)
2643                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2644 
2645     return (rc == 0) ? 1 : 0;
2646 }
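
/*
 * Stream layout sketch (counts hypothetical): each read() from the HTAB fd
 * returns a sequence of chunks of the form
 *
 *     struct kvm_get_htab_header { __u32 index; __u16 n_valid; __u16 n_invalid; }
 *     followed by n_valid HPTEs of HASH_PTE_SIZE_64 bytes each,
 *
 * so a chunk with n_valid == 2 occupies sizeof(header) + 2 * HASH_PTE_SIZE_64
 * bytes and describes two valid entries starting at 'index', followed by
 * n_invalid empty ones.
 */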
2647 
2648 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2649                            uint16_t n_valid, uint16_t n_invalid)
2650 {
2651     struct kvm_get_htab_header *buf;
2652     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2653     ssize_t rc;
2654 
2655     buf = alloca(chunksize);
2656     buf->index = index;
2657     buf->n_valid = n_valid;
2658     buf->n_invalid = n_invalid;
2659 
2660     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2661 
2662     rc = write(fd, buf, chunksize);
2663     if (rc < 0) {
2664         fprintf(stderr, "Error writing KVM hash table: %s\n",
2665                 strerror(errno));
2666         return rc;
2667     }
2668     if (rc != chunksize) {
2669         /* We should never get a short write on a single chunk */
2670         fprintf(stderr, "Short write, restoring KVM hash table\n");
2671         return -1;
2672     }
2673     return 0;
2674 }
2675 
2676 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2677 {
2678     return true;
2679 }
2680 
2681 void kvm_arch_init_irq_routing(KVMState *s)
2682 {
2683 }
2684 
2685 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2686 {
2687     int fd, rc;
2688     int i;
2689 
2690     fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2691 
2692     i = 0;
2693     while (i < n) {
2694         struct kvm_get_htab_header *hdr;
2695         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2696         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2697 
2698         rc = read(fd, buf, sizeof(buf));
2699         if (rc < 0) {
2700             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2701         }
2702 
2703         hdr = (struct kvm_get_htab_header *)buf;
2704         while ((i < n) && ((char *)hdr < (buf + rc))) {
2705             int invalid = hdr->n_invalid, valid = hdr->n_valid;
2706 
2707             if (hdr->index != (ptex + i)) {
2708                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2709                          " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
2710             }
2711 
2712             if (n - i < valid) {
2713                 valid = n - i;
2714             }
2715             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
2716             i += valid;
2717 
2718             if ((n - i) < invalid) {
2719                 invalid = n - i;
2720             }
2721             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2722             i += invalid;
2723 
2724             hdr = (struct kvm_get_htab_header *)
2725                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2726         }
2727     }
2728 
2729     close(fd);
2730 }
2731 
2732 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2733 {
2734     int fd, rc;
2735     struct {
2736         struct kvm_get_htab_header hdr;
2737         uint64_t pte0;
2738         uint64_t pte1;
2739     } buf;
2740 
2741     fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2742 
2743     buf.hdr.n_valid = 1;
2744     buf.hdr.n_invalid = 0;
2745     buf.hdr.index = ptex;
2746     buf.pte0 = cpu_to_be64(pte0);
2747     buf.pte1 = cpu_to_be64(pte1);
2748 
2749     rc = write(fd, &buf, sizeof(buf));
2750     if (rc != sizeof(buf)) {
2751         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2752     }
2753     close(fd);
2754 }
2755 
2756 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2757                              uint64_t address, uint32_t data, PCIDevice *dev)
2758 {
2759     return 0;
2760 }
2761 
2762 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2763                                 int vector, PCIDevice *dev)
2764 {
2765     return 0;
2766 }
2767 
2768 int kvm_arch_release_virq_post(int virq)
2769 {
2770     return 0;
2771 }
2772 
2773 int kvm_arch_msi_data_to_gsi(uint32_t data)
2774 {
2775     return data & 0xffff;
2776 }
2777 
2778 int kvmppc_enable_hwrng(void)
2779 {
2780     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2781         return -1;
2782     }
2783 
2784     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2785 }
2786 
2787 void kvmppc_check_papr_resize_hpt(Error **errp)
2788 {
2789     if (!kvm_enabled()) {
2790         return; /* No KVM, we're good */
2791     }
2792 
2793     if (cap_resize_hpt) {
2794         return; /* Kernel has explicit support, we're good */
2795     }
2796 
2797     /* Otherwise fallback on looking for PR KVM */
2798     if (kvmppc_is_pr(kvm_state)) {
2799         return;
2800     }
2801 
2802     error_setg(errp,
2803                "Hash page table resizing not available with this KVM version");
2804 }
2805 
2806 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2807 {
2808     CPUState *cs = CPU(cpu);
2809     struct kvm_ppc_resize_hpt rhpt = {
2810         .flags = flags,
2811         .shift = shift,
2812     };
2813 
2814     if (!cap_resize_hpt) {
2815         return -ENOSYS;
2816     }
2817 
2818     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2819 }
2820 
2821 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2822 {
2823     CPUState *cs = CPU(cpu);
2824     struct kvm_ppc_resize_hpt rhpt = {
2825         .flags = flags,
2826         .shift = shift,
2827     };
2828 
2829     if (!cap_resize_hpt) {
2830         return -ENOSYS;
2831     }
2832 
2833     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2834 }
2835 
2836 /*
2837  * This is a helper function to detect a post migration scenario
2838  * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2839  * the guest kernel can't handle a PVR value other than the actual host
2840  * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2841  *
2842  * If we don't have cap_ppc_pvr_compat and we're not running in PR
2843  * (so, we're HV), return true. The workaround itself is done in
2844  * cpu_post_load.
2845  *
2846  * The order here is important: we'll only check for KVM PR as a
2847  * fallback if the guest kernel can't handle the situation itself.
2848  * We need to avoid as much as possible querying the running KVM type
2849  * We want to avoid querying the running KVM type at the QEMU
2850  * level as much as possible.
2851 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2852 {
2853     CPUState *cs = CPU(cpu);
2854 
2855     if (!kvm_enabled()) {
2856         return false;
2857     }
2858 
2859     if (cap_ppc_pvr_compat) {
2860         return false;
2861     }
2862 
2863     return !kvmppc_is_pr(cs->kvm_state);
2864 }
2865