xref: /qemu/target/i386/kvm/xen-emu.c (revision c345104cd1d17e1e801b99a216fa3654cdcbce35)
161491cf4SDavid Woodhouse /*
261491cf4SDavid Woodhouse  * Xen HVM emulation support in KVM
361491cf4SDavid Woodhouse  *
461491cf4SDavid Woodhouse  * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
561491cf4SDavid Woodhouse  * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
661491cf4SDavid Woodhouse  *
761491cf4SDavid Woodhouse  * This work is licensed under the terms of the GNU GPL, version 2 or later.
861491cf4SDavid Woodhouse  * See the COPYING file in the top-level directory.
961491cf4SDavid Woodhouse  *
1061491cf4SDavid Woodhouse  */
1161491cf4SDavid Woodhouse 
1261491cf4SDavid Woodhouse #include "qemu/osdep.h"
1355a3f666SJoao Martins #include "qemu/log.h"
1479b7067dSJoao Martins #include "qemu/main-loop.h"
15fb0fd2ceSJoao Martins #include "hw/xen/xen.h"
1661491cf4SDavid Woodhouse #include "sysemu/kvm_int.h"
1761491cf4SDavid Woodhouse #include "sysemu/kvm_xen.h"
1861491cf4SDavid Woodhouse #include "kvm/kvm_i386.h"
19bedcc139SJoao Martins #include "exec/address-spaces.h"
2061491cf4SDavid Woodhouse #include "xen-emu.h"
2155a3f666SJoao Martins #include "trace.h"
2279b7067dSJoao Martins #include "sysemu/runstate.h"
2361491cf4SDavid Woodhouse 
24110a0ea5SDavid Woodhouse #include "hw/i386/kvm/xen_overlay.h"
25110a0ea5SDavid Woodhouse 
26bedcc139SJoao Martins #include "hw/xen/interface/version.h"
2779b7067dSJoao Martins #include "hw/xen/interface/sched.h"
28fb0fd2ceSJoao Martins #include "hw/xen/interface/memory.h"
29671bfdcdSJoao Martins #include "hw/xen/interface/hvm/hvm_op.h"
30d70bd6a4SJoao Martins #include "hw/xen/interface/vcpu.h"
31fb0fd2ceSJoao Martins 
32fb0fd2ceSJoao Martins #include "xen-compat.h"
33fb0fd2ceSJoao Martins 
34fb0fd2ceSJoao Martins #ifdef TARGET_X86_64
35fb0fd2ceSJoao Martins #define hypercall_compat32(longmode) (!(longmode))
36fb0fd2ceSJoao Martins #else
37fb0fd2ceSJoao Martins #define hypercall_compat32(longmode) (false)
38fb0fd2ceSJoao Martins #endif
39bedcc139SJoao Martins 
40bedcc139SJoao Martins static int kvm_gva_rw(CPUState *cs, uint64_t gva, void *_buf, size_t sz,
41bedcc139SJoao Martins                       bool is_write)
42bedcc139SJoao Martins {
43bedcc139SJoao Martins     uint8_t *buf = (uint8_t *)_buf;
44bedcc139SJoao Martins     int ret;
45bedcc139SJoao Martins 
46bedcc139SJoao Martins     while (sz) {
47bedcc139SJoao Martins         struct kvm_translation tr = {
48bedcc139SJoao Martins             .linear_address = gva,
49bedcc139SJoao Martins         };
50bedcc139SJoao Martins 
51bedcc139SJoao Martins         size_t len = TARGET_PAGE_SIZE - (tr.linear_address & ~TARGET_PAGE_MASK);
52bedcc139SJoao Martins         if (len > sz) {
53bedcc139SJoao Martins             len = sz;
54bedcc139SJoao Martins         }
55bedcc139SJoao Martins 
56bedcc139SJoao Martins         ret = kvm_vcpu_ioctl(cs, KVM_TRANSLATE, &tr);
57bedcc139SJoao Martins         if (ret || !tr.valid || (is_write && !tr.writeable)) {
58bedcc139SJoao Martins             return -EFAULT;
59bedcc139SJoao Martins         }
60bedcc139SJoao Martins 
61bedcc139SJoao Martins         cpu_physical_memory_rw(tr.physical_address, buf, len, is_write);
62bedcc139SJoao Martins 
63bedcc139SJoao Martins         buf += len;
64bedcc139SJoao Martins         sz -= len;
65bedcc139SJoao Martins         gva += len;
66bedcc139SJoao Martins     }
67bedcc139SJoao Martins 
68bedcc139SJoao Martins     return 0;
69bedcc139SJoao Martins }
70bedcc139SJoao Martins 
/*
 * Copy @sz bytes from guest virtual address @gva into @buf.
 * Returns 0 on success or -EFAULT on translation failure.
 */
static inline int kvm_copy_from_gva(CPUState *cs, uint64_t gva, void *buf,
                                    size_t sz)
{
    return kvm_gva_rw(cs, gva, buf, sz, false);
}
76bedcc139SJoao Martins 
/*
 * Copy @sz bytes from @buf to guest virtual address @gva.
 * Returns 0 on success or -EFAULT if translation fails or the target
 * page is not writeable.
 */
static inline int kvm_copy_to_gva(CPUState *cs, uint64_t gva, void *buf,
                                  size_t sz)
{
    return kvm_gva_rw(cs, gva, buf, sz, true);
}
82bedcc139SJoao Martins 
/*
 * Enable Xen HVM emulation for the VM. Checks that the kernel advertises
 * the required KVM_CAP_XEN_HVM capabilities, optionally informs it of the
 * emulated Xen version, and configures hypercall interception on the
 * given MSR.
 *
 * Returns 0 on success, -ENOSYS if kernel support is missing or
 * insufficient, or a negative errno from the KVM_XEN_HVM_CONFIG ioctl.
 */
int kvm_xen_init(KVMState *s, uint32_t hypercall_msr)
{
    const int required_caps = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
        KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_SHARED_INFO;
    struct kvm_xen_hvm_config cfg = {
        .msr = hypercall_msr,
        .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
    };
    int xen_caps, ret;

    xen_caps = kvm_check_extension(s, KVM_CAP_XEN_HVM);
    if (required_caps & ~xen_caps) {
        error_report("kvm: Xen HVM guest support not present or insufficient");
        return -ENOSYS;
    }

    if (xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND) {
        struct kvm_xen_hvm_attr ha = {
            .type = KVM_XEN_ATTR_TYPE_XEN_VERSION,
            .u.xen_version = s->xen_version,
        };
        /* Best effort: failing to set the version attribute is not fatal. */
        (void)kvm_vm_ioctl(s, KVM_XEN_HVM_SET_ATTR, &ha);

        cfg.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
    }

    ret = kvm_vm_ioctl(s, KVM_XEN_HVM_CONFIG, &cfg);
    if (ret < 0) {
        error_report("kvm: Failed to enable Xen HVM support: %s",
                     strerror(-ret));
        return ret;
    }

    /* Cache the capability bits for kvm_xen_get_caps()/kvm_xen_has_cap(). */
    s->xen_caps = xen_caps;
    return 0;
}
11961491cf4SDavid Woodhouse 
/*
 * Per-vCPU Xen initialisation: tell the kernel this vCPU's Xen/ACPI
 * vCPU ID and reset the vcpu_info GPA tracking state.
 *
 * Returns 0 on success or a negative errno from the ioctl.
 */
int kvm_xen_init_vcpu(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    int err;

    /*
     * The kernel needs to know the Xen/ACPI vCPU ID because that's
     * what the guest uses in hypercalls such as timers. It doesn't
     * match the APIC ID which is generally used for talking to the
     * kernel about vCPUs. And if vCPU threads race with creating
     * their KVM vCPUs out of order, it doesn't necessarily match
     * with the kernel's internal vCPU indices either.
     */
    if (kvm_xen_has_cap(EVTCHN_SEND)) {
        struct kvm_xen_vcpu_attr va = {
            .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID,
            .u.vcpu_id = cs->cpu_index,
        };
        err = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
        if (err) {
            error_report("kvm: Failed to set Xen vCPU ID attribute: %s",
                         strerror(-err));
            return err;
        }
    }

    /* No vcpu_info registered yet (neither explicit nor shinfo-derived). */
    env->xen_vcpu_info_gpa = INVALID_GPA;
    env->xen_vcpu_info_default_gpa = INVALID_GPA;

    return 0;
}
1525e691a95SDavid Woodhouse 
/* Return the KVM_CAP_XEN_HVM capability bits cached by kvm_xen_init(). */
uint32_t kvm_xen_get_caps(void)
{
    return kvm_state->xen_caps;
}
15755a3f666SJoao Martins 
158bedcc139SJoao Martins static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu,
159bedcc139SJoao Martins                                      int cmd, uint64_t arg)
160bedcc139SJoao Martins {
161bedcc139SJoao Martins     int err = 0;
162bedcc139SJoao Martins 
163bedcc139SJoao Martins     switch (cmd) {
164bedcc139SJoao Martins     case XENVER_get_features: {
165bedcc139SJoao Martins         struct xen_feature_info fi;
166bedcc139SJoao Martins 
167bedcc139SJoao Martins         /* No need for 32/64 compat handling */
168bedcc139SJoao Martins         qemu_build_assert(sizeof(fi) == 8);
169bedcc139SJoao Martins 
170bedcc139SJoao Martins         err = kvm_copy_from_gva(CPU(cpu), arg, &fi, sizeof(fi));
171bedcc139SJoao Martins         if (err) {
172bedcc139SJoao Martins             break;
173bedcc139SJoao Martins         }
174bedcc139SJoao Martins 
175bedcc139SJoao Martins         fi.submap = 0;
176bedcc139SJoao Martins         if (fi.submap_idx == 0) {
177bedcc139SJoao Martins             fi.submap |= 1 << XENFEAT_writable_page_tables |
178bedcc139SJoao Martins                          1 << XENFEAT_writable_descriptor_tables |
179bedcc139SJoao Martins                          1 << XENFEAT_auto_translated_physmap |
180bedcc139SJoao Martins                          1 << XENFEAT_supervisor_mode_kernel;
181bedcc139SJoao Martins         }
182bedcc139SJoao Martins 
183bedcc139SJoao Martins         err = kvm_copy_to_gva(CPU(cpu), arg, &fi, sizeof(fi));
184bedcc139SJoao Martins         break;
185bedcc139SJoao Martins     }
186bedcc139SJoao Martins 
187bedcc139SJoao Martins     default:
188bedcc139SJoao Martins         return false;
189bedcc139SJoao Martins     }
190bedcc139SJoao Martins 
191bedcc139SJoao Martins     exit->u.hcall.result = err;
192bedcc139SJoao Martins     return true;
193bedcc139SJoao Martins }
194bedcc139SJoao Martins 
195*c345104cSJoao Martins static int kvm_xen_set_vcpu_attr(CPUState *cs, uint16_t type, uint64_t gpa)
196*c345104cSJoao Martins {
197*c345104cSJoao Martins     struct kvm_xen_vcpu_attr xhsi;
198*c345104cSJoao Martins 
199*c345104cSJoao Martins     xhsi.type = type;
200*c345104cSJoao Martins     xhsi.u.gpa = gpa;
201*c345104cSJoao Martins 
202*c345104cSJoao Martins     trace_kvm_xen_set_vcpu_attr(cs->cpu_index, type, gpa);
203*c345104cSJoao Martins 
204*c345104cSJoao Martins     return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xhsi);
205*c345104cSJoao Martins }
206*c345104cSJoao Martins 
207*c345104cSJoao Martins static void do_set_vcpu_info_default_gpa(CPUState *cs, run_on_cpu_data data)
208*c345104cSJoao Martins {
209*c345104cSJoao Martins     X86CPU *cpu = X86_CPU(cs);
210*c345104cSJoao Martins     CPUX86State *env = &cpu->env;
211*c345104cSJoao Martins 
212*c345104cSJoao Martins     env->xen_vcpu_info_default_gpa = data.host_ulong;
213*c345104cSJoao Martins 
214*c345104cSJoao Martins     /* Changing the default does nothing if a vcpu_info was explicitly set. */
215*c345104cSJoao Martins     if (env->xen_vcpu_info_gpa == INVALID_GPA) {
216*c345104cSJoao Martins         kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
217*c345104cSJoao Martins                               env->xen_vcpu_info_default_gpa);
218*c345104cSJoao Martins     }
219*c345104cSJoao Martins }
220*c345104cSJoao Martins 
221*c345104cSJoao Martins static void do_set_vcpu_info_gpa(CPUState *cs, run_on_cpu_data data)
222*c345104cSJoao Martins {
223*c345104cSJoao Martins     X86CPU *cpu = X86_CPU(cs);
224*c345104cSJoao Martins     CPUX86State *env = &cpu->env;
225*c345104cSJoao Martins 
226*c345104cSJoao Martins     env->xen_vcpu_info_gpa = data.host_ulong;
227*c345104cSJoao Martins 
228*c345104cSJoao Martins     kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
229*c345104cSJoao Martins                           env->xen_vcpu_info_gpa);
230*c345104cSJoao Martins }
231*c345104cSJoao Martins 
232*c345104cSJoao Martins static void do_vcpu_soft_reset(CPUState *cs, run_on_cpu_data data)
233*c345104cSJoao Martins {
234*c345104cSJoao Martins     X86CPU *cpu = X86_CPU(cs);
235*c345104cSJoao Martins     CPUX86State *env = &cpu->env;
236*c345104cSJoao Martins 
237*c345104cSJoao Martins     env->xen_vcpu_info_gpa = INVALID_GPA;
238*c345104cSJoao Martins     env->xen_vcpu_info_default_gpa = INVALID_GPA;
239*c345104cSJoao Martins 
240*c345104cSJoao Martins     kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, INVALID_GPA);
241*c345104cSJoao Martins }
242*c345104cSJoao Martins 
/*
 * Map the Xen shared info page at guest frame @gfn and give each of the
 * first XEN_LEGACY_MAX_VCPUS vCPUs its default vcpu_info slot within
 * that page (the slots are laid out consecutively).
 *
 * Returns 0 on success or a negative errno from the overlay mapping.
 */
static int xen_set_shared_info(uint64_t gfn)
{
    uint64_t gpa = gfn << TARGET_PAGE_BITS;
    int i, err;

    QEMU_IOTHREAD_LOCK_GUARD();

    /*
     * The xen_overlay device tells KVM about it too, since it had to
     * do that on migration load anyway (unless we're going to jump
     * through lots of hoops to maintain the fiction that this isn't
     * KVM-specific).
     */
    err = xen_overlay_map_shinfo_page(gpa);
    if (err) {
            return err;
    }

    trace_kvm_xen_set_shared_info(gfn);

    /*
     * Schedule the default-vcpu_info update on each vCPU; the update
     * must run in that vCPU's own context (async_run_on_cpu).
     */
    for (i = 0; i < XEN_LEGACY_MAX_VCPUS; i++) {
        CPUState *cpu = qemu_get_cpu(i);
        if (cpu) {
            async_run_on_cpu(cpu, do_set_vcpu_info_default_gpa,
                             RUN_ON_CPU_HOST_ULONG(gpa));
        }
        gpa += sizeof(vcpu_info_t);
    }

    return err;
}
274fb0fd2ceSJoao Martins 
275fb0fd2ceSJoao Martins static int add_to_physmap_one(uint32_t space, uint64_t idx, uint64_t gfn)
276fb0fd2ceSJoao Martins {
277fb0fd2ceSJoao Martins     switch (space) {
278fb0fd2ceSJoao Martins     case XENMAPSPACE_shared_info:
279fb0fd2ceSJoao Martins         if (idx > 0) {
280fb0fd2ceSJoao Martins             return -EINVAL;
281fb0fd2ceSJoao Martins         }
282fb0fd2ceSJoao Martins         return xen_set_shared_info(gfn);
283fb0fd2ceSJoao Martins 
284fb0fd2ceSJoao Martins     case XENMAPSPACE_grant_table:
285fb0fd2ceSJoao Martins     case XENMAPSPACE_gmfn:
286fb0fd2ceSJoao Martins     case XENMAPSPACE_gmfn_range:
287fb0fd2ceSJoao Martins         return -ENOTSUP;
288fb0fd2ceSJoao Martins 
289fb0fd2ceSJoao Martins     case XENMAPSPACE_gmfn_foreign:
290fb0fd2ceSJoao Martins     case XENMAPSPACE_dev_mmio:
291fb0fd2ceSJoao Martins         return -EPERM;
292fb0fd2ceSJoao Martins 
293fb0fd2ceSJoao Martins     default:
294fb0fd2ceSJoao Martins         return -EINVAL;
295fb0fd2ceSJoao Martins     }
296fb0fd2ceSJoao Martins }
297fb0fd2ceSJoao Martins 
/*
 * Handle XENMEM_add_to_physmap: fetch the argument struct from guest
 * memory (converting from the 32-bit compat layout when the guest is not
 * in long mode), validate the target domain, and perform the mapping.
 *
 * Returns 0 on success, -EFAULT on a bad guest pointer, -ESRCH for a
 * foreign domain, or the result of add_to_physmap_one().
 */
static int do_add_to_physmap(struct kvm_xen_exit *exit, X86CPU *cpu,
                             uint64_t arg)
{
    struct xen_add_to_physmap xatp;
    CPUState *cs = CPU(cpu);

    if (hypercall_compat32(exit->u.hcall.longmode)) {
        struct compat_xen_add_to_physmap xatp32;

        /* Guard against the compat struct layout changing under us. */
        qemu_build_assert(sizeof(struct compat_xen_add_to_physmap) == 16);
        if (kvm_copy_from_gva(cs, arg, &xatp32, sizeof(xatp32))) {
            return -EFAULT;
        }
        /* Widen each compat field into the native struct. */
        xatp.domid = xatp32.domid;
        xatp.size = xatp32.size;
        xatp.space = xatp32.space;
        xatp.idx = xatp32.idx;
        xatp.gpfn = xatp32.gpfn;
    } else {
        if (kvm_copy_from_gva(cs, arg, &xatp, sizeof(xatp))) {
            return -EFAULT;
        }
    }

    /* Only operations on our own domain are permitted. */
    if (xatp.domid != DOMID_SELF && xatp.domid != xen_domid) {
        return -ESRCH;
    }

    return add_to_physmap_one(xatp.space, xatp.idx, xatp.gpfn);
}
328fb0fd2ceSJoao Martins 
/*
 * Handle XENMEM_add_to_physmap_batch: like do_add_to_physmap() but for
 * an array of (idx, gpfn) pairs, writing a per-entry error code back to
 * the guest's errs array. Handles the 32-bit compat layout, in which the
 * idxs/gpfns entries are 32-bit rather than unsigned long.
 *
 * Returns 0 when the whole batch was processed (individual entries may
 * still have failed — see the errs array), or -EFAULT/-ESRCH/-EINVAL on
 * an invalid request.
 */
static int do_add_to_physmap_batch(struct kvm_xen_exit *exit, X86CPU *cpu,
                                   uint64_t arg)
{
    struct xen_add_to_physmap_batch xatpb;
    unsigned long idxs_gva, gpfns_gva, errs_gva;
    CPUState *cs = CPU(cpu);
    size_t op_sz;

    if (hypercall_compat32(exit->u.hcall.longmode)) {
        struct compat_xen_add_to_physmap_batch xatpb32;

        /* Guard against the compat struct layout changing under us. */
        qemu_build_assert(sizeof(struct compat_xen_add_to_physmap_batch) == 20);
        if (kvm_copy_from_gva(cs, arg, &xatpb32, sizeof(xatpb32))) {
            return -EFAULT;
        }
        xatpb.domid = xatpb32.domid;
        xatpb.space = xatpb32.space;
        xatpb.size = xatpb32.size;

        /* Compat guest pointers are 32-bit handles (the .c member). */
        idxs_gva = xatpb32.idxs.c;
        gpfns_gva = xatpb32.gpfns.c;
        errs_gva = xatpb32.errs.c;
        op_sz = sizeof(uint32_t);
    } else {
        if (kvm_copy_from_gva(cs, arg, &xatpb, sizeof(xatpb))) {
            return -EFAULT;
        }
        op_sz = sizeof(unsigned long);
        idxs_gva = (unsigned long)xatpb.idxs.p;
        gpfns_gva = (unsigned long)xatpb.gpfns.p;
        errs_gva = (unsigned long)xatpb.errs.p;
    }

    /* Only operations on our own domain are permitted. */
    if (xatpb.domid != DOMID_SELF && xatpb.domid != xen_domid) {
        return -ESRCH;
    }

    /* Explicitly invalid for the batch op. Not that we implement it anyway. */
    if (xatpb.space == XENMAPSPACE_gmfn_range) {
        return -EINVAL;
    }

    while (xatpb.size--) {
        unsigned long idx = 0;
        unsigned long gpfn = 0;
        int err;

        /* For 32-bit compat this only copies the low 32 bits of each */
        if (kvm_copy_from_gva(cs, idxs_gva, &idx, op_sz) ||
            kvm_copy_from_gva(cs, gpfns_gva, &gpfn, op_sz)) {
            return -EFAULT;
        }
        idxs_gva += op_sz;
        gpfns_gva += op_sz;

        err = add_to_physmap_one(xatpb.space, idx, gpfn);

        /* Per-entry status goes back to the guest; errs are always int. */
        if (kvm_copy_to_gva(cs, errs_gva, &err, sizeof(err))) {
            return -EFAULT;
        }
        errs_gva += sizeof(err);
    }
    return 0;
}
393782a7960SDavid Woodhouse 
394fb0fd2ceSJoao Martins static bool kvm_xen_hcall_memory_op(struct kvm_xen_exit *exit, X86CPU *cpu,
395fb0fd2ceSJoao Martins                                    int cmd, uint64_t arg)
396fb0fd2ceSJoao Martins {
397fb0fd2ceSJoao Martins     int err;
398fb0fd2ceSJoao Martins 
399fb0fd2ceSJoao Martins     switch (cmd) {
400fb0fd2ceSJoao Martins     case XENMEM_add_to_physmap:
401fb0fd2ceSJoao Martins         err = do_add_to_physmap(exit, cpu, arg);
402fb0fd2ceSJoao Martins         break;
403fb0fd2ceSJoao Martins 
404782a7960SDavid Woodhouse     case XENMEM_add_to_physmap_batch:
405782a7960SDavid Woodhouse         err = do_add_to_physmap_batch(exit, cpu, arg);
406782a7960SDavid Woodhouse         break;
407782a7960SDavid Woodhouse 
408fb0fd2ceSJoao Martins     default:
409fb0fd2ceSJoao Martins         return false;
410fb0fd2ceSJoao Martins     }
411fb0fd2ceSJoao Martins 
412fb0fd2ceSJoao Martins     exit->u.hcall.result = err;
413fb0fd2ceSJoao Martins     return true;
414fb0fd2ceSJoao Martins }
415fb0fd2ceSJoao Martins 
416671bfdcdSJoao Martins static bool kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, X86CPU *cpu,
417671bfdcdSJoao Martins                                  int cmd, uint64_t arg)
418671bfdcdSJoao Martins {
419671bfdcdSJoao Martins     switch (cmd) {
420671bfdcdSJoao Martins     case HVMOP_pagetable_dying:
421671bfdcdSJoao Martins         exit->u.hcall.result = -ENOSYS;
422671bfdcdSJoao Martins         return true;
423671bfdcdSJoao Martins 
424671bfdcdSJoao Martins     default:
425671bfdcdSJoao Martins         return false;
426671bfdcdSJoao Martins     }
427671bfdcdSJoao Martins }
428671bfdcdSJoao Martins 
/*
 * Handle VCPUOP_register_vcpu_info: read the (mfn, offset) registration
 * from the calling vCPU's memory and schedule installation of the
 * resulting GPA on the @target vCPU.
 *
 * @cs:     the vCPU issuing the hypercall (source of the argument copy)
 * @target: the vCPU whose vcpu_info is being registered (may differ)
 *
 * Returns 0 on success, -ENOENT if @target does not exist, -EFAULT on a
 * bad guest pointer, or -EINVAL if the structure would cross a page.
 */
static int vcpuop_register_vcpu_info(CPUState *cs, CPUState *target,
                                     uint64_t arg)
{
    struct vcpu_register_vcpu_info rvi;
    uint64_t gpa;

    /* No need for 32/64 compat handling */
    qemu_build_assert(sizeof(rvi) == 16);
    qemu_build_assert(sizeof(struct vcpu_info) == 64);

    if (!target) {
        return -ENOENT;
    }

    if (kvm_copy_from_gva(cs, arg, &rvi, sizeof(rvi))) {
        return -EFAULT;
    }

    /* The vcpu_info must fit entirely within one page. */
    if (rvi.offset > TARGET_PAGE_SIZE - sizeof(struct vcpu_info)) {
        return -EINVAL;
    }

    gpa = ((rvi.mfn << TARGET_PAGE_BITS) + rvi.offset);
    /* Must take effect in the target vCPU's own context. */
    async_run_on_cpu(target, do_set_vcpu_info_gpa, RUN_ON_CPU_HOST_ULONG(gpa));
    return 0;
}
455*c345104cSJoao Martins 
456d70bd6a4SJoao Martins static bool kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, X86CPU *cpu,
457d70bd6a4SJoao Martins                                   int cmd, int vcpu_id, uint64_t arg)
458d70bd6a4SJoao Martins {
459*c345104cSJoao Martins     CPUState *dest = qemu_get_cpu(vcpu_id);
460*c345104cSJoao Martins     CPUState *cs = CPU(cpu);
461d70bd6a4SJoao Martins     int err;
462d70bd6a4SJoao Martins 
463d70bd6a4SJoao Martins     switch (cmd) {
464d70bd6a4SJoao Martins     case VCPUOP_register_vcpu_info:
465*c345104cSJoao Martins         err = vcpuop_register_vcpu_info(cs, dest, arg);
466d70bd6a4SJoao Martins         break;
467d70bd6a4SJoao Martins 
468d70bd6a4SJoao Martins     default:
469d70bd6a4SJoao Martins         return false;
470d70bd6a4SJoao Martins     }
471d70bd6a4SJoao Martins 
472d70bd6a4SJoao Martins     exit->u.hcall.result = err;
473d70bd6a4SJoao Martins     return true;
474d70bd6a4SJoao Martins }
475d70bd6a4SJoao Martins 
/*
 * Perform a Xen "soft reset": clear every vCPU's vcpu_info registration
 * and unmap the shared info page. Must be called with the iothread lock
 * held (asserted below).
 *
 * Returns 0 on success or a negative errno from the overlay unmap.
 */
int kvm_xen_soft_reset(void)
{
    CPUState *cpu;
    int err;

    assert(qemu_mutex_iothread_locked());

    trace_kvm_xen_soft_reset();

    /* Reset per-vCPU state in each vCPU's own context. */
    CPU_FOREACH(cpu) {
        async_run_on_cpu(cpu, do_vcpu_soft_reset, RUN_ON_CPU_NULL);
    }

    /* INVALID_GFN unmaps the shared info page. */
    err = xen_overlay_map_shinfo_page(INVALID_GFN);
    if (err) {
        return err;
    }

    return 0;
}
49679b7067dSJoao Martins 
/*
 * Handle SCHEDOP_shutdown: read the shutdown reason from guest memory
 * and trigger the corresponding QEMU system action (panic report,
 * reset, power-off, or Xen soft reset).
 *
 * Returns 0 on success, -EFAULT on a bad guest pointer, -EINVAL for an
 * unknown reason, or the result of kvm_xen_soft_reset().
 */
static int schedop_shutdown(CPUState *cs, uint64_t arg)
{
    struct sched_shutdown shutdown;
    int ret = 0;

    /* No need for 32/64 compat handling */
    qemu_build_assert(sizeof(shutdown) == 4);

    if (kvm_copy_from_gva(cs, arg, &shutdown, sizeof(shutdown))) {
        return -EFAULT;
    }

    switch (shutdown.reason) {
    case SHUTDOWN_crash:
        cpu_dump_state(cs, stderr, CPU_DUMP_CODE);
        qemu_system_guest_panicked(NULL);
        break;

    case SHUTDOWN_reboot:
        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
        break;

    case SHUTDOWN_poweroff:
        qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
        break;

    case SHUTDOWN_soft_reset:
        /* kvm_xen_soft_reset() asserts the iothread lock is held. */
        qemu_mutex_lock_iothread();
        ret = kvm_xen_soft_reset();
        qemu_mutex_unlock_iothread();
        break;

    default:
        ret = -EINVAL;
        break;
    }

    return ret;
}
53679b7067dSJoao Martins 
/*
 * Dispatch __HYPERVISOR_sched_op commands: shutdown, poll, and yield.
 * Returns false for commands we do not handle so the caller reports the
 * hypercall as unimplemented.
 */
static bool kvm_xen_hcall_sched_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                   int cmd, uint64_t arg)
{
    CPUState *cs = CPU(cpu);
    int err = -ENOSYS;

    switch (cmd) {
    case SCHEDOP_shutdown:
        err = schedop_shutdown(cs, arg);
        break;

    case SCHEDOP_poll:
        /*
         * Linux will panic if this doesn't work. Just yield; it's not
         * worth overthinking it because with event channel handling
         * in KVM, the kernel will intercept this and it will never
         * reach QEMU anyway. The semantics of the hypercall explicitly
         * permit spurious wakeups.
         */
        /* fall through */
    case SCHEDOP_yield:
        sched_yield();
        err = 0;
        break;

    default:
        return false;
    }

    exit->u.hcall.result = err;
    return true;
}
56879b7067dSJoao Martins 
/*
 * Dispatch an intercepted Xen hypercall to the per-hypercall handler.
 * Returns true when the hypercall was handled (including handlers which
 * deliberately return an error to the guest), false when unrecognised.
 * Hypercalls from guest CPL > 0 are rejected with -EPERM.
 */
static bool do_kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
{
    uint16_t code = exit->u.hcall.input;

    if (exit->u.hcall.cpl > 0) {
        exit->u.hcall.result = -EPERM;
        return true;
    }

    switch (code) {
    case __HYPERVISOR_sched_op:
        return kvm_xen_hcall_sched_op(exit, cpu, exit->u.hcall.params[0],
                                      exit->u.hcall.params[1]);
    case __HYPERVISOR_vcpu_op:
        return kvm_xen_hcall_vcpu_op(exit, cpu,
                                     exit->u.hcall.params[0],
                                     exit->u.hcall.params[1],
                                     exit->u.hcall.params[2]);
    case __HYPERVISOR_hvm_op:
        return kvm_xen_hcall_hvm_op(exit, cpu, exit->u.hcall.params[0],
                                    exit->u.hcall.params[1]);
    case __HYPERVISOR_memory_op:
        return kvm_xen_hcall_memory_op(exit, cpu, exit->u.hcall.params[0],
                                       exit->u.hcall.params[1]);
    case __HYPERVISOR_xen_version:
        return kvm_xen_hcall_xen_version(exit, cpu, exit->u.hcall.params[0],
                                         exit->u.hcall.params[1]);
    default:
        return false;
    }
}
60055a3f666SJoao Martins 
/*
 * Entry point for KVM_EXIT_XEN_HCALL exits. Keeps the long-mode state
 * in sync with the kernel, dispatches the hypercall, and fills in
 * exit->u.hcall.result (with -ENOSYS for unimplemented hypercalls).
 *
 * Returns 0 when the exit was consumed, -1 for an unexpected exit type.
 */
int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
{
    if (exit->type != KVM_EXIT_XEN_HCALL) {
        return -1;
    }

    /*
     * The kernel latches the guest 32/64 mode when the MSR is used to fill
     * the hypercall page. So if we see a hypercall in a mode that doesn't
     * match our own idea of the guest mode, fetch the kernel's idea of the
     * "long mode" to remain in sync.
     */
    if (exit->u.hcall.longmode != xen_is_long_mode()) {
        xen_sync_long_mode();
    }

    if (!do_kvm_xen_handle_exit(cpu, exit)) {
        /*
         * Some hypercalls will be deliberately "implemented" by returning
         * -ENOSYS. This case is for hypercalls which are unexpected.
         */
        exit->u.hcall.result = -ENOSYS;
        qemu_log_mask(LOG_UNIMP, "Unimplemented Xen hypercall %"
                      PRId64 " (0x%" PRIx64 " 0x%" PRIx64 " 0x%" PRIx64 ")\n",
                      (uint64_t)exit->u.hcall.input,
                      (uint64_t)exit->u.hcall.params[0],
                      (uint64_t)exit->u.hcall.params[1],
                      (uint64_t)exit->u.hcall.params[2]);
    }

    trace_kvm_xen_hypercall(CPU(cpu)->cpu_index, exit->u.hcall.cpl,
                            exit->u.hcall.input, exit->u.hcall.params[0],
                            exit->u.hcall.params[1], exit->u.hcall.params[2],
                            exit->u.hcall.result);
    return 0;
}
637*c345104cSJoao Martins 
638*c345104cSJoao Martins int kvm_put_xen_state(CPUState *cs)
639*c345104cSJoao Martins {
640*c345104cSJoao Martins     X86CPU *cpu = X86_CPU(cs);
641*c345104cSJoao Martins     CPUX86State *env = &cpu->env;
642*c345104cSJoao Martins     uint64_t gpa;
643*c345104cSJoao Martins     int ret;
644*c345104cSJoao Martins 
645*c345104cSJoao Martins     gpa = env->xen_vcpu_info_gpa;
646*c345104cSJoao Martins     if (gpa == INVALID_GPA) {
647*c345104cSJoao Martins         gpa = env->xen_vcpu_info_default_gpa;
648*c345104cSJoao Martins     }
649*c345104cSJoao Martins 
650*c345104cSJoao Martins     if (gpa != INVALID_GPA) {
651*c345104cSJoao Martins         ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, gpa);
652*c345104cSJoao Martins         if (ret < 0) {
653*c345104cSJoao Martins             return ret;
654*c345104cSJoao Martins         }
655*c345104cSJoao Martins     }
656*c345104cSJoao Martins 
657*c345104cSJoao Martins     return 0;
658*c345104cSJoao Martins }
659*c345104cSJoao Martins 
660*c345104cSJoao Martins int kvm_get_xen_state(CPUState *cs)
661*c345104cSJoao Martins {
662*c345104cSJoao Martins     X86CPU *cpu = X86_CPU(cs);
663*c345104cSJoao Martins     CPUX86State *env = &cpu->env;
664*c345104cSJoao Martins     uint64_t gpa;
665*c345104cSJoao Martins 
666*c345104cSJoao Martins     /*
667*c345104cSJoao Martins      * The kernel does not mark vcpu_info as dirty when it delivers interrupts
668*c345104cSJoao Martins      * to it. It's up to userspace to *assume* that any page shared thus is
669*c345104cSJoao Martins      * always considered dirty. The shared_info page is different since it's
670*c345104cSJoao Martins      * an overlay and migrated separately anyway.
671*c345104cSJoao Martins      */
672*c345104cSJoao Martins     gpa = env->xen_vcpu_info_gpa;
673*c345104cSJoao Martins     if (gpa == INVALID_GPA) {
674*c345104cSJoao Martins         gpa = env->xen_vcpu_info_default_gpa;
675*c345104cSJoao Martins     }
676*c345104cSJoao Martins     if (gpa != INVALID_GPA) {
677*c345104cSJoao Martins         MemoryRegionSection mrs = memory_region_find(get_system_memory(),
678*c345104cSJoao Martins                                                      gpa,
679*c345104cSJoao Martins                                                      sizeof(struct vcpu_info));
680*c345104cSJoao Martins         if (mrs.mr &&
681*c345104cSJoao Martins             !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
682*c345104cSJoao Martins             memory_region_set_dirty(mrs.mr, mrs.offset_within_region,
683*c345104cSJoao Martins                                     sizeof(struct vcpu_info));
684*c345104cSJoao Martins         }
685*c345104cSJoao Martins     }
686*c345104cSJoao Martins 
687*c345104cSJoao Martins     return 0;
688*c345104cSJoao Martins }
689