xref: /qemu/target/i386/kvm/xen-emu.c (revision 27d4075dd88a3c558fdc2da13b95915c1b6c66c9)
161491cf4SDavid Woodhouse /*
261491cf4SDavid Woodhouse  * Xen HVM emulation support in KVM
361491cf4SDavid Woodhouse  *
461491cf4SDavid Woodhouse  * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
561491cf4SDavid Woodhouse  * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
661491cf4SDavid Woodhouse  *
761491cf4SDavid Woodhouse  * This work is licensed under the terms of the GNU GPL, version 2 or later.
861491cf4SDavid Woodhouse  * See the COPYING file in the top-level directory.
961491cf4SDavid Woodhouse  *
1061491cf4SDavid Woodhouse  */
1161491cf4SDavid Woodhouse 
1261491cf4SDavid Woodhouse #include "qemu/osdep.h"
1355a3f666SJoao Martins #include "qemu/log.h"
1479b7067dSJoao Martins #include "qemu/main-loop.h"
15fb0fd2ceSJoao Martins #include "hw/xen/xen.h"
1661491cf4SDavid Woodhouse #include "sysemu/kvm_int.h"
1761491cf4SDavid Woodhouse #include "sysemu/kvm_xen.h"
1861491cf4SDavid Woodhouse #include "kvm/kvm_i386.h"
19bedcc139SJoao Martins #include "exec/address-spaces.h"
2061491cf4SDavid Woodhouse #include "xen-emu.h"
2155a3f666SJoao Martins #include "trace.h"
2279b7067dSJoao Martins #include "sysemu/runstate.h"
2361491cf4SDavid Woodhouse 
24*27d4075dSDavid Woodhouse #include "hw/pci/msi.h"
25*27d4075dSDavid Woodhouse #include "hw/i386/apic-msidef.h"
26110a0ea5SDavid Woodhouse #include "hw/i386/kvm/xen_overlay.h"
2791cce756SDavid Woodhouse #include "hw/i386/kvm/xen_evtchn.h"
28110a0ea5SDavid Woodhouse 
29bedcc139SJoao Martins #include "hw/xen/interface/version.h"
3079b7067dSJoao Martins #include "hw/xen/interface/sched.h"
31fb0fd2ceSJoao Martins #include "hw/xen/interface/memory.h"
32671bfdcdSJoao Martins #include "hw/xen/interface/hvm/hvm_op.h"
33105b47fdSAnkur Arora #include "hw/xen/interface/hvm/params.h"
34d70bd6a4SJoao Martins #include "hw/xen/interface/vcpu.h"
353b06f29bSJoao Martins #include "hw/xen/interface/event_channel.h"
36fb0fd2ceSJoao Martins 
37fb0fd2ceSJoao Martins #include "xen-compat.h"
38fb0fd2ceSJoao Martins 
/*
 * hypercall_compat32(longmode): true when a hypercall's arguments must be
 * read using the 32-bit compat structure layouts. On 64-bit targets that
 * is whenever the longmode flag passed in is clear; other targets always
 * evaluate to false.
 */
#ifndef TARGET_X86_64
#define hypercall_compat32(longmode) (false)
#else
#define hypercall_compat32(longmode) (!(longmode))
#endif
44bedcc139SJoao Martins 
45f0689302SJoao Martins static bool kvm_gva_to_gpa(CPUState *cs, uint64_t gva, uint64_t *gpa,
46f0689302SJoao Martins                            size_t *len, bool is_write)
47bedcc139SJoao Martins {
48bedcc139SJoao Martins         struct kvm_translation tr = {
49bedcc139SJoao Martins             .linear_address = gva,
50bedcc139SJoao Martins         };
51bedcc139SJoao Martins 
52f0689302SJoao Martins         if (len) {
53f0689302SJoao Martins             *len = TARGET_PAGE_SIZE - (gva & ~TARGET_PAGE_MASK);
54f0689302SJoao Martins         }
55f0689302SJoao Martins 
56f0689302SJoao Martins         if (kvm_vcpu_ioctl(cs, KVM_TRANSLATE, &tr) || !tr.valid ||
57f0689302SJoao Martins             (is_write && !tr.writeable)) {
58f0689302SJoao Martins             return false;
59f0689302SJoao Martins         }
60f0689302SJoao Martins         *gpa = tr.physical_address;
61f0689302SJoao Martins         return true;
62f0689302SJoao Martins }
63f0689302SJoao Martins 
64f0689302SJoao Martins static int kvm_gva_rw(CPUState *cs, uint64_t gva, void *_buf, size_t sz,
65f0689302SJoao Martins                       bool is_write)
66f0689302SJoao Martins {
67f0689302SJoao Martins     uint8_t *buf = (uint8_t *)_buf;
68f0689302SJoao Martins     uint64_t gpa;
69f0689302SJoao Martins     size_t len;
70f0689302SJoao Martins 
71f0689302SJoao Martins     while (sz) {
72f0689302SJoao Martins         if (!kvm_gva_to_gpa(cs, gva, &gpa, &len, is_write)) {
73f0689302SJoao Martins             return -EFAULT;
74f0689302SJoao Martins         }
75bedcc139SJoao Martins         if (len > sz) {
76bedcc139SJoao Martins             len = sz;
77bedcc139SJoao Martins         }
78bedcc139SJoao Martins 
79f0689302SJoao Martins         cpu_physical_memory_rw(gpa, buf, len, is_write);
80bedcc139SJoao Martins 
81bedcc139SJoao Martins         buf += len;
82bedcc139SJoao Martins         sz -= len;
83bedcc139SJoao Martins         gva += len;
84bedcc139SJoao Martins     }
85bedcc139SJoao Martins 
86bedcc139SJoao Martins     return 0;
87bedcc139SJoao Martins }
88bedcc139SJoao Martins 
89bedcc139SJoao Martins static inline int kvm_copy_from_gva(CPUState *cs, uint64_t gva, void *buf,
90bedcc139SJoao Martins                                     size_t sz)
91bedcc139SJoao Martins {
92bedcc139SJoao Martins     return kvm_gva_rw(cs, gva, buf, sz, false);
93bedcc139SJoao Martins }
94bedcc139SJoao Martins 
95bedcc139SJoao Martins static inline int kvm_copy_to_gva(CPUState *cs, uint64_t gva, void *buf,
96bedcc139SJoao Martins                                   size_t sz)
97bedcc139SJoao Martins {
98bedcc139SJoao Martins     return kvm_gva_rw(cs, gva, buf, sz, true);
99bedcc139SJoao Martins }
100bedcc139SJoao Martins 
101f66b8a83SJoao Martins int kvm_xen_init(KVMState *s, uint32_t hypercall_msr)
10261491cf4SDavid Woodhouse {
10361491cf4SDavid Woodhouse     const int required_caps = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
10461491cf4SDavid Woodhouse         KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_SHARED_INFO;
10561491cf4SDavid Woodhouse     struct kvm_xen_hvm_config cfg = {
106f66b8a83SJoao Martins         .msr = hypercall_msr,
10761491cf4SDavid Woodhouse         .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
10861491cf4SDavid Woodhouse     };
10961491cf4SDavid Woodhouse     int xen_caps, ret;
11061491cf4SDavid Woodhouse 
11161491cf4SDavid Woodhouse     xen_caps = kvm_check_extension(s, KVM_CAP_XEN_HVM);
11261491cf4SDavid Woodhouse     if (required_caps & ~xen_caps) {
11361491cf4SDavid Woodhouse         error_report("kvm: Xen HVM guest support not present or insufficient");
11461491cf4SDavid Woodhouse         return -ENOSYS;
11561491cf4SDavid Woodhouse     }
11661491cf4SDavid Woodhouse 
11761491cf4SDavid Woodhouse     if (xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND) {
11861491cf4SDavid Woodhouse         struct kvm_xen_hvm_attr ha = {
11961491cf4SDavid Woodhouse             .type = KVM_XEN_ATTR_TYPE_XEN_VERSION,
12061491cf4SDavid Woodhouse             .u.xen_version = s->xen_version,
12161491cf4SDavid Woodhouse         };
12261491cf4SDavid Woodhouse         (void)kvm_vm_ioctl(s, KVM_XEN_HVM_SET_ATTR, &ha);
12361491cf4SDavid Woodhouse 
12461491cf4SDavid Woodhouse         cfg.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
12561491cf4SDavid Woodhouse     }
12661491cf4SDavid Woodhouse 
12761491cf4SDavid Woodhouse     ret = kvm_vm_ioctl(s, KVM_XEN_HVM_CONFIG, &cfg);
12861491cf4SDavid Woodhouse     if (ret < 0) {
12961491cf4SDavid Woodhouse         error_report("kvm: Failed to enable Xen HVM support: %s",
13061491cf4SDavid Woodhouse                      strerror(-ret));
13161491cf4SDavid Woodhouse         return ret;
13261491cf4SDavid Woodhouse     }
13361491cf4SDavid Woodhouse 
13461491cf4SDavid Woodhouse     s->xen_caps = xen_caps;
13561491cf4SDavid Woodhouse     return 0;
13661491cf4SDavid Woodhouse }
13761491cf4SDavid Woodhouse 
1385e691a95SDavid Woodhouse int kvm_xen_init_vcpu(CPUState *cs)
1395e691a95SDavid Woodhouse {
140c345104cSJoao Martins     X86CPU *cpu = X86_CPU(cs);
141c345104cSJoao Martins     CPUX86State *env = &cpu->env;
1425e691a95SDavid Woodhouse     int err;
1435e691a95SDavid Woodhouse 
1445e691a95SDavid Woodhouse     /*
1455e691a95SDavid Woodhouse      * The kernel needs to know the Xen/ACPI vCPU ID because that's
1465e691a95SDavid Woodhouse      * what the guest uses in hypercalls such as timers. It doesn't
1475e691a95SDavid Woodhouse      * match the APIC ID which is generally used for talking to the
1485e691a95SDavid Woodhouse      * kernel about vCPUs. And if vCPU threads race with creating
1495e691a95SDavid Woodhouse      * their KVM vCPUs out of order, it doesn't necessarily match
1505e691a95SDavid Woodhouse      * with the kernel's internal vCPU indices either.
1515e691a95SDavid Woodhouse      */
1525e691a95SDavid Woodhouse     if (kvm_xen_has_cap(EVTCHN_SEND)) {
1535e691a95SDavid Woodhouse         struct kvm_xen_vcpu_attr va = {
1545e691a95SDavid Woodhouse             .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID,
1555e691a95SDavid Woodhouse             .u.vcpu_id = cs->cpu_index,
1565e691a95SDavid Woodhouse         };
1575e691a95SDavid Woodhouse         err = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
1585e691a95SDavid Woodhouse         if (err) {
1595e691a95SDavid Woodhouse             error_report("kvm: Failed to set Xen vCPU ID attribute: %s",
1605e691a95SDavid Woodhouse                          strerror(-err));
1615e691a95SDavid Woodhouse             return err;
1625e691a95SDavid Woodhouse         }
1635e691a95SDavid Woodhouse     }
1645e691a95SDavid Woodhouse 
165c345104cSJoao Martins     env->xen_vcpu_info_gpa = INVALID_GPA;
166c345104cSJoao Martins     env->xen_vcpu_info_default_gpa = INVALID_GPA;
167f0689302SJoao Martins     env->xen_vcpu_time_info_gpa = INVALID_GPA;
1685092db87SJoao Martins     env->xen_vcpu_runstate_gpa = INVALID_GPA;
169c345104cSJoao Martins 
1705e691a95SDavid Woodhouse     return 0;
1715e691a95SDavid Woodhouse }
1725e691a95SDavid Woodhouse 
17361491cf4SDavid Woodhouse uint32_t kvm_xen_get_caps(void)
17461491cf4SDavid Woodhouse {
17561491cf4SDavid Woodhouse     return kvm_state->xen_caps;
17661491cf4SDavid Woodhouse }
17755a3f666SJoao Martins 
178bedcc139SJoao Martins static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu,
179bedcc139SJoao Martins                                      int cmd, uint64_t arg)
180bedcc139SJoao Martins {
181bedcc139SJoao Martins     int err = 0;
182bedcc139SJoao Martins 
183bedcc139SJoao Martins     switch (cmd) {
184bedcc139SJoao Martins     case XENVER_get_features: {
185bedcc139SJoao Martins         struct xen_feature_info fi;
186bedcc139SJoao Martins 
187bedcc139SJoao Martins         /* No need for 32/64 compat handling */
188bedcc139SJoao Martins         qemu_build_assert(sizeof(fi) == 8);
189bedcc139SJoao Martins 
190bedcc139SJoao Martins         err = kvm_copy_from_gva(CPU(cpu), arg, &fi, sizeof(fi));
191bedcc139SJoao Martins         if (err) {
192bedcc139SJoao Martins             break;
193bedcc139SJoao Martins         }
194bedcc139SJoao Martins 
195bedcc139SJoao Martins         fi.submap = 0;
196bedcc139SJoao Martins         if (fi.submap_idx == 0) {
197bedcc139SJoao Martins             fi.submap |= 1 << XENFEAT_writable_page_tables |
198bedcc139SJoao Martins                          1 << XENFEAT_writable_descriptor_tables |
199bedcc139SJoao Martins                          1 << XENFEAT_auto_translated_physmap |
200105b47fdSAnkur Arora                          1 << XENFEAT_supervisor_mode_kernel |
201105b47fdSAnkur Arora                          1 << XENFEAT_hvm_callback_vector;
202bedcc139SJoao Martins         }
203bedcc139SJoao Martins 
204bedcc139SJoao Martins         err = kvm_copy_to_gva(CPU(cpu), arg, &fi, sizeof(fi));
205bedcc139SJoao Martins         break;
206bedcc139SJoao Martins     }
207bedcc139SJoao Martins 
208bedcc139SJoao Martins     default:
209bedcc139SJoao Martins         return false;
210bedcc139SJoao Martins     }
211bedcc139SJoao Martins 
212bedcc139SJoao Martins     exit->u.hcall.result = err;
213bedcc139SJoao Martins     return true;
214bedcc139SJoao Martins }
215bedcc139SJoao Martins 
216c345104cSJoao Martins static int kvm_xen_set_vcpu_attr(CPUState *cs, uint16_t type, uint64_t gpa)
217c345104cSJoao Martins {
218c345104cSJoao Martins     struct kvm_xen_vcpu_attr xhsi;
219c345104cSJoao Martins 
220c345104cSJoao Martins     xhsi.type = type;
221c345104cSJoao Martins     xhsi.u.gpa = gpa;
222c345104cSJoao Martins 
223c345104cSJoao Martins     trace_kvm_xen_set_vcpu_attr(cs->cpu_index, type, gpa);
224c345104cSJoao Martins 
225c345104cSJoao Martins     return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xhsi);
226c345104cSJoao Martins }
227c345104cSJoao Martins 
228105b47fdSAnkur Arora static int kvm_xen_set_vcpu_callback_vector(CPUState *cs)
229105b47fdSAnkur Arora {
230105b47fdSAnkur Arora     uint8_t vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
231105b47fdSAnkur Arora     struct kvm_xen_vcpu_attr xva;
232105b47fdSAnkur Arora 
233105b47fdSAnkur Arora     xva.type = KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR;
234105b47fdSAnkur Arora     xva.u.vector = vector;
235105b47fdSAnkur Arora 
236105b47fdSAnkur Arora     trace_kvm_xen_set_vcpu_callback(cs->cpu_index, vector);
237105b47fdSAnkur Arora 
238105b47fdSAnkur Arora     return kvm_vcpu_ioctl(cs, KVM_XEN_HVM_SET_ATTR, &xva);
239105b47fdSAnkur Arora }
240105b47fdSAnkur Arora 
241105b47fdSAnkur Arora static void do_set_vcpu_callback_vector(CPUState *cs, run_on_cpu_data data)
242105b47fdSAnkur Arora {
243105b47fdSAnkur Arora     X86CPU *cpu = X86_CPU(cs);
244105b47fdSAnkur Arora     CPUX86State *env = &cpu->env;
245105b47fdSAnkur Arora 
246105b47fdSAnkur Arora     env->xen_vcpu_callback_vector = data.host_int;
247105b47fdSAnkur Arora 
248105b47fdSAnkur Arora     if (kvm_xen_has_cap(EVTCHN_SEND)) {
249105b47fdSAnkur Arora         kvm_xen_set_vcpu_callback_vector(cs);
250105b47fdSAnkur Arora     }
251105b47fdSAnkur Arora }
252105b47fdSAnkur Arora 
253*27d4075dSDavid Woodhouse static int set_vcpu_info(CPUState *cs, uint64_t gpa)
254*27d4075dSDavid Woodhouse {
255*27d4075dSDavid Woodhouse     X86CPU *cpu = X86_CPU(cs);
256*27d4075dSDavid Woodhouse     CPUX86State *env = &cpu->env;
257*27d4075dSDavid Woodhouse     MemoryRegionSection mrs = { .mr = NULL };
258*27d4075dSDavid Woodhouse     void *vcpu_info_hva = NULL;
259*27d4075dSDavid Woodhouse     int ret;
260*27d4075dSDavid Woodhouse 
261*27d4075dSDavid Woodhouse     ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, gpa);
262*27d4075dSDavid Woodhouse     if (ret || gpa == INVALID_GPA) {
263*27d4075dSDavid Woodhouse         goto out;
264*27d4075dSDavid Woodhouse     }
265*27d4075dSDavid Woodhouse 
266*27d4075dSDavid Woodhouse     mrs = memory_region_find(get_system_memory(), gpa,
267*27d4075dSDavid Woodhouse                              sizeof(struct vcpu_info));
268*27d4075dSDavid Woodhouse     if (mrs.mr && mrs.mr->ram_block &&
269*27d4075dSDavid Woodhouse         !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
270*27d4075dSDavid Woodhouse         vcpu_info_hva = qemu_map_ram_ptr(mrs.mr->ram_block,
271*27d4075dSDavid Woodhouse                                          mrs.offset_within_region);
272*27d4075dSDavid Woodhouse     }
273*27d4075dSDavid Woodhouse     if (!vcpu_info_hva) {
274*27d4075dSDavid Woodhouse         if (mrs.mr) {
275*27d4075dSDavid Woodhouse             memory_region_unref(mrs.mr);
276*27d4075dSDavid Woodhouse             mrs.mr = NULL;
277*27d4075dSDavid Woodhouse         }
278*27d4075dSDavid Woodhouse         ret = -EINVAL;
279*27d4075dSDavid Woodhouse     }
280*27d4075dSDavid Woodhouse 
281*27d4075dSDavid Woodhouse  out:
282*27d4075dSDavid Woodhouse     if (env->xen_vcpu_info_mr) {
283*27d4075dSDavid Woodhouse         memory_region_unref(env->xen_vcpu_info_mr);
284*27d4075dSDavid Woodhouse     }
285*27d4075dSDavid Woodhouse     env->xen_vcpu_info_hva = vcpu_info_hva;
286*27d4075dSDavid Woodhouse     env->xen_vcpu_info_mr = mrs.mr;
287*27d4075dSDavid Woodhouse     return ret;
288*27d4075dSDavid Woodhouse }
289*27d4075dSDavid Woodhouse 
290c345104cSJoao Martins static void do_set_vcpu_info_default_gpa(CPUState *cs, run_on_cpu_data data)
291c345104cSJoao Martins {
292c345104cSJoao Martins     X86CPU *cpu = X86_CPU(cs);
293c345104cSJoao Martins     CPUX86State *env = &cpu->env;
294c345104cSJoao Martins 
295c345104cSJoao Martins     env->xen_vcpu_info_default_gpa = data.host_ulong;
296c345104cSJoao Martins 
297c345104cSJoao Martins     /* Changing the default does nothing if a vcpu_info was explicitly set. */
298c345104cSJoao Martins     if (env->xen_vcpu_info_gpa == INVALID_GPA) {
299*27d4075dSDavid Woodhouse         set_vcpu_info(cs, env->xen_vcpu_info_default_gpa);
300c345104cSJoao Martins     }
301c345104cSJoao Martins }
302c345104cSJoao Martins 
303c345104cSJoao Martins static void do_set_vcpu_info_gpa(CPUState *cs, run_on_cpu_data data)
304c345104cSJoao Martins {
305c345104cSJoao Martins     X86CPU *cpu = X86_CPU(cs);
306c345104cSJoao Martins     CPUX86State *env = &cpu->env;
307c345104cSJoao Martins 
308c345104cSJoao Martins     env->xen_vcpu_info_gpa = data.host_ulong;
309c345104cSJoao Martins 
310*27d4075dSDavid Woodhouse     set_vcpu_info(cs, env->xen_vcpu_info_gpa);
311*27d4075dSDavid Woodhouse }
312*27d4075dSDavid Woodhouse 
313*27d4075dSDavid Woodhouse void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id)
314*27d4075dSDavid Woodhouse {
315*27d4075dSDavid Woodhouse     CPUState *cs = qemu_get_cpu(vcpu_id);
316*27d4075dSDavid Woodhouse     if (!cs) {
317*27d4075dSDavid Woodhouse         return NULL;
318*27d4075dSDavid Woodhouse     }
319*27d4075dSDavid Woodhouse 
320*27d4075dSDavid Woodhouse     return X86_CPU(cs)->env.xen_vcpu_info_hva;
321*27d4075dSDavid Woodhouse }
322*27d4075dSDavid Woodhouse 
323*27d4075dSDavid Woodhouse void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type)
324*27d4075dSDavid Woodhouse {
325*27d4075dSDavid Woodhouse     CPUState *cs = qemu_get_cpu(vcpu_id);
326*27d4075dSDavid Woodhouse     uint8_t vector;
327*27d4075dSDavid Woodhouse 
328*27d4075dSDavid Woodhouse     if (!cs) {
329*27d4075dSDavid Woodhouse         return;
330*27d4075dSDavid Woodhouse     }
331*27d4075dSDavid Woodhouse 
332*27d4075dSDavid Woodhouse     vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
333*27d4075dSDavid Woodhouse     if (vector) {
334*27d4075dSDavid Woodhouse         /*
335*27d4075dSDavid Woodhouse          * The per-vCPU callback vector injected via lapic. Just
336*27d4075dSDavid Woodhouse          * deliver it as an MSI.
337*27d4075dSDavid Woodhouse          */
338*27d4075dSDavid Woodhouse         MSIMessage msg = {
339*27d4075dSDavid Woodhouse             .address = APIC_DEFAULT_ADDRESS | X86_CPU(cs)->apic_id,
340*27d4075dSDavid Woodhouse             .data = vector | (1UL << MSI_DATA_LEVEL_SHIFT),
341*27d4075dSDavid Woodhouse         };
342*27d4075dSDavid Woodhouse         kvm_irqchip_send_msi(kvm_state, msg);
343*27d4075dSDavid Woodhouse         return;
344*27d4075dSDavid Woodhouse     }
345*27d4075dSDavid Woodhouse 
346*27d4075dSDavid Woodhouse     switch (type) {
347*27d4075dSDavid Woodhouse     case HVM_PARAM_CALLBACK_TYPE_VECTOR:
348*27d4075dSDavid Woodhouse         /*
349*27d4075dSDavid Woodhouse          * If the evtchn_upcall_pending field in the vcpu_info is set, then
350*27d4075dSDavid Woodhouse          * KVM will automatically deliver the vector on entering the vCPU
351*27d4075dSDavid Woodhouse          * so all we have to do is kick it out.
352*27d4075dSDavid Woodhouse          */
353*27d4075dSDavid Woodhouse         qemu_cpu_kick(cs);
354*27d4075dSDavid Woodhouse         break;
355*27d4075dSDavid Woodhouse     }
356c345104cSJoao Martins }
357c345104cSJoao Martins 
358f0689302SJoao Martins static void do_set_vcpu_time_info_gpa(CPUState *cs, run_on_cpu_data data)
359f0689302SJoao Martins {
360f0689302SJoao Martins     X86CPU *cpu = X86_CPU(cs);
361f0689302SJoao Martins     CPUX86State *env = &cpu->env;
362f0689302SJoao Martins 
363f0689302SJoao Martins     env->xen_vcpu_time_info_gpa = data.host_ulong;
364f0689302SJoao Martins 
365f0689302SJoao Martins     kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
366f0689302SJoao Martins                           env->xen_vcpu_time_info_gpa);
367f0689302SJoao Martins }
368f0689302SJoao Martins 
3695092db87SJoao Martins static void do_set_vcpu_runstate_gpa(CPUState *cs, run_on_cpu_data data)
3705092db87SJoao Martins {
3715092db87SJoao Martins     X86CPU *cpu = X86_CPU(cs);
3725092db87SJoao Martins     CPUX86State *env = &cpu->env;
3735092db87SJoao Martins 
3745092db87SJoao Martins     env->xen_vcpu_runstate_gpa = data.host_ulong;
3755092db87SJoao Martins 
3765092db87SJoao Martins     kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
3775092db87SJoao Martins                           env->xen_vcpu_runstate_gpa);
3785092db87SJoao Martins }
3795092db87SJoao Martins 
380c345104cSJoao Martins static void do_vcpu_soft_reset(CPUState *cs, run_on_cpu_data data)
381c345104cSJoao Martins {
382c345104cSJoao Martins     X86CPU *cpu = X86_CPU(cs);
383c345104cSJoao Martins     CPUX86State *env = &cpu->env;
384c345104cSJoao Martins 
385c345104cSJoao Martins     env->xen_vcpu_info_gpa = INVALID_GPA;
386c345104cSJoao Martins     env->xen_vcpu_info_default_gpa = INVALID_GPA;
387f0689302SJoao Martins     env->xen_vcpu_time_info_gpa = INVALID_GPA;
3885092db87SJoao Martins     env->xen_vcpu_runstate_gpa = INVALID_GPA;
389105b47fdSAnkur Arora     env->xen_vcpu_callback_vector = 0;
390c345104cSJoao Martins 
391*27d4075dSDavid Woodhouse     set_vcpu_info(cs, INVALID_GPA);
392f0689302SJoao Martins     kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
393f0689302SJoao Martins                           INVALID_GPA);
3945092db87SJoao Martins     kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
3955092db87SJoao Martins                           INVALID_GPA);
396105b47fdSAnkur Arora     if (kvm_xen_has_cap(EVTCHN_SEND)) {
397105b47fdSAnkur Arora         kvm_xen_set_vcpu_callback_vector(cs);
398105b47fdSAnkur Arora     }
3995092db87SJoao Martins 
400c345104cSJoao Martins }
401c345104cSJoao Martins 
402fb0fd2ceSJoao Martins static int xen_set_shared_info(uint64_t gfn)
403fb0fd2ceSJoao Martins {
404fb0fd2ceSJoao Martins     uint64_t gpa = gfn << TARGET_PAGE_BITS;
405c345104cSJoao Martins     int i, err;
406fb0fd2ceSJoao Martins 
407fb0fd2ceSJoao Martins     QEMU_IOTHREAD_LOCK_GUARD();
408fb0fd2ceSJoao Martins 
409fb0fd2ceSJoao Martins     /*
410fb0fd2ceSJoao Martins      * The xen_overlay device tells KVM about it too, since it had to
411fb0fd2ceSJoao Martins      * do that on migration load anyway (unless we're going to jump
412fb0fd2ceSJoao Martins      * through lots of hoops to maintain the fiction that this isn't
413fb0fd2ceSJoao Martins      * KVM-specific.
414fb0fd2ceSJoao Martins      */
415fb0fd2ceSJoao Martins     err = xen_overlay_map_shinfo_page(gpa);
416fb0fd2ceSJoao Martins     if (err) {
417fb0fd2ceSJoao Martins             return err;
418fb0fd2ceSJoao Martins     }
419fb0fd2ceSJoao Martins 
420fb0fd2ceSJoao Martins     trace_kvm_xen_set_shared_info(gfn);
421fb0fd2ceSJoao Martins 
422c345104cSJoao Martins     for (i = 0; i < XEN_LEGACY_MAX_VCPUS; i++) {
423c345104cSJoao Martins         CPUState *cpu = qemu_get_cpu(i);
424c345104cSJoao Martins         if (cpu) {
425c345104cSJoao Martins             async_run_on_cpu(cpu, do_set_vcpu_info_default_gpa,
426c345104cSJoao Martins                              RUN_ON_CPU_HOST_ULONG(gpa));
427c345104cSJoao Martins         }
428c345104cSJoao Martins         gpa += sizeof(vcpu_info_t);
429c345104cSJoao Martins     }
430c345104cSJoao Martins 
431fb0fd2ceSJoao Martins     return err;
432fb0fd2ceSJoao Martins }
433fb0fd2ceSJoao Martins 
434fb0fd2ceSJoao Martins static int add_to_physmap_one(uint32_t space, uint64_t idx, uint64_t gfn)
435fb0fd2ceSJoao Martins {
436fb0fd2ceSJoao Martins     switch (space) {
437fb0fd2ceSJoao Martins     case XENMAPSPACE_shared_info:
438fb0fd2ceSJoao Martins         if (idx > 0) {
439fb0fd2ceSJoao Martins             return -EINVAL;
440fb0fd2ceSJoao Martins         }
441fb0fd2ceSJoao Martins         return xen_set_shared_info(gfn);
442fb0fd2ceSJoao Martins 
443fb0fd2ceSJoao Martins     case XENMAPSPACE_grant_table:
444fb0fd2ceSJoao Martins     case XENMAPSPACE_gmfn:
445fb0fd2ceSJoao Martins     case XENMAPSPACE_gmfn_range:
446fb0fd2ceSJoao Martins         return -ENOTSUP;
447fb0fd2ceSJoao Martins 
448fb0fd2ceSJoao Martins     case XENMAPSPACE_gmfn_foreign:
449fb0fd2ceSJoao Martins     case XENMAPSPACE_dev_mmio:
450fb0fd2ceSJoao Martins         return -EPERM;
451fb0fd2ceSJoao Martins 
452fb0fd2ceSJoao Martins     default:
453fb0fd2ceSJoao Martins         return -EINVAL;
454fb0fd2ceSJoao Martins     }
455fb0fd2ceSJoao Martins }
456fb0fd2ceSJoao Martins 
457fb0fd2ceSJoao Martins static int do_add_to_physmap(struct kvm_xen_exit *exit, X86CPU *cpu,
458fb0fd2ceSJoao Martins                              uint64_t arg)
459fb0fd2ceSJoao Martins {
460fb0fd2ceSJoao Martins     struct xen_add_to_physmap xatp;
461fb0fd2ceSJoao Martins     CPUState *cs = CPU(cpu);
462fb0fd2ceSJoao Martins 
463fb0fd2ceSJoao Martins     if (hypercall_compat32(exit->u.hcall.longmode)) {
464fb0fd2ceSJoao Martins         struct compat_xen_add_to_physmap xatp32;
465fb0fd2ceSJoao Martins 
466fb0fd2ceSJoao Martins         qemu_build_assert(sizeof(struct compat_xen_add_to_physmap) == 16);
467fb0fd2ceSJoao Martins         if (kvm_copy_from_gva(cs, arg, &xatp32, sizeof(xatp32))) {
468fb0fd2ceSJoao Martins             return -EFAULT;
469fb0fd2ceSJoao Martins         }
470fb0fd2ceSJoao Martins         xatp.domid = xatp32.domid;
471fb0fd2ceSJoao Martins         xatp.size = xatp32.size;
472fb0fd2ceSJoao Martins         xatp.space = xatp32.space;
473fb0fd2ceSJoao Martins         xatp.idx = xatp32.idx;
474fb0fd2ceSJoao Martins         xatp.gpfn = xatp32.gpfn;
475fb0fd2ceSJoao Martins     } else {
476fb0fd2ceSJoao Martins         if (kvm_copy_from_gva(cs, arg, &xatp, sizeof(xatp))) {
477fb0fd2ceSJoao Martins             return -EFAULT;
478fb0fd2ceSJoao Martins         }
479fb0fd2ceSJoao Martins     }
480fb0fd2ceSJoao Martins 
481fb0fd2ceSJoao Martins     if (xatp.domid != DOMID_SELF && xatp.domid != xen_domid) {
482fb0fd2ceSJoao Martins         return -ESRCH;
483fb0fd2ceSJoao Martins     }
484fb0fd2ceSJoao Martins 
485fb0fd2ceSJoao Martins     return add_to_physmap_one(xatp.space, xatp.idx, xatp.gpfn);
486fb0fd2ceSJoao Martins }
487fb0fd2ceSJoao Martins 
488782a7960SDavid Woodhouse static int do_add_to_physmap_batch(struct kvm_xen_exit *exit, X86CPU *cpu,
489782a7960SDavid Woodhouse                                    uint64_t arg)
490782a7960SDavid Woodhouse {
491782a7960SDavid Woodhouse     struct xen_add_to_physmap_batch xatpb;
492782a7960SDavid Woodhouse     unsigned long idxs_gva, gpfns_gva, errs_gva;
493782a7960SDavid Woodhouse     CPUState *cs = CPU(cpu);
494782a7960SDavid Woodhouse     size_t op_sz;
495782a7960SDavid Woodhouse 
496782a7960SDavid Woodhouse     if (hypercall_compat32(exit->u.hcall.longmode)) {
497782a7960SDavid Woodhouse         struct compat_xen_add_to_physmap_batch xatpb32;
498782a7960SDavid Woodhouse 
499782a7960SDavid Woodhouse         qemu_build_assert(sizeof(struct compat_xen_add_to_physmap_batch) == 20);
500782a7960SDavid Woodhouse         if (kvm_copy_from_gva(cs, arg, &xatpb32, sizeof(xatpb32))) {
501782a7960SDavid Woodhouse             return -EFAULT;
502782a7960SDavid Woodhouse         }
503782a7960SDavid Woodhouse         xatpb.domid = xatpb32.domid;
504782a7960SDavid Woodhouse         xatpb.space = xatpb32.space;
505782a7960SDavid Woodhouse         xatpb.size = xatpb32.size;
506782a7960SDavid Woodhouse 
507782a7960SDavid Woodhouse         idxs_gva = xatpb32.idxs.c;
508782a7960SDavid Woodhouse         gpfns_gva = xatpb32.gpfns.c;
509782a7960SDavid Woodhouse         errs_gva = xatpb32.errs.c;
510782a7960SDavid Woodhouse         op_sz = sizeof(uint32_t);
511782a7960SDavid Woodhouse     } else {
512782a7960SDavid Woodhouse         if (kvm_copy_from_gva(cs, arg, &xatpb, sizeof(xatpb))) {
513782a7960SDavid Woodhouse             return -EFAULT;
514782a7960SDavid Woodhouse         }
515782a7960SDavid Woodhouse         op_sz = sizeof(unsigned long);
516782a7960SDavid Woodhouse         idxs_gva = (unsigned long)xatpb.idxs.p;
517782a7960SDavid Woodhouse         gpfns_gva = (unsigned long)xatpb.gpfns.p;
518782a7960SDavid Woodhouse         errs_gva = (unsigned long)xatpb.errs.p;
519782a7960SDavid Woodhouse     }
520782a7960SDavid Woodhouse 
521782a7960SDavid Woodhouse     if (xatpb.domid != DOMID_SELF && xatpb.domid != xen_domid) {
522782a7960SDavid Woodhouse         return -ESRCH;
523782a7960SDavid Woodhouse     }
524782a7960SDavid Woodhouse 
525782a7960SDavid Woodhouse     /* Explicitly invalid for the batch op. Not that we implement it anyway. */
526782a7960SDavid Woodhouse     if (xatpb.space == XENMAPSPACE_gmfn_range) {
527782a7960SDavid Woodhouse         return -EINVAL;
528782a7960SDavid Woodhouse     }
529782a7960SDavid Woodhouse 
530782a7960SDavid Woodhouse     while (xatpb.size--) {
531782a7960SDavid Woodhouse         unsigned long idx = 0;
532782a7960SDavid Woodhouse         unsigned long gpfn = 0;
533782a7960SDavid Woodhouse         int err;
534782a7960SDavid Woodhouse 
535782a7960SDavid Woodhouse         /* For 32-bit compat this only copies the low 32 bits of each */
536782a7960SDavid Woodhouse         if (kvm_copy_from_gva(cs, idxs_gva, &idx, op_sz) ||
537782a7960SDavid Woodhouse             kvm_copy_from_gva(cs, gpfns_gva, &gpfn, op_sz)) {
538782a7960SDavid Woodhouse             return -EFAULT;
539782a7960SDavid Woodhouse         }
540782a7960SDavid Woodhouse         idxs_gva += op_sz;
541782a7960SDavid Woodhouse         gpfns_gva += op_sz;
542782a7960SDavid Woodhouse 
543782a7960SDavid Woodhouse         err = add_to_physmap_one(xatpb.space, idx, gpfn);
544782a7960SDavid Woodhouse 
545782a7960SDavid Woodhouse         if (kvm_copy_to_gva(cs, errs_gva, &err, sizeof(err))) {
546782a7960SDavid Woodhouse             return -EFAULT;
547782a7960SDavid Woodhouse         }
548782a7960SDavid Woodhouse         errs_gva += sizeof(err);
549782a7960SDavid Woodhouse     }
550782a7960SDavid Woodhouse     return 0;
551782a7960SDavid Woodhouse }
552782a7960SDavid Woodhouse 
553fb0fd2ceSJoao Martins static bool kvm_xen_hcall_memory_op(struct kvm_xen_exit *exit, X86CPU *cpu,
554fb0fd2ceSJoao Martins                                    int cmd, uint64_t arg)
555fb0fd2ceSJoao Martins {
556fb0fd2ceSJoao Martins     int err;
557fb0fd2ceSJoao Martins 
558fb0fd2ceSJoao Martins     switch (cmd) {
559fb0fd2ceSJoao Martins     case XENMEM_add_to_physmap:
560fb0fd2ceSJoao Martins         err = do_add_to_physmap(exit, cpu, arg);
561fb0fd2ceSJoao Martins         break;
562fb0fd2ceSJoao Martins 
563782a7960SDavid Woodhouse     case XENMEM_add_to_physmap_batch:
564782a7960SDavid Woodhouse         err = do_add_to_physmap_batch(exit, cpu, arg);
565782a7960SDavid Woodhouse         break;
566782a7960SDavid Woodhouse 
567fb0fd2ceSJoao Martins     default:
568fb0fd2ceSJoao Martins         return false;
569fb0fd2ceSJoao Martins     }
570fb0fd2ceSJoao Martins 
571fb0fd2ceSJoao Martins     exit->u.hcall.result = err;
572fb0fd2ceSJoao Martins     return true;
573fb0fd2ceSJoao Martins }
574fb0fd2ceSJoao Martins 
5755dbcd01aSAnkur Arora static bool handle_set_param(struct kvm_xen_exit *exit, X86CPU *cpu,
5765dbcd01aSAnkur Arora                              uint64_t arg)
5775dbcd01aSAnkur Arora {
5785dbcd01aSAnkur Arora     CPUState *cs = CPU(cpu);
5795dbcd01aSAnkur Arora     struct xen_hvm_param hp;
5805dbcd01aSAnkur Arora     int err = 0;
5815dbcd01aSAnkur Arora 
5825dbcd01aSAnkur Arora     /* No need for 32/64 compat handling */
5835dbcd01aSAnkur Arora     qemu_build_assert(sizeof(hp) == 16);
5845dbcd01aSAnkur Arora 
5855dbcd01aSAnkur Arora     if (kvm_copy_from_gva(cs, arg, &hp, sizeof(hp))) {
5865dbcd01aSAnkur Arora         err = -EFAULT;
5875dbcd01aSAnkur Arora         goto out;
5885dbcd01aSAnkur Arora     }
5895dbcd01aSAnkur Arora 
5905dbcd01aSAnkur Arora     if (hp.domid != DOMID_SELF && hp.domid != xen_domid) {
5915dbcd01aSAnkur Arora         err = -ESRCH;
5925dbcd01aSAnkur Arora         goto out;
5935dbcd01aSAnkur Arora     }
5945dbcd01aSAnkur Arora 
5955dbcd01aSAnkur Arora     switch (hp.index) {
59691cce756SDavid Woodhouse     case HVM_PARAM_CALLBACK_IRQ:
59791cce756SDavid Woodhouse         err = xen_evtchn_set_callback_param(hp.value);
59891cce756SDavid Woodhouse         xen_set_long_mode(exit->u.hcall.longmode);
59991cce756SDavid Woodhouse         break;
6005dbcd01aSAnkur Arora     default:
6015dbcd01aSAnkur Arora         return false;
6025dbcd01aSAnkur Arora     }
6035dbcd01aSAnkur Arora 
6045dbcd01aSAnkur Arora out:
6055dbcd01aSAnkur Arora     exit->u.hcall.result = err;
6065dbcd01aSAnkur Arora     return true;
6075dbcd01aSAnkur Arora }
6085dbcd01aSAnkur Arora 
609105b47fdSAnkur Arora static int kvm_xen_hcall_evtchn_upcall_vector(struct kvm_xen_exit *exit,
610105b47fdSAnkur Arora                                               X86CPU *cpu, uint64_t arg)
611105b47fdSAnkur Arora {
612105b47fdSAnkur Arora     struct xen_hvm_evtchn_upcall_vector up;
613105b47fdSAnkur Arora     CPUState *target_cs;
614105b47fdSAnkur Arora 
615105b47fdSAnkur Arora     /* No need for 32/64 compat handling */
616105b47fdSAnkur Arora     qemu_build_assert(sizeof(up) == 8);
617105b47fdSAnkur Arora 
618105b47fdSAnkur Arora     if (kvm_copy_from_gva(CPU(cpu), arg, &up, sizeof(up))) {
619105b47fdSAnkur Arora         return -EFAULT;
620105b47fdSAnkur Arora     }
621105b47fdSAnkur Arora 
622105b47fdSAnkur Arora     if (up.vector < 0x10) {
623105b47fdSAnkur Arora         return -EINVAL;
624105b47fdSAnkur Arora     }
625105b47fdSAnkur Arora 
626105b47fdSAnkur Arora     target_cs = qemu_get_cpu(up.vcpu);
627105b47fdSAnkur Arora     if (!target_cs) {
628105b47fdSAnkur Arora         return -EINVAL;
629105b47fdSAnkur Arora     }
630105b47fdSAnkur Arora 
631105b47fdSAnkur Arora     async_run_on_cpu(target_cs, do_set_vcpu_callback_vector,
632105b47fdSAnkur Arora                      RUN_ON_CPU_HOST_INT(up.vector));
633105b47fdSAnkur Arora     return 0;
634105b47fdSAnkur Arora }
635105b47fdSAnkur Arora 
636671bfdcdSJoao Martins static bool kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, X86CPU *cpu,
637671bfdcdSJoao Martins                                  int cmd, uint64_t arg)
638671bfdcdSJoao Martins {
639105b47fdSAnkur Arora     int ret = -ENOSYS;
640671bfdcdSJoao Martins     switch (cmd) {
641105b47fdSAnkur Arora     case HVMOP_set_evtchn_upcall_vector:
642105b47fdSAnkur Arora         ret = kvm_xen_hcall_evtchn_upcall_vector(exit, cpu,
643105b47fdSAnkur Arora                                                  exit->u.hcall.params[0]);
644105b47fdSAnkur Arora         break;
645105b47fdSAnkur Arora 
646671bfdcdSJoao Martins     case HVMOP_pagetable_dying:
647105b47fdSAnkur Arora         ret = -ENOSYS;
648105b47fdSAnkur Arora         break;
649671bfdcdSJoao Martins 
6505dbcd01aSAnkur Arora     case HVMOP_set_param:
6515dbcd01aSAnkur Arora         return handle_set_param(exit, cpu, arg);
6525dbcd01aSAnkur Arora 
653671bfdcdSJoao Martins     default:
654671bfdcdSJoao Martins         return false;
655671bfdcdSJoao Martins     }
656105b47fdSAnkur Arora 
657105b47fdSAnkur Arora     exit->u.hcall.result = ret;
658105b47fdSAnkur Arora     return true;
659671bfdcdSJoao Martins }
660671bfdcdSJoao Martins 
661c345104cSJoao Martins static int vcpuop_register_vcpu_info(CPUState *cs, CPUState *target,
662c345104cSJoao Martins                                      uint64_t arg)
663c345104cSJoao Martins {
664c345104cSJoao Martins     struct vcpu_register_vcpu_info rvi;
665c345104cSJoao Martins     uint64_t gpa;
666c345104cSJoao Martins 
667c345104cSJoao Martins     /* No need for 32/64 compat handling */
668c345104cSJoao Martins     qemu_build_assert(sizeof(rvi) == 16);
669c345104cSJoao Martins     qemu_build_assert(sizeof(struct vcpu_info) == 64);
670c345104cSJoao Martins 
671c345104cSJoao Martins     if (!target) {
672c345104cSJoao Martins         return -ENOENT;
673c345104cSJoao Martins     }
674c345104cSJoao Martins 
675c345104cSJoao Martins     if (kvm_copy_from_gva(cs, arg, &rvi, sizeof(rvi))) {
676c345104cSJoao Martins         return -EFAULT;
677c345104cSJoao Martins     }
678c345104cSJoao Martins 
679c345104cSJoao Martins     if (rvi.offset > TARGET_PAGE_SIZE - sizeof(struct vcpu_info)) {
680c345104cSJoao Martins         return -EINVAL;
681c345104cSJoao Martins     }
682c345104cSJoao Martins 
683c345104cSJoao Martins     gpa = ((rvi.mfn << TARGET_PAGE_BITS) + rvi.offset);
684c345104cSJoao Martins     async_run_on_cpu(target, do_set_vcpu_info_gpa, RUN_ON_CPU_HOST_ULONG(gpa));
685c345104cSJoao Martins     return 0;
686c345104cSJoao Martins }
687c345104cSJoao Martins 
688f0689302SJoao Martins static int vcpuop_register_vcpu_time_info(CPUState *cs, CPUState *target,
689f0689302SJoao Martins                                           uint64_t arg)
690f0689302SJoao Martins {
691f0689302SJoao Martins     struct vcpu_register_time_memory_area tma;
692f0689302SJoao Martins     uint64_t gpa;
693f0689302SJoao Martins     size_t len;
694f0689302SJoao Martins 
695f0689302SJoao Martins     /* No need for 32/64 compat handling */
696f0689302SJoao Martins     qemu_build_assert(sizeof(tma) == 8);
697f0689302SJoao Martins     qemu_build_assert(sizeof(struct vcpu_time_info) == 32);
698f0689302SJoao Martins 
699f0689302SJoao Martins     if (!target) {
700f0689302SJoao Martins         return -ENOENT;
701f0689302SJoao Martins     }
702f0689302SJoao Martins 
703f0689302SJoao Martins     if (kvm_copy_from_gva(cs, arg, &tma, sizeof(tma))) {
704f0689302SJoao Martins         return -EFAULT;
705f0689302SJoao Martins     }
706f0689302SJoao Martins 
707f0689302SJoao Martins     /*
708f0689302SJoao Martins      * Xen actually uses the GVA and does the translation through the guest
709f0689302SJoao Martins      * page tables each time. But Linux/KVM uses the GPA, on the assumption
710f0689302SJoao Martins      * that guests only ever use *global* addresses (kernel virtual addresses)
711f0689302SJoao Martins      * for it. If Linux is changed to redo the GVA→GPA translation each time,
712f0689302SJoao Martins      * it will offer a new vCPU attribute for that, and we'll use it instead.
713f0689302SJoao Martins      */
714f0689302SJoao Martins     if (!kvm_gva_to_gpa(cs, tma.addr.p, &gpa, &len, false) ||
715f0689302SJoao Martins         len < sizeof(struct vcpu_time_info)) {
716f0689302SJoao Martins         return -EFAULT;
717f0689302SJoao Martins     }
718f0689302SJoao Martins 
719f0689302SJoao Martins     async_run_on_cpu(target, do_set_vcpu_time_info_gpa,
720f0689302SJoao Martins                      RUN_ON_CPU_HOST_ULONG(gpa));
721f0689302SJoao Martins     return 0;
722f0689302SJoao Martins }
723f0689302SJoao Martins 
7245092db87SJoao Martins static int vcpuop_register_runstate_info(CPUState *cs, CPUState *target,
7255092db87SJoao Martins                                          uint64_t arg)
7265092db87SJoao Martins {
7275092db87SJoao Martins     struct vcpu_register_runstate_memory_area rma;
7285092db87SJoao Martins     uint64_t gpa;
7295092db87SJoao Martins     size_t len;
7305092db87SJoao Martins 
7315092db87SJoao Martins     /* No need for 32/64 compat handling */
7325092db87SJoao Martins     qemu_build_assert(sizeof(rma) == 8);
7335092db87SJoao Martins     /* The runstate area actually does change size, but Linux copes. */
7345092db87SJoao Martins 
7355092db87SJoao Martins     if (!target) {
7365092db87SJoao Martins         return -ENOENT;
7375092db87SJoao Martins     }
7385092db87SJoao Martins 
7395092db87SJoao Martins     if (kvm_copy_from_gva(cs, arg, &rma, sizeof(rma))) {
7405092db87SJoao Martins         return -EFAULT;
7415092db87SJoao Martins     }
7425092db87SJoao Martins 
7435092db87SJoao Martins     /* As with vcpu_time_info, Xen actually uses the GVA but KVM doesn't. */
7445092db87SJoao Martins     if (!kvm_gva_to_gpa(cs, rma.addr.p, &gpa, &len, false)) {
7455092db87SJoao Martins         return -EFAULT;
7465092db87SJoao Martins     }
7475092db87SJoao Martins 
7485092db87SJoao Martins     async_run_on_cpu(target, do_set_vcpu_runstate_gpa,
7495092db87SJoao Martins                      RUN_ON_CPU_HOST_ULONG(gpa));
7505092db87SJoao Martins     return 0;
7515092db87SJoao Martins }
7525092db87SJoao Martins 
753d70bd6a4SJoao Martins static bool kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, X86CPU *cpu,
754d70bd6a4SJoao Martins                                   int cmd, int vcpu_id, uint64_t arg)
755d70bd6a4SJoao Martins {
756c345104cSJoao Martins     CPUState *dest = qemu_get_cpu(vcpu_id);
757c345104cSJoao Martins     CPUState *cs = CPU(cpu);
758d70bd6a4SJoao Martins     int err;
759d70bd6a4SJoao Martins 
760d70bd6a4SJoao Martins     switch (cmd) {
7615092db87SJoao Martins     case VCPUOP_register_runstate_memory_area:
7625092db87SJoao Martins         err = vcpuop_register_runstate_info(cs, dest, arg);
7635092db87SJoao Martins         break;
764f0689302SJoao Martins     case VCPUOP_register_vcpu_time_memory_area:
765f0689302SJoao Martins         err = vcpuop_register_vcpu_time_info(cs, dest, arg);
766f0689302SJoao Martins         break;
767d70bd6a4SJoao Martins     case VCPUOP_register_vcpu_info:
768c345104cSJoao Martins         err = vcpuop_register_vcpu_info(cs, dest, arg);
769d70bd6a4SJoao Martins         break;
770d70bd6a4SJoao Martins 
771d70bd6a4SJoao Martins     default:
772d70bd6a4SJoao Martins         return false;
773d70bd6a4SJoao Martins     }
774d70bd6a4SJoao Martins 
775d70bd6a4SJoao Martins     exit->u.hcall.result = err;
776d70bd6a4SJoao Martins     return true;
777d70bd6a4SJoao Martins }
778d70bd6a4SJoao Martins 
7793b06f29bSJoao Martins static bool kvm_xen_hcall_evtchn_op(struct kvm_xen_exit *exit,
7803b06f29bSJoao Martins                                     int cmd, uint64_t arg)
7813b06f29bSJoao Martins {
7823b06f29bSJoao Martins     int err = -ENOSYS;
7833b06f29bSJoao Martins 
7843b06f29bSJoao Martins     switch (cmd) {
7853b06f29bSJoao Martins     case EVTCHNOP_init_control:
7863b06f29bSJoao Martins     case EVTCHNOP_expand_array:
7873b06f29bSJoao Martins     case EVTCHNOP_set_priority:
7883b06f29bSJoao Martins         /* We do not support FIFO channels at this point */
7893b06f29bSJoao Martins         err = -ENOSYS;
7903b06f29bSJoao Martins         break;
7913b06f29bSJoao Martins 
7923b06f29bSJoao Martins     default:
7933b06f29bSJoao Martins         return false;
7943b06f29bSJoao Martins     }
7953b06f29bSJoao Martins 
7963b06f29bSJoao Martins     exit->u.hcall.result = err;
7973b06f29bSJoao Martins     return true;
7983b06f29bSJoao Martins }
7993b06f29bSJoao Martins 
80079b7067dSJoao Martins int kvm_xen_soft_reset(void)
80179b7067dSJoao Martins {
802c345104cSJoao Martins     CPUState *cpu;
803fb0fd2ceSJoao Martins     int err;
804fb0fd2ceSJoao Martins 
80579b7067dSJoao Martins     assert(qemu_mutex_iothread_locked());
80679b7067dSJoao Martins 
80779b7067dSJoao Martins     trace_kvm_xen_soft_reset();
80879b7067dSJoao Martins 
80991cce756SDavid Woodhouse     /*
81091cce756SDavid Woodhouse      * Zero is the reset/startup state for HVM_PARAM_CALLBACK_IRQ. Strictly,
81191cce756SDavid Woodhouse      * it maps to HVM_PARAM_CALLBACK_TYPE_GSI with GSI#0, but Xen refuses to
81291cce756SDavid Woodhouse      * to deliver to the timer interrupt and treats that as 'disabled'.
81391cce756SDavid Woodhouse      */
81491cce756SDavid Woodhouse     err = xen_evtchn_set_callback_param(0);
81591cce756SDavid Woodhouse     if (err) {
81691cce756SDavid Woodhouse         return err;
81791cce756SDavid Woodhouse     }
81891cce756SDavid Woodhouse 
819c345104cSJoao Martins     CPU_FOREACH(cpu) {
820c345104cSJoao Martins         async_run_on_cpu(cpu, do_vcpu_soft_reset, RUN_ON_CPU_NULL);
821c345104cSJoao Martins     }
822c345104cSJoao Martins 
823fb0fd2ceSJoao Martins     err = xen_overlay_map_shinfo_page(INVALID_GFN);
824fb0fd2ceSJoao Martins     if (err) {
825fb0fd2ceSJoao Martins         return err;
826fb0fd2ceSJoao Martins     }
827fb0fd2ceSJoao Martins 
82879b7067dSJoao Martins     return 0;
82979b7067dSJoao Martins }
83079b7067dSJoao Martins 
83179b7067dSJoao Martins static int schedop_shutdown(CPUState *cs, uint64_t arg)
83279b7067dSJoao Martins {
83379b7067dSJoao Martins     struct sched_shutdown shutdown;
83479b7067dSJoao Martins     int ret = 0;
83579b7067dSJoao Martins 
83679b7067dSJoao Martins     /* No need for 32/64 compat handling */
83779b7067dSJoao Martins     qemu_build_assert(sizeof(shutdown) == 4);
83879b7067dSJoao Martins 
83979b7067dSJoao Martins     if (kvm_copy_from_gva(cs, arg, &shutdown, sizeof(shutdown))) {
84079b7067dSJoao Martins         return -EFAULT;
84179b7067dSJoao Martins     }
84279b7067dSJoao Martins 
84379b7067dSJoao Martins     switch (shutdown.reason) {
84479b7067dSJoao Martins     case SHUTDOWN_crash:
84579b7067dSJoao Martins         cpu_dump_state(cs, stderr, CPU_DUMP_CODE);
84679b7067dSJoao Martins         qemu_system_guest_panicked(NULL);
84779b7067dSJoao Martins         break;
84879b7067dSJoao Martins 
84979b7067dSJoao Martins     case SHUTDOWN_reboot:
85079b7067dSJoao Martins         qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
85179b7067dSJoao Martins         break;
85279b7067dSJoao Martins 
85379b7067dSJoao Martins     case SHUTDOWN_poweroff:
85479b7067dSJoao Martins         qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
85579b7067dSJoao Martins         break;
85679b7067dSJoao Martins 
85779b7067dSJoao Martins     case SHUTDOWN_soft_reset:
85879b7067dSJoao Martins         qemu_mutex_lock_iothread();
85979b7067dSJoao Martins         ret = kvm_xen_soft_reset();
86079b7067dSJoao Martins         qemu_mutex_unlock_iothread();
86179b7067dSJoao Martins         break;
86279b7067dSJoao Martins 
86379b7067dSJoao Martins     default:
86479b7067dSJoao Martins         ret = -EINVAL;
86579b7067dSJoao Martins         break;
86679b7067dSJoao Martins     }
86779b7067dSJoao Martins 
86879b7067dSJoao Martins     return ret;
86979b7067dSJoao Martins }
87079b7067dSJoao Martins 
87179b7067dSJoao Martins static bool kvm_xen_hcall_sched_op(struct kvm_xen_exit *exit, X86CPU *cpu,
87279b7067dSJoao Martins                                    int cmd, uint64_t arg)
87379b7067dSJoao Martins {
87479b7067dSJoao Martins     CPUState *cs = CPU(cpu);
87579b7067dSJoao Martins     int err = -ENOSYS;
87679b7067dSJoao Martins 
87779b7067dSJoao Martins     switch (cmd) {
87879b7067dSJoao Martins     case SCHEDOP_shutdown:
87979b7067dSJoao Martins         err = schedop_shutdown(cs, arg);
88079b7067dSJoao Martins         break;
88179b7067dSJoao Martins 
882c789b9efSDavid Woodhouse     case SCHEDOP_poll:
883c789b9efSDavid Woodhouse         /*
884c789b9efSDavid Woodhouse          * Linux will panic if this doesn't work. Just yield; it's not
885c789b9efSDavid Woodhouse          * worth overthinking it because with event channel handling
886c789b9efSDavid Woodhouse          * in KVM, the kernel will intercept this and it will never
887c789b9efSDavid Woodhouse          * reach QEMU anyway. The semantics of the hypercall explicltly
888c789b9efSDavid Woodhouse          * permit spurious wakeups.
889c789b9efSDavid Woodhouse          */
890c789b9efSDavid Woodhouse     case SCHEDOP_yield:
891c789b9efSDavid Woodhouse         sched_yield();
892c789b9efSDavid Woodhouse         err = 0;
893c789b9efSDavid Woodhouse         break;
894c789b9efSDavid Woodhouse 
89579b7067dSJoao Martins     default:
89679b7067dSJoao Martins         return false;
89779b7067dSJoao Martins     }
89879b7067dSJoao Martins 
89979b7067dSJoao Martins     exit->u.hcall.result = err;
90079b7067dSJoao Martins     return true;
90179b7067dSJoao Martins }
90279b7067dSJoao Martins 
90355a3f666SJoao Martins static bool do_kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
90455a3f666SJoao Martins {
90555a3f666SJoao Martins     uint16_t code = exit->u.hcall.input;
90655a3f666SJoao Martins 
90755a3f666SJoao Martins     if (exit->u.hcall.cpl > 0) {
90855a3f666SJoao Martins         exit->u.hcall.result = -EPERM;
90955a3f666SJoao Martins         return true;
91055a3f666SJoao Martins     }
91155a3f666SJoao Martins 
91255a3f666SJoao Martins     switch (code) {
91379b7067dSJoao Martins     case __HYPERVISOR_sched_op:
91479b7067dSJoao Martins         return kvm_xen_hcall_sched_op(exit, cpu, exit->u.hcall.params[0],
91579b7067dSJoao Martins                                       exit->u.hcall.params[1]);
9163b06f29bSJoao Martins     case __HYPERVISOR_event_channel_op:
9173b06f29bSJoao Martins         return kvm_xen_hcall_evtchn_op(exit, exit->u.hcall.params[0],
9183b06f29bSJoao Martins                                        exit->u.hcall.params[1]);
919d70bd6a4SJoao Martins     case __HYPERVISOR_vcpu_op:
920d70bd6a4SJoao Martins         return kvm_xen_hcall_vcpu_op(exit, cpu,
921d70bd6a4SJoao Martins                                      exit->u.hcall.params[0],
922d70bd6a4SJoao Martins                                      exit->u.hcall.params[1],
923d70bd6a4SJoao Martins                                      exit->u.hcall.params[2]);
924671bfdcdSJoao Martins     case __HYPERVISOR_hvm_op:
925671bfdcdSJoao Martins         return kvm_xen_hcall_hvm_op(exit, cpu, exit->u.hcall.params[0],
926671bfdcdSJoao Martins                                     exit->u.hcall.params[1]);
927fb0fd2ceSJoao Martins     case __HYPERVISOR_memory_op:
928fb0fd2ceSJoao Martins         return kvm_xen_hcall_memory_op(exit, cpu, exit->u.hcall.params[0],
929fb0fd2ceSJoao Martins                                        exit->u.hcall.params[1]);
930bedcc139SJoao Martins     case __HYPERVISOR_xen_version:
931bedcc139SJoao Martins         return kvm_xen_hcall_xen_version(exit, cpu, exit->u.hcall.params[0],
932bedcc139SJoao Martins                                          exit->u.hcall.params[1]);
93355a3f666SJoao Martins     default:
93455a3f666SJoao Martins         return false;
93555a3f666SJoao Martins     }
93655a3f666SJoao Martins }
93755a3f666SJoao Martins 
93855a3f666SJoao Martins int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
93955a3f666SJoao Martins {
94055a3f666SJoao Martins     if (exit->type != KVM_EXIT_XEN_HCALL) {
94155a3f666SJoao Martins         return -1;
94255a3f666SJoao Martins     }
94355a3f666SJoao Martins 
944110a0ea5SDavid Woodhouse     /*
945110a0ea5SDavid Woodhouse      * The kernel latches the guest 32/64 mode when the MSR is used to fill
946110a0ea5SDavid Woodhouse      * the hypercall page. So if we see a hypercall in a mode that doesn't
947110a0ea5SDavid Woodhouse      * match our own idea of the guest mode, fetch the kernel's idea of the
948110a0ea5SDavid Woodhouse      * "long mode" to remain in sync.
949110a0ea5SDavid Woodhouse      */
950110a0ea5SDavid Woodhouse     if (exit->u.hcall.longmode != xen_is_long_mode()) {
951110a0ea5SDavid Woodhouse         xen_sync_long_mode();
952110a0ea5SDavid Woodhouse     }
953110a0ea5SDavid Woodhouse 
95455a3f666SJoao Martins     if (!do_kvm_xen_handle_exit(cpu, exit)) {
95555a3f666SJoao Martins         /*
95655a3f666SJoao Martins          * Some hypercalls will be deliberately "implemented" by returning
95755a3f666SJoao Martins          * -ENOSYS. This case is for hypercalls which are unexpected.
95855a3f666SJoao Martins          */
95955a3f666SJoao Martins         exit->u.hcall.result = -ENOSYS;
96055a3f666SJoao Martins         qemu_log_mask(LOG_UNIMP, "Unimplemented Xen hypercall %"
96155a3f666SJoao Martins                       PRId64 " (0x%" PRIx64 " 0x%" PRIx64 " 0x%" PRIx64 ")\n",
96255a3f666SJoao Martins                       (uint64_t)exit->u.hcall.input,
96355a3f666SJoao Martins                       (uint64_t)exit->u.hcall.params[0],
96455a3f666SJoao Martins                       (uint64_t)exit->u.hcall.params[1],
96555a3f666SJoao Martins                       (uint64_t)exit->u.hcall.params[2]);
96655a3f666SJoao Martins     }
96755a3f666SJoao Martins 
96855a3f666SJoao Martins     trace_kvm_xen_hypercall(CPU(cpu)->cpu_index, exit->u.hcall.cpl,
96955a3f666SJoao Martins                             exit->u.hcall.input, exit->u.hcall.params[0],
97055a3f666SJoao Martins                             exit->u.hcall.params[1], exit->u.hcall.params[2],
97155a3f666SJoao Martins                             exit->u.hcall.result);
97255a3f666SJoao Martins     return 0;
97355a3f666SJoao Martins }
974c345104cSJoao Martins 
975c345104cSJoao Martins int kvm_put_xen_state(CPUState *cs)
976c345104cSJoao Martins {
977c345104cSJoao Martins     X86CPU *cpu = X86_CPU(cs);
978c345104cSJoao Martins     CPUX86State *env = &cpu->env;
979c345104cSJoao Martins     uint64_t gpa;
980c345104cSJoao Martins     int ret;
981c345104cSJoao Martins 
982c345104cSJoao Martins     gpa = env->xen_vcpu_info_gpa;
983c345104cSJoao Martins     if (gpa == INVALID_GPA) {
984c345104cSJoao Martins         gpa = env->xen_vcpu_info_default_gpa;
985c345104cSJoao Martins     }
986c345104cSJoao Martins 
987c345104cSJoao Martins     if (gpa != INVALID_GPA) {
988*27d4075dSDavid Woodhouse         ret = set_vcpu_info(cs, gpa);
989c345104cSJoao Martins         if (ret < 0) {
990c345104cSJoao Martins             return ret;
991c345104cSJoao Martins         }
992c345104cSJoao Martins     }
993c345104cSJoao Martins 
994f0689302SJoao Martins     gpa = env->xen_vcpu_time_info_gpa;
995f0689302SJoao Martins     if (gpa != INVALID_GPA) {
996f0689302SJoao Martins         ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
997f0689302SJoao Martins                                     gpa);
998f0689302SJoao Martins         if (ret < 0) {
999f0689302SJoao Martins             return ret;
1000f0689302SJoao Martins         }
1001f0689302SJoao Martins     }
1002f0689302SJoao Martins 
10035092db87SJoao Martins     gpa = env->xen_vcpu_runstate_gpa;
10045092db87SJoao Martins     if (gpa != INVALID_GPA) {
10055092db87SJoao Martins         ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
10065092db87SJoao Martins                                     gpa);
10075092db87SJoao Martins         if (ret < 0) {
10085092db87SJoao Martins             return ret;
10095092db87SJoao Martins         }
10105092db87SJoao Martins     }
10115092db87SJoao Martins 
1012105b47fdSAnkur Arora     if (!kvm_xen_has_cap(EVTCHN_SEND)) {
1013105b47fdSAnkur Arora         return 0;
1014105b47fdSAnkur Arora     }
1015105b47fdSAnkur Arora 
1016105b47fdSAnkur Arora     if (env->xen_vcpu_callback_vector) {
1017105b47fdSAnkur Arora         ret = kvm_xen_set_vcpu_callback_vector(cs);
1018105b47fdSAnkur Arora         if (ret < 0) {
1019105b47fdSAnkur Arora             return ret;
1020105b47fdSAnkur Arora         }
1021105b47fdSAnkur Arora     }
1022105b47fdSAnkur Arora 
1023c345104cSJoao Martins     return 0;
1024c345104cSJoao Martins }
1025c345104cSJoao Martins 
1026c345104cSJoao Martins int kvm_get_xen_state(CPUState *cs)
1027c345104cSJoao Martins {
1028c345104cSJoao Martins     X86CPU *cpu = X86_CPU(cs);
1029c345104cSJoao Martins     CPUX86State *env = &cpu->env;
1030c345104cSJoao Martins     uint64_t gpa;
1031c345104cSJoao Martins 
1032c345104cSJoao Martins     /*
1033c345104cSJoao Martins      * The kernel does not mark vcpu_info as dirty when it delivers interrupts
1034c345104cSJoao Martins      * to it. It's up to userspace to *assume* that any page shared thus is
1035c345104cSJoao Martins      * always considered dirty. The shared_info page is different since it's
1036c345104cSJoao Martins      * an overlay and migrated separately anyway.
1037c345104cSJoao Martins      */
1038c345104cSJoao Martins     gpa = env->xen_vcpu_info_gpa;
1039c345104cSJoao Martins     if (gpa == INVALID_GPA) {
1040c345104cSJoao Martins         gpa = env->xen_vcpu_info_default_gpa;
1041c345104cSJoao Martins     }
1042c345104cSJoao Martins     if (gpa != INVALID_GPA) {
1043c345104cSJoao Martins         MemoryRegionSection mrs = memory_region_find(get_system_memory(),
1044c345104cSJoao Martins                                                      gpa,
1045c345104cSJoao Martins                                                      sizeof(struct vcpu_info));
1046c345104cSJoao Martins         if (mrs.mr &&
1047c345104cSJoao Martins             !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
1048c345104cSJoao Martins             memory_region_set_dirty(mrs.mr, mrs.offset_within_region,
1049c345104cSJoao Martins                                     sizeof(struct vcpu_info));
1050c345104cSJoao Martins         }
1051c345104cSJoao Martins     }
1052c345104cSJoao Martins 
1053c345104cSJoao Martins     return 0;
1054c345104cSJoao Martins }
1055