xref: /qemu/target/i386/kvm/xen-emu.c (revision b746a77926f6e84bdb35a38a9ee956ac12693757)
161491cf4SDavid Woodhouse /*
261491cf4SDavid Woodhouse  * Xen HVM emulation support in KVM
361491cf4SDavid Woodhouse  *
461491cf4SDavid Woodhouse  * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
561491cf4SDavid Woodhouse  * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
661491cf4SDavid Woodhouse  *
761491cf4SDavid Woodhouse  * This work is licensed under the terms of the GNU GPL, version 2 or later.
861491cf4SDavid Woodhouse  * See the COPYING file in the top-level directory.
961491cf4SDavid Woodhouse  *
1061491cf4SDavid Woodhouse  */
1161491cf4SDavid Woodhouse 
1261491cf4SDavid Woodhouse #include "qemu/osdep.h"
1355a3f666SJoao Martins #include "qemu/log.h"
1479b7067dSJoao Martins #include "qemu/main-loop.h"
15fb0fd2ceSJoao Martins #include "hw/xen/xen.h"
1661491cf4SDavid Woodhouse #include "sysemu/kvm_int.h"
1761491cf4SDavid Woodhouse #include "sysemu/kvm_xen.h"
1861491cf4SDavid Woodhouse #include "kvm/kvm_i386.h"
19bedcc139SJoao Martins #include "exec/address-spaces.h"
2061491cf4SDavid Woodhouse #include "xen-emu.h"
2155a3f666SJoao Martins #include "trace.h"
2279b7067dSJoao Martins #include "sysemu/runstate.h"
2361491cf4SDavid Woodhouse 
2427d4075dSDavid Woodhouse #include "hw/pci/msi.h"
2527d4075dSDavid Woodhouse #include "hw/i386/apic-msidef.h"
26110a0ea5SDavid Woodhouse #include "hw/i386/kvm/xen_overlay.h"
2791cce756SDavid Woodhouse #include "hw/i386/kvm/xen_evtchn.h"
28a28b0fc0SDavid Woodhouse #include "hw/i386/kvm/xen_gnttab.h"
29110a0ea5SDavid Woodhouse 
30bedcc139SJoao Martins #include "hw/xen/interface/version.h"
3179b7067dSJoao Martins #include "hw/xen/interface/sched.h"
32fb0fd2ceSJoao Martins #include "hw/xen/interface/memory.h"
33671bfdcdSJoao Martins #include "hw/xen/interface/hvm/hvm_op.h"
34105b47fdSAnkur Arora #include "hw/xen/interface/hvm/params.h"
35d70bd6a4SJoao Martins #include "hw/xen/interface/vcpu.h"
363b06f29bSJoao Martins #include "hw/xen/interface/event_channel.h"
3728b7ae94SDavid Woodhouse #include "hw/xen/interface/grant_table.h"
38fb0fd2ceSJoao Martins 
39fb0fd2ceSJoao Martins #include "xen-compat.h"
40fb0fd2ceSJoao Martins 
41*b746a779SJoao Martins static void xen_vcpu_singleshot_timer_event(void *opaque);
42*b746a779SJoao Martins static void xen_vcpu_periodic_timer_event(void *opaque);
43*b746a779SJoao Martins 
44fb0fd2ceSJoao Martins #ifdef TARGET_X86_64
45fb0fd2ceSJoao Martins #define hypercall_compat32(longmode) (!(longmode))
46fb0fd2ceSJoao Martins #else
47fb0fd2ceSJoao Martins #define hypercall_compat32(longmode) (false)
48fb0fd2ceSJoao Martins #endif
49bedcc139SJoao Martins 
50f0689302SJoao Martins static bool kvm_gva_to_gpa(CPUState *cs, uint64_t gva, uint64_t *gpa,
51f0689302SJoao Martins                            size_t *len, bool is_write)
52bedcc139SJoao Martins {
53bedcc139SJoao Martins         struct kvm_translation tr = {
54bedcc139SJoao Martins             .linear_address = gva,
55bedcc139SJoao Martins         };
56bedcc139SJoao Martins 
57f0689302SJoao Martins         if (len) {
58f0689302SJoao Martins             *len = TARGET_PAGE_SIZE - (gva & ~TARGET_PAGE_MASK);
59f0689302SJoao Martins         }
60f0689302SJoao Martins 
61f0689302SJoao Martins         if (kvm_vcpu_ioctl(cs, KVM_TRANSLATE, &tr) || !tr.valid ||
62f0689302SJoao Martins             (is_write && !tr.writeable)) {
63f0689302SJoao Martins             return false;
64f0689302SJoao Martins         }
65f0689302SJoao Martins         *gpa = tr.physical_address;
66f0689302SJoao Martins         return true;
67f0689302SJoao Martins }
68f0689302SJoao Martins 
69f0689302SJoao Martins static int kvm_gva_rw(CPUState *cs, uint64_t gva, void *_buf, size_t sz,
70f0689302SJoao Martins                       bool is_write)
71f0689302SJoao Martins {
72f0689302SJoao Martins     uint8_t *buf = (uint8_t *)_buf;
73f0689302SJoao Martins     uint64_t gpa;
74f0689302SJoao Martins     size_t len;
75f0689302SJoao Martins 
76f0689302SJoao Martins     while (sz) {
77f0689302SJoao Martins         if (!kvm_gva_to_gpa(cs, gva, &gpa, &len, is_write)) {
78f0689302SJoao Martins             return -EFAULT;
79f0689302SJoao Martins         }
80bedcc139SJoao Martins         if (len > sz) {
81bedcc139SJoao Martins             len = sz;
82bedcc139SJoao Martins         }
83bedcc139SJoao Martins 
84f0689302SJoao Martins         cpu_physical_memory_rw(gpa, buf, len, is_write);
85bedcc139SJoao Martins 
86bedcc139SJoao Martins         buf += len;
87bedcc139SJoao Martins         sz -= len;
88bedcc139SJoao Martins         gva += len;
89bedcc139SJoao Martins     }
90bedcc139SJoao Martins 
91bedcc139SJoao Martins     return 0;
92bedcc139SJoao Martins }
93bedcc139SJoao Martins 
94bedcc139SJoao Martins static inline int kvm_copy_from_gva(CPUState *cs, uint64_t gva, void *buf,
95bedcc139SJoao Martins                                     size_t sz)
96bedcc139SJoao Martins {
97bedcc139SJoao Martins     return kvm_gva_rw(cs, gva, buf, sz, false);
98bedcc139SJoao Martins }
99bedcc139SJoao Martins 
100bedcc139SJoao Martins static inline int kvm_copy_to_gva(CPUState *cs, uint64_t gva, void *buf,
101bedcc139SJoao Martins                                   size_t sz)
102bedcc139SJoao Martins {
103bedcc139SJoao Martins     return kvm_gva_rw(cs, gva, buf, sz, true);
104bedcc139SJoao Martins }
105bedcc139SJoao Martins 
106f66b8a83SJoao Martins int kvm_xen_init(KVMState *s, uint32_t hypercall_msr)
10761491cf4SDavid Woodhouse {
10861491cf4SDavid Woodhouse     const int required_caps = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
10961491cf4SDavid Woodhouse         KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_SHARED_INFO;
11061491cf4SDavid Woodhouse     struct kvm_xen_hvm_config cfg = {
111f66b8a83SJoao Martins         .msr = hypercall_msr,
11261491cf4SDavid Woodhouse         .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
11361491cf4SDavid Woodhouse     };
11461491cf4SDavid Woodhouse     int xen_caps, ret;
11561491cf4SDavid Woodhouse 
11661491cf4SDavid Woodhouse     xen_caps = kvm_check_extension(s, KVM_CAP_XEN_HVM);
11761491cf4SDavid Woodhouse     if (required_caps & ~xen_caps) {
11861491cf4SDavid Woodhouse         error_report("kvm: Xen HVM guest support not present or insufficient");
11961491cf4SDavid Woodhouse         return -ENOSYS;
12061491cf4SDavid Woodhouse     }
12161491cf4SDavid Woodhouse 
12261491cf4SDavid Woodhouse     if (xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND) {
12361491cf4SDavid Woodhouse         struct kvm_xen_hvm_attr ha = {
12461491cf4SDavid Woodhouse             .type = KVM_XEN_ATTR_TYPE_XEN_VERSION,
12561491cf4SDavid Woodhouse             .u.xen_version = s->xen_version,
12661491cf4SDavid Woodhouse         };
12761491cf4SDavid Woodhouse         (void)kvm_vm_ioctl(s, KVM_XEN_HVM_SET_ATTR, &ha);
12861491cf4SDavid Woodhouse 
12961491cf4SDavid Woodhouse         cfg.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
13061491cf4SDavid Woodhouse     }
13161491cf4SDavid Woodhouse 
13261491cf4SDavid Woodhouse     ret = kvm_vm_ioctl(s, KVM_XEN_HVM_CONFIG, &cfg);
13361491cf4SDavid Woodhouse     if (ret < 0) {
13461491cf4SDavid Woodhouse         error_report("kvm: Failed to enable Xen HVM support: %s",
13561491cf4SDavid Woodhouse                      strerror(-ret));
13661491cf4SDavid Woodhouse         return ret;
13761491cf4SDavid Woodhouse     }
13861491cf4SDavid Woodhouse 
1392aff696bSDavid Woodhouse     /* If called a second time, don't repeat the rest of the setup. */
1402aff696bSDavid Woodhouse     if (s->xen_caps) {
1412aff696bSDavid Woodhouse         return 0;
1422aff696bSDavid Woodhouse     }
1432aff696bSDavid Woodhouse 
1442aff696bSDavid Woodhouse     /*
1452aff696bSDavid Woodhouse      * Event channel delivery via GSI/PCI_INTX needs to poll the vcpu_info
1462aff696bSDavid Woodhouse      * of vCPU0 to deassert the IRQ when ->evtchn_upcall_pending is cleared.
1472aff696bSDavid Woodhouse      *
1482aff696bSDavid Woodhouse      * In the kernel, there's a notifier hook on the PIC/IOAPIC which allows
1492aff696bSDavid Woodhouse      * such things to be polled at precisely the right time. We *could* do
1502aff696bSDavid Woodhouse      * it nicely in the kernel: check vcpu_info[0]->evtchn_upcall_pending at
1512aff696bSDavid Woodhouse      * the moment the IRQ is acked, and see if it should be reasserted.
1522aff696bSDavid Woodhouse      *
1532aff696bSDavid Woodhouse      * But the in-kernel irqchip is deprecated, so we're unlikely to add
1542aff696bSDavid Woodhouse      * that support in the kernel. Insist on using the split irqchip mode
1552aff696bSDavid Woodhouse      * instead.
1562aff696bSDavid Woodhouse      *
1572aff696bSDavid Woodhouse      * This leaves us polling for the level going low in QEMU, which lacks
1582aff696bSDavid Woodhouse      * the appropriate hooks in its PIC/IOAPIC code. Even VFIO is sending a
1592aff696bSDavid Woodhouse      * spurious 'ack' to an INTX IRQ every time there's any MMIO access to
1602aff696bSDavid Woodhouse      * the device (for which it has to unmap the device and trap access, for
1612aff696bSDavid Woodhouse      * some period after an IRQ!!). In the Xen case, we do it on exit from
1622aff696bSDavid Woodhouse      * KVM_RUN, if the flag is set to say that the GSI is currently asserted.
1632aff696bSDavid Woodhouse      * Which is kind of icky, but less so than the VFIO one. I may fix them
1642aff696bSDavid Woodhouse      * both later...
1652aff696bSDavid Woodhouse      */
1662aff696bSDavid Woodhouse     if (!kvm_kernel_irqchip_split()) {
1672aff696bSDavid Woodhouse         error_report("kvm: Xen support requires kernel-irqchip=split");
1682aff696bSDavid Woodhouse         return -EINVAL;
1692aff696bSDavid Woodhouse     }
1702aff696bSDavid Woodhouse 
17161491cf4SDavid Woodhouse     s->xen_caps = xen_caps;
17261491cf4SDavid Woodhouse     return 0;
17361491cf4SDavid Woodhouse }
17461491cf4SDavid Woodhouse 
1755e691a95SDavid Woodhouse int kvm_xen_init_vcpu(CPUState *cs)
1765e691a95SDavid Woodhouse {
177c345104cSJoao Martins     X86CPU *cpu = X86_CPU(cs);
178c345104cSJoao Martins     CPUX86State *env = &cpu->env;
1795e691a95SDavid Woodhouse     int err;
1805e691a95SDavid Woodhouse 
1815e691a95SDavid Woodhouse     /*
1825e691a95SDavid Woodhouse      * The kernel needs to know the Xen/ACPI vCPU ID because that's
1835e691a95SDavid Woodhouse      * what the guest uses in hypercalls such as timers. It doesn't
1845e691a95SDavid Woodhouse      * match the APIC ID which is generally used for talking to the
1855e691a95SDavid Woodhouse      * kernel about vCPUs. And if vCPU threads race with creating
1865e691a95SDavid Woodhouse      * their KVM vCPUs out of order, it doesn't necessarily match
1875e691a95SDavid Woodhouse      * with the kernel's internal vCPU indices either.
1885e691a95SDavid Woodhouse      */
1895e691a95SDavid Woodhouse     if (kvm_xen_has_cap(EVTCHN_SEND)) {
1905e691a95SDavid Woodhouse         struct kvm_xen_vcpu_attr va = {
1915e691a95SDavid Woodhouse             .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID,
1925e691a95SDavid Woodhouse             .u.vcpu_id = cs->cpu_index,
1935e691a95SDavid Woodhouse         };
1945e691a95SDavid Woodhouse         err = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
1955e691a95SDavid Woodhouse         if (err) {
1965e691a95SDavid Woodhouse             error_report("kvm: Failed to set Xen vCPU ID attribute: %s",
1975e691a95SDavid Woodhouse                          strerror(-err));
1985e691a95SDavid Woodhouse             return err;
1995e691a95SDavid Woodhouse         }
2005e691a95SDavid Woodhouse     }
2015e691a95SDavid Woodhouse 
202c345104cSJoao Martins     env->xen_vcpu_info_gpa = INVALID_GPA;
203c345104cSJoao Martins     env->xen_vcpu_info_default_gpa = INVALID_GPA;
204f0689302SJoao Martins     env->xen_vcpu_time_info_gpa = INVALID_GPA;
2055092db87SJoao Martins     env->xen_vcpu_runstate_gpa = INVALID_GPA;
206c345104cSJoao Martins 
207*b746a779SJoao Martins     qemu_mutex_init(&env->xen_timers_lock);
208*b746a779SJoao Martins     env->xen_singleshot_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
209*b746a779SJoao Martins                                              xen_vcpu_singleshot_timer_event,
210*b746a779SJoao Martins                                              cpu);
211*b746a779SJoao Martins     if (!env->xen_singleshot_timer) {
212*b746a779SJoao Martins         return -ENOMEM;
213*b746a779SJoao Martins     }
214*b746a779SJoao Martins     env->xen_singleshot_timer->opaque = cs;
215*b746a779SJoao Martins 
216*b746a779SJoao Martins     env->xen_periodic_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
217*b746a779SJoao Martins                                            xen_vcpu_periodic_timer_event,
218*b746a779SJoao Martins                                            cpu);
219*b746a779SJoao Martins     if (!env->xen_periodic_timer) {
220*b746a779SJoao Martins         return -ENOMEM;
221*b746a779SJoao Martins     }
222*b746a779SJoao Martins     env->xen_periodic_timer->opaque = cs;
223*b746a779SJoao Martins 
2245e691a95SDavid Woodhouse     return 0;
2255e691a95SDavid Woodhouse }
2265e691a95SDavid Woodhouse 
22761491cf4SDavid Woodhouse uint32_t kvm_xen_get_caps(void)
22861491cf4SDavid Woodhouse {
22961491cf4SDavid Woodhouse     return kvm_state->xen_caps;
23061491cf4SDavid Woodhouse }
23155a3f666SJoao Martins 
232bedcc139SJoao Martins static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu,
233bedcc139SJoao Martins                                      int cmd, uint64_t arg)
234bedcc139SJoao Martins {
235bedcc139SJoao Martins     int err = 0;
236bedcc139SJoao Martins 
237bedcc139SJoao Martins     switch (cmd) {
238bedcc139SJoao Martins     case XENVER_get_features: {
239bedcc139SJoao Martins         struct xen_feature_info fi;
240bedcc139SJoao Martins 
241bedcc139SJoao Martins         /* No need for 32/64 compat handling */
242bedcc139SJoao Martins         qemu_build_assert(sizeof(fi) == 8);
243bedcc139SJoao Martins 
244bedcc139SJoao Martins         err = kvm_copy_from_gva(CPU(cpu), arg, &fi, sizeof(fi));
245bedcc139SJoao Martins         if (err) {
246bedcc139SJoao Martins             break;
247bedcc139SJoao Martins         }
248bedcc139SJoao Martins 
249bedcc139SJoao Martins         fi.submap = 0;
250bedcc139SJoao Martins         if (fi.submap_idx == 0) {
251bedcc139SJoao Martins             fi.submap |= 1 << XENFEAT_writable_page_tables |
252bedcc139SJoao Martins                          1 << XENFEAT_writable_descriptor_tables |
253bedcc139SJoao Martins                          1 << XENFEAT_auto_translated_physmap |
254105b47fdSAnkur Arora                          1 << XENFEAT_supervisor_mode_kernel |
255*b746a779SJoao Martins                          1 << XENFEAT_hvm_callback_vector |
256*b746a779SJoao Martins                          1 << XENFEAT_hvm_safe_pvclock;
257bedcc139SJoao Martins         }
258bedcc139SJoao Martins 
259bedcc139SJoao Martins         err = kvm_copy_to_gva(CPU(cpu), arg, &fi, sizeof(fi));
260bedcc139SJoao Martins         break;
261bedcc139SJoao Martins     }
262bedcc139SJoao Martins 
263bedcc139SJoao Martins     default:
264bedcc139SJoao Martins         return false;
265bedcc139SJoao Martins     }
266bedcc139SJoao Martins 
267bedcc139SJoao Martins     exit->u.hcall.result = err;
268bedcc139SJoao Martins     return true;
269bedcc139SJoao Martins }
270bedcc139SJoao Martins 
271c345104cSJoao Martins static int kvm_xen_set_vcpu_attr(CPUState *cs, uint16_t type, uint64_t gpa)
272c345104cSJoao Martins {
273c345104cSJoao Martins     struct kvm_xen_vcpu_attr xhsi;
274c345104cSJoao Martins 
275c345104cSJoao Martins     xhsi.type = type;
276c345104cSJoao Martins     xhsi.u.gpa = gpa;
277c345104cSJoao Martins 
278c345104cSJoao Martins     trace_kvm_xen_set_vcpu_attr(cs->cpu_index, type, gpa);
279c345104cSJoao Martins 
280c345104cSJoao Martins     return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xhsi);
281c345104cSJoao Martins }
282c345104cSJoao Martins 
283105b47fdSAnkur Arora static int kvm_xen_set_vcpu_callback_vector(CPUState *cs)
284105b47fdSAnkur Arora {
285105b47fdSAnkur Arora     uint8_t vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
286105b47fdSAnkur Arora     struct kvm_xen_vcpu_attr xva;
287105b47fdSAnkur Arora 
288105b47fdSAnkur Arora     xva.type = KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR;
289105b47fdSAnkur Arora     xva.u.vector = vector;
290105b47fdSAnkur Arora 
291105b47fdSAnkur Arora     trace_kvm_xen_set_vcpu_callback(cs->cpu_index, vector);
292105b47fdSAnkur Arora 
293105b47fdSAnkur Arora     return kvm_vcpu_ioctl(cs, KVM_XEN_HVM_SET_ATTR, &xva);
294105b47fdSAnkur Arora }
295105b47fdSAnkur Arora 
296105b47fdSAnkur Arora static void do_set_vcpu_callback_vector(CPUState *cs, run_on_cpu_data data)
297105b47fdSAnkur Arora {
298105b47fdSAnkur Arora     X86CPU *cpu = X86_CPU(cs);
299105b47fdSAnkur Arora     CPUX86State *env = &cpu->env;
300105b47fdSAnkur Arora 
301105b47fdSAnkur Arora     env->xen_vcpu_callback_vector = data.host_int;
302105b47fdSAnkur Arora 
303105b47fdSAnkur Arora     if (kvm_xen_has_cap(EVTCHN_SEND)) {
304105b47fdSAnkur Arora         kvm_xen_set_vcpu_callback_vector(cs);
305105b47fdSAnkur Arora     }
306105b47fdSAnkur Arora }
307105b47fdSAnkur Arora 
30827d4075dSDavid Woodhouse static int set_vcpu_info(CPUState *cs, uint64_t gpa)
30927d4075dSDavid Woodhouse {
31027d4075dSDavid Woodhouse     X86CPU *cpu = X86_CPU(cs);
31127d4075dSDavid Woodhouse     CPUX86State *env = &cpu->env;
31227d4075dSDavid Woodhouse     MemoryRegionSection mrs = { .mr = NULL };
31327d4075dSDavid Woodhouse     void *vcpu_info_hva = NULL;
31427d4075dSDavid Woodhouse     int ret;
31527d4075dSDavid Woodhouse 
31627d4075dSDavid Woodhouse     ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, gpa);
31727d4075dSDavid Woodhouse     if (ret || gpa == INVALID_GPA) {
31827d4075dSDavid Woodhouse         goto out;
31927d4075dSDavid Woodhouse     }
32027d4075dSDavid Woodhouse 
32127d4075dSDavid Woodhouse     mrs = memory_region_find(get_system_memory(), gpa,
32227d4075dSDavid Woodhouse                              sizeof(struct vcpu_info));
32327d4075dSDavid Woodhouse     if (mrs.mr && mrs.mr->ram_block &&
32427d4075dSDavid Woodhouse         !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
32527d4075dSDavid Woodhouse         vcpu_info_hva = qemu_map_ram_ptr(mrs.mr->ram_block,
32627d4075dSDavid Woodhouse                                          mrs.offset_within_region);
32727d4075dSDavid Woodhouse     }
32827d4075dSDavid Woodhouse     if (!vcpu_info_hva) {
32927d4075dSDavid Woodhouse         if (mrs.mr) {
33027d4075dSDavid Woodhouse             memory_region_unref(mrs.mr);
33127d4075dSDavid Woodhouse             mrs.mr = NULL;
33227d4075dSDavid Woodhouse         }
33327d4075dSDavid Woodhouse         ret = -EINVAL;
33427d4075dSDavid Woodhouse     }
33527d4075dSDavid Woodhouse 
33627d4075dSDavid Woodhouse  out:
33727d4075dSDavid Woodhouse     if (env->xen_vcpu_info_mr) {
33827d4075dSDavid Woodhouse         memory_region_unref(env->xen_vcpu_info_mr);
33927d4075dSDavid Woodhouse     }
34027d4075dSDavid Woodhouse     env->xen_vcpu_info_hva = vcpu_info_hva;
34127d4075dSDavid Woodhouse     env->xen_vcpu_info_mr = mrs.mr;
34227d4075dSDavid Woodhouse     return ret;
34327d4075dSDavid Woodhouse }
34427d4075dSDavid Woodhouse 
345c345104cSJoao Martins static void do_set_vcpu_info_default_gpa(CPUState *cs, run_on_cpu_data data)
346c345104cSJoao Martins {
347c345104cSJoao Martins     X86CPU *cpu = X86_CPU(cs);
348c345104cSJoao Martins     CPUX86State *env = &cpu->env;
349c345104cSJoao Martins 
350c345104cSJoao Martins     env->xen_vcpu_info_default_gpa = data.host_ulong;
351c345104cSJoao Martins 
352c345104cSJoao Martins     /* Changing the default does nothing if a vcpu_info was explicitly set. */
353c345104cSJoao Martins     if (env->xen_vcpu_info_gpa == INVALID_GPA) {
35427d4075dSDavid Woodhouse         set_vcpu_info(cs, env->xen_vcpu_info_default_gpa);
355c345104cSJoao Martins     }
356c345104cSJoao Martins }
357c345104cSJoao Martins 
358c345104cSJoao Martins static void do_set_vcpu_info_gpa(CPUState *cs, run_on_cpu_data data)
359c345104cSJoao Martins {
360c345104cSJoao Martins     X86CPU *cpu = X86_CPU(cs);
361c345104cSJoao Martins     CPUX86State *env = &cpu->env;
362c345104cSJoao Martins 
363c345104cSJoao Martins     env->xen_vcpu_info_gpa = data.host_ulong;
364c345104cSJoao Martins 
36527d4075dSDavid Woodhouse     set_vcpu_info(cs, env->xen_vcpu_info_gpa);
36627d4075dSDavid Woodhouse }
36727d4075dSDavid Woodhouse 
36827d4075dSDavid Woodhouse void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id)
36927d4075dSDavid Woodhouse {
37027d4075dSDavid Woodhouse     CPUState *cs = qemu_get_cpu(vcpu_id);
37127d4075dSDavid Woodhouse     if (!cs) {
37227d4075dSDavid Woodhouse         return NULL;
37327d4075dSDavid Woodhouse     }
37427d4075dSDavid Woodhouse 
37527d4075dSDavid Woodhouse     return X86_CPU(cs)->env.xen_vcpu_info_hva;
37627d4075dSDavid Woodhouse }
37727d4075dSDavid Woodhouse 
378ddf0fd9aSDavid Woodhouse void kvm_xen_maybe_deassert_callback(CPUState *cs)
379ddf0fd9aSDavid Woodhouse {
380ddf0fd9aSDavid Woodhouse     CPUX86State *env = &X86_CPU(cs)->env;
381ddf0fd9aSDavid Woodhouse     struct vcpu_info *vi = env->xen_vcpu_info_hva;
382ddf0fd9aSDavid Woodhouse     if (!vi) {
383ddf0fd9aSDavid Woodhouse         return;
384ddf0fd9aSDavid Woodhouse     }
385ddf0fd9aSDavid Woodhouse 
386ddf0fd9aSDavid Woodhouse     /* If the evtchn_upcall_pending flag is cleared, turn the GSI off. */
387ddf0fd9aSDavid Woodhouse     if (!vi->evtchn_upcall_pending) {
388ddf0fd9aSDavid Woodhouse         qemu_mutex_lock_iothread();
389ddf0fd9aSDavid Woodhouse         /*
390ddf0fd9aSDavid Woodhouse          * Check again now we have the lock, because it may have been
391ddf0fd9aSDavid Woodhouse          * asserted in the interim. And we don't want to take the lock
392ddf0fd9aSDavid Woodhouse          * every time because this is a fast path.
393ddf0fd9aSDavid Woodhouse          */
394ddf0fd9aSDavid Woodhouse         if (!vi->evtchn_upcall_pending) {
395ddf0fd9aSDavid Woodhouse             X86_CPU(cs)->env.xen_callback_asserted = false;
396ddf0fd9aSDavid Woodhouse             xen_evtchn_set_callback_level(0);
397ddf0fd9aSDavid Woodhouse         }
398ddf0fd9aSDavid Woodhouse         qemu_mutex_unlock_iothread();
399ddf0fd9aSDavid Woodhouse     }
400ddf0fd9aSDavid Woodhouse }
401ddf0fd9aSDavid Woodhouse 
402ddf0fd9aSDavid Woodhouse void kvm_xen_set_callback_asserted(void)
403ddf0fd9aSDavid Woodhouse {
404ddf0fd9aSDavid Woodhouse     CPUState *cs = qemu_get_cpu(0);
405ddf0fd9aSDavid Woodhouse 
406ddf0fd9aSDavid Woodhouse     if (cs) {
407ddf0fd9aSDavid Woodhouse         X86_CPU(cs)->env.xen_callback_asserted = true;
408ddf0fd9aSDavid Woodhouse     }
409ddf0fd9aSDavid Woodhouse }
410ddf0fd9aSDavid Woodhouse 
41127d4075dSDavid Woodhouse void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type)
41227d4075dSDavid Woodhouse {
41327d4075dSDavid Woodhouse     CPUState *cs = qemu_get_cpu(vcpu_id);
41427d4075dSDavid Woodhouse     uint8_t vector;
41527d4075dSDavid Woodhouse 
41627d4075dSDavid Woodhouse     if (!cs) {
41727d4075dSDavid Woodhouse         return;
41827d4075dSDavid Woodhouse     }
41927d4075dSDavid Woodhouse 
42027d4075dSDavid Woodhouse     vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
42127d4075dSDavid Woodhouse     if (vector) {
42227d4075dSDavid Woodhouse         /*
42327d4075dSDavid Woodhouse          * The per-vCPU callback vector injected via lapic. Just
42427d4075dSDavid Woodhouse          * deliver it as an MSI.
42527d4075dSDavid Woodhouse          */
42627d4075dSDavid Woodhouse         MSIMessage msg = {
42727d4075dSDavid Woodhouse             .address = APIC_DEFAULT_ADDRESS | X86_CPU(cs)->apic_id,
42827d4075dSDavid Woodhouse             .data = vector | (1UL << MSI_DATA_LEVEL_SHIFT),
42927d4075dSDavid Woodhouse         };
43027d4075dSDavid Woodhouse         kvm_irqchip_send_msi(kvm_state, msg);
43127d4075dSDavid Woodhouse         return;
43227d4075dSDavid Woodhouse     }
43327d4075dSDavid Woodhouse 
43427d4075dSDavid Woodhouse     switch (type) {
43527d4075dSDavid Woodhouse     case HVM_PARAM_CALLBACK_TYPE_VECTOR:
43627d4075dSDavid Woodhouse         /*
43727d4075dSDavid Woodhouse          * If the evtchn_upcall_pending field in the vcpu_info is set, then
43827d4075dSDavid Woodhouse          * KVM will automatically deliver the vector on entering the vCPU
43927d4075dSDavid Woodhouse          * so all we have to do is kick it out.
44027d4075dSDavid Woodhouse          */
44127d4075dSDavid Woodhouse         qemu_cpu_kick(cs);
44227d4075dSDavid Woodhouse         break;
443ddf0fd9aSDavid Woodhouse 
444ddf0fd9aSDavid Woodhouse     case HVM_PARAM_CALLBACK_TYPE_GSI:
445ddf0fd9aSDavid Woodhouse     case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
446ddf0fd9aSDavid Woodhouse         if (vcpu_id == 0) {
447ddf0fd9aSDavid Woodhouse             xen_evtchn_set_callback_level(1);
448ddf0fd9aSDavid Woodhouse         }
449ddf0fd9aSDavid Woodhouse         break;
45027d4075dSDavid Woodhouse     }
451c345104cSJoao Martins }
452c345104cSJoao Martins 
453c723d4c1SDavid Woodhouse static int kvm_xen_set_vcpu_timer(CPUState *cs)
454c723d4c1SDavid Woodhouse {
455c723d4c1SDavid Woodhouse     X86CPU *cpu = X86_CPU(cs);
456c723d4c1SDavid Woodhouse     CPUX86State *env = &cpu->env;
457c723d4c1SDavid Woodhouse 
458c723d4c1SDavid Woodhouse     struct kvm_xen_vcpu_attr va = {
459c723d4c1SDavid Woodhouse         .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
460c723d4c1SDavid Woodhouse         .u.timer.port = env->xen_virq[VIRQ_TIMER],
461c723d4c1SDavid Woodhouse         .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
462c723d4c1SDavid Woodhouse         .u.timer.expires_ns = env->xen_singleshot_timer_ns,
463c723d4c1SDavid Woodhouse     };
464c723d4c1SDavid Woodhouse 
465c723d4c1SDavid Woodhouse     return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
466c723d4c1SDavid Woodhouse }
467c723d4c1SDavid Woodhouse 
468c723d4c1SDavid Woodhouse static void do_set_vcpu_timer_virq(CPUState *cs, run_on_cpu_data data)
469c723d4c1SDavid Woodhouse {
470c723d4c1SDavid Woodhouse     kvm_xen_set_vcpu_timer(cs);
471c723d4c1SDavid Woodhouse }
472c723d4c1SDavid Woodhouse 
473c723d4c1SDavid Woodhouse int kvm_xen_set_vcpu_virq(uint32_t vcpu_id, uint16_t virq, uint16_t port)
474c723d4c1SDavid Woodhouse {
475c723d4c1SDavid Woodhouse     CPUState *cs = qemu_get_cpu(vcpu_id);
476c723d4c1SDavid Woodhouse 
477c723d4c1SDavid Woodhouse     if (!cs) {
478c723d4c1SDavid Woodhouse         return -ENOENT;
479c723d4c1SDavid Woodhouse     }
480c723d4c1SDavid Woodhouse 
481c723d4c1SDavid Woodhouse     /* cpu.h doesn't include the actual Xen header. */
482c723d4c1SDavid Woodhouse     qemu_build_assert(NR_VIRQS == XEN_NR_VIRQS);
483c723d4c1SDavid Woodhouse 
484c723d4c1SDavid Woodhouse     if (virq >= NR_VIRQS) {
485c723d4c1SDavid Woodhouse         return -EINVAL;
486c723d4c1SDavid Woodhouse     }
487c723d4c1SDavid Woodhouse 
488c723d4c1SDavid Woodhouse     if (port && X86_CPU(cs)->env.xen_virq[virq]) {
489c723d4c1SDavid Woodhouse         return -EEXIST;
490c723d4c1SDavid Woodhouse     }
491c723d4c1SDavid Woodhouse 
492c723d4c1SDavid Woodhouse     X86_CPU(cs)->env.xen_virq[virq] = port;
493c723d4c1SDavid Woodhouse     if (virq == VIRQ_TIMER && kvm_xen_has_cap(EVTCHN_SEND)) {
494c723d4c1SDavid Woodhouse         async_run_on_cpu(cs, do_set_vcpu_timer_virq,
495c723d4c1SDavid Woodhouse                          RUN_ON_CPU_HOST_INT(port));
496c723d4c1SDavid Woodhouse     }
497c723d4c1SDavid Woodhouse     return 0;
498c723d4c1SDavid Woodhouse }
499c723d4c1SDavid Woodhouse 
500f0689302SJoao Martins static void do_set_vcpu_time_info_gpa(CPUState *cs, run_on_cpu_data data)
501f0689302SJoao Martins {
502f0689302SJoao Martins     X86CPU *cpu = X86_CPU(cs);
503f0689302SJoao Martins     CPUX86State *env = &cpu->env;
504f0689302SJoao Martins 
505f0689302SJoao Martins     env->xen_vcpu_time_info_gpa = data.host_ulong;
506f0689302SJoao Martins 
507f0689302SJoao Martins     kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
508f0689302SJoao Martins                           env->xen_vcpu_time_info_gpa);
509f0689302SJoao Martins }
510f0689302SJoao Martins 
5115092db87SJoao Martins static void do_set_vcpu_runstate_gpa(CPUState *cs, run_on_cpu_data data)
5125092db87SJoao Martins {
5135092db87SJoao Martins     X86CPU *cpu = X86_CPU(cs);
5145092db87SJoao Martins     CPUX86State *env = &cpu->env;
5155092db87SJoao Martins 
5165092db87SJoao Martins     env->xen_vcpu_runstate_gpa = data.host_ulong;
5175092db87SJoao Martins 
5185092db87SJoao Martins     kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
5195092db87SJoao Martins                           env->xen_vcpu_runstate_gpa);
5205092db87SJoao Martins }
5215092db87SJoao Martins 
522c345104cSJoao Martins static void do_vcpu_soft_reset(CPUState *cs, run_on_cpu_data data)
523c345104cSJoao Martins {
524c345104cSJoao Martins     X86CPU *cpu = X86_CPU(cs);
525c345104cSJoao Martins     CPUX86State *env = &cpu->env;
526c345104cSJoao Martins 
527c345104cSJoao Martins     env->xen_vcpu_info_gpa = INVALID_GPA;
528c345104cSJoao Martins     env->xen_vcpu_info_default_gpa = INVALID_GPA;
529f0689302SJoao Martins     env->xen_vcpu_time_info_gpa = INVALID_GPA;
5305092db87SJoao Martins     env->xen_vcpu_runstate_gpa = INVALID_GPA;
531105b47fdSAnkur Arora     env->xen_vcpu_callback_vector = 0;
532c723d4c1SDavid Woodhouse     env->xen_singleshot_timer_ns = 0;
533c723d4c1SDavid Woodhouse     memset(env->xen_virq, 0, sizeof(env->xen_virq));
534c345104cSJoao Martins 
53527d4075dSDavid Woodhouse     set_vcpu_info(cs, INVALID_GPA);
536f0689302SJoao Martins     kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
537f0689302SJoao Martins                           INVALID_GPA);
5385092db87SJoao Martins     kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
5395092db87SJoao Martins                           INVALID_GPA);
540105b47fdSAnkur Arora     if (kvm_xen_has_cap(EVTCHN_SEND)) {
541105b47fdSAnkur Arora         kvm_xen_set_vcpu_callback_vector(cs);
542c723d4c1SDavid Woodhouse         kvm_xen_set_vcpu_timer(cs);
543105b47fdSAnkur Arora     }
5445092db87SJoao Martins 
545c345104cSJoao Martins }
546c345104cSJoao Martins 
547fb0fd2ceSJoao Martins static int xen_set_shared_info(uint64_t gfn)
548fb0fd2ceSJoao Martins {
549fb0fd2ceSJoao Martins     uint64_t gpa = gfn << TARGET_PAGE_BITS;
550c345104cSJoao Martins     int i, err;
551fb0fd2ceSJoao Martins 
552fb0fd2ceSJoao Martins     QEMU_IOTHREAD_LOCK_GUARD();
553fb0fd2ceSJoao Martins 
554fb0fd2ceSJoao Martins     /*
555fb0fd2ceSJoao Martins      * The xen_overlay device tells KVM about it too, since it had to
556fb0fd2ceSJoao Martins      * do that on migration load anyway (unless we're going to jump
557fb0fd2ceSJoao Martins      * through lots of hoops to maintain the fiction that this isn't
558fb0fd2ceSJoao Martins      * KVM-specific.
559fb0fd2ceSJoao Martins      */
560fb0fd2ceSJoao Martins     err = xen_overlay_map_shinfo_page(gpa);
561fb0fd2ceSJoao Martins     if (err) {
562fb0fd2ceSJoao Martins             return err;
563fb0fd2ceSJoao Martins     }
564fb0fd2ceSJoao Martins 
565fb0fd2ceSJoao Martins     trace_kvm_xen_set_shared_info(gfn);
566fb0fd2ceSJoao Martins 
567c345104cSJoao Martins     for (i = 0; i < XEN_LEGACY_MAX_VCPUS; i++) {
568c345104cSJoao Martins         CPUState *cpu = qemu_get_cpu(i);
569c345104cSJoao Martins         if (cpu) {
570c345104cSJoao Martins             async_run_on_cpu(cpu, do_set_vcpu_info_default_gpa,
571c345104cSJoao Martins                              RUN_ON_CPU_HOST_ULONG(gpa));
572c345104cSJoao Martins         }
573c345104cSJoao Martins         gpa += sizeof(vcpu_info_t);
574c345104cSJoao Martins     }
575c345104cSJoao Martins 
576fb0fd2ceSJoao Martins     return err;
577fb0fd2ceSJoao Martins }
578fb0fd2ceSJoao Martins 
579fb0fd2ceSJoao Martins static int add_to_physmap_one(uint32_t space, uint64_t idx, uint64_t gfn)
580fb0fd2ceSJoao Martins {
581fb0fd2ceSJoao Martins     switch (space) {
582fb0fd2ceSJoao Martins     case XENMAPSPACE_shared_info:
583fb0fd2ceSJoao Martins         if (idx > 0) {
584fb0fd2ceSJoao Martins             return -EINVAL;
585fb0fd2ceSJoao Martins         }
586fb0fd2ceSJoao Martins         return xen_set_shared_info(gfn);
587fb0fd2ceSJoao Martins 
588fb0fd2ceSJoao Martins     case XENMAPSPACE_grant_table:
589a28b0fc0SDavid Woodhouse         return xen_gnttab_map_page(idx, gfn);
590a28b0fc0SDavid Woodhouse 
591fb0fd2ceSJoao Martins     case XENMAPSPACE_gmfn:
592fb0fd2ceSJoao Martins     case XENMAPSPACE_gmfn_range:
593fb0fd2ceSJoao Martins         return -ENOTSUP;
594fb0fd2ceSJoao Martins 
595fb0fd2ceSJoao Martins     case XENMAPSPACE_gmfn_foreign:
596fb0fd2ceSJoao Martins     case XENMAPSPACE_dev_mmio:
597fb0fd2ceSJoao Martins         return -EPERM;
598fb0fd2ceSJoao Martins 
599fb0fd2ceSJoao Martins     default:
600fb0fd2ceSJoao Martins         return -EINVAL;
601fb0fd2ceSJoao Martins     }
602fb0fd2ceSJoao Martins }
603fb0fd2ceSJoao Martins 
604fb0fd2ceSJoao Martins static int do_add_to_physmap(struct kvm_xen_exit *exit, X86CPU *cpu,
605fb0fd2ceSJoao Martins                              uint64_t arg)
606fb0fd2ceSJoao Martins {
607fb0fd2ceSJoao Martins     struct xen_add_to_physmap xatp;
608fb0fd2ceSJoao Martins     CPUState *cs = CPU(cpu);
609fb0fd2ceSJoao Martins 
610fb0fd2ceSJoao Martins     if (hypercall_compat32(exit->u.hcall.longmode)) {
611fb0fd2ceSJoao Martins         struct compat_xen_add_to_physmap xatp32;
612fb0fd2ceSJoao Martins 
613fb0fd2ceSJoao Martins         qemu_build_assert(sizeof(struct compat_xen_add_to_physmap) == 16);
614fb0fd2ceSJoao Martins         if (kvm_copy_from_gva(cs, arg, &xatp32, sizeof(xatp32))) {
615fb0fd2ceSJoao Martins             return -EFAULT;
616fb0fd2ceSJoao Martins         }
617fb0fd2ceSJoao Martins         xatp.domid = xatp32.domid;
618fb0fd2ceSJoao Martins         xatp.size = xatp32.size;
619fb0fd2ceSJoao Martins         xatp.space = xatp32.space;
620fb0fd2ceSJoao Martins         xatp.idx = xatp32.idx;
621fb0fd2ceSJoao Martins         xatp.gpfn = xatp32.gpfn;
622fb0fd2ceSJoao Martins     } else {
623fb0fd2ceSJoao Martins         if (kvm_copy_from_gva(cs, arg, &xatp, sizeof(xatp))) {
624fb0fd2ceSJoao Martins             return -EFAULT;
625fb0fd2ceSJoao Martins         }
626fb0fd2ceSJoao Martins     }
627fb0fd2ceSJoao Martins 
628fb0fd2ceSJoao Martins     if (xatp.domid != DOMID_SELF && xatp.domid != xen_domid) {
629fb0fd2ceSJoao Martins         return -ESRCH;
630fb0fd2ceSJoao Martins     }
631fb0fd2ceSJoao Martins 
632fb0fd2ceSJoao Martins     return add_to_physmap_one(xatp.space, xatp.idx, xatp.gpfn);
633fb0fd2ceSJoao Martins }
634fb0fd2ceSJoao Martins 
635782a7960SDavid Woodhouse static int do_add_to_physmap_batch(struct kvm_xen_exit *exit, X86CPU *cpu,
636782a7960SDavid Woodhouse                                    uint64_t arg)
637782a7960SDavid Woodhouse {
638782a7960SDavid Woodhouse     struct xen_add_to_physmap_batch xatpb;
639782a7960SDavid Woodhouse     unsigned long idxs_gva, gpfns_gva, errs_gva;
640782a7960SDavid Woodhouse     CPUState *cs = CPU(cpu);
641782a7960SDavid Woodhouse     size_t op_sz;
642782a7960SDavid Woodhouse 
643782a7960SDavid Woodhouse     if (hypercall_compat32(exit->u.hcall.longmode)) {
644782a7960SDavid Woodhouse         struct compat_xen_add_to_physmap_batch xatpb32;
645782a7960SDavid Woodhouse 
646782a7960SDavid Woodhouse         qemu_build_assert(sizeof(struct compat_xen_add_to_physmap_batch) == 20);
647782a7960SDavid Woodhouse         if (kvm_copy_from_gva(cs, arg, &xatpb32, sizeof(xatpb32))) {
648782a7960SDavid Woodhouse             return -EFAULT;
649782a7960SDavid Woodhouse         }
650782a7960SDavid Woodhouse         xatpb.domid = xatpb32.domid;
651782a7960SDavid Woodhouse         xatpb.space = xatpb32.space;
652782a7960SDavid Woodhouse         xatpb.size = xatpb32.size;
653782a7960SDavid Woodhouse 
654782a7960SDavid Woodhouse         idxs_gva = xatpb32.idxs.c;
655782a7960SDavid Woodhouse         gpfns_gva = xatpb32.gpfns.c;
656782a7960SDavid Woodhouse         errs_gva = xatpb32.errs.c;
657782a7960SDavid Woodhouse         op_sz = sizeof(uint32_t);
658782a7960SDavid Woodhouse     } else {
659782a7960SDavid Woodhouse         if (kvm_copy_from_gva(cs, arg, &xatpb, sizeof(xatpb))) {
660782a7960SDavid Woodhouse             return -EFAULT;
661782a7960SDavid Woodhouse         }
662782a7960SDavid Woodhouse         op_sz = sizeof(unsigned long);
663782a7960SDavid Woodhouse         idxs_gva = (unsigned long)xatpb.idxs.p;
664782a7960SDavid Woodhouse         gpfns_gva = (unsigned long)xatpb.gpfns.p;
665782a7960SDavid Woodhouse         errs_gva = (unsigned long)xatpb.errs.p;
666782a7960SDavid Woodhouse     }
667782a7960SDavid Woodhouse 
668782a7960SDavid Woodhouse     if (xatpb.domid != DOMID_SELF && xatpb.domid != xen_domid) {
669782a7960SDavid Woodhouse         return -ESRCH;
670782a7960SDavid Woodhouse     }
671782a7960SDavid Woodhouse 
672782a7960SDavid Woodhouse     /* Explicitly invalid for the batch op. Not that we implement it anyway. */
673782a7960SDavid Woodhouse     if (xatpb.space == XENMAPSPACE_gmfn_range) {
674782a7960SDavid Woodhouse         return -EINVAL;
675782a7960SDavid Woodhouse     }
676782a7960SDavid Woodhouse 
677782a7960SDavid Woodhouse     while (xatpb.size--) {
678782a7960SDavid Woodhouse         unsigned long idx = 0;
679782a7960SDavid Woodhouse         unsigned long gpfn = 0;
680782a7960SDavid Woodhouse         int err;
681782a7960SDavid Woodhouse 
682782a7960SDavid Woodhouse         /* For 32-bit compat this only copies the low 32 bits of each */
683782a7960SDavid Woodhouse         if (kvm_copy_from_gva(cs, idxs_gva, &idx, op_sz) ||
684782a7960SDavid Woodhouse             kvm_copy_from_gva(cs, gpfns_gva, &gpfn, op_sz)) {
685782a7960SDavid Woodhouse             return -EFAULT;
686782a7960SDavid Woodhouse         }
687782a7960SDavid Woodhouse         idxs_gva += op_sz;
688782a7960SDavid Woodhouse         gpfns_gva += op_sz;
689782a7960SDavid Woodhouse 
690782a7960SDavid Woodhouse         err = add_to_physmap_one(xatpb.space, idx, gpfn);
691782a7960SDavid Woodhouse 
692782a7960SDavid Woodhouse         if (kvm_copy_to_gva(cs, errs_gva, &err, sizeof(err))) {
693782a7960SDavid Woodhouse             return -EFAULT;
694782a7960SDavid Woodhouse         }
695782a7960SDavid Woodhouse         errs_gva += sizeof(err);
696782a7960SDavid Woodhouse     }
697782a7960SDavid Woodhouse     return 0;
698782a7960SDavid Woodhouse }
699782a7960SDavid Woodhouse 
700fb0fd2ceSJoao Martins static bool kvm_xen_hcall_memory_op(struct kvm_xen_exit *exit, X86CPU *cpu,
701fb0fd2ceSJoao Martins                                    int cmd, uint64_t arg)
702fb0fd2ceSJoao Martins {
703fb0fd2ceSJoao Martins     int err;
704fb0fd2ceSJoao Martins 
705fb0fd2ceSJoao Martins     switch (cmd) {
706fb0fd2ceSJoao Martins     case XENMEM_add_to_physmap:
707fb0fd2ceSJoao Martins         err = do_add_to_physmap(exit, cpu, arg);
708fb0fd2ceSJoao Martins         break;
709fb0fd2ceSJoao Martins 
710782a7960SDavid Woodhouse     case XENMEM_add_to_physmap_batch:
711782a7960SDavid Woodhouse         err = do_add_to_physmap_batch(exit, cpu, arg);
712782a7960SDavid Woodhouse         break;
713782a7960SDavid Woodhouse 
714fb0fd2ceSJoao Martins     default:
715fb0fd2ceSJoao Martins         return false;
716fb0fd2ceSJoao Martins     }
717fb0fd2ceSJoao Martins 
718fb0fd2ceSJoao Martins     exit->u.hcall.result = err;
719fb0fd2ceSJoao Martins     return true;
720fb0fd2ceSJoao Martins }
721fb0fd2ceSJoao Martins 
7225dbcd01aSAnkur Arora static bool handle_set_param(struct kvm_xen_exit *exit, X86CPU *cpu,
7235dbcd01aSAnkur Arora                              uint64_t arg)
7245dbcd01aSAnkur Arora {
7255dbcd01aSAnkur Arora     CPUState *cs = CPU(cpu);
7265dbcd01aSAnkur Arora     struct xen_hvm_param hp;
7275dbcd01aSAnkur Arora     int err = 0;
7285dbcd01aSAnkur Arora 
7295dbcd01aSAnkur Arora     /* No need for 32/64 compat handling */
7305dbcd01aSAnkur Arora     qemu_build_assert(sizeof(hp) == 16);
7315dbcd01aSAnkur Arora 
7325dbcd01aSAnkur Arora     if (kvm_copy_from_gva(cs, arg, &hp, sizeof(hp))) {
7335dbcd01aSAnkur Arora         err = -EFAULT;
7345dbcd01aSAnkur Arora         goto out;
7355dbcd01aSAnkur Arora     }
7365dbcd01aSAnkur Arora 
7375dbcd01aSAnkur Arora     if (hp.domid != DOMID_SELF && hp.domid != xen_domid) {
7385dbcd01aSAnkur Arora         err = -ESRCH;
7395dbcd01aSAnkur Arora         goto out;
7405dbcd01aSAnkur Arora     }
7415dbcd01aSAnkur Arora 
7425dbcd01aSAnkur Arora     switch (hp.index) {
74391cce756SDavid Woodhouse     case HVM_PARAM_CALLBACK_IRQ:
7442aff696bSDavid Woodhouse         qemu_mutex_lock_iothread();
74591cce756SDavid Woodhouse         err = xen_evtchn_set_callback_param(hp.value);
7462aff696bSDavid Woodhouse         qemu_mutex_unlock_iothread();
74791cce756SDavid Woodhouse         xen_set_long_mode(exit->u.hcall.longmode);
74891cce756SDavid Woodhouse         break;
7495dbcd01aSAnkur Arora     default:
7505dbcd01aSAnkur Arora         return false;
7515dbcd01aSAnkur Arora     }
7525dbcd01aSAnkur Arora 
7535dbcd01aSAnkur Arora out:
7545dbcd01aSAnkur Arora     exit->u.hcall.result = err;
7555dbcd01aSAnkur Arora     return true;
7565dbcd01aSAnkur Arora }
7575dbcd01aSAnkur Arora 
758105b47fdSAnkur Arora static int kvm_xen_hcall_evtchn_upcall_vector(struct kvm_xen_exit *exit,
759105b47fdSAnkur Arora                                               X86CPU *cpu, uint64_t arg)
760105b47fdSAnkur Arora {
761105b47fdSAnkur Arora     struct xen_hvm_evtchn_upcall_vector up;
762105b47fdSAnkur Arora     CPUState *target_cs;
763105b47fdSAnkur Arora 
764105b47fdSAnkur Arora     /* No need for 32/64 compat handling */
765105b47fdSAnkur Arora     qemu_build_assert(sizeof(up) == 8);
766105b47fdSAnkur Arora 
767105b47fdSAnkur Arora     if (kvm_copy_from_gva(CPU(cpu), arg, &up, sizeof(up))) {
768105b47fdSAnkur Arora         return -EFAULT;
769105b47fdSAnkur Arora     }
770105b47fdSAnkur Arora 
771105b47fdSAnkur Arora     if (up.vector < 0x10) {
772105b47fdSAnkur Arora         return -EINVAL;
773105b47fdSAnkur Arora     }
774105b47fdSAnkur Arora 
775105b47fdSAnkur Arora     target_cs = qemu_get_cpu(up.vcpu);
776105b47fdSAnkur Arora     if (!target_cs) {
777105b47fdSAnkur Arora         return -EINVAL;
778105b47fdSAnkur Arora     }
779105b47fdSAnkur Arora 
780105b47fdSAnkur Arora     async_run_on_cpu(target_cs, do_set_vcpu_callback_vector,
781105b47fdSAnkur Arora                      RUN_ON_CPU_HOST_INT(up.vector));
782105b47fdSAnkur Arora     return 0;
783105b47fdSAnkur Arora }
784105b47fdSAnkur Arora 
785671bfdcdSJoao Martins static bool kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, X86CPU *cpu,
786671bfdcdSJoao Martins                                  int cmd, uint64_t arg)
787671bfdcdSJoao Martins {
788105b47fdSAnkur Arora     int ret = -ENOSYS;
789671bfdcdSJoao Martins     switch (cmd) {
790105b47fdSAnkur Arora     case HVMOP_set_evtchn_upcall_vector:
791105b47fdSAnkur Arora         ret = kvm_xen_hcall_evtchn_upcall_vector(exit, cpu,
792105b47fdSAnkur Arora                                                  exit->u.hcall.params[0]);
793105b47fdSAnkur Arora         break;
794105b47fdSAnkur Arora 
795671bfdcdSJoao Martins     case HVMOP_pagetable_dying:
796105b47fdSAnkur Arora         ret = -ENOSYS;
797105b47fdSAnkur Arora         break;
798671bfdcdSJoao Martins 
7995dbcd01aSAnkur Arora     case HVMOP_set_param:
8005dbcd01aSAnkur Arora         return handle_set_param(exit, cpu, arg);
8015dbcd01aSAnkur Arora 
802671bfdcdSJoao Martins     default:
803671bfdcdSJoao Martins         return false;
804671bfdcdSJoao Martins     }
805105b47fdSAnkur Arora 
806105b47fdSAnkur Arora     exit->u.hcall.result = ret;
807105b47fdSAnkur Arora     return true;
808671bfdcdSJoao Martins }
809671bfdcdSJoao Martins 
810c345104cSJoao Martins static int vcpuop_register_vcpu_info(CPUState *cs, CPUState *target,
811c345104cSJoao Martins                                      uint64_t arg)
812c345104cSJoao Martins {
813c345104cSJoao Martins     struct vcpu_register_vcpu_info rvi;
814c345104cSJoao Martins     uint64_t gpa;
815c345104cSJoao Martins 
816c345104cSJoao Martins     /* No need for 32/64 compat handling */
817c345104cSJoao Martins     qemu_build_assert(sizeof(rvi) == 16);
818c345104cSJoao Martins     qemu_build_assert(sizeof(struct vcpu_info) == 64);
819c345104cSJoao Martins 
820c345104cSJoao Martins     if (!target) {
821c345104cSJoao Martins         return -ENOENT;
822c345104cSJoao Martins     }
823c345104cSJoao Martins 
824c345104cSJoao Martins     if (kvm_copy_from_gva(cs, arg, &rvi, sizeof(rvi))) {
825c345104cSJoao Martins         return -EFAULT;
826c345104cSJoao Martins     }
827c345104cSJoao Martins 
828c345104cSJoao Martins     if (rvi.offset > TARGET_PAGE_SIZE - sizeof(struct vcpu_info)) {
829c345104cSJoao Martins         return -EINVAL;
830c345104cSJoao Martins     }
831c345104cSJoao Martins 
832c345104cSJoao Martins     gpa = ((rvi.mfn << TARGET_PAGE_BITS) + rvi.offset);
833c345104cSJoao Martins     async_run_on_cpu(target, do_set_vcpu_info_gpa, RUN_ON_CPU_HOST_ULONG(gpa));
834c345104cSJoao Martins     return 0;
835c345104cSJoao Martins }
836c345104cSJoao Martins 
837f0689302SJoao Martins static int vcpuop_register_vcpu_time_info(CPUState *cs, CPUState *target,
838f0689302SJoao Martins                                           uint64_t arg)
839f0689302SJoao Martins {
840f0689302SJoao Martins     struct vcpu_register_time_memory_area tma;
841f0689302SJoao Martins     uint64_t gpa;
842f0689302SJoao Martins     size_t len;
843f0689302SJoao Martins 
844f0689302SJoao Martins     /* No need for 32/64 compat handling */
845f0689302SJoao Martins     qemu_build_assert(sizeof(tma) == 8);
846f0689302SJoao Martins     qemu_build_assert(sizeof(struct vcpu_time_info) == 32);
847f0689302SJoao Martins 
848f0689302SJoao Martins     if (!target) {
849f0689302SJoao Martins         return -ENOENT;
850f0689302SJoao Martins     }
851f0689302SJoao Martins 
852f0689302SJoao Martins     if (kvm_copy_from_gva(cs, arg, &tma, sizeof(tma))) {
853f0689302SJoao Martins         return -EFAULT;
854f0689302SJoao Martins     }
855f0689302SJoao Martins 
856f0689302SJoao Martins     /*
857f0689302SJoao Martins      * Xen actually uses the GVA and does the translation through the guest
858f0689302SJoao Martins      * page tables each time. But Linux/KVM uses the GPA, on the assumption
859f0689302SJoao Martins      * that guests only ever use *global* addresses (kernel virtual addresses)
860f0689302SJoao Martins      * for it. If Linux is changed to redo the GVA→GPA translation each time,
861f0689302SJoao Martins      * it will offer a new vCPU attribute for that, and we'll use it instead.
862f0689302SJoao Martins      */
863f0689302SJoao Martins     if (!kvm_gva_to_gpa(cs, tma.addr.p, &gpa, &len, false) ||
864f0689302SJoao Martins         len < sizeof(struct vcpu_time_info)) {
865f0689302SJoao Martins         return -EFAULT;
866f0689302SJoao Martins     }
867f0689302SJoao Martins 
868f0689302SJoao Martins     async_run_on_cpu(target, do_set_vcpu_time_info_gpa,
869f0689302SJoao Martins                      RUN_ON_CPU_HOST_ULONG(gpa));
870f0689302SJoao Martins     return 0;
871f0689302SJoao Martins }
872f0689302SJoao Martins 
8735092db87SJoao Martins static int vcpuop_register_runstate_info(CPUState *cs, CPUState *target,
8745092db87SJoao Martins                                          uint64_t arg)
8755092db87SJoao Martins {
8765092db87SJoao Martins     struct vcpu_register_runstate_memory_area rma;
8775092db87SJoao Martins     uint64_t gpa;
8785092db87SJoao Martins     size_t len;
8795092db87SJoao Martins 
8805092db87SJoao Martins     /* No need for 32/64 compat handling */
8815092db87SJoao Martins     qemu_build_assert(sizeof(rma) == 8);
8825092db87SJoao Martins     /* The runstate area actually does change size, but Linux copes. */
8835092db87SJoao Martins 
8845092db87SJoao Martins     if (!target) {
8855092db87SJoao Martins         return -ENOENT;
8865092db87SJoao Martins     }
8875092db87SJoao Martins 
8885092db87SJoao Martins     if (kvm_copy_from_gva(cs, arg, &rma, sizeof(rma))) {
8895092db87SJoao Martins         return -EFAULT;
8905092db87SJoao Martins     }
8915092db87SJoao Martins 
8925092db87SJoao Martins     /* As with vcpu_time_info, Xen actually uses the GVA but KVM doesn't. */
8935092db87SJoao Martins     if (!kvm_gva_to_gpa(cs, rma.addr.p, &gpa, &len, false)) {
8945092db87SJoao Martins         return -EFAULT;
8955092db87SJoao Martins     }
8965092db87SJoao Martins 
8975092db87SJoao Martins     async_run_on_cpu(target, do_set_vcpu_runstate_gpa,
8985092db87SJoao Martins                      RUN_ON_CPU_HOST_ULONG(gpa));
8995092db87SJoao Martins     return 0;
9005092db87SJoao Martins }
9015092db87SJoao Martins 
902*b746a779SJoao Martins static uint64_t kvm_get_current_ns(void)
903*b746a779SJoao Martins {
904*b746a779SJoao Martins     struct kvm_clock_data data;
905*b746a779SJoao Martins     int ret;
906*b746a779SJoao Martins 
907*b746a779SJoao Martins     ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
908*b746a779SJoao Martins     if (ret < 0) {
909*b746a779SJoao Martins         fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
910*b746a779SJoao Martins                 abort();
911*b746a779SJoao Martins     }
912*b746a779SJoao Martins 
913*b746a779SJoao Martins     return data.clock;
914*b746a779SJoao Martins }
915*b746a779SJoao Martins 
916*b746a779SJoao Martins static void xen_vcpu_singleshot_timer_event(void *opaque)
917*b746a779SJoao Martins {
918*b746a779SJoao Martins     CPUState *cpu = opaque;
919*b746a779SJoao Martins     CPUX86State *env = &X86_CPU(cpu)->env;
920*b746a779SJoao Martins     uint16_t port = env->xen_virq[VIRQ_TIMER];
921*b746a779SJoao Martins 
922*b746a779SJoao Martins     if (likely(port)) {
923*b746a779SJoao Martins         xen_evtchn_set_port(port);
924*b746a779SJoao Martins     }
925*b746a779SJoao Martins 
926*b746a779SJoao Martins     qemu_mutex_lock(&env->xen_timers_lock);
927*b746a779SJoao Martins     env->xen_singleshot_timer_ns = 0;
928*b746a779SJoao Martins     qemu_mutex_unlock(&env->xen_timers_lock);
929*b746a779SJoao Martins }
930*b746a779SJoao Martins 
931*b746a779SJoao Martins static void xen_vcpu_periodic_timer_event(void *opaque)
932*b746a779SJoao Martins {
933*b746a779SJoao Martins     CPUState *cpu = opaque;
934*b746a779SJoao Martins     CPUX86State *env = &X86_CPU(cpu)->env;
935*b746a779SJoao Martins     uint16_t port = env->xen_virq[VIRQ_TIMER];
936*b746a779SJoao Martins     int64_t qemu_now;
937*b746a779SJoao Martins 
938*b746a779SJoao Martins     if (likely(port)) {
939*b746a779SJoao Martins         xen_evtchn_set_port(port);
940*b746a779SJoao Martins     }
941*b746a779SJoao Martins 
942*b746a779SJoao Martins     qemu_mutex_lock(&env->xen_timers_lock);
943*b746a779SJoao Martins 
944*b746a779SJoao Martins     qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
945*b746a779SJoao Martins     timer_mod_ns(env->xen_periodic_timer,
946*b746a779SJoao Martins                  qemu_now + env->xen_periodic_timer_period);
947*b746a779SJoao Martins 
948*b746a779SJoao Martins     qemu_mutex_unlock(&env->xen_timers_lock);
949*b746a779SJoao Martins }
950*b746a779SJoao Martins 
951*b746a779SJoao Martins static int do_set_periodic_timer(CPUState *target, uint64_t period_ns)
952*b746a779SJoao Martins {
953*b746a779SJoao Martins     CPUX86State *tenv = &X86_CPU(target)->env;
954*b746a779SJoao Martins     int64_t qemu_now;
955*b746a779SJoao Martins 
956*b746a779SJoao Martins     timer_del(tenv->xen_periodic_timer);
957*b746a779SJoao Martins 
958*b746a779SJoao Martins     qemu_mutex_lock(&tenv->xen_timers_lock);
959*b746a779SJoao Martins 
960*b746a779SJoao Martins     qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
961*b746a779SJoao Martins     timer_mod_ns(tenv->xen_periodic_timer, qemu_now + period_ns);
962*b746a779SJoao Martins     tenv->xen_periodic_timer_period = period_ns;
963*b746a779SJoao Martins 
964*b746a779SJoao Martins     qemu_mutex_unlock(&tenv->xen_timers_lock);
965*b746a779SJoao Martins     return 0;
966*b746a779SJoao Martins }
967*b746a779SJoao Martins 
/* Convert milliseconds / microseconds to signed 64-bit nanoseconds. */
#define MILLISECS(_ms)  ((int64_t)((_ms) * 1000000ULL))
#define MICROSECS(_us)  ((int64_t)((_us) * 1000ULL))
/*
 * Largest signed 64-bit time value. The shift must be done on the
 * unsigned value: a cast binds tighter than '>>', so shifting
 * (int64_t)~0ull would shift -1 and yield -1 instead of the maximum.
 */
#define STIME_MAX ((int64_t)((uint64_t)~0ull >> 1))
/* Chosen so (NOW() + delta) won't overflow without an uptime of 200 years */
#define STIME_DELTA_MAX ((int64_t)((uint64_t)~0ull >> 2))
973*b746a779SJoao Martins 
974*b746a779SJoao Martins static int vcpuop_set_periodic_timer(CPUState *cs, CPUState *target,
975*b746a779SJoao Martins                                      uint64_t arg)
976*b746a779SJoao Martins {
977*b746a779SJoao Martins     struct vcpu_set_periodic_timer spt;
978*b746a779SJoao Martins 
979*b746a779SJoao Martins     qemu_build_assert(sizeof(spt) == 8);
980*b746a779SJoao Martins     if (kvm_copy_from_gva(cs, arg, &spt, sizeof(spt))) {
981*b746a779SJoao Martins         return -EFAULT;
982*b746a779SJoao Martins     }
983*b746a779SJoao Martins 
984*b746a779SJoao Martins     if (spt.period_ns < MILLISECS(1) || spt.period_ns > STIME_DELTA_MAX) {
985*b746a779SJoao Martins         return -EINVAL;
986*b746a779SJoao Martins     }
987*b746a779SJoao Martins 
988*b746a779SJoao Martins     return do_set_periodic_timer(target, spt.period_ns);
989*b746a779SJoao Martins }
990*b746a779SJoao Martins 
991*b746a779SJoao Martins static int vcpuop_stop_periodic_timer(CPUState *target)
992*b746a779SJoao Martins {
993*b746a779SJoao Martins     CPUX86State *tenv = &X86_CPU(target)->env;
994*b746a779SJoao Martins 
995*b746a779SJoao Martins     qemu_mutex_lock(&tenv->xen_timers_lock);
996*b746a779SJoao Martins 
997*b746a779SJoao Martins     timer_del(tenv->xen_periodic_timer);
998*b746a779SJoao Martins     tenv->xen_periodic_timer_period = 0;
999*b746a779SJoao Martins 
1000*b746a779SJoao Martins     qemu_mutex_unlock(&tenv->xen_timers_lock);
1001*b746a779SJoao Martins     return 0;
1002*b746a779SJoao Martins }
1003*b746a779SJoao Martins 
1004*b746a779SJoao Martins static int do_set_singleshot_timer(CPUState *cs, uint64_t timeout_abs,
1005*b746a779SJoao Martins                                    bool future, bool linux_wa)
1006*b746a779SJoao Martins {
1007*b746a779SJoao Martins     CPUX86State *env = &X86_CPU(cs)->env;
1008*b746a779SJoao Martins     int64_t now = kvm_get_current_ns();
1009*b746a779SJoao Martins     int64_t qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
1010*b746a779SJoao Martins     int64_t delta = timeout_abs - now;
1011*b746a779SJoao Martins 
1012*b746a779SJoao Martins     if (future && timeout_abs < now) {
1013*b746a779SJoao Martins         return -ETIME;
1014*b746a779SJoao Martins     }
1015*b746a779SJoao Martins 
1016*b746a779SJoao Martins     if (linux_wa && unlikely((int64_t)timeout_abs < 0 ||
1017*b746a779SJoao Martins                              (delta > 0 && (uint32_t)(delta >> 50) != 0))) {
1018*b746a779SJoao Martins         /*
1019*b746a779SJoao Martins          * Xen has a 'Linux workaround' in do_set_timer_op() which checks
1020*b746a779SJoao Martins          * for negative absolute timeout values (caused by integer
1021*b746a779SJoao Martins          * overflow), and for values about 13 days in the future (2^50ns)
1022*b746a779SJoao Martins          * which would be caused by jiffies overflow. For those cases, it
1023*b746a779SJoao Martins          * sets the timeout 100ms in the future (not *too* soon, since if
1024*b746a779SJoao Martins          * a guest really did set a long timeout on purpose we don't want
1025*b746a779SJoao Martins          * to keep churning CPU time by waking it up).
1026*b746a779SJoao Martins          */
1027*b746a779SJoao Martins         delta = (100 * SCALE_MS);
1028*b746a779SJoao Martins         timeout_abs = now + delta;
1029*b746a779SJoao Martins     }
1030*b746a779SJoao Martins 
1031*b746a779SJoao Martins     qemu_mutex_lock(&env->xen_timers_lock);
1032*b746a779SJoao Martins 
1033*b746a779SJoao Martins     timer_mod_ns(env->xen_singleshot_timer, qemu_now + delta);
1034*b746a779SJoao Martins     env->xen_singleshot_timer_ns = now + delta;
1035*b746a779SJoao Martins 
1036*b746a779SJoao Martins     qemu_mutex_unlock(&env->xen_timers_lock);
1037*b746a779SJoao Martins     return 0;
1038*b746a779SJoao Martins }
1039*b746a779SJoao Martins 
1040*b746a779SJoao Martins static int vcpuop_set_singleshot_timer(CPUState *cs, uint64_t arg)
1041*b746a779SJoao Martins {
1042*b746a779SJoao Martins     struct vcpu_set_singleshot_timer sst = { 0 };
1043*b746a779SJoao Martins 
1044*b746a779SJoao Martins     /*
1045*b746a779SJoao Martins      * The struct is a uint64_t followed by a uint32_t. On 32-bit that
1046*b746a779SJoao Martins      * makes it 12 bytes. On 64-bit it gets padded to 16. The parts
1047*b746a779SJoao Martins      * that get used are identical, and there's four bytes of padding
1048*b746a779SJoao Martins      * unused at the end. For true Xen compatibility we should attempt
1049*b746a779SJoao Martins      * to copy the full 16 bytes from 64-bit guests, and return -EFAULT
1050*b746a779SJoao Martins      * if we can't get the padding too. But that's daft. Just copy what
1051*b746a779SJoao Martins      * we need.
1052*b746a779SJoao Martins      */
1053*b746a779SJoao Martins     qemu_build_assert(offsetof(struct vcpu_set_singleshot_timer, flags) == 8);
1054*b746a779SJoao Martins     qemu_build_assert(sizeof(sst) >= 12);
1055*b746a779SJoao Martins 
1056*b746a779SJoao Martins     if (kvm_copy_from_gva(cs, arg, &sst, 12)) {
1057*b746a779SJoao Martins         return -EFAULT;
1058*b746a779SJoao Martins     }
1059*b746a779SJoao Martins 
1060*b746a779SJoao Martins     return do_set_singleshot_timer(cs, sst.timeout_abs_ns,
1061*b746a779SJoao Martins                                    !!(sst.flags & VCPU_SSHOTTMR_future),
1062*b746a779SJoao Martins                                    false);
1063*b746a779SJoao Martins }
1064*b746a779SJoao Martins 
1065*b746a779SJoao Martins static int vcpuop_stop_singleshot_timer(CPUState *cs)
1066*b746a779SJoao Martins {
1067*b746a779SJoao Martins     CPUX86State *env = &X86_CPU(cs)->env;
1068*b746a779SJoao Martins 
1069*b746a779SJoao Martins     qemu_mutex_lock(&env->xen_timers_lock);
1070*b746a779SJoao Martins 
1071*b746a779SJoao Martins     timer_del(env->xen_singleshot_timer);
1072*b746a779SJoao Martins     env->xen_singleshot_timer_ns = 0;
1073*b746a779SJoao Martins 
1074*b746a779SJoao Martins     qemu_mutex_unlock(&env->xen_timers_lock);
1075*b746a779SJoao Martins     return 0;
1076*b746a779SJoao Martins }
1077*b746a779SJoao Martins 
1078*b746a779SJoao Martins static bool kvm_xen_hcall_set_timer_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1079*b746a779SJoao Martins                                        uint64_t timeout)
1080*b746a779SJoao Martins {
1081*b746a779SJoao Martins     int err;
1082*b746a779SJoao Martins 
1083*b746a779SJoao Martins     if (unlikely(timeout == 0)) {
1084*b746a779SJoao Martins         err = vcpuop_stop_singleshot_timer(CPU(cpu));
1085*b746a779SJoao Martins     } else {
1086*b746a779SJoao Martins         err = do_set_singleshot_timer(CPU(cpu), timeout, false, true);
1087*b746a779SJoao Martins     }
1088*b746a779SJoao Martins     exit->u.hcall.result = err;
1089*b746a779SJoao Martins     return true;
1090*b746a779SJoao Martins }
1091*b746a779SJoao Martins 
1092d70bd6a4SJoao Martins static bool kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1093d70bd6a4SJoao Martins                                   int cmd, int vcpu_id, uint64_t arg)
1094d70bd6a4SJoao Martins {
1095c345104cSJoao Martins     CPUState *cs = CPU(cpu);
1096*b746a779SJoao Martins     CPUState *dest = cs->cpu_index == vcpu_id ? cs : qemu_get_cpu(vcpu_id);
1097d70bd6a4SJoao Martins     int err;
1098d70bd6a4SJoao Martins 
1099*b746a779SJoao Martins     if (!dest) {
1100*b746a779SJoao Martins         err = -ENOENT;
1101*b746a779SJoao Martins         goto out;
1102*b746a779SJoao Martins     }
1103*b746a779SJoao Martins 
1104d70bd6a4SJoao Martins     switch (cmd) {
11055092db87SJoao Martins     case VCPUOP_register_runstate_memory_area:
11065092db87SJoao Martins         err = vcpuop_register_runstate_info(cs, dest, arg);
11075092db87SJoao Martins         break;
1108f0689302SJoao Martins     case VCPUOP_register_vcpu_time_memory_area:
1109f0689302SJoao Martins         err = vcpuop_register_vcpu_time_info(cs, dest, arg);
1110f0689302SJoao Martins         break;
1111d70bd6a4SJoao Martins     case VCPUOP_register_vcpu_info:
1112c345104cSJoao Martins         err = vcpuop_register_vcpu_info(cs, dest, arg);
1113d70bd6a4SJoao Martins         break;
1114*b746a779SJoao Martins     case VCPUOP_set_singleshot_timer: {
1115*b746a779SJoao Martins         if (cs->cpu_index == vcpu_id) {
1116*b746a779SJoao Martins             err = vcpuop_set_singleshot_timer(dest, arg);
1117*b746a779SJoao Martins         } else {
1118*b746a779SJoao Martins             err = -EINVAL;
1119*b746a779SJoao Martins         }
1120*b746a779SJoao Martins         break;
1121*b746a779SJoao Martins     }
1122*b746a779SJoao Martins     case VCPUOP_stop_singleshot_timer:
1123*b746a779SJoao Martins         if (cs->cpu_index == vcpu_id) {
1124*b746a779SJoao Martins             err = vcpuop_stop_singleshot_timer(dest);
1125*b746a779SJoao Martins         } else {
1126*b746a779SJoao Martins             err = -EINVAL;
1127*b746a779SJoao Martins         }
1128*b746a779SJoao Martins         break;
1129*b746a779SJoao Martins     case VCPUOP_set_periodic_timer: {
1130*b746a779SJoao Martins         err = vcpuop_set_periodic_timer(cs, dest, arg);
1131*b746a779SJoao Martins         break;
1132*b746a779SJoao Martins     }
1133*b746a779SJoao Martins     case VCPUOP_stop_periodic_timer:
1134*b746a779SJoao Martins         err = vcpuop_stop_periodic_timer(dest);
1135*b746a779SJoao Martins         break;
1136d70bd6a4SJoao Martins 
1137d70bd6a4SJoao Martins     default:
1138d70bd6a4SJoao Martins         return false;
1139d70bd6a4SJoao Martins     }
1140d70bd6a4SJoao Martins 
1141*b746a779SJoao Martins  out:
1142d70bd6a4SJoao Martins     exit->u.hcall.result = err;
1143d70bd6a4SJoao Martins     return true;
1144d70bd6a4SJoao Martins }
1145d70bd6a4SJoao Martins 
11464858ba20SDavid Woodhouse static bool kvm_xen_hcall_evtchn_op(struct kvm_xen_exit *exit, X86CPU *cpu,
11473b06f29bSJoao Martins                                     int cmd, uint64_t arg)
11483b06f29bSJoao Martins {
11494858ba20SDavid Woodhouse     CPUState *cs = CPU(cpu);
11503b06f29bSJoao Martins     int err = -ENOSYS;
11513b06f29bSJoao Martins 
11523b06f29bSJoao Martins     switch (cmd) {
11533b06f29bSJoao Martins     case EVTCHNOP_init_control:
11543b06f29bSJoao Martins     case EVTCHNOP_expand_array:
11553b06f29bSJoao Martins     case EVTCHNOP_set_priority:
11563b06f29bSJoao Martins         /* We do not support FIFO channels at this point */
11573b06f29bSJoao Martins         err = -ENOSYS;
11583b06f29bSJoao Martins         break;
11593b06f29bSJoao Martins 
11604858ba20SDavid Woodhouse     case EVTCHNOP_status: {
11614858ba20SDavid Woodhouse         struct evtchn_status status;
11624858ba20SDavid Woodhouse 
11634858ba20SDavid Woodhouse         qemu_build_assert(sizeof(status) == 24);
11644858ba20SDavid Woodhouse         if (kvm_copy_from_gva(cs, arg, &status, sizeof(status))) {
11654858ba20SDavid Woodhouse             err = -EFAULT;
11664858ba20SDavid Woodhouse             break;
11674858ba20SDavid Woodhouse         }
11684858ba20SDavid Woodhouse 
11694858ba20SDavid Woodhouse         err = xen_evtchn_status_op(&status);
11704858ba20SDavid Woodhouse         if (!err && kvm_copy_to_gva(cs, arg, &status, sizeof(status))) {
11714858ba20SDavid Woodhouse             err = -EFAULT;
11724858ba20SDavid Woodhouse         }
11734858ba20SDavid Woodhouse         break;
11744858ba20SDavid Woodhouse     }
117583eb5811SDavid Woodhouse     case EVTCHNOP_close: {
117683eb5811SDavid Woodhouse         struct evtchn_close close;
117783eb5811SDavid Woodhouse 
117883eb5811SDavid Woodhouse         qemu_build_assert(sizeof(close) == 4);
117983eb5811SDavid Woodhouse         if (kvm_copy_from_gva(cs, arg, &close, sizeof(close))) {
118083eb5811SDavid Woodhouse             err = -EFAULT;
118183eb5811SDavid Woodhouse             break;
118283eb5811SDavid Woodhouse         }
118383eb5811SDavid Woodhouse 
118483eb5811SDavid Woodhouse         err = xen_evtchn_close_op(&close);
118583eb5811SDavid Woodhouse         break;
118683eb5811SDavid Woodhouse     }
1187190cc3c0SDavid Woodhouse     case EVTCHNOP_unmask: {
1188190cc3c0SDavid Woodhouse         struct evtchn_unmask unmask;
1189190cc3c0SDavid Woodhouse 
1190190cc3c0SDavid Woodhouse         qemu_build_assert(sizeof(unmask) == 4);
1191190cc3c0SDavid Woodhouse         if (kvm_copy_from_gva(cs, arg, &unmask, sizeof(unmask))) {
1192190cc3c0SDavid Woodhouse             err = -EFAULT;
1193190cc3c0SDavid Woodhouse             break;
1194190cc3c0SDavid Woodhouse         }
1195190cc3c0SDavid Woodhouse 
1196190cc3c0SDavid Woodhouse         err = xen_evtchn_unmask_op(&unmask);
1197190cc3c0SDavid Woodhouse         break;
1198190cc3c0SDavid Woodhouse     }
1199c723d4c1SDavid Woodhouse     case EVTCHNOP_bind_virq: {
1200c723d4c1SDavid Woodhouse         struct evtchn_bind_virq virq;
1201c723d4c1SDavid Woodhouse 
1202c723d4c1SDavid Woodhouse         qemu_build_assert(sizeof(virq) == 12);
1203c723d4c1SDavid Woodhouse         if (kvm_copy_from_gva(cs, arg, &virq, sizeof(virq))) {
1204c723d4c1SDavid Woodhouse             err = -EFAULT;
1205c723d4c1SDavid Woodhouse             break;
1206c723d4c1SDavid Woodhouse         }
1207c723d4c1SDavid Woodhouse 
1208c723d4c1SDavid Woodhouse         err = xen_evtchn_bind_virq_op(&virq);
1209c723d4c1SDavid Woodhouse         if (!err && kvm_copy_to_gva(cs, arg, &virq, sizeof(virq))) {
1210c723d4c1SDavid Woodhouse             err = -EFAULT;
1211c723d4c1SDavid Woodhouse         }
1212c723d4c1SDavid Woodhouse         break;
1213c723d4c1SDavid Woodhouse     }
1214f5417856SDavid Woodhouse     case EVTCHNOP_bind_ipi: {
1215f5417856SDavid Woodhouse         struct evtchn_bind_ipi ipi;
1216f5417856SDavid Woodhouse 
1217f5417856SDavid Woodhouse         qemu_build_assert(sizeof(ipi) == 8);
1218f5417856SDavid Woodhouse         if (kvm_copy_from_gva(cs, arg, &ipi, sizeof(ipi))) {
1219f5417856SDavid Woodhouse             err = -EFAULT;
1220f5417856SDavid Woodhouse             break;
1221f5417856SDavid Woodhouse         }
1222f5417856SDavid Woodhouse 
1223f5417856SDavid Woodhouse         err = xen_evtchn_bind_ipi_op(&ipi);
1224f5417856SDavid Woodhouse         if (!err && kvm_copy_to_gva(cs, arg, &ipi, sizeof(ipi))) {
1225f5417856SDavid Woodhouse             err = -EFAULT;
1226f5417856SDavid Woodhouse         }
1227f5417856SDavid Woodhouse         break;
1228f5417856SDavid Woodhouse     }
1229cf7679abSDavid Woodhouse     case EVTCHNOP_send: {
1230cf7679abSDavid Woodhouse         struct evtchn_send send;
1231cf7679abSDavid Woodhouse 
1232cf7679abSDavid Woodhouse         qemu_build_assert(sizeof(send) == 4);
1233cf7679abSDavid Woodhouse         if (kvm_copy_from_gva(cs, arg, &send, sizeof(send))) {
1234cf7679abSDavid Woodhouse             err = -EFAULT;
1235cf7679abSDavid Woodhouse             break;
1236cf7679abSDavid Woodhouse         }
1237cf7679abSDavid Woodhouse 
1238cf7679abSDavid Woodhouse         err = xen_evtchn_send_op(&send);
1239cf7679abSDavid Woodhouse         break;
1240cf7679abSDavid Woodhouse     }
1241e1db61b8SDavid Woodhouse     case EVTCHNOP_alloc_unbound: {
1242e1db61b8SDavid Woodhouse         struct evtchn_alloc_unbound alloc;
1243e1db61b8SDavid Woodhouse 
1244e1db61b8SDavid Woodhouse         qemu_build_assert(sizeof(alloc) == 8);
1245e1db61b8SDavid Woodhouse         if (kvm_copy_from_gva(cs, arg, &alloc, sizeof(alloc))) {
1246e1db61b8SDavid Woodhouse             err = -EFAULT;
1247e1db61b8SDavid Woodhouse             break;
1248e1db61b8SDavid Woodhouse         }
1249e1db61b8SDavid Woodhouse 
1250e1db61b8SDavid Woodhouse         err = xen_evtchn_alloc_unbound_op(&alloc);
1251e1db61b8SDavid Woodhouse         if (!err && kvm_copy_to_gva(cs, arg, &alloc, sizeof(alloc))) {
1252e1db61b8SDavid Woodhouse             err = -EFAULT;
1253e1db61b8SDavid Woodhouse         }
1254e1db61b8SDavid Woodhouse         break;
1255e1db61b8SDavid Woodhouse     }
125684327881SDavid Woodhouse     case EVTCHNOP_bind_interdomain: {
125784327881SDavid Woodhouse         struct evtchn_bind_interdomain interdomain;
125884327881SDavid Woodhouse 
125984327881SDavid Woodhouse         qemu_build_assert(sizeof(interdomain) == 12);
126084327881SDavid Woodhouse         if (kvm_copy_from_gva(cs, arg, &interdomain, sizeof(interdomain))) {
126184327881SDavid Woodhouse             err = -EFAULT;
126284327881SDavid Woodhouse             break;
126384327881SDavid Woodhouse         }
126484327881SDavid Woodhouse 
126584327881SDavid Woodhouse         err = xen_evtchn_bind_interdomain_op(&interdomain);
126684327881SDavid Woodhouse         if (!err &&
126784327881SDavid Woodhouse             kvm_copy_to_gva(cs, arg, &interdomain, sizeof(interdomain))) {
126884327881SDavid Woodhouse             err = -EFAULT;
126984327881SDavid Woodhouse         }
127084327881SDavid Woodhouse         break;
127184327881SDavid Woodhouse     }
127230667046SDavid Woodhouse     case EVTCHNOP_bind_vcpu: {
127330667046SDavid Woodhouse         struct evtchn_bind_vcpu vcpu;
127430667046SDavid Woodhouse 
127530667046SDavid Woodhouse         qemu_build_assert(sizeof(vcpu) == 8);
127630667046SDavid Woodhouse         if (kvm_copy_from_gva(cs, arg, &vcpu, sizeof(vcpu))) {
127730667046SDavid Woodhouse             err = -EFAULT;
127830667046SDavid Woodhouse             break;
127930667046SDavid Woodhouse         }
128030667046SDavid Woodhouse 
128130667046SDavid Woodhouse         err = xen_evtchn_bind_vcpu_op(&vcpu);
128230667046SDavid Woodhouse         break;
128330667046SDavid Woodhouse     }
1284a15b1097SDavid Woodhouse     case EVTCHNOP_reset: {
1285a15b1097SDavid Woodhouse         struct evtchn_reset reset;
1286a15b1097SDavid Woodhouse 
1287a15b1097SDavid Woodhouse         qemu_build_assert(sizeof(reset) == 2);
1288a15b1097SDavid Woodhouse         if (kvm_copy_from_gva(cs, arg, &reset, sizeof(reset))) {
1289a15b1097SDavid Woodhouse             err = -EFAULT;
1290a15b1097SDavid Woodhouse             break;
1291a15b1097SDavid Woodhouse         }
1292a15b1097SDavid Woodhouse 
1293a15b1097SDavid Woodhouse         err = xen_evtchn_reset_op(&reset);
1294a15b1097SDavid Woodhouse         break;
1295a15b1097SDavid Woodhouse     }
12963b06f29bSJoao Martins     default:
12973b06f29bSJoao Martins         return false;
12983b06f29bSJoao Martins     }
12993b06f29bSJoao Martins 
13003b06f29bSJoao Martins     exit->u.hcall.result = err;
13013b06f29bSJoao Martins     return true;
13023b06f29bSJoao Martins }
13033b06f29bSJoao Martins 
130479b7067dSJoao Martins int kvm_xen_soft_reset(void)
130579b7067dSJoao Martins {
1306c345104cSJoao Martins     CPUState *cpu;
1307fb0fd2ceSJoao Martins     int err;
1308fb0fd2ceSJoao Martins 
130979b7067dSJoao Martins     assert(qemu_mutex_iothread_locked());
131079b7067dSJoao Martins 
131179b7067dSJoao Martins     trace_kvm_xen_soft_reset();
131279b7067dSJoao Martins 
1313a15b1097SDavid Woodhouse     err = xen_evtchn_soft_reset();
1314a15b1097SDavid Woodhouse     if (err) {
1315a15b1097SDavid Woodhouse         return err;
1316a15b1097SDavid Woodhouse     }
1317a15b1097SDavid Woodhouse 
131891cce756SDavid Woodhouse     /*
131991cce756SDavid Woodhouse      * Zero is the reset/startup state for HVM_PARAM_CALLBACK_IRQ. Strictly,
132091cce756SDavid Woodhouse      * it maps to HVM_PARAM_CALLBACK_TYPE_GSI with GSI#0, but Xen refuses to
132191cce756SDavid Woodhouse      * to deliver to the timer interrupt and treats that as 'disabled'.
132291cce756SDavid Woodhouse      */
132391cce756SDavid Woodhouse     err = xen_evtchn_set_callback_param(0);
132491cce756SDavid Woodhouse     if (err) {
132591cce756SDavid Woodhouse         return err;
132691cce756SDavid Woodhouse     }
132791cce756SDavid Woodhouse 
1328c345104cSJoao Martins     CPU_FOREACH(cpu) {
1329c345104cSJoao Martins         async_run_on_cpu(cpu, do_vcpu_soft_reset, RUN_ON_CPU_NULL);
1330c345104cSJoao Martins     }
1331c345104cSJoao Martins 
1332fb0fd2ceSJoao Martins     err = xen_overlay_map_shinfo_page(INVALID_GFN);
1333fb0fd2ceSJoao Martins     if (err) {
1334fb0fd2ceSJoao Martins         return err;
1335fb0fd2ceSJoao Martins     }
1336fb0fd2ceSJoao Martins 
133779b7067dSJoao Martins     return 0;
133879b7067dSJoao Martins }
133979b7067dSJoao Martins 
134079b7067dSJoao Martins static int schedop_shutdown(CPUState *cs, uint64_t arg)
134179b7067dSJoao Martins {
134279b7067dSJoao Martins     struct sched_shutdown shutdown;
134379b7067dSJoao Martins     int ret = 0;
134479b7067dSJoao Martins 
134579b7067dSJoao Martins     /* No need for 32/64 compat handling */
134679b7067dSJoao Martins     qemu_build_assert(sizeof(shutdown) == 4);
134779b7067dSJoao Martins 
134879b7067dSJoao Martins     if (kvm_copy_from_gva(cs, arg, &shutdown, sizeof(shutdown))) {
134979b7067dSJoao Martins         return -EFAULT;
135079b7067dSJoao Martins     }
135179b7067dSJoao Martins 
135279b7067dSJoao Martins     switch (shutdown.reason) {
135379b7067dSJoao Martins     case SHUTDOWN_crash:
135479b7067dSJoao Martins         cpu_dump_state(cs, stderr, CPU_DUMP_CODE);
135579b7067dSJoao Martins         qemu_system_guest_panicked(NULL);
135679b7067dSJoao Martins         break;
135779b7067dSJoao Martins 
135879b7067dSJoao Martins     case SHUTDOWN_reboot:
135979b7067dSJoao Martins         qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
136079b7067dSJoao Martins         break;
136179b7067dSJoao Martins 
136279b7067dSJoao Martins     case SHUTDOWN_poweroff:
136379b7067dSJoao Martins         qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
136479b7067dSJoao Martins         break;
136579b7067dSJoao Martins 
136679b7067dSJoao Martins     case SHUTDOWN_soft_reset:
136779b7067dSJoao Martins         qemu_mutex_lock_iothread();
136879b7067dSJoao Martins         ret = kvm_xen_soft_reset();
136979b7067dSJoao Martins         qemu_mutex_unlock_iothread();
137079b7067dSJoao Martins         break;
137179b7067dSJoao Martins 
137279b7067dSJoao Martins     default:
137379b7067dSJoao Martins         ret = -EINVAL;
137479b7067dSJoao Martins         break;
137579b7067dSJoao Martins     }
137679b7067dSJoao Martins 
137779b7067dSJoao Martins     return ret;
137879b7067dSJoao Martins }
137979b7067dSJoao Martins 
138079b7067dSJoao Martins static bool kvm_xen_hcall_sched_op(struct kvm_xen_exit *exit, X86CPU *cpu,
138179b7067dSJoao Martins                                    int cmd, uint64_t arg)
138279b7067dSJoao Martins {
138379b7067dSJoao Martins     CPUState *cs = CPU(cpu);
138479b7067dSJoao Martins     int err = -ENOSYS;
138579b7067dSJoao Martins 
138679b7067dSJoao Martins     switch (cmd) {
138779b7067dSJoao Martins     case SCHEDOP_shutdown:
138879b7067dSJoao Martins         err = schedop_shutdown(cs, arg);
138979b7067dSJoao Martins         break;
139079b7067dSJoao Martins 
1391c789b9efSDavid Woodhouse     case SCHEDOP_poll:
1392c789b9efSDavid Woodhouse         /*
1393c789b9efSDavid Woodhouse          * Linux will panic if this doesn't work. Just yield; it's not
1394c789b9efSDavid Woodhouse          * worth overthinking it because with event channel handling
1395c789b9efSDavid Woodhouse          * in KVM, the kernel will intercept this and it will never
1396c789b9efSDavid Woodhouse          * reach QEMU anyway. The semantics of the hypercall explicltly
1397c789b9efSDavid Woodhouse          * permit spurious wakeups.
1398c789b9efSDavid Woodhouse          */
1399c789b9efSDavid Woodhouse     case SCHEDOP_yield:
1400c789b9efSDavid Woodhouse         sched_yield();
1401c789b9efSDavid Woodhouse         err = 0;
1402c789b9efSDavid Woodhouse         break;
1403c789b9efSDavid Woodhouse 
140479b7067dSJoao Martins     default:
140579b7067dSJoao Martins         return false;
140679b7067dSJoao Martins     }
140779b7067dSJoao Martins 
140879b7067dSJoao Martins     exit->u.hcall.result = err;
140979b7067dSJoao Martins     return true;
141079b7067dSJoao Martins }
141179b7067dSJoao Martins 
141228b7ae94SDavid Woodhouse static bool kvm_xen_hcall_gnttab_op(struct kvm_xen_exit *exit, X86CPU *cpu,
141328b7ae94SDavid Woodhouse                                     int cmd, uint64_t arg, int count)
141428b7ae94SDavid Woodhouse {
141528b7ae94SDavid Woodhouse     CPUState *cs = CPU(cpu);
141628b7ae94SDavid Woodhouse     int err;
141728b7ae94SDavid Woodhouse 
141828b7ae94SDavid Woodhouse     switch (cmd) {
141928b7ae94SDavid Woodhouse     case GNTTABOP_set_version: {
142028b7ae94SDavid Woodhouse         struct gnttab_set_version set;
142128b7ae94SDavid Woodhouse 
142228b7ae94SDavid Woodhouse         qemu_build_assert(sizeof(set) == 4);
142328b7ae94SDavid Woodhouse         if (kvm_copy_from_gva(cs, arg, &set, sizeof(set))) {
142428b7ae94SDavid Woodhouse             err = -EFAULT;
142528b7ae94SDavid Woodhouse             break;
142628b7ae94SDavid Woodhouse         }
142728b7ae94SDavid Woodhouse 
142828b7ae94SDavid Woodhouse         err = xen_gnttab_set_version_op(&set);
142928b7ae94SDavid Woodhouse         if (!err && kvm_copy_to_gva(cs, arg, &set, sizeof(set))) {
143028b7ae94SDavid Woodhouse             err = -EFAULT;
143128b7ae94SDavid Woodhouse         }
143228b7ae94SDavid Woodhouse         break;
143328b7ae94SDavid Woodhouse     }
143428b7ae94SDavid Woodhouse     case GNTTABOP_get_version: {
143528b7ae94SDavid Woodhouse         struct gnttab_get_version get;
143628b7ae94SDavid Woodhouse 
143728b7ae94SDavid Woodhouse         qemu_build_assert(sizeof(get) == 8);
143828b7ae94SDavid Woodhouse         if (kvm_copy_from_gva(cs, arg, &get, sizeof(get))) {
143928b7ae94SDavid Woodhouse             err = -EFAULT;
144028b7ae94SDavid Woodhouse             break;
144128b7ae94SDavid Woodhouse         }
144228b7ae94SDavid Woodhouse 
144328b7ae94SDavid Woodhouse         err = xen_gnttab_get_version_op(&get);
144428b7ae94SDavid Woodhouse         if (!err && kvm_copy_to_gva(cs, arg, &get, sizeof(get))) {
144528b7ae94SDavid Woodhouse             err = -EFAULT;
144628b7ae94SDavid Woodhouse         }
144728b7ae94SDavid Woodhouse         break;
144828b7ae94SDavid Woodhouse     }
1449b46f9745SDavid Woodhouse     case GNTTABOP_query_size: {
1450b46f9745SDavid Woodhouse         struct gnttab_query_size size;
1451b46f9745SDavid Woodhouse 
1452b46f9745SDavid Woodhouse         qemu_build_assert(sizeof(size) == 16);
1453b46f9745SDavid Woodhouse         if (kvm_copy_from_gva(cs, arg, &size, sizeof(size))) {
1454b46f9745SDavid Woodhouse             err = -EFAULT;
1455b46f9745SDavid Woodhouse             break;
1456b46f9745SDavid Woodhouse         }
1457b46f9745SDavid Woodhouse 
1458b46f9745SDavid Woodhouse         err = xen_gnttab_query_size_op(&size);
1459b46f9745SDavid Woodhouse         if (!err && kvm_copy_to_gva(cs, arg, &size, sizeof(size))) {
1460b46f9745SDavid Woodhouse             err = -EFAULT;
1461b46f9745SDavid Woodhouse         }
1462b46f9745SDavid Woodhouse         break;
1463b46f9745SDavid Woodhouse     }
146428b7ae94SDavid Woodhouse     case GNTTABOP_setup_table:
146528b7ae94SDavid Woodhouse     case GNTTABOP_copy:
146628b7ae94SDavid Woodhouse     case GNTTABOP_map_grant_ref:
146728b7ae94SDavid Woodhouse     case GNTTABOP_unmap_grant_ref:
146828b7ae94SDavid Woodhouse     case GNTTABOP_swap_grant_ref:
146928b7ae94SDavid Woodhouse         return false;
147028b7ae94SDavid Woodhouse 
147128b7ae94SDavid Woodhouse     default:
147228b7ae94SDavid Woodhouse         /* Xen explicitly returns -ENOSYS to HVM guests for all others */
147328b7ae94SDavid Woodhouse         err = -ENOSYS;
147428b7ae94SDavid Woodhouse         break;
147528b7ae94SDavid Woodhouse     }
147628b7ae94SDavid Woodhouse 
147728b7ae94SDavid Woodhouse     exit->u.hcall.result = err;
147828b7ae94SDavid Woodhouse     return true;
147928b7ae94SDavid Woodhouse }
148028b7ae94SDavid Woodhouse 
148155a3f666SJoao Martins static bool do_kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
148255a3f666SJoao Martins {
148355a3f666SJoao Martins     uint16_t code = exit->u.hcall.input;
148455a3f666SJoao Martins 
148555a3f666SJoao Martins     if (exit->u.hcall.cpl > 0) {
148655a3f666SJoao Martins         exit->u.hcall.result = -EPERM;
148755a3f666SJoao Martins         return true;
148855a3f666SJoao Martins     }
148955a3f666SJoao Martins 
149055a3f666SJoao Martins     switch (code) {
1491*b746a779SJoao Martins     case __HYPERVISOR_set_timer_op:
1492*b746a779SJoao Martins         if (exit->u.hcall.longmode) {
1493*b746a779SJoao Martins             return kvm_xen_hcall_set_timer_op(exit, cpu,
1494*b746a779SJoao Martins                                               exit->u.hcall.params[0]);
1495*b746a779SJoao Martins         } else {
1496*b746a779SJoao Martins             /* In 32-bit mode, the 64-bit timer value is in two args. */
1497*b746a779SJoao Martins             uint64_t val = ((uint64_t)exit->u.hcall.params[1]) << 32 |
1498*b746a779SJoao Martins                 (uint32_t)exit->u.hcall.params[0];
1499*b746a779SJoao Martins             return kvm_xen_hcall_set_timer_op(exit, cpu, val);
1500*b746a779SJoao Martins         }
150128b7ae94SDavid Woodhouse     case __HYPERVISOR_grant_table_op:
150228b7ae94SDavid Woodhouse         return kvm_xen_hcall_gnttab_op(exit, cpu, exit->u.hcall.params[0],
150328b7ae94SDavid Woodhouse                                        exit->u.hcall.params[1],
150428b7ae94SDavid Woodhouse                                        exit->u.hcall.params[2]);
150579b7067dSJoao Martins     case __HYPERVISOR_sched_op:
150679b7067dSJoao Martins         return kvm_xen_hcall_sched_op(exit, cpu, exit->u.hcall.params[0],
150779b7067dSJoao Martins                                       exit->u.hcall.params[1]);
15083b06f29bSJoao Martins     case __HYPERVISOR_event_channel_op:
15094858ba20SDavid Woodhouse         return kvm_xen_hcall_evtchn_op(exit, cpu, exit->u.hcall.params[0],
15103b06f29bSJoao Martins                                        exit->u.hcall.params[1]);
1511d70bd6a4SJoao Martins     case __HYPERVISOR_vcpu_op:
1512d70bd6a4SJoao Martins         return kvm_xen_hcall_vcpu_op(exit, cpu,
1513d70bd6a4SJoao Martins                                      exit->u.hcall.params[0],
1514d70bd6a4SJoao Martins                                      exit->u.hcall.params[1],
1515d70bd6a4SJoao Martins                                      exit->u.hcall.params[2]);
1516671bfdcdSJoao Martins     case __HYPERVISOR_hvm_op:
1517671bfdcdSJoao Martins         return kvm_xen_hcall_hvm_op(exit, cpu, exit->u.hcall.params[0],
1518671bfdcdSJoao Martins                                     exit->u.hcall.params[1]);
1519fb0fd2ceSJoao Martins     case __HYPERVISOR_memory_op:
1520fb0fd2ceSJoao Martins         return kvm_xen_hcall_memory_op(exit, cpu, exit->u.hcall.params[0],
1521fb0fd2ceSJoao Martins                                        exit->u.hcall.params[1]);
1522bedcc139SJoao Martins     case __HYPERVISOR_xen_version:
1523bedcc139SJoao Martins         return kvm_xen_hcall_xen_version(exit, cpu, exit->u.hcall.params[0],
1524bedcc139SJoao Martins                                          exit->u.hcall.params[1]);
152555a3f666SJoao Martins     default:
152655a3f666SJoao Martins         return false;
152755a3f666SJoao Martins     }
152855a3f666SJoao Martins }
152955a3f666SJoao Martins 
153055a3f666SJoao Martins int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
153155a3f666SJoao Martins {
153255a3f666SJoao Martins     if (exit->type != KVM_EXIT_XEN_HCALL) {
153355a3f666SJoao Martins         return -1;
153455a3f666SJoao Martins     }
153555a3f666SJoao Martins 
1536110a0ea5SDavid Woodhouse     /*
1537110a0ea5SDavid Woodhouse      * The kernel latches the guest 32/64 mode when the MSR is used to fill
1538110a0ea5SDavid Woodhouse      * the hypercall page. So if we see a hypercall in a mode that doesn't
1539110a0ea5SDavid Woodhouse      * match our own idea of the guest mode, fetch the kernel's idea of the
1540110a0ea5SDavid Woodhouse      * "long mode" to remain in sync.
1541110a0ea5SDavid Woodhouse      */
1542110a0ea5SDavid Woodhouse     if (exit->u.hcall.longmode != xen_is_long_mode()) {
1543110a0ea5SDavid Woodhouse         xen_sync_long_mode();
1544110a0ea5SDavid Woodhouse     }
1545110a0ea5SDavid Woodhouse 
154655a3f666SJoao Martins     if (!do_kvm_xen_handle_exit(cpu, exit)) {
154755a3f666SJoao Martins         /*
154855a3f666SJoao Martins          * Some hypercalls will be deliberately "implemented" by returning
154955a3f666SJoao Martins          * -ENOSYS. This case is for hypercalls which are unexpected.
155055a3f666SJoao Martins          */
155155a3f666SJoao Martins         exit->u.hcall.result = -ENOSYS;
155255a3f666SJoao Martins         qemu_log_mask(LOG_UNIMP, "Unimplemented Xen hypercall %"
155355a3f666SJoao Martins                       PRId64 " (0x%" PRIx64 " 0x%" PRIx64 " 0x%" PRIx64 ")\n",
155455a3f666SJoao Martins                       (uint64_t)exit->u.hcall.input,
155555a3f666SJoao Martins                       (uint64_t)exit->u.hcall.params[0],
155655a3f666SJoao Martins                       (uint64_t)exit->u.hcall.params[1],
155755a3f666SJoao Martins                       (uint64_t)exit->u.hcall.params[2]);
155855a3f666SJoao Martins     }
155955a3f666SJoao Martins 
156055a3f666SJoao Martins     trace_kvm_xen_hypercall(CPU(cpu)->cpu_index, exit->u.hcall.cpl,
156155a3f666SJoao Martins                             exit->u.hcall.input, exit->u.hcall.params[0],
156255a3f666SJoao Martins                             exit->u.hcall.params[1], exit->u.hcall.params[2],
156355a3f666SJoao Martins                             exit->u.hcall.result);
156455a3f666SJoao Martins     return 0;
156555a3f666SJoao Martins }
1566c345104cSJoao Martins 
15676f43f2eeSDavid Woodhouse uint16_t kvm_xen_get_gnttab_max_frames(void)
15686f43f2eeSDavid Woodhouse {
15696f43f2eeSDavid Woodhouse     KVMState *s = KVM_STATE(current_accel());
15706f43f2eeSDavid Woodhouse     return s->xen_gnttab_max_frames;
15716f43f2eeSDavid Woodhouse }
15726f43f2eeSDavid Woodhouse 
1573c345104cSJoao Martins int kvm_put_xen_state(CPUState *cs)
1574c345104cSJoao Martins {
1575c345104cSJoao Martins     X86CPU *cpu = X86_CPU(cs);
1576c345104cSJoao Martins     CPUX86State *env = &cpu->env;
1577c345104cSJoao Martins     uint64_t gpa;
1578c345104cSJoao Martins     int ret;
1579c345104cSJoao Martins 
1580c345104cSJoao Martins     gpa = env->xen_vcpu_info_gpa;
1581c345104cSJoao Martins     if (gpa == INVALID_GPA) {
1582c345104cSJoao Martins         gpa = env->xen_vcpu_info_default_gpa;
1583c345104cSJoao Martins     }
1584c345104cSJoao Martins 
1585c345104cSJoao Martins     if (gpa != INVALID_GPA) {
158627d4075dSDavid Woodhouse         ret = set_vcpu_info(cs, gpa);
1587c345104cSJoao Martins         if (ret < 0) {
1588c345104cSJoao Martins             return ret;
1589c345104cSJoao Martins         }
1590c345104cSJoao Martins     }
1591c345104cSJoao Martins 
1592f0689302SJoao Martins     gpa = env->xen_vcpu_time_info_gpa;
1593f0689302SJoao Martins     if (gpa != INVALID_GPA) {
1594f0689302SJoao Martins         ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
1595f0689302SJoao Martins                                     gpa);
1596f0689302SJoao Martins         if (ret < 0) {
1597f0689302SJoao Martins             return ret;
1598f0689302SJoao Martins         }
1599f0689302SJoao Martins     }
1600f0689302SJoao Martins 
16015092db87SJoao Martins     gpa = env->xen_vcpu_runstate_gpa;
16025092db87SJoao Martins     if (gpa != INVALID_GPA) {
16035092db87SJoao Martins         ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
16045092db87SJoao Martins                                     gpa);
16055092db87SJoao Martins         if (ret < 0) {
16065092db87SJoao Martins             return ret;
16075092db87SJoao Martins         }
16085092db87SJoao Martins     }
16095092db87SJoao Martins 
1610*b746a779SJoao Martins     if (env->xen_periodic_timer_period) {
1611*b746a779SJoao Martins         ret = do_set_periodic_timer(cs, env->xen_periodic_timer_period);
1612*b746a779SJoao Martins         if (ret < 0) {
1613*b746a779SJoao Martins             return ret;
1614*b746a779SJoao Martins         }
1615*b746a779SJoao Martins     }
1616*b746a779SJoao Martins 
1617105b47fdSAnkur Arora     if (!kvm_xen_has_cap(EVTCHN_SEND)) {
1618*b746a779SJoao Martins         /*
1619*b746a779SJoao Martins          * If the kernel has EVTCHN_SEND support then it handles timers too,
1620*b746a779SJoao Martins          * so the timer will be restored by kvm_xen_set_vcpu_timer() below.
1621*b746a779SJoao Martins          */
1622*b746a779SJoao Martins         if (env->xen_singleshot_timer_ns) {
1623*b746a779SJoao Martins             ret = do_set_singleshot_timer(cs, env->xen_singleshot_timer_ns,
1624*b746a779SJoao Martins                                     false, false);
1625*b746a779SJoao Martins             if (ret < 0) {
1626*b746a779SJoao Martins                 return ret;
1627*b746a779SJoao Martins             }
1628*b746a779SJoao Martins         }
1629105b47fdSAnkur Arora         return 0;
1630105b47fdSAnkur Arora     }
1631105b47fdSAnkur Arora 
1632105b47fdSAnkur Arora     if (env->xen_vcpu_callback_vector) {
1633105b47fdSAnkur Arora         ret = kvm_xen_set_vcpu_callback_vector(cs);
1634105b47fdSAnkur Arora         if (ret < 0) {
1635105b47fdSAnkur Arora             return ret;
1636105b47fdSAnkur Arora         }
1637105b47fdSAnkur Arora     }
1638105b47fdSAnkur Arora 
1639c723d4c1SDavid Woodhouse     if (env->xen_virq[VIRQ_TIMER]) {
1640c723d4c1SDavid Woodhouse         ret = kvm_xen_set_vcpu_timer(cs);
1641c723d4c1SDavid Woodhouse         if (ret < 0) {
1642c723d4c1SDavid Woodhouse             return ret;
1643c723d4c1SDavid Woodhouse         }
1644c723d4c1SDavid Woodhouse     }
1645c345104cSJoao Martins     return 0;
1646c345104cSJoao Martins }
1647c345104cSJoao Martins 
1648c345104cSJoao Martins int kvm_get_xen_state(CPUState *cs)
1649c345104cSJoao Martins {
1650c345104cSJoao Martins     X86CPU *cpu = X86_CPU(cs);
1651c345104cSJoao Martins     CPUX86State *env = &cpu->env;
1652c345104cSJoao Martins     uint64_t gpa;
1653c723d4c1SDavid Woodhouse     int ret;
1654c345104cSJoao Martins 
1655c345104cSJoao Martins     /*
1656c345104cSJoao Martins      * The kernel does not mark vcpu_info as dirty when it delivers interrupts
1657c345104cSJoao Martins      * to it. It's up to userspace to *assume* that any page shared thus is
1658c345104cSJoao Martins      * always considered dirty. The shared_info page is different since it's
1659c345104cSJoao Martins      * an overlay and migrated separately anyway.
1660c345104cSJoao Martins      */
1661c345104cSJoao Martins     gpa = env->xen_vcpu_info_gpa;
1662c345104cSJoao Martins     if (gpa == INVALID_GPA) {
1663c345104cSJoao Martins         gpa = env->xen_vcpu_info_default_gpa;
1664c345104cSJoao Martins     }
1665c345104cSJoao Martins     if (gpa != INVALID_GPA) {
1666c345104cSJoao Martins         MemoryRegionSection mrs = memory_region_find(get_system_memory(),
1667c345104cSJoao Martins                                                      gpa,
1668c345104cSJoao Martins                                                      sizeof(struct vcpu_info));
1669c345104cSJoao Martins         if (mrs.mr &&
1670c345104cSJoao Martins             !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
1671c345104cSJoao Martins             memory_region_set_dirty(mrs.mr, mrs.offset_within_region,
1672c345104cSJoao Martins                                     sizeof(struct vcpu_info));
1673c345104cSJoao Martins         }
1674c345104cSJoao Martins     }
1675c345104cSJoao Martins 
1676c723d4c1SDavid Woodhouse     if (!kvm_xen_has_cap(EVTCHN_SEND)) {
1677c723d4c1SDavid Woodhouse         return 0;
1678c723d4c1SDavid Woodhouse     }
1679c723d4c1SDavid Woodhouse 
1680c723d4c1SDavid Woodhouse     /*
1681c723d4c1SDavid Woodhouse      * If the kernel is accelerating timers, read out the current value of the
1682c723d4c1SDavid Woodhouse      * singleshot timer deadline.
1683c723d4c1SDavid Woodhouse      */
1684c723d4c1SDavid Woodhouse     if (env->xen_virq[VIRQ_TIMER]) {
1685c723d4c1SDavid Woodhouse         struct kvm_xen_vcpu_attr va = {
1686c723d4c1SDavid Woodhouse             .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
1687c723d4c1SDavid Woodhouse         };
1688c723d4c1SDavid Woodhouse         ret = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_GET_ATTR, &va);
1689c723d4c1SDavid Woodhouse         if (ret < 0) {
1690c723d4c1SDavid Woodhouse             return ret;
1691c723d4c1SDavid Woodhouse         }
1692c723d4c1SDavid Woodhouse         env->xen_singleshot_timer_ns = va.u.timer.expires_ns;
1693c723d4c1SDavid Woodhouse     }
1694c723d4c1SDavid Woodhouse 
1695c345104cSJoao Martins     return 0;
1696c345104cSJoao Martins }
1697