xref: /qemu/target/i386/kvm/xen-emu.c (revision a28b0fc0345b1ddc12109de807e6d4ce566c1914)
161491cf4SDavid Woodhouse /*
261491cf4SDavid Woodhouse  * Xen HVM emulation support in KVM
361491cf4SDavid Woodhouse  *
461491cf4SDavid Woodhouse  * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
561491cf4SDavid Woodhouse  * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
661491cf4SDavid Woodhouse  *
761491cf4SDavid Woodhouse  * This work is licensed under the terms of the GNU GPL, version 2 or later.
861491cf4SDavid Woodhouse  * See the COPYING file in the top-level directory.
961491cf4SDavid Woodhouse  *
1061491cf4SDavid Woodhouse  */
1161491cf4SDavid Woodhouse 
1261491cf4SDavid Woodhouse #include "qemu/osdep.h"
1355a3f666SJoao Martins #include "qemu/log.h"
1479b7067dSJoao Martins #include "qemu/main-loop.h"
15fb0fd2ceSJoao Martins #include "hw/xen/xen.h"
1661491cf4SDavid Woodhouse #include "sysemu/kvm_int.h"
1761491cf4SDavid Woodhouse #include "sysemu/kvm_xen.h"
1861491cf4SDavid Woodhouse #include "kvm/kvm_i386.h"
19bedcc139SJoao Martins #include "exec/address-spaces.h"
2061491cf4SDavid Woodhouse #include "xen-emu.h"
2155a3f666SJoao Martins #include "trace.h"
2279b7067dSJoao Martins #include "sysemu/runstate.h"
2361491cf4SDavid Woodhouse 
2427d4075dSDavid Woodhouse #include "hw/pci/msi.h"
2527d4075dSDavid Woodhouse #include "hw/i386/apic-msidef.h"
26110a0ea5SDavid Woodhouse #include "hw/i386/kvm/xen_overlay.h"
2791cce756SDavid Woodhouse #include "hw/i386/kvm/xen_evtchn.h"
28*a28b0fc0SDavid Woodhouse #include "hw/i386/kvm/xen_gnttab.h"
29110a0ea5SDavid Woodhouse 
30bedcc139SJoao Martins #include "hw/xen/interface/version.h"
3179b7067dSJoao Martins #include "hw/xen/interface/sched.h"
32fb0fd2ceSJoao Martins #include "hw/xen/interface/memory.h"
33671bfdcdSJoao Martins #include "hw/xen/interface/hvm/hvm_op.h"
34105b47fdSAnkur Arora #include "hw/xen/interface/hvm/params.h"
35d70bd6a4SJoao Martins #include "hw/xen/interface/vcpu.h"
363b06f29bSJoao Martins #include "hw/xen/interface/event_channel.h"
37fb0fd2ceSJoao Martins 
38fb0fd2ceSJoao Martins #include "xen-compat.h"
39fb0fd2ceSJoao Martins 
/*
 * Decide whether a hypercall's arguments use the 32-bit compat layout.
 * On TARGET_X86_64 builds that is the case whenever the hypercall was not
 * made in long mode; other builds never select the compat path.
 */
#if defined(TARGET_X86_64)
#define hypercall_compat32(longmode) (!(longmode))
#else
#define hypercall_compat32(longmode) (false)
#endif
45bedcc139SJoao Martins 
46f0689302SJoao Martins static bool kvm_gva_to_gpa(CPUState *cs, uint64_t gva, uint64_t *gpa,
47f0689302SJoao Martins                            size_t *len, bool is_write)
48bedcc139SJoao Martins {
49bedcc139SJoao Martins         struct kvm_translation tr = {
50bedcc139SJoao Martins             .linear_address = gva,
51bedcc139SJoao Martins         };
52bedcc139SJoao Martins 
53f0689302SJoao Martins         if (len) {
54f0689302SJoao Martins             *len = TARGET_PAGE_SIZE - (gva & ~TARGET_PAGE_MASK);
55f0689302SJoao Martins         }
56f0689302SJoao Martins 
57f0689302SJoao Martins         if (kvm_vcpu_ioctl(cs, KVM_TRANSLATE, &tr) || !tr.valid ||
58f0689302SJoao Martins             (is_write && !tr.writeable)) {
59f0689302SJoao Martins             return false;
60f0689302SJoao Martins         }
61f0689302SJoao Martins         *gpa = tr.physical_address;
62f0689302SJoao Martins         return true;
63f0689302SJoao Martins }
64f0689302SJoao Martins 
65f0689302SJoao Martins static int kvm_gva_rw(CPUState *cs, uint64_t gva, void *_buf, size_t sz,
66f0689302SJoao Martins                       bool is_write)
67f0689302SJoao Martins {
68f0689302SJoao Martins     uint8_t *buf = (uint8_t *)_buf;
69f0689302SJoao Martins     uint64_t gpa;
70f0689302SJoao Martins     size_t len;
71f0689302SJoao Martins 
72f0689302SJoao Martins     while (sz) {
73f0689302SJoao Martins         if (!kvm_gva_to_gpa(cs, gva, &gpa, &len, is_write)) {
74f0689302SJoao Martins             return -EFAULT;
75f0689302SJoao Martins         }
76bedcc139SJoao Martins         if (len > sz) {
77bedcc139SJoao Martins             len = sz;
78bedcc139SJoao Martins         }
79bedcc139SJoao Martins 
80f0689302SJoao Martins         cpu_physical_memory_rw(gpa, buf, len, is_write);
81bedcc139SJoao Martins 
82bedcc139SJoao Martins         buf += len;
83bedcc139SJoao Martins         sz -= len;
84bedcc139SJoao Martins         gva += len;
85bedcc139SJoao Martins     }
86bedcc139SJoao Martins 
87bedcc139SJoao Martins     return 0;
88bedcc139SJoao Martins }
89bedcc139SJoao Martins 
90bedcc139SJoao Martins static inline int kvm_copy_from_gva(CPUState *cs, uint64_t gva, void *buf,
91bedcc139SJoao Martins                                     size_t sz)
92bedcc139SJoao Martins {
93bedcc139SJoao Martins     return kvm_gva_rw(cs, gva, buf, sz, false);
94bedcc139SJoao Martins }
95bedcc139SJoao Martins 
96bedcc139SJoao Martins static inline int kvm_copy_to_gva(CPUState *cs, uint64_t gva, void *buf,
97bedcc139SJoao Martins                                   size_t sz)
98bedcc139SJoao Martins {
99bedcc139SJoao Martins     return kvm_gva_rw(cs, gva, buf, sz, true);
100bedcc139SJoao Martins }
101bedcc139SJoao Martins 
102f66b8a83SJoao Martins int kvm_xen_init(KVMState *s, uint32_t hypercall_msr)
10361491cf4SDavid Woodhouse {
10461491cf4SDavid Woodhouse     const int required_caps = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
10561491cf4SDavid Woodhouse         KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_SHARED_INFO;
10661491cf4SDavid Woodhouse     struct kvm_xen_hvm_config cfg = {
107f66b8a83SJoao Martins         .msr = hypercall_msr,
10861491cf4SDavid Woodhouse         .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
10961491cf4SDavid Woodhouse     };
11061491cf4SDavid Woodhouse     int xen_caps, ret;
11161491cf4SDavid Woodhouse 
11261491cf4SDavid Woodhouse     xen_caps = kvm_check_extension(s, KVM_CAP_XEN_HVM);
11361491cf4SDavid Woodhouse     if (required_caps & ~xen_caps) {
11461491cf4SDavid Woodhouse         error_report("kvm: Xen HVM guest support not present or insufficient");
11561491cf4SDavid Woodhouse         return -ENOSYS;
11661491cf4SDavid Woodhouse     }
11761491cf4SDavid Woodhouse 
11861491cf4SDavid Woodhouse     if (xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND) {
11961491cf4SDavid Woodhouse         struct kvm_xen_hvm_attr ha = {
12061491cf4SDavid Woodhouse             .type = KVM_XEN_ATTR_TYPE_XEN_VERSION,
12161491cf4SDavid Woodhouse             .u.xen_version = s->xen_version,
12261491cf4SDavid Woodhouse         };
12361491cf4SDavid Woodhouse         (void)kvm_vm_ioctl(s, KVM_XEN_HVM_SET_ATTR, &ha);
12461491cf4SDavid Woodhouse 
12561491cf4SDavid Woodhouse         cfg.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
12661491cf4SDavid Woodhouse     }
12761491cf4SDavid Woodhouse 
12861491cf4SDavid Woodhouse     ret = kvm_vm_ioctl(s, KVM_XEN_HVM_CONFIG, &cfg);
12961491cf4SDavid Woodhouse     if (ret < 0) {
13061491cf4SDavid Woodhouse         error_report("kvm: Failed to enable Xen HVM support: %s",
13161491cf4SDavid Woodhouse                      strerror(-ret));
13261491cf4SDavid Woodhouse         return ret;
13361491cf4SDavid Woodhouse     }
13461491cf4SDavid Woodhouse 
1352aff696bSDavid Woodhouse     /* If called a second time, don't repeat the rest of the setup. */
1362aff696bSDavid Woodhouse     if (s->xen_caps) {
1372aff696bSDavid Woodhouse         return 0;
1382aff696bSDavid Woodhouse     }
1392aff696bSDavid Woodhouse 
1402aff696bSDavid Woodhouse     /*
1412aff696bSDavid Woodhouse      * Event channel delivery via GSI/PCI_INTX needs to poll the vcpu_info
1422aff696bSDavid Woodhouse      * of vCPU0 to deassert the IRQ when ->evtchn_upcall_pending is cleared.
1432aff696bSDavid Woodhouse      *
1442aff696bSDavid Woodhouse      * In the kernel, there's a notifier hook on the PIC/IOAPIC which allows
1452aff696bSDavid Woodhouse      * such things to be polled at precisely the right time. We *could* do
1462aff696bSDavid Woodhouse      * it nicely in the kernel: check vcpu_info[0]->evtchn_upcall_pending at
1472aff696bSDavid Woodhouse      * the moment the IRQ is acked, and see if it should be reasserted.
1482aff696bSDavid Woodhouse      *
1492aff696bSDavid Woodhouse      * But the in-kernel irqchip is deprecated, so we're unlikely to add
1502aff696bSDavid Woodhouse      * that support in the kernel. Insist on using the split irqchip mode
1512aff696bSDavid Woodhouse      * instead.
1522aff696bSDavid Woodhouse      *
1532aff696bSDavid Woodhouse      * This leaves us polling for the level going low in QEMU, which lacks
1542aff696bSDavid Woodhouse      * the appropriate hooks in its PIC/IOAPIC code. Even VFIO is sending a
1552aff696bSDavid Woodhouse      * spurious 'ack' to an INTX IRQ every time there's any MMIO access to
1562aff696bSDavid Woodhouse      * the device (for which it has to unmap the device and trap access, for
1572aff696bSDavid Woodhouse      * some period after an IRQ!!). In the Xen case, we do it on exit from
1582aff696bSDavid Woodhouse      * KVM_RUN, if the flag is set to say that the GSI is currently asserted.
1592aff696bSDavid Woodhouse      * Which is kind of icky, but less so than the VFIO one. I may fix them
1602aff696bSDavid Woodhouse      * both later...
1612aff696bSDavid Woodhouse      */
1622aff696bSDavid Woodhouse     if (!kvm_kernel_irqchip_split()) {
1632aff696bSDavid Woodhouse         error_report("kvm: Xen support requires kernel-irqchip=split");
1642aff696bSDavid Woodhouse         return -EINVAL;
1652aff696bSDavid Woodhouse     }
1662aff696bSDavid Woodhouse 
16761491cf4SDavid Woodhouse     s->xen_caps = xen_caps;
16861491cf4SDavid Woodhouse     return 0;
16961491cf4SDavid Woodhouse }
17061491cf4SDavid Woodhouse 
1715e691a95SDavid Woodhouse int kvm_xen_init_vcpu(CPUState *cs)
1725e691a95SDavid Woodhouse {
173c345104cSJoao Martins     X86CPU *cpu = X86_CPU(cs);
174c345104cSJoao Martins     CPUX86State *env = &cpu->env;
1755e691a95SDavid Woodhouse     int err;
1765e691a95SDavid Woodhouse 
1775e691a95SDavid Woodhouse     /*
1785e691a95SDavid Woodhouse      * The kernel needs to know the Xen/ACPI vCPU ID because that's
1795e691a95SDavid Woodhouse      * what the guest uses in hypercalls such as timers. It doesn't
1805e691a95SDavid Woodhouse      * match the APIC ID which is generally used for talking to the
1815e691a95SDavid Woodhouse      * kernel about vCPUs. And if vCPU threads race with creating
1825e691a95SDavid Woodhouse      * their KVM vCPUs out of order, it doesn't necessarily match
1835e691a95SDavid Woodhouse      * with the kernel's internal vCPU indices either.
1845e691a95SDavid Woodhouse      */
1855e691a95SDavid Woodhouse     if (kvm_xen_has_cap(EVTCHN_SEND)) {
1865e691a95SDavid Woodhouse         struct kvm_xen_vcpu_attr va = {
1875e691a95SDavid Woodhouse             .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID,
1885e691a95SDavid Woodhouse             .u.vcpu_id = cs->cpu_index,
1895e691a95SDavid Woodhouse         };
1905e691a95SDavid Woodhouse         err = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
1915e691a95SDavid Woodhouse         if (err) {
1925e691a95SDavid Woodhouse             error_report("kvm: Failed to set Xen vCPU ID attribute: %s",
1935e691a95SDavid Woodhouse                          strerror(-err));
1945e691a95SDavid Woodhouse             return err;
1955e691a95SDavid Woodhouse         }
1965e691a95SDavid Woodhouse     }
1975e691a95SDavid Woodhouse 
198c345104cSJoao Martins     env->xen_vcpu_info_gpa = INVALID_GPA;
199c345104cSJoao Martins     env->xen_vcpu_info_default_gpa = INVALID_GPA;
200f0689302SJoao Martins     env->xen_vcpu_time_info_gpa = INVALID_GPA;
2015092db87SJoao Martins     env->xen_vcpu_runstate_gpa = INVALID_GPA;
202c345104cSJoao Martins 
2035e691a95SDavid Woodhouse     return 0;
2045e691a95SDavid Woodhouse }
2055e691a95SDavid Woodhouse 
20661491cf4SDavid Woodhouse uint32_t kvm_xen_get_caps(void)
20761491cf4SDavid Woodhouse {
20861491cf4SDavid Woodhouse     return kvm_state->xen_caps;
20961491cf4SDavid Woodhouse }
21055a3f666SJoao Martins 
211bedcc139SJoao Martins static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu,
212bedcc139SJoao Martins                                      int cmd, uint64_t arg)
213bedcc139SJoao Martins {
214bedcc139SJoao Martins     int err = 0;
215bedcc139SJoao Martins 
216bedcc139SJoao Martins     switch (cmd) {
217bedcc139SJoao Martins     case XENVER_get_features: {
218bedcc139SJoao Martins         struct xen_feature_info fi;
219bedcc139SJoao Martins 
220bedcc139SJoao Martins         /* No need for 32/64 compat handling */
221bedcc139SJoao Martins         qemu_build_assert(sizeof(fi) == 8);
222bedcc139SJoao Martins 
223bedcc139SJoao Martins         err = kvm_copy_from_gva(CPU(cpu), arg, &fi, sizeof(fi));
224bedcc139SJoao Martins         if (err) {
225bedcc139SJoao Martins             break;
226bedcc139SJoao Martins         }
227bedcc139SJoao Martins 
228bedcc139SJoao Martins         fi.submap = 0;
229bedcc139SJoao Martins         if (fi.submap_idx == 0) {
230bedcc139SJoao Martins             fi.submap |= 1 << XENFEAT_writable_page_tables |
231bedcc139SJoao Martins                          1 << XENFEAT_writable_descriptor_tables |
232bedcc139SJoao Martins                          1 << XENFEAT_auto_translated_physmap |
233105b47fdSAnkur Arora                          1 << XENFEAT_supervisor_mode_kernel |
234105b47fdSAnkur Arora                          1 << XENFEAT_hvm_callback_vector;
235bedcc139SJoao Martins         }
236bedcc139SJoao Martins 
237bedcc139SJoao Martins         err = kvm_copy_to_gva(CPU(cpu), arg, &fi, sizeof(fi));
238bedcc139SJoao Martins         break;
239bedcc139SJoao Martins     }
240bedcc139SJoao Martins 
241bedcc139SJoao Martins     default:
242bedcc139SJoao Martins         return false;
243bedcc139SJoao Martins     }
244bedcc139SJoao Martins 
245bedcc139SJoao Martins     exit->u.hcall.result = err;
246bedcc139SJoao Martins     return true;
247bedcc139SJoao Martins }
248bedcc139SJoao Martins 
249c345104cSJoao Martins static int kvm_xen_set_vcpu_attr(CPUState *cs, uint16_t type, uint64_t gpa)
250c345104cSJoao Martins {
251c345104cSJoao Martins     struct kvm_xen_vcpu_attr xhsi;
252c345104cSJoao Martins 
253c345104cSJoao Martins     xhsi.type = type;
254c345104cSJoao Martins     xhsi.u.gpa = gpa;
255c345104cSJoao Martins 
256c345104cSJoao Martins     trace_kvm_xen_set_vcpu_attr(cs->cpu_index, type, gpa);
257c345104cSJoao Martins 
258c345104cSJoao Martins     return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xhsi);
259c345104cSJoao Martins }
260c345104cSJoao Martins 
261105b47fdSAnkur Arora static int kvm_xen_set_vcpu_callback_vector(CPUState *cs)
262105b47fdSAnkur Arora {
263105b47fdSAnkur Arora     uint8_t vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
264105b47fdSAnkur Arora     struct kvm_xen_vcpu_attr xva;
265105b47fdSAnkur Arora 
266105b47fdSAnkur Arora     xva.type = KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR;
267105b47fdSAnkur Arora     xva.u.vector = vector;
268105b47fdSAnkur Arora 
269105b47fdSAnkur Arora     trace_kvm_xen_set_vcpu_callback(cs->cpu_index, vector);
270105b47fdSAnkur Arora 
271105b47fdSAnkur Arora     return kvm_vcpu_ioctl(cs, KVM_XEN_HVM_SET_ATTR, &xva);
272105b47fdSAnkur Arora }
273105b47fdSAnkur Arora 
274105b47fdSAnkur Arora static void do_set_vcpu_callback_vector(CPUState *cs, run_on_cpu_data data)
275105b47fdSAnkur Arora {
276105b47fdSAnkur Arora     X86CPU *cpu = X86_CPU(cs);
277105b47fdSAnkur Arora     CPUX86State *env = &cpu->env;
278105b47fdSAnkur Arora 
279105b47fdSAnkur Arora     env->xen_vcpu_callback_vector = data.host_int;
280105b47fdSAnkur Arora 
281105b47fdSAnkur Arora     if (kvm_xen_has_cap(EVTCHN_SEND)) {
282105b47fdSAnkur Arora         kvm_xen_set_vcpu_callback_vector(cs);
283105b47fdSAnkur Arora     }
284105b47fdSAnkur Arora }
285105b47fdSAnkur Arora 
28627d4075dSDavid Woodhouse static int set_vcpu_info(CPUState *cs, uint64_t gpa)
28727d4075dSDavid Woodhouse {
28827d4075dSDavid Woodhouse     X86CPU *cpu = X86_CPU(cs);
28927d4075dSDavid Woodhouse     CPUX86State *env = &cpu->env;
29027d4075dSDavid Woodhouse     MemoryRegionSection mrs = { .mr = NULL };
29127d4075dSDavid Woodhouse     void *vcpu_info_hva = NULL;
29227d4075dSDavid Woodhouse     int ret;
29327d4075dSDavid Woodhouse 
29427d4075dSDavid Woodhouse     ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, gpa);
29527d4075dSDavid Woodhouse     if (ret || gpa == INVALID_GPA) {
29627d4075dSDavid Woodhouse         goto out;
29727d4075dSDavid Woodhouse     }
29827d4075dSDavid Woodhouse 
29927d4075dSDavid Woodhouse     mrs = memory_region_find(get_system_memory(), gpa,
30027d4075dSDavid Woodhouse                              sizeof(struct vcpu_info));
30127d4075dSDavid Woodhouse     if (mrs.mr && mrs.mr->ram_block &&
30227d4075dSDavid Woodhouse         !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
30327d4075dSDavid Woodhouse         vcpu_info_hva = qemu_map_ram_ptr(mrs.mr->ram_block,
30427d4075dSDavid Woodhouse                                          mrs.offset_within_region);
30527d4075dSDavid Woodhouse     }
30627d4075dSDavid Woodhouse     if (!vcpu_info_hva) {
30727d4075dSDavid Woodhouse         if (mrs.mr) {
30827d4075dSDavid Woodhouse             memory_region_unref(mrs.mr);
30927d4075dSDavid Woodhouse             mrs.mr = NULL;
31027d4075dSDavid Woodhouse         }
31127d4075dSDavid Woodhouse         ret = -EINVAL;
31227d4075dSDavid Woodhouse     }
31327d4075dSDavid Woodhouse 
31427d4075dSDavid Woodhouse  out:
31527d4075dSDavid Woodhouse     if (env->xen_vcpu_info_mr) {
31627d4075dSDavid Woodhouse         memory_region_unref(env->xen_vcpu_info_mr);
31727d4075dSDavid Woodhouse     }
31827d4075dSDavid Woodhouse     env->xen_vcpu_info_hva = vcpu_info_hva;
31927d4075dSDavid Woodhouse     env->xen_vcpu_info_mr = mrs.mr;
32027d4075dSDavid Woodhouse     return ret;
32127d4075dSDavid Woodhouse }
32227d4075dSDavid Woodhouse 
323c345104cSJoao Martins static void do_set_vcpu_info_default_gpa(CPUState *cs, run_on_cpu_data data)
324c345104cSJoao Martins {
325c345104cSJoao Martins     X86CPU *cpu = X86_CPU(cs);
326c345104cSJoao Martins     CPUX86State *env = &cpu->env;
327c345104cSJoao Martins 
328c345104cSJoao Martins     env->xen_vcpu_info_default_gpa = data.host_ulong;
329c345104cSJoao Martins 
330c345104cSJoao Martins     /* Changing the default does nothing if a vcpu_info was explicitly set. */
331c345104cSJoao Martins     if (env->xen_vcpu_info_gpa == INVALID_GPA) {
33227d4075dSDavid Woodhouse         set_vcpu_info(cs, env->xen_vcpu_info_default_gpa);
333c345104cSJoao Martins     }
334c345104cSJoao Martins }
335c345104cSJoao Martins 
336c345104cSJoao Martins static void do_set_vcpu_info_gpa(CPUState *cs, run_on_cpu_data data)
337c345104cSJoao Martins {
338c345104cSJoao Martins     X86CPU *cpu = X86_CPU(cs);
339c345104cSJoao Martins     CPUX86State *env = &cpu->env;
340c345104cSJoao Martins 
341c345104cSJoao Martins     env->xen_vcpu_info_gpa = data.host_ulong;
342c345104cSJoao Martins 
34327d4075dSDavid Woodhouse     set_vcpu_info(cs, env->xen_vcpu_info_gpa);
34427d4075dSDavid Woodhouse }
34527d4075dSDavid Woodhouse 
34627d4075dSDavid Woodhouse void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id)
34727d4075dSDavid Woodhouse {
34827d4075dSDavid Woodhouse     CPUState *cs = qemu_get_cpu(vcpu_id);
34927d4075dSDavid Woodhouse     if (!cs) {
35027d4075dSDavid Woodhouse         return NULL;
35127d4075dSDavid Woodhouse     }
35227d4075dSDavid Woodhouse 
35327d4075dSDavid Woodhouse     return X86_CPU(cs)->env.xen_vcpu_info_hva;
35427d4075dSDavid Woodhouse }
35527d4075dSDavid Woodhouse 
356ddf0fd9aSDavid Woodhouse void kvm_xen_maybe_deassert_callback(CPUState *cs)
357ddf0fd9aSDavid Woodhouse {
358ddf0fd9aSDavid Woodhouse     CPUX86State *env = &X86_CPU(cs)->env;
359ddf0fd9aSDavid Woodhouse     struct vcpu_info *vi = env->xen_vcpu_info_hva;
360ddf0fd9aSDavid Woodhouse     if (!vi) {
361ddf0fd9aSDavid Woodhouse         return;
362ddf0fd9aSDavid Woodhouse     }
363ddf0fd9aSDavid Woodhouse 
364ddf0fd9aSDavid Woodhouse     /* If the evtchn_upcall_pending flag is cleared, turn the GSI off. */
365ddf0fd9aSDavid Woodhouse     if (!vi->evtchn_upcall_pending) {
366ddf0fd9aSDavid Woodhouse         qemu_mutex_lock_iothread();
367ddf0fd9aSDavid Woodhouse         /*
368ddf0fd9aSDavid Woodhouse          * Check again now we have the lock, because it may have been
369ddf0fd9aSDavid Woodhouse          * asserted in the interim. And we don't want to take the lock
370ddf0fd9aSDavid Woodhouse          * every time because this is a fast path.
371ddf0fd9aSDavid Woodhouse          */
372ddf0fd9aSDavid Woodhouse         if (!vi->evtchn_upcall_pending) {
373ddf0fd9aSDavid Woodhouse             X86_CPU(cs)->env.xen_callback_asserted = false;
374ddf0fd9aSDavid Woodhouse             xen_evtchn_set_callback_level(0);
375ddf0fd9aSDavid Woodhouse         }
376ddf0fd9aSDavid Woodhouse         qemu_mutex_unlock_iothread();
377ddf0fd9aSDavid Woodhouse     }
378ddf0fd9aSDavid Woodhouse }
379ddf0fd9aSDavid Woodhouse 
380ddf0fd9aSDavid Woodhouse void kvm_xen_set_callback_asserted(void)
381ddf0fd9aSDavid Woodhouse {
382ddf0fd9aSDavid Woodhouse     CPUState *cs = qemu_get_cpu(0);
383ddf0fd9aSDavid Woodhouse 
384ddf0fd9aSDavid Woodhouse     if (cs) {
385ddf0fd9aSDavid Woodhouse         X86_CPU(cs)->env.xen_callback_asserted = true;
386ddf0fd9aSDavid Woodhouse     }
387ddf0fd9aSDavid Woodhouse }
388ddf0fd9aSDavid Woodhouse 
38927d4075dSDavid Woodhouse void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type)
39027d4075dSDavid Woodhouse {
39127d4075dSDavid Woodhouse     CPUState *cs = qemu_get_cpu(vcpu_id);
39227d4075dSDavid Woodhouse     uint8_t vector;
39327d4075dSDavid Woodhouse 
39427d4075dSDavid Woodhouse     if (!cs) {
39527d4075dSDavid Woodhouse         return;
39627d4075dSDavid Woodhouse     }
39727d4075dSDavid Woodhouse 
39827d4075dSDavid Woodhouse     vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
39927d4075dSDavid Woodhouse     if (vector) {
40027d4075dSDavid Woodhouse         /*
40127d4075dSDavid Woodhouse          * The per-vCPU callback vector injected via lapic. Just
40227d4075dSDavid Woodhouse          * deliver it as an MSI.
40327d4075dSDavid Woodhouse          */
40427d4075dSDavid Woodhouse         MSIMessage msg = {
40527d4075dSDavid Woodhouse             .address = APIC_DEFAULT_ADDRESS | X86_CPU(cs)->apic_id,
40627d4075dSDavid Woodhouse             .data = vector | (1UL << MSI_DATA_LEVEL_SHIFT),
40727d4075dSDavid Woodhouse         };
40827d4075dSDavid Woodhouse         kvm_irqchip_send_msi(kvm_state, msg);
40927d4075dSDavid Woodhouse         return;
41027d4075dSDavid Woodhouse     }
41127d4075dSDavid Woodhouse 
41227d4075dSDavid Woodhouse     switch (type) {
41327d4075dSDavid Woodhouse     case HVM_PARAM_CALLBACK_TYPE_VECTOR:
41427d4075dSDavid Woodhouse         /*
41527d4075dSDavid Woodhouse          * If the evtchn_upcall_pending field in the vcpu_info is set, then
41627d4075dSDavid Woodhouse          * KVM will automatically deliver the vector on entering the vCPU
41727d4075dSDavid Woodhouse          * so all we have to do is kick it out.
41827d4075dSDavid Woodhouse          */
41927d4075dSDavid Woodhouse         qemu_cpu_kick(cs);
42027d4075dSDavid Woodhouse         break;
421ddf0fd9aSDavid Woodhouse 
422ddf0fd9aSDavid Woodhouse     case HVM_PARAM_CALLBACK_TYPE_GSI:
423ddf0fd9aSDavid Woodhouse     case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
424ddf0fd9aSDavid Woodhouse         if (vcpu_id == 0) {
425ddf0fd9aSDavid Woodhouse             xen_evtchn_set_callback_level(1);
426ddf0fd9aSDavid Woodhouse         }
427ddf0fd9aSDavid Woodhouse         break;
42827d4075dSDavid Woodhouse     }
429c345104cSJoao Martins }
430c345104cSJoao Martins 
431c723d4c1SDavid Woodhouse static int kvm_xen_set_vcpu_timer(CPUState *cs)
432c723d4c1SDavid Woodhouse {
433c723d4c1SDavid Woodhouse     X86CPU *cpu = X86_CPU(cs);
434c723d4c1SDavid Woodhouse     CPUX86State *env = &cpu->env;
435c723d4c1SDavid Woodhouse 
436c723d4c1SDavid Woodhouse     struct kvm_xen_vcpu_attr va = {
437c723d4c1SDavid Woodhouse         .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
438c723d4c1SDavid Woodhouse         .u.timer.port = env->xen_virq[VIRQ_TIMER],
439c723d4c1SDavid Woodhouse         .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
440c723d4c1SDavid Woodhouse         .u.timer.expires_ns = env->xen_singleshot_timer_ns,
441c723d4c1SDavid Woodhouse     };
442c723d4c1SDavid Woodhouse 
443c723d4c1SDavid Woodhouse     return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
444c723d4c1SDavid Woodhouse }
445c723d4c1SDavid Woodhouse 
446c723d4c1SDavid Woodhouse static void do_set_vcpu_timer_virq(CPUState *cs, run_on_cpu_data data)
447c723d4c1SDavid Woodhouse {
448c723d4c1SDavid Woodhouse     kvm_xen_set_vcpu_timer(cs);
449c723d4c1SDavid Woodhouse }
450c723d4c1SDavid Woodhouse 
451c723d4c1SDavid Woodhouse int kvm_xen_set_vcpu_virq(uint32_t vcpu_id, uint16_t virq, uint16_t port)
452c723d4c1SDavid Woodhouse {
453c723d4c1SDavid Woodhouse     CPUState *cs = qemu_get_cpu(vcpu_id);
454c723d4c1SDavid Woodhouse 
455c723d4c1SDavid Woodhouse     if (!cs) {
456c723d4c1SDavid Woodhouse         return -ENOENT;
457c723d4c1SDavid Woodhouse     }
458c723d4c1SDavid Woodhouse 
459c723d4c1SDavid Woodhouse     /* cpu.h doesn't include the actual Xen header. */
460c723d4c1SDavid Woodhouse     qemu_build_assert(NR_VIRQS == XEN_NR_VIRQS);
461c723d4c1SDavid Woodhouse 
462c723d4c1SDavid Woodhouse     if (virq >= NR_VIRQS) {
463c723d4c1SDavid Woodhouse         return -EINVAL;
464c723d4c1SDavid Woodhouse     }
465c723d4c1SDavid Woodhouse 
466c723d4c1SDavid Woodhouse     if (port && X86_CPU(cs)->env.xen_virq[virq]) {
467c723d4c1SDavid Woodhouse         return -EEXIST;
468c723d4c1SDavid Woodhouse     }
469c723d4c1SDavid Woodhouse 
470c723d4c1SDavid Woodhouse     X86_CPU(cs)->env.xen_virq[virq] = port;
471c723d4c1SDavid Woodhouse     if (virq == VIRQ_TIMER && kvm_xen_has_cap(EVTCHN_SEND)) {
472c723d4c1SDavid Woodhouse         async_run_on_cpu(cs, do_set_vcpu_timer_virq,
473c723d4c1SDavid Woodhouse                          RUN_ON_CPU_HOST_INT(port));
474c723d4c1SDavid Woodhouse     }
475c723d4c1SDavid Woodhouse     return 0;
476c723d4c1SDavid Woodhouse }
477c723d4c1SDavid Woodhouse 
478f0689302SJoao Martins static void do_set_vcpu_time_info_gpa(CPUState *cs, run_on_cpu_data data)
479f0689302SJoao Martins {
480f0689302SJoao Martins     X86CPU *cpu = X86_CPU(cs);
481f0689302SJoao Martins     CPUX86State *env = &cpu->env;
482f0689302SJoao Martins 
483f0689302SJoao Martins     env->xen_vcpu_time_info_gpa = data.host_ulong;
484f0689302SJoao Martins 
485f0689302SJoao Martins     kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
486f0689302SJoao Martins                           env->xen_vcpu_time_info_gpa);
487f0689302SJoao Martins }
488f0689302SJoao Martins 
4895092db87SJoao Martins static void do_set_vcpu_runstate_gpa(CPUState *cs, run_on_cpu_data data)
4905092db87SJoao Martins {
4915092db87SJoao Martins     X86CPU *cpu = X86_CPU(cs);
4925092db87SJoao Martins     CPUX86State *env = &cpu->env;
4935092db87SJoao Martins 
4945092db87SJoao Martins     env->xen_vcpu_runstate_gpa = data.host_ulong;
4955092db87SJoao Martins 
4965092db87SJoao Martins     kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
4975092db87SJoao Martins                           env->xen_vcpu_runstate_gpa);
4985092db87SJoao Martins }
4995092db87SJoao Martins 
500c345104cSJoao Martins static void do_vcpu_soft_reset(CPUState *cs, run_on_cpu_data data)
501c345104cSJoao Martins {
502c345104cSJoao Martins     X86CPU *cpu = X86_CPU(cs);
503c345104cSJoao Martins     CPUX86State *env = &cpu->env;
504c345104cSJoao Martins 
505c345104cSJoao Martins     env->xen_vcpu_info_gpa = INVALID_GPA;
506c345104cSJoao Martins     env->xen_vcpu_info_default_gpa = INVALID_GPA;
507f0689302SJoao Martins     env->xen_vcpu_time_info_gpa = INVALID_GPA;
5085092db87SJoao Martins     env->xen_vcpu_runstate_gpa = INVALID_GPA;
509105b47fdSAnkur Arora     env->xen_vcpu_callback_vector = 0;
510c723d4c1SDavid Woodhouse     env->xen_singleshot_timer_ns = 0;
511c723d4c1SDavid Woodhouse     memset(env->xen_virq, 0, sizeof(env->xen_virq));
512c345104cSJoao Martins 
51327d4075dSDavid Woodhouse     set_vcpu_info(cs, INVALID_GPA);
514f0689302SJoao Martins     kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
515f0689302SJoao Martins                           INVALID_GPA);
5165092db87SJoao Martins     kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
5175092db87SJoao Martins                           INVALID_GPA);
518105b47fdSAnkur Arora     if (kvm_xen_has_cap(EVTCHN_SEND)) {
519105b47fdSAnkur Arora         kvm_xen_set_vcpu_callback_vector(cs);
520c723d4c1SDavid Woodhouse         kvm_xen_set_vcpu_timer(cs);
521105b47fdSAnkur Arora     }
5225092db87SJoao Martins 
523c345104cSJoao Martins }
524c345104cSJoao Martins 
525fb0fd2ceSJoao Martins static int xen_set_shared_info(uint64_t gfn)
526fb0fd2ceSJoao Martins {
527fb0fd2ceSJoao Martins     uint64_t gpa = gfn << TARGET_PAGE_BITS;
528c345104cSJoao Martins     int i, err;
529fb0fd2ceSJoao Martins 
530fb0fd2ceSJoao Martins     QEMU_IOTHREAD_LOCK_GUARD();
531fb0fd2ceSJoao Martins 
532fb0fd2ceSJoao Martins     /*
533fb0fd2ceSJoao Martins      * The xen_overlay device tells KVM about it too, since it had to
534fb0fd2ceSJoao Martins      * do that on migration load anyway (unless we're going to jump
535fb0fd2ceSJoao Martins      * through lots of hoops to maintain the fiction that this isn't
536fb0fd2ceSJoao Martins      * KVM-specific.
537fb0fd2ceSJoao Martins      */
538fb0fd2ceSJoao Martins     err = xen_overlay_map_shinfo_page(gpa);
539fb0fd2ceSJoao Martins     if (err) {
540fb0fd2ceSJoao Martins             return err;
541fb0fd2ceSJoao Martins     }
542fb0fd2ceSJoao Martins 
543fb0fd2ceSJoao Martins     trace_kvm_xen_set_shared_info(gfn);
544fb0fd2ceSJoao Martins 
545c345104cSJoao Martins     for (i = 0; i < XEN_LEGACY_MAX_VCPUS; i++) {
546c345104cSJoao Martins         CPUState *cpu = qemu_get_cpu(i);
547c345104cSJoao Martins         if (cpu) {
548c345104cSJoao Martins             async_run_on_cpu(cpu, do_set_vcpu_info_default_gpa,
549c345104cSJoao Martins                              RUN_ON_CPU_HOST_ULONG(gpa));
550c345104cSJoao Martins         }
551c345104cSJoao Martins         gpa += sizeof(vcpu_info_t);
552c345104cSJoao Martins     }
553c345104cSJoao Martins 
554fb0fd2ceSJoao Martins     return err;
555fb0fd2ceSJoao Martins }
556fb0fd2ceSJoao Martins 
557fb0fd2ceSJoao Martins static int add_to_physmap_one(uint32_t space, uint64_t idx, uint64_t gfn)
558fb0fd2ceSJoao Martins {
559fb0fd2ceSJoao Martins     switch (space) {
560fb0fd2ceSJoao Martins     case XENMAPSPACE_shared_info:
561fb0fd2ceSJoao Martins         if (idx > 0) {
562fb0fd2ceSJoao Martins             return -EINVAL;
563fb0fd2ceSJoao Martins         }
564fb0fd2ceSJoao Martins         return xen_set_shared_info(gfn);
565fb0fd2ceSJoao Martins 
566fb0fd2ceSJoao Martins     case XENMAPSPACE_grant_table:
567*a28b0fc0SDavid Woodhouse         return xen_gnttab_map_page(idx, gfn);
568*a28b0fc0SDavid Woodhouse 
569fb0fd2ceSJoao Martins     case XENMAPSPACE_gmfn:
570fb0fd2ceSJoao Martins     case XENMAPSPACE_gmfn_range:
571fb0fd2ceSJoao Martins         return -ENOTSUP;
572fb0fd2ceSJoao Martins 
573fb0fd2ceSJoao Martins     case XENMAPSPACE_gmfn_foreign:
574fb0fd2ceSJoao Martins     case XENMAPSPACE_dev_mmio:
575fb0fd2ceSJoao Martins         return -EPERM;
576fb0fd2ceSJoao Martins 
577fb0fd2ceSJoao Martins     default:
578fb0fd2ceSJoao Martins         return -EINVAL;
579fb0fd2ceSJoao Martins     }
580fb0fd2ceSJoao Martins }
581fb0fd2ceSJoao Martins 
582fb0fd2ceSJoao Martins static int do_add_to_physmap(struct kvm_xen_exit *exit, X86CPU *cpu,
583fb0fd2ceSJoao Martins                              uint64_t arg)
584fb0fd2ceSJoao Martins {
585fb0fd2ceSJoao Martins     struct xen_add_to_physmap xatp;
586fb0fd2ceSJoao Martins     CPUState *cs = CPU(cpu);
587fb0fd2ceSJoao Martins 
588fb0fd2ceSJoao Martins     if (hypercall_compat32(exit->u.hcall.longmode)) {
589fb0fd2ceSJoao Martins         struct compat_xen_add_to_physmap xatp32;
590fb0fd2ceSJoao Martins 
591fb0fd2ceSJoao Martins         qemu_build_assert(sizeof(struct compat_xen_add_to_physmap) == 16);
592fb0fd2ceSJoao Martins         if (kvm_copy_from_gva(cs, arg, &xatp32, sizeof(xatp32))) {
593fb0fd2ceSJoao Martins             return -EFAULT;
594fb0fd2ceSJoao Martins         }
595fb0fd2ceSJoao Martins         xatp.domid = xatp32.domid;
596fb0fd2ceSJoao Martins         xatp.size = xatp32.size;
597fb0fd2ceSJoao Martins         xatp.space = xatp32.space;
598fb0fd2ceSJoao Martins         xatp.idx = xatp32.idx;
599fb0fd2ceSJoao Martins         xatp.gpfn = xatp32.gpfn;
600fb0fd2ceSJoao Martins     } else {
601fb0fd2ceSJoao Martins         if (kvm_copy_from_gva(cs, arg, &xatp, sizeof(xatp))) {
602fb0fd2ceSJoao Martins             return -EFAULT;
603fb0fd2ceSJoao Martins         }
604fb0fd2ceSJoao Martins     }
605fb0fd2ceSJoao Martins 
606fb0fd2ceSJoao Martins     if (xatp.domid != DOMID_SELF && xatp.domid != xen_domid) {
607fb0fd2ceSJoao Martins         return -ESRCH;
608fb0fd2ceSJoao Martins     }
609fb0fd2ceSJoao Martins 
610fb0fd2ceSJoao Martins     return add_to_physmap_one(xatp.space, xatp.idx, xatp.gpfn);
611fb0fd2ceSJoao Martins }
612fb0fd2ceSJoao Martins 
613782a7960SDavid Woodhouse static int do_add_to_physmap_batch(struct kvm_xen_exit *exit, X86CPU *cpu,
614782a7960SDavid Woodhouse                                    uint64_t arg)
615782a7960SDavid Woodhouse {
616782a7960SDavid Woodhouse     struct xen_add_to_physmap_batch xatpb;
617782a7960SDavid Woodhouse     unsigned long idxs_gva, gpfns_gva, errs_gva;
618782a7960SDavid Woodhouse     CPUState *cs = CPU(cpu);
619782a7960SDavid Woodhouse     size_t op_sz;
620782a7960SDavid Woodhouse 
621782a7960SDavid Woodhouse     if (hypercall_compat32(exit->u.hcall.longmode)) {
622782a7960SDavid Woodhouse         struct compat_xen_add_to_physmap_batch xatpb32;
623782a7960SDavid Woodhouse 
624782a7960SDavid Woodhouse         qemu_build_assert(sizeof(struct compat_xen_add_to_physmap_batch) == 20);
625782a7960SDavid Woodhouse         if (kvm_copy_from_gva(cs, arg, &xatpb32, sizeof(xatpb32))) {
626782a7960SDavid Woodhouse             return -EFAULT;
627782a7960SDavid Woodhouse         }
628782a7960SDavid Woodhouse         xatpb.domid = xatpb32.domid;
629782a7960SDavid Woodhouse         xatpb.space = xatpb32.space;
630782a7960SDavid Woodhouse         xatpb.size = xatpb32.size;
631782a7960SDavid Woodhouse 
632782a7960SDavid Woodhouse         idxs_gva = xatpb32.idxs.c;
633782a7960SDavid Woodhouse         gpfns_gva = xatpb32.gpfns.c;
634782a7960SDavid Woodhouse         errs_gva = xatpb32.errs.c;
635782a7960SDavid Woodhouse         op_sz = sizeof(uint32_t);
636782a7960SDavid Woodhouse     } else {
637782a7960SDavid Woodhouse         if (kvm_copy_from_gva(cs, arg, &xatpb, sizeof(xatpb))) {
638782a7960SDavid Woodhouse             return -EFAULT;
639782a7960SDavid Woodhouse         }
640782a7960SDavid Woodhouse         op_sz = sizeof(unsigned long);
641782a7960SDavid Woodhouse         idxs_gva = (unsigned long)xatpb.idxs.p;
642782a7960SDavid Woodhouse         gpfns_gva = (unsigned long)xatpb.gpfns.p;
643782a7960SDavid Woodhouse         errs_gva = (unsigned long)xatpb.errs.p;
644782a7960SDavid Woodhouse     }
645782a7960SDavid Woodhouse 
646782a7960SDavid Woodhouse     if (xatpb.domid != DOMID_SELF && xatpb.domid != xen_domid) {
647782a7960SDavid Woodhouse         return -ESRCH;
648782a7960SDavid Woodhouse     }
649782a7960SDavid Woodhouse 
650782a7960SDavid Woodhouse     /* Explicitly invalid for the batch op. Not that we implement it anyway. */
651782a7960SDavid Woodhouse     if (xatpb.space == XENMAPSPACE_gmfn_range) {
652782a7960SDavid Woodhouse         return -EINVAL;
653782a7960SDavid Woodhouse     }
654782a7960SDavid Woodhouse 
655782a7960SDavid Woodhouse     while (xatpb.size--) {
656782a7960SDavid Woodhouse         unsigned long idx = 0;
657782a7960SDavid Woodhouse         unsigned long gpfn = 0;
658782a7960SDavid Woodhouse         int err;
659782a7960SDavid Woodhouse 
660782a7960SDavid Woodhouse         /* For 32-bit compat this only copies the low 32 bits of each */
661782a7960SDavid Woodhouse         if (kvm_copy_from_gva(cs, idxs_gva, &idx, op_sz) ||
662782a7960SDavid Woodhouse             kvm_copy_from_gva(cs, gpfns_gva, &gpfn, op_sz)) {
663782a7960SDavid Woodhouse             return -EFAULT;
664782a7960SDavid Woodhouse         }
665782a7960SDavid Woodhouse         idxs_gva += op_sz;
666782a7960SDavid Woodhouse         gpfns_gva += op_sz;
667782a7960SDavid Woodhouse 
668782a7960SDavid Woodhouse         err = add_to_physmap_one(xatpb.space, idx, gpfn);
669782a7960SDavid Woodhouse 
670782a7960SDavid Woodhouse         if (kvm_copy_to_gva(cs, errs_gva, &err, sizeof(err))) {
671782a7960SDavid Woodhouse             return -EFAULT;
672782a7960SDavid Woodhouse         }
673782a7960SDavid Woodhouse         errs_gva += sizeof(err);
674782a7960SDavid Woodhouse     }
675782a7960SDavid Woodhouse     return 0;
676782a7960SDavid Woodhouse }
677782a7960SDavid Woodhouse 
678fb0fd2ceSJoao Martins static bool kvm_xen_hcall_memory_op(struct kvm_xen_exit *exit, X86CPU *cpu,
679fb0fd2ceSJoao Martins                                    int cmd, uint64_t arg)
680fb0fd2ceSJoao Martins {
681fb0fd2ceSJoao Martins     int err;
682fb0fd2ceSJoao Martins 
683fb0fd2ceSJoao Martins     switch (cmd) {
684fb0fd2ceSJoao Martins     case XENMEM_add_to_physmap:
685fb0fd2ceSJoao Martins         err = do_add_to_physmap(exit, cpu, arg);
686fb0fd2ceSJoao Martins         break;
687fb0fd2ceSJoao Martins 
688782a7960SDavid Woodhouse     case XENMEM_add_to_physmap_batch:
689782a7960SDavid Woodhouse         err = do_add_to_physmap_batch(exit, cpu, arg);
690782a7960SDavid Woodhouse         break;
691782a7960SDavid Woodhouse 
692fb0fd2ceSJoao Martins     default:
693fb0fd2ceSJoao Martins         return false;
694fb0fd2ceSJoao Martins     }
695fb0fd2ceSJoao Martins 
696fb0fd2ceSJoao Martins     exit->u.hcall.result = err;
697fb0fd2ceSJoao Martins     return true;
698fb0fd2ceSJoao Martins }
699fb0fd2ceSJoao Martins 
7005dbcd01aSAnkur Arora static bool handle_set_param(struct kvm_xen_exit *exit, X86CPU *cpu,
7015dbcd01aSAnkur Arora                              uint64_t arg)
7025dbcd01aSAnkur Arora {
7035dbcd01aSAnkur Arora     CPUState *cs = CPU(cpu);
7045dbcd01aSAnkur Arora     struct xen_hvm_param hp;
7055dbcd01aSAnkur Arora     int err = 0;
7065dbcd01aSAnkur Arora 
7075dbcd01aSAnkur Arora     /* No need for 32/64 compat handling */
7085dbcd01aSAnkur Arora     qemu_build_assert(sizeof(hp) == 16);
7095dbcd01aSAnkur Arora 
7105dbcd01aSAnkur Arora     if (kvm_copy_from_gva(cs, arg, &hp, sizeof(hp))) {
7115dbcd01aSAnkur Arora         err = -EFAULT;
7125dbcd01aSAnkur Arora         goto out;
7135dbcd01aSAnkur Arora     }
7145dbcd01aSAnkur Arora 
7155dbcd01aSAnkur Arora     if (hp.domid != DOMID_SELF && hp.domid != xen_domid) {
7165dbcd01aSAnkur Arora         err = -ESRCH;
7175dbcd01aSAnkur Arora         goto out;
7185dbcd01aSAnkur Arora     }
7195dbcd01aSAnkur Arora 
7205dbcd01aSAnkur Arora     switch (hp.index) {
72191cce756SDavid Woodhouse     case HVM_PARAM_CALLBACK_IRQ:
7222aff696bSDavid Woodhouse         qemu_mutex_lock_iothread();
72391cce756SDavid Woodhouse         err = xen_evtchn_set_callback_param(hp.value);
7242aff696bSDavid Woodhouse         qemu_mutex_unlock_iothread();
72591cce756SDavid Woodhouse         xen_set_long_mode(exit->u.hcall.longmode);
72691cce756SDavid Woodhouse         break;
7275dbcd01aSAnkur Arora     default:
7285dbcd01aSAnkur Arora         return false;
7295dbcd01aSAnkur Arora     }
7305dbcd01aSAnkur Arora 
7315dbcd01aSAnkur Arora out:
7325dbcd01aSAnkur Arora     exit->u.hcall.result = err;
7335dbcd01aSAnkur Arora     return true;
7345dbcd01aSAnkur Arora }
7355dbcd01aSAnkur Arora 
736105b47fdSAnkur Arora static int kvm_xen_hcall_evtchn_upcall_vector(struct kvm_xen_exit *exit,
737105b47fdSAnkur Arora                                               X86CPU *cpu, uint64_t arg)
738105b47fdSAnkur Arora {
739105b47fdSAnkur Arora     struct xen_hvm_evtchn_upcall_vector up;
740105b47fdSAnkur Arora     CPUState *target_cs;
741105b47fdSAnkur Arora 
742105b47fdSAnkur Arora     /* No need for 32/64 compat handling */
743105b47fdSAnkur Arora     qemu_build_assert(sizeof(up) == 8);
744105b47fdSAnkur Arora 
745105b47fdSAnkur Arora     if (kvm_copy_from_gva(CPU(cpu), arg, &up, sizeof(up))) {
746105b47fdSAnkur Arora         return -EFAULT;
747105b47fdSAnkur Arora     }
748105b47fdSAnkur Arora 
749105b47fdSAnkur Arora     if (up.vector < 0x10) {
750105b47fdSAnkur Arora         return -EINVAL;
751105b47fdSAnkur Arora     }
752105b47fdSAnkur Arora 
753105b47fdSAnkur Arora     target_cs = qemu_get_cpu(up.vcpu);
754105b47fdSAnkur Arora     if (!target_cs) {
755105b47fdSAnkur Arora         return -EINVAL;
756105b47fdSAnkur Arora     }
757105b47fdSAnkur Arora 
758105b47fdSAnkur Arora     async_run_on_cpu(target_cs, do_set_vcpu_callback_vector,
759105b47fdSAnkur Arora                      RUN_ON_CPU_HOST_INT(up.vector));
760105b47fdSAnkur Arora     return 0;
761105b47fdSAnkur Arora }
762105b47fdSAnkur Arora 
763671bfdcdSJoao Martins static bool kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, X86CPU *cpu,
764671bfdcdSJoao Martins                                  int cmd, uint64_t arg)
765671bfdcdSJoao Martins {
766105b47fdSAnkur Arora     int ret = -ENOSYS;
767671bfdcdSJoao Martins     switch (cmd) {
768105b47fdSAnkur Arora     case HVMOP_set_evtchn_upcall_vector:
769105b47fdSAnkur Arora         ret = kvm_xen_hcall_evtchn_upcall_vector(exit, cpu,
770105b47fdSAnkur Arora                                                  exit->u.hcall.params[0]);
771105b47fdSAnkur Arora         break;
772105b47fdSAnkur Arora 
773671bfdcdSJoao Martins     case HVMOP_pagetable_dying:
774105b47fdSAnkur Arora         ret = -ENOSYS;
775105b47fdSAnkur Arora         break;
776671bfdcdSJoao Martins 
7775dbcd01aSAnkur Arora     case HVMOP_set_param:
7785dbcd01aSAnkur Arora         return handle_set_param(exit, cpu, arg);
7795dbcd01aSAnkur Arora 
780671bfdcdSJoao Martins     default:
781671bfdcdSJoao Martins         return false;
782671bfdcdSJoao Martins     }
783105b47fdSAnkur Arora 
784105b47fdSAnkur Arora     exit->u.hcall.result = ret;
785105b47fdSAnkur Arora     return true;
786671bfdcdSJoao Martins }
787671bfdcdSJoao Martins 
788c345104cSJoao Martins static int vcpuop_register_vcpu_info(CPUState *cs, CPUState *target,
789c345104cSJoao Martins                                      uint64_t arg)
790c345104cSJoao Martins {
791c345104cSJoao Martins     struct vcpu_register_vcpu_info rvi;
792c345104cSJoao Martins     uint64_t gpa;
793c345104cSJoao Martins 
794c345104cSJoao Martins     /* No need for 32/64 compat handling */
795c345104cSJoao Martins     qemu_build_assert(sizeof(rvi) == 16);
796c345104cSJoao Martins     qemu_build_assert(sizeof(struct vcpu_info) == 64);
797c345104cSJoao Martins 
798c345104cSJoao Martins     if (!target) {
799c345104cSJoao Martins         return -ENOENT;
800c345104cSJoao Martins     }
801c345104cSJoao Martins 
802c345104cSJoao Martins     if (kvm_copy_from_gva(cs, arg, &rvi, sizeof(rvi))) {
803c345104cSJoao Martins         return -EFAULT;
804c345104cSJoao Martins     }
805c345104cSJoao Martins 
806c345104cSJoao Martins     if (rvi.offset > TARGET_PAGE_SIZE - sizeof(struct vcpu_info)) {
807c345104cSJoao Martins         return -EINVAL;
808c345104cSJoao Martins     }
809c345104cSJoao Martins 
810c345104cSJoao Martins     gpa = ((rvi.mfn << TARGET_PAGE_BITS) + rvi.offset);
811c345104cSJoao Martins     async_run_on_cpu(target, do_set_vcpu_info_gpa, RUN_ON_CPU_HOST_ULONG(gpa));
812c345104cSJoao Martins     return 0;
813c345104cSJoao Martins }
814c345104cSJoao Martins 
815f0689302SJoao Martins static int vcpuop_register_vcpu_time_info(CPUState *cs, CPUState *target,
816f0689302SJoao Martins                                           uint64_t arg)
817f0689302SJoao Martins {
818f0689302SJoao Martins     struct vcpu_register_time_memory_area tma;
819f0689302SJoao Martins     uint64_t gpa;
820f0689302SJoao Martins     size_t len;
821f0689302SJoao Martins 
822f0689302SJoao Martins     /* No need for 32/64 compat handling */
823f0689302SJoao Martins     qemu_build_assert(sizeof(tma) == 8);
824f0689302SJoao Martins     qemu_build_assert(sizeof(struct vcpu_time_info) == 32);
825f0689302SJoao Martins 
826f0689302SJoao Martins     if (!target) {
827f0689302SJoao Martins         return -ENOENT;
828f0689302SJoao Martins     }
829f0689302SJoao Martins 
830f0689302SJoao Martins     if (kvm_copy_from_gva(cs, arg, &tma, sizeof(tma))) {
831f0689302SJoao Martins         return -EFAULT;
832f0689302SJoao Martins     }
833f0689302SJoao Martins 
834f0689302SJoao Martins     /*
835f0689302SJoao Martins      * Xen actually uses the GVA and does the translation through the guest
836f0689302SJoao Martins      * page tables each time. But Linux/KVM uses the GPA, on the assumption
837f0689302SJoao Martins      * that guests only ever use *global* addresses (kernel virtual addresses)
838f0689302SJoao Martins      * for it. If Linux is changed to redo the GVA→GPA translation each time,
839f0689302SJoao Martins      * it will offer a new vCPU attribute for that, and we'll use it instead.
840f0689302SJoao Martins      */
841f0689302SJoao Martins     if (!kvm_gva_to_gpa(cs, tma.addr.p, &gpa, &len, false) ||
842f0689302SJoao Martins         len < sizeof(struct vcpu_time_info)) {
843f0689302SJoao Martins         return -EFAULT;
844f0689302SJoao Martins     }
845f0689302SJoao Martins 
846f0689302SJoao Martins     async_run_on_cpu(target, do_set_vcpu_time_info_gpa,
847f0689302SJoao Martins                      RUN_ON_CPU_HOST_ULONG(gpa));
848f0689302SJoao Martins     return 0;
849f0689302SJoao Martins }
850f0689302SJoao Martins 
8515092db87SJoao Martins static int vcpuop_register_runstate_info(CPUState *cs, CPUState *target,
8525092db87SJoao Martins                                          uint64_t arg)
8535092db87SJoao Martins {
8545092db87SJoao Martins     struct vcpu_register_runstate_memory_area rma;
8555092db87SJoao Martins     uint64_t gpa;
8565092db87SJoao Martins     size_t len;
8575092db87SJoao Martins 
8585092db87SJoao Martins     /* No need for 32/64 compat handling */
8595092db87SJoao Martins     qemu_build_assert(sizeof(rma) == 8);
8605092db87SJoao Martins     /* The runstate area actually does change size, but Linux copes. */
8615092db87SJoao Martins 
8625092db87SJoao Martins     if (!target) {
8635092db87SJoao Martins         return -ENOENT;
8645092db87SJoao Martins     }
8655092db87SJoao Martins 
8665092db87SJoao Martins     if (kvm_copy_from_gva(cs, arg, &rma, sizeof(rma))) {
8675092db87SJoao Martins         return -EFAULT;
8685092db87SJoao Martins     }
8695092db87SJoao Martins 
8705092db87SJoao Martins     /* As with vcpu_time_info, Xen actually uses the GVA but KVM doesn't. */
8715092db87SJoao Martins     if (!kvm_gva_to_gpa(cs, rma.addr.p, &gpa, &len, false)) {
8725092db87SJoao Martins         return -EFAULT;
8735092db87SJoao Martins     }
8745092db87SJoao Martins 
8755092db87SJoao Martins     async_run_on_cpu(target, do_set_vcpu_runstate_gpa,
8765092db87SJoao Martins                      RUN_ON_CPU_HOST_ULONG(gpa));
8775092db87SJoao Martins     return 0;
8785092db87SJoao Martins }
8795092db87SJoao Martins 
880d70bd6a4SJoao Martins static bool kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, X86CPU *cpu,
881d70bd6a4SJoao Martins                                   int cmd, int vcpu_id, uint64_t arg)
882d70bd6a4SJoao Martins {
883c345104cSJoao Martins     CPUState *dest = qemu_get_cpu(vcpu_id);
884c345104cSJoao Martins     CPUState *cs = CPU(cpu);
885d70bd6a4SJoao Martins     int err;
886d70bd6a4SJoao Martins 
887d70bd6a4SJoao Martins     switch (cmd) {
8885092db87SJoao Martins     case VCPUOP_register_runstate_memory_area:
8895092db87SJoao Martins         err = vcpuop_register_runstate_info(cs, dest, arg);
8905092db87SJoao Martins         break;
891f0689302SJoao Martins     case VCPUOP_register_vcpu_time_memory_area:
892f0689302SJoao Martins         err = vcpuop_register_vcpu_time_info(cs, dest, arg);
893f0689302SJoao Martins         break;
894d70bd6a4SJoao Martins     case VCPUOP_register_vcpu_info:
895c345104cSJoao Martins         err = vcpuop_register_vcpu_info(cs, dest, arg);
896d70bd6a4SJoao Martins         break;
897d70bd6a4SJoao Martins 
898d70bd6a4SJoao Martins     default:
899d70bd6a4SJoao Martins         return false;
900d70bd6a4SJoao Martins     }
901d70bd6a4SJoao Martins 
902d70bd6a4SJoao Martins     exit->u.hcall.result = err;
903d70bd6a4SJoao Martins     return true;
904d70bd6a4SJoao Martins }
905d70bd6a4SJoao Martins 
9064858ba20SDavid Woodhouse static bool kvm_xen_hcall_evtchn_op(struct kvm_xen_exit *exit, X86CPU *cpu,
9073b06f29bSJoao Martins                                     int cmd, uint64_t arg)
9083b06f29bSJoao Martins {
9094858ba20SDavid Woodhouse     CPUState *cs = CPU(cpu);
9103b06f29bSJoao Martins     int err = -ENOSYS;
9113b06f29bSJoao Martins 
9123b06f29bSJoao Martins     switch (cmd) {
9133b06f29bSJoao Martins     case EVTCHNOP_init_control:
9143b06f29bSJoao Martins     case EVTCHNOP_expand_array:
9153b06f29bSJoao Martins     case EVTCHNOP_set_priority:
9163b06f29bSJoao Martins         /* We do not support FIFO channels at this point */
9173b06f29bSJoao Martins         err = -ENOSYS;
9183b06f29bSJoao Martins         break;
9193b06f29bSJoao Martins 
9204858ba20SDavid Woodhouse     case EVTCHNOP_status: {
9214858ba20SDavid Woodhouse         struct evtchn_status status;
9224858ba20SDavid Woodhouse 
9234858ba20SDavid Woodhouse         qemu_build_assert(sizeof(status) == 24);
9244858ba20SDavid Woodhouse         if (kvm_copy_from_gva(cs, arg, &status, sizeof(status))) {
9254858ba20SDavid Woodhouse             err = -EFAULT;
9264858ba20SDavid Woodhouse             break;
9274858ba20SDavid Woodhouse         }
9284858ba20SDavid Woodhouse 
9294858ba20SDavid Woodhouse         err = xen_evtchn_status_op(&status);
9304858ba20SDavid Woodhouse         if (!err && kvm_copy_to_gva(cs, arg, &status, sizeof(status))) {
9314858ba20SDavid Woodhouse             err = -EFAULT;
9324858ba20SDavid Woodhouse         }
9334858ba20SDavid Woodhouse         break;
9344858ba20SDavid Woodhouse     }
93583eb5811SDavid Woodhouse     case EVTCHNOP_close: {
93683eb5811SDavid Woodhouse         struct evtchn_close close;
93783eb5811SDavid Woodhouse 
93883eb5811SDavid Woodhouse         qemu_build_assert(sizeof(close) == 4);
93983eb5811SDavid Woodhouse         if (kvm_copy_from_gva(cs, arg, &close, sizeof(close))) {
94083eb5811SDavid Woodhouse             err = -EFAULT;
94183eb5811SDavid Woodhouse             break;
94283eb5811SDavid Woodhouse         }
94383eb5811SDavid Woodhouse 
94483eb5811SDavid Woodhouse         err = xen_evtchn_close_op(&close);
94583eb5811SDavid Woodhouse         break;
94683eb5811SDavid Woodhouse     }
947190cc3c0SDavid Woodhouse     case EVTCHNOP_unmask: {
948190cc3c0SDavid Woodhouse         struct evtchn_unmask unmask;
949190cc3c0SDavid Woodhouse 
950190cc3c0SDavid Woodhouse         qemu_build_assert(sizeof(unmask) == 4);
951190cc3c0SDavid Woodhouse         if (kvm_copy_from_gva(cs, arg, &unmask, sizeof(unmask))) {
952190cc3c0SDavid Woodhouse             err = -EFAULT;
953190cc3c0SDavid Woodhouse             break;
954190cc3c0SDavid Woodhouse         }
955190cc3c0SDavid Woodhouse 
956190cc3c0SDavid Woodhouse         err = xen_evtchn_unmask_op(&unmask);
957190cc3c0SDavid Woodhouse         break;
958190cc3c0SDavid Woodhouse     }
959c723d4c1SDavid Woodhouse     case EVTCHNOP_bind_virq: {
960c723d4c1SDavid Woodhouse         struct evtchn_bind_virq virq;
961c723d4c1SDavid Woodhouse 
962c723d4c1SDavid Woodhouse         qemu_build_assert(sizeof(virq) == 12);
963c723d4c1SDavid Woodhouse         if (kvm_copy_from_gva(cs, arg, &virq, sizeof(virq))) {
964c723d4c1SDavid Woodhouse             err = -EFAULT;
965c723d4c1SDavid Woodhouse             break;
966c723d4c1SDavid Woodhouse         }
967c723d4c1SDavid Woodhouse 
968c723d4c1SDavid Woodhouse         err = xen_evtchn_bind_virq_op(&virq);
969c723d4c1SDavid Woodhouse         if (!err && kvm_copy_to_gva(cs, arg, &virq, sizeof(virq))) {
970c723d4c1SDavid Woodhouse             err = -EFAULT;
971c723d4c1SDavid Woodhouse         }
972c723d4c1SDavid Woodhouse         break;
973c723d4c1SDavid Woodhouse     }
974f5417856SDavid Woodhouse     case EVTCHNOP_bind_ipi: {
975f5417856SDavid Woodhouse         struct evtchn_bind_ipi ipi;
976f5417856SDavid Woodhouse 
977f5417856SDavid Woodhouse         qemu_build_assert(sizeof(ipi) == 8);
978f5417856SDavid Woodhouse         if (kvm_copy_from_gva(cs, arg, &ipi, sizeof(ipi))) {
979f5417856SDavid Woodhouse             err = -EFAULT;
980f5417856SDavid Woodhouse             break;
981f5417856SDavid Woodhouse         }
982f5417856SDavid Woodhouse 
983f5417856SDavid Woodhouse         err = xen_evtchn_bind_ipi_op(&ipi);
984f5417856SDavid Woodhouse         if (!err && kvm_copy_to_gva(cs, arg, &ipi, sizeof(ipi))) {
985f5417856SDavid Woodhouse             err = -EFAULT;
986f5417856SDavid Woodhouse         }
987f5417856SDavid Woodhouse         break;
988f5417856SDavid Woodhouse     }
989cf7679abSDavid Woodhouse     case EVTCHNOP_send: {
990cf7679abSDavid Woodhouse         struct evtchn_send send;
991cf7679abSDavid Woodhouse 
992cf7679abSDavid Woodhouse         qemu_build_assert(sizeof(send) == 4);
993cf7679abSDavid Woodhouse         if (kvm_copy_from_gva(cs, arg, &send, sizeof(send))) {
994cf7679abSDavid Woodhouse             err = -EFAULT;
995cf7679abSDavid Woodhouse             break;
996cf7679abSDavid Woodhouse         }
997cf7679abSDavid Woodhouse 
998cf7679abSDavid Woodhouse         err = xen_evtchn_send_op(&send);
999cf7679abSDavid Woodhouse         break;
1000cf7679abSDavid Woodhouse     }
1001e1db61b8SDavid Woodhouse     case EVTCHNOP_alloc_unbound: {
1002e1db61b8SDavid Woodhouse         struct evtchn_alloc_unbound alloc;
1003e1db61b8SDavid Woodhouse 
1004e1db61b8SDavid Woodhouse         qemu_build_assert(sizeof(alloc) == 8);
1005e1db61b8SDavid Woodhouse         if (kvm_copy_from_gva(cs, arg, &alloc, sizeof(alloc))) {
1006e1db61b8SDavid Woodhouse             err = -EFAULT;
1007e1db61b8SDavid Woodhouse             break;
1008e1db61b8SDavid Woodhouse         }
1009e1db61b8SDavid Woodhouse 
1010e1db61b8SDavid Woodhouse         err = xen_evtchn_alloc_unbound_op(&alloc);
1011e1db61b8SDavid Woodhouse         if (!err && kvm_copy_to_gva(cs, arg, &alloc, sizeof(alloc))) {
1012e1db61b8SDavid Woodhouse             err = -EFAULT;
1013e1db61b8SDavid Woodhouse         }
1014e1db61b8SDavid Woodhouse         break;
1015e1db61b8SDavid Woodhouse     }
101684327881SDavid Woodhouse     case EVTCHNOP_bind_interdomain: {
101784327881SDavid Woodhouse         struct evtchn_bind_interdomain interdomain;
101884327881SDavid Woodhouse 
101984327881SDavid Woodhouse         qemu_build_assert(sizeof(interdomain) == 12);
102084327881SDavid Woodhouse         if (kvm_copy_from_gva(cs, arg, &interdomain, sizeof(interdomain))) {
102184327881SDavid Woodhouse             err = -EFAULT;
102284327881SDavid Woodhouse             break;
102384327881SDavid Woodhouse         }
102484327881SDavid Woodhouse 
102584327881SDavid Woodhouse         err = xen_evtchn_bind_interdomain_op(&interdomain);
102684327881SDavid Woodhouse         if (!err &&
102784327881SDavid Woodhouse             kvm_copy_to_gva(cs, arg, &interdomain, sizeof(interdomain))) {
102884327881SDavid Woodhouse             err = -EFAULT;
102984327881SDavid Woodhouse         }
103084327881SDavid Woodhouse         break;
103184327881SDavid Woodhouse     }
103230667046SDavid Woodhouse     case EVTCHNOP_bind_vcpu: {
103330667046SDavid Woodhouse         struct evtchn_bind_vcpu vcpu;
103430667046SDavid Woodhouse 
103530667046SDavid Woodhouse         qemu_build_assert(sizeof(vcpu) == 8);
103630667046SDavid Woodhouse         if (kvm_copy_from_gva(cs, arg, &vcpu, sizeof(vcpu))) {
103730667046SDavid Woodhouse             err = -EFAULT;
103830667046SDavid Woodhouse             break;
103930667046SDavid Woodhouse         }
104030667046SDavid Woodhouse 
104130667046SDavid Woodhouse         err = xen_evtchn_bind_vcpu_op(&vcpu);
104230667046SDavid Woodhouse         break;
104330667046SDavid Woodhouse     }
1044a15b1097SDavid Woodhouse     case EVTCHNOP_reset: {
1045a15b1097SDavid Woodhouse         struct evtchn_reset reset;
1046a15b1097SDavid Woodhouse 
1047a15b1097SDavid Woodhouse         qemu_build_assert(sizeof(reset) == 2);
1048a15b1097SDavid Woodhouse         if (kvm_copy_from_gva(cs, arg, &reset, sizeof(reset))) {
1049a15b1097SDavid Woodhouse             err = -EFAULT;
1050a15b1097SDavid Woodhouse             break;
1051a15b1097SDavid Woodhouse         }
1052a15b1097SDavid Woodhouse 
1053a15b1097SDavid Woodhouse         err = xen_evtchn_reset_op(&reset);
1054a15b1097SDavid Woodhouse         break;
1055a15b1097SDavid Woodhouse     }
10563b06f29bSJoao Martins     default:
10573b06f29bSJoao Martins         return false;
10583b06f29bSJoao Martins     }
10593b06f29bSJoao Martins 
10603b06f29bSJoao Martins     exit->u.hcall.result = err;
10613b06f29bSJoao Martins     return true;
10623b06f29bSJoao Martins }
10633b06f29bSJoao Martins 
106479b7067dSJoao Martins int kvm_xen_soft_reset(void)
106579b7067dSJoao Martins {
1066c345104cSJoao Martins     CPUState *cpu;
1067fb0fd2ceSJoao Martins     int err;
1068fb0fd2ceSJoao Martins 
106979b7067dSJoao Martins     assert(qemu_mutex_iothread_locked());
107079b7067dSJoao Martins 
107179b7067dSJoao Martins     trace_kvm_xen_soft_reset();
107279b7067dSJoao Martins 
1073a15b1097SDavid Woodhouse     err = xen_evtchn_soft_reset();
1074a15b1097SDavid Woodhouse     if (err) {
1075a15b1097SDavid Woodhouse         return err;
1076a15b1097SDavid Woodhouse     }
1077a15b1097SDavid Woodhouse 
107891cce756SDavid Woodhouse     /*
107991cce756SDavid Woodhouse      * Zero is the reset/startup state for HVM_PARAM_CALLBACK_IRQ. Strictly,
108091cce756SDavid Woodhouse      * it maps to HVM_PARAM_CALLBACK_TYPE_GSI with GSI#0, but Xen refuses to
108191cce756SDavid Woodhouse      * to deliver to the timer interrupt and treats that as 'disabled'.
108291cce756SDavid Woodhouse      */
108391cce756SDavid Woodhouse     err = xen_evtchn_set_callback_param(0);
108491cce756SDavid Woodhouse     if (err) {
108591cce756SDavid Woodhouse         return err;
108691cce756SDavid Woodhouse     }
108791cce756SDavid Woodhouse 
1088c345104cSJoao Martins     CPU_FOREACH(cpu) {
1089c345104cSJoao Martins         async_run_on_cpu(cpu, do_vcpu_soft_reset, RUN_ON_CPU_NULL);
1090c345104cSJoao Martins     }
1091c345104cSJoao Martins 
1092fb0fd2ceSJoao Martins     err = xen_overlay_map_shinfo_page(INVALID_GFN);
1093fb0fd2ceSJoao Martins     if (err) {
1094fb0fd2ceSJoao Martins         return err;
1095fb0fd2ceSJoao Martins     }
1096fb0fd2ceSJoao Martins 
109779b7067dSJoao Martins     return 0;
109879b7067dSJoao Martins }
109979b7067dSJoao Martins 
110079b7067dSJoao Martins static int schedop_shutdown(CPUState *cs, uint64_t arg)
110179b7067dSJoao Martins {
110279b7067dSJoao Martins     struct sched_shutdown shutdown;
110379b7067dSJoao Martins     int ret = 0;
110479b7067dSJoao Martins 
110579b7067dSJoao Martins     /* No need for 32/64 compat handling */
110679b7067dSJoao Martins     qemu_build_assert(sizeof(shutdown) == 4);
110779b7067dSJoao Martins 
110879b7067dSJoao Martins     if (kvm_copy_from_gva(cs, arg, &shutdown, sizeof(shutdown))) {
110979b7067dSJoao Martins         return -EFAULT;
111079b7067dSJoao Martins     }
111179b7067dSJoao Martins 
111279b7067dSJoao Martins     switch (shutdown.reason) {
111379b7067dSJoao Martins     case SHUTDOWN_crash:
111479b7067dSJoao Martins         cpu_dump_state(cs, stderr, CPU_DUMP_CODE);
111579b7067dSJoao Martins         qemu_system_guest_panicked(NULL);
111679b7067dSJoao Martins         break;
111779b7067dSJoao Martins 
111879b7067dSJoao Martins     case SHUTDOWN_reboot:
111979b7067dSJoao Martins         qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
112079b7067dSJoao Martins         break;
112179b7067dSJoao Martins 
112279b7067dSJoao Martins     case SHUTDOWN_poweroff:
112379b7067dSJoao Martins         qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
112479b7067dSJoao Martins         break;
112579b7067dSJoao Martins 
112679b7067dSJoao Martins     case SHUTDOWN_soft_reset:
112779b7067dSJoao Martins         qemu_mutex_lock_iothread();
112879b7067dSJoao Martins         ret = kvm_xen_soft_reset();
112979b7067dSJoao Martins         qemu_mutex_unlock_iothread();
113079b7067dSJoao Martins         break;
113179b7067dSJoao Martins 
113279b7067dSJoao Martins     default:
113379b7067dSJoao Martins         ret = -EINVAL;
113479b7067dSJoao Martins         break;
113579b7067dSJoao Martins     }
113679b7067dSJoao Martins 
113779b7067dSJoao Martins     return ret;
113879b7067dSJoao Martins }
113979b7067dSJoao Martins 
114079b7067dSJoao Martins static bool kvm_xen_hcall_sched_op(struct kvm_xen_exit *exit, X86CPU *cpu,
114179b7067dSJoao Martins                                    int cmd, uint64_t arg)
114279b7067dSJoao Martins {
114379b7067dSJoao Martins     CPUState *cs = CPU(cpu);
114479b7067dSJoao Martins     int err = -ENOSYS;
114579b7067dSJoao Martins 
114679b7067dSJoao Martins     switch (cmd) {
114779b7067dSJoao Martins     case SCHEDOP_shutdown:
114879b7067dSJoao Martins         err = schedop_shutdown(cs, arg);
114979b7067dSJoao Martins         break;
115079b7067dSJoao Martins 
1151c789b9efSDavid Woodhouse     case SCHEDOP_poll:
1152c789b9efSDavid Woodhouse         /*
1153c789b9efSDavid Woodhouse          * Linux will panic if this doesn't work. Just yield; it's not
1154c789b9efSDavid Woodhouse          * worth overthinking it because with event channel handling
1155c789b9efSDavid Woodhouse          * in KVM, the kernel will intercept this and it will never
1156c789b9efSDavid Woodhouse          * reach QEMU anyway. The semantics of the hypercall explicltly
1157c789b9efSDavid Woodhouse          * permit spurious wakeups.
1158c789b9efSDavid Woodhouse          */
1159c789b9efSDavid Woodhouse     case SCHEDOP_yield:
1160c789b9efSDavid Woodhouse         sched_yield();
1161c789b9efSDavid Woodhouse         err = 0;
1162c789b9efSDavid Woodhouse         break;
1163c789b9efSDavid Woodhouse 
116479b7067dSJoao Martins     default:
116579b7067dSJoao Martins         return false;
116679b7067dSJoao Martins     }
116779b7067dSJoao Martins 
116879b7067dSJoao Martins     exit->u.hcall.result = err;
116979b7067dSJoao Martins     return true;
117079b7067dSJoao Martins }
117179b7067dSJoao Martins 
117255a3f666SJoao Martins static bool do_kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
117355a3f666SJoao Martins {
117455a3f666SJoao Martins     uint16_t code = exit->u.hcall.input;
117555a3f666SJoao Martins 
117655a3f666SJoao Martins     if (exit->u.hcall.cpl > 0) {
117755a3f666SJoao Martins         exit->u.hcall.result = -EPERM;
117855a3f666SJoao Martins         return true;
117955a3f666SJoao Martins     }
118055a3f666SJoao Martins 
118155a3f666SJoao Martins     switch (code) {
118279b7067dSJoao Martins     case __HYPERVISOR_sched_op:
118379b7067dSJoao Martins         return kvm_xen_hcall_sched_op(exit, cpu, exit->u.hcall.params[0],
118479b7067dSJoao Martins                                       exit->u.hcall.params[1]);
11853b06f29bSJoao Martins     case __HYPERVISOR_event_channel_op:
11864858ba20SDavid Woodhouse         return kvm_xen_hcall_evtchn_op(exit, cpu, exit->u.hcall.params[0],
11873b06f29bSJoao Martins                                        exit->u.hcall.params[1]);
1188d70bd6a4SJoao Martins     case __HYPERVISOR_vcpu_op:
1189d70bd6a4SJoao Martins         return kvm_xen_hcall_vcpu_op(exit, cpu,
1190d70bd6a4SJoao Martins                                      exit->u.hcall.params[0],
1191d70bd6a4SJoao Martins                                      exit->u.hcall.params[1],
1192d70bd6a4SJoao Martins                                      exit->u.hcall.params[2]);
1193671bfdcdSJoao Martins     case __HYPERVISOR_hvm_op:
1194671bfdcdSJoao Martins         return kvm_xen_hcall_hvm_op(exit, cpu, exit->u.hcall.params[0],
1195671bfdcdSJoao Martins                                     exit->u.hcall.params[1]);
1196fb0fd2ceSJoao Martins     case __HYPERVISOR_memory_op:
1197fb0fd2ceSJoao Martins         return kvm_xen_hcall_memory_op(exit, cpu, exit->u.hcall.params[0],
1198fb0fd2ceSJoao Martins                                        exit->u.hcall.params[1]);
1199bedcc139SJoao Martins     case __HYPERVISOR_xen_version:
1200bedcc139SJoao Martins         return kvm_xen_hcall_xen_version(exit, cpu, exit->u.hcall.params[0],
1201bedcc139SJoao Martins                                          exit->u.hcall.params[1]);
120255a3f666SJoao Martins     default:
120355a3f666SJoao Martins         return false;
120455a3f666SJoao Martins     }
120555a3f666SJoao Martins }
120655a3f666SJoao Martins 
120755a3f666SJoao Martins int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
120855a3f666SJoao Martins {
120955a3f666SJoao Martins     if (exit->type != KVM_EXIT_XEN_HCALL) {
121055a3f666SJoao Martins         return -1;
121155a3f666SJoao Martins     }
121255a3f666SJoao Martins 
1213110a0ea5SDavid Woodhouse     /*
1214110a0ea5SDavid Woodhouse      * The kernel latches the guest 32/64 mode when the MSR is used to fill
1215110a0ea5SDavid Woodhouse      * the hypercall page. So if we see a hypercall in a mode that doesn't
1216110a0ea5SDavid Woodhouse      * match our own idea of the guest mode, fetch the kernel's idea of the
1217110a0ea5SDavid Woodhouse      * "long mode" to remain in sync.
1218110a0ea5SDavid Woodhouse      */
1219110a0ea5SDavid Woodhouse     if (exit->u.hcall.longmode != xen_is_long_mode()) {
1220110a0ea5SDavid Woodhouse         xen_sync_long_mode();
1221110a0ea5SDavid Woodhouse     }
1222110a0ea5SDavid Woodhouse 
122355a3f666SJoao Martins     if (!do_kvm_xen_handle_exit(cpu, exit)) {
122455a3f666SJoao Martins         /*
122555a3f666SJoao Martins          * Some hypercalls will be deliberately "implemented" by returning
122655a3f666SJoao Martins          * -ENOSYS. This case is for hypercalls which are unexpected.
122755a3f666SJoao Martins          */
122855a3f666SJoao Martins         exit->u.hcall.result = -ENOSYS;
122955a3f666SJoao Martins         qemu_log_mask(LOG_UNIMP, "Unimplemented Xen hypercall %"
123055a3f666SJoao Martins                       PRId64 " (0x%" PRIx64 " 0x%" PRIx64 " 0x%" PRIx64 ")\n",
123155a3f666SJoao Martins                       (uint64_t)exit->u.hcall.input,
123255a3f666SJoao Martins                       (uint64_t)exit->u.hcall.params[0],
123355a3f666SJoao Martins                       (uint64_t)exit->u.hcall.params[1],
123455a3f666SJoao Martins                       (uint64_t)exit->u.hcall.params[2]);
123555a3f666SJoao Martins     }
123655a3f666SJoao Martins 
123755a3f666SJoao Martins     trace_kvm_xen_hypercall(CPU(cpu)->cpu_index, exit->u.hcall.cpl,
123855a3f666SJoao Martins                             exit->u.hcall.input, exit->u.hcall.params[0],
123955a3f666SJoao Martins                             exit->u.hcall.params[1], exit->u.hcall.params[2],
124055a3f666SJoao Martins                             exit->u.hcall.result);
124155a3f666SJoao Martins     return 0;
124255a3f666SJoao Martins }
1243c345104cSJoao Martins 
12446f43f2eeSDavid Woodhouse uint16_t kvm_xen_get_gnttab_max_frames(void)
12456f43f2eeSDavid Woodhouse {
12466f43f2eeSDavid Woodhouse     KVMState *s = KVM_STATE(current_accel());
12476f43f2eeSDavid Woodhouse     return s->xen_gnttab_max_frames;
12486f43f2eeSDavid Woodhouse }
12496f43f2eeSDavid Woodhouse 
1250c345104cSJoao Martins int kvm_put_xen_state(CPUState *cs)
1251c345104cSJoao Martins {
1252c345104cSJoao Martins     X86CPU *cpu = X86_CPU(cs);
1253c345104cSJoao Martins     CPUX86State *env = &cpu->env;
1254c345104cSJoao Martins     uint64_t gpa;
1255c345104cSJoao Martins     int ret;
1256c345104cSJoao Martins 
1257c345104cSJoao Martins     gpa = env->xen_vcpu_info_gpa;
1258c345104cSJoao Martins     if (gpa == INVALID_GPA) {
1259c345104cSJoao Martins         gpa = env->xen_vcpu_info_default_gpa;
1260c345104cSJoao Martins     }
1261c345104cSJoao Martins 
1262c345104cSJoao Martins     if (gpa != INVALID_GPA) {
126327d4075dSDavid Woodhouse         ret = set_vcpu_info(cs, gpa);
1264c345104cSJoao Martins         if (ret < 0) {
1265c345104cSJoao Martins             return ret;
1266c345104cSJoao Martins         }
1267c345104cSJoao Martins     }
1268c345104cSJoao Martins 
1269f0689302SJoao Martins     gpa = env->xen_vcpu_time_info_gpa;
1270f0689302SJoao Martins     if (gpa != INVALID_GPA) {
1271f0689302SJoao Martins         ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
1272f0689302SJoao Martins                                     gpa);
1273f0689302SJoao Martins         if (ret < 0) {
1274f0689302SJoao Martins             return ret;
1275f0689302SJoao Martins         }
1276f0689302SJoao Martins     }
1277f0689302SJoao Martins 
12785092db87SJoao Martins     gpa = env->xen_vcpu_runstate_gpa;
12795092db87SJoao Martins     if (gpa != INVALID_GPA) {
12805092db87SJoao Martins         ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
12815092db87SJoao Martins                                     gpa);
12825092db87SJoao Martins         if (ret < 0) {
12835092db87SJoao Martins             return ret;
12845092db87SJoao Martins         }
12855092db87SJoao Martins     }
12865092db87SJoao Martins 
1287105b47fdSAnkur Arora     if (!kvm_xen_has_cap(EVTCHN_SEND)) {
1288105b47fdSAnkur Arora         return 0;
1289105b47fdSAnkur Arora     }
1290105b47fdSAnkur Arora 
1291105b47fdSAnkur Arora     if (env->xen_vcpu_callback_vector) {
1292105b47fdSAnkur Arora         ret = kvm_xen_set_vcpu_callback_vector(cs);
1293105b47fdSAnkur Arora         if (ret < 0) {
1294105b47fdSAnkur Arora             return ret;
1295105b47fdSAnkur Arora         }
1296105b47fdSAnkur Arora     }
1297105b47fdSAnkur Arora 
1298c723d4c1SDavid Woodhouse     if (env->xen_virq[VIRQ_TIMER]) {
1299c723d4c1SDavid Woodhouse         ret = kvm_xen_set_vcpu_timer(cs);
1300c723d4c1SDavid Woodhouse         if (ret < 0) {
1301c723d4c1SDavid Woodhouse             return ret;
1302c723d4c1SDavid Woodhouse         }
1303c723d4c1SDavid Woodhouse     }
1304c345104cSJoao Martins     return 0;
1305c345104cSJoao Martins }
1306c345104cSJoao Martins 
1307c345104cSJoao Martins int kvm_get_xen_state(CPUState *cs)
1308c345104cSJoao Martins {
1309c345104cSJoao Martins     X86CPU *cpu = X86_CPU(cs);
1310c345104cSJoao Martins     CPUX86State *env = &cpu->env;
1311c345104cSJoao Martins     uint64_t gpa;
1312c723d4c1SDavid Woodhouse     int ret;
1313c345104cSJoao Martins 
1314c345104cSJoao Martins     /*
1315c345104cSJoao Martins      * The kernel does not mark vcpu_info as dirty when it delivers interrupts
1316c345104cSJoao Martins      * to it. It's up to userspace to *assume* that any page shared thus is
1317c345104cSJoao Martins      * always considered dirty. The shared_info page is different since it's
1318c345104cSJoao Martins      * an overlay and migrated separately anyway.
1319c345104cSJoao Martins      */
1320c345104cSJoao Martins     gpa = env->xen_vcpu_info_gpa;
1321c345104cSJoao Martins     if (gpa == INVALID_GPA) {
1322c345104cSJoao Martins         gpa = env->xen_vcpu_info_default_gpa;
1323c345104cSJoao Martins     }
1324c345104cSJoao Martins     if (gpa != INVALID_GPA) {
1325c345104cSJoao Martins         MemoryRegionSection mrs = memory_region_find(get_system_memory(),
1326c345104cSJoao Martins                                                      gpa,
1327c345104cSJoao Martins                                                      sizeof(struct vcpu_info));
1328c345104cSJoao Martins         if (mrs.mr &&
1329c345104cSJoao Martins             !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
1330c345104cSJoao Martins             memory_region_set_dirty(mrs.mr, mrs.offset_within_region,
1331c345104cSJoao Martins                                     sizeof(struct vcpu_info));
1332c345104cSJoao Martins         }
1333c345104cSJoao Martins     }
1334c345104cSJoao Martins 
1335c723d4c1SDavid Woodhouse     if (!kvm_xen_has_cap(EVTCHN_SEND)) {
1336c723d4c1SDavid Woodhouse         return 0;
1337c723d4c1SDavid Woodhouse     }
1338c723d4c1SDavid Woodhouse 
1339c723d4c1SDavid Woodhouse     /*
1340c723d4c1SDavid Woodhouse      * If the kernel is accelerating timers, read out the current value of the
1341c723d4c1SDavid Woodhouse      * singleshot timer deadline.
1342c723d4c1SDavid Woodhouse      */
1343c723d4c1SDavid Woodhouse     if (env->xen_virq[VIRQ_TIMER]) {
1344c723d4c1SDavid Woodhouse         struct kvm_xen_vcpu_attr va = {
1345c723d4c1SDavid Woodhouse             .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
1346c723d4c1SDavid Woodhouse         };
1347c723d4c1SDavid Woodhouse         ret = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_GET_ATTR, &va);
1348c723d4c1SDavid Woodhouse         if (ret < 0) {
1349c723d4c1SDavid Woodhouse             return ret;
1350c723d4c1SDavid Woodhouse         }
1351c723d4c1SDavid Woodhouse         env->xen_singleshot_timer_ns = va.u.timer.expires_ns;
1352c723d4c1SDavid Woodhouse     }
1353c723d4c1SDavid Woodhouse 
1354c345104cSJoao Martins     return 0;
1355c345104cSJoao Martins }
1356