xref: /qemu/target/i386/kvm/xen-emu.c (revision 110a0ea59f263b6e382ee22c70c31c2364d11eb0)
/*
 * Xen HVM emulation support in KVM
 *
 * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */
1161491cf4SDavid Woodhouse 
1261491cf4SDavid Woodhouse #include "qemu/osdep.h"
1355a3f666SJoao Martins #include "qemu/log.h"
1479b7067dSJoao Martins #include "qemu/main-loop.h"
1561491cf4SDavid Woodhouse #include "sysemu/kvm_int.h"
1661491cf4SDavid Woodhouse #include "sysemu/kvm_xen.h"
1761491cf4SDavid Woodhouse #include "kvm/kvm_i386.h"
18bedcc139SJoao Martins #include "exec/address-spaces.h"
1961491cf4SDavid Woodhouse #include "xen-emu.h"
2055a3f666SJoao Martins #include "trace.h"
2179b7067dSJoao Martins #include "sysemu/runstate.h"
2261491cf4SDavid Woodhouse 
23*110a0ea5SDavid Woodhouse #include "hw/i386/kvm/xen_overlay.h"
24*110a0ea5SDavid Woodhouse 
25bedcc139SJoao Martins #include "hw/xen/interface/version.h"
2679b7067dSJoao Martins #include "hw/xen/interface/sched.h"
27bedcc139SJoao Martins 
28bedcc139SJoao Martins static int kvm_gva_rw(CPUState *cs, uint64_t gva, void *_buf, size_t sz,
29bedcc139SJoao Martins                       bool is_write)
30bedcc139SJoao Martins {
31bedcc139SJoao Martins     uint8_t *buf = (uint8_t *)_buf;
32bedcc139SJoao Martins     int ret;
33bedcc139SJoao Martins 
34bedcc139SJoao Martins     while (sz) {
35bedcc139SJoao Martins         struct kvm_translation tr = {
36bedcc139SJoao Martins             .linear_address = gva,
37bedcc139SJoao Martins         };
38bedcc139SJoao Martins 
39bedcc139SJoao Martins         size_t len = TARGET_PAGE_SIZE - (tr.linear_address & ~TARGET_PAGE_MASK);
40bedcc139SJoao Martins         if (len > sz) {
41bedcc139SJoao Martins             len = sz;
42bedcc139SJoao Martins         }
43bedcc139SJoao Martins 
44bedcc139SJoao Martins         ret = kvm_vcpu_ioctl(cs, KVM_TRANSLATE, &tr);
45bedcc139SJoao Martins         if (ret || !tr.valid || (is_write && !tr.writeable)) {
46bedcc139SJoao Martins             return -EFAULT;
47bedcc139SJoao Martins         }
48bedcc139SJoao Martins 
49bedcc139SJoao Martins         cpu_physical_memory_rw(tr.physical_address, buf, len, is_write);
50bedcc139SJoao Martins 
51bedcc139SJoao Martins         buf += len;
52bedcc139SJoao Martins         sz -= len;
53bedcc139SJoao Martins         gva += len;
54bedcc139SJoao Martins     }
55bedcc139SJoao Martins 
56bedcc139SJoao Martins     return 0;
57bedcc139SJoao Martins }
58bedcc139SJoao Martins 
59bedcc139SJoao Martins static inline int kvm_copy_from_gva(CPUState *cs, uint64_t gva, void *buf,
60bedcc139SJoao Martins                                     size_t sz)
61bedcc139SJoao Martins {
62bedcc139SJoao Martins     return kvm_gva_rw(cs, gva, buf, sz, false);
63bedcc139SJoao Martins }
64bedcc139SJoao Martins 
65bedcc139SJoao Martins static inline int kvm_copy_to_gva(CPUState *cs, uint64_t gva, void *buf,
66bedcc139SJoao Martins                                   size_t sz)
67bedcc139SJoao Martins {
68bedcc139SJoao Martins     return kvm_gva_rw(cs, gva, buf, sz, true);
69bedcc139SJoao Martins }
70bedcc139SJoao Martins 
71f66b8a83SJoao Martins int kvm_xen_init(KVMState *s, uint32_t hypercall_msr)
7261491cf4SDavid Woodhouse {
7361491cf4SDavid Woodhouse     const int required_caps = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
7461491cf4SDavid Woodhouse         KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_SHARED_INFO;
7561491cf4SDavid Woodhouse     struct kvm_xen_hvm_config cfg = {
76f66b8a83SJoao Martins         .msr = hypercall_msr,
7761491cf4SDavid Woodhouse         .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
7861491cf4SDavid Woodhouse     };
7961491cf4SDavid Woodhouse     int xen_caps, ret;
8061491cf4SDavid Woodhouse 
8161491cf4SDavid Woodhouse     xen_caps = kvm_check_extension(s, KVM_CAP_XEN_HVM);
8261491cf4SDavid Woodhouse     if (required_caps & ~xen_caps) {
8361491cf4SDavid Woodhouse         error_report("kvm: Xen HVM guest support not present or insufficient");
8461491cf4SDavid Woodhouse         return -ENOSYS;
8561491cf4SDavid Woodhouse     }
8661491cf4SDavid Woodhouse 
8761491cf4SDavid Woodhouse     if (xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND) {
8861491cf4SDavid Woodhouse         struct kvm_xen_hvm_attr ha = {
8961491cf4SDavid Woodhouse             .type = KVM_XEN_ATTR_TYPE_XEN_VERSION,
9061491cf4SDavid Woodhouse             .u.xen_version = s->xen_version,
9161491cf4SDavid Woodhouse         };
9261491cf4SDavid Woodhouse         (void)kvm_vm_ioctl(s, KVM_XEN_HVM_SET_ATTR, &ha);
9361491cf4SDavid Woodhouse 
9461491cf4SDavid Woodhouse         cfg.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
9561491cf4SDavid Woodhouse     }
9661491cf4SDavid Woodhouse 
9761491cf4SDavid Woodhouse     ret = kvm_vm_ioctl(s, KVM_XEN_HVM_CONFIG, &cfg);
9861491cf4SDavid Woodhouse     if (ret < 0) {
9961491cf4SDavid Woodhouse         error_report("kvm: Failed to enable Xen HVM support: %s",
10061491cf4SDavid Woodhouse                      strerror(-ret));
10161491cf4SDavid Woodhouse         return ret;
10261491cf4SDavid Woodhouse     }
10361491cf4SDavid Woodhouse 
10461491cf4SDavid Woodhouse     s->xen_caps = xen_caps;
10561491cf4SDavid Woodhouse     return 0;
10661491cf4SDavid Woodhouse }
10761491cf4SDavid Woodhouse 
1085e691a95SDavid Woodhouse int kvm_xen_init_vcpu(CPUState *cs)
1095e691a95SDavid Woodhouse {
1105e691a95SDavid Woodhouse     int err;
1115e691a95SDavid Woodhouse 
1125e691a95SDavid Woodhouse     /*
1135e691a95SDavid Woodhouse      * The kernel needs to know the Xen/ACPI vCPU ID because that's
1145e691a95SDavid Woodhouse      * what the guest uses in hypercalls such as timers. It doesn't
1155e691a95SDavid Woodhouse      * match the APIC ID which is generally used for talking to the
1165e691a95SDavid Woodhouse      * kernel about vCPUs. And if vCPU threads race with creating
1175e691a95SDavid Woodhouse      * their KVM vCPUs out of order, it doesn't necessarily match
1185e691a95SDavid Woodhouse      * with the kernel's internal vCPU indices either.
1195e691a95SDavid Woodhouse      */
1205e691a95SDavid Woodhouse     if (kvm_xen_has_cap(EVTCHN_SEND)) {
1215e691a95SDavid Woodhouse         struct kvm_xen_vcpu_attr va = {
1225e691a95SDavid Woodhouse             .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID,
1235e691a95SDavid Woodhouse             .u.vcpu_id = cs->cpu_index,
1245e691a95SDavid Woodhouse         };
1255e691a95SDavid Woodhouse         err = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
1265e691a95SDavid Woodhouse         if (err) {
1275e691a95SDavid Woodhouse             error_report("kvm: Failed to set Xen vCPU ID attribute: %s",
1285e691a95SDavid Woodhouse                          strerror(-err));
1295e691a95SDavid Woodhouse             return err;
1305e691a95SDavid Woodhouse         }
1315e691a95SDavid Woodhouse     }
1325e691a95SDavid Woodhouse 
1335e691a95SDavid Woodhouse     return 0;
1345e691a95SDavid Woodhouse }
1355e691a95SDavid Woodhouse 
13661491cf4SDavid Woodhouse uint32_t kvm_xen_get_caps(void)
13761491cf4SDavid Woodhouse {
13861491cf4SDavid Woodhouse     return kvm_state->xen_caps;
13961491cf4SDavid Woodhouse }
14055a3f666SJoao Martins 
141bedcc139SJoao Martins static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu,
142bedcc139SJoao Martins                                      int cmd, uint64_t arg)
143bedcc139SJoao Martins {
144bedcc139SJoao Martins     int err = 0;
145bedcc139SJoao Martins 
146bedcc139SJoao Martins     switch (cmd) {
147bedcc139SJoao Martins     case XENVER_get_features: {
148bedcc139SJoao Martins         struct xen_feature_info fi;
149bedcc139SJoao Martins 
150bedcc139SJoao Martins         /* No need for 32/64 compat handling */
151bedcc139SJoao Martins         qemu_build_assert(sizeof(fi) == 8);
152bedcc139SJoao Martins 
153bedcc139SJoao Martins         err = kvm_copy_from_gva(CPU(cpu), arg, &fi, sizeof(fi));
154bedcc139SJoao Martins         if (err) {
155bedcc139SJoao Martins             break;
156bedcc139SJoao Martins         }
157bedcc139SJoao Martins 
158bedcc139SJoao Martins         fi.submap = 0;
159bedcc139SJoao Martins         if (fi.submap_idx == 0) {
160bedcc139SJoao Martins             fi.submap |= 1 << XENFEAT_writable_page_tables |
161bedcc139SJoao Martins                          1 << XENFEAT_writable_descriptor_tables |
162bedcc139SJoao Martins                          1 << XENFEAT_auto_translated_physmap |
163bedcc139SJoao Martins                          1 << XENFEAT_supervisor_mode_kernel;
164bedcc139SJoao Martins         }
165bedcc139SJoao Martins 
166bedcc139SJoao Martins         err = kvm_copy_to_gva(CPU(cpu), arg, &fi, sizeof(fi));
167bedcc139SJoao Martins         break;
168bedcc139SJoao Martins     }
169bedcc139SJoao Martins 
170bedcc139SJoao Martins     default:
171bedcc139SJoao Martins         return false;
172bedcc139SJoao Martins     }
173bedcc139SJoao Martins 
174bedcc139SJoao Martins     exit->u.hcall.result = err;
175bedcc139SJoao Martins     return true;
176bedcc139SJoao Martins }
177bedcc139SJoao Martins 
/*
 * Perform a Xen soft reset.  Asserts that the iothread lock is held.
 * Currently this only emits a trace event and returns 0.
 */
int kvm_xen_soft_reset(void)
{
    int rc = 0;

    assert(qemu_mutex_iothread_locked());

    trace_kvm_xen_soft_reset();

    /* There is no state to reset so far. */
    return rc;
}
18779b7067dSJoao Martins 
18879b7067dSJoao Martins static int schedop_shutdown(CPUState *cs, uint64_t arg)
18979b7067dSJoao Martins {
19079b7067dSJoao Martins     struct sched_shutdown shutdown;
19179b7067dSJoao Martins     int ret = 0;
19279b7067dSJoao Martins 
19379b7067dSJoao Martins     /* No need for 32/64 compat handling */
19479b7067dSJoao Martins     qemu_build_assert(sizeof(shutdown) == 4);
19579b7067dSJoao Martins 
19679b7067dSJoao Martins     if (kvm_copy_from_gva(cs, arg, &shutdown, sizeof(shutdown))) {
19779b7067dSJoao Martins         return -EFAULT;
19879b7067dSJoao Martins     }
19979b7067dSJoao Martins 
20079b7067dSJoao Martins     switch (shutdown.reason) {
20179b7067dSJoao Martins     case SHUTDOWN_crash:
20279b7067dSJoao Martins         cpu_dump_state(cs, stderr, CPU_DUMP_CODE);
20379b7067dSJoao Martins         qemu_system_guest_panicked(NULL);
20479b7067dSJoao Martins         break;
20579b7067dSJoao Martins 
20679b7067dSJoao Martins     case SHUTDOWN_reboot:
20779b7067dSJoao Martins         qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
20879b7067dSJoao Martins         break;
20979b7067dSJoao Martins 
21079b7067dSJoao Martins     case SHUTDOWN_poweroff:
21179b7067dSJoao Martins         qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
21279b7067dSJoao Martins         break;
21379b7067dSJoao Martins 
21479b7067dSJoao Martins     case SHUTDOWN_soft_reset:
21579b7067dSJoao Martins         qemu_mutex_lock_iothread();
21679b7067dSJoao Martins         ret = kvm_xen_soft_reset();
21779b7067dSJoao Martins         qemu_mutex_unlock_iothread();
21879b7067dSJoao Martins         break;
21979b7067dSJoao Martins 
22079b7067dSJoao Martins     default:
22179b7067dSJoao Martins         ret = -EINVAL;
22279b7067dSJoao Martins         break;
22379b7067dSJoao Martins     }
22479b7067dSJoao Martins 
22579b7067dSJoao Martins     return ret;
22679b7067dSJoao Martins }
22779b7067dSJoao Martins 
22879b7067dSJoao Martins static bool kvm_xen_hcall_sched_op(struct kvm_xen_exit *exit, X86CPU *cpu,
22979b7067dSJoao Martins                                    int cmd, uint64_t arg)
23079b7067dSJoao Martins {
23179b7067dSJoao Martins     CPUState *cs = CPU(cpu);
23279b7067dSJoao Martins     int err = -ENOSYS;
23379b7067dSJoao Martins 
23479b7067dSJoao Martins     switch (cmd) {
23579b7067dSJoao Martins     case SCHEDOP_shutdown:
23679b7067dSJoao Martins         err = schedop_shutdown(cs, arg);
23779b7067dSJoao Martins         break;
23879b7067dSJoao Martins 
239c789b9efSDavid Woodhouse     case SCHEDOP_poll:
240c789b9efSDavid Woodhouse         /*
241c789b9efSDavid Woodhouse          * Linux will panic if this doesn't work. Just yield; it's not
242c789b9efSDavid Woodhouse          * worth overthinking it because with event channel handling
243c789b9efSDavid Woodhouse          * in KVM, the kernel will intercept this and it will never
244c789b9efSDavid Woodhouse          * reach QEMU anyway. The semantics of the hypercall explicltly
245c789b9efSDavid Woodhouse          * permit spurious wakeups.
246c789b9efSDavid Woodhouse          */
247c789b9efSDavid Woodhouse     case SCHEDOP_yield:
248c789b9efSDavid Woodhouse         sched_yield();
249c789b9efSDavid Woodhouse         err = 0;
250c789b9efSDavid Woodhouse         break;
251c789b9efSDavid Woodhouse 
25279b7067dSJoao Martins     default:
25379b7067dSJoao Martins         return false;
25479b7067dSJoao Martins     }
25579b7067dSJoao Martins 
25679b7067dSJoao Martins     exit->u.hcall.result = err;
25779b7067dSJoao Martins     return true;
25879b7067dSJoao Martins }
25979b7067dSJoao Martins 
26055a3f666SJoao Martins static bool do_kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
26155a3f666SJoao Martins {
26255a3f666SJoao Martins     uint16_t code = exit->u.hcall.input;
26355a3f666SJoao Martins 
26455a3f666SJoao Martins     if (exit->u.hcall.cpl > 0) {
26555a3f666SJoao Martins         exit->u.hcall.result = -EPERM;
26655a3f666SJoao Martins         return true;
26755a3f666SJoao Martins     }
26855a3f666SJoao Martins 
26955a3f666SJoao Martins     switch (code) {
27079b7067dSJoao Martins     case __HYPERVISOR_sched_op:
27179b7067dSJoao Martins         return kvm_xen_hcall_sched_op(exit, cpu, exit->u.hcall.params[0],
27279b7067dSJoao Martins                                       exit->u.hcall.params[1]);
273bedcc139SJoao Martins     case __HYPERVISOR_xen_version:
274bedcc139SJoao Martins         return kvm_xen_hcall_xen_version(exit, cpu, exit->u.hcall.params[0],
275bedcc139SJoao Martins                                          exit->u.hcall.params[1]);
27655a3f666SJoao Martins     default:
27755a3f666SJoao Martins         return false;
27855a3f666SJoao Martins     }
27955a3f666SJoao Martins }
28055a3f666SJoao Martins 
28155a3f666SJoao Martins int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
28255a3f666SJoao Martins {
28355a3f666SJoao Martins     if (exit->type != KVM_EXIT_XEN_HCALL) {
28455a3f666SJoao Martins         return -1;
28555a3f666SJoao Martins     }
28655a3f666SJoao Martins 
287*110a0ea5SDavid Woodhouse     /*
288*110a0ea5SDavid Woodhouse      * The kernel latches the guest 32/64 mode when the MSR is used to fill
289*110a0ea5SDavid Woodhouse      * the hypercall page. So if we see a hypercall in a mode that doesn't
290*110a0ea5SDavid Woodhouse      * match our own idea of the guest mode, fetch the kernel's idea of the
291*110a0ea5SDavid Woodhouse      * "long mode" to remain in sync.
292*110a0ea5SDavid Woodhouse      */
293*110a0ea5SDavid Woodhouse     if (exit->u.hcall.longmode != xen_is_long_mode()) {
294*110a0ea5SDavid Woodhouse         xen_sync_long_mode();
295*110a0ea5SDavid Woodhouse     }
296*110a0ea5SDavid Woodhouse 
29755a3f666SJoao Martins     if (!do_kvm_xen_handle_exit(cpu, exit)) {
29855a3f666SJoao Martins         /*
29955a3f666SJoao Martins          * Some hypercalls will be deliberately "implemented" by returning
30055a3f666SJoao Martins          * -ENOSYS. This case is for hypercalls which are unexpected.
30155a3f666SJoao Martins          */
30255a3f666SJoao Martins         exit->u.hcall.result = -ENOSYS;
30355a3f666SJoao Martins         qemu_log_mask(LOG_UNIMP, "Unimplemented Xen hypercall %"
30455a3f666SJoao Martins                       PRId64 " (0x%" PRIx64 " 0x%" PRIx64 " 0x%" PRIx64 ")\n",
30555a3f666SJoao Martins                       (uint64_t)exit->u.hcall.input,
30655a3f666SJoao Martins                       (uint64_t)exit->u.hcall.params[0],
30755a3f666SJoao Martins                       (uint64_t)exit->u.hcall.params[1],
30855a3f666SJoao Martins                       (uint64_t)exit->u.hcall.params[2]);
30955a3f666SJoao Martins     }
31055a3f666SJoao Martins 
31155a3f666SJoao Martins     trace_kvm_xen_hypercall(CPU(cpu)->cpu_index, exit->u.hcall.cpl,
31255a3f666SJoao Martins                             exit->u.hcall.input, exit->u.hcall.params[0],
31355a3f666SJoao Martins                             exit->u.hcall.params[1], exit->u.hcall.params[2],
31455a3f666SJoao Martins                             exit->u.hcall.result);
31555a3f666SJoao Martins     return 0;
31655a3f666SJoao Martins }
317