xref: /qemu/target/i386/kvm/xen-emu.c (revision fc524567087c2537b5103cdfc1d41e4f442892b6)
1 /*
2  * Xen HVM emulation support in KVM
3  *
4  * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
5  * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
6  *
7  * This work is licensed under the terms of the GNU GPL, version 2 or later.
8  * See the COPYING file in the top-level directory.
9  *
10  */
11 
12 #include "qemu/osdep.h"
13 #include "qemu/log.h"
14 #include "qemu/main-loop.h"
15 #include "qemu/error-report.h"
16 #include "exec/target_page.h"
17 #include "hw/xen/xen.h"
18 #include "system/kvm_int.h"
19 #include "system/kvm_xen.h"
20 #include "kvm/kvm_i386.h"
21 #include "system/address-spaces.h"
22 #include "xen-emu.h"
23 #include "trace.h"
24 #include "system/runstate.h"
25 
26 #include "hw/pci/msi.h"
27 #include "hw/i386/apic-msidef.h"
28 #include "hw/i386/e820_memory_layout.h"
29 #include "hw/i386/kvm/xen_overlay.h"
30 #include "hw/i386/kvm/xen_evtchn.h"
31 #include "hw/i386/kvm/xen_gnttab.h"
32 #include "hw/i386/kvm/xen_primary_console.h"
33 #include "hw/i386/kvm/xen_xenstore.h"
34 
35 #include "hw/xen/interface/version.h"
36 #include "hw/xen/interface/sched.h"
37 #include "hw/xen/interface/memory.h"
38 #include "hw/xen/interface/hvm/hvm_op.h"
39 #include "hw/xen/interface/hvm/params.h"
40 #include "hw/xen/interface/vcpu.h"
41 #include "hw/xen/interface/event_channel.h"
42 #include "hw/xen/interface/grant_table.h"
43 
44 #include "xen-compat.h"
45 
46 static void xen_vcpu_singleshot_timer_event(void *opaque);
47 static void xen_vcpu_periodic_timer_event(void *opaque);
48 static int vcpuop_stop_singleshot_timer(CPUState *cs);
49 
50 #ifdef TARGET_X86_64
51 #define hypercall_compat32(longmode) (!(longmode))
52 #else
53 #define hypercall_compat32(longmode) (false)
54 #endif
55 
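/*
 * Helpers for accessing guest memory by virtual address: kvm_gva_to_gpa()
 * asks KVM to translate a single GVA through the vCPU's current page
 * tables (KVM_TRANSLATE) and reports how many bytes remain in that page,
 * and kvm_gva_rw() loops over pages so that hypercall arguments which
 * straddle a page boundary are still copied in full.
 */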
56 static bool kvm_gva_to_gpa(CPUState *cs, uint64_t gva, uint64_t *gpa,
57                            size_t *len, bool is_write)
58 {
59     struct kvm_translation tr = {
60         .linear_address = gva,
61     };
62 
63     if (len) {
64         *len = TARGET_PAGE_SIZE - (gva & ~TARGET_PAGE_MASK);
65     }
66 
67     if (kvm_vcpu_ioctl(cs, KVM_TRANSLATE, &tr) || !tr.valid ||
68         (is_write && !tr.writeable)) {
69         return false;
70     }
71     *gpa = tr.physical_address;
72     return true;
73 }
74 
75 static int kvm_gva_rw(CPUState *cs, uint64_t gva, void *_buf, size_t sz,
76                       bool is_write)
77 {
78     uint8_t *buf = (uint8_t *)_buf;
79     uint64_t gpa;
80     size_t len;
81 
82     while (sz) {
83         if (!kvm_gva_to_gpa(cs, gva, &gpa, &len, is_write)) {
84             return -EFAULT;
85         }
86         if (len > sz) {
87             len = sz;
88         }
89 
90         cpu_physical_memory_rw(gpa, buf, len, is_write);
91 
92         buf += len;
93         sz -= len;
94         gva += len;
95     }
96 
97     return 0;
98 }
99 
100 static inline int kvm_copy_from_gva(CPUState *cs, uint64_t gva, void *buf,
101                                     size_t sz)
102 {
103     return kvm_gva_rw(cs, gva, buf, sz, false);
104 }
105 
106 static inline int kvm_copy_to_gva(CPUState *cs, uint64_t gva, void *buf,
107                                   size_t sz)
108 {
109     return kvm_gva_rw(cs, gva, buf, sz, true);
110 }
111 
112 int kvm_xen_init(KVMState *s, uint32_t hypercall_msr)
113 {
114     const int required_caps = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
115         KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_SHARED_INFO;
116     struct kvm_xen_hvm_config cfg = {
117         .msr = hypercall_msr,
118         .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
119     };
120     int xen_caps, ret;
121 
122     xen_caps = kvm_check_extension(s, KVM_CAP_XEN_HVM);
123     if (required_caps & ~xen_caps) {
124         error_report("kvm: Xen HVM guest support not present or insufficient");
125         return -ENOSYS;
126     }
127 
128     if (xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND) {
129         struct kvm_xen_hvm_attr ha = {
130             .type = KVM_XEN_ATTR_TYPE_XEN_VERSION,
131             .u.xen_version = s->xen_version,
132         };
133         (void)kvm_vm_ioctl(s, KVM_XEN_HVM_SET_ATTR, &ha);
134 
135         cfg.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
136     }
137 
138     ret = kvm_vm_ioctl(s, KVM_XEN_HVM_CONFIG, &cfg);
139     if (ret < 0) {
140         error_report("kvm: Failed to enable Xen HVM support: %s",
141                      strerror(-ret));
142         return ret;
143     }
144 
145     /* If called a second time, don't repeat the rest of the setup. */
146     if (s->xen_caps) {
147         return 0;
148     }
149 
150     /*
151      * Event channel delivery via GSI/PCI_INTX needs to poll the vcpu_info
152      * of vCPU0 to deassert the IRQ when ->evtchn_upcall_pending is cleared.
153      *
154      * In the kernel, there's a notifier hook on the PIC/IOAPIC which allows
155      * such things to be polled at precisely the right time. We *could* do
156      * it nicely in the kernel: check vcpu_info[0]->evtchn_upcall_pending at
157      * the moment the IRQ is acked, and see if it should be reasserted.
158      *
159      * But the in-kernel irqchip is deprecated, so we're unlikely to add
160      * that support in the kernel. Insist on using the split irqchip mode
161      * instead.
162      *
163      * This leaves us polling for the level going low in QEMU, which lacks
164      * the appropriate hooks in its PIC/IOAPIC code. Even VFIO is sending a
165      * spurious 'ack' to an INTX IRQ every time there's any MMIO access to
166      * the device (for which it has to unmap the device and trap access, for
167      * some period after an IRQ!!). In the Xen case, we do it on exit from
168      * KVM_RUN, if the flag is set to say that the GSI is currently asserted.
169      * Which is kind of icky, but less so than the VFIO one. I may fix them
170      * both later...
171      */
172     if (!kvm_kernel_irqchip_split()) {
173         error_report("kvm: Xen support requires kernel-irqchip=split");
174         return -EINVAL;
175     }
176 
177     s->xen_caps = xen_caps;
178 
179     /* Tell fw_cfg to notify the BIOS to reserve the range. */
180     e820_add_entry(XEN_SPECIAL_AREA_ADDR, XEN_SPECIAL_AREA_SIZE, E820_RESERVED);
181 
182     /* The pages couldn't be overlaid until KVM was initialized */
183     xen_primary_console_reset();
184     xen_xenstore_reset();
185 
186     return 0;
187 }
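/*
 * Roughly how this fits together at runtime: the guest finds the
 * hypercall MSR via the Xen CPUID leaves and writes the GPA of its
 * hypercall page to it, at which point KVM fills that page with
 * VMCALL/VMMCALL stubs. Because KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL is
 * set above, hypercalls that the kernel does not handle itself come
 * back to QEMU as KVM_EXIT_XEN exits with exit->u.hcall populated,
 * which do_kvm_xen_handle_exit() below dispatches on.
 */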
188 
189 int kvm_xen_init_vcpu(CPUState *cs)
190 {
191     X86CPU *cpu = X86_CPU(cs);
192     CPUX86State *env = &cpu->env;
193     int err;
194 
195     /*
196      * The kernel needs to know the Xen/ACPI vCPU ID because that's
197      * what the guest uses in hypercalls such as timers. It doesn't
198      * match the APIC ID which is generally used for talking to the
199      * kernel about vCPUs. And if vCPU threads race with creating
200      * their KVM vCPUs out of order, it doesn't necessarily match
201      * with the kernel's internal vCPU indices either.
202      */
203     if (kvm_xen_has_cap(EVTCHN_SEND)) {
204         struct kvm_xen_vcpu_attr va = {
205             .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID,
206             .u.vcpu_id = cs->cpu_index,
207         };
208         err = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
209         if (err) {
210             error_report("kvm: Failed to set Xen vCPU ID attribute: %s",
211                          strerror(-err));
212             return err;
213         }
214     }
215 
216     env->xen_vcpu_info_gpa = INVALID_GPA;
217     env->xen_vcpu_info_default_gpa = INVALID_GPA;
218     env->xen_vcpu_time_info_gpa = INVALID_GPA;
219     env->xen_vcpu_runstate_gpa = INVALID_GPA;
220 
221     qemu_mutex_init(&env->xen_timers_lock);
222     env->xen_singleshot_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
223                                              xen_vcpu_singleshot_timer_event,
224                                              cpu);
225     if (!env->xen_singleshot_timer) {
226         return -ENOMEM;
227     }
228     env->xen_singleshot_timer->opaque = cs;
229 
230     env->xen_periodic_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
231                                            xen_vcpu_periodic_timer_event,
232                                            cpu);
233     if (!env->xen_periodic_timer) {
234         return -ENOMEM;
235     }
236     env->xen_periodic_timer->opaque = cs;
237 
238     return 0;
239 }
240 
241 uint32_t kvm_xen_get_caps(void)
242 {
243     return kvm_state->xen_caps;
244 }
245 
246 static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu,
247                                      int cmd, uint64_t arg)
248 {
249     int err = 0;
250 
251     switch (cmd) {
252     case XENVER_get_features: {
253         struct xen_feature_info fi;
254 
255         /* No need for 32/64 compat handling */
256         qemu_build_assert(sizeof(fi) == 8);
257 
258         err = kvm_copy_from_gva(CPU(cpu), arg, &fi, sizeof(fi));
259         if (err) {
260             break;
261         }
262 
263         fi.submap = 0;
264         if (fi.submap_idx == 0) {
265             fi.submap |= 1 << XENFEAT_writable_page_tables |
266                          1 << XENFEAT_writable_descriptor_tables |
267                          1 << XENFEAT_auto_translated_physmap |
268                          1 << XENFEAT_hvm_callback_vector |
269                          1 << XENFEAT_hvm_safe_pvclock |
270                          1 << XENFEAT_hvm_pirqs;
271         }
272 
273         err = kvm_copy_to_gva(CPU(cpu), arg, &fi, sizeof(fi));
274         break;
275     }
276 
277     default:
278         return false;
279     }
280 
281     exit->u.hcall.result = err;
282     return true;
283 }
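/*
 * For illustration, a guest querying the feature map above might issue
 * something like the following (a guest-side sketch, not code from this
 * file):
 *
 *     struct xen_feature_info fi = { .submap_idx = 0 };
 *     HYPERVISOR_xen_version(XENVER_get_features, &fi);
 *     if (fi.submap & (1 << XENFEAT_hvm_callback_vector)) {
 *         ... set up the vector callback ...
 *     }
 */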
284 
285 static int kvm_xen_set_vcpu_attr(CPUState *cs, uint16_t type, uint64_t gpa)
286 {
287     struct kvm_xen_vcpu_attr xhsi;
288 
289     xhsi.type = type;
290     xhsi.u.gpa = gpa;
291 
292     trace_kvm_xen_set_vcpu_attr(cs->cpu_index, type, gpa);
293 
294     return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xhsi);
295 }
296 
297 static int kvm_xen_set_vcpu_callback_vector(CPUState *cs)
298 {
299     uint8_t vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
300     struct kvm_xen_vcpu_attr xva;
301 
302     xva.type = KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR;
303     xva.u.vector = vector;
304 
305     trace_kvm_xen_set_vcpu_callback(cs->cpu_index, vector);
306 
307     return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xva);
308 }
309 
310 static void do_set_vcpu_callback_vector(CPUState *cs, run_on_cpu_data data)
311 {
312     X86CPU *cpu = X86_CPU(cs);
313     CPUX86State *env = &cpu->env;
314 
315     env->xen_vcpu_callback_vector = data.host_int;
316 
317     if (kvm_xen_has_cap(EVTCHN_SEND)) {
318         kvm_xen_set_vcpu_callback_vector(cs);
319     }
320 }
321 
322 static int set_vcpu_info(CPUState *cs, uint64_t gpa)
323 {
324     X86CPU *cpu = X86_CPU(cs);
325     CPUX86State *env = &cpu->env;
326     MemoryRegionSection mrs = { .mr = NULL };
327     void *vcpu_info_hva = NULL;
328     int ret;
329 
330     ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, gpa);
331     if (ret || gpa == INVALID_GPA) {
332         goto out;
333     }
334 
335     mrs = memory_region_find(get_system_memory(), gpa,
336                              sizeof(struct vcpu_info));
337     if (mrs.mr && mrs.mr->ram_block &&
338         !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
339         vcpu_info_hva = qemu_map_ram_ptr(mrs.mr->ram_block,
340                                          mrs.offset_within_region);
341     }
342     if (!vcpu_info_hva) {
343         if (mrs.mr) {
344             memory_region_unref(mrs.mr);
345             mrs.mr = NULL;
346         }
347         ret = -EINVAL;
348     }
349 
350  out:
351     if (env->xen_vcpu_info_mr) {
352         memory_region_unref(env->xen_vcpu_info_mr);
353     }
354     env->xen_vcpu_info_hva = vcpu_info_hva;
355     env->xen_vcpu_info_mr = mrs.mr;
356     return ret;
357 }
358 
359 static void do_set_vcpu_info_default_gpa(CPUState *cs, run_on_cpu_data data)
360 {
361     X86CPU *cpu = X86_CPU(cs);
362     CPUX86State *env = &cpu->env;
363 
364     env->xen_vcpu_info_default_gpa = data.host_ulong;
365 
366     /* Changing the default does nothing if a vcpu_info was explicitly set. */
367     if (env->xen_vcpu_info_gpa == INVALID_GPA) {
368         set_vcpu_info(cs, env->xen_vcpu_info_default_gpa);
369     }
370 }
371 
372 static void do_set_vcpu_info_gpa(CPUState *cs, run_on_cpu_data data)
373 {
374     X86CPU *cpu = X86_CPU(cs);
375     CPUX86State *env = &cpu->env;
376 
377     env->xen_vcpu_info_gpa = data.host_ulong;
378 
379     set_vcpu_info(cs, env->xen_vcpu_info_gpa);
380 }
381 
382 void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id)
383 {
384     CPUState *cs = qemu_get_cpu(vcpu_id);
385     if (!cs) {
386         return NULL;
387     }
388 
389     return X86_CPU(cs)->env.xen_vcpu_info_hva;
390 }
391 
392 void kvm_xen_maybe_deassert_callback(CPUState *cs)
393 {
394     CPUX86State *env = &X86_CPU(cs)->env;
395     struct vcpu_info *vi = env->xen_vcpu_info_hva;
396     if (!vi) {
397         return;
398     }
399 
400     /* If the evtchn_upcall_pending flag is cleared, turn the GSI off. */
401     if (!vi->evtchn_upcall_pending) {
402         bql_lock();
403         /*
404          * Check again now we have the lock, because it may have been
405          * asserted in the interim. And we don't want to take the lock
406          * every time because this is a fast path.
407          */
408         if (!vi->evtchn_upcall_pending) {
409             X86_CPU(cs)->env.xen_callback_asserted = false;
410             xen_evtchn_set_callback_level(0);
411         }
412         bql_unlock();
413     }
414 }
415 
416 void kvm_xen_set_callback_asserted(void)
417 {
418     CPUState *cs = qemu_get_cpu(0);
419 
420     if (cs) {
421         X86_CPU(cs)->env.xen_callback_asserted = true;
422     }
423 }
424 
425 bool kvm_xen_has_vcpu_callback_vector(void)
426 {
427     CPUState *cs = qemu_get_cpu(0);
428 
429     return cs && !!X86_CPU(cs)->env.xen_vcpu_callback_vector;
430 }
431 
432 void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type)
433 {
434     CPUState *cs = qemu_get_cpu(vcpu_id);
435     uint8_t vector;
436 
437     if (!cs) {
438         return;
439     }
440 
441     vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
442     if (vector) {
443         /*
444          * The per-vCPU callback vector is injected via the local
445          * APIC. Just deliver it as an MSI.
446          */
447         MSIMessage msg = {
448             .address = APIC_DEFAULT_ADDRESS |
449                        (X86_CPU(cs)->apic_id << MSI_ADDR_DEST_ID_SHIFT),
450             .data = vector | (1UL << MSI_DATA_LEVEL_SHIFT),
451         };
452         kvm_irqchip_send_msi(kvm_state, msg);
453         return;
454     }
455 
456     switch (type) {
457     case HVM_PARAM_CALLBACK_TYPE_VECTOR:
458         /*
459          * If the evtchn_upcall_pending field in the vcpu_info is set, then
460          * KVM will automatically deliver the vector on entering the vCPU
461          * so all we have to do is kick it out.
462          */
463         qemu_cpu_kick(cs);
464         break;
465 
466     case HVM_PARAM_CALLBACK_TYPE_GSI:
467     case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
468         if (vcpu_id == 0) {
469             xen_evtchn_set_callback_level(1);
470         }
471         break;
472     }
473 }
474 
475 /* Must always be called with xen_timers_lock held */
476 static int kvm_xen_set_vcpu_timer(CPUState *cs)
477 {
478     X86CPU *cpu = X86_CPU(cs);
479     CPUX86State *env = &cpu->env;
480 
481     struct kvm_xen_vcpu_attr va = {
482         .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
483         .u.timer.port = env->xen_virq[VIRQ_TIMER],
484         .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
485         .u.timer.expires_ns = env->xen_singleshot_timer_ns,
486     };
487 
488     return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
489 }
490 
491 static void do_set_vcpu_timer_virq(CPUState *cs, run_on_cpu_data data)
492 {
493     QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
494     kvm_xen_set_vcpu_timer(cs);
495 }
496 
497 int kvm_xen_set_vcpu_virq(uint32_t vcpu_id, uint16_t virq, uint16_t port)
498 {
499     CPUState *cs = qemu_get_cpu(vcpu_id);
500 
501     if (!cs) {
502         return -ENOENT;
503     }
504 
505     /* cpu.h doesn't include the actual Xen header. */
506     qemu_build_assert(NR_VIRQS == XEN_NR_VIRQS);
507 
508     if (virq >= NR_VIRQS) {
509         return -EINVAL;
510     }
511 
512     if (port && X86_CPU(cs)->env.xen_virq[virq]) {
513         return -EEXIST;
514     }
515 
516     X86_CPU(cs)->env.xen_virq[virq] = port;
517     if (virq == VIRQ_TIMER && kvm_xen_has_cap(EVTCHN_SEND)) {
518         async_run_on_cpu(cs, do_set_vcpu_timer_virq,
519                          RUN_ON_CPU_HOST_INT(port));
520     }
521     return 0;
522 }
523 
524 static void do_set_vcpu_time_info_gpa(CPUState *cs, run_on_cpu_data data)
525 {
526     X86CPU *cpu = X86_CPU(cs);
527     CPUX86State *env = &cpu->env;
528 
529     env->xen_vcpu_time_info_gpa = data.host_ulong;
530 
531     kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
532                           env->xen_vcpu_time_info_gpa);
533 }
534 
535 static void do_set_vcpu_runstate_gpa(CPUState *cs, run_on_cpu_data data)
536 {
537     X86CPU *cpu = X86_CPU(cs);
538     CPUX86State *env = &cpu->env;
539 
540     env->xen_vcpu_runstate_gpa = data.host_ulong;
541 
542     kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
543                           env->xen_vcpu_runstate_gpa);
544 }
545 
546 static void do_vcpu_soft_reset(CPUState *cs, run_on_cpu_data data)
547 {
548     X86CPU *cpu = X86_CPU(cs);
549     CPUX86State *env = &cpu->env;
550 
551     env->xen_vcpu_info_gpa = INVALID_GPA;
552     env->xen_vcpu_info_default_gpa = INVALID_GPA;
553     env->xen_vcpu_time_info_gpa = INVALID_GPA;
554     env->xen_vcpu_runstate_gpa = INVALID_GPA;
555     env->xen_vcpu_callback_vector = 0;
556     memset(env->xen_virq, 0, sizeof(env->xen_virq));
557 
558     set_vcpu_info(cs, INVALID_GPA);
559     kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
560                           INVALID_GPA);
561     kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
562                           INVALID_GPA);
563     if (kvm_xen_has_cap(EVTCHN_SEND)) {
564         kvm_xen_set_vcpu_callback_vector(cs);
565 
566         QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
567         env->xen_singleshot_timer_ns = 0;
568         kvm_xen_set_vcpu_timer(cs);
569     } else {
570         vcpuop_stop_singleshot_timer(cs);
571     }
572 
573 }
574 
575 static int xen_set_shared_info(uint64_t gfn)
576 {
577     uint64_t gpa = gfn << TARGET_PAGE_BITS;
578     int i, err;
579 
580     BQL_LOCK_GUARD();
581 
582     /*
583      * The xen_overlay device tells KVM about it too, since it had to
584      * do that on migration load anyway (unless we're going to jump
585      * through lots of hoops to maintain the fiction that this isn't
586      * KVM-specific).
587      */
588     err = xen_overlay_map_shinfo_page(gpa);
589     if (err) {
590         return err;
591     }
592 
593     trace_kvm_xen_set_shared_info(gfn);
594 
595     for (i = 0; i < XEN_LEGACY_MAX_VCPUS; i++) {
596         CPUState *cpu = qemu_get_cpu(i);
597         if (cpu) {
598             async_run_on_cpu(cpu, do_set_vcpu_info_default_gpa,
599                              RUN_ON_CPU_HOST_ULONG(gpa));
600         }
601         gpa += sizeof(vcpu_info_t);
602     }
603 
604     return err;
605 }
606 
607 static int add_to_physmap_one(uint32_t space, uint64_t idx, uint64_t gfn)
608 {
609     switch (space) {
610     case XENMAPSPACE_shared_info:
611         if (idx > 0) {
612             return -EINVAL;
613         }
614         return xen_set_shared_info(gfn);
615 
616     case XENMAPSPACE_grant_table:
617         return xen_gnttab_map_page(idx, gfn);
618 
619     case XENMAPSPACE_gmfn:
620     case XENMAPSPACE_gmfn_range:
621         return -ENOTSUP;
622 
623     case XENMAPSPACE_gmfn_foreign:
624     case XENMAPSPACE_dev_mmio:
625         return -EPERM;
626 
627     default:
628         return -EINVAL;
629     }
630 }
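/*
 * For illustration, placing the shared info page from the guest side
 * typically looks something like this (a sketch, not code from this
 * file; chosen_gfn stands for whatever frame the guest sets aside):
 *
 *     struct xen_add_to_physmap xatp = {
 *         .domid = DOMID_SELF,
 *         .space = XENMAPSPACE_shared_info,
 *         .idx   = 0,
 *         .gpfn  = chosen_gfn,
 *     };
 *     HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
 */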
631 
632 static int do_add_to_physmap(struct kvm_xen_exit *exit, X86CPU *cpu,
633                              uint64_t arg)
634 {
635     struct xen_add_to_physmap xatp;
636     CPUState *cs = CPU(cpu);
637 
638     if (hypercall_compat32(exit->u.hcall.longmode)) {
639         struct compat_xen_add_to_physmap xatp32;
640 
641         qemu_build_assert(sizeof(struct compat_xen_add_to_physmap) == 16);
642         if (kvm_copy_from_gva(cs, arg, &xatp32, sizeof(xatp32))) {
643             return -EFAULT;
644         }
645         xatp.domid = xatp32.domid;
646         xatp.size = xatp32.size;
647         xatp.space = xatp32.space;
648         xatp.idx = xatp32.idx;
649         xatp.gpfn = xatp32.gpfn;
650     } else {
651         if (kvm_copy_from_gva(cs, arg, &xatp, sizeof(xatp))) {
652             return -EFAULT;
653         }
654     }
655 
656     if (xatp.domid != DOMID_SELF && xatp.domid != xen_domid) {
657         return -ESRCH;
658     }
659 
660     return add_to_physmap_one(xatp.space, xatp.idx, xatp.gpfn);
661 }
662 
663 static int do_add_to_physmap_batch(struct kvm_xen_exit *exit, X86CPU *cpu,
664                                    uint64_t arg)
665 {
666     struct xen_add_to_physmap_batch xatpb;
667     unsigned long idxs_gva, gpfns_gva, errs_gva;
668     CPUState *cs = CPU(cpu);
669     size_t op_sz;
670 
671     if (hypercall_compat32(exit->u.hcall.longmode)) {
672         struct compat_xen_add_to_physmap_batch xatpb32;
673 
674         qemu_build_assert(sizeof(struct compat_xen_add_to_physmap_batch) == 20);
675         if (kvm_copy_from_gva(cs, arg, &xatpb32, sizeof(xatpb32))) {
676             return -EFAULT;
677         }
678         xatpb.domid = xatpb32.domid;
679         xatpb.space = xatpb32.space;
680         xatpb.size = xatpb32.size;
681 
682         idxs_gva = xatpb32.idxs.c;
683         gpfns_gva = xatpb32.gpfns.c;
684         errs_gva = xatpb32.errs.c;
685         op_sz = sizeof(uint32_t);
686     } else {
687         if (kvm_copy_from_gva(cs, arg, &xatpb, sizeof(xatpb))) {
688             return -EFAULT;
689         }
690         op_sz = sizeof(unsigned long);
691         idxs_gva = (unsigned long)xatpb.idxs.p;
692         gpfns_gva = (unsigned long)xatpb.gpfns.p;
693         errs_gva = (unsigned long)xatpb.errs.p;
694     }
695 
696     if (xatpb.domid != DOMID_SELF && xatpb.domid != xen_domid) {
697         return -ESRCH;
698     }
699 
700     /* Explicitly invalid for the batch op. Not that we implement it anyway. */
701     if (xatpb.space == XENMAPSPACE_gmfn_range) {
702         return -EINVAL;
703     }
704 
705     while (xatpb.size--) {
706         unsigned long idx = 0;
707         unsigned long gpfn = 0;
708         int err;
709 
710         /* For 32-bit compat this only copies the low 32 bits of each */
711         if (kvm_copy_from_gva(cs, idxs_gva, &idx, op_sz) ||
712             kvm_copy_from_gva(cs, gpfns_gva, &gpfn, op_sz)) {
713             return -EFAULT;
714         }
715         idxs_gva += op_sz;
716         gpfns_gva += op_sz;
717 
718         err = add_to_physmap_one(xatpb.space, idx, gpfn);
719 
720         if (kvm_copy_to_gva(cs, errs_gva, &err, sizeof(err))) {
721             return -EFAULT;
722         }
723         errs_gva += sizeof(err);
724     }
725     return 0;
726 }
727 
728 static bool kvm_xen_hcall_memory_op(struct kvm_xen_exit *exit, X86CPU *cpu,
729                                    int cmd, uint64_t arg)
730 {
731     int err;
732 
733     switch (cmd) {
734     case XENMEM_add_to_physmap:
735         err = do_add_to_physmap(exit, cpu, arg);
736         break;
737 
738     case XENMEM_add_to_physmap_batch:
739         err = do_add_to_physmap_batch(exit, cpu, arg);
740         break;
741 
742     default:
743         return false;
744     }
745 
746     exit->u.hcall.result = err;
747     return true;
748 }
749 
750 static bool handle_set_param(struct kvm_xen_exit *exit, X86CPU *cpu,
751                              uint64_t arg)
752 {
753     CPUState *cs = CPU(cpu);
754     struct xen_hvm_param hp;
755     int err = 0;
756 
757     /* No need for 32/64 compat handling */
758     qemu_build_assert(sizeof(hp) == 16);
759 
760     if (kvm_copy_from_gva(cs, arg, &hp, sizeof(hp))) {
761         err = -EFAULT;
762         goto out;
763     }
764 
765     if (hp.domid != DOMID_SELF && hp.domid != xen_domid) {
766         err = -ESRCH;
767         goto out;
768     }
769 
770     switch (hp.index) {
771     case HVM_PARAM_CALLBACK_IRQ:
772         bql_lock();
773         err = xen_evtchn_set_callback_param(hp.value);
774         bql_unlock();
775         xen_set_long_mode(exit->u.hcall.longmode);
776         break;
777     default:
778         return false;
779     }
780 
781 out:
782     exit->u.hcall.result = err;
783     return true;
784 }
785 
786 static bool handle_get_param(struct kvm_xen_exit *exit, X86CPU *cpu,
787                              uint64_t arg)
788 {
789     CPUState *cs = CPU(cpu);
790     struct xen_hvm_param hp;
791     int err = 0;
792 
793     /* No need for 32/64 compat handling */
794     qemu_build_assert(sizeof(hp) == 16);
795 
796     if (kvm_copy_from_gva(cs, arg, &hp, sizeof(hp))) {
797         err = -EFAULT;
798         goto out;
799     }
800 
801     if (hp.domid != DOMID_SELF && hp.domid != xen_domid) {
802         err = -ESRCH;
803         goto out;
804     }
805 
806     switch (hp.index) {
807     case HVM_PARAM_STORE_PFN:
808         hp.value = XEN_SPECIAL_PFN(XENSTORE);
809         break;
810     case HVM_PARAM_STORE_EVTCHN:
811         hp.value = xen_xenstore_get_port();
812         break;
813     case HVM_PARAM_CONSOLE_PFN:
814         hp.value = xen_primary_console_get_pfn();
815         if (!hp.value) {
816             err = -EINVAL;
817         }
818         break;
819     case HVM_PARAM_CONSOLE_EVTCHN:
820         hp.value = xen_primary_console_get_port();
821         if (!hp.value) {
822             err = -EINVAL;
823         }
824         break;
825     default:
826         return false;
827     }
828 
829     if (!err && kvm_copy_to_gva(cs, arg, &hp, sizeof(hp))) {
830         err = -EFAULT;
831     }
832 out:
833     exit->u.hcall.result = err;
834     return true;
835 }
836 
837 static int kvm_xen_hcall_evtchn_upcall_vector(struct kvm_xen_exit *exit,
838                                               X86CPU *cpu, uint64_t arg)
839 {
840     struct xen_hvm_evtchn_upcall_vector up;
841     CPUState *target_cs;
842 
843     /* No need for 32/64 compat handling */
844     qemu_build_assert(sizeof(up) == 8);
845 
846     if (kvm_copy_from_gva(CPU(cpu), arg, &up, sizeof(up))) {
847         return -EFAULT;
848     }
849 
850     if (up.vector < 0x10) {
851         return -EINVAL;
852     }
853 
854     target_cs = qemu_get_cpu(up.vcpu);
855     if (!target_cs) {
856         return -EINVAL;
857     }
858 
859     async_run_on_cpu(target_cs, do_set_vcpu_callback_vector,
860                      RUN_ON_CPU_HOST_INT(up.vector));
861     return 0;
862 }
863 
864 static bool kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, X86CPU *cpu,
865                                  int cmd, uint64_t arg)
866 {
867     int ret = -ENOSYS;
868     switch (cmd) {
869     case HVMOP_set_evtchn_upcall_vector:
870         ret = kvm_xen_hcall_evtchn_upcall_vector(exit, cpu, arg);
871         break;
872 
873     case HVMOP_pagetable_dying:
874         ret = -ENOSYS;
875         break;
876 
877     case HVMOP_set_param:
878         return handle_set_param(exit, cpu, arg);
879 
880     case HVMOP_get_param:
881         return handle_get_param(exit, cpu, arg);
882 
883     default:
884         return false;
885     }
886 
887     exit->u.hcall.result = ret;
888     return true;
889 }
890 
891 static int vcpuop_register_vcpu_info(CPUState *cs, CPUState *target,
892                                      uint64_t arg)
893 {
894     struct vcpu_register_vcpu_info rvi;
895     uint64_t gpa;
896 
897     /* No need for 32/64 compat handling */
898     qemu_build_assert(sizeof(rvi) == 16);
899     qemu_build_assert(sizeof(struct vcpu_info) == 64);
900 
901     if (!target) {
902         return -ENOENT;
903     }
904 
905     if (kvm_copy_from_gva(cs, arg, &rvi, sizeof(rvi))) {
906         return -EFAULT;
907     }
908 
909     if (rvi.offset > TARGET_PAGE_SIZE - sizeof(struct vcpu_info)) {
910         return -EINVAL;
911     }
912 
913     gpa = ((rvi.mfn << TARGET_PAGE_BITS) + rvi.offset);
914     async_run_on_cpu(target, do_set_vcpu_info_gpa, RUN_ON_CPU_HOST_ULONG(gpa));
915     return 0;
916 }
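/*
 * For illustration, a guest registering a per-vCPU vcpu_info (e.g. for
 * vCPUs beyond the 32 slots in the legacy shared_info page) would do
 * something like the following guest-side sketch, where gfn_of_info_page
 * and offset_within_page are hypothetical names for however the guest
 * computes the frame and offset:
 *
 *     struct vcpu_register_vcpu_info rvi = {
 *         .mfn    = gfn_of_info_page,
 *         .offset = offset_within_page,
 *     };
 *     HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, vcpu_id, &rvi);
 */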
917 
918 static int vcpuop_register_vcpu_time_info(CPUState *cs, CPUState *target,
919                                           uint64_t arg)
920 {
921     struct vcpu_register_time_memory_area tma;
922     uint64_t gpa;
923     size_t len;
924 
925     /* No need for 32/64 compat handling */
926     qemu_build_assert(sizeof(tma) == 8);
927     qemu_build_assert(sizeof(struct vcpu_time_info) == 32);
928 
929     if (!target) {
930         return -ENOENT;
931     }
932 
933     if (kvm_copy_from_gva(cs, arg, &tma, sizeof(tma))) {
934         return -EFAULT;
935     }
936 
937     /*
938      * Xen actually uses the GVA and does the translation through the guest
939      * page tables each time. But Linux/KVM uses the GPA, on the assumption
940      * that guests only ever use *global* addresses (kernel virtual addresses)
941      * for it. If Linux is changed to redo the GVA→GPA translation each time,
942      * it will offer a new vCPU attribute for that, and we'll use it instead.
943      */
944     if (!kvm_gva_to_gpa(cs, tma.addr.p, &gpa, &len, false) ||
945         len < sizeof(struct vcpu_time_info)) {
946         return -EFAULT;
947     }
948 
949     async_run_on_cpu(target, do_set_vcpu_time_info_gpa,
950                      RUN_ON_CPU_HOST_ULONG(gpa));
951     return 0;
952 }
953 
954 static int vcpuop_register_runstate_info(CPUState *cs, CPUState *target,
955                                          uint64_t arg)
956 {
957     struct vcpu_register_runstate_memory_area rma;
958     uint64_t gpa;
959     size_t len;
960 
961     /* No need for 32/64 compat handling */
962     qemu_build_assert(sizeof(rma) == 8);
963     /* The runstate area actually does change size, but Linux copes. */
964 
965     if (!target) {
966         return -ENOENT;
967     }
968 
969     if (kvm_copy_from_gva(cs, arg, &rma, sizeof(rma))) {
970         return -EFAULT;
971     }
972 
973     /* As with vcpu_time_info, Xen actually uses the GVA but KVM doesn't. */
974     if (!kvm_gva_to_gpa(cs, rma.addr.p, &gpa, &len, false)) {
975         return -EFAULT;
976     }
977 
978     async_run_on_cpu(target, do_set_vcpu_runstate_gpa,
979                      RUN_ON_CPU_HOST_ULONG(gpa));
980     return 0;
981 }
982 
983 static uint64_t kvm_get_current_ns(void)
984 {
985     struct kvm_clock_data data;
986     int ret;
987 
988     ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
989     if (ret < 0) {
990         fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(-ret));
991         abort();
992     }
993 
994     return data.clock;
995 }
996 
997 static void xen_vcpu_singleshot_timer_event(void *opaque)
998 {
999     CPUState *cpu = opaque;
1000     CPUX86State *env = &X86_CPU(cpu)->env;
1001     uint16_t port = env->xen_virq[VIRQ_TIMER];
1002 
1003     if (likely(port)) {
1004         xen_evtchn_set_port(port);
1005     }
1006 
1007     qemu_mutex_lock(&env->xen_timers_lock);
1008     env->xen_singleshot_timer_ns = 0;
1009     qemu_mutex_unlock(&env->xen_timers_lock);
1010 }
1011 
1012 static void xen_vcpu_periodic_timer_event(void *opaque)
1013 {
1014     CPUState *cpu = opaque;
1015     CPUX86State *env = &X86_CPU(cpu)->env;
1016     uint16_t port = env->xen_virq[VIRQ_TIMER];
1017     int64_t qemu_now;
1018 
1019     if (likely(port)) {
1020         xen_evtchn_set_port(port);
1021     }
1022 
1023     qemu_mutex_lock(&env->xen_timers_lock);
1024 
1025     qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
1026     timer_mod_ns(env->xen_periodic_timer,
1027                  qemu_now + env->xen_periodic_timer_period);
1028 
1029     qemu_mutex_unlock(&env->xen_timers_lock);
1030 }
1031 
1032 static int do_set_periodic_timer(CPUState *target, uint64_t period_ns)
1033 {
1034     CPUX86State *tenv = &X86_CPU(target)->env;
1035     int64_t qemu_now;
1036 
1037     timer_del(tenv->xen_periodic_timer);
1038 
1039     qemu_mutex_lock(&tenv->xen_timers_lock);
1040 
1041     qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
1042     timer_mod_ns(tenv->xen_periodic_timer, qemu_now + period_ns);
1043     tenv->xen_periodic_timer_period = period_ns;
1044 
1045     qemu_mutex_unlock(&tenv->xen_timers_lock);
1046     return 0;
1047 }
1048 
1049 #define MILLISECS(_ms)  ((int64_t)((_ms) * 1000000ULL))
1050 #define MICROSECS(_us)  ((int64_t)((_us) * 1000ULL))
1051 #define STIME_MAX ((time_t)((int64_t)~0ull >> 1))
1052 /* Chosen so (NOW() + delta) won't overflow without an uptime of 200 years */
1053 #define STIME_DELTA_MAX ((int64_t)((uint64_t)~0ull >> 2))
1054 
1055 static int vcpuop_set_periodic_timer(CPUState *cs, CPUState *target,
1056                                      uint64_t arg)
1057 {
1058     struct vcpu_set_periodic_timer spt;
1059 
1060     qemu_build_assert(sizeof(spt) == 8);
1061     if (kvm_copy_from_gva(cs, arg, &spt, sizeof(spt))) {
1062         return -EFAULT;
1063     }
1064 
1065     if (spt.period_ns < MILLISECS(1) || spt.period_ns > STIME_DELTA_MAX) {
1066         return -EINVAL;
1067     }
1068 
1069     return do_set_periodic_timer(target, spt.period_ns);
1070 }
1071 
1072 static int vcpuop_stop_periodic_timer(CPUState *target)
1073 {
1074     CPUX86State *tenv = &X86_CPU(target)->env;
1075 
1076     qemu_mutex_lock(&tenv->xen_timers_lock);
1077 
1078     timer_del(tenv->xen_periodic_timer);
1079     tenv->xen_periodic_timer_period = 0;
1080 
1081     qemu_mutex_unlock(&tenv->xen_timers_lock);
1082     return 0;
1083 }
1084 
1085 /*
1086  * Userspace handling of timer, for older kernels.
1087  * Must always be called with xen_timers_lock held.
1088  */
1089 static int do_set_singleshot_timer(CPUState *cs, uint64_t timeout_abs,
1090                                    bool linux_wa)
1091 {
1092     CPUX86State *env = &X86_CPU(cs)->env;
1093     int64_t now = kvm_get_current_ns();
1094     int64_t qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
1095     int64_t delta = timeout_abs - now;
1096 
1097     if (linux_wa && unlikely((int64_t)timeout_abs < 0 ||
1098                              (delta > 0 && (uint32_t)(delta >> 50) != 0))) {
1099         /*
1100          * Xen has a 'Linux workaround' in do_set_timer_op() which checks
1101          * for negative absolute timeout values (caused by integer
1102          * overflow), and for values about 13 days in the future (2^50ns)
1103          * which would be caused by jiffies overflow. For those cases, it
1104          * sets the timeout 100ms in the future (not *too* soon, since if
1105          * a guest really did set a long timeout on purpose we don't want
1106          * to keep churning CPU time by waking it up).
1107          */
1108         delta = (100 * SCALE_MS);
1109         timeout_abs = now + delta;
1110     }
1111 
1112     timer_mod_ns(env->xen_singleshot_timer, qemu_now + delta);
1113     env->xen_singleshot_timer_ns = now + delta;
1114     return 0;
1115 }
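/*
 * For scale: 2^50 ns is about 1.13e15 ns, i.e. roughly 13 days, which is
 * why the workaround above treats anything that far in the future as a
 * jiffies-overflow artefact rather than a deliberate timeout.
 */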
1116 
1117 static int vcpuop_set_singleshot_timer(CPUState *cs, uint64_t arg)
1118 {
1119     struct vcpu_set_singleshot_timer sst = { 0 };
1120 
1121     /*
1122      * The struct is a uint64_t followed by a uint32_t. On 32-bit that
1123      * makes it 12 bytes. On 64-bit it gets padded to 16. The parts
1124      * that get used are identical, and there's four bytes of padding
1125      * unused at the end. For true Xen compatibility we should attempt
1126      * to copy the full 16 bytes from 64-bit guests, and return -EFAULT
1127      * if we can't get the padding too. But that's daft. Just copy what
1128      * we need.
1129      */
1130     qemu_build_assert(offsetof(struct vcpu_set_singleshot_timer, flags) == 8);
1131     qemu_build_assert(sizeof(sst) >= 12);
1132 
1133     if (kvm_copy_from_gva(cs, arg, &sst, 12)) {
1134         return -EFAULT;
1135     }
1136 
1137     QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
1138 
1139     /*
1140      * We ignore the VCPU_SSHOTTMR_future flag, just as Xen now does.
1141      * The only guest that ever used it, got it wrong.
1142      * https://xenbits.xen.org/gitweb/?p=xen.git;a=commitdiff;h=19c6cbd909
1143      */
1144     return do_set_singleshot_timer(cs, sst.timeout_abs_ns, false);
1145 }
1146 
1147 static int vcpuop_stop_singleshot_timer(CPUState *cs)
1148 {
1149     CPUX86State *env = &X86_CPU(cs)->env;
1150 
1151     qemu_mutex_lock(&env->xen_timers_lock);
1152 
1153     timer_del(env->xen_singleshot_timer);
1154     env->xen_singleshot_timer_ns = 0;
1155 
1156     qemu_mutex_unlock(&env->xen_timers_lock);
1157     return 0;
1158 }
1159 
1160 static bool kvm_xen_hcall_set_timer_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1161                                        uint64_t timeout)
1162 {
1163     int err;
1164 
1165     if (unlikely(timeout == 0)) {
1166         err = vcpuop_stop_singleshot_timer(CPU(cpu));
1167     } else {
1168         QEMU_LOCK_GUARD(&X86_CPU(cpu)->env.xen_timers_lock);
1169         err = do_set_singleshot_timer(CPU(cpu), timeout, true);
1170     }
1171     exit->u.hcall.result = err;
1172     return true;
1173 }
1174 
1175 static bool kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1176                                   int cmd, int vcpu_id, uint64_t arg)
1177 {
1178     CPUState *cs = CPU(cpu);
1179     CPUState *dest = cs->cpu_index == vcpu_id ? cs : qemu_get_cpu(vcpu_id);
1180     int err;
1181 
1182     if (!dest) {
1183         err = -ENOENT;
1184         goto out;
1185     }
1186 
1187     switch (cmd) {
1188     case VCPUOP_register_runstate_memory_area:
1189         err = vcpuop_register_runstate_info(cs, dest, arg);
1190         break;
1191     case VCPUOP_register_vcpu_time_memory_area:
1192         err = vcpuop_register_vcpu_time_info(cs, dest, arg);
1193         break;
1194     case VCPUOP_register_vcpu_info:
1195         err = vcpuop_register_vcpu_info(cs, dest, arg);
1196         break;
1197     case VCPUOP_set_singleshot_timer: {
1198         if (cs->cpu_index == vcpu_id) {
1199             err = vcpuop_set_singleshot_timer(dest, arg);
1200         } else {
1201             err = -EINVAL;
1202         }
1203         break;
1204     }
1205     case VCPUOP_stop_singleshot_timer:
1206         if (cs->cpu_index == vcpu_id) {
1207             err = vcpuop_stop_singleshot_timer(dest);
1208         } else {
1209             err = -EINVAL;
1210         }
1211         break;
1212     case VCPUOP_set_periodic_timer: {
1213         err = vcpuop_set_periodic_timer(cs, dest, arg);
1214         break;
1215     }
1216     case VCPUOP_stop_periodic_timer:
1217         err = vcpuop_stop_periodic_timer(dest);
1218         break;
1219 
1220     default:
1221         return false;
1222     }
1223 
1224  out:
1225     exit->u.hcall.result = err;
1226     return true;
1227 }
1228 
1229 static bool kvm_xen_hcall_evtchn_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1230                                     int cmd, uint64_t arg)
1231 {
1232     CPUState *cs = CPU(cpu);
1233     int err = -ENOSYS;
1234 
1235     switch (cmd) {
1236     case EVTCHNOP_init_control:
1237     case EVTCHNOP_expand_array:
1238     case EVTCHNOP_set_priority:
1239         /* We do not support FIFO channels at this point */
1240         err = -ENOSYS;
1241         break;
1242 
1243     case EVTCHNOP_status: {
1244         struct evtchn_status status;
1245 
1246         qemu_build_assert(sizeof(status) == 24);
1247         if (kvm_copy_from_gva(cs, arg, &status, sizeof(status))) {
1248             err = -EFAULT;
1249             break;
1250         }
1251 
1252         err = xen_evtchn_status_op(&status);
1253         if (!err && kvm_copy_to_gva(cs, arg, &status, sizeof(status))) {
1254             err = -EFAULT;
1255         }
1256         break;
1257     }
1258     case EVTCHNOP_close: {
1259         struct evtchn_close close;
1260 
1261         qemu_build_assert(sizeof(close) == 4);
1262         if (kvm_copy_from_gva(cs, arg, &close, sizeof(close))) {
1263             err = -EFAULT;
1264             break;
1265         }
1266 
1267         err = xen_evtchn_close_op(&close);
1268         break;
1269     }
1270     case EVTCHNOP_unmask: {
1271         struct evtchn_unmask unmask;
1272 
1273         qemu_build_assert(sizeof(unmask) == 4);
1274         if (kvm_copy_from_gva(cs, arg, &unmask, sizeof(unmask))) {
1275             err = -EFAULT;
1276             break;
1277         }
1278 
1279         err = xen_evtchn_unmask_op(&unmask);
1280         break;
1281     }
1282     case EVTCHNOP_bind_virq: {
1283         struct evtchn_bind_virq virq;
1284 
1285         qemu_build_assert(sizeof(virq) == 12);
1286         if (kvm_copy_from_gva(cs, arg, &virq, sizeof(virq))) {
1287             err = -EFAULT;
1288             break;
1289         }
1290 
1291         err = xen_evtchn_bind_virq_op(&virq);
1292         if (!err && kvm_copy_to_gva(cs, arg, &virq, sizeof(virq))) {
1293             err = -EFAULT;
1294         }
1295         break;
1296     }
1297     case EVTCHNOP_bind_pirq: {
1298         struct evtchn_bind_pirq pirq;
1299 
1300         qemu_build_assert(sizeof(pirq) == 12);
1301         if (kvm_copy_from_gva(cs, arg, &pirq, sizeof(pirq))) {
1302             err = -EFAULT;
1303             break;
1304         }
1305 
1306         err = xen_evtchn_bind_pirq_op(&pirq);
1307         if (!err && kvm_copy_to_gva(cs, arg, &pirq, sizeof(pirq))) {
1308             err = -EFAULT;
1309         }
1310         break;
1311     }
1312     case EVTCHNOP_bind_ipi: {
1313         struct evtchn_bind_ipi ipi;
1314 
1315         qemu_build_assert(sizeof(ipi) == 8);
1316         if (kvm_copy_from_gva(cs, arg, &ipi, sizeof(ipi))) {
1317             err = -EFAULT;
1318             break;
1319         }
1320 
1321         err = xen_evtchn_bind_ipi_op(&ipi);
1322         if (!err && kvm_copy_to_gva(cs, arg, &ipi, sizeof(ipi))) {
1323             err = -EFAULT;
1324         }
1325         break;
1326     }
1327     case EVTCHNOP_send: {
1328         struct evtchn_send send;
1329 
1330         qemu_build_assert(sizeof(send) == 4);
1331         if (kvm_copy_from_gva(cs, arg, &send, sizeof(send))) {
1332             err = -EFAULT;
1333             break;
1334         }
1335 
1336         err = xen_evtchn_send_op(&send);
1337         break;
1338     }
1339     case EVTCHNOP_alloc_unbound: {
1340         struct evtchn_alloc_unbound alloc;
1341 
1342         qemu_build_assert(sizeof(alloc) == 8);
1343         if (kvm_copy_from_gva(cs, arg, &alloc, sizeof(alloc))) {
1344             err = -EFAULT;
1345             break;
1346         }
1347 
1348         err = xen_evtchn_alloc_unbound_op(&alloc);
1349         if (!err && kvm_copy_to_gva(cs, arg, &alloc, sizeof(alloc))) {
1350             err = -EFAULT;
1351         }
1352         break;
1353     }
1354     case EVTCHNOP_bind_interdomain: {
1355         struct evtchn_bind_interdomain interdomain;
1356 
1357         qemu_build_assert(sizeof(interdomain) == 12);
1358         if (kvm_copy_from_gva(cs, arg, &interdomain, sizeof(interdomain))) {
1359             err = -EFAULT;
1360             break;
1361         }
1362 
1363         err = xen_evtchn_bind_interdomain_op(&interdomain);
1364         if (!err &&
1365             kvm_copy_to_gva(cs, arg, &interdomain, sizeof(interdomain))) {
1366             err = -EFAULT;
1367         }
1368         break;
1369     }
1370     case EVTCHNOP_bind_vcpu: {
1371         struct evtchn_bind_vcpu vcpu;
1372 
1373         qemu_build_assert(sizeof(vcpu) == 8);
1374         if (kvm_copy_from_gva(cs, arg, &vcpu, sizeof(vcpu))) {
1375             err = -EFAULT;
1376             break;
1377         }
1378 
1379         err = xen_evtchn_bind_vcpu_op(&vcpu);
1380         break;
1381     }
1382     case EVTCHNOP_reset: {
1383         struct evtchn_reset reset;
1384 
1385         qemu_build_assert(sizeof(reset) == 2);
1386         if (kvm_copy_from_gva(cs, arg, &reset, sizeof(reset))) {
1387             err = -EFAULT;
1388             break;
1389         }
1390 
1391         err = xen_evtchn_reset_op(&reset);
1392         break;
1393     }
1394     default:
1395         return false;
1396     }
1397 
1398     exit->u.hcall.result = err;
1399     return true;
1400 }
1401 
1402 int kvm_xen_soft_reset(void)
1403 {
1404     CPUState *cpu;
1405     int err;
1406 
1407     assert(bql_locked());
1408 
1409     trace_kvm_xen_soft_reset();
1410 
1411     err = xen_evtchn_soft_reset();
1412     if (err) {
1413         return err;
1414     }
1415 
1416     /*
1417      * Zero is the reset/startup state for HVM_PARAM_CALLBACK_IRQ. Strictly,
1418      * it maps to HVM_PARAM_CALLBACK_TYPE_GSI with GSI#0, but Xen refuses to
1419      * deliver to the timer interrupt and treats that as 'disabled'.
1420      */
1421     err = xen_evtchn_set_callback_param(0);
1422     if (err) {
1423         return err;
1424     }
1425 
1426     CPU_FOREACH(cpu) {
1427         async_run_on_cpu(cpu, do_vcpu_soft_reset, RUN_ON_CPU_NULL);
1428     }
1429 
1430     err = xen_overlay_map_shinfo_page(INVALID_GFN);
1431     if (err) {
1432         return err;
1433     }
1434 
1435     err = xen_gnttab_reset();
1436     if (err) {
1437         return err;
1438     }
1439 
1440     err = xen_primary_console_reset();
1441     if (err) {
1442         return err;
1443     }
1444 
1445     err = xen_xenstore_reset();
1446     if (err) {
1447         return err;
1448     }
1449 
1450     return 0;
1451 }
1452 
1453 static int schedop_shutdown(CPUState *cs, uint64_t arg)
1454 {
1455     struct sched_shutdown shutdown;
1456     int ret = 0;
1457 
1458     /* No need for 32/64 compat handling */
1459     qemu_build_assert(sizeof(shutdown) == 4);
1460 
1461     if (kvm_copy_from_gva(cs, arg, &shutdown, sizeof(shutdown))) {
1462         return -EFAULT;
1463     }
1464 
1465     switch (shutdown.reason) {
1466     case SHUTDOWN_crash:
1467         cpu_dump_state(cs, stderr, CPU_DUMP_CODE);
1468         qemu_system_guest_panicked(NULL);
1469         break;
1470 
1471     case SHUTDOWN_reboot:
1472         qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
1473         break;
1474 
1475     case SHUTDOWN_poweroff:
1476         qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
1477         break;
1478 
1479     case SHUTDOWN_soft_reset:
1480         bql_lock();
1481         ret = kvm_xen_soft_reset();
1482         bql_unlock();
1483         break;
1484 
1485     default:
1486         ret = -EINVAL;
1487         break;
1488     }
1489 
1490     return ret;
1491 }
1492 
1493 static bool kvm_xen_hcall_sched_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1494                                    int cmd, uint64_t arg)
1495 {
1496     CPUState *cs = CPU(cpu);
1497     int err = -ENOSYS;
1498 
1499     switch (cmd) {
1500     case SCHEDOP_shutdown:
1501         err = schedop_shutdown(cs, arg);
1502         break;
1503 
1504     case SCHEDOP_poll:
1505         /*
1506          * Linux will panic if this doesn't work. Just yield; it's not
1507          * worth overthinking it because with event channel handling
1508          * in KVM, the kernel will intercept this and it will never
1509          * reach QEMU anyway. The semantics of the hypercall explicltly
1510          * permit spurious wakeups.
1511          */
1512     case SCHEDOP_yield:
1513         sched_yield();
1514         err = 0;
1515         break;
1516 
1517     default:
1518         return false;
1519     }
1520 
1521     exit->u.hcall.result = err;
1522     return true;
1523 }
1524 
1525 static bool kvm_xen_hcall_gnttab_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1526                                     int cmd, uint64_t arg, int count)
1527 {
1528     CPUState *cs = CPU(cpu);
1529     int err;
1530 
1531     switch (cmd) {
1532     case GNTTABOP_set_version: {
1533         struct gnttab_set_version set;
1534 
1535         qemu_build_assert(sizeof(set) == 4);
1536         if (kvm_copy_from_gva(cs, arg, &set, sizeof(set))) {
1537             err = -EFAULT;
1538             break;
1539         }
1540 
1541         err = xen_gnttab_set_version_op(&set);
1542         if (!err && kvm_copy_to_gva(cs, arg, &set, sizeof(set))) {
1543             err = -EFAULT;
1544         }
1545         break;
1546     }
1547     case GNTTABOP_get_version: {
1548         struct gnttab_get_version get;
1549 
1550         qemu_build_assert(sizeof(get) == 8);
1551         if (kvm_copy_from_gva(cs, arg, &get, sizeof(get))) {
1552             err = -EFAULT;
1553             break;
1554         }
1555 
1556         err = xen_gnttab_get_version_op(&get);
1557         if (!err && kvm_copy_to_gva(cs, arg, &get, sizeof(get))) {
1558             err = -EFAULT;
1559         }
1560         break;
1561     }
1562     case GNTTABOP_query_size: {
1563         struct gnttab_query_size size;
1564 
1565         qemu_build_assert(sizeof(size) == 16);
1566         if (kvm_copy_from_gva(cs, arg, &size, sizeof(size))) {
1567             err = -EFAULT;
1568             break;
1569         }
1570 
1571         err = xen_gnttab_query_size_op(&size);
1572         if (!err && kvm_copy_to_gva(cs, arg, &size, sizeof(size))) {
1573             err = -EFAULT;
1574         }
1575         break;
1576     }
1577     case GNTTABOP_setup_table:
1578     case GNTTABOP_copy:
1579     case GNTTABOP_map_grant_ref:
1580     case GNTTABOP_unmap_grant_ref:
1581     case GNTTABOP_swap_grant_ref:
1582         return false;
1583 
1584     default:
1585         /* Xen explicitly returns -ENOSYS to HVM guests for all others */
1586         err = -ENOSYS;
1587         break;
1588     }
1589 
1590     exit->u.hcall.result = err;
1591     return true;
1592 }
1593 
1594 static bool kvm_xen_hcall_physdev_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1595                                      int cmd, uint64_t arg)
1596 {
1597     CPUState *cs = CPU(cpu);
1598     int err;
1599 
1600     switch (cmd) {
1601     case PHYSDEVOP_map_pirq: {
1602         struct physdev_map_pirq map;
1603 
1604         if (hypercall_compat32(exit->u.hcall.longmode)) {
1605             struct compat_physdev_map_pirq *map32 = (void *)&map;
1606 
1607             if (kvm_copy_from_gva(cs, arg, map32, sizeof(*map32))) {
1608                 err = -EFAULT;
                     break;
1609             }
1610 
1611             /*
1612              * The only thing that's different is the alignment of the
1613              * uint64_t table_base at the end, which gets padding to make
1614              * it 64-bit aligned in the 64-bit version.
1615              */
1616             qemu_build_assert(sizeof(*map32) == 36);
1617             qemu_build_assert(offsetof(struct physdev_map_pirq, entry_nr) ==
1618                               offsetof(struct compat_physdev_map_pirq, entry_nr));
1619             memmove(&map.table_base, &map32->table_base, sizeof(map.table_base));
1620         } else {
1621             if (kvm_copy_from_gva(cs, arg, &map, sizeof(map))) {
1622                 err = -EFAULT;
1623                 break;
1624             }
1625         }
1626         err = xen_physdev_map_pirq(&map);
1627         /*
1628          * Since table_base is an IN parameter and won't be changed, just
1629          * copy the size of the compat structure back to the guest.
1630          */
1631         if (!err && kvm_copy_to_gva(cs, arg, &map,
1632                                     sizeof(struct compat_physdev_map_pirq))) {
1633             err = -EFAULT;
1634         }
1635         break;
1636     }
1637     case PHYSDEVOP_unmap_pirq: {
1638         struct physdev_unmap_pirq unmap;
1639 
1640         qemu_build_assert(sizeof(unmap) == 8);
1641         if (kvm_copy_from_gva(cs, arg, &unmap, sizeof(unmap))) {
1642             err = -EFAULT;
1643             break;
1644         }
1645 
1646         err = xen_physdev_unmap_pirq(&unmap);
1647         if (!err && kvm_copy_to_gva(cs, arg, &unmap, sizeof(unmap))) {
1648             err = -EFAULT;
1649         }
1650         break;
1651     }
1652     case PHYSDEVOP_eoi: {
1653         struct physdev_eoi eoi;
1654 
1655         qemu_build_assert(sizeof(eoi) == 4);
1656         if (kvm_copy_from_gva(cs, arg, &eoi, sizeof(eoi))) {
1657             err = -EFAULT;
1658             break;
1659         }
1660 
1661         err = xen_physdev_eoi_pirq(&eoi);
1662         if (!err && kvm_copy_to_gva(cs, arg, &eoi, sizeof(eoi))) {
1663             err = -EFAULT;
1664         }
1665         break;
1666     }
1667     case PHYSDEVOP_irq_status_query: {
1668         struct physdev_irq_status_query query;
1669 
1670         qemu_build_assert(sizeof(query) == 8);
1671         if (kvm_copy_from_gva(cs, arg, &query, sizeof(query))) {
1672             err = -EFAULT;
1673             break;
1674         }
1675 
1676         err = xen_physdev_query_pirq(&query);
1677         if (!err && kvm_copy_to_gva(cs, arg, &query, sizeof(query))) {
1678             err = -EFAULT;
1679         }
1680         break;
1681     }
1682     case PHYSDEVOP_get_free_pirq: {
1683         struct physdev_get_free_pirq get;
1684 
1685         qemu_build_assert(sizeof(get) == 8);
1686         if (kvm_copy_from_gva(cs, arg, &get, sizeof(get))) {
1687             err = -EFAULT;
1688             break;
1689         }
1690 
1691         err = xen_physdev_get_free_pirq(&get);
1692         if (!err && kvm_copy_to_gva(cs, arg, &get, sizeof(get))) {
1693             err = -EFAULT;
1694         }
1695         break;
1696     }
1697     case PHYSDEVOP_pirq_eoi_gmfn_v2: /* FreeBSD 13 makes this hypercall */
1698         err = -ENOSYS;
1699         break;
1700 
1701     default:
1702         return false;
1703     }
1704 
1705     exit->u.hcall.result = err;
1706     return true;
1707 }
1708 
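/*
 * Dispatch to the individual hypercall handlers above. A 'false' return
 * means the hypercall is not handled here at all; the caller then reports
 * -ENOSYS back to the guest and logs it as unimplemented.
 */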
1709 static bool do_kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
1710 {
1711     uint16_t code = exit->u.hcall.input;
1712 
1713     if (exit->u.hcall.cpl > 0) {
1714         exit->u.hcall.result = -EPERM;
1715         return true;
1716     }
1717 
1718     switch (code) {
1719     case __HYPERVISOR_set_timer_op:
1720         if (exit->u.hcall.longmode) {
1721             return kvm_xen_hcall_set_timer_op(exit, cpu,
1722                                               exit->u.hcall.params[0]);
1723         } else {
1724             /* In 32-bit mode, the 64-bit timer value is in two args. */
1725             uint64_t val = ((uint64_t)exit->u.hcall.params[1]) << 32 |
1726                 (uint32_t)exit->u.hcall.params[0];
1727             return kvm_xen_hcall_set_timer_op(exit, cpu, val);
1728         }
1729     case __HYPERVISOR_grant_table_op:
1730         return kvm_xen_hcall_gnttab_op(exit, cpu, exit->u.hcall.params[0],
1731                                        exit->u.hcall.params[1],
1732                                        exit->u.hcall.params[2]);
1733     case __HYPERVISOR_sched_op:
1734         return kvm_xen_hcall_sched_op(exit, cpu, exit->u.hcall.params[0],
1735                                       exit->u.hcall.params[1]);
1736     case __HYPERVISOR_event_channel_op:
1737         return kvm_xen_hcall_evtchn_op(exit, cpu, exit->u.hcall.params[0],
1738                                        exit->u.hcall.params[1]);
1739     case __HYPERVISOR_vcpu_op:
1740         return kvm_xen_hcall_vcpu_op(exit, cpu,
1741                                      exit->u.hcall.params[0],
1742                                      exit->u.hcall.params[1],
1743                                      exit->u.hcall.params[2]);
1744     case __HYPERVISOR_hvm_op:
1745         return kvm_xen_hcall_hvm_op(exit, cpu, exit->u.hcall.params[0],
1746                                     exit->u.hcall.params[1]);
1747     case __HYPERVISOR_memory_op:
1748         return kvm_xen_hcall_memory_op(exit, cpu, exit->u.hcall.params[0],
1749                                        exit->u.hcall.params[1]);
1750     case __HYPERVISOR_physdev_op:
1751         return kvm_xen_hcall_physdev_op(exit, cpu, exit->u.hcall.params[0],
1752                                         exit->u.hcall.params[1]);
1753     case __HYPERVISOR_xen_version:
1754         return kvm_xen_hcall_xen_version(exit, cpu, exit->u.hcall.params[0],
1755                                          exit->u.hcall.params[1]);
1756     default:
1757         return false;
1758     }
1759 }
1760 
1761 int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
1762 {
1763     if (exit->type != KVM_EXIT_XEN_HCALL) {
1764         return -1;
1765     }
1766 
1767     /*
1768      * The kernel latches the guest 32/64 mode when the MSR is used to fill
1769      * the hypercall page. So if we see a hypercall in a mode that doesn't
1770      * match our own idea of the guest mode, fetch the kernel's idea of the
1771      * "long mode" to remain in sync.
1772      */
1773     if (exit->u.hcall.longmode != xen_is_long_mode()) {
1774         xen_sync_long_mode();
1775     }
1776 
1777     if (!do_kvm_xen_handle_exit(cpu, exit)) {
1778         /*
1779          * Some hypercalls will be deliberately "implemented" by returning
1780          * -ENOSYS. This case is for hypercalls which are unexpected.
1781          */
1782         exit->u.hcall.result = -ENOSYS;
1783         qemu_log_mask(LOG_UNIMP, "Unimplemented Xen hypercall %"
1784                       PRId64 " (0x%" PRIx64 " 0x%" PRIx64 " 0x%" PRIx64 ")\n",
1785                       (uint64_t)exit->u.hcall.input,
1786                       (uint64_t)exit->u.hcall.params[0],
1787                       (uint64_t)exit->u.hcall.params[1],
1788                       (uint64_t)exit->u.hcall.params[2]);
1789     }
1790 
1791     trace_kvm_xen_hypercall(CPU(cpu)->cpu_index, exit->u.hcall.cpl,
1792                             exit->u.hcall.input, exit->u.hcall.params[0],
1793                             exit->u.hcall.params[1], exit->u.hcall.params[2],
1794                             exit->u.hcall.result);
1795     return 0;
1796 }
1797 
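/*
 * Limits configured on the KVM accelerator, consumed by the grant table
 * and event channel (PIRQ) emulation respectively.
 */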
1798 uint16_t kvm_xen_get_gnttab_max_frames(void)
1799 {
1800     KVMState *s = KVM_STATE(current_accel());
1801     return s->xen_gnttab_max_frames;
1802 }
1803 
1804 uint16_t kvm_xen_get_evtchn_max_pirq(void)
1805 {
1806     KVMState *s = KVM_STATE(current_accel());
1807     return s->xen_evtchn_max_pirq;
1808 }
1809 
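/*
 * Push the per-vCPU Xen state (vcpu_info, time info, runstate area and any
 * pending timers) back into the kernel, e.g. on vmstate load after an
 * incoming migration.
 */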
1810 int kvm_put_xen_state(CPUState *cs)
1811 {
1812     X86CPU *cpu = X86_CPU(cs);
1813     CPUX86State *env = &cpu->env;
1814     uint64_t gpa;
1815     int ret;
1816 
1817     gpa = env->xen_vcpu_info_gpa;
1818     if (gpa == INVALID_GPA) {
1819         gpa = env->xen_vcpu_info_default_gpa;
1820     }
1821 
1822     if (gpa != INVALID_GPA) {
1823         ret = set_vcpu_info(cs, gpa);
1824         if (ret < 0) {
1825             return ret;
1826         }
1827     }
1828 
1829     gpa = env->xen_vcpu_time_info_gpa;
1830     if (gpa != INVALID_GPA) {
1831         ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
1832                                     gpa);
1833         if (ret < 0) {
1834             return ret;
1835         }
1836     }
1837 
1838     gpa = env->xen_vcpu_runstate_gpa;
1839     if (gpa != INVALID_GPA) {
1840         ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
1841                                     gpa);
1842         if (ret < 0) {
1843             return ret;
1844         }
1845     }
1846 
1847     if (env->xen_periodic_timer_period) {
1848         ret = do_set_periodic_timer(cs, env->xen_periodic_timer_period);
1849         if (ret < 0) {
1850             return ret;
1851         }
1852     }
1853 
1854     if (!kvm_xen_has_cap(EVTCHN_SEND)) {
1855         /*
1856          * If the kernel has EVTCHN_SEND support then it handles timers too,
1857          * so the timer will be restored by kvm_xen_set_vcpu_timer() below.
1858          */
1859         QEMU_LOCK_GUARD(&env->xen_timers_lock);
1860         if (env->xen_singleshot_timer_ns) {
1861             ret = do_set_singleshot_timer(cs, env->xen_singleshot_timer_ns,
1862                                           false);
1863             if (ret < 0) {
1864                 return ret;
1865             }
1866         }
1867         return 0;
1868     }
1869 
1870     if (env->xen_vcpu_callback_vector) {
1871         ret = kvm_xen_set_vcpu_callback_vector(cs);
1872         if (ret < 0) {
1873             return ret;
1874         }
1875     }
1876 
1877     if (env->xen_virq[VIRQ_TIMER]) {
1878         do_set_vcpu_timer_virq(cs,
1879                                RUN_ON_CPU_HOST_INT(env->xen_virq[VIRQ_TIMER]));
1880     }
1881     return 0;
1882 }
1883 
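/*
 * Read volatile Xen state back from the kernel: mark the vcpu_info page
 * dirty (the kernel writes to it without dirty tracking), and if the kernel
 * is accelerating timers, fetch the current singleshot timer deadline.
 */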
1884 int kvm_get_xen_state(CPUState *cs)
1885 {
1886     X86CPU *cpu = X86_CPU(cs);
1887     CPUX86State *env = &cpu->env;
1888     uint64_t gpa;
1889     int ret;
1890 
1891     /*
1892      * The kernel does not mark vcpu_info as dirty when it delivers interrupts
1893      * to it. It's up to userspace to *assume* that any page shared thus is
1894      * always considered dirty. The shared_info page is different since it's
1895      * an overlay and migrated separately anyway.
1896      */
1897     gpa = env->xen_vcpu_info_gpa;
1898     if (gpa == INVALID_GPA) {
1899         gpa = env->xen_vcpu_info_default_gpa;
1900     }
1901     if (gpa != INVALID_GPA) {
1902         MemoryRegionSection mrs = memory_region_find(get_system_memory(),
1903                                                      gpa,
1904                                                      sizeof(struct vcpu_info));
1905         if (mrs.mr &&
1906             !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
1907             memory_region_set_dirty(mrs.mr, mrs.offset_within_region,
1908                                     sizeof(struct vcpu_info));
1909         }
1910     }
1911 
1912     if (!kvm_xen_has_cap(EVTCHN_SEND)) {
1913         return 0;
1914     }
1915 
1916     /*
1917      * If the kernel is accelerating timers, read out the current value of the
1918      * singleshot timer deadline.
1919      */
1920     if (env->xen_virq[VIRQ_TIMER]) {
1921         struct kvm_xen_vcpu_attr va = {
1922             .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
1923         };
1924         ret = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_GET_ATTR, &va);
1925         if (ret < 0) {
1926             return ret;
1927         }
1928 
1929         /*
1930          * This locking is fairly pointless, and is here to appease Coverity.
1931          * There is an unavoidable race condition if a different vCPU sets a
1932          * timer for this vCPU after the value has been read out. But that's
1933          * OK in practice because *all* the vCPUs need to be stopped before
1934          * we set about migrating their state.
1935          */
1936         QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
1937         env->xen_singleshot_timer_ns = va.u.timer.expires_ns;
1938     }
1939 
1940     return 0;
1941 }
1942