1 /*
2 * Xen HVM emulation support in KVM
3 *
4 * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
5 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or later.
8 * See the COPYING file in the top-level directory.
9 *
10 */
11
12 #include "qemu/osdep.h"
13 #include "qemu/log.h"
14 #include "qemu/main-loop.h"
15 #include "qemu/error-report.h"
16 #include "exec/target_page.h"
17 #include "hw/xen/xen.h"
18 #include "system/kvm_int.h"
19 #include "system/kvm_xen.h"
20 #include "kvm/kvm_i386.h"
21 #include "system/address-spaces.h"
22 #include "xen-emu.h"
23 #include "trace.h"
24 #include "system/runstate.h"
25
26 #include "hw/pci/msi.h"
27 #include "hw/i386/apic-msidef.h"
28 #include "hw/i386/e820_memory_layout.h"
29 #include "hw/i386/kvm/xen_overlay.h"
30 #include "hw/i386/kvm/xen_evtchn.h"
31 #include "hw/i386/kvm/xen_gnttab.h"
32 #include "hw/i386/kvm/xen_primary_console.h"
33 #include "hw/i386/kvm/xen_xenstore.h"
34
35 #include "hw/xen/interface/version.h"
36 #include "hw/xen/interface/sched.h"
37 #include "hw/xen/interface/memory.h"
38 #include "hw/xen/interface/hvm/hvm_op.h"
39 #include "hw/xen/interface/hvm/params.h"
40 #include "hw/xen/interface/vcpu.h"
41 #include "hw/xen/interface/event_channel.h"
42 #include "hw/xen/interface/grant_table.h"
43
44 #include "xen-compat.h"
45
46 static void xen_vcpu_singleshot_timer_event(void *opaque);
47 static void xen_vcpu_periodic_timer_event(void *opaque);
48 static int vcpuop_stop_singleshot_timer(CPUState *cs);
49
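/*
 * On 64-bit builds a guest may still make hypercalls from 32-bit (compat)
 * mode, where some argument structures have a different layout; 32-bit
 * builds never need that translation.
 */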
50 #ifdef TARGET_X86_64
51 #define hypercall_compat32(longmode) (!(longmode))
52 #else
53 #define hypercall_compat32(longmode) (false)
54 #endif
55
static bool kvm_gva_to_gpa(CPUState *cs, uint64_t gva, uint64_t *gpa,
57 size_t *len, bool is_write)
58 {
59 struct kvm_translation tr = {
60 .linear_address = gva,
61 };
62
63 if (len) {
64 *len = TARGET_PAGE_SIZE - (gva & ~TARGET_PAGE_MASK);
65 }
66
67 if (kvm_vcpu_ioctl(cs, KVM_TRANSLATE, &tr) || !tr.valid ||
68 (is_write && !tr.writeable)) {
69 return false;
70 }
71 *gpa = tr.physical_address;
72 return true;
73 }
74
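/*
 * Copy to or from a guest virtual address, translating one page at a time
 * via KVM_TRANSLATE so that no single access crosses a page boundary.
 */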
static int kvm_gva_rw(CPUState *cs, uint64_t gva, void *_buf, size_t sz,
76 bool is_write)
77 {
78 uint8_t *buf = (uint8_t *)_buf;
79 uint64_t gpa;
80 size_t len;
81
82 while (sz) {
83 if (!kvm_gva_to_gpa(cs, gva, &gpa, &len, is_write)) {
84 return -EFAULT;
85 }
86 if (len > sz) {
87 len = sz;
88 }
89
90 cpu_physical_memory_rw(gpa, buf, len, is_write);
91
92 buf += len;
93 sz -= len;
94 gva += len;
95 }
96
97 return 0;
98 }
99
static inline int kvm_copy_from_gva(CPUState *cs, uint64_t gva, void *buf,
101 size_t sz)
102 {
103 return kvm_gva_rw(cs, gva, buf, sz, false);
104 }
105
static inline int kvm_copy_to_gva(CPUState *cs, uint64_t gva, void *buf,
107 size_t sz)
108 {
109 return kvm_gva_rw(cs, gva, buf, sz, true);
110 }
111
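/*
 * Check that KVM provides the Xen HVM capabilities we rely on, tell it
 * which MSR fills the hypercall page, and ask for hypercalls to be
 * intercepted and delivered to userspace as KVM_EXIT_XEN.
 */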
int kvm_xen_init(KVMState *s, uint32_t hypercall_msr)
113 {
114 const int required_caps = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
115 KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_SHARED_INFO;
116 struct kvm_xen_hvm_config cfg = {
117 .msr = hypercall_msr,
118 .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
119 };
120 int xen_caps, ret;
121
122 xen_caps = kvm_check_extension(s, KVM_CAP_XEN_HVM);
123 if (required_caps & ~xen_caps) {
124 error_report("kvm: Xen HVM guest support not present or insufficient");
125 return -ENOSYS;
126 }
127
128 if (xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND) {
129 struct kvm_xen_hvm_attr ha = {
130 .type = KVM_XEN_ATTR_TYPE_XEN_VERSION,
131 .u.xen_version = s->xen_version,
132 };
133 (void)kvm_vm_ioctl(s, KVM_XEN_HVM_SET_ATTR, &ha);
134
135 cfg.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
136 }
137
138 ret = kvm_vm_ioctl(s, KVM_XEN_HVM_CONFIG, &cfg);
139 if (ret < 0) {
140 error_report("kvm: Failed to enable Xen HVM support: %s",
141 strerror(-ret));
142 return ret;
143 }
144
145 /* If called a second time, don't repeat the rest of the setup. */
146 if (s->xen_caps) {
147 return 0;
148 }
149
150 /*
151 * Event channel delivery via GSI/PCI_INTX needs to poll the vcpu_info
152 * of vCPU0 to deassert the IRQ when ->evtchn_upcall_pending is cleared.
153 *
154 * In the kernel, there's a notifier hook on the PIC/IOAPIC which allows
155 * such things to be polled at precisely the right time. We *could* do
156 * it nicely in the kernel: check vcpu_info[0]->evtchn_upcall_pending at
157 * the moment the IRQ is acked, and see if it should be reasserted.
158 *
159 * But the in-kernel irqchip is deprecated, so we're unlikely to add
160 * that support in the kernel. Insist on using the split irqchip mode
161 * instead.
162 *
163 * This leaves us polling for the level going low in QEMU, which lacks
164 * the appropriate hooks in its PIC/IOAPIC code. Even VFIO is sending a
165 * spurious 'ack' to an INTX IRQ every time there's any MMIO access to
166 * the device (for which it has to unmap the device and trap access, for
167 * some period after an IRQ!!). In the Xen case, we do it on exit from
168 * KVM_RUN, if the flag is set to say that the GSI is currently asserted.
169 * Which is kind of icky, but less so than the VFIO one. I may fix them
170 * both later...
171 */
172 if (!kvm_kernel_irqchip_split()) {
173 error_report("kvm: Xen support requires kernel-irqchip=split");
174 return -EINVAL;
175 }
176
177 s->xen_caps = xen_caps;
178
179 /* Tell fw_cfg to notify the BIOS to reserve the range. */
180 e820_add_entry(XEN_SPECIAL_AREA_ADDR, XEN_SPECIAL_AREA_SIZE, E820_RESERVED);
181
182 /* The pages couldn't be overlaid until KVM was initialized */
183 xen_primary_console_reset();
184 xen_xenstore_reset();
185
186 return 0;
187 }
188
int kvm_xen_init_vcpu(CPUState *cs)
190 {
191 X86CPU *cpu = X86_CPU(cs);
192 CPUX86State *env = &cpu->env;
193 int err;
194
195 /*
196 * The kernel needs to know the Xen/ACPI vCPU ID because that's
197 * what the guest uses in hypercalls such as timers. It doesn't
198 * match the APIC ID which is generally used for talking to the
199 * kernel about vCPUs. And if vCPU threads race with creating
200 * their KVM vCPUs out of order, it doesn't necessarily match
201 * with the kernel's internal vCPU indices either.
202 */
203 if (kvm_xen_has_cap(EVTCHN_SEND)) {
204 struct kvm_xen_vcpu_attr va = {
205 .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID,
206 .u.vcpu_id = cs->cpu_index,
207 };
208 err = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
209 if (err) {
210 error_report("kvm: Failed to set Xen vCPU ID attribute: %s",
211 strerror(-err));
212 return err;
213 }
214 }
215
216 env->xen_vcpu_info_gpa = INVALID_GPA;
217 env->xen_vcpu_info_default_gpa = INVALID_GPA;
218 env->xen_vcpu_time_info_gpa = INVALID_GPA;
219 env->xen_vcpu_runstate_gpa = INVALID_GPA;
220
221 qemu_mutex_init(&env->xen_timers_lock);
222 env->xen_singleshot_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
223 xen_vcpu_singleshot_timer_event,
224 cpu);
225 if (!env->xen_singleshot_timer) {
226 return -ENOMEM;
227 }
228 env->xen_singleshot_timer->opaque = cs;
229
230 env->xen_periodic_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
231 xen_vcpu_periodic_timer_event,
232 cpu);
233 if (!env->xen_periodic_timer) {
234 return -ENOMEM;
235 }
236 env->xen_periodic_timer->opaque = cs;
237
238 return 0;
239 }
240
uint32_t kvm_xen_get_caps(void)
242 {
243 return kvm_state->xen_caps;
244 }
245
static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu,
247 int cmd, uint64_t arg)
248 {
249 int err = 0;
250
251 switch (cmd) {
252 case XENVER_get_features: {
253 struct xen_feature_info fi;
254
255 /* No need for 32/64 compat handling */
256 qemu_build_assert(sizeof(fi) == 8);
257
258 err = kvm_copy_from_gva(CPU(cpu), arg, &fi, sizeof(fi));
259 if (err) {
260 break;
261 }
262
263 fi.submap = 0;
264 if (fi.submap_idx == 0) {
265 fi.submap |= 1 << XENFEAT_writable_page_tables |
266 1 << XENFEAT_writable_descriptor_tables |
267 1 << XENFEAT_auto_translated_physmap |
268 1 << XENFEAT_hvm_callback_vector |
269 1 << XENFEAT_hvm_safe_pvclock |
270 1 << XENFEAT_hvm_pirqs;
271 }
272
273 err = kvm_copy_to_gva(CPU(cpu), arg, &fi, sizeof(fi));
274 break;
275 }
276
277 default:
278 return false;
279 }
280
281 exit->u.hcall.result = err;
282 return true;
283 }
284
static int kvm_xen_set_vcpu_attr(CPUState *cs, uint16_t type, uint64_t gpa)
286 {
287 struct kvm_xen_vcpu_attr xhsi;
288
289 xhsi.type = type;
290 xhsi.u.gpa = gpa;
291
292 trace_kvm_xen_set_vcpu_attr(cs->cpu_index, type, gpa);
293
294 return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xhsi);
295 }
296
static int kvm_xen_set_vcpu_callback_vector(CPUState *cs)
298 {
299 uint8_t vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
300 struct kvm_xen_vcpu_attr xva;
301
302 xva.type = KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR;
303 xva.u.vector = vector;
304
305 trace_kvm_xen_set_vcpu_callback(cs->cpu_index, vector);
306
307 return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xva);
308 }
309
static void do_set_vcpu_callback_vector(CPUState *cs, run_on_cpu_data data)
311 {
312 X86CPU *cpu = X86_CPU(cs);
313 CPUX86State *env = &cpu->env;
314
315 env->xen_vcpu_callback_vector = data.host_int;
316
317 if (kvm_xen_has_cap(EVTCHN_SEND)) {
318 kvm_xen_set_vcpu_callback_vector(cs);
319 }
320 }
321
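/*
 * Tell KVM where this vCPU's vcpu_info lives, and also map it into QEMU's
 * address space so the event channel code can inspect
 * evtchn_upcall_pending directly.
 */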
static int set_vcpu_info(CPUState *cs, uint64_t gpa)
323 {
324 X86CPU *cpu = X86_CPU(cs);
325 CPUX86State *env = &cpu->env;
326 MemoryRegionSection mrs = { .mr = NULL };
327 void *vcpu_info_hva = NULL;
328 int ret;
329
330 ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, gpa);
331 if (ret || gpa == INVALID_GPA) {
332 goto out;
333 }
334
335 mrs = memory_region_find(get_system_memory(), gpa,
336 sizeof(struct vcpu_info));
337 if (mrs.mr && mrs.mr->ram_block &&
338 !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
339 vcpu_info_hva = qemu_map_ram_ptr(mrs.mr->ram_block,
340 mrs.offset_within_region);
341 }
342 if (!vcpu_info_hva) {
343 if (mrs.mr) {
344 memory_region_unref(mrs.mr);
345 mrs.mr = NULL;
346 }
347 ret = -EINVAL;
348 }
349
350 out:
351 if (env->xen_vcpu_info_mr) {
352 memory_region_unref(env->xen_vcpu_info_mr);
353 }
354 env->xen_vcpu_info_hva = vcpu_info_hva;
355 env->xen_vcpu_info_mr = mrs.mr;
356 return ret;
357 }
358
static void do_set_vcpu_info_default_gpa(CPUState *cs, run_on_cpu_data data)
360 {
361 X86CPU *cpu = X86_CPU(cs);
362 CPUX86State *env = &cpu->env;
363
364 env->xen_vcpu_info_default_gpa = data.host_ulong;
365
366 /* Changing the default does nothing if a vcpu_info was explicitly set. */
367 if (env->xen_vcpu_info_gpa == INVALID_GPA) {
368 set_vcpu_info(cs, env->xen_vcpu_info_default_gpa);
369 }
370 }
371
static void do_set_vcpu_info_gpa(CPUState *cs, run_on_cpu_data data)
373 {
374 X86CPU *cpu = X86_CPU(cs);
375 CPUX86State *env = &cpu->env;
376
377 env->xen_vcpu_info_gpa = data.host_ulong;
378
379 set_vcpu_info(cs, env->xen_vcpu_info_gpa);
380 }
381
void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id)
383 {
384 CPUState *cs = qemu_get_cpu(vcpu_id);
385 if (!cs) {
386 return NULL;
387 }
388
389 return X86_CPU(cs)->env.xen_vcpu_info_hva;
390 }
391
void kvm_xen_maybe_deassert_callback(CPUState *cs)
393 {
394 CPUX86State *env = &X86_CPU(cs)->env;
395 struct vcpu_info *vi = env->xen_vcpu_info_hva;
396 if (!vi) {
397 return;
398 }
399
400 /* If the evtchn_upcall_pending flag is cleared, turn the GSI off. */
401 if (!vi->evtchn_upcall_pending) {
402 bql_lock();
403 /*
         * Check again now that we have the lock, because it may have been
405 * asserted in the interim. And we don't want to take the lock
406 * every time because this is a fast path.
407 */
408 if (!vi->evtchn_upcall_pending) {
409 X86_CPU(cs)->env.xen_callback_asserted = false;
410 xen_evtchn_set_callback_level(0);
411 }
412 bql_unlock();
413 }
414 }
415
void kvm_xen_set_callback_asserted(void)
417 {
418 CPUState *cs = qemu_get_cpu(0);
419
420 if (cs) {
421 X86_CPU(cs)->env.xen_callback_asserted = true;
422 }
423 }
424
bool kvm_xen_has_vcpu_callback_vector(void)
426 {
427 CPUState *cs = qemu_get_cpu(0);
428
429 return cs && !!X86_CPU(cs)->env.xen_vcpu_callback_vector;
430 }
431
void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type)
433 {
434 CPUState *cs = qemu_get_cpu(vcpu_id);
435 uint8_t vector;
436
437 if (!cs) {
438 return;
439 }
440
441 vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
442 if (vector) {
443 /*
         * The per-vCPU callback vector is injected via the local APIC.
         * Just deliver it as an MSI.
446 */
447 MSIMessage msg = {
448 .address = APIC_DEFAULT_ADDRESS |
449 (X86_CPU(cs)->apic_id << MSI_ADDR_DEST_ID_SHIFT),
450 .data = vector | (1UL << MSI_DATA_LEVEL_SHIFT),
451 };
452 kvm_irqchip_send_msi(kvm_state, msg);
453 return;
454 }
455
456 switch (type) {
457 case HVM_PARAM_CALLBACK_TYPE_VECTOR:
458 /*
459 * If the evtchn_upcall_pending field in the vcpu_info is set, then
460 * KVM will automatically deliver the vector on entering the vCPU
461 * so all we have to do is kick it out.
462 */
463 qemu_cpu_kick(cs);
464 break;
465
466 case HVM_PARAM_CALLBACK_TYPE_GSI:
467 case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
468 if (vcpu_id == 0) {
469 xen_evtchn_set_callback_level(1);
470 }
471 break;
472 }
473 }
474
475 /* Must always be called with xen_timers_lock held */
static int kvm_xen_set_vcpu_timer(CPUState *cs)
477 {
478 X86CPU *cpu = X86_CPU(cs);
479 CPUX86State *env = &cpu->env;
480
481 struct kvm_xen_vcpu_attr va = {
482 .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
483 .u.timer.port = env->xen_virq[VIRQ_TIMER],
484 .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
485 .u.timer.expires_ns = env->xen_singleshot_timer_ns,
486 };
487
488 return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
489 }
490
static void do_set_vcpu_timer_virq(CPUState *cs, run_on_cpu_data data)
492 {
493 QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
494 kvm_xen_set_vcpu_timer(cs);
495 }
496
int kvm_xen_set_vcpu_virq(uint32_t vcpu_id, uint16_t virq, uint16_t port)
498 {
499 CPUState *cs = qemu_get_cpu(vcpu_id);
500
501 if (!cs) {
502 return -ENOENT;
503 }
504
505 /* cpu.h doesn't include the actual Xen header. */
506 qemu_build_assert(NR_VIRQS == XEN_NR_VIRQS);
507
508 if (virq >= NR_VIRQS) {
509 return -EINVAL;
510 }
511
512 if (port && X86_CPU(cs)->env.xen_virq[virq]) {
513 return -EEXIST;
514 }
515
516 X86_CPU(cs)->env.xen_virq[virq] = port;
517 if (virq == VIRQ_TIMER && kvm_xen_has_cap(EVTCHN_SEND)) {
518 async_run_on_cpu(cs, do_set_vcpu_timer_virq,
519 RUN_ON_CPU_HOST_INT(port));
520 }
521 return 0;
522 }
523
static void do_set_vcpu_time_info_gpa(CPUState *cs, run_on_cpu_data data)
525 {
526 X86CPU *cpu = X86_CPU(cs);
527 CPUX86State *env = &cpu->env;
528
529 env->xen_vcpu_time_info_gpa = data.host_ulong;
530
531 kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
532 env->xen_vcpu_time_info_gpa);
533 }
534
static void do_set_vcpu_runstate_gpa(CPUState *cs, run_on_cpu_data data)
536 {
537 X86CPU *cpu = X86_CPU(cs);
538 CPUX86State *env = &cpu->env;
539
540 env->xen_vcpu_runstate_gpa = data.host_ulong;
541
542 kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
543 env->xen_vcpu_runstate_gpa);
544 }
545
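/*
 * Reset one vCPU's Xen state; invoked via async_run_on_cpu() so that it
 * runs on the vCPU's own thread.
 */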
static void do_vcpu_soft_reset(CPUState *cs, run_on_cpu_data data)
547 {
548 X86CPU *cpu = X86_CPU(cs);
549 CPUX86State *env = &cpu->env;
550
551 env->xen_vcpu_info_gpa = INVALID_GPA;
552 env->xen_vcpu_info_default_gpa = INVALID_GPA;
553 env->xen_vcpu_time_info_gpa = INVALID_GPA;
554 env->xen_vcpu_runstate_gpa = INVALID_GPA;
555 env->xen_vcpu_callback_vector = 0;
556 memset(env->xen_virq, 0, sizeof(env->xen_virq));
557
558 set_vcpu_info(cs, INVALID_GPA);
559 kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
560 INVALID_GPA);
561 kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
562 INVALID_GPA);
563 if (kvm_xen_has_cap(EVTCHN_SEND)) {
564 kvm_xen_set_vcpu_callback_vector(cs);
565
566 QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
567 env->xen_singleshot_timer_ns = 0;
568 kvm_xen_set_vcpu_timer(cs);
569 } else {
570 vcpuop_stop_singleshot_timer(cs);
    }
572
573 }
574
static int xen_set_shared_info(uint64_t gfn)
576 {
577 uint64_t gpa = gfn << TARGET_PAGE_BITS;
578 int i, err;
579
580 BQL_LOCK_GUARD();
581
582 /*
583 * The xen_overlay device tells KVM about it too, since it had to
584 * do that on migration load anyway (unless we're going to jump
585 * through lots of hoops to maintain the fiction that this isn't
     * KVM-specific).
587 */
588 err = xen_overlay_map_shinfo_page(gpa);
589 if (err) {
590 return err;
591 }
592
593 trace_kvm_xen_set_shared_info(gfn);
594
595 for (i = 0; i < XEN_LEGACY_MAX_VCPUS; i++) {
596 CPUState *cpu = qemu_get_cpu(i);
597 if (cpu) {
598 async_run_on_cpu(cpu, do_set_vcpu_info_default_gpa,
599 RUN_ON_CPU_HOST_ULONG(gpa));
600 }
601 gpa += sizeof(vcpu_info_t);
602 }
603
604 return err;
605 }
606
static int add_to_physmap_one(uint32_t space, uint64_t idx, uint64_t gfn)
608 {
609 switch (space) {
610 case XENMAPSPACE_shared_info:
611 if (idx > 0) {
612 return -EINVAL;
613 }
614 return xen_set_shared_info(gfn);
615
616 case XENMAPSPACE_grant_table:
617 return xen_gnttab_map_page(idx, gfn);
618
619 case XENMAPSPACE_gmfn:
620 case XENMAPSPACE_gmfn_range:
621 return -ENOTSUP;
622
623 case XENMAPSPACE_gmfn_foreign:
624 case XENMAPSPACE_dev_mmio:
625 return -EPERM;
626
627 default:
628 return -EINVAL;
629 }
630 }
631
static int do_add_to_physmap(struct kvm_xen_exit *exit, X86CPU *cpu,
633 uint64_t arg)
634 {
635 struct xen_add_to_physmap xatp;
636 CPUState *cs = CPU(cpu);
637
638 if (hypercall_compat32(exit->u.hcall.longmode)) {
639 struct compat_xen_add_to_physmap xatp32;
640
641 qemu_build_assert(sizeof(struct compat_xen_add_to_physmap) == 16);
642 if (kvm_copy_from_gva(cs, arg, &xatp32, sizeof(xatp32))) {
643 return -EFAULT;
644 }
645 xatp.domid = xatp32.domid;
646 xatp.size = xatp32.size;
647 xatp.space = xatp32.space;
648 xatp.idx = xatp32.idx;
649 xatp.gpfn = xatp32.gpfn;
650 } else {
651 if (kvm_copy_from_gva(cs, arg, &xatp, sizeof(xatp))) {
652 return -EFAULT;
653 }
654 }
655
656 if (xatp.domid != DOMID_SELF && xatp.domid != xen_domid) {
657 return -ESRCH;
658 }
659
660 return add_to_physmap_one(xatp.space, xatp.idx, xatp.gpfn);
661 }
662
static int do_add_to_physmap_batch(struct kvm_xen_exit *exit, X86CPU *cpu,
664 uint64_t arg)
665 {
666 struct xen_add_to_physmap_batch xatpb;
667 unsigned long idxs_gva, gpfns_gva, errs_gva;
668 CPUState *cs = CPU(cpu);
669 size_t op_sz;
670
671 if (hypercall_compat32(exit->u.hcall.longmode)) {
672 struct compat_xen_add_to_physmap_batch xatpb32;
673
674 qemu_build_assert(sizeof(struct compat_xen_add_to_physmap_batch) == 20);
675 if (kvm_copy_from_gva(cs, arg, &xatpb32, sizeof(xatpb32))) {
676 return -EFAULT;
677 }
678 xatpb.domid = xatpb32.domid;
679 xatpb.space = xatpb32.space;
680 xatpb.size = xatpb32.size;
681
682 idxs_gva = xatpb32.idxs.c;
683 gpfns_gva = xatpb32.gpfns.c;
684 errs_gva = xatpb32.errs.c;
685 op_sz = sizeof(uint32_t);
686 } else {
687 if (kvm_copy_from_gva(cs, arg, &xatpb, sizeof(xatpb))) {
688 return -EFAULT;
689 }
690 op_sz = sizeof(unsigned long);
691 idxs_gva = (unsigned long)xatpb.idxs.p;
692 gpfns_gva = (unsigned long)xatpb.gpfns.p;
693 errs_gva = (unsigned long)xatpb.errs.p;
694 }
695
696 if (xatpb.domid != DOMID_SELF && xatpb.domid != xen_domid) {
697 return -ESRCH;
698 }
699
700 /* Explicitly invalid for the batch op. Not that we implement it anyway. */
701 if (xatpb.space == XENMAPSPACE_gmfn_range) {
702 return -EINVAL;
703 }
704
705 while (xatpb.size--) {
706 unsigned long idx = 0;
707 unsigned long gpfn = 0;
708 int err;
709
710 /* For 32-bit compat this only copies the low 32 bits of each */
711 if (kvm_copy_from_gva(cs, idxs_gva, &idx, op_sz) ||
712 kvm_copy_from_gva(cs, gpfns_gva, &gpfn, op_sz)) {
713 return -EFAULT;
714 }
715 idxs_gva += op_sz;
716 gpfns_gva += op_sz;
717
718 err = add_to_physmap_one(xatpb.space, idx, gpfn);
719
720 if (kvm_copy_to_gva(cs, errs_gva, &err, sizeof(err))) {
721 return -EFAULT;
722 }
723 errs_gva += sizeof(err);
724 }
725 return 0;
726 }
727
static bool kvm_xen_hcall_memory_op(struct kvm_xen_exit *exit, X86CPU *cpu,
729 int cmd, uint64_t arg)
730 {
731 int err;
732
733 switch (cmd) {
734 case XENMEM_add_to_physmap:
735 err = do_add_to_physmap(exit, cpu, arg);
736 break;
737
738 case XENMEM_add_to_physmap_batch:
739 err = do_add_to_physmap_batch(exit, cpu, arg);
740 break;
741
742 default:
743 return false;
744 }
745
746 exit->u.hcall.result = err;
747 return true;
748 }
749
static bool handle_set_param(struct kvm_xen_exit *exit, X86CPU *cpu,
751 uint64_t arg)
752 {
753 CPUState *cs = CPU(cpu);
754 struct xen_hvm_param hp;
755 int err = 0;
756
757 /* No need for 32/64 compat handling */
758 qemu_build_assert(sizeof(hp) == 16);
759
760 if (kvm_copy_from_gva(cs, arg, &hp, sizeof(hp))) {
761 err = -EFAULT;
762 goto out;
763 }
764
765 if (hp.domid != DOMID_SELF && hp.domid != xen_domid) {
766 err = -ESRCH;
767 goto out;
768 }
769
770 switch (hp.index) {
771 case HVM_PARAM_CALLBACK_IRQ:
772 bql_lock();
773 err = xen_evtchn_set_callback_param(hp.value);
774 bql_unlock();
775 xen_set_long_mode(exit->u.hcall.longmode);
776 break;
777 default:
778 return false;
779 }
780
781 out:
782 exit->u.hcall.result = err;
783 return true;
784 }
785
static bool handle_get_param(struct kvm_xen_exit *exit, X86CPU *cpu,
787 uint64_t arg)
788 {
789 CPUState *cs = CPU(cpu);
790 struct xen_hvm_param hp;
791 int err = 0;
792
793 /* No need for 32/64 compat handling */
794 qemu_build_assert(sizeof(hp) == 16);
795
796 if (kvm_copy_from_gva(cs, arg, &hp, sizeof(hp))) {
797 err = -EFAULT;
798 goto out;
799 }
800
801 if (hp.domid != DOMID_SELF && hp.domid != xen_domid) {
802 err = -ESRCH;
803 goto out;
804 }
805
806 switch (hp.index) {
807 case HVM_PARAM_STORE_PFN:
808 hp.value = XEN_SPECIAL_PFN(XENSTORE);
809 break;
810 case HVM_PARAM_STORE_EVTCHN:
811 hp.value = xen_xenstore_get_port();
812 break;
813 case HVM_PARAM_CONSOLE_PFN:
814 hp.value = xen_primary_console_get_pfn();
815 if (!hp.value) {
816 err = -EINVAL;
817 }
818 break;
819 case HVM_PARAM_CONSOLE_EVTCHN:
820 hp.value = xen_primary_console_get_port();
821 if (!hp.value) {
822 err = -EINVAL;
823 }
824 break;
825 default:
826 return false;
827 }
828
829 if (!err && kvm_copy_to_gva(cs, arg, &hp, sizeof(hp))) {
830 err = -EFAULT;
831 }
832 out:
833 exit->u.hcall.result = err;
834 return true;
835 }
836
static int kvm_xen_hcall_evtchn_upcall_vector(struct kvm_xen_exit *exit,
838 X86CPU *cpu, uint64_t arg)
839 {
840 struct xen_hvm_evtchn_upcall_vector up;
841 CPUState *target_cs;
842
843 /* No need for 32/64 compat handling */
844 qemu_build_assert(sizeof(up) == 8);
845
846 if (kvm_copy_from_gva(CPU(cpu), arg, &up, sizeof(up))) {
847 return -EFAULT;
848 }
849
850 if (up.vector < 0x10) {
851 return -EINVAL;
852 }
853
854 target_cs = qemu_get_cpu(up.vcpu);
855 if (!target_cs) {
856 return -EINVAL;
857 }
858
859 async_run_on_cpu(target_cs, do_set_vcpu_callback_vector,
860 RUN_ON_CPU_HOST_INT(up.vector));
861 return 0;
862 }
863
static bool kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, X86CPU *cpu,
865 int cmd, uint64_t arg)
866 {
867 int ret = -ENOSYS;
868 switch (cmd) {
869 case HVMOP_set_evtchn_upcall_vector:
870 ret = kvm_xen_hcall_evtchn_upcall_vector(exit, cpu, arg);
871 break;
872
873 case HVMOP_pagetable_dying:
874 ret = -ENOSYS;
875 break;
876
877 case HVMOP_set_param:
878 return handle_set_param(exit, cpu, arg);
879
880 case HVMOP_get_param:
881 return handle_get_param(exit, cpu, arg);
882
883 default:
884 return false;
885 }
886
887 exit->u.hcall.result = ret;
888 return true;
889 }
890
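/*
 * VCPUOP_register_vcpu_info: remember where the guest wants its vcpu_info
 * to live, and apply the change on the target vCPU's own thread.
 */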
static int vcpuop_register_vcpu_info(CPUState *cs, CPUState *target,
892 uint64_t arg)
893 {
894 struct vcpu_register_vcpu_info rvi;
895 uint64_t gpa;
896
897 /* No need for 32/64 compat handling */
898 qemu_build_assert(sizeof(rvi) == 16);
899 qemu_build_assert(sizeof(struct vcpu_info) == 64);
900
901 if (!target) {
902 return -ENOENT;
903 }
904
905 if (kvm_copy_from_gva(cs, arg, &rvi, sizeof(rvi))) {
906 return -EFAULT;
907 }
908
909 if (rvi.offset > TARGET_PAGE_SIZE - sizeof(struct vcpu_info)) {
910 return -EINVAL;
911 }
912
913 gpa = ((rvi.mfn << TARGET_PAGE_BITS) + rvi.offset);
914 async_run_on_cpu(target, do_set_vcpu_info_gpa, RUN_ON_CPU_HOST_ULONG(gpa));
915 return 0;
916 }
917
static int vcpuop_register_vcpu_time_info(CPUState *cs, CPUState *target,
919 uint64_t arg)
920 {
921 struct vcpu_register_time_memory_area tma;
922 uint64_t gpa;
923 size_t len;
924
925 /* No need for 32/64 compat handling */
926 qemu_build_assert(sizeof(tma) == 8);
927 qemu_build_assert(sizeof(struct vcpu_time_info) == 32);
928
929 if (!target) {
930 return -ENOENT;
931 }
932
933 if (kvm_copy_from_gva(cs, arg, &tma, sizeof(tma))) {
934 return -EFAULT;
935 }
936
937 /*
938 * Xen actually uses the GVA and does the translation through the guest
939 * page tables each time. But Linux/KVM uses the GPA, on the assumption
940 * that guests only ever use *global* addresses (kernel virtual addresses)
941 * for it. If Linux is changed to redo the GVA→GPA translation each time,
942 * it will offer a new vCPU attribute for that, and we'll use it instead.
943 */
944 if (!kvm_gva_to_gpa(cs, tma.addr.p, &gpa, &len, false) ||
945 len < sizeof(struct vcpu_time_info)) {
946 return -EFAULT;
947 }
948
949 async_run_on_cpu(target, do_set_vcpu_time_info_gpa,
950 RUN_ON_CPU_HOST_ULONG(gpa));
951 return 0;
952 }
953
static int vcpuop_register_runstate_info(CPUState *cs, CPUState *target,
955 uint64_t arg)
956 {
957 struct vcpu_register_runstate_memory_area rma;
958 uint64_t gpa;
959 size_t len;
960
961 /* No need for 32/64 compat handling */
962 qemu_build_assert(sizeof(rma) == 8);
963 /* The runstate area actually does change size, but Linux copes. */
964
965 if (!target) {
966 return -ENOENT;
967 }
968
969 if (kvm_copy_from_gva(cs, arg, &rma, sizeof(rma))) {
970 return -EFAULT;
971 }
972
973 /* As with vcpu_time_info, Xen actually uses the GVA but KVM doesn't. */
974 if (!kvm_gva_to_gpa(cs, rma.addr.p, &gpa, &len, false)) {
975 return -EFAULT;
976 }
977
978 async_run_on_cpu(target, do_set_vcpu_runstate_gpa,
979 RUN_ON_CPU_HOST_ULONG(gpa));
980 return 0;
981 }
982
static uint64_t kvm_get_current_ns(void)
984 {
985 struct kvm_clock_data data;
986 int ret;
987
988 ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
989 if (ret < 0) {
        fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(-ret));
991 abort();
992 }
993
994 return data.clock;
995 }
996
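/*
 * Userspace timer expiry handlers: when one of the Xen timers fires,
 * raise the guest's VIRQ_TIMER event channel port.
 */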
static void xen_vcpu_singleshot_timer_event(void *opaque)
998 {
999 CPUState *cpu = opaque;
1000 CPUX86State *env = &X86_CPU(cpu)->env;
1001 uint16_t port = env->xen_virq[VIRQ_TIMER];
1002
1003 if (likely(port)) {
1004 xen_evtchn_set_port(port);
1005 }
1006
1007 qemu_mutex_lock(&env->xen_timers_lock);
1008 env->xen_singleshot_timer_ns = 0;
1009 qemu_mutex_unlock(&env->xen_timers_lock);
1010 }
1011
static void xen_vcpu_periodic_timer_event(void *opaque)
1013 {
1014 CPUState *cpu = opaque;
1015 CPUX86State *env = &X86_CPU(cpu)->env;
1016 uint16_t port = env->xen_virq[VIRQ_TIMER];
1017 int64_t qemu_now;
1018
1019 if (likely(port)) {
1020 xen_evtchn_set_port(port);
1021 }
1022
1023 qemu_mutex_lock(&env->xen_timers_lock);
1024
1025 qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
1026 timer_mod_ns(env->xen_periodic_timer,
1027 qemu_now + env->xen_periodic_timer_period);
1028
1029 qemu_mutex_unlock(&env->xen_timers_lock);
1030 }
1031
static int do_set_periodic_timer(CPUState *target, uint64_t period_ns)
1033 {
1034 CPUX86State *tenv = &X86_CPU(target)->env;
1035 int64_t qemu_now;
1036
1037 timer_del(tenv->xen_periodic_timer);
1038
1039 qemu_mutex_lock(&tenv->xen_timers_lock);
1040
1041 qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
1042 timer_mod_ns(tenv->xen_periodic_timer, qemu_now + period_ns);
1043 tenv->xen_periodic_timer_period = period_ns;
1044
1045 qemu_mutex_unlock(&tenv->xen_timers_lock);
1046 return 0;
1047 }
1048
1049 #define MILLISECS(_ms) ((int64_t)((_ms) * 1000000ULL))
1050 #define MICROSECS(_us) ((int64_t)((_us) * 1000ULL))
1051 #define STIME_MAX ((time_t)((int64_t)~0ull >> 1))
1052 /* Chosen so (NOW() + delta) won't overflow without an uptime of 200 years */
1053 #define STIME_DELTA_MAX ((int64_t)((uint64_t)~0ull >> 2))
1054
static int vcpuop_set_periodic_timer(CPUState *cs, CPUState *target,
1056 uint64_t arg)
1057 {
1058 struct vcpu_set_periodic_timer spt;
1059
1060 qemu_build_assert(sizeof(spt) == 8);
1061 if (kvm_copy_from_gva(cs, arg, &spt, sizeof(spt))) {
1062 return -EFAULT;
1063 }
1064
1065 if (spt.period_ns < MILLISECS(1) || spt.period_ns > STIME_DELTA_MAX) {
1066 return -EINVAL;
1067 }
1068
1069 return do_set_periodic_timer(target, spt.period_ns);
1070 }
1071
static int vcpuop_stop_periodic_timer(CPUState *target)
1073 {
1074 CPUX86State *tenv = &X86_CPU(target)->env;
1075
1076 qemu_mutex_lock(&tenv->xen_timers_lock);
1077
1078 timer_del(tenv->xen_periodic_timer);
1079 tenv->xen_periodic_timer_period = 0;
1080
1081 qemu_mutex_unlock(&tenv->xen_timers_lock);
1082 return 0;
1083 }
1084
1085 /*
1086 * Userspace handling of timer, for older kernels.
1087 * Must always be called with xen_timers_lock held.
1088 */
static int do_set_singleshot_timer(CPUState *cs, uint64_t timeout_abs,
1090 bool linux_wa)
1091 {
1092 CPUX86State *env = &X86_CPU(cs)->env;
1093 int64_t now = kvm_get_current_ns();
1094 int64_t qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
1095 int64_t delta = timeout_abs - now;
1096
1097 if (linux_wa && unlikely((int64_t)timeout_abs < 0 ||
1098 (delta > 0 && (uint32_t)(delta >> 50) != 0))) {
1099 /*
1100 * Xen has a 'Linux workaround' in do_set_timer_op() which checks
1101 * for negative absolute timeout values (caused by integer
1102 * overflow), and for values about 13 days in the future (2^50ns)
1103 * which would be caused by jiffies overflow. For those cases, it
1104 * sets the timeout 100ms in the future (not *too* soon, since if
1105 * a guest really did set a long timeout on purpose we don't want
1106 * to keep churning CPU time by waking it up).
1107 */
1108 delta = (100 * SCALE_MS);
1109 timeout_abs = now + delta;
1110 }
1111
1112 timer_mod_ns(env->xen_singleshot_timer, qemu_now + delta);
1113 env->xen_singleshot_timer_ns = now + delta;
1114 return 0;
1115 }
1116
static int vcpuop_set_singleshot_timer(CPUState *cs, uint64_t arg)
1118 {
1119 struct vcpu_set_singleshot_timer sst = { 0 };
1120
1121 /*
1122 * The struct is a uint64_t followed by a uint32_t. On 32-bit that
1123 * makes it 12 bytes. On 64-bit it gets padded to 16. The parts
1124 * that get used are identical, and there's four bytes of padding
1125 * unused at the end. For true Xen compatibility we should attempt
1126 * to copy the full 16 bytes from 64-bit guests, and return -EFAULT
1127 * if we can't get the padding too. But that's daft. Just copy what
1128 * we need.
1129 */
1130 qemu_build_assert(offsetof(struct vcpu_set_singleshot_timer, flags) == 8);
1131 qemu_build_assert(sizeof(sst) >= 12);
1132
1133 if (kvm_copy_from_gva(cs, arg, &sst, 12)) {
1134 return -EFAULT;
1135 }
1136
1137 QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
1138
1139 /*
1140 * We ignore the VCPU_SSHOTTMR_future flag, just as Xen now does.
     * The only guest that ever used it got it wrong.
1142 * https://xenbits.xen.org/gitweb/?p=xen.git;a=commitdiff;h=19c6cbd909
1143 */
1144 return do_set_singleshot_timer(cs, sst.timeout_abs_ns, false);
1145 }
1146
static int vcpuop_stop_singleshot_timer(CPUState *cs)
1148 {
1149 CPUX86State *env = &X86_CPU(cs)->env;
1150
1151 qemu_mutex_lock(&env->xen_timers_lock);
1152
1153 timer_del(env->xen_singleshot_timer);
1154 env->xen_singleshot_timer_ns = 0;
1155
1156 qemu_mutex_unlock(&env->xen_timers_lock);
1157 return 0;
1158 }
1159
static bool kvm_xen_hcall_set_timer_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1161 uint64_t timeout)
1162 {
1163 int err;
1164
1165 if (unlikely(timeout == 0)) {
1166 err = vcpuop_stop_singleshot_timer(CPU(cpu));
1167 } else {
1168 QEMU_LOCK_GUARD(&X86_CPU(cpu)->env.xen_timers_lock);
1169 err = do_set_singleshot_timer(CPU(cpu), timeout, true);
1170 }
1171 exit->u.hcall.result = err;
1172 return true;
1173 }
1174
static bool kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1176 int cmd, int vcpu_id, uint64_t arg)
1177 {
1178 CPUState *cs = CPU(cpu);
1179 CPUState *dest = cs->cpu_index == vcpu_id ? cs : qemu_get_cpu(vcpu_id);
1180 int err;
1181
1182 if (!dest) {
1183 err = -ENOENT;
1184 goto out;
1185 }
1186
1187 switch (cmd) {
1188 case VCPUOP_register_runstate_memory_area:
1189 err = vcpuop_register_runstate_info(cs, dest, arg);
1190 break;
1191 case VCPUOP_register_vcpu_time_memory_area:
1192 err = vcpuop_register_vcpu_time_info(cs, dest, arg);
1193 break;
1194 case VCPUOP_register_vcpu_info:
1195 err = vcpuop_register_vcpu_info(cs, dest, arg);
1196 break;
1197 case VCPUOP_set_singleshot_timer: {
1198 if (cs->cpu_index == vcpu_id) {
1199 err = vcpuop_set_singleshot_timer(dest, arg);
1200 } else {
1201 err = -EINVAL;
1202 }
1203 break;
1204 }
1205 case VCPUOP_stop_singleshot_timer:
1206 if (cs->cpu_index == vcpu_id) {
1207 err = vcpuop_stop_singleshot_timer(dest);
1208 } else {
1209 err = -EINVAL;
1210 }
1211 break;
1212 case VCPUOP_set_periodic_timer: {
1213 err = vcpuop_set_periodic_timer(cs, dest, arg);
1214 break;
1215 }
1216 case VCPUOP_stop_periodic_timer:
1217 err = vcpuop_stop_periodic_timer(dest);
1218 break;
1219
1220 default:
1221 return false;
1222 }
1223
1224 out:
1225 exit->u.hcall.result = err;
1226 return true;
1227 }
1228
static bool kvm_xen_hcall_evtchn_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1230 int cmd, uint64_t arg)
1231 {
1232 CPUState *cs = CPU(cpu);
1233 int err = -ENOSYS;
1234
1235 switch (cmd) {
1236 case EVTCHNOP_init_control:
1237 case EVTCHNOP_expand_array:
1238 case EVTCHNOP_set_priority:
1239 /* We do not support FIFO channels at this point */
1240 err = -ENOSYS;
1241 break;
1242
1243 case EVTCHNOP_status: {
1244 struct evtchn_status status;
1245
1246 qemu_build_assert(sizeof(status) == 24);
1247 if (kvm_copy_from_gva(cs, arg, &status, sizeof(status))) {
1248 err = -EFAULT;
1249 break;
1250 }
1251
1252 err = xen_evtchn_status_op(&status);
1253 if (!err && kvm_copy_to_gva(cs, arg, &status, sizeof(status))) {
1254 err = -EFAULT;
1255 }
1256 break;
1257 }
1258 case EVTCHNOP_close: {
1259 struct evtchn_close close;
1260
1261 qemu_build_assert(sizeof(close) == 4);
1262 if (kvm_copy_from_gva(cs, arg, &close, sizeof(close))) {
1263 err = -EFAULT;
1264 break;
1265 }
1266
1267 err = xen_evtchn_close_op(&close);
1268 break;
1269 }
1270 case EVTCHNOP_unmask: {
1271 struct evtchn_unmask unmask;
1272
1273 qemu_build_assert(sizeof(unmask) == 4);
1274 if (kvm_copy_from_gva(cs, arg, &unmask, sizeof(unmask))) {
1275 err = -EFAULT;
1276 break;
1277 }
1278
1279 err = xen_evtchn_unmask_op(&unmask);
1280 break;
1281 }
1282 case EVTCHNOP_bind_virq: {
1283 struct evtchn_bind_virq virq;
1284
1285 qemu_build_assert(sizeof(virq) == 12);
1286 if (kvm_copy_from_gva(cs, arg, &virq, sizeof(virq))) {
1287 err = -EFAULT;
1288 break;
1289 }
1290
1291 err = xen_evtchn_bind_virq_op(&virq);
1292 if (!err && kvm_copy_to_gva(cs, arg, &virq, sizeof(virq))) {
1293 err = -EFAULT;
1294 }
1295 break;
1296 }
1297 case EVTCHNOP_bind_pirq: {
1298 struct evtchn_bind_pirq pirq;
1299
1300 qemu_build_assert(sizeof(pirq) == 12);
1301 if (kvm_copy_from_gva(cs, arg, &pirq, sizeof(pirq))) {
1302 err = -EFAULT;
1303 break;
1304 }
1305
1306 err = xen_evtchn_bind_pirq_op(&pirq);
1307 if (!err && kvm_copy_to_gva(cs, arg, &pirq, sizeof(pirq))) {
1308 err = -EFAULT;
1309 }
1310 break;
1311 }
1312 case EVTCHNOP_bind_ipi: {
1313 struct evtchn_bind_ipi ipi;
1314
1315 qemu_build_assert(sizeof(ipi) == 8);
1316 if (kvm_copy_from_gva(cs, arg, &ipi, sizeof(ipi))) {
1317 err = -EFAULT;
1318 break;
1319 }
1320
1321 err = xen_evtchn_bind_ipi_op(&ipi);
1322 if (!err && kvm_copy_to_gva(cs, arg, &ipi, sizeof(ipi))) {
1323 err = -EFAULT;
1324 }
1325 break;
1326 }
1327 case EVTCHNOP_send: {
1328 struct evtchn_send send;
1329
1330 qemu_build_assert(sizeof(send) == 4);
1331 if (kvm_copy_from_gva(cs, arg, &send, sizeof(send))) {
1332 err = -EFAULT;
1333 break;
1334 }
1335
1336 err = xen_evtchn_send_op(&send);
1337 break;
1338 }
1339 case EVTCHNOP_alloc_unbound: {
1340 struct evtchn_alloc_unbound alloc;
1341
1342 qemu_build_assert(sizeof(alloc) == 8);
1343 if (kvm_copy_from_gva(cs, arg, &alloc, sizeof(alloc))) {
1344 err = -EFAULT;
1345 break;
1346 }
1347
1348 err = xen_evtchn_alloc_unbound_op(&alloc);
1349 if (!err && kvm_copy_to_gva(cs, arg, &alloc, sizeof(alloc))) {
1350 err = -EFAULT;
1351 }
1352 break;
1353 }
1354 case EVTCHNOP_bind_interdomain: {
1355 struct evtchn_bind_interdomain interdomain;
1356
1357 qemu_build_assert(sizeof(interdomain) == 12);
1358 if (kvm_copy_from_gva(cs, arg, &interdomain, sizeof(interdomain))) {
1359 err = -EFAULT;
1360 break;
1361 }
1362
1363 err = xen_evtchn_bind_interdomain_op(&interdomain);
1364 if (!err &&
1365 kvm_copy_to_gva(cs, arg, &interdomain, sizeof(interdomain))) {
1366 err = -EFAULT;
1367 }
1368 break;
1369 }
1370 case EVTCHNOP_bind_vcpu: {
1371 struct evtchn_bind_vcpu vcpu;
1372
1373 qemu_build_assert(sizeof(vcpu) == 8);
1374 if (kvm_copy_from_gva(cs, arg, &vcpu, sizeof(vcpu))) {
1375 err = -EFAULT;
1376 break;
1377 }
1378
1379 err = xen_evtchn_bind_vcpu_op(&vcpu);
1380 break;
1381 }
1382 case EVTCHNOP_reset: {
1383 struct evtchn_reset reset;
1384
1385 qemu_build_assert(sizeof(reset) == 2);
1386 if (kvm_copy_from_gva(cs, arg, &reset, sizeof(reset))) {
1387 err = -EFAULT;
1388 break;
1389 }
1390
1391 err = xen_evtchn_reset_op(&reset);
1392 break;
1393 }
1394 default:
1395 return false;
1396 }
1397
1398 exit->u.hcall.result = err;
1399 return true;
1400 }
1401
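/*
 * SHUTDOWN_soft_reset: tear down event channels, grant tables, the shared
 * info page and all per-vCPU state so the guest (e.g. a kexec'd kernel)
 * can start over from a clean slate.
 */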
int kvm_xen_soft_reset(void)
1403 {
1404 CPUState *cpu;
1405 int err;
1406
1407 assert(bql_locked());
1408
1409 trace_kvm_xen_soft_reset();
1410
1411 err = xen_evtchn_soft_reset();
1412 if (err) {
1413 return err;
1414 }
1415
1416 /*
1417 * Zero is the reset/startup state for HVM_PARAM_CALLBACK_IRQ. Strictly,
1418 * it maps to HVM_PARAM_CALLBACK_TYPE_GSI with GSI#0, but Xen refuses to
     * deliver to the timer interrupt and treats that as 'disabled'.
1420 */
1421 err = xen_evtchn_set_callback_param(0);
1422 if (err) {
1423 return err;
1424 }
1425
1426 CPU_FOREACH(cpu) {
1427 async_run_on_cpu(cpu, do_vcpu_soft_reset, RUN_ON_CPU_NULL);
1428 }
1429
1430 err = xen_overlay_map_shinfo_page(INVALID_GFN);
1431 if (err) {
1432 return err;
1433 }
1434
1435 err = xen_gnttab_reset();
1436 if (err) {
1437 return err;
1438 }
1439
1440 err = xen_primary_console_reset();
1441 if (err) {
1442 return err;
1443 }
1444
1445 err = xen_xenstore_reset();
1446 if (err) {
1447 return err;
1448 }
1449
1450 return 0;
1451 }
1452
static int schedop_shutdown(CPUState *cs, uint64_t arg)
1454 {
1455 struct sched_shutdown shutdown;
1456 int ret = 0;
1457
1458 /* No need for 32/64 compat handling */
1459 qemu_build_assert(sizeof(shutdown) == 4);
1460
1461 if (kvm_copy_from_gva(cs, arg, &shutdown, sizeof(shutdown))) {
1462 return -EFAULT;
1463 }
1464
1465 switch (shutdown.reason) {
1466 case SHUTDOWN_crash:
1467 cpu_dump_state(cs, stderr, CPU_DUMP_CODE);
1468 qemu_system_guest_panicked(NULL);
1469 break;
1470
1471 case SHUTDOWN_reboot:
1472 qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
1473 break;
1474
1475 case SHUTDOWN_poweroff:
1476 qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
1477 break;
1478
1479 case SHUTDOWN_soft_reset:
1480 bql_lock();
1481 ret = kvm_xen_soft_reset();
1482 bql_unlock();
1483 break;
1484
1485 default:
1486 ret = -EINVAL;
1487 break;
1488 }
1489
1490 return ret;
1491 }
1492
static bool kvm_xen_hcall_sched_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1494 int cmd, uint64_t arg)
1495 {
1496 CPUState *cs = CPU(cpu);
1497 int err = -ENOSYS;
1498
1499 switch (cmd) {
1500 case SCHEDOP_shutdown:
1501 err = schedop_shutdown(cs, arg);
1502 break;
1503
1504 case SCHEDOP_poll:
1505 /*
1506 * Linux will panic if this doesn't work. Just yield; it's not
1507 * worth overthinking it because with event channel handling
1508 * in KVM, the kernel will intercept this and it will never
     * reach QEMU anyway. The semantics of the hypercall explicitly
1510 * permit spurious wakeups.
1511 */
1512 case SCHEDOP_yield:
1513 sched_yield();
1514 err = 0;
1515 break;
1516
1517 default:
1518 return false;
1519 }
1520
1521 exit->u.hcall.result = err;
1522 return true;
1523 }
1524
static bool kvm_xen_hcall_gnttab_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1526 int cmd, uint64_t arg, int count)
1527 {
1528 CPUState *cs = CPU(cpu);
1529 int err;
1530
1531 switch (cmd) {
1532 case GNTTABOP_set_version: {
1533 struct gnttab_set_version set;
1534
1535 qemu_build_assert(sizeof(set) == 4);
1536 if (kvm_copy_from_gva(cs, arg, &set, sizeof(set))) {
1537 err = -EFAULT;
1538 break;
1539 }
1540
1541 err = xen_gnttab_set_version_op(&set);
1542 if (!err && kvm_copy_to_gva(cs, arg, &set, sizeof(set))) {
1543 err = -EFAULT;
1544 }
1545 break;
1546 }
1547 case GNTTABOP_get_version: {
1548 struct gnttab_get_version get;
1549
1550 qemu_build_assert(sizeof(get) == 8);
1551 if (kvm_copy_from_gva(cs, arg, &get, sizeof(get))) {
1552 err = -EFAULT;
1553 break;
1554 }
1555
1556 err = xen_gnttab_get_version_op(&get);
1557 if (!err && kvm_copy_to_gva(cs, arg, &get, sizeof(get))) {
1558 err = -EFAULT;
1559 }
1560 break;
1561 }
1562 case GNTTABOP_query_size: {
1563 struct gnttab_query_size size;
1564
1565 qemu_build_assert(sizeof(size) == 16);
1566 if (kvm_copy_from_gva(cs, arg, &size, sizeof(size))) {
1567 err = -EFAULT;
1568 break;
1569 }
1570
1571 err = xen_gnttab_query_size_op(&size);
1572 if (!err && kvm_copy_to_gva(cs, arg, &size, sizeof(size))) {
1573 err = -EFAULT;
1574 }
1575 break;
1576 }
1577 case GNTTABOP_setup_table:
1578 case GNTTABOP_copy:
1579 case GNTTABOP_map_grant_ref:
1580 case GNTTABOP_unmap_grant_ref:
1581 case GNTTABOP_swap_grant_ref:
1582 return false;
1583
1584 default:
1585 /* Xen explicitly returns -ENOSYS to HVM guests for all others */
1586 err = -ENOSYS;
1587 break;
1588 }
1589
1590 exit->u.hcall.result = err;
1591 return true;
1592 }
1593
static bool kvm_xen_hcall_physdev_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1595 int cmd, uint64_t arg)
1596 {
1597 CPUState *cs = CPU(cpu);
1598 int err;
1599
1600 switch (cmd) {
1601 case PHYSDEVOP_map_pirq: {
1602 struct physdev_map_pirq map;
1603
1604 if (hypercall_compat32(exit->u.hcall.longmode)) {
            struct compat_physdev_map_pirq *map32 = (void *)&map;
1606
            if (kvm_copy_from_gva(cs, arg, map32, sizeof(*map32))) {
                err = -EFAULT;
                break;
            }
1610
1611 /*
1612 * The only thing that's different is the alignment of the
1613 * uint64_t table_base at the end, which gets padding to make
1614 * it 64-bit aligned in the 64-bit version.
1615 */
1616 qemu_build_assert(sizeof(*map32) == 36);
1617 qemu_build_assert(offsetof(struct physdev_map_pirq, entry_nr) ==
1618 offsetof(struct compat_physdev_map_pirq, entry_nr));
1619 memmove(&map.table_base, &map32->table_base, sizeof(map.table_base));
1620 } else {
1621 if (kvm_copy_from_gva(cs, arg, &map, sizeof(map))) {
1622 err = -EFAULT;
1623 break;
1624 }
1625 }
1626 err = xen_physdev_map_pirq(&map);
1627 /*
1628 * Since table_base is an IN parameter and won't be changed, just
1629 * copy the size of the compat structure back to the guest.
1630 */
1631 if (!err && kvm_copy_to_gva(cs, arg, &map,
1632 sizeof(struct compat_physdev_map_pirq))) {
1633 err = -EFAULT;
1634 }
1635 break;
1636 }
1637 case PHYSDEVOP_unmap_pirq: {
1638 struct physdev_unmap_pirq unmap;
1639
1640 qemu_build_assert(sizeof(unmap) == 8);
1641 if (kvm_copy_from_gva(cs, arg, &unmap, sizeof(unmap))) {
1642 err = -EFAULT;
1643 break;
1644 }
1645
1646 err = xen_physdev_unmap_pirq(&unmap);
1647 if (!err && kvm_copy_to_gva(cs, arg, &unmap, sizeof(unmap))) {
1648 err = -EFAULT;
1649 }
1650 break;
1651 }
1652 case PHYSDEVOP_eoi: {
1653 struct physdev_eoi eoi;
1654
1655 qemu_build_assert(sizeof(eoi) == 4);
1656 if (kvm_copy_from_gva(cs, arg, &eoi, sizeof(eoi))) {
1657 err = -EFAULT;
1658 break;
1659 }
1660
1661 err = xen_physdev_eoi_pirq(&eoi);
1662 if (!err && kvm_copy_to_gva(cs, arg, &eoi, sizeof(eoi))) {
1663 err = -EFAULT;
1664 }
1665 break;
1666 }
1667 case PHYSDEVOP_irq_status_query: {
1668 struct physdev_irq_status_query query;
1669
1670 qemu_build_assert(sizeof(query) == 8);
1671 if (kvm_copy_from_gva(cs, arg, &query, sizeof(query))) {
1672 err = -EFAULT;
1673 break;
1674 }
1675
1676 err = xen_physdev_query_pirq(&query);
1677 if (!err && kvm_copy_to_gva(cs, arg, &query, sizeof(query))) {
1678 err = -EFAULT;
1679 }
1680 break;
1681 }
1682 case PHYSDEVOP_get_free_pirq: {
1683 struct physdev_get_free_pirq get;
1684
1685 qemu_build_assert(sizeof(get) == 8);
1686 if (kvm_copy_from_gva(cs, arg, &get, sizeof(get))) {
1687 err = -EFAULT;
1688 break;
1689 }
1690
1691 err = xen_physdev_get_free_pirq(&get);
1692 if (!err && kvm_copy_to_gva(cs, arg, &get, sizeof(get))) {
1693 err = -EFAULT;
1694 }
1695 break;
1696 }
1697 case PHYSDEVOP_pirq_eoi_gmfn_v2: /* FreeBSD 13 makes this hypercall */
1698 err = -ENOSYS;
1699 break;
1700
1701 default:
1702 return false;
1703 }
1704
1705 exit->u.hcall.result = err;
1706 return true;
1707 }
1708
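/*
 * Top-level hypercall dispatch. Returns false if the hypercall number is
 * not recognised at all, so that the caller can report -ENOSYS.
 */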
static bool do_kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
1710 {
1711 uint16_t code = exit->u.hcall.input;
1712
1713 if (exit->u.hcall.cpl > 0) {
1714 exit->u.hcall.result = -EPERM;
1715 return true;
1716 }
1717
1718 switch (code) {
1719 case __HYPERVISOR_set_timer_op:
1720 if (exit->u.hcall.longmode) {
1721 return kvm_xen_hcall_set_timer_op(exit, cpu,
1722 exit->u.hcall.params[0]);
1723 } else {
1724 /* In 32-bit mode, the 64-bit timer value is in two args. */
1725 uint64_t val = ((uint64_t)exit->u.hcall.params[1]) << 32 |
1726 (uint32_t)exit->u.hcall.params[0];
1727 return kvm_xen_hcall_set_timer_op(exit, cpu, val);
1728 }
1729 case __HYPERVISOR_grant_table_op:
1730 return kvm_xen_hcall_gnttab_op(exit, cpu, exit->u.hcall.params[0],
1731 exit->u.hcall.params[1],
1732 exit->u.hcall.params[2]);
1733 case __HYPERVISOR_sched_op:
1734 return kvm_xen_hcall_sched_op(exit, cpu, exit->u.hcall.params[0],
1735 exit->u.hcall.params[1]);
1736 case __HYPERVISOR_event_channel_op:
1737 return kvm_xen_hcall_evtchn_op(exit, cpu, exit->u.hcall.params[0],
1738 exit->u.hcall.params[1]);
1739 case __HYPERVISOR_vcpu_op:
1740 return kvm_xen_hcall_vcpu_op(exit, cpu,
1741 exit->u.hcall.params[0],
1742 exit->u.hcall.params[1],
1743 exit->u.hcall.params[2]);
1744 case __HYPERVISOR_hvm_op:
1745 return kvm_xen_hcall_hvm_op(exit, cpu, exit->u.hcall.params[0],
1746 exit->u.hcall.params[1]);
1747 case __HYPERVISOR_memory_op:
1748 return kvm_xen_hcall_memory_op(exit, cpu, exit->u.hcall.params[0],
1749 exit->u.hcall.params[1]);
1750 case __HYPERVISOR_physdev_op:
1751 return kvm_xen_hcall_physdev_op(exit, cpu, exit->u.hcall.params[0],
1752 exit->u.hcall.params[1]);
1753 case __HYPERVISOR_xen_version:
1754 return kvm_xen_hcall_xen_version(exit, cpu, exit->u.hcall.params[0],
1755 exit->u.hcall.params[1]);
1756 default:
1757 return false;
1758 }
1759 }
1760
int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
1762 {
1763 if (exit->type != KVM_EXIT_XEN_HCALL) {
1764 return -1;
1765 }
1766
1767 /*
1768 * The kernel latches the guest 32/64 mode when the MSR is used to fill
1769 * the hypercall page. So if we see a hypercall in a mode that doesn't
1770 * match our own idea of the guest mode, fetch the kernel's idea of the
1771 * "long mode" to remain in sync.
1772 */
1773 if (exit->u.hcall.longmode != xen_is_long_mode()) {
1774 xen_sync_long_mode();
1775 }
1776
1777 if (!do_kvm_xen_handle_exit(cpu, exit)) {
1778 /*
1779 * Some hypercalls will be deliberately "implemented" by returning
1780 * -ENOSYS. This case is for hypercalls which are unexpected.
1781 */
1782 exit->u.hcall.result = -ENOSYS;
1783 qemu_log_mask(LOG_UNIMP, "Unimplemented Xen hypercall %"
1784 PRId64 " (0x%" PRIx64 " 0x%" PRIx64 " 0x%" PRIx64 ")\n",
1785 (uint64_t)exit->u.hcall.input,
1786 (uint64_t)exit->u.hcall.params[0],
1787 (uint64_t)exit->u.hcall.params[1],
1788 (uint64_t)exit->u.hcall.params[2]);
1789 }
1790
1791 trace_kvm_xen_hypercall(CPU(cpu)->cpu_index, exit->u.hcall.cpl,
1792 exit->u.hcall.input, exit->u.hcall.params[0],
1793 exit->u.hcall.params[1], exit->u.hcall.params[2],
1794 exit->u.hcall.result);
1795 return 0;
1796 }
1797
uint16_t kvm_xen_get_gnttab_max_frames(void)
1799 {
1800 KVMState *s = KVM_STATE(current_accel());
1801 return s->xen_gnttab_max_frames;
1802 }
1803
uint16_t kvm_xen_get_evtchn_max_pirq(void)
1805 {
1806 KVMState *s = KVM_STATE(current_accel());
1807 return s->xen_evtchn_max_pirq;
1808 }
1809
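/*
 * Push the saved per-vCPU Xen state (vcpu_info, time info, runstate area,
 * timers and callback vector) back into KVM, e.g. after a vmstate load.
 */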
int kvm_put_xen_state(CPUState *cs)
1811 {
1812 X86CPU *cpu = X86_CPU(cs);
1813 CPUX86State *env = &cpu->env;
1814 uint64_t gpa;
1815 int ret;
1816
1817 gpa = env->xen_vcpu_info_gpa;
1818 if (gpa == INVALID_GPA) {
1819 gpa = env->xen_vcpu_info_default_gpa;
1820 }
1821
1822 if (gpa != INVALID_GPA) {
1823 ret = set_vcpu_info(cs, gpa);
1824 if (ret < 0) {
1825 return ret;
1826 }
1827 }
1828
1829 gpa = env->xen_vcpu_time_info_gpa;
1830 if (gpa != INVALID_GPA) {
1831 ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
1832 gpa);
1833 if (ret < 0) {
1834 return ret;
1835 }
1836 }
1837
1838 gpa = env->xen_vcpu_runstate_gpa;
1839 if (gpa != INVALID_GPA) {
1840 ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
1841 gpa);
1842 if (ret < 0) {
1843 return ret;
1844 }
1845 }
1846
1847 if (env->xen_periodic_timer_period) {
1848 ret = do_set_periodic_timer(cs, env->xen_periodic_timer_period);
1849 if (ret < 0) {
1850 return ret;
1851 }
1852 }
1853
1854 if (!kvm_xen_has_cap(EVTCHN_SEND)) {
1855 /*
1856 * If the kernel has EVTCHN_SEND support then it handles timers too,
1857 * so the timer will be restored by kvm_xen_set_vcpu_timer() below.
1858 */
1859 QEMU_LOCK_GUARD(&env->xen_timers_lock);
1860 if (env->xen_singleshot_timer_ns) {
1861 ret = do_set_singleshot_timer(cs, env->xen_singleshot_timer_ns,
1862 false);
1863 if (ret < 0) {
1864 return ret;
1865 }
1866 }
1867 return 0;
1868 }
1869
1870 if (env->xen_vcpu_callback_vector) {
1871 ret = kvm_xen_set_vcpu_callback_vector(cs);
1872 if (ret < 0) {
1873 return ret;
1874 }
1875 }
1876
1877 if (env->xen_virq[VIRQ_TIMER]) {
1878 do_set_vcpu_timer_virq(cs,
1879 RUN_ON_CPU_HOST_INT(env->xen_virq[VIRQ_TIMER]));
1880 }
1881 return 0;
1882 }
1883
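/*
 * Read back the state the kernel owns: mark the vcpu_info page dirty for
 * migration, and fetch the current singleshot timer deadline.
 */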
int kvm_get_xen_state(CPUState *cs)
1885 {
1886 X86CPU *cpu = X86_CPU(cs);
1887 CPUX86State *env = &cpu->env;
1888 uint64_t gpa;
1889 int ret;
1890
1891 /*
1892 * The kernel does not mark vcpu_info as dirty when it delivers interrupts
1893 * to it. It's up to userspace to *assume* that any page shared thus is
1894 * always considered dirty. The shared_info page is different since it's
1895 * an overlay and migrated separately anyway.
1896 */
1897 gpa = env->xen_vcpu_info_gpa;
1898 if (gpa == INVALID_GPA) {
1899 gpa = env->xen_vcpu_info_default_gpa;
1900 }
1901 if (gpa != INVALID_GPA) {
1902 MemoryRegionSection mrs = memory_region_find(get_system_memory(),
1903 gpa,
1904 sizeof(struct vcpu_info));
1905 if (mrs.mr &&
1906 !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
1907 memory_region_set_dirty(mrs.mr, mrs.offset_within_region,
1908 sizeof(struct vcpu_info));
1909 }
1910 }
1911
1912 if (!kvm_xen_has_cap(EVTCHN_SEND)) {
1913 return 0;
1914 }
1915
1916 /*
1917 * If the kernel is accelerating timers, read out the current value of the
1918 * singleshot timer deadline.
1919 */
1920 if (env->xen_virq[VIRQ_TIMER]) {
1921 struct kvm_xen_vcpu_attr va = {
1922 .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
1923 };
1924 ret = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_GET_ATTR, &va);
1925 if (ret < 0) {
1926 return ret;
1927 }
1928
1929 /*
1930 * This locking is fairly pointless, and is here to appease Coverity.
1931 * There is an unavoidable race condition if a different vCPU sets a
1932 * timer for this vCPU after the value has been read out. But that's
1933 * OK in practice because *all* the vCPUs need to be stopped before
1934 * we set about migrating their state.
1935 */
1936 QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
1937 env->xen_singleshot_timer_ns = va.u.timer.expires_ns;
1938 }
1939
1940 return 0;
1941 }
1942