/*
 * Xen HVM emulation support in KVM
 *
 * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/error-report.h"
#include "exec/target_page.h"
#include "hw/xen/xen.h"
#include "system/kvm_int.h"
#include "system/kvm_xen.h"
#include "kvm/kvm_i386.h"
#include "system/address-spaces.h"
#include "xen-emu.h"
#include "trace.h"
#include "system/runstate.h"

#include "hw/pci/msi.h"
#include "hw/i386/apic-msidef.h"
#include "hw/i386/e820_memory_layout.h"
#include "hw/i386/kvm/xen_overlay.h"
#include "hw/i386/kvm/xen_evtchn.h"
#include "hw/i386/kvm/xen_gnttab.h"
#include "hw/i386/kvm/xen_primary_console.h"
#include "hw/i386/kvm/xen_xenstore.h"

#include "hw/xen/interface/version.h"
#include "hw/xen/interface/sched.h"
#include "hw/xen/interface/memory.h"
#include "hw/xen/interface/hvm/hvm_op.h"
#include "hw/xen/interface/hvm/params.h"
#include "hw/xen/interface/vcpu.h"
#include "hw/xen/interface/event_channel.h"
#include "hw/xen/interface/grant_table.h"

#include "xen-compat.h"

static void xen_vcpu_singleshot_timer_event(void *opaque);
static void xen_vcpu_periodic_timer_event(void *opaque);
static int vcpuop_stop_singleshot_timer(CPUState *cs);

/*
 * A 64-bit build may be running a 32-bit guest in compat mode; a 32-bit
 * build only ever sees native-width hypercall arguments.
 */
#ifdef TARGET_X86_64
#define hypercall_compat32(longmode) (!(longmode))
#else
#define hypercall_compat32(longmode) (false)
#endif

/*
 * Translate a guest virtual address to a guest physical address via the
 * KVM_TRANSLATE ioctl. On success, *gpa is set and (if @len is non-NULL)
 * *len is the number of bytes remaining in the same guest page starting
 * at @gva. Returns false on translation failure, or if @is_write and the
 * page is not writeable.
 */
static bool kvm_gva_to_gpa(CPUState *cs, uint64_t gva, uint64_t *gpa,
                           size_t *len, bool is_write)
{
    struct kvm_translation tr = {
        .linear_address = gva,
    };

    if (len) {
        *len = TARGET_PAGE_SIZE - (gva & ~TARGET_PAGE_MASK);
    }

    if (kvm_vcpu_ioctl(cs, KVM_TRANSLATE, &tr) || !tr.valid ||
        (is_write && !tr.writeable)) {
        return false;
    }
    *gpa = tr.physical_address;
    return true;
}

/*
 * Copy between a guest virtual address range and a host buffer, one guest
 * page at a time (each page may map to a discontiguous GPA). Returns 0 on
 * success or -EFAULT if any page fails to translate.
 */
static int kvm_gva_rw(CPUState *cs, uint64_t gva, void *_buf, size_t sz,
                      bool is_write)
{
    uint8_t *buf = (uint8_t *)_buf;
    uint64_t gpa;
    size_t len;

    while (sz) {
        if (!kvm_gva_to_gpa(cs, gva, &gpa, &len, is_write)) {
            return -EFAULT;
        }
        if (len > sz) {
            len = sz;
        }

        cpu_physical_memory_rw(gpa, buf, len, is_write);

        buf += len;
        sz -= len;
        gva += len;
    }

    return 0;
}

static inline int kvm_copy_from_gva(CPUState *cs, uint64_t gva, void *buf,
                                    size_t sz)
{
    return kvm_gva_rw(cs, gva, buf, sz, false);
}

static inline int kvm_copy_to_gva(CPUState *cs, uint64_t gva, void *buf,
                                  size_t sz)
{
    return kvm_gva_rw(cs, gva, buf, sz, true);
}

/*
 * Enable Xen HVM emulation in KVM: check the required capabilities, tell
 * the kernel the advertised Xen version (when supported) and register the
 * hypercall MSR. May be called more than once; only the first successful
 * call performs the e820/overlay setup.
 */
int kvm_xen_init(KVMState *s, uint32_t hypercall_msr)
{
    const int required_caps = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
        KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_SHARED_INFO;
    struct kvm_xen_hvm_config cfg = {
        .msr = hypercall_msr,
        .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
    };
    int xen_caps, ret;

    xen_caps = kvm_check_extension(s, KVM_CAP_XEN_HVM);
    if (required_caps & ~xen_caps) {
        error_report("kvm: Xen HVM guest support not present or insufficient");
        return -ENOSYS;
    }

    if (xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND) {
        struct kvm_xen_hvm_attr ha = {
            .type = KVM_XEN_ATTR_TYPE_XEN_VERSION,
            .u.xen_version = s->xen_version,
        };
        /* Best effort; failure to set the version attr is not fatal. */
        (void)kvm_vm_ioctl(s, KVM_XEN_HVM_SET_ATTR, &ha);

        cfg.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
    }

    ret = kvm_vm_ioctl(s, KVM_XEN_HVM_CONFIG, &cfg);
    if (ret < 0) {
        error_report("kvm: Failed to enable Xen HVM support: %s",
                     strerror(-ret));
        return ret;
    }

    /* If called a second time, don't repeat the rest of the setup. */
    if (s->xen_caps) {
        return 0;
    }

    /*
     * Event channel delivery via GSI/PCI_INTX needs to poll the vcpu_info
     * of vCPU0 to deassert the IRQ when ->evtchn_upcall_pending is cleared.
     *
     * In the kernel, there's a notifier hook on the PIC/IOAPIC which allows
     * such things to be polled at precisely the right time. We *could* do
     * it nicely in the kernel: check vcpu_info[0]->evtchn_upcall_pending at
     * the moment the IRQ is acked, and see if it should be reasserted.
     *
     * But the in-kernel irqchip is deprecated, so we're unlikely to add
     * that support in the kernel. Insist on using the split irqchip mode
     * instead.
     *
     * This leaves us polling for the level going low in QEMU, which lacks
     * the appropriate hooks in its PIC/IOAPIC code. Even VFIO is sending a
     * spurious 'ack' to an INTX IRQ every time there's any MMIO access to
     * the device (for which it has to unmap the device and trap access, for
     * some period after an IRQ!!). In the Xen case, we do it on exit from
     * KVM_RUN, if the flag is set to say that the GSI is currently asserted.
     * Which is kind of icky, but less so than the VFIO one. I may fix them
     * both later...
     */
    if (!kvm_kernel_irqchip_split()) {
        error_report("kvm: Xen support requires kernel-irqchip=split");
        return -EINVAL;
    }

    s->xen_caps = xen_caps;

    /* Tell fw_cfg to notify the BIOS to reserve the range. */
    e820_add_entry(XEN_SPECIAL_AREA_ADDR, XEN_SPECIAL_AREA_SIZE, E820_RESERVED);

    /* The pages couldn't be overlaid until KVM was initialized */
    xen_primary_console_reset();
    xen_xenstore_reset();

    return 0;
}

/*
 * Per-vCPU Xen setup: tell the kernel our Xen vCPU ID (when EVTCHN_SEND
 * is available), reset the per-vCPU GPAs, and create the singleshot and
 * periodic timers.
 */
int kvm_xen_init_vcpu(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    int err;

    /*
     * The kernel needs to know the Xen/ACPI vCPU ID because that's
     * what the guest uses in hypercalls such as timers. It doesn't
     * match the APIC ID which is generally used for talking to the
     * kernel about vCPUs. And if vCPU threads race with creating
     * their KVM vCPUs out of order, it doesn't necessarily match
     * with the kernel's internal vCPU indices either.
     */
    if (kvm_xen_has_cap(EVTCHN_SEND)) {
        struct kvm_xen_vcpu_attr va = {
            .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID,
            .u.vcpu_id = cs->cpu_index,
        };
        err = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
        if (err) {
            error_report("kvm: Failed to set Xen vCPU ID attribute: %s",
                         strerror(-err));
            return err;
        }
    }

    env->xen_vcpu_info_gpa = INVALID_GPA;
    env->xen_vcpu_info_default_gpa = INVALID_GPA;
    env->xen_vcpu_time_info_gpa = INVALID_GPA;
    env->xen_vcpu_runstate_gpa = INVALID_GPA;

    qemu_mutex_init(&env->xen_timers_lock);
    env->xen_singleshot_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                             xen_vcpu_singleshot_timer_event,
                                             cpu);
    if (!env->xen_singleshot_timer) {
        return -ENOMEM;
    }
    env->xen_singleshot_timer->opaque = cs;

    env->xen_periodic_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                           xen_vcpu_periodic_timer_event,
                                           cpu);
    if (!env->xen_periodic_timer) {
        return -ENOMEM;
    }
    env->xen_periodic_timer->opaque = cs;

    return 0;
}

uint32_t kvm_xen_get_caps(void)
{
    return kvm_state->xen_caps;
}

/*
 * Handle the __HYPERVISOR_xen_version hypercall. Returns false if the
 * sub-command is not handled here (so the caller can report it as
 * unimplemented); otherwise stores the result in exit->u.hcall.result.
 */
static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu,
                                      int cmd, uint64_t arg)
{
    int err = 0;

    switch (cmd) {
    case XENVER_get_features: {
        struct xen_feature_info fi;

        /* No need for 32/64 compat handling */
        qemu_build_assert(sizeof(fi) == 8);

        err = kvm_copy_from_gva(CPU(cpu), arg, &fi, sizeof(fi));
        if (err) {
            break;
        }

        fi.submap = 0;
        if (fi.submap_idx == 0) {
            fi.submap |= 1 << XENFEAT_writable_page_tables |
                         1 << XENFEAT_writable_descriptor_tables |
                         1 << XENFEAT_auto_translated_physmap |
                         1 << XENFEAT_hvm_callback_vector |
                         1 << XENFEAT_hvm_safe_pvclock |
                         1 << XENFEAT_hvm_pirqs;
        }

        err = kvm_copy_to_gva(CPU(cpu), arg, &fi, sizeof(fi));
        break;
    }

    default:
        return false;
    }

    exit->u.hcall.result = err;
    return true;
}

/* Set a GPA-valued per-vCPU Xen attribute in the kernel. */
static int kvm_xen_set_vcpu_attr(CPUState *cs, uint16_t type, uint64_t gpa)
{
    struct kvm_xen_vcpu_attr xhsi;

    xhsi.type = type;
    xhsi.u.gpa = gpa;

    trace_kvm_xen_set_vcpu_attr(cs->cpu_index, type, gpa);

    return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xhsi);
}

/* Push env->xen_vcpu_callback_vector into the kernel's upcall-vector attr. */
static int kvm_xen_set_vcpu_callback_vector(CPUState *cs)
{
    uint8_t vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
    struct kvm_xen_vcpu_attr xva;

    xva.type = KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR;
    xva.u.vector = vector;

    trace_kvm_xen_set_vcpu_callback(cs->cpu_index, vector);

    return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xva);
}

/* run_on_cpu handler: record the callback vector and (if possible) tell KVM. */
static void do_set_vcpu_callback_vector(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_callback_vector = data.host_int;

    if (kvm_xen_has_cap(EVTCHN_SEND)) {
        kvm_xen_set_vcpu_callback_vector(cs);
    }
}

/*
 * Point the kernel's vcpu_info at @gpa, and cache a host mapping of it in
 * env->xen_vcpu_info_hva (holding a reference on the backing MemoryRegion)
 * so QEMU can read evtchn_upcall_pending directly on the fast path.
 * gpa == INVALID_GPA tears the mapping down.
 */
static int set_vcpu_info(CPUState *cs, uint64_t gpa)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    MemoryRegionSection mrs = { .mr = NULL };
    void *vcpu_info_hva = NULL;
    int ret;

    ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, gpa);
    if (ret || gpa == INVALID_GPA) {
        goto out;
    }

    mrs = memory_region_find(get_system_memory(), gpa,
                             sizeof(struct vcpu_info));
    if (mrs.mr && mrs.mr->ram_block &&
        !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
        vcpu_info_hva = qemu_map_ram_ptr(mrs.mr->ram_block,
                                         mrs.offset_within_region);
    }
    if (!vcpu_info_hva) {
        if (mrs.mr) {
            memory_region_unref(mrs.mr);
            mrs.mr = NULL;
        }
        ret = -EINVAL;
    }

 out:
    /* Drop the reference taken for any previous mapping. */
    if (env->xen_vcpu_info_mr) {
        memory_region_unref(env->xen_vcpu_info_mr);
    }
    env->xen_vcpu_info_hva = vcpu_info_hva;
    env->xen_vcpu_info_mr = mrs.mr;
    return ret;
}

static void do_set_vcpu_info_default_gpa(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_info_default_gpa = data.host_ulong;

    /* Changing the default does nothing if a vcpu_info was explicitly set. */
    if (env->xen_vcpu_info_gpa == INVALID_GPA) {
        set_vcpu_info(cs, env->xen_vcpu_info_default_gpa);
    }
}

static void do_set_vcpu_info_gpa(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_info_gpa = data.host_ulong;

    set_vcpu_info(cs, env->xen_vcpu_info_gpa);
}

void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id)
{
    CPUState *cs = qemu_get_cpu(vcpu_id);
    if (!cs) {
        return NULL;
    }

    return X86_CPU(cs)->env.xen_vcpu_info_hva;
}

/*
 * Called on exit from KVM_RUN: lower the GSI/PCI_INTX callback level if the
 * guest has cleared evtchn_upcall_pending in its vcpu_info.
 */
void kvm_xen_maybe_deassert_callback(CPUState *cs)
{
    CPUX86State *env = &X86_CPU(cs)->env;
    struct vcpu_info *vi = env->xen_vcpu_info_hva;
    if (!vi) {
        return;
    }

    /* If the evtchn_upcall_pending flag is cleared, turn the GSI off. */
    if (!vi->evtchn_upcall_pending) {
        bql_lock();
        /*
         * Check again now we have the lock, because it may have been
         * asserted in the interim. And we don't want to take the lock
         * every time because this is a fast path.
         */
        if (!vi->evtchn_upcall_pending) {
            X86_CPU(cs)->env.xen_callback_asserted = false;
            xen_evtchn_set_callback_level(0);
        }
        bql_unlock();
    }
}

void kvm_xen_set_callback_asserted(void)
{
    CPUState *cs = qemu_get_cpu(0);

    if (cs) {
        X86_CPU(cs)->env.xen_callback_asserted = true;
    }
}

bool kvm_xen_has_vcpu_callback_vector(void)
{
    CPUState *cs = qemu_get_cpu(0);

    return cs && !!X86_CPU(cs)->env.xen_vcpu_callback_vector;
}

/*
 * Deliver the event-channel upcall to @vcpu_id, either via the per-vCPU
 * callback vector (as an MSI to its LAPIC) or via the global callback
 * mechanism given by @type.
 */
void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type)
{
    CPUState *cs = qemu_get_cpu(vcpu_id);
    uint8_t vector;

    if (!cs) {
        return;
    }

    vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
    if (vector) {
        /*
         * The per-vCPU callback vector injected via lapic. Just
         * deliver it as an MSI.
         */
        MSIMessage msg = {
            .address = APIC_DEFAULT_ADDRESS |
                       (X86_CPU(cs)->apic_id << MSI_ADDR_DEST_ID_SHIFT),
            .data = vector | (1UL << MSI_DATA_LEVEL_SHIFT),
        };
        kvm_irqchip_send_msi(kvm_state, msg);
        return;
    }

    switch (type) {
    case HVM_PARAM_CALLBACK_TYPE_VECTOR:
        /*
         * If the evtchn_upcall_pending field in the vcpu_info is set, then
         * KVM will automatically deliver the vector on entering the vCPU
         * so all we have to do is kick it out.
         */
        qemu_cpu_kick(cs);
        break;

    case HVM_PARAM_CALLBACK_TYPE_GSI:
    case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
        /* Only vCPU0's upcall drives the shared GSI/INTX line. */
        if (vcpu_id == 0) {
            xen_evtchn_set_callback_level(1);
        }
        break;
    }
}

/* Must always be called with xen_timers_lock held */
static int kvm_xen_set_vcpu_timer(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    struct kvm_xen_vcpu_attr va = {
        .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
        .u.timer.port = env->xen_virq[VIRQ_TIMER],
        .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
        .u.timer.expires_ns = env->xen_singleshot_timer_ns,
    };

    return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
}

static void do_set_vcpu_timer_virq(CPUState *cs, run_on_cpu_data data)
{
    QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
    kvm_xen_set_vcpu_timer(cs);
}

/*
 * Bind (or, with port == 0, unbind) a VIRQ to an event channel port on the
 * given vCPU. VIRQ_TIMER additionally needs the kernel timer attr updated.
 */
int kvm_xen_set_vcpu_virq(uint32_t vcpu_id, uint16_t virq, uint16_t port)
{
    CPUState *cs = qemu_get_cpu(vcpu_id);

    if (!cs) {
        return -ENOENT;
    }

    /* cpu.h doesn't include the actual Xen header.
*/ 506 qemu_build_assert(NR_VIRQS == XEN_NR_VIRQS); 507 508 if (virq >= NR_VIRQS) { 509 return -EINVAL; 510 } 511 512 if (port && X86_CPU(cs)->env.xen_virq[virq]) { 513 return -EEXIST; 514 } 515 516 X86_CPU(cs)->env.xen_virq[virq] = port; 517 if (virq == VIRQ_TIMER && kvm_xen_has_cap(EVTCHN_SEND)) { 518 async_run_on_cpu(cs, do_set_vcpu_timer_virq, 519 RUN_ON_CPU_HOST_INT(port)); 520 } 521 return 0; 522 } 523 524 static void do_set_vcpu_time_info_gpa(CPUState *cs, run_on_cpu_data data) 525 { 526 X86CPU *cpu = X86_CPU(cs); 527 CPUX86State *env = &cpu->env; 528 529 env->xen_vcpu_time_info_gpa = data.host_ulong; 530 531 kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO, 532 env->xen_vcpu_time_info_gpa); 533 } 534 535 static void do_set_vcpu_runstate_gpa(CPUState *cs, run_on_cpu_data data) 536 { 537 X86CPU *cpu = X86_CPU(cs); 538 CPUX86State *env = &cpu->env; 539 540 env->xen_vcpu_runstate_gpa = data.host_ulong; 541 542 kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, 543 env->xen_vcpu_runstate_gpa); 544 } 545 546 static void do_vcpu_soft_reset(CPUState *cs, run_on_cpu_data data) 547 { 548 X86CPU *cpu = X86_CPU(cs); 549 CPUX86State *env = &cpu->env; 550 551 env->xen_vcpu_info_gpa = INVALID_GPA; 552 env->xen_vcpu_info_default_gpa = INVALID_GPA; 553 env->xen_vcpu_time_info_gpa = INVALID_GPA; 554 env->xen_vcpu_runstate_gpa = INVALID_GPA; 555 env->xen_vcpu_callback_vector = 0; 556 memset(env->xen_virq, 0, sizeof(env->xen_virq)); 557 558 set_vcpu_info(cs, INVALID_GPA); 559 kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO, 560 INVALID_GPA); 561 kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, 562 INVALID_GPA); 563 if (kvm_xen_has_cap(EVTCHN_SEND)) { 564 kvm_xen_set_vcpu_callback_vector(cs); 565 566 QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock); 567 env->xen_singleshot_timer_ns = 0; 568 kvm_xen_set_vcpu_timer(cs); 569 } else { 570 vcpuop_stop_singleshot_timer(cs); 571 }; 572 573 } 574 575 static int 
xen_set_shared_info(uint64_t gfn) 576 { 577 uint64_t gpa = gfn << TARGET_PAGE_BITS; 578 int i, err; 579 580 BQL_LOCK_GUARD(); 581 582 /* 583 * The xen_overlay device tells KVM about it too, since it had to 584 * do that on migration load anyway (unless we're going to jump 585 * through lots of hoops to maintain the fiction that this isn't 586 * KVM-specific. 587 */ 588 err = xen_overlay_map_shinfo_page(gpa); 589 if (err) { 590 return err; 591 } 592 593 trace_kvm_xen_set_shared_info(gfn); 594 595 for (i = 0; i < XEN_LEGACY_MAX_VCPUS; i++) { 596 CPUState *cpu = qemu_get_cpu(i); 597 if (cpu) { 598 async_run_on_cpu(cpu, do_set_vcpu_info_default_gpa, 599 RUN_ON_CPU_HOST_ULONG(gpa)); 600 } 601 gpa += sizeof(vcpu_info_t); 602 } 603 604 return err; 605 } 606 607 static int add_to_physmap_one(uint32_t space, uint64_t idx, uint64_t gfn) 608 { 609 switch (space) { 610 case XENMAPSPACE_shared_info: 611 if (idx > 0) { 612 return -EINVAL; 613 } 614 return xen_set_shared_info(gfn); 615 616 case XENMAPSPACE_grant_table: 617 return xen_gnttab_map_page(idx, gfn); 618 619 case XENMAPSPACE_gmfn: 620 case XENMAPSPACE_gmfn_range: 621 return -ENOTSUP; 622 623 case XENMAPSPACE_gmfn_foreign: 624 case XENMAPSPACE_dev_mmio: 625 return -EPERM; 626 627 default: 628 return -EINVAL; 629 } 630 } 631 632 static int do_add_to_physmap(struct kvm_xen_exit *exit, X86CPU *cpu, 633 uint64_t arg) 634 { 635 struct xen_add_to_physmap xatp; 636 CPUState *cs = CPU(cpu); 637 638 if (hypercall_compat32(exit->u.hcall.longmode)) { 639 struct compat_xen_add_to_physmap xatp32; 640 641 qemu_build_assert(sizeof(struct compat_xen_add_to_physmap) == 16); 642 if (kvm_copy_from_gva(cs, arg, &xatp32, sizeof(xatp32))) { 643 return -EFAULT; 644 } 645 xatp.domid = xatp32.domid; 646 xatp.size = xatp32.size; 647 xatp.space = xatp32.space; 648 xatp.idx = xatp32.idx; 649 xatp.gpfn = xatp32.gpfn; 650 } else { 651 if (kvm_copy_from_gva(cs, arg, &xatp, sizeof(xatp))) { 652 return -EFAULT; 653 } 654 } 655 656 if (xatp.domid != 
DOMID_SELF && xatp.domid != xen_domid) { 657 return -ESRCH; 658 } 659 660 return add_to_physmap_one(xatp.space, xatp.idx, xatp.gpfn); 661 } 662 663 static int do_add_to_physmap_batch(struct kvm_xen_exit *exit, X86CPU *cpu, 664 uint64_t arg) 665 { 666 struct xen_add_to_physmap_batch xatpb; 667 unsigned long idxs_gva, gpfns_gva, errs_gva; 668 CPUState *cs = CPU(cpu); 669 size_t op_sz; 670 671 if (hypercall_compat32(exit->u.hcall.longmode)) { 672 struct compat_xen_add_to_physmap_batch xatpb32; 673 674 qemu_build_assert(sizeof(struct compat_xen_add_to_physmap_batch) == 20); 675 if (kvm_copy_from_gva(cs, arg, &xatpb32, sizeof(xatpb32))) { 676 return -EFAULT; 677 } 678 xatpb.domid = xatpb32.domid; 679 xatpb.space = xatpb32.space; 680 xatpb.size = xatpb32.size; 681 682 idxs_gva = xatpb32.idxs.c; 683 gpfns_gva = xatpb32.gpfns.c; 684 errs_gva = xatpb32.errs.c; 685 op_sz = sizeof(uint32_t); 686 } else { 687 if (kvm_copy_from_gva(cs, arg, &xatpb, sizeof(xatpb))) { 688 return -EFAULT; 689 } 690 op_sz = sizeof(unsigned long); 691 idxs_gva = (unsigned long)xatpb.idxs.p; 692 gpfns_gva = (unsigned long)xatpb.gpfns.p; 693 errs_gva = (unsigned long)xatpb.errs.p; 694 } 695 696 if (xatpb.domid != DOMID_SELF && xatpb.domid != xen_domid) { 697 return -ESRCH; 698 } 699 700 /* Explicitly invalid for the batch op. Not that we implement it anyway. 
*/ 701 if (xatpb.space == XENMAPSPACE_gmfn_range) { 702 return -EINVAL; 703 } 704 705 while (xatpb.size--) { 706 unsigned long idx = 0; 707 unsigned long gpfn = 0; 708 int err; 709 710 /* For 32-bit compat this only copies the low 32 bits of each */ 711 if (kvm_copy_from_gva(cs, idxs_gva, &idx, op_sz) || 712 kvm_copy_from_gva(cs, gpfns_gva, &gpfn, op_sz)) { 713 return -EFAULT; 714 } 715 idxs_gva += op_sz; 716 gpfns_gva += op_sz; 717 718 err = add_to_physmap_one(xatpb.space, idx, gpfn); 719 720 if (kvm_copy_to_gva(cs, errs_gva, &err, sizeof(err))) { 721 return -EFAULT; 722 } 723 errs_gva += sizeof(err); 724 } 725 return 0; 726 } 727 728 static bool kvm_xen_hcall_memory_op(struct kvm_xen_exit *exit, X86CPU *cpu, 729 int cmd, uint64_t arg) 730 { 731 int err; 732 733 switch (cmd) { 734 case XENMEM_add_to_physmap: 735 err = do_add_to_physmap(exit, cpu, arg); 736 break; 737 738 case XENMEM_add_to_physmap_batch: 739 err = do_add_to_physmap_batch(exit, cpu, arg); 740 break; 741 742 default: 743 return false; 744 } 745 746 exit->u.hcall.result = err; 747 return true; 748 } 749 750 static bool handle_set_param(struct kvm_xen_exit *exit, X86CPU *cpu, 751 uint64_t arg) 752 { 753 CPUState *cs = CPU(cpu); 754 struct xen_hvm_param hp; 755 int err = 0; 756 757 /* No need for 32/64 compat handling */ 758 qemu_build_assert(sizeof(hp) == 16); 759 760 if (kvm_copy_from_gva(cs, arg, &hp, sizeof(hp))) { 761 err = -EFAULT; 762 goto out; 763 } 764 765 if (hp.domid != DOMID_SELF && hp.domid != xen_domid) { 766 err = -ESRCH; 767 goto out; 768 } 769 770 switch (hp.index) { 771 case HVM_PARAM_CALLBACK_IRQ: 772 bql_lock(); 773 err = xen_evtchn_set_callback_param(hp.value); 774 bql_unlock(); 775 xen_set_long_mode(exit->u.hcall.longmode); 776 break; 777 default: 778 return false; 779 } 780 781 out: 782 exit->u.hcall.result = err; 783 return true; 784 } 785 786 static bool handle_get_param(struct kvm_xen_exit *exit, X86CPU *cpu, 787 uint64_t arg) 788 { 789 CPUState *cs = CPU(cpu); 790 struct 
xen_hvm_param hp; 791 int err = 0; 792 793 /* No need for 32/64 compat handling */ 794 qemu_build_assert(sizeof(hp) == 16); 795 796 if (kvm_copy_from_gva(cs, arg, &hp, sizeof(hp))) { 797 err = -EFAULT; 798 goto out; 799 } 800 801 if (hp.domid != DOMID_SELF && hp.domid != xen_domid) { 802 err = -ESRCH; 803 goto out; 804 } 805 806 switch (hp.index) { 807 case HVM_PARAM_STORE_PFN: 808 hp.value = XEN_SPECIAL_PFN(XENSTORE); 809 break; 810 case HVM_PARAM_STORE_EVTCHN: 811 hp.value = xen_xenstore_get_port(); 812 break; 813 case HVM_PARAM_CONSOLE_PFN: 814 hp.value = xen_primary_console_get_pfn(); 815 if (!hp.value) { 816 err = -EINVAL; 817 } 818 break; 819 case HVM_PARAM_CONSOLE_EVTCHN: 820 hp.value = xen_primary_console_get_port(); 821 if (!hp.value) { 822 err = -EINVAL; 823 } 824 break; 825 default: 826 return false; 827 } 828 829 if (!err && kvm_copy_to_gva(cs, arg, &hp, sizeof(hp))) { 830 err = -EFAULT; 831 } 832 out: 833 exit->u.hcall.result = err; 834 return true; 835 } 836 837 static int kvm_xen_hcall_evtchn_upcall_vector(struct kvm_xen_exit *exit, 838 X86CPU *cpu, uint64_t arg) 839 { 840 struct xen_hvm_evtchn_upcall_vector up; 841 CPUState *target_cs; 842 843 /* No need for 32/64 compat handling */ 844 qemu_build_assert(sizeof(up) == 8); 845 846 if (kvm_copy_from_gva(CPU(cpu), arg, &up, sizeof(up))) { 847 return -EFAULT; 848 } 849 850 if (up.vector < 0x10) { 851 return -EINVAL; 852 } 853 854 target_cs = qemu_get_cpu(up.vcpu); 855 if (!target_cs) { 856 return -EINVAL; 857 } 858 859 async_run_on_cpu(target_cs, do_set_vcpu_callback_vector, 860 RUN_ON_CPU_HOST_INT(up.vector)); 861 return 0; 862 } 863 864 static bool kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, X86CPU *cpu, 865 int cmd, uint64_t arg) 866 { 867 int ret = -ENOSYS; 868 switch (cmd) { 869 case HVMOP_set_evtchn_upcall_vector: 870 ret = kvm_xen_hcall_evtchn_upcall_vector(exit, cpu, arg); 871 break; 872 873 case HVMOP_pagetable_dying: 874 ret = -ENOSYS; 875 break; 876 877 case HVMOP_set_param: 878 return 
handle_set_param(exit, cpu, arg); 879 880 case HVMOP_get_param: 881 return handle_get_param(exit, cpu, arg); 882 883 default: 884 return false; 885 } 886 887 exit->u.hcall.result = ret; 888 return true; 889 } 890 891 static int vcpuop_register_vcpu_info(CPUState *cs, CPUState *target, 892 uint64_t arg) 893 { 894 struct vcpu_register_vcpu_info rvi; 895 uint64_t gpa; 896 897 /* No need for 32/64 compat handling */ 898 qemu_build_assert(sizeof(rvi) == 16); 899 qemu_build_assert(sizeof(struct vcpu_info) == 64); 900 901 if (!target) { 902 return -ENOENT; 903 } 904 905 if (kvm_copy_from_gva(cs, arg, &rvi, sizeof(rvi))) { 906 return -EFAULT; 907 } 908 909 if (rvi.offset > TARGET_PAGE_SIZE - sizeof(struct vcpu_info)) { 910 return -EINVAL; 911 } 912 913 gpa = ((rvi.mfn << TARGET_PAGE_BITS) + rvi.offset); 914 async_run_on_cpu(target, do_set_vcpu_info_gpa, RUN_ON_CPU_HOST_ULONG(gpa)); 915 return 0; 916 } 917 918 static int vcpuop_register_vcpu_time_info(CPUState *cs, CPUState *target, 919 uint64_t arg) 920 { 921 struct vcpu_register_time_memory_area tma; 922 uint64_t gpa; 923 size_t len; 924 925 /* No need for 32/64 compat handling */ 926 qemu_build_assert(sizeof(tma) == 8); 927 qemu_build_assert(sizeof(struct vcpu_time_info) == 32); 928 929 if (!target) { 930 return -ENOENT; 931 } 932 933 if (kvm_copy_from_gva(cs, arg, &tma, sizeof(tma))) { 934 return -EFAULT; 935 } 936 937 /* 938 * Xen actually uses the GVA and does the translation through the guest 939 * page tables each time. But Linux/KVM uses the GPA, on the assumption 940 * that guests only ever use *global* addresses (kernel virtual addresses) 941 * for it. If Linux is changed to redo the GVA→GPA translation each time, 942 * it will offer a new vCPU attribute for that, and we'll use it instead. 
943 */ 944 if (!kvm_gva_to_gpa(cs, tma.addr.p, &gpa, &len, false) || 945 len < sizeof(struct vcpu_time_info)) { 946 return -EFAULT; 947 } 948 949 async_run_on_cpu(target, do_set_vcpu_time_info_gpa, 950 RUN_ON_CPU_HOST_ULONG(gpa)); 951 return 0; 952 } 953 954 static int vcpuop_register_runstate_info(CPUState *cs, CPUState *target, 955 uint64_t arg) 956 { 957 struct vcpu_register_runstate_memory_area rma; 958 uint64_t gpa; 959 size_t len; 960 961 /* No need for 32/64 compat handling */ 962 qemu_build_assert(sizeof(rma) == 8); 963 /* The runstate area actually does change size, but Linux copes. */ 964 965 if (!target) { 966 return -ENOENT; 967 } 968 969 if (kvm_copy_from_gva(cs, arg, &rma, sizeof(rma))) { 970 return -EFAULT; 971 } 972 973 /* As with vcpu_time_info, Xen actually uses the GVA but KVM doesn't. */ 974 if (!kvm_gva_to_gpa(cs, rma.addr.p, &gpa, &len, false)) { 975 return -EFAULT; 976 } 977 978 async_run_on_cpu(target, do_set_vcpu_runstate_gpa, 979 RUN_ON_CPU_HOST_ULONG(gpa)); 980 return 0; 981 } 982 983 static uint64_t kvm_get_current_ns(void) 984 { 985 struct kvm_clock_data data; 986 int ret; 987 988 ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data); 989 if (ret < 0) { 990 fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); 991 abort(); 992 } 993 994 return data.clock; 995 } 996 997 static void xen_vcpu_singleshot_timer_event(void *opaque) 998 { 999 CPUState *cpu = opaque; 1000 CPUX86State *env = &X86_CPU(cpu)->env; 1001 uint16_t port = env->xen_virq[VIRQ_TIMER]; 1002 1003 if (likely(port)) { 1004 xen_evtchn_set_port(port); 1005 } 1006 1007 qemu_mutex_lock(&env->xen_timers_lock); 1008 env->xen_singleshot_timer_ns = 0; 1009 qemu_mutex_unlock(&env->xen_timers_lock); 1010 } 1011 1012 static void xen_vcpu_periodic_timer_event(void *opaque) 1013 { 1014 CPUState *cpu = opaque; 1015 CPUX86State *env = &X86_CPU(cpu)->env; 1016 uint16_t port = env->xen_virq[VIRQ_TIMER]; 1017 int64_t qemu_now; 1018 1019 if (likely(port)) { 1020 xen_evtchn_set_port(port); 
1021 } 1022 1023 qemu_mutex_lock(&env->xen_timers_lock); 1024 1025 qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); 1026 timer_mod_ns(env->xen_periodic_timer, 1027 qemu_now + env->xen_periodic_timer_period); 1028 1029 qemu_mutex_unlock(&env->xen_timers_lock); 1030 } 1031 1032 static int do_set_periodic_timer(CPUState *target, uint64_t period_ns) 1033 { 1034 CPUX86State *tenv = &X86_CPU(target)->env; 1035 int64_t qemu_now; 1036 1037 timer_del(tenv->xen_periodic_timer); 1038 1039 qemu_mutex_lock(&tenv->xen_timers_lock); 1040 1041 qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); 1042 timer_mod_ns(tenv->xen_periodic_timer, qemu_now + period_ns); 1043 tenv->xen_periodic_timer_period = period_ns; 1044 1045 qemu_mutex_unlock(&tenv->xen_timers_lock); 1046 return 0; 1047 } 1048 1049 #define MILLISECS(_ms) ((int64_t)((_ms) * 1000000ULL)) 1050 #define MICROSECS(_us) ((int64_t)((_us) * 1000ULL)) 1051 #define STIME_MAX ((time_t)((int64_t)~0ull >> 1)) 1052 /* Chosen so (NOW() + delta) won't overflow without an uptime of 200 years */ 1053 #define STIME_DELTA_MAX ((int64_t)((uint64_t)~0ull >> 2)) 1054 1055 static int vcpuop_set_periodic_timer(CPUState *cs, CPUState *target, 1056 uint64_t arg) 1057 { 1058 struct vcpu_set_periodic_timer spt; 1059 1060 qemu_build_assert(sizeof(spt) == 8); 1061 if (kvm_copy_from_gva(cs, arg, &spt, sizeof(spt))) { 1062 return -EFAULT; 1063 } 1064 1065 if (spt.period_ns < MILLISECS(1) || spt.period_ns > STIME_DELTA_MAX) { 1066 return -EINVAL; 1067 } 1068 1069 return do_set_periodic_timer(target, spt.period_ns); 1070 } 1071 1072 static int vcpuop_stop_periodic_timer(CPUState *target) 1073 { 1074 CPUX86State *tenv = &X86_CPU(target)->env; 1075 1076 qemu_mutex_lock(&tenv->xen_timers_lock); 1077 1078 timer_del(tenv->xen_periodic_timer); 1079 tenv->xen_periodic_timer_period = 0; 1080 1081 qemu_mutex_unlock(&tenv->xen_timers_lock); 1082 return 0; 1083 } 1084 1085 /* 1086 * Userspace handling of timer, for older kernels. 
 * Must always be called with xen_timers_lock held.
 */
static int do_set_singleshot_timer(CPUState *cs, uint64_t timeout_abs,
                                   bool linux_wa)
{
    CPUX86State *env = &X86_CPU(cs)->env;
    int64_t now = kvm_get_current_ns();
    int64_t qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    int64_t delta = timeout_abs - now;

    if (linux_wa && unlikely((int64_t)timeout_abs < 0 ||
                             (delta > 0 && (uint32_t)(delta >> 50) != 0))) {
        /*
         * Xen has a 'Linux workaround' in do_set_timer_op() which checks
         * for negative absolute timeout values (caused by integer
         * overflow), and for values about 13 days in the future (2^50ns)
         * which would be caused by jiffies overflow. For those cases, it
         * sets the timeout 100ms in the future (not *too* soon, since if
         * a guest really did set a long timeout on purpose we don't want
         * to keep churning CPU time by waking it up).
         */
        delta = (100 * SCALE_MS);
        timeout_abs = now + delta;
    }

    /* Guest timeouts are on the KVM clock; QEMU timers on CLOCK_VIRTUAL. */
    timer_mod_ns(env->xen_singleshot_timer, qemu_now + delta);
    env->xen_singleshot_timer_ns = now + delta;
    return 0;
}

/* VCPUOP_set_singleshot_timer: copy in the argument and arm the timer. */
static int vcpuop_set_singleshot_timer(CPUState *cs, uint64_t arg)
{
    struct vcpu_set_singleshot_timer sst = { 0 };

    /*
     * The struct is a uint64_t followed by a uint32_t. On 32-bit that
     * makes it 12 bytes. On 64-bit it gets padded to 16. The parts
     * that get used are identical, and there's four bytes of padding
     * unused at the end. For true Xen compatibility we should attempt
     * to copy the full 16 bytes from 64-bit guests, and return -EFAULT
     * if we can't get the padding too. But that's daft. Just copy what
     * we need.
     */
    qemu_build_assert(offsetof(struct vcpu_set_singleshot_timer, flags) == 8);
    qemu_build_assert(sizeof(sst) >= 12);

    if (kvm_copy_from_gva(cs, arg, &sst, 12)) {
        return -EFAULT;
    }

    QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);

    /*
     * We ignore the VCPU_SSHOTTMR_future flag, just as Xen now does.
     * The only guest that ever used it, got it wrong.
     * https://xenbits.xen.org/gitweb/?p=xen.git;a=commitdiff;h=19c6cbd909
     */
    return do_set_singleshot_timer(cs, sst.timeout_abs_ns, false);
}

/* VCPUOP_stop_singleshot_timer: cancel the singleshot timer. */
static int vcpuop_stop_singleshot_timer(CPUState *cs)
{
    CPUX86State *env = &X86_CPU(cs)->env;

    qemu_mutex_lock(&env->xen_timers_lock);

    timer_del(env->xen_singleshot_timer);
    env->xen_singleshot_timer_ns = 0;

    qemu_mutex_unlock(&env->xen_timers_lock);
    return 0;
}

/*
 * __HYPERVISOR_set_timer_op: timeout == 0 cancels the singleshot timer,
 * anything else arms it (with the Xen 'Linux workaround' applied).
 */
static bool kvm_xen_hcall_set_timer_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                       uint64_t timeout)
{
    int err;

    if (unlikely(timeout == 0)) {
        err = vcpuop_stop_singleshot_timer(CPU(cpu));
    } else {
        QEMU_LOCK_GUARD(&X86_CPU(cpu)->env.xen_timers_lock);
        err = do_set_singleshot_timer(CPU(cpu), timeout, true);
    }
    exit->u.hcall.result = err;
    return true;
}

/*
 * Dispatch the __HYPERVISOR_vcpu_op hypercall. The singleshot timer ops
 * may only target the calling vCPU. Returns false if the sub-command is
 * not handled here.
 */
static bool kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                  int cmd, int vcpu_id, uint64_t arg)
{
    CPUState *cs = CPU(cpu);
    CPUState *dest = cs->cpu_index == vcpu_id ? cs : qemu_get_cpu(vcpu_id);
    int err;

    if (!dest) {
        err = -ENOENT;
        goto out;
    }

    switch (cmd) {
    case VCPUOP_register_runstate_memory_area:
        err = vcpuop_register_runstate_info(cs, dest, arg);
        break;
    case VCPUOP_register_vcpu_time_memory_area:
        err = vcpuop_register_vcpu_time_info(cs, dest, arg);
        break;
    case VCPUOP_register_vcpu_info:
        err = vcpuop_register_vcpu_info(cs, dest, arg);
        break;
    case VCPUOP_set_singleshot_timer: {
        if (cs->cpu_index == vcpu_id) {
            err = vcpuop_set_singleshot_timer(dest, arg);
        } else {
            err = -EINVAL;
        }
        break;
    }
    case VCPUOP_stop_singleshot_timer:
        if (cs->cpu_index == vcpu_id) {
            err = vcpuop_stop_singleshot_timer(dest);
        } else {
            err = -EINVAL;
        }
        break;
    case VCPUOP_set_periodic_timer: {
        err = vcpuop_set_periodic_timer(cs, dest, arg);
        break;
    }
    case VCPUOP_stop_periodic_timer:
        err = vcpuop_stop_periodic_timer(dest);
        break;

    default:
        return false;
    }

 out:
    exit->u.hcall.result = err;
    return true;
}

/*
 * Dispatch the __HYPERVISOR_event_channel_op hypercall. FIFO event
 * channels (EVTCHNOP_init_control et al.) are not supported.
 */
static bool kvm_xen_hcall_evtchn_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                    int cmd, uint64_t arg)
{
    CPUState *cs = CPU(cpu);
    int err = -ENOSYS;

    switch (cmd) {
    case EVTCHNOP_init_control:
    case EVTCHNOP_expand_array:
    case EVTCHNOP_set_priority:
        /* We do not support FIFO channels at this point */
        err = -ENOSYS;
        break;

    case EVTCHNOP_status: {
        struct evtchn_status status;

        qemu_build_assert(sizeof(status) == 24);
        if (kvm_copy_from_gva(cs, arg, &status, sizeof(status))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_status_op(&status);
        if (!err && kvm_copy_to_gva(cs, arg, &status, sizeof(status))) {
            err = -EFAULT;
        }
        break;
    }
    case EVTCHNOP_close: {
        struct evtchn_close close;

1261 qemu_build_assert(sizeof(close) == 4); 1262 if (kvm_copy_from_gva(cs, arg, &close, sizeof(close))) { 1263 err = -EFAULT; 1264 break; 1265 } 1266 1267 err = xen_evtchn_close_op(&close); 1268 break; 1269 } 1270 case EVTCHNOP_unmask: { 1271 struct evtchn_unmask unmask; 1272 1273 qemu_build_assert(sizeof(unmask) == 4); 1274 if (kvm_copy_from_gva(cs, arg, &unmask, sizeof(unmask))) { 1275 err = -EFAULT; 1276 break; 1277 } 1278 1279 err = xen_evtchn_unmask_op(&unmask); 1280 break; 1281 } 1282 case EVTCHNOP_bind_virq: { 1283 struct evtchn_bind_virq virq; 1284 1285 qemu_build_assert(sizeof(virq) == 12); 1286 if (kvm_copy_from_gva(cs, arg, &virq, sizeof(virq))) { 1287 err = -EFAULT; 1288 break; 1289 } 1290 1291 err = xen_evtchn_bind_virq_op(&virq); 1292 if (!err && kvm_copy_to_gva(cs, arg, &virq, sizeof(virq))) { 1293 err = -EFAULT; 1294 } 1295 break; 1296 } 1297 case EVTCHNOP_bind_pirq: { 1298 struct evtchn_bind_pirq pirq; 1299 1300 qemu_build_assert(sizeof(pirq) == 12); 1301 if (kvm_copy_from_gva(cs, arg, &pirq, sizeof(pirq))) { 1302 err = -EFAULT; 1303 break; 1304 } 1305 1306 err = xen_evtchn_bind_pirq_op(&pirq); 1307 if (!err && kvm_copy_to_gva(cs, arg, &pirq, sizeof(pirq))) { 1308 err = -EFAULT; 1309 } 1310 break; 1311 } 1312 case EVTCHNOP_bind_ipi: { 1313 struct evtchn_bind_ipi ipi; 1314 1315 qemu_build_assert(sizeof(ipi) == 8); 1316 if (kvm_copy_from_gva(cs, arg, &ipi, sizeof(ipi))) { 1317 err = -EFAULT; 1318 break; 1319 } 1320 1321 err = xen_evtchn_bind_ipi_op(&ipi); 1322 if (!err && kvm_copy_to_gva(cs, arg, &ipi, sizeof(ipi))) { 1323 err = -EFAULT; 1324 } 1325 break; 1326 } 1327 case EVTCHNOP_send: { 1328 struct evtchn_send send; 1329 1330 qemu_build_assert(sizeof(send) == 4); 1331 if (kvm_copy_from_gva(cs, arg, &send, sizeof(send))) { 1332 err = -EFAULT; 1333 break; 1334 } 1335 1336 err = xen_evtchn_send_op(&send); 1337 break; 1338 } 1339 case EVTCHNOP_alloc_unbound: { 1340 struct evtchn_alloc_unbound alloc; 1341 1342 qemu_build_assert(sizeof(alloc) == 8); 
1343 if (kvm_copy_from_gva(cs, arg, &alloc, sizeof(alloc))) { 1344 err = -EFAULT; 1345 break; 1346 } 1347 1348 err = xen_evtchn_alloc_unbound_op(&alloc); 1349 if (!err && kvm_copy_to_gva(cs, arg, &alloc, sizeof(alloc))) { 1350 err = -EFAULT; 1351 } 1352 break; 1353 } 1354 case EVTCHNOP_bind_interdomain: { 1355 struct evtchn_bind_interdomain interdomain; 1356 1357 qemu_build_assert(sizeof(interdomain) == 12); 1358 if (kvm_copy_from_gva(cs, arg, &interdomain, sizeof(interdomain))) { 1359 err = -EFAULT; 1360 break; 1361 } 1362 1363 err = xen_evtchn_bind_interdomain_op(&interdomain); 1364 if (!err && 1365 kvm_copy_to_gva(cs, arg, &interdomain, sizeof(interdomain))) { 1366 err = -EFAULT; 1367 } 1368 break; 1369 } 1370 case EVTCHNOP_bind_vcpu: { 1371 struct evtchn_bind_vcpu vcpu; 1372 1373 qemu_build_assert(sizeof(vcpu) == 8); 1374 if (kvm_copy_from_gva(cs, arg, &vcpu, sizeof(vcpu))) { 1375 err = -EFAULT; 1376 break; 1377 } 1378 1379 err = xen_evtchn_bind_vcpu_op(&vcpu); 1380 break; 1381 } 1382 case EVTCHNOP_reset: { 1383 struct evtchn_reset reset; 1384 1385 qemu_build_assert(sizeof(reset) == 2); 1386 if (kvm_copy_from_gva(cs, arg, &reset, sizeof(reset))) { 1387 err = -EFAULT; 1388 break; 1389 } 1390 1391 err = xen_evtchn_reset_op(&reset); 1392 break; 1393 } 1394 default: 1395 return false; 1396 } 1397 1398 exit->u.hcall.result = err; 1399 return true; 1400 } 1401 1402 int kvm_xen_soft_reset(void) 1403 { 1404 CPUState *cpu; 1405 int err; 1406 1407 assert(bql_locked()); 1408 1409 trace_kvm_xen_soft_reset(); 1410 1411 err = xen_evtchn_soft_reset(); 1412 if (err) { 1413 return err; 1414 } 1415 1416 /* 1417 * Zero is the reset/startup state for HVM_PARAM_CALLBACK_IRQ. Strictly, 1418 * it maps to HVM_PARAM_CALLBACK_TYPE_GSI with GSI#0, but Xen refuses to 1419 * to deliver to the timer interrupt and treats that as 'disabled'. 
1420 */ 1421 err = xen_evtchn_set_callback_param(0); 1422 if (err) { 1423 return err; 1424 } 1425 1426 CPU_FOREACH(cpu) { 1427 async_run_on_cpu(cpu, do_vcpu_soft_reset, RUN_ON_CPU_NULL); 1428 } 1429 1430 err = xen_overlay_map_shinfo_page(INVALID_GFN); 1431 if (err) { 1432 return err; 1433 } 1434 1435 err = xen_gnttab_reset(); 1436 if (err) { 1437 return err; 1438 } 1439 1440 err = xen_primary_console_reset(); 1441 if (err) { 1442 return err; 1443 } 1444 1445 err = xen_xenstore_reset(); 1446 if (err) { 1447 return err; 1448 } 1449 1450 return 0; 1451 } 1452 1453 static int schedop_shutdown(CPUState *cs, uint64_t arg) 1454 { 1455 struct sched_shutdown shutdown; 1456 int ret = 0; 1457 1458 /* No need for 32/64 compat handling */ 1459 qemu_build_assert(sizeof(shutdown) == 4); 1460 1461 if (kvm_copy_from_gva(cs, arg, &shutdown, sizeof(shutdown))) { 1462 return -EFAULT; 1463 } 1464 1465 switch (shutdown.reason) { 1466 case SHUTDOWN_crash: 1467 cpu_dump_state(cs, stderr, CPU_DUMP_CODE); 1468 qemu_system_guest_panicked(NULL); 1469 break; 1470 1471 case SHUTDOWN_reboot: 1472 qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); 1473 break; 1474 1475 case SHUTDOWN_poweroff: 1476 qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); 1477 break; 1478 1479 case SHUTDOWN_soft_reset: 1480 bql_lock(); 1481 ret = kvm_xen_soft_reset(); 1482 bql_unlock(); 1483 break; 1484 1485 default: 1486 ret = -EINVAL; 1487 break; 1488 } 1489 1490 return ret; 1491 } 1492 1493 static bool kvm_xen_hcall_sched_op(struct kvm_xen_exit *exit, X86CPU *cpu, 1494 int cmd, uint64_t arg) 1495 { 1496 CPUState *cs = CPU(cpu); 1497 int err = -ENOSYS; 1498 1499 switch (cmd) { 1500 case SCHEDOP_shutdown: 1501 err = schedop_shutdown(cs, arg); 1502 break; 1503 1504 case SCHEDOP_poll: 1505 /* 1506 * Linux will panic if this doesn't work. 
Just yield; it's not 1507 * worth overthinking it because with event channel handling 1508 * in KVM, the kernel will intercept this and it will never 1509 * reach QEMU anyway. The semantics of the hypercall explicltly 1510 * permit spurious wakeups. 1511 */ 1512 case SCHEDOP_yield: 1513 sched_yield(); 1514 err = 0; 1515 break; 1516 1517 default: 1518 return false; 1519 } 1520 1521 exit->u.hcall.result = err; 1522 return true; 1523 } 1524 1525 static bool kvm_xen_hcall_gnttab_op(struct kvm_xen_exit *exit, X86CPU *cpu, 1526 int cmd, uint64_t arg, int count) 1527 { 1528 CPUState *cs = CPU(cpu); 1529 int err; 1530 1531 switch (cmd) { 1532 case GNTTABOP_set_version: { 1533 struct gnttab_set_version set; 1534 1535 qemu_build_assert(sizeof(set) == 4); 1536 if (kvm_copy_from_gva(cs, arg, &set, sizeof(set))) { 1537 err = -EFAULT; 1538 break; 1539 } 1540 1541 err = xen_gnttab_set_version_op(&set); 1542 if (!err && kvm_copy_to_gva(cs, arg, &set, sizeof(set))) { 1543 err = -EFAULT; 1544 } 1545 break; 1546 } 1547 case GNTTABOP_get_version: { 1548 struct gnttab_get_version get; 1549 1550 qemu_build_assert(sizeof(get) == 8); 1551 if (kvm_copy_from_gva(cs, arg, &get, sizeof(get))) { 1552 err = -EFAULT; 1553 break; 1554 } 1555 1556 err = xen_gnttab_get_version_op(&get); 1557 if (!err && kvm_copy_to_gva(cs, arg, &get, sizeof(get))) { 1558 err = -EFAULT; 1559 } 1560 break; 1561 } 1562 case GNTTABOP_query_size: { 1563 struct gnttab_query_size size; 1564 1565 qemu_build_assert(sizeof(size) == 16); 1566 if (kvm_copy_from_gva(cs, arg, &size, sizeof(size))) { 1567 err = -EFAULT; 1568 break; 1569 } 1570 1571 err = xen_gnttab_query_size_op(&size); 1572 if (!err && kvm_copy_to_gva(cs, arg, &size, sizeof(size))) { 1573 err = -EFAULT; 1574 } 1575 break; 1576 } 1577 case GNTTABOP_setup_table: 1578 case GNTTABOP_copy: 1579 case GNTTABOP_map_grant_ref: 1580 case GNTTABOP_unmap_grant_ref: 1581 case GNTTABOP_swap_grant_ref: 1582 return false; 1583 1584 default: 1585 /* Xen explicitly returns 
-ENOSYS to HVM guests for all others */ 1586 err = -ENOSYS; 1587 break; 1588 } 1589 1590 exit->u.hcall.result = err; 1591 return true; 1592 } 1593 1594 static bool kvm_xen_hcall_physdev_op(struct kvm_xen_exit *exit, X86CPU *cpu, 1595 int cmd, uint64_t arg) 1596 { 1597 CPUState *cs = CPU(cpu); 1598 int err; 1599 1600 switch (cmd) { 1601 case PHYSDEVOP_map_pirq: { 1602 struct physdev_map_pirq map; 1603 1604 if (hypercall_compat32(exit->u.hcall.longmode)) { 1605 struct compat_physdev_map_pirq *map32 = (void *)↦ 1606 1607 if (kvm_copy_from_gva(cs, arg, map32, sizeof(*map32))) { 1608 return -EFAULT; 1609 } 1610 1611 /* 1612 * The only thing that's different is the alignment of the 1613 * uint64_t table_base at the end, which gets padding to make 1614 * it 64-bit aligned in the 64-bit version. 1615 */ 1616 qemu_build_assert(sizeof(*map32) == 36); 1617 qemu_build_assert(offsetof(struct physdev_map_pirq, entry_nr) == 1618 offsetof(struct compat_physdev_map_pirq, entry_nr)); 1619 memmove(&map.table_base, &map32->table_base, sizeof(map.table_base)); 1620 } else { 1621 if (kvm_copy_from_gva(cs, arg, &map, sizeof(map))) { 1622 err = -EFAULT; 1623 break; 1624 } 1625 } 1626 err = xen_physdev_map_pirq(&map); 1627 /* 1628 * Since table_base is an IN parameter and won't be changed, just 1629 * copy the size of the compat structure back to the guest. 
1630 */ 1631 if (!err && kvm_copy_to_gva(cs, arg, &map, 1632 sizeof(struct compat_physdev_map_pirq))) { 1633 err = -EFAULT; 1634 } 1635 break; 1636 } 1637 case PHYSDEVOP_unmap_pirq: { 1638 struct physdev_unmap_pirq unmap; 1639 1640 qemu_build_assert(sizeof(unmap) == 8); 1641 if (kvm_copy_from_gva(cs, arg, &unmap, sizeof(unmap))) { 1642 err = -EFAULT; 1643 break; 1644 } 1645 1646 err = xen_physdev_unmap_pirq(&unmap); 1647 if (!err && kvm_copy_to_gva(cs, arg, &unmap, sizeof(unmap))) { 1648 err = -EFAULT; 1649 } 1650 break; 1651 } 1652 case PHYSDEVOP_eoi: { 1653 struct physdev_eoi eoi; 1654 1655 qemu_build_assert(sizeof(eoi) == 4); 1656 if (kvm_copy_from_gva(cs, arg, &eoi, sizeof(eoi))) { 1657 err = -EFAULT; 1658 break; 1659 } 1660 1661 err = xen_physdev_eoi_pirq(&eoi); 1662 if (!err && kvm_copy_to_gva(cs, arg, &eoi, sizeof(eoi))) { 1663 err = -EFAULT; 1664 } 1665 break; 1666 } 1667 case PHYSDEVOP_irq_status_query: { 1668 struct physdev_irq_status_query query; 1669 1670 qemu_build_assert(sizeof(query) == 8); 1671 if (kvm_copy_from_gva(cs, arg, &query, sizeof(query))) { 1672 err = -EFAULT; 1673 break; 1674 } 1675 1676 err = xen_physdev_query_pirq(&query); 1677 if (!err && kvm_copy_to_gva(cs, arg, &query, sizeof(query))) { 1678 err = -EFAULT; 1679 } 1680 break; 1681 } 1682 case PHYSDEVOP_get_free_pirq: { 1683 struct physdev_get_free_pirq get; 1684 1685 qemu_build_assert(sizeof(get) == 8); 1686 if (kvm_copy_from_gva(cs, arg, &get, sizeof(get))) { 1687 err = -EFAULT; 1688 break; 1689 } 1690 1691 err = xen_physdev_get_free_pirq(&get); 1692 if (!err && kvm_copy_to_gva(cs, arg, &get, sizeof(get))) { 1693 err = -EFAULT; 1694 } 1695 break; 1696 } 1697 case PHYSDEVOP_pirq_eoi_gmfn_v2: /* FreeBSD 13 makes this hypercall */ 1698 err = -ENOSYS; 1699 break; 1700 1701 default: 1702 return false; 1703 } 1704 1705 exit->u.hcall.result = err; 1706 return true; 1707 } 1708 1709 static bool do_kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit) 1710 { 1711 uint16_t code = 
exit->u.hcall.input; 1712 1713 if (exit->u.hcall.cpl > 0) { 1714 exit->u.hcall.result = -EPERM; 1715 return true; 1716 } 1717 1718 switch (code) { 1719 case __HYPERVISOR_set_timer_op: 1720 if (exit->u.hcall.longmode) { 1721 return kvm_xen_hcall_set_timer_op(exit, cpu, 1722 exit->u.hcall.params[0]); 1723 } else { 1724 /* In 32-bit mode, the 64-bit timer value is in two args. */ 1725 uint64_t val = ((uint64_t)exit->u.hcall.params[1]) << 32 | 1726 (uint32_t)exit->u.hcall.params[0]; 1727 return kvm_xen_hcall_set_timer_op(exit, cpu, val); 1728 } 1729 case __HYPERVISOR_grant_table_op: 1730 return kvm_xen_hcall_gnttab_op(exit, cpu, exit->u.hcall.params[0], 1731 exit->u.hcall.params[1], 1732 exit->u.hcall.params[2]); 1733 case __HYPERVISOR_sched_op: 1734 return kvm_xen_hcall_sched_op(exit, cpu, exit->u.hcall.params[0], 1735 exit->u.hcall.params[1]); 1736 case __HYPERVISOR_event_channel_op: 1737 return kvm_xen_hcall_evtchn_op(exit, cpu, exit->u.hcall.params[0], 1738 exit->u.hcall.params[1]); 1739 case __HYPERVISOR_vcpu_op: 1740 return kvm_xen_hcall_vcpu_op(exit, cpu, 1741 exit->u.hcall.params[0], 1742 exit->u.hcall.params[1], 1743 exit->u.hcall.params[2]); 1744 case __HYPERVISOR_hvm_op: 1745 return kvm_xen_hcall_hvm_op(exit, cpu, exit->u.hcall.params[0], 1746 exit->u.hcall.params[1]); 1747 case __HYPERVISOR_memory_op: 1748 return kvm_xen_hcall_memory_op(exit, cpu, exit->u.hcall.params[0], 1749 exit->u.hcall.params[1]); 1750 case __HYPERVISOR_physdev_op: 1751 return kvm_xen_hcall_physdev_op(exit, cpu, exit->u.hcall.params[0], 1752 exit->u.hcall.params[1]); 1753 case __HYPERVISOR_xen_version: 1754 return kvm_xen_hcall_xen_version(exit, cpu, exit->u.hcall.params[0], 1755 exit->u.hcall.params[1]); 1756 default: 1757 return false; 1758 } 1759 } 1760 1761 int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit) 1762 { 1763 if (exit->type != KVM_EXIT_XEN_HCALL) { 1764 return -1; 1765 } 1766 1767 /* 1768 * The kernel latches the guest 32/64 mode when the MSR is used to fill 
1769 * the hypercall page. So if we see a hypercall in a mode that doesn't 1770 * match our own idea of the guest mode, fetch the kernel's idea of the 1771 * "long mode" to remain in sync. 1772 */ 1773 if (exit->u.hcall.longmode != xen_is_long_mode()) { 1774 xen_sync_long_mode(); 1775 } 1776 1777 if (!do_kvm_xen_handle_exit(cpu, exit)) { 1778 /* 1779 * Some hypercalls will be deliberately "implemented" by returning 1780 * -ENOSYS. This case is for hypercalls which are unexpected. 1781 */ 1782 exit->u.hcall.result = -ENOSYS; 1783 qemu_log_mask(LOG_UNIMP, "Unimplemented Xen hypercall %" 1784 PRId64 " (0x%" PRIx64 " 0x%" PRIx64 " 0x%" PRIx64 ")\n", 1785 (uint64_t)exit->u.hcall.input, 1786 (uint64_t)exit->u.hcall.params[0], 1787 (uint64_t)exit->u.hcall.params[1], 1788 (uint64_t)exit->u.hcall.params[2]); 1789 } 1790 1791 trace_kvm_xen_hypercall(CPU(cpu)->cpu_index, exit->u.hcall.cpl, 1792 exit->u.hcall.input, exit->u.hcall.params[0], 1793 exit->u.hcall.params[1], exit->u.hcall.params[2], 1794 exit->u.hcall.result); 1795 return 0; 1796 } 1797 1798 uint16_t kvm_xen_get_gnttab_max_frames(void) 1799 { 1800 KVMState *s = KVM_STATE(current_accel()); 1801 return s->xen_gnttab_max_frames; 1802 } 1803 1804 uint16_t kvm_xen_get_evtchn_max_pirq(void) 1805 { 1806 KVMState *s = KVM_STATE(current_accel()); 1807 return s->xen_evtchn_max_pirq; 1808 } 1809 1810 int kvm_put_xen_state(CPUState *cs) 1811 { 1812 X86CPU *cpu = X86_CPU(cs); 1813 CPUX86State *env = &cpu->env; 1814 uint64_t gpa; 1815 int ret; 1816 1817 gpa = env->xen_vcpu_info_gpa; 1818 if (gpa == INVALID_GPA) { 1819 gpa = env->xen_vcpu_info_default_gpa; 1820 } 1821 1822 if (gpa != INVALID_GPA) { 1823 ret = set_vcpu_info(cs, gpa); 1824 if (ret < 0) { 1825 return ret; 1826 } 1827 } 1828 1829 gpa = env->xen_vcpu_time_info_gpa; 1830 if (gpa != INVALID_GPA) { 1831 ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO, 1832 gpa); 1833 if (ret < 0) { 1834 return ret; 1835 } 1836 } 1837 1838 gpa = 
env->xen_vcpu_runstate_gpa; 1839 if (gpa != INVALID_GPA) { 1840 ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, 1841 gpa); 1842 if (ret < 0) { 1843 return ret; 1844 } 1845 } 1846 1847 if (env->xen_periodic_timer_period) { 1848 ret = do_set_periodic_timer(cs, env->xen_periodic_timer_period); 1849 if (ret < 0) { 1850 return ret; 1851 } 1852 } 1853 1854 if (!kvm_xen_has_cap(EVTCHN_SEND)) { 1855 /* 1856 * If the kernel has EVTCHN_SEND support then it handles timers too, 1857 * so the timer will be restored by kvm_xen_set_vcpu_timer() below. 1858 */ 1859 QEMU_LOCK_GUARD(&env->xen_timers_lock); 1860 if (env->xen_singleshot_timer_ns) { 1861 ret = do_set_singleshot_timer(cs, env->xen_singleshot_timer_ns, 1862 false); 1863 if (ret < 0) { 1864 return ret; 1865 } 1866 } 1867 return 0; 1868 } 1869 1870 if (env->xen_vcpu_callback_vector) { 1871 ret = kvm_xen_set_vcpu_callback_vector(cs); 1872 if (ret < 0) { 1873 return ret; 1874 } 1875 } 1876 1877 if (env->xen_virq[VIRQ_TIMER]) { 1878 do_set_vcpu_timer_virq(cs, 1879 RUN_ON_CPU_HOST_INT(env->xen_virq[VIRQ_TIMER])); 1880 } 1881 return 0; 1882 } 1883 1884 int kvm_get_xen_state(CPUState *cs) 1885 { 1886 X86CPU *cpu = X86_CPU(cs); 1887 CPUX86State *env = &cpu->env; 1888 uint64_t gpa; 1889 int ret; 1890 1891 /* 1892 * The kernel does not mark vcpu_info as dirty when it delivers interrupts 1893 * to it. It's up to userspace to *assume* that any page shared thus is 1894 * always considered dirty. The shared_info page is different since it's 1895 * an overlay and migrated separately anyway. 
1896 */ 1897 gpa = env->xen_vcpu_info_gpa; 1898 if (gpa == INVALID_GPA) { 1899 gpa = env->xen_vcpu_info_default_gpa; 1900 } 1901 if (gpa != INVALID_GPA) { 1902 MemoryRegionSection mrs = memory_region_find(get_system_memory(), 1903 gpa, 1904 sizeof(struct vcpu_info)); 1905 if (mrs.mr && 1906 !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) { 1907 memory_region_set_dirty(mrs.mr, mrs.offset_within_region, 1908 sizeof(struct vcpu_info)); 1909 } 1910 } 1911 1912 if (!kvm_xen_has_cap(EVTCHN_SEND)) { 1913 return 0; 1914 } 1915 1916 /* 1917 * If the kernel is accelerating timers, read out the current value of the 1918 * singleshot timer deadline. 1919 */ 1920 if (env->xen_virq[VIRQ_TIMER]) { 1921 struct kvm_xen_vcpu_attr va = { 1922 .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER, 1923 }; 1924 ret = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_GET_ATTR, &va); 1925 if (ret < 0) { 1926 return ret; 1927 } 1928 1929 /* 1930 * This locking is fairly pointless, and is here to appease Coverity. 1931 * There is an unavoidable race condition if a different vCPU sets a 1932 * timer for this vCPU after the value has been read out. But that's 1933 * OK in practice because *all* the vCPUs need to be stopped before 1934 * we set about migrating their state. 1935 */ 1936 QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock); 1937 env->xen_singleshot_timer_ns = va.u.timer.expires_ns; 1938 } 1939 1940 return 0; 1941 } 1942